diff --git a/.github/bot_config.yml b/.github/bot_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..1164444c1dc4dfcd2630f283f3d4e72cf1c7265d --- /dev/null +++ b/.github/bot_config.yml @@ -0,0 +1,25 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +# A list of assignees +assignees: + - saikumarchalla + - ravikyram diff --git a/.github/scripts/pylint.sh b/.github/scripts/pylint.sh new file mode 100755 index 0000000000000000000000000000000000000000..bb2ebebd8a87199a2138ef513cfd930af5b822bf --- /dev/null +++ b/.github/scripts/pylint.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Pylint wrapper extracted from main TensorFlow, sharing same exceptions. +# Specify --incremental to only check files touched since last commit on master, +# otherwise will recursively check current directory (full repo takes long!). + +set -euo pipefail + +# Download latest configs from main TensorFlow repo. +wget -q -O /tmp/pylintrc https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc + +SCRIPT_DIR=/tmp + +num_cpus() { + # Get the number of CPUs + if [[ -f /proc/cpuinfo ]]; then + N_CPUS=$(grep -c ^processor /proc/cpuinfo) + else + # Fallback method + N_CPUS=`getconf _NPROCESSORS_ONLN` + fi + if [[ -z ${N_CPUS} ]]; then + die "ERROR: Unable to determine the number of CPUs" + fi + + echo ${N_CPUS} +} + +get_changed_files_in_last_non_merge_git_commit() { + git diff --name-only $(git merge-base master $(git branch --show-current)) +} + +# List Python files changed in the last non-merge git commit that still exist, +# i.e., not removed. +# Usage: get_py_files_to_check [--incremental] +get_py_files_to_check() { + if [[ "$1" == "--incremental" ]]; then + CHANGED_PY_FILES=$(get_changed_files_in_last_non_merge_git_commit | \ + grep '.*\.py$') + + # Do not include files removed in the last non-merge commit. + PY_FILES="" + for PY_FILE in ${CHANGED_PY_FILES}; do + if [[ -f "${PY_FILE}" ]]; then + PY_FILES="${PY_FILES} ${PY_FILE}" + fi + done + + echo "${PY_FILES}" + else + find . -name '*.py' + fi +} + +do_pylint() { + if [[ $# == 1 ]] && [[ "$1" == "--incremental" ]]; then + PYTHON_SRC_FILES=$(get_py_files_to_check --incremental) + + if [[ -z "${PYTHON_SRC_FILES}" ]]; then + echo "do_pylint will NOT run due to --incremental flag and due to the "\ +"absence of Python code changes in the last commit." 
+ return 0 + fi + elif [[ $# != 0 ]]; then + echo "Invalid syntax for invoking do_pylint" + echo "Usage: do_pylint [--incremental]" + return 1 + else + PYTHON_SRC_FILES=$(get_py_files_to_check) + fi + + # Something happened. TF no longer has Python code if this branch is taken + if [[ -z ${PYTHON_SRC_FILES} ]]; then + echo "do_pylint found no Python files to check. Returning." + return 0 + fi + + # Now that we know we have to do work, check if `pylint` is installed + PYLINT_BIN="python3.8 -m pylint" + + echo "" + echo "check whether pylint is available or not." + echo "" + ${PYLINT_BIN} --version + if [[ $? -eq 0 ]] + then + echo "" + echo "pylint available, proceeding with pylint sanity check." + echo "" + else + echo "" + echo "pylint not available." + echo "" + return 1 + fi + + # Configure pylint using the following file + PYLINTRC_FILE="${SCRIPT_DIR}/pylintrc" + + if [[ ! -f "${PYLINTRC_FILE}" ]]; then + die "ERROR: Cannot find pylint rc file at ${PYLINTRC_FILE}" + fi + + # Run pylint in parallel, after some disk setup + NUM_SRC_FILES=$(echo ${PYTHON_SRC_FILES} | wc -w) + NUM_CPUS=$(num_cpus) + + echo "Running pylint on ${NUM_SRC_FILES} files with ${NUM_CPUS} "\ +"parallel jobs..." + echo "" + + PYLINT_START_TIME=$(date +'%s') + OUTPUT_FILE="$(mktemp)_pylint_output.log" + ERRORS_FILE="$(mktemp)_pylint_errors.log" + + rm -rf ${OUTPUT_FILE} + rm -rf ${ERRORS_FILE} + + set +e + # When running, filter to only contain the error code lines. Removes module + # header, removes lines of context that show up from some lines. + # Also, don't redirect stderr as this would hide pylint fatal errors. 
+ ${PYLINT_BIN} --rcfile="${PYLINTRC_FILE}" --output-format=parseable \ + --jobs=${NUM_CPUS} ${PYTHON_SRC_FILES} | grep '\[[CEFW]' > ${OUTPUT_FILE} + PYLINT_END_TIME=$(date +'%s') + + echo "" + echo "pylint took $((PYLINT_END_TIME - PYLINT_START_TIME)) s" + echo "" + + # Report only what we care about + # Ref https://pylint.readthedocs.io/en/latest/technical_reference/features.html + # E: all errors + # W0311 bad-indentation + # W0312 mixed-indentation + # C0330 bad-continuation + # C0301 line-too-long + # C0326 bad-whitespace + # W0611 unused-import + # W0622 redefined-builtin + grep -E '(\[E|\[W0311|\[W0312|\[C0330|\[C0301|\[C0326|\[W0611|\[W0622)' ${OUTPUT_FILE} > ${ERRORS_FILE} + + # Determine counts of errors + N_FORBID_ERRORS=$(wc -l ${ERRORS_FILE} | cut -d' ' -f1) + set -e + + # Now, print the errors we should fix + echo "" + if [[ ${N_FORBID_ERRORS} != 0 ]]; then + echo "Found ${N_FORBID_ERRORS} pylint errors:" + cat ${ERRORS_FILE} + fi + + echo "" + if [[ ${N_FORBID_ERRORS} != 0 ]]; then + echo "FAIL: Found ${N_FORBID_ERRORS} errors" + return 1 + else + echo "PASS: Found no errors" + fi +} + +do_pylint "$@" + diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 0000000000000000000000000000000000000000..7eef5309ecdf53125eb976f90c3b62f1a31a55d4 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,39 @@ + # Copyright 2019 The TensorFlow Authors. All Rights Reserved. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ # ============================================================================ + # + # THIS IS A GENERATED DOCKERFILE. + # + # This file was assembled from multiple pieces, whose use is documented + # throughout. Please refer to the TensorFlow dockerfiles documentation + # for more information. + +# Number of days of inactivity before an Issue or Pull Request becomes stale +daysUntilStale: 7 +# Number of days of inactivity before a stale Issue or Pull Request is closed +daysUntilClose: 7 +# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled) +onlyLabels: + - stat:awaiting response +# Comment to post when marking as stale. Set to `false` to disable +markComment: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you. +# Comment to post when removing the stale label. Set to `false` to disable +unmarkComment: false +closeComment: > + Closing as stale. Please reopen if you'd like to work on this further. 
+limitPerRun: 30 +# Limit to only `issues` or `pulls` +only: issues diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..744f440b053ddb5391a827b5406ddb9ad94eccef --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: CI +on: pull_request + +jobs: + pylint: + runs-on: ubuntu-latest + + steps: + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install pylint 2.4.4 + run: | + python -m pip install --upgrade pip + pip install pylint==2.4.4 + + - name: Checkout code + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 + + - name: Fetch master for diff + run: git fetch origin master:master + + - name: Run pylint script + run: bash ./.github/scripts/pylint.sh --incremental diff --git a/CODEOWNERS b/CODEOWNERS index 36b7ebd4e779dc110c53d49ed73cf43f519ca211..3e5bdd0f41abc3495046a390402b4d7fc6df88a4 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,61 +1,26 @@ * @tensorflow/tf-garden-team @tensorflow/tf-model-garden-team /official/ @rachellj218 @saberkun @jaeyounkim -/official/nlp/ @saberkun @chenGitHuber @lehougoogle @rachellj218 -/official/vision/ @pengchongjin @xianzhidu @yeqingli @arashwan @saberkun @rachellj218 -/research/adv_imagenet_models/ @alexeykurakin -/research/adversarial_crypto/ @dave-andersen -/research/adversarial_logit_pairing/ @alexeykurakin +/official/nlp/ @saberkun @chenGitHuber @lehougoogle @rachellj218 @jaeyounkim +/official/vision/ @xianzhidu @yeqingli @arashwan @saberkun @rachellj218 @jaeyounkim +/official/vision/beta/projects/assemblenet/ @mryoo +/official/vision/beta/projects/deepmac_maskrcnn/ @vighneshbirodkar +/official/vision/beta/projects/simclr/ @luotigerlsx @chentingpc @saxenasaurabh /research/adversarial_text/ @rsepassi @a-dai /research/attention_ocr/ @xavigibert /research/audioset/ @plakal @dpwe /research/autoaugment/* @barretzoph 
-/research/autoencoders/ @snurkabill -/research/brain_coder/ @danabo -/research/cognitive_mapping_and_planning/ @s-gupta -/research/compression/ @nmjohn +/research/cognitive_planning/ @s-gupta /research/cvt_text/ @clarkkev @lmthang -/research/deep_contextual_bandits/ @rikel /research/deep_speech/ @yhliang2018 /research/deeplab/ @aquariusjay @yknzhu @gpapan /research/delf/ @andrefaraujo -/research/domain_adaptation/ @bousmalis @dmrd /research/efficient-hrl/ @ofirnachum -/research/feelvos/ @pvoigtlaender @yuningchai @aquariusjay -/research/fivo/ @dieterichlawson -/research/global_objectives/ @mackeya-google -/research/im2txt/ @cshallue -/research/inception/ @shlens @vincentvanhoucke -/research/keypointnet/ @mnorouzi -/research/learned_optimizer/ @olganw @nirum -/research/learning_to_remember_rare_events/ @lukaszkaiser @ofirnachum -/research/learning_unsupervised_learning/ @lukemetz @nirum -/research/lexnet_nc/ @vered1986 @waterson /research/lfads/ @jazcollins @sussillo -/research/lm_1b/ @oriolvinyals @panyx0718 -/research/lm_commonsense/ @thtrieu /research/lstm_object_detection/ @yinxiaoli @yongzhe2160 /research/marco/ @vincentvanhoucke -/research/maskgan/ @liamb315 @a-dai -/research/namignizer/ @knathanieltucker -/research/neural_gpu/ @lukaszkaiser -/research/neural_programmer/ @arvind2505 -/research/next_frame_prediction/ @panyx0718 /research/object_detection/ @jch1 @tombstone @pkulzc /research/pcl_rl/ @ofirnachum -/research/ptn/ @xcyan @arkanath @hellojas @honglaklee -/research/qa_kg/ @yuyuz -/research/real_nvp/ @laurent-dinh /research/rebar/ @gjtucker -/research/sentiment_analysis/ @sculd -/research/seq2species/ @apbusia @depristo -/research/skip_thoughts/ @cshallue +/research/seq_flow_lite/ @thunderfyc /research/slim/ @sguada @marksandler2 -/research/steve/ @buckman-google -/research/street/ @theraysmith -/research/struct2depth/ @aneliaangelova -/research/swivel/ @waterson -/research/tcn/ @coreylynch @sermanet -/research/textsum/ @panyx0718 @peterjliu 
-/research/transformer/ @daviddao /research/vid2depth/ @rezama -/research/video_prediction/ @cbfinn diff --git a/README.md b/README.md index 203051feb7acbf3f6501d5c29516841958bedb75..c86ec4a705de8e630167e6113629204fc70dc696 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,6 @@ can take full advantage of TensorFlow for their research and product development ## [Announcements](https://github.com/tensorflow/models/wiki/Announcements) -| Date | News | -|------|------| -| July 10, 2020 | TensorFlow 2 meets the [Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) ([Blog](https://blog.tensorflow.org/2020/07/tensorflow-2-meets-object-detection-api.html)) | -| June 30, 2020 | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://github.com/tensorflow/models/tree/master/official/vision/detection#train-a-spinenet-49-based-mask-r-cnn) released ([Tweet](https://twitter.com/GoogleAI/status/1278016712978264064)) | -| June 17, 2020 | [Context R-CNN: Long Term Temporal Context for Per-Camera Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection#june-17th-2020) released ([Tweet](https://twitter.com/GoogleAI/status/1276571419422253057)) | -| May 21, 2020 | [Unifying Deep Local and Global Features for Image Search (DELG)](https://github.com/tensorflow/models/tree/master/research/delf#delg) code released | -| May 19, 2020 | [MobileDets: Searching for Object Detection Architectures for Mobile Accelerators](https://github.com/tensorflow/models/tree/master/research/object_detection#may-19th-2020) released | -| May 7, 2020 | [MnasFPN with MobileNet-V2 backbone](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#mobile-models) released for object detection | -| May 1, 2020 | [DELF: DEep Local Features](https://github.com/tensorflow/models/tree/master/research/delf) updated to support TensorFlow 2.1 | -| March 31, 2020 | 
[Introducing the Model Garden for TensorFlow 2](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) ([Tweet](https://twitter.com/TensorFlow/status/1245029834633297921)) | - ## Contributions [![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation) diff --git a/community/README.md b/community/README.md index c399c31efa667e34443de1e9fb7c834aa644d16b..bf8b0d34fb8de2095c9208e952e096f53512aabf 100644 --- a/community/README.md +++ b/community/README.md @@ -19,6 +19,15 @@ This repository provides a curated list of the GitHub repositories with machine | [ResNet 101](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet101) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | | [ResNet 50](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | | [ResNet 50v1.5](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50v1_5) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference
• FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | +| [EfficientNet](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Classification/ConvNets/efficientnet) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/pdf/1905.11946.pdf) | • Automatic mixed precision
• Horovod Multi-GPU training (NCCL)
• Multi-node training on a Pyxis/Enroot Slurm cluster
• XLA | [NVIDIA](https://github.com/NVIDIA) | + +### Object Detection + +| Model | Paper | Features | Maintainer | +|-------|-------|----------|------------| +| [R-FCN](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/rfcn) | [R-FCN: Object Detection
via Region-based Fully Convolutional Networks](https://arxiv.org/pdf/1605.06409) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | +| [SSD-MobileNet](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-mobilenet) | [MobileNets: Efficient Convolutional Neural Networks
for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference
• FP32 Inference | [Intel](https://github.com/IntelAI) | +| [SSD-ResNet34](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-resnet34) | [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325) | • Int8 Inference
• FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | ### Segmentation @@ -27,6 +36,25 @@ This repository provides a curated list of the GitHub repositories with machine | [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• TensorRT | [NVIDIA](https://github.com/NVIDIA) | | [U-Net Medical Image Segmentation](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• TensorRT | [NVIDIA](https://github.com/NVIDIA) | +## Natural Language Processing + +| Model | Paper | Features | Maintainer | +|-------|-------|----------|------------| +| [BERT](https://github.com/IntelAI/models/tree/master/benchmarks/language_modeling/tensorflow/bert_large) | [BERT: Pre-training of Deep Bidirectional Transformers
for Language Understanding](https://arxiv.org/pdf/1810.04805) | • FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | +| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/BERT) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805) | • Horovod Multi-GPU
• Multi-node with Horovod and Pyxis/Enroot Slurm cluster
• XLA
• Automatic mixed precision
• LAMB | [NVIDIA](https://github.com/NVIDIA) | +| [ELECTRA](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/ELECTRA) | [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/forum?id=r1xMH1BtvB) | • Automatic Mixed Precision
• Multi-GPU training support with Horovod
• Multi-node training on a Pyxis/Enroot Slurm cluster | [NVIDIA](https://github.com/NVIDIA) | +| [GNMT](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/mlperf_gnmt) | [Google’s Neural Machine Translation System:
Bridging the Gap between Human and Machine Translation](https://arxiv.org/pdf/1609.08144) | • FP32 Inference | [Intel](https://github.com/IntelAI) | +| [Transformer-LT (Official)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_lt_official) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Inference | [Intel](https://github.com/IntelAI) | +| [Transformer-LT (MLPerf)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_mlperf) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Training | [Intel](https://github.com/IntelAI) | + +## Recommendation Systems + +| Model | Paper | Features | Maintainer | +|-------|-------|----------|------------| +| [Wide & Deep](https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • FP32 Inference
• FP32 Training | [Intel](https://github.com/IntelAI) | +| [Wide & Deep](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/WideAndDeep) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • Automatic mixed precision
• Multi-GPU training support with Horovod
• XLA | [NVIDIA](https://github.com/NVIDIA) | +| [DLRM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/DLRM) | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/pdf/1906.00091.pdf) | • Automatic Mixed Precision
• Hybrid-parallel multiGPU training using Horovod all2all
• Multinode training for Pyxis/Enroot Slurm clusters
• XLA
• Criteo dataset preprocessing with Spark on GPU | [NVIDIA](https://github.com/NVIDIA) | + ## Contributions If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). diff --git a/official/README-TPU.md b/official/README-TPU.md index 8a54f95314abc2bae40d11acdf5439939acf7583..28a5a0a73d210e9fe6e00db38d0e911e3d771ddf 100644 --- a/official/README-TPU.md +++ b/official/README-TPU.md @@ -23,3 +23,7 @@ be used to classify ImageNet's dataset of 1000 classes. See [Training ResNet on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/resnet-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/CxlDK8YMRrSpYEGtBRpOhg). * [retinanet](vision/detection): A fast and powerful object detector. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/b8NRnWU3TqG6Rw0UxueU6Q). +* [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA). + +## Recommendation +* [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ). diff --git a/official/README.md b/official/README.md index 77e43ea9c15e9a18cfee3fb757016cf5091d0c28..c53decf083e302896fc4a7a92525cb2128ef6352 100644 --- a/official/README.md +++ b/official/README.md @@ -19,7 +19,7 @@ In the near future, we will add: * State-of-the-art language understanding models. * State-of-the-art image classification models. -* State-of-the-art objection detection and instance segmentation models. +* State-of-the-art object detection and instance segmentation models. 
## Table of Contents @@ -41,6 +41,7 @@ In the near future, we will add: |-------|-------------------| | [MNIST](vision/image_classification) | A basic model to classify digits from the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) | | [ResNet](vision/image_classification) | [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) | +| [ResNet-RS](vision/beta/MODEL_GARDEN.md) | [Revisiting ResNets: Improved Training and Scaling Strategies](https://arxiv.org/abs/2103.07579) | | [EfficientNet](vision/image_classification) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) | #### Object Detection and Segmentation @@ -61,6 +62,7 @@ In the near future, we will add: | [NHNet (News Headline generation model)](nlp/nhnet) | [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) | | [Transformer](nlp/transformer) | [Attention Is All You Need](https://arxiv.org/abs/1706.03762) | | [XLNet](nlp/xlnet) | [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) | +| [MobileBERT](nlp/projects/mobilebert) | [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) | ### Recommendation @@ -98,17 +100,30 @@ pip3 install tf-nightly #### Method 1: Install the TensorFlow Model Garden pip package -**tf-models-nightly** is the nightly Model Garden package -created daily automatically. pip will install all models -and dependencies automatically. +**tf-models-official** is the stable Model Garden package. +pip will install all models and dependencies automatically. ```shell -pip install tf-models-nightly +pip install tf-models-official +``` + +If you are using nlp packages, please also install **tensorflow-text**: + +```shell +pip install tensorflow-text ``` Please check out our [example](colab/fine_tuning_bert.ipynb) to learn how to use a PIP package. 
+Note that **tf-models-official** may not include the latest changes in this +github repo. To include latest changes, you may install **tf-models-nightly**, +which is the nightly Model Garden package created daily automatically. + +```shell +pip install tf-models-nightly +``` + #### Method 2: Clone the source 1. Clone the GitHub repository: @@ -136,6 +151,27 @@ os.environ['PYTHONPATH'] += ":/path/to/models" pip3 install --user -r official/requirements.txt ``` +Finally, if you are using nlp packages, please also install +**tensorflow-text-nightly**: + +```shell +pip3 install tensorflow-text-nightly +``` + ## Contributions If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). + +## Citing TF Official Model Garden + +To cite this repository: + +``` +@software{tfmodels2020github, + author = {Chen Chen and Xianzhi Du and Le Hou and Jaeyoun Kim and Jing Li and + Yeqing Li and Abdullah Rashwan and Fan Yang and Hongkun Yu}, + title = {TensorFlow Official Model Garden}, + url = {https://github.com/tensorflow/models/tree/master/official}, + year = {2020}, +} +``` diff --git a/official/benchmark/benchmark_wrappers.py b/official/benchmark/benchmark_wrappers.py deleted file mode 100644 index 3d38b690c7865e0ab560e59422a2454e44be052d..0000000000000000000000000000000000000000 --- a/official/benchmark/benchmark_wrappers.py +++ /dev/null @@ -1,97 +0,0 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utils to annotate and trace benchmarks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -from absl import logging -from absl.testing import flagsaver - -FLAGS = flags.FLAGS - -flags.DEFINE_multi_string( - 'benchmark_method_flags', None, - 'Optional list of runtime flags of the form key=value. Specify ' - 'multiple times to specify different flags. These will override the FLAGS ' - 'object directly after hardcoded settings in individual benchmark methods ' - 'before they call _run_and_report benchmark. Example if we set ' - '--benchmark_method_flags=train_steps=10 and a benchmark method hardcodes ' - 'FLAGS.train_steps=10000 and later calls _run_and_report_benchmark, ' - 'it\'ll only run for 10 steps. This is useful for ' - 'debugging/profiling workflows.') - - -def enable_runtime_flags(decorated_func): - """Sets attributes from --benchmark_method_flags for method execution. - - @enable_runtime_flags decorator temporarily adds flags passed in via - --benchmark_method_flags and runs the decorated function in that context. - - A user can set --benchmark_method_flags=train_steps=5 to run the benchmark - method in the snippet below with FLAGS.train_steps=5 for debugging (without - modifying the benchmark code). - - class ModelBenchmark(): - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - # run benchmark ... - # report benchmark results ... - - def benchmark_method(self): - FLAGS.train_steps = 1000 - ... - self._run_and_report_benchmark() - - Args: - decorated_func: The method that runs the benchmark after previous setup - execution that set some flags. 
- - Returns: - new_func: The same method which executes in a temporary context where flag - overrides from --benchmark_method_flags are active. - """ - - def runner(*args, **kwargs): - """Creates a temporary context to activate --benchmark_method_flags.""" - if FLAGS.benchmark_method_flags: - saved_flag_values = flagsaver.save_flag_values() - for key_value in FLAGS.benchmark_method_flags: - key, value = key_value.split('=', 1) - try: - numeric_float = float(value) - numeric_int = int(numeric_float) - if abs(numeric_int) == abs(numeric_float): - flag_value = numeric_int - else: - flag_value = numeric_float - except ValueError: - flag_value = value - logging.info('Setting --%s=%s', key, flag_value) - setattr(FLAGS, key, flag_value) - else: - saved_flag_values = None - try: - result = decorated_func(*args, **kwargs) - return result - finally: - if saved_flag_values: - flagsaver.restore_flag_values(saved_flag_values) - - return runner diff --git a/official/benchmark/bert_benchmark.py b/official/benchmark/bert_benchmark.py deleted file mode 100644 index 35daac672ebe87434e99db8c7c3bbcc67a8061e4..0000000000000000000000000000000000000000 --- a/official/benchmark/bert_benchmark.py +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes BERT benchmarks and accuracy tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import json -import math -import os -import time - -# pylint: disable=g-bad-import-order -from absl import flags -from absl.testing import flagsaver -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.benchmark import bert_benchmark_utils as benchmark_utils -from official.benchmark import owner_utils -from official.nlp.bert import configs -from official.nlp.bert import run_classifier -from official.utils.misc import distribution_utils -from official.benchmark import benchmark_wrappers - -# pylint: disable=line-too-long -PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt' -CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record' -CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record' -CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data' -MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json' -# pylint: enable=line-too-long - -TMP_DIR = os.getenv('TMPDIR') -FLAGS = flags.FLAGS - - -class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase): - """Base class to hold methods common to test classes in the module.""" - - def __init__(self, output_dir=None, tpu=None): - super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu) - self.num_epochs = None - self.num_steps_per_epoch = None - FLAGS.steps_per_loop = 1 - - @flagsaver.flagsaver - def _run_bert_classifier(self, callbacks=None, use_ds=True): - """Starts BERT classification task.""" - with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: - input_meta_data = 
json.loads(reader.read().decode('utf-8')) - - bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs - if self.num_steps_per_epoch: - steps_per_epoch = self.num_steps_per_epoch - else: - train_data_size = input_meta_data['train_data_size'] - steps_per_epoch = int(train_data_size / FLAGS.train_batch_size) - warmup_steps = int(epochs * steps_per_epoch * 0.1) - eval_steps = int( - math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size)) - if self.tpu: - strategy = distribution_utils.get_distribution_strategy( - distribution_strategy='tpu', tpu_address=self.tpu) - else: - strategy = distribution_utils.get_distribution_strategy( - distribution_strategy='mirrored' if use_ds else 'off', - num_gpus=self.num_gpus) - - max_seq_length = input_meta_data['max_seq_length'] - train_input_fn = run_classifier.get_dataset_fn( - FLAGS.train_data_path, - max_seq_length, - FLAGS.train_batch_size, - is_training=True) - eval_input_fn = run_classifier.get_dataset_fn( - FLAGS.eval_data_path, - max_seq_length, - FLAGS.eval_batch_size, - is_training=False) - _, summary = run_classifier.run_bert_classifier( - strategy, - bert_config, - input_meta_data, - FLAGS.model_dir, - epochs, - steps_per_epoch, - FLAGS.steps_per_loop, - eval_steps, - warmup_steps, - FLAGS.learning_rate, - FLAGS.init_checkpoint, - train_input_fn, - eval_input_fn, - training_callbacks=False, - custom_callbacks=callbacks) - return summary - - -class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase): - """Short benchmark performance tests for BERT model. - - Tests BERT classification performance in different GPU, TPU configurations. - The naming convention of below test cases follow - `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and - `benchmark_(topology)_tpu_(dataset type)` for TPUs. 
- """ - - def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): - super(BertClassifyBenchmarkReal, self).__init__( - output_dir=output_dir, tpu=tpu) - - self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH - self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH - self.bert_config_file = MODEL_CONFIG_FILE_PATH - self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH - - # Since we only care about performance metrics, we limit - # the number of training steps and epochs to prevent unnecessarily - # long tests. - self.num_steps_per_epoch = 100 - self.num_epochs = 1 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - training_summary_path, - min_accuracy=0, - max_accuracy=1, - use_ds=True): - """Starts BERT performance benchmark test.""" - start_time_sec = time.time() - summary = self._run_bert_classifier( - callbacks=[self.timer_callback], use_ds=use_ds) - wall_time_sec = time.time() - start_time_sec - - # Since we do not load from any pretrained checkpoints, we ignore all - # accuracy metrics. 
- summary.pop('eval_metrics', None) - summary['start_time_sec'] = start_time_sec - - super(BertClassifyBenchmarkReal, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=min_accuracy, - max_accuracy=max_accuracy) - - def benchmark_1_gpu_mrpc(self): - """Test BERT model performance with 1 GPU.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 4 - FLAGS.eval_batch_size = 4 - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - def benchmark_1_gpu_mrpc_xla(self): - """Test BERT model performance with 1 GPU.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 4 - FLAGS.eval_batch_size = 4 - FLAGS.enable_xla = True - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - def benchmark_1_gpu_mrpc_no_dist_strat(self): - """Test BERT model performance with 1 GPU, no distribution strategy.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 4 - FLAGS.eval_batch_size = 4 - - summary_path = os.path.join(FLAGS.model_dir, - 
'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path, use_ds=False) - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu_mrpc(self): - """Test BERT model performance with 8 GPUs.""" - - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - def benchmark_1_gpu_amp_mrpc_no_dist_strat(self): - """Performance for 1 GPU no DS with automatic mixed precision.""" - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_amp_mrpc_no_dist_strat') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 4 - FLAGS.eval_batch_size = 4 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path, use_ds=False) - - def benchmark_8_gpu_amp_mrpc(self): - """Test BERT model performance with 8 GPUs with automatic mixed precision.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 32 - FLAGS.eval_batch_size = 32 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - 
self._run_and_report_benchmark(summary_path, use_ds=False) - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu_mrpc(self): - """Test BERT model performance with 2x2 TPU.""" - - self._setup() - FLAGS.steps_per_loop = 50 - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc') - FLAGS.train_data_path = self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.train_batch_size = 32 - FLAGS.eval_batch_size = 32 - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path, use_ds=False) - - -class BertClassifyAccuracy(BertClassifyBenchmarkBase): - """Short accuracy test for BERT model. - - Tests BERT classification task model accuracy. The naming - convention of below test cases follow - `benchmark_(number of gpus)_gpu_(dataset type)` format. - """ - - def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): - self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH - self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH - self.bert_config_file = MODEL_CONFIG_FILE_PATH - self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH - self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH - - super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - training_summary_path, - min_accuracy=0.84, - max_accuracy=0.88): - """Starts BERT accuracy benchmark test.""" - - start_time_sec = time.time() - summary = self._run_bert_classifier(callbacks=[self.timer_callback]) - wall_time_sec = time.time() - start_time_sec - - super(BertClassifyAccuracy, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=min_accuracy, - max_accuracy=max_accuracy) - - def _setup(self): - super(BertClassifyAccuracy, self)._setup() - FLAGS.train_data_path = 
self.train_data_path - FLAGS.eval_data_path = self.eval_data_path - FLAGS.input_meta_data_path = self.input_meta_data_path - FLAGS.bert_config_file = self.bert_config_file - FLAGS.init_checkpoint = self.pretrained_checkpoint_path - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu_mrpc(self): - """Run BERT model accuracy test with 8 GPUs. - - Due to comparatively small cardinality of MRPC dataset, training - accuracy metric has high variance between trainings. As so, we - set the wide range of allowed accuracy (84% to 88%). - """ - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc') - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - def benchmark_8_gpu_mrpc_xla(self): - """Run BERT model accuracy test with 8 GPUs with XLA.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla') - FLAGS.enable_xla = True - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu_mrpc(self): - """Run BERT model accuracy test on 2x2 TPU.""" - self._setup() - FLAGS.steps_per_loop = 50 - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc') - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/bert_benchmark_utils.py b/official/benchmark/bert_benchmark_utils.py deleted file mode 100644 index 705a243315616080fe15c70925ed74a905818cdc..0000000000000000000000000000000000000000 --- a/official/benchmark/bert_benchmark_utils.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions or classes shared between BERT benchmarks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time - -# pylint: disable=g-bad-import-order -import numpy as np -from absl import flags -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.utils.flags import core as flags_core -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark - -FLAGS = flags.FLAGS - - -class BenchmarkTimerCallback(tf.keras.callbacks.Callback): - """Callback that records time it takes to run each batch.""" - - def __init__(self, num_batches_to_skip=10): - super(BenchmarkTimerCallback, self).__init__() - self.batch_start_times = {} - self.batch_stop_times = {} - - def on_batch_begin(self, batch, logs=None): - self.batch_start_times[batch] = time.time() - - def on_batch_end(self, batch, logs=None): - # If there are multiple steps_per_loop, the end batch index will not be the - # same as the starting index. Use the last starting index instead. 
- if batch not in self.batch_start_times: - batch = max(self.batch_start_times.keys()) - - self.batch_stop_times[batch] = time.time() - - def get_examples_per_sec(self, batch_size, num_batches_to_skip=1): - batch_durations = [] - for batch in self.batch_start_times: - if batch in self.batch_stop_times and batch >= num_batches_to_skip: - batch_durations.append(self.batch_stop_times[batch] - - self.batch_start_times[batch]) - return batch_size / np.mean(batch_durations) - - def get_startup_time(self, program_start_time): - return self.batch_start_times[0] - program_start_time - - -class BertBenchmarkBase(PerfZeroBenchmark): - """Base class to hold methods common to test classes.""" - local_flags = None - - def __init__(self, output_dir=None, tpu=None, **kwargs): - super(BertBenchmarkBase, self).__init__( - output_dir=output_dir, tpu=tpu, **kwargs) - self.num_gpus = 8 - self.timer_callback = None - - def _setup(self): - """Sets up and resets flags before each test.""" - super(BertBenchmarkBase, self)._setup() - self.timer_callback = BenchmarkTimerCallback() - - def _report_benchmark(self, stats, wall_time_sec, min_accuracy, max_accuracy): - """Report benchmark results by writing to local protobuf file. - - Args: - stats: dict returned from BERT models with known entries. - wall_time_sec: the during of the benchmark execution in seconds - min_accuracy: Minimum classification accuracy constraint to verify - correctness of the model. - max_accuracy: Maximum classification accuracy constraint to verify - correctness of the model. 
- """ - metrics = [{ - 'name': 'training_loss', - 'value': stats['train_loss'], - }] - if self.timer_callback: - metrics.append({ - 'name': - 'exp_per_second', - 'value': - self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size * - FLAGS.steps_per_loop) - }) - else: - metrics.append({ - 'name': 'exp_per_second', - 'value': 0.0, - }) - if self.timer_callback and 'start_time_sec' in stats: - metrics.append({ - 'name': 'startup_time', - 'value': self.timer_callback.get_startup_time(stats['start_time_sec']) - }) - - if 'eval_metrics' in stats: - metrics.append({ - 'name': 'eval_accuracy', - 'value': stats['eval_metrics'], - 'min_value': min_accuracy, - 'max_value': max_accuracy, - }) - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark( - iters=stats['total_training_steps'], - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) diff --git a/official/benchmark/bert_pretrain_benchmark.py b/official/benchmark/bert_pretrain_benchmark.py deleted file mode 100644 index d63c894847d8e9e9308523d3efcb06c162d323c6..0000000000000000000000000000000000000000 --- a/official/benchmark/bert_pretrain_benchmark.py +++ /dev/null @@ -1,179 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes benchmark testing for bert pretraining.""" -# pylint: disable=line-too-long -from __future__ import print_function - -import json -import os -import time -from typing import Optional - -from absl import flags -from absl import logging -import tensorflow as tf # pylint: disable=g-bad-import-order - -from official.benchmark import benchmark_wrappers -from official.benchmark import bert_benchmark_utils -from official.benchmark import owner_utils -from official.nlp.bert import run_pretraining -from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils - -# Pretrain masked lanauge modeling accuracy range: -MIN_MLM_ACCURACY = 0.635 -MAX_MLM_ACCURACY = 0.645 - -# Pretrain next sentence prediction accuracy range: -MIN_NSP_ACCURACY = 0.94 -MAX_NSP_ACCURACY = 0.96 - -BERT_PRETRAIN_FILES_SEQ128 = 'gs://mlcompass-data/bert/pretraining_data/seq_128/wikipedia.tfrecord*,gs://mlcompass-data/bert/pretraining_data/seq_128/books.tfrecord*' -BERT_BASE_CONFIG_FILE = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_config.json' - -FLAGS = flags.FLAGS - - -class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase): - """Benchmark accuracy tests for BERT Pretraining.""" - - def __init__(self, - output_dir: Optional[str] = None, - tpu: Optional[str] = None, - **kwargs): - """Inits BertPretrainAccuracyBenchmark class. - - Args: - output_dir: Directory where to output e.g. log files - tpu: TPU name to use in a TPU benchmark. - **kwargs: Additional keyword arguments. 
- """ - super(BertPretrainAccuracyBenchmark, self).__init__( - output_dir=output_dir, tpu=tpu, **kwargs) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool): - """Runs and reports the benchmark given the provided configuration.""" - distribution = distribution_utils.get_distribution_strategy( - distribution_strategy='tpu', tpu_address=self.tpu) - logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str()) - start_time_sec = time.time() - run_pretraining.run_bert_pretrain( - strategy=distribution, custom_callbacks=self.timer_callback) - wall_time_sec = time.time() - start_time_sec - - with tf.io.gfile.GFile(summary_path, 'rb') as reader: - summary = json.loads(reader.read().decode('utf-8')) - self._report_benchmark(summary, start_time_sec, wall_time_sec, - report_accuracy) - - def _report_benchmark(self, summary, start_time_sec, wall_time_sec, - report_accuracy): - metrics = [{ - 'name': 'train_loss', - 'value': summary['train_loss'], - }, { - 'name': - 'exp_per_second', - 'value': - self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size * - FLAGS.steps_per_loop) - }, { - 'name': 'startup_time', - 'value': self.timer_callback.get_startup_time(start_time_sec) - }] - if report_accuracy: - metrics.extend([{ - 'name': 'masked_lm_accuracy', - 'value': summary['masked_lm_accuracy'], - 'min_value': MIN_MLM_ACCURACY, - 'max_value': MAX_MLM_ACCURACY, - }, { - 'name': 'next_sentence_accuracy', - 'value': summary['next_sentence_accuracy'], - 'min_value': MIN_NSP_ACCURACY, - 'max_value': MAX_NSP_ACCURACY, - }]) - self.report_benchmark( - iters=summary['total_training_steps'], - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_core.get_nondefault_flags_as_str()}) - - def _specify_common_flags(self): - FLAGS.bert_config_file = BERT_BASE_CONFIG_FILE - FLAGS.train_batch_size = 512 - FLAGS.learning_rate = 1e-4 - FLAGS.warmup_steps = 10000 - FLAGS.steps_per_loop = 10000 - 
FLAGS.distribution_strategy = 'tpu' - FLAGS.input_files = BERT_PRETRAIN_FILES_SEQ128 - FLAGS.max_seq_length = 128 - FLAGS.max_predictions_per_seq = 20 - FLAGS.dtype = 'bf16' - - @owner_utils.Owner('tf-model-garden') - def benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps(self): - """Test bert pretraining with 8x8 TPU for 500k steps.""" - # This is used for accuracy test. - self._setup() - self._specify_common_flags() - FLAGS.num_steps_per_epoch = 500000 - FLAGS.num_train_epochs = 1 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps') - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - # Set train_summary_interval to -1 to disable training summary, because - # writing summary to gcs may fail and summaries are not needed for this - # accuracy benchmark test. - FLAGS.train_summary_interval = -1 - self._run_and_report_benchmark(summary_path=summary_path, - report_accuracy=True) - - @owner_utils.Owner('tf-model-garden') - def benchmark_perf_4x4_tpu_bf16_seq128_10k_steps(self): - """Test bert pretraining with 4x4 TPU for 10000 steps.""" - self._setup() - self._specify_common_flags() - FLAGS.num_steps_per_epoch = 5000 - FLAGS.num_train_epochs = 2 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_perf_4x4_tpu_bf16_seq128_10k_steps') - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - # Disable accuracy check. - self._run_and_report_benchmark( - summary_path=summary_path, report_accuracy=False) - - @owner_utils.Owner('tf-model-garden') - def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self): - """Test bert pretraining with 8x8 TPU for 10000 steps.""" - self._setup() - self._specify_common_flags() - FLAGS.num_steps_per_epoch = 5000 - FLAGS.num_train_epochs = 2 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps') - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - # Disable accuracy check. 
- self._run_and_report_benchmark(summary_path=summary_path, - report_accuracy=False) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/bert_squad_benchmark.py b/official/benchmark/bert_squad_benchmark.py deleted file mode 100644 index dab90a485b9a2c22d11da82ec1d9c320ea0db114..0000000000000000000000000000000000000000 --- a/official/benchmark/bert_squad_benchmark.py +++ /dev/null @@ -1,608 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes BERT SQuAD benchmarks and accuracy tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import os -import time - -# pylint: disable=g-bad-import-order -from absl import flags -from absl import logging -from absl.testing import flagsaver -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.benchmark import bert_benchmark_utils as benchmark_utils -from official.benchmark import owner_utils -from official.nlp.bert import run_squad -from official.utils.misc import distribution_utils -from official.utils.misc import keras_utils -from official.benchmark import benchmark_wrappers - - -# pylint: disable=line-too-long -PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt' -SQUAD_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_train.tf_record' -SQUAD_PREDICT_FILE = 'gs://tf-perfzero-data/bert/squad/dev-v1.1.json' -SQUAD_VOCAB_FILE = 'gs://tf-perfzero-data/bert/squad/vocab.txt' -SQUAD_MEDIUM_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_medium_meta_data' -SQUAD_LONG_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_long_meta_data' -SQUAD_FULL_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_full_meta_data' -MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json' -# pylint: enable=line-too-long - -TMP_DIR = os.getenv('TMPDIR') -FLAGS = flags.FLAGS - - -class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase): - """Base class to hold methods common to test classes in the module.""" - - def __init__(self, output_dir=None, tpu=None): - super(BertSquadBenchmarkBase, self).__init__(output_dir=output_dir, tpu=tpu) - - def _read_training_summary_from_file(self): - """Reads the training summary from a 
file.""" - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - with tf.io.gfile.GFile(summary_path, 'rb') as reader: - return json.loads(reader.read().decode('utf-8')) - - def _read_input_meta_data_from_file(self): - """Reads the input metadata from a file.""" - with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: - return json.loads(reader.read().decode('utf-8')) - - def _get_distribution_strategy(self, ds_type='mirrored'): - """Gets the distribution strategy. - - Args: - ds_type: String, the distribution strategy type to be used. Can be - 'mirrored', 'multi_worker_mirrored', 'tpu' and 'off'. - - Returns: - A `tf.distribute.DistibutionStrategy` object. - """ - if self.tpu or ds_type == 'tpu': - return distribution_utils.get_distribution_strategy( - distribution_strategy='tpu', tpu_address=self.tpu) - elif ds_type == 'multi_worker_mirrored': - # Configures cluster spec for multi-worker distribution strategy. - _ = distribution_utils.configure_cluster(FLAGS.worker_hosts, - FLAGS.task_index) - return distribution_utils.get_distribution_strategy( - distribution_strategy=ds_type, - num_gpus=self.num_gpus, - all_reduce_alg=FLAGS.all_reduce_alg) - - def _init_gpu_and_data_threads(self): - """Set env variables before any TF calls.""" - if FLAGS.tf_gpu_thread_mode: - keras_utils.set_gpu_thread_mode_and_count( - per_gpu_thread_count=FLAGS.per_gpu_thread_count, - gpu_thread_mode=FLAGS.tf_gpu_thread_mode, - num_gpus=self.num_gpus, - datasets_num_private_threads=FLAGS.datasets_num_private_threads) - - @flagsaver.flagsaver - def _train_squad(self, run_eagerly=False, ds_type='mirrored'): - """Runs BERT SQuAD training. 
Uses mirrored strategy by default.""" - self._init_gpu_and_data_threads() - input_meta_data = self._read_input_meta_data_from_file() - strategy = self._get_distribution_strategy(ds_type) - - run_squad.train_squad( - strategy=strategy, - input_meta_data=input_meta_data, - run_eagerly=run_eagerly, - custom_callbacks=[self.timer_callback]) - - @flagsaver.flagsaver - def _evaluate_squad(self, ds_type='mirrored'): - """Runs BERT SQuAD evaluation. Uses mirrored strategy by default.""" - self._init_gpu_and_data_threads() - input_meta_data = self._read_input_meta_data_from_file() - strategy = self._get_distribution_strategy(ds_type) - - if input_meta_data.get('version_2_with_negative', False): - logging.error('In memory evaluation result for SQuAD v2 is not accurate') - eval_metrics = run_squad.eval_squad(strategy=strategy, - input_meta_data=input_meta_data) - # Use F1 score as reported evaluation metric. - self.eval_metrics = eval_metrics['final_f1'] - - -class BertSquadBenchmarkReal(BertSquadBenchmarkBase): - """Short benchmark performance tests for BERT SQuAD model. - - Tests BERT SQuAD performance in different GPU configurations. - The naming convention of below test cases follow - `benchmark_(number of gpus)_gpu` format for GPUs and - `benchmark_(topology)_tpu` format for TPUs. 
- """ - - def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): - super(BertSquadBenchmarkReal, self).__init__(output_dir=output_dir, tpu=tpu) - - def _setup(self): - """Sets up the benchmark and SQuAD flags.""" - super(BertSquadBenchmarkReal, self)._setup() - FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH - FLAGS.predict_file = SQUAD_PREDICT_FILE - FLAGS.vocab_file = SQUAD_VOCAB_FILE - FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH - FLAGS.num_train_epochs = 1 - FLAGS.steps_per_loop = 100 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - run_eagerly=False, - ds_type='mirrored'): - """Runs the benchmark and reports various metrics.""" - if FLAGS.train_batch_size <= 4 or run_eagerly: - FLAGS.input_meta_data_path = SQUAD_MEDIUM_INPUT_META_DATA_PATH - else: - FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH - start_time_sec = time.time() - self._train_squad(run_eagerly=run_eagerly, ds_type=ds_type) - wall_time_sec = time.time() - start_time_sec - - summary = self._read_training_summary_from_file() - summary['start_time_sec'] = start_time_sec - - super(BertSquadBenchmarkReal, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=0, - max_accuracy=1) - - def benchmark_1_gpu(self): - """Tests BERT SQuAD model performance with 1 GPU.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad') - FLAGS.train_batch_size = 4 - - self._run_and_report_benchmark() - - def benchmark_1_gpu_eager(self): - """Tests BERT SQuAD model performance with 1 GPU.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_eager') - FLAGS.train_batch_size = 2 - - self._run_and_report_benchmark(run_eagerly=True) - - def benchmark_1_gpu_xla(self): - """Tests BERT SQuAD model performance with 1 GPU with XLA.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad') - # 
XLA runs out of memory when running with batch size 4. - FLAGS.train_batch_size = 3 - FLAGS.enable_xla = True - - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat(self): - """Tests BERT SQuAD model performance with 1 GPU without DS.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad') - FLAGS.train_batch_size = 4 - - self._run_and_report_benchmark(ds_type='off') - - def benchmark_1_gpu_eager_no_dist_strat(self): - """Tests BERT SQuAD model performance with 1 GPU with eager execution.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_eager_no_dist_strat_squad') - FLAGS.train_batch_size = 4 - - self._run_and_report_benchmark(ds_type='off', run_eagerly=True) - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu(self): - """Tests BERT SQuAD model performance with 8 GPUs.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad') - FLAGS.train_batch_size = 24 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16_eager(self): - """Tests BERT SQuAD model performance with 1 GPU and FP16.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_fp16_eager') - FLAGS.train_batch_size = 4 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - - self._run_and_report_benchmark(run_eagerly=True) - - def benchmark_1_gpu_fp16(self): - """Tests BERT SQuAD model performance with 1 GPU and FP16.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_fp16') - FLAGS.train_batch_size = 4 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - - self._run_and_report_benchmark() - - def benchmark_1_gpu_xla_fp16(self): - """Tests BERT SQuAD model performance with 1 GPU with XLA and FP16.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = 
self._get_model_dir('benchmark_1_gpu_xla_squad_fp16') - FLAGS.train_batch_size = 4 - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16(self): - """Tests BERT SQuAD model performance with 8 GPUs.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16') - FLAGS.train_batch_size = 32 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - def benchmark_8_gpu_xla_fp16(self): - """Tests BERT SQuAD model performance with 8 GPUs with XLA.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16') - FLAGS.train_batch_size = 32 - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - - self._run_and_report_benchmark() - - def benchmark_1_gpu_amp(self): - """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp_squad') - FLAGS.train_batch_size = 4 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - - self._run_and_report_benchmark() - - def benchmark_8_gpu_amp(self): - """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_squad') - FLAGS.train_batch_size = 32 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu(self): - """Tests BERT SQuAD model performance with 2x2 TPU.""" - - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu') - FLAGS.train_batch_size = 48 - FLAGS.predict_batch_size = 48 - FLAGS.mode = 'train' - 
FLAGS.learning_rate = 8e-5 - FLAGS.num_train_epochs = 1 - FLAGS.steps_per_loop = 100 - FLAGS.do_lower_case = True - FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH - self._run_and_report_benchmark() - - -class BertSquadAccuracy(BertSquadBenchmarkBase): - """Short accuracy test for BERT SQuAD model. - - Tests BERT SQuAD accuracy. The naming convention of below test cases follow - `benchmark_(number of gpus)_gpu` format for GPUs and - `benchmark_(topology)_tpu` format for TPUs. - """ - - def __init__(self, output_dir=None, tpu=None, **kwargs): - super(BertSquadAccuracy, self).__init__(output_dir=output_dir, tpu=tpu) - - def _setup(self): - """Sets up the benchmark and SQuAD flags.""" - super(BertSquadAccuracy, self)._setup() - FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH - FLAGS.predict_file = SQUAD_PREDICT_FILE - FLAGS.vocab_file = SQUAD_VOCAB_FILE - FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH - FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH - FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH - FLAGS.num_train_epochs = 2 - FLAGS.steps_per_loop = 100 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - run_eagerly=False, - ds_type='mirrored'): - """Runs the benchmark and reports various metrics.""" - start_time_sec = time.time() - self._train_squad(run_eagerly=run_eagerly, ds_type=ds_type) - self._evaluate_squad(ds_type=ds_type) - wall_time_sec = time.time() - start_time_sec - - summary = self._read_training_summary_from_file() - summary['eval_metrics'] = self.eval_metrics - summary['start_time_sec'] = start_time_sec - - super(BertSquadAccuracy, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=0.900, - max_accuracy=0.920) - - def benchmark_1_gpu_eager(self): - """Tests BERT SQuAD model accuracy with 1 GPU with eager execution.""" - - self._setup() - self.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_eager') - FLAGS.train_batch_size = 4 - - 
self._run_and_report_benchmark(ds_type='off', run_eagerly=True) - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu(self): - """Tests BERT SQuAD model accuracy with 8 GPUs.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad') - FLAGS.train_batch_size = 24 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16(self): - """Tests BERT SQuAD model accuracy with 8 GPUs and FP16.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16') - FLAGS.train_batch_size = 32 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 'dynamic' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - def benchmark_8_gpu_xla(self): - """Tests BERT SQuAD model accuracy with 8 GPUs.""" - - self._setup() - self.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_xla') - FLAGS.train_batch_size = 32 - FLAGS.enable_xla = True - FLAGS.tf_gpu_thread_mode = 'gpu_private' - - self._run_and_report_benchmark() - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu(self): - """Tests BERT SQuAD model accuracy with 2x2 TPU.""" - - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu') - FLAGS.train_batch_size = 48 - - self._run_and_report_benchmark() - - -class BertSquadMultiWorkerAccuracy(BertSquadBenchmarkBase): - """BERT SQuAD distributed accuracy tests with multiple workers.""" - - def __init__(self, output_dir=None, tpu=None, **kwargs): - super(BertSquadMultiWorkerAccuracy, self).__init__( - output_dir=output_dir, tpu=tpu) - - def _setup(self): - """Sets up the benchmark and SQuAD flags.""" - super(BertSquadMultiWorkerAccuracy, self)._setup() - FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH - FLAGS.predict_file = SQUAD_PREDICT_FILE - FLAGS.vocab_file = SQUAD_VOCAB_FILE - FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH - FLAGS.bert_config_file = 
MODEL_CONFIG_FILE_PATH - FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH - FLAGS.num_train_epochs = 2 - FLAGS.steps_per_loop = 100 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - use_ds=True, - run_eagerly=False): - """Runs the benchmark and reports various metrics.""" - start_time_sec = time.time() - self._train_squad(run_eagerly=run_eagerly, - ds_type='multi_worker_mirrored') - self._evaluate_squad(ds_type='multi_worker_mirrored') - wall_time_sec = time.time() - start_time_sec - - summary = self._read_training_summary_from_file() - summary['eval_metrics'] = self.eval_metrics - - super(BertSquadMultiWorkerAccuracy, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=0.900, - max_accuracy=0.920) - - def _benchmark_common(self, num_workers, all_reduce_alg): - """Common to all benchmarks in this class.""" - self._setup() - - num_gpus = 8 - FLAGS.num_gpus = num_gpus - FLAGS.dtype = 'fp16' - FLAGS.enable_xla = False - FLAGS.distribution_strategy = 'multi_worker_mirrored' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 32 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_8_gpu_{}_worker_fp16_{}_tweaked'.format( - num_workers, all_reduce_alg)) - FLAGS.train_batch_size = 4 * num_gpus * num_workers - FLAGS.all_reduce_alg = all_reduce_alg - - self._run_and_report_benchmark() - - def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self): - """8 GPUs per worker, 2 workers, fp16, ring all-reduce.""" - self._benchmark_common(num_workers=2, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self): - """8 GPUs per worker, 2 workers, fp16, nccl all-reduce.""" - self._benchmark_common(num_workers=2, all_reduce_alg='nccl') - - def benchmark_8_gpu_8_workers_fp16_ring_tweaked(self): - """8 GPUs per worker, 8 workers, fp16, ring all-reduce.""" - self._benchmark_common(num_workers=8, all_reduce_alg='ring') - - def 
benchmark_8_gpu_8_workers_fp16_nccl_tweaked(self): - """8 GPUs per worker, 8 workers, fp16, nccl all-reduce.""" - self._benchmark_common(num_workers=8, all_reduce_alg='nccl') - - -class BertSquadMultiWorkerBenchmark(BertSquadBenchmarkBase): - """BERT SQuAD distributed benchmark tests with multiple workers.""" - - def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): - super(BertSquadMultiWorkerBenchmark, self).__init__( - output_dir=output_dir, tpu=tpu) - - def _setup(self): - """Sets up the benchmark and SQuAD flags.""" - super(BertSquadMultiWorkerBenchmark, self)._setup() - FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH - FLAGS.predict_file = SQUAD_PREDICT_FILE - FLAGS.vocab_file = SQUAD_VOCAB_FILE - FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH - FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH - FLAGS.num_train_epochs = 1 - FLAGS.steps_per_loop = 100 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - use_ds=True, - run_eagerly=False): - """Runs the benchmark and reports various metrics.""" - if FLAGS.train_batch_size <= 4 * 8: - FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH - else: - FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH - start_time_sec = time.time() - self._train_squad(run_eagerly=run_eagerly, - ds_type='multi_worker_mirrored') - wall_time_sec = time.time() - start_time_sec - - summary = self._read_training_summary_from_file() - summary['start_time_sec'] = start_time_sec - - super(BertSquadMultiWorkerBenchmark, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=0, - max_accuracy=1) - - def _benchmark_common(self, num_workers, all_reduce_alg): - """Common to all benchmarks in this class.""" - self._setup() - - num_gpus = 8 - FLAGS.num_gpus = num_gpus - FLAGS.dtype = 'fp16' - FLAGS.enable_xla = False - FLAGS.distribution_strategy = 'multi_worker_mirrored' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 
32 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_8_gpu_{}_worker_fp16_{}_tweaked'.format( - num_workers, all_reduce_alg)) - FLAGS.train_batch_size = 4 * num_gpus * num_workers - FLAGS.all_reduce_alg = all_reduce_alg - - self._run_and_report_benchmark() - - def benchmark_8_gpu_1_worker_fp16_ring_tweaked(self): - """8 GPUs per worker, 1 worker, fp16, ring all-reduce.""" - self._benchmark_common(num_workers=1, all_reduce_alg='ring') - - def benchmark_8_gpu_1_worker_fp16_nccl_tweaked(self): - """8 GPUs per worker, 1 worker, fp16, nccl all-reduce.""" - self._benchmark_common(num_workers=1, all_reduce_alg='nccl') - - def benchmark_8_gpu_2_workers_fp16_ring_tweaked(self): - """8 GPUs per worker, 2 workers, fp16, ring all-reduce.""" - self._benchmark_common(num_workers=2, all_reduce_alg='ring') - - def benchmark_8_gpu_2_workers_fp16_nccl_tweaked(self): - """8 GPUs per worker, 2 workers, fp16, nccl all-reduce.""" - self._benchmark_common(num_workers=2, all_reduce_alg='nccl') - - def benchmark_8_gpu_8_workers_fp16_ring_tweaked(self): - """8 GPUs per worker, 8 workers, fp16, ring all-reduce.""" - self._benchmark_common(num_workers=8, all_reduce_alg='ring') - - def benchmark_8_gpu_8_workers_fp16_nccl_tweaked(self): - """8 GPUs per worker, 8 workers, fp16, nccl all-reduce.""" - self._benchmark_common(num_workers=8, all_reduce_alg='nccl') - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/datastore/schema/benchmark_metric.json b/official/benchmark/datastore/schema/benchmark_metric.json deleted file mode 100644 index cc571d480605241e7c71d2e4cabdaf6ad3da9295..0000000000000000000000000000000000000000 --- a/official/benchmark/datastore/schema/benchmark_metric.json +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "description": "The ID of the benchmark run, where this metric should tie to.", - "mode": "REQUIRED", - "name": "run_id", - "type": "STRING" - }, - { - "description": "The name of the metric, which should be descriptive. E.g. 
training_loss, accuracy.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The unit of the metric. E.g. MB per sec.", - "mode": "NULLABLE", - "name": "unit", - "type": "STRING" - }, - { - "description": "The value of the metric.", - "mode": "NULLABLE", - "name": "value", - "type": "FLOAT" - }, - { - "description": "The timestamp when the metric is recorded.", - "mode": "REQUIRED", - "name": "timestamp", - "type": "TIMESTAMP" - }, - { - "description": "The global step when this metric is recorded.", - "mode": "NULLABLE", - "name": "global_step", - "type": "INTEGER" - }, - { - "description": "Free format metadata for the extra information about the metric.", - "mode": "REPEATED", - "name": "extras", - "type": "RECORD", - "fields": [ - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ] - } -] diff --git a/official/benchmark/datastore/schema/benchmark_run.json b/official/benchmark/datastore/schema/benchmark_run.json deleted file mode 100644 index 58e5ddcadeff98b05c328c2798071f9cd73ef9d2..0000000000000000000000000000000000000000 --- a/official/benchmark/datastore/schema/benchmark_run.json +++ /dev/null @@ -1,368 +0,0 @@ -[ - { - "description": "The UUID of the run for the benchmark.", - "mode": "REQUIRED", - "name": "model_id", - "type": "STRING" - }, - { - "description": "The name of the model, E.g ResNet50, LeNet-5 etc.", - "mode": "REQUIRED", - "name": "model_name", - "type": "STRING" - }, - { - "description": "The date when the test of the model is started", - "mode": "REQUIRED", - "name": "run_date", - "type": "TIMESTAMP" - }, - { - "description": "The unique name for a test by the combination of key parameters, eg batch size, num of GPU, etc. 
It is hardware independent.", - "mode": "NULLABLE", - "name": "test_id", - "type": "STRING" - }, - { - "description": "The tensorflow version information.", - "fields": [ - { - "description": "Version of the tensorflow. E.g. 1.7.0-rc0", - "mode": "REQUIRED", - "name": "version", - "type": "STRING" - }, - { - "description": "Git Hash of the tensorflow", - "mode": "NULLABLE", - "name": "git_hash", - "type": "STRING" - }, - { - "description": "The channel of the tensorflow binary, eg, nightly, RC, final, custom.", - "mode": "NULLABLE", - "name": "channel", - "type": "STRING" - }, - { - "description": "Identify anything special about the build, eg CUDA 10, NCCL, MKL, etc.", - "mode": "NULLABLE", - "name": "build_type", - "type": "STRING" - } - ], - "mode": "REQUIRED", - "name": "tensorflow_version", - "type": "RECORD" - }, - { - "description": "The arbitrary attribute of the model.", - "fields": [ - { - "description": "The name of the attribute.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The value of the attribute.", - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "attribute", - "type": "RECORD" - }, - { - "description": "Environment variables when the benchmark run is executed.", - "fields": [ - { - "description": "The name of the variable.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The value of the variable.", - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "environment_variable", - "type": "RECORD" - }, - { - "description": "TF Environment variables when the benchmark run is executed.", - "fields": [ - { - "description": "The name of the variable.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The value of the variable.", - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": 
"tensorflow_environment_variables", - "type": "RECORD" - }, - { - "description": "The list of parameters run with the model. It could contain hyperparameters or others.", - "fields": [ - { - "description": "The name of the parameter.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The string value of the parameter.", - "mode": "NULLABLE", - "name": "string_value", - "type": "STRING" - }, - { - "description": "The bool value of the parameter.", - "mode": "NULLABLE", - "name": "bool_value", - "type": "STRING" - }, - { - "description": "The int/long value of the parameter.", - "mode": "NULLABLE", - "name": "long_value", - "type": "INTEGER" - }, - { - "description": "The double/float value of parameter.", - "mode": "NULLABLE", - "name": "float_value", - "type": "FLOAT" - } - ], - "mode": "REPEATED", - "name": "run_parameters", - "type": "RECORD" - }, - { - "description": "The dataset that run with the benchmark.", - "mode": "NULLABLE", - "name": "dataset", - "type": "RECORD", - "fields": [ - { - "description": "The name of the dataset that the model is trained/validated with. 
E.g ImageNet, mnist.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The arbitrary attribute of the dataset.", - "fields": [ - { - "description": "The name of the attribute.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The value of the attribute.", - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "attribute", - "type": "RECORD" - } - ] - }, - { - "description": "Used to differentiate from AWS, GCE or DGX-1 at a high level", - "mode": "NULLABLE", - "name": "test_environment", - "type": "STRING" - }, - { - "description": "The machine configuration of the benchmark run.", - "mode": "NULLABLE", - "name": "machine_config", - "type": "RECORD", - "fields": [ - { - "description": "The platform information of the benchmark run.", - "mode": "NULLABLE", - "name": "platform_info", - "type": "RECORD", - "fields": [ - { - "description": "Eg: 64bit.", - "mode": "NULLABLE", - "name": "bits", - "type": "STRING" - }, - { - "description": "Eg: ELF.", - "mode": "NULLABLE", - "name": "linkage", - "type": "STRING" - }, - { - "description": "Eg: i386.", - "mode": "NULLABLE", - "name": "machine", - "type": "STRING" - }, - { - "description": "Eg: 3.13.0-76-generic.", - "mode": "NULLABLE", - "name": "release", - "type": "STRING" - }, - { - "description": "Eg: Linux.", - "mode": "NULLABLE", - "name": "system", - "type": "STRING" - }, - { - "description": "Eg: #120-Ubuntu SMP Mon Jan 18 15:59:10 UTC 2016.", - "mode": "NULLABLE", - "name": "version", - "type": "STRING" - } - ] - }, - { - "description": "The CPU information of the benchmark run.", - "mode": "NULLABLE", - "name": "cpu_info", - "type": "RECORD", - "fields": [ - { - "mode": "NULLABLE", - "name": "num_cores", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "num_cores_allowed", - "type": "INTEGER" - }, - { - "description" : "How fast are those CPUs.", - "mode": "NULLABLE", - "name": 
"mhz_per_cpu", - "type": "FLOAT" - }, - { - "description" : "Additional CPU info, Eg: Intel Ivybridge with HyperThreading (24 cores).", - "mode": "NULLABLE", - "name": "cpu_info", - "type": "STRING" - }, - { - "description" : "What kind of cpu scaling is enabled on the host. Eg performance, ondemand, conservative, mixed.", - "mode": "NULLABLE", - "name": "cpu_governor", - "type": "STRING" - }, - { - "description": "Cache size of the CPUs.", - "mode": "NULLABLE", - "name": "cache_size", - "type": "RECORD", - "fields": [ - { - "mode": "NULLABLE", - "name": "level", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "size", - "type": "INTEGER" - } - ] - } - ] - }, - { - "mode": "NULLABLE", - "name": "gpu_info", - "type": "RECORD", - "fields": [ - { - "mode": "NULLABLE", - "name": "count", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "model", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "cuda_version", - "type": "STRING" - } - ] - }, - { - "description": "The cloud instance inforation if the benchmark run is executed on cloud", - "mode": "NULLABLE", - "name": "cloud_info", - "type": "RECORD", - "fields": [ - { - "description": "The instance type, E.g. 
n1-standard-4.", - "mode": "NULLABLE", - "name": "instance_type", - "type": "STRING" - }, - { - "description": "The arbitrary attribute of the cloud info.", - "fields": [ - { - "description": "The name of the attribute.", - "mode": "REQUIRED", - "name": "name", - "type": "STRING" - }, - { - "description": "The value of the attribute.", - "mode": "NULLABLE", - "name": "value", - "type": "STRING" - } - ], - "mode": "REPEATED", - "name": "attribute", - "type": "RECORD" - } - ] - }, - { - "mode": "NULLABLE", - "name": "memory_total", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "memory_available", - "type": "STRING" - } - ] - } -] diff --git a/official/benchmark/datastore/schema/benchmark_run_status.json b/official/benchmark/datastore/schema/benchmark_run_status.json deleted file mode 100644 index f7ac59eb8042c181e8996d9e1a0e7ee79f6f0343..0000000000000000000000000000000000000000 --- a/official/benchmark/datastore/schema/benchmark_run_status.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "description": "The UUID of the run for the benchmark.", - "mode": "REQUIRED", - "name": "run_id", - "type": "STRING" - }, - { - "description": "The status of the run for the benchmark. Eg, running, failed, success", - "mode": "REQUIRED", - "name": "status", - "type": "STRING" - } -] \ No newline at end of file diff --git a/official/benchmark/keras_benchmark.py b/official/benchmark/keras_benchmark.py deleted file mode 100644 index 770674ac658f213d614f0a3704a0bbb200bb94aa..0000000000000000000000000000000000000000 --- a/official/benchmark/keras_benchmark.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes Keras benchmarks and accuracy tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark -from official.utils.flags import core as flags_core - - -class KerasBenchmark(PerfZeroBenchmark): - """Base benchmark class with methods to simplify testing.""" - - def __init__(self, - output_dir=None, - default_flags=None, - flag_methods=None, - tpu=None): - super(KerasBenchmark, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - flag_methods=flag_methods, - tpu=tpu) - - def _report_benchmark(self, - stats, - wall_time_sec, - top_1_max=None, - top_1_min=None, - log_steps=None, - total_batch_size=None, - warmup=1, - start_time_sec=None): - """Report benchmark results by writing to local protobuf file. - - Args: - stats: dict returned from keras models with known entries. - wall_time_sec: the during of the benchmark execution in seconds - top_1_max: highest passing level for top_1 accuracy. - top_1_min: lowest passing level for top_1 accuracy. - log_steps: How often the log was created for stats['step_timestamp_log']. - total_batch_size: Global batch-size. - warmup: number of entries in stats['step_timestamp_log'] to ignore. 
- start_time_sec: the start time of the program in seconds since epoch - """ - - metrics = [] - if 'accuracy_top_1' in stats: - metrics.append({'name': 'accuracy_top_1', - 'value': stats['accuracy_top_1'], - 'min_value': top_1_min, - 'max_value': top_1_max}) - metrics.append({'name': 'top_1_train_accuracy', - 'value': stats['training_accuracy_top_1']}) - - if (warmup and 'step_timestamp_log' in stats and - len(stats['step_timestamp_log']) > warmup): - # first entry in the time_log is start of step 1. The rest of the - # entries are the end of each step recorded - time_log = stats['step_timestamp_log'] - elapsed = time_log[-1].timestamp - time_log[warmup].timestamp - num_examples = ( - total_batch_size * log_steps * (len(time_log) - warmup - 1)) - examples_per_sec = num_examples / elapsed - metrics.append({'name': 'exp_per_second', - 'value': examples_per_sec}) - - if 'avg_exp_per_second' in stats: - metrics.append({'name': 'avg_exp_per_second', - 'value': stats['avg_exp_per_second']}) - - if start_time_sec and 'step_timestamp_log' in stats: - time_log = stats['step_timestamp_log'] - # time_log[0] is recorded at the beginning of the first step. - startup_time = time_log[0].timestamp - start_time_sec - metrics.append({'name': 'startup_time', 'value': startup_time}) - - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark( - iters=-1, - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) diff --git a/official/benchmark/keras_cifar_benchmark.py b/official/benchmark/keras_cifar_benchmark.py deleted file mode 100644 index 694200f66678a1bc9bc44194377a52489a1b97f3..0000000000000000000000000000000000000000 --- a/official/benchmark/keras_cifar_benchmark.py +++ /dev/null @@ -1,402 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes Keras benchmarks and accuracy tests.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -from absl import flags -import tensorflow as tf # pylint: disable=g-bad-import-order - -from official.benchmark import keras_benchmark -from official.benchmark import benchmark_wrappers -from official.benchmark.models import resnet_cifar_main - -MIN_TOP_1_ACCURACY = 0.929 -MAX_TOP_1_ACCURACY = 0.938 - -FLAGS = flags.FLAGS -CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin' - - -class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark): - """Accuracy tests for ResNet56 Keras CIFAR-10.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """A benchmark class. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. 
- """ - - self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME) - flag_methods = [resnet_cifar_main.define_cifar_flags] - - super(Resnet56KerasAccuracy, self).__init__( - output_dir=output_dir, flag_methods=flag_methods) - - def _setup(self): - super(Resnet56KerasAccuracy, self)._setup() - FLAGS.use_tensor_lr = False - - def benchmark_graph_1_gpu(self): - """Test keras based model with Keras fit and distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu') - FLAGS.dtype = 'fp32' - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Test keras based model with eager and distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - self._run_and_report_benchmark() - - def benchmark_cpu(self): - """Test keras based model on CPU.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_cpu') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_cpu_no_dist_strat(self): - """Test keras based model on CPU without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_cpu_no_dist_strat_run_eagerly(self): - """Test keras based model on CPU w/forced 
eager and no dist_strat.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_cpu_no_dist_strat_run_eagerly') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat(self): - """Test keras based model with eager and no dist strat.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly(self): - """Test keras based model w/forced eager and no dist_strat.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_graph_1_gpu_no_dist_strat(self): - """Test keras based model with Keras fit but not distribution strategies.""" - self._setup() - FLAGS.distribution_strategy = 'off' - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat') - FLAGS.dtype = 'fp32' - self._run_and_report_benchmark() - - def benchmark_2_gpu(self): - """Test keras based model with eager and distribution strategies.""" - self._setup() - FLAGS.num_gpus = 2 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - 
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - self._run_and_report_benchmark() - - def benchmark_graph_2_gpu(self): - """Test keras based model with Keras fit and distribution strategies.""" - self._setup() - FLAGS.num_gpus = 2 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 - FLAGS.train_epochs = 182 - FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu') - FLAGS.dtype = 'fp32' - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - start_time_sec = time.time() - stats = resnet_cifar_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(Resnet56KerasAccuracy, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=MIN_TOP_1_ACCURACY, - top_1_max=MAX_TOP_1_ACCURACY, - total_batch_size=FLAGS.batch_size, - log_steps=100) - - -class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark): - """Short performance tests for ResNet56 via Keras and CIFAR-10.""" - - def __init__(self, output_dir=None, default_flags=None): - flag_methods = [resnet_cifar_main.define_cifar_flags] - - super(Resnet56KerasBenchmarkBase, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=default_flags) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - start_time_sec = time.time() - stats = resnet_cifar_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(Resnet56KerasBenchmarkBase, self)._report_benchmark( - stats, - wall_time_sec, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu(self): - """Test 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_xla(self): - """Test 1 gpu with xla 
enabled.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = False - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_graph_1_gpu(self): - """Test 1 gpu graph.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.enable_eager = False - FLAGS.run_eagerly = False - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat(self): - """Test 1 gpu without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_graph_1_gpu_no_dist_strat(self): - """Test 1 gpu graph mode without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.enable_eager = False - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly(self): - """Test 1 gpu without distribution strategy and forced eager.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 128 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_2_gpu(self): - """Test 2 gpu.""" - self._setup() - FLAGS.num_gpus = 2 - FLAGS.enable_eager = True - FLAGS.run_eagerly = False - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu') - 
FLAGS.batch_size = 128 * 2 # 2 GPUs - self._run_and_report_benchmark() - - def benchmark_graph_2_gpu(self): - """Test 2 gpu graph mode.""" - self._setup() - FLAGS.num_gpus = 2 - FLAGS.enable_eager = False - FLAGS.run_eagerly = False - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu') - FLAGS.batch_size = 128 * 2 # 2 GPUs - self._run_and_report_benchmark() - - def benchmark_cpu(self): - """Test cpu.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.enable_eager = True - FLAGS.model_dir = self._get_model_dir('benchmark_cpu') - FLAGS.batch_size = 128 - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_graph_cpu(self): - """Test cpu graph mode.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.enable_eager = False - FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu') - FLAGS.batch_size = 128 - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_cpu_no_dist_strat_run_eagerly(self): - """Test cpu without distribution strategy and forced eager.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.distribution_strategy = 'off' - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.model_dir = self._get_model_dir( - 'benchmark_cpu_no_dist_strat_run_eagerly') - FLAGS.batch_size = 128 - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_cpu_no_dist_strat(self): - """Test cpu without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat') - FLAGS.batch_size = 128 - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - def benchmark_graph_cpu_no_dist_strat(self): - """Test cpu graph mode without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.enable_eager = False - FLAGS.distribution_strategy = 'off' - 
FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu_no_dist_strat') - FLAGS.batch_size = 128 - FLAGS.data_format = 'channels_last' - self._run_and_report_benchmark() - - -class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase): - """Synthetic benchmarks for ResNet56 and Keras.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - default_flags = {} - default_flags['skip_eval'] = True - default_flags['use_synthetic_data'] = True - default_flags['train_steps'] = 110 - default_flags['log_steps'] = 10 - default_flags['use_tensor_lr'] = False - - super(Resnet56KerasBenchmarkSynth, self).__init__( - output_dir=output_dir, default_flags=default_flags) - - -class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase): - """Real data benchmarks for ResNet56 and Keras.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - default_flags = {} - default_flags['skip_eval'] = True - default_flags['data_dir'] = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME) - default_flags['train_steps'] = 110 - default_flags['log_steps'] = 10 - default_flags['use_tensor_lr'] = False - - super(Resnet56KerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=default_flags) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/keras_imagenet_benchmark.py b/official/benchmark/keras_imagenet_benchmark.py deleted file mode 100644 index 63a48dfb1222b65311652e3bee4241854a55043e..0000000000000000000000000000000000000000 --- a/official/benchmark/keras_imagenet_benchmark.py +++ /dev/null @@ -1,1724 +0,0 @@ -# Lint as: python3 -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes Keras benchmarks and accuracy tests.""" -# pylint: disable=line-too-long -from __future__ import print_function - -import json -import os -import time - -from typing import Any, MutableMapping, Optional - -from absl import flags -import tensorflow as tf # pylint: disable=g-bad-import-order - -from official.benchmark import benchmark_wrappers -from official.benchmark import keras_benchmark -from official.benchmark.models import resnet_imagenet_main -from official.vision.image_classification import classifier_trainer - -MIN_TOP_1_ACCURACY = 0.76 -MAX_TOP_1_ACCURACY = 0.77 - -MOBILENET_V1_MIN_TOP_1_ACCURACY = 0.65 -MOBILENET_V1_MAX_TOP_1_ACCURACY = 0.68 - -# Range of top-1 accracies for model optimization techniques. -# Each item indicates (MIN_TOP_1_ACCURACY, MAX_TOP_1_ACCURACY). 
-MODEL_OPTIMIZATION_TOP_1_ACCURACY = { - 'RESNET50_FINETUNE_PRUNING': (0.76, 0.77), - 'MOBILENET_V1_FINETUNE_PRUNING': (0.67, 0.68), -} - -FLAGS = flags.FLAGS - - -def _get_classifier_parameters( - num_gpus: int = 0, - builder: str = 'records', - skip_eval: bool = False, - distribution_strategy: str = 'mirrored', - per_replica_batch_size: int = 128, - epochs: int = 90, - steps: int = 0, - epochs_between_evals: int = 1, - dtype: str = 'float32', - enable_xla: bool = False, - run_eagerly: bool = False, - gpu_thread_mode: Optional[str] = None, - dataset_num_private_threads: Optional[int] = None, - loss_scale: Optional[str] = None, - report_metrics: bool = True, - batchnorm_spatial_persistent: bool = False) -> MutableMapping[str, Any]: - """Gets classifier trainer's ResNet parameters.""" - return { - 'runtime': { - 'num_gpus': num_gpus, - 'distribution_strategy': distribution_strategy, - 'run_eagerly': run_eagerly, - 'enable_xla': enable_xla, - 'dataset_num_private_threads': dataset_num_private_threads, - 'gpu_thread_mode': gpu_thread_mode, - 'loss_scale': loss_scale, - 'batchnorm_spatial_persistent': batchnorm_spatial_persistent, - }, - 'train_dataset': { - 'builder': builder, - 'use_per_replica_batch_size': True, - 'batch_size': per_replica_batch_size, - 'image_size': 224, - 'dtype': dtype, - }, - 'validation_dataset': { - 'builder': builder, - 'batch_size': per_replica_batch_size, - 'use_per_replica_batch_size': True, - 'image_size': 224, - 'dtype': dtype, - }, - 'train': { - 'epochs': epochs, - 'steps': steps, - 'callbacks': { - 'enable_tensorboard': False, - 'enable_checkpoint_and_export': False, - 'enable_time_history': True, - }, - 'metrics': ['accuracy'] if report_metrics else [], - }, - 'model': { - 'loss': { - 'label_smoothing': 0.1, - }, - }, - 'evaluation': { - 'epochs_between_evals': epochs_between_evals, - 'skip_eval': skip_eval, - }, - } - - -class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark): - """Benchmark accuracy tests for ResNet50 in 
Keras.""" - - def __init__(self, - output_dir: Optional[str] = None, - root_data_dir: Optional[str] = None, - **kwargs): - """A benchmark class. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - - flag_methods = [classifier_trainer.define_classifier_flags] - - self.data_dir = os.path.join(root_data_dir, 'imagenet') - super(Resnet50KerasAccuracy, self).__init__( - output_dir=output_dir, flag_methods=flag_methods) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark( - self, - experiment_name: str, - top_1_min: float = MIN_TOP_1_ACCURACY, - top_1_max: float = MAX_TOP_1_ACCURACY, - num_gpus: int = 0, - distribution_strategy: str = 'mirrored', - per_replica_batch_size: int = 128, - epochs: int = 90, - steps: int = 0, - epochs_between_evals: int = 1, - dtype: str = 'float32', - enable_xla: bool = False, - run_eagerly: bool = False, - gpu_thread_mode: Optional[str] = None, - dataset_num_private_threads: Optional[int] = None, - loss_scale: Optional[str] = None): - """Runs and reports the benchmark given the provided configuration.""" - FLAGS.model_type = 'resnet' - FLAGS.dataset = 'imagenet' - FLAGS.mode = 'train_and_eval' - FLAGS.data_dir = self.data_dir - FLAGS.model_dir = self._get_model_dir(experiment_name) - parameters = _get_classifier_parameters( - num_gpus=num_gpus, - distribution_strategy=distribution_strategy, - per_replica_batch_size=per_replica_batch_size, - epochs=epochs, - steps=steps, - epochs_between_evals=epochs_between_evals, - dtype=dtype, - enable_xla=enable_xla, - run_eagerly=run_eagerly, - gpu_thread_mode=gpu_thread_mode, - dataset_num_private_threads=dataset_num_private_threads, - report_metrics=True, - loss_scale=loss_scale, - batchnorm_spatial_persistent=True) - 
FLAGS.params_override = json.dumps(parameters) - total_batch_size = num_gpus * per_replica_batch_size - - start_time_sec = time.time() - stats = classifier_trainer.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(Resnet50KerasAccuracy, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=top_1_min, - top_1_max=top_1_max, - total_batch_size=total_batch_size, - log_steps=100) - - def benchmark_8_gpu(self): - """Tests Keras model with eager, dist_strat and 8 GPUs.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu', - num_gpus=8, - per_replica_batch_size=128, - epochs=90, - epochs_between_evals=10, - dtype='float32') - - def benchmark_8_gpu_fp16(self): - """Tests Keras model with eager, dist_strat, 8 GPUs, and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu_fp16', - num_gpus=8, - per_replica_batch_size=256, - epochs=90, - epochs_between_evals=10, - dtype='float16') - - def benchmark_xla_8_gpu_fp16(self): - """Tests Keras model with XLA, eager, dist_strat, 8 GPUs and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16', - num_gpus=8, - per_replica_batch_size=256, - epochs=90, - epochs_between_evals=10, - dtype='float16', - enable_xla=True) - - def benchmark_xla_8_gpu_fp16_dynamic(self): - """Tests Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16_dynamic', - top_1_min=0.736, - num_gpus=8, - per_replica_batch_size=256, - epochs=90, - epochs_between_evals=10, - dtype='float16', - loss_scale='dynamic') - - def _get_model_dir(self, folder_name): - return os.path.join(self.output_dir, folder_name) - - -class MobilenetV1KerasAccuracy(keras_benchmark.KerasBenchmark): - """Benchmark accuracy tests for MobilenetV1 in Keras.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """A 
benchmark class. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - - flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags] - - self.data_dir = os.path.join(root_data_dir, 'imagenet') - super(MobilenetV1KerasAccuracy, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags={ - 'model': 'mobilenet', - 'optimizer': 'mobilenet_default', - 'initial_learning_rate_per_sample': 0.00039, - }) - - def benchmark_8_gpu(self): - """Test Keras model with eager, dist_strat and 8 GPUs.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 * 8 - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - top_1_min=MOBILENET_V1_MIN_TOP_1_ACCURACY, - top_1_max=MOBILENET_V1_MAX_TOP_1_ACCURACY): - start_time_sec = time.time() - stats = resnet_imagenet_main.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(MobilenetV1KerasAccuracy, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=top_1_min, - top_1_max=top_1_max, - total_batch_size=FLAGS.batch_size, - log_steps=100) - - def _get_model_dir(self, folder_name): - return os.path.join(self.output_dir, folder_name) - - -class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): - """Resnet50 (classifier_trainer) benchmarks.""" - - def __init__(self, output_dir=None, default_flags=None, - tpu=None, dataset_builder='records', train_epochs=1, - train_steps=110, data_dir=None): - flag_methods = 
[classifier_trainer.define_classifier_flags] - - self.dataset_builder = dataset_builder - self.train_epochs = train_epochs - self.train_steps = train_steps - self.data_dir = data_dir - - super(Resnet50KerasClassifierBenchmarkBase, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=default_flags, - tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark( - self, - experiment_name: str, - skip_steps: Optional[int] = None, - top_1_min: float = MIN_TOP_1_ACCURACY, - top_1_max: float = MAX_TOP_1_ACCURACY, - num_gpus: int = 0, - num_tpus: int = 0, - distribution_strategy: str = 'mirrored', - per_replica_batch_size: int = 128, - epochs_between_evals: int = 1, - dtype: str = 'float32', - enable_xla: bool = False, - run_eagerly: bool = False, - gpu_thread_mode: Optional[str] = None, - dataset_num_private_threads: Optional[int] = None, - loss_scale: Optional[str] = None): - """Runs and reports the benchmark given the provided configuration.""" - FLAGS.model_type = 'resnet' - FLAGS.dataset = 'imagenet' - FLAGS.mode = 'train_and_eval' - FLAGS.data_dir = self.data_dir - FLAGS.model_dir = self._get_model_dir(experiment_name) - parameters = _get_classifier_parameters( - builder=self.dataset_builder, - skip_eval=True, - num_gpus=num_gpus, - distribution_strategy=distribution_strategy, - per_replica_batch_size=per_replica_batch_size, - epochs=self.train_epochs, - steps=self.train_steps, - epochs_between_evals=epochs_between_evals, - dtype=dtype, - enable_xla=enable_xla, - gpu_thread_mode=gpu_thread_mode, - dataset_num_private_threads=dataset_num_private_threads, - loss_scale=loss_scale, - report_metrics=False, - batchnorm_spatial_persistent=True) - FLAGS.params_override = json.dumps(parameters) - if distribution_strategy == 'tpu': - total_batch_size = num_tpus * per_replica_batch_size - else: - total_batch_size = num_gpus * per_replica_batch_size - - start_time_sec = time.time() - stats = 
classifier_trainer.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - # Number of logged step time entries that are excluded in performance - # report. We keep results from last 100 batches, or skip the steps based on - # input skip_steps. - warmup = (skip_steps or (self.train_steps - 100)) // FLAGS.log_steps - - super(Resnet50KerasClassifierBenchmarkBase, self)._report_benchmark( - stats, - wall_time_sec, - total_batch_size=total_batch_size, - log_steps=FLAGS.log_steps, - warmup=warmup, - start_time_sec=start_time_sec) - - def benchmark_1_gpu_no_dist_strat(self): - """Tests Keras model with 1 GPU, no distribution strategy.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu_no_dist_strat', - num_gpus=1, - distribution_strategy='off', - per_replica_batch_size=128) - - def benchmark_1_gpu_no_dist_strat_run_eagerly(self): - """Tests Keras model with 1 GPU, no distribution strategy, run eagerly.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly', - num_gpus=1, - run_eagerly=True, - distribution_strategy='off', - per_replica_batch_size=64) - - def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self): - """Tests with 1 GPU, no distribution strategy, fp16, run eagerly.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly_fp16', - num_gpus=1, - run_eagerly=True, - distribution_strategy='off', - dtype='float16', - per_replica_batch_size=128) - - def benchmark_1_gpu(self): - """Tests Keras model with 1 GPU.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu', - num_gpus=1, - distribution_strategy='one_device', - per_replica_batch_size=128) - - def benchmark_xla_1_gpu(self): - """Tests Keras model with XLA and 1 GPU.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_1_gpu', - num_gpus=1, - enable_xla=True, - 
distribution_strategy='one_device', - per_replica_batch_size=128) - - def benchmark_1_gpu_fp16(self): - """Tests Keras model with 1 GPU and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu_fp16', - num_gpus=1, - distribution_strategy='one_device', - dtype='float16', - per_replica_batch_size=256) - - def benchmark_1_gpu_fp16_dynamic(self): - """Tests Keras model with 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_1_gpu_fp16_dynamic', - num_gpus=1, - distribution_strategy='one_device', - dtype='float16', - per_replica_batch_size=256, - loss_scale='dynamic') - - def benchmark_xla_1_gpu_fp16(self): - """Tests Keras model with XLA, 1 GPU and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_1_gpu_fp16', - num_gpus=1, - enable_xla=True, - distribution_strategy='one_device', - dtype='float16', - per_replica_batch_size=256) - - def benchmark_xla_1_gpu_fp16_tweaked(self): - """Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_1_gpu_fp16_tweaked', - num_gpus=1, - enable_xla=True, - distribution_strategy='one_device', - dtype='float16', - per_replica_batch_size=256, - gpu_thread_mode='gpu_private') - - def benchmark_xla_1_gpu_fp16_dynamic(self): - """Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_1_gpu_fp16_dynamic', - num_gpus=1, - enable_xla=True, - distribution_strategy='one_device', - dtype='float16', - per_replica_batch_size=256, - loss_scale='dynamic') - - def benchmark_8_gpu(self): - """Tests Keras model with 8 GPUs.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu', - num_gpus=8, - distribution_strategy='mirrored', - per_replica_batch_size=128) - - def 
benchmark_8_gpu_tweaked(self): - """Tests Keras model with manual config tuning and 8 GPUs.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu_tweaked', - num_gpus=8, - distribution_strategy='mirrored', - per_replica_batch_size=128, - dataset_num_private_threads=14) - - def benchmark_xla_8_gpu(self): - """Tests Keras model with XLA and 8 GPUs.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=128) - - def benchmark_xla_8_gpu_tweaked(self): - """Tests Keras model with manual config tuning, 8 GPUs, and XLA.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_tweaked', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=128, - gpu_thread_mode='gpu_private', - dataset_num_private_threads=24) - - def benchmark_8_gpu_fp16(self): - """Tests Keras model with 8 GPUs and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu_fp16', - num_gpus=8, - dtype='float16', - distribution_strategy='mirrored', - per_replica_batch_size=256) - - def benchmark_8_gpu_fp16_tweaked(self): - """Tests Keras model with 8 GPUs, fp16, and manual config tuning.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu_fp16_tweaked', - num_gpus=8, - dtype='float16', - distribution_strategy='mirrored', - per_replica_batch_size=256, - gpu_thread_mode='gpu_private', - dataset_num_private_threads=40) - - def benchmark_8_gpu_fp16_dynamic_tweaked(self): - """Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8_gpu_fp16_dynamic_tweaked', - num_gpus=8, - dtype='float16', - distribution_strategy='mirrored', - per_replica_batch_size=256, - loss_scale='dynamic', - 
gpu_thread_mode='gpu_private', - dataset_num_private_threads=40) - - def benchmark_xla_8_gpu_fp16(self): - """Tests Keras model with XLA, 8 GPUs and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16', - dtype='float16', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=256) - - def benchmark_xla_8_gpu_fp16_tweaked(self): - """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16_tweaked', - dtype='float16', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=256, - gpu_thread_mode='gpu_private', - dataset_num_private_threads=48) - - def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self): - """Tests with manual config tuning, XLA, 8 GPUs and fp16. - - Delay performance measurement for stable performance on 96 vCPU platforms. - """ - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16_tweaked_delay_measure', - dtype='float16', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=256, - gpu_thread_mode='gpu_private', - dataset_num_private_threads=48, - steps=310) - - def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self): - """Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_xla_8_gpu_fp16_dynamic_tweaked', - dtype='float16', - num_gpus=8, - enable_xla=True, - distribution_strategy='mirrored', - per_replica_batch_size=256, - gpu_thread_mode='gpu_private', - loss_scale='dynamic', - dataset_num_private_threads=48) - - def benchmark_2x2_tpu_bf16(self): - """Test Keras model with 2x2 TPU, bf16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_2x2_tpu_bf16', - dtype='bfloat16', - num_tpus=8, - 
distribution_strategy='tpu', - per_replica_batch_size=128) - - def benchmark_4x4_tpu_bf16(self): - """Test Keras model with 4x4 TPU, bf16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_4x4_tpu_bf16', - dtype='bfloat16', - num_tpus=32, - distribution_strategy='tpu', - per_replica_batch_size=128) - - def benchmark_8x8_tpu_bf16(self): - """Test Keras model with 8x8 TPU, bf16.""" - self._setup() - self._run_and_report_benchmark( - experiment_name='benchmark_8x8_tpu_bf16', - dtype='bfloat16', - num_tpus=128, - distribution_strategy='tpu', - per_replica_batch_size=64) - - def fill_report_object(self, stats): - super(Resnet50KerasClassifierBenchmarkBase, self).fill_report_object( - stats, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - -class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): - """Resnet50 benchmarks.""" - - def __init__(self, output_dir=None, default_flags=None, tpu=None): - flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags] - - super(Resnet50KerasBenchmarkBase, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=default_flags, - tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, skip_steps=None): - start_time_sec = time.time() - stats = resnet_imagenet_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - # Number of logged step time entries that are excluded in performance - # report. We keep results from last 100 batches, or skip the steps based on - # input skip_steps. 
- warmup = (skip_steps or (FLAGS.train_steps - 100)) // FLAGS.log_steps - - super(Resnet50KerasBenchmarkBase, self)._report_benchmark( - stats, - wall_time_sec, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - warmup=warmup, - start_time_sec=start_time_sec) - - def benchmark_1_gpu_no_dist_strat(self): - """Test Keras model with 1 GPU, no distribution strategy.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly(self): - """Test Keras model with 1 GPU, no distribution strategy, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly') - FLAGS.batch_size = 64 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self): - """Test Keras model with 1 GPU, no distribution strategy, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.explicit_gpu_placement = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked') - FLAGS.batch_size = 64 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self): - """Test with 1 GPU, no distribution strategy, fp16, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def 
benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self): - """Test with 1 GPU, no distribution strategy, fp16, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.explicit_gpu_placement = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Test Keras model with 1 GPU.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_amp(self): - """Test Keras model with 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp') - FLAGS.batch_size = 256 - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu(self): - """Test Keras model with XLA and 1 GPU.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_amp(self): - """Test Keras model with XLA and 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp') - FLAGS.batch_size = 256 - self._run_and_report_benchmark() - - def 
benchmark_1_gpu_fp16(self): - """Test Keras model with 1 GPU and fp16.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16_dynamic(self): - """Test Keras model with 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.loss_scale = 'dynamic' - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_fp16(self): - """Test Keras model with XLA, 1 GPU and fp16.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_fp16_tweaked(self): - """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_fp16_dynamic(self): - """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.loss_scale = 
'dynamic' - self._run_and_report_benchmark() - - def benchmark_8_gpu(self): - """Test Keras model with 8 GPUs.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - FLAGS.batch_size = 128 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_8_gpu_amp(self): - """Test Keras model with 8 GPUs with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp') - FLAGS.batch_size = 256 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_8_gpu_tweaked(self): - """Test Keras model with manual config tuning and 8 GPUs.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked') - FLAGS.batch_size = 128 * 8 # 8 GPUs - FLAGS.datasets_num_private_threads = 14 - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu(self): - """Test Keras model with XLA and 8 GPUs.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu') - FLAGS.batch_size = 128 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_amp(self): - """Test Keras model with XLA and 8 GPUs with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp') - FLAGS.batch_size = 256 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def 
benchmark_xla_8_gpu_tweaked(self): - """Test Keras model with manual config tuning, 8 GPUs, and XLA.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked') - FLAGS.batch_size = 128 * 8 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 24 - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16(self): - """Test Keras model with 8 GPUs and fp16.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16') - FLAGS.batch_size = 256 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16_tweaked(self): - """Test Keras model with 8 GPUs, fp16, and manual config tuning.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.dataset_num_private_threads = 40 - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16_dynamic_tweaked(self): - """Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_8_gpu_fp16_dynamic_tweaked') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.loss_scale = 'dynamic' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.dataset_num_private_threads = 40 - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_fp16(self): - """Test Keras model with XLA, 8 GPUs and fp16.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = 
True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16') - FLAGS.batch_size = 256 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_fp16_tweaked(self): - """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 48 - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self): - """Test with manual config tuning, XLA, 8 GPUs and fp16. - - Delay performance measurement for stable performance on 96 vCPU platforms. - """ - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_xla_8_gpu_fp16_tweaked_delay_measure') - FLAGS.batch_size = 256 * 8 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 48 - FLAGS.train_steps = 310 - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self): - """Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_xla_8_gpu_fp16_dynamic_tweaked') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.loss_scale = 'dynamic' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 48 - self._run_and_report_benchmark() - - def benchmark_2x2_tpu_bf16(self): - """Test Keras model with 2x2 
TPU, bf16.""" - self._setup() - - FLAGS.dtype = 'bf16' - FLAGS.distribution_strategy = 'tpu' - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16') - FLAGS.batch_size = 1024 - self._run_and_report_benchmark() - - def benchmark_4x4_tpu_bf16(self): - """Test Keras model with 4x4 TPU, bf16.""" - self._setup() - - FLAGS.dtype = 'bf16' - FLAGS.distribution_strategy = 'tpu' - FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16') - FLAGS.batch_size = 4096 - self._run_and_report_benchmark() - - def benchmark_8x8_tpu_bf16(self): - """Test Keras model with 8x8 TPU, bf16.""" - self._setup() - - FLAGS.dtype = 'bf16' - FLAGS.distribution_strategy = 'tpu' - FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16') - FLAGS.batch_size = 8192 - self._run_and_report_benchmark() - - def fill_report_object(self, stats): - super(Resnet50KerasBenchmarkBase, self).fill_report_object( - stats, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - -class Resnet50KerasBenchmarkSynth(Resnet50KerasClassifierBenchmarkBase): - """Resnet50 synthetic benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs): - def_flags = {} - def_flags['log_steps'] = 10 - - super(Resnet50KerasBenchmarkSynth, self).__init__( - output_dir=output_dir, default_flags=def_flags, tpu=tpu, - dataset_builder='synthetic', train_epochs=1, train_steps=110) - - -class Resnet50KerasBenchmarkReal(Resnet50KerasClassifierBenchmarkBase): - """Resnet50 real data benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs): - data_dir = os.path.join(root_data_dir, 'imagenet') - def_flags = {} - def_flags['log_steps'] = 10 - - super(Resnet50KerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=def_flags, tpu=tpu, - dataset_builder='records', train_epochs=1, train_steps=110, - data_dir=data_dir) - - -class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase): - """Resnet50 real data 
(stored in remote storage) benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - def_flags = {} - def_flags['skip_eval'] = True - def_flags['report_accuracy_metrics'] = False - def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') - # Defining multiple epochs overrides the train_steps setting in benchmarks. - def_flags['train_epochs'] = 2 - # Cache dataset so performance is stable after the first epoch. - def_flags['training_dataset_cache'] = True - def_flags['log_steps'] = 100 - # Note that for single GPU and pure eager tests which are less likely to be - # input bound and more stable, these tests will run for shorter time by - # overriding FLAGS.train_epochs, train_seteps, log_steps in benchmark - # methods, and skip_steps in _run_and_report_benchmark(). - - super(Resnet50KerasBenchmarkRemoteData, self).__init__( - output_dir=output_dir, default_flags=def_flags) - - def _override_flags_to_run_test_shorter(self): - FLAGS.train_epochs = 1 - FLAGS.train_steps = 300 - FLAGS.log_steps = 10 - - def benchmark_1_gpu_no_dist_strat(self): - """Test Keras model with 1 GPU, no distribution strategy.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - FLAGS.batch_size = 128 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly(self): - """Test Keras model with 1 GPU, no distribution strategy, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly') - FLAGS.batch_size = 64 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self): - """Test Keras model with 1 GPU, 
no distribution strategy, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.explicit_gpu_placement = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked') - FLAGS.batch_size = 64 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self): - """Test with 1 GPU, no distribution strategy, fp16, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 128 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self): - """Test with 1 GPU, no distribution strategy, fp16, run eagerly.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.run_eagerly = True - FLAGS.explicit_gpu_placement = True - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 128 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Test Keras model with 1 GPU.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - FLAGS.batch_size = 128 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_amp(self): - """Test Keras model with 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - 
FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp') - FLAGS.batch_size = 256 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu(self): - """Test Keras model with XLA and 1 GPU.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu') - FLAGS.batch_size = 128 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_amp(self): - """Test Keras model with XLA and 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp') - FLAGS.batch_size = 256 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16(self): - """Test Keras model with 1 GPU and fp16.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16_dynamic(self): - """Test Keras model with 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.loss_scale = 'dynamic' - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def 
benchmark_xla_1_gpu_fp16(self): - """Test Keras model with XLA, 1 GPU and fp16.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_fp16_tweaked(self): - """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.tf_gpu_thread_mode = 'gpu_private' - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_fp16_dynamic(self): - """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.enable_eager = True - FLAGS.enable_xla = True - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic') - FLAGS.dtype = 'fp16' - FLAGS.batch_size = 256 - FLAGS.loss_scale = 'dynamic' - self._override_flags_to_run_test_shorter() - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - if FLAGS.num_gpus == 1 or FLAGS.run_eagerly: - # For single GPU and pure eager tests which are less likely to be input - # bound and more stable, run for shorter time and use the default - # skip_steps. - skip_steps = None - else: - # skip the first epoch for performance measurement. 
- skip_steps = 600 - super(Resnet50KerasBenchmarkRemoteData, - self)._run_and_report_benchmark(skip_steps=skip_steps) - - -class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark): - """Trivial model with real data benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags] - - def_flags = {} - def_flags['use_trivial_model'] = True - def_flags['skip_eval'] = True - def_flags['report_accuracy_metrics'] = False - def_flags['dtype'] = 'fp16' - def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') - def_flags['train_steps'] = 600 - def_flags['log_steps'] = 100 - def_flags['distribution_strategy'] = 'mirrored' - - super(TrivialKerasBenchmarkReal, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=def_flags) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - start_time_sec = time.time() - stats = resnet_imagenet_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(TrivialKerasBenchmarkReal, self)._report_benchmark( - stats, - wall_time_sec, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_8_gpu_warmup(self): - """Dummy test that runs over an epoch to warmup the machine.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.enable_eager = True - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup') - FLAGS.batch_size = 256 * 8 - FLAGS.train_steps = 700 - self._run_and_report_benchmark() - - def fill_report_object(self, stats): - super(TrivialKerasBenchmarkReal, self).fill_report_object( - stats, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - -class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark): - """Resnet50 distributed accuracy tests with multiple workers.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - flag_methods = 
[classifier_trainer.define_imagenet_keras_flags] - self.data_dir = os.path.join(root_data_dir, 'imagenet') - super(Resnet50MultiWorkerKerasAccuracy, self).__init__( - output_dir=output_dir, flag_methods=flag_methods) - - def _benchmark_common(self, eager, num_workers, all_reduce_alg): - """Common to all benchmarks in this class.""" - self._setup() - - num_gpus = 8 - FLAGS.num_gpus = num_gpus - FLAGS.data_dir = self.data_dir - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = eager - FLAGS.enable_xla = False - FLAGS.distribution_strategy = 'multi_worker_mirrored' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 32 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format( - 'eager' if eager else 'graph', num_workers, all_reduce_alg)) - FLAGS.batch_size = 256 * num_gpus * num_workers - FLAGS.all_reduce_alg = all_reduce_alg - - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - top_1_min=MIN_TOP_1_ACCURACY, - top_1_max=MAX_TOP_1_ACCURACY): - start_time_sec = time.time() - stats = classifier_trainer.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(Resnet50MultiWorkerKerasAccuracy, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=top_1_min, - top_1_max=top_1_max, - total_batch_size=FLAGS.batch_size, - log_steps=100) - - def _get_model_dir(self, folder_name): - return os.path.join(self.output_dir, folder_name) - - def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self): - """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce.""" - self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self): - """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce.""" - self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl') - - def 
benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self): - """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce.""" - self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self): - """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce.""" - self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl') - - -class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase): - """Resnet50 distributed benchmark tests with multiple workers.""" - - def __init__(self, output_dir=None, default_flags=None): - super(Resnet50MultiWorkerKerasBenchmark, self).__init__( - output_dir=output_dir, default_flags=default_flags) - - def _benchmark_common(self, eager, num_workers, all_reduce_alg): - """Common to all benchmarks in this class.""" - self._setup() - - num_gpus = 8 - FLAGS.num_gpus = num_gpus - FLAGS.dtype = 'fp16' - FLAGS.enable_eager = eager - FLAGS.enable_xla = False - FLAGS.distribution_strategy = 'multi_worker_mirrored' - FLAGS.tf_gpu_thread_mode = 'gpu_private' - FLAGS.datasets_num_private_threads = 32 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format( - 'eager' if eager else 'graph', num_workers, all_reduce_alg)) - FLAGS.batch_size = 256 * num_gpus * num_workers - FLAGS.all_reduce_alg = all_reduce_alg - - self._run_and_report_benchmark() - - def benchmark_eager_8_gpu_1_worker_fp16_ring_tweaked(self): - """Eager, 8 GPUs per worker, 1 worker, fp16, ring all-reduce.""" - self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_1_worker_fp16_nccl_tweaked(self): - """Eager, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce.""" - self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='nccl') - - def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self): - """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce.""" - self._benchmark_common(eager=True, 
num_workers=2, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self): - """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce.""" - self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl') - - def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self): - """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce.""" - self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring') - - def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self): - """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce.""" - self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl') - - -class Resnet50MultiWorkerKerasBenchmarkSynth(Resnet50MultiWorkerKerasBenchmark): - """Resnet50 multi-worker synthetic data benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - def_flags = {} - def_flags['skip_eval'] = True - def_flags['report_accuracy_metrics'] = False - def_flags['use_synthetic_data'] = True - def_flags['train_steps'] = 110 - def_flags['log_steps'] = 10 - - super(Resnet50MultiWorkerKerasBenchmarkSynth, self).__init__( - output_dir=output_dir, default_flags=def_flags) - - -class Resnet50MultiWorkerKerasBenchmarkReal(Resnet50MultiWorkerKerasBenchmark): - """Resnet50 multi-worker real data benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - def_flags = {} - def_flags['skip_eval'] = True - def_flags['report_accuracy_metrics'] = False - def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') - def_flags['train_steps'] = 110 - def_flags['log_steps'] = 10 - - super(Resnet50MultiWorkerKerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=def_flags) - - -# TODO(kimjaehong): It also should be also cover other metheods of model -# optimization techniques. In that time, this class will change to something -# like 'KerasModelOptimizationAccuracyBase'. 
-class KerasPruningAccuracyBase(keras_benchmark.KerasBenchmark): - """Benchmark accuracy tests for pruning method.""" - - def __init__(self, - output_dir=None, - root_data_dir=None, - default_flags=None, - **kwargs): - """A accuracy benchmark class for pruning method. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - default_flags: default flags - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - if default_flags is None: - default_flags = {} - default_flags['pruning_method'] = 'polynomial_decay' - default_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') - - flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags] - - super(KerasPruningAccuracyBase, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=default_flags, - **kwargs) - - def benchmark_8_gpu(self): - """Test Keras model with eager, dist_strat and 8 GPUs.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = 32 * 8 - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - FLAGS.dtype = 'fp32' - FLAGS.enable_eager = True - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[ - 'RESNET50_FINETUNE_PRUNING'][0], - top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[ - 'RESNET50_FINETUNE_PRUNING'][1]): - start_time_sec = time.time() - stats = resnet_imagenet_main.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(KerasPruningAccuracyBase, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=top_1_min, - top_1_max=top_1_max, - total_batch_size=FLAGS.batch_size, - log_steps=100) - - -class MobilenetV1KerasPruningAccuracy(KerasPruningAccuracyBase): - 
"""Benchmark accuracy tests for MobilenetV1 with pruning method.""" - - def __init__(self, root_data_dir=None, **kwargs): - default_flags = { - 'model': 'mobilenet', - 'optimizer': 'mobilenet_default', - 'initial_learning_rate_per_sample': 0.00007, - 'pretrained_filepath': tf.train.latest_checkpoint( - os.path.join(root_data_dir, 'mobilenet_v1')), - 'pruning_begin_step': 0, - 'pruning_end_step': 100000, - 'pruning_initial_sparsity': 0.0, - 'pruning_final_sparsity': 0.5, - 'pruning_frequency': 100, - } - super(MobilenetV1KerasPruningAccuracy, self).__init__( - root_data_dir=root_data_dir, - default_flags=default_flags, - **kwargs) - - def _run_and_report_benchmark(self): - super(MobilenetV1KerasPruningAccuracy, self)._run_and_report_benchmark( - top_1_min=\ - MODEL_OPTIMIZATION_TOP_1_ACCURACY['MOBILENET_V1_FINETUNE_PRUNING'][0], - top_1_max=\ - MODEL_OPTIMIZATION_TOP_1_ACCURACY['MOBILENET_V1_FINETUNE_PRUNING'][1]) - - -class Resnet50KerasPruningAccuracy(KerasPruningAccuracyBase): - """Benchmark accuracy tests for resnet50 with pruning method.""" - - def __init__(self, root_data_dir=None, **kwargs): - default_flags = { - 'model': 'resnet50_v1.5', - 'optimizer': 'mobilenet_default', - 'initial_learning_rate_per_sample': 0.0000039, - 'pretrained_filepath': tf.train.latest_checkpoint( - os.path.join(root_data_dir, 'resnet50')), - 'pruning_begin_step': 0, - 'pruning_end_step': 50000, - 'pruning_initial_sparsity': 0.0, - 'pruning_final_sparsity': 0.5, - 'pruning_frequency': 100, - } - super(Resnet50KerasPruningAccuracy, self).__init__( - root_data_dir=root_data_dir, - default_flags=default_flags, - **kwargs) - - def _run_and_report_benchmark(self): - super(Resnet50KerasPruningAccuracy, self)._run_and_report_benchmark( - top_1_min=\ - MODEL_OPTIMIZATION_TOP_1_ACCURACY['RESNET50_FINETUNE_PRUNING'][0], - top_1_max=\ - MODEL_OPTIMIZATION_TOP_1_ACCURACY['RESNET50_FINETUNE_PRUNING'][1]) - - -class KerasPruningBenchmarkRealBase(Resnet50KerasBenchmarkBase): - """Pruning method 
benchmarks.""" - - def __init__(self, root_data_dir=None, default_flags=None, **kwargs): - if default_flags is None: - default_flags = {} - default_flags.update({ - 'skip_eval': True, - 'report_accuracy_metrics': False, - 'data_dir': os.path.join(root_data_dir, 'imagenet'), - 'train_steps': 110, - 'log_steps': 10, - 'pruning_method': 'polynomial_decay', - 'pruning_begin_step': 0, - 'pruning_end_step': 50000, - 'pruning_initial_sparsity': 0, - 'pruning_final_sparsity': 0.5, - 'pruning_frequency': 100, - }) - super(KerasPruningBenchmarkRealBase, self).__init__( - default_flags=default_flags, **kwargs) - - -class MobilenetV1KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase): - """Pruning method benchmarks for MobilenetV1.""" - - def __init__(self, **kwargs): - default_flags = { - 'model': 'mobilenet', - 'optimizer': 'mobilenet_default', - } - super(MobilenetV1KerasPruningBenchmarkReal, self).__init__( - default_flags=default_flags, **kwargs) - - -class Resnet50KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase): - """Pruning method benchmarks for resnet50.""" - - def __init__(self, **kwargs): - default_flags = { - 'model': 'resnet50_v1.5', - 'optimizer': 'mobilenet_default', - } - super(Resnet50KerasPruningBenchmarkReal, self).__init__( - default_flags=default_flags, **kwargs) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/models/cifar_preprocessing.py b/official/benchmark/models/cifar_preprocessing.py deleted file mode 100644 index 18d7fe630e194953c8c5f3f7552c7104c6155c9a..0000000000000000000000000000000000000000 --- a/official/benchmark/models/cifar_preprocessing.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides utilities to Cifar-10 dataset.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from absl import logging -import tensorflow as tf - -from official.vision.image_classification.resnet import imagenet_preprocessing - -HEIGHT = 32 -WIDTH = 32 -NUM_CHANNELS = 3 -_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS -# The record is the image plus a one-byte label -_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1 - -# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits. -NUM_IMAGES = { - 'train': 50000, - 'validation': 10000, -} -_NUM_DATA_FILES = 5 -NUM_CLASSES = 10 - - -def parse_record(raw_record, is_training, dtype): - """Parses a record containing a training example of an image. - - The input record is parsed into a label and image, and the image is passed - through preprocessing steps (cropping, flipping, and so on). - - This method converts the label to one hot to fit the loss function. - - Args: - raw_record: scalar Tensor tf.string containing a serialized - Example protocol buffer. - is_training: A boolean denoting whether the input is for training. - dtype: Data type to use for input images. - - Returns: - Tuple with processed image tensor and one-hot-encoded label tensor. - """ - # Convert bytes to a vector of uint8 that is record_bytes long. 
- record_vector = tf.io.decode_raw(raw_record, tf.uint8) - - # The first byte represents the label, which we convert from uint8 to int32 - # and then to one-hot. - label = tf.cast(record_vector[0], tf.int32) - - # The remaining bytes after the label represent the image, which we reshape - # from [depth * height * width] to [depth, height, width]. - depth_major = tf.reshape(record_vector[1:_RECORD_BYTES], - [NUM_CHANNELS, HEIGHT, WIDTH]) - - # Convert from [depth, height, width] to [height, width, depth], and cast as - # float32. - image = tf.cast(tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32) - - image = preprocess_image(image, is_training) - image = tf.cast(image, dtype) - - return image, label - - -def preprocess_image(image, is_training): - """Preprocess a single image of layout [height, width, depth].""" - if is_training: - # Resize the image to add four extra pixels on each side. - image = tf.image.resize_with_crop_or_pad( - image, HEIGHT + 8, WIDTH + 8) - - # Randomly crop a [HEIGHT, WIDTH] section of the image. - image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS]) - - # Randomly flip the image horizontally. - image = tf.image.random_flip_left_right(image) - - # Subtract off the mean and divide by the variance of the pixels. 
- image = tf.image.per_image_standardization(image) - return image - - -def get_filenames(is_training, data_dir): - """Returns a list of filenames.""" - assert tf.io.gfile.exists(data_dir), ( - 'Run cifar10_download_and_extract.py first to download and extract the ' - 'CIFAR-10 data.') - - if is_training: - return [ - os.path.join(data_dir, 'data_batch_%d.bin' % i) - for i in range(1, _NUM_DATA_FILES + 1) - ] - else: - return [os.path.join(data_dir, 'test_batch.bin')] - - -def input_fn(is_training, - data_dir, - batch_size, - dtype=tf.float32, - datasets_num_private_threads=None, - parse_record_fn=parse_record, - input_context=None, - drop_remainder=False): - """Input function which provides batches for train or eval. - - Args: - is_training: A boolean denoting whether the input is for training. - data_dir: The directory containing the input data. - batch_size: The number of samples per batch. - dtype: Data type to use for images/features - datasets_num_private_threads: Number of private threads for tf.data. - parse_record_fn: Function to use for parsing the records. - input_context: A `tf.distribute.InputContext` object passed in by - `tf.distribute.Strategy`. - drop_remainder: A boolean indicates whether to drop the remainder of the - batches. If True, the batch dimension will be static. - - Returns: - A dataset that can be used for iteration. 
- """ - filenames = get_filenames(is_training, data_dir) - dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES) - - if input_context: - logging.info( - 'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d', - input_context.input_pipeline_id, input_context.num_input_pipelines) - dataset = dataset.shard(input_context.num_input_pipelines, - input_context.input_pipeline_id) - - return imagenet_preprocessing.process_record_dataset( - dataset=dataset, - is_training=is_training, - batch_size=batch_size, - shuffle_buffer=NUM_IMAGES['train'], - parse_record_fn=parse_record_fn, - dtype=dtype, - datasets_num_private_threads=datasets_num_private_threads, - drop_remainder=drop_remainder - ) diff --git a/official/benchmark/models/resnet_cifar_main.py b/official/benchmark/models/resnet_cifar_main.py deleted file mode 100644 index 4a02fec8b96e25228e6e0467d646c26995f944fc..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_cifar_main.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Runs a ResNet model on the Cifar-10 dataset.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import app -from absl import flags -from absl import logging -import numpy as np -import tensorflow as tf -from official.benchmark.models import cifar_preprocessing -from official.benchmark.models import resnet_cifar_model -from official.benchmark.models import synthetic_util -from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils -from official.utils.misc import keras_utils -from official.vision.image_classification.resnet import common - - -LR_SCHEDULE = [ # (multiplier, epoch to start) tuples - (0.1, 91), (0.01, 136), (0.001, 182) -] - - -def learning_rate_schedule(current_epoch, - current_batch, - batches_per_epoch, - batch_size): - """Handles linear scaling rule and LR decay. - - Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the - provided scaling factor. - - Args: - current_epoch: integer, current epoch indexed from 0. - current_batch: integer, current batch in the current epoch, indexed from 0. - batches_per_epoch: integer, number of steps in an epoch. - batch_size: integer, total batch sized. - - Returns: - Adjusted learning rate. - """ - del current_batch, batches_per_epoch # not used - initial_learning_rate = common.BASE_LEARNING_RATE * batch_size / 128 - learning_rate = initial_learning_rate - for mult, start_epoch in LR_SCHEDULE: - if current_epoch >= start_epoch: - learning_rate = initial_learning_rate * mult - else: - break - return learning_rate - - -class LearningRateBatchScheduler(tf.keras.callbacks.Callback): - """Callback to update learning rate on every batch (not epoch boundaries). - - N.B. Only support Keras optimizers, not TF optimizers. 
- - Attributes: - schedule: a function that takes an epoch index and a batch index as input - (both integer, indexed from 0) and returns a new learning rate as - output (float). - """ - - def __init__(self, schedule, batch_size, steps_per_epoch): - super(LearningRateBatchScheduler, self).__init__() - self.schedule = schedule - self.steps_per_epoch = steps_per_epoch - self.batch_size = batch_size - self.epochs = -1 - self.prev_lr = -1 - - def on_epoch_begin(self, epoch, logs=None): - if not hasattr(self.model.optimizer, 'learning_rate'): - raise ValueError('Optimizer must have a "learning_rate" attribute.') - self.epochs += 1 - - def on_batch_begin(self, batch, logs=None): - """Executes before step begins.""" - lr = self.schedule(self.epochs, - batch, - self.steps_per_epoch, - self.batch_size) - if not isinstance(lr, (float, np.float32, np.float64)): - raise ValueError('The output of the "schedule" function should be float.') - if lr != self.prev_lr: - self.model.optimizer.learning_rate = lr # lr should be a float here - self.prev_lr = lr - logging.debug( - 'Epoch %05d Batch %05d: LearningRateBatchScheduler ' - 'change learning rate to %s.', self.epochs, batch, lr) - - -def run(flags_obj): - """Run ResNet Cifar-10 training and eval loop using native Keras APIs. - - Args: - flags_obj: An object containing parsed flag values. - - Raises: - ValueError: If fp16 is passed as it is not currently supported. - - Returns: - Dictionary of training and eval stats. 
- """ - keras_utils.set_session_config( - enable_xla=flags_obj.enable_xla) - - # Execute flag override logic for better model performance - if flags_obj.tf_gpu_thread_mode: - keras_utils.set_gpu_thread_mode_and_count( - per_gpu_thread_count=flags_obj.per_gpu_thread_count, - gpu_thread_mode=flags_obj.tf_gpu_thread_mode, - num_gpus=flags_obj.num_gpus, - datasets_num_private_threads=flags_obj.datasets_num_private_threads) - common.set_cudnn_batchnorm_mode() - - dtype = flags_core.get_tf_dtype(flags_obj) - if dtype == 'fp16': - raise ValueError('dtype fp16 is not supported in Keras. Use the default ' - 'value(fp32).') - - data_format = flags_obj.data_format - if data_format is None: - data_format = ('channels_first' if tf.config.list_physical_devices('GPU') - else 'channels_last') - tf.keras.backend.set_image_data_format(data_format) - - strategy = distribution_utils.get_distribution_strategy( - distribution_strategy=flags_obj.distribution_strategy, - num_gpus=flags_obj.num_gpus, - all_reduce_alg=flags_obj.all_reduce_alg, - num_packs=flags_obj.num_packs) - - if strategy: - # flags_obj.enable_get_next_as_optional controls whether enabling - # get_next_as_optional behavior in DistributedIterator. If true, last - # partial batch can be supported. 
- strategy.extended.experimental_enable_get_next_as_optional = ( - flags_obj.enable_get_next_as_optional - ) - - strategy_scope = distribution_utils.get_strategy_scope(strategy) - - if flags_obj.use_synthetic_data: - synthetic_util.set_up_synthetic_data() - input_fn = common.get_synth_input_fn( - height=cifar_preprocessing.HEIGHT, - width=cifar_preprocessing.WIDTH, - num_channels=cifar_preprocessing.NUM_CHANNELS, - num_classes=cifar_preprocessing.NUM_CLASSES, - dtype=flags_core.get_tf_dtype(flags_obj), - drop_remainder=True) - else: - synthetic_util.undo_set_up_synthetic_data() - input_fn = cifar_preprocessing.input_fn - - train_input_dataset = input_fn( - is_training=True, - data_dir=flags_obj.data_dir, - batch_size=flags_obj.batch_size, - parse_record_fn=cifar_preprocessing.parse_record, - datasets_num_private_threads=flags_obj.datasets_num_private_threads, - dtype=dtype, - # Setting drop_remainder to avoid the partial batch logic in normalization - # layer, which triggers tf.where and leads to extra memory copy of input - # sizes between host and GPU. 
- drop_remainder=(not flags_obj.enable_get_next_as_optional)) - - eval_input_dataset = None - if not flags_obj.skip_eval: - eval_input_dataset = input_fn( - is_training=False, - data_dir=flags_obj.data_dir, - batch_size=flags_obj.batch_size, - parse_record_fn=cifar_preprocessing.parse_record) - - steps_per_epoch = ( - cifar_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size) - lr_schedule = 0.1 - if flags_obj.use_tensor_lr: - initial_learning_rate = common.BASE_LEARNING_RATE * flags_obj.batch_size / 128 - lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay( - boundaries=list(p[1] * steps_per_epoch for p in LR_SCHEDULE), - values=[initial_learning_rate] + - list(p[0] * initial_learning_rate for p in LR_SCHEDULE)) - - with strategy_scope: - optimizer = common.get_optimizer(lr_schedule) - model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES) - model.compile( - loss='sparse_categorical_crossentropy', - optimizer=optimizer, - metrics=(['sparse_categorical_accuracy'] - if flags_obj.report_accuracy_metrics else None), - run_eagerly=flags_obj.run_eagerly) - - train_epochs = flags_obj.train_epochs - - callbacks = common.get_callbacks() - - if not flags_obj.use_tensor_lr: - lr_callback = LearningRateBatchScheduler( - schedule=learning_rate_schedule, - batch_size=flags_obj.batch_size, - steps_per_epoch=steps_per_epoch) - callbacks.append(lr_callback) - - # if mutliple epochs, ignore the train_steps flag. - if train_epochs <= 1 and flags_obj.train_steps: - steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch) - train_epochs = 1 - - num_eval_steps = (cifar_preprocessing.NUM_IMAGES['validation'] // - flags_obj.batch_size) - - validation_data = eval_input_dataset - if flags_obj.skip_eval: - if flags_obj.set_learning_phase_to_train: - # TODO(haoyuzhang): Understand slowdown of setting learning phase when - # not using distribution strategy. 
- tf.keras.backend.set_learning_phase(1) - num_eval_steps = None - validation_data = None - - if not strategy and flags_obj.explicit_gpu_placement: - # TODO(b/135607227): Add device scope automatically in Keras training loop - # when not using distribition strategy. - no_dist_strat_device = tf.device('/device:GPU:0') - no_dist_strat_device.__enter__() - - history = model.fit(train_input_dataset, - epochs=train_epochs, - steps_per_epoch=steps_per_epoch, - callbacks=callbacks, - validation_steps=num_eval_steps, - validation_data=validation_data, - validation_freq=flags_obj.epochs_between_evals, - verbose=2) - eval_output = None - if not flags_obj.skip_eval: - eval_output = model.evaluate(eval_input_dataset, - steps=num_eval_steps, - verbose=2) - - if not strategy and flags_obj.explicit_gpu_placement: - no_dist_strat_device.__exit__() - - stats = common.build_stats(history, eval_output, callbacks) - return stats - - -def define_cifar_flags(): - common.define_keras_flags(dynamic_loss_scale=False) - - flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin', - model_dir='/tmp/cifar10_model', - epochs_between_evals=10, - batch_size=128) - - -def main(_): - return run(flags.FLAGS) - - -if __name__ == '__main__': - logging.set_verbosity(logging.INFO) - define_cifar_flags() - app.run(main) diff --git a/official/benchmark/models/resnet_cifar_model.py b/official/benchmark/models/resnet_cifar_model.py deleted file mode 100644 index 1b507381f1b6907fdfb078d8316f3621a9e2b8f7..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_cifar_model.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""ResNet56 model for Keras adapted from tf.keras.applications.ResNet50. - -# Reference: -- [Deep Residual Learning for Image Recognition]( - https://arxiv.org/abs/1512.03385) -Adapted from code contributed by BigMoyan. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import tensorflow as tf -from tensorflow.python.keras import backend -from tensorflow.python.keras import initializers -from tensorflow.python.keras import layers -from tensorflow.python.keras import regularizers - - -BATCH_NORM_DECAY = 0.997 -BATCH_NORM_EPSILON = 1e-5 -L2_WEIGHT_DECAY = 2e-4 - - -def identity_building_block(input_tensor, - kernel_size, - filters, - stage, - block, - training=None): - """The identity block is the block that has no conv layer at shortcut. - - Arguments: - input_tensor: input tensor - kernel_size: default 3, the kernel size of - middle conv layer at main path - filters: list of integers, the filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: current block label, used for generating layer names - training: Only used if training keras model with Estimator. In other - scenarios it is handled automatically. - - Returns: - Output tensor for the block. 
- """ - filters1, filters2 = filters - if backend.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = layers.Conv2D(filters1, kernel_size, - padding='same', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name=conv_name_base + '2a')(input_tensor) - x = layers.BatchNormalization( - axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, - name=bn_name_base + '2a')(x, training=training) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters2, kernel_size, - padding='same', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name=conv_name_base + '2b')(x) - x = layers.BatchNormalization( - axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, - name=bn_name_base + '2b')(x, training=training) - - x = layers.add([x, input_tensor]) - x = layers.Activation('relu')(x) - return x - - -def conv_building_block(input_tensor, - kernel_size, - filters, - stage, - block, - strides=(2, 2), - training=None): - """A block that has a conv layer at shortcut. - - Arguments: - input_tensor: input tensor - kernel_size: default 3, the kernel size of - middle conv layer at main path - filters: list of integers, the filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: current block label, used for generating layer names - strides: Strides for the first conv layer in the block. - training: Only used if training keras model with Estimator. In other - scenarios it is handled automatically. - - Returns: - Output tensor for the block. 
- - Note that from stage 3, - the first conv layer at main path is with strides=(2, 2) - And the shortcut should have strides=(2, 2) as well - """ - filters1, filters2 = filters - if tf.keras.backend.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = layers.Conv2D(filters1, kernel_size, strides=strides, - padding='same', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name=conv_name_base + '2a')(input_tensor) - x = layers.BatchNormalization( - axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, - name=bn_name_base + '2a')(x, training=training) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters2, kernel_size, padding='same', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name=conv_name_base + '2b')(x) - x = layers.BatchNormalization( - axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, - name=bn_name_base + '2b')(x, training=training) - - shortcut = layers.Conv2D(filters2, (1, 1), strides=strides, use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name=conv_name_base + '1')(input_tensor) - shortcut = layers.BatchNormalization( - axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, - name=bn_name_base + '1')(shortcut, training=training) - - x = layers.add([x, shortcut]) - x = layers.Activation('relu')(x) - return x - - -def resnet_block(input_tensor, - size, - kernel_size, - filters, - stage, - conv_strides=(2, 2), - training=None): - """A block which applies conv followed by multiple identity blocks. - - Arguments: - input_tensor: input tensor - size: integer, number of constituent conv/identity building blocks. - A conv block is applied once, followed by (size - 1) identity blocks. 
- kernel_size: default 3, the kernel size of - middle conv layer at main path - filters: list of integers, the filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - conv_strides: Strides for the first conv layer in the block. - training: Only used if training keras model with Estimator. In other - scenarios it is handled automatically. - - Returns: - Output tensor after applying conv and identity blocks. - """ - - x = conv_building_block(input_tensor, kernel_size, filters, stage=stage, - strides=conv_strides, block='block_0', - training=training) - for i in range(size - 1): - x = identity_building_block(x, kernel_size, filters, stage=stage, - block='block_%d' % (i + 1), training=training) - return x - - -def resnet(num_blocks, classes=10, training=None): - """Instantiates the ResNet architecture. - - Arguments: - num_blocks: integer, the number of conv/identity blocks in each block. - The ResNet contains 3 blocks with each block containing one conv block - followed by (layers_per_block - 1) number of idenity blocks. Each - conv/idenity block has 2 convolutional layers. With the input - convolutional layer and the pooling layer towards the end, this brings - the total size of the network to (6*num_blocks + 2) - classes: optional number of classes to classify images into - training: Only used if training keras model with Estimator. In other - scenarios it is handled automatically. - - Returns: - A Keras model instance. 
- """ - - input_shape = (32, 32, 3) - img_input = layers.Input(shape=input_shape) - - if backend.image_data_format() == 'channels_first': - x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)), - name='transpose')(img_input) - bn_axis = 1 - else: # channel_last - x = img_input - bn_axis = 3 - - x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x) - x = layers.Conv2D(16, (3, 3), - strides=(1, 1), - padding='valid', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name='conv1')(x) - x = layers.BatchNormalization(axis=bn_axis, - momentum=BATCH_NORM_DECAY, - epsilon=BATCH_NORM_EPSILON, - name='bn_conv1',)(x, training=training) - x = layers.Activation('relu')(x) - - x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[16, 16], - stage=2, conv_strides=(1, 1), training=training) - - x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[32, 32], - stage=3, conv_strides=(2, 2), training=training) - - x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[64, 64], - stage=4, conv_strides=(2, 2), training=training) - - rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3] - x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x) - x = layers.Dense(classes, - activation='softmax', - kernel_initializer=initializers.RandomNormal(stddev=0.01), - kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), - name='fc10')(x) - - inputs = img_input - # Create model. 
- model = tf.keras.models.Model(inputs, x, name='resnet56') - - return model - - -resnet20 = functools.partial(resnet, num_blocks=3) -resnet32 = functools.partial(resnet, num_blocks=5) -resnet56 = functools.partial(resnet, num_blocks=9) -resnet10 = functools.partial(resnet, num_blocks=110) diff --git a/official/benchmark/models/resnet_cifar_test.py b/official/benchmark/models/resnet_cifar_test.py deleted file mode 100644 index c160f44eca1b6faf9def08860ebbdc6403d352e3..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_cifar_test.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Test the keras ResNet model with Cifar data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tempfile - -import tensorflow as tf - -from tensorflow.python.eager import context -from tensorflow.python.platform import googletest -from official.benchmark.models import cifar_preprocessing -from official.benchmark.models import resnet_cifar_main -from official.utils.testing import integration - - -class KerasCifarTest(googletest.TestCase): - """Unit tests for Keras ResNet with Cifar.""" - - _extra_flags = [ - "-batch_size", "4", - "-train_steps", "1", - "-use_synthetic_data", "true" - ] - _tempdir = None - - def get_temp_dir(self): - if not self._tempdir: - self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir()) - return self._tempdir - - @classmethod - def setUpClass(cls): # pylint: disable=invalid-name - super(KerasCifarTest, cls).setUpClass() - resnet_cifar_main.define_cifar_flags() - - def setUp(self): - super(KerasCifarTest, self).setUp() - cifar_preprocessing.NUM_IMAGES["validation"] = 4 - - def tearDown(self): - super(KerasCifarTest, self).tearDown() - tf.io.gfile.rmtree(self.get_temp_dir()) - - def test_end_to_end_no_dist_strat(self): - """Test Keras model with 1 GPU, no distribution strategy.""" - - extra_flags = [ - "-distribution_strategy", "off", - "-model_dir", "keras_cifar_no_dist_strat", - "-data_format", "channels_last", - ] - extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_graph_no_dist_strat(self): - """Test Keras model in legacy graph mode with 1 GPU, no dist strat.""" - extra_flags = [ - "-enable_eager", "false", - "-distribution_strategy", "off", - "-model_dir", "keras_cifar_graph_no_dist_strat", - "-data_format", "channels_last", - ] - 
extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_1_gpu(self): - """Test Keras model with 1 GPU.""" - - if context.num_gpus() < 1: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(1, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "1", - "-distribution_strategy", "mirrored", - "-model_dir", "keras_cifar_1_gpu", - "-data_format", "channels_last", - ] - extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_graph_1_gpu(self): - """Test Keras model in legacy graph mode with 1 GPU.""" - if context.num_gpus() < 1: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(1, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "1", - "-noenable_eager", - "-distribution_strategy", "mirrored", - "-model_dir", "keras_cifar_graph_1_gpu", - "-data_format", "channels_last", - ] - extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_2_gpu(self): - """Test Keras model with 2 GPUs.""" - - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". 
- format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-distribution_strategy", "mirrored", - "-model_dir", "keras_cifar_2_gpu", - ] - extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_graph_2_gpu(self): - """Test Keras model in legacy graph mode with 2 GPUs.""" - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-enable_eager", "false", - "-distribution_strategy", "mirrored", - "-model_dir", "keras_cifar_graph_2_gpu", - ] - extra_flags = extra_flags + self._extra_flags - - integration.run_synthetic( - main=resnet_cifar_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - -if __name__ == "__main__": - googletest.main() diff --git a/official/benchmark/models/resnet_imagenet_main.py b/official/benchmark/models/resnet_imagenet_main.py deleted file mode 100644 index 5a3cd503126e8796aed8a59164e9dcd6bef9c1dc..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_imagenet_main.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Runs a ResNet model on the ImageNet dataset.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from absl import app -from absl import flags -from absl import logging -import tensorflow as tf - -import tensorflow_model_optimization as tfmot -from official.modeling import performance -from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils -from official.utils.misc import keras_utils -from official.utils.misc import model_helpers -from official.vision.image_classification import test_utils -from official.vision.image_classification.resnet import common -from official.vision.image_classification.resnet import imagenet_preprocessing -from official.vision.image_classification.resnet import resnet_model - - -def run(flags_obj): - """Run ResNet ImageNet training and eval loop using native Keras APIs. - - Args: - flags_obj: An object containing parsed flag values. - - Raises: - ValueError: If fp16 is passed as it is not currently supported. - NotImplementedError: If some features are not currently supported. - - Returns: - Dictionary of training and eval stats. 
- """ - keras_utils.set_session_config( - enable_xla=flags_obj.enable_xla) - - # Execute flag override logic for better model performance - if flags_obj.tf_gpu_thread_mode: - keras_utils.set_gpu_thread_mode_and_count( - per_gpu_thread_count=flags_obj.per_gpu_thread_count, - gpu_thread_mode=flags_obj.tf_gpu_thread_mode, - num_gpus=flags_obj.num_gpus, - datasets_num_private_threads=flags_obj.datasets_num_private_threads) - common.set_cudnn_batchnorm_mode() - - dtype = flags_core.get_tf_dtype(flags_obj) - performance.set_mixed_precision_policy( - flags_core.get_tf_dtype(flags_obj), - flags_core.get_loss_scale(flags_obj, default_for_fp16=128)) - - data_format = flags_obj.data_format - if data_format is None: - data_format = ('channels_first' if tf.config.list_physical_devices('GPU') - else 'channels_last') - tf.keras.backend.set_image_data_format(data_format) - - # Configures cluster spec for distribution strategy. - _ = distribution_utils.configure_cluster(flags_obj.worker_hosts, - flags_obj.task_index) - - strategy = distribution_utils.get_distribution_strategy( - distribution_strategy=flags_obj.distribution_strategy, - num_gpus=flags_obj.num_gpus, - all_reduce_alg=flags_obj.all_reduce_alg, - num_packs=flags_obj.num_packs, - tpu_address=flags_obj.tpu) - - if strategy: - # flags_obj.enable_get_next_as_optional controls whether enabling - # get_next_as_optional behavior in DistributedIterator. If true, last - # partial batch can be supported. 
- strategy.extended.experimental_enable_get_next_as_optional = ( - flags_obj.enable_get_next_as_optional - ) - - strategy_scope = distribution_utils.get_strategy_scope(strategy) - - # pylint: disable=protected-access - if flags_obj.use_synthetic_data: - input_fn = common.get_synth_input_fn( - height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE, - width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE, - num_channels=imagenet_preprocessing.NUM_CHANNELS, - num_classes=imagenet_preprocessing.NUM_CLASSES, - dtype=dtype, - drop_remainder=True) - else: - input_fn = imagenet_preprocessing.input_fn - - # When `enable_xla` is True, we always drop the remainder of the batches - # in the dataset, as XLA-GPU doesn't support dynamic shapes. - drop_remainder = flags_obj.enable_xla - - # Current resnet_model.resnet50 input format is always channel-last. - # We use keras_application mobilenet model which input format is depends on - # the keras beckend image data format. - # This use_keras_image_data_format flags indicates whether image preprocessor - # output format should be same as the keras backend image data format or just - # channel-last format. 
- use_keras_image_data_format = (flags_obj.model == 'mobilenet') - train_input_dataset = input_fn( - is_training=True, - data_dir=flags_obj.data_dir, - batch_size=flags_obj.batch_size, - parse_record_fn=imagenet_preprocessing.get_parse_record_fn( - use_keras_image_data_format=use_keras_image_data_format), - datasets_num_private_threads=flags_obj.datasets_num_private_threads, - dtype=dtype, - drop_remainder=drop_remainder, - tf_data_experimental_slack=flags_obj.tf_data_experimental_slack, - training_dataset_cache=flags_obj.training_dataset_cache, - ) - - eval_input_dataset = None - if not flags_obj.skip_eval: - eval_input_dataset = input_fn( - is_training=False, - data_dir=flags_obj.data_dir, - batch_size=flags_obj.batch_size, - parse_record_fn=imagenet_preprocessing.get_parse_record_fn( - use_keras_image_data_format=use_keras_image_data_format), - dtype=dtype, - drop_remainder=drop_remainder) - - lr_schedule = common.PiecewiseConstantDecayWithWarmup( - batch_size=flags_obj.batch_size, - epoch_size=imagenet_preprocessing.NUM_IMAGES['train'], - warmup_epochs=common.LR_SCHEDULE[0][1], - boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]), - multipliers=list(p[0] for p in common.LR_SCHEDULE), - compute_lr_on_cpu=True) - steps_per_epoch = ( - imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size) - - with strategy_scope: - if flags_obj.optimizer == 'resnet50_default': - optimizer = common.get_optimizer(lr_schedule) - elif flags_obj.optimizer == 'mobilenet_default': - initial_learning_rate = \ - flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size - optimizer = tf.keras.optimizers.SGD( - learning_rate=tf.keras.optimizers.schedules.ExponentialDecay( - initial_learning_rate, - decay_steps=steps_per_epoch * flags_obj.num_epochs_per_decay, - decay_rate=flags_obj.lr_decay_factor, - staircase=True), - momentum=0.9) - if flags_obj.fp16_implementation == 'graph_rewrite': - # Note: when flags_obj.fp16_implementation == "graph_rewrite", dtype as - # 
determined by flags_core.get_tf_dtype(flags_obj) would be 'float32' - # which will ensure tf.compat.v2.keras.mixed_precision and - # tf.train.experimental.enable_mixed_precision_graph_rewrite do not double - # up. - optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite( - optimizer) - - # TODO(hongkuny): Remove trivial model usage and move it to benchmark. - if flags_obj.use_trivial_model: - model = test_utils.trivial_model(imagenet_preprocessing.NUM_CLASSES) - elif flags_obj.model == 'resnet50_v1.5': - model = resnet_model.resnet50( - num_classes=imagenet_preprocessing.NUM_CLASSES) - elif flags_obj.model == 'mobilenet': - # TODO(kimjaehong): Remove layers attribute when minimum TF version - # support 2.0 layers by default. - model = tf.keras.applications.mobilenet.MobileNet( - weights=None, - classes=imagenet_preprocessing.NUM_CLASSES, - layers=tf.keras.layers) - if flags_obj.pretrained_filepath: - model.load_weights(flags_obj.pretrained_filepath) - - if flags_obj.pruning_method == 'polynomial_decay': - if dtype != tf.float32: - raise NotImplementedError( - 'Pruning is currently only supported on dtype=tf.float32.') - pruning_params = { - 'pruning_schedule': - tfmot.sparsity.keras.PolynomialDecay( - initial_sparsity=flags_obj.pruning_initial_sparsity, - final_sparsity=flags_obj.pruning_final_sparsity, - begin_step=flags_obj.pruning_begin_step, - end_step=flags_obj.pruning_end_step, - frequency=flags_obj.pruning_frequency), - } - model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params) - elif flags_obj.pruning_method: - raise NotImplementedError( - 'Only polynomial_decay is currently supported.') - - model.compile( - loss='sparse_categorical_crossentropy', - optimizer=optimizer, - metrics=(['sparse_categorical_accuracy'] - if flags_obj.report_accuracy_metrics else None), - run_eagerly=flags_obj.run_eagerly) - - train_epochs = flags_obj.train_epochs - - callbacks = common.get_callbacks( - pruning_method=flags_obj.pruning_method, - 
enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export, - model_dir=flags_obj.model_dir) - - # if mutliple epochs, ignore the train_steps flag. - if train_epochs <= 1 and flags_obj.train_steps: - steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch) - train_epochs = 1 - - num_eval_steps = ( - imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size) - - validation_data = eval_input_dataset - if flags_obj.skip_eval: - # Only build the training graph. This reduces memory usage introduced by - # control flow ops in layers that have different implementations for - # training and inference (e.g., batch norm). - if flags_obj.set_learning_phase_to_train: - # TODO(haoyuzhang): Understand slowdown of setting learning phase when - # not using distribution strategy. - tf.keras.backend.set_learning_phase(1) - num_eval_steps = None - validation_data = None - - if not strategy and flags_obj.explicit_gpu_placement: - # TODO(b/135607227): Add device scope automatically in Keras training loop - # when not using distribition strategy. - no_dist_strat_device = tf.device('/device:GPU:0') - no_dist_strat_device.__enter__() - - history = model.fit(train_input_dataset, - epochs=train_epochs, - steps_per_epoch=steps_per_epoch, - callbacks=callbacks, - validation_steps=num_eval_steps, - validation_data=validation_data, - validation_freq=flags_obj.epochs_between_evals, - verbose=2) - - eval_output = None - if not flags_obj.skip_eval: - eval_output = model.evaluate(eval_input_dataset, - steps=num_eval_steps, - verbose=2) - - if flags_obj.pruning_method: - model = tfmot.sparsity.keras.strip_pruning(model) - if flags_obj.enable_checkpoint_and_export: - if dtype == tf.bfloat16: - logging.warning('Keras model.save does not support bfloat16 dtype.') - else: - # Keras model.save assumes a float32 input designature. 
- export_path = os.path.join(flags_obj.model_dir, 'saved_model') - model.save(export_path, include_optimizer=False) - - if not strategy and flags_obj.explicit_gpu_placement: - no_dist_strat_device.__exit__() - - stats = common.build_stats(history, eval_output, callbacks) - return stats - - -def define_imagenet_keras_flags(): - common.define_keras_flags( - model=True, - optimizer=True, - pretrained_filepath=True) - common.define_pruning_flags() - flags_core.set_defaults() - flags.adopt_module_key_flags(common) - - -def main(_): - model_helpers.apply_clean(flags.FLAGS) - stats = run(flags.FLAGS) - logging.info('Run stats:\n%s', stats) - - -if __name__ == '__main__': - logging.set_verbosity(logging.INFO) - define_imagenet_keras_flags() - app.run(main) diff --git a/official/benchmark/models/resnet_imagenet_test.py b/official/benchmark/models/resnet_imagenet_test.py deleted file mode 100644 index 45c35d539ce2d7fcd0df30ed1d520e47e51312fa..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_imagenet_test.py +++ /dev/null @@ -1,249 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Test the keras ResNet model with ImageNet data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import tensorflow as tf - -from tensorflow.python.eager import context -from official.benchmark.models import resnet_imagenet_main -from official.utils.testing import integration -from official.vision.image_classification.resnet import imagenet_preprocessing - - -@parameterized.parameters( - "resnet", - # "resnet_polynomial_decay", b/151854314 - "mobilenet", - # "mobilenet_polynomial_decay" b/151854314 -) -class KerasImagenetTest(tf.test.TestCase): - """Unit tests for Keras Models with ImageNet.""" - _default_flags_dict = [ - "-batch_size", "4", - "-train_steps", "1", - "-use_synthetic_data", "true", - "-data_format", "channels_last", - ] - _extra_flags_dict = { - "resnet": [ - "-model", "resnet50_v1.5", - "-optimizer", "resnet50_default", - ], - "resnet_polynomial_decay": [ - "-model", "resnet50_v1.5", - "-optimizer", "resnet50_default", - "-pruning_method", "polynomial_decay", - ], - "mobilenet": [ - "-model", "mobilenet", - "-optimizer", "mobilenet_default", - ], - "mobilenet_polynomial_decay": [ - "-model", "mobilenet", - "-optimizer", "mobilenet_default", - "-pruning_method", "polynomial_decay", - ], - } - _tempdir = None - - @classmethod - def setUpClass(cls): # pylint: disable=invalid-name - super(KerasImagenetTest, cls).setUpClass() - resnet_imagenet_main.define_imagenet_keras_flags() - - def setUp(self): - super(KerasImagenetTest, self).setUp() - imagenet_preprocessing.NUM_IMAGES["validation"] = 4 - self.policy = \ - tf.keras.mixed_precision.experimental.global_policy() - - def tearDown(self): - super(KerasImagenetTest, self).tearDown() - tf.io.gfile.rmtree(self.get_temp_dir()) - tf.keras.mixed_precision.experimental.set_policy(self.policy) - - def 
get_extra_flags_dict(self, flags_key): - return self._extra_flags_dict[flags_key] + self._default_flags_dict - - def test_end_to_end_no_dist_strat(self, flags_key): - """Test Keras model with 1 GPU, no distribution strategy.""" - - extra_flags = [ - "-distribution_strategy", "off", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_graph_no_dist_strat(self, flags_key): - """Test Keras model in legacy graph mode with 1 GPU, no dist strat.""" - extra_flags = [ - "-enable_eager", "false", - "-distribution_strategy", "off", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_1_gpu(self, flags_key): - """Test Keras model with 1 GPU.""" - - if context.num_gpus() < 1: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(1, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "1", - "-distribution_strategy", "mirrored", - "-enable_checkpoint_and_export", "1", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_1_gpu_fp16(self, flags_key): - """Test Keras model with 1 GPU and fp16.""" - - if context.num_gpus() < 1: - self.skipTest( - "{} GPUs are not available for this test. 
{} GPUs are available" - .format(1, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "1", - "-dtype", "fp16", - "-distribution_strategy", "mirrored", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - if "polynomial_decay" in extra_flags: - self.skipTest("Pruning with fp16 is not currently supported.") - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_2_gpu(self, flags_key): - """Test Keras model with 2 GPUs.""" - - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-distribution_strategy", "mirrored", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_xla_2_gpu(self, flags_key): - """Test Keras model with XLA and 2 GPUs.""" - - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-enable_xla", "true", - "-distribution_strategy", "mirrored", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_2_gpu_fp16(self, flags_key): - """Test Keras model with 2 GPUs and fp16.""" - - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". 
- format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-dtype", "fp16", - "-distribution_strategy", "mirrored", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - if "polynomial_decay" in extra_flags: - self.skipTest("Pruning with fp16 is not currently supported.") - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - def test_end_to_end_xla_2_gpu_fp16(self, flags_key): - """Test Keras model with XLA, 2 GPUs and fp16.""" - if context.num_gpus() < 2: - self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(2, context.num_gpus())) - - extra_flags = [ - "-num_gpus", "2", - "-dtype", "fp16", - "-enable_xla", "true", - "-distribution_strategy", "mirrored", - ] - extra_flags = extra_flags + self.get_extra_flags_dict(flags_key) - - if "polynomial_decay" in extra_flags: - self.skipTest("Pruning with fp16 is not currently supported.") - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/benchmark/models/resnet_imagenet_test_tpu.py b/official/benchmark/models/resnet_imagenet_test_tpu.py deleted file mode 100644 index 7fd72c404139b723407cc9a68c8afddd158ed691..0000000000000000000000000000000000000000 --- a/official/benchmark/models/resnet_imagenet_test_tpu.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Test the keras ResNet model with ImageNet data on TPU.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import tensorflow as tf -from official.benchmark.models import resnet_imagenet_main -from official.utils.testing import integration -from official.vision.image_classification.resnet import imagenet_preprocessing - - -class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase): - """Unit tests for Keras Models with ImageNet.""" - - _extra_flags_dict = { - "resnet": [ - "-batch_size", "4", - "-train_steps", "1", - "-use_synthetic_data", "true" - "-model", "resnet50_v1.5", - "-optimizer", "resnet50_default", - ], - "resnet_polynomial_decay": [ - "-batch_size", "4", - "-train_steps", "1", - "-use_synthetic_data", "true", - "-model", "resnet50_v1.5", - "-optimizer", "resnet50_default", - "-pruning_method", "polynomial_decay", - ], - } - _tempdir = None - - @classmethod - def setUpClass(cls): # pylint: disable=invalid-name - super(KerasImagenetTest, cls).setUpClass() - resnet_imagenet_main.define_imagenet_keras_flags() - - def setUp(self): - super(KerasImagenetTest, self).setUp() - imagenet_preprocessing.NUM_IMAGES["validation"] = 4 - self.policy = \ - tf.keras.mixed_precision.experimental.global_policy() - - def tearDown(self): - super(KerasImagenetTest, self).tearDown() - tf.io.gfile.rmtree(self.get_temp_dir()) - 
tf.keras.mixed_precision.experimental.set_policy(self.policy) - - @parameterized.parameters([ - "resnet", - # "resnet_polynomial_decay" b/151854314 - ]) - def test_end_to_end_tpu(self, flags_key): - """Test Keras model with TPU distribution strategy.""" - - extra_flags = [ - "-distribution_strategy", "tpu", - "-data_format", "channels_last", - "-enable_checkpoint_and_export", "1", - ] - extra_flags = extra_flags + self._extra_flags_dict[flags_key] - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - @parameterized.parameters(["resnet"]) - def test_end_to_end_tpu_bf16(self, flags_key): - """Test Keras model with TPU and bfloat16 activation.""" - - extra_flags = [ - "-distribution_strategy", "tpu", - "-data_format", "channels_last", - "-dtype", "bf16", - ] - extra_flags = extra_flags + self._extra_flags_dict[flags_key] - - integration.run_synthetic( - main=resnet_imagenet_main.run, - tmp_root=self.get_temp_dir(), - extra_flags=extra_flags - ) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/benchmark/models/shakespeare/README.md b/official/benchmark/models/shakespeare/README.md deleted file mode 100644 index 5395cc9642845ffb8bf36fdbc4f93bb450ba557f..0000000000000000000000000000000000000000 --- a/official/benchmark/models/shakespeare/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Shakespeare character LSTM model - -This is an implemention of a simple character LSTM used to generate text. - -## Instructions - -First download the source data: - -``` -wget https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt -``` - -Note that files other than shakepeare.txt can also be used to train the model to generater other text. - -Then train the model: - -```python -python3 shakespeare_main.py --training_data shakespeare.txt \ - --model_dir /tmp/shakespeare -``` - -This will place model checkpoints in `/tmp/shakespeare`, so that we can use them to make predictions. 
- -Then generate predictions: - -```python -python3 shakespeare_main.py --training_data shakespeare.txt \ - --model_dir /tmp/shakespeare --notrain --predict_context=ROMEO: -``` - -Change `--predict_context` and `--predict_length` to suit your needs. diff --git a/official/benchmark/models/shakespeare/__init__.py b/official/benchmark/models/shakespeare/__init__.py deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/official/benchmark/models/shakespeare/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/official/benchmark/models/shakespeare/shakespeare_main.py b/official/benchmark/models/shakespeare/shakespeare_main.py deleted file mode 100644 index 6928dd1d61491acf84b969a52c7f0693617ac7f0..0000000000000000000000000000000000000000 --- a/official/benchmark/models/shakespeare/shakespeare_main.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Runs a character LSTM model trained on Shakespeare.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -# pylint: disable=wrong-import-order -from absl import app -from absl import flags -import numpy as np -import tensorflow as tf -# pylint: enable=wrong-import-order - -from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils -from official.utils.misc import keras_utils - -EMBEDDING_DIM = 256 -RNN_UNITS = 1024 -SEQ_LENGTH = 100 -# Calculated by running batch_size=1 -BATCHES_PER_EPOCH = 11043 - - -def define_flags(): - """Define the flags for the Shakespeare character LSTM.""" - flags_core.define_base(data_dir=False, - clean=False, - train_epochs=True, - epochs_between_evals=False, - stop_threshold=False, - num_gpu=True, - export_dir=False, - run_eagerly=True, - distribution_strategy=True) - - flags_core.define_performance(num_parallel_calls=False, - inter_op=False, - intra_op=False, - synthetic_data=False, - max_train_steps=False, - dtype=True, - loss_scale=True, - enable_xla=True) - - flags_core.set_defaults(train_epochs=43, - batch_size=64) - - flags.DEFINE_boolean(name='enable_eager', default=True, help='Enable eager?') - flags.DEFINE_boolean( - name='train', default=True, - help='If true trains the model.') - flags.DEFINE_string( - name='predict_context', default=None, - help='If set, makes a prediction with the given context.') - flags.DEFINE_integer( - name='predict_length', default=1000, - help='Length of the predicted text including the context.') - flags.DEFINE_integer(name='train_steps', default=None, - help='Overrides train_steps per epoch if not None.') - flags.DEFINE_integer( - name='log_steps', default=100, - help='For every log_steps, we log the timing information such as ' - 'examples per second.') - flags.DEFINE_string( 
- name='training_data', default=None, - help='Path to file containing the training data.') - flags.DEFINE_boolean(name='cudnn', default=True, help='Use CuDNN LSTM.') - - -def get_dataset(path_to_file, batch_size=None, seq_length=SEQ_LENGTH): - """Creates a dataset from a given text file. - - Args: - path_to_file: The path to the training data. - batch_size: Batch size to use. - seq_length: The length of the LSTM sequence. - - Returns: - A tuple, consisting of the Dataset and the class to character mapping - and character to class mapping. - """ - with tf.io.gfile.GFile(path_to_file, 'rb') as train_data: - text = train_data.read().decode(encoding='utf-8') - - # Create vocab - vocab = sorted(set(text)) - char2idx = {u: i for i, u in enumerate(vocab)} - idx2char = np.array(vocab) - - # Split text into sequence length + 1 chucks to create examples - text_as_int = np.array([char2idx[c] for c in text]) - char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int) - sequences = char_dataset.batch(seq_length+1, drop_remainder=True) - - def split_input_target(chunk): - input_text = chunk[:-1] - target_text = chunk[1:] - return input_text, tf.one_hot(target_text, len(vocab)) - dataset = sequences.map(split_input_target) - dataset = dataset.shuffle(10000).repeat() - dataset = dataset.batch(batch_size, drop_remainder=True) - - return dataset, idx2char, char2idx - - -def build_model(vocab_size, - embedding_dim=EMBEDDING_DIM, - rnn_units=RNN_UNITS, - batch_size=None, - stateful=False, - use_cudnn=True): - """Builds the Shakespeare model. - - Args: - vocab_size: The number of character classes in the input. - embedding_dim: The dimension of the embedding space for each class. - rnn_units: The number of RNN units in the layer. - batch_size: When predicting, the batch size of the predictions. - stateful: If true, the LSTM is stateful. - - Returns: - A Keras Model. 
- """ - LSTM = functools.partial(tf.keras.layers.LSTM, implementation=2) - - # By indirecting the activation through a lambda layer, the logic to dispatch - # to CuDNN in V2 doesn't trigger and we force the LSTM to run in non-CuDNN - # mode. - lstm_activation = ('tanh' if use_cudnn else - lambda x: tf.math.tanh(x)) - - batch_shape = [batch_size if stateful else None, None] - return tf.keras.Sequential([ - tf.keras.layers.Embedding(vocab_size, embedding_dim, - batch_input_shape=batch_shape), - LSTM(rnn_units, - activation=lstm_activation, - return_sequences=True, - stateful=stateful, - recurrent_initializer='glorot_uniform'), - tf.keras.layers.Dense(vocab_size), - tf.keras.layers.Softmax(dtype=tf.float32)]) - - -def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None): - """Trains a Shakespeare model. - - Args: - flags_obj: An object containing parsed flag values.s - dataset: the training data set. - vocab_size: the number of unique character classes. - strategy: distribution strategy to use. - checkpoint_dir: if not None, the directory in which to make checkpoints. - - Returns: - The training history and callbacks. - """ - if flags_obj.train_steps: - train_steps = flags_obj.train_steps - else: - train_steps = BATCHES_PER_EPOCH // flags_obj.batch_size - strategy_scope = distribution_utils.get_strategy_scope(strategy) - - with strategy_scope: - model = build_model(vocab_size=vocab_size, batch_size=flags_obj.batch_size, - use_cudnn=flags_obj.cudnn) - - # When keras_use_ctl is False, Model.fit() automatically applies - # loss scaling so we don't need to create a LossScaleOptimizer. 
- model.compile( - optimizer=tf.keras.optimizers.Adam(), - loss=tf.keras.losses.CategoricalCrossentropy(), - metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'), - tf.keras.metrics.Recall(top_k=5, name='RecallAt5')], - run_eagerly=flags_obj.run_eagerly) - - callbacks = [] - if checkpoint_dir: - checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}') - checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( - filepath=checkpoint_prefix, - save_weights_only=True) - callbacks.append(checkpoint_callback) - time_callback = keras_utils.TimeHistory(flags_obj.batch_size, - flags_obj.log_steps) - callbacks.append(time_callback) - history = model.fit(dataset, - epochs=flags_obj.train_epochs, - steps_per_epoch=train_steps, - callbacks=callbacks, - verbose=2) - return history, callbacks - - -def make_prediction(checkpoint_dir, length, context, idx2char, char2idx): - """Make predictions from a Shakespeare model. - - Args: - checkpoint_dir: the directory from which to load checkpoints - length: the total length of the generated text (including the context). - context: the initial text with which the LSTM is primed. - idx2char: the character class to character mapping. - char2idx: the character to character class mapping. - - Returns: - A generated string of text of the given length. - """ - prediction_model = build_model( - vocab_size=len(idx2char), batch_size=1, stateful=True) - prediction_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir)) - prediction_model.build(tf.TensorShape([1, None])) - - input_eval = [char2idx[s] for s in context] - input_eval = tf.expand_dims(input_eval, 0) - - text_generated = [] - - prediction_model.reset_states() - for _ in range(length - len(context)): - predictions = prediction_model(input_eval) - predictions = tf.squeeze(predictions, 0) - - # We applied a softmax to the output of the model so that - # tf.keras.metrics.Recall would work. 
We need logits for - # tf.random.categorical, so we convert the probabilities back to log odds - predictions = tf.math.log(predictions / (1 - predictions)) - - random_output = tf.random.categorical(predictions, num_samples=1) - selected_id = random_output[-1, 0].numpy() - input_eval = tf.expand_dims([selected_id], 0) - text_generated.append(idx2char[selected_id]) - - return context + ''.join(text_generated) - - -def run(flags_obj): - """Run Shakespeare training and predict. - - Args: - flags_obj: An object containing parsed flag values. - - Returns: - Dictionary with status from the run. - """ - if not flags_obj.training_data: - raise ValueError( - 'Must set the path to a training data file. e.g download the following ' - 'https://storage.googleapis.com/download.tensorflow.org/data/' - 'shakespeare.txt') - - if flags_obj.dtype == 'fp16': - policy = tf.keras.mixed_precision.experimental.Policy( - 'mixed_float16', - loss_scale=flags_core.get_loss_scale(flags_obj, - default_for_fp16='dynamic')) - tf.keras.mixed_precision.experimental.set_policy(policy) - - keras_utils.set_session_config( - enable_xla=flags_obj.enable_xla) - - strategy = distribution_utils.get_distribution_strategy( - distribution_strategy=flags_obj.distribution_strategy, - num_gpus=flags_obj.num_gpus) - - dataset, idx2char, char2idx = get_dataset(flags_obj.training_data, - batch_size=flags_obj.batch_size) - stats = {} - if flags_obj.train: - history, callbacks = train_model(flags_obj, dataset, - len(idx2char), strategy, - checkpoint_dir=flags_obj.model_dir) - - stats['history'] = history.history - stats['callbacks'] = callbacks - - if flags_obj.predict_context: - if not flags_obj.model_dir: - raise ValueError('Must set model_dir to get predictions.') - print(make_prediction(flags_obj.model_dir, - flags_obj.predict_length, - flags_obj.predict_context, - idx2char, - char2idx)) - - return stats - - -def main(_): - flags_obj = flags.FLAGS - run(flags_obj) - - -if __name__ == '__main__': - define_flags() - 
app.run(main) diff --git a/official/benchmark/models/synthetic_util.py b/official/benchmark/models/synthetic_util.py deleted file mode 100644 index c14d0223dc417e6b0bd220f65dc3db0291bb773c..0000000000000000000000000000000000000000 --- a/official/benchmark/models/synthetic_util.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Helper functions to generate data directly on devices.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -import string - -from absl import logging -import tensorflow as tf - - -# The `SyntheticDataset` is a temporary solution for generating synthetic data -# directly on devices. It is only useful for Keras with Distribution -# Strategies. We will have better support in `tf.data` or Distribution Strategy -# later. -class SyntheticDataset(object): - """A dataset that generates synthetic data on each device.""" - - def __init__(self, dataset, split_by=1): - # dataset.take(1) doesn't have GPU kernel. 
- with tf.device('device:CPU:0'): - tensor = tf.data.experimental.get_single_element(dataset.take(1)) - flat_tensor = tf.nest.flatten(tensor) - variable_data = [] - initializers = [] - for t in flat_tensor: - rebatched_t = tf.split(t, num_or_size_splits=split_by, axis=0)[0] - assert rebatched_t.shape.is_fully_defined(), rebatched_t.shape - v = tf.compat.v1.get_local_variable(self._random_name(), - initializer=rebatched_t) - variable_data.append(v) - initializers.append(v.initializer) - input_data = tf.nest.pack_sequence_as(tensor, variable_data) - self._iterator = SyntheticIterator(input_data, initializers) - - def _random_name(self, size=10, chars=string.ascii_uppercase + string.digits): - return ''.join(random.choice(chars) for _ in range(size)) - - def __iter__(self): - return self._iterator - - def make_one_shot_iterator(self): - return self._iterator - - def make_initializable_iterator(self): - return self._iterator - - -class SyntheticIterator(object): - """A dataset that generates synthetic data on each device.""" - - def __init__(self, input_data, initializers): - self._input_data = input_data - self._initializers = initializers - - def get_next(self): - return self._input_data - - def next(self): - return self.__next__() - - def __next__(self): - try: - return self.get_next() - except tf.errors.OutOfRangeError: - raise StopIteration - - def initialize(self): - if tf.executing_eagerly(): - return tf.no_op() - else: - return self._initializers - - -def _monkey_patch_dataset_method(strategy): - """Monkey-patch `strategy`'s `make_dataset_iterator` method.""" - def make_dataset(self, dataset): - logging.info('Using pure synthetic data.') - with self.scope(): - if self.extended._global_batch_size: # pylint: disable=protected-access - return SyntheticDataset(dataset, self.num_replicas_in_sync) - else: - return SyntheticDataset(dataset) - - def make_iterator(self, dataset): - dist_dataset = make_dataset(self, dataset) - return iter(dist_dataset) - - 
strategy.orig_make_dataset_iterator = strategy.make_dataset_iterator - strategy.make_dataset_iterator = make_iterator - strategy.orig_distribute_dataset = strategy.experimental_distribute_dataset - strategy.experimental_distribute_dataset = make_dataset - - -def _undo_monkey_patch_dataset_method(strategy): - if hasattr(strategy, 'orig_make_dataset_iterator'): - strategy.make_dataset_iterator = strategy.orig_make_dataset_iterator - if hasattr(strategy, 'orig_distribute_dataset'): - strategy.make_dataset_iterator = strategy.orig_distribute_dataset - - -def set_up_synthetic_data(): - _monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy) - _monkey_patch_dataset_method(tf.distribute.MirroredStrategy) - _monkey_patch_dataset_method( - tf.distribute.experimental.MultiWorkerMirroredStrategy) - - -def undo_set_up_synthetic_data(): - _undo_monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy) - _undo_monkey_patch_dataset_method(tf.distribute.MirroredStrategy) - _undo_monkey_patch_dataset_method( - tf.distribute.experimental.MultiWorkerMirroredStrategy) diff --git a/official/benchmark/ncf_keras_benchmark.py b/official/benchmark/ncf_keras_benchmark.py deleted file mode 100644 index 170c99a33f46f14f977182c4e8a6d7ffbf96682d..0000000000000000000000000000000000000000 --- a/official/benchmark/ncf_keras_benchmark.py +++ /dev/null @@ -1,488 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes Keras benchmarks and accuracy tests.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -from absl import flags -from absl import logging -from absl.testing import flagsaver -import tensorflow as tf -from official.benchmark import benchmark_wrappers -from official.benchmark import owner_utils -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark -from official.recommendation import ncf_common -from official.recommendation import ncf_keras_main -from official.utils.flags import core - -FLAGS = flags.FLAGS -NCF_DATA_DIR_NAME = 'movielens_data' -NCF_TF_REGRESSION_DATA_DIR_NAME = 'gs://tf-regression/ncf/data' - - -class NCFKerasBenchmarkBase(PerfZeroBenchmark): - """Base class for NCF model benchmark.""" - - def __init__(self, output_dir=None, default_flags=None, **kwargs): - super(NCFKerasBenchmarkBase, self).__init__(output_dir, default_flags, - **kwargs) - - # Run all benchmarks with ml_perf flag. - self.default_flags['ml_perf'] = True - - def _setup(self): - """Sets up and resets flags before each test.""" - logging.set_verbosity(logging.INFO) - if NCFKerasBenchmarkBase.local_flags is None: - ncf_common.define_ncf_flags() - # Loads flags to get defaults to then override. List cannot be empty. 
- flags.FLAGS(['foo']) - core.set_defaults(**self.default_flags) - saved_flag_values = flagsaver.save_flag_values() - NCFKerasBenchmarkBase.local_flags = saved_flag_values - else: - flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0): - start_time_sec = time.time() - stats = ncf_keras_main.run_ncf(FLAGS) - wall_time_sec = time.time() - start_time_sec - - metrics = [] - metrics.append({'name': 'exp_per_second', - 'value': stats['avg_exp_per_second']}) - - if hr_at_10_min > 0: - metrics.append({'name': 'hr_at_10', - 'value': stats['eval_hit_rate'], - 'min_value': hr_at_10_min, - 'max_value': hr_at_10_max}) - - metrics.append({'name': 'train_loss', - 'value': stats['loss']}) - - self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics) - - -class NCFKerasAccuracy(NCFKerasBenchmarkBase): - """Benchmark NCF model using real data.""" - - def __init__(self, - output_dir=None, - root_data_dir=None, - default_flags=None, - **kwargs): - root_data_dir = root_data_dir if root_data_dir else '' - default_flags = {} - default_flags['dataset'] = 'ml-20m' - default_flags['num_gpus'] = 1 - default_flags['train_epochs'] = 10 - default_flags['clean'] = True - default_flags['batch_size'] = 99000 - default_flags['learning_rate'] = 0.00382059 - default_flags['beta1'] = 0.783529 - default_flags['beta2'] = 0.909003 - default_flags['epsilon'] = 1.45439e-07 - default_flags['layers'] = [256, 256, 128, 64] - default_flags['num_factors'] = 64 - default_flags['hr_threshold'] = 0.635 - default_flags['ml_perf'] = True - default_flags['use_synthetic_data'] = False - default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME) - - super(NCFKerasAccuracy, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - **kwargs) - - def _run_and_report_benchmark_mlperf_like(self): - """Run test and report results. 
- - Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but - we want it recorded. - """ - self._run_and_report_benchmark(hr_at_10_min=0.61) - - def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645): - """Run test and report results. - - Note: Target is 0.635, but some runs are below that level. Until we have - multi-run tests, we have to accept a lower target. - - Args: - hr_at_10_min: Minimum acceptable hr@10 value. - hr_at_10_max: Maximum acceptable hr@10 value. - """ - super(NCFKerasAccuracy, self)._run_and_report_benchmark( - hr_at_10_min=hr_at_10_min, - hr_at_10_max=hr_at_10_max) - - def _set_8_gpu_defaults(self): - FLAGS.num_gpus = 8 - FLAGS.learning_rate = 0.0045 - FLAGS.beta1 = 0.25 - FLAGS.beta2 = 0.5 - FLAGS.epsilon = 1e-8 - FLAGS.train_epochs = 14 - FLAGS.batch_size = 99000 - FLAGS.eval_batch_size = 160000 - FLAGS.train_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME, - 'training_cycle_*/*') - FLAGS.eval_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME, - 'eval_data/*') - FLAGS.input_meta_data_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME, - 'metadata') - FLAGS.data_dir = NCF_TF_REGRESSION_DATA_DIR_NAME - - def benchmark_1_gpu_early_stop(self): - self._setup() - FLAGS.early_stopping = True - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_early_stop(self): - self._setup() - FLAGS.distribution_strategy = 'off' - FLAGS.early_stopping = True - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self): - self._setup() - FLAGS.distribution_strategy = 'off' - FLAGS.early_stopping = True - FLAGS.run_eagerly = True - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_early_stop(self): - self._setup() - FLAGS.early_stopping = True - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_1_gpu_ctl_early_stop(self): - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.early_stopping = True - 
self._run_and_report_benchmark() - - def benchmark_1_gpu_ctl_run_eagerly_early_stop(self): - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.early_stopping = True - FLAGS.run_eagerly = True - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_ctl_early_stop(self): - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.early_stopping = True - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_2_gpus_early_stop(self): - self._setup() - FLAGS.early_stopping = True - FLAGS.num_gpus = 2 - FLAGS.eval_batch_size = 160000 - self._run_and_report_benchmark() - - def benchmark_2_gpus_ctl_early_stop(self): - """NCF with custom training loop. Works only in TF 2.0.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.early_stopping = True - FLAGS.num_gpus = 2 - FLAGS.eval_batch_size = 160000 - self._run_and_report_benchmark() - -############################################# -# Tests below with mlperf in the test name are of two types: -# 1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled submission. -# 2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyper parameters. -# -# The purpose of both is to get a number to compare to existing results. To do -# this the number of epochs is held constant rather than a race to a given -# accuracy. The accuracy validation is done by the "early_stop" tests. 
-############################################# - - def benchmark_1_gpu_mlperf_like(self): - """1 GPU using keras fit/compile.""" - self._setup() - FLAGS.train_epochs = 7 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_no_dist_strat_mlperf_like(self): - """1 GPU using compile/fit without dist_strat.""" - self._setup() - FLAGS.train_epochs = 7 - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self): - self._setup() - FLAGS.train_epochs = 7 - FLAGS.distribution_strategy = 'off' - FLAGS.run_eagerly = True - self._run_and_report_benchmark_mlperf_like() - - def benchmark_xla_1_gpu_mlperf_like(self): - """1 GPU using compile/fit with XLA.""" - self._setup() - FLAGS.train_epochs = 7 - FLAGS.enable_xla = True - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_ctl_mlperf_like(self): - """1 GPU using CTL.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.train_epochs = 7 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_ctl_fp16_mlperf_like(self): - """1 GPU using CTL and FP16.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.train_epochs = 7 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_fp16_mlperf_like(self): - """1 GPU using FP16.""" - self._setup() - FLAGS.train_epochs = 7 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like(self): - """1 GPU using CTL and FP16 graph rewrite.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.train_epochs = 7 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_fp16_graph_rewrite_mlperf_like(self): - """1 GPU using FP16 graph rewrite.""" - self._setup() - FLAGS.train_epochs = 7 - 
FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self): - """1 GPU using CTL with eager and distribution strategy.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.run_eagerly = True - FLAGS.train_epochs = 7 - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_ctl_mlperf_like(self): - """1 GPU using CTL with XLA.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.enable_xla = True - FLAGS.train_epochs = 7 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_xla_1_gpu_fp16_mlperf_like(self): - """1 GPU using with XLA and FP16.""" - self._setup() - FLAGS.enable_xla = True - FLAGS.train_epochs = 7 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self): - """1 GPU using CTL with XLA and FP16.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.enable_xla = True - FLAGS.train_epochs = 7 - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_mlperf_like(self): - """8 GPU using keras fit/compile.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.train_epochs = 17 - FLAGS.batch_size = 1048576 - FLAGS.eval_batch_size = 160000 - FLAGS.learning_rate = 0.0045 - FLAGS.beta1 = 0.25 - FLAGS.beta2 = 0.5 - FLAGS.epsilon = 1e-8 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_ctl_mlperf_like(self): - """8 GPU using CTL.""" - self._setup() - FLAGS.keras_use_ctl = True - FLAGS.num_gpus = 8 - FLAGS.train_epochs = 17 - FLAGS.batch_size = 1048576 - FLAGS.eval_batch_size = 160000 - FLAGS.learning_rate = 0.0045 - FLAGS.beta1 = 0.25 - FLAGS.beta2 = 0.5 - FLAGS.epsilon = 1e-8 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_tf_data_ctl_mlperf_like(self): - """8 GPU using CTL.""" - self._setup() - 
self._set_8_gpu_defaults() - FLAGS.keras_use_ctl = True - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_tf_data_fp16_mlperf_like(self): - """8 GPU FP16.""" - self._setup() - self._set_8_gpu_defaults() - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_tf_data_ctl_fp16_mlperf_like(self): - """8 GPU FP16 using CTL.""" - self._setup() - self._set_8_gpu_defaults() - FLAGS.keras_use_ctl = True - FLAGS.dtype = 'fp16' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - def benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like(self): - """8 GPU FP16 graph rewrite using CTL.""" - self._setup() - self._set_8_gpu_defaults() - FLAGS.keras_use_ctl = True - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.loss_scale = 8192 - self._run_and_report_benchmark_mlperf_like() - - -class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase): - """NCF Keras throughput benchmarks.""" - - def __init__(self, - output_dir=None, - root_data_dir=None, - default_flags=None, - **kwargs): - - root_data_dir = root_data_dir if root_data_dir else '' - default_flags = {} - default_flags['dataset'] = 'ml-20m' - default_flags['num_gpus'] = 1 - default_flags['train_epochs'] = 14 - default_flags['clean'] = True - default_flags['batch_size'] = 99000 - default_flags['eval_batch_size'] = 160000 - default_flags['learning_rate'] = 0.00382059 - default_flags['beta1'] = 0.783529 - default_flags['beta2'] = 0.909003 - default_flags['epsilon'] = 1.45439e-07 - default_flags['layers'] = [256, 256, 128, 64] - default_flags['num_factors'] = 64 - default_flags['hr_threshold'] = 0.635 - default_flags['ml_perf'] = True - default_flags['use_synthetic_data'] = False - default_flags['train_dataset_path'] = os.path.join( - NCF_TF_REGRESSION_DATA_DIR_NAME, 'training_cycle_*/*') - default_flags['eval_dataset_path'] = os.path.join( - NCF_TF_REGRESSION_DATA_DIR_NAME, 'eval_data/*') - 
default_flags['input_meta_data_path'] = os.path.join( - NCF_TF_REGRESSION_DATA_DIR_NAME, 'metadata') - default_flags['data_dir'] = NCF_TF_REGRESSION_DATA_DIR_NAME - - super(NCFKerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=default_flags, **kwargs) - - def benchmark_2x2_tpu(self): - """2x2 TPU using CTL with distribution strategy.""" - self._setup() - FLAGS.distribution_strategy = 'tpu' - FLAGS.keras_use_ctl = True - FLAGS.num_gpus = 0 - FLAGS.train_epochs = 1 - self._run_and_report_benchmark() - - @owner_utils.Owner('tf-graph-compiler') - def benchmark_2x2_tpu_mlir(self): - """2x2 TPU using CTL with distribution strategy using the MLIR bridge.""" - self._setup() - FLAGS.distribution_strategy = 'tpu' - FLAGS.keras_use_ctl = True - FLAGS.num_gpus = 0 - FLAGS.train_epochs = 1 - tf.config.experimental.enable_mlir_bridge() - self._run_and_report_benchmark() - - -class NCFKerasSynth(NCFKerasBenchmarkBase): - """Benchmark NCF model using synthetic data.""" - - def __init__(self, - output_dir=None, - default_flags=None, - **kwargs): - - default_flags = {} - default_flags['dataset'] = 'ml-20m' - default_flags['num_gpus'] = 1 - default_flags['train_epochs'] = 8 - default_flags['batch_size'] = 99000 - default_flags['eval_batch_size'] = 160000 - default_flags['learning_rate'] = 0.00382059 - default_flags['beta1'] = 0.783529 - default_flags['beta2'] = 0.909003 - default_flags['epsilon'] = 1.45439e-07 - default_flags['layers'] = [256, 256, 128, 64] - default_flags['num_factors'] = 64 - default_flags['hr_threshold'] = 0.635 - default_flags['use_synthetic_data'] = True - - super(NCFKerasSynth, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - **kwargs) - - def benchmark_1_gpu(self): - self._setup() - self._run_and_report_benchmark() - - def benchmark_2_gpus(self): - self._setup() - FLAGS.num_gpus = 2 - self._run_and_report_benchmark() - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/official/benchmark/nhnet_benchmark.py b/official/benchmark/nhnet_benchmark.py deleted file mode 100644 index 7eac36b204a4f064216fb4c81effff06d8c7e6f0..0000000000000000000000000000000000000000 --- a/official/benchmark/nhnet_benchmark.py +++ /dev/null @@ -1,148 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes benchmark testing for bert pretraining.""" -# pylint: disable=line-too-long -from __future__ import print_function - -import time -from typing import Optional - -from absl import flags -import tensorflow as tf - -from official.benchmark import benchmark_wrappers -from official.benchmark import owner_utils -from official.benchmark import perfzero_benchmark -from official.nlp.nhnet import trainer -from official.utils.flags import core as flags_core - -MIN_LOSS = 0.40 -MAX_LOSS = 0.55 -NHNET_DATA = 'gs://tf-perfzero-data/nhnet/v1/processed/train.tfrecord*' -PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_model.ckpt' - -FLAGS = flags.FLAGS - - -class NHNetBenchmark(perfzero_benchmark.PerfZeroBenchmark): - """Base benchmark class for NHNet.""" - - def __init__(self, output_dir=None, default_flags=None, tpu=None, **kwargs): - self.default_flags = default_flags or {} - flag_methods = trainer.define_flags() - super(NHNetBenchmark, 
self).__init__( - output_dir=output_dir, - default_flags=default_flags, - flag_methods=flag_methods, - tpu=tpu, - **kwargs) - - def _report_benchmark(self, - stats, - wall_time_sec, - max_value=None, - min_value=None): - """Report benchmark results by writing to local protobuf file. - - Args: - stats: dict returned from keras models with known entries. - wall_time_sec: the during of the benchmark execution in seconds - max_value: highest passing level. - min_value: lowest passing level. - """ - - metrics = [] - metrics.append({ - 'name': 'training_loss', - 'value': stats['training_loss'], - 'min_value': min_value, - 'max_value': max_value - }) - # These metrics are placeholders to avoid PerfZero failure. - metrics.append({ - 'name': 'exp_per_second', - 'value': 0.0, - }) - metrics.append({ - 'name': 'startup_time', - 'value': 9999., - }) - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark( - iters=-1, - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) - - -class NHNetAccuracyBenchmark(NHNetBenchmark): - """Benchmark accuracy tests for NHNet.""" - - def __init__(self, - output_dir: Optional[str] = None, - tpu: Optional[str] = None, - **kwargs): - default_flags = dict( - mode='train', - train_file_pattern=NHNET_DATA, - train_batch_size=1024, - model_type='nhnet', - len_title=15, - len_passage=200, - num_encoder_layers=12, - num_decoder_layers=12, - num_nhnet_articles=5, - steps_per_loop=1000, - params_override='init_from_bert2bert=false') - super(NHNetAccuracyBenchmark, self).__init__( - output_dir=output_dir, default_flags=default_flags, tpu=tpu, **kwargs) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, max_value=MAX_LOSS, min_value=MIN_LOSS): - """Runs and reports the benchmark given the provided configuration.""" - start_time_sec = time.time() - stats = trainer.run() - wall_time_sec = time.time() - start_time_sec - self._report_benchmark( - stats, wall_time_sec, 
max_value=max_value, min_value=min_value) - - @owner_utils.Owner('tf-model-garden') - def benchmark_accuracy_4x4_tpu_f32_50k_steps(self): - """Test bert pretraining with 4x4 TPU for 50k steps.""" - # This is used for accuracy test. - self._setup() - FLAGS.train_steps = 50000 - FLAGS.checkpoint_interval = FLAGS.train_steps - FLAGS.distribution_strategy = 'tpu' - FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH - FLAGS.model_dir = self._get_model_dir( - 'benchmark_accuracy_4x4_tpu_bf32_50k_steps') - self._run_and_report_benchmark() - - @owner_utils.Owner('tf-model-garden') - def benchmark_accuracy_4x4_tpu_f32_1k_steps(self): - """Test bert pretraining with 4x4 TPU for 1k steps.""" - self._setup() - FLAGS.train_steps = 1000 - FLAGS.checkpoint_interval = FLAGS.train_steps - FLAGS.distribution_strategy = 'tpu' - FLAGS.model_dir = self._get_model_dir( - 'benchmark_accuracy_4x4_tpu_bf32_1k_steps') - self._run_and_report_benchmark() - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/owner_utils.py b/official/benchmark/owner_utils.py deleted file mode 100644 index e7d189d7b9a2ba05a0bd3af8cb970d52cc85f5a0..0000000000000000000000000000000000000000 --- a/official/benchmark/owner_utils.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Utils to set Owner annotations on benchmarks. - -@owner_utils.Owner('owner_team/user') can be set either at the benchmark class -level / benchmark method level or both. - -Runner frameworks can use owner_utils.GetOwner(benchmark_method) to get the -actual owner. Python inheritance for the owner attribute is respected. (E.g -method level owner takes precedence over class level). - -See owner_utils_test for associated tests and more examples. - -The decorator can be applied both at the method level and at the class level. - -Simple example: -=============== - -class MLBenchmark: - - @Owner('example_id') - def benchmark_method_1_gpu(self): - return True -""" - - -def Owner(owner_name): - """Sets the owner attribute on a decorated method or class.""" - - def _Wrapper(func_or_class): - """Sets the benchmark owner attribute.""" - func_or_class.__benchmark__owner__ = owner_name - return func_or_class - - return _Wrapper - - -def GetOwner(benchmark_method_or_class): - """Gets the inherited owner attribute for this benchmark. - - Checks for existence of __benchmark__owner__. If it's not present, looks for - it in the parent class's attribute list. - - Args: - benchmark_method_or_class: A benchmark method or class. - - Returns: - string - the associated owner if present / None. 
- """ - if hasattr(benchmark_method_or_class, '__benchmark__owner__'): - return benchmark_method_or_class.__benchmark__owner__ - elif hasattr(benchmark_method_or_class, '__self__'): - if hasattr(benchmark_method_or_class.__self__, '__benchmark__owner__'): - return benchmark_method_or_class.__self__.__benchmark__owner__ - return None diff --git a/official/benchmark/owner_utils_test.py b/official/benchmark/owner_utils_test.py deleted file mode 100644 index 588bb80378fbf7ba5a6aec470f24fc1c4ad995b2..0000000000000000000000000000000000000000 --- a/official/benchmark/owner_utils_test.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for official.benchmark.owner_utils.""" - -from absl.testing import absltest - -from official.benchmark import owner_utils - - -@owner_utils.Owner('static_owner') -def static_function(foo=5): - return foo - - -def static_function_without_owner(foo=5): - return foo - - -class BenchmarkClassWithoutOwner: - - def method_without_owner(self): - return 100 - - @owner_utils.Owner('method_owner') - def method_with_owner(self): - return 200 - - -@owner_utils.Owner('class_owner') -class SomeBenchmarkClass: - - def method_inherited_owner(self): - return 123 - - @owner_utils.Owner('method_owner') - def method_override_owner(self): - return 345 - - -@owner_utils.Owner('new_class_owner') -class InheritedClass(SomeBenchmarkClass): - - def method_inherited_owner(self): - return 456 - - @owner_utils.Owner('new_method_owner') - def method_override_owner(self): - return 567 - - -class OwnerUtilsTest(absltest.TestCase): - """Tests to assert for owner decorator functionality.""" - - def test_owner_tag_missing(self): - self.assertEqual(None, owner_utils.GetOwner(static_function_without_owner)) - - benchmark_class = BenchmarkClassWithoutOwner() - self.assertEqual(None, - owner_utils.GetOwner(benchmark_class.method_without_owner)) - self.assertEqual(100, benchmark_class.method_without_owner()) - - self.assertEqual('method_owner', - owner_utils.GetOwner(benchmark_class.method_with_owner)) - self.assertEqual(200, benchmark_class.method_with_owner()) - - def test_owner_attributes_static(self): - self.assertEqual('static_owner', owner_utils.GetOwner(static_function)) - self.assertEqual(5, static_function(5)) - - def test_owner_attributes_per_class(self): - level1 = SomeBenchmarkClass() - self.assertEqual('class_owner', - owner_utils.GetOwner(level1.method_inherited_owner)) - self.assertEqual(123, level1.method_inherited_owner()) - - self.assertEqual('method_owner', - 
owner_utils.GetOwner(level1.method_override_owner)) - self.assertEqual(345, level1.method_override_owner()) - - def test_owner_attributes_inherited_class(self): - level2 = InheritedClass() - self.assertEqual('new_class_owner', - owner_utils.GetOwner(level2.method_inherited_owner)) - self.assertEqual(456, level2.method_inherited_owner()) - - self.assertEqual('new_method_owner', - owner_utils.GetOwner(level2.method_override_owner)) - self.assertEqual(567, level2.method_override_owner()) - - -if __name__ == '__main__': - absltest.main() diff --git a/official/benchmark/perfzero_benchmark.py b/official/benchmark/perfzero_benchmark.py deleted file mode 100644 index bedc1320217d1b9469333a8cdfdf70c56de34f77..0000000000000000000000000000000000000000 --- a/official/benchmark/perfzero_benchmark.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utils for creating PerfZero benchmarks.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from absl import flags -from absl import logging -from absl.testing import flagsaver -import tensorflow as tf - -FLAGS = flags.FLAGS - - -class PerfZeroBenchmark(tf.test.Benchmark): - """Common methods used in PerfZero Benchmarks. 
- - Handles the resetting of flags between tests, loading of default_flags, - overriding of defaults. PerfZero (OSS) runs each test in a separate - process reducing some need to reset the flags. - """ - local_flags = None - - def __init__(self, - output_dir=None, - default_flags=None, - root_data_dir=None, - flag_methods=None, - tpu=None): - """Initialize class. - - Args: - output_dir: Base directory to store all output for the test. - default_flags: Set of flags to pass to model. - root_data_dir: Optional param used by child classes to look for the - dataset. - flag_methods: Set of flag methods to run during setup. - tpu: (optional) TPU name to use in a TPU benchmark. - """ - if os.getenv('BENCHMARK_OUTPUT_DIR'): - self.output_dir = os.getenv('BENCHMARK_OUTPUT_DIR') - elif output_dir: - self.output_dir = output_dir - else: - self.output_dir = '/tmp' - self.default_flags = default_flags or {} - self.flag_methods = flag_methods or {} - - if os.getenv('BENCHMARK_TPU'): - resolved_tpu = os.getenv('BENCHMARK_TPU') - elif tpu: - resolved_tpu = tpu - else: - resolved_tpu = None - - if resolved_tpu: - # TPU models are expected to accept a --tpu=name flag. PerfZero creates - # the TPU at runtime and passes the TPU's name to this flag. - self.default_flags['tpu'] = resolved_tpu - - logging.info('root_data_dir: %s', root_data_dir) - - @property - def tpu(self): - return self.default_flags.get('tpu', None) - - def _get_model_dir(self, folder_name): - """Returns directory to store info, e.g. saved model and event log.""" - return os.path.join(self.output_dir, folder_name) - - def _setup(self): - """Sets up and resets flags before each test.""" - logging.set_verbosity(logging.INFO) - if PerfZeroBenchmark.local_flags is None: - for flag_method in self.flag_methods: - flag_method() - # Loads flags to get defaults to then override. List cannot be empty. - flags.FLAGS(['foo']) - # Overrides flag values with defaults for the class of tests. 
- for k, v in self.default_flags.items(): - setattr(FLAGS, k, v) - saved_flag_values = flagsaver.save_flag_values() - PerfZeroBenchmark.local_flags = saved_flag_values - else: - flagsaver.restore_flag_values(PerfZeroBenchmark.local_flags) diff --git a/official/benchmark/resnet_ctl_imagenet_benchmark.py b/official/benchmark/resnet_ctl_imagenet_benchmark.py deleted file mode 100644 index 0e70e8da969ec9b02a2de00d1973bdd2aa5f2b51..0000000000000000000000000000000000000000 --- a/official/benchmark/resnet_ctl_imagenet_benchmark.py +++ /dev/null @@ -1,452 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes CTL benchmarks and accuracy tests.""" -# pylint: disable=line-too-long,g-bad-import-order -from __future__ import print_function - -import os -import time - -from absl import flags -import tensorflow as tf - -from official.benchmark import owner_utils -from official.vision.image_classification.resnet import common -from official.vision.image_classification.resnet import resnet_ctl_imagenet_main -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark -from official.benchmark import benchmark_wrappers -from official.utils.flags import core as flags_core - -MIN_TOP_1_ACCURACY = 0.76 -MAX_TOP_1_ACCURACY = 0.77 - -FLAGS = flags.FLAGS - - -class CtlBenchmark(PerfZeroBenchmark): - """Base benchmark class with methods to simplify testing.""" - - def __init__(self, output_dir=None, default_flags=None, flag_methods=None): - self.default_flags = default_flags or {} - self.flag_methods = flag_methods or {} - super(CtlBenchmark, self).__init__( - output_dir=output_dir, - default_flags=self.default_flags, - flag_methods=self.flag_methods) - - def _report_benchmark(self, - stats, - wall_time_sec, - top_1_max=None, - top_1_min=None, - total_batch_size=None, - log_steps=None, - warmup=1, - start_time_sec=None): - """Report benchmark results by writing to local protobuf file. - - Args: - stats: dict returned from keras models with known entries. - wall_time_sec: the during of the benchmark execution in seconds - top_1_max: highest passing level for top_1 accuracy. - top_1_min: lowest passing level for top_1 accuracy. - total_batch_size: Global batch-size. - log_steps: How often the log was created for stats['step_timestamp_log']. - warmup: number of entries in stats['step_timestamp_log'] to ignore. - start_time_sec: the start time of the program in seconds since epoch. 
- """ - - metrics = [] - if 'eval_acc' in stats: - metrics.append({ - 'name': 'accuracy_top_1', - 'value': stats['eval_acc'], - 'min_value': top_1_min, - 'max_value': top_1_max - }) - metrics.append({'name': 'eval_loss', 'value': stats['eval_loss']}) - - metrics.append({ - 'name': 'top_1_train_accuracy', - 'value': stats['train_acc'] - }) - metrics.append({'name': 'train_loss', 'value': stats['train_loss']}) - - if (warmup and 'step_timestamp_log' in stats and - len(stats['step_timestamp_log']) > warmup + 1): - # first entry in the time_log is start of step 0. The rest of the - # entries are the end of each step recorded - time_log = stats['step_timestamp_log'] - steps_elapsed = time_log[-1].batch_index - time_log[warmup].batch_index - time_elapsed = time_log[-1].timestamp - time_log[warmup].timestamp - examples_per_sec = total_batch_size * (steps_elapsed / time_elapsed) - metrics.append({'name': 'exp_per_second', 'value': examples_per_sec}) - - if 'avg_exp_per_second' in stats: - metrics.append({ - 'name': 'avg_exp_per_second', - 'value': stats['avg_exp_per_second'] - }) - - if start_time_sec and 'step_timestamp_log' in stats: - time_log = stats['step_timestamp_log'] - # time_log[0] is recorded at the beginning of the first step. - startup_time = time_log[0].timestamp - start_time_sec - metrics.append({'name': 'startup_time', 'value': startup_time}) - - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark( - iters=-1, - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) - - -class Resnet50CtlAccuracy(CtlBenchmark): - """Benchmark accuracy tests for ResNet50 in CTL.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """A benchmark class. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. 
This is needed to make the - constructor forward compatible in case PerfZero provides more named - arguments before updating the constructor. - """ - - flag_methods = [common.define_keras_flags] - - self.data_dir = os.path.join(root_data_dir, 'imagenet') - super(Resnet50CtlAccuracy, self).__init__( - output_dir=output_dir, flag_methods=flag_methods) - - def benchmark_8_gpu(self): - """Test Keras model with eager, dist_strat and 8 GPUs.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 128 * 8 - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - FLAGS.dtype = 'fp32' - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16(self): - """Test Keras model with eager, 8 GPUs with tf.keras mixed precision.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 256 * 8 - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16') - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark() - - def benchmark_8_gpu_amp(self): - """Test Keras model with 8 GPUs and mixed precision via graph rewrite.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.data_dir - FLAGS.batch_size = 256 * 8 - FLAGS.train_epochs = 90 - FLAGS.epochs_between_evals = 10 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp') - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - self._run_and_report_benchmark() - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - start_time_sec = time.time() - stats = resnet_ctl_imagenet_main.run(flags.FLAGS) - wall_time_sec = time.time() - start_time_sec - - super(Resnet50CtlAccuracy, self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=MIN_TOP_1_ACCURACY, - top_1_max=MAX_TOP_1_ACCURACY, - total_batch_size=FLAGS.batch_size, - log_steps=100, - start_time_sec=start_time_sec) - - 
-class Resnet50CtlBenchmarkBase(CtlBenchmark): - """Resnet50 benchmarks.""" - - def __init__(self, output_dir=None, default_flags=None): - flag_methods = [common.define_keras_flags] - - super(Resnet50CtlBenchmarkBase, self).__init__( - output_dir=output_dir, - flag_methods=flag_methods, - default_flags=default_flags) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self): - start_time_sec = time.time() - stats = resnet_ctl_imagenet_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - - # Warmup means the number of logged step time entries that are excluded in - # performance report. Default to exclude 1 FLAGS.log_steps time. - super(Resnet50CtlBenchmarkBase, self)._report_benchmark( - stats, - wall_time_sec, - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - warmup=1, - start_time_sec=start_time_sec) - - def benchmark_1_gpu_no_dist_strat(self): - """Test Keras model with 1 GPU, no distribution strategy.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'off' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Test Keras model with 1 GPU.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16(self): - """Test Keras model with 1 GPU with tf.keras mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16') - FLAGS.batch_size = 256 - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark() - - def benchmark_1_gpu_amp(self): - """Test Keras model with 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = 
self._get_model_dir('benchmark_1_gpu_amp') - FLAGS.batch_size = 256 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_amp(self): - """Test Keras model with XLA and 1 GPU with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp') - FLAGS.batch_size = 256 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_1_gpu_eager(self): - """Test Keras model with 1 GPU in pure eager mode.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager') - FLAGS.batch_size = 120 - FLAGS.use_tf_function = False - FLAGS.use_tf_while_loop = False - FLAGS.single_l2_loss_op = True - self._run_and_report_benchmark() - - def benchmark_1_gpu_fp16_eager(self): - """Test Keras model with 1 GPU with fp16 and pure eager mode.""" - self._setup() - - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'one_device' - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_eager') - FLAGS.batch_size = 240 - FLAGS.dtype = 'fp16' - FLAGS.use_tf_function = False - FLAGS.use_tf_while_loop = False - FLAGS.single_l2_loss_op = True - self._run_and_report_benchmark() - - def benchmark_8_gpu(self): - """Test Keras model with 8 GPUs.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - FLAGS.batch_size = 128 * 8 # 8 GPUs - self._run_and_report_benchmark() - - def benchmark_8_gpu_fp16(self): - """Test Keras model with 8 GPUs with tf.keras mixed precision.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16') - FLAGS.batch_size = 256 * 
8 # 8 GPUs - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark() - - def benchmark_8_gpu_eager(self): - """Test Keras model with 8 GPUs, eager, fp32.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.use_tf_function = False - FLAGS.use_tf_while_loop = False - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_8_gpu_eager_fp16(self): - """Test Keras model with 8 GPUs, eager, fp16.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.use_tf_function = False - FLAGS.use_tf_while_loop = False - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager_fp16') - FLAGS.batch_size = 128 - self._run_and_report_benchmark() - - def benchmark_8_gpu_amp(self): - """Test Keras model with 8 GPUs with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_amp(self): - """Test Keras model with XLA and 8 GPUs with automatic mixed precision.""" - self._setup() - - FLAGS.num_gpus = 8 - FLAGS.distribution_strategy = 'mirrored' - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp') - FLAGS.batch_size = 256 * 8 # 8 GPUs - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def _set_df_common(self): - FLAGS.steps_per_loop = 500 - FLAGS.train_epochs = 2 - FLAGS.train_steps = None - FLAGS.skip_eval = True - FLAGS.enable_eager = True - FLAGS.enable_tensorboard = False - FLAGS.distribution_strategy = 'tpu' - FLAGS.report_accuracy_metrics = False - FLAGS.log_steps = 50 - FLAGS.single_l2_loss_op = True - 
FLAGS.use_tf_function = True - FLAGS.enable_checkpoint_and_export = False - - def benchmark_2x2_tpu_bf16(self): - self._setup() - self._set_df_common() - FLAGS.batch_size = 1024 - FLAGS.dtype = 'bf16' - self._run_and_report_benchmark() - - def benchmark_4x4_tpu_bf16(self): - self._setup() - self._set_df_common() - FLAGS.batch_size = 4096 - FLAGS.dtype = 'bf16' - self._run_and_report_benchmark() - - @owner_utils.Owner('tf-graph-compiler') - def benchmark_4x4_tpu_bf16_mlir(self): - """Run resnet model on 4x4 with the MLIR Bridge enabled.""" - self._setup() - self._set_df_common() - FLAGS.batch_size = 4096 - FLAGS.dtype = 'bf16' - tf.config.experimental.enable_mlir_bridge() - self._run_and_report_benchmark() - - def benchmark_8x16_tpu_bf16(self): - self._setup() - self._set_df_common() - FLAGS.batch_size = 8192 - FLAGS.dtype = 'bf16' - self._run_and_report_benchmark() - - def fill_report_object(self, stats): - super(Resnet50CtlBenchmarkBase, self).fill_report_object( - stats, total_batch_size=FLAGS.batch_size, log_steps=FLAGS.log_steps) - - -class Resnet50CtlBenchmarkSynth(Resnet50CtlBenchmarkBase): - """Resnet50 synthetic benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - def_flags = {} - def_flags['skip_eval'] = True - def_flags['use_synthetic_data'] = True - def_flags['train_steps'] = 110 - def_flags['steps_per_loop'] = 20 - def_flags['log_steps'] = 10 - - super(Resnet50CtlBenchmarkSynth, self).__init__( - output_dir=output_dir, default_flags=def_flags) - - -class Resnet50CtlBenchmarkReal(Resnet50CtlBenchmarkBase): - """Resnet50 real data benchmark tests.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - def_flags = {} - def_flags['skip_eval'] = True - def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') - def_flags['train_steps'] = 110 - def_flags['steps_per_loop'] = 20 - def_flags['log_steps'] = 10 - - super(Resnet50CtlBenchmarkReal, self).__init__( - output_dir=output_dir, 
default_flags=def_flags) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/retinanet_benchmark.py b/official/benchmark/retinanet_benchmark.py deleted file mode 100644 index 62119aff94de6ed9f1df3e84df6380835e0ab7a6..0000000000000000000000000000000000000000 --- a/official/benchmark/retinanet_benchmark.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes RetinaNet benchmarks and accuracy tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=g-bad-import-order -import json -import time - -from absl import flags -from absl.testing import flagsaver -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.benchmark import benchmark_wrappers -from official.benchmark import perfzero_benchmark -from official.utils.flags import core as flags_core -from official.utils.misc import keras_utils -from official.vision.detection import main as detection -from official.vision.detection.configs import base_config - -FLAGS = flags.FLAGS - -# pylint: disable=line-too-long -COCO_TRAIN_DATA = 'gs://tf-perfzero-data/coco/train*' -COCO_EVAL_DATA = 'gs://tf-perfzero-data/coco/val*' -COCO_EVAL_JSON = 'gs://tf-perfzero-data/coco/instances_val2017.json' 
-RESNET_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07' -# pylint: enable=line-too-long - - -class DetectionBenchmarkBase(perfzero_benchmark.PerfZeroBenchmark): - """Base class to hold methods common to test classes.""" - - def __init__(self, **kwargs): - super(DetectionBenchmarkBase, self).__init__(**kwargs) - self.timer_callback = None - - def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap, - max_ap, warmup): - """Report benchmark results by writing to local protobuf file. - - Args: - stats: dict returned from Detection models with known entries. - start_time_sec: the start of the benchmark execution in seconds - wall_time_sec: the duration of the benchmark execution in seconds - min_ap: Minimum detection AP constraint to verify correctness of the - model. - max_ap: Maximum detection AP accuracy constraint to verify correctness of - the model. - warmup: Number of time log entries to ignore when computing examples/sec. - """ - metrics = [{ - 'name': 'total_loss', - 'value': stats['total_loss'], - }] - if self.timer_callback: - metrics.append({ - 'name': 'exp_per_second', - 'value': self.timer_callback.get_examples_per_sec(warmup) - }) - metrics.append({ - 'name': 'startup_time', - 'value': self.timer_callback.get_startup_time(start_time_sec) - }) - else: - metrics.append({ - 'name': 'exp_per_second', - 'value': 0.0, - }) - - if 'eval_metrics' in stats: - metrics.append({ - 'name': 'AP', - 'value': stats['AP'], - 'min_value': min_ap, - 'max_value': max_ap, - }) - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark( - iters=stats['total_steps'], - wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) - - -class RetinanetBenchmarkBase(DetectionBenchmarkBase): - """Base class to hold methods common to test classes in the module.""" - - def __init__(self, **kwargs): - self.train_data_path = COCO_TRAIN_DATA - self.eval_data_path = COCO_EVAL_DATA - 
self.eval_json_path = COCO_EVAL_JSON - self.resnet_checkpoint_path = RESNET_CHECKPOINT_PATH - super(RetinanetBenchmarkBase, self).__init__(**kwargs) - - def _run_detection_main(self): - """Starts detection job.""" - if self.timer_callback: - FLAGS.log_steps = 0 # prevent detection.run from adding the same callback - return detection.run(callbacks=[self.timer_callback]) - else: - return detection.run() - - -class RetinanetAccuracy(RetinanetBenchmarkBase): - """Accuracy test for RetinaNet model. - - Tests RetinaNet detection task model accuracy. The naming - convention of below test cases follow - `benchmark_(number of gpus)_gpu_(dataset type)` format. - """ - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - params, - min_ap=0.325, - max_ap=0.35, - do_eval=True, - warmup=1): - """Starts RetinaNet accuracy benchmark test.""" - FLAGS.params_override = json.dumps(params) - # Need timer callback to measure performance - self.timer_callback = keras_utils.TimeHistory( - batch_size=params['train']['batch_size'], - log_steps=FLAGS.log_steps, - ) - - start_time_sec = time.time() - FLAGS.mode = 'train' - summary, _ = self._run_detection_main() - wall_time_sec = time.time() - start_time_sec - - if do_eval: - FLAGS.mode = 'eval' - eval_metrics = self._run_detection_main() - summary.update(eval_metrics) - - summary['total_steps'] = params['train']['total_steps'] - self._report_benchmark(summary, start_time_sec, wall_time_sec, min_ap, - max_ap, warmup) - - def _setup(self): - super(RetinanetAccuracy, self)._setup() - FLAGS.model = 'retinanet' - - def _params(self): - return { - 'architecture': { - 'use_bfloat16': True, - }, - 'train': { - 'batch_size': 64, - 'iterations_per_loop': 100, - 'total_steps': 22500, - 'train_file_pattern': self.train_data_path, - 'checkpoint': { - 'path': self.resnet_checkpoint_path, - 'prefix': 'resnet50/' - }, - # Speed up ResNet training when loading from the checkpoint. 
- 'frozen_variable_prefix': base_config.RESNET_FROZEN_VAR_PREFIX, - }, - 'eval': { - 'batch_size': 8, - 'eval_samples': 5000, - 'val_json_file': self.eval_json_path, - 'eval_file_pattern': self.eval_data_path, - }, - } - - @flagsaver.flagsaver - def benchmark_8_gpu_coco(self): - """Run RetinaNet model accuracy test with 8 GPUs.""" - self._setup() - params = self._params() - FLAGS.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_coco') - FLAGS.strategy_type = 'mirrored' - self._run_and_report_benchmark(params) - - -class RetinanetBenchmarkReal(RetinanetAccuracy): - """Short benchmark performance tests for RetinaNet model. - - Tests RetinaNet performance in different GPU configurations. - The naming convention of below test cases follow - `benchmark_(number of gpus)_gpu` format. - """ - - def _setup(self): - super(RetinanetBenchmarkReal, self)._setup() - # Use negative value to avoid saving checkpoints. - FLAGS.save_checkpoint_freq = -1 - - @flagsaver.flagsaver - def benchmark_8_gpu_coco(self): - """Run RetinaNet model accuracy test with 8 GPUs.""" - self._setup() - params = self._params() - params['architecture']['use_bfloat16'] = False - params['train']['total_steps'] = 1875 # One epoch. - # The iterations_per_loop must be one, otherwise the number of examples per - # second would be wrong. Currently only support calling callback per batch - # when each loop only runs on one batch, i.e. host loop for one step. The - # performance of this situation might be lower than the case of - # iterations_per_loop > 1. 
- # Related bug: b/135933080 - params['train']['iterations_per_loop'] = 1 - params['eval']['eval_samples'] = 8 - FLAGS.num_gpus = 8 - FLAGS.model_dir = self._get_model_dir('real_benchmark_8_gpu_coco') - FLAGS.strategy_type = 'mirrored' - self._run_and_report_benchmark(params) - - @flagsaver.flagsaver - def benchmark_1_gpu_coco(self): - """Run RetinaNet model accuracy test with 1 GPU.""" - self._setup() - params = self._params() - params['architecture']['use_bfloat16'] = False - params['train']['batch_size'] = 8 - params['train']['total_steps'] = 200 - params['train']['iterations_per_loop'] = 1 - params['eval']['eval_samples'] = 8 - FLAGS.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco') - FLAGS.strategy_type = 'one_device' - self._run_and_report_benchmark(params) - - @flagsaver.flagsaver - def benchmark_xla_1_gpu_coco(self): - """Run RetinaNet model accuracy test with 1 GPU and XLA enabled.""" - self._setup() - params = self._params() - params['architecture']['use_bfloat16'] = False - params['train']['batch_size'] = 8 - params['train']['total_steps'] = 200 - params['train']['iterations_per_loop'] = 1 - params['eval']['eval_samples'] = 8 - FLAGS.num_gpus = 1 - FLAGS.model_dir = self._get_model_dir('real_benchmark_xla_1_gpu_coco') - FLAGS.strategy_type = 'one_device' - FLAGS.enable_xla = True - self._run_and_report_benchmark(params) - - @flagsaver.flagsaver - def benchmark_2x2_tpu_coco(self): - """Run RetinaNet model accuracy test with 4 TPUs.""" - self._setup() - params = self._params() - params['train']['batch_size'] = 64 - params['train']['total_steps'] = 1875 # One epoch. 
- params['train']['iterations_per_loop'] = 500 - FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco') - FLAGS.strategy_type = 'tpu' - self._run_and_report_benchmark(params, do_eval=False, warmup=0) - - @flagsaver.flagsaver - def benchmark_2x2_tpu_spinenet_coco(self): - """Run SpineNet with RetinaNet model accuracy test with 4 TPUs.""" - self._setup() - params = self._params() - params['architecture']['backbone'] = 'spinenet' - params['architecture']['multilevel_features'] = 'identity' - params['architecture']['use_bfloat16'] = False - params['train']['batch_size'] = 64 - params['train']['total_steps'] = 1875 # One epoch. - params['train']['iterations_per_loop'] = 500 - params['train']['checkpoint']['path'] = '' - FLAGS.model_dir = self._get_model_dir( - 'real_benchmark_2x2_tpu_spinenet_coco') - FLAGS.strategy_type = 'tpu' - self._run_and_report_benchmark(params, do_eval=False, warmup=0) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/shakespeare_benchmark.py b/official/benchmark/shakespeare_benchmark.py deleted file mode 100644 index 430ab75da5300e3c374bbe56c2c02befb4dc2dff..0000000000000000000000000000000000000000 --- a/official/benchmark/shakespeare_benchmark.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes Shakespeare (LSTM) benchmark and accuracy tests.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -from absl import flags -import tensorflow as tf # pylint: disable=g-bad-import-order - -from official.benchmark.models.shakespeare import shakespeare_main -from official.utils.flags import core as flags_core -from official.utils.misc import keras_utils -from official.benchmark import benchmark_wrappers -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark - -SHAKESPEARE_TRAIN_DATA = 'shakespeare/shakespeare.txt' -TMP_DIR = os.getenv('TMPDIR') -FLAGS = flags.FLAGS - - -class ShakespeareBenchmarkBase(PerfZeroBenchmark): - """Base class for Shakespeare (LSTM) benchmark and accuracy tests.""" - - def __init__(self, output_dir=None, default_flags=None, root_data_dir=None): - super(ShakespeareBenchmarkBase, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - flag_methods=[shakespeare_main.define_flags]) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - top_1_train_min=0.91, - top_1_train_max=0.94, - warmup=1, - log_steps=100): - """Report benchmark results by writing to local protobuf file. - - Average epoch time is calculated by skipping the first epoch. This average - ignores time spent between epoch and is recorded by begin and end epoch. To - skip accuracy check set `top_1_train_min=None`. - - Args: - top_1_train_min: lowest passing value. - top_1_train_max: highest passing value. - warmup: number of entries in `timestamp_log` to ignore. - log_steps: How often the log was created for `timestamp_log`. 
- """ - total_batch_size = FLAGS.batch_size - metrics = [] - start_time_sec = time.time() - stats = shakespeare_main.run(FLAGS) - wall_time_sec = time.time() - start_time_sec - - if top_1_train_min: - metrics.append({'name': 'accuracy_top_1_train', - 'value': stats['history']['RecallAt1'][-1], - 'min_value': top_1_train_min, - 'max_value': top_1_train_max}) - - # Look for the time history callback which was used during keras.fit - for callback in stats['callbacks']: - if isinstance(callback, keras_utils.TimeHistory): - epoch_timings = callback.epoch_runtime_log - if len(epoch_timings) > 1: - average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:]) - metrics.append({'name': 'avg_epoch_time', - 'value': average_time}) - - # First entry in timestamp_log is the start of step 1. The rest of the - # entries are the end of each step recorded. - time_log = callback.timestamp_log - elapsed = time_log[-1].timestamp - time_log[warmup].timestamp - num_examples = ( - total_batch_size * log_steps * (len(time_log) - warmup - 1)) - if elapsed > 0: - examples_per_sec = num_examples / elapsed - metrics.append({'name': 'exp_per_second', - 'value': examples_per_sec}) - - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark(iters=-1, wall_time=wall_time_sec, - metrics=metrics, - extras={'flags': flags_str}) - - -class ShakespeareAccuracy(ShakespeareBenchmarkBase): - """Shakespeare accuracy tests. - - This is not an ideal test. The best we can use for the accuracy check is to - validate top_1 of the training set. At batch size 64 the top_1 training - stabilizes to ~0.92 around 40-45 epochs. - """ - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """Shakespeare accuracy tests. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. 
This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA) - super(ShakespeareAccuracy, self).__init__( - output_dir=output_dir, root_data_dir=root_data_dir) - - def benchmark_cpu(self): - """Benchmark cpu.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - self._run_and_report_benchmark() - - def benchmark_cpu_no_ds_run_eagerly(self): - """Benchmark cpu without distribution strategies and run eagerly.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Benchmark 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_ds(self): - """Benchmark 1 gpu without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_ds_run_eagerly(self): - """Benchmark 1 gpu without distribution strategies and run eagerly.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu(self): - """Benchmark 1 gpu w/xla.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.training_data = self.train_data - 
FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_8_gpu(self): - """Benchmark 8 gpu. - - This is test is for accuracy not scaling. The batch-size is not scaled to - the number of gpus. - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.training_data = self.train_data - FLAGS.batch_size = 64 - FLAGS.train_epochs = 43 - FLAGS.model_dir = '' - self._run_and_report_benchmark() - - -class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase): - """Benchmark accuracy tests.""" - - def __init__(self, output_dir=None, root_data_dir=TMP_DIR, **kwargs): - """Benchmark tests w/Keras. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA) - - def_flags = {} - def_flags['training_data'] = self.train_data - def_flags['model_dir'] = '' - def_flags['train_epochs'] = 4 - def_flags['log_steps'] = 50 - - super(ShakespeareKerasBenchmarkReal, self).__init__( - output_dir=output_dir, - root_data_dir=root_data_dir, - default_flags=def_flags) - - def benchmark_cpu(self): - """Benchmark cpu.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.batch_size = 64 - self._run_and_report_benchmark() - - def benchmark_cpu_no_ds_run_eagerly(self): - """Benchmark cpu without distribution strategy and run eagerly.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.batch_size = 64 - FLAGS.distribution_strategy = 'off' - FLAGS.run_eagerly = True - self._run_and_report_benchmark() - - def benchmark_cpu_no_ds(self): - """Benchmark cpu without distribution strategy.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.batch_size = 64 - FLAGS.distribution_strategy = 'off' - 
self._run_and_report_benchmark() - - def benchmark_cpu_no_ds_force_v2(self): - """Benchmark cpu no ds, and force v2.""" - self._setup() - FLAGS.num_gpus = 0 - FLAGS.batch_size = 64 - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_1_gpu(self): - """Benchmark 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_cudnn(self): - """Benchmark 1 gpu with CuDNN disabled.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - FLAGS.cudnn = False - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_ds(self): - """Benchmark 1 gpu without distribution strategies.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_1_gpu_no_ds_run_eagerly(self): - """Benchmark 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - FLAGS.run_eagerly = True - FLAGS.distribution_strategy = 'off' - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu(self): - """Benchmark 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_xla_1_gpu_no_cudnn(self): - """Benchmark 1 gpu w/xla and CuDNN disabled.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = 64 - FLAGS.cudnn = False - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_8_gpu(self): - """Benchmark 8 gpu.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = 64 * 8 - FLAGS.log_steps = 10 - self._run_and_report_benchmark() - - def benchmark_8_gpu_no_cudnn(self): - """Benchmark 8 gpu with CuDNN disabled.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = 64 * 8 - FLAGS.log_steps = 10 - FLAGS.cudnn = False - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu(self): - """Benchmark 8 gpu w/xla.""" - self._setup() - FLAGS.num_gpus 
= 1 - FLAGS.batch_size = 64 * 8 - FLAGS.log_steps = 10 - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def benchmark_xla_8_gpu_no_cudnn(self): - """Benchmark 8 gpu w/xla and CuDNN disabled.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = 64 * 8 - FLAGS.log_steps = 10 - FLAGS.cudnn = False - FLAGS.enable_xla = True - self._run_and_report_benchmark() - - def _run_and_report_benchmark(self): - """Run and report benchmark.""" - super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark( - top_1_train_min=None, log_steps=FLAGS.log_steps) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/tfhub_memory_usage_benchmark.py b/official/benchmark/tfhub_memory_usage_benchmark.py deleted file mode 100644 index 7f50ecf6b3e0c95c78c0ac574131321a1e41fceb..0000000000000000000000000000000000000000 --- a/official/benchmark/tfhub_memory_usage_benchmark.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Runs a memory usage benchmark for a Tensorflow Hub model. - -Loads a SavedModel and records memory usage. 
-""" -import functools -import time - -from absl import flags -import tensorflow as tf -import tensorflow_hub as hub - -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark - -FLAGS = flags.FLAGS - - -class TfHubMemoryUsageBenchmark(PerfZeroBenchmark): - """A benchmark measuring memory usage for a given TF Hub SavedModel.""" - - def __init__(self, - hub_model_handle_list=None, - output_dir=None, - default_flags=None, - root_data_dir=None, - **kwargs): - super(TfHubMemoryUsageBenchmark, self).__init__( - output_dir=output_dir, default_flags=default_flags, **kwargs) - if hub_model_handle_list: - for hub_model_handle in hub_model_handle_list.split(';'): - # Converts a model handle of the form - # https://tfhub.dev/google/nnlm-en-dim128/1 to valid python method name - # like google_nnlm_en_dim128_1. - hub_model_method_name = hub_model_handle.replace( - 'https://tfhub.dev', - '').replace('/', '_').replace('-', '_').strip('_') - setattr( - self, 'benchmark_' + hub_model_method_name, - functools.partial(self.benchmark_memory_usage, hub_model_handle)) - - def benchmark_memory_usage( - self, hub_model_handle='https://tfhub.dev/google/nnlm-en-dim128/1'): - start_time_sec = time.time() - self.load_model(hub_model_handle) - wall_time_sec = time.time() - start_time_sec - - metrics = [] - self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics) - - def load_model(self, hub_model_handle): - """Loads a TF Hub module.""" - hub.load(hub_model_handle) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/transformer_benchmark.py b/official/benchmark/transformer_benchmark.py deleted file mode 100644 index e61201aa174af4882c6dbab28e10fe64d8cc1377..0000000000000000000000000000000000000000 --- a/official/benchmark/transformer_benchmark.py +++ /dev/null @@ -1,757 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Executes Transformer w/Keras benchmark and accuracy tests.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -from absl import flags -import tensorflow as tf -from official.benchmark import benchmark_wrappers -from official.benchmark import owner_utils -from official.benchmark.perfzero_benchmark import PerfZeroBenchmark -from official.nlp.transformer import misc -from official.nlp.transformer import transformer_main as transformer_main -from official.utils.flags import core as flags_core - -TRANSFORMER_EN2DE_DATA_DIR_NAME = 'wmt32k-en2de-official' -EN2DE_2014_BLEU_DATA_DIR_NAME = 'newstest2014' -FLAGS = flags.FLAGS -TMP_DIR = os.getenv('TMPDIR') - - -class TransformerBenchmark(PerfZeroBenchmark): - """Methods common to executing transformer w/keras tests. - - Code under test for the Transformer Keras models report the same data and - require the same FLAG setup. 
- """ - - def __init__(self, output_dir=None, default_flags=None, root_data_dir=None, - flag_methods=None, tpu=None): - root_data_dir = root_data_dir if root_data_dir else '' - - self.train_data_dir = os.path.join(root_data_dir, - TRANSFORMER_EN2DE_DATA_DIR_NAME) - - self.vocab_file = os.path.join(root_data_dir, - TRANSFORMER_EN2DE_DATA_DIR_NAME, - 'vocab.ende.32768') - - self.bleu_source = os.path.join(root_data_dir, - EN2DE_2014_BLEU_DATA_DIR_NAME, - 'newstest2014.en') - - self.bleu_ref = os.path.join(root_data_dir, - EN2DE_2014_BLEU_DATA_DIR_NAME, - 'newstest2014.de') - - if default_flags is None: - default_flags = {} - default_flags['data_dir'] = self.train_data_dir - default_flags['vocab_file'] = self.vocab_file - - super(TransformerBenchmark, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - flag_methods=flag_methods, - tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - bleu_max=None, - bleu_min=None, - log_steps=None, - total_batch_size=None, - warmup=1): - """Report benchmark results by writing to local protobuf file. - - Args: - bleu_max: highest passing level for bleu score. - bleu_min: lowest passing level for bleu score. - log_steps: How often the log was created for stats['step_timestamp_log']. - total_batch_size: Global batch-size. - warmup: number of entries in stats['step_timestamp_log'] to ignore. 
- """ - start_time_sec = time.time() - task = transformer_main.TransformerTask(FLAGS) - stats = task.train() - wall_time_sec = time.time() - start_time_sec - - metrics = [] - if 'bleu_uncased' in stats: - if 'bleu_uncased_history' in stats: - bleu_uncased_best = max(stats['bleu_uncased_history'], - key=lambda x: x[1]) - metrics.append({'name': 'bleu_uncased', - 'value': bleu_uncased_best[1], - 'min_value': bleu_min, - 'max_value': bleu_max}) - metrics.append({'name': 'bleu_best_score_iteration', - 'value': bleu_uncased_best[0]}) - metrics.append({'name': 'bleu_uncased_last', - 'value': stats['bleu_uncased']}) - else: - metrics.append({'name': 'bleu_uncased', - 'value': stats['bleu_uncased'], - 'min_value': bleu_min, - 'max_value': bleu_max}) - - if (warmup and 'step_timestamp_log' in stats and - len(stats['step_timestamp_log']) > warmup + 1): - # first entry in the time_log is start of step 1. The rest of the - # entries are the end of each step recorded - time_log = stats['step_timestamp_log'] - elapsed = time_log[-1].timestamp - time_log[warmup].timestamp - num_examples = ( - total_batch_size * log_steps * (len(time_log) - warmup - 1)) - examples_per_sec = num_examples / elapsed - metrics.append({'name': 'exp_per_second', - 'value': examples_per_sec}) - - if 'avg_exp_per_second' in stats: - metrics.append({'name': 'avg_exp_per_second', - 'value': stats['avg_exp_per_second']}) - - if 'step_timestamp_log' in stats: - time_log = stats['step_timestamp_log'] - metrics.append({'name': 'startup_time', - 'value': time_log[0].timestamp - start_time_sec}) - - flags_str = flags_core.get_nondefault_flags_as_str() - self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics, - extras={'flags': flags_str}) - - -class TransformerBaseKerasAccuracy(TransformerBenchmark): - """Benchmark accuracy tests for Transformer Base model w/ Keras.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """Benchmark accuracy tests for Transformer Base model 
w/ Keras. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - flag_methods = [misc.define_transformer_flags] - - super(TransformerBaseKerasAccuracy, self).__init__( - output_dir=output_dir, root_data_dir=root_data_dir, - flag_methods=flag_methods) - - def benchmark_1_gpu(self): - """Benchmark 1 gpu. - - The paper uses 8 GPUs and a much larger effective batch size, this is will - not converge to the 27.3 BLEU (uncased) SOTA. - """ - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'base' - FLAGS.batch_size = 2048 - FLAGS.train_steps = 1000 - FLAGS.steps_between_evals = 500 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - # These bleu scores are based on test runs after at this limited - # number of steps and batch size after verifying SOTA at 8xV100s. - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=25.3, - bleu_max=26) - - def benchmark_1_gpu_static_batch(self): - """Benchmark 1 gpu with static_batch. - - The paper uses 8 GPUs and a much larger effective batch size, this is will - not converge to the 27.3 BLEU (uncased) SOTA. - """ - self._setup() - FLAGS.num_gpus = 1 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. 
- FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'base' - FLAGS.batch_size = 4096 - FLAGS.train_steps = 100000 - FLAGS.steps_between_evals = 5000 - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_static_batch') - # These bleu scores are based on test runs after at this limited - # number of steps and batch size after verifying SOTA at 8xV100s. - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=25.3, - bleu_max=26) - - def benchmark_8_gpu(self): - """Benchmark 8 gpu. - - Should converge to 27.3 BLEU (uncased). This has not been confirmed yet. - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'base' - FLAGS.batch_size = 4096*8 - FLAGS.train_steps = 100000 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=27, - bleu_max=28) - - def benchmark_8_gpu_static_batch(self): - """Benchmark 8 gpu. - - Should converge to 27.3 BLEU (uncased). This has not been confirmed yet. - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. 
- FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'base' - FLAGS.batch_size = 4096*8 - FLAGS.train_steps = 100000 - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.steps_between_evals = 5000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=27, - bleu_max=28) - - -class TransformerBigKerasAccuracy(TransformerBenchmark): - """Benchmark accuracy tests for Transformer Big model w/ Keras.""" - - def __init__(self, output_dir=None, root_data_dir=None, **kwargs): - """Benchmark accuracy tests for Transformer Big model w/ Keras. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more - named arguments before updating the constructor. - """ - flag_methods = [misc.define_transformer_flags] - - super(TransformerBigKerasAccuracy, self).__init__( - output_dir=output_dir, root_data_dir=root_data_dir, - flag_methods=flag_methods) - - def benchmark_8_gpu(self): - """Benchmark 8 gpu. - - Over 6 runs with eval every 20K steps the average highest value was 28.195 - (bleu uncased). 28.424 was the highest and 27.96 the lowest. The values are - the highest value seen during a run and occurred at a median of iteration 9. - Iterations are not epochs, an iteration is a number of steps between evals. - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. 
- FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.train_steps = 20000 * 12 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=27.9, - bleu_max=29.2) - - def benchmark_8_gpu_static_batch(self): - """Benchmark 8 gpu. - - Should converge to 28.4 BLEU (uncased). This has not be verified yet." - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.train_steps = 20000 * 12 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=28, - bleu_max=29.2) - - def benchmark_8_gpu_fp16(self): - """Benchmark 8 gpu with dynamic batch and fp16. - - Over 6 runs with eval every 20K steps the average highest value was 28.247 - (bleu uncased). 28.424 was the highest and 28.09 the lowest. The values are - the highest value seen during a run and occurred at a median of iteration - 11. While this could be interpreted as worse than FP32, if looking at the - first iteration at which 28 is passed FP16 performs equal and possibly - better. Although not part of the initial test runs, the highest value - recorded with the arguments below was 28.9 at iteration 12. Iterations are - not epochs, an iteration is a number of steps between evals. 
- """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.train_steps = 20000 * 12 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=28, - bleu_max=29.2) - - def benchmark_8_gpu_fp16_amp(self): - """Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision. - - Should converge to 28.4 BLEU (uncased). This has not be verified yet." - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.fp16_implementation = 'graph_rewrite' - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.train_steps = 20000 * 12 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_amp') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=28, - bleu_max=29) - - def benchmark_8_gpu_static_batch_fp16(self): - """Benchmark 8 gpu with static batch and fp16. - - Should converge to 28.4 BLEU (uncased). This has not be verified yet." - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. 
- FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.train_steps = 400000 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch_fp16') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=28, - bleu_max=29.2) - - def benchmark_xla_8_gpu_static_batch_fp16(self): - """Benchmark 8 gpu with static batch, XLA, and FP16. - - Should converge to 28.4 BLEU (uncased). This has not be verified yet." - """ - self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.enable_xla = True - FLAGS.data_dir = self.train_data_dir - FLAGS.vocab_file = self.vocab_file - # Sets values directly to avoid validation check. - FLAGS['bleu_source'].value = self.bleu_source - FLAGS['bleu_ref'].value = self.bleu_ref - FLAGS.param_set = 'big' - FLAGS.batch_size = 3072*8 - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.train_steps = 400000 - FLAGS.steps_between_evals = 20000 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_xla_8_gpu_static_batch_fp16') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps, - bleu_min=28, - bleu_max=29.2) - - -class TransformerKerasBenchmark(TransformerBenchmark): - """Benchmarks for Transformer (Base and Big) using Keras.""" - - def __init__(self, output_dir=None, default_flags=None, - root_data_dir=None, batch_per_gpu=4096, tpu=None): - """Initialize. - - Args: - output_dir: Based directory for saving artifacts, e.g. checkpoints. - default_flags: default flags to use for all tests. - root_data_dir: root directory for data, e.g. training. - batch_per_gpu: batch size to use per gpu. - tpu: Target TPU to use. 
- """ - flag_methods = [misc.define_transformer_flags] - self.batch_per_gpu = batch_per_gpu - - super(TransformerKerasBenchmark, self).__init__( - output_dir=output_dir, - default_flags=default_flags, - root_data_dir=root_data_dir, - flag_methods=flag_methods, - tpu=tpu) - - def benchmark_1_gpu_no_dist_strat(self): - """Benchmark 1 gpu without distribution strategy.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'off' - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu_no_dist_strat_static_batch(self): - """Benchmark 1 gpu without distribution strategy with static batch.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.distribution_strategy = 'off' - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_ds_sb') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu(self): - """Benchmark 1 gpu.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu_fp16(self): - """Benchmark 1 gpu FP16.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16') - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_1_gpu(self): - """Benchmark 1 gpu w/xla.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu') - FLAGS.enable_xla = True - 
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_1_gpu_fp16(self): - """Benchmark 1 gpu w/xla and FP16.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16') - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu_static_batch(self): - """Benchmark 1 gpu with static batch.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_static_batch') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_1_gpu_static_batch(self): - """Benchmark 1 gpu with static batch w/xla.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_static_batch') - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.enable_xla = True - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_1_gpu_static_batch_fp16(self): - """Benchmark 1 gpu with static batch FP16.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir( - 'benchmark_1_gpu_static_batch_fp16') - FLAGS.static_batch = True - FLAGS.max_length = 64 - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_1_gpu_static_batch_fp16(self): - """Benchmark 1 gpu with static batch w/xla and FP16.""" - self._setup() - FLAGS.num_gpus = 1 - FLAGS.batch_size = self.batch_per_gpu - FLAGS.model_dir = self._get_model_dir( - 'benchmark_xla_1_gpu_static_batch_fp16') - FLAGS.static_batch = 
True - FLAGS.max_length = 64 - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_8_gpu(self): - """Benchmark 8 gpu.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_8_gpu_fp16(self): - """Benchmark 8 gpu FP16.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_8_gpu(self): - """Benchmark 8 gpu w/xla.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.enable_xla = True - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_8_gpu_fp16(self): - """Benchmark 8 gpu w/xla and FP16.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16') - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_8_gpu_static_batch(self): - """Benchmark 8 gpu with static batch.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_8_gpu_static_batch_fp16(self): - """Benchmark 8 gpu with static batch FP16.""" - 
self._setup() - FLAGS.num_gpus = 8 - FLAGS.dtype = 'fp16' - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_8_gpu_static_batch_fp16') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_8_gpu_static_batch(self): - """Benchmark 8 gpu with static batch w/xla.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.enable_xla = True - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_static_batch') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_xla_8_gpu_static_batch_fp16(self): - """Benchmark 8 gpu with static batch w/xla and FP16.""" - self._setup() - FLAGS.num_gpus = 8 - FLAGS.enable_xla = True - FLAGS.dtype = 'fp16' - FLAGS.batch_size = self.batch_per_gpu * 8 - FLAGS.model_dir = self._get_model_dir( - 'benchmark_xla_8_gpu_static_batch_fp16') - FLAGS.static_batch = True - FLAGS.max_length = 64 - self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - -class TransformerBaseKerasBenchmarkReal(TransformerKerasBenchmark): - """Transformer based version real data benchmark tests.""" - - def __init__(self, output_dir=TMP_DIR, root_data_dir=TMP_DIR, **kwargs): - def_flags = {} - def_flags['param_set'] = 'base' - def_flags['train_steps'] = 50 - def_flags['log_steps'] = 10 - - super(TransformerBaseKerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=def_flags, - root_data_dir=root_data_dir, batch_per_gpu=4096) - - -class TransformerBigKerasBenchmarkReal(TransformerKerasBenchmark): - """Transformer based version real data benchmark tests.""" - - def __init__(self, output_dir=TMP_DIR, root_data_dir=TMP_DIR, - tpu=None, **kwargs): - def_flags = {} - def_flags['param_set'] = 'big' - 
def_flags['train_steps'] = 50 - def_flags['log_steps'] = 10 - - super(TransformerBigKerasBenchmarkReal, self).__init__( - output_dir=output_dir, default_flags=def_flags, - root_data_dir=root_data_dir, batch_per_gpu=3072, - tpu=tpu) - - def benchmark_2x2_tpu(self): - """Port of former snaggletooth transformer_big model on 2x2.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu') - FLAGS.train_steps = 300 - FLAGS.log_steps = 150 - FLAGS.steps_between_evals = 150 - FLAGS.distribution_strategy = 'tpu' - FLAGS.static_batch = True - FLAGS.use_ctl = True - FLAGS.batch_size = 6144 - FLAGS.max_length = 64 - FLAGS.decode_batch_size = 32 - FLAGS.decode_max_length = 97 - FLAGS.padded_decode = True - FLAGS.enable_checkpointing = False - - self._run_and_report_benchmark( - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - def benchmark_4x4_tpu(self): - """Port of former GCP transformer_big model on 4x4.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu') - FLAGS.train_steps = 300 - FLAGS.log_steps = 150 - FLAGS.steps_between_evals = 150 - FLAGS.distribution_strategy = 'tpu' - FLAGS.static_batch = True - FLAGS.use_ctl = True - FLAGS.batch_size = 24576 - FLAGS.max_length = 64 - FLAGS.decode_batch_size = 32 - FLAGS.decode_max_length = 97 - FLAGS.padded_decode = True - FLAGS.enable_checkpointing = False - - self._run_and_report_benchmark( - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - @owner_utils.Owner('tf-graph-compiler') - def benchmark_4x4_tpu_mlir(self): - """Run transformer_big model on 4x4 with the MLIR Bridge enabled.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu') - FLAGS.train_steps = 300 - FLAGS.log_steps = 150 - FLAGS.steps_between_evals = 150 - FLAGS.distribution_strategy = 'tpu' - FLAGS.static_batch = True - FLAGS.use_ctl = True - FLAGS.batch_size = 24576 - FLAGS.max_length = 64 - FLAGS.decode_batch_size = 32 - FLAGS.decode_max_length = 97 - 
FLAGS.padded_decode = True - FLAGS.enable_checkpointing = False - tf.config.experimental.enable_mlir_bridge() - - self._run_and_report_benchmark( - total_batch_size=FLAGS.batch_size, - log_steps=FLAGS.log_steps) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/unet3d_benchmark.py b/official/benchmark/unet3d_benchmark.py deleted file mode 100644 index 8c811e483e4d1935487f1175baf6f5786632c952..0000000000000000000000000000000000000000 --- a/official/benchmark/unet3d_benchmark.py +++ /dev/null @@ -1,151 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes benchmark testing for 3D Unet model.""" -# pylint: disable=line-too-long -from __future__ import print_function - -import functools -import os -import time -from typing import Optional -from absl import flags -import tensorflow as tf # pylint: disable=g-bad-import-order - -from official.benchmark import benchmark_wrappers -from official.benchmark import keras_benchmark -from official.benchmark import owner_utils -from official.vision.segmentation import unet_main as unet_training_lib -from official.vision.segmentation import unet_model as unet_model_lib - -UNET3D_MIN_ACCURACY = 0.90 -UNET3D_MAX_ACCURACY = 0.98 -UNET_TRAINING_FILES = 'gs://mlcompass-data/unet3d/train_data/*' -UNET_EVAL_FILES = 'gs://mlcompass-data/unet3d/eval_data/*' -UNET_MODEL_CONFIG_FILE = 'gs://mlcompass-data/unet3d/config/unet_config.yaml' - -FLAGS = flags.FLAGS - - -class Unet3DAccuracyBenchmark(keras_benchmark.KerasBenchmark): - """Benchmark accuracy tests for UNet3D model in Keras.""" - - def __init__(self, - output_dir: Optional[str] = None, - root_data_dir: Optional[str] = None, - **kwargs): - """A benchmark class. - - Args: - output_dir: directory where to output e.g. log files - root_data_dir: directory under which to look for dataset - **kwargs: arbitrary named arguments. This is needed to make the - constructor forward compatible in case PerfZero provides more named - arguments before updating the constructor. - """ - - flag_methods = [unet_training_lib.define_unet3d_flags] - - # UNet3D model in Keras.""" - self.training_file_pattern = UNET_TRAINING_FILES - self.eval_file_pattern = UNET_EVAL_FILES - - # TODO(hongjunchoi): Create and use shared config file instead. 
- self.config_file = UNET_MODEL_CONFIG_FILE - super(Unet3DAccuracyBenchmark, self).__init__( - output_dir=output_dir, flag_methods=flag_methods) - - def _set_benchmark_parameters(self, experiment_name): - """Overrides training parameters for benchmark tests.""" - FLAGS.model_dir = self._get_model_dir(experiment_name) - FLAGS.mode = 'train' - FLAGS.training_file_pattern = self.training_file_pattern - FLAGS.eval_file_pattern = self.eval_file_pattern - FLAGS.config_file = self.config_file - FLAGS.lr_init_value = 0.00005 - FLAGS.lr_decay_rate = 0.5 - FLAGS.epochs = 3 - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - experiment_name: str, - min_accuracy: float = UNET3D_MIN_ACCURACY, - max_accuracy: float = UNET3D_MAX_ACCURACY, - distribution_strategy: str = 'tpu', - epochs: int = 10, - steps: int = 0, - epochs_between_evals: int = 1, - dtype: str = 'float32', - enable_xla: bool = False, - run_eagerly: bool = False): - """Runs and reports the benchmark given the provided configuration.""" - params = unet_training_lib.extract_params(FLAGS) - strategy = unet_training_lib.create_distribution_strategy(params) - - input_dtype = params.dtype - if input_dtype == 'float16' or input_dtype == 'bfloat16': - policy = tf.keras.mixed_precision.experimental.Policy( - 'mixed_bfloat16' if input_dtype == 'bfloat16' else 'mixed_float16') - tf.keras.mixed_precision.experimental.set_policy(policy) - - stats = {} - start_time_sec = time.time() - with strategy.scope(): - unet_model = unet_model_lib.build_unet_model(params) - history = unet_training_lib.train( - params, strategy, unet_model, - functools.partial(unet_training_lib.get_train_dataset, params), - functools.partial(unet_training_lib.get_eval_dataset, params)) - - stats['accuracy_top_1'] = history.history['val_metric_accuracy'][-1] - stats['training_accuracy_top_1'] = history.history['metric_accuracy'][-1] - wall_time_sec = time.time() - start_time_sec - - super(Unet3DAccuracyBenchmark, 
self)._report_benchmark( - stats, - wall_time_sec, - top_1_min=min_accuracy, - top_1_max=max_accuracy, - total_batch_size=params.train_batch_size) - - def _get_model_dir(self, folder_name): - return os.path.join(self.output_dir, folder_name) - - @owner_utils.Owner('tf-model-garden') - def benchmark_4x4_tpu_bf16(self): - """Test Keras model with 4x4 TPU, fp16.""" - experiment_name = 'benchmark_4x4_tpu_fp16' - self._setup() - self._set_benchmark_parameters(experiment_name) - self._run_and_report_benchmark( - experiment_name=experiment_name, - dtype='bfloat16', - distribution_strategy='tpu') - - @owner_utils.Owner('tf-graph-compiler') - def benchmark_4x4_tpu_bf16_mlir(self): - """Test Keras model with 4x4 TPU, fp16 and MLIR enabled.""" - experiment_name = 'benchmark_4x4_tpu_fp16_mlir' - tf.config.experimental.enable_mlir_bridge() - self._setup() - self._set_benchmark_parameters(experiment_name) - self._run_and_report_benchmark( - experiment_name=experiment_name, - dtype='bfloat16', - distribution_strategy='tpu') - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/benchmark/xlnet_benchmark.py b/official/benchmark/xlnet_benchmark.py deleted file mode 100644 index 4df69cf081a4a06000ed46ea66ac742cb1c39e02..0000000000000000000000000000000000000000 --- a/official/benchmark/xlnet_benchmark.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Executes XLNet benchmarks and accuracy tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import os -import time - -# pylint: disable=g-bad-import-order -from absl import flags -from absl.testing import flagsaver -import tensorflow as tf -# pylint: enable=g-bad-import-order - -from official.benchmark import bert_benchmark_utils as benchmark_utils -from official.benchmark import owner_utils -from official.nlp.xlnet import run_classifier -from official.nlp.xlnet import run_squad -from official.benchmark import benchmark_wrappers - - -# pylint: disable=line-too-long -PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/xlnet/large/xlnet_model-1' -CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/xlnet/imdb/spiece.model.len-512.train.tf_record' -CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/xlnet/imdb/spiece.model.len-512.dev.eval.tf_record' -SQUAD_DATA_PATH = 'gs://tf-perfzero-data/xlnet/squadv2_cased/' -# pylint: enable=line-too-long - -FLAGS = flags.FLAGS - - -class XLNetBenchmarkBase(benchmark_utils.BertBenchmarkBase): - """Base class to hold methods common to test classes in the module.""" - - def __init__(self, output_dir=None, tpu=None): - super(XLNetBenchmarkBase, self).__init__(output_dir=output_dir, tpu=tpu) - self.num_epochs = None - self.num_steps_per_epoch = None - - @flagsaver.flagsaver - def _run_xlnet_classifier(self): - """Starts XLNet classification task.""" - run_classifier.main(unused_argv=None) - - @flagsaver.flagsaver - def _run_xlnet_squad(self): - """Starts XLNet classification task.""" - run_squad.main(unused_argv=None) - - -class XLNetClassifyAccuracy(XLNetBenchmarkBase): - """Short accuracy test for XLNet classifier model. - - Tests XLNet classification task model accuracy. 
The naming - convention of below test cases follow - `benchmark_(number of gpus)_gpu_(dataset type)` format. - """ - - def __init__(self, output_dir=None, tpu=None, **kwargs): - self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH - self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH - self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH - - super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - training_summary_path, - min_accuracy=0.95, - max_accuracy=0.97): - """Starts XLNet accuracy benchmark test.""" - - start_time_sec = time.time() - self._run_xlnet_classifier() - wall_time_sec = time.time() - start_time_sec - - with tf.io.gfile.GFile(training_summary_path, 'rb') as reader: - summary = json.loads(reader.read().decode('utf-8')) - - super(XLNetClassifyAccuracy, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=min_accuracy, - max_accuracy=max_accuracy) - - def _setup(self): - super(XLNetClassifyAccuracy, self)._setup() - FLAGS.test_data_size = 25024 - FLAGS.train_batch_size = 16 - FLAGS.seq_len = 512 - FLAGS.mem_len = 0 - FLAGS.n_layer = 24 - FLAGS.d_model = 1024 - FLAGS.d_embed = 1024 - FLAGS.n_head = 16 - FLAGS.d_head = 64 - FLAGS.d_inner = 4096 - FLAGS.untie_r = True - FLAGS.n_class = 2 - FLAGS.ff_activation = 'gelu' - FLAGS.strategy_type = 'mirror' - FLAGS.learning_rate = 2e-5 - FLAGS.train_steps = 4000 - FLAGS.warmup_steps = 500 - FLAGS.iterations = 200 - FLAGS.bi_data = False - FLAGS.init_checkpoint = self.pretrained_checkpoint_path - FLAGS.train_tfrecord_path = self.train_data_path - FLAGS.test_tfrecord_path = self.eval_data_path - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu_imdb(self): - """Run XLNet model accuracy test with 8 GPUs.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_imdb') - # Sets timer_callback to None as we do not use it now. 
- self.timer_callback = None - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu_imdb(self): - """Run XLNet model accuracy test on 2x2 tpu.""" - self._setup() - FLAGS.strategy_type = 'tpu' - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_imdb') - # Sets timer_callback to None as we do not use it now. - self.timer_callback = None - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - -class XLNetSquadAccuracy(XLNetBenchmarkBase): - """Short accuracy test for XLNet squad model. - - Tests XLNet squad task model accuracy. The naming - convention of below test cases follow - `benchmark_(number of gpus)_gpu_(dataset type)` format. - """ - - def __init__(self, output_dir=None, tpu=None, **kwargs): - self.train_data_path = SQUAD_DATA_PATH - self.predict_file = os.path.join(SQUAD_DATA_PATH, "dev-v2.0.json") - self.test_data_path = os.path.join(SQUAD_DATA_PATH, "12048.eval.tf_record") - self.spiece_model_file = os.path.join(SQUAD_DATA_PATH, "spiece.cased.model") - self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH - - super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir, tpu=tpu) - - @benchmark_wrappers.enable_runtime_flags - def _run_and_report_benchmark(self, - training_summary_path, - min_accuracy=87.0, - max_accuracy=89.0): - """Starts XLNet accuracy benchmark test.""" - - start_time_sec = time.time() - self._run_xlnet_squad() - wall_time_sec = time.time() - start_time_sec - - with tf.io.gfile.GFile(training_summary_path, 'rb') as reader: - summary = json.loads(reader.read().decode('utf-8')) - - super(XLNetSquadAccuracy, self)._report_benchmark( - stats=summary, - wall_time_sec=wall_time_sec, - min_accuracy=min_accuracy, - max_accuracy=max_accuracy) - - def _setup(self): - super(XLNetSquadAccuracy, self)._setup() - 
FLAGS.train_batch_size = 16 - FLAGS.seq_len = 512 - FLAGS.mem_len = 0 - FLAGS.n_layer = 24 - FLAGS.d_model = 1024 - FLAGS.d_embed = 1024 - FLAGS.n_head = 16 - FLAGS.d_head = 64 - FLAGS.d_inner = 4096 - FLAGS.untie_r = True - FLAGS.ff_activation = 'gelu' - FLAGS.strategy_type = 'mirror' - FLAGS.learning_rate = 3e-5 - FLAGS.train_steps = 8000 - FLAGS.warmup_steps = 1000 - FLAGS.iterations = 1000 - FLAGS.bi_data = False - FLAGS.init_checkpoint = self.pretrained_checkpoint_path - FLAGS.train_tfrecord_path = self.train_data_path - FLAGS.test_tfrecord_path = self.test_data_path - FLAGS.spiece_model_file = self.spiece_model_file - FLAGS.predict_file = self.predict_file - FLAGS.adam_epsilon = 1e-6 - FLAGS.lr_layer_decay_rate = 0.75 - - @owner_utils.Owner('tf-model-garden') - def benchmark_8_gpu_squadv2(self): - """Run XLNet model squad v2 accuracy test with 8 GPUs.""" - self._setup() - FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squadv2') - FLAGS.predict_dir = FLAGS.model_dir - # Sets timer_callback to None as we do not use it now. - self.timer_callback = None - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - @owner_utils.Owner('tf-model-garden') - def benchmark_2x2_tpu_squadv2(self): - """Run XLNet model squad v2 accuracy test on 2x2 tpu.""" - self._setup() - FLAGS.strategy_type = 'tpu' - FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_squadv2') - FLAGS.predict_dir = FLAGS.model_dir - # Sets timer_callback to None as we do not use it now. 
- self.timer_callback = None - - summary_path = os.path.join(FLAGS.model_dir, - 'summaries/training_summary.txt') - self._run_and_report_benchmark(summary_path) - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/colab/decoding_api_in_tf_nlp.ipynb b/official/colab/decoding_api_in_tf_nlp.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..726b382e228265fa1e19c2af3150e7cc32a0ec56 --- /dev/null +++ b/official/colab/decoding_api_in_tf_nlp.ipynb @@ -0,0 +1,492 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "vXLA5InzXydn" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "RuRlpLL-X0R_" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fsACVQpVSifi" + }, + "source": [ + "### Install the TensorFlow Model Garden pip package\n", + "\n", + "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", + "which is the nightly Model Garden package created daily automatically.\n", + "* pip will install all models and dependencies automatically." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hYEwGTeCXnnX" + }, + "source": [ + "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/decoding_api_in_tf_nlp.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2j-xhrsVQOQT" + }, + "outputs": [], + "source": [ + "pip install tf-models-nightly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BjP7zwxmskpY" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from official import nlp\n", + "from official.nlp.modeling.ops import sampling_module\n", + "from official.nlp.modeling.ops import beam_search" + ] + 
}, + { + "cell_type": "markdown", + "metadata": { + "id": "0AWgyo-IQ5sP" + }, + "source": [ + "# Decoding API\n", + "This API provides an interface to experiment with different decoding strategies used for auto-regressive models.\n", + "\n", + "1. The following sampling strategies are provided in sampling_module.py, which inherits from the base Decoding class:\n", + " * [top_p](https://arxiv.org/abs/1904.09751) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L65) \n", + "\n", + " This implementation chooses most probable logits with cumulative probabilities upto top_p.\n", + "\n", + " * [top_k](https://arxiv.org/pdf/1805.04833.pdf) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L48)\n", + "\n", + " At each timestep, this implementation samples from top-k logits based on their probability distribution\n", + "\n", + " * Greedy : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L26)\n", + "\n", + " This implementation returns the top logits based on probabilities.\n", + "\n", + "2. Beam search is provided in beam_search.py. [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search.py)\n", + "\n", + " This implementation reduces the risk of missing hidden high probability logits by keeping the most likely num_beams of logits at each time step and eventually choosing the logits that has the overall highest probability." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfOj7oaBRQnS" + }, + "source": [ + "## Initialize Sampling Module in TF-NLP.\n", + "\n", + "\n", + "\u003e **symbols_to_logits_fn** : This is a closure implemented by the users of the API. The input to this closure will be \n", + "```\n", + "Args:\n", + " 1] ids [batch_size, .. 
(index + 1 or 1 if padded_decode is True)],\n", + " 2] index [scalar] : current decoded step,\n", + " 3] cache [nested dictionary of tensors].\n", + "Returns:\n", + " 1] tensor for next-step logits [batch_size, vocab]\n", + " 2] the updated_cache [nested dictionary of tensors].\n", + "```\n", + "This closure calls the model to predict the logits for the 'index+1' step. The cache is used for faster decoding.\n", + "Here is a [reference](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search_test.py#L88) implementation for the above closure.\n", + "\n", + "\n", + "\u003e **length_normalization_fn** : Closure for returning length normalization parameter.\n", + "```\n", + "Args: \n", + " 1] length : scalar for decoded step index.\n", + " 2] dtype : data-type of output tensor\n", + "Returns:\n", + " 1] value of length normalization factor.\n", + "Example :\n", + " def _length_norm(length, dtype):\n", + " return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)\n", + "```\n", + "\n", + "\u003e **vocab_size** : Output vocabulary size.\n", + "\n", + "\u003e **max_decode_length** : Scalar for total number of decoding steps.\n", + "\n", + "\u003e **eos_id** : Decoding will stop if all output decoded ids in the batch have this ID.\n", + "\n", + "\u003e **padded_decode** : Set this to True if running on TPU. Tensors are padded to max_decoding_length if this is True.\n", + "\n", + "\u003e **top_k** : top_k is enabled if this value is \u003e 1.\n", + "\n", + "\u003e **top_p** : top_p is enabled if this value is \u003e 0 and \u003c 1.0\n", + "\n", + "\u003e **sampling_temperature** : This is used to re-estimate the softmax output. Temperature skews the distribution towards high probability tokens and lowers the mass in tail distribution. Value has to be positive. 
Low temperature is equivalent to greedy and makes the distribution sharper, while high temperature makes it more flat.\n", + "\n", + "\u003e **enable_greedy** : By default, this is true and greedy decoding is enabled.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lV1RRp6ihnGX" + }, + "source": [ + "# Initialize the Model Hyper-parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eTsGp2gaKLdE" + }, + "outputs": [], + "source": [ + "params = {}\n", + "params['num_heads'] = 2\n", + "params['num_layers'] = 2\n", + "params['batch_size'] = 2\n", + "params['n_dims'] = 256\n", + "params['max_decode_length'] = 4" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UGvmd0_dRFYI" + }, + "source": [ + "## What is a Cache?\n", + "In auto-regressive architectures like Transformer based [Encoder-Decoder](https://arxiv.org/abs/1706.03762) models, \n", + "Cache is used for fast sequential decoding.\n", + "It is a nested dictionary storing pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention blocks) for every layer.\n", + "\n", + "```\n", + "{\n", + " 'layer_%d' % layer: {\n", + " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n", + " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n", + " } for layer in range(params['num_layers']),\n", + " 'model_specific_item' : Model specific tensor shape,\n", + "}\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CYXkoplAij01" + }, + "source": [ + "# Initialize cache. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D6kfZOOKgkm1" + }, + "outputs": [], + "source": [ + "cache = {\n", + " 'layer_%d' % layer: {\n", + " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32),\n", + " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']/params['num_heads']], dtype=tf.float32)\n", + " } for layer in range(params['num_layers'])\n", + " }\n", + "print(\"cache key shape for layer 1 :\", cache['layer_1']['k'].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nNY3Xn8SiblP" + }, + "source": [ + "# Define closure for length normalization. **optional.**\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T92ccAzlnGqh" + }, + "outputs": [], + "source": [ + "def length_norm(length, dtype):\n", + " \"\"\"Return length normalization factor.\"\"\"\n", + " return tf.pow(((5. 
+ tf.cast(length, dtype)) / 6.), 0.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "syl7I5nURPgW" + }, + "source": [ + "# Create model_fn\n", + " In practice, this will be replaced by an actual model implementation such as [here](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer.py#L236)\n", + "```\n", + "Args:\n", + "i : Step that is being decoded.\n", + "Returns:\n", + " logit probabilities of size [batch_size, 1, vocab_size]\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AhzSkRisRdB6" + }, + "outputs": [], + "source": [ + "probabilities = tf.constant([[[0.3, 0.4, 0.3], [0.3, 0.3, 0.4],\n", + " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],\n", + " [[0.2, 0.5, 0.3], [0.2, 0.7, 0.1],\n", + " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]])\n", + "def model_fn(i):\n", + " return probabilities[:, i, :]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DBMUkaVmVZBg" + }, + "source": [ + "# Initialize symbols_to_logits_fn\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FAJ4CpbfVdjr" + }, + "outputs": [], + "source": [ + "def _symbols_to_logits_fn():\n", + " \"\"\"Calculates logits of the next tokens.\"\"\"\n", + " def symbols_to_logits_fn(ids, i, temp_cache):\n", + " del ids\n", + " logits = tf.cast(tf.math.log(model_fn(i)), tf.float32)\n", + " return logits, temp_cache\n", + " return symbols_to_logits_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R_tV3jyWVL47" + }, + "source": [ + "# Greedy \n", + "Greedy decoding selects the token id with the highest probability as its next id: $id_t = argmax_{w}P(id | id_{1:t-1})$ at each timestep $t$. The following sketch shows greedy decoding. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aGt9idSkVQEJ" + }, + "outputs": [], + "source": [ + "greedy_obj = sampling_module.SamplingModule(\n", + " length_normalization_fn=None,\n", + " dtype=tf.float32,\n", + " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", + " vocab_size=3,\n", + " max_decode_length=params['max_decode_length'],\n", + " eos_id=10,\n", + " padded_decode=False)\n", + "ids, _ = greedy_obj.generate(\n", + " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", + "print(\"Greedy Decoded Ids:\", ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s4pTTsQXVz5O" + }, + "source": [ + "# top_k sampling\n", + "In *Top-K* sampling, the *K* most likely next token ids are filtered and the probability mass is redistributed among only those *K* ids. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pCLWIn6GV5_G" + }, + "outputs": [], + "source": [ + "top_k_obj = sampling_module.SamplingModule(\n", + " length_normalization_fn=length_norm,\n", + " dtype=tf.float32,\n", + " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", + " vocab_size=3,\n", + " max_decode_length=params['max_decode_length'],\n", + " eos_id=10,\n", + " sample_temperature=tf.constant(1.0),\n", + " top_k=tf.constant(3),\n", + " padded_decode=False,\n", + " enable_greedy=False)\n", + "ids, _ = top_k_obj.generate(\n", + " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", + "print(\"top-k sampled Ids:\", ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Jp3G-eE_WI4Y" + }, + "source": [ + "# top_p sampling\n", + "Instead of sampling only from the most likely *K* token ids, in *Top-p* sampling chooses from the smallest possible set of ids whose cumulative probability exceeds the probability *p*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rEGdIWcuWILO" + }, + "outputs": [], + "source": [ + "top_p_obj = sampling_module.SamplingModule(\n", + " length_normalization_fn=length_norm,\n", + " dtype=tf.float32,\n", + " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", + " vocab_size=3,\n", + " max_decode_length=params['max_decode_length'],\n", + " eos_id=10,\n", + " sample_temperature=tf.constant(1.0),\n", + " top_p=tf.constant(0.9),\n", + " padded_decode=False,\n", + " enable_greedy=False)\n", + "ids, _ = top_p_obj.generate(\n", + " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", + "print(\"top-p sampled Ids:\", ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2hcuyJ2VWjDz" + }, + "source": [ + "# Beam search decoding\n", + "Beam search reduces the risk of missing hidden high probability token ids by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cJ3WzvSrWmSA" + }, + "outputs": [], + "source": [ + "beam_size = 2\n", + "params['batch_size'] = 1\n", + "beam_cache = {\n", + " 'layer_%d' % layer: {\n", + " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32),\n", + " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32)\n", + " } for layer in range(params['num_layers'])\n", + " }\n", + "print(\"cache key shape for layer 1 :\", beam_cache['layer_1']['k'].shape)\n", + "ids, _ = beam_search.sequence_beam_search(\n", + " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", + " initial_ids=tf.constant([9], tf.int32),\n", + " initial_cache=beam_cache,\n", + " vocab_size=3,\n", + " beam_size=beam_size,\n", + " alpha=0.6,\n", + " max_decode_length=params['max_decode_length'],\n", + " eos_id=10,\n", + " padded_decode=False,\n", + " dtype=tf.float32)\n", + "print(\"Beam search ids:\", ids)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "decoding_api_in_tf_nlp.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/official/colab/fine_tuning_bert.ipynb b/official/colab/fine_tuning_bert.ipynb index 443674b6b9f1292d25f26cc06e3359506763bfce..ad34d68d66770273a055cbaf345c52df734bfa79 100644 --- a/official/colab/fine_tuning_bert.ipynb +++ b/official/colab/fine_tuning_bert.ipynb @@ -3,7 +3,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "vXLA5InzXydn" }, "source": [ @@ -12,11 +11,9 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "cellView": "form", - "colab": {}, - "colab_type": "code", "id": "RuRlpLL-X0R_" }, "outputs": [], @@ -37,7 +34,6 @@ { 
"cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "1mLJmVotXs64" }, "source": [ @@ -47,7 +43,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "hYEwGTeCXnnX" }, "source": [ @@ -64,13 +59,15 @@ " \u003ctd\u003e\n", " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/fine_tuning_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", + " \u003c/td\u003e\n", "\u003c/table\u003e" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "YN2ACivEPxgD" }, "source": [ @@ -82,7 +79,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "s2d9S2CSSO1z" }, "source": [ @@ -92,34 +88,30 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "fsACVQpVSifi" }, "source": [ "### Install the TensorFlow Model Garden pip package\n", "\n", - "* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n", + "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", + "which is the nightly Model Garden package created daily automatically.\n", "* pip will install all models and dependencies automatically." 
] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "NvNr2svBM-p3" }, "outputs": [], "source": [ - "!pip install -q tf-nightly\n", - "!pip install -q tf-models-nightly" + "!pip install -q tf-models-official==2.4.0" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "U-7qPCjWUAyy" }, "source": [ @@ -128,10 +120,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "lXsXev5MNr20" }, "outputs": [], @@ -160,13 +150,12 @@ "import official.nlp.data.classifier_data_lib\n", "import official.nlp.modeling.losses\n", "import official.nlp.modeling.models\n", - "import official.nlp.modeling.networks" + "import official.nlp.modeling.networks\n" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "mbanlzTvJBsz" }, "source": [ @@ -176,7 +165,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "PpW0x8TpR8DT" }, "source": [ @@ -185,45 +173,39 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "vzRHOLciR8eq" }, "outputs": [], "source": [ - "gs_folder_bert = \"gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12\"\n", + "gs_folder_bert = \"gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12\"\n", "tf.io.gfile.listdir(gs_folder_bert)" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9uFskufsR2LT" }, "source": [ - "You can get a pre-trained BERT encoder from TensorFlow Hub here:" + "You can get a pre-trained BERT encoder from [TensorFlow Hub](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2):" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "e0dAkUttJAzj" }, "outputs": [], "source": [ - "hub_url_bert = 
\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2\"" + "hub_url_bert = \"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3\"" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Qv6abtRvH4xO" }, "source": [ @@ -236,7 +218,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "28DvUhC1YUiB" }, "source": [ @@ -252,10 +233,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "Ijikx5OsH9AT" }, "outputs": [], @@ -267,10 +246,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "xf9zz4vLYXjr" }, "outputs": [], @@ -281,7 +258,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "ZgBg2r2nYT-K" }, "source": [ @@ -290,10 +266,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "IQrHxv7W7jH5" }, "outputs": [], @@ -304,7 +278,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "vhsVWYNxazz5" }, "source": [ @@ -313,10 +286,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "n0gfc_VTayfQ" }, "outputs": [], @@ -327,7 +298,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "38zJcap6xkbC" }, "source": [ @@ -336,10 +306,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "xON_i6SkwApW" }, "outputs": [], @@ -353,7 +321,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9fbTyfJpNr7x" }, "source": [ @@ -363,7 +330,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "wqeN54S61ZKQ" }, "source": [ @@ -376,10 +342,8 @@ }, { "cell_type": "code", - 
"execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "idxyhmrCQcw5" }, "outputs": [], @@ -395,7 +359,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "zYHDSquU2lDU" }, "source": [ @@ -404,10 +367,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "L_OfOYPg853R" }, "outputs": [], @@ -421,7 +382,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "kkAXLtuyWWDI" }, "source": [ @@ -435,7 +395,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "62UTWLQd9-LB" }, "source": [ @@ -446,10 +405,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "bdL-dRNRBRJT" }, "outputs": [], @@ -460,7 +417,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "UrPktnqpwqie" }, "source": [ @@ -469,10 +425,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "BR7BmtU498Bh" }, "outputs": [], @@ -490,10 +444,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "has42aUdfky-" }, "outputs": [], @@ -505,7 +457,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "MU9lTWy_xXbb" }, "source": [ @@ -514,10 +465,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "USD8uihw-g4J" }, "outputs": [], @@ -530,7 +479,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xmNv4l4k-dBZ" }, "source": [ @@ -540,7 +488,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "DIWjNIKq-ldh" }, "source": [ @@ -553,7 +500,6 @@ { "cell_type": "markdown", "metadata": { 
- "colab_type": "text", "id": "ulNZ4U96-8JZ" }, "source": [ @@ -562,10 +508,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "EezOO9qj91kP" }, "outputs": [], @@ -578,7 +522,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "rxLenwAvCkBf" }, "source": [ @@ -587,10 +530,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "2CetH_5C9P2m" }, "outputs": [], @@ -606,7 +547,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "P5UBnCn8Ii6s" }, "source": [ @@ -617,10 +557,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "sDGiWYPLEd5a" }, "outputs": [], @@ -661,10 +599,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "yuLKxf6zHxw-" }, "outputs": [], @@ -682,7 +618,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "7FC5aLVxKVKK" }, "source": [ @@ -691,10 +626,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "jyjTdGpFhO_1" }, "outputs": [], @@ -708,7 +641,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "FSwymsbkbLDA" }, "source": [ @@ -718,7 +650,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Efrj3Cn1kLAp" }, "source": [ @@ -728,7 +659,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xxpOY5r2Ayq6" }, "source": [ @@ -737,10 +667,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "ujapVfZ_AKW7" }, "outputs": [], @@ -758,7 +686,6 @@ { "cell_type": "markdown", "metadata": { - 
"colab_type": "text", "id": "96ldxDSwkVkj" }, "source": [ @@ -769,10 +696,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "cH682__U0FBv" }, "outputs": [], @@ -784,7 +709,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "XqKp3-5GIZlw" }, "source": [ @@ -793,10 +717,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "bAQblMIjwkvx" }, "outputs": [], @@ -807,7 +729,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "sFmVG4SKZAw8" }, "source": [ @@ -816,10 +737,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "VTjgPbp4ZDKo" }, "outputs": [], @@ -834,7 +753,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Q0NTdwZsQK8n" }, "source": [ @@ -845,10 +763,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "8L__-erBwLIQ" }, "outputs": [], @@ -859,7 +775,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "mKAvkQc3heSy" }, "source": [ @@ -870,23 +785,20 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "97Ll2Gichd_Y" }, "outputs": [], "source": [ - "checkpoint = tf.train.Checkpoint(model=bert_encoder)\n", - "checkpoint.restore(\n", + "checkpoint = tf.train.Checkpoint(encoder=bert_encoder)\n", + "checkpoint.read(\n", " os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "2oHOql35k3Dd" }, "source": [ @@ -896,7 +808,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "115caFLMk-_l" }, "source": [ @@ -908,10 +819,8 @@ }, 
{ "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "w8qXKRZuCwW4" }, "outputs": [], @@ -934,7 +843,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "pXRGxiRNEHS2" }, "source": [ @@ -943,10 +851,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "eQNA16bhDpky" }, "outputs": [], @@ -957,7 +863,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xqu_K71fJQB8" }, "source": [ @@ -967,7 +872,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "78FEUOOEkoP0" }, "source": [ @@ -977,7 +881,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "OTNcA0O0nSq9" }, "source": [ @@ -986,10 +889,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "nzi8hjeTQTRs" }, "outputs": [], @@ -1012,7 +913,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "IFtKFWbNKb0u" }, "source": [ @@ -1023,10 +923,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "9ZoUgDUNJPz3" }, "outputs": [], @@ -1046,7 +944,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "7ynJibkBRTJF" }, "source": [ @@ -1055,26 +952,22 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "umo0ttrgRYIM" }, "outputs": [], "source": [ "result = bert_classifier(my_examples, training=False)\n", "\n", - "result = tf.argmax(result).numpy()\n", + "result = tf.argmax(result, axis=-1).numpy()\n", "result" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "utGl0M3aZCE4" }, 
"outputs": [], @@ -1085,7 +978,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "fVo_AnT0l26j" }, "source": [ @@ -1096,10 +988,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "Nl5x6nElZqkP" }, "outputs": [], @@ -1110,10 +1000,9 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", + "collapsed": true, "id": "y_ACvKPsVUXC" }, "outputs": [], @@ -1134,7 +1023,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "eQceYqRFT_Eg" }, "source": [ @@ -1144,7 +1032,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "SaC1RlFawUpc" }, "source": [ @@ -1155,7 +1042,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "CwUdjFBkzUgh" }, "source": [ @@ -1167,7 +1053,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "2UTQrkyOT5wD" }, "source": [ @@ -1176,10 +1061,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "XQeDFOzYR9Z9" }, "outputs": [], @@ -1192,7 +1075,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "XrFQbfErUWxa" }, "source": [ @@ -1201,10 +1083,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "ymw7GOHpSHKU" }, "outputs": [], @@ -1231,7 +1111,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "uX_Sp-wTUoRm" }, "source": [ @@ -1240,10 +1119,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "rkHxIK57SQ_r" }, "outputs": [], @@ -1264,7 +1141,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "stbaVouogvzS" }, "source": [ @@ -1273,10 +1149,8 
@@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "gwhrlQl4gxVF" }, "outputs": [], @@ -1287,7 +1161,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "dbJ76vSJj77j" }, "source": [ @@ -1297,7 +1170,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9J95LFRohiYw" }, "source": [ @@ -1306,10 +1178,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "gCvaLLAxPuMc" }, "outputs": [], @@ -1351,10 +1221,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "rutkBadrhzdR" }, "outputs": [], @@ -1379,10 +1247,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "59TVgt4Z7fuU" }, "outputs": [], @@ -1393,7 +1259,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "QbklKt-w_CiI" }, "source": [ @@ -1406,17 +1271,38 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", - "id": "lo6479At4sP1" + "id": "GDWrHm0BGpbX" }, "outputs": [], "source": [ "# Note: 350MB download.\n", - "import tensorflow_hub as hub\n", - "hub_encoder = hub.KerasLayer(hub_url_bert, trainable=True)\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "Y29meH0qGq_5" + }, + "outputs": [], + "source": [ + "hub_model_name = \"bert_en_uncased_L-12_H-768_A-12\" #@param [\"bert_en_uncased_L-24_H-1024_A-16\", \"bert_en_wwm_cased_L-24_H-1024_A-16\", \"bert_en_uncased_L-12_H-768_A-12\", \"bert_en_wwm_uncased_L-24_H-1024_A-16\", \"bert_en_cased_L-24_H-1024_A-16\", \"bert_en_cased_L-12_H-768_A-12\", \"bert_zh_L-12_H-768_A-12\", 
\"bert_multi_cased_L-12_H-768_A-12\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lo6479At4sP1" + }, + "outputs": [], + "source": [ + "hub_encoder = hub.KerasLayer(f\"https://tfhub.dev/tensorflow/{hub_model_name}/3\",\n", + " trainable=True)\n", "\n", "print(f\"The Hub encoder has {len(hub_encoder.trainable_variables)} trainable variables\")" ] @@ -1424,7 +1310,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "iTzF574wivQv" }, "source": [ @@ -1433,29 +1318,27 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "XEcYrCR45Uwo" }, "outputs": [], "source": [ "result = hub_encoder(\n", - " inputs=[glue_train['input_word_ids'][:10],\n", - " glue_train['input_mask'][:10],\n", - " glue_train['input_type_ids'][:10],],\n", + " inputs=dict(\n", + " input_word_ids=glue_train['input_word_ids'][:10],\n", + " input_mask=glue_train['input_mask'][:10],\n", + " input_type_ids=glue_train['input_type_ids'][:10],),\n", " training=False,\n", ")\n", "\n", - "print(\"Pooled output shape:\", result[0].shape)\n", - "print(\"Sequence output shape:\", result[1].shape)" + "print(\"Pooled output shape:\", result['pooled_output'].shape)\n", + "print(\"Sequence output shape:\", result['sequence_output'].shape)" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "cjojn8SmLSRI" }, "source": [ @@ -1466,35 +1349,33 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "9nTDaApyLR70" }, "outputs": [], "source": [ - "hub_classifier, hub_encoder = bert.bert_models.classifier_model(\n", - " # Caution: Most of `bert_config` is ignored if you pass a hub url.\n", - " bert_config=bert_config, hub_module_url=hub_url_bert, num_labels=2)" + "hub_classifier = nlp.modeling.models.BertClassifier(\n", + " bert_encoder,\n", + " num_classes=2,\n", + " 
dropout_rate=0.1,\n", + " initializer=tf.keras.initializers.TruncatedNormal(\n", + " stddev=0.02))" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xMJX3wV0_v7I" }, "source": [ - "The one downside to loading this model from TFHub is that the structure of internal keras layers is not restored. So it's more difficult to inspect or modify the model. The `TransformerEncoder` model is now a single layer:" + "The one downside to loading this model from TFHub is that the structure of internal keras layers is not restored. So it's more difficult to inspect or modify the model. The `BertEncoder` model is now a single layer:" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "pD71dnvhM2QS" }, "outputs": [], @@ -1504,10 +1385,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "nLZD-isBzNKi" }, "outputs": [], @@ -1522,7 +1401,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "ZxSqH0dNAgXV" }, "source": [ @@ -1530,13 +1408,12 @@ "\n", "### Low level model building\n", "\n", - "If you need a more control over the construction of the model it's worth noting that the `classifier_model` function used earlier is really just a thin wrapper over the `nlp.modeling.networks.TransformerEncoder` and `nlp.modeling.models.BertClassifier` classes. Just remember that if you start modifying the architecture it may not be correct or possible to reload the pre-trained checkpoint so you'll need to retrain from scratch." + "If you need a more control over the construction of the model it's worth noting that the `classifier_model` function used earlier is really just a thin wrapper over the `nlp.modeling.networks.BertEncoder` and `nlp.modeling.models.BertClassifier` classes. 
Just remember that if you start modifying the architecture it may not be correct or possible to reload the pre-trained checkpoint so you'll need to retrain from scratch." ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "0cgABEwDj06P" }, "source": [ @@ -1545,45 +1422,40 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "5r_yqhBFSVEM" }, "outputs": [], "source": [ - "transformer_config = config_dict.copy()\n", + "bert_encoder_config = config_dict.copy()\n", "\n", "# You need to rename a few fields to make this work:\n", - "transformer_config['attention_dropout_rate'] = transformer_config.pop('attention_probs_dropout_prob')\n", - "transformer_config['activation'] = tf_utils.get_activation(transformer_config.pop('hidden_act'))\n", - "transformer_config['dropout_rate'] = transformer_config.pop('hidden_dropout_prob')\n", - "transformer_config['initializer'] = tf.keras.initializers.TruncatedNormal(\n", - " stddev=transformer_config.pop('initializer_range'))\n", - "transformer_config['max_sequence_length'] = transformer_config.pop('max_position_embeddings')\n", - "transformer_config['num_layers'] = transformer_config.pop('num_hidden_layers')\n", + "bert_encoder_config['attention_dropout_rate'] = bert_encoder_config.pop('attention_probs_dropout_prob')\n", + "bert_encoder_config['activation'] = tf_utils.get_activation(bert_encoder_config.pop('hidden_act'))\n", + "bert_encoder_config['dropout_rate'] = bert_encoder_config.pop('hidden_dropout_prob')\n", + "bert_encoder_config['initializer'] = tf.keras.initializers.TruncatedNormal(\n", + " stddev=bert_encoder_config.pop('initializer_range'))\n", + "bert_encoder_config['max_sequence_length'] = bert_encoder_config.pop('max_position_embeddings')\n", + "bert_encoder_config['num_layers'] = bert_encoder_config.pop('num_hidden_layers')\n", "\n", - "transformer_config" + "bert_encoder_config" ] }, { "cell_type": "code", - 
"execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "rIO8MI7LLijh" }, "outputs": [], "source": [ - "manual_encoder = nlp.modeling.networks.TransformerEncoder(**transformer_config)" + "manual_encoder = nlp.modeling.networks.BertEncoder(**bert_encoder_config)" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "4a4tFSg9krRi" }, "source": [ @@ -1592,23 +1464,20 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "X6N9NEqfXJCx" }, "outputs": [], "source": [ - "checkpoint = tf.train.Checkpoint(model=manual_encoder)\n", - "checkpoint.restore(\n", + "checkpoint = tf.train.Checkpoint(encoder=manual_encoder)\n", + "checkpoint.read(\n", " os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "1BPiPO4ykuwM" }, "source": [ @@ -1617,10 +1486,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "hlVdgJKmj389" }, "outputs": [], @@ -1634,7 +1501,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "nJMXvVgJkyBv" }, "source": [ @@ -1643,10 +1509,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "tQX57GJ6wkAb" }, "outputs": [], @@ -1654,17 +1518,14 @@ "manual_classifier = nlp.modeling.models.BertClassifier(\n", " bert_encoder,\n", " num_classes=2,\n", - " dropout_rate=transformer_config['dropout_rate'],\n", - " initializer=tf.keras.initializers.TruncatedNormal(\n", - " stddev=bert_config.initializer_range))" + " dropout_rate=bert_encoder_config['dropout_rate'],\n", + " initializer=bert_encoder_config['initializer'])" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - 
"colab_type": "code", "id": "kB-nBWhQk0dS" }, "outputs": [], @@ -1675,7 +1536,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "E6AJlOSyIO1L" }, "source": [ @@ -1688,10 +1548,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "28Dv3BPRlFTD" }, "outputs": [], @@ -1703,7 +1561,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "LRjcHr0UlT8c" }, "source": [ @@ -1714,10 +1571,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "MHY8K6kDngQn" }, "outputs": [], @@ -1733,10 +1588,9 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", + "collapsed": true, "id": "wKIcSprulu3P" }, "outputs": [], @@ -1752,7 +1606,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "IMTC_gfAl_PZ" }, "source": [ @@ -1761,10 +1614,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "YRt3VTmBmCBY" }, "outputs": [], @@ -1786,7 +1637,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "l8D9Lv3Bn740" }, "source": [ @@ -1795,10 +1645,8 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "2Hf2rpRXk89N" }, "outputs": [], diff --git a/official/colab/nlp/customize_encoder.ipynb b/official/colab/nlp/customize_encoder.ipynb index 18b45d3a66fcaab007d25c1d6db1cd461509daa2..aeddb29f96352fbd4c8df3540e6bd4b8fe70bb8b 100644 --- a/official/colab/nlp/customize_encoder.ipynb +++ b/official/colab/nlp/customize_encoder.ipynb @@ -1,9 +1,23 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Customizing a Transformer Encoder", + "private_outputs": true, + 
"provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, "cells": [ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Bp8t2AI8i7uP" }, "source": [ @@ -12,14 +26,10 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "cellView": "form", - "colab": {}, - "colab_type": "code", "id": "rxPj2Lsni9O4" }, - "outputs": [], "source": [ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", @@ -32,12 +42,13 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "6xS-9i5DrRvO" }, "source": [ @@ -47,30 +58,28 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Mwb9uw1cDXsa" }, "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/nlp/customize_encoder\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" 
/\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/models/official/colab/nlp/customize_encoder.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "iLrcV4IyrcGX" }, "source": [ @@ -84,7 +93,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "YYxdyoWgsl8t" }, "source": [ @@ -94,34 +102,30 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "fEJSFutUsn_h" }, "source": [ "### Install the TensorFlow Model Garden pip package\n", "\n", - "* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n", + "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", + "which is the nightly Model Garden package created daily automatically.\n", "* `pip` will install all models and dependencies automatically." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "thsKZDjhswhR" }, - "outputs": [], "source": [ - "!pip install -q tf-nightly\n", - "!pip install -q tf-models-nightly" - ] + "!pip install -q tf-models-official==2.4.0" + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "hpf7JPCVsqtv" }, "source": [ @@ -130,13 +134,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "my4dp-RMssQe" }, - "outputs": [], "source": [ "import numpy as np\n", "import tensorflow as tf\n", @@ -144,12 +144,13 @@ "from official.modeling import activations\n", "from official.nlp import modeling\n", "from official.nlp.modeling import layers, losses, models, networks" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "vjDmVsFfs85n" }, "source": [ @@ -160,13 +161,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", 
"id": "Oav8sbgstWc-" }, - "outputs": [], "source": [ "cfg = {\n", " \"vocab_size\": 100,\n", @@ -177,22 +174,23 @@ " \"activation\": activations.gelu,\n", " \"dropout_rate\": 0.1,\n", " \"attention_dropout_rate\": 0.1,\n", - " \"sequence_length\": 16,\n", + " \"max_sequence_length\": 16,\n", " \"type_vocab_size\": 2,\n", " \"initializer\": tf.keras.initializers.TruncatedNormal(stddev=0.02),\n", "}\n", - "bert_encoder = modeling.networks.TransformerEncoder(**cfg)\n", + "bert_encoder = modeling.networks.BertEncoder(**cfg)\n", "\n", "def build_classifier(bert_encoder):\n", " return modeling.models.BertClassifier(bert_encoder, num_classes=2)\n", "\n", "canonical_classifier_model = build_classifier(bert_encoder)" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Qe2UWI6_tsHo" }, "source": [ @@ -203,31 +201,28 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "csED2d-Yt5h6" }, - "outputs": [], "source": [ "def predict(model):\n", " batch_size = 3\n", " np.random.seed(0)\n", " word_ids = np.random.randint(\n", - " cfg[\"vocab_size\"], size=(batch_size, cfg[\"sequence_length\"]))\n", - " mask = np.random.randint(2, size=(batch_size, cfg[\"sequence_length\"]))\n", + " cfg[\"vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n", + " mask = np.random.randint(2, size=(batch_size, cfg[\"max_sequence_length\"]))\n", " type_ids = np.random.randint(\n", - " cfg[\"type_vocab_size\"], size=(batch_size, cfg[\"sequence_length\"]))\n", + " cfg[\"type_vocab_size\"], size=(batch_size, cfg[\"max_sequence_length\"]))\n", " print(model([word_ids, mask, type_ids], training=False))\n", "\n", "predict(canonical_classifier_model)" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "PzKStEK9t_Pb" }, "source": [ @@ -239,7 +234,6 @@ { "cell_type": "markdown", "metadata": 
{ - "colab_type": "text", "id": "rmwQfhj6fmKz" }, "source": [ @@ -250,7 +244,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xsMgEVHAui11" }, "source": [ @@ -263,26 +256,21 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "-JBabpa2AOz8" }, "source": [ "#### Without Customization\n", "\n", - "Without any customization, `EncoderScaffold` behaves the same the canonical `TransformerEncoder`.\n", + "Without any customization, `EncoderScaffold` behaves the same the canonical `BertEncoder`.\n", "\n", - "As shown in the following example, `EncoderScaffold` can load `TransformerEncoder`'s weights and output the same values:" + "As shown in the following example, `EncoderScaffold` can load `BertEncoder`'s weights and output the same values:" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "ktNzKuVByZQf" }, - "outputs": [], "source": [ "default_hidden_cfg = dict(\n", " num_attention_heads=cfg[\"num_attention_heads\"],\n", @@ -296,10 +284,9 @@ " vocab_size=cfg[\"vocab_size\"],\n", " type_vocab_size=cfg[\"type_vocab_size\"],\n", " hidden_size=cfg[\"hidden_size\"],\n", - " seq_length=cfg[\"sequence_length\"],\n", " initializer=tf.keras.initializers.TruncatedNormal(0.02),\n", " dropout_rate=cfg[\"dropout_rate\"],\n", - " max_seq_length=cfg[\"sequence_length\"],\n", + " max_seq_length=cfg[\"max_sequence_length\"]\n", ")\n", "default_kwargs = dict(\n", " hidden_cfg=default_hidden_cfg,\n", @@ -309,17 +296,19 @@ " return_all_layer_outputs=True,\n", " pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(0.02),\n", ")\n", + "\n", "encoder_scaffold = modeling.networks.EncoderScaffold(**default_kwargs)\n", "classifier_model_from_encoder_scaffold = build_classifier(encoder_scaffold)\n", "classifier_model_from_encoder_scaffold.set_weights(\n", " canonical_classifier_model.get_weights())\n", "predict(classifier_model_from_encoder_scaffold)" - ] + ], + 
"execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "sMaUmLyIuwcs" }, "source": [ @@ -332,18 +321,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "LTinnaG6vcsw" }, - "outputs": [], "source": [ "word_ids = tf.keras.layers.Input(\n", - " shape=(cfg['sequence_length'],), dtype=tf.int32, name=\"input_word_ids\")\n", + " shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_word_ids\")\n", "mask = tf.keras.layers.Input(\n", - " shape=(cfg['sequence_length'],), dtype=tf.int32, name=\"input_mask\")\n", + " shape=(cfg['max_sequence_length'],), dtype=tf.int32, name=\"input_mask\")\n", "embedding_layer = modeling.layers.OnDeviceEmbedding(\n", " vocab_size=cfg['vocab_size'],\n", " embedding_width=cfg['hidden_size'],\n", @@ -353,12 +338,13 @@ "attention_mask = layers.SelfAttentionMask()([word_embeddings, mask])\n", "new_embedding_network = tf.keras.Model([word_ids, mask],\n", " [word_embeddings, attention_mask])" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "HN7_yu-6O3qI" }, "source": [ @@ -368,21 +354,18 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "fO9zKFE4OpHp" }, - "outputs": [], "source": [ "tf.keras.utils.plot_model(new_embedding_network, show_shapes=True, dpi=48)" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "9cOaGQHLv12W" }, "source": [ @@ -391,13 +374,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "mtFDMNf2vIl9" }, - "outputs": [], "source": [ "kwargs = dict(default_kwargs)\n", "\n", @@ -412,12 +391,13 @@ "\n", "# Assert that there are only two inputs.\n", "assert len(classifier_model.inputs) == 2" - ] + ], + "execution_count": 
null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Z73ZQDtmwg9K" }, "source": [ @@ -432,13 +412,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "uAIarLZgw6pA" }, - "outputs": [], "source": [ "kwargs = dict(default_kwargs)\n", "\n", @@ -452,12 +428,13 @@ "\n", "# Assert that the variable `rezero_alpha` from ReZeroTransformer exists.\n", "assert 'rezero_alpha' in ''.join([x.name for x in classifier_model.trainable_weights])" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "6PMHFdvnxvR0" }, "source": [ @@ -470,7 +447,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "D6FejlgwyAy_" }, "source": [ @@ -485,13 +461,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "nFrSMrZuyNeQ" }, - "outputs": [], "source": [ "# Use TalkingHeadsAttention\n", "hidden_cfg = dict(default_hidden_cfg)\n", @@ -508,12 +480,13 @@ "\n", "# Assert that the variable `pre_softmax_weight` from TalkingHeadsAttention exists.\n", "assert 'pre_softmax_weight' in ''.join([x.name for x in classifier_model.trainable_weights])" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "kuEJcTyByVvI" }, "source": [ @@ -528,13 +501,9 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "XAbKy_l4y_-i" }, - "outputs": [], "source": [ "# Use TalkingHeadsAttention\n", "hidden_cfg = dict(default_hidden_cfg)\n", @@ -551,12 +520,13 @@ "\n", "# Assert that the variable `gate` from GatedFeedforward exists.\n", "assert 'gate' in ''.join([x.name for x in classifier_model.trainable_weights])" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", 
"id": "a_8NWUhkzeAq" }, "source": [ @@ -564,29 +534,26 @@ "\n", "Finally, you could also build a new encoder using building blocks in the modeling library.\n", "\n", - "See [AlbertTransformerEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/albert_transformer_encoder.py) as an example:\n" + "See [AlbertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/albert_encoder.py) as an example:\n" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "xsiA3RzUzmUM" }, - "outputs": [], "source": [ - "albert_encoder = modeling.networks.AlbertTransformerEncoder(**cfg)\n", + "albert_encoder = modeling.networks.AlbertEncoder(**cfg)\n", "classifier_model = build_classifier(albert_encoder)\n", "# ... Train the model ...\n", "predict(classifier_model)" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "MeidDfhlHKSO" }, "source": [ @@ -595,31 +562,14 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "Uv_juT22HERW" }, - "outputs": [], "source": [ "tf.keras.utils.plot_model(albert_encoder, show_shapes=True, dpi=48)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "Customizing a Transformer Encoder", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + ], + "execution_count": null, + "outputs": [] } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/official/colab/nlp/nlp_modeling_library_intro.ipynb b/official/colab/nlp/nlp_modeling_library_intro.ipynb index 722d115a38f05df123d6bc07298fb0a40177e64f..e4ce780c96bfbf679c91891f38b08ac3b0bb983e 100644 --- a/official/colab/nlp/nlp_modeling_library_intro.ipynb +++ 
b/official/colab/nlp/nlp_modeling_library_intro.ipynb @@ -3,7 +3,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "80xnUmoI7fBX" }, "source": [ @@ -15,8 +14,6 @@ "execution_count": null, "metadata": { "cellView": "form", - "colab": {}, - "colab_type": "code", "id": "8nvTnfs6Q692" }, "outputs": [], @@ -37,7 +34,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "WmfcMK5P5C1G" }, "source": [ @@ -47,7 +43,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "cH-oJ8R6AHMK" }, "source": [ @@ -70,7 +65,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "0H_EFIhq4-MJ" }, "source": [ @@ -82,7 +76,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "2N97-dps_nUk" }, "source": [ @@ -92,13 +85,13 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "459ygAVl_rg0" }, "source": [ "### Install the TensorFlow Model Garden pip package\n", "\n", - "* `tf-models-nightly` is the nightly Model Garden package created daily automatically.\n", + "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include latest changes, you may install `tf-models-nightly`,\n", + "which is the nightly Model Garden package created daily automatically.\n", "* `pip` will install all models and dependencies automatically." 
] }, @@ -106,20 +99,16 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "Y-qGkdh6_sZc" }, "outputs": [], "source": [ - "!pip install -q tf-nightly\n", - "!pip install -q tf-models-nightly" + "!pip install -q tf-models-official==2.4.0" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "e4huSSwyAG_5" }, "source": [ @@ -130,8 +119,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "jqYXqtjBAJd9" }, "outputs": [], @@ -146,7 +133,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "djBQWjvy-60Y" }, "source": [ @@ -160,13 +146,12 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "MKuHVlsCHmiq" }, "source": [ - "### Build a `BertPretrainer` model wrapping `TransformerEncoder`\n", + "### Build a `BertPretrainer` model wrapping `BertEncoder`\n", "\n", - "The [TransformerEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/transformer_encoder.py) implements the Transformer-based encoder as described in [BERT paper](https://arxiv.org/abs/1810.04805). It includes the embedding lookups and transformer layers, but not the masked language model or classification task networks.\n", + "The [BertEncoder](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/bert_encoder.py) implements the Transformer-based encoder as described in [BERT paper](https://arxiv.org/abs/1810.04805). It includes the embedding lookups and transformer layers, but not the masked language model or classification task networks.\n", "\n", "The [BertPretrainer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_pretrainer.py) allows a user to pass in a transformer stack, and instantiates the masked language model and classification networks that are used to create the training objectives." 
] @@ -175,8 +160,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "EXkcXz-9BwB3" }, "outputs": [], @@ -184,14 +167,13 @@ "# Build a small transformer network.\n", "vocab_size = 100\n", "sequence_length = 16\n", - "network = modeling.networks.TransformerEncoder(\n", + "network = modeling.networks.BertEncoder(\n", " vocab_size=vocab_size, num_layers=2, sequence_length=16)" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "0NH5irV5KTMS" }, "source": [ @@ -204,8 +186,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "lZNoZkBrIoff" }, "outputs": [], @@ -217,8 +197,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "o7eFOZXiIl-b" }, "outputs": [], @@ -232,7 +210,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "d5h5HT7gNHx_" }, "source": [ @@ -243,8 +220,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "2tcNfm03IBF7" }, "outputs": [], @@ -256,8 +231,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "F2oHrXGUIS0M" }, "outputs": [], @@ -280,7 +253,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "bnx3UCHniCS5" }, "source": [ @@ -292,8 +264,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "k30H4Q86f52x" }, "outputs": [], @@ -316,7 +286,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "wrmSs8GjHxVw" }, "source": [ @@ -328,7 +297,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "k8cQVFvBCV4s" }, "source": [ @@ -342,28 +310,25 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "xrLLEWpfknUW" }, "source": [ - "### Build a BertSpanLabeler wrapping 
TransformerEncoder\n", + "### Build a BertSpanLabeler wrapping BertEncoder\n", "\n", "[BertSpanLabeler](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_span_labeler.py) implements a simple single-span start-end predictor (that is, a model that predicts two values: a start token index and an end token index), suitable for SQuAD-style tasks.\n", "\n", - "Note that `BertSpanLabeler` wraps a `TransformerEncoder`, the weights of which can be restored from the above pretraining model.\n" + "Note that `BertSpanLabeler` wraps a `BertEncoder`, the weights of which can be restored from the above pretraining model.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "B941M4iUCejO" }, "outputs": [], "source": [ - "network = modeling.networks.TransformerEncoder(\n", + "network = modeling.networks.BertEncoder(\n", " vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n", "\n", "# Create a BERT trainer with the created network.\n", @@ -373,7 +338,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "QpB9pgj4PpMg" }, "source": [ @@ -384,8 +348,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "RbqRNJCLJu4H" }, "outputs": [], @@ -397,8 +359,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "fUf1vRxZJwio" }, "outputs": [], @@ -417,7 +377,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "WqhgQaN1lt-G" }, "source": [ @@ -429,8 +388,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "waqs6azNl3Nn" }, "outputs": [], @@ -450,7 +407,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "Zdf03YtZmd_d" }, "source": [ @@ -460,7 +416,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "0A1XnGSTChg9" }, 
"source": [ @@ -472,26 +427,23 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "MSK8OpZgnQa9" }, "source": [ - "### Build a BertClassifier model wrapping TransformerEncoder\n", + "### Build a BertClassifier model wrapping BertEncoder\n", "\n", - "[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a simple token classification model containing a single classification head using the `TokenClassification` network." + "[BertClassifier](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/models/bert_classifier.py) implements a [CLS] token classification model containing a single classification head." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "cXXCsffkCphk" }, "outputs": [], "source": [ - "network = modeling.networks.TransformerEncoder(\n", + "network = modeling.networks.BertEncoder(\n", " vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length)\n", "\n", "# Create a BERT trainer with the created network.\n", @@ -503,7 +455,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "8tZKueKYP4bB" }, "source": [ @@ -514,8 +465,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "snlutm9ZJgEZ" }, "outputs": [], @@ -527,8 +476,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "yyHPHsqBJkCz" }, "outputs": [], @@ -546,7 +493,6 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "w--a2mg4nzKm" }, "source": [ @@ -559,23 +505,20 @@ "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, - "colab_type": "code", "id": "9X0S1DoFn_5Q" }, "outputs": [], "source": [ "labels = np.random.randint(num_classes, size=(batch_size))\n", "\n", - "loss = modeling.losses.weighted_sparse_categorical_crossentropy_loss(\n", - 
" labels=labels, predictions=tf.nn.log_softmax(logits, axis=-1))\n", + "loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " labels, logits, from_logits=True)\n", "print(loss)" ] }, { "cell_type": "markdown", "metadata": { - "colab_type": "text", "id": "mzBqOylZo3og" }, "source": [ diff --git a/official/colab/uncertainty_quantification_with_sngp_bert.ipynb b/official/colab/uncertainty_quantification_with_sngp_bert.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c408377a5857b073c6643aafb3b5ef8212bc7001 --- /dev/null +++ b/official/colab/uncertainty_quantification_with_sngp_bert.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "vs3a5tGVAWGI" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "HYfsarcYBJQp" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aOpqCFEyBQDd" + }, + "source": [ + "# Uncertainty-aware Deep Language Learning with BERT-SNGP" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6MlSYP6cBT61" + }, + "source": [ + "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/official_models/tutorials/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/models/blob/master/official/colab/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/official/colab/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/docs/models/official/colab/fine_tuning_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-IM5IzM26GBh" + }, + "source": [ + "In the [SNGP 
tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp), you learned how to build SNGP model on top of a deep residual network to improve its ability to quantify its uncertainty. In this tutorial, you will apply SNGP to a natural language understanding (NLU) task by building it on top of a deep BERT encoder to improve deep NLU model's ability in detecting out-of-scope queries. \n", + "\n", + "Specifically, you will:\n", + "* Build BERT-SNGP, a SNGP-augmented [BERT](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2) model.\n", + "* Load the [CLINC Out-of-scope (OOS)](https://www.tensorflow.org/datasets/catalog/clinc_oos) intent detection dataset.\n", + "* Train the BERT-SNGP model.\n", + "* Evaluate the BERT-SNGP model's performance in uncertainty calibration and out-of-domain detection.\n", + "\n", + "Beyond CLINC OOS, the SNGP model has been applied to large-scale datasets such as [Jigsaw toxicity detection](https://www.tensorflow.org/datasets/catalog/wikipedia_toxicity_subtypes), and to the image datasets such as [CIFAR-100](https://www.tensorflow.org/datasets/catalog/cifar100) and [ImageNet](https://www.tensorflow.org/datasets/catalog/imagenet2012). \n", + "For benchmark results of SNGP and other uncertainty methods, as well as high-quality implementation with end-to-end training / evaluation scripts, you can check out the [Uncertainty Baselines](https://github.com/google/uncertainty-baselines) benchmark." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-bsids4eAYYI" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3sgnLBKk7iuR" + }, + "outputs": [], + "source": [ + "!pip install tf-models-nightly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M42dnVSk7dVy" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "import sklearn.metrics\n", + "import sklearn.calibration\n", + "\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "\n", + "import official.nlp.modeling.layers as layers\n", + "import official.nlp.optimization as optimization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cnRQfguq6GZj" + }, + "source": [ + "First implement a standard BERT classifier following the [classify text with BERT](https://www.tensorflow.org/tutorials/text/classify_text_with_bert) tutorial. We will use the [BERT-base](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3) encoder, and the built-in [`ClassificationHead`](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/cls_head.py) as the classifier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bNBEGs7s6NHB" + }, + "outputs": [], + "source": [ + "#@title Standard BERT model\n", + "\n", + "PREPROCESS_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'\n", + "MODEL_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3'\n", + "\n", + "class BertClassifier(tf.keras.Model):\n", + " def __init__(self, \n", + " num_classes=150, inner_dim=768, dropout_rate=0.1,\n", + " **classifier_kwargs):\n", + " \n", + " super().__init__()\n", + " self.classifier_kwargs = classifier_kwargs\n", + "\n", + " # Initiate the BERT encoder components.\n", + " self.bert_preprocessor = hub.KerasLayer(PREPROCESS_HANDLE, name='preprocessing')\n", + " self.bert_hidden_layer = hub.KerasLayer(MODEL_HANDLE, trainable=True, name='bert_encoder')\n", + "\n", + " # Defines the encoder and classification layers.\n", + " self.bert_encoder = self.make_bert_encoder()\n", + " self.classifier = self.make_classification_head(num_classes, inner_dim, dropout_rate)\n", + "\n", + " def make_bert_encoder(self):\n", + " text_inputs = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')\n", + " encoder_inputs = self.bert_preprocessor(text_inputs)\n", + " encoder_outputs = self.bert_hidden_layer(encoder_inputs)\n", + " return tf.keras.Model(text_inputs, encoder_outputs)\n", + "\n", + " def make_classification_head(self, num_classes, inner_dim, dropout_rate):\n", + " return layers.ClassificationHead(\n", + " num_classes=num_classes, \n", + " inner_dim=inner_dim,\n", + " dropout_rate=dropout_rate,\n", + " **self.classifier_kwargs)\n", + "\n", + " def call(self, inputs, **kwargs):\n", + " encoder_outputs = self.bert_encoder(inputs)\n", + " classifier_inputs = encoder_outputs['sequence_output']\n", + " return self.classifier(classifier_inputs, **kwargs)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SbhbNbKk6WNR" + }, + "source": [ + "### 
Build SNGP model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p7YakN0V6Oif" + }, + "source": [ + "To implement a BERT-SNGP model, you only need to replace the `ClassificationHead` with the built-in [`GaussianProcessClassificationHead`](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/cls_head.py). Spectral normalization is already pre-packaged into this classification head. Like in the [SNGP tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp), add a covariance reset callback to the model, so the model automatically reset the covariance estimator at the begining of a new epoch to avoid counting the same data twice." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QCaJy85y8WeE" + }, + "outputs": [], + "source": [ + "class ResetCovarianceCallback(tf.keras.callbacks.Callback):\n", + "\n", + " def on_epoch_begin(self, epoch, logs=None):\n", + " \"\"\"Resets covariance matrix at the begining of the epoch.\"\"\"\n", + " if epoch \u003e 0:\n", + " self.model.classifier.reset_covariance_matrix()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YoHgOuiZ6Q4y" + }, + "outputs": [], + "source": [ + "class SNGPBertClassifier(BertClassifier):\n", + "\n", + " def make_classification_head(self, num_classes, inner_dim, dropout_rate):\n", + " return layers.GaussianProcessClassificationHead(\n", + " num_classes=num_classes, \n", + " inner_dim=inner_dim,\n", + " dropout_rate=dropout_rate,\n", + " gp_cov_momentum=-1,\n", + " temperature=30.,\n", + " **self.classifier_kwargs)\n", + "\n", + " def fit(self, *args, **kwargs):\n", + " \"\"\"Adds ResetCovarianceCallback to model callbacks.\"\"\"\n", + " kwargs['callbacks'] = list(kwargs.get('callbacks', []))\n", + " kwargs['callbacks'].append(ResetCovarianceCallback())\n", + "\n", + " return super().fit(*args, **kwargs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UOj5YWTt6dCe" + 
}, + "source": [ + "Note: The `GaussianProcessClassificationHead` takes a new argument `temperature`. It corresponds to the $\\lambda$ parameter in the __mean-field approximation__ introduced in the [SNGP tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp). In practice, this value is usually treated as a hyperparamter, and is finetuned to optimize the model's calibration performance." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qdU90uDT6hFq" + }, + "source": [ + "### Load CLINC OOS dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AnuNeyHw6kH7" + }, + "source": [ + "Now load the [CLINC OOS](https://www.tensorflow.org/datasets/catalog/clinc_oos) intent detection dataset. This dataset contains 15000 user's spoken queries collected over 150 intent classes, it also contains 1000 out-of-domain (OOD) sentences that are not covered by any of the known classes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mkMZN2iA6hhg" + }, + "outputs": [], + "source": [ + "(clinc_train, clinc_test, clinc_test_oos), ds_info = tfds.load(\n", + " 'clinc_oos', split=['train', 'test', 'test_oos'], with_info=True, batch_size=-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UJSL2nm8Bo02" + }, + "source": [ + "Make the train and test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cgkOOZOq6fQL" + }, + "outputs": [], + "source": [ + "train_examples = clinc_train['text']\n", + "train_labels = clinc_train['intent']\n", + "\n", + "# Makes the in-domain (IND) evaluation data.\n", + "ind_eval_data = (clinc_test['text'], clinc_test['intent'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kw76f6caBq_E" + }, + "source": [ + "Create a OOD evaluation dataset. For this, combine the in-domain test data `clinc_test` and the out-of-domain data `clinc_test_oos`. 
We will also assign label 0 to the in-domain examples, and label 1 to the out-of-domain examples. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uVFuzecR64FJ" + }, + "outputs": [], + "source": [ + "test_data_size = ds_info.splits['test'].num_examples\n", + "oos_data_size = ds_info.splits['test_oos'].num_examples\n", + "\n", + "# Combines the in-domain and out-of-domain test examples.\n", + "oos_texts = tf.concat([clinc_test['text'], clinc_test_oos['text']], axis=0)\n", + "oos_labels = tf.constant([0] * test_data_size + [1] * oos_data_size)\n", + "\n", + "# Converts into a TF dataset.\n", + "ood_eval_dataset = tf.data.Dataset.from_tensor_slices(\n", + " {\"text\": oos_texts, \"label\": oos_labels})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZcHwfwfU6qCE" + }, + "source": [ + "### Train and evaluate" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VTY6KYc6sBB" + }, + "source": [ + "First set up the basic training configurations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_-uUkUtk6qWC" + }, + "outputs": [], + "source": [ + "TRAIN_EPOCHS = 3\n", + "TRAIN_BATCH_SIZE = 32\n", + "EVAL_BATCH_SIZE = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tiEjMdFV6wXQ" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def bert_optimizer(learning_rate, \n", + " batch_size=TRAIN_BATCH_SIZE, epochs=TRAIN_EPOCHS, \n", + " warmup_rate=0.1):\n", + " \"\"\"Creates an AdamWeightDecay optimizer with learning rate schedule.\"\"\"\n", + " train_data_size = ds_info.splits['train'].num_examples\n", + " \n", + " steps_per_epoch = int(train_data_size / batch_size)\n", + " num_train_steps = steps_per_epoch * epochs\n", + " num_warmup_steps = int(warmup_rate * num_train_steps) \n", + "\n", + " # Creates learning schedule.\n", + " lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(\n", + " initial_learning_rate=learning_rate,\n", + " decay_steps=num_train_steps,\n", + " end_learning_rate=0.0) \n", + " \n", + " return optimization.AdamWeightDecay(\n", + " learning_rate=lr_schedule,\n", + " weight_decay_rate=0.01,\n", + " epsilon=1e-6,\n", + " exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KX_Hzl3l6w-H" + }, + "outputs": [], + "source": [ + "optimizer = bert_optimizer(learning_rate=1e-4)\n", + "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", + "metrics = tf.metrics.SparseCategoricalAccuracy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ptn9Cupe6z7o" + }, + "outputs": [], + "source": [ + "fit_configs = dict(batch_size=TRAIN_BATCH_SIZE,\n", + " epochs=TRAIN_EPOCHS,\n", + " validation_batch_size=EVAL_BATCH_SIZE, \n", + " validation_data=ind_eval_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "0ZK5PBwW61jd" + }, + "outputs": [], + "source": [ + "sngp_model = SNGPBertClassifier()\n", + "sngp_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)\n", + "sngp_model.fit(train_examples, train_labels, **fit_configs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cpDsgTYx63tO" + }, + "source": [ + "### Evaluate OOD performance" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d5NGVe7L67bB" + }, + "source": [ + "Evaluate how well the model can detect the unfamiliar out-of-domain queries. For rigorous evaluation, use the OOD evaluation dataset `ood_eval_dataset` built earlier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yyLgt_lL7APo" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def oos_predict(model, ood_eval_dataset, **model_kwargs):\n", + " oos_labels = []\n", + " oos_probs = []\n", + "\n", + " ood_eval_dataset = ood_eval_dataset.batch(EVAL_BATCH_SIZE)\n", + " for oos_batch in ood_eval_dataset:\n", + " oos_text_batch = oos_batch[\"text\"]\n", + " oos_label_batch = oos_batch[\"label\"] \n", + "\n", + " pred_logits = model(oos_text_batch, **model_kwargs)\n", + " pred_probs_all = tf.nn.softmax(pred_logits, axis=-1)\n", + " pred_probs = tf.reduce_max(pred_probs_all, axis=-1)\n", + "\n", + " oos_labels.append(oos_label_batch)\n", + " oos_probs.append(pred_probs)\n", + "\n", + " oos_probs = tf.concat(oos_probs, axis=0)\n", + " oos_labels = tf.concat(oos_labels, axis=0) \n", + "\n", + " return oos_probs, oos_labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dmc2tVXs6_uo" + }, + "source": [ + "Computes the OOD probabilities as $1 - p(x)$, where $p(x)=softmax(logit(x))$ is the predictive probability." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_9aFVVDO7C7o" + }, + "outputs": [], + "source": [ + "sngp_probs, ood_labels = oos_predict(sngp_model, ood_eval_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_PC0wwZp7GJD" + }, + "outputs": [], + "source": [ + "ood_probs = 1 - sngp_probs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AsandMTX7HjX" + }, + "source": [ + "Now evaluate how well the model's uncertainty score `ood_probs` predicts the out-of-domain label. First compute the Area under precision-recall curve (AUPRC) for OOD probability v.s. OOD detection accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0u5Wx8AP7Mdx" + }, + "outputs": [], + "source": [ + "precision, recall, _ = sklearn.metrics.precision_recall_curve(ood_labels, ood_probs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "axcctOsh7N5A" + }, + "outputs": [], + "source": [ + "auprc = sklearn.metrics.auc(recall, precision)\n", + "print(f'SNGP AUPRC: {auprc:.4f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_GEqxq-7Q1Y" + }, + "source": [ + "This matches the SNGP performance reported at the CLINC OOS benchmark under the [Uncertainty Baselines](https://github.com/google/uncertainty-baselines)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8H4vYcyd7Ux2" + }, + "source": [ + "Next, examine the model's quality in [uncertainty calibration](https://scikit-learn.org/stable/modules/calibration.html), i.e., whether the model's predictive probability corresponds to its predictive accuracy. A well-calibrated model is considered trust-worthy, since, for example, its predictive probability $p(x)=0.8$ means that the model is correct 80% of the time." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x5GxrSWJ7SYn" + }, + "outputs": [], + "source": [ + "prob_true, prob_pred = sklearn.calibration.calibration_curve(\n", + " ood_labels, ood_probs, n_bins=10, strategy='quantile')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ozzJM-D-7XVq" + }, + "outputs": [], + "source": [ + "plt.plot(prob_pred, prob_true)\n", + "\n", + "plt.plot([0., 1.], [0., 1.], c='k', linestyle=\"--\")\n", + "plt.xlabel('Predictive Probability')\n", + "plt.ylabel('Predictive Accuracy')\n", + "plt.title('Calibration Plots, SNGP')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "36M6HeHx7ZI4" + }, + "source": [ + "## Resources and further reading" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xdFTpyaP0A-N" + }, + "source": [ + "* See the [SNGP tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp) for an detailed walkthrough of implementing SNGP from scratch. 
\n", + "* See [Uncertainty Baselines](https://github.com/google/uncertainty-baselines) for the implementation of SNGP model (and many other uncertainty methods) on a wide variety of benchmark datasets (e.g., [CIFAR](https://www.tensorflow.org/datasets/catalog/cifar100), [ImageNet](https://www.tensorflow.org/datasets/catalog/imagenet2012), [Jigsaw toxicity detection](https://www.tensorflow.org/datasets/catalog/wikipedia_toxicity_subtypes), etc).\n", + "* For a deeper understanding of the SNGP method, check out the paper [Simple and Principled Uncertainty Estimation with Deterministic Deep Learning via Distance Awareness](https://arxiv.org/abs/2006.10108).\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "uncertainty_quantification_with_sngp_bert.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/official/common/__init__.py b/official/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a25710c222e3327cb20e000db5df5c5651c4a2cc --- /dev/null +++ b/official/common/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/official/common/dataset_fn.py b/official/common/dataset_fn.py new file mode 100644 index 0000000000000000000000000000000000000000..4ac16a31b555588368a6c0aba73adbe62a95c2eb --- /dev/null +++ b/official/common/dataset_fn.py @@ -0,0 +1,42 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Utility library for picking an appropriate dataset function.""" + +from typing import Any, Callable, Union, Type + +import tensorflow as tf + +PossibleDatasetType = Union[Type[tf.data.Dataset], Callable[[tf.Tensor], Any]] + + +def pick_dataset_fn(file_type: str) -> PossibleDatasetType: + if file_type == 'tfrecord': + return tf.data.TFRecordDataset + + raise ValueError('Unrecognized file_type: {}'.format(file_type)) diff --git a/official/common/distribute_utils.py b/official/common/distribute_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6368e8d1d199db63928f2a4444d0c126eafe488f --- /dev/null +++ b/official/common/distribute_utils.py @@ -0,0 +1,230 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper functions for running models in a distributed setting.""" + +import json +import os +import tensorflow as tf + + +def _collective_communication(all_reduce_alg): + """Return a CollectiveCommunication based on all_reduce_alg. + + Args: + all_reduce_alg: a string specifying which collective communication to pick, + or None. 
+ + Returns: + tf.distribute.experimental.CollectiveCommunication object + + Raises: + ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] + """ + collective_communication_options = { + None: tf.distribute.experimental.CollectiveCommunication.AUTO, + "ring": tf.distribute.experimental.CollectiveCommunication.RING, + "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL + } + if all_reduce_alg not in collective_communication_options: + raise ValueError( + "When used with `multi_worker_mirrored`, valid values for " + "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( + all_reduce_alg)) + return collective_communication_options[all_reduce_alg] + + +def _mirrored_cross_device_ops(all_reduce_alg, num_packs): + """Return a CrossDeviceOps based on all_reduce_alg and num_packs. + + Args: + all_reduce_alg: a string specifying which cross device op to pick, or None. + num_packs: an integer specifying number of packs for the cross device op. + + Returns: + tf.distribute.CrossDeviceOps object or None. + + Raises: + ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"]. + """ + if all_reduce_alg is None: + return None + mirrored_all_reduce_options = { + "nccl": tf.distribute.NcclAllReduce, + "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce + } + if all_reduce_alg not in mirrored_all_reduce_options: + raise ValueError( + "When used with `mirrored`, valid values for all_reduce_alg are " + "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( + all_reduce_alg)) + cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] + return cross_device_ops_class(num_packs=num_packs) + + +def tpu_initialize(tpu_address): + """Initializes TPU for TF 2.x training. + + Args: + tpu_address: string, bns address of master TPU worker. + + Returns: + A TPUClusterResolver. 
+ """ + cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=tpu_address) + if tpu_address not in ("", "local"): + tf.config.experimental_connect_to_cluster(cluster_resolver) + tf.tpu.experimental.initialize_tpu_system(cluster_resolver) + return cluster_resolver + + +def get_distribution_strategy(distribution_strategy="mirrored", + num_gpus=0, + all_reduce_alg=None, + num_packs=1, + tpu_address=None, + **kwargs): + """Return a DistributionStrategy for running the model. + + Args: + distribution_strategy: a string specifying which distribution strategy to + use. Accepted values are "off", "one_device", "mirrored", + "parameter_server", "multi_worker_mirrored", and "tpu" -- case + insensitive. "off" means not to use Distribution Strategy; "tpu" means to + use TPUStrategy using `tpu_address`. + num_gpus: Number of GPUs to run this model. + all_reduce_alg: Optional. Specifies which algorithm to use when performing + all-reduce. For `MirroredStrategy`, valid values are "nccl" and + "hierarchical_copy". For `MultiWorkerMirroredStrategy`, valid values are + "ring" and "nccl". If None, DistributionStrategy will choose based on + device topology. + num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce` + or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`. + tpu_address: Optional. String that represents TPU to connect to. Must not be + None if `distribution_strategy` is set to `tpu`. + **kwargs: Additional kwargs for internal usages. + + Returns: + tf.distribute.DistibutionStrategy object. + Raises: + ValueError: if `distribution_strategy` is "off" or "one_device" and + `num_gpus` is larger than 1; or `num_gpus` is negative or if + `distribution_strategy` is `tpu` but `tpu_address` is not specified. + """ + del kwargs + if num_gpus < 0: + raise ValueError("`num_gpus` can not be negative.") + + if not isinstance(distribution_strategy, str): + msg = ("distribution_strategy must be a string but got: %s." 
% + (distribution_strategy,)) + if distribution_strategy == False: # pylint: disable=singleton-comparison,g-explicit-bool-comparison + msg += (" If you meant to pass the string 'off', make sure you add " + "quotes around 'off' so that yaml interprets it as a string " + "instead of a bool.") + raise ValueError(msg) + + distribution_strategy = distribution_strategy.lower() + if distribution_strategy == "off": + if num_gpus > 1: + raise ValueError("When {} GPUs are specified, distribution_strategy " + "flag cannot be set to `off`.".format(num_gpus)) + return None + + if distribution_strategy == "tpu": + # When tpu_address is an empty string, we communicate with local TPUs. + cluster_resolver = tpu_initialize(tpu_address) + return tf.distribute.TPUStrategy(cluster_resolver) + + if distribution_strategy == "multi_worker_mirrored": + return tf.distribute.experimental.MultiWorkerMirroredStrategy( + communication=_collective_communication(all_reduce_alg)) + + if distribution_strategy == "one_device": + if num_gpus == 0: + return tf.distribute.OneDeviceStrategy("device:CPU:0") + if num_gpus > 1: + raise ValueError("`OneDeviceStrategy` can not be used for more than " + "one device.") + return tf.distribute.OneDeviceStrategy("device:GPU:0") + + if distribution_strategy == "mirrored": + if num_gpus == 0: + devices = ["device:CPU:0"] + else: + devices = ["device:GPU:%d" % i for i in range(num_gpus)] + return tf.distribute.MirroredStrategy( + devices=devices, + cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs)) + + if distribution_strategy == "parameter_server": + cluster_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver() + return tf.distribute.experimental.ParameterServerStrategy(cluster_resolver) + + raise ValueError("Unrecognized Distribution Strategy: %r" % + distribution_strategy) + + +def configure_cluster(worker_hosts=None, task_index=-1): + """Set multi-worker cluster spec in TF_CONFIG environment variable. 
+ + Args: + worker_hosts: comma-separated list of worker ip:port pairs. + task_index: index of the worker. + + Returns: + Number of workers in the cluster. + """ + tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) + if tf_config: + num_workers = ( + len(tf_config["cluster"].get("chief", [])) + + len(tf_config["cluster"].get("worker", []))) + elif worker_hosts: + workers = worker_hosts.split(",") + num_workers = len(workers) + if num_workers > 1 and task_index < 0: + raise ValueError("Must specify task_index when number of workers > 1") + task_index = 0 if num_workers == 1 else task_index + os.environ["TF_CONFIG"] = json.dumps({ + "cluster": { + "worker": workers + }, + "task": { + "type": "worker", + "index": task_index + } + }) + else: + num_workers = 1 + return num_workers + + +def get_strategy_scope(strategy): + if strategy: + strategy_scope = strategy.scope() + else: + strategy_scope = DummyContextManager() + + return strategy_scope + + +class DummyContextManager(object): + + def __enter__(self): + pass + + def __exit__(self, *args): + pass diff --git a/official/common/distribute_utils_test.py b/official/common/distribute_utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1b13c6e1ca169d0f0b39afc6abd606fe32369076 --- /dev/null +++ b/official/common/distribute_utils_test.py @@ -0,0 +1,59 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" Tests for distribution util functions.""" + +import tensorflow as tf + +from official.common import distribute_utils + + +class GetDistributionStrategyTest(tf.test.TestCase): + """Tests for get_distribution_strategy.""" + + def test_one_device_strategy_cpu(self): + ds = distribute_utils.get_distribution_strategy(num_gpus=0) + self.assertEquals(ds.num_replicas_in_sync, 1) + self.assertEquals(len(ds.extended.worker_devices), 1) + self.assertIn('CPU', ds.extended.worker_devices[0]) + + def test_one_device_strategy_gpu(self): + ds = distribute_utils.get_distribution_strategy(num_gpus=1) + self.assertEquals(ds.num_replicas_in_sync, 1) + self.assertEquals(len(ds.extended.worker_devices), 1) + self.assertIn('GPU', ds.extended.worker_devices[0]) + + def test_mirrored_strategy(self): + ds = distribute_utils.get_distribution_strategy(num_gpus=5) + self.assertEquals(ds.num_replicas_in_sync, 5) + self.assertEquals(len(ds.extended.worker_devices), 5) + for device in ds.extended.worker_devices: + self.assertIn('GPU', device) + + def test_no_strategy(self): + ds = distribute_utils.get_distribution_strategy('off') + self.assertIsNone(ds) + + def test_invalid_strategy(self): + with self.assertRaisesRegexp( + ValueError, + 'distribution_strategy must be a string but got: False. If'): + distribute_utils.get_distribution_strategy(False) + with self.assertRaisesRegexp( + ValueError, 'distribution_strategy must be a string but got: 1'): + distribute_utils.get_distribution_strategy(1) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/common/flags.py b/official/common/flags.py new file mode 100644 index 0000000000000000000000000000000000000000..206aa0fa011d49ebca0d19ad5c759faaed2e64a3 --- /dev/null +++ b/official/common/flags.py @@ -0,0 +1,92 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The central place to define flags.""" + +from absl import flags + + +def define_flags(): + """Defines flags.""" + flags.DEFINE_string( + 'experiment', default=None, help='The experiment type registered.') + + flags.DEFINE_enum( + 'mode', + default=None, + enum_values=[ + 'train', 'eval', 'train_and_eval', 'continuous_eval', + 'continuous_train_and_eval', 'train_and_validate' + ], + help='Mode to run: `train`, `eval`, `train_and_eval`, ' + '`continuous_eval`, `continuous_train_and_eval` and ' + '`train_and_validate` (which is not implemented in ' + 'the open source version).') + + flags.DEFINE_string( + 'model_dir', + default=None, + help='The directory where the model and training/evaluation summaries' + 'are stored.') + + flags.DEFINE_multi_string( + 'config_file', + default=None, + help='YAML/JSON files which specifies overrides. The override order ' + 'follows the order of args. Note that each file ' + 'can be used as an override template to override the default parameters ' + 'specified in Python. If the same parameter is specified in both ' + '`--config_file` and `--params_override`, `config_file` will be used ' + 'first, followed by params_override.') + + flags.DEFINE_string( + 'params_override', + default=None, + help='a YAML/JSON string or a YAML file which specifies additional ' + 'overrides over the default parameters and those specified in ' + '`--config_file`. Note that this is supposed to be used only to override ' + 'the model parameters, but not the parameters like TPU specific flags. 
' + 'One canonical use case of `--config_file` and `--params_override` is ' + 'users first define a template config file using `--config_file`, then ' + 'use `--params_override` to adjust the minimal set of tuning parameters, ' + 'for example setting up different `train_batch_size`. The final override ' + 'order of parameters: default_model_params --> params from config_file ' + '--> params in params_override. See also the help message of ' + '`--config_file`.') + + # The libraries rely on gin often make mistakes that include flags inside + # the library files which causes conflicts. + try: + flags.DEFINE_multi_string( + 'gin_file', default=None, help='List of paths to the config files.') + except flags.DuplicateFlagError: + pass + + try: + flags.DEFINE_multi_string( + 'gin_params', + default=None, + help='Newline separated list of Gin parameter bindings.') + except flags.DuplicateFlagError: + pass + + flags.DEFINE_string( + 'tpu', + default=None, + help='The Cloud TPU to use for training. This should be either the name ' + 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 ' + 'url.') + + flags.DEFINE_string( + 'tf_data_service', default=None, help='The tf.data service address') diff --git a/official/common/registry_imports.py b/official/common/registry_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..06f3384db6283cbef08070f3678d0afe36e50c08 --- /dev/null +++ b/official/common/registry_imports.py @@ -0,0 +1,20 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""All necessary imports for registration.""" +# pylint: disable=unused-import +from official.nlp import tasks +from official.nlp.configs import experiment_configs +from official.utils.testing import mock_task +from official.vision import beta diff --git a/official/core/__init__.py b/official/core/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/core/__init__.py +++ b/official/core/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/core/base_task.py b/official/core/base_task.py index 4477b1ca4cc272da3309917f1ae95188c0cfdc7d..3ef5d0d5984026f2b3d1272ff5cba42c706e2841 100644 --- a/official/core/base_task.py +++ b/official/core/base_task.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,52 +11,81 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Defines the base task abstraction.""" import abc -import functools -from typing import Any, Callable, Optional +from typing import Optional -import six +from absl import logging import tensorflow as tf -from official.modeling.hyperparams import config_definitions as cfg -from official.utils import registry +from official.core import config_definitions +from official.modeling import optimization +from official.modeling import performance + +OptimizationConfig = optimization.OptimizationConfig +RuntimeConfig = config_definitions.RuntimeConfig -@six.add_metaclass(abc.ABCMeta) -class Task(tf.Module): +class Task(tf.Module, metaclass=abc.ABCMeta): """A single-replica view of training procedure. - Tasks provide artifacts for training/evalution procedures, including - loading/iterating over Datasets, initializing the model, calculating the loss - and customized metrics with reduction. + Tasks provide artifacts for training/validation procedures, including + loading/iterating over Datasets, training/validation steps, calculating the + loss and customized metrics with reduction. """ # Special keys in train/validate step returned logs. loss = "loss" - def __init__(self, params: cfg.TaskConfig, logging_dir: str = None): + def __init__(self, params, logging_dir: str = None, name: str = None): """Task initialization. Args: - params: cfg.TaskConfig instance. + params: the task configuration instance, which can be any of dataclass, + ConfigDict, namedtuple, etc. logging_dir: a string pointing to where the model, summaries etc. will be saved. You can also write additional stuff in this directory. + name: the task name. 
""" + super().__init__(name=name) self._task_config = params self._logging_dir = logging_dir @property - def task_config(self) -> cfg.TaskConfig: + def task_config(self): return self._task_config @property def logging_dir(self) -> str: return self._logging_dir + @classmethod + def create_optimizer(cls, optimizer_config: OptimizationConfig, + runtime_config: Optional[RuntimeConfig] = None): + """Creates an TF optimizer from configurations. + + Args: + optimizer_config: the parameters of the Optimization settings. + runtime_config: the parameters of the runtime. + + Returns: + A tf.optimizers.Optimizer object. + """ + opt_factory = optimization.OptimizerFactory(optimizer_config) + optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) + # Configuring optimizer when loss_scale is set in runtime config. This helps + # avoiding overflow/underflow for float16 computations. + if runtime_config and runtime_config.loss_scale: + optimizer = performance.configure_optimizer( + optimizer, + use_float16=runtime_config.mixed_precision_dtype == "float16", + loss_scale=runtime_config.loss_scale) + + return optimizer + def initialize(self, model: tf.keras.Model): - """A callback function used as CheckpointManager's init_fn. + """[Optional] A callback function used as CheckpointManager's init_fn. This function will be called when no checkpoint is found for the model. If there is a checkpoint, the checkpoint will be loaded and this function @@ -67,54 +95,34 @@ class Task(tf.Module): Args: model: The keras.Model built or used by this task. 
""" - pass + ckpt_dir_or_file = self.task_config.init_checkpoint + logging.info("Trying to load pretrained checkpoint from %s", + ckpt_dir_or_file) + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + if not ckpt_dir_or_file: + return + + if hasattr(model, "checkpoint_items"): + checkpoint_items = model.checkpoint_items + else: + checkpoint_items = dict(model=model) + ckpt = tf.train.Checkpoint(**checkpoint_items) + status = ckpt.read(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + logging.info("Finished loading pretrained checkpoint from %s", + ckpt_dir_or_file) - @abc.abstractmethod def build_model(self) -> tf.keras.Model: - """Creates model architecture. + """[Optional] Creates model architecture. Returns: A model instance. """ - def compile_model(self, - model: tf.keras.Model, - optimizer: tf.keras.optimizers.Optimizer, - loss=None, - train_step: Optional[Callable[..., Any]] = None, - validation_step: Optional[Callable[..., Any]] = None, - **kwargs) -> tf.keras.Model: - """Compiles the model with objects created by the task. - - The method should not be used in any customized training implementation. - - Args: - model: a keras.Model. - optimizer: the keras optimizer. - loss: a callable/list of losses. - train_step: optional train step function defined by the task. - validation_step: optional validation_step step function defined by the - task. - **kwargs: other kwargs consumed by keras.Model compile(). - - Returns: - a compiled keras.Model. 
- """ - if bool(loss is None) == bool(train_step is None): - raise ValueError("`loss` and `train_step` should be exclusive to " - "each other.") - model.compile(optimizer=optimizer, loss=loss, **kwargs) - - if train_step: - model.train_step = functools.partial( - train_step, model=model, optimizer=model.optimizer) - if validation_step: - model.test_step = functools.partial(validation_step, model=model) - return model - @abc.abstractmethod def build_inputs(self, - params: cfg.DataConfig, + params, input_context: Optional[tf.distribute.InputContext] = None): """Returns a dataset or a nested structure of dataset functions. @@ -122,7 +130,8 @@ class Task(tf.Module): With distributed training, this method runs on remote hosts. Args: - params: hyperparams to create input pipelines. + params: hyperparams to create input pipelines, which can be any of + dataclass, ConfigDict, namedtuple, etc. input_context: optional distribution input pipeline context. Returns: @@ -155,26 +164,30 @@ class Task(tf.Module): return [] def process_metrics(self, metrics, labels, model_outputs): - """Process and update metrics. Called when using custom training loop API. + """Process and update metrics. + + Called when using custom training loop API. Args: - metrics: a nested structure of metrics objects. - The return of function self.build_metrics. + metrics: a nested structure of metrics objects. The return of function + self.build_metrics. labels: a tensor or a nested structure of tensors. - model_outputs: a tensor or a nested structure of tensors. - For example, output of the keras model built by self.build_model. + model_outputs: a tensor or a nested structure of tensors. For example, + output of the keras model built by self.build_model. """ for metric in metrics: metric.update_state(labels, model_outputs) def process_compiled_metrics(self, compiled_metrics, labels, model_outputs): - """Process and update compiled_metrics. call when using compile/fit API. 
+ """Process and update compiled_metrics. + + call when using compile/fit API. Args: compiled_metrics: the compiled metrics (model.compiled_metrics). labels: a tensor or a nested structure of tensors. - model_outputs: a tensor or a nested structure of tensors. - For example, output of the keras model built by self.build_model. + model_outputs: a tensor or a nested structure of tensors. For example, + output of the keras model built by self.build_model. """ compiled_metrics.update_state(labels, model_outputs) @@ -203,8 +216,14 @@ class Task(tf.Module): with tf.GradientTape() as tape: outputs = model(features, training=True) # Computes per-replica loss. - loss = self.build_losses( - labels=labels, model_outputs=outputs, aux_losses=model.losses) + if model.compiled_loss: + loss = model.compiled_loss( + labels, outputs, regularization_losses=model.losses) + loss += self.build_losses( + labels=labels, model_outputs=outputs, aux_losses=None) + else: + loss = self.build_losses( + labels=labels, model_outputs=outputs, aux_losses=model.losses) # Scales loss as the default gradients allreduce performs sum inside the # optimizer. scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync @@ -212,22 +231,22 @@ class Task(tf.Module): # For mixed precision, when a LossScaleOptimizer is used, the loss is # scaled to avoid numeric underflow. 
if isinstance(optimizer, - tf.keras.mixed_precision.experimental.LossScaleOptimizer): + tf.keras.mixed_precision.LossScaleOptimizer): scaled_loss = optimizer.get_scaled_loss(scaled_loss) tvars = model.trainable_variables grads = tape.gradient(scaled_loss, tvars) if isinstance(optimizer, - tf.keras.mixed_precision.experimental.LossScaleOptimizer): + tf.keras.mixed_precision.LossScaleOptimizer): grads = optimizer.get_unscaled_gradients(grads) optimizer.apply_gradients(list(zip(grads, tvars))) logs = {self.loss: loss} if metrics: self.process_metrics(metrics, labels, outputs) - logs.update({m.name: m.result() for m in metrics}) - elif model.compiled_metrics: + if model.compiled_metrics: self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics or []}) logs.update({m.name: m.result() for m in model.metrics}) return logs @@ -254,9 +273,9 @@ class Task(tf.Module): logs = {self.loss: loss} if metrics: self.process_metrics(metrics, labels, outputs) - logs.update({m.name: m.result() for m in metrics}) - elif model.compiled_metrics: + if model.compiled_metrics: self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics or []}) logs.update({m.name: m.result() for m in model.metrics}) return logs @@ -278,53 +297,8 @@ class Task(tf.Module): """Optional aggregation over logs returned from a validation step.""" pass - def reduce_aggregated_logs(self, aggregated_logs): + def reduce_aggregated_logs(self, + aggregated_logs, + global_step: Optional[tf.Tensor] = None): """Optional reduce of aggregated logs over validation steps.""" return {} - - -_REGISTERED_TASK_CLS = {} - - -# TODO(b/158268740): Move these outside the base class file. -# TODO(b/158741360): Add type annotations once pytype checks across modules. -def register_task_cls(task_config_cls): - """Decorates a factory of Tasks for lookup by a subclass of TaskConfig. 
- - This decorator supports registration of tasks as follows: - - ``` - @dataclasses.dataclass - class MyTaskConfig(TaskConfig): - # Add fields here. - pass - - @register_task_cls(MyTaskConfig) - class MyTask(Task): - # Inherits def __init__(self, task_config). - pass - - my_task_config = MyTaskConfig() - my_task = get_task(my_task_config) # Returns MyTask(my_task_config). - ``` - - Besisdes a class itself, other callables that create a Task from a TaskConfig - can be decorated by the result of this function, as long as there is at most - one registration for each config class. - - Args: - task_config_cls: a subclass of TaskConfig (*not* an instance of TaskConfig). - Each task_config_cls can only be used for a single registration. - - Returns: - A callable for use as class decorator that registers the decorated class - for creation from an instance of task_config_cls. - """ - return registry.register(_REGISTERED_TASK_CLS, task_config_cls) - - -# The user-visible get_task() is defined after classes have been registered. -# TODO(b/158741360): Add type annotations once pytype checks across modules. -def get_task_cls(task_config_cls): - task_cls = registry.lookup(_REGISTERED_TASK_CLS, task_config_cls) - return task_cls diff --git a/official/core/base_trainer.py b/official/core/base_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a13bac9adb732e753293c97503adbeddb505d175 --- /dev/null +++ b/official/core/base_trainer.py @@ -0,0 +1,453 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Standard Trainer implementation. + +The base trainer implements the Orbit `StandardTrainable` and +`StandardEvaluable` interfaces. Trainers inside this project should be +interchangable and independent on model architectures and tasks. +""" +import functools +from absl import logging +import gin +import orbit +import tensorflow as tf + +from official.core import base_task +from official.core import config_definitions +from official.modeling import optimization + + +ExperimentConfig = config_definitions.ExperimentConfig +TrainerConfig = config_definitions.TrainerConfig + + +class Recovery: + """Built-in model blowup recovery module. + + Checks the loss value by the given threshold. If applicable, recover the + model by reading the checkpoint on disk. + """ + + def __init__(self, + loss_upper_bound: float, + checkpoint_manager: tf.train.CheckpointManager, + recovery_begin_steps: int = 0, + recovery_max_trials: int = 3): + self.recover_counter = 0 + self.recovery_begin_steps = recovery_begin_steps + self.recovery_max_trials = recovery_max_trials + self.loss_upper_bound = loss_upper_bound + self.checkpoint_manager = checkpoint_manager + + def should_recover(self, loss_value, global_step): + if tf.math.is_nan(loss_value): + return True + if (global_step >= self.recovery_begin_steps and + loss_value > self.loss_upper_bound): + return True + return False + + def maybe_recover(self, loss_value, global_step): + """Conditionally recovers the training by triggering checkpoint restoration. + + Args: + loss_value: the loss value as a float. + global_step: the number of global training steps. + + Raises: + RuntimeError: when recovery happens more than the max number of trials, + the job should crash. 
+ """ + if not self.should_recover(loss_value, global_step): + return + self.recover_counter += 1 + if self.recover_counter > self.recovery_max_trials: + raise RuntimeError( + "The loss value is NaN after training loop and it happens %d times." % + self.recover_counter) + # Loads the previous good checkpoint. + checkpoint_path = self.checkpoint_manager.restore_or_initialize() + logging.warning( + "Recovering the model from checkpoint: %s. The loss value becomes " + "%f at step %d.", checkpoint_path, loss_value, global_step) + + +class _AsyncTrainer(orbit.StandardTrainer, orbit.StandardEvaluator): + """Trainer class for both sync and async Strategy.""" + + def init_async(self): + """Initializes the Async Trainer base class.""" + assert isinstance(self._strategy, tf.distribute.Strategy) + self._is_async = isinstance( + self._strategy, tf.distribute.experimental.ParameterServerStrategy) + self._coordinator = None + if self._is_async: + self._coordinator = ( + tf.distribute.experimental.coordinator.ClusterCoordinator( + self._strategy)) + + def join(self): + """Join all async steps. 
Only useful in async training."""
+    if getattr(self, "_is_async", False):
+      self._coordinator.join()
+
+  def create_train_loop_fn(self):
+    """Creates a train loop from the given step function and options."""
+    train_loop_fn = super().create_train_loop_fn()
+    if getattr(self, "_is_async", False):
+
+      def _async_loop_fn(iterator, num_steps):
+        self._coordinator.schedule(train_loop_fn, args=(iterator, num_steps))
+
+      return _async_loop_fn
+    else:
+      return train_loop_fn
+
+  def create_eval_loop_fn(self, has_state: bool):
+    """Creates an eval loop from the given step function and options."""
+    eval_loop_fn = super().create_eval_loop_fn(has_state)
+
+    if getattr(self, "_is_async", False):
+      if has_state:
+        raise ValueError(
+            "Stateful eval loop is not supported in async training.")
+
+      def _async_loop_fn(iterator, num_steps, state=None, reduce_fn=None):
+        assert state is None
+        assert reduce_fn is None
+        self._coordinator.schedule(eval_loop_fn, args=(iterator, num_steps))
+
+      return _async_loop_fn
+    else:
+      return eval_loop_fn
+
+  def distribute_dataset(self, dataset_or_fn, *args, **kwargs):
+    """A utility function to help create a `tf.distribute.DistributedDataset`.
+
+    Args:
+      dataset_or_fn: An instance of `tf.data.Dataset`, or a "dataset function"
+        returning a `tf.data.Dataset`. If it is a function, it may optionally
+        have an argument named `input_context` which will be passed a
+        `tf.distribute.InputContext` instance.
+      *args: Any positional arguments to pass through to `dataset_or_fn`.
+      **kwargs: Any keyword arguments to pass through to `dataset_or_fn`.
+    Returns:
+      A distributed Dataset.
+ """ + if getattr(self, "_is_async", False): + per_worker_dataset_fn = functools.partial( + orbit.utils.make_distributed_dataset, self._strategy, dataset_or_fn, + *args, **kwargs) + per_worker_dataset_fn = tf.function(per_worker_dataset_fn) + + return self._coordinator.create_per_worker_dataset(per_worker_dataset_fn) + else: + return orbit.utils.make_distributed_dataset(self._strategy, dataset_or_fn, + *args, **kwargs) + + +def get_runtime_options(config: ExperimentConfig): + """Get tf.distribute.RunOptions from config.""" + xla_options = {} + if config.runtime.tpu_enable_xla_dynamic_padder is not None: + xla_options["enable_xla_dynamic_padder"] = ( + config.runtime.tpu_enable_xla_dynamic_padder) + return tf.distribute.RunOptions( + experimental_xla_options=tf.tpu.XLAOptions(**xla_options)) + + +@gin.configurable +class Trainer(_AsyncTrainer): + """Implements the common trainer shared for TensorFlow models.""" + + # pylint: disable=super-init-not-called + def __init__(self, + config: ExperimentConfig, + task: base_task.Task, + model: tf.keras.Model, + optimizer: tf.optimizers.Optimizer, + train: bool = True, + evaluate: bool = True, + checkpoint_exporter=None): + """Initialize common trainer for TensorFlow models. + + Args: + config: An `ExperimentConfig` instance specifying experiment config. + task: A base_task.Task instance. + model: The model instance, e.g. a tf.keras.Model instance. + optimizer: tf.optimizers.Optimizer instance. + train: bool, whether or not this trainer will be used for training. + default to True. + evaluate: bool, whether or not this trainer will be used for evaluation. + default to True. + checkpoint_exporter: an object that has the `maybe_export_checkpoint` + interface. + """ + # Gets the current distribution strategy. If not inside any strategy scope, + # it gets a single-replica no-op strategy. 
+ self._strategy = tf.distribute.get_strategy() + self._validate_params(config) + self._config = config + self._task = task + self._model = model + self._optimizer = optimizer + self._checkpoint_exporter = checkpoint_exporter + self._recovery = None + # Runtime options are only applied to train_step. + # We use default for eval_step. + self._runtime_options = get_runtime_options(config) + + # Creates a shadow copy of the weights to store weights moving average. + if isinstance(self._optimizer, optimization.ExponentialMovingAverage): + self._optimizer.shadow_copy(self._model) + + # global_step increases by 1 after each training iteration. + # We should have global_step.numpy() == self.optimizer.iterations.numpy() + # when there is only 1 optimizer. + self._global_step = orbit.utils.create_global_step() + if hasattr(self.model, "checkpoint_items"): + checkpoint_items = self.model.checkpoint_items + else: + checkpoint_items = {} + self._checkpoint = tf.train.Checkpoint( + global_step=self.global_step, + model=self.model, + optimizer=self.optimizer, + **checkpoint_items) + + self._train_loss = tf.keras.metrics.Mean("training_loss", dtype=tf.float32) + self._validation_loss = tf.keras.metrics.Mean( + "validation_loss", dtype=tf.float32) + self._train_metrics = self.task.build_metrics( + training=True) + self.model.metrics + self._validation_metrics = self.task.build_metrics( + training=False) + self.model.metrics + + self.init_async() + + if train: + train_dataset = self.distribute_dataset( + self.task.build_inputs, self.config.task.train_data) + orbit.StandardTrainer.__init__( + self, + train_dataset, + options=orbit.StandardTrainerOptions( + use_tf_while_loop=config.trainer.train_tf_while_loop, + use_tf_function=config.trainer.train_tf_function, + use_tpu_summary_optimization=config.trainer.allow_tpu_summary)) + + if evaluate: + eval_dataset = self.distribute_dataset( + self.task.build_inputs, self.config.task.validation_data) + orbit.StandardEvaluator.__init__( + 
self,
+          eval_dataset,
+          options=orbit.StandardEvaluatorOptions(
+              use_tf_function=config.trainer.eval_tf_function,
+              use_tf_while_loop=config.trainer.eval_tf_while_loop))
+
+  def _validate_params(self, config):
+    r"""Validates the configuration object passed to the Trainer.
+
+    The experiment configuration should be structured as:
+    \trainer
+    \task
+      \train_data
+      \validation_data
+
+    Args:
+      config: a namedtuple, dataclass, ConfigDict, etc.
+    """
+    if not hasattr(config, "trainer"):
+      raise AttributeError("The trainer requires the configuration contains an"
+                           " attribute `trainer`.")
+
+    if not hasattr(config, "task"):
+      raise AttributeError("The trainer requires the configuration contains an"
+                           " attribute `task`.")
+
+    if not hasattr(config.task, "train_data"):
+      raise AttributeError("The trainer requires the configuration contains an"
+                           " attribute `task.train_data`.")
+
+    if not hasattr(config.task, "validation_data"):
+      raise AttributeError("The trainer requires the configuration contains an"
+                           " attribute `task.validation_data`.")
+
+  @property
+  def strategy(self):
+    return self._strategy
+
+  @property
+  def config(self):
+    return self._config
+
+  @property
+  def task(self):
+    return self._task
+
+  @property
+  def model(self):
+    return self._model
+
+  @property
+  def optimizer(self):
+    if hasattr(self, "_optimizer"):
+      return self._optimizer
+    else:
+      return None
+
+  @property
+  def global_step(self):
+    return self._global_step
+
+  @property
+  def train_loss(self):
+    """Accesses the training loss metric object."""
+    return self._train_loss
+
+  @property
+  def validation_loss(self):
+    """Accesses the validation loss metric object."""
+    return self._validation_loss
+
+  @property
+  def train_metrics(self):
+    """Accesses all training metric objects."""
+    return self._train_metrics
+
+  @property
+  def validation_metrics(self):
+    """Accesses all validation metric objects."""
+    return self._validation_metrics
+
+  def initialize(self):
+    """A callback 
function. + + This function will be called when no checkpoint found for the model. + If there is a checkpoint, the checkpoint will be loaded and this function + will not be called. Tasks may use this callback function to load a + pretrained checkpoint, saved under a directory other than the model_dir. + """ + self.task.initialize(self.model) + + @property + def checkpoint(self): + """Accesses the training checkpoint.""" + return self._checkpoint + + def add_recovery(self, params: TrainerConfig, + checkpoint_manager: tf.train.CheckpointManager): + if params.recovery_max_trials >= 0: + self._recovery = Recovery( + loss_upper_bound=params.loss_upper_bound, + recovery_begin_steps=params.recovery_begin_steps, + recovery_max_trials=params.recovery_max_trials, + checkpoint_manager=checkpoint_manager) + + def train_loop_end(self): + """See base class.""" + self.join() + # Checks if the model numeric status is stable and conducts the checkpoint + # recovery accordingly. + if self._recovery: + self._recovery.maybe_recover(self.train_loss.result().numpy(), + self.global_step.numpy()) + logs = {} + for metric in self.train_metrics + [self.train_loss]: + logs[metric.name] = metric.result() + metric.reset_states() + if callable(self.optimizer.learning_rate): + logs["learning_rate"] = self.optimizer.learning_rate(self.global_step) + else: + logs["learning_rate"] = self.optimizer.learning_rate + return logs + + def train_step(self, iterator): + """See base class.""" + + def step_fn(inputs): + if self.config.runtime.enable_xla and (self.config.runtime.num_gpus > 0): + task_train_step = tf.function(self.task.train_step, jit_compile=True) + else: + task_train_step = self.task.train_step + logs = task_train_step( + inputs, + model=self.model, + optimizer=self.optimizer, + metrics=self.train_metrics) + self._train_loss.update_state(logs[self.task.loss]) + self.global_step.assign_add(1) + + self.strategy.run( + step_fn, args=(next(iterator),), options=self._runtime_options) + + def 
eval_begin(self): + """Sets up metrics.""" + for metric in self.validation_metrics + [self.validation_loss]: + metric.reset_states() + # Swaps weights to test on weights moving average. + if self.optimizer and isinstance( + self.optimizer, optimization.ExponentialMovingAverage): + self.optimizer.swap_weights() + + def eval_step(self, iterator): + """See base class.""" + + def step_fn(inputs): + logs = self.task.validation_step( + inputs, model=self.model, metrics=self.validation_metrics) + if self.task.loss in logs: + self._validation_loss.update_state(logs[self.task.loss]) + return logs + + distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),)) + return tf.nest.map_structure(self.strategy.experimental_local_results, + distributed_outputs) + + def eval_end(self, aggregated_logs=None): + """Processes evaluation results.""" + self.join() + logs = {} + for metric in self.validation_metrics: + logs[metric.name] = metric.result() + if self.validation_loss.count.numpy() != 0: + logs[self.validation_loss.name] = self.validation_loss.result() + else: + # `self.validation_loss` metric was not updated, because the validation + # loss was not returned from the task's `validation_step` method. + logging.info("The task did not report validation loss.") + if aggregated_logs: + metrics = self.task.reduce_aggregated_logs( + aggregated_logs, global_step=self.global_step) + logs.update(metrics) + + if self._checkpoint_exporter: + self._checkpoint_exporter.maybe_export_checkpoint( + self.checkpoint, logs, self.global_step.numpy()) + metric_name = self.config.trainer.best_checkpoint_eval_metric + logs["best_" + + metric_name] = self._checkpoint_exporter.best_ckpt_logs[metric_name] + + # Swaps back weights after testing when EMA is used. + # This happens after best checkpoint export so that average weights used for + # eval are exported instead of regular weights. 
+ if self.optimizer and isinstance( + self.optimizer, optimization.ExponentialMovingAverage): + self.optimizer.swap_weights() + return logs + + def eval_reduce(self, state=None, step_outputs=None): + return self.task.aggregate_logs(state, step_outputs) diff --git a/official/core/base_trainer_test.py b/official/core/base_trainer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a346173a587b1c4e65d6d4d62c357debcafb6a81 --- /dev/null +++ b/official/core/base_trainer_test.py @@ -0,0 +1,382 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for tensorflow_models.core.trainers.trainer.""" +# pylint: disable=g-direct-tensorflow-import +import multiprocessing +import os +import sys + +from absl.testing import parameterized +import numpy as np +import portpicker +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.core import base_trainer as trainer_lib +from official.core import config_definitions as cfg +from official.core import train_lib +from official.utils.testing import mock_task + +TPU_TEST = 'test_tpu' in sys.argv[0] +GPU_TEST = 'test_gpu' in sys.argv[0] + + +def all_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ],) + + +def create_in_process_cluster(num_workers, num_ps): + """Creates and starts local servers and returns the cluster_resolver.""" + worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] + ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] + + cluster_dict = {} + cluster_dict['worker'] = ['localhost:%s' % port for port in worker_ports] + if num_ps > 0: + cluster_dict['ps'] = ['localhost:%s' % port for port in ps_ports] + + cluster_spec = tf.train.ClusterSpec(cluster_dict) + + # Workers need some inter_ops threads to work properly. 
+ worker_config = tf.compat.v1.ConfigProto() + if multiprocessing.cpu_count() < num_workers + 1: + worker_config.inter_op_parallelism_threads = num_workers + 1 + + for i in range(num_workers): + tf.distribute.Server( + cluster_spec, + job_name='worker', + task_index=i, + config=worker_config, + protocol='grpc') + + for i in range(num_ps): + tf.distribute.Server( + cluster_spec, job_name='ps', task_index=i, protocol='grpc') + + cluster_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver( + cluster_spec, rpc_layer='grpc') + return cluster_resolver + + +def dataset_fn(input_context=None): + del input_context + + def dummy_data(_): + return tf.zeros((1, 1), dtype=tf.float32) + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset + + +class MockAsyncTrainer(trainer_lib._AsyncTrainer): + """Mock AsyncTrainer to test the _AsyncTrainer class.""" + + def __init__(self): + self._strategy = tf.distribute.get_strategy() + self.init_async() + + self.global_step = tf.Variable( + 0, + dtype=tf.int64, + name='global_step', + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) + self.eval_global_step = tf.Variable( + 0, + dtype=tf.int64, + name='eval_global_step', + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) + + train_dataset = self.distribute_dataset(dataset_fn) + trainer_lib.orbit.StandardTrainer.__init__( + self, train_dataset, options=trainer_lib.orbit.StandardTrainerOptions()) + + eval_dataset = self.distribute_dataset(dataset_fn) + trainer_lib.orbit.StandardEvaluator.__init__( + self, + eval_dataset, + options=trainer_lib.orbit.StandardEvaluatorOptions( + use_tf_while_loop=True)) + + def train_loop_begin(self): + self.global_step.assign(0) + + def train_step(self, iterator): + + def replica_step(_): + self.global_step.assign_add(1) + + self._strategy.run(replica_step, args=(next(iterator),)) + + 
def train_loop_end(self): + self.join() + return self.global_step.numpy() + + def eval_begin(self): + self.eval_global_step.assign(0) + + def eval_step(self, iterator): + + def replica_step(_): + self.eval_global_step.assign_add(1) + + self._strategy.run(replica_step, args=(next(iterator),)) + + def eval_end(self): + self.join() + return self.eval_global_step.numpy() + + +class TrainerTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super().setUp() + self._config = cfg.ExperimentConfig( + trainer=cfg.TrainerConfig( + optimizer_config=cfg.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + } + }))) + + def create_test_trainer(self, config, model_dir=None, task=None): + task = task or mock_task.MockTask(config.task, logging_dir=model_dir) + ckpt_exporter = train_lib.maybe_create_best_ckpt_exporter(config, model_dir) + trainer = trainer_lib.Trainer( + config, + task, + model=task.build_model(), + optimizer=task.create_optimizer(config.trainer.optimizer_config, + config.runtime), + checkpoint_exporter=ckpt_exporter) + return trainer + + @combinations.generate(all_strategy_combinations()) + def test_trainer_train(self, distribution): + with distribution.scope(): + trainer = self.create_test_trainer(self._config) + logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', logs) + self.assertIn('learning_rate', logs) + + def test_base_async_trainer(self): + if TPU_TEST or GPU_TEST: + self.skipTest('Aysnc training is not available on GPU/GPU.') + num_workers = 3 + num_ps = 2 + cluster_resolver = create_in_process_cluster(num_workers, num_ps) + distribution = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver) + with distribution.scope(): + trainer = MockAsyncTrainer() + trainer.init_async() + self.assertIsInstance( + trainer._coordinator, + tf.distribute.experimental.coordinator.ClusterCoordinator) + 
self.assertEqual(trainer.train(tf.constant(10)), 10) + self.assertEqual(trainer.evaluate(tf.constant(11)), 11) + + def test_async_trainer_train(self): + if TPU_TEST or GPU_TEST: + self.skipTest('Aysnc training is not available on GPU/GPU.') + num_workers = 3 + num_ps = 2 + cluster_resolver = create_in_process_cluster(num_workers, num_ps) + distribution = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver) + with distribution.scope(): + config = cfg.ExperimentConfig(**self._config.as_dict()) + config.trainer.eval_tf_while_loop = True + trainer = self.create_test_trainer(config) + logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', logs) + self.assertIn('learning_rate', logs) + + def test_async_trainer_validate(self): + if TPU_TEST or GPU_TEST: + self.skipTest('Aysnc training is not available on GPU/GPU.') + num_workers = 3 + num_ps = 2 + cluster_resolver = create_in_process_cluster(num_workers, num_ps) + distribution = tf.distribute.experimental.ParameterServerStrategy( + cluster_resolver) + with distribution.scope(): + config = cfg.ExperimentConfig(**self._config.as_dict()) + config.trainer.eval_tf_while_loop = True + trainer = self.create_test_trainer(config) + logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('acc', logs) + self.assertIn('validation_loss', logs) + + @combinations.generate(all_strategy_combinations()) + def test_trainer_validate(self, distribution): + with distribution.scope(): + trainer = self.create_test_trainer(self._config) + logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertEqual(logs['counter'], 5. 
* distribution.num_replicas_in_sync) + self.assertIn('validation_loss', logs) + + @combinations.generate(all_strategy_combinations()) + def test_trainer_validate_without_loss(self, distribution): + + class MockTaskWithoutValidationLoss(mock_task.MockTask): + + def validation_step(self, inputs, model, metrics=None): + # Disable validation loss. + logs = super().validation_step(inputs, model) + del logs[self.loss] + return logs + + with distribution.scope(): + task = MockTaskWithoutValidationLoss() + trainer = self.create_test_trainer(self._config, task=task) + logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertEqual(logs['counter'], 5. * distribution.num_replicas_in_sync) + self.assertNotIn('validation_loss', logs) + + @combinations.generate( + combinations.combine( + mixed_precision_dtype=['float32', 'bfloat16', 'float16'], + loss_scale=[None, 'dynamic', 128, 256], + )) + def test_configure_optimizer(self, mixed_precision_dtype, loss_scale): + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig( + mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale), + trainer=cfg.TrainerConfig( + optimizer_config=cfg.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + }, + }))) + trainer = self.create_test_trainer(config) + if mixed_precision_dtype != 'float16': + self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) + elif mixed_precision_dtype == 'float16' and loss_scale is None: + self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) + else: + self.assertIsInstance(trainer.optimizer, + tf.keras.mixed_precision.LossScaleOptimizer) + + metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', metrics) + + def test_export_best_ckpt(self): + config = cfg.ExperimentConfig( + trainer=cfg.TrainerConfig( + best_checkpoint_export_subdir='best_ckpt', + best_checkpoint_eval_metric='acc', + 
optimizer_config=cfg.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + } + }))) + model_dir = self.get_temp_dir() + trainer = self.create_test_trainer(config, model_dir=model_dir) + trainer.train(tf.convert_to_tensor(1, dtype=tf.int32)) + trainer.evaluate(tf.convert_to_tensor(1, dtype=tf.int32)) + self.assertTrue( + tf.io.gfile.exists(os.path.join(model_dir, 'best_ckpt', 'info.json'))) + + def test_recovery(self): + config = cfg.ExperimentConfig( + trainer=cfg.TrainerConfig( + loss_upper_bound=0.5, + recovery_max_trials=2, + optimizer_config=cfg.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + } + }))) + model_dir = self.get_temp_dir() + trainer = self.create_test_trainer(config, model_dir=model_dir) + checkpoint_manager = tf.train.CheckpointManager( + trainer.checkpoint, self.get_temp_dir(), max_to_keep=2) + checkpoint_manager.save() + trainer.add_recovery(config.trainer, checkpoint_manager=checkpoint_manager) + before_weights = trainer.model.get_weights() + _ = trainer.train(tf.convert_to_tensor(1, dtype=tf.int32)) + # The training loss is 1.0 and upper_bound is 0.5, so the recover happens. + after_weights = trainer.model.get_weights() + for left, right in zip(before_weights, after_weights): + self.assertAllEqual(left, right) + + # Let's the loss be NaN and max_trials = 0 to see RuntimeError. 
+ config = cfg.ExperimentConfig( + trainer=cfg.TrainerConfig( + recovery_max_trials=0, + optimizer_config=cfg.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + } + }))) + task = mock_task.MockTask(config.task, logging_dir=model_dir) + + def build_losses(labels, model_outputs, aux_losses=None): + del labels, model_outputs + return tf.constant([np.nan], tf.float32) + aux_losses + + task.build_losses = build_losses + trainer = trainer_lib.Trainer( + config, + task, + model=task.build_model(), + optimizer=task.create_optimizer(config.trainer.optimizer_config, + config.runtime)) + trainer.add_recovery(config.trainer, checkpoint_manager=checkpoint_manager) + with self.assertRaises(RuntimeError): + _ = trainer.train(tf.convert_to_tensor(2, dtype=tf.int32)) + + def test_model_with_compiled_loss(self): + task = mock_task.MockTask() + model = task.build_model() + model.compile(loss=tf.keras.losses.CategoricalCrossentropy()) + trainer = trainer_lib.Trainer( + self._config, + task, + model=model, + optimizer=task.create_optimizer(self._config.trainer.optimizer_config)) + logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', logs) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/core/config_definitions.py b/official/core/config_definitions.py new file mode 100644 index 0000000000000000000000000000000000000000..7b29823ebe2bded777a29aab5cd2428f821081f5 --- /dev/null +++ b/official/core/config_definitions.py @@ -0,0 +1,252 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Common configuration settings.""" + +from typing import Optional, Sequence, Union + +import dataclasses + +from official.modeling.hyperparams import base_config +from official.modeling.optimization.configs import optimization_config + +OptimizationConfig = optimization_config.OptimizationConfig + + +@dataclasses.dataclass +class DataConfig(base_config.Config): + """The base configuration for building datasets. + + Attributes: + input_path: The path to the input. It can be either (1) a str indicating + a file path/pattern, or (2) a str indicating multiple file paths/patterns + separated by comma (e.g "a, b, c" or no spaces "a,b,c"), or + (3) a list of str, each of which is a file path/pattern or multiple file + paths/patterns separated by comma. + It should not be specified when the following `tfds_name` is specified. + tfds_name: The name of the tensorflow dataset (TFDS). It should not be + specified when the above `input_path` is specified. + tfds_split: A str indicating which split of the data to load from TFDS. It + is required when above `tfds_name` is specified. + global_batch_size: The global batch size across all replicas. + is_training: Whether this data is used for training or not. + drop_remainder: Whether the last batch should be dropped in the case it has + fewer than `global_batch_size` elements. + shuffle_buffer_size: The buffer size used for shuffling training data. + cache: Whether to cache dataset examples. If `True`, we will cache the + dataset after applying the decode_fn and parse_fn. 
It can be used to avoid + re-reading from disk, re-decoding and re-parsing the example on the + second epoch, but it requires significant memory overhead. + cycle_length: The number of files that will be processed concurrently when + interleaving files. + block_length: The number of consecutive elements to produce from each input + element before cycling to another input element when interleaving files. + deterministic: A boolean controlling whether determinism should be enforced. + sharding: Whether sharding is used in the input pipeline. + enable_tf_data_service: A boolean indicating whether to enable tf.data + service for the input pipeline. + tf_data_service_address: The URI of a tf.data service to offload + preprocessing onto during training. The URI should be in the format + "protocol://address", e.g. "grpc://tf-data-service:5050". It can be + overridden by `FLAGS.tf_data_service` flag in the binary. + tf_data_service_job_name: The name of the tf.data service job. This + argument makes it possible for multiple datasets to share the same job. + The default behavior is that the dataset creates anonymous, exclusively + owned jobs. + tfds_data_dir: A str specifying the directory to read/write TFDS data. + tfds_as_supervised: A bool. When loading dataset from TFDS, if True, the + returned tf.data.Dataset will have a 2-tuple structure (input, label) + according to builder.info.supervised_keys; if False, the default, the + returned tf.data.Dataset will have a dictionary with all the features. + tfds_skip_decoding_feature: A str to indicate which features are skipped for + decoding when loading dataset from TFDS. Use comma to separate multiple + features. The main use case is to skip the image/video decoding for better + performance. + seed: An optional seed to use for deterministic shuffling/preprocessing. 
+ """ + input_path: Union[Sequence[str], str] = "" + tfds_name: str = "" + tfds_split: str = "" + global_batch_size: int = 0 + is_training: bool = None + drop_remainder: bool = True + shuffle_buffer_size: int = 100 + cache: bool = False + cycle_length: Optional[int] = None + block_length: int = 1 + deterministic: Optional[bool] = None + sharding: bool = True + enable_tf_data_service: bool = False + tf_data_service_address: Optional[str] = None + tf_data_service_job_name: Optional[str] = None + tfds_data_dir: str = "" + tfds_as_supervised: bool = False + tfds_skip_decoding_feature: str = "" + seed: Optional[int] = None + + +@dataclasses.dataclass +class RuntimeConfig(base_config.Config): + """High-level configurations for Runtime. + + These include parameters that are not directly related to the experiment, + e.g. directories, accelerator type, etc. + + Attributes: + distribution_strategy: e.g. 'mirrored', 'tpu', etc. + enable_xla: Whether or not to enable XLA. + per_gpu_thread_count: thread count per GPU. + gpu_thread_mode: Whether and how the GPU device uses its own threadpool. + dataset_num_private_threads: Number of threads for a private threadpool + created for all datasets computation. + tpu: The address of the TPU to use, if any. + num_gpus: The number of GPUs to use, if any. + worker_hosts: comma-separated list of worker ip:port pairs for running + multi-worker models with DistributionStrategy. + task_index: If multi-worker training, the task index of this worker. + all_reduce_alg: Defines the algorithm for performing all-reduce. + num_packs: Sets `num_packs` in the cross device ops used in + MirroredStrategy. For details, see tf.distribute.NcclAllReduce. + mixed_precision_dtype: dtype of mixed precision policy. It can be 'float32', + 'float16', or 'bfloat16'. + loss_scale: The type of loss scale, or 'float' value. This is used when + setting the mixed precision policy. + run_eagerly: Whether or not to run the experiment eagerly. 
+ batchnorm_spatial_persistent: Whether or not to enable the spatial + persistent mode for CuDNN batch norm kernel for improved GPU performance. + """ + distribution_strategy: str = "mirrored" + enable_xla: bool = False + gpu_thread_mode: Optional[str] = None + dataset_num_private_threads: Optional[int] = None + per_gpu_thread_count: int = 0 + tpu: Optional[str] = None + num_gpus: int = 0 + worker_hosts: Optional[str] = None + task_index: int = -1 + all_reduce_alg: Optional[str] = None + num_packs: int = 1 + mixed_precision_dtype: Optional[str] = None + loss_scale: Optional[Union[str, float]] = None + run_eagerly: bool = False + batchnorm_spatial_persistent: bool = False + + # XLA runtime params. + # XLA params are only applied to the train_step. + # These augments can improve training speed. They can also improve eval, but + # may reduce usability and users would need to make changes to code. + + # Whether to enable XLA dynamic padder + # infrastructure to handle dynamic shapes inputs inside XLA. True by + # default. Disabling this may cause correctness issues with dynamic shapes + # inputs, as XLA will just assume the inputs are with padded shapes. However + # users can optionally set it to False to improve device time if masking is + # already handled in the user side. + # If None, will respect XLA default. + tpu_enable_xla_dynamic_padder: Optional[bool] = None + + # Global model parallelism configurations. + num_cores_per_replica: int = 1 + default_shard_dim: int = -1 + + def model_parallelism(self): + return dict( + num_cores_per_replica=self.num_cores_per_replica, + default_shard_dim=self.default_shard_dim) + + +@dataclasses.dataclass +class TrainerConfig(base_config.Config): + """Configuration for trainer. + + Attributes: + optimizer_config: optimizer config, it includes optimizer, learning rate, + and warmup schedule configs. + train_tf_while_loop: whether or not to use tf while loop. + train_tf_function: whether or not to use tf_function for training loop. 
+ eval_tf_function: whether or not to use tf_function for eval. + allow_tpu_summary: Whether to allow summary happen inside the XLA program + runs on TPU through automatic outside compilation. + steps_per_loop: number of steps per loop. + summary_interval: number of steps between each summary. + checkpoint_interval: number of steps between checkpoints. + max_to_keep: max checkpoints to keep. + continuous_eval_timeout: maximum number of seconds to wait between + checkpoints, if set to None, continuous eval will wait indefinitely. This + is only used continuous_train_and_eval and continuous_eval modes. Default + value is 1 hrs. + train_steps: number of train steps. + validation_steps: number of eval steps. If `None`, the entire eval dataset + is used. + validation_interval: number of training steps to run between evaluations. + best_checkpoint_export_subdir: if set, the trainer will keep track of the + best evaluation metric, and export the corresponding best checkpoint under + `model_dir/best_checkpoint_export_subdir`. Note that this only works if + mode contains eval (such as `train_and_eval`, `continuous_eval`, and + `continuous_train_and_eval`). + best_checkpoint_eval_metric: for exporting the best checkpoint, which + evaluation metric the trainer should monitor. This can be any evaluation + metric appears on tensorboard. + best_checkpoint_metric_comp: for exporting the best checkpoint, how the + trainer should compare the evaluation metrics. This can be either `higher` + (higher the better) or `lower` (lower the better). + validation_summary_subdir: A 'str', sub directory for saving eval summary. + """ + optimizer_config: OptimizationConfig = OptimizationConfig() + # Orbit settings. + train_tf_while_loop: bool = True + train_tf_function: bool = True + eval_tf_function: bool = True + eval_tf_while_loop: bool = False + allow_tpu_summary: bool = False + # Trainer intervals. 
+ steps_per_loop: int = 1000 + summary_interval: int = 1000 + checkpoint_interval: int = 1000 + # Checkpoint manager. + max_to_keep: int = 5 + continuous_eval_timeout: int = 60 * 60 + # Train/Eval routines. + train_steps: int = 0 + # Sets validation steps to be -1 to evaluate the entire dataset. + validation_steps: int = -1 + validation_interval: int = 1000 + # Best checkpoint export. + best_checkpoint_export_subdir: str = "" + best_checkpoint_eval_metric: str = "" + best_checkpoint_metric_comp: str = "higher" + # Blowup recovery. + loss_upper_bound: float = 1e6 + recovery_begin_steps: int = 0 # Enforcing the loss bound after these steps. + # When max trials < 0, no recovery module; max trials = 0, we will check + # the condition and fail the job if the condition happens; max trials > 0, + # we will retore the model states. + recovery_max_trials: int = 0 + validation_summary_subdir: str = "validation" + + +@dataclasses.dataclass +class TaskConfig(base_config.Config): + init_checkpoint: str = "" + model: base_config.Config = None + train_data: DataConfig = DataConfig() + validation_data: DataConfig = DataConfig() + + +@dataclasses.dataclass +class ExperimentConfig(base_config.Config): + """Top-level configuration.""" + task: TaskConfig = TaskConfig() + trainer: TrainerConfig = TrainerConfig() + runtime: RuntimeConfig = RuntimeConfig() diff --git a/official/core/exp_factory.py b/official/core/exp_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..7536c0f8fe46dc7224fe56c8fadcb1b70ccc278e --- /dev/null +++ b/official/core/exp_factory.py @@ -0,0 +1,36 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Experiment factory methods.""" + +from official.core import config_definitions as cfg +from official.core import registry + + +_REGISTERED_CONFIGS = {} + + +def register_config_factory(name): + """Register ExperimentConfig factory method.""" + return registry.register(_REGISTERED_CONFIGS, name) + + +def get_exp_config_creater(exp_name: str): + """Looks up ExperimentConfig factory methods.""" + exp_creater = registry.lookup(_REGISTERED_CONFIGS, exp_name) + return exp_creater + + +def get_exp_config(exp_name: str) -> cfg.ExperimentConfig: + return get_exp_config_creater(exp_name)() diff --git a/official/core/export_base.py b/official/core/export_base.py new file mode 100644 index 0000000000000000000000000000000000000000..0937db0c62aaf1d9080c81f48c2c2d8f87c9f1ae --- /dev/null +++ b/official/core/export_base.py @@ -0,0 +1,109 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Base class for model export.""" + +import abc +import functools +from typing import Any, Callable, Dict, Mapping, List, Optional, Text, Union + +import tensorflow as tf +from tensorflow.python.saved_model.model_utils import export_utils + + +class ExportModule(tf.Module, metaclass=abc.ABCMeta): + """Base Export Module.""" + + def __init__(self, + params, + model: Union[tf.Module, tf.keras.Model], + inference_step: Optional[Callable[..., Any]] = None): + """Instantiates an ExportModel. + + Args: + params: A dataclass for parameters to the module. + model: A model instance which contains weights and forward computation. + inference_step: An optional callable to define how the model is called. + """ + super().__init__(name=None) + self.model = model + self.params = params + + if inference_step is not None: + self.inference_step = functools.partial(inference_step, model=self.model) + else: + self.inference_step = functools.partial( + self.model.__call__, training=False) + + @abc.abstractmethod + def serve(self) -> Mapping[Text, tf.Tensor]: + """The bare inference function which should run on all devices. + + Expecting tensors are passed in through keyword arguments. Returns a + dictionary of tensors, when the keys will be used inside the SignatureDef. + """ + + @abc.abstractmethod + def get_inference_signatures( + self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]: + """Get defined function signatures.""" + + +def export(export_module: ExportModule, + function_keys: Union[List[Text], Dict[Text, Text]], + export_savedmodel_dir: Text, + checkpoint_path: Optional[Text] = None, + timestamped: bool = True, + save_options: Optional[tf.saved_model.SaveOptions] = None) -> Text: + """Exports to SavedModel format. + + Args: + export_module: a ExportModule with the keras Model and serving tf.functions. + function_keys: a list of string keys to retrieve pre-defined serving + signatures. The signaute keys will be set with defaults. 
If a dictionary + is provided, the values will be used as signature keys. + export_savedmodel_dir: Output saved model directory. + checkpoint_path: Object-based checkpoint path or directory. + timestamped: Whether to export the savedmodel to a timestamped directory. + save_options: `SaveOptions` for `tf.saved_model.save`. + + Returns: + The savedmodel directory path. + """ + ckpt_dir_or_file = checkpoint_path + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + if ckpt_dir_or_file: + checkpoint = tf.train.Checkpoint(model=export_module.model) + checkpoint.read( + ckpt_dir_or_file).assert_existing_objects_matched().expect_partial() + if isinstance(function_keys, list): + if len(function_keys) == 1: + function_keys = { + function_keys[0]: tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY + } + else: + raise ValueError( + "If the function_keys is a list, it must contain a single element. %s" + % function_keys) + + signatures = export_module.get_inference_signatures(function_keys) + if timestamped: + export_dir = export_utils.get_timestamped_export_dir( + export_savedmodel_dir).decode("utf-8") + else: + export_dir = export_savedmodel_dir + tf.saved_model.save( + export_module, export_dir, signatures=signatures, options=save_options) + return export_dir diff --git a/official/core/export_base_test.py b/official/core/export_base_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3358a4d4fc181ce2f285494ba9c536df268689c3 --- /dev/null +++ b/official/core/export_base_test.py @@ -0,0 +1,88 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.core.export_base.""" +import os +from typing import Any, Dict, Mapping, Text + +import tensorflow as tf + +from official.core import export_base + + +class TestModule(export_base.ExportModule): + + @tf.function + def serve(self, inputs: tf.Tensor) -> Mapping[Text, tf.Tensor]: + return {'outputs': self.inference_step(inputs)} + + def get_inference_signatures( + self, function_keys: Dict[Text, Text]) -> Mapping[Text, Any]: + input_signature = tf.TensorSpec(shape=[None, None], dtype=tf.float32) + return {'foo': self.serve.get_concrete_function(input_signature)} + + +class ExportBaseTest(tf.test.TestCase): + + def test_export_module(self): + tmp_dir = self.get_temp_dir() + model = tf.keras.layers.Dense(2) + inputs = tf.ones([2, 4], tf.float32) + expected_output = model(inputs, training=False) + module = TestModule(params=None, model=model) + ckpt_path = tf.train.Checkpoint(model=model).save( + os.path.join(tmp_dir, 'ckpt')) + export_dir = export_base.export( + module, ['foo'], + export_savedmodel_dir=tmp_dir, + checkpoint_path=ckpt_path, + timestamped=True) + self.assertTrue(os.path.exists(os.path.join(export_dir, 'saved_model.pb'))) + self.assertTrue( + os.path.exists( + os.path.join(export_dir, 'variables', 'variables.index'))) + self.assertTrue( + os.path.exists( + os.path.join(export_dir, 'variables', + 'variables.data-00000-of-00001'))) + + imported = tf.saved_model.load(export_dir) + output = imported.signatures['foo'](inputs) + self.assertAllClose(output['outputs'].numpy(), expected_output.numpy()) + + def 
test_custom_inference_step(self): + tmp_dir = self.get_temp_dir() + model = tf.keras.layers.Dense(2) + inputs = tf.ones([2, 4], tf.float32) + + def _inference_step(inputs, model): + return tf.nn.softmax(model(inputs, training=False)) + + module = TestModule( + params=None, model=model, inference_step=_inference_step) + expected_output = _inference_step(inputs, model) + ckpt_path = tf.train.Checkpoint(model=model).save( + os.path.join(tmp_dir, 'ckpt')) + export_dir = export_base.export( + module, ['foo'], + export_savedmodel_dir=tmp_dir, + checkpoint_path=ckpt_path, + timestamped=False) + imported = tf.saved_model.load(export_dir) + output = imported.signatures['foo'](inputs) + self.assertAllClose(output['outputs'].numpy(), expected_output.numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/core/input_reader.py b/official/core/input_reader.py index f0c4c06afc6a6bcdbb8c15342641e50773cead03..f6704456170f7402dd34fe186750e7b731186b8e 100644 --- a/official/core/input_reader.py +++ b/official/core/input_reader.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,71 +11,79 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""A common dataset reader.""" +"""A common dataset reader.""" +import random from typing import Any, Callable, List, Optional +from absl import logging import tensorflow as tf import tensorflow_datasets as tfds -from official.modeling.hyperparams import config_definitions as cfg +from official.core import config_definitions as cfg + + +def _get_random_integer(): + return random.randint(0, (1 << 31) - 1) + + +def _maybe_map_fn(dataset: tf.data.Dataset, + fn: Optional[Callable[..., Any]] = None) -> tf.data.Dataset: + """Calls dataset.map if a valid function is passed in.""" + return dataset if fn is None else dataset.map( + fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) class InputReader: """Input reader that returns a tf.data.Dataset instance.""" + # A static random number which is the same across different InputReader + # instances. + static_randnum = _get_random_integer() + def __init__(self, params: cfg.DataConfig, - shards: Optional[List[str]] = None, dataset_fn=tf.data.TFRecordDataset, decoder_fn: Optional[Callable[..., Any]] = None, + sample_fn: Optional[Callable[..., Any]] = None, parser_fn: Optional[Callable[..., Any]] = None, - dataset_transform_fn: Optional[Callable[[tf.data.Dataset], - tf.data.Dataset]] = None, + transform_and_batch_fn: Optional[Callable[ + [tf.data.Dataset, Optional[tf.distribute.InputContext]], + tf.data.Dataset]] = None, postprocess_fn: Optional[Callable[..., Any]] = None): """Initializes an InputReader instance. Args: params: A config_definitions.DataConfig object. - shards: A list of files to be read. If given, read from these files. - Otherwise, read from params.input_path. dataset_fn: A `tf.data.Dataset` that consumes the input files. For example, it can be `tf.data.TFRecordDataset`. decoder_fn: An optional `callable` that takes the serialized data string and decodes them into the raw tensor dictionary. 
+ sample_fn: An optional `callable` that takes a `tf.data.Dataset` object as + input and outputs the transformed dataset. It performs sampling on the + decoded raw tensors dict before the parser_fn. parser_fn: An optional `callable` that takes the decoded raw tensors dict and parse them into a dictionary of tensors that can be consumed by the model. It will be executed after decoder_fn. - dataset_transform_fn: An optional `callable` that takes a - `tf.data.Dataset` object and returns a `tf.data.Dataset`. It will be - executed after parser_fn. + transform_and_batch_fn: An optional `callable` that takes a + `tf.data.Dataset` object and an optional `tf.distribute.InputContext` as + input, and returns a `tf.data.Dataset` object. It will be executed after + `parser_fn` to transform and batch the dataset; if None, after + `parser_fn` is executed, the dataset will be batched into per-replica + batch size. postprocess_fn: A optional `callable` that processes batched tensors. It will be executed after batching. """ if params.input_path and params.tfds_name: raise ValueError('At most one of `input_path` and `tfds_name` can be ' - 'specified, but got %s and %s.' % ( - params.input_path, params.tfds_name)) - self._shards = shards + 'specified, but got %s and %s.' % + (params.input_path, params.tfds_name)) self._tfds_builder = None - if self._shards: - self._num_files = len(self._shards) - elif not params.tfds_name: - self._input_patterns = params.input_path.strip().split(',') - self._num_files = 0 - for input_pattern in self._input_patterns: - input_pattern = input_pattern.strip() - if not input_pattern: - continue - matched_files = tf.io.gfile.glob(input_pattern) - if not matched_files: - raise ValueError('%s does not match any files.' % input_pattern) - else: - self._num_files += len(matched_files) - if self._num_files == 0: - raise ValueError('%s does not match any files.' 
% params.input_path) + self._matched_files = [] + if params.input_path: + self._matched_files = self._match_files(params.input_path) else: + # Read dataset from TFDS. if not params.tfds_split: raise ValueError( '`tfds_name` is %s, but `tfds_split` is not specified.' % @@ -91,61 +98,141 @@ class InputReader: self._cache = params.cache self._cycle_length = params.cycle_length self._block_length = params.block_length + self._deterministic = params.deterministic self._sharding = params.sharding - self._examples_consume = params.examples_consume self._tfds_split = params.tfds_split - self._tfds_download = params.tfds_download self._tfds_as_supervised = params.tfds_as_supervised self._tfds_skip_decoding_feature = params.tfds_skip_decoding_feature self._dataset_fn = dataset_fn self._decoder_fn = decoder_fn + self._sample_fn = sample_fn self._parser_fn = parser_fn - self._dataset_transform_fn = dataset_transform_fn + self._transform_and_batch_fn = transform_and_batch_fn self._postprocess_fn = postprocess_fn + # When tf.data service is enabled, each data service worker should get + # different random seeds. Thus, we set `seed` to None. + if params.seed is not None: + self._seed = params.seed + elif params.enable_tf_data_service: + self._seed = _get_random_integer() + else: + self._seed = None - def _read_sharded_files( - self, - input_context: Optional[tf.distribute.InputContext] = None): - """Reads a dataset from sharded files.""" - # Read from `self._shards` if it is provided. - if self._shards: - dataset = tf.data.Dataset.from_tensor_slices(self._shards) + self._enable_tf_data_service = ( + params.enable_tf_data_service and params.tf_data_service_address) + self._tf_data_service_address = params.tf_data_service_address + if self._enable_tf_data_service: + # Add a random seed as the tf.data service job name suffix, so tf.data + # service doesn't reuse the previous state if TPU worker gets preempted. 
+ self._tf_data_service_job_name = ( + params.tf_data_service_job_name + str(self.static_randnum)) + self._enable_round_robin_tf_data_service = params.get( + 'enable_round_robin_tf_data_service', False) + + def _match_files(self, input_path: str) -> List[str]: + """Matches files from an input_path.""" + matched_files = [] + # Read dataset from files. + usage = ('`input_path` should be either (1) a str indicating a file ' + 'path/pattern, or (2) a str indicating multiple file ' + 'paths/patterns separated by comma (e.g "a, b, c" or no spaces ' + '"a,b,c", or (3) a list of str, each of which is a file ' + 'path/pattern or multiple file paths/patterns separated by ' + 'comma, but got: %s') + if isinstance(input_path, str): + input_path_list = [input_path] + elif isinstance(input_path, (list, tuple)): + if any(not isinstance(x, str) for x in input_path): + raise ValueError(usage % input_path) + input_path_list = input_path else: - dataset = tf.data.Dataset.list_files( - self._input_patterns, shuffle=self._is_training) + raise ValueError(usage % input_path) + + for input_path in input_path_list: + input_patterns = input_path.strip().split(',') + for input_pattern in input_patterns: + input_pattern = input_pattern.strip() + if not input_pattern: + continue + if '*' in input_pattern or '?' in input_pattern: + tmp_matched_files = tf.io.gfile.glob(input_pattern) + if not tmp_matched_files: + raise ValueError('%s does not match any files.' % input_pattern) + matched_files.extend(tmp_matched_files) + else: + matched_files.append(input_pattern) + + if not matched_files: + raise ValueError('%s does not match any files.' 
% input_path) + + return matched_files + + def _shard_files_then_read( + self, + matched_files: List[str], + dataset_fn, + input_context: Optional[tf.distribute.InputContext] = None + ) -> tf.data.Dataset: + """Shards the data files and then sent a split to every worker to read.""" + dataset = tf.data.Dataset.from_tensor_slices(matched_files) + + # Shuffle and repeat at file level. + # If cache is enabled, `reshuffle_each_iteration` is set to False, + # because we will read the same cached data in every iteration anyway. + if self._is_training: + dataset = dataset.shuffle( + len(matched_files), + seed=self._seed, + reshuffle_each_iteration=True if not self._cache else False) + + # Do not enable sharding if tf.data service is enabled, as sharding will be + # handled inside tf.data service. if self._sharding and input_context and ( - input_context.num_input_pipelines > 1): + input_context.num_input_pipelines > 1 and + not self._enable_tf_data_service): dataset = dataset.shard(input_context.num_input_pipelines, input_context.input_pipeline_id) - if self._is_training: + + # If cache is enabled, we will call `repeat()` later after `cache()`. + if self._is_training and not self._cache: dataset = dataset.repeat() dataset = dataset.interleave( - map_func=self._dataset_fn, + map_func=dataset_fn, cycle_length=self._cycle_length, block_length=self._block_length, - num_parallel_calls=tf.data.experimental.AUTOTUNE) + num_parallel_calls=(self._cycle_length if self._cycle_length else + tf.data.experimental.AUTOTUNE), + deterministic=self._deterministic) return dataset - def _read_single_file( + def _read_files_then_shard( self, - input_context: Optional[tf.distribute.InputContext] = None): - """Reads a dataset from a single file.""" - # Read from `self._shards` if it is provided. 
- dataset = self._dataset_fn(self._shards or self._input_patterns) + matched_files: List[str], + dataset_fn, + input_context: Optional[tf.distribute.InputContext] = None + ) -> tf.data.Dataset: + """Sends all data files to every worker and then shard by data.""" + dataset = dataset_fn(matched_files) - # When `input_file` is a path to a single file, disable auto sharding + # When `input_file` is a path to a single file or the number of files is + # less than the number of input pipelines, disable auto sharding # so that same input file is sent to all workers. options = tf.data.Options() options.experimental_distribute.auto_shard_policy = ( tf.data.experimental.AutoShardPolicy.OFF) dataset = dataset.with_options(options) + # Do not enable sharding if tf.data service is enabled, as sharding will be + # handled inside tf.data service. if self._sharding and input_context and ( - input_context.num_input_pipelines > 1): + input_context.num_input_pipelines > 1 and + not self._enable_tf_data_service): dataset = dataset.shard(input_context.num_input_pipelines, input_context.input_pipeline_id) - if self._is_training: + + # If cache is enabled, we will call `repeat()` later after `cache()`. + if self._is_training and not self._cache: dataset = dataset.repeat() return dataset @@ -154,13 +241,14 @@ class InputReader: input_context: Optional[tf.distribute.InputContext] = None ) -> tf.data.Dataset: """Reads a dataset from tfds.""" - if self._tfds_download: - self._tfds_builder.download_and_prepare() + # No op if exist. 
+ self._tfds_builder.download_and_prepare() read_config = tfds.ReadConfig( interleave_cycle_length=self._cycle_length, interleave_block_length=self._block_length, - input_context=input_context) + input_context=input_context, + shuffle_seed=self._seed) decoders = {} if self._tfds_skip_decoding_feature: for skip_feature in self._tfds_skip_decoding_feature.split(','): @@ -172,7 +260,8 @@ class InputReader: decoders=decoders, read_config=read_config) - if self._is_training: + # If cache is enabled, we will call `repeat()` later after `cache()`. + if self._is_training and not self._cache: dataset = dataset.repeat() return dataset @@ -185,42 +274,117 @@ class InputReader: raise ValueError('tfds_info is not available, because the dataset ' 'is not loaded from tfds.') - def read( + def _read_decode_and_parse_dataset( self, - input_context: Optional[tf.distribute.InputContext] = None - ) -> tf.data.Dataset: - """Generates a tf.data.Dataset object.""" - if self._tfds_builder: + matched_files: List[str], + dataset_fn, + batch_size: int, + input_context: Optional[tf.distribute.InputContext] = None, + tfds_builder: bool = False) -> tf.data.Dataset: + """Returns a tf.data.Dataset object after reading, decoding, and parsing.""" + if tfds_builder: dataset = self._read_tfds(input_context) - elif self._num_files > 1: - dataset = self._read_sharded_files(input_context) + elif len(matched_files) > 1: + if input_context and (len(matched_files) < + input_context.num_input_pipelines): + logging.warn( + 'The number of files %d is less than the number of input pipelines ' + '%d. We will send all input files to every worker. 
' + 'Please consider sharding your data into more files.', + len(matched_files), input_context.num_input_pipelines) + dataset = self._read_files_then_shard(matched_files, + dataset_fn, + input_context) + else: + dataset = self._shard_files_then_read(matched_files, + dataset_fn, + input_context) + elif len(matched_files) == 1: + dataset = self._read_files_then_shard(matched_files, + dataset_fn, + input_context) else: - assert self._num_files == 1 - dataset = self._read_single_file(input_context) + raise ValueError('It is unexpected that `tfds_builder` is None and ' + 'there is also no `matched_files`.') - if self._cache: - dataset = dataset.cache() + # If cache is enabled, we will call `shuffle()` later after `cache()`. + if self._is_training and not self._cache: + dataset = dataset.shuffle(self._shuffle_buffer_size, seed=self._seed) - if self._is_training: - dataset = dataset.shuffle(self._shuffle_buffer_size) + dataset = _maybe_map_fn(dataset, self._decoder_fn) + if self._sample_fn is not None: + dataset = dataset.apply(self._sample_fn) + dataset = _maybe_map_fn(dataset, self._parser_fn) - if self._examples_consume > 0: - dataset = dataset.take(self._examples_consume) + if self._cache: + dataset = dataset.cache() + if self._is_training: + dataset = dataset.repeat() + dataset = dataset.shuffle(self._shuffle_buffer_size, seed=self._seed) - def maybe_map_fn(dataset, fn): - return dataset if fn is None else dataset.map( - fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) + if self._transform_and_batch_fn is not None: + dataset = self._transform_and_batch_fn(dataset, input_context) + else: + per_replica_batch_size = input_context.get_per_replica_batch_size( + batch_size) if input_context else batch_size + dataset = dataset.batch( + per_replica_batch_size, drop_remainder=self._drop_remainder + ) - dataset = maybe_map_fn(dataset, self._decoder_fn) - dataset = maybe_map_fn(dataset, self._parser_fn) + return dataset - if self._dataset_transform_fn is not None: - 
dataset = self._dataset_transform_fn(dataset) + def _maybe_apply_data_service( + self, + dataset: tf.data.Dataset, + input_context: Optional[tf.distribute.InputContext] = None + ) -> tf.data.Dataset: + """Potentially distributes a dataset.""" + if self._enable_tf_data_service and input_context: + if self._enable_round_robin_tf_data_service: + replicas_per_input_pipeline = input_context.num_replicas_in_sync // ( + input_context.num_input_pipelines) + base_consumer_index = input_context.input_pipeline_id * ( + replicas_per_input_pipeline) + num_consumers = input_context.num_input_pipelines * ( + replicas_per_input_pipeline) + range_dataset = tf.data.Dataset.range(replicas_per_input_pipeline) + dataset = range_dataset.map(lambda i: dataset.apply( # pylint: disable=g-long-lambda + tf.data.experimental.service.distribute( + processing_mode='parallel_epochs', + service=self._tf_data_service_address, + job_name=self._tf_data_service_job_name, + consumer_index=base_consumer_index + i, + num_consumers=num_consumers))) + # Use parallel interleave to read multiple batches from a tf.data + # service worker in parallel. 
+ dataset = dataset.interleave( + lambda x: x, + cycle_length=replicas_per_input_pipeline, + num_parallel_calls=replicas_per_input_pipeline, + deterministic=True) + else: + dataset = dataset.apply( + tf.data.experimental.service.distribute( + processing_mode='parallel_epochs', + service=self._tf_data_service_address, + job_name=self._tf_data_service_job_name)) + return dataset - per_replica_batch_size = input_context.get_per_replica_batch_size( - self._global_batch_size) if input_context else self._global_batch_size + def read( + self, + input_context: Optional[tf.distribute.InputContext] = None + ) -> tf.data.Dataset: + """Generates a tf.data.Dataset object.""" + dataset = self._read_decode_and_parse_dataset(self._matched_files, + self._dataset_fn, + self._global_batch_size, + input_context, + self._tfds_builder) + dataset = _maybe_map_fn(dataset, self._postprocess_fn) + dataset = self._maybe_apply_data_service(dataset, input_context) - dataset = dataset.batch( - per_replica_batch_size, drop_remainder=self._drop_remainder) - dataset = maybe_map_fn(dataset, self._postprocess_fn) + if self._deterministic is not None: + options = tf.data.Options() + options.experimental_deterministic = self._deterministic + dataset = dataset.with_options(options) return dataset.prefetch(tf.data.experimental.AUTOTUNE) diff --git a/official/utils/registry.py b/official/core/registry.py similarity index 95% rename from official/utils/registry.py rename to official/core/registry.py index 4aff59813f11b1085860faac8c62ca8ce9e0a1f1..0ea96b0629941f6eeba3d150abea0e094b80bae6 100644 --- a/official/utils/registry.py +++ b/official/core/registry.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Registry utility.""" diff --git a/official/utils/registry_test.py b/official/core/registry_test.py similarity index 84% rename from official/utils/registry_test.py rename to official/core/registry_test.py index 6cb230c75891aaebb8306bb84a235e2d2ecd70e5..0d0639c6b10d5f9d587593d52dd6f2458c83bcd5 100644 --- a/official/utils/registry_test.py +++ b/official/core/registry_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,15 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for registry.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for registry.""" import tensorflow as tf -from official.utils import registry +from official.core import registry class RegistryTest(tf.test.TestCase): @@ -31,18 +26,20 @@ class RegistryTest(tf.test.TestCase): @registry.register(collection, 'functions/func_0') def func_test(): pass - self.assertEqual( - registry.lookup(collection, 'functions/func_0'), func_test) + + self.assertEqual(registry.lookup(collection, 'functions/func_0'), func_test) @registry.register(collection, 'classes/cls_0') class ClassRegistryKey: pass + self.assertEqual( registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey) @registry.register(collection, ClassRegistryKey) class ClassRegistryValue: pass + self.assertEqual( registry.lookup(collection, ClassRegistryKey), ClassRegistryValue) @@ -52,12 +49,15 @@ class RegistryTest(tf.test.TestCase): @registry.register(collection, 'functions/func_0') def func_test0(): pass + @registry.register(collection, 'func_1') def func_test1(): pass + @registry.register(collection, func_test1) def func_test2(): pass + expected_collection = { 'functions': { 'func_0': func_test0, @@ -73,10 +73,13 @@ class RegistryTest(tf.test.TestCase): @registry.register(collection, 'functions/func_0') def func_test0(): # pylint: disable=unused-variable pass + with self.assertRaises(KeyError): + @registry.register(collection, 'functions/func_0/sub_func') def func_test1(): # pylint: disable=unused-variable pass + with self.assertRaises(LookupError): registry.lookup(collection, 'non-exist') diff --git a/official/core/task_factory.py b/official/core/task_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..b01925a29724b289541b3a2437020cc19f22897e --- /dev/null +++ b/official/core/task_factory.py @@ -0,0 +1,67 @@ +# Copyright 
2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A global factory to register and access all registered tasks."""
+
+from official.core import registry
+
+_REGISTERED_TASK_CLS = {}
+
+
+# TODO(b/158741360): Add type annotations once pytype checks across modules.
+def register_task_cls(task_config_cls):
+ """Decorates a factory of Tasks for lookup by a subclass of TaskConfig.
+
+ This decorator supports registration of tasks as follows:
+
+ ```
+ @dataclasses.dataclass
+ class MyTaskConfig(TaskConfig):
+ # Add fields here.
+ pass
+
+ @register_task_cls(MyTaskConfig)
+ class MyTask(Task):
+ # Inherits def __init__(self, task_config).
+ pass
+
+ my_task_config = MyTaskConfig()
+ my_task = get_task(my_task_config) # Returns MyTask(my_task_config).
+ ```
+
+ Besides a class itself, other callables that create a Task from a TaskConfig
+ can be decorated by the result of this function, as long as there is at most
+ one registration for each config class.
+
+ Args:
+ task_config_cls: a subclass of TaskConfig (*not* an instance of TaskConfig).
+ Each task_config_cls can only be used for a single registration.
+
+ Returns:
+ A callable for use as class decorator that registers the decorated class
+ for creation from an instance of task_config_cls.
+ """ + return registry.register(_REGISTERED_TASK_CLS, task_config_cls) + + +def get_task(task_config, **kwargs): + """Creates a Task (of suitable subclass type) from task_config.""" + return get_task_cls(task_config.__class__)(task_config, **kwargs) + + +# The user-visible get_task() is defined after classes have been registered. +# TODO(b/158741360): Add type annotations once pytype checks across modules. +def get_task_cls(task_config_cls): + task_cls = registry.lookup(_REGISTERED_TASK_CLS, task_config_cls) + return task_cls diff --git a/official/core/train_lib.py b/official/core/train_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..ba66c2a75627213f6c50bcdab555aa36b668d21b --- /dev/null +++ b/official/core/train_lib.py @@ -0,0 +1,135 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""TFM common training driver library."""
+# pytype: disable=attribute-error
+import os
+from typing import Any, Mapping, Tuple, Optional
+
+# Import libraries
+from absl import logging
+import orbit
+import tensorflow as tf
+
+from official.core import base_task
+from official.core import base_trainer
+from official.core import config_definitions
+from official.core import train_utils
+
+maybe_create_best_ckpt_exporter = train_utils.maybe_create_best_ckpt_exporter
+
+
+def run_experiment(
+ distribution_strategy: tf.distribute.Strategy,
+ task: base_task.Task,
+ mode: str,
+ params: config_definitions.ExperimentConfig,
+ model_dir: str,
+ run_post_eval: bool = False,
+ save_summary: bool = True,
+ trainer: Optional[base_trainer.Trainer] = None
+) -> Tuple[tf.keras.Model, Mapping[str, Any]]:
+ """Runs train/eval configured by the experiment params.
+
+ Args:
+ distribution_strategy: A distribution strategy.
+ task: A Task instance.
+ mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval'
+ or 'continuous_eval'.
+ params: ExperimentConfig instance.
+ model_dir: A 'str', a path to store model checkpoints and summaries.
+ run_post_eval: Whether to run post eval once after training, metrics logs
+ are returned.
+ save_summary: Whether to save train and validation summary.
+ trainer: the base_trainer.Trainer instance. It should be created within the
+ strategy.scope().
+
+ Returns:
+ A 2-tuple of (model, eval_logs).
+ model: `tf.keras.Model` instance.
+ eval_logs: returns eval metrics logs when run_post_eval is set to True,
+ otherwise, returns {}.
+ """ + + with distribution_strategy.scope(): + if not trainer: + trainer = train_utils.create_trainer( + params, + task, + train='train' in mode, + evaluate=('eval' in mode) or run_post_eval, + checkpoint_exporter=maybe_create_best_ckpt_exporter( + params, model_dir)) + + if trainer.checkpoint: + checkpoint_manager = tf.train.CheckpointManager( + trainer.checkpoint, + directory=model_dir, + max_to_keep=params.trainer.max_to_keep, + step_counter=trainer.global_step, + checkpoint_interval=params.trainer.checkpoint_interval, + init_fn=trainer.initialize) + # Adds recovery handling. + trainer.add_recovery(params.trainer, checkpoint_manager=checkpoint_manager) + else: + checkpoint_manager = None + + controller = orbit.Controller( + strategy=distribution_strategy, + trainer=trainer if 'train' in mode else None, + evaluator=trainer, + global_step=trainer.global_step, + steps_per_loop=params.trainer.steps_per_loop, + checkpoint_manager=checkpoint_manager, + summary_dir=os.path.join(model_dir, 'train') if (save_summary) else None, + eval_summary_dir=os.path.join(model_dir, + params.trainer.validation_summary_subdir) if + (save_summary) else None, + summary_interval=params.trainer.summary_interval if + (save_summary) else None) + + logging.info('Starts to execute mode: %s', mode) + with distribution_strategy.scope(): + if mode == 'train': + controller.train(steps=params.trainer.train_steps) + elif mode == 'train_and_eval': + controller.train_and_evaluate( + train_steps=params.trainer.train_steps, + eval_steps=params.trainer.validation_steps, + eval_interval=params.trainer.validation_interval) + elif mode == 'eval': + controller.evaluate(steps=params.trainer.validation_steps) + elif mode == 'continuous_eval': + + def timeout_fn(): + if trainer.global_step.numpy() >= params.trainer.train_steps: + return True + return False + + controller.evaluate_continuously( + steps=params.trainer.validation_steps, + timeout=params.trainer.continuous_eval_timeout, + timeout_fn=timeout_fn) + 
else: + raise NotImplementedError('The mode is not implemented: %s' % mode) + + if hasattr(trainer.model, 'count_params'): + logging.info('Number of trainable params in model: %f Millions.', + trainer.model.count_params() / 10.**6) + if run_post_eval: + with distribution_strategy.scope(): + return trainer.model, trainer.evaluate( + tf.convert_to_tensor(params.trainer.validation_steps)) + else: + return trainer.model, {} diff --git a/official/core/train_lib_test.py b/official/core/train_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..132134fbd49bf1316ed3ea35bfefbfc13d190a18 --- /dev/null +++ b/official/core/train_lib_test.py @@ -0,0 +1,137 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for train_ctl_lib.""" +import json +import os + +from absl import flags +from absl.testing import flagsaver +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.common import flags as tfm_flags +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils + +FLAGS = flags.FLAGS + +tfm_flags.define_flags() + + +class TrainTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(TrainTest, self).setUp() + self._test_config = { + 'trainer': { + 'checkpoint_interval': 10, + 'steps_per_loop': 10, + 'summary_interval': 10, + 'train_steps': 10, + 'validation_steps': 5, + 'validation_interval': 10, + 'continuous_eval_timeout': 1, + 'validation_summary_subdir': 'validation', + 'optimizer_config': { + 'optimizer': { + 'type': 'sgd', + }, + 'learning_rate': { + 'type': 'constant' + } + } + }, + } + + @combinations.generate( + combinations.combine( + distribution_strategy=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + flag_mode=['train', 'eval', 'train_and_eval'], + run_post_eval=[True, False])) + def test_end_to_end(self, distribution_strategy, flag_mode, run_post_eval): + model_dir = self.get_temp_dir() + flags_dict = dict( + experiment='mock', + mode=flag_mode, + model_dir=model_dir, + params_override=json.dumps(self._test_config)) + with flagsaver.flagsaver(**flags_dict): + params = train_utils.parse_configuration(flags.FLAGS) + train_utils.serialize_config(params, model_dir) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + _, logs = train_lib.run_experiment( + 
distribution_strategy=distribution_strategy, + task=task, + mode=flag_mode, + params=params, + model_dir=model_dir, + run_post_eval=run_post_eval) + + if 'eval' in flag_mode: + self.assertTrue( + tf.io.gfile.exists( + os.path.join(model_dir, + params.trainer.validation_summary_subdir))) + if run_post_eval: + self.assertNotEmpty(logs) + else: + self.assertEmpty(logs) + self.assertNotEmpty( + tf.io.gfile.glob(os.path.join(model_dir, 'params.yaml'))) + if flag_mode == 'eval': + return + self.assertNotEmpty( + tf.io.gfile.glob(os.path.join(model_dir, 'checkpoint'))) + # Tests continuous evaluation. + _, logs = train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode='continuous_eval', + params=params, + model_dir=model_dir, + run_post_eval=run_post_eval) + print(logs) + + def test_parse_configuration(self): + model_dir = self.get_temp_dir() + flags_dict = dict( + experiment='mock', + mode='train', + model_dir=model_dir, + params_override=json.dumps(self._test_config)) + with flagsaver.flagsaver(**flags_dict): + params = train_utils.parse_configuration(flags.FLAGS, lock_return=True) + with self.assertRaises(ValueError): + params.override({'task': {'init_checkpoint': 'Foo'}}) + + params = train_utils.parse_configuration(flags.FLAGS, lock_return=False) + params.override({'task': {'init_checkpoint': 'Bar'}}) + self.assertEqual(params.task.init_checkpoint, 'Bar') + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/core/train_utils.py b/official/core/train_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..32a3cd7be6d8ac87b30428aa5e5cd15ca1ab802a --- /dev/null +++ b/official/core/train_utils.py @@ -0,0 +1,369 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Training utils."""
+import copy
+import json
+import os
+import pprint
+from typing import Any, Callable, Dict, List, Optional
+
+from absl import logging
+import dataclasses
+import gin
+import orbit
+import tensorflow as tf
+
+from official.core import base_task
+from official.core import base_trainer
+from official.core import config_definitions
+from official.core import exp_factory
+from official.modeling import hyperparams
+
+
+def get_leaf_nested_dict(d: Dict[str, Any], keys: List[str]) -> Dict[str, Any]:
+ """Get leaf from a dictionary with arbitrary depth with a list of keys.
+
+ Args:
+ d: The dictionary to extract value from.
+ keys: The list of keys to extract values recursively.
+
+ Returns:
+ The value of the leaf.
+
+ Raises:
+ KeyError: If the value of keys extracted is a dictionary.
+ """
+ leaf = d
+ for k in keys:
+ if not isinstance(leaf, dict) or k not in leaf:
+ raise KeyError(
+ 'Path not exist while traversing the dictionary: d with keys'
+ ': %s.' % keys)
+ leaf = leaf[k]
+
+ if isinstance(leaf, dict):
+ raise KeyError('The value extracted with keys: %s is not a leaf of the '
+ 'dictionary: %s.' % (keys, d))
+ return leaf
+
+
+def cast_leaf_nested_dict(d: Dict[str, Any],
+ cast_fn: Callable[[Any], Any]) -> Dict[str, Any]:
+ """Cast the leaves of a dictionary with arbitrary depth in place.
+
+ Args:
+ d: The dictionary to extract value from.
+ cast_fn: The casting function.
+
+ Returns:
+ A dictionary with the same structure as d.
+ """
+ for key, value in d.items():
+ if isinstance(value, dict):
+ d[key] = cast_leaf_nested_dict(value, cast_fn)
+ else:
+ d[key] = cast_fn(value)
+ return d
+
+
+def maybe_create_best_ckpt_exporter(params: config_definitions.ExperimentConfig,
+ data_dir: str) -> Any:
+ """Maybe create a BestCheckpointExporter object, according to the config."""
+ export_subdir = params.trainer.best_checkpoint_export_subdir
+ metric_name = params.trainer.best_checkpoint_eval_metric
+ metric_comp = params.trainer.best_checkpoint_metric_comp
+ if data_dir and export_subdir and metric_name:
+ best_ckpt_dir = os.path.join(data_dir, export_subdir)
+ best_ckpt_exporter = BestCheckpointExporter(best_ckpt_dir, metric_name,
+ metric_comp)
+ logging.info(
+ 'Created the best checkpoint exporter. '
+ 'data_dir: %s, export_subdir: %s, metric_name: %s', data_dir,
+ export_subdir, metric_name)
+ else:
+ best_ckpt_exporter = None
+
+ return best_ckpt_exporter
+
+
+# TODO(b/180147589): Add tests for this module.
+class BestCheckpointExporter:
+ """Keeps track of the best result, and saves its checkpoint.
+
+ Orbit will support an API for checkpoint exporter. This class will be used
+ together with orbit once this functionality is ready.
+ """
+
+ def __init__(self, export_dir: str, metric_name: str, metric_comp: str):
+ """Initialization.
+
+ Args:
+ export_dir: The directory that will contain exported checkpoints.
+ metric_name: Indicates which metric to look at, when determining which
+ result is better. If eval_logs being passed to maybe_export_checkpoint
+ is a nested dictionary, use `|` as a separator for different layers.
+ metric_comp: Indicates how to compare results. Either `lower` or `higher`.
+ """
+ self._export_dir = export_dir
+ self._metric_name = metric_name.split('|')
+ self._metric_comp = metric_comp
+ if self._metric_comp not in ('lower', 'higher'):
+ raise ValueError('best checkpoint metric comp must be one of '
+ 'higher, lower.
Got: {}'.format(self._metric_comp)) + tf.io.gfile.makedirs(os.path.dirname(self.best_ckpt_logs_path)) + self._best_ckpt_logs = self._maybe_load_best_eval_metric() + self._checkpoint_manager = None + + def _get_checkpoint_manager(self, checkpoint): + """Gets an existing checkpoint manager or creates a new one.""" + if self._checkpoint_manager is None or (self._checkpoint_manager.checkpoint + != checkpoint): + logging.info('Creates a new checkpoint manager.') + self._checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + directory=self._export_dir, + max_to_keep=1, + checkpoint_name='best_ckpt') + + return self._checkpoint_manager + + def maybe_export_checkpoint(self, checkpoint, eval_logs, global_step): + logging.info('[BestCheckpointExporter] received eval_logs: %s, at step: %d', + eval_logs, global_step) + if self._best_ckpt_logs is None or self._new_metric_is_better( + self._best_ckpt_logs, eval_logs): + self._best_ckpt_logs = eval_logs + self._export_best_eval_metric(checkpoint, self._best_ckpt_logs, + global_step) + + def _maybe_load_best_eval_metric(self): + if not tf.io.gfile.exists(self.best_ckpt_logs_path): + return None + with tf.io.gfile.GFile(self.best_ckpt_logs_path, 'r') as reader: + return json.loads(reader.read()) + + def _new_metric_is_better(self, old_logs, new_logs): + """Check if the metric in new_logs is better than the metric in old_logs.""" + old_value = float( + orbit.utils.get_value( + get_leaf_nested_dict(old_logs, self._metric_name))) + new_value = float( + orbit.utils.get_value( + get_leaf_nested_dict(new_logs, self._metric_name))) + + logging.info('[BestCheckpointExporter] comparing results. 
old: %f, new: %f', + old_value, new_value) + if self._metric_comp == 'higher': + if new_value > old_value: + logging.info('[BestCheckpointExporter] ' + 'the new number is better since it is higher.') + return True + else: # self._metric_comp == 'lower': + if new_value < old_value: + logging.info('[BestCheckpointExporter] ' + 'the new number is better since it is lower.') + return True + return False + + def _export_best_eval_metric(self, checkpoint, eval_logs, global_step): + """Export evaluation results of the best checkpoint into a json file.""" + eval_logs_ext = copy.copy(eval_logs) + eval_logs_ext['best_ckpt_global_step'] = global_step + eval_logs_ext = cast_leaf_nested_dict( + eval_logs_ext, lambda x: float(orbit.utils.get_value(x))) + # Saving json file is very fast. + with tf.io.gfile.GFile(self.best_ckpt_logs_path, 'w') as writer: + writer.write(json.dumps(eval_logs_ext, indent=4) + '\n') + + self._get_checkpoint_manager(checkpoint).save() + + @property + def best_ckpt_logs(self): + return self._best_ckpt_logs + + @property + def best_ckpt_logs_path(self): + return os.path.join(self._export_dir, 'info.json') + + @property + def best_ckpt_path(self): + """Returns the best ckpt path or None if there is no ckpt yet.""" + return tf.train.latest_checkpoint(self._export_dir) + + +@gin.configurable +def create_trainer(params: config_definitions.ExperimentConfig, + task: base_task.Task, + train: bool, + evaluate: bool, + checkpoint_exporter: Optional[BestCheckpointExporter] = None, + trainer_cls=base_trainer.Trainer) -> base_trainer.Trainer: + """Create trainer.""" + logging.info('Running default trainer.') + model = task.build_model() + optimizer = task.create_optimizer(params.trainer.optimizer_config, + params.runtime) + return trainer_cls( + params, + task, + model=model, + optimizer=optimizer, + train=train, + evaluate=evaluate, + checkpoint_exporter=checkpoint_exporter) + + +@dataclasses.dataclass +class ParseConfigOptions: + """Use this dataclass instead of 
FLAGS to customize parse_configuration().""" + experiment: str + config_file: List[str] + tpu: str = '' + tf_data_service: str = '' + params_override: str = '' + + def __contains__(self, name): + return name in dataclasses.asdict(self) + + +def parse_configuration(flags_obj, lock_return=True, print_return=True): + """Parses ExperimentConfig from flags.""" + + # 1. Get the default config from the registered experiment. + params = exp_factory.get_exp_config(flags_obj.experiment) + + # 2. Get the first level of override from `--config_file`. + # `--config_file` is typically used as a template that specifies the common + # override for a particular experiment. + for config_file in flags_obj.config_file or []: + params = hyperparams.override_params_dict( + params, config_file, is_strict=True) + + # 3. Override the TPU address and tf.data service address. + params.override({ + 'runtime': { + 'tpu': flags_obj.tpu, + }, + }) + if ('tf_data_service' in flags_obj and flags_obj.tf_data_service and + isinstance(params.task, config_definitions.TaskConfig)): + params.override({ + 'task': { + 'train_data': { + 'tf_data_service_address': flags_obj.tf_data_service, + }, + 'validation_data': { + 'tf_data_service_address': flags_obj.tf_data_service, + } + } + }) + + # 4. Get the second level of override from `--params_override`. + # `--params_override` is typically used as a further override over the + # template. For example, one may define a particular template for training + # ResNet50 on ImageNet in a config file and pass it via `--config_file`, + # then define different learning rates and pass it via `--params_override`. 
+ if flags_obj.params_override: + params = hyperparams.override_params_dict( + params, flags_obj.params_override, is_strict=True) + + params.validate() + if lock_return: + params.lock() + + if print_return: + pp = pprint.PrettyPrinter() + logging.info('Final experiment parameters: %s', + pp.pformat(params.as_dict())) + + return params + + +def serialize_config(params: config_definitions.ExperimentConfig, + model_dir: str): + """Serializes and saves the experiment config.""" + params_save_path = os.path.join(model_dir, 'params.yaml') + logging.info('Saving experiment configuration to %s', params_save_path) + tf.io.gfile.makedirs(model_dir) + hyperparams.save_params_dict_to_yaml(params, params_save_path) + + +def save_gin_config(filename_surfix: str, model_dir: str): + """Serializes and saves the experiment config.""" + gin_save_path = os.path.join( + model_dir, 'operative_config.{}.gin'.format(filename_surfix)) + logging.info('Saving gin configurations to %s', gin_save_path) + tf.io.gfile.makedirs(model_dir) + with tf.io.gfile.GFile(gin_save_path, 'w') as f: + f.write(gin.operative_config_str()) + + +def read_global_step_from_checkpoint(ckpt_file_path): + """Read global step from checkpoint, or get global step from its filename.""" + global_step = tf.Variable(-1, dtype=tf.int64) + ckpt = tf.train.Checkpoint(global_step=global_step) + try: + ckpt.restore(ckpt_file_path).expect_partial() + global_step_maybe_restored = global_step.numpy() + except tf.errors.InvalidArgumentError: + global_step_maybe_restored = -1 + + if global_step_maybe_restored == -1: + raise ValueError('global_step not found in checkpoint {}. 
' + 'If you want to run finetune eval jobs, you need to ' + 'make sure that your pretrain model writes ' + 'global_step in its checkpoints.'.format(ckpt_file_path)) + global_step_restored = global_step.numpy() + logging.info('get global_step %d from checkpoint %s', global_step_restored, + ckpt_file_path) + return global_step_restored + + +def write_json_summary(log_dir, global_step, eval_metrics): + """Dump evaluation metrics to json file.""" + serializable_dict = {} + for name, value in eval_metrics.items(): + if hasattr(value, 'numpy'): + serializable_dict[name] = str(value.numpy()) + else: + serializable_dict[name] = str(value) + output_json = os.path.join(log_dir, 'metrics-{}.json'.format(global_step)) + logging.info('Evaluation results at pretrain step %d: %s', global_step, + serializable_dict) + with tf.io.gfile.GFile(output_json, 'w') as writer: + writer.write(json.dumps(serializable_dict, indent=4) + '\n') + + +def write_summary(summary_writer, global_step, eval_metrics): + """Write evaluation metrics to TF summary.""" + numeric_dict = {} + for name, value in eval_metrics.items(): + numeric_dict[name] = float(orbit.utils.get_value(value)) + with summary_writer.as_default(): + for name, value in numeric_dict.items(): + tf.summary.scalar(name, value, step=global_step) + summary_writer.flush() + + +def remove_ckpts(model_dir): + """Remove model checkpoints, so we can restart.""" + ckpts = os.path.join(model_dir, 'ckpt-*') + logging.info('removing checkpoint files %s', ckpts) + for file_to_remove in tf.io.gfile.glob(ckpts): + tf.io.gfile.rmtree(file_to_remove) + + file_to_remove = os.path.join(model_dir, 'checkpoint') + if tf.io.gfile.exists(file_to_remove): + tf.io.gfile.remove(file_to_remove) diff --git a/official/core/train_utils_test.py b/official/core/train_utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..134950c2bb68f3bce9719504edf5e6ca5e907fbb --- /dev/null +++ b/official/core/train_utils_test.py @@ -0,0 +1,56 @@ +# 
Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.core.train_utils.""" + +import tensorflow as tf + +from official.core import train_utils + + +class TrainUtilsTest(tf.test.TestCase): + + def test_get_leaf_nested_dict(self): + d = {'a': {'i': {'x': 5}}} + self.assertEqual(train_utils.get_leaf_nested_dict(d, ['a', 'i', 'x']), 5) + + def test_get_leaf_nested_dict_not_leaf(self): + with self.assertRaisesRegex(KeyError, 'The value extracted with keys.*'): + d = {'a': {'i': {'x': 5}}} + train_utils.get_leaf_nested_dict(d, ['a', 'i']) + + def test_get_leaf_nested_dict_path_not_exist_missing_key(self): + with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): + d = {'a': {'i': {'x': 5}}} + train_utils.get_leaf_nested_dict(d, ['a', 'i', 'y']) + + def test_get_leaf_nested_dict_path_not_exist_out_of_range(self): + with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): + d = {'a': {'i': {'x': 5}}} + train_utils.get_leaf_nested_dict(d, ['a', 'i', 'z']) + + def test_get_leaf_nested_dict_path_not_exist_meets_leaf(self): + with self.assertRaisesRegex(KeyError, 'Path not exist while traversing .*'): + d = {'a': {'i': 5}} + train_utils.get_leaf_nested_dict(d, ['a', 'i', 'z']) + + def test_cast_leaf_nested_dict(self): + d = {'a': {'i': {'x': '123'}}, 'b': 456.5} + d = train_utils.cast_leaf_nested_dict(d, int) + self.assertEqual(d['a']['i']['x'], 123) + 
self.assertEqual(d['b'], 456) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/__init__.py b/official/modeling/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/modeling/__init__.py +++ b/official/modeling/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/modeling/activations/__init__.py b/official/modeling/activations/__init__.py index 2b558fef3cb276c61e58d93c219db6a899c107ef..086e1fb975f8517dcff3c020f5fd932f6e55edc7 100644 --- a/official/modeling/activations/__init__.py +++ b/official/modeling/activations/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Activations package definition.""" from official.modeling.activations.gelu import gelu +from official.modeling.activations.relu import relu6 +from official.modeling.activations.sigmoid import hard_sigmoid from official.modeling.activations.swish import hard_swish from official.modeling.activations.swish import identity from official.modeling.activations.swish import simple_swish diff --git a/official/modeling/activations/gelu.py b/official/modeling/activations/gelu.py index c045bffa95b29e069831b548701b76d1b8e76c0d..a73294aa5493747af66d9bbbc2cc26914600d7cf 100644 --- a/official/modeling/activations/gelu.py +++ b/official/modeling/activations/gelu.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,14 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Gaussian error linear unit.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import math +"""Gaussian error linear unit.""" import tensorflow as tf @@ -35,6 +29,4 @@ def gelu(x): Returns: `x` with the GELU activation applied. 
""" - cdf = 0.5 * (1.0 + tf.tanh( - (math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))))) - return x * cdf + return tf.keras.activations.gelu(x, approximate=True) diff --git a/official/modeling/activations/gelu_test.py b/official/modeling/activations/gelu_test.py index dc3b95ca8be16c058c592247684e45d419b50cc5..cfe1950d9f112c3c33421c410ecdd4ceedd6f1d7 100644 --- a/official/modeling/activations/gelu_test.py +++ b/official/modeling/activations/gelu_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for the Gaussian error linear unit.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for the Gaussian error linear unit.""" import tensorflow as tf diff --git a/official/modeling/activations/relu.py b/official/modeling/activations/relu.py new file mode 100644 index 0000000000000000000000000000000000000000..b3941b2f3462fa6a3eea28e023a4450bcc070797 --- /dev/null +++ b/official/modeling/activations/relu.py @@ -0,0 +1,31 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Customized Relu activation.""" + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='Text') +def relu6(features): + """Computes the Relu6 activation function. + + Args: + features: A `Tensor` representing preactivation values. + + Returns: + The activation value. + """ + features = tf.convert_to_tensor(features) + return tf.nn.relu6(features) diff --git a/official/modeling/activations/relu_test.py b/official/modeling/activations/relu_test.py new file mode 100644 index 0000000000000000000000000000000000000000..215f189ea9a00ed93bf012d33429fd82b3dc7ca6 --- /dev/null +++ b/official/modeling/activations/relu_test.py @@ -0,0 +1,35 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the customized Relu activation.""" + +import tensorflow as tf + +from tensorflow.python.keras import \ + keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.modeling import activations + + +@keras_parameterized.run_all_keras_modes +class CustomizedReluTest(keras_parameterized.TestCase): + + def test_relu6(self): + features = [[.25, 0, -.25], [-1, -2, 3]] + customized_relu6_data = activations.relu6(features) + relu6_data = tf.nn.relu6(features) + self.assertAllClose(customized_relu6_data, relu6_data) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/activations/sigmoid.py b/official/modeling/activations/sigmoid.py new file mode 100644 index 0000000000000000000000000000000000000000..e815f7ee8c5c0803b7e07274e2665ca98eb468bd --- /dev/null +++ b/official/modeling/activations/sigmoid.py @@ -0,0 +1,31 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Customized Sigmoid activation.""" + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='Text') +def hard_sigmoid(features): + """Computes the hard sigmoid activation function. + + Args: + features: A `Tensor` representing preactivation values. + + Returns: + The activation value. 
+ """ + features = tf.convert_to_tensor(features) + return tf.nn.relu6(features + tf.constant(3.)) * 0.16667 diff --git a/official/modeling/activations/sigmoid_test.py b/official/modeling/activations/sigmoid_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6aad90ef3645b08708dbfde155654070c40d72ce --- /dev/null +++ b/official/modeling/activations/sigmoid_test.py @@ -0,0 +1,40 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the customized Sigmoid activation.""" + +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import \ + keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.modeling import activations + + +@keras_parameterized.run_all_keras_modes +class CustomizedSigmoidTest(keras_parameterized.TestCase): + + def _hard_sigmoid_nn(self, x): + x = np.float32(x) + return tf.nn.relu6(x + 3.) 
* 0.16667 + + def test_hard_sigmoid(self): + features = [[.25, 0, -.25], [-1, -2, 3]] + customized_hard_sigmoid_data = activations.hard_sigmoid(features) + sigmoid_data = self._hard_sigmoid_nn(features) + self.assertAllClose(customized_hard_sigmoid_data, sigmoid_data) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/activations/swish.py b/official/modeling/activations/swish.py index 1d799613095efe1a16dade9673adddee05f2679d..7fcac2b2bffb175edd2256fbcdeb539ed4246e8c 100644 --- a/official/modeling/activations/swish.py +++ b/official/modeling/activations/swish.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Customized Swish activation.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Customized Swish activation.""" import tensorflow as tf diff --git a/official/modeling/activations/swish_test.py b/official/modeling/activations/swish_test.py index 22042e9a290a420805fc75bbfca6ded6e917d9eb..3cb9495d8d19a3b89e4a9b2db0679090ac1e3e9d 100644 --- a/official/modeling/activations/swish_test.py +++ b/official/modeling/activations/swish_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for the customized Swish activation.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for the customized Swish activation.""" import numpy as np import tensorflow as tf diff --git a/official/modeling/hyperparams/__init__.py b/official/modeling/hyperparams/__init__.py index 87c00e7f2a1934800cac21405aa924f2ddc1f241..bcbc0aedd3d6013c14c641d9e61a0a717f188ec5 100644 --- a/official/modeling/hyperparams/__init__.py +++ b/official/modeling/hyperparams/__init__.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Hyperparams package definition.""" # pylint: disable=g-multiple-import from official.modeling.hyperparams.base_config import * diff --git a/official/modeling/hyperparams/base_config.py b/official/modeling/hyperparams/base_config.py index b03f069c8bdae2317bd57ac9b2cc4c91ce9d169b..5e9b25ee13ae1e666b4571f6d79794d877ca1609 100644 --- a/official/modeling/hyperparams/base_config.py +++ b/official/modeling/hyperparams/base_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,17 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Base configurations to standardize experiments.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Base configurations to standardize experiments.""" import copy import functools from typing import Any, List, Mapping, Optional, Type +from absl import logging import dataclasses import tensorflow as tf @@ -35,11 +30,15 @@ from official.modeling.hyperparams import params_dict class Config(params_dict.ParamsDict): """The base configuration class that supports YAML/JSON based overrides. - * It recursively enforces a whitelist of basic types and container types, so + Because of YAML/JSON serialization limitations, some semantics of dataclass + are not supported: + * It recursively enforces a allowlist of basic types and container types, so it avoids surprises with copy and reuse caused by unanticipated types. - * It converts dict to Config even within sequences, + * Warning: it converts Dict to `Config` even within sequences, e.g. for config = Config({'key': [([{'a': 42}],)]), type(config.key[0][0][0]) is Config rather than dict. + If you define/annotate some field as Dict, the field will convert to a + `Config` instance and lose the dictionary type. """ # It's safe to add bytes and other immutable types here. 
@@ -142,10 +141,11 @@ class Config(params_dict.ParamsDict): return subconfig_type def __post_init__(self, default_params, restrictions, *args, **kwargs): - super().__init__(default_params=default_params, - restrictions=restrictions, - *args, - **kwargs) + super().__init__( + default_params=default_params, + restrictions=restrictions, + *args, + **kwargs) def _set(self, k, v): """Overrides same method in ParamsDict. @@ -160,13 +160,32 @@ class Config(params_dict.ParamsDict): RuntimeError """ subconfig_type = self._get_subconfig_type(k) - if isinstance(v, dict): + + def is_null(k): if k not in self.__dict__ or not self.__dict__[k]: + return True + return False + + if isinstance(v, dict): + if is_null(k): # If the key not exist or the value is None, a new Config-family object # sould be created for the key. self.__dict__[k] = subconfig_type(v) else: self.__dict__[k].override(v) + elif not is_null(k) and isinstance(v, self.SEQUENCE_TYPES) and all( + [not isinstance(e, self.IMMUTABLE_TYPES) for e in v]): + if len(self.__dict__[k]) == len(v): + for i in range(len(v)): + self.__dict__[k][i].override(v[i]) + elif not all([isinstance(e, self.IMMUTABLE_TYPES) for e in v]): + logging.warning( + "The list/tuple don't match the value dictionaries provided. Thus, " + 'the list/tuple is determined by the type annotation and ' + 'values provided. 
This is error-prone.') + self.__dict__[k] = self._import_config(v, subconfig_type) + else: + self.__dict__[k] = self._import_config(v, subconfig_type) else: self.__dict__[k] = self._import_config(v, subconfig_type) @@ -220,16 +239,19 @@ class Config(params_dict.ParamsDict): } def replace(self, **kwargs): - """Like `override`, but returns a copy with the current config unchanged.""" - params = self.__class__(self) - params.override(kwargs, is_strict=True) + """Overrides/returns a unlocked copy with the current config unchanged.""" + # pylint: disable=protected-access + params = copy.deepcopy(self) + params._locked = False + params._override(kwargs, is_strict=True) + # pylint: enable=protected-access return params @classmethod def from_yaml(cls, file_path: str): # Note: This only works if the Config has all default values. with tf.io.gfile.GFile(file_path, 'r') as f: - loaded = yaml.load(f) + loaded = yaml.load(f, Loader=yaml.FullLoader) config = cls() config.override(loaded) return config diff --git a/official/modeling/hyperparams/base_config_test.py b/official/modeling/hyperparams/base_config_test.py index 501f95899f526c8eab7cbfaaafb65433389ce0d8..3e64ec532ab46efc0d0986f982e351eaf3f7153e 100644 --- a/official/modeling/hyperparams/base_config_test.py +++ b/official/modeling/hyperparams/base_config_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== import pprint from typing import List, Tuple @@ -45,6 +43,17 @@ class DumpConfig3(DumpConfig2): g: Tuple[DumpConfig1, ...] = (DumpConfig1(),) +@dataclasses.dataclass +class DumpConfig4(DumpConfig2): + x: int = 3 + + +@dataclasses.dataclass +class DummyConfig5(base_config.Config): + y: Tuple[DumpConfig2, ...] = (DumpConfig2(), DumpConfig4()) + z: Tuple[str] = ('a',) + + class BaseConfigTest(parameterized.TestCase, tf.test.TestCase): def assertHasSameTypes(self, c, d, msg=''): @@ -106,6 +115,22 @@ class BaseConfigTest(parameterized.TestCase, tf.test.TestCase): self.assertEqual(config.g[0].a, 4) self.assertEqual(config.g[0].b, 'new text 3') + def test_replace(self): + config = DumpConfig2() + new_config = config.replace(e={'a': 2}) + self.assertEqual(new_config.e.a, 2) + self.assertIsInstance(new_config.e, DumpConfig1) + + config = DumpConfig2(e=DumpConfig2()) + new_config = config.replace(e={'c': 4}) + self.assertEqual(new_config.e.c, 4) + self.assertIsInstance(new_config.e, DumpConfig2) + + config = DumpConfig3() + new_config = config.replace(g=[{'a': 4, 'b': 'new text 3'}]) + self.assertIsInstance(new_config.g[0], DumpConfig1) + self.assertEqual(new_config.g[0].a, 4) + @parameterized.parameters( ('_locked', "The key '_locked' is internally reserved."), ('_restrictions', "The key '_restrictions' is internally reserved."), @@ -147,10 +172,8 @@ class BaseConfigTest(parameterized.TestCase, tf.test.TestCase): params.override({'c': {'c3': 30}}, is_strict=True) config = base_config.Config({'key': [{'a': 42}]}) - config.override({'key': [{'b': 43}]}) - self.assertEqual(config.key[0].b, 43) - with self.assertRaisesRegex(AttributeError, 'The key `a` does not exist'): - _ = config.key[0].a + with self.assertRaisesRegex(KeyError, "The key 'b' does not exist"): + config.override({'key': [{'b': 43}]}) @parameterized.parameters( (lambda x: x, 'Unknown type'), @@ -294,6 +317,44 @@ class 
BaseConfigTest(parameterized.TestCase, tf.test.TestCase): ]), "['s', 1, 1.0, True, None, {}, [], (), {8: 9, (2,): (3, [4], {6: 7})}]") + def test_with_restrictions(self): + restrictions = ['e.a[a-zA-Z][\w\.]*) # variable name: "var" or "x" \s*=\s* ((?P\'(.*?)\' # single quote @@ -44,6 +41,26 @@ _PARAM_RE = re.compile(r""" _CONST_VALUE_RE = re.compile(r'(\d.*|-\d.*|None)') +# Yaml loader with an implicit resolver to parse float decimal and exponential +# format. The regular experission parse the following cases: +# 1- Decimal number with an optional exponential term. +# 2- Integer number with an exponential term. +# 3- Decimal number with an optional exponential term. +# 4- Decimal number. + +LOADER = yaml.SafeLoader +LOADER.add_implicit_resolver( + 'tag:yaml.org,2002:float', + re.compile(r''' + ^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? + | + [-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) + | + \\.[0-9_]+(?:[eE][-+][0-9]+)? + | + [-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*)$''', re.X), + list('-+0123456789.')) + class ParamsDict(object): """A hyperparameter container class.""" @@ -72,7 +89,6 @@ class ParamsDict(object): if default_params is None: default_params = {} self.override(default_params, is_strict=False) - self.validate() def _set(self, k, v): if isinstance(v, dict): @@ -138,8 +154,8 @@ class ParamsDict(object): ValueError: if the ParamsDict instance has been locked. """ if k in ParamsDict.RESERVED_ATTR: - raise AttributeError('The key `{}` is reserved. No change is allowes. ' - .format(k)) + raise AttributeError( + 'The key `{}` is reserved. No change is allowes. '.format(k)) if k not in self.__dict__.keys(): raise AttributeError('The key `{}` does not exist. '.format(k)) if self._locked: @@ -150,13 +166,13 @@ class ParamsDict(object): """Override the ParamsDict with a set of given params. Args: - override_params: a dict or a ParamsDict specifying the parameters to - be overridden. 
+ override_params: a dict or a ParamsDict specifying the parameters to be + overridden. is_strict: a boolean specifying whether override is strict or not. If - True, keys in `override_params` must be present in the ParamsDict. - If False, keys in `override_params` can be different from what is - currently defined in the ParamsDict. In this case, the ParamsDict will - be extended to include the new keys. + True, keys in `override_params` must be present in the ParamsDict. If + False, keys in `override_params` can be different from what is currently + defined in the ParamsDict. In this case, the ParamsDict will be extended + to include the new keys. """ if self._locked: raise ValueError('The ParamsDict has been locked. No change is allowed.') @@ -230,7 +246,7 @@ class ParamsDict(object): ['a.a1 == b.ccc.a1', 'a.a2 <= b.bb.bb2'] What it enforces are: - - a.a1 = 1 == b.ccc.a1 = 2 + - a.a1 = 1 == b.ccc.a1 = 1 - a.a2 = 2 <= b.bb.bb2 = 20 Raises: @@ -240,6 +256,7 @@ class ParamsDict(object): (2) any inconsistency violating the restriction is found. ValueError: if the restriction defined in the string is not supported. """ + def _get_kv(dotted_string, params_dict): """Get keys and values indicated by dotted_string.""" if _CONST_VALUE_RE.match(dotted_string) is not None: @@ -270,56 +287,64 @@ class ParamsDict(object): tokens = restriction.split('==') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v != right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' - .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) elif '!=' in restriction: tokens = restriction.split('!=') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v == right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' 
- .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) elif '<' in restriction: tokens = restriction.split('<') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v >= right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' - .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) elif '<=' in restriction: tokens = restriction.split('<=') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v > right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' - .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) elif '>' in restriction: tokens = restriction.split('>') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v <= right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' - .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) elif '>=' in restriction: tokens = restriction.split('>=') _, left_v, _, right_v = _get_kvs(tokens, params_dict) if left_v < right_v: - raise KeyError('Found inconsistncy between key `{}` and key `{}`.' 
- .format(tokens[0], tokens[1])) + raise KeyError( + 'Found inconsistncy between key `{}` and key `{}`.'.format( + tokens[0], tokens[1])) else: raise ValueError('Unsupported relation in restriction.') -def read_yaml_to_params_dict(file_path): +def read_yaml_to_params_dict(file_path: str): """Reads a YAML file to a ParamsDict.""" with tf.io.gfile.GFile(file_path, 'r') as f: - params_dict = yaml.load(f) + params_dict = yaml.load(f, Loader=LOADER) return ParamsDict(params_dict) def save_params_dict_to_yaml(params, file_path): """Saves the input ParamsDict to a YAML file.""" with tf.io.gfile.GFile(file_path, 'w') as f: + def _my_list_rep(dumper, data): # u'tag:yaml.org,2002:seq' is the YAML internal tag for sequence. return dumper.represent_sequence( u'tag:yaml.org,2002:seq', data, flow_style=True) + yaml.add_representer(list, _my_list_rep) yaml.dump(params.as_dict(), f, default_flow_style=False) @@ -408,8 +433,8 @@ def override_params_dict(params, dict_or_string_or_yaml_file, is_strict): Args: params: a ParamsDict object to be overridden. - dict_or_string_or_yaml_file: a Python dict, JSON/YAML/CSV string or - path to a YAML file specifying the parameters to be overridden. + dict_or_string_or_yaml_file: a Python dict, JSON/YAML/CSV string or path to + a YAML file specifying the parameters to be overridden. is_strict: a boolean specifying whether override is strict or not. 
Returns: @@ -428,12 +453,12 @@ def override_params_dict(params, dict_or_string_or_yaml_file, is_strict): nested_csv_str_to_json_str(dict_or_string_or_yaml_file)) except ValueError: pass - params_dict = yaml.load(dict_or_string_or_yaml_file) + params_dict = yaml.load(dict_or_string_or_yaml_file, Loader=LOADER) if isinstance(params_dict, dict): params.override(params_dict, is_strict) else: with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f: - params.override(yaml.load(f), is_strict) + params.override(yaml.load(f, Loader=yaml.FullLoader), is_strict) else: raise ValueError('Unknown input type to parse.') return params diff --git a/official/modeling/hyperparams/params_dict_test.py b/official/modeling/hyperparams/params_dict_test.py index 169ffa47ceff5717c2ae375f7e1114c5b05f3ea1..248a81652a496266fb9656d40f77e665e8606f10 100644 --- a/official/modeling/hyperparams/params_dict_test.py +++ b/official/modeling/hyperparams/params_dict_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== """Tests for params_dict.py.""" @@ -56,8 +55,7 @@ class ParamsDictTest(tf.test.TestCase): def test_setattr(self): params = params_dict.ParamsDict() - params.override( - {'a': 'aa', 'b': 2, 'c': None}, is_strict=False) + params.override({'a': 'aa', 'b': 2, 'c': None}, is_strict=False) params.c = 'ccc' self.assertEqual(params.a, 'aa') self.assertEqual(params.b, 2) @@ -65,17 +63,23 @@ class ParamsDictTest(tf.test.TestCase): def test_getattr(self): params = params_dict.ParamsDict() - params.override( - {'a': 'aa', 'b': 2, 'c': None}, is_strict=False) + params.override({'a': 'aa', 'b': 2, 'c': None}, is_strict=False) self.assertEqual(params.a, 'aa') self.assertEqual(params.b, 2) self.assertEqual(params.c, None) def test_delattr(self): params = params_dict.ParamsDict() - params.override( - {'a': 'aa', 'b': 2, 'c': None, 'd': {'d1': 1, 'd2': 10}}, - is_strict=False) + params.override({ + 'a': 'aa', + 'b': 2, + 'c': None, + 'd': { + 'd1': 1, + 'd2': 10 + } + }, + is_strict=False) del params.c self.assertEqual(params.a, 'aa') self.assertEqual(params.b, 2) @@ -87,22 +91,26 @@ class ParamsDictTest(tf.test.TestCase): def test_contains(self): params = params_dict.ParamsDict() - params.override( - {'a': 'aa'}, is_strict=False) + params.override({'a': 'aa'}, is_strict=False) self.assertIn('a', params) self.assertNotIn('b', params) def test_get(self): params = params_dict.ParamsDict() - params.override( - {'a': 'aa'}, is_strict=False) + params.override({'a': 'aa'}, is_strict=False) self.assertEqual(params.get('a'), 'aa') self.assertEqual(params.get('b', 2), 2) self.assertEqual(params.get('b'), None) def test_override_is_strict_true(self): - params = params_dict.ParamsDict( - {'a': 'aa', 'b': 2, 'c': {'c1': 'cc', 'c2': 20}}) + params = params_dict.ParamsDict({ + 'a': 'aa', + 'b': 2, + 'c': { + 'c1': 'cc', + 'c2': 20 + } + }) params.override({'a': 2, 'c': {'c1': 'ccc'}}, is_strict=True) 
self.assertEqual(params.a, 2) self.assertEqual(params.c.c1, 'ccc') @@ -112,8 +120,14 @@ class ParamsDictTest(tf.test.TestCase): params.override({'c': {'c3': 30}}, is_strict=True) def test_override_is_strict_false(self): - params = params_dict.ParamsDict( - {'a': 'aa', 'b': 2, 'c': {'c1': 10, 'c2': 20}}) + params = params_dict.ParamsDict({ + 'a': 'aa', + 'b': 2, + 'c': { + 'c1': 10, + 'c2': 20 + } + }) params.override({'a': 2, 'c': {'c3': 3000}}, is_strict=False) self.assertEqual(params.a, 2) self.assertEqual(params.c.c3, 3000) @@ -123,8 +137,14 @@ class ParamsDictTest(tf.test.TestCase): self.assertEqual(params.c.c4, 4444) def test_as_dict(self): - params = params_dict.ParamsDict( - {'a': 'aa', 'b': 2, 'c': {'c1': 10, 'c2': 20}}) + params = params_dict.ParamsDict({ + 'a': 'aa', + 'b': 2, + 'c': { + 'c1': 10, + 'c2': 20 + } + }) params_d = params.as_dict() self.assertEqual(params_d['a'], 'aa') self.assertEqual(params_d['b'], 2) @@ -134,21 +154,27 @@ class ParamsDictTest(tf.test.TestCase): def test_validate(self): # Raise error due to the unknown parameter. with self.assertRaises(KeyError): - params = params_dict.ParamsDict( - {'a': 1, 'b': {'a': 11}}, ['a == c']) + params = params_dict.ParamsDict({'a': 1, 'b': {'a': 11}}, ['a == c']) + params.validate() # OK to check equality of two nested dicts. - params = params_dict.ParamsDict( - {'a': 1, 'b': {'a': 10}, 'c': {'a': 10}}, ['b == c']) + params = params_dict.ParamsDict({ + 'a': 1, + 'b': { + 'a': 10 + }, + 'c': { + 'a': 10 + } + }, ['b == c']) # Raise error due to inconsistency with self.assertRaises(KeyError): - params = params_dict.ParamsDict( - {'a': 1, 'c': {'a': 10}}, ['a == c.a']) + params = params_dict.ParamsDict({'a': 1, 'c': {'a': 10}}, ['a == c.a']) + params.validate() # Valid rule. - params = params_dict.ParamsDict( - {'a': 1, 'c': {'a': 1}}, ['a == c.a']) + params = params_dict.ParamsDict({'a': 1, 'c': {'a': 1}}, ['a == c.a']) # Overridding violates the existing rule, raise error upon validate. 
params.override({'a': 11}) @@ -156,12 +182,21 @@ class ParamsDictTest(tf.test.TestCase): params.validate() # Valid restrictions with constant. - params = params_dict.ParamsDict( - {'a': None, 'c': {'a': 1}}, ['a == None', 'c.a == 1']) + params = params_dict.ParamsDict({ + 'a': None, + 'c': { + 'a': 1 + } + }, ['a == None', 'c.a == 1']) params.validate() with self.assertRaises(KeyError): - params = params_dict.ParamsDict( - {'a': 4, 'c': {'a': 1}}, ['a == None', 'c.a == 1']) + params = params_dict.ParamsDict({ + 'a': 4, + 'c': { + 'a': 1 + } + }, ['a == None', 'c.a == 1']) + params.validate() class ParamsDictIOTest(tf.test.TestCase): @@ -173,8 +208,14 @@ class ParamsDictIOTest(tf.test.TestCase): return temp_file def test_save_params_dict_to_yaml(self): - params = params_dict.ParamsDict( - {'a': 'aa', 'b': 2, 'c': {'c1': 10, 'c2': 20}}) + params = params_dict.ParamsDict({ + 'a': 'aa', + 'b': 2, + 'c': { + 'c1': 10, + 'c2': 20 + } + }) output_yaml_file = os.path.join(self.get_temp_dir(), 'params.yaml') params_dict.save_params_dict_to_yaml(params, output_yaml_file) @@ -203,7 +244,12 @@ class ParamsDictIOTest(tf.test.TestCase): def test_override_params_dict_using_dict(self): params = params_dict.ParamsDict({ - 'a': 1, 'b': 2.5, 'c': [3, 4], 'd': 'hello', 'e': False}) + 'a': 1, + 'b': 2.5, + 'c': [3, 4], + 'd': 'hello', + 'e': False + }) override_dict = {'b': 5.2, 'c': [30, 40]} params = params_dict.override_params_dict( params, override_dict, is_strict=True) @@ -215,7 +261,12 @@ class ParamsDictIOTest(tf.test.TestCase): def test_override_params_dict_using_yaml_string(self): params = params_dict.ParamsDict({ - 'a': 1, 'b': 2.5, 'c': [3, 4], 'd': 'hello', 'e': False}) + 'a': 1, + 'b': 2.5, + 'c': [3, 4], + 'd': 'hello', + 'e': False + }) override_yaml_string = "'b': 5.2\n'c': [30, 40]" params = params_dict.override_params_dict( params, override_yaml_string, is_strict=True) @@ -227,8 +278,18 @@ class ParamsDictIOTest(tf.test.TestCase): def 
test_override_params_dict_using_json_string(self): params = params_dict.ParamsDict({ - 'a': 1, 'b': {'b1': 2, 'b2': [2, 3],}, - 'd': {'d1': {'d2': 'hello'}}, 'e': False}) + 'a': 1, + 'b': { + 'b1': 2, + 'b2': [2, 3], + }, + 'd': { + 'd1': { + 'd2': 'hello' + } + }, + 'e': False + }) override_json_string = "{ b: { b2: [3, 4] }, d: { d1: { d2: 'hi' } } }" params = params_dict.override_params_dict( params, override_json_string, is_strict=True) @@ -240,8 +301,18 @@ class ParamsDictIOTest(tf.test.TestCase): def test_override_params_dict_using_csv_string(self): params = params_dict.ParamsDict({ - 'a': 1, 'b': {'b1': 2, 'b2': [2, 3],}, - 'd': {'d1': {'d2': 'hello'}}, 'e': False}) + 'a': 1, + 'b': { + 'b1': 2, + 'b2': [2, 3], + }, + 'd': { + 'd1': { + 'd2': 'hello' + } + }, + 'e': False + }) override_csv_string = "b.b2=[3,4], d.d1.d2='hi, world', e=gs://test" params = params_dict.override_params_dict( params, override_csv_string, is_strict=True) @@ -250,10 +321,23 @@ class ParamsDictIOTest(tf.test.TestCase): self.assertEqual([3, 4], params.b.b2) self.assertEqual('hi, world', params.d.d1.d2) self.assertEqual('gs://test', params.e) + # Test different float formats + override_csv_string = 'b.b2=-1.e-3, d.d1.d2=+0.001, e=1e+3, a=-1.5E-3' + params = params_dict.override_params_dict( + params, override_csv_string, is_strict=True) + self.assertEqual(-1e-3, params.b.b2) + self.assertEqual(0.001, params.d.d1.d2) + self.assertEqual(1e3, params.e) + self.assertEqual(-1.5e-3, params.a) def test_override_params_dict_using_yaml_file(self): params = params_dict.ParamsDict({ - 'a': 1, 'b': 2.5, 'c': [3, 4], 'd': 'hello', 'e': False}) + 'a': 1, + 'b': 2.5, + 'c': [3, 4], + 'd': 'hello', + 'e': False + }) override_yaml_file = self.write_temp_file( 'params.yaml', r""" b: 5.2 @@ -321,8 +405,7 @@ class IOTest(tf.test.TestCase): def test_csv_str_load_unsupported_datatypes(self): csv_str = 'a=[[1,2,3],[4,5,6]]' - self.assertRaises(ValueError, - params_dict.nested_csv_str_to_json_str, + 
self.assertRaises(ValueError, params_dict.nested_csv_str_to_json_str, csv_str) def test_csv_str_to_json_str_spacing(self): diff --git a/official/modeling/multitask/__init__.py b/official/modeling/multitask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/modeling/multitask/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/modeling/multitask/base_model.py b/official/modeling/multitask/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..976b0d8e31bcb3f980e79f13d64c108c8fb72e93 --- /dev/null +++ b/official/modeling/multitask/base_model.py @@ -0,0 +1,60 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Abstraction of multi-task model.""" +from typing import Text, Dict + +import tensorflow as tf + + +class MultiTaskBaseModel(tf.Module): + """Base class that holds multi-task model computation.""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._sub_tasks = self._instantiate_sub_tasks() + + def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]: + """Abstract function that sets up the computation for each sub-task. + + Returns: + A map from task name (as string) to a tf.keras.Model object that + represents the sub-task in the multi-task pool. + """ + raise NotImplementedError( + "_instantiate_sub_task_models() is not implemented.") + + @property + def sub_tasks(self): + """Fetch a map of task name (string) to task model (tf.keras.Model).""" + return self._sub_tasks + + def initialize(self): + """Optional function that loads a pre-train checkpoint.""" + return diff --git a/official/modeling/multitask/base_trainer.py b/official/modeling/multitask/base_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3674d8776d03382ee9e14784cc5ad7775c1cd8 --- /dev/null +++ b/official/modeling/multitask/base_trainer.py @@ -0,0 +1,176 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Multitask base trainer implementation. + +The trainer derives from the Orbit `StandardTrainer` class. 
+""" +from typing import Union +import gin +import orbit +import tensorflow as tf + +from official.modeling.multitask import base_model +from official.modeling.multitask import multitask + + +@gin.configurable +class MultiTaskBaseTrainer(orbit.StandardTrainer): + """Multitask base trainer.""" + + def __init__(self, + multi_task: multitask.MultiTask, + multi_task_model: Union[tf.keras.Model, + base_model.MultiTaskBaseModel], + optimizer: tf.optimizers.Optimizer, + trainer_options=None): + self._strategy = tf.distribute.get_strategy() + self._multi_task = multi_task + self._multi_task_model = multi_task_model + self._optimizer = optimizer + + self._training_losses = None + self._training_metrics = None + self._global_step = orbit.utils.create_global_step() + + if hasattr(self.multi_task_model, "checkpoint_items"): + checkpoint_items = self.multi_task_model.checkpoint_items + else: + checkpoint_items = {} + + self._checkpoint = tf.train.Checkpoint( + model=self.multi_task_model, + optimizer=self.optimizer, + global_step=self.global_step, + **checkpoint_items) + + train_datasets = {} + for name, task in self.multi_task.tasks.items(): + train_datasets[name] = orbit.utils.make_distributed_dataset( + self.strategy, task.build_inputs, task.task_config.train_data) + + super().__init__( + train_dataset=train_datasets, + options=trainer_options or orbit.StandardTrainerOptions()) + + def train_loop_begin(self): + """Clean up states that hold losses and metrics.""" + for _, train_loss_metric in self.training_losses.items(): + train_loss_metric.reset_states() + + for _, metrics in self.training_metrics.items(): + for metric in metrics: + metric.reset_states() + + def train_loop_end(self): + """Record loss and metric values per task.""" + result = {} + for task_name, loss in self.training_losses.items(): + result[task_name] = {loss.name: loss.result()} + for task_name, task_metrics in self.training_metrics.items(): + result[task_name].update( + {metric.name: metric.result() for 
metric in task_metrics}) + # Note that, the learning rate schedule is managed by the keras optimizer + # internally, which respects the number of backward pass as `iterations`. + # The learning rate schedule does not follow the trainer logical global + # step of multiple tasks. + if callable(self.optimizer.learning_rate): + result["learning_rate"] = self.optimizer.learning_rate( + self.optimizer.iterations) + else: + result["learning_rate"] = self.optimizer.learning_rate + return result + + @property + def checkpoint(self): + """Accesses the training checkpoint.""" + return self._checkpoint + + @property + def training_losses(self): + """Access training loss metric objects for all tasks.""" + if self._training_losses is None: + # Builds the per-task metrics and losses. + # This the total summed training loss of tasks in the joint training. + self._training_losses = dict( + total_loss=tf.keras.metrics.Mean("training_loss", dtype=tf.float32)) + for name in self.multi_task.tasks: + self._training_losses[name] = tf.keras.metrics.Mean( + "training_loss", dtype=tf.float32) + return self._training_losses + + @property + def training_metrics(self): + """Access training metric metric objects for all tasks.""" + if self._training_metrics is None: + # Builds the per-task metrics and losses. + self._training_metrics = {} + for name, task in self.multi_task.tasks.items(): + self._training_metrics[name] = task.build_metrics(training=True) + return self._training_metrics + + @property + def strategy(self): + return self._strategy + + @property + def multi_task(self): + return self._multi_task + + @property + def multi_task_model(self): + return self._multi_task_model + + @property + def optimizer(self): + return self._optimizer + + @property + def global_step(self): + return self._global_step + + def train_step(self, iterator_map): + """The default train step calling the multi-task train step. + + Args: + iterator_map: a dictionary of task names and per-task dataset iterators. 
+ """ + + def step_fn(inputs): + losses = self.multi_task.joint_train_step( + inputs, + multi_task_model=self.multi_task_model, + optimizer=self.optimizer, + task_metrics=self.training_metrics) + for key, loss in losses.items(): + self.training_losses[key].update_state(loss) + + self.strategy.run( + step_fn, args=(tf.nest.map_structure(next, iterator_map),)) + self.global_step.assign_add(1) diff --git a/official/modeling/multitask/base_trainer_test.py b/official/modeling/multitask/base_trainer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2427ff85f2af4c79fb3f7f3cc40c9fc82c0a7e61 --- /dev/null +++ b/official/modeling/multitask/base_trainer_test.py @@ -0,0 +1,90 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for multitask.base_trainer.""" +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.modeling.multitask import base_trainer +from official.modeling.multitask import configs +from official.modeling.multitask import multitask +from official.modeling.multitask import test_utils + + +def all_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + mode="eager", + ) + + +class BaseTrainerTest(tf.test.TestCase, parameterized.TestCase): + + @combinations.generate(all_strategy_combinations()) + def test_multitask_joint_trainer(self, distribution): + with distribution.scope(): + tasks = [ + test_utils.MockFooTask(params=test_utils.FooConfig(), name="foo"), + test_utils.MockBarTask(params=test_utils.BarConfig(), name="bar") + ] + task_weights = {"foo": 1.0, "bar": 1.0} + test_multitask = multitask.MultiTask( + tasks=tasks, task_weights=task_weights) + test_optimizer = tf.keras.optimizers.SGD(0.1) + model = test_utils.MockMultiTaskModel() + test_trainer = base_trainer.MultiTaskBaseTrainer( + multi_task=test_multitask, + multi_task_model=model, + optimizer=test_optimizer) + results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertContainsSubset(["training_loss", "bar_acc"], + results["bar"].keys()) + self.assertContainsSubset(["training_loss", "foo_acc"], + results["foo"].keys()) + + def test_trainer_with_configs(self): + config = configs.MultiTaskConfig( + task_routines=(configs.TaskRoutine( + task_name="foo", + task_config=test_utils.FooConfig(), + task_weight=0.5), + configs.TaskRoutine( + task_name="bar", + task_config=test_utils.BarConfig(), + task_weight=0.5))) + test_multitask = multitask.MultiTask.from_config(config) + 
test_optimizer = tf.keras.optimizers.SGD(0.1) + model = test_utils.MockMultiTaskModel() + test_trainer = base_trainer.MultiTaskBaseTrainer( + multi_task=test_multitask, + multi_task_model=model, + optimizer=test_optimizer) + results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertContainsSubset(["training_loss", "bar_acc"], + results["bar"].keys()) + self.assertContainsSubset(["training_loss", "foo_acc"], + results["foo"].keys()) + self.assertEqual(test_multitask.task_weight("foo"), 0.5) + self.assertEqual(test_trainer.global_step.numpy(), 5) + self.assertIn("learning_rate", results) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/modeling/multitask/configs.py b/official/modeling/multitask/configs.py new file mode 100644 index 0000000000000000000000000000000000000000..563dbffe15f418fb865956e6403af2425ca15547 --- /dev/null +++ b/official/modeling/multitask/configs.py @@ -0,0 +1,79 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Configuration definitions for multi-task training.""" +from typing import Optional, Tuple + +import dataclasses + +from official.core import config_definitions as cfg +from official.modeling import hyperparams + + +@dataclasses.dataclass +class TaskRoutine(hyperparams.Config): + task_name: str = "" + task_config: cfg.TaskConfig = None + eval_steps: Optional[int] = None + task_weight: Optional[float] = 1.0 + + +@dataclasses.dataclass +class MultiTaskConfig(hyperparams.Config): + init_checkpoint: str = "" + model: hyperparams.Config = None + task_routines: Tuple[TaskRoutine, ...] = () + + +@dataclasses.dataclass +class ProportionalSampleConfig(hyperparams.Config): + alpha: float = 1.0 + + +@dataclasses.dataclass +class AnnealingSampleConfig(hyperparams.Config): + steps_per_epoch: int = 5 + total_steps: int = 20 + + +@dataclasses.dataclass +class TaskSamplingConfig(hyperparams.OneOfConfig): + type: str = "" + uniform: hyperparams.Config = hyperparams.Config() + proportional: ProportionalSampleConfig = ProportionalSampleConfig() + annealing: AnnealingSampleConfig = AnnealingSampleConfig() + + +@dataclasses.dataclass +class MultiTaskTrainerConfig(cfg.TrainerConfig): + trainer_type: str = "interleaving" + task_sampler: TaskSamplingConfig = TaskSamplingConfig(type="proportional") + + +@dataclasses.dataclass +class MultiTaskExperimentConfig(hyperparams.Config): + """An experiment config for multi-task training and multi-task evaluation.""" + task: MultiTaskConfig = MultiTaskConfig() + trainer: MultiTaskTrainerConfig = MultiTaskTrainerConfig() + runtime: cfg.RuntimeConfig = cfg.RuntimeConfig() + + +@dataclasses.dataclass +class MultiEvalExperimentConfig(cfg.ExperimentConfig): + """An experiment config for single-task training and multi-task evaluation. + + Attributes: + eval_tasks: individual evaluation tasks. 
+ """ + eval_tasks: MultiTaskConfig = MultiTaskConfig() diff --git a/official/modeling/multitask/evaluator.py b/official/modeling/multitask/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..bc89881df990d369feee234ab7ec67589dd4eaca --- /dev/null +++ b/official/modeling/multitask/evaluator.py @@ -0,0 +1,172 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Multitask Evaluator implementation. + +The evaluator implements the Orbit `AbstractEvaluator` interface. +""" +from typing import Optional, Union +import gin +import orbit +import tensorflow as tf + +from official.core import train_utils +from official.modeling.multitask import base_model +from official.modeling.multitask import multitask + + +@gin.configurable +class MultiTaskEvaluator(orbit.AbstractEvaluator): + """Implements the common trainer shared for TensorFlow models.""" + + def __init__( + self, + task: multitask.MultiTask, + model: Union[tf.keras.Model, base_model.MultiTaskBaseModel], + global_step: Optional[tf.Variable] = None, + checkpoint_exporter: Optional[train_utils.BestCheckpointExporter] = None): + """Initialize common trainer for TensorFlow models. + + Args: + task: A multitask.MultiTask instance. + model: tf.keras.Model instance. + global_step: the global step variable. + checkpoint_exporter: an object that has the `maybe_export_checkpoint` + interface. 
+ """ + # Gets the current distribution strategy. If not inside any strategy scope, + # it gets a single-replica no-op strategy. + self._strategy = tf.distribute.get_strategy() + self._task = task + self._model = model + self._global_step = global_step or orbit.utils.create_global_step() + self._checkpoint_exporter = checkpoint_exporter + self._checkpoint = tf.train.Checkpoint( + global_step=self.global_step, + model=self.model) + + self._validation_losses = None + self._validation_metrics = None + + # Builds per-task datasets. + self.eval_datasets = {} + for name, task in self.task.tasks.items(): + self.eval_datasets[name] = orbit.utils.make_distributed_dataset( + self.strategy, task.build_inputs, task.task_config.validation_data) + + # Builds per-task validation loops. + def get_function(task_name, task): + + task_metrics = self.validation_metrics[task_name] + task_loss = self.validation_losses[task_name] + if isinstance(self.model, base_model.MultiTaskBaseModel): + model = self.model.sub_tasks[task_name] + else: + model = self.model + + def step_fn(inputs): + logs = task.validation_step(inputs, model=model, metrics=task_metrics) + task_loss.update_state(logs[task.loss]) + return logs + + @tf.function + def eval_step_fn(iterator): + distributed_outputs = self.strategy.run(step_fn, args=(next(iterator),)) + return tf.nest.map_structure(self.strategy.experimental_local_results, + distributed_outputs) + + return orbit.utils.create_loop_fn(eval_step_fn) + + self.task_fns = { + name: get_function(name, task) + for name, task in self.task.tasks.items() + } + + @property + def strategy(self): + return self._strategy + + @property + def task(self): + return self._task + + @property + def model(self): + return self._model + + @property + def global_step(self): + return self._global_step + + @property + def validation_losses(self): + """Accesses the validation loss metric object.""" + if self._validation_losses is None: + # Builds the per-task metrics and losses. 
+ self._validation_losses = {} + for name in self.task.tasks: + self._validation_losses[name] = tf.keras.metrics.Mean( + "validation_loss", dtype=tf.float32) + return self._validation_losses + + @property + def validation_metrics(self): + """Accesses all validation metric metric objects.""" + if self._validation_metrics is None: + # Builds the per-task metrics and losses. + self._validation_metrics = {} + for name, task in self.task.tasks.items(): + self._validation_metrics[name] = task.build_metrics(training=False) + return self._validation_metrics + + @property + def checkpoint(self): + """Accesses the training checkpoint.""" + return self._checkpoint + + def evaluate(self, num_steps: tf.Tensor): + """Performs evaluation for each `EvalTask`.""" + for metric in self.validation_losses.values(): + metric.reset_states() + for metrics in self.validation_metrics.values(): + for metric in metrics: + metric.reset_states() + results = {} + eval_iters = tf.nest.map_structure(iter, self.eval_datasets) + + for name, task_eval_loop in self.task_fns.items(): + outputs = None + eval_iter = eval_iters[name] + task = self.task.tasks[name] + task_eval_steps = self.task.task_eval_steps(name) or num_steps + outputs = task_eval_loop( + eval_iter, + task_eval_steps, + state=outputs, + reduce_fn=task.aggregate_logs) + task_metrics = self.validation_metrics[name] + task_loss = self.validation_losses[name] + logs = {} + for metric in task_metrics + [task_loss]: + logs[metric.name] = metric.result() + if outputs: + metrics = task.reduce_aggregated_logs( + outputs, global_step=self.global_step) + logs.update(metrics) + results[name] = logs + + if self._checkpoint_exporter: + self._checkpoint_exporter.maybe_export_checkpoint( + self.checkpoint, results, self.global_step.numpy()) + return results diff --git a/official/modeling/multitask/evaluator_test.py b/official/modeling/multitask/evaluator_test.py new file mode 100644 index 
0000000000000000000000000000000000000000..6bee73eacbe625e53fdf69388072e5dbf161499a --- /dev/null +++ b/official/modeling/multitask/evaluator_test.py @@ -0,0 +1,138 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for multitask.evaluator.""" +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.core import base_task +from official.core import config_definitions as cfg +from official.modeling.multitask import evaluator +from official.modeling.multitask import multitask + + +def all_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + mode="eager", + ) + + +class MockModel(tf.keras.Model): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dense = tf.keras.layers.Dense(1) + + def call(self, inputs): + print(inputs, type(inputs)) + if "y" in inputs: + self.add_loss(tf.zeros((1,), dtype=tf.float32)) + else: + self.add_loss(tf.ones((1,), dtype=tf.float32)) + return self.dense(inputs["x"]) + + +class MockTask(base_task.Task): + """Mock task object for testing.""" + + def build_metrics(self, training: bool = True): + del training + return 
[tf.keras.metrics.Accuracy(name="acc")] + + def build_inputs(self, params): + + def generate_data(_): + x = tf.zeros(shape=(2,), dtype=tf.float32) + label = tf.zeros([1], dtype=tf.int32) + if self.name == "bar": + return dict(x=x, y=x), label + else: + return dict(x=x), label + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True) + + def validation_step(self, inputs, model: tf.keras.Model, metrics=None): + logs = super().validation_step(inputs, model, metrics) + logs["counter"] = tf.ones((1,), dtype=tf.float32) + return logs + + def aggregate_logs(self, state, step_outputs): + if state is None: + state = {} + for key, value in step_outputs.items(): + if key not in state: + state[key] = [] + state[key].append( + np.concatenate([np.expand_dims(v.numpy(), axis=0) for v in value])) + return state + + def reduce_aggregated_logs(self, + aggregated_logs, + global_step=None): + for k, v in aggregated_logs.items(): + aggregated_logs[k] = np.sum(np.stack(v, axis=0)) + return aggregated_logs + + +class EvaluatorTest(tf.test.TestCase, parameterized.TestCase): + + @combinations.generate(all_strategy_combinations()) + def test_multitask_evaluator(self, distribution): + with distribution.scope(): + tasks = [ + MockTask(params=cfg.TaskConfig(), name="bar"), + MockTask(params=cfg.TaskConfig(), name="foo") + ] + test_multitask = multitask.MultiTask(tasks=tasks) + model = MockModel() + test_evaluator = evaluator.MultiTaskEvaluator( + task=test_multitask, model=model) + results = test_evaluator.evaluate(tf.convert_to_tensor(1, dtype=tf.int32)) + self.assertContainsSubset(["validation_loss", "acc"], results["bar"].keys()) + self.assertContainsSubset(["validation_loss", "acc"], results["foo"].keys()) + self.assertEqual(results["bar"]["validation_loss"], 0.0) + self.assertEqual(results["foo"]["validation_loss"], 
1.0) + + @combinations.generate(all_strategy_combinations()) + def test_multitask_evaluator_numpy_metrics(self, distribution): + with distribution.scope(): + tasks = [ + MockTask(params=cfg.TaskConfig(), name="bar"), + MockTask(params=cfg.TaskConfig(), name="foo") + ] + test_multitask = multitask.MultiTask(tasks=tasks) + model = MockModel() + test_evaluator = evaluator.MultiTaskEvaluator( + task=test_multitask, model=model) + results = test_evaluator.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertEqual(results["bar"]["counter"], + 5. * distribution.num_replicas_in_sync) + self.assertEqual(results["foo"]["counter"], + 5. * distribution.num_replicas_in_sync) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/modeling/multitask/interleaving_trainer.py b/official/modeling/multitask/interleaving_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..36d16cce79c729f4c31eadb1643ee6f2e239540d --- /dev/null +++ b/official/modeling/multitask/interleaving_trainer.py @@ -0,0 +1,92 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Multitask trainer that interleaves each task's train step.""" +from typing import Union +import gin +import orbit +import tensorflow as tf +from official.modeling.multitask import base_model +from official.modeling.multitask import base_trainer +from official.modeling.multitask import multitask +from official.modeling.multitask import task_sampler as sampler + + +@gin.configurable +class MultiTaskInterleavingTrainer(base_trainer.MultiTaskBaseTrainer): + """MultiTask trainer that interleaves task update.""" + + def __init__(self, + multi_task: multitask.MultiTask, + multi_task_model: Union[tf.keras.Model, + base_model.MultiTaskBaseModel], + optimizer: tf.optimizers.Optimizer, + task_sampler: sampler.TaskSampler, + trainer_options=None): + super(MultiTaskInterleavingTrainer, self).__init__( + multi_task=multi_task, + multi_task_model=multi_task_model, + optimizer=optimizer, + trainer_options=trainer_options) + self._task_sampler = task_sampler + + # Build per task train step. + def _get_task_step(task_name, task): + + def step_fn(inputs): + if isinstance(self.multi_task_model, base_model.MultiTaskBaseModel): + task_model = self.multi_task_model.sub_tasks[task_name] + else: + task_model = self.multi_task_model + task_logs = task.train_step( + inputs, + model=task_model, + optimizer=self.optimizer, + metrics=self.training_metrics[task_name]) + self.training_losses[task_name].update_state(task_logs[task.loss]) + + return step_fn + + self._task_train_step_map = { + name: _get_task_step(name, task) + for name, task in self.multi_task.tasks.items() + } + + # TODO(haozhangthu): Add taskwise step counter to train_loop_end for logging + # on TensorBoard. 
+ self._task_step_counters = { + name: orbit.utils.create_global_step() for name in self.multi_task.tasks + } + + def task_step_counter(self, name): + return self._task_step_counters[name] + + def train_step(self, iterator_map): + # Sample one task to train according to a multinomial distribution + rn = tf.random.stateless_uniform(shape=[], seed=(0, self.global_step)) + cumulative_sample_distribution = self._task_sampler.task_cumulative_distribution( + self.global_step) + # Prepend a [0.0] for indexing convenience. + cumulative_sample_distribution = tf.concat( + [tf.constant([0.0], dtype=tf.float32), cumulative_sample_distribution], + axis=0) + + for idx, (name, _) in enumerate(self.multi_task.tasks.items()): + begin = cumulative_sample_distribution[idx] + end = cumulative_sample_distribution[idx + 1] + if rn >= begin and rn < end: + self._strategy.run( + self._task_train_step_map[name], args=(next(iterator_map[name]),)) + self.global_step.assign_add(1) + self.task_step_counter(name).assign_add(1) diff --git a/official/modeling/multitask/interleaving_trainer_test.py b/official/modeling/multitask/interleaving_trainer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0ccc2670dc4c70a2b95e831fa56f1a4205931e36 --- /dev/null +++ b/official/modeling/multitask/interleaving_trainer_test.py @@ -0,0 +1,101 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for multitask.interleaving_trainer.""" +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.modeling.multitask import configs +from official.modeling.multitask import interleaving_trainer +from official.modeling.multitask import multitask +from official.modeling.multitask import task_sampler +from official.modeling.multitask import test_utils + + +def all_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + mode="eager", + ) + + +class InterleavingTrainerTest(tf.test.TestCase, parameterized.TestCase): + + @combinations.generate(all_strategy_combinations()) + def test_multitask_interleaving_trainer(self, distribution): + with distribution.scope(): + tasks = [ + test_utils.MockFooTask(params=test_utils.FooConfig(), name="foo"), + test_utils.MockBarTask(params=test_utils.BarConfig(), name="bar") + ] + test_multitask = multitask.MultiTask(tasks=tasks) + test_optimizer = tf.keras.optimizers.SGD(0.1) + model = test_utils.MockMultiTaskModel() + sampler = task_sampler.UniformTaskSampler( + task_weights=test_multitask.task_weights) + test_trainer = interleaving_trainer.MultiTaskInterleavingTrainer( + multi_task=test_multitask, + multi_task_model=model, + optimizer=test_optimizer, + task_sampler=sampler) + results = test_trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertContainsSubset(["training_loss", "bar_acc"], + results["bar"].keys()) + self.assertContainsSubset(["training_loss", "foo_acc"], + results["foo"].keys()) + + @combinations.generate(all_strategy_combinations()) + def test_trainer_with_configs(self, distribution): + config = configs.MultiTaskConfig( + task_routines=(configs.TaskRoutine( + task_name="foo", + 
task_config=test_utils.FooConfig(), + task_weight=3.0), + configs.TaskRoutine( + task_name="bar", + task_config=test_utils.BarConfig(), + task_weight=1.0))) + with distribution.scope(): + test_multitask = multitask.MultiTask.from_config(config) + test_optimizer = tf.keras.optimizers.SGD(0.1) + model = test_utils.MockMultiTaskModel() + num_step = 1000 + sampler = task_sampler.AnnealingTaskSampler( + task_weights=test_multitask.task_weights, + steps_per_epoch=num_step/5, + total_steps=num_step) + test_trainer = interleaving_trainer.MultiTaskInterleavingTrainer( + multi_task=test_multitask, + multi_task_model=model, + optimizer=test_optimizer, + task_sampler=sampler) + results = test_trainer.train(tf.convert_to_tensor(num_step, dtype=tf.int32)) + self.assertContainsSubset(["training_loss", "bar_acc"], + results["bar"].keys()) + self.assertContainsSubset(["training_loss", "foo_acc"], + results["foo"].keys()) + self.assertEqual(test_trainer.global_step.numpy(), num_step) + bar_sampled_step = test_trainer.task_step_counter("bar").numpy() + foo_sampled_step = test_trainer.task_step_counter("foo").numpy() + self.assertEqual(bar_sampled_step + foo_sampled_step, num_step) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/modeling/multitask/multitask.py b/official/modeling/multitask/multitask.py new file mode 100644 index 0000000000000000000000000000000000000000..faff37237c2e52673f885ea2b845071cc0beb2b2 --- /dev/null +++ b/official/modeling/multitask/multitask.py @@ -0,0 +1,148 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Experimental MultiTask base class for multi-task training/evaluation.""" +import abc +from typing import Dict, List, Optional, Text, Union + +import tensorflow as tf +from official.core import base_task +from official.core import config_definitions +from official.core import task_factory +from official.modeling import optimization +from official.modeling.multitask import base_model +from official.modeling.multitask import configs + +OptimizationConfig = optimization.OptimizationConfig +RuntimeConfig = config_definitions.RuntimeConfig + + +class MultiTask(tf.Module, metaclass=abc.ABCMeta): + """A multi-task class to manage multiple tasks.""" + + def __init__(self, + tasks: Union[Dict[Text, base_task.Task], List[base_task.Task]], + task_weights: Optional[Dict[str, Union[float, int]]] = None, + task_eval_steps: Optional[Dict[str, int]] = None, + name: Optional[str] = None): + """MultiTask initialization. + + Args: + tasks: a list or a flat dict of Task. + task_weights: a dict of (task, task weight), task weight can be applied + directly during loss summation in a joint backward step, or it can be + used to sample task among interleaved backward step. + task_eval_steps: a dict of (task, eval steps). + name: the instance name of a MultiTask object. 
+ """ + super().__init__(name=name) + if isinstance(tasks, list): + self._tasks = {} + for task in tasks: + if task.name in self._tasks: + raise ValueError("Duplicated tasks found, task.name is %s" % + task.name) + self._tasks[task.name] = task + elif isinstance(tasks, dict): + self._tasks = tasks + else: + raise ValueError("The tasks argument has an invalid type: %s" % + type(tasks)) + self._task_eval_steps = task_eval_steps or {} + self._task_eval_steps = dict([ + (name, self._task_eval_steps.get(name, None)) for name in self.tasks + ]) + self._task_weights = task_weights or {} + self._task_weights = dict([ + (name, self._task_weights.get(name, 1.0)) for name in self.tasks + ]) + + @classmethod + def from_config(cls, config: configs.MultiTaskConfig, logging_dir=None): + tasks = {} + task_eval_steps = {} + task_weights = {} + for task_routine in config.task_routines: + task_name = task_routine.task_name + tasks[task_name] = task_factory.get_task( + task_routine.task_config, logging_dir=logging_dir) + task_eval_steps[task_name] = task_routine.eval_steps + task_weights[task_name] = task_routine.task_weight + return cls( + tasks, task_eval_steps=task_eval_steps, task_weights=task_weights) + + @property + def tasks(self): + return self._tasks + + def task_eval_steps(self, task_name): + return self._task_eval_steps[task_name] + + def task_weight(self, task_name): + return self._task_weights[task_name] + + @property + def task_weights(self): + return self._task_weights + + @classmethod + def create_optimizer(cls, + optimizer_config: OptimizationConfig, + runtime_config: Optional[RuntimeConfig] = None): + return base_task.Task.create_optimizer( + optimizer_config=optimizer_config, runtime_config=runtime_config) + + def joint_train_step(self, task_inputs, + multi_task_model: base_model.MultiTaskBaseModel, + optimizer: tf.keras.optimizers.Optimizer, task_metrics): + """The joint train step. + + Args: + task_inputs: a dictionary of task names and per-task features. 
+      multi_task_model: a MultiTaskBaseModel instance.
+      optimizer: a tf.optimizers.Optimizer.
+      task_metrics: a dictionary of task names and per-task metrics.
+
+    Returns:
+      A dictionary of losses, including per-task losses and their weighted sum.
+    """
+    losses = {}
+    with tf.GradientTape() as tape:
+      total_loss = 0.0
+      for name, model in multi_task_model.sub_tasks.items():
+        inputs = task_inputs[name]
+        if isinstance(inputs, tuple) and len(inputs) == 2:
+          features, labels = inputs
+        elif isinstance(inputs, dict):
+          features, labels = inputs, inputs
+        else:
+          raise ValueError("The iterator output is neither a tuple nor a "
+                           "dictionary. It is not implemented to support "
+                           "such outputs.")
+        outputs = model(features, training=True)
+        task_loss = self.tasks[name].build_losses(labels, outputs)
+        task_weight = self.task_weight(name)
+        total_loss += task_weight * task_loss
+        losses[name] = task_loss
+        self.tasks[name].process_metrics(task_metrics[name], labels, outputs)
+
+      # Scales loss as the default gradients allreduce performs sum inside
+      # the optimizer.
+      scaled_loss = total_loss / tf.distribute.get_strategy(
+      ).num_replicas_in_sync
+    tvars = multi_task_model.trainable_variables
+    grads = tape.gradient(scaled_loss, tvars)
+    optimizer.apply_gradients(list(zip(grads, tvars)))
+    losses["total_loss"] = total_loss
+    return losses
diff --git a/official/modeling/multitask/task_sampler.py b/official/modeling/multitask/task_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cc61e26e6f0f549850992f29313f41d895b9d05
--- /dev/null
+++ b/official/modeling/multitask/task_sampler.py
@@ -0,0 +1,121 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utils to sample tasks for interleaved optimization."""
+import abc
+from typing import Union, Dict, Text
+import tensorflow as tf
+
+from official.modeling.multitask import configs
+
+
+class TaskSampler(tf.Module, metaclass=abc.ABCMeta):
+  """An abstract class defining task sampling API for interleaving trainer."""
+
+  def __init__(self, task_weights: Dict[Text, Union[float, int]]):
+    self._task_weights = task_weights
+
+  @abc.abstractmethod
+  def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor:
+    """Compute cumulative distribution to sample tasks.
+
+    It calculates the cumulative distribution of the multinomial task
+    distribution with respect to which to be sampled against.
+
+    Args:
+      global_step: A tensor indicating current progress of training.
+
+    Returns:
+      A float tensor with shape (#(task),) that represents the cumulative
+      sampling distribution.
+ """ + pass + + +class UniformTaskSampler(TaskSampler): + """Sample all tasks uniformly.""" + + def __init__(self, task_weights: Dict[Text, Union[float, int]]): + super(UniformTaskSampler, self).__init__(task_weights=task_weights) + self._uniform_cumulative = tf.math.cumsum( + tf.constant( + [1.0 / len(self._task_weights)] * len(self._task_weights), + dtype=tf.float32)) + + def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: + del global_step + return self._uniform_cumulative + + +class ProportionalTaskSampler(TaskSampler): + """Sample tasks proportional to task weights.""" + + def __init__(self, + task_weights: Dict[Text, Union[float, int]], + alpha: float = 1.0): + super(ProportionalTaskSampler, self).__init__(task_weights=task_weights) + self._alpha = tf.cast(alpha, dtype=tf.float32) + task_weight_dict_ordered_list = tf.constant( + [weight for _, weight in self._task_weights.items()], dtype=tf.float32) + task_sizes = tf.math.pow(task_weight_dict_ordered_list, self._alpha) + task_distribution = task_sizes / tf.reduce_sum(task_sizes) + self._porportional_cumulative = tf.math.cumsum(task_distribution) + + def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: + del global_step + return self._porportional_cumulative + + +class AnnealingTaskSampler(TaskSampler): + """Sample tasks according to task weights as well as training progress.""" + + def __init__(self, + task_weights: Dict[Text, Union[float, int]], + steps_per_epoch: int, + total_steps: int): + super(AnnealingTaskSampler, self).__init__(task_weights=task_weights) + self._steps_per_epoch = tf.cast(steps_per_epoch, dtype=tf.float32) + self._total_epochs = tf.cast( + total_steps / self._steps_per_epoch, dtype=tf.float32) + + def task_cumulative_distribution(self, global_step: tf.Tensor) -> tf.Tensor: + cur_epoch = tf.math.floor( + tf.cast(global_step, dtype=tf.float32) / self._steps_per_epoch) + alpha = 1.0 - 0.8 * (cur_epoch - 1) / (self._total_epochs - 1 + 1e-10) 
+ task_weight_dict_ordered_list = [ + weight for _, weight in self._task_weights.items() + ] + task_sizes = tf.math.pow( + tf.constant(task_weight_dict_ordered_list, dtype=tf.float32), + tf.cast(alpha, dtype=tf.float32)) + dynamic_task_distribution = task_sizes / tf.reduce_sum(task_sizes) + return tf.math.cumsum(dynamic_task_distribution) + + +def get_task_sampler(config: configs.TaskSamplingConfig, + task_weights: Dict[Text, float]) -> TaskSampler: + """Utils to create task sampler with configuration and task weights.""" + oneof_config = config.get() + if config.type == 'uniform': + return UniformTaskSampler(task_weights=task_weights) + elif config.type == 'proportional': + return ProportionalTaskSampler( + task_weights=task_weights, alpha=oneof_config.alpha) + elif config.type == 'annealing': + return AnnealingTaskSampler( + task_weights=task_weights, + steps_per_epoch=oneof_config.steps_per_epoch, + total_steps=oneof_config.total_steps) + else: + raise RuntimeError('Task sampler type not supported') diff --git a/official/modeling/multitask/task_sampler_test.py b/official/modeling/multitask/task_sampler_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5b4695049952dab250f9fdac3d6bfd134e2c644d --- /dev/null +++ b/official/modeling/multitask/task_sampler_test.py @@ -0,0 +1,75 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Tests for multitask.task_sampler."""
+import tensorflow as tf
+
+from official.modeling.multitask import configs
+from official.modeling.multitask import task_sampler as sampler
+
+
+class TaskSamplerTest(tf.test.TestCase):
+
+  def setUp(self):
+    super(TaskSamplerTest, self).setUp()
+    self._task_weights = {'A': 1.0, 'B': 2.0, 'C': 3.0}
+
+  def test_uniform_sample_distribution(self):
+    uniform_sampler = sampler.get_task_sampler(
+        configs.TaskSamplingConfig(type='uniform'), self._task_weights)
+    for step in range(5):
+      cumulative_distribution = uniform_sampler.task_cumulative_distribution(
+          tf.constant(step, dtype=tf.int64))
+      self.assertAllClose([0.333333, 0.666666, 1.0],
+                          cumulative_distribution.numpy())
+
+  def test_proportional_sample_distribution(self):
+    prop_sampler = sampler.get_task_sampler(
+        configs.TaskSamplingConfig(
+            type='proportional',
+            proportional=configs.ProportionalSampleConfig(alpha=2.0)),
+        self._task_weights)
+    # CumulativeOf(Normalize([1.0^2, 2.0^2, 3.0^2]))
+    for step in range(5):
+      cumulative_distribution = prop_sampler.task_cumulative_distribution(
+          tf.constant(step, dtype=tf.int64))
+      self.assertAllClose([0.07142857, 0.35714286, 1.0],
+                          cumulative_distribution.numpy())
+
+  def test_annealing_sample_distribution(self):
+    num_epoch = 3
+    step_per_epoch = 6
+    annel_sampler = sampler.get_task_sampler(
+        configs.TaskSamplingConfig(
+            type='annealing',
+            annealing=configs.AnnealingSampleConfig(
+                steps_per_epoch=step_per_epoch,
+                total_steps=step_per_epoch * num_epoch)), self._task_weights)
+
+    global_step = tf.Variable(
+        0, dtype=tf.int64, name='global_step', trainable=False)
+    expected_cumulative_epochs = [[0.12056106, 0.4387236, 1.0],
+                                  [0.16666667, 0.5, 1.0],
+                                  [0.22477472, 0.5654695, 1.0]]
+    for epoch in range(num_epoch):
+      for _ in range(step_per_epoch):
+        cumulative_distribution = annel_sampler.task_cumulative_distribution(
+            tf.constant(global_step, dtype=tf.int64))
+        global_step.assign_add(1)
+
self.assertAllClose(expected_cumulative_epochs[epoch], + cumulative_distribution.numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/multitask/test_utils.py b/official/modeling/multitask/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..aa831223817b4968615f5aa87c1e3fbc39021218 --- /dev/null +++ b/official/modeling/multitask/test_utils.py @@ -0,0 +1,125 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Testing utils for mock models and tasks.""" +from typing import Dict, Text +import tensorflow as tf +from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling.multitask import base_model + + +class MockFooModel(tf.keras.Model): + """A mock model can consume 'foo' and 'bar' inputs.""" + + def __init__(self, shared_layer, *args, **kwargs): + super().__init__(*args, **kwargs) + self._share_layer = shared_layer + self._foo_specific_layer = tf.keras.layers.Dense(1) + + def call(self, inputs): + self.add_loss(tf.zeros((1,), dtype=tf.float32)) + if "foo" in inputs: + input_tensor = inputs["foo"] + else: + input_tensor = inputs["bar"] + return self._foo_specific_layer(self._share_layer(input_tensor)) + + +class MockBarModel(tf.keras.Model): + + def __init__(self, shared_layer, *args, **kwargs): + super().__init__(*args, **kwargs) + self._share_layer = shared_layer + self._bar_specific_layer = tf.keras.layers.Dense(1) + + def call(self, inputs): + self.add_loss(tf.zeros((2,), dtype=tf.float32)) + return self._bar_specific_layer(self._share_layer(inputs["bar"])) + + +class MockMultiTaskModel(base_model.MultiTaskBaseModel): + + def __init__(self, *args, **kwargs): + self._shared_dense = tf.keras.layers.Dense(1) + super().__init__(*args, **kwargs) + + def _instantiate_sub_tasks(self) -> Dict[Text, tf.keras.Model]: + return { + "foo": MockFooModel(self._shared_dense), + "bar": MockBarModel(self._shared_dense) + } + + +def mock_data(feature_name): + """Mock dataset function.""" + + def _generate_data(_): + x = tf.zeros(shape=(2,), dtype=tf.float32) + label = tf.zeros([1], dtype=tf.int32) + return {feature_name: x}, label + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + _generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True) + + +class 
FooConfig(cfg.TaskConfig): + pass + + +class BarConfig(cfg.TaskConfig): + pass + + +@task_factory.register_task_cls(FooConfig) +class MockFooTask(base_task.Task): + """Mock foo task object for testing.""" + + def build_metrics(self, training: bool = True): + del training + return [tf.keras.metrics.Accuracy(name="foo_acc")] + + def build_inputs(self, params): + return mock_data("foo") + + def build_model(self) -> tf.keras.Model: + return MockFooModel(shared_layer=tf.keras.layers.Dense(1)) + + def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: + loss = tf.keras.losses.mean_squared_error(labels, model_outputs) + if aux_losses: + loss += tf.add_n(aux_losses) + return tf.reduce_mean(loss) + + +@task_factory.register_task_cls(BarConfig) +class MockBarTask(base_task.Task): + """Mock bar task object for testing.""" + + def build_metrics(self, training: bool = True): + del training + return [tf.keras.metrics.Accuracy(name="bar_acc")] + + def build_inputs(self, params): + return mock_data("bar") + + def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: + loss = tf.keras.losses.mean_squared_error(labels, model_outputs) + if aux_losses: + loss += tf.add_n(aux_losses) + return tf.reduce_mean(loss) diff --git a/official/modeling/multitask/train_lib.py b/official/modeling/multitask/train_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..b2fa9a0e76fdee2d734e1c050f8201fcab1e7ede --- /dev/null +++ b/official/modeling/multitask/train_lib.py @@ -0,0 +1,244 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Multitask training driver library.""" +# pytype: disable=attribute-error +import os +from absl import logging +import orbit +import tensorflow as tf +from official.core import base_task +from official.core import base_trainer as core_lib +from official.core import train_utils +from official.modeling.multitask import base_model +from official.modeling.multitask import base_trainer +from official.modeling.multitask import configs +from official.modeling.multitask import evaluator as evaluator_lib +from official.modeling.multitask import interleaving_trainer +from official.modeling.multitask import multitask +from official.modeling.multitask import task_sampler + +TRAINERS = { + 'interleaving': interleaving_trainer.MultiTaskInterleavingTrainer, + 'joint': base_trainer.MultiTaskBaseTrainer +} + + +def run_experiment(*, distribution_strategy: tf.distribute.Strategy, + task: multitask.MultiTask, + model: base_model.MultiTaskBaseModel, mode: str, + params: configs.MultiTaskExperimentConfig, + model_dir: str) -> base_model.MultiTaskBaseModel: + """Runs train/eval configured by the experiment params. + + Args: + distribution_strategy: A distribution distribution_strategy. + task: A MultiTaskTask instance. + model: A MultiTaskBaseModel instance. + mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval' + or 'continuous_eval'. + params: ExperimentConfig instance. + model_dir: A 'str', a path to store model checkpoints and summaries. + + Returns: + model: `base_model.MultiTaskBaseModel` instance. 
+ """ + + is_training = 'train' in mode + is_eval = 'eval' in mode + with distribution_strategy.scope(): + optimizer = task.create_optimizer(params.trainer.optimizer_config, + params.runtime) + kwargs = dict(multi_task=task, multi_task_model=model, optimizer=optimizer) + if params.trainer.trainer_type == 'interleaving': + sampler = task_sampler.get_task_sampler(params.trainer.task_sampler, + task.task_weights) + kwargs.update(dict(task_sampler=sampler)) + trainer = TRAINERS[params.trainer.trainer_type]( + **kwargs) if is_training else None + if is_eval: + evaluator = evaluator_lib.MultiTaskEvaluator( + task=task, + model=model, + global_step=trainer.global_step if is_training else None) + else: + evaluator = None + + if trainer: + checkpoint = trainer.checkpoint + global_step = trainer.global_step + else: + checkpoint = evaluator.checkpoint + global_step = evaluator.global_step + + # TODO(hongkuny,haozhangthu): Revisit initialization method. + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + directory=model_dir, + max_to_keep=params.trainer.max_to_keep, + step_counter=global_step, + checkpoint_interval=params.trainer.checkpoint_interval, + init_fn=model.initialize) + + controller = orbit.Controller( + strategy=distribution_strategy, + trainer=trainer, + evaluator=evaluator, + global_step=global_step, + steps_per_loop=params.trainer.steps_per_loop, + checkpoint_manager=checkpoint_manager, + summary_dir=os.path.join(model_dir, 'train'), + eval_summary_dir=os.path.join(model_dir, 'validation'), + summary_interval=params.trainer.summary_interval) + + logging.info('Starts to execute mode: %s', mode) + with distribution_strategy.scope(): + if mode == 'train': + controller.train(steps=params.trainer.train_steps) + elif mode == 'train_and_eval': + controller.train_and_evaluate( + train_steps=params.trainer.train_steps, + eval_steps=params.trainer.validation_steps, + eval_interval=params.trainer.validation_interval) + elif mode == 'eval': + 
controller.evaluate(steps=params.trainer.validation_steps) + elif mode == 'continuous_eval': + + def timeout_fn(): + if evaluator.global_step.numpy() >= params.trainer.train_steps: + return True + return False + + controller.evaluate_continuously( + steps=params.trainer.validation_steps, + timeout=params.trainer.continuous_eval_timeout, + timeout_fn=timeout_fn) + else: + raise NotImplementedError('The mode is not implemented: %s' % mode) + + return model + + +def run_experiment_with_multitask_eval( + *, + distribution_strategy: tf.distribute.Strategy, + train_task: base_task.Task, + eval_tasks: multitask.MultiTask, + mode: str, + params: configs.MultiEvalExperimentConfig, + model_dir: str, + run_post_eval: bool = False, + save_summary: bool = True) -> tf.keras.Model: + """Runs train/eval configured by the experiment params. + + Args: + distribution_strategy: A distribution distribution_strategy. + train_task: A base_task.Task instance. + eval_tasks: A multitask.MultiTask with evaluation tasks. + mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval' + or 'continuous_eval'. + params: MultiEvalExperimentConfig instance. + model_dir: A 'str', a path to store model checkpoints and summaries. + run_post_eval: Whether to run post eval once after training, metrics logs + are returned. + save_summary: Whether to save train and validation summary. + + Returns: + model: `tf.keras.Model` instance. 
+ """ + + is_training = 'train' in mode + is_eval = 'eval' in mode + with distribution_strategy.scope(): + optimizer = train_task.create_optimizer(params.trainer.optimizer_config, + params.runtime) + model = train_task.build_model() + if is_training: + trainer = core_lib.Trainer( + config=params, + task=train_task, + model=model, + optimizer=optimizer, + train=True, + evaluate=False) + else: + trainer = None + if is_eval: + evaluator = evaluator_lib.MultiTaskEvaluator( + task=eval_tasks, + model=model, + global_step=trainer.global_step if is_training else None, + checkpoint_exporter=train_utils.maybe_create_best_ckpt_exporter( + params, model_dir)) + else: + evaluator = None + + if trainer: + checkpoint = trainer.checkpoint + global_step = trainer.global_step + else: + checkpoint = evaluator.checkpoint + global_step = evaluator.global_step + + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + directory=model_dir, + max_to_keep=params.trainer.max_to_keep, + step_counter=global_step, + checkpoint_interval=params.trainer.checkpoint_interval, + init_fn=trainer.initialize if trainer else None) + + controller = orbit.Controller( + strategy=distribution_strategy, + trainer=trainer, + evaluator=evaluator, + global_step=global_step, + steps_per_loop=params.trainer.steps_per_loop, + checkpoint_manager=checkpoint_manager, + summary_dir=os.path.join(model_dir, 'train') if save_summary else None, + eval_summary_dir=os.path.join(model_dir, 'validation') if + (save_summary) else None, + summary_interval=params.trainer.summary_interval if + (save_summary) else None) + + logging.info('Starts to execute mode: %s', mode) + with distribution_strategy.scope(): + if mode == 'train': + controller.train(steps=params.trainer.train_steps) + elif mode == 'train_and_eval': + controller.train_and_evaluate( + train_steps=params.trainer.train_steps, + eval_steps=params.trainer.validation_steps, + eval_interval=params.trainer.validation_interval) + elif mode == 'eval': + 
controller.evaluate(steps=params.trainer.validation_steps) + elif mode == 'continuous_eval': + + def timeout_fn(): + if evaluator.global_step.numpy() >= params.trainer.train_steps: + return True + return False + + controller.evaluate_continuously( + steps=params.trainer.validation_steps, + timeout=params.trainer.continuous_eval_timeout, + timeout_fn=timeout_fn) + else: + raise NotImplementedError('The mode is not implemented: %s' % mode) + + if run_post_eval: + return model, evaluator.evaluate( + tf.convert_to_tensor(params.trainer.validation_steps)) + else: + return model, {} diff --git a/official/modeling/multitask/train_lib_test.py b/official/modeling/multitask/train_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7d8a78d7eb0146229fc747e178d32ba2fec1e05f --- /dev/null +++ b/official/modeling/multitask/train_lib_test.py @@ -0,0 +1,120 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for multitask.train_lib.""" +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.core import task_factory +from official.modeling.hyperparams import params_dict +from official.modeling.multitask import configs +from official.modeling.multitask import multitask +from official.modeling.multitask import test_utils +from official.modeling.multitask import train_lib + + +class TrainLibTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super().setUp() + self._test_config = { + 'trainer': { + 'checkpoint_interval': 10, + 'steps_per_loop': 10, + 'summary_interval': 10, + 'train_steps': 10, + 'validation_steps': 5, + 'validation_interval': 10, + 'continuous_eval_timeout': 1, + 'optimizer_config': { + 'optimizer': { + 'type': 'sgd', + }, + 'learning_rate': { + 'type': 'constant' + } + } + }, + } + + @combinations.generate( + combinations.combine( + distribution_strategy=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + mode='eager', + flag_mode=['train', 'eval', 'train_and_eval'])) + def test_end_to_end(self, distribution_strategy, flag_mode): + model_dir = self.get_temp_dir() + experiment_config = configs.MultiTaskExperimentConfig( + task=configs.MultiTaskConfig( + task_routines=( + configs.TaskRoutine( + task_name='foo', + task_config=test_utils.FooConfig()), + configs.TaskRoutine( + task_name='bar', task_config=test_utils.BarConfig())))) + experiment_config = params_dict.override_params_dict( + experiment_config, self._test_config, is_strict=False) + with distribution_strategy.scope(): + test_multitask = multitask.MultiTask.from_config(experiment_config.task) + model = test_utils.MockMultiTaskModel() + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=test_multitask, + 
model=model, + mode=flag_mode, + params=experiment_config, + model_dir=model_dir) + + @combinations.generate( + combinations.combine( + distribution_strategy=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + mode='eager', + flag_mode=['train', 'eval', 'train_and_eval'])) + def test_end_to_end_multi_eval(self, distribution_strategy, flag_mode): + model_dir = self.get_temp_dir() + experiment_config = configs.MultiEvalExperimentConfig( + task=test_utils.FooConfig(), + eval_tasks=configs.MultiTaskConfig( + task_routines=( + configs.TaskRoutine( + task_name='foo', + task_config=test_utils.FooConfig()), + configs.TaskRoutine( + task_name='bar', task_config=test_utils.BarConfig())))) + experiment_config = params_dict.override_params_dict( + experiment_config, self._test_config, is_strict=False) + with distribution_strategy.scope(): + train_task = task_factory.get_task(experiment_config.task) + eval_tasks = multitask.MultiTask.from_config(experiment_config.eval_tasks) + train_lib.run_experiment_with_multitask_eval( + distribution_strategy=distribution_strategy, + train_task=train_task, + eval_tasks=eval_tasks, + mode=flag_mode, + params=experiment_config, + model_dir=model_dir) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/optimization/__init__.py b/official/modeling/optimization/__init__.py index b5c6292b64d922144b7ced18c8c9460617e05492..08b0e69c6253db6b352739ccb1d75bef793a7a17 100644 --- a/official/modeling/optimization/__init__.py +++ b/official/modeling/optimization/__init__.py @@ -1,7 +1,23 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Optimization package definition.""" # pylint: disable=wildcard-import from official.modeling.optimization.configs.learning_rate_config import * from official.modeling.optimization.configs.optimization_config import * from official.modeling.optimization.configs.optimizer_config import * +from official.modeling.optimization.ema_optimizer import ExponentialMovingAverage +from official.modeling.optimization.lr_schedule import * from official.modeling.optimization.optimizer_factory import OptimizerFactory diff --git a/official/modeling/optimization/configs/__init__.py b/official/modeling/optimization/configs/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/modeling/optimization/configs/__init__.py +++ b/official/modeling/optimization/configs/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/modeling/optimization/configs/learning_rate_config.py b/official/modeling/optimization/configs/learning_rate_config.py index 2a0625e0a75040e115e91c6be5b89bddb0de06b0..f6556922ef2e60190d6b5af0e10995ef2052f6dd 100644 --- a/official/modeling/optimization/configs/learning_rate_config.py +++ b/official/modeling/optimization/configs/learning_rate_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Dataclasses for learning rate schedule config.""" from typing import List, Optional @@ -50,16 +49,13 @@ class StepwiseLrConfig(base_config.Config): Attributes: name: The name of the learning rate schedule. Defaults to PiecewiseConstant. - boundaries: A list of ints of strictly increasing entries. - Defaults to None. + boundaries: A list of ints of strictly increasing entries. Defaults to None. values: A list of floats that specifies the values for the intervals defined - by `boundaries`. It should have one more element than `boundaries`. - The learning rate is computed as follows: - [0, boundaries[0]] -> values[0] - [boundaries[0], boundaries[1]] -> values[1] - [boundaries[n-1], boundaries[n]] -> values[n] - [boundaries[n], end] -> values[n+1] - Defaults to None. + by `boundaries`. It should have one more element than `boundaries`. 
+ The learning rate is computed as follows: [0, boundaries[0]] -> + values[0] [boundaries[0], boundaries[1]] -> values[1] + [boundaries[n-1], boundaries[n]] -> values[n] [boundaries[n], + end] -> values[n+1] Defaults to None. """ name: str = 'PiecewiseConstantDecay' boundaries: Optional[List[int]] = None @@ -74,13 +70,12 @@ class ExponentialLrConfig(base_config.Config): Attributes: name: The name of the learning rate schedule. Defaults to ExponentialDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to - None. - decay_steps: A positive integer that is used for decay computation. - Defaults to None. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + decay_steps: A positive integer that is used for decay computation. Defaults + to None. decay_rate: A float. Defaults to None. staircase: A boolean, if true, learning rate is decreased at discreate - intervals. Defaults to False. + intervals. Defaults to False. """ name: str = 'ExponentialDecay' initial_learning_rate: Optional[float] = None @@ -97,14 +92,13 @@ class PolynomialLrConfig(base_config.Config): Attributes: name: The name of the learning rate schedule. Defaults to PolynomialDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to - None. - decay_steps: A positive integer that is used for decay computation. - Defaults to None. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + decay_steps: A positive integer that is used for decay computation. Defaults + to None. end_learning_rate: A float. The minimal end learning rate. power: A float. The power of the polynomial. Defaults to linear, 1.0. cycle: A boolean, whether or not it should cycle beyond decay_steps. - Defaults to False. + Defaults to False. """ name: str = 'PolynomialDecay' initial_learning_rate: Optional[float] = None @@ -123,12 +117,11 @@ class CosineLrConfig(base_config.Config): Attributes: name: The name of the learning rate schedule. 
Defaults to CosineDecay. - initial_learning_rate: A float. The initial learning rate. Defaults to - None. - decay_steps: A positive integer that is used for decay computation. - Defaults to None. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + decay_steps: A positive integer that is used for decay computation. Defaults + to None. alpha: A float. Minimum learning rate value as a fraction of - initial_learning_rate. + initial_learning_rate. """ name: str = 'CosineDecay' initial_learning_rate: Optional[float] = None @@ -136,6 +129,66 @@ class CosineLrConfig(base_config.Config): alpha: float = 0.0 +@dataclasses.dataclass +class DirectPowerLrConfig(base_config.Config): + """Configuration for DirectPower learning rate decay. + + This class configures a schedule following follows lr * (step)^power. + + Attributes: + name: The name of the learning rate schedule. Defaults to DirectPowerDecay. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + power: A float. Defaults to -0.5, for sqrt decay. + """ + name: str = 'DirectPowerDecay' + initial_learning_rate: Optional[float] = None + power: float = -0.5 + + +@dataclasses.dataclass +class PowerAndLinearDecayLrConfig(base_config.Config): + """Configuration for DirectPower learning rate decay. + + This class configures a schedule following follows lr * (step)^power for the + first total_decay_steps * (1 - linear_decay_fraction) steps, and follows + lr * (step)^power * (total_decay_steps - step) / (total_decay_steps * + linear_decay_fraction) for the rest of the steps. + + Attributes: + name: The name of the learning rate schedule. Defaults to DirectPowerDecay. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + power: A float. Defaults to -0.5, for sqrt decay. 
+ """ + name: str = 'PowerAndLinearDecay' + initial_learning_rate: Optional[float] = None + total_decay_steps: Optional[int] = None + power: float = -0.5 + linear_decay_fraction: float = 0.1 + + +@dataclasses.dataclass +class PowerDecayWithOffsetLrConfig(base_config.Config): + """Configuration for power learning rate decay with step offset. + + Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`. + Otherwise, learning rate equals to lr * (step - offset)^power. + + Attributes: + name: The name of the learning rate schedule. + Defaults to PowerDecayWithOffset. + initial_learning_rate: A float. The initial learning rate. Defaults to None. + power: A float. Defaults to -0.5, for sqrt decay. + offset: An integer. Power decay happens after `offset` steps. + pre_offset_learning_rate: A float. The constant learning rate before + `offset` steps. + """ + name: str = 'PowerDecayWithOffset' + initial_learning_rate: Optional[float] = None + power: float = -0.5 + offset: int = 0 + pre_offset_learning_rate: float = 1.0e6 + + @dataclasses.dataclass class LinearWarmupConfig(base_config.Config): """Configuration for linear warmup schedule config. @@ -173,4 +226,3 @@ class PolynomialWarmupConfig(base_config.Config): name: str = 'polynomial' power: float = 1 warmup_steps: Optional[int] = None - diff --git a/official/modeling/optimization/configs/optimization_config.py b/official/modeling/optimization/configs/optimization_config.py index 23e112e1b6197a8505a18b9b8d573012d1dd5e73..a1b5931b35b226a93f4d1fad560ffa844585d95e 100644 --- a/official/modeling/optimization/configs/optimization_config.py +++ b/official/modeling/optimization/configs/optimization_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Dataclasses for optimization configs. This file define the dataclass for optimization configs (OptimizationConfig). @@ -40,6 +39,8 @@ class OptimizerConfig(oneof.OneOfConfig): adamw: adam with weight decay. lamb: lamb optimizer. rmsprop: rmsprop optimizer. + lars: lars optimizer. + adagrad: adagrad optimizer. """ type: Optional[str] = None sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig() @@ -47,6 +48,8 @@ class OptimizerConfig(oneof.OneOfConfig): adamw: opt_cfg.AdamWeightDecayConfig = opt_cfg.AdamWeightDecayConfig() lamb: opt_cfg.LAMBConfig = opt_cfg.LAMBConfig() rmsprop: opt_cfg.RMSPropConfig = opt_cfg.RMSPropConfig() + lars: opt_cfg.LARSConfig = opt_cfg.LARSConfig() + adagrad: opt_cfg.AdagradConfig = opt_cfg.AdagradConfig() @dataclasses.dataclass @@ -60,6 +63,10 @@ class LrConfig(oneof.OneOfConfig): exponential: exponential learning rate config. polynomial: polynomial learning rate config. cosine: cosine learning rate config. + power: step^power learning rate config. + power_linear: learning rate config of step^power followed by + step^power*linear. + power_with_offset: power decay with a step offset. 
""" type: Optional[str] = None constant: lr_cfg.ConstantLrConfig = lr_cfg.ConstantLrConfig() @@ -67,6 +74,11 @@ class LrConfig(oneof.OneOfConfig): exponential: lr_cfg.ExponentialLrConfig = lr_cfg.ExponentialLrConfig() polynomial: lr_cfg.PolynomialLrConfig = lr_cfg.PolynomialLrConfig() cosine: lr_cfg.CosineLrConfig = lr_cfg.CosineLrConfig() + power: lr_cfg.DirectPowerLrConfig = lr_cfg.DirectPowerLrConfig() + power_linear: lr_cfg.PowerAndLinearDecayLrConfig = ( + lr_cfg.PowerAndLinearDecayLrConfig()) + power_with_offset: lr_cfg.PowerDecayWithOffsetLrConfig = ( + lr_cfg.PowerDecayWithOffsetLrConfig()) @dataclasses.dataclass @@ -89,9 +101,12 @@ class OptimizationConfig(base_config.Config): Attributes: optimizer: optimizer oneof config. + ema: optional exponential moving average optimizer config, if specified, ema + optimizer will be used. learning_rate: learning rate oneof config. warmup: warmup oneof config. """ optimizer: OptimizerConfig = OptimizerConfig() + ema: Optional[opt_cfg.EMAConfig] = None learning_rate: LrConfig = LrConfig() warmup: WarmupConfig = WarmupConfig() diff --git a/official/modeling/optimization/configs/optimization_config_test.py b/official/modeling/optimization/configs/optimization_config_test.py index 6dcd55e0e2071a23cae1494ae29c5efa282d052a..02b99f592e9ba4f66ccd9e906eee5158b2b1b13e 100644 --- a/official/modeling/optimization/configs/optimization_config_test.py +++ b/official/modeling/optimization/configs/optimization_config_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for optimization_config.py.""" import tensorflow as tf @@ -26,15 +25,15 @@ class OptimizerConfigTest(tf.test.TestCase): def test_no_optimizer(self): optimizer = optimization_config.OptimizationConfig({}).optimizer.get() - self.assertEqual(optimizer, None) + self.assertIsNone(optimizer) def test_no_lr_schedule(self): lr = optimization_config.OptimizationConfig({}).learning_rate.get() - self.assertEqual(lr, None) + self.assertIsNone(lr) def test_no_warmup_schedule(self): warmup = optimization_config.OptimizationConfig({}).warmup.get() - self.assertEqual(warmup, None) + self.assertIsNone(warmup) def test_config(self): opt_config = optimization_config.OptimizationConfig({ @@ -50,12 +49,11 @@ class OptimizerConfigTest(tf.test.TestCase): 'type': 'linear' } }) - self.assertEqual(opt_config.optimizer.get(), - opt_cfg.SGDConfig()) + self.assertEqual(opt_config.optimizer.get(), opt_cfg.SGDConfig()) self.assertEqual(opt_config.learning_rate.get(), lr_cfg.PolynomialLrConfig()) - self.assertEqual(opt_config.warmup.get(), - lr_cfg.LinearWarmupConfig()) + self.assertEqual(opt_config.warmup.get(), lr_cfg.LinearWarmupConfig()) + if __name__ == '__main__': tf.test.main() diff --git a/official/modeling/optimization/configs/optimizer_config.py b/official/modeling/optimization/configs/optimizer_config.py index 5e7ca2d0c195883b0af7a5920bc13402bada4139..37f9db50f59b0d8b41be530fac1aaf884785ab9d 100644 --- a/official/modeling/optimization/configs/optimizer_config.py +++ b/official/modeling/optimization/configs/optimizer_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Dataclasses for optimizer configs.""" from typing import List, Optional @@ -21,7 +20,24 @@ from official.modeling.hyperparams import base_config @dataclasses.dataclass -class SGDConfig(base_config.Config): +class BaseOptimizerConfig(base_config.Config): + """Base optimizer config. + + Attributes: + clipnorm: float >= 0 or None. If not None, Gradients will be clipped when + their L2 norm exceeds this value. + clipvalue: float >= 0 or None. If not None, Gradients will be clipped when + their absolute value exceeds this value. + global_clipnorm: float >= 0 or None. If not None, gradient of all weights is + clipped so that their global norm is no higher than this value + """ + clipnorm: Optional[float] = None + clipvalue: Optional[float] = None + global_clipnorm: Optional[float] = None + + +@dataclasses.dataclass +class SGDConfig(BaseOptimizerConfig): """Configuration for SGD optimizer. The attributes for this class matches the arguments of tf.keras.optimizer.SGD. @@ -39,7 +55,7 @@ class SGDConfig(base_config.Config): @dataclasses.dataclass -class RMSPropConfig(base_config.Config): +class RMSPropConfig(BaseOptimizerConfig): """Configuration for RMSProp optimizer. The attributes for this class matches the arguments of @@ -60,7 +76,25 @@ class RMSPropConfig(base_config.Config): @dataclasses.dataclass -class AdamConfig(base_config.Config): +class AdagradConfig(BaseOptimizerConfig): + """Configuration for Adagrad optimizer. + + The attributes of this class match the arguments of + tf.keras.optimizer.Adagrad. + + Attributes: + name: name of the optimizer. + initial_accumulator_value: A floating point value. Starting value for the + accumulators, must be non-negative. 
+ epsilon: A small floating point value to avoid zero denominator. + """ + name: str = "Adagrad" + initial_accumulator_value: float = 0.1 + epsilon: float = 1e-07 + + +@dataclasses.dataclass +class AdamConfig(BaseOptimizerConfig): """Configuration for Adam optimizer. The attributes for this class matches the arguments of @@ -72,7 +106,7 @@ class AdamConfig(base_config.Config): beta_2: decay rate for 2st order moments. epsilon: epsilon value used for numerical stability in Adam optimizer. amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". + the paper "On the Convergence of Adam and beyond". """ name: str = "Adam" beta_1: float = 0.9 @@ -82,7 +116,7 @@ class AdamConfig(base_config.Config): @dataclasses.dataclass -class AdamWeightDecayConfig(base_config.Config): +class AdamWeightDecayConfig(BaseOptimizerConfig): """Configuration for Adam optimizer with weight decay. Attributes: @@ -91,12 +125,14 @@ class AdamWeightDecayConfig(base_config.Config): beta_2: decay rate for 2st order moments. epsilon: epsilon value used for numerical stability in the optimizer. amsgrad: boolean. Whether to apply AMSGrad variant of this algorithm from - the paper "On the Convergence of Adam and beyond". + the paper "On the Convergence of Adam and beyond". weight_decay_rate: float. Weight decay rate. Default to 0. include_in_weight_decay: list[str], or None. List of weight names to include - in weight decay. - include_in_weight_decay: list[str], or None. List of weight names to not - include in weight decay. + in weight decay. + exclude_from_weight_decay: list[str], or None. List of weight names to not + include in weight decay. + gradient_clip_norm: A positive float. Clips the gradients to this maximum + L2-norm. Default to 1.0. 
""" name: str = "AdamWeightDecay" beta_1: float = 0.9 @@ -106,10 +142,11 @@ class AdamWeightDecayConfig(base_config.Config): weight_decay_rate: float = 0.0 include_in_weight_decay: Optional[List[str]] = None exclude_from_weight_decay: Optional[List[str]] = None + gradient_clip_norm: float = 1.0 @dataclasses.dataclass -class LAMBConfig(base_config.Config): +class LAMBConfig(BaseOptimizerConfig): """Configuration for LAMB optimizer. The attributes for this class matches the arguments of @@ -122,12 +159,11 @@ class LAMBConfig(base_config.Config): epsilon: epsilon value used for numerical stability in LAMB optimizer. weight_decay_rate: float. Weight decay rate. Default to 0. exclude_from_weight_decay: List of regex patterns of variables excluded from - weight decay. Variables whose name contain a - substring matching the pattern will be excluded. + weight decay. Variables whose name contain a substring matching the + pattern will be excluded. exclude_from_layer_adaptation: List of regex patterns of variables excluded - from layer adaptation. Variables whose name - contain a substring matching the pattern will - be excluded. + from layer adaptation. Variables whose name contain a substring matching + the pattern will be excluded. """ name: str = "LAMB" beta_1: float = 0.9 @@ -136,3 +172,53 @@ class LAMBConfig(base_config.Config): weight_decay_rate: float = 0.0 exclude_from_weight_decay: Optional[List[str]] = None exclude_from_layer_adaptation: Optional[List[str]] = None + + +@dataclasses.dataclass +class EMAConfig(BaseOptimizerConfig): + """Exponential moving average optimizer config. + + Attributes: + name: 'str', name of the optimizer. + average_decay: 'float', average decay value. + start_step: 'int', start step to apply moving average. + dynamic_decay: 'bool', whether to apply dynamic decay or not. 
+ """ + name: str = "ExponentialMovingAverage" + average_decay: float = 0.99 + start_step: int = 0 + dynamic_decay: bool = True + + +@dataclasses.dataclass +class LARSConfig(BaseOptimizerConfig): + """Layer-wise adaptive rate scaling config. + + Attributes: + name: 'str', name of the optimizer. + momentum: `float` hyperparameter >= 0 that accelerates gradient descent in + the relevant direction and dampens oscillations. Defaults to 0.9. + eeta: `float` LARS coefficient as used in the paper. Default set to LARS + coefficient from the paper. (eeta / weight_decay) determines the highest + scaling factor in LARS.. + weight_decay_rate: `float` for weight decay. + nesterov: 'boolean' for whether to use nesterov momentum. + classic_momentum: `boolean` for whether to use classic (or popular) + momentum. The learning rate is applied during momentum update in classic + momentum, but after momentum for popular momentum. + exclude_from_weight_decay: A list of `string` for variable screening, if any + of the string appears in a variable's name, the variable will be excluded + for computing weight decay. For example, one could specify the list like + ['batch_normalization', 'bias'] to exclude BN and bias from weight decay. + exclude_from_layer_adaptation: Similar to exclude_from_weight_decay, but for + layer adaptation. If it is None, it will be defaulted the same as + exclude_from_weight_decay. 
+ """ + name: str = "LARS" + momentum: float = 0.9 + eeta: float = 0.001 + weight_decay_rate: float = 0.0 + nesterov: bool = False + classic_momentum: bool = True + exclude_from_weight_decay: Optional[List[str]] = None + exclude_from_layer_adaptation: Optional[List[str]] = None diff --git a/official/modeling/optimization/ema_optimizer.py b/official/modeling/optimization/ema_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..88cb41a65a4d9ab04a320088c9da8ca00f5cc2a6 --- /dev/null +++ b/official/modeling/optimization/ema_optimizer.py @@ -0,0 +1,243 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Exponential moving average optimizer.""" + +from typing import Text, List + +import tensorflow as tf + +# pylint: disable=protected-access + + +class ExponentialMovingAverage(tf.keras.optimizers.Optimizer): + """Optimizer that computes an exponential moving average of the variables. + + Empirically it has been found that using the moving average of the trained + parameters of a deep network is better than using its trained parameters + directly. This optimizer allows you to compute this moving average and swap + the variables at save time so that any code outside of the training loop + will use by default the average values instead of the original ones. 
+ + Example of usage for training: + ```python + opt = tf.keras.optimizers.SGD(learning_rate) + opt = ExponentialMovingAverage(opt) + + opt.shadow_copy(model) + ``` + + At test time, swap the shadow variables to evaluate on the averaged weights: + ```python + opt.swap_weights() + # Test eval the model here + opt.swap_weights() + ``` + """ + + def __init__(self, + optimizer: tf.keras.optimizers.Optimizer, + average_decay: float = 0.99, + start_step: int = 0, + dynamic_decay: bool = True, + name: Text = 'ExponentialMovingAverage', + **kwargs): + """Construct a new ExponentialMovingAverage optimizer. + + Args: + optimizer: `tf.keras.optimizers.Optimizer` that will be + used to compute and apply gradients. + average_decay: float. Decay to use to maintain the moving averages + of trained variables. + start_step: int. What step to start the moving average. + dynamic_decay: bool. Whether to change the decay based on the number + of optimizer updates. Decay will start at 0.1 and gradually increase + up to `average_decay` after each optimizer update. This behavior is + similar to `tf.train.ExponentialMovingAverage` in TF 1.x. + name: Optional name for the operations created when applying + gradients. Defaults to "moving_average". + **kwargs: keyword arguments. Allowed to be {`clipnorm`, + `clipvalue`, `lr`, `decay`}. 
+ """ + super().__init__(name, **kwargs) + self._average_decay = average_decay + self._start_step = tf.constant(start_step, tf.float32) + self._dynamic_decay = dynamic_decay + self._optimizer = optimizer + self._track_trackable(self._optimizer, 'base_optimizer') + + def shadow_copy(self, model: tf.keras.Model): + """Creates shadow variables for the given model weights.""" + for var in model.weights: + self.add_slot(var, 'average', initializer='zeros') + self._average_weights = [ + self.get_slot(var, 'average') for var in model.weights + ] + self._model_weights = model.weights + + @property + def has_shadow_copy(self): + """Whether this optimizer has created shadow variables.""" + return self._model_weights is not None + + def _create_slots(self, var_list): + self._optimizer._create_slots(var_list=var_list) # pylint: disable=protected-access + + def apply_gradients(self, grads_and_vars, name: Text = None): + result = self._optimizer.apply_gradients(grads_and_vars, name) + self.update_average(self.iterations) + return result + + @tf.function + def update_average(self, step: tf.Tensor): + step = tf.cast(step, tf.float32) + if step < self._start_step: + decay = tf.constant(0., tf.float32) + elif self._dynamic_decay: + decay = step - self._start_step + decay = tf.minimum(self._average_decay, (1. + decay) / (10. + decay)) + else: + decay = self._average_decay + + def _apply_moving(v_moving, v_normal): + diff = v_moving - v_normal + v_moving.assign_sub(tf.cast(1. - decay, v_moving.dtype) * diff) + return v_moving + + def _update(strategy, v_moving_and_v_normal): + for v_moving, v_normal in v_moving_and_v_normal: + strategy.extended.update(v_moving, _apply_moving, args=(v_normal,)) + + ctx = tf.distribute.get_replica_context() + return ctx.merge_call(_update, args=(zip(self._average_weights, + self._model_weights),)) + + def swap_weights(self): + """Swap the average and moving weights. 
+ + This is a convenience method to allow one to evaluate the averaged weights + at test time. Loads the weights stored in `self._average` into the model, + keeping a copy of the original model weights. Swapping twice will return + the original weights. + """ + if tf.distribute.in_cross_replica_context(): + strategy = tf.distribute.get_strategy() + strategy.run(self._swap_weights, args=()) + else: + raise ValueError('Swapping weights must occur under a ' + 'tf.distribute.Strategy') + + @tf.function + def _swap_weights(self): + def fn_0(a, b): + a.assign_add(b) + return a + def fn_1(b, a): + b.assign(a - b) + return b + def fn_2(a, b): + a.assign_sub(b) + return a + + def swap(strategy, a_and_b): + """Swap `a` and `b` and mirror to all devices.""" + for a, b in a_and_b: + strategy.extended.update(a, fn_0, args=(b,)) # a = a + b + strategy.extended.update(b, fn_1, args=(a,)) # b = a - b + strategy.extended.update(a, fn_2, args=(b,)) # a = a - b + + ctx = tf.distribute.get_replica_context() + return ctx.merge_call( + swap, args=(zip(self._average_weights, self._model_weights),)) + + def assign_average_vars(self, var_list: List[tf.Variable]): + """Assign variables in var_list with their respective averages. + + Args: + var_list: List of model variables to be assigned to their average. + Returns: + assign_op: The op corresponding to the assignment operation of + variables to their average. 
+ """ + assign_op = tf.group([ + var.assign(self.get_slot(var, 'average')) for var in var_list + if var.trainable + ]) + return assign_op + + def _create_hypers(self): + self._optimizer._create_hypers() # pylint: disable=protected-access + + def _prepare(self, var_list): + return self._optimizer._prepare(var_list=var_list) # pylint: disable=protected-access + + @property + def iterations(self): + return self._optimizer.iterations + + @iterations.setter + def iterations(self, variable): + self._optimizer.iterations = variable + + @property + def weights(self): + # return self._weights + self._optimizer.weights + return self._optimizer.weights + + def variables(self): + return self._weights + [self.iterations] + + @property + def lr(self): + return self._optimizer._get_hyper('learning_rate') + + @lr.setter + def lr(self, lr): + self._optimizer._set_hyper('learning_rate', lr) + + @property + def learning_rate(self): + return self._optimizer._get_hyper('learning_rate') + + @learning_rate.setter + def learning_rate(self, learning_rate): # pylint: disable=redefined-outer-name + self._optimizer._set_hyper('learning_rate', learning_rate) + + def _resource_apply_dense(self, grad, var): + return self._optimizer._resource_apply_dense(grad, var) + + def _resource_apply_sparse(self, grad, var, indices): + return self._optimizer._resource_apply_sparse(grad, var, indices) + + def _resource_apply_sparse_duplicate_indices(self, grad, var, indices): + return self._optimizer._resource_apply_sparse_duplicate_indices( + grad, var, indices) + + def get_config(self): + config = { + 'optimizer': tf.keras.optimizers.serialize(self._optimizer), + 'average_decay': self._average_decay, + 'start_step': self._start_step, + 'dynamic_decay': self._dynamic_decay, + } + base_config = super(ExponentialMovingAverage, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config, custom_objects=None): + optimizer = 
tf.keras.optimizers.deserialize( + config.pop('optimizer'), + custom_objects=custom_objects, + ) + return cls(optimizer, **config) diff --git a/official/modeling/optimization/lars_optimizer.py b/official/modeling/optimization/lars_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..ac15042756c02c3d3e2da22419cac2e04522b57e --- /dev/null +++ b/official/modeling/optimization/lars_optimizer.py @@ -0,0 +1,186 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Layer-wise adaptive rate scaling optimizer.""" +import re +from typing import Text, List, Optional + +import tensorflow as tf + + +# pylint: disable=protected-access + + +class LARS(tf.keras.optimizers.Optimizer): + """Layer-wise Adaptive Rate Scaling for large batch training. + + Introduced by "Large Batch Training of Convolutional Networks" by Y. You, + I. Gitman, and B. Ginsburg. (https://arxiv.org/abs/1708.03888) + """ + + def __init__(self, + learning_rate: float = 0.01, + momentum: float = 0.9, + weight_decay_rate: float = 0.0, + eeta: float = 0.001, + nesterov: bool = False, + classic_momentum: bool = True, + exclude_from_weight_decay: Optional[List[Text]] = None, + exclude_from_layer_adaptation: Optional[List[Text]] = None, + name: Text = "LARS", + **kwargs): + """Constructs a LARSOptimizer. + + Args: + learning_rate: `float` for learning rate. Defaults to 0.01. 
+ momentum: `float` hyperparameter >= 0 that accelerates gradient descent + in the relevant direction and dampens oscillations. Defaults to 0.9. + weight_decay_rate: `float` for weight decay. + eeta: `float` LARS coefficient as used in the paper. Default set to LARS + coefficient from the paper. (eeta / weight_decay) determines the + highest scaling factor in LARS.. + nesterov: 'boolean' for whether to use nesterov momentum. + classic_momentum: `boolean` for whether to use classic (or popular) + momentum. The learning rate is applied during momentum update in + classic momentum, but after momentum for popular momentum. + exclude_from_weight_decay: A list of `string` for variable screening, if + any of the string appears in a variable's name, the variable will be + excluded for computing weight decay. For example, one could specify + the list like ['batch_normalization', 'bias'] to exclude BN and bias + from weight decay. + exclude_from_layer_adaptation: Similar to exclude_from_weight_decay, but + for layer adaptation. If it is None, it will be defaulted the same as + exclude_from_weight_decay. + name: `Text` as optional name for the operations created when applying + gradients. Defaults to "LARS". + **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`, + `decay`}. `clipnorm` is clip gradients by norm; `clipvalue` is clip + gradients by value, `decay` is included for backward compatibility to + allow time inverse decay of learning rate. `lr` is included for + backward compatibility, recommended to use `learning_rate` instead. 
+ """ + super(LARS, self).__init__(name, **kwargs) + + self._set_hyper("learning_rate", learning_rate) + self._set_hyper("decay", self._initial_decay) + self.momentum = momentum + self.weight_decay_rate = weight_decay_rate + self.eeta = eeta + self.nesterov = nesterov + self.classic_momentum = classic_momentum + self.exclude_from_weight_decay = exclude_from_weight_decay + # exclude_from_layer_adaptation is set to exclude_from_weight_decay if the + # arg is None. + if exclude_from_layer_adaptation: + self.exclude_from_layer_adaptation = exclude_from_layer_adaptation + else: + self.exclude_from_layer_adaptation = exclude_from_weight_decay + + def _create_slots(self, var_list): + for v in var_list: + self.add_slot(v, "momentum") + + def _resource_apply_dense(self, grad, param, apply_state=None): + if grad is None or param is None: + return tf.no_op() + + var_device, var_dtype = param.device, param.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) or + self._fallback_apply_state(var_device, var_dtype)) + learning_rate = coefficients["lr_t"] + + param_name = param.name + + v = self.get_slot(param, "momentum") + + if self._use_weight_decay(param_name): + grad += self.weight_decay_rate * param + + if self.classic_momentum: + trust_ratio = 1.0 + if self._do_layer_adaptation(param_name): + w_norm = tf.norm(param, ord=2) + g_norm = tf.norm(grad, ord=2) + trust_ratio = tf.where( + tf.greater(w_norm, 0), + tf.where(tf.greater(g_norm, 0), (self.eeta * w_norm / g_norm), 1.0), + 1.0) + scaled_lr = learning_rate * trust_ratio + + next_v = tf.multiply(self.momentum, v) + scaled_lr * grad + if self.nesterov: + update = tf.multiply(self.momentum, next_v) + scaled_lr * grad + else: + update = next_v + next_param = param - update + else: + next_v = tf.multiply(self.momentum, v) + grad + if self.nesterov: + update = tf.multiply(self.momentum, next_v) + grad + else: + update = next_v + + trust_ratio = 1.0 + if self._do_layer_adaptation(param_name): + 
w_norm = tf.norm(param, ord=2) + v_norm = tf.norm(update, ord=2) + trust_ratio = tf.where( + tf.greater(w_norm, 0), + tf.where(tf.greater(v_norm, 0), (self.eeta * w_norm / v_norm), 1.0), + 1.0) + scaled_lr = trust_ratio * learning_rate + next_param = param - scaled_lr * update + + return tf.group(*[ + param.assign(next_param, use_locking=False), + v.assign(next_v, use_locking=False) + ]) + + def _resource_apply_sparse(self, grad, handle, indices, apply_state): + raise NotImplementedError("Applying sparse gradients is not implemented.") + + def _use_weight_decay(self, param_name): + """Whether to use L2 weight decay for `param_name`.""" + if not self.weight_decay_rate: + return False + if self.exclude_from_weight_decay: + for r in self.exclude_from_weight_decay: + if re.search(r, param_name) is not None: + return False + return True + + def _do_layer_adaptation(self, param_name): + """Whether to do layer-wise learning rate adaptation for `param_name`.""" + if self.exclude_from_layer_adaptation: + for r in self.exclude_from_layer_adaptation: + if re.search(r, param_name) is not None: + return False + return True + + def get_config(self): + config = super(LARS, self).get_config() + config.update({ + "learning_rate": self._serialize_hyperparameter("learning_rate"), + "decay": self._serialize_hyperparameter("decay"), + "momentum": self.momentum, + "classic_momentum": self.classic_momentum, + "weight_decay_rate": self.weight_decay_rate, + "eeta": self.eeta, + "nesterov": self.nesterov, + }) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/modeling/optimization/lr_schedule.py b/official/modeling/optimization/lr_schedule.py index d5dd6fb6fb1478297e579a4be5b87ab5ae25f40e..dcaa156b9dbb8583a47d013d960cbae026196dee 100644 --- a/official/modeling/optimization/lr_schedule.py +++ b/official/modeling/optimization/lr_schedule.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow 
Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Learning rate schedule classes.""" from typing import Mapping, Any, Union, Optional @@ -41,12 +40,11 @@ class LinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): Args: after_warmup_lr_sched: tf.keras.optimizers.schedules .LearningRateSchedule or a constant. - warmup_steps: int. number of the warmup steps. - warmup_learning_rate: floating point number. Initial learning rate for the - warmup. + warmup_steps: Number of the warmup steps. + warmup_learning_rate: Initial learning rate for the warmup. name: Optional, name of warmup schedule. """ - super(LinearWarmup, self).__init__() + super().__init__() self._name = name self._after_warmup_lr_sched = after_warmup_lr_sched self._warmup_steps = warmup_steps @@ -103,7 +101,7 @@ class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule): warmup_steps: int, power: float = 1.0, name: str = "PolynomialWarmup"): - super(PolynomialWarmUp, self).__init__() + super().__init__() if isinstance(after_warmup_lr_sched, tf.keras.optimizers.schedules.LearningRateSchedule): self._initial_learning_rate = after_warmup_lr_sched(warmup_steps) @@ -122,7 +120,14 @@ class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule): # learning rate will be `global_step/num_warmup_steps * init_lr`. 
global_step_float = tf.cast(step, tf.float32) warmup_steps_float = tf.cast(self._warmup_steps, tf.float32) - warmup_percent_done = global_step_float / warmup_steps_float + + if self._warmup_steps <= 0: + warmup_percent_done = 1.0 + else: + # A zero `step` may cause Inf. So make `step` positive. + step_non_zero = tf.math.maximum(global_step_float, 1.0) + warmup_percent_done = step_non_zero / warmup_steps_float + warmup_learning_rate = ( self._initial_learning_rate * tf.math.pow(warmup_percent_done, self._power)) @@ -148,8 +153,154 @@ class PolynomialWarmUp(tf.keras.optimizers.schedules.LearningRateSchedule): config = {"after_warmup_lr_sched": self._after_warmup_lr_sched} # pytype: disable=attribute-error config.update({ - "warmup_steps": self._warmup_setps, + "warmup_steps": self._warmup_steps, "power": self._power, "name": self._name }) return config + + +class DirectPowerDecay(tf.keras.optimizers.schedules.LearningRateSchedule): + """Learning rate schedule follows lr * (step)^power.""" + + def __init__(self, + initial_learning_rate: float, + power: float = 1.0, + name: str = "DirectPowerDecay"): + """Initialize configuration of the learning rate schedule. + + Args: + initial_learning_rate: The initial learning rate. + power: The order of the polynomial. + name: Optional, name of warmup schedule. + """ + super().__init__() + self._initial_learning_rate = initial_learning_rate + self._power = power + self._name = name + + def __call__(self, step): + with tf.name_scope(self._name or "DirectPowerDecay"): + step = tf.cast(step, tf.float32) + learning_rate = self._initial_learning_rate + # A zero `step` may cause Inf. So make `step` positive. 
+ step_non_zero = tf.math.maximum(step, 1.0) + learning_rate *= tf.math.pow(step_non_zero, self._power) + return learning_rate + + def get_config(self): + """Get the configuration of the learning rate schedule.""" + return { + "initial_learning_rate": self._initial_learning_rate, + "power": self._power, + "name": self._name, + } + + +class PowerAndLinearDecay(tf.keras.optimizers.schedules.LearningRateSchedule): + """Learning rate schedule with multiplied by linear decay at the end. + + follows lr * (step)^power for the first total_decay_steps * + (1 - linear_decay_fraction) steps, and follows lr * (step)^power * + (total_decay_steps - step) / (total_decay_steps * linear_decay_fraction) + for the rest of the steps. + """ + + def __init__(self, + initial_learning_rate: float, + total_decay_steps: int, + power: float = 1.0, + linear_decay_fraction: float = 0.1, + name: str = "PowerAndLinearDecay"): + """Initialize configuration of the learning rate schedule. + + Args: + initial_learning_rate: The initial learning rate. + total_decay_steps: The total number of steps for power + linear decay. + power: The order of the polynomial. + linear_decay_fraction: In the last `linear_decay_fraction` steps, + the learning rate will be multiplied by a linear decay. + name: Optional, name of warmup schedule. + """ + super().__init__() + self._initial_learning_rate = initial_learning_rate + self._total_decay_steps = total_decay_steps + self._power = power + self._linear_decay_fraction = linear_decay_fraction + self._name = name + + def __call__(self, step): + with tf.name_scope(self._name or "PowerAndLinearDecay"): + step = tf.cast(step, tf.float32) + learning_rate = self._initial_learning_rate + # A zero `step` may cause Inf. So make `step` positive. 
+ step_non_zero = tf.math.maximum(step, 1.0) + learning_rate *= tf.math.pow(step_non_zero, self._power) + if self._total_decay_steps * self._linear_decay_fraction > 0: + learning_rate *= tf.minimum( + 1.0, (self._total_decay_steps - step) / + (self._total_decay_steps * self._linear_decay_fraction)) + learning_rate = tf.maximum(0.0, learning_rate) + return learning_rate + + def get_config(self): + """Get the configuration of the learning rate schedule.""" + return { + "initial_learning_rate": self._initial_learning_rate, + "total_decay_steps": self._total_decay_steps, + "power": self._power, + "linear_decay_fraction": self._linear_decay_fraction, + "name": self._name, + } + + +class PowerDecayWithOffset(tf.keras.optimizers.schedules.LearningRateSchedule): + """Power learning rate decay with offset. + + Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`. + Otherwise, learning rate equals to lr * (step - offset)^power. + """ + + def __init__(self, + initial_learning_rate: float, + power: float = 1.0, + offset: int = 0, + pre_offset_learning_rate: float = 1.0e6, + name: str = "PowerDecayWithOffset"): + """Initialize configuration of the learning rate schedule. + + Args: + initial_learning_rate: The initial learning rate. + power: The order of the polynomial. + offset: The offset when computing the power decay. + pre_offset_learning_rate: The maximum learning rate we'll use. + name: Optional, name of warmup schedule. 
+ """ + super().__init__() + self._initial_learning_rate = initial_learning_rate + self._power = power + self._offset = offset + self._pre_offset_lr = pre_offset_learning_rate + self._name = name + + def __call__(self, step): + with tf.name_scope(self._name or "PowerDecayWithOffset"): + step = tf.cast(step, tf.float32) + lr_after_offset = tf.math.pow( + tf.math.maximum(step - self._offset, 1.0), self._power) * ( + self._initial_learning_rate) + + sign = tf.cast(step > self._offset, tf.float32) + lr_combined = (1.0 - sign) * self._pre_offset_lr + sign * lr_after_offset + # Power may give infinitely large LR. So cap it with pre_offset_lr. + return tf.math.minimum(lr_combined, self._pre_offset_lr) + + def get_config(self): + """Get the configuration of the learning rate schedule.""" + return { + "initial_learning_rate": self._initial_learning_rate, + "power": self._power, + "offset": self._offset, + "pre_offset_learning_rate": self._pre_offset_lr, + "name": self._name, + } diff --git a/official/modeling/optimization/optimizer_factory.py b/official/modeling/optimization/optimizer_factory.py index c9ac04c42213c1a5904f162f369148ec43b0af82..c5080989642c9dfd14267b18b5051e88ca60ed99 100644 --- a/official/modeling/optimization/optimizer_factory.py +++ b/official/modeling/optimization/optimizer_factory.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,14 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Optimizer factory class.""" -from typing import Union +from typing import Callable, Union +import gin import tensorflow as tf - import tensorflow_addons.optimizers as tfa_optimizers +from official.modeling.optimization import ema_optimizer +from official.modeling.optimization import lars_optimizer from official.modeling.optimization import lr_schedule from official.modeling.optimization.configs import optimization_config as opt_cfg from official.nlp import optimization as nlp_optimization @@ -29,14 +30,19 @@ OPTIMIZERS_CLS = { 'adam': tf.keras.optimizers.Adam, 'adamw': nlp_optimization.AdamWeightDecay, 'lamb': tfa_optimizers.LAMB, - 'rmsprop': tf.keras.optimizers.RMSprop + 'rmsprop': tf.keras.optimizers.RMSprop, + 'lars': lars_optimizer.LARS, + 'adagrad': tf.keras.optimizers.Adagrad, } LR_CLS = { 'stepwise': tf.keras.optimizers.schedules.PiecewiseConstantDecay, 'polynomial': tf.keras.optimizers.schedules.PolynomialDecay, 'exponential': tf.keras.optimizers.schedules.ExponentialDecay, - 'cosine': tf.keras.experimental.CosineDecay + 'cosine': tf.keras.experimental.CosineDecay, + 'power': lr_schedule.DirectPowerDecay, + 'power_linear': lr_schedule.PowerAndLinearDecay, + 'power_with_offset': lr_schedule.PowerDecayWithOffset, } WARMUP_CLS = { @@ -45,7 +51,7 @@ WARMUP_CLS = { } -class OptimizerFactory(object): +class OptimizerFactory: """Optimizer factory class. This class builds learning rate and optimizer based on an optimization config. 
@@ -88,7 +94,10 @@ class OptimizerFactory(object): self._optimizer_config = config.optimizer.get() self._optimizer_type = config.optimizer.type - if self._optimizer_type is None: + self._use_ema = config.ema is not None + self._ema_config = config.ema + + if self._optimizer_config is None: raise ValueError('Optimizer type must be specified') self._lr_config = config.learning_rate.get() @@ -121,9 +130,12 @@ class OptimizerFactory(object): return lr + @gin.configurable def build_optimizer( - self, lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule, - float]): + self, + lr: Union[tf.keras.optimizers.schedules.LearningRateSchedule, float], + postprocessor: Callable[[tf.keras.optimizers.Optimizer], + tf.keras.optimizers.Optimizer] = None): """Build optimizer. Builds optimizer from config. It takes learning rate as input, and builds @@ -131,15 +143,33 @@ class OptimizerFactory(object): rate built using self.build_lr() is passed as an argument to this method. Args: - lr: A floating point value, or - a tf.keras.optimizers.schedules.LearningRateSchedule instance. + lr: A floating point value, or a + tf.keras.optimizers.schedules.LearningRateSchedule instance. + postprocessor: An optional function for postprocessing the optimizer. It + takes an optimizer and returns an optimizer. + Returns: tf.keras.optimizers.Optimizer instance. 
""" optimizer_dict = self._optimizer_config.as_dict() + ## Delete clipnorm and clipvalue if None + if optimizer_dict['clipnorm'] is None: + del optimizer_dict['clipnorm'] + if optimizer_dict['clipvalue'] is None: + del optimizer_dict['clipvalue'] + optimizer_dict['learning_rate'] = lr optimizer = OPTIMIZERS_CLS[self._optimizer_type](**optimizer_dict) - return optimizer + if self._use_ema: + optimizer = ema_optimizer.ExponentialMovingAverage( + optimizer, **self._ema_config.as_dict()) + if postprocessor: + optimizer = postprocessor(optimizer) + assert isinstance(optimizer, tf.keras.optimizers.Optimizer), ( + 'OptimizerFactory.build_optimizer returning a non-optimizer object: ' + '{}'.format(optimizer)) + + return optimizer diff --git a/official/modeling/optimization/optimizer_factory_test.py b/official/modeling/optimization/optimizer_factory_test.py index b3218778528eea895fc83c4da59ad5bcccbfa655..aa780be49733a6a1a373f82d608308eba405b9ed 100644 --- a/official/modeling/optimization/optimizer_factory_test.py +++ b/official/modeling/optimization/optimizer_factory_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for optimizer_factory.py.""" +"""Tests for optimizer_factory.py.""" from absl.testing import parameterized - +import numpy as np import tensorflow as tf from official.modeling.optimization import optimizer_factory @@ -25,12 +23,8 @@ from official.modeling.optimization.configs import optimization_config class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.parameters( - ('sgd'), - ('rmsprop'), - ('adam'), - ('adamw'), - ('lamb')) + @parameterized.parameters(('sgd'), ('rmsprop'), ('adam'), ('adamw'), ('lamb'), + ('lars'), ('adagrad')) def test_optimizers(self, optimizer_type): params = { 'optimizer': { @@ -50,26 +44,63 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): opt_config = optimization_config.OptimizationConfig(params) opt_factory = optimizer_factory.OptimizerFactory(opt_config) lr = opt_factory.build_learning_rate() - optimizer = opt_factory.build_optimizer(lr) + optimizer = opt_factory.build_optimizer(lr, postprocessor=lambda x: x) self.assertIsInstance(optimizer, optimizer_cls) self.assertEqual(expected_optimizer_config, optimizer.get_config()) - def test_missing_types(self): + @parameterized.parameters((None, None), (1.0, None), (None, 1.0)) + def test_gradient_clipping(self, clipnorm, clipvalue): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'clipnorm': clipnorm, + 'clipvalue': clipvalue + } + }, + 'learning_rate': { + 'type': 'constant', + 'constant': { + 'learning_rate': 1.0 + } } } + + opt_config = optimization_config.OptimizationConfig(params) + opt_factory = optimizer_factory.OptimizerFactory(opt_config) + lr = opt_factory.build_learning_rate() + optimizer = opt_factory.build_optimizer(lr) + + var0 = tf.Variable([1.0, 2.0]) + var1 = tf.Variable([3.0, 4.0]) + + grads0 = tf.constant([0.1, 0.1]) + grads1 = tf.constant([2.0, 3.0]) + + grads_and_vars = list(zip([grads0, 
grads1], [var0, var1])) + optimizer.apply_gradients(grads_and_vars) + + self.assertAllClose(np.array([0.9, 1.9]), var0.numpy()) + if clipvalue is not None: + self.assertAllClose(np.array([2.0, 3.0]), var1.numpy()) + elif clipnorm is not None: + self.assertAllClose(np.array([2.4452999, 3.1679497]), var1.numpy()) + else: + self.assertAllClose(np.array([1.0, 1.0]), var1.numpy()) + + def test_missing_types(self): + params = {'optimizer': {'type': 'sgd', 'sgd': {'momentum': 0.9}}} with self.assertRaises(ValueError): optimizer_factory.OptimizerFactory( optimization_config.OptimizationConfig(params)) params = { 'learning_rate': { 'type': 'stepwise', - 'stepwise': {'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001]} + 'stepwise': { + 'boundaries': [10000, 20000], + 'values': [0.1, 0.01, 0.001] + } } } with self.assertRaises(ValueError): @@ -80,22 +111,20 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'stepwise', - 'stepwise': {'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001]} + 'stepwise': { + 'boundaries': [10000, 20000], + 'values': [0.1, 0.01, 0.001] + } } } - expected_lr_step_values = [ - [0, 0.1], - [5000, 0.1], - [10000, 0.1], - [10001, 0.01], - [20000, 0.01], - [20001, 0.001] - ] + expected_lr_step_values = [[0, 0.1], [5000, 0.1], [10000, 0.1], + [10001, 0.01], [20000, 0.01], [20001, 0.001]] opt_config = optimization_config.OptimizationConfig(params) opt_factory = optimizer_factory.OptimizerFactory(opt_config) lr = opt_factory.build_learning_rate() @@ -107,28 +136,28 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'stepwise', - 'stepwise': {'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001]} + 'stepwise': { + 'boundaries': 
[10000, 20000], + 'values': [0.1, 0.01, 0.001] + } }, 'warmup': { 'type': 'linear', - 'linear': {'warmup_steps': 500, 'warmup_learning_rate': 0.01} + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.01 + } } } - expected_lr_step_values = [ - [0, 0.01], - [250, 0.055], - [500, 0.1], - [5500, 0.1], - [10000, 0.1], - [10001, 0.01], - [20000, 0.01], - [20001, 0.001] - ] + expected_lr_step_values = [[0, 0.01], [250, 0.055], [500, 0.1], [5500, 0.1], + [10000, 0.1], [10001, 0.01], [20000, 0.01], + [20001, 0.001]] opt_config = optimization_config.OptimizationConfig(params) opt_factory = optimizer_factory.OptimizerFactory(opt_config) lr = opt_factory.build_learning_rate() @@ -140,7 +169,9 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'exponential', @@ -170,7 +201,9 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'polynomial', @@ -194,7 +227,9 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'cosine', @@ -204,11 +239,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): } } } - expected_lr_step_values = [[0, 0.1], - [250, 0.08535534], - [500, 0.04999999], - [750, 0.01464466], - [1000, 0]] + expected_lr_step_values = [[0, 0.1], [250, 0.08535534], [500, 0.04999999], + [750, 0.01464466], [1000, 0]] opt_config = optimization_config.OptimizationConfig(params) opt_factory = optimizer_factory.OptimizerFactory(opt_config) lr = opt_factory.build_learning_rate() @@ -220,7 +252,9 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', 
- 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'constant', @@ -250,28 +284,52 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): params = { 'optimizer': { 'type': 'sgd', - 'sgd': {'momentum': 0.9} + 'sgd': { + 'momentum': 0.9 + } }, 'learning_rate': { 'type': 'stepwise', - 'stepwise': {'boundaries': [10000, 20000], - 'values': [0.1, 0.01, 0.001]} + 'stepwise': { + 'boundaries': [10000, 20000], + 'values': [0.1, 0.01, 0.001] + } }, 'warmup': { 'type': 'polynomial', - 'polynomial': {'warmup_steps': 500, 'power': 2.} + 'polynomial': { + 'warmup_steps': 500, + 'power': 2. + } } } - expected_lr_step_values = [ - [0, 0.0], - [250, 0.025], - [500, 0.1], - [5500, 0.1], - [10000, 0.1], - [10001, 0.01], - [20000, 0.01], - [20001, 0.001] - ] + expected_lr_step_values = [[0, 0.0], [250, 0.025], [500, 0.1], [5500, 0.1], + [10000, 0.1], [10001, 0.01], [20000, 0.01], + [20001, 0.001]] + opt_config = optimization_config.OptimizationConfig(params) + opt_factory = optimizer_factory.OptimizerFactory(opt_config) + lr = opt_factory.build_learning_rate() + + for step, value in expected_lr_step_values: + self.assertAlmostEqual(lr(step).numpy(), value, places=6) + + def test_power_lr_schedule(self): + params = { + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'power', + 'power': { + 'initial_learning_rate': 1.0, + 'power': -1.0 + } + } + } + expected_lr_step_values = [[0, 1.0], [1, 1.0], [250, 1. 
/ 250.]] opt_config = optimization_config.OptimizationConfig(params) opt_factory = optimizer_factory.OptimizerFactory(opt_config) lr = opt_factory.build_learning_rate() @@ -279,5 +337,59 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): for step, value in expected_lr_step_values: self.assertAlmostEqual(lr(step).numpy(), value) + def test_power_linear_lr_schedule(self): + params = { + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'power_linear', + 'power_linear': { + 'initial_learning_rate': 1.0, + 'power': -1.0, + 'linear_decay_fraction': 0.5, + 'total_decay_steps': 100, + } + } + } + expected_lr_step_values = [[0, 1.0], [1, 1.0], [40, 1. / 40.], + [60, 1. / 60. * 0.8]] + opt_config = optimization_config.OptimizationConfig(params) + opt_factory = optimizer_factory.OptimizerFactory(opt_config) + lr = opt_factory.build_learning_rate() + + for step, value in expected_lr_step_values: + self.assertAlmostEqual(lr(step).numpy(), value) + + def test_power_with_offset_lr_schedule(self): + params = { + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'power_with_offset', + 'power_with_offset': { + 'initial_learning_rate': 1.0, + 'power': -1.0, + 'offset': 10, + 'pre_offset_learning_rate': 3.0, + } + } + } + expected_lr_step_values = [[1, 3.0], [10, 3.0], [20, 1. 
/ 10.]] + opt_config = optimization_config.OptimizationConfig(params) + opt_factory = optimizer_factory.OptimizerFactory(opt_config) + lr = opt_factory.build_learning_rate() + + for step, value in expected_lr_step_values: + self.assertAlmostEqual(lr(step).numpy(), value) + + if __name__ == '__main__': tf.test.main() diff --git a/official/modeling/performance.py b/official/modeling/performance.py index 4b264f53256db66326ee4e51c5a29676e273eca9..e4c6225b87d228aa596db0691686915596b3dd63 100644 --- a/official/modeling/performance.py +++ b/official/modeling/performance.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,45 +11,75 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Functions and classes related to training performance.""" +from absl import logging import tensorflow as tf def configure_optimizer(optimizer, use_float16=False, use_graph_rewrite=False, - loss_scale="dynamic"): + loss_scale='dynamic', + use_experimental_api=False): """Configures optimizer object with performance options.""" + if use_experimental_api: + logging.warning('Passing use_experimental_api=True is deprecated. The ' + 'argument will be removed in the future.') if use_float16: - # Wraps optimizer with a LossScaleOptimizer. This is done automatically - # in compile() with the "mixed_float16" policy, but since we do not call - # compile(), we must wrap the optimizer manually. 
- optimizer = ( - tf.keras.mixed_precision.experimental.LossScaleOptimizer( - optimizer, loss_scale=loss_scale)) + # TODO(b/171936854): Move all methods to non-experimental api. + if use_experimental_api: + # Wraps optimizer with a LossScaleOptimizer. This is done automatically + # in compile() with the "mixed_float16" policy, but since we do not call + # compile(), we must wrap the optimizer manually. + optimizer = ( + tf.keras.mixed_precision.experimental.LossScaleOptimizer( + optimizer, loss_scale=loss_scale)) + elif loss_scale == 'dynamic': + optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer) + else: + # loss_scale is a number. We interpret that as a fixed loss scale. + optimizer = tf.keras.mixed_precision.LossScaleOptimizer( + optimizer, dynamic=False, initial_scale=loss_scale) if use_graph_rewrite: # Note: the model dtype must be 'float32', which will ensure - # tf.ckeras.mixed_precision and - # tf.train.experimental.enable_mixed_precision_graph_rewrite do not double - # up. - optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite( - optimizer) + # tf.keras.mixed_precision and enable_mixed_precision_graph_rewrite do not + # double up. + optimizer = ( + tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite( + optimizer)) return optimizer -def set_mixed_precision_policy(dtype, loss_scale=None): +def set_mixed_precision_policy(dtype, loss_scale=None, + use_experimental_api=False): """Sets mix precision policy.""" + if use_experimental_api: + logging.warning('Passing use_experimental_api=True is deprecated. The ' + 'argument will be removed in the future.') + assert use_experimental_api or loss_scale is None, ( + 'loss_scale cannot be specified if use_experimental_api is False. If the ' + 'non-experimental API is used, specify the loss scaling configuration ' + 'when creating the LossScaleOptimizer instead.' 
+ ) if dtype == tf.float16: - policy = tf.keras.mixed_precision.experimental.Policy( - 'mixed_float16', loss_scale=loss_scale) - tf.keras.mixed_precision.experimental.set_policy(policy) + # TODO(b/171936854): Move all methods to non-experimental api. + if use_experimental_api: + policy = tf.keras.mixed_precision.experimental.Policy( + 'mixed_float16', loss_scale=loss_scale) + tf.keras.mixed_precision.experimental.set_policy(policy) + else: + tf.keras.mixed_precision.set_global_policy('mixed_float16') elif dtype == tf.bfloat16: - policy = tf.keras.mixed_precision.experimental.Policy( - 'mixed_bfloat16') - tf.keras.mixed_precision.experimental.set_policy(policy) + if use_experimental_api: + tf.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') + else: + tf.keras.mixed_precision.set_global_policy('mixed_bfloat16') elif dtype == tf.float32: - tf.keras.mixed_precision.experimental.set_policy('float32') + if use_experimental_api: + tf.keras.mixed_precision.experimental.set_policy('float32') + else: + tf.keras.mixed_precision.set_global_policy('float32') else: - raise ValueError("Unexpected dtype: %s" % dtype) + raise ValueError('Unexpected dtype: %s' % dtype) diff --git a/official/modeling/progressive/policies.py b/official/modeling/progressive/policies.py new file mode 100644 index 0000000000000000000000000000000000000000..cf9598d9787e079f6fb14b43a674560a1dfd9659 --- /dev/null +++ b/official/modeling/progressive/policies.py @@ -0,0 +1,173 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base ProgressivePolicy definition for progressive training. + +To write a progressive model, subclass ProgressivePolicy and implement its +abstract methods to handle each training stage. +""" + +import abc +from typing import Any, Mapping +from absl import logging +import dataclasses +import six +import tensorflow as tf +from official.modeling.hyperparams import base_config +from official.modeling.progressive import utils + + +@dataclasses.dataclass +class ProgressiveConfig(base_config.Config): + pass + + +@six.add_metaclass(abc.ABCMeta) +class ProgressivePolicy: + """The APIs for handling progressive training stages. + + Attributes: + cur_model: The model for the current progressive training stage. + cur_train_dataset: The train dataset function for the current stage. + cur_eval_dataset: The eval dataset function for the current stage. + cur_optimizer: The optimizer for the current stage. + cur_checkpoint_items: Items to be saved in and restored from checkpoints, + for the progressive trainer. + is_last_stage: Whether it is currently in the last stage. + + Interfaces: + is_stage_advancing: Returns if progressive training is advancing to the + next stage. + update_pt_stage: Update progressive training stage. 
+ """ + + def __init__(self): + """Initialize stage policy.""" + self._cur_train_dataset = None + self._cur_eval_dataset = None + self._volatiles = utils.VolatileTrackable(optimizer=None, model=None) + + stage_id = 0 + self._stage_id = tf.Variable( + stage_id, + trainable=False, + dtype=tf.int64, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, + shape=[]) + self._volatiles.reassign_trackable( + optimizer=self.get_optimizer(stage_id), + model=self.get_model(stage_id, old_model=None)) + + def compute_stage_id(self, global_step: int) -> int: + for stage_id in range(self.num_stages()): + global_step -= self.num_steps(stage_id) + if global_step < 0: + return stage_id + logging.error('Global step %d found no matching progressive stages. ' + 'Default to the last stage.', global_step) + return self.num_stages() - 1 + + @abc.abstractmethod + def num_stages(self) -> int: + """Return the total number of progressive stages.""" + pass + + @abc.abstractmethod + def num_steps(self, stage_id: int) -> int: + """Return the total number of steps in this stage.""" + pass + + @abc.abstractmethod + def get_model(self, + stage_id: int, + old_model: tf.keras.Model = None) -> tf.keras.Model: + """Return model for this stage. 
For initialization, `old_model` = None.""" + pass + + @abc.abstractmethod + def get_optimizer(self, stage_id: int) -> tf.keras.optimizers.Optimizer: + """Return optimizer for this stage.""" + pass + + @abc.abstractmethod + def get_train_dataset(self, stage_id: int) -> tf.data.Dataset: + """Return training Dataset for this stage.""" + pass + + @abc.abstractmethod + def get_eval_dataset(self, stage_id: int) -> tf.data.Dataset: + """Return evaluation Dataset for this stage.""" + pass + + @property + def cur_model(self) -> tf.keras.Model: + return self._volatiles.model + + @property + def cur_train_dataset(self) -> tf.data.Dataset: + if self._cur_train_dataset is None: + self._cur_train_dataset = self.get_train_dataset(self._stage_id.numpy()) + return self._cur_train_dataset + + @property + def cur_eval_dataset(self) -> tf.data.Dataset: + if self._cur_eval_dataset is None: + self._cur_eval_dataset = self.get_eval_dataset(self._stage_id.numpy()) + return self._cur_eval_dataset + + @property + def cur_optimizer(self) -> tf.keras.optimizers.Optimizer: + return self._volatiles.optimizer + + @property + def is_last_stage(self) -> bool: + stage_id = self._stage_id.numpy() + return stage_id >= self.num_stages() - 1 + + @property + def cur_checkpoint_items(self) -> Mapping[str, Any]: + return dict(stage_id=self._stage_id, volatiles=self._volatiles) + + def is_stage_advancing(self, global_step: int) -> bool: + old_stage_id = self._stage_id.numpy() + new_stage_id = self.compute_stage_id(global_step) + return old_stage_id != new_stage_id + + def update_pt_stage(self, global_step: int, pass_old_model=True) -> None: + """Update progressive training internal status. + + Call this after a training loop ends. + + Args: + global_step: an integer scalar of the current global step. + pass_old_model: whether to pass the old_model to get_model() function. + This is set to False if the old_model is irrelevant (e.g, just a default + model from stage 0). 
+ """ + old_stage_id = self._stage_id.numpy() + new_stage_id = self.compute_stage_id(global_step) + logging.info('Switching stage from %d to %d', old_stage_id, new_stage_id) + + # Update stage id. + self._stage_id.assign(new_stage_id) + # Update dataset function. + self._cur_train_dataset = None + self._cur_eval_dataset = None + + # Update optimizer and model. + new_optimizer = self.get_optimizer(new_stage_id) + self._volatiles.reassign_trackable(optimizer=new_optimizer) + new_model = self.get_model( + new_stage_id, old_model=self.cur_model if pass_old_model else None) + self._volatiles.reassign_trackable(model=new_model) diff --git a/official/modeling/progressive/train.py b/official/modeling/progressive/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d30ffe3059d7c009f0884a8cafe7a28048000c0a --- /dev/null +++ b/official/modeling/progressive/train.py @@ -0,0 +1,69 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""TFM binary for the progressive trainer.""" + +from absl import app +from absl import flags +import gin + +from official.common import distribute_utils +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_utils +from official.modeling import performance +from official.modeling.progressive import train_lib + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu, + **params.runtime.model_parallelism()) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + train_utils.save_gin_config(FLAGS.mode, model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/modeling/progressive/train_lib.py b/official/modeling/progressive/train_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..a096aebf1492602b4903db764ee3a447346c833f --- /dev/null +++ b/official/modeling/progressive/train_lib.py @@ -0,0 +1,126 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFM progressive training driver library. + +Compared to the common training driver, the only difference is that we use +prog_trainer_lib.ProgressiveTrainer instead of the base trainer. 
+""" + +# pytype: disable=attribute-error +import os +from typing import Any, Mapping, Tuple + +# Import libraries +from absl import logging +import orbit +import tensorflow as tf +from official.core import base_task +from official.core import config_definitions +from official.core import train_lib as base_train_lib +from official.modeling.progressive import trainer as prog_trainer_lib + + +def run_experiment(distribution_strategy: tf.distribute.Strategy, + task: base_task.Task, + mode: str, + params: config_definitions.ExperimentConfig, + model_dir: str, + run_post_eval: bool = False, + save_summary: bool = True) \ +-> Tuple[tf.keras.Model, Mapping[str, Any]]: + """Runs train/eval configured by the experiment params. + + Args: + distribution_strategy: A distribution distribution_strategy. + task: A Task instance. + mode: A 'str', specifying the mode. Can be 'train', 'eval', 'train_and_eval' + or 'continuous_eval'. + params: ExperimentConfig instance. + model_dir: A 'str', a path to store model checkpoints and summaries. + run_post_eval: Whether to run post eval once after training, metrics logs + are returned. + save_summary: Whether to save train and validation summary. + + Returns: + A 2-tuple of (model, eval_logs). + model: `tf.keras.Model` instance. + eval_logs: returns eval metrics logs when run_post_eval is set to True, + otherwise, returns {}. 
+ """ + + with distribution_strategy.scope(): + logging.info('Running progressive trainer.') + trainer = prog_trainer_lib.ProgressiveTrainer( + params, task, ckpt_dir=model_dir, + train='train' in mode, + evaluate=('eval' in mode) or run_post_eval, + checkpoint_exporter=base_train_lib.maybe_create_best_ckpt_exporter( + params, model_dir)) + + if trainer.checkpoint: + checkpoint_manager = tf.train.CheckpointManager( + trainer.checkpoint, + directory=model_dir, + max_to_keep=params.trainer.max_to_keep, + step_counter=trainer.global_step, + checkpoint_interval=params.trainer.checkpoint_interval, + init_fn=trainer.initialize) + else: + checkpoint_manager = None + + controller = orbit.Controller( + strategy=distribution_strategy, + trainer=trainer if 'train' in mode else None, + evaluator=trainer, + global_step=trainer.global_step, + steps_per_loop=params.trainer.steps_per_loop, + checkpoint_manager=checkpoint_manager, + summary_dir=os.path.join(model_dir, 'train') if (save_summary) else None, + eval_summary_dir=os.path.join(model_dir, 'validation') if + (save_summary) else None, + summary_interval=params.trainer.summary_interval if + (save_summary) else None) + + logging.info('Starts to execute mode: %s', mode) + with distribution_strategy.scope(): + if mode == 'train': + controller.train(steps=params.trainer.train_steps) + elif mode == 'train_and_eval': + controller.train_and_evaluate( + train_steps=params.trainer.train_steps, + eval_steps=params.trainer.validation_steps, + eval_interval=params.trainer.validation_interval) + elif mode == 'eval': + controller.evaluate(steps=params.trainer.validation_steps) + elif mode == 'continuous_eval': + + def timeout_fn(): + if trainer.global_step.numpy() >= params.trainer.train_steps: + return True + return False + + controller.evaluate_continuously( + steps=params.trainer.validation_steps, + timeout=params.trainer.continuous_eval_timeout, + timeout_fn=timeout_fn) + else: + raise NotImplementedError('The mode is not implemented: 
%s' % mode) + + if run_post_eval: + with distribution_strategy.scope(): + return trainer.model, trainer.evaluate( + tf.convert_to_tensor(params.trainer.validation_steps)) + else: + return trainer.model, {} diff --git a/official/modeling/progressive/train_lib_test.py b/official/modeling/progressive/train_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..178fc6df7b3a50dd0097535673d3435d42625153 --- /dev/null +++ b/official/modeling/progressive/train_lib_test.py @@ -0,0 +1,183 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the progressive train_lib.""" +import os + +from absl import flags +from absl.testing import parameterized +import dataclasses +import orbit +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.common import flags as tfm_flags +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling import optimization +from official.modeling.hyperparams import params_dict +from official.modeling.progressive import policies +from official.modeling.progressive import train_lib +from official.modeling.progressive import trainer as prog_trainer_lib +from official.utils.testing import mock_task + +FLAGS = flags.FLAGS + +tfm_flags.define_flags() + + +@dataclasses.dataclass +class ProgTaskConfig(cfg.TaskConfig): + pass + + +@task_factory.register_task_cls(ProgTaskConfig) +class ProgMockTask(policies.ProgressivePolicy, mock_task.MockTask): + """Progressive task for testing.""" + + def __init__(self, params: cfg.TaskConfig, logging_dir: str = None): + mock_task.MockTask.__init__( + self, params=params, logging_dir=logging_dir) + policies.ProgressivePolicy.__init__(self) + + def num_stages(self): + return 2 + + def num_steps(self, stage_id): + return 2 if stage_id == 0 else 4 + + def get_model(self, stage_id, old_model=None): + del stage_id, old_model + return self.build_model() + + def get_optimizer(self, stage_id): + """Build optimizer for each stage.""" + params = optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adamw', + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 0.01, + 'end_learning_rate': 0.0, + 'power': 1.0, + 'decay_steps': 10, + }, + }, + 'warmup': { + 'polynomial': { + 'power': 1, + 'warmup_steps': 2, + }, + 'type': 
'polynomial', + } + }) + opt_factory = optimization.OptimizerFactory(params) + optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) + + return optimizer + + def get_train_dataset(self, stage_id): + del stage_id + strategy = tf.distribute.get_strategy() + return orbit.utils.make_distributed_dataset( + strategy, self.build_inputs, None) + + def get_eval_dataset(self, stage_id): + del stage_id + strategy = tf.distribute.get_strategy() + return orbit.utils.make_distributed_dataset( + strategy, self.build_inputs, None) + + +class TrainTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(TrainTest, self).setUp() + self._test_config = { + 'trainer': { + 'checkpoint_interval': 10, + 'steps_per_loop': 10, + 'summary_interval': 10, + 'train_steps': 10, + 'validation_steps': 5, + 'validation_interval': 10, + 'continuous_eval_timeout': 1, + 'optimizer_config': { + 'optimizer': { + 'type': 'sgd', + }, + 'learning_rate': { + 'type': 'constant' + } + } + }, + } + + @combinations.generate( + combinations.combine( + distribution_strategy=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + flag_mode=['train', 'eval', 'train_and_eval'], + run_post_eval=[True, False])) + def test_end_to_end(self, distribution_strategy, flag_mode, run_post_eval): + model_dir = self.get_temp_dir() + experiment_config = cfg.ExperimentConfig( + trainer=prog_trainer_lib.ProgressiveTrainerConfig(), + task=ProgTaskConfig()) + experiment_config = params_dict.override_params_dict( + experiment_config, self._test_config, is_strict=False) + + with distribution_strategy.scope(): + task = task_factory.get_task(experiment_config.task, + logging_dir=model_dir) + + _, logs = train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=flag_mode, + params=experiment_config, + model_dir=model_dir, + run_post_eval=run_post_eval) + + if run_post_eval: + 
self.assertNotEmpty(logs) + else: + self.assertEmpty(logs) + + if flag_mode == 'eval': + return + self.assertNotEmpty( + tf.io.gfile.glob(os.path.join(model_dir, 'checkpoint'))) + # Tests continuous evaluation. + _, logs = train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode='continuous_eval', + params=experiment_config, + model_dir=model_dir, + run_post_eval=run_post_eval) + print(logs) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/progressive/trainer.py b/official/modeling/progressive/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..d294078873ff78b2728a77c130ed0cd710153132 --- /dev/null +++ b/official/modeling/progressive/trainer.py @@ -0,0 +1,273 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Progressive Trainer implementation. + +The trainer implements the Orbit `StandardTrainable` and +`StandardEvaluable` interfaces. Trainers inside this project should be +interchangeable and independent of model architectures and tasks. 
+""" +import os +from typing import Any, Optional + +# Import libraries +from absl import logging + +import dataclasses +import gin +import orbit +import tensorflow as tf +from official.core import base_task +from official.core import base_trainer as trainer_lib +from official.core import config_definitions +from official.modeling.progressive import policies +from official.modeling.progressive import utils + +ExperimentConfig = config_definitions.ExperimentConfig + + +@dataclasses.dataclass +class ProgressiveTrainerConfig(config_definitions.TrainerConfig): + """Configuration for progressive trainer. + + Attributes: + progressive: A task-specific config. Users can subclass ProgressiveConfig + and define any task-specific settings in their subclass. + export_checkpoint: A bool. Whether to export checkpoints in non-progressive + manner (without the volatiles wrapper) such that your down-stream tasks + can load checkpoints from a progressive trainer as if it is a regular + checkpoint. + export_checkpoint_interval: A bool. The number of steps between exporting + checkpoints. If None (by default), will use the same value as + TrainerConfig.checkpoint_interval. + export_only_final_stage_ckpt: A bool. Whether to just export checkpoints + during the final progressive training stage. In other words, whether to + not export small, partial models. In many cases, it is not meaningful to + finetune a small, partial model in down-stream tasks. + """ + progressive: Optional[policies.ProgressiveConfig] = None + export_checkpoint: bool = True + export_checkpoint_interval: Optional[int] = None + export_only_final_stage_ckpt: bool = True + + +@gin.configurable +class ProgressiveTrainer(trainer_lib.Trainer): + """Implements the progressive trainer shared for TensorFlow models.""" + + def __init__( + self, + config: ExperimentConfig, + prog_task: base_task.Task, # also implemented ProgressivePolicy. 
+ ckpt_dir: str = '', + train: bool = True, + evaluate: bool = True, + checkpoint_exporter: Any = None): + """Initialize common trainer for TensorFlow models. + + Args: + config: An `ExperimentConfig` instance specifying experiment config. + prog_task: An instance both implemented policies.ProgressivePolicy and + base_task.Task. + ckpt_dir: Checkpoint directory. + train: bool, whether or not this trainer will be used for training. + default to True. + evaluate: bool, whether or not this trainer will be used for evaluation. + default to True. + checkpoint_exporter: an object that has the `maybe_export_checkpoint` + interface. + """ + # Gets the current distribution strategy. If not inside any strategy scope, + # it gets a single-replica no-op strategy. + self._strategy = tf.distribute.get_strategy() + self._config = config + self._runtime_options = trainer_lib.get_runtime_options(config) + self._task = prog_task + + # Directory for non-progressive checkpoint + self._export_ckpt_dir = os.path.join(ckpt_dir, 'exported_ckpts') + tf.io.gfile.makedirs(self._export_ckpt_dir) + + # Receive other checkpoint export, e.g, best checkpoint exporter. + # TODO(lehou): unify the checkpoint exporting logic, although the default + # setting does not use checkpoint_exporter. 
+ self._checkpoint_exporter = checkpoint_exporter + + self._global_step = orbit.utils.create_global_step() + + self._checkpoint = utils.CheckpointWithHooks( + before_load_hook=self._update_pt_stage_from_ckpt, + global_step=self.global_step, + **self._task.cur_checkpoint_items) + + self._train_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32) + self._validation_loss = tf.keras.metrics.Mean( + 'validation_loss', dtype=tf.float32) + self._train_metrics = self.task.build_metrics( + training=True) + self.model.metrics + self._validation_metrics = self.task.build_metrics( + training=False) + self.model.metrics + + if train: + orbit.StandardTrainer.__init__( + self, + None, # Manage train_dataset by ourselves, not by StandardTrainer. + options=orbit.StandardTrainerOptions( + use_tf_while_loop=config.trainer.train_tf_while_loop, + use_tf_function=config.trainer.train_tf_function)) + + if evaluate: + orbit.StandardEvaluator.__init__( + self, + None, # Manage eval_dataset by ourselves, not by StandardEvaluator. + options=orbit.StandardEvaluatorOptions( + use_tf_function=config.trainer.eval_tf_function)) + + @property + def model(self): + return self._task.cur_model + + @property + def optimizer(self): + return self._task.cur_optimizer + + # override + @property + def train_dataset(self): + """Overriding StandardTrainer.train_dataset.""" + return self._task.cur_train_dataset + + # override + @train_dataset.setter + def train_dataset(self, _): + raise SyntaxError('Please do not set train_dataset. Progressive training ' + 'relies on progressive policy to manage train dataset.') + + # override + @property + def eval_dataset(self): + """Overriding StandardEvaluator.eval_dataset.""" + return self._task.cur_eval_dataset + + # override + @eval_dataset.setter + def eval_dataset(self, _): + raise SyntaxError('Please do not set eval_dataset. 
Progressive training ' + 'relies on progressive policy to manage eval dataset.') + + def train_loop_end(self): + """See base class.""" + logs = {} + for metric in self.train_metrics + [self.train_loss]: + logs[metric.name] = metric.result() + metric.reset_states() + if callable(self.optimizer.learning_rate): + logs['learning_rate'] = self.optimizer.learning_rate( + self.optimizer.iterations) + else: + logs['learning_rate'] = self.optimizer.learning_rate + + self._maybe_export_non_progressive_checkpoint(self._export_ckpt_dir) + if self._task.is_stage_advancing(self.global_step.numpy()): + old_train_dataset = self.train_dataset + + # Update progressive properties + self._task.update_pt_stage(self.global_step.numpy()) + + # Setting `self._train_loop_fn` and `self._eval_loop_fn` to None will + # rebuild the train and eval functions with the updated model. + self._train_loop_fn = None + self._eval_loop_fn = None + + if self.train_dataset != old_train_dataset: + # Setting `self._train_iter` to None will rebuild the dataset iterator. + self._train_iter = None + + return logs + + def _update_pt_stage_from_ckpt(self, ckpt_file): + """Update stage properties based on the global_step variable in a ckpt file. + + Before loading variables from a checkpoint file, we need to go to the + correct stage and build corresponding model and optimizer, to make sure that + we restore variables of the right model and optimizer. + + Args: + ckpt_file: Checkpoint file that will be restored/read from. 
+ """ + if not ckpt_file: + return + ckpt = tf.train.Checkpoint(global_step=self.global_step) + ckpt.read(ckpt_file).expect_partial().assert_existing_objects_matched() + + if self._task.is_stage_advancing(self.global_step.numpy()): + old_train_dataset = self.train_dataset + + # Update progressive properties + self._task.update_pt_stage(self.global_step.numpy(), pass_old_model=False) + + # Setting `self._train_loop_fn` and `self._eval_loop_fn` to None will + # rebuild the train and eval functions with the updated model. + self._train_loop_fn = None + self._eval_loop_fn = None + + if self.train_dataset != old_train_dataset: + # Setting `self._train_iter` to None will rebuild the dataset iterator. + self._train_iter = None + + def _maybe_export_non_progressive_checkpoint(self, export_ckpt_dir): + """Export checkpoints in non-progressive format. + + This basically removes the wrapping of self._task.cur_checkpoint_items + -- just save the model, optimizer, etc., directly. + The purpose is to let your down-stream tasks to use these checkpoints. + + Args: + export_ckpt_dir: A str. folder of exported checkpoints. + """ + if not self.config.trainer.export_checkpoint: + logging.info('Not exporting checkpoints.') + return + if not self._task.is_last_stage and ( + self.config.trainer.export_only_final_stage_ckpt): + logging.info('Not exporting checkpoints until the last stage.') + return + + global_step_np = self.global_step.numpy() + if self.config.trainer.export_checkpoint_interval is None: + step_interval = self.config.trainer.checkpoint_interval + else: + step_interval = self.config.trainer.export_checkpoint_interval + if global_step_np % step_interval != 0 and ( + global_step_np < self._config.trainer.train_steps): + logging.info('Not exporting checkpoints in global step: %d.', + global_step_np) + return + + # Create a checkpoint object just now, to make sure we use + # progressive_policy.cur_model and progressive_policy.cur_optimizer of the + # current stage. 
+ if hasattr(self.model, 'checkpoint_items'): + checkpoint_items = self.model.checkpoint_items + else: + checkpoint_items = {} + checkpoint = tf.train.Checkpoint( + global_step=self.global_step, + model=self.model, + optimizer=self.optimizer, + **checkpoint_items) + file_prefix = os.path.join(export_ckpt_dir, + 'ckpt-{}'.format(global_step_np)) + checkpoint.save(file_prefix=file_prefix) + logging.info('Checkpoints exported: %s.', file_prefix) diff --git a/official/modeling/progressive/trainer_test.py b/official/modeling/progressive/trainer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7d4ab3e22e042cdffc8e175b534db935daf8c849 --- /dev/null +++ b/official/modeling/progressive/trainer_test.py @@ -0,0 +1,238 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the progressive trainer.""" +# pylint: disable=g-direct-tensorflow-import +import os + +from absl.testing import parameterized +import orbit +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.core import config_definitions as cfg +from official.modeling import optimization +from official.modeling.progressive import policies +from official.modeling.progressive import trainer as trainer_lib +from official.nlp.configs import bert +from official.utils.testing import mock_task + + +def all_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ],) + + +def get_exp_config(): + return cfg.ExperimentConfig( + task=cfg.TaskConfig( + model=bert.PretrainerConfig()), + trainer=trainer_lib.ProgressiveTrainerConfig( + export_checkpoint=True, + export_checkpoint_interval=1, + export_only_final_stage_ckpt=False)) + + +class TestPolicy(policies.ProgressivePolicy, mock_task.MockTask): + """Just for testing purposes.""" + + def __init__(self, strategy, task_config, change_train_dataset=True): + self._strategy = strategy + self._change_train_dataset = change_train_dataset + self._my_train_dataset = None + mock_task.MockTask.__init__(self, params=task_config, logging_dir=None) + policies.ProgressivePolicy.__init__(self) + + def num_stages(self) -> int: + return 2 + + def num_steps(self, stage_id: int) -> int: + return 2 if stage_id == 0 else 4 + + def get_model(self, + stage_id: int, + old_model: tf.keras.Model) -> tf.keras.Model: + del stage_id, old_model + return self.build_model() + + def get_optimizer(self, stage_id: int) -> tf.keras.optimizers.Optimizer: + optimizer_type = 'sgd' if stage_id == 0 else 'adamw' + optimizer_config = cfg.OptimizationConfig({ + 'optimizer': {'type': optimizer_type}, + 
'learning_rate': {'type': 'constant'}}) + opt_factory = optimization.OptimizerFactory(optimizer_config) + return opt_factory.build_optimizer(opt_factory.build_learning_rate()) + + def get_train_dataset(self, stage_id: int) -> tf.data.Dataset: + if not self._change_train_dataset and self._my_train_dataset: + return self._my_train_dataset + if self._strategy: + self._my_train_dataset = orbit.utils.make_distributed_dataset( + self._strategy, + self._build_inputs, + stage_id) + else: + self._my_train_dataset = self._build_inputs(stage_id) + return self._my_train_dataset + + def get_eval_dataset(self, stage_id: int) -> tf.data.Dataset: + if self._strategy: + return orbit.utils.make_distributed_dataset( + self._strategy, + self._build_inputs, + stage_id) + return self._build_inputs(stage_id) + + def _build_inputs(self, stage_id): + def dummy_data(_): + batch_size = 2 if stage_id == 0 else 1 + x = tf.zeros(shape=(batch_size, 2), dtype=tf.float32) + label = tf.zeros(shape=(batch_size, 1), dtype=tf.float32) + return x, label + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + return dataset.map( + dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + + +class TrainerTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(TrainerTest, self).setUp() + self._config = get_exp_config() + + def create_test_trainer(self, distribution, model_dir, change_train_dataset): + trainer = trainer_lib.ProgressiveTrainer( + self._config, + prog_task=TestPolicy( + distribution, self._config.task, change_train_dataset), + ckpt_dir=model_dir) + return trainer + + @combinations.generate(all_strategy_combinations()) + def test_checkpointing(self, distribution): + model_dir = self.get_temp_dir() + ckpt_file = os.path.join(model_dir, 'ckpt') + with distribution.scope(): + trainer = self.create_test_trainer(distribution, model_dir, True) + self.assertFalse(trainer._task.is_last_stage) + trainer.train(tf.convert_to_tensor(4, dtype=tf.int32)) + 
self.assertTrue(trainer._task.is_last_stage) + trainer.checkpoint.save(ckpt_file) + + trainer = self.create_test_trainer(distribution, model_dir, True) + self.assertFalse(trainer._task.is_last_stage) + trainer.checkpoint.restore(ckpt_file + '-1') + self.assertTrue(trainer._task.is_last_stage) + + @combinations.generate(all_strategy_combinations()) + def test_train_dataset(self, distribution): + model_dir = self.get_temp_dir() + with distribution.scope(): + trainer = self.create_test_trainer(distribution, model_dir, True) + # Using dataset of stage == 0 + train_iter = tf.nest.map_structure(iter, trainer.train_dataset) + train_data = train_iter.next()[0] + if distribution.num_replicas_in_sync > 1: + train_data = train_data.values[0] + self.assertEqual(train_data.shape[0], 2) + + trainer.train(tf.convert_to_tensor(4, dtype=tf.int32)) + # Using dataset of stage == 1 + train_iter = tf.nest.map_structure(iter, trainer.train_dataset) + train_data = train_iter.next()[0] + if distribution.num_replicas_in_sync > 1: + train_data = train_data.values[0] + self.assertEqual(train_data.shape[0], 1) + + with self.assertRaises(SyntaxError): + trainer.train_dataset = None + + @combinations.generate(all_strategy_combinations()) + def test_train_dataset_no_switch(self, distribution): + model_dir = self.get_temp_dir() + with distribution.scope(): + trainer = self.create_test_trainer(distribution, model_dir, False) + trainer.train(tf.convert_to_tensor(2, dtype=tf.int32)) + # _train_iter is not reset since the dataset is not changed. + self.assertIsNotNone(trainer._train_iter) + with distribution.scope(): + trainer = self.create_test_trainer(distribution, model_dir, True) + trainer.train(tf.convert_to_tensor(2, dtype=tf.int32)) + # _train_iter is reset since the dataset changed. 
+ self.assertIsNone(trainer._train_iter) + + +class TrainerWithMaskedLMTaskTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(TrainerWithMaskedLMTaskTest, self).setUp() + self._config = get_exp_config() + + def create_test_trainer(self, distribution): + trainer = trainer_lib.ProgressiveTrainer( + self._config, + prog_task=TestPolicy(distribution, self._config.task), + ckpt_dir=self.get_temp_dir()) + return trainer + + @combinations.generate(all_strategy_combinations()) + def test_trainer_train(self, distribution): + with distribution.scope(): + trainer = self.create_test_trainer(distribution) + logs = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', logs) + self.assertIn('learning_rate', logs) + + @combinations.generate(all_strategy_combinations()) + def test_trainer_validate(self, distribution): + with distribution.scope(): + trainer = self.create_test_trainer(distribution) + logs = trainer.evaluate(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('validation_loss', logs) + self.assertEqual(logs['counter'], 5. 
* distribution.num_replicas_in_sync) + + @combinations.generate( + combinations.combine( + mixed_precision_dtype=['float32', 'bfloat16', 'float16'], + loss_scale=[None, 'dynamic', 128, 256], + )) + def test_configure_optimizer(self, mixed_precision_dtype, loss_scale): + config = cfg.ExperimentConfig( + task=cfg.TaskConfig( + model=bert.PretrainerConfig()), + runtime=cfg.RuntimeConfig( + mixed_precision_dtype=mixed_precision_dtype, loss_scale=loss_scale), + trainer=trainer_lib.ProgressiveTrainerConfig( + export_checkpoint=True, + export_checkpoint_interval=1, + export_only_final_stage_ckpt=False)) + task = TestPolicy(None, config.task) + trainer = trainer_lib.ProgressiveTrainer(config, task, self.get_temp_dir()) + if mixed_precision_dtype != 'float16': + self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) + elif mixed_precision_dtype == 'float16' and loss_scale is None: + self.assertIsInstance(trainer.optimizer, tf.keras.optimizers.SGD) + + metrics = trainer.train(tf.convert_to_tensor(5, dtype=tf.int32)) + self.assertIn('training_loss', metrics) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/modeling/progressive/utils.py b/official/modeling/progressive/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..192170cb87825de6972ab4a85a6b556ee40600c4 --- /dev/null +++ b/official/modeling/progressive/utils.py @@ -0,0 +1,56 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Util classes and functions.""" + +from absl import logging +import tensorflow as tf + +# pylint: disable=g-direct-tensorflow-import +from tensorflow.python.training.tracking import tracking + + +class VolatileTrackable(tracking.AutoTrackable): + """A util class to keep Trackables that might change instances.""" + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def reassign_trackable(self, **kwargs): + for k, v in kwargs.items(): + delattr(self, k) # untrack this object + setattr(self, k, v) # track the new object + + +class CheckpointWithHooks(tf.train.Checkpoint): + """Same as tf.train.Checkpoint but supports hooks. + + In progressive training, use this class instead of tf.train.Checkpoint. + + Since the network architecture changes during progressive training, we need to + prepare something (like switch to the correct architecture) before loading the + checkpoint. This class supports a hook that will be executed before checkpoint + loading. + """ + + def __init__(self, before_load_hook, **kwargs): + self._before_load_hook = before_load_hook + super(CheckpointWithHooks, self).__init__(**kwargs) + + # override + def read(self, save_path, options=None): + self._before_load_hook(save_path) + logging.info('Ran before_load_hook.') + super(CheckpointWithHooks, self).read(save_path=save_path, options=options) diff --git a/official/modeling/tf_utils.py b/official/modeling/tf_utils.py index 14b6a3f1f8f64635ee90facc1874e359a2d05229..6b3429672b1b03a899e3d1e04ac1aab9212a260f 100644 --- a/official/modeling/tf_utils.py +++ b/official/modeling/tf_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Common TF utilities.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Common TF utilities.""" import six import tensorflow as tf @@ -29,8 +25,7 @@ from official.modeling import activations None, "tf.keras.layers.Layer supports multiple positional args and kwargs as " "input tensors. pack/unpack inputs to override __call__ is no longer " - "needed." -) + "needed.") def pack_inputs(inputs): """Pack a list of `inputs` tensors to a tuple. @@ -55,8 +50,7 @@ def pack_inputs(inputs): None, "tf.keras.layers.Layer supports multiple positional args and kwargs as " "input tensors. pack/unpack inputs to override __call__ is no longer " - "needed." -) + "needed.") def unpack_inputs(inputs): """unpack a tuple of `inputs` tensors to a tuple. @@ -88,27 +82,44 @@ def is_special_none_tensor(tensor): return tensor.shape.ndims == 0 and tensor.dtype == tf.int32 -def get_activation(identifier): +def get_activation(identifier, use_keras_layer=False): """Maps a identifier to a Python function, e.g., "relu" => `tf.nn.relu`. It checks string first and if it is one of customized activation not in TF, the corresponding activation will be returned. For non-customized activation names and callable identifiers, always fallback to tf.keras.activations.get. + Prefers using keras layers when use_keras_layer=True. Now it only supports + 'relu', 'linear', 'identity', 'swish'. + Args: identifier: String name of the activation function or callable. + use_keras_layer: If True, use keras layer if identifier is allow-listed. Returns: - A Python function corresponding to the activation function. 
+ A Python function corresponding to the activation function or a keras + activation layer when use_keras_layer=True. """ if isinstance(identifier, six.string_types): + identifier = str(identifier).lower() + if use_keras_layer: + keras_layer_allowlist = { + "relu": "relu", + "linear": "linear", + "identity": "linear", + "swish": "swish", + "relu6": tf.nn.relu6, + } + if identifier in keras_layer_allowlist: + return tf.keras.layers.Activation(keras_layer_allowlist[identifier]) name_to_fn = { "gelu": activations.gelu, "simple_swish": activations.simple_swish, "hard_swish": activations.hard_swish, + "relu6": activations.relu6, + "hard_sigmoid": activations.hard_sigmoid, "identity": activations.identity, } - identifier = str(identifier).lower() if identifier in name_to_fn: return tf.keras.activations.get(name_to_fn[identifier]) return tf.keras.activations.get(identifier) diff --git a/official/modeling/training/__init__.py b/official/modeling/training/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/official/nlp/README.md b/official/nlp/README.md index 156f5c42858be92f20ed6bc157ddd8593cbc4329..dfa047b4ed3f0bea46e4b4db48578bd543f3e984 100644 --- a/official/nlp/README.md +++ b/official/nlp/README.md @@ -20,8 +20,11 @@ to experiment new research ideas. We provide modeling library to allow users to train custom models for new research ideas. Detailed intructions can be found in READMEs in each folder. -* [modeling/](modeling): modeling library that provides building blocks (e.g., Layers, Networks, and Models) that can be assembled into transformer-based achitectures . -* [data/](data): binaries and utils for input preprocessing, tokenization, etc. +* [modeling/](modeling): modeling library that provides building blocks + (e.g.,Layers, Networks, and Models) that can be assembled into + transformer-based achitectures . 
+* [data/](data): binaries and utils for input preprocessing, tokenization, + etc. ### State-of-the-Art models and examples @@ -29,9 +32,31 @@ We provide SoTA model implementations, pre-trained models, training and evaluation examples, and command lines. Detail instructions can be found in the READMEs for specific papers. -1. [BERT](bert): [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Devlin et al., 2018 -2. [ALBERT](albert): [A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942) by Lan et al., 2019 -3. [XLNet](xlnet): [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) by Yang et al., 2019 -4. [Transformer for translation](transformer): [Attention Is All You Need](https://arxiv.org/abs/1706.03762) by Vaswani et al., 2017 -5. [NHNet](nhnet): [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) by Gu et al, 2020 - +1. [BERT](bert): [BERT: Pre-training of Deep Bidirectional Transformers for + Language Understanding](https://arxiv.org/abs/1810.04805) by Devlin et al., + 2018 +2. [ALBERT](albert): + [A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942) + by Lan et al., 2019 +3. [XLNet](xlnet): + [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) + by Yang et al., 2019 +4. [Transformer for translation](transformer): + [Attention Is All You Need](https://arxiv.org/abs/1706.03762) by Vaswani et + al., 2017 +5. [NHNet](nhnet): + [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) + by Gu et al, 2020 + +### Common Training Driver + +We provide a single common driver [train.py](train.py) to train above SoTA +models on popluar tasks. Please see [docs/train.md](docs/train.md) for +more details. 
+ + +### Pre-trained models with checkpoints and TF-Hub + +We provide a large collection of baselines and checkpoints for NLP pre-trained +models. Please see [docs/pretrained_models.md](docs/pretrained_models.md) for +more details. diff --git a/official/nlp/__init__.py b/official/nlp/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/__init__.py +++ b/official/nlp/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/nlp/albert/README.md b/official/nlp/albert/README.md index cfb726c90ef9a638d5fd0485e341c232a86bdac2..69620e0579ff37f04a16626fa461fafdb1630b8f 100644 --- a/official/nlp/albert/README.md +++ b/official/nlp/albert/README.md @@ -148,7 +148,7 @@ python ../data/create_finetuning_data.py \ --meta_data_file_path=${OUTPUT_DIR}/${TASK_NAME}_meta_data \ --fine_tuning_task_type=classification --max_seq_length=128 \ --classification_task_name=${TASK_NAME} \ - --tokenizer_impl=sentence_piece + --tokenization=SentencePiece ``` * SQUAD @@ -177,7 +177,7 @@ python ../data/create_finetuning_data.py \ --train_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_train.tf_record \ --meta_data_file_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_meta_data \ --fine_tuning_task_type=squad --max_seq_length=384 \ - --tokenizer_impl=sentence_piece + --tokenization=SentencePiece ``` ## Fine-tuning with ALBERT diff --git a/official/nlp/albert/__init__.py b/official/nlp/albert/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/albert/__init__.py +++ b/official/nlp/albert/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/nlp/albert/configs.py b/official/nlp/albert/configs.py index 10fbb79bd50cc224f4192819bfb428cde357ef3c..6fd6fdff7b97e7a0dce385eb4edd22de6d23b6d0 100644 --- a/official/nlp/albert/configs.py +++ b/official/nlp/albert/configs.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""The ALBERT configurations.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""The ALBERT configurations.""" import six @@ -26,10 +22,7 @@ from official.nlp.bert import configs class AlbertConfig(configs.BertConfig): """Configuration for `ALBERT`.""" - def __init__(self, - num_hidden_groups=1, - inner_group_num=1, - **kwargs): + def __init__(self, num_hidden_groups=1, inner_group_num=1, **kwargs): """Constructs AlbertConfig. Args: @@ -43,8 +36,7 @@ class AlbertConfig(configs.BertConfig): super(AlbertConfig, self).__init__(**kwargs) # TODO(chendouble): 'inner_group_num' and 'num_hidden_groups' are always 1 - # in the released ALBERT. Support other values in AlbertTransformerEncoder - # if needed. + # in the released ALBERT. Support other values in AlbertEncoder if needed. 
if inner_group_num != 1 or num_hidden_groups != 1: raise ValueError("We only support 'inner_group_num' and " "'num_hidden_groups' as 1.") diff --git a/official/nlp/albert/export_albert_tfhub.py b/official/nlp/albert/export_albert_tfhub.py deleted file mode 100644 index 9a1af1a17735c5f0b995bb5e431fe143ffffa1d1..0000000000000000000000000000000000000000 --- a/official/nlp/albert/export_albert_tfhub.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""A script to export the ALBERT core model as a TF-Hub SavedModel.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -from absl import app -from absl import flags -import tensorflow as tf -from typing import Text - -from official.nlp.albert import configs -from official.nlp.bert import bert_models - -FLAGS = flags.FLAGS - -flags.DEFINE_string("albert_config_file", None, - "Albert configuration file to define core albert layers.") -flags.DEFINE_string("model_checkpoint_path", None, - "File path to TF model checkpoint.") -flags.DEFINE_string("export_path", None, "TF-Hub SavedModel destination path.") -flags.DEFINE_string( - "sp_model_file", None, - "The sentence piece model file that the ALBERT model was trained on.") - - -def create_albert_model( - albert_config: configs.AlbertConfig) -> tf.keras.Model: - """Creates an ALBERT keras core model from ALBERT configuration. - - Args: - albert_config: An `AlbertConfig` to create the core model. - - Returns: - A keras model. - """ - # Adds input layers just as placeholders. - input_word_ids = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_word_ids") - input_mask = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_mask") - input_type_ids = tf.keras.layers.Input( - shape=(None,), dtype=tf.int32, name="input_type_ids") - transformer_encoder = bert_models.get_transformer_encoder( - albert_config, sequence_length=None) - sequence_output, pooled_output = transformer_encoder( - [input_word_ids, input_mask, input_type_ids]) - # To keep consistent with legacy hub modules, the outputs are - # "pooled_output" and "sequence_output". 
- return tf.keras.Model( - inputs=[input_word_ids, input_mask, input_type_ids], - outputs=[pooled_output, sequence_output]), transformer_encoder - - -def export_albert_tfhub(albert_config: configs.AlbertConfig, - model_checkpoint_path: Text, hub_destination: Text, - sp_model_file: Text): - """Restores a tf.keras.Model and saves for TF-Hub.""" - core_model, encoder = create_albert_model(albert_config) - checkpoint = tf.train.Checkpoint(model=encoder) - checkpoint.restore(model_checkpoint_path).assert_consumed() - core_model.sp_model_file = tf.saved_model.Asset(sp_model_file) - core_model.save(hub_destination, include_optimizer=False, save_format="tf") - - -def main(_): - albert_config = configs.AlbertConfig.from_json_file( - FLAGS.albert_config_file) - export_albert_tfhub(albert_config, FLAGS.model_checkpoint_path, - FLAGS.export_path, FLAGS.sp_model_file) - - -if __name__ == "__main__": - app.run(main) diff --git a/official/nlp/albert/export_albert_tfhub_test.py b/official/nlp/albert/export_albert_tfhub_test.py deleted file mode 100644 index 4973090365b7ce6527ef1e4458e3f334ea1a5d1b..0000000000000000000000000000000000000000 --- a/official/nlp/albert/export_albert_tfhub_test.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests official.nlp.albert.export_albert_tfhub.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np - -import tensorflow as tf -import tensorflow_hub as hub - -from official.nlp.albert import configs -from official.nlp.albert import export_albert_tfhub - - -class ExportAlbertTfhubTest(tf.test.TestCase): - - def test_export_albert_tfhub(self): - # Exports a savedmodel for TF-Hub - albert_config = configs.AlbertConfig( - vocab_size=100, - embedding_size=8, - hidden_size=16, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=1) - bert_model, encoder = export_albert_tfhub.create_albert_model(albert_config) - model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(model=encoder) - checkpoint.save(os.path.join(model_checkpoint_dir, "test")) - model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) - - sp_model_file = os.path.join(self.get_temp_dir(), "sp_tokenizer.model") - with tf.io.gfile.GFile(sp_model_file, "w") as f: - f.write("dummy content") - - hub_destination = os.path.join(self.get_temp_dir(), "hub") - export_albert_tfhub.export_albert_tfhub( - albert_config, - model_checkpoint_path, - hub_destination, - sp_model_file=sp_model_file) - - # Restores a hub KerasLayer. - hub_layer = hub.KerasLayer(hub_destination, trainable=True) - - if hasattr(hub_layer, "resolved_object"): - with tf.io.gfile.GFile( - hub_layer.resolved_object.sp_model_file.asset_path.numpy()) as f: - self.assertEqual("dummy content", f.read()) - # Checks the hub KerasLayer. 
- for source_weight, hub_weight in zip(bert_model.trainable_weights, - hub_layer.trainable_weights): - self.assertAllClose(source_weight.numpy(), hub_weight.numpy()) - - dummy_ids = np.zeros((2, 10), dtype=np.int32) - hub_outputs = hub_layer([dummy_ids, dummy_ids, dummy_ids]) - source_outputs = bert_model([dummy_ids, dummy_ids, dummy_ids]) - - # The outputs of hub module are "pooled_output" and "sequence_output", - # while the outputs of encoder is in reversed order, i.e., - # "sequence_output" and "pooled_output". - encoder_outputs = reversed(encoder([dummy_ids, dummy_ids, dummy_ids])) - self.assertEqual(hub_outputs[0].shape, (2, 16)) - self.assertEqual(hub_outputs[1].shape, (2, 10, 16)) - for source_output, hub_output, encoder_output in zip( - source_outputs, hub_outputs, encoder_outputs): - self.assertAllClose(source_output.numpy(), hub_output.numpy()) - self.assertAllClose(source_output.numpy(), encoder_output.numpy()) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/albert/run_classifier.py b/official/nlp/albert/run_classifier.py index 7b1371cc052775d3182c51a36926add43dee416e..70e26267f32d98b68a7758615961302333e75c50 100644 --- a/official/nlp/albert/run_classifier.py +++ b/official/nlp/albert/run_classifier.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,25 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""ALBERT classification finetuning runner in tf2.x.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""ALBERT classification finetuning runner in tf2.x.""" import json import os +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf - +from official.common import distribute_utils from official.nlp.albert import configs as albert_configs from official.nlp.bert import bert_models from official.nlp.bert import run_classifier as run_classifier_bert -from official.utils.misc import distribution_utils FLAGS = flags.FLAGS @@ -76,7 +71,7 @@ def main(_): if not FLAGS.model_dir: FLAGS.model_dir = '/tmp/bert20/' - strategy = distribution_utils.get_distribution_strategy( + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, tpu_address=FLAGS.tpu) diff --git a/official/nlp/albert/run_squad.py b/official/nlp/albert/run_squad.py index 28a171a3f4a377ab174418c3b466b22680ad5734..cd635443ffd039c248cdc77bf38bb52148c05685 100644 --- a/official/nlp/albert/run_squad.py +++ b/official/nlp/albert/run_squad.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,27 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Run ALBERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Run ALBERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x.""" import json import os import time +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf - +from official.common import distribute_utils from official.nlp.albert import configs as albert_configs from official.nlp.bert import run_squad_helper from official.nlp.bert import tokenization from official.nlp.data import squad_lib_sp -from official.utils.misc import distribution_utils flags.DEFINE_string( 'sp_model_file', None, @@ -103,9 +99,8 @@ def main(_): # Configures cluster spec for multi-worker distribution strategy. if FLAGS.num_gpus > 0: - _ = distribution_utils.configure_cluster(FLAGS.worker_hosts, - FLAGS.task_index) - strategy = distribution_utils.get_distribution_strategy( + _ = distribute_utils.configure_cluster(FLAGS.worker_hosts, FLAGS.task_index) + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, all_reduce_alg=FLAGS.all_reduce_alg, diff --git a/official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py b/official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py index 402bc1445bed575362598d09212d14d03b629179..131ed16bd0de88201ca2e0895ed0e93cc0fe5328 100644 --- a/official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py +++ b/official/nlp/albert/tf2_albert_encoder_checkpoint_converter.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,25 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A converter from a tf1 ALBERT encoder checkpoint to a tf2 encoder checkpoint. The conversion will yield an object-oriented checkpoint that can be used -to restore a AlbertTransformerEncoder object. +to restore an AlbertEncoder object. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os from absl import app from absl import flags import tensorflow as tf -from official.modeling import activations +from official.modeling import tf_utils from official.nlp.albert import configs from official.nlp.bert import tf1_checkpoint_converter_lib +from official.nlp.modeling import models from official.nlp.modeling import networks FLAGS = flags.FLAGS @@ -42,6 +39,14 @@ flags.DEFINE_string( "BertModel, with no task heads.)") flags.DEFINE_string("converted_checkpoint_path", None, "Name for the created object-based V2 checkpoint.") +flags.DEFINE_string("checkpoint_model_name", "encoder", + "The name of the model when saving the checkpoint, i.e., " + "the checkpoint will be saved using: " + "tf.train.Checkpoint(FLAGS.checkpoint_model_name=model).") +flags.DEFINE_enum( + "converted_model", "encoder", ["encoder", "pretrainer"], + "Whether to convert the checkpoint to a `AlbertEncoder` model or a " + "`BertPretrainerV2` model (with mlm but without classification heads).") ALBERT_NAME_REPLACEMENTS = ( @@ -55,11 +60,12 @@ ALBERT_NAME_REPLACEMENTS = ( ("group_0/inner_group_0/", ""), ("attention_1/self", "self_attention"), ("attention_1/output/dense", "self_attention/attention_output"), - ("LayerNorm/", "self_attention_layer_norm/"), + ("transformer/LayerNorm/", "transformer/self_attention_layer_norm/"), ("ffn_1/intermediate/dense", 
"intermediate"), ("ffn_1/intermediate/output/dense", "output"), - ("LayerNorm_1/", "output_layer_norm/"), + ("transformer/LayerNorm_1/", "transformer/output_layer_norm/"), ("pooler/dense", "pooler_transform"), + ("cls/predictions", "bert/cls/predictions"), ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"), ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"), ("cls/seq_relationship/output_weights", @@ -68,32 +74,54 @@ ALBERT_NAME_REPLACEMENTS = ( def _create_albert_model(cfg): - """Creates a BERT keras core model from BERT configuration. + """Creates an ALBERT keras core model from BERT configuration. Args: - cfg: A `BertConfig` to create the core model. + cfg: A `AlbertConfig` to create the core model. Returns: A keras model. """ - albert_encoder = networks.AlbertTransformerEncoder( + albert_encoder = networks.AlbertEncoder( vocab_size=cfg.vocab_size, hidden_size=cfg.hidden_size, embedding_width=cfg.embedding_size, num_layers=cfg.num_hidden_layers, num_attention_heads=cfg.num_attention_heads, intermediate_size=cfg.intermediate_size, - activation=activations.gelu, + activation=tf_utils.get_activation(cfg.hidden_act), dropout_rate=cfg.hidden_dropout_prob, attention_dropout_rate=cfg.attention_probs_dropout_prob, - sequence_length=cfg.max_position_embeddings, + max_sequence_length=cfg.max_position_embeddings, type_vocab_size=cfg.type_vocab_size, initializer=tf.keras.initializers.TruncatedNormal( stddev=cfg.initializer_range)) return albert_encoder -def convert_checkpoint(bert_config, output_path, v1_checkpoint): +def _create_pretrainer_model(cfg): + """Creates a pretrainer with AlbertEncoder from ALBERT configuration. + + Args: + cfg: A `BertConfig` to create the core model. + + Returns: + A BertPretrainerV2 model. 
+ """ + albert_encoder = _create_albert_model(cfg) + pretrainer = models.BertPretrainerV2( + encoder_network=albert_encoder, + mlm_activation=tf_utils.get_activation(cfg.hidden_act), + mlm_initializer=tf.keras.initializers.TruncatedNormal( + stddev=cfg.initializer_range)) + # Makes sure masked_lm layer's variables in pretrainer are created. + _ = pretrainer(pretrainer.inputs) + return pretrainer + + +def convert_checkpoint(bert_config, output_path, v1_checkpoint, + checkpoint_model_name, + converted_model="encoder"): """Converts a V1 checkpoint into an OO V2 checkpoint.""" output_dir, _ = os.path.split(output_path) @@ -109,9 +137,16 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint): exclude_patterns=["adam", "Adam"]) # Create a V2 checkpoint from the temporary checkpoint. - model = _create_albert_model(bert_config) + if converted_model == "encoder": + model = _create_albert_model(bert_config) + elif converted_model == "pretrainer": + model = _create_pretrainer_model(bert_config) + else: + raise ValueError("Unsupported converted_model: %s" % converted_model) + tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint, - output_path) + output_path, + checkpoint_model_name) # Clean up the temporary checkpoint, if it exists. 
try: @@ -124,8 +159,12 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint): def main(_): output_path = FLAGS.converted_checkpoint_path v1_checkpoint = FLAGS.checkpoint_to_convert + checkpoint_model_name = FLAGS.checkpoint_model_name + converted_model = FLAGS.converted_model albert_config = configs.AlbertConfig.from_json_file(FLAGS.albert_config_file) - convert_checkpoint(albert_config, output_path, v1_checkpoint) + convert_checkpoint(albert_config, output_path, v1_checkpoint, + checkpoint_model_name, + converted_model=converted_model) if __name__ == "__main__": diff --git a/official/nlp/bert/README.md b/official/nlp/bert/README.md index c26a87df520b9d9bb9cccefd515abc0bf4a399c7..037ff0b1ff8c6ea22bcf692bb8f786320b7d2d48 100644 --- a/official/nlp/bert/README.md +++ b/official/nlp/bert/README.md @@ -1,5 +1,11 @@ # BERT (Bidirectional Encoder Representations from Transformers) +**WARNING**: We are on the way to deprecate most of the code in this directory. +Please see +[this link](https://github.com/tensorflow/models/blob/master/official/nlp/docs/train.md) +for the new tutorial and use the new code in `nlp/modeling`. This README is +still correct for this legacy implementation. + The academic paper which describes BERT in detail and provides full results on a number of tasks can be found here: https://arxiv.org/abs/1810.04805. @@ -46,6 +52,8 @@ The new checkpoints are:** 12-layer, 768-hidden, 12-heads , 110M parameters * **[`BERT-Large, Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/cased_L-24_H-1024_A-16.tar.gz)**: 24-layer, 1024-hidden, 16-heads, 340M parameters +* **[`BERT-Base, Multilingual Cased`](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/keras_bert/multi_cased_L-12_H-768_A-12.tar.gz)**: + 104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters We recommend to host checkpoints on Google Cloud storage buckets when you use Cloud GPU/TPU. 
@@ -70,21 +78,21 @@ Checkpoints featuring native serialized Keras models Pretrained tf.hub modules in TF 2.x SavedModel format can be found in the following links: -* **[`BERT-Large, Uncased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_uncased_L-24_H-1024_A-16/1)**: +* **[`BERT-Large, Uncased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_uncased_L-24_H-1024_A-16/)**: 24-layer, 1024-hidden, 16-heads, 340M parameters -* **[`BERT-Large, Cased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/1)**: +* **[`BERT-Large, Cased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/)**: 24-layer, 1024-hidden, 16-heads, 340M parameters -* **[`BERT-Base, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1)**: +* **[`BERT-Base, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/)**: 12-layer, 768-hidden, 12-heads, 110M parameters -* **[`BERT-Large, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/1)**: +* **[`BERT-Large, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/)**: 24-layer, 1024-hidden, 16-heads, 340M parameters -* **[`BERT-Base, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/1)**: +* **[`BERT-Base, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/)**: 12-layer, 768-hidden, 12-heads , 110M parameters -* **[`BERT-Large, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-24_H-1024_A-16/1)**: +* **[`BERT-Large, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-24_H-1024_A-16/)**: 24-layer, 1024-hidden, 16-heads, 340M parameters -* **[`BERT-Base, Multilingual Cased`](https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/1)**: +* **[`BERT-Base, Multilingual Cased`](https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/)**: 104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters -* **[`BERT-Base, 
Chinese`](https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/1)**: +* **[`BERT-Base, Chinese`](https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/)**: Chinese Simplified and Traditional, 12-layer, 768-hidden, 12-heads, 110M parameters @@ -123,6 +131,23 @@ which is essentially branched from [BERT research repo](https://github.com/googl to get processed pre-training data and it adapts to TF2 symbols and python3 compatibility. +Running the pre-training script requires an input and output directory, as well as a vocab file. Note that max_seq_length will need to match the sequence length parameter you specify when you run pre-training. + +Example shell script to call create_pretraining_data.py +``` +export WORKING_DIR='local disk or cloud location' +export BERT_DIR='local disk or cloud location' +python models/official/nlp/data/create_pretraining_data.py \ + --input_file=$WORKING_DIR/input/input.txt \ + --output_file=$WORKING_DIR/output/tf_examples.tfrecord \ + --vocab_file=$BERT_DIR/wwm_uncased_L-24_H-1024_A-16/vocab.txt \ + --do_lower_case=True \ + --max_seq_length=512 \ + --max_predictions_per_seq=76 \ + --masked_lm_prob=0.15 \ + --random_seed=12345 \ + --dupe_factor=5 +``` ### Fine-tuning @@ -184,6 +209,8 @@ python ../data/create_finetuning_data.py \ --fine_tuning_task_type=squad --max_seq_length=384 ``` +Note: To create fine-tuning data with SQUAD 2.0, you need to add flag `--version_2_with_negative=True`. + ## Fine-tuning with BERT ### Cloud GPUs and TPUs diff --git a/official/nlp/bert/__init__.py b/official/nlp/bert/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..a25710c222e3327cb20e000db5df5c5651c4a2cc 100644 --- a/official/nlp/bert/__init__.py +++ b/official/nlp/bert/__init__.py @@ -1 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/bert/bert_cloud_tpu.md b/official/nlp/bert/bert_cloud_tpu.md index e5e6758a8bdc216744b7770d7eb8b5ff47408493..baf6f9bdc0c155cb53b30cea5f404aa166c3a2c6 100644 --- a/official/nlp/bert/bert_cloud_tpu.md +++ b/official/nlp/bert/bert_cloud_tpu.md @@ -4,17 +4,17 @@ This tutorial shows you how to train the Bidirectional Encoder Representations f ## Set up Cloud Storage and Compute Engine VM 1. [Open a cloud shell window](https://console.cloud.google.com/?cloudshell=true&_ga=2.11844148.-1612541229.1552429951) -2. Create a variable for the project's name: +2. Create a variable for the project's id: ``` -export PROJECT_NAME=your-project_name +export PROJECT_ID=your-project_id ``` 3. Configure `gcloud` command-line tool to use the project where you want to create Cloud TPU. ``` -gcloud config set project ${PROJECT_NAME} +gcloud config set project ${PROJECT_ID} ``` 4. Create a Cloud Storage bucket using the following command: ``` -gsutil mb -p ${PROJECT_NAME} -c standard -l europe-west4 -b on gs://your-bucket-name +gsutil mb -p ${PROJECT_ID} -c standard -l europe-west4 -b on gs://your-bucket-name ``` This Cloud Storage bucket stores the data you use to train your model and the training results. 5. Launch a Compute Engine VM and Cloud TPU using the ctpu up command. 
diff --git a/official/nlp/bert/bert_models.py b/official/nlp/bert/bert_models.py index 9d16150d0c353e6626b911b32c9961c4712c8aed..8a0fceb725e9eddc8e8ea25924d4013fe88b2048 100644 --- a/official/nlp/bert/bert_models.py +++ b/official/nlp/bert/bert_models.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""BERT models that are compatible with TF 2.0.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""BERT models that are compatible with TF 2.0.""" import gin import tensorflow as tf @@ -104,29 +100,29 @@ class BertPretrainLossAndMetricLayer(tf.keras.layers.Layer): @gin.configurable def get_transformer_encoder(bert_config, - sequence_length, + sequence_length=None, transformer_encoder_cls=None, output_range=None): """Gets a 'TransformerEncoder' object. Args: bert_config: A 'modeling.BertConfig' or 'modeling.AlbertConfig' object. - sequence_length: Maximum sequence length of the training data. + sequence_length: [Deprecated]. transformer_encoder_cls: A EncoderScaffold class. If it is None, uses the default BERT encoder implementation. output_range: the sequence output range, [0, output_range). Default setting is to return the entire sequence output. Returns: - A networks.TransformerEncoder object. + A encoder object. """ + del sequence_length if transformer_encoder_cls is not None: # TODO(hongkuny): evaluate if it is better to put cfg definition in gin. 
embedding_cfg = dict( vocab_size=bert_config.vocab_size, type_vocab_size=bert_config.type_vocab_size, hidden_size=bert_config.hidden_size, - seq_length=sequence_length, max_seq_length=bert_config.max_position_embeddings, initializer=tf.keras.initializers.TruncatedNormal( stddev=bert_config.initializer_range), @@ -161,18 +157,17 @@ def get_transformer_encoder(bert_config, activation=tf_utils.get_activation(bert_config.hidden_act), dropout_rate=bert_config.hidden_dropout_prob, attention_dropout_rate=bert_config.attention_probs_dropout_prob, - sequence_length=sequence_length, max_sequence_length=bert_config.max_position_embeddings, type_vocab_size=bert_config.type_vocab_size, embedding_width=bert_config.embedding_size, initializer=tf.keras.initializers.TruncatedNormal( stddev=bert_config.initializer_range)) if isinstance(bert_config, albert_configs.AlbertConfig): - return networks.AlbertTransformerEncoder(**kwargs) + return networks.AlbertEncoder(**kwargs) else: assert isinstance(bert_config, configs.BertConfig) kwargs['output_range'] = output_range - return networks.TransformerEncoder(**kwargs) + return networks.BertEncoder(**kwargs) def pretrain_model(bert_config, diff --git a/official/nlp/bert/bert_models_test.py b/official/nlp/bert/bert_models_test.py index 93763b45bfc53c5d32de2df7f7f0f72894e9556f..8c4a52a20d343e3d7cc5f0ccac250d5f4f036667 100644 --- a/official/nlp/bert/bert_models_test.py +++ b/official/nlp/bert/bert_models_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import tensorflow as tf @@ -48,16 +44,16 @@ class BertModelsTest(tf.test.TestCase): initializer=None, use_next_sentence_label=True) self.assertIsInstance(model, tf.keras.Model) - self.assertIsInstance(encoder, networks.TransformerEncoder) + self.assertIsInstance(encoder, networks.BertEncoder) # model has one scalar output: loss value. - self.assertEqual(model.output.shape.as_list(), [None,]) + self.assertEqual(model.output.shape.as_list(), [ + None, + ]) # Expect two output from encoder: sequence and classification output. self.assertIsInstance(encoder.output, list) self.assertLen(encoder.output, 2) - # shape should be [batch size, seq_length, hidden_size] - self.assertEqual(encoder.output[0].shape.as_list(), [None, 5, 16]) # shape should be [batch size, hidden_size] self.assertEqual(encoder.output[1].shape.as_list(), [None, 16]) @@ -74,16 +70,12 @@ class BertModelsTest(tf.test.TestCase): # Expect two output from model: start positions and end positions self.assertIsInstance(model.output, list) self.assertLen(model.output, 2) - # shape should be [batch size, seq_length] - self.assertEqual(model.output[0].shape.as_list(), [None, 5]) - # shape should be [batch size, seq_length] - self.assertEqual(model.output[1].shape.as_list(), [None, 5]) # Expect two output from core_model: sequence and classification output. 
self.assertIsInstance(core_model.output, list) self.assertLen(core_model.output, 2) - # shape should be [batch size, seq_length, hidden_size] - self.assertEqual(core_model.output[0].shape.as_list(), [None, 5, 16]) + # shape should be [batch size, None, hidden_size] + self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16]) # shape should be [batch size, hidden_size] self.assertEqual(core_model.output[1].shape.as_list(), [None, 16]) @@ -104,8 +96,8 @@ class BertModelsTest(tf.test.TestCase): # Expect two output from core_model: sequence and classification output. self.assertIsInstance(core_model.output, list) self.assertLen(core_model.output, 2) - # shape should be [batch size, 1, hidden_size] - self.assertEqual(core_model.output[0].shape.as_list(), [None, 1, 16]) + # shape should be [batch size, None, hidden_size] + self.assertEqual(core_model.output[0].shape.as_list(), [None, None, 16]) # shape should be [batch size, hidden_size] self.assertEqual(core_model.output[1].shape.as_list(), [None, 16]) diff --git a/official/nlp/bert/common_flags.py b/official/nlp/bert/common_flags.py index 06a376d63de5447ddd67810f2cf6be3399f2a958..55570052b99e850c6e6861f00c39a7a3fabf3729 100644 --- a/official/nlp/bert/common_flags.py +++ b/official/nlp/bert/common_flags.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Defining common flags used across all BERT models/applications.""" from absl import flags @@ -73,9 +73,22 @@ def define_common_bert_flags(): 'If specified, init_checkpoint flag should not be used.') flags.DEFINE_bool('hub_module_trainable', True, 'True to make keras layers in the hub module trainable.') - flags.DEFINE_string('sub_model_export_name', None, - 'If set, `sub_model` checkpoints are exported into ' - 'FLAGS.model_dir/FLAGS.sub_model_export_name.') + flags.DEFINE_string( + 'sub_model_export_name', None, + 'If set, `sub_model` checkpoints are exported into ' + 'FLAGS.model_dir/FLAGS.sub_model_export_name.') + flags.DEFINE_bool('explicit_allreduce', False, + 'True to use explicit allreduce instead of the implicit ' + 'allreduce in optimizer.apply_gradients(). If fp16 mixed ' + 'precision training is used, this also enables allreduce ' + 'gradients in fp16.') + flags.DEFINE_integer('allreduce_bytes_per_pack', 0, + 'Number of bytes of a gradient pack for allreduce. ' + 'Should be positive integer, if set to 0, all ' + 'gradients are in one pack. Breaking gradient into ' + 'packs could enable overlap between allreduce and ' + 'backprop computation. This flag only takes effect ' + 'when explicit_allreduce is set to True.') flags_core.define_log_steps() @@ -87,7 +100,6 @@ def define_common_bert_flags(): synthetic_data=False, max_train_steps=False, dtype=True, - dynamic_loss_scale=True, loss_scale=True, all_reduce_alg=True, num_packs=False, diff --git a/official/nlp/bert/configs.py b/official/nlp/bert/configs.py index b3f9082655f490e010ff2a341c40d488eb1097c1..950c32d0bfad3e06f3d14baf042a916de2eb2828 100644 --- a/official/nlp/bert/configs.py +++ b/official/nlp/bert/configs.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""The main BERT model and related functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""The main BERT model and related functions.""" import copy import json + import six import tensorflow as tf @@ -105,4 +102,3 @@ class BertConfig(object): def to_json_string(self): """Serializes this instance to a JSON string.""" return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" - diff --git a/official/nlp/bert/export_tfhub.py b/official/nlp/bert/export_tfhub.py index 5a49a3df54a64ceacbe1235b870d17bc84d8a488..833e7c10582f9252f59b3b7584a5bcca0b6f4991 100644 --- a/official/nlp/bert/export_tfhub.py +++ b/official/nlp/bert/export_tfhub.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""A script to export the BERT core model as a TF-Hub SavedModel.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""A script to export BERT as a TF-Hub SavedModel. 
+ +This script is **DEPRECATED** for exporting BERT encoder models; +see the error message in by main() for details. +""" + +from typing import Text + +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf -from typing import Text from official.nlp.bert import bert_models from official.nlp.bert import configs @@ -35,9 +37,12 @@ flags.DEFINE_string("model_checkpoint_path", None, flags.DEFINE_string("export_path", None, "TF-Hub SavedModel destination path.") flags.DEFINE_string("vocab_file", None, "The vocabulary file that the BERT model was trained on.") -flags.DEFINE_bool("do_lower_case", None, "Whether to lowercase. If None, " - "do_lower_case will be enabled if 'uncased' appears in the " - "name of --vocab_file") +flags.DEFINE_bool( + "do_lower_case", None, "Whether to lowercase. If None, " + "do_lower_case will be enabled if 'uncased' appears in the " + "name of --vocab_file") +flags.DEFINE_enum("model_type", "encoder", ["encoder", "squad"], + "What kind of BERT model to export.") def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model: @@ -68,8 +73,10 @@ def create_bert_model(bert_config: configs.BertConfig) -> tf.keras.Model: def export_bert_tfhub(bert_config: configs.BertConfig, - model_checkpoint_path: Text, hub_destination: Text, - vocab_file: Text, do_lower_case: bool = None): + model_checkpoint_path: Text, + hub_destination: Text, + vocab_file: Text, + do_lower_case: bool = None): """Restores a tf.keras.Model and saves for TF-Hub.""" # If do_lower_case is not explicit, default to checking whether "uncased" is # in the vocab file name @@ -78,17 +85,54 @@ def export_bert_tfhub(bert_config: configs.BertConfig, logging.info("Using do_lower_case=%s based on name of vocab_file=%s", do_lower_case, vocab_file) core_model, encoder = create_bert_model(bert_config) - checkpoint = tf.train.Checkpoint(model=encoder) + checkpoint = tf.train.Checkpoint( + model=encoder, # Legacy checkpoints. 
+ encoder=encoder) checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() core_model.vocab_file = tf.saved_model.Asset(vocab_file) core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False) core_model.save(hub_destination, include_optimizer=False, save_format="tf") +def export_bert_squad_tfhub(bert_config: configs.BertConfig, + model_checkpoint_path: Text, + hub_destination: Text, + vocab_file: Text, + do_lower_case: bool = None): + """Restores a tf.keras.Model for BERT with SQuAD and saves for TF-Hub.""" + # If do_lower_case is not explicit, default to checking whether "uncased" is + # in the vocab file name + if do_lower_case is None: + do_lower_case = "uncased" in vocab_file + logging.info("Using do_lower_case=%s based on name of vocab_file=%s", + do_lower_case, vocab_file) + span_labeling, _ = bert_models.squad_model(bert_config, max_seq_length=None) + checkpoint = tf.train.Checkpoint(model=span_labeling) + checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() + span_labeling.vocab_file = tf.saved_model.Asset(vocab_file) + span_labeling.do_lower_case = tf.Variable(do_lower_case, trainable=False) + span_labeling.save(hub_destination, include_optimizer=False, save_format="tf") + + def main(_): bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path, FLAGS.export_path, - FLAGS.vocab_file, FLAGS.do_lower_case) + if FLAGS.model_type == "encoder": + deprecation_note = ( + "nlp/bert/export_tfhub is **DEPRECATED** for exporting BERT encoder " + "models. 
Please switch to nlp/tools/export_tfhub for exporting BERT " + "(and other) encoders with dict inputs/outputs conforming to " + "https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders" + ) + logging.error(deprecation_note) + print("\n\nNOTICE:", deprecation_note, "\n") + export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path, + FLAGS.export_path, FLAGS.vocab_file, FLAGS.do_lower_case) + elif FLAGS.model_type == "squad": + export_bert_squad_tfhub(bert_config, FLAGS.model_checkpoint_path, + FLAGS.export_path, FLAGS.vocab_file, + FLAGS.do_lower_case) + else: + raise ValueError("Unsupported model_type %s." % FLAGS.model_type) if __name__ == "__main__": diff --git a/official/nlp/bert/export_tfhub_test.py b/official/nlp/bert/export_tfhub_test.py index 6b6fd40f5e1be5d5e8d4699d54c048add7435523..77030dd3fde7d4c4d73bea0fdea017848b1e253f 100644 --- a/official/nlp/bert/export_tfhub_test.py +++ b/official/nlp/bert/export_tfhub_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,26 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests official.nlp.bert.export_tfhub.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests official.nlp.bert.export_tfhub.""" import os +from absl.testing import parameterized import numpy as np - import tensorflow as tf import tensorflow_hub as hub + from official.nlp.bert import configs from official.nlp.bert import export_tfhub -class ExportTfhubTest(tf.test.TestCase): +class ExportTfhubTest(tf.test.TestCase, parameterized.TestCase): - def test_export_tfhub(self): + @parameterized.parameters("model", "encoder") + def test_export_tfhub(self, ckpt_key_name): # Exports a savedmodel for TF-Hub hidden_size = 16 bert_config = configs.BertConfig( @@ -42,7 +40,7 @@ class ExportTfhubTest(tf.test.TestCase): num_hidden_layers=1) bert_model, encoder = export_tfhub.create_bert_model(bert_config) model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(model=encoder) + checkpoint = tf.train.Checkpoint(**{ckpt_key_name: encoder}) checkpoint.save(os.path.join(model_checkpoint_dir, "test")) model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) @@ -91,6 +89,7 @@ class ExportTfhubTest(tf.test.TestCase): outputs = np.concatenate( [hub_layer(inputs, training=training)[0] for _ in range(num_runs)]) return np.mean(np.std(outputs, axis=0)) + self.assertLess(_dropout_mean_stddev(training=False), 1e-6) self.assertGreater(_dropout_mean_stddev(training=True), 1e-3) diff --git a/official/nlp/bert/input_pipeline.py b/official/nlp/bert/input_pipeline.py index ed3fd173d4379a75ab1e2e5a9ba0bbdcbaa0be42..0c0f7615c37142ca039ad9fc68d98776a6b6b7b8 100644 --- a/official/nlp/bert/input_pipeline.py +++ b/official/nlp/bert/input_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""BERT model input pipelines.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""BERT model input pipelines.""" import tensorflow as tf @@ -36,11 +32,13 @@ def decode_record(record, name_to_features): return example -def single_file_dataset(input_file, name_to_features): +def single_file_dataset(input_file, name_to_features, num_samples=None): """Creates a single-file dataset to be passed for BERT custom training.""" # For training, we want a lot of parallel reading and shuffling. # For eval, we want no shuffling and parallel reading doesn't matter. d = tf.data.TFRecordDataset(input_file) + if num_samples: + d = d.take(num_samples) d = d.map( lambda record: decode_record(record, name_to_features), num_parallel_calls=tf.data.experimental.AUTOTUNE) @@ -156,7 +154,8 @@ def create_classifier_dataset(file_path, is_training=True, input_pipeline_context=None, label_type=tf.int64, - include_sample_weights=False): + include_sample_weights=False, + num_samples=None): """Creates input dataset from (tf)records files for train/eval.""" name_to_features = { 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64), @@ -166,7 +165,8 @@ def create_classifier_dataset(file_path, } if include_sample_weights: name_to_features['weight'] = tf.io.FixedLenFeature([], tf.float32) - dataset = single_file_dataset(file_path, name_to_features) + dataset = single_file_dataset(file_path, name_to_features, + num_samples=num_samples) # The dataset is always sharded by number of hosts. 
# num_input_pipelines is the number of hosts rather than number of cores. @@ -258,7 +258,7 @@ def create_retrieval_dataset(file_path, 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64), 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64), 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64), - 'int_iden': tf.io.FixedLenFeature([1], tf.int64), + 'example_id': tf.io.FixedLenFeature([1], tf.int64), } dataset = single_file_dataset(file_path, name_to_features) @@ -274,12 +274,29 @@ def create_retrieval_dataset(file_path, 'input_mask': record['input_mask'], 'input_type_ids': record['segment_ids'] } - y = record['int_iden'] + y = record['example_id'] return (x, y) dataset = dataset.map( _select_data_from_record, num_parallel_calls=tf.data.experimental.AUTOTUNE) dataset = dataset.batch(batch_size, drop_remainder=False) + + def _pad_to_batch(x, y): + cur_size = tf.shape(y)[0] + pad_size = batch_size - cur_size + + pad_ids = tf.zeros(shape=[pad_size, seq_length], dtype=tf.int32) + for key in ('input_word_ids', 'input_mask', 'input_type_ids'): + x[key] = tf.concat([x[key], pad_ids], axis=0) + + pad_labels = -tf.ones(shape=[pad_size, 1], dtype=tf.int32) + y = tf.concat([y, pad_labels], axis=0) + return x, y + + dataset = dataset.map( + _pad_to_batch, + num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) return dataset diff --git a/official/nlp/bert/model_saving_utils.py b/official/nlp/bert/model_saving_utils.py index 13d2c9ed02f9a98d9dcbb2a60c46fa5cd13bb666..1d69750878bd8a89482958874b5f059193f6d7f5 100644 --- a/official/nlp/bert/model_saving_utils.py +++ b/official/nlp/bert/model_saving_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Utilities to save models.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Utilities to save models.""" import os @@ -32,19 +27,19 @@ def export_bert_model(model_export_path: typing.Text, restore_model_using_load_weights: bool = False) -> None: """Export BERT model for serving which does not include the optimizer. - Arguments: + Args: model_export_path: Path to which exported model will be saved. model: Keras model object to export. checkpoint_dir: Path from which model weights will be loaded, if specified. restore_model_using_load_weights: Whether to use checkpoint.restore() API - for custom checkpoint or to use model.load_weights() API. - There are 2 different ways to save checkpoints. One is using - tf.train.Checkpoint and another is using Keras model.save_weights(). - Custom training loop implementation uses tf.train.Checkpoint API - and Keras ModelCheckpoint callback internally uses model.save_weights() - API. Since these two API's cannot be used toghether, model loading logic - must be take into account how model checkpoint was saved. + for custom checkpoint or to use model.load_weights() API. There are 2 + different ways to save checkpoints. One is using tf.train.Checkpoint and + another is using Keras model.save_weights(). Custom training loop + implementation uses tf.train.Checkpoint API and Keras ModelCheckpoint + callback internally uses model.save_weights() API. Since these two API's + cannot be used toghether, model loading logic must be take into account + how model checkpoint was saved. 
Raises: ValueError when either model_export_path or model is not specified. @@ -55,14 +50,10 @@ def export_bert_model(model_export_path: typing.Text, raise ValueError('model must be a tf.keras.Model object.') if checkpoint_dir: - # Keras compile/fit() was used to save checkpoint using - # model.save_weights(). if restore_model_using_load_weights: model_weight_path = os.path.join(checkpoint_dir, 'checkpoint') assert tf.io.gfile.exists(model_weight_path) model.load_weights(model_weight_path) - - # tf.train.Checkpoint API was used via custom training loop logic. else: checkpoint = tf.train.Checkpoint(model=model) diff --git a/official/nlp/bert/model_training_utils.py b/official/nlp/bert/model_training_utils.py index 071e18b3453a7291fd4ece111811ac1e1243a5cd..bfd19d1a44d8bcd066ea8fc88b54358e7495eb70 100644 --- a/official/nlp/bert/model_training_utils.py +++ b/official/nlp/bert/model_training_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""A light weight utilities to train NLP models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""A light weight utilities to train NLP models.""" import json import os @@ -25,8 +21,8 @@ import tempfile from absl import logging import tensorflow as tf from tensorflow.python.util import deprecation +from official.common import distribute_utils from official.staging.training import grad_utils -from official.utils.misc import distribution_utils _SUMMARY_TXT = 'training_summary.txt' _MIN_SUMMARY_STEPS = 10 @@ -65,8 +61,7 @@ def _get_input_iterator(input_fn, strategy): # pass callable that returns a dataset. if not callable(input_fn): raise ValueError('`input_fn` should be a closure that returns a dataset.') - iterator = iter( - strategy.experimental_distribute_datasets_from_function(input_fn)) + iterator = iter(strategy.distribute_datasets_from_function(input_fn)) return iterator @@ -75,6 +70,13 @@ def _float_metric_value(metric): return metric.result().numpy().astype(float) +def clip_by_global_norm_callback(grads_and_vars): + """Performs gradient clipping.""" + grads, variables = zip(*grads_and_vars) + (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) + return zip(clipped_grads, variables) + + def steps_to_run(current_step, steps_per_epoch, steps_per_loop): """Calculates steps to run on device.""" if steps_per_loop <= 0: @@ -126,10 +128,11 @@ def run_customized_training_loop( explicit_allreduce=False, pre_allreduce_callbacks=None, post_allreduce_callbacks=None, - train_summary_interval=0): + train_summary_interval=0, + allreduce_bytes_per_pack=0): """Run BERT pretrain model training using low-level API. - Arguments: + Args: _sentinel: Used to prevent positional parameters. Internal, do not use. strategy: Distribution strategy on which to run low level training loop. 
model_fn: Function that returns a tuple (model, sub_model). Caller of this @@ -156,16 +159,16 @@ def run_customized_training_loop( evaluation is skipped. eval_steps: Number of steps to run evaluation. Required if `eval_input_fn` is not none. - metric_fn: A metrics function that returns a Keras Metric object to record - evaluation result using evaluation dataset or with training dataset - after every epoch. + metric_fn: A metrics function that returns either a Keras Metric object or + a list of Keras Metric objects to record evaluation result using + evaluation dataset or with training dataset after every epoch. init_checkpoint: Optional checkpoint to load to `sub_model` returned by `model_fn`. custom_callbacks: A list of Keras Callbacks objects to run during training. More specifically, `on_train_begin(), on_train_end(), on_batch_begin()`, `on_batch_end()`, `on_epoch_begin()`, - `on_epoch_end()` methods are invoked during training. - Note that some metrics may be missing from `logs`. + `on_epoch_end()` methods are invoked during training. Note that some + metrics may be missing from `logs`. run_eagerly: Whether to run model training in pure eager execution. This should be disable for TPUStrategy. sub_model_export_name: If not None, will export `sub_model` returned by @@ -194,6 +197,11 @@ def run_customized_training_loop( when explicit_allreduce=True. train_summary_interval: Step interval for training summaries. If the value is a negative number, then training summaries are not enabled. + allreduce_bytes_per_pack: A non-negative integer. Breaks collective + operations into packs of certain size. If it's zero, all gradients are + in one pack. Breaking gradient into packs could enable overlap between + allreduce and backprop computation. This flag only takes effect when + explicit_allreduce is set to True.' Returns: Trained model. 
@@ -237,7 +245,9 @@ def run_customized_training_loop( assert tf.executing_eagerly() if run_eagerly: - if isinstance(strategy, tf.distribute.experimental.TPUStrategy): + if isinstance( + strategy, + (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)): raise ValueError( 'TPUStrategy should not run eagerly as it heavily relies on graph' ' optimization for the distributed system.') @@ -253,7 +263,7 @@ def run_customized_training_loop( train_iterator = _get_input_iterator(train_input_fn, strategy) eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32) - with distribution_utils.get_strategy_scope(strategy): + with distribute_utils.get_strategy_scope(strategy): # To correctly place the model weights on accelerators, # model and optimizer should be created in scope. model, sub_model = model_fn() @@ -273,12 +283,14 @@ def run_customized_training_loop( logging.info( 'Checkpoint file %s found and restoring from ' 'initial checkpoint for core model.', init_checkpoint) - checkpoint = tf.train.Checkpoint(model=sub_model) - checkpoint.restore(init_checkpoint).assert_existing_objects_matched() + checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model) + checkpoint.read(init_checkpoint).assert_existing_objects_matched() logging.info('Loading from checkpoint file completed') train_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32) - eval_metrics = [metric_fn()] if metric_fn else [] + eval_metrics = metric_fn() if metric_fn else [] + if not isinstance(eval_metrics, list): + eval_metrics = [eval_metrics] # If evaluation is required, make a copy of metric as it will be used by # both train and evaluation. 
train_metrics = [ @@ -325,10 +337,10 @@ def run_customized_training_loop( grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss, training_vars, pre_allreduce_callbacks, - post_allreduce_callbacks) + post_allreduce_callbacks, + allreduce_bytes_per_pack) else: - if isinstance(optimizer, - tf.keras.mixed_precision.experimental.LossScaleOptimizer): + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): with tape: scaled_loss = optimizer.get_scaled_loss(loss) scaled_grads = tape.gradient(scaled_loss, training_vars) @@ -458,8 +470,7 @@ def run_customized_training_loop( callback_list.on_train_begin() while current_step < total_training_steps and not model.stop_training: if current_step % steps_per_epoch == 0: - callback_list.on_epoch_begin( - int(current_step / steps_per_epoch) + 1) + callback_list.on_epoch_begin(int(current_step / steps_per_epoch) + 1) # Training loss/metric are taking average over steps inside micro # training loop. We reset the their values before each round. @@ -524,13 +535,14 @@ def run_customized_training_loop( _save_checkpoint(strategy, checkpoint, model_dir, checkpoint_name.format(step=current_step)) if eval_input_fn: - logging.info('Running evaluation after step: %s.', current_step) - logs = _run_evaluation(current_step, - _get_input_iterator(eval_input_fn, strategy)) # Re-initialize evaluation metric. eval_loss_metric.reset_states() for metric in eval_metrics + model.metrics: metric.reset_states() + + logging.info('Running evaluation after step: %s.', current_step) + logs = _run_evaluation(current_step, + _get_input_iterator(eval_input_fn, strategy)) # We add train_loss here rather than call on_batch_end twice to make # sure that no duplicated values are generated. logs['loss'] = train_loss @@ -548,6 +560,11 @@ def run_customized_training_loop( _save_checkpoint(strategy, checkpoint, model_dir, checkpoint_name.format(step=current_step)) if eval_input_fn: + # Re-initialize evaluation metric. 
+ eval_loss_metric.reset_states() + for metric in eval_metrics + model.metrics: + metric.reset_states() + logging.info('Running final evaluation after training is complete.') logs = _run_evaluation(current_step, _get_input_iterator(eval_input_fn, strategy)) diff --git a/official/nlp/bert/model_training_utils_test.py b/official/nlp/bert/model_training_utils_test.py index 4c85a6c9b520a1b4e39e6abdfde503b35034d29e..544b66834002d09dfabd90169e6f53fa9f2bbaf3 100644 --- a/official/nlp/bert/model_training_utils_test.py +++ b/official/nlp/bert/model_training_utils_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for official.modeling.training.model_training_utils.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for official.modeling.training.model_training_utils.""" import os from absl import logging +from absl.testing import flagsaver from absl.testing import parameterized from absl.testing.absltest import mock import numpy as np @@ -28,20 +25,22 @@ import tensorflow as tf from tensorflow.python.distribute import combinations from tensorflow.python.distribute import strategy_combinations +from official.nlp.bert import common_flags from official.nlp.bert import model_training_utils +common_flags.define_common_bert_flags() + + def eager_strategy_combinations(): return combinations.combine( distribution=[ strategy_combinations.default_strategy, - strategy_combinations.tpu_strategy, + strategy_combinations.cloud_tpu_strategy, strategy_combinations.one_device_strategy_gpu, strategy_combinations.mirrored_strategy_with_gpu_and_cpu, strategy_combinations.mirrored_strategy_with_two_gpus, - ], - mode='eager', - ) + ],) def eager_gpu_strategy_combinations(): @@ -51,9 +50,7 @@ def eager_gpu_strategy_combinations(): strategy_combinations.one_device_strategy_gpu, strategy_combinations.mirrored_strategy_with_gpu_and_cpu, strategy_combinations.mirrored_strategy_with_two_gpus, - ], - mode='eager', - ) + ],) def create_fake_data_input_fn(batch_size, features_shape, num_classes): @@ -106,9 +103,8 @@ def create_model_fn(input_shape, num_classes, use_float16=False): tf.reduce_mean(input_layer), name='mean_input', aggregation='mean') model.optimizer = tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9) if use_float16: - model.optimizer = ( - tf.keras.mixed_precision.experimental.LossScaleOptimizer( - model.optimizer, loss_scale='dynamic')) + model.optimizer = tf.keras.mixed_precision.LossScaleOptimizer( + model.optimizer) 
return model, sub_model return _model_fn @@ -139,9 +135,9 @@ class RecordingCallback(tf.keras.callbacks.Callback): def __init__(self): self.batch_begin = [] # (batch, logs) - self.batch_end = [] # (batch, logs) + self.batch_end = [] # (batch, logs) self.epoch_begin = [] # (epoch, logs) - self.epoch_end = [] # (epoch, logs) + self.epoch_end = [] # (epoch, logs) def on_batch_begin(self, batch, logs=None): self.batch_begin.append((batch, logs)) @@ -162,6 +158,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): super(ModelTrainingUtilsTest, self).setUp() self._model_fn = create_model_fn(input_shape=[128], num_classes=3) + @flagsaver.flagsaver def run_training(self, strategy, model_dir, steps_per_loop, run_eagerly): input_fn = create_fake_data_input_fn( batch_size=8, features_shape=[128], num_classes=3) @@ -184,8 +181,10 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(eager_strategy_combinations()) def test_train_eager_single_step(self, distribution): - model_dir = self.get_temp_dir() - if isinstance(distribution, tf.distribute.experimental.TPUStrategy): + model_dir = self.create_tempdir().full_path + if isinstance( + distribution, + (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)): with self.assertRaises(ValueError): self.run_training( distribution, model_dir, steps_per_loop=1, run_eagerly=True) @@ -195,9 +194,8 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(eager_gpu_strategy_combinations()) def test_train_eager_mixed_precision(self, distribution): - model_dir = self.get_temp_dir() - policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16') - tf.keras.mixed_precision.experimental.set_policy(policy) + model_dir = self.create_tempdir().full_path + tf.keras.mixed_precision.set_global_policy('mixed_float16') self._model_fn = create_model_fn( input_shape=[128], num_classes=3, use_float16=True) self.run_training( @@ 
-205,24 +203,26 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(eager_strategy_combinations()) def test_train_check_artifacts(self, distribution): - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path self.run_training( distribution, model_dir, steps_per_loop=10, run_eagerly=False) # Two checkpoints should be saved after two epochs. files = map(os.path.basename, tf.io.gfile.glob(os.path.join(model_dir, 'ctl_step_*index'))) - self.assertCountEqual(['ctl_step_20.ckpt-1.index', - 'ctl_step_40.ckpt-2.index'], files) + self.assertCountEqual( + ['ctl_step_20.ckpt-1.index', 'ctl_step_40.ckpt-2.index'], files) # Three submodel checkpoints should be saved after two epochs (one after # each epoch plus one final). - files = map(os.path.basename, - tf.io.gfile.glob(os.path.join(model_dir, - 'my_submodel_name*index'))) - self.assertCountEqual(['my_submodel_name.ckpt-3.index', - 'my_submodel_name_step_20.ckpt-1.index', - 'my_submodel_name_step_40.ckpt-2.index'], files) + files = map( + os.path.basename, + tf.io.gfile.glob(os.path.join(model_dir, 'my_submodel_name*index'))) + self.assertCountEqual([ + 'my_submodel_name.ckpt-3.index', + 'my_submodel_name_step_20.ckpt-1.index', + 'my_submodel_name_step_40.ckpt-2.index' + ], files) self.assertNotEmpty( tf.io.gfile.glob( @@ -247,7 +247,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(eager_strategy_combinations()) def test_train_check_callbacks(self, distribution): - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path callback = RecordingCallback() callbacks = [callback] input_fn = create_fake_data_input_fn( @@ -286,9 +286,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): combinations.combine( distribution=[ strategy_combinations.one_device_strategy_gpu, - ], - mode='eager', - )) + ],)) def test_train_check_artifacts_non_chief(self, distribution): # We 
shouldn't export artifacts on non-chief workers. Since there's no easy # way to test with real MultiWorkerMirroredStrategy, we patch the strategy @@ -298,7 +296,7 @@ class ModelTrainingUtilsTest(tf.test.TestCase, parameterized.TestCase): new_callable=mock.PropertyMock, return_value=False), \ mock.patch.object(extended.__class__, 'should_save_summary', new_callable=mock.PropertyMock, return_value=False): - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path self.run_training( distribution, model_dir, steps_per_loop=10, run_eagerly=False) self.assertEmpty(tf.io.gfile.listdir(model_dir)) diff --git a/official/nlp/bert/run_classifier.py b/official/nlp/bert/run_classifier.py index e2eb525ae4335091c78eb4ead72494f8021a7f89..1979af5fb3706350953c3cf7542c026719d5d908 100644 --- a/official/nlp/bert/run_classifier.py +++ b/official/nlp/bert/run_classifier.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """BERT classification or regression finetuning runner in TF 2.x.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import functools import json import math import os +# Import libraries from absl import app from absl import flags from absl import logging import gin import tensorflow as tf +from official.common import distribute_utils from official.modeling import performance from official.nlp import optimization from official.nlp.bert import bert_models @@ -34,7 +33,6 @@ from official.nlp.bert import common_flags from official.nlp.bert import configs as bert_configs from official.nlp.bert import input_pipeline from official.nlp.bert import model_saving_utils -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils flags.DEFINE_enum( @@ -52,6 +50,9 @@ flags.DEFINE_string( 'input_meta_data_path', None, 'Path to file that contains meta data about input ' 'to be used for training and evaluation.') +flags.DEFINE_integer('train_data_size', None, 'Number of training samples ' + 'to use. If None, uses the full train data. 
' + '(default: None).') flags.DEFINE_string('predict_checkpoint_path', None, 'Path to the checkpoint for predictions.') flags.DEFINE_integer( @@ -91,7 +92,8 @@ def get_dataset_fn(input_file_pattern, global_batch_size, is_training, label_type=tf.int64, - include_sample_weights=False): + include_sample_weights=False, + num_samples=None): """Gets a closure to create a dataset.""" def _dataset_fn(ctx=None): @@ -105,7 +107,8 @@ def get_dataset_fn(input_file_pattern, is_training=is_training, input_pipeline_context=ctx, label_type=label_type, - include_sample_weights=include_sample_weights) + include_sample_weights=include_sample_weights, + num_samples=num_samples) return dataset return _dataset_fn @@ -216,8 +219,8 @@ def run_keras_compile_fit(model_dir, optimizer = bert_model.optimizer if init_checkpoint: - checkpoint = tf.train.Checkpoint(model=sub_model) - checkpoint.restore(init_checkpoint).assert_existing_objects_matched() + checkpoint = tf.train.Checkpoint(model=sub_model, encoder=sub_model) + checkpoint.read(init_checkpoint).assert_existing_objects_matched() if not isinstance(metric_fn, (list, tuple)): metric_fn = [metric_fn] @@ -225,7 +228,7 @@ def run_keras_compile_fit(model_dir, optimizer=optimizer, loss=loss_fn, metrics=[fn() for fn in metric_fn], - experimental_steps_per_execution=steps_per_loop) + steps_per_execution=steps_per_loop) summary_dir = os.path.join(model_dir, 'summaries') summary_callback = tf.keras.callbacks.TensorBoard(summary_dir) @@ -262,6 +265,7 @@ def run_keras_compile_fit(model_dir, def get_predictions_and_labels(strategy, trained_model, eval_input_fn, + is_regression=False, return_probs=False): """Obtains predictions of trained model on evaluation data. @@ -272,6 +276,7 @@ def get_predictions_and_labels(strategy, strategy: Distribution strategy. trained_model: Trained model with preloaded weights. eval_input_fn: Input function for evaluation data. + is_regression: Whether it is a regression task. 
return_probs: Whether to return probabilities of classes. Returns: @@ -287,8 +292,11 @@ def get_predictions_and_labels(strategy, """Replicated predictions.""" inputs, labels = inputs logits = trained_model(inputs, training=False) - probabilities = tf.nn.softmax(logits) - return probabilities, labels + if not is_regression: + probabilities = tf.nn.softmax(logits) + return probabilities, labels + else: + return logits, labels outputs, labels = strategy.run(_test_step_fn, args=(next(iterator),)) # outputs: current batch logits as a tuple of shard logits @@ -314,8 +322,7 @@ def get_predictions_and_labels(strategy, tf.experimental.async_clear_error() return preds, golds - test_iter = iter( - strategy.experimental_distribute_datasets_from_function(eval_input_fn)) + test_iter = iter(strategy.distribute_datasets_from_function(eval_input_fn)) predictions, labels = _run_evaluation(test_iter) return predictions, labels @@ -341,9 +348,12 @@ def export_classifier(model_export_path, input_meta_data, bert_config, raise ValueError('Export path is not specified: %s' % model_dir) # Export uses float32 for now, even if training uses mixed precision. 
- tf.keras.mixed_precision.experimental.set_policy('float32') + tf.keras.mixed_precision.set_global_policy('float32') classifier_model = bert_models.classifier_model( - bert_config, input_meta_data.get('num_labels', 1))[0] + bert_config, + input_meta_data.get('num_labels', 1), + hub_module_url=FLAGS.hub_module_url, + hub_module_trainable=False)[0] model_saving_utils.export_bert_model( model_export_path, model=classifier_model, checkpoint_dir=model_dir) @@ -365,6 +375,9 @@ def run_bert(strategy, epochs = FLAGS.num_train_epochs * FLAGS.num_eval_per_epoch train_data_size = ( input_meta_data['train_data_size'] // FLAGS.num_eval_per_epoch) + if FLAGS.train_data_size: + train_data_size = min(train_data_size, FLAGS.train_data_size) + logging.info('Updated train_data_size: %s', train_data_size) steps_per_epoch = int(train_data_size / FLAGS.train_batch_size) warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size) eval_steps = int( @@ -430,7 +443,7 @@ def custom_main(custom_callbacks=None, custom_metrics=None): FLAGS.model_dir) return - strategy = distribution_utils.get_distribution_strategy( + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, tpu_address=FLAGS.tpu) @@ -443,9 +456,10 @@ def custom_main(custom_callbacks=None, custom_metrics=None): include_sample_weights=include_sample_weights) if FLAGS.mode == 'predict': + num_labels = input_meta_data.get('num_labels', 1) with strategy.scope(): classifier_model = bert_models.classifier_model( - bert_config, input_meta_data['num_labels'])[0] + bert_config, num_labels)[0] checkpoint = tf.train.Checkpoint(model=classifier_model) latest_checkpoint_file = ( FLAGS.predict_checkpoint_path or @@ -456,7 +470,11 @@ def custom_main(custom_callbacks=None, custom_metrics=None): checkpoint.restore( latest_checkpoint_file).assert_existing_objects_matched() preds, _ = get_predictions_and_labels( - strategy, classifier_model, eval_input_fn, 
return_probs=True) + strategy, + classifier_model, + eval_input_fn, + is_regression=(num_labels == 1), + return_probs=True) output_predict_file = os.path.join(FLAGS.model_dir, 'test_results.tsv') with tf.io.gfile.GFile(output_predict_file, 'w') as writer: logging.info('***** Predict results *****') @@ -475,7 +493,8 @@ def custom_main(custom_callbacks=None, custom_metrics=None): FLAGS.train_batch_size, is_training=True, label_type=label_type, - include_sample_weights=include_sample_weights) + include_sample_weights=include_sample_weights, + num_samples=FLAGS.train_data_size) run_bert( strategy, input_meta_data, diff --git a/official/nlp/bert/run_pretraining.py b/official/nlp/bert/run_pretraining.py index 44a18fea0ce9d79bea61294e91f0ac00c2ea45e6..6e21b991d7d41a017c34ae9dc139c772f004d91d 100644 --- a/official/nlp/bert/run_pretraining.py +++ b/official/nlp/bert/run_pretraining.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Run masked LM/next sentence pre-training for BERT in TF 2.x.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +# Import libraries from absl import app from absl import flags from absl import logging import gin import tensorflow as tf +from official.common import distribute_utils from official.modeling import performance from official.nlp import optimization from official.nlp.bert import bert_models @@ -29,7 +28,6 @@ from official.nlp.bert import common_flags from official.nlp.bert import configs from official.nlp.bert import input_pipeline from official.nlp.bert import model_training_utils -from official.utils.misc import distribution_utils flags.DEFINE_string('input_files', None, @@ -105,7 +103,11 @@ def run_customized_training(strategy, train_batch_size, use_next_sentence_label=True, train_summary_interval=0, - custom_callbacks=None): + custom_callbacks=None, + explicit_allreduce=False, + pre_allreduce_callbacks=None, + post_allreduce_callbacks=None, + allreduce_bytes_per_pack=0): """Run BERT pretrain model training using low-level API.""" train_input_fn = get_pretrain_dataset_fn(input_files, max_seq_length, @@ -139,6 +141,10 @@ def run_customized_training(strategy, steps_per_loop=steps_per_loop, epochs=epochs, sub_model_export_name='pretrained/bert_model', + explicit_allreduce=explicit_allreduce, + pre_allreduce_callbacks=pre_allreduce_callbacks, + post_allreduce_callbacks=post_allreduce_callbacks, + allreduce_bytes_per_pack=allreduce_bytes_per_pack, train_summary_interval=train_summary_interval, custom_callbacks=custom_callbacks) @@ -158,6 +164,12 @@ def run_bert_pretrain(strategy, custom_callbacks=None): performance.set_mixed_precision_policy(common_flags.dtype()) + # Only when explicit_allreduce = True, post_allreduce_callbacks and + # allreduce_bytes_per_pack will take effect. 
optimizer.apply_gradients() no + # longer implicitly allreduce gradients, users manually allreduce gradient and + # pass the allreduced grads_and_vars to apply_gradients(). + # With explicit_allreduce = True, clip_by_global_norm is moved to after + # allreduce. return run_customized_training( strategy, bert_config, @@ -176,16 +188,25 @@ def run_bert_pretrain(strategy, custom_callbacks=None): FLAGS.train_batch_size, FLAGS.use_next_sentence_label, FLAGS.train_summary_interval, - custom_callbacks=custom_callbacks) + custom_callbacks=custom_callbacks, + explicit_allreduce=FLAGS.explicit_allreduce, + pre_allreduce_callbacks=[ + model_training_utils.clip_by_global_norm_callback + ], + allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack) def main(_): gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_param) if not FLAGS.model_dir: FLAGS.model_dir = '/tmp/bert20/' - strategy = distribution_utils.get_distribution_strategy( + # Configures cluster spec for multi-worker distribution strategy. + if FLAGS.num_gpus > 0: + _ = distribute_utils.configure_cluster(FLAGS.worker_hosts, FLAGS.task_index) + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, + all_reduce_alg=FLAGS.all_reduce_alg, tpu_address=FLAGS.tpu) if strategy: print('***** Number of cores used : ', strategy.num_replicas_in_sync) diff --git a/official/nlp/bert/run_squad.py b/official/nlp/bert/run_squad.py index b12925cfaad2337c28483325c5f942df651add62..8cafb917620abe6d969fecb563c3794bc78afc00 100644 --- a/official/nlp/bert/run_squad.py +++ b/official/nlp/bert/run_squad.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,28 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Run BERT on SQuAD 1.1 and SQuAD 2.0 in TF 2.x.""" import json import os import time +# Import libraries from absl import app from absl import flags from absl import logging import gin import tensorflow as tf - +from official.common import distribute_utils from official.nlp.bert import configs as bert_configs from official.nlp.bert import run_squad_helper from official.nlp.bert import tokenization from official.nlp.data import squad_lib as squad_lib_wp -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils @@ -104,9 +100,8 @@ def main(_): # Configures cluster spec for multi-worker distribution strategy. if FLAGS.num_gpus > 0: - _ = distribution_utils.configure_cluster(FLAGS.worker_hosts, - FLAGS.task_index) - strategy = distribution_utils.get_distribution_strategy( + _ = distribute_utils.configure_cluster(FLAGS.worker_hosts, FLAGS.task_index) + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, all_reduce_alg=FLAGS.all_reduce_alg, diff --git a/official/nlp/bert/run_squad_helper.py b/official/nlp/bert/run_squad_helper.py index b03e356d91bdf6a9edf9486f505526852c6c7ef6..8736de9b053314fd77733a653131cd8179f1fa54 100644 --- a/official/nlp/bert/run_squad_helper.py +++ b/official/nlp/bert/run_squad_helper.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Library for running BERT family models on SQuAD 1.1/2.0 in TF 2.x.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import collections import json import os + from absl import flags from absl import logging import tensorflow as tf @@ -39,10 +37,10 @@ from official.utils.misc import keras_utils def define_common_squad_flags(): """Defines common flags used by SQuAD tasks.""" flags.DEFINE_enum( - 'mode', 'train_and_eval', - ['train_and_eval', 'train_and_predict', - 'train', 'eval', 'predict', 'export_only'], - 'One of {"train_and_eval", "train_and_predict", ' + 'mode', 'train_and_eval', [ + 'train_and_eval', 'train_and_predict', 'train', 'eval', 'predict', + 'export_only' + ], 'One of {"train_and_eval", "train_and_predict", ' '"train", "eval", "predict", "export_only"}. ' '`train_and_eval`: train & predict to json files & compute eval metrics. ' '`train_and_predict`: train & predict to json files. ' @@ -60,12 +58,12 @@ def define_common_squad_flags(): # Model training specific flags. flags.DEFINE_integer('train_batch_size', 32, 'Total batch size for training.') # Predict processing related. - flags.DEFINE_string('predict_file', None, - 'SQuAD prediction json file path. ' - '`predict` mode supports multiple files: one can use ' - 'wildcard to specify multiple files and it can also be ' - 'multiple file patterns separated by comma. Note that ' - '`eval` mode only supports a single predict file.') + flags.DEFINE_string( + 'predict_file', None, 'SQuAD prediction json file path. 
' + '`predict` mode supports multiple files: one can use ' + 'wildcard to specify multiple files and it can also be ' + 'multiple file patterns separated by comma. Note that ' + '`eval` mode only supports a single predict file.') flags.DEFINE_bool( 'do_lower_case', True, 'Whether to lower case the input text. Should be True for uncased ' @@ -97,10 +95,7 @@ def define_common_squad_flags(): FLAGS = flags.FLAGS -def squad_loss_fn(start_positions, - end_positions, - start_logits, - end_logits): +def squad_loss_fn(start_positions, end_positions, start_logits, end_logits): """Returns sparse categorical crossentropy for start/end logits.""" start_loss = tf.keras.losses.sparse_categorical_crossentropy( start_positions, start_logits, from_logits=True) @@ -118,11 +113,8 @@ def get_loss_fn(): start_positions = labels['start_positions'] end_positions = labels['end_positions'] start_logits, end_logits = model_outputs - return squad_loss_fn( - start_positions, - end_positions, - start_logits, - end_logits) + return squad_loss_fn(start_positions, end_positions, start_logits, + end_logits) return _loss_fn @@ -168,7 +160,7 @@ def get_squad_model_to_predict(strategy, bert_config, checkpoint_path, """Gets a squad model to make predictions.""" with strategy.scope(): # Prediction always uses float32, even if training uses mixed precision. 
- tf.keras.mixed_precision.experimental.set_policy('float32') + tf.keras.mixed_precision.set_global_policy('float32') squad_model, _ = bert_models.squad_model( bert_config, input_meta_data['max_seq_length'], @@ -182,11 +174,8 @@ def get_squad_model_to_predict(strategy, bert_config, checkpoint_path, return squad_model -def predict_squad_customized(strategy, - input_meta_data, - predict_tfrecord_path, - num_steps, - squad_model): +def predict_squad_customized(strategy, input_meta_data, predict_tfrecord_path, + num_steps, squad_model): """Make predictions using a Bert-based squad model.""" predict_dataset_fn = get_dataset_fn( predict_tfrecord_path, @@ -194,8 +183,7 @@ def predict_squad_customized(strategy, FLAGS.predict_batch_size, is_training=False) predict_iterator = iter( - strategy.experimental_distribute_datasets_from_function( - predict_dataset_fn)) + strategy.distribute_datasets_from_function(predict_dataset_fn)) @tf.function def predict_step(iterator): @@ -259,8 +247,7 @@ def train_squad(strategy, hub_module_trainable=FLAGS.hub_module_trainable) optimizer = optimization.create_optimizer(FLAGS.learning_rate, steps_per_epoch * epochs, - warmup_steps, - FLAGS.end_lr, + warmup_steps, FLAGS.end_lr, FLAGS.optimizer_type) squad_model.optimizer = performance.configure_optimizer( @@ -269,15 +256,12 @@ def train_squad(strategy, use_graph_rewrite=common_flags.use_graph_rewrite()) return squad_model, core_model - # If explicit_allreduce = True, apply_gradients() no longer implicitly - # allreduce gradients, users manually allreduce gradient and pass the - # allreduced grads_and_vars to apply_gradients(). clip_by_global_norm will be - # applied to allreduced gradients. 
- def clip_by_global_norm_callback(grads_and_vars): - grads, variables = zip(*grads_and_vars) - (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) - return zip(clipped_grads, variables) - + # Only when explicit_allreduce = True, post_allreduce_callbacks and + # allreduce_bytes_per_pack will take effect. optimizer.apply_gradients() no + # longer implicitly allreduce gradients, users manually allreduce gradient and + # pass the allreduced grads_and_vars to apply_gradients(). + # With explicit_allreduce = True, clip_by_global_norm is moved to after + # allreduce. model_training_utils.run_customized_training_loop( strategy=strategy, model_fn=_get_squad_model, @@ -291,8 +275,11 @@ def train_squad(strategy, sub_model_export_name=sub_model_export_name, run_eagerly=run_eagerly, custom_callbacks=custom_callbacks, - explicit_allreduce=False, - post_allreduce_callbacks=[clip_by_global_norm_callback]) + explicit_allreduce=FLAGS.explicit_allreduce, + pre_allreduce_callbacks=[ + model_training_utils.clip_by_global_norm_callback + ], + allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack) def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib, @@ -344,8 +331,9 @@ def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib, logging.info(' Batch size = %d', FLAGS.predict_batch_size) num_steps = int(dataset_size / FLAGS.predict_batch_size) - all_results = predict_squad_customized( - strategy, input_meta_data, eval_writer.filename, num_steps, squad_model) + all_results = predict_squad_customized(strategy, input_meta_data, + eval_writer.filename, num_steps, + squad_model) all_predictions, all_nbest_json, scores_diff_json = ( squad_lib.postprocess_output( @@ -362,8 +350,12 @@ def prediction_output_squad(strategy, input_meta_data, tokenizer, squad_lib, return all_predictions, all_nbest_json, scores_diff_json -def dump_to_files(all_predictions, all_nbest_json, scores_diff_json, - squad_lib, version_2_with_negative, file_prefix=''): 
+def dump_to_files(all_predictions, + all_nbest_json, + scores_diff_json, + squad_lib, + version_2_with_negative, + file_prefix=''): """Save output to json files.""" output_prediction_file = os.path.join(FLAGS.model_dir, '%spredictions.json' % file_prefix) @@ -452,8 +444,7 @@ def eval_squad(strategy, dataset_json = json.load(reader) pred_dataset = dataset_json['data'] if input_meta_data.get('version_2_with_negative', False): - eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, - all_predictions, + eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions, scores_diff_json) else: eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions) @@ -474,7 +465,7 @@ def export_squad(model_export_path, input_meta_data, bert_config): if not model_export_path: raise ValueError('Export path is not specified: %s' % model_export_path) # Export uses float32 for now, even if training uses mixed precision. - tf.keras.mixed_precision.experimental.set_policy('float32') + tf.keras.mixed_precision.set_global_policy('float32') squad_model, _ = bert_models.squad_model(bert_config, input_meta_data['max_seq_length']) model_saving_utils.export_bert_model( diff --git a/official/nlp/bert/serving.py b/official/nlp/bert/serving.py index 895f61dc37adf40d93ea347817abbb18966e157e..7e27869c74b30ae5ce1a8a9b75760d0d8013640a 100644 --- a/official/nlp/bert/serving.py +++ b/official/nlp/bert/serving.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Examples of SavedModel export for tf-serving.""" from absl import app @@ -22,11 +21,11 @@ import tensorflow as tf from official.nlp.bert import bert_models from official.nlp.bert import configs -flags.DEFINE_integer("sequence_length", None, - "Sequence length to parse the tf.Example. If " - "sequence_length > 0, add a signature for serialized " - "tf.Example and define the parsing specification by the " - "sequence_length.") +flags.DEFINE_integer( + "sequence_length", None, "Sequence length to parse the tf.Example. If " + "sequence_length > 0, add a signature for serialized " + "tf.Example and define the parsing specification by the " + "sequence_length.") flags.DEFINE_string("bert_config_file", None, "Bert configuration file to define core bert layers.") flags.DEFINE_string("model_checkpoint_path", None, diff --git a/official/nlp/bert/squad_evaluate_v1_1.py b/official/nlp/bert/squad_evaluate_v1_1.py index c7f4f4de66813cb4fbdc59cc716911fac064f0c9..a39f571c37b002ab10cfe36a1454827d91512945 100644 --- a/official/nlp/bert/squad_evaluate_v1_1.py +++ b/official/nlp/bert/squad_evaluate_v1_1.py @@ -1,4 +1,5 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Evaluation of SQuAD predictions (version 1.1). 
The functions are copied from @@ -22,15 +23,12 @@ Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, Percy Liang https://nlp.stanford.edu/pubs/rajpurkar2016squad.pdf """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import collections import re import string # pylint: disable=g-bad-import-order + from absl import logging # pylint: enable=g-bad-import-order diff --git a/official/nlp/bert/squad_evaluate_v2_0.py b/official/nlp/bert/squad_evaluate_v2_0.py index 54fb84e993c3459ffdd2b3d90f870e4d178ab54f..12c5a7e3d6b406e45e4f91580f8b4198733db37c 100644 --- a/official/nlp/bert/squad_evaluate_v2_0.py +++ b/official/nlp/bert/squad_evaluate_v2_0.py @@ -1,4 +1,5 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Evaluation script for SQuAD version 2.0. The functions are copied and modified from @@ -22,10 +23,6 @@ This file is expected to map question ID's to the model's predicted probability that a question is unanswerable. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import collections import re import string diff --git a/official/nlp/bert/tf1_checkpoint_converter_lib.py b/official/nlp/bert/tf1_checkpoint_converter_lib.py index 122e455210ae70cd9af04912b95a600a3d23d09a..035a694385abfede7314188e38ab6801b6fef70a 100644 --- a/official/nlp/bert/tf1_checkpoint_converter_lib.py +++ b/official/nlp/bert/tf1_checkpoint_converter_lib.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np import tensorflow.compat.v1 as tf # TF 1.x @@ -53,6 +50,7 @@ BERT_V2_NAME_REPLACEMENTS = ( ("output/dense", "output"), ("output/LayerNorm", "output_layer_norm"), ("pooler/dense", "pooler_transform"), + ("cls/predictions", "bert/cls/predictions"), ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"), ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"), ("cls/seq_relationship/output_weights", @@ -111,11 +109,20 @@ def _get_new_shape(name, shape, num_heads): return None -def create_v2_checkpoint(model, src_checkpoint, output_path): +def create_v2_checkpoint(model, + src_checkpoint, + output_path, + checkpoint_model_name="model"): """Converts a name-based matched TF V1 checkpoint to TF V2 checkpoint.""" # 
Uses streaming-restore in eager model to read V1 name-based checkpoints. model.load_weights(src_checkpoint).assert_existing_objects_matched() - checkpoint = tf.train.Checkpoint(model=model) + if hasattr(model, "checkpoint_items"): + checkpoint_items = model.checkpoint_items + else: + checkpoint_items = {} + + checkpoint_items[checkpoint_model_name] = model + checkpoint = tf.train.Checkpoint(**checkpoint_items) checkpoint.save(output_path) @@ -164,7 +171,6 @@ def convert(checkpoint_from_path, new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads) if new_shape: tf.logging.info("Veriable %s has a shape change from %s to %s", - var_name, tensor.shape, new_shape) tensor = np.reshape(tensor, new_shape) diff --git a/official/nlp/bert/tf2_encoder_checkpoint_converter.py b/official/nlp/bert/tf2_encoder_checkpoint_converter.py index 2faf6ea2cfb9f0d71d0a79dff101e0408fa41778..9fced5daee95479c28cffd2b63dffcf6f2d90408 100644 --- a/official/nlp/bert/tf2_encoder_checkpoint_converter.py +++ b/official/nlp/bert/tf2_encoder_checkpoint_converter.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A converter from a V1 BERT encoder checkpoint to a V2 encoder checkpoint. The conversion will yield an object-oriented checkpoint that can be used -to restore a TransformerEncoder object. +to restore a BertEncoder or BertPretrainerV2 object (see the `converted_model` +FLAG below). 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os @@ -27,9 +25,10 @@ from absl import app from absl import flags import tensorflow as tf -from official.modeling import activations +from official.modeling import tf_utils from official.nlp.bert import configs from official.nlp.bert import tf1_checkpoint_converter_lib +from official.nlp.modeling import models from official.nlp.modeling import networks FLAGS = flags.FLAGS @@ -42,6 +41,14 @@ flags.DEFINE_string( "BertModel, with no task heads.)") flags.DEFINE_string("converted_checkpoint_path", None, "Name for the created object-based V2 checkpoint.") +flags.DEFINE_string("checkpoint_model_name", "encoder", + "The name of the model when saving the checkpoint, i.e., " + "the checkpoint will be saved using: " + "tf.train.Checkpoint(FLAGS.checkpoint_model_name=model).") +flags.DEFINE_enum( + "converted_model", "encoder", ["encoder", "pretrainer"], + "Whether to convert the checkpoint to a `BertEncoder` model or a " + "`BertPretrainerV2` model (with mlm but without classification heads).") def _create_bert_model(cfg): @@ -49,19 +56,20 @@ def _create_bert_model(cfg): Args: cfg: A `BertConfig` to create the core model. + Returns: - A TransformerEncoder netowork. + A BertEncoder network. 
""" - bert_encoder = networks.TransformerEncoder( + bert_encoder = networks.BertEncoder( vocab_size=cfg.vocab_size, hidden_size=cfg.hidden_size, num_layers=cfg.num_hidden_layers, num_attention_heads=cfg.num_attention_heads, intermediate_size=cfg.intermediate_size, - activation=activations.gelu, + activation=tf_utils.get_activation(cfg.hidden_act), dropout_rate=cfg.hidden_dropout_prob, attention_dropout_rate=cfg.attention_probs_dropout_prob, - sequence_length=cfg.max_position_embeddings, + max_sequence_length=cfg.max_position_embeddings, type_vocab_size=cfg.type_vocab_size, initializer=tf.keras.initializers.TruncatedNormal( stddev=cfg.initializer_range), @@ -70,13 +78,39 @@ def _create_bert_model(cfg): return bert_encoder -def convert_checkpoint(bert_config, output_path, v1_checkpoint): +def _create_bert_pretrainer_model(cfg): + """Creates a BERT keras core model from BERT configuration. + + Args: + cfg: A `BertConfig` to create the core model. + + Returns: + A BertPretrainerV2 model. + """ + bert_encoder = _create_bert_model(cfg) + pretrainer = models.BertPretrainerV2( + encoder_network=bert_encoder, + mlm_activation=tf_utils.get_activation(cfg.hidden_act), + mlm_initializer=tf.keras.initializers.TruncatedNormal( + stddev=cfg.initializer_range)) + # Makes sure the pretrainer variables are created. + _ = pretrainer(pretrainer.inputs) + return pretrainer + + +def convert_checkpoint(bert_config, + output_path, + v1_checkpoint, + checkpoint_model_name="model", + converted_model="encoder"): """Converts a V1 checkpoint into an OO V2 checkpoint.""" output_dir, _ = os.path.split(output_path) + tf.io.gfile.makedirs(output_dir) # Create a temporary V1 name-converted checkpoint in the output directory. 
temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1") temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt") + tf1_checkpoint_converter_lib.convert( checkpoint_from_path=v1_checkpoint, checkpoint_to_path=temporary_checkpoint, @@ -85,10 +119,17 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint): permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS, exclude_patterns=["adam", "Adam"]) + if converted_model == "encoder": + model = _create_bert_model(bert_config) + elif converted_model == "pretrainer": + model = _create_bert_pretrainer_model(bert_config) + else: + raise ValueError("Unsupported converted_model: %s" % converted_model) + # Create a V2 checkpoint from the temporary checkpoint. - model = _create_bert_model(bert_config) tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint, - output_path) + output_path, + checkpoint_model_name) # Clean up the temporary checkpoint, if it exists. try: @@ -98,11 +139,21 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint): pass -def main(_): +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + output_path = FLAGS.converted_checkpoint_path v1_checkpoint = FLAGS.checkpoint_to_convert + checkpoint_model_name = FLAGS.checkpoint_model_name + converted_model = FLAGS.converted_model bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) - convert_checkpoint(bert_config, output_path, v1_checkpoint) + convert_checkpoint( + bert_config=bert_config, + output_path=output_path, + v1_checkpoint=v1_checkpoint, + checkpoint_model_name=checkpoint_model_name, + converted_model=converted_model) if __name__ == "__main__": diff --git a/official/nlp/bert/tokenization.py b/official/nlp/bert/tokenization.py index b0f7e27e320c727c4eee511fc63ebb63929250c7..ea1546e3c29f33c593c64a4341366254da328b86 100644 --- a/official/nlp/bert/tokenization.py +++ b/official/nlp/bert/tokenization.py @@ -1,5 +1,4 @@ -# 
coding=utf-8 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# coding=utf-8 """Tokenization classes implementation. The file is forked from: https://github.com/google-research/bert/blob/master/tokenization.py. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import collections import re import unicodedata @@ -421,7 +417,7 @@ def preprocess_text(inputs, remove_space=True, lower=False): """Preprocesses data by removing extra space and normalize data. This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/master/albert/tokenization.py + https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py Args: inputs: The input text. @@ -454,7 +450,7 @@ def encode_pieces(sp_model, text, sample=False): """Segements text into pieces. This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/master/albert/tokenization.py + https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py Args: @@ -496,7 +492,7 @@ def encode_ids(sp_model, text, sample=False): """Segments text and return token ids. 
This method is used together with sentence piece tokenizer and is forked from: - https://github.com/google-research/google-research/blob/master/albert/tokenization.py + https://github.com/google-research/google-research/blob/e1f6fa00/albert/tokenization.py Args: sp_model: A spm.SentencePieceProcessor object. diff --git a/official/nlp/bert/tokenization_test.py b/official/nlp/bert/tokenization_test.py index 4a0503c3ed6999e3bd81aec4de8f7d64ec733bd9..07759de20b7c6eaf1a964c110da645215c10753a 100644 --- a/official/nlp/bert/tokenization_test.py +++ b/official/nlp/bert/tokenization_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import tempfile diff --git a/official/nlp/configs/__init__.py b/official/nlp/configs/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/configs/__init__.py +++ b/official/nlp/configs/__init__.py @@ -1 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/official/nlp/configs/bert.py b/official/nlp/configs/bert.py index fad49e29debd0864448b00899725b55101c8f293..cf78de0388bf76b68cd6df8cc656842bbfc90b64 100644 --- a/official/nlp/configs/bert.py +++ b/official/nlp/configs/bert.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Multi-head BERT encoder network with classification heads. Includes configurations and instantiation methods. @@ -20,13 +19,9 @@ Includes configurations and instantiation methods. 
from typing import List, Optional, Text import dataclasses -import tensorflow as tf -from official.modeling import tf_utils from official.modeling.hyperparams import base_config from official.nlp.configs import encoders -from official.nlp.modeling import layers -from official.nlp.modeling.models import bert_pretrainer @dataclasses.dataclass @@ -40,32 +35,9 @@ class ClsHeadConfig(base_config.Config): @dataclasses.dataclass -class BertPretrainerConfig(base_config.Config): - """BERT encoder configuration.""" - encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) +class PretrainerConfig(base_config.Config): + """Pretrainer configuration.""" + encoder: encoders.EncoderConfig = encoders.EncoderConfig() cls_heads: List[ClsHeadConfig] = dataclasses.field(default_factory=list) - - -def instantiate_classification_heads_from_cfgs( - cls_head_configs: List[ClsHeadConfig]) -> List[layers.ClassificationHead]: - return [ - layers.ClassificationHead(**cfg.as_dict()) for cfg in cls_head_configs - ] if cls_head_configs else [] - - -def instantiate_pretrainer_from_cfg( - config: BertPretrainerConfig, - encoder_network: Optional[tf.keras.Model] = None -) -> bert_pretrainer.BertPretrainerV2: - """Instantiates a BertPretrainer from the config.""" - encoder_cfg = config.encoder - if encoder_network is None: - encoder_network = encoders.instantiate_encoder_from_cfg(encoder_cfg) - return bert_pretrainer.BertPretrainerV2( - mlm_activation=tf_utils.get_activation(encoder_cfg.hidden_activation), - mlm_initializer=tf.keras.initializers.TruncatedNormal( - stddev=encoder_cfg.initializer_range), - encoder_network=encoder_network, - classification_heads=instantiate_classification_heads_from_cfgs( - config.cls_heads)) + mlm_activation: str = "gelu" + mlm_initializer_range: float = 0.02 diff --git a/official/nlp/configs/bert_test.py b/official/nlp/configs/bert_test.py deleted file mode 100644 index 
871ab45373c430667f2cf45f93492947aaa3c4e9..0000000000000000000000000000000000000000 --- a/official/nlp/configs/bert_test.py +++ /dev/null @@ -1,66 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for BERT configurations and models instantiation.""" - -import tensorflow as tf - -from official.nlp.configs import bert -from official.nlp.configs import encoders - - -class BertModelsTest(tf.test.TestCase): - - def test_network_invocation(self): - config = bert.BertPretrainerConfig( - encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1)) - _ = bert.instantiate_pretrainer_from_cfg(config) - - # Invokes with classification heads. 
- config = bert.BertPretrainerConfig( - encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name="next_sentence") - ]) - _ = bert.instantiate_pretrainer_from_cfg(config) - - with self.assertRaises(ValueError): - config = bert.BertPretrainerConfig( - encoder=encoders.TransformerEncoderConfig( - vocab_size=10, num_layers=1), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name="next_sentence"), - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name="next_sentence") - ]) - _ = bert.instantiate_pretrainer_from_cfg(config) - - def test_checkpoint_items(self): - config = bert.BertPretrainerConfig( - encoder=encoders.TransformerEncoderConfig(vocab_size=10, num_layers=1), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name="next_sentence") - ]) - encoder = bert.instantiate_pretrainer_from_cfg(config) - self.assertSameElements( - encoder.checkpoint_items.keys(), - ["encoder", "masked_lm", "next_sentence.pooler_dense"]) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/configs/electra.py b/official/nlp/configs/electra.py index 1dae96a86ac225038aa1cbb72141e586cea687a6..5e62297667a470fd192779d8dc7f5c5117836804 100644 --- a/official/nlp/configs/electra.py +++ b/official/nlp/configs/electra.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,71 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """ELECTRA model configurations and instantiation methods.""" -from typing import List, Optional +from typing import List import dataclasses -import tensorflow as tf -from official.modeling import tf_utils from official.modeling.hyperparams import base_config from official.nlp.configs import bert from official.nlp.configs import encoders -from official.nlp.modeling import layers -from official.nlp.modeling.models import electra_pretrainer @dataclasses.dataclass -class ELECTRAPretrainerConfig(base_config.Config): +class ElectraPretrainerConfig(base_config.Config): """ELECTRA pretrainer configuration.""" num_masked_tokens: int = 76 sequence_length: int = 512 num_classes: int = 2 discriminator_loss_weight: float = 50.0 - generator_encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) - discriminator_encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) + tie_embeddings: bool = True + disallow_correct: bool = False + generator_encoder: encoders.EncoderConfig = encoders.EncoderConfig() + discriminator_encoder: encoders.EncoderConfig = encoders.EncoderConfig() cls_heads: List[bert.ClsHeadConfig] = dataclasses.field(default_factory=list) - - -def instantiate_classification_heads_from_cfgs( - cls_head_configs: List[bert.ClsHeadConfig] -) -> List[layers.ClassificationHead]: - if cls_head_configs: - return [ - layers.ClassificationHead(**cfg.as_dict()) for cfg in cls_head_configs - ] - else: - return [] - - -def instantiate_pretrainer_from_cfg( - config: ELECTRAPretrainerConfig, - generator_network: Optional[tf.keras.Model] = None, - discriminator_network: Optional[tf.keras.Model] = None, - ) -> electra_pretrainer.ElectraPretrainer: - """Instantiates ElectraPretrainer from the config.""" - generator_encoder_cfg = config.generator_encoder - discriminator_encoder_cfg = config.discriminator_encoder - if generator_network is None: - 
generator_network = encoders.instantiate_encoder_from_cfg( - generator_encoder_cfg) - if discriminator_network is None: - discriminator_network = encoders.instantiate_encoder_from_cfg( - discriminator_encoder_cfg) - return electra_pretrainer.ElectraPretrainer( - generator_network=generator_network, - discriminator_network=discriminator_network, - vocab_size=config.generator_encoder.vocab_size, - num_classes=config.num_classes, - sequence_length=config.sequence_length, - last_hidden_dim=config.generator_encoder.hidden_size, - num_token_predictions=config.num_masked_tokens, - mlm_activation=tf_utils.get_activation( - generator_encoder_cfg.hidden_activation), - mlm_initializer=tf.keras.initializers.TruncatedNormal( - stddev=generator_encoder_cfg.initializer_range), - classification_heads=instantiate_classification_heads_from_cfgs( - config.cls_heads)) diff --git a/official/nlp/configs/electra_test.py b/official/nlp/configs/electra_test.py deleted file mode 100644 index d06d64a95d6ef987cdb34a471521853001f11339..0000000000000000000000000000000000000000 --- a/official/nlp/configs/electra_test.py +++ /dev/null @@ -1,49 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for ELECTRA configurations and models instantiation.""" - -import tensorflow as tf - -from official.nlp.configs import bert -from official.nlp.configs import electra -from official.nlp.configs import encoders - - -class ELECTRAModelsTest(tf.test.TestCase): - - def test_network_invocation(self): - config = electra.ELECTRAPretrainerConfig( - generator_encoder=encoders.TransformerEncoderConfig( - vocab_size=10, num_layers=1), - discriminator_encoder=encoders.TransformerEncoderConfig( - vocab_size=10, num_layers=2), - ) - _ = electra.instantiate_pretrainer_from_cfg(config) - - # Invokes with classification heads. - config = electra.ELECTRAPretrainerConfig( - generator_encoder=encoders.TransformerEncoderConfig( - vocab_size=10, num_layers=1), - discriminator_encoder=encoders.TransformerEncoderConfig( - vocab_size=10, num_layers=2), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=2, name="next_sentence") - ]) - _ = electra.instantiate_pretrainer_from_cfg(config) - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/configs/encoders.py b/official/nlp/configs/encoders.py index 6286af135c7b097dce79c3d8b4c5e66e1666968a..690c8b83e3846ce6a641b222ece5685e96492d8d 100644 --- a/official/nlp/configs/encoders.py +++ b/official/nlp/configs/encoders.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,22 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Transformer Encoders. 
-Includes configurations and instantiation methods. +Includes configurations and factory methods. """ +from typing import Optional + +from absl import logging import dataclasses import gin import tensorflow as tf +from official.modeling import hyperparams from official.modeling import tf_utils -from official.modeling.hyperparams import base_config from official.nlp.modeling import networks +from official.nlp.projects.bigbird import encoder as bigbird_encoder @dataclasses.dataclass -class TransformerEncoderConfig(base_config.Config): +class BertEncoderConfig(hyperparams.Config): """BERT encoder configuration.""" vocab_size: int = 30522 hidden_size: int = 768 @@ -40,56 +43,303 @@ class TransformerEncoderConfig(base_config.Config): max_position_embeddings: int = 512 type_vocab_size: int = 2 initializer_range: float = 0.02 + embedding_size: Optional[int] = None + output_range: Optional[int] = None + return_all_encoder_outputs: bool = False + + +@dataclasses.dataclass +class MobileBertEncoderConfig(hyperparams.Config): + """MobileBERT encoder configuration. + + Attributes: + word_vocab_size: number of words in the vocabulary. + word_embed_size: word embedding size. + type_vocab_size: number of word types. + max_sequence_length: maximum length of input sequence. + num_blocks: number of transformer block in the encoder model. + hidden_size: the hidden size for the transformer block. + num_attention_heads: number of attention heads in the transformer block. + intermediate_size: the size of the "intermediate" (a.k.a., feed forward) + layer. + hidden_activation: the non-linear activation function to apply to the + output of the intermediate/feed-forward layer. + hidden_dropout_prob: dropout probability for the hidden layers. + attention_probs_dropout_prob: dropout probability of the attention + probabilities. + intra_bottleneck_size: the size of bottleneck. + initializer_range: The stddev of the truncated_normal_initializer for + initializing all weight matrices. 
+ use_bottleneck_attention: Use attention inputs from the bottleneck + transformation. If true, the following `key_query_shared_bottleneck` + will be ignored. + key_query_shared_bottleneck: whether to share linear transformation for keys + and queries. + num_feedforward_networks: number of stacked feed-forward networks. + normalization_type: the type of normalization_type, only 'no_norm' and + 'layer_norm' are supported. 'no_norm' represents the element-wise linear + transformation for the student model, as suggested by the original + MobileBERT paper. 'layer_norm' is used for the teacher model. + classifier_activation: if using the tanh activation for the final + representation of the [CLS] token in fine-tuning. + """ + word_vocab_size: int = 30522 + word_embed_size: int = 128 + type_vocab_size: int = 2 + max_sequence_length: int = 512 + num_blocks: int = 24 + hidden_size: int = 512 + num_attention_heads: int = 4 + intermediate_size: int = 4096 + hidden_activation: str = "gelu" + hidden_dropout_prob: float = 0.1 + attention_probs_dropout_prob: float = 0.1 + intra_bottleneck_size: int = 1024 + initializer_range: float = 0.02 + use_bottleneck_attention: bool = False + key_query_shared_bottleneck: bool = False + num_feedforward_networks: int = 1 + normalization_type: str = "layer_norm" + classifier_activation: bool = True + input_mask_dtype: str = "int32" + + +@dataclasses.dataclass +class AlbertEncoderConfig(hyperparams.Config): + """ALBERT encoder configuration.""" + vocab_size: int = 30000 + embedding_width: int = 128 + hidden_size: int = 768 + num_layers: int = 12 + num_attention_heads: int = 12 + hidden_activation: str = "gelu" + intermediate_size: int = 3072 + dropout_rate: float = 0.0 + attention_dropout_rate: float = 0.0 + max_position_embeddings: int = 512 + type_vocab_size: int = 2 + initializer_range: float = 0.02 + + +@dataclasses.dataclass +class BigBirdEncoderConfig(hyperparams.Config): + """BigBird encoder configuration.""" + vocab_size: int = 50358 + 
hidden_size: int = 768 + num_layers: int = 12 + num_attention_heads: int = 12 + hidden_activation: str = "gelu" + intermediate_size: int = 3072 + dropout_rate: float = 0.1 + attention_dropout_rate: float = 0.1 + max_position_embeddings: int = 4096 + num_rand_blocks: int = 3 + block_size: int = 64 + type_vocab_size: int = 16 + initializer_range: float = 0.02 + embedding_width: Optional[int] = None + use_gradient_checkpointing: bool = False + + +@dataclasses.dataclass +class XLNetEncoderConfig(hyperparams.Config): + """XLNet encoder configuration.""" + vocab_size: int = 32000 + num_layers: int = 24 + hidden_size: int = 1024 + num_attention_heads: int = 16 + head_size: int = 64 + inner_size: int = 4096 + inner_activation: str = "gelu" + dropout_rate: float = 0.1 + attention_dropout_rate: float = 0.1 + attention_type: str = "bi" + bi_data: bool = False + tie_attention_biases: bool = False + memory_length: int = 0 + same_length: bool = False + clamp_length: int = -1 + reuse_length: int = 0 + use_cls_mask: bool = False + embedding_width: int = 1024 + initializer_range: float = 0.02 + two_stream: bool = False + + +@dataclasses.dataclass +class EncoderConfig(hyperparams.OneOfConfig): + """Encoder configuration.""" + type: Optional[str] = "bert" + albert: AlbertEncoderConfig = AlbertEncoderConfig() + bert: BertEncoderConfig = BertEncoderConfig() + bigbird: BigBirdEncoderConfig = BigBirdEncoderConfig() + mobilebert: MobileBertEncoderConfig = MobileBertEncoderConfig() + xlnet: XLNetEncoderConfig = XLNetEncoderConfig() + + +ENCODER_CLS = { + "bert": networks.BertEncoder, + "mobilebert": networks.MobileBERTEncoder, + "albert": networks.AlbertEncoder, + "bigbird": bigbird_encoder.BigBirdEncoder, + "xlnet": networks.XLNetBase, +} @gin.configurable -def instantiate_encoder_from_cfg(config: TransformerEncoderConfig, - encoder_cls=networks.TransformerEncoder): - """Instantiate a Transformer encoder network from TransformerEncoderConfig.""" +def build_encoder(config: EncoderConfig, + 
embedding_layer: Optional[tf.keras.layers.Layer] = None, + encoder_cls=None, + bypass_config: bool = False): + """Instantiate a Transformer encoder network from EncoderConfig. + + Args: + config: the one-of encoder config, which provides encoder parameters of a + chosen encoder. + embedding_layer: an external embedding layer passed to the encoder. + encoder_cls: an external encoder cls not included in the supported encoders, + usually used by gin.configurable. + bypass_config: whether to ignore config instance to create the object with + `encoder_cls`. + + Returns: + An encoder instance. + """ + encoder_type = config.type + encoder_cfg = config.get() + encoder_cls = encoder_cls or ENCODER_CLS[encoder_type] + logging.info("Encoder class: %s to build...", encoder_cls.__name__) + if bypass_config: + return encoder_cls() if encoder_cls.__name__ == "EncoderScaffold": embedding_cfg = dict( - vocab_size=config.vocab_size, - type_vocab_size=config.type_vocab_size, - hidden_size=config.hidden_size, - seq_length=None, - max_seq_length=config.max_position_embeddings, + vocab_size=encoder_cfg.vocab_size, + type_vocab_size=encoder_cfg.type_vocab_size, + hidden_size=encoder_cfg.hidden_size, + max_seq_length=encoder_cfg.max_position_embeddings, initializer=tf.keras.initializers.TruncatedNormal( - stddev=config.initializer_range), - dropout_rate=config.dropout_rate, + stddev=encoder_cfg.initializer_range), + dropout_rate=encoder_cfg.dropout_rate, ) hidden_cfg = dict( - num_attention_heads=config.num_attention_heads, - intermediate_size=config.intermediate_size, + num_attention_heads=encoder_cfg.num_attention_heads, + intermediate_size=encoder_cfg.intermediate_size, intermediate_activation=tf_utils.get_activation( - config.hidden_activation), - dropout_rate=config.dropout_rate, - attention_dropout_rate=config.attention_dropout_rate, + encoder_cfg.hidden_activation), + dropout_rate=encoder_cfg.dropout_rate, + attention_dropout_rate=encoder_cfg.attention_dropout_rate, 
kernel_initializer=tf.keras.initializers.TruncatedNormal( - stddev=config.initializer_range), + stddev=encoder_cfg.initializer_range), ) kwargs = dict( embedding_cfg=embedding_cfg, hidden_cfg=hidden_cfg, - num_hidden_instances=config.num_layers, - pooled_output_dim=config.hidden_size, + num_hidden_instances=encoder_cfg.num_layers, + pooled_output_dim=encoder_cfg.hidden_size, pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( - stddev=config.initializer_range)) + stddev=encoder_cfg.initializer_range), + return_all_layer_outputs=encoder_cfg.return_all_encoder_outputs, + dict_outputs=True) return encoder_cls(**kwargs) - if encoder_cls.__name__ != "TransformerEncoder": - raise ValueError("Unknown encoder network class. %s" % str(encoder_cls)) - encoder_network = encoder_cls( - vocab_size=config.vocab_size, - hidden_size=config.hidden_size, - num_layers=config.num_layers, - num_attention_heads=config.num_attention_heads, - intermediate_size=config.intermediate_size, - activation=tf_utils.get_activation(config.hidden_activation), - dropout_rate=config.dropout_rate, - attention_dropout_rate=config.attention_dropout_rate, - sequence_length=None, - max_sequence_length=config.max_position_embeddings, - type_vocab_size=config.type_vocab_size, + if encoder_type == "mobilebert": + return encoder_cls( + word_vocab_size=encoder_cfg.word_vocab_size, + word_embed_size=encoder_cfg.word_embed_size, + type_vocab_size=encoder_cfg.type_vocab_size, + max_sequence_length=encoder_cfg.max_sequence_length, + num_blocks=encoder_cfg.num_blocks, + hidden_size=encoder_cfg.hidden_size, + num_attention_heads=encoder_cfg.num_attention_heads, + intermediate_size=encoder_cfg.intermediate_size, + intermediate_act_fn=encoder_cfg.hidden_activation, + hidden_dropout_prob=encoder_cfg.hidden_dropout_prob, + attention_probs_dropout_prob=encoder_cfg.attention_probs_dropout_prob, + intra_bottleneck_size=encoder_cfg.intra_bottleneck_size, + initializer_range=encoder_cfg.initializer_range, + 
use_bottleneck_attention=encoder_cfg.use_bottleneck_attention, + key_query_shared_bottleneck=encoder_cfg.key_query_shared_bottleneck, + num_feedforward_networks=encoder_cfg.num_feedforward_networks, + normalization_type=encoder_cfg.normalization_type, + classifier_activation=encoder_cfg.classifier_activation, + input_mask_dtype=encoder_cfg.input_mask_dtype) + + if encoder_type == "albert": + return encoder_cls( + vocab_size=encoder_cfg.vocab_size, + embedding_width=encoder_cfg.embedding_width, + hidden_size=encoder_cfg.hidden_size, + num_layers=encoder_cfg.num_layers, + num_attention_heads=encoder_cfg.num_attention_heads, + max_sequence_length=encoder_cfg.max_position_embeddings, + type_vocab_size=encoder_cfg.type_vocab_size, + intermediate_size=encoder_cfg.intermediate_size, + activation=tf_utils.get_activation(encoder_cfg.hidden_activation), + dropout_rate=encoder_cfg.dropout_rate, + attention_dropout_rate=encoder_cfg.attention_dropout_rate, + initializer=tf.keras.initializers.TruncatedNormal( + stddev=encoder_cfg.initializer_range), + dict_outputs=True) + + if encoder_type == "bigbird": + return encoder_cls( + vocab_size=encoder_cfg.vocab_size, + hidden_size=encoder_cfg.hidden_size, + num_layers=encoder_cfg.num_layers, + num_attention_heads=encoder_cfg.num_attention_heads, + intermediate_size=encoder_cfg.intermediate_size, + activation=tf_utils.get_activation(encoder_cfg.hidden_activation), + dropout_rate=encoder_cfg.dropout_rate, + attention_dropout_rate=encoder_cfg.attention_dropout_rate, + num_rand_blocks=encoder_cfg.num_rand_blocks, + block_size=encoder_cfg.block_size, + max_position_embeddings=encoder_cfg.max_position_embeddings, + type_vocab_size=encoder_cfg.type_vocab_size, + initializer=tf.keras.initializers.TruncatedNormal( + stddev=encoder_cfg.initializer_range), + embedding_width=encoder_cfg.embedding_width, + use_gradient_checkpointing=encoder_cfg.use_gradient_checkpointing) + + if encoder_type == "xlnet": + return encoder_cls( + 
vocab_size=encoder_cfg.vocab_size, + num_layers=encoder_cfg.num_layers, + hidden_size=encoder_cfg.hidden_size, + num_attention_heads=encoder_cfg.num_attention_heads, + head_size=encoder_cfg.head_size, + inner_size=encoder_cfg.inner_size, + dropout_rate=encoder_cfg.dropout_rate, + attention_dropout_rate=encoder_cfg.attention_dropout_rate, + attention_type=encoder_cfg.attention_type, + bi_data=encoder_cfg.bi_data, + two_stream=encoder_cfg.two_stream, + tie_attention_biases=encoder_cfg.tie_attention_biases, + memory_length=encoder_cfg.memory_length, + clamp_length=encoder_cfg.clamp_length, + reuse_length=encoder_cfg.reuse_length, + inner_activation=encoder_cfg.inner_activation, + use_cls_mask=encoder_cfg.use_cls_mask, + embedding_width=encoder_cfg.embedding_width, + initializer=tf.keras.initializers.RandomNormal( + stddev=encoder_cfg.initializer_range)) + + # Uses the default BERTEncoder configuration schema to create the encoder. + # If it does not match, please add a switch branch by the encoder type. 
+ return encoder_cls( + vocab_size=encoder_cfg.vocab_size, + hidden_size=encoder_cfg.hidden_size, + num_layers=encoder_cfg.num_layers, + num_attention_heads=encoder_cfg.num_attention_heads, + intermediate_size=encoder_cfg.intermediate_size, + activation=tf_utils.get_activation(encoder_cfg.hidden_activation), + dropout_rate=encoder_cfg.dropout_rate, + attention_dropout_rate=encoder_cfg.attention_dropout_rate, + max_sequence_length=encoder_cfg.max_position_embeddings, + type_vocab_size=encoder_cfg.type_vocab_size, initializer=tf.keras.initializers.TruncatedNormal( - stddev=config.initializer_range)) - return encoder_network + stddev=encoder_cfg.initializer_range), + output_range=encoder_cfg.output_range, + embedding_width=encoder_cfg.embedding_size, + embedding_layer=embedding_layer, + return_all_encoder_outputs=encoder_cfg.return_all_encoder_outputs, + dict_outputs=True) diff --git a/official/nlp/configs/encoders_test.py b/official/nlp/configs/encoders_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1fe3b16e97846cb38c65c58c6f01d8c29cf3f246 --- /dev/null +++ b/official/nlp/configs/encoders_test.py @@ -0,0 +1,42 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.configs.encoders.""" +import os + +import tensorflow as tf + +from official.modeling import hyperparams +from official.nlp.configs import encoders + + +class EncodersTest(tf.test.TestCase): + + def test_encoder_from_yaml(self): + config = encoders.EncoderConfig( + type="bert", bert=encoders.BertEncoderConfig(num_layers=1)) + encoder = encoders.build_encoder(config) + ckpt = tf.train.Checkpoint(encoder=encoder) + ckpt_path = ckpt.save(self.get_temp_dir() + "/ckpt") + params_save_path = os.path.join(self.get_temp_dir(), "params.yaml") + hyperparams.save_params_dict_to_yaml(config, params_save_path) + + restored_cfg = encoders.EncoderConfig.from_yaml(params_save_path) + restored_encoder = encoders.build_encoder(restored_cfg) + status = tf.train.Checkpoint(encoder=restored_encoder).restore(ckpt_path) + status.assert_consumed() + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/configs/experiment_configs.py b/official/nlp/configs/experiment_configs.py new file mode 100644 index 0000000000000000000000000000000000000000..78294e656c48ea0f7425164845f03485ddd7e442 --- /dev/null +++ b/official/nlp/configs/experiment_configs.py @@ -0,0 +1,19 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Experiments definition.""" +# pylint: disable=unused-import +from official.nlp.configs import finetuning_experiments +from official.nlp.configs import pretraining_experiments +from official.nlp.configs import wmt_transformer_experiments diff --git a/official/nlp/configs/experiments/glue_mnli_matched.yaml b/official/nlp/configs/experiments/glue_mnli_matched.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29dfcb68b9c314d309239c321dde4ec4f439da1d --- /dev/null +++ b/official/nlp/configs/experiments/glue_mnli_matched.yaml @@ -0,0 +1,49 @@ +task: + hub_module_url: '' + model: + num_classes: 3 + init_checkpoint: '' + metric_type: 'accuracy' + train_data: + drop_remainder: true + global_batch_size: 32 + input_path: '' + is_training: true + seq_length: 128 + label_type: 'int' + validation_data: + drop_remainder: false + global_batch_size: 32 + input_path: '' + is_training: false + seq_length: 128 + label_type: 'int' +trainer: + checkpoint_interval: 3000 + optimizer_config: + learning_rate: + polynomial: + # 100% of train_steps. + decay_steps: 36813 + end_learning_rate: 0.0 + initial_learning_rate: 3.0e-05 + power: 1.0 + type: polynomial + optimizer: + type: adamw + warmup: + polynomial: + power: 1 + # ~10% of train_steps. + warmup_steps: 3681 + type: polynomial + steps_per_loop: 1000 + summary_interval: 1000 + # Training data size 392,702 examples, 3 epochs. + train_steps: 36813 + validation_interval: 6135 + # Eval data size = 9815 examples. 
+ validation_steps: 307 + best_checkpoint_export_subdir: 'best_ckpt' + best_checkpoint_eval_metric: 'cls_accuracy' + best_checkpoint_metric_comp: 'higher' diff --git a/official/nlp/configs/experiments/squad_v1.yaml b/official/nlp/configs/experiments/squad_v1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a69710a58f7dfa4e044bceb73c5870701ca39189 --- /dev/null +++ b/official/nlp/configs/experiments/squad_v1.yaml @@ -0,0 +1,50 @@ +task: + hub_module_url: '' + max_answer_length: 30 + n_best_size: 20 + null_score_diff_threshold: 0.0 + init_checkpoint: '' + train_data: + drop_remainder: true + global_batch_size: 48 + input_path: '' + is_training: true + seq_length: 384 + validation_data: + do_lower_case: true + doc_stride: 128 + drop_remainder: false + global_batch_size: 48 + input_path: '' + is_training: false + query_length: 64 + seq_length: 384 + tokenization: WordPiece + version_2_with_negative: false + vocab_file: '' +trainer: + checkpoint_interval: 1000 + max_to_keep: 5 + optimizer_config: + learning_rate: + polynomial: + decay_steps: 3699 + end_learning_rate: 0.0 + initial_learning_rate: 8.0e-05 + power: 1.0 + type: polynomial + optimizer: + type: adamw + warmup: + polynomial: + power: 1 + warmup_steps: 370 + type: polynomial + steps_per_loop: 1000 + summary_interval: 1000 + train_steps: 3699 + validation_interval: 1000 + validation_steps: 226 + best_checkpoint_export_subdir: 'best_ckpt' + best_checkpoint_eval_metric: 'final_f1' + best_checkpoint_metric_comp: 'higher' diff --git a/official/nlp/configs/finetuning_experiments.py b/official/nlp/configs/finetuning_experiments.py new file mode 100644 index 0000000000000000000000000000000000000000..c7ad27f3400b780bf2fb196080b3ef3091a26f49 --- /dev/null +++ b/official/nlp/configs/finetuning_experiments.py @@ -0,0 +1,139 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Finetuning experiment configurations.""" +# pylint: disable=g-doc-return-or-yield,line-too-long +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import optimization +from official.nlp.data import question_answering_dataloader +from official.nlp.data import sentence_prediction_dataloader +from official.nlp.data import tagging_dataloader +from official.nlp.tasks import question_answering +from official.nlp.tasks import sentence_prediction +from official.nlp.tasks import tagging + + +@exp_factory.register_config_factory('bert/sentence_prediction') +def bert_sentence_prediction() -> cfg.ExperimentConfig: + r"""BERT GLUE.""" + config = cfg.ExperimentConfig( + task=sentence_prediction.SentencePredictionConfig( + train_data=sentence_prediction_dataloader + .SentencePredictionDataConfig(), + validation_data=sentence_prediction_dataloader + .SentencePredictionDataConfig( + is_training=False, drop_remainder=False)), + trainer=cfg.TrainerConfig( + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adamw', + 'adamw': { + 'weight_decay_rate': + 0.01, + 'exclude_from_weight_decay': + ['LayerNorm', 'layer_norm', 'bias'], + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 3e-5, + 'end_learning_rate': 0.0, + } + }, + 'warmup': { + 'type': 'polynomial' + } + })), + restrictions=[ + 
'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + config.task.model.encoder.type = 'bert' + return config + + +@exp_factory.register_config_factory('bert/squad') +def bert_squad() -> cfg.ExperimentConfig: + """BERT Squad V1/V2.""" + config = cfg.ExperimentConfig( + task=question_answering.QuestionAnsweringConfig( + train_data=question_answering_dataloader.QADataConfig(), + validation_data=question_answering_dataloader.QADataConfig()), + trainer=cfg.TrainerConfig( + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adamw', + 'adamw': { + 'weight_decay_rate': + 0.01, + 'exclude_from_weight_decay': + ['LayerNorm', 'layer_norm', 'bias'], + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 8e-5, + 'end_learning_rate': 0.0, + } + }, + 'warmup': { + 'type': 'polynomial' + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + config.task.model.encoder.type = 'bert' + return config + + +@exp_factory.register_config_factory('bert/tagging') +def bert_tagging() -> cfg.ExperimentConfig: + """BERT tagging task.""" + config = cfg.ExperimentConfig( + task=tagging.TaggingConfig( + train_data=tagging_dataloader.TaggingDataConfig(), + validation_data=tagging_dataloader.TaggingDataConfig( + is_training=False, drop_remainder=False)), + trainer=cfg.TrainerConfig( + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adamw', + 'adamw': { + 'weight_decay_rate': + 0.01, + 'exclude_from_weight_decay': + ['LayerNorm', 'layer_norm', 'bias'], + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 8e-5, + 'end_learning_rate': 0.0, + } + }, + 'warmup': { + 'type': 'polynomial' + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None', + ]) + return config diff --git 
a/official/nlp/configs/models/bert_en_uncased_base.yaml b/official/nlp/configs/models/bert_en_uncased_base.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e49bc5430ed0135aa6d981421aad623f4f1fac9 --- /dev/null +++ b/official/nlp/configs/models/bert_en_uncased_base.yaml @@ -0,0 +1,16 @@ +task: + model: + encoder: + type: bert + bert: + attention_dropout_rate: 0.1 + dropout_rate: 0.1 + hidden_activation: gelu + hidden_size: 768 + initializer_range: 0.02 + intermediate_size: 3072 + max_position_embeddings: 512 + num_attention_heads: 12 + num_layers: 12 + type_vocab_size: 2 + vocab_size: 30522 diff --git a/official/nlp/configs/pretraining_experiments.py b/official/nlp/configs/pretraining_experiments.py new file mode 100644 index 0000000000000000000000000000000000000000..1635c1e5bf4123f2b8638242cc84e93665fe6885 --- /dev/null +++ b/official/nlp/configs/pretraining_experiments.py @@ -0,0 +1,82 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Pretraining experiment configurations.""" +# pylint: disable=g-doc-return-or-yield,line-too-long +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import optimization +from official.nlp.data import pretrain_dataloader +from official.nlp.data import pretrain_dynamic_dataloader +from official.nlp.tasks import masked_lm + +_TRAINER = cfg.TrainerConfig( + train_steps=1000000, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adamw', + 'adamw': { + 'weight_decay_rate': + 0.01, + 'exclude_from_weight_decay': [ + 'LayerNorm', 'layer_norm', 'bias' + ], + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 1e-4, + 'end_learning_rate': 0.0, + } + }, + 'warmup': { + 'type': 'polynomial' + } + })) + + +@exp_factory.register_config_factory('bert/pretraining') +def bert_pretraining() -> cfg.ExperimentConfig: + """BERT pretraining experiment.""" + config = cfg.ExperimentConfig( + task=masked_lm.MaskedLMConfig( + train_data=pretrain_dataloader.BertPretrainDataConfig(), + validation_data=pretrain_dataloader.BertPretrainDataConfig( + is_training=False)), + trainer=_TRAINER, + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config + + +@exp_factory.register_config_factory('bert/pretraining_dynamic') +def bert_dynamic() -> cfg.ExperimentConfig: + """BERT base with dynamic input sequences. + + TPU needs to run with tf.data service with round-robin behavior. 
+ """ + config = cfg.ExperimentConfig( + task=masked_lm.MaskedLMConfig( + train_data=pretrain_dynamic_dataloader.BertPretrainDataConfig(), + validation_data=pretrain_dataloader.BertPretrainDataConfig( + is_training=False)), + trainer=_TRAINER, + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config diff --git a/official/nlp/configs/wmt_transformer_experiments.py b/official/nlp/configs/wmt_transformer_experiments.py new file mode 100644 index 0000000000000000000000000000000000000000..23eed0778673dda5a6e82ee052fcf65ac64ef421 --- /dev/null +++ b/official/nlp/configs/wmt_transformer_experiments.py @@ -0,0 +1,110 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# pylint: disable=g-doc-return-or-yield,line-too-long +"""WMT translation configurations.""" + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import optimization +from official.nlp.data import wmt_dataloader +from official.nlp.tasks import translation + + +@exp_factory.register_config_factory('wmt_transformer/large') +def wmt_transformer_large() -> cfg.ExperimentConfig: + """WMT Transformer Large. 
+ + Please refer to + tensorflow_models/official/nlp/data/train_sentencepiece.py + to generate sentencepiece_model + and pass + --params_override=task.sentencepiece_model_path='YOUR_PATH' + to the train script. + """ + learning_rate = 2.0 + hidden_size = 1024 + learning_rate *= (hidden_size**-0.5) + warmup_steps = 16000 + train_steps = 300000 + token_batch_size = 24576 + encdecoder = translation.EncDecoder( + num_attention_heads=16, intermediate_size=hidden_size * 4) + config = cfg.ExperimentConfig( + task=translation.TranslationConfig( + model=translation.ModelConfig( + encoder=encdecoder, + decoder=encdecoder, + embedding_width=hidden_size, + padded_decode=True, + decode_max_length=100), + train_data=wmt_dataloader.WMTDataConfig( + tfds_name='wmt14_translate/de-en', + tfds_split='train', + src_lang='en', + tgt_lang='de', + is_training=True, + global_batch_size=token_batch_size, + static_batch=True, + max_seq_length=64 + ), + validation_data=wmt_dataloader.WMTDataConfig( + tfds_name='wmt14_translate/de-en', + tfds_split='test', + src_lang='en', + tgt_lang='de', + is_training=False, + global_batch_size=32, + static_batch=True, + max_seq_length=100, + ), + sentencepiece_model_path=None, + ), + trainer=cfg.TrainerConfig( + train_steps=train_steps, + validation_steps=-1, + steps_per_loop=1000, + summary_interval=1000, + checkpoint_interval=5000, + validation_interval=5000, + max_to_keep=1, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'adam', + 'adam': { + 'beta_2': 0.997, + 'epsilon': 1e-9, + }, + }, + 'learning_rate': { + 'type': 'power', + 'power': { + 'initial_learning_rate': learning_rate, + 'power': -0.5, + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': warmup_steps, + 'warmup_learning_rate': 0.0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.sentencepiece_model_path != None', + ]) + return config diff --git a/official/nlp/continuous_finetune_lib.py 
b/official/nlp/continuous_finetune_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..996547138c08023306e453dfb4b3a9e13c766ae3 --- /dev/null +++ b/official/nlp/continuous_finetune_lib.py @@ -0,0 +1,215 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFM continuous finetuning+eval training driver library.""" +import gc +import os +import time +from typing import Any, Mapping, Optional + +from absl import logging +import tensorflow as tf + +from official.common import distribute_utils +from official.core import config_definitions +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance +from official.modeling.multitask import configs +from official.modeling.multitask import multitask +from official.modeling.multitask import train_lib as multitask_train_lib + + +def _flatten_dict(xs): + """Flatten a nested dictionary. + + The nested keys are flattened to a tuple. + + Example:: + + xs = {'foo': 1, 'bar': {'a': 2, 'b': {}}} + flat_xs = flatten_dict(xs) + print(flat_xs) + # { + # ('foo',): 1, + # ('bar', 'a'): 2, + # } + + Note that empty dictionaries are ignored and + will not be restored by `unflatten_dict`. + + Args: + xs: a nested dictionary + + Returns: + The flattened dictionary. 
+ """ + assert isinstance(xs, dict), 'input is not a dict' + + def _flatten(xs, prefix): + if not isinstance(xs, dict): + return {prefix: xs} + result = {} + for key, value in xs.items(): + path = prefix + (key,) + result.update(_flatten(value, path)) + return result + + return _flatten(xs, ()) + + +def run_continuous_finetune( + mode: str, + params: config_definitions.ExperimentConfig, + model_dir: str, + run_post_eval: bool = False, + pretrain_steps: Optional[int] = None, +) -> Mapping[str, Any]: + """Run modes with continuous training. + + Currently only supports continuous_train_and_eval. + + Args: + mode: A 'str', specifying the mode. continuous_train_and_eval - monitors a + checkpoint directory. Once a new checkpoint is discovered, loads the + checkpoint, finetune the model by training it (probably on another dataset + or with another task), then evaluate the finetuned model. + params: ExperimentConfig instance. + model_dir: A 'str', a path to store model checkpoints and summaries. + run_post_eval: Whether to run post eval once after training, metrics logs + are returned. + pretrain_steps: Optional, the number of total training steps for the + pretraining job. + + Returns: + eval logs: returns eval metrics logs when run_post_eval is set to True, + otherwise, returns {}. + """ + + assert mode == 'continuous_train_and_eval', ( + 'Only continuous_train_and_eval is supported by continuous_finetune. ' + 'Got mode: {}'.format(mode)) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + + retry_times = 0 + while not tf.io.gfile.isdir(params.task.init_checkpoint): + # Wait for the init_checkpoint directory to be created. + if retry_times >= 60: + raise ValueError( + 'ExperimentConfig.task.init_checkpoint must be a directory for ' + 'continuous_train_and_eval mode.') + retry_times += 1 + time.sleep(60) + + summary_writer = tf.summary.create_file_writer( + os.path.join(model_dir, 'eval')) + + global_step = 0 + + def timeout_fn(): + if pretrain_steps and global_step < pretrain_steps: + # Keeps waiting for another timeout period. + logging.info( + 'Continue waiting for new checkpoint as current pretrain ' + 'global_step=%d and target is %d.', global_step, pretrain_steps) + return False + # Quits the loop. + return True + + for pretrain_ckpt in tf.train.checkpoints_iterator( + checkpoint_dir=params.task.init_checkpoint, + min_interval_secs=10, + timeout=params.trainer.continuous_eval_timeout, + timeout_fn=timeout_fn): + + # If there are checkpoints, they might be the finetune checkpoint of a + # different pretrained checkpoint. So we just remove all checkpoints. + train_utils.remove_ckpts(model_dir) + + with distribution_strategy.scope(): + global_step = train_utils.read_global_step_from_checkpoint(pretrain_ckpt) + # Replaces params.task.init_checkpoint to make sure that we load + # exactly this pretrain checkpoint. 
+ if params.trainer.best_checkpoint_export_subdir: + best_ckpt_subdir = '{}_{}'.format( + params.trainer.best_checkpoint_export_subdir, global_step) + params_replaced = params.replace( + task={'init_checkpoint': pretrain_ckpt}, + trainer={'best_checkpoint_export_subdir': best_ckpt_subdir}) + else: + params_replaced = params.replace(task={'init_checkpoint': pretrain_ckpt}) + params_replaced.lock() + logging.info('Running finetuning with params: %s', params_replaced) + + with distribution_strategy.scope(): + if isinstance(params, configs.MultiEvalExperimentConfig): + task = task_factory.get_task(params_replaced.task) + eval_tasks = multitask.MultiTask.from_config(params_replaced.eval_tasks) + (_, + eval_metrics) = multitask_train_lib.run_experiment_with_multitask_eval( + distribution_strategy=distribution_strategy, + train_task=task, + eval_tasks=eval_tasks, + mode='train_and_eval', + params=params_replaced, + model_dir=model_dir, + run_post_eval=True, + save_summary=False) + else: + task = task_factory.get_task( + params_replaced.task, logging_dir=model_dir) + _, eval_metrics = train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode='train_and_eval', + params=params_replaced, + model_dir=model_dir, + run_post_eval=True, + save_summary=False) + logging.info('Evaluation finished. Pretrain global_step: %d', global_step) + train_utils.write_json_summary(model_dir, global_step, eval_metrics) + + if not os.path.basename(model_dir): # if model_dir.endswith('/') + summary_grp = os.path.dirname(model_dir) + '_' + task.name + else: + summary_grp = os.path.basename(model_dir) + '_' + task.name + summaries = {} + for name, value in _flatten_dict(eval_metrics).items(): + summaries[summary_grp + '/' + '-'.join(name)] = value + train_utils.write_summary(summary_writer, global_step, summaries) + + train_utils.remove_ckpts(model_dir) + # In TF2, the resource life cycle is bound with the python object life + # cycle. 
Force trigger python garbage collection here so those resources + # can be deallocated in time, so it doesn't cause OOM when allocating new + # objects. + # TODO(b/169178664): Fix cycle reference in Keras model and revisit to see + # if we need gc here. + gc.collect() + + if run_post_eval: + return eval_metrics + return {} diff --git a/official/nlp/continuous_finetune_lib_test.py b/official/nlp/continuous_finetune_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..08ee381dce133d73e18e697b938cab92d04f2ff0 --- /dev/null +++ b/official/nlp/continuous_finetune_lib_test.py @@ -0,0 +1,98 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from absl import flags +from absl.testing import flagsaver +from absl.testing import parameterized +import tensorflow as tf + +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.nlp import continuous_finetune_lib + +FLAGS = flags.FLAGS + +tfm_flags.define_flags() + + +class ContinuousFinetuneTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super().setUp() + self._model_dir = os.path.join(self.get_temp_dir(), 'model_dir') + + def testContinuousFinetune(self): + pretrain_steps = 1 + src_model_dir = self.get_temp_dir() + flags_dict = dict( + experiment='mock', + mode='continuous_train_and_eval', + model_dir=self._model_dir, + params_override={ + 'task': { + 'init_checkpoint': src_model_dir, + }, + 'trainer': { + 'continuous_eval_timeout': 1, + 'steps_per_loop': 1, + 'train_steps': 1, + 'validation_steps': 1, + 'best_checkpoint_export_subdir': 'best_ckpt', + 'best_checkpoint_eval_metric': 'acc', + 'optimizer_config': { + 'optimizer': { + 'type': 'sgd' + }, + 'learning_rate': { + 'type': 'constant' + } + } + } + }) + + with flagsaver.flagsaver(**flags_dict): + # Train and save some checkpoints. 
+ params = train_utils.parse_configuration(flags.FLAGS) + distribution_strategy = tf.distribute.get_strategy() + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=src_model_dir) + _ = train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode='train', + params=params, + model_dir=src_model_dir) + + params = train_utils.parse_configuration(FLAGS) + eval_metrics = continuous_finetune_lib.run_continuous_finetune( + FLAGS.mode, + params, + FLAGS.model_dir, + run_post_eval=True, + pretrain_steps=pretrain_steps) + self.assertIn('best_acc', eval_metrics) + + self.assertFalse( + tf.io.gfile.exists(os.path.join(FLAGS.model_dir, 'checkpoint'))) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/data/__init__.py b/official/nlp/data/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/data/__init__.py +++ b/official/nlp/data/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/data/classifier_data_lib.py b/official/nlp/data/classifier_data_lib.py index 0c882d9c09f916b6e04b96d2d325884384860923..222485a9f4f64115ea7cc3732a9a4fbcced8b3c6 100644 --- a/official/nlp/data/classifier_data_lib.py +++ b/official/nlp/data/classifier_data_lib.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""BERT library to process data for classification task.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""BERT library to process data for classification task.""" import collections import csv import importlib +import json import os from absl import logging @@ -39,7 +36,7 @@ class InputExample(object): text_b=None, label=None, weight=None, - int_iden=None): + example_id=None): """Constructs a InputExample. Args: @@ -53,15 +50,15 @@ class InputExample(object): examples, but not for test examples. weight: (Optional) float. The weight of the example to be used during training. - int_iden: (Optional) int. The int identification number of example in the - corpus. + example_id: (Optional) int. The int identification number of example in + the corpus. 
""" self.guid = guid self.text_a = text_a self.text_b = text_b self.label = label self.weight = weight - self.int_iden = int_iden + self.example_id = example_id class InputFeatures(object): @@ -74,14 +71,14 @@ class InputFeatures(object): label_id, is_real_example=True, weight=None, - int_iden=None): + example_id=None): self.input_ids = input_ids self.input_mask = input_mask self.segment_ids = segment_ids self.label_id = label_id self.is_real_example = is_real_example self.weight = weight - self.int_iden = int_iden + self.example_id = example_id class DataProcessor(object): @@ -123,6 +120,63 @@ class DataProcessor(object): lines.append(line) return lines + @classmethod + def _read_jsonl(cls, input_file): + """Reads a json line file.""" + with tf.io.gfile.GFile(input_file, "r") as f: + lines = [] + for json_str in f: + lines.append(json.loads(json_str)) + return lines + + +class AxProcessor(DataProcessor): + """Processor for the AX dataset (GLUE diagnostics dataset).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test.tsv")), "test") + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + @staticmethod + def get_processor_name(): + """See base class.""" + return "AX" + + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + text_a_index = 1 if set_type == "test" else 8 + text_b_index = 2 if set_type == "test" else 9 + examples = [] + for i, line in enumerate(lines): + # Skip header. 
+ if i == 0: + continue + guid = "%s-%s" % (set_type, self.process_text_fn(line[0])) + text_a = self.process_text_fn(line[text_a_index]) + text_b = self.process_text_fn(line[text_b_index]) + if set_type == "test": + label = "contradiction" + else: + label = self.process_text_fn(line[-1]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + class ColaProcessor(DataProcessor): """Processor for the CoLA data set (GLUE version).""" @@ -152,10 +206,10 @@ class ColaProcessor(DataProcessor): return "COLA" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): - # Only the test set has a header + for i, line in enumerate(lines): + # Only the test set has a header. if set_type == "test" and i == 0: continue guid = "%s-%s" % (set_type, i) @@ -170,9 +224,55 @@ class ColaProcessor(DataProcessor): return examples +class ImdbProcessor(DataProcessor): + """Processor for the IMDb dataset.""" + + def get_labels(self): + return ["neg", "pos"] + + def get_train_examples(self, data_dir): + return self._create_examples(os.path.join(data_dir, "train")) + + def get_dev_examples(self, data_dir): + return self._create_examples(os.path.join(data_dir, "test")) + + @staticmethod + def get_processor_name(): + """See base class.""" + return "IMDB" + + def _create_examples(self, data_dir): + """Creates examples.""" + examples = [] + for label in ["neg", "pos"]: + cur_dir = os.path.join(data_dir, label) + for filename in tf.io.gfile.listdir(cur_dir): + if not filename.endswith("txt"): + continue + + if len(examples) % 1000 == 0: + logging.info("Loading dev example %d", len(examples)) + + path = os.path.join(cur_dir, filename) + with tf.io.gfile.GFile(path, "r") as f: + text = f.read().strip().replace("
", " ") + examples.append( + InputExample( + guid="unused_id", text_a=text, text_b=None, label=label)) + return examples + + class MnliProcessor(DataProcessor): """Processor for the MultiNLI data set (GLUE version).""" + def __init__(self, + mnli_type="matched", + process_text_fn=tokenization.convert_to_unicode): + super(MnliProcessor, self).__init__(process_text_fn) + if mnli_type not in ("matched", "mismatched"): + raise ValueError("Invalid `mnli_type`: %s" % mnli_type) + self.mnli_type = mnli_type + def get_train_examples(self, data_dir): """See base class.""" return self._create_examples( @@ -180,14 +280,23 @@ class MnliProcessor(DataProcessor): def get_dev_examples(self, data_dir): """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), - "dev_matched") + if self.mnli_type == "matched": + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), + "dev_matched") + else: + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), + "dev_mismatched") def get_test_examples(self, data_dir): """See base class.""" - return self._create_examples( - self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test") + if self.mnli_type == "matched": + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test") + else: + return self._create_examples( + self._read_tsv(os.path.join(data_dir, "test_mismatched.tsv")), "test") def get_labels(self): """See base class.""" @@ -199,9 +308,9 @@ class MnliProcessor(DataProcessor): return "MNLI" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, self.process_text_fn(line[0])) @@ -244,9 +353,9 @@ class 
MrpcProcessor(DataProcessor): return "MRPC" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, i) @@ -290,7 +399,7 @@ class PawsxProcessor(DataProcessor): self._read_tsv(os.path.join(data_dir, language, train_tsv))[1:]) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): guid = "train-%d" % i text_a = self.process_text_fn(line[1]) text_b = self.process_text_fn(line[2]) @@ -307,7 +416,7 @@ class PawsxProcessor(DataProcessor): self._read_tsv(os.path.join(data_dir, lang, "dev_2k.tsv"))[1:]) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): guid = "dev-%d" % i text_a = self.process_text_fn(line[1]) text_b = self.process_text_fn(line[2]) @@ -321,7 +430,7 @@ class PawsxProcessor(DataProcessor): examples_by_lang = {k: [] for k in self.supported_languages} for lang in self.supported_languages: lines = self._read_tsv(os.path.join(data_dir, lang, "test_2k.tsv"))[1:] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): guid = "test-%d" % i text_a = self.process_text_fn(line[1]) text_b = self.process_text_fn(line[2]) @@ -368,9 +477,9 @@ class QnliProcessor(DataProcessor): return "QNLI" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, 1) @@ -415,18 +524,24 @@ class QqpProcessor(DataProcessor): return "QQP" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): + 
for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, line[0]) - try: - text_a = line[3] - text_b = line[4] - label = line[5] - except IndexError: - continue + if set_type == "test": + text_a = line[1] + text_b = line[2] + label = "0" + else: + # There appear to be some garbage lines in the train dataset. + try: + text_a = line[3] + text_b = line[4] + label = line[5] + except IndexError: + continue examples.append( InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples @@ -462,7 +577,7 @@ class RteProcessor(DataProcessor): return "RTE" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] for i, line in enumerate(lines): if i == 0: @@ -507,9 +622,9 @@ class SstProcessor(DataProcessor): return "SST-2" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "%s-%s" % (set_type, i) @@ -558,7 +673,7 @@ class StsBProcessor(DataProcessor): return "STS-B" def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] for i, line in enumerate(lines): if i == 0: @@ -671,7 +786,7 @@ class TfdsProcessor(DataProcessor): return "TFDS_" + self.dataset_name def _create_examples(self, split_name, set_type): - """Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" if split_name not in self.dataset: raise ValueError("Split {} not available.".format(split_name)) dataset = self.dataset[split_name].as_numpy_iterator() @@ -731,7 +846,7 @@ class WnliProcessor(DataProcessor): return "WNLI" def _create_examples(self, lines, set_type): - 
"""Creates examples for the training and dev sets.""" + """Creates examples for the training/dev/test sets.""" examples = [] for i, line in enumerate(lines): if i == 0: @@ -777,7 +892,7 @@ class XnliProcessor(DataProcessor): "multinli.train.%s.tsv" % language))[1:]) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): guid = "train-%d" % i text_a = self.process_text_fn(line[0]) text_b = self.process_text_fn(line[1]) @@ -792,7 +907,7 @@ class XnliProcessor(DataProcessor): """See base class.""" lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv")) examples = [] - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "dev-%d" % i @@ -807,7 +922,7 @@ class XnliProcessor(DataProcessor): """See base class.""" lines = self._read_tsv(os.path.join(data_dir, "xnli.test.tsv")) examples_by_lang = {k: [] for k in XnliProcessor.supported_languages} - for (i, line) in enumerate(lines): + for i, line in enumerate(lines): if i == 0: continue guid = "test-%d" % i @@ -833,45 +948,104 @@ class XtremePawsxProcessor(DataProcessor): """Processor for the XTREME PAWS-X data set.""" supported_languages = ["de", "en", "es", "fr", "ja", "ko", "zh"] + def __init__(self, + process_text_fn=tokenization.convert_to_unicode, + translated_data_dir=None, + only_use_en_dev=True): + """See base class. + + Args: + process_text_fn: See base class. + translated_data_dir: If specified, will also include translated data in + the training and testing data. + only_use_en_dev: If True, only use english dev data. Otherwise, use dev + data from all languages. 
+ """ + super(XtremePawsxProcessor, self).__init__(process_text_fn) + self.translated_data_dir = translated_data_dir + self.only_use_en_dev = only_use_en_dev + def get_train_examples(self, data_dir): """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "train-en.tsv")) examples = [] - for (i, line) in enumerate(lines): - guid = "train-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.translated_data_dir is None: + lines = self._read_tsv(os.path.join(data_dir, "train-en.tsv")) + for i, line in enumerate(lines): + guid = "train-%d" % i + text_a = self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + else: + for lang in self.supported_languages: + lines = self._read_tsv( + os.path.join(self.translated_data_dir, "translate-train", + f"en-{lang}-translated.tsv")) + for i, line in enumerate(lines): + guid = f"train-{lang}-{i}" + text_a = self.process_text_fn(line[2]) + text_b = self.process_text_fn(line[3]) + label = self.process_text_fn(line[4]) + examples.append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples def get_dev_examples(self, data_dir): """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) - examples = [] - for (i, line) in enumerate(lines): - guid = "dev-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.only_use_en_dev: + lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) + for i, line in enumerate(lines): + guid = "dev-%d" % i + text_a = 
self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + else: + for lang in self.supported_languages: + lines = self._read_tsv(os.path.join(data_dir, f"dev-{lang}.tsv")) + for i, line in enumerate(lines): + guid = f"dev-{lang}-{i}" + text_a = self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + examples.append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples def get_test_examples(self, data_dir): """See base class.""" - examples_by_lang = {k: [] for k in self.supported_languages} + examples_by_lang = {} for lang in self.supported_languages: + examples_by_lang[lang] = [] lines = self._read_tsv(os.path.join(data_dir, f"test-{lang}.tsv")) - for (i, line) in enumerate(lines): - guid = "test-%d" % i + for i, line in enumerate(lines): + guid = f"test-{lang}-{i}" text_a = self.process_text_fn(line[0]) text_b = self.process_text_fn(line[1]) label = "0" examples_by_lang[lang].append( InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.translated_data_dir is not None: + for lang in self.supported_languages: + if lang == "en": + continue + examples_by_lang[f"{lang}-en"] = [] + lines = self._read_tsv( + os.path.join(self.translated_data_dir, "translate-test", + f"test-{lang}-en-translated.tsv")) + for i, line in enumerate(lines): + guid = f"test-{lang}-en-{i}" + text_a = self.process_text_fn(line[2]) + text_b = self.process_text_fn(line[3]) + label = "0" + examples_by_lang[f"{lang}-en"].append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples_by_lang def get_labels(self): @@ -891,45 +1065,111 @@ class XtremeXnliProcessor(DataProcessor): "ur", "vi", "zh" ] + def __init__(self, + process_text_fn=tokenization.convert_to_unicode, + translated_data_dir=None, + 
only_use_en_dev=True): + """See base class. + + Args: + process_text_fn: See base class. + translated_data_dir: If specified, will also include translated data in + the training data. + only_use_en_dev: If True, only use english dev data. Otherwise, use dev + data from all languages. + """ + super(XtremeXnliProcessor, self).__init__(process_text_fn) + self.translated_data_dir = translated_data_dir + self.only_use_en_dev = only_use_en_dev + def get_train_examples(self, data_dir): """See base class.""" lines = self._read_tsv(os.path.join(data_dir, "train-en.tsv")) examples = [] - for (i, line) in enumerate(lines): - guid = "train-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.translated_data_dir is None: + for i, line in enumerate(lines): + guid = "train-%d" % i + text_a = self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + if label == self.process_text_fn("contradictory"): + label = self.process_text_fn("contradiction") + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + else: + for lang in self.supported_languages: + lines = self._read_tsv( + os.path.join(self.translated_data_dir, "translate-train", + f"en-{lang}-translated.tsv")) + for i, line in enumerate(lines): + guid = f"train-{lang}-{i}" + text_a = self.process_text_fn(line[2]) + text_b = self.process_text_fn(line[3]) + label = self.process_text_fn(line[4]) + if label == self.process_text_fn("contradictory"): + label = self.process_text_fn("contradiction") + examples.append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples def get_dev_examples(self, data_dir): """See base class.""" - lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) examples = [] - for (i, line) in enumerate(lines): 
- guid = "dev-%d" % i - text_a = self.process_text_fn(line[0]) - text_b = self.process_text_fn(line[1]) - label = self.process_text_fn(line[2]) - examples.append( - InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.only_use_en_dev: + lines = self._read_tsv(os.path.join(data_dir, "dev-en.tsv")) + for i, line in enumerate(lines): + guid = "dev-%d" % i + text_a = self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + else: + for lang in self.supported_languages: + lines = self._read_tsv(os.path.join(data_dir, f"dev-{lang}.tsv")) + for i, line in enumerate(lines): + guid = f"dev-{lang}-{i}" + text_a = self.process_text_fn(line[0]) + text_b = self.process_text_fn(line[1]) + label = self.process_text_fn(line[2]) + if label == self.process_text_fn("contradictory"): + label = self.process_text_fn("contradiction") + examples.append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples def get_test_examples(self, data_dir): """See base class.""" - examples_by_lang = {k: [] for k in self.supported_languages} + examples_by_lang = {} for lang in self.supported_languages: + examples_by_lang[lang] = [] lines = self._read_tsv(os.path.join(data_dir, f"test-{lang}.tsv")) - for (i, line) in enumerate(lines): - guid = f"test-{i}" + for i, line in enumerate(lines): + guid = f"test-{lang}-{i}" text_a = self.process_text_fn(line[0]) text_b = self.process_text_fn(line[1]) label = "contradiction" examples_by_lang[lang].append( InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + if self.translated_data_dir is not None: + for lang in self.supported_languages: + if lang == "en": + continue + examples_by_lang[f"{lang}-en"] = [] + lines = self._read_tsv( + os.path.join(self.translated_data_dir, "translate-test", + f"test-{lang}-en-translated.tsv")) + for i, line in 
enumerate(lines): + guid = f"test-{lang}-en-{i}" + text_a = self.process_text_fn(line[2]) + text_b = self.process_text_fn(line[3]) + label = "contradiction" + examples_by_lang[f"{lang}-en"].append( + InputExample( + guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples_by_lang def get_labels(self): @@ -965,6 +1205,11 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, if len(tokens_a) > max_seq_length - 2: tokens_a = tokens_a[0:(max_seq_length - 2)] + seg_id_a = 0 + seg_id_b = 1 + seg_id_cls = 0 + seg_id_pad = 0 + # The convention in BERT is: # (a) For sequence pairs: # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] @@ -986,19 +1231,19 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, tokens = [] segment_ids = [] tokens.append("[CLS]") - segment_ids.append(0) + segment_ids.append(seg_id_cls) for token in tokens_a: tokens.append(token) - segment_ids.append(0) + segment_ids.append(seg_id_a) tokens.append("[SEP]") - segment_ids.append(0) + segment_ids.append(seg_id_a) if tokens_b: for token in tokens_b: tokens.append(token) - segment_ids.append(1) + segment_ids.append(seg_id_b) tokens.append("[SEP]") - segment_ids.append(1) + segment_ids.append(seg_id_b) input_ids = tokenizer.convert_tokens_to_ids(tokens) @@ -1010,7 +1255,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, while len(input_ids) < max_seq_length: input_ids.append(0) input_mask.append(0) - segment_ids.append(0) + segment_ids.append(seg_id_pad) assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length @@ -1027,7 +1272,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids])) logging.info("label: %s (id = %s)", example.label, str(label_id)) logging.info("weight: %s", example.weight) - logging.info("int_iden: %s", str(example.int_iden)) + logging.info("example_id: %s", 
example.example_id) feature = InputFeatures( input_ids=input_ids, @@ -1036,11 +1281,86 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, label_id=label_id, is_real_example=True, weight=example.weight, - int_iden=example.int_iden) + example_id=example.example_id) return feature +class AXgProcessor(DataProcessor): + """Processor for the AXg dataset (SuperGLUE diagnostics dataset).""" + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "AX-g.jsonl")), "test") + + def get_labels(self): + """See base class.""" + return ["entailment", "not_entailment"] + + @staticmethod + def get_processor_name(): + """See base class.""" + return "AXg" + + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + examples = [] + for line in lines: + guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"]))) + text_a = self.process_text_fn(line["premise"]) + text_b = self.process_text_fn(line["hypothesis"]) + label = self.process_text_fn(line["label"]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class SuperGLUERTEProcessor(DataProcessor): + """Processor for the RTE dataset (SuperGLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "train.jsonl")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") + + def get_labels(self): + """See base class.""" + # All datasets are converted to 2-class split, where for 3-class datasets we + # collapse neutral and contradiction into 
not_entailment. + return ["entailment", "not_entailment"] + + @staticmethod + def get_processor_name(): + """See base class.""" + return "RTESuperGLUE" + + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + examples = [] + for i, line in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = self.process_text_fn(line["premise"]) + text_b = self.process_text_fn(line["hypothesis"]) + if set_type == "test": + label = "entailment" + else: + label = self.process_text_fn(line["label"]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + def file_based_convert_examples_to_features(examples, label_list, max_seq_length, @@ -1052,7 +1372,7 @@ def file_based_convert_examples_to_features(examples, tf.io.gfile.makedirs(os.path.dirname(output_file)) writer = tf.io.TFRecordWriter(output_file) - for (ex_index, example) in enumerate(examples): + for ex_index, example in enumerate(examples): if ex_index % 10000 == 0: logging.info("Writing example %d of %d", ex_index, len(examples)) @@ -1079,8 +1399,10 @@ def file_based_convert_examples_to_features(examples, [int(feature.is_real_example)]) if feature.weight is not None: features["weight"] = create_float_feature([feature.weight]) - if feature.int_iden is not None: - features["int_iden"] = create_int_feature([feature.int_iden]) + if feature.example_id is not None: + features["example_id"] = create_int_feature([feature.example_id]) + else: + features["example_id"] = create_int_feature([ex_index]) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) @@ -1113,7 +1435,7 @@ def generate_tf_record_from_data_file(processor, max_seq_length=128): """Generates and saves training data into a tf record file. - Arguments: + Args: processor: Input processor object to be used for generating data. Subclass of `DataProcessor`. 
data_dir: Directory that contains train/eval/test data to process. @@ -1137,13 +1459,15 @@ def generate_tf_record_from_data_file(processor, label_type = getattr(processor, "label_type", None) is_regression = getattr(processor, "is_regression", False) has_sample_weights = getattr(processor, "weight_key", False) - assert train_data_output_path - train_input_data_examples = processor.get_train_examples(data_dir) - file_based_convert_examples_to_features(train_input_data_examples, label_list, - max_seq_length, tokenizer, - train_data_output_path, label_type) - num_training_data = len(train_input_data_examples) + num_training_data = 0 + if train_data_output_path: + train_input_data_examples = processor.get_train_examples(data_dir) + file_based_convert_examples_to_features(train_input_data_examples, + label_list, max_seq_length, + tokenizer, train_data_output_path, + label_type) + num_training_data = len(train_input_data_examples) if eval_data_output_path: eval_input_data_examples = processor.get_dev_examples(data_dir) diff --git a/official/nlp/data/create_finetuning_data.py b/official/nlp/data/create_finetuning_data.py index 3e208afc33312656d9a9892e11d9bd90624e87bf..14b2bbc04632dafbfd38fff55704817fffd47c38 100644 --- a/official/nlp/data/create_finetuning_data.py +++ b/official/nlp/data/create_finetuning_data.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""BERT finetuning task dataset generator.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""BERT finetuning task dataset generator.""" import functools import json import os +# Import libraries from absl import app from absl import flags import tensorflow as tf @@ -49,41 +46,60 @@ flags.DEFINE_string( "The input data dir. Should contain the .tsv files (or other data files) " "for the task.") -flags.DEFINE_enum("classification_task_name", "MNLI", - ["COLA", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE", - "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", - "XTREME-PAWS-X"], - "The name of the task to train BERT classifier. The " - "difference between XTREME-XNLI and XNLI is: 1. the format " - "of input tsv files; 2. the dev set for XTREME is english " - "only and for XNLI is all languages combined. Same for " - "PAWS-X.") - -# XNLI task specific flag. +flags.DEFINE_enum( + "classification_task_name", "MNLI", [ + "AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE", + "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X", + "AX-g", "SUPERGLUE-RTE" + ], "The name of the task to train BERT classifier. The " + "difference between XTREME-XNLI and XNLI is: 1. the format " + "of input tsv files; 2. the dev set for XTREME is english " + "only and for XNLI is all languages combined. Same for " + "PAWS-X.") + +# MNLI task-specific flag. +flags.DEFINE_enum("mnli_type", "matched", ["matched", "mismatched"], + "The type of MNLI dataset.") + +# XNLI task-specific flag. flags.DEFINE_string( "xnli_language", "en", - "Language of training data for XNIL task. If the value is 'all', the data " + "Language of training data for XNLI task. If the value is 'all', the data " "of all languages will be used for training.") -# PAWS-X task specific flag. +# PAWS-X task-specific flag. 
flags.DEFINE_string( "pawsx_language", "en", - "Language of trainig data for PAWS-X task. If the value is 'all', the data " + "Language of training data for PAWS-X task. If the value is 'all', the data " "of all languages will be used for training.") -# Retrieva task specific flags +# XTREME classification specific flags. Only used in XtremePawsx and XtremeXnli. +flags.DEFINE_string( + "translated_input_data_dir", None, + "The translated input data dir. Should contain the .tsv files (or other " + "data files) for the task.") + +# Retrieval task-specific flags. flags.DEFINE_enum("retrieval_task_name", "bucc", ["bucc", "tatoeba"], "The name of sentence retrieval task for scoring") -# Tagging task specific flags +# Tagging task-specific flags. flags.DEFINE_enum("tagging_task_name", "panx", ["panx", "udpos"], "The name of BERT tagging (token classification) task.") -# BERT Squad task specific flags. +flags.DEFINE_bool("tagging_only_use_en_train", True, + "Whether only use english training data in tagging.") + +# BERT Squad task-specific flags. flags.DEFINE_string( "squad_data_file", None, "The input data file in for generating training data for BERT squad task.") +flags.DEFINE_string( + "translated_squad_data_folder", None, + "The translated data folder for generating training data for BERT squad " + "task.") + flags.DEFINE_integer( "doc_stride", 128, "When splitting up a long document into chunks, how much stride to " @@ -98,6 +114,14 @@ flags.DEFINE_bool( "version_2_with_negative", False, "If true, the SQuAD examples contain some that do not have an answer.") +flags.DEFINE_bool( + "xlnet_format", False, + "If true, then data will be preprocessed in a paragraph, query, class order" + " instead of the BERT-style class, paragraph, query order.") + +# XTREME specific flags. +flags.DEFINE_bool("only_use_en_dev", True, "Whether only use english dev data.") + # Shared flags across BERT fine-tuning tasks. 
flags.DEFINE_string("vocab_file", None, "The vocabulary file that the BERT model was trained on.") @@ -136,36 +160,35 @@ flags.DEFINE_string("sp_model_file", "", "The path to the model used by sentence piece tokenizer.") flags.DEFINE_enum( - "tokenizer_impl", "word_piece", ["word_piece", "sentence_piece"], - "Specifies the tokenizer implementation, i.e., whehter to use word_piece " - "or sentence_piece tokenizer. Canonical BERT uses word_piece tokenizer, " - "while ALBERT uses sentence_piece tokenizer.") + "tokenization", "WordPiece", ["WordPiece", "SentencePiece"], + "Specifies the tokenizer implementation, i.e., whether to use WordPiece " + "or SentencePiece tokenizer. Canonical BERT uses WordPiece tokenizer, " + "while ALBERT uses SentencePiece tokenizer.") -flags.DEFINE_string("tfds_params", "", - "Comma-separated list of TFDS parameter assigments for " - "generic classfication data import (for more details " - "see the TfdsProcessor class documentation).") +flags.DEFINE_string( + "tfds_params", "", "Comma-separated list of TFDS parameter assigments for " + "generic classfication data import (for more details " + "see the TfdsProcessor class documentation).") def generate_classifier_dataset(): """Generates classifier dataset and returns input meta data.""" - assert (FLAGS.input_data_dir and FLAGS.classification_task_name - or FLAGS.tfds_params) + assert (FLAGS.input_data_dir and FLAGS.classification_task_name or + FLAGS.tfds_params) - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": tokenizer = tokenization.FullTokenizer( vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) processor_text_fn = tokenization.convert_to_unicode else: - assert FLAGS.tokenizer_impl == "sentence_piece" + assert FLAGS.tokenization == "SentencePiece" tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) processor_text_fn = functools.partial( tokenization.preprocess_text, lower=FLAGS.do_lower_case) if FLAGS.tfds_params: 
processor = classifier_data_lib.TfdsProcessor( - tfds_params=FLAGS.tfds_params, - process_text_fn=processor_text_fn) + tfds_params=FLAGS.tfds_params, process_text_fn=processor_text_fn) return classifier_data_lib.generate_tf_record_from_data_file( processor, None, @@ -176,31 +199,51 @@ def generate_classifier_dataset(): max_seq_length=FLAGS.max_seq_length) else: processors = { + "ax": + classifier_data_lib.AxProcessor, "cola": classifier_data_lib.ColaProcessor, + "imdb": + classifier_data_lib.ImdbProcessor, "mnli": - classifier_data_lib.MnliProcessor, + functools.partial( + classifier_data_lib.MnliProcessor, mnli_type=FLAGS.mnli_type), "mrpc": classifier_data_lib.MrpcProcessor, "qnli": classifier_data_lib.QnliProcessor, - "qqp": classifier_data_lib.QqpProcessor, - "rte": classifier_data_lib.RteProcessor, + "qqp": + classifier_data_lib.QqpProcessor, + "rte": + classifier_data_lib.RteProcessor, "sst-2": classifier_data_lib.SstProcessor, "sts-b": classifier_data_lib.StsBProcessor, "xnli": - functools.partial(classifier_data_lib.XnliProcessor, - language=FLAGS.xnli_language), + functools.partial( + classifier_data_lib.XnliProcessor, + language=FLAGS.xnli_language), "paws-x": - functools.partial(classifier_data_lib.PawsxProcessor, - language=FLAGS.pawsx_language), - "wnli": classifier_data_lib.WnliProcessor, + functools.partial( + classifier_data_lib.PawsxProcessor, + language=FLAGS.pawsx_language), + "wnli": + classifier_data_lib.WnliProcessor, "xtreme-xnli": - functools.partial(classifier_data_lib.XtremeXnliProcessor), + functools.partial( + classifier_data_lib.XtremeXnliProcessor, + translated_data_dir=FLAGS.translated_input_data_dir, + only_use_en_dev=FLAGS.only_use_en_dev), "xtreme-paws-x": - functools.partial(classifier_data_lib.XtremePawsxProcessor) + functools.partial( + classifier_data_lib.XtremePawsxProcessor, + translated_data_dir=FLAGS.translated_input_data_dir, + only_use_en_dev=FLAGS.only_use_en_dev), + "ax-g": + classifier_data_lib.AXgProcessor, + 
"superglue-rte": + classifier_data_lib.SuperGLUERTEProcessor } task_name = FLAGS.classification_task_name.lower() if task_name not in processors: @@ -219,20 +262,19 @@ def generate_classifier_dataset(): def generate_regression_dataset(): """Generates regression dataset and returns input meta data.""" - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": tokenizer = tokenization.FullTokenizer( vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) processor_text_fn = tokenization.convert_to_unicode else: - assert FLAGS.tokenizer_impl == "sentence_piece" + assert FLAGS.tokenization == "SentencePiece" tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) processor_text_fn = functools.partial( tokenization.preprocess_text, lower=FLAGS.do_lower_case) if FLAGS.tfds_params: processor = classifier_data_lib.TfdsProcessor( - tfds_params=FLAGS.tfds_params, - process_text_fn=processor_text_fn) + tfds_params=FLAGS.tfds_params, process_text_fn=processor_text_fn) return classifier_data_lib.generate_tf_record_from_data_file( processor, None, @@ -248,28 +290,42 @@ def generate_regression_dataset(): def generate_squad_dataset(): """Generates squad training dataset and returns input meta data.""" assert FLAGS.squad_data_file - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": return squad_lib_wp.generate_tf_record_from_json_file( - FLAGS.squad_data_file, FLAGS.vocab_file, FLAGS.train_data_output_path, - FLAGS.max_seq_length, FLAGS.do_lower_case, FLAGS.max_query_length, - FLAGS.doc_stride, FLAGS.version_2_with_negative) + input_file_path=FLAGS.squad_data_file, + vocab_file_path=FLAGS.vocab_file, + output_path=FLAGS.train_data_output_path, + translated_input_folder=FLAGS.translated_squad_data_folder, + max_seq_length=FLAGS.max_seq_length, + do_lower_case=FLAGS.do_lower_case, + max_query_length=FLAGS.max_query_length, + doc_stride=FLAGS.doc_stride, + 
version_2_with_negative=FLAGS.version_2_with_negative, + xlnet_format=FLAGS.xlnet_format) else: - assert FLAGS.tokenizer_impl == "sentence_piece" + assert FLAGS.tokenization == "SentencePiece" return squad_lib_sp.generate_tf_record_from_json_file( - FLAGS.squad_data_file, FLAGS.sp_model_file, - FLAGS.train_data_output_path, FLAGS.max_seq_length, FLAGS.do_lower_case, - FLAGS.max_query_length, FLAGS.doc_stride, FLAGS.version_2_with_negative) + input_file_path=FLAGS.squad_data_file, + sp_model_file=FLAGS.sp_model_file, + output_path=FLAGS.train_data_output_path, + translated_input_folder=FLAGS.translated_squad_data_folder, + max_seq_length=FLAGS.max_seq_length, + do_lower_case=FLAGS.do_lower_case, + max_query_length=FLAGS.max_query_length, + doc_stride=FLAGS.doc_stride, + xlnet_format=FLAGS.xlnet_format, + version_2_with_negative=FLAGS.version_2_with_negative) def generate_retrieval_dataset(): """Generate retrieval test and dev dataset and returns input meta data.""" assert (FLAGS.input_data_dir and FLAGS.retrieval_task_name) - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": tokenizer = tokenization.FullTokenizer( vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) processor_text_fn = tokenization.convert_to_unicode else: - assert FLAGS.tokenizer_impl == "sentence_piece" + assert FLAGS.tokenization == "SentencePiece" tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) processor_text_fn = functools.partial( tokenization.preprocess_text, lower=FLAGS.do_lower_case) @@ -286,34 +342,38 @@ def generate_retrieval_dataset(): processor = processors[task_name](process_text_fn=processor_text_fn) return sentence_retrieval_lib.generate_sentence_retrevial_tf_record( - processor, - FLAGS.input_data_dir, - tokenizer, - FLAGS.eval_data_output_path, - FLAGS.test_data_output_path, - FLAGS.max_seq_length) + processor, FLAGS.input_data_dir, tokenizer, FLAGS.eval_data_output_path, + FLAGS.test_data_output_path, 
FLAGS.max_seq_length) def generate_tagging_dataset(): """Generates tagging dataset.""" processors = { - "panx": tagging_data_lib.PanxProcessor, - "udpos": tagging_data_lib.UdposProcessor, + "panx": + functools.partial( + tagging_data_lib.PanxProcessor, + only_use_en_train=FLAGS.tagging_only_use_en_train, + only_use_en_dev=FLAGS.only_use_en_dev), + "udpos": + functools.partial( + tagging_data_lib.UdposProcessor, + only_use_en_train=FLAGS.tagging_only_use_en_train, + only_use_en_dev=FLAGS.only_use_en_dev), } task_name = FLAGS.tagging_task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % task_name) - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": tokenizer = tokenization.FullTokenizer( vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) processor_text_fn = tokenization.convert_to_unicode - elif FLAGS.tokenizer_impl == "sentence_piece": + elif FLAGS.tokenization == "SentencePiece": tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) processor_text_fn = functools.partial( tokenization.preprocess_text, lower=FLAGS.do_lower_case) else: - raise ValueError("Unsupported tokenizer_impl: %s" % FLAGS.tokenizer_impl) + raise ValueError("Unsupported tokenization: %s" % FLAGS.tokenization) processor = processors[task_name]() return tagging_data_lib.generate_tf_record_from_data_file( @@ -323,12 +383,12 @@ def generate_tagging_dataset(): def main(_): - if FLAGS.tokenizer_impl == "word_piece": + if FLAGS.tokenization == "WordPiece": if not FLAGS.vocab_file: raise ValueError( "FLAG vocab_file for word-piece tokenizer is not specified.") else: - assert FLAGS.tokenizer_impl == "sentence_piece" + assert FLAGS.tokenization == "SentencePiece" if not FLAGS.sp_model_file: raise ValueError( "FLAG sp_model_file for sentence-piece tokenizer is not specified.") diff --git a/official/nlp/data/create_pretraining_data.py b/official/nlp/data/create_pretraining_data.py index 
79dac57ac8775687673604af6fb2fb50c9f74244..93b7723d125a6e4916a8a595ef4c5a4b470bdcc9 100644 --- a/official/nlp/data/create_pretraining_data.py +++ b/official/nlp/data/create_pretraining_data.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Create masked LM/next sentence masked_lm TF examples for BERT.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import collections +import itertools import random +# Import libraries from absl import app from absl import flags from absl import logging @@ -48,10 +47,20 @@ flags.DEFINE_bool( "do_whole_word_mask", False, "Whether to use whole word masking rather than per-WordPiece masking.") +flags.DEFINE_integer( + "max_ngram_size", None, + "Mask contiguous whole words (n-grams) of up to `max_ngram_size` using a " + "weighting scheme to favor shorter n-grams. 
" + "Note: `--do_whole_word_mask=True` must also be set when n-gram masking.") + flags.DEFINE_bool( "gzip_compress", False, "Whether to use `GZIP` compress option to get compressed TFRecord files.") +flags.DEFINE_bool( + "use_v2_feature_names", False, + "Whether to use the feature names consistent with the models.") + flags.DEFINE_integer("max_seq_length", 128, "Maximum sequence length.") flags.DEFINE_integer("max_predictions_per_seq", 20, @@ -101,8 +110,8 @@ class TrainingInstance(object): def write_instance_to_example_files(instances, tokenizer, max_seq_length, max_predictions_per_seq, output_files, - gzip_compress): - """Create TF example files from `TrainingInstance`s.""" + gzip_compress, use_v2_feature_names): + """Creates TF example files from `TrainingInstance`s.""" writers = [] for output_file in output_files: writers.append( @@ -139,9 +148,14 @@ def write_instance_to_example_files(instances, tokenizer, max_seq_length, next_sentence_label = 1 if instance.is_random_next else 0 features = collections.OrderedDict() - features["input_ids"] = create_int_feature(input_ids) + if use_v2_feature_names: + features["input_word_ids"] = create_int_feature(input_ids) + features["input_type_ids"] = create_int_feature(segment_ids) + else: + features["input_ids"] = create_int_feature(input_ids) + features["segment_ids"] = create_int_feature(segment_ids) + features["input_mask"] = create_int_feature(input_mask) - features["segment_ids"] = create_int_feature(segment_ids) features["masked_lm_positions"] = create_int_feature(masked_lm_positions) features["masked_lm_ids"] = create_int_feature(masked_lm_ids) features["masked_lm_weights"] = create_float_feature(masked_lm_weights) @@ -192,7 +206,8 @@ def create_training_instances(input_files, masked_lm_prob, max_predictions_per_seq, rng, - do_whole_word_mask=False): + do_whole_word_mask=False, + max_ngram_size=None): """Create `TrainingInstance`s from raw text.""" all_documents = [[]] @@ -229,7 +244,7 @@ def 
create_training_instances(input_files, create_instances_from_document( all_documents, document_index, max_seq_length, short_seq_prob, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask)) + do_whole_word_mask, max_ngram_size)) rng.shuffle(instances) return instances @@ -238,7 +253,8 @@ def create_training_instances(input_files, def create_instances_from_document( all_documents, document_index, max_seq_length, short_seq_prob, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask=False): + do_whole_word_mask=False, + max_ngram_size=None): """Creates `TrainingInstance`s for a single document.""" document = all_documents[document_index] @@ -337,7 +353,7 @@ def create_instances_from_document( (tokens, masked_lm_positions, masked_lm_labels) = create_masked_lm_predictions( tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask) + do_whole_word_mask, max_ngram_size) instance = TrainingInstance( tokens=tokens, segment_ids=segment_ids, @@ -355,72 +371,238 @@ def create_instances_from_document( MaskedLmInstance = collections.namedtuple("MaskedLmInstance", ["index", "label"]) +# A _Gram is a [half-open) interval of token indices which form a word. +# E.g., +# words: ["The", "doghouse"] +# tokens: ["The", "dog", "##house"] +# grams: [(0,1), (1,3)] +_Gram = collections.namedtuple("_Gram", ["begin", "end"]) + + +def _window(iterable, size): + """Helper to create a sliding window iterator with a given size. + + E.g., + input = [1, 2, 3, 4] + _window(input, 1) => [1], [2], [3], [4] + _window(input, 2) => [1, 2], [2, 3], [3, 4] + _window(input, 3) => [1, 2, 3], [2, 3, 4] + _window(input, 4) => [1, 2, 3, 4] + _window(input, 5) => None + + Args: + iterable: elements to iterate over. + size: size of the window. + + Yields: + Elements of `iterable` batched into a sliding window of length `size`. 
+ """ + i = iter(iterable) + window = [] + try: + for e in range(0, size): + window.append(next(i)) + yield window + except StopIteration: + # handle the case where iterable's length is less than the window size. + return + for e in i: + window = window[1:] + [e] + yield window + + +def _contiguous(sorted_grams): + """Test whether a sequence of grams is contiguous. + + Args: + sorted_grams: _Grams which are sorted in increasing order. + Returns: + True if `sorted_grams` are touching each other. + + E.g., + _contiguous([(1, 4), (4, 5), (5, 10)]) == True + _contiguous([(1, 2), (4, 5)]) == False + """ + for a, b in _window(sorted_grams, 2): + if a.end != b.begin: + return False + return True + + +def _masking_ngrams(grams, max_ngram_size, max_masked_tokens, rng): + """Create a list of masking {1, ..., n}-grams from a list of one-grams. + + This is an extention of 'whole word masking' to mask multiple, contiguous + words such as (e.g., "the red boat"). + + Each input gram represents the token indices of a single word, + words: ["the", "red", "boat"] + tokens: ["the", "red", "boa", "##t"] + grams: [(0,1), (1,2), (2,4)] + + For a `max_ngram_size` of three, possible outputs masks include: + 1-grams: (0,1), (1,2), (2,4) + 2-grams: (0,2), (1,4) + 3-grams; (0,4) + + Output masks will not overlap and contain less than `max_masked_tokens` total + tokens. E.g., for the example above with `max_masked_tokens` as three, + valid outputs are, + [(0,1), (1,2)] # "the", "red" covering two tokens + [(1,2), (2,4)] # "red", "boa", "##t" covering three tokens + + The length of the selected n-gram follows a zipf weighting to + favor shorter n-gram sizes (weight(1)=1, weight(2)=1/2, weight(3)=1/3, ...). + + Args: + grams: List of one-grams. + max_ngram_size: Maximum number of contiguous one-grams combined to create + an n-gram. + max_masked_tokens: Maximum total number of tokens to be masked. + rng: `random.Random` generator. + + Returns: + A list of n-grams to be used as masks. 
+ """ + if not grams: + return None + + grams = sorted(grams) + num_tokens = grams[-1].end + + # Ensure our grams are valid (i.e., they don't overlap). + for a, b in _window(grams, 2): + if a.end > b.begin: + raise ValueError("overlapping grams: {}".format(grams)) + + # Build map from n-gram length to list of n-grams. + ngrams = {i: [] for i in range(1, max_ngram_size+1)} + for gram_size in range(1, max_ngram_size+1): + for g in _window(grams, gram_size): + if _contiguous(g): + # Add an n-gram which spans these one-grams. + ngrams[gram_size].append(_Gram(g[0].begin, g[-1].end)) + + # Shuffle each list of n-grams. + for v in ngrams.values(): + rng.shuffle(v) + + # Create the weighting for n-gram length selection. + # Stored cummulatively for `random.choices` below. + cummulative_weights = list( + itertools.accumulate([1./n for n in range(1, max_ngram_size+1)])) + + output_ngrams = [] + # Keep a bitmask of which tokens have been masked. + masked_tokens = [False] * num_tokens + # Loop until we have enough masked tokens or there are no more candidate + # n-grams of any length. + # Each code path should ensure one or more elements from `ngrams` are removed + # to guarentee this loop terminates. + while (sum(masked_tokens) < max_masked_tokens and + sum(len(s) for s in ngrams.values())): + # Pick an n-gram size based on our weights. + sz = random.choices(range(1, max_ngram_size+1), + cum_weights=cummulative_weights)[0] + + # Ensure this size doesn't result in too many masked tokens. + # E.g., a two-gram contains _at least_ two tokens. + if sum(masked_tokens) + sz > max_masked_tokens: + # All n-grams of this length are too long and can be removed from + # consideration. + ngrams[sz].clear() + continue -def create_masked_lm_predictions(tokens, masked_lm_prob, - max_predictions_per_seq, vocab_words, rng, - do_whole_word_mask): - """Creates the predictions for the masked LM objective.""" + # All of the n-grams of this size have been used. 
+ if not ngrams[sz]: + continue + + # Choose a random n-gram of the given size. + gram = ngrams[sz].pop() + num_gram_tokens = gram.end-gram.begin + + # Check if this would add too many tokens. + if num_gram_tokens + sum(masked_tokens) > max_masked_tokens: + continue - cand_indexes = [] - for (i, token) in enumerate(tokens): - if token == "[CLS]" or token == "[SEP]": + # Check if any of the tokens in this gram have already been masked. + if sum(masked_tokens[gram.begin:gram.end]): continue - # Whole Word Masking means that if we mask all of the wordpieces - # corresponding to an original word. When a word has been split into - # WordPieces, the first token does not have any marker and any subsequence - # tokens are prefixed with ##. So whenever we see the ## token, we - # append it to the previous set of word indexes. - # - # Note that Whole Word Masking does *not* change the training code - # at all -- we still predict each WordPiece independently, softmaxed - # over the entire vocabulary. - if (do_whole_word_mask and len(cand_indexes) >= 1 and - token.startswith("##")): - cand_indexes[-1].append(i) + + # Found a usable n-gram! Mark its tokens as masked and add it to return. + masked_tokens[gram.begin:gram.end] = [True] * (gram.end-gram.begin) + output_ngrams.append(gram) + return output_ngrams + + +def _wordpieces_to_grams(tokens): + """Reconstitue grams (words) from `tokens`. + + E.g., + tokens: ['[CLS]', 'That', 'lit', '##tle', 'blue', 'tru', '##ck', '[SEP]'] + grams: [ [1,2), [2, 4), [4,5) , [5, 6)] + + Args: + tokens: list of wordpieces + Returns: + List of _Grams representing spans of whole words + (without "[CLS]" and "[SEP]"). 
+ """ + grams = [] + gram_start_pos = None + for i, token in enumerate(tokens): + if gram_start_pos is not None and token.startswith("##"): + continue + if gram_start_pos is not None: + grams.append(_Gram(gram_start_pos, i)) + if token not in ["[CLS]", "[SEP]"]: + gram_start_pos = i else: - cand_indexes.append([i]) + gram_start_pos = None + if gram_start_pos is not None: + grams.append(_Gram(gram_start_pos, len(tokens))) + return grams - rng.shuffle(cand_indexes) - output_tokens = list(tokens) +def create_masked_lm_predictions(tokens, masked_lm_prob, + max_predictions_per_seq, vocab_words, rng, + do_whole_word_mask, + max_ngram_size=None): + """Creates the predictions for the masked LM objective.""" + if do_whole_word_mask: + grams = _wordpieces_to_grams(tokens) + else: + # Here we consider each token to be a word to allow for sub-word masking. + if max_ngram_size: + raise ValueError("cannot use ngram masking without whole word masking") + grams = [_Gram(i, i+1) for i in range(0, len(tokens)) + if tokens[i] not in ["[CLS]", "[SEP]"]] num_to_predict = min(max_predictions_per_seq, max(1, int(round(len(tokens) * masked_lm_prob)))) - + # Generate masks. If `max_ngram_size` in [0, None] it means we're doing + # whole word masking or token level masking. Both of these can be treated + # as the `max_ngram_size=1` case. + masked_grams = _masking_ngrams(grams, max_ngram_size or 1, + num_to_predict, rng) masked_lms = [] - covered_indexes = set() - for index_set in cand_indexes: - if len(masked_lms) >= num_to_predict: - break - # If adding a whole-word mask would exceed the maximum number of - # predictions, then just skip this candidate. 
- if len(masked_lms) + len(index_set) > num_to_predict: - continue - is_any_index_covered = False - for index in index_set: - if index in covered_indexes: - is_any_index_covered = True - break - if is_any_index_covered: - continue - for index in index_set: - covered_indexes.add(index) - - masked_token = None - # 80% of the time, replace with [MASK] - if rng.random() < 0.8: - masked_token = "[MASK]" + output_tokens = list(tokens) + for gram in masked_grams: + # 80% of the time, replace all n-gram tokens with [MASK] + if rng.random() < 0.8: + replacement_action = lambda idx: "[MASK]" + else: + # 10% of the time, keep all the original n-gram tokens. + if rng.random() < 0.5: + replacement_action = lambda idx: tokens[idx] + # 10% of the time, replace each n-gram token with a random word. else: - # 10% of the time, keep original - if rng.random() < 0.5: - masked_token = tokens[index] - # 10% of the time, replace with random word - else: - masked_token = vocab_words[rng.randint(0, len(vocab_words) - 1)] + replacement_action = lambda idx: rng.choice(vocab_words) - output_tokens[index] = masked_token + for idx in range(gram.begin, gram.end): + output_tokens[idx] = replacement_action(idx) + masked_lms.append(MaskedLmInstance(index=idx, label=tokens[idx])) - masked_lms.append(MaskedLmInstance(index=index, label=tokens[index])) assert len(masked_lms) <= num_to_predict masked_lms = sorted(masked_lms, key=lambda x: x.index) @@ -467,7 +649,7 @@ def main(_): instances = create_training_instances( input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor, FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq, - rng, FLAGS.do_whole_word_mask) + rng, FLAGS.do_whole_word_mask, FLAGS.max_ngram_size) output_files = FLAGS.output_file.split(",") logging.info("*** Writing to output files ***") @@ -476,7 +658,8 @@ def main(_): write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length, FLAGS.max_predictions_per_seq, output_files, - 
FLAGS.gzip_compress) + FLAGS.gzip_compress, + FLAGS.use_v2_feature_names) if __name__ == "__main__": diff --git a/official/nlp/data/create_pretraining_data_test.py b/official/nlp/data/create_pretraining_data_test.py new file mode 100644 index 0000000000000000000000000000000000000000..79a38ba8506ac428d48188f0eb4fbf2ce26b4422 --- /dev/null +++ b/official/nlp/data/create_pretraining_data_test.py @@ -0,0 +1,128 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.create_pretraining_data.""" +import random + +import tensorflow as tf + +from official.nlp.data import create_pretraining_data as cpd + +_VOCAB_WORDS = ["vocab_1", "vocab_2"] + + +class CreatePretrainingDataTest(tf.test.TestCase): + + def assertTokens(self, input_tokens, output_tokens, masked_positions, + masked_labels): + # Ensure the masked positions are unique. + self.assertCountEqual(masked_positions, set(masked_positions)) + + # Ensure we can reconstruct the input from the output. + reconstructed_tokens = output_tokens + for pos, label in zip(masked_positions, masked_labels): + reconstructed_tokens[pos] = label + self.assertEqual(input_tokens, reconstructed_tokens) + + # Ensure each label is valid. 
+ for pos, label in zip(masked_positions, masked_labels): + output_token = output_tokens[pos] + if (output_token == "[MASK]" or output_token in _VOCAB_WORDS or + output_token == input_tokens[pos]): + continue + self.fail("invalid mask value: {}".format(output_token)) + + def test_wordpieces_to_grams(self): + tests = [ + (["That", "cone"], [(0, 1), (1, 2)]), + (["That", "cone", "##s"], [(0, 1), (1, 3)]), + (["Swit", "##zer", "##land"], [(0, 3)]), + (["[CLS]", "Up", "##dog"], [(1, 3)]), + (["[CLS]", "Up", "##dog", "[SEP]", "Down"], [(1, 3), (4, 5)]), + ] + for inp, expected in tests: + output = cpd._wordpieces_to_grams(inp) + self.assertEqual(expected, output) + + def test_window(self): + input_list = [1, 2, 3, 4] + window_outputs = [ + (1, [[1], [2], [3], [4]]), + (2, [[1, 2], [2, 3], [3, 4]]), + (3, [[1, 2, 3], [2, 3, 4]]), + (4, [[1, 2, 3, 4]]), + (5, []), + ] + for window, expected in window_outputs: + output = cpd._window(input_list, window) + self.assertEqual(expected, list(output)) + + def test_create_masked_lm_predictions(self): + tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"] + rng = random.Random(123) + for _ in range(0, 5): + output_tokens, masked_positions, masked_labels = ( + cpd.create_masked_lm_predictions( + tokens=tokens, + masked_lm_prob=1.0, + max_predictions_per_seq=3, + vocab_words=_VOCAB_WORDS, + rng=rng, + do_whole_word_mask=False, + max_ngram_size=None)) + self.assertEqual(len(masked_positions), 3) + self.assertEqual(len(masked_labels), 3) + self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) + + def test_create_masked_lm_predictions_whole_word(self): + tokens = ["[CLS]", "a", "##a", "b", "##b", "c", "##c", "[SEP]"] + rng = random.Random(345) + for _ in range(0, 5): + output_tokens, masked_positions, masked_labels = ( + cpd.create_masked_lm_predictions( + tokens=tokens, + masked_lm_prob=1.0, + max_predictions_per_seq=3, + vocab_words=_VOCAB_WORDS, + rng=rng, + do_whole_word_mask=True, + 
max_ngram_size=None)) + # since we can't get exactly three tokens without breaking a word we + # only take two. + self.assertEqual(len(masked_positions), 2) + self.assertEqual(len(masked_labels), 2) + self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) + # ensure that we took an entire word. + self.assertIn(masked_labels, [["a", "##a"], ["b", "##b"], ["c", "##c"]]) + + def test_create_masked_lm_predictions_ngram(self): + tokens = ["[CLS]"] + ["tok{}".format(i) for i in range(0, 512)] + ["[SEP]"] + rng = random.Random(345) + for _ in range(0, 5): + output_tokens, masked_positions, masked_labels = ( + cpd.create_masked_lm_predictions( + tokens=tokens, + masked_lm_prob=1.0, + max_predictions_per_seq=76, + vocab_words=_VOCAB_WORDS, + rng=rng, + do_whole_word_mask=True, + max_ngram_size=3)) + self.assertEqual(len(masked_positions), 76) + self.assertEqual(len(masked_labels), 76) + self.assertTokens(tokens, output_tokens, masked_positions, masked_labels) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/data/create_xlnet_pretraining_data.py b/official/nlp/data/create_xlnet_pretraining_data.py new file mode 100644 index 0000000000000000000000000000000000000000..363164fcae001a61da53b0bb6e0afb9f4e92fd42 --- /dev/null +++ b/official/nlp/data/create_xlnet_pretraining_data.py @@ -0,0 +1,721 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Create LM TF examples for XLNet.""" + +import json +import math +import os + +import random +from typing import Iterable, Mapping, List, Optional, Tuple +import unicodedata + +# Import libraries + +from absl import app +from absl import flags +from absl import logging + +import dataclasses +import numpy as np +import tensorflow as tf + +from official.nlp.bert import tokenization + +special_symbols = { + "": 0, + "": 1, + "": 2, + "": 3, + "": 4, + "": 5, + "": 6, + "": 7, + "": 8, +} + +FLAGS = flags.FLAGS + +flags.DEFINE_integer("seq_length", 512, + help="Sequence length.") +flags.DEFINE_integer("reuse_length", 256, + help="Number of token that can be reused as memory. " + "Could be half of `seq_len`.") +flags.DEFINE_string("input_file", None, + "Input raw text file (or comma-separated list of files).") +flags.DEFINE_string( + "save_dir", None, + "Directory for saving processed data.") +flags.DEFINE_string("sp_model_file", "", + "The path to the model used by sentence piece tokenizer.") +flags.DEFINE_bool("use_eod_token", True, + "Whether or not to include EOD tokens.") +flags.DEFINE_bool("bi_data", True, "Whether or not to use bi-directional data.") +flags.DEFINE_bool( + "do_lower_case", True, + "Whether to lower case the input text. 
Should be True for uncased " + "models and False for cased models.") +flags.DEFINE_integer("per_host_batch_size", 32, "Batch size per host.") +flags.DEFINE_integer("num_cores_per_host", 16, + "The number of (TPU) cores per host.") +flags.DEFINE_string("prefix", "", "Filename prefix.") +flags.DEFINE_string("suffix", "", "Filename suffix.") + +flags.DEFINE_integer("task_id", None, + "The id of the current task.") +flags.DEFINE_integer("num_tasks", None, + "The total number of tasks.") +flags.DEFINE_integer("num_passes", 1, "The number of times to run the script.") + + +@dataclasses.dataclass +class TrainingInstance: + """Representation of a single XLNet Pretraining instance.""" + data: Iterable[int] + segment_ids: Iterable[int] + boundary_indices: Iterable[int] + label: int + + def to_feature(self) -> Mapping[str, tf.train.Feature]: + feat = lambda x: tf.train.Feature(int64_list=tf.train.Int64List(value=x)) + return dict( + input_word_ids=feat(self.data), + input_type_ids=feat(self.segment_ids), + boundary_indices=feat(self.boundary_indices), + label=feat([self.label])) + + def to_example(self) -> tf.train.Example: + return tf.train.Example( + features=tf.train.Features(feature=self.to_feature())) + + def __str__(self): + def seq_to_str(seq): + return " ".join([str(x) for x in seq]) + + s = "" + s += "tokens: %s\n" % seq_to_str(self.data) + s += "segment_ids: %s\n" % seq_to_str(self.segment_ids) + s += "boundary_indices: %s\n" % seq_to_str(self.boundary_indices) + s += "label: %s\n" % self.label + s += "\n" + return s + + def __repr__(self): + return self.__str__() + + +def _preprocess_line(line: str, do_lower_case: bool = False) -> str: + """Preprocesses an individual raw text line. + + This function will: + - Remove extraneous spaces. + - Replace `` with ", and '' with ". + - Replaces accents. + - Applies lower casing. + + Args: + line: The input line to preprocess. + do_lower_case: Whether or not to lower case the text. + + Returns: + The preprocessed line. 
+ + """ + line = " ".join(line.split()) + line = line.replace("``", "\"").replace("''", "\"") + + # Replace accents. + line = unicodedata.normalize("NFKD", line) + line = "".join([c for c in line if not unicodedata.combining(c)]) + + if do_lower_case: + line = line.lower() + return line + + +def preprocess_and_tokenize_input_files( + input_files: Iterable[str], + tokenizer: tokenization.FullSentencePieceTokenizer, + use_eod: bool = True, + do_lower_case: bool = False, + log_example_freq: int = 100000) -> List[Tuple[np.array, np.array]]: + """Preprocesses and encodes raw text from input files. + + This function preprocesses raw text and encodes them into tokens using a + `SentencePieceModel` tokenization method. This also provides the sentence + indicator for each token. + + Args: + input_files: The list of input file names. + tokenizer: The SentencePiece tokenizer that has the attribute `sp_model`. + use_eod: Whether or not to use an EOD indicator. If `False`, then EOD is + not included. + do_lower_case: Whether or not to apply lower casing during raw text + preprocessing. + log_example_freq: The optional field for how many lines to process before + emitting an info log. + + Returns: + The preprocessed list. Each entry in the list is a tuple consisting of + the token IDs and the sentence IDs. + + """ + all_data = [] + eod_symbol = special_symbols[""] + + total_number_of_lines = 0 + + # Input file format: + # (1) One sentence per line. These should ideally be actual sentences, not + # entire paragraphs or arbitrary spans of text. (Because we use the + # sentence boundaries for the "next sentence prediction" task). + # (2) Blank lines between documents. Document boundaries are needed so + # that the "next sentence prediction" task doesn't span between documents. 
+ for input_file in input_files: + line_count = 0 + logging.info("Preprocessing %s", input_file) + + all_tokens = [] + all_sentence_ids = [] + + sentence_id = True + + with tf.io.gfile.GFile(input_file, "rb") as reader: + while True: + line = tokenization.convert_to_unicode(reader.readline()) + if not line: + break + + line_count += 1 + if line_count % log_example_freq == 0: + logging.info("Loading line %d", line_count) + + line = line.strip() + + if not line: + if use_eod: + token_ids = [eod_symbol] + sentence_id = not sentence_id + else: + continue + else: + preprocessed_line = _preprocess_line( + line=line, do_lower_case=do_lower_case) + token_ids = tokenization.encode_ids( + sp_model=tokenizer.sp_model, text=preprocessed_line) + + all_tokens.extend(token_ids) + all_sentence_ids.extend([sentence_id] * len(token_ids)) + sentence_id = not sentence_id + logging.info("Finished processing %s. Number of lines: %d", + input_file, line_count) + if line_count == 0: + continue + total_number_of_lines += line_count + all_tokens = np.array(all_tokens, dtype=np.int64) + all_sentence_ids = np.array(all_sentence_ids, dtype=np.bool) + all_data.append((all_tokens, all_sentence_ids)) + + logging.info("Completed text preprocessing. Total number of lines: %d", + total_number_of_lines) + return all_data + + +def _reshape_to_batch_dimensions( + tokens: np.array, + sentence_ids: np.array, + per_host_batch_size: int) -> Tuple[np.array, np.array]: + """Truncates and reshapes input data with a batch major dimension. + + Args: + tokens: The input token ids. This should have the same shape as + `sentence_ids`. + sentence_ids: The input sentence ids. This should have the same shape as + `token_ids`. + per_host_batch_size: The target per-host batch size. + + Returns: + The tuple of reshaped tokens and sentence_ids. 
+ """ + num_steps = len(tokens) // per_host_batch_size + truncated_data_length = num_steps * per_host_batch_size + + logging.info("per_host_batch_size: %d", per_host_batch_size) + logging.info("num_steps: %d", num_steps) + def truncate_and_reshape(a): + return a[:truncated_data_length].reshape((per_host_batch_size, num_steps)) + + return (truncate_and_reshape(tokens), truncate_and_reshape(sentence_ids)) + + +def _create_a_and_b_segments( + tokens: np.array, + sentence_ids: np.array, + begin_index: int, + total_length: int, + no_cut_probability: float = 0.5): + """Splits segments A and B from a single instance of tokens and sentence ids. + + Args: + tokens: The 1D input token ids. This represents an individual entry within a + batch. + sentence_ids: The 1D input sentence ids. This represents an indivdual entry + within a batch. This should be the same length as `tokens`. + begin_index: The reference beginning index to split data. + total_length: The target combined length of segments A and B. + no_cut_probability: The probability of not cutting a segment despite + a cut possibly existing. + + Returns: + A tuple consisting of A data, B data, and label. + + """ + data_length = tokens.shape[0] + if begin_index + total_length >= data_length: + logging.info("[_create_segments]: begin_index %d + total_length %d >= " + "data_length %d", begin_index, total_length, data_length) + return None + + end_index = begin_index + 1 + cut_indices = [] + + # Identify all indices where sentence IDs change from one to the next. + while end_index < data_length: + if sentence_ids[end_index] != sentence_ids[end_index - 1]: + if end_index - begin_index >= total_length: + break + cut_indices.append(end_index) + end_index += 1 + + a_begin = begin_index + + if not cut_indices or random.random() < no_cut_probability: + # Segments A and B are contained within the same sentence. 
+ label = 0 + if not cut_indices: + a_end = end_index + else: + a_end = random.choice(cut_indices) + b_length = max(1, total_length - (a_end - a_begin)) + b_begin = random.randint(0, data_length - 1 - b_length) + b_end = b_begin + b_length + + while b_begin > 0 and sentence_ids[b_begin - 1] == sentence_ids[b_begin]: + b_begin -= 1 + while (b_end < data_length - 1 and + sentence_ids[b_end - 1] == sentence_ids[b_end]): + b_end += 1 + else: + # Segments A and B are different sentences. + label = 1 + a_end = random.choice(cut_indices) + b_begin = a_end + b_end = end_index + + while a_end - a_begin + b_end - b_begin > total_length: + if a_end - a_begin > b_end - b_begin: + # Delete only the right side for the LM objective. + a_end -= 1 + else: + b_end -= 1 + if a_end >= data_length or b_end >= data_length: + logging.info("[_create_segments]: a_end %d or b_end %d >= data_length %d", + a_end, b_end, data_length) + return None + + a_data = tokens[a_begin: a_end] + b_data = tokens[b_begin: b_end] + return a_data, b_data, label + + +def _is_functional_piece(piece: str) -> bool: + return piece != "" and piece.startswith("<") and piece.endswith(">") + + +def _is_start_piece(piece: str) -> bool: + special_pieces = set(list('!"#$%&\"()*+,-./:;?@[\\]^_`{|}~')) + if (piece.startswith("▁") or piece in special_pieces): + return True + else: + return False + + +def _get_boundary_indices( + data: np.array, + tokenizer: tokenization.FullSentencePieceTokenizer) -> np.array: + """Gets the boundary indices of whole words.""" + seq_length = len(data) + boundary_indices = [] + for index, piece in enumerate(tokenizer.convert_ids_to_tokens(data.tolist())): + if _is_start_piece(piece) and not _is_functional_piece(piece): + boundary_indices.append(index) + boundary_indices.append(seq_length) + return boundary_indices + + +def _convert_tokens_to_instances( + tokens: np.array, + sentence_ids: np.array, + per_host_batch_size: int, + seq_length: int, + reuse_length: int, + bi_data: bool, + 
tokenizer: tokenization.FullSentencePieceTokenizer, + num_cores_per_host: int = 0, + logging_frequency: int = 500) -> List[TrainingInstance]: + """Converts tokens and sentence IDs into individual training instances. + + The format of data in the XLNet pretraining task is very similar to the + BERT pretraining task. Two segments A and B are randomly sampled, and the + contatenation of A and B into a single sequence is used to perform + language modeling. + + To create an XLNet Pretraining instance from a single long sequence, S: + - Create a segment of length `reuse_length`. This first segment represents + past tokens. During modeling, this segment is used to cache obtained + content representations for the segment recurrence mechanism. + - Similar to BERT, create a segment of length `seq_length` - `reuse_length` + composed of A and B segments. + For XLNet, the order is "A", "SEP", "B", "SEP", "CLS". + + Args: + tokens: All tokens concatenated into a single list. + sentence_ids: All sentence IDs concatenated into a single list. + per_host_batch_size: The target batch size per host. + seq_length: The max sequence length. + reuse_length: The number of tokens to use from the previous segment. + bi_data: Whether or not to use bidirectional data. + tokenizer: The SentencePiece tokenizer that has the attribute `sp_model`. + num_cores_per_host: The number of cores per host. This is required if + `bi_data` = `True`. + logging_frequency: The frequency at which to log status updates. + + Returns: + A list of `TrainingInstance` objects. 
+ """ + instances = [] + + per_core_batch_size = (per_host_batch_size // num_cores_per_host + if bi_data else None) + + if bi_data: + logging.info("Bi-directional data enabled.") + assert per_host_batch_size % (2 * num_cores_per_host) == 0 + forward_tokens, forward_sentence_ids = _reshape_to_batch_dimensions( + tokens=tokens, + sentence_ids=sentence_ids, + per_host_batch_size=per_host_batch_size // 2) + forward_data_shape = (num_cores_per_host, 1, per_core_batch_size // 2, -1) + + forward_tokens = forward_tokens.reshape(forward_data_shape) + forward_sentence_ids = forward_sentence_ids.reshape(forward_data_shape) + + backwards_tokens = forward_tokens[:, :, :, ::-1] + backwards_sentence_ids = forward_sentence_ids[:, :, :, ::-1] + + tokens = np.concatenate([forward_tokens, backwards_tokens], 1).reshape( + per_host_batch_size, -1) + sentence_ids = np.concatenate( + [forward_sentence_ids, backwards_sentence_ids]).reshape( + per_host_batch_size, -1) + else: + logging.info("Bi-directional data disabled.") + tokens, sentence_ids = _reshape_to_batch_dimensions( + tokens=tokens, + sentence_ids=sentence_ids, + per_host_batch_size=per_host_batch_size) + + logging.info("Tokens shape: %s", tokens.shape) + + data_length = tokens.shape[1] + sep = np.array([special_symbols[""]], dtype=np.int64) + cls = np.array([special_symbols[""]], dtype=np.int64) + # 2 sep, 1 cls + num_special_tokens = 3 + + data_index = 0 + batch_number = 0 + step_size = reuse_length if reuse_length else seq_length + num_batches = math.ceil(data_length / step_size) + + while data_index + seq_length <= data_length: + if batch_number % logging_frequency == 0: + logging.info("Processing batch %d of %d", batch_number, num_batches) + + for batch_index in range(per_host_batch_size): + previous_segment_tokens = tokens[ + batch_index, data_index: data_index + reuse_length] + + results = _create_a_and_b_segments( + tokens=tokens[batch_index], + sentence_ids=sentence_ids[batch_index], + begin_index=data_index + 
reuse_length, + total_length=seq_length - reuse_length - num_special_tokens) + + if results is None: + logging.info("Stopping at data index: %d", data_index) + break + a_data, b_data, label = results + + data = np.concatenate( + [previous_segment_tokens, a_data, sep, b_data, sep, cls]) + a_length = a_data.shape[0] + b_length = b_data.shape[0] + segment_ids = ([0] * (reuse_length + a_length) + [0] + + [1] * b_length + [1] + [2]) + boundary_indices = _get_boundary_indices(tokenizer=tokenizer, + data=data) + assert len(data) == seq_length + assert len(segment_ids) == seq_length + assert len(boundary_indices) > 0 # pylint: disable=g-explicit-length-test + + instances.append(TrainingInstance( + data=data, + segment_ids=segment_ids, + boundary_indices=boundary_indices, + label=label)) + batch_number += 1 + data_index += step_size + return instances + + +def write_instances_to_tfrecord( + instances: Iterable[TrainingInstance], + save_path: str): + """Writes instances to TFRecord.""" + record_writer = tf.io.TFRecordWriter(save_path) + logging.info("Start writing to %s.", save_path) + + for i, instance in enumerate(instances): + if i < 5: + logging.info("Instance %d: %s", i, str(instance)) + record_writer.write(instance.to_example().SerializeToString()) + + record_writer.close() + logging.info("Done writing %s.", save_path) + + +def shuffle_and_combine_preprocessed_data( + all_data: List[Tuple[np.array, np.array]]) -> Tuple[np.array, np.array]: + """Shuffles and combines preprocessed token/sentence IDs from documents.""" + document_permutation = np.random.permutation(len(all_data)) + + previous_sentence_id = None + + all_tokens, all_sentence_ids = [], [] + for document_index in document_permutation: + tokens, sentence_ids = all_data[document_index] + # pylint: disable=g-explicit-length-test + if len(tokens) == 0: + continue + if (previous_sentence_id is not None and + sentence_ids[0] == previous_sentence_id): + sentence_ids = np.logical_not(sentence_ids) + + 
all_tokens.append(tokens) + all_sentence_ids.append(sentence_ids) + + previous_sentence_id = sentence_ids[-1] + + return np.concatenate(all_tokens), np.concatenate(all_sentence_ids) + + +def get_tfrecord_name( + per_host_batch_size: int, + num_cores_per_host: int, + seq_length: int, + bi_data: bool, + reuse_length: int, + do_lower_case: bool, + use_eod_token: bool, + prefix: str = "", + suffix: str = "", + pass_id: int = 0, + num_passes: int = 1, + task_id: int = None, + num_tasks: int = None) -> str: + """Formats the resulting TFRecord name based on provided inputs.""" + components = [] + if prefix: + components.append(prefix) + components.append("seqlen-{}".format(seq_length)) + if reuse_length == 0: + components.append("memless") + else: + components.append("reuse-{}".format(reuse_length)) + components.append("bs-{}".format(per_host_batch_size)) + components.append("cores-{}".format(num_cores_per_host)) + + if do_lower_case: + components.append("uncased") + else: + components.append("cased") + if use_eod_token: + components.append("eod") + if bi_data: + components.append("bi") + else: + components.append("uni") + + if suffix: + components.append(suffix) + + s = "_".join(components) + ".tfrecord" + if num_passes == 1 and task_id is None: + return s + + if task_id is None: + num_tasks = 1 + task_id = 0 + + current_shard = task_id * num_passes + pass_id + total_shards = num_tasks * num_passes + return s + "-{}-of-{}".format(current_shard, total_shards) + + +def create_tfrecords( + tokenizer: tokenization.FullSentencePieceTokenizer, + input_file_or_files: str, + use_eod_token: bool, + do_lower_case: bool, + per_host_batch_size: int, + seq_length: int, + reuse_length: int, + bi_data: bool, + num_cores_per_host: int, + save_dir: str, + prefix: str = "", + suffix: str = "", + num_tasks: Optional[int] = None, + task_id: Optional[int] = None, + num_passes: int = 1): + """Runs the end-to-end preprocessing pipeline.""" + + logging.info("Input configuration:") + 
logging.info("input file(s): %s", input_file_or_files) + logging.info("use_eod_token: %s", use_eod_token) + logging.info("do_lower_case: %s", do_lower_case) + logging.info("per_host_batch_size: %d", per_host_batch_size) + logging.info("seq_length: %d", seq_length) + logging.info("reuse_length: %d", reuse_length) + logging.info("bi_data: %s", bi_data) + logging.info("num_cores_per_host: %d", num_cores_per_host) + logging.info("save_dir: %s", save_dir) + if task_id is not None and num_tasks is not None: + logging.info("task_id: %d", task_id) + logging.info("num_tasks: %d", num_tasks) + + input_files = [] + for input_pattern in input_file_or_files.split(","): + input_files.extend(tf.io.gfile.glob(input_pattern)) + + logging.info("*** Reading from input files ***") + for input_file in input_files: + logging.info(" %s", input_file) + + logging.info("Shuffling the files with a fixed random seed.") + np.random.shuffle(input_files) + if num_tasks is not None: + assert task_id is not None + logging.info("Total number of input files: %d", len(input_files)) + logging.info("Splitting into %d shards of %d files each.", + num_tasks, len(input_files) // num_tasks) + input_files = input_files[task_id::num_tasks] + + all_data = preprocess_and_tokenize_input_files( + input_files=input_files, + tokenizer=tokenizer, + use_eod=use_eod_token, + do_lower_case=do_lower_case) + for pass_id in range(num_passes): + logging.info("Beginning pass %d of %d", pass_id, num_passes) + tokens, sentence_ids = shuffle_and_combine_preprocessed_data(all_data) + + assert len(tokens) == len(sentence_ids) + + filename = get_tfrecord_name( + per_host_batch_size=per_host_batch_size, + num_cores_per_host=num_cores_per_host, + seq_length=seq_length, + bi_data=bi_data, + use_eod_token=use_eod_token, + reuse_length=reuse_length, + do_lower_case=do_lower_case, + prefix=prefix, + suffix=suffix, + pass_id=pass_id, + num_passes=num_passes, + num_tasks=num_tasks, + task_id=task_id) + save_path = os.path.join(save_dir, 
filename) + if os.path.exists(save_path): + # If the path already exists, then we were probably preempted but + # previously wrote this file. + logging.info("%s already exists, skipping this batch.", save_path) + else: + instances = _convert_tokens_to_instances( + tokenizer=tokenizer, + tokens=tokens, + sentence_ids=sentence_ids, + per_host_batch_size=per_host_batch_size, + seq_length=seq_length, + reuse_length=reuse_length, + bi_data=bi_data, + num_cores_per_host=num_cores_per_host) + write_instances_to_tfrecord(instances=instances, save_path=save_path) + + if task_id is None or task_id == 0: + corpus_info = { + "vocab_size": 32000, + "per_host_batch_size": per_host_batch_size, + "num_cores_per_host": num_cores_per_host, + "seq_length": seq_length, + "reuse_length": reuse_length, + "do_lower_case": do_lower_case, + "bi_data": bi_data, + "use_eod_token": use_eod_token, + } + corpus_fname = os.path.basename(filename) + ".json" + corpus_destination = os.path.join(save_dir, corpus_fname) + logging.info("Saving corpus info to %s", corpus_destination) + + with tf.io.gfile.GFile(corpus_destination, "w") as fp: + json.dump(corpus_info, fp) + + +def main(_): + tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file) + create_tfrecords( + tokenizer=tokenizer, + input_file_or_files=FLAGS.input_file, + use_eod_token=FLAGS.use_eod_token, + do_lower_case=FLAGS.do_lower_case, + per_host_batch_size=FLAGS.per_host_batch_size, + seq_length=FLAGS.seq_length, + reuse_length=FLAGS.reuse_length, + bi_data=FLAGS.bi_data, + num_cores_per_host=FLAGS.num_cores_per_host, + save_dir=FLAGS.save_dir, + prefix=FLAGS.prefix, + suffix=FLAGS.suffix, + num_tasks=FLAGS.num_tasks, + task_id=FLAGS.task_id, + num_passes=FLAGS.num_passes) + + +if __name__ == "__main__": + np.random.seed(0) + logging.set_verbosity(logging.INFO) + app.run(main) diff --git a/official/nlp/data/create_xlnet_pretraining_data_test.py b/official/nlp/data/create_xlnet_pretraining_data_test.py new file mode 100644 
index 0000000000000000000000000000000000000000..5630411a7eb0e92b2baf6e203547d1c9063ebd79 --- /dev/null +++ b/official/nlp/data/create_xlnet_pretraining_data_test.py @@ -0,0 +1,355 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.create_xlnet_pretraining_data.""" +import os +import tempfile +from typing import List + +from absl import logging +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.nlp.data import create_xlnet_pretraining_data as cpd + +_VOCAB_WORDS = ["vocab_1", "vocab_2"] + + +# pylint: disable=invalid-name +def _create_files( + temp_dir: str, file_contents: List[List[str]]) -> List[str]: + """Writes arbitrary documents into files.""" + root_dir = tempfile.mkdtemp(dir=temp_dir) + files = [] + + for i, file_content in enumerate(file_contents): + destination = os.path.join(root_dir, "%d.txt" % i) + with open(destination, "wb") as f: + for line in file_content: + f.write(line.encode("utf-8")) + files.append(destination) + return files + + +def _get_mock_tokenizer(): + """Creates a mock tokenizer.""" + + class MockSpieceModel: + """Mock Spiece model for testing.""" + + def __init__(self): + self._special_piece_to_id = { + "": 0, + } + for piece in set(list('!"#$%&\"()*+,-./:;?@[\\]^_`{|}~')): + self._special_piece_to_id[piece] = 1 + + def EncodeAsPieces(self, inputs: str) -> List[str]: + return inputs + + def 
SampleEncodeAsPieces(self, + inputs: str, + nbest_size: int, + theta: float) -> List[str]: + del nbest_size, theta + return inputs + + def PieceToId(self, piece: str) -> int: + return ord(piece[0]) + + def IdToPiece(self, id_: int) -> str: + return chr(id_) * 3 + + class Tokenizer: + """Mock Tokenizer for testing.""" + + def __init__(self): + self.sp_model = MockSpieceModel() + + def convert_ids_to_tokens(self, ids: List[int]) -> List[str]: + return [self.sp_model.IdToPiece(id_) for id_ in ids] + + return Tokenizer() + + +class PreprocessDataTest(tf.test.TestCase): + + def test_remove_extraneous_space(self): + line = " abc " + output = cpd._preprocess_line(line) + self.assertEqual(output, "abc") + + def test_symbol_replacements(self): + self.assertEqual(cpd._preprocess_line("``abc``"), "\"abc\"") + self.assertEqual(cpd._preprocess_line("''abc''"), "\"abc\"") + + def test_accent_replacements(self): + self.assertEqual(cpd._preprocess_line("åbc"), "abc") + + def test_lower_case(self): + self.assertEqual(cpd._preprocess_line("ABC", do_lower_case=True), "abc") + + def test_end_to_end(self): + self.assertEqual( + cpd._preprocess_line("HelLo ``wórLd``", do_lower_case=True), + "hello \"world\"") + + +class PreprocessAndTokenizeFilesTest(tf.test.TestCase): + + def test_basic_end_to_end(self): + documents = [ + [ + "This is sentence 1.\n", + "This is sentence 2.\n", + "Sentence 3 is what this is.\n", + ], + [ + "This is the second document.\n", + "This is the second line of the second document.\n" + ], + ] + input_files = _create_files(temp_dir=self.get_temp_dir(), + file_contents=documents) + all_data = cpd.preprocess_and_tokenize_input_files( + input_files=input_files, + tokenizer=_get_mock_tokenizer(), + log_example_freq=1) + + self.assertEqual(len(all_data), len(documents)) + for token_ids, sentence_ids in all_data: + self.assertEqual(len(token_ids), len(sentence_ids)) + + def test_basic_correctness(self): + documents = [["a\n", "b\n", "c\n"]] + input_files = 
_create_files(temp_dir=self.get_temp_dir(), + file_contents=documents) + all_data = cpd.preprocess_and_tokenize_input_files( + input_files=input_files, + tokenizer=_get_mock_tokenizer(), + log_example_freq=1) + + token_ids, sentence_ids = all_data[0] + + self.assertAllClose(token_ids, [97, 98, 99]) + self.assertAllClose(sentence_ids, [True, False, True]) + + def test_correctness_with_spaces_and_accents(self): + documents = [[ + " å \n", + "b \n", + " c \n", + ]] + input_files = _create_files(temp_dir=self.get_temp_dir(), + file_contents=documents) + all_data = cpd.preprocess_and_tokenize_input_files( + input_files=input_files, + tokenizer=_get_mock_tokenizer(), + log_example_freq=1) + + token_ids, sentence_ids = all_data[0] + + self.assertAllClose(token_ids, [97, 98, 99]) + self.assertAllClose(sentence_ids, [True, False, True]) + + +class BatchReshapeTests(tf.test.TestCase): + + def test_basic_functionality(self): + per_host_batch_size = 3 + mock_shape = (20,) + + # Should truncate and reshape. + expected_result_shape = (3, 6) + + tokens = np.zeros(mock_shape) + sentence_ids = np.zeros(mock_shape) + + reshaped_data = cpd._reshape_to_batch_dimensions( + tokens=tokens, + sentence_ids=sentence_ids, + per_host_batch_size=per_host_batch_size) + for values in reshaped_data: + self.assertEqual(len(values.flatten()) % per_host_batch_size, 0) + self.assertAllClose(values.shape, expected_result_shape) + + +class CreateSegmentsTest(tf.test.TestCase): + + def test_basic_functionality(self): + data_length = 10 + tokens = np.arange(data_length) + sentence_ids = np.concatenate([np.zeros(data_length // 2), + np.ones(data_length // 2)]) + begin_index = 0 + total_length = 8 + a_data, b_data, label = cpd._create_a_and_b_segments( + tokens=tokens, + sentence_ids=sentence_ids, + begin_index=begin_index, + total_length=total_length, + no_cut_probability=0.) 
+ self.assertAllClose(a_data, [0, 1, 2, 3]) + self.assertAllClose(b_data, [5, 6, 7, 8]) + self.assertEqual(label, 1) + + def test_no_cut(self): + data_length = 10 + tokens = np.arange(data_length) + sentence_ids = np.zeros(data_length) + + begin_index = 0 + total_length = 8 + a_data, b_data, label = cpd._create_a_and_b_segments( + tokens=tokens, + sentence_ids=sentence_ids, + begin_index=begin_index, + total_length=total_length, + no_cut_probability=0.) + self.assertGreater(len(a_data), 0) + self.assertGreater(len(b_data), 0) + self.assertEqual(label, 0) + + def test_no_cut_with_probability(self): + data_length = 10 + tokens = np.arange(data_length) + sentence_ids = np.concatenate([np.zeros(data_length // 2), + np.ones(data_length // 2)]) + begin_index = 0 + total_length = 8 + a_data, b_data, label = cpd._create_a_and_b_segments( + tokens=tokens, + sentence_ids=sentence_ids, + begin_index=begin_index, + total_length=total_length, + no_cut_probability=1.) + self.assertGreater(len(a_data), 0) + self.assertGreater(len(b_data), 0) + self.assertEqual(label, 0) + + +class CreateInstancesTest(tf.test.TestCase): + """Tests conversions of Token/Sentence IDs to training instances.""" + + def test_basic(self): + data_length = 12 + tokens = np.arange(data_length) + sentence_ids = np.zeros(data_length) + seq_length = 8 + instances = cpd._convert_tokens_to_instances( + tokens=tokens, + sentence_ids=sentence_ids, + per_host_batch_size=2, + seq_length=seq_length, + reuse_length=4, + tokenizer=_get_mock_tokenizer(), + bi_data=False, + num_cores_per_host=1, + logging_frequency=1) + for instance in instances: + self.assertEqual(len(instance.data), seq_length) + self.assertEqual(len(instance.segment_ids), seq_length) + self.assertIsInstance(instance.label, int) + self.assertIsInstance(instance.boundary_indices, list) + + +class TFRecordPathTests(tf.test.TestCase): + + def test_basic(self): + base_kwargs = dict( + per_host_batch_size=1, + num_cores_per_host=1, + seq_length=2, + 
reuse_length=1) + + config1 = dict( + prefix="test", + suffix="", + bi_data=True, + use_eod_token=False, + do_lower_case=True) + config1.update(base_kwargs) + expectation1 = "test_seqlen-2_reuse-1_bs-1_cores-1_uncased_bi.tfrecord" + self.assertEqual(cpd.get_tfrecord_name(**config1), expectation1) + + config2 = dict( + prefix="", + suffix="test", + bi_data=False, + use_eod_token=False, + do_lower_case=False) + config2.update(base_kwargs) + expectation2 = "seqlen-2_reuse-1_bs-1_cores-1_cased_uni_test.tfrecord" + self.assertEqual(cpd.get_tfrecord_name(**config2), expectation2) + + config3 = dict( + prefix="", + suffix="", + use_eod_token=True, + bi_data=False, + do_lower_case=True) + config3.update(base_kwargs) + expectation3 = "seqlen-2_reuse-1_bs-1_cores-1_uncased_eod_uni.tfrecord" + self.assertEqual(cpd.get_tfrecord_name(**config3), expectation3) + + +class TestCreateTFRecords(parameterized.TestCase, tf.test.TestCase): + + @parameterized.named_parameters( + ("bi_data_only", True, False, False), + ("eod_token_only", False, True, True), + ("lower_case_only", False, False, True), + ("all_enabled", True, True, True), + ) + def test_end_to_end(self, + bi_data: bool, + use_eod_token: bool, + do_lower_case: bool): + tokenizer = _get_mock_tokenizer() + + num_documents = 5 + sentences_per_document = 10 + document_length = 50 + + documents = [ + ["a " * document_length for _ in range(sentences_per_document)] + for _ in range(num_documents)] + + save_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) + files = _create_files(temp_dir=self.get_temp_dir(), file_contents=documents) + + cpd.create_tfrecords( + tokenizer=tokenizer, + input_file_or_files=",".join(files), + use_eod_token=use_eod_token, + do_lower_case=do_lower_case, + per_host_batch_size=8, + seq_length=8, + reuse_length=4, + bi_data=bi_data, + num_cores_per_host=2, + save_dir=save_dir) + + self.assertTrue(any(filter(lambda x: x.endswith(".json"), + os.listdir(save_dir)))) + self.assertTrue(any(filter(lambda x: 
x.endswith(".tfrecord"), + os.listdir(save_dir)))) + + +if __name__ == "__main__": + np.random.seed(0) + logging.set_verbosity(logging.INFO) + tf.test.main() diff --git a/official/nlp/data/data_loader.py b/official/nlp/data/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..2b181270658f42f0819f01fbed5af7989b1d3e5d --- /dev/null +++ b/official/nlp/data/data_loader.py @@ -0,0 +1,48 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An abstraction that NLP models define input pipelines.""" + +import abc +from typing import Optional + +import tensorflow as tf + + +class DataLoader(metaclass=abc.ABCMeta): + """An abstract class defining the APIs for tf.data input pipeline.""" + + @abc.abstractmethod + def load( + self, + input_context: Optional[tf.distribute.InputContext] = None + ) -> tf.data.Dataset: + """Implements DataLoader load method. + + Builds the entire input pipeline inside the load method. Users can define + states inside the DataLoader class and returns a tf.data dataset + object. + + Args: + input_context: This is a context class that is passed to the user's input + function and contains information about the compute replicas and input + pipelines. This object is used for multi-host inputs and passed by the + distribution strategy. + + Returns: + A per-host tf.data dataset. 
Note that, we usually create the distributed + dataset through the load method, so we should not directly return a + distributed dataset here. + """ + pass diff --git a/official/nlp/data/data_loader_factory.py b/official/nlp/data/data_loader_factory.py index a88caea67fe93f4b5166bb8bcf97841082fdd449..9602ea295283e5490d1bcb5cc67df9f99ebdb0ca 100644 --- a/official/nlp/data/data_loader_factory.py +++ b/official/nlp/data/data_loader_factory.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A global factory to access NLP registered data loaders.""" -from official.utils import registry +from official.core import registry _REGISTERED_DATA_LOADER_CLS = {} diff --git a/official/nlp/data/data_loader_factory_test.py b/official/nlp/data/data_loader_factory_test.py new file mode 100644 index 0000000000000000000000000000000000000000..8aa86757df64a445692ce4bf8ff64e6649b6dfa6 --- /dev/null +++ b/official/nlp/data/data_loader_factory_test.py @@ -0,0 +1,45 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.data_loader_factory.""" + +import dataclasses +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.nlp.data import data_loader_factory + + +@dataclasses.dataclass +class MyDataConfig(cfg.DataConfig): + is_training: bool = True + + +@data_loader_factory.register_data_loader_cls(MyDataConfig) +class MyDataLoader: + + def __init__(self, params): + self.params = params + + +class DataLoaderFactoryTest(tf.test.TestCase): + + def test_register_and_load(self): + train_config = MyDataConfig() + train_loader = data_loader_factory.get_data_loader(train_config) + self.assertTrue(train_loader.params.is_training) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/data/pretrain_dataloader.py b/official/nlp/data/pretrain_dataloader.py index 985a7a5cc6c3f2e8a811d4fafbe6c731a1033f20..dbb7953c3fd7f1562d7b3ec07c58b09eefef8e25 100644 --- a/official/nlp/data/pretrain_dataloader.py +++ b/official/nlp/data/pretrain_dataloader.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,15 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Loads dataset for the BERT pretraining task.""" from typing import Mapping, Optional +from absl import logging + import dataclasses +import numpy as np import tensorflow as tf - +from official.core import config_definitions as cfg from official.core import input_reader -from official.modeling.hyperparams import config_definitions as cfg +from official.nlp.data import data_loader from official.nlp.data import data_loader_factory @@ -34,10 +36,16 @@ class BertPretrainDataConfig(cfg.DataConfig): max_predictions_per_seq: int = 76 use_next_sentence_label: bool = True use_position_id: bool = False + # Historically, BERT implementations take `input_ids` and `segment_ids` as + # feature names. Inside the TF Model Garden implementation, the Keras model + # inputs are set as `input_word_ids` and `input_type_ids`. When + # v2_feature_names is True, the data loader assumes the tf.Examples use + # `input_word_ids` and `input_type_ids` as keys. 
+ use_v2_feature_names: bool = False @data_loader_factory.register_data_loader_cls(BertPretrainDataConfig) -class BertPretrainDataLoader: +class BertPretrainDataLoader(data_loader.DataLoader): """A class to load dataset for bert pretraining task.""" def __init__(self, params): @@ -52,15 +60,10 @@ class BertPretrainDataLoader: self._use_next_sentence_label = params.use_next_sentence_label self._use_position_id = params.use_position_id - def _decode(self, record: tf.Tensor): - """Decodes a serialized tf.Example.""" + def _name_to_features(self): name_to_features = { - 'input_ids': - tf.io.FixedLenFeature([self._seq_length], tf.int64), 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), - 'segment_ids': - tf.io.FixedLenFeature([self._seq_length], tf.int64), 'masked_lm_positions': tf.io.FixedLenFeature([self._max_predictions_per_seq], tf.int64), 'masked_lm_ids': @@ -68,13 +71,27 @@ class BertPretrainDataLoader: 'masked_lm_weights': tf.io.FixedLenFeature([self._max_predictions_per_seq], tf.float32), } + if self._params.use_v2_feature_names: + name_to_features.update({ + 'input_word_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'input_type_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + }) + else: + name_to_features.update({ + 'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + }) if self._use_next_sentence_label: name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature([1], tf.int64) if self._use_position_id: name_to_features['position_ids'] = tf.io.FixedLenFeature( [self._seq_length], tf.int64) + return name_to_features + def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = self._name_to_features() example = tf.io.parse_single_example(record, name_to_features) # tf.Example only supports tf.int64, but the TPU only supports tf.int32. 
@@ -90,13 +107,17 @@ class BertPretrainDataLoader:
   def _parse(self, record: Mapping[str, tf.Tensor]):
     """Parses raw tensors into a dict of tensors to be consumed by the model."""
     x = {
-        'input_word_ids': record['input_ids'],
         'input_mask': record['input_mask'],
-        'input_type_ids': record['segment_ids'],
         'masked_lm_positions': record['masked_lm_positions'],
         'masked_lm_ids': record['masked_lm_ids'],
         'masked_lm_weights': record['masked_lm_weights'],
     }
+    if self._params.use_v2_feature_names:
+      x['input_word_ids'] = record['input_word_ids']
+      x['input_type_ids'] = record['input_type_ids']
+    else:
+      x['input_word_ids'] = record['input_ids']
+      x['input_type_ids'] = record['segment_ids']
     if self._use_next_sentence_label:
       x['next_sentence_labels'] = record['next_sentence_labels']
     if self._use_position_id:
@@ -109,3 +130,475 @@ class BertPretrainDataLoader:
     reader = input_reader.InputReader(
         params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
     return reader.read(input_context)
+
+
+@dataclasses.dataclass
+class XLNetPretrainDataConfig(cfg.DataConfig):
+  """Data config for XLNet pretraining task.
+
+  Attributes:
+    input_path: See base class.
+    global_batch_size: See base class.
+    is_training: See base class.
+    seq_length: The length of each sequence.
+    max_predictions_per_seq: The number of predictions per sequence.
+    reuse_length: The number of tokens in a previous segment to reuse. This
+      should be the same value used during pretrain data creation.
+    sample_strategy: The strategy used to sample factorization permutations.
+      Possible values: 'single_token', 'whole_word', 'token_span', 'word_span'.
+    min_num_tokens: The minimum number of tokens to sample in a span.
+      This is used when `sample_strategy` is 'token_span'.
+    max_num_tokens: The maximum number of tokens to sample in a span.
+      This is used when `sample_strategy` is 'token_span'.
+    min_num_words: The minimum number of words to sample in a span.
+      This is used when `sample_strategy` is 'word_span'.
+ max_num_words: The maximum number of words to sample in a span. + This is used when `sample_strategy` is 'word_span'. + permutation_size: The length of the longest permutation. This can be set + to `reuse_length`. This should NOT be greater than `reuse_length`, + otherwise this may introduce data leaks. + leak_ratio: The percentage of masked tokens that are leaked. + segment_sep_id: The ID of the SEP token used when preprocessing + the dataset. + segment_cls_id: The ID of the CLS token used when preprocessing + the dataset. + + """ + input_path: str = '' + global_batch_size: int = 512 + is_training: bool = True + seq_length: int = 512 + max_predictions_per_seq: int = 76 + reuse_length: int = 256 + sample_strategy: str = 'word_span' + min_num_tokens: int = 1 + max_num_tokens: int = 5 + min_num_words: int = 1 + max_num_words: int = 5 + permutation_size: int = 256 + leak_ratio: float = 0.1 + segment_sep_id: int = 4 + segment_cls_id: int = 3 + + +@data_loader_factory.register_data_loader_cls(XLNetPretrainDataConfig) +class XLNetPretrainDataLoader(data_loader.DataLoader): + """A class to load dataset for xlnet pretraining task.""" + + def __init__(self, params: XLNetPretrainDataConfig): + """Inits `XLNetPretrainDataLoader` class. + + Args: + params: A `XLNetPretrainDataConfig` object. 
+ """ + self._params = params + self._seq_length = params.seq_length + self._max_predictions_per_seq = params.max_predictions_per_seq + self._reuse_length = params.reuse_length + self._num_replicas_in_sync = None + self._permutation_size = params.permutation_size + self._sep_id = params.segment_sep_id + self._cls_id = params.segment_cls_id + self._sample_strategy = params.sample_strategy + self._leak_ratio = params.leak_ratio + + def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = { + 'input_word_ids': + tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'input_type_ids': + tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'boundary_indices': + tf.io.VarLenFeature(tf.int64), + } + example = tf.io.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. + for name in list(example.keys()): + t = example[name] + if t.dtype == tf.int64: + t = tf.cast(t, tf.int32) + example[name] = t + + return example + + def _parse(self, record: Mapping[str, tf.Tensor]): + """Parses raw tensors into a dict of tensors to be consumed by the model.""" + x = {} + + inputs = record['input_word_ids'] + x['input_type_ids'] = record['input_type_ids'] + + if self._sample_strategy in ['whole_word', 'word_span']: + boundary = tf.sparse.to_dense(record['boundary_indices']) + else: + boundary = None + + input_mask = self._online_sample_mask(inputs=inputs, boundary=boundary) + + if self._reuse_length > 0: + if self._permutation_size > self._reuse_length: + logging.warning( + '`permutation_size` is greater than `reuse_length` (%d > %d).' + 'This may introduce data leakage.', + self._permutation_size, self._reuse_length) + + # Enable the memory mechanism. + # Permute the reuse and non-reuse segments separately. 
+      non_reuse_len = self._seq_length - self._reuse_length
+      if not (self._reuse_length % self._permutation_size == 0
+              and non_reuse_len % self._permutation_size == 0):
+        raise ValueError('`reuse_length` and `seq_length` should both be '
+                         'a multiple of `permutation_size`.')
+
+      # Creates permutation mask and target mask for the first reuse_len tokens.
+      # The tokens in this part are reused from the last sequence.
+      perm_mask_0, target_mask_0, tokens_0, masked_0 = self._get_factorization(
+          inputs=inputs[:self._reuse_length],
+          input_mask=input_mask[:self._reuse_length])
+
+      # Creates permutation mask and target mask for the rest of tokens in
+      # current example, which are the concatenation of two new segments.
+      perm_mask_1, target_mask_1, tokens_1, masked_1 = self._get_factorization(
+          inputs[self._reuse_length:], input_mask[self._reuse_length:])
+
+      perm_mask_0 = tf.concat(
+          [perm_mask_0,
+           tf.zeros([self._reuse_length, non_reuse_len], dtype=tf.int32)],
+          axis=1)
+      perm_mask_1 = tf.concat(
+          [tf.ones([non_reuse_len, self._reuse_length], dtype=tf.int32),
+           perm_mask_1], axis=1)
+      perm_mask = tf.concat([perm_mask_0, perm_mask_1], axis=0)
+      target_mask = tf.concat([target_mask_0, target_mask_1], axis=0)
+      tokens = tf.concat([tokens_0, tokens_1], axis=0)
+      masked_tokens = tf.concat([masked_0, masked_1], axis=0)
+    else:
+      # Disable the memory mechanism.
+ if self._seq_length % self._permutation_size != 0: + raise ValueError('`seq_length` should be a multiple of ' + '`permutation_size`.') + # Permute the entire sequence together + perm_mask, target_mask, tokens, masked_tokens = self._get_factorization( + inputs=inputs, input_mask=input_mask) + x['permutation_mask'] = tf.reshape( + perm_mask, [self._seq_length, self._seq_length]) + x['input_word_ids'] = tokens + x['masked_tokens'] = masked_tokens + + target = tokens + if self._max_predictions_per_seq is not None: + indices = tf.range(self._seq_length, dtype=tf.int32) + bool_target_mask = tf.cast(target_mask, tf.bool) + indices = tf.boolean_mask(indices, bool_target_mask) + + # account for extra padding due to CLS/SEP. + actual_num_predict = tf.shape(indices)[0] + pad_len = self._max_predictions_per_seq - actual_num_predict + + target_mapping = tf.one_hot(indices, self._seq_length, dtype=tf.int32) + paddings = tf.zeros([pad_len, self._seq_length], + dtype=target_mapping.dtype) + target_mapping = tf.concat([target_mapping, paddings], axis=0) + x['target_mapping'] = tf.reshape( + target_mapping, [self._max_predictions_per_seq, self._seq_length]) + + target = tf.boolean_mask(target, bool_target_mask) + paddings = tf.zeros([pad_len], dtype=target.dtype) + target = tf.concat([target, paddings], axis=0) + x['target'] = tf.reshape(target, [self._max_predictions_per_seq]) + + target_mask = tf.concat([ + tf.ones([actual_num_predict], dtype=tf.int32), + tf.zeros([pad_len], dtype=tf.int32) + ], axis=0) + x['target_mask'] = tf.reshape(target_mask, + [self._max_predictions_per_seq]) + else: + x['target'] = tf.reshape(target, [self._seq_length]) + x['target_mask'] = tf.reshape(target_mask, [self._seq_length]) + return x + + def _index_pair_to_mask(self, + begin_indices: tf.Tensor, + end_indices: tf.Tensor, + inputs: tf.Tensor) -> tf.Tensor: + """Converts beginning and end indices into an actual mask.""" + non_func_mask = tf.logical_and( + tf.not_equal(inputs, self._sep_id), 
tf.not_equal(inputs, self._cls_id)) + all_indices = tf.where( + non_func_mask, + tf.range(self._seq_length, dtype=tf.int32), + tf.constant(-1, shape=[self._seq_length], dtype=tf.int32)) + candidate_matrix = tf.cast( + tf.logical_and(all_indices[None, :] >= begin_indices[:, None], + all_indices[None, :] < end_indices[:, None]), tf.float32) + cumsum_matrix = tf.reshape( + tf.cumsum(tf.reshape(candidate_matrix, [-1])), [-1, self._seq_length]) + masked_matrix = tf.cast(cumsum_matrix <= self._max_predictions_per_seq, + tf.float32) + target_mask = tf.reduce_sum(candidate_matrix * masked_matrix, axis=0) + return tf.cast(target_mask, tf.bool) + + def _single_token_mask(self, inputs: tf.Tensor) -> tf.Tensor: + """Samples individual tokens as prediction targets.""" + all_indices = tf.range(self._seq_length, dtype=tf.int32) + non_func_mask = tf.logical_and( + tf.not_equal(inputs, self._sep_id), tf.not_equal(inputs, self._cls_id)) + non_func_indices = tf.boolean_mask(all_indices, non_func_mask) + + masked_pos = tf.random.shuffle(non_func_indices) + masked_pos = tf.sort(masked_pos[:self._max_predictions_per_seq]) + + sparse_indices = tf.stack( + [tf.zeros_like(masked_pos), masked_pos], axis=-1) + sparse_indices = tf.cast(sparse_indices, tf.int64) + + sparse_indices = tf.sparse.SparseTensor( + sparse_indices, + values=tf.ones_like(masked_pos), + dense_shape=(1, self._seq_length)) + + target_mask = tf.sparse.to_dense( + sp_input=sparse_indices, + default_value=0) + + return tf.squeeze(tf.cast(target_mask, tf.bool)) + + def _whole_word_mask(self, + inputs: tf.Tensor, + boundary: tf.Tensor) -> tf.Tensor: + """Samples whole words as prediction targets.""" + pair_indices = tf.concat([boundary[:-1, None], boundary[1:, None]], axis=1) + cand_pair_indices = tf.random.shuffle( + pair_indices)[:self._max_predictions_per_seq] + begin_indices = cand_pair_indices[:, 0] + end_indices = cand_pair_indices[:, 1] + + return self._index_pair_to_mask( + begin_indices=begin_indices, + 
end_indices=end_indices, + inputs=inputs) + + def _token_span_mask(self, inputs: tf.Tensor) -> tf.Tensor: + """Samples token spans as prediction targets.""" + min_num_tokens = self._params.min_num_tokens + max_num_tokens = self._params.max_num_tokens + + mask_alpha = self._seq_length / self._max_predictions_per_seq + round_to_int = lambda x: tf.cast(tf.round(x), tf.int32) + + # Sample span lengths from a zipf distribution + span_len_seq = np.arange(min_num_tokens, max_num_tokens + 1) + probs = np.array([1.0 / (i + 1) for i in span_len_seq]) + + probs /= np.sum(probs) + logits = tf.constant(np.log(probs), dtype=tf.float32) + span_lens = tf.random.categorical( + logits=logits[None], + num_samples=self._max_predictions_per_seq, + dtype=tf.int32, + )[0] + min_num_tokens + + # Sample the ratio [0.0, 1.0) of left context lengths + span_lens_float = tf.cast(span_lens, tf.float32) + left_ratio = tf.random.uniform( + shape=[self._max_predictions_per_seq], minval=0.0, maxval=1.0) + left_ctx_len = left_ratio * span_lens_float * (mask_alpha - 1) + left_ctx_len = round_to_int(left_ctx_len) + + # Compute the offset from left start to the right end + right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len + + # Get the actual begin and end indices + begin_indices = ( + tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) + end_indices = begin_indices + span_lens + + # Remove out of range indices + valid_idx_mask = end_indices < self._seq_length + begin_indices = tf.boolean_mask(begin_indices, valid_idx_mask) + end_indices = tf.boolean_mask(end_indices, valid_idx_mask) + + # Shuffle valid indices + num_valid = tf.cast(tf.shape(begin_indices)[0], tf.int32) + order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int32)) + begin_indices = tf.gather(begin_indices, order) + end_indices = tf.gather(end_indices, order) + + return self._index_pair_to_mask( + begin_indices=begin_indices, + end_indices=end_indices, + inputs=inputs) + + def 
_word_span_mask(self, + inputs: tf.Tensor, + boundary: tf.Tensor): + """Sample whole word spans as prediction targets.""" + min_num_words = self._params.min_num_words + max_num_words = self._params.max_num_words + + # Note: 1.2 is the token-to-word ratio + mask_alpha = self._seq_length / self._max_predictions_per_seq / 1.2 + round_to_int = lambda x: tf.cast(tf.round(x), tf.int32) + + # Sample span lengths from a zipf distribution + span_len_seq = np.arange(min_num_words, max_num_words + 1) + probs = np.array([1.0 / (i + 1) for i in span_len_seq]) + probs /= np.sum(probs) + logits = tf.constant(np.log(probs), dtype=tf.float32) + + # Sample `num_predict` words here: note that this is over sampling + span_lens = tf.random.categorical( + logits=logits[None], + num_samples=self._max_predictions_per_seq, + dtype=tf.int32, + )[0] + min_num_words + + # Sample the ratio [0.0, 1.0) of left context lengths + span_lens_float = tf.cast(span_lens, tf.float32) + left_ratio = tf.random.uniform( + shape=[self._max_predictions_per_seq], minval=0.0, maxval=1.0) + left_ctx_len = left_ratio * span_lens_float * (mask_alpha - 1) + + left_ctx_len = round_to_int(left_ctx_len) + right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len + + begin_indices = ( + tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) + end_indices = begin_indices + span_lens + + # Remove out of range indices + max_boundary_index = tf.cast(tf.shape(boundary)[0] - 1, tf.int32) + valid_idx_mask = end_indices < max_boundary_index + begin_indices = tf.boolean_mask(begin_indices, valid_idx_mask) + end_indices = tf.boolean_mask(end_indices, valid_idx_mask) + + begin_indices = tf.gather(boundary, begin_indices) + end_indices = tf.gather(boundary, end_indices) + + # Shuffle valid indices + num_valid = tf.cast(tf.shape(begin_indices)[0], tf.int32) + order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int32)) + begin_indices = tf.gather(begin_indices, order) + end_indices = 
tf.gather(end_indices, order) + + return self._index_pair_to_mask( + begin_indices=begin_indices, + end_indices=end_indices, + inputs=inputs) + + def _online_sample_mask(self, + inputs: tf.Tensor, + boundary: tf.Tensor) -> tf.Tensor: + """Samples target positions for predictions. + + Descriptions of each strategy: + - 'single_token': Samples individual tokens as prediction targets. + - 'token_span': Samples spans of tokens as prediction targets. + - 'whole_word': Samples individual words as prediction targets. + - 'word_span': Samples spans of words as prediction targets. + + Args: + inputs: The input tokens. + boundary: The `int` Tensor of indices indicating whole word boundaries. + This is used in 'whole_word' and 'word_span' + + Returns: + The sampled `bool` input mask. + + Raises: + `ValueError`: if `max_predictions_per_seq` is not set or if boundary is + not provided for 'whole_word' and 'word_span' sample strategies. + """ + if self._max_predictions_per_seq is None: + raise ValueError('`max_predictions_per_seq` must be set.') + + if boundary is None and 'word' in self._sample_strategy: + raise ValueError('`boundary` must be provided for {} strategy'.format( + self._sample_strategy)) + + if self._sample_strategy == 'single_token': + return self._single_token_mask(inputs) + elif self._sample_strategy == 'token_span': + return self._token_span_mask(inputs) + elif self._sample_strategy == 'whole_word': + return self._whole_word_mask(inputs, boundary) + elif self._sample_strategy == 'word_span': + return self._word_span_mask(inputs, boundary) + else: + raise NotImplementedError('Invalid sample strategy.') + + def _get_factorization(self, + inputs: tf.Tensor, + input_mask: tf.Tensor): + """Samples a permutation of the factorization order. + + Args: + inputs: the input tokens. + input_mask: the `bool` Tensor of the same shape as `inputs`. + If `True`, then this means select for partial prediction. 
+ + Returns: + perm_mask: An `int32` Tensor of shape [seq_length, seq_length] consisting + of 0s and 1s. If perm_mask[i][j] == 0, then this means that the i-th + token (in original order) cannot attend to the jth attention token. + target_mask: An `int32` Tensor of shape [seq_len] consisting of 0s and 1s. + If target_mask[i] == 1, then the i-th token needs to be predicted and + the mask will be used as input. This token will be included in the loss. + If target_mask[i] == 0, then the token (or [SEP], [CLS]) will be used as + input. This token will not be included in the loss. + tokens: int32 Tensor of shape [seq_length]. + masked_tokens: int32 Tensor of shape [seq_length]. + + """ + factorization_length = tf.shape(inputs)[0] + # Generate permutation indices + index = tf.range(factorization_length, dtype=tf.int32) + index = tf.transpose(tf.reshape(index, [-1, self._permutation_size])) + index = tf.random.shuffle(index) + index = tf.reshape(tf.transpose(index), [-1]) + + input_mask = tf.cast(input_mask, tf.bool) + + # non-functional tokens + non_func_tokens = tf.logical_not( + tf.logical_or( + tf.equal(inputs, self._sep_id), tf.equal(inputs, self._cls_id))) + masked_tokens = tf.logical_and(input_mask, non_func_tokens) + non_masked_or_func_tokens = tf.logical_not(masked_tokens) + + smallest_index = -2 * tf.ones([factorization_length], dtype=tf.int32) + + # Similar to BERT, randomly leak some masked tokens + if self._leak_ratio > 0: + leak_tokens = tf.logical_and( + masked_tokens, + tf.random.uniform([factorization_length], + maxval=1.0) < self._leak_ratio) + can_attend_self = tf.logical_or(non_masked_or_func_tokens, leak_tokens) + else: + can_attend_self = non_masked_or_func_tokens + to_index = tf.where(can_attend_self, smallest_index, index) + from_index = tf.where(can_attend_self, to_index + 1, to_index) + + # For masked tokens, can attend if i > j + # For context tokens, always can attend each other + can_attend = from_index[:, None] > to_index[None, :] + + 
perm_mask = tf.cast(can_attend, tf.int32)
+
+    # Only masked tokens are included in the loss
+    target_mask = tf.cast(masked_tokens, tf.int32)
+
+    return perm_mask, target_mask, inputs, masked_tokens
+
+  def load(self, input_context: Optional[tf.distribute.InputContext] = None):
+    """Returns a tf.data.Dataset."""
+    if input_context:
+      self._num_replicas_in_sync = input_context.num_replicas_in_sync
+    reader = input_reader.InputReader(
+        params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
+    return reader.read(input_context)
diff --git a/official/nlp/data/pretrain_dataloader_test.py b/official/nlp/data/pretrain_dataloader_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f3807c907ad9cbb2007425ac13bba620491dce6
--- /dev/null
+++ b/official/nlp/data/pretrain_dataloader_test.py
@@ -0,0 +1,242 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Tests for official.nlp.data.pretrain_dataloader.""" +import itertools +import os + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.nlp.data import pretrain_dataloader + + +def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + +def _create_fake_bert_dataset( + output_path, + seq_length, + max_predictions_per_seq, + use_position_id, + use_next_sentence_label, + use_v2_feature_names=False): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_float_feature(values): + f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + return f + + for _ in range(100): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + features["input_mask"] = create_int_feature(np.ones_like(input_ids)) + if use_v2_feature_names: + features["input_word_ids"] = create_int_feature(input_ids) + features["input_type_ids"] = create_int_feature(np.ones_like(input_ids)) + else: + features["input_ids"] = create_int_feature(input_ids) + features["segment_ids"] = create_int_feature(np.ones_like(input_ids)) + + features["masked_lm_positions"] = create_int_feature( + np.random.randint(100, size=(max_predictions_per_seq))) + features["masked_lm_ids"] = create_int_feature( + np.random.randint(100, size=(max_predictions_per_seq))) + features["masked_lm_weights"] = create_float_feature( + [1.0] * max_predictions_per_seq) + + if use_next_sentence_label: + features["next_sentence_labels"] = create_int_feature([1]) + + if use_position_id: + features["position_ids"] = create_int_feature(range(0, seq_length)) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +def _create_fake_xlnet_dataset( + output_path, seq_length, max_predictions_per_seq): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + 
for _ in range(100): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + num_boundary_indices = np.random.randint(1, seq_length) + + if max_predictions_per_seq is not None: + input_mask = np.zeros_like(input_ids) + input_mask[:max_predictions_per_seq] = 1 + np.random.shuffle(input_mask) + else: + input_mask = np.ones_like(input_ids) + + features["input_mask"] = create_int_feature(input_mask) + features["input_word_ids"] = create_int_feature(input_ids) + features["input_type_ids"] = create_int_feature(np.ones_like(input_ids)) + features["boundary_indices"] = create_int_feature( + sorted(np.random.randint(seq_length, size=(num_boundary_indices)))) + features["target"] = create_int_feature(input_ids + 1) + features["label"] = create_int_feature([1]) + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +class BertPretrainDataTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(itertools.product( + (False, True), + (False, True), + )) + def test_load_data(self, use_next_sentence_label, use_position_id): + train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") + seq_length = 128 + max_predictions_per_seq = 20 + _create_fake_bert_dataset( + train_data_path, + seq_length, + max_predictions_per_seq, + use_next_sentence_label=use_next_sentence_label, + use_position_id=use_position_id) + data_config = pretrain_dataloader.BertPretrainDataConfig( + input_path=train_data_path, + max_predictions_per_seq=max_predictions_per_seq, + seq_length=seq_length, + global_batch_size=10, + is_training=True, + use_next_sentence_label=use_next_sentence_label, + use_position_id=use_position_id) + + dataset = pretrain_dataloader.BertPretrainDataLoader(data_config).load() + features = next(iter(dataset)) + self.assertLen(features, + 6 + int(use_next_sentence_label) + int(use_position_id)) + self.assertIn("input_word_ids", features) + 
self.assertIn("input_mask", features) + self.assertIn("input_type_ids", features) + self.assertIn("masked_lm_positions", features) + self.assertIn("masked_lm_ids", features) + self.assertIn("masked_lm_weights", features) + + self.assertEqual("next_sentence_labels" in features, + use_next_sentence_label) + self.assertEqual("position_ids" in features, use_position_id) + + def test_v2_feature_names(self): + train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") + seq_length = 128 + max_predictions_per_seq = 20 + _create_fake_bert_dataset( + train_data_path, + seq_length, + max_predictions_per_seq, + use_next_sentence_label=True, + use_position_id=False, + use_v2_feature_names=True) + data_config = pretrain_dataloader.BertPretrainDataConfig( + input_path=train_data_path, + max_predictions_per_seq=max_predictions_per_seq, + seq_length=seq_length, + global_batch_size=10, + is_training=True, + use_next_sentence_label=True, + use_position_id=False, + use_v2_feature_names=True) + + dataset = pretrain_dataloader.BertPretrainDataLoader(data_config).load() + features = next(iter(dataset)) + self.assertIn("input_word_ids", features) + self.assertIn("input_mask", features) + self.assertIn("input_type_ids", features) + self.assertIn("masked_lm_positions", features) + self.assertIn("masked_lm_ids", features) + self.assertIn("masked_lm_weights", features) + + +class XLNetPretrainDataTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters(itertools.product( + ("single_token", "whole_word", "token_span"), + (0, 64), + (20, None), + )) + def test_load_data( + self, sample_strategy, reuse_length, max_predictions_per_seq): + train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") + seq_length = 128 + batch_size = 5 + + _create_fake_xlnet_dataset( + train_data_path, seq_length, max_predictions_per_seq) + + data_config = pretrain_dataloader.XLNetPretrainDataConfig( + input_path=train_data_path, + 
max_predictions_per_seq=max_predictions_per_seq, + seq_length=seq_length, + global_batch_size=batch_size, + is_training=True, + reuse_length=reuse_length, + sample_strategy=sample_strategy, + min_num_tokens=1, + max_num_tokens=2, + permutation_size=seq_length // 2, + leak_ratio=0.1) + + if max_predictions_per_seq is None: + with self.assertRaises(ValueError): + dataset = pretrain_dataloader.XLNetPretrainDataLoader( + data_config).load() + features = next(iter(dataset)) + else: + dataset = pretrain_dataloader.XLNetPretrainDataLoader(data_config).load() + features = next(iter(dataset)) + + self.assertIn("input_word_ids", features) + self.assertIn("input_type_ids", features) + self.assertIn("permutation_mask", features) + self.assertIn("masked_tokens", features) + self.assertIn("target", features) + self.assertIn("target_mask", features) + + self.assertAllClose(features["input_word_ids"].shape, + (batch_size, seq_length)) + self.assertAllClose(features["input_type_ids"].shape, + (batch_size, seq_length)) + self.assertAllClose(features["permutation_mask"].shape, + (batch_size, seq_length, seq_length)) + self.assertAllClose(features["masked_tokens"].shape, + (batch_size, seq_length,)) + if max_predictions_per_seq is not None: + self.assertIn("target_mapping", features) + self.assertAllClose(features["target_mapping"].shape, + (batch_size, max_predictions_per_seq, seq_length)) + self.assertAllClose(features["target_mask"].shape, + (batch_size, max_predictions_per_seq)) + self.assertAllClose(features["target"].shape, + (batch_size, max_predictions_per_seq)) + else: + self.assertAllClose(features["target_mask"].shape, + (batch_size, seq_length)) + self.assertAllClose(features["target"].shape, + (batch_size, seq_length)) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/data/pretrain_dynamic_dataloader.py b/official/nlp/data/pretrain_dynamic_dataloader.py new file mode 100644 index 
0000000000000000000000000000000000000000..c1de4ba54b86a3386708e3f56b76e8e3726c397d --- /dev/null +++ b/official/nlp/data/pretrain_dynamic_dataloader.py @@ -0,0 +1,211 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dataset loader for the pre-training with dynamic sequence length.""" +from typing import Optional, Tuple + +import dataclasses +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import input_reader +from official.nlp.data import data_loader_factory +from official.nlp.data import pretrain_dataloader + + +@dataclasses.dataclass +class BertPretrainDataConfig(cfg.DataConfig): + """Data config for BERT pretraining task (tasks/masked_lm).""" + input_path: str = '' + global_batch_size: int = 512 + is_training: bool = True + seq_bucket_lengths: Tuple[int, ...] = (128, 256, 384, 512,) + # TODO(rxsang): `seq_bucket_window_scale` is only useful when round robin + # tf.data service is disabled. Deprecate this flag once we always enable round + # robin tf.data service. 
+ seq_bucket_window_scale: int = 8 + use_next_sentence_label: bool = True + use_position_id: bool = False + deterministic: bool = False + enable_tf_data_service: bool = False + enable_round_robin_tf_data_service: bool = False + tf_data_service_job_name: str = 'bert_pretrain' + use_v2_feature_names: bool = False + + +@data_loader_factory.register_data_loader_cls(BertPretrainDataConfig) +class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader): + """Dataset loader for bert-style pretraining with dynamic sequence length. + + Bucketizes the input id features by the seq_bucket_lengths and features are + padded to the bucket boundaries. The mask features are usually shorter than + input id features and can also be dynamic. We require that the mask feature lengths + within a bucket be the same. For example, with [128, 256] buckets, + the mask features for bucket 128 should always have the length as X and + features for bucket 256 should always have the length as Y. + + The dataloader does not filter out empty masks. Make sure to handle this + in the model.
+ """ + + def __init__(self, params): + self._params = params + if len(params.seq_bucket_lengths) < 1: + raise ValueError('The seq_bucket_lengths cannot be empty.') + self._seq_bucket_lengths = params.seq_bucket_lengths + self._seq_bucket_window_scale = params.seq_bucket_window_scale + self._global_batch_size = params.global_batch_size + self._use_next_sentence_label = params.use_next_sentence_label + self._use_position_id = params.use_position_id + self._drop_remainder = params.drop_remainder + self._enable_tf_data_service = params.enable_tf_data_service + self._enable_round_robin_tf_data_service = ( + params.enable_round_robin_tf_data_service) + self._mask_keys = [ + 'masked_lm_positions', 'masked_lm_ids', 'masked_lm_weights' + ] + + def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = { + 'input_ids': tf.io.VarLenFeature(tf.int64), + 'input_mask': tf.io.VarLenFeature(tf.int64), + 'segment_ids': tf.io.VarLenFeature(tf.int64), + 'masked_lm_positions': tf.io.VarLenFeature(tf.int64), + 'masked_lm_ids': tf.io.VarLenFeature(tf.int64), + 'masked_lm_weights': tf.io.VarLenFeature(tf.float32), + } + if self._use_next_sentence_label: + name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature([1], + tf.int64) + dynamic_keys = ['input_ids', 'input_mask', 'segment_ids'] + if self._use_position_id: + name_to_features['position_ids'] = tf.io.VarLenFeature(tf.int64) + dynamic_keys.append('position_ids') + + example = tf.io.parse_single_example(record, name_to_features) + for key in dynamic_keys + self._mask_keys: + example[key] = tf.sparse.to_dense(example[key]) + + # Truncate padded data after the first non pad in the + # sequence length dimension. + # Pad before the first non pad from the back should not be removed. + mask = tf.math.greater( + tf.math.cumsum(example['input_ids'], reverse=True), 0) + for key in dynamic_keys: + example[key] = tf.boolean_mask(example[key], mask) + + # masked_lm_ids should be 0 padded. 
+ # Change mask features to -1 padding so that we can differentiate + # padding from data or from bucketizing. + mask = tf.math.not_equal(example['masked_lm_ids'], 0) + example['masked_lm_ids'] = tf.where( + mask, example['masked_lm_ids'], + -tf.ones(tf.shape(example['masked_lm_ids']), dtype=example[key].dtype)) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. + # tf.data service uses dataset graph fingerprint to distinguish input + # pipeline jobs, thus we sort the keys here to make sure they are generated + # in a deterministic order each time the dataset function is traced. + for name in sorted(list(example.keys())): + t = example[name] + if t.dtype == tf.int64: + t = tf.cast(t, tf.int32) + example[name] = t + + return example + + def _bucketize_and_batch( + self, + dataset, + input_context: Optional[tf.distribute.InputContext] = None): + """Bucketize by sequence length and batch the datasets.""" + per_replica_batch_size = input_context.get_per_replica_batch_size( + self._global_batch_size) if input_context else self._global_batch_size + + def element_length_func(example, seq_len_dim): + return tf.shape(example['input_word_ids'])[seq_len_dim] + + bucket_boundaries = [length + 1 for length in self._seq_bucket_lengths] + bucket_batch_sizes = [per_replica_batch_size] * (len(bucket_boundaries) + 1) + + # Bucketize and batch the dataset with per replica batch size first. 
+ dataset = dataset.apply( + tf.data.experimental.bucket_by_sequence_length( + lambda example: tf.cast(element_length_func(example, 0), tf.int32), + bucket_boundaries, + bucket_batch_sizes, + pad_to_bucket_boundary=True, + drop_remainder=self._drop_remainder)) + if input_context: + window_size = input_context.num_replicas_in_sync + if self._enable_tf_data_service and ( + not self._enable_round_robin_tf_data_service): + # If tf.data service is enabled but round-robin behavior is not enabled, + # different TPU workers may fetch data from one tf.data service worker + # in different speed. We set the window size to be + # `seq_bucket_window_scale` larger to leave buffer if some workers are + # fetching data faster than others, so all the data within the same + # global batch can still have more chances to be in the same bucket. + window_size *= self._seq_bucket_window_scale + + # Group `num_replicas_in_sync` batches from same bucket together, so all + # replicas can get the same sequence length for one global step. + dataset = dataset.apply( + tf.data.experimental.group_by_window( + key_func=lambda example: tf.cast( # pylint: disable=g-long-lambda + element_length_func(example, 1), tf.int64), + reduce_func=lambda _, x: tf.data.Dataset.from_tensors(x), + window_size=window_size)) + dataset = dataset.flat_map(lambda x: x) + + def _remove_pads_from_bucketize(features): + # All mask features must have the same effective length. + # The real masked ids padding token is -1 and 0 comes from + # bucket_by_sequence_length. 
+ mask = tf.math.not_equal(features['masked_lm_ids'], 0) + + mask_per_example = tf.math.reduce_sum(tf.cast(mask, tf.int32), axis=1) + normalized = tf.cast( + mask_per_example / tf.math.reduce_max(mask_per_example), tf.int32) + assert_op = tf.debugging.assert_equal( + tf.math.reduce_sum(normalized), per_replica_batch_size, + 'Number of non padded mask tokens is not the same for each example ' + 'in the same sequence length.') + with tf.control_dependencies([assert_op]): + for key in self._mask_keys: + features[key] = tf.reshape( + tf.boolean_mask( + features[key], mask), [per_replica_batch_size, -1]) + # Revert masked_lm_ids to be 0-padded. + mask = tf.math.not_equal(features['masked_lm_ids'], -1) + features['masked_lm_ids'] = tf.where( + mask, features['masked_lm_ids'], + tf.zeros( + tf.shape(features['masked_lm_ids']), + dtype=features['masked_lm_ids'].dtype)) + return features + + dataset = dataset.map(_remove_pads_from_bucketize) + return dataset + + def load(self, input_context: Optional[tf.distribute.InputContext] = None): + """Returns a tf.dataset.Dataset.""" + reader = input_reader.InputReader( + params=self._params, + decoder_fn=self._decode, + parser_fn=self._parse, + transform_and_batch_fn=self._bucketize_and_batch) + return reader.read(input_context) diff --git a/official/nlp/data/pretrain_dynamic_dataloader_test.py b/official/nlp/data/pretrain_dynamic_dataloader_test.py new file mode 100644 index 0000000000000000000000000000000000000000..acda2682733c3598046117b09034755a9a50c74c --- /dev/null +++ b/official/nlp/data/pretrain_dynamic_dataloader_test.py @@ -0,0 +1,242 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for nlp.data.pretrain_dynamic_dataloader.""" +import os + +from absl import logging +from absl.testing import parameterized +import numpy as np +import orbit +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.nlp.configs import bert +from official.nlp.configs import encoders +from official.nlp.data import pretrain_dataloader +from official.nlp.data import pretrain_dynamic_dataloader +from official.nlp.tasks import masked_lm + + +def _create_fake_dataset(output_path, seq_length, num_masked_tokens, + max_seq_length, num_examples): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + def create_float_feature(values): + f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + return f + + for _ in range(num_examples): + features = {} + padding = np.zeros(shape=(max_seq_length - seq_length), dtype=np.int32) + input_ids = np.random.randint(low=1, high=100, size=(seq_length)) + features['input_ids'] = create_int_feature( + np.concatenate((input_ids, padding))) + features['input_mask'] = create_int_feature( + np.concatenate((np.ones_like(input_ids), padding))) + features['segment_ids'] = create_int_feature( + np.concatenate((np.ones_like(input_ids), padding))) + features['position_ids'] = create_int_feature( + np.concatenate((np.ones_like(input_ids), padding))) + 
features['masked_lm_positions'] = create_int_feature( + np.random.randint(60, size=(num_masked_tokens), dtype=np.int64)) + features['masked_lm_ids'] = create_int_feature( + np.random.randint(100, size=(num_masked_tokens), dtype=np.int64)) + features['masked_lm_weights'] = create_float_feature( + np.ones((num_masked_tokens,), dtype=np.float32)) + features['next_sentence_labels'] = create_int_feature(np.array([0])) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +class PretrainDynamicDataLoaderTest(tf.test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.combine( + distribution_strategy=[ + strategy_combinations.cloud_tpu_strategy, + ], + mode='eager')) + def test_distribution_strategy(self, distribution_strategy): + max_seq_length = 128 + batch_size = 8 + input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + _create_fake_dataset( + input_path, + seq_length=60, + num_masked_tokens=20, + max_seq_length=max_seq_length, + num_examples=batch_size) + data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig( + is_training=False, + input_path=input_path, + seq_bucket_lengths=[64, 128], + global_batch_size=batch_size) + dataloader = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader( + data_config) + distributed_ds = orbit.utils.make_distributed_dataset( + distribution_strategy, dataloader.load) + train_iter = iter(distributed_ds) + with distribution_strategy.scope(): + config = masked_lm.MaskedLMConfig( + init_checkpoint=self.get_temp_dir(), + model=bert.PretrainerConfig( + encoders.EncoderConfig( + bert=encoders.BertEncoderConfig( + vocab_size=30522, num_layers=1)), + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=10, num_classes=2, name='next_sentence') + ]), + train_data=data_config) + task = masked_lm.MaskedLMTask(config) + model = task.build_model() + metrics = task.build_metrics() + + @tf.function + def 
step_fn(features): + return task.validation_step(features, model, metrics=metrics) + + distributed_outputs = distribution_strategy.run( + step_fn, args=(next(train_iter),)) + local_results = tf.nest.map_structure( + distribution_strategy.experimental_local_results, distributed_outputs) + logging.info('Dynamic padding: local_results= %s', str(local_results)) + dynamic_metrics = {} + for metric in metrics: + dynamic_metrics[metric.name] = metric.result() + + data_config = pretrain_dataloader.BertPretrainDataConfig( + is_training=False, + input_path=input_path, + seq_length=max_seq_length, + max_predictions_per_seq=20, + global_batch_size=batch_size) + dataloader = pretrain_dataloader.BertPretrainDataLoader(data_config) + distributed_ds = orbit.utils.make_distributed_dataset( + distribution_strategy, dataloader.load) + train_iter = iter(distributed_ds) + with distribution_strategy.scope(): + metrics = task.build_metrics() + + @tf.function + def step_fn_b(features): + return task.validation_step(features, model, metrics=metrics) + + distributed_outputs = distribution_strategy.run( + step_fn_b, args=(next(train_iter),)) + local_results = tf.nest.map_structure( + distribution_strategy.experimental_local_results, distributed_outputs) + logging.info('Static padding: local_results= %s', str(local_results)) + static_metrics = {} + for metric in metrics: + static_metrics[metric.name] = metric.result() + for key in static_metrics: + # We need to investigate the differences on losses. 
+ if key != 'next_sentence_loss': + self.assertEqual(dynamic_metrics[key], static_metrics[key]) + + def test_load_dataset(self): + max_seq_length = 128 + batch_size = 2 + input_path_1 = os.path.join(self.get_temp_dir(), 'train_1.tf_record') + _create_fake_dataset( + input_path_1, + seq_length=60, + num_masked_tokens=20, + max_seq_length=max_seq_length, + num_examples=batch_size) + input_path_2 = os.path.join(self.get_temp_dir(), 'train_2.tf_record') + _create_fake_dataset( + input_path_2, + seq_length=100, + num_masked_tokens=70, + max_seq_length=max_seq_length, + num_examples=batch_size) + input_paths = ','.join([input_path_1, input_path_2]) + data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig( + is_training=False, + input_path=input_paths, + seq_bucket_lengths=[64, 128], + use_position_id=True, + global_batch_size=batch_size) + dataset = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader( + data_config).load() + dataset_it = iter(dataset) + features = next(dataset_it) + self.assertCountEqual([ + 'input_word_ids', + 'input_mask', + 'input_type_ids', + 'next_sentence_labels', + 'masked_lm_positions', + 'masked_lm_ids', + 'masked_lm_weights', + 'position_ids', + ], features.keys()) + # Sequence length dimension should be bucketized and pad to 64. 
+ self.assertEqual(features['input_word_ids'].shape, (batch_size, 64)) + self.assertEqual(features['input_mask'].shape, (batch_size, 64)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, 64)) + self.assertEqual(features['position_ids'].shape, (batch_size, 64)) + self.assertEqual(features['masked_lm_positions'].shape, (batch_size, 20)) + features = next(dataset_it) + self.assertEqual(features['input_word_ids'].shape, (batch_size, 128)) + self.assertEqual(features['input_mask'].shape, (batch_size, 128)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, 128)) + self.assertEqual(features['position_ids'].shape, (batch_size, 128)) + self.assertEqual(features['masked_lm_positions'].shape, (batch_size, 70)) + + def test_load_dataset_not_same_masks(self): + max_seq_length = 128 + batch_size = 2 + input_path_1 = os.path.join(self.get_temp_dir(), 'train_3.tf_record') + _create_fake_dataset( + input_path_1, + seq_length=60, + num_masked_tokens=20, + max_seq_length=max_seq_length, + num_examples=batch_size) + input_path_2 = os.path.join(self.get_temp_dir(), 'train_4.tf_record') + _create_fake_dataset( + input_path_2, + seq_length=60, + num_masked_tokens=15, + max_seq_length=max_seq_length, + num_examples=batch_size) + input_paths = ','.join([input_path_1, input_path_2]) + data_config = pretrain_dynamic_dataloader.BertPretrainDataConfig( + is_training=False, + input_path=input_paths, + seq_bucket_lengths=[64, 128], + use_position_id=True, + global_batch_size=batch_size * 2) + dataset = pretrain_dynamic_dataloader.PretrainingDynamicDataLoader( + data_config).load() + dataset_it = iter(dataset) + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, '.*Number of non padded mask tokens.*'): + next(dataset_it) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/data/question_answering_dataloader.py b/official/nlp/data/question_answering_dataloader.py index 
08c7047e4afd80999899c34f2c5855ad2ef18634..0f721ed773a927e8caa8c3cfbaa5cf2ef6c896e5 100644 --- a/official/nlp/data/question_answering_dataloader.py +++ b/official/nlp/data/question_answering_dataloader.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,20 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Loads dataset for the question answering (e.g, SQuAD) task.""" from typing import Mapping, Optional + import dataclasses import tensorflow as tf - +from official.core import config_definitions as cfg from official.core import input_reader -from official.modeling.hyperparams import config_definitions as cfg +from official.nlp.data import data_loader from official.nlp.data import data_loader_factory @dataclasses.dataclass class QADataConfig(cfg.DataConfig): """Data config for question answering task (tasks/question_answering).""" + # For training, `input_path` is expected to be a pre-processed TFRecord file, + # while for evaluation, it is expected to be a raw JSON file (b/173814590). input_path: str = '' global_batch_size: int = 48 is_training: bool = True @@ -36,19 +38,23 @@ class QADataConfig(cfg.DataConfig): input_preprocessed_data_path: str = '' doc_stride: int = 128 query_length: int = 64 + # The path to the vocab file of word piece tokenizer or the + # model of the sentence piece tokenizer. 
vocab_file: str = '' tokenization: str = 'WordPiece' # WordPiece or SentencePiece do_lower_case: bool = True + xlnet_format: bool = False @data_loader_factory.register_data_loader_cls(QADataConfig) -class QuestionAnsweringDataLoader: +class QuestionAnsweringDataLoader(data_loader.DataLoader): """A class to load dataset for sentence prediction (classification) task.""" def __init__(self, params): self._params = params self._seq_length = params.seq_length self._is_training = params.is_training + self._xlnet_format = params.xlnet_format def _decode(self, record: tf.Tensor): """Decodes a serialized tf.Example.""" @@ -57,6 +63,13 @@ class QuestionAnsweringDataLoader: 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), } + if self._xlnet_format: + name_to_features['class_index'] = tf.io.FixedLenFeature([], tf.int64) + name_to_features['paragraph_mask'] = tf.io.FixedLenFeature( + [self._seq_length], tf.int64) + if self._is_training: + name_to_features['is_impossible'] = tf.io.FixedLenFeature([], tf.int64) + if self._is_training: name_to_features['start_positions'] = tf.io.FixedLenFeature([], tf.int64) name_to_features['end_positions'] = tf.io.FixedLenFeature([], tf.int64) @@ -78,7 +91,7 @@ class QuestionAnsweringDataLoader: """Parses raw tensors into a dict of tensors to be consumed by the model.""" x, y = {}, {} for name, tensor in record.items(): - if name in ('start_positions', 'end_positions'): + if name in ('start_positions', 'end_positions', 'is_impossible'): y[name] = tensor elif name == 'input_ids': x['input_word_ids'] = tensor @@ -86,6 +99,8 @@ class QuestionAnsweringDataLoader: x['input_type_ids'] = tensor else: x[name] = tensor + if name == 'start_positions' and self._xlnet_format: + x[name] = tensor return (x, y) def load(self, input_context: Optional[tf.distribute.InputContext] = None): diff --git a/official/nlp/data/question_answering_dataloader_test.py 
b/official/nlp/data/question_answering_dataloader_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c853bc080cddf9fc5c26a0f7f21cff19088bad9f --- /dev/null +++ b/official/nlp/data/question_answering_dataloader_test.py @@ -0,0 +1,74 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.question_answering_dataloader.""" +import os + +import numpy as np +import tensorflow as tf + +from official.nlp.data import question_answering_dataloader + + +def _create_fake_dataset(output_path, seq_length): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + for _ in range(100): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + features['input_ids'] = create_int_feature(input_ids) + features['input_mask'] = create_int_feature(np.ones_like(input_ids)) + features['segment_ids'] = create_int_feature(np.ones_like(input_ids)) + features['start_positions'] = create_int_feature(np.array([0])) + features['end_positions'] = create_int_feature(np.array([10])) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +class QuestionAnsweringDataTest(tf.test.TestCase): + + def test_load_dataset(self): + seq_length = 128 
+ batch_size = 10 + input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + _create_fake_dataset(input_path, seq_length) + data_config = question_answering_dataloader.QADataConfig( + is_training=True, + input_path=input_path, + seq_length=seq_length, + global_batch_size=batch_size) + dataset = question_answering_dataloader.QuestionAnsweringDataLoader( + data_config).load() + features, labels = next(iter(dataset)) + + self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'], + features.keys()) + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + + self.assertCountEqual(['start_positions', 'end_positions'], labels.keys()) + self.assertEqual(labels['start_positions'].shape, (batch_size,)) + self.assertEqual(labels['end_positions'].shape, (batch_size,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/data/sentence_prediction_dataloader.py b/official/nlp/data/sentence_prediction_dataloader.py index c95b1e232840247cea244324e89a8b031c13a88f..766595bfe84352fb01287fd6f86cbd6d51c80dcf 100644 --- a/official/nlp/data/sentence_prediction_dataloader.py +++ b/official/nlp/data/sentence_prediction_dataloader.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,16 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Loads dataset for the sentence prediction (classification) task.""" -from typing import Mapping, Optional +import functools +from typing import List, Mapping, Optional + import dataclasses import tensorflow as tf +import tensorflow_hub as hub +from official.common import dataset_fn +from official.core import config_definitions as cfg from official.core import input_reader -from official.modeling.hyperparams import config_definitions as cfg +from official.nlp import modeling +from official.nlp.data import data_loader from official.nlp.data import data_loader_factory +LABEL_TYPES_MAP = {'int': tf.int64, 'float': tf.float32} + @dataclasses.dataclass class SentencePredictionDataConfig(cfg.DataConfig): @@ -30,24 +37,32 @@ class SentencePredictionDataConfig(cfg.DataConfig): global_batch_size: int = 32 is_training: bool = True seq_length: int = 128 + label_type: str = 'int' + # Whether to include the example id number. 
+ include_example_id: bool = False @data_loader_factory.register_data_loader_cls(SentencePredictionDataConfig) -class SentencePredictionDataLoader: +class SentencePredictionDataLoader(data_loader.DataLoader): """A class to load dataset for sentence prediction (classification) task.""" def __init__(self, params): self._params = params self._seq_length = params.seq_length + self._include_example_id = params.include_example_id def _decode(self, record: tf.Tensor): """Decodes a serialized tf.Example.""" + label_type = LABEL_TYPES_MAP[self._params.label_type] name_to_features = { 'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), - 'label_ids': tf.io.FixedLenFeature([], tf.int64), + 'label_ids': tf.io.FixedLenFeature([], label_type), } + if self._include_example_id: + name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64) + example = tf.io.parse_single_example(record, name_to_features) # tf.Example only supports tf.int64, but the TPU only supports tf.int32. @@ -67,6 +82,9 @@ class SentencePredictionDataLoader: 'input_mask': record['input_mask'], 'input_type_ids': record['segment_ids'] } + if self._include_example_id: + x['example_id'] = record['example_id'] + y = record['label_ids'] return (x, y) @@ -75,3 +93,147 @@ class SentencePredictionDataLoader: reader = input_reader.InputReader( params=self._params, decoder_fn=self._decode, parser_fn=self._parse) return reader.read(input_context) + + +@dataclasses.dataclass +class SentencePredictionTextDataConfig(cfg.DataConfig): + """Data config for sentence prediction task with raw text.""" + # Either set `input_path`... + input_path: str = '' + # Either `int` or `float`. + label_type: str = 'int' + # ...or `tfds_name` and `tfds_split` to specify input. + tfds_name: str = '' + tfds_split: str = '' + # The name of the text feature fields. 
The text features will be + # concatenated in order. + text_fields: Optional[List[str]] = None + label_field: str = 'label' + global_batch_size: int = 32 + seq_length: int = 128 + is_training: bool = True + # Either build preprocessing with Python code by specifying these values + # for modeling.layers.BertTokenizer()/SentencepieceTokenizer().... + tokenization: str = 'WordPiece' # WordPiece or SentencePiece + # Text vocab file if tokenization is WordPiece, or sentencepiece.ModelProto + # file if tokenization is SentencePiece. + vocab_file: str = '' + lower_case: bool = True + # ...or load preprocessing from a SavedModel at this location. + preprocessing_hub_module_url: str = '' + # Either tfrecord or sstsable or recordio. + file_type: str = 'tfrecord' + include_example_id: bool = False + + +class TextProcessor(tf.Module): + """Text features processing for sentence prediction task.""" + + def __init__(self, + seq_length: int, + vocab_file: Optional[str] = None, + tokenization: Optional[str] = None, + lower_case: Optional[bool] = True, + preprocessing_hub_module_url: Optional[str] = None): + if preprocessing_hub_module_url: + self._preprocessing_hub_module = hub.load(preprocessing_hub_module_url) + self._tokenizer = self._preprocessing_hub_module.tokenize + self._pack_inputs = functools.partial( + self._preprocessing_hub_module.bert_pack_inputs, + seq_length=seq_length) + return + + if tokenization == 'WordPiece': + self._tokenizer = modeling.layers.BertTokenizer( + vocab_file=vocab_file, lower_case=lower_case) + elif tokenization == 'SentencePiece': + self._tokenizer = modeling.layers.SentencepieceTokenizer( + model_file_path=vocab_file, lower_case=lower_case, + strip_diacritics=True) # Strip diacritics to follow ALBERT model + else: + raise ValueError('Unsupported tokenization: %s' % tokenization) + + self._pack_inputs = modeling.layers.BertPackInputs( + seq_length=seq_length, + special_tokens_dict=self._tokenizer.get_special_tokens_dict()) + + def __call__(self, 
segments): + segments = [self._tokenizer(s) for s in segments] + # BertTokenizer returns a RaggedTensor with shape [batch, word, subword], + # and SentencepieceTokenizer returns a RaggedTensor with shape + # [batch, sentencepiece], + segments = [ + tf.cast(x.merge_dims(1, -1) if x.shape.rank > 2 else x, tf.int32) + for x in segments + ] + return self._pack_inputs(segments) + + +@data_loader_factory.register_data_loader_cls(SentencePredictionTextDataConfig) +class SentencePredictionTextDataLoader(data_loader.DataLoader): + """Loads dataset with raw text for sentence prediction task.""" + + def __init__(self, params): + if bool(params.tfds_name) != bool(params.tfds_split): + raise ValueError('`tfds_name` and `tfds_split` should be specified or ' + 'unspecified at the same time.') + if bool(params.tfds_name) == bool(params.input_path): + raise ValueError('Must specify either `tfds_name` and `tfds_split` ' + 'or `input_path`.') + if not params.text_fields: + raise ValueError('Unexpected empty text fields.') + if bool(params.vocab_file) == bool(params.preprocessing_hub_module_url): + raise ValueError('Must specify exactly one of vocab_file (with matching ' + 'lower_case flag) or preprocessing_hub_module_url.') + + self._params = params + self._text_fields = params.text_fields + self._label_field = params.label_field + self._label_type = params.label_type + self._include_example_id = params.include_example_id + self._text_processor = TextProcessor( + seq_length=params.seq_length, + vocab_file=params.vocab_file, + tokenization=params.tokenization, + lower_case=params.lower_case, + preprocessing_hub_module_url=params.preprocessing_hub_module_url) + + def _bert_preprocess(self, record: Mapping[str, tf.Tensor]): + """Berts preprocess.""" + segments = [record[x] for x in self._text_fields] + model_inputs = self._text_processor(segments) + if self._include_example_id: + model_inputs['example_id'] = record['example_id'] + y = record[self._label_field] + return model_inputs, y + 
+ def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = {} + for text_field in self._text_fields: + name_to_features[text_field] = tf.io.FixedLenFeature([], tf.string) + + label_type = LABEL_TYPES_MAP[self._label_type] + name_to_features[self._label_field] = tf.io.FixedLenFeature([], label_type) + if self._include_example_id: + name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64) + example = tf.io.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. + for name in example: + t = example[name] + if t.dtype == tf.int64: + t = tf.cast(t, tf.int32) + example[name] = t + + return example + + def load(self, input_context: Optional[tf.distribute.InputContext] = None): + """Returns a tf.dataset.Dataset.""" + reader = input_reader.InputReader( + dataset_fn=dataset_fn.pick_dataset_fn(self._params.file_type), + decoder_fn=self._decode if self._params.input_path else None, + params=self._params, + postprocess_fn=self._bert_preprocess) + return reader.read(input_context) diff --git a/official/nlp/data/sentence_prediction_dataloader_test.py b/official/nlp/data/sentence_prediction_dataloader_test.py new file mode 100644 index 0000000000000000000000000000000000000000..cbced2ad2c315db616631e3f70c61d424fbdc51b --- /dev/null +++ b/official/nlp/data/sentence_prediction_dataloader_test.py @@ -0,0 +1,249 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.sentence_prediction_dataloader.""" +import os + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from sentencepiece import SentencePieceTrainer +from official.nlp.data import sentence_prediction_dataloader as loader + + +def _create_fake_preprocessed_dataset(output_path, seq_length, label_type): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + def create_float_feature(values): + f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + return f + + for _ in range(100): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + features['input_ids'] = create_int_feature(input_ids) + features['input_mask'] = create_int_feature(np.ones_like(input_ids)) + features['segment_ids'] = create_int_feature(np.ones_like(input_ids)) + + if label_type == 'int': + features['label_ids'] = create_int_feature([1]) + elif label_type == 'float': + features['label_ids'] = create_float_feature([0.5]) + else: + raise ValueError('Unsupported label_type: %s' % label_type) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +def _create_fake_raw_dataset(output_path, text_fields, label_type): + """Creates a fake tf record file.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_str_feature(value): + f = tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + return f + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + def create_float_feature(values): + f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + return f + + for _ in 
range(100): + features = {} + for text_field in text_fields: + features[text_field] = create_str_feature([b'hello world']) + + if label_type == 'int': + features['label'] = create_int_feature([0]) + elif label_type == 'float': + features['label'] = create_float_feature([0.5]) + else: + raise ValueError('Unexpected label_type: %s' % label_type) + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +def _create_fake_sentencepiece_model(output_dir): + vocab = ['a', 'b', 'c', 'd', 'e', 'abc', 'def', 'ABC', 'DEF'] + model_prefix = os.path.join(output_dir, 'spm_model') + input_text_file_path = os.path.join(output_dir, 'train_input.txt') + with tf.io.gfile.GFile(input_text_file_path, 'w') as f: + f.write(' '.join(vocab + ['\n'])) + # Add 7 more tokens: , , [CLS], [SEP], [MASK], , . + full_vocab_size = len(vocab) + 7 + flags = dict( + model_prefix=model_prefix, + model_type='word', + input=input_text_file_path, + pad_id=0, + unk_id=1, + control_symbols='[CLS],[SEP],[MASK]', + vocab_size=full_vocab_size, + bos_id=full_vocab_size - 2, + eos_id=full_vocab_size - 1) + SentencePieceTrainer.Train(' '.join( + ['--{}={}'.format(k, v) for k, v in flags.items()])) + return model_prefix + '.model' + + +def _create_fake_vocab_file(vocab_file_path): + tokens = ['[PAD]'] + for i in range(1, 100): + tokens.append('[unused%d]' % i) + tokens.extend(['[UNK]', '[CLS]', '[SEP]', '[MASK]', 'hello', 'world']) + with tf.io.gfile.GFile(vocab_file_path, 'w') as outfile: + outfile.write('\n'.join(tokens)) + + +class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(('int', tf.int32), ('float', tf.float32)) + def test_load_dataset(self, label_type, expected_label_type): + input_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + batch_size = 10 + seq_length = 128 + _create_fake_preprocessed_dataset(input_path, seq_length, label_type) + data_config = 
loader.SentencePredictionDataConfig( + input_path=input_path, + seq_length=seq_length, + global_batch_size=batch_size, + label_type=label_type) + dataset = loader.SentencePredictionDataLoader(data_config).load() + features, labels = next(iter(dataset)) + self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'], + features.keys()) + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + self.assertEqual(labels.shape, (batch_size,)) + self.assertEqual(labels.dtype, expected_label_type) + + +class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase, + parameterized.TestCase): + + @parameterized.parameters(True, False) + def test_python_wordpiece_preprocessing(self, use_tfds): + batch_size = 10 + seq_length = 256 # Non-default value. + lower_case = True + + tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + text_fields = ['sentence1', 'sentence2'] + if not use_tfds: + _create_fake_raw_dataset(tf_record_path, text_fields, label_type='int') + + vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt') + _create_fake_vocab_file(vocab_file_path) + + data_config = loader.SentencePredictionTextDataConfig( + input_path='' if use_tfds else tf_record_path, + tfds_name='glue/mrpc' if use_tfds else '', + tfds_split='train' if use_tfds else '', + text_fields=text_fields, + global_batch_size=batch_size, + seq_length=seq_length, + is_training=True, + lower_case=lower_case, + vocab_file=vocab_file_path) + dataset = loader.SentencePredictionTextDataLoader(data_config).load() + features, labels = next(iter(dataset)) + self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], + features.keys()) + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + 
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + self.assertEqual(labels.shape, (batch_size,)) + + @parameterized.parameters(True, False) + def test_python_sentencepiece_preprocessing(self, use_tfds): + batch_size = 10 + seq_length = 256 # Non-default value. + lower_case = True + + tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + text_fields = ['sentence1', 'sentence2'] + if not use_tfds: + _create_fake_raw_dataset(tf_record_path, text_fields, label_type='int') + + sp_model_file_path = _create_fake_sentencepiece_model(self.get_temp_dir()) + data_config = loader.SentencePredictionTextDataConfig( + input_path='' if use_tfds else tf_record_path, + tfds_name='glue/mrpc' if use_tfds else '', + tfds_split='train' if use_tfds else '', + text_fields=text_fields, + global_batch_size=batch_size, + seq_length=seq_length, + is_training=True, + lower_case=lower_case, + tokenization='SentencePiece', + vocab_file=sp_model_file_path, + ) + dataset = loader.SentencePredictionTextDataLoader(data_config).load() + features, labels = next(iter(dataset)) + self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], + features.keys()) + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + self.assertEqual(labels.shape, (batch_size,)) + + @parameterized.parameters(True, False) + def test_saved_model_preprocessing(self, use_tfds): + batch_size = 10 + seq_length = 256 # Non-default value. 
+ + tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + text_fields = ['sentence1', 'sentence2'] + if not use_tfds: + _create_fake_raw_dataset(tf_record_path, text_fields, label_type='float') + + vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt') + _create_fake_vocab_file(vocab_file_path) + data_config = loader.SentencePredictionTextDataConfig( + input_path='' if use_tfds else tf_record_path, + tfds_name='glue/mrpc' if use_tfds else '', + tfds_split='train' if use_tfds else '', + text_fields=text_fields, + global_batch_size=batch_size, + seq_length=seq_length, + is_training=True, + preprocessing_hub_module_url=( + 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'), + label_type='int' if use_tfds else 'float', + ) + dataset = loader.SentencePredictionTextDataLoader(data_config).load() + features, labels = next(iter(dataset)) + self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'], + features.keys()) + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + self.assertEqual(labels.shape, (batch_size,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/data/sentence_retrieval_lib.py b/official/nlp/data/sentence_retrieval_lib.py index d8e83ae579f8221b93e790ea62b91c3d6d2b9e90..0bfd8e4dec5afba3eb00ff23e3f75a0cc5818958 100644 --- a/official/nlp/data/sentence_retrieval_lib.py +++ b/official/nlp/data/sentence_retrieval_lib.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """BERT library to process data for cross lingual sentence retrieval task.""" import os @@ -25,8 +25,7 @@ class BuccProcessor(classifier_data_lib.DataProcessor): """Procssor for Xtreme BUCC data set.""" supported_languages = ["de", "fr", "ru", "zh"] - def __init__(self, - process_text_fn=tokenization.convert_to_unicode): + def __init__(self, process_text_fn=tokenization.convert_to_unicode): super(BuccProcessor, self).__init__(process_text_fn) self.languages = BuccProcessor.supported_languages @@ -50,11 +49,11 @@ class BuccProcessor(classifier_data_lib.DataProcessor): examples = [] for (i, line) in enumerate(lines): guid = "%s-%s" % (set_type, i) - int_iden = int(line[0].split("-")[1]) + example_id = int(line[0].split("-")[1]) text_a = self.process_text_fn(line[1]) examples.append( classifier_data_lib.InputExample( - guid=guid, text_a=text_a, int_iden=int_iden)) + guid=guid, text_a=text_a, example_id=example_id)) return examples @@ -66,8 +65,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor): "nl", "pt", "ru", "sw", "ta", "te", "th", "tl", "tr", "ur", "vi", "zh" ] - def __init__(self, - process_text_fn=tokenization.convert_to_unicode): + def __init__(self, process_text_fn=tokenization.convert_to_unicode): super(TatoebaProcessor, self).__init__(process_text_fn) self.languages = TatoebaProcessor.supported_languages @@ -88,7 +86,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor): text_a = self.process_text_fn(line[0]) examples.append( classifier_data_lib.InputExample( - guid=guid, text_a=text_a, int_iden=i)) + guid=guid, text_a=text_a, example_id=i)) return examples diff --git a/official/nlp/data/squad_lib.py b/official/nlp/data/squad_lib.py index fbf4c604123c541e7830ffa7176b182a843eef58..e96838664c38db4f6cdc2d39f10ad68baeac25e5 100644 --- 
a/official/nlp/data/squad_lib.py +++ b/official/nlp/data/squad_lib.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,19 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Library to process data for SQuAD 1.1 and SQuAD 2.0.""" +"""Library to process data for SQuAD 1.1 and SQuAD 2.0.""" # pylint: disable=g-bad-import-order -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import collections import copy import json import math import os + import six from absl import logging @@ -40,8 +36,8 @@ class SquadExample(object): Attributes: qas_id: ID of the question-answer pair. question_text: Original text for the question. - doc_tokens: The list of tokens in the context obtained by splitting - on whitespace only. + doc_tokens: The list of tokens in the context obtained by splitting on + whitespace only. orig_answer_text: Original text for the answer. start_position: Starting index of the answer in `doc_tokens`. end_position: Ending index of the answer in `doc_tokens`. 
@@ -96,6 +92,8 @@ class InputFeatures(object): input_ids, input_mask, segment_ids, + paragraph_mask=None, + class_index=None, start_position=None, end_position=None, is_impossible=None): @@ -111,6 +109,8 @@ class InputFeatures(object): self.start_position = start_position self.end_position = end_position self.is_impossible = is_impossible + self.paragraph_mask = paragraph_mask + self.class_index = class_index class FeatureWriter(object): @@ -138,6 +138,11 @@ class FeatureWriter(object): features["input_mask"] = create_int_feature(feature.input_mask) features["segment_ids"] = create_int_feature(feature.segment_ids) + if feature.paragraph_mask is not None: + features["paragraph_mask"] = create_int_feature(feature.paragraph_mask) + if feature.class_index is not None: + features["class_index"] = create_int_feature([feature.class_index]) + if self.is_training: features["start_positions"] = create_int_feature([feature.start_position]) features["end_positions"] = create_int_feature([feature.end_position]) @@ -153,11 +158,20 @@ class FeatureWriter(object): self._writer.close() -def read_squad_examples(input_file, is_training, version_2_with_negative): +def read_squad_examples(input_file, is_training, + version_2_with_negative, + translated_input_folder=None): """Read a SQuAD json file into a list of SquadExample.""" with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] + if translated_input_folder is not None: + translated_files = tf.io.gfile.glob( + os.path.join(translated_input_folder, "*.json")) + for file in translated_files: + with tf.io.gfile.GFile(file, "r") as reader: + input_data.extend(json.load(reader)["data"]) + def is_whitespace(c): if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: return True @@ -209,8 +223,8 @@ def read_squad_examples(input_file, is_training, version_2_with_negative): # # Note that this means for training mode, every example is NOT # guaranteed to be preserved. 
- actual_text = " ".join( - doc_tokens[start_position:(end_position + 1)]) + actual_text = " ".join(doc_tokens[start_position:(end_position + + 1)]) cleaned_answer_text = " ".join( tokenization.whitespace_tokenize(orig_answer_text)) if actual_text.find(cleaned_answer_text) == -1: @@ -242,6 +256,7 @@ def convert_examples_to_features(examples, max_query_length, is_training, output_fn, + xlnet_format=False, batch_size=None): """Loads a data file into a list of `InputBatch`s.""" @@ -303,25 +318,54 @@ def convert_examples_to_features(examples, token_to_orig_map = {} token_is_max_context = {} segment_ids = [] - tokens.append("[CLS]") - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append("[SEP]") - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append("[SEP]") - segment_ids.append(1) + + # Paragraph mask used in XLNet. + # 1 represents paragraph and class tokens. + # 0 represents query and other special tokens. 
+ paragraph_mask = [] + + # pylint: disable=cell-var-from-loop + def process_query(seg_q): + for token in query_tokens: + tokens.append(token) + segment_ids.append(seg_q) + paragraph_mask.append(0) + tokens.append("[SEP]") + segment_ids.append(seg_q) + paragraph_mask.append(0) + + def process_paragraph(seg_p): + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(seg_p) + paragraph_mask.append(1) + tokens.append("[SEP]") + segment_ids.append(seg_p) + paragraph_mask.append(0) + + def process_class(seg_class): + class_index = len(segment_ids) + tokens.append("[CLS]") + segment_ids.append(seg_class) + paragraph_mask.append(1) + return class_index + + if xlnet_format: + seg_p, seg_q, seg_class, seg_pad = 0, 1, 2, 3 + process_paragraph(seg_p) + process_query(seg_q) + class_index = process_class(seg_class) + else: + seg_p, seg_q, seg_class, seg_pad = 1, 0, 0, 0 + class_index = process_class(seg_class) + process_query(seg_q) + process_paragraph(seg_p) input_ids = tokenizer.convert_tokens_to_ids(tokens) @@ -333,35 +377,30 @@ def convert_examples_to_features(examples, while len(input_ids) < max_seq_length: input_ids.append(0) input_mask.append(0) - segment_ids.append(0) + segment_ids.append(seg_pad) + paragraph_mask.append(0) assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length assert len(segment_ids) == max_seq_length + assert len(paragraph_mask) == max_seq_length + + start_position = 0 + end_position = 0 + span_contains_answer = False - start_position = None - end_position = None if is_training and not example.is_impossible: # For training, if our document chunk does not contain an annotation # we throw it out, since there is nothing to 
predict. doc_start = doc_span.start doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and - tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 + span_contains_answer = (tok_start_position >= doc_start and + tok_end_position <= doc_end) + if span_contains_answer: + doc_offset = 0 if xlnet_format else len(query_tokens) + 2 start_position = tok_start_position - doc_start + doc_offset end_position = tok_end_position - doc_start + doc_offset - if is_training and example.is_impossible: - start_position = 0 - end_position = 0 - if example_index < 20: logging.info("*** Example ***") logging.info("unique_id: %s", (unique_id)) @@ -381,19 +420,25 @@ def convert_examples_to_features(examples, logging.info("input_ids: %s", " ".join([str(x) for x in input_ids])) logging.info("input_mask: %s", " ".join([str(x) for x in input_mask])) logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids])) - if is_training and example.is_impossible: - logging.info("impossible example") - if is_training and not example.is_impossible: - answer_text = " ".join(tokens[start_position:(end_position + 1)]) - logging.info("start_position: %d", (start_position)) - logging.info("end_position: %d", (end_position)) - logging.info("answer: %s", tokenization.printable_text(answer_text)) + logging.info("paragraph_mask: %s", " ".join( + [str(x) for x in paragraph_mask])) + logging.info("class_index: %d", class_index) + if is_training: + if span_contains_answer: + answer_text = " ".join(tokens[start_position:(end_position + 1)]) + logging.info("start_position: %d", (start_position)) + logging.info("end_position: %d", (end_position)) + logging.info("answer: %s", tokenization.printable_text(answer_text)) + else: + logging.info("document span doesn't contain answer") feature = InputFeatures( unique_id=unique_id, example_index=example_index, 
doc_span_index=doc_span_index, tokens=tokens, + paragraph_mask=paragraph_mask, + class_index=class_index, token_to_orig_map=token_to_orig_map, token_is_max_context=token_is_max_context, input_ids=input_ids, @@ -401,7 +446,7 @@ def convert_examples_to_features(examples, segment_ids=segment_ids, start_position=start_position, end_position=end_position, - is_impossible=example.is_impossible) + is_impossible=not span_contains_answer) # Run callback if is_training: @@ -520,15 +565,16 @@ def write_predictions(all_examples, logging.info("Writing nbest to: %s", (output_nbest_file)) all_predictions, all_nbest_json, scores_diff_json = ( - postprocess_output(all_examples=all_examples, - all_features=all_features, - all_results=all_results, - n_best_size=n_best_size, - max_answer_length=max_answer_length, - do_lower_case=do_lower_case, - version_2_with_negative=version_2_with_negative, - null_score_diff_threshold=null_score_diff_threshold, - verbose=verbose)) + postprocess_output( + all_examples=all_examples, + all_features=all_features, + all_results=all_results, + n_best_size=n_best_size, + max_answer_length=max_answer_length, + do_lower_case=do_lower_case, + version_2_with_negative=version_2_with_negative, + null_score_diff_threshold=null_score_diff_threshold, + verbose=verbose)) write_to_json_files(all_predictions, output_prediction_file) write_to_json_files(all_nbest_json, output_nbest_file) @@ -544,6 +590,7 @@ def postprocess_output(all_examples, do_lower_case, version_2_with_negative=False, null_score_diff_threshold=0.0, + xlnet_format=False, verbose=False): """Postprocess model output, to form predicton results.""" @@ -572,46 +619,54 @@ def postprocess_output(all_examples, null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score for (feature_index, feature) in enumerate(features): + if feature.unique_id not in unique_id_to_result: + logging.info("Skip eval example %s, not in pred.", 
feature.unique_id) + continue result = unique_id_to_result[feature.unique_id] - start_indexes = _get_best_indexes(result.start_logits, n_best_size) - end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant if version_2_with_negative: - feature_null_score = result.start_logits[0] + result.end_logits[0] + if xlnet_format: + feature_null_score = result.class_logits + else: + feature_null_score = result.start_logits[0] + result.end_logits[0] if feature_null_score < score_null: score_null = feature_null_score min_null_feature_index = feature_index null_start_logit = result.start_logits[0] null_end_logit = result.end_logits[0] - for start_index in start_indexes: - for end_index in end_indexes: - # We could hypothetically create invalid predictions, e.g., predict - # that the start of the span is in the question. We throw out all - # invalid predictions. - if start_index >= len(feature.tokens): - continue - if end_index >= len(feature.tokens): - continue - if start_index not in feature.token_to_orig_map: - continue - if end_index not in feature.token_to_orig_map: - continue - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction( - feature_index=feature_index, - start_index=start_index, - end_index=end_index, - start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) - - if version_2_with_negative: + for (start_index, start_logit, + end_index, end_logit) in _get_best_indexes_and_logits( + result=result, + n_best_size=n_best_size, + xlnet_format=xlnet_format): + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. 
+ if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=start_logit, + end_logit=end_logit)) + + if version_2_with_negative and not xlnet_format: prelim_predictions.append( _PrelimPrediction( feature_index=min_null_feature_index, @@ -633,7 +688,7 @@ def postprocess_output(all_examples, if len(nbest) >= n_best_size: break feature = features[pred.feature_index] - if pred.start_index > 0: # this is a non-null prediction + if pred.start_index > 0 or xlnet_format: # this is a non-null prediction tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] orig_doc_start = feature.token_to_orig_map[pred.start_index] orig_doc_end = feature.token_to_orig_map[pred.end_index] @@ -666,7 +721,7 @@ def postprocess_output(all_examples, end_logit=pred.end_logit)) # if we didn't inlude the empty option in the n-best, inlcude it - if version_2_with_negative: + if version_2_with_negative and not xlnet_format: if "" not in seen_predictions: nbest.append( _NbestPrediction( @@ -707,13 +762,18 @@ def postprocess_output(all_examples, # pytype: disable=attribute-error # predict "" iff the null score - the score of best non-null > threshold if best_non_null_entry is not None: - score_diff = score_null - best_non_null_entry.start_logit - ( - best_non_null_entry.end_logit) - scores_diff_json[example.qas_id] = score_diff - if score_diff > null_score_diff_threshold: - all_predictions[example.qas_id] = "" - else: + if xlnet_format: + score_diff = score_null + 
scores_diff_json[example.qas_id] = score_diff all_predictions[example.qas_id] = best_non_null_entry.text + else: + score_diff = score_null - best_non_null_entry.start_logit - ( + best_non_null_entry.end_logit) + scores_diff_json[example.qas_id] = score_diff + if score_diff > null_score_diff_threshold: + all_predictions[example.qas_id] = "" + else: + all_predictions[example.qas_id] = best_non_null_entry.text else: logging.warning("best_non_null_entry is None") scores_diff_json[example.qas_id] = score_null @@ -825,16 +885,29 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose=False): return output_text -def _get_best_indexes(logits, n_best_size): - """Get the n-best logits from a list.""" - index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) - - best_indexes = [] - for i in range(len(index_and_score)): # pylint: disable=consider-using-enumerate - if i >= n_best_size: - break - best_indexes.append(index_and_score[i][0]) - return best_indexes +def _get_best_indexes_and_logits(result, + n_best_size, + xlnet_format=False): + """Generates the n-best indexes and logits from a list.""" + if xlnet_format: + for i in range(n_best_size): + for j in range(n_best_size): + j_index = i * n_best_size + j + yield (result.start_indexes[i], result.start_logits[i], + result.end_indexes[j_index], result.end_logits[j_index]) + else: + start_index_and_score = sorted(enumerate(result.start_logits), + key=lambda x: x[1], reverse=True) + end_index_and_score = sorted(enumerate(result.end_logits), + key=lambda x: x[1], reverse=True) + for i in range(len(start_index_and_score)): + if i >= n_best_size: + break + for j in range(len(end_index_and_score)): + if j >= n_best_size: + break + yield (start_index_and_score[i][0], start_index_and_score[i][1], + end_index_and_score[j][0], end_index_and_score[j][1]) def _compute_softmax(scores): @@ -863,16 +936,19 @@ def _compute_softmax(scores): def generate_tf_record_from_json_file(input_file_path, vocab_file_path, 
output_path, + translated_input_folder=None, max_seq_length=384, do_lower_case=True, max_query_length=64, doc_stride=128, - version_2_with_negative=False): + version_2_with_negative=False, + xlnet_format=False): """Generates and saves training data into a tf record file.""" train_examples = read_squad_examples( input_file=input_file_path, is_training=True, - version_2_with_negative=version_2_with_negative) + version_2_with_negative=version_2_with_negative, + translated_input_folder=translated_input_folder) tokenizer = tokenization.FullTokenizer( vocab_file=vocab_file_path, do_lower_case=do_lower_case) train_writer = FeatureWriter(filename=output_path, is_training=True) @@ -883,7 +959,8 @@ def generate_tf_record_from_json_file(input_file_path, doc_stride=doc_stride, max_query_length=max_query_length, is_training=True, - output_fn=train_writer.process_feature) + output_fn=train_writer.process_feature, + xlnet_format=xlnet_format) train_writer.close() meta_data = { diff --git a/official/nlp/data/squad_lib_sp.py b/official/nlp/data/squad_lib_sp.py index c65f713fd09bc4858f77f8ce823b17467606271c..b5d6dbcd0c51203ec8bc763559d1b17829168545 100644 --- a/official/nlp/data/squad_lib_sp.py +++ b/official/nlp/data/squad_lib_sp.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Run ALBERT on SQuAD 1.1 and SQuAD 2.0 using sentence piece tokenization. 
The file is forked from: https://github.com/google-research/ALBERT/blob/master/run_squad_sp.py """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import collections import copy import json import math import os + from absl import logging import numpy as np import tensorflow as tf @@ -89,6 +86,8 @@ class InputFeatures(object): input_mask, segment_ids, paragraph_len, + class_index=None, + paragraph_mask=None, start_position=None, end_position=None, is_impossible=None): @@ -101,19 +100,31 @@ class InputFeatures(object): self.tokens = tokens self.input_ids = input_ids self.input_mask = input_mask + self.paragraph_mask = paragraph_mask self.segment_ids = segment_ids self.paragraph_len = paragraph_len + self.class_index = class_index self.start_position = start_position self.end_position = end_position self.is_impossible = is_impossible -def read_squad_examples(input_file, is_training, version_2_with_negative): +def read_squad_examples(input_file, + is_training, + version_2_with_negative, + translated_input_folder=None): """Read a SQuAD json file into a list of SquadExample.""" del version_2_with_negative with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] + if translated_input_folder is not None: + translated_files = tf.io.gfile.glob( + os.path.join(translated_input_folder, "*.json")) + for file in translated_files: + with tf.io.gfile.GFile(file, "r") as reader: + input_data.extend(json.load(reader)["data"]) + examples = [] for entry in input_data: for paragraph in entry["paragraphs"]: @@ -197,6 +208,7 @@ def convert_examples_to_features(examples, is_training, output_fn, do_lower_case, + xlnet_format=False, batch_size=None): """Loads a data file into a list of `InputBatch`s.""" cnt_pos, cnt_neg = 0, 0 @@ -246,6 +258,7 @@ def convert_examples_to_features(examples, f = np.zeros((max_n, max_m), dtype=np.float32) g = {} + # pylint: disable=cell-var-from-loop def 
_lcs_match(max_dist, n=n, m=m): """Longest-common-substring algorithm.""" @@ -277,6 +290,7 @@ def convert_examples_to_features(examples, remove_space=False) == tok_cat_text[j] and f_prev + 1 > f[i, j]): g[(i, j)] = 2 f[i, j] = f_prev + 1 + # pylint: enable=cell-var-from-loop max_dist = abs(n - m) + 5 @@ -354,6 +368,7 @@ def convert_examples_to_features(examples, "DocSpan", ["start", "length"]) doc_spans = [] start_offset = 0 + while start_offset < len(all_doc_tokens): length = len(all_doc_tokens) - start_offset if length > max_tokens_for_doc: @@ -368,34 +383,62 @@ def convert_examples_to_features(examples, token_is_max_context = {} segment_ids = [] + # Paragraph mask used in XLNet. + # 1 represents paragraph and class tokens. + # 0 represents query and other special tokens. + paragraph_mask = [] + cur_tok_start_to_orig_index = [] cur_tok_end_to_orig_index = [] - tokens.append(tokenizer.sp_model.PieceToId("[CLS]")) - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append(tokenizer.sp_model.PieceToId("[SEP]")) - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - - cur_tok_start_to_orig_index.append( - tok_start_to_orig_index[split_token_index]) - cur_tok_end_to_orig_index.append( - tok_end_to_orig_index[split_token_index]) - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, - split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append(tokenizer.sp_model.PieceToId("[SEP]")) - segment_ids.append(1) - - paragraph_len = len(tokens) + # pylint: disable=cell-var-from-loop + def process_query(seg_q): + for token in query_tokens: + tokens.append(token) + segment_ids.append(seg_q) + paragraph_mask.append(0) + tokens.append(tokenizer.sp_model.PieceToId("[SEP]")) + segment_ids.append(seg_q) + paragraph_mask.append(0) + + def process_paragraph(seg_p): 
+ for i in range(doc_span.length): + split_token_index = doc_span.start + i + + cur_tok_start_to_orig_index.append( + tok_start_to_orig_index[split_token_index]) + cur_tok_end_to_orig_index.append( + tok_end_to_orig_index[split_token_index]) + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(seg_p) + paragraph_mask.append(1) + tokens.append(tokenizer.sp_model.PieceToId("[SEP]")) + segment_ids.append(seg_p) + paragraph_mask.append(0) + return len(tokens) + + def process_class(seg_class): + class_index = len(segment_ids) + tokens.append(tokenizer.sp_model.PieceToId("[CLS]")) + segment_ids.append(seg_class) + paragraph_mask.append(1) + return class_index + + if xlnet_format: + seg_p, seg_q, seg_class, seg_pad = 0, 1, 2, 3 + paragraph_len = process_paragraph(seg_p) + process_query(seg_q) + class_index = process_class(seg_class) + else: + seg_p, seg_q, seg_class, seg_pad = 1, 0, 0, 0 + class_index = process_class(seg_class) + process_query(seg_q) + paragraph_len = process_paragraph(seg_p) + input_ids = tokens # The mask has 1 for real tokens and 0 for padding tokens. 
Only real @@ -406,11 +449,13 @@ def convert_examples_to_features(examples, while len(input_ids) < max_seq_length: input_ids.append(0) input_mask.append(0) - segment_ids.append(0) + segment_ids.append(seg_pad) + paragraph_mask.append(0) assert len(input_ids) == max_seq_length assert len(input_mask) == max_seq_length assert len(segment_ids) == max_seq_length + assert len(paragraph_mask) == max_seq_length span_is_impossible = example.is_impossible start_position = None @@ -430,13 +475,13 @@ def convert_examples_to_features(examples, end_position = 0 span_is_impossible = True else: - doc_offset = len(query_tokens) + 2 + doc_offset = 0 if xlnet_format else len(query_tokens) + 2 start_position = tok_start_position - doc_start + doc_offset end_position = tok_end_position - doc_start + doc_offset if is_training and span_is_impossible: - start_position = 0 - end_position = 0 + start_position = class_index + end_position = class_index if example_index < 20: logging.info("*** Example ***") @@ -456,6 +501,9 @@ def convert_examples_to_features(examples, logging.info("input_ids: %s", " ".join([str(x) for x in input_ids])) logging.info("input_mask: %s", " ".join([str(x) for x in input_mask])) logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids])) + logging.info("paragraph_mask: %s", " ".join( + [str(x) for x in paragraph_mask])) + logging.info("class_index: %d", class_index) if is_training and span_is_impossible: logging.info("impossible example span") @@ -489,8 +537,10 @@ def convert_examples_to_features(examples, tokens=[tokenizer.sp_model.IdToPiece(x) for x in tokens], input_ids=input_ids, input_mask=input_mask, + paragraph_mask=paragraph_mask, segment_ids=segment_ids, paragraph_len=paragraph_len, + class_index=class_index, start_position=start_position, end_position=end_position, is_impossible=span_is_impossible) @@ -580,15 +630,16 @@ def write_predictions(all_examples, logging.info("Writing nbest to: %s", (output_nbest_file)) all_predictions, 
all_nbest_json, scores_diff_json = ( - postprocess_output(all_examples=all_examples, - all_features=all_features, - all_results=all_results, - n_best_size=n_best_size, - max_answer_length=max_answer_length, - do_lower_case=do_lower_case, - version_2_with_negative=version_2_with_negative, - null_score_diff_threshold=null_score_diff_threshold, - verbose=verbose)) + postprocess_output( + all_examples=all_examples, + all_features=all_features, + all_results=all_results, + n_best_size=n_best_size, + max_answer_length=max_answer_length, + do_lower_case=do_lower_case, + version_2_with_negative=version_2_with_negative, + null_score_diff_threshold=null_score_diff_threshold, + verbose=verbose)) write_to_json_files(all_predictions, output_prediction_file) write_to_json_files(all_nbest_json, output_nbest_file) @@ -604,11 +655,11 @@ def postprocess_output(all_examples, do_lower_case, version_2_with_negative=False, null_score_diff_threshold=0.0, + xlnet_format=False, verbose=False): """Postprocess model output, to form predicton results.""" del do_lower_case, verbose - example_index_to_features = collections.defaultdict(list) for feature in all_features: example_index_to_features[feature.example_index].append(feature) @@ -635,47 +686,53 @@ def postprocess_output(all_examples, null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score for (feature_index, feature) in enumerate(features): + if feature.unique_id not in unique_id_to_result: + logging.info("Skip eval example %s, not in pred.", feature.unique_id) + continue result = unique_id_to_result[feature.unique_id] - start_indexes = _get_best_indexes(result.start_logits, n_best_size) - end_indexes = _get_best_indexes(result.end_logits, n_best_size) + # if we could have irrelevant answers, get the min score of irrelevant if version_2_with_negative: - feature_null_score = result.start_logits[0] + result.end_logits[0] + if xlnet_format: + 
feature_null_score = result.class_logits + else: + feature_null_score = result.start_logits[0] + result.end_logits[0] if feature_null_score < score_null: score_null = feature_null_score min_null_feature_index = feature_index null_start_logit = result.start_logits[0] null_end_logit = result.end_logits[0] - for start_index in start_indexes: - for end_index in end_indexes: - doc_offset = feature.tokens.index("[SEP]") + 1 - # We could hypothetically create invalid predictions, e.g., predict - # that the start of the span is in the question. We throw out all - # invalid predictions. - if start_index - doc_offset >= len(feature.tok_start_to_orig_index): - continue - if end_index - doc_offset >= len(feature.tok_end_to_orig_index): - continue - # if start_index not in feature.tok_start_to_orig_index: - # continue - # if end_index not in feature.tok_end_to_orig_index: - # continue - if not feature.token_is_max_context.get(start_index, False): - continue - if end_index < start_index: - continue - length = end_index - start_index + 1 - if length > max_answer_length: - continue - prelim_predictions.append( - _PrelimPrediction( - feature_index=feature_index, - start_index=start_index - doc_offset, - end_index=end_index - doc_offset, - start_logit=result.start_logits[start_index], - end_logit=result.end_logits[end_index])) - - if version_2_with_negative: + + doc_offset = 0 if xlnet_format else feature.tokens.index("[SEP]") + 1 + + for (start_index, start_logit, + end_index, end_logit) in _get_best_indexes_and_logits( + result=result, + n_best_size=n_best_size, + xlnet_format=xlnet_format): + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. 
+ if start_index - doc_offset >= len(feature.tok_start_to_orig_index): + continue + if end_index - doc_offset >= len(feature.tok_end_to_orig_index): + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index - doc_offset, + end_index=end_index - doc_offset, + start_logit=start_logit, + end_logit=end_logit)) + + if version_2_with_negative and not xlnet_format: prelim_predictions.append( _PrelimPrediction( feature_index=min_null_feature_index, @@ -697,7 +754,7 @@ def postprocess_output(all_examples, if len(nbest) >= n_best_size: break feature = features[pred.feature_index] - if pred.start_index >= 0: # this is a non-null prediction + if pred.start_index >= 0 or xlnet_format: # this is a non-null prediction tok_start_to_orig_index = feature.tok_start_to_orig_index tok_end_to_orig_index = feature.tok_end_to_orig_index start_orig_pos = tok_start_to_orig_index[pred.start_index] @@ -719,8 +776,8 @@ def postprocess_output(all_examples, start_logit=pred.start_logit, end_logit=pred.end_logit)) - # if we didn't inlude the empty option in the n-best, inlcude it - if version_2_with_negative: + # if we didn't inlude the empty option in the n-best, include it + if version_2_with_negative and not xlnet_format: if "" not in seen_predictions: nbest.append( _NbestPrediction( @@ -759,14 +816,19 @@ def postprocess_output(all_examples, all_predictions[example.qas_id] = nbest_json[0]["text"] else: assert best_non_null_entry is not None - # predict "" iff the null score - the score of best non-null > threshold - score_diff = score_null - best_non_null_entry.start_logit - ( - best_non_null_entry.end_logit) - scores_diff_json[example.qas_id] = score_diff - if score_diff > null_score_diff_threshold: - all_predictions[example.qas_id] = "" - 
else: + if xlnet_format: + score_diff = score_null + scores_diff_json[example.qas_id] = score_diff all_predictions[example.qas_id] = best_non_null_entry.text + else: + # predict "" iff the null score - the score of best non-null > threshold + score_diff = score_null - best_non_null_entry.start_logit - ( + best_non_null_entry.end_logit) + scores_diff_json[example.qas_id] = score_diff + if score_diff > null_score_diff_threshold: + all_predictions[example.qas_id] = "" + else: + all_predictions[example.qas_id] = best_non_null_entry.text all_nbest_json[example.qas_id] = nbest_json @@ -778,16 +840,29 @@ def write_to_json_files(json_records, json_file): writer.write(json.dumps(json_records, indent=4) + "\n") -def _get_best_indexes(logits, n_best_size): - """Get the n-best logits from a list.""" - index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) - - best_indexes = [] - for i in range(len(index_and_score)): - if i >= n_best_size: - break - best_indexes.append(index_and_score[i][0]) - return best_indexes +def _get_best_indexes_and_logits(result, + n_best_size, + xlnet_format=False): + """Generates the n-best indexes and logits from a list.""" + if xlnet_format: + for i in range(n_best_size): + for j in range(n_best_size): + j_index = i * n_best_size + j + yield (result.start_indexes[i], result.start_logits[i], + result.end_indexes[j_index], result.end_logits[j_index]) + else: + start_index_and_score = sorted(enumerate(result.start_logits), + key=lambda x: x[1], reverse=True) + end_index_and_score = sorted(enumerate(result.end_logits), + key=lambda x: x[1], reverse=True) + for i in range(len(start_index_and_score)): + if i >= n_best_size: + break + for j in range(len(end_index_and_score)): + if j >= n_best_size: + break + yield (start_index_and_score[i][0], start_index_and_score[i][1], + end_index_and_score[j][0], end_index_and_score[j][1]) def _compute_softmax(scores): @@ -837,6 +912,10 @@ class FeatureWriter(object): features["input_ids"] = 
create_int_feature(feature.input_ids) features["input_mask"] = create_int_feature(feature.input_mask) features["segment_ids"] = create_int_feature(feature.segment_ids) + if feature.paragraph_mask is not None: + features["paragraph_mask"] = create_int_feature(feature.paragraph_mask) + if feature.class_index is not None: + features["class_index"] = create_int_feature([feature.class_index]) if self.is_training: features["start_positions"] = create_int_feature([feature.start_position]) @@ -856,19 +935,23 @@ class FeatureWriter(object): def generate_tf_record_from_json_file(input_file_path, sp_model_file, output_path, + translated_input_folder=None, max_seq_length=384, do_lower_case=True, max_query_length=64, doc_stride=128, + xlnet_format=False, version_2_with_negative=False): """Generates and saves training data into a tf record file.""" train_examples = read_squad_examples( input_file=input_file_path, is_training=True, - version_2_with_negative=version_2_with_negative) + version_2_with_negative=version_2_with_negative, + translated_input_folder=translated_input_folder) tokenizer = tokenization.FullSentencePieceTokenizer( sp_model_file=sp_model_file) - train_writer = FeatureWriter(filename=output_path, is_training=True) + train_writer = FeatureWriter( + filename=output_path, is_training=True) number_of_examples = convert_examples_to_features( examples=train_examples, tokenizer=tokenizer, @@ -877,6 +960,7 @@ def generate_tf_record_from_json_file(input_file_path, max_query_length=max_query_length, is_training=True, output_fn=train_writer.process_feature, + xlnet_format=xlnet_format, do_lower_case=do_lower_case) train_writer.close() diff --git a/official/nlp/data/tagging_data_lib.py b/official/nlp/data/tagging_data_lib.py index c97fd9382f493209f61b0672c04b544259164372..f6b9c19744be9b6b3730e65dd24c1e19730f8e47 100644 --- a/official/nlp/data/tagging_data_lib.py +++ b/official/nlp/data/tagging_data_lib.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Library to process data for tagging task such as NER/POS.""" import collections import os @@ -19,6 +19,7 @@ import os from absl import logging import tensorflow as tf +from official.nlp.bert import tokenization from official.nlp.data import classifier_data_lib # A negative label id for the padding label, which will not contribute @@ -33,9 +34,14 @@ _UNK_TOKEN = "[UNK]" class InputExample(object): """A single training/test example for token classification.""" - def __init__(self, sentence_id, words=None, label_ids=None): + def __init__(self, + sentence_id, + sub_sentence_id=0, + words=None, + label_ids=None): """Constructs an InputExample.""" self.sentence_id = sentence_id + self.sub_sentence_id = sub_sentence_id self.words = words if words else [] self.label_ids = label_ids if label_ids else [] @@ -84,13 +90,48 @@ class PanxProcessor(classifier_data_lib.DataProcessor): "tr", "et", "fi", "hu" ] + def __init__(self, + process_text_fn=tokenization.convert_to_unicode, + only_use_en_train=True, + only_use_en_dev=True): + """See base class. + + Args: + process_text_fn: See base class. + only_use_en_train: If True, only use english training data. Otherwise, use + training data from all languages. + only_use_en_dev: If True, only use english dev data. Otherwise, use dev + data from all languages. 
+ """ + super(PanxProcessor, self).__init__(process_text_fn) + self.only_use_en_train = only_use_en_train + self.only_use_en_dev = only_use_en_dev + def get_train_examples(self, data_dir): - return _read_one_file( + examples = _read_one_file( os.path.join(data_dir, "train-en.tsv"), self.get_labels()) + if not self.only_use_en_train: + for language in self.supported_languages: + if language == "en": + continue + examples.extend( + _read_one_file( + os.path.join(data_dir, f"train-{language}.tsv"), + self.get_labels())) + return examples def get_dev_examples(self, data_dir): - return _read_one_file( + examples = _read_one_file( os.path.join(data_dir, "dev-en.tsv"), self.get_labels()) + if not self.only_use_en_dev: + for language in self.supported_languages: + if language == "en": + continue + examples.extend( + _read_one_file( + os.path.join(data_dir, f"dev-{language}.tsv"), + self.get_labels())) + return examples def get_test_examples(self, data_dir): examples_dict = {} @@ -115,13 +156,49 @@ class UdposProcessor(classifier_data_lib.DataProcessor): "ta", "te", "th", "tl", "tr", "ur", "vi", "yo", "zh" ] + def __init__(self, + process_text_fn=tokenization.convert_to_unicode, + only_use_en_train=True, + only_use_en_dev=True): + """See base class. + + Args: + process_text_fn: See base class. + only_use_en_train: If True, only use english training data. Otherwise, use + training data from all languages. + only_use_en_dev: If True, only use english dev data. Otherwise, use dev + data from all languages. + """ + super(UdposProcessor, self).__init__(process_text_fn) + self.only_use_en_train = only_use_en_train + self.only_use_en_dev = only_use_en_dev + def get_train_examples(self, data_dir): - return _read_one_file( - os.path.join(data_dir, "train-en.tsv"), self.get_labels()) + if self.only_use_en_train: + examples = _read_one_file( + os.path.join(data_dir, "train-en.tsv"), self.get_labels()) + else: + examples = [] + # Uses glob because some languages are missing in train. 
+ for filepath in tf.io.gfile.glob(os.path.join(data_dir, "train-*.tsv")): + examples.extend( + _read_one_file( + filepath, + self.get_labels())) + return examples def get_dev_examples(self, data_dir): - return _read_one_file( - os.path.join(data_dir, "dev-en.tsv"), self.get_labels()) + if self.only_use_en_dev: + examples = _read_one_file( + os.path.join(data_dir, "dev-en.tsv"), self.get_labels()) + else: + examples = [] + for filepath in tf.io.gfile.glob(os.path.join(data_dir, "dev-*.tsv")): + examples.extend( + _read_one_file( + filepath, + self.get_labels())) + return examples def get_test_examples(self, data_dir): examples_dict = {} @@ -146,11 +223,11 @@ def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None): # Needs additional [CLS] and [SEP] tokens. max_length = max_length - 2 new_examples = [] - new_example = InputExample(sentence_id=example.sentence_id) - for i, word in enumerate(example.words): - if any([x < 0 for x in example.label_ids]): - raise ValueError("Unexpected negative label_id: %s" % example.label_ids) + new_example = InputExample(sentence_id=example.sentence_id, sub_sentence_id=0) + if any([x < 0 for x in example.label_ids]): + raise ValueError("Unexpected negative label_id: %s" % example.label_ids) + for i, word in enumerate(example.words): if text_preprocessing: word = text_preprocessing(word) subwords = tokenizer.tokenize(word) @@ -160,7 +237,10 @@ def _tokenize_example(example, max_length, tokenizer, text_preprocessing=None): if len(subwords) + len(new_example.words) > max_length: # Start a new example. 
new_examples.append(new_example) - new_example = InputExample(sentence_id=example.sentence_id) + last_sub_sentence_id = new_example.sub_sentence_id + new_example = InputExample( + sentence_id=example.sentence_id, + sub_sentence_id=last_sub_sentence_id + 1) for j, subword in enumerate(subwords): # Use the real label for the first subword, and pad label for @@ -203,6 +283,7 @@ def _convert_single_example(example, max_seq_length, tokenizer): features["segment_ids"] = create_int_feature(segment_ids) features["label_ids"] = create_int_feature(label_ids) features["sentence_id"] = create_int_feature([example.sentence_id]) + features["sub_sentence_id"] = create_int_feature([example.sub_sentence_id]) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) return tf_example @@ -267,12 +348,12 @@ def write_example_to_file(examples, logging.info("Writing example %d of %d to %s", ex_index, len(examples), output_file) - tokenized_examples = _tokenize_example(example, max_seq_length, - tokenizer, text_preprocessing) + tokenized_examples = _tokenize_example(example, max_seq_length, tokenizer, + text_preprocessing) num_tokenized_examples += len(tokenized_examples) for per_tokenized_example in tokenized_examples: - tf_example = _convert_single_example( - per_tokenized_example, max_seq_length, tokenizer) + tf_example = _convert_single_example(per_tokenized_example, + max_seq_length, tokenizer) writer.write(tf_example.SerializeToString()) writer.close() @@ -307,17 +388,16 @@ def token_classification_meta_data(train_data_size, return meta_data -def generate_tf_record_from_data_file(processor, - data_dir, - tokenizer, - max_seq_length, - train_data_output_path, +def generate_tf_record_from_data_file(processor, data_dir, tokenizer, + max_seq_length, train_data_output_path, eval_data_output_path, test_data_output_path, text_preprocessing): """Generates tfrecord files from the raw data.""" - common_kwargs = dict(tokenizer=tokenizer, max_seq_length=max_seq_length, - 
text_preprocessing=text_preprocessing) + common_kwargs = dict( + tokenizer=tokenizer, + max_seq_length=max_seq_length, + text_preprocessing=text_preprocessing) train_examples = processor.get_train_examples(data_dir) train_data_size = write_example_to_file( train_examples, output_file=train_data_output_path, **common_kwargs) diff --git a/official/nlp/data/tagging_data_lib_test.py b/official/nlp/data/tagging_data_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..afbfebdef30586faa1ec0f362ee15d10461df1c3 --- /dev/null +++ b/official/nlp/data/tagging_data_lib_test.py @@ -0,0 +1,108 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.data.tagging_data_lib.""" +import os +import random + +from absl.testing import parameterized +import tensorflow as tf + +from official.nlp.bert import tokenization +from official.nlp.data import tagging_data_lib + + +def _create_fake_file(filename, labels, is_test): + + def write_one_sentence(writer, length): + for _ in range(length): + line = "hiworld" + if not is_test: + line += "\t%s" % (labels[random.randint(0, len(labels) - 1)]) + writer.write(line + "\n") + + # Writes two sentences with length of 3 and 12 respectively. 
+ with tf.io.gfile.GFile(filename, "w") as writer: + write_one_sentence(writer, 3) + writer.write("\n") + write_one_sentence(writer, 12) + + +class TaggingDataLibTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(TaggingDataLibTest, self).setUp() + + self.processors = { + "panx": tagging_data_lib.PanxProcessor, + "udpos": tagging_data_lib.UdposProcessor, + } + self.vocab_file = os.path.join(self.get_temp_dir(), "vocab.txt") + with tf.io.gfile.GFile(self.vocab_file, "w") as writer: + writer.write("\n".join(["[CLS]", "[SEP]", "hi", "##world", "[UNK]"])) + + @parameterized.parameters( + {"task_type": "panx"}, + {"task_type": "udpos"}, + ) + def test_generate_tf_record(self, task_type): + processor = self.processors[task_type]() + input_data_dir = os.path.join(self.get_temp_dir(), task_type) + tf.io.gfile.mkdir(input_data_dir) + # Write fake train file. + _create_fake_file( + os.path.join(input_data_dir, "train-en.tsv"), + processor.get_labels(), + is_test=False) + + # Write fake dev file. + _create_fake_file( + os.path.join(input_data_dir, "dev-en.tsv"), + processor.get_labels(), + is_test=False) + + # Write fake test files. 
+ for lang in processor.supported_languages: + _create_fake_file( + os.path.join(input_data_dir, "test-%s.tsv" % lang), + processor.get_labels(), + is_test=True) + + output_path = os.path.join(self.get_temp_dir(), task_type, "output") + tokenizer = tokenization.FullTokenizer( + vocab_file=self.vocab_file, do_lower_case=True) + metadata = tagging_data_lib.generate_tf_record_from_data_file( + processor, + input_data_dir, + tokenizer, + max_seq_length=8, + train_data_output_path=os.path.join(output_path, "train.tfrecord"), + eval_data_output_path=os.path.join(output_path, "eval.tfrecord"), + test_data_output_path=os.path.join(output_path, "test_{}.tfrecord"), + text_preprocessing=tokenization.convert_to_unicode) + + self.assertEqual(metadata["train_data_size"], 5) + files = tf.io.gfile.glob(output_path + "/*") + expected_files = [] + expected_files.append(os.path.join(output_path, "train.tfrecord")) + expected_files.append(os.path.join(output_path, "eval.tfrecord")) + for lang in processor.supported_languages: + expected_files.append( + os.path.join(output_path, "test_%s.tfrecord" % lang)) + + self.assertCountEqual(files, expected_files) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/data/tagging_data_loader.py b/official/nlp/data/tagging_data_loader.py deleted file mode 100644 index 9ede37091b9328bedd0ccb6fd78c126ab3237458..0000000000000000000000000000000000000000 --- a/official/nlp/data/tagging_data_loader.py +++ /dev/null @@ -1,82 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Loads dataset for the tagging (e.g., NER/POS) task.""" -from typing import Mapping, Optional -import dataclasses -import tensorflow as tf - -from official.core import input_reader -from official.modeling.hyperparams import config_definitions as cfg -from official.nlp.data import data_loader_factory - - -@dataclasses.dataclass -class TaggingDataConfig(cfg.DataConfig): - """Data config for tagging (tasks/tagging).""" - is_training: bool = True - seq_length: int = 128 - include_sentence_id: bool = False - - -@data_loader_factory.register_data_loader_cls(TaggingDataConfig) -class TaggingDataLoader: - """A class to load dataset for tagging (e.g., NER and POS) task.""" - - def __init__(self, params: TaggingDataConfig): - self._params = params - self._seq_length = params.seq_length - self._include_sentence_id = params.include_sentence_id - - def _decode(self, record: tf.Tensor): - """Decodes a serialized tf.Example.""" - name_to_features = { - 'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), - 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), - 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), - 'label_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), - } - if self._include_sentence_id: - name_to_features['sentence_id'] = tf.io.FixedLenFeature([], tf.int64) - - example = tf.io.parse_single_example(record, name_to_features) - - # tf.Example only supports tf.int64, but the TPU only supports tf.int32. 
- # So cast all int64 to int32. - for name in example: - t = example[name] - if t.dtype == tf.int64: - t = tf.cast(t, tf.int32) - example[name] = t - - return example - - def _parse(self, record: Mapping[str, tf.Tensor]): - """Parses raw tensors into a dict of tensors to be consumed by the model.""" - x = { - 'input_word_ids': record['input_ids'], - 'input_mask': record['input_mask'], - 'input_type_ids': record['segment_ids'] - } - if self._include_sentence_id: - x['sentence_id'] = record['sentence_id'] - y = record['label_ids'] - return (x, y) - - def load(self, input_context: Optional[tf.distribute.InputContext] = None): - """Returns a tf.dataset.Dataset.""" - reader = input_reader.InputReader( - params=self._params, decoder_fn=self._decode, parser_fn=self._parse) - return reader.read(input_context) diff --git a/official/nlp/data/tagging_dataloader.py b/official/nlp/data/tagging_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..daecb8e3d8c75e2a6127f9be2892fd504d1a4385 --- /dev/null +++ b/official/nlp/data/tagging_dataloader.py @@ -0,0 +1,85 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Loads dataset for the tagging (e.g., NER/POS) task.""" +from typing import Mapping, Optional + +import dataclasses +import tensorflow as tf +from official.core import config_definitions as cfg +from official.core import input_reader +from official.nlp.data import data_loader +from official.nlp.data import data_loader_factory + + +@dataclasses.dataclass +class TaggingDataConfig(cfg.DataConfig): + """Data config for tagging (tasks/tagging).""" + is_training: bool = True + seq_length: int = 128 + include_sentence_id: bool = False + + +@data_loader_factory.register_data_loader_cls(TaggingDataConfig) +class TaggingDataLoader(data_loader.DataLoader): + """A class to load dataset for tagging (e.g., NER and POS) task.""" + + def __init__(self, params: TaggingDataConfig): + self._params = params + self._seq_length = params.seq_length + self._include_sentence_id = params.include_sentence_id + + def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = { + 'input_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'input_mask': tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + 'label_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64), + } + if self._include_sentence_id: + name_to_features['sentence_id'] = tf.io.FixedLenFeature([], tf.int64) + name_to_features['sub_sentence_id'] = tf.io.FixedLenFeature([], tf.int64) + + example = tf.io.parse_single_example(record, name_to_features) + + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. 
+ for name in example: + t = example[name] + if t.dtype == tf.int64: + t = tf.cast(t, tf.int32) + example[name] = t + + return example + + def _parse(self, record: Mapping[str, tf.Tensor]): + """Parses raw tensors into a dict of tensors to be consumed by the model.""" + x = { + 'input_word_ids': record['input_ids'], + 'input_mask': record['input_mask'], + 'input_type_ids': record['segment_ids'] + } + if self._include_sentence_id: + x['sentence_id'] = record['sentence_id'] + x['sub_sentence_id'] = record['sub_sentence_id'] + + y = record['label_ids'] + return (x, y) + + def load(self, input_context: Optional[tf.distribute.InputContext] = None): + """Returns a tf.dataset.Dataset.""" + reader = input_reader.InputReader( + params=self._params, decoder_fn=self._decode, parser_fn=self._parse) + return reader.read(input_context) diff --git a/official/nlp/data/tagging_dataloader_test.py b/official/nlp/data/tagging_dataloader_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2ff5fc7f2fa9e2715cac68a9648a2dd920405a60 --- /dev/null +++ b/official/nlp/data/tagging_dataloader_test.py @@ -0,0 +1,82 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.data.tagging_data_loader.""" +import os + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.nlp.data import tagging_dataloader + + +def _create_fake_dataset(output_path, seq_length, include_sentence_id): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_int_feature(values): + f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + return f + + for i in range(100): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + features['input_ids'] = create_int_feature(input_ids) + features['input_mask'] = create_int_feature(np.ones_like(input_ids)) + features['segment_ids'] = create_int_feature(np.ones_like(input_ids)) + features['label_ids'] = create_int_feature( + np.random.randint(10, size=(seq_length))) + if include_sentence_id: + features['sentence_id'] = create_int_feature([i]) + features['sub_sentence_id'] = create_int_feature([0]) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + +class TaggingDataLoaderTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(True, False) + def test_load_dataset(self, include_sentence_id): + seq_length = 16 + batch_size = 10 + train_data_path = os.path.join(self.get_temp_dir(), 'train.tf_record') + _create_fake_dataset(train_data_path, seq_length, include_sentence_id) + data_config = tagging_dataloader.TaggingDataConfig( + input_path=train_data_path, + seq_length=seq_length, + global_batch_size=batch_size, + include_sentence_id=include_sentence_id) + + dataset = tagging_dataloader.TaggingDataLoader(data_config).load() + features, labels = next(iter(dataset)) + + expected_keys = ['input_word_ids', 'input_mask', 'input_type_ids'] + if include_sentence_id: + expected_keys.extend(['sentence_id', 'sub_sentence_id']) + self.assertCountEqual(expected_keys, 
features.keys()) + + self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_mask'].shape, (batch_size, seq_length)) + self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length)) + self.assertEqual(labels.shape, (batch_size, seq_length)) + if include_sentence_id: + self.assertEqual(features['sentence_id'].shape, (batch_size,)) + self.assertEqual(features['sub_sentence_id'].shape, (batch_size,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/data/train_sentencepiece.py b/official/nlp/data/train_sentencepiece.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3b05c46472e55c9b804da2aa45dabfd4867b7f --- /dev/null +++ b/official/nlp/data/train_sentencepiece.py @@ -0,0 +1,133 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A script to train sentencepiece model from tensorflow datasets. 
+ +Reserved tokens: +pad: 0, +eos: 1, +unk: 2 +(bos is not reserved) +""" + +import os +import tempfile +from typing import List, Tuple + +from absl import app +from absl import flags +from absl import logging +import tensorflow as tf +import tensorflow_datasets as tfds + +from sentencepiece import SentencePieceTrainer + + +FLAGS = flags.FLAGS +flags.DEFINE_string("output_model_path", None, + "Path to save the the sentencepiece model.") +flags.mark_flag_as_required("output_model_path") + +flags.DEFINE_string("tfds_dir", None, "Directory of the tfds.") +flags.DEFINE_string("tfds_name", "wmt14_translate/de-en", + "Name of the dataset we generate vacabulay from.") +flags.DEFINE_string("tfds_split", "train", "Split of the dataset.") +flags.DEFINE_integer("vocab_size", 32000, "Size of vocabulary.") +flags.DEFINE_integer( + "max_char", -1, + "Maximum number of characters to use. " + "If a non-positive number is provided, all sentences are used.") +flags.DEFINE_string("model_type", "bpe", + "Model algorithm: unigram, bpe, word or char.") +flags.DEFINE_float("character_coverage", 0.9995, + "Character coverage to determine the minimum symbols") +flags.DEFINE_list( + "data_keys", ["en", "de"], + "Comma-separated list of keys to use for training the vocabulary.") + + +def dump_chars_to_textfile(dataset: tf.data.Dataset, + data_keys: Tuple[str], + max_char: int = -1): + """Write part of a TFDS sentence dataset to lines in a text file. + + Args: + dataset: tf.dataset containing string-data. + data_keys: what keys in dataset to dump from. + max_char: max character to dump to text file. + + Returns: + name of temp file with dataset bytes, exact number of characters dumped. 
+ """ + ds_iter = dataset.as_numpy_iterator() + with tempfile.NamedTemporaryFile(delete=False) as outfp: + char_count = 0 + while True: + example = next(ds_iter, None) + if example is None or ( + max_char > 0 and char_count > max_char): + break + for k in data_keys: + line = example[k] + b"\n" + char_count += len(line) + outfp.write(line) + return outfp.name + + +def train_sentencepiece( + file_path: str, + model_path: str, + vocab_size: int, + character_coverage: float, + model_type: str): + """Train SentencePiece tokenizer from subset of tf dataset. + + Args: + file_path: path of data to train sentencepiece. + model_path: path of model file to save vocab model to. + vocab_size: size of vocab tokens to train. + character_coverage: amount of characters covered by the model, good defaults + are 0.9995 for languages with rich character set like Japanese or Chinese + and 1.0 for other languages with small character set. + model_type: type of sentencepiece vocab to train. + + Returns: + path to the trained sentencepiece vocabulary model. 
+ """ + argstr = " ".join([ + f"--input={file_path}", f"--vocab_size={vocab_size}", + f"--character_coverage={character_coverage}", + f"--model_prefix={model_path}", f"--model_type={model_type}", + "--bos_id=-1", "--pad_id=0", "--eos_id=1", "--unk_id=2" + ]) + SentencePieceTrainer.Train(argstr) + + +def main(argv: List[str]): + del argv + builder = tfds.builder(FLAGS.tfds_name, data_dir=FLAGS.tfds_dir) + ds = builder.as_dataset(split=FLAGS.tfds_split) + tmp_filename = dump_chars_to_textfile(ds, FLAGS.data_keys, FLAGS.max_char) + logging.info("Sentencepiece model will be placed here: %s", + FLAGS.output_model_path) + train_sentencepiece(tmp_filename, + FLAGS.output_model_path, + FLAGS.vocab_size, + FLAGS.character_coverage, + FLAGS.model_type) + os.remove(tmp_filename) + + +if __name__ == "__main__": + app.run(main) diff --git a/official/nlp/data/wmt_dataloader.py b/official/nlp/data/wmt_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..e0521ad47805b05b83287248f9db80dd1881e140 --- /dev/null +++ b/official/nlp/data/wmt_dataloader.py @@ -0,0 +1,295 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Input pipeline for the transformer model to read, filter, and batch examples. + +Batching scheme + + Prior to batching, elements in the dataset are grouped by length (max between + 'inputs' and 'targets' length). 
Each group is then batched such that: + group_batch_size * length <= batch_size. + + Another way to view batch_size is the maximum number of tokens in each batch. + + Once batched, each element in the dataset will have the shape: + {'inputs': [group_batch_size, padded_input_length], + 'targets': [group_batch_size, padded_target_length]} + Lengths are padded to the longest 'inputs' or 'targets' sequence in the batch + (padded_input_length and padded_target_length can be different). + + This batching scheme decreases the fraction of padding tokens per training + batch, thus improving the training speed significantly. +""" +from typing import Dict, Optional + +import dataclasses +import tensorflow as tf +import tensorflow_text as tftxt +from official.core import config_definitions as cfg +from official.core import input_reader +from official.nlp.data import data_loader +from official.nlp.data import data_loader_factory + +# Example grouping constants. Defines length boundaries for each group. +# These values are the defaults used in Tensor2Tensor. +_MIN_BOUNDARY = 8 +_BOUNDARY_SCALE = 1.1 + + +def _get_example_length(example): + """Returns the maximum length between the example inputs and targets.""" + length = tf.maximum(tf.shape(example[0])[0], tf.shape(example[1])[0]) + return length + + +def _create_min_max_boundaries(max_length, + min_boundary=_MIN_BOUNDARY, + boundary_scale=_BOUNDARY_SCALE): + """Create min and max boundary lists up to max_length. + + For example, when max_length=24, min_boundary=4 and boundary_scale=2, the + returned values will be: + buckets_min = [0, 4, 8, 16] + buckets_max = [4, 8, 16, 25] + + Args: + max_length: The maximum length of example in dataset. + min_boundary: Minimum length in boundary. + boundary_scale: Amount to scale consecutive boundaries in the list. + + Returns: + min and max boundary lists + + """ + # Create bucket boundaries list by scaling the previous boundary or adding 1 + # (to ensure increasing boundary sizes). 
+ bucket_boundaries = [] + x = min_boundary + while x < max_length: + bucket_boundaries.append(x) + x = max(x + 1, int(x * boundary_scale)) + + # Create min and max boundary lists from the initial list. + buckets_min = [0] + bucket_boundaries + buckets_max = bucket_boundaries + [max_length + 1] + return buckets_min, buckets_max + + +def _batch_examples(dataset, batch_size, max_length): + """Group examples by similar lengths, and return batched dataset. + + Each batch of similar-length examples are padded to the same length, and may + have different number of elements in each batch, such that: + group_batch_size * padded_length <= batch_size. + + This decreases the number of padding tokens per batch, which improves the + training speed. + + Args: + dataset: Dataset of unbatched examples. + batch_size: Max number of tokens per batch of examples. + max_length: Max number of tokens in an example input or target sequence. + + Returns: + Dataset of batched examples with similar lengths. + """ + # Get min and max boundary lists for each example. These are used to calculate + # the `bucket_id`, which is the index at which: + # buckets_min[bucket_id] <= len(example) < buckets_max[bucket_id] + # Note that using both min and max lists improves the performance. + buckets_min, buckets_max = _create_min_max_boundaries(max_length) + + # Create list of batch sizes for each bucket_id, so that + # bucket_batch_size[bucket_id] * buckets_max[bucket_id] <= batch_size + bucket_batch_sizes = [int(batch_size) // x for x in buckets_max] + + # Validates bucket batch sizes. + if any([batch_size <= 0 for batch_size in bucket_batch_sizes]): + raise ValueError( + 'The token budget, global batch size, is too small to yeild 0 bucket ' + 'window: %s' % str(bucket_batch_sizes)) + + # bucket_id will be a tensor, so convert this list to a tensor as well. 
+ bucket_batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64) + + def example_to_bucket_id(example): + """Return int64 bucket id for this example, calculated based on length.""" + example_input = example['inputs'] + example_target = example['targets'] + seq_length = _get_example_length((example_input, example_target)) + + conditions_c = tf.logical_and( + tf.less_equal(buckets_min, seq_length), tf.less(seq_length, + buckets_max)) + bucket_id = tf.reduce_min(tf.where(conditions_c)) + return bucket_id + + def window_size_fn(bucket_id): + """Return number of examples to be grouped when given a bucket id.""" + return bucket_batch_sizes[bucket_id] + + def batching_fn(bucket_id, grouped_dataset): + """Batch and add padding to a dataset of elements with similar lengths.""" + bucket_batch_size = window_size_fn(bucket_id) + + # Batch the dataset and add padding so that all input sequences in the + # examples have the same length, and all target sequences have the same + # lengths as well. Resulting lengths of inputs and targets can differ. 
+ padded_shapes = dict([ + (name, [None] * len(spec.shape)) + for name, spec in grouped_dataset.element_spec.items() + ]) + return grouped_dataset.padded_batch(bucket_batch_size, padded_shapes) + + return dataset.apply( + tf.data.experimental.group_by_window( + key_func=example_to_bucket_id, + reduce_func=batching_fn, + window_size=None, + window_size_func=window_size_fn)) + + +@dataclasses.dataclass +class WMTDataConfig(cfg.DataConfig): + """Data config for WMT translation.""" + max_seq_length: int = 64 + static_batch: bool = False + sentencepiece_model_path: str = '' + src_lang: str = '' + tgt_lang: str = '' + transform_and_batch: bool = True + has_unique_id: bool = False + + +@data_loader_factory.register_data_loader_cls(WMTDataConfig) +class WMTDataLoader(data_loader.DataLoader): + """A class to load dataset for WMT translation task.""" + + def __init__(self, params: WMTDataConfig): + self._params = params + self._max_seq_length = params.max_seq_length + self._static_batch = params.static_batch + self._global_batch_size = params.global_batch_size + if self._params.transform_and_batch: + self._tokenizer = tftxt.SentencepieceTokenizer( + model=tf.io.gfile.GFile(params.sentencepiece_model_path, 'rb').read(), + add_eos=True) + + def _decode(self, record: tf.Tensor): + """Decodes a serialized tf.Example.""" + name_to_features = { + self._params.src_lang: tf.io.FixedLenFeature([], tf.string), + self._params.tgt_lang: tf.io.FixedLenFeature([], tf.string), + } + if self._params.has_unique_id: + name_to_features['unique_id'] = tf.io.FixedLenFeature([], tf.int64) + example = tf.io.parse_single_example(record, name_to_features) + # tf.Example only supports tf.int64, but the TPU only supports tf.int32. + # So cast all int64 to int32. 
+ for name in example: + t = example[name] + if t.dtype == tf.int64: + t = tf.cast(t, tf.int32) + example[name] = t + return example + + def _tokenize(self, inputs) -> Dict[str, tf.Tensor]: + tokenized_inputs = {} + for k, v in inputs.items(): + if k == self._params.src_lang: + tokenized_inputs['inputs'] = self._tokenizer.tokenize(v) + elif k == self._params.tgt_lang: + tokenized_inputs['targets'] = self._tokenizer.tokenize(v) + else: + tokenized_inputs[k] = v + print(tokenized_inputs) + return tokenized_inputs + + def _filter_max_length(self, inputs): + # return tf.constant(True) + return tf.logical_and( + tf.shape(inputs['inputs'])[0] <= self._max_seq_length, + tf.shape(inputs['targets'])[0] <= self._max_seq_length) + + def _maybe_truncate(self, inputs): + truncated_inputs = {} + for k, v in inputs.items(): + if k == 'inputs' or k == 'targets': + truncated_inputs[k] = tf.pad( + v[:self._max_seq_length - 1], [[0, 1]], + constant_values=1) if tf.shape(v)[0] > self._max_seq_length else v + else: + truncated_inputs[k] = v + return truncated_inputs + + def _tokenize_bucketize_and_batch( + self, + dataset, + input_context: Optional[tf.distribute.InputContext] = None): + dataset = dataset.map( + self._tokenize, num_parallel_calls=tf.data.experimental.AUTOTUNE) + + if self._params.is_training: + dataset = dataset.filter(self._filter_max_length) + else: + dataset = dataset.map( + self._maybe_truncate, + num_parallel_calls=tf.data.experimental.AUTOTUNE) + + per_replica_batch_size = input_context.get_per_replica_batch_size( + self._global_batch_size) if input_context else self._global_batch_size + if self._static_batch: + padded_shapes = {} + for name, _ in dataset.element_spec.items(): + if name == 'unique_id': + padded_shapes[name] = [] + else: + padded_shapes[name] = [self._max_seq_length + ] if self._static_batch else [None] + batch_size = per_replica_batch_size + if self._params.is_training: + batch_size = int(batch_size // self._max_seq_length) + dataset = 
dataset.padded_batch( + batch_size, + padded_shapes, + drop_remainder=True) + else: + # Group and batch such that each batch has examples of similar length. + dataset = _batch_examples(dataset, per_replica_batch_size, + self._max_seq_length) + # Prefetch the next element to improve speed of input pipeline. + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + return dataset + + def load(self, input_context: Optional[tf.distribute.InputContext] = None): + """Returns a tf.dataset.Dataset.""" + decoder_fn = None + # Only decode for TFRecords. + if self._params.input_path: + decoder_fn = self._decode + + def _identity( + dataset, input_context: Optional[tf.distribute.InputContext] = None): + del input_context + return dataset + + transform_and_batch_fn = _identity + if self._params.transform_and_batch: + transform_and_batch_fn = self._tokenize_bucketize_and_batch + + reader = input_reader.InputReader( + params=self._params, + decoder_fn=decoder_fn, + transform_and_batch_fn=transform_and_batch_fn) + return reader.read(input_context) diff --git a/official/nlp/data/wmt_dataloader_test.py b/official/nlp/data/wmt_dataloader_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a4454d96d889504251d50070863b9447b2263648 --- /dev/null +++ b/official/nlp/data/wmt_dataloader_test.py @@ -0,0 +1,130 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.data.wmt_dataloader.""" +import os +from absl.testing import parameterized + +import tensorflow as tf + +from sentencepiece import SentencePieceTrainer +from official.nlp.data import wmt_dataloader + + +def _generate_line_file(filepath, lines): + with tf.io.gfile.GFile(filepath, 'w') as f: + for l in lines: + f.write('{}\n'.format(l)) + + +def _generate_record_file(filepath, src_lines, tgt_lines, unique_id=False): + writer = tf.io.TFRecordWriter(filepath) + for i, (src, tgt) in enumerate(zip(src_lines, tgt_lines)): + features = { + 'en': tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[src.encode()])), + 'reverse_en': tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[tgt.encode()])), + } + if unique_id: + features['unique_id'] = tf.train.Feature( + int64_list=tf.train.Int64List(value=[i])), + example = tf.train.Example( + features=tf.train.Features( + feature=features)) + writer.write(example.SerializeToString()) + writer.close() + + +def _train_sentencepiece(input_path, vocab_size, model_path, eos_id=1): + argstr = ' '.join([ + f'--input={input_path}', f'--vocab_size={vocab_size}', + '--character_coverage=0.995', + f'--model_prefix={model_path}', '--model_type=bpe', + '--bos_id=-1', '--pad_id=0', f'--eos_id={eos_id}', '--unk_id=2' + ]) + SentencePieceTrainer.Train(argstr) + + +class WMTDataLoaderTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(WMTDataLoaderTest, self).setUp() + self._temp_dir = self.get_temp_dir() + src_lines = [ + 'abc ede fg', + 'bbcd ef a g', + 'de f a a g' + ] + tgt_lines = [ + 'dd cc a ef g', + 'bcd ef a g', + 'gef cd ba' + ] + self._record_train_input_path = os.path.join(self._temp_dir, 'train.record') + _generate_record_file(self._record_train_input_path, src_lines, tgt_lines) + self._record_test_input_path = os.path.join(self._temp_dir, 'test.record') + _generate_record_file(self._record_test_input_path, src_lines, tgt_lines, + unique_id=True) + 
self._sentencepeice_input_path = os.path.join(self._temp_dir, 'inputs.txt') + _generate_line_file(self._sentencepeice_input_path, src_lines + tgt_lines) + sentencepeice_model_prefix = os.path.join(self._temp_dir, 'sp') + _train_sentencepiece(self._sentencepeice_input_path, 20, + sentencepeice_model_prefix) + self._sentencepeice_model_path = '{}.model'.format( + sentencepeice_model_prefix) + + @parameterized.named_parameters( + ('train_static', True, True, 100, (2, 35)), + ('train_non_static', True, False, 100, (12, 7)), + ('non_train_static', False, True, 3, (3, 35)), + ('non_train_non_static', False, False, 50, (2, 7)),) + def test_load_dataset( + self, is_training, static_batch, batch_size, expected_shape): + data_config = wmt_dataloader.WMTDataConfig( + input_path=self._record_train_input_path + if is_training else self._record_test_input_path, + max_seq_length=35, + global_batch_size=batch_size, + is_training=is_training, + static_batch=static_batch, + src_lang='en', + tgt_lang='reverse_en', + sentencepiece_model_path=self._sentencepeice_model_path) + dataset = wmt_dataloader.WMTDataLoader(data_config).load() + examples = next(iter(dataset)) + inputs, targets = examples['inputs'], examples['targets'] + self.assertEqual(inputs.shape, expected_shape) + self.assertEqual(targets.shape, expected_shape) + + def test_load_dataset_raise_invalid_window(self): + batch_tokens_size = 10 # this is too small to form buckets. 
+ data_config = wmt_dataloader.WMTDataConfig( + input_path=self._record_train_input_path, + max_seq_length=100, + global_batch_size=batch_tokens_size, + is_training=True, + static_batch=False, + src_lang='en', + tgt_lang='reverse_en', + sentencepiece_model_path=self._sentencepeice_model_path) + with self.assertRaisesRegex( + ValueError, 'The token budget, global batch size, is too small.*'): + _ = wmt_dataloader.WMTDataLoader(data_config).load() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/docs/pretrained_models.md b/official/nlp/docs/pretrained_models.md new file mode 100644 index 0000000000000000000000000000000000000000..0c836b33b7d5d07932d56303ca61647c3053cee4 --- /dev/null +++ b/official/nlp/docs/pretrained_models.md @@ -0,0 +1,72 @@ +# Pre-trained Models + +We provide a large collection of baselines and checkpoints for NLP pre-trained +models. + +## How to Load Pretrained Models + +### How to Initialize from Checkpoint + +**Note:** TF-HUB/Savedmodel is the preferred way to distribute models as it is +self-contained. Please consider using TF-HUB for finetuning tasks first. + +If you use the [NLP training library](train.md), +you can specify the checkpoint path link directly when launching your job. For +example, to initialize the model from the checkpoint, you can specify +`--params_override=task.init_checkpoint=PATH_TO_INIT_CKPT` as: + +``` +python3 train.py \ + --params_override=task.init_checkpoint=PATH_TO_INIT_CKPT +``` + +### How to load TF-HUB SavedModel + +Finetuning tasks such as question answering (SQuAD) and sentence +prediction (GLUE) support loading a model from TF-HUB. These built-in tasks +support a specific `task.hub_module_url` parameter. 
To set this parameter, +replace `--params_override=task.init_checkpoint=...` with +`--params_override=task.hub_module_url=TF_HUB_URL`, like below: + +``` +python3 train.py \ + --params_override=task.hub_module_url=https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 +``` + +## BERT + +Public BERT pre-trained models released by the BERT authors. + +We released both checkpoints and tf.hub modules as the pretrained models for +fine-tuning. They are TF 2.x compatible and are converted from the checkpoints +released in TF 1.x official BERT repository +[google-research/bert](https://github.com/google-research/bert) +in order to keep consistent with BERT paper. + +### Checkpoints + +Model | Configuration | Training Data | Checkpoint & Vocabulary | TF-HUB SavedModels +---------------------------------------- | :--------------------------: | ------------: | ----------------------: | ------: +BERT-base uncased English | uncased_L-12_H-768_A-12 | Wiki + Books | [uncased_L-12_H-768_A-12](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12.tar.gz) | [`BERT-Base, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/) +BERT-base cased English | cased_L-12_H-768_A-12 | Wiki + Books | [cased_L-12_H-768_A-12](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/cased_L-12_H-768_A-12.tar.gz) | [`BERT-Base, Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/) +BERT-large uncased English | uncased_L-24_H-1024_A-16 | Wiki + Books | [uncased_L-24_H-1024_A-16](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/uncased_L-24_H-1024_A-16.tar.gz) | [`BERT-Large, Uncased`](https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/) +BERT-large cased English | cased_L-24_H-1024_A-16 | Wiki + Books | [cased_L-24_H-1024_A-16](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/cased_L-24_H-1024_A-16.tar.gz) | [`BERT-Large, 
Cased`](https://tfhub.dev/tensorflow/bert_en_cased_L-24_H-1024_A-16/) +BERT-large, Uncased (Whole Word Masking) | wwm_uncased_L-24_H-1024_A-16 | Wiki + Books | [wwm_uncased_L-24_H-1024_A-16](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/wwm_uncased_L-24_H-1024_A-16.tar.gz) | [`BERT-Large, Uncased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_uncased_L-24_H-1024_A-16/) +BERT-large, Cased (Whole Word Masking) | wwm_cased_L-24_H-1024_A-16 | Wiki + Books | [wwm_cased_L-24_H-1024_A-16](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/wwm_cased_L-24_H-1024_A-16.tar.gz) | [`BERT-Large, Cased (Whole Word Masking)`](https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/) +BERT-base MultiLingual | multi_cased_L-12_H-768_A-12 | Wiki + Books | [multi_cased_L-12_H-768_A-12](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/multi_cased_L-12_H-768_A-12.tar.gz) | [`BERT-Base, Multilingual Cased`](https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/) +BERT-base Chinese | chinese_L-12_H-768_A-12 | Wiki + Books | [chinese_L-12_H-768_A-12](https://storage.googleapis.com/cloud-tpu-checkpoints/bert/v3/chinese_L-12_H-768_A-12.tar.gz) | [`BERT-Base, Chinese`](https://tfhub.dev/tensorflow/bert_zh_L-12_H-768_A-12/) + +You may explore more in the TF-Hub BERT collection: +https://tfhub.dev/google/collections/bert/1 + +### BERT variants + +We also have pretrained BERT models with variants in both network architecture +and training methodologies. These models achieve higher downstream accuracy +scores. 
+ +Model | Configuration | Training Data | TF-HUB SavedModels | Comment +-------------------------------- | :----------------------: | -----------------------: | ------------------------------------------------------------------------------------: | ------: +BERT-base talking heads + ggelu | uncased_L-12_H-768_A-12 | Wiki + Books | [talkheads_ggelu_base](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1) | BERT-base trained with [talking heads attention](https://arxiv.org/abs/2003.02436) and [gated GeLU](https://arxiv.org/abs/2002.05202). +BERT-large talking heads + ggelu | uncased_L-24_H-1024_A-16 | Wiki + Books | [talkheads_ggelu_large](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_large/1) | BERT-large trained with [talking heads attention](https://arxiv.org/abs/2003.02436) and [gated GeLU](https://arxiv.org/abs/2002.05202). +LAMBERT-large uncased English | uncased_L-24_H-1024_A-16 | Wiki + Books | [lambert](https://tfhub.dev/tensorflow/lambert_en_uncased_L-24_H-1024_A-16/1) | BERT trained with LAMB and techniques from RoBERTa. diff --git a/official/nlp/docs/tfhub.md b/official/nlp/docs/tfhub.md new file mode 100644 index 0000000000000000000000000000000000000000..c6fe9a2f8f4514dfb8c1419abac35e0a993fde9d --- /dev/null +++ b/official/nlp/docs/tfhub.md @@ -0,0 +1,292 @@ +# Exporting a pre-trained Encoder to TF Hub + +## Overview + +This doc explains how to use TF-NLP's +[export_tfhub](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub.py) +tool to export pre-trained Transformer encoders to SavedModels suitable for +publication on TF Hub. (For the steps after that, see TF Hub's +[publisher guide](https://www.tensorflow.org/hub/publish).) +For testing purposes, those SavedModels can also be used from their export +locations on the filesystem. 
+ +On TF Hub, Transformer encoders for text come as a pair of SavedModels: + +* The preprocessing model applies a tokenizer with a fixed vocab plus some + additional logic to turn text into Transformer inputs. +* The encoder model (or "model" for short) applies the pre-trained Transformer + encoder. + +TF Hub defines +[Common APIs](https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders) +for all SavedModels of those two respective types, encapsulating the particular +choice of preprocessing logic and Encoder architecture. + +## Exporting the Encoder + +There is a choice between exporting just the encoder, or the encoder plus the +prediction head for the masked language model (MLM) task from pre-training. + +Exporting just the encoder suffices for many straightforward applications. + +### Exporting the Encoder alone + +To export an encoder-only model, you can set `--export_type=model` and run the +tool like this: + +```shell +python official/nlp/tools/export_tfhub.py \ + --encoder_config_file=${BERT_DIR:?}/bert_encoder.yaml \ + --model_checkpoint_path=${BERT_DIR:?}/bert_model.ckpt \ + --vocab_file=${BERT_DIR:?}/vocab.txt \ + --export_type=model \ + --export_path=/tmp/bert_model +``` + +The flag `--encoder_config_file` refers to a YAML file representing the +[encoders.EncoderConfig](https://github.com/tensorflow/models/search?q=EncoderConfig+path%3Aofficial%2Fnlp%2Fconfigs+filename%3Aencoders.py) +dataclass, which supports multiple encoders (e.g., BERT, ALBERT). Instead of +`--encoder_config_file`, you can set `--bert_config_file` to a legacy +`bert_config.json` file to export a BERT model. If the model definition involves +[GIN](https://github.com/google/gin-config), the flags `--gin_file` and +`--gin_params` must be set accordingly, consistent with pre-training. 
+ +The `--model_checkpoint_path` refers to an object-based (TF2) checkpoint written +by +[BertPretrainerV2](https://github.com/tensorflow/models/search?q=BertPretrainerV2+filename%3Abert_pretrainer.py), +or any other checkpoint that can be restored to +`tf.train.Checkpoint(encoder=encoder)` for the encoder defined by the config +flags. Legacy checkpoints with `model=` instead of `encoder=` are also supported +for now. + +The exported SavedModel expects dict inputs and outputs as follows, implementing +a specialization of the respective +[Common SavedModel API](https://www.tensorflow.org/hub/common_saved_model_apis/text#transformer-encoders): + +```python +encoder = hub.load(...) +encoder_inputs = dict( + input_word_ids=..., # Shape [batch, seq_length], dtype=int32 + input_mask=..., # Shape [batch, seq_length], dtype=int32 + input_type_ids=..., # Shape [batch, seq_length], dtype=int32 +) +encoder_outputs = encoder(encoder_inputs) +assert encoder_outputs.keys() == { + "pooled_output", # Shape [batch_size, width], dtype=float32 + "default", # Alias for "pooled_output" (aligns with other models) + "sequence_output", # Shape [batch_size, seq_length, width], dtype=float32 + "encoder_outputs", # List of Tensors with outputs of all transformer layers +} +``` + +The encoder's pooler layer is restored from the `--model_checkpoint_path`. +However, unlike classic BERT, `BertPretrainerV2` does not train the pooler layer +of the encoder. You have three options to handle that: + +* Set flag `--copy_pooler_dense_to_encoder` to copy the pooling layer from the + `ClassificationHead` passed to `BertPretrainerV2` for the next sentence + prediction task. This mimics classic BERT, but is not recommended for new + models (see next item). +* Leave flag `--copy_pooler_dense_to_encoder` unset and export the untrained, + randomly initialized pooling layer of the encoder. 
Folklore (as of 2020) has + it that an untrained pooler gets fine-tuned better than a pre-trained + pooler, so this is the default. +* Leave flag `--copy_pooler_dense_to_encoder` unset and perform your own + initialization of the pooling layer before export. For example, Google's + [BERT Experts](https://tfhub.dev/google/collections/experts/bert/1) + published in October 2020 initialize it to the identity map, reporting equal + gains if fine-tuning, and more predictable behavior if not. + +In any case, at this time, the export tool requires the encoder model to *have* +a `pooled_output`, whether trained or not. (This can be revised in the future.) + +The encoder model does not include any preprocessing logic, but for the benefit +of users who take preprocessing into their own hands, the relevant information +is attached from flags `--vocab_file` or `--sp_model_file`, resp., and +`--do_lower_case`, which need to be set in exactly the same way as for the +preprocessing model (see below). + +The root object of the exported SavedModel stores the resulting values as +attributes on the root object: + +```python +encoder = hub.load(...) +# Gets the filename of the respective tf.saved_model.Asset object. +if hasattr(encoder, "vocab_file"): + print("Wordpiece vocab at", encoder.vocab_file.asset_path.numpy()) +elif hasattr(encoder, "sp_model_file"): + print("SentencePiece model at", encoder.sp_model_file.asset_path.numpy()) +# Gets the value of a scalar bool tf.Variable. +print("...using do_lower_case =", encoder.do_lower_case.numpy()) +``` + +New users are encouraged to ignore these attributes and use the preprocessing +model instead. However, there are legacy users, and advanced users that require +access to the full vocab. + +### Exporting the Encoder with a Masked Language Model head + +To export an encoder and the masked language model it was trained with, first +read the preceding section about exporting just the encoder. 
All the +explanations there on setting the right flags apply here as well, up to the +following differences. + +The masked language model is added to the export by changing flag +`--export_type` from `model` to `model_with_mlm`, so the export command looks +like this: + +```shell +python official/nlp/tools/export_tfhub.py \ + --encoder_config_file=${BERT_DIR:?}/bert_encoder.yaml \ + --model_checkpoint_path=${BERT_DIR:?}/bert_model.ckpt \ + --vocab_file=${BERT_DIR:?}/vocab.txt \ + --export_type=model_with_mlm \ + --export_path=/tmp/bert_model +``` + +The `--model_checkpoint_path` refers to an object-based (TF2) checkpoint written +by +[BertPretrainerV2](https://github.com/tensorflow/models/search?q=BertPretrainerV2+filename%3Abert_pretrainer.py), +or any other checkpoint that can be restored to +`tf.train.Checkpoint(**BertPretrainerV2(...).checkpoint_items)` with the encoder +defined by the config flags. + +This is a more comprehensive requirement on the checkpoint than for +`--export_type=model`; not all Transformer encoders and not all pre-training +techniques can satisfy it. For example, +[ELECTRA](https://arxiv.org/abs/2003.10555) uses the BERT architecture but is +pre-trained without an MLM task. + +The root object of the exported SavedModel is called in the same way as above. 
+In addition, the SavedModel has an `mlm` subobject that can be called as follows +to output an `mlm_logits` tensor as well: + +```python +mlm_inputs = dict( + input_word_ids=..., # Shape [batch, seq_length], dtype=int32 + input_mask=..., # Shape [batch, seq_length], dtype=int32 + input_type_ids=..., # Shape [batch, seq_length], dtype=int32 + masked_lm_positions=..., # Shape [batch, num_predictions], dtype=int32 +) +mlm_outputs = encoder.mlm(mlm_inputs) +assert mlm_outputs.keys() == { + "pooled_output", # Shape [batch, width], dtype=float32 + "sequence_output", # Shape [batch, seq_length, width], dtype=float32 + "encoder_outputs", # List of Tensors with outputs of all transformer layers + "mlm_logits" # Shape [batch, num_predictions, vocab_size], dtype=float32 +} +``` + +The extra subobject imposes a moderate size overhead. + +### Exporting from a TF1 BERT checkpoint + +A BERT model trained with the +[original BERT implementation for TF1](https://github.com/google-research/bert) +can be exported after converting its checkpoint with the +[tf2_encoder_checkpoint_converter](https://github.com/tensorflow/models/blob/master/official/nlp/bert/tf2_encoder_checkpoint_converter.py) +tool. + +After that, run +[export_tfhub](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub.py) +per the instructions above on the converted checkpoint. Do not set +`--copy_pooler_dense_to_encoder`, because the pooler layer is part of the +converted encoder. For `--vocab_file` and `--do_lower_case`, the values from TF1 +BERT can be used verbatim. + +## Exporting the preprocessing model + +You can skip this step if TF Hub already has a preprocessing model that does +exactly what your encoder needs (same tokenizer, same vocab, same normalization +settings (`do_lower_case`)). 
You can inspect its collection of +[Transformer Encoders for Text](https://tfhub.dev/google/collections/transformer_encoders_text/1) +and click through to models with a similar input domain to find their +preprocessing models. + +To export the preprocessing model, set `--export_type=preprocessing` and run the +export tool like this: + +```shell +python official/nlp/tools/export_tfhub.py \ + --vocab_file=${BERT_DIR:?}/vocab.txt \ + --do_lower_case=True \ + --export_type=preprocessing \ + --export_path=/tmp/bert_preprocessing +``` + +Note: Set flag `--experimental_disable_assert_in_preprocessing` when exporting +to users of the public TensorFlow releases 2.4.x to avoid a fatal ops placement +issue when preprocessing is used within Dataset.map() on TPU workers. +This is not an issue with TF2.3 and TF2.5+. + +Flag `--vocab_file` specifies the vocab file used with +[BertTokenizer](https://github.com/tensorflow/models/search?q=BertTokenizer+filename%3Atext_layers.py). +For models that use the +[SentencepieceTokenizer](https://github.com/tensorflow/models/search?q=SentencepieceTokenizer+filename%3Atext_layers.py), +set flag `--sp_model_file` instead. + +The boolean flag `--do_lower_case` controls text normalization (as in the +respective tokenizer classes, so it's a bit more than just smashing case). If +unset, do_lower_case will be enabled if 'uncased' appears in --vocab_file, or +unconditionally if --sp_model_file is set, mimicking the conventions of BERT and +ALBERT, respectively. For programmatic use, or if in doubt, it's best to set +`--do_lower_case` explicitly. + +If the definition of preprocessing involved +[GIN](https://github.com/google/gin-config), +the flags `--gin_file` and `--gin_params` would have to be set accordingly, +consistent with pre-training. (At the time of this writing, no such GIN +configurables exist in the code.) + +The exported SavedModel can be called in the following way for a single segment +input. + +```python +preprocessor = hub.load(...) 
+text_input = ...  # Shape [batch_size], dtype=tf.string +encoder_inputs = preprocessor(text_input, seq_length=seq_length) +assert encoder_inputs.keys() == { + "input_word_ids",  # Shape [batch_size, seq_length], dtype=int32 + "input_mask",  # Shape [batch_size, seq_length], dtype=int32 + "input_type_ids"  # Shape [batch_size, seq_length], dtype=int32 +} +``` + +Flag `--default_seq_length` controls the value of `seq_length` if that argument +is omitted in the usage example above. The flag defaults to 128, because +multiples of 128 work best for Cloud TPUs, yet the cost of attention computation +grows quadratically with `seq_length`. + +Beyond this example, the exported SavedModel implements the full set of interfaces +from the preprocessor API for text embeddings with preprocessed inputs and with +Transformer encoders from TF Hub's +[Common APIs for text](https://www.tensorflow.org/hub/common_saved_model_apis/text). + +Please see +[tfhub.dev/tensorflow/bert_en_uncased_preprocess](https://tfhub.dev/tensorflow/bert_en_uncased_preprocess) +for the full documentation of one preprocessing model exported with this tool, +especially how custom trimming of inputs can happen between `.tokenize` and +`.bert_pack_inputs`. + +Using the `encoder.mlm()` interface requires masking of tokenized inputs by user +code. 
The necessary information on the vocabulary encapsulated in the +preprocessing model can be obtained like this (uniformly across tokenizers): + +```python +special_tokens_dict = preprocess.tokenize.get_special_tokens_dict() +vocab_size = int(special_tokens_dict["vocab_size"]) +padding_id = int(special_tokens_dict["padding_id"]) # [PAD] or +start_of_sequence_id = int(special_tokens_dict["start_of_sequence_id"]) # [CLS] +end_of_segment_id = int(special_tokens_dict["end_of_segment_id"]) # [SEP] +mask_id = int(special_tokens_dict["mask_id"]) # [MASK] +``` + +## Testing the exported models + +Please test your SavedModels before publication by fine-tuning them on a +suitable task and comparing performance and accuracy to a baseline experiment +built from equivalent Python code. +The +[trainer doc](https://github.com/tensorflow/models/blob/master/official/nlp/docs/train.md) +has instructions how to run BERT on MNLI and other tasks from the GLUE +benchmark. diff --git a/official/nlp/docs/train.md b/official/nlp/docs/train.md new file mode 100644 index 0000000000000000000000000000000000000000..d2ad9d7622d64a51cda7ba21e60e91779d2c243c --- /dev/null +++ b/official/nlp/docs/train.md @@ -0,0 +1,181 @@ +# Model Garden NLP Common Training Driver + +[train.py](https://github.com/tensorflow/models/blob/master/official/nlp/train.py) is the common training driver that supports multiple +NLP tasks (e.g., pre-training, GLUE and SQuAD fine-tuning etc) and multiple +models (e.g., BERT, ALBERT, MobileBERT etc). + +## Experiment Configuration + +[train.py] is driven by configs defined by the [ExperimentConfig](https://github.com/tensorflow/models/blob/master/official/core/config_definitions.py) +including configurations for `task`, `trainer` and `runtime`. 
The pre-defined +NLP related [ExperimentConfig](https://github.com/tensorflow/models/blob/master/official/core/config_definitions.py) can be found in +[configs/experiment_configs.py](https://github.com/tensorflow/models/blob/master/official/nlp/configs/experiment_configs.py). + +## Experiment Registry + +We use an [experiment registry](https://github.com/tensorflow/models/blob/master/official/core/exp_factory.py) to build a mapping +between experiment type to experiment configuration instance. For example, +[configs/finetuning_experiments.py](https://github.com/tensorflow/models/blob/master/official/nlp/configs/finetuning_experiments.py) +registers `bert/sentence_prediction` and `bert/squad` experiments. User can use +`--experiment` FLAG to invoke a registered experiment configuration, +e.g., `--experiment=bert/sentence_prediction`. + +## Overriding Configuration via Yaml and FLAGS + +The registered experiment configuration can be overridden by one or +multiple Yaml files provided by `--config_file` FLAG. For example: + +```shell +--config_file=configs/experiments/glue_mnli_matched.yaml \ +--config_file=configs/models/bert_en_uncased_base.yaml +``` + +In addition, experiment configuration can be further overriden by +`params_override` FLAG. For example: + +```shell + --params_override=task.train_data.input_path=/some/path,task.hub_module_url=/some/tfhub +``` + +## Run on Cloud TPUs + +Next, we will describe how to run the [train.py](https://github.com/tensorflow/models/blob/master/official/nlp/train.py) on Cloud TPUs. 
+ +### Setup +First, you need to create a `tf-nightly` TPU with +[ctpu tool](https://github.com/tensorflow/tpu/tree/master/tools/ctpu): + +```shell +export TPU_NAME=YOUR_TPU_NAME +ctpu up -name $TPU_NAME --tf-version=nightly --tpu-size=YOUR_TPU_SIZE --project=YOUR_PROJECT +``` + +and then install Model Garden and required dependencies: + +```shell +git clone https://github.com/tensorflow/models.git +export PYTHONPATH=$PYTHONPATH:/path/to/models +pip3 install --user -r official/requirements.txt +``` + +### Fine-tuning Sentence Classification with BERT from TF-Hub + +This example fine-tunes BERT-base from TF-Hub on the Multi-Genre Natural +Language Inference (MultiNLI) corpus using TPUs. + +First, you can prepare the fine-tuning data using the +[`create_finetuning_data.py`](https://github.com/tensorflow/models/blob/master/official/nlp/data/create_finetuning_data.py) script. +For GLUE tasks, you can (1) download the +[GLUE data](https://gluebenchmark.com/tasks) by running +[this script](https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e) +and unpack it to some directory `$GLUE_DIR`, (2) prepare the vocabulary file, +and (3) run the following command: + +```shell +export GLUE_DIR=~/glue +export VOCAB_FILE=~/uncased_L-12_H-768_A-12/vocab.txt + +export TASK_NAME=MNLI +export OUTPUT_DATA_DIR=gs://some_bucket/datasets +python3 data/create_finetuning_data.py \ + --input_data_dir=${GLUE_DIR}/${TASK_NAME}/ \ + --vocab_file=${VOCAB_FILE} \ + --train_data_output_path=${OUTPUT_DATA_DIR}/${TASK_NAME}_train.tf_record \ + --eval_data_output_path=${OUTPUT_DATA_DIR}/${TASK_NAME}_eval.tf_record \ + --meta_data_file_path=${OUTPUT_DATA_DIR}/${TASK_NAME}_meta_data \ + --fine_tuning_task_type=classification --max_seq_length=128 \ + --classification_task_name=${TASK_NAME} +``` + +Resulting training and evaluation datasets in `tf_record` format will be later +passed to [train.py](train.py). 
We will soon support reading datasets from +tensorflow_datasets (TFDS) and using tf.text for pre-processing. + +Then you can execute the following commands to start the training and evaluation +job. + +```shell +export INPUT_DATA_DIR=gs://some_bucket/datasets +export OUTPUT_DIR=gs://some_bucket/my_output_dir + +# See tfhub BERT collection for more tfhub models: +# https://tfhub.dev/google/collections/bert/1 +export BERT_HUB_URL=https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + +# Override the configurations by FLAGS. Alternatively, you can directly edit +# `configs/experiments/glue_mnli_matched.yaml` to specify corresponding fields. +export PARAMS=task.train_data.input_path=$INPUT_DATA_DIR/mnli_train.tf_record +export PARAMS=$PARAMS,task.validation_data.input_path=$INPUT_DATA_DIR/mnli_eval.tf_record +export PARAMS=$PARAMS,task.hub_module_url=$BERT_HUB_URL +export PARAMS=$PARAMS,runtime.distribution_strategy=tpu + +python3 train.py \ + --experiment=bert/sentence_prediction \ + --mode=train_and_eval \ + --model_dir=$OUTPUT_DIR \ + --config_file=configs/experiments/glue_mnli_matched.yaml \ + --tfhub_cache_dir=$OUTPUT_DIR/hub_cache \ + --tpu=${TPU_NAME} \ + --params_override=$PARAMS + +``` + +You can monitor the training progress in the console and find the output +models in `$OUTPUT_DIR`. + +### Fine-tuning SQuAD with a pre-trained BERT checkpoint + +This example fine-tunes a pre-trained BERT checkpoint on the +Stanford Question Answering Dataset (SQuAD) using TPUs. +The [SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/) contains +detailed information about the SQuAD datasets and evaluation. 
After downloading +the SQuAD datasets and the [pre-trained BERT checkpoints](https://github.com/tensorflow/models/blob/master/official/nlp/docs/pretrained_models.md), +you can run the following command to prepare the `tf_record` files: + +```shell +export SQUAD_DIR=~/squad +export BERT_DIR=~/uncased_L-12_H-768_A-12 +export OUTPUT_DATA_DIR=gs://some_bucket/datasets + +python3 create_finetuning_data.py \ + --squad_data_file=${SQUAD_DIR}/train-v1.1.json \ + --vocab_file=${BERT_DIR}/vocab.txt \ + --train_data_output_path=${OUTPUT_DATA_DIR}/train.tf_record \ + --meta_data_file_path=${OUTPUT_DATA_DIR}/squad_meta_data \ + --fine_tuning_task_type=squad --max_seq_length=384 +``` + +Note: To create fine-tuning data with SQuAD 2.0, you need to add flag `--version_2_with_negative=True`. + +Then, you can start the training and evaluation jobs: + +```shell +export SQUAD_DIR=~/squad +export INPUT_DATA_DIR=gs://some_bucket/datasets +export OUTPUT_DIR=gs://some_bucket/my_output_dir + +# See the following link for more pre-trained checkpoints: +# https://github.com/tensorflow/models/blob/master/official/nlp/docs/pretrained_models.md +export BERT_DIR=~/uncased_L-12_H-768_A-12 + +# Override the configurations by FLAGS. Alternatively, you can directly edit +# `configs/experiments/squad_v1.1.yaml` to specify corresponding fields. +# Also note that the training data is the pre-processed tf_record file, while +# the validation file is the raw json file. 
+export PARAMS=task.train_data.input_path=$INPUT_DATA_DIR/train.tf_record +export PARAMS=$PARAMS,task.validation_data.input_path=$SQUAD_DIR/dev-v1.1.json +export PARAMS=$PARAMS,task.validation_data.vocab_file=$BERT_DIR/vocab.txt +export PARAMS=$PARAMS,task.init_checkpoint=$BERT_DIR/bert_model.ckpt +export PARAMS=$PARAMS,runtime.distribution_strategy=tpu + +python3 train.py \ + --experiment=bert/squad \ + --mode=train_and_eval \ + --model_dir=$OUTPUT_DIR \ + --config_file=configs/experiments/squad_v1.1.yaml \ + --tpu=${TPU_NAME} \ + --params_override=$PARAMS + +``` + +Note: More examples about pre-training will come soon. diff --git a/official/nlp/finetuning/binary_helper.py b/official/nlp/finetuning/binary_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..20fb822cd5106d931b635a0474ff431ddc0a08d6 --- /dev/null +++ b/official/nlp/finetuning/binary_helper.py @@ -0,0 +1,403 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""The helper for finetuning binaries.""" +import json +import math +import sys +from typing import Any, Dict, List, Optional + +from absl import logging +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.modeling import hyperparams +from official.nlp.configs import encoders +from official.nlp.data import question_answering_dataloader +from official.nlp.data import sentence_prediction_dataloader +from official.nlp.data import tagging_dataloader +from official.nlp.tasks import question_answering +from official.nlp.tasks import sentence_prediction +from official.nlp.tasks import tagging + + +def override_trainer_cfg(trainer_cfg: cfg.TrainerConfig, learning_rate: float, + num_epoch: int, global_batch_size: int, + warmup_ratio: float, training_data_size: int, + eval_data_size: int, num_eval_per_epoch: int, + best_checkpoint_export_subdir: str, + best_checkpoint_eval_metric: str, + best_checkpoint_metric_comp: str): + """Overrides a `cfg.TrainerConfig` object.""" + steps_per_epoch = training_data_size // global_batch_size + train_steps = steps_per_epoch * num_epoch + # TODO(b/165081095): always set to -1 after the bug is resolved. + if eval_data_size: + eval_steps = int(math.ceil(eval_data_size / global_batch_size)) + else: + eval_steps = -1 # exhaust the validation data. 
+ warmp_steps = int(train_steps * warmup_ratio) + validation_interval = steps_per_epoch // num_eval_per_epoch + trainer_cfg.override({ + 'optimizer_config': { + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'decay_steps': train_steps, + 'initial_learning_rate': learning_rate, + 'end_learning_rate': 0, + } + }, + 'optimizer': { + 'type': 'adamw', + }, + 'warmup': { + 'polynomial': { + 'warmup_steps': warmp_steps, + }, + 'type': 'polynomial', + }, + }, + 'train_steps': train_steps, + 'validation_interval': validation_interval, + 'validation_steps': eval_steps, + 'best_checkpoint_export_subdir': best_checkpoint_export_subdir, + 'best_checkpoint_eval_metric': best_checkpoint_eval_metric, + 'best_checkpoint_metric_comp': best_checkpoint_metric_comp, + }) + + +def load_model_config_file(model_config_file: str) -> Dict[str, Any]: + """Loads bert config json file or `encoders.EncoderConfig` in yaml file.""" + if not model_config_file: + # model_config_file may be empty when using tf.hub. + return {} + + try: + encoder_config = encoders.EncoderConfig() + encoder_config = hyperparams.override_params_dict( + encoder_config, model_config_file, is_strict=True) + logging.info('Load encoder_config yaml file from %s.', model_config_file) + return encoder_config.as_dict() + except KeyError: + pass + + logging.info('Load bert config json file from %s', model_config_file) + with tf.io.gfile.GFile(model_config_file, 'r') as reader: + text = reader.read() + config = json.loads(text) + + def get_value(key1, key2): + if key1 in config and key2 in config: + raise ValueError('Unexpected that both %s and %s are in config.' % + (key1, key2)) + + return config[key1] if key1 in config else config[key2] + + def get_value_or_none(key): + return config[key] if key in config else None + + # Support both legacy bert_config attributes and the new config attributes. 
+ return { + 'bert': { + 'attention_dropout_rate': + get_value('attention_dropout_rate', + 'attention_probs_dropout_prob'), + 'dropout_rate': + get_value('dropout_rate', 'hidden_dropout_prob'), + 'hidden_activation': + get_value('hidden_activation', 'hidden_act'), + 'hidden_size': + config['hidden_size'], + 'embedding_size': + get_value_or_none('embedding_size'), + 'initializer_range': + config['initializer_range'], + 'intermediate_size': + config['intermediate_size'], + 'max_position_embeddings': + config['max_position_embeddings'], + 'num_attention_heads': + config['num_attention_heads'], + 'num_layers': + get_value('num_layers', 'num_hidden_layers'), + 'type_vocab_size': + config['type_vocab_size'], + 'vocab_size': + config['vocab_size'], + } + } + + +def override_sentence_prediction_task_config( + task_cfg: sentence_prediction.SentencePredictionConfig, + model_config_file: str, + init_checkpoint: str, + hub_module_url: str, + global_batch_size: int, + train_input_path: str, + validation_input_path: str, + seq_length: int, + num_classes: int, + metric_type: Optional[str] = 'accuracy', + label_type: Optional[str] = 'int'): + """Overrides a `SentencePredictionConfig` object.""" + task_cfg.override({ + 'init_checkpoint': init_checkpoint, + 'metric_type': metric_type, + 'model': { + 'num_classes': num_classes, + 'encoder': load_model_config_file(model_config_file), + }, + 'hub_module_url': hub_module_url, + 'train_data': { + 'drop_remainder': True, + 'global_batch_size': global_batch_size, + 'input_path': train_input_path, + 'is_training': True, + 'seq_length': seq_length, + 'label_type': label_type, + }, + 'validation_data': { + 'drop_remainder': False, + 'global_batch_size': global_batch_size, + 'input_path': validation_input_path, + 'is_training': False, + 'seq_length': seq_length, + 'label_type': label_type, + } + }) + + +def override_qa_task_config( + task_cfg: question_answering.QuestionAnsweringConfig, + model_config_file: str, init_checkpoint: str, 
hub_module_url: str, + global_batch_size: int, train_input_path: str, validation_input_path: str, + seq_length: int, tokenization: str, vocab_file: str, do_lower_case: bool, + version_2_with_negative: bool): + """Overrides a `QuestionAnsweringConfig` object.""" + task_cfg.override({ + 'init_checkpoint': init_checkpoint, + 'model': { + 'encoder': load_model_config_file(model_config_file), + }, + 'hub_module_url': hub_module_url, + 'train_data': { + 'drop_remainder': True, + 'global_batch_size': global_batch_size, + 'input_path': train_input_path, + 'is_training': True, + 'seq_length': seq_length, + }, + 'validation_data': { + 'do_lower_case': do_lower_case, + 'drop_remainder': False, + 'global_batch_size': global_batch_size, + 'input_path': validation_input_path, + 'is_training': False, + 'seq_length': seq_length, + 'tokenization': tokenization, + 'version_2_with_negative': version_2_with_negative, + 'vocab_file': vocab_file, + } + }) + + +def override_tagging_task_config(task_cfg: tagging.TaggingConfig, + model_config_file: str, init_checkpoint: str, + hub_module_url: str, global_batch_size: int, + train_input_path: str, + validation_input_path: str, seq_length: int, + class_names: List[str]): + """Overrides a `TaggingConfig` object.""" + task_cfg.override({ + 'init_checkpoint': init_checkpoint, + 'model': { + 'encoder': load_model_config_file(model_config_file), + }, + 'hub_module_url': hub_module_url, + 'train_data': { + 'drop_remainder': True, + 'global_batch_size': global_batch_size, + 'input_path': train_input_path, + 'is_training': True, + 'seq_length': seq_length, + }, + 'validation_data': { + 'drop_remainder': False, + 'global_batch_size': global_batch_size, + 'input_path': validation_input_path, + 'is_training': False, + 'seq_length': seq_length, + }, + 'class_names': class_names, + }) + + +def write_glue_classification(task, + model, + input_file, + output_file, + predict_batch_size, + seq_length, + class_names, + label_type='int', + min_float_value=None, 
+ max_float_value=None): + """Makes classification predictions for glue and writes to output file. + + Args: + task: `Task` instance. + model: `keras.Model` instance. + input_file: Input test data file path. + output_file: Output test data file path. + predict_batch_size: Batch size for prediction. + seq_length: Input sequence length. + class_names: List of string class names. + label_type: String denoting label type ('int', 'float'), defaults to 'int'. + min_float_value: If set, predictions will be min-clipped to this value (only + for regression when `label_type` is set to 'float'). Defaults to `None` + (no clipping). + max_float_value: If set, predictions will be max-clipped to this value (only + for regression when `label_type` is set to 'float'). Defaults to `None` + (no clipping). + """ + if label_type not in ('int', 'float'): + raise ValueError('Unsupported `label_type`. Given: %s, expected `int` or ' + '`float`.' % label_type) + + data_config = sentence_prediction_dataloader.SentencePredictionDataConfig( + input_path=input_file, + global_batch_size=predict_batch_size, + is_training=False, + seq_length=seq_length, + label_type=label_type, + drop_remainder=False, + include_example_id=True) + predictions = sentence_prediction.predict(task, data_config, model) + + if label_type == 'float': + min_float_value = (-sys.float_info.max + if min_float_value is None else min_float_value) + max_float_value = ( + sys.float_info.max if max_float_value is None else max_float_value) + + # Clip predictions to range [min_float_value, max_float_value]. + predictions = [ + min(max(prediction, min_float_value), max_float_value) + for prediction in predictions + ] + + with tf.io.gfile.GFile(output_file, 'w') as writer: + writer.write('index\tprediction\n') + for index, prediction in enumerate(predictions): + if label_type == 'float': + # Regression. + writer.write('%d\t%.3f\n' % (index, prediction)) + else: + # Classification. 
+ writer.write('%d\t%s\n' % (index, class_names[prediction])) + + +def write_xtreme_classification(task, + model, + input_file, + output_file, + predict_batch_size, + seq_length, + class_names, + translated_input_file=None, + test_time_aug_wgt=0.3): + """Makes classification predictions for xtreme and writes to output file.""" + data_config = sentence_prediction_dataloader.SentencePredictionDataConfig( + input_path=input_file, + seq_length=seq_length, + is_training=False, + label_type='int', + global_batch_size=predict_batch_size, + drop_remainder=False, + include_example_id=True) + if translated_input_file is not None: + data_config_aug = ( + sentence_prediction_dataloader.SentencePredictionDataConfig( + input_path=translated_input_file, + seq_length=seq_length, + is_training=False, + label_type='int', + global_batch_size=predict_batch_size, + drop_remainder=False, + include_example_id=True)) + else: + data_config_aug = None + predictions = sentence_prediction.predict(task, data_config, model, + data_config_aug, test_time_aug_wgt) + with tf.io.gfile.GFile(output_file, 'w') as writer: + for prediction in predictions: + writer.write('%s\n' % class_names[prediction]) + + +def write_question_answering(task, + model, + input_file, + output_file, + predict_batch_size, + seq_length, + tokenization, + vocab_file, + do_lower_case, + version_2_with_negative=False): + """Makes question answering predictions and writes to output file.""" + data_config = question_answering_dataloader.QADataConfig( + do_lower_case=do_lower_case, + doc_stride=128, + drop_remainder=False, + global_batch_size=predict_batch_size, + input_path=input_file, + is_training=False, + query_length=64, + seq_length=seq_length, + tokenization=tokenization, + version_2_with_negative=version_2_with_negative, + vocab_file=vocab_file) + all_predictions, _, _ = question_answering.predict(task, data_config, model) + with tf.io.gfile.GFile(output_file, 'w') as writer: + writer.write(json.dumps(all_predictions, 
indent=4) + '\n') + + +def write_tagging(task, model, input_file, output_file, predict_batch_size, + seq_length): + """Makes tagging predictions and writes to output file.""" + data_config = tagging_dataloader.TaggingDataConfig( + input_path=input_file, + is_training=False, + seq_length=seq_length, + global_batch_size=predict_batch_size, + drop_remainder=False, + include_sentence_id=True) + results = tagging.predict(task, data_config, model) + class_names = task.task_config.class_names + last_sentence_id = -1 + + with tf.io.gfile.GFile(output_file, 'w') as writer: + for sentence_id, _, predict_ids in results: + token_labels = [class_names[x] for x in predict_ids] + assert sentence_id == last_sentence_id or ( + sentence_id == last_sentence_id + 1) + + if sentence_id != last_sentence_id and last_sentence_id != -1: + writer.write('\n') + + writer.write('\n'.join(token_labels)) + writer.write('\n') + last_sentence_id = sentence_id diff --git a/official/nlp/finetuning/glue/flags.py b/official/nlp/finetuning/glue/flags.py new file mode 100644 index 0000000000000000000000000000000000000000..0f684fc916fb178cdaa542855ce3cffaa8627a9d --- /dev/null +++ b/official/nlp/finetuning/glue/flags.py @@ -0,0 +1,164 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Common flags for GLUE finetuning binary.""" +from typing import Callable + +from absl import flags +from absl import logging + + +def define_flags(): + """Defines flags.""" + + # =========================================================================== + # Glue binary flags. + # =========================================================================== + flags.DEFINE_enum( + 'mode', 'train_eval_and_predict', + ['train_eval_and_predict', 'train_eval', 'predict'], + 'The mode to run the binary. If `train_eval_and_predict` ' + 'it will (1) train on the training data and (2) evaluate on ' + 'the validation data and (3) finally generate predictions ' + 'on the prediction data; if `train_eval`, it will only ' + 'run training and evaluation; if `predict`, it will only ' + 'run prediction using the model in `model_dir`.') + + flags.DEFINE_enum('task_name', None, [ + 'AX', 'COLA', 'MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', 'STS-B', + 'WNLI' + ], 'The type of GLUE task.') + + flags.DEFINE_string('train_input_path', None, + 'The file path to the training data.') + + flags.DEFINE_string('validation_input_path', None, + 'The file path to the evaluation data.') + + flags.DEFINE_string('test_input_path', None, + 'The file path to the test input data.') + + flags.DEFINE_string('test_output_path', None, + 'The file path to the test output data.') + + flags.DEFINE_string('model_dir', '', 'The model directory containing ' + 'subdirectories for each task. Only needed for "predict" ' + 'mode. For all other modes, if not provided, a unique ' + 'directory will be created automatically for each run.') + + flags.DEFINE_string( + 'input_meta_data_path', None, 'Path to file that contains ' + 'metadata about input file. It is output by the `create_finetuning_data` ' + 'binary. 
 Required for all modes except "predict".')
+
+  flags.DEFINE_string('init_checkpoint', '',
+                      'Initial checkpoint from a pre-trained BERT model.')
+
+  flags.DEFINE_string(
+      'model_config_file', '', 'The config file specifying the architecture '
+      'of the pre-trained model. The file can be either a bert_config.json '
+      'file or `encoders.EncoderConfig` in yaml file.')
+
+  flags.DEFINE_string(
+      'hub_module_url', '', 'TF-Hub path/url to a pretrained model. If '
+      'specified, `init_checkpoint` and `model_config_file` flag should not be '
+      'used.')
+
+  flags.DEFINE_multi_string('gin_file', None,
+                            'List of paths to the gin config files.')
+
+  flags.DEFINE_multi_string('gin_params', None,
+                            'Newline separated list of gin parameter bindings.')
+
+  flags.DEFINE_multi_string(
+      'config_file', None, 'This is the advanced usage to specify the '
+      '`ExperimentConfig` directly. When specified, '
+      'we will ignore FLAGS related to `ExperimentConfig` such as '
+      '`train_input_path`, `validation_input_path` and following hparams.')
+
+  # ===========================================================================
+  # Tuning hparams.
+  # ===========================================================================
+  flags.DEFINE_integer('global_batch_size', 32,
+                       'Global batch size for train/eval/predict.')
+
+  flags.DEFINE_float('learning_rate', 3e-5, 'Initial learning rate.')
+
+  flags.DEFINE_integer('num_epoch', 3, 'Number of training epochs.')
+
+  flags.DEFINE_float('warmup_ratio', 0.1,
+                     'Proportion of learning rate warmup steps.')
+
+  flags.DEFINE_integer('num_eval_per_epoch', 2,
+                       'Number of evaluations to run per epoch.')
+
+
+def validate_flags(flags_obj: flags.FlagValues,
+                   file_exists_fn: Callable[[str], bool]):
+  """Raises ValueError if any flags are misconfigured.
+
+  Args:
+    flags_obj: A `flags.FlagValues` object, usually from `flags.FLAGS`.
+    file_exists_fn: A callable to decide if a file path exists or not.
+ """ + + def _check_path_exists(flag_path, flag_name): + if not file_exists_fn(flag_path): + raise ValueError('Flag `%s` at %s does not exist.' % + (flag_name, flag_path)) + + def _validate_path(flag_path, flag_name): + if not flag_path: + raise ValueError('Flag `%s` must be provided in mode %s.' % + (flag_name, flags_obj.mode)) + _check_path_exists(flag_path, flag_name) + + if 'train' in flags_obj.mode: + _validate_path(flags_obj.train_input_path, 'train_input_path') + _validate_path(flags_obj.input_meta_data_path, 'input_meta_data_path') + + if flags_obj.gin_file: + for gin_file in flags_obj.gin_file: + _check_path_exists(gin_file, 'gin_file') + if flags_obj.config_file: + for config_file in flags_obj.config_file: + _check_path_exists(config_file, 'config_file') + + if 'eval' in flags_obj.mode: + _validate_path(flags_obj.validation_input_path, 'validation_input_path') + + if flags_obj.mode == 'predict': + # model_dir is only needed strictly in 'predict' mode. + _validate_path(flags_obj.model_dir, 'model_dir') + + if 'predict' in flags_obj.mode: + _validate_path(flags_obj.test_input_path, 'test_input_path') + + if not flags_obj.config_file and flags_obj.mode != 'predict': + if flags_obj.hub_module_url: + if flags_obj.init_checkpoint or flags_obj.model_config_file: + raise ValueError( + 'When `hub_module_url` is specified, `init_checkpoint` and ' + '`model_config_file` should be empty.') + logging.info( + 'Using the pretrained tf.hub from %s', flags_obj.hub_module_url) + else: + if not (flags_obj.init_checkpoint and flags_obj.model_config_file): + raise ValueError('Both `init_checkpoint` and `model_config_file` ' + 'should be specified if `config_file` is not ' + 'specified.') + _validate_path(flags_obj.model_config_file, 'model_config_file') + logging.info( + 'Using the pretrained checkpoint from %s and model_config_file from ' + '%s.', flags_obj.init_checkpoint, flags_obj.model_config_file) diff --git a/official/nlp/finetuning/glue/run_glue.py 
b/official/nlp/finetuning/glue/run_glue.py new file mode 100644 index 0000000000000000000000000000000000000000..aa1b047f3e6413e84e7f5882cbfcef26e3c2cad3 --- /dev/null +++ b/official/nlp/finetuning/glue/run_glue.py @@ -0,0 +1,264 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Runs prediction to generate submission files for GLUE tasks.""" +import functools +import json +import os +import pprint + +from absl import app +from absl import flags +from absl import logging + +import gin +import tensorflow as tf + +from official.common import distribute_utils +# Imports registered experiment configs. +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling.hyperparams import params_dict +from official.nlp.finetuning import binary_helper +from official.nlp.finetuning.glue import flags as glue_flags + + +# Device configs. +flags.DEFINE_string('distribution_strategy', 'tpu', + 'The Distribution Strategy to use for training.') +flags.DEFINE_string( + 'tpu', '', + 'The Cloud TPU to use for training. 
This should be either the name ' + 'used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.') +flags.DEFINE_integer('num_gpus', 1, 'The number of GPUs to use at each worker.') + +FLAGS = flags.FLAGS + +EXPERIMENT_TYPE = 'bert/sentence_prediction' +BEST_CHECKPOINT_EXPORT_SUBDIR = 'best_ckpt' + +EVAL_METRIC_MAP = { + 'AX': 'matthews_corrcoef', + 'COLA': 'matthews_corrcoef', + 'MNLI': 'cls_accuracy', + 'MRPC': 'cls_accuracy', + 'QNLI': 'cls_accuracy', + 'QQP': 'cls_accuracy', + 'RTE': 'cls_accuracy', + 'SST-2': 'cls_accuracy', + 'STS-B': 'pearson_spearman_corr', + 'WNLI': 'cls_accuracy', +} + +AX_CLASS_NAMES = ['contradiction', 'entailment', 'neutral'] +COLA_CLASS_NAMES = ['0', '1'] +MNLI_CLASS_NAMES = ['contradiction', 'entailment', 'neutral'] +MRPC_CLASS_NAMES = ['0', '1'] +QNLI_CLASS_NAMES = ['entailment', 'not_entailment'] +QQP_CLASS_NAMES = ['0', '1'] +RTE_CLASS_NAMES = ['entailment', 'not_entailment'] +SST_2_CLASS_NAMES = ['0', '1'] +WNLI_CLASS_NAMES = ['0', '1'] + + +def _override_exp_config_by_file(exp_config, exp_config_files): + """Overrides an `ExperimentConfig` object by files.""" + for exp_config_file in exp_config_files: + if not tf.io.gfile.exists(exp_config_file): + raise ValueError('%s does not exist.' 
% exp_config_file) + params_dict.override_params_dict( + exp_config, exp_config_file, is_strict=True) + + return exp_config + + +def _override_exp_config_by_flags(exp_config, input_meta_data): + """Overrides an `ExperimentConfig` object by flags.""" + if FLAGS.task_name in ('AX', 'COLA',): + override_task_cfg_fn = functools.partial( + binary_helper.override_sentence_prediction_task_config, + num_classes=input_meta_data['num_labels'], + metric_type='matthews_corrcoef') + elif FLAGS.task_name in ('MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', + 'WNLI'): + override_task_cfg_fn = functools.partial( + binary_helper.override_sentence_prediction_task_config, + num_classes=input_meta_data['num_labels']) + elif FLAGS.task_name in ('STS-B',): + override_task_cfg_fn = functools.partial( + binary_helper.override_sentence_prediction_task_config, + num_classes=1, + metric_type='pearson_spearman_corr', + label_type='float') + else: + raise ValueError('Task %s not supported.' % FLAGS.task_name) + + binary_helper.override_trainer_cfg( + exp_config.trainer, + learning_rate=FLAGS.learning_rate, + num_epoch=FLAGS.num_epoch, + global_batch_size=FLAGS.global_batch_size, + warmup_ratio=FLAGS.warmup_ratio, + training_data_size=input_meta_data['train_data_size'], + eval_data_size=input_meta_data['eval_data_size'], + num_eval_per_epoch=FLAGS.num_eval_per_epoch, + best_checkpoint_export_subdir=BEST_CHECKPOINT_EXPORT_SUBDIR, + best_checkpoint_eval_metric=EVAL_METRIC_MAP[FLAGS.task_name], + best_checkpoint_metric_comp='higher') + + override_task_cfg_fn( + exp_config.task, + model_config_file=FLAGS.model_config_file, + init_checkpoint=FLAGS.init_checkpoint, + hub_module_url=FLAGS.hub_module_url, + global_batch_size=FLAGS.global_batch_size, + train_input_path=FLAGS.train_input_path, + validation_input_path=FLAGS.validation_input_path, + seq_length=input_meta_data['max_seq_length']) + return exp_config + + +def _get_exp_config(input_meta_data, exp_config_files): + """Gets an `ExperimentConfig` 
object.""" + exp_config = exp_factory.get_exp_config(EXPERIMENT_TYPE) + + if exp_config_files: + logging.info( + 'Loading `ExperimentConfig` from file, and flags will be ignored.') + exp_config = _override_exp_config_by_file(exp_config, exp_config_files) + else: + logging.info('Loading `ExperimentConfig` from flags.') + exp_config = _override_exp_config_by_flags(exp_config, input_meta_data) + + exp_config.validate() + exp_config.lock() + + pp = pprint.PrettyPrinter() + logging.info('Final experiment parameters: %s', + pp.pformat(exp_config.as_dict())) + + return exp_config + + +def _write_submission_file(task, seq_length): + """Writes submission files that can be uploaded to the leaderboard.""" + tf.io.gfile.makedirs(os.path.dirname(FLAGS.test_output_path)) + model = task.build_model() + + ckpt_file = tf.train.latest_checkpoint( + os.path.join(FLAGS.model_dir, BEST_CHECKPOINT_EXPORT_SUBDIR)) + logging.info('Restoring checkpoints from %s', ckpt_file) + checkpoint = tf.train.Checkpoint(model=model) + checkpoint.read(ckpt_file).expect_partial() + + write_fn = binary_helper.write_glue_classification + write_fn_map = { + 'AX': + functools.partial( + write_fn, class_names=AX_CLASS_NAMES), + 'COLA': + functools.partial( + write_fn, class_names=COLA_CLASS_NAMES), + 'MNLI': + functools.partial( + write_fn, class_names=MNLI_CLASS_NAMES), + 'MRPC': + functools.partial( + write_fn, class_names=MRPC_CLASS_NAMES), + 'QNLI': + functools.partial( + write_fn, class_names=QNLI_CLASS_NAMES), + 'QQP': + functools.partial( + write_fn, class_names=QQP_CLASS_NAMES), + 'RTE': + functools.partial( + write_fn, class_names=RTE_CLASS_NAMES), + 'SST-2': + functools.partial( + write_fn, class_names=SST_2_CLASS_NAMES), + 'STS-B': + # No class_names (regression), clip predictions to [0.0, 5.0] per glue + # benchmark grader. 
+ functools.partial( + write_fn, class_names=None, label_type='float', + min_float_value=0.0, max_float_value=5.0), + 'WNLI': + functools.partial( + write_fn, class_names=WNLI_CLASS_NAMES), + } + logging.info('Predicting %s', FLAGS.test_input_path) + write_fn_map[FLAGS.task_name]( + task=task, + model=model, + input_file=FLAGS.test_input_path, + output_file=FLAGS.test_output_path, + predict_batch_size=( + task.task_config.train_data.global_batch_size), + seq_length=seq_length) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + glue_flags.validate_flags(FLAGS, file_exists_fn=tf.io.gfile.exists) + + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=FLAGS.distribution_strategy, + num_gpus=FLAGS.num_gpus, + tpu_address=FLAGS.tpu) + + with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: + input_meta_data = json.loads(reader.read().decode('utf-8')) + + with distribution_strategy.scope(): + task = None + if 'train_eval' in FLAGS.mode: + logging.info('Starting training and eval...') + logging.info('Model dir: %s', FLAGS.model_dir) + + exp_config = _get_exp_config( + input_meta_data=input_meta_data, + exp_config_files=FLAGS.config_file) + train_utils.serialize_config(exp_config, FLAGS.model_dir) + task = task_factory.get_task(exp_config.task, logging_dir=FLAGS.model_dir) + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode='train_and_eval', + params=exp_config, + model_dir=FLAGS.model_dir) + + if 'predict' in FLAGS.mode: + logging.info('Starting predict...') + # When mode is `predict`, `task` will be None. 
+ if task is None: + exp_config = _get_exp_config( + input_meta_data=input_meta_data, + exp_config_files=[os.path.join(FLAGS.model_dir, 'params.yaml')]) + task = task_factory.get_task( + exp_config.task, logging_dir=FLAGS.model_dir) + _write_submission_file(task, input_meta_data['max_seq_length']) + + +if __name__ == '__main__': + glue_flags.define_flags() + flags.mark_flag_as_required('mode') + flags.mark_flag_as_required('task_name') + app.run(main) diff --git a/official/nlp/keras_nlp/README.md b/official/nlp/keras_nlp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c5bbb13182d3baef1176ad71d02aa4ae61bbd34 --- /dev/null +++ b/official/nlp/keras_nlp/README.md @@ -0,0 +1,37 @@ +# keras-nlp + +## Layers + +Layers are the fundamental building blocks for NLP models. They can be used to +assemble new layers, networks, or models. + +* [TransformerEncoderBlock](layers/transformer_encoder_block.py) implements + an optionally masked transformer as described in + ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762). + +* [OnDeviceEmbedding](layers/on_device_embedding.py) implements efficient + embedding lookups designed for TPU-based models. + +* [PositionalEmbedding](layers/position_embedding.py) creates a positional + embedding as described in ["BERT: Pre-training of Deep Bidirectional + Transformers for Language Understanding"](https://arxiv.org/abs/1810.04805). + +* [SelfAttentionMask](layers/self_attention_mask.py) creates a 3D attention + mask from a 2D tensor mask. + +* [MaskedLM](layers/masked_lm.py) implements a masked language model. It + assumes the embedding table variable is passed to it. + + +## Encoders + +Encoders are combinations of layers (and possibly other encoders). They are +sub-units of models that would not be trained alone. It encapsulates common +network structures like a classification head or a transformer encoder into an +easily handled object with a standardized configuration. 
+ +* [BertEncoder](encoders/bert_encoder.py) implements a bi-directional + Transformer-based encoder as described in + ["BERT: Pre-training of Deep Bidirectional Transformers for Language + Understanding"](https://arxiv.org/abs/1810.04805). It includes the embedding + lookups, transformer layers and pooling layer. diff --git a/official/nlp/keras_nlp/__init__.py b/official/nlp/keras_nlp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d3e84b1c23182207e3e04294fa54985e43692463 --- /dev/null +++ b/official/nlp/keras_nlp/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-NLP package definition.""" +# pylint: disable=wildcard-import +from official.nlp.keras_nlp import encoders +from official.nlp.keras_nlp import layers diff --git a/official/nlp/keras_nlp/contributing.md b/official/nlp/keras_nlp/contributing.md new file mode 100644 index 0000000000000000000000000000000000000000..b9ec1716d96621852afef06e96ee5f4e3523cd02 --- /dev/null +++ b/official/nlp/keras_nlp/contributing.md @@ -0,0 +1,21 @@ +## Contributing to KerasNLP + +Patches to KerasNLP are welcome! + +The source-of-truth repository lives under +[TF Model Garden NLP](https://github.com/tensorflow/models/tree/master/official/nlp/keras_nlp), +and is mirrored as a read-only repository under +[keras-team/keras-nlp](https://github.com/keras-team/keras-nlp). 
+Contributions should be made as PRs to the TF Model Garden repository. +This is to ensure the codebase is rigorously tested with state-of-art models +on different accelerators. +In the long run, we will move development to the current repository `keras-team/keras-nlp`. + +## :heavy_check_mark: Contributor checklist + +1. Ensure you have signed the [Contributor License Agreement](https://cla.developers.google.com/about/google-individual?csw=1). + * All code contributors are required to sign a Contributor License Agreement. + * Please read this [troubleshooting guide](Contributor-License-Agreements#troubleshooting-clas) + if you encounter an issue. +2. Please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). +3. Check if your changes are consistent with the [TensorFlow coding style](https://www.tensorflow.org/community/contribute/code_style). diff --git a/official/nlp/keras_nlp/encoders/__init__.py b/official/nlp/keras_nlp/encoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4fb045ee247d83e5dc84f1ca8732dcf25148047d --- /dev/null +++ b/official/nlp/keras_nlp/encoders/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Keras-NLP layers package definition.""" +from official.nlp.keras_nlp.encoders.bert_encoder import BertEncoder diff --git a/official/nlp/keras_nlp/encoders/bert_encoder.py b/official/nlp/keras_nlp/encoders/bert_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..bd8ff3190a01c9620e4e7a01ea6b75b882ef6f8c --- /dev/null +++ b/official/nlp/keras_nlp/encoders/bert_encoder.py @@ -0,0 +1,262 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bert encoder network.""" +# pylint: disable=g-classes-have-attributes + +import collections +from absl import logging +import tensorflow as tf + +from official.nlp.keras_nlp import layers + + +@tf.keras.utils.register_keras_serializable(package='keras_nlp') +class BertEncoder(tf.keras.Model): + """Bi-directional Transformer-based encoder network. + + This network implements a bi-directional Transformer-based encoder as + described in "BERT: Pre-training of Deep Bidirectional Transformers for + Language Understanding" (https://arxiv.org/abs/1810.04805). It includes the + embedding lookups and transformer layers, but not the masked language model + or classification task networks. + + The default values for this object are taken from the BERT-Base implementation + in "BERT: Pre-training of Deep Bidirectional Transformers for Language + Understanding". 
+
+  *Note* that the network is constructed by
+  [Keras Functional API](https://keras.io/guides/functional_api/).
+
+  Args:
+    vocab_size: The size of the token vocabulary.
+    hidden_size: The size of the transformer hidden layers.
+    num_layers: The number of transformer layers.
+    num_attention_heads: The number of attention heads for each transformer. The
+      hidden size must be divisible by the number of attention heads.
+    max_sequence_length: The maximum sequence length that this encoder can
+      consume. If None, max_sequence_length uses the value from sequence length.
+      This determines the variable shape for positional embeddings.
+    type_vocab_size: The number of types that the 'type_ids' input can take.
+    inner_dim: The output dimension of the first Dense layer in a two-layer
+      feedforward network for each transformer.
+    inner_activation: The activation for the first Dense layer in a two-layer
+      feedforward network for each transformer.
+    output_dropout: Dropout probability for the post-attention and output
+      dropout.
+    attention_dropout: The dropout rate to use for the attention layers
+      within the transformer layers.
+    initializer: The initializer to use for all weights in this encoder.
+    output_range: The sequence output range, [0, output_range), by slicing the
+      target sequence of the last transformer layer. `None` means the entire
+      target sequence will attend to the source sequence, which yields the full
+      output.
+    embedding_width: The width of the word embeddings. If the embedding width is
+      not equal to hidden size, embedding parameters will be factorized into two
+      matrices in the shape of ['vocab_size', 'embedding_width'] and
+      ['embedding_width', 'hidden_size'] ('embedding_width' is usually much
+      smaller than 'hidden_size').
+    embedding_layer: An optional Layer instance which will be called to
+      generate embeddings for the input word IDs.
+ """ + + def __init__( + self, + vocab_size, + hidden_size=768, + num_layers=12, + num_attention_heads=12, + max_sequence_length=512, + type_vocab_size=16, + inner_dim=3072, + inner_activation=lambda x: tf.keras.activations.gelu(x, approximate=True), + output_dropout=0.1, + attention_dropout=0.1, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + output_range=None, + embedding_width=None, + embedding_layer=None, + **kwargs): + activation = tf.keras.activations.get(inner_activation) + initializer = tf.keras.initializers.get(initializer) + + word_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_word_ids') + mask = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_mask') + type_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_type_ids') + + if embedding_width is None: + embedding_width = hidden_size + + if embedding_layer is None: + embedding_layer_inst = layers.OnDeviceEmbedding( + vocab_size=vocab_size, + embedding_width=embedding_width, + initializer=initializer, + name='word_embeddings') + else: + embedding_layer_inst = embedding_layer + word_embeddings = embedding_layer_inst(word_ids) + + # Always uses dynamic slicing for simplicity. 
+ position_embedding_layer = layers.PositionEmbedding( + initializer=initializer, + max_length=max_sequence_length, + name='position_embedding') + position_embeddings = position_embedding_layer(word_embeddings) + type_embedding_layer = layers.OnDeviceEmbedding( + vocab_size=type_vocab_size, + embedding_width=embedding_width, + initializer=initializer, + use_one_hot=True, + name='type_embeddings') + type_embeddings = type_embedding_layer(type_ids) + + embeddings = tf.keras.layers.Add()( + [word_embeddings, position_embeddings, type_embeddings]) + + embedding_norm_layer = tf.keras.layers.LayerNormalization( + name='embeddings/layer_norm', axis=-1, epsilon=1e-12, dtype=tf.float32) + + embeddings = embedding_norm_layer(embeddings) + embeddings = (tf.keras.layers.Dropout(rate=output_dropout)(embeddings)) + + # We project the 'embedding' output to 'hidden_size' if it is not already + # 'hidden_size'. + if embedding_width != hidden_size: + embedding_projection = tf.keras.layers.experimental.EinsumDense( + '...x,xy->...y', + output_shape=hidden_size, + bias_axes='y', + kernel_initializer=initializer, + name='embedding_projection') + embeddings = embedding_projection(embeddings) + else: + embedding_projection = None + + transformer_layers = [] + data = embeddings + attention_mask = layers.SelfAttentionMask()(data, mask) + encoder_outputs = [] + for i in range(num_layers): + if i == num_layers - 1 and output_range is not None: + transformer_output_range = output_range + else: + transformer_output_range = None + layer = layers.TransformerEncoderBlock( + num_attention_heads=num_attention_heads, + inner_dim=inner_dim, + inner_activation=inner_activation, + output_dropout=output_dropout, + attention_dropout=attention_dropout, + output_range=transformer_output_range, + kernel_initializer=initializer, + name='transformer/layer_%d' % i) + transformer_layers.append(layer) + data = layer([data, attention_mask]) + encoder_outputs.append(data) + + last_encoder_output = 
encoder_outputs[-1] + # Applying a tf.slice op (through subscript notation) to a Keras tensor + # like this will create a SliceOpLambda layer. This is better than a Lambda + # layer with Python code, because that is fundamentally less portable. + first_token_tensor = last_encoder_output[:, 0, :] + pooler_layer = tf.keras.layers.Dense( + units=hidden_size, + activation='tanh', + kernel_initializer=initializer, + name='pooler_transform') + cls_output = pooler_layer(first_token_tensor) + + outputs = dict( + sequence_output=encoder_outputs[-1], + pooled_output=cls_output, + encoder_outputs=encoder_outputs, + ) + + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. + super(BertEncoder, self).__init__( + inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs) + + config_dict = { + 'vocab_size': vocab_size, + 'hidden_size': hidden_size, + 'num_layers': num_layers, + 'num_attention_heads': num_attention_heads, + 'max_sequence_length': max_sequence_length, + 'type_vocab_size': type_vocab_size, + 'inner_dim': inner_dim, + 'inner_activation': tf.keras.activations.serialize(activation), + 'output_dropout': output_dropout, + 'attention_dropout': attention_dropout, + 'initializer': tf.keras.initializers.serialize(initializer), + 'output_range': output_range, + 'embedding_width': embedding_width, + 'embedding_layer': embedding_layer, + } + + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. 
TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self._pooler_layer = pooler_layer + self._transformer_layers = transformer_layers + self._embedding_norm_layer = embedding_norm_layer + self._embedding_layer = embedding_layer_inst + self._position_embedding_layer = position_embedding_layer + self._type_embedding_layer = type_embedding_layer + if embedding_projection is not None: + self._embedding_projection = embedding_projection + + def get_embedding_table(self): + return self._embedding_layer.embeddings + + def get_embedding_layer(self): + return self._embedding_layer + + def get_config(self): + return dict(self._config._asdict()) + + @property + def transformer_layers(self): + """List of Transformer layers in the encoder.""" + return self._transformer_layers + + @property + def pooler_layer(self): + """The pooler dense layer after the transformer layers.""" + return self._pooler_layer + + @classmethod + def from_config(cls, config, custom_objects=None): + if 'embedding_layer' in config and config['embedding_layer'] is not None: + warn_string = ( + 'You are reloading a model that was saved with a ' + 'potentially-shared embedding layer object. If you contine to ' + 'train this model, the embedding layer will no longer be shared. ' + 'To work around this, load the model outside of the Keras API.') + print('WARNING: ' + warn_string) + logging.warn(warn_string) + + return cls(**config) diff --git a/official/nlp/keras_nlp/encoders/bert_encoder_test.py b/official/nlp/keras_nlp/encoders/bert_encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4805749d7a8c6e3d8090ee229be286501573bb03 --- /dev/null +++ b/official/nlp/keras_nlp/encoders/bert_encoder_test.py @@ -0,0 +1,232 @@ +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for transformer-based bert encoder network.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.keras_nlp.encoders import bert_encoder + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. +@keras_parameterized.run_all_keras_modes +class BertEncoderTest(keras_parameterized.TestCase): + + def tearDown(self): + super(BertEncoderTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy("float32") + + def test_network_creation(self): + hidden_size = 32 + sequence_length = 21 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + # Create the inputs (note that the first dimension is implicit). 
+ word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + dict_outputs = test_network([word_ids, mask, type_ids]) + data = dict_outputs["sequence_output"] + pooled = dict_outputs["pooled_output"] + + self.assertIsInstance(test_network.transformer_layers, list) + self.assertLen(test_network.transformer_layers, 3) + self.assertIsInstance(test_network.pooler_layer, tf.keras.layers.Dense) + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # The default output dtype is float32. + self.assertAllEqual(tf.float32, data.dtype) + self.assertAllEqual(tf.float32, pooled.dtype) + + def test_all_encoder_outputs_network_creation(self): + hidden_size = 32 + sequence_length = 21 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + dict_outputs = test_network([word_ids, mask, type_ids]) + all_encoder_outputs = dict_outputs["encoder_outputs"] + pooled = dict_outputs["pooled_output"] + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertLen(all_encoder_outputs, 3) + for data in all_encoder_outputs: + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # The default output dtype is float32. 
+ self.assertAllEqual(tf.float32, all_encoder_outputs[-1].dtype) + self.assertAllEqual(tf.float32, pooled.dtype) + + def test_network_creation_with_float16_dtype(self): + hidden_size = 32 + sequence_length = 21 + tf.keras.mixed_precision.set_global_policy("mixed_float16") + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + dict_outputs = test_network([word_ids, mask, type_ids]) + data = dict_outputs["sequence_output"] + pooled = dict_outputs["pooled_output"] + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # If float_dtype is set to float16, the data output is float32 (from a layer + # norm) and pool output should be float16. + self.assertAllEqual(tf.float32, data.dtype) + self.assertAllEqual(tf.float16, pooled.dtype) + + @parameterized.named_parameters( + ("all_sequence", None, 21), + ("output_range", 1, 1), + ) + def test_network_invocation(self, output_range, out_seq_len): + hidden_size = 32 + sequence_length = 21 + vocab_size = 57 + num_types = 7 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types, + output_range=output_range) + # Create the inputs (note that the first dimension is implicit). 
+ word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + dict_outputs = test_network([word_ids, mask, type_ids]) + data = dict_outputs["sequence_output"] + pooled = dict_outputs["pooled_output"] + + # Create a model based off of this network: + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + + # Invoke the model. We can't validate the output data here (the model is too + # complex) but this will catch structural runtime errors. + batch_size = 3 + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint( + num_types, size=(batch_size, sequence_length)) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[1], out_seq_len) + + # Creates a BertEncoder with max_sequence_length != sequence_length + max_sequence_length = 128 + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types) + dict_outputs = test_network([word_ids, mask, type_ids]) + data = dict_outputs["sequence_output"] + pooled = dict_outputs["pooled_output"] + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[1], sequence_length) + + # Creates a BertEncoder with embedding_width != hidden_size + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types, + embedding_width=16) + dict_outputs = test_network([word_ids, mask, type_ids]) + data = 
dict_outputs["sequence_output"] + pooled = dict_outputs["pooled_output"] + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[-1], hidden_size) + self.assertTrue(hasattr(test_network, "_embedding_projection")) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + kwargs = dict( + vocab_size=100, + hidden_size=32, + num_layers=3, + num_attention_heads=2, + max_sequence_length=21, + type_vocab_size=12, + inner_dim=1223, + inner_activation="relu", + output_dropout=0.05, + attention_dropout=0.22, + initializer="glorot_uniform", + output_range=-1, + embedding_width=16, + embedding_layer=None) + network = bert_encoder.BertEncoder(**kwargs) + expected_config = dict(kwargs) + expected_config["inner_activation"] = tf.keras.activations.serialize( + tf.keras.activations.get(expected_config["inner_activation"])) + expected_config["initializer"] = tf.keras.initializers.serialize( + tf.keras.initializers.get(expected_config["initializer"])) + self.assertEqual(network.get_config(), expected_config) + # Create another network object from the first object's config. + new_network = bert_encoder.BertEncoder.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. + _ = network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + # Tests model saving/loading. 
+ model_path = self.get_temp_dir() + "/model" + network.save(model_path) + _ = tf.keras.models.load_model(model_path) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/keras_nlp/layers/__init__.py b/official/nlp/keras_nlp/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9e1ad960b1186974e410460ea96d4a34c99ddf90 --- /dev/null +++ b/official/nlp/keras_nlp/layers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-NLP layers package definition.""" +from official.nlp.keras_nlp.layers.masked_lm import MaskedLM +from official.nlp.keras_nlp.layers.on_device_embedding import OnDeviceEmbedding +from official.nlp.keras_nlp.layers.position_embedding import PositionEmbedding +from official.nlp.keras_nlp.layers.self_attention_mask import SelfAttentionMask +from official.nlp.keras_nlp.layers.transformer_encoder_block import TransformerEncoderBlock diff --git a/official/nlp/keras_nlp/layers/masked_lm.py b/official/nlp/keras_nlp/layers/masked_lm.py new file mode 100644 index 0000000000000000000000000000000000000000..a624169d45bf42fad0f1d8cd5c44f46dbd1ad74b --- /dev/null +++ b/official/nlp/keras_nlp/layers/masked_lm.py @@ -0,0 +1,123 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Masked language model network.""" +# pylint: disable=g-classes-have-attributes +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='keras_nlp') +class MaskedLM(tf.keras.layers.Layer): + """Masked language model network head for BERT modeling. + + This layer implements a masked language model based on the provided + transformer based encoder. It assumes that the encoder network being passed + has a "get_embedding_table()" method. + + Example: + ```python + encoder=keras_nlp.BertEncoder(...) + lm_layer=MaskedLM(embedding_table=encoder.get_embedding_table()) + ``` + + Args: + embedding_table: The embedding table from encoder network. + activation: The activation, if any, for the dense layer. + initializer: The initializer for the dense layer. Defaults to a Glorot + uniform initializer. + output: The output style for this layer. Can be either 'logits' or + 'predictions'. + """ + + def __init__(self, + embedding_table, + activation=None, + initializer='glorot_uniform', + output='logits', + name=None, + **kwargs): + super(MaskedLM, self).__init__(name=name, **kwargs) + self.embedding_table = embedding_table + self.activation = activation + self.initializer = tf.keras.initializers.get(initializer) + + if output not in ('predictions', 'logits'): + raise ValueError( + ('Unknown `output` value "%s". 
`output` can be either "logits" or ' + '"predictions"') % output) + self._output_type = output + + def build(self, input_shape): + self._vocab_size, hidden_size = self.embedding_table.shape + self.dense = tf.keras.layers.Dense( + hidden_size, + activation=self.activation, + kernel_initializer=self.initializer, + name='transform/dense') + self.layer_norm = tf.keras.layers.LayerNormalization( + axis=-1, epsilon=1e-12, name='transform/LayerNorm') + self.bias = self.add_weight( + 'output_bias/bias', + shape=(self._vocab_size,), + initializer='zeros', + trainable=True) + + super(MaskedLM, self).build(input_shape) + + def call(self, sequence_data, masked_positions): + masked_lm_input = self._gather_indexes(sequence_data, masked_positions) + lm_data = self.dense(masked_lm_input) + lm_data = self.layer_norm(lm_data) + lm_data = tf.matmul(lm_data, self.embedding_table, transpose_b=True) + logits = tf.nn.bias_add(lm_data, self.bias) + masked_positions_length = masked_positions.shape.as_list()[1] or tf.shape( + masked_positions)[1] + logits = tf.reshape(logits, + [-1, masked_positions_length, self._vocab_size]) + if self._output_type == 'logits': + return logits + return tf.nn.log_softmax(logits) + + def get_config(self): + raise NotImplementedError('MaskedLM cannot be directly serialized because ' + 'it has variable sharing logic.') + + def _gather_indexes(self, sequence_tensor, positions): + """Gathers the vectors at the specific positions, for performance. + + Args: + sequence_tensor: Sequence output of shape + (`batch_size`, `seq_length`, num_hidden) where num_hidden is number of + hidden units. + positions: Positions ids of tokens in sequence to mask for pretraining + of with dimension (batch_size, num_predictions) where + `num_predictions` is maximum number of tokens to mask out and predict + per each sequence. + + Returns: + Masked out sequence tensor of shape (batch_size * num_predictions, + num_hidden). 
+ """ + sequence_shape = tf.shape(sequence_tensor) + batch_size, seq_length = sequence_shape[0], sequence_shape[1] + width = sequence_tensor.shape.as_list()[2] or sequence_shape[2] + + flat_offsets = tf.reshape( + tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1]) + flat_positions = tf.reshape(positions + flat_offsets, [-1]) + flat_sequence_tensor = tf.reshape(sequence_tensor, + [batch_size * seq_length, width]) + output_tensor = tf.gather(flat_sequence_tensor, flat_positions) + + return output_tensor diff --git a/official/nlp/keras_nlp/layers/on_device_embedding.py b/official/nlp/keras_nlp/layers/on_device_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..51dab628fd3bd6716dba963cb4c93fb2eb03bcec --- /dev/null +++ b/official/nlp/keras_nlp/layers/on_device_embedding.py @@ -0,0 +1,106 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based one-hot embedding layer.""" +# pylint: disable=g-classes-have-attributes + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package="keras_nlp") +class OnDeviceEmbedding(tf.keras.layers.Layer): + """Performs an embedding lookup suitable for accelerator devices. + + This layer uses either tf.gather or tf.one_hot to translate integer indices to + float embeddings. + + Args: + vocab_size: Number of elements in the vocabulary. + embedding_width: Output size of the embedding layer. 
+ initializer: The initializer to use for the embedding weights. Defaults to + "glorot_uniform". + use_one_hot: Whether to use tf.one_hot over tf.gather for the embedding + lookup. Defaults to False (that is, using tf.gather). Setting this option + to True may improve performance, especially on small vocabulary sizes, but + will generally require more memory. + scale_factor: Whether to scale the output embeddings. Defaults to None (that + is, not to scale). Setting this option to a float will let values in + output embeddings multiplied by scale_factor. + """ + + def __init__(self, + vocab_size, + embedding_width, + initializer="glorot_uniform", + use_one_hot=False, + scale_factor=None, + **kwargs): + + super(OnDeviceEmbedding, self).__init__(**kwargs) + self._vocab_size = vocab_size + self._embedding_width = embedding_width + self._initializer = initializer + self._use_one_hot = use_one_hot + self._scale_factor = scale_factor + + def get_config(self): + config = { + "vocab_size": self._vocab_size, + "embedding_width": self._embedding_width, + "initializer": self._initializer, + "use_one_hot": self._use_one_hot, + "scale_factor": self._scale_factor, + } + base_config = super(OnDeviceEmbedding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + self.embeddings = self.add_weight( + "embeddings", + shape=[self._vocab_size, self._embedding_width], + initializer=self._initializer, + dtype=tf.float32) + + super(OnDeviceEmbedding, self).build(input_shape) + + def call(self, inputs): + flat_inputs = tf.reshape(inputs, [-1]) + if self._use_one_hot: + dtype = self._compute_dtype + if not tf.dtypes.as_dtype(dtype).is_floating: + # TensorFlow 1 compatibility. 
In TF1, self._compute_dtype is int32 + # instead of a floating-point dtype, as the dtype is inferred from the + # dtype of the inputs + dtype = tf.float32 + one_hot_data = tf.one_hot( + flat_inputs, depth=self._vocab_size, dtype=dtype) + embeddings = tf.matmul(one_hot_data, self.embeddings) + else: + embeddings = tf.gather(self.embeddings, flat_inputs) + embeddings = tf.reshape( + embeddings, + # Work around b/142213824: prefer concat to shape over a Python list. + tf.concat([tf.shape(inputs), [self._embedding_width]], axis=0)) + embeddings.set_shape(inputs.shape.as_list() + [self._embedding_width]) + if self._scale_factor: + embeddings *= self._scale_factor + return embeddings + + @property + def vocab_size(self): + return self._vocab_size + + @property + def embedding_width(self): + return self._embedding_width diff --git a/official/nlp/modeling/layers/on_device_embedding_test.py b/official/nlp/keras_nlp/layers/on_device_embedding_test.py similarity index 86% rename from official/nlp/modeling/layers/on_device_embedding_test.py rename to official/nlp/keras_nlp/layers/on_device_embedding_test.py index e2b9b98f181470ea233d8297550a2dd92786baae..ed0057ed57aa10a9819c204029cd531ca94bacf3 100644 --- a/official/nlp/modeling/layers/on_device_embedding_test.py +++ b/official/nlp/keras_nlp/layers/on_device_embedding_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for Keras-based one-hot embedding layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based one-hot embedding layer.""" import numpy as np import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.nlp.modeling.layers import on_device_embedding +from official.nlp.keras_nlp.layers import on_device_embedding # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It @@ -49,9 +45,9 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase): def test_layer_creation_with_mixed_precision(self): vocab_size = 31 embedding_width = 27 - policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16") test_layer = on_device_embedding.OnDeviceEmbedding( - vocab_size=vocab_size, embedding_width=embedding_width, dtype=policy) + vocab_size=vocab_size, embedding_width=embedding_width, + dtype="mixed_float16") # Create a 2-dimensional input (the first dimension is implicit). sequence_length = 23 input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32) @@ -87,10 +83,9 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase): def test_layer_invocation_with_mixed_precision(self): vocab_size = 31 embedding_width = 27 - policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16") test_layer = on_device_embedding.OnDeviceEmbedding( vocab_size=vocab_size, embedding_width=embedding_width, - dtype=policy) + dtype="mixed_float16") # Create a 2-dimensional input (the first dimension is implicit). 
sequence_length = 23 input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32) @@ -128,11 +123,10 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase): def test_one_hot_layer_creation_with_mixed_precision(self): vocab_size = 31 embedding_width = 27 - policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16") test_layer = on_device_embedding.OnDeviceEmbedding( vocab_size=vocab_size, embedding_width=embedding_width, - dtype=policy, + dtype="mixed_float16", use_one_hot=True) # Create a 2-dimensional input (the first dimension is implicit). sequence_length = 23 @@ -171,11 +165,10 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase): def test_one_hot_layer_invocation_with_mixed_precision(self): vocab_size = 31 embedding_width = 27 - policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16") test_layer = on_device_embedding.OnDeviceEmbedding( vocab_size=vocab_size, embedding_width=embedding_width, - dtype=policy, + dtype="mixed_float16", use_one_hot=True) # Create a 2-dimensional input (the first dimension is implicit). sequence_length = 23 @@ -193,6 +186,28 @@ class OnDeviceEmbeddingTest(keras_parameterized.TestCase): output = model.predict(input_data) self.assertEqual(tf.float16, output.dtype) + def test_use_scale_layer_invocation(self): + vocab_size = 31 + embedding_width = 27 + test_layer = on_device_embedding.OnDeviceEmbedding( + vocab_size=vocab_size, embedding_width=embedding_width, + scale_factor=embedding_width**0.5) + # Create a 2-dimensional input (the first dimension is implicit). + sequence_length = 23 + input_tensor = tf.keras.Input(shape=(sequence_length), dtype=tf.int32) + output_tensor = test_layer(input_tensor) + + # Create a model from the test layer. + model = tf.keras.Model(input_tensor, output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. 
+ batch_size = 3 + input_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + output = model.predict(input_data) + self.assertEqual(tf.float32, output.dtype) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/keras_nlp/layers/position_embedding.py b/official/nlp/keras_nlp/layers/position_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..208a7674992b8aae21d3c3b68e3b8c7afcaa96f9 --- /dev/null +++ b/official/nlp/keras_nlp/layers/position_embedding.py @@ -0,0 +1,88 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based positional embedding layer.""" +# pylint: disable=g-classes-have-attributes +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package="keras_nlp") +class PositionEmbedding(tf.keras.layers.Layer): + """Creates a positional embedding. + + Example: + ```python + position_embedding = PositionEmbedding(max_length=100) + inputs = tf.keras.Input((100, 32), dtype=tf.float32) + outputs = position_embedding(inputs) + ``` + + + Args: + max_length: The maximum size of the dynamic sequence. + initializer: The initializer to use for the embedding weights. Defaults to + "glorot_uniform". + + Reference: This layer creates a positional embedding as described in + [BERT: Pre-training of Deep Bidirectional Transformers for Language + Understanding](https://arxiv.org/abs/1810.04805). 
+ """ + + def __init__(self, + max_length, + initializer="glorot_uniform", + **kwargs): + + super(PositionEmbedding, self).__init__(**kwargs) + if max_length is None: + raise ValueError( + "`max_length` must be an Integer, not `None`." + ) + self._max_length = max_length + self._initializer = tf.keras.initializers.get(initializer) + + def get_config(self): + config = { + "max_length": self._max_length, + "initializer": tf.keras.initializers.serialize(self._initializer), + } + base_config = super(PositionEmbedding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + dimension_list = input_shape.as_list() + + if len(dimension_list) != 3: + raise ValueError("PositionEmbedding expects a 3-dimensional input tensor " + "of shape [batch, sequence, width], got " + "{}".format(input_shape)) + seq_length = dimension_list[1] + width = dimension_list[2] + + if self._max_length is not None: + weight_sequence_length = self._max_length + else: + weight_sequence_length = seq_length + + self._position_embeddings = self.add_weight( + "embeddings", + shape=[weight_sequence_length, width], + initializer=self._initializer) + + super(PositionEmbedding, self).build(input_shape) + + def call(self, inputs): + input_shape = tf.shape(inputs) + position_embeddings = self._position_embeddings[:input_shape[1], :] + return tf.broadcast_to(position_embeddings, input_shape) diff --git a/official/nlp/keras_nlp/layers/position_embedding_test.py b/official/nlp/keras_nlp/layers/position_embedding_test.py new file mode 100644 index 0000000000000000000000000000000000000000..eaae808f095f0a9f59e8832651d384e492fb9494 --- /dev/null +++ b/official/nlp/keras_nlp/layers/position_embedding_test.py @@ -0,0 +1,101 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Keras-based positional embedding layer.""" + +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.keras_nlp.layers import position_embedding + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. +@keras_parameterized.run_all_keras_modes +class PositionEmbeddingLayerTest(keras_parameterized.TestCase): + + def test_static_layer_output_shape(self): + # Create a 3-dimensional input (the first dimension is implicit). + sequence_length = 21 + test_layer = position_embedding.PositionEmbedding( + max_length=sequence_length) + width = 30 + input_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(input_tensor) + + # When using static positional embedding shapes, the output is expected + # to be the same as the input shape in all dimensions save batch. + expected_output_shape = [None, sequence_length, width] + self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) + # The default output dtype for this layer should be tf.float32. + self.assertEqual(tf.float32, output_tensor.dtype) + + def test_float16_dtype(self): + # Create a 3-dimensional input (the first dimension is implicit). 
+ sequence_length = 21 + test_layer = position_embedding.PositionEmbedding( + max_length=sequence_length, dtype="float16") + width = 30 + input_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(input_tensor) + + # When using static positional embedding shapes, the output is expected + # to be the same as the input shape in all dimensions save batch. + expected_output_shape = [None, sequence_length, width] + self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) + # With dtype="float16", the output dtype of this layer should be tf.float16. + self.assertEqual(tf.float16, output_tensor.dtype) + + def test_dynamic_layer_output_shape(self): + max_sequence_length = 40 + test_layer = position_embedding.PositionEmbedding( + max_length=max_sequence_length) + # Create a 3-dimensional input (the first dimension is implicit). + width = 30 + input_tensor = tf.keras.Input(shape=(None, width)) + output_tensor = test_layer(input_tensor) + + # When using dynamic positional embedding shapes, the output is expected + # to be the same as the input shape in all dimensions - but may be None if + # the input shape is None there. + expected_output_shape = [None, None, width] + self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) + + def test_dynamic_layer_slicing(self): + max_sequence_length = 40 + test_layer = position_embedding.PositionEmbedding( + max_length=max_sequence_length) + # Create a 3-dimensional input (the first dimension is implicit). + width = 30 + input_tensor = tf.keras.Input(shape=(None, width)) + output_tensor = test_layer(input_tensor) + + model = tf.keras.Model(input_tensor, output_tensor) + + # Create input data that is shorter than max_sequence_length, which should + # trigger a down-slice. + input_length = 17 + # Note: This test explicitly uses a batch size of 1. This is to get around + # Keras' restriction on Model invocations: inputs are expected to have the + # same batch cardinality as outputs. 
In practice, this layer should be used + # inside a model, where it can be projected when added to another tensor. + input_data = np.ones((1, input_length, width)) + output_data = model.predict(input_data) + + self.assertAllEqual([1, input_length, width], output_data.shape) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/keras_nlp/layers/self_attention_mask.py b/official/nlp/keras_nlp/layers/self_attention_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..1c0033f71a7b6b9c115c5ed09b85ace0e27c0979 --- /dev/null +++ b/official/nlp/keras_nlp/layers/self_attention_mask.py @@ -0,0 +1,55 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras layer that creates a self-attention mask.""" + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='keras_nlp') +class SelfAttentionMask(tf.keras.layers.Layer): + """Create 3D attention mask from a 2D tensor mask. + + inputs[0]: from_tensor: 2D or 3D Tensor of shape + [batch_size, from_seq_length, ...]. + inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length]. + + Returns: + float Tensor of shape [batch_size, from_seq_length, to_seq_length]. 
+ """ + + def call(self, inputs, to_mask): + from_shape = tf.shape(inputs) + batch_size = from_shape[0] + from_seq_length = from_shape[1] + + to_shape = tf.shape(to_mask) + to_seq_length = to_shape[1] + + to_mask = tf.cast( + tf.reshape(to_mask, [batch_size, 1, to_seq_length]), + dtype=inputs.dtype) + + # We don't assume that `from_tensor` is a mask (although it could be). We + # don't actually care if we attend *from* padding tokens (only *to* padding) + # tokens so we create a tensor of all ones. + # + # `broadcast_ones` = [batch_size, from_seq_length, 1] + broadcast_ones = tf.ones( + shape=[batch_size, from_seq_length, 1], dtype=inputs.dtype) + + # Here we broadcast along two dimensions to create the mask. + mask = broadcast_ones * to_mask + + return mask diff --git a/official/nlp/keras_nlp/layers/transformer_encoder_block.py b/official/nlp/keras_nlp/layers/transformer_encoder_block.py new file mode 100644 index 0000000000000000000000000000000000000000..04f35558a365443121020cf46c0eed710c1b58bc --- /dev/null +++ b/official/nlp/keras_nlp/layers/transformer_encoder_block.py @@ -0,0 +1,300 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based TransformerEncoder block layer.""" + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package="keras_nlp") +class TransformerEncoderBlock(tf.keras.layers.Layer): + """TransformerEncoderBlock layer. 
+ + This layer implements the Transformer Encoder from + "Attention Is All You Need". (https://arxiv.org/abs/1706.03762), + which combines a `tf.keras.layers.MultiHeadAttention` layer with a + two-layer feedforward network. + + References: + [Attention Is All You Need](https://arxiv.org/abs/1706.03762) + [BERT: Pre-training of Deep Bidirectional Transformers for Language + Understanding](https://arxiv.org/abs/1810.04805) + """ + + def __init__(self, + num_attention_heads, + inner_dim, + inner_activation, + output_range=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + use_bias=True, + norm_first=False, + norm_epsilon=1e-12, + output_dropout=0.0, + attention_dropout=0.0, + inner_dropout=0.0, + attention_initializer=None, + **kwargs): + """Initializes `TransformerEncoderBlock`. + + Args: + num_attention_heads: Number of attention heads. + inner_dim: The output dimension of the first Dense layer in a two-layer + feedforward network. + inner_activation: The activation for the first Dense layer in a two-layer + feedforward network. + output_range: the sequence output range, [0, output_range) for slicing the + target sequence. `None` means the target sequence is not sliced. + kernel_initializer: Initializer for dense layer kernels. + bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + use_bias: Whether to enable use_bias in attention layer. If set False, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate + dense layers. 
If set False, output of attention and intermediate dense + layers is normalized. + norm_epsilon: Epsilon value to initialize normalization layers. + output_dropout: Dropout probability for the post-attention and output + dropout. + attention_dropout: Dropout probability for within the attention layer. + inner_dropout: Dropout probability for the first Dense layer in a + two-layer feedforward network. + attention_initializer: Initializer for kernels of attention layers. If set + `None`, attention layers use kernel_initializer as initializer for + kernel. + **kwargs: keyword arguments/ + """ + super().__init__(**kwargs) + + self._num_heads = num_attention_heads + self._inner_dim = inner_dim + self._inner_activation = inner_activation + self._attention_dropout = attention_dropout + self._output_dropout = output_dropout + self._output_range = output_range + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self._bias_initializer = tf.keras.initializers.get(bias_initializer) + self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) + self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer) + self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) + self._bias_constraint = tf.keras.constraints.get(bias_constraint) + self._use_bias = use_bias + self._norm_first = norm_first + self._norm_epsilon = norm_epsilon + self._inner_dropout = inner_dropout + if attention_initializer: + self._attention_initializer = tf.keras.initializers.get( + attention_initializer) + else: + self._attention_initializer = self._kernel_initializer + + def build(self, input_shape): + if isinstance(input_shape, tf.TensorShape): + input_tensor_shape = input_shape + elif isinstance(input_shape, (list, tuple)): + input_tensor_shape = tf.TensorShape(input_shape[0]) + else: + raise ValueError( + "The type of input shape argument is not supported, got: %s" % + 
type(input_shape)) + if len(input_tensor_shape.as_list()) != 3: + raise ValueError("TransformerEncoderBlock expects a three-dimensional " + "input of shape [batch, sequence, width].") + hidden_size = input_tensor_shape[-1] + if hidden_size % self._num_heads != 0: + raise ValueError( + "The input size (%d) is not a multiple of the number of attention " + "heads (%d)" % (hidden_size, self._num_heads)) + self._attention_head_size = int(hidden_size // self._num_heads) + common_kwargs = dict( + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint) + self._attention_layer = tf.keras.layers.MultiHeadAttention( + num_heads=self._num_heads, + key_dim=self._attention_head_size, + dropout=self._attention_dropout, + use_bias=self._use_bias, + kernel_initializer=self._attention_initializer, + name="self_attention", + **common_kwargs) + self._attention_dropout = tf.keras.layers.Dropout(rate=self._output_dropout) + # Use float32 in layernorm for numeric stability. + # It is probably safe in mixed_float16, but we haven't validated this yet. + self._attention_layer_norm = ( + tf.keras.layers.LayerNormalization( + name="self_attention_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype=tf.float32)) + self._intermediate_dense = tf.keras.layers.experimental.EinsumDense( + "abc,cd->abd", + output_shape=(None, self._inner_dim), + bias_axes="d", + kernel_initializer=self._kernel_initializer, + name="intermediate", + **common_kwargs) + policy = tf.keras.mixed_precision.global_policy() + if policy.name == "mixed_bfloat16": + # bfloat16 causes BERT with the LAMB optimizer to not converge + # as well, so we use float32. + # TODO(b/154538392): Investigate this. 
+ policy = tf.float32 + self._intermediate_activation_layer = tf.keras.layers.Activation( + self._inner_activation, dtype=policy) + self._inner_dropout_layer = tf.keras.layers.Dropout( + rate=self._inner_dropout) + self._output_dense = tf.keras.layers.experimental.EinsumDense( + "abc,cd->abd", + output_shape=(None, hidden_size), + bias_axes="d", + name="output", + kernel_initializer=self._kernel_initializer, + **common_kwargs) + self._output_dropout = tf.keras.layers.Dropout(rate=self._output_dropout) + # Use float32 in layernorm for numeric stability. + self._output_layer_norm = tf.keras.layers.LayerNormalization( + name="output_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype=tf.float32) + + super(TransformerEncoderBlock, self).build(input_shape) + + def get_config(self): + config = { + "num_attention_heads": + self._num_heads, + "inner_dim": + self._inner_dim, + "inner_activation": + self._inner_activation, + "output_dropout": + self._output_dropout, + "attention_dropout": + self._attention_dropout, + "output_range": + self._output_range, + "kernel_initializer": + tf.keras.initializers.serialize(self._kernel_initializer), + "bias_initializer": + tf.keras.initializers.serialize(self._bias_initializer), + "kernel_regularizer": + tf.keras.regularizers.serialize(self._kernel_regularizer), + "bias_regularizer": + tf.keras.regularizers.serialize(self._bias_regularizer), + "activity_regularizer": + tf.keras.regularizers.serialize(self._activity_regularizer), + "kernel_constraint": + tf.keras.constraints.serialize(self._kernel_constraint), + "bias_constraint": + tf.keras.constraints.serialize(self._bias_constraint), + "use_bias": + self._use_bias, + "norm_first": + self._norm_first, + "norm_epsilon": + self._norm_epsilon, + "inner_dropout": + self._inner_dropout, + "attention_initializer": + tf.keras.initializers.serialize(self._attention_initializer) + } + base_config = super(TransformerEncoderBlock, self).get_config() + return 
dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + """Transformer self-attention encoder block call. + + Args: + inputs: a single tensor or a list of tensors. + `input tensor` as the single sequence of embeddings. + [`input tensor`, `attention mask`] to have the additional attention + mask. + [`query tensor`, `key value tensor`, `attention mask`] to have separate + input streams for the query, and key/value to the multi-head + attention. + + Returns: + An output tensor with the same dimensions as input/query tensor. + """ + if isinstance(inputs, (list, tuple)): + if len(inputs) == 2: + input_tensor, attention_mask = inputs + key_value = None + elif len(inputs) == 3: + input_tensor, key_value, attention_mask = inputs + else: + raise ValueError("Unexpected inputs to %s with length at %d" % + (self.__class__, len(inputs))) + else: + input_tensor, key_value, attention_mask = (inputs, None, None) + + if self._output_range: + if self._norm_first: + source_tensor = input_tensor[:, 0:self._output_range, :] + input_tensor = self._attention_layer_norm(input_tensor) + if key_value is not None: + key_value = self._attention_layer_norm(key_value) + target_tensor = input_tensor[:, 0:self._output_range, :] + if attention_mask is not None: + attention_mask = attention_mask[:, 0:self._output_range, :] + else: + if self._norm_first: + source_tensor = input_tensor + input_tensor = self._attention_layer_norm(input_tensor) + if key_value is not None: + key_value = self._attention_layer_norm(key_value) + target_tensor = input_tensor + + if key_value is None: + key_value = input_tensor + attention_output = self._attention_layer( + query=target_tensor, value=key_value, attention_mask=attention_mask) + attention_output = self._attention_dropout(attention_output) + if self._norm_first: + attention_output = source_tensor + attention_output + else: + attention_output = self._attention_layer_norm(target_tensor + + attention_output) + if self._norm_first: + 
source_attention_output = attention_output + attention_output = self._output_layer_norm(attention_output) + inner_output = self._intermediate_dense(attention_output) + inner_output = self._intermediate_activation_layer(inner_output) + inner_output = self._inner_dropout_layer(inner_output) + layer_output = self._output_dense(inner_output) + layer_output = self._output_dropout(layer_output) + + if self._norm_first: + return source_attention_output + layer_output + + # During mixed precision training, layer norm output is always fp32 for now. + # Casts fp32 for the subsequent add. + layer_output = tf.cast(layer_output, tf.float32) + return self._output_layer_norm(layer_output + attention_output) diff --git a/official/nlp/keras_nlp/layers/transformer_encoder_block_test.py b/official/nlp/keras_nlp/layers/transformer_encoder_block_test.py new file mode 100644 index 0000000000000000000000000000000000000000..65447a32e499dcc10549ba288ce8a3a8b106436b --- /dev/null +++ b/official/nlp/keras_nlp/layers/transformer_encoder_block_test.py @@ -0,0 +1,301 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Keras-based transformer block layer.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.keras_nlp.layers.transformer_encoder_block import TransformerEncoderBlock + + +@keras_parameterized.run_all_keras_modes +@parameterized.named_parameters( + ('base', TransformerEncoderBlock)) +class TransformerEncoderBlockLayerTest(keras_parameterized.TestCase): + + def tearDown(self): + super(TransformerEncoderBlockLayerTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy('float32') + + def test_layer_creation(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(data_tensor) + # The default output of a transformer layer should be the same as the input. + self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) + + def test_layer_creation_with_mask(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + # The default output of a transformer layer should be the same as the input. 
+ self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) + + def test_layer_invocation(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(data_tensor) + + # Create a model from the test layer. + model = tf.keras.Model(data_tensor, output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. + batch_size = 6 + input_data = 10 * np.random.random_sample( + (batch_size, sequence_length, width)) + _ = model.predict(input_data) + + def test_layer_invocation_with_mask(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + + # Create a model from the test layer. + model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. 
+ batch_size = 6 + input_data = 10 * np.random.random_sample( + (batch_size, sequence_length, width)) + # The attention mask should be of shape (batch, from_seq_len, to_seq_len), + # which here is (batch, sequence_length, sequence_length) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + _ = model.predict([input_data, mask_data]) + + def test_layer_output_range(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + + batch_size = 6 + input_data = 10 * np.random.random_sample( + (batch_size, sequence_length, width)) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + output_tensor = test_layer([input_data, mask_data]) + + # The layer only attends to the first token and outputs the first token + # embedding. + new_layer = transformer_cls( + num_attention_heads=10, + inner_dim=2048, + inner_activation='relu', + output_range=1) + _ = new_layer([input_data, mask_data]) + new_layer.set_weights(test_layer.get_weights()) + new_output_tensor = new_layer([input_data, mask_data]) + self.assertAllClose( + new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003) + + def test_layer_output_range_without_mask(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, + inner_activation='relu', norm_first=True) + sequence_length = 21 + width = 80 + + batch_size = 6 + input_data = 10 * np.random.random_sample( + (batch_size, sequence_length, width)) + output_tensor = test_layer(input_data) + + # The layer only attends to the first token and outputs the first token + # embedding. 
+ new_layer = transformer_cls( + num_attention_heads=10, + inner_dim=2048, + inner_activation='relu', + output_range=1, + norm_first=True) + _ = new_layer(input_data) + new_layer.set_weights(test_layer.get_weights()) + new_output_tensor = new_layer(input_data) + self.assertAllClose( + new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003) + + def test_layer_output_range_with_pre_norm(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, + inner_activation='relu', norm_first=True) + sequence_length = 21 + width = 80 + + batch_size = 6 + input_data = 10 * np.random.random_sample( + (batch_size, sequence_length, width)) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + output_tensor = test_layer([input_data, mask_data]) + + # The layer only attends to the first token and outputs the first token + # embedding. + new_layer = transformer_cls( + num_attention_heads=10, + inner_dim=2048, + inner_activation='relu', + output_range=1, + norm_first=True) + _ = new_layer([input_data, mask_data]) + new_layer.set_weights(test_layer.get_weights()) + new_output_tensor = new_layer([input_data, mask_data]) + self.assertAllClose( + new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003) + + def test_layer_invocation_with_float16_dtype(self, transformer_cls): + tf.keras.mixed_precision.set_global_policy('mixed_float16') + test_layer = transformer_cls( + num_attention_heads=10, inner_dim=2048, inner_activation='relu') + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + + # Create a model from the test layer. 
+ model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. + batch_size = 6 + input_data = (10 * np.random.random_sample( + (batch_size, sequence_length, width))) + # The attention mask should be of shape (batch, from_seq_len, to_seq_len), + # which here is (batch, sequence_length, sequence_length) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + _ = model.predict([input_data, mask_data]) + + def test_transform_with_initializer(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, + inner_dim=2048, + inner_activation='relu', + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) + sequence_length = 21 + width = 80 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output = test_layer(data_tensor) + # The default output of a transformer layer should be the same as the input. + self.assertEqual(data_tensor.shape.as_list(), output.shape.as_list()) + + def test_dynamic_layer_sequence(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=10, + inner_dim=2048, + inner_activation='relu', + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) + # Create a 3-dimensional input (the first dimension is implicit). 
+ width = 30 + input_tensor = tf.keras.Input(shape=(None, width)) + output_tensor = test_layer(input_tensor) + model = tf.keras.Model(input_tensor, output_tensor) + + input_length = 17 + input_data = np.ones((1, input_length, width)) + output_data = model.predict(input_data) + + self.assertAllEqual([1, input_length, width], output_data.shape) + + def test_separate_qkv(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=2, + inner_dim=128, + inner_activation='relu', + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) + # Forward path. + q_tensor = tf.zeros([2, 4, 16], dtype=tf.float32) + kv_tensor = tf.zeros([2, 8, 16], dtype=tf.float32) + dummy_mask = tf.zeros([2, 4, 8], dtype=tf.float32) + inputs = [q_tensor, kv_tensor, dummy_mask] + output = test_layer(inputs) + self.assertEqual(output.shape, q_tensor.shape) + + +@keras_parameterized.run_all_keras_modes +class TransformerArgumentTest(keras_parameterized.TestCase): + + def test_use_bias_norm_first(self): + num_attention_heads = 2 + hidden_size = 16 + encoder_block = TransformerEncoderBlock( + num_attention_heads=num_attention_heads, + inner_dim=32, + inner_activation='relu', + output_dropout=0.1, + attention_dropout=0.1, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + inner_dropout=0.1, + attention_initializer=tf.keras.initializers.RandomUniform( + minval=0., maxval=1.)) + # Forward path. 
+ dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32) + dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32) + inputs = [dummy_tensor, dummy_mask] + output = encoder_block(inputs) + self.assertEqual(output.shape, (2, 4, hidden_size)) + + def test_get_config(self): + num_attention_heads = 2 + encoder_block = TransformerEncoderBlock( + num_attention_heads=num_attention_heads, + inner_dim=32, + inner_activation='relu', + output_dropout=0.1, + attention_dropout=0.1, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + inner_dropout=0.1, + attention_initializer=tf.keras.initializers.RandomUniform( + minval=0., maxval=1.)) + encoder_block_config = encoder_block.get_config() + new_encoder_block = TransformerEncoderBlock.from_config( + encoder_block_config) + self.assertEqual(encoder_block_config, new_encoder_block.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/keras_nlp/requirements.txt b/official/nlp/keras_nlp/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c765b1ead22ae537e3c3722665f07d00d1ba3a93 --- /dev/null +++ b/official/nlp/keras_nlp/requirements.txt @@ -0,0 +1 @@ +numpy>=1.15.4 diff --git a/official/nlp/keras_nlp/setup.py b/official/nlp/keras_nlp/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..0611a450ef2a4cada65cd9feb2be5f3f3dd6f7f4 --- /dev/null +++ b/official/nlp/keras_nlp/setup.py @@ -0,0 +1,69 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Setup script.""" + +import os + +from setuptools import find_packages +from setuptools import setup + +version = '0.0.1' + + +def _get_requirements(): + """Parses requirements.txt file.""" + install_requires_tmp = [] + dependency_links_tmp = [] + with open( + os.path.join(os.path.dirname(__file__), './requirements.txt'), 'r') as f: + for line in f: + package_name = line.strip() + # Skip empty line or comments starting with "#". + if not package_name or package_name[0] == '#': + continue + if package_name.startswith('-e '): + dependency_links_tmp.append(package_name[3:].strip()) + else: + install_requires_tmp.append(package_name) + return install_requires_tmp, dependency_links_tmp + +install_requires, dependency_links = _get_requirements() + +install_requires.append('tf-nightly') + +setup( + name='keras-nlp', + version=version, + description='Keras Natural Language Processing Library', + url='https://github.com/keras-team/keras-nlp', + author='The Keras authors', + author_email='keras-team@google.com', + license='Apache License 2.0', + install_requires=install_requires, + classifiers=[ + 'Programming Language :: Python', + 'Programming Language :: Python :: 3.6', + 'Operating System :: Unix', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: MacOS', + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering', + 'Topic :: Software Development' + ], + packages=find_packages(exclude=('tests',)), + exclude_package_data={'': ['*_test.py',],}, + dependency_links=dependency_links, + python_requires='>=3.6', +) diff --git a/official/nlp/metrics/__init__.py b/official/nlp/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/nlp/metrics/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/metrics/bleu.py b/official/nlp/metrics/bleu.py new file mode 100644 index 0000000000000000000000000000000000000000..ce92296ce54b91f7a89ae4a93c5e51d7e6b0834b --- /dev/null +++ b/official/nlp/metrics/bleu.py @@ -0,0 +1,185 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Script to compute official BLEU score. 
+ +Source: +https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py +""" + +import collections +import math +import re +import sys +import unicodedata + +import numpy as np +import tensorflow as tf + + +class UnicodeRegex(object): + """Ad-hoc hack to recognize all punctuation and symbols.""" + + def __init__(self): + punctuation = self.property_chars("P") + self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])") + self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])") + self.symbol_re = re.compile("([" + self.property_chars("S") + "])") + + def property_chars(self, prefix): + return "".join( + chr(x) + for x in range(sys.maxunicode) + if unicodedata.category(chr(x)).startswith(prefix)) + + +uregex = UnicodeRegex() + + +def bleu_tokenize(string): + r"""Tokenize a string following the official BLEU implementation. + + See https://github.com/moses-smt/mosesdecoder/ + blob/master/scripts/generic/mteval-v14.pl#L954-L983 + In our case, the input string is expected to be just one line + and no HTML entities de-escaping is needed. + So we just tokenize on punctuation and symbols, + except when a punctuation is preceded and followed by a digit + (e.g. a comma/dot as a thousand/decimal separator). + + Note that a number (e.g. a year) followed by a dot at the end of sentence + is NOT tokenized, + i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g` + does not match this case (unless we add a space after each sentence). + However, this error is already in the original mteval-v14.pl + and we want to be consistent with it. 
+ + Args: + string: the input string + + Returns: + a list of tokens + """ + string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string) + string = uregex.punct_nondigit_re.sub(r" \1 \2", string) + string = uregex.symbol_re.sub(r" \1 ", string) + return string.split() + + +def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False): + """Compute BLEU for two files (reference and hypothesis translation).""" + ref_lines = tf.io.gfile.GFile(ref_filename).read().strip().splitlines() + hyp_lines = tf.io.gfile.GFile(hyp_filename).read().strip().splitlines() + return bleu_on_list(ref_lines, hyp_lines, case_sensitive) + + +def _get_ngrams_with_counter(segment, max_order): + """Extracts all n-grams up to a given maximum order from an input segment. + + Args: + segment: text segment from which n-grams will be extracted. + max_order: maximum length in tokens of the n-grams returned by this + method. + + Returns: + The Counter containing all n-grams up to max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in range(1, max_order + 1): + for i in range(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts + + +def compute_bleu(reference_corpus, translation_corpus, max_order=4, + use_bp=True): + """Computes BLEU score of translated segments against one or more references. + + Args: + reference_corpus: list of references for each translation. Each + reference should be tokenized into a list of tokens. + translation_corpus: list of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + use_bp: boolean, whether to apply brevity penalty. + + Returns: + BLEU score. 
+ """ + reference_length = 0 + translation_length = 0 + bp = 1.0 + geo_mean = 0 + + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + precisions = [] + + for (references, translations) in zip(reference_corpus, translation_corpus): + reference_length += len(references) + translation_length += len(translations) + ref_ngram_counts = _get_ngrams_with_counter(references, max_order) + translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) + + overlap = dict((ngram, + min(count, translation_ngram_counts[ngram])) + for ngram, count in ref_ngram_counts.items()) + + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for ngram in translation_ngram_counts: + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ + ngram] + + precisions = [0] * max_order + smooth = 1.0 + + for i in range(0, max_order): + if possible_matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ + i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if max(precisions) > 0: + p_log_sum = sum(math.log(p) for p in precisions if p) + geo_mean = math.exp(p_log_sum / max_order) + + if use_bp: + ratio = translation_length / reference_length + bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bleu = geo_mean * bp + return np.float32(bleu) + + +def bleu_on_list(ref_lines, hyp_lines, case_sensitive=False): + """Compute BLEU for two list of strings (reference and hypothesis).""" + if len(ref_lines) != len(hyp_lines): + raise ValueError( + "Reference and translation files have different number of " + "lines (%d VS %d). If training only a few steps (100-200), the " + "translation may be empty." 
% (len(ref_lines), len(hyp_lines))) + if not case_sensitive: + ref_lines = [x.lower() for x in ref_lines] + hyp_lines = [x.lower() for x in hyp_lines] + ref_tokens = [bleu_tokenize(x) for x in ref_lines] + hyp_tokens = [bleu_tokenize(x) for x in hyp_lines] + return compute_bleu(ref_tokens, hyp_tokens) * 100 diff --git a/official/nlp/metrics/bleu_test.py b/official/nlp/metrics/bleu_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e410ae80598a47ee660a56ae1ba8c73df20389c5 --- /dev/null +++ b/official/nlp/metrics/bleu_test.py @@ -0,0 +1,72 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test functions in compute_blue.py.""" + +import tempfile + +import tensorflow as tf + +from official.nlp.metrics import bleu + + +class ComputeBleuTest(tf.test.TestCase): + + def _create_temp_file(self, text): + temp_file = tempfile.NamedTemporaryFile(delete=False) + with tf.io.gfile.GFile(temp_file.name, "w") as w: + w.write(text) + return temp_file.name + + def test_bleu_same(self): + ref = self._create_temp_file("test 1 two 3\nmore tests!") + hyp = self._create_temp_file("test 1 two 3\nmore tests!") + + uncased_score = bleu.bleu_wrapper(ref, hyp, False) + cased_score = bleu.bleu_wrapper(ref, hyp, True) + self.assertEqual(100, uncased_score) + self.assertEqual(100, cased_score) + + def test_bleu_same_different_case(self): + ref = self._create_temp_file("Test 1 two 3\nmore tests!") + hyp = self._create_temp_file("test 1 two 3\nMore tests!") + uncased_score = bleu.bleu_wrapper(ref, hyp, False) + cased_score = bleu.bleu_wrapper(ref, hyp, True) + self.assertEqual(100, uncased_score) + self.assertLess(cased_score, 100) + + def test_bleu_different(self): + ref = self._create_temp_file("Testing\nmore tests!") + hyp = self._create_temp_file("Dog\nCat") + uncased_score = bleu.bleu_wrapper(ref, hyp, False) + cased_score = bleu.bleu_wrapper(ref, hyp, True) + self.assertLess(uncased_score, 100) + self.assertLess(cased_score, 100) + + def test_bleu_tokenize(self): + s = "Test0, 1 two, 3" + tokenized = bleu.bleu_tokenize(s) + self.assertEqual(["Test0", ",", "1", "two", ",", "3"], tokenized) + + def test_bleu_list(self): + ref = ["test 1 two 3", "more tests!"] + hyp = ["test 1 two 3", "More tests!"] + uncased_score = bleu.bleu_on_list(ref, hyp, False) + cased_score = bleu.bleu_on_list(ref, hyp, True) + self.assertEqual(uncased_score, 100) + self.assertLess(cased_score, 100) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/modeling/README.md b/official/nlp/modeling/README.md index 
78f0cf239afb234740dc41a86d400ddd6c0240ec..99c7c361f9716b380a9287306558b872238afa7e 100644 --- a/official/nlp/modeling/README.md +++ b/official/nlp/modeling/README.md @@ -1,18 +1,21 @@ # NLP Modeling Library -This library provides a set of Keras primitives (Layers, Networks, and Models) -that can be assembled into transformer-based models. They are -flexible, validated, interoperable, and both TF1 and TF2 compatible. +This library provides a set of Keras primitives (`tf.keras.Layer` and +`tf.keras.Model`) that can be assembled into transformer-based models. +They are flexible, validated, interoperable, and both TF1 and TF2 compatible. * [`layers`](layers) are the fundamental building blocks for NLP models. -They can be used to assemble new layers, networks, or models. +They can be used to assemble new `tf.keras` layers or models. -* [`networks`](networks) are combinations of layers (and possibly other networks). They are sub-units of models that would not be trained alone. They -encapsulate common network structures like a classification head -or a transformer encoder into an easily handled object with a -standardized configuration. +* [`networks`](networks) are combinations of `tf.keras` layers (and possibly +other networks). They are `tf.keras` models that would not be trained alone. +It encapsulates common network structures like a transformer encoder into an +easily handled object with a standardized configuration. -* [`models`](models) are combinations of layers and networks that would be trained. Pre-built canned models are provided as both convenience functions and canonical examples. +* [`models`](models) are combinations of `tf.keras` layers and models that can +be trained. Several pre-built canned models are provided to train encoder +networks. These models are intended as both convenience functions and canonical +examples. * [`losses`](losses) contains common loss computation used in NLP tasks. 
@@ -22,7 +25,9 @@ Please see the colab for how to build transformer-based NLP models using above primitives. Besides the pre-defined primitives, it also provides scaffold classes to allow -easy experimentation with noval achitectures, e.g., you don’t need to fork a whole Transformer object to try a different kind of attention primitive, for instance. +easy experimentation with novel architectures, e.g., you don’t need to fork a +whole Transformer object to try a different kind of attention primitive, +for instance. * [`TransformerScaffold`](layers/transformer_scaffold.py) implements the Transformer from ["Attention Is All You Need"] @@ -43,4 +48,5 @@ Please see the colab (https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb) for how to use scaffold classes to build noval achitectures. -BERT and ALBERT models in this repo are implemented using this library. Code examples can be found in the corresponding model folder. +BERT and ALBERT models in this repo are implemented using this library. +Code examples can be found in the corresponding model folder. diff --git a/official/nlp/modeling/__init__.py b/official/nlp/modeling/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..4800c66635834448a53116213975036eb88d9321 100644 --- a/official/nlp/modeling/__init__.py +++ b/official/nlp/modeling/__init__.py @@ -1 +1,24 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""NLP Modeling Library. + +This library provides a set of Keras primitives (`tf.keras.Layer` and +`tf.keras.Model`) that can be assembled into transformer-based models. +They are flexible, validated, interoperable, and both TF1 and TF2 compatible. +""" +from official.nlp.modeling import layers +from official.nlp.modeling import losses +from official.nlp.modeling import models +from official.nlp.modeling import networks diff --git a/official/nlp/modeling/layers/README.md b/official/nlp/modeling/layers/README.md index 1dcadc6af5d4fe5596aca4cfc0c5952d14d6df4b..89a846bb8561178fdebc572a2899419f73b52e30 100644 --- a/official/nlp/modeling/layers/README.md +++ b/official/nlp/modeling/layers/README.md @@ -1,7 +1,7 @@ # Layers Layers are the fundamental building blocks for NLP models. They can be used to -assemble new layers, networks, or models. +assemble new `tf.keras` layers or models. * [MultiHeadAttention](attention.py) implements an optionally masked attention between query, key, value tensors as described in @@ -11,6 +11,12 @@ assemble new layers, networks, or models. * [CachedAttention](attention.py) implements an attention layer with cache used for auto-agressive decoding. +* [MatMulWithMargin](mat_mul_with_margin.py) implements a matrix + multiplication with margin layer used for training retrieval / ranking + tasks, as described in ["Improving Multilingual Sentence Embedding using + Bi-directional Dual Encoder with Additive Margin + Softmax"](https://www.ijcai.org/Proceedings/2019/0746.pdf). + * [MultiChannelAttention](multi_channel_attention.py) implements an variant of multi-head attention which can be used to merge multiple streams for cross-attentions. @@ -23,9 +29,13 @@ assemble new layers, networks, or models. described in ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762). 
-* [TransformerDecoderLayer](transformer.py) TransformerDecoderLayer is made up - of self multi-head attention, cross multi-head attention and - feedforward network. +* [TransformerDecoderBlock](transformer.py) TransformerDecoderBlock is made up + of self multi-head attention, cross multi-head attention and feedforward + network. + +* [RandomFeatureGaussianProcess](gaussian_process.py) implements random + feature-based Gaussian process described in ["Random Features for + Large-Scale Kernel Machines"](https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf). * [ReZeroTransformer](rezero_transformer.py) implements Transformer with ReZero described in @@ -41,6 +51,11 @@ assemble new layers, networks, or models. * [SelfAttentionMask](self_attention_mask.py) creates a 3D attention mask from a 2D tensor mask. +* [SpectralNormalization](spectral_normalization.py) implements a tf.Wrapper + that applies spectral normalization regularization to a given layer. See + [Spectral Norm Regularization for Improving the Generalizability of + Deep Learning](https://arxiv.org/abs/1705.10941) + * [MaskedSoftmax](masked_softmax.py) implements a softmax with an optional masking input. If no mask is provided to this layer, it performs a standard softmax; however, if a mask tensor is applied (which should be 1 in @@ -54,6 +69,42 @@ assemble new layers, networks, or models. * [ClassificationHead](cls_head.py) A pooling head over a sequence of embeddings, commonly used by classification tasks. +* [GaussianProcessClassificationHead](cls_head.py) A spectral-normalized + neural Gaussian process (SNGP)-based classification head as described in + ["Simple and Principled Uncertainty Estimation with Deterministic Deep + Learning via Distance Awareness"](https://arxiv.org/abs/2006.10108). + * [GatedFeedforward](gated_feedforward.py) implements the gated linear layer feedforward as described in ["GLU Variants Improve Transformer"](https://arxiv.org/abs/2002.05202). 
+ +* [MultiHeadRelativeAttention](relative_attention.py) implements a variant + of multi-head attention with support for relative position encodings as + described in "Transformer-XL: Attentive Language Models Beyond a + Fixed-Length Context"(https://arxiv.org/abs/1901.02860). This also has + extended support for segment-based attention, a re-parameterization + introduced in "XLNet: Generalized Autoregressive Pretraining for Language + Understanding" (https://arxiv.org/abs/1906.08237). + +* [TwoStreamRelativeAttention](relative_attention.py) implements a variant + of multi-head relative attention as described in "XLNet: Generalized + Autoregressive Pretraining for Language Understanding" + (https://arxiv.org/abs/1906.08237). This takes in a query and content + stream and applies self attention. + +* [TransformerXL](transformer_xl.py) implements Transformer XL introduced in + "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + (https://arxiv.org/abs/1901.02860). This contains `TransformerXLBlock`, a + block containing either one or two stream relative self-attention as well as + subsequent feedforward networks. It also contains `TransformerXL`, which + contains attention biases as well as multiple `TransformerXLBlocks`. + +* [MobileBertEmbedding](mobile_bert_layers.py) and + [MobileBertTransformer](mobile_bert_layers.py) implement the embedding layer + and also transformer layer proposed in the + [MobileBERT paper](https://arxiv.org/pdf/2004.02984.pdf). + +* [BertPackInputs](text_layers.py) and + [BertTokenizer](text_layers.py) and [SentencepieceTokenizer](text_layers.py) + implements the layer to tokenize raw text and pack them into the inputs for + BERT models. 
diff --git a/official/nlp/modeling/layers/__init__.py b/official/nlp/modeling/layers/__init__.py index 2cd8e7b9e59ceab76e268f83907833eec32c73ce..afa4e2a979bcb120d1111e4a9682c0d8ae71fc50 100644 --- a/official/nlp/modeling/layers/__init__.py +++ b/official/nlp/modeling/layers/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,20 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Layers package definition.""" + +"""Layers are the fundamental building blocks for NLP models. + +They can be used to assemble new `tf.keras` layers or models. 
+""" # pylint: disable=wildcard-import from official.nlp.modeling.layers.attention import * from official.nlp.modeling.layers.cls_head import * from official.nlp.modeling.layers.dense_einsum import DenseEinsum from official.nlp.modeling.layers.gated_feedforward import GatedFeedforward +from official.nlp.modeling.layers.gaussian_process import RandomFeatureGaussianProcess from official.nlp.modeling.layers.masked_lm import MaskedLM from official.nlp.modeling.layers.masked_softmax import MaskedSoftmax +from official.nlp.modeling.layers.mat_mul_with_margin import MatMulWithMargin +from official.nlp.modeling.layers.mobile_bert_layers import MobileBertEmbedding +from official.nlp.modeling.layers.mobile_bert_layers import MobileBertMaskedLM +from official.nlp.modeling.layers.mobile_bert_layers import MobileBertTransformer from official.nlp.modeling.layers.multi_channel_attention import * from official.nlp.modeling.layers.on_device_embedding import OnDeviceEmbedding -from official.nlp.modeling.layers.position_embedding import PositionEmbedding +from official.nlp.modeling.layers.position_embedding import RelativePositionBias +from official.nlp.modeling.layers.position_embedding import RelativePositionEmbedding +from official.nlp.modeling.layers.relative_attention import MultiHeadRelativeAttention +from official.nlp.modeling.layers.relative_attention import TwoStreamRelativeAttention from official.nlp.modeling.layers.rezero_transformer import ReZeroTransformer from official.nlp.modeling.layers.self_attention_mask import SelfAttentionMask +from official.nlp.modeling.layers.spectral_normalization import * from official.nlp.modeling.layers.talking_heads_attention import TalkingHeadsAttention +from official.nlp.modeling.layers.text_layers import BertPackInputs +from official.nlp.modeling.layers.text_layers import BertTokenizer +from official.nlp.modeling.layers.text_layers import SentencepieceTokenizer +from official.nlp.modeling.layers.tn_transformer_expand_condense import 
TNTransformerExpandCondense from official.nlp.modeling.layers.transformer import * from official.nlp.modeling.layers.transformer_scaffold import TransformerScaffold +from official.nlp.modeling.layers.transformer_xl import TransformerXL +from official.nlp.modeling.layers.transformer_xl import TransformerXLBlock diff --git a/official/nlp/modeling/layers/attention.py b/official/nlp/modeling/layers/attention.py index 99692b281794385a97af341d03dea0ee6c46b95b..9b13b89695d2e869b723270d5ac6ed929a3d1369 100644 --- a/official/nlp/modeling/layers/attention.py +++ b/official/nlp/modeling/layers/attention.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,481 +11,61 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based attention layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import collections import math -import string -import numpy as np import tensorflow as tf -from official.nlp.modeling.layers import masked_softmax - EinsumDense = tf.keras.layers.experimental.EinsumDense -_CHR_IDX = string.ascii_lowercase - - -def _build_attention_equation(qkv_rank, attn_axes): - """Builds einsum equations for the attention computation. - - Query, key, value inputs after projection are expected to have the shape as: - (bs, , , num_heads, channels). - bs and are treated as . 
- The attention operations can be generalized: - (1) Query-key dot product: - (, , num_heads, channels), (, - , num_heads, channels) -> (, - num_heads, , ) - (2) Combination: - (, num_heads, , ), - (, , num_heads, channels) -> (, - , num_heads, channels) - - Args: - qkv_rank: the rank of query, key, value tensors. - attn_axes: a list/tuple of axes, [1, rank), that will do attention. - - Returns: - Einsum equations. - """ - target_notation = _CHR_IDX[:qkv_rank] - # `batch_dims` includes the head dim. - batch_dims = tuple(np.delete(range(qkv_rank), attn_axes + (qkv_rank - 1,))) - letter_offset = qkv_rank - source_notation = "" - for i in range(qkv_rank): - if i in batch_dims or i == qkv_rank - 1: - source_notation += target_notation[i] - else: - source_notation += _CHR_IDX[letter_offset] - letter_offset += 1 - - product_notation = "".join([target_notation[i] for i in batch_dims] + - [target_notation[i] for i in attn_axes] + - [source_notation[i] for i in attn_axes]) - dot_product_equation = "%s,%s->%s" % (source_notation, target_notation, - product_notation) - attn_scores_rank = len(product_notation) - combine_equation = "%s,%s->%s" % (product_notation, source_notation, - target_notation) - return dot_product_equation, combine_equation, attn_scores_rank - - -def _build_proj_equation(free_dims, bound_dims, output_dims): - """Builds an einsum equation for projections inside multi-head attention.""" - input_str = "" - kernel_str = "" - output_str = "" - bias_axes = "" - letter_offset = 0 - for i in range(free_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - output_str += char - - letter_offset += free_dims - for i in range(bound_dims): - char = _CHR_IDX[i + letter_offset] - input_str += char - kernel_str += char - - letter_offset += bound_dims - for i in range(output_dims): - char = _CHR_IDX[i + letter_offset] - kernel_str += char - output_str += char - bias_axes += char - equation = "%s,%s->%s" % (input_str, kernel_str, output_str) - - return equation, 
bias_axes, len(output_str) - - -def _get_output_shape(output_rank, known_last_dims): - return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) - - -@tf.keras.utils.register_keras_serializable(package="Text") -class MultiHeadAttention(tf.keras.layers.Layer): - """MultiHeadAttention layer. - - This is an implementation of multi-headed attention based on "Attention - is all you Need". If `query`, `key,` `value` are the same, then - this is self-attention. Each timestep in `query` attends to the - corresponding sequence in `key`, and returns a fixed-width vector. - - This layer first projects `query`, `key` and `value`. These are - (effectively) a list of tensors of length `num_attention_heads`, where the - corresponding shapes are [batch_size, , key_size], - [batch_size, , key_size], - [batch_size, , value_size]. - - Then, the query and key tensors are dot-producted and scaled. These are - softmaxed to obtain attention probabilities. The value tensors are then - interpolated by these probabilities, then concatenated back to a single - tensor. - - Finally, the result tensor with the last dimension as value_size can take an - linear projection and return. - - Examples: - - Performs 1D cross-attention over two sequence inputs with an attention mask. - Returns the additional attention weights over heads. - - >>> layer = MultiHeadAttention(num_heads=2, key_size=2, - ... return_attention_scores=True) - >>> target = tf.keras.Input(shape=[8, 16]) - >>> source = tf.keras.Input(shape=[4, 16]) - >>> mask_tensor = tf.keras.Input(shape=[8, 4]) - >>> output_tensor, weights = layer([target, source]) - >>> print(output_tensor.shape), print(weights.shape) - (None, 8, 16) (None, 2, 8, 4) - - Performs 2D self-attention over a 5D input tensor on axes 2 and 3. 
- - >>> layer = MultiHeadAttention(num_heads=2, key_size=2, attention_axes=(2, 3)) - >>> input_tensor = tf.keras.Input(shape=[5, 3, 4, 16]) - >>> output_tensor = layer([input_tensor, input_tensor]) - >>> print(output_tensor.shape) - (None, 5, 3, 4, 16) - - Arguments: - num_heads: Number of attention heads. - key_size: Size of each attention head for query and key. - value_size: Size of each attention head for value. - dropout: Dropout probability. - use_bias: Boolean, whether the dense layers use bias vectors/matrices. - output_shape: The expected shape of an output tensor, besides the batch and - sequence dims. If not specified, projects back to the key feature dim. - attention_axes: axes over which the attention is applied. `None` means - attention over all axes, but batch, heads, and features. - return_attention_scores: bool, if `True`, returns the multi-head - attention scores as an additional output argument. - kernel_initializer: Initializer for dense layer kernels. - bias_initializer: Initializer for dense layer biases. - kernel_regularizer: Regularizer for dense layer kernels. - bias_regularizer: Regularizer for dense layer biases. - activity_regularizer: Regularizer for dense layer activity. - kernel_constraint: Constraint for dense layer kernels. - bias_constraint: Constraint for dense layer kernels. 
- """ - - def __init__(self, - num_heads, - key_size, - value_size=None, - dropout=0.0, - use_bias=True, - output_shape=None, - attention_axes=None, - return_attention_scores=False, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(MultiHeadAttention, self).__init__(**kwargs) - self._num_heads = num_heads - self._key_size = key_size - self._value_size = value_size if value_size else key_size - self._dropout = dropout - self._use_bias = use_bias - self._output_shape = output_shape - self._return_attention_scores = return_attention_scores - self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) - self._bias_initializer = tf.keras.initializers.get(bias_initializer) - self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) - self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) - self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) - self._bias_constraint = tf.keras.constraints.get(bias_constraint) - if attention_axes is not None and not isinstance(attention_axes, - collections.abc.Sized): - self._attention_axes = (attention_axes,) - else: - self._attention_axes = attention_axes - - def get_config(self): - config = { - "num_heads": - self._num_heads, - "key_size": - self._key_size, - "value_size": - self._value_size, - "dropout": - self._dropout, - "use_bias": - self._use_bias, - "output_shape": - self._output_shape, - "attention_axes": - self._attention_axes, - "return_attention_scores": - self._return_attention_scores, - "kernel_initializer": - tf.keras.initializers.serialize(self._kernel_initializer), - "bias_initializer": - tf.keras.initializers.serialize(self._bias_initializer), - "kernel_regularizer": - tf.keras.regularizers.serialize(self._kernel_regularizer), - "bias_regularizer": - 
tf.keras.regularizers.serialize(self._bias_regularizer), - "activity_regularizer": - tf.keras.regularizers.serialize(self._activity_regularizer), - "kernel_constraint": - tf.keras.constraints.serialize(self._kernel_constraint), - "bias_constraint": - tf.keras.constraints.serialize(self._bias_constraint) - } - base_config = super(MultiHeadAttention, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def build(self, input_shape): - inputs_len = len(input_shape) - if inputs_len > 3 or inputs_len < 2: - raise ValueError( - "Expects inputs list of length 2 or 3, namely [query, value] or " - "[query, value, key]. " - "Given length: %d" % inputs_len) - tensor_shapes = tf.nest.map_structure(tf.TensorShape, input_shape) - query_shape = tensor_shapes[0] - value_shape = tensor_shapes[1] - key_shape = tensor_shapes[2] if inputs_len == 3 else value_shape - - common_kwargs = dict( - kernel_initializer=self._kernel_initializer, - bias_initializer=self._bias_initializer, - kernel_regularizer=self._kernel_regularizer, - bias_regularizer=self._bias_regularizer, - activity_regularizer=self._activity_regularizer, - kernel_constraint=self._kernel_constraint, - bias_constraint=self._bias_constraint) - - free_dims = query_shape.rank - 1 - einsum_equation, bias_axes, output_rank = _build_proj_equation( - free_dims, bound_dims=1, output_dims=2) - self._query_dense = EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._key_size]), - bias_axes=bias_axes if self._use_bias else None, - name="query", - **common_kwargs) - einsum_equation, bias_axes, output_rank = _build_proj_equation( - key_shape.rank - 1, bound_dims=1, output_dims=2) - self._key_dense = EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._key_size]), - bias_axes=bias_axes if self._use_bias else None, - name="key", - **common_kwargs) - einsum_equation, bias_axes, output_rank = 
_build_proj_equation( - value_shape.rank - 1, bound_dims=1, output_dims=2) - self._value_dense = EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, - [self._num_heads, self._value_size]), - bias_axes=bias_axes if self._use_bias else None, - name="value", - **common_kwargs) - - # Builds the attention computations for multi-head dot product attention. - # These computations could be wrapped into the keras attention layer once it - # support mult-head einsum computations. - self._build_attention(output_rank) - if self._output_shape: - if not isinstance(self._output_shape, collections.abc.Sized): - output_shape = [self._output_shape] - else: - output_shape = self._output_shape - else: - output_shape = [query_shape[-1]] - einsum_equation, bias_axes, output_rank = _build_proj_equation( - free_dims, bound_dims=2, output_dims=len(output_shape)) - self._output_dense = EinsumDense( - einsum_equation, - output_shape=_get_output_shape(output_rank - 1, output_shape), - bias_axes=bias_axes if self._use_bias else None, - name="attention_output", - **common_kwargs) - super(MultiHeadAttention, self).build(input_shape) - - def _build_attention(self, qkv_rank): - """Builds multi-head dot-product attention computations. - - This function builds attributes necessary for `_compute_attention` to - costomize attention computation to replace the default dot-product - attention. - - Args: - qkv_rank: the rank of query, key, value tensors. 
- """ - if self._attention_axes is None: - self._attention_axes = tuple(range(1, qkv_rank - 2)) - else: - self._attention_axes = tuple(self._attention_axes) - self._dot_product_equation, self._combine_equation, attn_scores_rank = ( - _build_attention_equation(qkv_rank, attn_axes=self._attention_axes)) - norm_axes = tuple( - range(attn_scores_rank - len(self._attention_axes), attn_scores_rank)) - self._masked_softmax = masked_softmax.MaskedSoftmax( - mask_expansion_axes=[1], normalization_axes=norm_axes) - self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout) - - def _compute_attention(self, - query_tensor, - key_tensor, - value_tensor, - attention_mask=None): - """Applies Dot-product attention with query, key, value tensors. - - This function defines the computation inside `call` with projected - multi-head Q, K, V inputs. Users can override this function for customized - attention implementation. - - Args: - query_tensor: Projected query `Tensor` of shape `[B, T, N, key_size]`. - key_tensor: Projected key `Tensor` of shape `[B, T, N, key_size]`. - value_tensor: Projected value `Tensor` of shape `[B, T, N, value_size]`. - attention_mask: a boolean mask of shape `[B, T, S]`, that prevents - attention to certain positions. - - Returns: - attention_output: Multi-headed outputs of attention computation. - attention_scores: Multi-headed attention weights. - """ - # Take the dot product between "query" and "key" to get the raw - # attention scores. - attention_scores = tf.einsum(self._dot_product_equation, key_tensor, - query_tensor) - attention_scores = tf.multiply(attention_scores, - 1.0 / math.sqrt(float(self._key_size))) - - # Normalize the attention scores to probabilities. - # `attention_scores` = [B, N, T, S] - attention_scores = self._masked_softmax(attention_scores, attention_mask) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. 
- attention_scores_dropout = self._dropout_layer(attention_scores) - - # `context_layer` = [B, T, N, H] - attention_output = tf.einsum(self._combine_equation, - attention_scores_dropout, value_tensor) - return attention_output, attention_scores - - def call(self, inputs, attention_mask=None): - """Implements the forward pass. - - Size glossary: - * Number of heads (H): the number of attention heads. - * Value size (V): the size of each value embedding per head. - * Key size (K): the size of each key embedding per head. Equally, the size - of each query embedding per head. Typically K <= V. - * Batch dimensions (B). - * Query (target) attention axes shape (T). - * Value (source) attention axes shape (S), the rank must match the target. - - Args: - inputs: List of the following tensors: - * query: Query `Tensor` of shape `[B, T, dim]`. - * value: Value `Tensor` of shape `[B, S, dim]`. - * key: Optional key `Tensor` of shape `[B, S, dim]`. If not given, will - use `value` for both `key` and `value`, which is the most common case. - attention_mask: a boolean mask of shape `[B, T, S]`, that prevents - attention to certain positions. - - Returns: - attention_output: The result of the computation, of shape [B, T, E], - where `T` is for target sequence shapes and `E` is the query input last - dimension if `output_shape` is `None`. Otherwise, the multi-head outputs - are project to the shape specified by `output_shape`. - attention_scores: [Optional] multi-head attention coeffients over - attention - axes. - """ - inputs_len = len(inputs) - if inputs_len > 3 or inputs_len < 2: - raise ValueError( - "Expects inputs list of length 2 or 3, namely [query, value] or " - "[query, value, key]. 
" - "Given length: %d" % inputs_len) - query = inputs[0] - value = inputs[1] - key = inputs[2] if inputs_len == 3 else value - - # N = `num_attention_heads` - # H = `size_per_head` - # `query_tensor` = [B, T, N ,H] - query_tensor = self._query_dense(query) - - # `key_tensor` = [B, S, N, H] - key_tensor = self._key_dense(key) - - # `value_tensor` = [B, S, N, H] - value_tensor = self._value_dense(value) - - attention_output, attention_scores = self._compute_attention( - query_tensor, key_tensor, value_tensor, attention_mask) - attention_output = self._output_dense(attention_output) - - if self._return_attention_scores: - return attention_output, attention_scores - return attention_output +MultiHeadAttention = tf.keras.layers.MultiHeadAttention @tf.keras.utils.register_keras_serializable(package="Text") -class CachedAttention(MultiHeadAttention): +class CachedAttention(tf.keras.layers.MultiHeadAttention): """Attention layer with cache used for auto-agressive decoding. - Arguments are the same as `MultiHeadAttention` layer. + Arguments are the same as `tf.keras.layers.MultiHeadAttention` layer. """ - def _update_cache(self, key_tensor, value_tensor, cache, decode_loop_step): + def _update_cache(self, key, value, cache, decode_loop_step): """Updates cache states and gets full-length key/value tensors.""" # Combines cached keys and values with new keys and values. if decode_loop_step is not None: # TPU special case. 
key_seq_dim = cache["key"].shape.as_list()[1] indices = tf.reshape( - tf.one_hot(decode_loop_step, key_seq_dim, dtype=key_tensor.dtype), + tf.one_hot(decode_loop_step, key_seq_dim, dtype=key.dtype), [1, key_seq_dim, 1, 1]) - key_tensor = cache["key"] + key_tensor * indices + key = cache["key"] + key * indices value_seq_dim = cache["value"].shape.as_list()[1] indices = tf.reshape( - tf.one_hot(decode_loop_step, value_seq_dim, dtype=value_tensor.dtype), + tf.one_hot(decode_loop_step, value_seq_dim, dtype=value.dtype), [1, value_seq_dim, 1, 1]) - value_tensor = cache["value"] + value_tensor * indices + value = cache["value"] + value * indices else: - key_tensor = tf.concat( - [tf.cast(cache["key"], key_tensor.dtype), key_tensor], axis=1) - value_tensor = tf.concat( - [tf.cast(cache["value"], value_tensor.dtype), value_tensor], axis=1) + key = tf.concat([tf.cast(cache["key"], key.dtype), key], axis=1) + value = tf.concat([tf.cast(cache["value"], value.dtype), value], axis=1) # Update cache - cache["key"] = key_tensor - cache["value"] = value_tensor + cache["key"] = key + cache["value"] = value - return key_tensor, value_tensor + return key, value def call(self, - inputs, + query, + value, + key=None, attention_mask=None, cache=None, - decode_loop_step=None): - from_tensor = inputs[0] - to_tensor = inputs[1] + decode_loop_step=None, + return_attention_scores=False): + if not self._built_from_signature: + self._build_from_signature(query=query, value=value, key=key) + if key is None: + key = value # Scalar dimensions referenced here: # B = batch size (number of sequences) @@ -494,25 +73,23 @@ class CachedAttention(MultiHeadAttention): # T = `to_tensor` sequence length # N = `num_attention_heads` # H = `size_per_head` - # `query_tensor` = [B, F, N ,H] - query_tensor = self._query_dense(from_tensor) + # `query` = [B, F, N ,H] + query = self._query_dense(query) - # `key_tensor` = [B, T, N, H] - key_tensor = self._key_dense(to_tensor) + # `key` = [B, T, N, H] + key = 
self._key_dense(key) - # `value_tensor` = [B, T, N, H] - value_tensor = self._value_dense(to_tensor) + # `value` = [B, T, N, H] + value = self._value_dense(value) if cache: - key_tensor, value_tensor = self._update_cache(key_tensor, value_tensor, - cache, decode_loop_step) + key, value = self._update_cache(key, value, cache, decode_loop_step) + + query = tf.multiply(query, 1.0 / math.sqrt(float(self._key_dim))) # Take the dot product between "query" and "key" to get the raw # attention scores. - attention_scores = tf.einsum(self._dot_product_equation, key_tensor, - query_tensor) - attention_scores = tf.multiply(attention_scores, - 1.0 / math.sqrt(float(self._key_size))) + attention_scores = tf.einsum(self._dot_product_equation, key, query) # Normalize the attention scores to probabilities. # `attention_scores` = [B, N, F, T] @@ -523,8 +100,8 @@ class CachedAttention(MultiHeadAttention): attention_scores = self._dropout_layer(attention_scores) # `context_layer` = [B, F, N, H] attention_output = tf.einsum(self._combine_equation, attention_scores, - value_tensor) + value) attention_output = self._output_dense(attention_output) - if self._return_attention_scores: + if return_attention_scores: return attention_output, attention_scores, cache return attention_output, cache diff --git a/official/nlp/modeling/layers/attention_test.py b/official/nlp/modeling/layers/attention_test.py index ceb96f5084d795cdbafa7cdb352fb4692034f803..e09f88980cc60d35a40b755c45cad1a802dbfadc 100644 --- a/official/nlp/modeling/layers/attention_test.py +++ b/official/nlp/modeling/layers/attention_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,14 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for the attention layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for the attention layer.""" -from absl.testing import parameterized import numpy as np import tensorflow as tf @@ -26,164 +21,6 @@ from tensorflow.python.keras import keras_parameterized # pylint: disable=g-dir from official.nlp.modeling.layers import attention -# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It -# guarantees forward compatibility of this code for the V2 switchover. -@keras_parameterized.run_all_keras_modes -class MultiHeadAttentionTest(keras_parameterized.TestCase): - - @parameterized.named_parameters( - ("key_value_same_proj", None, None, [40, 80]), - ("key_value_different_proj", 32, 60, [40, 60]), - ) - def test_non_masked_attention(self, value_size, output_shape, output_dims): - """Test that the attention layer can be created without a mask tensor.""" - test_layer = attention.MultiHeadAttention( - num_heads=12, - key_size=64, - value_size=value_size, - output_shape=output_shape) - # Create a 3-dimensional input (the first dimension is implicit). - query = tf.keras.Input(shape=(40, 80)) - value = tf.keras.Input(shape=(20, 80)) - output = test_layer([query, value]) - self.assertEqual(output.shape.as_list(), [None] + output_dims) - - def test_non_masked_self_attention(self): - """Test with one input (self-attenntion) and no mask tensor.""" - test_layer = attention.MultiHeadAttention(num_heads=12, key_size=64) - # Create a 3-dimensional input (the first dimension is implicit). 
- query = tf.keras.Input(shape=(40, 80)) - output = test_layer([query, query]) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - - def test_attention_scores(self): - """Test attention outputs with coefficients.""" - test_layer = attention.MultiHeadAttention( - num_heads=12, key_size=64, return_attention_scores=True) - # Create a 3-dimensional input (the first dimension is implicit). - query = tf.keras.Input(shape=(40, 80)) - output, coef = test_layer([query, query]) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - self.assertEqual(coef.shape.as_list(), [None, 12, 40, 40]) - - @parameterized.named_parameters(("with_bias", True), ("no_bias", False)) - def test_masked_attention(self, use_bias): - """Test with a mask tensor.""" - test_layer = attention.MultiHeadAttention( - num_heads=2, key_size=2, use_bias=use_bias) - # Create a 3-dimensional input (the first dimension is implicit). - batch_size = 3 - query = tf.keras.Input(shape=(4, 8)) - value = tf.keras.Input(shape=(2, 8)) - mask_tensor = tf.keras.Input(shape=(4, 2)) - output = test_layer([query, value], mask_tensor) - - # Create a model containing the test layer. - model = tf.keras.Model([query, value, mask_tensor], output) - - # Generate data for the input (non-mask) tensors. - from_data = 10 * np.random.random_sample((batch_size, 4, 8)) - to_data = 10 * np.random.random_sample((batch_size, 2, 8)) - - # Invoke the data with a random set of mask data. This should mask at least - # one element. - mask_data = np.random.randint(2, size=(batch_size, 4, 2)) - masked_output_data = model.predict([from_data, to_data, mask_data]) - - # Invoke the same data, but with a null mask (where no elements are masked). - null_mask_data = np.ones((batch_size, 4, 2)) - unmasked_output_data = model.predict([from_data, to_data, null_mask_data]) - - # Because one data is masked and one is not, the outputs should not be the - # same. 
- self.assertNotAllClose(masked_output_data, unmasked_output_data) - - # Tests the layer with three inputs: Q, K, V. - key = tf.keras.Input(shape=(2, 8)) - output = test_layer([query, value, key], mask_tensor) - model = tf.keras.Model([query, value, key, mask_tensor], output) - - masked_output_data = model.predict([from_data, to_data, to_data, mask_data]) - unmasked_output_data = model.predict( - [from_data, to_data, to_data, null_mask_data]) - # Because one data is masked and one is not, the outputs should not be the - # same. - self.assertNotAllClose(masked_output_data, unmasked_output_data) - - if use_bias: - self.assertLen(test_layer._query_dense.trainable_variables, 2) - self.assertLen(test_layer._output_dense.trainable_variables, 2) - else: - self.assertLen(test_layer._query_dense.trainable_variables, 1) - self.assertLen(test_layer._output_dense.trainable_variables, 1) - - def test_initializer(self): - """Test with a specified initializer.""" - test_layer = attention.MultiHeadAttention( - num_heads=12, - key_size=64, - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) - # Create a 3-dimensional input (the first dimension is implicit). - query = tf.keras.Input(shape=(40, 80)) - output = test_layer([query, query]) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - - @parameterized.named_parameters( - ("4d_inputs_one_free_batch", [3, 4], [3, 2], [4, 2], (2,)), - ("4D_inputs_2D_attention", [3, 4], [3, 2], [3, 4, 3, 2], (1, 2)), - ("5D_inputs_2D_attention", [5, 3, 4], [5, 3, 2], [3, 4, 3, 2], (2, 3))) - def test_high_dim_attention(self, q_dims, v_dims, mask_dims, attention_axes): - """Test with a mask tensor.""" - test_layer = attention.MultiHeadAttention( - num_heads=2, key_size=2, attention_axes=attention_axes) - batch_size, hidden_size = 3, 8 - # Generate data for the input (non-mask) tensors. 
- query_shape = [batch_size] + q_dims + [hidden_size] - value_shape = [batch_size] + v_dims + [hidden_size] - mask_shape = [batch_size] + mask_dims - query = 10 * np.random.random_sample(query_shape) - value = 10 * np.random.random_sample(value_shape) - - # Invoke the data with a random set of mask data. This should mask at least - # one element. - mask_data = np.random.randint(2, size=mask_shape).astype("bool") - output = test_layer([query, value], mask_data) - - # Invoke the same data, but with a null mask (where no elements are masked). - null_mask_data = np.ones(mask_shape) - unmasked_output = test_layer([query, value], null_mask_data) - # Because one data is masked and one is not, the outputs should not be the - # same. - self.assertNotAllClose(output, unmasked_output) - - -class SubclassAttention(attention.MultiHeadAttention): - - def _build_attention(self, qkv_rank): - pass - - def _compute_attention(self, - query_tensor, - key_tensor, - value_tensor, - attention_mask=None): - return value_tensor, None - - -@keras_parameterized.run_all_keras_modes -class AttentionSubclassTest(keras_parameterized.TestCase): - - def test_initializer(self): - """Test with a specified initializer.""" - test_layer = SubclassAttention( - num_heads=12, - key_size=64) - # Create a 3-dimensional input (the first dimension is implicit). - query = tf.keras.Input(shape=(40, 80)) - output = test_layer([query, query]) - self.assertEqual(output.shape.as_list(), [None, 40, 80]) - - def _create_cache(batch_size, init_decode_length, num_heads, head_size): return { "key": @@ -208,7 +45,7 @@ class CachedAttentionTest(keras_parameterized.TestCase): init_decode_length = 0 # Directly tests the keras layer. cache = _create_cache(batch_size, init_decode_length, num_heads, head_size) - layer = attention.CachedAttention(num_heads=num_heads, key_size=head_size) + layer = attention.CachedAttention(num_heads=num_heads, key_dim=head_size) # Generate data for the input (non-mask) tensors. 
from_data = tf.zeros((batch_size, from_seq_length, 8), dtype=np.float32) @@ -216,12 +53,14 @@ class CachedAttentionTest(keras_parameterized.TestCase): # one element. mask_data = np.random.randint( 2, size=(batch_size, from_seq_length, from_seq_length)) - masked_output_data, cache = layer([from_data, from_data], mask_data, cache) + masked_output_data, cache = layer( + query=from_data, value=from_data, attention_mask=mask_data, cache=cache) self.assertEqual(masked_output_data.shape, (3, 4, 8)) self.assertEqual(cache["value"].shape, (3, 4, 2, 2)) # Tests inputs without cache. - masked_output_data, cache = layer([from_data, from_data, mask_data]) + masked_output_data, cache = layer( + query=from_data, value=from_data, attention_mask=mask_data) self.assertEqual(masked_output_data.shape, (3, 4, 8)) self.assertIsNone(cache) @@ -235,7 +74,7 @@ class CachedAttentionTest(keras_parameterized.TestCase): # Directly tests the keras layer. cache = _create_cache(batch_size, init_decode_length, num_heads, head_size) - layer = attention.CachedAttention(num_heads=num_heads, key_size=head_size) + layer = attention.CachedAttention(num_heads=num_heads, key_dim=head_size) # Generate data for the input (non-mask) tensors. from_data = tf.zeros((batch_size, from_seq_length, 8), dtype=np.float32) @@ -243,10 +82,12 @@ class CachedAttentionTest(keras_parameterized.TestCase): mask_data = np.random.randint( 2, size=(batch_size, from_seq_length, from_seq_length), dtype=np.int32) # Testing the invocation directly as Keras cannot consume inputs correctly. 
- masked_output_data, cache = layer([from_data, from_data], - mask_data, - cache, - decode_loop_step=decode_loop_step) + masked_output_data, cache = layer( + query=from_data, + value=from_data, + attention_mask=mask_data, + cache=cache, + decode_loop_step=decode_loop_step) self.assertEqual(masked_output_data.shape, (3, 4, 8)) self.assertEqual(cache["value"].shape, (3, 4, 2, 2)) diff --git a/official/nlp/modeling/layers/cls_head.py b/official/nlp/modeling/layers/cls_head.py index 0240511429e58453fa9c483be120705347a0c754..945bd03cbae4e9c5c1b21ce4deacb751bfce0d29 100644 --- a/official/nlp/modeling/layers/cls_head.py +++ b/official/nlp/modeling/layers/cls_head.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A Classification head layer which is common used with sequence encoders.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import tensorflow as tf from official.modeling import tf_utils +from official.nlp.modeling.layers import gaussian_process +from official.nlp.modeling.layers import spectral_normalization + class ClassificationHead(tf.keras.layers.Layer): """Pooling head for sentence-level classification tasks.""" @@ -38,7 +36,8 @@ class ClassificationHead(tf.keras.layers.Layer): """Initializes the `ClassificationHead`. Args: - inner_dim: The dimensionality of inner projection layer. 
+ inner_dim: The dimensionality of inner projection layer. If 0 or `None` + then only the output projection layer is created. num_classes: Number of output classes. cls_token_idx: The index inside the sequence to pool. activation: Dense layer activation. @@ -46,7 +45,7 @@ class ClassificationHead(tf.keras.layers.Layer): initializer: Initializer for dense layer kernels. **kwargs: Keyword arguments. """ - super(ClassificationHead, self).__init__(**kwargs) + super().__init__(**kwargs) self.dropout_rate = dropout_rate self.inner_dim = inner_dim self.num_classes = num_classes @@ -54,24 +53,31 @@ class ClassificationHead(tf.keras.layers.Layer): self.initializer = tf.keras.initializers.get(initializer) self.cls_token_idx = cls_token_idx - self.dense = tf.keras.layers.Dense( - units=inner_dim, - activation=self.activation, - kernel_initializer=self.initializer, - name="pooler_dense") - self.dropout = tf.keras.layers.Dropout(rate=self.dropout_rate) + if self.inner_dim: + self.dense = tf.keras.layers.Dense( + units=self.inner_dim, + activation=self.activation, + kernel_initializer=self.initializer, + name="pooler_dense") + self.dropout = tf.keras.layers.Dropout(rate=self.dropout_rate) + self.out_proj = tf.keras.layers.Dense( units=num_classes, kernel_initializer=self.initializer, name="logits") def call(self, features): - x = features[:, self.cls_token_idx, :] # take token. - x = self.dense(x) - x = self.dropout(x) + if not self.inner_dim: + x = features + else: + x = features[:, self.cls_token_idx, :] # take token. 
+ x = self.dense(x) + x = self.dropout(x) + x = self.out_proj(x) return x def get_config(self): config = { + "cls_token_idx": self.cls_token_idx, "dropout_rate": self.dropout_rate, "num_classes": self.num_classes, "inner_dim": self.inner_dim, @@ -88,3 +94,241 @@ class ClassificationHead(tf.keras.layers.Layer): @property def checkpoint_items(self): return {self.dense.name: self.dense} + + +class MultiClsHeads(tf.keras.layers.Layer): + """Pooling heads sharing the same pooling stem.""" + + def __init__(self, + inner_dim, + cls_list, + cls_token_idx=0, + activation="tanh", + dropout_rate=0.0, + initializer="glorot_uniform", + **kwargs): + """Initializes the `MultiClsHeads`. + + Args: + inner_dim: The dimensionality of inner projection layer. If 0 or `None` + then only the output projection layer is created. + cls_list: a list of pairs of (classification problem name and the numbers + of classes. + cls_token_idx: The index inside the sequence to pool. + activation: Dense layer activation. + dropout_rate: Dropout probability. + initializer: Initializer for dense layer kernels. + **kwargs: Keyword arguments. + """ + super().__init__(**kwargs) + self.dropout_rate = dropout_rate + self.inner_dim = inner_dim + self.cls_list = cls_list + self.activation = tf_utils.get_activation(activation) + self.initializer = tf.keras.initializers.get(initializer) + self.cls_token_idx = cls_token_idx + + if self.inner_dim: + self.dense = tf.keras.layers.Dense( + units=inner_dim, + activation=self.activation, + kernel_initializer=self.initializer, + name="pooler_dense") + self.dropout = tf.keras.layers.Dropout(rate=self.dropout_rate) + self.out_projs = [] + for name, num_classes in cls_list: + self.out_projs.append( + tf.keras.layers.Dense( + units=num_classes, kernel_initializer=self.initializer, + name=name)) + + def call(self, features): + if not self.inner_dim: + x = features + else: + x = features[:, self.cls_token_idx, :] # take token. 
+ x = self.dense(x) + x = self.dropout(x) + + outputs = {} + for proj_layer in self.out_projs: + outputs[proj_layer.name] = proj_layer(x) + return outputs + + def get_config(self): + config = { + "dropout_rate": self.dropout_rate, + "cls_token_idx": self.cls_token_idx, + "cls_list": self.cls_list, + "inner_dim": self.inner_dim, + "activation": tf.keras.activations.serialize(self.activation), + "initializer": tf.keras.initializers.serialize(self.initializer), + } + config.update(super().get_config()) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def checkpoint_items(self): + items = {self.dense.name: self.dense} + items.update({v.name: v for v in self.out_projs}) + return items + + +class GaussianProcessClassificationHead(ClassificationHead): + """Gaussian process-based pooling head for sentence classification. + + This class implements a classifier head for BERT encoder that is based on the + spectral-normalized neural Gaussian process (SNGP) [1]. SNGP is a simple + method to improve a neural network's uncertainty quantification ability + without sacrificing accuracy or lantency. It applies spectral normalization to + the hidden pooler layer, and then replaces the dense output layer with a + Gaussian process. + + + [1]: Jeremiah Liu et al. Simple and Principled Uncertainty Estimation with + Deterministic Deep Learning via Distance Awareness. + In _Neural Information Processing Systems_, 2020. + https://arxiv.org/abs/2006.10108 + """ + + def __init__(self, + inner_dim, + num_classes, + cls_token_idx=0, + activation="tanh", + dropout_rate=0.0, + initializer="glorot_uniform", + use_spec_norm=True, + use_gp_layer=True, + temperature=None, + **kwargs): + """Initializes the `GaussianProcessClassificationHead`. + + Args: + inner_dim: The dimensionality of inner projection layer. If 0 or `None` + then only the output projection layer is created. + num_classes: Number of output classes. 
+ cls_token_idx: The index inside the sequence to pool. + activation: Dense layer activation. + dropout_rate: Dropout probability. + initializer: Initializer for dense layer kernels. + use_spec_norm: Whether to apply spectral normalization to pooler layer. + use_gp_layer: Whether to use Gaussian process as the output layer. + temperature: The temperature parameter to be used for mean-field + approximation during inference. If None then no mean-field adjustment is + applied. + **kwargs: Additional keyword arguments. + """ + # Collects spectral normalization and Gaussian process args from kwargs. + self.use_spec_norm = use_spec_norm + self.use_gp_layer = use_gp_layer + self.spec_norm_kwargs = extract_spec_norm_kwargs(kwargs) + self.gp_layer_kwargs = extract_gp_layer_kwargs(kwargs) + self.temperature = temperature + + super().__init__( + inner_dim=inner_dim, + num_classes=num_classes, + cls_token_idx=cls_token_idx, + activation=activation, + dropout_rate=dropout_rate, + initializer=initializer, + **kwargs) + + # Applies spectral normalization to the dense pooler layer. + if self.use_spec_norm and hasattr(self, "dense"): + self.dense = spectral_normalization.SpectralNormalization( + self.dense, inhere_layer_name=True, **self.spec_norm_kwargs) + + # Replace Dense output layer with the Gaussian process layer. + if use_gp_layer: + self.out_proj = gaussian_process.RandomFeatureGaussianProcess( + self.num_classes, + kernel_initializer=self.initializer, + name="logits", + **self.gp_layer_kwargs) + + def call(self, features, training=False, return_covmat=False): + """Returns model output. + + Dring training, the model returns raw logits. During evaluation, the model + returns uncertainty adjusted logits, and (optionally) the covariance matrix. + + Arguments: + features: A tensor of input features, shape (batch_size, feature_dim). + training: Whether the model is in training mode. + return_covmat: Whether the model should also return covariance matrix if + `use_gp_layer=True`. 
During training, it is recommended to set + `return_covmat=False` to be compatible with the standard Keras pipelines + (e.g., `model.fit()`). + + Returns: + logits: Uncertainty-adjusted predictive logits, shape + (batch_size, num_classes). + covmat: (Optional) Covariance matrix, shape (batch_size, batch_size). + Returned only when return_covmat=True. + """ + logits = super().call(features) + + # Extracts logits and covariance matrix from model output. + if self.use_gp_layer: + logits, covmat = logits + else: + covmat = None + + # Computes the uncertainty-adjusted logits during evaluation. + if not training: + logits = gaussian_process.mean_field_logits( + logits, covmat, mean_field_factor=self.temperature) + + if return_covmat and covmat is not None: + return logits, covmat + return logits + + def reset_covariance_matrix(self): + """Resets covariance matrix of the Gaussian process layer.""" + if hasattr(self.out_proj, "reset_covariance_matrix"): + self.out_proj.reset_covariance_matrix() + + def get_config(self): + config = dict( + use_spec_norm=self.use_spec_norm, use_gp_layer=self.use_gp_layer) + + config.update(self.spec_norm_kwargs) + config.update(self.gp_layer_kwargs) + config["temperature"] = self.temperature + + config.update(super(GaussianProcessClassificationHead, self).get_config()) + return config + + +def extract_gp_layer_kwargs(kwargs): + """Extracts Gaussian process layer configs from a given kwarg.""" + + return dict( + num_inducing=kwargs.pop("num_inducing", 1024), + normalize_input=kwargs.pop("normalize_input", True), + gp_cov_momentum=kwargs.pop("gp_cov_momentum", 0.999), + gp_cov_ridge_penalty=kwargs.pop("gp_cov_ridge_penalty", 1.), + scale_random_features=kwargs.pop("scale_random_features", False), + l2_regularization=kwargs.pop("l2_regularization", 1e-6), + gp_cov_likelihood=kwargs.pop("gp_cov_likelihood", "gaussian"), + return_gp_cov=kwargs.pop("return_gp_cov", True), + return_random_features=kwargs.pop("return_random_features", False), + 
use_custom_random_features=kwargs.pop("use_custom_random_features", True), + custom_random_features_initializer=kwargs.pop( + "custom_random_features_initializer", "random_normal"), + custom_random_features_activation=kwargs.pop( + "custom_random_features_activation", None)) + + +def extract_spec_norm_kwargs(kwargs): + """Extracts spectral normalization configs from a given kwarg.""" + + return dict( + iteration=kwargs.pop("iteration", 1), + norm_multiplier=kwargs.pop("norm_multiplier", .99)) diff --git a/official/nlp/modeling/layers/cls_head_test.py b/official/nlp/modeling/layers/cls_head_test.py index ea671f94f5806800f1f5ce07df9fffeff7a3ab68..b115bddd145f3c75a07085cbcb09dcaf69c82a2c 100644 --- a/official/nlp/modeling/layers/cls_head_test.py +++ b/official/nlp/modeling/layers/cls_head_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,15 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for cls_head.""" +from absl.testing import parameterized import tensorflow as tf from official.nlp.modeling.layers import cls_head -class ClassificationHead(tf.test.TestCase): +class ClassificationHeadTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("no_pooler_layer", 0, 2), + ("has_pooler_layer", 5, 4)) + def test_pooler_layer(self, inner_dim, num_weights_expected): + test_layer = cls_head.ClassificationHead(inner_dim=inner_dim, num_classes=2) + features = tf.zeros(shape=(2, 10, 10), dtype=tf.float32) + _ = test_layer(features) + + num_weights_observed = len(test_layer.get_weights()) + self.assertEqual(num_weights_observed, num_weights_expected) def test_layer_invocation(self): test_layer = cls_head.ClassificationHead(inner_dim=5, num_classes=2) @@ -38,5 +48,151 @@ class ClassificationHead(tf.test.TestCase): self.assertAllEqual(layer.get_config(), new_layer.get_config()) +class MultiClsHeadsTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("no_pooler_layer", 0, 4), + ("has_pooler_layer", 5, 6)) + def test_pooler_layer(self, inner_dim, num_weights_expected): + cls_list = [("foo", 2), ("bar", 3)] + test_layer = cls_head.MultiClsHeads(inner_dim=inner_dim, cls_list=cls_list) + features = tf.zeros(shape=(2, 10, 10), dtype=tf.float32) + _ = test_layer(features) + + num_weights_observed = len(test_layer.get_weights()) + self.assertEqual(num_weights_observed, num_weights_expected) + + def test_layer_invocation(self): + cls_list = [("foo", 2), ("bar", 3)] + test_layer = cls_head.MultiClsHeads(inner_dim=5, cls_list=cls_list) + features = tf.zeros(shape=(2, 10, 10), dtype=tf.float32) + outputs = test_layer(features) + self.assertAllClose(outputs["foo"], [[0., 0.], [0., 0.]]) + self.assertAllClose(outputs["bar"], [[0., 0., 0.], [0., 0., 0.]]) + self.assertSameElements(test_layer.checkpoint_items.keys(), + 
["pooler_dense", "foo", "bar"]) + + def test_layer_serialization(self): + cls_list = [("foo", 2), ("bar", 3)] + test_layer = cls_head.MultiClsHeads(inner_dim=5, cls_list=cls_list) + new_layer = cls_head.MultiClsHeads.from_config(test_layer.get_config()) + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(test_layer.get_config(), new_layer.get_config()) + + +class GaussianProcessClassificationHead(tf.test.TestCase, + parameterized.TestCase): + + def setUp(self): + super().setUp() + self.spec_norm_kwargs = dict(norm_multiplier=1.,) + self.gp_layer_kwargs = dict(num_inducing=512) + + @parameterized.named_parameters(("no_pooler_layer", 0, 7), + ("has_pooler_layer", 5, 11)) + def test_pooler_layer(self, inner_dim, num_weights_expected): + test_layer = cls_head.GaussianProcessClassificationHead( + inner_dim=inner_dim, + num_classes=2, + use_spec_norm=True, + use_gp_layer=True, + initializer="zeros", + **self.spec_norm_kwargs, + **self.gp_layer_kwargs) + features = tf.zeros(shape=(2, 10, 10), dtype=tf.float32) + _ = test_layer(features) + + num_weights_observed = len(test_layer.get_weights()) + self.assertEqual(num_weights_observed, num_weights_expected) + + def test_layer_invocation(self): + test_layer = cls_head.GaussianProcessClassificationHead( + inner_dim=5, + num_classes=2, + use_spec_norm=True, + use_gp_layer=True, + initializer="zeros", + **self.spec_norm_kwargs, + **self.gp_layer_kwargs) + features = tf.zeros(shape=(2, 10, 10), dtype=tf.float32) + output = test_layer(features) + self.assertAllClose(output, [[0., 0.], [0., 0.]]) + self.assertSameElements(test_layer.checkpoint_items.keys(), + ["pooler_dense"]) + + @parameterized.named_parameters( + ("gp_layer_with_covmat", True, True), + ("gp_layer_no_covmat", True, False), + ("dense_layer_with_covmat", False, True), + ("dense_layer_no_covmat", False, False)) + def test_sngp_output_shape(self, use_gp_layer, return_covmat): + batch_size = 32 + num_classes = 2 + + 
test_layer = cls_head.GaussianProcessClassificationHead( + inner_dim=5, + num_classes=num_classes, + use_spec_norm=True, + use_gp_layer=use_gp_layer, + **self.spec_norm_kwargs, + **self.gp_layer_kwargs) + + features = tf.zeros(shape=(batch_size, 10, 10), dtype=tf.float32) + outputs = test_layer(features, return_covmat=return_covmat) + + if use_gp_layer and return_covmat: + self.assertIsInstance(outputs, tuple) + self.assertEqual(outputs[0].shape, (batch_size, num_classes)) + self.assertEqual(outputs[1].shape, (batch_size, batch_size)) + else: + self.assertIsInstance(outputs, tf.Tensor) + self.assertEqual(outputs.shape, (batch_size, num_classes)) + + def test_sngp_train_logits(self): + """Checks if temperature scaling is disabled during training.""" + features = tf.zeros(shape=(5, 10, 10), dtype=tf.float32) + + gp_layer = cls_head.GaussianProcessClassificationHead( + inner_dim=5, num_classes=2) + + # Without temperature. + gp_layer.temperature = None + outputs_no_temp = gp_layer(features, training=True) + + # With temperature. + gp_layer.temperature = 10. + outputs_with_temp = gp_layer(features, training=True) + + self.assertAllEqual(outputs_no_temp, outputs_with_temp) + + def test_layer_serialization(self): + layer = cls_head.GaussianProcessClassificationHead( + inner_dim=5, + num_classes=2, + use_spec_norm=True, + use_gp_layer=True, + **self.spec_norm_kwargs, + **self.gp_layer_kwargs) + new_layer = cls_head.GaussianProcessClassificationHead.from_config( + layer.get_config()) + + # If the serialization was successful, the new config should match the old. 
+ self.assertAllEqual(layer.get_config(), new_layer.get_config()) + + def test_sngp_kwargs_serialization(self): + """Tests if SNGP-specific kwargs are added during serialization.""" + layer = cls_head.GaussianProcessClassificationHead( + inner_dim=5, + num_classes=2, + use_spec_norm=True, + use_gp_layer=True, + **self.spec_norm_kwargs, + **self.gp_layer_kwargs) + layer_config = layer.get_config() + + # The config value should equal to those defined in setUp(). + self.assertEqual(layer_config["norm_multiplier"], 1.) + self.assertEqual(layer_config["num_inducing"], 512) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/modeling/layers/dense_einsum.py b/official/nlp/modeling/layers/dense_einsum.py index 4d57799fc9ecaf95b61d18601646070d331a93a7..f54c14b72b34c3c36713da6f86db7eb02e876212 100644 --- a/official/nlp/modeling/layers/dense_einsum.py +++ b/official/nlp/modeling/layers/dense_einsum.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Keras-based einsum layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import tensorflow as tf @@ -28,11 +24,11 @@ _CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"] @tf.keras.utils.register_keras_serializable(package="Text") class DenseEinsum(tf.keras.layers.Layer): - """A densely connected layer that uses tf.einsum as the backing computation. + """A densely connected layer that uses `tf.einsum` as the backing computation. This layer can perform einsum calculations of arbitrary dimensionality. - Arguments: + Args: output_shape: Positive integer or tuple, dimensionality of the output space. num_summed_dimensions: The number of dimensions to sum over. Standard 2D matmul should use 1, 3D matmul should use 2, and so forth. @@ -59,9 +55,8 @@ class DenseEinsum(tf.keras.layers.Layer): `(batch_size, units)`. """ - @deprecation.deprecated( - None, "DenseEinsum is deprecated. Please use " - "tf.keras.experimental.EinsumDense layer instead.") + @deprecation.deprecated(None, "DenseEinsum is deprecated. Please use " + "tf.keras.experimental.EinsumDense layer instead.") def __init__(self, output_shape, num_summed_dimensions=1, diff --git a/official/nlp/modeling/layers/dense_einsum_test.py b/official/nlp/modeling/layers/dense_einsum_test.py index 57a60fe52fa835c09df228274d42ed7eb8f39595..f2910c7f6c695ec760aed42054de88f2bfe85891 100644 --- a/official/nlp/modeling/layers/dense_einsum_test.py +++ b/official/nlp/modeling/layers/dense_einsum_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for Keras-based einsum layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based einsum layer.""" import numpy as np import tensorflow as tf diff --git a/official/nlp/modeling/layers/gated_feedforward.py b/official/nlp/modeling/layers/gated_feedforward.py index 11c912885a7b8eb68e6d764653275fb2b5d2de92..2de2940658c68c9cd324df339a8f90a1d0038c12 100644 --- a/official/nlp/modeling/layers/gated_feedforward.py +++ b/official/nlp/modeling/layers/gated_feedforward.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based gated feedforward layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import gin import tensorflow as tf @@ -32,22 +28,22 @@ class GatedFeedforward(tf.keras.layers.Layer): (https://arxiv.org/abs/2002.05202). 
In additional, it allows to stack multiple feedforward blocks and specify the position of dropout layer. - Arguments: + Args: intermediate_size: Size of the intermediate layer. intermediate_activation: Activation for the intermediate layer. dropout: Dropout probability for the output dropout. - use_gate: Whether to use gated linear units. If True, assuming `GELU` as - the activation and omitting bias, will apply + use_gate: Whether to use gated linear units. If True, assuming `GELU` as the + activation and omitting bias, will apply `GEGLU(x, W, V, W_2) = (GEGLU(xW) * xV)W2`; if False, will follow - "Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper - and apply `FFN(x, W, W_2) = GELU(xW_1)W_2.` - num_blocks: The number of feedforward blocks to stack. Each block contains - a (gated) linear layer and a fully connected layer followed by dropout, + "Attention Is All You Need" (https://arxiv.org/abs/1706.03762) paper and + apply `FFN(x, W, W_2) = GELU(xW_1)W_2.` + num_blocks: The number of feedforward blocks to stack. Each block contains a + (gated) linear layer and a fully connected layer followed by dropout, layer norm and residual. dropout_position: Where to apply the dropout, the value can be either `before_residual` or `after_residual`. If `before_residual`, will apply - `layer_output = layer_norm(dropout(layer_output) + layer_input)`; - if `after residual`, will apply + `layer_output = layer_norm(dropout(layer_output) + layer_input)`; if + `after residual`, will apply `layer_output = dropout(layer_norm(layer_output + layer_input))`. kernel_initializer: Initializer for dense layer kernels. bias_initializer: Initializer for dense layer biases. 
@@ -63,6 +59,7 @@ class GatedFeedforward(tf.keras.layers.Layer): intermediate_activation, dropout, use_gate=True, + apply_output_layer_norm=True, num_blocks=1, dropout_position="before_residual", kernel_initializer="glorot_uniform", @@ -79,6 +76,7 @@ class GatedFeedforward(tf.keras.layers.Layer): self._dropout = dropout self._use_gate = use_gate self._num_blocks = num_blocks + self._apply_output_layer_norm = apply_output_layer_norm self._dropout_position = dropout_position if self._dropout_position not in ("before_residual", "after_residual"): raise ValueError( @@ -110,7 +108,7 @@ class GatedFeedforward(tf.keras.layers.Layer): self._output_dense = [] self._output_dropout = [] self._output_layer_norm = [] - activation_policy = tf.keras.mixed_precision.experimental.global_policy() + activation_policy = tf.keras.mixed_precision.global_policy() if activation_policy.name == "mixed_bfloat16": # bfloat16 causes BERT with the LAMB optimizer to not converge # as well, so we use float32. @@ -124,8 +122,9 @@ class GatedFeedforward(tf.keras.layers.Layer): bias_axes="d", name="intermediate_%d" % i, **common_kwargs)) - self._intermediate_activation_layers.append(tf.keras.layers.Activation( - self._intermediate_activation, dtype=activation_policy)) + self._intermediate_activation_layers.append( + tf.keras.layers.Activation( + self._intermediate_activation, dtype=activation_policy)) if self._use_gate: self._gate_dense.append( tf.keras.layers.experimental.EinsumDense( @@ -141,15 +140,15 @@ class GatedFeedforward(tf.keras.layers.Layer): bias_axes="d", name="output_%d" % i, **common_kwargs)) - self._output_dropout.append( - tf.keras.layers.Dropout(rate=self._dropout)) + self._output_dropout.append(tf.keras.layers.Dropout(rate=self._dropout)) # Use float32 in layernorm for numeric stability. 
- self._output_layer_norm.append( - tf.keras.layers.LayerNormalization( - name="output_layer_norm_%d" % i, - axis=-1, - epsilon=1e-12, - dtype=tf.float32)) + if self._apply_output_layer_norm: + self._output_layer_norm.append( + tf.keras.layers.LayerNormalization( + name="output_layer_norm_%d" % i, + axis=-1, + epsilon=1e-12, + dtype=tf.float32)) def get_config(self): config = { @@ -203,7 +202,8 @@ class GatedFeedforward(tf.keras.layers.Layer): # add. if layer_input.dtype == tf.float32: layer_output = tf.cast(layer_output, tf.float32) - layer_output = self._output_layer_norm[i](layer_output + layer_input) + if self._apply_output_layer_norm: + layer_output = self._output_layer_norm[i](layer_output + layer_input) if self._dropout_position == "after_residual": layer_output = self._output_dropout[i](layer_output) diff --git a/official/nlp/modeling/layers/gated_feedforward_test.py b/official/nlp/modeling/layers/gated_feedforward_test.py index 8daeb5d32fde9be2765fe3819b13ee9a13546f55..46d4f4bb258cf6ea6726679c0d730ac37da50461 100644 --- a/official/nlp/modeling/layers/gated_feedforward_test.py +++ b/official/nlp/modeling/layers/gated_feedforward_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for Keras-based gated feedforward layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based gated feedforward layer.""" from absl.testing import parameterized import numpy as np @@ -33,7 +29,7 @@ class GatedFeedforwardTest(keras_parameterized.TestCase): def tearDown(self): super(GatedFeedforwardTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy("float32") + tf.keras.mixed_precision.set_global_policy("float32") @parameterized.parameters( (True, 1, "after_residual", "float32"), @@ -46,7 +42,7 @@ class GatedFeedforwardTest(keras_parameterized.TestCase): (False, 1, "before_residual", "mixed_float16"), ) def test_layer_creation(self, use_gate, num_blocks, dropout_position, dtype): - tf.keras.mixed_precision.experimental.set_policy(dtype) + tf.keras.mixed_precision.set_global_policy(dtype) kwargs = dict( intermediate_size=128, intermediate_activation="relu", @@ -78,7 +74,7 @@ class GatedFeedforwardTest(keras_parameterized.TestCase): ) def test_layer_invocation(self, use_gate, num_blocks, dropout_position, dtype): - tf.keras.mixed_precision.experimental.set_policy(dtype) + tf.keras.mixed_precision.set_global_policy(dtype) kwargs = dict( intermediate_size=16, intermediate_activation="relu", @@ -123,5 +119,6 @@ class GatedFeedforwardTest(keras_parameterized.TestCase): # If the serialization was successful, the new config should match the old. 
self.assertAllEqual(test_layer.get_config(), new_layer.get_config()) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/modeling/layers/gaussian_process.py b/official/nlp/modeling/layers/gaussian_process.py new file mode 100644 index 0000000000000000000000000000000000000000..3729d8ee6cfaebe3b6c0a077cc3ee7706295bb10 --- /dev/null +++ b/official/nlp/modeling/layers/gaussian_process.py @@ -0,0 +1,495 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Definitions for random feature Gaussian process layer.""" +import math +import tensorflow as tf + + +_SUPPORTED_LIKELIHOOD = ('binary_logistic', 'poisson', 'gaussian') + + +class RandomFeatureGaussianProcess(tf.keras.layers.Layer): + """Gaussian process layer with random feature approximation [1]. + + During training, the model updates the maximum a posteriori (MAP) logits + estimates and posterior precision matrix using minibatch statistics. During + inference, the model divides the MAP logit estimates by the predictive + standard deviation, which is equivalent to approximating the posterior mean + of the predictive probability via the mean-field approximation. + + User can specify different types of random features by setting + `use_custom_random_features=True`, and change the initializer and activations + of the custom random features. 
For example: + + MLP Kernel: initializer='random_normal', activation=tf.nn.relu + RBF Kernel: initializer='random_normal', activation=tf.math.cos + + A linear kernel can also be specified by setting gp_kernel_type='linear' and + `use_custom_random_features=True`. + + [1]: Ali Rahimi and Benjamin Recht. Random Features for Large-Scale Kernel + Machines. In _Neural Information Processing Systems_, 2007. + https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf + + Attributes: + units: (int) The dimensionality of layer. + num_inducing: (int) The number of random features for the approximation. + is_training: (tf.bool) Whether the layer is set in training mode. If so the + layer updates the Gaussian process' variance estimate using statistics + computed from the incoming minibatches. + """ + + def __init__(self, + units, + num_inducing=1024, + gp_kernel_type='gaussian', + gp_kernel_scale=1., + gp_output_bias=0., + normalize_input=False, + gp_kernel_scale_trainable=False, + gp_output_bias_trainable=False, + gp_cov_momentum=0.999, + gp_cov_ridge_penalty=1., + scale_random_features=True, + use_custom_random_features=True, + custom_random_features_initializer=None, + custom_random_features_activation=None, + l2_regularization=1e-6, + gp_cov_likelihood='gaussian', + return_gp_cov=True, + return_random_features=False, + dtype=None, + name='random_feature_gaussian_process', + **gp_output_kwargs): + """Initializes a random-feature Gaussian process layer instance. + + Args: + units: (int) Number of output units. + num_inducing: (int) Number of random Fourier features used for + approximating the Gaussian process. + gp_kernel_type: (string) The type of kernel function to use for Gaussian + process. Currently default to 'gaussian' which is the Gaussian RBF + kernel. + gp_kernel_scale: (float) The length-scale parameter of the a + shift-invariant kernel function, i.e., for RBF kernel: + exp(-|x1 - x2|**2 / gp_kernel_scale). 
+ gp_output_bias: (float) Scalar initial value for the bias vector. + normalize_input: (bool) Whether to normalize the input to Gaussian + process. + gp_kernel_scale_trainable: (bool) Whether the length scale variable is + trainable. + gp_output_bias_trainable: (bool) Whether the bias is trainable. + gp_cov_momentum: (float) A discount factor used to compute the moving + average for posterior covariance matrix. + gp_cov_ridge_penalty: (float) Initial Ridge penalty to posterior + covariance matrix. + scale_random_features: (bool) Whether to scale the random feature + by sqrt(2. / num_inducing). + use_custom_random_features: (bool) Whether to use custom random + features implemented using tf.keras.layers.Dense. + custom_random_features_initializer: (str or callable) Initializer for + the random features. Default to random normal which approximates a RBF + kernel function if activation function is cos. + custom_random_features_activation: (callable) Activation function for the + random feature layer. Default to cosine which approximates a RBF + kernel function. + l2_regularization: (float) The strength of l2 regularization on the output + weights. + gp_cov_likelihood: (string) Likelihood to use for computing Laplace + approximation for covariance matrix. Default to `gaussian`. + return_gp_cov: (bool) Whether to also return GP covariance matrix. + If False then no covariance learning is performed. + return_random_features: (bool) Whether to also return random features. + dtype: (tf.DType) Input data type. + name: (string) Layer name. + **gp_output_kwargs: Additional keyword arguments to dense output layer. + """ + super(RandomFeatureGaussianProcess, self).__init__(name=name, dtype=dtype) + self.units = units + self.num_inducing = num_inducing + + self.normalize_input = normalize_input + self.gp_input_scale = 1. / tf.sqrt(gp_kernel_scale) + self.gp_feature_scale = tf.sqrt(2. 
/ float(num_inducing)) + + self.scale_random_features = scale_random_features + self.return_random_features = return_random_features + self.return_gp_cov = return_gp_cov + + self.gp_kernel_type = gp_kernel_type + self.gp_kernel_scale = gp_kernel_scale + self.gp_output_bias = gp_output_bias + self.gp_kernel_scale_trainable = gp_kernel_scale_trainable + self.gp_output_bias_trainable = gp_output_bias_trainable + + self.use_custom_random_features = use_custom_random_features + self.custom_random_features_initializer = custom_random_features_initializer + self.custom_random_features_activation = custom_random_features_activation + + self.l2_regularization = l2_regularization + self.gp_output_kwargs = gp_output_kwargs + + self.gp_cov_momentum = gp_cov_momentum + self.gp_cov_ridge_penalty = gp_cov_ridge_penalty + self.gp_cov_likelihood = gp_cov_likelihood + + if self.use_custom_random_features: + # Default to Gaussian RBF kernel. + self.random_features_bias_initializer = tf.random_uniform_initializer( + minval=0., maxval=2. * math.pi) + if self.custom_random_features_initializer is None: + self.custom_random_features_initializer = ( + tf.keras.initializers.RandomNormal(stddev=1.)) + if self.custom_random_features_activation is None: + self.custom_random_features_activation = tf.math.cos + + def build(self, input_shape): + # Defines model layers. 
+ if self.normalize_input: + self._input_norm_layer = tf.keras.layers.LayerNormalization( + name='gp_input_normalization') + self._input_norm_layer.build(input_shape) + input_shape = self._input_norm_layer.compute_output_shape(input_shape) + + self._random_feature = self._make_random_feature_layer( + name='gp_random_feature') + self._random_feature.build(input_shape) + input_shape = self._random_feature.compute_output_shape(input_shape) + + if self.return_gp_cov: + self._gp_cov_layer = LaplaceRandomFeatureCovariance( + momentum=self.gp_cov_momentum, + ridge_penalty=self.gp_cov_ridge_penalty, + likelihood=self.gp_cov_likelihood, + dtype=self.dtype, + name='gp_covariance') + self._gp_cov_layer.build(input_shape) + + self._gp_output_layer = tf.keras.layers.Dense( + units=self.units, + use_bias=False, + kernel_regularizer=tf.keras.regularizers.l2(self.l2_regularization), + dtype=self.dtype, + name='gp_output_weights', + **self.gp_output_kwargs) + self._gp_output_layer.build(input_shape) + + self._gp_output_bias = tf.Variable( + initial_value=[self.gp_output_bias] * self.units, + dtype=self.dtype, + trainable=self.gp_output_bias_trainable, + name='gp_output_bias') + + self.built = True + + def _make_random_feature_layer(self, name): + """Defines random feature layer depending on kernel type.""" + if not self.use_custom_random_features: + # Use default RandomFourierFeatures layer from tf.keras. + return tf.keras.layers.experimental.RandomFourierFeatures( + output_dim=self.num_inducing, + kernel_initializer=self.gp_kernel_type, + scale=self.gp_kernel_scale, + trainable=self.gp_kernel_scale_trainable, + dtype=self.dtype, + name=name) + + if self.gp_kernel_type.lower() == 'linear': + custom_random_feature_layer = tf.keras.layers.Lambda( + lambda x: x, name=name) + else: + # Use user-supplied configurations. 
+      custom_random_feature_layer = tf.keras.layers.Dense(
+          units=self.num_inducing,
+          use_bias=True,
+          activation=self.custom_random_features_activation,
+          kernel_initializer=self.custom_random_features_initializer,
+          bias_initializer=self.random_features_bias_initializer,
+          trainable=False,
+          name=name)
+
+    return custom_random_feature_layer
+
+  def reset_covariance_matrix(self):
+    """Resets covariance matrix of the GP layer.
+
+    This function is useful for resetting the model's covariance matrix at the
+    beginning of a new epoch.
+    """
+    self._gp_cov_layer.reset_precision_matrix()
+
+  def call(self, inputs, global_step=None, training=None):
+    # Computes random features.
+    gp_inputs = inputs
+    if self.normalize_input:
+      gp_inputs = self._input_norm_layer(gp_inputs)
+    elif self.use_custom_random_features:
+      # Supports lengthscale for custom random feature layer by directly
+      # rescaling the input.
+      gp_input_scale = tf.cast(self.gp_input_scale, inputs.dtype)
+      gp_inputs = gp_inputs * gp_input_scale
+
+    gp_feature = self._random_feature(gp_inputs)
+
+    if self.scale_random_features:
+      # Scale random feature by 2. / sqrt(num_inducing) following [1].
+      # When using GP layer as the output layer of a neural network,
+      # it is recommended to turn this scaling off to prevent it from changing
+      # the learning rate to the hidden layers.
+      gp_feature_scale = tf.cast(self.gp_feature_scale, inputs.dtype)
+      gp_feature = gp_feature * gp_feature_scale
+
+    # Computes posterior center (i.e., MAP estimate) and variance.
+    gp_output = self._gp_output_layer(gp_feature) + self._gp_output_bias
+
+    if self.return_gp_cov:
+      gp_covmat = self._gp_cov_layer(gp_feature, gp_output, training)
+
+    # Assembles model output.
+    model_output = [gp_output,]
+    if self.return_gp_cov:
+      model_output.append(gp_covmat)
+    if self.return_random_features:
+      model_output.append(gp_feature)
+
+    return model_output
+
+
+class LaplaceRandomFeatureCovariance(tf.keras.layers.Layer):
+  """Computes the Gaussian Process covariance using Laplace method.
+
+  At training time, this layer updates the Gaussian process posterior using
+  model features in minibatches.
+
+  Attributes:
+    momentum: (float) A discount factor used to compute the moving average for
+      posterior precision matrix. Analogous to the momentum factor in batch
+      normalization. If -1 then update covariance matrix using a naive sum
+      without momentum, which is desirable if the goal is to compute the exact
+      covariance matrix by passing through data once (say in the final epoch).
+    ridge_penalty: (float) Initial Ridge penalty to weight covariance matrix.
+      This value is used to stabilize the eigenvalues of weight covariance
+      estimate so that the matrix inverse can be computed for Cov = inv(t(X) * X
+      + s * I). The ridge factor s cannot be too large since otherwise it will
+      dominate the t(X) * X term and make covariance estimate not meaningful.
+    likelihood: (str) The likelihood to use for computing Laplace approximation
+      for the covariance matrix. Can be one of ('binary_logistic', 'poisson',
+      'gaussian').
+  """
+
+  def __init__(self,
+               momentum=0.999,
+               ridge_penalty=1.,
+               likelihood='gaussian',
+               dtype=None,
+               name='laplace_covariance'):
+    if likelihood not in _SUPPORTED_LIKELIHOOD:
+      raise ValueError(
+          f'"likelihood" must be one of {_SUPPORTED_LIKELIHOOD}, got {likelihood}.'
+ ) + self.ridge_penalty = ridge_penalty + self.momentum = momentum + self.likelihood = likelihood + super(LaplaceRandomFeatureCovariance, self).__init__(dtype=dtype, name=name) + + def compute_output_shape(self, input_shape): + gp_feature_dim = input_shape[-1] + return tf.TensorShape([gp_feature_dim, gp_feature_dim]) + + def build(self, input_shape): + gp_feature_dim = input_shape[-1] + + # Convert gp_feature_dim to int value for TF1 compatibility. + if isinstance(gp_feature_dim, tf.compat.v1.Dimension): + gp_feature_dim = gp_feature_dim.value + + # Posterior precision matrix for the GP's random feature coefficients. + self.initial_precision_matrix = ( + self.ridge_penalty * tf.eye(gp_feature_dim, dtype=self.dtype)) + + self.precision_matrix = ( + self.add_weight( + name='gp_precision_matrix', + shape=(gp_feature_dim, gp_feature_dim), + dtype=self.dtype, + initializer=tf.keras.initializers.Identity(self.ridge_penalty), + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)) + self.built = True + + def make_precision_matrix_update_op(self, + gp_feature, + logits, + precision_matrix): + """Defines update op for the precision matrix of feature weights.""" + if self.likelihood != 'gaussian': + if logits is None: + raise ValueError( + f'"logits" cannot be None when likelihood={self.likelihood}') + + if logits.shape[-1] != 1: + raise ValueError( + f'likelihood={self.likelihood} only support univariate logits.' + f'Got logits dimension: {logits.shape[-1]}') + + batch_size = tf.shape(gp_feature)[0] + batch_size = tf.cast(batch_size, dtype=gp_feature.dtype) + + # Computes batch-specific normalized precision matrix. + if self.likelihood == 'binary_logistic': + prob = tf.sigmoid(logits) + prob_multiplier = prob * (1. - prob) + elif self.likelihood == 'poisson': + prob_multiplier = tf.exp(logits) + else: + prob_multiplier = 1. 
+
+    gp_feature_adjusted = tf.sqrt(prob_multiplier) * gp_feature
+    precision_matrix_minibatch = tf.matmul(
+        gp_feature_adjusted, gp_feature_adjusted, transpose_a=True)
+
+    # Updates the population-wise precision matrix.
+    if self.momentum > 0:
+      # Use moving-average updates to accumulate batch-specific precision
+      # matrices.
+      precision_matrix_minibatch = precision_matrix_minibatch / batch_size
+      precision_matrix_new = (
+          self.momentum * precision_matrix +
+          (1. - self.momentum) * precision_matrix_minibatch)
+    else:
+      # Compute exact population-wise covariance without momentum.
+      # If using this option, make sure to pass through data only once.
+      precision_matrix_new = precision_matrix + precision_matrix_minibatch
+
+    # Returns the update op.
+    return precision_matrix.assign(precision_matrix_new)
+
+  def reset_precision_matrix(self):
+    """Resets precision matrix to its initial value.
+
+    This function is useful for resetting the model's covariance matrix at the
+    beginning of a new epoch.
+    """
+    precision_matrix_reset_op = self.precision_matrix.assign(
+        self.initial_precision_matrix)
+    self.add_update(precision_matrix_reset_op)
+
+  def compute_predictive_covariance(self, gp_feature):
+    """Computes posterior predictive variance.
+
+    Approximates the Gaussian process posterior using random features.
+    Given training random feature Phi_tr (num_train, num_hidden) and testing
+    random feature Phi_ts (batch_size, num_hidden). The predictive covariance
+    matrix is computed as (assuming Gaussian likelihood):
+
+      s * Phi_ts @ inv(t(Phi_tr) * Phi_tr + s * I) @ t(Phi_ts),
+
+    where s is the ridge factor to be used for stabilizing the inverse, and I is
+    the identity matrix with shape (num_hidden, num_hidden).
+
+    Args:
+      gp_feature: (tf.Tensor) The random feature of testing data to be used for
+        computing the covariance matrix. Shape (batch_size, gp_hidden_size).
+
+    Returns:
+      (tf.Tensor) Predictive covariance matrix, shape (batch_size, batch_size).
+ """ + # Computes the covariance matrix of the feature coefficient. + feature_cov_matrix = tf.linalg.inv(self.precision_matrix) + + # Computes the covariance matrix of the gp prediction. + cov_feature_product = tf.matmul( + feature_cov_matrix, gp_feature, transpose_b=True) * self.ridge_penalty + gp_cov_matrix = tf.matmul(gp_feature, cov_feature_product) + return gp_cov_matrix + + def _get_training_value(self, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + + if isinstance(training, int): + training = bool(training) + + return training + + def call(self, inputs, logits=None, training=None): + """Minibatch updates the GP's posterior precision matrix estimate. + + Args: + inputs: (tf.Tensor) GP random features, shape (batch_size, + gp_hidden_size). + logits: (tf.Tensor) Pre-activation output from the model. Needed + for Laplace approximation under a non-Gaussian likelihood. + training: (tf.bool) whether or not the layer is in training mode. If in + training mode, the gp_weight covariance is updated using gp_feature. + + Returns: + gp_stddev (tf.Tensor): GP posterior predictive variance, + shape (batch_size, batch_size). + """ + batch_size = tf.shape(inputs)[0] + training = self._get_training_value(training) + + if training: + # Define and register the update op for feature precision matrix. + precision_matrix_update_op = self.make_precision_matrix_update_op( + gp_feature=inputs, + logits=logits, + precision_matrix=self.precision_matrix) + self.add_update(precision_matrix_update_op) + # Return null estimate during training. + return tf.eye(batch_size, dtype=self.dtype) + else: + # Return covariance estimate during inference. + return self.compute_predictive_covariance(gp_feature=inputs) + + +def mean_field_logits(logits, covariance_matrix=None, mean_field_factor=1.): + """Adjust the model logits so its softmax approximates the posterior mean [1]. + + [1]: Zhiyun Lu, Eugene Ie, Fei Sha. 
Uncertainty Estimation with Infinitesimal + Jackknife. _arXiv preprint arXiv:2006.07584_, 2020. + https://arxiv.org/abs/2006.07584 + + Arguments: + logits: A float tensor of shape (batch_size, num_classes). + covariance_matrix: The covariance matrix of shape (batch_size, batch_size). + If None then it assumes the covariance_matrix is an identity matrix. + mean_field_factor: The scale factor for mean-field approximation, used to + adjust the influence of posterior variance in posterior mean + approximation. If covariance_matrix=None then it is used as the + temperature parameter for temperature scaling. + + Returns: + Tensor of adjusted logits, shape (batch_size, num_classes). + """ + if mean_field_factor is None or mean_field_factor < 0: + return logits + + # Compute standard deviation. + if covariance_matrix is None: + variances = 1. + else: + variances = tf.linalg.diag_part(covariance_matrix) + + # Compute scaling coefficient for mean-field approximation. + logits_scale = tf.sqrt(1. + variances * mean_field_factor) + + if len(logits.shape) > 1: + # Cast logits_scale to compatible dimension. + logits_scale = tf.expand_dims(logits_scale, axis=-1) + + return logits / logits_scale diff --git a/official/nlp/modeling/layers/gaussian_process_test.py b/official/nlp/modeling/layers/gaussian_process_test.py new file mode 100644 index 0000000000000000000000000000000000000000..37958fa742326dc7cde6e1c4625c2b4ba77d2a2d --- /dev/null +++ b/official/nlp/modeling/layers/gaussian_process_test.py @@ -0,0 +1,268 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for Gaussian process functions.""" +import os +import shutil + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.nlp.modeling.layers import gaussian_process + + +def exact_gaussian_kernel(x1, x2): + """Computes exact Gaussian kernel value(s) for tensors x1 and x2.""" + x1_squared = tf.reduce_sum(tf.square(x1), list(range(1, len(x1.shape)))) + x2_squared = tf.reduce_sum(tf.square(x2), list(range(1, len(x2.shape)))) + square = (x1_squared[:, tf.newaxis] + x2_squared[tf.newaxis, :] - + 2 * tf.matmul(x1, x2, transpose_b=True)) + return tf.math.exp(-square / 2.) + + +def _generate_normal_data(num_sample, num_dim, loc): + """Generates random data sampled from i.i.d. normal distribution.""" + return np.random.normal( + size=(num_sample, num_dim), loc=loc, scale=1. 
/ np.sqrt(num_dim)) + + +def _generate_rbf_data(x_data, orthogonal=True): + """Generates high-dim data that is the eigen components of a RBF kernel.""" + k_rbf = exact_gaussian_kernel(x_data, x_data) + x_orth, x_diag, _ = np.linalg.svd(k_rbf) + if orthogonal: + return x_orth + return np.diag(np.sqrt(x_diag)).dot(x_orth.T) + + +def _make_minibatch_iterator(data_numpy, batch_size, num_epoch): + """Makes a tf.data.Dataset for given batch size and num epoches.""" + dataset = tf.data.Dataset.from_tensor_slices(data_numpy) + dataset = dataset.repeat(num_epoch).batch(batch_size) + return iter(dataset) + + +def _compute_posterior_kernel(x_tr, x_ts, kernel_func, ridge_penalty): + """Computes the posterior covariance matrix of a Gaussian process.""" + num_sample = x_tr.shape[0] + + k_tt_inv = tf.linalg.inv( + kernel_func(x_tr, x_tr) + ridge_penalty * np.eye(num_sample)) + k_ts = kernel_func(x_tr, x_ts) + k_ss = kernel_func(x_ts, x_ts) + + return k_ss - tf.matmul(k_ts, tf.matmul(k_tt_inv, k_ts), transpose_a=True) + + +class GaussianProcessTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(GaussianProcessTest, self).setUp() + self.num_data_dim = 10 + self.num_inducing = 1024 + self.num_train_sample = 1024 + self.num_test_sample = 256 + self.prec_tolerance = {'atol': 1e-3, 'rtol': 5e-2} + self.cov_tolerance = {'atol': 5e-2, 'rtol': 2.} + + self.rbf_kern_func = exact_gaussian_kernel + + self.x_tr = _generate_normal_data( + self.num_train_sample, self.num_data_dim, loc=0.) + self.x_ts = _generate_normal_data( + self.num_test_sample, self.num_data_dim, loc=1.) 
+ + def test_layer_build(self): + """Tests if layer.built=True after building.""" + rfgp_model = gaussian_process.RandomFeatureGaussianProcess(units=1) + rfgp_model.build(input_shape=self.x_tr.shape) + + self.assertTrue(rfgp_model.built) + + @parameterized.named_parameters(('rbf_data', False), + ('orthogonal_data', True)) + def test_laplace_covariance_minibatch(self, generate_orthogonal_data): + """Tests if model correctly learns population-lvel precision matrix.""" + batch_size = 50 + epochs = 1000 + x_data = _generate_rbf_data(self.x_ts, generate_orthogonal_data) + data_iterator = _make_minibatch_iterator(x_data, batch_size, epochs) + + # Estimates precision matrix using minibatch. + cov_estimator = gaussian_process.LaplaceRandomFeatureCovariance( + momentum=0.999, ridge_penalty=0) + + for minibatch_data in data_iterator: + _ = cov_estimator(minibatch_data, training=True) + + # Evaluation + prec_mat_expected = x_data.T.dot(x_data) + prec_mat_computed = ( + cov_estimator.precision_matrix.numpy() * self.num_test_sample) + + np.testing.assert_allclose(prec_mat_computed, prec_mat_expected, + **self.prec_tolerance) + + def test_random_feature_prior_approximation(self): + """Tests random feature GP's ability in approximating exact GP prior.""" + num_inducing = 10240 + rfgp_model = gaussian_process.RandomFeatureGaussianProcess( + units=1, + num_inducing=num_inducing, + normalize_input=False, + gp_kernel_type='gaussian', + return_random_features=True) + + # Extract random features. 
+ _, _, gp_feature = rfgp_model(self.x_tr, training=True) + gp_feature_np = gp_feature.numpy() + + prior_kernel_computed = gp_feature_np.dot(gp_feature_np.T) + prior_kernel_expected = self.rbf_kern_func(self.x_tr, self.x_tr) + np.testing.assert_allclose(prior_kernel_computed, prior_kernel_expected, + **self.cov_tolerance) + + def test_random_feature_posterior_approximation(self): + """Tests random feature GP's ability in approximating exact GP posterior.""" + # Set momentum = 0.5 so posterior precision matrix is 0.5 * (I + K). + gp_cov_momentum = 0.5 + gp_cov_ridge_penalty = 1. + num_inducing = 1024 + + rfgp_model = gaussian_process.RandomFeatureGaussianProcess( + units=1, + num_inducing=num_inducing, + normalize_input=False, + gp_kernel_type='gaussian', + gp_cov_momentum=gp_cov_momentum, + gp_cov_ridge_penalty=gp_cov_ridge_penalty) + + # Computes posterior covariance on test data. + _, _ = rfgp_model(self.x_tr, training=True) + _, gp_cov_ts = rfgp_model(self.x_ts, training=False) + + # Scale up covariance estimate since prec matrix is down-scaled by momentum. 
+ post_kernel_computed = gp_cov_ts * gp_cov_momentum + post_kernel_expected = _compute_posterior_kernel(self.x_tr, self.x_ts, + self.rbf_kern_func, + gp_cov_ridge_penalty) + np.testing.assert_allclose(post_kernel_computed, post_kernel_expected, + **self.cov_tolerance) + + def test_random_feature_linear_kernel(self): + """Tests if linear kernel indeed leads to an identity mapping.""" + # Specify linear kernel + gp_kernel_type = 'linear' + normalize_input = False + scale_random_features = False + use_custom_random_features = True + + rfgp_model = gaussian_process.RandomFeatureGaussianProcess( + units=1, + normalize_input=normalize_input, + gp_kernel_type=gp_kernel_type, + scale_random_features=scale_random_features, + use_custom_random_features=use_custom_random_features, + return_random_features=True) + + _, _, gp_feature = rfgp_model(self.x_tr, training=True) + + # Check if linear kernel leads to identity mapping. + np.testing.assert_allclose(gp_feature, self.x_tr, **self.prec_tolerance) + + def test_no_matrix_update_during_test(self): + """Tests if the precision matrix is not updated during testing.""" + rfgp_model = gaussian_process.RandomFeatureGaussianProcess(units=1) + + # Training. + _, gp_covmat_null = rfgp_model(self.x_tr, training=True) + precision_mat_before_test = rfgp_model._gp_cov_layer.precision_matrix + + # Testing. 
+ _ = rfgp_model(self.x_ts, training=False) + precision_mat_after_test = rfgp_model._gp_cov_layer.precision_matrix + + self.assertAllClose( + gp_covmat_null, tf.eye(self.num_train_sample), atol=1e-4) + self.assertAllClose( + precision_mat_before_test, precision_mat_after_test, atol=1e-4) + + def test_state_saving_and_loading(self): + """Tests if the loaded model returns same results.""" + input_data = np.random.random((1, 2)) + rfgp_model = gaussian_process.RandomFeatureGaussianProcess(units=1) + + inputs = tf.keras.Input((2,), batch_size=1) + outputs = rfgp_model(inputs) + model = tf.keras.Model(inputs, outputs) + gp_output, gp_covmat = model.predict(input_data) + + # Save and then load the model. + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + saved_model_dir = os.path.join(temp_dir, 'rfgp_model') + model.save(saved_model_dir) + new_model = tf.keras.models.load_model(saved_model_dir) + + gp_output_new, gp_covmat_new = new_model.predict(input_data) + self.assertAllClose(gp_output, gp_output_new, atol=1e-4) + self.assertAllClose(gp_covmat, gp_covmat_new, atol=1e-4) + + +class MeanFieldLogitsTest(tf.test.TestCase): + + def testMeanFieldLogitsLikelihood(self): + """Tests if scaling is correct under different likelihood.""" + batch_size = 10 + num_classes = 12 + variance = 1.5 + mean_field_factor = 2. 
+ + rng = np.random.RandomState(0) + tf.random.set_seed(1) + logits = rng.randn(batch_size, num_classes) + covmat = tf.linalg.diag([variance] * batch_size) + + logits_logistic = gaussian_process.mean_field_logits( + logits, covmat, mean_field_factor=mean_field_factor) + + self.assertAllClose(logits_logistic, logits / 2., atol=1e-4) + + def testMeanFieldLogitsTemperatureScaling(self): + """Tests using mean_field_logits as temperature scaling method.""" + batch_size = 10 + num_classes = 12 + + rng = np.random.RandomState(0) + tf.random.set_seed(1) + logits = rng.randn(batch_size, num_classes) + + # Test if there's no change to logits when mean_field_factor < 0. + logits_no_change = gaussian_process.mean_field_logits( + logits, covariance_matrix=None, mean_field_factor=-1) + + # Test if mean_field_logits functions as a temperature scaling method when + # mean_field_factor > 0, with temperature = sqrt(1. + mean_field_factor). + logits_scale_by_two = gaussian_process.mean_field_logits( + logits, covariance_matrix=None, mean_field_factor=3.) + + self.assertAllClose(logits_no_change, logits, atol=1e-4) + self.assertAllClose(logits_scale_by_two, logits / 2., atol=1e-4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/layers/masked_lm.py b/official/nlp/modeling/layers/masked_lm.py index 3b81556f4c7d82e79c9d9cda4894a26fde6a93f7..0da745b732d78ba888de2d7a3243f750a0a795a0 100644 --- a/official/nlp/modeling/layers/masked_lm.py +++ b/official/nlp/modeling/layers/masked_lm.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,114 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Masked language model network.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import tensorflow as tf - -from official.modeling import tf_utils - - -@tf.keras.utils.register_keras_serializable(package='Text') -class MaskedLM(tf.keras.layers.Layer): - """Masked language model network head for BERT modeling. - - This network implements a masked language model based on the provided network. - It assumes that the network being passed has a "get_embedding_table()" method. - - Arguments: - embedding_table: The embedding table of the targets. - activation: The activation, if any, for the dense layer. - initializer: The intializer for the dense layer. Defaults to a Glorot - uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. - """ - - def __init__(self, - embedding_table, - activation=None, - initializer='glorot_uniform', - output='logits', - name='cls/predictions', - **kwargs): - super(MaskedLM, self).__init__(name=name, **kwargs) - self.embedding_table = embedding_table - self.activation = activation - self.initializer = tf.keras.initializers.get(initializer) - - if output not in ('predictions', 'logits'): - raise ValueError( - ('Unknown `output` value "%s". 
`output` can be either "logits" or ' - '"predictions"') % output) - self._output_type = output - - def build(self, input_shape): - self._vocab_size, hidden_size = self.embedding_table.shape - self.dense = tf.keras.layers.Dense( - hidden_size, - activation=self.activation, - kernel_initializer=self.initializer, - name='transform/dense') - self.layer_norm = tf.keras.layers.LayerNormalization( - axis=-1, epsilon=1e-12, name='transform/LayerNorm') - self.bias = self.add_weight( - 'output_bias/bias', - shape=(self._vocab_size,), - initializer='zeros', - trainable=True) - - super(MaskedLM, self).build(input_shape) - - def call(self, sequence_data, masked_positions): - masked_lm_input = self._gather_indexes(sequence_data, masked_positions) - lm_data = self.dense(masked_lm_input) - lm_data = self.layer_norm(lm_data) - lm_data = tf.matmul(lm_data, self.embedding_table, transpose_b=True) - logits = tf.nn.bias_add(lm_data, self.bias) - - masked_positions_shape = tf_utils.get_shape_list( - masked_positions, name='masked_positions_tensor') - logits = tf.reshape(logits, - [-1, masked_positions_shape[1], self._vocab_size]) - if self._output_type == 'logits': - return logits - return tf.nn.log_softmax(logits) - - def get_config(self): - raise NotImplementedError('MaskedLM cannot be directly serialized because ' - 'it has variable sharing logic.') - - def _gather_indexes(self, sequence_tensor, positions): - """Gathers the vectors at the specific positions. - - Args: - sequence_tensor: Sequence output of `BertModel` layer of shape - (`batch_size`, `seq_length`, num_hidden) where num_hidden is number of - hidden units of `BertModel` layer. - positions: Positions ids of tokens in sequence to mask for pretraining - of with dimension (batch_size, num_predictions) where - `num_predictions` is maximum number of tokens to mask out and predict - per each sequence. - - Returns: - Masked out sequence tensor of shape (batch_size * num_predictions, - num_hidden). 
- """ - sequence_shape = tf_utils.get_shape_list( - sequence_tensor, name='sequence_output_tensor') - batch_size, seq_length, width = sequence_shape +from official.nlp import keras_nlp - flat_offsets = tf.reshape( - tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1]) - flat_positions = tf.reshape(positions + flat_offsets, [-1]) - flat_sequence_tensor = tf.reshape(sequence_tensor, - [batch_size * seq_length, width]) - output_tensor = tf.gather(flat_sequence_tensor, flat_positions) - return output_tensor +MaskedLM = keras_nlp.layers.MaskedLM diff --git a/official/nlp/modeling/layers/masked_lm_test.py b/official/nlp/modeling/layers/masked_lm_test.py index 12e28ec95ff49c95c2729efeae04382bad5c611f..53b3b4a22b2696a4e7e8b2566f0691418b8d8e0f 100644 --- a/official/nlp/modeling/layers/masked_lm_test.py +++ b/official/nlp/modeling/layers/masked_lm_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for masked language model network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for masked language model network.""" import numpy as np import tensorflow as tf @@ -24,7 +20,7 @@ import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import from official.nlp.modeling.layers import masked_lm -from official.nlp.modeling.networks import transformer_encoder +from official.nlp.modeling.networks import bert_encoder # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It @@ -34,25 +30,22 @@ class MaskedLMTest(keras_parameterized.TestCase): def create_layer(self, vocab_size, - sequence_length, hidden_size, output='predictions', xformer_stack=None): # First, create a transformer stack that we can use to get the LM's # vocabulary weight. if xformer_stack is None: - xformer_stack = transformer_encoder.TransformerEncoder( + xformer_stack = bert_encoder.BertEncoder( vocab_size=vocab_size, num_layers=1, - sequence_length=sequence_length, hidden_size=hidden_size, num_attention_heads=4, ) # Create a maskedLM from the transformer stack. test_layer = masked_lm.MaskedLM( - embedding_table=xformer_stack.get_embedding_table(), - output=output) + embedding_table=xformer_stack.get_embedding_table(), output=output) return test_layer def test_layer_creation(self): @@ -61,9 +54,7 @@ class MaskedLMTest(keras_parameterized.TestCase): hidden_size = 64 num_predictions = 21 test_layer = self.create_layer( - vocab_size=vocab_size, - sequence_length=sequence_length, - hidden_size=hidden_size) + vocab_size=vocab_size, hidden_size=hidden_size) # Make sure that the output tensor of the masked LM is the right shape. 
lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size)) @@ -78,22 +69,19 @@ class MaskedLMTest(keras_parameterized.TestCase): sequence_length = 32 hidden_size = 64 num_predictions = 21 - xformer_stack = transformer_encoder.TransformerEncoder( + xformer_stack = bert_encoder.BertEncoder( vocab_size=vocab_size, num_layers=1, - sequence_length=sequence_length, hidden_size=hidden_size, num_attention_heads=4, ) test_layer = self.create_layer( vocab_size=vocab_size, - sequence_length=sequence_length, hidden_size=hidden_size, xformer_stack=xformer_stack, output='predictions') logit_layer = self.create_layer( vocab_size=vocab_size, - sequence_length=sequence_length, hidden_size=hidden_size, xformer_stack=xformer_stack, output='logits') @@ -133,9 +121,7 @@ class MaskedLMTest(keras_parameterized.TestCase): hidden_size = 64 num_predictions = 21 test_layer = self.create_layer( - vocab_size=vocab_size, - sequence_length=sequence_length, - hidden_size=hidden_size) + vocab_size=vocab_size, hidden_size=hidden_size) # Create a model from the masked LM layer. lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size)) @@ -154,8 +140,7 @@ class MaskedLMTest(keras_parameterized.TestCase): def test_unknown_output_type_fails(self): with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'): - _ = self.create_layer( - vocab_size=8, sequence_length=8, hidden_size=8, output='bad') + _ = self.create_layer(vocab_size=8, hidden_size=8, output='bad') if __name__ == '__main__': diff --git a/official/nlp/modeling/layers/masked_softmax.py b/official/nlp/modeling/layers/masked_softmax.py index 42a9e97a329e6c2892bb584f38375888a7fbdd2f..06b1994c7b8e5a6a8624130b2a7c6608b2332cf6 100644 --- a/official/nlp/modeling/layers/masked_softmax.py +++ b/official/nlp/modeling/layers/masked_softmax.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,35 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based softmax layer with optional masking.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import tensorflow as tf +def _large_compatible_negative(tensor_type): + """Large negative number as Tensor. + + This function is necessary because the standard value for epsilon + in this module (-1e9) cannot be represented using `tf.float16`. + + Args: + tensor_type: A dtype to determine the type. + + Returns: + A large negative number. + """ + if tensor_type == tf.float16: + return tf.float16.min + return -1e9 + + @tf.keras.utils.register_keras_serializable(package='Text') class MaskedSoftmax(tf.keras.layers.Layer): """Performs a softmax with optional masking on a tensor. - Arguments: + Args: mask_expansion_axes: Any axes that should be padded on the mask tensor. normalization_axes: On which axes the softmax should perform. """ @@ -50,9 +63,9 @@ class MaskedSoftmax(tf.keras.layers.Layer): # Since attention_mask is 1.0 for positions we want to attend and 0.0 for # masked positions, this operation will create a tensor which is 0.0 for - # positions we want to attend and -10000.0 for masked positions. - adder = (1.0 - tf.cast(mask, scores.dtype)) * -10000.0 - + # positions we want to attend and -1.e9 for masked positions. 
+ adder = (1.0 - tf.cast(mask, scores.dtype)) * _large_compatible_negative( + scores.dtype) # Since we are adding it to the raw scores before the softmax, this is # effectively the same as removing these entirely. scores += adder diff --git a/official/nlp/modeling/layers/masked_softmax_test.py b/official/nlp/modeling/layers/masked_softmax_test.py index befe0f786a7b4d84a5dc975d1780acdd2c964a2c..802b6848211122c29fcbaef4e014f5094dd25939 100644 --- a/official/nlp/modeling/layers/masked_softmax_test.py +++ b/official/nlp/modeling/layers/masked_softmax_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for Keras-based masked softmax layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based masked softmax layer.""" import numpy as np import tensorflow as tf diff --git a/official/nlp/modeling/layers/mat_mul_with_margin.py b/official/nlp/modeling/layers/mat_mul_with_margin.py new file mode 100644 index 0000000000000000000000000000000000000000..1fe3156caf35e1010f5838173373004add09b819 --- /dev/null +++ b/official/nlp/modeling/layers/mat_mul_with_margin.py @@ -0,0 +1,69 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Dot product with margin layer.""" +# pylint: disable=g-classes-have-attributes + +from typing import Tuple +# Import libraries +import tensorflow as tf + +from official.modeling import tf_utils + + +@tf.keras.utils.register_keras_serializable(package='Text') +class MatMulWithMargin(tf.keras.layers.Layer): + """This layer computs a dot product matrix given two encoded inputs. + + Args: + logit_scale: The scaling factor of dot products when doing training. + logit_margin: The margin value between the positive and negative examples + when doing training. + """ + + def __init__(self, + logit_scale=1.0, + logit_margin=0.0, + **kwargs): + super(MatMulWithMargin, self).__init__(**kwargs) + self.logit_scale = logit_scale + self.logit_margin = logit_margin + + def call(self, left_encoded: tf.Tensor, + right_encoded: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: + batch_size = tf_utils.get_shape_list( + left_encoded, name='sequence_output_tensor')[0] + + # Left -> Right dot product. + left_dot_products = tf.matmul( + left_encoded, right_encoded, transpose_b=True) + + self.left_logits = self.logit_scale * ( + left_dot_products - self.logit_margin * tf.eye(batch_size)) + + # Right -> Left dot product. 
+ self.right_logits = tf.transpose(self.left_logits) + + return (self.left_logits, self.right_logits) + + def get_config(self): + config = { + 'logit_scale': self.logit_scale, + 'logit_margin': self.logit_margin} + config.update(super(MatMulWithMargin, self).get_config()) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/nlp/modeling/layers/mat_mul_with_margin_test.py b/official/nlp/modeling/layers/mat_mul_with_margin_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1ceea013caee4d060e245dcba5bec590c57937da --- /dev/null +++ b/official/nlp/modeling/layers/mat_mul_with_margin_test.py @@ -0,0 +1,52 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for mat_mul_with_margin layer.""" + +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.layers import mat_mul_with_margin + + +class MatMulWithMarginTest(keras_parameterized.TestCase): + + def test_layer_invocation(self): + """Validate that the Keras object can be created and invoked.""" + input_width = 512 + test_layer = mat_mul_with_margin.MatMulWithMargin() + # Create a 2-dimensional input (the first dimension is implicit). 
+ left_encoded = tf.keras.Input(shape=(input_width,), dtype=tf.float32) + right_encoded = tf.keras.Input(shape=(input_width,), dtype=tf.float32) + left_logits, right_logits = test_layer(left_encoded, right_encoded) + + # Validate that the outputs are of the expected shape. + expected_output_shape = [None, None] + self.assertEqual(expected_output_shape, left_logits.shape.as_list()) + self.assertEqual(expected_output_shape, right_logits.shape.as_list()) + + def test_serialize_deserialize(self): + # Create a layer object that sets all of its config options. + layer = mat_mul_with_margin.MatMulWithMargin() + + # Create another layer object from the first object's config. + new_layer = mat_mul_with_margin.MatMulWithMargin.from_config( + layer.get_config()) + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(layer.get_config(), new_layer.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/layers/mobile_bert_layers.py b/official/nlp/modeling/layers/mobile_bert_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..fa493d022e6b50efbbb643bb17235411d8ca1f1e --- /dev/null +++ b/official/nlp/modeling/layers/mobile_bert_layers.py @@ -0,0 +1,554 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""MobileBERT embedding and transformer layers.""" +import tensorflow as tf + +from official.nlp import keras_nlp + + +@tf.keras.utils.register_keras_serializable(package='Text') +class NoNorm(tf.keras.layers.Layer): + """Apply element-wise linear transformation to the last dimension.""" + + def __init__(self, name=None): + super(NoNorm, self).__init__(name=name) + + def build(self, shape): + kernal_size = shape[-1] + self.bias = self.add_weight('beta', + shape=[kernal_size], + initializer='zeros') + self.scale = self.add_weight('gamma', + shape=[kernal_size], + initializer='ones') + + def call(self, feature): + output = feature * self.scale + self.bias + return output + + +def _get_norm_layer(normalization_type='no_norm', name=None): + """Get normlization layer. + + Args: + normalization_type: String. The type of normalization_type, only + `no_norm` and `layer_norm` are supported. + name: Name for the norm layer. + + Returns: + layer norm class. + """ + if normalization_type == 'no_norm': + layer = NoNorm(name=name) + elif normalization_type == 'layer_norm': + layer = tf.keras.layers.LayerNormalization( + name=name, + axis=-1, + epsilon=1e-12, + dtype=tf.float32) + else: + raise NotImplementedError('Only "no_norm" and "layer_norm" and supported.') + return layer + + +@tf.keras.utils.register_keras_serializable(package='Text') +class MobileBertEmbedding(tf.keras.layers.Layer): + """Performs an embedding lookup for MobileBERT. + + This layer includes word embedding, token type embedding, position embedding. + """ + + def __init__(self, + word_vocab_size, + word_embed_size, + type_vocab_size, + output_embed_size, + max_sequence_length=512, + normalization_type='no_norm', + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + dropout_rate=0.1, + **kwargs): + """Class initialization. + + Args: + word_vocab_size: Number of words in the vocabulary. + word_embed_size: Word embedding size. + type_vocab_size: Number of word types. 
+ output_embed_size: Embedding size for the final embedding output. + max_sequence_length: Maximum length of input sequence. + normalization_type: String. The type of normalization_type, only + `no_norm` and `layer_norm` are supported. + initializer: The initializer to use for the embedding weights and + linear projection weights. + dropout_rate: Dropout rate. + **kwargs: keyword arguments. + """ + super(MobileBertEmbedding, self).__init__(**kwargs) + self.word_vocab_size = word_vocab_size + self.word_embed_size = word_embed_size + self.type_vocab_size = type_vocab_size + self.output_embed_size = output_embed_size + self.max_sequence_length = max_sequence_length + self.normalization_type = normalization_type + self.initializer = tf.keras.initializers.get(initializer) + self.dropout_rate = dropout_rate + + self.word_embedding = keras_nlp.layers.OnDeviceEmbedding( + self.word_vocab_size, + self.word_embed_size, + initializer=initializer, + name='word_embedding') + self.type_embedding = keras_nlp.layers.OnDeviceEmbedding( + self.type_vocab_size, + self.output_embed_size, + initializer=initializer, + name='type_embedding') + self.pos_embedding = keras_nlp.layers.PositionEmbedding( + max_length=max_sequence_length, + initializer=initializer, + name='position_embedding') + self.word_embedding_proj = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + output_shape=[None, self.output_embed_size], + kernel_initializer=initializer, + bias_axes='d', + name='embedding_projection') + self.layer_norm = _get_norm_layer(normalization_type, 'embedding_norm') + self.dropout_layer = tf.keras.layers.Dropout( + self.dropout_rate, + name='embedding_dropout') + + def get_config(self): + config = { + 'word_vocab_size': self.word_vocab_size, + 'word_embed_size': self.word_embed_size, + 'type_vocab_size': self.type_vocab_size, + 'output_embed_size': self.output_embed_size, + 'max_sequence_length': self.max_sequence_length, + 'normalization_type': self.normalization_type, + 
'initializer': tf.keras.initializers.serialize(self.initializer), + 'dropout_rate': self.dropout_rate + } + base_config = super(MobileBertEmbedding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, input_ids, token_type_ids=None): + word_embedding_out = self.word_embedding(input_ids) + word_embedding_out = tf.concat( + [tf.pad(word_embedding_out[:, 1:], ((0, 0), (0, 1), (0, 0))), + word_embedding_out, + tf.pad(word_embedding_out[:, :-1], ((0, 0), (1, 0), (0, 0)))], + axis=2) + word_embedding_out = self.word_embedding_proj(word_embedding_out) + + pos_embedding_out = self.pos_embedding(word_embedding_out) + embedding_out = word_embedding_out + pos_embedding_out + if token_type_ids is not None: + type_embedding_out = self.type_embedding(token_type_ids) + embedding_out += type_embedding_out + embedding_out = self.layer_norm(embedding_out) + embedding_out = self.dropout_layer(embedding_out) + + return embedding_out + + +@tf.keras.utils.register_keras_serializable(package='Text') +class MobileBertTransformer(tf.keras.layers.Layer): + """Transformer block for MobileBERT. + + An implementation of one layer (block) of Transformer with bottleneck and + inverted-bottleneck for MobilerBERT. + + Original paper for MobileBERT: + https://arxiv.org/pdf/2004.02984.pdf + """ + + def __init__(self, + hidden_size=512, + num_attention_heads=4, + intermediate_size=512, + intermediate_act_fn='relu', + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + intra_bottleneck_size=128, + use_bottleneck_attention=False, + key_query_shared_bottleneck=True, + num_feedforward_networks=4, + normalization_type='no_norm', + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + **kwargs): + """Class initialization. + + Args: + hidden_size: Hidden size for the Transformer input and output tensor. + num_attention_heads: Number of attention heads in the Transformer. 
+ intermediate_size: The size of the "intermediate" (a.k.a., feed + forward) layer. + intermediate_act_fn: The non-linear activation function to apply + to the output of the intermediate/feed-forward layer. + hidden_dropout_prob: Dropout probability for the hidden layers. + attention_probs_dropout_prob: Dropout probability of the attention + probabilities. + intra_bottleneck_size: Size of bottleneck. + use_bottleneck_attention: Use attention inputs from the bottleneck + transformation. If true, the following `key_query_shared_bottleneck` + will be ignored. + key_query_shared_bottleneck: Whether to share linear transformation for + keys and queries. + num_feedforward_networks: Number of stacked feed-forward networks. + normalization_type: The type of normalization_type, only `no_norm` and + `layer_norm` are supported. `no_norm` represents the element-wise + linear transformation for the student model, as suggested by the + original MobileBERT paper. `layer_norm` is used for the teacher model. + initializer: The initializer to use for the embedding weights and + linear projection weights. + **kwargs: keyword arguments. + + Raises: + ValueError: A Tensor shape or parameter is invalid. 
+ """ + super(MobileBertTransformer, self).__init__(**kwargs) + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.intermediate_act_fn = intermediate_act_fn + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.intra_bottleneck_size = intra_bottleneck_size + self.use_bottleneck_attention = use_bottleneck_attention + self.key_query_shared_bottleneck = key_query_shared_bottleneck + self.num_feedforward_networks = num_feedforward_networks + self.normalization_type = normalization_type + self.initializer = tf.keras.initializers.get(initializer) + + if intra_bottleneck_size % num_attention_heads != 0: + raise ValueError( + (f'The bottleneck size {intra_bottleneck_size} is not a multiple ' + f'of the number of attention heads {num_attention_heads}.')) + attention_head_size = int(intra_bottleneck_size / num_attention_heads) + + self.block_layers = {} + # add input bottleneck + dense_layer_2d = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + output_shape=[None, self.intra_bottleneck_size], + bias_axes='d', + kernel_initializer=initializer, + name='bottleneck_input/dense') + layer_norm = _get_norm_layer(self.normalization_type, + name='bottleneck_input/norm') + self.block_layers['bottleneck_input'] = [dense_layer_2d, + layer_norm] + + if self.key_query_shared_bottleneck: + dense_layer_2d = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + output_shape=[None, self.intra_bottleneck_size], + bias_axes='d', + kernel_initializer=initializer, + name='kq_shared_bottleneck/dense') + layer_norm = _get_norm_layer(self.normalization_type, + name='kq_shared_bottleneck/norm') + self.block_layers['kq_shared_bottleneck'] = [dense_layer_2d, + layer_norm] + + # add attention layer + attention_layer = tf.keras.layers.MultiHeadAttention( + num_heads=self.num_attention_heads, + key_dim=attention_head_size, + 
value_dim=attention_head_size, + dropout=self.attention_probs_dropout_prob, + output_shape=self.intra_bottleneck_size, + kernel_initializer=initializer, + name='attention') + layer_norm = _get_norm_layer(self.normalization_type, + name='attention/norm') + self.block_layers['attention'] = [attention_layer, + layer_norm] + + # add stacked feed-forward networks + self.block_layers['ffn'] = [] + for ffn_layer_idx in range(self.num_feedforward_networks): + layer_prefix = f'ffn_layer_{ffn_layer_idx}' + layer_name = layer_prefix + '/intermediate_dense' + intermediate_layer = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + activation=self.intermediate_act_fn, + output_shape=[None, self.intermediate_size], + bias_axes='d', + kernel_initializer=initializer, + name=layer_name) + layer_name = layer_prefix + '/output_dense' + output_layer = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + output_shape=[None, self.intra_bottleneck_size], + bias_axes='d', + kernel_initializer=initializer, + name=layer_name) + layer_name = layer_prefix + '/norm' + layer_norm = _get_norm_layer(self.normalization_type, + name=layer_name) + self.block_layers['ffn'].append([intermediate_layer, + output_layer, + layer_norm]) + + # add output bottleneck + bottleneck = tf.keras.layers.experimental.EinsumDense( + 'abc,cd->abd', + output_shape=[None, self.hidden_size], + activation=None, + bias_axes='d', + kernel_initializer=initializer, + name='bottleneck_output/dense') + dropout_layer = tf.keras.layers.Dropout( + self.hidden_dropout_prob, + name='bottleneck_output/dropout') + layer_norm = _get_norm_layer(self.normalization_type, + name='bottleneck_output/norm') + self.block_layers['bottleneck_output'] = [bottleneck, + dropout_layer, + layer_norm] + + def get_config(self): + config = { + 'hidden_size': self.hidden_size, + 'num_attention_heads': self.num_attention_heads, + 'intermediate_size': self.intermediate_size, + 'intermediate_act_fn': self.intermediate_act_fn, + 
'hidden_dropout_prob': self.hidden_dropout_prob, + 'attention_probs_dropout_prob': self.attention_probs_dropout_prob, + 'intra_bottleneck_size': self.intra_bottleneck_size, + 'use_bottleneck_attention': self.use_bottleneck_attention, + 'key_query_shared_bottleneck': self.key_query_shared_bottleneck, + 'num_feedforward_networks': self.num_feedforward_networks, + 'normalization_type': self.normalization_type, + 'initializer': tf.keras.initializers.serialize(self.initializer), + } + base_config = super(MobileBertTransformer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, + input_tensor, + attention_mask=None, + return_attention_scores=False): + """Implementes the forward pass. + + Args: + input_tensor: Float tensor of shape + `(batch_size, seq_length, hidden_size)`. + attention_mask: (optional) int32 tensor of shape + `(batch_size, seq_length, seq_length)`, with 1 for positions that can + be attended to and 0 in positions that should not be. + return_attention_scores: If return attention score. + + Returns: + layer_output: Float tensor of shape + `(batch_size, seq_length, hidden_size)`. + attention_scores (Optional): Only when return_attention_scores is True. + + Raises: + ValueError: A Tensor shape or parameter is invalid. 
+ """ + input_width = input_tensor.shape.as_list()[-1] + if input_width != self.hidden_size: + raise ValueError( + (f'The width of the input tensor {input_width} != ' + f'hidden size {self.hidden_size}')) + + prev_output = input_tensor + # input bottleneck + dense_layer = self.block_layers['bottleneck_input'][0] + layer_norm = self.block_layers['bottleneck_input'][1] + layer_input = dense_layer(prev_output) + layer_input = layer_norm(layer_input) + + if self.use_bottleneck_attention: + key_tensor = layer_input + query_tensor = layer_input + value_tensor = layer_input + elif self.key_query_shared_bottleneck: + dense_layer = self.block_layers['kq_shared_bottleneck'][0] + layer_norm = self.block_layers['kq_shared_bottleneck'][1] + shared_attention_input = dense_layer(prev_output) + shared_attention_input = layer_norm(shared_attention_input) + key_tensor = shared_attention_input + query_tensor = shared_attention_input + value_tensor = prev_output + else: + key_tensor = prev_output + query_tensor = prev_output + value_tensor = prev_output + + # attention layer + attention_layer = self.block_layers['attention'][0] + layer_norm = self.block_layers['attention'][1] + attention_output, attention_scores = attention_layer( + query_tensor, + value_tensor, + key_tensor, + attention_mask, + return_attention_scores=True, + ) + attention_output = layer_norm(attention_output + layer_input) + + # stacked feed-forward networks + layer_input = attention_output + for ffn_idx in range(self.num_feedforward_networks): + intermediate_layer = self.block_layers['ffn'][ffn_idx][0] + output_layer = self.block_layers['ffn'][ffn_idx][1] + layer_norm = self.block_layers['ffn'][ffn_idx][2] + intermediate_output = intermediate_layer(layer_input) + layer_output = output_layer(intermediate_output) + layer_output = layer_norm(layer_output + layer_input) + layer_input = layer_output + + # output bottleneck + bottleneck = self.block_layers['bottleneck_output'][0] + dropout_layer = 
self.block_layers['bottleneck_output'][1] + layer_norm = self.block_layers['bottleneck_output'][2] + layer_output = bottleneck(layer_output) + layer_output = dropout_layer(layer_output) + layer_output = layer_norm(layer_output + prev_output) + + if return_attention_scores: + return layer_output, attention_scores + else: + return layer_output + + +@tf.keras.utils.register_keras_serializable(package='Text') +class MobileBertMaskedLM(tf.keras.layers.Layer): + """Masked language model network head for BERT modeling. + + This layer implements a masked language model based on the provided + transformer based encoder. It assumes that the encoder network being passed + has a "get_embedding_table()" method. Different from canonical BERT's masked + LM layer, when the embedding width is smaller than hidden_size, it adds an + extra output weights in shape [vocab_size, (hidden_size - embedding_width)]. + """ + + def __init__(self, + embedding_table, + activation=None, + initializer='glorot_uniform', + output='logits', + **kwargs): + """Class initialization. + + Args: + embedding_table: The embedding table from encoder network. + activation: The activation, if any, for the dense layer. + initializer: The initializer for the dense layer. Defaults to a Glorot + uniform initializer. + output: The output style for this layer. Can be either `logits` or + `predictions`. + **kwargs: keyword arguments. + """ + super(MobileBertMaskedLM, self).__init__(**kwargs) + self.embedding_table = embedding_table + self.activation = activation + self.initializer = tf.keras.initializers.get(initializer) + + if output not in ('predictions', 'logits'): + raise ValueError( + ('Unknown `output` value "%s". 
`output` can be either "logits" or ' + '"predictions"') % output) + self._output_type = output + + def build(self, input_shape): + self._vocab_size, embedding_width = self.embedding_table.shape + hidden_size = input_shape[-1] + self.dense = tf.keras.layers.Dense( + hidden_size, + activation=self.activation, + kernel_initializer=self.initializer, + name='transform/dense') + + if hidden_size > embedding_width: + self.extra_output_weights = self.add_weight( + 'extra_output_weights', + shape=(self._vocab_size, hidden_size - embedding_width), + initializer=self.initializer, + trainable=True) + elif hidden_size == embedding_width: + self.extra_output_weights = None + else: + raise ValueError( + 'hidden size %d cannot be smaller than embedding width %d.' % + (hidden_size, embedding_width)) + + self.layer_norm = tf.keras.layers.LayerNormalization( + axis=-1, epsilon=1e-12, name='transform/LayerNorm') + self.bias = self.add_weight( + 'output_bias/bias', + shape=(self._vocab_size,), + initializer='zeros', + trainable=True) + + super(MobileBertMaskedLM, self).build(input_shape) + + def call(self, sequence_data, masked_positions): + masked_lm_input = self._gather_indexes(sequence_data, masked_positions) + lm_data = self.dense(masked_lm_input) + lm_data = self.layer_norm(lm_data) + if self.extra_output_weights is None: + lm_data = tf.matmul(lm_data, self.embedding_table, transpose_b=True) + else: + lm_data = tf.matmul( + lm_data, + tf.concat([self.embedding_table, self.extra_output_weights], axis=1), + transpose_b=True) + + logits = tf.nn.bias_add(lm_data, self.bias) + masked_positions_length = masked_positions.shape.as_list()[1] or tf.shape( + masked_positions)[1] + logits = tf.reshape(logits, + [-1, masked_positions_length, self._vocab_size]) + if self._output_type == 'logits': + return logits + return tf.nn.log_softmax(logits) + + def get_config(self): + raise NotImplementedError('MaskedLM cannot be directly serialized because ' + 'it has variable sharing logic.') + + def 
_gather_indexes(self, sequence_tensor, positions): + """Gathers the vectors at the specific positions. + + Args: + sequence_tensor: Sequence output of `BertModel` layer of shape + `(batch_size, seq_length, num_hidden)` where `num_hidden` is number of + hidden units of `BertModel` layer. + positions: Positions ids of tokens in sequence to mask for pretraining + of with dimension `(batch_size, num_predictions)` where + `num_predictions` is maximum number of tokens to mask out and predict + per each sequence. + + Returns: + Masked out sequence tensor of shape + `(batch_size * num_predictions, num_hidden)`. + """ + sequence_shape = tf.shape(sequence_tensor) + batch_size, seq_length = sequence_shape[0], sequence_shape[1] + width = sequence_tensor.shape.as_list()[2] or sequence_shape[2] + + flat_offsets = tf.reshape( + tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1]) + flat_positions = tf.reshape(positions + flat_offsets, [-1]) + flat_sequence_tensor = tf.reshape(sequence_tensor, + [batch_size * seq_length, width]) + output_tensor = tf.gather(flat_sequence_tensor, flat_positions) + + return output_tensor diff --git a/official/nlp/modeling/layers/mobile_bert_layers_test.py b/official/nlp/modeling/layers/mobile_bert_layers_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3edeec0539a1f8cf74e0063b50246f5fcbc764ae --- /dev/null +++ b/official/nlp/modeling/layers/mobile_bert_layers_test.py @@ -0,0 +1,273 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for MobileBERT embedding, transformer, and masked-LM layers."""

from absl.testing import parameterized

import numpy as np
import tensorflow as tf

from official.nlp.modeling.layers import mobile_bert_layers
from official.nlp.modeling.networks import mobile_bert_encoder


def generate_fake_input(batch_size=1, seq_len=5, vocab_size=10000, seed=0):
  """Generate consistent fake integer input sequences."""
  # NOTE(review): appears unused by the tests in this file — confirm before
  # removing.
  np.random.seed(seed)
  fake_input = []
  for _ in range(batch_size):
    fake_input.append([])
    for _ in range(seq_len):
      fake_input[-1].append(np.random.randint(0, vocab_size))
  fake_input = np.asarray(fake_input)
  return fake_input


class MobileBertEncoderTest(parameterized.TestCase, tf.test.TestCase):

  def test_embedding_layer_with_token_type(self):
    # Embedding output width equals output_embed_size (16), not
    # word_embed_size (8).
    layer = mobile_bert_layers.MobileBertEmbedding(10, 8, 2, 16)
    input_seq = tf.Variable([[2, 3, 4, 5]])
    token_type = tf.Variable([[0, 1, 1, 1]])
    output = layer(input_seq, token_type)
    output_shape = output.shape.as_list()
    expected_shape = [1, 4, 16]
    self.assertListEqual(output_shape, expected_shape, msg=None)

  def test_embedding_layer_without_token_type(self):
    layer = mobile_bert_layers.MobileBertEmbedding(10, 8, 2, 16)
    input_seq = tf.Variable([[2, 3, 4, 5]])
    output = layer(input_seq)
    output_shape = output.shape.as_list()
    expected_shape = [1, 4, 16]
    self.assertListEqual(output_shape, expected_shape, msg=None)

  def test_embedding_layer_get_config(self):
    # Round-trip the layer through get_config/from_config.
    layer = mobile_bert_layers.MobileBertEmbedding(
        word_vocab_size=16,
        word_embed_size=32,
        type_vocab_size=4,
        output_embed_size=32,
        max_sequence_length=32,
        normalization_type='layer_norm',
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
        dropout_rate=0.5)
    layer_config = layer.get_config()
    new_layer = mobile_bert_layers.MobileBertEmbedding.from_config(layer_config)
    self.assertEqual(layer_config, new_layer.get_config())

  def test_no_norm(self):
    # NoNorm is element-wise, so the output shape matches the input shape.
    layer = mobile_bert_layers.NoNorm()
    feature = tf.random.normal([2, 3, 4])
    output = layer(feature)
    output_shape = output.shape.as_list()
    expected_shape = [2, 3, 4]
    self.assertListEqual(output_shape, expected_shape, msg=None)

  @parameterized.named_parameters(('with_kq_shared_bottleneck', False),
                                  ('without_kq_shared_bottleneck', True))
  def test_transfomer_kq_shared_bottleneck(self, is_kq_shared):
    feature = tf.random.uniform([2, 3, 512])
    layer = mobile_bert_layers.MobileBertTransformer(
        key_query_shared_bottleneck=is_kq_shared)
    output = layer(feature)
    output_shape = output.shape.as_list()
    expected_shape = [2, 3, 512]
    self.assertListEqual(output_shape, expected_shape, msg=None)

  def test_transfomer_with_mask(self):
    feature = tf.random.uniform([2, 3, 512])
    input_mask = [[[0., 0., 1.], [0., 0., 1.], [0., 0., 1.]],
                  [[0., 1., 1.], [0., 1., 1.], [0., 1., 1.]]]
    input_mask = np.asarray(input_mask)
    layer = mobile_bert_layers.MobileBertTransformer()
    output = layer(feature, input_mask)
    output_shape = output.shape.as_list()
    expected_shape = [2, 3, 512]
    self.assertListEqual(output_shape, expected_shape, msg=None)

  def test_transfomer_return_attention_score(self):
    sequence_length = 5
    num_attention_heads = 8
    feature = tf.random.uniform([2, sequence_length, 512])
    layer = mobile_bert_layers.MobileBertTransformer(
        num_attention_heads=num_attention_heads)
    _, attention_score = layer(feature, return_attention_scores=True)
    # Scores are per-head: (batch, heads, query_len, key_len).
    expected_shape = [2, num_attention_heads, sequence_length, sequence_length]
    self.assertListEqual(
        attention_score.shape.as_list(), expected_shape, msg=None)

  def test_transformer_get_config(self):
    # Round-trip the transformer block through get_config/from_config.
    layer = mobile_bert_layers.MobileBertTransformer(
        hidden_size=32,
        num_attention_heads=2,
        intermediate_size=48,
        intermediate_act_fn='gelu',
        hidden_dropout_prob=0.5,
        attention_probs_dropout_prob=0.4,
        intra_bottleneck_size=64,
        use_bottleneck_attention=True,
        key_query_shared_bottleneck=False,
        num_feedforward_networks=2,
        normalization_type='layer_norm',
        initializer=tf.keras.initializers.TruncatedNormal(stddev=0.01),
        name='block')
    layer_config = layer.get_config()
    new_layer = mobile_bert_layers.MobileBertTransformer.from_config(
        layer_config)
    self.assertEqual(layer_config, new_layer.get_config())


class MobileBertMaskedLMTest(tf.test.TestCase):

  def create_layer(self,
                   vocab_size,
                   hidden_size,
                   embedding_width,
                   output='predictions',
                   xformer_stack=None):
    # First, create a transformer stack that we can use to get the LM's
    # vocabulary weight.
    if xformer_stack is None:
      xformer_stack = mobile_bert_encoder.MobileBERTEncoder(
          word_vocab_size=vocab_size,
          num_blocks=1,
          hidden_size=hidden_size,
          num_attention_heads=4,
          word_embed_size=embedding_width)

    # Create a maskedLM from the transformer stack.
    test_layer = mobile_bert_layers.MobileBertMaskedLM(
        embedding_table=xformer_stack.get_embedding_table(), output=output)
    return test_layer

  def test_layer_creation(self):
    vocab_size = 100
    sequence_length = 32
    hidden_size = 64
    embedding_width = 32
    num_predictions = 21
    test_layer = self.create_layer(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        embedding_width=embedding_width)

    # Make sure that the output tensor of the masked LM is the right shape.
    lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
    masked_positions = tf.keras.Input(shape=(num_predictions,), dtype=tf.int32)
    output = test_layer(lm_input_tensor, masked_positions=masked_positions)

    expected_output_shape = [None, num_predictions, vocab_size]
    self.assertEqual(expected_output_shape, output.shape.as_list())

  def test_layer_invocation_with_external_logits(self):
    # Builds two heads sharing the same embedding table; the 'predictions'
    # head should equal log_softmax applied to the 'logits' head once weights
    # are synchronized.
    vocab_size = 100
    sequence_length = 32
    hidden_size = 64
    embedding_width = 32
    num_predictions = 21
    xformer_stack = mobile_bert_encoder.MobileBERTEncoder(
        word_vocab_size=vocab_size,
        num_blocks=1,
        hidden_size=hidden_size,
        num_attention_heads=4,
        word_embed_size=embedding_width)
    test_layer = self.create_layer(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        embedding_width=embedding_width,
        xformer_stack=xformer_stack,
        output='predictions')
    logit_layer = self.create_layer(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        embedding_width=embedding_width,
        xformer_stack=xformer_stack,
        output='logits')

    # Create a model from the masked LM layer.
    lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
    masked_positions = tf.keras.Input(shape=(num_predictions,), dtype=tf.int32)
    output = test_layer(lm_input_tensor, masked_positions)
    logit_output = logit_layer(lm_input_tensor, masked_positions)
    logit_output = tf.keras.layers.Activation(tf.nn.log_softmax)(logit_output)
    logit_layer.set_weights(test_layer.get_weights())
    model = tf.keras.Model([lm_input_tensor, masked_positions], output)
    logits_model = tf.keras.Model(([lm_input_tensor, masked_positions]),
                                  logit_output)

    # Invoke the masked LM on some fake data to make sure there are no runtime
    # errors in the code.
    batch_size = 3
    lm_input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, hidden_size))
    masked_position_data = np.random.randint(
        sequence_length, size=(batch_size, num_predictions))
    # ref_outputs = model.predict([lm_input_data, masked_position_data])
    # outputs = logits_model.predict([lm_input_data, masked_position_data])
    ref_outputs = model([lm_input_data, masked_position_data])
    outputs = logits_model([lm_input_data, masked_position_data])

    # Ensure that the tensor shapes are correct.
    expected_output_shape = (batch_size, num_predictions, vocab_size)
    self.assertEqual(expected_output_shape, ref_outputs.shape)
    self.assertEqual(expected_output_shape, outputs.shape)
    self.assertAllClose(ref_outputs, outputs)

  def test_layer_invocation(self):
    vocab_size = 100
    sequence_length = 32
    hidden_size = 64
    embedding_width = 32
    num_predictions = 21
    test_layer = self.create_layer(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        embedding_width=embedding_width)

    # Create a model from the masked LM layer.
    lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
    masked_positions = tf.keras.Input(shape=(num_predictions,), dtype=tf.int32)
    output = test_layer(lm_input_tensor, masked_positions)
    model = tf.keras.Model([lm_input_tensor, masked_positions], output)

    # Invoke the masked LM on some fake data to make sure there are no runtime
    # errors in the code.
    batch_size = 3
    lm_input_data = 10 * np.random.random_sample(
        (batch_size, sequence_length, hidden_size))
    masked_position_data = np.random.randint(
        2, size=(batch_size, num_predictions))
    _ = model.predict([lm_input_data, masked_position_data])

  def test_unknown_output_type_fails(self):
    with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'):
      _ = self.create_layer(
          vocab_size=8, hidden_size=8, embedding_width=4, output='bad')

  def test_hidden_size_smaller_than_embedding_width(self):
    hidden_size = 8
    sequence_length = 32
    num_predictions = 20
    with self.assertRaisesRegex(
        ValueError, 'hidden size 8 cannot be smaller than embedding width 16.'):
      test_layer = self.create_layer(
          vocab_size=8, hidden_size=8, embedding_width=16)
      lm_input_tensor = tf.keras.Input(shape=(sequence_length, hidden_size))
      masked_positions = tf.keras.Input(
          shape=(num_predictions,), dtype=tf.int32)
      _ = test_layer(lm_input_tensor, masked_positions)


if __name__ == '__main__':
  tf.test.main()
diff --git a/official/nlp/modeling/layers/multi_channel_attention.py b/official/nlp/modeling/layers/multi_channel_attention.py
index 16fb2f29f32e6d235a2f37a11e3afdf8dff5a0c2..936a2fe8e8b5f838506e202b0828c50eec38bc6e 100644
--- a/official/nlp/modeling/layers/multi_channel_attention.py
+++ b/official/nlp/modeling/layers/multi_channel_attention.py
@@ -1,5 +1,4 @@
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,29 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ============================================================================== + """Multi-channel Attention.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - import math import tensorflow as tf from official.modeling import tf_utils -from official.nlp.modeling.layers import attention from official.nlp.modeling.layers import masked_softmax class VotingAttention(tf.keras.layers.Layer): """Voting Attention layer. - Arguments: - num_heads: the number of attention heads. - head_size: per-head hidden size. + Args: + num_heads: The number of attention heads. + head_size: Per-head hidden size. kernel_initializer: Initializer for dense layer kernels. bias_initializer: Initializer for dense layer biases. kernel_regularizer: Regularizer for dense layer kernels. @@ -107,43 +100,61 @@ class VotingAttention(tf.keras.layers.Layer): return tf.nn.softmax(doc_attention_probs + infadder) -class MultiChannelAttention(attention.MultiHeadAttention): +class MultiChannelAttention(tf.keras.layers.MultiHeadAttention): """Multi-channel Attention layer. - Introduced in: https://arxiv.org/abs/2001.09386. Expects multiple - cross-attention target sequences. + Introduced in, [Generating Representative Headlines for News Stories + ](https://arxiv.org/abs/2001.09386). Expects multiple cross-attention + target sequences. + + Call args: + query: Query `Tensor` of shape `[B, T, dim]`. + value: Value `Tensor` of shape `[B, A, S, dim]`, where A denotes the + context_attention_weights: Context weights of shape `[B, N, T, A]`, where N + is the number of attention heads. Combines multi-channel sources + context tensors according to the distribution among channels. + key: Optional key `Tensor` of shape `[B, A, S, dim]`. If not given, will use + `value` for both `key` and `value`, which is the most common case. 
+ attention_mask: A boolean mask of shape `[B, T, S]`, that prevents attention + to certain positions. """ - def _build_attention(self, qkv_rank): - super(MultiChannelAttention, self)._build_attention(qkv_rank) + def _build_attention(self, rank): + super(MultiChannelAttention, self)._build_attention(rank) self._masked_softmax = masked_softmax.MaskedSoftmax(mask_expansion_axes=[2]) - def call(self, inputs, attention_mask=None): - from_tensor = inputs[0] - to_tensor = inputs[1] - doc_attention_probs = inputs[2] + def call(self, + query, + value, + key=None, + context_attention_weights=None, + attention_mask=None): + if not self._built_from_signature: + self._build_from_signature(query, value, key=key) + if key is None: + key = value # Scalar dimensions referenced here: # B = batch size (number of stories) # A = num_docs (number of docs) - # F = `from_tensor` sequence length - # T = `to_tensor` sequence length + # F = target sequence length + # T = source sequence length # N = `num_attention_heads` # H = `size_per_head` # `query_tensor` = [B, F, N ,H] - query_tensor = self._query_dense(from_tensor) + query_tensor = self._query_dense(query) # `key_tensor` = [B, A, T, N, H] - key_tensor = self._key_dense(to_tensor) + key_tensor = self._key_dense(key) # `value_tensor` = [B, A, T, N, H] - value_tensor = self._value_dense(to_tensor) + value_tensor = self._value_dense(value) # Take the dot product between "query" and "key" to get the raw # attention scores. attention_scores = tf.einsum("BATNH,BFNH->BANFT", key_tensor, query_tensor) attention_scores = tf.multiply(attention_scores, - 1.0 / math.sqrt(float(self._key_size))) + 1.0 / math.sqrt(float(self._key_dim))) # Normalize the attention scores to probabilities. 
# `attention_probs` = [B, A, N, F, T] @@ -156,7 +167,7 @@ class MultiChannelAttention(attention.MultiHeadAttention): # `context_layer` = [B, F, N, H] context_layer = tf.einsum("BANFT,BATNH->BAFNH", attention_probs, value_tensor) - attention_output = tf.einsum("BNFA,BAFNH->BFNH", doc_attention_probs, + attention_output = tf.einsum("BNFA,BAFNH->BFNH", context_attention_weights, context_layer) attention_output = self._output_dense(attention_output) return attention_output diff --git a/official/nlp/modeling/layers/multi_channel_attention_test.py b/official/nlp/modeling/layers/multi_channel_attention_test.py index ab6e0e7fec48635d09e6e30c3ad247044ae9785f..0710ae5146e5689a37e623bf0cab1a2084a0075c 100644 --- a/official/nlp/modeling/layers/multi_channel_attention_test.py +++ b/official/nlp/modeling/layers/multi_channel_attention_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for nlp.nhnet.multi_channel_attention.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for nlp.nhnet.multi_channel_attention.""" import numpy as np import tensorflow as tf @@ -41,14 +36,18 @@ class MultiChannelAttentionTest(tf.test.TestCase): num_heads = 2 num_docs = 5 attention_layer = multi_channel_attention.MultiChannelAttention( - num_heads, key_size=2) + num_heads, key_dim=2) from_data = 10 * np.random.random_sample((3, 4, 8)) to_data = 10 * np.random.random_sample((3, num_docs, 2, 8)) mask_data = np.random.randint(2, size=(3, num_docs, 4, 2)) doc_probs = np.random.randint( 2, size=(3, num_heads, 4, num_docs)).astype(float) - outputs = attention_layer([from_data, to_data, doc_probs], mask_data) + outputs = attention_layer( + query=from_data, + value=to_data, + context_attention_weights=doc_probs, + attention_mask=mask_data) self.assertEqual(outputs.shape, (3, 4, 8)) diff --git a/official/nlp/modeling/layers/on_device_embedding.py b/official/nlp/modeling/layers/on_device_embedding.py index 739cdb7e4dde157ef52d7a98769a4c40819634a7..3a2c870ba9323da184df331190f0275b832e21ec 100644 --- a/official/nlp/modeling/layers/on_device_embedding.py +++ b/official/nlp/modeling/layers/on_device_embedding.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,78 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Keras-based one-hot embedding layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import tensorflow as tf - - -@tf.keras.utils.register_keras_serializable(package="Text") -class OnDeviceEmbedding(tf.keras.layers.Layer): - """Performs an embedding lookup suitable for accelerator devices. - - This layer uses either tf.gather or tf.one_hot to translate integer indices to - float embeddings. - - Arguments: - vocab_size: Number of elements in the vocabulary. - embedding_width: Output size of the embedding layer. - initializer: The initializer to use for the embedding weights. Defaults to - "glorot_uniform". - use_one_hot: Whether to use tf.one_hot over tf.gather for the embedding - lookup. Defaults to False (that is, using tf.gather). Setting this option - to True may improve performance, especially on small vocabulary sizes, but - will generally require more memory. 
- """ - - def __init__(self, - vocab_size, - embedding_width, - initializer="glorot_uniform", - use_one_hot=False, - **kwargs): - - super(OnDeviceEmbedding, self).__init__(**kwargs) - self._vocab_size = vocab_size - self._embedding_width = embedding_width - self._initializer = initializer - self._use_one_hot = use_one_hot - - def get_config(self): - config = { - "vocab_size": self._vocab_size, - "embedding_width": self._embedding_width, - "initializer": self._initializer, - "use_one_hot": self._use_one_hot, - } - base_config = super(OnDeviceEmbedding, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - def build(self, input_shape): - self.embeddings = self.add_weight( - "embeddings", - shape=[self._vocab_size, self._embedding_width], - initializer=self._initializer, - dtype=tf.float32) +from official.nlp import keras_nlp - super(OnDeviceEmbedding, self).build(input_shape) - def call(self, inputs): - flat_inputs = tf.reshape(inputs, [-1]) - if self._use_one_hot: - one_hot_data = tf.one_hot( - flat_inputs, depth=self._vocab_size, dtype=self.embeddings.dtype) - embeddings = tf.matmul(one_hot_data, self.embeddings) - else: - embeddings = tf.gather(self.embeddings, flat_inputs) - embeddings = tf.reshape( - embeddings, - # Work around b/142213824: prefer concat to shape over a Python list. - tf.concat([tf.shape(inputs), [self._embedding_width]], axis=0)) - embeddings.set_shape(inputs.shape.as_list() + [self._embedding_width]) - return embeddings +OnDeviceEmbedding = keras_nlp.layers.OnDeviceEmbedding diff --git a/official/nlp/modeling/layers/position_embedding.py b/official/nlp/modeling/layers/position_embedding.py index 169e54de112d9a3ce65e9fa68f066a107d35c7a4..5f362ddb3632bcc1a07cf0694ed3a4032f932e6d 100644 --- a/official/nlp/modeling/layers/position_embedding.py +++ b/official/nlp/modeling/layers/position_embedding.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,115 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based positional embedding layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - import math +from typing import Optional import tensorflow as tf from official.modeling import tf_utils - -@tf.keras.utils.register_keras_serializable(package="Text") -class PositionEmbedding(tf.keras.layers.Layer): - """Creates a positional embedding. - - This layer creates a positional embedding as described in "BERT: Pre-training - of Deep Bidirectional Transformers for Language Understanding" - (https://arxiv.org/abs/1810.04805). - - This layer can be set up to either create a statically shaped slice or a - dynamically shaped slice. If `use_dynamic_slicing` is True, the input tensor - can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the - input size must be fixed. - - Arguments: - use_dynamic_slicing: Whether to use the dynamic slicing path. - max_sequence_length: The maximum size of the dynamic sequence. Only - applicable if `use_dynamic_slicing` is True. - initializer: The initializer to use for the embedding weights. Defaults to - "glorot_uniform". - """ - - def __init__(self, - initializer="glorot_uniform", - use_dynamic_slicing=False, - max_sequence_length=None, - **kwargs): - # We need to have a default dtype of float32, since the inputs (which Keras - # usually uses to infer the dtype) will always be int32. 
- if "dtype" not in kwargs: - kwargs["dtype"] = "float32" - - super(PositionEmbedding, self).__init__(**kwargs) - if use_dynamic_slicing and max_sequence_length is None: - raise ValueError( - "If `use_dynamic_slicing` is True, `max_sequence_length` must be set." - ) - self._max_sequence_length = max_sequence_length - self._initializer = tf.keras.initializers.get(initializer) - self._use_dynamic_slicing = use_dynamic_slicing - - def get_config(self): - config = { - "max_sequence_length": self._max_sequence_length, - "initializer": tf.keras.initializers.serialize(self._initializer), - "use_dynamic_slicing": self._use_dynamic_slicing, - } - base_config = super(PositionEmbedding, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def build(self, input_shape): - """Implements build() for the layer.""" - dimension_list = input_shape.as_list() - - if len(dimension_list) != 3: - raise ValueError("PositionEmbedding expects a 3-dimensional input tensor " - "of shape [batch, sequence, width]") - seq_length = dimension_list[1] - width = dimension_list[2] - - # If we are not using dynamic slicing, we must assume that the sequence - # length is fixed and max_sequence_length should not be specified. 
- if not self._use_dynamic_slicing: - if seq_length is None: - raise ValueError( - "PositionEmbedding must have `use_dynamic_slicing` set " - "to True (and max_sequence_length set) when the " - "sequence (1st) dimension of the input is None.") - if self._max_sequence_length is not None: - raise ValueError( - "When `use_dynamic_slicing` is False, max_sequence_length should " - "not be specified and we ought to use seq_length to get the " - "variable shape.") - - if self._max_sequence_length is not None: - weight_sequence_length = self._max_sequence_length - else: - weight_sequence_length = seq_length - - self._position_embeddings = self.add_weight( - "embeddings", - shape=[weight_sequence_length, width], - initializer=self._initializer) - - super(PositionEmbedding, self).build(input_shape) - - def call(self, inputs): - """Implements call() for the layer.""" - input_shape = tf_utils.get_shape_list(inputs, expected_rank=3) - if self._use_dynamic_slicing: - position_embeddings = self._position_embeddings[:input_shape[1], :] - else: - position_embeddings = self._position_embeddings - - return tf.broadcast_to(position_embeddings, input_shape) +Initializer = tf.keras.initializers.Initializer @tf.keras.utils.register_keras_serializable(package="Text") @@ -131,16 +33,16 @@ class RelativePositionEmbedding(tf.keras.layers.Layer): "Attention is All You Need", section 3.5. (https://arxiv.org/abs/1706.03762). - Arguments: + Args: hidden_size: Size of the hidden layer. min_timescale: Minimum scale that will be applied at each position max_timescale: Maximum scale that will be applied at each position. """ def __init__(self, - hidden_size, - min_timescale=1.0, - max_timescale=1.0e4, + hidden_size: int, + min_timescale: float = 1.0, + max_timescale: float = 1.0e4, **kwargs): # We need to have a default dtype of float32, since the inputs (which Keras # usually uses to infer the dtype) will always be int32. 
@@ -150,7 +52,7 @@ class RelativePositionEmbedding(tf.keras.layers.Layer): if "dtype" not in kwargs: kwargs["dtype"] = "float32" - super(RelativePositionEmbedding, self).__init__(**kwargs) + super().__init__(**kwargs) self._hidden_size = hidden_size self._min_timescale = min_timescale self._max_timescale = max_timescale @@ -160,7 +62,6 @@ class RelativePositionEmbedding(tf.keras.layers.Layer): "hidden_size": self._hidden_size, "min_timescale": self._min_timescale, "max_timescale": self._max_timescale, - "length": self._length, } base_config = super(RelativePositionEmbedding, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -172,22 +73,20 @@ class RelativePositionEmbedding(tf.keras.layers.Layer): inputs: An tensor whose second dimension will be used as `length`. If `None`, the other `length` argument must be specified. length: An optional integer specifying the number of positions. If both - `inputs` and `length` are spcified, `length` must be equal to the - second dimension of `inputs`. + `inputs` and `length` are spcified, `length` must be equal to the second + dimension of `inputs`. Returns: - A tensor in shape of [length, hidden_size]. + A tensor in shape of `(length, hidden_size)`. """ if inputs is None and length is None: - raise ValueError( - "If inputs is None, `length` must be set in " - "RelativePositionEmbedding().") + raise ValueError("If inputs is None, `length` must be set in " + "RelativePositionEmbedding().") if inputs is not None: input_shape = tf_utils.get_shape_list(inputs) if length is not None and length != input_shape[1]: raise ValueError( - "If inputs is not None, `length` must equal to input_shape[1]." 
- ) + "If inputs is not None, `length` must equal to input_shape[1].") length = input_shape[1] position = tf.cast(tf.range(length), tf.float32) num_timescales = self._hidden_size // 2 @@ -198,8 +97,141 @@ class RelativePositionEmbedding(tf.keras.layers.Layer): inv_timescales = min_timescale * tf.exp( tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment) - scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, - 0) - position_embeddings = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], - axis=1) + scaled_time = tf.expand_dims(position, 1) * tf.expand_dims( + inv_timescales, 0) + position_embeddings = tf.concat( + [tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) return position_embeddings + + +def _relative_position_bucket(relative_position, + bidirectional=True, + num_buckets=32, + max_distance=128): + """Translate relative position to a bucket number for relative attention. + + The relative position is defined as memory_position - query_position, i.e. + the distance in tokens from the attending position to the attended-to + position. + + If `bidirectional=False`, then positive relative positions are invalid. + + We use smaller buckets for small absolute relative_position and larger + buckets for larger absolute relative_positions. + + All relative positions >=max_distance map to the same bucket. + + All relative positions <=-max_distance map to the same bucket. + + This should allow for more graceful generalization to longer sequences + than the model has been trained on. 
+ + Args: + relative_position: An int32 Tensor + bidirectional: A boolean - whether the attention is bidirectional + num_buckets: An integer + max_distance: An integer + + Returns: + A Tensor with the same shape as relative_position, containing int32 + values in the range [0, num_buckets) + """ + ret = 0 + n = -relative_position + if bidirectional: + num_buckets //= 2 + ret += tf.cast(tf.math.less(n, 0), tf.int32) * num_buckets + n = tf.math.abs(n) + else: + n = tf.math.maximum(n, 0) + # now n is in the range [0, inf) + max_exact = num_buckets // 2 + is_small = tf.math.less(n, max_exact) + val_if_large = max_exact + tf.dtypes.cast( + tf.math.log(tf.cast(n, tf.float32) / max_exact) / + math.log(max_distance / max_exact) * (num_buckets - max_exact), + tf.int32, + ) + val_if_large = tf.math.minimum(val_if_large, num_buckets - 1) + ret += tf.where(is_small, n, val_if_large) + return ret + + +@tf.keras.utils.register_keras_serializable(package="Text") +class RelativePositionBias(tf.keras.layers.Layer): + """Relative position embedding via per-head bias in T5 style. 
+ + Reference implementation in MeshTF: + https://github.com/tensorflow/mesh/blob/master/mesh_tensorflow/transformer/transformer_layers.py#L1000 + + This layer implements the relative position bias used in "Exploring the Limits + of Transfer Learning with a Unified Text-to-Text Transformer" + (https://arxiv.org/abs/1910.10683) + """ + + def __init__(self, + num_heads: int, + relative_attention_num_buckets: int = 32, + relative_attention_max_distance: int = 128, + bidirectional: bool = True, + embeddings_initializer: Optional[Initializer] = None, + **kwargs): + super().__init__(**kwargs) + self.num_heads = num_heads + self.relative_attention_num_buckets = relative_attention_num_buckets + self.bidirectional = bidirectional + self.relative_attention_max_distance = relative_attention_max_distance + if embeddings_initializer: + self._embed_init = embeddings_initializer + else: + self._embed_init = tf.keras.initializers.TruncatedNormal(stddev=1.0) + with tf.name_scope(self.name): + self._relative_attention_bias = self.add_weight( + "rel_embedding", + shape=[self.relative_attention_num_buckets, self.num_heads], + initializer=self._embed_init, + dtype=self.dtype, + trainable=True) + + def get_config(self): + config = { + "num_heads": + self.num_heads, + "relative_attention_num_buckets": + self.relative_attention_num_buckets, + "relative_attention_max_distance": + self.relative_attention_max_distance, + "bidirectional": + self.bidirectional, + "embeddings_initializer": + tf.keras.initializers.serialize(self._embed_init), + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, query: tf.Tensor, key: tf.Tensor): + """Implements the forward pass. + + Args: + query: query input tensor shape [batch, query length, hidden size]. + key: key input tensor shape [batch, key length, hidden size]. + + Returns: + A tensor in shape of [batch, heads, query length, key length]. 
+ """ + batch_size, qlen = tf_utils.get_shape_list(query)[:2] + klen = tf_utils.get_shape_list(key)[1] + context_position = tf.range(qlen)[:, None] + memory_position = tf.range(klen)[None, :] + relative_position = memory_position - context_position + rp_bucket = _relative_position_bucket( + relative_position, + bidirectional=self.bidirectional, + num_buckets=self.relative_attention_num_buckets, + max_distance=self.relative_attention_max_distance) + values = tf.nn.embedding_lookup(self._relative_attention_bias, rp_bucket) + values = tf.expand_dims( + tf.transpose(values, [2, 0, 1]), + axis=0) # shape (1, num_heads, qlen, klen) + values = tf.tile(values, [batch_size, 1, 1, 1]) + return values diff --git a/official/nlp/modeling/layers/position_embedding_test.py b/official/nlp/modeling/layers/position_embedding_test.py index 89a29af7a4e8dc7c0369131f635d1c6abe74fdbc..4ad66835df2f00fd6df15aaab5770e3cfa6e6510 100644 --- a/official/nlp/modeling/layers/position_embedding_test.py +++ b/official/nlp/modeling/layers/position_embedding_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for Keras-based positional embedding layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based positional embedding layer.""" +from absl.testing import parameterized import numpy as np import tensorflow as tf @@ -28,75 +25,7 @@ from official.nlp.modeling.layers import position_embedding # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It # guarantees forward compatibility of this code for the V2 switchover. @keras_parameterized.run_all_keras_modes -class PositionEmbeddingLayerTest(keras_parameterized.TestCase): - - def test_static_layer_output_shape(self): - test_layer = position_embedding.PositionEmbedding() - # Create a 3-dimensional input (the first dimension is implicit). - sequence_length = 21 - width = 30 - input_tensor = tf.keras.Input(shape=(sequence_length, width)) - output_tensor = test_layer(input_tensor) - - # When using static positional embedding shapes, the output is expected - # to be the same as the input shape in all dimensions save batch. - expected_output_shape = [None, sequence_length, width] - self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) - # The default output dtype for this layer should be tf.float32. - self.assertEqual(tf.float32, output_tensor.dtype) - - def test_float16_dtype(self): - test_layer = position_embedding.PositionEmbedding(dtype="float16") - # Create a 3-dimensional input (the first dimension is implicit). - sequence_length = 21 - width = 30 - input_tensor = tf.keras.Input(shape=(sequence_length, width)) - output_tensor = test_layer(input_tensor) - - # When using static positional embedding shapes, the output is expected - # to be the same as the input shape in all dimensions save batch. 
- expected_output_shape = [None, sequence_length, width] - self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) - # The default output dtype for this layer should be tf.float32. - self.assertEqual(tf.float16, output_tensor.dtype) - - def test_dynamic_layer_output_shape(self): - max_sequence_length = 40 - test_layer = position_embedding.PositionEmbedding( - use_dynamic_slicing=True, max_sequence_length=max_sequence_length) - # Create a 3-dimensional input (the first dimension is implicit). - width = 30 - input_tensor = tf.keras.Input(shape=(None, width)) - output_tensor = test_layer(input_tensor) - - # When using dynamic positional embedding shapes, the output is expected - # to be the same as the input shape in all dimensions - but may be None if - # the input shape is None there. - expected_output_shape = [None, None, width] - self.assertEqual(expected_output_shape, output_tensor.shape.as_list()) - - def test_dynamic_layer_slicing(self): - max_sequence_length = 40 - test_layer = position_embedding.PositionEmbedding( - use_dynamic_slicing=True, max_sequence_length=max_sequence_length) - # Create a 3-dimensional input (the first dimension is implicit). - width = 30 - input_tensor = tf.keras.Input(shape=(None, width)) - output_tensor = test_layer(input_tensor) - - model = tf.keras.Model(input_tensor, output_tensor) - - # Create input data that is shorter than max_sequence_length, which should - # trigger a down-slice. - input_length = 17 - # Note: This test explicitly uses a batch size of 1. This is to get around - # Keras' restriction on Model invocations: inputs are expected to have the - # same batch cardinality as outputs. In practice, this layer should be used - # inside a model, where it can be projected when added to another tensor. 
- input_data = np.ones((1, input_length, width)) - output_data = model.predict(input_data) - - self.assertAllEqual([1, input_length, width], output_data.shape) +class RelativePositionEmbeddingLayerTest(keras_parameterized.TestCase): def test_relative_tensor_input(self): hidden_size = 8 @@ -127,5 +56,33 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase): expected_output_tensor = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]]) self.assertAllEqual(output_tensor, expected_output_tensor) + +@keras_parameterized.run_all_keras_modes +class RelativePositionBiasTest(keras_parameterized.TestCase): + + @parameterized.named_parameters(("bidirectional", True), + ("unidirectional", False)) + def test_relative_position_bias(self, bidirectional): + query = tf.zeros((4, 4, 2)) + key = tf.zeros((4, 2, 2)) + l = position_embedding.RelativePositionBias( + num_heads=3, + bidirectional=bidirectional, + name="foo") + self.assertEqual(l(query, key).shape, (4, 3, 4, 2)) + self.assertLen(l.trainable_variables, 1) + self.assertEqual(l.trainable_variables[0].name, "foo/rel_embedding:0") + + def test_relative_position_bucket(self): + context_position = tf.range(3)[:, None] + memory_position = tf.range(2)[None, :] + relative_position = memory_position - context_position + outputs = position_embedding._relative_position_bucket(relative_position) + self.assertAllEqual(outputs.numpy(), np.array([[0, 17], [1, 0], [2, 1]])) + outputs = position_embedding._relative_position_bucket( + relative_position, bidirectional=False) + self.assertAllEqual(outputs.numpy(), np.array([[0, 0], [1, 0], [2, 1]])) + + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/modeling/layers/relative_attention.py b/official/nlp/modeling/layers/relative_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..be18c9d1eb0bdedab8b7bd07964b5aefadcfbe61 --- /dev/null +++ b/official/nlp/modeling/layers/relative_attention.py @@ -0,0 +1,499 @@ +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based relative attention layers.""" +import math +import string +import tensorflow as tf + +_CHR_IDX = string.ascii_lowercase + + +def _build_proj_equation(free_dims, bound_dims, output_dims): + """Builds an einsum equation for projections inside multi-head attention.""" + input_str = "" + kernel_str = "" + output_str = "" + bias_axes = "" + letter_offset = 0 + for i in range(free_dims): + char = _CHR_IDX[i + letter_offset] + input_str += char + output_str += char + + letter_offset += free_dims + for i in range(bound_dims): + char = _CHR_IDX[i + letter_offset] + input_str += char + kernel_str += char + + letter_offset += bound_dims + for i in range(output_dims): + char = _CHR_IDX[i + letter_offset] + kernel_str += char + output_str += char + bias_axes += char + equation = "%s,%s->%s" % (input_str, kernel_str, output_str) + + return equation, bias_axes, len(output_str) + + +def _get_output_shape(output_rank, known_last_dims): + return [None] * (output_rank - len(known_last_dims)) + list(known_last_dims) + + +def _rel_shift(x, klen=-1): + """Performs relative shift to form the relative attention score.""" + + x = tf.transpose(x, perm=[2, 3, 0, 1]) + x_size = tf.shape(x) + + x = tf.reshape(x, [x_size[1], x_size[0], x_size[2], x_size[3]]) + x = tf.slice(x, [1, 0, 0, 0], [-1, -1, -1, -1]) + x = tf.reshape(x, [x_size[0], x_size[1] - 1, x_size[2], x_size[3]]) + x = tf.slice(x, [0, 0, 0, 0], [-1, 
klen, -1, -1]) + + x = tf.transpose(x, perm=[2, 3, 0, 1]) + + return x + + +@tf.keras.utils.register_keras_serializable(package="Text") +class MultiHeadRelativeAttention(tf.keras.layers.MultiHeadAttention): + """A multi-head attention layer with relative attention + position encoding. + + This layer shares the same input/output projections as the common + `tf.keras.layers.MultiHeadAttention` layer. + + When it calculates attention logits, position encoding is projected to form + relative keys. The logits are composed by shifted relative logits and content + logits. + + **Note: This layer is currently experimental. + + Attributes: + kernel_initializer: The kernel initializer. Defaults to variance_scaling. + + Call args: + query: Query `Tensor` of shape `[B, T, dim]`. + value: Value `Tensor` of shape `[B, S, dim]`. + content_attention_bias: Bias `Tensor` for content based attention of shape + `[num_heads, dim]`. + positional_attention_bias: Bias `Tensor` for position based attention of + shape `[num_heads, dim]`. + key: Optional key `Tensor` of shape `[B, S, dim]`. If not given, will use + `value` for both `key` and `value`, which is the most common case. + relative_position_encoding: Relative positional encoding `Tensor` of shape + `[B, L, dim]`. + segment_matrix: Optional `Tensor` representing segmentation IDs used in + XLNet of shape `[B, S, S + M]`. + segment_encoding: Optional `Tensor` representing the segmentation + encoding as used in XLNet of shape `[2, num_heads, dim]`. + segment_attention_bias: Optional trainable bias parameter added to the + query had when calculating the segment-based attention score used in + XLNet of shape `[num_heads, dim]`. + state: Optional `Tensor` of shape `[B, M, E]` where M is the length of the + state or memory. + If passed, this is also attended over as in Transformer XL. + attention_mask: A boolean mask of shape `[B, T, S]` that prevents attention + to certain positions. 
+ """ + + def __init__(self, + kernel_initializer="variance_scaling", + **kwargs): + super().__init__(kernel_initializer=kernel_initializer, + **kwargs) + + def _build_from_signature(self, query, value, key=None): + super(MultiHeadRelativeAttention, self)._build_from_signature( + query=query, + value=value, + key=key) + if hasattr(value, "shape"): + value_shape = tf.TensorShape(value.shape) + else: + value_shape = value + if key is None: + key_shape = value_shape + elif hasattr(key, "shape"): + key_shape = tf.TensorShape(key.shape) + else: + key_shape = key + + common_kwargs = dict( + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint) + + with tf.init_scope(): + einsum_equation, _, output_rank = _build_proj_equation( + key_shape.rank - 1, bound_dims=1, output_dims=2) + self._encoding_dense = tf.keras.layers.experimental.EinsumDense( + einsum_equation, + output_shape=_get_output_shape(output_rank - 1, + [self._num_heads, self._key_dim]), + bias_axes=None, + name="encoding", + **common_kwargs) + + def compute_attention(self, + query, + key, + value, + position, + content_attention_bias, + positional_attention_bias, + segment_matrix=None, + segment_encoding=None, + segment_attention_bias=None, + attention_mask=None): + """Computes the attention. + + This function defines the computation inside `call` with projected + multihead Q, K, V, R inputs. + + Args: + query: Projected query `Tensor` of shape `[B, T, N, key_dim]`. + key: Projected key `Tensor` of shape `[B, S + M, N, key_dim]`. + value: Projected value `Tensor` of shape `[B, S + M, N, key_dim]`. + position: Projected position `Tensor` of shape `[B, L, N, key_dim]`. 
+      content_attention_bias: Trainable bias parameter added to the query head
+        when calculating the content-based attention score.
+      positional_attention_bias: Trainable bias parameter added to the query
+        head when calculating the position-based attention score.
+      segment_matrix: Optional `Tensor` representing segmentation IDs used in
+        XLNet.
+      segment_encoding: Optional trainable `Tensor` representing the
+        segmentation encoding as used in XLNet.
+      segment_attention_bias: Optional trainable bias parameter added to the
+        query head when calculating the segment-based attention score used in
+        XLNet.
+      attention_mask: (default None) Optional mask that is added to attention
+        logits. If state is not None, the mask source sequence dimension should
+        extend M.
+
+    Returns:
+      attention_output: Multi-headed output of attention computation of shape
+        `[B, S, N, key_dim]`.
+
+    """
+    content_attention = tf.einsum(self._dot_product_equation,
+                                  key,
+                                  query + content_attention_bias)
+    positional_attention = tf.einsum(self._dot_product_equation,
+                                     position,
+                                     query + positional_attention_bias)
+    positional_attention = _rel_shift(
+        positional_attention, klen=tf.shape(content_attention)[3])
+
+    if segment_matrix is not None:
+      segment_attention = tf.einsum("bind,snd->bnis",
+                                    query + segment_attention_bias,
+                                    segment_encoding)
+      target_shape = tf.shape(positional_attention)
+      segment_attention = tf.where(
+          tf.broadcast_to(tf.expand_dims(segment_matrix, 1), target_shape),
+          tf.broadcast_to(segment_attention[:, :, :, 1:], target_shape),
+          tf.broadcast_to(segment_attention[:, :, :, :1], target_shape))
+      attention_sum = (
+          content_attention + positional_attention + segment_attention)
+    else:
+      attention_sum = content_attention + positional_attention
+
+    attention_scores = tf.multiply(
+        attention_sum, 1.0 / math.sqrt(float(self._key_dim)))
+
+    attention_scores = self._masked_softmax(attention_scores, attention_mask)
+
+    attention_output = self._dropout_layer(attention_scores)
+
+    attention_output = tf.einsum(self._combine_equation,
+                                 attention_output,
+                                 value)
+    return attention_output
+
+  def call(self,
+           query,
+           value,
+           content_attention_bias,
+           positional_attention_bias,
+           key=None,
+           relative_position_encoding=None,
+           segment_matrix=None,
+           segment_encoding=None,
+           segment_attention_bias=None,
+           state=None,
+           attention_mask=None):
+    """Compute multi-head relative attention over inputs.
+
+    Size glossary:
+      * Number of heads (H): the number of attention heads.
+      * Value size (V): the size of each value embedding per head.
+      * Key size (K): the size of each key embedding per head. Equally, the size
+          of each query embedding per head. Typically K <= V.
+      * Batch dimensions (B).
+      * Query (target) attention axes shape (T).
+      * Value (source) attention axes shape (S), the rank must match the target.
+      * Encoding length (L): The relative positional encoding length.
+
+    Args:
+      query: attention input.
+      value: attention input.
+      content_attention_bias: A trainable bias parameter added to the query
+        head when calculating the content-based attention score.
+      positional_attention_bias: A trainable bias parameter added to the query
+        head when calculating the position-based attention score.
+      key: attention input.
+      relative_position_encoding: relative positional encoding for key and
+        value.
+      segment_matrix: Optional `Tensor` representing segmentation IDs used in
+        XLNet.
+      segment_encoding: Optional `Tensor` representing the segmentation
+        encoding as used in XLNet.
+      segment_attention_bias: Optional trainable bias parameter added to the
+        query head when calculating the segment-based attention score used in
+        XLNet.
+      state: (default None) optional state. If passed, this is also attended
+        over as in TransformerXL.
+      attention_mask: (default None) Optional mask that is added to attention
+        logits. If state is not None, the mask source sequence dimension should
+        extend M.
+ + Returns: + attention_output: The result of the computation, of shape [B, T, E], + where `T` is for target sequence shapes and `E` is the query input last + dimension if `output_shape` is `None`. Otherwise, the multi-head outputs + are projected to the shape specified by `output_shape`. + """ + if not self._built_from_signature: + self._build_from_signature(query, value, key=key) + if key is None: + key = value + if state is not None and state.shape.ndims > 1: + value = tf.concat([state, value], 1) + key = tf.concat([state, key], 1) + + # `query` = [B, T, N ,H] + query = self._query_dense(query) + + # `key` = [B, S + M, N, H] + key = self._key_dense(key) + + # `value` = [B, S + M, N, H] + value = self._value_dense(value) + + # `position` = [B, L, N, H] + position = self._encoding_dense(relative_position_encoding) + + attention_output = self.compute_attention( + query=query, + key=key, + value=value, + position=position, + content_attention_bias=content_attention_bias, + positional_attention_bias=positional_attention_bias, + segment_matrix=segment_matrix, + segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + attention_mask=attention_mask) + + # `attention_output` = [B, S, N, H] + attention_output = self._output_dense(attention_output) + + return attention_output + + +@tf.keras.utils.register_keras_serializable(package="Text") +class TwoStreamRelativeAttention(MultiHeadRelativeAttention): + """Two-stream relative self-attention for XLNet. + + In XLNet, each token has two associated vectors at each self-attention layer, + the content stream (h) and the query stream (g). + + The content stream is the self-attention stream as in Transformer XL and + represents the context and content (the token itself). + + The query stream only has access to contextual information and the position, + but not the content. 
+
+  This layer shares the same build signature as
+  `tf.keras.layers.MultiHeadAttention` but has different input/output
+  projections.
+
+  **Note: This layer is currently experimental.**
+
+  Call args:
+    content_stream: `Tensor` of shape `[B, T, dim]`.
+    content_attention_bias: Bias `Tensor` for content based attention of shape
+      `[num_heads, dim]`.
+    positional_attention_bias: Bias `Tensor` for position based attention of
+      shape `[num_heads, dim]`.
+    query_stream: `Tensor` of shape `[B, P, dim]`.
+    target_mapping: `Tensor` of shape `[B, P, S]`.
+    relative_position_encoding: Relative positional encoding `Tensor` of shape
+      `[B, L, dim]`.
+    segment_matrix: Optional `Tensor` representing segmentation IDs used in
+      XLNet of shape `[B, S, S + M]`.
+    segment_encoding: Optional `Tensor` representing the segmentation
+      encoding as used in XLNet of shape `[2, num_heads, dim]`.
+    segment_attention_bias: Optional trainable bias parameter added to the
+      query head when calculating the segment-based attention score used in
+      XLNet of shape `[num_heads, dim]`.
+    state: Optional `Tensor` of shape [B, M, E] where M is the length of the
+      state or memory.
+      If passed, this is also attended over as in Transformer XL.
+    content_attention_mask: a boolean mask of shape `[B, T, S]` that
+      prevents attention to certain positions for content attention computation.
+    query_attention_mask: a boolean mask of shape `[B, T, S]` that
+      prevents attention to certain positions for query attention computation.
+  """
+
+  def call(self,
+           content_stream,
+           content_attention_bias,
+           positional_attention_bias,
+           query_stream,
+           relative_position_encoding,
+           target_mapping=None,
+           segment_matrix=None,
+           segment_encoding=None,
+           segment_attention_bias=None,
+           state=None,
+           content_attention_mask=None,
+           query_attention_mask=None):
+    """Compute multi-head relative attention over inputs.
+
+    Size glossary:
+      * Number of heads (H): the number of attention heads.
+ * Value size (V): the size of each value embedding per head. + * Key size (K): the size of each key embedding per head. Equally, the size + of each query embedding per head. Typically K <= V. + * Number of predictions (P): the number of predictions. + * Batch dimensions (B). + * Query (target) attention axes shape (T). + * Value (source) attention axes shape (S), the rank must match the target. + * Encoding length (L): The relative positional encoding length. + + Args: + content_stream: The content representation, commonly referred to as h. + This serves a similar role to the standard hidden states in + Transformer-XL. + content_attention_bias: A trainable bias parameter added to the query + head when calculating the content-based attention score. + positional_attention_bias: A trainable bias parameter added to the query + head when calculating the position-based attention score. + query_stream: The query representation, commonly referred to as g. + This only has access to contextual information and position, but not + content. If not provided, then this is MultiHeadRelativeAttention with + self-attention. + relative_position_encoding: relative positional encoding for key and + value. + target_mapping: Optional `Tensor` representing the target mapping used + in partial prediction. + segment_matrix: Optional `Tensor` representing segmentation IDs used in + XLNet. + segment_encoding: Optional `Tensor` representing the segmentation + encoding as used in XLNet. + segment_attention_bias: Optional trainable bias parameter added to the + query head when calculating the segment-based attention score. + state: (default None) optional state. If passed, this is also attended + over as in TransformerXL and XLNet. + content_attention_mask: (default None) Optional mask that is added to + content attention logits. If state is not None, the mask source sequence + dimension should extend M. 
+ query_attention_mask: (default None) Optional mask that is added to + query attention logits. If state is not None, the mask source sequence + dimension should extend M. + + Returns: + content_attention_output, query_attention_output: the results of the + computation, both of shape [B, T, E]. `T` is for target sequence shapes, + `E` is the query input last dimension if `output_shape` is `None`. + Otherwise, the multi-head outputs are projected to the shape specified + by `output_shape`. + """ + if not self._built_from_signature: + self._build_from_signature(content_stream, content_stream, content_stream) + if state is not None and state.shape.ndims > 1: + content_and_memory_stream = tf.concat([state, content_stream], 1) + else: + content_and_memory_stream = content_stream + + # `query` = [B, T, N, H] + query = self._query_dense(content_stream) + + # `key` = [B, S + M, N, H] + key = self._key_dense(content_and_memory_stream) + + # `value` = [B, S + M, N, H] + value = self._value_dense(content_and_memory_stream) + + # `position` = [B, L, N, H] + position = self._encoding_dense(relative_position_encoding) + + content_attention_output = self.compute_attention( + query=query, + key=key, + value=value, + position=position, + content_attention_bias=content_attention_bias, + positional_attention_bias=positional_attention_bias, + segment_matrix=segment_matrix, + segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + attention_mask=content_attention_mask) + + # `content_attention_output` = [B, S, N, H] + content_attention_output = self._output_dense(content_attention_output) + + query_attention_output = None + if query_stream is not None: + query = self._query_dense(query_stream) + if target_mapping is not None: + query = tf.einsum("bmnd,bml->blnd", query, target_mapping) + query_attention_output = self.compute_attention( + query=query, + key=key, + value=value, + position=position, + content_attention_bias=content_attention_bias, + 
positional_attention_bias=positional_attention_bias, + segment_matrix=segment_matrix, + segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + attention_mask=query_attention_mask) + query_attention_output = tf.einsum("blnd,bml->bmnd", + query_attention_output, + target_mapping) + else: + query_attention_output = self.compute_attention( + query=query, + key=key, + value=value, + position=position, + content_attention_bias=content_attention_bias, + positional_attention_bias=positional_attention_bias, + segment_matrix=segment_matrix, + segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + attention_mask=query_attention_mask) + query_attention_output = self._output_dense(query_attention_output) + + return content_attention_output, query_attention_output + diff --git a/official/nlp/modeling/layers/relative_attention_test.py b/official/nlp/modeling/layers/relative_attention_test.py new file mode 100644 index 0000000000000000000000000000000000000000..b092bc6740c187e482ae6ebe4917f81ed67c40c3 --- /dev/null +++ b/official/nlp/modeling/layers/relative_attention_test.py @@ -0,0 +1,191 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the attention layer.""" + +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.layers import relative_attention + + +def _create_mock_attention_data( + num_heads, + key_dim, + value_dim, + seq_length, + batch_size, + memory_length=0, + num_predictions=2, + two_stream=False, + include_state=False, + include_mask=False, + include_segment=False): + """Creates mock testing data. + + Args: + num_heads: `int`, Number of attention heads. + key_dim: `int`, Size of query head. + value_dim: `int`, Size of key, value dim. + seq_length: `int`, Sequence length of the input. + batch_size: `int`, the batch size. + memory_length: optional `int`, the length of the state. Defaults to 0. + num_predictions: `int`, the number of predictions used in two stream + attention. + two_stream: `bool`, whether or not to generate two stream data. + include_state: optional `bool`, whether or not to include state data. + include_mask: optional `bool`, whether or not to include mask data. + include_segment: optional `bool`, whether or not to include segment data. + + Returns: + A dictionary with `str` as keys and `Tensor` as values. 
+ """ + query_shape = (batch_size, seq_length, key_dim) + value_shape = (batch_size, seq_length, value_dim) + encoding_shape = (batch_size, seq_length * 2, key_dim) + attention_bias_shape = (num_heads, key_dim) + + data = dict( + relative_position_encoding=tf.random.normal(shape=encoding_shape), + content_attention_bias=tf.random.normal(shape=attention_bias_shape), + positional_attention_bias=tf.random.normal(shape=attention_bias_shape)) + + if two_stream: + query_stream_shape = (batch_size, num_predictions, key_dim) + target_mapping_shape = (batch_size, num_predictions, seq_length) + stream_data = dict( + content_stream=tf.random.normal(shape=query_shape), + query_stream=tf.random.normal(shape=query_stream_shape), + target_mapping=tf.random.normal(shape=target_mapping_shape)) + else: + stream_data = dict( + query=tf.random.normal(shape=query_shape), + value=tf.random.normal(shape=value_shape), + key=tf.random.normal(shape=value_shape)) + + data.update(stream_data) + + if include_state: + total_seq_length = seq_length + memory_length + state_data = dict( + state=tf.random.normal(shape=(batch_size, memory_length, value_dim))) + data.update(state_data) + else: + total_seq_length = seq_length + + if include_mask: + mask_shape = (batch_size, num_heads, seq_length, total_seq_length) + mask_data = np.random.randint(2, size=mask_shape).astype("float32") + if two_stream: + mask_data = dict( + content_attention_mask=mask_data, + query_attention_mask=mask_data) + else: + mask_data = dict(attention_mask=mask_data) + data.update(mask_data) + + if include_segment: + segment_encoding_shape = (2, num_heads, key_dim) + segment_matrix = np.random.randint( + 2, size=(batch_size, seq_length, total_seq_length)) + segment_matrix = tf.math.equal(segment_matrix, 1) + segment_data = dict( + segment_attention_bias=tf.random.normal(shape=attention_bias_shape), + segment_encoding=tf.random.normal(shape=segment_encoding_shape), + segment_matrix=segment_matrix) + data.update(segment_data) + + 
return data + + +@keras_parameterized.run_all_keras_modes +class MultiHeadRelativeAttentionTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + value_dim=[32, 64], + memory_length=[0, 4], + state=[True, False], + mask=[True, False], + segment=[True, False])) + def test_attention_scores(self, + value_dim, + memory_length, + state, + mask, + segment): + """Tests combinations of attention score calculations.""" + batch_size, num_heads, key_dim, seq_length = 2, 12, 64, 8 + test_layer = relative_attention.MultiHeadRelativeAttention( + num_heads=num_heads, + key_dim=key_dim, + value_dim=value_dim) + data = _create_mock_attention_data( + num_heads=num_heads, + key_dim=key_dim, + value_dim=value_dim, + seq_length=seq_length, + memory_length=memory_length, + two_stream=False, + batch_size=batch_size, + include_state=state, + include_mask=mask, + include_segment=segment) + output = test_layer(**data) + self.assertEqual(output.shape, [batch_size, seq_length, key_dim]) + + +@keras_parameterized.run_all_keras_modes +class TwoStreamRelativeAttentionTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + num_predictions=[2, 10], + memory_length=[0, 4], + state=[True, False], + mask=[True, False], + segment=[True, False])) + def test_attention_scores(self, + num_predictions, + memory_length, + state, + mask, + segment): + """Tests combinations of attention score calculations.""" + batch_size, num_heads, key_dim, seq_length = 2, 12, 64, 8 + test_layer = relative_attention.TwoStreamRelativeAttention( + num_heads=num_heads, + key_dim=key_dim, + value_dim=key_dim) + data = _create_mock_attention_data( + num_heads=num_heads, + key_dim=key_dim, + value_dim=key_dim, + seq_length=seq_length, + memory_length=memory_length, + num_predictions=num_predictions, + two_stream=True, + batch_size=batch_size, + include_state=state, + include_mask=mask, + include_segment=segment) + content_output, query_output, = test_layer(**data) + 
self.assertEqual(content_output.shape, [batch_size, seq_length, key_dim]) + self.assertEqual(query_output.shape, [batch_size, num_predictions, key_dim]) + + +if __name__ == "__main__": + np.random.seed(0) + tf.random.set_seed(0) + tf.test.main() diff --git a/official/nlp/modeling/layers/rezero_transformer.py b/official/nlp/modeling/layers/rezero_transformer.py index 2780a0ba5613070d8490555ee5ceb14c25cc90bc..0bcc781893ce9681f7c93cc958ae74bdf3d95aa3 100644 --- a/official/nlp/modeling/layers/rezero_transformer.py +++ b/official/nlp/modeling/layers/rezero_transformer.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,19 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based rezero-transformer block layer (Transformer with ReZero).""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import gin import tensorflow as tf -from official.nlp.modeling.layers import attention - @tf.keras.utils.register_keras_serializable(package="Text") @gin.configurable @@ -35,7 +29,7 @@ class ReZeroTransformer(tf.keras.layers.Layer): The residual connection implements the ReZero method. (https://arxiv.org/abs/2003.04887) - Arguments: + Args: num_attention_heads: Number of attention heads. intermediate_size: Size of the intermediate layer. intermediate_activation: Activation for the intermediate layer. 
@@ -88,7 +82,7 @@ class ReZeroTransformer(tf.keras.layers.Layer): def build(self, input_shape): input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape input_tensor_shape = tf.TensorShape(input_tensor) - if len(input_tensor_shape) != 3: + if len(input_tensor_shape.as_list()) != 3: raise ValueError("TransformerLayer expects a three-dimensional input of " "shape [batch, sequence, width].") batch_size, sequence_length, hidden_size = input_tensor_shape @@ -116,9 +110,9 @@ class ReZeroTransformer(tf.keras.layers.Layer): activity_regularizer=self._activity_regularizer, kernel_constraint=self._kernel_constraint, bias_constraint=self._bias_constraint) - self._attention_layer = attention.MultiHeadAttention( + self._attention_layer = tf.keras.layers.MultiHeadAttention( num_heads=self._num_heads, - key_size=self._attention_head_size, + key_dim=self._attention_head_size, dropout=self._attention_dropout_rate, name="self_attention", **common_kwargs) @@ -138,7 +132,7 @@ class ReZeroTransformer(tf.keras.layers.Layer): bias_axes="d", name="intermediate", **common_kwargs) - policy = tf.keras.mixed_precision.experimental.global_policy() + policy = tf.keras.mixed_precision.global_policy() if policy.name == "mixed_bfloat16": # bfloat16 causes BERT with the LAMB optimizer to not converge # as well, so we use float32. 
@@ -161,7 +155,8 @@ class ReZeroTransformer(tf.keras.layers.Layer): self._rezero_a = self.add_weight( name="rezero_alpha", initializer=tf.keras.initializers.Zeros(), - trainable=True, dtype=tf.float32) + trainable=True, + dtype=tf.float32) super(ReZeroTransformer, self).build(input_shape) @@ -213,9 +208,9 @@ class ReZeroTransformer(tf.keras.layers.Layer): attention_mask = attention_mask[:, 0:self._output_range, :] else: target_tensor = input_tensor - attention_inputs = [target_tensor, input_tensor] - attention_output = self._attention_layer(attention_inputs, attention_mask) + attention_output = self._attention_layer( + query=target_tensor, value=input_tensor, attention_mask=attention_mask) attention_output = self._attention_dropout(attention_output) attention_output = target_tensor + self._rezero_a * attention_output if self._use_layer_norm: diff --git a/official/nlp/modeling/layers/rezero_transformer_test.py b/official/nlp/modeling/layers/rezero_transformer_test.py index 6ef0aa218c70c919f62492b00ef5f53348dd5938..c97142806979bfcb667b43e91ab5eefff028b246 100644 --- a/official/nlp/modeling/layers/rezero_transformer_test.py +++ b/official/nlp/modeling/layers/rezero_transformer_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for Keras-based rezero-transformer block layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based rezero-transformer block layer.""" import numpy as np import tensorflow as tf @@ -32,10 +28,10 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase): def tearDown(self): super(TransformerWithReZeroLayerTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy('float32') + tf.keras.mixed_precision.set_global_policy('float32') def test_layer_invocation_with_float16_dtype(self): - tf.keras.mixed_precision.experimental.set_policy('mixed_float16') + tf.keras.mixed_precision.set_global_policy('mixed_float16') test_layer = rezero_transformer.ReZeroTransformer( num_attention_heads=10, intermediate_size=2048, @@ -95,9 +91,9 @@ class TransformerWithReZeroLayerTest(keras_parameterized.TestCase): input_data = np.random.rand(2, input_length, width) + 2.0 output_data = model.predict(input_data) - input_data_normed = ( - input_data - np.mean(input_data, axis=-1, keepdims=True)) / ( - np.std(input_data, axis=-1, keepdims=True)) + input_data_normed = (input_data - + np.mean(input_data, axis=-1, keepdims=True)) / ( + np.std(input_data, axis=-1, keepdims=True)) self.assertAllClose(input_data_normed, output_data) diff --git a/official/nlp/modeling/layers/self_attention_mask.py b/official/nlp/modeling/layers/self_attention_mask.py index 933b4960dc0a86d4a0a767f8017853c2f2290d16..1dd14b975e6196e9d5b8bcd66fab30cbace948fe 100644 --- a/official/nlp/modeling/layers/self_attention_mask.py +++ b/official/nlp/modeling/layers/self_attention_mask.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,53 +11,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Keras layer that creates a self-attention mask.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Keras layer that creates a self-attention mask.""" import tensorflow as tf -from official.modeling import tf_utils + +from official.nlp.keras_nlp import layers @tf.keras.utils.register_keras_serializable(package='Text') -class SelfAttentionMask(tf.keras.layers.Layer): - """Create 3D attention mask from a 2D tensor mask. +class SelfAttentionMask(layers.SelfAttentionMask): + """Creates 3D attention mask from a 2D tensor mask. + **Warning: Please use the `keras_nlp.layers.SelfAttentionMask`.** inputs[0]: from_tensor: 2D or 3D Tensor of shape - [batch_size, from_seq_length, ...]. - inputs[1]: to_mask: int32 Tensor of shape [batch_size, to_seq_length]. + `(batch_size, from_seq_length, ...)`. + inputs[1]: to_mask: int32 Tensor of shape `(batch_size, to_seq_length)`. Returns: - float Tensor of shape [batch_size, from_seq_length, to_seq_length]. + Float Tensor of shape `(batch_size, from_seq_length, to_seq_length)`. 
""" def call(self, inputs): - from_tensor = inputs[0] - to_mask = inputs[1] - from_shape = tf_utils.get_shape_list(from_tensor, expected_rank=[2, 3]) - batch_size = from_shape[0] - from_seq_length = from_shape[1] - - to_shape = tf_utils.get_shape_list(to_mask, expected_rank=2) - to_seq_length = to_shape[1] - - to_mask = tf.cast( - tf.reshape(to_mask, [batch_size, 1, to_seq_length]), - dtype=from_tensor.dtype) - - # We don't assume that `from_tensor` is a mask (although it could be). We - # don't actually care if we attend *from* padding tokens (only *to* padding) - # tokens so we create a tensor of all ones. - # - # `broadcast_ones` = [batch_size, from_seq_length, 1] - broadcast_ones = tf.ones( - shape=[batch_size, from_seq_length, 1], dtype=from_tensor.dtype) - - # Here we broadcast along two dimensions to create the mask. - mask = broadcast_ones * to_mask - - return mask + if isinstance(inputs, list): + return super().call(inputs[0], inputs[1]) + else: + return super().call(inputs) diff --git a/official/nlp/modeling/layers/spectral_normalization.py b/official/nlp/modeling/layers/spectral_normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc851b40c14c03b8ebfa2e693715081acf1b469 --- /dev/null +++ b/official/nlp/modeling/layers/spectral_normalization.py @@ -0,0 +1,295 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Normalization layers. 
+ +## References: + +[1] Yuichi Yoshida, Takeru Miyato. Spectral Norm Regularization for Improving + the Generalizability of Deep Learning. + _arXiv preprint arXiv:1705.10941_, 2017. https://arxiv.org/abs/1705.10941 + +[2] Takeru Miyato, Toshiki Kataoka, Masanori Koyama, Yuichi Yoshida. + Spectral normalization for generative adversarial networks. + In _International Conference on Learning Representations_, 2018. + +[3] Henry Gouk, Eibe Frank, Bernhard Pfahringer, Michael Cree. + Regularisation of neural networks by enforcing lipschitz continuity. + _arXiv preprint arXiv:1804.04368_, 2018. https://arxiv.org/abs/1804.04368 +""" + +import numpy as np +import tensorflow as tf + + +class SpectralNormalization(tf.keras.layers.Wrapper): + """Implements spectral normalization for Dense layer.""" + + def __init__(self, + layer, + iteration=1, + norm_multiplier=0.95, + training=True, + aggregation=tf.VariableAggregation.MEAN, + inhere_layer_name=False, + **kwargs): + """Initializer. + + Args: + layer: (tf.keras.layers.Layer) A TF Keras layer to apply normalization to. + iteration: (int) The number of power iteration to perform to estimate + weight matrix's singular value. + norm_multiplier: (float) Multiplicative constant to threshold the + normalization. Usually under normalization, the singular value will + converge to this value. + training: (bool) Whether to perform power iteration to update the singular + value estimate. + aggregation: (tf.VariableAggregation) Indicates how a distributed variable + will be aggregated. Accepted values are constants defined in the class + tf.VariableAggregation. + inhere_layer_name: (bool) Whether to inhere the name of the input layer. + **kwargs: (dict) Other keyword arguments for the layers.Wrapper class. + """ + self.iteration = iteration + self.do_power_iteration = training + self.aggregation = aggregation + self.norm_multiplier = norm_multiplier + + # Set layer name. 
+ wrapper_name = kwargs.pop('name', None) + if inhere_layer_name: + wrapper_name = layer.name + + if not isinstance(layer, tf.keras.layers.Layer): + raise ValueError('`layer` must be a `tf.keras.layer.Layer`. ' + 'Observed `{}`'.format(layer)) + super(SpectralNormalization, self).__init__( + layer, name=wrapper_name, **kwargs) + + def build(self, input_shape): + super(SpectralNormalization, self).build(input_shape) + self.layer.kernel._aggregation = self.aggregation # pylint: disable=protected-access + self._dtype = self.layer.kernel.dtype + + self.w = self.layer.kernel + self.w_shape = self.w.shape.as_list() + self.uv_initializer = tf.initializers.random_normal() + + self.v = self.add_weight( + shape=(1, np.prod(self.w_shape[:-1])), + initializer=self.uv_initializer, + trainable=False, + name='v', + dtype=self.dtype, + aggregation=self.aggregation) + + self.u = self.add_weight( + shape=(1, self.w_shape[-1]), + initializer=self.uv_initializer, + trainable=False, + name='u', + dtype=self.dtype, + aggregation=self.aggregation) + + self.update_weights() + + def call(self, inputs, *, training=None): + training = self.do_power_iteration if training is None else training + u_update_op, v_update_op, w_update_op = self.update_weights( + training=training) + output = self.layer(inputs) + w_restore_op = self.restore_weights() + + # Register update ops. + self.add_update(u_update_op) + self.add_update(v_update_op) + self.add_update(w_update_op) + self.add_update(w_restore_op) + + return output + + def update_weights(self, *, training=True): + w_reshaped = tf.reshape(self.w, [-1, self.w_shape[-1]]) + + u_hat = self.u + v_hat = self.v + + if training: + for _ in range(self.iteration): + v_hat = tf.nn.l2_normalize(tf.matmul(u_hat, tf.transpose(w_reshaped))) + u_hat = tf.nn.l2_normalize(tf.matmul(v_hat, w_reshaped)) + + sigma = tf.matmul(tf.matmul(v_hat, w_reshaped), tf.transpose(u_hat)) + # Convert sigma from a 1x1 matrix to a scalar. 
+ sigma = tf.reshape(sigma, []) + u_update_op = self.u.assign(u_hat) + v_update_op = self.v.assign(v_hat) + + # Bound spectral norm to be not larger than self.norm_multiplier. + w_norm = tf.cond((self.norm_multiplier / sigma) < 1, lambda: # pylint:disable=g-long-lambda + (self.norm_multiplier / sigma) * self.w, lambda: self.w) + + w_update_op = self.layer.kernel.assign(w_norm) + return u_update_op, v_update_op, w_update_op + + def restore_weights(self): + """Restores layer weights to maintain gradient update (See Alg 1 of [1]).""" + return self.layer.kernel.assign(self.w) + + +class SpectralNormalizationConv2D(tf.keras.layers.Wrapper): + """Implements spectral normalization for Conv2D layer based on [3].""" + + def __init__(self, + layer, + iteration=1, + norm_multiplier=0.95, + training=True, + aggregation=tf.VariableAggregation.MEAN, + legacy_mode=False, + **kwargs): + """Initializer. + + Args: + layer: (tf.keras.layers.Layer) A TF Keras layer to apply normalization to. + iteration: (int) The number of power iteration to perform to estimate + weight matrix's singular value. + norm_multiplier: (float) Multiplicative constant to threshold the + normalization. Usually under normalization, the singular value will + converge to this value. + training: (bool) Whether to perform power iteration to update the singular + value estimate. + aggregation: (tf.VariableAggregation) Indicates how a distributed variable + will be aggregated. Accepted values are constants defined in the class + tf.VariableAggregation. + legacy_mode: (bool) Whether to use the legacy implementation where the + dimension of the u and v vectors are set to the batch size. It should + not be enabled unless for backward compatibility reasons. + **kwargs: (dict) Other keyword arguments for the layers.Wrapper class. 
+ """ + self.iteration = iteration + self.do_power_iteration = training + self.aggregation = aggregation + self.norm_multiplier = norm_multiplier + self.legacy_mode = legacy_mode + + # Set layer attributes. + layer._name += '_spec_norm' + + if not isinstance(layer, tf.keras.layers.Conv2D): + raise ValueError( + 'layer must be a `tf.keras.layer.Conv2D` instance. You passed: {input}' + .format(input=layer)) + super(SpectralNormalizationConv2D, self).__init__(layer, **kwargs) + + def build(self, input_shape): + self.layer.build(input_shape) + self.layer.kernel._aggregation = self.aggregation # pylint: disable=protected-access + self._dtype = self.layer.kernel.dtype + + # Shape (kernel_size_1, kernel_size_2, in_channel, out_channel). + self.w = self.layer.kernel + self.w_shape = self.w.shape.as_list() + self.strides = self.layer.strides + + # Set the dimensions of u and v vectors. + batch_size = input_shape[0] + uv_dim = batch_size if self.legacy_mode else 1 + + # Resolve shapes. + in_height = input_shape[1] + in_width = input_shape[2] + in_channel = self.w_shape[2] + + out_height = in_height // self.strides[0] + out_width = in_width // self.strides[1] + out_channel = self.w_shape[3] + + self.in_shape = (uv_dim, in_height, in_width, in_channel) + self.out_shape = (uv_dim, out_height, out_width, out_channel) + self.uv_initializer = tf.initializers.random_normal() + + self.v = self.add_weight( + shape=self.in_shape, + initializer=self.uv_initializer, + trainable=False, + name='v', + dtype=self.dtype, + aggregation=self.aggregation) + + self.u = self.add_weight( + shape=self.out_shape, + initializer=self.uv_initializer, + trainable=False, + name='u', + dtype=self.dtype, + aggregation=self.aggregation) + + super(SpectralNormalizationConv2D, self).build() + + def call(self, inputs): + u_update_op, v_update_op, w_update_op = self.update_weights() + output = self.layer(inputs) + w_restore_op = self.restore_weights() + + # Register update ops. 
+ self.add_update(u_update_op) + self.add_update(v_update_op) + self.add_update(w_update_op) + self.add_update(w_restore_op) + + return output + + def update_weights(self): + """Computes power iteration for convolutional filters based on [3].""" + # Initialize u, v vectors. + u_hat = self.u + v_hat = self.v + + if self.do_power_iteration: + for _ in range(self.iteration): + # Updates v. + v_ = tf.nn.conv2d_transpose( + u_hat, + self.w, + output_shape=self.in_shape, + strides=self.strides, + padding='SAME') + v_hat = tf.nn.l2_normalize(tf.reshape(v_, [1, -1])) + v_hat = tf.reshape(v_hat, v_.shape) + + # Updates u. + u_ = tf.nn.conv2d(v_hat, self.w, strides=self.strides, padding='SAME') + u_hat = tf.nn.l2_normalize(tf.reshape(u_, [1, -1])) + u_hat = tf.reshape(u_hat, u_.shape) + + v_w_hat = tf.nn.conv2d(v_hat, self.w, strides=self.strides, padding='SAME') + + sigma = tf.matmul(tf.reshape(v_w_hat, [1, -1]), tf.reshape(u_hat, [-1, 1])) + # Convert sigma from a 1x1 matrix to a scalar. + sigma = tf.reshape(sigma, []) + + u_update_op = self.u.assign(u_hat) + v_update_op = self.v.assign(v_hat) + + w_norm = tf.cond((self.norm_multiplier / sigma) < 1, lambda: # pylint:disable=g-long-lambda + (self.norm_multiplier / sigma) * self.w, lambda: self.w) + + w_update_op = self.layer.kernel.assign(w_norm) + + return u_update_op, v_update_op, w_update_op + + def restore_weights(self): + """Restores layer weights to maintain gradient update (See Alg 1 of [1]).""" + return self.layer.kernel.assign(self.w) diff --git a/official/nlp/modeling/layers/spectral_normalization_test.py b/official/nlp/modeling/layers/spectral_normalization_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e2162ac6c2ab860eeabdba42889ccbd0d9fdb97a --- /dev/null +++ b/official/nlp/modeling/layers/spectral_normalization_test.py @@ -0,0 +1,86 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for normalization layers. + +## References: + +[1] Hanie Sedghi, Vineet Gupta, Philip M. Long. + The Singular Values of Convolutional Layers. + In _International Conference on Learning Representations_, 2019. +""" +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.nlp.modeling.layers import spectral_normalization + +DenseLayer = tf.keras.layers.Dense(10) +Conv2DLayer = tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='valid') + + +def _compute_spectral_norm(weight): + if weight.ndim > 2: + # Computes Conv2D via FFT transform as in [1]. 
+ weight = np.fft.fft2(weight, weight.shape[1:3], axes=[0, 1]) + return np.max(np.linalg.svd(weight, compute_uv=False)) + + +class NormalizationTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(NormalizationTest, self).setUp() + self.num_iterations = 1000 + self.norm_multiplier = 0.95 + + @parameterized.named_parameters( + ('Dense', + (None, 10), DenseLayer, spectral_normalization.SpectralNormalization), + ('Conv2D', (None, 32, 32, 3), Conv2DLayer, + spectral_normalization.SpectralNormalizationConv2D)) + def test_spec_norm_magnitude(self, input_shape, layer, norm_wrapper): + """Tests if the weights spectral norm converges to norm_multiplier.""" + layer.build(input_shape) + sn_layer = norm_wrapper( + layer, + iteration=self.num_iterations, + norm_multiplier=self.norm_multiplier) + + # Perform normalization. + sn_layer.build(input_shape) + sn_layer.update_weights() + normalized_kernel = sn_layer.layer.kernel.numpy() + + spectral_norm_computed = _compute_spectral_norm(normalized_kernel) + spectral_norm_expected = self.norm_multiplier + self.assertAllClose( + spectral_norm_computed, spectral_norm_expected, atol=5e-2) + + # Test that the normalized layer is K-Lipschitz. In particular, if the layer + # is a function f, then ||f(x1) - f(x2)||_2 <= K * ||(x1 - x2)||_2, where K + # is the norm multiplier. 
+ new_input_shape = (16,) + input_shape[1:] + new_input = tf.random.uniform(new_input_shape) + delta_vec = tf.random.uniform(new_input_shape) + output1 = sn_layer(new_input) + output2 = sn_layer(new_input + delta_vec) + + delta_input = tf.norm(tf.reshape(delta_vec, (-1,))).numpy() + delta_output = tf.norm(tf.reshape(output2 - output1, (-1,))).numpy() + self.assertLessEqual(delta_output, self.norm_multiplier * delta_input) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/layers/talking_heads_attention.py b/official/nlp/modeling/layers/talking_heads_attention.py index c65ba1e66165617aaf5652c2f77015e9a3eb7ccb..bddfacaa86d1dea6afd7ec67b4608d15cfc36a81 100644 --- a/official/nlp/modeling/layers/talking_heads_attention.py +++ b/official/nlp/modeling/layers/talking_heads_attention.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Talking Head Attention layer.""" # pylint: disable=g-classes-have-attributes import math @@ -20,14 +20,12 @@ import string import gin import tensorflow as tf -from official.nlp.modeling.layers import attention - _CHR_IDX = string.ascii_lowercase @tf.keras.utils.register_keras_serializable(package="Text") @gin.configurable -class TalkingHeadsAttention(attention.MultiHeadAttention): +class TalkingHeadsAttention(tf.keras.layers.MultiHeadAttention): """Implements Talking-Heads Attention. 
This is an implementation of Talking-Heads Attention based on the paper @@ -35,12 +33,12 @@ class TalkingHeadsAttention(attention.MultiHeadAttention): multi-head attention by including linearprojections across the attention-heads dimension, immediately before and after the softmax operation. - See the base class `MultiHeadAttention` for more details. + See the base class `tf.keras.layers.MultiHeadAttention` for more details. - Arguments: + Args: num_heads: Number of attention heads. - key_size: Size of each attention head for query and key. - value_size: Size of each attention head for value. + key_dim: Size of each attention head for query and key. + value_dim: Size of each attention head for value. dropout: Dropout probability. use_bias: Boolean, whether the dense layers use bias vectors/matrices. output_shape: The expected shape of an output tensor, besides the batch and @@ -65,7 +63,7 @@ class TalkingHeadsAttention(attention.MultiHeadAttention): that will be applied on attention scores before and after softmax. Args: - qkv_rank: the rank of query, key, value tensors after projection. + qkv_rank: The rank of query, key, value tensors after projection. """ super(TalkingHeadsAttention, self)._build_attention(qkv_rank) @@ -107,18 +105,21 @@ class TalkingHeadsAttention(attention.MultiHeadAttention): query_tensor, key_tensor, value_tensor, - attention_mask=None): + attention_mask=None, + training=None): """Applies Dot-product attention with query, key, value tensors. This function overrides base class to apply additional linear projection on attention scores before and after softmax. Args: - query_tensor: Projected query `Tensor` of shape `[B, T, N, key_size]`. - key_tensor: Projected key `Tensor` of shape `[B, T, N, key_size]`. - value_tensor: Projected value `Tensor` of shape `[B, T, N, value_size]`. + query_tensor: Projected query `Tensor` of shape `[B, T, N, key_dim]`. + key_tensor: Projected key `Tensor` of shape `[B, T, N, key_dim]`. 
+ value_tensor: Projected value `Tensor` of shape `[B, T, N, value_dim]`. attention_mask: a boolean mask of shape `[B, T, S]`, that prevents attention to certain positions. + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (doing nothing). Returns: attention_output: Multi-headed outputs of attention computation. @@ -129,7 +130,7 @@ class TalkingHeadsAttention(attention.MultiHeadAttention): attention_scores = tf.einsum(self._dot_product_equation, key_tensor, query_tensor) attention_scores = tf.multiply(attention_scores, - 1.0 / math.sqrt(float(self._key_size))) + 1.0 / math.sqrt(float(self._key_dim))) # Apply linear projection before softmax attention_scores = tf.einsum(self._talking_heads_equation, attention_scores, @@ -145,7 +146,8 @@ class TalkingHeadsAttention(attention.MultiHeadAttention): # This is actually dropping out entire tokens to attend to, which might # seem a bit unusual, but is taken from the original Transformer paper. - attention_scores_dropout = self._dropout_layer(attention_scores) + attention_scores_dropout = self._dropout_layer( + attention_scores, training=training) # `context_layer` = [B, T, N, H] attention_output = tf.einsum(self._combine_equation, diff --git a/official/nlp/modeling/layers/talking_heads_attention_test.py b/official/nlp/modeling/layers/talking_heads_attention_test.py index ed24eda26c6f532b5e5011f5bfc8109eeca68a03..579384bb754952187682bb8dcfdb74fe9e0b6478 100644 --- a/official/nlp/modeling/layers/talking_heads_attention_test.py +++ b/official/nlp/modeling/layers/talking_heads_attention_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for the attention layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for the attention layer.""" from absl.testing import parameterized import numpy as np @@ -36,35 +32,36 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): ("key_value_same_proj", None, None, [40, 80]), ("key_value_different_proj", 32, 60, [40, 60]), ) - def test_non_masked_attention(self, value_size, output_shape, output_dims): + def test_non_masked_attention(self, value_dim, output_shape, output_dims): """Test that the attention layer can be created without a mask tensor.""" test_layer = talking_heads_attention.TalkingHeadsAttention( num_heads=12, - key_size=64, - value_size=value_size, + key_dim=64, + value_dim=value_dim, output_shape=output_shape) # Create a 3-dimensional input (the first dimension is implicit). query = tf.keras.Input(shape=(40, 80)) value = tf.keras.Input(shape=(20, 80)) - output = test_layer([query, value]) + output = test_layer(query=query, value=value) self.assertEqual(output.shape.as_list(), [None] + output_dims) def test_non_masked_self_attention(self): """Test with one input (self-attenntion) and no mask tensor.""" test_layer = talking_heads_attention.TalkingHeadsAttention( - num_heads=12, key_size=64) + num_heads=12, key_dim=64) # Create a 3-dimensional input (the first dimension is implicit). 
query = tf.keras.Input(shape=(40, 80)) - output = test_layer([query, query]) + output = test_layer(query=query, value=query) self.assertEqual(output.shape.as_list(), [None, 40, 80]) def test_attention_scores(self): """Test attention outputs with coefficients.""" test_layer = talking_heads_attention.TalkingHeadsAttention( - num_heads=12, key_size=64, return_attention_scores=True) + num_heads=12, key_dim=64) # Create a 3-dimensional input (the first dimension is implicit). query = tf.keras.Input(shape=(40, 80)) - output, coef = test_layer([query, query]) + output, coef = test_layer(query=query, value=query, + return_attention_scores=True) self.assertEqual(output.shape.as_list(), [None, 40, 80]) self.assertEqual(coef.shape.as_list(), [None, 12, 40, 40]) @@ -72,13 +69,13 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): def test_masked_attention(self, use_bias): """Test with a mask tensor.""" test_layer = talking_heads_attention.TalkingHeadsAttention( - num_heads=12, key_size=2, use_bias=use_bias) + num_heads=12, key_dim=2, use_bias=use_bias) # Create a 3-dimensional input (the first dimension is implicit). batch_size = 3 query = tf.keras.Input(shape=(4, 8)) value = tf.keras.Input(shape=(2, 8)) mask_tensor = tf.keras.Input(shape=(4, 2)) - output = test_layer([query, value], mask_tensor) + output = test_layer(query=query, value=value, attention_mask=mask_tensor) # Create a model containing the test layer. model = tf.keras.Model([query, value, mask_tensor], output) @@ -102,7 +99,8 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): # Tests the layer with three inputs: Q, K, V. 
key = tf.keras.Input(shape=(2, 8)) - output = test_layer([query, value, key], mask_tensor) + output = test_layer( + query=query, value=value, key=key, attention_mask=mask_tensor) model = tf.keras.Model([query, value, key, mask_tensor], output) masked_output_data = model.predict([from_data, to_data, to_data, mask_data]) @@ -123,11 +121,11 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): """Test with a specified initializer.""" test_layer = talking_heads_attention.TalkingHeadsAttention( num_heads=12, - key_size=64, + key_dim=64, kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) # Create a 3-dimensional input (the first dimension is implicit). query = tf.keras.Input(shape=(40, 80)) - output = test_layer([query, query]) + output = test_layer(query=query, value=query) self.assertEqual(output.shape.as_list(), [None, 40, 80]) @parameterized.named_parameters( @@ -137,7 +135,7 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): def test_high_dim_attention(self, q_dims, v_dims, mask_dims, attention_axes): """Test with a mask tensor.""" test_layer = talking_heads_attention.TalkingHeadsAttention( - num_heads=12, key_size=2, attention_axes=attention_axes) + num_heads=12, key_dim=2, attention_axes=attention_axes) batch_size, hidden_size = 3, 8 # Generate data for the input (non-mask) tensors. query_shape = [batch_size] + q_dims + [hidden_size] @@ -149,11 +147,12 @@ class TalkingHeadsAttentionTest(keras_parameterized.TestCase): # Invoke the data with a random set of mask data. This should mask at least # one element. mask_data = np.random.randint(2, size=mask_shape).astype("bool") - output = test_layer([query, value], mask_data) + output = test_layer(query=query, value=value, attention_mask=mask_data) # Invoke the same data, but with a null mask (where no elements are masked). 
null_mask_data = np.ones(mask_shape) - unmasked_output = test_layer([query, value], null_mask_data) + unmasked_output = test_layer( + query=query, value=value, attention_mask=null_mask_data) # Because one data is masked and one is not, the outputs should not be the # same. self.assertNotAllClose(output, unmasked_output) diff --git a/official/nlp/modeling/layers/text_layers.py b/official/nlp/modeling/layers/text_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..df7049e6fa9df8cb53d3b1208aab92c46bcf5a74 --- /dev/null +++ b/official/nlp/modeling/layers/text_layers.py @@ -0,0 +1,704 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras Layers for BERT-specific preprocessing.""" +from typing import Any, Dict, List, Optional, Union + +from absl import logging +import tensorflow as tf + +try: + import tensorflow_text as text # pylint: disable=g-import-not-at-top +except ImportError: + text = None +except tf.errors.NotFoundError as e: + logging.warn("Encountered error when importing tensorflow_text: %s", e) + text = None + + +def _check_if_tf_text_installed(): + if text is None: + raise ImportError("import tensorflow_text failed, please install " + "'tensorflow-text-nightly'.") + + +def _iterative_vectorized_fair_share(capacity: tf.Tensor, + limit: Union[int, tf.Tensor]): + """Iterative algorithm for max min fairness algorithm. 
+
+  Reference: https://en.wikipedia.org/wiki/Max-min_fairness
+
+  The idea is for each example with some number of segments and a limit of
+  total segment length allowed, we grant each segment a fair share of the
+  limit. For example, if every segment has the same length, no work to do.
+  If one segment has below average length, its share will be split to others
+  fairly. In this way, the longest segment will be the shortest among all
+  potential capacity assignments.
+
+  Args:
+    capacity: A rank-2 Tensor of #Segments x Batch.
+    limit: The largest permissible number of tokens in total across one example.
+
+  Returns:
+    A rank-2 Tensor with new segment capacity assignment such that
+    the total number of tokens in each example does not exceed the `limit`.
+  """
+  # Firstly, we calculate the lower bound of the capacity assignment.
+  per_seg_limit = limit // capacity.shape[0]
+  limit_mask = tf.ones(capacity.shape, dtype=tf.int64) * per_seg_limit
+  lower_bound = tf.minimum(capacity, limit_mask)
+
+  # This step makes up the capacity that already satisfies the capacity limit.
+  remaining_cap_sum = limit - tf.math.reduce_sum(lower_bound, axis=0)
+  remaining_cap_mat = capacity - lower_bound
+  new_cap = lower_bound + remaining_cap_mat * tf.cast(
+      tf.math.reduce_sum(remaining_cap_mat, axis=0) <= remaining_cap_sum,
+      tf.int64)
+
+  # Process iteratively. This step is O(#segments), see analysis below.
+  while True:
+    remaining_limit = limit - tf.math.reduce_sum(new_cap, axis=0)
+    remaining_cap = capacity - new_cap
+    masked_remaining_slots = tf.cast(remaining_cap > 0, tf.int64)
+    remaining_cap_col_slots = tf.reduce_sum(masked_remaining_slots, axis=0)
+    masked_remaining_limit = tf.cast(remaining_cap_col_slots > 0,
+                                     tf.int64) * remaining_limit
+    # Total remaining segment limit is different for each example.
+ per_seg_limit = masked_remaining_limit // ( + tf.cast(remaining_cap_col_slots <= 0, tf.int64) + + remaining_cap_col_slots) # +1 to make sure 0/0 = 0 + + # Note that for each step, there is at least one more segment being + # fulfilled or the loop is finished. + # The idea is, if remaining per example limit > smallest among segments, + # the smallest segment ask is fullfilled. Otherwise, all remaining segments + # are truncated, the assignment is finished. + if tf.math.reduce_sum(per_seg_limit) > 0: + remaining_slots_mat = tf.cast(remaining_cap > 0, tf.int64) + new_cap = new_cap + remaining_slots_mat * per_seg_limit + else: + # Leftover assignment of limit that is smaller than #slots. + new_remained_assignment_mask = tf.cast( + (tf.cumsum(masked_remaining_slots, axis=0) <= masked_remaining_limit) + & (masked_remaining_slots > 0), tf.int64) + new_cap = new_cap + new_remained_assignment_mask + break + return new_cap + + +def round_robin_truncate_inputs( + inputs: Union[tf.RaggedTensor, List[tf.RaggedTensor]], + limit: Union[int, tf.Tensor], +) -> Union[tf.RaggedTensor, List[tf.RaggedTensor]]: + """Truncates a list of batched segments to fit a per-example length limit. + + Available space is assigned one token at a time in a round-robin fashion + to the inputs that still need some, until the limit is reached. + (Or equivalently: the longest input is truncated by one token until the total + length of inputs fits the limit.) Examples that fit the limit as passed in + remain unchanged. + + Args: + inputs: A list of rank-2 RaggedTensors. The i-th example is given by + the i-th row in each list element, that is, `inputs[:][i, :]`. + limit: The largest permissible number of tokens in total across one example. + + Returns: + A list of rank-2 RaggedTensors at corresponding indices with the inputs, + in which the rows of each RaggedTensor have been truncated such that + the total number of tokens in each example does not exceed the `limit`. 
+ """ + if not isinstance(inputs, (list, tuple)): + return round_robin_truncate_inputs([inputs], limit)[0] + limit = tf.cast(limit, tf.int64) + if not all(rt.shape.rank == 2 for rt in inputs): + raise ValueError("All inputs must have shape [batch_size, (items)]") + if len(inputs) == 1: + return [_truncate_row_lengths(inputs[0], limit)] + elif len(inputs) == 2: + size_a, size_b = [rt.row_lengths() for rt in inputs] + # Here's a brain-twister: This does round-robin assignment of quota + # to both inputs until the limit is reached. Hint: consider separately + # the cases of zero, one, or two inputs exceeding half the limit. + floor_half = limit // 2 + ceil_half = limit - floor_half + quota_a = tf.minimum(size_a, ceil_half + tf.nn.relu(floor_half - size_b)) + quota_b = tf.minimum(size_b, floor_half + tf.nn.relu(ceil_half - size_a)) + return [_truncate_row_lengths(inputs[0], quota_a), + _truncate_row_lengths(inputs[1], quota_b)] + else: + # Note that we don't merge with the 2 input case because the full algorithm + # is more expensive. + capacity = tf.stack([rt.row_lengths() for rt in inputs]) # #Segments x B + new_capacity = _iterative_vectorized_fair_share(capacity, limit) + return [ + _truncate_row_lengths(inputs[i], new_capacity[i]) + for i in range(capacity.shape[0]) + ] + + +def _truncate_row_lengths(ragged_tensor: tf.RaggedTensor, + new_lengths: tf.Tensor) -> tf.RaggedTensor: + """Truncates the rows of `ragged_tensor` to the given row lengths.""" + new_lengths = tf.broadcast_to(new_lengths, + ragged_tensor.bounding_shape()[0:1]) + def fn(x): + row, new_length = x + return row[0:new_length] + fn_dtype = tf.RaggedTensorSpec(dtype=ragged_tensor.dtype, + ragged_rank=ragged_tensor.ragged_rank - 1) + result = tf.map_fn(fn, (ragged_tensor, new_lengths), dtype=fn_dtype) + # Work around broken shape propagation: without this, result has unknown rank. 
+  flat_values_shape = [None] * ragged_tensor.flat_values.shape.rank
+  result = result.with_flat_values(
+      tf.ensure_shape(result.flat_values, flat_values_shape))
+
+  return result
+
+
+class BertTokenizer(tf.keras.layers.Layer):
+  """Wraps BertTokenizer with pre-defined vocab as a Keras Layer.
+
+  Attributes:
+    tokenize_with_offsets: If true, calls
+      `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+      `text.BertTokenizer.tokenize()` and outputs a triple of
+      `(tokens, start_offsets, limit_offsets)`.
+    raw_table_access: An object with methods `.lookup(keys)` and `.size()`
+      that operate on the raw lookup table of tokens. It can be used to
+      look up special token symbols like `[MASK]`.
+  """
+
+  def __init__(self, *,
+               vocab_file: str,
+               lower_case: bool,
+               tokenize_with_offsets: bool = False,
+               **kwargs):
+    """Initialize a `BertTokenizer` layer.
+
+    Args:
+      vocab_file: A Python string with the path of the vocabulary file.
+        This is a text file with newline-separated wordpiece tokens.
+        This layer initializes a lookup table from it that gets used with
+        `text.BertTokenizer`.
+      lower_case: A Python boolean forwarded to `text.BertTokenizer`.
+        If true, input text is converted to lower case (where applicable)
+        before tokenization. This must be set to match the way in which
+        the `vocab_file` was created.
+      tokenize_with_offsets: A Python boolean. If true, this layer calls
+        `text.BertTokenizer.tokenize_with_offsets()` instead of plain
+        `text.BertTokenizer.tokenize()` and outputs a triple of
+        `(tokens, start_offsets, limit_offsets)`
+        instead of just tokens.
+      **kwargs: Standard arguments to `Layer()`.
+
+    Raises:
+      ImportError: If importing `tensorflow_text` failed.
+ """ + _check_if_tf_text_installed() + + self.tokenize_with_offsets = tokenize_with_offsets + # TODO(b/177326279): Stop storing the vocab table initializer as an + # attribute when https://github.com/tensorflow/tensorflow/issues/46456 + # has been fixed in the TensorFlow versions of the TF Hub users that load + # a SavedModel created from this layer. Due to that issue, loading such a + # SavedModel forgets to add .vocab_table._initializer as a trackable + # dependency of .vocab_table, so that saving it again to a second SavedModel + # (e.g., the final model built using TF Hub) does not properly track + # the ._vocab_table._initializer._filename as an Asset. + self._vocab_table, self._vocab_initializer_donotuse = ( + self._create_vocab_table_and_initializer(vocab_file)) + self._special_tokens_dict = self._create_special_tokens_dict( + self._vocab_table, vocab_file) + super().__init__(**kwargs) + self._bert_tokenizer = text.BertTokenizer( + self._vocab_table, lower_case=lower_case) + + @property + def vocab_size(self): + return self._vocab_table.size() + + def _create_vocab_table_and_initializer(self, vocab_file): + vocab_initializer = tf.lookup.TextFileInitializer( + vocab_file, + key_dtype=tf.string, key_index=tf.lookup.TextFileIndex.WHOLE_LINE, + value_dtype=tf.int64, value_index=tf.lookup.TextFileIndex.LINE_NUMBER) + vocab_table = tf.lookup.StaticHashTable(vocab_initializer, default_value=-1) + return vocab_table, vocab_initializer + + def call(self, inputs: tf.Tensor): + """Calls `text.BertTokenizer` on inputs. + + Args: + inputs: A string Tensor of shape `(batch_size,)`. + + Returns: + One or three of `RaggedTensors` if `tokenize_with_offsets` is False or + True, respectively. These are + tokens: A `RaggedTensor` of shape + `[batch_size, (words), (pieces_per_word)]` + and type int32. `tokens[i,j,k]` contains the k-th wordpiece of the + j-th word in the i-th input. 
+      start_offsets, limit_offsets: If `tokenize_with_offsets` is True,
+        RaggedTensors of type int64 with the same indices as tokens.
+        Element `[i,j,k]` contains the byte offset at the start, or past the
+        end, resp., for the k-th wordpiece of the j-th word in the i-th input.
+    """
+    # Prepare to reshape the result to work around broken shape inference.
+    batch_size = tf.shape(inputs)[0]
+    def _reshape(rt):
+      values = rt.values
+      row_splits = rt.row_splits
+      row_splits = tf.reshape(row_splits, [batch_size + 1])
+      return tf.RaggedTensor.from_row_splits(values, row_splits)
+
+    # Call the tokenizer.
+    if self.tokenize_with_offsets:
+      tokens, start_offsets, limit_offsets = (
+          self._bert_tokenizer.tokenize_with_offsets(inputs))
+      tokens = tf.cast(tokens, dtype=tf.int32)
+      return _reshape(tokens), _reshape(start_offsets), _reshape(limit_offsets)
+    else:
+      tokens = self._bert_tokenizer.tokenize(inputs)
+      tokens = tf.cast(tokens, dtype=tf.int32)
+      return _reshape(tokens)
+
+  def get_config(self):
+    # Skip in tf.saved_model.save(); fail if called directly.
+    raise NotImplementedError("TODO(b/170480226): implement")
+
+  def get_special_tokens_dict(self):
+    """Returns dict of token ids, keyed by standard names for their purpose.
+
+    Returns:
+      A dict from Python strings to Python integers. Each key is a standard
+      name for a special token describing its use. (For example, "padding_id"
+      is what BERT traditionally calls "[PAD]" but others may call "<pad>".)
+      The corresponding value is the integer token id. If a special token
+      is not found, its entry is omitted from the dict.
+ + The supported keys and tokens are: + * start_of_sequence_id: looked up from "[CLS]" + * end_of_segment_id: looked up from "[SEP]" + * padding_id: looked up form "[PAD]" + * mask_id: looked up from "[MASK]" + * vocab_size: one past the largest token id used + """ + return self._special_tokens_dict + + def _create_special_tokens_dict(self, vocab_table, vocab_file): + special_tokens = dict(start_of_sequence_id="[CLS]", + end_of_segment_id="[SEP]", + padding_id="[PAD]", + mask_id="[MASK]") + with tf.init_scope(): + if tf.executing_eagerly(): + special_token_ids = vocab_table.lookup( + tf.constant(list(special_tokens.values()), tf.string)) + vocab_size = vocab_table.size() + else: + # A blast from the past: non-eager init context while building Model. + # This can happen with Estimator or tf.compat.v1.disable_v2_behavior(). + logging.warning( + "Non-eager init context; computing " + "BertTokenizer's special_tokens_dict in tf.compat.v1.Session") + with tf.Graph().as_default(): + local_vocab_table, _ = self._create_vocab_table_and_initializer( + vocab_file) + special_token_ids_tensor = local_vocab_table.lookup( + tf.constant(list(special_tokens.values()), tf.string)) + vocab_size_tensor = local_vocab_table.size() + init_ops = [tf.compat.v1.initialize_all_tables()] + with tf.compat.v1.Session() as sess: + sess.run(init_ops) + special_token_ids, vocab_size = sess.run( + [special_token_ids_tensor, vocab_size_tensor]) + result = dict( + vocab_size=int(vocab_size) # Numpy to Python. + ) + for k, v in zip(special_tokens, special_token_ids): + v = int(v) + if v >= 0: + result[k] = v + else: + logging.warning("Could not find %s as token \"%s\" in vocab file %s", + k, special_tokens[k], vocab_file) + return result + + +class SentencepieceTokenizer(tf.keras.layers.Layer): + """Wraps `tf_text.SentencepieceTokenizer` as a Keras Layer. 
+ + Attributes: + tokenize_with_offsets: If true, calls + `SentencepieceTokenizer.tokenize_with_offsets()` + instead of plain `.tokenize()` and outputs a triple of + `(tokens, start_offsets, limit_offsets)`. + """ + + def __init__(self, + *, + lower_case: bool, + model_file_path: Optional[str] = None, + model_serialized_proto: Optional[str] = None, + tokenize_with_offsets: bool = False, + nbest_size: int = 0, + alpha: float = 1.0, + strip_diacritics: bool = False, + **kwargs): + """Initializes a SentencepieceTokenizer layer. + + Args: + lower_case: A Python boolean indicating whether to lowercase the string + before tokenization. NOTE: New models are encouraged to build `*_cf` + (case folding) normalization into the Sentencepiece model itself and + avoid this extra step. + model_file_path: A Python string with the path of the sentencepiece model. + Exactly one of `model_file_path` and `model_serialized_proto` can be + specified. In either case, the Keras model config for this layer will + store the actual proto (not a filename passed here). + model_serialized_proto: The sentencepiece model serialized proto string. + tokenize_with_offsets: A Python boolean. If true, this layer calls + `SentencepieceTokenizer.tokenize_with_offsets()` instead of + plain `.tokenize()` and outputs a triple of + `(tokens, start_offsets, limit_offsets)` insead of just tokens. + Note that when following `strip_diacritics` is set to True, returning + offsets is not supported now. + nbest_size: A scalar for sampling: + nbest_size = {0,1}: No sampling is performed. (default) + nbest_size > 1: samples from the nbest_size results. + nbest_size < 0: assuming that nbest_size is infinite and samples + from the all hypothesis (lattice) using + forward-filtering-and-backward-sampling algorithm. + alpha: A scalar for a smoothing parameter. Inverse temperature for + probability rescaling. + strip_diacritics: Whether to strip diacritics or not. 
Note that stripping + diacritics requires additional text normalization and dropping bytes, + which makes it impossible to keep track of the offsets now. Hence + when `strip_diacritics` is set to True, we don't yet support + `tokenize_with_offsets`. NOTE: New models are encouraged to put this + into custom normalization rules for the Sentencepiece model itself to + avoid this extra step and the limitation regarding offsets. + **kwargs: standard arguments to `Layer()`. + + Raises: + ImportError: if importing tensorflow_text failed. + """ + _check_if_tf_text_installed() + super().__init__(**kwargs) + if bool(model_file_path) == bool(model_serialized_proto): + raise ValueError("Exact one of `model_file_path` and " + "`model_serialized_proto` can be specified.") + # TODO(b/181866850): Support tokenize_with_offsets for strip_diacritics=True + if tokenize_with_offsets and strip_diacritics: + raise ValueError("`tokenize_with_offsets` is not supported when " + "`strip_diacritics` is set to True.") + if model_file_path: + self._model_serialized_proto = tf.io.gfile.GFile(model_file_path, + "rb").read() + else: + self._model_serialized_proto = model_serialized_proto + + self._lower_case = lower_case + self.tokenize_with_offsets = tokenize_with_offsets + self._nbest_size = nbest_size + self._alpha = alpha + self._strip_diacritics = strip_diacritics + self._tokenizer = self._create_tokenizer() + self._special_tokens_dict = self._create_special_tokens_dict() + + def _create_tokenizer(self): + return text.SentencepieceTokenizer( + model=self._model_serialized_proto, + out_type=tf.int32, + nbest_size=self._nbest_size, + alpha=self._alpha) + + @property + def vocab_size(self): + return self._tokenizer.vocab_size() + + def call(self, inputs: tf.Tensor): + """Calls `text.SentencepieceTokenizer` on inputs. + + Args: + inputs: A string Tensor of shape `(batch_size,)`. + + Returns: + One or three of RaggedTensors if tokenize_with_offsets is False or True, + respectively. 
These are + tokens: A RaggedTensor of shape `[batch_size, (pieces)]` and type `int32`. + `tokens[i,j]` contains the j-th piece in the i-th input. + start_offsets, limit_offsets: If `tokenize_with_offsets` is True, + RaggedTensors of type `int64` with the same indices as tokens. + Element `[i,j]` contains the byte offset at the start, or past the + end, resp., for the j-th piece in the i-th input. + """ + if self._strip_diacritics: + if self.tokenize_with_offsets: + raise ValueError("`tokenize_with_offsets` is not supported yet when " + "`strip_diacritics` is set to True (b/181866850).") + inputs = text.normalize_utf8(inputs, "NFD") + inputs = tf.strings.regex_replace(inputs, r"\p{Mn}", "") + + if self._lower_case: + inputs = text.case_fold_utf8(inputs) + + # Prepare to reshape the result to work around broken shape inference. + batch_size = tf.shape(inputs)[0] + def _reshape(rt): + values = rt.values + row_splits = rt.row_splits + row_splits = tf.reshape(row_splits, [batch_size + 1]) + return tf.RaggedTensor.from_row_splits(values, row_splits) + + # Call the tokenizer. + if self.tokenize_with_offsets: + tokens, start_offsets, limit_offsets = ( + self._tokenizer.tokenize_with_offsets(inputs)) + return _reshape(tokens), _reshape(start_offsets), _reshape(limit_offsets) + else: + tokens = self._tokenizer.tokenize(inputs) + return _reshape(tokens) + + def get_config(self): + # Skip in tf.saved_model.save(); fail if called direcly. + raise NotImplementedError("TODO(b/170480226): implement") + + def get_special_tokens_dict(self): + """Returns dict of token ids, keyed by standard names for their purpose. + + Returns: + A dict from Python strings to Python integers. Each key is a standard + name for a special token describing its use. (For example, "padding_id" + is what Sentencepiece calls "" but others may call "[PAD]".) + The corresponding value is the integer token id. If a special token + is not found, its entry is omitted from the dict. 
+ + The supported keys and tokens are: + * start_of_sequence_id: looked up from "[CLS]" + * end_of_segment_id: looked up from "[SEP]" + * padding_id: looked up from "" + * mask_id: looked up from "[MASK]" + * vocab_size: one past the largest token id used + """ + return self._special_tokens_dict + + def _create_special_tokens_dict(self): + special_tokens = dict( + start_of_sequence_id=b"[CLS]", + end_of_segment_id=b"[SEP]", + padding_id=b"", + mask_id=b"[MASK]") + with tf.init_scope(): + if tf.executing_eagerly(): + special_token_ids = self._tokenizer.string_to_id( + tf.constant(list(special_tokens.values()), tf.string)) + inverse_tokens = self._tokenizer.id_to_string(special_token_ids) + vocab_size = self._tokenizer.vocab_size() + else: + # A blast from the past: non-eager init context while building Model. + # This can happen with Estimator or tf.compat.v1.disable_v2_behavior(). + logging.warning( + "Non-eager init context; computing SentencepieceTokenizer's " + "special_tokens_dict in tf.compat.v1.Session") + with tf.Graph().as_default(): + local_tokenizer = self._create_tokenizer() + special_token_ids_tensor = local_tokenizer.string_to_id( + tf.constant(list(special_tokens.values()), tf.string)) + inverse_tokens_tensor = local_tokenizer.id_to_string( + special_token_ids_tensor) + vocab_size_tensor = local_tokenizer.vocab_size() + with tf.compat.v1.Session() as sess: + special_token_ids, inverse_tokens, vocab_size = sess.run( + [special_token_ids_tensor, inverse_tokens_tensor, + vocab_size_tensor]) + result = dict( + vocab_size=int(vocab_size) # Numpy to Python. 
+ ) + for name, token_id, inverse_token in zip(special_tokens, + special_token_ids, + inverse_tokens): + if special_tokens[name] == inverse_token: + result[name] = int(token_id) + else: + logging.warning( + "Could not find %s as token \"%s\" in sentencepiece model, " + "got \"%s\"", name, special_tokens[name], inverse_token) + return result + + +class BertPackInputs(tf.keras.layers.Layer): + """Packs tokens into model inputs for BERT.""" + + def __init__(self, + seq_length, + *, + start_of_sequence_id=None, + end_of_segment_id=None, + padding_id=None, + special_tokens_dict=None, + truncator="round_robin", + **kwargs): + """Initializes with a target `seq_length`, relevant token ids and truncator. + + Args: + seq_length: The desired output length. Must not exceed the max_seq_length + that was fixed at training time for the BERT model receiving the inputs. + start_of_sequence_id: The numeric id of the token that is to be placed + at the start of each sequence (called "[CLS]" for BERT). + end_of_segment_id: The numeric id of the token that is to be placed + at the end of each input segment (called "[SEP]" for BERT). + padding_id: The numeric id of the token that is to be placed into the + unused positions after the last segment in the sequence + (called "[PAD]" for BERT). + special_tokens_dict: Optionally, a dict from Python strings to Python + integers that contains values for `start_of_sequence_id`, + `end_of_segment_id` and `padding_id`. (Further values in the dict are + silenty ignored.) If this is passed, separate *_id arguments must be + omitted. + truncator: The algorithm to truncate a list of batched segments to fit a + per-example length limit. The value can be either `round_robin` or + `waterfall`: + (1) For "round_robin" algorithm, available space is assigned + one token at a time in a round-robin fashion to the inputs that still + need some, until the limit is reached. It currently only supports + one or two segments. 
+          (2) For "waterfall" algorithm, the allocation of the budget is done
+            using a "waterfall" algorithm that allocates quota in a
+            left-to-right manner and fills up the buckets until we run out of
+            budget. It supports arbitrary number of segments.
+
+      **kwargs: standard arguments to `Layer()`.
+
+    Raises:
+      ImportError: if importing `tensorflow_text` failed.
+    """
+    _check_if_tf_text_installed()
+    super().__init__(**kwargs)
+    self.seq_length = seq_length
+    if truncator not in ("round_robin", "waterfall"):
+      raise ValueError("Only 'round_robin' and 'waterfall' algorithms are "
+                       "supported, but got %s" % truncator)
+    self.truncator = truncator
+    self._init_token_ids(
+        start_of_sequence_id=start_of_sequence_id,
+        end_of_segment_id=end_of_segment_id,
+        padding_id=padding_id,
+        special_tokens_dict=special_tokens_dict)
+
+  def _init_token_ids(
+      self, *,
+      start_of_sequence_id,
+      end_of_segment_id,
+      padding_id,
+      special_tokens_dict):
+    usage = ("Must pass either all of start_of_sequence_id, end_of_segment_id, "
+             "padding_id as arguments, or else a special_tokens_dict "
+             "with those keys.")
+    special_tokens_args = [start_of_sequence_id, end_of_segment_id, padding_id]
+    if special_tokens_dict is None:
+      if any(x is None for x in special_tokens_args):
+        raise ValueError(usage)
+      self.start_of_sequence_id = int(start_of_sequence_id)
+      self.end_of_segment_id = int(end_of_segment_id)
+      self.padding_id = int(padding_id)
+    else:
+      if any(x is not None for x in special_tokens_args):
+        raise ValueError(usage)
+      self.start_of_sequence_id = int(
+          special_tokens_dict["start_of_sequence_id"])
+      self.end_of_segment_id = int(special_tokens_dict["end_of_segment_id"])
+      self.padding_id = int(special_tokens_dict["padding_id"])
+
+  def get_config(self) -> Dict[str, Any]:
+    config = super().get_config()
+    config["seq_length"] = self.seq_length
+    config["start_of_sequence_id"] = self.start_of_sequence_id
+    config["end_of_segment_id"] = self.end_of_segment_id
+    config["padding_id"] = self.padding_id
+    config["truncator"] = self.truncator
+    return config
+
+  def call(self, inputs: Union[tf.RaggedTensor, List[tf.RaggedTensor]]):
+    """Adds special tokens to pack a list of segments into BERT input Tensors.
+
+    Args:
+      inputs: A Python list of one or two RaggedTensors, each with the batched
+        values of one input segment. The j-th segment of the i-th input example
+        consists of slice `inputs[j][i, ...]`.
+
+    Returns:
+      A nest of Tensors for use as input to the BERT TransformerEncoder.
+    """
+    # BertPackInputsSavedModelWrapper relies on only calling bert_pack_inputs()
+    return BertPackInputs.bert_pack_inputs(
+        inputs, self.seq_length,
+        start_of_sequence_id=self.start_of_sequence_id,
+        end_of_segment_id=self.end_of_segment_id,
+        padding_id=self.padding_id,
+        truncator=self.truncator)
+
+  @staticmethod
+  def bert_pack_inputs(inputs: Union[tf.RaggedTensor, List[tf.RaggedTensor]],
+                       seq_length: Union[int, tf.Tensor],
+                       start_of_sequence_id: Union[int, tf.Tensor],
+                       end_of_segment_id: Union[int, tf.Tensor],
+                       padding_id: Union[int, tf.Tensor],
+                       truncator="round_robin"):
+    """Freestanding equivalent of the BertPackInputs layer."""
+    _check_if_tf_text_installed()
+    # Sanitize inputs.
+    if not isinstance(inputs, (list, tuple)):
+      inputs = [inputs]
+    if not inputs:
+      raise ValueError("At least one input is required for packing")
+    input_ranks = [rt.shape.rank for rt in inputs]
+    if None in input_ranks or len(set(input_ranks)) > 1:
+      raise ValueError("All inputs for packing must have the same known rank, "
+                       "found ranks " + ",".join(str(r) for r in input_ranks))
+    # Flatten inputs to [batch_size, (tokens)].
+    if input_ranks[0] > 2:
+      inputs = [rt.merge_dims(1, -1) for rt in inputs]
+    # In case inputs weren't truncated (as they should have been),
+    # fall back to some ad-hoc truncation.
+ num_special_tokens = len(inputs) + 1 + if truncator == "round_robin": + trimmed_segments = round_robin_truncate_inputs( + inputs, seq_length - num_special_tokens) + elif truncator == "waterfall": + trimmed_segments = text.WaterfallTrimmer( + seq_length - num_special_tokens).trim(inputs) + else: + raise ValueError("Unsupported truncator: %s" % truncator) + # Combine segments. + segments_combined, segment_ids = text.combine_segments( + trimmed_segments, + start_of_sequence_id=start_of_sequence_id, + end_of_segment_id=end_of_segment_id) + # Pad to dense Tensors. + input_word_ids, _ = text.pad_model_inputs(segments_combined, seq_length, + pad_value=padding_id) + input_type_ids, input_mask = text.pad_model_inputs(segment_ids, seq_length, + pad_value=0) + # Work around broken shape inference. + output_shape = tf.stack([ + inputs[0].nrows(out_type=tf.int32), # batch_size + tf.cast(seq_length, dtype=tf.int32)]) + def _reshape(t): + return tf.reshape(t, output_shape) + # Assemble nest of input tensors as expected by BERT TransformerEncoder. + return dict(input_word_ids=_reshape(input_word_ids), + input_mask=_reshape(input_mask), + input_type_ids=_reshape(input_type_ids)) diff --git a/official/nlp/modeling/layers/text_layers_test.py b/official/nlp/modeling/layers/text_layers_test.py new file mode 100644 index 0000000000000000000000000000000000000000..10546b472372a9bcd4486a5d603ea8295d80b507 --- /dev/null +++ b/official/nlp/modeling/layers/text_layers_test.py @@ -0,0 +1,542 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests bert.text_layers.""" + +import os +import tempfile + +import numpy as np +import tensorflow as tf + +from sentencepiece import SentencePieceTrainer +from official.nlp.modeling.layers import text_layers + + +class RoundRobinTruncatorTest(tf.test.TestCase): + + def _test_input(self, start, lengths): + return tf.ragged.constant([[start + 10 * j + i + for i in range(length)] + for j, length in enumerate(lengths)], + dtype=tf.int32) + + def test_single_segment(self): + # Single segment. + single_input = self._test_input(11, [4, 5, 6]) + expected_single_output = tf.ragged.constant( + [[11, 12, 13, 14], + [21, 22, 23, 24, 25], + [31, 32, 33, 34, 35], # Truncated. + ]) + + self.assertAllEqual( + expected_single_output, + text_layers.round_robin_truncate_inputs(single_input, limit=5)) + # Test wrapping in a singleton list. + actual_single_list_output = text_layers.round_robin_truncate_inputs( + [single_input], limit=5) + self.assertIsInstance(actual_single_list_output, list) + self.assertAllEqual(expected_single_output, actual_single_list_output[0]) + + def test_two_segments(self): + input_a = self._test_input(111, [1, 2, 2, 3, 4, 5]) + input_b = self._test_input(211, [1, 3, 4, 2, 2, 5]) + expected_a = tf.ragged.constant( + [[111], + [121, 122], + [131, 132], + [141, 142, 143], + [151, 152, 153], # Truncated. + [161, 162, 163], # Truncated. + ]) + expected_b = tf.ragged.constant( + [[211], + [221, 222, 223], + [231, 232, 233], # Truncated. + [241, 242], + [251, 252], + [261, 262], # Truncated. 
+ ]) + actual_a, actual_b = text_layers.round_robin_truncate_inputs( + [input_a, input_b], limit=5) + self.assertAllEqual(expected_a, actual_a) + self.assertAllEqual(expected_b, actual_b) + + def test_three_segments(self): + input_a = self._test_input(111, [1, 2, 2, 3, 4, 5, 1]) + input_b = self._test_input(211, [1, 3, 4, 2, 2, 5, 8]) + input_c = self._test_input(311, [1, 3, 4, 2, 2, 5, 10]) + seg_limit = 8 + expected_a = tf.ragged.constant([ + [111], + [121, 122], + [131, 132], + [141, 142, 143], + [151, 152, 153, 154], + [161, 162, 163], # Truncated + [171] + ]) + expected_b = tf.ragged.constant([ + [211], + [221, 222, 223], + [231, 232, 233], # Truncated + [241, 242], + [251, 252], + [261, 262, 263], # Truncated + [271, 272, 273, 274] # Truncated + ]) + expected_c = tf.ragged.constant([ + [311], + [321, 322, 323], + [331, 332, 333], # Truncated + [341, 342], + [351, 352], + [361, 362], # Truncated + [371, 372, 373] # Truncated + ]) + actual_a, actual_b, actual_c = text_layers.round_robin_truncate_inputs( + [input_a, input_b, input_c], limit=seg_limit) + self.assertAllEqual(expected_a, actual_a) + self.assertAllEqual(expected_b, actual_b) + self.assertAllEqual(expected_c, actual_c) + input_cap = tf.math.reduce_sum( + tf.stack([rt.row_lengths() for rt in [input_a, input_b, input_c]]), + axis=0) + per_example_usage = tf.math.reduce_sum( + tf.stack([rt.row_lengths() for rt in [actual_a, actual_b, actual_c]]), + axis=0) + self.assertTrue(all(per_example_usage <= tf.minimum(seg_limit, input_cap))) + + +# This test covers the in-process behavior of a BertTokenizer layer. +# For saving, restoring, and the restored behavior (incl. shape inference), +# see nlp/tools/export_tfhub_lib_test.py. +class BertTokenizerTest(tf.test.TestCase): + + def _make_vocab_file(self, vocab, filename="vocab.txt"): + path = os.path.join( + tempfile.mkdtemp(dir=self.get_temp_dir()), # New subdir each time. 
+ filename) + with tf.io.gfile.GFile(path, "w") as f: + f.write("\n".join(vocab + [""])) + return path + + def test_uncased(self): + vocab_file = self._make_vocab_file( + ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "xy"]) + bert_tokenize = text_layers.BertTokenizer( + vocab_file=vocab_file, lower_case=True) + inputs = tf.constant(["abc def", "ABC DEF d"]) + token_ids = bert_tokenize(inputs) + self.assertAllEqual(token_ids, tf.ragged.constant([[[6], [4, 5]], + [[6], [4, 5], [4]]])) + bert_tokenize.tokenize_with_offsets = True + token_ids_2, start_offsets, limit_offsets = bert_tokenize(inputs) + self.assertAllEqual(token_ids, token_ids_2) + self.assertAllEqual(start_offsets, tf.ragged.constant([[[0], [4, 5]], + [[0], [4, 5], [8]]])) + self.assertAllEqual(limit_offsets, tf.ragged.constant([[[3], [5, 7]], + [[3], [5, 7], [9]]])) + self.assertEqual(bert_tokenize.vocab_size.numpy(), 8) + + # Repeat the above and test that case matters with lower_case=False. + def test_cased(self): + vocab_file = self._make_vocab_file( + ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "ABC"]) + bert_tokenize = text_layers.BertTokenizer( + vocab_file=vocab_file, lower_case=False, tokenize_with_offsets=True) + inputs = tf.constant(["abc def", "ABC DEF"]) + token_ids, start_offsets, limit_offsets = bert_tokenize(inputs) + self.assertAllEqual(token_ids, tf.ragged.constant([[[6], [4, 5]], + [[7], [1]]])) + self.assertAllEqual(start_offsets, tf.ragged.constant([[[0], [4, 5]], + [[0], [4]]])) + self.assertAllEqual(limit_offsets, tf.ragged.constant([[[3], [5, 7]], + [[3], [7]]])) + + def test_special_tokens_complete(self): + vocab_file = self._make_vocab_file( + ["foo", "[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "xy"]) + bert_tokenize = text_layers.BertTokenizer( + vocab_file=vocab_file, lower_case=True) + self.assertDictEqual(bert_tokenize.get_special_tokens_dict(), + dict(padding_id=1, + start_of_sequence_id=3, + end_of_segment_id=4, + mask_id=5, + vocab_size=7)) + + 
def test_special_tokens_partial(self): + vocab_file = self._make_vocab_file( + ["[PAD]", "[CLS]", "[SEP]"]) + bert_tokenize = text_layers.BertTokenizer( + vocab_file=vocab_file, lower_case=True) + self.assertDictEqual(bert_tokenize.get_special_tokens_dict(), + dict(padding_id=0, + start_of_sequence_id=1, + end_of_segment_id=2, + vocab_size=3)) # No mask_id, + + def test_special_tokens_in_estimator(self): + """Tests getting special tokens without an Eager init context.""" + vocab_file = self._make_vocab_file( + ["[PAD]", "[UNK]", "[CLS]", "[SEP]", "d", "##ef", "abc", "xy"]) + + def input_fn(): + with tf.init_scope(): + self.assertFalse(tf.executing_eagerly()) + # Build a preprocessing Model. + sentences = tf.keras.layers.Input(shape=[], dtype=tf.string) + bert_tokenizer = text_layers.BertTokenizer( + vocab_file=vocab_file, lower_case=True) + special_tokens_dict = bert_tokenizer.get_special_tokens_dict() + for k, v in special_tokens_dict.items(): + self.assertIsInstance(v, int, "Unexpected type for {}".format(k)) + tokens = bert_tokenizer(sentences) + packed_inputs = text_layers.BertPackInputs( + 4, special_tokens_dict=special_tokens_dict)(tokens) + preprocessing = tf.keras.Model(sentences, packed_inputs) + # Map the dataset. + ds = tf.data.Dataset.from_tensors( + (tf.constant(["abc", "DEF"]), tf.constant([0, 1]))) + ds = ds.map(lambda features, labels: (preprocessing(features), labels)) + return ds + + def model_fn(features, labels, mode): + del labels # Unused. + return tf.estimator.EstimatorSpec(mode=mode, + predictions=features["input_word_ids"]) + + estimator = tf.estimator.Estimator(model_fn=model_fn) + outputs = list(estimator.predict(input_fn)) + self.assertAllEqual(outputs, np.array([[2, 6, 3, 0], + [2, 4, 5, 3]])) + + +# This test covers the in-process behavior of a SentencepieceTokenizer layer. +class SentencepieceTokenizerTest(tf.test.TestCase): + + def setUp(self): + super().setUp() + # Make a sentencepiece model. 
+ tmp_dir = self.get_temp_dir() + tempfile.mkdtemp(dir=tmp_dir) + vocab = ["a", "b", "c", "d", "e", "abc", "def", "ABC", "DEF"] + model_prefix = os.path.join(tmp_dir, "spm_model") + input_text_file_path = os.path.join(tmp_dir, "train_input.txt") + with tf.io.gfile.GFile(input_text_file_path, "w") as f: + f.write(" ".join(vocab + ["\n"])) + # Add 7 more tokens: , , [CLS], [SEP], [MASK], , . + full_vocab_size = len(vocab) + 7 + flags = dict( + model_prefix=model_prefix, + model_type="word", + input=input_text_file_path, + pad_id=0, unk_id=1, control_symbols="[CLS],[SEP],[MASK]", + vocab_size=full_vocab_size, + bos_id=full_vocab_size-2, eos_id=full_vocab_size-1) + SentencePieceTrainer.Train( + " ".join(["--{}={}".format(k, v) for k, v in flags.items()])) + self._spm_path = model_prefix + ".model" + + def test_uncased(self): + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, lower_case=True, nbest_size=0) + + inputs = tf.constant(["abc def", "ABC DEF d"]) + token_ids = sentencepiece_tokenizer(inputs) + self.assertAllEqual( + token_ids, + tf.ragged.constant([[8, 12], [8, 12, 11]])) + sentencepiece_tokenizer.tokenize_with_offsets = True + token_ids_2, start_offsets, limit_offsets = sentencepiece_tokenizer(inputs) + self.assertAllEqual(token_ids, token_ids_2) + self.assertAllEqual( + start_offsets, tf.ragged.constant([[0, 3], [0, 3, 7]])) + self.assertAllEqual( + limit_offsets, tf.ragged.constant([[3, 7], [3, 7, 9]])) + self.assertEqual(sentencepiece_tokenizer.vocab_size.numpy(), 16) + + # Repeat the above and test that case matters with lower_case=False. 
+ def test_cased(self): + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, + lower_case=False, + nbest_size=0, + tokenize_with_offsets=False) + + inputs = tf.constant(["abc def", "ABC DEF d"]) + token_ids = sentencepiece_tokenizer(inputs) + self.assertAllEqual( + token_ids, + tf.ragged.constant([[8, 12], [5, 6, 11]])) + sentencepiece_tokenizer.tokenize_with_offsets = True + token_ids_2, start_offsets, limit_offsets = sentencepiece_tokenizer(inputs) + self.assertAllEqual(token_ids, token_ids_2) + self.assertAllEqual( + start_offsets, + tf.ragged.constant([[0, 3], [0, 3, 7]])) + self.assertAllEqual( + limit_offsets, + tf.ragged.constant([[3, 7], [3, 7, 9]])) + + def test_special_tokens(self): + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, lower_case=True, nbest_size=0) + self.assertDictEqual(sentencepiece_tokenizer.get_special_tokens_dict(), + dict(padding_id=0, + start_of_sequence_id=2, + end_of_segment_id=3, + mask_id=4, + vocab_size=16)) + + def test_special_tokens_in_estimator(self): + """Tests getting special tokens without an Eager init context.""" + + def input_fn(): + with tf.init_scope(): + self.assertFalse(tf.executing_eagerly()) + # Build a preprocessing Model. + sentences = tf.keras.layers.Input(shape=[], dtype=tf.string) + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, lower_case=True, nbest_size=0) + special_tokens_dict = sentencepiece_tokenizer.get_special_tokens_dict() + for k, v in special_tokens_dict.items(): + self.assertIsInstance(v, int, "Unexpected type for {}".format(k)) + tokens = sentencepiece_tokenizer(sentences) + packed_inputs = text_layers.BertPackInputs( + 4, special_tokens_dict=special_tokens_dict)(tokens) + preprocessing = tf.keras.Model(sentences, packed_inputs) + # Map the dataset. 
+ ds = tf.data.Dataset.from_tensors( + (tf.constant(["abc", "DEF"]), tf.constant([0, 1]))) + ds = ds.map(lambda features, labels: (preprocessing(features), labels)) + return ds + + def model_fn(features, labels, mode): + del labels # Unused. + return tf.estimator.EstimatorSpec(mode=mode, + predictions=features["input_word_ids"]) + + estimator = tf.estimator.Estimator(model_fn=model_fn) + outputs = list(estimator.predict(input_fn)) + self.assertAllEqual(outputs, np.array([[2, 8, 3, 0], + [2, 12, 3, 0]])) + + def test_strip_diacritics(self): + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, + lower_case=True, + nbest_size=0, + strip_diacritics=True) + inputs = tf.constant(["a b c d e", "ă ḅ č ḓ é"]) + token_ids = sentencepiece_tokenizer(inputs) + self.assertAllEqual( + token_ids, + tf.ragged.constant([[7, 9, 10, 11, 13], [7, 9, 10, 11, 13]])) + + def test_fail_on_tokenize_with_offsets_and_strip_diacritics(self): + # Raise an error in init(). 
+ with self.assertRaises(ValueError): + text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, + tokenize_with_offsets=True, + lower_case=True, + nbest_size=0, + strip_diacritics=True) + + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, + lower_case=True, + nbest_size=0, + strip_diacritics=True) + sentencepiece_tokenizer.tokenize_with_offsets = True + + # Raise an error in call(): + inputs = tf.constant(["abc def", "ABC DEF d", "Äffin"]) + with self.assertRaises(ValueError): + sentencepiece_tokenizer(inputs) + + def test_serialize_deserialize(self): + self.skipTest("b/170480226") + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, + lower_case=False, + nbest_size=0, + tokenize_with_offsets=False, + name="sentencepiece_tokenizer_layer") + config = sentencepiece_tokenizer.get_config() + new_tokenizer = text_layers.SentencepieceTokenizer.from_config(config) + self.assertEqual(config, new_tokenizer.get_config()) + inputs = tf.constant(["abc def", "ABC DEF d"]) + token_ids = sentencepiece_tokenizer(inputs) + token_ids_2 = new_tokenizer(inputs) + self.assertAllEqual(token_ids, token_ids_2) + + # TODO(b/170480226): Remove once tf_hub_export_lib_test.py covers saving. + def test_saving(self): + sentencepiece_tokenizer = text_layers.SentencepieceTokenizer( + model_file_path=self._spm_path, lower_case=True, nbest_size=0) + inputs = tf.keras.layers.Input([], dtype=tf.string) + outputs = sentencepiece_tokenizer(inputs) + model = tf.keras.Model(inputs, outputs) + export_path = tempfile.mkdtemp(dir=self.get_temp_dir()) + model.save(export_path, signatures={}) + + +class BertPackInputsTest(tf.test.TestCase): + + def test_round_robin_correct_outputs(self): + bpi = text_layers.BertPackInputs( + 10, + start_of_sequence_id=1001, + end_of_segment_id=1002, + padding_id=999, + truncator="round_robin") + # Single input, rank 2. 
+ bert_inputs = bpi( + tf.ragged.constant([[11, 12, 13], + [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 11, 12, 13, 1002, 999, 999, 999, 999, 999], + [1001, 21, 22, 23, 24, 25, 26, 27, 28, 1002]])) + self.assertAllEqual( + bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])) + self.assertAllEqual( + bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])) + + # Two inputs, rank 3. Truncation does not respect word boundaries. + bert_inputs = bpi([ + tf.ragged.constant([[[111], [112, 113]], + [[121, 122, 123], [124, 125, 126], [127, 128]]]), + tf.ragged.constant([[[211, 212], [213]], + [[221, 222], [223, 224, 225], [226, 227, 228]]]) + ]) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 111, 112, 113, 1002, 211, 212, 213, 1002, 999], + [1001, 121, 122, 123, 124, 1002, 221, 222, 223, 1002]])) + self.assertAllEqual( + bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])) + self.assertAllEqual( + bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1]])) + + # Three inputs. rank 3. 
+ bert_inputs = bpi([ + tf.ragged.constant([[[111], [112, 113]], + [[121, 122, 123], [124, 125, 126], [127, 128]]]), + tf.ragged.constant([[[211, 212], [213]], + [[221, 222], [223, 224, 225], [226, 227, 228]]]), + tf.ragged.constant([[[311, 312], [313]], + [[321, 322], [323, 324, 325], [326, 327, 328]]]) + ]) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 111, 112, 1002, 211, 212, 1002, 311, 312, 1002], + [1001, 121, 122, 1002, 221, 222, 1002, 321, 322, 1002]])) + + def test_waterfall_correct_outputs(self): + bpi = text_layers.BertPackInputs( + 10, + start_of_sequence_id=1001, + end_of_segment_id=1002, + padding_id=999, + truncator="waterfall") + # Single input, rank 2. + bert_inputs = bpi( + tf.ragged.constant([[11, 12, 13], + [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 11, 12, 13, 1002, 999, 999, 999, 999, 999], + [1001, 21, 22, 23, 24, 25, 26, 27, 28, 1002]])) + self.assertAllEqual( + bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])) + self.assertAllEqual( + bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])) + + # Two inputs, rank 3. Truncation does not respect word boundaries. 
+ bert_inputs = bpi([ + tf.ragged.constant([[[111], [112, 113]], + [[121, 122, 123], [124, 125, 126], [127, 128]]]), + tf.ragged.constant([[[211, 212], [213]], + [[221, 222], [223, 224, 225], [226, 227, 228]]]) + ]) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 111, 112, 113, 1002, 211, 212, 213, 1002, 999], + [1001, 121, 122, 123, 124, 125, 126, 127, 1002, 1002]])) + self.assertAllEqual( + bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])) + self.assertAllEqual( + bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])) + + # Three inputs, rank 3. Truncation does not respect word boundaries. + bert_inputs = bpi([ + tf.ragged.constant([[[111], [112, 113]], + [[121, 122, 123], [124, 125, 126], [127, 128]]]), + tf.ragged.constant([[[211], [212]], + [[221, 222], [223, 224, 225], [226, 227, 228]]]), + tf.ragged.constant([[[311, 312], [313]], + [[321, 322], [323, 324, 325], [326, 327]]]) + ]) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 111, 112, 113, 1002, 211, 212, 1002, 311, 1002], + [1001, 121, 122, 123, 124, 125, 126, 1002, 1002, 1002]])) + self.assertAllEqual( + bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])) + self.assertAllEqual( + bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 1, 1, 1, 2, 2], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 2]])) + + def test_special_tokens_dict(self): + special_tokens_dict = dict(start_of_sequence_id=1001, + end_of_segment_id=1002, + padding_id=999, + extraneous_key=666) + bpi = text_layers.BertPackInputs(10, + special_tokens_dict=special_tokens_dict) + bert_inputs = bpi( + tf.ragged.constant([[11, 12, 13], + [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])) + self.assertAllEqual( + bert_inputs["input_word_ids"], + tf.constant([[1001, 11, 12, 13, 1002, 999, 999, 999, 999, 999], + [1001, 21, 22, 23, 
24, 25, 26, 27, 28, 1002]])) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/modeling/layers/tn_expand_condense.py b/official/nlp/modeling/layers/tn_expand_condense.py new file mode 100644 index 0000000000000000000000000000000000000000..c4bd08c5dcadc02defe46e0e2bb23e369ffd389b --- /dev/null +++ b/official/nlp/modeling/layers/tn_expand_condense.py @@ -0,0 +1,180 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ExpandCondense tensor network layer used in TN-BERT.""" +# pylint: disable=g-classes-have-attributes +from typing import List, Optional, Text, Any, Dict +import tensorflow as tf + +Layer = tf.keras.layers.Layer +activations = tf.keras.activations +initializers = tf.keras.initializers + + +@tf.keras.utils.register_keras_serializable(package='Text') +class TNExpandCondense(Layer): + """A TPU-optimized TensorNetwork layer. + + Designed for use in models that currently use Dense layers to achieve + up projection followed by down projection. + + This layer is a TPU-optimized combination of 3 operations: + Expand, Apply Activation, and Condense. The layer projects up from + `input_shape[-1]` to `input_shape[-1] * proj_multiplier`, applies + `self.activation`, and then condenses back to `input_shape[-1]`. + + Note the input shape and output shape will be identical. + + Args: + proj_multiplier: Positive integer, multiple of `input_shape[-1]` to project + up to. 
Must be one of `[2, 4, 6, 8]`. + use_bias: Boolean, whether the layer uses a bias vector. + activation: Activation function to use between Expand and Condense. If you + don't specify anything, no activation is applied + (ie. "linear" activation: `a(x) = x`). + kernel_initializer: Initializer for the weight matrices. + bias_initializer: Initializer for the bias vector. + Input shape: + N-D tensor with shape: `(batch_size, ..., input_shape[-1])`. + Output shape: + N-D tensor with shape: `(batch_size, ..., input_shape[-1])`. + """ + + def __init__(self, + proj_multiplier: int, + use_bias: Optional[bool] = True, + activation: Optional[Text] = 'relu', + kernel_initializer: Optional[Text] = 'glorot_uniform', + bias_initializer: Optional[Text] = 'zeros', + **kwargs) -> None: + + # Allow specification of input_dim instead of input_shape, + # for compatability with Keras layers that support this + if 'input_shape' not in kwargs and 'input_dim' in kwargs: + kwargs['input_shape'] = (kwargs.pop('input_dim'),) + + super(TNExpandCondense, self).__init__(**kwargs) + + assert proj_multiplier in [ + 2, 4, 6, 8, 10, 12 + ], 'proj_multiplier needs to be one of [2, 4, 6, 8, 10, 12]' + self.proj_multiplier = proj_multiplier + + self.use_bias = use_bias + self.activation = activations.get(activation) + self.kernel_initializer = initializers.get(kernel_initializer) + self.bias_initializer = initializers.get(bias_initializer) + + def build(self, input_shape: List[int]) -> None: + # Disable the attribute-defined-outside-init violations in this function + # pylint: disable=attribute-defined-outside-init + if input_shape[-1] is None: + raise ValueError( + 'The last dimension of the inputs to `TNExpandCondense` ' + 'should be defined. 
Found `None`.') + + super(TNExpandCondense, self).build(input_shape) + + self.proj_size = self.proj_multiplier * input_shape[-1] + + assert (self.proj_size // input_shape[-1]) * input_shape[ + -1] == self.proj_size, (f'{self.proj_size} / {input_shape[-1]} must be ' + f'round') + assert (input_shape[-1] // 128 + ) * 128 == input_shape[-1], f'{input_shape[-1]} / 128 must be round' + + self.w1 = self.add_weight( + name='w1', + shape=(input_shape[-1], input_shape[-1]), + trainable=True, + initializer=self.kernel_initializer) + + self.w2 = self.add_weight( + name='w2', + shape=(128, (128 * (self.proj_size // input_shape[-1]))), + trainable=True, + initializer=self.kernel_initializer) + + self.w3 = self.add_weight( + name='w3', + shape=(128 * (self.proj_size // input_shape[-1]), 128), + trainable=True, + initializer=self.kernel_initializer) + self.w4 = self.add_weight( + name='w4', + shape=(input_shape[-1] // 128, 128, input_shape[-1]), + trainable=True, + initializer=self.kernel_initializer) + + if self.use_bias: + self.bias = self.add_weight( + name='b', + shape=(input_shape[-1] // 128, 1, + 128 * (self.proj_size // input_shape[-1])), + trainable=True, + initializer=self.bias_initializer) + else: + self.bias = None + + def call(self, inputs: tf.Tensor, **kwargs): + orig_shape = tf.shape(inputs) + input_dim = inputs.shape[-1] + tmp = tf.reshape(inputs, (-1, input_dim)) + # Shape is (BatchSeq, input_dim) + + # Expansion network + tmp = tf.einsum('ab,Qb->aQ', self.w1, tmp) + # Note: Letter Q will always represent the BatchSeq axis. 
+ tmp = tf.reshape(tmp, (input_dim // 128, 128, -1)) + tmp = tf.einsum('abQ,bd->aQd', tmp, self.w2) + + # Apply activation and then Condense + tmp = self.activation(tmp + self.bias) + tmp = tf.einsum('aQd,db->aQb', tmp, self.w3) + tmp = tf.einsum('aQb,abd->Qd', tmp, self.w4) + + out = tf.reshape(tmp, orig_shape) + return out + + def compute_output_shape(self, input_shape: List[int]) -> List[int]: + return input_shape + + def get_config(self) -> Dict[Any, Any]: + """Returns the config of the layer. + + The same layer can be reinstantiated later + (without its trained weights) from this configuration. + + Returns: + Python dictionary containing the configuration of the layer. + """ + config = {} + + # Include the layer-specific arguments + args = ['proj_multiplier', 'use_bias'] + for arg in args: + config[arg] = getattr(self, arg) + + # Serialize the activation + config['activation'] = activations.serialize(getattr(self, 'activation')) + + # Serialize the initializers + decomp_initializers = ['kernel_initializer', 'bias_initializer'] + for initializer_arg in decomp_initializers: + config[initializer_arg] = initializers.serialize( + getattr(self, initializer_arg)) + + # Get base config + base_config = super(TNExpandCondense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/official/nlp/modeling/layers/tn_expand_condense_test.py b/official/nlp/modeling/layers/tn_expand_condense_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ae39b8550252537fb44e42406f5051641eecd893 --- /dev/null +++ b/official/nlp/modeling/layers/tn_expand_condense_test.py @@ -0,0 +1,180 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ExpandCondense tensor network layer.""" + +import os + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +# pylint: disable=g-direct-tensorflow-import +from tensorflow.python.keras.testing_utils import layer_test +from official.nlp.modeling.layers.tn_expand_condense import TNExpandCondense + + +class TNLayerTest(tf.test.TestCase, parameterized.TestCase): + """Unit tests for ExpandCondense TN layer. + """ + + def setUp(self): + super(TNLayerTest, self).setUp() + self.labels = np.concatenate((np.ones((50, 1)), np.zeros((50, 1))), axis=0) + + def _build_model(self, data, proj_multiple=2): + model = tf.keras.models.Sequential() + model.add( + TNExpandCondense( + proj_multiplier=proj_multiple, + use_bias=True, + activation='relu', + input_shape=(data.shape[-1],))) + model.add(tf.keras.layers.Dense(1, activation='sigmoid')) + return model + + @parameterized.parameters((768, 6), (1024, 2)) + def test_keras_layer(self, input_dim, proj_multiple): + self.skipTest('Disable the test for now since it imports ' + 'keras.testing_utils, will reenable this test after we ' + 'fix the b/184578869') + # TODO(scottzhu): Reenable after fix b/184578869 + data = np.random.normal(size=(100, input_dim)) + data = data.astype(np.float32) + layer_test( + TNExpandCondense, + kwargs={ + 'proj_multiplier': proj_multiple, + 'input_shape': data.shape + }, + input_shape=data.shape, + input_data=data, + expected_output_shape=(None, data.shape[-1]), + expected_output_dtype=data.dtype) + + @parameterized.parameters((768, 6), (1024, 2)) + def 
test_train(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + model = self._build_model(data, proj_multiple) + tf.random.set_seed(0) + + model.compile( + optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + + # Train the model for 5 epochs + history = model.fit(data, self.labels, epochs=5, batch_size=32) + + # Check that loss decreases and accuracy increases + self.assertGreater(history.history['loss'][0], history.history['loss'][-1]) + self.assertLess( + history.history['accuracy'][0], history.history['accuracy'][-1]) + + @parameterized.parameters((768, 6), (1024, 2)) + def test_weights_change(self, input_dim, proj_multiple): + tf.random.set_seed(0) + data = np.random.randint(10, size=(100, input_dim)) + model = self._build_model(data, proj_multiple) + model.compile( + optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + + before = model.get_weights() + + model.fit(data, self.labels, epochs=5, batch_size=32) + + after = model.get_weights() + # Make sure every layer's weights changed + for i, _ in enumerate(before): + self.assertTrue((after[i] != before[i]).any()) + + @parameterized.parameters((768, 6), (1024, 2)) + def test_output_shape(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + model = self._build_model(data, proj_multiple) + input_shape = data.shape + + actual_output_shape = model(data).shape + expected_output_shape = model.compute_output_shape(input_shape) + + self.assertEqual(expected_output_shape, actual_output_shape) + + @parameterized.parameters((768, 6), (1024, 2)) + def test_expandcondense_num_parameters(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + proj_size = proj_multiple * data.shape[-1] + model = tf.keras.models.Sequential() + model.add( + TNExpandCondense( + proj_multiplier=proj_multiple, + use_bias=True, + activation='relu', + input_shape=(data.shape[-1],))) + + w1_params = 
data.shape[-1]**2 + w2_params = 128 * 128 * (proj_size // data.shape[-1]) + w3_params = 128 * 128 * (proj_size // data.shape[-1]) + w4_params = (data.shape[-1] // 128) * 128 * data.shape[-1] + bias_params = ((data.shape[-1] // 128) * 128 * + (proj_size // data.shape[-1])) + + expected_num_parameters = (w1_params + w2_params + w3_params + + w4_params) + bias_params + + self.assertEqual(expected_num_parameters, model.count_params()) + + @parameterized.parameters((912, 6), (200, 2)) + def test_incorrect_sizes(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + + with self.assertRaises(AssertionError): + model = self._build_model(data, proj_multiple) + model.compile(optimizer='adam', loss='binary_crossentropy') + + @parameterized.parameters((768, 6), (1024, 2)) + def test_config(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + model = self._build_model(data, proj_multiple) + + expected_num_parameters = model.layers[0].count_params() + + # Serialize model and use config to create new layer + model_config = model.get_config() + layer_config = model_config['layers'][1]['config'] + + new_model = TNExpandCondense.from_config(layer_config) + + # Build the layer so we can count params below + new_model.build(layer_config['batch_input_shape']) + + # Check that original layer had same num params as layer built from config + self.assertEqual(expected_num_parameters, new_model.count_params()) + + @parameterized.parameters((768, 6), (1024, 2)) + def test_model_save(self, input_dim, proj_multiple): + data = np.random.randint(10, size=(100, input_dim)) + model = self._build_model(data, proj_multiple) + + model.compile( + optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + + # Train the model for 5 epochs + model.fit(data, self.labels, epochs=5, batch_size=32) + + save_path = os.path.join(self.get_temp_dir(), 'test_model') + model.save(save_path) + loaded_model = 
tf.keras.models.load_model(save_path) + + # Compare model predictions and loaded_model predictions + self.assertAllEqual(model.predict(data), loaded_model.predict(data)) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/layers/tn_transformer_expand_condense.py b/official/nlp/modeling/layers/tn_transformer_expand_condense.py new file mode 100644 index 0000000000000000000000000000000000000000..c244fcb1cd051a88eebd363dace39914745c582c --- /dev/null +++ b/official/nlp/modeling/layers/tn_transformer_expand_condense.py @@ -0,0 +1,253 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TN-BERT TNTransformerExpandCondense employing Expand-Condense layer instead of Dense.""" +# pylint: disable=g-classes-have-attributes +# Import libraries + +import gin +import tensorflow as tf + +from official.nlp.modeling.layers.tn_expand_condense import TNExpandCondense + + +@tf.keras.utils.register_keras_serializable(package="Text") +@gin.configurable +class TNTransformerExpandCondense(tf.keras.layers.Layer): + """Transformer layer using tensor network Expand-Condense layer. + + This layer implements the Transformer from transformer.py, with a single + tensor network layer replacing the usual intermediate and output Dense + layers. + + Args: + num_attention_heads: Number of attention heads. + intermediate_size: Size of the intermediate layer. 
+ intermediate_activation: Activation for the intermediate layer. + dropout_rate: Dropout probability for the post-attention and output dropout. + attention_dropout_rate: Dropout probability for within the attention layer. + output_range: the sequence output range, [0, output_range) by slicing the + target sequence. `None` means the target sequence is not sliced. + kernel_initializer: Initializer for dense layer kernels. + bias_initializer: Initializer for dense layer biases. + kernel_regularizer: Regularizer for dense layer kernels. + bias_regularizer: Regularizer for dense layer biases. + activity_regularizer: Regularizer for dense layer activity. + kernel_constraint: Constraint for dense layer kernels. + bias_constraint: Constraint for dense layer kernels. + use_bias: Whether to enable use_bias in attention layer. If set to False, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate dense + layers. If set False, output of attention and intermediate dense layers is + normalized. + norm_epsilon: Epsilon value to initialize normalization layers. + intermediate_dropout: Dropout probability for intermediate_dropout_layer. + attention_initializer: Initializer for kernels of attention layers. If set + `None`, attention layers use kernel_initializer as initializer for kernel. 
+ """ + + def __init__(self, + num_attention_heads, + intermediate_size, + intermediate_activation, + dropout_rate=0.0, + attention_dropout_rate=0.0, + output_range=None, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + use_bias=True, + norm_first=False, + norm_epsilon=1e-12, + intermediate_dropout=0.0, + attention_initializer=None, + **kwargs): + super(TNTransformerExpandCondense, self).__init__(**kwargs) + + self._num_heads = num_attention_heads + self._intermediate_size = intermediate_size + self._intermediate_activation = intermediate_activation + self._attention_dropout_rate = attention_dropout_rate + self._dropout_rate = dropout_rate + self._output_range = output_range + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self._bias_initializer = tf.keras.initializers.get(bias_initializer) + self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) + self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer) + self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) + self._bias_constraint = tf.keras.constraints.get(bias_constraint) + self._use_bias = use_bias + self._norm_first = norm_first + self._norm_epsilon = norm_epsilon + self._intermediate_dropout = intermediate_dropout + if attention_initializer: + self._attention_initializer = tf.keras.initializers.get( + attention_initializer) + else: + self._attention_initializer = self._kernel_initializer + + def build(self, input_shape): + input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape + input_tensor_shape = tf.TensorShape(input_tensor) + if len(input_tensor_shape.as_list()) != 3: + raise ValueError( + "TNTransformerExpandCondense expects a three-dimensional input of " + "shape [batch, sequence, 
width].") + batch_size, sequence_length, hidden_size = input_tensor_shape + + if len(input_shape) == 2: + mask_tensor_shape = tf.TensorShape(input_shape[1]) + expected_mask_tensor_shape = tf.TensorShape( + [batch_size, sequence_length, sequence_length]) + if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape): + raise ValueError( + "When passing a mask tensor to TNTransformerExpandCondense, the " + "mask tensor must be of shape [batch, " + "sequence_length, sequence_length] (here %s). Got a " + "mask tensor of shape %s." % + (expected_mask_tensor_shape, mask_tensor_shape)) + if hidden_size % self._num_heads != 0: + raise ValueError( + "The input size (%d) is not a multiple of the number of attention " + "heads (%d)" % (hidden_size, self._num_heads)) + self._attention_head_size = int(hidden_size // self._num_heads) + common_kwargs = dict( + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint) + self._attention_layer = tf.keras.layers.MultiHeadAttention( + num_heads=self._num_heads, + key_dim=self._attention_head_size, + dropout=self._attention_dropout_rate, + use_bias=self._use_bias, + kernel_initializer=self._attention_initializer, + name="self_attention", + **common_kwargs) + self._attention_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + # Use float32 in layernorm for numeric stability. + # It is probably safe in mixed_float16, but we haven't validated this yet. + self._attention_layer_norm = ( + tf.keras.layers.LayerNormalization( + name="self_attention_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype=tf.float32)) + + # Substitute Dense layers with a single Expand-Condense layer. 
+ self._output_dense = TNExpandCondense( + 4, + use_bias=True, + activation=self._intermediate_activation, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer) + + self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + # Use float32 in layernorm for numeric stability. + self._output_layer_norm = tf.keras.layers.LayerNormalization( + name="output_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype=tf.float32) + + super(TNTransformerExpandCondense, self).build(input_shape) + + def get_config(self): + config = { + "num_attention_heads": + self._num_heads, + "intermediate_size": + self._intermediate_size, + "intermediate_activation": + self._intermediate_activation, + "dropout_rate": + self._dropout_rate, + "attention_dropout_rate": + self._attention_dropout_rate, + "output_range": + self._output_range, + "kernel_initializer": + tf.keras.initializers.serialize(self._kernel_initializer), + "bias_initializer": + tf.keras.initializers.serialize(self._bias_initializer), + "kernel_regularizer": + tf.keras.regularizers.serialize(self._kernel_regularizer), + "bias_regularizer": + tf.keras.regularizers.serialize(self._bias_regularizer), + "activity_regularizer": + tf.keras.regularizers.serialize(self._activity_regularizer), + "kernel_constraint": + tf.keras.constraints.serialize(self._kernel_constraint), + "bias_constraint": + tf.keras.constraints.serialize(self._bias_constraint), + "use_bias": + self._use_bias, + "norm_first": + self._norm_first, + "norm_epsilon": + self._norm_epsilon, + "intermediate_dropout": + self._intermediate_dropout, + "attention_initializer": + tf.keras.initializers.serialize(self._attention_initializer) + } + base_config = super(TNTransformerExpandCondense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + if isinstance(inputs, (list, tuple)) and len(inputs) == 2: + input_tensor, attention_mask = inputs + else: + 
input_tensor, attention_mask = (inputs, None) + + if self._output_range: + target_tensor = input_tensor[:, 0:self._output_range, :] + attention_mask = attention_mask[:, 0:self._output_range, :] + else: + if self._norm_first: + source_tensor = input_tensor + input_tensor = self._attention_layer_norm(input_tensor) + target_tensor = input_tensor + + attention_output = self._attention_layer( + query=target_tensor, value=input_tensor, attention_mask=attention_mask) + attention_output = self._attention_dropout(attention_output) + if self._norm_first: + attention_output = source_tensor + attention_output + else: + attention_output = self._attention_layer_norm(target_tensor + + attention_output) + if self._norm_first: + source_attention_output = attention_output + attention_output = self._output_layer_norm(attention_output) + + layer_output = self._output_dense(attention_output) + layer_output = self._output_dropout(layer_output) + # During mixed precision training, attention_output is from layer norm and + # is always fp32 for now. Cast layer_output to fp32 for the subsequent + # add. + layer_output = tf.cast(layer_output, tf.float32) + if self._norm_first: + layer_output = source_attention_output + layer_output + else: + layer_output = self._output_layer_norm(layer_output + attention_output) + + return layer_output diff --git a/official/nlp/modeling/layers/tn_transformer_test.py b/official/nlp/modeling/layers/tn_transformer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a21193e7c7b10b2aef1ae3b0e68c74e191149e2e --- /dev/null +++ b/official/nlp/modeling/layers/tn_transformer_test.py @@ -0,0 +1,214 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for TN-BERT transformer.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.layers.tn_transformer_expand_condense import TNTransformerExpandCondense + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. +@keras_parameterized.run_all_keras_modes +@parameterized.named_parameters(('tn', TNTransformerExpandCondense)) +class TransformerLayerTest(keras_parameterized.TestCase): + + def tearDown(self): + super(TransformerLayerTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy('float32') + + def test_layer_creation(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(data_tensor) + # The default output of a transformer layer should be the same as the input. 
+ self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) + + def test_layer_creation_with_mask(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + # The default output of a transformer layer should be the same as the input. + self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) + + def test_layer_creation_with_incorrect_mask_fails(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length - 3)) + with self.assertRaisesRegex(ValueError, 'When passing a mask tensor.*'): + _ = test_layer([data_tensor, mask_tensor]) + + def test_layer_invocation(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output_tensor = test_layer(data_tensor) + + # Create a model from the test layer. + model = tf.keras.Model(data_tensor, output_tensor) + + # Invoke the model on test data. 
We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. + batch_size = 6 + input_data = 16 * np.random.random_sample( + (batch_size, sequence_length, width)) + _ = model.predict(input_data) + + def test_layer_invocation_with_mask(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + + # Create a model from the test layer. + model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. + batch_size = 6 + input_data = 16 * np.random.random_sample( + (batch_size, sequence_length, width)) + # The attention mask should be of shape (batch, from_seq_len, to_seq_len), + # which here is (batch, sequence_length, sequence_length) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + _ = model.predict([input_data, mask_data]) + + def test_layer_output_range(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + + batch_size = 6 + input_data = 16 * np.random.random_sample( + (batch_size, sequence_length, width)) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + output_tensor = test_layer([input_data, mask_data]) + + # The layer only attends to the first token and outputs the first token + # embeeding. 
+ new_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu', + output_range=1) + _ = new_layer([input_data, mask_data]) + new_layer.set_weights(test_layer.get_weights()) + new_output_tensor = new_layer([input_data, mask_data]) + self.assertAllClose( + new_output_tensor, output_tensor[:, 0:1, :], atol=5e-5, rtol=0.003) + + def test_layer_invocation_with_float16_dtype(self, transformer_cls): + tf.keras.mixed_precision.set_global_policy('mixed_float16') + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu') + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). + data_tensor = tf.keras.Input(shape=(sequence_length, width)) + # Create a 2-dimensional input (the first dimension is implicit). + mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) + output_tensor = test_layer([data_tensor, mask_tensor]) + + # Create a model from the test layer. + model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) + + # Invoke the model on test data. We can't validate the output data itself + # (the NN is too complex) but this will rule out structural runtime errors. + batch_size = 6 + input_data = (16 * np.random.random_sample( + (batch_size, sequence_length, width))) + # The attention mask should be of shape (batch, from_seq_len, to_seq_len), + # which here is (batch, sequence_length, sequence_length) + mask_data = np.random.randint( + 2, size=(batch_size, sequence_length, sequence_length)) + _ = model.predict([input_data, mask_data]) + + def test_transform_with_initializer(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu', + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) + sequence_length = 21 + width = 256 + # Create a 3-dimensional input (the first dimension is implicit). 
+ data_tensor = tf.keras.Input(shape=(sequence_length, width)) + output = test_layer(data_tensor) + # The default output of a transformer layer should be the same as the input. + self.assertEqual(data_tensor.shape.as_list(), output.shape.as_list()) + + def test_dynamic_layer_sequence(self, transformer_cls): + test_layer = transformer_cls( + num_attention_heads=16, + intermediate_size=2048, + intermediate_activation='relu', + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) + # Create a 3-dimensional input (the first dimension is implicit). + width = 256 + input_tensor = tf.keras.Input(shape=(None, width)) + output_tensor = test_layer(input_tensor) + model = tf.keras.Model(input_tensor, output_tensor) + + input_length = 17 + input_data = np.ones((1, input_length, width)) + output_data = model.predict(input_data) + + self.assertAllEqual([1, input_length, width], output_data.shape) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/layers/transformer.py b/official/nlp/modeling/layers/transformer.py index c3772f8aa6cf29b52371f86b984340cfcbd38e33..1c113e5adf227a3d289907b26df9dd666ad1e465 100644 --- a/official/nlp/modeling/layers/transformer.py +++ b/official/nlp/modeling/layers/transformer.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,30 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Keras-based transformer block layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import gin import tensorflow as tf +from official.nlp import keras_nlp from official.nlp.modeling.layers import attention from official.nlp.modeling.layers import multi_channel_attention from official.nlp.modeling.layers.util import tf_function_if_eager @tf.keras.utils.register_keras_serializable(package="Text") -class Transformer(tf.keras.layers.Layer): +class Transformer(keras_nlp.layers.TransformerEncoderBlock): """Transformer layer. This layer implements the Transformer from "Attention Is All You Need". (https://arxiv.org/abs/1706.03762). - Arguments: + Args: num_attention_heads: Number of attention heads. intermediate_size: Size of the intermediate layer. intermediate_activation: Activation for the intermediate layer. @@ -49,6 +46,15 @@ class Transformer(tf.keras.layers.Layer): activity_regularizer: Regularizer for dense layer activity. kernel_constraint: Constraint for dense layer kernels. bias_constraint: Constraint for dense layer kernels. + use_bias: Whether to enable use_bias in attention layer. If set False, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate dense + layers. If set False, output of attention and intermediate dense layers is + normalized. + norm_epsilon: Epsilon value to initialize normalization layers. + intermediate_dropout: Dropout probability for intermediate_dropout_layer. + attention_initializer: Initializer for kernels of attention layers. If set + `None`, attention layers use kernel_initializer as initializer for kernel. 
""" def __init__(self, @@ -65,161 +71,32 @@ class Transformer(tf.keras.layers.Layer): activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + use_bias=True, + norm_first=False, + norm_epsilon=1e-12, + intermediate_dropout=0.0, + attention_initializer=None, **kwargs): - super(Transformer, self).__init__(**kwargs) - - self._num_heads = num_attention_heads - self._intermediate_size = intermediate_size - self._intermediate_activation = intermediate_activation - self._attention_dropout_rate = attention_dropout_rate - self._dropout_rate = dropout_rate - self._output_range = output_range - self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) - self._bias_initializer = tf.keras.initializers.get(bias_initializer) - self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) - self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) - self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer) - self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) - self._bias_constraint = tf.keras.constraints.get(bias_constraint) - - def build(self, input_shape): - input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape - input_tensor_shape = tf.TensorShape(input_tensor) - if len(input_tensor_shape) != 3: - raise ValueError("TransformerLayer expects a three-dimensional input of " - "shape [batch, sequence, width].") - batch_size, sequence_length, hidden_size = input_tensor_shape - - if len(input_shape) == 2: - mask_tensor_shape = tf.TensorShape(input_shape[1]) - expected_mask_tensor_shape = tf.TensorShape( - [batch_size, sequence_length, sequence_length]) - if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape): - raise ValueError("When passing a mask tensor to TransformerLayer, the " - "mask tensor must be of shape [batch, " - "sequence_length, sequence_length] (here %s). Got a " - "mask tensor of shape %s." 
% - (expected_mask_tensor_shape, mask_tensor_shape)) - if hidden_size % self._num_heads != 0: - raise ValueError( - "The input size (%d) is not a multiple of the number of attention " - "heads (%d)" % (hidden_size, self._num_heads)) - self._attention_head_size = int(hidden_size // self._num_heads) - common_kwargs = dict( - kernel_initializer=self._kernel_initializer, - bias_initializer=self._bias_initializer, - kernel_regularizer=self._kernel_regularizer, - bias_regularizer=self._bias_regularizer, - activity_regularizer=self._activity_regularizer, - kernel_constraint=self._kernel_constraint, - bias_constraint=self._bias_constraint) - self._attention_layer = attention.MultiHeadAttention( - num_heads=self._num_heads, - key_size=self._attention_head_size, - dropout=self._attention_dropout_rate, - name="self_attention", - **common_kwargs) - # pylint: disable=protected-access - self._attention_layer.build([input_tensor_shape] * 3) - self._attention_output_dense = self._attention_layer._output_dense - # pylint: enable=protected-access - self._attention_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) - # Use float32 in layernorm for numeric stability. - # It is probably safe in mixed_float16, but we haven't validated this yet. - self._attention_layer_norm = ( - tf.keras.layers.LayerNormalization( - name="self_attention_layer_norm", - axis=-1, - epsilon=1e-12, - dtype=tf.float32)) - self._intermediate_dense = tf.keras.layers.experimental.EinsumDense( - "abc,cd->abd", - output_shape=(None, self._intermediate_size), - bias_axes="d", - name="intermediate", - **common_kwargs) - policy = tf.keras.mixed_precision.experimental.global_policy() - if policy.name == "mixed_bfloat16": - # bfloat16 causes BERT with the LAMB optimizer to not converge - # as well, so we use float32. - # TODO(b/154538392): Investigate this. 
- policy = tf.float32 - self._intermediate_activation_layer = tf.keras.layers.Activation( - self._intermediate_activation, dtype=policy) - self._output_dense = tf.keras.layers.experimental.EinsumDense( - "abc,cd->abd", - output_shape=(None, hidden_size), - bias_axes="d", - name="output", - **common_kwargs) - self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) - # Use float32 in layernorm for numeric stability. - self._output_layer_norm = tf.keras.layers.LayerNormalization( - name="output_layer_norm", axis=-1, epsilon=1e-12, dtype=tf.float32) - - super(Transformer, self).build(input_shape) - - def get_config(self): - config = { - "num_attention_heads": - self._num_heads, - "intermediate_size": - self._intermediate_size, - "intermediate_activation": - self._intermediate_activation, - "dropout_rate": - self._dropout_rate, - "attention_dropout_rate": - self._attention_dropout_rate, - "output_range": - self._output_range, - "kernel_initializer": - tf.keras.initializers.serialize(self._kernel_initializer), - "bias_initializer": - tf.keras.initializers.serialize(self._bias_initializer), - "kernel_regularizer": - tf.keras.regularizers.serialize(self._kernel_regularizer), - "bias_regularizer": - tf.keras.regularizers.serialize(self._bias_regularizer), - "activity_regularizer": - tf.keras.regularizers.serialize(self._activity_regularizer), - "kernel_constraint": - tf.keras.constraints.serialize(self._kernel_constraint), - "bias_constraint": - tf.keras.constraints.serialize(self._bias_constraint) - } - base_config = super(Transformer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def call(self, inputs): - if isinstance(inputs, (list, tuple)) and len(inputs) == 2: - input_tensor, attention_mask = inputs - else: - input_tensor, attention_mask = (inputs, None) - - if self._output_range: - target_tensor = input_tensor[:, 0:self._output_range, :] - attention_mask = attention_mask[:, 0:self._output_range, :] - else: - 
target_tensor = input_tensor - attention_inputs = [target_tensor, input_tensor] - - attention_output = self._attention_layer(attention_inputs, attention_mask) - attention_output = self._attention_dropout(attention_output) - attention_output = self._attention_layer_norm(target_tensor + - attention_output) - intermediate_output = self._intermediate_dense(attention_output) - intermediate_output = self._intermediate_activation_layer( - intermediate_output) - layer_output = self._output_dense(intermediate_output) - layer_output = self._output_dropout(layer_output) - # During mixed precision training, attention_output is from layer norm and - # is always fp32 for now. Cast layer_output to fp32 for the subsequent - # add. - layer_output = tf.cast(layer_output, tf.float32) - layer_output = self._output_layer_norm(layer_output + attention_output) - - return layer_output + super().__init__( + num_attention_heads=num_attention_heads, + inner_dim=intermediate_size, + inner_activation=intermediate_activation, + output_dropout=dropout_rate, + attention_dropout=attention_dropout_rate, + output_range=output_range, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + use_bias=use_bias, + norm_first=norm_first, + norm_epsilon=norm_epsilon, + inner_dropout=intermediate_dropout, + attention_initializer=attention_initializer, + **kwargs) @tf.keras.utils.register_keras_serializable(package="Text") @@ -228,11 +105,11 @@ class CompiledTransformer(Transformer): @tf_function_if_eager(experimental_compile=True) def call(self, inputs): - return super(CompiledTransformer, self).call(inputs) + return super().call(inputs) @tf.keras.utils.register_keras_serializable(package="Text") -class TransformerDecoderLayer(tf.keras.layers.Layer): +class 
TransformerDecoderBlock(tf.keras.layers.Layer): """Single transformer layer for decoder. It has three sub-layers: @@ -240,7 +117,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): (2) a encoder-decoder attention. (3) a positionwise fully connected feed-forward network. - Arguments: + Args: num_attention_heads: Number of attention heads. intermediate_size: Size of the intermediate layer. intermediate_activation: Activation for the intermediate layer. @@ -255,6 +132,15 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): activity_regularizer: Regularizer for dense layer activity. kernel_constraint: Constraint for dense layer kernels. bias_constraint: Constraint for dense layer kernels. + use_bias: Whether to enable use_bias in attention layer. If set False, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate dense + layers. If set False, output of attention and intermediate dense layers is + normalized. + norm_epsilon: Epsilon value to initialize normalization layers. + intermediate_dropout: Dropout probability for intermediate_dropout_layer. + attention_initializer: Initializer for kernels of attention layers. If set + `None`, attention layers use kernel_initializer as initializer for kernel. 
""" def __init__(self, @@ -271,8 +157,13 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + use_bias=True, + norm_first=False, + norm_epsilon=1e-12, + intermediate_dropout=0.0, + attention_initializer=None, **kwargs): - super(TransformerDecoderLayer, self).__init__(**kwargs) + super().__init__(**kwargs) self.num_attention_heads = num_attention_heads self.intermediate_size = intermediate_size self.intermediate_activation = tf.keras.activations.get( @@ -287,6 +178,15 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): self._activity_regularizer = tf.keras.regularizers.get(activity_regularizer) self._kernel_constraint = tf.keras.constraints.get(kernel_constraint) self._bias_constraint = tf.keras.constraints.get(bias_constraint) + self._use_bias = use_bias + self._norm_first = norm_first + self._norm_epsilon = norm_epsilon + self._intermediate_dropout = intermediate_dropout + if attention_initializer: + self._attention_initializer = tf.keras.initializers.get( + attention_initializer) + else: + self._attention_initializer = self._kernel_initializer if self.multi_channel_cross_attention: self._cross_attention_cls = multi_channel_attention.MultiChannelAttention else: @@ -294,7 +194,7 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): def build(self, input_shape): target_tensor_shape = tf.TensorShape(input_shape[0]) - if len(target_tensor_shape) != 3: + if len(target_tensor_shape.as_list()) != 3: raise ValueError("TransformerLayer expects a three-dimensional input of " "shape [batch, sequence, width].") hidden_size = target_tensor_shape[2] @@ -302,9 +202,8 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): raise ValueError( "The hidden size (%d) is not a multiple of the number of attention " "heads (%d)" % (hidden_size, self.num_attention_heads)) - self.attention_head_size = int(hidden_size / self.num_attention_heads) + self.attention_head_size = int(hidden_size) // 
self.num_attention_heads common_kwargs = dict( - kernel_initializer=self._kernel_initializer, bias_initializer=self._bias_initializer, kernel_regularizer=self._kernel_regularizer, bias_regularizer=self._bias_regularizer, @@ -314,27 +213,35 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): # Self attention. self.self_attention = attention.CachedAttention( num_heads=self.num_attention_heads, - key_size=self.attention_head_size, + key_dim=self.attention_head_size, dropout=self.attention_dropout_rate, + use_bias=self._use_bias, + kernel_initializer=self._attention_initializer, name="self_attention", **common_kwargs) self.self_attention_output_dense = tf.keras.layers.experimental.EinsumDense( "abc,cd->abd", output_shape=(None, hidden_size), bias_axes="d", + kernel_initializer=self._kernel_initializer, name="output", **common_kwargs) self.self_attention_dropout = tf.keras.layers.Dropout( rate=self.dropout_rate) self.self_attention_layer_norm = ( tf.keras.layers.LayerNormalization( - name="self_attention_layer_norm", axis=-1, epsilon=1e-12)) + name="self_attention_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype="float32")) # Encoder-decoder attention. self.encdec_attention = self._cross_attention_cls( num_heads=self.num_attention_heads, - key_size=self.attention_head_size, + key_dim=self.attention_head_size, dropout=self.attention_dropout_rate, output_shape=hidden_size, + use_bias=self._use_bias, + kernel_initializer=self._attention_initializer, name="attention/encdec", **common_kwargs) @@ -342,27 +249,77 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): rate=self.dropout_rate) self.encdec_attention_layer_norm = ( tf.keras.layers.LayerNormalization( - name="attention/encdec_output_layer_norm", axis=-1, epsilon=1e-12)) + name="attention/encdec_output_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype="float32")) # Feed-forward projection. 
self.intermediate_dense = tf.keras.layers.experimental.EinsumDense( "abc,cd->abd", output_shape=(None, self.intermediate_size), bias_axes="d", + kernel_initializer=self._kernel_initializer, name="intermediate", **common_kwargs) self.intermediate_activation_layer = tf.keras.layers.Activation( self.intermediate_activation) + self._intermediate_dropout_layer = tf.keras.layers.Dropout( + rate=self._intermediate_dropout) self.output_dense = tf.keras.layers.experimental.EinsumDense( "abc,cd->abd", output_shape=(None, hidden_size), bias_axes="d", + kernel_initializer=self._kernel_initializer, name="output", **common_kwargs) self.output_dropout = tf.keras.layers.Dropout(rate=self.dropout_rate) self.output_layer_norm = tf.keras.layers.LayerNormalization( - name="output_layer_norm", axis=-1, epsilon=1e-12) - super(TransformerDecoderLayer, self).build(input_shape) + name="output_layer_norm", axis=-1, + epsilon=self._norm_epsilon, dtype="float32") + super().build(input_shape) + + def get_config(self): + config = { + "num_attention_heads": + self.num_attention_heads, + "intermediate_size": + self.intermediate_size, + "intermediate_activation": + self.intermediate_activation, + "dropout_rate": + self.dropout_rate, + "attention_dropout_rate": + self.attention_dropout_rate, + "multi_channel_cross_attention": + self.multi_channel_cross_attention, + "kernel_initializer": + tf.keras.initializers.serialize(self._kernel_initializer), + "bias_initializer": + tf.keras.initializers.serialize(self._bias_initializer), + "kernel_regularizer": + tf.keras.regularizers.serialize(self._kernel_regularizer), + "bias_regularizer": + tf.keras.regularizers.serialize(self._bias_regularizer), + "activity_regularizer": + tf.keras.regularizers.serialize(self._activity_regularizer), + "kernel_constraint": + tf.keras.constraints.serialize(self._kernel_constraint), + "bias_constraint": + tf.keras.constraints.serialize(self._bias_constraint), + "use_bias": + self._use_bias, + "norm_first": + 
self._norm_first, + "norm_epsilon": + self._norm_epsilon, + "intermediate_dropout": + self._intermediate_dropout, + "attention_initializer": + tf.keras.initializers.serialize(self._attention_initializer) + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) def common_layers_with_encoder(self): """Gets layer objects that can make a Transformer encoder block.""" @@ -375,36 +332,58 @@ class TransformerDecoderLayer(tf.keras.layers.Layer): if self.multi_channel_cross_attention: if len(inputs) != 5: raise ValueError( - "TransformerDecoderLayer must have 5 inputs, when it uses " + "TransformerDecoderBlock must have 5 inputs, when it uses " "multi_channel_cross_attention. But it got: %d" % len(inputs)) elif len(inputs) != 4: raise ValueError( - "TransformerDecoderLayer must have 4 inputs, but it got: %d" % + "TransformerDecoderBlock must have 4 inputs, but it got: %d" % len(inputs)) input_tensor, memory, attention_mask, self_attention_mask = inputs[:4] - self_attention_inputs = [input_tensor, input_tensor] + source_tensor = input_tensor + if self._norm_first: + input_tensor = self.self_attention_layer_norm(input_tensor) self_attention_output, cache = self.self_attention( - self_attention_inputs, + query=input_tensor, + value=input_tensor, attention_mask=self_attention_mask, cache=cache, decode_loop_step=decode_loop_step) self_attention_output = self.self_attention_dropout(self_attention_output) - self_attention_output = self.self_attention_layer_norm( - input_tensor + self_attention_output) - - cross_attn_inputs = [self_attention_output, memory] + if self._norm_first: + self_attention_output = source_tensor + self_attention_output + else: + self_attention_output = self.self_attention_layer_norm( + input_tensor + self_attention_output) + if self._norm_first: + source_self_attention_output = self_attention_output + self_attention_output = self.encdec_attention_layer_norm( + self_attention_output) + cross_attn_inputs = dict( 
+ query=self_attention_output, + value=memory, + attention_mask=attention_mask) if self.multi_channel_cross_attention: # Accesses the 5-th input tensor for the doc-attention probabilities. - cross_attn_inputs.append(inputs[-1]) - attention_output = self.encdec_attention(cross_attn_inputs, attention_mask) + cross_attn_inputs["context_attention_weights"] = inputs[-1] + attention_output = self.encdec_attention(**cross_attn_inputs) attention_output = self.encdec_attention_dropout(attention_output) - attention_output = self.encdec_attention_layer_norm(self_attention_output + - attention_output) + if self._norm_first: + attention_output = source_self_attention_output + attention_output + else: + attention_output = self.encdec_attention_layer_norm( + self_attention_output + attention_output) + if self._norm_first: + source_attention_output = attention_output + attention_output = self.output_layer_norm(attention_output) intermediate_output = self.intermediate_dense(attention_output) intermediate_output = self.intermediate_activation_layer( intermediate_output) + intermediate_output = self._intermediate_dropout_layer(intermediate_output) layer_output = self.output_dense(intermediate_output) layer_output = self.output_dropout(layer_output) - layer_output = self.output_layer_norm(layer_output + attention_output) + if self._norm_first: + layer_output = source_attention_output + layer_output + else: + layer_output = self.output_layer_norm(layer_output + attention_output) return layer_output, cache diff --git a/official/nlp/modeling/layers/transformer_scaffold.py b/official/nlp/modeling/layers/transformer_scaffold.py index d988febfa68a3e45d3919892ba677c85350f71d6..4f6de71ceafe5b40442ae68c9bffb2e90cfa7c5b 100644 --- a/official/nlp/modeling/layers/transformer_scaffold.py +++ b/official/nlp/modeling/layers/transformer_scaffold.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,14 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Keras-based transformer scaffold layer.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +from absl import logging import gin import tensorflow as tf @@ -38,7 +35,7 @@ class TransformerScaffold(tf.keras.layers.Layer): instantiate the class with the config, or pass a class instance to `attention_cls`/`feedforward_cls`. - Arguments: + Args: num_attention_heads: Number of attention heads. intermediate_size: Size of the intermediate layer. intermediate_activation: Activation for the intermediate layer. @@ -46,28 +43,25 @@ class TransformerScaffold(tf.keras.layers.Layer): attention_cfg: The config with which to instantiate `attention_cls`. Ignored if attention_cls is a layer instance or None. If `attention_cls` is a class, but `attention_cfg` is None, following kwargs will be used to - instantiate the attention instance: - { + instantiate the attention instance: { "num_heads": num_attention_heads, - "key_size": int(hidden_size // num_attention_heads), + "key_dim": int(hidden_size // num_attention_heads), "dropout": attention_dropout_rate, - "name": "self_attention" - }, where `hidden_size` is the input tensor's last dimension. + "name": "self_attention" }, where `hidden_size` is the input tensor's + last dimension. feedforward_cls: A class to instantiate feedforward layer, or a layer - instance. If None, will use the standard feedforward layer as described - in "Attention Is All You Need" paper. 
If not None, the instantiated - feedforward layer is expected to take the output of attention as input - and its output is this transformer layer's output. + instance. If None, will use the standard feedforward layer as described in + "Attention Is All You Need" paper. If not None, the instantiated + feedforward layer is expected to take the output of attention as input and + its output is this transformer layer's output. feedforward_cfg: The config with which to instantiate `feedforward_cls`. - Ignored if feedforward_cls is a layer instance or is None. - If `feedforward_cls` is a class, but `feedforward_cfg` is None, following - kwargs will be used to instantiate the feedforward instance: - { + Ignored if feedforward_cls is a layer instance or is None. If + `feedforward_cls` is a class, but `feedforward_cfg` is None, following + kwargs will be used to instantiate the feedforward instance: { "intermediate_size": intermediate_size, "intermediate_activation": intermediate_activation, "dropout": dropout_rate, - "name": "feedforward" - }. + "name": "feedforward" }. dropout_rate: Dropout probability for the post-attention and output dropout. attention_dropout_rate: Dropout probability for within the attention layer. kernel_initializer: Initializer for dense layer kernels. 
@@ -89,6 +83,7 @@ class TransformerScaffold(tf.keras.layers.Layer): feedforward_cfg=None, dropout_rate=0.0, attention_dropout_rate=0.0, + norm_first=False, kernel_initializer="glorot_uniform", bias_initializer="zeros", kernel_regularizer=None, @@ -103,6 +98,7 @@ class TransformerScaffold(tf.keras.layers.Layer): self._attention_cls = attention_cls self._feedforward_cls = feedforward_cls self._feedforward_cfg = feedforward_cfg + self._norm_first = norm_first self._num_heads = num_attention_heads self._intermediate_size = intermediate_size self._intermediate_activation = intermediate_activation @@ -116,24 +112,14 @@ class TransformerScaffold(tf.keras.layers.Layer): self._bias_constraint = tf.keras.constraints.get(bias_constraint) def build(self, input_shape): - input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape - input_tensor_shape = tf.TensorShape(input_tensor) - if len(input_tensor_shape) != 3: + input_tensor_shape = input_shape[0] if ( + len(input_shape) == 2) else input_shape + input_tensor_shape = tf.TensorShape(input_tensor_shape) + if len(input_tensor_shape.as_list()) != 3: raise ValueError( "TransformerScaffold expects a three-dimensional input of " "shape [batch, sequence, width].") - batch_size, sequence_length, hidden_size = input_tensor_shape - - if len(input_shape) == 2: - mask_tensor_shape = tf.TensorShape(input_shape[1]) - expected_mask_tensor_shape = tf.TensorShape( - [batch_size, sequence_length, sequence_length]) - if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape): - raise ValueError("When passing a mask tensor to TransformerLayer, the " - "mask tensor must be of shape [batch, " - "sequence_length, sequence_length] (here %s). Got a " - "mask tensor of shape %s." 
% - (expected_mask_tensor_shape, mask_tensor_shape)) + hidden_size = input_tensor_shape[-1] if hidden_size % self._num_heads != 0: raise ValueError( "The input size (%d) is not a multiple of the number of attention " @@ -160,7 +146,7 @@ class TransformerScaffold(tf.keras.layers.Layer): default_attention_cfg = { "num_heads": self._num_heads, - "key_size": self._attention_head_size, + "key_dim": self._attention_head_size, "dropout": self._attention_dropout_rate, "name": "self_attention" } @@ -185,12 +171,16 @@ class TransformerScaffold(tf.keras.layers.Layer): else: self._feedforward_block = None + # self._dropout_rate controls dropout rates at two places: + # after attention, and after FFN. self._attention_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) # Use float32 in layernorm for numeric stability. # It is probably safe in mixed_float16, but we haven't validated this yet. self._attention_layer_norm = ( tf.keras.layers.LayerNormalization( - name="self_attention_layer_norm", axis=-1, epsilon=1e-12, + name="self_attention_layer_norm", + axis=-1, + epsilon=1e-12, dtype=tf.float32)) if self._feedforward_block is None: @@ -200,7 +190,7 @@ class TransformerScaffold(tf.keras.layers.Layer): bias_axes="d", name="intermediate", **common_kwargs) - policy = tf.keras.mixed_precision.experimental.global_policy() + policy = tf.keras.mixed_precision.global_policy() if policy.name == "mixed_bfloat16": # bfloat16 causes BERT with the LAMB optimizer to not converge # as well, so we use float32. 
@@ -221,6 +211,7 @@ class TransformerScaffold(tf.keras.layers.Layer): name="output_layer_norm", axis=-1, epsilon=1e-12, dtype=tf.float32) super(TransformerScaffold, self).build(input_shape) + logging.info("%s configs: %s", self.__class__.__name__, self.get_config()) def get_config(self): config = { @@ -238,6 +229,8 @@ class TransformerScaffold(tf.keras.layers.Layer): self._dropout_rate, "attention_dropout_rate": self._attention_dropout_rate, + "norm_first": + self._norm_first, "kernel_initializer": tf.keras.initializers.serialize(self._kernel_initializer), "bias_initializer": @@ -256,30 +249,57 @@ class TransformerScaffold(tf.keras.layers.Layer): base_config = super(TransformerScaffold, self).get_config() return dict(list(base_config.items()) + list(config.items())) - def call(self, inputs): + def call(self, inputs, training=None): if isinstance(inputs, (list, tuple)) and len(inputs) == 2: input_tensor, attention_mask = inputs else: input_tensor, attention_mask = (inputs, None) - attention_inputs = [input_tensor, input_tensor] + if self._norm_first: + source_tensor = input_tensor + input_tensor = self._attention_layer_norm(input_tensor, training=training) + + attention_output = self._attention_layer( + query=input_tensor, value=input_tensor, attention_mask=attention_mask, + training=training) + attention_output = self._attention_dropout(attention_output, + training=training) + + if self._norm_first: + attention_output = source_tensor + attention_output + else: + attention_output = self._attention_layer_norm(input_tensor + + attention_output, + training=training) + if self._norm_first: + source_attention_output = attention_output + attention_output = self._output_layer_norm(attention_output, + training=training) - attention_output = self._attention_layer(attention_inputs, attention_mask) - attention_output = self._attention_dropout(attention_output) - attention_output = self._attention_layer_norm(input_tensor + - attention_output) if self._feedforward_block is None: 
intermediate_output = self._intermediate_dense(attention_output) intermediate_output = self._intermediate_activation_layer( intermediate_output) - layer_output = self._output_dense(intermediate_output) - layer_output = self._output_dropout(layer_output) + layer_output = self._output_dense(intermediate_output, training=training) + layer_output = self._output_dropout(layer_output, training=training) # During mixed precision training, attention_output is from layer norm # and is always fp32 for now. Cast layer_output to fp32 for the subsequent # add. layer_output = tf.cast(layer_output, tf.float32) - layer_output = self._output_layer_norm(layer_output + attention_output) + if self._norm_first: + layer_output = source_attention_output + layer_output + else: + layer_output = self._output_layer_norm(layer_output + attention_output, + training=training) else: - layer_output = self._feedforward_block(attention_output) + if self._norm_first: + # if norm_first, assume the feedforward block will not apply layer norm + layer_output = self._feedforward_block(attention_output, + training=training) + layer_output += source_attention_output + else: + # if not norm_first, assume that the feedforwad does apply layer norm + layer_output = self._feedforward_block(attention_output, + training=training) return layer_output diff --git a/official/nlp/modeling/layers/transformer_scaffold_test.py b/official/nlp/modeling/layers/transformer_scaffold_test.py index ad919889569501c1c29a3c0f88f3e1d1621aec3a..5267a27efd627e3418ab76526505ec2b4617147d 100644 --- a/official/nlp/modeling/layers/transformer_scaffold_test.py +++ b/official/nlp/modeling/layers/transformer_scaffold_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,14 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for Keras-based transformer block layer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import json +"""Tests for Keras-based transformer block layer.""" import numpy as np import tensorflow as tf @@ -39,10 +33,10 @@ class ValidatedAttentionLayer(attention.MultiHeadAttention): super(ValidatedAttentionLayer, self).__init__(**kwargs) self.list = call_list - def call(self, inputs, attention_mask=None): + def call(self, query, value, attention_mask=None): self.list.append(True) return super(ValidatedAttentionLayer, self).call( - inputs, attention_mask=attention_mask) + query, value, attention_mask=attention_mask) def get_config(self): config = super(ValidatedAttentionLayer, self).get_config() @@ -89,7 +83,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): def tearDown(self): super(TransformerLayerTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy('float32') + tf.keras.mixed_precision.set_global_policy('float32') def test_layer_creation(self): sequence_length = 21 @@ -98,7 +92,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -126,7 +120,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } feedforward_call_list = [] @@ -164,7 +158,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 
8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -186,30 +180,6 @@ class TransformerLayerTest(keras_parameterized.TestCase): self.assertNotEmpty(call_list) self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.") - def test_layer_creation_with_incorrect_mask_fails(self): - sequence_length = 21 - width = 80 - - call_list = [] - attention_layer_cfg = { - 'num_heads': 10, - 'key_size': 8, - 'call_list': call_list, - } - test_layer = transformer_scaffold.TransformerScaffold( - attention_cls=ValidatedAttentionLayer, - attention_cfg=attention_layer_cfg, - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - # Create a 2-dimensional input (the first dimension is implicit). - mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length - 3)) - with self.assertRaisesRegex(ValueError, 'When passing a mask tensor.*'): - _ = test_layer([data_tensor, mask_tensor]) - def test_layer_invocation(self): sequence_length = 21 width = 80 @@ -217,7 +187,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -252,7 +222,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } feedforward_call_list = [] @@ -303,7 +273,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -338,14 +308,14 @@ class TransformerLayerTest(keras_parameterized.TestCase): self.assertTrue(call_list[0], "The 
passed layer class wasn't instantiated.") def test_layer_invocation_with_float16_dtype(self): - tf.keras.mixed_precision.experimental.set_policy('mixed_float16') + tf.keras.mixed_precision.set_global_policy('mixed_float16') sequence_length = 21 width = 80 call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -386,7 +356,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, } test_layer = transformer_scaffold.TransformerScaffold( @@ -414,7 +384,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, 'name': 'test_layer', } @@ -447,12 +417,11 @@ class TransformerLayerTest(keras_parameterized.TestCase): # Serialize the model config. Pass the serialized data through json to # ensure that we can serialize this layer to disk. - serialized_data = json.dumps(model.get_config()) - post_string_serialized_data = json.loads(serialized_data) + serialized_data = model.get_config() # Create a new model from the old config, and copy the weights. These models # should have identical outputs. 
- new_model = tf.keras.Model.from_config(post_string_serialized_data) + new_model = tf.keras.Model.from_config(serialized_data) new_model.set_weights(model.get_weights()) output = new_model.predict([input_data, mask_data]) @@ -474,7 +443,7 @@ class TransformerLayerTest(keras_parameterized.TestCase): call_list = [] attention_layer_cfg = { 'num_heads': 10, - 'key_size': 8, + 'key_dim': 8, 'call_list': call_list, 'name': 'test_layer', } @@ -512,14 +481,10 @@ class TransformerLayerTest(keras_parameterized.TestCase): 2, size=(batch_size, sequence_length, sequence_length)) pre_serialization_output = model.predict([input_data, mask_data]) - # Serialize the model config. Pass the serialized data through json to - # ensure that we can serialize this layer to disk. - serialized_data = json.dumps(model.get_config()) - post_string_serialized_data = json.loads(serialized_data) - + serialized_data = model.get_config() # Create a new model from the old config, and copy the weights. These models # should have identical outputs. - new_model = tf.keras.Model.from_config(post_string_serialized_data) + new_model = tf.keras.Model.from_config(serialized_data) new_model.set_weights(model.get_weights()) output = new_model.predict([input_data, mask_data]) diff --git a/official/nlp/modeling/layers/transformer_test.py b/official/nlp/modeling/layers/transformer_test.py index 841feb9948cb69abe1b1b73364b6f09fa2bde836..0c6c472ec4dfc643450b2d584ce3fdb3f34dffa5 100644 --- a/official/nlp/modeling/layers/transformer_test.py +++ b/official/nlp/modeling/layers/transformer_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,210 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for Keras-based transformer block layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Keras-based transformer block layer.""" -from absl.testing import parameterized -import numpy as np import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import from official.nlp.modeling.layers import transformer -# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It -# guarantees forward compatibility of this code for the V2 switchover. -@keras_parameterized.run_all_keras_modes -@parameterized.named_parameters(('base', transformer.Transformer), - ('xla', transformer.CompiledTransformer)) -class TransformerLayerTest(keras_parameterized.TestCase): - - def tearDown(self): - super(TransformerLayerTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy('float32') - - def test_layer_creation(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - output_tensor = test_layer(data_tensor) - # The default output of a transformer layer should be the same as the input. - self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) - - def test_layer_creation_with_mask(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). 
- data_tensor = tf.keras.Input(shape=(sequence_length, width)) - # Create a 2-dimensional input (the first dimension is implicit). - mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) - output_tensor = test_layer([data_tensor, mask_tensor]) - # The default output of a transformer layer should be the same as the input. - self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list()) - - def test_layer_creation_with_incorrect_mask_fails(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - # Create a 2-dimensional input (the first dimension is implicit). - mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length - 3)) - with self.assertRaisesRegex(ValueError, 'When passing a mask tensor.*'): - _ = test_layer([data_tensor, mask_tensor]) - - def test_layer_invocation(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - output_tensor = test_layer(data_tensor) - - # Create a model from the test layer. - model = tf.keras.Model(data_tensor, output_tensor) - - # Invoke the model on test data. We can't validate the output data itself - # (the NN is too complex) but this will rule out structural runtime errors. 
- batch_size = 6 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, width)) - _ = model.predict(input_data) - - def test_layer_invocation_with_mask(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - # Create a 2-dimensional input (the first dimension is implicit). - mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) - output_tensor = test_layer([data_tensor, mask_tensor]) - - # Create a model from the test layer. - model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) - - # Invoke the model on test data. We can't validate the output data itself - # (the NN is too complex) but this will rule out structural runtime errors. - batch_size = 6 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, width)) - # The attention mask should be of shape (batch, from_seq_len, to_seq_len), - # which here is (batch, sequence_length, sequence_length) - mask_data = np.random.randint( - 2, size=(batch_size, sequence_length, sequence_length)) - _ = model.predict([input_data, mask_data]) - - def test_layer_output_range(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - - batch_size = 6 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, width)) - mask_data = np.random.randint( - 2, size=(batch_size, sequence_length, sequence_length)) - output_tensor = test_layer([input_data, mask_data]) - - # The layer only attends to the first token and outputs the first token - # embeeding. 
- new_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu', - output_range=1) - _ = new_layer([input_data, mask_data]) - new_layer.set_weights(test_layer.get_weights()) - new_output_tensor = new_layer([input_data, mask_data]) - self.assertAllClose(new_output_tensor, output_tensor[:, 0:1, :]) - - def test_layer_invocation_with_float16_dtype(self, transformer_cls): - tf.keras.mixed_precision.experimental.set_policy('mixed_float16') - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu') - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). - data_tensor = tf.keras.Input(shape=(sequence_length, width)) - # Create a 2-dimensional input (the first dimension is implicit). - mask_tensor = tf.keras.Input(shape=(sequence_length, sequence_length)) - output_tensor = test_layer([data_tensor, mask_tensor]) - - # Create a model from the test layer. - model = tf.keras.Model([data_tensor, mask_tensor], output_tensor) - - # Invoke the model on test data. We can't validate the output data itself - # (the NN is too complex) but this will rule out structural runtime errors. - batch_size = 6 - input_data = (10 * np.random.random_sample( - (batch_size, sequence_length, width))) - # The attention mask should be of shape (batch, from_seq_len, to_seq_len), - # which here is (batch, sequence_length, sequence_length) - mask_data = np.random.randint( - 2, size=(batch_size, sequence_length, sequence_length)) - _ = model.predict([input_data, mask_data]) - - def test_transform_with_initializer(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu', - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) - sequence_length = 21 - width = 80 - # Create a 3-dimensional input (the first dimension is implicit). 
- data_tensor = tf.keras.Input(shape=(sequence_length, width)) - output = test_layer(data_tensor) - # The default output of a transformer layer should be the same as the input. - self.assertEqual(data_tensor.shape.as_list(), output.shape.as_list()) - - def test_dynamic_layer_sequence(self, transformer_cls): - test_layer = transformer_cls( - num_attention_heads=10, - intermediate_size=2048, - intermediate_activation='relu', - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)) - # Create a 3-dimensional input (the first dimension is implicit). - width = 30 - input_tensor = tf.keras.Input(shape=(None, width)) - output_tensor = test_layer(input_tensor) - model = tf.keras.Model(input_tensor, output_tensor) - - input_length = 17 - input_data = np.ones((1, input_length, width)) - output_data = model.predict(input_data) - - self.assertAllEqual([1, input_length, width], output_data.shape) - - def _create_cache(batch_size, init_decode_length, num_heads, head_size): return { 'key': @@ -227,12 +32,12 @@ def _create_cache(batch_size, init_decode_length, num_heads, head_size): @keras_parameterized.run_all_keras_modes -class TransformerDecoderLayerTest(keras_parameterized.TestCase): +class TransformerDecoderBlockTest(keras_parameterized.TestCase): def test_decoder_block_with_cache(self): num_attention_heads = 2 hidden_size = 16 - decoder_block = transformer.TransformerDecoderLayer( + decoder_block = transformer.TransformerDecoderBlock( num_attention_heads=num_attention_heads, intermediate_size=32, intermediate_activation='relu', @@ -248,6 +53,47 @@ class TransformerDecoderLayerTest(keras_parameterized.TestCase): self.assertEqual(output.shape, (2, 4, hidden_size)) self.assertEqual(cache['value'].shape, (2, 4, 2, 8)) + def test_use_bias_norm_first(self): + num_attention_heads = 2 + hidden_size = 16 + decoder_block = transformer.TransformerDecoderBlock( + num_attention_heads=num_attention_heads, + intermediate_size=32, + intermediate_activation='relu', + 
dropout_rate=0.1, + attention_dropout_rate=0.1, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=0.1, + attention_initializer=tf.keras.initializers.RandomUniform( + minval=0., maxval=1.)) + # Forward path. + dummy_tensor = tf.zeros([2, 4, 16], dtype=tf.float32) + dummy_mask = tf.zeros([2, 4, 4], dtype=tf.float32) + inputs = [dummy_tensor, dummy_tensor, dummy_mask, dummy_mask] + output, _ = decoder_block(inputs) + self.assertEqual(output.shape, (2, 4, hidden_size)) + + def test_get_config(self): + num_attention_heads = 2 + decoder_block = transformer.TransformerDecoderBlock( + num_attention_heads=num_attention_heads, + intermediate_size=32, + intermediate_activation='relu', + dropout_rate=0.1, + attention_dropout_rate=0.1, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=0.1, + attention_initializer=tf.keras.initializers.RandomUniform( + minval=0., maxval=1.)) + decoder_block_config = decoder_block.get_config() + new_decoder_block = transformer.TransformerDecoderBlock.from_config( + decoder_block_config) + self.assertEqual(decoder_block_config, new_decoder_block.get_config()) + if __name__ == '__main__': tf.test.main() diff --git a/official/nlp/modeling/layers/transformer_xl.py b/official/nlp/modeling/layers/transformer_xl.py new file mode 100644 index 0000000000000000000000000000000000000000..748957398c923bf0069d7ad0f41c486b9c8ac947 --- /dev/null +++ b/official/nlp/modeling/layers/transformer_xl.py @@ -0,0 +1,559 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based Transformer XL layer.""" + +from absl import logging + +import tensorflow as tf + +from official.nlp.modeling.layers import relative_attention + + +def _cache_memory(current_state, previous_state, memory_length, reuse_length=0): + """Caches hidden states into memory. + + Args: + current_state: `Tensor`, the current state. + previous_state: `Tensor`, the previous state. + memory_length: `int`, the number of tokens to cache. + reuse_length: `int`, the number of tokens in the current batch to be cached + and reused in the future. + + Returns: + A `Tensor`, representing the cached state with stopped gradients. + + """ + if memory_length is None or memory_length == 0: + return None + else: + if reuse_length > 0: + current_state = current_state[:, :reuse_length, :] + + if previous_state is None: + new_mem = current_state[:, -memory_length:, :] + else: + new_mem = tf.concat( + [previous_state, current_state], 1)[:, -memory_length:, :] + + return tf.stop_gradient(new_mem) + + +@tf.keras.utils.register_keras_serializable(package="Text") +class TransformerXLBlock(tf.keras.layers.Layer): + """Transformer XL block. + + This implements a Transformer XL block from "Transformer-XL: Attentive + Language Models Beyond a Fixed-Length Context" + (https://arxiv.org/abs/1901.02860). + + This block is further extended to allow for the Transformer-XL + re-parameterization in "XLNet: Generalized Autoregressive Pretraining for + Language Understanding" (https://arxiv.org/abs/1906.08237). 
+ + Given an input stream, this block computes attention, applies dropouts and + layer norms and feeds into the FFN network. + + **Note: This layer is currently experimental. + + Attributes: + vocab_size: The size of the token vocabulary. + hidden_size: The size of the transformer hidden layers. + num_attention_heads: The number of attention heads. + head_size: The dimension size of each attention head. + inner_size: The inner size for the transformer layers. + dropout_rate: Dropout rate for the output of this layer. + attention_dropout_rate: Dropout rate on attention probabilities. + two_stream: Whether or not to use `TwoStreamRelativeAttention` used in the + XLNet pretrainer. If `False`, then it will use + `MultiHeadRelativeAttention` as in Transformer XL. + norm_epsilon: Epsilon value to initialize normalization layers. + inner_activation: The activation to use for the inner + FFN layers. + kernel_initializer: Initializer for dense layer kernels. + inner_dropout: Dropout probability for the inner dropout + layer. 
+ """ + + def __init__(self, + vocab_size, + hidden_size, + num_attention_heads, + head_size, + inner_size, + dropout_rate, + attention_dropout_rate, + two_stream=False, + norm_epsilon=1e-12, + inner_activation="relu", + kernel_initializer="variance_scaling", + inner_dropout=0.0, + **kwargs): + """Initializes TransformerXLBlock layer.""" + + super(TransformerXLBlock, self).__init__(**kwargs) + self._vocab_size = vocab_size + self._num_heads = num_attention_heads + self._head_size = head_size + self._hidden_size = hidden_size + self._inner_size = inner_size + self._dropout_rate = dropout_rate + self._attention_dropout_rate = attention_dropout_rate + self._inner_activation = inner_activation + self._norm_epsilon = norm_epsilon + self._kernel_initializer = kernel_initializer + self._inner_dropout = inner_dropout + self._two_stream = two_stream + if two_stream: + self._attention_layer_type = relative_attention.TwoStreamRelativeAttention + else: + self._attention_layer_type = relative_attention.MultiHeadRelativeAttention + + def build(self, input_shape): + input_tensor = input_shape[0] if len(input_shape) == 2 else input_shape + input_tensor_shape = tf.TensorShape(input_tensor) + if len(input_tensor_shape.as_list()) != 3: + raise ValueError("TransformerLayer expects a three-dimensional input of " + "shape [batch, sequence, width].") + batch_size, sequence_length, hidden_size = input_tensor_shape + + if len(input_shape) == 2: + mask_tensor_shape = tf.TensorShape(input_shape[1]) + expected_mask_tensor_shape = tf.TensorShape( + [batch_size, sequence_length, sequence_length]) + if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape): + raise ValueError("When passing a mask tensor to TransformerXLBlock, " + "the mask tensor must be of shape [batch, " + "sequence_length, sequence_length] (here %s). Got a " + "mask tensor of shape %s." 
% + (expected_mask_tensor_shape, mask_tensor_shape)) + if hidden_size % self._num_heads != 0: + raise ValueError( + "The input size (%d) is not a multiple of the number of attention " + "heads (%d)" % (hidden_size, self._num_heads)) + self._attention_layer = self._attention_layer_type( + num_heads=self._num_heads, + key_dim=self._head_size, + value_dim=self._head_size, + dropout=self._attention_dropout_rate, + use_bias=False, + kernel_initializer=self._kernel_initializer, + name="rel_attn") + self._attention_dropout = tf.keras.layers.Dropout( + rate=self._attention_dropout_rate) + self._attention_layer_norm = tf.keras.layers.LayerNormalization( + name="self_attention_layer_norm", + axis=-1, + epsilon=self._norm_epsilon, + dtype=tf.float32) + self._inner_dense = tf.keras.layers.experimental.EinsumDense( + "abc,cd->abd", + output_shape=(None, self._inner_size), + bias_axes="d", + kernel_initializer=self._kernel_initializer, + name="inner") + + self._inner_activation_layer = tf.keras.layers.Activation( + self._inner_activation) + self._inner_dropout_layer = tf.keras.layers.Dropout( + rate=self._inner_dropout) + self._output_dense = tf.keras.layers.experimental.EinsumDense( + "abc,cd->abd", + output_shape=(None, hidden_size), + bias_axes="d", + name="output", + kernel_initializer=self._kernel_initializer) + self._output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + self._output_layer_norm = tf.keras.layers.LayerNormalization( + name="output_layer_norm", + axis=-1, + epsilon=self._norm_epsilon) + + super(TransformerXLBlock, self).build(input_shape) + + def get_config(self): + config = { + "vocab_size": + self._vocab_size, + "hidden_size": + self._hidden_size, + "num_attention_heads": + self._num_heads, + "head_size": + self._head_size, + "inner_size": + self._inner_size, + "dropout_rate": + self._dropout_rate, + "attention_dropout_rate": + self._attention_dropout_rate, + "two_stream": + self._two_stream, + "norm_epsilon": + self._norm_epsilon, + 
"inner_activation": + self._inner_activation, + "kernel_initializer": + self._kernel_initializer, + "inner_dropout": + self._inner_dropout, + } + base_config = super(TransformerXLBlock, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, + content_stream, + content_attention_bias, + positional_attention_bias, + relative_position_encoding=None, + segment_matrix=None, + segment_encoding=None, + segment_attention_bias=None, + state=None, + content_attention_mask=None, + query_stream=None, + query_attention_mask=None, + target_mapping=None): + """Implements `call` for the Layer. + + Args: + content_stream: `Tensor`, the input content stream. This is the standard + input to Transformer XL and is commonly referred to as `h` in XLNet. + content_attention_bias: Bias `Tensor` for content based attention of shape + `[num_heads, dim]`. + positional_attention_bias: Bias `Tensor` for position based attention of + shape `[num_heads, dim]`. + relative_position_encoding: Relative positional encoding `Tensor` of shape + `[B, L, dim]`. + segment_matrix: Optional `Tensor` of shape `[B, S, S + M]`. Used in XLNet, + but not in Transformer XL. + segment_encoding: Optional `Tensor` of shape `[2, num_heads, dim]`. Used + in XLNet, but not in Transformer XL. + segment_attention_bias: Optional bias `Tensor` for segment based attention + of shape `[num_heads, dim]`. + state: Optional `Tensor` of shape `[B, M, E]`, where M is the length of + the state or memory. If passed, this is also attended over as in + Transformer XL. + content_attention_mask: Optional `Tensor` representing the mask that is + added to content attention logits. If state is not None, the mask source + sequence dimension should extend M. + query_stream: Optional `Tensor`, the query stream. This is introduced in + `TwoStreamRelativeAttention`/XLNet pretrainer. This is ignored if + `two_stream` is `False`. 
+ query_attention_mask: Optional `Tensor` representing the mask that is + added to query attention logits. If state is not None, the mask source + sequence dimension should extend M. + target_mapping: Optional `Tensor` representing the target mapping when + calculating query attention. + + Returns: + A `dict` object, containing the key value pairs for `content_attention` + and (if `two_stream` is `True`) `query_attention`. + + """ + if not self._two_stream and query_stream is not None: + logging.warning("`query_stream` was provided but two stream attention is " + "disabled. `query_stream` will be ignored.") + if self._two_stream: + attention_kwargs = dict( + content_stream=content_stream, + query_stream=query_stream, + query_attention_mask=query_attention_mask, + target_mapping=target_mapping, + content_attention_mask=content_attention_mask) + else: + attention_kwargs = dict( + query=content_stream, + value=content_stream, + key=content_stream, + attention_mask=content_attention_mask) + + common_attention_kwargs = dict( + content_attention_bias=content_attention_bias, + relative_position_encoding=relative_position_encoding, + positional_attention_bias=positional_attention_bias, + segment_matrix=segment_matrix, + segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + state=state) + + attention_kwargs.update(common_attention_kwargs) + attention_output = self._attention_layer(**attention_kwargs) + + if self._two_stream: + attention_streams = attention_output + input_streams = [content_stream, query_stream] + else: + attention_streams = [attention_output] + input_streams = [content_stream] + + attention_keys = ["content_attention", "query_attention"] + attention_output = {} + for attention_stream, input_stream, attention_key in zip( + attention_streams, input_streams, attention_keys): + attention_stream = self._attention_dropout(attention_stream) + attention_stream = self._attention_layer_norm( + attention_stream + input_stream) + 
inner_output = self._inner_dense(attention_stream) + inner_output = self._inner_activation_layer( + inner_output) + inner_output = self._inner_dropout_layer( + inner_output) + layer_output = self._output_dense(inner_output) + layer_output = self._output_dropout(layer_output) + layer_output = self._output_layer_norm(layer_output + attention_stream) + attention_output[attention_key] = layer_output + + return attention_output + + +class TransformerXL(tf.keras.layers.Layer): + """Transformer XL. + + This layer combines multiple Transformer XL blocks from "Transformer-XL: + Attentive Language Models Beyond a Fixed-Length Context" + (https://arxiv.org/abs/1901.02860). + + This layer handles the attention biases as well as memory caching and reuse + as in Transformer XL and XLNet. + + + Attributes: + vocab_size: The number of tokens in vocabulary. + num_layers: The number of layers. + hidden_size: The hidden size. + num_attention_heads: The number of attention heads. + head_size: The dimension size of each attention head. + inner_size: The hidden size in feed-forward layers. + dropout_rate: Dropout rate used in each Transformer XL block. + attention_dropout_rate: Dropout rate on attention probabilities. + two_stream: Whether or not to use `TwoStreamRelativeAttention` used + in the XLNet pretrainer. If `False`, then it will use + `MultiHeadRelativeAttention` as in Transformer XL. + initializer: The initializer to use for attention biases. + tie_attention_biases: Whether or not to tie biases together. If `True`, then + each Transformer XL block shares the same trainable attention bias. If + `False`, then each block has its own attention bias. This is usually set + to `True`. + memory_length: The number of tokens to cache. + reuse_length: The number of tokens in the current batch to be cached + and reused in the future. + inner_activation: The activation to use in the inner layers + for Transformer XL blocks. Typically "relu" or "gelu". 
+ """ + + def __init__(self, + vocab_size, + num_layers, + hidden_size, + num_attention_heads, + head_size, + inner_size, + dropout_rate, + attention_dropout_rate, + initializer, + two_stream=False, + tie_attention_biases=True, + memory_length=None, + reuse_length=None, + inner_activation="relu", + **kwargs): + """Initializes TransformerXL.""" + super(TransformerXL, self).__init__(**kwargs) + + self._vocab_size = vocab_size + self._initializer = initializer + self._num_layers = num_layers + self._hidden_size = hidden_size + self._num_attention_heads = num_attention_heads + self._head_size = head_size + self._inner_size = inner_size + self._inner_activation = inner_activation + self._dropout_rate = dropout_rate + self._attention_dropout_rate = attention_dropout_rate + self._tie_attention_biases = tie_attention_biases + self._two_stream = two_stream + + self._memory_length = memory_length + self._reuse_length = reuse_length + + if self._tie_attention_biases: + attention_bias_shape = [self._num_attention_heads, self._head_size] + else: + attention_bias_shape = [self._num_layers, self._num_attention_heads, + self._head_size] + + self.content_attention_bias = self.add_weight( + "content_attention_bias", + shape=attention_bias_shape, + dtype=tf.float32, + initializer=self._initializer) + self.positional_attention_bias = self.add_weight( + "positional_attention_bias", + shape=attention_bias_shape, + dtype=tf.float32, + initializer=self._initializer) + self.segment_attention_bias = self.add_weight( + "segment_attention_bias", + shape=attention_bias_shape, + dtype=tf.float32, + initializer=self._initializer) + + self.transformer_xl_layers = [] + for i in range(self._num_layers): + self.transformer_xl_layers.append( + TransformerXLBlock( + vocab_size=self._vocab_size, + hidden_size=self._head_size * self._num_attention_heads, + num_attention_heads=self._num_attention_heads, + head_size=self._head_size, + inner_size=self._inner_size, + dropout_rate=self._dropout_rate, + 
attention_dropout_rate=self._attention_dropout_rate, + norm_epsilon=1e-12, + inner_activation=self._inner_activation, + two_stream=self._two_stream, + kernel_initializer="variance_scaling", + name="layer_%d" % i)) + + self.output_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + + def get_config(self): + config = { + "vocab_size": + self._vocab_size, + "num_layers": + self._num_layers, + "hidden_size": + self._hidden_size, + "num_attention_heads": + self._num_attention_heads, + "head_size": + self._head_size, + "inner_size": + self._inner_size, + "dropout_rate": + self._dropout_rate, + "attention_dropout_rate": + self._attention_dropout_rate, + "initializer": + self._initializer, + "two_stream": + self._two_stream, + "tie_attention_biases": + self._tie_attention_biases, + "memory_length": + self._memory_length, + "reuse_length": + self._reuse_length, + "inner_activation": + self._inner_activation, + } + base_config = super(TransformerXL, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, + content_stream, + relative_position_encoding, + segment_matrix=None, + segment_embedding=None, + state=None, + content_attention_mask=None, + query_stream=None, + query_attention_mask=None, + target_mapping=None): + """Implements call() for the layer. + + Args: + content_stream: `Tensor`, the input content stream. This is the standard + input to Transformer XL and is commonly referred to as `h` in XLNet. + relative_position_encoding: Relative positional encoding `Tensor` of shape + `[B, L, dim]`. + segment_matrix: Optional `Tensor` of shape `[B, S, S + M]`. Used in XLNet, + but not in Transformer XL. + segment_embedding: Optional `Tensor` of shape `[2, num_heads, dim]`. Used + in XLNet, but not in Transformer XL. + state: Optional `Tensor` of shape `[B, M, E]`, where M is the length of + the state or memory. If passed, this is also attended over as in + Transformer XL. 
+ content_attention_mask: Optional `Tensor` representing the mask that is + added to content attention logits. If state is not None, the mask source + sequence dimension should extend M. + query_stream: Optional `Tensor`, the query stream. This is introduced in + `TwoStreamRelativeAttention`/XLNet pretrainer. This is ignored if + `two_stream` is `False`. + query_attention_mask: Optional `Tensor` representing the mask that is + added to query attention logits. If state is not None, the mask source + sequence dimension should extend M. + target_mapping: Optional `Tensor` representing the target mapping when + calculating query attention. + + Returns: + A tuple consisting of the attention output and the list of cached memory + states. + The attention output is `content_attention` if `two_stream` is `False`, + otherwise it is `query_attention`. + """ + new_mems = [] + + if state is None: + state = [None] * self._num_layers + for i in range(self._num_layers): + # cache new mems + new_mems.append( + _cache_memory(content_stream, state[i], + self._memory_length, self._reuse_length)) + + # segment bias + if segment_matrix is None: + segment_attention_bias = None + segment_encoding = None + else: + segment_attention_bias = (self.segment_attention_bias + if self._tie_attention_biases + else self.segment_attention_bias[i]) + segment_encoding = segment_embedding[i] + + content_attention_bias = (self.content_attention_bias + if self._tie_attention_biases + else self.content_attention_bias[i]) + positional_attention_bias = (self.positional_attention_bias + if self._tie_attention_biases + else self.positional_attention_bias[i]) + transformer_xl_layer = self.transformer_xl_layers[i] + transformer_xl_output = transformer_xl_layer( + content_stream=content_stream, + content_attention_bias=content_attention_bias, + positional_attention_bias=positional_attention_bias, + relative_position_encoding=relative_position_encoding, + segment_matrix=segment_matrix, + 
segment_encoding=segment_encoding, + segment_attention_bias=segment_attention_bias, + state=state[i], + content_attention_mask=content_attention_mask, + query_attention_mask=query_attention_mask, + query_stream=query_stream, + target_mapping=target_mapping) + content_stream = transformer_xl_output["content_attention"] + if self._two_stream: + query_stream = transformer_xl_output["query_attention"] + else: + query_stream = None + + if self._two_stream: + output_stream = query_stream + else: + output_stream = content_stream + + return output_stream, new_mems diff --git a/official/nlp/modeling/layers/transformer_xl_test.py b/official/nlp/modeling/layers/transformer_xl_test.py new file mode 100644 index 0000000000000000000000000000000000000000..94945c962a0a1e897340770005b6c9678e28a050 --- /dev/null +++ b/official/nlp/modeling/layers/transformer_xl_test.py @@ -0,0 +1,276 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Transformer XL.""" + +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import + +from official.nlp.modeling.layers import transformer_xl + + +def create_mock_transformer_xl_data( + batch_size, + num_heads, + head_size, + hidden_size, + seq_length, + memory_length=0, + num_predictions=2, + two_stream=False, + num_layers=1, + include_biases=True, + include_state=False, + include_mask=False, + include_segment=False): + """Creates mock testing data. + + Args: + batch_size: `int`, the batch size. + num_heads: `int`, number of attention heads. + head_size: `int`, the size of each attention head. + hidden_size: `int`, the layer's hidden size. + seq_length: `int`, Sequence length of the input. + memory_length: optional `int`, the length of the state. Defaults to 0. + num_predictions: `int`, the number of predictions used in two stream + attention. + two_stream: `bool`, whether or not to generate two stream data. + num_layers: `int`, the number of Transformer XL blocks. + include_biases: optional `bool`, whether or not to include attention biases. + include_state: optional `bool`, whether or not to include state data. + include_mask: optional `bool`, whether or not to include mask data. + include_segment: optional `bool`, whether or not to include segment data. + + Returns: + A dictionary with `str` as keys and `Tensor` as values. 
+ """ + encoding_shape = (batch_size, seq_length * 2, hidden_size) + + data = dict( + relative_position_encoding=tf.random.normal(shape=encoding_shape), + content_stream=tf.random.normal( + shape=(batch_size, seq_length, hidden_size))) + + if include_biases: + attention_bias_shape = (num_heads, head_size) + data.update(dict( + content_attention_bias=tf.random.normal(shape=attention_bias_shape), + segment_attention_bias=tf.random.normal(shape=attention_bias_shape), + positional_attention_bias=tf.random.normal(shape=attention_bias_shape))) + + if two_stream: + data.update(dict( + query_stream=tf.random.normal( + shape=(batch_size, num_predictions, hidden_size)), + target_mapping=tf.random.normal( + shape=(batch_size, num_predictions, seq_length)))) + + if include_state: + total_seq_length = seq_length + memory_length + if num_layers > 1: + state_shape = (num_layers, batch_size, memory_length, hidden_size) + else: + state_shape = (batch_size, memory_length, hidden_size) + data.update(dict( + state=tf.random.normal(shape=state_shape))) + else: + total_seq_length = seq_length + + if include_mask: + mask_shape = (batch_size, num_heads, seq_length, total_seq_length) + mask_data = np.random.randint(2, size=mask_shape).astype("float32") + data["content_attention_mask"] = mask_data + if two_stream: + data["query_attention_mask"] = mask_data + + if include_segment: + # A transformer XL block takes an individual segment "encoding" from the + # entirety of the Transformer XL segment "embedding". 
+ if num_layers > 1: + segment_encoding_shape = (num_layers, 2, num_heads, head_size) + segment_encoding_name = "segment_embedding" + else: + segment_encoding_shape = (2, num_heads, head_size) + segment_encoding_name = "segment_encoding" + + segment_matrix = np.random.randint( + 2, size=(batch_size, seq_length, total_seq_length)) + data["segment_matrix"] = tf.math.equal(segment_matrix, 1) + data[segment_encoding_name] = tf.random.normal(shape=segment_encoding_shape) + + return data + + +@keras_parameterized.run_all_keras_modes +class TransformerXLBlockTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + memory_length=[0, 4], + two_stream=[True, False], + state=[True, False], + mask=[True, False], + segment=[True, False])) + def test_transformer_xl_block( + self, + two_stream, + memory_length, + state, + mask, + segment): + """Tests combinations of Transformer XL block calculations.""" + batch_size, num_heads, head_size, seq_length = 2, 12, 64, 8 + hidden_size, num_predictions, inner_size = 24, 8, 12 + + data = create_mock_transformer_xl_data( + include_biases=True, + num_heads=num_heads, + head_size=head_size, + hidden_size=hidden_size, + seq_length=seq_length, + batch_size=batch_size, + memory_length=memory_length, + num_predictions=num_predictions, + two_stream=two_stream, + include_state=state, + include_mask=mask, + include_segment=segment) + + test_layer = transformer_xl.TransformerXLBlock( + vocab_size=32000, + hidden_size=hidden_size, + num_attention_heads=num_heads, + head_size=head_size, + inner_size=inner_size, + dropout_rate=0., + attention_dropout_rate=0., + two_stream=two_stream) + output = test_layer(**data) + content_attention = output["content_attention"] + self.assertEqual(content_attention.shape, + [batch_size, seq_length, hidden_size]) + + if two_stream: + self.assertIn("query_attention", output) + self.assertEqual(output["query_attention"].shape, + [batch_size, num_predictions, hidden_size]) + else: + 
self.assertNotIn("query_attention", output) + + def test_get_config(self): + transformer_xl_block = transformer_xl.TransformerXLBlock( + vocab_size=32000, + head_size=64, + num_attention_heads=2, + hidden_size=10, + inner_size=50, + dropout_rate=0., + attention_dropout_rate=0., + two_stream=False) + transformer_xl_block_config = transformer_xl_block.get_config() + new_block = transformer_xl.TransformerXLBlock.from_config( + transformer_xl_block_config) + self.assertEqual(transformer_xl_block_config, new_block.get_config()) + + +@keras_parameterized.run_all_keras_modes +class TransformerXLTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + two_stream=[True, False], + memory_length=[0, 4], + reuse_length=[0, 4], + tie_attention_biases=[True, False], + state=[True, False], + mask=[True, False], + segment=[True, False])) + def test_transformer_xl( + self, + two_stream, + memory_length, + reuse_length, + tie_attention_biases, + state, + mask, + segment): + batch_size, num_heads, head_size, seq_length = 2, 12, 64, 8 + hidden_size, num_predictions, inner_size = 24, 8, 12 + num_layers = 3 + + data = create_mock_transformer_xl_data( + include_biases=False, + num_heads=num_heads, + head_size=head_size, + hidden_size=hidden_size, + seq_length=seq_length, + batch_size=batch_size, + memory_length=memory_length, + num_predictions=num_predictions, + two_stream=two_stream, + num_layers=num_layers, + include_state=state, + include_mask=mask, + include_segment=segment) + transformer_xl_layer = transformer_xl.TransformerXL( + vocab_size=32000, + num_layers=num_layers, + head_size=head_size, + hidden_size=hidden_size, + num_attention_heads=num_heads, + inner_size=inner_size, + dropout_rate=0., + attention_dropout_rate=0., + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + two_stream=two_stream, + tie_attention_biases=tie_attention_biases, + memory_length=memory_length, + reuse_length=reuse_length, + inner_activation="relu") + 
attention_output, cached_memory_states = transformer_xl_layer(**data) + if two_stream: + self.assertEqual(attention_output.shape, + [batch_size, num_predictions, hidden_size]) + else: + self.assertEqual(attention_output.shape, + [batch_size, seq_length, hidden_size]) + self.assertEqual(len(cached_memory_states), num_layers) + + def test_get_config(self): + transformer_xl_layer = transformer_xl.TransformerXL( + vocab_size=32000, + num_layers=12, + hidden_size=36, + head_size=12, + num_attention_heads=12, + inner_size=12, + dropout_rate=0., + attention_dropout_rate=0., + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + two_stream=False, + tie_attention_biases=True, + memory_length=0, + reuse_length=0, + inner_activation="relu") + transformer_xl_config = transformer_xl_layer.get_config() + new_transformer_xl = transformer_xl.TransformerXL.from_config( + transformer_xl_config) + self.assertEqual(transformer_xl_config, new_transformer_xl.get_config()) + + +if __name__ == "__main__": + np.random.seed(0) + tf.random.set_seed(0) + tf.test.main() diff --git a/official/nlp/modeling/layers/util.py b/official/nlp/modeling/layers/util.py index 354f216ea4ea743fb48be256126df100abe5cfa9..280e2d2f61332ae8cb702366dd2e99ea27225f6f 100644 --- a/official/nlp/modeling/layers/util.py +++ b/official/nlp/modeling/layers/util.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Keras-based transformer block layer.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Keras-based transformer block layer.""" import functools diff --git a/official/nlp/modeling/losses/__init__.py b/official/nlp/modeling/losses/__init__.py index 7a396eb988289f7fde62be5f8ba641133c0e1543..cdd2c29f1b50d965af0202b86e2b0cf34e679315 100644 --- a/official/nlp/modeling/losses/__init__.py +++ b/official/nlp/modeling/losses/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Activations package definition. Subject to change.""" + +"""Losses contains common loss computation used in NLP (subject to change).""" from official.nlp.modeling.losses.weighted_sparse_categorical_crossentropy import loss as weighted_sparse_categorical_crossentropy_loss diff --git a/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy.py b/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy.py index cd532f858242574a6697c4d6b8791c7f97ef1cf7..d777800c611cb83ae5a04c2394ce89cecef50e51 100644 --- a/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy.py +++ b/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Weighted sparse categorical cross-entropy losses.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Weighted sparse categorical cross-entropy losses.""" import tensorflow as tf diff --git a/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy_test.py b/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy_test.py index b6e1a01d76961dc47fa1482ddf616787fa365842..f890d5b7e35c8dd50747554dcf99dd752449c890 100644 --- a/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy_test.py +++ b/official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for masked LM loss.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for masked LM loss.""" import numpy as np import tensorflow as tf @@ -39,7 +34,7 @@ class ClassificationLossTest(keras_parameterized.TestCase): output="predictions"): # First, create a transformer stack that we can use to get the LM's # vocabulary weight. - xformer_stack = networks.TransformerEncoder( + xformer_stack = networks.BertEncoder( vocab_size=vocab_size, num_layers=1, sequence_length=sequence_length, @@ -204,5 +199,6 @@ class ClassificationLossTest(keras_parameterized.TestCase): expected_loss_data = 6.4222 self.assertAllClose(expected_loss_data, loss_data, rtol=1e-3) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/modeling/models/README.md b/official/nlp/modeling/models/README.md index c2e572b6fe07631c17f37b29723fc7a0ac94a81e..22fd8193c29135588b7a227a2422fe66199f59d8 100644 --- a/official/nlp/modeling/models/README.md +++ b/official/nlp/modeling/models/README.md @@ -1,17 +1,17 @@ # Models -Models are combinations of layers and networks that would be trained. +Models are combinations of `tf.keras` layers and models that can be trained. -Several pre-built canned models are provided to train encoder networks. These -models are intended as both convenience functions and canonical examples. +Several pre-built canned models are provided to train encoder networks. +These models are intended as both convenience functions and canonical examples. * [`BertClassifier`](bert_classifier.py) implements a simple classification model containing a single classification head using the Classification network. It can be used as a regression model as well. 
* [`BertTokenClassifier`](bert_token_classifier.py) implements a simple token -classification model containing a single classification head using the -TokenClassification network. +classification model containing a single classification head over the sequence +output embeddings. + +* [`BertSpanLabeler`](bert_span_labeler.py) implementats a simple single-span start-end predictor (that is, a model that predicts two values: a start token @@ -20,3 +20,6 @@ index and an end token index), suitable for SQuAD-style tasks. * [`BertPretrainer`](bert_pretrainer.py) implements a masked LM and a classification head using the Masked LM and Classification networks, respectively. + +* [`DualEncoder`](dual_encoder.py) implements a dual encoder model, suitable for +retrieval tasks. diff --git a/official/nlp/modeling/models/__init__.py b/official/nlp/modeling/models/__init__.py index a072f36b79be3a2a8978034bcdc629d5a9c707d1..ce59fd260cf4044bf64964f142afc0748eaaf87f 100644 --- a/official/nlp/modeling/models/__init__.py +++ b/official/nlp/modeling/models/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Models package definition.""" + +"""Models are combinations of `tf.keras` layers and models that can be trained. + +Several pre-built canned models are provided to train encoder networks. +These models are intended as both convenience functions and canonical examples. 
+""" from official.nlp.modeling.models.bert_classifier import BertClassifier -from official.nlp.modeling.models.bert_pretrainer import BertPretrainer +from official.nlp.modeling.models.bert_pretrainer import * from official.nlp.modeling.models.bert_span_labeler import BertSpanLabeler from official.nlp.modeling.models.bert_token_classifier import BertTokenClassifier +from official.nlp.modeling.models.dual_encoder import DualEncoder from official.nlp.modeling.models.electra_pretrainer import ElectraPretrainer +from official.nlp.modeling.models.seq2seq_transformer import * +from official.nlp.modeling.models.xlnet import XLNetClassifier +from official.nlp.modeling.models.xlnet import XLNetPretrainer +from official.nlp.modeling.models.xlnet import XLNetSpanLabeler diff --git a/official/nlp/modeling/models/bert_classifier.py b/official/nlp/modeling/models/bert_classifier.py index b144b4d1b9eda61654ef25c05ab54eae69ec7b84..1e96e4d003a34f327b49e0029a626c8979ac22b5 100644 --- a/official/nlp/modeling/models/bert_classifier.py +++ b/official/nlp/modeling/models/bert_classifier.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Trainer network for BERT-style models.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""BERT cls-token classifier.""" +# pylint: disable=g-classes-have-attributes +import collections import tensorflow as tf from official.nlp.modeling import layers -from official.nlp.modeling import networks @tf.keras.utils.register_keras_serializable(package='Text') @@ -37,7 +32,10 @@ class BertClassifier(tf.keras.Model): instantiates a classification network based on the passed `num_classes` argument. If `num_classes` is set to 1, a regression network is instantiated. - Arguments: + *Note* that the model is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: network: A transformer network. This network should output a sequence output and a classification output. Furthermore, it should expose its embedding table via a "get_embedding_table" method. @@ -45,8 +43,12 @@ class BertClassifier(tf.keras.Model): initializer: The initializer (if any) to use in the classification networks. Defaults to a Glorot uniform initializer. dropout_rate: The dropout probability of the cls head. - use_encoder_pooler: Whether to use the pooler layer pre-defined inside - the encoder. + use_encoder_pooler: Whether to use the pooler layer pre-defined inside the + encoder. + cls_head: (Optional) The layer instance to use for the classifier head. + It should take in the output from network and produce the final logits. + If set, the arguments ('num_classes', 'initializer', 'dropout_rate', + 'use_encoder_pooler') will be ignored. 
""" def __init__(self, @@ -55,15 +57,11 @@ class BertClassifier(tf.keras.Model): initializer='glorot_uniform', dropout_rate=0.1, use_encoder_pooler=True, + cls_head=None, **kwargs): - self._self_setattr_tracking = False - self._network = network - self._config = { - 'network': network, - 'num_classes': num_classes, - 'initializer': initializer, - 'use_encoder_pooler': use_encoder_pooler, - } + self.num_classes = num_classes + self.initializer = initializer + self.use_encoder_pooler = use_encoder_pooler # We want to use the inputs of the passed network as the inputs to this # Model. To do this, we need to keep a handle to the network inputs for use @@ -73,36 +71,73 @@ class BertClassifier(tf.keras.Model): if use_encoder_pooler: # Because we have a copy of inputs to create this Model object, we can # invoke the Network object with its own input tensors to start the Model. - _, cls_output = network(inputs) - cls_output = tf.keras.layers.Dropout(rate=dropout_rate)(cls_output) + outputs = network(inputs) + if isinstance(outputs, list): + cls_inputs = outputs[1] + else: + cls_inputs = outputs['pooled_output'] + cls_inputs = tf.keras.layers.Dropout(rate=dropout_rate)(cls_inputs) + else: + outputs = network(inputs) + if isinstance(outputs, list): + cls_inputs = outputs[0] + else: + cls_inputs = outputs['sequence_output'] - self.classifier = networks.Classification( - input_width=cls_output.shape[-1], - num_classes=num_classes, - initializer=initializer, - output='logits', - name='sentence_prediction') - predictions = self.classifier(cls_output) + if cls_head: + classifier = cls_head else: - sequence_output, _ = network(inputs) - self.classifier = layers.ClassificationHead( - inner_dim=sequence_output.shape[-1], + classifier = layers.ClassificationHead( + inner_dim=0 if use_encoder_pooler else cls_inputs.shape[-1], num_classes=num_classes, initializer=initializer, dropout_rate=dropout_rate, name='sentence_prediction') - predictions = self.classifier(sequence_output) + 
predictions = classifier(cls_inputs) + + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. super(BertClassifier, self).__init__( inputs=inputs, outputs=predictions, **kwargs) + self._network = network + self._cls_head = cls_head + + config_dict = self._make_config_dict() + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self.classifier = classifier @property def checkpoint_items(self): - return dict(encoder=self._network) + items = dict(encoder=self._network) + if hasattr(self.classifier, 'checkpoint_items'): + for key, item in self.classifier.checkpoint_items.items(): + items['.'.join([self.classifier.name, key])] = item + return items def get_config(self): - return self._config + return dict(self._config._asdict()) @classmethod def from_config(cls, config, custom_objects=None): return cls(**config) + + def _make_config_dict(self): + return { + 'network': self._network, + 'num_classes': self.num_classes, + 'initializer': self.initializer, + 'use_encoder_pooler': self.use_encoder_pooler, + 'cls_head': self._cls_head, + } diff --git a/official/nlp/modeling/models/bert_classifier_test.py b/official/nlp/modeling/models/bert_classifier_test.py index 8e00c0313c61e1d8b1c4075933efee6eddcefd58..c44038031ad455bc1cd3424f63d2d75d7ac6a708 
100644 --- a/official/nlp/modeling/models/bert_classifier_test.py +++ b/official/nlp/modeling/models/bert_classifier_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for BERT trainer network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for BERT trainer network.""" from absl.testing import parameterized import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling import layers from official.nlp.modeling import networks from official.nlp.modeling.models import bert_classifier @@ -31,14 +28,15 @@ from official.nlp.modeling.models import bert_classifier @keras_parameterized.run_all_keras_modes class BertClassifierTest(keras_parameterized.TestCase): - @parameterized.parameters(1, 3) - def test_bert_trainer(self, num_classes): + @parameterized.named_parameters(('single_cls', 1, False), ('3_cls', 3, False), + ('3_cls_dictoutputs', 3, True)) + def test_bert_trainer(self, num_classes, dict_outputs): """Validate that the Keras object can be created.""" # Build a transformer network to use within the BERT trainer. 
vocab_size = 100 sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + test_network = networks.BertEncoder( + vocab_size=vocab_size, num_layers=2, dict_outputs=dict_outputs) # Create a BERT trainer with the created network. bert_trainer_model = bert_classifier.BertClassifier( @@ -56,17 +54,22 @@ class BertClassifierTest(keras_parameterized.TestCase): expected_classification_shape = [None, num_classes] self.assertAllEqual(expected_classification_shape, cls_outs.shape.as_list()) - @parameterized.parameters(1, 2) - def test_bert_trainer_tensor_call(self, num_classes): + @parameterized.named_parameters( + ('single_cls', 1, False), + ('2_cls', 2, False), + ('single_cls_custom_head', 1, True), + ('2_cls_custom_head', 2, True)) + def test_bert_trainer_tensor_call(self, num_classes, use_custom_head): """Validate that the Keras object can be invoked.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( - vocab_size=100, num_layers=2, sequence_length=2) + test_network = networks.BertEncoder(vocab_size=100, num_layers=2) + cls_head = layers.GaussianProcessClassificationHead( + inner_dim=0, num_classes=num_classes) if use_custom_head else None # Create a BERT trainer with the created network. bert_trainer_model = bert_classifier.BertClassifier( - test_network, num_classes=num_classes) + test_network, num_classes=num_classes, cls_head=cls_head) # Create a set of 2-dimensional data tensors to feed into the model. word_ids = tf.constant([[1, 1], [2, 2]], dtype=tf.int32) @@ -78,17 +81,21 @@ class BertClassifierTest(keras_parameterized.TestCase): # too complex: this simply ensures we're not hitting runtime errors.) 
_ = bert_trainer_model([word_ids, mask, type_ids]) - def test_serialize_deserialize(self): + @parameterized.named_parameters( + ('default_cls_head', None), + ('sngp_cls_head', layers.GaussianProcessClassificationHead( + inner_dim=0, num_classes=4))) + def test_serialize_deserialize(self, cls_head): """Validate that the BERT trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( + test_network = networks.BertEncoder( vocab_size=100, num_layers=2, sequence_length=5) # Create a BERT trainer with the created network. (Note that all the args # are different, so we can catch any serialization mismatches.) bert_trainer_model = bert_classifier.BertClassifier( - test_network, num_classes=4, initializer='zeros') + test_network, num_classes=4, initializer='zeros', cls_head=cls_head) # Create another BERT trainer via serialization and deserialization. config = bert_trainer_model.get_config() diff --git a/official/nlp/modeling/models/bert_pretrainer.py b/official/nlp/modeling/models/bert_pretrainer.py index 0e9fad8e584b71e955066a304a33644e1bfaf37f..3587e008b72140092d60a39f495a15b456e44b12 100644 --- a/official/nlp/modeling/models/bert_pretrainer.py +++ b/official/nlp/modeling/models/bert_pretrainer.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Trainer network for BERT-style models.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""BERT Pre-training model.""" +# pylint: disable=g-classes-have-attributes +import collections import copy from typing import List, Optional +from absl import logging import gin import tensorflow as tf @@ -31,17 +28,18 @@ from official.nlp.modeling import networks @tf.keras.utils.register_keras_serializable(package='Text') class BertPretrainer(tf.keras.Model): - """BERT network training model. + """BERT pretraining model. - This is an implementation of the network structure surrounding a transformer - encoder as described in "BERT: Pre-training of Deep Bidirectional Transformers - for Language Understanding" (https://arxiv.org/abs/1810.04805). + [Note] Please use the new `BertPretrainerV2` for your projects. The BertPretrainer allows a user to pass in a transformer stack, and instantiates the masked language model and classification networks that are used to create the training objectives. - Arguments: + *Note* that the model is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: network: A transformer network. This network should output a sequence output and a classification output. num_classes: Number of classes to predict from the classification network. @@ -52,8 +50,8 @@ class BertPretrainer(tf.keras.Model): None, no activation will be used. initializer: The initializer (if any) to use in the masked LM and classification networks. Defaults to a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. + output: The output style for this network. Can be either `logits` or + `predictions`. 
""" def __init__(self, @@ -65,21 +63,12 @@ class BertPretrainer(tf.keras.Model): initializer='glorot_uniform', output='logits', **kwargs): - self._self_setattr_tracking = False - self._config = { - 'network': network, - 'num_classes': num_classes, - 'num_token_predictions': num_token_predictions, - 'activation': activation, - 'initializer': initializer, - 'output': output, - } - self.encoder = network + # We want to use the inputs of the passed network as the inputs to this # Model. To do this, we need to keep a copy of the network inputs for use # when we construct the Model object at the end of init. (We keep a copy # because we'll be adding another tensor to the copy later.) - network_inputs = self.encoder.inputs + network_inputs = network.inputs inputs = copy.copy(network_inputs) # Because we have a copy of inputs to create this Model object, we can @@ -87,7 +76,7 @@ class BertPretrainer(tf.keras.Model): # Note that, because of how deferred construction happens, we can't use # the copy of the list here - by the time the network is invoked, the list # object contains the additional input added below. - sequence_output, cls_output = self.encoder(network_inputs) + sequence_output, cls_output = network(network_inputs) # The encoder network may get outputs from all layers. if isinstance(sequence_output, list): @@ -95,7 +84,8 @@ class BertPretrainer(tf.keras.Model): if isinstance(cls_output, list): cls_output = cls_output[-1] sequence_output_length = sequence_output.shape.as_list()[1] - if sequence_output_length < num_token_predictions: + if sequence_output_length is not None and (sequence_output_length < + num_token_predictions): raise ValueError( "The passed network's output length is %s, which is less than the " 'requested num_token_predictions %s.' 
% @@ -108,48 +98,74 @@ class BertPretrainer(tf.keras.Model): inputs.append(masked_lm_positions) if embedding_table is None: - embedding_table = self.encoder.get_embedding_table() - self.masked_lm = layers.MaskedLM( + embedding_table = network.get_embedding_table() + masked_lm = layers.MaskedLM( embedding_table=embedding_table, activation=activation, initializer=initializer, output=output, name='cls/predictions') - lm_outputs = self.masked_lm( + lm_outputs = masked_lm( sequence_output, masked_positions=masked_lm_positions) - self.classification = networks.Classification( + classification = networks.Classification( input_width=cls_output.shape[-1], num_classes=num_classes, initializer=initializer, output=output, name='classification') - sentence_outputs = self.classification(cls_output) + sentence_outputs = classification(cls_output) super(BertPretrainer, self).__init__( inputs=inputs, outputs=dict(masked_lm=lm_outputs, classification=sentence_outputs), **kwargs) + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. + config_dict = { + 'network': network, + 'num_classes': num_classes, + 'num_token_predictions': num_token_predictions, + 'activation': activation, + 'initializer': initializer, + 'output': output, + } + + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. 
+ config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + + self.encoder = network + self.classification = classification + self.masked_lm = masked_lm + def get_config(self): - return self._config + return dict(self._config._asdict()) @classmethod def from_config(cls, config, custom_objects=None): return cls(**config) -# TODO(hongkuny): Migrate to BertPretrainerV2 for all usages. @tf.keras.utils.register_keras_serializable(package='Text') @gin.configurable class BertPretrainerV2(tf.keras.Model): """BERT pretraining model V2. - (Experimental). Adds the masked language model head and optional classification heads upon the transformer encoder. - Arguments: + Args: encoder_network: A transformer network. This network should output a sequence output and a classification output. mlm_activation: The activation (if any) to use in the masked LM network. If @@ -158,11 +174,16 @@ class BertPretrainerV2(tf.keras.Model): to a Glorot uniform initializer. classification_heads: A list of optional head layers to transform on encoder sequence outputs. + customized_masked_lm: A customized masked_lm layer. If None, will create + a standard layer from `layers.MaskedLM`; if not None, will use the + specified masked_lm layer. Above arguments `mlm_activation` and + `mlm_initializer` will be ignored. name: The name of the model. Inputs: Inputs defined by the encoder network, plus `masked_lm_positions` as a dictionary. - Outputs: A dictionary of `lm_output` and classification head outputs keyed by - head names. + Outputs: A dictionary of `lm_output`, classification head outputs keyed by + head names, and also outputs from `encoder_network`, keyed by + `sequence_output` and `encoder_outputs` (if any). 
""" def __init__( @@ -171,27 +192,24 @@ class BertPretrainerV2(tf.keras.Model): mlm_activation=None, mlm_initializer='glorot_uniform', classification_heads: Optional[List[tf.keras.layers.Layer]] = None, + customized_masked_lm: Optional[tf.keras.layers.Layer] = None, name: str = 'bert', **kwargs): - self._self_setattr_tracking = False + super().__init__(self, name=name, **kwargs) self._config = { 'encoder_network': encoder_network, 'mlm_initializer': mlm_initializer, 'classification_heads': classification_heads, 'name': name, } - self.encoder_network = encoder_network inputs = copy.copy(self.encoder_network.inputs) - sequence_output, _ = self.encoder_network(inputs) - self.classification_heads = classification_heads or [] if len(set([cls.name for cls in self.classification_heads])) != len( self.classification_heads): raise ValueError('Classification heads should have unique names.') - outputs = dict() - self.masked_lm = layers.MaskedLM( + self.masked_lm = customized_masked_lm or layers.MaskedLM( embedding_table=self.encoder_network.get_embedding_table(), activation=mlm_activation, initializer=mlm_initializer, @@ -199,13 +217,45 @@ class BertPretrainerV2(tf.keras.Model): masked_lm_positions = tf.keras.layers.Input( shape=(None,), name='masked_lm_positions', dtype=tf.int32) inputs.append(masked_lm_positions) - outputs['lm_output'] = self.masked_lm( - sequence_output, masked_positions=masked_lm_positions) - for cls_head in self.classification_heads: - outputs[cls_head.name] = cls_head(sequence_output) + self.inputs = inputs + + def call(self, inputs): + if isinstance(inputs, list): + logging.warning('List inputs to BertPretrainer are discouraged.') + inputs = dict([ + (ref.name, tensor) for ref, tensor in zip(self.inputs, inputs) + ]) - super(BertPretrainerV2, self).__init__( - inputs=inputs, outputs=outputs, name=name, **kwargs) + outputs = dict() + encoder_network_outputs = self.encoder_network(inputs) + if isinstance(encoder_network_outputs, list): + 
outputs['pooled_output'] = encoder_network_outputs[1] + # When `encoder_network` was instantiated with return_all_encoder_outputs + # set to True, `encoder_network_outputs[0]` is a list containing + # all transformer layers' output. + if isinstance(encoder_network_outputs[0], list): + outputs['encoder_outputs'] = encoder_network_outputs[0] + outputs['sequence_output'] = encoder_network_outputs[0][-1] + else: + outputs['sequence_output'] = encoder_network_outputs[0] + elif isinstance(encoder_network_outputs, dict): + outputs = encoder_network_outputs + else: + raise ValueError('encoder_network\'s output should be either a list ' + 'or a dict, but got %s' % encoder_network_outputs) + sequence_output = outputs['sequence_output'] + # Inference may not have masked_lm_positions and mlm_logits is not needed. + if 'masked_lm_positions' in inputs: + masked_lm_positions = inputs['masked_lm_positions'] + outputs['mlm_logits'] = self.masked_lm( + sequence_output, masked_positions=masked_lm_positions) + for cls_head in self.classification_heads: + cls_outputs = cls_head(sequence_output) + if isinstance(cls_outputs, dict): + outputs.update(cls_outputs) + else: + outputs[cls_head.name] = cls_outputs + return outputs @property def checkpoint_items(self): diff --git a/official/nlp/modeling/models/bert_pretrainer_test.py b/official/nlp/modeling/models/bert_pretrainer_test.py index 2ee1c6e2c1bc4b2c5d42e74a714cdbc930036d6e..d5fb96ef79ff9f4c0ce51403f73eaf44e546d7a9 100644 --- a/official/nlp/modeling/models/bert_pretrainer_test.py +++ b/official/nlp/modeling/models/bert_pretrainer_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for BERT trainer network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for BERT pretrainer model.""" +import itertools +from absl.testing import parameterized import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling import layers from official.nlp.modeling import networks from official.nlp.modeling.models import bert_pretrainer @@ -35,8 +34,10 @@ class BertPretrainerTest(keras_parameterized.TestCase): # Build a transformer network to use within the BERT trainer. vocab_size = 100 sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + test_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=2, + max_sequence_length=sequence_length) # Create a BERT trainer with the created network. num_classes = 3 @@ -68,7 +69,7 @@ class BertPretrainerTest(keras_parameterized.TestCase): """Validate that the Keras object can be invoked.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( + test_network = networks.BertEncoder( vocab_size=100, num_layers=2, sequence_length=2) # Create a BERT trainer with the created network. @@ -90,8 +91,8 @@ class BertPretrainerTest(keras_parameterized.TestCase): """Validate that the BERT trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) 
- test_network = networks.TransformerEncoder( - vocab_size=100, num_layers=2, sequence_length=5) + test_network = networks.BertEncoder( + vocab_size=100, num_layers=2, max_sequence_length=5) # Create a BERT trainer with the created network. (Note that all the args # are different, so we can catch any serialization mismatches.) @@ -109,36 +110,112 @@ class BertPretrainerTest(keras_parameterized.TestCase): self.assertAllEqual(bert_trainer_model.get_config(), new_bert_trainer_model.get_config()) - def test_bert_pretrainerv2(self): + +class BertPretrainerV2Test(keras_parameterized.TestCase): + + @parameterized.parameters(itertools.product( + (False, True), + (False, True), + (False, True), + (False, True), + )) + def test_bert_pretrainerv2(self, dict_outputs, return_all_encoder_outputs, + use_customized_masked_lm, has_masked_lm_positions): """Validate that the Keras object can be created.""" # Build a transformer network to use within the BERT trainer. vocab_size = 100 sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + hidden_size = 48 + num_layers = 2 + test_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=num_layers, + hidden_size=hidden_size, + max_sequence_length=sequence_length, + return_all_encoder_outputs=return_all_encoder_outputs, + dict_outputs=dict_outputs) # Create a BERT trainer with the created network. + if use_customized_masked_lm: + customized_masked_lm = layers.MaskedLM( + embedding_table=test_network.get_embedding_table()) + else: + customized_masked_lm = None + bert_trainer_model = bert_pretrainer.BertPretrainerV2( - encoder_network=test_network) + encoder_network=test_network, customized_masked_lm=customized_masked_lm) num_token_predictions = 20 # Create a set of 2-dimensional inputs (the first dimension is implicit). 
- word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - lm_mask = tf.keras.Input(shape=(num_token_predictions,), dtype=tf.int32) + inputs = dict( + input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32), + input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32), + input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32)) + if has_masked_lm_positions: + inputs['masked_lm_positions'] = tf.keras.Input( + shape=(num_token_predictions,), dtype=tf.int32) # Invoke the trainer model on the inputs. This causes the layer to be built. - outputs = bert_trainer_model([word_ids, mask, type_ids, lm_mask]) + outputs = bert_trainer_model(inputs) + + has_encoder_outputs = dict_outputs or return_all_encoder_outputs + expected_keys = ['sequence_output', 'pooled_output'] + if has_encoder_outputs: + expected_keys.append('encoder_outputs') + if has_masked_lm_positions: + expected_keys.append('mlm_logits') + self.assertSameElements(outputs.keys(), expected_keys) # Validate that the outputs are of the expected shape. expected_lm_shape = [None, num_token_predictions, vocab_size] - self.assertAllEqual(expected_lm_shape, outputs['lm_output'].shape.as_list()) + if has_masked_lm_positions: + self.assertAllEqual(expected_lm_shape, + outputs['mlm_logits'].shape.as_list()) + + expected_sequence_output_shape = [None, sequence_length, hidden_size] + self.assertAllEqual(expected_sequence_output_shape, + outputs['sequence_output'].shape.as_list()) + + expected_pooled_output_shape = [None, hidden_size] + self.assertAllEqual(expected_pooled_output_shape, + outputs['pooled_output'].shape.as_list()) + + def test_multiple_cls_outputs(self): + """Validate that the Keras object can be created.""" + # Build a transformer network to use within the BERT trainer. 
+ vocab_size = 100 + sequence_length = 512 + hidden_size = 48 + num_layers = 2 + test_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=num_layers, + hidden_size=hidden_size, + max_sequence_length=sequence_length, + dict_outputs=True) + + bert_trainer_model = bert_pretrainer.BertPretrainerV2( + encoder_network=test_network, + classification_heads=[layers.MultiClsHeads( + inner_dim=5, cls_list=[('foo', 2), ('bar', 3)])]) + num_token_predictions = 20 + # Create a set of 2-dimensional inputs (the first dimension is implicit). + inputs = dict( + input_word_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32), + input_mask=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32), + input_type_ids=tf.keras.Input(shape=(sequence_length,), dtype=tf.int32), + masked_lm_positions=tf.keras.Input( + shape=(num_token_predictions,), dtype=tf.int32)) + + # Invoke the trainer model on the inputs. This causes the layer to be built. + outputs = bert_trainer_model(inputs) + self.assertEqual(outputs['foo'].shape.as_list(), [None, 2]) + self.assertEqual(outputs['bar'].shape.as_list(), [None, 3]) def test_v2_serialize_deserialize(self): """Validate that the BERT trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( + test_network = networks.BertEncoder( vocab_size=100, num_layers=2, sequence_length=5) # Create a BERT trainer with the created network. (Note that all the args diff --git a/official/nlp/modeling/models/bert_span_labeler.py b/official/nlp/modeling/models/bert_span_labeler.py index 2dd9ab13f518373b6bf82800256d75df9d553750..a444ebbf9cc3839693daa0dc3c8bc097e7397c41 100644 --- a/official/nlp/modeling/models/bert_span_labeler.py +++ b/official/nlp/modeling/models/bert_span_labeler.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,14 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Trainer network for BERT-style models.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""BERT Question Answering model.""" +# pylint: disable=g-classes-have-attributes +import collections import tensorflow as tf from official.nlp.modeling import networks @@ -32,17 +28,20 @@ class BertSpanLabeler(tf.keras.Model): encoder as described in "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" (https://arxiv.org/abs/1810.04805). - The BertSpanLabeler allows a user to pass in a transformer stack, and + The BertSpanLabeler allows a user to pass in a transformer encoder, and instantiates a span labeling network based on a single dense layer. - Arguments: + *Note* that the model is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: network: A transformer network. This network should output a sequence output and a classification output. Furthermore, it should expose its embedding - table via a "get_embedding_table" method. + table via a `get_embedding_table` method. initializer: The initializer (if any) to use in the span labeling network. Defaults to a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. + output: The output style for this network. Can be either `logit`' or + `predictions`. 
""" def __init__(self, @@ -50,13 +49,6 @@ class BertSpanLabeler(tf.keras.Model): initializer='glorot_uniform', output='logits', **kwargs): - self._self_setattr_tracking = False - self._network = network - self._config = { - 'network': network, - 'initializer': initializer, - 'output': output, - } # We want to use the inputs of the passed network as the inputs to this # Model. To do this, we need to keep a handle to the network inputs for use @@ -65,16 +57,25 @@ class BertSpanLabeler(tf.keras.Model): # Because we have a copy of inputs to create this Model object, we can # invoke the Network object with its own input tensors to start the Model. - sequence_output, _ = network(inputs) + outputs = network(inputs) + if isinstance(outputs, list): + sequence_output = outputs[0] + else: + sequence_output = outputs['sequence_output'] + + # The input network (typically a transformer model) may get outputs from all + # layers. When this case happens, we retrieve the last layer output. + if isinstance(sequence_output, list): + sequence_output = sequence_output[-1] # This is an instance variable for ease of access to the underlying task # network. - self.span_labeling = networks.SpanLabeling( + span_labeling = networks.SpanLabeling( input_width=sequence_output.shape[-1], initializer=initializer, output=output, name='span_labeling') - start_logits, end_logits = self.span_labeling(sequence_output) + start_logits, end_logits = span_labeling(sequence_output) # Use identity layers wrapped in lambdas to explicitly name the output # tensors. This allows us to use string-keyed dicts in Keras fit/predict/ @@ -88,15 +89,36 @@ class BertSpanLabeler(tf.keras.Model): logits = [start_logits, end_logits] + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. 
Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. super(BertSpanLabeler, self).__init__( inputs=inputs, outputs=logits, **kwargs) + self._network = network + config_dict = { + 'network': network, + 'initializer': initializer, + 'output': output, + } + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self.span_labeling = span_labeling @property def checkpoint_items(self): return dict(encoder=self._network) def get_config(self): - return self._config + return dict(self._config._asdict()) @classmethod def from_config(cls, config, custom_objects=None): diff --git a/official/nlp/modeling/models/bert_span_labeler_test.py b/official/nlp/modeling/models/bert_span_labeler_test.py index d05e91b52c9ba69a65df7dee4783ffc4113b8a3c..413c2121c6f5e849f0dc2dc1c39069559db1cd33 100644 --- a/official/nlp/modeling/models/bert_span_labeler_test.py +++ b/official/nlp/modeling/models/bert_span_labeler_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for BERT trainer network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for BERT trainer network.""" +from absl.testing import parameterized import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import @@ -30,13 +27,14 @@ from official.nlp.modeling.models import bert_span_labeler @keras_parameterized.run_all_keras_modes class BertSpanLabelerTest(keras_parameterized.TestCase): - def test_bert_trainer(self): + @parameterized.parameters(True, False) + def test_bert_trainer(self, dict_outputs): """Validate that the Keras object can be created.""" # Build a transformer network to use within the BERT trainer. vocab_size = 100 sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + test_network = networks.BertEncoder( + vocab_size=vocab_size, num_layers=2, dict_outputs=dict_outputs) # Create a BERT trainer with the created network. bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network) @@ -59,9 +57,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase): """Validate compilation using explicit output names.""" # Build a transformer network to use within the BERT trainer. vocab_size = 100 - sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + test_network = networks.BertEncoder(vocab_size=vocab_size, num_layers=2) # Create a BERT trainer with the created network. bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network) @@ -80,8 +76,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase): """Validate that the Keras object can be invoked.""" # Build a transformer network to use within the BERT trainer. 
(Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( - vocab_size=100, num_layers=2, sequence_length=2) + test_network = networks.BertEncoder(vocab_size=100, num_layers=2) # Create a BERT trainer with the created network. bert_trainer_model = bert_span_labeler.BertSpanLabeler(test_network) @@ -100,7 +95,7 @@ class BertSpanLabelerTest(keras_parameterized.TestCase): """Validate that the BERT trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( + test_network = networks.BertEncoder( vocab_size=100, num_layers=2, sequence_length=5) # Create a BERT trainer with the created network. (Note that all the args diff --git a/official/nlp/modeling/models/bert_token_classifier.py b/official/nlp/modeling/models/bert_token_classifier.py index 4967d71776d685c8631d19d3c07a9fc1e8a25bf6..340d92fd662103393514415489e22c8d5dac0d76 100644 --- a/official/nlp/modeling/models/bert_token_classifier.py +++ b/official/nlp/modeling/models/bert_token_classifier.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Trainer network for BERT-style models.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""BERT token classifier.""" +# pylint: disable=g-classes-have-attributes +import collections import tensorflow as tf -from official.nlp.modeling import networks - @tf.keras.utils.register_keras_serializable(package='Text') class BertTokenClassifier(tf.keras.Model): @@ -36,15 +30,21 @@ class BertTokenClassifier(tf.keras.Model): instantiates a token classification network based on the passed `num_classes` argument. - Arguments: + *Note* that the model is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: network: A transformer network. This network should output a sequence output and a classification output. Furthermore, it should expose its embedding - table via a "get_embedding_table" method. + table via a `get_embedding_table` method. num_classes: Number of classes to predict from the classification network. initializer: The initializer (if any) to use in the classification networks. Defaults to a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. + output: The output style for this network. Can be either `logits` or + `predictions`. + dropout_rate: The dropout probability of the token classification head. + output_encoder_outputs: Whether to include intermediate sequence output + in the final output. 
""" def __init__(self, @@ -53,15 +53,8 @@ class BertTokenClassifier(tf.keras.Model): initializer='glorot_uniform', output='logits', dropout_rate=0.1, + output_encoder_outputs=False, **kwargs): - self._self_setattr_tracking = False - self._network = network - self._config = { - 'network': network, - 'num_classes': num_classes, - 'initializer': initializer, - 'output': output, - } # We want to use the inputs of the passed network as the inputs to this # Model. To do this, we need to keep a handle to the network inputs for use @@ -70,27 +63,70 @@ class BertTokenClassifier(tf.keras.Model): # Because we have a copy of inputs to create this Model object, we can # invoke the Network object with its own input tensors to start the Model. - sequence_output, _ = network(inputs) - sequence_output = tf.keras.layers.Dropout( - rate=dropout_rate)(sequence_output) - - self.classifier = networks.TokenClassification( - input_width=sequence_output.shape[-1], - num_classes=num_classes, - initializer=initializer, - output=output, - name='classification') - predictions = self.classifier(sequence_output) + outputs = network(inputs) + if isinstance(outputs, list): + sequence_output = outputs[0] + else: + sequence_output = outputs['sequence_output'] + sequence_output = tf.keras.layers.Dropout(rate=dropout_rate)( + sequence_output) + + classifier = tf.keras.layers.Dense( + num_classes, + activation=None, + kernel_initializer=initializer, + name='predictions/transform/logits') + logits = classifier(sequence_output) + if output == 'logits': + output_tensors = {'logits': logits} + elif output == 'predictions': + output_tensors = { + 'predictions': tf.keras.layers.Activation(tf.nn.log_softmax)(logits) + } + else: + raise ValueError( + ('Unknown `output` value "%s". 
`output` can be either "logits" or ' + '"predictions"') % output) + if output_encoder_outputs: + output_tensors['encoder_outputs'] = sequence_output + + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. super(BertTokenClassifier, self).__init__( - inputs=inputs, outputs=predictions, **kwargs) + inputs=inputs, outputs=output_tensors, **kwargs) + + self._network = network + config_dict = { + 'network': network, + 'num_classes': num_classes, + 'initializer': initializer, + 'output': output, + 'output_encoder_outputs': output_encoder_outputs + } + + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. 
+ config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + + self.classifier = classifier + self.logits = logits @property def checkpoint_items(self): return dict(encoder=self._network) def get_config(self): - return self._config + return dict(self._config._asdict()) @classmethod def from_config(cls, config, custom_objects=None): diff --git a/official/nlp/modeling/models/bert_token_classifier_test.py b/official/nlp/modeling/models/bert_token_classifier_test.py index 970b531cf5673e4040ceb417ffb67a8ef6aea70a..8af0897638d850a25590b475ae7be65365271343 100644 --- a/official/nlp/modeling/models/bert_token_classifier_test.py +++ b/official/nlp/modeling/models/bert_token_classifier_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for BERT trainer network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for BERT token classifier.""" +from absl.testing import parameterized import tensorflow as tf from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import @@ -30,19 +27,26 @@ from official.nlp.modeling.models import bert_token_classifier @keras_parameterized.run_all_keras_modes class BertTokenClassifierTest(keras_parameterized.TestCase): - def test_bert_trainer(self): + @parameterized.parameters((True, True), (False, False)) + def test_bert_trainer(self, dict_outputs, output_encoder_outputs): """Validate that the Keras object can be created.""" # Build a transformer network to use within the BERT trainer. vocab_size = 100 sequence_length = 512 - test_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + hidden_size = 768 + test_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=2, + max_sequence_length=sequence_length, + dict_outputs=dict_outputs, + hidden_size=hidden_size) # Create a BERT trainer with the created network. num_classes = 3 bert_trainer_model = bert_token_classifier.BertTokenClassifier( test_network, - num_classes=num_classes) + num_classes=num_classes, + output_encoder_outputs=output_encoder_outputs) # Create a set of 2-dimensional inputs (the first dimension is implicit). word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) @@ -50,19 +54,25 @@ class BertTokenClassifierTest(keras_parameterized.TestCase): type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) # Invoke the trainer model on the inputs. This causes the layer to be built. 
- sequence_outs = bert_trainer_model([word_ids, mask, type_ids]) + outputs = bert_trainer_model([word_ids, mask, type_ids]) + if output_encoder_outputs: + logits = outputs['logits'] + encoder_outputs = outputs['encoder_outputs'] + self.assertAllEqual(encoder_outputs.shape.as_list(), + [None, sequence_length, hidden_size]) + else: + logits = outputs['logits'] # Validate that the outputs are of the expected shape. expected_classification_shape = [None, sequence_length, num_classes] - self.assertAllEqual(expected_classification_shape, - sequence_outs.shape.as_list()) + self.assertAllEqual(expected_classification_shape, logits.shape.as_list()) def test_bert_trainer_tensor_call(self): """Validate that the Keras object can be invoked.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( - vocab_size=100, num_layers=2, sequence_length=2) + test_network = networks.BertEncoder( + vocab_size=100, num_layers=2, max_sequence_length=2) # Create a BERT trainer with the created network. bert_trainer_model = bert_token_classifier.BertTokenClassifier( @@ -82,8 +92,8 @@ class BertTokenClassifierTest(keras_parameterized.TestCase): """Validate that the BERT trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_network = networks.TransformerEncoder( - vocab_size=100, num_layers=2, sequence_length=5) + test_network = networks.BertEncoder( + vocab_size=100, num_layers=2, max_sequence_length=5) # Create a BERT trainer with the created network. (Note that all the args # are different, so we can catch any serialization mismatches.) 
diff --git a/official/nlp/modeling/models/dual_encoder.py b/official/nlp/modeling/models/dual_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..7fa496e89623c88866b2183b39ba67fb4c4e156b --- /dev/null +++ b/official/nlp/modeling/models/dual_encoder.py @@ -0,0 +1,162 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Trainer network for dual encoder style models.""" +# pylint: disable=g-classes-have-attributes +import collections +import tensorflow as tf + +from official.nlp.modeling import layers + + +@tf.keras.utils.register_keras_serializable(package='Text') +class DualEncoder(tf.keras.Model): + """A dual encoder model based on a transformer-based encoder. + + This is an implementation of the dual encoder network structure based on the + transfomer stack, as described in ["Language-agnostic BERT Sentence + Embedding"](https://arxiv.org/abs/2007.01852) + + The DualEncoder allows a user to pass in a transformer stack, and build a dual + encoder model based on the transformer stack. + + Args: + network: A transformer network which should output an encoding output. + max_seq_length: The maximum allowed sequence length for transformer. + normalize: If set to True, normalize the encoding produced by transfomer. + logit_scale: The scaling factor of dot products when doing training. + logit_margin: The margin between positive and negative when doing training. 
+ output: The output style for this network. Can be either `logits` or + `predictions`. If set to `predictions`, it will output the embedding + producted by transformer network. + """ + + def __init__(self, + network: tf.keras.Model, + max_seq_length: int = 32, + normalize: bool = True, + logit_scale: float = 1.0, + logit_margin: float = 0.0, + output: str = 'logits', + **kwargs) -> None: + + if output == 'logits': + left_word_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='left_word_ids') + left_mask = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='left_mask') + left_type_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='left_type_ids') + else: + # Keep the consistant with legacy BERT hub module input names. + left_word_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='input_word_ids') + left_mask = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='input_mask') + left_type_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='input_type_ids') + + left_inputs = [left_word_ids, left_mask, left_type_ids] + left_outputs = network(left_inputs) + if isinstance(left_outputs, list): + left_sequence_output, left_encoded = left_outputs + else: + left_sequence_output = left_outputs['sequence_output'] + left_encoded = left_outputs['pooled_output'] + if normalize: + left_encoded = tf.keras.layers.Lambda( + lambda x: tf.nn.l2_normalize(x, axis=1))( + left_encoded) + + if output == 'logits': + right_word_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='right_word_ids') + right_mask = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='right_mask') + right_type_ids = tf.keras.layers.Input( + shape=(max_seq_length,), dtype=tf.int32, name='right_type_ids') + + right_inputs = [right_word_ids, right_mask, right_type_ids] + right_outputs = network(right_inputs) + if 
isinstance(right_outputs, list): + _, right_encoded = right_outputs + else: + right_encoded = right_outputs['pooled_output'] + if normalize: + right_encoded = tf.keras.layers.Lambda( + lambda x: tf.nn.l2_normalize(x, axis=1))( + right_encoded) + + dot_products = layers.MatMulWithMargin( + logit_scale=logit_scale, + logit_margin=logit_margin, + name='dot_product') + + inputs = [ + left_word_ids, left_mask, left_type_ids, right_word_ids, right_mask, + right_type_ids + ] + left_logits, right_logits = dot_products(left_encoded, right_encoded) + + outputs = dict(left_logits=left_logits, right_logits=right_logits) + + elif output == 'predictions': + inputs = [left_word_ids, left_mask, left_type_ids] + + # To keep consistent with legacy BERT hub modules, the outputs are + # "pooled_output" and "sequence_output". + outputs = dict( + sequence_output=left_sequence_output, pooled_output=left_encoded) + else: + raise ValueError('output type %s is not supported' % output) + + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. + super(DualEncoder, self).__init__(inputs=inputs, outputs=outputs, **kwargs) + + config_dict = { + 'network': network, + 'max_seq_length': max_seq_length, + 'normalize': normalize, + 'logit_scale': logit_scale, + 'logit_margin': logit_margin, + 'output': output, + } + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. 
+ config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + + self.network = network + + def get_config(self): + return dict(self._config._asdict()) + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict(encoder=self.network) + return items diff --git a/official/nlp/modeling/models/dual_encoder_test.py b/official/nlp/modeling/models/dual_encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..da71da8091ce6846ffc6d7dc97782d3041f2f94e --- /dev/null +++ b/official/nlp/modeling/models/dual_encoder_test.py @@ -0,0 +1,125 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for dual encoder network.""" + +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling import networks +from official.nlp.modeling.models import dual_encoder + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. 
+@keras_parameterized.run_all_keras_modes +class DualEncoderTest(keras_parameterized.TestCase): + + @parameterized.parameters((192, 'logits'), (768, 'predictions')) + def test_dual_encoder(self, hidden_size, output): + """Validate that the Keras object can be created.""" + # Build a transformer network to use within the dual encoder model. + vocab_size = 100 + sequence_length = 512 + test_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=2, + hidden_size=hidden_size, + sequence_length=sequence_length, + dict_outputs=True) + + # Create a dual encoder model with the created network. + dual_encoder_model = dual_encoder.DualEncoder( + test_network, max_seq_length=sequence_length, output=output) + + # Create a set of 2-dimensional inputs (the first dimension is implicit). + left_word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + left_mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + left_type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + + right_word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + right_mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + right_type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + + if output == 'logits': + outputs = dual_encoder_model([ + left_word_ids, left_mask, left_type_ids, right_word_ids, right_mask, + right_type_ids + ]) + _ = outputs['left_logits'] + elif output == 'predictions': + outputs = dual_encoder_model([left_word_ids, left_mask, left_type_ids]) + # Validate that the outputs are of the expected shape. 
+ expected_sequence_shape = [None, sequence_length, 768] + self.assertAllEqual(expected_sequence_shape, + outputs['sequence_output'].shape.as_list()) + left_encoded = outputs['pooled_output'] + expected_encoding_shape = [None, 768] + self.assertAllEqual(expected_encoding_shape, left_encoded.shape.as_list()) + + @parameterized.parameters((192, 'logits'), (768, 'predictions')) + def test_dual_encoder_tensor_call(self, hidden_size, output): + """Validate that the Keras object can be invoked.""" + # Build a transformer network to use within the dual encoder model. (Here, + # we use # a short sequence_length for convenience.) + sequence_length = 2 + test_network = networks.BertEncoder( + vocab_size=100, num_layers=2, sequence_length=sequence_length) + + # Create a dual encoder model with the created network. + dual_encoder_model = dual_encoder.DualEncoder( + test_network, max_seq_length=sequence_length, output=output) + + # Create a set of 2-dimensional data tensors to feed into the model. + word_ids = tf.constant([[1, 1], [2, 2]], dtype=tf.int32) + mask = tf.constant([[1, 1], [1, 0]], dtype=tf.int32) + type_ids = tf.constant([[1, 1], [2, 2]], dtype=tf.int32) + + # Invoke the model model on the tensors. In Eager mode, this does the + # actual calculation. (We can't validate the outputs, since the network is + # too complex: this simply ensures we're not hitting runtime errors.) + if output == 'logits': + _ = dual_encoder_model( + [word_ids, mask, type_ids, word_ids, mask, type_ids]) + elif output == 'predictions': + _ = dual_encoder_model([word_ids, mask, type_ids]) + + def test_serialize_deserialize(self): + """Validate that the dual encoder model can be serialized / deserialized.""" + # Build a transformer network to use within the dual encoder model. (Here, + # we use a short sequence_length for convenience.) 
+ sequence_length = 32 + test_network = networks.BertEncoder( + vocab_size=100, num_layers=2, sequence_length=sequence_length) + + # Create a dual encoder model with the created network. (Note that all the + # args are different, so we can catch any serialization mismatches.) + dual_encoder_model = dual_encoder.DualEncoder( + test_network, max_seq_length=sequence_length, output='predictions') + + # Create another dual encoder model via serialization and deserialization. + config = dual_encoder_model.get_config() + new_dual_encoder = dual_encoder.DualEncoder.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_dual_encoder.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(dual_encoder_model.get_config(), + new_dual_encoder.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/models/electra_pretrainer.py b/official/nlp/modeling/models/electra_pretrainer.py index ffd078ef217bd0a791e16c60922e67eee74b651d..dcbbc552175625455edb0395a686d2a254419ddc 100644 --- a/official/nlp/modeling/models/electra_pretrainer.py +++ b/official/nlp/modeling/models/electra_pretrainer.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Trainer network for ELECTRA models.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import copy + import tensorflow as tf from official.modeling import tf_utils @@ -39,7 +36,10 @@ class ElectraPretrainer(tf.keras.Model): model (at generator side) and classification networks (at discriminator side) that are used to create the training objectives. - Arguments: + *Note* that the model is constructed by Keras Subclass API, where layers are + defined inside `__init__` and `call()` implements the computation. + + Args: generator_network: A transformer network for generator, this network should output a sequence output and an optional classification output. discriminator_network: A transformer network for discriminator, this network @@ -47,15 +47,13 @@ class ElectraPretrainer(tf.keras.Model): vocab_size: Size of generator output vocabulary num_classes: Number of classes to predict from the classification network for the generator network (not used now) - sequence_length: Input sequence length - last_hidden_dim: Last hidden dim of generator transformer output num_token_predictions: Number of tokens to predict from the masked LM. mlm_activation: The activation (if any) to use in the masked LM and classification networks. If None, no activation will be used. mlm_initializer: The initializer (if any) to use in the masked LM and classification networks. Defaults to a Glorot uniform initializer. - output_type: The output style for this network. Can be either 'logits' or - 'predictions'. + output_type: The output style for this network. Can be either `logits` or + `predictions`. 
disallow_correct: Whether to disallow the generator to generate the exact same token in the original sentence """ @@ -65,8 +63,6 @@ class ElectraPretrainer(tf.keras.Model): discriminator_network, vocab_size, num_classes, - sequence_length, - last_hidden_dim, num_token_predictions, mlm_activation=None, mlm_initializer='glorot_uniform', @@ -79,8 +75,6 @@ class ElectraPretrainer(tf.keras.Model): 'discriminator_network': discriminator_network, 'vocab_size': vocab_size, 'num_classes': num_classes, - 'sequence_length': sequence_length, - 'last_hidden_dim': last_hidden_dim, 'num_token_predictions': num_token_predictions, 'mlm_activation': mlm_activation, 'mlm_initializer': mlm_initializer, @@ -94,8 +88,6 @@ class ElectraPretrainer(tf.keras.Model): self.discriminator_network = discriminator_network self.vocab_size = vocab_size self.num_classes = num_classes - self.sequence_length = sequence_length - self.last_hidden_dim = last_hidden_dim self.num_token_predictions = num_token_predictions self.mlm_activation = mlm_activation self.mlm_initializer = mlm_initializer @@ -108,10 +100,15 @@ class ElectraPretrainer(tf.keras.Model): output=output_type, name='generator_masked_lm') self.classification = layers.ClassificationHead( - inner_dim=last_hidden_dim, + inner_dim=generator_network.get_config()['hidden_size'], num_classes=num_classes, initializer=mlm_initializer, name='generator_classification_head') + self.discriminator_projection = tf.keras.layers.Dense( + units=discriminator_network.get_config()['hidden_size'], + activation=mlm_activation, + kernel_initializer=mlm_initializer, + name='discriminator_projection_head') self.discriminator_head = tf.keras.layers.Dense( units=1, kernel_initializer=mlm_initializer) @@ -123,13 +120,13 @@ class ElectraPretrainer(tf.keras.Model): Returns: outputs: A dict of pretrainer model outputs, including - (1) lm_outputs: a [batch_size, num_token_predictions, vocab_size] tensor - indicating logits on masked positions. 
- (2) sentence_outputs: a [batch_size, num_classes] tensor indicating + (1) lm_outputs: A `[batch_size, num_token_predictions, vocab_size]` + tensor indicating logits on masked positions. + (2) sentence_outputs: A `[batch_size, num_classes]` tensor indicating logits for nsp task. - (3) disc_logits: a [batch_size, sequence_length] tensor indicating + (3) disc_logits: A `[batch_size, sequence_length]` tensor indicating logits for discriminator replaced token detection task. - (4) disc_label: a [batch_size, sequence_length] tensor indicating + (4) disc_label: A `[batch_size, sequence_length]` tensor indicating target labels for discriminator replaced token detection task. """ input_word_ids = inputs['input_word_ids'] @@ -138,14 +135,11 @@ class ElectraPretrainer(tf.keras.Model): masked_lm_positions = inputs['masked_lm_positions'] ### Generator ### - sequence_output, cls_output = self.generator_network( - [input_word_ids, input_mask, input_type_ids]) - + sequence_output = self.generator_network( + [input_word_ids, input_mask, input_type_ids])['sequence_output'] # The generator encoder network may get outputs from all layers. if isinstance(sequence_output, list): sequence_output = sequence_output[-1] - if isinstance(cls_output, list): - cls_output = cls_output[-1] lm_outputs = self.masked_lm(sequence_output, masked_lm_positions) sentence_outputs = self.classification(sequence_output) @@ -156,16 +150,17 @@ class ElectraPretrainer(tf.keras.Model): ### Discriminator ### disc_input = fake_data['inputs'] disc_label = fake_data['is_fake_tokens'] - disc_sequence_output, _ = self.discriminator_network([ + disc_sequence_output = self.discriminator_network([ disc_input['input_word_ids'], disc_input['input_mask'], disc_input['input_type_ids'] - ]) + ])['sequence_output'] # The discriminator encoder network may get outputs from all layers. 
if isinstance(disc_sequence_output, list): disc_sequence_output = disc_sequence_output[-1] - disc_logits = self.discriminator_head(disc_sequence_output) + disc_logits = self.discriminator_head( + self.discriminator_projection(disc_sequence_output)) disc_logits = tf.squeeze(disc_logits, axis=-1) outputs = { @@ -181,7 +176,7 @@ class ElectraPretrainer(tf.keras.Model): """Generate corrupted data for discriminator. Args: - inputs: A dict of all inputs, same as the input of call() function + inputs: A dict of all inputs, same as the input of `call()` function mlm_logits: The generator's output logits duplicate: Whether to copy the original inputs dict during modifications @@ -214,6 +209,12 @@ class ElectraPretrainer(tf.keras.Model): 'sampled_tokens': sampled_tokens } + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict(encoder=self.discriminator_network) + return items + def get_config(self): return self._config @@ -226,16 +227,18 @@ def scatter_update(sequence, updates, positions): """Scatter-update a sequence. Args: - sequence: A [batch_size, seq_len] or [batch_size, seq_len, depth] tensor - updates: A tensor of size batch_size*seq_len(*depth) - positions: A [batch_size, n_positions] tensor + sequence: A `[batch_size, seq_len]` or `[batch_size, seq_len, depth]` + tensor. + updates: A tensor of size `batch_size*seq_len(*depth)`. + positions: A `[batch_size, n_positions]` tensor. Returns: - updated_sequence: A [batch_size, seq_len] or [batch_size, seq_len, depth] - tensor of "sequence" with elements at "positions" replaced by the values - at "updates". Updates to index 0 are ignored. If there are duplicated - positions the update is only applied once. - updates_mask: A [batch_size, seq_len] mask tensor of which inputs were + updated_sequence: A `[batch_size, seq_len]` or + `[batch_size, seq_len, depth]` tensor of "sequence" with elements at + "positions" replaced by the values at "updates". 
Updates to index 0 are + ignored. If there are duplicated positions the update is only + applied once. + updates_mask: A `[batch_size, seq_len]` mask tensor of which inputs were updated. """ shape = tf_utils.get_shape_list(sequence, expected_rank=[2, 3]) @@ -288,14 +291,14 @@ def sample_from_softmax(logits, disallow=None): """Implement softmax sampling using gumbel softmax trick. Args: - logits: A [batch_size, num_token_predictions, vocab_size] tensor indicating - the generator output logits for each masked position. + logits: A `[batch_size, num_token_predictions, vocab_size]` tensor + indicating the generator output logits for each masked position. disallow: If `None`, we directly sample tokens from the logits. Otherwise, - this is a tensor of size [batch_size, num_token_predictions, vocab_size] + this is a tensor of size `[batch_size, num_token_predictions, vocab_size]` indicating the true word id in each masked position. Returns: - sampled_tokens: A [batch_size, num_token_predictions, vocab_size] one hot + sampled_tokens: A `[batch_size, num_token_predictions, vocab_size]` one hot tensor indicating the sampled word id in each masked position. """ if disallow is not None: diff --git a/official/nlp/modeling/models/electra_pretrainer_test.py b/official/nlp/modeling/models/electra_pretrainer_test.py index 29fa4668ab02c03fd4c5b10efd96f889a65ffc32..107cb561b6454ffc78c30bfb4a4dc4da9eceb094 100644 --- a/official/nlp/modeling/models/electra_pretrainer_test.py +++ b/official/nlp/modeling/models/electra_pretrainer_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for ELECTRA pre trainer network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for ELECTRA pre trainer network.""" import tensorflow as tf @@ -35,10 +31,16 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): # Build a transformer network to use within the ELECTRA trainer. vocab_size = 100 sequence_length = 512 - test_generator_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) - test_discriminator_network = networks.TransformerEncoder( - vocab_size=vocab_size, num_layers=2, sequence_length=sequence_length) + test_generator_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=2, + max_sequence_length=sequence_length, + dict_outputs=True) + test_discriminator_network = networks.BertEncoder( + vocab_size=vocab_size, + num_layers=2, + max_sequence_length=sequence_length, + dict_outputs=True) # Create a ELECTRA trainer with the created network. num_classes = 3 @@ -48,8 +50,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): discriminator_network=test_discriminator_network, vocab_size=vocab_size, num_classes=num_classes, - sequence_length=sequence_length, - last_hidden_dim=768, num_token_predictions=num_token_predictions, disallow_correct=True) @@ -89,10 +89,10 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): """Validate that the Keras object can be invoked.""" # Build a transformer network to use within the ELECTRA trainer. (Here, we # use a short sequence_length for convenience.) 
- test_generator_network = networks.TransformerEncoder( - vocab_size=100, num_layers=4, sequence_length=3) - test_discriminator_network = networks.TransformerEncoder( - vocab_size=100, num_layers=4, sequence_length=3) + test_generator_network = networks.BertEncoder( + vocab_size=100, num_layers=4, max_sequence_length=3, dict_outputs=True) + test_discriminator_network = networks.BertEncoder( + vocab_size=100, num_layers=4, max_sequence_length=3, dict_outputs=True) # Create a ELECTRA trainer with the created network. eletrca_trainer_model = electra_pretrainer.ElectraPretrainer( @@ -101,7 +101,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): vocab_size=100, num_classes=2, sequence_length=3, - last_hidden_dim=768, num_token_predictions=2) # Create a set of 2-dimensional data tensors to feed into the model. @@ -127,10 +126,10 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): """Validate that the ELECTRA trainer can be serialized and deserialized.""" # Build a transformer network to use within the BERT trainer. (Here, we use # a short sequence_length for convenience.) - test_generator_network = networks.TransformerEncoder( - vocab_size=100, num_layers=4, sequence_length=3) - test_discriminator_network = networks.TransformerEncoder( - vocab_size=100, num_layers=4, sequence_length=3) + test_generator_network = networks.BertEncoder( + vocab_size=100, num_layers=4, max_sequence_length=3) + test_discriminator_network = networks.BertEncoder( + vocab_size=100, num_layers=4, max_sequence_length=3) # Create a ELECTRA trainer with the created network. (Note that all the args # are different, so we can catch any serialization mismatches.) @@ -140,7 +139,6 @@ class ElectraPretrainerTest(keras_parameterized.TestCase): vocab_size=100, num_classes=2, sequence_length=3, - last_hidden_dim=768, num_token_predictions=2) # Create another BERT trainer via serialization and deserialization. 
diff --git a/official/nlp/modeling/models/seq2seq_transformer.py b/official/nlp/modeling/models/seq2seq_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..15d7b1bedc1dea48429fd7db274aff9ea9a8c833 --- /dev/null +++ b/official/nlp/modeling/models/seq2seq_transformer.py @@ -0,0 +1,589 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implement Seq2Seq Transformer model by TF official NLP library. + +Model paper: https://arxiv.org/pdf/1706.03762.pdf +""" +import math + +import tensorflow as tf +from official.modeling import tf_utils +from official.nlp import keras_nlp +from official.nlp.modeling import layers +from official.nlp.modeling.ops import beam_search + +EOS_ID = 1 + + +@tf.keras.utils.register_keras_serializable(package="Text") +class Seq2SeqTransformer(tf.keras.Model): + """Transformer model with Keras. + + Implemented as described in: https://arxiv.org/pdf/1706.03762.pdf + + The Transformer model consists of an encoder and decoder. The input is an int + sequence (or a batch of sequences). The encoder produces a continuous + representation, and the decoder uses the encoder output to generate + probabilities for the output sequence. 
+ """ + + def __init__(self, + vocab_size=33708, + embedding_width=512, + dropout_rate=0.0, + padded_decode=False, + decode_max_length=None, + extra_decode_length=0, + beam_size=4, + alpha=0.6, + encoder_layer=None, + decoder_layer=None, + eos_id=EOS_ID, + **kwargs): + """Initialize layers to build Transformer model. + + Args: + vocab_size: Size of vocabulary. + embedding_width: Size of hidden layer for embedding. + dropout_rate: Dropout probability. + padded_decode: Whether to max_sequence_length padding is used. If set + False, max_sequence_length padding is not used. + decode_max_length: maximum number of steps to decode a sequence. + extra_decode_length: Beam search will run extra steps to decode. + beam_size: Number of beams for beam search + alpha: The strength of length normalization for beam search. + encoder_layer: An initialized encoder layer. + decoder_layer: An initialized decoder layer. + eos_id: Id of end of sentence token. + **kwargs: other keyword arguments. + """ + super().__init__(**kwargs) + self._vocab_size = vocab_size + self._embedding_width = embedding_width + self._dropout_rate = dropout_rate + self._padded_decode = padded_decode + self._decode_max_length = decode_max_length + self._extra_decode_length = extra_decode_length + self._beam_size = beam_size + self._alpha = alpha + self._eos_id = eos_id + self.embedding_lookup = keras_nlp.layers.OnDeviceEmbedding( + vocab_size=self._vocab_size, + embedding_width=self._embedding_width, + initializer=tf.random_normal_initializer( + mean=0., stddev=self._embedding_width**-0.5), + scale_factor=self._embedding_width**0.5) + self.encoder_layer = encoder_layer + self.decoder_layer = decoder_layer + self.position_embedding = layers.RelativePositionEmbedding( + hidden_size=self._embedding_width) + self.encoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + self.decoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + + def get_config(self): + config = { + "vocab_size": 
self._vocab_size, + "hidden_size": self._embedding_width, + "dropout_rate": self._dropout_rate, + "padded_decode": self._padded_decode, + "decode_max_length": self._decode_max_length, + "eos_id": self._eos_id, + "extra_decode_length": self._extra_decode_length, + "beam_size": self._beam_size, + "alpha": self._alpha, + "encoder_layer": self.encoder_layer, + "decoder_layer": self.decoder_layer + } + base_config = super(Seq2SeqTransformer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _embedding_linear(self, embedding_matrix, x): + """Uses embeddings as linear transformation weights.""" + batch_size = tf.shape(x)[0] + length = tf.shape(x)[1] + hidden_size = tf.shape(x)[2] + vocab_size = tf.shape(embedding_matrix)[0] + + x = tf.reshape(x, [-1, hidden_size]) + logits = tf.matmul(x, tf.cast(embedding_matrix, x.dtype), transpose_b=True) + + return tf.reshape(logits, [batch_size, length, vocab_size]) + + def call(self, inputs): + """Calculate target logits or inferred target sequences. + + Args: + inputs: a dictionary of tensors. + Feature `inputs`: int tensor with shape `[batch_size, input_length]`. + Feature `targets` (optional): None or int tensor with shape + `[batch_size, target_length]`. + + Returns: + If targets is defined, then return logits for each word in the target + sequence, which is a float tensor with shape + `(batch_size, target_length, vocab_size)`. If target is `None`, then + generate output sequence one token at a time and + returns a dictionary { + outputs: `(batch_size, decoded_length)` + scores: `(batch_size, 1)`} + Even when `float16` is used, the output tensor(s) are always `float32`. + + Raises: + NotImplementedError: If try to use padded decode method on CPU/GPUs. + """ + sources = inputs["inputs"] + targets = inputs.get("targets", None) + # Prepare inputs to the layer stack by adding positional encodings and + # applying dropout. 
+ embedded_inputs = self.embedding_lookup(sources) + embedding_mask = tf.cast(tf.not_equal(sources, 0), embedded_inputs.dtype) + embedded_inputs *= tf.expand_dims(embedding_mask, -1) + # Attention_mask generation. + input_shape = tf_utils.get_shape_list(sources, expected_rank=2) + attention_mask = tf.cast( + tf.reshape( + tf.not_equal(sources, 0), [input_shape[0], 1, input_shape[1]]), + dtype=sources.dtype) + broadcast_ones = tf.ones( + shape=[input_shape[0], input_shape[1], 1], dtype=sources.dtype) + attention_mask = broadcast_ones * attention_mask + + pos_encoding = self.position_embedding(embedded_inputs) + pos_encoding = tf.cast(pos_encoding, embedded_inputs.dtype) + encoder_inputs = embedded_inputs + pos_encoding + + encoder_inputs = self.encoder_dropout(encoder_inputs) + + encoder_outputs = self.encoder_layer( + encoder_inputs, attention_mask=attention_mask) + + if targets is None: + if self._padded_decode: + max_decode_length = self._decode_max_length + else: + max_decode_length = self._decode_max_length or ( + tf.shape(encoder_outputs)[1] + self._extra_decode_length) + symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length) + + batch_size = tf.shape(encoder_outputs)[0] + # Create initial set of IDs that will be passed to symbols_to_logits_fn. + initial_ids = tf.zeros([batch_size], dtype=tf.int32) + + # Create cache storing decoder attention values for each layer. + init_decode_length = (max_decode_length if self._padded_decode else 0) + num_heads = self.decoder_layer.num_attention_heads + dim_per_head = self._embedding_width // num_heads + + # Cache dtype needs to match beam_search dtype. 
+ # pylint: disable=g-complex-comprehension + cache = { + str(layer): { + "key": + tf.zeros( + [batch_size, init_decode_length, num_heads, dim_per_head], + dtype=self.compute_dtype), + "value": + tf.zeros( + [batch_size, init_decode_length, num_heads, dim_per_head], + dtype=self.compute_dtype) + } for layer in range(self.decoder_layer.num_layers) + } + # pylint: enable=g-complex-comprehension + + # Add encoder output and attention bias to the cache. + encoder_outputs = tf.cast(encoder_outputs, dtype=self.compute_dtype) + attention_mask = tf.cast( + tf.reshape( + tf.not_equal(sources, 0), [input_shape[0], 1, input_shape[1]]), + dtype=self.compute_dtype) + cache["encoder_outputs"] = encoder_outputs + cache["encoder_decoder_attention_mask"] = attention_mask + + # Use beam search to find the top beam_size sequences and scores. + decoded_ids, scores = beam_search.sequence_beam_search( + symbols_to_logits_fn=symbols_to_logits_fn, + initial_ids=initial_ids, + initial_cache=cache, + vocab_size=self._vocab_size, + beam_size=self._beam_size, + alpha=self._alpha, + max_decode_length=max_decode_length, + eos_id=self._eos_id, + padded_decode=self._padded_decode, + dtype=self.compute_dtype) + + # Get the top sequence for each batch element + top_decoded_ids = decoded_ids[:, 0, 1:] + top_scores = scores[:, 0] + + return {"outputs": top_decoded_ids, "scores": top_scores} + + decoder_inputs = self.embedding_lookup(targets) + embedding_mask = tf.cast(tf.not_equal(targets, 0), decoder_inputs.dtype) + decoder_inputs *= tf.expand_dims(embedding_mask, -1) + # Shift targets to the right, and remove the last element + decoder_inputs = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :] + length = tf.shape(decoder_inputs)[1] + pos_encoding = self.position_embedding(decoder_inputs) + pos_encoding = tf.cast(pos_encoding, embedded_inputs.dtype) + decoder_inputs += pos_encoding + + decoder_inputs = self.decoder_dropout(decoder_inputs) + + decoder_shape = 
tf_utils.get_shape_list(decoder_inputs, expected_rank=3) + batch_size = decoder_shape[0] + decoder_length = decoder_shape[1] + + self_attention_mask = tf.linalg.band_part(tf.ones([length, length]), -1, 0) + self_attention_mask = tf.reshape(self_attention_mask, [1, length, length]) + self_attention_mask = tf.tile(self_attention_mask, [batch_size, 1, 1]) + + attention_mask = tf.cast( + tf.expand_dims(tf.not_equal(sources, 0), axis=1), dtype=sources.dtype) + attention_mask = tf.tile(attention_mask, [1, decoder_length, 1]) + + outputs = self.decoder_layer( + decoder_inputs, + encoder_outputs, + self_attention_mask=self_attention_mask, + cross_attention_mask=attention_mask) + logits = self._embedding_linear(self.embedding_lookup.embeddings, outputs) + # Model outputs should be float32 to avoid numeric issues. + # https://www.tensorflow.org/guide/mixed_precision#building_the_model + logits = tf.cast(logits, tf.float32) + return logits + + def _get_symbols_to_logits_fn(self, max_decode_length): + """Returns a decoding function that calculates logits of the next tokens.""" + timing_signal = self.position_embedding( + inputs=None, length=max_decode_length + 1) + timing_signal = tf.cast(timing_signal, dtype=self.compute_dtype) + decoder_self_attention_mask = tf.linalg.band_part( + tf.ones([max_decode_length, max_decode_length], + dtype=self.compute_dtype), -1, 0) + decoder_self_attention_mask = tf.reshape( + decoder_self_attention_mask, [1, max_decode_length, max_decode_length]) + + def symbols_to_logits_fn(ids, i, cache): + """Generate logits for next potential IDs. + + Args: + ids: Current decoded sequences. int tensor with shape `(batch_size * + beam_size, i + 1)`. + i: Loop index. + cache: Dictionary of values storing the encoder output, encoder-decoder + attention bias, and previous decoder attention values. 
+ + Returns: + Tuple of + (logits with shape `(batch_size * beam_size, vocab_size)`, + updated cache values) + """ + # Set decoder input to the last generated IDs + decoder_input = ids[:, -1:] + + # Preprocess decoder input by getting embeddings and adding timing signal. + # decoder_input = self.embedding_softmax_layer(decoder_input) + source_decoder_input = decoder_input + decoder_input = self.embedding_lookup(decoder_input) + embedding_mask = tf.cast( + tf.not_equal(source_decoder_input, 0), decoder_input.dtype) + decoder_input *= tf.expand_dims(embedding_mask, -1) + decoder_input += timing_signal[i] + if self._padded_decode: + # indexing does not work on TPU. + bias_shape = decoder_self_attention_mask.shape.as_list() + self_attention_mask = tf.slice(decoder_self_attention_mask, [0, i, 0], + [bias_shape[0], 1, bias_shape[2]]) + else: + self_attention_mask = decoder_self_attention_mask[:, i:i + 1, :i + 1] + decoder_shape = tf_utils.get_shape_list(decoder_input, expected_rank=3) + batch_size = decoder_shape[0] + decoder_length = decoder_shape[1] + + self_attention_mask = tf.tile(self_attention_mask, [batch_size, 1, 1]) + attention_mask = cache.get("encoder_decoder_attention_mask") + attention_mask = tf.tile(attention_mask, [1, decoder_length, 1]) + + decoder_outputs = self.decoder_layer( + decoder_input, + cache.get("encoder_outputs"), + self_attention_mask=self_attention_mask, + cross_attention_mask=attention_mask, + cache=cache, + decode_loop_step=i if self._padded_decode else None) + + decoder_outputs = tf.cast(decoder_outputs, dtype=self.compute_dtype) + logits = self._embedding_linear(self.embedding_lookup.embeddings, + decoder_outputs) + logits = tf.squeeze(logits, axis=[1]) + return logits, cache + + return symbols_to_logits_fn + + +class TransformerEncoder(tf.keras.layers.Layer): + """Transformer encoder. + + Transformer encoder is made up of N identical layers. Each layer is composed + of the sublayers: + 1. Self-attention layer + 2. 
Feedforward network (which is 2 fully-connected layers) + """ + + def __init__(self, + num_layers=6, + num_attention_heads=8, + intermediate_size=2048, + activation="relu", + dropout_rate=0.0, + attention_dropout_rate=0.0, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=0.0, + **kwargs): + """Initialize a Transformer encoder. + + Args: + num_layers: Number of layers. + num_attention_heads: Number of attention heads. + intermediate_size: Size of the intermediate (Feedforward) layer. + activation: Activation for the intermediate layer. + dropout_rate: Dropout probability. + attention_dropout_rate: Dropout probability for attention layers. + use_bias: Whether to enable use_bias in attention layer. If set False, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate + dense layers. If set False, output of attention and intermediate dense + layers is normalized. + norm_epsilon: Epsilon value to initialize normalization layers. + intermediate_dropout: Dropout probability for intermediate_dropout_layer. + **kwargs: key word arguemnts passed to tf.keras.layers.Layer. 
+ """ + + super(TransformerEncoder, self).__init__(**kwargs) + self.num_layers = num_layers + self.num_attention_heads = num_attention_heads + self._intermediate_size = intermediate_size + self._activation = activation + self._dropout_rate = dropout_rate + self._attention_dropout_rate = attention_dropout_rate + self._use_bias = use_bias + self._norm_first = norm_first + self._norm_epsilon = norm_epsilon + self._intermediate_dropout = intermediate_dropout + + def build(self, input_shape): + """Implements build() for the layer.""" + self.encoder_layers = [] + for i in range(self.num_layers): + self.encoder_layers.append( + keras_nlp.layers.TransformerEncoderBlock( + num_attention_heads=self.num_attention_heads, + inner_dim=self._intermediate_size, + inner_activation=self._activation, + output_dropout=self._dropout_rate, + attention_dropout=self._attention_dropout_rate, + use_bias=self._use_bias, + norm_first=self._norm_first, + norm_epsilon=self._norm_epsilon, + inner_dropout=self._intermediate_dropout, + attention_initializer=attention_initializer(input_shape[2]), + name=("layer_%d" % i))) + self.output_normalization = tf.keras.layers.LayerNormalization( + epsilon=self._norm_epsilon, dtype="float32") + super(TransformerEncoder, self).build(input_shape) + + def get_config(self): + config = { + "num_layers": self.num_layers, + "num_attention_heads": self.num_attention_heads, + "intermediate_size": self._intermediate_size, + "activation": self._activation, + "dropout_rate": self._dropout_rate, + "attention_dropout_rate": self._attention_dropout_rate, + "use_bias": self._use_bias, + "norm_first": self._norm_first, + "norm_epsilon": self._norm_epsilon, + "intermediate_dropout": self._intermediate_dropout + } + base_config = super(TransformerEncoder, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, encoder_inputs, attention_mask=None): + """Return the output of the encoder. 
+ + Args: + encoder_inputs: A tensor with shape `(batch_size, input_length, + hidden_size)`. + attention_mask: A mask for the encoder self-attention layer with shape + `(batch_size, input_length, input_length)`. + + Returns: + Output of encoder which is a `float32` tensor with shape + `(batch_size, input_length, hidden_size)`. + """ + for layer_idx in range(self.num_layers): + encoder_inputs = self.encoder_layers[layer_idx]( + [encoder_inputs, attention_mask]) + + output_tensor = encoder_inputs + output_tensor = self.output_normalization(output_tensor) + + return output_tensor + + +class TransformerDecoder(tf.keras.layers.Layer): + """Transformer decoder. + + Like the encoder, the decoder is made up of N identical layers. + Each layer is composed of the sublayers: + 1. Self-attention layer + 2. Multi-headed attention layer combining encoder outputs with results from + the previous self-attention layer. + 3. Feedforward network (2 fully-connected layers) + """ + + def __init__(self, + num_layers=6, + num_attention_heads=8, + intermediate_size=2048, + activation="relu", + dropout_rate=0.0, + attention_dropout_rate=0.0, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=0.0, + **kwargs): + """Initialize a Transformer decoder. + + Args: + num_layers: Number of layers. + num_attention_heads: Number of attention heads. + intermediate_size: Size of the intermediate (Feedforward) layer. + activation: Activation for the intermediate layer. + dropout_rate: Dropout probability. + attention_dropout_rate: Dropout probability for attention layers. + use_bias: Whether to enable use_bias in attention layer. If set `False`, + use_bias in attention layer is disabled. + norm_first: Whether to normalize inputs to attention and intermediate + dense layers. If set `False`, output of attention and intermediate dense + layers is normalized. + norm_epsilon: Epsilon value to initialize normalization layers. 
+ intermediate_dropout: Dropout probability for intermediate_dropout_layer. + **kwargs: key word arguemnts passed to tf.keras.layers.Layer. + """ + super(TransformerDecoder, self).__init__(**kwargs) + self.num_layers = num_layers + self.num_attention_heads = num_attention_heads + self._intermediate_size = intermediate_size + self._activation = activation + self._dropout_rate = dropout_rate + self._attention_dropout_rate = attention_dropout_rate + self._use_bias = use_bias + self._norm_first = norm_first + self._norm_epsilon = norm_epsilon + self._intermediate_dropout = intermediate_dropout + + def build(self, input_shape): + """Implements build() for the layer.""" + self.decoder_layers = [] + for i in range(self.num_layers): + self.decoder_layers.append( + layers.TransformerDecoderBlock( + num_attention_heads=self.num_attention_heads, + intermediate_size=self._intermediate_size, + intermediate_activation=self._activation, + dropout_rate=self._dropout_rate, + attention_dropout_rate=self._attention_dropout_rate, + use_bias=self._use_bias, + norm_first=self._norm_first, + norm_epsilon=self._norm_epsilon, + intermediate_dropout=self._intermediate_dropout, + attention_initializer=attention_initializer(input_shape[2]), + name=("layer_%d" % i))) + self.output_normalization = tf.keras.layers.LayerNormalization( + epsilon=1e-6, dtype="float32") + super(TransformerDecoder, self).build(input_shape) + + def get_config(self): + config = { + "num_layers": self.num_layers, + "num_attention_heads": self.num_attention_heads, + "intermediate_size": self._intermediate_size, + "activation": self._activation, + "dropout_rate": self._dropout_rate, + "attention_dropout_rate": self._attention_dropout_rate, + "use_bias": self._use_bias, + "norm_first": self._norm_first, + "norm_epsilon": self._norm_epsilon, + "intermediate_dropout": self._intermediate_dropout + } + base_config = super(TransformerDecoder, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + 
def call(self, + target, + memory, + self_attention_mask=None, + cross_attention_mask=None, + cache=None, + decode_loop_step=None): + """Return the output of the decoder layer stacks. + + Args: + target: A tensor with shape `(batch_size, target_length, hidden_size)`. + memory: A tensor with shape `(batch_size, input_length, hidden_size)`. + self_attention_mask: A tensor with shape `(batch_size, target_len, + target_length)`, the mask for decoder self-attention layer. + cross_attention_mask: A tensor with shape `(batch_size, target_length, + input_length)` which is the mask for encoder-decoder attention layer. + cache: (Used for fast decoding) A nested dictionary storing previous + decoder self-attention values. The items are: + {layer_n: {"k": A tensor with shape `(batch_size, i, key_channels)`, + "v": A tensor with shape `(batch_size, i, value_channels)`}, + ...} + decode_loop_step: An integer, the step number of the decoding loop. Used + only for autoregressive inference on TPU. + + Returns: + Output of decoder. + float32 tensor with shape `(batch_size, target_length, hidden_size`). + """ + + output_tensor = target + for layer_idx in range(self.num_layers): + transformer_inputs = [ + output_tensor, memory, cross_attention_mask, self_attention_mask + ] + # Gets the cache for decoding. 
+ if cache is None: + output_tensor, _ = self.decoder_layers[layer_idx](transformer_inputs) + else: + cache_layer_idx = str(layer_idx) + output_tensor, cache[cache_layer_idx] = self.decoder_layers[layer_idx]( + transformer_inputs, + cache=cache[cache_layer_idx], + decode_loop_step=decode_loop_step) + return self.output_normalization(output_tensor) + + +def attention_initializer(hidden_size): + """Initializer for attention layers in Seq2SeqTransformer.""" + hidden_size = int(hidden_size) + limit = math.sqrt(6.0 / (hidden_size + hidden_size)) + return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit) diff --git a/official/nlp/modeling/models/seq2seq_transformer_test.py b/official/nlp/modeling/models/seq2seq_transformer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7a3cc79d956f2b38d2eadb8a36a9ed8e9b6ed027 --- /dev/null +++ b/official/nlp/modeling/models/seq2seq_transformer_test.py @@ -0,0 +1,126 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test Transformer model.""" + +from absl import logging +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.nlp.modeling.models import seq2seq_transformer + + +class Seq2SeqTransformerTest(tf.test.TestCase, parameterized.TestCase): + + def _build_model(self, padded_decode, decode_max_length): + num_layers = 1 + num_attention_heads = 2 + intermediate_size = 32 + vocab_size = 100 + embedding_width = 16 + encdec_kwargs = dict( + num_layers=num_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + activation="relu", + dropout_rate=0.01, + attention_dropout_rate=0.01, + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=0.01) + encoder_layer = seq2seq_transformer.TransformerEncoder(**encdec_kwargs) + decoder_layer = seq2seq_transformer.TransformerDecoder(**encdec_kwargs) + + return seq2seq_transformer.Seq2SeqTransformer( + vocab_size=vocab_size, + embedding_width=embedding_width, + dropout_rate=0.01, + padded_decode=padded_decode, + decode_max_length=decode_max_length, + beam_size=4, + alpha=0.6, + encoder_layer=encoder_layer, + decoder_layer=decoder_layer) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + ], + mode="eager")) + def test_create_model_with_ds(self, distribution): + with distribution.scope(): + padded_decode = isinstance( + distribution, + (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)) + decode_max_length = 10 + batch_size = 4 + model = self._build_model(padded_decode, decode_max_length) + + @tf.function + def step(inputs): + + def _step_fn(inputs): + return model(inputs) + + outputs = distribution.run(_step_fn, args=(inputs,)) + return 
tf.nest.map_structure(distribution.experimental_local_results, + outputs) + + fake_inputs = dict( + inputs=np.zeros((batch_size, decode_max_length), dtype=np.int32)) + local_outputs = step(fake_inputs) + logging.info("local_outputs=%s", local_outputs) + self.assertEqual(local_outputs["outputs"][0].shape, (4, 10)) + + fake_inputs = dict( + inputs=np.zeros((batch_size, decode_max_length), dtype=np.int32), + targets=np.zeros((batch_size, 8), dtype=np.int32)) + local_outputs = step(fake_inputs) + logging.info("local_outputs=%s", local_outputs) + self.assertEqual(local_outputs[0].shape, (4, 8, 100)) + + @parameterized.parameters(True, False) + def test_create_savedmodel(self, padded_decode): + decode_max_length = 10 + model = self._build_model(padded_decode, decode_max_length) + + class SaveModule(tf.Module): + + def __init__(self, model): + super(SaveModule, self).__init__() + self.model = model + + @tf.function + def serve(self, inputs): + return self.model.call(dict(inputs=inputs)) + + save_module = SaveModule(model) + if padded_decode: + tensor_shape = (4, 10) + else: + tensor_shape = (None, None) + signatures = dict( + serving_default=save_module.serve.get_concrete_function( + tf.TensorSpec(shape=tensor_shape, dtype=tf.int32, name="inputs"))) + tf.saved_model.save(save_module, self.get_temp_dir(), signatures=signatures) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/modeling/models/xlnet.py b/official/nlp/modeling/models/xlnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4b5a54e7b8fc1059bbd4b50cf9ed0eea7926c2a1 --- /dev/null +++ b/official/nlp/modeling/models/xlnet.py @@ -0,0 +1,342 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""XLNet models.""" +# pylint: disable=g-classes-have-attributes + +from typing import Any, Mapping, Union + +import tensorflow as tf + +from official.nlp.modeling import layers +from official.nlp.modeling import networks + + +class XLNetMaskedLM(tf.keras.layers.Layer): + """XLNet pretraining head.""" + + def __init__(self, + vocab_size: int, + hidden_size: int, + initializer: str = 'glorot_uniform', + activation: str = 'gelu', + name=None, + **kwargs): + super().__init__(name=name, **kwargs) + self._vocab_size = vocab_size + self._hidden_size = hidden_size + self._initializer = initializer + self._activation = activation + + def build(self, input_shape): + self.dense = tf.keras.layers.Dense( + units=self._hidden_size, + activation=self._activation, + kernel_initializer=self._initializer, + name='transform/dense') + self.layer_norm = tf.keras.layers.LayerNormalization( + axis=-1, epsilon=1e-12, name='transform/LayerNorm') + self.bias = self.add_weight( + 'output_bias/bias', + shape=(self._vocab_size,), + initializer='zeros', + trainable=True) + super().build(input_shape) + + def call(self, + sequence_data: tf.Tensor, + embedding_table: tf.Tensor): + lm_data = self.dense(sequence_data) + lm_data = self.layer_norm(lm_data) + lm_data = tf.matmul(lm_data, embedding_table, transpose_b=True) + logits = tf.nn.bias_add(lm_data, self.bias) + return logits + + def get_config(self) -> Mapping[str, Any]: + config = { + 'vocab_size': + self._vocab_size, + 'hidden_size': + self._hidden_size, + 'initializer': + self._initializer + } + base_config = super(XLNetMaskedLM, 
self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +@tf.keras.utils.register_keras_serializable(package='Text') +class XLNetPretrainer(tf.keras.Model): + """XLNet-based pretrainer. + + This is an implementation of the network structure surrounding a + Transformer-XL encoder as described in "XLNet: Generalized Autoregressive + Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237). + + Args: + network: An XLNet/Transformer-XL based network. This network should output a + sequence output and list of `state` tensors. + mlm_activation: The activation (if any) to use in the Masked LM network. If + None, then no activation will be used. + mlm_initializer: The initializer (if any) to use in the masked LM. Defaults + to a Glorot uniform initializer. + + """ + + def __init__( + self, + network: Union[tf.keras.layers.Layer, tf.keras.Model], + mlm_activation=None, + mlm_initializer='glorot_uniform', + name: str = None, + **kwargs): + super().__init__(name=name, **kwargs) + self._config = { + 'network': network, + 'mlm_activation': mlm_activation, + 'mlm_initializer': mlm_initializer, + } + self._network = network + self._hidden_size = network.get_config()['hidden_size'] + self._vocab_size = network.get_config()['vocab_size'] + self._activation = mlm_activation + self._initializer = mlm_initializer + self._masked_lm = XLNetMaskedLM( + vocab_size=self._vocab_size, + hidden_size=self._hidden_size, + initializer=self._initializer) + + def call(self, inputs: Mapping[str, Any]): + input_word_ids = inputs['input_word_ids'] + input_type_ids = inputs['input_type_ids'] + masked_tokens = inputs['masked_tokens'] + permutation_mask = inputs['permutation_mask'] + target_mapping = inputs['target_mapping'] + state = inputs.get('state', None) + + attention_output, state = self._network( + input_ids=input_word_ids, + segment_ids=input_type_ids, + input_mask=None, + state=state, + permutation_mask=permutation_mask, + 
target_mapping=target_mapping, + masked_tokens=masked_tokens) + + embedding_table = self._network.get_embedding_lookup_table() + mlm_outputs = self._masked_lm( + sequence_data=attention_output, + embedding_table=embedding_table) + return mlm_outputs, state + + def get_config(self) -> Mapping[str, Any]: + return self._config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def checkpoint_items(self): + return dict(encoder=self._network) + + +@tf.keras.utils.register_keras_serializable(package='Text') +class XLNetClassifier(tf.keras.Model): + """Classifier model based on XLNet. + + This is an implementation of the network structure surrounding a + Transformer-XL encoder as described in "XLNet: Generalized Autoregressive + Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237). + + Note: This model does not use utilize the memory mechanism used in the + original XLNet Classifier. + + Args: + network: An XLNet/Transformer-XL based network. This network should output a + sequence output and list of `state` tensors. + num_classes: Number of classes to predict from the classification network. + initializer: The initializer (if any) to use in the classification networks. + Defaults to a RandomNormal initializer. + summary_type: Method used to summarize a sequence into a compact vector. + dropout_rate: The dropout probability of the cls head. 
+ """ + + def __init__( + self, + network: Union[tf.keras.layers.Layer, tf.keras.Model], + num_classes: int, + initializer: tf.keras.initializers.Initializer = 'random_normal', + summary_type: str = 'last', + dropout_rate: float = 0.1, + **kwargs): + super().__init__(**kwargs) + self._network = network + self._initializer = initializer + self._summary_type = summary_type + self._num_classes = num_classes + self._config = { + 'network': network, + 'initializer': initializer, + 'num_classes': num_classes, + 'summary_type': summary_type, + 'dropout_rate': dropout_rate, + } + + if summary_type == 'last': + cls_token_idx = -1 + elif summary_type == 'first': + cls_token_idx = 0 + else: + raise ValueError('Invalid summary type provided: %s.' % summary_type) + + self.classifier = layers.ClassificationHead( + inner_dim=network.get_config()['hidden_size'], + num_classes=num_classes, + initializer=initializer, + dropout_rate=dropout_rate, + cls_token_idx=cls_token_idx, + name='sentence_prediction') + + def call(self, inputs: Mapping[str, Any]): + input_ids = inputs['input_word_ids'] + segment_ids = inputs['input_type_ids'] + input_mask = tf.cast(inputs['input_mask'], tf.float32) + state = inputs.get('mems', None) + + attention_output, _ = self._network( + input_ids=input_ids, + segment_ids=segment_ids, + input_mask=input_mask, + state=state) + + logits = self.classifier(attention_output) + + return logits + + def get_config(self): + return self._config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def checkpoint_items(self): + items = dict(encoder=self._network) + if hasattr(self.classifier, 'checkpoint_items'): + for key, item in self.classifier.checkpoint_items.items(): + items['.'.join([self.classifier.name, key])] = item + return items + + +@tf.keras.utils.register_keras_serializable(package='Text') +class XLNetSpanLabeler(tf.keras.Model): + """Span labeler model based on XLNet. 
+ + This is an implementation of the network structure surrounding a + Transformer-XL encoder as described in "XLNet: Generalized Autoregressive + Pretraining for Language Understanding" (https://arxiv.org/abs/1906.08237). + + Args: + network: A transformer network. This network should output a sequence output + and a classification output. Furthermore, it should expose its embedding + table via a "get_embedding_table" method. + start_n_top: Beam size for span start. + end_n_top: Beam size for span end. + dropout_rate: The dropout rate for the span labeling layer. + span_labeling_activation: The activation for the span labeling head. + initializer: The initializer (if any) to use in the span labeling network. + Defaults to a Glorot uniform initializer. + """ + + def __init__( + self, + network: Union[tf.keras.layers.Layer, tf.keras.Model], + start_n_top: int = 5, + end_n_top: int = 5, + dropout_rate: float = 0.1, + span_labeling_activation: tf.keras.initializers.Initializer = 'tanh', + initializer: tf.keras.initializers.Initializer = 'glorot_uniform', + **kwargs): + super().__init__(**kwargs) + self._config = { + 'network': network, + 'start_n_top': start_n_top, + 'end_n_top': end_n_top, + 'dropout_rate': dropout_rate, + 'span_labeling_activation': span_labeling_activation, + 'initializer': initializer, + } + network_config = network.get_config() + try: + input_width = network_config['inner_size'] + self._xlnet_base = True + except KeyError: + # BertEncoder uses 'intermediate_size' due to legacy naming. 
+ input_width = network_config['intermediate_size'] + self._xlnet_base = False + + self._network = network + self._initializer = initializer + self._start_n_top = start_n_top + self._end_n_top = end_n_top + self._dropout_rate = dropout_rate + self._activation = span_labeling_activation + self.span_labeling = networks.XLNetSpanLabeling( + input_width=input_width, + start_n_top=self._start_n_top, + end_n_top=self._end_n_top, + activation=self._activation, + dropout_rate=self._dropout_rate, + initializer=self._initializer) + + def call(self, inputs: Mapping[str, Any]): + input_word_ids = inputs['input_word_ids'] + input_type_ids = inputs['input_type_ids'] + input_mask = inputs['input_mask'] + class_index = inputs['class_index'] + paragraph_mask = inputs['paragraph_mask'] + start_positions = inputs.get('start_positions', None) + + if self._xlnet_base: + attention_output, _ = self._network( + input_ids=input_word_ids, + segment_ids=input_type_ids, + input_mask=input_mask) + else: + network_output_dict = self._network(dict( + input_word_ids=input_word_ids, + input_type_ids=input_type_ids, + input_mask=input_mask)) + attention_output = network_output_dict['sequence_output'] + + outputs = self.span_labeling( + sequence_data=attention_output, + class_index=class_index, + paragraph_mask=paragraph_mask, + start_positions=start_positions) + return outputs + + @property + def checkpoint_items(self): + return dict(encoder=self._network) + + def get_config(self): + return self._config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + diff --git a/official/nlp/modeling/models/xlnet_test.py b/official/nlp/modeling/models/xlnet_test.py new file mode 100644 index 0000000000000000000000000000000000000000..74480a48d9b029fdd7f27f543d490e1f7854bf7f --- /dev/null +++ b/official/nlp/modeling/models/xlnet_test.py @@ -0,0 +1,328 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for XLNet classifier network.""" + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling import networks +from official.nlp.modeling.models import xlnet + + +def _get_xlnet_base() -> tf.keras.layers.Layer: + """Returns a trivial base XLNet model.""" + return networks.XLNetBase( + vocab_size=100, + num_layers=2, + hidden_size=4, + num_attention_heads=2, + head_size=2, + inner_size=2, + dropout_rate=0., + attention_dropout_rate=0., + attention_type='bi', + bi_data=True, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + two_stream=False, + tie_attention_biases=True, + reuse_length=0, + inner_activation='relu') + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. 
+@keras_parameterized.run_all_keras_modes +class XLNetMaskedLMTest(keras_parameterized.TestCase): + + def test_xlnet_masked_lm_head(self): + hidden_size = 10 + seq_length = 8 + batch_size = 2 + masked_lm = xlnet.XLNetMaskedLM(vocab_size=10, + hidden_size=hidden_size, + initializer='glorot_uniform') + sequence_data = np.random.uniform(size=(batch_size, seq_length)) + embedding_table = np.random.uniform(size=(hidden_size, hidden_size)) + mlm_output = masked_lm(sequence_data, embedding_table) + self.assertAllClose(mlm_output.shape, (batch_size, hidden_size)) + + +@keras_parameterized.run_all_keras_modes +class XLNetPretrainerTest(keras_parameterized.TestCase): + + def test_xlnet_trainer(self): + """Validates that the Keras object can be created.""" + seq_length = 4 + num_predictions = 2 + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetPretrainer(network=xlnet_base) + inputs = dict( + input_word_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_word_ids'), + input_type_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_type_ids'), + input_mask=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_mask'), + permutation_mask=tf.keras.layers.Input( + shape=(seq_length, seq_length,), dtype=tf.int32, + name='permutation_mask'), + target_mapping=tf.keras.layers.Input( + shape=(num_predictions, seq_length), dtype=tf.int32, + name='target_mapping'), + masked_tokens=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='masked_tokens')) + logits, _ = xlnet_trainer_model(inputs) + + # [None, hidden_size, vocab_size] + expected_output_shape = [None, 4, 100] + self.assertAllEqual(expected_output_shape, logits.shape.as_list()) + + def test_xlnet_tensor_call(self): + """Validates that the Keras object can be invoked.""" + seq_length = 4 + batch_size = 2 
+ num_predictions = 2 + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetPretrainer(network=xlnet_base) + + sequence_shape = (batch_size, seq_length) + inputs = dict( + input_word_ids=np.random.randint( + 10, size=sequence_shape, dtype='int32'), + input_type_ids=np.random.randint(2, size=sequence_shape, dtype='int32'), + input_mask=np.random.randint(2, size=sequence_shape).astype('int32'), + permutation_mask=np.random.randint( + 2, size=(batch_size, seq_length, seq_length)).astype('int32'), + target_mapping=np.random.randint( + 10, size=(num_predictions, seq_length), dtype='int32'), + masked_tokens=np.random.randint( + 10, size=sequence_shape, dtype='int32')) + xlnet_trainer_model(inputs) + + def test_serialize_deserialize(self): + """Validates that the XLNet trainer can be serialized and deserialized.""" + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetPretrainer( + network=xlnet_base, + mlm_activation='gelu', + mlm_initializer='random_normal') + + # Create another XLNet trainer via serialization and deserialization. + config = xlnet_trainer_model.get_config() + new_xlnet_trainer_model = xlnet.XLNetPretrainer.from_config( + config) + + # Validate that the config can be forced to JSON. + _ = new_xlnet_trainer_model.to_json() + + # If serialization was successful, then the new config should match the old. 
+ self.assertAllEqual(xlnet_trainer_model.get_config(), + new_xlnet_trainer_model.get_config()) + + +@keras_parameterized.run_all_keras_modes +class XLNetClassifierTest(keras_parameterized.TestCase): + + def test_xlnet_trainer(self): + """Validate that the Keras object can be created.""" + num_classes = 2 + seq_length = 4 + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetClassifier( + network=xlnet_base, + num_classes=num_classes, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + summary_type='last', + dropout_rate=0.1) + inputs = dict( + input_word_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_word_ids'), + input_type_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_type_ids'), + input_mask=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_mask'), + permutation_mask=tf.keras.layers.Input( + shape=(seq_length, seq_length,), dtype=tf.int32, + name='permutation_mask'), + masked_tokens=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='masked_tokens')) + logits = xlnet_trainer_model(inputs) + + expected_classification_shape = [None, num_classes] + self.assertAllEqual(expected_classification_shape, logits.shape.as_list()) + + @parameterized.parameters(1, 2) + def test_xlnet_tensor_call(self, num_classes): + """Validates that the Keras object can be invoked.""" + seq_length = 4 + batch_size = 2 + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. 
+ xlnet_trainer_model = xlnet.XLNetClassifier( + network=xlnet_base, + num_classes=num_classes, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + summary_type='last', + dropout_rate=0.1) + + sequence_shape = (batch_size, seq_length) + inputs = dict( + input_word_ids=np.random.randint( + 10, size=sequence_shape, dtype='int32'), + input_type_ids=np.random.randint(2, size=sequence_shape, dtype='int32'), + input_mask=np.random.randint(2, size=sequence_shape).astype('int32'), + permutation_mask=np.random.randint( + 2, size=(batch_size, seq_length, seq_length)).astype('int32'), + masked_tokens=np.random.randint( + 10, size=sequence_shape, dtype='int32')) + xlnet_trainer_model(inputs) + + def test_serialize_deserialize(self): + """Validates that the XLNet trainer can be serialized and deserialized.""" + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetClassifier( + network=xlnet_base, + num_classes=2, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + summary_type='last', + dropout_rate=0.1) + + # Create another XLNet trainer via serialization and deserialization. + config = xlnet_trainer_model.get_config() + new_xlnet_trainer_model = xlnet.XLNetClassifier.from_config( + config) + + # Validate that the config can be forced to JSON. + _ = new_xlnet_trainer_model.to_json() + + # If serialization was successful, then the new config should match the old. + self.assertAllEqual(xlnet_trainer_model.get_config(), + new_xlnet_trainer_model.get_config()) + + +@keras_parameterized.run_all_keras_modes +class XLNetSpanLabelerTest(keras_parameterized.TestCase): + + def test_xlnet_trainer(self): + """Validate that the Keras object can be created.""" + top_n = 2 + seq_length = 4 + # Build a simple XLNet based network to use with the XLNet trainer. 
+ xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetSpanLabeler( + network=xlnet_base, + start_n_top=top_n, + end_n_top=top_n, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + span_labeling_activation='tanh', + dropout_rate=0.1) + inputs = dict( + input_word_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_word_ids'), + input_type_ids=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_type_ids'), + input_mask=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='input_mask'), + paragraph_mask=tf.keras.layers.Input( + shape=(seq_length,), dtype=tf.int32, name='paragraph_mask'), + class_index=tf.keras.layers.Input( + shape=(), dtype=tf.int32, name='class_index'), + start_positions=tf.keras.layers.Input( + shape=(), dtype=tf.int32, name='start_positions')) + outputs = xlnet_trainer_model(inputs) + self.assertIsInstance(outputs, dict) + + # Test tensor value calls for the created model. 
+ batch_size = 2 + sequence_shape = (batch_size, seq_length) + inputs = dict( + input_word_ids=np.random.randint( + 10, size=sequence_shape, dtype='int32'), + input_type_ids=np.random.randint(2, size=sequence_shape, dtype='int32'), + input_mask=np.random.randint(2, size=sequence_shape).astype('int32'), + paragraph_mask=np.random.randint( + 1, size=(sequence_shape)).astype('int32'), + class_index=np.random.randint(1, size=(batch_size)).astype('uint8'), + start_positions=tf.random.uniform( + shape=(batch_size,), maxval=5, dtype=tf.int32)) + + common_keys = { + 'start_logits', 'end_logits', 'start_predictions', 'end_predictions', + 'class_logits', + } + inference_keys = { + 'start_top_predictions', 'end_top_predictions', 'start_top_index', + 'end_top_index', + } + + outputs = xlnet_trainer_model(inputs) + self.assertSetEqual(common_keys | inference_keys, set(outputs.keys())) + + outputs = xlnet_trainer_model(inputs, training=True) + self.assertIsInstance(outputs, dict) + self.assertSetEqual(common_keys, set(outputs.keys())) + self.assertIsInstance(outputs, dict) + + def test_serialize_deserialize(self): + """Validates that the XLNet trainer can be serialized and deserialized.""" + # Build a simple XLNet based network to use with the XLNet trainer. + xlnet_base = _get_xlnet_base() + + # Create an XLNet trainer with the created network. + xlnet_trainer_model = xlnet.XLNetSpanLabeler( + network=xlnet_base, + start_n_top=2, + end_n_top=2, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + span_labeling_activation='tanh', + dropout_rate=0.1) + + # Create another XLNet trainer via serialization and deserialization. + config = xlnet_trainer_model.get_config() + new_xlnet_trainer_model = xlnet.XLNetSpanLabeler.from_config( + config) + + # Validate that the config can be forced to JSON. + _ = new_xlnet_trainer_model.to_json() + + # If serialization was successful, then the new config should match the old. 
+ self.assertAllEqual(xlnet_trainer_model.get_config(), + new_xlnet_trainer_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/networks/README.md b/official/nlp/modeling/networks/README.md index 42347373edc1e2999019c7259dda78bc58138ef2..b192399a7276ef122725f40d2b0e3d237805e644 100644 --- a/official/nlp/modeling/networks/README.md +++ b/official/nlp/modeling/networks/README.md @@ -1,27 +1,39 @@ # Networks -Networks are combinations of layers (and possibly other networks). They are sub-units of models that would not be trained alone. It -encapsulates common network structures like a classification head -or a transformer encoder into an easily handled object with a -standardized configuration. +Networks are combinations of `tf.keras` layers (and possibly other networks). +They are `tf.keras` models that would not be trained alone. It encapsulates +common network structures like a transformer encoder into an easily +handled object with a standardized configuration. -* [`TransformerEncoder`](transformer_encoder.py) implements a bi-directional +* [`BertEncoder`](bert_encoder.py) implements a bi-directional Transformer-based encoder as described in ["BERT: Pre-training of Deep -Bidirectional Transformers for Language Understanding"](https://arxiv.org/abs/1810.04805). It includes the embedding lookups, -transformer layers and pooling layer. +Bidirectional Transformers for Language Understanding"](https://arxiv.org/abs/1810.04805). +It includes the embedding lookups, transformer layers and pooling layer. -* [`AlbertTransformerEncoder`](albert_transformer_encoder.py) implements a +* [`AlbertEncoder`](albert_encoder.py) implements a Transformer-encoder described in the paper ["ALBERT: A Lite BERT for -Self-supervised Learning of Language Representations] -(https://arxiv.org/abs/1909.11942). 
Compared with [BERT](https://arxiv.org/abs/1810.04805), ALBERT refactorizes embedding parameters -into two smaller matrices and shares parameters across layers. +Self-supervised Learning of Language Representations"] +(https://arxiv.org/abs/1909.11942). Compared with [BERT](https://arxiv.org/abs/1810.04805), +ALBERT refactorizes embedding parameters into two smaller matrices and shares +parameters across layers. + +* [`MobileBERTEncoder`](mobile_bert_encoder.py) implements the +MobileBERT network described in the paper ["MobileBERT: a Compact Task-Agnostic +BERT for Resource-Limited Devices"](https://arxiv.org/abs/2004.02984). * [`Classification`](classification.py) contains a single hidden layer, and is intended for use as a classification or regression (if number of classes is set to 1) head. -* [`TokenClassification`](token_classification.py) contains a single hidden -layer, and is intended for use as a token classification head. +* [`PackedSequenceEmbedding`](packed_sequence_embedding.py) implements an +embedding network that supports packed sequences and position ids. -* [`SpanLabeling`](span_labeling.py) implements a single-span labeler (that is, a prediction head that can predict one start and end index per batch item) based on a single dense hidden layer. It can be used in the SQuAD task. +* [`SpanLabeling`](span_labeling.py) implements a single-span labeler +(that is, a prediction head that can predict one start and end index per batch +item) based on a single dense hidden layer. It can be used in the SQuAD task. +* [`XLNetBase`](xlnet_base.py) implements the base network used in "XLNet: +Generalized Autoregressive Pretraining for Language Understanding" +(https://arxiv.org/abs/1906.08237). It includes embedding lookups, +relative position encodings, mask computations, segment matrix computations and +Transformer XL layers using one or two stream relative self-attention. 
diff --git a/official/nlp/modeling/networks/__init__.py b/official/nlp/modeling/networks/__init__.py index b8443e9f9303326a82212ef3da4e3057218522bb..e33256831429ade7b51b71b59c2fcfe2869280ef 100644 --- a/official/nlp/modeling/networks/__init__.py +++ b/official/nlp/modeling/networks/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Networks package definition.""" -from official.nlp.modeling.networks.albert_transformer_encoder import AlbertTransformerEncoder + +"""Networks are combinations of `tf.keras` layers (and possibly other networks). + +They are `tf.keras` models that would not be trained alone. It encapsulates +common network structures like a transformer encoder into an easily +handled object with a standardized configuration. 
+""" +from official.nlp.modeling.networks.albert_encoder import AlbertEncoder +from official.nlp.modeling.networks.bert_encoder import BertEncoder from official.nlp.modeling.networks.classification import Classification from official.nlp.modeling.networks.encoder_scaffold import EncoderScaffold +from official.nlp.modeling.networks.mobile_bert_encoder import MobileBERTEncoder +from official.nlp.modeling.networks.packed_sequence_embedding import PackedSequenceEmbedding from official.nlp.modeling.networks.span_labeling import SpanLabeling -from official.nlp.modeling.networks.token_classification import TokenClassification -from official.nlp.modeling.networks.transformer_encoder import TransformerEncoder +from official.nlp.modeling.networks.span_labeling import XLNetSpanLabeling +from official.nlp.modeling.networks.xlnet_base import XLNetBase +# Backward compatibility. The modules are deprecated. +TransformerEncoder = BertEncoder diff --git a/official/nlp/modeling/networks/albert_encoder.py b/official/nlp/modeling/networks/albert_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..479a75d8132bb3c5920c7646621694ce395d4833 --- /dev/null +++ b/official/nlp/modeling/networks/albert_encoder.py @@ -0,0 +1,211 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ALBERT (https://arxiv.org/abs/1810.04805) text encoder network.""" +# pylint: disable=g-classes-have-attributes +import collections +import tensorflow as tf + +from official.modeling import activations +from official.nlp import keras_nlp +from official.nlp.modeling import layers + + +@tf.keras.utils.register_keras_serializable(package='Text') +class AlbertEncoder(tf.keras.Model): + """ALBERT (https://arxiv.org/abs/1810.04805) text encoder network. + + This network implements the encoder described in the paper "ALBERT: A Lite + BERT for Self-supervised Learning of Language Representations" + (https://arxiv.org/abs/1909.11942). + + Compared with BERT (https://arxiv.org/abs/1810.04805), ALBERT refactorizes + embedding parameters into two smaller matrices and shares parameters + across layers. + + The default values for this object are taken from the ALBERT-Base + implementation described in the paper. + + *Note* that the network is constructed by Keras Functional API. + + Args: + vocab_size: The size of the token vocabulary. + embedding_width: The width of the word embeddings. If the embedding width is + not equal to hidden size, embedding parameters will be factorized into two + matrices in the shape of `(vocab_size, embedding_width)` and + `(embedding_width, hidden_size)`, where `embedding_width` is usually much + smaller than `hidden_size`. + hidden_size: The size of the transformer hidden layers. + num_layers: The number of transformer layers. + num_attention_heads: The number of attention heads for each transformer. The + hidden size must be divisible by the number of attention heads. + max_sequence_length: The maximum sequence length that this encoder can + consume. If None, max_sequence_length uses the value from sequence length. + This determines the variable shape for positional embeddings. + type_vocab_size: The number of types that the 'type_ids' input can take. + intermediate_size: The intermediate size for the transformer layers. 
+ activation: The activation to use for the transformer layers. + dropout_rate: The dropout rate to use for the transformer layers. + attention_dropout_rate: The dropout rate to use for the attention layers + within the transformer layers. + initializer: The initialzer to use for all weights in this encoder. + dict_outputs: Whether to use a dictionary as the model outputs. + """ + + def __init__(self, + vocab_size, + embedding_width=128, + hidden_size=768, + num_layers=12, + num_attention_heads=12, + max_sequence_length=512, + type_vocab_size=16, + intermediate_size=3072, + activation=activations.gelu, + dropout_rate=0.1, + attention_dropout_rate=0.1, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + dict_outputs=False, + **kwargs): + activation = tf.keras.activations.get(activation) + initializer = tf.keras.initializers.get(initializer) + + word_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_word_ids') + mask = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_mask') + type_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_type_ids') + + if embedding_width is None: + embedding_width = hidden_size + embedding_layer = layers.OnDeviceEmbedding( + vocab_size=vocab_size, + embedding_width=embedding_width, + initializer=initializer, + name='word_embeddings') + word_embeddings = embedding_layer(word_ids) + + # Always uses dynamic slicing for simplicity. 
+ position_embedding_layer = keras_nlp.layers.PositionEmbedding( + initializer=initializer, + max_length=max_sequence_length, + name='position_embedding') + position_embeddings = position_embedding_layer(word_embeddings) + + type_embeddings = ( + layers.OnDeviceEmbedding( + vocab_size=type_vocab_size, + embedding_width=embedding_width, + initializer=initializer, + use_one_hot=True, + name='type_embeddings')(type_ids)) + + embeddings = tf.keras.layers.Add()( + [word_embeddings, position_embeddings, type_embeddings]) + embeddings = ( + tf.keras.layers.LayerNormalization( + name='embeddings/layer_norm', + axis=-1, + epsilon=1e-12, + dtype=tf.float32)(embeddings)) + embeddings = (tf.keras.layers.Dropout(rate=dropout_rate)(embeddings)) + # We project the 'embedding' output to 'hidden_size' if it is not already + # 'hidden_size'. + if embedding_width != hidden_size: + embeddings = tf.keras.layers.experimental.EinsumDense( + '...x,xy->...y', + output_shape=hidden_size, + bias_axes='y', + kernel_initializer=initializer, + name='embedding_projection')( + embeddings) + + data = embeddings + attention_mask = keras_nlp.layers.SelfAttentionMask()(data, mask) + shared_layer = keras_nlp.layers.TransformerEncoderBlock( + num_attention_heads=num_attention_heads, + inner_dim=intermediate_size, + inner_activation=activation, + output_dropout=dropout_rate, + attention_dropout=attention_dropout_rate, + kernel_initializer=initializer, + name='transformer') + encoder_outputs = [] + for _ in range(num_layers): + data = shared_layer([data, attention_mask]) + encoder_outputs.append(data) + + # Applying a tf.slice op (through subscript notation) to a Keras tensor + # like this will create a SliceOpLambda layer. This is better than a Lambda + # layer with Python code, because that is fundamentally less portable. 
+ first_token_tensor = data[:, 0, :] + cls_output = tf.keras.layers.Dense( + units=hidden_size, + activation='tanh', + kernel_initializer=initializer, + name='pooler_transform')( + first_token_tensor) + if dict_outputs: + outputs = dict( + sequence_output=data, + encoder_outputs=encoder_outputs, + pooled_output=cls_output, + ) + else: + outputs = [data, cls_output] + + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. + super(AlbertEncoder, self).__init__( + inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs) + config_dict = { + 'vocab_size': vocab_size, + 'embedding_width': embedding_width, + 'hidden_size': hidden_size, + 'num_layers': num_layers, + 'num_attention_heads': num_attention_heads, + 'max_sequence_length': max_sequence_length, + 'type_vocab_size': type_vocab_size, + 'intermediate_size': intermediate_size, + 'activation': tf.keras.activations.serialize(activation), + 'dropout_rate': dropout_rate, + 'attention_dropout_rate': attention_dropout_rate, + 'initializer': tf.keras.initializers.serialize(initializer), + } + + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. 
+ config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self._embedding_layer = embedding_layer + self._position_embedding_layer = position_embedding_layer + + def get_embedding_table(self): + return self._embedding_layer.embeddings + + def get_config(self): + return dict(self._config._asdict()) + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/nlp/modeling/networks/albert_encoder_test.py b/official/nlp/modeling/networks/albert_encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f3cb60c36f9938397a55d17eef00b19cedfdd819 --- /dev/null +++ b/official/nlp/modeling/networks/albert_encoder_test.py @@ -0,0 +1,188 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for ALBERT transformer-based text encoder network.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.networks import albert_encoder + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. 
+@keras_parameterized.run_all_keras_modes +class AlbertEncoderTest(keras_parameterized.TestCase): + + def tearDown(self): + super(AlbertEncoderTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy("float32") + + @parameterized.named_parameters( + dict(testcase_name="default", expected_dtype=tf.float32), + dict(testcase_name="with_float16_dtype", expected_dtype=tf.float16), + ) + def test_network_creation(self, expected_dtype): + hidden_size = 32 + sequence_length = 21 + + kwargs = dict( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + if expected_dtype == tf.float16: + tf.keras.mixed_precision.set_global_policy("mixed_float16") + + # Create a small TransformerEncoder for testing. + test_network = albert_encoder.AlbertEncoder(**kwargs) + + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + data, pooled = test_network([word_ids, mask, type_ids]) + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # If float_dtype is set to float16, the data output is float32 (from a layer + # norm) and pool output should be float16. + self.assertEqual(tf.float32, data.dtype) + self.assertEqual(expected_dtype, pooled.dtype) + + # ALBERT has additional 'embedding_hidden_mapping_in' weights and + # it shares transformer weights. 
+ self.assertNotEmpty( + [x for x in test_network.weights if "embedding_projection/" in x.name]) + self.assertNotEmpty( + [x for x in test_network.weights if "transformer/" in x.name]) + self.assertEmpty( + [x for x in test_network.weights if "transformer/layer" in x.name]) + + def test_network_invocation(self): + hidden_size = 32 + sequence_length = 21 + vocab_size = 57 + num_types = 7 + num_layers = 3 + # Create a small TransformerEncoder for testing. + test_network = albert_encoder.AlbertEncoder( + vocab_size=vocab_size, + embedding_width=8, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=num_layers, + type_vocab_size=num_types) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + data, pooled = test_network([word_ids, mask, type_ids]) + + # Create a model based off of this network: + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + + # Invoke the model. We can't validate the output data here (the model is too + # complex) but this will catch structural runtime errors. 
+ batch_size = 3 + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint( + num_types, size=(batch_size, sequence_length)) + list_outputs = model.predict([word_id_data, mask_data, type_id_data]) + + # Creates a TransformerEncoder with max_sequence_length != sequence_length + max_sequence_length = 128 + test_network = albert_encoder.AlbertEncoder( + vocab_size=vocab_size, + embedding_width=8, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=num_layers, + type_vocab_size=num_types) + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + _ = model.predict([word_id_data, mask_data, type_id_data]) + + # Tests dictionary outputs. + test_network_dict = albert_encoder.AlbertEncoder( + vocab_size=vocab_size, + embedding_width=8, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=num_layers, + type_vocab_size=num_types, + dict_outputs=True) + _ = test_network_dict([word_ids, mask, type_ids]) + test_network_dict.set_weights(test_network.get_weights()) + list_outputs = test_network([word_id_data, mask_data, type_id_data]) + dict_outputs = test_network_dict( + dict( + input_word_ids=word_id_data, + input_mask=mask_data, + input_type_ids=type_id_data)) + self.assertAllEqual(list_outputs[0], dict_outputs["sequence_output"]) + self.assertAllEqual(list_outputs[1], dict_outputs["pooled_output"]) + self.assertLen(dict_outputs["pooled_output"], num_layers) + + def test_serialize_deserialize(self): + tf.keras.mixed_precision.set_global_policy("mixed_float16") + # Create a network object that sets all of its config options. 
+ kwargs = dict( + vocab_size=100, + embedding_width=8, + hidden_size=32, + num_layers=3, + num_attention_heads=2, + max_sequence_length=21, + type_vocab_size=12, + intermediate_size=1223, + activation="relu", + dropout_rate=0.05, + attention_dropout_rate=0.22, + initializer="glorot_uniform") + network = albert_encoder.AlbertEncoder(**kwargs) + + expected_config = dict(kwargs) + expected_config["activation"] = tf.keras.activations.serialize( + tf.keras.activations.get(expected_config["activation"])) + expected_config["initializer"] = tf.keras.initializers.serialize( + tf.keras.initializers.get(expected_config["initializer"])) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = ( + albert_encoder.AlbertEncoder.from_config( + network.get_config())) + + # Validate that the config can be forced to JSON. + _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/modeling/networks/albert_transformer_encoder.py b/official/nlp/modeling/networks/albert_transformer_encoder.py deleted file mode 100644 index 398fb00c18c7341765beec50e9b0e6ecaee46e5c..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/albert_transformer_encoder.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""ALBERT (https://arxiv.org/abs/1810.04805) text encoder network.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import tensorflow as tf - -from official.modeling import activations -from official.nlp.modeling import layers - - -@tf.keras.utils.register_keras_serializable(package='Text') -class AlbertTransformerEncoder(tf.keras.Model): - """ALBERT (https://arxiv.org/abs/1810.04805) text encoder network. - - This network implements the encoder described in the paper "ALBERT: A Lite - BERT for Self-supervised Learning of Language Representations" - (https://arxiv.org/abs/1909.11942). - - Compared with BERT (https://arxiv.org/abs/1810.04805), ALBERT refactorizes - embedding parameters into two smaller matrices and shares parameters - across layers. - - The default values for this object are taken from the ALBERT-Base - implementation described in the paper. - - Arguments: - vocab_size: The size of the token vocabulary. - embedding_width: The width of the word embeddings. If the embedding width is - not equal to hidden size, embedding parameters will be factorized into two - matrices in the shape of ['vocab_size', 'embedding_width'] and - ['embedding_width', 'hidden_size'] ('embedding_width' is usually much - smaller than 'hidden_size'). - hidden_size: The size of the transformer hidden layers. - num_layers: The number of transformer layers. - num_attention_heads: The number of attention heads for each transformer. The - hidden size must be divisible by the number of attention heads. - sequence_length: The sequence length that this encoder expects. 
If None, the - sequence length is dynamic; if an integer, the encoder will require - sequences padded to this length. - max_sequence_length: The maximum sequence length that this encoder can - consume. If None, max_sequence_length uses the value from sequence length. - This determines the variable shape for positional embeddings. - type_vocab_size: The number of types that the 'type_ids' input can take. - intermediate_size: The intermediate size for the transformer layers. - activation: The activation to use for the transformer layers. - dropout_rate: The dropout rate to use for the transformer layers. - attention_dropout_rate: The dropout rate to use for the attention layers - within the transformer layers. - initializer: The initialzer to use for all weights in this encoder. - """ - - def __init__(self, - vocab_size, - embedding_width=128, - hidden_size=768, - num_layers=12, - num_attention_heads=12, - sequence_length=512, - max_sequence_length=None, - type_vocab_size=16, - intermediate_size=3072, - activation=activations.gelu, - dropout_rate=0.1, - attention_dropout_rate=0.1, - initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), - **kwargs): - activation = tf.keras.activations.get(activation) - initializer = tf.keras.initializers.get(initializer) - - if not max_sequence_length: - max_sequence_length = sequence_length - self._self_setattr_tracking = False - self._config_dict = { - 'vocab_size': vocab_size, - 'embedding_width': embedding_width, - 'hidden_size': hidden_size, - 'num_layers': num_layers, - 'num_attention_heads': num_attention_heads, - 'sequence_length': sequence_length, - 'max_sequence_length': max_sequence_length, - 'type_vocab_size': type_vocab_size, - 'intermediate_size': intermediate_size, - 'activation': tf.keras.activations.serialize(activation), - 'dropout_rate': dropout_rate, - 'attention_dropout_rate': attention_dropout_rate, - 'initializer': tf.keras.initializers.serialize(initializer), - } - - word_ids = tf.keras.layers.Input( 
- shape=(sequence_length,), dtype=tf.int32, name='input_word_ids') - mask = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name='input_mask') - type_ids = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name='input_type_ids') - - if embedding_width is None: - embedding_width = hidden_size - self._embedding_layer = layers.OnDeviceEmbedding( - vocab_size=vocab_size, - embedding_width=embedding_width, - initializer=initializer, - name='word_embeddings') - word_embeddings = self._embedding_layer(word_ids) - - # Always uses dynamic slicing for simplicity. - self._position_embedding_layer = layers.PositionEmbedding( - initializer=initializer, - use_dynamic_slicing=True, - max_sequence_length=max_sequence_length, - name='position_embedding') - position_embeddings = self._position_embedding_layer(word_embeddings) - - type_embeddings = ( - layers.OnDeviceEmbedding( - vocab_size=type_vocab_size, - embedding_width=embedding_width, - initializer=initializer, - use_one_hot=True, - name='type_embeddings')(type_ids)) - - embeddings = tf.keras.layers.Add()( - [word_embeddings, position_embeddings, type_embeddings]) - embeddings = ( - tf.keras.layers.LayerNormalization( - name='embeddings/layer_norm', - axis=-1, - epsilon=1e-12, - dtype=tf.float32)(embeddings)) - embeddings = (tf.keras.layers.Dropout(rate=dropout_rate)(embeddings)) - # We project the 'embedding' output to 'hidden_size' if it is not already - # 'hidden_size'. 
- if embedding_width != hidden_size: - embeddings = tf.keras.layers.experimental.EinsumDense( - '...x,xy->...y', - output_shape=hidden_size, - bias_axes='y', - kernel_initializer=initializer, - name='embedding_projection')( - embeddings) - - data = embeddings - attention_mask = layers.SelfAttentionMask()([data, mask]) - shared_layer = layers.Transformer( - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - intermediate_activation=activation, - dropout_rate=dropout_rate, - attention_dropout_rate=attention_dropout_rate, - kernel_initializer=initializer, - name='transformer') - for _ in range(num_layers): - data = shared_layer([data, attention_mask]) - - first_token_tensor = ( - tf.keras.layers.Lambda(lambda x: tf.squeeze(x[:, 0:1, :], axis=1))(data) - ) - cls_output = tf.keras.layers.Dense( - units=hidden_size, - activation='tanh', - kernel_initializer=initializer, - name='pooler_transform')( - first_token_tensor) - - super(AlbertTransformerEncoder, self).__init__( - inputs=[word_ids, mask, type_ids], outputs=[data, cls_output], **kwargs) - - def get_embedding_table(self): - return self._embedding_layer.embeddings - - def get_config(self): - return self._config_dict - - @classmethod - def from_config(cls, config): - return cls(**config) diff --git a/official/nlp/modeling/networks/albert_transformer_encoder_test.py b/official/nlp/modeling/networks/albert_transformer_encoder_test.py deleted file mode 100644 index 44368e494ae04dd9b92c63987e6881aabd8ff4c2..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/albert_transformer_encoder_test.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for ALBERT transformer-based text encoder network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.nlp.modeling.networks import albert_transformer_encoder - - -# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It -# guarantees forward compatibility of this code for the V2 switchover. -@keras_parameterized.run_all_keras_modes -class AlbertTransformerEncoderTest(keras_parameterized.TestCase): - - def tearDown(self): - super(AlbertTransformerEncoderTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy("float32") - - @parameterized.named_parameters( - dict(testcase_name="default", expected_dtype=tf.float32), - dict( - testcase_name="with_float16_dtype", - expected_dtype=tf.float16), - ) - def test_network_creation(self, expected_dtype): - hidden_size = 32 - sequence_length = 21 - - kwargs = dict( - vocab_size=100, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3) - if expected_dtype == tf.float16: - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") - - # Create a small TransformerEncoder for testing. 
- test_network = albert_transformer_encoder.AlbertTransformerEncoder(**kwargs) - - # Create the inputs (note that the first dimension is implicit). - word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) - - expected_data_shape = [None, sequence_length, hidden_size] - expected_pooled_shape = [None, hidden_size] - self.assertAllEqual(expected_data_shape, data.shape.as_list()) - self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) - - # If float_dtype is set to float16, the data output is float32 (from a layer - # norm) and pool output should be float16. - self.assertEqual(tf.float32, data.dtype) - self.assertEqual(expected_dtype, pooled.dtype) - - # ALBERT has additonal 'embedding_hidden_mapping_in' weights and - # it shares transformer weights. - self.assertNotEmpty( - [x for x in test_network.weights if "embedding_projection/" in x.name]) - self.assertNotEmpty( - [x for x in test_network.weights if "transformer/" in x.name]) - self.assertEmpty( - [x for x in test_network.weights if "transformer/layer" in x.name]) - - def test_network_invocation(self): - hidden_size = 32 - sequence_length = 21 - vocab_size = 57 - num_types = 7 - # Create a small TransformerEncoder for testing. - test_network = albert_transformer_encoder.AlbertTransformerEncoder( - vocab_size=vocab_size, - embedding_width=8, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3, - type_vocab_size=num_types) - self.assertTrue( - test_network._position_embedding_layer._use_dynamic_slicing) - # Create the inputs (note that the first dimension is implicit). 
- word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) - - # Create a model based off of this network: - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) - - # Invoke the model. We can't validate the output data here (the model is too - # complex) but this will catch structural runtime errors. - batch_size = 3 - word_id_data = np.random.randint( - vocab_size, size=(batch_size, sequence_length)) - mask_data = np.random.randint(2, size=(batch_size, sequence_length)) - type_id_data = np.random.randint( - num_types, size=(batch_size, sequence_length)) - _ = model.predict([word_id_data, mask_data, type_id_data]) - - # Creates a TransformerEncoder with max_sequence_length != sequence_length - max_sequence_length = 128 - test_network = albert_transformer_encoder.AlbertTransformerEncoder( - vocab_size=vocab_size, - embedding_width=8, - hidden_size=hidden_size, - sequence_length=sequence_length, - max_sequence_length=max_sequence_length, - num_attention_heads=2, - num_layers=3, - type_vocab_size=num_types) - self.assertTrue(test_network._position_embedding_layer._use_dynamic_slicing) - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) - _ = model.predict([word_id_data, mask_data, type_id_data]) - - def test_serialize_deserialize(self): - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") - # Create a network object that sets all of its config options. 
- kwargs = dict( - vocab_size=100, - embedding_width=8, - hidden_size=32, - num_layers=3, - num_attention_heads=2, - sequence_length=21, - max_sequence_length=21, - type_vocab_size=12, - intermediate_size=1223, - activation="relu", - dropout_rate=0.05, - attention_dropout_rate=0.22, - initializer="glorot_uniform") - network = albert_transformer_encoder.AlbertTransformerEncoder(**kwargs) - - expected_config = dict(kwargs) - expected_config["activation"] = tf.keras.activations.serialize( - tf.keras.activations.get(expected_config["activation"])) - expected_config["initializer"] = tf.keras.initializers.serialize( - tf.keras.initializers.get(expected_config["initializer"])) - self.assertEqual(network.get_config(), expected_config) - - # Create another network object from the first object's config. - new_network = ( - albert_transformer_encoder.AlbertTransformerEncoder.from_config( - network.get_config())) - - # Validate that the config can be forced to JSON. - _ = new_network.to_json() - - # If the serialization was successful, the new config should match the old. - self.assertAllEqual(network.get_config(), new_network.get_config()) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/modeling/networks/bert_encoder.py b/official/nlp/modeling/networks/bert_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..496e1d46e1970192f6fe4d47a82fb62854b36883 --- /dev/null +++ b/official/nlp/modeling/networks/bert_encoder.py @@ -0,0 +1,150 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transformer-based text encoder network.""" +# pylint: disable=g-classes-have-attributes +import collections +import tensorflow as tf + +from official.modeling import activations +from official.nlp import keras_nlp + + +# This class is being replaced by keras_nlp.encoders.BertEncoder and merely +# acts as a wrapper if you need: 1) list outputs instead of dict outputs, +# 2) shared embedding layer. +@tf.keras.utils.register_keras_serializable(package='Text') +class BertEncoder(keras_nlp.encoders.BertEncoder): + """Bi-directional Transformer-based encoder network. + + This network implements a bi-directional Transformer-based encoder as + described in "BERT: Pre-training of Deep Bidirectional Transformers for + Language Understanding" (https://arxiv.org/abs/1810.04805). It includes the + embedding lookups and transformer layers, but not the masked language model + or classification task networks. + + The default values for this object are taken from the BERT-Base implementation + in "BERT: Pre-training of Deep Bidirectional Transformers for Language + Understanding". + + *Note* that the network is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: + vocab_size: The size of the token vocabulary. + hidden_size: The size of the transformer hidden layers. + num_layers: The number of transformer layers. + num_attention_heads: The number of attention heads for each transformer. The + hidden size must be divisible by the number of attention heads. + sequence_length: [Deprecated]. 
TODO(hongkuny): remove this argument once no + user is using it. + max_sequence_length: The maximum sequence length that this encoder can + consume. If None, max_sequence_length uses the value from sequence length. + This determines the variable shape for positional embeddings. + type_vocab_size: The number of types that the 'type_ids' input can take. + intermediate_size: The intermediate size for the transformer layers. + activation: The activation to use for the transformer layers. + dropout_rate: The dropout rate to use for the transformer layers. + attention_dropout_rate: The dropout rate to use for the attention layers + within the transformer layers. + initializer: The initialzer to use for all weights in this encoder. + return_all_encoder_outputs: Whether to output sequence embedding outputs of + all encoder transformer layers. Note: when the following `dict_outputs` + argument is True, all encoder outputs are always returned in the dict, + keyed by `encoder_outputs`. + output_range: The sequence output range, [0, output_range), by slicing the + target sequence of the last transformer layer. `None` means the entire + target sequence will attend to the source sequence, which yields the full + output. + embedding_width: The width of the word embeddings. If the embedding width is + not equal to hidden size, embedding parameters will be factorized into two + matrices in the shape of `(vocab_size, embedding_width)` and + `(embedding_width, hidden_size)`, where `embedding_width` is usually much + smaller than `hidden_size`. + embedding_layer: The word embedding layer. `None` means we will create a new + embedding layer. Otherwise, we will reuse the given embedding layer. This + parameter is originally added for ELECTRA model which needs to tie the + generator embeddings with the discriminator embeddings. + dict_outputs: Whether to use a dictionary as the model outputs. 
+ """ + + def __init__(self, + vocab_size, + hidden_size=768, + num_layers=12, + num_attention_heads=12, + sequence_length=None, + max_sequence_length=512, + type_vocab_size=16, + intermediate_size=3072, + activation=activations.gelu, + dropout_rate=0.1, + attention_dropout_rate=0.1, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + return_all_encoder_outputs=False, + output_range=None, + embedding_width=None, + embedding_layer=None, + dict_outputs=False, + **kwargs): + + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. + super(BertEncoder, self).__init__( + vocab_size=vocab_size, + hidden_size=hidden_size, + num_layers=num_layers, + num_attention_heads=num_attention_heads, + max_sequence_length=max_sequence_length, + type_vocab_size=type_vocab_size, + inner_dim=intermediate_size, + inner_activation=activation, + output_dropout=dropout_rate, + attention_dropout=attention_dropout_rate, + initializer=initializer, + output_range=output_range, + embedding_width=embedding_width, + embedding_layer=embedding_layer) + + self._embedding_layer_instance = embedding_layer + + # Replace arguments from keras_nlp.encoders.BertEncoder. 
+ config_dict = self._config._asdict() + config_dict['activation'] = config_dict.pop('inner_activation') + config_dict['intermediate_size'] = config_dict.pop('inner_dim') + config_dict['dropout_rate'] = config_dict.pop('output_dropout') + config_dict['attention_dropout_rate'] = config_dict.pop('attention_dropout') + config_dict['dict_outputs'] = dict_outputs + config_dict['return_all_encoder_outputs'] = return_all_encoder_outputs + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + + if dict_outputs: + return + else: + nested_output = self._nested_outputs + cls_output = nested_output['pooled_output'] + if return_all_encoder_outputs: + encoder_outputs = nested_output['encoder_outputs'] + outputs = [encoder_outputs, cls_output] + else: + sequence_output = nested_output['sequence_output'] + outputs = [sequence_output, cls_output] + super(keras_nlp.encoders.BertEncoder, self).__init__( + inputs=self.inputs, outputs=outputs, **kwargs) diff --git a/official/nlp/modeling/networks/bert_encoder_test.py b/official/nlp/modeling/networks/bert_encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c7dd11155d21ce3b4ca63d6fd7954779b0682ab3 --- /dev/null +++ b/official/nlp/modeling/networks/bert_encoder_test.py @@ -0,0 +1,254 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for transformer-based bert encoder network.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.networks import bert_encoder + + +# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It +# guarantees forward compatibility of this code for the V2 switchover. +@keras_parameterized.run_all_keras_modes +class BertEncoderTest(keras_parameterized.TestCase): + + def tearDown(self): + super(BertEncoderTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy("float32") + + def test_network_creation(self): + hidden_size = 32 + sequence_length = 21 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + data, pooled = test_network([word_ids, mask, type_ids]) + + self.assertIsInstance(test_network.transformer_layers, list) + self.assertLen(test_network.transformer_layers, 3) + self.assertIsInstance(test_network.pooler_layer, tf.keras.layers.Dense) + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # The default output dtype is float32. 
+ self.assertAllEqual(tf.float32, data.dtype) + self.assertAllEqual(tf.float32, pooled.dtype) + + test_network_dict = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3, + dict_outputs=True) + # Create the inputs (note that the first dimension is implicit). + inputs = dict( + input_word_ids=word_ids, input_mask=mask, input_type_ids=type_ids) + _ = test_network_dict(inputs) + + test_network_dict.set_weights(test_network.get_weights()) + batch_size = 2 + vocab_size = 100 + num_types = 2 + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint( + num_types, size=(batch_size, sequence_length)) + list_outputs = test_network([word_id_data, mask_data, type_id_data]) + dict_outputs = test_network_dict( + dict( + input_word_ids=word_id_data, + input_mask=mask_data, + input_type_ids=type_id_data)) + self.assertAllEqual(list_outputs[0], dict_outputs["sequence_output"]) + self.assertAllEqual(list_outputs[1], dict_outputs["pooled_output"]) + + def test_all_encoder_outputs_network_creation(self): + hidden_size = 32 + sequence_length = 21 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3, + return_all_encoder_outputs=True) + # Create the inputs (note that the first dimension is implicit). 
+ word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + all_encoder_outputs, pooled = test_network([word_ids, mask, type_ids]) + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertLen(all_encoder_outputs, 3) + for data in all_encoder_outputs: + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # The default output dtype is float32. + self.assertAllEqual(tf.float32, all_encoder_outputs[-1].dtype) + self.assertAllEqual(tf.float32, pooled.dtype) + + def test_network_creation_with_float16_dtype(self): + hidden_size = 32 + sequence_length = 21 + tf.keras.mixed_precision.set_global_policy("mixed_float16") + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=100, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + data, pooled = test_network([word_ids, mask, type_ids]) + + expected_data_shape = [None, sequence_length, hidden_size] + expected_pooled_shape = [None, hidden_size] + self.assertAllEqual(expected_data_shape, data.shape.as_list()) + self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) + + # If float_dtype is set to float16, the data output is float32 (from a layer + # norm) and pool output should be float16. 
+ self.assertAllEqual(tf.float32, data.dtype) + self.assertAllEqual(tf.float16, pooled.dtype) + + @parameterized.named_parameters( + ("all_sequence", None, 21), + ("output_range", 1, 1), + ) + def test_network_invocation(self, output_range, out_seq_len): + hidden_size = 32 + sequence_length = 21 + vocab_size = 57 + num_types = 7 + # Create a small BertEncoder for testing. + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types, + output_range=output_range) + # Create the inputs (note that the first dimension is implicit). + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + data, pooled = test_network([word_ids, mask, type_ids]) + + # Create a model based off of this network: + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + + # Invoke the model. We can't validate the output data here (the model is too + # complex) but this will catch structural runtime errors. 
+ batch_size = 3 + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint( + num_types, size=(batch_size, sequence_length)) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[1], out_seq_len) + + # Creates a BertEncoder with max_sequence_length != sequence_length + max_sequence_length = 128 + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types) + data, pooled = test_network([word_ids, mask, type_ids]) + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[1], sequence_length) + + # Creates a BertEncoder with embedding_width != hidden_size + test_network = bert_encoder.BertEncoder( + vocab_size=vocab_size, + hidden_size=hidden_size, + max_sequence_length=max_sequence_length, + num_attention_heads=2, + num_layers=3, + type_vocab_size=num_types, + embedding_width=16) + data, pooled = test_network([word_ids, mask, type_ids]) + model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + outputs = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs[0].shape[-1], hidden_size) + self.assertTrue(hasattr(test_network, "_embedding_projection")) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. 
+ kwargs = dict( + vocab_size=100, + hidden_size=32, + num_layers=3, + num_attention_heads=2, + max_sequence_length=21, + type_vocab_size=12, + intermediate_size=1223, + activation="relu", + dropout_rate=0.05, + attention_dropout_rate=0.22, + initializer="glorot_uniform", + return_all_encoder_outputs=False, + output_range=-1, + embedding_width=16, + dict_outputs=True, + embedding_layer=None) + network = bert_encoder.BertEncoder(**kwargs) + expected_config = dict(kwargs) + expected_config["activation"] = tf.keras.activations.serialize( + tf.keras.activations.get(expected_config["activation"])) + expected_config["initializer"] = tf.keras.initializers.serialize( + tf.keras.initializers.get(expected_config["initializer"])) + + self.assertEqual(network.get_config(), expected_config) + # Create another network object from the first object's config. + new_network = bert_encoder.BertEncoder.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. + _ = network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + # Tests model saving/loading. + model_path = self.get_temp_dir() + "/model" + network.save(model_path) + _ = tf.keras.models.load_model(model_path) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/modeling/networks/classification.py b/official/nlp/modeling/networks/classification.py index fc326136cd18593bc5e06dd2f68a1e0da17a1409..79a8a1b013774e04f4fc6a75a2735258b287b0a6 100644 --- a/official/nlp/modeling/networks/classification.py +++ b/official/nlp/modeling/networks/classification.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,14 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Classification and regression network.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - +import collections import tensorflow as tf @@ -29,15 +25,18 @@ class Classification(tf.keras.Model): This network implements a simple classifier head based on a dense layer. If num_classes is one, it can be considered as a regression problem. - Arguments: + *Note* that the network is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: input_width: The innermost dimension of the input tensor to this network. num_classes: The number of classes that this network should classify to. If equal to 1, a regression problem is assumed. activation: The activation, if any, for the dense layer in this network. - initializer: The intializer for the dense layer in this network. Defaults to - a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. + initializer: The initializer for the dense layer in this network. Defaults + to a Glorot uniform initializer. + output: The output style for this network. Can be either `logits` or + `predictions`. 
""" def __init__(self, @@ -46,35 +45,27 @@ class Classification(tf.keras.Model): initializer='glorot_uniform', output='logits', **kwargs): - self._self_setattr_tracking = False - self._config_dict = { - 'input_width': input_width, - 'num_classes': num_classes, - 'initializer': initializer, - 'output': output, - } cls_output = tf.keras.layers.Input( shape=(input_width,), name='cls_output', dtype=tf.float32) - self.logits = tf.keras.layers.Dense( + logits = tf.keras.layers.Dense( num_classes, activation=None, kernel_initializer=initializer, name='predictions/transform/logits')( cls_output) - policy = tf.keras.mixed_precision.experimental.global_policy() - if policy.name == 'mixed_bfloat16': - # b/158514794: bf16 is not stable with post-softmax cross-entropy. - policy = tf.float32 - predictions = tf.keras.layers.Activation(tf.nn.log_softmax, - dtype=policy)(self.logits) - if output == 'logits': - output_tensors = self.logits + output_tensors = logits elif output == 'predictions': - output_tensors = predictions + policy = tf.keras.mixed_precision.global_policy() + if policy.name == 'mixed_bfloat16': + # b/158514794: bf16 is not stable with post-softmax cross-entropy. + policy = tf.float32 + output_tensors = tf.keras.layers.Activation( + tf.nn.log_softmax, dtype=policy)( + logits) else: raise ValueError( ('Unknown `output` value "%s". `output` can be either "logits" or ' @@ -83,8 +74,30 @@ class Classification(tf.keras.Model): super(Classification, self).__init__( inputs=[cls_output], outputs=output_tensors, **kwargs) + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. 
+ config_dict = { + 'input_width': input_width, + 'num_classes': num_classes, + 'initializer': initializer, + 'output': output, + } + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self.logits = logits + def get_config(self): - return self._config_dict + return dict(self._config._asdict()) @classmethod def from_config(cls, config, custom_objects=None): diff --git a/official/nlp/modeling/networks/classification_test.py b/official/nlp/modeling/networks/classification_test.py index 457c135be4bce0c11faef36f099515ba4b0e8c53..ba0360855ec344225398f1e689dfa08106a42656 100644 --- a/official/nlp/modeling/networks/classification_test.py +++ b/official/nlp/modeling/networks/classification_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for classification network.""" from __future__ import absolute_import @@ -92,8 +92,8 @@ class ClassificationTest(keras_parameterized.TestCase): self.assertAllClose(outputs, calculated_softmax) @parameterized.parameters(1, 10) - def test_network_invocation_with_internal_and_external_logits(self, - num_classes): + def test_network_invocation_with_internal_and_external_logits( + self, num_classes): """Validate that the logit outputs are correct.""" input_width = 512 test_object = classification.Classification( diff --git a/official/nlp/modeling/networks/encoder_scaffold.py b/official/nlp/modeling/networks/encoder_scaffold.py index ec9b2d102db9c3a49de509e9d9011bcf6a758e7f..ee8aff8fe22459a78319712926b64367cb84ea3e 100644 --- a/official/nlp/modeling/networks/encoder_scaffold.py +++ b/official/nlp/modeling/networks/encoder_scaffold.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,19 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Transformer-based text encoder network.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - import inspect +from absl import logging import gin import tensorflow as tf +from official.nlp import keras_nlp from official.nlp.modeling import layers @@ -42,146 +38,143 @@ class EncoderScaffold(tf.keras.Model): class (which will replace the Transformer instantiation in the encoder). For each of these custom injection points, users can pass either a class or a class instance. If a class is passed, that class will be instantiated using - the 'embedding_cfg' or 'hidden_cfg' argument, respectively; if an instance + the `embedding_cfg` or `hidden_cfg` argument, respectively; if an instance is passed, that instance will be invoked. (In the case of hidden_cls, the instance will be invoked 'num_hidden_instances' times. If the hidden_cls is not overridden, a default transformer layer will be instantiated. - Arguments: + *Note* that the network is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: pooled_output_dim: The dimension of pooled output. - pooler_layer_initializer: The initializer for the classification - layer. + pooler_layer_initializer: The initializer for the classification layer. embedding_cls: The class or instance to use to embed the input data. This - class or instance defines the inputs to this encoder and outputs - (1) embeddings tensor with shape [batch_size, seq_length, hidden_size] and - (2) attention masking with tensor [batch_size, seq_length, seq_length]. - If embedding_cls is not set, a default embedding network - (from the original BERT paper) will be created. 
+ class or instance defines the inputs to this encoder and outputs (1) + embeddings tensor with shape `(batch_size, seq_length, hidden_size)` and + (2) attention masking with tensor `(batch_size, seq_length, seq_length)`. + If `embedding_cls` is not set, a default embedding network (from the + original BERT paper) will be created. embedding_cfg: A dict of kwargs to pass to the embedding_cls, if it needs to - be instantiated. If embedding_cls is not set, a config dict must be - passed to 'embedding_cfg' with the following values: - "vocab_size": The size of the token vocabulary. - "type_vocab_size": The size of the type vocabulary. - "hidden_size": The hidden size for this encoder. - "max_seq_length": The maximum sequence length for this encoder. - "seq_length": The sequence length for this encoder. - "initializer": The initializer for the embedding portion of this encoder. - "dropout_rate": The dropout rate to apply before the encoding layers. + be instantiated. If `embedding_cls` is not set, a config dict must be + passed to `embedding_cfg` with the following values: + `vocab_size`: The size of the token vocabulary. + `type_vocab_size`: The size of the type vocabulary. + `hidden_size`: The hidden size for this encoder. + `max_seq_length`: The maximum sequence length for this encoder. + `seq_length`: The sequence length for this encoder. + `initializer`: The initializer for the embedding portion of this encoder. + `dropout_rate`: The dropout rate to apply before the encoding layers. embedding_data: A reference to the embedding weights that will be used to train the masked language model, if necessary. This is optional, and only - needed if (1) you are overriding embedding_cls and (2) are doing standard - pretraining. + needed if (1) you are overriding `embedding_cls` and (2) are doing + standard pretraining. num_hidden_instances: The number of times to instantiate and/or invoke the hidden_cls. - hidden_cls: The class or instance to encode the input data. 
If hidden_cls is - not set, a KerasBERT transformer layer will be used as the encoder class. + hidden_cls: The class or instance to encode the input data. If `hidden_cls` + is not set, a KerasBERT transformer layer will be used as the encoder + class. hidden_cfg: A dict of kwargs to pass to the hidden_cls, if it needs to be instantiated. If hidden_cls is not set, a config dict must be passed to - 'hidden_cfg' with the following values: - "num_attention_heads": The number of attention heads. The hidden size - must be divisible by num_attention_heads. - "intermediate_size": The intermediate size of the transformer. - "intermediate_activation": The activation to apply in the transfomer. - "dropout_rate": The overall dropout rate for the transformer layers. - "attention_dropout_rate": The dropout rate for the attention layers. - "kernel_initializer": The initializer for the transformer layers. + `hidden_cfg` with the following values: + `num_attention_heads`: The number of attention heads. The hidden size + must be divisible by `num_attention_heads`. + `intermediate_size`: The intermediate size of the transformer. + `intermediate_activation`: The activation to apply in the transfomer. + `dropout_rate`: The overall dropout rate for the transformer layers. + `attention_dropout_rate`: The dropout rate for the attention layers. + `kernel_initializer`: The initializer for the transformer layers. + layer_norm_before_pooling: Whether to add a layer norm before the pooling + layer. You probably want to turn this on if you set `norm_first=True` in + transformer layers. return_all_layer_outputs: Whether to output sequence embedding outputs of all encoder transformer layers. + dict_outputs: Whether to use a dictionary as the model outputs. 
""" - def __init__( - self, - pooled_output_dim, - pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( - stddev=0.02), - embedding_cls=None, - embedding_cfg=None, - embedding_data=None, - num_hidden_instances=1, - hidden_cls=layers.Transformer, - hidden_cfg=None, - return_all_layer_outputs=False, - **kwargs): - self._self_setattr_tracking = False - self._hidden_cls = hidden_cls - self._hidden_cfg = hidden_cfg - self._num_hidden_instances = num_hidden_instances - self._pooled_output_dim = pooled_output_dim - self._pooler_layer_initializer = pooler_layer_initializer - self._embedding_cls = embedding_cls - self._embedding_cfg = embedding_cfg - self._embedding_data = embedding_data - self._return_all_layer_outputs = return_all_layer_outputs - self._kwargs = kwargs + def __init__(self, + pooled_output_dim, + pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( + stddev=0.02), + embedding_cls=None, + embedding_cfg=None, + embedding_data=None, + num_hidden_instances=1, + hidden_cls=layers.Transformer, + hidden_cfg=None, + layer_norm_before_pooling=False, + return_all_layer_outputs=False, + dict_outputs=False, + **kwargs): if embedding_cls: if inspect.isclass(embedding_cls): - self._embedding_network = embedding_cls( + embedding_network = embedding_cls( **embedding_cfg) if embedding_cfg else embedding_cls() else: - self._embedding_network = embedding_cls - inputs = self._embedding_network.inputs - embeddings, attention_mask = self._embedding_network(inputs) + embedding_network = embedding_cls + inputs = embedding_network.inputs + embeddings, attention_mask = embedding_network(inputs) + embedding_layer = None + position_embedding_layer = None + type_embedding_layer = None + embedding_norm_layer = None else: - self._embedding_network = None + embedding_network = None + seq_length = embedding_cfg.get('seq_length', None) word_ids = tf.keras.layers.Input( - shape=(embedding_cfg['seq_length'],), - dtype=tf.int32, - name='input_word_ids') + 
shape=(seq_length,), dtype=tf.int32, name='input_word_ids') mask = tf.keras.layers.Input( - shape=(embedding_cfg['seq_length'],), - dtype=tf.int32, - name='input_mask') + shape=(seq_length,), dtype=tf.int32, name='input_mask') type_ids = tf.keras.layers.Input( - shape=(embedding_cfg['seq_length'],), - dtype=tf.int32, - name='input_type_ids') + shape=(seq_length,), dtype=tf.int32, name='input_type_ids') inputs = [word_ids, mask, type_ids] - self._embedding_layer = layers.OnDeviceEmbedding( + embedding_layer = keras_nlp.layers.OnDeviceEmbedding( vocab_size=embedding_cfg['vocab_size'], embedding_width=embedding_cfg['hidden_size'], initializer=embedding_cfg['initializer'], name='word_embeddings') - word_embeddings = self._embedding_layer(word_ids) + word_embeddings = embedding_layer(word_ids) # Always uses dynamic slicing for simplicity. - self._position_embedding_layer = layers.PositionEmbedding( + position_embedding_layer = keras_nlp.layers.PositionEmbedding( initializer=embedding_cfg['initializer'], - use_dynamic_slicing=True, - max_sequence_length=embedding_cfg['max_seq_length'], + max_length=embedding_cfg['max_seq_length'], name='position_embedding') - position_embeddings = self._position_embedding_layer(word_embeddings) + position_embeddings = position_embedding_layer(word_embeddings) - type_embeddings = ( - layers.OnDeviceEmbedding( - vocab_size=embedding_cfg['type_vocab_size'], - embedding_width=embedding_cfg['hidden_size'], - initializer=embedding_cfg['initializer'], - use_one_hot=True, - name='type_embeddings')(type_ids)) + type_embedding_layer = keras_nlp.layers.OnDeviceEmbedding( + vocab_size=embedding_cfg['type_vocab_size'], + embedding_width=embedding_cfg['hidden_size'], + initializer=embedding_cfg['initializer'], + use_one_hot=True, + name='type_embeddings') + type_embeddings = type_embedding_layer(type_ids) embeddings = tf.keras.layers.Add()( [word_embeddings, position_embeddings, type_embeddings]) - embeddings = ( - tf.keras.layers.LayerNormalization( 
- name='embeddings/layer_norm', - axis=-1, - epsilon=1e-12, - dtype=tf.float32)(embeddings)) + + embedding_norm_layer = tf.keras.layers.LayerNormalization( + name='embeddings/layer_norm', + axis=-1, + epsilon=1e-12, + dtype=tf.float32) + embeddings = embedding_norm_layer(embeddings) + embeddings = ( tf.keras.layers.Dropout( rate=embedding_cfg['dropout_rate'])(embeddings)) - attention_mask = layers.SelfAttentionMask()([embeddings, mask]) + attention_mask = keras_nlp.layers.SelfAttentionMask()(embeddings, mask) data = embeddings layer_output_data = [] - self._hidden_layers = [] + hidden_layers = [] for _ in range(num_hidden_instances): if inspect.isclass(hidden_cls): layer = hidden_cls(**hidden_cfg) if hidden_cfg else hidden_cls() @@ -189,43 +182,95 @@ class EncoderScaffold(tf.keras.Model): layer = hidden_cls data = layer([data, attention_mask]) layer_output_data.append(data) - self._hidden_layers.append(layer) + hidden_layers.append(layer) + + if layer_norm_before_pooling: + # Normalize the final output. + output_layer_norm = tf.keras.layers.LayerNormalization( + name='final_layer_norm', + axis=-1, + epsilon=1e-12) + layer_output_data[-1] = output_layer_norm(layer_output_data[-1]) - first_token_tensor = ( - tf.keras.layers.Lambda(lambda x: tf.squeeze(x[:, 0:1, :], axis=1))( - layer_output_data[-1])) - self._pooler_layer = tf.keras.layers.Dense( + last_layer_output = layer_output_data[-1] + # Applying a tf.slice op (through subscript notation) to a Keras tensor + # like this will create a SliceOpLambda layer. This is better than a Lambda + # layer with Python code, because that is fundamentally less portable. 
+ first_token_tensor = last_layer_output[:, 0, :] + pooler_layer = tf.keras.layers.Dense( units=pooled_output_dim, activation='tanh', kernel_initializer=pooler_layer_initializer, name='cls_transform') - cls_output = self._pooler_layer(first_token_tensor) + cls_output = pooler_layer(first_token_tensor) - if return_all_layer_outputs: + if dict_outputs: + outputs = dict( + sequence_output=layer_output_data[-1], + pooled_output=cls_output, + encoder_outputs=layer_output_data, + ) + elif return_all_layer_outputs: outputs = [layer_output_data, cls_output] else: outputs = [layer_output_data[-1], cls_output] + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. 
super(EncoderScaffold, self).__init__( inputs=inputs, outputs=outputs, **kwargs) + self._hidden_cls = hidden_cls + self._hidden_cfg = hidden_cfg + self._num_hidden_instances = num_hidden_instances + self._pooled_output_dim = pooled_output_dim + self._pooler_layer_initializer = pooler_layer_initializer + self._embedding_cls = embedding_cls + self._embedding_cfg = embedding_cfg + self._embedding_data = embedding_data + self._layer_norm_before_pooling = layer_norm_before_pooling + self._return_all_layer_outputs = return_all_layer_outputs + self._dict_outputs = dict_outputs + self._kwargs = kwargs + + self._embedding_layer = embedding_layer + self._embedding_network = embedding_network + self._position_embedding_layer = position_embedding_layer + self._type_embedding_layer = type_embedding_layer + self._embedding_norm_layer = embedding_norm_layer + self._hidden_layers = hidden_layers + if self._layer_norm_before_pooling: + self._output_layer_norm = output_layer_norm + self._pooler_layer = pooler_layer + + logging.info('EncoderScaffold configs: %s', self.get_config()) + def get_config(self): config_dict = { - 'num_hidden_instances': - self._num_hidden_instances, - 'pooled_output_dim': - self._pooled_output_dim, - 'pooler_layer_initializer': - self._pooler_layer_initializer, - 'embedding_cls': - self._embedding_network, - 'embedding_cfg': - self._embedding_cfg, - 'hidden_cfg': - self._hidden_cfg, - 'return_all_layer_outputs': - self._return_all_layer_outputs, + 'num_hidden_instances': self._num_hidden_instances, + 'pooled_output_dim': self._pooled_output_dim, + 'pooler_layer_initializer': self._pooler_layer_initializer, + 'embedding_cls': self._embedding_network, + 'embedding_cfg': self._embedding_cfg, + 'layer_norm_before_pooling': self._layer_norm_before_pooling, + 'return_all_layer_outputs': self._return_all_layer_outputs, + 'dict_outputs': self._dict_outputs, } + if self._hidden_cfg: + config_dict['hidden_cfg'] = {} + for k, v in self._hidden_cfg.items(): + # 
`self._hidden_cfg` may contain `class`, e.g., when `hidden_cfg` is + # `TransformerScaffold`, its `attention_cls` argument can be a `class`. + if inspect.isclass(v): + config_dict['hidden_cfg'][k] = tf.keras.utils.get_registered_name(v) + else: + config_dict['hidden_cfg'][k] = v + if inspect.isclass(self._hidden_cls): config_dict['hidden_cls_string'] = tf.keras.utils.get_registered_name( self._hidden_cls) diff --git a/official/nlp/modeling/networks/encoder_scaffold_test.py b/official/nlp/modeling/networks/encoder_scaffold_test.py index 664bccd08e11720918e0060458dc934350d2d594..f743a2ccaacbbbdd30a09f70559e0cef01972751 100644 --- a/official/nlp/modeling/networks/encoder_scaffold_test.py +++ b/official/nlp/modeling/networks/encoder_scaffold_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for transformer-based text encoder network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for EncoderScaffold network.""" from absl.testing import parameterized import numpy as np @@ -35,9 +31,10 @@ from official.nlp.modeling.networks import encoder_scaffold @tf.keras.utils.register_keras_serializable(package="TestOnly") class ValidatedTransformerLayer(layers.Transformer): - def __init__(self, call_list, **kwargs): + def __init__(self, call_list, call_class=None, **kwargs): super(ValidatedTransformerLayer, self).__init__(**kwargs) self.list = call_list + self.call_class = call_class def call(self, inputs): self.list.append(True) @@ -45,10 +42,16 @@ class ValidatedTransformerLayer(layers.Transformer): def get_config(self): config = super(ValidatedTransformerLayer, self).get_config() - config["call_list"] = [] + config["call_list"] = self.list + config["call_class"] = tf.keras.utils.get_registered_name(self.call_class) return config +@tf.keras.utils.register_keras_serializable(package="TestLayerOnly") +class TestLayer(tf.keras.layers.Layer): + pass + + # This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It # guarantees forward compatibility of this code for the V2 switchover. 
@keras_parameterized.run_all_keras_modes @@ -56,7 +59,7 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): def tearDown(self): super(EncoderScaffoldLayerClassTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy("float32") + tf.keras.mixed_precision.set_global_policy("float32") @parameterized.named_parameters( dict(testcase_name="only_final_output", return_all_layer_outputs=False), @@ -101,6 +104,7 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): hidden_cls=ValidatedTransformerLayer, hidden_cfg=hidden_cfg, embedding_cfg=embedding_cfg, + layer_norm_before_pooling=True, return_all_layer_outputs=return_all_layer_outputs) # Create the inputs (note that the first dimension is implicit). word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) @@ -132,8 +136,10 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): self.assertNotEmpty(call_list) self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.") + self.assertTrue(hasattr(test_network, "_output_layer_norm")) + def test_network_creation_with_float16_dtype(self): - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") + tf.keras.mixed_precision.set_global_policy("mixed_float16") hidden_size = 32 sequence_length = 21 embedding_cfg = { @@ -218,16 +224,17 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( stddev=0.02), hidden_cfg=hidden_cfg, - embedding_cfg=embedding_cfg) + embedding_cfg=embedding_cfg, + dict_outputs=True) # Create the inputs (note that the first dimension is implicit). 
word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) + outputs = test_network([word_ids, mask, type_ids]) # Create a model based off of this network: - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + model = tf.keras.Model([word_ids, mask, type_ids], outputs) # Invoke the model. We can't validate the output data here (the model is too # complex) but this will catch structural runtime errors. @@ -237,7 +244,8 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): mask_data = np.random.randint(2, size=(batch_size, sequence_length)) type_id_data = np.random.randint( num_types, size=(batch_size, sequence_length)) - _ = model.predict([word_id_data, mask_data, type_id_data]) + preds = model.predict([word_id_data, mask_data, type_id_data]) + self.assertEqual(preds["pooled_output"].shape, (3, hidden_size)) # Creates a EncoderScaffold with max_sequence_length != sequence_length num_types = 7 @@ -272,8 +280,8 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): stddev=0.02), hidden_cfg=hidden_cfg, embedding_cfg=embedding_cfg) - - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) + outputs = test_network([word_ids, mask, type_ids]) + model = tf.keras.Model([word_ids, mask, type_ids], outputs) _ = model.predict([word_id_data, mask_data, type_id_data]) def test_serialize_deserialize(self): @@ -323,6 +331,28 @@ class EncoderScaffoldLayerClassTest(keras_parameterized.TestCase): self.assertAllEqual(network.get_config(), new_network.get_config()) +class Embeddings(tf.keras.Model): + + def __init__(self, vocab_size, hidden_size): + super().__init__() + self.inputs = [ + tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name="input_word_ids"), + tf.keras.layers.Input(shape=(None,), dtype=tf.int32, 
name="input_mask") + ] + self.attention_mask = layers.SelfAttentionMask() + self.embedding_layer = layers.OnDeviceEmbedding( + vocab_size=vocab_size, + embedding_width=hidden_size, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + name="word_embeddings") + + def call(self, inputs): + word_ids, mask = inputs + word_embeddings = self.embedding_layer(word_ids) + return word_embeddings, self.attention_mask([word_embeddings, mask]) + + @keras_parameterized.run_all_keras_modes class EncoderScaffoldEmbeddingNetworkTest(keras_parameterized.TestCase): @@ -334,20 +364,7 @@ class EncoderScaffoldEmbeddingNetworkTest(keras_parameterized.TestCase): # Build an embedding network to swap in for the default network. This one # will have 2 inputs (mask and word_ids) instead of 3, and won't use # positional embeddings. - - word_ids = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name="input_word_ids") - mask = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name="input_mask") - embedding_layer = layers.OnDeviceEmbedding( - vocab_size=vocab_size, - embedding_width=hidden_size, - initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), - name="word_embeddings") - word_embeddings = embedding_layer(word_ids) - attention_mask = layers.SelfAttentionMask()([word_embeddings, mask]) - network = tf.keras.Model([word_ids, mask], - [word_embeddings, attention_mask]) + network = Embeddings(vocab_size, hidden_size) hidden_cfg = { "num_attention_heads": @@ -371,8 +388,7 @@ class EncoderScaffoldEmbeddingNetworkTest(keras_parameterized.TestCase): pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( stddev=0.02), hidden_cfg=hidden_cfg, - embedding_cls=network, - embedding_data=embedding_layer.embeddings) + embedding_cls=network) # Create the inputs (note that the first dimension is implicit). 
word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) @@ -390,11 +406,6 @@ class EncoderScaffoldEmbeddingNetworkTest(keras_parameterized.TestCase): mask_data = np.random.randint(2, size=(batch_size, sequence_length)) _ = model.predict([word_id_data, mask_data]) - # Test that we can get the embedding data that we passed to the object. This - # is necessary to support standard language model training. - self.assertIs(embedding_layer.embeddings, - test_network.get_embedding_table()) - def test_serialize_deserialize(self): hidden_size = 32 sequence_length = 21 @@ -556,7 +567,8 @@ class EncoderScaffoldHiddenInstanceTest(keras_parameterized.TestCase): self.assertNotEmpty(call_list) self.assertTrue(call_list[0], "The passed layer class wasn't instantiated.") - def test_serialize_deserialize(self): + @parameterized.parameters(True, False) + def test_serialize_deserialize(self, use_hidden_cls_instance): hidden_size = 32 sequence_length = 21 vocab_size = 57 @@ -587,21 +599,27 @@ class EncoderScaffoldHiddenInstanceTest(keras_parameterized.TestCase): "kernel_initializer": tf.keras.initializers.TruncatedNormal(stddev=0.02), "call_list": - call_list + call_list, + "call_class": + TestLayer } # Create a small EncoderScaffold for testing. This time, we pass an already- # instantiated layer object. - - xformer = ValidatedTransformerLayer(**hidden_cfg) - - test_network = encoder_scaffold.EncoderScaffold( + kwargs = dict( num_hidden_instances=3, pooled_output_dim=hidden_size, pooler_layer_initializer=tf.keras.initializers.TruncatedNormal( stddev=0.02), - hidden_cls=xformer, embedding_cfg=embedding_cfg) + if use_hidden_cls_instance: + xformer = ValidatedTransformerLayer(**hidden_cfg) + test_network = encoder_scaffold.EncoderScaffold( + hidden_cls=xformer, **kwargs) + else: + test_network = encoder_scaffold.EncoderScaffold( + hidden_cls=ValidatedTransformerLayer, hidden_cfg=hidden_cfg, **kwargs) + # Create another network object from the first object's config. 
new_network = encoder_scaffold.EncoderScaffold.from_config( test_network.get_config()) diff --git a/official/nlp/modeling/networks/mobile_bert_encoder.py b/official/nlp/modeling/networks/mobile_bert_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..d431135c2695acb89feac372ea865d212904e53a --- /dev/null +++ b/official/nlp/modeling/networks/mobile_bert_encoder.py @@ -0,0 +1,185 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MobileBERT text encoder network.""" +import gin +import tensorflow as tf + +from official.nlp import keras_nlp +from official.nlp.modeling import layers + + +@gin.configurable +class MobileBERTEncoder(tf.keras.Model): + """A Keras functional API implementation for MobileBERT encoder.""" + + def __init__(self, + word_vocab_size=30522, + word_embed_size=128, + type_vocab_size=2, + max_sequence_length=512, + num_blocks=24, + hidden_size=512, + num_attention_heads=4, + intermediate_size=512, + intermediate_act_fn='relu', + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + intra_bottleneck_size=128, + initializer_range=0.02, + use_bottleneck_attention=False, + key_query_shared_bottleneck=True, + num_feedforward_networks=4, + normalization_type='no_norm', + classifier_activation=False, + input_mask_dtype='int32', + **kwargs): + """Class initialization. + + Args: + word_vocab_size: Number of words in the vocabulary. 
+ word_embed_size: Word embedding size.
+ type_vocab_size: Number of word types.
+ max_sequence_length: Maximum length of input sequence.
+ num_blocks: Number of transformer blocks in the encoder model.
+ hidden_size: Hidden size for the transformer block.
+ num_attention_heads: Number of attention heads in the transformer block.
+ intermediate_size: The size of the "intermediate" (a.k.a., feed
+ forward) layer.
+ intermediate_act_fn: The non-linear activation function to apply
+ to the output of the intermediate/feed-forward layer.
+ hidden_dropout_prob: Dropout probability for the hidden layers.
+ attention_probs_dropout_prob: Dropout probability of the attention
+ probabilities.
+ intra_bottleneck_size: Size of bottleneck.
+ initializer_range: The stddev of the `truncated_normal_initializer` for
+ initializing all weight matrices.
+ use_bottleneck_attention: Use attention inputs from the bottleneck
+ transformation. If true, the following `key_query_shared_bottleneck`
+ will be ignored.
+ key_query_shared_bottleneck: Whether to share linear transformation for
+ keys and queries.
+ num_feedforward_networks: Number of stacked feed-forward networks.
+ normalization_type: The type of normalization_type, only `no_norm` and
+ `layer_norm` are supported. `no_norm` represents the element-wise linear
+ transformation for the student model, as suggested by the original
+ MobileBERT paper. `layer_norm` is used for the teacher model.
+ classifier_activation: If using the tanh activation for the final
+ representation of the `[CLS]` token in fine-tuning.
+ input_mask_dtype: The dtype of `input_mask` tensor, which is one of the
+ input tensors of this encoder. Defaults to `int32`. If you want
+ to use `tf.lite` quantization, which does not support `Cast` op,
+ please set this argument to `tf.float32` and feed `input_mask`
+ tensor with values in `float32` to avoid `tf.cast` in the computation.
+ **kwargs: Other keyword arguments.
+ """ + self._self_setattr_tracking = False + initializer = tf.keras.initializers.TruncatedNormal( + stddev=initializer_range) + + # layer instantiation + self.embedding_layer = layers.MobileBertEmbedding( + word_vocab_size=word_vocab_size, + word_embed_size=word_embed_size, + type_vocab_size=type_vocab_size, + output_embed_size=hidden_size, + max_sequence_length=max_sequence_length, + normalization_type=normalization_type, + initializer=initializer, + dropout_rate=hidden_dropout_prob) + + self._transformer_layers = [] + for layer_idx in range(num_blocks): + transformer = layers.MobileBertTransformer( + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + intermediate_act_fn=intermediate_act_fn, + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + intra_bottleneck_size=intra_bottleneck_size, + use_bottleneck_attention=use_bottleneck_attention, + key_query_shared_bottleneck=key_query_shared_bottleneck, + num_feedforward_networks=num_feedforward_networks, + normalization_type=normalization_type, + initializer=initializer, + name=f'transformer_layer_{layer_idx}') + self._transformer_layers.append(transformer) + + # input tensor + input_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_word_ids') + input_mask = tf.keras.layers.Input( + shape=(None,), dtype=input_mask_dtype, name='input_mask') + type_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_type_ids') + self.inputs = [input_ids, input_mask, type_ids] + + # The dtype of `attention_mask` will the same as the dtype of `input_mask`. 
+ attention_mask = keras_nlp.layers.SelfAttentionMask()(input_mask, + input_mask) + + # build the computation graph + all_layer_outputs = [] + all_attention_scores = [] + embedding_output = self.embedding_layer(input_ids, type_ids) + all_layer_outputs.append(embedding_output) + prev_output = embedding_output + + for layer_idx in range(num_blocks): + layer_output, attention_score = self._transformer_layers[layer_idx]( + prev_output, + attention_mask, + return_attention_scores=True) + all_layer_outputs.append(layer_output) + all_attention_scores.append(attention_score) + prev_output = layer_output + first_token = tf.squeeze(prev_output[:, 0:1, :], axis=1) + + if classifier_activation: + self._pooler_layer = tf.keras.layers.experimental.EinsumDense( + 'ab,bc->ac', + output_shape=hidden_size, + activation=tf.tanh, + bias_axes='c', + kernel_initializer=initializer, + name='pooler') + first_token = self._pooler_layer(first_token) + else: + self._pooler_layer = None + + outputs = dict( + sequence_output=prev_output, + pooled_output=first_token, + encoder_outputs=all_layer_outputs, + attention_scores=all_attention_scores) + + super(MobileBERTEncoder, self).__init__( + inputs=self.inputs, outputs=outputs, **kwargs) + + def get_embedding_table(self): + return self.embedding_layer.word_embedding.embeddings + + def get_embedding_layer(self): + return self.embedding_layer.word_embedding + + @property + def transformer_layers(self): + """List of Transformer layers in the encoder.""" + return self._transformer_layers + + @property + def pooler_layer(self): + """The pooler dense layer after the transformer layers.""" + return self._pooler_layer diff --git a/official/nlp/modeling/networks/mobile_bert_encoder_test.py b/official/nlp/modeling/networks/mobile_bert_encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2360e7202f87686a83d11bf8d9fd66d1281c1cf1 --- /dev/null +++ b/official/nlp/modeling/networks/mobile_bert_encoder_test.py @@ -0,0 +1,169 @@ +# 
Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf +from official.nlp.modeling import models +from official.nlp.modeling.networks import mobile_bert_encoder + + +def generate_fake_input(batch_size=1, seq_len=5, vocab_size=10000, seed=0): + """Generate consistent fake integer input sequences.""" + np.random.seed(seed) + fake_input = [] + for _ in range(batch_size): + fake_input.append([]) + for _ in range(seq_len): + fake_input[-1].append(np.random.randint(0, vocab_size)) + fake_input = np.asarray(fake_input) + return fake_input + + +class MobileBertEncoderTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.named_parameters( + ('default_setting', 'relu', True, 'no_norm', False), + ('gelu', 'gelu', True, 'no_norm', False), + ('kq_not_shared', 'relu', False, 'no_norm', False), + ('layer_norm', 'relu', True, 'layer_norm', False), + ('use_pooler', 'relu', True, 'no_norm', True), + ('with_pooler_layer', 'relu', True, 'layer_norm', False)) + def test_mobilebert_encoder(self, act_fn, kq_shared_bottleneck, + normalization_type, use_pooler): + hidden_size = 32 + sequence_length = 16 + num_blocks = 3 + test_network = mobile_bert_encoder.MobileBERTEncoder( + word_vocab_size=100, + hidden_size=hidden_size, + num_blocks=num_blocks, + intermediate_act_fn=act_fn, + key_query_shared_bottleneck=kq_shared_bottleneck, + 
normalization_type=normalization_type, + classifier_activation=use_pooler) + + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + outputs = test_network([word_ids, mask, type_ids]) + layer_output, pooler_output = outputs['sequence_output'], outputs[ + 'pooled_output'] + + self.assertIsInstance(test_network.transformer_layers, list) + self.assertLen(test_network.transformer_layers, num_blocks) + + layer_output_shape = [None, sequence_length, hidden_size] + self.assertAllEqual(layer_output.shape.as_list(), layer_output_shape) + pooler_output_shape = [None, hidden_size] + self.assertAllEqual(pooler_output.shape.as_list(), pooler_output_shape) + self.assertAllEqual(tf.float32, layer_output.dtype) + + def test_mobilebert_encoder_return_all_layer_output(self): + hidden_size = 32 + sequence_length = 16 + num_blocks = 3 + test_network = mobile_bert_encoder.MobileBERTEncoder( + word_vocab_size=100, + hidden_size=hidden_size, + num_blocks=num_blocks) + + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + outputs = test_network([word_ids, mask, type_ids]) + all_layer_output = outputs['encoder_outputs'] + + self.assertIsInstance(all_layer_output, list) + self.assertLen(all_layer_output, num_blocks + 1) + + @parameterized.parameters('int32', 'float32') + def test_mobilebert_encoder_invocation(self, input_mask_dtype): + vocab_size = 100 + hidden_size = 32 + sequence_length = 16 + num_blocks = 3 + test_network = mobile_bert_encoder.MobileBERTEncoder( + word_vocab_size=vocab_size, + hidden_size=hidden_size, + num_blocks=num_blocks, + input_mask_dtype=input_mask_dtype) + + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = 
tf.keras.Input(shape=(sequence_length,), dtype=input_mask_dtype) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + outputs = test_network([word_ids, mask, type_ids]) + model = tf.keras.Model([word_ids, mask, type_ids], outputs) + + input_seq = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=vocab_size) + input_mask = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=2) + token_type = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=2) + outputs = model.predict([input_seq, input_mask, token_type]) + + sequence_output_shape = [1, sequence_length, hidden_size] + self.assertAllEqual(outputs['sequence_output'].shape, sequence_output_shape) + pooled_output_shape = [1, hidden_size] + self.assertAllEqual(outputs['pooled_output'].shape, pooled_output_shape) + + def test_mobilebert_encoder_invocation_with_attention_score(self): + vocab_size = 100 + hidden_size = 32 + sequence_length = 16 + num_blocks = 3 + test_network = mobile_bert_encoder.MobileBERTEncoder( + word_vocab_size=vocab_size, + hidden_size=hidden_size, + num_blocks=num_blocks) + + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + outputs = test_network([word_ids, mask, type_ids]) + model = tf.keras.Model([word_ids, mask, type_ids], outputs) + + input_seq = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=vocab_size) + input_mask = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=2) + token_type = generate_fake_input( + batch_size=1, seq_len=sequence_length, vocab_size=2) + outputs = model.predict([input_seq, input_mask, token_type]) + self.assertLen(outputs['attention_scores'], num_blocks) + + @parameterized.named_parameters( + ('sequence_classification', models.BertClassifier, [None, 5]), + ('token_classification', 
models.BertTokenClassifier, [None, 16, 5])) + def test_mobilebert_encoder_for_downstream_task(self, task, prediction_shape): + hidden_size = 32 + sequence_length = 16 + mobilebert_encoder = mobile_bert_encoder.MobileBERTEncoder( + word_vocab_size=100, hidden_size=hidden_size) + num_classes = 5 + classifier = task(network=mobilebert_encoder, num_classes=num_classes) + + word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) + prediction = classifier([word_ids, mask, type_ids]) + if task == models.BertTokenClassifier: + prediction = prediction['logits'] + self.assertAllEqual(prediction.shape.as_list(), prediction_shape) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/networks/packed_sequence_embedding.py b/official/nlp/modeling/networks/packed_sequence_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..fd534890e0560809fffd8f2ba3fb9a78d5c2ccd8 --- /dev/null +++ b/official/nlp/modeling/networks/packed_sequence_embedding.py @@ -0,0 +1,319 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""An embedding network supporting packed sequences and position ids."""
+# pylint: disable=g-classes-have-attributes
+import collections
+import tensorflow as tf
+
+from official.modeling import tf_utils
+from official.nlp import keras_nlp
+from official.nlp.modeling import layers
+
+
+@tf.keras.utils.register_keras_serializable(package='Text')
+class PackedSequenceEmbedding(tf.keras.Model):
+ """An embedding network supporting packed sequences and position ids.
+
+ This network implements an embedding layer similar to the one described in
+ "BERT: Pre-training of Deep Bidirectional Transformers for Language
+ Understanding" (https://arxiv.org/abs/1810.04805). On top of it, it
+ supports (1) packing multiple sequences into one sequence and (2) allowing
+ additional "position_ids" as input.
+
+ Args:
+ vocab_size: The size of the token vocabulary.
+ type_vocab_size: The size of the type vocabulary.
+ embedding_width: Width of token embeddings.
+ hidden_size: The output size for this encoder.
+ max_seq_length: The maximum sequence length for this encoder.
+ initializer: The initializer for the embedding portion of this encoder.
+ dropout_rate: The dropout rate to apply before the encoding layers.
+ pack_multiple_sequences: If `True`, we can feed multiple sequences into one
+ sequence for training and inference (they don't impact each other).
+ use_position_id: Whether to expect `position_ids` as an input to the
+ network. If False, the `position_ids` will be inferred: (1) when
+ pack_multiple_sequences is False, we assume the position ids are `0, 1,
+ 2, ..., seq_length - 1`; (2) when `pack_multiple_sequences` is `True`,
+ there may be multiple sub sequences, and for each sub sequence, its
+ position ids start from 0, 1, 2, ...
+ """ + + def __init__(self, + vocab_size, + type_vocab_size, + embedding_width, + hidden_size, + max_seq_length, + initializer, + dropout_rate, + use_position_id=False, + pack_multiple_sequences=False, + **kwargs): + initializer = tf.keras.initializers.get(initializer) + config_dict = { + 'vocab_size': vocab_size, + 'type_vocab_size': type_vocab_size, + 'embedding_width': embedding_width, + 'hidden_size': hidden_size, + 'max_seq_length': max_seq_length, + 'initializer': tf.keras.initializers.serialize(initializer), + 'dropout_rate': dropout_rate, + 'use_position_id': use_position_id, + 'pack_multiple_sequences': pack_multiple_sequences, + } + + word_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_word_ids') + mask = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_mask') + type_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_type_ids') + inputs = { + 'input_word_ids': word_ids, + 'input_mask': mask, + 'input_type_ids': type_ids, + } + if use_position_id: + position_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='position_ids') + inputs['position_ids'] = position_ids + else: + position_ids = None + + if pack_multiple_sequences: + sub_seq_mask = PackedSequenceMask()(word_ids) + else: + sub_seq_mask = None + + embedding_layer = layers.OnDeviceEmbedding( + vocab_size=vocab_size, + embedding_width=embedding_width, + initializer=initializer, + name='word_embeddings') + word_embeddings = embedding_layer(word_ids) + + # Always uses dynamic slicing for simplicity. 
+ position_embedding_layer = PositionEmbeddingWithSubSeqMask( + initializer=initializer, + use_dynamic_slicing=True, + max_sequence_length=max_seq_length, + name='position_embedding') + position_embeddings = position_embedding_layer( + word_embeddings, position_ids, sub_seq_mask) + + type_embeddings = ( + layers.OnDeviceEmbedding( + vocab_size=type_vocab_size, + embedding_width=embedding_width, + initializer=initializer, + use_one_hot=True, + name='type_embeddings')(type_ids)) + + embeddings = tf.keras.layers.Add()( + [word_embeddings, position_embeddings, type_embeddings]) + embeddings = tf.keras.layers.LayerNormalization( + name='embeddings/layer_norm', axis=-1, epsilon=1e-12, dtype=tf.float32)( + embeddings) + embeddings = tf.keras.layers.Dropout( + rate=dropout_rate, dtype=tf.float32)( + embeddings) + + if embedding_width != hidden_size: + embeddings = tf.keras.layers.experimental.EinsumDense( + '...x,xy->...y', + output_shape=hidden_size, + bias_axes=None, + kernel_initializer=initializer, + name='embedding_projection')( + embeddings) + + attention_mask = keras_nlp.layers.SelfAttentionMask()(embeddings, mask) + if sub_seq_mask is not None: + attention_mask = tf.keras.layers.Lambda( + lambda x: x[0] * tf.cast(x[1], x[0].dtype))( + [attention_mask, sub_seq_mask]) + + outputs = [embeddings, attention_mask] + super(PackedSequenceEmbedding, self).__init__( + inputs=inputs, outputs=outputs, **kwargs) + # TF does not track immutable attrs which do not contain Trackables, + # so by creating a config namedtuple instead of a dict we avoid tracking it. 
+ config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self._embedding_layer = embedding_layer + self._position_embedding_layer = position_embedding_layer + + def get_embedding_table(self): + return self._embedding_layer.embeddings + + def get_config(self): + return dict(self._config._asdict()) + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + +@tf.keras.utils.register_keras_serializable(package='Text') +class PackedSequenceMask(tf.keras.layers.Layer): + """A layer to create a mask to indicate multiple sub sequences.""" + + def call(self, input_ids): + """Implements call() for the layer. + + Args: + input_ids: int32 Tensor of shape [batch_size, seq_length]. + + Returns: + boolean Tensor of shape [batch_size, seq_length, seq_length]. [x, y, z] + is True if for x'th instance in a batch, y'th token and z'th token are + from the same sub sequence. + """ + # Suppose + # - the first token in the parent sequence is [CLS]. + # - every sequence starts from [CLS]. + # - every sequence only contains one [CLS]. + seq_start_token = input_ids[:, 0:1] + seq_start_loc = tf.cast(tf.equal(input_ids, seq_start_token), tf.int32) + # Set different ids for different sub sequences. + seq_ids = tf.expand_dims(tf.cumsum(seq_start_loc, -1), -1) + return tf.equal(seq_ids, tf.transpose(seq_ids, [0, 2, 1])) + + +@tf.keras.utils.register_keras_serializable(package='Text') +class PositionEmbeddingWithSubSeqMask(tf.keras.layers.Layer): + """Creates a positional embedding with sub-sequence masking. + + This layer creates a positional embedding as described in "BERT: Pre-training + of Deep Bidirectional Transformers for Language Understanding" + (https://arxiv.org/abs/1810.04805). On top of it, it supports + `position_ids` and `sub_sequence_mask` tensors. + + This layer can be set up to either create a statically shaped slice or a + dynamically shaped slice. 
If `use_dynamic_slicing` is True, the input tensor + can have a dynamic 1st dimension, while if `use_dynamic_slicing` is False the + input size must be fixed. + + Args: + initializer: The initializer to use for the embedding weights. Defaults to + "glorot_uniform". + use_dynamic_slicing: Whether to use the dynamic slicing path. + max_sequence_length: The maximum size of the dynamic sequence. Only + applicable if `use_dynamic_slicing` is True. + """ + + def __init__(self, + initializer='glorot_uniform', + use_dynamic_slicing=False, + max_sequence_length=None, + **kwargs): + # We need to have a default dtype of float32, since the inputs (which Keras + # usually uses to infer the dtype) will always be int32. + if 'dtype' not in kwargs: + kwargs['dtype'] = 'float32' + + super(PositionEmbeddingWithSubSeqMask, self).__init__(**kwargs) + if use_dynamic_slicing and max_sequence_length is None: + raise ValueError( + 'If `use_dynamic_slicing` is True, `max_sequence_length` must be set.' + ) + self._max_sequence_length = max_sequence_length + self._initializer = tf.keras.initializers.get(initializer) + self._use_dynamic_slicing = use_dynamic_slicing + + def get_config(self): + config = { + 'max_sequence_length': self._max_sequence_length, + 'initializer': tf.keras.initializers.serialize(self._initializer), + 'use_dynamic_slicing': self._use_dynamic_slicing, + } + base_config = super(PositionEmbeddingWithSubSeqMask, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + """Implements build() for the layer.""" + dimension_list = input_shape.as_list() + + if len(dimension_list) != 3: + raise ValueError('PositionEmbedding expects a 3-dimensional input tensor ' + 'of shape [batch, sequence, width]') + seq_length = dimension_list[1] + width = dimension_list[2] + + # If we are not using dynamic slicing, we must assume that the sequence + # length is fixed and max_sequence_length should not be specified. 
+ if not self._use_dynamic_slicing: + if seq_length is None: + raise ValueError( + 'PositionEmbedding must have `use_dynamic_slicing` set ' + 'to True (and max_sequence_length set) when the ' + 'sequence (1st) dimension of the input is None.') + if self._max_sequence_length is not None: + raise ValueError( + 'When `use_dynamic_slicing` is False, max_sequence_length should ' + 'not be specified and we ought to use seq_length to get the ' + 'variable shape.') + + if self._max_sequence_length is not None: + weight_sequence_length = self._max_sequence_length + else: + weight_sequence_length = seq_length + + self._position_embeddings = self.add_weight( + 'embeddings', + shape=[weight_sequence_length, width], + initializer=self._initializer) + + super(PositionEmbeddingWithSubSeqMask, self).build(input_shape) + + def call(self, inputs, position_ids=None, sub_sequence_mask=None): + """Implements call() for the layer. + + When `position_ids` is specified, it will return the position embeddings + corresponding to this `position_ids`; otherwise, `position_ids` will be + inferred in the following way: + + (1) When `sub_sequence_mask` is None, we assume the position ids are + 0, 1, 2, ..., seq_length - 1. + (2) When `sub_sequence_mask` is specified, there may be multiple sub + sequences, and for each sub sequence, its position ids start from + 0, 1, 2, ... + + Args: + inputs: Word embeddings in shape [batch, seq_length, embedding_dim]. + position_ids: An optional int32 tensor in shape [batch, seq_length]. + sub_sequence_mask: An optional bool tensor in shape [batch, seq_length, + seq_length]. [x, y, z] is True if for x'th instance in a batch, y'th + token and z'th token are from the same sub sequence. + + Returns: + The position embeddings in shape [batch, seq_length, embedding_dim]. 
+ """ + input_shape = tf_utils.get_shape_list(inputs, expected_rank=3) + if self._use_dynamic_slicing: + position_embeddings = self._position_embeddings[:input_shape[1], :] + else: + position_embeddings = self._position_embeddings + + if position_ids is not None: + return tf.gather(position_embeddings, position_ids) + + if sub_sequence_mask is None: + return tf.broadcast_to(position_embeddings, input_shape) + else: + sub_sequence_mask = tf.cast(sub_sequence_mask, tf.int32) + # For each sub sequence, its position ids start from 0, 1, 2, ... + position_ids = tf.linalg.diag_part(tf.cumsum(sub_sequence_mask, -1)) - 1 + return tf.gather(position_embeddings, position_ids) diff --git a/official/nlp/modeling/networks/packed_sequence_embedding_test.py b/official/nlp/modeling/networks/packed_sequence_embedding_test.py new file mode 100644 index 0000000000000000000000000000000000000000..bfab20ba33898d66fc6e4e4e8e13b30548ac00bb --- /dev/null +++ b/official/nlp/modeling/networks/packed_sequence_embedding_test.py @@ -0,0 +1,134 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.modeling.networks.packed_sequence_embedding.""" + +# Import libraries + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.nlp.modeling.networks import packed_sequence_embedding + + +class PackedSequenceEmbeddingTest(tf.test.TestCase, parameterized.TestCase): + + def tearDown(self): + super(PackedSequenceEmbeddingTest, self).tearDown() + tf.keras.mixed_precision.set_global_policy('float32') + + @parameterized.parameters([ + (True, True, True), + (False, False, True), + (False, True, False), + (True, False, False), + ]) + def test_network_creation(self, use_position_id, pack_multiple_sequences, + use_float16): + """Validate that the Keras object can be created.""" + if use_float16: + tf.keras.mixed_precision.set_global_policy('mixed_float16') + seq_length = 16 + vocab_size = 100 + max_position_embeddings = 32 + type_vocab_size = 2 + embedding_width = 16 + hidden_size = 32 + embedding_cfg = dict( + vocab_size=vocab_size, + type_vocab_size=2, + embedding_width=embedding_width, + hidden_size=hidden_size, + max_seq_length=max_position_embeddings, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + dropout_rate=0.1, + use_position_id=use_position_id, + pack_multiple_sequences=pack_multiple_sequences, + ) + test_object = packed_sequence_embedding.PackedSequenceEmbedding( + **embedding_cfg) + + input_word_ids = tf.keras.Input(shape=(seq_length,), dtype=tf.int32) + input_mask = tf.keras.Input(shape=(seq_length,), dtype=tf.int32) + input_type_ids = tf.keras.Input(shape=(seq_length,), dtype=tf.int32) + network_inputs = { + 'input_word_ids': input_word_ids, + 'input_mask': input_mask, + 'input_type_ids': input_type_ids, + } + if use_position_id: + network_inputs['position_ids'] = tf.keras.Input( + shape=(seq_length,), dtype=tf.int32) + + embedding, mask = test_object(network_inputs) + + # Create a model based off of this network: + model = tf.keras.Model(network_inputs, [embedding, 
mask]) + + # Invoke the model. We can't validate the output data here (the model is too + # complex) but this will catch structural runtime errors. + batch_size = 3 + word_id_data = np.random.randint(vocab_size, size=(batch_size, seq_length)) + mask_data = np.random.randint(2, size=(batch_size, seq_length)) + type_id_data = np.random.randint( + type_vocab_size, size=(batch_size, seq_length)) + feed_input = { + 'input_word_ids': word_id_data, + 'input_mask': mask_data, + 'input_type_ids': type_id_data, + } + if use_position_id: + feed_input['position_ids'] = np.random.randint( + seq_length, size=(batch_size, seq_length)) + embeddings, attention_mask = model.predict(feed_input) + expected_embeddings_shape = [3, seq_length, hidden_size] + expected_attention_mask_shape = [3, seq_length, seq_length] + self.assertAllEqual(expected_embeddings_shape, embeddings.shape) + self.assertAllEqual(expected_attention_mask_shape, attention_mask.shape) + + def test_serialize_deserialize(self): + tf.keras.mixed_precision.set_global_policy('mixed_float16') + # Create a network object that sets all of its config options. + embedding_cfg = dict( + vocab_size=100, + type_vocab_size=2, + embedding_width=64, + hidden_size=64, + max_seq_length=32, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + dropout_rate=0.1, + use_position_id=True, + pack_multiple_sequences=False, + ) + network = packed_sequence_embedding.PackedSequenceEmbedding(**embedding_cfg) + + expected_config = dict(embedding_cfg) + expected_config['initializer'] = tf.keras.initializers.serialize( + tf.keras.initializers.get(expected_config['initializer'])) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = packed_sequence_embedding.PackedSequenceEmbedding.from_config( + network.get_config()) + + # Validate that the config can be forced to JSON. 
+ _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/networks/span_labeling.py b/official/nlp/modeling/networks/span_labeling.py index 2d704c33b6d62ae059d01b81bca146ca1c5adca4..efbf69d19216b24b1af492b4eec7a080d8457265 100644 --- a/official/nlp/modeling/networks/span_labeling.py +++ b/official/nlp/modeling/networks/span_labeling.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,30 +11,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Span labeling network.""" # pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - +import collections import tensorflow as tf +def _apply_paragraph_mask(logits, paragraph_mask): + """Applies a position mask to calculated logits.""" + masked_logits = logits * (paragraph_mask) - 1e30 * (1 - paragraph_mask) + return tf.nn.log_softmax(masked_logits, -1), masked_logits + + @tf.keras.utils.register_keras_serializable(package='Text') class SpanLabeling(tf.keras.Model): """Span labeling network head for BERT modeling. This network implements a simple single-span labeler based on a dense layer. + *Note* that the network is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). 
- Arguments: + Args: input_width: The innermost dimension of the input tensor to this network. activation: The activation, if any, for the dense layer in this network. - initializer: The intializer for the dense layer in this network. Defaults to - a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. + initializer: The initializer for the dense layer in this network. Defaults + to a Glorot uniform initializer. + output: The output style for this network. Can be either `logits` or + `predictions`. """ def __init__(self, @@ -43,13 +47,6 @@ class SpanLabeling(tf.keras.Model): initializer='glorot_uniform', output='logits', **kwargs): - self._self_setattr_tracking = False - self._config = { - 'input_width': input_width, - 'activation': activation, - 'initializer': initializer, - 'output': output, - } sequence_data = tf.keras.layers.Input( shape=(None, input_width), name='sequence_data', dtype=tf.float32) @@ -60,16 +57,14 @@ class SpanLabeling(tf.keras.Model): kernel_initializer=initializer, name='predictions/transform/logits')( sequence_data) - self.start_logits, self.end_logits = ( - tf.keras.layers.Lambda(self._split_output_tensor)(intermediate_logits)) + start_logits, end_logits = self._split_output_tensor(intermediate_logits) start_predictions = tf.keras.layers.Activation(tf.nn.log_softmax)( - self.start_logits) - end_predictions = tf.keras.layers.Activation(tf.nn.log_softmax)( - self.end_logits) + start_logits) + end_predictions = tf.keras.layers.Activation(tf.nn.log_softmax)(end_logits) if output == 'logits': - output_tensors = [self.start_logits, self.end_logits] + output_tensors = [start_logits, end_logits] elif output == 'predictions': output_tensors = [start_predictions, end_predictions] else: @@ -77,13 +72,264 @@ class SpanLabeling(tf.keras.Model): ('Unknown `output` value "%s". 
`output` can be either "logits" or ' '"predictions"') % output) + # b/164516224 + # Once we've created the network using the Functional API, we call + # super().__init__ as though we were invoking the Functional API Model + # constructor, resulting in this object having all the properties of a model + # created using the Functional API. Once super().__init__ is called, we + # can assign attributes to `self` - note that all `self` assignments are + # below this line. super(SpanLabeling, self).__init__( inputs=[sequence_data], outputs=output_tensors, **kwargs) + config_dict = { + 'input_width': input_width, + 'activation': activation, + 'initializer': initializer, + 'output': output, + } + # We are storing the config dict as a namedtuple here to ensure checkpoint + # compatibility with an earlier version of this model which did not track + # the config dict attribute. TF does not track immutable attrs which + # do not contain Trackables, so by creating a config namedtuple instead of + # a dict we avoid tracking it. + config_cls = collections.namedtuple('Config', config_dict.keys()) + self._config = config_cls(**config_dict) + self.start_logits = start_logits + self.end_logits = end_logits def _split_output_tensor(self, tensor): transposed_tensor = tf.transpose(tensor, [2, 0, 1]) return tf.unstack(transposed_tensor) + def get_config(self): + return dict(self._config._asdict()) + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + +class XLNetSpanLabeling(tf.keras.layers.Layer): + """Span labeling network head for XLNet on SQuAD2.0. + + This networks implements a span-labeler based on dense layers and question + possibility classification. This is the complex version seen in the original + XLNet implementation. + + This applies a dense layer to the input sequence data to predict the start + positions, and then uses either the true start positions (if training) or + beam search to predict the end positions. 
+ + **Note: `compute_with_beam_search` will not work with the Functional API + (https://www.tensorflow.org/guide/keras/functional). + + Args: + input_width: The innermost dimension of the input tensor to this network. + start_n_top: Beam size for span start. + end_n_top: Beam size for span end. + activation: The activation, if any, for the dense layer in this network. + dropout_rate: The dropout rate used for answer classification. + initializer: The initializer for the dense layer in this network. Defaults + to a Glorot uniform initializer. + """ + + def __init__(self, + input_width, + start_n_top=5, + end_n_top=5, + activation='tanh', + dropout_rate=0., + initializer='glorot_uniform', + **kwargs): + super().__init__(**kwargs) + self._config = { + 'input_width': input_width, + 'activation': activation, + 'initializer': initializer, + 'start_n_top': start_n_top, + 'end_n_top': end_n_top, + 'dropout_rate': dropout_rate, + } + if start_n_top <= 1: + raise ValueError('`start_n_top` must be greater than 1.') + self._start_n_top = start_n_top + self._end_n_top = end_n_top + self.start_logits_dense = tf.keras.layers.Dense( + units=1, + kernel_initializer=initializer, + name='predictions/transform/start_logits') + + self.end_logits_inner_dense = tf.keras.layers.Dense( + units=input_width, + kernel_initializer=initializer, + activation=activation, + name='predictions/transform/end_logits/inner') + self.end_logits_layer_norm = tf.keras.layers.LayerNormalization( + axis=-1, epsilon=1e-12, + name='predictions/transform/end_logits/layernorm') + self.end_logits_output_dense = tf.keras.layers.Dense( + units=1, + kernel_initializer=initializer, + name='predictions/transform/end_logits/output') + + self.answer_logits_inner = tf.keras.layers.Dense( + units=input_width, + kernel_initializer=initializer, + activation=activation, + name='predictions/transform/answer_logits/inner') + self.answer_logits_dropout = tf.keras.layers.Dropout(rate=dropout_rate) + self.answer_logits_output = 
tf.keras.layers.Dense( + units=1, + kernel_initializer=initializer, + use_bias=False, + name='predictions/transform/answer_logits/output') + + def end_logits(self, inputs): + """Computes the end logits. + + Input shapes into the inner, layer norm, output layers should match. + + During training, inputs shape should be + [batch_size, seq_length, input_width]. + + During inference, input shapes should be + [batch_size, seq_length, start_n_top, input_width]. + + Args: + inputs: The input for end logits. + + Returns: + Calculated end logits. + + """ + if len(tf.shape(inputs)) == 3: + # inputs: [B, S, H] -> [B, S, 1, H] + inputs = tf.expand_dims(inputs, axis=2) + + end_logits = self.end_logits_inner_dense(inputs) + end_logits = self.end_logits_layer_norm(end_logits) + end_logits = self.end_logits_output_dense(end_logits) + end_logits = tf.squeeze(end_logits) + return end_logits + + def call(self, + sequence_data, + class_index, + paragraph_mask=None, + start_positions=None, + training=False): + """Implements call(). + + Einsum glossary: + - b: the batch size. + - l: the sequence length. + - h: the hidden size, or input width. + - k: the start/end top n. + + Args: + sequence_data: The input sequence data of shape + `(batch_size, seq_length, input_width)`. + class_index: The class indices of the inputs of shape `(batch_size,)`. + paragraph_mask: Invalid position mask such as query and special symbols + (e.g. PAD, SEP, CLS) of shape `(batch_size,)`. + start_positions: The start positions of each example of shape + `(batch_size,)`. + training: Whether or not this is the training phase. + + Returns: + A dictionary with the keys `start_predictions`, `end_predictions`, + `start_logits`, `end_logits`. + + If inference, then `start_top_predictions`, `start_top_index`, + `end_top_predictions`, `end_top_index` are also included. 
+ + """ + paragraph_mask = tf.cast(paragraph_mask, dtype=sequence_data.dtype) + class_index = tf.reshape(class_index, [-1]) + + seq_length = tf.shape(sequence_data)[1] + start_logits = self.start_logits_dense(sequence_data) + start_logits = tf.squeeze(start_logits, -1) + start_predictions, masked_start_logits = _apply_paragraph_mask( + start_logits, paragraph_mask) + + compute_with_beam_search = not training or start_positions is None + + if compute_with_beam_search: + # Compute end logits using beam search. + start_top_predictions, start_top_index = tf.nn.top_k( + start_predictions, k=self._start_n_top) + start_index = tf.one_hot( + start_top_index, depth=seq_length, axis=-1, dtype=tf.float32) + # start_index: [batch_size, end_n_top, seq_length] + + start_features = tf.einsum('blh,bkl->bkh', sequence_data, start_index) + start_features = tf.tile(start_features[:, None, :, :], + [1, seq_length, 1, 1]) + # start_features: [batch_size, seq_length, end_n_top, input_width] + + end_input = tf.tile(sequence_data[:, :, None], + [1, 1, self._start_n_top, 1]) + end_input = tf.concat([end_input, start_features], axis=-1) + # end_input: [batch_size, seq_length, end_n_top, 2*input_width] + paragraph_mask = paragraph_mask[:, None, :] + end_logits = self.end_logits(end_input) + + # Note: this will fail if start_n_top is not >= 1. 
+ end_logits = tf.transpose(end_logits, [0, 2, 1]) + else: + start_positions = tf.reshape(start_positions, [-1]) + start_index = tf.one_hot( + start_positions, depth=seq_length, axis=-1, dtype=tf.float32) + # start_index: [batch_size, seq_length] + + start_features = tf.einsum('blh,bl->bh', sequence_data, start_index) + start_features = tf.tile(start_features[:, None, :], [1, seq_length, 1]) + # start_features: [batch_size, seq_length, input_width] + + end_input = tf.concat([sequence_data, start_features], + axis=-1) + # end_input: [batch_size, seq_length, 2*input_width] + end_logits = self.end_logits(end_input) + end_predictions, masked_end_logits = _apply_paragraph_mask( + end_logits, paragraph_mask) + + output_dict = dict( + start_predictions=start_predictions, + end_predictions=end_predictions, + start_logits=masked_start_logits, + end_logits=masked_end_logits) + + if not training: + end_top_predictions, end_top_index = tf.nn.top_k( + end_predictions, k=self._end_n_top) + end_top_predictions = tf.reshape( + end_top_predictions, + [-1, self._start_n_top * self._end_n_top]) + end_top_index = tf.reshape( + end_top_index, + [-1, self._start_n_top * self._end_n_top]) + output_dict['start_top_predictions'] = start_top_predictions + output_dict['start_top_index'] = start_top_index + output_dict['end_top_predictions'] = end_top_predictions + output_dict['end_top_index'] = end_top_index + + # get the representation of CLS + class_index = tf.one_hot(class_index, seq_length, axis=-1, dtype=tf.float32) + class_feature = tf.einsum('blh,bl->bh', sequence_data, class_index) + + # get the representation of START + start_p = tf.nn.softmax(masked_start_logits, axis=-1) + start_feature = tf.einsum('blh,bl->bh', sequence_data, start_p) + + answer_feature = tf.concat([start_feature, class_feature], -1) + answer_feature = self.answer_logits_inner(answer_feature) + answer_feature = self.answer_logits_dropout(answer_feature) + class_logits = self.answer_logits_output(answer_feature) + 
class_logits = tf.squeeze(class_logits, -1) + output_dict['class_logits'] = class_logits + return output_dict + def get_config(self): return self._config diff --git a/official/nlp/modeling/networks/span_labeling_test.py b/official/nlp/modeling/networks/span_labeling_test.py index 8533a77b7830c1abe921fa93cd4e0cd7e8229475..45084520e0cccdb21d6e1aae146a8cb3e2fe9f99 100644 --- a/official/nlp/modeling/networks/span_labeling_test.py +++ b/official/nlp/modeling/networks/span_labeling_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for span_labeling network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for span_labeling network.""" import numpy as np import tensorflow as tf @@ -170,5 +165,144 @@ class SpanLabelingTest(keras_parameterized.TestCase): _ = span_labeling.SpanLabeling(input_width=10, output='bad') +@keras_parameterized.run_all_keras_modes +class XLNetSpanLabelingTest(keras_parameterized.TestCase): + + def test_basic_invocation_train(self): + batch_size = 2 + seq_length = 8 + hidden_size = 4 + sequence_data = np.random.uniform( + size=(batch_size, seq_length, hidden_size)).astype('float32') + paragraph_mask = np.random.uniform( + size=(batch_size, seq_length)).astype('float32') + class_index = np.random.uniform(size=(batch_size)).astype('uint8') + start_positions = np.zeros(shape=(batch_size)).astype('uint8') + + layer = span_labeling.XLNetSpanLabeling( + 
input_width=hidden_size, + start_n_top=2, + end_n_top=2, + activation='tanh', + dropout_rate=0., + initializer='glorot_uniform') + output = layer(sequence_data=sequence_data, + class_index=class_index, + paragraph_mask=paragraph_mask, + start_positions=start_positions, + training=True) + + expected_keys = { + 'start_logits', 'end_logits', 'class_logits', 'start_predictions', + 'end_predictions', + } + self.assertSetEqual(expected_keys, set(output.keys())) + + def test_basic_invocation_beam_search(self): + batch_size = 2 + seq_length = 8 + hidden_size = 4 + top_n = 5 + sequence_data = np.random.uniform( + size=(batch_size, seq_length, hidden_size)).astype('float32') + paragraph_mask = np.random.uniform( + size=(batch_size, seq_length)).astype('float32') + class_index = np.random.uniform(size=(batch_size)).astype('uint8') + + layer = span_labeling.XLNetSpanLabeling( + input_width=hidden_size, + start_n_top=top_n, + end_n_top=top_n, + activation='tanh', + dropout_rate=0., + initializer='glorot_uniform') + output = layer(sequence_data=sequence_data, + class_index=class_index, + paragraph_mask=paragraph_mask, + training=False) + expected_keys = { + 'start_top_predictions', 'end_top_predictions', 'class_logits', + 'start_top_index', 'end_top_index', 'start_logits', + 'end_logits', 'start_predictions', 'end_predictions' + } + self.assertSetEqual(expected_keys, set(output.keys())) + + def test_subclass_invocation(self): + """Tests basic invocation of this layer wrapped in a subclass.""" + seq_length = 8 + hidden_size = 4 + batch_size = 2 + + sequence_data = tf.keras.Input(shape=(seq_length, hidden_size), + dtype=tf.float32) + class_index = tf.keras.Input(shape=(), dtype=tf.uint8) + paragraph_mask = tf.keras.Input(shape=(seq_length), dtype=tf.float32) + start_positions = tf.keras.Input(shape=(), dtype=tf.int32) + + layer = span_labeling.XLNetSpanLabeling( + input_width=hidden_size, + start_n_top=5, + end_n_top=5, + activation='tanh', + dropout_rate=0., + 
initializer='glorot_uniform') + + output = layer(sequence_data=sequence_data, + class_index=class_index, + paragraph_mask=paragraph_mask, + start_positions=start_positions) + model = tf.keras.Model( + inputs={ + 'sequence_data': sequence_data, + 'class_index': class_index, + 'paragraph_mask': paragraph_mask, + 'start_positions': start_positions, + }, + outputs=output) + + sequence_data = tf.random.uniform( + shape=(batch_size, seq_length, hidden_size), dtype=tf.float32) + paragraph_mask = tf.random.uniform( + shape=(batch_size, seq_length), dtype=tf.float32) + class_index = tf.ones(shape=(batch_size,), dtype=tf.uint8) + start_positions = tf.random.uniform( + shape=(batch_size,), maxval=5, dtype=tf.int32) + + inputs = dict(sequence_data=sequence_data, + paragraph_mask=paragraph_mask, + class_index=class_index, + start_positions=start_positions) + + output = model(inputs) + self.assertIsInstance(output, dict) + + # Test `call` without training flag. + output = model(inputs, training=False) + self.assertIsInstance(output, dict) + + # Test `call` with training flag. + # Note: this fails due to incompatibility with the functional API. + with self.assertRaisesRegexp(AssertionError, + 'Could not compute output KerasTensor'): + model(inputs, training=True) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + network = span_labeling.XLNetSpanLabeling( + input_width=128, + start_n_top=5, + end_n_top=1, + activation='tanh', + dropout_rate=0.34, + initializer='zeros') + + # Create another network object from the first object's config. + new_network = span_labeling.XLNetSpanLabeling.from_config( + network.get_config()) + + # If the serialization was successful, the new config should match the old. 
+ self.assertAllEqual(network.get_config(), new_network.get_config()) + + if __name__ == '__main__': tf.test.main() diff --git a/official/nlp/modeling/networks/token_classification.py b/official/nlp/modeling/networks/token_classification.py deleted file mode 100644 index ff6163481e6f267a5aefac352ff38447a275a13a..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/token_classification.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Classification network.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import tensorflow as tf - - -@tf.keras.utils.register_keras_serializable(package='Text') -class TokenClassification(tf.keras.Model): - """TokenClassification network head for BERT modeling. - - This network implements a simple token classifier head based on a dense layer. - - Arguments: - input_width: The innermost dimension of the input tensor to this network. - num_classes: The number of classes that this network should classify to. - activation: The activation, if any, for the dense layer in this network. - initializer: The intializer for the dense layer in this network. 
Defaults to - a Glorot uniform initializer. - output: The output style for this network. Can be either 'logits' or - 'predictions'. - """ - - def __init__(self, - input_width, - num_classes, - initializer='glorot_uniform', - output='logits', - **kwargs): - self._self_setattr_tracking = False - self._config_dict = { - 'input_width': input_width, - 'num_classes': num_classes, - 'initializer': initializer, - 'output': output, - } - - sequence_data = tf.keras.layers.Input( - shape=(None, input_width), name='sequence_data', dtype=tf.float32) - - self.logits = tf.keras.layers.Dense( - num_classes, - activation=None, - kernel_initializer=initializer, - name='predictions/transform/logits')( - sequence_data) - predictions = tf.keras.layers.Activation(tf.nn.log_softmax)(self.logits) - - if output == 'logits': - output_tensors = self.logits - elif output == 'predictions': - output_tensors = predictions - else: - raise ValueError( - ('Unknown `output` value "%s". `output` can be either "logits" or ' - '"predictions"') % output) - - super(TokenClassification, self).__init__( - inputs=[sequence_data], outputs=output_tensors, **kwargs) - - def get_config(self): - return self._config_dict - - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) diff --git a/official/nlp/modeling/networks/token_classification_test.py b/official/nlp/modeling/networks/token_classification_test.py deleted file mode 100644 index eb695c7845b125a5f34d82ff38218ca2dccdfe54..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/token_classification_test.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for token classification network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.nlp.modeling.networks import token_classification - - -# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It -# guarantees forward compatibility of this code for the V2 switchover. -@keras_parameterized.run_all_keras_modes -class TokenClassificationTest(keras_parameterized.TestCase): - - def test_network_creation(self): - """Validate that the Keras object can be created.""" - sequence_length = 5 - input_width = 512 - num_classes = 10 - test_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes) - # Create a 3-dimensional input (the first dimension is implicit). - sequence_data = tf.keras.Input(shape=(sequence_length, input_width), - dtype=tf.float32) - output = test_object(sequence_data) - - # Validate that the outputs are of the expected shape. 
- expected_output_shape = [None, sequence_length, num_classes] - self.assertEqual(expected_output_shape, output.shape.as_list()) - - def test_network_invocation(self): - """Validate that the Keras object can be invoked.""" - sequence_length = 5 - input_width = 512 - num_classes = 10 - test_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes, output='predictions') - # Create a 3-dimensional input (the first dimension is implicit). - sequence_data = tf.keras.Input(shape=(sequence_length, input_width), - dtype=tf.float32) - output = test_object(sequence_data) - - # Invoke the network as part of a Model. - model = tf.keras.Model(sequence_data, output) - input_data = 10 * np.random.random_sample((3, sequence_length, input_width)) - _ = model.predict(input_data) - - def test_network_invocation_with_internal_logits(self): - """Validate that the logit outputs are correct.""" - sequence_length = 5 - input_width = 512 - num_classes = 10 - test_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes, output='predictions') - - # Create a 3-dimensional input (the first dimension is implicit). - sequence_data = tf.keras.Input(shape=(sequence_length, input_width), - dtype=tf.float32) - output = test_object(sequence_data) - model = tf.keras.Model(sequence_data, output) - logits_model = tf.keras.Model(test_object.inputs, test_object.logits) - - batch_size = 3 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, input_width)) - outputs = model.predict(input_data) - logits = logits_model.predict(input_data) - - # Ensure that the tensor shapes are correct. - expected_output_shape = (batch_size, sequence_length, num_classes) - self.assertEqual(expected_output_shape, outputs.shape) - self.assertEqual(expected_output_shape, logits.shape) - - # Ensure that the logits, when softmaxed, create the outputs. 
- input_tensor = tf.keras.Input(expected_output_shape[1:]) - output_tensor = tf.keras.layers.Activation(tf.nn.log_softmax)(input_tensor) - softmax_model = tf.keras.Model(input_tensor, output_tensor) - - calculated_softmax = softmax_model.predict(logits) - self.assertAllClose(outputs, calculated_softmax) - - def test_network_invocation_with_internal_and_external_logits(self): - """Validate that the logit outputs are correct.""" - sequence_length = 5 - input_width = 512 - num_classes = 10 - test_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes, output='logits') - - # Create a 3-dimensional input (the first dimension is implicit). - sequence_data = tf.keras.Input(shape=(sequence_length, input_width), - dtype=tf.float32) - output = test_object(sequence_data) - model = tf.keras.Model(sequence_data, output) - logits_model = tf.keras.Model(test_object.inputs, test_object.logits) - - batch_size = 3 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, input_width)) - outputs = model.predict(input_data) - logits = logits_model.predict(input_data) - - # Ensure that the tensor shapes are correct. - expected_output_shape = (batch_size, sequence_length, num_classes) - self.assertEqual(expected_output_shape, outputs.shape) - self.assertEqual(expected_output_shape, logits.shape) - - self.assertAllClose(outputs, logits) - - def test_network_invocation_with_logit_output(self): - """Validate that the logit outputs are correct.""" - sequence_length = 5 - input_width = 512 - num_classes = 10 - test_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes, output='predictions') - logit_object = token_classification.TokenClassification( - input_width=input_width, num_classes=num_classes, output='logits') - logit_object.set_weights(test_object.get_weights()) - - # Create a 3-dimensional input (the first dimension is implicit). 
- sequence_data = tf.keras.Input(shape=(sequence_length, input_width), - dtype=tf.float32) - output = test_object(sequence_data) - logit_output = logit_object(sequence_data) - - model = tf.keras.Model(sequence_data, output) - logits_model = tf.keras.Model(sequence_data, logit_output) - - batch_size = 3 - input_data = 10 * np.random.random_sample( - (batch_size, sequence_length, input_width)) - outputs = model.predict(input_data) - logits = logits_model.predict(input_data) - - # Ensure that the tensor shapes are correct. - expected_output_shape = (batch_size, sequence_length, num_classes) - self.assertEqual(expected_output_shape, outputs.shape) - self.assertEqual(expected_output_shape, logits.shape) - - # Ensure that the logits, when softmaxed, create the outputs. - input_tensor = tf.keras.Input(expected_output_shape[1:]) - output_tensor = tf.keras.layers.Activation(tf.nn.log_softmax)(input_tensor) - softmax_model = tf.keras.Model(input_tensor, output_tensor) - - calculated_softmax = softmax_model.predict(logits) - self.assertAllClose(outputs, calculated_softmax) - - def test_serialize_deserialize(self): - # Create a network object that sets all of its config options. - network = token_classification.TokenClassification( - input_width=128, - num_classes=10, - initializer='zeros', - output='predictions') - - # Create another network object from the first object's config. - new_network = token_classification.TokenClassification.from_config( - network.get_config()) - - # Validate that the config can be forced to JSON. - _ = new_network.to_json() - - # If the serialization was successful, the new config should match the old. 
- self.assertAllEqual(network.get_config(), new_network.get_config()) - - def test_unknown_output_type_fails(self): - with self.assertRaisesRegex(ValueError, 'Unknown `output` value "bad".*'): - _ = token_classification.TokenClassification( - input_width=128, num_classes=10, output='bad') - - -if __name__ == '__main__': - tf.test.main() diff --git a/official/nlp/modeling/networks/transformer_encoder.py b/official/nlp/modeling/networks/transformer_encoder.py deleted file mode 100644 index 7c6054ddcc242d5184c6e0e4dcd5102e6955b915..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/transformer_encoder.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Transformer-based text encoder network.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import tensorflow as tf - -from official.modeling import activations -from official.nlp.modeling import layers - - -@tf.keras.utils.register_keras_serializable(package='Text') -class TransformerEncoder(tf.keras.Model): - """Bi-directional Transformer-based encoder network. 
- - This network implements a bi-directional Transformer-based encoder as - described in "BERT: Pre-training of Deep Bidirectional Transformers for - Language Understanding" (https://arxiv.org/abs/1810.04805). It includes the - embedding lookups and transformer layers, but not the masked language model - or classification task networks. - - The default values for this object are taken from the BERT-Base implementation - in "BERT: Pre-training of Deep Bidirectional Transformers for Language - Understanding". - - Arguments: - vocab_size: The size of the token vocabulary. - hidden_size: The size of the transformer hidden layers. - num_layers: The number of transformer layers. - num_attention_heads: The number of attention heads for each transformer. The - hidden size must be divisible by the number of attention heads. - sequence_length: The sequence length that this encoder expects. If None, the - sequence length is dynamic; if an integer, the encoder will require - sequences padded to this length. - max_sequence_length: The maximum sequence length that this encoder can - consume. If None, max_sequence_length uses the value from sequence length. - This determines the variable shape for positional embeddings. - type_vocab_size: The number of types that the 'type_ids' input can take. - intermediate_size: The intermediate size for the transformer layers. - activation: The activation to use for the transformer layers. - dropout_rate: The dropout rate to use for the transformer layers. - attention_dropout_rate: The dropout rate to use for the attention layers - within the transformer layers. - initializer: The initialzer to use for all weights in this encoder. - return_all_encoder_outputs: Whether to output sequence embedding outputs of - all encoder transformer layers. - output_range: The sequence output range, [0, output_range), by slicing the - target sequence of the last transformer layer. 
`None` means the entire - target sequence will attend to the source sequence, which yeilds the full - output. - embedding_width: The width of the word embeddings. If the embedding width - is not equal to hidden size, embedding parameters will be factorized into - two matrices in the shape of ['vocab_size', 'embedding_width'] and - ['embedding_width', 'hidden_size'] ('embedding_width' is usually much - smaller than 'hidden_size'). - embedding_layer: The word embedding layer. `None` means we will create a new - embedding layer. Otherwise, we will reuse the given embedding layer. This - parameter is originally added for ELECTRA model which needs to tie the - generator embeddings with the discriminator embeddings. - """ - - def __init__(self, - vocab_size, - hidden_size=768, - num_layers=12, - num_attention_heads=12, - sequence_length=512, - max_sequence_length=None, - type_vocab_size=16, - intermediate_size=3072, - activation=activations.gelu, - dropout_rate=0.1, - attention_dropout_rate=0.1, - initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), - return_all_encoder_outputs=False, - output_range=None, - embedding_width=None, - embedding_layer=None, - **kwargs): - activation = tf.keras.activations.get(activation) - initializer = tf.keras.initializers.get(initializer) - - if not max_sequence_length: - max_sequence_length = sequence_length - self._self_setattr_tracking = False - self._config_dict = { - 'vocab_size': vocab_size, - 'hidden_size': hidden_size, - 'num_layers': num_layers, - 'num_attention_heads': num_attention_heads, - 'sequence_length': sequence_length, - 'max_sequence_length': max_sequence_length, - 'type_vocab_size': type_vocab_size, - 'intermediate_size': intermediate_size, - 'activation': tf.keras.activations.serialize(activation), - 'dropout_rate': dropout_rate, - 'attention_dropout_rate': attention_dropout_rate, - 'initializer': tf.keras.initializers.serialize(initializer), - 'return_all_encoder_outputs': return_all_encoder_outputs, - 
'output_range': output_range, - 'embedding_width': embedding_width, - } - - word_ids = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name='input_word_ids') - mask = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name='input_mask') - type_ids = tf.keras.layers.Input( - shape=(sequence_length,), dtype=tf.int32, name='input_type_ids') - - if embedding_width is None: - embedding_width = hidden_size - if embedding_layer is None: - self._embedding_layer = layers.OnDeviceEmbedding( - vocab_size=vocab_size, - embedding_width=embedding_width, - initializer=initializer, - name='word_embeddings') - else: - self._embedding_layer = embedding_layer - word_embeddings = self._embedding_layer(word_ids) - - # Always uses dynamic slicing for simplicity. - self._position_embedding_layer = layers.PositionEmbedding( - initializer=initializer, - use_dynamic_slicing=True, - max_sequence_length=max_sequence_length, - name='position_embedding') - position_embeddings = self._position_embedding_layer(word_embeddings) - self._type_embedding_layer = layers.OnDeviceEmbedding( - vocab_size=type_vocab_size, - embedding_width=embedding_width, - initializer=initializer, - use_one_hot=True, - name='type_embeddings') - type_embeddings = self._type_embedding_layer(type_ids) - - embeddings = tf.keras.layers.Add()( - [word_embeddings, position_embeddings, type_embeddings]) - - embeddings = ( - tf.keras.layers.LayerNormalization( - name='embeddings/layer_norm', - axis=-1, - epsilon=1e-12, - dtype=tf.float32)(embeddings)) - embeddings = ( - tf.keras.layers.Dropout(rate=dropout_rate)(embeddings)) - - # We project the 'embedding' output to 'hidden_size' if it is not already - # 'hidden_size'. 
- if embedding_width != hidden_size: - self._embedding_projection = tf.keras.layers.experimental.EinsumDense( - '...x,xy->...y', - output_shape=hidden_size, - bias_axes='y', - kernel_initializer=initializer, - name='embedding_projection') - embeddings = self._embedding_projection(embeddings) - - self._transformer_layers = [] - data = embeddings - attention_mask = layers.SelfAttentionMask()([data, mask]) - encoder_outputs = [] - for i in range(num_layers): - if i == num_layers - 1 and output_range is not None: - transformer_output_range = output_range - else: - transformer_output_range = None - layer = layers.Transformer( - num_attention_heads=num_attention_heads, - intermediate_size=intermediate_size, - intermediate_activation=activation, - dropout_rate=dropout_rate, - attention_dropout_rate=attention_dropout_rate, - output_range=transformer_output_range, - kernel_initializer=initializer, - name='transformer/layer_%d' % i) - self._transformer_layers.append(layer) - data = layer([data, attention_mask]) - encoder_outputs.append(data) - - first_token_tensor = ( - tf.keras.layers.Lambda(lambda x: tf.squeeze(x[:, 0:1, :], axis=1))( - encoder_outputs[-1])) - self._pooler_layer = tf.keras.layers.Dense( - units=hidden_size, - activation='tanh', - kernel_initializer=initializer, - name='pooler_transform') - cls_output = self._pooler_layer(first_token_tensor) - - if return_all_encoder_outputs: - outputs = [encoder_outputs, cls_output] - else: - outputs = [encoder_outputs[-1], cls_output] - - super(TransformerEncoder, self).__init__( - inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs) - - def get_embedding_table(self): - return self._embedding_layer.embeddings - - def get_embedding_layer(self): - return self._embedding_layer - - def get_config(self): - return self._config_dict - - @property - def transformer_layers(self): - """List of Transformer layers in the encoder.""" - return self._transformer_layers - - @property - def pooler_layer(self): - """The pooler 
dense layer after the transformer layers.""" - return self._pooler_layer - - @classmethod - def from_config(cls, config, custom_objects=None): - return cls(**config) diff --git a/official/nlp/modeling/networks/transformer_encoder_test.py b/official/nlp/modeling/networks/transformer_encoder_test.py deleted file mode 100644 index e9fbc3aaa25e39908618626538902643edaabe72..0000000000000000000000000000000000000000 --- a/official/nlp/modeling/networks/transformer_encoder_test.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for transformer-based text encoder network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import -from official.nlp.modeling.networks import transformer_encoder - - -# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It -# guarantees forward compatibility of this code for the V2 switchover. 
-@keras_parameterized.run_all_keras_modes -class TransformerEncoderTest(keras_parameterized.TestCase): - - def tearDown(self): - super(TransformerEncoderTest, self).tearDown() - tf.keras.mixed_precision.experimental.set_policy("float32") - - def test_network_creation(self): - hidden_size = 32 - sequence_length = 21 - # Create a small TransformerEncoder for testing. - test_network = transformer_encoder.TransformerEncoder( - vocab_size=100, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3) - # Create the inputs (note that the first dimension is implicit). - word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) - - self.assertIsInstance(test_network.transformer_layers, list) - self.assertLen(test_network.transformer_layers, 3) - self.assertIsInstance(test_network.pooler_layer, tf.keras.layers.Dense) - - expected_data_shape = [None, sequence_length, hidden_size] - expected_pooled_shape = [None, hidden_size] - self.assertAllEqual(expected_data_shape, data.shape.as_list()) - self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) - - # The default output dtype is float32. - self.assertAllEqual(tf.float32, data.dtype) - self.assertAllEqual(tf.float32, pooled.dtype) - - def test_all_encoder_outputs_network_creation(self): - hidden_size = 32 - sequence_length = 21 - # Create a small TransformerEncoder for testing. - test_network = transformer_encoder.TransformerEncoder( - vocab_size=100, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3, - return_all_encoder_outputs=True) - # Create the inputs (note that the first dimension is implicit). 
- word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - all_encoder_outputs, pooled = test_network([word_ids, mask, type_ids]) - - expected_data_shape = [None, sequence_length, hidden_size] - expected_pooled_shape = [None, hidden_size] - self.assertLen(all_encoder_outputs, 3) - for data in all_encoder_outputs: - self.assertAllEqual(expected_data_shape, data.shape.as_list()) - self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) - - # The default output dtype is float32. - self.assertAllEqual(tf.float32, all_encoder_outputs[-1].dtype) - self.assertAllEqual(tf.float32, pooled.dtype) - - def test_network_creation_with_float16_dtype(self): - hidden_size = 32 - sequence_length = 21 - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") - # Create a small TransformerEncoder for testing. - test_network = transformer_encoder.TransformerEncoder( - vocab_size=100, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3) - # Create the inputs (note that the first dimension is implicit). - word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) - - expected_data_shape = [None, sequence_length, hidden_size] - expected_pooled_shape = [None, hidden_size] - self.assertAllEqual(expected_data_shape, data.shape.as_list()) - self.assertAllEqual(expected_pooled_shape, pooled.shape.as_list()) - - # If float_dtype is set to float16, the data output is float32 (from a layer - # norm) and pool output should be float16. 
- self.assertAllEqual(tf.float32, data.dtype) - self.assertAllEqual(tf.float16, pooled.dtype) - - @parameterized.named_parameters( - ("all_sequence", None, 21), - ("output_range", 1, 1), - ) - def test_network_invocation(self, output_range, out_seq_len): - hidden_size = 32 - sequence_length = 21 - vocab_size = 57 - num_types = 7 - # Create a small TransformerEncoder for testing. - test_network = transformer_encoder.TransformerEncoder( - vocab_size=vocab_size, - hidden_size=hidden_size, - sequence_length=sequence_length, - num_attention_heads=2, - num_layers=3, - type_vocab_size=num_types, - output_range=output_range) - self.assertTrue( - test_network._position_embedding_layer._use_dynamic_slicing) - # Create the inputs (note that the first dimension is implicit). - word_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - mask = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - type_ids = tf.keras.Input(shape=(sequence_length,), dtype=tf.int32) - data, pooled = test_network([word_ids, mask, type_ids]) - - # Create a model based off of this network: - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) - - # Invoke the model. We can't validate the output data here (the model is too - # complex) but this will catch structural runtime errors. 
- batch_size = 3 - word_id_data = np.random.randint( - vocab_size, size=(batch_size, sequence_length)) - mask_data = np.random.randint(2, size=(batch_size, sequence_length)) - type_id_data = np.random.randint( - num_types, size=(batch_size, sequence_length)) - _ = model.predict([word_id_data, mask_data, type_id_data]) - - # Creates a TransformerEncoder with max_sequence_length != sequence_length - max_sequence_length = 128 - test_network = transformer_encoder.TransformerEncoder( - vocab_size=vocab_size, - hidden_size=hidden_size, - sequence_length=sequence_length, - max_sequence_length=max_sequence_length, - num_attention_heads=2, - num_layers=3, - type_vocab_size=num_types) - self.assertTrue(test_network._position_embedding_layer._use_dynamic_slicing) - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) - outputs = model.predict([word_id_data, mask_data, type_id_data]) - self.assertEqual(outputs[0].shape[1], out_seq_len) - - # Creates a TransformerEncoder with embedding_width != hidden_size - test_network = transformer_encoder.TransformerEncoder( - vocab_size=vocab_size, - hidden_size=hidden_size, - sequence_length=sequence_length, - max_sequence_length=max_sequence_length, - num_attention_heads=2, - num_layers=3, - type_vocab_size=num_types, - embedding_width=16) - model = tf.keras.Model([word_ids, mask, type_ids], [data, pooled]) - outputs = model.predict([word_id_data, mask_data, type_id_data]) - self.assertEqual(outputs[0].shape[-1], hidden_size) - self.assertTrue(hasattr(test_network, "_embedding_projection")) - - def test_serialize_deserialize(self): - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") - # Create a network object that sets all of its config options. 
- kwargs = dict( - vocab_size=100, - hidden_size=32, - num_layers=3, - num_attention_heads=2, - sequence_length=21, - max_sequence_length=21, - type_vocab_size=12, - intermediate_size=1223, - activation="relu", - dropout_rate=0.05, - attention_dropout_rate=0.22, - initializer="glorot_uniform", - return_all_encoder_outputs=False, - output_range=-1, - embedding_width=16) - network = transformer_encoder.TransformerEncoder(**kwargs) - - expected_config = dict(kwargs) - expected_config["activation"] = tf.keras.activations.serialize( - tf.keras.activations.get(expected_config["activation"])) - expected_config["initializer"] = tf.keras.initializers.serialize( - tf.keras.initializers.get(expected_config["initializer"])) - self.assertEqual(network.get_config(), expected_config) - - # Create another network object from the first object's config. - new_network = transformer_encoder.TransformerEncoder.from_config( - network.get_config()) - - # Validate that the config can be forced to JSON. - _ = new_network.to_json() - - # If the serialization was successful, the new config should match the old. - self.assertAllEqual(network.get_config(), new_network.get_config()) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/modeling/networks/xlnet_base.py b/official/nlp/modeling/networks/xlnet_base.py new file mode 100644 index 0000000000000000000000000000000000000000..ce32d3dfdda85cdeec5ef1cad4bf7cfbb8d43787 --- /dev/null +++ b/official/nlp/modeling/networks/xlnet_base.py @@ -0,0 +1,709 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-based XLNet Model.""" + +from absl import logging + +import tensorflow as tf + +from official.nlp.modeling import layers +from official.nlp.modeling.layers import transformer_xl + +_SEG_ID_CLS = 2 + + +def _create_causal_attention_mask( + seq_length, + memory_length, + dtype=tf.float32, + same_length=False): + """Creates a causal attention mask with a single-sided context. + + When applying the attention mask in `MultiHeadRelativeAttention`, the + attention scores are of shape `[(batch dimensions), S, S + M]`, where: + - S = sequence length. + - M = memory length. + + In a simple case where S = 2, M = 1, here is a simple illustration of the + `attention_scores` matrix, where `a` represents an attention function: + + token_0 [[a(token_0, mem_0) a(token_0, token_0) a(token_0, token_1)], + token_1 [a(token_1, mem_0) a(token_1, token_0) a(token_1, token_1)]] + mem_0 token_0 token_1 + + For uni-directional attention, we want to mask out values in the attention + scores that represent a(token_i, token_j) where j > i. We can achieve this by + concatenating 0s (representing memory positions) with a strictly upper + triangular matrix of 1s. + + We then flip the matrix values in order to match the representation where + real values are 1s. + + Args: + seq_length: int, The length of each sequence. + memory_length: int, The length of memory blocks. + dtype: dtype of the mask. + same_length: bool, whether to use the same attention length for each token. + + Returns: + A unidirectional attention mask of shape + `[seq_length, seq_length + memory_length]`. 
E.g.: + + [[1. 1. 1. 0. 0. 0.] + [1. 1. 1. 1. 0. 0.] + [1. 1. 1. 1. 1. 0.] + [1. 1. 1. 1. 1. 1.]] + """ + ones_matrix = tf.ones([seq_length, seq_length], dtype=dtype) + upper_triangular = tf.linalg.band_part(ones_matrix, 0, -1) + diagonal = tf.linalg.band_part(ones_matrix, 0, 0) + + padding = tf.zeros([seq_length, memory_length], dtype=dtype) + causal_attention_mask = tf.concat( + [padding, upper_triangular - diagonal], 1) + if same_length: + lower_triangular = tf.linalg.band_part(ones_matrix, -1, 0) + strictly_lower_triangular = lower_triangular - diagonal + causal_attention_mask = tf.concat( + [causal_attention_mask[:, :seq_length] + strictly_lower_triangular, + causal_attention_mask[:, seq_length:]], 1) + + return 1 - causal_attention_mask + + +def _combine_masks(mask1, mask2, dtype, how="and"): + """Combines two masks. + + Use "and" if trying to combine two existing masks. + Use "or" if trying to flip a few positions to "real". + + Args: + mask1: tf.Tensor, input mask 1 + mask2: tf.Tensor, input mask 2 + dtype: tf.dtype + how: Which logical operation should run. + + Returns: + The combined input masks. + + """ + if how == "and": + operator = tf.math.logical_and + else: + operator = tf.math.logical_or + return tf.cast(operator( + tf.cast(mask1, tf.bool), + tf.cast(mask2, tf.bool)), dtype=dtype) + + +def _compute_attention_mask( + input_mask, + permutation_mask, + attention_type, + seq_length, + memory_length, + batch_size, + dtype=tf.float32): + """Combines all input attention masks for XLNet. + + In XLNet modeling, `0` represents tokens that can be attended, and `1` + represents tokens that cannot be attended. + + For XLNet pre-training and fine tuning, there are a few masks used: + - Causal attention mask: If the attention type is unidirectional, then all + tokens after the current position cannot be attended to. + - Input mask: when generating data, padding is added to a max sequence length + to make all sequences the same length. 
This masks out real tokens (`0`) from + padding tokens (`1`). + - Permutation mask: during XLNet pretraining, the input sequence is factorized + into a factorization sequence `z`. During partial prediction, `z` is split + at a cutting point `c` (an index of the factorization sequence) and + prediction is only applied to all tokens after `c`. Therefore, tokens at + factorization positions `i` > `c` can be attended to and tokens at + factorization positions `i` <= `c` cannot be attended to. + + This function broadcasts and combines all attention masks to produce the + query attention mask and the content attention mask. + + Args: + input_mask: Tensor, the input mask related to padding. Input shape: + `(B, S)`. + permutation_mask: Tensor, the permutation mask used in partial prediction. + Input shape: `(B, S, S)`. + attention_type: str, the attention type. Can be "uni" (directional) or + "bi" (directional). + seq_length: int, the length of each sequence. + memory_length: int the length of memory blocks. + batch_size: int, the batch size. + dtype: The dtype of the masks. + + Returns: + attention_mask, content_attention_mask: The position and context-based + attention masks and content attention masks, respectively. + + """ + attention_mask = None + # `1` values mean do not attend to this position. 
+ if attention_type == "uni": + causal_attention_mask = _create_causal_attention_mask( + seq_length=seq_length, + memory_length=memory_length, + dtype=dtype) + causal_attention_mask = causal_attention_mask[None, None, :, :] + # `causal_attention_mask`: [1, 1, S, S + M] + + # input_mask: [B, S] + # permutation_mask: [B, S, S] + if input_mask is not None and permutation_mask is not None: + data_mask = _combine_masks(input_mask[:, None, :], permutation_mask, dtype) + elif input_mask is not None and permutation_mask is None: + data_mask = input_mask[:, None, :] + elif input_mask is None and permutation_mask is not None: + data_mask = permutation_mask + else: + data_mask = None + + # data_mask: [B, S, S] or [B, 1, S] + + if data_mask is not None: + # All positions within state can be attended to. + state_mask = tf.ones([batch_size, tf.shape(data_mask)[1], memory_length], + dtype=dtype) + # state_mask: [B, 1, M] or [B, S, M] + data_mask = tf.concat([state_mask, data_mask], 2) + # data_mask: [B, 1, S + M] or [B, S, S + M] + + if attention_type == "uni": + attention_mask = _combine_masks(causal_attention_mask, + data_mask[:, None, :, :], + dtype=dtype) + else: + attention_mask = data_mask[:, None, :, :] + + if attention_mask is not None: + # Construct the content attention mask. + # This ensures that the mask allows the model to attend to positions in + # content positions (e.g. the content diagonal). + non_target_mask = tf.concat( + [tf.zeros([seq_length, memory_length], dtype=dtype), + tf.eye(seq_length, dtype=dtype)], axis=-1) + content_attention_mask = _combine_masks( + attention_mask, non_target_mask, how="or", dtype=dtype) + else: + content_attention_mask = None + + return attention_mask, content_attention_mask + + +def _compute_segment_matrix( + segment_ids, + memory_length, + batch_size, + use_cls_mask): + """Computes the segment embedding matrix. + + XLNet introduced segment-based attention for attention calculations. 
This + extends the idea of relative encodings in Transformer XL by considering + whether or not two positions are within the same segment, rather than + which segments they come from. + + This function generates a segment matrix by broadcasting provided segment IDs + in two different dimensions and checking where values are equal. This output + matrix shows `True` whenever two tokens are NOT in the same segment and + `False` whenever they are. + + Args: + segment_ids: A Tensor of size `[B, S]` that represents which segment + each token belongs to. + memory_length: int, the length of memory blocks. + batch_size: int, the batch size. + use_cls_mask: bool, whether or not to introduce cls mask in + input sequences. + + Returns: + A boolean Tensor of size `[B, S, S + M]`, where `True` means that two + tokens are NOT in the same segment, and `False` means they are in the same + segment. + + """ + if segment_ids is None: + return None + + memory_padding = tf.zeros([batch_size, memory_length], + dtype=segment_ids.dtype) + padded_segment_ids = tf.concat([memory_padding, segment_ids], 1) + # segment_ids: [B, S] + # padded_segment_ids: [B, S + M] + + if use_cls_mask: + # `1` indicates not in the same segment. + # Target result: [B, S, S + M] + + # segment_ids: [B, S] + # padded_segment_ids: [B, S + M] + broadcasted_segment_class_indices = ( + tf.equal(segment_ids, + tf.constant([_SEG_ID_CLS]))[:, :, None]) + + broadcasted_padded_class_indices = ( + tf.equal( + padded_segment_ids, + tf.constant([_SEG_ID_CLS]))[:, None, :]) + + class_index_matrix = tf.logical_or(broadcasted_segment_class_indices, + broadcasted_padded_class_indices) + + segment_matrix = tf.equal(segment_ids[:, :, None], + padded_segment_ids[:, None, :]) + segment_matrix = tf.logical_or(class_index_matrix, segment_matrix) + else: + # TODO(allencwang) - address this legacy mismatch from `use_cls_mask`. 
+ segment_matrix = tf.logical_not( + tf.equal(segment_ids[:, :, None], padded_segment_ids[:, None, :])) + return segment_matrix + + +def _compute_positional_encoding( + attention_type, + position_encoding_layer, + hidden_size, + batch_size, + total_length, + seq_length, + clamp_length, + bi_data, + dtype=tf.float32): + """Computes the relative position encoding. + + Args: + attention_type: str, the attention type. Can be "uni" (directional) or + "bi" (directional). + position_encoding_layer: An instance of `RelativePositionEncoding`. + hidden_size: int, the hidden size. + batch_size: int, the batch size. + total_length: int, the sequence length added to the memory length. + seq_length: int, the length of each sequence. + clamp_length: int, clamp all relative distances larger than clamp_length. -1 + means no clamping. + bi_data: bool, whether to use bidirectional input pipeline. Usually set to + True during pretraining and False during finetuning. + dtype: the dtype of the encoding. + + Returns: + A Tensor, representing the position encoding. 
+ + """ + freq_seq = tf.range(0, hidden_size, 2.0) + if dtype is not None and dtype != tf.float32: + freq_seq = tf.cast(freq_seq, dtype=dtype) + + if attention_type == "bi": + beg, end = total_length, -seq_length + elif attention_type == "uni": + beg, end = total_length, -1 + else: + raise ValueError("Unknown `attention_type` {}.".format(attention_type)) + + if bi_data: + forward_position_sequence = tf.range(beg, end, -1.0) + backward_position_sequence = tf.range(-beg, -end, 1.0) + + if dtype is not None and dtype != tf.float32: + forward_position_sequence = tf.cast(forward_position_sequence, + dtype=dtype) + backward_position_sequence = tf.cast(backward_position_sequence, + dtype=dtype) + + if clamp_length > 0: + forward_position_sequence = tf.clip_by_value( + forward_position_sequence, + -clamp_length, + clamp_length) + backward_position_sequence = tf.clip_by_value( + backward_position_sequence, + -clamp_length, + clamp_length) + + if batch_size is not None: + forward_positional_encoding = position_encoding_layer( + forward_position_sequence, batch_size // 2) + backward_positional_encoding = position_encoding_layer( + backward_position_sequence, batch_size // 2) + else: + forward_positional_encoding = position_encoding_layer( + forward_position_sequence, None) + backward_positional_encoding = position_encoding_layer( + backward_position_sequence, None) + + relative_position_encoding = tf.concat( + [forward_positional_encoding, backward_positional_encoding], axis=0) + else: + forward_position_sequence = tf.range(beg, end, -1.0) + if dtype is not None and dtype != tf.float32: + forward_position_sequence = tf.cast( + forward_position_sequence, dtype=dtype) + if clamp_length > 0: + forward_position_sequence = tf.clip_by_value( + forward_position_sequence, + -clamp_length, + clamp_length) + + relative_position_encoding = position_encoding_layer( + forward_position_sequence, batch_size) + return relative_position_encoding + + +class 
RelativePositionEncoding(tf.keras.layers.Layer): + """Creates a relative positional encoding. + + This layer creates a relative positional encoding as described in + "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + (https://arxiv.org/abs/1901.02860). + + Rather than an absolute position embedding as in Transformer, this + formulation represents position as the relative distance between tokens using + sinusoidal positional embeddings. + + Note: This layer is currently experimental. + + Attributes: + hidden_size: The dimensionality of the input embeddings. + """ + + def __init__(self, hidden_size, **kwargs): + super(RelativePositionEncoding, self).__init__(**kwargs) + self._hidden_size = hidden_size + self._inv_freq = 1.0 / (10000.0**( + tf.range(0, self._hidden_size, 2.0) / self._hidden_size)) + + def call(self, pos_seq, batch_size=None): + """Implements call() for the layer. + + Args: + pos_seq: A 1-D `Tensor` + batch_size: The optionally provided batch size that tiles the relative + positional encoding. + + Returns: + The relative positional encoding of shape: + [batch_size, len(pos_seq), hidden_size] if batch_size is provided, else + [1, len(pos_seq), hidden_size]. + """ + sinusoid_input = tf.einsum("i,d->id", pos_seq, self._inv_freq) + relative_position_encoding = tf.concat([tf.sin(sinusoid_input), + tf.cos(sinusoid_input)], -1) + relative_position_encoding = relative_position_encoding[None, :, :] + if batch_size is not None: + relative_position_encoding = tf.tile(relative_position_encoding, + [batch_size, 1, 1]) + return relative_position_encoding + + +@tf.keras.utils.register_keras_serializable(package="Text") +class XLNetBase(tf.keras.layers.Layer): + """Base XLNet model. + + Attributes: + vocab_size: int, the number of tokens in vocabulary. + num_layers: int, the number of layers. + hidden_size: int, the hidden size. + num_attention_heads: int, the number of attention heads. 
+    head_size: int, the dimension size of each attention head.
+    inner_size: int, the hidden size in feed-forward layers.
+    dropout_rate: float, dropout rate.
+    attention_dropout_rate: float, dropout rate on attention probabilities.
+    attention_type: str, "uni" or "bi".
+    bi_data: bool, whether to use bidirectional input pipeline. Usually set to
+      True during pretraining and False during finetuning.
+    initializer: A tf initializer.
+    two_stream: bool, whether or not to use `TwoStreamRelativeAttention` used
+      in the XLNet pretrainer. If `False`, then it will use
+      `MultiHeadRelativeAttention` as in Transformer XL.
+    tie_attention_biases: bool, whether or not to tie the biases together.
+      Usually set to `True`. Used for backwards compatibility.
+    memory_length: int, the number of tokens to cache.
+    same_length: bool, whether to use the same attention length for each
+      token.
+    clamp_length: int, clamp all relative distances larger than clamp_length. -1
+      means no clamping.
+    reuse_length: int, the number of tokens in the current batch to be cached
+      and reused in the future.
+    inner_activation: str, "relu" or "gelu".
+    use_cls_mask: bool, whether or not cls mask is included in the
+      input sequences.
+    embedding_width: The width of the word embeddings. If the embedding width
+      is not equal to hidden size, embedding parameters will be factorized
+      into two matrices in the shape of ["vocab_size", "embedding_width"] and
+      ["embedding_width", "hidden_size"] ("embedding_width" is usually much
+      smaller than "hidden_size").
+    embedding_layer: The word embedding layer. `None` means we will create a
+      new embedding layer. Otherwise, we will reuse the given embedding layer.
+      This parameter is originally added for ELECTRA model which needs to tie
+      the generator embeddings with the discriminator embeddings.
+ """ + + def __init__(self, + vocab_size, + num_layers, + hidden_size, + num_attention_heads, + head_size, + inner_size, + dropout_rate, + attention_dropout_rate, + attention_type, + bi_data, + initializer, + two_stream=False, + tie_attention_biases=True, + memory_length=None, + clamp_length=-1, + reuse_length=None, + inner_activation="relu", + use_cls_mask=False, + embedding_width=None, + **kwargs): + super(XLNetBase, self).__init__(**kwargs) + + self._vocab_size = vocab_size + self._initializer = initializer + self._attention_type = attention_type + self._num_layers = num_layers + self._hidden_size = hidden_size + self._num_attention_heads = num_attention_heads + self._head_size = head_size + self._inner_size = inner_size + self._inner_activation = inner_activation + self._dropout_rate = dropout_rate + self._attention_dropout_rate = attention_dropout_rate + self._tie_attention_biases = tie_attention_biases + self._two_stream = two_stream + + self._memory_length = memory_length + self._reuse_length = reuse_length + self._bi_data = bi_data + self._clamp_length = clamp_length + self._use_cls_mask = use_cls_mask + + self._segment_embedding = None + self._mask_embedding = None + self._embedding_width = embedding_width + + if embedding_width is None: + embedding_width = hidden_size + + self._embedding_layer = layers.OnDeviceEmbedding( + vocab_size=self._vocab_size, + embedding_width=embedding_width, + initializer=self._initializer, + dtype=tf.float32, + name="word_embedding") + self._dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + + self.embedding_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate) + self.position_encoding = RelativePositionEncoding(self._hidden_size) + + self._transformer_xl = transformer_xl.TransformerXL( + vocab_size=vocab_size, + num_layers=num_layers, + hidden_size=hidden_size, + num_attention_heads=num_attention_heads, + head_size=head_size, + inner_size=inner_size, + dropout_rate=dropout_rate, + 
attention_dropout_rate=attention_dropout_rate, + initializer=initializer, + two_stream=two_stream, + tie_attention_biases=tie_attention_biases, + memory_length=memory_length, + reuse_length=reuse_length, + inner_activation=inner_activation, + name="transformer_xl") + + def get_config(self): + config = { + "vocab_size": + self._vocab_size, + "num_layers": + self._num_layers, + "hidden_size": + self._hidden_size, + "num_attention_heads": + self._num_attention_heads, + "head_size": + self._head_size, + "inner_size": + self._inner_size, + "dropout_rate": + self._dropout_rate, + "attention_dropout_rate": + self._attention_dropout_rate, + "attention_type": + self._attention_type, + "bi_data": + self._bi_data, + "initializer": + self._initializer, + "two_stream": + self._two_stream, + "tie_attention_biases": + self._tie_attention_biases, + "memory_length": + self._memory_length, + "clamp_length": + self._clamp_length, + "reuse_length": + self._reuse_length, + "inner_activation": + self._inner_activation, + "use_cls_mask": + self._use_cls_mask, + "embedding_width": + self._embedding_width, + } + base_config = super(XLNetBase, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def get_embedding_lookup_table(self): + """Returns the embedding layer weights.""" + return self._embedding_layer.embeddings + + def __call__(self, + input_ids, + segment_ids=None, + input_mask=None, + state=None, + permutation_mask=None, + target_mapping=None, + masked_tokens=None, + **kwargs): + # Uses dict to feed inputs into call() in order to keep state as a python + # list. 
+ inputs = { + "input_ids": input_ids, + "segment_ids": segment_ids, + "input_mask": input_mask, + "state": state, + "permutation_mask": permutation_mask, + "target_mapping": target_mapping, + "masked_tokens": masked_tokens + } + return super(XLNetBase, self).__call__(inputs, **kwargs) + + def call(self, inputs): + """Implements call() for the layer.""" + input_ids = inputs["input_ids"] + segment_ids = inputs["segment_ids"] + input_mask = inputs["input_mask"] + state = inputs["state"] + permutation_mask = inputs["permutation_mask"] + target_mapping = inputs["target_mapping"] + masked_tokens = inputs["masked_tokens"] + + batch_size = tf.shape(input_ids)[0] + seq_length = tf.shape(input_ids)[1] + if state is not None: + memory_length = tf.shape(state[0])[1] + else: + memory_length = 0 + total_length = memory_length + seq_length + + if self._two_stream and masked_tokens is None: + raise ValueError("`masked_tokens` must be provided in order to " + "initialize the query stream in " + "`TwoStreamRelativeAttention`.") + if masked_tokens is not None and not self._two_stream: + logging.warning("`masked_tokens` is provided but `two_stream` is not " + "enabled. 
Please enable `two_stream` to enable two " + "stream attention.") + + if input_mask is not None: + dtype = input_mask.dtype + elif permutation_mask is not None: + dtype = permutation_mask.dtype + else: + dtype = tf.int32 + query_attention_mask, content_attention_mask = _compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type=self._attention_type, + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=dtype) + relative_position_encoding = _compute_positional_encoding( + attention_type=self._attention_type, + position_encoding_layer=self.position_encoding, + hidden_size=self._hidden_size, + batch_size=batch_size, + total_length=total_length, + seq_length=seq_length, + clamp_length=self._clamp_length, + bi_data=self._bi_data, + dtype=tf.float32) + relative_position_encoding = self.embedding_dropout( + relative_position_encoding) + + if segment_ids is None: + segment_embedding = None + segment_matrix = None + else: + if self._segment_embedding is None: + self._segment_embedding = self.add_weight( + "seg_embed", + shape=[self._num_layers, 2, self._num_attention_heads, + self._head_size], + dtype=tf.float32, + initializer=self._initializer) + + segment_embedding = self._segment_embedding + segment_matrix = _compute_segment_matrix( + segment_ids=segment_ids, + memory_length=memory_length, + batch_size=batch_size, + use_cls_mask=self._use_cls_mask) + + word_embeddings = self._embedding_layer(input_ids) + content_stream = self._dropout(word_embeddings) + + if self._two_stream: + if self._mask_embedding is None: + self._mask_embedding = self.add_weight( + "mask_emb/mask_emb", + shape=[1, 1, self._hidden_size], + dtype=tf.float32) + if target_mapping is None: + masked_tokens = masked_tokens[:, :, None] + masked_token_embedding = ( + masked_tokens * self._mask_embedding + + (1 - masked_tokens) * word_embeddings) + else: + masked_token_embedding = tf.tile( + self._mask_embedding, + [batch_size, 
tf.shape(target_mapping)[1], 1]) + query_stream = self._dropout(masked_token_embedding) + else: + query_stream = None + + return self._transformer_xl( + content_stream=content_stream, + query_stream=query_stream, + target_mapping=target_mapping, + state=state, + relative_position_encoding=relative_position_encoding, + segment_matrix=segment_matrix, + segment_embedding=segment_embedding, + content_attention_mask=content_attention_mask, + query_attention_mask=query_attention_mask) diff --git a/official/nlp/modeling/networks/xlnet_base_test.py b/official/nlp/modeling/networks/xlnet_base_test.py new file mode 100644 index 0000000000000000000000000000000000000000..81db32487325b3b61d47afac6217590491067257 --- /dev/null +++ b/official/nlp/modeling/networks/xlnet_base_test.py @@ -0,0 +1,451 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Keras based XLNet model.""" +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.keras import keras_parameterized # pylint: disable=g-direct-tensorflow-import +from official.nlp.modeling.networks import xlnet_base + + +@keras_parameterized.run_all_keras_modes +class RelativePositionEncodingTest(keras_parameterized.TestCase): + + def test_positional_embedding(self): + """A low-dimensional example is tested. 
+ + With len(pos_seq)=2 and d_model=4: + + pos_seq = [[1.], [0.]] + inv_freq = [1., 0.01] + pos_seq x inv_freq = [[1, 0.01], [0., 0.]] + pos_emb = [[sin(1.), sin(0.01), cos(1.), cos(0.01)], + [sin(0.), sin(0.), cos(0.), cos(0.)]] + = [[0.84147096, 0.00999983, 0.54030228, 0.99994999], + [0., 0., 1., 1.]] + """ + target = np.array([[[0.84147096, 0.00999983, 0.54030228, 0.99994999], + [0., 0., 1., 1.]]]) + hidden_size = 4 + pos_seq = tf.range(1, -1, -1.0) # [1., 0.] + encoding_layer = xlnet_base.RelativePositionEncoding( + hidden_size=hidden_size) + encoding = encoding_layer(pos_seq, batch_size=None).numpy().astype(float) + self.assertAllClose(encoding, target) + + +class ComputePositionEncodingTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + attention_type=["uni", "bi"], + bi_data=[False, True], + )) + def test_compute_position_encoding_smoke(self, attention_type, bi_data): + hidden_size = 4 + batch_size = 4 + total_length = 8 + seq_length = 4 + position_encoding_layer = xlnet_base.RelativePositionEncoding( + hidden_size=hidden_size) + encoding = xlnet_base._compute_positional_encoding( + attention_type=attention_type, + position_encoding_layer=position_encoding_layer, + hidden_size=hidden_size, + batch_size=batch_size, + total_length=total_length, + seq_length=seq_length, + clamp_length=2, + bi_data=bi_data, + dtype=tf.float32) + self.assertEqual(encoding.shape[0], batch_size) + self.assertEqual(encoding.shape[2], hidden_size) + + +class CausalAttentionMaskTests(tf.test.TestCase): + + def test_casual_attention_mask_with_no_memory(self): + seq_length, memory_length = 3, 0 + causal_attention_mask = xlnet_base._create_causal_attention_mask( + seq_length=seq_length, + memory_length=memory_length) + + expected_output = np.array([[1, 0, 0], + [1, 1, 0], + [1, 1, 1]]) + self.assertAllClose(causal_attention_mask, expected_output) + + def test_casual_attention_mask_with_memory(self): + seq_length, memory_length = 3, 2 + 
causal_attention_mask = xlnet_base._create_causal_attention_mask( + seq_length=seq_length, + memory_length=memory_length) + + expected_output = np.array([[1, 1, 1, 0, 0], + [1, 1, 1, 1, 0], + [1, 1, 1, 1, 1]]) + self.assertAllClose(causal_attention_mask, expected_output) + + def test_causal_attention_mask_with_same_length(self): + seq_length, memory_length = 3, 2 + causal_attention_mask = xlnet_base._create_causal_attention_mask( + seq_length=seq_length, + memory_length=memory_length, + same_length=True) + + expected_output = np.array([[1, 1, 1, 0, 0], + [0, 1, 1, 1, 0], + [0, 0, 1, 1, 1]]) + self.assertAllClose(causal_attention_mask, expected_output) + + +class MaskComputationTests(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine( + use_input_mask=[False, True], + use_permutation_mask=[False, True], + attention_type=["uni", "bi"], + memory_length=[0, 4], + )) + def test_compute_attention_mask_smoke(self, + use_input_mask, + use_permutation_mask, + attention_type, + memory_length): + """Tests coverage and functionality for different configurations.""" + batch_size = 2 + seq_length = 8 + if use_input_mask: + input_mask = tf.zeros(shape=(batch_size, seq_length)) + else: + input_mask = None + if use_permutation_mask: + permutation_mask = tf.zeros(shape=(batch_size, seq_length, seq_length)) + else: + permutation_mask = None + _, content_mask = xlnet_base._compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type=attention_type, + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=tf.float32) + + expected_mask_shape = (batch_size, 1, + seq_length, seq_length + memory_length) + if use_input_mask or use_permutation_mask: + self.assertEqual(content_mask.shape, expected_mask_shape) + + def test_no_input_masks(self): + query_mask, content_mask = xlnet_base._compute_attention_mask( + input_mask=None, + permutation_mask=None, + attention_type="uni", + seq_length=8, + 
memory_length=2, + batch_size=2, + dtype=tf.float32) + self.assertIsNone(query_mask) + self.assertIsNone(content_mask) + + def test_input_mask_no_permutation(self): + """Tests if an input mask is provided but not permutation. + + In the case that only one of input mask or permutation mask is provided + and the attention type is bidirectional, the query mask should be + a broadcasted version of the provided mask. + + Content mask should be a broadcasted version of the query mask, where the + diagonal is 0s. + + """ + seq_length = 4 + batch_size = 1 + memory_length = 0 + + input_mask = np.array([[1, 1, 0, 0]]) + permutation_mask = None + + expected_query_mask = input_mask[None, None, :, :] + expected_content_mask = np.array([[[ + [1, 1, 0, 0], + [1, 1, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 1]]]]) + + query_mask, content_mask = xlnet_base._compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type="bi", + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=tf.float32) + + self.assertAllClose(query_mask, expected_query_mask) + self.assertAllClose(content_mask, expected_content_mask) + + def test_permutation_mask_no_input_mask(self): + """Tests if a permutation mask is provided but not input.""" + seq_length = 2 + batch_size = 1 + memory_length = 0 + + input_mask = None + permutation_mask = np.array([ + [[1, 0], + [1, 0]], + ]) + + expected_query_mask = permutation_mask[:, None, :, :] + expected_content_mask = np.array([[[ + [1, 0], + [1, 1]]]]) + + query_mask, content_mask = xlnet_base._compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type="bi", + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=tf.float32) + + self.assertAllClose(query_mask, expected_query_mask) + self.assertAllClose(content_mask, expected_content_mask) + + def test_permutation_and_input_mask(self): + """Tests if both an input and 
permutation mask are provided.""" + seq_length = 4 + batch_size = 1 + memory_length = 0 + + input_mask = np.array([[1, 1, 0, 0]]) + permutation_mask = np.array([[ + [0, 1, 1, 1], + [1, 0, 1, 1], + [1, 1, 0, 1], + [1, 1, 1, 0], + ]]) + + expected_query_mask = np.array([[[ + [0, 1, 0, 0], + [1, 0, 0, 0], + [1, 1, 0, 0], + [1, 1, 0, 0]]]]) + expected_content_mask = np.array([[[ + [1, 1, 0, 0], + [1, 1, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 1]]]]) + query_mask, content_mask = xlnet_base._compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type="bi", + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=tf.float32) + + self.assertAllClose(query_mask, expected_query_mask) + self.assertAllClose(content_mask, expected_content_mask) + + def test_permutation_input_uni_mask(self): + """Tests if an input, permutation and causal mask are provided.""" + seq_length = 4 + batch_size = 1 + memory_length = 0 + + input_mask = np.array([[1, 1, 1, 0]]) + permutation_mask = np.array([[ + [0, 1, 1, 1], + [1, 0, 1, 1], + [1, 1, 0, 1], + [1, 1, 1, 0], + ]]) + + expected_query_mask = np.array([[[ + [0, 0, 0, 0], + [1, 0, 0, 0], + [1, 1, 0, 0], + [1, 1, 1, 0]]]]) + expected_content_mask = np.array([[[ + [1, 0, 0, 0], + [1, 1, 0, 0], + [1, 1, 1, 0], + [1, 1, 1, 1]]]]) + query_mask, content_mask = xlnet_base._compute_attention_mask( + input_mask=input_mask, + permutation_mask=permutation_mask, + attention_type="uni", + seq_length=seq_length, + memory_length=memory_length, + batch_size=batch_size, + dtype=tf.float32) + + self.assertAllClose(query_mask, expected_query_mask) + self.assertAllClose(content_mask, expected_content_mask) + + +class SegmentMatrixTests(tf.test.TestCase): + + def test_no_segment_ids(self): + segment_matrix = xlnet_base._compute_segment_matrix( + segment_ids=None, + memory_length=2, + batch_size=1, + use_cls_mask=False) + self.assertIsNone(segment_matrix) + + def test_basic(self): + batch_size = 
1 + memory_length = 0 + segment_ids = np.array([ + [1, 1, 2, 1] + ]) + expected_segment_matrix = np.array([[ + [False, False, True, False], + [False, False, True, False], + [True, True, False, True], + [False, False, True, False] + ]]) + segment_matrix = xlnet_base._compute_segment_matrix( + segment_ids=segment_ids, + memory_length=memory_length, + batch_size=batch_size, + use_cls_mask=False) + self.assertAllClose(segment_matrix, expected_segment_matrix) + + def test_basic_with_memory(self): + batch_size = 1 + memory_length = 1 + segment_ids = np.array([ + [1, 1, 2, 1] + ]) + expected_segment_matrix = np.array([[ + [True, False, False, True, False], + [True, False, False, True, False], + [True, True, True, False, True], + [True, False, False, True, False] + ]]).astype(int) + segment_matrix = tf.cast(xlnet_base._compute_segment_matrix( + segment_ids=segment_ids, + memory_length=memory_length, + batch_size=batch_size, + use_cls_mask=False), dtype=tf.uint8) + self.assertAllClose(segment_matrix, expected_segment_matrix) + + def dont_test_basic_with_class_mask(self): + # TODO(allencwang) - this test should pass but illustrates the legacy issue + # of using class mask. Enable once addressed. 
+ batch_size = 1 + memory_length = 0 + segment_ids = np.array([ + [1, 1, 2, 1] + ]) + expected_segment_matrix = np.array([[ + [False, False, True, False], + [False, False, True, False], + [True, True, False, True], + [False, False, True, False] + ]]).astype(int) + segment_matrix = tf.cast(xlnet_base._compute_segment_matrix( + segment_ids=segment_ids, + memory_length=memory_length, + batch_size=batch_size, + use_cls_mask=True), dtype=tf.uint8) + self.assertAllClose(segment_matrix, expected_segment_matrix) + + +class XLNetModelTests(tf.test.TestCase): + + def _generate_data(self, + batch_size, + seq_length, + num_predictions=None): + """Generates sample XLNet data for testing.""" + sequence_shape = (batch_size, seq_length) + if num_predictions is not None: + target_mapping = tf.random.uniform( + shape=(batch_size, num_predictions, seq_length)) + + return { + "input_ids": np.random.randint(10, size=sequence_shape, dtype="int32"), + "segment_ids": + np.random.randint(2, size=sequence_shape, dtype="int32"), + "input_mask": + np.random.randint(2, size=sequence_shape).astype("float32"), + "permutation_mask": + np.random.randint( + 2, size=(batch_size, seq_length, seq_length)).astype("float32"), + "target_mapping": target_mapping, + "masked_tokens": tf.random.uniform(shape=sequence_shape), + } + + def test_xlnet_model(self): + batch_size = 2 + seq_length = 8 + num_predictions = 2 + hidden_size = 4 + xlnet_model = xlnet_base.XLNetBase( + vocab_size=32000, + num_layers=2, + hidden_size=hidden_size, + num_attention_heads=2, + head_size=2, + inner_size=2, + dropout_rate=0., + attention_dropout_rate=0., + attention_type="bi", + bi_data=True, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + two_stream=False, + tie_attention_biases=True, + reuse_length=0, + inner_activation="relu") + input_data = self._generate_data(batch_size=batch_size, + seq_length=seq_length, + num_predictions=num_predictions) + model_output = xlnet_model(**input_data) + 
self.assertEqual(model_output[0].shape, + (batch_size, seq_length, hidden_size)) + + def test_get_config(self): + xlnet_model = xlnet_base.XLNetBase( + vocab_size=32000, + num_layers=12, + hidden_size=36, + num_attention_heads=12, + head_size=12, + inner_size=12, + dropout_rate=0., + attention_dropout_rate=0., + attention_type="bi", + bi_data=True, + initializer=tf.keras.initializers.RandomNormal(stddev=0.1), + two_stream=False, + tie_attention_biases=True, + memory_length=0, + reuse_length=0, + inner_activation="relu") + config = xlnet_model.get_config() + new_xlnet = xlnet_base.XLNetBase.from_config(config) + self.assertEqual(config, new_xlnet.get_config()) + + +if __name__ == "__main__": + tf.random.set_seed(0) + tf.test.main() diff --git a/official/nlp/modeling/ops/__init__.py b/official/nlp/modeling/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e21f33273f3801a34073aceecf301e23808727d3 --- /dev/null +++ b/official/nlp/modeling/ops/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Ops package definition.""" +from official.nlp.modeling.ops.beam_search import sequence_beam_search +from official.nlp.modeling.ops.segment_extractor import get_next_sentence_labels +from official.nlp.modeling.ops.segment_extractor import get_sentence_order_labels diff --git a/official/nlp/modeling/ops/beam_search.py b/official/nlp/modeling/ops/beam_search.py new file mode 100644 index 0000000000000000000000000000000000000000..5e1ec11e1bbe8b664c2555794ec660820be2aec9 --- /dev/null +++ b/official/nlp/modeling/ops/beam_search.py @@ -0,0 +1,708 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Beam search to find the translated sequence with the highest probability.""" + +import numpy as np +import tensorflow as tf + + +def inf(dtype): + """Returns a value close to infinity, but is still finite in `dtype`. + + This is useful to get a very large value that is still zero when multiplied by + zero. The floating-point "Inf" value is NaN when multiplied by zero. + + Args: + dtype: A dtype. The returned value will be finite when casted to this dtype. + + Returns: + A very large value. + """ + if dtype == "float32" or dtype == "bfloat16": + return 1e7 + elif dtype == "float16": + # Disable no-member lint error, as the linter thinks np.float16 does not + # exist for some reason. 
+ return np.finfo(np.float16).max # pylint: disable=no-member + else: + raise AssertionError("Invalid dtype: %s" % dtype) + + +class _StateKeys(object): + """Keys to dictionary storing the state of the beam search loop.""" + + # Variable storing the loop index. + CUR_INDEX = "CUR_INDEX" + + # Top sequences that are alive for each batch item. Alive sequences are ones + # that have not generated an EOS token. Sequences that reach EOS are marked as + # finished and moved to the FINISHED_SEQ tensor. + # Has shape [batch_size, beam_size, CUR_INDEX + 1] + ALIVE_SEQ = "ALIVE_SEQ" + # Log probabilities of each alive sequence. Shape [batch_size, beam_size] + ALIVE_LOG_PROBS = "ALIVE_LOG_PROBS" + # Dictionary of cached values for each alive sequence. The cache stores + # the encoder output, attention bias, and the decoder attention output from + # the previous iteration. + ALIVE_CACHE = "ALIVE_CACHE" + + # Top finished sequences for each batch item. + # Has shape [batch_size, beam_size, CUR_INDEX + 1]. Sequences that are + # shorter than CUR_INDEX + 1 are padded with 0s. + FINISHED_SEQ = "FINISHED_SEQ" + # Scores for each finished sequence. Score = log probability / length norm + # Shape [batch_size, beam_size] + FINISHED_SCORES = "FINISHED_SCORES" + # Flags indicating which sequences in the finished sequences are finished. + # At the beginning, all of the sequences in FINISHED_SEQ are filler values. + # True -> finished sequence, False -> filler. Shape [batch_size, beam_size] + FINISHED_FLAGS = "FINISHED_FLAGS" + + +def _expand_to_same_rank(tensor, target): + """Expands a given tensor to target's rank to be broadcastable. + + Args: + tensor: input tensor to tile. Shape: [b, d1, ..., da] + target: target tensor. Shape: [b, d1, ..., da, ..., dn] + + Returns: + Tiled tensor of shape [b, d1, ..., da, 1, ..., 1] with same rank of target. + + Raises: + ValueError, if the shape rank of rank tensor/target is None. 
+ """ + if tensor.shape.rank is None: + raise ValueError("Expect rank for tensor shape, but got None.") + if target.shape.rank is None: + raise ValueError("Expect rank for target shape, but got None.") + + with tf.name_scope("expand_rank"): + diff_rank = target.shape.rank - tensor.shape.rank + for _ in range(diff_rank): + tensor = tf.expand_dims(tensor, -1) + return tensor + + +class SequenceBeamSearch(tf.Module): + """Implementation of beam search loop.""" + + def __init__(self, + symbols_to_logits_fn, + vocab_size, + beam_size, + alpha, + max_decode_length, + eos_id, + padded_decode, + dtype=tf.float32): + """Initialize sequence beam search. + + Args: + symbols_to_logits_fn: A function to provide logits, which is the + interface to the Transformer model. The passed in arguments are: ids -> + A tensor with shape [batch_size * beam_size, index]. index -> A + scalar. cache -> A nested dictionary of tensors [batch_size * + beam_size, ...]. + The function must return a tuple of logits and the updated cache: logits + -> A tensor with shape [batch * beam_size, vocab_size]. updated cache + -> A nested dictionary with the same structure as the input cache. + vocab_size: An integer, the size of the vocabulary, used for topk + computation. + beam_size: An integer, number of beams for beam search. + alpha: A float, defining the strength of length normalization. + max_decode_length: An integer, the maximum number of steps to decode a + sequence. + eos_id: An integer. ID of end of sentence token. + padded_decode: A bool, indicating if max_sequence_length padding is used + for beam search. + dtype: A tensorflow data type used for score computation. The default is + tf.float32. 
+ """ + self.symbols_to_logits_fn = symbols_to_logits_fn + self.vocab_size = vocab_size + self.beam_size = beam_size + self.alpha = alpha + self.max_decode_length = max_decode_length + self.eos_id = eos_id + self.padded_decode = padded_decode + self.dtype = tf.as_dtype(dtype) + + def search(self, initial_ids, initial_cache): + """Beam search for sequences with highest scores. + + Args: + initial_ids: initial ids to pass into the symbols_to_logits_fn. int tensor + with shape [batch_size, 1] + initial_cache: dictionary storing values to be passed into the + symbols_to_logits_fn. + + Returns: + finished_seq and finished_scores. + """ + batch_size = ( + initial_ids.shape.as_list()[0] + if self.padded_decode else tf.shape(initial_ids)[0]) + state, state_shapes = self._create_initial_state(initial_ids, initial_cache, + batch_size) + + def _grow_alive_seq(state): + """Grow alive sequences by one token, collect top 2*beam_size sequences. + + 2*beam_size sequences are collected because some sequences may have + reached the EOS token. 2*beam_size ensures that at least beam_size + sequences are still alive. + + Args: + state: A dictionary with the current loop state. + + Returns: + Tuple of + (Top 2*beam_size sequences [batch_size, 2 * beam_size, cur_index + 1], + Scores of returned sequences [batch_size, 2 * beam_size], + New alive cache, for each of the 2 * beam_size sequences) + """ + i = state[_StateKeys.CUR_INDEX] + alive_seq = state[_StateKeys.ALIVE_SEQ] + alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS] + alive_cache = state[_StateKeys.ALIVE_CACHE] + + beams_to_keep = 2 * self.beam_size + + # Get logits for the next candidate IDs for the alive sequences. Get the + # new cache values at the same time. 
+ if self.padded_decode: + flat_ids = tf.reshape( + tf.slice(alive_seq, [0, 0, i], [batch_size, self.beam_size, 1]), + [batch_size * self.beam_size, -1]) + else: + flat_ids = flatten_beam_dim(alive_seq) # [batch_size * beam_size] + flat_cache = tf.nest.map_structure(flatten_beam_dim, alive_cache) + + flat_logits, flat_cache = self.symbols_to_logits_fn( + flat_ids, i, flat_cache) + + # Unflatten logits to shape [batch_size, beam_size, vocab_size] + logits = _unflatten_beam_dim(flat_logits, batch_size, self.beam_size) + new_cache = tf.nest.map_structure( + lambda t: _unflatten_beam_dim(t, batch_size, self.beam_size), + flat_cache) + + # Convert logits to normalized log probs + candidate_log_probs = _log_prob_from_logits(logits) + + # Calculate new log probabilities if each of the alive sequences were + # extended # by the the candidate IDs. + # Shape [batch_size, beam_size, vocab_size] + log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2) + + # Each batch item has beam_size * vocab_size candidate sequences. For each + # batch item, get the k candidates with the highest log probabilities. + flat_log_probs = tf.reshape(log_probs, + [-1, self.beam_size * self.vocab_size]) + topk_log_probs, topk_indices = tf.nn.top_k( + flat_log_probs, k=beams_to_keep) + + # Extract the alive sequences that generate the highest log probabilities + # after being extended. + topk_beam_indices = topk_indices // self.vocab_size + topk_seq, new_cache = _gather_beams([alive_seq, new_cache], + topk_beam_indices, batch_size, + beams_to_keep) + + # Append the most probable IDs to the topk sequences + topk_ids = topk_indices % self.vocab_size + if self.padded_decode: + topk_seq = tf.transpose(topk_seq, perm=[2, 0, 1]) + # TODO(b/145533236, hongkuny): Reverts once TF fix the validation. 
+ topk_seq = tf.tensor_scatter_nd_update(topk_seq, [[i + 1]], + tf.expand_dims(topk_ids, axis=0)) + topk_seq = tf.transpose(topk_seq, perm=[1, 2, 0]) + else: + topk_seq = tf.concat( + [topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2) + return topk_seq, topk_log_probs, topk_ids, new_cache + + def _get_new_alive_state(new_seq, new_log_probs, new_finished_flags, + new_cache): + """Gather the top k sequences that are still alive. + + Args: + new_seq: New sequences generated by growing the current alive sequences + int32 tensor with shape [batch_size, 2 * beam_size, cur_index + 1] + new_log_probs: Log probabilities of new sequences float32 tensor with + shape [batch_size, beam_size] + new_finished_flags: A boolean Tensor indicates which sequences are live + inside the beam. + new_cache: Dict of cached values for each sequence. + + Returns: + Dictionary with alive keys from _StateKeys: + {Top beam_size sequences that are still alive (don't end with eos_id) + Log probabilities of top alive sequences + Dict cache storing decoder states for top alive sequences} + """ + # To prevent finished sequences from being considered, set log probs to + # -inf. + new_log_probs += tf.cast(new_finished_flags, + self.dtype) * -inf(self.dtype) + + top_alive_seq, top_alive_log_probs, top_alive_cache = _gather_topk_beams( + [new_seq, new_log_probs, new_cache], new_log_probs, batch_size, + self.beam_size) + + return { + _StateKeys.ALIVE_SEQ: top_alive_seq, + _StateKeys.ALIVE_LOG_PROBS: top_alive_log_probs, + _StateKeys.ALIVE_CACHE: top_alive_cache + } + + def _get_new_finished_state(state, new_seq, new_log_probs, + new_finished_flags): + """Combine new and old finished sequences, and gather the top k sequences. + + Args: + state: A dictionary with the current loop state. 
+ new_seq: New sequences generated by growing the current alive sequences + int32 tensor with shape [batch_size, beam_size, i + 1] + new_log_probs: Log probabilities of new sequences float32 tensor with + shape [batch_size, beam_size] + new_finished_flags: A boolean Tensor indicates which sequences are live + inside the beam. + + Returns: + Dictionary with finished keys from _StateKeys: + {Top beam_size finished sequences based on score, + Scores of finished sequences, + Finished flags of finished sequences} + """ + i = state[_StateKeys.CUR_INDEX] + finished_seq = state[_StateKeys.FINISHED_SEQ] + finished_scores = state[_StateKeys.FINISHED_SCORES] + finished_flags = state[_StateKeys.FINISHED_FLAGS] + + # First append a column of 0-ids to finished_seq to increment the length. + # New shape of finished_seq: [batch_size, beam_size, i + 1] + if not self.padded_decode: + finished_seq = tf.concat( + [finished_seq, + tf.zeros([batch_size, self.beam_size, 1], tf.int32)], + axis=2) + + # Calculate new seq scores from log probabilities. + length_norm = _length_normalization(self.alpha, i + 1, dtype=self.dtype) + new_scores = new_log_probs / length_norm + + # Set the scores of the still-alive seq in new_seq to large negative + # values. + new_scores += ((1. - tf.cast(new_finished_flags, self.dtype)) * + -inf(self.dtype)) + + # Combine sequences, scores, and flags. + finished_seq = tf.concat([finished_seq, new_seq], axis=1) + finished_scores = tf.concat([finished_scores, new_scores], axis=1) + finished_flags = tf.concat([finished_flags, new_finished_flags], axis=1) + + # Return the finished sequences with the best scores. 
+ top_finished_seq, top_finished_scores, top_finished_flags = ( + _gather_topk_beams([finished_seq, finished_scores, finished_flags], + finished_scores, batch_size, self.beam_size)) + + return { + _StateKeys.FINISHED_SEQ: top_finished_seq, + _StateKeys.FINISHED_SCORES: top_finished_scores, + _StateKeys.FINISHED_FLAGS: top_finished_flags + } + + def _search_step(state): + """Beam search loop body. + + Grow alive sequences by a single ID. Sequences that have reached the EOS + token are marked as finished. The alive and finished sequences with the + highest log probabilities and scores are returned. + + A sequence's finished score is calculating by dividing the log probability + by the length normalization factor. Without length normalization, the + search is more likely to return shorter sequences. + + Args: + state: A dictionary with the current loop state. + + Returns: + new state dictionary. + """ + # Grow alive sequences by one token. + new_seq, new_log_probs, topk_ids, new_cache = _grow_alive_seq(state) + new_finished_flags = tf.equal(topk_ids, self.eos_id) + # Collect top beam_size alive sequences + alive_state = _get_new_alive_state(new_seq, new_log_probs, + new_finished_flags, new_cache) + + # Combine newly finished sequences with existing finished sequences, and + # collect the top k scoring sequences. 
+ finished_state = _get_new_finished_state(state, new_seq, new_log_probs, + new_finished_flags) + + # Increment loop index and create new state dictionary + new_state = {_StateKeys.CUR_INDEX: state[_StateKeys.CUR_INDEX] + 1} + new_state.update(alive_state) + new_state.update(finished_state) + return [new_state] + + finished_state = tf.nest.map_structure( + tf.stop_gradient, + tf.while_loop( + self._continue_search, + _search_step, + loop_vars=[state], + shape_invariants=[state_shapes], + parallel_iterations=1)) + finished_state = finished_state[0] + return self._process_finished_state(finished_state) + + def _process_finished_state(self, finished_state): + alive_seq = finished_state[_StateKeys.ALIVE_SEQ] + alive_log_probs = finished_state[_StateKeys.ALIVE_LOG_PROBS] + finished_seq = finished_state[_StateKeys.FINISHED_SEQ] + finished_scores = finished_state[_StateKeys.FINISHED_SCORES] + finished_flags = finished_state[_StateKeys.FINISHED_FLAGS] + # TF2 changes tf.where behavior. Should make parameters broadcastable. + finished_cond = tf.reduce_any(finished_flags, 1, name="finished_cond") + seq_cond = _expand_to_same_rank(finished_cond, finished_seq) + score_cond = _expand_to_same_rank(finished_cond, finished_scores) + + # Account for corner case where there are no finished sequences for a + # particular batch item. In that case, return alive sequences for that batch + # item. + finished_seq = tf.where(seq_cond, finished_seq, alive_seq) + finished_scores = tf.where(score_cond, finished_scores, alive_log_probs) + return finished_seq, finished_scores + + def _create_initial_state(self, initial_ids, initial_cache, batch_size): + """Return initial state dictionary and its shape invariants.""" + for key, value in initial_cache.items(): + for inner_value in tf.nest.flatten(value): + if inner_value.dtype != self.dtype: + raise TypeError( + "initial_cache element for key '%s' has dtype %s that does not " + "match SequenceBeamSearch's dtype of %s. 
Value: %s" % + (key, inner_value.dtype.name, self.dtype.name, inner_value)) + + # Current loop index (starts at 0) + cur_index = tf.constant(0) + + # Create alive sequence with shape [batch_size, beam_size, 1] + alive_seq = expand_to_beam_size(initial_ids, self.beam_size) + alive_seq = tf.expand_dims(alive_seq, axis=2) + if self.padded_decode: + alive_seq = tf.tile(alive_seq, [1, 1, self.max_decode_length + 1]) + + # Create tensor for storing initial log probabilities. + # Assume initial_ids are prob 1.0 + initial_log_probs = tf.constant([[0.] + [-float("inf")] * + (self.beam_size - 1)], + dtype=self.dtype) + alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1]) + + # Expand all values stored in the dictionary to the beam size, so that each + # beam has a separate cache. + alive_cache = tf.nest.map_structure( + lambda t: expand_to_beam_size(t, self.beam_size), initial_cache) + + # Initialize tensor storing finished sequences with filler values. + finished_seq = tf.zeros(tf.shape(alive_seq), tf.int32) + + # Set scores of the initial finished seqs to negative infinity. + finished_scores = tf.ones([batch_size, self.beam_size], + dtype=self.dtype) * -inf(self.dtype) + + # Initialize finished flags with all False values. + finished_flags = tf.zeros([batch_size, self.beam_size], tf.bool) + + # Create state dictionary + state = { + _StateKeys.CUR_INDEX: cur_index, + _StateKeys.ALIVE_SEQ: alive_seq, + _StateKeys.ALIVE_LOG_PROBS: alive_log_probs, + _StateKeys.ALIVE_CACHE: alive_cache, + _StateKeys.FINISHED_SEQ: finished_seq, + _StateKeys.FINISHED_SCORES: finished_scores, + _StateKeys.FINISHED_FLAGS: finished_flags + } + + # Create state invariants for each value in the state dictionary. Each + # dimension must be a constant or None. A None dimension means either: + # 1) the dimension's value is a tensor that remains the same but may + # depend on the input sequence to the model (e.g. batch size). 
+ # 2) the dimension may have different values on different iterations. + if self.padded_decode: + state_shape_invariants = { + _StateKeys.CUR_INDEX: + tf.TensorShape([]), + _StateKeys.ALIVE_SEQ: + tf.TensorShape( + [batch_size, self.beam_size, self.max_decode_length + 1]), + _StateKeys.ALIVE_LOG_PROBS: + tf.TensorShape([batch_size, self.beam_size]), + _StateKeys.ALIVE_CACHE: + tf.nest.map_structure(lambda state: state.get_shape(), + alive_cache), + _StateKeys.FINISHED_SEQ: + tf.TensorShape( + [batch_size, self.beam_size, self.max_decode_length + 1]), + _StateKeys.FINISHED_SCORES: + tf.TensorShape([batch_size, self.beam_size]), + _StateKeys.FINISHED_FLAGS: + tf.TensorShape([batch_size, self.beam_size]) + } + else: + state_shape_invariants = { + _StateKeys.CUR_INDEX: + tf.TensorShape([]), + _StateKeys.ALIVE_SEQ: + tf.TensorShape([None, self.beam_size, None]), + _StateKeys.ALIVE_LOG_PROBS: + tf.TensorShape([None, self.beam_size]), + _StateKeys.ALIVE_CACHE: + tf.nest.map_structure(_get_shape_keep_last_dim, alive_cache), + _StateKeys.FINISHED_SEQ: + tf.TensorShape([None, self.beam_size, None]), + _StateKeys.FINISHED_SCORES: + tf.TensorShape([None, self.beam_size]), + _StateKeys.FINISHED_FLAGS: + tf.TensorShape([None, self.beam_size]) + } + + return state, state_shape_invariants + + def _continue_search(self, state): + """Return whether to continue the search loop. + + The loops should terminate when + 1) when decode length has been reached, or + 2) when the worst score in the finished sequences is better than the best + score in the alive sequences (i.e. the finished sequences are provably + unchanging) + + Args: + state: A dictionary with the current loop state. + + Returns: + Bool tensor with value True if loop should continue, False if loop should + terminate. 
+ """ + i = state[_StateKeys.CUR_INDEX] + alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS] + finished_scores = state[_StateKeys.FINISHED_SCORES] + finished_flags = state[_StateKeys.FINISHED_FLAGS] + + not_at_max_decode_length = tf.less(i, self.max_decode_length) + + # Calculate largest length penalty (the larger penalty, the better score). + max_length_norm = _length_normalization( + self.alpha, self.max_decode_length, dtype=self.dtype) + # Get the best possible scores from alive sequences. + # This tf.slice/tf.squeeze is equivalent to alive_log_probs[:, 0] which + # emits a tf.strided_slice. tf.slice is easier to reason about as we aren't + # actually taking a non trivial stride. + best_alive_scores = tf.squeeze(tf.slice(alive_log_probs, [0, 0], [-1, 1]), + axis=1) / max_length_norm + + # Compute worst score in finished sequences for each batch element + finished_scores *= tf.cast(finished_flags, + self.dtype) # set filler scores to zero + lowest_finished_scores = tf.reduce_min(finished_scores, axis=1) + + # If there are no finished sequences in a batch element, then set the lowest + # finished score to -INF for that element. + finished_batches = tf.reduce_any(finished_flags, 1) + lowest_finished_scores += ((1.0 - tf.cast(finished_batches, self.dtype)) * + -inf(self.dtype)) + + worst_finished_score_better_than_best_alive_score = tf.reduce_all( + tf.greater(lowest_finished_scores, best_alive_scores)) + + return tf.logical_and( + not_at_max_decode_length, + tf.logical_not(worst_finished_score_better_than_best_alive_score)) + + +def sequence_beam_search(symbols_to_logits_fn, + initial_ids, + initial_cache, + vocab_size, + beam_size, + alpha, + max_decode_length, + eos_id, + padded_decode=False, + dtype="float32"): + """Search for sequence of subtoken ids with the largest probability. + + Args: + symbols_to_logits_fn: A function that takes in ids, index, and cache as + arguments. 
The passed in arguments will have shape: ids -> A tensor with + shape [batch_size * beam_size, index]. index -> A scalar. cache -> A + nested dictionary of tensors [batch_size * beam_size, ...]. + The function must return a tuple of logits and new cache: logits -> A + tensor with shape [batch * beam_size, vocab_size]. new cache -> A nested + dictionary with the same shape/structure as the inputted cache. + initial_ids: An int32 tensor with shape [batch_size]. Starting ids for each + batch item. + initial_cache: A dictionary, containing starting decoder variables + information. + vocab_size: An integer, the size of tokens. + beam_size: An integer, the number of beams. + alpha: A float, defining the strength of length normalization. + max_decode_length: An integer, the maximum length to decoded a sequence. + eos_id: An integer, ID of eos token, used to determine when a sequence has + finished. + padded_decode: A bool, indicating if max_sequence_length padding is used for + beam search. + dtype: A tensorflow data type used for score computation. The default is + tf.float32. + + Returns: + Top decoded sequences [batch_size, beam_size, max_decode_length] + sequence scores [batch_size, beam_size] + """ + sbs = SequenceBeamSearch(symbols_to_logits_fn, vocab_size, beam_size, alpha, + max_decode_length, eos_id, padded_decode, dtype) + return sbs.search(initial_ids, initial_cache) + + +def _log_prob_from_logits(logits): + return logits - tf.reduce_logsumexp(logits, axis=2, keepdims=True) + + +def _length_normalization(alpha, length, dtype=tf.float32): + """Return length normalization factor.""" + return tf.pow(((5. + tf.cast(length, dtype)) / 6.), alpha) + + +def expand_to_beam_size(tensor, beam_size): + """Tiles a given tensor by beam_size. + + Args: + tensor: tensor to tile [batch_size, ...] + beam_size: How much to tile the tensor by. + + Returns: + Tiled tensor [batch_size, beam_size, ...] 
+ """ + tensor = tf.expand_dims(tensor, axis=1) + tile_dims = [1] * tensor.shape.ndims + tile_dims[1] = beam_size + + return tf.tile(tensor, tile_dims) + + +def flatten_beam_dim(tensor): + """Reshapes first two dimensions into a single dimension. + + Args: + tensor: Tensor to reshape of shape [A, B, ...] + + Returns: + Reshaped tensor of shape [A*B, ...] + """ + shape = _shape_list(tensor) + shape[0] *= shape[1] + shape.pop(1) # Remove beam dim + return tf.reshape(tensor, shape) + + +def _shape_list(tensor): + """Return a list of the tensor's shape, and ensure no None values in list.""" + # Get statically known shape (may contain None's for unknown dimensions) + shape = tensor.get_shape().as_list() + + # Ensure that the shape values are not None + dynamic_shape = tf.shape(tensor) + for i in range(len(shape)): # pylint: disable=consider-using-enumerate + if shape[i] is None: + shape[i] = dynamic_shape[i] + return shape + + +def _get_shape_keep_last_dim(tensor): + shape_list = _shape_list(tensor) + + # Only the last + for i in range(len(shape_list) - 1): + shape_list[i] = None + + if isinstance(shape_list[-1], tf.Tensor): + shape_list[-1] = None + return tf.TensorShape(shape_list) + + +def _unflatten_beam_dim(tensor, batch_size, beam_size): + """Reshapes first dimension back to [batch_size, beam_size]. + + Args: + tensor: Tensor to reshape of shape [batch_size*beam_size, ...] + batch_size: Tensor, original batch size. + beam_size: int, original beam size. + + Returns: + Reshaped tensor of shape [batch_size, beam_size, ...] + """ + shape = _shape_list(tensor) + new_shape = [batch_size, beam_size] + shape[1:] + return tf.reshape(tensor, new_shape) + + +def _gather_beams(nested, beam_indices, batch_size, new_beam_size): + """Gather beams from nested structure of tensors. + + Each tensor in nested represents a batch of beams, where beam refers to a + single search state (beam search involves searching through multiple states + in parallel). 
+ + This function is used to gather the top beams, specified by + beam_indices, from the nested tensors. + + Args: + nested: Nested structure (tensor, list, tuple or dict) containing tensors + with shape [batch_size, beam_size, ...]. + beam_indices: int32 tensor with shape [batch_size, new_beam_size]. Each + value in beam_indices must be between [0, beam_size), and are not + necessarily unique. + batch_size: int size of batch + new_beam_size: int number of beams to be pulled from the nested tensors. + + Returns: + Nested structure containing tensors with shape + [batch_size, new_beam_size, ...] + """ + # Computes the i'th coodinate that contains the batch index for gather_nd. + # Batch pos is a tensor like [[0,0,0,0,],[1,1,1,1],..]. + batch_pos = tf.range(batch_size * new_beam_size) // new_beam_size + batch_pos = tf.reshape(batch_pos, [batch_size, new_beam_size]) + + # Create coordinates to be passed to tf.gather_nd. Stacking creates a tensor + # with shape [batch_size, beam_size, 2], where the last dimension contains + # the (i, j) gathering coordinates. + coordinates = tf.stack([batch_pos, beam_indices], axis=2) + + return tf.nest.map_structure(lambda state: tf.gather_nd(state, coordinates), + nested) + + +def _gather_topk_beams(nested, score_or_log_prob, batch_size, beam_size): + """Gather top beams from nested structure.""" + _, topk_indexes = tf.nn.top_k(score_or_log_prob, k=beam_size) + return _gather_beams(nested, topk_indexes, batch_size, beam_size) diff --git a/official/nlp/modeling/ops/beam_search_test.py b/official/nlp/modeling/ops/beam_search_test.py new file mode 100644 index 0000000000000000000000000000000000000000..dcfd22646e08570f5b6f1210baf276fb3c32d901 --- /dev/null +++ b/official/nlp/modeling/ops/beam_search_test.py @@ -0,0 +1,108 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test beam search helper methods.""" + +from absl.testing import parameterized +import tensorflow as tf + +from official.nlp.modeling.ops import beam_search + + +class BeamSearchTests(tf.test.TestCase, parameterized.TestCase): + + def test_expand_to_beam_size(self): + x = tf.ones([7, 4, 2, 5]) + x = beam_search.expand_to_beam_size(x, 3) + shape = tf.shape(x) + self.assertAllEqual([7, 3, 4, 2, 5], shape) + + def test_get_shape_keep_last_dim(self): + y = tf.constant(4.0) + x = tf.ones([7, tf.cast(tf.sqrt(y), tf.int32), 2, 5]) + shape = beam_search._get_shape_keep_last_dim(x) + self.assertAllEqual([None, None, None, 5], shape.as_list()) + + def test_flatten_beam_dim(self): + x = tf.ones([7, 4, 2, 5]) + x = beam_search.flatten_beam_dim(x) + self.assertAllEqual([28, 2, 5], tf.shape(x)) + + def test_unflatten_beam_dim(self): + x = tf.ones([28, 2, 5]) + x = beam_search._unflatten_beam_dim(x, 7, 4) + self.assertAllEqual([7, 4, 2, 5], tf.shape(x)) + + def test_gather_beams(self): + x = tf.reshape(tf.range(24), [2, 3, 4]) + # x looks like: [[[ 0 1 2 3] + # [ 4 5 6 7] + # [ 8 9 10 11]] + # + # [[12 13 14 15] + # [16 17 18 19] + # [20 21 22 23]]] + + y = beam_search._gather_beams(x, [[1, 2], [0, 2]], 2, 2) + self.assertAllEqual( + [[[4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [20, 21, 22, 23]]], + y) + + def test_gather_topk_beams(self): + x = tf.reshape(tf.range(24), [2, 3, 4]) + x_scores = [[0, 1, 1], [1, 0, 1]] + + y = beam_search._gather_topk_beams(x, x_scores, 2, 2) + self.assertAllEqual( + [[[4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [20, 21, 
22, 23]]], + y) + + @parameterized.named_parameters([ + ('padded_decode_true', True), + ('padded_decode_false', False), + ]) + def test_sequence_beam_search(self, padded_decode): + # batch_size*beam_size, max_decode_length, vocab_size + probabilities = tf.constant([[[0.2, 0.7, 0.1], [0.5, 0.3, 0.2], + [0.1, 0.8, 0.1]], + [[0.1, 0.8, 0.1], [0.3, 0.4, 0.3], + [0.2, 0.1, 0.7]]]) + # batch_size, max_decode_length, num_heads, embed_size per head + x = tf.zeros([1, 3, 2, 32], dtype=tf.float32) + cache = {'layer_%d' % layer: {'k': x, 'v': x} for layer in range(2)} + + def _get_test_symbols_to_logits_fn(): + """Test function that returns logits for next token.""" + + def symbols_to_logits_fn(_, i, cache): + logits = tf.cast(probabilities[:, i, :], tf.float32) + return logits, cache + return symbols_to_logits_fn + + predictions, _ = beam_search.sequence_beam_search( + symbols_to_logits_fn=_get_test_symbols_to_logits_fn(), + initial_ids=tf.zeros([1], dtype=tf.int32), + initial_cache=cache, + vocab_size=3, + beam_size=2, + alpha=0.6, + max_decode_length=3, + eos_id=9, + padded_decode=padded_decode, + dtype=tf.float32) + self.assertAllEqual([[[0, 1, 0, 1], [0, 1, 1, 2]]], predictions) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/modeling/ops/decoding_module.py b/official/nlp/modeling/ops/decoding_module.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd928f130ed82f839155bdc845a5d7326e1ec2f --- /dev/null +++ b/official/nlp/modeling/ops/decoding_module.py @@ -0,0 +1,282 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base class for Decoding Strategies (beam_search, top_k, top_p and greedy).""" + +import abc +from typing import Any, Callable, Dict, Tuple + +import tensorflow as tf + +from tensorflow.python.framework import dtypes +from official.modeling import tf_utils + +Output = Tuple[tf.Tensor, tf.Tensor] +InternalState = Tuple[tf.Tensor, tf.Tensor, tf.Tensor, Dict] +InitialState = Tuple[Dict[str, Any], Dict[str, Any]] + + +class StateKeys: + """Keys to dictionary storing the state of Decoding loop.""" + + # Variable storing the loop index. + CUR_INDEX = "CUR_INDEX" + + # Top sequences that are alive for each batch item. Alive sequences are ones + # that have not generated an EOS token. Sequences that reach EOS are marked as + # finished and moved to the FINISHED_SEQ tensor. + # Has shape [batch_size, beam_size, CUR_INDEX + 1] for SequenceBeamSearch and + # [batch_size, CUR_INDEX + 1] otherwise. + ALIVE_SEQ = "ALIVE_SEQ" + # Log probabilities of each alive sequence. Shape [batch_size, beam_size] + ALIVE_LOG_PROBS = "ALIVE_LOG_PROBS" + # Dictionary of cached values for each alive sequence. The cache stores + # the encoder output, attention bias, and the decoder attention output from + # the previous iteration. + ALIVE_CACHE = "ALIVE_CACHE" + + # Top finished sequences for each batch item. + # Has shape [batch_size, beam_size, CUR_INDEX + 1]. Sequences that are + # shorter than CUR_INDEX + 1 are padded with 0s. + FINISHED_SEQ = "FINISHED_SEQ" + # Scores for each finished sequence. 
Score = log probability / length norm + # Shape [batch_size, beam_size] + FINISHED_SCORES = "FINISHED_SCORES" + # Flags indicating which sequences in the finished sequences are finished. + # At the beginning, all of the sequences in FINISHED_SEQ are filler values. + # True -> finished sequence, False -> filler. Shape [batch_size, beam_size] + FINISHED_FLAGS = "FINISHED_FLAGS" + + +def log_prob_from_logits(logits): + return logits - tf.reduce_logsumexp(logits, axis=-1, keepdims=True) + + +def shape_list(tensor): + """Return a list of the tensor's shape, and ensure no None values in list.""" + return tf_utils.get_shape_list(tensor) + + +def get_shape_keep_last_dim(tensor): + shape_list_obj = shape_list(tensor) + for i in range(len(shape_list_obj) - 1): + shape_list_obj[i] = None + + if isinstance(shape_list_obj[-1], tf.Tensor): + shape_list_obj[-1] = None + return tf.TensorShape(shape_list_obj) + + +def expand_to_same_rank(tensor, target): + """Expands a given tensor to target's rank to be broadcastable. + + Args: + tensor: input tensor to tile. Shape: [b, d1, ..., da] + target: target tensor. Shape: [b, d1, ..., da, ..., dn] + + Returns: + Tiled tensor of shape [b, d1, ..., da, 1, ..., 1] with same rank of target + + Raises: + ValueError, if the shape rank of rank tensor/target is None. + """ + if tensor.shape.rank is None: + raise ValueError("Expect rank for tensor shape, but got None.") + if target.shape.rank is None: + raise ValueError("Expect rank for target shape, but got None.") + + with tf.name_scope("expand_rank"): + diff_rank = target.shape.rank - tensor.shape.rank + for _ in range(diff_rank): + tensor = tf.expand_dims(tensor, -1) + return tensor + + +class DecodingModule(tf.Module, metaclass=abc.ABCMeta): + """A base class for the API required for decoding (go/decoding-tf-nlp).""" + + def __init__(self, + length_normalization_fn: Callable[[int, tf.DType], float], + dtype: tf.DType = tf.float32): + """Initialize the Decoding Module. 
+ + Args: + length_normalization_fn: Closure for returning length normalization + parameter. Function accepts input as length, dtype and returns float. + dtype: A tensorflow data type used for score computation. The default is + tf.float32. + """ + self.length_normalization_fn = length_normalization_fn + self.dtype = tf.as_dtype(dtype) + + def generate(self, + initial_ids: tf.Tensor, + initial_cache: Dict[str, tf.Tensor]) -> Output: + """Implements the decoding strategy (beam_search or sampling). + + Args: + initial_ids: initial ids to pass into the symbols_to_logits_fn. + int tensor with shape [batch_size, 1] + initial_cache: dictionary for caching model outputs from previous step. + Returns: + Tuple of tensors representing + finished_sequence: shape [batch, max_seq_length] + finished_scores: [batch] + """ + batch_size = ( + initial_ids.shape.as_list()[0] + if self.padded_decode else tf.shape(initial_ids)[0]) + + state, state_shapes = self._create_initial_state(initial_ids, + initial_cache, + batch_size) + + def _generate_step(state): + topk_seq, topk_log_probs, topk_ids, new_cache = self._grow_alive_seq( + state, batch_size) + new_finished_flags = self._finished_flags(topk_ids, state) + alive_state = self._get_new_alive_state(topk_seq, + topk_log_probs, + new_finished_flags, + new_cache) + finished_state = self._get_new_finished_state(state, + topk_seq, + topk_log_probs, + new_finished_flags, + batch_size) + new_state = { + StateKeys.CUR_INDEX: state[StateKeys.CUR_INDEX] + 1 + } + new_state.update(alive_state) + new_state.update(finished_state) + return [new_state] + + finished_state = tf.nest.map_structure( + tf.stop_gradient, + tf.while_loop( + self._continue_search, + _generate_step, + loop_vars=[state], + shape_invariants=[state_shapes], + parallel_iterations=1)) + final_state = self._process_finished_state(finished_state[0]) + return final_state + + @abc.abstractmethod + def _create_initial_state(self, + initial_ids: tf.Tensor, + initial_cache: Dict[str, 
tf.Tensor], + batch_size: int) -> InitialState: + """Return initial state dictionary and its shape invariants.""" + pass + + @abc.abstractmethod + def _grow_alive_seq(self, + state: Dict[str, Any], + batch_size: int) -> InternalState: + """Grow alive sequences by one token. + + Args: + state: A dictionary with the current loop state. + batch_size: The given batch size + + Returns: + Tuple of + (Top sequences, + Scores of returned sequences, + New ids, + New alive cache) + """ + pass + + @abc.abstractmethod + def _get_new_alive_state( + self, + new_seq: tf.Tensor, + new_log_probs: tf.Tensor, + new_finished_flags: tf.Tensor, + new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]: + """Gather the sequences that are still alive. + + Args: + new_seq: New sequences generated by growing the current alive sequences + int32 tensor with shape + new_log_probs: Log probabilities of new sequences float32 tensor with + shape + new_finished_flags: A boolean Tensor indicates which sequences are live. + new_cache: Dict of cached values for each sequence. + + Returns: + Dictionary with alive keys from StateKeys. + """ + pass + + @abc.abstractmethod + def _get_new_finished_state(self, + state: Dict[str, Any], + new_seq: tf.Tensor, + new_log_probs: tf.Tensor, + new_finished_flags: tf.Tensor, + batch_size: int) -> Dict[str, tf.Tensor]: + """Combine new and old finished sequences. + + Args: + state: A dictionary with the current loop state. + new_seq: New sequences generated by growing the current alive sequences + int32 tensor. + new_log_probs: Log probabilities of new sequences float32 tensor with + shape. + new_finished_flags: A boolean Tensor indicates which sequences are live. + batch_size: The given batch size. + + Returns: + Dictionary with finished keys from StateKeys. 
+ """ + pass + + @abc.abstractmethod + def _process_finished_state(self, finished_state: Dict[str, Any]) -> Output: + """Process the alive/finished state to return final sequences and scores.""" + pass + + @abc.abstractmethod + def _continue_search(self, state: Dict[str, Any]) -> tf.Tensor: + """Returns a bool tensor if the decoding loop should continue.""" + pass + + @abc.abstractmethod + def _finished_flags(self, + topk_ids: tf.Tensor, + state: Dict[str, Any]) -> tf.Tensor: + """Calculate the finished flags.""" + pass + + def inf(self): + """Returns a value close to infinity, but is still finite in `dtype`. + + This is useful to get a very large value that is still zero when multiplied + by zero. The floating-point "Inf" value is NaN when multiplied by zero. + + Returns: + A very large value. + """ + if self.dtype == dtypes.float32 or self.dtype == dtypes.bfloat16: + return 1e7 + elif self.dtype == dtypes.float16: + return dtypes.float16.max + else: + raise AssertionError("Invalid dtype: %s" % self.dtype) + + + diff --git a/official/nlp/modeling/ops/decoding_module_test.py b/official/nlp/modeling/ops/decoding_module_test.py new file mode 100644 index 0000000000000000000000000000000000000000..da444ed5394a6fd257663b61c9230be715d7846c --- /dev/null +++ b/official/nlp/modeling/ops/decoding_module_test.py @@ -0,0 +1,84 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Test decoding utility methods."""

import abc
import tensorflow as tf

from official.nlp.modeling.ops import decoding_module


def length_normalization(length, dtype):
  """Returns a trivial length-normalization factor.

  The exponent is 0.0, so this always evaluates to 1.0 regardless of
  `length` — a deliberately inert normalizer for exercising the decoding
  plumbing in tests (see `test_length_normalization`).

  Args:
    length: Integer sequence length.
    dtype: dtype used for the intermediate cast.

  Returns:
    A scalar tensor equal to 1.0.
  """
  return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)


class TestSubclass(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
  """Minimal concrete `DecodingModule` whose abstract hooks are all no-ops."""

  def __init__(self,
               length_normalization_fn=length_normalization,
               dtype=tf.float32):
    # Fix: forward the constructor argument rather than always passing the
    # module-level `length_normalization` function, which silently ignored
    # any caller-supplied normalizer.
    super(TestSubclass, self).__init__(
        length_normalization_fn=length_normalization_fn, dtype=dtype)

  def _create_initial_state(self, initial_ids, initial_cache, batch_size):
    pass

  def _grow_alive_seq(self, state, batch_size):
    pass

  def _process_finished_state(self, finished_state):
    pass

  def _get_new_finished_state(self, state, new_seq, new_log_probs,
                              new_finished_flags, batch_size):
    pass

  def _finished_flags(self, topk_ids, state):
    pass

  def _continue_search(self, state):
    pass

  def _get_new_alive_state(self, new_seq, new_log_probs, new_finished_flags,
                           new_cache):
    pass


class DecodingModuleTest(tf.test.TestCase):
  """Tests for the free helpers and concrete methods of decoding_module."""

  def test_get_shape_keep_last_dim(self):
    # Build a tensor whose leading dims are dynamic at graph-construction
    # time; only the last dim should survive as a static value.
    y = tf.constant(4.0)
    x = tf.ones([7, tf.cast(tf.sqrt(y), tf.int32), 2, 5])
    shape = decoding_module.get_shape_keep_last_dim(x)
    self.assertAllEqual([None, None, None, 5], shape.as_list())

  def test_shape_list(self):
    x = tf.ones([7, 1])
    shape = decoding_module.shape_list(x)
    self.assertAllEqual([7, 1], shape)

  def test_inf(self):
    # float32 "infinity" is capped at 1e7 so that inf * 0 == 0 holds.
    d = TestSubclass()
    inf_value = d.inf()
    self.assertAllEqual(inf_value, tf.constant(10000000., tf.float32))

  def test_length_normalization(self):
    # The test normalizer has exponent 0.0 and must return exactly 1.0.
    d = TestSubclass()
    normalized_length = d.length_normalization_fn(32, tf.float32)
    self.assertAllEqual(normalized_length, tf.constant(1.0, tf.float32))

if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Sampling module for top_k, top_p and greedy decoding."""

import abc
from typing import Any, Callable, Dict

import numpy as np
import tensorflow as tf

from official.nlp.modeling.ops import decoding_module


def greedy(log_probs):
  """Returns the top ids and scores based on greedy decoding."""
  log_probs, ids = tf.math.top_k(log_probs, k=1)
  return log_probs, ids


def sample_logits_with_temperature(logits, temperature):
  """Applies a sampling temperature.

  Temperature skews the distribution towards high probability
  tokens and lowers the mass in tail distribution.

  Args:
    logits: Input logits for next token.
    temperature: Tensor for specifying the sampling temperature.

  Returns:
    Logits with applied temperature.
  """
  return logits / temperature


def sample_top_k(logits, top_k):
  """Chooses top_k logits and sets the others to negative infinity.

  Args:
    logits: Input logits for next token.
    top_k: Tensor to specify the top_k values.

  Returns:
    Logits with top_k filtering applied.
  """
  top_k_logits = tf.math.top_k(logits, k=top_k)
  # Everything strictly below the k-th largest logit gets masked out.
  indices_to_remove = logits < tf.expand_dims(top_k_logits[0][..., -1], -1)
  # -np.inf (== the removed np.NINF alias) keeps masked tokens at zero
  # probability after softmax.
  top_k_logits = set_tensor_by_indices_to_value(logits, indices_to_remove,
                                                -np.inf)
  return top_k_logits


def sample_top_p(logits, top_p):
  """Chooses most probable logits with cumulative probabilities up to top_p.

  Sets the remaining logits to negative infinity.

  Args:
    logits: Input logits for next token.
    top_p: Float tensor with a value >=0 and < 1.0

  Returns:
    Logits with top_p filtering applied.
  """
  sorted_indices = tf.argsort(logits, direction="DESCENDING")
  # Flatten logits as tf.gather on TPU needs axis to be compile time constant.
  logits_shape = decoding_module.shape_list(logits)
  range_for_gather = tf.expand_dims(tf.range(0, logits_shape[0]), axis=1)
  range_for_gather = tf.tile(range_for_gather * logits_shape[1],
                             [1, logits_shape[1]]) + sorted_indices
  flattened_logits = tf.reshape(logits, [-1])
  flattened_sorted_indices = tf.reshape(range_for_gather, [-1])
  sorted_logits = tf.reshape(
      tf.gather(flattened_logits, flattened_sorted_indices),
      [logits_shape[0], logits_shape[1]])
  cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)

  # Remove tokens with cumulative probability above the threshold.
  sorted_indices_to_remove = cumulative_probs > top_p

  # Shift the indices to the right to keep the first token above threshold.
  sorted_indices_to_remove = tf.roll(sorted_indices_to_remove, 1, axis=-1)
  sorted_indices_to_remove = tf.concat([
      tf.zeros_like(sorted_indices_to_remove[:, :1]),
      sorted_indices_to_remove[:, 1:]
  ], -1)

  # Scatter sorted indices to original indexes.
  indices_to_remove = scatter_values_on_batch_indices(
      sorted_indices_to_remove, sorted_indices)
  top_p_logits = set_tensor_by_indices_to_value(
      logits, indices_to_remove, -np.inf)
  return top_p_logits


def scatter_values_on_batch_indices(values, batch_indices):
  """Scatter `values` into a tensor using `batch_indices`.

  Args:
    values: tensor of shape [batch_size, vocab_size] containing the values to
      scatter
    batch_indices: tensor of shape [batch_size, vocab_size] containing the
      indices to insert (should be a permutation in range(0, n))

  Returns:
    Tensor of shape [batch_size, vocab_size] with values inserted at
    batch_indices
  """
  tensor_shape = decoding_module.shape_list(batch_indices)
  # Pair every column index with its row index so tf.scatter_nd can place
  # each value at its original (row, col) position.
  broad_casted_batch_dims = tf.reshape(
      tf.broadcast_to(
          tf.expand_dims(tf.range(tensor_shape[0]), axis=-1),
          tensor_shape), [1, -1])
  pair_indices = tf.transpose(
      tf.concat([broad_casted_batch_dims,
                 tf.reshape(batch_indices, [1, -1])], 0))
  return tf.scatter_nd(pair_indices,
                       tf.reshape(values, [-1]), tensor_shape)


def set_tensor_by_indices_to_value(input_tensor, indices, value):
  """Where indices is True, set the value in input_tensor to value.

  Args:
    input_tensor: float (batch_size, dim)
    indices: bool (batch_size, dim)
    value: float scalar
  Returns:
    output_tensor: same shape as input_tensor.
  """
  value_tensor = tf.zeros_like(input_tensor) + value
  output_tensor = tf.where(indices, value_tensor, input_tensor)
  return output_tensor


class SamplingModule(decoding_module.DecodingModule, metaclass=abc.ABCMeta):
  """Implementation for sampling strategies (go/decoding-tf-nlp)."""

  def __init__(self,
               symbols_to_logits_fn,
               length_normalization_fn: Callable[[int, tf.DType], float],
               vocab_size: int,
               max_decode_length: int,
               eos_id: int,
               padded_decode: bool,
               top_k=0,
               top_p=1.0,
               sample_temperature=0.0,
               enable_greedy: bool = True,
               dtype: tf.DType = tf.float32):
    """Initialize sampling module.

    Args:
      symbols_to_logits_fn: Callable mapping (ids, index, cache) to
        (logits, new_cache) for the next decoding step.
      length_normalization_fn: Callable producing a length-normalization
        factor, or None to skip normalization.
      vocab_size: Size of the output vocabulary.
      max_decode_length: Maximum number of decoding steps.
      eos_id: Token id that terminates a sequence.
      padded_decode: Whether sequences are pre-padded to full length
        (required for TPU decoding).
      top_k: If > 0, restrict sampling to the k highest logits.
      top_p: If < 1.0, restrict sampling to the smallest nucleus of tokens
        whose cumulative probability exceeds top_p.
      sample_temperature: If > 0, temperature applied before sampling.
      enable_greedy: If True, decode greedily and ignore top_k/top_p/
        temperature.
      dtype: Float dtype used for scores and caches.
    """
    self.symbols_to_logits_fn = symbols_to_logits_fn
    self.length_normalization_fn = length_normalization_fn
    self.eos_id = eos_id
    self.padded_decode = padded_decode
    self.dtype = tf.as_dtype(dtype)
    self.vocab_size = tf.convert_to_tensor(vocab_size, dtype=tf.int32)
    self.max_decode_length = max_decode_length
    self.top_k = tf.convert_to_tensor(top_k, dtype=tf.int32)
    self.top_p = tf.convert_to_tensor(top_p, dtype=tf.float32)
    self.sample_temperature = tf.convert_to_tensor(sample_temperature,
                                                   dtype=tf.float32)
    self.enable_greedy = enable_greedy
    super(SamplingModule, self).__init__(
        length_normalization_fn=length_normalization_fn, dtype=dtype)

  def _grow_alive_seq(self,
                      state: Dict[str, Any],
                      batch_size: int) -> decoding_module.InternalState:
    """Grow alive sequences by one token.

    This function will implement the decoding strategies like top_p, top_k
    and greedy for the choosing the next logit.

    Args:
      state: A dictionary with the current loop state.
      batch_size: The given batch size

    Returns:
      Tuple of
      (Top sequences [batch, curr_index + 1] or [batch, max_decode_length + 1],
       Scores of returned sequences [batch, 1],
       New ids [batch, 1],
       New alive cache)
    """
    i = state[decoding_module.StateKeys.CUR_INDEX]
    alive_seq = state[decoding_module.StateKeys.ALIVE_SEQ]
    alive_log_probs = state[decoding_module.StateKeys.ALIVE_LOG_PROBS]
    alive_cache = state[decoding_module.StateKeys.ALIVE_CACHE]

    if self.padded_decode:
      # In padded mode only the current position is fed to the model.
      ids = tf.slice(alive_seq, [0, i], [batch_size, 1])
    else:
      ids = alive_seq

    new_logits, new_cache = self.symbols_to_logits_fn(ids, i, alive_cache)
    candidate_log_probs = decoding_module.log_prob_from_logits(
        new_logits)
    original_log_probs = candidate_log_probs + alive_log_probs

    topk_log_probs, topk_ids = None, None
    if self.enable_greedy:
      topk_log_probs, topk_ids = greedy(original_log_probs)
    else:
      # Apply temperature, then top_k, then top_p filtering, each gated by
      # its configured threshold; finally sample from the filtered logits.
      temperature_fn = sample_logits_with_temperature
      sampled_logits = tf.cond(
          self.sample_temperature > 0.0,
          lambda: temperature_fn(new_logits, self.sample_temperature),
          lambda: new_logits)
      sampled_logits = tf.cond(
          self.top_k > 0,
          lambda: sample_top_k(sampled_logits, self.top_k),
          lambda: sampled_logits)
      sampled_logits = tf.cond(
          self.top_p < 1,
          lambda: sample_top_p(sampled_logits, self.top_p),
          lambda: sampled_logits)
      topk_ids = tf.random.categorical(
          sampled_logits, dtype=tf.int32, num_samples=1)
      topk_log_probs = tf.gather(
          original_log_probs, topk_ids, axis=1, batch_dims=1)
    if self.padded_decode:
      # Write the sampled id into position i + 1 of the pre-allocated
      # sequence buffer (transpose so positions are the leading axis).
      topk_seq = tf.transpose(alive_seq, perm=[1, 0])
      topk_seq = tf.tensor_scatter_nd_update(
          topk_seq, [[i + 1]], tf.expand_dims(tf.squeeze(topk_ids, -1), 0))
      topk_seq = tf.transpose(topk_seq, perm=[1, 0])
    else:
      topk_seq = tf.concat([alive_seq, topk_ids], axis=-1)
    return topk_seq, topk_log_probs, topk_ids, new_cache

  def _create_initial_state(self,
                            initial_ids: tf.Tensor,
                            initial_cache: Dict[str, tf.Tensor],
                            batch_size: int) -> decoding_module.InitialState:
    """Return initial state dictionary and its shape invariants."""
    for key, value in initial_cache.items():
      for inner_value in tf.nest.flatten(value):
        if inner_value.dtype != self.dtype:
          # Bug fix: report inner_value.dtype (the element actually checked).
          # `value` may be a nested structure with no .dtype attribute, in
          # which case the original message formatting itself raised.
          raise TypeError(
              "initial_cache element for key '%s' has dtype %s that does not "
              "match sampling_module's dtype of %s. Value: %s" %
              (key, inner_value.dtype.name, self.dtype.name, inner_value))

    # Current loop index (starts at 0)
    cur_index = tf.constant(0)

    # Alive sequence with shape [batch_size, 1]
    alive_seq = initial_ids
    alive_seq = tf.expand_dims(alive_seq, axis=-1)
    if self.padded_decode:
      alive_seq = tf.tile(alive_seq, [1, self.max_decode_length + 1])

    # Initial log probabilities with shape [batch_size, 1].
    initial_log_probs = tf.constant([[0.]], dtype=self.dtype)
    alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1])

    alive_cache = initial_cache

    # Initialize tensor storing finished sequences [batch_size, 1, 1].
    finished_seq = tf.zeros(tf.shape(alive_seq), tf.int32)

    # Scores of the initial finished seqs (none exist yet).
    finished_scores = tf.zeros([batch_size, 1], dtype=self.dtype)

    # Initialize finished flags with all False values.
    finished_flags = tf.zeros([batch_size, 1], tf.bool)

    # Create state dictionary and state shapes.
    state = {
        decoding_module.StateKeys.CUR_INDEX: cur_index,
        decoding_module.StateKeys.ALIVE_SEQ: alive_seq,
        decoding_module.StateKeys.ALIVE_LOG_PROBS: alive_log_probs,
        decoding_module.StateKeys.ALIVE_CACHE: alive_cache,
        decoding_module.StateKeys.FINISHED_SEQ: finished_seq,
        decoding_module.StateKeys.FINISHED_SCORES: finished_scores,
        decoding_module.StateKeys.FINISHED_FLAGS: finished_flags
    }

    if self.padded_decode:
      # With padded decode all shapes are fully static.
      state_shape_invariants = {
          decoding_module.StateKeys.CUR_INDEX:
              tf.TensorShape([]),
          decoding_module.StateKeys.ALIVE_SEQ:
              tf.TensorShape(
                  [batch_size, self.max_decode_length + 1]),
          decoding_module.StateKeys.ALIVE_LOG_PROBS:
              tf.TensorShape([batch_size, 1]),
          decoding_module.StateKeys.ALIVE_CACHE:
              tf.nest.map_structure(lambda state: state.get_shape(),
                                    alive_cache),
          decoding_module.StateKeys.FINISHED_SEQ:
              tf.TensorShape(
                  [batch_size, self.max_decode_length + 1]),
          decoding_module.StateKeys.FINISHED_SCORES:
              tf.TensorShape([batch_size, 1]),
          decoding_module.StateKeys.FINISHED_FLAGS:
              tf.TensorShape([batch_size, 1])
      }
    else:
      # Sequences grow each step, so length dims stay dynamic (None).
      state_shape_invariants = {
          decoding_module.StateKeys.CUR_INDEX:
              tf.TensorShape([]),
          decoding_module.StateKeys.ALIVE_SEQ:
              tf.TensorShape([None, None]),
          decoding_module.StateKeys.ALIVE_LOG_PROBS:
              tf.TensorShape([None, 1]),
          decoding_module.StateKeys.ALIVE_CACHE:
              tf.nest.map_structure(
                  decoding_module.get_shape_keep_last_dim,
                  alive_cache),
          decoding_module.StateKeys.FINISHED_SEQ:
              tf.TensorShape([None, None]),
          decoding_module.StateKeys.FINISHED_SCORES:
              tf.TensorShape([None, 1]),
          decoding_module.StateKeys.FINISHED_FLAGS:
              tf.TensorShape([None, 1])
      }

    return state, state_shape_invariants

  def _get_new_alive_state(
      self,
      new_seq: tf.Tensor,
      new_log_probs: tf.Tensor,
      new_finished_flags: tf.Tensor,
      new_cache: Dict[str, tf.Tensor]) -> Dict[str, Any]:
    """Gather the sequences that are still alive.

    This function resets the sequences in the alive_state that are finished.

    Args:
      new_seq: New sequences generated by growing the current alive sequences
        int32 tensor with shape [batch_size, cur_index + 1]
      new_log_probs: Log probabilities of new sequences float32 tensor with
        shape [batch_size, 1]
      new_finished_flags: A boolean Tensor indicates which sequences are live
        inside the beam.
      new_cache: Dict of cached values for each sequence.

    Returns:
      Dictionary with alive keys.
    """
    # Zero out sequences that just finished so they no longer grow.
    new_seq = tf.multiply(
        new_seq, tf.cast(tf.logical_not(new_finished_flags), new_seq.dtype))
    return {
        decoding_module.StateKeys.ALIVE_SEQ: new_seq,
        decoding_module.StateKeys.ALIVE_LOG_PROBS: new_log_probs,
        decoding_module.StateKeys.ALIVE_CACHE: new_cache
    }

  def _get_new_finished_state(self,
                              state: Dict[str, Any],
                              new_seq: tf.Tensor,
                              new_log_probs: tf.Tensor,
                              new_finished_flags: tf.Tensor,
                              batch_size: int) -> Dict[str, tf.Tensor]:
    """Combine new and old finished sequences.

    Args:
      state: A dictionary with the current loop state.
      new_seq: New sequences generated by growing the current alive sequences
        int32 tensor [batch, curr_index + 1] or [batch, max_decode_length + 1].
      new_log_probs: Log probabilities of new sequences float32 tensor with
        shape [batch, 1].
      new_finished_flags: A boolean Tensor indicates which sequences are live.
      batch_size: The given batch size.

    Returns:
      Dictionary with finished keys from StateKeys.
    """
    i = state[decoding_module.StateKeys.CUR_INDEX]
    finished_seq = state[decoding_module.StateKeys.FINISHED_SEQ]
    finished_scores = state[decoding_module.StateKeys.FINISHED_SCORES]
    finished_flags = state[decoding_module.StateKeys.FINISHED_FLAGS]

    if not self.padded_decode:
      # Grow the finished buffer in lockstep with the alive sequences.
      finished_seq = tf.concat(
          [finished_seq, tf.zeros([batch_size, 1], tf.int32)], axis=-1)
    new_scores = new_log_probs
    if self.length_normalization_fn is not None:
      length_norm = self.length_normalization_fn(i + 1, self.dtype)
      new_scores = new_log_probs / length_norm
    # Mask out rows that had already finished in an earlier step so they are
    # not overwritten by this step's candidates.
    new_seq = tf.multiply(
        new_seq, tf.cast(tf.logical_not(finished_flags), new_seq.dtype))
    new_scores = tf.multiply(
        new_scores, tf.cast(tf.logical_not(finished_flags), new_scores.dtype))

    finished_seq += tf.multiply(new_seq,
                                tf.cast(new_finished_flags, new_seq.dtype))
    finished_scores += tf.multiply(
        new_scores, tf.cast(new_finished_flags, new_scores.dtype))
    new_finished_flags = tf.logical_or(new_finished_flags, finished_flags)
    return {
        decoding_module.StateKeys.FINISHED_SEQ: finished_seq,
        decoding_module.StateKeys.FINISHED_SCORES: finished_scores,
        decoding_module.StateKeys.FINISHED_FLAGS: new_finished_flags
    }

  def _process_finished_state(
      self, finished_state: Dict[str, Any]) -> decoding_module.Output:
    """Process the alive/finished state to return final sequences and scores."""
    alive_seq = finished_state[decoding_module.StateKeys.ALIVE_SEQ]
    alive_log_probs = finished_state[decoding_module.StateKeys.ALIVE_LOG_PROBS]
    finished_seq = finished_state[decoding_module.StateKeys.FINISHED_SEQ]
    finished_scores = finished_state[decoding_module.StateKeys.FINISHED_SCORES]
    finished_flags = finished_state[decoding_module.StateKeys.FINISHED_FLAGS]
    finished_cond = tf.reduce_any(finished_flags, 1, name="finished_cond")
    if self.length_normalization_fn is not None:
      length_norm = self.length_normalization_fn(self.max_decode_length + 1,
                                                 self.dtype)
      alive_log_probs = alive_log_probs / length_norm
    # Fall back to the (still alive) sequence for rows that never finished.
    seq_cond = decoding_module.expand_to_same_rank(
        finished_cond, finished_seq)
    score_cond = decoding_module.expand_to_same_rank(
        finished_cond, finished_scores)
    finished_seq = tf.where(seq_cond, finished_seq, alive_seq)
    finished_scores = tf.where(score_cond, finished_scores, alive_log_probs)
    return finished_seq, finished_scores

  def _continue_search(self, state) -> tf.Tensor:
    """Returns True while the decode index is below max_decode_length."""
    i = state[decoding_module.StateKeys.CUR_INDEX]
    return tf.less(i, self.max_decode_length)

  def _finished_flags(self, topk_ids, state) -> tf.Tensor:
    """Marks a row finished once it emits eos_id; flags are sticky."""
    new_finished_flags = tf.equal(topk_ids, self.eos_id)
    new_finished_flags = tf.logical_or(
        new_finished_flags, state[decoding_module.StateKeys.FINISHED_FLAGS])
    return new_finished_flags
"""Module for extracting segments from sentences in documents."""

import tensorflow as tf


# Draw a float in [0, 1) for every position; used below to decide, per
# position, which sampling branch to take.
def _get_random(positions, random_fn):
  """Returns a `RaggedTensor` shaped like `positions` of uniform [0, 1) floats."""
  flat_random = random_fn(
      shape=tf.shape(positions.flat_values),
      minval=0,
      maxval=1,
      dtype=tf.float32)
  return positions.with_flat_values(flat_random)


# For every entry, sample a random integer in [0, maxval - 1].
def _random_int_up_to(maxval, random_fn):
  """Samples an integer in [0, maxval) for each element of `maxval`.

  The int kernel for uniform doesn't support broadcast, so we sample in
  float space and cast back; the cast rounds down, keeping results strictly
  below `maxval`.
  """
  float_maxval = tf.cast(maxval, tf.float32)
  return tf.cast(
      random_fn(
          shape=tf.shape(maxval),
          minval=tf.zeros_like(float_maxval),
          maxval=float_maxval),
      dtype=maxval.dtype)


def _random_int_from_range(minval, maxval, random_fn):
  """Samples an integer in [minval, maxval) for each element.

  As in `_random_int_up_to`, sampling is done in float space (the int
  uniform kernel doesn't broadcast) and the cast back to int rounds down,
  so `maxval` itself is never produced.
  """
  float_minval = tf.cast(minval, tf.float32)
  float_maxval = tf.cast(maxval, tf.float32)
  return tf.cast(
      random_fn(tf.shape(maxval), minval=float_minval, maxval=float_maxval),
      maxval.dtype)


def _sample_from_other_batch(sentences, random_fn):
  """Samples, for every sentence, a (row, sentence) index from a different row."""
  # other_batch: [num_sentences]: The batch to sample from for each
  # sentence. Sampling over nrows - 1 and then skipping past each
  # sentence's own row guarantees a row different from the sentence's own.
  other_batch = random_fn(
      shape=[tf.size(sentences)],
      minval=0,
      maxval=sentences.nrows() - 1,
      dtype=tf.int64)

  other_batch += tf.cast(other_batch >= sentences.value_rowids(), tf.int64)

  # other_sentence: [num_sentences]: The sentence within each batch
  # that we sampled.
  other_sentence = _random_int_up_to(
      tf.gather(sentences.row_lengths(), other_batch), random_fn)
  return sentences.with_values(tf.stack([other_batch, other_sentence], axis=1))


def get_sentence_order_labels(sentences,
                              random_threshold=0.5,
                              random_next_threshold=0.5,
                              random_fn=tf.random.uniform):
  """Extract segments and labels for sentence order prediction (SOP) task.

  Extracts the segment and labels for the sentence order prediction task
  defined in "ALBERT: A Lite BERT for Self-Supervised Learning of Language
  Representations" (https://arxiv.org/pdf/1909.11942.pdf)

  Args:
    sentences: a `RaggedTensor` of shape [batch, (num_sentences)] with string
      dtype.
    random_threshold: (optional) A float threshold between 0 and 1, used to
      determine whether to extract a random, out-of-batch sentence or a
      succeeding sentence. Higher value favors succeeding sentence.
    random_next_threshold: (optional) A float threshold between 0 and 1, used
      to determine whether to extract either a random, out-of-batch, or
      succeeding sentence or a preceding sentence. Higher value favors
      preceding sentences.
    random_fn: (optional) An op used to generate random float values.

  Returns:
    a tuple of (preceding_or_random_next, is_succeeding_or_random) where:
    preceding_or_random_next: a `RaggedTensor` of strings with the same shape
      as `sentences` and contains either a preceding, succeeding, or random
      out-of-batch sentence respective to its counterpart in `sentences` and
      dependent on its label in `is_succeeding_or_random`.
    is_succeeding_or_random: a `RaggedTensor` of bool values with the
      same shape as `sentences` and is True if its corresponding sentence in
      `preceding_or_random_next` is a random or succeeding sentence, False
      otherwise.
  """
  # Create a RaggedTensor in the same shape as sentences ([doc, (sentences)])
  # whose values are index positions.
  positions = tf.ragged.range(sentences.row_lengths())

  # Broadcast each row's length over the row (the `+ 0 * positions` trick
  # expands the scalar per row into the ragged shape).
  row_lengths_broadcasted = tf.expand_dims(positions.row_lengths(),
                                           -1) + 0 * positions
  row_lengths_broadcasted_flat = row_lengths_broadcasted.flat_values

  # Generate indices for all preceding, succeeding and random.
  # For every position j in a row, sample a position preceding j, i.e.
  # a position in [0, j - 1].
  all_preceding = tf.ragged.map_flat_values(_random_int_up_to, positions,
                                            random_fn)

  # For every position j, sample a position following j, i.e. a position
  # in [j + 1, row_length - 1].
  all_succeeding = positions.with_flat_values(
      tf.ragged.map_flat_values(_random_int_from_range,
                                positions.flat_values + 1,
                                row_lengths_broadcasted_flat, random_fn))

  # Convert to format that is convenient for `gather_nd`
  rows_broadcasted = tf.expand_dims(tf.range(sentences.nrows()),
                                    -1) + 0 * positions
  all_preceding_nd = tf.stack([rows_broadcasted, all_preceding], -1)
  all_succeeding_nd = tf.stack([rows_broadcasted, all_succeeding], -1)
  all_random_nd = _sample_from_other_batch(positions, random_fn)

  # There's a few spots where there is no "preceding" or "succeeding" item
  # (e.g. first and last sentences in a document). Mark where these are and we
  # will patch them up to grab a random sentence from another document later.
  all_zeros = tf.zeros_like(positions)
  all_ones = tf.ones_like(positions)
  valid_preceding_mask = tf.cast(
      tf.concat([all_zeros[:, :1], all_ones[:, 1:]], -1), tf.bool)
  valid_succeeding_mask = tf.cast(
      tf.concat([all_ones[:, :-1], all_zeros[:, -1:]], -1), tf.bool)

  # Decide what to use for the segment: (1) random, out-of-batch, (2)
  # preceding item, or (3) succeeding.
  # Should get out-of-batch instead of succeeding item
  should_get_random = ((_get_random(positions, random_fn) > random_threshold)
                       | tf.logical_not(valid_succeeding_mask))
  random_or_succeeding_nd = tf.compat.v1.where(should_get_random, all_random_nd,
                                               all_succeeding_nd)
  # Choose which items should get a random or succeeding item. Force positions
  # that don't have a valid preceding item to get a random succeeding item.
  should_get_random_or_succeeding = (
      (_get_random(positions, random_fn) > random_next_threshold)
      | tf.logical_not(valid_preceding_mask))
  gather_indices = tf.compat.v1.where(should_get_random_or_succeeding,
                                      random_or_succeeding_nd, all_preceding_nd)
  return (tf.gather_nd(sentences,
                       gather_indices), should_get_random_or_succeeding)


def get_next_sentence_labels(sentences,
                             random_threshold=0.5,
                             random_fn=tf.random.uniform):
  """Extracts the next sentence label from sentences.

  Args:
    sentences: A `RaggedTensor` of strings w/ shape [batch, (num_sentences)].
    random_threshold: (optional) A float threshold between 0 and 1, used to
      determine whether to extract a random sentence or the immediate next
      sentence. Higher value favors next sentence.
    random_fn: (optional) An op used to generate random float values.

  Returns:
    A tuple of (next_sentence_or_random, is_next_sentence) where:

    next_sentence_or_random: A `Tensor` with shape [num_sentences] that
      contains either the subsequent sentence of `segment_a` or a randomly
      injected sentence.
    is_next_sentence: A `Tensor` of bool w/ shape [num_sentences]
      that contains whether or not `next_sentence_or_random` is truly a
      subsequent sentence or not.
  """
  # shift everyone to get the next sentence predictions positions
  positions = tf.ragged.range(sentences.row_lengths())

  # Shift every position forward by one; the last position of each row wraps
  # around to 0 (it is patched below to a random sentence instead).
  next_sentences_pos = (positions + 1) % tf.expand_dims(sentences.row_lengths(),
                                                        1)
  rows_broadcasted = tf.expand_dims(tf.range(sentences.nrows()),
                                    -1) + 0 * positions
  next_sentences_pos_nd = tf.stack([rows_broadcasted, next_sentences_pos], -1)
  all_random_nd = _sample_from_other_batch(positions, random_fn)

  # Mark the items that don't have a next sentence (e.g. the last
  # sentences in the document). We will patch these up and force them to grab a
  # random sentence from a random document.
  valid_next_sentences = tf.cast(
      tf.concat([
          tf.ones_like(positions)[:, :-1],
          tf.zeros([positions.nrows(), 1], dtype=tf.int64)
      ], -1), tf.bool)

  is_random = ((_get_random(positions, random_fn) > random_threshold)
               | tf.logical_not(valid_next_sentences))
  gather_indices = tf.compat.v1.where(is_random, all_random_nd,
                                      next_sentences_pos_nd)
  return tf.gather_nd(sentences, gather_indices), tf.logical_not(is_random)
+ +# encoding=utf-8 +"""Tests for sentence prediction labels.""" +import functools + +from absl.testing import parameterized +import tensorflow as tf + +from official.nlp.modeling.ops import segment_extractor + + +class NextSentencePredictionTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters([ + dict( + test_description="all random", + sentences=[[b"Hello there.", b"La la la.", b"Such is life."], + [b"Who let the dogs out?", b"Who?."]], + expected_segment=[[ + b"Who let the dogs out?", b"Who?.", b"Who let the dogs out?" + ], [b"Hello there.", b"Hello there."]], + expected_labels=[ + [False, False, False], + [False, False], + ], + random_threshold=0.0, + ), + dict( + test_description="all next", + sentences=[[b"Hello there.", b"La la la.", b"Such is life."], + [b"Who let the dogs out?", b"Who?."]], + expected_segment=[ + [b"La la la.", b"Such is life.", b"Who let the dogs out?"], + [b"Who?.", b"Hello there."], + ], + expected_labels=[ + [True, True, False], + [True, False], + ], + random_threshold=1.0, + ), + ]) + def testNextSentencePrediction(self, + sentences, + expected_segment, + expected_labels, + random_threshold=0.5, + test_description=""): + sentences = tf.ragged.constant(sentences) + # Set seed and rig the shuffle function to a deterministic reverse function + # instead. This is so that we have consistent and deterministic results. 
+ extracted_segment, actual_labels = ( + segment_extractor.get_next_sentence_labels( + sentences, + random_threshold, + random_fn=functools.partial( + tf.random.stateless_uniform, seed=(2, 3)))) + self.assertAllEqual(expected_segment, extracted_segment) + self.assertAllEqual(expected_labels, actual_labels) + + +class SentenceOrderLabelsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters([ + dict( + test_description="all random", + sentences=[[b"Hello there.", b"La la la.", b"Such is life."], + [b"Who let the dogs out?", b"Who?."]], + expected_segment=[[ + b"Who let the dogs out?", b"Who?.", b"Who let the dogs out?" + ], [b"Hello there.", b"Hello there."]], + expected_labels=[[True, True, True], [True, True]], + random_threshold=0.0, + random_next_threshold=0.0, + ), + dict( + test_description="all next", + sentences=[[b"Hello there.", b"La la la.", b"Such is life."], + [b"Who let the dogs out?", b"Who?."]], + expected_segment=[[ + b"La la la.", b"Such is life.", b"Who let the dogs out?" + ], [b"Who?.", b"Hello there."]], + expected_labels=[[True, True, True], [True, True]], + random_threshold=1.0, + random_next_threshold=0.0, + ), + dict( + test_description="all preceeding", + sentences=[[b"Hello there.", b"La la la.", b"Such is life."], + [b"Who let the dogs out?", b"Who?."]], + expected_segment=[ + [b"La la la.", b"Hello there.", b"Hello there."], + [b"Who?.", b"Who let the dogs out?"], + ], + expected_labels=[ + [True, False, False], + [True, False], + ], + random_threshold=1.0, + random_next_threshold=1.0, + ), + ]) + def testSentenceOrderPrediction(self, + sentences, + expected_segment, + expected_labels, + random_threshold=0.5, + random_next_threshold=0.5, + test_description=""): + sentences = tf.ragged.constant(sentences) + # Set seed and rig the shuffle function to a deterministic reverse function + # instead. This is so that we have consistent and deterministic results. 
+ extracted_segment, actual_labels = ( + segment_extractor.get_sentence_order_labels( + sentences, + random_threshold=random_threshold, + random_next_threshold=random_next_threshold, + random_fn=functools.partial( + tf.random.stateless_uniform, seed=(2, 3)))) + self.assertAllEqual(expected_segment, extracted_segment) + self.assertAllEqual(expected_labels, actual_labels) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/nhnet/README.md b/official/nlp/nhnet/README.md index 14c55636ab52b4582cb6b12e88a282c7adbb059e..f838d120fb8bcc419d5eaeb543675eb224cfddbd 100644 --- a/official/nlp/nhnet/README.md +++ b/official/nlp/nhnet/README.md @@ -82,7 +82,7 @@ Next, we can run the following data preprocess script which may take a few hours ```shell # Recall that we use DATA_FOLDER=/path/to/downloaded_dataset. -$ python3 raw_data_preprocess.py \ +$ python3 raw_data_process.py \ -crawled_articles=/tmp/nhnet \ -vocab=/path/to/bert_checkpoint/vocab.txt \ -do_lower_case=True \ @@ -104,14 +104,13 @@ Please first install TensorFlow 2 and Tensorflow Model Garden following the ```shell $ python3 trainer.py \ --mode=train_and_eval \ - --vocab=/path/to/bert_checkpoint/vocab.txt \ --init_checkpoint=/path/to/bert_checkpoint/bert_model.ckpt \ --params_override='init_from_bert2bert=false' \ --train_file_pattern=$DATA_FOLDER/processed/train.tfrecord* \ --model_dir=/path/to/output/model \ --len_title=15 \ --len_passage=200 \ - --max_num_articles=5 \ + --num_nhnet_articles=5 \ --model_type=nhnet \ --train_batch_size=16 \ --train_steps=10000 \ @@ -123,14 +122,13 @@ $ python3 trainer.py \ ```shell $ python3 trainer.py \ --mode=train_and_eval \ - --vocab=/path/to/bert_checkpoint/vocab.txt \ --init_checkpoint=/path/to/bert_checkpoint/bert_model.ckpt \ --params_override='init_from_bert2bert=false' \ --train_file_pattern=$DATA_FOLDER/processed/train.tfrecord* \ --model_dir=/path/to/output/model \ --len_title=15 \ --len_passage=200 \ - --max_num_articles=5 \ + 
--num_nhnet_articles=5 \ --model_type=nhnet \ --train_batch_size=1024 \ --train_steps=10000 \ diff --git a/official/nlp/nhnet/__init__.py b/official/nlp/nhnet/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/nhnet/__init__.py +++ b/official/nlp/nhnet/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/nhnet/configs.py b/official/nlp/nhnet/configs.py index 41cfa6117cb49e00224becb87b129401562a9807..267357b73bfa51572d4aa5c893f301a3ec2e42ff 100644 --- a/official/nlp/nhnet/configs.py +++ b/official/nlp/nhnet/configs.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Common NHNet/Bert2Bert configuration.""" from typing import List, Text diff --git a/official/nlp/nhnet/configs_test.py b/official/nlp/nhnet/configs_test.py index 2b855ec6a955cd7f2a50fb173f7f5efb68b84263..1636414f1fa236fc567419bf2d696559d42a8e42 100644 --- a/official/nlp/nhnet/configs_test.py +++ b/official/nlp/nhnet/configs_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests for configs.""" import tensorflow as tf @@ -99,7 +99,6 @@ NHNET_CONFIG = { "pad_token_id": 0, "end_token_id": 102, "start_token_id": 101, - "init_from_bert2bert": True, } diff --git a/official/nlp/nhnet/decoder.py b/official/nlp/nhnet/decoder.py index b38fa2a6b6a251af48848e5d0a8d684be8f4c098..cd4b4b66e683ddc27f8632b97e006dccce620f6e 100644 --- a/official/nlp/nhnet/decoder.py +++ b/official/nlp/nhnet/decoder.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Transformer decoder that mimics a BERT encoder, to load BERT checkpoints.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Transformer decoder that mimics a BERT encoder, to load BERT checkpoints.""" import tensorflow as tf from official.modeling import tf_utils from official.nlp.modeling import layers -from official.nlp.modeling.layers import transformer from official.nlp.transformer import model_utils as transformer_utils @@ -59,7 +53,7 @@ class TransformerDecoder(tf.keras.layers.Layer): self.layers = [] for i in range(self.num_hidden_layers): self.layers.append( - transformer.TransformerDecoderLayer( + layers.TransformerDecoderBlock( num_attention_heads=self.num_attention_heads, intermediate_size=self.intermediate_size, intermediate_activation=self.intermediate_activation, diff --git a/official/nlp/nhnet/decoder_test.py b/official/nlp/nhnet/decoder_test.py index f5effbdb090e9c08939bfc203091e960741700c6..d38339a3c1bb34a8c7ebdae65ed41aae55d7a882 100644 --- a/official/nlp/nhnet/decoder_test.py +++ b/official/nlp/nhnet/decoder_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for nlp.nhnet.decoder.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for nlp.nhnet.decoder.""" import numpy as np import tensorflow as tf diff --git a/official/nlp/nhnet/evaluation.py b/official/nlp/nhnet/evaluation.py index b9c94dcfb71aa763c2acab5ffd022db94c20d776..8e365aa6acdd9d00c07afc14f428f3cbbd094838 100644 --- a/official/nlp/nhnet/evaluation.py +++ b/official/nlp/nhnet/evaluation.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,15 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Evaluation for Bert2Bert.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Evaluation for Bert2Bert.""" import os +# Import libraries from absl import logging import numpy as np import tensorflow as tf @@ -113,7 +108,6 @@ def continuous_eval(strategy, dtype=tf.int64, aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA, shape=[]) - model.global_step = global_step @tf.function def test_step(inputs): @@ -148,7 +142,7 @@ def continuous_eval(strategy, eval_results = {} for latest_checkpoint in tf.train.checkpoints_iterator( model_dir, timeout=timeout): - checkpoint = tf.train.Checkpoint(model=model) + checkpoint = tf.train.Checkpoint(model=model, global_step=global_step) checkpoint.restore(latest_checkpoint).expect_partial() logging.info("Loaded checkpoint %s", latest_checkpoint) @@ -161,7 +155,7 @@ def continuous_eval(strategy, metric.update_state(func(logits.numpy(), targets.numpy())) with eval_summary_writer.as_default(): - step = model.global_step.numpy() + step = global_step.numpy() for metric, _ in metrics_and_funcs: eval_results[metric.name] = metric.result().numpy().astype(float) tf.summary.scalar( diff --git a/official/nlp/nhnet/input_pipeline.py b/official/nlp/nhnet/input_pipeline.py index cadf3f085c868e56039679fdb2124b23f33fc19b..d61ea688e2d9dc83083f5ddd1e1df109dc8e65d5 100644 --- a/official/nlp/nhnet/input_pipeline.py +++ b/official/nlp/nhnet/input_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,14 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Input pipelines.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Input pipelines.""" -import tensorflow.compat.v2 as tf +import tensorflow as tf def decode_record(record, name_to_features): @@ -222,7 +218,7 @@ def get_input_dataset(input_file_pattern, # When using TPU pods, we need to clone dataset across # workers and need to pass in function that returns the dataset rather # than passing dataset instance itself. - use_dataset_fn = isinstance(strategy, tf.distribute.experimental.TPUStrategy) + use_dataset_fn = isinstance(strategy, tf.distribute.TPUStrategy) if use_dataset_fn: if batch_size % strategy.num_replicas_in_sync != 0: raise ValueError( @@ -230,7 +226,7 @@ def get_input_dataset(input_file_pattern, strategy.num_replicas_in_sync)) # As auto rebatching is not supported in - # `experimental_distribute_datasets_from_function()` API, which is + # `distribute_datasets_from_function()` API, which is # required when cloning dataset to multiple workers in eager mode, # we use per-replica batch size. batch_size = int(batch_size / strategy.num_replicas_in_sync) @@ -249,6 +245,6 @@ def get_input_dataset(input_file_pattern, input_pipeline_context=ctx) if use_dataset_fn: - return strategy.experimental_distribute_datasets_from_function(_dataset_fn) + return strategy.distribute_datasets_from_function(_dataset_fn) else: return strategy.experimental_distribute_dataset(_dataset_fn()) diff --git a/official/nlp/nhnet/models.py b/official/nlp/nhnet/models.py index d6f70e7f36d8a30ed869c1ca135ef3262fd2150e..69a7d93b86c131bd9059feb24a9d45759c52bdca 100644 --- a/official/nlp/nhnet/models.py +++ b/official/nlp/nhnet/models.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,27 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""tf.keras Models for NHNet.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""tf.keras Models for NHNet.""" +from typing import Optional, Text from absl import logging import gin import tensorflow as tf -from typing import Optional, Text from official.modeling import tf_utils from official.modeling.hyperparams import params_dict from official.nlp.modeling import networks from official.nlp.modeling.layers import multi_channel_attention +from official.nlp.modeling.ops import beam_search from official.nlp.nhnet import configs from official.nlp.nhnet import decoder from official.nlp.nhnet import utils -from official.nlp.transformer import beam_search def embedding_linear(embedding_matrix, x): @@ -404,7 +399,7 @@ def get_bert2bert_layers(params: configs.BERT2BERTConfig): target_ids = tf.keras.layers.Input( shape=(None,), name="target_ids", dtype=tf.int32) bert_config = utils.get_bert_config_from_params(params) - bert_model_layer = networks.TransformerEncoder( + bert_model_layer = networks.BertEncoder( vocab_size=bert_config.vocab_size, hidden_size=bert_config.hidden_size, num_layers=bert_config.num_hidden_layers, @@ -413,7 +408,6 @@ def get_bert2bert_layers(params: configs.BERT2BERTConfig): activation=tf_utils.get_activation(bert_config.hidden_act), dropout_rate=bert_config.hidden_dropout_prob, attention_dropout_rate=bert_config.attention_probs_dropout_prob, - sequence_length=None, 
max_sequence_length=bert_config.max_position_embeddings, type_vocab_size=bert_config.type_vocab_size, initializer=tf.keras.initializers.TruncatedNormal( @@ -455,7 +449,7 @@ def get_nhnet_layers(params: configs.NHNetConfig): segment_ids = tf.keras.layers.Input( shape=(None,), name="segment_ids", dtype=tf.int32) bert_config = utils.get_bert_config_from_params(params) - bert_model_layer = networks.TransformerEncoder( + bert_model_layer = networks.BertEncoder( vocab_size=bert_config.vocab_size, hidden_size=bert_config.hidden_size, num_layers=bert_config.num_hidden_layers, @@ -584,7 +578,6 @@ def create_model(model_type: Text, elif model_type == "nhnet": return create_nhnet_model(params, init_checkpoint=init_checkpoint) elif "transformer" in model_type: - return create_transformer_model( - params, init_checkpoint=init_checkpoint) + return create_transformer_model(params, init_checkpoint=init_checkpoint) else: raise KeyError("The model type is not defined: %s" % model_type) diff --git a/official/nlp/nhnet/models_test.py b/official/nlp/nhnet/models_test.py index 39676a347d65e2dc19e99a7dec4d22dfb4c60df4..d076c5a385393eb3a1099fad0243d15ff085c46a 100644 --- a/official/nlp/nhnet/models_test.py +++ b/official/nlp/nhnet/models_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for nlp.nhnet.models.""" import os @@ -34,13 +34,11 @@ def all_strategy_combinations(): return combinations.combine( distribution=[ strategy_combinations.default_strategy, - strategy_combinations.tpu_strategy, + strategy_combinations.cloud_tpu_strategy, strategy_combinations.one_device_strategy_gpu, strategy_combinations.mirrored_strategy_with_gpu_and_cpu, strategy_combinations.mirrored_strategy_with_two_gpus, - ], - mode="eager", - ) + ],) def distribution_forward_path(strategy, @@ -179,8 +177,9 @@ class Bert2BertTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(all_strategy_combinations()) def test_bert2bert_eval(self, distribution): seq_length = 10 - padded_decode = isinstance(distribution, - tf.distribute.experimental.TPUStrategy) + padded_decode = isinstance( + distribution, + (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)) self._config.override( { "beam_size": 3, @@ -286,8 +285,9 @@ class NHNetTest(tf.test.TestCase, parameterized.TestCase): @combinations.generate(all_strategy_combinations()) def test_nhnet_eval(self, distribution): seq_length = 10 - padded_decode = isinstance(distribution, - tf.distribute.experimental.TPUStrategy) + padded_decode = isinstance( + distribution, + (tf.distribute.TPUStrategy, tf.distribute.experimental.TPUStrategy)) self._nhnet_config.override( { "beam_size": 4, diff --git a/official/nlp/nhnet/optimizer.py b/official/nlp/nhnet/optimizer.py index 15c7e248019399f1abc94f64f5acd509db104f38..03375c3b22134e566dd1ce28120a2897cf8a1b1d 100644 --- a/official/nlp/nhnet/optimizer.py +++ b/official/nlp/nhnet/optimizer.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Optimizer and learning rate scheduler.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Optimizer and learning rate scheduler.""" import tensorflow as tf @@ -71,10 +66,8 @@ class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): def create_optimizer(params: params_dict.ParamsDict): """Creates optimizer.""" - lr_schedule = LearningRateSchedule( - params.learning_rate, - params.hidden_size, - params.learning_rate_warmup_steps) + lr_schedule = LearningRateSchedule(params.learning_rate, params.hidden_size, + params.learning_rate_warmup_steps) return tf.keras.optimizers.Adam( learning_rate=lr_schedule, beta_1=params.adam_beta1, diff --git a/official/nlp/nhnet/raw_data_process.py b/official/nlp/nhnet/raw_data_process.py index 9597043237355f2c4b4399c490e105672e406b62..26ee4ef97d48c49d464c0ba563c58e60ac0165ca 100644 --- a/official/nlp/nhnet/raw_data_process.py +++ b/official/nlp/nhnet/raw_data_process.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Processes crawled content from news URLs by generating tfrecords.""" import os + from absl import app from absl import flags from official.nlp.nhnet import raw_data_processor diff --git a/official/nlp/nhnet/raw_data_processor.py b/official/nlp/nhnet/raw_data_processor.py index 0a30532f4f401e6f2b29430d353767c6cdea0966..73a00ba158cb2aa098516880ef6d18dd1ef2636e 100644 --- a/official/nlp/nhnet/raw_data_processor.py +++ b/official/nlp/nhnet/raw_data_processor.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Library for processing crawled content and generating tfrecords.""" import collections @@ -20,6 +19,7 @@ import json import multiprocessing import os import urllib.parse + import tensorflow as tf from official.nlp.bert import tokenization @@ -47,10 +47,10 @@ class RawDataProcessor(object): max_num_articles: Maximum number of articles in a story. include_article_title_in_passage: Whether to include article title in article passage. - include_text_snippet_in_example: Whether to include text snippet - (headline and article content) in generated tensorflow Examples, for - debug usage. If include_article_title_in_passage=True, title and body - will be separated by [SEP]. + include_text_snippet_in_example: Whether to include text snippet (headline + and article content) in generated tensorflow Examples, for debug usage. 
+ If include_article_title_in_passage=True, title and body will be + separated by [SEP]. """ self.articles = dict() self.tokenizer = tokenization.FullTokenizer( @@ -156,6 +156,7 @@ class RawDataProcessor(object): def _get_single_story_features(self, story_headline, articles): """Converts a list of articles to a tensorflow Example.""" + def get_text_snippet(article): if article.text_b: return " [SEP] ".join([article.text_a, article.text_b]) diff --git a/official/nlp/nhnet/trainer.py b/official/nlp/nhnet/trainer.py index 3fa26a53a0d002247eb656691e8f49fa42bbe80f..5fbb7eb19296c029c3b0cc9aaa47a16792fb511e 100644 --- a/official/nlp/nhnet/trainer.py +++ b/official/nlp/nhnet/trainer.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,27 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Run NHNet model training and eval.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Run NHNet model training and eval.""" import os +# Import libraries from absl import app from absl import flags from absl import logging from six.moves import zip import tensorflow as tf +from official.common import distribute_utils from official.modeling.hyperparams import params_dict from official.nlp.nhnet import evaluation from official.nlp.nhnet import input_pipeline from official.nlp.nhnet import models from official.nlp.nhnet import optimizer from official.nlp.transformer import metrics as transformer_metrics -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils FLAGS = flags.FLAGS @@ -84,6 +80,10 @@ def define_flags(): default=None, help=("a YAML/JSON string or a YAML file which specifies additional " "overrides over the default parameters")) + # Enables MLIR-based TF/XLA bridge. This is part of a soft rollout and will + # eventually be the Google-wide default. 
+ flags.DEFINE_bool("enable_mlir_bridge", True, + "Use MLIR TF/XLA bridge (experimental).") # pylint: disable=protected-access @@ -140,20 +140,20 @@ def train(params, strategy, dataset=None): FLAGS.model_type, params, init_checkpoint=FLAGS.init_checkpoint) opt = optimizer.create_optimizer(params) trainer = Trainer(model, params) - model.global_step = opt.iterations trainer.compile( optimizer=opt, - experimental_steps_per_execution=FLAGS.steps_per_loop) + steps_per_execution=FLAGS.steps_per_loop) summary_dir = os.path.join(FLAGS.model_dir, "summaries") summary_callback = tf.keras.callbacks.TensorBoard( summary_dir, update_freq=max(100, FLAGS.steps_per_loop)) - checkpoint = tf.train.Checkpoint(model=model, optimizer=opt) + checkpoint = tf.train.Checkpoint( + model=model, optimizer=opt, global_step=opt.iterations) checkpoint_manager = tf.train.CheckpointManager( checkpoint, directory=FLAGS.model_dir, max_to_keep=10, - step_counter=model.global_step, + step_counter=opt.iterations, checkpoint_interval=FLAGS.checkpoint_interval) if checkpoint_manager.restore_or_initialize(): logging.info("Training restored from the checkpoints in: %s", @@ -177,7 +177,10 @@ def train(params, strategy, dataset=None): def run(): """Runs NHNet using Keras APIs.""" - strategy = distribution_utils.get_distribution_strategy( + if FLAGS.enable_mlir_bridge: + tf.config.experimental.enable_mlir_bridge() + + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, tpu_address=FLAGS.tpu) if strategy: logging.info("***** Number of cores used : %d", @@ -206,7 +209,7 @@ def run(): if "eval" in FLAGS.mode: timeout = 0 if FLAGS.mode == "train_and_eval" else FLAGS.eval_timeout # Uses padded decoding for TPU. Always uses cache. 
- padded_decode = isinstance(strategy, tf.distribute.experimental.TPUStrategy) + padded_decode = isinstance(strategy, tf.distribute.TPUStrategy) params.override({ "padded_decode": padded_decode, }, is_strict=False) diff --git a/official/nlp/nhnet/trainer_test.py b/official/nlp/nhnet/trainer_test.py index 39673dd2c7afe0f7310e556395824b9ba4582262..d906f6a0559c22a90e2860b3f9d7e9e5720b31e4 100644 --- a/official/nlp/nhnet/trainer_test.py +++ b/official/nlp/nhnet/trainer_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests for official.nlp.nhnet.trainer.""" import os @@ -38,10 +37,8 @@ def all_strategy_combinations(): strategy_combinations.one_device_strategy, strategy_combinations.one_device_strategy_gpu, strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - ], - mode="eager", - ) + strategy_combinations.cloud_tpu_strategy, + ],) def get_trivial_data(config) -> tf.data.Dataset: diff --git a/official/nlp/nhnet/utils.py b/official/nlp/nhnet/utils.py index f588798b7feee95a33b3b003f77570fe48340fe7..398c173f408e5e417ab9fbac19b6333d7b605dee 100644 --- a/official/nlp/nhnet/utils.py +++ b/official/nlp/nhnet/utils.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility helpers for Bert2Bert.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +from typing import Optional, Text from absl import logging import tensorflow as tf -from typing import Optional, Text + from official.modeling.hyperparams import params_dict from official.nlp.bert import configs from official.nlp.nhnet import configs as nhnet_configs @@ -44,6 +41,8 @@ def encoder_common_layers(transformer_block): transformer_block._intermediate_dense, transformer_block._output_dense, transformer_block._output_layer_norm ] + + # pylint: enable=protected-access diff --git a/official/nlp/optimization.py b/official/nlp/optimization.py index 51289a535b239d5831dd76bae57d6306f604e746..dfac8f4684fe08dd7cb401e64d501edc5b1c240d 100644 --- a/official/nlp/optimization.py +++ b/official/nlp/optimization.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Functions and classes related to optimization (weight updates).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import re @@ -72,7 +69,8 @@ def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=0.0, - optimizer_type='adamw'): + optimizer_type='adamw', + beta_1=0.9): """Creates an optimizer with learning rate schedule.""" # Implements linear decay of the learning rate. lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay( @@ -90,7 +88,7 @@ def create_optimizer(init_lr, optimizer = AdamWeightDecay( learning_rate=lr_schedule, weight_decay_rate=0.01, - beta_1=0.9, + beta_1=beta_1, beta_2=0.999, epsilon=1e-6, exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias']) @@ -99,7 +97,7 @@ def create_optimizer(init_lr, optimizer = tfa_optimizers.LAMB( learning_rate=lr_schedule, weight_decay_rate=0.01, - beta_1=0.9, + beta_1=beta_1, beta_2=0.999, epsilon=1e-6, exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias']) @@ -116,7 +114,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam): correct way of using L2 regularization/weight decay with Adam, since that will interact with the m and v parameters in strange ways. - Instead we want ot decay the weights in a manner that doesn't interact with + Instead we want to decay the weights in a manner that doesn't interact with the m/v parameters. This is equivalent to adding the square of the weights to the loss with plain (non-momentum) SGD. 
""" @@ -130,13 +128,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam): weight_decay_rate=0.0, include_in_weight_decay=None, exclude_from_weight_decay=None, + gradient_clip_norm=1.0, name='AdamWeightDecay', **kwargs): super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs) self.weight_decay_rate = weight_decay_rate + self.gradient_clip_norm = gradient_clip_norm self._include_in_weight_decay = include_in_weight_decay self._exclude_from_weight_decay = exclude_from_weight_decay + logging.info('gradient_clip_norm=%f', gradient_clip_norm) @classmethod def from_config(cls, config): @@ -165,13 +166,14 @@ class AdamWeightDecay(tf.keras.optimizers.Adam): name=None, experimental_aggregate_gradients=True): grads, tvars = list(zip(*grads_and_vars)) - if experimental_aggregate_gradients: + if experimental_aggregate_gradients and self.gradient_clip_norm > 0.0: # when experimental_aggregate_gradients = False, apply_gradients() no # longer implicitly allreduce gradients, users manually allreduce gradient # and passed the allreduced grads_and_vars. For now, the # clip_by_global_norm will be moved to before the explicit allreduce to # keep the math the same as TF 1 and pre TF 2.2 implementation. - (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) + (grads, _) = tf.clip_by_global_norm( + grads, clip_norm=self.gradient_clip_norm) return super(AdamWeightDecay, self).apply_gradients( zip(grads, tvars), name=name, diff --git a/official/nlp/projects/__init__.py b/official/nlp/projects/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/nlp/projects/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/projects/bigbird/__init__.py b/official/nlp/projects/bigbird/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/nlp/projects/bigbird/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/projects/bigbird/attention.py b/official/nlp/projects/bigbird/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..9c2816cd7c6676811757cf8cdc9ccee78c8956ad --- /dev/null +++ b/official/nlp/projects/bigbird/attention.py @@ -0,0 +1,491 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Keras-based bigbird attention layer."""
+
+import numpy as np
+import tensorflow as tf
+
+MAX_SEQ_LEN = 4096
+
+
+def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
+ """Create 3D attention mask from a 2D tensor mask.
+
+ Args:
+ from_blocked_mask: 3D Tensor of shape [batch_size,
+ from_seq_length//from_block_size, from_block_size].
+ to_blocked_mask: int32 Tensor of shape [batch_size,
+ to_seq_length//to_block_size, to_block_size].
+
+ Returns:
+ float Tensor of shape [batch_size, 1, from_seq_length//from_block_size-4,
+ from_block_size, 3*to_block_size].
+ """
+ exp_blocked_to_pad = tf.concat([
+ to_blocked_mask[:, 1:-3], to_blocked_mask[:, 2:-2], to_blocked_mask[:,
+ 3:-1]
+ ], 2)
+ band_mask = tf.einsum("BLQ,BLK->BLQK", from_blocked_mask[:, 2:-2],
+ exp_blocked_to_pad)
+ band_mask = tf.expand_dims(band_mask, 1)
+ return band_mask
+
+
+def bigbird_block_rand_mask(from_seq_length,
+ to_seq_length,
+ from_block_size,
+ to_block_size,
+ num_rand_blocks,
+ last_idx=-1):
+ """Create adjacency list of random attention.
+
+ Args:
+ from_seq_length: int. length of from sequence.
+ to_seq_length: int. length of to sequence.
+ from_block_size: int. size of block in from sequence.
+ to_block_size: int. size of block in to sequence.
+ num_rand_blocks: int. Number of random chunks per row.
+ last_idx: if -1 then num_rand_blocks blocks chosen anywhere in to sequence,
+ if positive then num_rand_blocks blocks chosen only up to last_idx.
+ + Returns: + adjacency list of size from_seq_length//from_block_size-2 by num_rand_blocks + """ + assert from_seq_length//from_block_size == to_seq_length//to_block_size, \ + "Error the number of blocks needs to be same!" + + rand_attn = np.zeros( + (from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32) + middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32) + last = to_seq_length // to_block_size - 1 + if last_idx > (2 * to_block_size): + last = (last_idx // to_block_size) - 1 + + r = num_rand_blocks # shorthand + for i in range(1, from_seq_length // from_block_size - 1): + start = i - 2 + end = i + if i == 1: + rand_attn[i - 1, :] = np.random.permutation(middle_seq[2:last])[:r] + elif i == 2: + rand_attn[i - 1, :] = np.random.permutation(middle_seq[3:last])[:r] + elif i == from_seq_length // from_block_size - 3: + rand_attn[i - 1, :] = np.random.permutation(middle_seq[:last])[:r] + # Missing -3: should have been sliced till last-3 + elif i == from_seq_length // from_block_size - 2: + rand_attn[i - 1, :] = np.random.permutation(middle_seq[:last])[:r] + # Missing -4: should have been sliced till last-4 + else: + if start > last: + start = last + rand_attn[i - 1, :] = np.random.permutation(middle_seq[:start])[:r] + elif (end + 1) == last: + rand_attn[i - 1, :] = np.random.permutation(middle_seq[:start])[:r] + else: + rand_attn[i - 1, :] = np.random.permutation( + np.concatenate((middle_seq[:start], middle_seq[end + 1:last])))[:r] + return rand_attn + + +def create_rand_mask_from_inputs(from_blocked_mask, to_blocked_mask, rand_attn, + num_attention_heads, num_rand_blocks, + batch_size, from_seq_length, from_block_size): + """Create 3D attention mask from a 2D tensor mask. + + Args: + from_blocked_mask: 2D Tensor of shape [batch_size, + from_seq_length//from_block_size, from_block_size]. + to_blocked_mask: int32 Tensor of shape [batch_size, + to_seq_length//to_block_size, to_block_size]. 
+ rand_attn: [batch_size, num_attention_heads, + from_seq_length//from_block_size-2, num_rand_blocks] + num_attention_heads: int. Number of attention heads. + num_rand_blocks: int. Number of random chunks per row. + batch_size: int. Batch size for computation. + from_seq_length: int. length of from sequence. + from_block_size: int. size of block in from sequence. + + Returns: + float Tensor of shape [batch_size, num_attention_heads, + from_seq_length//from_block_size-2, + from_block_size, num_rand_blocks*to_block_size]. + """ + num_windows = from_seq_length // from_block_size - 2 + rand_mask = tf.reshape( + tf.gather(to_blocked_mask, rand_attn, batch_dims=1), [ + batch_size, num_attention_heads, num_windows, + num_rand_blocks * from_block_size + ]) + rand_mask = tf.einsum("BLQ,BHLK->BHLQK", from_blocked_mask[:, 1:-1], + rand_mask) + return rand_mask + + +def bigbird_block_sparse_attention( + query_layer, key_layer, value_layer, band_mask, from_mask, to_mask, + from_blocked_mask, to_blocked_mask, rand_attn, num_attention_heads, + num_rand_blocks, size_per_head, batch_size, from_seq_length, to_seq_length, + from_block_size, to_block_size): + """BigBird attention sparse calculation using blocks in linear time. + + Assumes from_seq_length//from_block_size == to_seq_length//to_block_size. + + + Args: + query_layer: float Tensor of shape [batch_size, num_attention_heads, + from_seq_length, size_per_head] + key_layer: float Tensor of shape [batch_size, num_attention_heads, + to_seq_length, size_per_head] + value_layer: float Tensor of shape [batch_size, num_attention_heads, + to_seq_length, size_per_head] + band_mask: (optional) int32 Tensor of shape [batch_size, 1, + from_seq_length//from_block_size-4, from_block_size, 3*to_block_size]. The + values should be 1 or 0. The attention scores will effectively be set to + -infinity for any positions in the mask that are 0, and will be unchanged + for positions that are 1. 
+ from_mask: (optional) int32 Tensor of shape [batch_size, 1, from_seq_length, + 1]. The values should be 1 or 0. The attention scores will effectively be + set to -infinity for any positions in the mask that are 0, and will be + unchanged for positions that are 1. + to_mask: (optional) int32 Tensor of shape [batch_size, 1, 1, to_seq_length]. + The values should be 1 or 0. The attention scores will effectively be set + to -infinity for any positions in the mask that are 0, and will be + unchanged for positions that are 1. + from_blocked_mask: (optional) int32 Tensor of shape [batch_size, + from_seq_length//from_block_size, from_block_size]. Same as from_mask, + just reshaped. + to_blocked_mask: (optional) int32 Tensor of shape [batch_size, + to_seq_length//to_block_size, to_block_size]. Same as to_mask, just + reshaped. + rand_attn: [batch_size, num_attention_heads, + from_seq_length//from_block_size-2, num_rand_blocks] + num_attention_heads: int. Number of attention heads. + num_rand_blocks: int. Number of random chunks per row. + size_per_head: int. Size of each attention head. + batch_size: int. Batch size for computation. + from_seq_length: int. length of from sequence. + to_seq_length: int. length of to sequence. + from_block_size: int. size of block in from sequence. + to_block_size: int. size of block in to sequence. + + Returns: + float Tensor of shape [batch_size, from_seq_length, num_attention_heads, + size_per_head]. 
+ """ + rand_attn = tf.expand_dims(rand_attn, 0) + rand_attn = tf.repeat(rand_attn, batch_size, 0) + + rand_mask = create_rand_mask_from_inputs( + from_blocked_mask, + to_blocked_mask, + rand_attn, + num_attention_heads, + num_rand_blocks, + batch_size, + from_seq_length, + from_block_size, + ) + + # Define shorthands + h = num_attention_heads + r = num_rand_blocks + d = size_per_head + b = batch_size + m = from_seq_length + n = to_seq_length + wm = from_block_size + wn = to_block_size + dtype = query_layer.dtype + query_layer = tf.transpose(query_layer, perm=[0, 2, 1, 3]) + key_layer = tf.transpose(key_layer, perm=[0, 2, 1, 3]) + value_layer = tf.transpose(value_layer, perm=[0, 2, 1, 3]) + blocked_query_matrix = tf.reshape(query_layer, (b, h, m // wm, wm, -1)) + blocked_key_matrix = tf.reshape(key_layer, (b, h, n // wn, wn, -1)) + blocked_value_matrix = tf.reshape(value_layer, (b, h, n // wn, wn, -1)) + gathered_key = tf.reshape( + tf.gather(blocked_key_matrix, rand_attn, batch_dims=2, name="gather_key"), + (b, h, m // wm - 2, r * wn, -1)) # [b, h, n//wn-2, r, wn, -1] + gathered_value = tf.reshape( + tf.gather( + blocked_value_matrix, rand_attn, batch_dims=2, name="gather_value"), + (b, h, m // wm - 2, r * wn, -1)) # [b, h, n//wn-2, r, wn, -1] + first_product = tf.einsum( + "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, 0], + key_layer) # [b, h, wm, -1] x [b, h, n, -1] ==> [b, h, wm, n] + first_product = tf.multiply(first_product, 1.0 / np.sqrt(d)) + first_product += (1.0 - tf.cast(to_mask, dtype=dtype)) * -10000.0 + first_attn_weights = tf.nn.softmax(first_product) # [b, h, wm, n] + first_context_layer = tf.einsum( + "BHQK,BHKD->BHQD", first_attn_weights, + value_layer) # [b, h, wm, n] x [b, h, n, -1] ==> [b, h, wm, -1] + first_context_layer = tf.expand_dims(first_context_layer, 2) + + second_key_mat = tf.concat([ + blocked_key_matrix[:, :, 0], blocked_key_matrix[:, :, 1], + blocked_key_matrix[:, :, 2], blocked_key_matrix[:, :, + -1], gathered_key[:, :, 0] + ], 2) 
# [b, h, (4+r)*wn, -1] + second_value_mat = tf.concat([ + blocked_value_matrix[:, :, 0], blocked_value_matrix[:, :, 1], + blocked_value_matrix[:, :, 2], blocked_value_matrix[:, :, -1], + gathered_value[:, :, 0] + ], 2) # [b, h, (4+r)*wn, -1] + second_product = tf.einsum( + "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, 1], second_key_mat + ) # [b, h, wm, -1] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, (4+r)*wn] + second_seq_pad = tf.concat([ + to_mask[:, :, :, :3 * wn], to_mask[:, :, :, -wn:], + tf.ones([b, 1, 1, r * wn], dtype=dtype) + ], 3) + second_rand_pad = tf.concat([ + tf.ones([b, h, wm, 4 * wn], dtype=dtype), rand_mask[:, :, 0] + ], 3) + second_product = tf.multiply(second_product, 1.0 / np.sqrt(d)) + second_product += (1.0 - + tf.minimum(second_seq_pad, second_rand_pad)) * -10000.0 + second_attn_weights = tf.nn.softmax(second_product) # [b , h, wm, (4+r)*wn] + second_context_layer = tf.einsum( + "BHQK,BHKD->BHQD", second_attn_weights, second_value_mat + ) # [b, h, wm, (4+r)*wn] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, -1] + second_context_layer = tf.expand_dims(second_context_layer, 2) + + exp_blocked_key_matrix = tf.concat([ + blocked_key_matrix[:, :, 1:-3], blocked_key_matrix[:, :, 2:-2], + blocked_key_matrix[:, :, 3:-1] + ], 3) # [b, h, m//wm-4, 3*wn, -1] + exp_blocked_value_matrix = tf.concat([ + blocked_value_matrix[:, :, 1:-3], blocked_value_matrix[:, :, 2:-2], + blocked_value_matrix[:, :, 3:-1] + ], 3) # [b, h, m//wm-4, 3*wn, -1] + middle_query_matrix = blocked_query_matrix[:, :, 2:-2] + inner_band_product = tf.einsum( + "BHLQD,BHLKD->BHLQK", middle_query_matrix, exp_blocked_key_matrix + ) # [b, h, m//wm-4, wm, -1] x [b, h, m//wm-4, 3*wn, -1] + # ==> [b, h, m//wm-4, wm, 3*wn] + inner_band_product = tf.multiply(inner_band_product, 1.0 / np.sqrt(d)) + rand_band_product = tf.einsum( + "BHLQD,BHLKD->BHLQK", middle_query_matrix, + gathered_key[:, :, + 1:-1]) # [b, h, m//wm-4, wm, -1] x [b, h, m//wm-4, r*wn, -1] + # ==> [b, h, m//wm-4, wm, r*wn] + rand_band_product 
= tf.multiply(rand_band_product, 1.0 / np.sqrt(d)) + first_band_product = tf.einsum( + "BHLQD,BHKD->BHLQK", middle_query_matrix, blocked_key_matrix[:, :, 0] + ) # [b, h, m//wm-4, wm, -1] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, wn] + first_band_product = tf.multiply(first_band_product, 1.0 / np.sqrt(d)) + last_band_product = tf.einsum( + "BHLQD,BHKD->BHLQK", middle_query_matrix, blocked_key_matrix[:, :, -1] + ) # [b, h, m//wm-4, wm, -1] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, wn] + last_band_product = tf.multiply(last_band_product, 1.0 / np.sqrt(d)) + inner_band_product += (1.0 - band_mask) * -10000.0 + first_band_product += (1.0 - + tf.expand_dims(to_mask[:, :, :, :wn], 3)) * -10000.0 + last_band_product += (1.0 - + tf.expand_dims(to_mask[:, :, :, -wn:], 3)) * -10000.0 + rand_band_product += (1.0 - rand_mask[:, :, 1:-1]) * -10000.0 + band_product = tf.concat([ + first_band_product, inner_band_product, rand_band_product, + last_band_product + ], -1) # [b, h, m//wm-4, wm, (5+r)*wn] + attn_weights = tf.nn.softmax(band_product) # [b, h, m//wm-4, wm, (5+r)*wn] + context_layer = tf.einsum( + "BHLQK,BHLKD->BHLQD", attn_weights[:, :, :, :, + wn:4 * wn], exp_blocked_value_matrix + ) # [b, h, m//wm-4, wm, 3*wn] x [b, h, m//wm-4, 3*wn, -1] + # ==> [b, h, m//wm-4, wm, -1] + context_layer += tf.einsum( + "BHLQK,BHLKD->BHLQD", attn_weights[:, :, :, :, + 4 * wn:-wn], gathered_value[:, :, 1:-1] + ) # [b, h, m//wm-4, wm, r*wn] x [b, h, m//wm-4, r*wn, -1] + # ==> [b, h, m//wm-4, wm, -1] + context_layer += tf.einsum( + "BHLQK,BHKD->BHLQD", attn_weights[:, :, :, :, :wn], + blocked_value_matrix[:, :, 0] + ) # [b, h, m//wm-4, wm, wn] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, -1] + context_layer += tf.einsum( + "BHLQK,BHKD->BHLQD", attn_weights[:, :, :, :, + -wn:], blocked_value_matrix[:, :, -1] + ) # [b, h, m//wm-4, wm, wn] x [b, h, wn, -1] ==> [b, h, m//wm-4, wm, -1] + + second_last_key_mat = tf.concat([ + blocked_key_matrix[:, :, 0], blocked_key_matrix[:, :, -3], + 
blocked_key_matrix[:, :, -2], blocked_key_matrix[:, :, -1], + gathered_key[:, :, -1] + ], 2) # [b, h, (4+r)*wn, -1] + second_last_value_mat = tf.concat([ + blocked_value_matrix[:, :, 0], blocked_value_matrix[:, :, -3], + blocked_value_matrix[:, :, -2], blocked_value_matrix[:, :, -1], + gathered_value[:, :, -1] + ], 2) # [b, h, (4+r)*wn, -1] + second_last_product = tf.einsum( + "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, -2], second_last_key_mat + ) # [b, h, wm, -1] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, (4+r)*wn] + second_last_seq_pad = tf.concat([ + to_mask[:, :, :, :wn], to_mask[:, :, :, -3 * wn:], + tf.ones([b, 1, 1, r * wn], dtype=dtype) + ], 3) + second_last_rand_pad = tf.concat( + [tf.ones([b, h, wm, 4 * wn], dtype=dtype), rand_mask[:, :, -1]], 3) + second_last_product = tf.multiply(second_last_product, 1.0 / np.sqrt(d)) + second_last_product += ( + 1.0 - tf.minimum(second_last_seq_pad, second_last_rand_pad)) * -10000.0 + second_last_attn_weights = tf.nn.softmax( + second_last_product) # [b, h, wm, (4+r)*wn] + second_last_context_layer = tf.einsum( + "BHQK,BHKD->BHQD", second_last_attn_weights, second_last_value_mat + ) # [b, h, wm, (4+r)*wn] x [b, h, (4+r)*wn, -1] ==> [b, h, wm, -1] + second_last_context_layer = tf.expand_dims(second_last_context_layer, 2) + + last_product = tf.einsum( + "BHQD,BHKD->BHQK", blocked_query_matrix[:, :, -1], + key_layer) # [b, h, wm, -1] x [b, h, n, -1] ==> [b, h, wm, n] + last_product = tf.multiply(last_product, 1.0 / np.sqrt(d)) + last_product += (1.0 - to_mask) * -10000.0 + last_attn_weights = tf.nn.softmax(last_product) # [b, h, wm, n] + last_context_layer = tf.einsum( + "BHQK,BHKD->BHQD", last_attn_weights, + value_layer) # [b, h, wm, n] x [b, h, n, -1] ==> [b, h, wm, -1] + last_context_layer = tf.expand_dims(last_context_layer, 2) + + context_layer = tf.concat([ + first_context_layer, second_context_layer, context_layer, + second_last_context_layer, last_context_layer + ], 2) + context_layer = tf.reshape(context_layer, 
(b, h, m, -1)) * from_mask + context_layer = tf.transpose(context_layer, (0, 2, 1, 3)) + return context_layer + + +class BigBirdMasks(tf.keras.layers.Layer): + """Creates bigbird attention masks.""" + + def __init__(self, block_size, **kwargs): + super().__init__(**kwargs) + self._block_size = block_size + + def call(self, inputs): + encoder_shape = tf.shape(inputs) + batch_size, seq_length = encoder_shape[0], encoder_shape[1] + # reshape for blocking + blocked_encoder_mask = tf.reshape( + inputs, (batch_size, seq_length // self._block_size, self._block_size)) + encoder_from_mask = tf.reshape(inputs, (batch_size, 1, seq_length, 1)) + encoder_to_mask = tf.reshape(inputs, (batch_size, 1, 1, seq_length)) + + band_mask = create_band_mask_from_inputs(blocked_encoder_mask, + blocked_encoder_mask) + return [band_mask, encoder_from_mask, encoder_to_mask, blocked_encoder_mask] + + +@tf.keras.utils.register_keras_serializable(package="Text") +class BigBirdAttention(tf.keras.layers.MultiHeadAttention): + """BigBird, a sparse attention mechanism. + + This layer follows the paper "Big Bird: Transformers for Longer Sequences" + (https://arxiv.org/abs/2007.14062). + It reduces this quadratic dependency of attention + computation to linear. + + Arguments are the same as `MultiHeadAttention` layer. + """ + + def __init__(self, + num_rand_blocks=3, + from_block_size=64, + to_block_size=64, + max_rand_mask_length=MAX_SEQ_LEN, + seed=None, + **kwargs): + super().__init__(**kwargs) + self._num_rand_blocks = num_rand_blocks + self._from_block_size = from_block_size + self._to_block_size = to_block_size + self._seed = seed + + # Generates random attention. 
+ np.random.seed(self._seed) + # pylint: disable=g-complex-comprehension + rand_attn = [ + bigbird_block_rand_mask( + max_rand_mask_length, + max_rand_mask_length, + from_block_size, + to_block_size, + num_rand_blocks, + last_idx=1024) for _ in range(self._num_heads) + ] + # pylint: enable=g-complex-comprehension + rand_attn = np.stack(rand_attn, axis=0) + self.rand_attn = tf.constant(rand_attn, dtype=tf.int32) + + def _compute_attention(self, query, key, value, attention_mask=None): + (band_mask, encoder_from_mask, encoder_to_mask, + blocked_encoder_mask) = attention_mask + query_shape = tf.shape(query) + from_seq_length = query_shape[1] + to_seq_length = tf.shape(key)[1] + rand_attn = self.rand_attn[:, :(from_seq_length // self._from_block_size - + 2)] + return bigbird_block_sparse_attention( + query, + key, + value, + band_mask, + encoder_from_mask, + encoder_to_mask, + blocked_encoder_mask, + blocked_encoder_mask, + num_attention_heads=self._num_heads, + num_rand_blocks=self._num_rand_blocks, + size_per_head=self._key_dim, + batch_size=query_shape[0], + from_seq_length=from_seq_length, + to_seq_length=to_seq_length, + from_block_size=self._from_block_size, + to_block_size=self._to_block_size, + rand_attn=rand_attn) + + def call(self, query, value, key=None, attention_mask=None, **kwargs): + if not self._built_from_signature: + self._build_from_signature(query=query, value=value, key=key) + if key is None: + key = value + + # N = `num_attention_heads` + # H = `size_per_head` + # `query` = [B, T, N ,H] + query = self._query_dense(query) + + # `key` = [B, S, N, H] + key = self._key_dense(key) + + # `value` = [B, S, N, H] + value = self._value_dense(value) + + attention_output = self._compute_attention(query, key, value, + attention_mask) + attention_output.set_shape([None, None, self._num_heads, self._value_dim]) + attention_output = self._output_dense(attention_output) + return attention_output + + def get_config(self): + config = { + "num_rand_blocks": 
self._num_rand_blocks, + "from_block_size": self._from_block_size, + "to_block_size": self._to_block_size, + "seed": self._seed + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/official/nlp/projects/bigbird/attention_test.py b/official/nlp/projects/bigbird/attention_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0696a1880c6ea9c4332940e844a18e98c918651b --- /dev/null +++ b/official/nlp/projects/bigbird/attention_test.py @@ -0,0 +1,67 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.projects.bigbird.attention.""" + +import tensorflow as tf + +from official.nlp.projects.bigbird import attention + + +class BigbirdAttentionTest(tf.test.TestCase): + + def test_attention(self): + num_heads = 12 + key_dim = 64 + seq_length = 1024 + batch_size = 2 + block_size = 64 + mask_layer = attention.BigBirdMasks(block_size=block_size) + encoder_inputs_mask = tf.zeros((batch_size, seq_length), dtype=tf.int32) + masks = mask_layer(tf.cast(encoder_inputs_mask, dtype=tf.float64)) + test_layer = attention.BigBirdAttention( + num_heads=num_heads, + key_dim=key_dim, + from_block_size=block_size, + to_block_size=block_size, + seed=0) + query = tf.random.normal( + shape=(batch_size, seq_length, key_dim)) + value = query + output = test_layer( + query=query, + value=value, + attention_mask=masks) + self.assertEqual(output.shape, [batch_size, seq_length, key_dim]) + + def test_config(self): + num_heads = 12 + key_dim = 64 + block_size = 64 + test_layer = attention.BigBirdAttention( + num_heads=num_heads, + key_dim=key_dim, + from_block_size=block_size, + to_block_size=block_size, + seed=0) + print(test_layer.get_config()) + new_layer = attention.BigBirdAttention.from_config( + test_layer.get_config()) + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(test_layer.get_config(), new_layer.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/projects/bigbird/encoder.py b/official/nlp/projects/bigbird/encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..60973875452b4487d4ab8a602956744de2278d6c --- /dev/null +++ b/official/nlp/projects/bigbird/encoder.py @@ -0,0 +1,232 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transformer-based text encoder network.""" +# pylint: disable=g-classes-have-attributes + +import tensorflow as tf + +from official.modeling import activations +from official.nlp import keras_nlp +from official.nlp.modeling import layers +from official.nlp.projects.bigbird import attention +from official.nlp.projects.bigbird import recompute_grad +from official.nlp.projects.bigbird import recomputing_dropout + + +class RecomputeTransformerLayer(layers.TransformerScaffold): + """Transformer layer that recomputes the forward pass during backpropagation.""" + + def call(self, inputs, training=None): + emb, mask = inputs + def f(*args): + # recompute_grad can only handle tensor inputs. so we enumerate the + # nested input [emb, mask] as follows: + # args[0]: emb + # args[1]: mask[0] = band_mask + # args[2]: mask[1] = encoder_from_mask + # args[3]: mask[2] = encoder_to_mask + # args[4]: mask[3] = blocked_encoder_mask + x = super(RecomputeTransformerLayer, + self).call([args[0], [args[1], args[2], args[3], args[4]]], + training=training) + return x + + f = recompute_grad.recompute_grad(f) + + return f(emb, *mask) + + +@tf.keras.utils.register_keras_serializable(package='Text') +class BigBirdEncoder(tf.keras.Model): + """Transformer-based encoder network with BigBird attentions. + + *Note* that the network is constructed by + [Keras Functional API](https://keras.io/guides/functional_api/). + + Args: + vocab_size: The size of the token vocabulary. + hidden_size: The size of the transformer hidden layers. + num_layers: The number of transformer layers. 
+ num_attention_heads: The number of attention heads for each transformer. The
+ hidden size must be divisible by the number of attention heads.
+ max_position_embeddings: The maximum length of position embeddings that this
+ encoder can consume. If None, max_position_embeddings uses the value from
+ sequence length. This determines the variable shape for positional
+ embeddings.
+ type_vocab_size: The number of types that the 'type_ids' input can take.
+ intermediate_size: The intermediate size for the transformer layers.
+ activation: The activation to use for the transformer layers.
+ dropout_rate: The dropout rate to use for the transformer layers.
+ attention_dropout_rate: The dropout rate to use for the attention layers
+ within the transformer layers.
+ initializer: The initializer to use for all weights in this encoder.
+ embedding_width: The width of the word embeddings. If the embedding width is
+ not equal to hidden size, embedding parameters will be factorized into two
+ matrices in the shape of ['vocab_size', 'embedding_width'] and
+ ['embedding_width', 'hidden_size'] ('embedding_width' is usually much
+ smaller than 'hidden_size').
+ use_gradient_checkpointing: Use gradient checkpointing to trade-off compute
+ for memory.
+ """ + + def __init__(self, + vocab_size, + hidden_size=768, + num_layers=12, + num_attention_heads=12, + max_position_embeddings=attention.MAX_SEQ_LEN, + type_vocab_size=16, + intermediate_size=3072, + block_size=64, + num_rand_blocks=3, + activation=activations.gelu, + dropout_rate=0.1, + attention_dropout_rate=0.1, + initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + embedding_width=None, + use_gradient_checkpointing=False, + **kwargs): + activation = tf.keras.activations.get(activation) + initializer = tf.keras.initializers.get(initializer) + + if use_gradient_checkpointing: + tf.keras.layers.Dropout = recomputing_dropout.RecomputingDropout + layer_cls = RecomputeTransformerLayer + else: + layer_cls = layers.TransformerScaffold + + self._self_setattr_tracking = False + self._config_dict = { + 'vocab_size': vocab_size, + 'hidden_size': hidden_size, + 'num_layers': num_layers, + 'num_attention_heads': num_attention_heads, + 'max_position_embeddings': max_position_embeddings, + 'type_vocab_size': type_vocab_size, + 'intermediate_size': intermediate_size, + 'block_size': block_size, + 'num_rand_blocks': num_rand_blocks, + 'activation': tf.keras.activations.serialize(activation), + 'dropout_rate': dropout_rate, + 'attention_dropout_rate': attention_dropout_rate, + 'initializer': tf.keras.initializers.serialize(initializer), + 'embedding_width': embedding_width, + } + + word_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_word_ids') + mask = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_mask') + type_ids = tf.keras.layers.Input( + shape=(None,), dtype=tf.int32, name='input_type_ids') + + if embedding_width is None: + embedding_width = hidden_size + self._embedding_layer = keras_nlp.layers.OnDeviceEmbedding( + vocab_size=vocab_size, + embedding_width=embedding_width, + initializer=initializer, + name='word_embeddings') + word_embeddings = self._embedding_layer(word_ids) + + # Always uses dynamic slicing 
for simplicity. + self._position_embedding_layer = keras_nlp.layers.PositionEmbedding( + initializer=initializer, + max_length=max_position_embeddings, + name='position_embedding') + position_embeddings = self._position_embedding_layer(word_embeddings) + self._type_embedding_layer = keras_nlp.layers.OnDeviceEmbedding( + vocab_size=type_vocab_size, + embedding_width=embedding_width, + initializer=initializer, + use_one_hot=True, + name='type_embeddings') + type_embeddings = self._type_embedding_layer(type_ids) + + embeddings = tf.keras.layers.Add()( + [word_embeddings, position_embeddings, type_embeddings]) + + self._embedding_norm_layer = tf.keras.layers.LayerNormalization( + name='embeddings/layer_norm', axis=-1, epsilon=1e-12, dtype=tf.float32) + + embeddings = self._embedding_norm_layer(embeddings) + embeddings = tf.keras.layers.Dropout(rate=dropout_rate)(embeddings) + + # We project the 'embedding' output to 'hidden_size' if it is not already + # 'hidden_size'. + if embedding_width != hidden_size: + self._embedding_projection = tf.keras.layers.experimental.EinsumDense( + '...x,xy->...y', + output_shape=hidden_size, + bias_axes='y', + kernel_initializer=initializer, + name='embedding_projection') + embeddings = self._embedding_projection(embeddings) + + self._transformer_layers = [] + data = embeddings + masks = attention.BigBirdMasks(block_size=block_size)( + tf.cast(mask, embeddings.dtype)) + encoder_outputs = [] + attn_head_dim = hidden_size // num_attention_heads + for i in range(num_layers): + layer = layer_cls( + num_attention_heads, + intermediate_size, + activation, + attention_cls=attention.BigBirdAttention, + attention_cfg=dict( + num_heads=num_attention_heads, + key_dim=attn_head_dim, + kernel_initializer=initializer, + from_block_size=block_size, + to_block_size=block_size, + num_rand_blocks=num_rand_blocks, + max_rand_mask_length=max_position_embeddings, + seed=i), + dropout_rate=dropout_rate, + attention_dropout_rate=dropout_rate, + 
kernel_initializer=initializer) + self._transformer_layers.append(layer) + data = layer([data, masks]) + encoder_outputs.append(data) + + outputs = dict( + sequence_output=encoder_outputs[-1], encoder_outputs=encoder_outputs) + super().__init__( + inputs=[word_ids, mask, type_ids], outputs=outputs, **kwargs) + + def get_embedding_table(self): + return self._embedding_layer.embeddings + + def get_embedding_layer(self): + return self._embedding_layer + + def get_config(self): + return self._config_dict + + @property + def transformer_layers(self): + """List of Transformer layers in the encoder.""" + return self._transformer_layers + + @property + def pooler_layer(self): + """The pooler dense layer after the transformer layers.""" + return self._pooler_layer + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/nlp/projects/bigbird/encoder_test.py b/official/nlp/projects/bigbird/encoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5ebab7776b56b1af40539e164fa41c7f016f32e0 --- /dev/null +++ b/official/nlp/projects/bigbird/encoder_test.py @@ -0,0 +1,63 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.projects.bigbird.encoder.""" + +import numpy as np +import tensorflow as tf + +from official.nlp.projects.bigbird import encoder + + +class BigBirdEncoderTest(tf.test.TestCase): + + def test_encoder(self): + sequence_length = 1024 + batch_size = 2 + vocab_size = 1024 + network = encoder.BigBirdEncoder( + num_layers=1, vocab_size=1024, max_position_embeddings=4096) + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint(2, size=(batch_size, sequence_length)) + outputs = network([word_id_data, mask_data, type_id_data]) + self.assertEqual(outputs["sequence_output"].shape, + (batch_size, sequence_length, 768)) + + def test_save_restore(self): + sequence_length = 1024 + batch_size = 2 + vocab_size = 1024 + network = encoder.BigBirdEncoder( + num_layers=1, vocab_size=1024, max_position_embeddings=4096) + word_id_data = np.random.randint( + vocab_size, size=(batch_size, sequence_length)) + mask_data = np.random.randint(2, size=(batch_size, sequence_length)) + type_id_data = np.random.randint(2, size=(batch_size, sequence_length)) + inputs = dict( + input_word_ids=word_id_data, + input_mask=mask_data, + input_type_ids=type_id_data) + ref_outputs = network(inputs) + model_path = self.get_temp_dir() + "/model" + network.save(model_path) + loaded = tf.keras.models.load_model(model_path) + outputs = loaded(inputs) + self.assertAllClose(outputs["sequence_output"], + ref_outputs["sequence_output"]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/projects/bigbird/recompute_grad.py b/official/nlp/projects/bigbird/recompute_grad.py new file mode 100644 index 0000000000000000000000000000000000000000..d570ba848be467425f6cb3177fb1b8587a25632d --- /dev/null +++ b/official/nlp/projects/bigbird/recompute_grad.py @@ -0,0 +1,240 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Library for rematerialization.

Incubates a version of tf.recompute_grad that is XLA compatible.
"""
import collections
import os
import threading
from typing import Deque, List, NamedTuple, Optional, Sequence

from absl import logging
import numpy as np
import tensorflow as tf


class RecomputeContext(
    NamedTuple('RecomputeContext', [
        ('is_recomputing', bool),
        ('seed', tf.Tensor),
        ('children', Deque['RecomputeContext']),
    ])):
  """Context for recomputation.

  Attributes:
    is_recomputing: Whether we are in a recomputation phase.
    seed: Scalar integer tensor that should be used with stateless random ops
      for deterministic behavior and correct computation of the gradient.
    children: Nested `RecomputeContext` instances. Used internally by
      `recompute_grad` to track nested instances of `RecomputeContext`.
  """

  def __enter__(self):
    # Entering the context pushes it on the thread-local stack so library
    # code (e.g. dropout layers) can discover it implicitly.
    return _context_stack.push(self)

  def __exit__(self, exc_type, exc_value, traceback):
    _context_stack.pop(self)


# Simplified version of `_DefaultStack` in
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/ops.py.
+class _ContextStack(threading.local): + """A thread-local stack for providing implicit recompute contexts.""" + + def __init__(self): + super(_ContextStack, self).__init__() + self._stack = [] + + def top(self) -> Optional[RecomputeContext]: + return self._stack[-1] if self._stack else None + + def push(self, context: RecomputeContext): + self._stack.append(context) + return context + + def pop(self, context: RecomputeContext): + if self._stack[-1] is not context: + raise AssertionError('Nesting violated for RecomputeContext.') + self._stack.pop() + + +_context_stack = _ContextStack() + + +def get_recompute_context() -> Optional[RecomputeContext]: + """Returns the current recomputing context if it exists.""" + return _context_stack.top() + + +# Adapted from +# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/control_flow_util.py. +def _get_containing_xla_context(graph: tf.Graph) -> Optional[object]: + """Returns the first ancestor `XLAControlFlowContext` in the `graph`.""" + ctxt = graph._get_control_flow_context() # pylint: disable=protected-access + while ctxt: + if ctxt.IsXLAContext(): + return ctxt + ctxt = ctxt.outer_context + return None + + +def _in_xla_context(graph: Optional[tf.Graph] = None) -> bool: + """Detects whether we are in an XLA context.""" + if '--tf_xla_auto_jit=2' in os.environ.get('TF_XLA_FLAGS', ''): + return True + graph = tf.compat.v1.get_default_graph() if graph is None else graph + while True: + if _get_containing_xla_context(graph) is not None: + return True + try: + graph = graph.outer_graph + except AttributeError: + return False + + +def _force_data_dependency( + first_compute: Sequence[tf.Tensor], + then_compute: Sequence[tf.Tensor]) -> List[tf.Tensor]: + """Force all of `then_compute` to depend on all of `first_compute`. + + Uses a dummy data dependency, which is useful when running on TPUs because + XLA ignores control dependencies. Only supports float arguments. 
+ + Args: + first_compute: Sequence of `Tensor`s to be executed before `then_compute`. + then_compute: Sequence of `Tensor`s to executed after `first_compute`. + + Returns: + Sequence of `Tensor`s with same length of `then_compute`. + + Raises: + ValueError: if ranks are unknown or types are not floating. + """ + + def _first_element(x): + if x.shape.ndims is None: + raise ValueError('Rank of Tensor %s must be known' % x) + ndims = x.shape.ndims + begin = tf.zeros(ndims, dtype=tf.int32) + size = tf.ones(ndims, dtype=tf.int32) + return tf.reshape(tf.slice(x, begin, size), []) + + first_compute_sum = tf.add_n( + [_first_element(x) for x in first_compute if x is not None]) + dtype = first_compute_sum.dtype + if not dtype.is_floating: + raise ValueError('_force_data_dependency only supports floating dtypes.') + zero = np.finfo(dtype.as_numpy_dtype).tiny * first_compute_sum + return [ + x + tf.cast(zero, x.dtype) if x is not None else None + for x in then_compute + ] + + +def _make_seed_if_none(seed: Optional[tf.Tensor]) -> tf.Tensor: + """Uses the global generator to make a seed if necessary.""" + if seed is not None: + return seed + generator = tf.random.experimental.get_global_generator() + # The two seeds for stateless random ops don't have individual semantics and + # are scrambled together, so providing one seed is fine. This makes it easier + # for users to provide a local seed without worrying about integer overflow. + # See `make_seeds` in + # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/stateful_random_ops.py. + try: + return generator.uniform_full_int([], tf.int32, name='recompute_grad_seed') + except (RuntimeError, TypeError, ValueError, tf.errors.NotFoundError) as e: + # For a number of reasons, the above operation can fail like using multiple + # graphs or toggling between eager and graph modes. Reset the generator. + logging.warn('Resetting the generator. 
%s: %s', type(e), e) + tf.random.experimental.set_global_generator(None) + generator = tf.random.experimental.get_global_generator() + return generator.uniform_full_int([], tf.int32, name='recompute_grad_seed') + + +def recompute_grad(f, seed=None): + """An eager-compatible version of recompute_grad. + + For f(*args, **kwargs), this supports gradients with respect to args, or to + gradients with respect to any variables residing in the kwarg 'variables'. + Note that for keras layer and model objects, this is handled automatically. + + Warning: If `f` was originally a tf.keras Model or Layer object, `g` will not + be able to access the member variables of that object, because `g` returns + through the wrapper function `inner`. When recomputing gradients through + objects that inherit from keras, we suggest keeping a reference to the + underlying object around for the purpose of accessing these variables. + + Args: + f: function `f(*x)` that returns a `Tensor` or sequence of `Tensor` outputs. + seed: Optional seed for random ops. `seed` should an integer scalar + `Tensor`. When compiling to XLA, `seed` must have dtype `tf.int32`. If + `seed` is not provided one will be generated. + + Returns: + A function `g` that wraps `f`, but which recomputes `f` on the backwards + pass of a gradient call. + """ + + @tf.custom_gradient + def inner(*args, **kwargs): + """Inner function closure for calculating gradients.""" + # Detect when we're nested and in the backwards pass, so we don't generate + # an additional seed. + parent_context = get_recompute_context() + if parent_context is not None and parent_context.is_recomputing: + # Use the cached context in the recomputation phase. 
+ with parent_context.children.popleft()._replace( + is_recomputing=True) as context: + result = f(*args, **kwargs) + else: + with RecomputeContext( + is_recomputing=False, + seed=_make_seed_if_none(seed), + children=collections.deque()) as context: + result = f(*args, **kwargs) + # In the forward pass, build up a tree of recomputation contexts. + if parent_context is not None and not parent_context.is_recomputing: + parent_context.children.append(context) + + def grad(*dresult, **grad_kwargs): + """Gradient function calculation for inner function.""" + variables = grad_kwargs.pop('variables', None) + if grad_kwargs: + raise ValueError('Found unexpected kwargs for `grad`: ', + list(grad_kwargs.keys())) + inputs, seed = list(args), context.seed + if _in_xla_context(): + inputs = _force_data_dependency( + tf.nest.flatten(dresult), inputs + [seed]) + seed = inputs.pop() + with tf.GradientTape() as tape: + tape.watch(inputs) + if variables is not None: + tape.watch(variables) + with tf.control_dependencies(dresult): + with context._replace(is_recomputing=True, seed=seed): + result = f(*inputs, **kwargs) + kw_vars = [] + if variables is not None: + kw_vars = list(variables) + grads = tape.gradient( + result, list(inputs) + kw_vars, output_gradients=dresult) + return grads[:len(inputs)], grads[len(inputs):] + + return result, grad + + return inner diff --git a/official/nlp/projects/bigbird/recomputing_dropout.py b/official/nlp/projects/bigbird/recomputing_dropout.py new file mode 100644 index 0000000000000000000000000000000000000000..3a0cfa31c2143d2dd06505badf7f66a5af658d7a --- /dev/null +++ b/official/nlp/projects/bigbird/recomputing_dropout.py @@ -0,0 +1,159 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras dropout layer that is aware of `RecomputeContext`.""" + +import numpy as np +import tensorflow as tf + +from official.nlp.projects.bigbird import recompute_grad as recompute_grad_lib +from official.nlp.projects.bigbird import stateless_dropout as stateless_dropout_lib + + +# Reimplements internal function +# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/smart_cond.py. +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if not callable(true_fn): + raise TypeError('`true_fn` must be callable.') + if not callable(false_fn): + raise TypeError('`false_fn` must be callable.') + pred_value = tf.get_static_value(pred) + if isinstance(pred, tf.Variable) or pred_value is None: + return tf.cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name) + if pred_value: + return true_fn() + else: + return false_fn() + + +# See https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout. +class RecomputingDropout(tf.keras.layers.Layer): + """`tf.keras.layers.Dropout` that supports `recompute_grad`.""" + + def __init__(self, + rate, + noise_shape=None, + seed=None, + force_recomputation=False, + **kwargs): + """Initializes `RecomputingDropout`. + + Args: + rate: Float between 0 and 1. Fraction of the input units to drop. + noise_shape: 1D integer tensor representing the shape of the binary + dropout mask that will be multiplied with the input. For instance, if + inputs have shape `(batch_size, timesteps, features)` and you want the + dropout mask to be the same for all timesteps, you can use + `noise_shape=(batch_size, 1, features)`. + seed: A Python integer to use as random seed. + force_recomputation: If `True`, then raises an error if called outside a + recompute context. + **kwargs: Keyword arguments for `tf.keras.layers.Layer`. + """ + + super(RecomputingDropout, self).__init__(**kwargs) + self.rate = rate + self.noise_shape = noise_shape + self.seed = seed + self.force_recomputation = force_recomputation + self.supports_masking = True + # Create a layer-specific seed to combine with the global recompute seed. + self._recompute_seed = ( + np.random.randint(-2**31, 2**31, dtype=np.int32) + if seed is None else seed) + + def _get_noise_shape(self, inputs): + # Subclasses of `Dropout` may implement `_get_noise_shape(self, inputs)`, + # which will override `self.noise_shape`, and allows for custom noise + # shapes with dynamically sized inputs. 
+ if self.noise_shape is None: + return None + + concrete_inputs_shape = tf.shape(inputs) + noise_shape = [] + for i, value in enumerate(self.noise_shape): + noise_shape.append(concrete_inputs_shape[i] if value is None else value) + return tf.convert_to_tensor(noise_shape) + + def call(self, inputs, training=None): + """Builds computation graph. + + Args: + inputs: Input tensor (of any rank). + training: Python boolean indicating whether the layer should behave in + training mode (adding dropout) or in inference mode (doing nothing). + + Returns: + `inputs` masked according to layer configuration. + + Raises: + ValueError: If `force_recomputation` is `True` and called outside a + a recompute context. + """ + if training is None: + training = tf.keras.backend.learning_phase() + + def dropped_inputs(): + """Randomly drops elements of `inputs` when `training=True`.""" + recompute_context = recompute_grad_lib.get_recompute_context() + if recompute_context is None: + if self.force_recomputation: + raise ValueError( + 'RecomputeContext is required when force_recomputation=True.') + return tf.nn.dropout( + inputs, + noise_shape=self._get_noise_shape(inputs), + seed=self.seed, + rate=self.rate) + seed = tf.stack([recompute_context.seed, self._recompute_seed]) + return stateless_dropout_lib.stateless_dropout( + inputs, + rate=self.rate, + seed=seed, + noise_shape=self._get_noise_shape(inputs)) + + output = smart_cond(training, dropped_inputs, lambda: tf.identity(inputs)) + return output + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = { + 'rate': self.rate, + 'noise_shape': self.noise_shape, + 'seed': self.seed, + 'force_recomputation': self.force_recomputation, + } + base_config = super(RecomputingDropout, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/official/nlp/projects/bigbird/stateless_dropout.py b/official/nlp/projects/bigbird/stateless_dropout.py new file mode 
100644 index 0000000000000000000000000000000000000000..d61b313b5465d7eb2ada787c70ad97035fd098d4 --- /dev/null +++ b/official/nlp/projects/bigbird/stateless_dropout.py @@ -0,0 +1,124 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A replacement for tf.nn.dropout that uses stateless random ops.""" + +import numbers +from typing import Optional, Sequence, Text, Union + +from absl import logging +import tensorflow as tf + + +def _as_shape(shape: Union[Sequence[int], tf.TensorShape]) -> tf.TensorShape: + """Converts the given object to a TensorShape.""" + return shape if isinstance(shape, tf.TensorShape) else tf.TensorShape(shape) + + +def _get_noise_shape( + x: tf.Tensor, noise_shape: Union[Sequence[int], tf.TensorShape] +) -> Union[tf.Tensor, tf.TensorShape, Sequence[int]]: + """Computes the shape of the binary mask for dropout.""" + # If noise_shape is none return immediately. + if noise_shape is None: + return tf.shape(x) + + try: + # Best effort to figure out the intended shape. + # If not possible, let the op to handle it. + # In eager mode exception will show up. 
+ noise_shape_ = _as_shape(noise_shape) + except (TypeError, ValueError): + return noise_shape + + if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims): + new_dims = [] + for i, dim in enumerate(x.shape.dims): + if noise_shape_.dims[i].value is None and dim.value is not None: + new_dims.append(dim.value) + else: + new_dims.append(noise_shape_.dims[i].value) + return tf.TensorShape(new_dims) + + return noise_shape + + +def stateless_dropout(x: tf.Tensor, + rate: float, + seed: tf.Tensor, + noise_shape: Optional[Union[Sequence[int], + tf.TensorShape]] = None, + name: Optional[Text] = None) -> tf.Tensor: + """Computes dropout: randomly sets elements to zero to prevent overfitting. + + See https://www.tensorflow.org/api_docs/python/tf/nn/dropout. + This version differs in that the seed is required if the rate is nonzero. + + Args: + x: A floating point tensor. + rate: A scalar `Tensor` with the same type as x. The probability that each + element is dropped. For example, setting rate=0.1 would drop 10% of input + elements. + seed: A shape [2] integer Tensor of seeds to the random number generator. + Must have dtype `tf.int32` when compiling to XLA. + noise_shape: A 1-D `Tensor` of type `int32`, representing the shape for + randomly generated keep/drop flags. + name: A name for this operation (optional). + + Returns: + A `Tensor` of the same shape of `x`. + + Raises: + ValueError: If `rate` is not in `[0, 1)` or if `x` is not a floating point + tensor. `rate=1` is disallowed, because the output would be all zeros, + which is likely not what was intended. + """ + with tf.name_scope(name or 'stateless_dropout') as name: + x = tf.convert_to_tensor(x, name='x') + if not x.dtype.is_floating: + raise ValueError('x has to be a floating point tensor since it\'s going ' + ' to be scaled. Got a %s tensor instead.' 
% x.dtype) + if isinstance(rate, numbers.Real): + if not (rate >= 0 and rate < 1): + raise ValueError('rate must be a scalar tensor or a float in the ' + 'range [0, 1), got %g' % rate) + if rate > 0.5: + logging.log_first_n( + logging.WARN, 'Large dropout rate: %g (>0.5). In TensorFlow ' + '.x, dropout() uses dropout rate instead of keep_prob. ' + 'Please ensure that this is intended.', 5, rate) + + # Early return if nothing needs to be dropped. + if tf.get_static_value(rate) == 0: + return x + + rate = tf.convert_to_tensor(rate, dtype=x.dtype, name='rate') + rate.shape.assert_has_rank(0) + noise_shape = _get_noise_shape(x, noise_shape) + # Sample a uniform distribution on [0.0, 1.0) and select values larger than + # rate. + # + # NOTE: Random uniform actually can only generate 2^23 floats on [1.0, 2.0) + # and subtract 1.0. + random_tensor = tf.random.stateless_uniform( + noise_shape, seed=seed, dtype=x.dtype) + keep_prob = 1 - rate + scale = 1 / keep_prob + # NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider that + # float to be selected, hence we use a >= comparison. + keep_mask = random_tensor >= rate + ret = x * scale * tf.cast(keep_mask, x.dtype) + if not tf.executing_eagerly(): + ret.set_shape(x.get_shape()) + return ret diff --git a/official/nlp/projects/mobilebert/README.md b/official/nlp/projects/mobilebert/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4d47c878a44aff070a505a32e71b444fd275c59 --- /dev/null +++ b/official/nlp/projects/mobilebert/README.md @@ -0,0 +1,70 @@ +# MobileBERT (MobileBERT: A Compact Task-Agnostic BERT for Resource-Limited Devices) + +[MobileBERT](https://arxiv.org/abs/2004.02984) +is a thin version of BERT_LARGE, while equipped with bottleneck +structures and a carefully designed balance between self-attentions and +feed-forward networks. + +To train MobileBERT, we first train a specially designed teacher model, an +inverted-bottleneck incorporated BERT_LARGE model. 
Then, we conduct knowledge
transfer from this teacher to MobileBERT. Empirical studies show that MobileBERT
is 4.3x smaller and 5.5x faster than BERT_BASE while achieving competitive
results on well-known benchmarks. This repository contains TensorFlow 2.x
implementation for MobileBERT.

## Network Implementations

Following
[MobileBERT TF1 implementation](https://github.com/google-research/google-research/tree/master/mobilebert),
we re-implemented MobileBERT encoder and layers using `tf.keras` APIs in NLP
modeling library:

  * [mobile_bert_encoder.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/networks/mobile_bert_encoder.py)
  contains `MobileBERTEncoder` implementation.
  * [mobile_bert_layers.py](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/mobile_bert_layers.py)
  contains `MobileBertEmbedding`, `MobileBertTransformer` and
  `MobileBertMaskedLM` implementation.

## Pre-trained Models

We converted the original TF 1.x pretrained English MobileBERT checkpoint to
TF 2.x checkpoint, which is compatible with the above implementations.
In addition, we also provide a new multi-lingual MobileBERT checkpoint
trained using multi-lingual Wiki data. Furthermore, we export the checkpoints to
TF-HUB SavedModel.
Please find the details in the following table:

Model | Configuration | Number of Parameters | Training Data | Checkpoint & Vocabulary | TF-Hub SavedModel | Metrics
------------------------------ | :--------------------------------------: | :------------------- | :-----------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------: | :-----:
MobileBERT uncased English | uncased_L-24_H-128_B-512_A-4_F-4_OPT | 25.3 Million | Wiki + Books | [Download](https://storage.cloud.google.com/tf_model_garden/official/mobilebert/uncased_L-24_H-128_B-512_A-4_F-4_OPT.tar.gz) | [TF-Hub](https://tfhub.dev/tensorflow/mobilebert_en_uncased_L-24_H-128_B-512_A-4_F-4_OPT/1) | Squad v1.1 F1 90.0, GLUE 77.7
MobileBERT cased Multi-lingual | multi_cased_L-24_H-128_B-512_A-4_F-4_OPT | 36 Million | Wiki | [Download](https://storage.cloud.google.com/tf_model_garden/official/mobilebert/multi_cased_L-24_H-128_B-512_A-4_F-4_OPT.tar.gz) | [TF-Hub](https://tfhub.dev/tensorflow/mobilebert_multi_cased_L-24_H-128_B-512_A-4_F-4_OPT/1) | XNLI (zero-shot): 64.7
+pretrainer = model_utils.create_mobilebert_pretrainer(bert_config) +checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items) +checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() + +# `mobilebert_encoder` is an instance of +# `nlp.modeling.networks.MobileBERTEncoder`. +mobilebert_encoder = pretrainer.encoder_network +``` + +### Use TF-Hub models + +For the usage of MobileBert TF-Hub model, please see the TF-Hub site +([English model](https://tfhub.dev/tensorflow/mobilebert_en_uncased_L-24_H-128_B-512_A-4_F-4_OPT/1) +or +[Multilingual model](https://tfhub.dev/tensorflow/mobilebert_multi_cased_L-24_H-128_B-512_A-4_F-4_OPT/1)). diff --git a/official/nlp/projects/mobilebert/__init__.py b/official/nlp/projects/mobilebert/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/nlp/projects/mobilebert/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/projects/mobilebert/distillation.py b/official/nlp/projects/mobilebert/distillation.py new file mode 100644 index 0000000000000000000000000000000000000000..e14a48cfa54a34d478db0c96c35be32ed13e8ee4 --- /dev/null +++ b/official/nlp/projects/mobilebert/distillation.py @@ -0,0 +1,590 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Progressive distillation for MobileBERT student model."""
from typing import List, Optional

from absl import logging
import dataclasses
import orbit
import tensorflow as tf
from official.core import base_task
from official.core import config_definitions as cfg
from official.modeling import optimization
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.modeling.progressive import policies
from official.nlp import keras_nlp
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import data_loader_factory
from official.nlp.modeling import layers
from official.nlp.modeling import models


@dataclasses.dataclass
class LayerWiseDistillConfig(base_config.Config):
  """Defines the behavior of layerwise distillation."""
  num_steps: int = 10000
  warmup_steps: int = 0
  initial_learning_rate: float = 1.5e-3
  end_learning_rate: float = 1.5e-3
  decay_steps: int = 10000
  hidden_distill_factor: float = 100.0
  beta_distill_factor: float = 5000.0
  gamma_distill_factor: float = 5.0
  if_transfer_attention: bool = True
  attention_distill_factor: float = 1.0
  if_freeze_previous_layers: bool = False

  # The ids of teacher layers that will be mapped to the student model.
  # For example, if you want to compress a 24 layer teacher to a 6 layer
  # student, you can set it to [3, 7, 11, 15, 19, 23] (the index starts from 0).
  # If `None`, we assume teacher and student have the same number of layers,
  # and each layer of teacher model will be mapped to student's corresponding
  # layer.
  transfer_teacher_layers: Optional[List[int]] = None


@dataclasses.dataclass
class PretrainDistillConfig(base_config.Config):
  """Defines the behavior of pretrain distillation."""
  num_steps: int = 500000
  warmup_steps: int = 10000
  initial_learning_rate: float = 1.5e-3
  end_learning_rate: float = 1.5e-7
  decay_steps: int = 500000
  if_use_nsp_loss: bool = True
  distill_ground_truth_ratio: float = 0.5


@dataclasses.dataclass
class BertDistillationProgressiveConfig(policies.ProgressiveConfig):
  """Defines the specific distillation behavior."""
  if_copy_embeddings: bool = True
  layer_wise_distill_config: LayerWiseDistillConfig = LayerWiseDistillConfig()
  pretrain_distill_config: PretrainDistillConfig = PretrainDistillConfig()


@dataclasses.dataclass
class BertDistillationTaskConfig(cfg.TaskConfig):
  """Defines the teacher/student model architecture and training data."""
  teacher_model: bert.PretrainerConfig = bert.PretrainerConfig(
      encoder=encoders.EncoderConfig(type='mobilebert'))

  student_model: bert.PretrainerConfig = bert.PretrainerConfig(
      encoder=encoders.EncoderConfig(type='mobilebert'))
  # The path to the teacher model checkpoint or its directory.
+ teacher_model_init_checkpoint: str = '' + train_data: cfg.DataConfig = cfg.DataConfig() + validation_data: cfg.DataConfig = cfg.DataConfig() + + +def build_sub_encoder(encoder, target_layer_id): + """Builds an encoder that only computes first few transformer layers.""" + input_ids = encoder.inputs[0] + input_mask = encoder.inputs[1] + type_ids = encoder.inputs[2] + attention_mask = keras_nlp.layers.SelfAttentionMask()( + inputs=input_ids, to_mask=input_mask) + embedding_output = encoder.embedding_layer(input_ids, type_ids) + + layer_output = embedding_output + attention_score = None + for layer_idx in range(target_layer_id + 1): + layer_output, attention_score = encoder.transformer_layers[layer_idx]( + layer_output, attention_mask, return_attention_scores=True) + + return tf.keras.Model( + inputs=[input_ids, input_mask, type_ids], + outputs=[layer_output, attention_score]) + + +class BertDistillationTask(policies.ProgressivePolicy, base_task.Task): + """Distillation language modeling task progressively.""" + + def __init__(self, + strategy, + progressive: BertDistillationProgressiveConfig, + optimizer_config: optimization.OptimizationConfig, + task_config: BertDistillationTaskConfig, + logging_dir=None): + + self._strategy = strategy + self._task_config = task_config + self._progressive_config = progressive + self._optimizer_config = optimizer_config + self._train_data_config = task_config.train_data + self._eval_data_config = task_config.validation_data + self._the_only_train_dataset = None + self._the_only_eval_dataset = None + + layer_wise_config = self._progressive_config.layer_wise_distill_config + transfer_teacher_layers = layer_wise_config.transfer_teacher_layers + num_teacher_layers = ( + self._task_config.teacher_model.encoder.mobilebert.num_blocks) + num_student_layers = ( + self._task_config.student_model.encoder.mobilebert.num_blocks) + if transfer_teacher_layers and len( + transfer_teacher_layers) != num_student_layers: + raise ValueError('The number 
of `transfer_teacher_layers` %s does not ' + 'match the number of student layers. %d' % + (transfer_teacher_layers, num_student_layers)) + if not transfer_teacher_layers and (num_teacher_layers != + num_student_layers): + raise ValueError('`transfer_teacher_layers` is not specified, and the ' + 'number of teacher layers does not match ' + 'the number of student layers.') + + ratio = progressive.pretrain_distill_config.distill_ground_truth_ratio + if ratio < 0 or ratio > 1: + raise ValueError('distill_ground_truth_ratio has to be within [0, 1].') + + # A non-trainable layer for feature normalization for transfer loss + self._layer_norm = tf.keras.layers.LayerNormalization( + axis=-1, + beta_initializer='zeros', + gamma_initializer='ones', + trainable=False) + + # Build the teacher and student pretrainer model. + self._teacher_pretrainer = self._build_pretrainer( + self._task_config.teacher_model, name='teacher') + self._student_pretrainer = self._build_pretrainer( + self._task_config.student_model, name='student') + + base_task.Task.__init__( + self, params=task_config, logging_dir=logging_dir) + policies.ProgressivePolicy.__init__(self) + + def _build_pretrainer(self, pretrainer_cfg: bert.PretrainerConfig, name: str): + """Builds pretrainer from config and encoder.""" + encoder = encoders.build_encoder(pretrainer_cfg.encoder) + if pretrainer_cfg.cls_heads: + cls_heads = [ + layers.ClassificationHead(**cfg.as_dict()) + for cfg in pretrainer_cfg.cls_heads + ] + else: + cls_heads = [] + + masked_lm = layers.MobileBertMaskedLM( + embedding_table=encoder.get_embedding_table(), + activation=tf_utils.get_activation(pretrainer_cfg.mlm_activation), + initializer=tf.keras.initializers.TruncatedNormal( + stddev=pretrainer_cfg.mlm_initializer_range), + name='cls/predictions') + + pretrainer = models.BertPretrainerV2( + encoder_network=encoder, + classification_heads=cls_heads, + customized_masked_lm=masked_lm, + name=name) + return pretrainer + + # override 
policies.ProgressivePolicy + def num_stages(self): + # One stage for each layer, plus additional stage for pre-training + return self._task_config.student_model.encoder.mobilebert.num_blocks + 1 + + # override policies.ProgressivePolicy + def num_steps(self, stage_id) -> int: + """Return the total number of steps in this stage.""" + if stage_id + 1 < self.num_stages(): + return self._progressive_config.layer_wise_distill_config.num_steps + else: + return self._progressive_config.pretrain_distill_config.num_steps + + # override policies.ProgressivePolicy + def get_model(self, stage_id, old_model=None) -> tf.keras.Model: + del old_model + return self.build_model(stage_id) + + # override policies.ProgressivePolicy + def get_optimizer(self, stage_id): + """Build optimizer for each stage.""" + if stage_id + 1 < self.num_stages(): + distill_config = self._progressive_config.layer_wise_distill_config + else: + distill_config = self._progressive_config.pretrain_distill_config + + params = self._optimizer_config.replace( + learning_rate={ + 'polynomial': { + 'decay_steps': + distill_config.decay_steps, + 'initial_learning_rate': + distill_config.initial_learning_rate, + 'end_learning_rate': + distill_config.end_learning_rate, + } + }, + warmup={ + 'linear': + {'warmup_steps': + distill_config.warmup_steps, + } + }) + opt_factory = optimization.OptimizerFactory(params) + optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) + + return optimizer + + # override policies.ProgressivePolicy + def get_train_dataset(self, stage_id: int) -> tf.data.Dataset: + """Return Dataset for this stage.""" + del stage_id + if self._the_only_train_dataset is None: + self._the_only_train_dataset = orbit.utils.make_distributed_dataset( + self._strategy, self.build_inputs, self._train_data_config) + return self._the_only_train_dataset + + # overrides policies.ProgressivePolicy + def get_eval_dataset(self, stage_id): + del stage_id + if self._the_only_eval_dataset is None: + 
self._the_only_eval_dataset = orbit.utils.make_distributed_dataset( + self._strategy, self.build_inputs, self._eval_data_config) + return self._the_only_eval_dataset + + # override base_task.task + def build_model(self, stage_id) -> tf.keras.Model: + """Build teacher/student keras models with outputs for current stage.""" + # Freeze the teacher model. + self._teacher_pretrainer.trainable = False + layer_wise_config = self._progressive_config.layer_wise_distill_config + freeze_previous_layers = layer_wise_config.if_freeze_previous_layers + student_encoder = self._student_pretrainer.encoder_network + + if stage_id != self.num_stages() - 1: + # Build a model that outputs teacher's and student's transformer outputs. + inputs = student_encoder.inputs + student_sub_encoder = build_sub_encoder( + encoder=student_encoder, target_layer_id=stage_id) + student_output_feature, student_attention_score = student_sub_encoder( + inputs) + + if layer_wise_config.transfer_teacher_layers: + teacher_layer_id = layer_wise_config.transfer_teacher_layers[stage_id] + else: + teacher_layer_id = stage_id + + teacher_sub_encoder = build_sub_encoder( + encoder=self._teacher_pretrainer.encoder_network, + target_layer_id=teacher_layer_id) + + teacher_output_feature, teacher_attention_score = teacher_sub_encoder( + inputs) + + if freeze_previous_layers: + student_encoder.embedding_layer.trainable = False + for i in range(stage_id): + student_encoder.transformer_layers[i].trainable = False + + return tf.keras.Model( + inputs=inputs, + outputs=dict( + student_output_feature=student_output_feature, + student_attention_score=student_attention_score, + teacher_output_feature=teacher_output_feature, + teacher_attention_score=teacher_attention_score)) + else: + # Build a model that outputs teacher's and student's MLM/NSP outputs. 
+ inputs = self._student_pretrainer.inputs + student_pretrainer_output = self._student_pretrainer(inputs) + teacher_pretrainer_output = self._teacher_pretrainer(inputs) + + # Set all student's transformer blocks to trainable. + if freeze_previous_layers: + student_encoder.embedding_layer.trainable = True + for layer in student_encoder.transformer_layers: + layer.trainable = True + + model = tf.keras.Model( + inputs=inputs, + outputs=dict( + student_pretrainer_output=student_pretrainer_output, + teacher_pretrainer_output=teacher_pretrainer_output, + )) + # Checkpoint the student encoder which is the goal of distillation. + model.checkpoint_items = self._student_pretrainer.checkpoint_items + return model + + # overrides base_task.Task + def build_inputs(self, params, input_context=None): + """Returns tf.data.Dataset for pretraining.""" + # copy from masked_lm.py for testing + if params.input_path == 'dummy': + + def dummy_data(_): + dummy_ids = tf.zeros((1, params.seq_length), dtype=tf.int32) + dummy_lm = tf.zeros((1, params.max_predictions_per_seq), dtype=tf.int32) + return dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids, + masked_lm_positions=dummy_lm, + masked_lm_ids=dummy_lm, + masked_lm_weights=tf.cast(dummy_lm, dtype=tf.float32), + next_sentence_labels=tf.zeros((1, 1), dtype=tf.int32)) + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset + + return data_loader_factory.get_data_loader(params).load(input_context) + + def _get_distribution_losses(self, teacher, student): + """Return the beta and gamma distall losses for feature distribution.""" + teacher_mean = tf.math.reduce_mean(teacher, axis=-1, keepdims=True) + student_mean = tf.math.reduce_mean(student, axis=-1, keepdims=True) + teacher_var = tf.math.reduce_variance(teacher, axis=-1, keepdims=True) + student_var = tf.math.reduce_variance(student, 
axis=-1, keepdims=True) + + beta_loss = tf.math.squared_difference(student_mean, teacher_mean) + beta_loss = tf.math.reduce_mean(beta_loss, axis=None, keepdims=False) + gamma_loss = tf.math.abs(student_var - teacher_var) + gamma_loss = tf.math.reduce_mean(gamma_loss, axis=None, keepdims=False) + + return beta_loss, gamma_loss + + def _get_attention_loss(self, teacher_score, student_score): + # Note that the definition of KLDivergence here is a little different from + # the original one (tf.keras.losses.KLDivergence). We adopt this approach + # to stay consistent with the TF1 implementation. + teacher_weight = tf.keras.activations.softmax(teacher_score, axis=-1) + student_log_weight = tf.nn.log_softmax(student_score, axis=-1) + kl_divergence = -(teacher_weight * student_log_weight) + kl_divergence = tf.math.reduce_sum(kl_divergence, axis=-1, keepdims=True) + kl_divergence = tf.math.reduce_mean(kl_divergence, axis=None, + keepdims=False) + return kl_divergence + + def build_losses(self, labels, outputs, metrics) -> tf.Tensor: + """Builds losses and update loss-related metrics for the current stage.""" + last_stage = 'student_pretrainer_output' in outputs + + # Layer-wise warmup stage + if not last_stage: + distill_config = self._progressive_config.layer_wise_distill_config + teacher_feature = outputs['teacher_output_feature'] + student_feature = outputs['student_output_feature'] + + feature_transfer_loss = tf.keras.losses.mean_squared_error( + self._layer_norm(teacher_feature), self._layer_norm(student_feature)) + feature_transfer_loss *= distill_config.hidden_distill_factor + beta_loss, gamma_loss = self._get_distribution_losses(teacher_feature, + student_feature) + beta_loss *= distill_config.beta_distill_factor + gamma_loss *= distill_config.gamma_distill_factor + total_loss = feature_transfer_loss + beta_loss + gamma_loss + + if distill_config.if_transfer_attention: + teacher_attention = outputs['teacher_attention_score'] + student_attention = 
outputs['student_attention_score'] + attention_loss = self._get_attention_loss(teacher_attention, + student_attention) + attention_loss *= distill_config.attention_distill_factor + total_loss += attention_loss + + total_loss /= tf.cast((self._stage_id + 1), tf.float32) + + # Last stage to distill pretraining layer. + else: + distill_config = self._progressive_config.pretrain_distill_config + lm_label = labels['masked_lm_ids'] + vocab_size = ( + self._task_config.student_model.encoder.mobilebert.word_vocab_size) + + # Shape: [batch, max_predictions_per_seq, vocab_size] + lm_label = tf.one_hot(indices=lm_label, depth=vocab_size, on_value=1.0, + off_value=0.0, axis=-1, dtype=tf.float32) + gt_ratio = distill_config.distill_ground_truth_ratio + if gt_ratio != 1.0: + teacher_mlm_logits = outputs['teacher_pretrainer_output']['mlm_logits'] + teacher_labels = tf.nn.softmax(teacher_mlm_logits, axis=-1) + lm_label = gt_ratio * lm_label + (1-gt_ratio) * teacher_labels + + student_pretrainer_output = outputs['student_pretrainer_output'] + # Shape: [batch, max_predictions_per_seq, vocab_size] + student_lm_log_probs = tf.nn.log_softmax( + student_pretrainer_output['mlm_logits'], axis=-1) + + # Shape: [batch * max_predictions_per_seq] + per_example_loss = tf.reshape( + -tf.reduce_sum(student_lm_log_probs * lm_label, axis=[-1]), [-1]) + + lm_label_weights = tf.reshape(labels['masked_lm_weights'], [-1]) + lm_numerator_loss = tf.reduce_sum(per_example_loss * lm_label_weights) + lm_denominator_loss = tf.reduce_sum(lm_label_weights) + mlm_loss = tf.math.divide_no_nan(lm_numerator_loss, lm_denominator_loss) + total_loss = mlm_loss + + if 'next_sentence_labels' in labels: + sentence_labels = labels['next_sentence_labels'] + sentence_outputs = tf.cast( + student_pretrainer_output['next_sentence'], dtype=tf.float32) + sentence_loss = tf.reduce_mean( + tf.keras.losses.sparse_categorical_crossentropy( + sentence_labels, sentence_outputs, from_logits=True)) + total_loss += sentence_loss + + # 
Also update loss-related metrics here, instead of in `process_metrics`. + metrics = dict([(metric.name, metric) for metric in metrics]) + + if not last_stage: + metrics['feature_transfer_mse'].update_state(feature_transfer_loss) + metrics['beta_transfer_loss'].update_state(beta_loss) + metrics['gamma_transfer_loss'].update_state(gamma_loss) + layer_wise_config = self._progressive_config.layer_wise_distill_config + if layer_wise_config.if_transfer_attention: + metrics['attention_transfer_loss'].update_state(attention_loss) + else: + metrics['lm_example_loss'].update_state(mlm_loss) + if 'next_sentence_labels' in labels: + metrics['next_sentence_loss'].update_state(sentence_loss) + metrics['total_loss'].update_state(total_loss) + + return total_loss + + # overrides base_task.Task + def build_metrics(self, training=None): + del training + metrics = [ + tf.keras.metrics.Mean(name='feature_transfer_mse'), + tf.keras.metrics.Mean(name='beta_transfer_loss'), + tf.keras.metrics.Mean(name='gamma_transfer_loss'), + tf.keras.metrics.SparseCategoricalAccuracy(name='masked_lm_accuracy'), + tf.keras.metrics.Mean(name='lm_example_loss'), + tf.keras.metrics.Mean(name='total_loss')] + if self._progressive_config.layer_wise_distill_config.if_transfer_attention: + metrics.append(tf.keras.metrics.Mean(name='attention_transfer_loss')) + if self._task_config.train_data.use_next_sentence_label: + metrics.append(tf.keras.metrics.SparseCategoricalAccuracy( + name='next_sentence_accuracy')) + metrics.append(tf.keras.metrics.Mean(name='next_sentence_loss')) + + return metrics + + # overrides base_task.Task + # process non-loss metrics + def process_metrics(self, metrics, labels, student_pretrainer_output): + metrics = dict([(metric.name, metric) for metric in metrics]) + # Final pretrainer layer distillation stage. 
+ if student_pretrainer_output is not None: + if 'masked_lm_accuracy' in metrics: + metrics['masked_lm_accuracy'].update_state( + labels['masked_lm_ids'], student_pretrainer_output['mlm_logits'], + labels['masked_lm_weights']) + if 'next_sentence_accuracy' in metrics: + metrics['next_sentence_accuracy'].update_state( + labels['next_sentence_labels'], + student_pretrainer_output['next_sentence']) + + # overrides base_task.Task + def train_step(self, inputs, model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, metrics): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + with tf.GradientTape() as tape: + outputs = model(inputs, training=True) + + # Computes per-replica loss. + loss = self.build_losses( + labels=inputs, + outputs=outputs, + metrics=metrics) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + # TODO(b/154564893): enable loss scaling. + # scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync + + # get trainable variables for current stage + tvars = model.trainable_variables + last_stage = 'student_pretrainer_output' in outputs + + grads = tape.gradient(loss, tvars) + optimizer.apply_gradients(list(zip(grads, tvars))) + self.process_metrics( + metrics, inputs, + outputs['student_pretrainer_output'] if last_stage else None) + return {self.loss: loss} + + # overrides base_task.Task + def validation_step(self, inputs, model: tf.keras.Model, metrics): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + outputs = model(inputs, training=False) + # Computes per-replica loss. 
+ loss = self.build_losses(labels=inputs, outputs=outputs, metrics=metrics) + last_stage = 'student_pretrainer_output' in outputs + self.process_metrics( + metrics, inputs, + outputs['student_pretrainer_output'] if last_stage else None) + return {self.loss: loss} + + @property + def cur_checkpoint_items(self): + """Checkpoints for model, stage_id, optimizer for preemption handling.""" + return dict( + stage_id=self._stage_id, + volatiles=self._volatiles, + student_pretrainer=self._student_pretrainer, + teacher_pretrainer=self._teacher_pretrainer, + encoder=self._student_pretrainer.encoder_network) + + def initialize(self, model): + """Loads teacher's pretrained checkpoint and copy student's embedding.""" + # This function will be called when no checkpoint found for the model, + # i.e., when the training starts (not preemption case). + # The weights of teacher pretrainer and student pretrainer will be + # initialized, rather than the passed-in `model`. + del model + logging.info('Begin to load checkpoint for teacher pretrainer model.') + ckpt_dir_or_file = self._task_config.teacher_model_init_checkpoint + if not ckpt_dir_or_file: + raise ValueError('`teacher_model_init_checkpoint` is not specified.') + + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + # Makes sure the teacher pretrainer variables are created. 
+ _ = self._teacher_pretrainer(self._teacher_pretrainer.inputs) + teacher_checkpoint = tf.train.Checkpoint( + **self._teacher_pretrainer.checkpoint_items) + teacher_checkpoint.read(ckpt_dir_or_file).assert_existing_objects_matched() + + logging.info('Begin to copy word embedding from teacher model to student.') + teacher_encoder = self._teacher_pretrainer.encoder_network + student_encoder = self._student_pretrainer.encoder_network + embedding_weights = teacher_encoder.embedding_layer.get_weights() + student_encoder.embedding_layer.set_weights(embedding_weights) diff --git a/official/nlp/projects/mobilebert/distillation_test.py b/official/nlp/projects/mobilebert/distillation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a3fa9e24df622b2cf965bd0c9f0cb55b2b9f4c70 --- /dev/null +++ b/official/nlp/projects/mobilebert/distillation_test.py @@ -0,0 +1,174 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.projects.mobilebert.distillation.""" +import os + +from absl import logging +from absl.testing import parameterized +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.modeling import optimization +from official.modeling import tf_utils +from official.modeling.progressive import trainer as prog_trainer_lib +from official.nlp.configs import bert +from official.nlp.configs import encoders +from official.nlp.data import pretrain_dataloader +from official.nlp.modeling import layers +from official.nlp.modeling import models +from official.nlp.projects.mobilebert import distillation + + +class DistillationTest(tf.test.TestCase, parameterized.TestCase): + + def prepare_config(self, teacher_block_num, student_block_num, + transfer_teacher_layers): + # using small model for testing + task_config = distillation.BertDistillationTaskConfig( + teacher_model=bert.PretrainerConfig( + encoder=encoders.EncoderConfig( + type='mobilebert', + mobilebert=encoders.MobileBertEncoderConfig( + num_blocks=teacher_block_num)), + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=256, + num_classes=2, + dropout_rate=0.1, + name='next_sentence') + ], + mlm_activation='gelu'), + student_model=bert.PretrainerConfig( + encoder=encoders.EncoderConfig( + type='mobilebert', + mobilebert=encoders.MobileBertEncoderConfig( + num_blocks=student_block_num)), + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=256, + num_classes=2, + dropout_rate=0.1, + name='next_sentence') + ], + mlm_activation='relu'), + train_data=pretrain_dataloader.BertPretrainDataConfig( + input_path='dummy', + max_predictions_per_seq=76, + seq_length=512, + global_batch_size=10), + validation_data=pretrain_dataloader.BertPretrainDataConfig( + input_path='dummy', + max_predictions_per_seq=76, + seq_length=512, + global_batch_size=10)) + + # set only 1 step for each stage + progressive_config = distillation.BertDistillationProgressiveConfig() + 
progressive_config.layer_wise_distill_config.transfer_teacher_layers = ( + transfer_teacher_layers) + progressive_config.layer_wise_distill_config.num_steps = 1 + progressive_config.pretrain_distill_config.num_steps = 1 + + optimization_config = optimization.OptimizationConfig( + optimizer=optimization.OptimizerConfig( + type='lamb', + lamb=optimization.LAMBConfig( + weight_decay_rate=0.0001, + exclude_from_weight_decay=[ + 'LayerNorm', 'layer_norm', 'bias', 'no_norm' + ])), + learning_rate=optimization.LrConfig( + type='polynomial', + polynomial=optimization.PolynomialLrConfig( + initial_learning_rate=1.5e-3, + decay_steps=10000, + end_learning_rate=1.5e-3)), + warmup=optimization.WarmupConfig( + type='linear', + linear=optimization.LinearWarmupConfig(warmup_learning_rate=0))) + + exp_config = cfg.ExperimentConfig( + task=task_config, + trainer=prog_trainer_lib.ProgressiveTrainerConfig( + progressive=progressive_config, + optimizer_config=optimization_config)) + + # Create a teacher model checkpoint. + teacher_encoder = encoders.build_encoder(task_config.teacher_model.encoder) + pretrainer_config = task_config.teacher_model + if pretrainer_config.cls_heads: + teacher_cls_heads = [ + layers.ClassificationHead(**cfg.as_dict()) + for cfg in pretrainer_config.cls_heads + ] + else: + teacher_cls_heads = [] + + masked_lm = layers.MobileBertMaskedLM( + embedding_table=teacher_encoder.get_embedding_table(), + activation=tf_utils.get_activation(pretrainer_config.mlm_activation), + initializer=tf.keras.initializers.TruncatedNormal( + stddev=pretrainer_config.mlm_initializer_range), + name='cls/predictions') + teacher_pretrainer = models.BertPretrainerV2( + encoder_network=teacher_encoder, + classification_heads=teacher_cls_heads, + customized_masked_lm=masked_lm) + + # The model variables will be created after the forward call. 
+ _ = teacher_pretrainer(teacher_pretrainer.inputs) + teacher_pretrainer_ckpt = tf.train.Checkpoint( + **teacher_pretrainer.checkpoint_items) + teacher_ckpt_path = os.path.join(self.get_temp_dir(), 'teacher_model.ckpt') + teacher_pretrainer_ckpt.save(teacher_ckpt_path) + exp_config.task.teacher_model_init_checkpoint = self.get_temp_dir() + + return exp_config + + @parameterized.parameters((2, 2, None), (4, 2, [1, 3])) + def test_task(self, teacher_block_num, student_block_num, + transfer_teacher_layers): + exp_config = self.prepare_config(teacher_block_num, student_block_num, + transfer_teacher_layers) + bert_distillation_task = distillation.BertDistillationTask( + strategy=tf.distribute.get_strategy(), + progressive=exp_config.trainer.progressive, + optimizer_config=exp_config.trainer.optimizer_config, + task_config=exp_config.task) + metrics = bert_distillation_task.build_metrics() + train_dataset = bert_distillation_task.get_train_dataset(stage_id=0) + train_iterator = iter(train_dataset) + + eval_dataset = bert_distillation_task.get_eval_dataset(stage_id=0) + eval_iterator = iter(eval_dataset) + optimizer = tf.keras.optimizers.SGD(lr=0.1) + + # test train/val step for all stages, including the last pretraining stage + for stage in range(student_block_num + 1): + step = stage + bert_distillation_task.update_pt_stage(step) + model = bert_distillation_task.get_model(stage, None) + bert_distillation_task.initialize(model) + bert_distillation_task.train_step(next(train_iterator), model, optimizer, + metrics=metrics) + bert_distillation_task.validation_step(next(eval_iterator), model, + metrics=metrics) + + logging.info('begin to save and load model checkpoint') + ckpt = tf.train.Checkpoint(model=model) + ckpt.save(self.get_temp_dir()) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/nlp/projects/mobilebert/experiments/en_uncased_student.yaml b/official/nlp/projects/mobilebert/experiments/en_uncased_student.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..cfcf40c2b89e2d85763785235ae661e6774e47c3 --- /dev/null +++ b/official/nlp/projects/mobilebert/experiments/en_uncased_student.yaml @@ -0,0 +1,22 @@ +task: + model: + encoder: + type: mobilebert + mobilebert: + word_vocab_size: 30522 + word_embed_size: 128 + type_vocab_size: 2 + max_sequence_length: 512 + num_blocks: 24 + hidden_size: 512 + num_attention_heads: 4 + intermediate_size: 512 + hidden_activation: relu + hidden_dropout_prob: 0.0 + attention_probs_dropout_prob: 0.1 + intra_bottleneck_size: 128 + initializer_range: 0.02 + key_query_shared_bottleneck: true + num_feedforward_networks: 4 + normalization_type: no_norm + classifier_activation: false diff --git a/official/nlp/projects/mobilebert/experiments/en_uncased_teacher.yaml b/official/nlp/projects/mobilebert/experiments/en_uncased_teacher.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eeee8537da5dbdd55c7effdb5bc6bdab24339bc6 --- /dev/null +++ b/official/nlp/projects/mobilebert/experiments/en_uncased_teacher.yaml @@ -0,0 +1,22 @@ +task: + model: + encoder: + type: mobilebert + mobilebert: + word_vocab_size: 30522 + word_embed_size: 128 + type_vocab_size: 2 + max_sequence_length: 512 + num_blocks: 24 + hidden_size: 512 + num_attention_heads: 4 + intermediate_size: 4096 + hidden_activation: gelu + hidden_dropout_prob: 0.1 + attention_probs_dropout_prob: 0.1 + intra_bottleneck_size: 1024 + initializer_range: 0.02 + key_query_shared_bottleneck: false + num_feedforward_networks: 1 + normalization_type: layer_norm + classifier_activation: false diff --git a/official/nlp/projects/mobilebert/experiments/mobilebert_distillation_en_uncased.yaml b/official/nlp/projects/mobilebert/experiments/mobilebert_distillation_en_uncased.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74e6adc3c47181747161eadcdfe40ca6d95d80e3 --- /dev/null +++ b/official/nlp/projects/mobilebert/experiments/mobilebert_distillation_en_uncased.yaml @@ -0,0 
+1,79 @@ +task: + train_data: + drop_remainder: true + global_batch_size: 2048 + input_path: "" + is_training: true + max_predictions_per_seq: 20 + seq_length: 512 + use_next_sentence_label: true + use_position_id: false + validation_data: + drop_remainder: true + global_batch_size: 2048 + input_path: "" + is_training: false + max_predictions_per_seq: 20 + seq_length: 512 + use_next_sentence_label: true + use_position_id: false + teacher_model: + cls_heads: [] + mlm_activation: gelu + mlm_initializer_range: 0.02 + encoder: + type: mobilebert + mobilebert: + word_vocab_size: 30522 + word_embed_size: 128 + type_vocab_size: 2 + max_sequence_length: 512 + num_blocks: 24 + hidden_size: 512 + num_attention_heads: 4 + intermediate_size: 4096 + hidden_activation: gelu + hidden_dropout_prob: 0.1 + attention_probs_dropout_prob: 0.1 + intra_bottleneck_size: 1024 + initializer_range: 0.02 + key_query_shared_bottleneck: false + num_feedforward_networks: 1 + normalization_type: layer_norm + classifier_activation: false + student_model: + cls_heads: [{activation: tanh, cls_token_idx: 0, dropout_rate: 0.0, inner_dim: 512, + name: next_sentence, num_classes: 2}] + mlm_activation: relu + mlm_initializer_range: 0.02 + encoder: + type: mobilebert + mobilebert: + word_vocab_size: 30522 + word_embed_size: 128 + type_vocab_size: 2 + max_sequence_length: 512 + num_blocks: 24 + hidden_size: 512 + num_attention_heads: 4 + intermediate_size: 512 + hidden_activation: relu + hidden_dropout_prob: 0.0 + attention_probs_dropout_prob: 0.1 + intra_bottleneck_size: 128 + initializer_range: 0.02 + key_query_shared_bottleneck: true + num_feedforward_networks: 4 + normalization_type: no_norm + classifier_activation: false + teacher_model_init_checkpoint: "" +trainer: + progressive: + if_copy_embeddings: true + layer_wise_distill_config: + num_steps: 10000 + pretrain_distill_config: + num_steps: 500000 + decay_steps: 500000 + train_steps: 740000 + max_to_keep: 10 diff --git 
a/official/nlp/projects/mobilebert/export_tfhub.py b/official/nlp/projects/mobilebert/export_tfhub.py new file mode 100644 index 0000000000000000000000000000000000000000..bfc45d0f4316216cd7f12a184527b86c17dacf10 --- /dev/null +++ b/official/nlp/projects/mobilebert/export_tfhub.py @@ -0,0 +1,86 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A script to export the MobileBERT encoder model as a TF-Hub SavedModel.""" +from absl import app +from absl import flags +from absl import logging +import tensorflow as tf + +from official.nlp.projects.mobilebert import model_utils + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + "bert_config_file", None, + "Bert configuration file to define core mobilebert layers.") +flags.DEFINE_string("model_checkpoint_path", None, + "File path to TF model checkpoint.") +flags.DEFINE_string("export_path", None, "TF-Hub SavedModel destination path.") +flags.DEFINE_string("vocab_file", None, + "The vocabulary file that the BERT model was trained on.") +flags.DEFINE_bool("do_lower_case", True, "Whether to lowercase.") + + +def create_mobilebert_model(bert_config): + """Creates a model for exporting to tfhub.""" + pretrainer = model_utils.create_mobilebert_pretrainer(bert_config) + encoder = pretrainer.encoder_network + encoder_inputs_dict = {x.name: x for x in encoder.inputs} + encoder_output_dict = encoder(encoder_inputs_dict) + + # For interchangeability with other text 
representations, + # add "default" as an alias for MobileBERT's whole-input reptesentations. + encoder_output_dict["default"] = encoder_output_dict["pooled_output"] + core_model = tf.keras.Model( + inputs=encoder_inputs_dict, outputs=encoder_output_dict) + + pretrainer_inputs_dict = {x.name: x for x in pretrainer.inputs} + pretrainer_output_dict = pretrainer(pretrainer_inputs_dict) + mlm_model = tf.keras.Model( + inputs=pretrainer_inputs_dict, outputs=pretrainer_output_dict) + # Set `_auto_track_sub_layers` to False, so that the additional weights + # from `mlm` sub-object will not be included in the core model. + # TODO(b/169210253): Use public API after the bug is resolved. + core_model._auto_track_sub_layers = False # pylint: disable=protected-access + core_model.mlm = mlm_model + return core_model, pretrainer + + +def export_bert_tfhub(bert_config, model_checkpoint_path, hub_destination, + vocab_file, do_lower_case): + """Restores a tf.keras.Model and saves for TF-Hub.""" + core_model, pretrainer = create_mobilebert_model(bert_config) + checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items) + + logging.info("Begin to load model") + checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() + logging.info("Loading model finished") + core_model.vocab_file = tf.saved_model.Asset(vocab_file) + core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False) + logging.info("Begin to save files for tfhub at %s", hub_destination) + core_model.save(hub_destination, include_optimizer=False, save_format="tf") + logging.info("tfhub files exported!") + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + bert_config = model_utils.BertConfig.from_json_file(FLAGS.bert_config_file) + export_bert_tfhub(bert_config, FLAGS.model_checkpoint_path, FLAGS.export_path, + FLAGS.vocab_file, FLAGS.do_lower_case) + + +if __name__ == "__main__": + app.run(main) diff --git 
a/official/nlp/projects/mobilebert/model_utils.py b/official/nlp/projects/mobilebert/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd6448515771f5aa4a06cbe71908a7ac196933d --- /dev/null +++ b/official/nlp/projects/mobilebert/model_utils.py @@ -0,0 +1,170 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Checkpoint converter for Mobilebert.""" +import copy +import json + +import tensorflow.compat.v1 as tf + +from official.modeling import tf_utils +from official.nlp.modeling import layers +from official.nlp.modeling import models +from official.nlp.modeling import networks + + +class BertConfig(object): + """Configuration for `BertModel`.""" + + def __init__(self, + vocab_size, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + initializer_range=0.02, + embedding_size=None, + trigram_input=False, + use_bottleneck=False, + intra_bottleneck_size=None, + use_bottleneck_attention=False, + key_query_shared_bottleneck=False, + num_feedforward_networks=1, + normalization_type="layer_norm", + classifier_activation=True): + """Constructs BertConfig. + + Args: + vocab_size: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. 
+ num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. + hidden_dropout_prob: The dropout probability for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The stdev of the truncated_normal_initializer for + initializing all weight matrices. + embedding_size: The size of the token embedding. + trigram_input: Use a convolution of trigram as input. + use_bottleneck: Use the bottleneck/inverted-bottleneck structure in BERT. + intra_bottleneck_size: The hidden size in the bottleneck. + use_bottleneck_attention: Use attention inputs from the bottleneck + transformation. + key_query_shared_bottleneck: Use the same linear transformation for + query&key in the bottleneck. + num_feedforward_networks: Number of FFNs in a block. + normalization_type: The normalization type in BERT. + classifier_activation: Using the tanh activation for the final + representation of the [CLS] token in fine-tuning. 
+ """ + self.vocab_size = vocab_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.embedding_size = embedding_size + self.trigram_input = trigram_input + self.use_bottleneck = use_bottleneck + self.intra_bottleneck_size = intra_bottleneck_size + self.use_bottleneck_attention = use_bottleneck_attention + self.key_query_shared_bottleneck = key_query_shared_bottleneck + self.num_feedforward_networks = num_feedforward_networks + self.normalization_type = normalization_type + self.classifier_activation = classifier_activation + + @classmethod + def from_dict(cls, json_object): + """Constructs a `BertConfig` from a Python dictionary of parameters.""" + config = BertConfig(vocab_size=None) + for (key, value) in json_object.items(): + config.__dict__[key] = value + if config.embedding_size is None: + config.embedding_size = config.hidden_size + if config.intra_bottleneck_size is None: + config.intra_bottleneck_size = config.hidden_size + return config + + @classmethod + def from_json_file(cls, json_file): + """Constructs a `BertConfig` from a json file of parameters.""" + with tf.gfile.GFile(json_file, "r") as reader: + text = reader.read() + return cls.from_dict(json.loads(text)) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + + +def create_mobilebert_pretrainer(bert_config): + """Creates a BertPretrainerV2 that wraps 
MobileBERTEncoder model.""" + mobilebert_encoder = networks.MobileBERTEncoder( + word_vocab_size=bert_config.vocab_size, + word_embed_size=bert_config.embedding_size, + type_vocab_size=bert_config.type_vocab_size, + max_sequence_length=bert_config.max_position_embeddings, + num_blocks=bert_config.num_hidden_layers, + hidden_size=bert_config.hidden_size, + num_attention_heads=bert_config.num_attention_heads, + intermediate_size=bert_config.intermediate_size, + intermediate_act_fn=tf_utils.get_activation(bert_config.hidden_act), + hidden_dropout_prob=bert_config.hidden_dropout_prob, + attention_probs_dropout_prob=bert_config.attention_probs_dropout_prob, + intra_bottleneck_size=bert_config.intra_bottleneck_size, + initializer_range=bert_config.initializer_range, + use_bottleneck_attention=bert_config.use_bottleneck_attention, + key_query_shared_bottleneck=bert_config.key_query_shared_bottleneck, + num_feedforward_networks=bert_config.num_feedforward_networks, + normalization_type=bert_config.normalization_type, + classifier_activation=bert_config.classifier_activation) + + masked_lm = layers.MobileBertMaskedLM( + embedding_table=mobilebert_encoder.get_embedding_table(), + activation=tf_utils.get_activation(bert_config.hidden_act), + initializer=tf.keras.initializers.TruncatedNormal( + stddev=bert_config.initializer_range), + name="cls/predictions") + + pretrainer = models.BertPretrainerV2( + encoder_network=mobilebert_encoder, customized_masked_lm=masked_lm) + # Makes sure the pretrainer variables are created. + _ = pretrainer(pretrainer.inputs) + return pretrainer diff --git a/official/nlp/projects/mobilebert/run_distillation.py b/official/nlp/projects/mobilebert/run_distillation.py new file mode 100644 index 0000000000000000000000000000000000000000..b056fea49d4a1fcd070ca4858d631b95cbcafa9a --- /dev/null +++ b/official/nlp/projects/mobilebert/run_distillation.py @@ -0,0 +1,147 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=line-too-long +"""Creating the task and start trainer.""" + +import pprint + +from absl import app +from absl import flags +from absl import logging +import gin +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import config_definitions as cfg +from official.core import train_utils +from official.modeling import hyperparams +from official.modeling import optimization +from official.modeling import performance +from official.modeling.progressive import train_lib +from official.modeling.progressive import trainer as prog_trainer_lib +from official.nlp.data import pretrain_dataloader +from official.nlp.projects.mobilebert import distillation + + +FLAGS = flags.FLAGS + +optimization_config = optimization.OptimizationConfig( + optimizer=optimization.OptimizerConfig( + type='lamb', + lamb=optimization.LAMBConfig( + weight_decay_rate=0.01, + exclude_from_weight_decay=['LayerNorm', 'bias', 'norm'], + clipnorm=1.0)), + learning_rate=optimization.LrConfig( + type='polynomial', + polynomial=optimization.PolynomialLrConfig( + initial_learning_rate=1.5e-3, + decay_steps=10000, + end_learning_rate=1.5e-3)), + warmup=optimization.WarmupConfig( + type='linear', + linear=optimization.LinearWarmupConfig(warmup_learning_rate=0))) + + +# copy from progressive/utils.py due to the private visibility issue. 
+def config_override(params, flags_obj): + """Override ExperimentConfig according to flags.""" + # Change runtime.tpu to the real tpu. + params.override({ + 'runtime': { + 'tpu': flags_obj.tpu, + } + }) + + # Get the first level of override from `--config_file`. + # `--config_file` is typically used as a template that specifies the common + # override for a particular experiment. + for config_file in flags_obj.config_file or []: + params = hyperparams.override_params_dict( + params, config_file, is_strict=True) + + # Get the second level of override from `--params_override`. + # `--params_override` is typically used as a further override over the + # template. For example, one may define a particular template for training + # ResNet50 on ImageNet in a config file and pass it via `--config_file`, + # then define different learning rates and pass it via `--params_override`. + if flags_obj.params_override: + params = hyperparams.override_params_dict( + params, flags_obj.params_override, is_strict=True) + + params.validate() + params.lock() + + pp = pprint.PrettyPrinter() + logging.info('Final experiment parameters: %s', pp.pformat(params.as_dict())) + + model_dir = flags_obj.model_dir + if 'train' in flags_obj.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. 
+ train_utils.serialize_config(params, model_dir) + + return params + + +def get_exp_config(): + """Get ExperimentConfig.""" + params = cfg.ExperimentConfig( + task=distillation.BertDistillationTaskConfig( + train_data=pretrain_dataloader.BertPretrainDataConfig(), + validation_data=pretrain_dataloader.BertPretrainDataConfig( + is_training=False)), + trainer=prog_trainer_lib.ProgressiveTrainerConfig( + progressive=distillation.BertDistillationProgressiveConfig(), + optimizer_config=optimization_config, + train_steps=740000, + checkpoint_interval=20000)) + + return config_override(params, FLAGS) + + +def main(_): + logging.info('Parsing config files...') + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = get_exp_config() + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + + with distribution_strategy.scope(): + task = distillation.BertDistillationTask( + strategy=distribution_strategy, + progressive=params.trainer.progressive, + optimizer_config=params.trainer.optimizer_config, + task_config=params.task) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=FLAGS.model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/nlp/projects/mobilebert/tf2_model_checkpoint_converter.py 
b/official/nlp/projects/mobilebert/tf2_model_checkpoint_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..cbed9e2a179de95d830f8acc2ba2b3488b14546f --- /dev/null +++ b/official/nlp/projects/mobilebert/tf2_model_checkpoint_converter.py @@ -0,0 +1,278 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Checkpoint converter for Mobilebert.""" +import os + +from absl import app +from absl import flags +from absl import logging +import numpy as np +import tensorflow.compat.v1 as tf + +from official.nlp.projects.mobilebert import model_utils + + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + "bert_config_file", None, + "Bert configuration file to define core mobilebert layers.") +flags.DEFINE_string("tf1_checkpoint_path", None, + "Path to load tf1 checkpoint.") +flags.DEFINE_string("tf2_checkpoint_path", None, + "Path to save tf2 checkpoint.") +flags.DEFINE_boolean("use_model_prefix", False, + ("If use model name as prefix for variables. 
Turn this" + "flag on when the converted checkpoint is used for model" + "in subclass implementation, which uses the model name as" + "prefix for all variable names.")) + + +def _bert_name_replacement(var_name, name_replacements): + """Gets the variable name replacement.""" + for src_pattern, tgt_pattern in name_replacements: + if src_pattern in var_name: + old_var_name = var_name + var_name = var_name.replace(src_pattern, tgt_pattern) + logging.info("Converted: %s --> %s", old_var_name, var_name) + return var_name + + +def _has_exclude_patterns(name, exclude_patterns): + """Checks if a string contains substrings that match patterns to exclude.""" + for p in exclude_patterns: + if p in name: + return True + return False + + +def _get_permutation(name, permutations): + """Checks whether a variable requires transposition by pattern matching.""" + for src_pattern, permutation in permutations: + if src_pattern in name: + logging.info("Permuted: %s --> %s", name, permutation) + return permutation + + return None + + +def _get_new_shape(name, shape, num_heads): + """Checks whether a variable requires reshape by pattern matching.""" + if "attention/attention_output/kernel" in name: + return tuple([num_heads, shape[0] // num_heads, shape[1]]) + if "attention/attention_output/bias" in name: + return shape + + patterns = [ + "attention/query", "attention/value", "attention/key" + ] + for pattern in patterns: + if pattern in name: + if "kernel" in name: + return tuple([shape[0], num_heads, shape[1] // num_heads]) + if "bias" in name: + return tuple([num_heads, shape[0] // num_heads]) + return None + + +def convert(checkpoint_from_path, + checkpoint_to_path, + name_replacements, + permutations, + bert_config, + exclude_patterns=None): + """Migrates the names of variables within a checkpoint. + + Args: + checkpoint_from_path: Path to source checkpoint to be read in. + checkpoint_to_path: Path to checkpoint to be written out. 
+ name_replacements: A list of tuples of the form (match_str, replace_str) + describing variable names to adjust. + permutations: A list of tuples of the form (match_str, permutation) + describing permutations to apply to given variables. Note that match_str + should match the original variable name, not the replaced one. + bert_config: A `BertConfig` to create the core model. + exclude_patterns: A list of string patterns to exclude variables from + checkpoint conversion. + + Returns: + A dictionary that maps the new variable names to the Variable objects. + A dictionary that maps the old variable names to the new variable names. + """ + last_ffn_layer_id = str(bert_config.num_feedforward_networks - 1) + name_replacements = [ + (x[0], x[1].replace("LAST_FFN_LAYER_ID", last_ffn_layer_id)) + for x in name_replacements + ] + + output_dir, _ = os.path.split(checkpoint_to_path) + tf.io.gfile.makedirs(output_dir) + # Create a temporary V1 name-converted checkpoint in the output directory. + temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1") + temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt") + + with tf.Graph().as_default(): + logging.info("Reading checkpoint_from_path %s", checkpoint_from_path) + reader = tf.train.NewCheckpointReader(checkpoint_from_path) + name_shape_map = reader.get_variable_to_shape_map() + new_variable_map = {} + conversion_map = {} + for var_name in name_shape_map: + if exclude_patterns and _has_exclude_patterns(var_name, exclude_patterns): + continue + # Get the original tensor data. + tensor = reader.get_tensor(var_name) + + # Look up the new variable name, if any. + new_var_name = _bert_name_replacement(var_name, name_replacements) + + # See if we need to reshape the underlying tensor. 
+ new_shape = None + if bert_config.num_attention_heads > 0: + new_shape = _get_new_shape(new_var_name, tensor.shape, + bert_config.num_attention_heads) + if new_shape: + logging.info("Veriable %s has a shape change from %s to %s", + var_name, tensor.shape, new_shape) + tensor = np.reshape(tensor, new_shape) + + # See if we need to permute the underlying tensor. + permutation = _get_permutation(var_name, permutations) + if permutation: + tensor = np.transpose(tensor, permutation) + + # Create a new variable with the possibly-reshaped or transposed tensor. + var = tf.Variable(tensor, name=var_name) + + # Save the variable into the new variable map. + new_variable_map[new_var_name] = var + + # Keep a list of converter variables for sanity checking. + if new_var_name != var_name: + conversion_map[var_name] = new_var_name + + saver = tf.train.Saver(new_variable_map) + + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + logging.info("Writing checkpoint_to_path %s", temporary_checkpoint) + saver.save(sess, temporary_checkpoint, write_meta_graph=False) + + logging.info("Summary:") + logging.info("Converted %d variable name(s).", len(new_variable_map)) + logging.info("Converted: %s", str(conversion_map)) + + mobilebert_model = model_utils.create_mobilebert_pretrainer(bert_config) + create_v2_checkpoint( + mobilebert_model, temporary_checkpoint, checkpoint_to_path) + + # Clean up the temporary checkpoint, if it exists. + try: + tf.io.gfile.rmtree(temporary_checkpoint_dir) + except tf.errors.OpError: + # If it doesn't exist, we don't need to clean it up; continue. + pass + + +def create_v2_checkpoint(model, src_checkpoint, output_path): + """Converts a name-based matched TF V1 checkpoint to TF V2 checkpoint.""" + # Uses streaming-restore in eager model to read V1 name-based checkpoints. 
+ model.load_weights(src_checkpoint).assert_existing_objects_matched() + checkpoint = tf.train.Checkpoint(**model.checkpoint_items) + checkpoint.save(output_path) + + +_NAME_REPLACEMENT = [ + # prefix path replacement + ("bert/", "mobile_bert_encoder/"), + ("encoder/layer_", "transformer_layer_"), + + # embedding layer + ("embeddings/embedding_transformation", + "mobile_bert_embedding/embedding_projection"), + ("embeddings/position_embeddings", + "mobile_bert_embedding/position_embedding/embeddings"), + ("embeddings/token_type_embeddings", + "mobile_bert_embedding/type_embedding/embeddings"), + ("embeddings/word_embeddings", + "mobile_bert_embedding/word_embedding/embeddings"), + ("embeddings/FakeLayerNorm", "mobile_bert_embedding/embedding_norm"), + ("embeddings/LayerNorm", "mobile_bert_embedding/embedding_norm"), + + # attention layer + ("attention/output/dense", "attention/attention_output"), + ("attention/output/FakeLayerNorm", "attention/norm"), + ("attention/output/LayerNorm", "attention/norm"), + ("attention/self", "attention"), + + # input bottleneck + ("bottleneck/input/dense", "bottleneck_input/dense"), + ("bottleneck/input/FakeLayerNorm", "bottleneck_input/norm"), + ("bottleneck/input/LayerNorm", "bottleneck_input/norm"), + ("bottleneck/attention/dense", "kq_shared_bottleneck/dense"), + ("bottleneck/attention/FakeLayerNorm", "kq_shared_bottleneck/norm"), + ("bottleneck/attention/LayerNorm", "kq_shared_bottleneck/norm"), + + # ffn layer + ("ffn_layer_0/output/dense", "ffn_layer_0/output_dense"), + ("ffn_layer_1/output/dense", "ffn_layer_1/output_dense"), + ("ffn_layer_2/output/dense", "ffn_layer_2/output_dense"), + ("output/dense", "ffn_layer_LAST_FFN_LAYER_ID/output_dense"), + ("ffn_layer_0/output/FakeLayerNorm", "ffn_layer_0/norm"), + ("ffn_layer_0/output/LayerNorm", "ffn_layer_0/norm"), + ("ffn_layer_1/output/FakeLayerNorm", "ffn_layer_1/norm"), + ("ffn_layer_1/output/LayerNorm", "ffn_layer_1/norm"), + ("ffn_layer_2/output/FakeLayerNorm", 
"ffn_layer_2/norm"), + ("ffn_layer_2/output/LayerNorm", "ffn_layer_2/norm"), + ("output/FakeLayerNorm", "ffn_layer_LAST_FFN_LAYER_ID/norm"), + ("output/LayerNorm", "ffn_layer_LAST_FFN_LAYER_ID/norm"), + ("ffn_layer_0/intermediate/dense", "ffn_layer_0/intermediate_dense"), + ("ffn_layer_1/intermediate/dense", "ffn_layer_1/intermediate_dense"), + ("ffn_layer_2/intermediate/dense", "ffn_layer_2/intermediate_dense"), + ("intermediate/dense", "ffn_layer_LAST_FFN_LAYER_ID/intermediate_dense"), + + # output bottleneck + ("output/bottleneck/FakeLayerNorm", "bottleneck_output/norm"), + ("output/bottleneck/LayerNorm", "bottleneck_output/norm"), + ("output/bottleneck/dense", "bottleneck_output/dense"), + + # pooler layer + ("pooler/dense", "pooler"), + + # MLM layer + ("cls/predictions", "bert/cls/predictions"), + ("cls/predictions/output_bias", "cls/predictions/output_bias/bias") +] + +_EXCLUDE_PATTERNS = ["cls/seq_relationship", "global_step"] + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + + if not FLAGS.use_model_prefix: + _NAME_REPLACEMENT[0] = ("bert/", "") + + bert_config = model_utils.BertConfig.from_json_file(FLAGS.bert_config_file) + convert(FLAGS.tf1_checkpoint_path, + FLAGS.tf2_checkpoint_path, + _NAME_REPLACEMENT, + [], + bert_config, + _EXCLUDE_PATTERNS) + +if __name__ == "__main__": + app.run(main) diff --git a/official/nlp/projects/mobilebert/utils.py b/official/nlp/projects/mobilebert/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d5c3e4067471de279ce2e3147ef655771447cb57 --- /dev/null +++ b/official/nlp/projects/mobilebert/utils.py @@ -0,0 +1,29 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions.""" + +import numpy as np + + +def generate_fake_input(batch_size=1, seq_len=5, vocab_size=10000, seed=0): + """Generate consistent fake integer input sequences.""" + np.random.seed(seed) + fake_input = [] + for _ in range(batch_size): + fake_input.append([]) + for _ in range(seq_len): + fake_input[-1].append(np.random.randint(0, vocab_size)) + fake_input = np.asarray(fake_input) + return fake_input diff --git a/official/nlp/projects/tn_bert/README.md b/official/nlp/projects/tn_bert/README.md new file mode 100644 index 0000000000000000000000000000000000000000..50928155807d1800220f14ff67d6408e50a4012c --- /dev/null +++ b/official/nlp/projects/tn_bert/README.md @@ -0,0 +1,31 @@ +# TN-BERT (TensorNetwork BERT) + +TN-BERT is a modification of the BERT-base architecture that greatly compresses +the original BERT model using tensor networks. The dense feedforward layers are +replaced with Expand / Condense tn layers tuned to the TPU architecture. + +This work is based on research conducted during the development of the +[TensorNetwork](https://arxiv.org/abs/1905.01330) Library. Check it out on +[github](https://github.com/google/TensorNetwork). + +TN-BERT achieves the following improvements: + +* 69M params, or 37% fewer than the original BERT base. + +* 22% faster inference than the baseline model on TPUs. + +* Pre-training time under 8 hours on an 8x8 pod of TPUs. 
* 15% less energy consumption by accelerators
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TriviaQA: A Reading Comprehension Dataset.""" +import functools +import json +import os + +from absl import logging +import apache_beam as beam +import six +import tensorflow as tf +import tensorflow_datasets.public_api as tfds + +from official.nlp.projects.triviaqa import preprocess + +_CITATION = """ +@article{2017arXivtriviaqa, + author = {{Joshi}, Mandar and {Choi}, Eunsol and {Weld}, + Daniel and {Zettlemoyer}, Luke}, + title = "{triviaqa: A Large Scale Distantly Supervised Challenge Dataset for Reading Comprehension}", + journal = {arXiv e-prints}, + year = 2017, + eid = {arXiv:1705.03551}, + pages = {arXiv:1705.03551}, +archivePrefix = {arXiv}, + eprint = {1705.03551}, +} +""" +_DOWNLOAD_URL_TMPL = ( + "http://nlp.cs.washington.edu/triviaqa/data/triviaqa-{}.tar.gz") +_TRAIN_FILE_FORMAT = "*-train.json" +_VALIDATION_FILE_FORMAT = "*-dev.json" +_TEST_FILE_FORMAT = "*test-without-answers.json" +_WEB_EVIDENCE_DIR = "evidence/web" +_WIKI_EVIDENCE_DIR = "evidence/wikipedia" + +_DESCRIPTION = """\ +TriviaqQA is a reading comprehension dataset containing over 650K +question-answer-evidence triples. TriviaqQA includes 95K question-answer +pairs authored by trivia enthusiasts and independently gathered evidence +documents, six per question on average, that provide high quality distant +supervision for answering the questions. 
+""" + +_RC_DESCRIPTION = """\ +Question-answer pairs where all documents for a given question contain the +answer string(s). +""" + +_UNFILTERED_DESCRIPTION = """\ +110k question-answer pairs for open domain QA where not all documents for a +given question contain the answer string(s). This makes the unfiltered dataset +more appropriate for IR-style QA. +""" + +_CONTEXT_ADDENDUM = "Includes context from Wikipedia and search results." + + +def _web_evidence_dir(tmp_dir): + return tf.io.gfile.glob(os.path.join(tmp_dir, _WEB_EVIDENCE_DIR)) + + +def _wiki_evidence_dir(tmp_dir): + return tf.io.gfile.glob(os.path.join(tmp_dir, _WIKI_EVIDENCE_DIR)) + + +class TriviaQAConfig(tfds.core.BuilderConfig): + """BuilderConfig for TriviaQA.""" + + def __init__(self, *, unfiltered=False, exclude_context=False, **kwargs): + """BuilderConfig for TriviaQA. + + Args: + unfiltered: bool, whether to use the unfiltered version of the dataset, + intended for open-domain QA. + exclude_context: bool, whether to exclude Wikipedia and search context for + reduced size. + **kwargs: keyword arguments forwarded to super. + """ + name = "unfiltered" if unfiltered else "rc" + if exclude_context: + name += ".nocontext" + description = _UNFILTERED_DESCRIPTION if unfiltered else _RC_DESCRIPTION + if not exclude_context: + description += _CONTEXT_ADDENDUM + super(TriviaQAConfig, self).__init__( + name=name, + description=description, + version=tfds.core.Version("1.1.1"), + **kwargs) + self.unfiltered = unfiltered + self.exclude_context = exclude_context + + +class BigBirdTriviaQAConfig(tfds.core.BuilderConfig): + """BuilderConfig for TriviaQA.""" + + def __init__(self, **kwargs): + """BuilderConfig for TriviaQA. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + name = "rc_wiki.preprocessed" + description = _RC_DESCRIPTION + super(BigBirdTriviaQAConfig, self).__init__( + name=name, + description=description, + version=tfds.core.Version("1.1.1"), + **kwargs) + self.unfiltered = False + self.exclude_context = False + + def configure(self, + sentencepiece_model_path, + sequence_length, + stride, + global_sequence_length=None): + """Configures additional user-specified arguments.""" + self.sentencepiece_model_path = sentencepiece_model_path + self.sequence_length = sequence_length + self.stride = stride + if global_sequence_length is None and sequence_length is not None: + self.global_sequence_length = sequence_length // 16 + 64 + else: + self.global_sequence_length = global_sequence_length + logging.info( + """ + global_sequence_length: %s + sequence_length: %s + stride: %s + sentencepiece_model_path: %s""", + self.global_sequence_length, self.sequence_length, + self.stride, self.sentencepiece_model_path) + + def validate(self): + """Validates that user specifies valid arguments.""" + if self.sequence_length is None: + raise ValueError("sequence_length must be specified for BigBird.") + if self.stride is None: + raise ValueError("stride must be specified for BigBird.") + if self.sentencepiece_model_path is None: + raise ValueError( + "sentencepiece_model_path must be specified for BigBird.") + + +def filter_files_for_big_bird(files): + filtered_files = [f for f in files if os.path.basename(f).startswith("wiki")] + assert len(filtered_files) == 1, "There should only be one wikipedia file." + return filtered_files + + +class TriviaQA(tfds.core.BeamBasedBuilder): + """TriviaQA is a reading comprehension dataset. + + It containss over 650K question-answer-evidence triples. 
+ """ + name = "bigbird_trivia_qa" + BUILDER_CONFIGS = [ + BigBirdTriviaQAConfig(), + TriviaQAConfig(unfiltered=False, exclude_context=False), # rc + TriviaQAConfig(unfiltered=False, exclude_context=True), # rc.nocontext + TriviaQAConfig(unfiltered=True, exclude_context=False), # unfiltered + TriviaQAConfig(unfiltered=True, exclude_context=True), + # unfilered.nocontext + ] + + def __init__(self, + *, + sentencepiece_model_path=None, + sequence_length=None, + stride=None, + global_sequence_length=None, + **kwargs): + super(TriviaQA, self).__init__(**kwargs) + if isinstance(self.builder_config, BigBirdTriviaQAConfig): + self.builder_config.configure( + sentencepiece_model_path=sentencepiece_model_path, + sequence_length=sequence_length, + stride=stride, + global_sequence_length=global_sequence_length) + + def _info(self): + if isinstance(self.builder_config, BigBirdTriviaQAConfig): + return tfds.core.DatasetInfo( + builder=self, + description=_DESCRIPTION, + supervised_keys=None, + homepage="http://nlp.cs.washington.edu/triviaqa/", + citation=_CITATION, + features=tfds.features.FeaturesDict({ + "id": tfds.features.Text(), + "qid": tfds.features.Text(), + "question": tfds.features.Text(), + "context": tfds.features.Text(), + # Sequence features. + "token_ids": tfds.features.Tensor(shape=(None,), dtype=tf.int64), + "token_offsets": + tfds.features.Tensor(shape=(None,), dtype=tf.int64), + "segment_ids": + tfds.features.Tensor(shape=(None,), dtype=tf.int64), + "global_token_ids": + tfds.features.Tensor(shape=(None,), dtype=tf.int64), + # Start and end indices (inclusive). 
+ "answers": + tfds.features.Tensor(shape=(None, 2), dtype=tf.int64), + })) + + return tfds.core.DatasetInfo( + builder=self, + description=_DESCRIPTION, + features=tfds.features.FeaturesDict({ + "question": + tfds.features.Text(), + "question_id": + tfds.features.Text(), + "question_source": + tfds.features.Text(), + "entity_pages": + tfds.features.Sequence({ + "doc_source": + tfds.features.Text(), + "filename": + tfds.features.Text(), + "title": + tfds.features.Text(), + "wiki_context": + tfds.features.Text(), + }), + "search_results": + tfds.features.Sequence({ + "description": + tfds.features.Text(), + "filename": + tfds.features.Text(), + "rank": + tf.int32, + "title": + tfds.features.Text(), + "url": + tfds.features.Text(), + "search_context": + tfds.features.Text(), + }), + "answer": + tfds.features.FeaturesDict({ + "aliases": + tfds.features.Sequence(tfds.features.Text()), + "normalized_aliases": + tfds.features.Sequence(tfds.features.Text()), + "matched_wiki_entity_name": + tfds.features.Text(), + "normalized_matched_wiki_entity_name": + tfds.features.Text(), + "normalized_value": + tfds.features.Text(), + "type": + tfds.features.Text(), + "value": + tfds.features.Text(), + }), + }), + + supervised_keys=None, + homepage="http://nlp.cs.washington.edu/triviaqa/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + cfg = self.builder_config + download_urls = dict() + if not (cfg.unfiltered and cfg.exclude_context): + download_urls["rc"] = _DOWNLOAD_URL_TMPL.format("rc") + if cfg.unfiltered: + download_urls["unfiltered"] = _DOWNLOAD_URL_TMPL.format("unfiltered") + file_paths = dl_manager.download_and_extract(download_urls) + + qa_dir = ( + os.path.join(file_paths["unfiltered"], "triviaqa-unfiltered") + if cfg.unfiltered else + os.path.join(file_paths["rc"], "qa")) + train_files = tf.io.gfile.glob(os.path.join(qa_dir, _TRAIN_FILE_FORMAT)) + valid_files = tf.io.gfile.glob( + os.path.join(qa_dir, 
_VALIDATION_FILE_FORMAT)) + test_files = tf.io.gfile.glob(os.path.join(qa_dir, _TEST_FILE_FORMAT)) + + if cfg.exclude_context: + web_evidence_dir = None + wiki_evidence_dir = None + else: + web_evidence_dir = os.path.join(file_paths["rc"], _WEB_EVIDENCE_DIR) + wiki_evidence_dir = os.path.join(file_paths["rc"], _WIKI_EVIDENCE_DIR) + + if isinstance(cfg, BigBirdTriviaQAConfig): + train_files = filter_files_for_big_bird(train_files) + valid_files = filter_files_for_big_bird(valid_files) + test_files = filter_files_for_big_bird(test_files) + + return [ + tfds.core.SplitGenerator( + name=tfds.Split.TRAIN, + gen_kwargs={"files": train_files, + "web_dir": web_evidence_dir, + "wiki_dir": wiki_evidence_dir, + "answer": True}), + tfds.core.SplitGenerator( + name=tfds.Split.VALIDATION, + gen_kwargs={"files": valid_files, + "web_dir": web_evidence_dir, + "wiki_dir": wiki_evidence_dir, + "answer": True}), + tfds.core.SplitGenerator( + name=tfds.Split.TEST, + gen_kwargs={"files": test_files, + "web_dir": web_evidence_dir, + "wiki_dir": wiki_evidence_dir, + "answer": False}), + ] + + def _build_pcollection(self, pipeline, files, web_dir, wiki_dir, answer): + if isinstance(self.builder_config, BigBirdTriviaQAConfig): + self.builder_config.validate() + question_answers = preprocess.read_question_answers(files[0]) + return preprocess.make_pipeline( + pipeline, + question_answers=question_answers, + answer=answer, + max_num_tokens=self.builder_config.sequence_length, + max_num_global_tokens=self.builder_config.global_sequence_length, + stride=self.builder_config.stride, + sentencepiece_model_path=self.builder_config.sentencepiece_model_path, + wikipedia_dir=wiki_dir, + web_dir=web_dir) + + parse_example_fn = functools.partial(parse_example, + self.builder_config.exclude_context, + web_dir, wiki_dir) + return (pipeline + | beam.Create(files) + | beam.ParDo(ReadQuestions()) + | beam.Reshuffle() + | beam.Map(parse_example_fn)) + + +class ReadQuestions(beam.DoFn): + """Read questions 
from JSON.""" + + def process(self, file): + with tf.io.gfile.GFile(file) as f: + data = json.load(f) + for question in data["Data"]: + example = {"SourceFile": os.path.basename(file)} + example.update(question) + yield example + + +def parse_example(exclude_context, web_dir, wiki_dir, article): + """Return a single example from an article JSON record.""" + + def _strip(collection): + return [item.strip() for item in collection] + + if "Answer" in article: + answer = article["Answer"] + answer_dict = { + "aliases": + _strip(answer["Aliases"]), + "normalized_aliases": + _strip(answer["NormalizedAliases"]), + "matched_wiki_entity_name": + answer.get("MatchedWikiEntryName", "").strip(), + "normalized_matched_wiki_entity_name": + answer.get("NormalizedMatchedWikiEntryName", "").strip(), + "normalized_value": + answer["NormalizedValue"].strip(), + "type": + answer["Type"].strip(), + "value": + answer["Value"].strip(), + } + else: + answer_dict = { + "aliases": [], + "normalized_aliases": [], + "matched_wiki_entity_name": "", + "normalized_matched_wiki_entity_name": "", + "normalized_value": "", + "type": "", + "value": "", + } + + if exclude_context: + article["SearchResults"] = [] + article["EntityPages"] = [] + + def _add_context(collection, context_field, file_dir): + """Adds context from file, or skips if file does not exist.""" + new_items = [] + for item in collection: + if "Filename" not in item: + logging.info("Missing context 'Filename', skipping.") + continue + + new_item = item.copy() + fname = item["Filename"] + try: + with tf.io.gfile.GFile(os.path.join(file_dir, fname)) as f: + new_item[context_field] = f.read() + except (IOError, tf.errors.NotFoundError): + logging.info("File does not exist, skipping: %s", fname) + continue + new_items.append(new_item) + return new_items + + def _strip_if_str(v): + return v.strip() if isinstance(v, six.string_types) else v + + def _transpose_and_strip_dicts(dicts, field_names): + return { + 
tfds.core.naming.camelcase_to_snakecase(k): + [_strip_if_str(d[k]) for d in dicts] for k in field_names + } + + search_results = _transpose_and_strip_dicts( + _add_context(article.get("SearchResults", []), "SearchContext", web_dir), + ["Description", "Filename", "Rank", "Title", "Url", "SearchContext"]) + + entity_pages = _transpose_and_strip_dicts( + _add_context(article.get("EntityPages", []), "WikiContext", wiki_dir), + ["DocSource", "Filename", "Title", "WikiContext"]) + + question = article["Question"].strip() + question_id = article["QuestionId"] + question_source = article["QuestionSource"].strip() + + return f"{article['SourceFile']}_{question_id}", { + "entity_pages": entity_pages, + "search_results": search_results, + "question": question, + "question_id": question_id, + "question_source": question_source, + "answer": answer_dict, + } diff --git a/official/nlp/projects/triviaqa/download_and_prepare.py b/official/nlp/projects/triviaqa/download_and_prepare.py new file mode 100644 index 0000000000000000000000000000000000000000..98b3e4befd41d74e0f2042636ca9c97afaae2eca --- /dev/null +++ b/official/nlp/projects/triviaqa/download_and_prepare.py @@ -0,0 +1,72 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Downloads and prepares TriviaQA dataset.""" +from unittest import mock + +from absl import app +from absl import flags +from absl import logging +import apache_beam as beam +import tensorflow_datasets as tfds + +from official.nlp.projects.triviaqa import dataset # pylint: disable=unused-import + +flags.DEFINE_integer('sequence_length', 4096, 'Max number of tokens.') + +flags.DEFINE_integer( + 'global_sequence_length', None, + 'Max number of question tokens plus sentences. If not set, defaults to ' + 'sequence_length // 16 + 64.') + +flags.DEFINE_integer( + 'stride', 3072, + 'For documents longer than `sequence_length`, where to split them.') + +flags.DEFINE_string( + 'sentencepiece_model_path', None, + 'SentencePiece model to use for tokenization.') + +flags.DEFINE_string('data_dir', None, 'Data directory for TFDS.') + +flags.DEFINE_string('runner', 'DirectRunner', 'Beam runner to use.') + +FLAGS = flags.FLAGS + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + builder = tfds.builder( + 'bigbird_trivia_qa/rc_wiki.preprocessed', + data_dir=FLAGS.data_dir, + sentencepiece_model_path=FLAGS.sentencepiece_model_path, + sequence_length=FLAGS.sequence_length, + global_sequence_length=FLAGS.global_sequence_length, + stride=FLAGS.stride) + download_config = tfds.download.DownloadConfig( + beam_options=beam.options.pipeline_options.PipelineOptions(flags=[ + f'--runner={FLAGS.runner}', + '--direct_num_workers=8', + '--direct_running_mode=multi_processing', + ])) + with mock.patch('tensorflow_datasets.core.download.extractor._normpath', + new=lambda x: x): + builder.download_and_prepare(download_config=download_config) + logging.info(builder.info.splits) + + +if __name__ == '__main__': + flags.mark_flag_as_required('sentencepiece_model_path') + app.run(main) diff --git a/official/nlp/projects/triviaqa/evaluate.py b/official/nlp/projects/triviaqa/evaluate.py new file mode 100644 index 
0000000000000000000000000000000000000000..2afdeacd903fa67655b77cc394de6c86bec5e7d5 --- /dev/null +++ b/official/nlp/projects/triviaqa/evaluate.py @@ -0,0 +1,48 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Evaluates TriviaQA predictions.""" +import json + +from absl import app +from absl import flags +from absl import logging +import tensorflow as tf + +from official.nlp.projects.triviaqa import evaluation + +flags.DEFINE_string('gold_path', None, + 'Path to golden validation, i.e. 
wikipedia-dev.json.') + +flags.DEFINE_string('predictions_path', None, + 'Path to predictions in JSON format') + +FLAGS = flags.FLAGS + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + with tf.io.gfile.GFile(FLAGS.gold_path) as f: + ground_truth = { + datum['QuestionId']: datum['Answer'] for datum in json.load(f)['Data'] + } + with tf.io.gfile.GFile(FLAGS.predictions_path) as f: + predictions = json.load(f) + logging.info(evaluation.evaluate_triviaqa(ground_truth, predictions)) + + +if __name__ == '__main__': + flags.mark_flag_as_required('predictions_path') + app.run(main) diff --git a/official/nlp/projects/triviaqa/evaluation.py b/official/nlp/projects/triviaqa/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..fb987f4cce3656bac3cb28504d795228ab484f42 --- /dev/null +++ b/official/nlp/projects/triviaqa/evaluation.py @@ -0,0 +1,168 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Official evaluation script for v1.0 of the TriviaQA dataset. + +Forked from +https://github.com/mandarjoshi90/triviaqa/blob/master/evaluation/triviaqa_evaluation.py. +Modifications are removal of main function. 
+""" +import collections +import re +import string +import sys + + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + + def remove_articles(text): + return re.sub(r'\b(a|an|the)\b', ' ', text) + + def white_space_fix(text): + return ' '.join(text.split()) + + def handle_punc(text): + exclude = set(string.punctuation + ''.join([u'‘', u'’', u'´', u'`'])) + return ''.join(ch if ch not in exclude else ' ' for ch in text) + + def lower(text): + return text.lower() + + def replace_underscore(text): + return text.replace('_', ' ') + + return white_space_fix( + remove_articles(handle_punc(lower(replace_underscore(s))))).strip() + + +def f1_score(prediction, ground_truth): + prediction_tokens = normalize_answer(prediction).split() + ground_truth_tokens = normalize_answer(ground_truth).split() + common = ( + collections.Counter(prediction_tokens) + & collections.Counter(ground_truth_tokens)) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def exact_match_score(prediction, ground_truth): + return normalize_answer(prediction) == normalize_answer(ground_truth) + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def is_exact_match(answer_object, prediction): + ground_truths = get_ground_truths(answer_object) + for ground_truth in ground_truths: + if exact_match_score(prediction, ground_truth): + return True + return False + + +def has_exact_match(ground_truths, candidates): + for ground_truth in ground_truths: + if ground_truth in candidates: + return True + return False + + +def get_ground_truths(answer): + return 
answer['NormalizedAliases'] + [ + normalize_answer(ans) for ans in answer.get('HumanAnswers', []) + ] + + +def get_oracle_score(ground_truth, + predicted_answers, + qid_list=None, + mute=False): + exact_match = common = 0 + if qid_list is None: + qid_list = ground_truth.keys() + for qid in qid_list: + if qid not in predicted_answers: + if not mute: + message = 'Irrelavant question {} will receive score 0.'.format(qid) + print(message, file=sys.stderr) + continue + common += 1 + prediction = normalize_answer(predicted_answers[qid]) + ground_truths = get_ground_truths(ground_truth[qid]) + em_for_this_question = has_exact_match(ground_truths, prediction) + exact_match += int(em_for_this_question) + + exact_match = 100.0 * exact_match / len(qid_list) + + return { + 'oracle_exact_match': exact_match, + 'common': common, + 'denominator': len(qid_list), + 'pred_len': len(predicted_answers), + 'gold_len': len(ground_truth) + } + + +def evaluate_triviaqa(ground_truth, + predicted_answers, + qid_list=None, + mute=False): + f1 = exact_match = common = 0 + if qid_list is None: + qid_list = ground_truth.keys() + for qid in qid_list: + if qid not in predicted_answers: + if not mute: + message = 'Missed question {} will receive score 0.'.format(qid) + print(message, file=sys.stderr) + continue + if qid not in ground_truth: + if not mute: + message = 'Irrelavant question {} will receive score 0.'.format(qid) + print(message, file=sys.stderr) + continue + common += 1 + prediction = predicted_answers[qid] + ground_truths = get_ground_truths(ground_truth[qid]) + em_for_this_question = metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths) + if em_for_this_question == 0 and not mute: + print('em=0:', prediction, ground_truths) + exact_match += em_for_this_question + f1_for_this_question = metric_max_over_ground_truths( + f1_score, prediction, ground_truths) + f1 += f1_for_this_question + + exact_match = 100.0 * exact_match / len(qid_list) + f1 = 100.0 * f1 / 
len(qid_list) + + return { + 'exact_match': exact_match, + 'f1': f1, + 'common': common, + 'denominator': len(qid_list), + 'pred_len': len(predicted_answers), + 'gold_len': len(ground_truth) + } diff --git a/official/nlp/projects/triviaqa/inputs.py b/official/nlp/projects/triviaqa/inputs.py new file mode 100644 index 0000000000000000000000000000000000000000..cd9d414b545afe78dcba83688f528b497840bf3e --- /dev/null +++ b/official/nlp/projects/triviaqa/inputs.py @@ -0,0 +1,548 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Input processing for TriviaQA.""" +import os +from typing import Optional, Text, Union + +import tensorflow as tf +import tensorflow_datasets as tfds + +from official.modeling import tf_utils +from official.nlp.projects.triviaqa import dataset # pylint: disable=unused-import + + +def _flatten_dims(tensor: tf.Tensor, + first_dim: Optional[int] = 0, + last_dim: Optional[int] = -1, + name: Optional[Text] = None) -> tf.Tensor: + """Flattens the given span of dimensions in `tensor`. + + Args: + tensor: [..., first_dim_size, ...middle_dims..., last_dim_size, ...] shaped + Tensor. + first_dim: The first dimension to flatten (inclusive). Must be a valid index + for the rank of `tensor`. Default is 0. + last_dim: The last dimension to flatten (inclusive). Must be a valid index + for the rank of `tensor`. Default is -1. + name: A name for the operation (optional). 
+ + Returns: + Tensor of shape [..., flattened_dim_size, ...] where + flattened_dim_size = first_dim_size * ...middle_dims... * last_dim_size. + """ + with tf.name_scope(name or 'flatten_dims'): + tensor = tf.convert_to_tensor(tensor) + + rank = tensor.shape.rank + if rank is None: + raise ValueError('Static rank of `tensor` must be known.') + if first_dim < 0: + first_dim += rank + if first_dim < 0 or first_dim >= rank: + raise ValueError('`first_dim` out of bounds for `tensor` rank.') + if last_dim < 0: + last_dim += rank + if last_dim < 0 or last_dim >= rank: + raise ValueError('`last_dim` out of bounds for `tensor` rank.') + if first_dim > last_dim: + raise ValueError('`first_dim` must not be larger than `last_dim`.') + + # Try to calculate static flattened dim size if all input sizes to flatten + # are statically known. Otherwise, just use -1. + flat_dims_shape = tensor.shape[first_dim:(last_dim + 1)].as_list() + flattened_dim_size = 1 + for size in flat_dims_shape: + if size is None: + flattened_dim_size = -1 + break + flattened_dim_size *= size + + old_shape = tf.shape(tensor) + output_shape = tf.concat([ + old_shape[:first_dim], [flattened_dim_size], old_shape[(last_dim + 1):] + ], 0) + return tf.reshape(tensor, output_shape) + + +def _pad_to_multiple(tensor: tf.Tensor, + factor: Union[int, tf.Tensor], + axis: int, + mode: Optional[Text] = 'CONSTANT', + constant_values=0, + name: Optional[Text] = None) -> tf.Tensor: + """Pads `tensor` on a given `axis` to be a multiple of `factor`. + + Padding will be concatenated to the end of the axis only, not the beginning. + If the length along `axis` is already a multiple of `factor`, this is + effectively a no-op. + + Args: + tensor: A Tensor with rank >= 1 to pad. + factor: Positive integer factor to pad for. If a Tensor, must be a scalar + int. + axis: A valid axis in `tensor` to pad. + mode: The padding mode to use according to `tf.pad`. Defaults to 'CONSTANT'. 
+ constant_values: For 'CONSTANT' mode, the scalar pad value to use within + `tf.pad`. Defaults to 0. Must be same type as `tensor`. + name: A name for the operation (optional). + + Returns: + The padded Tensor result. + """ + with tf.name_scope(name or 'pad_to_multiple'): + tensor = tf.convert_to_tensor(tensor) + + if isinstance(factor, int) and factor < 1: + raise ValueError('`factor` must be positive.') + rank = tensor.shape.rank + if rank is None: + raise ValueError('Static rank of `tensor` must be known.') + if axis < 0: + axis += rank + if axis < 0 or axis >= rank: + raise ValueError('`axis` out of bounds for `tensor` rank.') + + axis_len = tf_utils.get_shape_list(tensor)[axis] + pad_len = -axis_len % factor + paddings = pad_len * tf.one_hot([-1, axis], rank, axis=0, dtype=tf.int32) + return tf.pad( + tensor=tensor, + paddings=paddings, + mode=mode, + constant_values=constant_values) + + +def _skew_elements_right(tensor: tf.Tensor, + axis: int, + pad_value=0, + name: Optional[Text] = None) -> tf.Tensor: + """Skews successive elements right along the given `axis`. + + This changes an input like + [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9] + ] + into the following: + [ + [1, 2, 3, 0, 0], + [0, 4, 5, 6, 0], + [0, 0, 7, 8, 9] + ] + + Args: + tensor: Tensor of shape [..., num_rows, axis_len, ...]. + axis: A valid axis in `tensor` to skew along. It must not be the first axis + in `tensor`. + pad_value: The scalar pad value to use. Defaults to 0. Must be the same type + as `tensor`. + name: A name for the operation (optional). + + Returns: + Tensor of shape [..., num_rows, axis_len + num_rows - 1, ...]. 
+ """ + with tf.name_scope(name or 'skew_elements_right'): + tensor = tf.convert_to_tensor(tensor) + + rank = tensor.shape.rank + num_rows = tf_utils.get_shape_list(tensor)[axis - 1] + axis_len = tf_utils.get_shape_list(tensor)[axis] + + if rank is None: + raise ValueError('Static rank of `tensor` must be known.') + if axis < 0: + axis += rank + if axis <= 0 or axis >= rank: + raise ValueError('`axis` out of bounds for `tensor` rank.') + + output_len = axis_len + num_rows - 1 + + paddings = num_rows * tf.one_hot([-1, axis], rank, axis=0, dtype=tf.int32) + + # [..., num_rows, axis_len + num_rows, ...] + padded_tensor = tf.pad(tensor, paddings, constant_values=pad_value) + + # [..., num_rows * (axis_len + num_rows), ...] + flat_tensor = _flatten_dims( + padded_tensor, first_dim=axis - 1, last_dim=axis) + + padded_tensor2 = _pad_to_multiple( + flat_tensor, + factor=output_len, + axis=axis - 1, + constant_values=pad_value) + + # [..., num_rows + 1, output_len, ...] + new_shape = tf.concat([ + tf.shape(tensor)[:(axis - 1)], [num_rows + 1, output_len], + tf.shape(tensor)[(axis + 1):] + ], 0) + reshaped_tensor = tf.reshape(padded_tensor2, new_shape) + + # [..., num_rows, output_len, ...] + output_shape = new_shape - tf.one_hot(axis - 1, depth=rank, dtype=tf.int32) + return tf.slice( + reshaped_tensor, begin=tf.zeros_like(output_shape), size=output_shape) + + +class RelativePositionGenerator(object): + """Generates `relative_att_ids` for purely distance-based relative positions. + + This implements the clipped relative position representations originally + described in https://arxiv.org/abs/1803.02155 . + + Attributes: + max_distance: Integer passed from `__init__`. + ignore_direction: Bool passed from `__init__`. + relative_vocab_size: Integer representing the maximum number of unique ids + output from this generator. + left_pad_value: Integer id for all positions at or beyond max_distance to + the left. 
+ right_pad_value: Integer id for all positions at or beyond max_distance to + the right. + """ + + def __init__(self, max_distance: int, ignore_direction: bool = False): + """Init. + + Args: + max_distance: The maximum distance to represent. Must not be negative. All + larger distances will be clipped to this value. + ignore_direction: If True, both left and right position representations + will have the same ids based on absolute distance (resulting in + symmetric ids around the center token). + """ + if max_distance < 0: + raise ValueError('`max_distance` must not be negative.') + self.max_distance = max_distance + self.ignore_direction = ignore_direction + + self.right_pad_value = max_distance + self.left_pad_value = max_distance if ignore_direction else 2 * max_distance + + # 0 is the first id, so vocab size is 1 + the largest id (left pad value). + self.relative_vocab_size = self.left_pad_value + 1 + + def make_relative_att_ids(self, + seq_len: Union[int, tf.Tensor], + batch_size: Optional[Union[int, tf.Tensor]] = 1, + name: Optional[Text] = None) -> tf.Tensor: + """Makes relative position ids for full self-attention. + + For example, if `max_distance` is 3, `ignore_direction` is False, `seq_len` + is 6, and `batch_size` is 1, the result is the following: + [[ + [0, 1, 2, 3, 3, 3], + [4, 0, 1, 2, 3, 3], + [5, 4, 0, 1, 2, 3], + [6, 5, 4, 0, 1, 2], + [6, 6, 5, 4, 0, 1], + [6, 6, 6, 5, 4, 0], + ]] + + Args: + seq_len: The sequence length to create ids for. Must be positive. If a + Tensor, must be a scalar int. + batch_size: The batch size of the result (default 1). Must be positive. If + a Tensor, must be a scalar int. All examples in the batch will have the + same id pattern. + name: A name for the operation (optional). + + Returns: + [batch_size, seq_len, seq_len] Tensor of relative position ids. 
+ """ + with tf.name_scope(name or 'make_relative_att_ids'): + if isinstance(seq_len, int) and seq_len < 1: + raise ValueError('`seq_len` must be positive.') + if isinstance(batch_size, int) and batch_size < 1: + raise ValueError('`batch_size` must be positive.') + + # We need the id_pattern to cover all tokens to the left of the last token + # and all tokens to the right of the first token at the same time. + window_size = 2 * seq_len - 1 + + # [window_size] + id_pattern = self._make_relative_id_pattern(window_size) + + # [seq_len, window_size] + id_tensor = tf.tile(id_pattern[tf.newaxis, :], [seq_len, 1]) + + # [seq_len, window_size + seq_len - 1] + id_tensor = _skew_elements_right(id_tensor, -1) + + # [seq_len, seq_len] + id_tensor = tf.slice(id_tensor, [0, seq_len - 1], [seq_len, seq_len]) + + return tf.tile(id_tensor[tf.newaxis, :, :], [batch_size, 1, 1]) + + def make_local_relative_att_ids(self, + seq_len: Union[int, tf.Tensor], + local_radius: int, + batch_size: Optional[Union[int, + tf.Tensor]] = 1, + name: Optional[Text] = None) -> tf.Tensor: + """Makes relative position ids for local self-attention. + + The result can be used as `relative_att_ids` in + `layers.RelativeLocalSelfAttention`. + + For example, if `max_distance` is 3, `ignore_direction` is False, `seq_len` + is 4, `local_radius` is 5, and `batch_size` is 1, the result is the + following: + [[ + [6, 6, 6, 5, 4, 0, 1, 2, 3, 3, 3], + [6, 6, 6, 5, 4, 0, 1, 2, 3, 3, 3], + [6, 6, 6, 5, 4, 0, 1, 2, 3, 3, 3], + [6, 6, 6, 5, 4, 0, 1, 2, 3, 3, 3], + ]] + + Args: + seq_len: The sequence length to create ids for. Must be positive. If a + Tensor, must be a scalar int. + local_radius: The local radius as expected by + `layers.RelativeLocalSelfAttention`. Must be positive. + batch_size: The batch size of the result (default 1). Must be positive. If + a Tensor, must be a scalar int. All examples in the batch will have the + same id pattern. + name: A name for the operation (optional). 
+ + Returns: + [batch_size, seq_len, 2*local_radius + 1] Tensor of relative + position ids. + """ + with tf.name_scope(name or 'make_local_relative_att_ids'): + if isinstance(seq_len, int) and seq_len < 1: + raise ValueError('`seq_len` must be positive.') + if local_radius < 1: + raise ValueError('`local_radius` must be positive.') + if isinstance(batch_size, int) and batch_size < 1: + raise ValueError('`batch_size` must be positive.') + + window_size = 2 * local_radius + 1 + + # [window_size] + id_pattern = self._make_relative_id_pattern(window_size) + + return tf.tile(id_pattern[tf.newaxis, tf.newaxis, :], + [batch_size, seq_len, 1]) + + def _make_relative_id_pattern( + self, window_size: Union[int, tf.Tensor]) -> tf.Tensor: + """Helper for making the relative id pattern for a particular window size. + + For example, if `max_distance` is 3, `ignore_direction` is False, and + `window_size` is 11, the result is the following: + [6, 6, 6, 5, 4, 0, 1, 2, 3, 3, 3]. + + Args: + window_size: Window size to return relative ids for. Must be positive and + odd since ids will be relative to the center of the window. If a Tensor, + must be a scalar int. + + Returns: + [window_size] Tensor of relative position ids. 
+ """ + if isinstance(window_size, int): + if window_size < 1: + raise ValueError('`window_size` must be positive.') + if window_size % 2 != 1: + raise ValueError('`window_size` must be odd.') + + x = tf.range(self.max_distance + 1, dtype=tf.int32) + x = tf.pad(x, [[self.max_distance, 0]], mode='REFLECT') + if not self.ignore_direction: + direction_adder = tf.concat([ + tf.fill([self.max_distance], self.max_distance), + tf.zeros([self.max_distance + 1], dtype=tf.int32) + ], 0) + x += direction_adder + + len_x = x.shape.as_list()[0] + if len_x > window_size: + trim_amount = (len_x - window_size) // 2 + return x[trim_amount:-trim_amount] + + pad_amount = (window_size - len_x) // 2 + result = tf.pad(x, [[pad_amount, 0]], constant_values=self.left_pad_value) + result = tf.pad( + result, [[0, pad_amount]], constant_values=self.right_pad_value) + return result + + +def read_batches(data_dir, + split, + batch_size, + include_answers=True, + shuffle=False, + drop_final_batch=False, + compression_type=''): + """Read TriviaQA batches.""" + features = { + 'id': tf.io.FixedLenFeature([], tf.string), + 'qid': tf.io.FixedLenFeature([], tf.string), + 'context': tf.io.FixedLenFeature([], tf.string), + 'question': tf.io.FixedLenFeature([], tf.string), + 'global_token_ids': tf.io.RaggedFeature(tf.int64), + 'token_ids': tf.io.RaggedFeature(tf.int64), + 'segment_ids': tf.io.RaggedFeature(tf.int64), + 'token_offsets': tf.io.RaggedFeature(tf.int64), + } + if include_answers: + features['answers'] = tf.io.RaggedFeature( + tf.int64, partitions=(tf.io.RaggedFeature.UniformRowLength(2),)) # pytype: disable=attribute-error + + dataset_builder = tfds.builder( + 'bigbird_trivia_qa/rc_wiki.preprocessed', data_dir=data_dir) + split_info = dataset_builder.info.splits[split] + return tf.data.experimental.make_batched_features_dataset( + [ + os.path.join(dataset_builder.data_dir, filename) + for filename in split_info.filenames + ], + batch_size=batch_size, + features=features, + reader=lambda 
path: tf.data.TFRecordDataset(path, compression_type), + label_key='answers' if include_answers else None, + num_epochs=1, + shuffle=shuffle, + shuffle_buffer_size=split_info.num_examples, + prefetch_buffer_size=tf.data.experimental.AUTOTUNE, + sloppy_ordering=True, + drop_final_batch=drop_final_batch, + reader_num_threads=8, + parser_num_threads=16) + + +def scatter_labels(labels, batch_size, sequence_length): + """Create one hot labels.""" + row_ids = labels.value_rowids() + indices = tf.concat( + (tf.stack((row_ids, tf.cast(labels.flat_values[:, 0], + tf.int32), tf.zeros_like(row_ids)), -1), + tf.stack((row_ids, tf.cast(labels.flat_values[:, 1], + tf.int32), tf.ones_like(row_ids)), -1)), 0) + one_hot_labels = tf.scatter_nd(indices, + tf.ones(tf.shape(indices)[0], tf.float32), + (batch_size, sequence_length, 2)) + return tf.minimum(one_hot_labels, 1.) + + +def features_map_fn(features, local_radius, relative_pos_max_distance, + use_hard_g2l_mask, padding_id, eos_id, null_id, cls_id, + sep_id, sequence_length, global_sequence_length): + """Make features.""" + batch_size = tf.get_static_value(features['token_ids'].shape[0]) + # sequence_lengths = features['token_ids'].row_lengths() + question_lengths = tf.argmax( + tf.equal(features['token_ids'].to_tensor( + shape=(batch_size, global_sequence_length)), sep_id), -1) + 1 + mapped_features = dict( + token_ids=tf.cast( + features['token_ids'].to_tensor(shape=(batch_size, sequence_length)), + tf.int32), + global_token_ids=tf.cast( + features['global_token_ids'].to_tensor( + shape=(batch_size, global_sequence_length)), tf.int32), + segment_ids=tf.cast( + features['segment_ids'].to_tensor( + shape=(batch_size, sequence_length)), tf.int32), + ) + relative_pos_generator = RelativePositionGenerator( + max_distance=relative_pos_max_distance) + # Only do long-to-long attention for non-null tokens. + # Let the null token attend to itself. 
+ l2l_att_mask = tf.ones((batch_size, sequence_length, 2 * local_radius + 1), + tf.int32) + l2l_att_mask *= 1 - tf.cast( + tf.logical_or( + tf.equal(mapped_features['token_ids'], padding_id), + tf.equal(mapped_features['token_ids'], null_id)), + tf.int32)[:, :, tf.newaxis] + l2l_relative_att_ids = relative_pos_generator.make_local_relative_att_ids( + seq_len=sequence_length, local_radius=local_radius, batch_size=batch_size) + # + l2g_att_mask = tf.ones((batch_size, sequence_length, global_sequence_length), + tf.int32) + l2g_att_mask *= tf.cast( + tf.not_equal(mapped_features['token_ids'], padding_id), + tf.int32)[:, :, tf.newaxis] + l2g_att_mask *= tf.cast( + tf.not_equal(mapped_features['global_token_ids'], padding_id), + tf.int32)[:, tf.newaxis, :] + l2g_relative_att_ids = tf.fill( + (batch_size, sequence_length, global_sequence_length), + relative_pos_generator.relative_vocab_size + 1) + # + g2g_att_mask = tf.ones( + (batch_size, global_sequence_length, global_sequence_length), tf.int32) + g2g_att_mask *= tf.cast( + tf.not_equal(mapped_features['global_token_ids'], padding_id), + tf.int32)[:, :, tf.newaxis] + g2g_relative_att_ids = relative_pos_generator.make_relative_att_ids( + seq_len=global_sequence_length, batch_size=batch_size) + global_sentence_mask = tf.equal(mapped_features['global_token_ids'], eos_id) + global_question_mask = tf.logical_not( + tf.logical_or( + tf.logical_or( + tf.equal(mapped_features['global_token_ids'], cls_id), + tf.equal(mapped_features['global_token_ids'], eos_id)), + tf.equal(mapped_features['global_token_ids'], padding_id))) + g2g_question_mask = tf.logical_and(global_question_mask[:, tf.newaxis, :], + global_question_mask[:, :, tf.newaxis]) + g2g_sentence_mask = tf.logical_and(global_sentence_mask[:, tf.newaxis, :], + global_sentence_mask[:, :, tf.newaxis]) + g2g_local_mask = tf.cast( + tf.logical_or(g2g_question_mask, g2g_sentence_mask), tf.int32) + g2g_relative_att_ids *= g2g_local_mask + g2g_relative_att_ids += (1 - 
g2g_local_mask) * ( + relative_pos_generator.relative_vocab_size + 2) + # + g2l_att_mask = tf.transpose(l2g_att_mask, [0, 2, 1]) + if use_hard_g2l_mask: + global_range = tf.range( + global_sequence_length, dtype=mapped_features['global_token_ids'].dtype) + g2l_att_mask *= tf.cast( + tf.logical_or( + tf.equal( + mapped_features['global_token_ids'], cls_id)[:, :, tf.newaxis], + tf.equal(global_range[tf.newaxis, :, tf.newaxis], + mapped_features['segment_ids'][:, tf.newaxis, :])), + tf.int32) + g2l_relative_att_ids = tf.transpose(l2g_relative_att_ids, [0, 2, 1]) + mapped_features.update( + dict( + l2l_att_mask=l2l_att_mask, + l2l_relative_att_ids=l2l_relative_att_ids, + l2g_att_mask=l2g_att_mask, + l2g_relative_att_ids=l2g_relative_att_ids, + g2g_att_mask=g2g_att_mask, + g2g_relative_att_ids=g2g_relative_att_ids, + g2l_att_mask=g2l_att_mask, + g2l_relative_att_ids=g2l_relative_att_ids, + question_lengths=question_lengths, + )) + return mapped_features + + +def labels_map_fn(token_ids, labels, sequence_length): + batch_size = tf.get_static_value(labels.shape[0]) + row_lengths = labels.row_lengths() + empty_token_index = token_ids.row_lengths() - 1 + one_hot_labels = scatter_labels(labels, batch_size, sequence_length) + one_hot_labels += (tf.cast(row_lengths == 0, tf.float32)[:, tf.newaxis] * + tf.one_hot(empty_token_index, sequence_length))[:, :, + tf.newaxis] + return one_hot_labels diff --git a/official/nlp/projects/triviaqa/modeling.py b/official/nlp/projects/triviaqa/modeling.py new file mode 100644 index 0000000000000000000000000000000000000000..9a2c711352b4248b667ef5c882a5244f84f79de4 --- /dev/null +++ b/official/nlp/projects/triviaqa/modeling.py @@ -0,0 +1,114 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Modeling for TriviaQA.""" +import tensorflow as tf + +from official.modeling import tf_utils +from official.nlp.configs import encoders + + +class TriviaQaHead(tf.keras.layers.Layer): + """Computes logits given token and global embeddings.""" + + def __init__(self, + intermediate_size, + intermediate_activation=tf_utils.get_activation('gelu'), + dropout_rate=0.0, + attention_dropout_rate=0.0, + **kwargs): + super(TriviaQaHead, self).__init__(**kwargs) + self._attention_dropout = tf.keras.layers.Dropout(attention_dropout_rate) + self._intermediate_dense = tf.keras.layers.Dense(intermediate_size) + self._intermediate_activation = tf.keras.layers.Activation( + intermediate_activation) + self._output_dropout = tf.keras.layers.Dropout(dropout_rate) + self._output_layer_norm = tf.keras.layers.LayerNormalization() + self._logits_dense = tf.keras.layers.Dense(2) + + def build(self, input_shape): + output_shape = input_shape['token_embeddings'][-1] + self._output_dense = tf.keras.layers.Dense(output_shape) + super(TriviaQaHead, self).build(input_shape) + + def call(self, inputs, training=None): + token_embeddings = inputs['token_embeddings'] + token_ids = inputs['token_ids'] + question_lengths = inputs['question_lengths'] + x = self._attention_dropout(token_embeddings, training=training) + intermediate_outputs = self._intermediate_dense(x) + intermediate_outputs = self._intermediate_activation(intermediate_outputs) + outputs = self._output_dense(intermediate_outputs) + outputs = self._output_dropout(outputs, training=training) + outputs = 
self._output_layer_norm(outputs + token_embeddings) + logits = self._logits_dense(outputs) + logits -= tf.expand_dims( + tf.cast(tf.equal(token_ids, 0), tf.float32) + tf.sequence_mask( + question_lengths, logits.shape[-2], dtype=tf.float32), -1) * 1e6 + return logits + + +class TriviaQaModel(tf.keras.Model): + """Model for TriviaQA.""" + + def __init__(self, model_config: encoders.EncoderConfig, sequence_length: int, + **kwargs): + inputs = dict( + token_ids=tf.keras.Input((sequence_length,), dtype=tf.int32), + question_lengths=tf.keras.Input((), dtype=tf.int32)) + encoder = encoders.build_encoder(model_config) + x = encoder( + dict( + input_word_ids=inputs['token_ids'], + input_mask=tf.cast(inputs['token_ids'] > 0, tf.int32), + input_type_ids=1 - + tf.sequence_mask(inputs['question_lengths'], sequence_length, + tf.int32)))['sequence_output'] + logits = TriviaQaHead( + model_config.get().intermediate_size, + dropout_rate=model_config.get().dropout_rate, + attention_dropout_rate=model_config.get().attention_dropout_rate)( + dict( + token_embeddings=x, + token_ids=inputs['token_ids'], + question_lengths=inputs['question_lengths'])) + super(TriviaQaModel, self).__init__(inputs, logits, **kwargs) + self._encoder = encoder + + @property + def encoder(self): + return self._encoder + + +class SpanOrCrossEntropyLoss(tf.keras.losses.Loss): + """Cross entropy loss for multiple correct answers. + + See https://arxiv.org/abs/1710.10723. + """ + + def call(self, y_true, y_pred): + y_pred_masked = y_pred - tf.cast(y_true < 0.5, tf.float32) * 1e6 + or_cross_entropy = ( + tf.math.reduce_logsumexp(y_pred, axis=-2) - + tf.math.reduce_logsumexp(y_pred_masked, axis=-2)) + return tf.math.reduce_sum(or_cross_entropy, -1) + + +def smooth_labels(label_smoothing, labels, question_lengths, token_ids): + mask = 1. 
- ( + tf.cast(tf.equal(token_ids, 0), tf.float32) + + tf.sequence_mask(question_lengths, labels.shape[-2], dtype=tf.float32)) + num_classes = tf.expand_dims(tf.math.reduce_sum(mask, -1, keepdims=True), -1) + labels = (1. - label_smoothing) * labels + (label_smoothing / num_classes) + return labels * tf.expand_dims(mask, -1) diff --git a/official/nlp/projects/triviaqa/predict.py b/official/nlp/projects/triviaqa/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..bc4f5dad87792f8bb1c80ba4609cbbec526c03c0 --- /dev/null +++ b/official/nlp/projects/triviaqa/predict.py @@ -0,0 +1,185 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""TriviaQA script for inference.""" +import collections +import contextlib +import functools +import json +import operator + +from absl import app +from absl import flags +from absl import logging +import tensorflow as tf +import tensorflow_datasets as tfds + +import sentencepiece as spm +from official.nlp.configs import encoders # pylint: disable=unused-import +from official.nlp.projects.triviaqa import evaluation +from official.nlp.projects.triviaqa import inputs +from official.nlp.projects.triviaqa import prediction + +flags.DEFINE_string('data_dir', None, 'TensorFlow Datasets directory.') + +flags.DEFINE_enum('split', None, + [tfds.Split.TRAIN, tfds.Split.VALIDATION, tfds.Split.TEST], + 'For which split to generate predictions.') + +flags.DEFINE_string('predictions_path', None, 'Output for predictions.') + +flags.DEFINE_string('sentencepiece_model_path', None, + 'Path to sentence piece model.') + +flags.DEFINE_integer('bigbird_block_size', 64, + 'Size of blocks for sparse block attention.') + +flags.DEFINE_string('saved_model_dir', None, + 'Path from which to initialize model and weights.') + +flags.DEFINE_integer('sequence_length', 4096, 'Maximum number of tokens.') + +flags.DEFINE_integer('global_sequence_length', 320, + 'Maximum number of global tokens.') + +flags.DEFINE_integer('batch_size', 32, 'Size of batch.') + +flags.DEFINE_string('master', '', 'Address of the TPU master.') + +flags.DEFINE_integer('decode_top_k', 8, + 'Maximum number of tokens to consider for begin/end.') + +flags.DEFINE_integer('decode_max_size', 16, + 'Maximum number of sentence pieces in an answer.') + +FLAGS = flags.FLAGS + + +@contextlib.contextmanager +def worker_context(): + if FLAGS.master: + with tf.device('/job:worker') as d: + yield d + else: + yield + + +def read_sentencepiece_model(path): + with tf.io.gfile.GFile(path, 'rb') as file: + processor = spm.SentencePieceProcessor() + processor.LoadFromSerializedProto(file.read()) + return processor + + +def 
predict(sp_processor, features_map_fn, logits_fn, decode_logits_fn, + split_and_pad_fn, distribute_strategy, dataset): + """Make predictions.""" + predictions = collections.defaultdict(list) + for _, features in dataset.enumerate(): + token_ids = features['token_ids'] + x = split_and_pad_fn(features_map_fn(features)) + logits = tf.concat( + distribute_strategy.experimental_local_results(logits_fn(x)), 0) + logits = logits[:features['token_ids'].shape[0]] + end_limit = token_ids.row_lengths() - 1 # inclusive + begin, end, scores = decode_logits_fn(logits, end_limit) + answers = prediction.decode_answer(features['context'], begin, end, + features['token_offsets'], + end_limit).numpy() + for j, (qid, token_id, offset, score, answer) in enumerate( + zip(features['qid'].numpy(), + tf.gather(features['token_ids'], begin, batch_dims=1).numpy(), + tf.gather(features['token_offsets'], begin, batch_dims=1).numpy(), + scores, answers)): + if not answer: + logging.info('%s: %s | NO_ANSWER, %f', + features['id'][j].numpy().decode('utf-8'), + features['question'][j].numpy().decode('utf-8'), score) + continue + if sp_processor.IdToPiece(int(token_id)).startswith('▁') and offset > 0: + answer = answer[1:] + logging.info('%s: %s | %s, %f', features['id'][j].numpy().decode('utf-8'), + features['question'][j].numpy().decode('utf-8'), + answer.decode('utf-8'), score) + predictions[qid.decode('utf-8')].append((score, answer.decode('utf-8'))) + predictions = { + qid: evaluation.normalize_answer( + sorted(answers, key=operator.itemgetter(0), reverse=True)[0][1]) + for qid, answers in predictions.items() + } + return predictions + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + # Configure input processing. 
+ sp_processor = read_sentencepiece_model(FLAGS.sentencepiece_model_path) + features_map_fn = tf.function( + functools.partial( + inputs.features_map_fn, + local_radius=FLAGS.bigbird_block_size, + relative_pos_max_distance=24, + use_hard_g2l_mask=True, + sequence_length=FLAGS.sequence_length, + global_sequence_length=FLAGS.global_sequence_length, + padding_id=sp_processor.PieceToId(''), + eos_id=sp_processor.PieceToId(''), + null_id=sp_processor.PieceToId(''), + cls_id=sp_processor.PieceToId(''), + sep_id=sp_processor.PieceToId('')), + autograph=False) + # Connect to TPU cluster. + if FLAGS.master: + resolver = tf.distribute.cluster_resolver.TPUClusterResolver(FLAGS.master) + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + strategy = tf.distribute.TPUStrategy(resolver) + else: + strategy = tf.distribute.MirroredStrategy() + # Initialize datasets. + with worker_context(): + _ = tf.random.get_global_generator() + dataset = inputs.read_batches( + FLAGS.data_dir, FLAGS.split, FLAGS.batch_size, include_answers=False) + # Initialize model and compile. + with strategy.scope(): + model = tf.keras.models.load_model(FLAGS.saved_model_dir, compile=False) + logging.info('Model initialized. Beginning prediction loop.') + logits_fn = tf.function( + functools.partial(prediction.distributed_logits_fn, model)) + decode_logits_fn = tf.function( + functools.partial(prediction.decode_logits, FLAGS.decode_top_k, + FLAGS.decode_max_size)) + split_and_pad_fn = tf.function( + functools.partial(prediction.split_and_pad, strategy, FLAGS.batch_size)) + # Prediction strategy. 
+ predict_fn = functools.partial( + predict, + sp_processor=sp_processor, + features_map_fn=features_map_fn, + logits_fn=logits_fn, + decode_logits_fn=decode_logits_fn, + split_and_pad_fn=split_and_pad_fn, + distribute_strategy=strategy, + dataset=dataset) + with worker_context(): + predictions = predict_fn() + with tf.io.gfile.GFile(FLAGS.predictions_path, 'w') as f: + json.dump(predictions, f) + + +if __name__ == '__main__': + flags.mark_flags_as_required(['split', 'predictions_path', 'saved_model_dir']) + app.run(main) diff --git a/official/nlp/projects/triviaqa/prediction.py b/official/nlp/projects/triviaqa/prediction.py new file mode 100644 index 0000000000000000000000000000000000000000..f9ebd729fa7698bf71af1b4b2efa3a70f79a42ad --- /dev/null +++ b/official/nlp/projects/triviaqa/prediction.py @@ -0,0 +1,69 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Functions for inference."""
+import tensorflow as tf
+
+
+def split_and_pad(strategy, batch_size, x):
+ """Split and pad for inference."""
+ per_replica_size = batch_size // strategy.num_replicas_in_sync
+
+ def slice_fn(x, i):
+ begin = min(x.shape[0], i * per_replica_size)
+ end = min(x.shape[0], (i + 1) * per_replica_size)
+ indices = tf.range(begin, end, dtype=tf.int32)
+ return tf.gather(x, tf.pad(indices, [[0, per_replica_size - end + begin]]))
+
+ # pylint: disable=g-long-lambda
+ return tf.nest.map_structure(
+ lambda x: strategy.experimental_distribute_values_from_function(
+ lambda ctx: slice_fn(x, ctx.replica_id_in_sync_group)), x)
+ # pylint: enable=g-long-lambda
+
+
+def decode_logits(top_k, max_size, logits, default):
+ """Get the span from logits."""
+ logits = tf.transpose(logits, [0, 2, 1])
+ values, indices = tf.math.top_k(logits, top_k)
+ width = (
+ tf.expand_dims(indices[:, 1, :], -2) -
+ tf.expand_dims(indices[:, 0, :], -1))
+ mask = tf.logical_and(width >= 0, width <= max_size)
+ scores = (
+ tf.expand_dims(values[:, 0, :], -1) + tf.expand_dims(values[:, 1, :], -2))
+ scores = tf.where(mask, scores, -1e8)
+ flat_indices = tf.argmax(tf.reshape(scores, (-1, top_k * top_k)), -1)
+ begin = tf.gather(
+ indices[:, 0, :], tf.math.floordiv(flat_indices, top_k), batch_dims=1)
+ end = tf.gather(
+ indices[:, 1, :], tf.math.mod(flat_indices, top_k), batch_dims=1)
+ reduced_mask = tf.math.reduce_any(mask, [-1, -2])
+ return (tf.where(reduced_mask, begin,
+ default), tf.where(reduced_mask, end, default),
+ tf.math.reduce_max(scores, [-1, -2]))
+
+
+@tf.function
+def decode_answer(context, begin, end, token_offsets, end_limit):
+ i = tf.gather(token_offsets, begin, batch_dims=1)
+ j = tf.gather(token_offsets, tf.minimum(end + 1, end_limit), batch_dims=1)
+ j = tf.where(end == end_limit, tf.cast(tf.strings.length(context), tf.int64),
+ j)
+ return tf.strings.substr(context, i, j - i)
+
+
+def distributed_logits_fn(model, x):
+ return
model.distribute_strategy.run( + lambda x: model(x, training=False), args=(x,)) diff --git a/official/nlp/projects/triviaqa/preprocess.py b/official/nlp/projects/triviaqa/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..45406a68f7724b3ec17b4356e890cef67c843574 --- /dev/null +++ b/official/nlp/projects/triviaqa/preprocess.py @@ -0,0 +1,515 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for preprocessing TriviaQA data.""" +import bisect +import json +import operator +import os +import re +import string +from typing import Any, Dict, Generator, List, Optional, Set, Text, Tuple + +from absl import logging +import apache_beam as beam +from apache_beam import metrics +import dataclasses +import nltk +import numpy as np +import tensorflow.io.gfile as gfile + +import sentencepiece as spm +from official.nlp.projects.triviaqa import evaluation +from official.nlp.projects.triviaqa import sentencepiece_pb2 + + +@dataclasses.dataclass +class Question(object): + id: Text + value: Text + + +@dataclasses.dataclass +class EvidenceInfo(object): + id: Text + source: Text + title: Text + + +@dataclasses.dataclass +class Evidence(object): + info: EvidenceInfo + text: Text + + +@dataclasses.dataclass +class Answer(object): + value: Text + aliases: List[Text] + normalized_aliases: List[Text] + + +@dataclasses.dataclass +class QuestionAnswer(object): + question: Question + 
evidence_info: List[EvidenceInfo] + answer: Optional[Answer] = None + + +@dataclasses.dataclass +class QuestionAnswerEvidence(object): + question: Question + evidence: Evidence + answer: Optional[Answer] = None + + +@dataclasses.dataclass +class Features(object): + id: Text + stride_index: int + question_id: Text + question: Text + context: bytes + token_ids: List[int] + token_offsets: List[int] + global_token_ids: List[int] + segment_ids: List[int] + + +@dataclasses.dataclass +class Paragraph(object): + sentences: List[sentencepiece_pb2.SentencePieceText] + size: int + + +@dataclasses.dataclass +class AnswerSpan(object): + begin: int # inclusive + end: int # inclusive + text: Text + + +def make_paragraph( + sentence_tokenizer: nltk.tokenize.api.TokenizerI, + processor: spm.SentencePieceProcessor, + text: Text, + paragraph_metric: Optional[metrics.Metrics.DelegatingDistribution] = None, + sentence_metric: Optional[metrics.Metrics.DelegatingDistribution] = None +) -> Paragraph: + """Tokenizes paragraphs.""" + paragraph_size = 0 + sentences = [] + for sentence in sentence_tokenizer.tokenize(text): + sentencepiece_text = sentencepiece_pb2.SentencePieceText.FromString( + processor.EncodeAsSerializedProto(sentence)) + paragraph_size += len(sentencepiece_text.pieces) + sentences.append(sentencepiece_text) + if sentence_metric: + sentence_metric.update(len(sentencepiece_text.pieces)) + if paragraph_metric: + paragraph_metric.update(paragraph_size) + return Paragraph(sentences=sentences, size=paragraph_size) + + +def read_question_answers(json_path: Text) -> List[QuestionAnswer]: + """Read question answers.""" + with gfile.GFile(json_path) as f: + data = json.load(f)['Data'] + question_answers = [] + for datum in data: + question = Question(id=datum['QuestionId'], value=datum['Question']) + if 'Answer' in datum: + answer = Answer( + value=datum['Answer']['Value'], + aliases=datum['Answer']['Aliases'], + normalized_aliases=datum['Answer']['NormalizedAliases']) + else: + 
answer = None + evidence_info = [] + for key in ['EntityPages', 'SearchResults']: + for document in datum.get(key, []): + evidence_info.append( + EvidenceInfo( + id=document['Filename'], title=document['Title'], source=key)) + question_answers.append( + QuestionAnswer( + question=question, evidence_info=evidence_info, answer=answer)) + return question_answers + + +def alias_answer(answer: Text, include=None): + alias = answer.replace('_', ' ').lower() + exclude = set(string.punctuation + ''.join(['‘', '’', '´', '`'])) + include = include or [] + alias = ''.join(c if c not in exclude or c in include else ' ' for c in alias) + return ' '.join(alias.split()).strip() + + +def make_answer_set(answer: Answer) -> Set[Text]: + """Apply less aggressive normalization to the answer aliases.""" + answers = [] + for alias in [answer.value] + answer.aliases: + answers.append(alias_answer(alias)) + answers.append(alias_answer(alias, [',', '.'])) + answers.append(alias_answer(alias, ['-'])) + answers.append(alias_answer(alias, [',', '.', '-'])) + answers.append(alias_answer(alias, string.punctuation)) + return set(answers + answer.normalized_aliases) + + +def find_answer_spans(text: bytes, answer_set: Set[Text]) -> List[AnswerSpan]: + """Find answer spans.""" + spans = [] + for answer in answer_set: + answer_regex = re.compile( + re.escape(answer).encode('utf-8').replace(b'\\ ', b'[ -]'), + flags=re.IGNORECASE) + for match in re.finditer(answer_regex, text): + spans.append( + AnswerSpan( + begin=match.start(), + end=match.end(), + text=match.group(0).decode('utf-8'))) + return sorted(spans, key=operator.attrgetter('begin')) + + +def realign_answer_span(features: Features, answer_set: Optional[Set[Text]], + processor: spm.SentencePieceProcessor, + span: AnswerSpan) -> Optional[AnswerSpan]: + """Align answer span to text with given tokens.""" + i = bisect.bisect_left(features.token_offsets, span.begin) + if i == len(features.token_offsets) or span.begin < features.token_offsets[i]: 
+ i -= 1 + j = i + 1 + answer_end = span.begin + len(span.text.encode('utf-8')) + while (j < len(features.token_offsets) and + features.token_offsets[j] < answer_end): + j += 1 + j -= 1 + sp_answer = ( + features.context[features.token_offsets[i]:features.token_offsets[j + 1]] + if j + 1 < len(features.token_offsets) else + features.context[features.token_offsets[i]:]) + if (processor.IdToPiece(features.token_ids[i]).startswith('▁') and + features.token_offsets[i] > 0): + sp_answer = sp_answer[1:] + sp_answer = evaluation.normalize_answer(sp_answer.decode('utf-8')) + if answer_set is not None and sp_answer not in answer_set: + # No need to warn if the cause was breaking word boundaries. + if len(sp_answer) and not len(sp_answer) > len( + evaluation.normalize_answer(span.text)): + logging.warning('%s: "%s" not in %s.', features.question_id, sp_answer, + answer_set) + return None + return AnswerSpan(begin=i, end=j, text=span.text) + + +def read_sentencepiece_model(path): + with gfile.GFile(path, 'rb') as file: + processor = spm.SentencePieceProcessor() + processor.LoadFromSerializedProto(file.read()) + return processor + + +class ReadEvidence(beam.DoFn): + """Function to read evidence.""" + + def __init__(self, wikipedia_dir: Text, web_dir: Text): + self._wikipedia_dir = wikipedia_dir + self._web_dir = web_dir + + def process( + self, question_answer: QuestionAnswer + ) -> Generator[QuestionAnswerEvidence, None, None]: + for info in question_answer.evidence_info: + if info.source == 'EntityPages': + evidence_path = os.path.join(self._wikipedia_dir, info.id) + elif info.source == 'SearchResult': + evidence_path = os.path.join(self._web_dir, info.id) + else: + raise ValueError(f'Unknown evidence source: {info.source}.') + with gfile.GFile(evidence_path, 'rb') as f: + text = f.read().decode('utf-8') + metrics.Metrics.counter('_', 'documents').inc() + yield QuestionAnswerEvidence( + question=question_answer.question, + evidence=Evidence(info=info, text=text), + 
answer=question_answer.answer) + + +_CLS_PIECE = '' +_EOS_PIECE = '' +_SEP_PIECE = '' +# _PARAGRAPH_SEP_PIECE = '' +_NULL_PIECE = '' +_QUESTION_PIECE = '' + + +class MakeFeatures(beam.DoFn): + """Function to make features.""" + + def __init__(self, sentencepiece_model_path: Text, max_num_tokens: int, + max_num_global_tokens: int, stride: int): + self._sentencepiece_model_path = sentencepiece_model_path + self._max_num_tokens = max_num_tokens + self._max_num_global_tokens = max_num_global_tokens + self._stride = stride + + def setup(self): + self._sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') + self._sentencepiece_processor = read_sentencepiece_model( + self._sentencepiece_model_path) + + def _make_features(self, stride_index: int, paragraph_texts: List[Text], + paragraphs: List[Paragraph], + question_answer_evidence: QuestionAnswerEvidence, + ids: List[int], + paragraph_offset: int) -> Tuple[int, Features]: + global_ids = ( + [self._sentencepiece_processor.PieceToId(_CLS_PIECE)] + + [self._sentencepiece_processor.PieceToId(_QUESTION_PIECE)] * len(ids)) + segment_ids = [i + 1 for i in range(len(ids))] # offset for CLS token + token_ids, sentences = [], [] + offsets, offset, full_text = [-1] * len(ids), 0, True + for i in range(paragraph_offset, len(paragraph_texts)): + if i < len(paragraphs): + paragraph = paragraphs[i] + else: + paragraphs.append( + make_paragraph( + self._sentence_tokenizer, + self._sentencepiece_processor, + paragraph_texts[i], + paragraph_metric=metrics.Metrics.distribution( + '_', 'paragraphs'), + sentence_metric=metrics.Metrics.distribution('_', 'sentences'))) + paragraph = paragraphs[-1] + for sentence in paragraph.sentences: + if (len(ids) + len(token_ids) + len(sentence.pieces) + 1 >= + self._max_num_tokens or + len(global_ids) >= self._max_num_global_tokens): + full_text = False + break + for j, piece in enumerate(sentence.pieces): + token_ids.append(piece.id) + segment_ids.append(len(global_ids)) + 
offsets.append(offset + piece.begin) + if j == 0 and sentences: + offsets[-1] -= 1 + offset += len(sentence.text.encode('utf-8')) + 1 + global_ids.append(self._sentencepiece_processor.PieceToId(_EOS_PIECE)) + sentences.append(sentence.text) + if not full_text: + break + context = ' '.join(sentences).encode('utf-8') + token_ids.append(self._sentencepiece_processor.PieceToId(_NULL_PIECE)) + offsets.append(len(context)) + segment_ids.append(0) + next_paragraph_index = len(paragraph_texts) + if not full_text and self._stride > 0: + shift = paragraphs[paragraph_offset].size + next_paragraph_index = paragraph_offset + 1 + while (next_paragraph_index < len(paragraphs) and + shift + paragraphs[next_paragraph_index].size <= self._stride): + shift += paragraphs[next_paragraph_index].size + next_paragraph_index += 1 + return next_paragraph_index, Features( + id='{}--{}'.format(question_answer_evidence.question.id, + question_answer_evidence.evidence.info.id), + stride_index=stride_index, + question_id=question_answer_evidence.question.id, + question=question_answer_evidence.question.value, + context=context, + token_ids=ids + token_ids, + global_token_ids=global_ids, + segment_ids=segment_ids, + token_offsets=offsets) + + def process( + self, question_answer_evidence: QuestionAnswerEvidence + ) -> Generator[Features, None, None]: + # Tokenize question which is shared among all examples. 
+ ids = ( + self._sentencepiece_processor.EncodeAsIds( + question_answer_evidence.question.value) + + [self._sentencepiece_processor.PieceToId(_SEP_PIECE)]) + paragraph_texts = list( + filter( + lambda p: p, + map(lambda p: p.strip(), + question_answer_evidence.evidence.text.split('\n')))) + stride_index, paragraphs, paragraph_index = 0, [], 0 + while paragraph_index < len(paragraph_texts): + paragraph_index, features = self._make_features(stride_index, + paragraph_texts, + paragraphs, + question_answer_evidence, + ids, paragraph_index) + stride_index += 1 + yield features + + +def _handle_exceptional_examples( + features: Features, + processor: spm.SentencePieceProcessor) -> List[AnswerSpan]: + """Special cases in data.""" + if features.id == 'qw_6687--Viola.txt': + pattern = 'three strings in common—G, D, and A'.encode('utf-8') + i = features.context.find(pattern) + if i != -1: + span = AnswerSpan(i + len(pattern) - 1, i + len(pattern), 'A') + span = realign_answer_span(features, None, processor, span) + assert span is not None, 'Span should exist.' + return [span] + if features.id == 'sfq_26183--Vitamin_A.txt': + pattern = ('Vitamin A is a group of unsaturated nutritional organic ' + 'compounds that includes retinol').encode('utf-8') + i = features.context.find(pattern) + if i != -1: + span = AnswerSpan(i + pattern.find(b'A'), i + pattern.find(b'A') + 1, 'A') + span = realign_answer_span(features, None, processor, span) + assert span is not None, 'Span should exist.' 
+ spans = [span] + span = AnswerSpan(i, i + pattern.find(b'A') + 1, 'Vitamin A') + span = realign_answer_span(features, None, processor, span) + return spans + [span] + if features.id == 'odql_292--Colombia.txt': + pattern = b'Colombia is the third-most populous country in Latin America' + i = features.context.find(pattern) + if i != -1: + span = AnswerSpan(i, i + len(b'Colombia'), 'Colombia') + span = realign_answer_span(features, None, processor, span) + assert span is not None, 'Span should exist.' + return [span] + if features.id == 'tc_1648--Vietnam.txt': + pattern = 'Bảo Đại'.encode('utf-8') + i = features.context.find(pattern) + if i != -1: + span = AnswerSpan(i, i + len(pattern), 'Bảo Đại') + span = realign_answer_span(features, None, processor, span) + assert span is not None, 'Span should exist.' + return [span] + if features.id == 'sfq_22225--Irish_mythology.txt': + pattern = 'Tír na nÓg'.encode('utf-8') + spans = [] + i = 0 + while features.context.find(pattern, i) != -1: + i = features.context.find(pattern) + span = AnswerSpan(i, i + len(pattern), 'Tír na nÓg') + span = realign_answer_span(features, None, processor, span) + assert span is not None, 'Span should exist.' 
+ spans.append(span) + i += len(pattern) + return spans + return [] + + +class FindAnswerSpans(beam.DoFn): + """Find answer spans in document.""" + + def __init__(self, sentencepiece_model_path: Text): + self._sentencepiece_model_path = sentencepiece_model_path + + def setup(self): + self._sentencepiece_processor = read_sentencepiece_model( + self._sentencepiece_model_path) + + def process( + self, + element: Tuple[Text, List[Features]], + answer_sets: Dict[Text, Set[Text]], + ) -> Generator[Tuple[Features, List[AnswerSpan]], None, None]: + question_id, features = element + answer_set = answer_sets[question_id] + has_answer = False + for feature in features: + answer_spans = [] + for answer_span in find_answer_spans(feature.context, answer_set): + realigned_answer_span = realign_answer_span( + feature, answer_set, self._sentencepiece_processor, answer_span) + if realigned_answer_span: + answer_spans.append(realigned_answer_span) + if not answer_spans: + answer_spans = _handle_exceptional_examples( + feature, self._sentencepiece_processor) + if answer_spans: + has_answer = True + else: + metrics.Metrics.counter('_', 'answerless_examples').inc() + yield feature, answer_spans + if not has_answer: + metrics.Metrics.counter('_', 'answerless_questions').inc() + logging.error('Question %s has no answer.', question_id) + + +def make_example( + features: Features, + labels: Optional[List[AnswerSpan]] = None) -> Tuple[Text, Dict[Text, Any]]: + """Make an example.""" + feature = { + 'id': features.id, + 'qid': features.question_id, + 'question': features.question, + 'context': features.context, + 'token_ids': features.token_ids, + 'token_offsets': features.token_offsets, + 'segment_ids': features.segment_ids, + 'global_token_ids': features.global_token_ids, + } + if labels: + answers = set((label.begin, label.end) for label in labels) + feature['answers'] = np.array([list(answer) for answer in answers], + np.int64) + else: + feature['answers'] = np.zeros([0, 2], np.int64) + 
metrics.Metrics.counter('_', 'examples').inc() + return f'{features.id}--{features.stride_index}', feature + + +def make_pipeline(root: beam.Pipeline, question_answers: List[QuestionAnswer], + answer: bool, max_num_tokens: int, max_num_global_tokens: int, + stride: int, sentencepiece_model_path: Text, + wikipedia_dir: Text, web_dir: Text): + """Makes a Beam pipeline.""" + question_answers = ( + root | 'CreateQuestionAnswers' >> beam.Create(question_answers)) + features = ( + question_answers + | 'ReadEvidence' >> beam.ParDo( + ReadEvidence(wikipedia_dir=wikipedia_dir, web_dir=web_dir)) + | 'MakeFeatures' >> beam.ParDo( + MakeFeatures( + sentencepiece_model_path=sentencepiece_model_path, + max_num_tokens=max_num_tokens, + max_num_global_tokens=max_num_global_tokens, + stride=stride))) + if answer: + features = features | 'KeyFeature' >> beam.Map( + lambda feature: (feature.question_id, feature)) + # pylint: disable=g-long-lambda + answer_sets = ( + question_answers + | 'MakeAnswerSet' >> + beam.Map(lambda qa: (qa.question.id, make_answer_set(qa.answer)))) + # pylint: enable=g-long-lambda + examples = ( + features + | beam.GroupByKey() + | 'FindAnswerSpans' >> beam.ParDo( + FindAnswerSpans(sentencepiece_model_path), + answer_sets=beam.pvalue.AsDict(answer_sets)) + | 'MakeExamplesWithLabels' >> beam.MapTuple(make_example)) + else: + examples = features | 'MakeExamples' >> beam.Map(make_example) + return examples diff --git a/official/nlp/projects/triviaqa/sentencepiece_pb2.py b/official/nlp/projects/triviaqa/sentencepiece_pb2.py new file mode 100755 index 0000000000000000000000000000000000000000..518e907792e1dd36d222182f39f3bd49b81afb4f --- /dev/null +++ b/official/nlp/projects/triviaqa/sentencepiece_pb2.py @@ -0,0 +1,312 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# pylint: disable=bad-continuation +# pylint: disable=protected-access +# Generated by the protocol buffer compiler. DO NOT EDIT! +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor.FileDescriptor( + name='third_party/sentencepiece/src/sentencepiece.proto', + package='sentencepiece', + syntax='proto2', + serialized_options=None, + create_key=_descriptor._internal_create_key, + serialized_pb=b'\n1third_party/sentencepiece/src/sentencepiece.proto\x12\rsentencepiece\"\xdf\x01\n\x11SentencePieceText\x12\x0c\n\x04text\x18\x01 \x01(\t\x12>\n\x06pieces\x18\x02 \x03(\x0b\x32..sentencepiece.SentencePieceText.SentencePiece\x12\r\n\x05score\x18\x03 \x01(\x02\x1a\x62\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\r\x12\x0f\n\x07surface\x18\x03 \x01(\t\x12\r\n\x05\x62\x65gin\x18\x04 \x01(\r\x12\x0b\n\x03\x65nd\x18\x05 \x01(\r*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"J\n\x16NBestSentencePieceText\x12\x30\n\x06nbests\x18\x01 \x03(\x0b\x32 .sentencepiece.SentencePieceText' +) + +_SENTENCEPIECETEXT_SENTENCEPIECE = _descriptor.Descriptor( + name='SentencePiece', + full_name='sentencepiece.SentencePieceText.SentencePiece', + filename=None, + file=DESCRIPTOR, + 
containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='piece', + full_name='sentencepiece.SentencePieceText.SentencePiece.piece', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=b''.decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='id', + full_name='sentencepiece.SentencePieceText.SentencePiece.id', + index=1, + number=2, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='surface', + full_name='sentencepiece.SentencePieceText.SentencePiece.surface', + index=2, + number=3, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=b''.decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='begin', + full_name='sentencepiece.SentencePieceText.SentencePiece.begin', + index=3, + number=4, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='end', + full_name='sentencepiece.SentencePieceText.SentencePiece.end', + index=4, + number=5, + type=13, + cpp_type=3, + label=1, + has_default_value=False, + 
default_value=0, + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=True, + syntax='proto2', + extension_ranges=[ + (200, 536870912), + ], + oneofs=[], + serialized_start=183, + serialized_end=281, +) + +_SENTENCEPIECETEXT = _descriptor.Descriptor( + name='SentencePieceText', + full_name='sentencepiece.SentencePieceText', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='text', + full_name='sentencepiece.SentencePieceText.text', + index=0, + number=1, + type=9, + cpp_type=9, + label=1, + has_default_value=False, + default_value=b''.decode('utf-8'), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='pieces', + full_name='sentencepiece.SentencePieceText.pieces', + index=1, + number=2, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='score', + full_name='sentencepiece.SentencePieceText.score', + index=2, + number=3, + type=2, + cpp_type=6, + label=1, + has_default_value=False, + default_value=float(0), + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + ], + extensions=[], + nested_types=[ + 
_SENTENCEPIECETEXT_SENTENCEPIECE, + ], + enum_types=[], + serialized_options=None, + is_extendable=True, + syntax='proto2', + extension_ranges=[ + (200, 536870912), + ], + oneofs=[], + serialized_start=69, + serialized_end=292, +) + +_NBESTSENTENCEPIECETEXT = _descriptor.Descriptor( + name='NBestSentencePieceText', + full_name='sentencepiece.NBestSentencePieceText', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='nbests', + full_name='sentencepiece.NBestSentencePieceText.nbests', + index=0, + number=1, + type=11, + cpp_type=10, + label=3, + has_default_value=False, + default_value=[], + message_type=None, + enum_type=None, + containing_type=None, + is_extension=False, + extension_scope=None, + serialized_options=None, + file=DESCRIPTOR, + create_key=_descriptor._internal_create_key), + ], + extensions=[], + nested_types=[], + enum_types=[], + serialized_options=None, + is_extendable=False, + syntax='proto2', + extension_ranges=[], + oneofs=[], + serialized_start=294, + serialized_end=368, +) + +_SENTENCEPIECETEXT_SENTENCEPIECE.containing_type = _SENTENCEPIECETEXT +_SENTENCEPIECETEXT.fields_by_name[ + 'pieces'].message_type = _SENTENCEPIECETEXT_SENTENCEPIECE +_NBESTSENTENCEPIECETEXT.fields_by_name[ + 'nbests'].message_type = _SENTENCEPIECETEXT +DESCRIPTOR.message_types_by_name['SentencePieceText'] = _SENTENCEPIECETEXT +DESCRIPTOR.message_types_by_name[ + 'NBestSentencePieceText'] = _NBESTSENTENCEPIECETEXT +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +SentencePieceText = _reflection.GeneratedProtocolMessageType( + 'SentencePieceText', + (_message.Message,), + { + 'SentencePiece': + _reflection.GeneratedProtocolMessageType( + 'SentencePiece', + (_message.Message,), + { + 'DESCRIPTOR': + _SENTENCEPIECETEXT_SENTENCEPIECE, + '__module__': + 'official.nlp.projects.triviaqa.sentencepiece_pb2' + # 
@@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText.SentencePiece) + }), + 'DESCRIPTOR': + _SENTENCEPIECETEXT, + '__module__': + 'official.nlp.projects.triviaqa.sentencepiece_pb2' + # @@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText) + }) +_sym_db.RegisterMessage(SentencePieceText) +_sym_db.RegisterMessage(SentencePieceText.SentencePiece) + +NBestSentencePieceText = _reflection.GeneratedProtocolMessageType( + 'NBestSentencePieceText', + (_message.Message,), + { + 'DESCRIPTOR': _NBESTSENTENCEPIECETEXT, + '__module__': 'official.nlp.projects.triviaqa.sentencepiece_pb2' + # @@protoc_insertion_point(class_scope:sentencepiece.NBestSentencePieceText) + }) +_sym_db.RegisterMessage(NBestSentencePieceText) + +# @@protoc_insertion_point(module_scope) diff --git a/official/nlp/projects/triviaqa/train.py b/official/nlp/projects/triviaqa/train.py new file mode 100644 index 0000000000000000000000000000000000000000..c4e4c101f9f0034600c955fa0fb218a6253299c2 --- /dev/null +++ b/official/nlp/projects/triviaqa/train.py @@ -0,0 +1,384 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""TriviaQA training script.""" +import collections +import contextlib +import functools +import json +import operator +import os + +from absl import app +from absl import flags +from absl import logging +import gin +import tensorflow as tf +import tensorflow_datasets as tfds + +import sentencepiece as spm +from official.nlp import optimization as nlp_optimization +from official.nlp.configs import encoders +from official.nlp.projects.triviaqa import evaluation +from official.nlp.projects.triviaqa import inputs +from official.nlp.projects.triviaqa import modeling +from official.nlp.projects.triviaqa import prediction + +flags.DEFINE_string('data_dir', None, 'Data directory for TensorFlow Datasets.') + +flags.DEFINE_string( + 'validation_gold_path', None, + 'Path to golden validation. Usually, the wikipedia-dev.json file.') + +flags.DEFINE_string('model_dir', None, + 'Directory for checkpoints and summaries.') + +flags.DEFINE_string('model_config_path', None, + 'JSON file containing model coniguration.') + +flags.DEFINE_string('sentencepiece_model_path', None, + 'Path to sentence piece model.') + +flags.DEFINE_enum('encoder', 'bigbird', + ['bert', 'bigbird', 'albert', 'mobilebert'], + 'Which transformer encoder model to use.') + +flags.DEFINE_integer('bigbird_block_size', 64, + 'Size of blocks for sparse block attention.') + +flags.DEFINE_string('init_checkpoint_path', None, + 'Path from which to initialize weights.') + +flags.DEFINE_integer('train_sequence_length', 4096, + 'Maximum number of tokens for training.') + +flags.DEFINE_integer('train_global_sequence_length', 320, + 'Maximum number of global tokens for training.') + +flags.DEFINE_integer('validation_sequence_length', 4096, + 'Maximum number of tokens for validation.') + +flags.DEFINE_integer('validation_global_sequence_length', 320, + 'Maximum number of global tokens for validation.') + +flags.DEFINE_integer('batch_size', 32, 'Size of batch.') + +flags.DEFINE_string('master', '', 'Address of the TPU 
master.') + +flags.DEFINE_integer('decode_top_k', 8, + 'Maximum number of tokens to consider for begin/end.') + +flags.DEFINE_integer('decode_max_size', 16, + 'Maximum number of sentence pieces in an answer.') + +flags.DEFINE_float('dropout_rate', 0.1, 'Dropout rate for hidden layers.') + +flags.DEFINE_float('attention_dropout_rate', 0.3, + 'Dropout rate for attention layers.') + +flags.DEFINE_float('label_smoothing', 1e-1, 'Degree of label smoothing.') + +flags.DEFINE_multi_string( + 'gin_bindings', [], + 'Gin bindings to override the values set in the config files') + +FLAGS = flags.FLAGS + + +@contextlib.contextmanager +def worker_context(): + if FLAGS.master: + with tf.device('/job:worker') as d: + yield d + else: + yield + + +def read_sentencepiece_model(path): + with tf.io.gfile.GFile(path, 'rb') as file: + processor = spm.SentencePieceProcessor() + processor.LoadFromSerializedProto(file.read()) + return processor + + +# Rename old BERT v1 configuration parameters. +_MODEL_CONFIG_REPLACEMENTS = { + 'num_hidden_layers': 'num_layers', + 'attention_probs_dropout_prob': 'attention_dropout_rate', + 'hidden_dropout_prob': 'dropout_rate', + 'hidden_act': 'hidden_activation', + 'window_size': 'block_size', +} + + +def read_model_config(encoder, + path, + bigbird_block_size=None) -> encoders.EncoderConfig: + """Merges the JSON configuration into the encoder configuration.""" + with tf.io.gfile.GFile(path) as f: + model_config = json.load(f) + for key, value in _MODEL_CONFIG_REPLACEMENTS.items(): + if key in model_config: + model_config[value] = model_config.pop(key) + model_config['attention_dropout_rate'] = FLAGS.attention_dropout_rate + model_config['dropout_rate'] = FLAGS.dropout_rate + model_config['block_size'] = bigbird_block_size + encoder_config = encoders.EncoderConfig(type=encoder) + # Override the default config with those loaded from the JSON file. 
+ encoder_config_keys = encoder_config.get().as_dict().keys() + overrides = {} + for key, value in model_config.items(): + if key in encoder_config_keys: + overrides[key] = value + else: + logging.warning('Ignoring config parameter %s=%s', key, value) + encoder_config.get().override(overrides) + return encoder_config + + +@gin.configurable(denylist=[ + 'model', + 'strategy', + 'train_dataset', + 'model_dir', + 'init_checkpoint_path', + 'evaluate_fn', +]) +def fit(model, + strategy, + train_dataset, + model_dir, + init_checkpoint_path=None, + evaluate_fn=None, + learning_rate=1e-5, + learning_rate_polynomial_decay_rate=1., + weight_decay_rate=1e-1, + num_warmup_steps=5000, + num_decay_steps=51000, + num_epochs=6): + """Train and evaluate.""" + hparams = dict( + learning_rate=learning_rate, + num_decay_steps=num_decay_steps, + num_warmup_steps=num_warmup_steps, + num_epochs=num_epochs, + weight_decay_rate=weight_decay_rate, + dropout_rate=FLAGS.dropout_rate, + attention_dropout_rate=FLAGS.attention_dropout_rate, + label_smoothing=FLAGS.label_smoothing) + logging.info(hparams) + learning_rate_schedule = nlp_optimization.WarmUp( + learning_rate, + tf.keras.optimizers.schedules.PolynomialDecay( + learning_rate, + num_decay_steps, + end_learning_rate=0., + power=learning_rate_polynomial_decay_rate), num_warmup_steps) + with strategy.scope(): + optimizer = nlp_optimization.AdamWeightDecay( + learning_rate_schedule, + weight_decay_rate=weight_decay_rate, + epsilon=1e-6, + exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias']) + model.compile(optimizer, loss=modeling.SpanOrCrossEntropyLoss()) + + def init_fn(init_checkpoint_path): + ckpt = tf.train.Checkpoint(encoder=model.encoder) + ckpt.restore(init_checkpoint_path).assert_existing_objects_matched() + + with worker_context(): + ckpt_manager = tf.train.CheckpointManager( + tf.train.Checkpoint(model=model, optimizer=optimizer), + model_dir, + max_to_keep=None, + init_fn=(functools.partial(init_fn, 
init_checkpoint_path) + if init_checkpoint_path else None)) + with strategy.scope(): + ckpt_manager.restore_or_initialize() + val_summary_writer = tf.summary.create_file_writer( + os.path.join(model_dir, 'val')) + best_exact_match = 0. + for epoch in range(len(ckpt_manager.checkpoints), num_epochs): + model.fit( + train_dataset, + callbacks=[ + tf.keras.callbacks.TensorBoard(model_dir, write_graph=False), + ]) + ckpt_path = ckpt_manager.save() + if evaluate_fn is None: + continue + metrics = evaluate_fn() + logging.info('Epoch %d: %s', epoch + 1, metrics) + if best_exact_match < metrics['exact_match']: + best_exact_match = metrics['exact_match'] + model.save(os.path.join(model_dir, 'export'), include_optimizer=False) + logging.info('Exporting %s as SavedModel.', ckpt_path) + with val_summary_writer.as_default(): + for name, data in metrics.items(): + tf.summary.scalar(name, data, epoch + 1) + + +def evaluate(sp_processor, features_map_fn, labels_map_fn, logits_fn, + decode_logits_fn, split_and_pad_fn, distribute_strategy, + validation_dataset, ground_truth): + """Run evaluation.""" + loss_metric = tf.keras.metrics.Mean() + + @tf.function + def update_loss(y, logits): + loss_fn = modeling.SpanOrCrossEntropyLoss( + reduction=tf.keras.losses.Reduction.NONE) + return loss_metric(loss_fn(y, logits)) + + predictions = collections.defaultdict(list) + for _, (features, labels) in validation_dataset.enumerate(): + token_ids = features['token_ids'] + y = labels_map_fn(token_ids, labels) + x = split_and_pad_fn(features_map_fn(features)) + logits = tf.concat( + distribute_strategy.experimental_local_results(logits_fn(x)), 0) + logits = logits[:features['token_ids'].shape[0]] + update_loss(y, logits) + end_limit = token_ids.row_lengths() - 1 # inclusive + begin, end, scores = decode_logits_fn(logits, end_limit) + answers = prediction.decode_answer(features['context'], begin, end, + features['token_offsets'], + end_limit).numpy() + for _, (qid, token_id, offset, score, answer) 
in enumerate( + zip(features['qid'].numpy(), + tf.gather(features['token_ids'], begin, batch_dims=1).numpy(), + tf.gather(features['token_offsets'], begin, batch_dims=1).numpy(), + scores, answers)): + if not answer: + continue + if sp_processor.IdToPiece(int(token_id)).startswith('▁') and offset > 0: + answer = answer[1:] + predictions[qid.decode('utf-8')].append((score, answer.decode('utf-8'))) + predictions = { + qid: evaluation.normalize_answer( + sorted(answers, key=operator.itemgetter(0), reverse=True)[0][1]) + for qid, answers in predictions.items() + } + metrics = evaluation.evaluate_triviaqa(ground_truth, predictions, mute=True) + metrics['loss'] = loss_metric.result().numpy() + return metrics + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + gin.parse_config(FLAGS.gin_bindings) + model_config = read_model_config( + FLAGS.encoder, + FLAGS.model_config_path, + bigbird_block_size=FLAGS.bigbird_block_size) + logging.info(model_config.get().as_dict()) + # Configure input processing. + sp_processor = read_sentencepiece_model(FLAGS.sentencepiece_model_path) + features_map_fn = functools.partial( + inputs.features_map_fn, + local_radius=FLAGS.bigbird_block_size, + relative_pos_max_distance=24, + use_hard_g2l_mask=True, + padding_id=sp_processor.PieceToId(''), + eos_id=sp_processor.PieceToId(''), + null_id=sp_processor.PieceToId(''), + cls_id=sp_processor.PieceToId(''), + sep_id=sp_processor.PieceToId('')) + train_features_map_fn = tf.function( + functools.partial( + features_map_fn, + sequence_length=FLAGS.train_sequence_length, + global_sequence_length=FLAGS.train_global_sequence_length), + autograph=False) + train_labels_map_fn = tf.function( + functools.partial( + inputs.labels_map_fn, sequence_length=FLAGS.train_sequence_length)) + # Connect to TPU cluster. 
+ if FLAGS.master: + resolver = tf.distribute.cluster_resolver.TPUClusterResolver(FLAGS.master) + tf.config.experimental_connect_to_cluster(resolver) + tf.tpu.experimental.initialize_tpu_system(resolver) + strategy = tf.distribute.TPUStrategy(resolver) + else: + strategy = tf.distribute.MirroredStrategy() + # Initialize datasets. + with worker_context(): + _ = tf.random.get_global_generator() + train_dataset = inputs.read_batches( + FLAGS.data_dir, + tfds.Split.TRAIN, + FLAGS.batch_size, + shuffle=True, + drop_final_batch=True) + validation_dataset = inputs.read_batches(FLAGS.data_dir, + tfds.Split.VALIDATION, + FLAGS.batch_size) + + def train_map_fn(x, y): + features = train_features_map_fn(x) + labels = modeling.smooth_labels(FLAGS.label_smoothing, + train_labels_map_fn(x['token_ids'], y), + features['question_lengths'], + features['token_ids']) + return features, labels + + train_dataset = train_dataset.map(train_map_fn, 16).prefetch(16) + # Initialize model and compile. + with strategy.scope(): + model = modeling.TriviaQaModel(model_config, FLAGS.train_sequence_length) + logits_fn = tf.function( + functools.partial(prediction.distributed_logits_fn, model)) + decode_logits_fn = tf.function( + functools.partial(prediction.decode_logits, FLAGS.decode_top_k, + FLAGS.decode_max_size)) + split_and_pad_fn = tf.function( + functools.partial(prediction.split_and_pad, strategy, FLAGS.batch_size)) + # Evaluation strategy. 
+ with tf.io.gfile.GFile(FLAGS.validation_gold_path) as f: + ground_truth = { + datum['QuestionId']: datum['Answer'] for datum in json.load(f)['Data'] + } + validation_features_map_fn = tf.function( + functools.partial( + features_map_fn, + sequence_length=FLAGS.validation_sequence_length, + global_sequence_length=FLAGS.validation_global_sequence_length), + autograph=False) + validation_labels_map_fn = tf.function( + functools.partial( + inputs.labels_map_fn, + sequence_length=FLAGS.validation_sequence_length)) + evaluate_fn = functools.partial( + evaluate, + sp_processor=sp_processor, + features_map_fn=validation_features_map_fn, + labels_map_fn=validation_labels_map_fn, + logits_fn=logits_fn, + decode_logits_fn=decode_logits_fn, + split_and_pad_fn=split_and_pad_fn, + distribute_strategy=strategy, + validation_dataset=validation_dataset, + ground_truth=ground_truth) + logging.info('Model initialized. Beginning training fit loop.') + fit(model, strategy, train_dataset, FLAGS.model_dir, + FLAGS.init_checkpoint_path, evaluate_fn) + + +if __name__ == '__main__': + flags.mark_flags_as_required([ + 'model_config_path', 'model_dir', 'sentencepiece_model_path', + 'validation_gold_path' + ]) + app.run(main) diff --git a/official/nlp/tasks/__init__.py b/official/nlp/tasks/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/tasks/__init__.py +++ b/official/nlp/tasks/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/tasks/electra_task.py b/official/nlp/tasks/electra_task.py new file mode 100644 index 0000000000000000000000000000000000000000..6853a2cc246acd79f7ee81c7ba0b843ac2c9bfb3 --- /dev/null +++ b/official/nlp/tasks/electra_task.py @@ -0,0 +1,242 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ELECTRA pretraining task (Joint Masked LM and Replaced Token Detection).""" + +import dataclasses +import tensorflow as tf + +from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling import tf_utils +from official.nlp.configs import bert +from official.nlp.configs import electra +from official.nlp.configs import encoders +from official.nlp.data import pretrain_dataloader +from official.nlp.modeling import layers +from official.nlp.modeling import models + + +@dataclasses.dataclass +class ElectraPretrainConfig(cfg.TaskConfig): + """The model config.""" + model: electra.ElectraPretrainerConfig = electra.ElectraPretrainerConfig( + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=768, + num_classes=2, + dropout_rate=0.1, + name='next_sentence') + ]) + train_data: cfg.DataConfig = cfg.DataConfig() + validation_data: cfg.DataConfig = cfg.DataConfig() + + +def _build_pretrainer( + config: 
electra.ElectraPretrainerConfig) -> models.ElectraPretrainer: + """Instantiates ElectraPretrainer from the config.""" + generator_encoder_cfg = config.generator_encoder + discriminator_encoder_cfg = config.discriminator_encoder + # Copy discriminator's embeddings to generator for easier model serialization. + discriminator_network = encoders.build_encoder(discriminator_encoder_cfg) + if config.tie_embeddings: + embedding_layer = discriminator_network.get_embedding_layer() + generator_network = encoders.build_encoder( + generator_encoder_cfg, embedding_layer=embedding_layer) + else: + generator_network = encoders.build_encoder(generator_encoder_cfg) + + generator_encoder_cfg = generator_encoder_cfg.get() + return models.ElectraPretrainer( + generator_network=generator_network, + discriminator_network=discriminator_network, + vocab_size=generator_encoder_cfg.vocab_size, + num_classes=config.num_classes, + sequence_length=config.sequence_length, + num_token_predictions=config.num_masked_tokens, + mlm_activation=tf_utils.get_activation( + generator_encoder_cfg.hidden_activation), + mlm_initializer=tf.keras.initializers.TruncatedNormal( + stddev=generator_encoder_cfg.initializer_range), + classification_heads=[ + layers.ClassificationHead(**cfg.as_dict()) for cfg in config.cls_heads + ], + disallow_correct=config.disallow_correct) + + +@task_factory.register_task_cls(ElectraPretrainConfig) +class ElectraPretrainTask(base_task.Task): + """ELECTRA Pretrain Task (Masked LM + Replaced Token Detection).""" + + def build_model(self): + return _build_pretrainer(self.task_config.model) + + def build_losses(self, + labels, + model_outputs, + metrics, + aux_losses=None) -> tf.Tensor: + metrics = dict([(metric.name, metric) for metric in metrics]) + + # generator lm and (optional) nsp loss. 
+ lm_prediction_losses = tf.keras.losses.sparse_categorical_crossentropy( + labels['masked_lm_ids'], + tf.cast(model_outputs['lm_outputs'], tf.float32), + from_logits=True) + lm_label_weights = labels['masked_lm_weights'] + lm_numerator_loss = tf.reduce_sum(lm_prediction_losses * lm_label_weights) + lm_denominator_loss = tf.reduce_sum(lm_label_weights) + mlm_loss = tf.math.divide_no_nan(lm_numerator_loss, lm_denominator_loss) + metrics['lm_example_loss'].update_state(mlm_loss) + if 'next_sentence_labels' in labels: + sentence_labels = labels['next_sentence_labels'] + sentence_outputs = tf.cast( + model_outputs['sentence_outputs'], dtype=tf.float32) + sentence_loss = tf.keras.losses.sparse_categorical_crossentropy( + sentence_labels, sentence_outputs, from_logits=True) + metrics['next_sentence_loss'].update_state(sentence_loss) + total_loss = mlm_loss + sentence_loss + else: + total_loss = mlm_loss + + # discriminator replaced token detection (rtd) loss. + rtd_logits = model_outputs['disc_logits'] + rtd_labels = tf.cast(model_outputs['disc_label'], tf.float32) + input_mask = tf.cast(labels['input_mask'], tf.float32) + rtd_ind_loss = tf.nn.sigmoid_cross_entropy_with_logits( + logits=rtd_logits, labels=rtd_labels) + rtd_numerator = tf.reduce_sum(input_mask * rtd_ind_loss) + rtd_denominator = tf.reduce_sum(input_mask) + rtd_loss = tf.math.divide_no_nan(rtd_numerator, rtd_denominator) + metrics['discriminator_loss'].update_state(rtd_loss) + total_loss = total_loss + \ + self.task_config.model.discriminator_loss_weight * rtd_loss + + if aux_losses: + total_loss += tf.add_n(aux_losses) + + metrics['total_loss'].update_state(total_loss) + return total_loss + + def build_inputs(self, params, input_context=None): + """Returns tf.data.Dataset for pretraining.""" + if params.input_path == 'dummy': + + def dummy_data(_): + dummy_ids = tf.zeros((1, params.seq_length), dtype=tf.int32) + dummy_lm = tf.zeros((1, params.max_predictions_per_seq), dtype=tf.int32) + return dict( + 
input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids, + masked_lm_positions=dummy_lm, + masked_lm_ids=dummy_lm, + masked_lm_weights=tf.cast(dummy_lm, dtype=tf.float32), + next_sentence_labels=tf.zeros((1, 1), dtype=tf.int32)) + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset + + return pretrain_dataloader.BertPretrainDataLoader(params).load( + input_context) + + def build_metrics(self, training=None): + del training + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='masked_lm_accuracy'), + tf.keras.metrics.Mean(name='lm_example_loss'), + tf.keras.metrics.SparseCategoricalAccuracy( + name='discriminator_accuracy'), + ] + if self.task_config.train_data.use_next_sentence_label: + metrics.append( + tf.keras.metrics.SparseCategoricalAccuracy( + name='next_sentence_accuracy')) + metrics.append(tf.keras.metrics.Mean(name='next_sentence_loss')) + + metrics.append(tf.keras.metrics.Mean(name='discriminator_loss')) + metrics.append(tf.keras.metrics.Mean(name='total_loss')) + + return metrics + + def process_metrics(self, metrics, labels, model_outputs): + metrics = dict([(metric.name, metric) for metric in metrics]) + if 'masked_lm_accuracy' in metrics: + metrics['masked_lm_accuracy'].update_state(labels['masked_lm_ids'], + model_outputs['lm_outputs'], + labels['masked_lm_weights']) + if 'next_sentence_accuracy' in metrics: + metrics['next_sentence_accuracy'].update_state( + labels['next_sentence_labels'], model_outputs['sentence_outputs']) + if 'discriminator_accuracy' in metrics: + disc_logits_expanded = tf.expand_dims(model_outputs['disc_logits'], -1) + discrim_full_logits = tf.concat( + [-1.0 * disc_logits_expanded, disc_logits_expanded], -1) + metrics['discriminator_accuracy'].update_state( + model_outputs['disc_label'], discrim_full_logits, + labels['input_mask']) + + def train_step(self, inputs, model: 
tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, metrics): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + with tf.GradientTape() as tape: + outputs = model(inputs, training=True) + # Computes per-replica loss. + loss = self.build_losses( + labels=inputs, + model_outputs=outputs, + metrics=metrics, + aux_losses=model.losses) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + optimizer.apply_gradients(list(zip(grads, tvars))) + self.process_metrics(metrics, inputs, outputs) + return {self.loss: loss} + + def validation_step(self, inputs, model: tf.keras.Model, metrics): + """Validation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + outputs = model(inputs, training=False) + loss = self.build_losses( + labels=inputs, + model_outputs=outputs, + metrics=metrics, + aux_losses=model.losses) + self.process_metrics(metrics, inputs, outputs) + return {self.loss: loss} diff --git a/official/nlp/tasks/electra_task_test.py b/official/nlp/tasks/electra_task_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4f775d26906dc93f78b3fdd66f1cbb230c558104 --- /dev/null +++ b/official/nlp/tasks/electra_task_test.py @@ -0,0 +1,60 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for official.nlp.tasks.electra_task.""" + +import tensorflow as tf + +from official.nlp.configs import bert +from official.nlp.configs import electra +from official.nlp.configs import encoders +from official.nlp.data import pretrain_dataloader +from official.nlp.tasks import electra_task + + +class ElectraPretrainTaskTest(tf.test.TestCase): + + def test_task(self): + config = electra_task.ElectraPretrainConfig( + model=electra.ElectraPretrainerConfig( + generator_encoder=encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, + num_layers=1)), + discriminator_encoder=encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, + num_layers=1)), + num_masked_tokens=20, + sequence_length=128, + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=10, num_classes=2, name="next_sentence") + ]), + train_data=pretrain_dataloader.BertPretrainDataConfig( + input_path="dummy", + max_predictions_per_seq=20, + seq_length=128, + global_batch_size=1)) + task = electra_task.ElectraPretrainTask(config) + model = task.build_model() + metrics = task.build_metrics() + dataset = task.build_inputs(config.train_data) + + iterator = iter(dataset) + optimizer = tf.keras.optimizers.SGD(lr=0.1) + task.train_step(next(iterator), model, optimizer, metrics=metrics) + task.validation_step(next(iterator), model, metrics=metrics) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/tasks/masked_lm.py b/official/nlp/tasks/masked_lm.py index 512d9b67106e2d4351cb7addb93e876d7d43bceb..8e5802ada291c332ed80d874030b5f36f099f835 
100644 --- a/official/nlp/tasks/masked_lm.py +++ b/official/nlp/tasks/masked_lm.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,68 +11,90 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Masked language task.""" -from absl import logging + import dataclasses import tensorflow as tf from official.core import base_task -from official.modeling.hyperparams import config_definitions as cfg +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling import tf_utils from official.nlp.configs import bert +from official.nlp.configs import encoders from official.nlp.data import data_loader_factory +from official.nlp.modeling import layers +from official.nlp.modeling import models @dataclasses.dataclass class MaskedLMConfig(cfg.TaskConfig): """The model config.""" - init_checkpoint: str = '' - model: bert.BertPretrainerConfig = bert.BertPretrainerConfig(cls_heads=[ + model: bert.PretrainerConfig = bert.PretrainerConfig(cls_heads=[ bert.ClsHeadConfig( inner_dim=768, num_classes=2, dropout_rate=0.1, name='next_sentence') ]) + # TODO(b/154564893): Mathematically, scale_loss should be True. + # However, it works better with scale_loss being False. 
+ scale_loss: bool = False train_data: cfg.DataConfig = cfg.DataConfig() validation_data: cfg.DataConfig = cfg.DataConfig() -@base_task.register_task_cls(MaskedLMConfig) +@task_factory.register_task_cls(MaskedLMConfig) class MaskedLMTask(base_task.Task): - """Mock task object for testing.""" - - def build_model(self): - return bert.instantiate_pretrainer_from_cfg(self.task_config.model) + """Task object for Mask language modeling.""" + + def _build_encoder(self, encoder_cfg): + return encoders.build_encoder(encoder_cfg) + + def build_model(self, params=None): + config = params or self.task_config.model + encoder_cfg = config.encoder + encoder_network = self._build_encoder(encoder_cfg) + cls_heads = [ + layers.ClassificationHead(**cfg.as_dict()) for cfg in config.cls_heads + ] if config.cls_heads else [] + return models.BertPretrainerV2( + mlm_activation=tf_utils.get_activation(config.mlm_activation), + mlm_initializer=tf.keras.initializers.TruncatedNormal( + stddev=config.mlm_initializer_range), + encoder_network=encoder_network, + classification_heads=cls_heads) def build_losses(self, labels, model_outputs, metrics, aux_losses=None) -> tf.Tensor: - metrics = dict([(metric.name, metric) for metric in metrics]) - lm_prediction_losses = tf.keras.losses.sparse_categorical_crossentropy( - labels['masked_lm_ids'], - tf.cast(model_outputs['lm_output'], tf.float32), - from_logits=True) - lm_label_weights = labels['masked_lm_weights'] - lm_numerator_loss = tf.reduce_sum(lm_prediction_losses * lm_label_weights) - lm_denominator_loss = tf.reduce_sum(lm_label_weights) - mlm_loss = tf.math.divide_no_nan(lm_numerator_loss, lm_denominator_loss) - metrics['lm_example_loss'].update_state(mlm_loss) - if 'next_sentence_labels' in labels: - sentence_labels = labels['next_sentence_labels'] - sentence_outputs = tf.cast( - model_outputs['next_sentence'], dtype=tf.float32) - sentence_loss = tf.reduce_mean( - tf.keras.losses.sparse_categorical_crossentropy(sentence_labels, - 
sentence_outputs, - from_logits=True)) - metrics['next_sentence_loss'].update_state(sentence_loss) - total_loss = mlm_loss + sentence_loss - else: - total_loss = mlm_loss - - if aux_losses: - total_loss += tf.add_n(aux_losses) - return total_loss + with tf.name_scope('MaskedLMTask/losses'): + metrics = dict([(metric.name, metric) for metric in metrics]) + lm_prediction_losses = tf.keras.losses.sparse_categorical_crossentropy( + labels['masked_lm_ids'], + tf.cast(model_outputs['mlm_logits'], tf.float32), + from_logits=True) + lm_label_weights = labels['masked_lm_weights'] + lm_numerator_loss = tf.reduce_sum(lm_prediction_losses * + lm_label_weights) + lm_denominator_loss = tf.reduce_sum(lm_label_weights) + mlm_loss = tf.math.divide_no_nan(lm_numerator_loss, lm_denominator_loss) + metrics['lm_example_loss'].update_state(mlm_loss) + if 'next_sentence_labels' in labels: + sentence_labels = labels['next_sentence_labels'] + sentence_outputs = tf.cast( + model_outputs['next_sentence'], dtype=tf.float32) + sentence_loss = tf.reduce_mean( + tf.keras.losses.sparse_categorical_crossentropy( + sentence_labels, sentence_outputs, from_logits=True)) + metrics['next_sentence_loss'].update_state(sentence_loss) + total_loss = mlm_loss + sentence_loss + else: + total_loss = mlm_loss + + if aux_losses: + total_loss += tf.add_n(aux_losses) + return total_loss def build_inputs(self, params, input_context=None): """Returns tf.data.Dataset for pretraining.""" @@ -114,14 +135,15 @@ class MaskedLMTask(base_task.Task): return metrics def process_metrics(self, metrics, labels, model_outputs): - metrics = dict([(metric.name, metric) for metric in metrics]) - if 'masked_lm_accuracy' in metrics: - metrics['masked_lm_accuracy'].update_state(labels['masked_lm_ids'], - model_outputs['lm_output'], - labels['masked_lm_weights']) - if 'next_sentence_accuracy' in metrics: - metrics['next_sentence_accuracy'].update_state( - labels['next_sentence_labels'], model_outputs['next_sentence']) + with 
tf.name_scope('MaskedLMTask/process_metrics'): + metrics = dict([(metric.name, metric) for metric in metrics]) + if 'masked_lm_accuracy' in metrics: + metrics['masked_lm_accuracy'].update_state( + labels['masked_lm_ids'], model_outputs['mlm_logits'], + labels['masked_lm_weights']) + if 'next_sentence_accuracy' in metrics: + metrics['next_sentence_accuracy'].update_state( + labels['next_sentence_labels'], model_outputs['next_sentence']) def train_step(self, inputs, model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics): @@ -144,12 +166,15 @@ class MaskedLMTask(base_task.Task): model_outputs=outputs, metrics=metrics, aux_losses=model.losses) - # Scales loss as the default gradients allreduce performs sum inside the - # optimizer. - # TODO(b/154564893): enable loss scaling. - # scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync + if self.task_config.scale_loss: + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync tvars = model.trainable_variables - grads = tape.gradient(loss, tvars) + if self.task_config.scale_loss: + grads = tape.gradient(scaled_loss, tvars) + else: + grads = tape.gradient(loss, tvars) optimizer.apply_gradients(list(zip(grads, tvars))) self.process_metrics(metrics, inputs, outputs) return {self.loss: loss} @@ -173,17 +198,3 @@ class MaskedLMTask(base_task.Task): aux_losses=model.losses) self.process_metrics(metrics, inputs, outputs) return {self.loss: loss} - - def initialize(self, model: tf.keras.Model): - ckpt_dir_or_file = self.task_config.init_checkpoint - if tf.io.gfile.isdir(ckpt_dir_or_file): - ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) - if not ckpt_dir_or_file: - return - # Restoring all modules defined by the model, e.g. encoder, masked_lm and - # cls pooler. The best initialization may vary case by case. 
- ckpt = tf.train.Checkpoint(**model.checkpoint_items) - status = ckpt.read(ckpt_dir_or_file) - status.expect_partial().assert_existing_objects_matched() - logging.info('Finished loading pretrained checkpoint from %s', - ckpt_dir_or_file) diff --git a/official/nlp/tasks/masked_lm_test.py b/official/nlp/tasks/masked_lm_test.py index 38970a378bb1dc12738e9575a660056913a891fa..14774e9859f3389ddb6839a2c1eeacbe4077505e 100644 --- a/official/nlp/tasks/masked_lm_test.py +++ b/official/nlp/tasks/masked_lm_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests for official.nlp.tasks.masked_lm.""" import tensorflow as tf @@ -28,8 +27,11 @@ class MLMTaskTest(tf.test.TestCase): def test_task(self): config = masked_lm.MaskedLMConfig( init_checkpoint=self.get_temp_dir(), - model=bert.BertPretrainerConfig( - encoders.TransformerEncoderConfig(vocab_size=30522, num_layers=1), + scale_loss=True, + model=bert.PretrainerConfig( + encoder=encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, + num_layers=1)), cls_heads=[ bert.ClsHeadConfig( inner_dim=10, num_classes=2, name="next_sentence") @@ -50,8 +52,7 @@ class MLMTaskTest(tf.test.TestCase): task.validation_step(next(iterator), model, metrics=metrics) # Saves a checkpoint. 
- ckpt = tf.train.Checkpoint( - model=model, **model.checkpoint_items) + ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items) ckpt.save(config.init_checkpoint) task.initialize(model) diff --git a/official/nlp/tasks/question_answering.py b/official/nlp/tasks/question_answering.py index 69bab88808d2d41e88279aa8d0fc18433c985276..aee3fab883434c89114a7633ef5fd934d1122eed 100644 --- a/official/nlp/tasks/question_answering.py +++ b/official/nlp/tasks/question_answering.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,19 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Question answering task.""" -import collections +import functools import json import os +from typing import List, Optional + from absl import logging import dataclasses +import orbit import tensorflow as tf -import tensorflow_hub as hub from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory from official.modeling.hyperparams import base_config -from official.modeling.hyperparams import config_definitions as cfg from official.nlp.bert import squad_evaluate_v1_1 from official.nlp.bert import squad_evaluate_v2_0 from official.nlp.bert import tokenization @@ -39,8 +41,7 @@ from official.nlp.tasks import utils @dataclasses.dataclass class ModelConfig(base_config.Config): """A base span labeler configuration.""" - encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) + encoder: encoders.EncoderConfig = 
encoders.EncoderConfig() @dataclasses.dataclass @@ -57,19 +58,26 @@ class QuestionAnsweringConfig(cfg.TaskConfig): validation_data: cfg.DataConfig = cfg.DataConfig() -@base_task.register_task_cls(QuestionAnsweringConfig) +@dataclasses.dataclass +class RawAggregatedResult: + """Raw representation for SQuAD predictions.""" + unique_id: int + start_logits: List[float] + end_logits: List[float] + start_indexes: Optional[List[int]] = None + end_indexes: Optional[List[int]] = None + class_logits: Optional[float] = None + + +@task_factory.register_task_cls(QuestionAnsweringConfig) class QuestionAnsweringTask(base_task.Task): """Task object for question answering.""" - def __init__(self, params=cfg.TaskConfig, logging_dir=None): - super(QuestionAnsweringTask, self).__init__(params, logging_dir) - if params.hub_module_url and params.init_checkpoint: - raise ValueError('At most one of `hub_module_url` and ' - '`init_checkpoint` can be specified.') - if params.hub_module_url: - self._hub_module = hub.load(params.hub_module_url) - else: - self._hub_module = None + def __init__(self, params: cfg.TaskConfig, logging_dir=None, name=None): + super().__init__(params, logging_dir, name=name) + + if params.validation_data is None: + return if params.validation_data.tokenization == 'WordPiece': self.squad_lib = squad_lib_wp @@ -83,17 +91,24 @@ class QuestionAnsweringTask(base_task.Task): self._tf_record_input_path, self._eval_examples, self._eval_features = ( self._preprocess_eval_data(params.validation_data)) + def set_preprocessed_eval_input_path(self, eval_input_path): + """Sets the path to the preprocessed eval data.""" + self._tf_record_input_path = eval_input_path + def build_model(self): - if self._hub_module: - encoder_network = utils.get_encoder_from_hub(self._hub_module) + if self.task_config.hub_module_url and self.task_config.init_checkpoint: + raise ValueError('At most one of `hub_module_url` and ' + '`init_checkpoint` can be specified.') + if 
self.task_config.hub_module_url: + encoder_network = utils.get_encoder_from_hub( + self.task_config.hub_module_url) else: - encoder_network = encoders.instantiate_encoder_from_cfg( - self.task_config.model.encoder) - # Currently, we only supports bert-style question answering finetuning. + encoder_network = encoders.build_encoder(self.task_config.model.encoder) + encoder_cfg = self.task_config.model.encoder.get() return models.BertSpanLabeler( network=encoder_network, initializer=tf.keras.initializers.TruncatedNormal( - stddev=self.task_config.model.encoder.initializer_range)) + stddev=encoder_cfg.initializer_range)) def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: start_positions = labels['start_positions'] @@ -105,9 +120,7 @@ class QuestionAnsweringTask(base_task.Task): tf.cast(start_logits, dtype=tf.float32), from_logits=True) end_loss = tf.keras.losses.sparse_categorical_crossentropy( - end_positions, - tf.cast(end_logits, dtype=tf.float32), - from_logits=True) + end_positions, tf.cast(end_logits, dtype=tf.float32), from_logits=True) loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2 return loss @@ -133,20 +146,29 @@ class QuestionAnsweringTask(base_task.Task): eval_features.append(feature) eval_writer.process_feature(feature) + # XLNet preprocesses SQuAD examples in a P, Q, class order whereas + # BERT preprocesses in a class, Q, P order. + xlnet_ordering = self.task_config.model.encoder.type == 'xlnet' kwargs = dict( examples=eval_examples, - tokenizer=tokenization.FullTokenizer( - vocab_file=params.vocab_file, - do_lower_case=params.do_lower_case), max_seq_length=params.seq_length, doc_stride=params.doc_stride, max_query_length=params.query_length, is_training=False, output_fn=_append_feature, - batch_size=params.global_batch_size) + batch_size=params.global_batch_size, + xlnet_format=xlnet_ordering) + if params.tokenization == 'SentencePiece': # squad_lib_sp requires one more argument 'do_lower_case'. 
kwargs['do_lower_case'] = params.do_lower_case + kwargs['tokenizer'] = tokenization.FullSentencePieceTokenizer( + sp_model_file=params.vocab_file) + elif params.tokenization == 'WordPiece': + kwargs['tokenizer'] = tokenization.FullTokenizer( + vocab_file=params.vocab_file, do_lower_case=params.do_lower_case) + else: + raise ValueError('Unexpected tokenization: %s' % params.tokenization) eval_dataset_size = self.squad_lib.convert_examples_to_features(**kwargs) eval_writer.close() @@ -159,23 +181,25 @@ class QuestionAnsweringTask(base_task.Task): return eval_writer.filename, eval_examples, eval_features + def _dummy_data(self, params, _): + """Returns dummy data.""" + dummy_ids = tf.zeros((1, params.seq_length), dtype=tf.int32) + x = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids) + y = dict( + start_positions=tf.constant(0, dtype=tf.int32), + end_positions=tf.constant(1, dtype=tf.int32), + is_impossible=tf.constant(0, dtype=tf.int32)) + return x, y + def build_inputs(self, params, input_context=None): """Returns tf.data.Dataset for sentence_prediction task.""" if params.input_path == 'dummy': - # Dummy training data for unit test. 
- def dummy_data(_): - dummy_ids = tf.zeros((1, params.seq_length), dtype=tf.int32) - x = dict( - input_word_ids=dummy_ids, - input_mask=dummy_ids, - input_type_ids=dummy_ids) - y = dict( - start_positions=tf.constant(0, dtype=tf.int32), - end_positions=tf.constant(1, dtype=tf.int32)) - return (x, y) - dataset = tf.data.Dataset.range(1) dataset = dataset.repeat() + dummy_data = functools.partial(self._dummy_data, params) dataset = dataset.map( dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) return dataset @@ -186,11 +210,14 @@ class QuestionAnsweringTask(base_task.Task): input_path = self._tf_record_input_path dataloader_params = params.replace(input_path=input_path) - return data_loader_factory.get_data_loader( - dataloader_params).load(input_context) + return data_loader_factory.get_data_loader(dataloader_params).load( + input_context) def build_metrics(self, training=None): - del training + if not training: + # We cannot compute start/end_position_accuracy because start/end_position + # labels are not available in the validation dataset (b/173794928). + return [] # TODO(lehou): a list of metrics doesn't work the same as in compile/fit. 
metrics = [ tf.keras.metrics.SparseCategoricalAccuracy( @@ -203,56 +230,53 @@ class QuestionAnsweringTask(base_task.Task): def process_metrics(self, metrics, labels, model_outputs): metrics = dict([(metric.name, metric) for metric in metrics]) start_logits, end_logits = model_outputs - metrics['start_position_accuracy'].update_state( - labels['start_positions'], start_logits) - metrics['end_position_accuracy'].update_state( - labels['end_positions'], end_logits) + metrics['start_position_accuracy'].update_state(labels['start_positions'], + start_logits) + metrics['end_position_accuracy'].update_state(labels['end_positions'], + end_logits) def process_compiled_metrics(self, compiled_metrics, labels, model_outputs): start_logits, end_logits = model_outputs compiled_metrics.update_state( y_true=labels, # labels has keys 'start_positions' and 'end_positions'. - y_pred={'start_positions': start_logits, 'end_positions': end_logits}) + y_pred={ + 'start_positions': start_logits, + 'end_positions': end_logits + }) def validation_step(self, inputs, model: tf.keras.Model, metrics=None): features, _ = inputs unique_ids = features.pop('unique_ids') model_outputs = self.inference_step(features, model) start_logits, end_logits = model_outputs + # We cannot compute validation_loss here, because start/end_position + # labels are not available in the validation dataset (b/173794928). logs = { - self.loss: 0.0, # TODO(lehou): compute the real validation loss. 'unique_ids': unique_ids, 'start_logits': start_logits, 'end_logits': end_logits, } return logs - raw_aggregated_result = collections.namedtuple( - 'RawResult', ['unique_id', 'start_logits', 'end_logits']) - def aggregate_logs(self, state=None, step_outputs=None): assert step_outputs is not None, 'Got no logs from self.validation_step.' 
if state is None: state = [] - for unique_ids, start_logits, end_logits in zip( - step_outputs['unique_ids'], - step_outputs['start_logits'], - step_outputs['end_logits']): - u_ids, s_logits, e_logits = ( - unique_ids.numpy(), start_logits.numpy(), end_logits.numpy()) - if u_ids.size == 1: - u_ids = [u_ids] - s_logits = [s_logits] - e_logits = [e_logits] - for values in zip(u_ids, s_logits, e_logits): - state.append(self.raw_aggregated_result( + for outputs in zip(step_outputs['unique_ids'], + step_outputs['start_logits'], + step_outputs['end_logits']): + numpy_values = [ + output.numpy() for output in outputs if output is not None] + + for values in zip(*numpy_values): + state.append(RawAggregatedResult( unique_id=values[0], - start_logits=values[1].tolist(), - end_logits=values[2].tolist())) + start_logits=values[1], + end_logits=values[2])) return state - def reduce_aggregated_logs(self, aggregated_logs): + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): all_predictions, _, scores_diff = ( self.squad_lib.postprocess_output( self._eval_examples, @@ -265,29 +289,210 @@ class QuestionAnsweringTask(base_task.Task): self.task_config.validation_data.version_2_with_negative), null_score_diff_threshold=( self.task_config.null_score_diff_threshold), + xlnet_format=self.task_config.validation_data.xlnet_format, verbose=False)) - with tf.io.gfile.GFile( - self.task_config.validation_data.input_path, 'r') as reader: + with tf.io.gfile.GFile(self.task_config.validation_data.input_path, + 'r') as reader: dataset_json = json.load(reader) pred_dataset = dataset_json['data'] if self.task_config.validation_data.version_2_with_negative: - eval_metrics = squad_evaluate_v2_0.evaluate( - pred_dataset, all_predictions, scores_diff) + eval_metrics = squad_evaluate_v2_0.evaluate(pred_dataset, all_predictions, + scores_diff) + eval_metrics = { + 'exact_match': eval_metrics['final_exact'], + 'exact_match_threshold': eval_metrics['final_exact_thresh'], + 'final_f1': 
eval_metrics['final_f1'] / 100.0, # scale back to [0, 1]. + 'f1_threshold': eval_metrics['final_f1_thresh'], + 'has_answer_exact_match': eval_metrics['HasAns_exact'], + 'has_answer_f1': eval_metrics['HasAns_f1'] + } else: eval_metrics = squad_evaluate_v1_1.evaluate(pred_dataset, all_predictions) + eval_metrics = { + 'exact_match': eval_metrics['exact_match'], + 'final_f1': eval_metrics['final_f1'] + } return eval_metrics - def initialize(self, model): - """Load a pretrained checkpoint (if exists) and then train from iter 0.""" - ckpt_dir_or_file = self.task_config.init_checkpoint - if tf.io.gfile.isdir(ckpt_dir_or_file): - ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) - if not ckpt_dir_or_file: - return - ckpt = tf.train.Checkpoint(**model.checkpoint_items) - status = ckpt.read(ckpt_dir_or_file) - status.expect_partial().assert_existing_objects_matched() - logging.info('Finished loading pretrained checkpoint from %s', - ckpt_dir_or_file) +@dataclasses.dataclass +class XLNetQuestionAnsweringConfig(QuestionAnsweringConfig): + """The config for the XLNet variation of QuestionAnswering.""" + pass + + +@task_factory.register_task_cls(XLNetQuestionAnsweringConfig) +class XLNetQuestionAnsweringTask(QuestionAnsweringTask): + """XLNet variant of the Question Answering Task. + + The main differences include: + - The encoder is an `XLNetBase` class. + - The `SpanLabeling` head is an instance of `XLNetSpanLabeling` which + predicts start/end positions and impossibility score. During inference, + it predicts the top N scores and indexes. 
+ """ + + def build_model(self): + if self.task_config.hub_module_url and self.task_config.init_checkpoint: + raise ValueError('At most one of `hub_module_url` and ' + '`init_checkpoint` can be specified.') + if self.task_config.hub_module_url: + encoder_network = utils.get_encoder_from_hub( + self.task_config.hub_module_url) + else: + encoder_network = encoders.build_encoder(self.task_config.model.encoder) + encoder_cfg = self.task_config.model.encoder.get() + return models.XLNetSpanLabeler( + network=encoder_network, + start_n_top=self.task_config.n_best_size, + end_n_top=self.task_config.n_best_size, + initializer=tf.keras.initializers.RandomNormal( + stddev=encoder_cfg.initializer_range)) + + def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: + start_positions = labels['start_positions'] + end_positions = labels['end_positions'] + is_impossible = labels['is_impossible'] + is_impossible = tf.cast(tf.reshape(is_impossible, [-1]), tf.float32) + + start_logits = model_outputs['start_logits'] + end_logits = model_outputs['end_logits'] + class_logits = model_outputs['class_logits'] + + start_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + start_positions, start_logits) + end_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + end_positions, end_logits) + is_impossible_loss = tf.keras.losses.binary_crossentropy( + is_impossible, class_logits, from_logits=True) + + loss = (tf.reduce_mean(start_loss) + tf.reduce_mean(end_loss)) / 2 + loss += tf.reduce_mean(is_impossible_loss) / 2 + return loss + + def process_metrics(self, metrics, labels, model_outputs): + metrics = dict([(metric.name, metric) for metric in metrics]) + start_logits = model_outputs['start_logits'] + end_logits = model_outputs['end_logits'] + metrics['start_position_accuracy'].update_state(labels['start_positions'], + start_logits) + metrics['end_position_accuracy'].update_state(labels['end_positions'], + end_logits) + + def process_compiled_metrics(self, 
compiled_metrics, labels, model_outputs): + start_logits = model_outputs['start_logits'] + end_logits = model_outputs['end_logits'] + compiled_metrics.update_state( + y_true=labels, # labels has keys 'start_positions' and 'end_positions'. + y_pred={ + 'start_positions': start_logits, + 'end_positions': end_logits, + }) + + def _dummy_data(self, params, _): + """Returns dummy data.""" + dummy_ids = tf.zeros((1, params.seq_length), dtype=tf.int32) + zero = tf.constant(0, dtype=tf.int32) + x = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids, + class_index=zero, + is_impossible=zero, + paragraph_mask=dummy_ids, + start_positions=tf.zeros((1), dtype=tf.int32)) + y = dict( + start_positions=tf.zeros((1), dtype=tf.int32), + end_positions=tf.ones((1), dtype=tf.int32), + is_impossible=zero) + return x, y + + def validation_step(self, inputs, model: tf.keras.Model, metrics=None): + features, _ = inputs + unique_ids = features.pop('unique_ids') + model_outputs = self.inference_step(features, model) + start_top_predictions = model_outputs['start_top_predictions'] + end_top_predictions = model_outputs['end_top_predictions'] + start_indexes = model_outputs['start_top_index'] + end_indexes = model_outputs['end_top_index'] + class_logits = model_outputs['class_logits'] + + logs = { + 'unique_ids': unique_ids, + 'start_top_predictions': start_top_predictions, + 'end_top_predictions': end_top_predictions, + 'start_indexes': start_indexes, + 'end_indexes': end_indexes, + 'class_logits': class_logits, + } + return logs + + def aggregate_logs(self, state=None, step_outputs=None): + assert step_outputs is not None, 'Got no logs from self.validation_step.' 
+ if state is None: + state = [] + + for outputs in zip(step_outputs['unique_ids'], + step_outputs['start_top_predictions'], + step_outputs['end_top_predictions'], + step_outputs['start_indexes'], + step_outputs['end_indexes'], + step_outputs['class_logits']): + numpy_values = [ + output.numpy() for output in outputs] + + for (unique_id, start_top_predictions, end_top_predictions, start_indexes, + end_indexes, class_logits) in zip(*numpy_values): + state.append(RawAggregatedResult( + unique_id=unique_id, + start_logits=start_top_predictions.tolist(), + end_logits=end_top_predictions.tolist(), + start_indexes=start_indexes.tolist(), + end_indexes=end_indexes.tolist(), + class_logits=class_logits)) + return state + + +def predict(task: QuestionAnsweringTask, params: cfg.DataConfig, + model: tf.keras.Model): + """Predicts on the input data. + + Args: + task: A `QuestionAnsweringTask` object. + params: A `cfg.DataConfig` object. + model: A keras.Model. + + Returns: + A tuple of `all_predictions`, `all_nbest` and `scores_diff`, which + are dict and can be written to json files including prediction json file, + nbest json file and null_odds json file. + """ + tf_record_input_path, eval_examples, eval_features = ( + task._preprocess_eval_data(params)) # pylint: disable=protected-access + + # `tf_record_input_path` will overwrite `params.input_path`, + # when `task.build_inputs()` is called.
+ task.set_preprocessed_eval_input_path(tf_record_input_path) + + def predict_step(inputs): + """Replicated prediction calculation.""" + return task.validation_step(inputs, model) + + dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(), + task.build_inputs, params) + aggregated_outputs = utils.predict(predict_step, task.aggregate_logs, dataset) + + all_predictions, all_nbest, scores_diff = ( + task.squad_lib.postprocess_output( + eval_examples, + eval_features, + aggregated_outputs, + task.task_config.n_best_size, + task.task_config.max_answer_length, + task.task_config.validation_data.do_lower_case, + version_2_with_negative=(params.version_2_with_negative), + null_score_diff_threshold=task.task_config.null_score_diff_threshold, + xlnet_format=task.task_config.validation_data.xlnet_format, + verbose=False)) + return all_predictions, all_nbest, scores_diff diff --git a/official/nlp/tasks/question_answering_test.py b/official/nlp/tasks/question_answering_test.py index 626c3cadb791622b0c1adbb296d5e55c53d8c0a2..aa79e3ae86eaf54dca5318df6fef8ceec48ba703 100644 --- a/official/nlp/tasks/question_answering_test.py +++ b/official/nlp/tasks/question_answering_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,19 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for official.nlp.tasks.question_answering.""" import itertools import json import os + from absl.testing import parameterized import tensorflow as tf -from official.nlp.bert import configs -from official.nlp.bert import export_tfhub from official.nlp.configs import bert from official.nlp.configs import encoders from official.nlp.data import question_answering_dataloader +from official.nlp.tasks import masked_lm from official.nlp.tasks import question_answering @@ -32,21 +31,37 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): super(QuestionAnsweringTaskTest, self).setUp() - self._encoder_config = encoders.TransformerEncoderConfig( - vocab_size=30522, num_layers=1) + self._encoder_config = encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)) self._train_data_config = question_answering_dataloader.QADataConfig( - input_path="dummy", - seq_length=128, - global_batch_size=1) - - val_data = {"version": "1.1", - "data": [{"paragraphs": [ - {"context": "Sky is blue.", - "qas": [{"question": "What is blue?", "id": "1234", - "answers": [{"text": "Sky", "answer_start": 0}, - {"text": "Sky", "answer_start": 0}, - {"text": "Sky", "answer_start": 0}] - }]}]}]} + input_path="dummy", seq_length=128, global_batch_size=1) + + val_data = { + "version": + "1.1", + "data": [{ + "paragraphs": [{ + "context": + "Sky is blue.", + "qas": [{ + "question": + "What is blue?", + "id": + "1234", + "answers": [{ + "text": "Sky", + "answer_start": 0 + }, { + "text": "Sky", + "answer_start": 0 + }, { + "text": "Sky", + "answer_start": 0 + }] + }] + }] + }] + } self._val_input_path = os.path.join(self.get_temp_dir(), "val_data.json") with tf.io.gfile.GFile(self._val_input_path, "w") as writer: writer.write(json.dumps(val_data, indent=4) + "\n") @@ -81,23 +96,27 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, 
parameterized.TestCase): val_dataset = task.build_inputs(config.validation_data) val_iterator = iter(val_dataset) logs = task.validation_step(next(val_iterator), model, metrics=metrics) + # Mock that `logs` is from one replica. + logs = {x: (logs[x],) for x in logs} logs = task.aggregate_logs(step_outputs=logs) metrics = task.reduce_aggregated_logs(logs) self.assertIn("final_f1", metrics) + model.save(os.path.join(self.get_temp_dir(), "saved_model")) - @parameterized.parameters(itertools.product( - (False, True), - ("WordPiece", "SentencePiece"), - )) + @parameterized.parameters( + itertools.product( + (False, True), + ("WordPiece", "SentencePiece"), + )) def test_task(self, version_2_with_negative, tokenization): # Saves a checkpoint. - pretrain_cfg = bert.BertPretrainerConfig( + pretrain_cfg = bert.PretrainerConfig( encoder=self._encoder_config, cls_heads=[ bert.ClsHeadConfig( inner_dim=10, num_classes=3, name="next_sentence") ]) - pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg) + pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg) ckpt = tf.train.Checkpoint( model=pretrain_model, **pretrain_model.checkpoint_items) saved_path = ckpt.save(self.get_temp_dir()) @@ -110,51 +129,132 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase): version_2_with_negative)) self._run_task(config) - def test_task_with_fit(self): + def _export_bert_tfhub(self): + encoder = encoders.build_encoder( + encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))) + encoder_inputs_dict = {x.name: x for x in encoder.inputs} + encoder_output_dict = encoder(encoder_inputs_dict) + core_model = tf.keras.Model( + inputs=encoder_inputs_dict, outputs=encoder_output_dict) + hub_destination = os.path.join(self.get_temp_dir(), "hub") + core_model.save(hub_destination, include_optimizer=False, save_format="tf") + return hub_destination + + def test_task_with_hub(self): + hub_module_url = 
self._export_bert_tfhub() config = question_answering.QuestionAnsweringConfig( + hub_module_url=hub_module_url, model=question_answering.ModelConfig(encoder=self._encoder_config), train_data=self._train_data_config, validation_data=self._get_validation_data_config()) + self._run_task(config) + + @parameterized.named_parameters(("squad1", False), ("squad2", True)) + def test_predict(self, version_2_with_negative): + validation_data = self._get_validation_data_config( + version_2_with_negative=version_2_with_negative) + + config = question_answering.QuestionAnsweringConfig( + model=question_answering.ModelConfig(encoder=self._encoder_config), + train_data=self._train_data_config, + validation_data=validation_data) task = question_answering.QuestionAnsweringTask(config) model = task.build_model() - model = task.compile_model( - model, - optimizer=tf.keras.optimizers.SGD(lr=0.1), - train_step=task.train_step, - metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")]) - dataset = task.build_inputs(config.train_data) - logs = model.fit(dataset, epochs=1, steps_per_epoch=2) - self.assertIn("loss", logs.history) - self.assertIn("start_positions_accuracy", logs.history) - self.assertIn("end_positions_accuracy", logs.history) - def _export_bert_tfhub(self): - bert_config = configs.BertConfig( - vocab_size=30522, - hidden_size=16, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=1) - _, encoder = export_tfhub.create_bert_model(bert_config) - model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(model=encoder) - checkpoint.save(os.path.join(model_checkpoint_dir, "test")) - model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) - - vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt") - with tf.io.gfile.GFile(vocab_file, "w") as f: - f.write("dummy content") + all_predictions, all_nbest, scores_diff = question_answering.predict( + 
task, validation_data, model) + self.assertLen(all_predictions, 1) + self.assertLen(all_nbest, 1) + if version_2_with_negative: + self.assertLen(scores_diff, 1) + else: + self.assertEmpty(scores_diff) - hub_destination = os.path.join(self.get_temp_dir(), "hub") - export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path, - hub_destination, vocab_file) - return hub_destination - def test_task_with_hub(self): - hub_module_url = self._export_bert_tfhub() - config = question_answering.QuestionAnsweringConfig( - hub_module_url=hub_module_url, +class XLNetQuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + super(XLNetQuestionAnsweringTaskTest, self).setUp() + self._encoder_config = encoders.EncoderConfig( + type="xlnet", + xlnet=encoders.XLNetEncoderConfig(vocab_size=30522, num_layers=1)) + self._train_data_config = question_answering_dataloader.QADataConfig( + input_path="dummy", seq_length=128, + global_batch_size=2, xlnet_format=True) + + val_data = { + "version": + "2.0", + "data": [{ + "paragraphs": [{ + "context": + "Sky is blue.", + "qas": [{ + "question": + "What is blue?", + "id": + "1234", + "answers": [{ + "text": "Sky", + "answer_start": 0 + }, { + "text": "Sky", + "answer_start": 0 + }, { + "text": "Sky", + "answer_start": 0 + }] + }] + }] + }] + } + self._val_input_path = os.path.join(self.get_temp_dir(), "val_data.json") + with tf.io.gfile.GFile(self._val_input_path, "w") as writer: + writer.write(json.dumps(val_data, indent=4) + "\n") + + self._test_vocab = os.path.join(self.get_temp_dir(), "vocab.txt") + with tf.io.gfile.GFile(self._test_vocab, "w") as writer: + writer.write("[PAD]\n[UNK]\n[CLS]\n[SEP]\n[MASK]\nsky\nis\nblue\n") + + def _get_validation_data_config(self): + return question_answering_dataloader.QADataConfig( + is_training=False, + input_path=self._val_input_path, + input_preprocessed_data_path=self.get_temp_dir(), + seq_length=128, + global_batch_size=2, + version_2_with_negative=True, + 
vocab_file=self._test_vocab, + tokenization="WordPiece", + do_lower_case=True, + xlnet_format=True) + + def _run_task(self, config): + task = question_answering.XLNetQuestionAnsweringTask(config) + model = task.build_model() + metrics = task.build_metrics() + task.initialize(model) + + train_dataset = task.build_inputs(config.train_data) + train_iterator = iter(train_dataset) + optimizer = tf.keras.optimizers.SGD(lr=0.1) + task.train_step(next(train_iterator), model, optimizer, metrics=metrics) + + val_dataset = task.build_inputs(config.validation_data) + val_iterator = iter(val_dataset) + logs = task.validation_step(next(val_iterator), model, metrics=metrics) + # Mock that `logs` is from one replica. + logs = {x: (logs[x],) for x in logs} + logs = task.aggregate_logs(step_outputs=logs) + metrics = task.reduce_aggregated_logs(logs) + self.assertIn("final_f1", metrics) + self.assertNotIn("loss", metrics) + + def test_task(self): + config = question_answering.XLNetQuestionAnsweringConfig( + init_checkpoint="", + n_best_size=5, model=question_answering.ModelConfig(encoder=self._encoder_config), train_data=self._train_data_config, validation_data=self._get_validation_data_config()) diff --git a/official/nlp/tasks/sentence_prediction.py b/official/nlp/tasks/sentence_prediction.py index 38d1dd4c002d4755d17cc292a90d66d4be24f055..64b9835fa6df7794b50e03fdf6eb06b8b5426690 100644 --- a/official/nlp/tasks/sentence_prediction.py +++ b/official/nlp/tasks/sentence_prediction.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,32 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Sentence prediction (classification) task.""" +from typing import List, Union, Optional + from absl import logging import dataclasses import numpy as np +import orbit from scipy import stats from sklearn import metrics as sklearn_metrics import tensorflow as tf -import tensorflow_hub as hub from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling import tf_utils from official.modeling.hyperparams import base_config -from official.modeling.hyperparams import config_definitions as cfg from official.nlp.configs import encoders from official.nlp.data import data_loader_factory from official.nlp.modeling import models from official.nlp.tasks import utils +METRIC_TYPES = frozenset( + ['accuracy', 'matthews_corrcoef', 'pearson_spearman_corr']) + @dataclasses.dataclass class ModelConfig(base_config.Config): """A classifier/regressor configuration.""" num_classes: int = 0 use_encoder_pooler: bool = False - encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) + encoder: encoders.EncoderConfig = encoders.EncoderConfig() @dataclasses.dataclass @@ -55,43 +60,50 @@ class SentencePredictionConfig(cfg.TaskConfig): validation_data: cfg.DataConfig = cfg.DataConfig() -@base_task.register_task_cls(SentencePredictionConfig) +@task_factory.register_task_cls(SentencePredictionConfig) class SentencePredictionTask(base_task.Task): """Task object for sentence_prediction.""" - def __init__(self, params=cfg.TaskConfig, logging_dir=None): - super(SentencePredictionTask, self).__init__(params, logging_dir) - if params.hub_module_url and params.init_checkpoint: - raise ValueError('At most one of `hub_module_url` and ' - '`init_checkpoint` can be specified.') - if params.hub_module_url: - self._hub_module = hub.load(params.hub_module_url) - else: - self._hub_module = None + def 
__init__(self, params: cfg.TaskConfig, logging_dir=None, name=None): + super().__init__(params, logging_dir, name=name) + if params.metric_type not in METRIC_TYPES: + raise ValueError('Invalid metric_type: {}'.format(params.metric_type)) self.metric_type = params.metric_type def build_model(self): - if self._hub_module: - encoder_network = utils.get_encoder_from_hub(self._hub_module) + if self.task_config.hub_module_url and self.task_config.init_checkpoint: + raise ValueError('At most one of `hub_module_url` and ' + '`init_checkpoint` can be specified.') + if self.task_config.hub_module_url: + encoder_network = utils.get_encoder_from_hub( + self.task_config.hub_module_url) else: - encoder_network = encoders.instantiate_encoder_from_cfg( - self.task_config.model.encoder) - - # Currently, we only supports bert-style sentence prediction finetuning. - return models.BertClassifier( - network=encoder_network, - num_classes=self.task_config.model.num_classes, - initializer=tf.keras.initializers.TruncatedNormal( - stddev=self.task_config.model.encoder.initializer_range), - use_encoder_pooler=self.task_config.model.use_encoder_pooler) + encoder_network = encoders.build_encoder(self.task_config.model.encoder) + encoder_cfg = self.task_config.model.encoder.get() + if self.task_config.model.encoder.type == 'xlnet': + return models.XLNetClassifier( + network=encoder_network, + num_classes=self.task_config.model.num_classes, + initializer=tf.keras.initializers.RandomNormal( + stddev=encoder_cfg.initializer_range)) + else: + return models.BertClassifier( + network=encoder_network, + num_classes=self.task_config.model.num_classes, + initializer=tf.keras.initializers.TruncatedNormal( + stddev=encoder_cfg.initializer_range), + use_encoder_pooler=self.task_config.model.use_encoder_pooler) def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: - loss = tf.keras.losses.sparse_categorical_crossentropy( - labels, tf.cast(model_outputs, tf.float32), from_logits=True) 
+ if self.task_config.model.num_classes == 1: + loss = tf.keras.losses.mean_squared_error(labels, model_outputs) + else: + loss = tf.keras.losses.sparse_categorical_crossentropy( + labels, tf.cast(model_outputs, tf.float32), from_logits=True) if aux_losses: loss += tf.add_n(aux_losses) - return tf.reduce_mean(loss) + return tf_utils.safe_mean(loss) def build_inputs(self, params, input_context=None): """Returns tf.data.Dataset for sentence_prediction task.""" @@ -103,8 +115,12 @@ class SentencePredictionTask(base_task.Task): input_word_ids=dummy_ids, input_mask=dummy_ids, input_type_ids=dummy_ids) - y = tf.zeros((1, 1), dtype=tf.int32) - return (x, y) + + if self.task_config.model.num_classes == 1: + y = tf.zeros((1,), dtype=tf.float32) + else: + y = tf.zeros((1, 1), dtype=tf.int32) + return x, y dataset = tf.data.Dataset.range(1) dataset = dataset.repeat() @@ -116,7 +132,12 @@ class SentencePredictionTask(base_task.Task): def build_metrics(self, training=None): del training - metrics = [tf.keras.metrics.SparseCategoricalAccuracy(name='cls_accuracy')] + if self.task_config.model.num_classes == 1: + metrics = [tf.keras.metrics.MeanSquaredError()] + else: + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='cls_accuracy') + ] return metrics def process_metrics(self, metrics, labels, model_outputs): @@ -137,8 +158,8 @@ class SentencePredictionTask(base_task.Task): logs = {self.loss: loss} if self.metric_type == 'matthews_corrcoef': logs.update({ - 'sentence_prediction': - tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=0), + 'sentence_prediction': # Ensure one prediction along batch dimension. 
+ tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=1), 'labels': labels, }) @@ -161,16 +182,22 @@ class SentencePredictionTask(base_task.Task): np.concatenate([v.numpy() for v in step_outputs['labels']], axis=0)) return state - def reduce_aggregated_logs(self, aggregated_logs): - if self.metric_type == 'matthews_corrcoef': + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + if self.metric_type == 'accuracy': + return None + elif self.metric_type == 'matthews_corrcoef': preds = np.concatenate(aggregated_logs['sentence_prediction'], axis=0) + preds = np.reshape(preds, -1) labels = np.concatenate(aggregated_logs['labels'], axis=0) + labels = np.reshape(labels, -1) return { self.metric_type: sklearn_metrics.matthews_corrcoef(preds, labels) } - if self.metric_type == 'pearson_spearman_corr': + elif self.metric_type == 'pearson_spearman_corr': preds = np.concatenate(aggregated_logs['sentence_prediction'], axis=0) + preds = np.reshape(preds, -1) labels = np.concatenate(aggregated_logs['labels'], axis=0) + labels = np.reshape(labels, -1) pearson_corr = stats.pearsonr(preds, labels)[0] spearman_corr = stats.spearmanr(preds, labels)[0] corr_metric = (pearson_corr + spearman_corr) / 2 @@ -187,9 +214,8 @@ class SentencePredictionTask(base_task.Task): pretrain2finetune_mapping = { 'encoder': model.checkpoint_items['encoder'], } - # TODO(b/160251903): Investigate why no pooler dense improves finetuning - # accuracies. if self.task_config.init_cls_pooler: + # This option is valid when use_encoder_pooler is false. 
pretrain2finetune_mapping[ 'next_sentence.pooler_dense'] = model.checkpoint_items[ 'sentence_prediction.pooler_dense'] @@ -198,3 +224,70 @@ class SentencePredictionTask(base_task.Task): status.expect_partial().assert_existing_objects_matched() logging.info('Finished loading pretrained checkpoint from %s', ckpt_dir_or_file) + + +def predict(task: SentencePredictionTask, + params: cfg.DataConfig, + model: tf.keras.Model, + params_aug: Optional[cfg.DataConfig] = None, + test_time_aug_wgt: float = 0.3) -> List[Union[int, float]]: + """Predicts on the input data. + + Args: + task: A `SentencePredictionTask` object. + params: A `cfg.DataConfig` object. + model: A keras.Model. + params_aug: A `cfg.DataConfig` object for augmented data. + test_time_aug_wgt: Test time augmentation weight. The prediction score will + use (1. - test_time_aug_wgt) original prediction plus test_time_aug_wgt + augmented prediction. + + Returns: + A list of predictions with length of `num_examples`. For regression task, + each element in the list is the predicted score; for classification task, + each element is the predicted class id. + """ + + def predict_step(inputs): + """Replicated prediction calculation.""" + x, _ = inputs + example_id = x.pop('example_id') + outputs = task.inference_step(x, model) + return dict(example_id=example_id, predictions=outputs) + + def aggregate_fn(state, outputs): + """Concatenates model's outputs.""" + if state is None: + state = [] + + for per_replica_example_id, per_replica_batch_predictions in zip( + outputs['example_id'], outputs['predictions']): + state.extend(zip(per_replica_example_id, per_replica_batch_predictions)) + return state + + dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(), + task.build_inputs, params) + outputs = utils.predict(predict_step, aggregate_fn, dataset) + + # When running on TPU POD, the order of output cannot be maintained, + # so we need to sort by example_id. 
+ outputs = sorted(outputs, key=lambda x: x[0]) + is_regression = task.task_config.model.num_classes == 1 + if params_aug is not None: + dataset_aug = orbit.utils.make_distributed_dataset( + tf.distribute.get_strategy(), task.build_inputs, params_aug) + outputs_aug = utils.predict(predict_step, aggregate_fn, dataset_aug) + outputs_aug = sorted(outputs_aug, key=lambda x: x[0]) + if is_regression: + return [(1. - test_time_aug_wgt) * x[1] + test_time_aug_wgt * y[1] + for x, y in zip(outputs, outputs_aug)] + else: + return [ + tf.argmax( + (1. - test_time_aug_wgt) * x[1] + test_time_aug_wgt * y[1], + axis=-1) for x, y in zip(outputs, outputs_aug) + ] + if is_regression: + return [x[1] for x in outputs] + else: + return [tf.argmax(x[1], axis=-1) for x in outputs] diff --git a/official/nlp/tasks/sentence_prediction_test.py b/official/nlp/tasks/sentence_prediction_test.py index 3bb2f0a0a82b8dbe72e22dc030c948b4c5decef5..94d056fee6b059ac96e0de01780de9499d612934 100644 --- a/official/nlp/tasks/sentence_prediction_test.py +++ b/official/nlp/tasks/sentence_prediction_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,22 +11,52 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for official.nlp.tasks.sentence_prediction.""" import functools import os from absl.testing import parameterized +import numpy as np import tensorflow as tf -from official.nlp.bert import configs -from official.nlp.bert import export_tfhub from official.nlp.configs import bert from official.nlp.configs import encoders from official.nlp.data import sentence_prediction_dataloader +from official.nlp.tasks import masked_lm from official.nlp.tasks import sentence_prediction +def _create_fake_dataset(output_path, seq_length, num_classes, num_examples): + """Creates a fake dataset.""" + writer = tf.io.TFRecordWriter(output_path) + + def create_int_feature(values): + return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values))) + + def create_float_feature(values): + return tf.train.Feature(float_list=tf.train.FloatList(value=list(values))) + + for i in range(num_examples): + features = {} + input_ids = np.random.randint(100, size=(seq_length)) + features["input_ids"] = create_int_feature(input_ids) + features["input_mask"] = create_int_feature(np.ones_like(input_ids)) + features["segment_ids"] = create_int_feature(np.ones_like(input_ids)) + features["segment_ids"] = create_int_feature(np.ones_like(input_ids)) + features["example_id"] = create_int_feature([i]) + + if num_classes == 1: + features["label_ids"] = create_float_feature([np.random.random()]) + else: + features["label_ids"] = create_int_feature( + [np.random.random_integers(0, num_classes - 1, size=())]) + + tf_example = tf.train.Example(features=tf.train.Features(feature=features)) + writer.write(tf_example.SerializeToString()) + writer.close() + + class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): @@ -38,8 +67,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase): def get_model_config(self, num_classes): return 
sentence_prediction.ModelConfig( - encoder=encoders.TransformerEncoderConfig( - vocab_size=30522, num_layers=1), + encoder=encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)), num_classes=num_classes) def _run_task(self, config): @@ -48,42 +77,87 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase): metrics = task.build_metrics() strategy = tf.distribute.get_strategy() - dataset = strategy.experimental_distribute_datasets_from_function( + dataset = strategy.distribute_datasets_from_function( functools.partial(task.build_inputs, config.train_data)) iterator = iter(dataset) optimizer = tf.keras.optimizers.SGD(lr=0.1) task.train_step(next(iterator), model, optimizer, metrics=metrics) + model.save(os.path.join(self.get_temp_dir(), "saved_model")) + return task.validation_step(next(iterator), model, metrics=metrics) + + @parameterized.named_parameters( + ("init_cls_pooler", True), + ("init_encoder", False), + ) + def test_task(self, init_cls_pooler): + # Saves a checkpoint. + pretrain_cfg = bert.PretrainerConfig( + encoder=encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)), + cls_heads=[ + bert.ClsHeadConfig( + inner_dim=768, num_classes=2, name="next_sentence") + ]) + pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg) + # The model variables will be created after the forward call. + _ = pretrain_model(pretrain_model.inputs) + ckpt = tf.train.Checkpoint( + model=pretrain_model, **pretrain_model.checkpoint_items) + init_path = ckpt.save(self.get_temp_dir()) + + # Creates the task. 
+ config = sentence_prediction.SentencePredictionConfig( + init_checkpoint=init_path, + model=self.get_model_config(num_classes=2), + train_data=self._train_data_config, + init_cls_pooler=init_cls_pooler) + task = sentence_prediction.SentencePredictionTask(config) + model = task.build_model() + metrics = task.build_metrics() + dataset = task.build_inputs(config.train_data) + + iterator = iter(dataset) + optimizer = tf.keras.optimizers.SGD(lr=0.1) + task.initialize(model) + task.train_step(next(iterator), model, optimizer, metrics=metrics) task.validation_step(next(iterator), model, metrics=metrics) - def test_task(self): + @parameterized.named_parameters( + { + "testcase_name": "regression", + "num_classes": 1, + }, + { + "testcase_name": "classification", + "num_classes": 2, + }, + ) + def test_metrics_and_losses(self, num_classes): config = sentence_prediction.SentencePredictionConfig( init_checkpoint=self.get_temp_dir(), - model=self.get_model_config(2), + model=self.get_model_config(num_classes), train_data=self._train_data_config) task = sentence_prediction.SentencePredictionTask(config) model = task.build_model() metrics = task.build_metrics() - dataset = task.build_inputs(config.train_data) + if num_classes == 1: + self.assertIsInstance(metrics[0], tf.keras.metrics.MeanSquaredError) + else: + self.assertIsInstance(metrics[0], + tf.keras.metrics.SparseCategoricalAccuracy) + dataset = task.build_inputs(config.train_data) iterator = iter(dataset) optimizer = tf.keras.optimizers.SGD(lr=0.1) task.train_step(next(iterator), model, optimizer, metrics=metrics) - task.validation_step(next(iterator), model, metrics=metrics) - # Saves a checkpoint. 
- pretrain_cfg = bert.BertPretrainerConfig( - encoder=encoders.TransformerEncoderConfig( - vocab_size=30522, num_layers=1), - cls_heads=[ - bert.ClsHeadConfig( - inner_dim=10, num_classes=3, name="next_sentence") - ]) - pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg) - ckpt = tf.train.Checkpoint( - model=pretrain_model, **pretrain_model.checkpoint_items) - ckpt.save(config.init_checkpoint) - task.initialize(model) + logs = task.validation_step(next(iterator), model, metrics=metrics) + loss = logs["loss"].numpy() + if num_classes == 1: + self.assertGreater(loss, 1.0) + else: + self.assertLess(loss, 1.0) @parameterized.parameters(("matthews_corrcoef", 2), ("pearson_spearman_corr", 1)) @@ -108,41 +182,44 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase): aggregated = task.aggregate_logs(state=aggregated, step_outputs=outputs) self.assertIn(metric_type, task.reduce_aggregated_logs(aggregated)) - def test_task_with_fit(self): + def test_np_metrics_cola_partial_batch(self): + train_data_path = os.path.join(self.get_temp_dir(), "train.tf_record") + num_examples = 5 + global_batch_size = 8 + seq_length = 16 + _create_fake_dataset( + train_data_path, + seq_length=seq_length, + num_classes=2, + num_examples=num_examples) + + train_data_config = ( + sentence_prediction_dataloader.SentencePredictionDataConfig( + input_path=train_data_path, + seq_length=seq_length, + is_training=True, + label_type="int", + global_batch_size=global_batch_size, + drop_remainder=False, + include_example_id=True)) + config = sentence_prediction.SentencePredictionConfig( - model=self.get_model_config(2), train_data=self._train_data_config) - task = sentence_prediction.SentencePredictionTask(config) - model = task.build_model() - model = task.compile_model( - model, - optimizer=tf.keras.optimizers.SGD(lr=0.1), - train_step=task.train_step, - metrics=task.build_metrics()) - dataset = task.build_inputs(config.train_data) - logs = model.fit(dataset, 
epochs=1, steps_per_epoch=2) - self.assertIn("loss", logs.history) + metric_type="matthews_corrcoef", + model=self.get_model_config(2), + train_data=train_data_config) + outputs = self._run_task(config) + self.assertEqual(outputs["sentence_prediction"].shape.as_list(), [8, 1]) def _export_bert_tfhub(self): - bert_config = configs.BertConfig( - vocab_size=30522, - hidden_size=16, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=1) - _, encoder = export_tfhub.create_bert_model(bert_config) - model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(model=encoder) - checkpoint.save(os.path.join(model_checkpoint_dir, "test")) - model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) - - vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt") - with tf.io.gfile.GFile(vocab_file, "w") as f: - f.write("dummy content") - + encoder = encoders.build_encoder( + encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))) + encoder_inputs_dict = {x.name: x for x in encoder.inputs} + encoder_output_dict = encoder(encoder_inputs_dict) + core_model = tf.keras.Model( + inputs=encoder_inputs_dict, outputs=encoder_output_dict) hub_destination = os.path.join(self.get_temp_dir(), "hub") - export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path, - hub_destination, vocab_file) + core_model.save(hub_destination, include_optimizer=False, save_format="tf") return hub_destination def test_task_with_hub(self): @@ -153,6 +230,39 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase): train_data=self._train_data_config) self._run_task(config) + @parameterized.named_parameters(("classification", 5), ("regression", 1)) + def test_prediction(self, num_classes): + task_config = sentence_prediction.SentencePredictionConfig( + model=self.get_model_config(num_classes=num_classes), + 
train_data=self._train_data_config) + task = sentence_prediction.SentencePredictionTask(task_config) + model = task.build_model() + + test_data_path = os.path.join(self.get_temp_dir(), "test.tf_record") + seq_length = 16 + num_examples = 100 + _create_fake_dataset( + test_data_path, + seq_length=seq_length, + num_classes=num_classes, + num_examples=num_examples) + + test_data_config = ( + sentence_prediction_dataloader.SentencePredictionDataConfig( + input_path=test_data_path, + seq_length=seq_length, + is_training=False, + label_type="int" if num_classes > 1 else "float", + global_batch_size=16, + drop_remainder=False, + include_example_id=True)) + + predictions = sentence_prediction.predict(task, test_data_config, model) + self.assertLen(predictions, num_examples) + for prediction in predictions: + self.assertEqual(prediction.dtype, + tf.int64 if num_classes > 1 else tf.float32) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/tasks/tagging.py b/official/nlp/tasks/tagging.py index 4f8debccb07f9b644cb1815657d98e7932c3c52f..bf6a3b7b1828fc9ca7e5d6f1d95f0f3d8f8c224a 100644 --- a/official/nlp/tasks/tagging.py +++ b/official/nlp/tasks/tagging.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tagging (e.g., NER/POS) task.""" -import logging from typing import List, Optional, Tuple import dataclasses @@ -23,11 +21,11 @@ import orbit from seqeval import metrics as seqeval_metrics import tensorflow as tf -import tensorflow_hub as hub from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory from official.modeling.hyperparams import base_config -from official.modeling.hyperparams import config_definitions as cfg from official.nlp.configs import encoders from official.nlp.data import data_loader_factory from official.nlp.modeling import models @@ -37,8 +35,7 @@ from official.nlp.tasks import utils @dataclasses.dataclass class ModelConfig(base_config.Config): """A base span labeler configuration.""" - encoder: encoders.TransformerEncoderConfig = ( - encoders.TransformerEncoderConfig()) + encoder: encoders.EncoderConfig = encoders.EncoderConfig() head_dropout: float = 0.1 head_initializer_range: float = 0.02 @@ -81,29 +78,19 @@ def _masked_labels_and_weights(y_true): return masked_y_true, tf.cast(mask, tf.float32) -@base_task.register_task_cls(TaggingConfig) +@task_factory.register_task_cls(TaggingConfig) class TaggingTask(base_task.Task): """Task object for tagging (e.g., NER or POS).""" - def __init__(self, params=cfg.TaskConfig, logging_dir=None): - super(TaggingTask, self).__init__(params, logging_dir) - if params.hub_module_url and params.init_checkpoint: + def build_model(self): + if self.task_config.hub_module_url and self.task_config.init_checkpoint: raise ValueError('At most one of `hub_module_url` and ' '`init_checkpoint` can be specified.') - if not params.class_names: - raise ValueError('TaggingConfig.class_names cannot be empty.') - - if params.hub_module_url: - self._hub_module = hub.load(params.hub_module_url) - else: - self._hub_module = None - - def build_model(self): - if 
self._hub_module: - encoder_network = utils.get_encoder_from_hub(self._hub_module) + if self.task_config.hub_module_url: + encoder_network = utils.get_encoder_from_hub( + self.task_config.hub_module_url) else: - encoder_network = encoders.instantiate_encoder_from_cfg( - self.task_config.model.encoder) + encoder_network = encoders.build_encoder(self.task_config.model.encoder) return models.BertTokenClassifier( network=encoder_network, @@ -111,13 +98,14 @@ class TaggingTask(base_task.Task): initializer=tf.keras.initializers.TruncatedNormal( stddev=self.task_config.model.head_initializer_range), dropout_rate=self.task_config.model.head_dropout, - output='logits') + output='logits', + output_encoder_outputs=True) def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: - model_outputs = tf.cast(model_outputs, tf.float32) + logits = tf.cast(model_outputs['logits'], tf.float32) masked_labels, masked_weights = _masked_labels_and_weights(labels) loss = tf.keras.losses.sparse_categorical_crossentropy( - masked_labels, model_outputs, from_logits=True) + masked_labels, logits, from_logits=True) numerator_loss = tf.reduce_sum(loss * masked_weights) denominator_loss = tf.reduce_sum(masked_weights) loss = tf.math.divide_no_nan(numerator_loss, denominator_loss) @@ -152,8 +140,9 @@ class TaggingTask(base_task.Task): def inference_step(self, inputs, model: tf.keras.Model): """Performs the forward step.""" - logits = model(inputs, training=False) - return {'logits': logits, 'predict_ids': tf.argmax(logits, axis=-1)} + logits = model(inputs, training=False)['logits'] + return {'logits': logits, + 'predict_ids': tf.argmax(logits, axis=-1, output_type=tf.int32)} def validation_step(self, inputs, model: tf.keras.Model, metrics=None): """Validatation step. 
@@ -168,7 +157,7 @@ class TaggingTask(base_task.Task): """ features, labels = inputs outputs = self.inference_step(features, model) - loss = self.build_losses(labels=labels, model_outputs=outputs['logits']) + loss = self.build_losses(labels=labels, model_outputs=outputs) # Negative label ids are padding labels which should be ignored. real_label_index = tf.where(tf.greater_equal(labels, 0)) @@ -200,7 +189,7 @@ class TaggingTask(base_task.Task): state['label_class'].extend(id_to_class_name(step_outputs['label_ids'])) return state - def reduce_aggregated_logs(self, aggregated_logs): + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): """Reduces aggregated logs over validation steps.""" label_class = aggregated_logs['label_class'] predict_class = aggregated_logs['predict_class'] @@ -215,23 +204,10 @@ class TaggingTask(base_task.Task): seqeval_metrics.accuracy_score(label_class, predict_class), } - def initialize(self, model): - """Load a pretrained checkpoint (if exists) and then train from iter 0.""" - ckpt_dir_or_file = self.task_config.init_checkpoint - if tf.io.gfile.isdir(ckpt_dir_or_file): - ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) - if not ckpt_dir_or_file: - return - ckpt = tf.train.Checkpoint(**model.checkpoint_items) - status = ckpt.restore(ckpt_dir_or_file) - status.expect_partial().assert_existing_objects_matched() - logging.info('Finished loading pretrained checkpoint from %s', - ckpt_dir_or_file) - - -def predict(task: TaggingTask, params: cfg.DataConfig, - model: tf.keras.Model) -> Tuple[List[List[int]], List[int]]: +def predict(task: TaggingTask, + params: cfg.DataConfig, + model: tf.keras.Model) -> List[Tuple[int, int, List[int]]]: """Predicts on the input data. Args: @@ -240,55 +216,50 @@ def predict(task: TaggingTask, params: cfg.DataConfig, model: A keras.Model. Returns: - A tuple of `predict_ids` and `sentence_ids`, which are list with length - of `num_examples`. 
Each element in `predict_ids` is a sequence of - predicted per-word label id, and each element in `sentence_ids` is the - sentence id of the corresponding example. + A list of tuple. Each tuple contains `sentence_id`, `sub_sentence_id` and + a list of predicted ids. """ - @tf.function - def predict_step(iterator): - """Predicts on distributed devices.""" - - def _replicated_step(inputs): - """Replicated prediction calculation.""" - x, y = inputs - sentence_ids = x.pop('sentence_id') - outputs = task.inference_step(x, model) - predict_ids = outputs['predict_ids'] - label_mask = tf.greater_equal(y, 0) - return dict( - predict_ids=predict_ids, - label_mask=label_mask, - sentence_ids=sentence_ids) - - outputs = tf.distribute.get_strategy().experimental_run_v2( - _replicated_step, args=(next(iterator),)) - return tf.nest.map_structure( - tf.distribute.get_strategy().experimental_local_results, outputs) - - def reduce_fn(state, outputs): + def predict_step(inputs): + """Replicated prediction calculation.""" + x, y = inputs + sentence_ids = x.pop('sentence_id') + sub_sentence_ids = x.pop('sub_sentence_id') + outputs = task.inference_step(x, model) + predict_ids = outputs['predict_ids'] + label_mask = tf.greater_equal(y, 0) + return dict( + predict_ids=predict_ids, + label_mask=label_mask, + sentence_ids=sentence_ids, + sub_sentence_ids=sub_sentence_ids) + + def aggregate_fn(state, outputs): """Concatenates model's outputs.""" - cur_predict_ids, cur_sentence_ids = state - for batch_predict_ids, batch_label_mask, batch_sentence_ids in zip( - outputs['predict_ids'], outputs['label_mask'], - outputs['sentence_ids']): - for tmp_predict_ids, tmp_label_mask, tmp_sentence_id in zip( - batch_predict_ids.numpy(), batch_label_mask.numpy(), - batch_sentence_ids.numpy()): - cur_sentence_ids.append(tmp_sentence_id) - cur_predict_ids.append([]) + if state is None: + state = [] + + for (batch_predict_ids, batch_label_mask, batch_sentence_ids, + batch_sub_sentence_ids) in 
zip(outputs['predict_ids'], + outputs['label_mask'], + outputs['sentence_ids'], + outputs['sub_sentence_ids']): + for (tmp_predict_ids, tmp_label_mask, tmp_sentence_id, + tmp_sub_sentence_id) in zip(batch_predict_ids.numpy(), + batch_label_mask.numpy(), + batch_sentence_ids.numpy(), + batch_sub_sentence_ids.numpy()): + real_predict_ids = [] assert len(tmp_predict_ids) == len(tmp_label_mask) for i in range(len(tmp_predict_ids)): # Skip the padding label. if tmp_label_mask[i]: - cur_predict_ids[-1].append(tmp_predict_ids[i]) - return cur_predict_ids, cur_sentence_ids + real_predict_ids.append(tmp_predict_ids[i]) + state.append((tmp_sentence_id, tmp_sub_sentence_id, real_predict_ids)) + + return state - loop_fn = orbit.utils.create_loop_fn(predict_step) dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(), task.build_inputs, params) - # Set `num_steps` to -1 to exhaust the dataset. - predict_ids, sentence_ids = loop_fn( - iter(dataset), num_steps=-1, state=([], []), reduce_fn=reduce_fn) - return predict_ids, sentence_ids + outputs = utils.predict(predict_step, aggregate_fn, dataset) + return sorted(outputs, key=lambda x: (x[0], x[1])) diff --git a/official/nlp/tasks/tagging_test.py b/official/nlp/tasks/tagging_test.py index a3ea999c42fcfb5327868914ed3a48a96b30a79a..98ac97627abdbfb89fe6a55ef87b5e6f89c67b1a 100644 --- a/official/nlp/tasks/tagging_test.py +++ b/official/nlp/tasks/tagging_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for official.nlp.tasks.tagging.""" import functools import os + import numpy as np import tensorflow as tf -from official.nlp.bert import configs -from official.nlp.bert import export_tfhub from official.nlp.configs import encoders -from official.nlp.data import tagging_data_loader +from official.nlp.data import tagging_dataloader from official.nlp.tasks import tagging @@ -43,6 +41,7 @@ def _create_fake_dataset(output_path, seq_length, num_labels, num_examples): features["label_ids"] = create_int_feature( np.random.random_integers(-1, num_labels - 1, size=(seq_length))) features["sentence_id"] = create_int_feature([i]) + features["sub_sentence_id"] = create_int_feature([0]) tf_example = tf.train.Example(features=tf.train.Features(feature=features)) writer.write(tf_example.SerializeToString()) @@ -53,9 +52,9 @@ class TaggingTest(tf.test.TestCase): def setUp(self): super(TaggingTest, self).setUp() - self._encoder_config = encoders.TransformerEncoderConfig( - vocab_size=30522, num_layers=1) - self._train_data_config = tagging_data_loader.TaggingDataConfig( + self._encoder_config = encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)) + self._train_data_config = tagging_dataloader.TaggingDataConfig( input_path="dummy", seq_length=128, global_batch_size=1) def _run_task(self, config): @@ -64,17 +63,18 @@ class TaggingTest(tf.test.TestCase): metrics = task.build_metrics() strategy = tf.distribute.get_strategy() - dataset = strategy.experimental_distribute_datasets_from_function( + dataset = strategy.distribute_datasets_from_function( functools.partial(task.build_inputs, config.train_data)) iterator = iter(dataset) optimizer = tf.keras.optimizers.SGD(lr=0.1) task.train_step(next(iterator), model, optimizer, metrics=metrics) task.validation_step(next(iterator), model, metrics=metrics) + model.save(os.path.join(self.get_temp_dir(), 
"saved_model")) def test_task(self): # Saves a checkpoint. - encoder = encoders.instantiate_encoder_from_cfg(self._encoder_config) + encoder = encoders.build_encoder(self._encoder_config) ckpt = tf.train.Checkpoint(encoder=encoder) saved_path = ckpt.save(self.get_temp_dir()) @@ -94,45 +94,16 @@ class TaggingTest(tf.test.TestCase): task.validation_step(next(iterator), model, metrics=metrics) task.initialize(model) - def test_task_with_fit(self): - config = tagging.TaggingConfig( - model=tagging.ModelConfig(encoder=self._encoder_config), - train_data=self._train_data_config, - class_names=["O", "B-PER", "I-PER"]) - - task = tagging.TaggingTask(config) - model = task.build_model() - model = task.compile_model( - model, - optimizer=tf.keras.optimizers.SGD(lr=0.1), - train_step=task.train_step, - metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")]) - dataset = task.build_inputs(config.train_data) - logs = model.fit(dataset, epochs=1, steps_per_epoch=2) - self.assertIn("loss", logs.history) - self.assertIn("accuracy", logs.history) - def _export_bert_tfhub(self): - bert_config = configs.BertConfig( - vocab_size=30522, - hidden_size=16, - intermediate_size=32, - max_position_embeddings=128, - num_attention_heads=2, - num_hidden_layers=1) - _, encoder = export_tfhub.create_bert_model(bert_config) - model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") - checkpoint = tf.train.Checkpoint(model=encoder) - checkpoint.save(os.path.join(model_checkpoint_dir, "test")) - model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) - - vocab_file = os.path.join(self.get_temp_dir(), "uncased_vocab.txt") - with tf.io.gfile.GFile(vocab_file, "w") as f: - f.write("dummy content") - + encoder = encoders.build_encoder( + encoders.EncoderConfig( + bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))) + encoder_inputs_dict = {x.name: x for x in encoder.inputs} + encoder_output_dict = encoder(encoder_inputs_dict) + core_model = 
tf.keras.Model( + inputs=encoder_inputs_dict, outputs=encoder_output_dict) hub_destination = os.path.join(self.get_temp_dir(), "hub") - export_tfhub.export_bert_tfhub(bert_config, model_checkpoint_path, - hub_destination, vocab_file) + core_model.save(hub_destination, include_optimizer=False, save_format="tf") return hub_destination def test_task_with_hub(self): @@ -180,7 +151,7 @@ class TaggingTest(tf.test.TestCase): seq_length=seq_length, num_labels=len(task_config.class_names), num_examples=num_examples) - test_data_config = tagging_data_loader.TaggingDataConfig( + test_data_config = tagging_dataloader.TaggingDataConfig( input_path=test_data_path, seq_length=seq_length, is_training=False, @@ -188,9 +159,9 @@ class TaggingTest(tf.test.TestCase): drop_remainder=False, include_sentence_id=True) - predict_ids, sentence_ids = tagging.predict(task, test_data_config, model) - self.assertLen(predict_ids, num_examples) - self.assertLen(sentence_ids, num_examples) + results = tagging.predict(task, test_data_config, model) + self.assertLen(results, num_examples) + self.assertLen(results[0], 3) if __name__ == "__main__": diff --git a/official/nlp/tasks/translation.py b/official/nlp/tasks/translation.py new file mode 100644 index 0000000000000000000000000000000000000000..3658af2dfae4fde632fbe6eb5b096b1118c83e86 --- /dev/null +++ b/official/nlp/tasks/translation.py @@ -0,0 +1,367 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines the translation task.""" +import os +from typing import Optional + +from absl import logging +import dataclasses +import sacrebleu +import tensorflow as tf +import tensorflow_text as tftxt + +from official.core import base_task +from official.core import config_definitions as cfg +from official.core import task_factory +from official.modeling.hyperparams import base_config +from official.nlp.data import data_loader_factory +from official.nlp.metrics import bleu +from official.nlp.modeling import models + + +def _pad_tensors_to_same_length(x, y): + """Pad x and y so that the results have the same length (second dimension).""" + x_length = tf.shape(x)[1] + y_length = tf.shape(y)[1] + + max_length = tf.maximum(x_length, y_length) + + x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]]) + y = tf.pad(y, [[0, 0], [0, max_length - y_length]]) + return x, y + + +def _padded_cross_entropy_loss(logits, labels, smoothing, vocab_size): + """Calculate cross entropy loss while ignoring padding. 
+ + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch_size, length_labels] + smoothing: Label smoothing constant, used to determine the on and off values + vocab_size: int size of the vocabulary + + Returns: + Returns the cross entropy loss and weight tensors: float32 tensors with + shape [batch_size, max(length_logits, length_labels)] + """ + logits, labels = _pad_tensors_to_same_length(logits, labels) + + # Calculate smoothing cross entropy + confidence = 1.0 - smoothing + low_confidence = (1.0 - confidence) / tf.cast(vocab_size - 1, tf.float32) + soft_targets = tf.one_hot( + tf.cast(labels, tf.int32), + depth=vocab_size, + on_value=confidence, + off_value=low_confidence) + xentropy = tf.nn.softmax_cross_entropy_with_logits( + logits=logits, labels=soft_targets) + + # Calculate the best (lowest) possible value of cross entropy, and + # subtract from the cross entropy loss. + normalizing_constant = -( + confidence * tf.math.log(confidence) + tf.cast(vocab_size - 1, tf.float32) + * low_confidence * tf.math.log(low_confidence + 1e-20)) + xentropy -= normalizing_constant + + weights = tf.cast(tf.not_equal(labels, 0), tf.float32) + return xentropy * weights, weights + + +@dataclasses.dataclass +class EncDecoder(base_config.Config): + """Configurations for Encoder/Decoder.""" + num_layers: int = 6 + num_attention_heads: int = 8 + intermediate_size: int = 2048 + activation: str = "relu" + dropout_rate: float = 0.1 + attention_dropout_rate: float = 0.1 + intermediate_dropout: float = 0.1 + use_bias: bool = False + norm_first: bool = True + norm_epsilon: float = 1e-6 + + +@dataclasses.dataclass +class ModelConfig(base_config.Config): + """A base Seq2Seq model configuration.""" + encoder: EncDecoder = EncDecoder() + decoder: EncDecoder = EncDecoder() + + embedding_width: int = 512 + dropout_rate: float = 0.1 + + # Decoding. 
+ padded_decode: bool = False + decode_max_length: Optional[int] = None + beam_size: int = 4 + alpha: float = 0.6 + + # Training. + label_smoothing: float = 0.1 + + +@dataclasses.dataclass +class TranslationConfig(cfg.TaskConfig): + """The translation task config.""" + model: ModelConfig = ModelConfig() + train_data: cfg.DataConfig = cfg.DataConfig() + validation_data: cfg.DataConfig = cfg.DataConfig() + # Tokenization + sentencepiece_model_path: str = "" + # Evaluation. + print_translations: Optional[bool] = None + + +def write_test_record(params, model_dir): + """Writes the test input to a tfrecord.""" + # Get raw data from tfds. + params = params.replace(transform_and_batch=False) + dataset = data_loader_factory.get_data_loader(params).load() + references = [] + total_samples = 0 + output_file = os.path.join(model_dir, "eval.tf_record") + writer = tf.io.TFRecordWriter(output_file) + for d in dataset: + references.append(d[params.tgt_lang].numpy().decode()) + example = tf.train.Example( + features=tf.train.Features( + feature={ + "unique_id": tf.train.Feature( + int64_list=tf.train.Int64List(value=[total_samples])), + params.src_lang: tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[d[params.src_lang].numpy()])), + params.tgt_lang: tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[d[params.tgt_lang].numpy()])), + })) + writer.write(example.SerializeToString()) + total_samples += 1 + batch_size = params.global_batch_size + num_dummy_example = batch_size - total_samples % batch_size + for i in range(num_dummy_example): + example = tf.train.Example( + features=tf.train.Features( + feature={ + "unique_id": tf.train.Feature( + int64_list=tf.train.Int64List(value=[total_samples + i])), + params.src_lang: tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b""])), + params.tgt_lang: tf.train.Feature( + bytes_list=tf.train.BytesList(value=[b""])), + })) + writer.write(example.SerializeToString()) + writer.close() + return references, output_file 
+ + +@task_factory.register_task_cls(TranslationConfig) +class TranslationTask(base_task.Task): + """A single-replica view of training procedure. + + Tasks provide artifacts for training/evalution procedures, including + loading/iterating over Datasets, initializing the model, calculating the loss + and customized metrics with reduction. + """ + + def __init__(self, params: cfg.TaskConfig, logging_dir=None, name=None): + super().__init__(params, logging_dir, name=name) + self._sentencepiece_model_path = params.sentencepiece_model_path + if params.sentencepiece_model_path: + self._sp_tokenizer = tftxt.SentencepieceTokenizer( + model=tf.io.gfile.GFile(params.sentencepiece_model_path, "rb").read(), + add_eos=True) + try: + empty_str_tokenized = self._sp_tokenizer.tokenize("").numpy() + except tf.errors.InternalError: + raise ValueError( + "EOS token not in tokenizer vocab." + "Please make sure the tokenizer generates a single token for an " + "empty string.") + self._eos_id = empty_str_tokenized.item() + self._vocab_size = self._sp_tokenizer.vocab_size().numpy() + else: + raise ValueError("Setencepiece model path not provided.") + if (params.validation_data.input_path or + params.validation_data.tfds_name) and self._logging_dir: + self._references, self._tf_record_input_path = write_test_record( + params.validation_data, self.logging_dir) + + def build_model(self) -> tf.keras.Model: + """Creates model architecture. + + Returns: + A model instance. 
+ """ + model_cfg = self.task_config.model + encoder_kwargs = model_cfg.encoder.as_dict() + encoder_layer = models.TransformerEncoder(**encoder_kwargs) + decoder_kwargs = model_cfg.decoder.as_dict() + decoder_layer = models.TransformerDecoder(**decoder_kwargs) + + return models.Seq2SeqTransformer( + vocab_size=self._vocab_size, + embedding_width=model_cfg.embedding_width, + dropout_rate=model_cfg.dropout_rate, + padded_decode=model_cfg.padded_decode, + decode_max_length=model_cfg.decode_max_length, + beam_size=model_cfg.beam_size, + alpha=model_cfg.alpha, + encoder_layer=encoder_layer, + decoder_layer=decoder_layer, + eos_id=self._eos_id) + + def build_inputs(self, + params: cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Returns a dataset.""" + if params.is_training: + dataloader_params = params + else: + input_path = self._tf_record_input_path + # Read from padded tf records instead. + dataloader_params = params.replace( + input_path=input_path, + tfds_name="", + tfds_split="", + has_unique_id=True) + dataloader_params = dataloader_params.replace( + sentencepiece_model_path=self._sentencepiece_model_path) + return data_loader_factory.get_data_loader(dataloader_params).load( + input_context) + + def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor: + """Standard interface to compute losses. + + Args: + labels: optional label tensors. + model_outputs: a nested structure of output tensors. + aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model. + + Returns: + The total loss tensor. + """ + del aux_losses + + smoothing = self.task_config.model.label_smoothing + xentropy, weights = _padded_cross_entropy_loss(model_outputs, labels, + smoothing, self._vocab_size) + return tf.reduce_sum(xentropy) / tf.reduce_sum(weights) + + def train_step(self, + inputs, + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics=None): + """Does forward and backward. 
+ + With distribution strategies, this method runs on devices. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + with tf.GradientTape() as tape: + outputs = model(inputs, training=True) + # Computes per-replica loss. + loss = self.build_losses(labels=inputs["targets"], model_outputs=outputs) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / tf.distribute.get_strategy().num_replicas_in_sync + + # For mixed precision, when a LossScaleOptimizer is used, the loss is + # scaled to avoid numeric underflow. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, inputs["targets"], outputs) + return logs + + def validation_step(self, inputs, model: tf.keras.Model, metrics=None): + unique_ids = inputs.pop("unique_id") + # Validation loss + outputs = model(inputs, training=False) + # Computes per-replica loss to help understand if we are overfitting. + loss = self.build_losses(labels=inputs["targets"], model_outputs=outputs) + inputs.pop("targets") + # Beam search to calculate metrics. 
+ model_outputs = model(inputs, training=False) + outputs = model_outputs + logs = { + self.loss: loss, + "inputs": inputs["inputs"], + "unique_ids": unique_ids, + } + logs.update(outputs) + return logs + + def aggregate_logs(self, state=None, step_outputs=None): + """Aggregates over logs returned from a validation step.""" + if state is None: + state = {} + + for in_token_ids, out_token_ids, unique_ids in zip( + step_outputs["inputs"], + step_outputs["outputs"], + step_outputs["unique_ids"]): + for in_ids, out_ids, u_id in zip( + in_token_ids.numpy(), out_token_ids.numpy(), unique_ids.numpy()): + state[u_id] = (in_ids, out_ids) + return state + + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + + def _decode(ids): + return self._sp_tokenizer.detokenize(ids).numpy().decode() + + def _trim_and_decode(ids): + """Trim EOS and PAD tokens from ids, and decode to return a string.""" + try: + index = list(ids).index(self._eos_id) + return _decode(ids[:index]) + except ValueError: # No EOS found in sequence + return _decode(ids) + + translations = [] + for u_id in sorted(aggregated_logs): + if u_id >= len(self._references): + continue + src = _trim_and_decode(aggregated_logs[u_id][0]) + translation = _trim_and_decode(aggregated_logs[u_id][1]) + translations.append(translation) + if self.task_config.print_translations: + # Deccoding the in_ids to reflect what the model sees. 
+ logging.info("Translating:\n\tInput: %s\n\tOutput: %s\n\tReference: %s", + src, translation, self._references[u_id]) + sacrebleu_score = sacrebleu.corpus_bleu( + translations, [self._references]).score + bleu_score = bleu.bleu_on_list(self._references, translations) + return {"sacrebleu_score": sacrebleu_score, + "bleu_score": bleu_score} diff --git a/official/nlp/tasks/translation_test.py b/official/nlp/tasks/translation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..aba4ea62bb972b833c22b6f90430a6174e77af51 --- /dev/null +++ b/official/nlp/tasks/translation_test.py @@ -0,0 +1,167 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for official.nlp.tasks.translation.""" +import functools +import os + +import orbit +import tensorflow as tf + +from sentencepiece import SentencePieceTrainer +from official.nlp.data import wmt_dataloader +from official.nlp.tasks import translation + + +def _generate_line_file(filepath, lines): + with tf.io.gfile.GFile(filepath, "w") as f: + for l in lines: + f.write("{}\n".format(l)) + + +def _generate_record_file(filepath, src_lines, tgt_lines): + writer = tf.io.TFRecordWriter(filepath) + for src, tgt in zip(src_lines, tgt_lines): + example = tf.train.Example( + features=tf.train.Features( + feature={ + "en": tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[src.encode()])), + "reverse_en": tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[tgt.encode()])), + })) + writer.write(example.SerializeToString()) + writer.close() + + +def _train_sentencepiece(input_path, vocab_size, model_path, eos_id=1): + argstr = " ".join([ + f"--input={input_path}", f"--vocab_size={vocab_size}", + "--character_coverage=0.995", + f"--model_prefix={model_path}", "--model_type=bpe", + "--bos_id=-1", "--pad_id=0", f"--eos_id={eos_id}", "--unk_id=2" + ]) + SentencePieceTrainer.Train(argstr) + + +class TranslationTaskTest(tf.test.TestCase): + + def setUp(self): + super(TranslationTaskTest, self).setUp() + self._temp_dir = self.get_temp_dir() + src_lines = [ + "abc ede fg", + "bbcd ef a g", + "de f a a g" + ] + tgt_lines = [ + "dd cc a ef g", + "bcd ef a g", + "gef cd ba" + ] + self._record_input_path = os.path.join(self._temp_dir, "inputs.record") + _generate_record_file(self._record_input_path, src_lines, tgt_lines) + self._sentencepeice_input_path = os.path.join(self._temp_dir, "inputs.txt") + _generate_line_file(self._sentencepeice_input_path, src_lines + tgt_lines) + sentencepeice_model_prefix = os.path.join(self._temp_dir, "sp") + _train_sentencepiece(self._sentencepeice_input_path, 11, + sentencepeice_model_prefix) + self._sentencepeice_model_path = 
"{}.model".format( + sentencepeice_model_prefix) + + def test_task(self): + config = translation.TranslationConfig( + model=translation.ModelConfig( + encoder=translation.EncDecoder(), decoder=translation.EncDecoder()), + train_data=wmt_dataloader.WMTDataConfig( + input_path=self._record_input_path, + src_lang="en", tgt_lang="reverse_en", + is_training=True, static_batch=True, global_batch_size=24, + max_seq_length=12), + sentencepiece_model_path=self._sentencepeice_model_path) + task = translation.TranslationTask(config) + model = task.build_model() + dataset = task.build_inputs(config.train_data) + iterator = iter(dataset) + optimizer = tf.keras.optimizers.SGD(lr=0.1) + task.train_step(next(iterator), model, optimizer) + + def test_no_sentencepiece_path(self): + config = translation.TranslationConfig( + model=translation.ModelConfig( + encoder=translation.EncDecoder(), decoder=translation.EncDecoder()), + train_data=wmt_dataloader.WMTDataConfig( + input_path=self._record_input_path, + src_lang="en", tgt_lang="reverse_en", + is_training=True, static_batch=True, global_batch_size=4, + max_seq_length=4), + sentencepiece_model_path=None) + with self.assertRaisesRegex( + ValueError, + "Setencepiece model path not provided."): + translation.TranslationTask(config) + + def test_sentencepiece_no_eos(self): + sentencepeice_model_prefix = os.path.join(self._temp_dir, "sp_no_eos") + _train_sentencepiece(self._sentencepeice_input_path, 20, + sentencepeice_model_prefix, eos_id=-1) + sentencepeice_model_path = "{}.model".format( + sentencepeice_model_prefix) + config = translation.TranslationConfig( + model=translation.ModelConfig( + encoder=translation.EncDecoder(), decoder=translation.EncDecoder()), + train_data=wmt_dataloader.WMTDataConfig( + input_path=self._record_input_path, + src_lang="en", tgt_lang="reverse_en", + is_training=True, static_batch=True, global_batch_size=4, + max_seq_length=4), + sentencepiece_model_path=sentencepeice_model_path) + with 
self.assertRaisesRegex( + ValueError, + "EOS token not in tokenizer vocab.*"): + translation.TranslationTask(config) + + def test_evaluation(self): + config = translation.TranslationConfig( + model=translation.ModelConfig( + encoder=translation.EncDecoder(), decoder=translation.EncDecoder(), + padded_decode=False, + decode_max_length=64), + validation_data=wmt_dataloader.WMTDataConfig( + input_path=self._record_input_path, src_lang="en", + tgt_lang="reverse_en", static_batch=True, global_batch_size=4), + sentencepiece_model_path=self._sentencepeice_model_path) + logging_dir = self.get_temp_dir() + task = translation.TranslationTask(config, logging_dir=logging_dir) + dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(), + task.build_inputs, + config.validation_data) + model = task.build_model() + strategy = tf.distribute.get_strategy() + aggregated = None + for data in dataset: + distributed_outputs = strategy.run( + functools.partial(task.validation_step, model=model), + args=(data,)) + outputs = tf.nest.map_structure(strategy.experimental_local_results, + distributed_outputs) + aggregated = task.aggregate_logs(state=aggregated, step_outputs=outputs) + metrics = task.reduce_aggregated_logs(aggregated) + self.assertIn("sacrebleu_score", metrics) + self.assertIn("bleu_score", metrics) + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/tasks/utils.py b/official/nlp/tasks/utils.py index 467dafe31f813779b7af5ea0209aadccb6d1bdf8..35be4e3d4546dc29f5b43983687d697967eda4f3 100644 --- a/official/nlp/tasks/utils.py +++ b/official/nlp/tasks/utils.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,23 +11,66 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Common utils for tasks.""" +from typing import Any, Callable + +import orbit import tensorflow as tf import tensorflow_hub as hub -def get_encoder_from_hub(hub_module: str) -> tf.keras.Model: - """Gets an encoder from hub.""" +def get_encoder_from_hub(hub_model_path: str) -> tf.keras.Model: + """Gets an encoder from hub. + + Args: + hub_model_path: The path to the tfhub model. + + Returns: + A tf.keras.Model. + """ input_word_ids = tf.keras.layers.Input( shape=(None,), dtype=tf.int32, name='input_word_ids') input_mask = tf.keras.layers.Input( shape=(None,), dtype=tf.int32, name='input_mask') input_type_ids = tf.keras.layers.Input( shape=(None,), dtype=tf.int32, name='input_type_ids') - hub_layer = hub.KerasLayer(hub_module, trainable=True) - pooled_output, sequence_output = hub_layer( - [input_word_ids, input_mask, input_type_ids]) - return tf.keras.Model( - inputs=[input_word_ids, input_mask, input_type_ids], - outputs=[sequence_output, pooled_output]) + hub_layer = hub.KerasLayer(hub_model_path, trainable=True) + output_dict = {} + dict_input = dict( + input_word_ids=input_word_ids, + input_mask=input_mask, + input_type_ids=input_type_ids) + output_dict = hub_layer(dict_input) + + return tf.keras.Model(inputs=dict_input, outputs=output_dict) + + +def predict(predict_step_fn: Callable[[Any], Any], + aggregate_fn: Callable[[Any, Any], Any], dataset: tf.data.Dataset): + """Runs prediction. + + Args: + predict_step_fn: A callable such as `def predict_step(inputs)`, where + `inputs` are input tensors. + aggregate_fn: A callable such as `def aggregate_fn(state, value)`, where + `value` is the outputs from `predict_step_fn`. + dataset: A `tf.data.Dataset` object. 
+ + Returns: + The aggregated predictions. + """ + + @tf.function + def predict_step(iterator): + """Predicts on distributed devices.""" + outputs = tf.distribute.get_strategy().run( + predict_step_fn, args=(next(iterator),)) + return tf.nest.map_structure( + tf.distribute.get_strategy().experimental_local_results, outputs) + + loop_fn = orbit.utils.create_loop_fn(predict_step) + # Set `num_steps` to -1 to exhaust the dataset. + outputs = loop_fn( + iter(dataset), num_steps=-1, state=None, reduce_fn=aggregate_fn) # pytype: disable=wrong-arg-types + return outputs diff --git a/official/nlp/tools/__init__.py b/official/nlp/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a25710c222e3327cb20e000db5df5c5651c4a2cc --- /dev/null +++ b/official/nlp/tools/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/official/nlp/tools/export_tfhub.py b/official/nlp/tools/export_tfhub.py new file mode 100644 index 0000000000000000000000000000000000000000..0effd56863bbe1fbe956cb25114c1f7705a181a1 --- /dev/null +++ b/official/nlp/tools/export_tfhub.py @@ -0,0 +1,219 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Exports a BERT-like encoder and its preprocessing as SavedModels for TF Hub. + +This tool creates preprocessor and encoder SavedModels suitable for uploading +to https://tfhub.dev that implement the preprocessor and encoder APIs defined +at https://www.tensorflow.org/hub/common_saved_model_apis/text. + +For a full usage guide, see +https://github.com/tensorflow/models/blob/master/official/nlp/docs/tfhub.md + +Minimal usage examples: + +1) Exporting an Encoder from checkpoint and config. + +``` +export_tfhub \ + --encoder_config_file=${BERT_DIR:?}/bert_encoder.yaml \ + --model_checkpoint_path=${BERT_DIR:?}/bert_model.ckpt \ + --vocab_file=${BERT_DIR:?}/vocab.txt \ + --export_type=model \ + --export_path=/tmp/bert_model +``` + +An --encoder_config_file can specify encoder types other than BERT. +For BERT, a --bert_config_file in the legacy JSON format can be passed instead. + +Flag --vocab_file (and flag --do_lower_case, whose default value is guessed +from the vocab_file path) capture how BertTokenizer was used in pre-training. +Use flag --sp_model_file instead if SentencepieceTokenizer was used. + +Changing --export_type to model_with_mlm additionally creates an `.mlm` +subobject on the exported SavedModel that can be called to produce +the logits of the Masked Language Model task from pretraining. +The help string for flag --model_checkpoint_path explains the checkpoint +formats required for each --export_type. 
+ + +2) Exporting a preprocessor SavedModel + +``` +export_tfhub \ + --vocab_file ${BERT_DIR:?}/vocab.txt \ + --export_type preprocessing --export_path /tmp/bert_preprocessing +``` + +Be sure to use flag values that match the encoder and how it has been +pre-trained (see above for --vocab_file vs --sp_model_file). + +If your encoder has been trained with text preprocessing for which tfhub.dev +already has SavedModel, you could guide your users to reuse that one instead +of exporting and publishing your own. + +TODO(b/175369555): When exporting to users of TensorFlow 2.4, add flag +`--experimental_disable_assert_in_preprocessing`. +""" + +from absl import app +from absl import flags +import gin + +from official.modeling import hyperparams +from official.nlp.bert import configs +from official.nlp.configs import encoders +from official.nlp.tools import export_tfhub_lib + +FLAGS = flags.FLAGS + +flags.DEFINE_enum( + "export_type", "model", + ["model", "model_with_mlm", "preprocessing"], + "The overall type of SavedModel to export. Flags " + "--bert_config_file/--encoder_config_file and --vocab_file/--sp_model_file " + "control which particular encoder model and preprocessing are exported.") +flags.DEFINE_string( + "export_path", None, + "Directory to which the SavedModel is written.") +flags.DEFINE_string( + "encoder_config_file", None, + "A yaml file representing `encoders.EncoderConfig` to define the encoder " + "(BERT or other). " + "Exactly one of --bert_config_file and --encoder_config_file can be set. " + "Needed for --export_type model and model_with_mlm.") +flags.DEFINE_string( + "bert_config_file", None, + "A JSON file with a legacy BERT configuration to define the BERT encoder. " + "Exactly one of --bert_config_file and --encoder_config_file can be set. 
" + "Needed for --export_type model and model_with_mlm.") +flags.DEFINE_bool( + "copy_pooler_dense_to_encoder", False, + "When the model is trained using `BertPretrainerV2`, the pool layer " + "of next sentence prediction task exists in `ClassificationHead` passed " + "to `BertPretrainerV2`. If True, we will copy this pooler's dense layer " + "to the encoder that is exported by this tool (as in classic BERT). " + "Using `BertPretrainerV2` and leaving this False exports an untrained " + "(randomly initialized) pooling layer, which some authors recommend for " + "subsequent fine-tuning,") +flags.DEFINE_string( + "model_checkpoint_path", None, + "File path to a pre-trained model checkpoint. " + "For --export_type model, this has to be an object-based (TF2) checkpoint " + "that can be restored to `tf.train.Checkpoint(encoder=encoder)` " + "for the `encoder` defined by the config file." + "(Legacy checkpoints with `model=` instead of `encoder=` are also " + "supported for now.) " + "For --export_type model_with_mlm, it must be restorable to " + "`tf.train.Checkpoint(**BertPretrainerV2(...).checkpoint_items)`. " + "(For now, `tf.train.Checkpoint(pretrainer=BertPretrainerV2(...))` is also " + "accepted.)") +flags.DEFINE_string( + "vocab_file", None, + "For encoders trained on BertTokenzier input: " + "the vocabulary file that the encoder model was trained with. " + "Exactly one of --vocab_file and --sp_model_file can be set. " + "Needed for --export_type model, model_with_mlm and preprocessing.") +flags.DEFINE_string( + "sp_model_file", None, + "For encoders trained on SentencepieceTokenzier input: " + "the SentencePiece .model file that the encoder model was trained with. " + "Exactly one of --vocab_file and --sp_model_file can be set. " + "Needed for --export_type model, model_with_mlm and preprocessing.") +flags.DEFINE_bool( + "do_lower_case", None, + "Whether to lowercase before tokenization. 
" + "If left as None, and --vocab_file is set, do_lower_case will be enabled " + "if 'uncased' appears in the name of --vocab_file. " + "If left as None, and --sp_model_file set, do_lower_case defaults to true. " + "Needed for --export_type model, model_with_mlm and preprocessing.") +flags.DEFINE_integer( + "default_seq_length", 128, + "The sequence length of preprocessing results from " + "top-level preprocess method. This is also the default " + "sequence length for the bert_pack_inputs subobject." + "Needed for --export_type preprocessing.") +flags.DEFINE_bool( + "tokenize_with_offsets", False, # TODO(b/181866850) + "Whether to export a .tokenize_with_offsets subobject for " + "--export_type preprocessing.") +flags.DEFINE_multi_string( + "gin_file", default=None, + help="List of paths to the config files.") +flags.DEFINE_multi_string( + "gin_params", default=None, + help="List of Gin bindings.") +flags.DEFINE_bool( # TODO(b/175369555): Remove this flag and its use. + "experimental_disable_assert_in_preprocessing", False, + "Export a preprocessing model without tf.Assert ops. " + "Usually, that would be a bad idea, except TF2.4 has an issue with " + "Assert ops in tf.functions used in Dataset.map() on a TPU worker, " + "and omitting the Assert ops lets SavedModels avoid the issue.") + + +def main(argv): + if len(argv) > 1: + raise app.UsageError("Too many command-line arguments.") + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + + if bool(FLAGS.vocab_file) == bool(FLAGS.sp_model_file): + raise ValueError("Exactly one of `vocab_file` and `sp_model_file` " + "can be specified, but got %s and %s." 
% + (FLAGS.vocab_file, FLAGS.sp_model_file)) + do_lower_case = export_tfhub_lib.get_do_lower_case( + FLAGS.do_lower_case, FLAGS.vocab_file, FLAGS.sp_model_file) + + if FLAGS.export_type in ("model", "model_with_mlm"): + if bool(FLAGS.bert_config_file) == bool(FLAGS.encoder_config_file): + raise ValueError("Exactly one of `bert_config_file` and " + "`encoder_config_file` can be specified, but got " + "%s and %s." % + (FLAGS.bert_config_file, FLAGS.encoder_config_file)) + if FLAGS.bert_config_file: + bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) + encoder_config = None + else: + bert_config = None + encoder_config = encoders.EncoderConfig() + encoder_config = hyperparams.override_params_dict( + encoder_config, FLAGS.encoder_config_file, is_strict=True) + export_tfhub_lib.export_model( + FLAGS.export_path, + bert_config=bert_config, + encoder_config=encoder_config, + model_checkpoint_path=FLAGS.model_checkpoint_path, + vocab_file=FLAGS.vocab_file, + sp_model_file=FLAGS.sp_model_file, + do_lower_case=do_lower_case, + with_mlm=FLAGS.export_type == "model_with_mlm", + copy_pooler_dense_to_encoder=FLAGS.copy_pooler_dense_to_encoder) + + elif FLAGS.export_type == "preprocessing": + export_tfhub_lib.export_preprocessing( + FLAGS.export_path, + vocab_file=FLAGS.vocab_file, + sp_model_file=FLAGS.sp_model_file, + do_lower_case=do_lower_case, + default_seq_length=FLAGS.default_seq_length, + tokenize_with_offsets=FLAGS.tokenize_with_offsets, + experimental_disable_assert= + FLAGS.experimental_disable_assert_in_preprocessing) + + else: + raise app.UsageError( + "Unknown value '%s' for flag --export_type" % FLAGS.export_type) + + +if __name__ == "__main__": + app.run(main) diff --git a/official/nlp/tools/export_tfhub_lib.py b/official/nlp/tools/export_tfhub_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..7062e41661e9db9f842bd28368e3ad4147eb6514 --- /dev/null +++ b/official/nlp/tools/export_tfhub_lib.py @@ -0,0 +1,473 @@ +# 
Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library of components of export_tfhub.py. See docstring there for more.""" + +import contextlib +import hashlib +import os +import tempfile + +from typing import Optional, Text, Tuple + +# Import libraries +from absl import logging +import tensorflow as tf +# pylint: disable=g-direct-tensorflow-import TODO(b/175369555): Remove these. +from tensorflow.core.protobuf import saved_model_pb2 +from tensorflow.python.ops import control_flow_ops +# pylint: enable=g-direct-tensorflow-import +from official.modeling import tf_utils +from official.nlp.bert import configs +from official.nlp.configs import encoders +from official.nlp.modeling import layers +from official.nlp.modeling import models +from official.nlp.modeling import networks + + +def get_bert_encoder(bert_config): + """Returns a BertEncoder with dict outputs.""" + bert_encoder = networks.BertEncoder( + vocab_size=bert_config.vocab_size, + hidden_size=bert_config.hidden_size, + num_layers=bert_config.num_hidden_layers, + num_attention_heads=bert_config.num_attention_heads, + intermediate_size=bert_config.intermediate_size, + activation=tf_utils.get_activation(bert_config.hidden_act), + dropout_rate=bert_config.hidden_dropout_prob, + attention_dropout_rate=bert_config.attention_probs_dropout_prob, + max_sequence_length=bert_config.max_position_embeddings, + type_vocab_size=bert_config.type_vocab_size, + 
initializer=tf.keras.initializers.TruncatedNormal( + stddev=bert_config.initializer_range), + embedding_width=bert_config.embedding_size, + dict_outputs=True) + + return bert_encoder + + +def get_do_lower_case(do_lower_case, vocab_file=None, sp_model_file=None): + """Returns do_lower_case, replacing None by a guess from vocab file name.""" + if do_lower_case is not None: + return do_lower_case + elif vocab_file: + do_lower_case = "uncased" in vocab_file + logging.info("Using do_lower_case=%s based on name of vocab_file=%s", + do_lower_case, vocab_file) + return do_lower_case + elif sp_model_file: + do_lower_case = True # All public ALBERTs (as of Oct 2020) do it. + logging.info("Defaulting to do_lower_case=%s for Sentencepiece tokenizer", + do_lower_case) + return do_lower_case + else: + raise ValueError("Must set vocab_file or sp_model_file.") + + +def _create_model( + *, + bert_config: Optional[configs.BertConfig] = None, + encoder_config: Optional[encoders.EncoderConfig] = None, + with_mlm: bool, +) -> Tuple[tf.keras.Model, tf.keras.Model]: + """Creates the model to export and the model to restore the checkpoint. + + Args: + bert_config: A legacy `BertConfig` to create a `BertEncoder` object. + Exactly one of encoder_config and bert_config must be set. + encoder_config: An `EncoderConfig` to create an encoder of the configured + type (`BertEncoder` or other). + with_mlm: A bool to control the second component of the result. + If True, will create a `BertPretrainerV2` object; otherwise, will + create a `BertEncoder` object. + + Returns: + A Tuple of (1) a Keras model that will be exported, (2) a `BertPretrainerV2` + object or `BertEncoder` object depending on the value of `with_mlm` + argument, which contains the first model and will be used for restoring + weights from the checkpoint. 
+ """ + if (bert_config is not None) == (encoder_config is not None): + raise ValueError("Exactly one of `bert_config` and `encoder_config` " + "can be specified, but got %s and %s" % + (bert_config, encoder_config)) + + if bert_config is not None: + encoder = get_bert_encoder(bert_config) + else: + encoder = encoders.build_encoder(encoder_config) + + # Convert from list of named inputs to dict of inputs keyed by name. + # Only the latter accepts a dict of inputs after restoring from SavedModel. + encoder_inputs_dict = {x.name: x for x in encoder.inputs} + encoder_output_dict = encoder(encoder_inputs_dict) + # For interchangeability with other text representations, + # add "default" as an alias for BERT's whole-input reptesentations. + encoder_output_dict["default"] = encoder_output_dict["pooled_output"] + core_model = tf.keras.Model( + inputs=encoder_inputs_dict, outputs=encoder_output_dict) + + if with_mlm: + if bert_config is not None: + hidden_act = bert_config.hidden_act + else: + assert encoder_config is not None + hidden_act = encoder_config.get().hidden_activation + + pretrainer = models.BertPretrainerV2( + encoder_network=encoder, + mlm_activation=tf_utils.get_activation(hidden_act)) + + pretrainer_inputs_dict = {x.name: x for x in pretrainer.inputs} + pretrainer_output_dict = pretrainer(pretrainer_inputs_dict) + mlm_model = tf.keras.Model( + inputs=pretrainer_inputs_dict, outputs=pretrainer_output_dict) + # Set `_auto_track_sub_layers` to False, so that the additional weights + # from `mlm` sub-object will not be included in the core model. + # TODO(b/169210253): Use a public API when available. 
+ core_model._auto_track_sub_layers = False # pylint: disable=protected-access + core_model.mlm = mlm_model + return core_model, pretrainer + else: + return core_model, encoder + + +def export_model(export_path: Text, + *, + bert_config: Optional[configs.BertConfig] = None, + encoder_config: Optional[encoders.EncoderConfig] = None, + model_checkpoint_path: Text, + with_mlm: bool, + copy_pooler_dense_to_encoder: bool = False, + vocab_file: Optional[Text] = None, + sp_model_file: Optional[Text] = None, + do_lower_case: Optional[bool] = None) -> None: + """Exports an Encoder as SavedModel after restoring pre-trained weights. + + The exported SavedModel implements a superset of the Encoder API for + Text embeddings with Transformer Encoders described at + https://www.tensorflow.org/hub/common_saved_model_apis/text. + + In particular, the exported SavedModel can be used in the following way: + + ``` + # Calls default interface (encoder only). + + encoder = hub.load(...) + encoder_inputs = dict( + input_word_ids=..., # Shape [batch, seq_length], dtype=int32 + input_mask=..., # Shape [batch, seq_length], dtype=int32 + input_type_ids=..., # Shape [batch, seq_length], dtype=int32 + ) + encoder_outputs = encoder(encoder_inputs) + assert encoder_outputs.keys() == { + "pooled_output", # Shape [batch_size, width], dtype=float32 + "default", # Alias for "pooled_output" (aligns with other models). + "sequence_output" # Shape [batch_size, seq_length, width], dtype=float32 + "encoder_outputs", # List of Tensors with outputs of all transformer layers. + } + ``` + + If `with_mlm` is True, the exported SavedModel can also be called in the + following way: + + ``` + # Calls expanded interface that includes logits of the Masked Language Model. 
+ mlm_inputs = dict( + input_word_ids=..., # Shape [batch, seq_length], dtype=int32 + input_mask=..., # Shape [batch, seq_length], dtype=int32 + input_type_ids=..., # Shape [batch, seq_length], dtype=int32 + masked_lm_positions=..., # Shape [batch, num_predictions], dtype=int32 + ) + mlm_outputs = encoder.mlm(mlm_inputs) + assert mlm_outputs.keys() == { + "pooled_output", # Shape [batch, width], dtype=float32 + "sequence_output", # Shape [batch, seq_length, width], dtype=float32 + "encoder_outputs", # List of Tensors with outputs of all transformer layers. + "mlm_logits" # Shape [batch, num_predictions, vocab_size], dtype=float32 + } + ``` + + Args: + export_path: The SavedModel output directory. + bert_config: An optional `configs.BertConfig` object. Note: exactly one of + `bert_config` and following `encoder_config` must be specified. + encoder_config: An optional `encoders.EncoderConfig` object. + model_checkpoint_path: The path to the checkpoint. + with_mlm: Whether to export the additional mlm sub-object. + copy_pooler_dense_to_encoder: Whether to copy the pooler's dense layer + used in the next sentence prediction task to the encoder. + vocab_file: The path to the wordpiece vocab file, or None. + sp_model_file: The path to the sentencepiece model file, or None. + Exactly one of vocab_file and sp_model_file must be set. + do_lower_case: Whether to lower-case text before tokenization. + """ + if with_mlm: + core_model, pretrainer = _create_model(bert_config=bert_config, + encoder_config=encoder_config, + with_mlm=with_mlm) + encoder = pretrainer.encoder_network + # It supports both the new pretrainer checkpoint produced by TF-NLP and + # the checkpoint converted from TF1 (original BERT, SmallBERTs). 
+ checkpoint_items = pretrainer.checkpoint_items + checkpoint = tf.train.Checkpoint(**checkpoint_items) + else: + core_model, encoder = _create_model(bert_config=bert_config, + encoder_config=encoder_config, + with_mlm=with_mlm) + checkpoint = tf.train.Checkpoint( + model=encoder, # Legacy checkpoints. + encoder=encoder) + checkpoint.restore(model_checkpoint_path).assert_existing_objects_matched() + + if copy_pooler_dense_to_encoder: + logging.info("Copy pooler's dense layer to the encoder.") + pooler_checkpoint = tf.train.Checkpoint( + **{"next_sentence.pooler_dense": encoder.pooler_layer}) + pooler_checkpoint.restore( + model_checkpoint_path).assert_existing_objects_matched() + + # Before SavedModels for preprocessing appeared in Oct 2020, the encoders + # provided this information to let users do preprocessing themselves. + # We keep doing that for now. It helps users to upgrade incrementally. + # Moreover, it offers an escape hatch for advanced users who want the + # full vocab, not the high-level operations from the preprocessing model. + if vocab_file: + core_model.vocab_file = tf.saved_model.Asset(vocab_file) + if do_lower_case is None: + raise ValueError("Must pass do_lower_case if passing vocab_file.") + core_model.do_lower_case = tf.Variable(do_lower_case, trainable=False) + elif sp_model_file: + # This was used by ALBERT, with implied values of do_lower_case=True + # and strip_diacritics=True. + core_model.sp_model_file = tf.saved_model.Asset(sp_model_file) + else: + raise ValueError("Must set vocab_file or sp_model_file") + core_model.save(export_path, include_optimizer=False, save_format="tf") + + +class BertPackInputsSavedModelWrapper(tf.train.Checkpoint): + """Wraps a BertPackInputs layer for export to SavedModel. + + The wrapper object is suitable for use with `tf.saved_model.save()` and + `.load()`. 
The wrapper object is callable with inputs and outputs like the + BertPackInputs layer, but differs from saving an unwrapped Keras object: + + - The inputs can be a list of 1 or 2 RaggedTensors of dtype int32 and + ragged rank 1 or 2. (In Keras, saving to a tf.function in a SavedModel + would fix the number of RaggedTensors and their ragged rank.) + - The call accepts an optional keyword argument `seq_length=` to override + the layer's .seq_length hyperparameter. (In Keras, a hyperparameter + could not be changed after saving to a tf.function in a SavedModel.) + """ + + def __init__(self, bert_pack_inputs: layers.BertPackInputs): + super().__init__() + + # Preserve the layer's configured seq_length as a default but make it + # overridable. Having this dynamically determined default argument + # requires self.__call__ to be defined in this indirect way. + default_seq_length = bert_pack_inputs.seq_length + @tf.function(autograph=False) + def call(inputs, seq_length=default_seq_length): + return layers.BertPackInputs.bert_pack_inputs( + inputs, seq_length=seq_length, + start_of_sequence_id=bert_pack_inputs.start_of_sequence_id, + end_of_segment_id=bert_pack_inputs.end_of_segment_id, + padding_id=bert_pack_inputs.padding_id) + self.__call__ = call + + for ragged_rank in range(1, 3): + for num_segments in range(1, 3): + _ = self.__call__.get_concrete_function( + [tf.RaggedTensorSpec([None] * (ragged_rank + 1), dtype=tf.int32) + for _ in range(num_segments)], + seq_length=tf.TensorSpec([], tf.int32)) + + +def create_preprocessing(*, + vocab_file: Optional[str] = None, + sp_model_file: Optional[str] = None, + do_lower_case: bool, + tokenize_with_offsets: bool, + default_seq_length: int) -> tf.keras.Model: + """Returns a preprocessing Model for given tokenization parameters. + + This function builds a Keras Model with attached subobjects suitable for + saving to a SavedModel. 
The resulting SavedModel implements the Preprocessor + API for Text embeddings with Transformer Encoders described at + https://www.tensorflow.org/hub/common_saved_model_apis/text. + + Args: + vocab_file: The path to the wordpiece vocab file, or None. + sp_model_file: The path to the sentencepiece model file, or None. + Exactly one of vocab_file and sp_model_file must be set. + This determines the type of tokenzer that is used. + do_lower_case: Whether to do lower case. + tokenize_with_offsets: Whether to include the .tokenize_with_offsets + subobject. + default_seq_length: The sequence length of preprocessing results from + root callable. This is also the default sequence length for the + bert_pack_inputs subobject. + + Returns: + A tf.keras.Model object with several attached subobjects, suitable for + saving as a preprocessing SavedModel. + """ + # Select tokenizer. + if bool(vocab_file) == bool(sp_model_file): + raise ValueError("Must set exactly one of vocab_file, sp_model_file") + if vocab_file: + tokenize = layers.BertTokenizer( + vocab_file=vocab_file, + lower_case=do_lower_case, + tokenize_with_offsets=tokenize_with_offsets) + else: + tokenize = layers.SentencepieceTokenizer( + model_file_path=sp_model_file, + lower_case=do_lower_case, + strip_diacritics=True, # Strip diacritics to follow ALBERT model. + tokenize_with_offsets=tokenize_with_offsets) + + # The root object of the preprocessing model can be called to do + # one-shot preprocessing for users with single-sentence inputs. 
+ sentences = tf.keras.layers.Input(shape=(), dtype=tf.string, name="sentences") + if tokenize_with_offsets: + tokens, start_offsets, limit_offsets = tokenize(sentences) + else: + tokens = tokenize(sentences) + pack = layers.BertPackInputs( + seq_length=default_seq_length, + special_tokens_dict=tokenize.get_special_tokens_dict()) + model_inputs = pack(tokens) + preprocessing = tf.keras.Model(sentences, model_inputs) + + # Individual steps of preprocessing are made available as named subobjects + # to enable more general preprocessing. For saving, they need to be Models + # in their own right. + preprocessing.tokenize = tf.keras.Model(sentences, tokens) + # Provide an equivalent to tokenize.get_special_tokens_dict(). + preprocessing.tokenize.get_special_tokens_dict = tf.train.Checkpoint() + preprocessing.tokenize.get_special_tokens_dict.__call__ = tf.function( + lambda: tokenize.get_special_tokens_dict(), # pylint: disable=[unnecessary-lambda] + input_signature=[]) + if tokenize_with_offsets: + preprocessing.tokenize_with_offsets = tf.keras.Model( + sentences, [tokens, start_offsets, limit_offsets]) + preprocessing.tokenize_with_offsets.get_special_tokens_dict = ( + preprocessing.tokenize.get_special_tokens_dict) + # Conceptually, this should be + # preprocessing.bert_pack_inputs = tf.keras.Model(tokens, model_inputs) + # but technicalities require us to use a wrapper (see comments there). + # In particular, seq_length can be overridden when calling this. 
+ preprocessing.bert_pack_inputs = BertPackInputsSavedModelWrapper(pack) + + return preprocessing + + +def _move_to_tmpdir(file_path: Optional[Text], tmpdir: Text) -> Optional[Text]: + """Returns new path with same basename and hash of original path.""" + if file_path is None: return None + olddir, filename = os.path.split(file_path) + hasher = hashlib.sha1() + hasher.update(olddir.encode("utf-8")) + target_dir = os.path.join(tmpdir, hasher.hexdigest()) + target_file = os.path.join(target_dir, filename) + tf.io.gfile.mkdir(target_dir) + tf.io.gfile.copy(file_path, target_file) + return target_file + + +def export_preprocessing(export_path: Text, + *, + vocab_file: Optional[Text] = None, + sp_model_file: Optional[Text] = None, + do_lower_case: bool, + tokenize_with_offsets: bool, + default_seq_length: int, + experimental_disable_assert: bool = False) -> None: + """Exports preprocessing to a SavedModel for TF Hub.""" + with tempfile.TemporaryDirectory() as tmpdir: + # TODO(b/175369555): Remove experimental_disable_assert and its use. + with _maybe_disable_assert(experimental_disable_assert): + preprocessing = create_preprocessing( + vocab_file=_move_to_tmpdir(vocab_file, tmpdir), + sp_model_file=_move_to_tmpdir(sp_model_file, tmpdir), + do_lower_case=do_lower_case, + tokenize_with_offsets=tokenize_with_offsets, + default_seq_length=default_seq_length) + preprocessing.save(export_path, include_optimizer=False, save_format="tf") + if experimental_disable_assert: + _check_no_assert(export_path) + # It helps the unit test to prevent stray copies of the vocab file. + if tf.io.gfile.exists(tmpdir): + raise IOError("Failed to clean up TemporaryDirectory") + + +# TODO(b/175369555): Remove all workarounds for this bug of TensorFlow 2.4 +# when this bug is no longer a concern for publishing new models. +# TensorFlow 2.4 has a placement issue with Assert ops in tf.functions called +# from Dataset.map() on a TPU worker. 
They end up on the TPU coordinator, +# and invoking them from the TPU worker is either inefficient (when possible) +# or impossible (notably when using "headless" TPU workers on Cloud that do not +# have a channel to the coordinator). The bug has been fixed in time for TF 2.5. +# To work around this, the following code avoids Assert ops in the exported +# SavedModels. It monkey-patches calls to tf.Assert from inside TensorFlow and +# replaces them by a no-op while building the exported model. This is fragile, +# so _check_no_assert() validates the result. The resulting model should be fine +# to read on future versions of TF, even if this workaround at export time +# may break eventually. (Failing unit tests will tell.) + + +def _dont_assert(condition, data, summarize=None, name="Assert"): + """The no-op version of tf.Assert installed by _maybe_disable_assert.""" + del condition, data, summarize # Unused. + if tf.executing_eagerly(): + return + with tf.name_scope(name): + return tf.no_op(name="dont_assert") + + +@contextlib.contextmanager +def _maybe_disable_assert(disable_assert): + """Scoped monkey patch of control_flow_ops.Assert to a no-op.""" + if not disable_assert: + yield + return + + original_assert = control_flow_ops.Assert + control_flow_ops.Assert = _dont_assert + yield + control_flow_ops.Assert = original_assert + + +def _check_no_assert(saved_model_path): + """Raises AssertionError if SavedModel contains Assert ops.""" + saved_model_filename = os.path.join(saved_model_path, "saved_model.pb") + with tf.io.gfile.GFile(saved_model_filename, "rb") as f: + saved_model = saved_model_pb2.SavedModel.FromString(f.read()) + + assert_nodes = [] + graph_def = saved_model.meta_graphs[0].graph_def + assert_nodes += ["node '{}' in global graph".format(n.name) + for n in graph_def.node if n.op == "Assert"] + for fdef in graph_def.library.function: + assert_nodes += [ + "node '{}' in function '{}'".format(n.name, fdef.signature.name) + for n in fdef.node_def if n.op 
== "Assert"] + if assert_nodes: + raise AssertionError( + "Internal tool error: " + "failed to suppress {} Assert ops in SavedModel:\n{}".format( + len(assert_nodes), "\n".join(assert_nodes[:10]))) diff --git a/official/nlp/tools/export_tfhub_lib_test.py b/official/nlp/tools/export_tfhub_lib_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d2fade8e9580bb9c7df80de21a523fae38fab0d8 --- /dev/null +++ b/official/nlp/tools/export_tfhub_lib_test.py @@ -0,0 +1,992 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests export_tfhub_lib.""" + +import os +import tempfile + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +import tensorflow_hub as hub +import tensorflow_text as text + +from sentencepiece import SentencePieceTrainer +from official.modeling import tf_utils +from official.nlp.bert import configs +from official.nlp.configs import encoders +from official.nlp.modeling import layers +from official.nlp.modeling import models +from official.nlp.tools import export_tfhub_lib + + +def _get_bert_config_or_encoder_config(use_bert_config, hidden_size, + num_hidden_layers, vocab_size=100): + """Returns config args for export_tfhub_lib._create_model().""" + if use_bert_config: + bert_config = configs.BertConfig( + vocab_size=vocab_size, + hidden_size=hidden_size, + intermediate_size=32, + max_position_embeddings=128, + num_attention_heads=2, + num_hidden_layers=num_hidden_layers) + encoder_config = None + else: + bert_config = None + encoder_config = encoders.EncoderConfig( + type="albert", + albert=encoders.AlbertEncoderConfig( + vocab_size=vocab_size, + embedding_width=16, + hidden_size=hidden_size, + intermediate_size=32, + max_position_embeddings=128, + num_attention_heads=2, + num_layers=num_hidden_layers, + dropout_rate=0.1)) + + return bert_config, encoder_config + + +def _get_vocab_or_sp_model_dummy(temp_dir, use_sp_model): + """Returns tokenizer asset args for export_tfhub_lib.export_model().""" + dummy_file = os.path.join(temp_dir, "dummy_file.txt") + with tf.io.gfile.GFile(dummy_file, "w") as f: + f.write("dummy content") + if use_sp_model: + vocab_file, sp_model_file = None, dummy_file + else: + vocab_file, sp_model_file = dummy_file, None + return vocab_file, sp_model_file + + +def _read_asset(asset: tf.saved_model.Asset): + return tf.io.gfile.GFile(asset.asset_path.numpy()).read() + + +def _find_lambda_layers(layer): + """Returns list of all Lambda layers in a Keras model.""" + if isinstance(layer, 
tf.keras.layers.Lambda): + return [layer] + elif hasattr(layer, "layers"): # It's nested, like a Model. + result = [] + for l in layer.layers: + result += _find_lambda_layers(l) + return result + else: + return [] + + +class ExportModelTest(tf.test.TestCase, parameterized.TestCase): + """Tests exporting a Transformer Encoder model as a SavedModel. + + This covers export from an Encoder checkpoint to a SavedModel without + the .mlm subobject. This is no longer preferred, but still useful + for models like Electra that are trained without the MLM task. + + The export code is generic. This test focuses on two main cases + (the most important ones in practice when this was written in 2020): + - BERT built from a legacy BertConfig, for use with BertTokenizer. + - ALBERT built from an EncoderConfig (as a representative of all other + choices beyond BERT, for use with SentencepieceTokenizer (the one + alternative to BertTokenizer). + """ + + @parameterized.named_parameters(("Bert", True), ("Albert", False)) + def test_export_model(self, use_bert): + # Create the encoder and export it. 
+ hidden_size = 16 + num_hidden_layers = 1 + bert_config, encoder_config = _get_bert_config_or_encoder_config( + use_bert, hidden_size, num_hidden_layers) + bert_model, encoder = export_tfhub_lib._create_model( + bert_config=bert_config, encoder_config=encoder_config, with_mlm=False) + self.assertEmpty( + _find_lambda_layers(bert_model), + "Lambda layers are non-portable since they serialize Python bytecode.") + model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") + checkpoint = tf.train.Checkpoint(encoder=encoder) + checkpoint.save(os.path.join(model_checkpoint_dir, "test")) + model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) + + vocab_file, sp_model_file = _get_vocab_or_sp_model_dummy( + self.get_temp_dir(), use_sp_model=not use_bert) + export_path = os.path.join(self.get_temp_dir(), "hub") + export_tfhub_lib.export_model( + export_path=export_path, + bert_config=bert_config, + encoder_config=encoder_config, + model_checkpoint_path=model_checkpoint_path, + with_mlm=False, + vocab_file=vocab_file, + sp_model_file=sp_model_file, + do_lower_case=True) + + # Restore the exported model. + hub_layer = hub.KerasLayer(export_path, trainable=True) + + # Check legacy tokenization data. + if use_bert: + self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy()) + self.assertEqual("dummy content", + _read_asset(hub_layer.resolved_object.vocab_file)) + self.assertFalse(hasattr(hub_layer.resolved_object, "sp_model_file")) + else: + self.assertFalse(hasattr(hub_layer.resolved_object, "do_lower_case")) + self.assertFalse(hasattr(hub_layer.resolved_object, "vocab_file")) + self.assertEqual("dummy content", + _read_asset(hub_layer.resolved_object.sp_model_file)) + + # Check restored weights. 
+ self.assertEqual(len(bert_model.trainable_weights), + len(hub_layer.trainable_weights)) + for source_weight, hub_weight in zip(bert_model.trainable_weights, + hub_layer.trainable_weights): + self.assertAllClose(source_weight.numpy(), hub_weight.numpy()) + + # Check computation. + seq_length = 10 + dummy_ids = np.zeros((2, seq_length), dtype=np.int32) + input_dict = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids) + hub_output = hub_layer(input_dict) + source_output = bert_model(input_dict) + encoder_output = encoder(input_dict) + self.assertEqual(hub_output["pooled_output"].shape, (2, hidden_size)) + self.assertEqual(hub_output["sequence_output"].shape, + (2, seq_length, hidden_size)) + self.assertLen(hub_output["encoder_outputs"], num_hidden_layers) + + for key in ("pooled_output", "sequence_output", "encoder_outputs"): + self.assertAllClose(source_output[key], hub_output[key]) + self.assertAllClose(source_output[key], encoder_output[key]) + + # The "default" output of BERT as a text representation is pooled_output. + self.assertAllClose(hub_output["pooled_output"], hub_output["default"]) + + # Test that training=True makes a difference (activates dropout). + def _dropout_mean_stddev(training, num_runs=20): + input_ids = np.array([[14, 12, 42, 95, 99]], np.int32) + input_dict = dict( + input_word_ids=input_ids, + input_mask=np.ones_like(input_ids), + input_type_ids=np.zeros_like(input_ids)) + outputs = np.concatenate([ + hub_layer(input_dict, training=training)["pooled_output"] + for _ in range(num_runs) + ]) + return np.mean(np.std(outputs, axis=0)) + + self.assertLess(_dropout_mean_stddev(training=False), 1e-6) + self.assertGreater(_dropout_mean_stddev(training=True), 1e-3) + + # Test propagation of seq_length in shape inference. 
+ input_word_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_mask = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_type_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_dict = dict( + input_word_ids=input_word_ids, + input_mask=input_mask, + input_type_ids=input_type_ids) + output_dict = hub_layer(input_dict) + pooled_output = output_dict["pooled_output"] + sequence_output = output_dict["sequence_output"] + encoder_outputs = output_dict["encoder_outputs"] + + self.assertEqual(pooled_output.shape.as_list(), [None, hidden_size]) + self.assertEqual(sequence_output.shape.as_list(), + [None, seq_length, hidden_size]) + self.assertLen(encoder_outputs, num_hidden_layers) + + +class ExportModelWithMLMTest(tf.test.TestCase, parameterized.TestCase): + """Tests exporting a Transformer Encoder model as a SavedModel. + + This covers export from a Pretrainer checkpoint to a SavedModel including + the .mlm subobject, which is the preferred way since 2020. + + The export code is generic. This test focuses on two main cases + (the most important ones in practice when this was written in 2020): + - BERT built from a legacy BertConfig, for use with BertTokenizer. + - ALBERT built from an EncoderConfig (as a representative of all other + choices beyond BERT, for use with SentencepieceTokenizer (the one + alternative to BertTokenizer). 
+ """ + + def test_copy_pooler_dense_to_encoder(self): + encoder_config = encoders.EncoderConfig( + type="bert", + bert=encoders.BertEncoderConfig( + hidden_size=24, intermediate_size=48, num_layers=2)) + cls_heads = [ + layers.ClassificationHead( + inner_dim=24, num_classes=2, name="next_sentence") + ] + encoder = encoders.build_encoder(encoder_config) + pretrainer = models.BertPretrainerV2( + encoder_network=encoder, + classification_heads=cls_heads, + mlm_activation=tf_utils.get_activation( + encoder_config.get().hidden_activation)) + # Makes sure the pretrainer variables are created. + _ = pretrainer(pretrainer.inputs) + checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items) + model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") + checkpoint.save(os.path.join(model_checkpoint_dir, "test")) + + vocab_file, sp_model_file = _get_vocab_or_sp_model_dummy( + self.get_temp_dir(), use_sp_model=True) + export_path = os.path.join(self.get_temp_dir(), "hub") + export_tfhub_lib.export_model( + export_path=export_path, + encoder_config=encoder_config, + model_checkpoint_path=tf.train.latest_checkpoint(model_checkpoint_dir), + with_mlm=True, + copy_pooler_dense_to_encoder=True, + vocab_file=vocab_file, + sp_model_file=sp_model_file, + do_lower_case=True) + # Restores a hub KerasLayer. + hub_layer = hub.KerasLayer(export_path, trainable=True) + dummy_ids = np.zeros((2, 10), dtype=np.int32) + input_dict = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids) + hub_pooled_output = hub_layer(input_dict)["pooled_output"] + encoder_outputs = encoder(input_dict) + # Verify that hub_layer's pooled_output is the same as the output of next + # sentence prediction's dense layer. + pretrained_pooled_output = cls_heads[0].dense( + (encoder_outputs["sequence_output"][:, 0, :])) + self.assertAllClose(hub_pooled_output, pretrained_pooled_output) + # But the pooled_output between encoder and hub_layer are not the same. 
+ encoder_pooled_output = encoder_outputs["pooled_output"] + self.assertNotAllClose(hub_pooled_output, encoder_pooled_output) + + @parameterized.named_parameters( + ("Bert", True), + ("Albert", False), + ) + def test_export_model_with_mlm(self, use_bert): + # Create the encoder and export it. + hidden_size = 16 + num_hidden_layers = 2 + bert_config, encoder_config = _get_bert_config_or_encoder_config( + use_bert, hidden_size, num_hidden_layers) + bert_model, pretrainer = export_tfhub_lib._create_model( + bert_config=bert_config, encoder_config=encoder_config, with_mlm=True) + self.assertEmpty( + _find_lambda_layers(bert_model), + "Lambda layers are non-portable since they serialize Python bytecode.") + bert_model_with_mlm = bert_model.mlm + model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") + + checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items) + + checkpoint.save(os.path.join(model_checkpoint_dir, "test")) + model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) + + vocab_file, sp_model_file = _get_vocab_or_sp_model_dummy( + self.get_temp_dir(), use_sp_model=not use_bert) + export_path = os.path.join(self.get_temp_dir(), "hub") + export_tfhub_lib.export_model( + export_path=export_path, + bert_config=bert_config, + encoder_config=encoder_config, + model_checkpoint_path=model_checkpoint_path, + with_mlm=True, + vocab_file=vocab_file, + sp_model_file=sp_model_file, + do_lower_case=True) + + # Restore the exported model. + hub_layer = hub.KerasLayer(export_path, trainable=True) + + # Check legacy tokenization data. 
+ if use_bert: + self.assertTrue(hub_layer.resolved_object.do_lower_case.numpy()) + self.assertEqual("dummy content", + _read_asset(hub_layer.resolved_object.vocab_file)) + self.assertFalse(hasattr(hub_layer.resolved_object, "sp_model_file")) + else: + self.assertFalse(hasattr(hub_layer.resolved_object, "do_lower_case")) + self.assertFalse(hasattr(hub_layer.resolved_object, "vocab_file")) + self.assertEqual("dummy content", + _read_asset(hub_layer.resolved_object.sp_model_file)) + + # Check restored weights. + # Note that we set `_auto_track_sub_layers` to False when exporting the + # SavedModel, so hub_layer has the same number of weights as bert_model; + # otherwise, hub_layer will have extra weights from its `mlm` subobject. + self.assertEqual(len(bert_model.trainable_weights), + len(hub_layer.trainable_weights)) + for source_weight, hub_weight in zip(bert_model.trainable_weights, + hub_layer.trainable_weights): + self.assertAllClose(source_weight, hub_weight) + + # Check computation. + seq_length = 10 + dummy_ids = np.zeros((2, seq_length), dtype=np.int32) + input_dict = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids) + hub_outputs_dict = hub_layer(input_dict) + source_outputs_dict = bert_model(input_dict) + encoder_outputs_dict = pretrainer.encoder_network( + [dummy_ids, dummy_ids, dummy_ids]) + self.assertEqual(hub_outputs_dict["pooled_output"].shape, (2, hidden_size)) + self.assertEqual(hub_outputs_dict["sequence_output"].shape, + (2, seq_length, hidden_size)) + for output_key in ("pooled_output", "sequence_output", "encoder_outputs"): + self.assertAllClose(source_outputs_dict[output_key], + hub_outputs_dict[output_key]) + self.assertAllClose(source_outputs_dict[output_key], + encoder_outputs_dict[output_key]) + + # The "default" output of BERT as a text representation is pooled_output. 
+ self.assertAllClose(hub_outputs_dict["pooled_output"], + hub_outputs_dict["default"]) + + # Test that training=True makes a difference (activates dropout). + def _dropout_mean_stddev(training, num_runs=20): + input_ids = np.array([[14, 12, 42, 95, 99]], np.int32) + input_dict = dict( + input_word_ids=input_ids, + input_mask=np.ones_like(input_ids), + input_type_ids=np.zeros_like(input_ids)) + outputs = np.concatenate([ + hub_layer(input_dict, training=training)["pooled_output"] + for _ in range(num_runs) + ]) + return np.mean(np.std(outputs, axis=0)) + + self.assertLess(_dropout_mean_stddev(training=False), 1e-6) + self.assertGreater(_dropout_mean_stddev(training=True), 1e-3) + + # Checks sub-object `mlm`. + self.assertTrue(hasattr(hub_layer.resolved_object, "mlm")) + + self.assertLen(hub_layer.resolved_object.mlm.trainable_variables, + len(bert_model_with_mlm.trainable_weights)) + self.assertLen(hub_layer.resolved_object.mlm.trainable_variables, + len(pretrainer.trainable_weights)) + for source_weight, hub_weight, pretrainer_weight in zip( + bert_model_with_mlm.trainable_weights, + hub_layer.resolved_object.mlm.trainable_variables, + pretrainer.trainable_weights): + self.assertAllClose(source_weight, hub_weight) + self.assertAllClose(source_weight, pretrainer_weight) + + max_predictions_per_seq = 4 + mlm_positions = np.zeros((2, max_predictions_per_seq), dtype=np.int32) + input_dict = dict( + input_word_ids=dummy_ids, + input_mask=dummy_ids, + input_type_ids=dummy_ids, + masked_lm_positions=mlm_positions) + hub_mlm_outputs_dict = hub_layer.resolved_object.mlm(input_dict) + source_mlm_outputs_dict = bert_model_with_mlm(input_dict) + for output_key in ("pooled_output", "sequence_output", "mlm_logits", + "encoder_outputs"): + self.assertAllClose(hub_mlm_outputs_dict[output_key], + source_mlm_outputs_dict[output_key]) + + pretrainer_mlm_logits_output = pretrainer(input_dict)["mlm_logits"] + self.assertAllClose(hub_mlm_outputs_dict["mlm_logits"], + 
pretrainer_mlm_logits_output) + + # Test that training=True makes a difference (activates dropout). + def _dropout_mean_stddev_mlm(training, num_runs=20): + input_ids = np.array([[14, 12, 42, 95, 99]], np.int32) + mlm_position_ids = np.array([[1, 2, 3, 4]], np.int32) + input_dict = dict( + input_word_ids=input_ids, + input_mask=np.ones_like(input_ids), + input_type_ids=np.zeros_like(input_ids), + masked_lm_positions=mlm_position_ids) + outputs = np.concatenate([ + hub_layer.resolved_object.mlm(input_dict, + training=training)["pooled_output"] + for _ in range(num_runs) + ]) + return np.mean(np.std(outputs, axis=0)) + + self.assertLess(_dropout_mean_stddev_mlm(training=False), 1e-6) + self.assertGreater(_dropout_mean_stddev_mlm(training=True), 1e-3) + + # Test propagation of seq_length in shape inference. + input_word_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_mask = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_type_ids = tf.keras.layers.Input(shape=(seq_length,), dtype=tf.int32) + input_dict = dict( + input_word_ids=input_word_ids, + input_mask=input_mask, + input_type_ids=input_type_ids) + hub_outputs_dict = hub_layer(input_dict) + self.assertEqual(hub_outputs_dict["pooled_output"].shape.as_list(), + [None, hidden_size]) + self.assertEqual(hub_outputs_dict["sequence_output"].shape.as_list(), + [None, seq_length, hidden_size]) + + +_STRING_NOT_TO_LEAK = "private_path_component_" + + +class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase): + + def _make_vocab_file(self, vocab, filename="vocab.txt", add_mask_token=False): + """Creates wordpiece vocab file with given words plus special tokens. + + The tokens of the resulting model are, in this order: + [PAD], [UNK], [CLS], [SEP], [MASK]*, ...vocab... + *=if requested by args. 
+ + This function also accepts wordpieces that start with the ## continuation + marker, but avoiding those makes this function interchangeable with + _make_sp_model_file(), up to the extra dimension returned by BertTokenizer. + + Args: + vocab: a list of strings with the words or wordpieces to put into the + model's vocabulary. Do not include special tokens here. + filename: Optionally, a filename (relative to the temporary directory + created by this function). + add_mask_token: an optional bool, whether to include a [MASK] token. + + Returns: + The absolute filename of the created vocab file. + """ + full_vocab = ["[PAD]", "[UNK]", "[CLS]", "[SEP]" + ] + ["[MASK]"]*add_mask_token + vocab + path = os.path.join( + tempfile.mkdtemp(dir=self.get_temp_dir(), # New subdir each time. + prefix=_STRING_NOT_TO_LEAK), + filename) + with tf.io.gfile.GFile(path, "w") as f: + f.write("\n".join(full_vocab + [""])) + return path + + def _make_sp_model_file(self, vocab, prefix="spm", add_mask_token=False): + """Creates Sentencepiece word model with given words plus special tokens. + + The tokens of the resulting model are, in this order: + , , [CLS], [SEP], [MASK]*, ...vocab..., , + *=if requested by args. + + The words in the input vocab are plain text, without the whitespace marker. + That makes this function interchangeable with _make_vocab_file(). + + Args: + vocab: a list of strings with the words to put into the model's + vocabulary. Do not include special tokens here. + prefix: an optional string, to change the filename prefix for the model + (relative to the temporary directory created by this function). + add_mask_token: an optional bool, whether to include a [MASK] token. + + Returns: + The absolute filename of the created Sentencepiece model file. + """ + model_prefix = os.path.join( + tempfile.mkdtemp(dir=self.get_temp_dir()), # New subdir each time. 
+ prefix) + input_file = model_prefix + "_train_input.txt" + # Create input text for training the sp model from the tokens provided. + # Repeat tokens, the earlier the more, because they are sorted by frequency. + input_text = [] + for i, token in enumerate(vocab): + input_text.append(" ".join([token] * (len(vocab) - i))) + with tf.io.gfile.GFile(input_file, "w") as f: + f.write("\n".join(input_text + [""])) + control_symbols = "[CLS],[SEP]" + full_vocab_size = len(vocab) + 6 # , , [CLS], [SEP], , . + if add_mask_token: + control_symbols += ",[MASK]" + full_vocab_size += 1 + flags = dict( + model_prefix=model_prefix, + model_type="word", + input=input_file, + pad_id=0, unk_id=1, control_symbols=control_symbols, + vocab_size=full_vocab_size, + bos_id=full_vocab_size-2, eos_id=full_vocab_size-1) + SentencePieceTrainer.Train( + " ".join(["--{}={}".format(k, v) for k, v in flags.items()])) + return model_prefix + ".model" + + def _do_export(self, vocab, do_lower_case, default_seq_length=128, + tokenize_with_offsets=True, use_sp_model=False, + experimental_disable_assert=False, add_mask_token=False): + """Runs SavedModel export and returns the export_path.""" + export_path = tempfile.mkdtemp(dir=self.get_temp_dir()) + vocab_file = sp_model_file = None + if use_sp_model: + sp_model_file = self._make_sp_model_file(vocab, + add_mask_token=add_mask_token) + else: + vocab_file = self._make_vocab_file(vocab, add_mask_token=add_mask_token) + export_tfhub_lib.export_preprocessing( + export_path, + vocab_file=vocab_file, + sp_model_file=sp_model_file, + do_lower_case=do_lower_case, + tokenize_with_offsets=tokenize_with_offsets, + default_seq_length=default_seq_length, + experimental_disable_assert=experimental_disable_assert) + # Invalidate the original filename to verify loading from the SavedModel. 
+ tf.io.gfile.remove(sp_model_file or vocab_file) + return export_path + + def test_no_leaks(self): + """Tests not leaking the path to the original vocab file.""" + path = self._do_export( + ["d", "ef", "abc", "xy"], do_lower_case=True, use_sp_model=False) + with tf.io.gfile.GFile(os.path.join(path, "saved_model.pb"), "rb") as f: + self.assertFalse( # pylint: disable=g-generic-assert + _STRING_NOT_TO_LEAK.encode("ascii") in f.read()) + + @parameterized.named_parameters(("Bert", False), ("Sentencepiece", True)) + def test_exported_callables(self, use_sp_model): + preprocess = tf.saved_model.load(self._do_export( + ["d", "ef", "abc", "xy"], do_lower_case=True, + tokenize_with_offsets=not use_sp_model, # TODO(b/181866850): drop this. + experimental_disable_assert=True, # TODO(b/175369555): drop this. + use_sp_model=use_sp_model)) + + def fold_dim(rt): + """Removes the word/subword distinction of BertTokenizer.""" + return rt if use_sp_model else rt.merge_dims(1, 2) + + # .tokenize() + inputs = tf.constant(["abc d ef", "ABC D EF d"]) + token_ids = preprocess.tokenize(inputs) + self.assertAllEqual(fold_dim(token_ids), + tf.ragged.constant([[6, 4, 5], + [6, 4, 5, 4]])) + + special_tokens_dict = { + k: v.numpy().item() # Expecting eager Tensor, converting to Python. + for k, v in preprocess.tokenize.get_special_tokens_dict().items()} + self.assertDictEqual(special_tokens_dict, + dict(padding_id=0, + start_of_sequence_id=2, + end_of_segment_id=3, + vocab_size=4+6 if use_sp_model else 4+4)) + + # .tokenize_with_offsets() + if use_sp_model: + # TODO(b/181866850): Enable tokenize_with_offsets when it works and test. 
+ self.assertFalse(hasattr(preprocess, "tokenize_with_offsets")) + else: + token_ids, start_offsets, limit_offsets = ( + preprocess.tokenize_with_offsets(inputs)) + self.assertAllEqual(fold_dim(token_ids), + tf.ragged.constant([[6, 4, 5], + [6, 4, 5, 4]])) + self.assertAllEqual(fold_dim(start_offsets), + tf.ragged.constant([[0, 4, 6], + [0, 4, 6, 9]])) + self.assertAllEqual(fold_dim(limit_offsets), + tf.ragged.constant([[3, 5, 8], + [3, 5, 8, 10]])) + self.assertIs(preprocess.tokenize.get_special_tokens_dict, + preprocess.tokenize_with_offsets.get_special_tokens_dict) + + # Root callable. + bert_inputs = preprocess(inputs) + self.assertAllEqual(bert_inputs["input_word_ids"].shape.as_list(), [2, 128]) + self.assertAllEqual(bert_inputs["input_word_ids"][:, :10], + tf.constant([[2, 6, 4, 5, 3, 0, 0, 0, 0, 0], + [2, 6, 4, 5, 4, 3, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_mask"].shape.as_list(), [2, 128]) + self.assertAllEqual(bert_inputs["input_mask"][:, :10], + tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_type_ids"].shape.as_list(), [2, 128]) + self.assertAllEqual(bert_inputs["input_type_ids"][:, :10], + tf.constant([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])) + + # .bert_pack_inputs() + inputs_2 = tf.constant(["d xy", "xy abc"]) + token_ids_2 = preprocess.tokenize(inputs_2) + bert_inputs = preprocess.bert_pack_inputs( + [token_ids, token_ids_2], seq_length=256) + self.assertAllEqual(bert_inputs["input_word_ids"].shape.as_list(), [2, 256]) + self.assertAllEqual(bert_inputs["input_word_ids"][:, :10], + tf.constant([[2, 6, 4, 5, 3, 4, 7, 3, 0, 0], + [2, 6, 4, 5, 4, 3, 7, 6, 3, 0]])) + self.assertAllEqual(bert_inputs["input_mask"].shape.as_list(), [2, 256]) + self.assertAllEqual(bert_inputs["input_mask"][:, :10], + tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])) + 
self.assertAllEqual(bert_inputs["input_type_ids"].shape.as_list(), [2, 256]) + self.assertAllEqual(bert_inputs["input_type_ids"][:, :10], + tf.constant([[0, 0, 0, 0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 0]])) + + # For BertTokenizer only: repeat relevant parts for do_lower_case=False, + # default_seq_length=10, experimental_disable_assert=False, + # tokenize_with_offsets=False, and without folding the word/subword dimension. + def test_cased_length10(self): + preprocess = tf.saved_model.load(self._do_export( + ["d", "##ef", "abc", "ABC"], + do_lower_case=False, default_seq_length=10, + tokenize_with_offsets=False, + use_sp_model=False, + experimental_disable_assert=False)) + inputs = tf.constant(["abc def", "ABC DEF"]) + token_ids = preprocess.tokenize(inputs) + self.assertAllEqual(token_ids, tf.ragged.constant([[[6], [4, 5]], + [[7], [1]]])) + + self.assertFalse(hasattr(preprocess, "tokenize_with_offsets")) + + bert_inputs = preprocess(inputs) + self.assertAllEqual(bert_inputs["input_word_ids"], + tf.constant([[2, 6, 4, 5, 3, 0, 0, 0, 0, 0], + [2, 7, 1, 3, 0, 0, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])) + + inputs_2 = tf.constant(["d ABC", "ABC abc"]) + token_ids_2 = preprocess.tokenize(inputs_2) + bert_inputs = preprocess.bert_pack_inputs([token_ids, token_ids_2]) + # Test default seq_length=10. 
+ self.assertAllEqual(bert_inputs["input_word_ids"], + tf.constant([[2, 6, 4, 5, 3, 4, 7, 3, 0, 0], + [2, 7, 1, 3, 7, 6, 3, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 1, 1, 1, 0, 0, 0]])) + + # XLA requires fixed shapes for tensors found in graph mode. + # Statically known shapes in Python are a particularly firm way to + # guarantee that, and they are generally more convenient to work with. + # We test that the exported SavedModel plays well with TF's shape + # inference when applied to fully or partially known input shapes. + @parameterized.named_parameters(("Bert", False), ("Sentencepiece", True)) + def test_shapes(self, use_sp_model): + preprocess = tf.saved_model.load(self._do_export( + ["abc", "def"], do_lower_case=True, + tokenize_with_offsets=not use_sp_model, # TODO(b/181866850): drop this. + experimental_disable_assert=True, # TODO(b/175369555): drop this. + use_sp_model=use_sp_model)) + + def expected_bert_input_shapes(batch_size, seq_length): + return dict(input_word_ids=[batch_size, seq_length], + input_mask=[batch_size, seq_length], + input_type_ids=[batch_size, seq_length]) + + for batch_size in [7, None]: + if use_sp_model: + token_out_shape = [batch_size, None] # No word/subword distinction. + else: + token_out_shape = [batch_size, None, None] + self.assertEqual( + _result_shapes_in_tf_function( + preprocess.tokenize, + tf.TensorSpec([batch_size], tf.string)), + token_out_shape, + "with batch_size=%s" % batch_size) + # TODO(b/181866850): Enable tokenize_with_offsets when it works and test. 
+ if use_sp_model: + self.assertFalse(hasattr(preprocess, "tokenize_with_offsets")) + else: + self.assertEqual( + _result_shapes_in_tf_function( + preprocess.tokenize_with_offsets, + tf.TensorSpec([batch_size], tf.string)), + [token_out_shape] * 3, + "with batch_size=%s" % batch_size) + self.assertEqual( + _result_shapes_in_tf_function( + preprocess.bert_pack_inputs, + [tf.RaggedTensorSpec([batch_size, None, None], tf.int32)] * 2, + seq_length=256), expected_bert_input_shapes(batch_size, 256), + "with batch_size=%s" % batch_size) + self.assertEqual( + _result_shapes_in_tf_function(preprocess, + tf.TensorSpec([batch_size], tf.string)), + expected_bert_input_shapes(batch_size, 128), + "with batch_size=%s" % batch_size) + + @parameterized.named_parameters(("Bert", False), ("Sentencepiece", True)) + def test_reexport(self, use_sp_model): + """Test that preprocess keeps working after another save/load cycle.""" + path1 = self._do_export( + ["d", "ef", "abc", "xy"], do_lower_case=True, default_seq_length=10, + tokenize_with_offsets=False, + experimental_disable_assert=True, # TODO(b/175369555): drop this. + use_sp_model=use_sp_model) + path2 = path1.rstrip("/") + ".2" + model1 = tf.saved_model.load(path1) + tf.saved_model.save(model1, path2) + # Delete the first SavedModel to test that the sceond one loads by itself. + # https://github.com/tensorflow/tensorflow/issues/46456 reports such a + # failure case for BertTokenizer. 
+ tf.io.gfile.rmtree(path1) + model2 = tf.saved_model.load(path2) + + inputs = tf.constant(["abc d ef", "ABC D EF d"]) + bert_inputs = model2(inputs) + self.assertAllEqual(bert_inputs["input_word_ids"], + tf.constant([[2, 6, 4, 5, 3, 0, 0, 0, 0, 0], + [2, 6, 4, 5, 4, 3, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_mask"], + tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]])) + self.assertAllEqual(bert_inputs["input_type_ids"], + tf.constant([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])) + + @parameterized.named_parameters(("Bert", True), ("Albert", False)) + def test_preprocessing_for_mlm(self, use_bert): + """Combines both SavedModel types and TF.text helpers for MLM.""" + # Create the preprocessing SavedModel with a [MASK] token. + non_special_tokens = ["hello", "world", + "nice", "movie", "great", "actors", + "quick", "fox", "lazy", "dog"] + preprocess = tf.saved_model.load(self._do_export( + non_special_tokens, do_lower_case=True, + tokenize_with_offsets=use_bert, # TODO(b/181866850): drop this. + experimental_disable_assert=True, # TODO(b/175369555): drop this. + add_mask_token=True, use_sp_model=not use_bert)) + vocab_size = len(non_special_tokens) + (5 if use_bert else 7) + + # Create the encoder SavedModel with an .mlm subobject. + hidden_size = 16 + num_hidden_layers = 2 + bert_config, encoder_config = _get_bert_config_or_encoder_config( + use_bert, hidden_size, num_hidden_layers, vocab_size) + _, pretrainer = export_tfhub_lib._create_model( + bert_config=bert_config, encoder_config=encoder_config, with_mlm=True) + model_checkpoint_dir = os.path.join(self.get_temp_dir(), "checkpoint") + checkpoint = tf.train.Checkpoint(**pretrainer.checkpoint_items) + checkpoint.save(os.path.join(model_checkpoint_dir, "test")) + model_checkpoint_path = tf.train.latest_checkpoint(model_checkpoint_dir) + vocab_file, sp_model_file = _get_vocab_or_sp_model_dummy( # Not used below. 
+ self.get_temp_dir(), use_sp_model=not use_bert)
+ encoder_export_path = os.path.join(self.get_temp_dir(), "encoder_export")
+ export_tfhub_lib.export_model(
+ export_path=encoder_export_path,
+ bert_config=bert_config,
+ encoder_config=encoder_config,
+ model_checkpoint_path=model_checkpoint_path,
+ with_mlm=True,
+ vocab_file=vocab_file,
+ sp_model_file=sp_model_file,
+ do_lower_case=True)
+ encoder = tf.saved_model.load(encoder_export_path)
+
+ # Get special tokens from the vocab (and vocab size).
+ special_tokens_dict = preprocess.tokenize.get_special_tokens_dict()
+ self.assertEqual(int(special_tokens_dict["vocab_size"]), vocab_size)
+ padding_id = int(special_tokens_dict["padding_id"])
+ self.assertEqual(padding_id, 0)
+ start_of_sequence_id = int(special_tokens_dict["start_of_sequence_id"])
+ self.assertEqual(start_of_sequence_id, 2)
+ end_of_segment_id = int(special_tokens_dict["end_of_segment_id"])
+ self.assertEqual(end_of_segment_id, 3)
+ mask_id = int(special_tokens_dict["mask_id"])
+ self.assertEqual(mask_id, 4)
+
+ # A batch of 3 segment pairs.
+ raw_segments = [tf.constant(["hello", "nice movie", "quick fox"]),
+ tf.constant(["world", "great actors", "lazy dog"])]
+ batch_size = 3
+
+ # Misc hyperparameters.
+ seq_length = 10
+ max_selections_per_seq = 2
+
+ # Tokenize inputs.
+ tokenized_segments = [preprocess.tokenize(s) for s in raw_segments]
+ # Trim inputs to eventually fit seq_length.
+ num_special_tokens = len(raw_segments) + 1
+ trimmed_segments = text.WaterfallTrimmer(
+ seq_length - num_special_tokens).trim(tokenized_segments)
+ # Combine input segments into one input sequence.
+ input_ids, segment_ids = text.combine_segments(
+ trimmed_segments,
+ start_of_sequence_id=start_of_sequence_id,
+ end_of_segment_id=end_of_segment_id)
+ # Apply random masking controlled by policy objects. 
+ (masked_input_ids, masked_lm_positions,
+ masked_ids) = text.mask_language_model(
+ input_ids=input_ids,
+ item_selector=text.RandomItemSelector(
+ max_selections_per_seq,
+ selection_rate=0.5, # Adjusted for the short test examples.
+ unselectable_ids=[start_of_sequence_id, end_of_segment_id]),
+ mask_values_chooser=text.MaskValuesChooser(
+ vocab_size=vocab_size, mask_token=mask_id,
+ # Always put [MASK] to have a predictable result.
+ mask_token_rate=1.0, random_token_rate=0.0))
+ # Pad to fixed-length Transformer encoder inputs.
+ input_word_ids, _ = text.pad_model_inputs(masked_input_ids,
+ seq_length,
+ pad_value=padding_id)
+ input_type_ids, input_mask = text.pad_model_inputs(segment_ids, seq_length,
+ pad_value=0)
+ masked_lm_positions, _ = text.pad_model_inputs(masked_lm_positions,
+ max_selections_per_seq,
+ pad_value=0)
+ masked_lm_positions = tf.cast(masked_lm_positions, tf.int32)
+ num_predictions = int(tf.shape(masked_lm_positions)[1])
+
+ # Test transformer inputs.
+ self.assertEqual(num_predictions, max_selections_per_seq)
+ expected_word_ids = np.array([
+ # [CLS] hello [SEP] world [SEP]
+ [2, 5, 3, 6, 3, 0, 0, 0, 0, 0],
+ # [CLS] nice movie [SEP] great actors [SEP]
+ [2, 7, 8, 3, 9, 10, 3, 0, 0, 0],
+ # [CLS] quick fox [SEP] lazy dog [SEP]
+ [2, 11, 12, 3, 13, 14, 3, 0, 0, 0]])
+ for i in range(batch_size):
+ for j in range(num_predictions):
+ k = int(masked_lm_positions[i, j])
+ if k != 0:
+ expected_word_ids[i, k] = 4 # [MASK]
+ self.assertAllEqual(input_word_ids, expected_word_ids)
+
+ # Call the MLM head of the Transformer encoder. 
+ mlm_inputs = dict(
+ input_word_ids=input_word_ids,
+ input_mask=input_mask,
+ input_type_ids=input_type_ids,
+ masked_lm_positions=masked_lm_positions,
+ )
+ mlm_outputs = encoder.mlm(mlm_inputs)
+ self.assertEqual(mlm_outputs["pooled_output"].shape,
+ (batch_size, hidden_size))
+ self.assertEqual(mlm_outputs["sequence_output"].shape,
+ (batch_size, seq_length, hidden_size))
+ self.assertEqual(mlm_outputs["mlm_logits"].shape,
+ (batch_size, num_predictions, vocab_size))
+ self.assertLen(mlm_outputs["encoder_outputs"], num_hidden_layers)
+
+ # A real trainer would now compute the loss of mlm_logits
+ # trying to predict the masked_ids.
+ del masked_ids # Unused.
+
+ @parameterized.named_parameters(("Bert", False), ("Sentencepiece", True))
+ def test_special_tokens_in_estimator(self, use_sp_model):
+ """Tests getting special tokens without an Eager init context."""
+ preprocess_export_path = self._do_export(
+ ["d", "ef", "abc", "xy"], do_lower_case=True,
+ use_sp_model=use_sp_model, tokenize_with_offsets=False)
+
+ def _get_special_tokens_dict(obj):
+ """Returns special tokens of restored tokenizer as Python values."""
+ if tf.executing_eagerly():
+ special_tokens_numpy = {k: v.numpy()
+ for k, v in obj.get_special_tokens_dict().items()}
+ else:
+ with tf.Graph().as_default():
+ # This code expects `get_special_tokens_dict()` to be a tf.function
+ # with no dependencies (bound args) from the context it was loaded in,
+ # and boldly assumes that it can just be called in a different context.
+ special_tokens_tensors = obj.get_special_tokens_dict()
+ with tf.compat.v1.Session() as sess:
+ special_tokens_numpy = sess.run(special_tokens_tensors)
+ return {k: v.item() # Numpy to Python.
+ for k, v in special_tokens_numpy.items()}
+
+ def input_fn():
+ self.assertFalse(tf.executing_eagerly())
+ # Build a preprocessing Model. 
+ sentences = tf.keras.layers.Input(shape=[], dtype=tf.string) + preprocess = tf.saved_model.load(preprocess_export_path) + tokenize = hub.KerasLayer(preprocess.tokenize) + special_tokens_dict = _get_special_tokens_dict(tokenize.resolved_object) + for k, v in special_tokens_dict.items(): + self.assertIsInstance(v, int, "Unexpected type for {}".format(k)) + tokens = tokenize(sentences) + packed_inputs = layers.BertPackInputs( + 4, special_tokens_dict=special_tokens_dict)(tokens) + preprocessing = tf.keras.Model(sentences, packed_inputs) + # Map the dataset. + ds = tf.data.Dataset.from_tensors( + (tf.constant(["abc", "D EF"]), tf.constant([0, 1]))) + ds = ds.map(lambda features, labels: (preprocessing(features), labels)) + return ds + + def model_fn(features, labels, mode): + del labels # Unused. + return tf.estimator.EstimatorSpec(mode=mode, + predictions=features["input_word_ids"]) + + estimator = tf.estimator.Estimator(model_fn=model_fn) + outputs = list(estimator.predict(input_fn)) + self.assertAllEqual(outputs, np.array([[2, 6, 3, 0], + [2, 4, 5, 3]])) + + # TODO(b/175369555): Remove that code and its test. + @parameterized.named_parameters(("Bert", False), ("Sentencepiece", True)) + def test_check_no_assert(self, use_sp_model): + """Tests the self-check during export without assertions.""" + preprocess_export_path = self._do_export( + ["d", "ef", "abc", "xy"], do_lower_case=True, + use_sp_model=use_sp_model, tokenize_with_offsets=False, + experimental_disable_assert=False) + with self.assertRaisesRegex(AssertionError, + r"failed to suppress \d+ Assert ops"): + export_tfhub_lib._check_no_assert(preprocess_export_path) + + +def _result_shapes_in_tf_function(fn, *args, **kwargs): + """Returns shapes (as lists) observed on the result of `fn`. + + Args: + fn: A callable. + *args: TensorSpecs for Tensor-valued arguments and actual values + for Python-valued arguments to fn. + **kwargs: Same for keyword arguments. 
+
+ Returns:
+ The nest of partial tensor shapes (as lists) that is statically known inside
+ tf.function(fn)(*args, **kwargs) for the nest of its results.
+ """
+ # Use a captured mutable container for a side output from the wrapper.
+ uninitialized = "uninitialized!"
+ result_shapes_container = [uninitialized]
+ assert result_shapes_container[0] is uninitialized
+
+ @tf.function
+ def shape_reporting_wrapper(*args, **kwargs):
+ result = fn(*args, **kwargs)
+ result_shapes_container[0] = tf.nest.map_structure(
+ lambda x: x.shape.as_list(), result)
+ return result
+
+ shape_reporting_wrapper.get_concrete_function(*args, **kwargs)
+ assert result_shapes_container[0] is not uninitialized
+ return result_shapes_container[0]
+
+
+if __name__ == "__main__":
+ tf.test.main()
diff --git a/official/nlp/train.py b/official/nlp/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..1af16c9218229c473acbe86665924fc33dfa1a93
--- /dev/null
+++ b/official/nlp/train.py
@@ -0,0 +1,69 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+ +"""TFM common training driver.""" + +from absl import app +from absl import flags +import gin + +from official.common import distribute_utils +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu, + **params.runtime.model_parallelism()) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + train_utils.save_gin_config(FLAGS.mode, model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/nlp/train_ctl_continuous_finetune.py b/official/nlp/train_ctl_continuous_finetune.py new file mode 100644 index 0000000000000000000000000000000000000000..66d6ebfd6ecdbf95b74520bd909ad93976097a02 --- /dev/null +++ b/official/nlp/train_ctl_continuous_finetune.py @@ -0,0 +1,48 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""TFM continuous finetuning+eval training driver."""
+from absl import app
+from absl import flags
+import gin
+
+# pylint: disable=unused-import
+from official.common import registry_imports
+# pylint: enable=unused-import
+from official.common import flags as tfm_flags
+from official.core import train_utils
+from official.nlp import continuous_finetune_lib
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_integer(
+ 'pretrain_steps',
+ default=None,
+ help='The number of total training steps for the pretraining job.')
+
+
+def main(_):
+ # TODO(b/177863554): consolidate to nlp/train.py
+ gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
+ params = train_utils.parse_configuration(FLAGS)
+ model_dir = FLAGS.model_dir
+ train_utils.serialize_config(params, model_dir)
+ continuous_finetune_lib.run_continuous_finetune(
+ FLAGS.mode, params, model_dir, pretrain_steps=FLAGS.pretrain_steps)
+ train_utils.save_gin_config(FLAGS.mode, model_dir)
+
+
+if __name__ == '__main__':
+ tfm_flags.define_flags()
+ app.run(main)
diff --git a/official/nlp/transformer/README.md b/official/nlp/transformer/README.md
index 1215ed574b316030f69713de8dc3000ea64e3df6..a3aec5f9a052fa4e591df7c477011d626e6f257b 100644
--- a/official/nlp/transformer/README.md
+++ b/official/nlp/transformer/README.md
@@ -3,9 +3,11 @@ This is an implementation of the Transformer translation model as described in
 the [Attention is All You Need](https://arxiv.org/abs/1706.03762) paper. The
 implementation leverages tf.keras and makes sure it is compatible with TF 2.x.
 
-**Note: this transformer folder is subject to be integrated into official/nlp
-folder. Due to its dependencies, we will finish the refactoring after the model
-garden 2.1 release.**
+**Warning: the features in the `transformer/` folder have been fully integrated
+into nlp/modeling.
+Due to its dependencies, we will remove this folder after the model
+garden 2.5 release.
The model in `nlp/modeling/models/seq2seq_transformer.py` is +identical to the model in this folder.** ## Contents * [Contents](#contents) diff --git a/official/nlp/transformer/__init__.py b/official/nlp/transformer/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/transformer/__init__.py +++ b/official/nlp/transformer/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/transformer/attention_layer.py b/official/nlp/transformer/attention_layer.py index 114bd5fadc3064f0ff3c895245d8676e47a0bad4..802117c24d7896ac5ebe286afc2420656e628ad6 100644 --- a/official/nlp/transformer/attention_layer.py +++ b/official/nlp/transformer/attention_layer.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Implementation of multiheaded attention and self-attention layers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Implementation of multiheaded attention and self-attention layers.""" import math import tensorflow as tf @@ -88,7 +83,12 @@ class Attention(tf.keras.layers.Layer): "attention_dropout": self.attention_dropout, } - def call(self, query_input, source_input, bias, training, cache=None, + def call(self, + query_input, + source_input, + bias, + training, + cache=None, decode_loop_step=None): """Apply attention mechanism to query_input and source_input. @@ -102,9 +102,9 @@ class Attention(tf.keras.layers.Layer): cache: (Used during prediction) A dictionary with tensors containing results of previous attentions. The dictionary must have the items: {"k": tensor with shape [batch_size, i, heads, dim_per_head], - "v": tensor with shape [batch_size, i, heads, dim_per_head]} - where i is the current decoded length for non-padded decode, or max - sequence length for padded decode. + "v": tensor with shape [batch_size, i, heads, dim_per_head]} where + i is the current decoded length for non-padded decode, or max + sequence length for padded decode. decode_loop_step: An integer, step number of the decoding loop. Used only for autoregressive inference on TPU. @@ -142,7 +142,7 @@ class Attention(tf.keras.layers.Layer): # Scale query to prevent the dot product between query and key from growing # too large. 
depth = (self.hidden_size // self.num_heads) - query *= depth ** -0.5 + query *= depth**-0.5 # Calculate dot product attention logits = tf.einsum("BTNH,BFNH->BNFT", key, query) @@ -164,7 +164,11 @@ class Attention(tf.keras.layers.Layer): class SelfAttention(Attention): """Multiheaded self-attention layer.""" - def call(self, query_input, bias, training, cache=None, + def call(self, + query_input, + bias, + training, + cache=None, decode_loop_step=None): - return super(SelfAttention, self).call( - query_input, query_input, bias, training, cache, decode_loop_step) + return super(SelfAttention, self).call(query_input, query_input, bias, + training, cache, decode_loop_step) diff --git a/official/nlp/transformer/beam_search.py b/official/nlp/transformer/beam_search.py deleted file mode 100644 index a4c1127535e6ae805f6619819737c379cadca6f2..0000000000000000000000000000000000000000 --- a/official/nlp/transformer/beam_search.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Beam search in TF v2.""" - -import tensorflow as tf - -from official.nlp.transformer import beam_search_v1 as v1 - -_StateKeys = v1._StateKeys # pylint: disable=protected-access - - -class SequenceBeamSearchV2(v1.SequenceBeamSearch): - """Implementation of beam search loop in v2.""" - - def search(self, initial_ids, initial_cache): - """Beam search for sequences with highest scores.""" - state, state_shapes = self._create_initial_state(initial_ids, initial_cache) - - finished_state = tf.nest.map_structure( - tf.stop_gradient, - tf.while_loop(self._continue_search, - self._search_step, - loop_vars=[state], - shape_invariants=[state_shapes], - parallel_iterations=1)) - finished_state = finished_state[0] - - alive_seq = finished_state[_StateKeys.ALIVE_SEQ] - alive_log_probs = finished_state[_StateKeys.ALIVE_LOG_PROBS] - finished_seq = finished_state[_StateKeys.FINISHED_SEQ] - finished_scores = finished_state[_StateKeys.FINISHED_SCORES] - finished_flags = finished_state[_StateKeys.FINISHED_FLAGS] - - # 2.0 changes tf.where behavior. Should make parameters broadcastable. - finished_cond = tf.reduce_any(finished_flags, 1, name="finished_cond") - seq_cond = _expand_to_same_rank(finished_cond, finished_seq) - score_cond = _expand_to_same_rank(finished_cond, finished_scores) - - # Account for corner case where there are no finished sequences for a - # particular batch item. In that case, return alive sequences for that batch - # item. - finished_seq = tf.where(seq_cond, finished_seq, alive_seq) - finished_scores = tf.where( - score_cond, finished_scores, alive_log_probs) - return finished_seq, finished_scores - - -def sequence_beam_search(symbols_to_logits_fn, - initial_ids, - initial_cache, - vocab_size, - beam_size, - alpha, - max_decode_length, - eos_id, - padded_decode=False, - dtype="float32"): - """Search for sequence of subtoken ids with the largest probability. 
- - Args: - symbols_to_logits_fn: A function that takes in ids, index, and cache as - arguments. The passed in arguments will have shape: - ids -> A tensor with shape [batch_size * beam_size, index]. - index -> A scalar. - cache -> A nested dictionary of tensors [batch_size * beam_size, ...]. - The function must return a tuple of logits and new cache: - logits -> A tensor with shape [batch * beam_size, vocab_size]. - new cache -> A nested dictionary with the same shape/structure as the - inputted cache. - initial_ids: An int32 tensor with shape [batch_size]. Starting ids for - each batch item. - initial_cache: A dictionary, containing starting decoder variables - information. - vocab_size: An integer, the size of tokens. - beam_size: An integer, the number of beams. - alpha: A float, defining the strength of length normalization. - max_decode_length: An integer, the maximum length to decoded a sequence. - eos_id: An integer, ID of eos token, used to determine when a sequence has - finished. - padded_decode: A bool, indicating if max_sequence_length padding is used - for beam search. - dtype: A tensorflow data type used for score computation. The default is - tf.float32. - - Returns: - Top decoded sequences [batch_size, beam_size, max_decode_length] - sequence scores [batch_size, beam_size] - """ - batch_size = ( - initial_ids.shape.as_list()[0] if padded_decode else - tf.shape(initial_ids)[0]) - sbs = SequenceBeamSearchV2(symbols_to_logits_fn, vocab_size, batch_size, - beam_size, alpha, max_decode_length, eos_id, - padded_decode, dtype) - return sbs.search(initial_ids, initial_cache) - - -def _expand_to_same_rank(tensor, target): - """Expands a given tensor to target's rank to be broadcastable. - - Args: - tensor: input tensor to tile. Shape: [b, d1, ..., da] - target: target tensor. Shape: [b, d1, ..., da, ..., dn] - - Returns: - Tiled tensor of shape [b, d1, ..., da, 1, ..., 1] with same rank of target. 
- - Raises: - ValueError, if the shape rank of rank tensor/target is None. - """ - if tensor.shape.rank is None: - raise ValueError("Expect rank for tensor shape, but got None.") - if target.shape.rank is None: - raise ValueError("Expect rank for target shape, but got None.") - - with tf.name_scope("expand_rank"): - diff_rank = target.shape.rank - tensor.shape.rank - for _ in range(diff_rank): - tensor = tf.expand_dims(tensor, -1) - return tensor diff --git a/official/nlp/transformer/beam_search_v1.py b/official/nlp/transformer/beam_search_v1.py index 8b143b1b30ef462f6187850b12a5ca9dfe3ab39b..2c8537e63b20e718b15dfcd042f3263212af8c08 100644 --- a/official/nlp/transformer/beam_search_v1.py +++ b/official/nlp/transformer/beam_search_v1.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,128 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Beam search to find the translated sequence with the highest probability. -Source implementation from Tensor2Tensor: -https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/beam_search.py -""" +"""Beam search to find the translated sequence with the highest probability.""" -import numpy as np import tensorflow.compat.v1 as tf -from tensorflow.python.util import nest +from official.nlp.modeling.ops import beam_search +_StateKeys = beam_search._StateKeys # pylint: disable=protected-access -def inf(dtype): - """Returns a value close to infinity, but is still finite in `dtype`. - This is useful to get a very large value that is still zero when multiplied by - zero. 
The floating-point "Inf" value is NaN when multiplied by zero. - - Args: - dtype: A dtype. The returned value will be finite when casted to this dtype. - - Returns: - A very large value. - """ - if dtype == "float32" or dtype == "bfloat16": - return 1e7 - elif dtype == "float16": - # Disable no-member lint error, as the linter thinks np.float16 does not - # exist for some reason. - return np.finfo(np.float16).max # pylint: disable=no-member - else: - raise AssertionError('Invalid dtype: %s' % dtype) - - -class _StateKeys(object): - """Keys to dictionary storing the state of the beam search loop.""" - - # Variable storing the loop index. - CUR_INDEX = "CUR_INDEX" - - # Top sequences that are alive for each batch item. Alive sequences are ones - # that have not generated an EOS token. Sequences that reach EOS are marked as - # finished and moved to the FINISHED_SEQ tensor. - # Has shape [batch_size, beam_size, CUR_INDEX + 1] - ALIVE_SEQ = "ALIVE_SEQ" - # Log probabilities of each alive sequence. Shape [batch_size, beam_size] - ALIVE_LOG_PROBS = "ALIVE_LOG_PROBS" - # Dictionary of cached values for each alive sequence. The cache stores - # the encoder output, attention bias, and the decoder attention output from - # the previous iteration. - ALIVE_CACHE = "ALIVE_CACHE" - - # Top finished sequences for each batch item. - # Has shape [batch_size, beam_size, CUR_INDEX + 1]. Sequences that are - # shorter than CUR_INDEX + 1 are padded with 0s. - FINISHED_SEQ = "FINISHED_SEQ" - # Scores for each finished sequence. Score = log probability / length norm - # Shape [batch_size, beam_size] - FINISHED_SCORES = "FINISHED_SCORES" - # Flags indicating which sequences in the finished sequences are finished. - # At the beginning, all of the sequences in FINISHED_SEQ are filler values. - # True -> finished sequence, False -> filler. 
Shape [batch_size, beam_size] - FINISHED_FLAGS = "FINISHED_FLAGS" - - -class SequenceBeamSearch(object): +class SequenceBeamSearch(beam_search.SequenceBeamSearch): """Implementation of beam search loop.""" - def __init__(self, - symbols_to_logits_fn, - vocab_size, - batch_size, - beam_size, - alpha, - max_decode_length, - eos_id, - padded_decode, - dtype=tf.float32): - """Initialize sequence beam search. - - Args: - symbols_to_logits_fn: A function to provide logits, which is the - interface to the Transformer model. The passed in arguments are: - ids -> A tensor with shape [batch_size * beam_size, index]. - index -> A scalar. - cache -> A nested dictionary of tensors [batch_size * beam_size, ...]. - The function must return a tuple of logits and the updated cache: - logits -> A tensor with shape [batch * beam_size, vocab_size]. - updated cache -> A nested dictionary with the same structure as the - input cache. - vocab_size: An integer, the size of the vocabulary, used for topk - computation. - batch_size: An integer, the decode batch size. - beam_size: An integer, number of beams for beam search. - alpha: A float, defining the strength of length normalization. - max_decode_length: An integer, the maximum number of steps to decode - a sequence. - eos_id: An integer. ID of end of sentence token. - padded_decode: A bool, indicating if max_sequence_length padding is used - for beam search. - dtype: A tensorflow data type used for score computation. The default is - tf.float32. 
- """ - self.symbols_to_logits_fn = symbols_to_logits_fn - self.vocab_size = vocab_size - self.batch_size = batch_size - self.beam_size = beam_size - self.alpha = alpha - self.max_decode_length = max_decode_length - self.eos_id = eos_id - self.padded_decode = padded_decode - self.dtype = tf.as_dtype(dtype) - - def search(self, initial_ids, initial_cache): - """Beam search for sequences with highest scores.""" - state, state_shapes = self._create_initial_state(initial_ids, initial_cache) - - finished_state = tf.while_loop( - self._continue_search, self._search_step, loop_vars=[state], - shape_invariants=[state_shapes], parallel_iterations=1, back_prop=False) - finished_state = finished_state[0] - + def _process_finished_state(self, finished_state): alive_seq = finished_state[_StateKeys.ALIVE_SEQ] alive_log_probs = finished_state[_StateKeys.ALIVE_LOG_PROBS] finished_seq = finished_state[_StateKeys.FINISHED_SEQ] @@ -148,378 +39,28 @@ class SequenceBeamSearch(object): tf.reduce_any(finished_flags, 1), finished_scores, alive_log_probs) return finished_seq, finished_scores - def _create_initial_state(self, initial_ids, initial_cache): - """Return initial state dictionary and its shape invariants. - - Args: - initial_ids: initial ids to pass into the symbols_to_logits_fn. - int tensor with shape [batch_size, 1] - initial_cache: dictionary storing values to be passed into the - symbols_to_logits_fn. - - Returns: - state and shape invariant dictionaries with keys from _StateKeys - """ - for key, value in initial_cache.items(): - for inner_value in nest.flatten(value): - if inner_value.dtype != self.dtype: - raise TypeError( - "initial_cache element for key '%s' has dtype %s that does not " - "match SequenceBeamSearch's dtype of %s. 
Value: %s" % - (key, value.dtype.name, self.dtype.name, inner_value)) - - # Current loop index (starts at 0) - cur_index = tf.constant(0) - - # Create alive sequence with shape [batch_size, beam_size, 1] - alive_seq = _expand_to_beam_size(initial_ids, self.beam_size) - alive_seq = tf.expand_dims(alive_seq, axis=2) - if self.padded_decode: - alive_seq = tf.tile(alive_seq, [1, 1, self.max_decode_length + 1]) - - # Create tensor for storing initial log probabilities. - # Assume initial_ids are prob 1.0 - initial_log_probs = tf.constant( - [[0.] + [-float("inf")] * (self.beam_size - 1)], dtype=self.dtype) - alive_log_probs = tf.tile(initial_log_probs, [self.batch_size, 1]) - - # Expand all values stored in the dictionary to the beam size, so that each - # beam has a separate cache. - alive_cache = nest.map_structure( - lambda t: _expand_to_beam_size(t, self.beam_size), initial_cache) - - # Initialize tensor storing finished sequences with filler values. - finished_seq = tf.zeros(tf.shape(alive_seq), tf.int32) - - # Set scores of the initial finished seqs to negative infinity. - finished_scores = tf.ones([self.batch_size, self.beam_size], - dtype=self.dtype) * -inf(self.dtype) - - # Initialize finished flags with all False values. - finished_flags = tf.zeros([self.batch_size, self.beam_size], tf.bool) - - # Create state dictionary - state = { - _StateKeys.CUR_INDEX: cur_index, - _StateKeys.ALIVE_SEQ: alive_seq, - _StateKeys.ALIVE_LOG_PROBS: alive_log_probs, - _StateKeys.ALIVE_CACHE: alive_cache, - _StateKeys.FINISHED_SEQ: finished_seq, - _StateKeys.FINISHED_SCORES: finished_scores, - _StateKeys.FINISHED_FLAGS: finished_flags - } - - # Create state invariants for each value in the state dictionary. Each - # dimension must be a constant or None. A None dimension means either: - # 1) the dimension's value is a tensor that remains the same but may - # depend on the input sequence to the model (e.g. batch size). 
- # 2) the dimension may have different values on different iterations. - if self.padded_decode: - state_shape_invariants = { - _StateKeys.CUR_INDEX: - tf.TensorShape([]), - _StateKeys.ALIVE_SEQ: - tf.TensorShape( - [self.batch_size, self.beam_size, - self.max_decode_length + 1]), - _StateKeys.ALIVE_LOG_PROBS: - tf.TensorShape([self.batch_size, self.beam_size]), - _StateKeys.ALIVE_CACHE: - nest.map_structure(_get_shape, alive_cache), - _StateKeys.FINISHED_SEQ: - tf.TensorShape( - [self.batch_size, self.beam_size, - self.max_decode_length + 1]), - _StateKeys.FINISHED_SCORES: - tf.TensorShape([self.batch_size, self.beam_size]), - _StateKeys.FINISHED_FLAGS: - tf.TensorShape([self.batch_size, self.beam_size]) - } - else: - state_shape_invariants = { - _StateKeys.CUR_INDEX: - tf.TensorShape([]), - _StateKeys.ALIVE_SEQ: - tf.TensorShape([None, self.beam_size, None]), - _StateKeys.ALIVE_LOG_PROBS: - tf.TensorShape([None, self.beam_size]), - _StateKeys.ALIVE_CACHE: - nest.map_structure(_get_shape_keep_last_dim, alive_cache), - _StateKeys.FINISHED_SEQ: - tf.TensorShape([None, self.beam_size, None]), - _StateKeys.FINISHED_SCORES: - tf.TensorShape([None, self.beam_size]), - _StateKeys.FINISHED_FLAGS: - tf.TensorShape([None, self.beam_size]) - } - - return state, state_shape_invariants - - def _continue_search(self, state): - """Return whether to continue the search loop. - - The loops should terminate when - 1) when decode length has been reached, or - 2) when the worst score in the finished sequences is better than the best - score in the alive sequences (i.e. the finished sequences are provably - unchanging) - - Args: - state: A dictionary with the current loop state. - - Returns: - Bool tensor with value True if loop should continue, False if loop should - terminate. 
- """ - i = state[_StateKeys.CUR_INDEX] - alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS] - finished_scores = state[_StateKeys.FINISHED_SCORES] - finished_flags = state[_StateKeys.FINISHED_FLAGS] - - not_at_max_decode_length = tf.less(i, self.max_decode_length) - - # Calculate largest length penalty (the larger penalty, the better score). - max_length_norm = _length_normalization(self.alpha, self.max_decode_length, - dtype=self.dtype) - # Get the best possible scores from alive sequences. - best_alive_scores = alive_log_probs[:, 0] / max_length_norm - - # Compute worst score in finished sequences for each batch element - finished_scores *= tf.cast(finished_flags, - self.dtype) # set filler scores to zero - lowest_finished_scores = tf.reduce_min(finished_scores, axis=1) - - # If there are no finished sequences in a batch element, then set the lowest - # finished score to -INF for that element. - finished_batches = tf.reduce_any(finished_flags, 1) - lowest_finished_scores += ((1.0 - - tf.cast(finished_batches, self.dtype)) * - -inf(self.dtype)) - - worst_finished_score_better_than_best_alive_score = tf.reduce_all( - tf.greater(lowest_finished_scores, best_alive_scores) - ) - - return tf.logical_and( - not_at_max_decode_length, - tf.logical_not(worst_finished_score_better_than_best_alive_score) - ) - - def _search_step(self, state): - """Beam search loop body. - - Grow alive sequences by a single ID. Sequences that have reached the EOS - token are marked as finished. The alive and finished sequences with the - highest log probabilities and scores are returned. - - A sequence's finished score is calculating by dividing the log probability - by the length normalization factor. Without length normalization, the - search is more likely to return shorter sequences. - - Args: - state: A dictionary with the current loop state. - - Returns: - new state dictionary. - """ - # Grow alive sequences by one token. 
- new_seq, new_log_probs, topk_ids, new_cache = self._grow_alive_seq(state) - new_finished_flags = tf.equal(topk_ids, self.eos_id) - # Collect top beam_size alive sequences - alive_state = self._get_new_alive_state(new_seq, new_log_probs, - new_finished_flags, new_cache) - - # Combine newly finished sequences with existing finished sequences, and - # collect the top k scoring sequences. - finished_state = self._get_new_finished_state(state, new_seq, new_log_probs, - new_finished_flags) - - # Increment loop index and create new state dictionary - new_state = {_StateKeys.CUR_INDEX: state[_StateKeys.CUR_INDEX] + 1} - new_state.update(alive_state) - new_state.update(finished_state) - return [new_state] - - def _grow_alive_seq(self, state): - """Grow alive sequences by one token, and collect top 2*beam_size sequences. - - 2*beam_size sequences are collected because some sequences may have reached - the EOS token. 2*beam_size ensures that at least beam_size sequences are - still alive. - - Args: - state: A dictionary with the current loop state. - Returns: - Tuple of - (Top 2*beam_size sequences [batch_size, 2 * beam_size, cur_index + 1], - Scores of returned sequences [batch_size, 2 * beam_size], - New alive cache, for each of the 2 * beam_size sequences) - """ - i = state[_StateKeys.CUR_INDEX] - alive_seq = state[_StateKeys.ALIVE_SEQ] - alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS] - alive_cache = state[_StateKeys.ALIVE_CACHE] - - beams_to_keep = 2 * self.beam_size - - # Get logits for the next candidate IDs for the alive sequences. Get the new - # cache values at the same time. 
- if self.padded_decode: - flat_ids = tf.reshape( - tf.slice(alive_seq, [0, 0, i], [self.batch_size, self.beam_size, 1]), - [self.batch_size * self.beam_size, -1]) - else: - flat_ids = _flatten_beam_dim(alive_seq) # [batch_size * beam_size] - flat_cache = nest.map_structure(_flatten_beam_dim, alive_cache) - - flat_logits, flat_cache = self.symbols_to_logits_fn(flat_ids, i, flat_cache) - - # Unflatten logits to shape [batch_size, beam_size, vocab_size] - logits = _unflatten_beam_dim(flat_logits, self.batch_size, self.beam_size) - new_cache = nest.map_structure( - lambda t: _unflatten_beam_dim(t, self.batch_size, self.beam_size), - flat_cache) - - # Convert logits to normalized log probs - candidate_log_probs = _log_prob_from_logits(logits) - - # Calculate new log probabilities if each of the alive sequences were - # extended # by the the candidate IDs. - # Shape [batch_size, beam_size, vocab_size] - log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2) - - # Each batch item has beam_size * vocab_size candidate sequences. For each - # batch item, get the k candidates with the highest log probabilities. - flat_log_probs = tf.reshape(log_probs, - [-1, self.beam_size * self.vocab_size]) - topk_log_probs, topk_indices = tf.nn.top_k(flat_log_probs, k=beams_to_keep) - - # Extract the alive sequences that generate the highest log probabilities - # after being extended. - topk_beam_indices = topk_indices // self.vocab_size - topk_seq, new_cache = _gather_beams( - [alive_seq, new_cache], topk_beam_indices, self.batch_size, - beams_to_keep) - - # Append the most probable IDs to the topk sequences - topk_ids = topk_indices % self.vocab_size - if self.padded_decode: - topk_seq = tf.transpose(topk_seq, perm=[2, 0, 1]) - # TODO(b/145533236, hongkuny): Reverts once TF fix the validation. 
- topk_seq = tf.tensor_scatter_nd_update(topk_seq, [[i + 1]], - tf.expand_dims(topk_ids, axis=0)) - topk_seq = tf.transpose(topk_seq, perm=[1, 2, 0]) - else: - topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2) - return topk_seq, topk_log_probs, topk_ids, new_cache - - def _get_new_alive_state(self, new_seq, new_log_probs, new_finished_flags, - new_cache): - """Gather the top k sequences that are still alive. - - Args: - new_seq: New sequences generated by growing the current alive sequences - int32 tensor with shape [batch_size, 2 * beam_size, cur_index + 1] - new_log_probs: Log probabilities of new sequences float32 tensor with - shape [batch_size, beam_size] - new_finished_flags: A boolean Tensor indicates which sequences are live - inside the beam. - new_cache: Dict of cached values for each sequence. - - Returns: - Dictionary with alive keys from _StateKeys: - {Top beam_size sequences that are still alive (don't end with eos_id) - Log probabilities of top alive sequences - Dict cache storing decoder states for top alive sequences} - """ - # To prevent finished sequences from being considered, set log probs to -inf - new_log_probs += tf.cast(new_finished_flags, self.dtype) * -inf(self.dtype) - top_alive_seq, top_alive_log_probs, top_alive_cache = _gather_topk_beams( - [new_seq, new_log_probs, new_cache], new_log_probs, self.batch_size, - self.beam_size) - - return { - _StateKeys.ALIVE_SEQ: top_alive_seq, - _StateKeys.ALIVE_LOG_PROBS: top_alive_log_probs, - _StateKeys.ALIVE_CACHE: top_alive_cache - } - - def _get_new_finished_state(self, state, new_seq, new_log_probs, - new_finished_flags): - """Combine new and old finished sequences, and gather the top k sequences. - - Args: - state: A dictionary with the current loop state. 
- new_seq: New sequences generated by growing the current alive sequences - int32 tensor with shape [batch_size, beam_size, i + 1] - new_log_probs: Log probabilities of new sequences float32 tensor with - shape [batch_size, beam_size] - new_finished_flags: A boolean Tensor indicates which sequences are live - inside the beam. - - Returns: - Dictionary with finished keys from _StateKeys: - {Top beam_size finished sequences based on score, - Scores of finished sequences, - Finished flags of finished sequences} - """ - i = state[_StateKeys.CUR_INDEX] - finished_seq = state[_StateKeys.FINISHED_SEQ] - finished_scores = state[_StateKeys.FINISHED_SCORES] - finished_flags = state[_StateKeys.FINISHED_FLAGS] - - # First append a column of 0-ids to finished_seq to increment the length. - # New shape of finished_seq: [batch_size, beam_size, i + 1] - if not self.padded_decode: - finished_seq = tf.concat([ - finished_seq, - tf.zeros([self.batch_size, self.beam_size, 1], tf.int32) - ], - axis=2) - - # Calculate new seq scores from log probabilities. - length_norm = _length_normalization(self.alpha, i + 1, dtype=self.dtype) - new_scores = new_log_probs / length_norm - - # Set the scores of the still-alive seq in new_seq to large negative values. - new_scores += ((1. - tf.cast(new_finished_flags, self.dtype)) * - -inf(self.dtype)) - - # Combine sequences, scores, and flags. - finished_seq = tf.concat([finished_seq, new_seq], axis=1) - finished_scores = tf.concat([finished_scores, new_scores], axis=1) - finished_flags = tf.concat([finished_flags, new_finished_flags], axis=1) - - # Return the finished sequences with the best scores. 
- top_finished_seq, top_finished_scores, top_finished_flags = ( - _gather_topk_beams([finished_seq, finished_scores, finished_flags], - finished_scores, self.batch_size, self.beam_size)) - - return { - _StateKeys.FINISHED_SEQ: top_finished_seq, - _StateKeys.FINISHED_SCORES: top_finished_scores, - _StateKeys.FINISHED_FLAGS: top_finished_flags - } - - -def sequence_beam_search( - symbols_to_logits_fn, initial_ids, initial_cache, vocab_size, beam_size, - alpha, max_decode_length, eos_id, padded_decode=False): +def sequence_beam_search(symbols_to_logits_fn, + initial_ids, + initial_cache, + vocab_size, + beam_size, + alpha, + max_decode_length, + eos_id, + padded_decode=False): """Search for sequence of subtoken ids with the largest probability. Args: symbols_to_logits_fn: A function that takes in ids, index, and cache as - arguments. The passed in arguments will have shape: - ids -> A tensor with shape [batch_size * beam_size, index]. - index -> A scalar. - cache -> A nested dictionary of tensors [batch_size * beam_size, ...]. - The function must return a tuple of logits and new cache: - logits -> A tensor with shape [batch * beam_size, vocab_size]. - new cache -> A nested dictionary with the same shape/structure as the - inputted cache. - initial_ids: An int32 tensor with shape [batch_size]. Starting ids for - each batch item. + arguments. The passed in arguments will have shape: ids -> A tensor with + shape [batch_size * beam_size, index]. index -> A scalar. cache -> A + nested dictionary of tensors [batch_size * beam_size, ...]. + The function must return a tuple of logits and new cache: logits -> A + tensor with shape [batch * beam_size, vocab_size]. new cache -> A nested + dictionary with the same shape/structure as the inputted cache. + initial_ids: An int32 tensor with shape [batch_size]. Starting ids for each + batch item. initial_cache: A dictionary, containing starting decoder variables information. 
vocab_size: An integer, the size of the vocabulary, used for topk @@ -529,147 +70,13 @@ def sequence_beam_search( max_decode_length: An integer, the maximum length to decoded a sequence. eos_id: An integer, ID of eos token, used to determine when a sequence has finished. - padded_decode: A bool, indicating if max_sequence_length padding is used - for beam search. + padded_decode: A bool, indicating if max_sequence_length padding is used for + beam search. Returns: Top decoded sequences [batch_size, beam_size, max_decode_length] sequence scores [batch_size, beam_size] """ - batch_size = ( - initial_ids.shape.as_list()[0] if padded_decode else - tf.shape(initial_ids)[0]) - sbs = SequenceBeamSearch(symbols_to_logits_fn, vocab_size, batch_size, - beam_size, alpha, max_decode_length, eos_id, - padded_decode) + sbs = SequenceBeamSearch(symbols_to_logits_fn, vocab_size, beam_size, alpha, + max_decode_length, eos_id, padded_decode) return sbs.search(initial_ids, initial_cache) - - -def _log_prob_from_logits(logits): - return logits - tf.reduce_logsumexp(logits, axis=2, keepdims=True) - - -def _length_normalization(alpha, length, dtype=tf.float32): - """Return length normalization factor.""" - return tf.pow(((5. + tf.cast(length, dtype)) / 6.), alpha) - - -def _expand_to_beam_size(tensor, beam_size): - """Tiles a given tensor by beam_size. - - Args: - tensor: tensor to tile [batch_size, ...] - beam_size: How much to tile the tensor by. - - Returns: - Tiled tensor [batch_size, beam_size, ...] 
- """ - tensor = tf.expand_dims(tensor, axis=1) - tile_dims = [1] * tensor.shape.ndims - tile_dims[1] = beam_size - - return tf.tile(tensor, tile_dims) - - -def _shape_list(tensor): - """Return a list of the tensor's shape, and ensure no None values in list.""" - # Get statically known shape (may contain None's for unknown dimensions) - shape = tensor.get_shape().as_list() - - # Ensure that the shape values are not None - dynamic_shape = tf.shape(tensor) - for i in range(len(shape)): # pylint: disable=consider-using-enumerate - if shape[i] is None: - shape[i] = dynamic_shape[i] - return shape - - -def _get_shape_keep_last_dim(tensor): - shape_list = _shape_list(tensor) - - # Only the last - for i in range(len(shape_list) - 1): - shape_list[i] = None - - if isinstance(shape_list[-1], tf.Tensor): - shape_list[-1] = None - return tf.TensorShape(shape_list) - - -def _get_shape(tensor): - """Return the shape of the input tensor.""" - return tf.TensorShape(_shape_list(tensor)) - - -def _flatten_beam_dim(tensor): - """Reshapes first two dimensions in to single dimension. - - Args: - tensor: Tensor to reshape of shape [A, B, ...] - - Returns: - Reshaped tensor of shape [A*B, ...] - """ - shape = _shape_list(tensor) - shape[0] *= shape[1] - shape.pop(1) # Remove beam dim - return tf.reshape(tensor, shape) - - -def _unflatten_beam_dim(tensor, batch_size, beam_size): - """Reshapes first dimension back to [batch_size, beam_size]. - - Args: - tensor: Tensor to reshape of shape [batch_size*beam_size, ...] - batch_size: Tensor, original batch size. - beam_size: int, original beam size. - - Returns: - Reshaped tensor of shape [batch_size, beam_size, ...] - """ - shape = _shape_list(tensor) - new_shape = [batch_size, beam_size] + shape[1:] - return tf.reshape(tensor, new_shape) - - -def _gather_beams(nested, beam_indices, batch_size, new_beam_size): - """Gather beams from nested structure of tensors. 
- - Each tensor in nested represents a batch of beams, where beam refers to a - single search state (beam search involves searching through multiple states - in parallel). - - This function is used to gather the top beams, specified by - beam_indices, from the nested tensors. - - Args: - nested: Nested structure (tensor, list, tuple or dict) containing tensors - with shape [batch_size, beam_size, ...]. - beam_indices: int32 tensor with shape [batch_size, new_beam_size]. Each - value in beam_indices must be between [0, beam_size), and are not - necessarily unique. - batch_size: int size of batch - new_beam_size: int number of beams to be pulled from the nested tensors. - - Returns: - Nested structure containing tensors with shape - [batch_size, new_beam_size, ...] - """ - # Computes the i'th coodinate that contains the batch index for gather_nd. - # Batch pos is a tensor like [[0,0,0,0,],[1,1,1,1],..]. - batch_pos = tf.range(batch_size * new_beam_size) // new_beam_size - batch_pos = tf.reshape(batch_pos, [batch_size, new_beam_size]) - - # Create coordinates to be passed to tf.gather_nd. Stacking creates a tensor - # with shape [batch_size, beam_size, 2], where the last dimension contains - # the (i, j) gathering coordinates. 
- coordinates = tf.stack([batch_pos, beam_indices], axis=2) - - return nest.map_structure( - lambda state: tf.gather_nd(state, coordinates), nested) - - -def _gather_topk_beams(nested, score_or_log_prob, batch_size, beam_size): - """Gather top beams from nested structure.""" - _, topk_indexes = tf.nn.top_k(score_or_log_prob, k=beam_size) - return _gather_beams(nested, topk_indexes, batch_size, beam_size) diff --git a/official/nlp/transformer/beam_search_v1_test.py b/official/nlp/transformer/beam_search_v1_test.py deleted file mode 100644 index 53cf921fb90e93950a05e999807fc497390674a1..0000000000000000000000000000000000000000 --- a/official/nlp/transformer/beam_search_v1_test.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Test beam search helper methods.""" - -import tensorflow.compat.v1 as tf - -from official.nlp.transformer import beam_search_v1 as beam_search - - -class BeamSearchHelperTests(tf.test.TestCase): - - def setUp(self): - super(BeamSearchHelperTests, self).setUp() - tf.compat.v1.disable_eager_execution() - - def test_expand_to_beam_size(self): - x = tf.ones([7, 4, 2, 5]) - x = beam_search._expand_to_beam_size(x, 3) - with self.session() as sess: - shape = sess.run(tf.shape(x)) - self.assertAllEqual([7, 3, 4, 2, 5], shape) - - def test_shape_list(self): - y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[]) - x = tf.ones([7, y, 2, 5]) - shape = beam_search._shape_list(x) - self.assertIsInstance(shape[0], int) - self.assertIsInstance(shape[1], tf.Tensor) - self.assertIsInstance(shape[2], int) - self.assertIsInstance(shape[3], int) - - def test_get_shape_keep_last_dim(self): - y = tf.constant(4.0) - x = tf.ones([7, tf.cast(tf.sqrt(y), tf.int32), 2, 5]) - shape = beam_search._get_shape_keep_last_dim(x) - self.assertAllEqual([None, None, None, 5], - shape.as_list()) - - def test_flatten_beam_dim(self): - x = tf.ones([7, 4, 2, 5]) - x = beam_search._flatten_beam_dim(x) - with self.session() as sess: - shape = sess.run(tf.shape(x)) - self.assertAllEqual([28, 2, 5], shape) - - def test_unflatten_beam_dim(self): - x = tf.ones([28, 2, 5]) - x = beam_search._unflatten_beam_dim(x, 7, 4) - with self.session() as sess: - shape = sess.run(tf.shape(x)) - self.assertAllEqual([7, 4, 2, 5], shape) - - def test_gather_beams(self): - x = tf.reshape(tf.range(24), [2, 3, 4]) - # x looks like: [[[ 0 1 2 3] - # [ 4 5 6 7] - # [ 8 9 10 11]] - # - # [[12 13 14 15] - # [16 17 18 19] - # [20 21 22 23]]] - - y = beam_search._gather_beams(x, [[1, 2], [0, 2]], 2, 2) - with self.session() as sess: - y = sess.run(y) - - self.assertAllEqual([[[4, 5, 6, 7], - [8, 9, 10, 11]], - [[12, 13, 14, 15], - [20, 21, 22, 23]]], 
- y) - - def test_gather_topk_beams(self): - x = tf.reshape(tf.range(24), [2, 3, 4]) - x_scores = [[0, 1, 1], [1, 0, 1]] - - y = beam_search._gather_topk_beams(x, x_scores, 2, 2) - with self.session() as sess: - y = sess.run(y) - - self.assertAllEqual([[[4, 5, 6, 7], - [8, 9, 10, 11]], - [[12, 13, 14, 15], - [20, 21, 22, 23]]], - y) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/nlp/transformer/compute_bleu.py b/official/nlp/transformer/compute_bleu.py index f7dfd542bae32d6b5a2668b76d77faa9d02b47a8..8ed3d866fab8c07cce40644a5b7c2bd7159da5ba 100644 --- a/official/nlp/transformer/compute_bleu.py +++ b/official/nlp/transformer/compute_bleu.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Script to compute official BLEU score. 
Source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import re import sys import unicodedata -from absl import app as absl_app +from absl import app from absl import flags import six from six.moves import range @@ -149,4 +145,4 @@ if __name__ == "__main__": tf.logging.set_verbosity(tf.logging.INFO) define_compute_bleu_flags() FLAGS = flags.FLAGS - absl_app.run(main) + app.run(main) diff --git a/official/nlp/transformer/compute_bleu_test.py b/official/nlp/transformer/compute_bleu_test.py index 5b370947d45b0daf76ef1a901030f8168f92aa65..6160bf66ecfc5f36f18ddf730f96780bda236b50 100644 --- a/official/nlp/transformer/compute_bleu_test.py +++ b/official/nlp/transformer/compute_bleu_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Test functions in compute_blue.py.""" import tempfile diff --git a/official/nlp/transformer/data_download.py b/official/nlp/transformer/data_download.py index e5f66685611e1ad379d05dcf321a679527914b19..4e3f4a33564f1266ca85c1f5dad3953e9c699fbf 100644 --- a/official/nlp/transformer/data_download.py +++ b/official/nlp/transformer/data_download.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,19 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Download and preprocess WMT17 ende training and evaluation datasets.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Download and preprocess WMT17 ende training and evaluation datasets.""" import os import random import tarfile # pylint: disable=g-bad-import-order -from absl import app as absl_app + +from absl import app from absl import flags from absl import logging import six @@ -64,22 +61,18 @@ _TRAIN_DATA_SOURCES = [ # Use pre-defined minimum count to generate subtoken vocabulary. _TRAIN_DATA_MIN_COUNT = 6 -_EVAL_DATA_SOURCES = [ - { - "url": "http://data.statmt.org/wmt17/translation-task/dev.tgz", - "input": "newstest2013.en", - "target": "newstest2013.de", - } -] +_EVAL_DATA_SOURCES = [{ + "url": "http://data.statmt.org/wmt17/translation-task/dev.tgz", + "input": "newstest2013.en", + "target": "newstest2013.de", +}] -_TEST_DATA_SOURCES = [ - { - "url": ("https://storage.googleapis.com/tf-perf-public/" - "official_transformer/test_data/newstest2014.tgz"), - "input": "newstest2014.en", - "target": "newstest2014.de", - } -] +_TEST_DATA_SOURCES = [{ + "url": ("https://storage.googleapis.com/tf-perf-public/" + "official_transformer/test_data/newstest2014.tgz"), + "input": "newstest2014.en", + "target": "newstest2014.de", +}] # Vocabulary constants _TARGET_VOCAB_SIZE = 32768 # Number of subtokens in the vocabulary list. 
@@ -114,7 +107,9 @@ def find_file(path, filename, max_depth=5): # Download and extraction functions ############################################################################### def get_raw_files(raw_dir, data_source): - """Return raw files from source. Downloads/extracts if needed. + """Return raw files from source. + + Downloads/extracts if needed. Args: raw_dir: string directory to store raw files @@ -134,8 +129,8 @@ def get_raw_files(raw_dir, data_source): "targets": [], } # keys for d in data_source: - input_file, target_file = download_and_extract( - raw_dir, d["url"], d["input"], d["target"]) + input_file, target_file = download_and_extract(raw_dir, d["url"], + d["input"], d["target"]) raw_files["inputs"].append(input_file) raw_files["targets"].append(target_file) return raw_files @@ -167,7 +162,7 @@ def download_from_url(path, url): found_file = find_file(path, filename, max_depth=0) if found_file is None: filename = os.path.join(path, filename) - logging.info("Downloading from %s to %s." % (url, filename)) + logging.info("Downloading from %s to %s.", url, filename) inprogress_filepath = six.ensure_str(filename) + ".incomplete" inprogress_filepath, _ = urllib.request.urlretrieve( url, inprogress_filepath, reporthook=download_report_hook) @@ -176,7 +171,7 @@ def download_from_url(path, url): tf.gfile.Rename(inprogress_filepath, filename) return filename else: - logging.info("Already downloaded: %s (at %s)." % (url, found_file)) + logging.info("Already downloaded: %s (at %s).", url, found_file) return found_file @@ -199,14 +194,14 @@ def download_and_extract(path, url, input_filename, target_filename): input_file = find_file(path, input_filename) target_file = find_file(path, target_filename) if input_file and target_file: - logging.info("Already downloaded and extracted %s." % url) + logging.info("Already downloaded and extracted %s.", url) return input_file, target_file # Download archive file if it doesn't already exist. 
compressed_file = download_from_url(path, url) # Extract compressed files - logging.info("Extracting %s." % compressed_file) + logging.info("Extracting %s.", compressed_file) with tarfile.open(compressed_file, "r:gz") as corpus_tar: corpus_tar.extractall(path) @@ -236,13 +231,13 @@ def compile_files(raw_dir, raw_files, tag): raw_files: Dict containing filenames of input and target data. {"inputs": list of files containing data in input language "targets": list of files containing corresponding data in target language - } + } tag: String to append to the compiled filename. Returns: Full path of compiled input and target files. """ - logging.info("Compiling files with tag %s." % tag) + logging.info("Compiling files with tag %s.", tag) filename = "%s-%s" % (_PREFIX, tag) input_compiled_file = os.path.join(raw_dir, six.ensure_str(filename) + ".lang1") @@ -255,7 +250,7 @@ def compile_files(raw_dir, raw_files, tag): input_file = raw_files["inputs"][i] target_file = raw_files["targets"][i] - logging.info("Reading files %s and %s." % (input_file, target_file)) + logging.info("Reading files %s and %s.", input_file, target_file) write_file(input_writer, input_file) write_file(target_writer, target_file) return input_compiled_file, target_compiled_file @@ -271,8 +266,7 @@ def write_file(writer, filename): ############################################################################### # Data preprocessing ############################################################################### -def encode_and_save_files( - subtokenizer, data_dir, raw_files, tag, total_shards): +def encode_and_save_files(subtokenizer, data_dir, raw_files, tag, total_shards): """Save data from files as encoded Examples in TFrecord format. Args: @@ -287,14 +281,16 @@ def encode_and_save_files( List of all files produced. """ # Create a file for each shard. 
- filepaths = [shard_filename(data_dir, tag, n + 1, total_shards) - for n in range(total_shards)] + filepaths = [ + shard_filename(data_dir, tag, n + 1, total_shards) + for n in range(total_shards) + ] if all_exist(filepaths): - logging.info("Files with tag %s already exist." % tag) + logging.info("Files with tag %s already exist.", tag) return filepaths - logging.info("Saving files with tag %s." % tag) + logging.info("Saving files with tag %s.", tag) input_file = raw_files[0] target_file = raw_files[1] @@ -302,13 +298,14 @@ def encode_and_save_files( tmp_filepaths = [six.ensure_str(fname) + ".incomplete" for fname in filepaths] writers = [tf.python_io.TFRecordWriter(fname) for fname in tmp_filepaths] counter, shard = 0, 0 - for counter, (input_line, target_line) in enumerate(zip( - txt_line_iterator(input_file), txt_line_iterator(target_file))): + for counter, (input_line, target_line) in enumerate( + zip(txt_line_iterator(input_file), txt_line_iterator(target_file))): if counter > 0 and counter % 100000 == 0: - logging.info("\tSaving case %d." 
% counter) - example = dict_to_example( - {"inputs": subtokenizer.encode(input_line, add_eos=True), - "targets": subtokenizer.encode(target_line, add_eos=True)}) + logging.info("\tSaving case %d.", counter) + example = dict_to_example({ + "inputs": subtokenizer.encode(input_line, add_eos=True), + "targets": subtokenizer.encode(target_line, add_eos=True) + }) writers[shard].write(example.SerializeToString()) shard = (shard + 1) % total_shards for writer in writers: @@ -329,7 +326,7 @@ def shard_filename(path, tag, shard_num, total_shards): def shuffle_records(fname): """Shuffle records in a single file.""" - logging.info("Shuffling records in file %s" % fname) + logging.info("Shuffling records in file %s", fname) # Rename file prior to shuffling tmp_fname = six.ensure_str(fname) + ".unshuffled" @@ -349,7 +346,7 @@ def shuffle_records(fname): for count, record in enumerate(records): w.write(record) if count > 0 and count % 100000 == 0: - logging.info("\tWriting record: %d" % count) + logging.info("\tWriting record: %d", count) tf.gfile.Remove(tmp_fname) @@ -372,7 +369,7 @@ def all_exist(filepaths): def make_dir(path): if not tf.gfile.Exists(path): - logging.info("Creating directory %s" % path) + logging.info("Creating directory %s", path) tf.gfile.MakeDirs(path) @@ -395,7 +392,10 @@ def main(unused_argv): train_files_flat = train_files["inputs"] + train_files["targets"] vocab_file = os.path.join(FLAGS.data_dir, VOCAB_FILE) subtokenizer = tokenizer.Subtokenizer.init_from_files( - vocab_file, train_files_flat, _TARGET_VOCAB_SIZE, _TARGET_THRESHOLD, + vocab_file, + train_files_flat, + _TARGET_VOCAB_SIZE, + _TARGET_THRESHOLD, min_count=None if FLAGS.search else _TRAIN_DATA_MIN_COUNT) logging.info("Step 4/5: Compiling training and evaluation data") @@ -404,12 +404,11 @@ def main(unused_argv): # Tokenize and save data as Examples in the TFRecord format. 
logging.info("Step 5/5: Preprocessing and saving data") - train_tfrecord_files = encode_and_save_files( - subtokenizer, FLAGS.data_dir, compiled_train_files, _TRAIN_TAG, - _TRAIN_SHARDS) - encode_and_save_files( - subtokenizer, FLAGS.data_dir, compiled_eval_files, _EVAL_TAG, - _EVAL_SHARDS) + train_tfrecord_files = encode_and_save_files(subtokenizer, FLAGS.data_dir, + compiled_train_files, _TRAIN_TAG, + _TRAIN_SHARDS) + encode_and_save_files(subtokenizer, FLAGS.data_dir, compiled_eval_files, + _EVAL_TAG, _EVAL_SHARDS) for fname in train_tfrecord_files: shuffle_records(fname) @@ -418,15 +417,20 @@ def main(unused_argv): def define_data_download_flags(): """Add flags specifying data download arguments.""" flags.DEFINE_string( - name="data_dir", short_name="dd", default="/tmp/translate_ende", + name="data_dir", + short_name="dd", + default="/tmp/translate_ende", help=flags_core.help_wrap( "Directory for where the translate_ende_wmt32k dataset is saved.")) flags.DEFINE_string( - name="raw_dir", short_name="rd", default="/tmp/translate_ende_raw", + name="raw_dir", + short_name="rd", + default="/tmp/translate_ende_raw", help=flags_core.help_wrap( "Path where the raw data will be downloaded and extracted.")) flags.DEFINE_bool( - name="search", default=False, + name="search", + default=False, help=flags_core.help_wrap( "If set, use binary search to find the vocabulary set with size" "closest to the target size (%d)." % _TARGET_VOCAB_SIZE)) @@ -436,4 +440,4 @@ if __name__ == "__main__": logging.set_verbosity(logging.INFO) define_data_download_flags() FLAGS = flags.FLAGS - absl_app.run(main) + app.run(main) diff --git a/official/nlp/transformer/data_pipeline.py b/official/nlp/transformer/data_pipeline.py index cedd2c309d3194a07841610f8f1039a1a1e7ac51..1d9f242172cadcd38fefbc900658b914483b3b24 100644 --- a/official/nlp/transformer/data_pipeline.py +++ b/official/nlp/transformer/data_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Input pipeline for the transformer model to read, filter, and batch examples. Two things to note in the pipeline: @@ -47,10 +47,6 @@ Two things to note in the pipeline: in the order of the examples. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import os from absl import logging @@ -87,8 +83,9 @@ def _parse_example(serialized_example): def _filter_max_length(example, max_length=256): """Indicates whether the example's length is lower than the maximum length.""" - return tf.logical_and(tf.size(example[0]) <= max_length, - tf.size(example[1]) <= max_length) + return tf.logical_and( + tf.size(example[0]) <= max_length, + tf.size(example[1]) <= max_length) def _get_example_length(example): @@ -97,8 +94,9 @@ def _get_example_length(example): return length -def _create_min_max_boundaries( - max_length, min_boundary=_MIN_BOUNDARY, boundary_scale=_BOUNDARY_SCALE): +def _create_min_max_boundaries(max_length, + min_boundary=_MIN_BOUNDARY, + boundary_scale=_BOUNDARY_SCALE): """Create min and max boundary lists up to max_length. For example, when max_length=24, min_boundary=4 and boundary_scale=2, the @@ -165,8 +163,8 @@ def _batch_examples(dataset, batch_size, max_length): # TODO(xunkai): investigate if removing code branching improves performance. 
conditions_c = tf.logical_and( - tf.less_equal(buckets_min, seq_length), - tf.less(seq_length, buckets_max)) + tf.less_equal(buckets_min, seq_length), tf.less(seq_length, + buckets_max)) bucket_id = tf.reduce_min(tf.where(conditions_c)) return bucket_id @@ -183,16 +181,23 @@ def _batch_examples(dataset, batch_size, max_length): # lengths as well. Resulting lengths of inputs and targets can differ. return grouped_dataset.padded_batch(bucket_batch_size, ([None], [None])) - return dataset.apply(tf.data.experimental.group_by_window( - key_func=example_to_bucket_id, - reduce_func=batching_fn, - window_size=None, - window_size_func=window_size_fn)) - - -def _read_and_batch_from_files( - file_pattern, batch_size, max_length, max_io_parallelism, shuffle, repeat, - static_batch=False, num_replicas=1, ctx=None): + return dataset.apply( + tf.data.experimental.group_by_window( + key_func=example_to_bucket_id, + reduce_func=batching_fn, + window_size=None, + window_size_func=window_size_fn)) + + +def _read_and_batch_from_files(file_pattern, + batch_size, + max_length, + max_io_parallelism, + shuffle, + repeat, + static_batch=False, + num_replicas=1, + ctx=None): """Create dataset where each item is a dict of "inputs" and "targets". Args: @@ -204,20 +209,18 @@ def _read_and_batch_from_files( repeat: Number of times to repeat the dataset. If None, the dataset is repeated forever. static_batch: Whether the batches in the dataset should have static shapes. - If True, the input is batched so that every batch has the - shape [batch_size // max_length, max_length]. If False, the input is - grouped by length, and batched so that batches may have different - shapes [N, M], where: - N * M <= batch_size - M <= max_length - In general, this setting should be False. Dynamic shapes allow the inputs - to be grouped so that the number of padding tokens is minimized, and helps - model training. In cases where the input shape must be static - (e.g. 
running on TPU), this setting should be set to True. + If True, the input is batched so that every batch has the shape + [batch_size // max_length, max_length]. If False, the input is grouped by + length, and batched so that batches may have different + shapes [N, M], where: N * M <= batch_size M <= max_length In general, this + setting should be False. Dynamic shapes allow the inputs to be grouped + so that the number of padding tokens is minimized, and helps model + training. In cases where the input shape must be static (e.g. running on + TPU), this setting should be set to True. num_replicas: Number of GPUs or other workers. We will generate global batches, and each global batch is equally divisible by number of replicas. Currently it is only effective when static_batch==True. TODO: make it - effective when static_batch=False. + effective when static_batch=False. ctx: Input context. Returns: @@ -239,9 +242,9 @@ def _read_and_batch_from_files( num_parallel_calls=tf.data.experimental.AUTOTUNE).with_options(options) # Parse each tf.Example into a dictionary - # TODO: Look into prefetch_input_elements for performance optimization. - dataset = dataset.map(_parse_example, - num_parallel_calls=tf.data.experimental.AUTOTUNE) + # TODO: Look into prefetch_input_elements for performance optimization. # pylint: disable=g-bad-todo + dataset = dataset.map( + _parse_example, num_parallel_calls=tf.data.experimental.AUTOTUNE) # Remove examples where the input or target length exceeds the maximum length, dataset = dataset.filter(lambda x, y: _filter_max_length((x, y), max_length)) @@ -252,7 +255,8 @@ def _read_and_batch_from_files( # into sentences, and finally expand to a global batch. It could prove # the global batch divisble for distribution strategy. 
int(batch_size // num_replicas // max_length * num_replicas), - ([max_length], [max_length]), drop_remainder=True) + ([max_length], [max_length]), + drop_remainder=True) else: # Group and batch such that each batch has examples of similar length. # TODO(xunkai): _batch_examples might need to do something special for @@ -291,10 +295,15 @@ def train_input_fn(params, ctx=None): if params["use_synthetic_data"]: return _generate_synthetic_data(params) return _read_and_batch_from_files( - file_pattern, params["batch_size"], params["max_length"], - params["max_io_parallelism"], shuffle=True, - repeat=params["repeat_dataset"], static_batch=params["static_batch"], - num_replicas=params["num_gpus"], ctx=ctx) + file_pattern, + params["batch_size"], + params["max_length"], + params["max_io_parallelism"], + shuffle=True, + repeat=params["repeat_dataset"], + static_batch=params["static_batch"], + num_replicas=params["num_gpus"], + ctx=ctx) def eval_input_fn(params, ctx=None): @@ -303,9 +312,14 @@ def eval_input_fn(params, ctx=None): if params["use_synthetic_data"]: return _generate_synthetic_data(params) return _read_and_batch_from_files( - file_pattern, params["batch_size"], params["max_length"], - params["max_io_parallelism"], shuffle=False, repeat=1, - static_batch=params["static_batch"], num_replicas=params["num_gpus"], + file_pattern, + params["batch_size"], + params["max_length"], + params["max_io_parallelism"], + shuffle=False, + repeat=1, + static_batch=params["static_batch"], + num_replicas=params["num_gpus"], ctx=ctx) diff --git a/official/nlp/transformer/embedding_layer.py b/official/nlp/transformer/embedding_layer.py index 6694e2b42af47673ee3ce0b9572ec5867d69cb7d..4c7b11f4914b6a3e94fb9d8d7a77b029c830e4cc 100644 --- a/official/nlp/transformer/embedding_layer.py +++ b/official/nlp/transformer/embedding_layer.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Implementation of embedding layer with shared weights.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Implementation of embedding layer with shared weights.""" import tensorflow as tf @@ -43,6 +39,7 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer): self.shared_weights = self.add_weight( "weights", shape=[self.vocab_size, self.hidden_size], + dtype=tf.float32, initializer=tf.random_normal_initializer( mean=0., stddev=self.hidden_size**-0.5)) super(EmbeddingSharedWeights, self).build(input_shape) @@ -59,6 +56,7 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer): Args: inputs: An int64 tensor with shape [batch_size, length] mode: string, a valid value is one of "embedding" and "linear". + Returns: outputs: (1) If mode == "embedding", output embedding tensor, float32 with shape [batch_size, length, embedding_size]; (2) mode == "linear", output @@ -81,7 +79,7 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer): mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype) embeddings *= tf.expand_dims(mask, -1) # Scale embedding by the sqrt of the hidden size - embeddings *= self.hidden_size ** 0.5 + embeddings *= self.hidden_size**0.5 return embeddings @@ -90,6 +88,7 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer): Args: inputs: A float32 tensor with shape [batch_size, length, hidden_size] + Returns: float32 tensor with shape [batch_size, length, vocab_size]. 
""" diff --git a/official/nlp/transformer/ffn_layer.py b/official/nlp/transformer/ffn_layer.py index a7785f27dd0c3fed01c514d052749dcafd163605..26f0a15f69c50abee6f95dd40928e844ece1c691 100644 --- a/official/nlp/transformer/ffn_layer.py +++ b/official/nlp/transformer/ffn_layer.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Implementation of fully connected network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Implementation of fully connected network.""" import tensorflow as tf @@ -66,8 +62,6 @@ class FeedForwardNetwork(tf.keras.layers.Layer): tensor with shape [batch_size, length, hidden_size] """ # Retrieve dynamically known shapes - batch_size = tf.shape(x)[0] - length = tf.shape(x)[1] output = self.filter_dense_layer(x) if training: diff --git a/official/nlp/transformer/metrics.py b/official/nlp/transformer/metrics.py index 4bd6bba6e6862d643c6cb9bb9fb857b70b3cc00f..38330aa471c7f7384a3f42abb7eefc5a62a48d94 100644 --- a/official/nlp/transformer/metrics.py +++ b/official/nlp/transformer/metrics.py @@ -1,17 +1,17 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Functions for calculating loss, accuracy, and other model metrics. Metrics: @@ -22,9 +22,6 @@ Metrics: - ROUGE score. Source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/rouge.py """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import functools diff --git a/official/nlp/transformer/misc.py b/official/nlp/transformer/misc.py index e2b351ae652b7f644c8d598aef67b188ced01d68..a457e92f754f96547b527bddef016c30efea0cd9 100644 --- a/official/nlp/transformer/misc.py +++ b/official/nlp/transformer/misc.py @@ -1,24 +1,21 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Misc for Transformer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Misc for Transformer.""" # pylint: disable=g-bad-import-order + from absl import flags import tensorflow as tf @@ -66,28 +63,34 @@ def define_transformer_flags(): tf_gpu_thread_mode=True, datasets_num_private_threads=True, enable_xla=True, - fp16_implementation=True - ) + fp16_implementation=True) flags_core.define_benchmark() flags_core.define_device(tpu=True) flags.DEFINE_integer( - name='train_steps', short_name='ts', default=300000, + name='train_steps', + short_name='ts', + default=300000, help=flags_core.help_wrap('The number of steps used to train.')) flags.DEFINE_integer( - name='steps_between_evals', short_name='sbe', default=5000, + name='steps_between_evals', + short_name='sbe', + default=5000, help=flags_core.help_wrap( 'The Number of training steps to run between evaluations. This is ' 'used if --train_steps is defined.')) flags.DEFINE_boolean( - name='enable_time_history', default=True, + name='enable_time_history', + default=True, help='Whether to enable TimeHistory callback.') flags.DEFINE_boolean( - name='enable_tensorboard', default=False, + name='enable_tensorboard', + default=False, help='Whether to enable Tensorboard callback.') flags.DEFINE_boolean( - name='enable_metrics_in_training', default=False, + name='enable_metrics_in_training', + default=False, help='Whether to enable metrics during training.') flags.DEFINE_boolean( name='enable_mlir_bridge', @@ -100,7 +103,9 @@ def define_transformer_flags(): # Add transformer-specific flags flags.DEFINE_enum( - name='param_set', short_name='mp', default='big', + name='param_set', + short_name='mp', + default='big', enum_values=PARAMS_MAP.keys(), help=flags_core.help_wrap( 'Parameter set to use when creating and training the model. 
The ' @@ -111,7 +116,9 @@ def define_transformer_flags(): 'complete list of parameters, please see model/model_params.py.')) flags.DEFINE_bool( - name='static_batch', short_name='sb', default=False, + name='static_batch', + short_name='sb', + default=False, help=flags_core.help_wrap( 'Whether the batches in the dataset should have static shapes. In ' 'general, this setting should be False. Dynamic shapes allow the ' @@ -120,7 +127,9 @@ def define_transformer_flags(): 'must be static (e.g. running on TPU), this setting will be ignored ' 'and static batching will always be used.')) flags.DEFINE_integer( - name='max_length', short_name='ml', default=256, + name='max_length', + short_name='ml', + default=256, help=flags_core.help_wrap( 'Max sentence length for Transformer. Default is 256. Note: Usually ' 'it is more effective to use a smaller max length if static_batch is ' @@ -128,30 +137,39 @@ def define_transformer_flags(): # Flags for training with steps (may be used for debugging) flags.DEFINE_integer( - name='validation_steps', short_name='vs', default=64, + name='validation_steps', + short_name='vs', + default=64, help=flags_core.help_wrap('The number of steps used in validation.')) # BLEU score computation flags.DEFINE_string( - name='bleu_source', short_name='bls', default=None, + name='bleu_source', + short_name='bls', + default=None, help=flags_core.help_wrap( 'Path to source file containing text translate when calculating the ' 'official BLEU score. Both --bleu_source and --bleu_ref must be set. ' - )) + )) flags.DEFINE_string( - name='bleu_ref', short_name='blr', default=None, + name='bleu_ref', + short_name='blr', + default=None, help=flags_core.help_wrap( 'Path to source file containing text translate when calculating the ' 'official BLEU score. Both --bleu_source and --bleu_ref must be set. 
' - )) + )) flags.DEFINE_string( - name='vocab_file', short_name='vf', default=None, + name='vocab_file', + short_name='vf', + default=None, help=flags_core.help_wrap( 'Path to subtoken vocabulary file. If data_download.py was used to ' 'download and encode the training data, look in the data_dir to find ' 'the vocab file.')) flags.DEFINE_string( - name='mode', default='train', + name='mode', + default='train', help=flags_core.help_wrap('mode: train, eval, or predict')) flags.DEFINE_bool( name='use_ctl', @@ -187,10 +205,19 @@ def define_transformer_flags(): help=flags_core.help_wrap( 'Whether to do checkpointing during training. When running under ' 'benchmark harness, we will avoid checkpointing.')) + flags.DEFINE_bool( + name='save_weights_only', + default=True, + help=flags_core.help_wrap( + 'Only used when above `enable_checkpointing` is True. ' + 'If True, then only the model\'s weights will be saved ' + '(`model.save_weights(filepath)`), else the full model is saved ' + '(`model.save(filepath)`)')) - flags_core.set_defaults(data_dir='/tmp/translate_ende', - model_dir='/tmp/transformer_model', - batch_size=None) + flags_core.set_defaults( + data_dir='/tmp/translate_ende', + model_dir='/tmp/transformer_model', + batch_size=None) # pylint: disable=unused-variable @flags.multi_flags_validator( @@ -203,11 +230,12 @@ def define_transformer_flags(): @flags.multi_flags_validator( ['bleu_source', 'bleu_ref', 'vocab_file'], message='--vocab_file must be defined if --bleu_source and --bleu_ref ' - 'are defined.') + 'are defined.') def _check_bleu_vocab_file(flags_dict): if flags_dict['bleu_source'] and flags_dict['bleu_ref']: return flags_dict['vocab_file'] is not None return True + # pylint: enable=unused-variable @@ -256,5 +284,5 @@ def update_stats(history, stats, callbacks): if len(timestamp_log) > 1: stats['avg_exp_per_second'] = ( callback.batch_size * callback.log_steps * - (len(callback.timestamp_log)-1) / + (len(callback.timestamp_log) - 1) / 
(timestamp_log[-1].timestamp - timestamp_log[0].timestamp)) diff --git a/official/nlp/transformer/model_params.py b/official/nlp/transformer/model_params.py index e978abeafca5a627c698f291432f24119ae3fa68..0764d5e9a0d2e97754943cd61574b1c24469a0ae 100644 --- a/official/nlp/transformer/model_params.py +++ b/official/nlp/transformer/model_params.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Defines Transformer model parameters.""" -from collections import defaultdict +import collections -BASE_PARAMS = defaultdict( +BASE_PARAMS = collections.defaultdict( lambda: None, # Set default value to None. # Input params diff --git a/official/nlp/transformer/model_utils.py b/official/nlp/transformer/model_utils.py index 3f860f049cd0bcf0467913c91ee6312356f3ad23..6e163b97361cb7f071314909aaa1fc1e52ae6bfd 100644 --- a/official/nlp/transformer/model_utils.py +++ b/official/nlp/transformer/model_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Transformer model helper methods.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Transformer model helper methods.""" import math @@ -29,8 +25,10 @@ _NEG_INF_FP32 = -1e9 _NEG_INF_FP16 = np.finfo(np.float16).min -def get_position_encoding( - length, hidden_size, min_timescale=1.0, max_timescale=1.0e4): +def get_position_encoding(length, + hidden_size, + min_timescale=1.0, + max_timescale=1.0e4): """Return positional encoding. Calculates the position encoding as a mix of sine and cosine functions with @@ -77,8 +75,8 @@ def get_decoder_self_attention_bias(length, dtype=tf.float32): """ neg_inf = _NEG_INF_FP16 if dtype == tf.float16 else _NEG_INF_FP32 with tf.name_scope("decoder_self_attention_bias"): - valid_locs = tf.linalg.band_part(tf.ones([length, length], dtype=dtype), - -1, 0) + valid_locs = tf.linalg.band_part( + tf.ones([length, length], dtype=dtype), -1, 0) valid_locs = tf.reshape(valid_locs, [1, 1, length, length]) decoder_bias = neg_inf * (1.0 - valid_locs) return decoder_bias diff --git a/official/nlp/transformer/model_utils_test.py b/official/nlp/transformer/model_utils_test.py index a8c4a15c9aba8dbff043088a392fe415f22206ca..10ddeed8392a77175b82b69c6e628cc1306c607c 100644 --- a/official/nlp/transformer/model_utils_test.py +++ b/official/nlp/transformer/model_utils_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Test Transformer model helper methods.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Test Transformer model helper methods.""" import tensorflow as tf @@ -40,22 +36,19 @@ class ModelUtilsTest(tf.test.TestCase): bias_shape = tf.shape(bias) flattened_bias = tf.reshape(bias, [3, 5]) - self.assertAllEqual([[0, NEG_INF, NEG_INF, NEG_INF, 0], - [0, 0, NEG_INF, NEG_INF, NEG_INF], - [NEG_INF, 0, 0, NEG_INF, 0]], - flattened_bias) + self.assertAllEqual( + [[0, NEG_INF, NEG_INF, NEG_INF, 0], [0, 0, NEG_INF, NEG_INF, NEG_INF], + [NEG_INF, 0, 0, NEG_INF, 0]], flattened_bias) self.assertAllEqual([3, 1, 1, 5], bias_shape) def test_get_decoder_self_attention_bias(self): length = 5 bias = model_utils.get_decoder_self_attention_bias(length) - self.assertAllEqual([[[[0, NEG_INF, NEG_INF, NEG_INF, NEG_INF], - [0, 0, NEG_INF, NEG_INF, NEG_INF], - [0, 0, 0, NEG_INF, NEG_INF], - [0, 0, 0, 0, NEG_INF], - [0, 0, 0, 0, 0]]]], - bias) + self.assertAllEqual( + [[[[0, NEG_INF, NEG_INF, NEG_INF, NEG_INF], + [0, 0, NEG_INF, NEG_INF, NEG_INF], [0, 0, 0, NEG_INF, NEG_INF], + [0, 0, 0, 0, NEG_INF], [0, 0, 0, 0, 0]]]], bias) if __name__ == "__main__": diff --git a/official/nlp/transformer/optimizer.py b/official/nlp/transformer/optimizer.py index fd5b92294e6580541183f4b2f28c1f20ea0f2b97..b27a6f07a4b73723be6f28d257bc3abcfbca43de 100644 --- a/official/nlp/transformer/optimizer.py +++ b/official/nlp/transformer/optimizer.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Optimizer from addons and learning rate scheduler.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Optimizer from addons and learning rate scheduler.""" import tensorflow as tf @@ -35,7 +31,8 @@ class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): super(LearningRateSchedule, self).__init__() self.initial_learning_rate = initial_learning_rate self.hidden_size = hidden_size - self.warmup_steps = tf.cast(warmup_steps, tf.float32) + self.warmup_steps = warmup_steps + self.warmup_steps_tensor = tf.cast(warmup_steps, tf.float32) def __call__(self, global_step): """Calculate learning rate with linear warmup and rsqrt decay. @@ -52,9 +49,10 @@ class LearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule): learning_rate = self.initial_learning_rate learning_rate *= (self.hidden_size**-0.5) # Apply linear warmup - learning_rate *= tf.minimum(1.0, global_step / self.warmup_steps) + learning_rate *= tf.minimum(1.0, global_step / self.warmup_steps_tensor) # Apply rsqrt decay - learning_rate /= tf.sqrt(tf.maximum(global_step, self.warmup_steps)) + learning_rate /= tf.sqrt( + tf.maximum(global_step, self.warmup_steps_tensor)) return learning_rate def get_config(self): diff --git a/official/nlp/transformer/transformer.py b/official/nlp/transformer/transformer.py index a991676d54a06c4406c99ee8f531f7acfb01a5cd..da14c51ec393ae5d8bddf8e430a3147f4d767efc 100644 --- a/official/nlp/transformer/transformer.py +++ b/official/nlp/transformer/transformer.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,27 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Defines the Transformer model in TF 2.0. Model paper: https://arxiv.org/pdf/1706.03762.pdf Transformer model code source: https://github.com/tensorflow/tensor2tensor """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import tensorflow as tf from official.nlp.modeling.layers import position_embedding +from official.nlp.modeling.ops import beam_search from official.nlp.transformer import attention_layer -from official.nlp.transformer import beam_search from official.nlp.transformer import embedding_layer from official.nlp.transformer import ffn_layer from official.nlp.transformer import metrics from official.nlp.transformer import model_utils from official.nlp.transformer.utils.tokenizer import EOS_ID - # Disable the not-callable lint error, since it claims many objects are not # callable when they actually are. 
# pylint: disable=not-callable @@ -49,11 +45,12 @@ def create_model(params, is_train): label_smoothing = params["label_smoothing"] if params["enable_metrics_in_training"]: logits = metrics.MetricLayer(vocab_size)([logits, targets]) - logits = tf.keras.layers.Lambda(lambda x: x, name="logits", - dtype=tf.float32)(logits) + logits = tf.keras.layers.Lambda( + lambda x: x, name="logits", dtype=tf.float32)( + logits) model = tf.keras.Model([inputs, targets], logits) - loss = metrics.transformer_loss( - logits, targets, label_smoothing, vocab_size) + loss = metrics.transformer_loss(logits, targets, label_smoothing, + vocab_size) model.add_loss(loss) return model @@ -112,13 +109,14 @@ class Transformer(tf.keras.Model): sequence. float tensor with shape [batch_size, target_length, vocab_size] If target is none, then generate output sequence one token at a time. returns a dictionary { - outputs: [batch_size, decoded length] - scores: [batch_size, float]} + outputs: int tensor with shape [batch_size, decoded_length] + scores: float tensor with shape [batch_size]} Even when float16 is used, the output tensor(s) are always float32. Raises: NotImplementedError: If try to use padded decode method on CPU/GPUs. """ + inputs = inputs if isinstance(inputs, list) else [inputs] if len(inputs) == 2: inputs, targets = inputs[0], inputs[1] else: @@ -130,9 +128,7 @@ class Transformer(tf.keras.Model): "Padded decoding on CPU/GPUs is not supported.") decode_batch_size = int(self.params["decode_batch_size"] / self.params["num_replicas"]) - inputs.set_shape([ - decode_batch_size, self.params["decode_max_length"] - ]) + inputs.set_shape([decode_batch_size, self.params["decode_max_length"]]) # Variance scaling is used here because it seems to work in many problems. # Other reasonable initializers may also work just as well. @@ -257,19 +253,13 @@ class Transformer(tf.keras.Model): # Preprocess decoder input by getting embeddings and adding timing signal. 
decoder_input = self.embedding_softmax_layer(decoder_input) - + decoder_input += timing_signal[i] if self.params["padded_decode"]: - timing_signal_shape = timing_signal.shape.as_list() - decoder_input += tf.slice(timing_signal, [i, 0], - [1, timing_signal_shape[1]]) - bias_shape = decoder_self_attention_bias.shape.as_list() self_attention_bias = tf.slice( decoder_self_attention_bias, [0, 0, i, 0], [bias_shape[0], bias_shape[1], 1, bias_shape[3]]) else: - decoder_input += timing_signal[i:i + 1] - self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1] decoder_outputs = self.decoder_stack( @@ -314,15 +304,13 @@ class Transformer(tf.keras.Model): cache = { "layer_%d" % layer: { "k": - tf.zeros([ - batch_size, init_decode_length, num_heads, dim_per_head - ], - dtype=self.params["dtype"]), + tf.zeros( + [batch_size, init_decode_length, num_heads, dim_per_head], + dtype=self.params["dtype"]), "v": - tf.zeros([ - batch_size, init_decode_length, num_heads, dim_per_head - ], - dtype=self.params["dtype"]) + tf.zeros( + [batch_size, init_decode_length, num_heads, dim_per_head], + dtype=self.params["dtype"]) } for layer in range(self.params["num_hidden_layers"]) } # pylint: enable=g-complex-comprehension @@ -512,15 +500,14 @@ class DecoderStack(tf.keras.layers.Layer): """Return the output of the decoder layer stacks. Args: - decoder_inputs: A tensor with shape - [batch_size, target_length, hidden_size]. - encoder_outputs: A tensor with shape - [batch_size, input_length, hidden_size] - decoder_self_attention_bias: A tensor with shape - [1, 1, target_len, target_length], the bias for decoder self-attention - layer. - attention_bias: A tensor with shape [batch_size, 1, 1, input_length], - the bias for encoder-decoder attention layer. + decoder_inputs: A tensor with shape [batch_size, target_length, + hidden_size]. 
+ encoder_outputs: A tensor with shape [batch_size, input_length, + hidden_size] + decoder_self_attention_bias: A tensor with shape [1, 1, target_len, + target_length], the bias for decoder self-attention layer. + attention_bias: A tensor with shape [batch_size, 1, 1, input_length], the + bias for encoder-decoder attention layer. training: A bool, whether in training mode or not. cache: (Used for fast decoding) A nested dictionary storing previous decoder self-attention values. The items are: diff --git a/official/nlp/transformer/transformer_forward_test.py b/official/nlp/transformer/transformer_forward_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4c8406a32e906bc8683b0a3a744eb5890e665cc9 --- /dev/null +++ b/official/nlp/transformer/transformer_forward_test.py @@ -0,0 +1,157 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Forward pass test for Transformer model refactoring.""" + +import numpy as np + +import tensorflow as tf + +from official.nlp.modeling import models +from official.nlp.transformer import metrics +from official.nlp.transformer import model_params +from official.nlp.transformer import transformer + + +def _count_params(layer, trainable_only=True): + """Returns the count of all model parameters, or just trainable ones.""" + if not trainable_only: + return layer.count_params() + else: + return int( + np.sum([ + tf.keras.backend.count_params(p) for p in layer.trainable_weights + ])) + + +def _create_model(params, is_train): + """Creates transformer model.""" + + encdec_kwargs = dict( + num_layers=params["num_hidden_layers"], + num_attention_heads=params["num_heads"], + intermediate_size=params["filter_size"], + activation="relu", + dropout_rate=params["relu_dropout"], + attention_dropout_rate=params["attention_dropout"], + use_bias=False, + norm_first=True, + norm_epsilon=1e-6, + intermediate_dropout=params["relu_dropout"]) + encoder_layer = models.TransformerEncoder(**encdec_kwargs) + decoder_layer = models.TransformerDecoder(**encdec_kwargs) + + model_kwargs = dict( + vocab_size=params["vocab_size"], + embedding_width=params["hidden_size"], + dropout_rate=params["layer_postprocess_dropout"], + padded_decode=params["padded_decode"], + decode_max_length=params["decode_max_length"], + dtype=params["dtype"], + extra_decode_length=params["extra_decode_length"], + beam_size=params["beam_size"], + alpha=params["alpha"], + encoder_layer=encoder_layer, + decoder_layer=decoder_layer, + name="transformer_v2") + + if is_train: + inputs = tf.keras.layers.Input((None,), dtype="int64", name="inputs") + targets = tf.keras.layers.Input((None,), dtype="int64", name="targets") + internal_model = models.Seq2SeqTransformer(**model_kwargs) + logits = internal_model( + dict(inputs=inputs, targets=targets), training=is_train) + vocab_size = params["vocab_size"] + label_smoothing = 
params["label_smoothing"] + if params["enable_metrics_in_training"]: + logits = metrics.MetricLayer(vocab_size)([logits, targets]) + logits = tf.keras.layers.Lambda( + lambda x: x, name="logits", dtype=tf.float32)( + logits) + model = tf.keras.Model([inputs, targets], logits) + loss = metrics.transformer_loss(logits, targets, label_smoothing, + vocab_size) + model.add_loss(loss) + return model + + batch_size = params["decode_batch_size"] if params["padded_decode"] else None + inputs = tf.keras.layers.Input((None,), + batch_size=batch_size, + dtype="int64", + name="inputs") + internal_model = models.Seq2SeqTransformer(**model_kwargs) + ret = internal_model(dict(inputs=inputs), training=is_train) + outputs, scores = ret["outputs"], ret["scores"] + return tf.keras.Model(inputs, [outputs, scores]) + + +class TransformerForwardTest(tf.test.TestCase): + + def setUp(self): + super(TransformerForwardTest, self).setUp() + self.params = params = model_params.TINY_PARAMS + params["batch_size"] = params["default_batch_size"] = 16 + params["hidden_size"] = 12 + params["num_hidden_layers"] = 3 + params["filter_size"] = 14 + params["num_heads"] = 2 + params["vocab_size"] = 41 + params["extra_decode_length"] = 0 + params["beam_size"] = 3 + params["dtype"] = tf.float32 + params["layer_postprocess_dropout"] = 0.0 + params["attention_dropout"] = 0.0 + params["relu_dropout"] = 0.0 + + def test_forward_pass_train(self): + # Set input_len different from target_len + inputs = np.asarray([[5, 2, 1], [7, 5, 0], [1, 4, 0], [7, 5, 11]]) + targets = np.asarray([[4, 3, 4, 0], [13, 19, 17, 8], [20, 14, 1, 2], + [5, 7, 3, 0]]) + + # src_model is the original model before refactored. + src_model = transformer.create_model(self.params, True) + src_num_weights = _count_params(src_model) + src_weights = src_model.get_weights() + src_model_output = src_model([inputs, targets], training=True) + + # dest_model is the refactored model. 
+ dest_model = _create_model(self.params, True) + dest_num_weights = _count_params(dest_model) + self.assertEqual(src_num_weights, dest_num_weights) + dest_model.set_weights(src_weights) + dest_model_output = dest_model([inputs, targets], training=True) + self.assertAllEqual(src_model_output, dest_model_output) + + def test_forward_pass_not_train(self): + inputs = np.asarray([[5, 2, 1], [7, 5, 0], [1, 4, 0], [7, 5, 11]]) + + # src_model is the original model before refactored. + src_model = transformer.create_model(self.params, False) + src_num_weights = _count_params(src_model) + src_weights = src_model.get_weights() + src_model_output = src_model([inputs], training=False) + + # dest_model is the refactored model. + dest_model = _create_model(self.params, False) + dest_num_weights = _count_params(dest_model) + self.assertEqual(src_num_weights, dest_num_weights) + dest_model.set_weights(src_weights) + dest_model_output = dest_model([inputs], training=False) + self.assertAllEqual(src_model_output[0], dest_model_output[0]) + self.assertAllEqual(src_model_output[1], dest_model_output[1]) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/nlp/transformer/transformer_layers_test.py b/official/nlp/transformer/transformer_layers_test.py index 82d37259da2854fb83e086749fe7a8df2c22e955..83e76890548e2c4d40345e1b802e22a7fd645b2d 100644 --- a/official/nlp/transformer/transformer_layers_test.py +++ b/official/nlp/transformer/transformer_layers_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for layers in Transformer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for layers in Transformer.""" import tensorflow as tf @@ -34,11 +30,12 @@ class TransformerLayersTest(tf.test.TestCase): dropout = 0.5 dim_per_head = hidden_size // num_heads layer = attention_layer.SelfAttention(hidden_size, num_heads, dropout) - self.assertDictEqual(layer.get_config(), { - "hidden_size": hidden_size, - "num_heads": num_heads, - "attention_dropout": dropout, - }) + self.assertDictEqual( + layer.get_config(), { + "hidden_size": hidden_size, + "num_heads": num_heads, + "attention_dropout": dropout, + }) length = 2 x = tf.ones([1, length, hidden_size]) bias = tf.ones([1]) @@ -47,9 +44,23 @@ class TransformerLayersTest(tf.test.TestCase): "v": tf.zeros([1, 0, num_heads, dim_per_head]), } y = layer(x, bias, training=True, cache=cache) - self.assertEqual(y.shape, (1, length, 64,)) - self.assertEqual(cache["k"].shape, (1, length, num_heads, dim_per_head,)) - self.assertEqual(cache["v"].shape, (1, length, num_heads, dim_per_head,)) + self.assertEqual(y.shape, ( + 1, + length, + 64, + )) + self.assertEqual(cache["k"].shape, ( + 1, + length, + num_heads, + dim_per_head, + )) + self.assertEqual(cache["v"].shape, ( + 1, + length, + num_heads, + dim_per_head, + )) def test_embedding_shared_weights(self): vocab_size = 50 @@ -63,25 +74,38 @@ class TransformerLayersTest(tf.test.TestCase): idx = tf.ones([1, length], dtype="int32") y = layer(idx) - self.assertEqual(y.shape, (1, length, hidden_size,)) + self.assertEqual(y.shape, ( + 1, + length, + hidden_size, + )) x = tf.ones([1, length, hidden_size]) output = layer(x, "linear") - self.assertEqual(output.shape, (1, length, vocab_size,)) + self.assertEqual(output.shape, ( + 1, + length, + vocab_size, + )) def test_feed_forward_network(self): hidden_size = 64 filter_size = 32 
relu_dropout = 0.5 layer = ffn_layer.FeedForwardNetwork(hidden_size, filter_size, relu_dropout) - self.assertDictEqual(layer.get_config(), { - "hidden_size": hidden_size, - "filter_size": filter_size, - "relu_dropout": relu_dropout, - }) + self.assertDictEqual( + layer.get_config(), { + "hidden_size": hidden_size, + "filter_size": filter_size, + "relu_dropout": relu_dropout, + }) length = 2 x = tf.ones([1, length, hidden_size]) y = layer(x, training=True) - self.assertEqual(y.shape, (1, length, hidden_size,)) + self.assertEqual(y.shape, ( + 1, + length, + hidden_size, + )) def test_metric_layer(self): vocab_size = 50 @@ -90,7 +114,11 @@ class TransformerLayersTest(tf.test.TestCase): name="logits") targets = tf.keras.layers.Input((None,), dtype="int64", name="targets") output_logits = metrics.MetricLayer(vocab_size)([logits, targets]) - self.assertEqual(output_logits.shape.as_list(), [None, None, vocab_size,]) + self.assertEqual(output_logits.shape.as_list(), [ + None, + None, + vocab_size, + ]) if __name__ == "__main__": diff --git a/official/nlp/transformer/transformer_main.py b/official/nlp/transformer/transformer_main.py index 7c71722797d6af0bbc609c0f905da3e4b347e4b0..7b1fe77e9194a10d1eb3a254fb36423680b63e02 100644 --- a/official/nlp/transformer/transformer_main.py +++ b/official/nlp/transformer/transformer_main.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,24 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Train and evaluate the Transformer model. 
See README for description of setting the training schedule and evaluating the BLEU score. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import tempfile +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf - +from official.common import distribute_utils from official.modeling import performance from official.nlp.transformer import compute_bleu from official.nlp.transformer import data_pipeline @@ -39,8 +37,8 @@ from official.nlp.transformer import transformer from official.nlp.transformer import translate from official.nlp.transformer.utils import tokenizer from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils +# pylint:disable=logging-format-interpolation INF = int(1e9) BLEU_DIR = "bleu" @@ -159,8 +157,9 @@ class TransformerTask(object): params["enable_metrics_in_training"] = flags_obj.enable_metrics_in_training params["steps_between_evals"] = flags_obj.steps_between_evals params["enable_checkpointing"] = flags_obj.enable_checkpointing + params["save_weights_only"] = flags_obj.save_weights_only - self.distribution_strategy = distribution_utils.get_distribution_strategy( + self.distribution_strategy = distribute_utils.get_distribution_strategy( distribution_strategy=flags_obj.distribution_strategy, num_gpus=num_gpus, all_reduce_alg=flags_obj.all_reduce_alg, @@ -177,15 +176,12 @@ class TransformerTask(object): else: logging.info("Not using any distribution strategy.") - performance.set_mixed_precision_policy( - params["dtype"], - flags_core.get_loss_scale(flags_obj, default_for_fp16="dynamic")) + performance.set_mixed_precision_policy(params["dtype"]) @property def use_tpu(self): if self.distribution_strategy: - return isinstance(self.distribution_strategy, - tf.distribute.experimental.TPUStrategy) + return 
isinstance(self.distribution_strategy, tf.distribute.TPUStrategy) return False def train(self): @@ -196,7 +192,7 @@ class TransformerTask(object): keras_utils.set_session_config(enable_xla=flags_obj.enable_xla) _ensure_dir(flags_obj.model_dir) - with distribution_utils.get_strategy_scope(self.distribution_strategy): + with distribute_utils.get_strategy_scope(self.distribution_strategy): model = transformer.create_model(params, is_train=True) opt = self._create_optimizer() @@ -212,10 +208,10 @@ class TransformerTask(object): train_loss_metric = tf.keras.metrics.Mean( "training_loss", dtype=tf.float32) if params["enable_tensorboard"]: - summary_writer = tf.compat.v2.summary.create_file_writer( - flags_obj.model_dir) + summary_writer = tf.summary.create_file_writer( + os.path.join(flags_obj.model_dir, "summary")) else: - summary_writer = tf.compat.v2.summary.create_noop_writer() + summary_writer = tf.summary.create_noop_writer() train_metrics = [train_loss_metric] if params["enable_metrics_in_training"]: train_metrics = train_metrics + model.metrics @@ -226,12 +222,11 @@ class TransformerTask(object): if self.use_tpu: # Different from experimental_distribute_dataset, - # experimental_distribute_datasets_from_function requires + # distribute_datasets_from_function requires # per-replica/local batch size. 
params["batch_size"] /= self.distribution_strategy.num_replicas_in_sync train_ds = ( - self.distribution_strategy - .experimental_distribute_datasets_from_function( + self.distribution_strategy.distribute_datasets_from_function( lambda ctx: data_pipeline.train_input_fn(params, ctx))) else: train_ds = data_pipeline.train_input_fn(params) @@ -321,8 +316,8 @@ class TransformerTask(object): if params["enable_tensorboard"]: for metric_obj in train_metrics: - tf.compat.v2.summary.scalar(metric_obj.name, metric_obj.result(), - current_step) + tf.summary.scalar(metric_obj.name, metric_obj.result(), + current_step) summary_writer.flush() for cb in callbacks: @@ -375,7 +370,7 @@ class TransformerTask(object): # We only want to create the model under DS scope for TPU case. # When 'distribution_strategy' is None, a no-op DummyContextManager will # be used. - with distribution_utils.get_strategy_scope(distribution_strategy): + with distribute_utils.get_strategy_scope(distribution_strategy): if not self.predict_model: self.predict_model = transformer.create_model(self.params, False) self._load_weights_if_possible( @@ -414,7 +409,7 @@ class TransformerTask(object): ckpt_full_path = os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt") callbacks.append( tf.keras.callbacks.ModelCheckpoint( - ckpt_full_path, save_weights_only=True)) + ckpt_full_path, save_weights_only=params["save_weights_only"])) return callbacks def _load_weights_if_possible(self, model, init_weight_path=None): diff --git a/official/nlp/transformer/transformer_main_test.py b/official/nlp/transformer/transformer_main_test.py index a65cc4bcbf3a1c4281a36730a1ab60c496f3c7aa..cf5a349ba14b3097d59fa7f283aef7d8b2922dbe 100644 --- a/official/nlp/transformer/transformer_main_test.py +++ b/official/nlp/transformer/transformer_main_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Test Transformer model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Test Transformer model.""" import os import re @@ -29,7 +25,6 @@ import tensorflow as tf from tensorflow.python.eager import context # pylint: disable=ungrouped-imports from official.nlp.transformer import misc from official.nlp.transformer import transformer_main -from official.utils.misc import keras_utils FLAGS = flags.FLAGS FIXED_TIMESTAMP = 'my_time_stamp' @@ -45,7 +40,7 @@ def _generate_file(filepath, lines): class TransformerTaskTest(tf.test.TestCase): local_flags = None - def setUp(self): + def setUp(self): # pylint: disable=g-missing-super-call temp_dir = self.get_temp_dir() if TransformerTaskTest.local_flags is None: misc.define_transformer_flags() @@ -72,10 +67,10 @@ class TransformerTaskTest(tf.test.TestCase): self.bleu_source = os.path.join(temp_dir, 'bleu_source') self.bleu_ref = os.path.join(temp_dir, 'bleu_ref') self.orig_policy = ( - tf.compat.v2.keras.mixed_precision.experimental.global_policy()) + tf.compat.v2.keras.mixed_precision.global_policy()) - def tearDown(self): - tf.compat.v2.keras.mixed_precision.experimental.set_policy(self.orig_policy) + def tearDown(self): # pylint: disable=g-missing-super-call + tf.compat.v2.keras.mixed_precision.set_global_policy(self.orig_policy) def _assert_exists(self, filepath): self.assertTrue(os.path.exists(filepath)) @@ -86,6 +81,13 @@ class TransformerTaskTest(tf.test.TestCase): t = transformer_main.TransformerTask(FLAGS) t.train() + def 
test_train_save_full_model(self): + if context.num_gpus() >= 2: + self.skipTest('No need to test 2+ GPUs without a distribution strategy.') + FLAGS.save_weights_only = False + t = transformer_main.TransformerTask(FLAGS) + t.train() + def test_train_static_batch(self): if context.num_gpus() >= 2: self.skipTest('No need to test 2+ GPUs without a distribution strategy.') diff --git a/official/nlp/transformer/transformer_test.py b/official/nlp/transformer/transformer_test.py index 227b43dc6ff194ab74effc37214ae9253823310d..c64686dac034c5d0e1d4f29bf4b378f2b64ef130 100644 --- a/official/nlp/transformer/transformer_test.py +++ b/official/nlp/transformer/transformer_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Test Transformer model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Test Transformer model.""" import tensorflow as tf @@ -27,6 +23,7 @@ from official.nlp.transformer import transformer class TransformerV2Test(tf.test.TestCase): def setUp(self): + super().setUp() self.params = params = model_params.TINY_PARAMS params["batch_size"] = params["default_batch_size"] = 16 params["use_synthetic_data"] = True @@ -63,6 +60,39 @@ class TransformerV2Test(tf.test.TestCase): self.assertEqual(outputs[1].shape.as_list(), [None]) self.assertEqual(outputs[1].dtype, tf.float32) + def test_export(self): + model = transformer.Transformer(self.params, name="transformer_v2") + export_dir = self.get_temp_dir() + batch_size = 5 + max_length = 6 + + class SaveModule(tf.Module): + + def __init__(self, model): + super(SaveModule, self).__init__() + self.model = model + + @tf.function + def serve(self, x): + return self.model.call([x], training=False) + + save_module = SaveModule(model) + tensor_shape = (None, None) + sample_input = tf.zeros((batch_size, max_length), dtype=tf.int64) + _ = save_module.serve(sample_input) + signatures = dict( + serving_default=save_module.serve.get_concrete_function( + tf.TensorSpec(shape=tensor_shape, dtype=tf.int64, name="x"))) + tf.saved_model.save(save_module, export_dir, signatures=signatures) + imported = tf.saved_model.load(export_dir) + serving_fn = imported.signatures["serving_default"] + all_outputs = serving_fn(sample_input) + output = all_outputs["outputs"] + output_shapes = output.shape.as_list() + self.assertEqual(output_shapes[0], batch_size) + self.assertEqual(output_shapes[1], + max_length + model.params["extra_decode_length"]) + if __name__ == "__main__": tf.test.main() diff --git a/official/nlp/transformer/translate.py b/official/nlp/transformer/translate.py index 
a6e79a9cf888077333018a3b37e0a868b648a102..0c15096aed6b33fea0beb2f0ff76daf1737e09bb 100644 --- a/official/nlp/transformer/translate.py +++ b/official/nlp/transformer/translate.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Translate text or files using trained transformer model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Translate text or files using trained transformer model.""" +# Import libraries from absl import logging import numpy as np import tensorflow as tf @@ -149,19 +146,13 @@ def translate_file(model, def text_as_per_replica(): replica_context = tf.distribute.get_replica_context() replica_id = replica_context.replica_id_in_sync_group - return replica_id, text[replica_id] + return replica_id, text[replica_id] # pylint: disable=cell-var-from-loop text = distribution_strategy.run(text_as_per_replica) outputs = distribution_strategy.experimental_local_results( predict_step(text)) - tags, unordered_val_outputs = outputs[0] - tags = [tag.numpy() for tag in tags._values] - unordered_val_outputs = [ - val_output.numpy() for val_output in unordered_val_outputs._values] - # pylint: enable=protected-access - val_outputs = [None] * len(tags) - for k in range(len(tags)): - val_outputs[tags[k]] = unordered_val_outputs[k] + val_outputs = [output for _, output in outputs] + val_outputs = np.reshape(val_outputs, [params["decode_batch_size"], -1]) else: val_outputs, _ = model.predict(text) 
diff --git a/official/nlp/transformer/utils/__init__.py b/official/nlp/transformer/utils/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/nlp/transformer/utils/__init__.py +++ b/official/nlp/transformer/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/nlp/transformer/utils/metrics.py b/official/nlp/transformer/utils/metrics.py index 6b8d1f7d26e9a4ee743fc884feba878c9de4237f..ec1cad0b409cfb69535dce15fab1d531d7811391 100644 --- a/official/nlp/transformer/utils/metrics.py +++ b/official/nlp/transformer/utils/metrics.py @@ -1,17 +1,17 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); +# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Functions for calculating loss, accuracy, and other model metrics. Metrics: @@ -203,7 +203,7 @@ def bleu_score(logits, labels): bleu: int, approx bleu score """ predictions = tf.cast(tf.argmax(logits, axis=-1), tf.int32) - # TODO: Look into removing use of py_func + # TODO: Look into removing use of py_func # pylint: disable=g-bad-todo bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32) return bleu, tf.constant(1.0) @@ -308,7 +308,7 @@ def rouge_2_fscore(logits, labels): rouge2_fscore: approx rouge-2 f1 score. """ predictions = tf.cast(tf.argmax(logits, axis=-1), tf.int32) - # TODO: Look into removing use of py_func + # TODO: Look into removing use of py_func # pylint: disable=g-bad-todo rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32) return rouge_2_f_score, tf.constant(1.0) diff --git a/official/nlp/transformer/utils/tokenizer.py b/official/nlp/transformer/utils/tokenizer.py index 3749dfe9de6263a4cc185928b7f8967c56250216..6a992a324f3b0c651d219f4f2cc081a274d87db4 100644 --- a/official/nlp/transformer/utils/tokenizer.py +++ b/official/nlp/transformer/utils/tokenizer.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Defines Subtokenizer class to encode and decode strings.""" from __future__ import absolute_import @@ -22,6 +22,7 @@ import collections import re import sys import unicodedata + from absl import logging import numpy as np @@ -29,7 +30,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf - # pylint: disable=g-complex-comprehension PAD = "" PAD_ID = 0 diff --git a/official/nlp/transformer/utils/tokenizer_test.py b/official/nlp/transformer/utils/tokenizer_test.py index 307398fd3aeaf55a5bec495006a1fb65ebadd639..f6ef7a08b2490c49410201a5114183f24a87a1e7 100644 --- a/official/nlp/transformer/utils/tokenizer_test.py +++ b/official/nlp/transformer/utils/tokenizer_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Test Subtokenizer and string helper methods.""" import collections diff --git a/official/nlp/xlnet/README.md b/official/nlp/xlnet/README.md index 9675f01a57fd26a83ed5103e116257b3664396cb..546d1128e2d562fbcb4de32894d89b2200249d2b 100644 --- a/official/nlp/xlnet/README.md +++ b/official/nlp/xlnet/README.md @@ -3,8 +3,6 @@ The academic paper which describes XLNet in detail and provides full results on a number of tasks can be found here: https://arxiv.org/abs/1906.08237. 
-**Instructions and user guide will be added soon.** - XLNet is a generalized autoregressive BERT-like pretraining language model that enables learning bidirectional contexts by maximizing the expected likelihood over all permutations of the factorization order. It can learn dependency beyond @@ -14,3 +12,225 @@ recurrence mechanism and relative positional encoding scheme introduced in on 20 NLP benchmark tasks and achieves state-of-the-art results on 18 tasks including question answering, natural language inference, sentiment analysis, and document ranking. + +## Contents + +* [Contents](#contents) +* [Set Up](#set-up) +* [Process Datasets](#process-datasets) +* [Fine-tuning with XLNet](#fine-tuning-with-xlnet) + +## Set up + +To run XLNet on a Cloud TPU, you can first create a `tf-nightly` TPU with the +[ctpu tool](https://github.com/tensorflow/tpu/tree/master/tools/ctpu): + +```shell +ctpu up -name --tf-version=”nightly” +``` + +After SSH'ing into the VM (or if you're using an on-prem machine), setup +continues as follows: + +```shell +export PYTHONPATH="$PYTHONPATH:/path/to/models" +``` + +Install `tf-nightly` to get latest updates: + +```shell +pip install tf-nightly-gpu +``` + +## Process Datasets + +Dataset processing requires a +[Sentence Piece](https://github.com/google/sentencepiece) model. One can be +found at the publicly available GCS bucket at: +`gs://cloud-tpu-checkpoints/xlnet/cased_spiece.model`. + +Note that in order to train using Cloud TPUs, data must be stored on a GCS +bucket. + +Setup commands: + +```shell +export SPIECE_DIR=~/cased_spiece/ +export SPIECE_MODEL=${SPIECE_DIR}/cased_spiece.model +export DATASETS_DIR=gs://some_bucket/datasets +mkdir -p ${SPIECE_DIR} +gsutil cp gs://cloud-tpu-checkpoints/xlnet/cased_spiece.model ${SPIECE_DIR} +``` + + +### Pre-training + +Pre-training data can be converted into TFRecords using +[`preprocess_pretrain_data.py`](preprocess_pretrain_data.py). 
Inputs should +consist of a plain text file (or a file glob of plain text files) with one +sentence per line. + +To run the script, use the following command: + +```shell +export INPUT_GLOB='path/to/wiki_cased/*.txt' + +python3 preprocess_pretrain_data.py --bsz_per_host=32 --num_core_per_host=16 +--seq_len=512 --reuse_len=256 --input_glob='path/to/wiki_cased/*.txt' +--save_dir=${DATASETS_DIR}/pretrain --bi_data=True --sp_path=${SPIECE_MODEL} +--mask_alpha=6 --mask_beta=1 --num_predict=85 +``` + +Note that to make the memory mechanism work correctly, `bsz_per_host` and +`num_core_per_host` are *strictly specified* when preparing TFRecords. The same +TPU settings should be used when training. + +### Fine-tuning + +* Classification + +To prepare classification data TFRecords on the IMDB dataset, users can download +and unpack the [IMDB dataset](https://www.imdb.com/interfaces/) with the +following command: + +```shell +export IMDB_DIR=~/imdb +mkdir -p ${IMDB_DIR} + +cd ${IMDB_DIR} +wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz +tar zxvf aclImdb_v1.tar.gz -C ${IMDB_DIR} +rm aclImdb_v1.tar.gz +``` + +Then, the dataset can be converted into TFRecords with the following command: + +```shell +export TASK_NAME=imdb + +python3 preprocess_classification_data.py --max_seq_length=512 --spiece_model_file=${SPIECE_MODEL} --output_dir=${DATASETS_DIR}/${TASK_NAME} --data_dir=${IMDB_DIR}/aclImdb --task_name=${TASK_NAME} +``` + +Note: To obtain SOTA on the IMDB dataset, using a sequence length of 512 is +necessary. + +* SQUAD + +The [SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/) contains +detailed information about the SQuAD datasets and evaluation. 
+ +To download the relevant files, use the following command: + +```shell +export SQUAD_DIR=~/squad + +mkdir -p ${SQUAD_DIR} && cd ${SQUAD_DIR} +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json +wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json +``` + +Then to process the dataset into TFRecords, run the following commands: + +```shell +python3 preprocess_squad_data.py --spiece_model_file=${SPIECE_MODEL} --train_file=${SQUAD_DIR}/train-v2.0.json --predict_file=${SQUAD_DIR}/dev-v2.0.json --output_dir=${DATASETS_DIR}/squad --uncased=False --max_seq_length=512 --num_proc=1 --proc_id=0 + +gsutil cp ${SQUAD_DIR}/dev-v2.0.json ${DATASETS_DIR}/squad +``` + +## Fine-tuning with XLNet + +* Cloud Storage + +The unzipped pre-trained model files can be found in the Google Cloud Storage +folder `gs://cloud-tpu-checkpoints/xlnet/keras_xlnet`. For example: + +```shell +export XLNET_DIR=gs://cloud-tpu-checkpoints/xlnet/keras_xlnet +export MODEL_DIR=gs://some_bucket/my_output_dir +``` + +### Classification task + +This example code fine-tunes `XLNet` on the IMDB dataset. For this task, it +takes around 11 minutes to get the first 500 steps' results, and takes around 1 +hour to complete on a v3-8. It is expected to obtain an accuracy between 96.15 +and 96.33.
+ +To run on a v3-8 TPU: + +```shell +export TPU_NAME=my-tpu + +python3 run_classifier.py \ +--strategy_type=tpu \ +--tpu=${TPU_NAME} \ +--init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \ +--model_dir=${MODEL_DIR} \ +--test_data_size=25024 \ +--train_tfrecord_path=${DATASETS_DIR}/imdb/cased_spiece.model.len-512.train.tf_record \ +--test_tfrecord_path=${DATASETS_DIR}/imdb/cased_spiece.model.len-512.dev.eval.tf_record \ +--train_batch_size=32 \ +--seq_len=512 \ +--n_layer=24 \ +--d_model=1024 \ +--d_embed=1024 \ +--n_head=16 \ +--d_head=64 \ +--d_inner=4096 \ +--untie_r=true \ +--n_class=2 \ +--ff_activation=gelu \ +--learning_rate=2e-5 \ +--train_steps=4000 \ +--warmup_steps=500 \ +--iterations=500 \ +--bi_data=false \ +--summary_type=last +``` + +### SQuAD 2.0 Task + +The Stanford Question Answering Dataset (SQuAD) is a popular question answering +benchmark dataset. See more in +[SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/). + +We use `XLNet-LARGE` (cased_L-24_H-1024_A-16) running on a v3-8 as an example to +run this workflow. It is expected to reach a `best_f1` score of between 88.30 +and 88.80. It should take around 5 minutes to read the pickle file, and then 18 +minutes to get the first 1000 steps' results. It takes around 2 hours to +complete. 
+ +```shell +export TPU_NAME=my-tpu + +python3 run_squad.py \ + --strategy_type=tpu \ + --tpu=${TPU_NAME} \ + --init_checkpoint=${XLNET_DIR}/xlnet_model.ckpt \ + --model_dir=${MODEL_DIR} \ + --train_tfrecord_path=${DATASETS_DIR}/squad/squad_cased \ + --test_tfrecord_path=${DATASETS_DIR}/squad/squad_cased/12048.eval.tf_record \ + --test_feature_path=${DATASETS_DIR}/squad/spiece.model.slen-512.qlen-64.eval.features.pkl \ + --predict_dir=${MODEL_DIR} \ + --predict_file=${DATASETS_DIR}/squad/dev-v2.0.json \ + --train_batch_size=48 \ + --seq_len=512 \ + --reuse_len=256 \ + --mem_len=0 \ + --n_layer=24 \ + --d_model=1024 \ + --d_embed=1024 \ + --n_head=16 \ + --d_head=64 \ + --d_inner=4096 \ + --untie_r=true \ + --ff_activation=gelu \ + --learning_rate=.00003 \ + --train_steps=8000 \ + --warmup_steps=1000 \ + --iterations=1000 \ + --bi_data=false \ + --query_len=64 \ + --adam_epsilon=.000001 \ + --lr_layer_decay_rate=0.75 +``` diff --git a/official/nlp/xlnet/__init__.py b/official/nlp/xlnet/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..a25710c222e3327cb20e000db5df5c5651c4a2cc 100644 --- a/official/nlp/xlnet/__init__.py +++ b/official/nlp/xlnet/__init__.py @@ -1 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/nlp/xlnet/classifier_utils.py b/official/nlp/xlnet/classifier_utils.py index 64363e322633f7ae43d6ffc65c99ee1beff36827..cb8acee087dc58596159d1b11ddf7c09299038dc 100644 --- a/official/nlp/xlnet/classifier_utils.py +++ b/official/nlp/xlnet/classifier_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utilities for pre-processing classification data.""" + from absl import logging from official.nlp.xlnet import data_utils @@ -152,7 +153,7 @@ def convert_single_example(example_index, example, label_list, max_seq_length, logging.info("input_ids: %s", " ".join([str(x) for x in input_ids])) logging.info("input_mask: %s", " ".join([str(x) for x in input_mask])) logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids])) - logging.info("label: %d (id = %d)", example.label, label_id) + logging.info("label: %s (id = %d)", example.label, label_id) feature = InputFeatures( input_ids=input_ids, diff --git a/official/nlp/xlnet/common_flags.py b/official/nlp/xlnet/common_flags.py index 93d9499f19475b96095c409fb20a5efb35f3f9b5..549e7b036e8133c6e6e50deea5099404e9ee1dcf 100644 --- a/official/nlp/xlnet/common_flags.py +++ b/official/nlp/xlnet/common_flags.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Common flags used in XLNet model.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function from absl import flags diff --git a/official/nlp/xlnet/data_utils.py b/official/nlp/xlnet/data_utils.py index c1dfe5e7ffb06ff8d38c11271b5758db48c4c4cb..58ffdbffc2c287064b2f98a5e04a70cc8020ff34 100644 --- a/official/nlp/xlnet/data_utils.py +++ b/official/nlp/xlnet/data_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,23 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Utilities used for data preparation.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Utilities used for data preparation.""" import collections import json import os + from absl import logging import numpy as np import tensorflow as tf - special_symbols = { "": 0, "": 1, @@ -51,10 +46,10 @@ SEG_ID_Q = 1 SEG_ID_CLS = 2 SEG_ID_PAD = 3 - OnlineMaskingConfig = collections.namedtuple("OnlineMaskingConfig", [ "sample_strategy", "max_num_tokens", "min_num_tokens", "max_num_words", - "min_num_words"]) + "min_num_words" +]) def file_based_input_fn_builder(input_file, name_to_features, batch_size, @@ -95,7 +90,6 @@ def file_based_input_fn_builder(input_file, name_to_features, batch_size, d = d.interleave( tf.data.TFRecordDataset, - sloppy=is_training, cycle_length=cycle_length) if is_training: @@ -168,8 +162,7 @@ def get_input_iterator(input_fn, strategy): # pass callable that returns a dataset. input_data = input_fn() if callable(input_data): - iterator = iter( - strategy.experimental_distribute_datasets_from_function(input_data)) + iterator = iter(strategy.distribute_datasets_from_function(input_data)) else: iterator = iter(strategy.experimental_distribute_dataset(input_data)) return iterator @@ -182,7 +175,7 @@ def get_classification_input_data(batch_size, seq_len, strategy, is_training, # When using TPU pods, we need to clone dataset across # workers and need to pass in function that returns the dataset rather # than passing dataset instance itself. 
- use_dataset_fn = isinstance(strategy, tf.distribute.experimental.TPUStrategy) + use_dataset_fn = isinstance(strategy, tf.distribute.TPUStrategy) if use_dataset_fn: if batch_size % strategy.num_replicas_in_sync != 0: raise ValueError( @@ -190,7 +183,7 @@ def get_classification_input_data(batch_size, seq_len, strategy, is_training, strategy.num_replicas_in_sync)) # As auto rebatching is not supported in - # `experimental_distribute_datasets_from_function()` API, which is + # `distribute_datasets_from_function()` API, which is # required when cloning dataset to multiple workers in eager mode, # we use per-replica batch size. batch_size = int(batch_size / strategy.num_replicas_in_sync) @@ -215,7 +208,7 @@ def get_squad_input_data(batch_size, seq_len, q_len, strategy, is_training, # When using TPU pods, we need to clone dataset across # workers and need to pass in function that returns the dataset rather # than passing dataset instance itself. - use_dataset_fn = isinstance(strategy, tf.distribute.experimental.TPUStrategy) + use_dataset_fn = isinstance(strategy, tf.distribute.TPUStrategy) if use_dataset_fn: if batch_size % strategy.num_replicas_in_sync != 0: raise ValueError( @@ -223,7 +216,7 @@ def get_squad_input_data(batch_size, seq_len, q_len, strategy, is_training, strategy.num_replicas_in_sync)) # As auto rebatching is not supported in - # `experimental_distribute_datasets_from_function()` API, which is + # `distribute_datasets_from_function()` API, which is # required when cloning dataset to multiple workers in eager mode, # we use per-replica batch size. 
batch_size = int(batch_size / strategy.num_replicas_in_sync) @@ -253,20 +246,14 @@ def get_squad_input_data(batch_size, seq_len, q_len, strategy, is_training, def _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len, num_predict): """Turn beg and end indices into actual mask.""" non_func_mask = tf.logical_and( - tf.not_equal(inputs, SEP_ID), - tf.not_equal(inputs, CLS_ID)) - all_indices = tf.where( - non_func_mask, - tf.range(tgt_len, dtype=tf.int64), - tf.constant(-1, shape=[tgt_len], dtype=tf.int64)) + tf.not_equal(inputs, SEP_ID), tf.not_equal(inputs, CLS_ID)) + all_indices = tf.where(non_func_mask, tf.range(tgt_len, dtype=tf.int64), + tf.constant(-1, shape=[tgt_len], dtype=tf.int64)) candidate_matrix = tf.cast( - tf.logical_and( - all_indices[None, :] >= beg_indices[:, None], - all_indices[None, :] < end_indices[:, None]), - tf.float32) + tf.logical_and(all_indices[None, :] >= beg_indices[:, None], + all_indices[None, :] < end_indices[:, None]), tf.float32) cumsum_matrix = tf.reshape( - tf.cumsum(tf.reshape(candidate_matrix, [-1])), - [-1, tgt_len]) + tf.cumsum(tf.reshape(candidate_matrix, [-1])), [-1, tgt_len]) masked_matrix = tf.cast(cumsum_matrix <= num_predict, tf.float32) target_mask = tf.reduce_sum(candidate_matrix * masked_matrix, axis=0) is_masked = tf.cast(target_mask, tf.bool) @@ -274,8 +261,8 @@ def _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len, num_predict): return is_masked, target_mask -def _word_span_mask(inputs, tgt_len, num_predict, min_num_words, - max_num_words, boundary): +def _word_span_mask(inputs, tgt_len, num_predict, min_num_words, max_num_words, + boundary): """Sample whole word spans as prediction targets.""" # Note: 1.2 is the token-to-word ratio mask_alpha = tgt_len / num_predict / 1.2 @@ -283,7 +270,7 @@ def _word_span_mask(inputs, tgt_len, num_predict, min_num_words, # Sample span lengths from a zipf distribution span_len_seq = np.arange(min_num_words, max_num_words + 1) - probs = np.array([1.0 / (i + 1) for i 
in span_len_seq]) + probs = np.array([1.0 / (i + 1) for i in span_len_seq]) probs /= np.sum(probs) logits = tf.constant(np.log(probs), dtype=tf.float32) @@ -302,8 +289,8 @@ def _word_span_mask(inputs, tgt_len, num_predict, min_num_words, left_ctx_len = round_to_int(left_ctx_len) right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len - beg_indices = (tf.cumsum(left_ctx_len) + - tf.cumsum(right_offset, exclusive=True)) + beg_indices = ( + tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) end_indices = beg_indices + span_lens # Remove out of range indices @@ -333,7 +320,7 @@ def _token_span_mask(inputs, tgt_len, num_predict, min_num_tokens, # Sample span lengths from a zipf distribution span_len_seq = np.arange(min_num_tokens, max_num_tokens + 1) - probs = np.array([1.0 / (i + 1) for i in span_len_seq]) + probs = np.array([1.0 / (i + 1) for i in span_len_seq]) probs /= np.sum(probs) logits = tf.constant(np.log(probs), dtype=tf.float32) @@ -353,8 +340,8 @@ def _token_span_mask(inputs, tgt_len, num_predict, min_num_tokens, right_offset = round_to_int(span_lens_float * mask_alpha) - left_ctx_len # Get the actual begin and end indices - beg_indices = (tf.cumsum(left_ctx_len) + - tf.cumsum(right_offset, exclusive=True)) + beg_indices = ( + tf.cumsum(left_ctx_len) + tf.cumsum(right_offset, exclusive=True)) end_indices = beg_indices + span_lens # Remove out of range indices @@ -387,8 +374,7 @@ def _single_token_mask(inputs, tgt_len, num_predict): """Sample individual tokens as prediction targets.""" all_indices = tf.range(tgt_len, dtype=tf.int64) non_func_mask = tf.logical_and( - tf.not_equal(inputs, SEP_ID), - tf.not_equal(inputs, CLS_ID)) + tf.not_equal(inputs, SEP_ID), tf.not_equal(inputs, CLS_ID)) non_func_indices = tf.boolean_mask(all_indices, non_func_mask) masked_pos = tf.random.shuffle(non_func_indices) @@ -404,7 +390,10 @@ def _single_token_mask(inputs, tgt_len, num_predict): return is_masked, target_mask -def 
_online_sample_masks(inputs, tgt_len, num_predict, online_masking_config, +def _online_sample_masks(inputs, + tgt_len, + num_predict, + online_masking_config, boundary=None): """Sample target positions to predict.""" logging.info("Online sample with strategy: `%s`.", @@ -422,8 +411,7 @@ def _online_sample_masks(inputs, tgt_len, num_predict, online_masking_config, assert boundary is not None, "word span sampling requires `boundary`" return _word_span_mask(inputs, tgt_len, num_predict, online_masking_config.min_num_words, - online_masking_config.max_num_words, - boundary) + online_masking_config.max_num_words, boundary) else: raise NotImplementedError @@ -500,7 +488,7 @@ def create_pretrain_dataset(file_names, # reshape back to fixed shape example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len]) - example["input_k"] = tf.reshape(input_k, [seq_len]) + example["input_ids"] = tf.reshape(input_k, [seq_len]) example["input_q"] = tf.reshape(input_q, [seq_len]) # Directly use raw inputs as the target @@ -529,10 +517,11 @@ def create_pretrain_dataset(file_names, example["target"] = tf.reshape(target, [num_predict]) ##### target mask - target_mask = tf.concat( - [tf.ones([actual_num_predict], dtype=tf.float32), - tf.zeros([pad_len], dtype=tf.float32)], - axis=0) + target_mask = tf.concat([ + tf.ones([actual_num_predict], dtype=tf.float32), + tf.zeros([pad_len], dtype=tf.float32) + ], + axis=0) example["target_mask"] = tf.reshape(target_mask, [num_predict]) else: example["target"] = tf.reshape(target, [seq_len]) @@ -562,7 +551,11 @@ def create_pretrain_dataset(file_names, return dataset -def format_filename(prefix, suffix, bsz_per_host, seq_len, reuse_len=None, +def format_filename(prefix, + suffix, + bsz_per_host, + seq_len, + reuse_len=None, uncased=False): """Generates input file name pattern.""" if reuse_len is not None and reuse_len > 0: @@ -577,8 +570,8 @@ def format_filename(prefix, suffix, bsz_per_host, seq_len, reuse_len=None, else: case_str = "uncased." 
- file_name = "{}.seq-{}.{}{}{}{}".format( - prefix, seq_len, reuse_str, bsz_str, case_str, suffix) + file_name = "{}.seq-{}.{}{}{}{}".format(prefix, seq_len, reuse_str, bsz_str, + case_str, suffix) return file_name @@ -599,7 +592,7 @@ def get_pretrain_input_data(batch_size, # When using TPU pods, we need to clone dataset across # workers and need to pass in function that returns the dataset rather # than passing dataset instance itself. - use_dataset_fn = isinstance(strategy, tf.distribute.experimental.TPUStrategy) + use_dataset_fn = isinstance(strategy, tf.distribute.TPUStrategy) split = "train" bsz_per_host = int(batch_size / num_hosts) record_glob_base = format_filename( @@ -625,7 +618,7 @@ def get_pretrain_input_data(batch_size, strategy.num_replicas_in_sync)) # As auto rebatching is not supported in - # `experimental_distribute_datasets_from_function()` API, which is + # `distribute_datasets_from_function()` API, which is # required when cloning dataset to multiple workers in eager mode, # we use per-replica batch size. batch_size = int(batch_size / strategy.num_replicas_in_sync) @@ -718,13 +711,9 @@ def parse_files_to_dataset(parser, cycle_length = min(8, len(file_paths)) logging.info("Interleave %d files", cycle_length) - # `sloppy` mode means that the interleaving is not exact. This adds - # even more randomness to the training pipeline. 
dataset = dataset.apply( tf.data.experimental.parallel_interleave( - tf.data.TFRecordDataset, - sloppy=True, - cycle_length=cycle_length)) + tf.data.TFRecordDataset, cycle_length=cycle_length)) buffer_size = 2048 logging.info("Perform sample-level shuffle with size %d", buffer_size) dataset = dataset.shuffle(buffer_size=buffer_size) @@ -778,9 +767,8 @@ def _local_perm(inputs, is_masked, perm_size, seq_len, leak_ratio): index = tf.reshape(tf.transpose(index), [-1]) # non-functional tokens - non_func_tokens = tf.logical_not(tf.logical_or( - tf.equal(inputs, SEP_ID), - tf.equal(inputs, CLS_ID))) + non_func_tokens = tf.logical_not( + tf.logical_or(tf.equal(inputs, SEP_ID), tf.equal(inputs, CLS_ID))) masked_tokens = tf.logical_and(is_masked, non_func_tokens) non_masked_or_func_tokens = tf.logical_not(masked_tokens) diff --git a/official/nlp/xlnet/optimization.py b/official/nlp/xlnet/optimization.py index 0d9031647faef79c7e4f722dfeca7e3c1fd7712f..d6954ab9fb76b12e37c05b7b8da51505dc72d6cb 100644 --- a/official/nlp/xlnet/optimization.py +++ b/official/nlp/xlnet/optimization.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Functions and classes related to optimization (weight updates).""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function from absl import logging import tensorflow as tf diff --git a/official/nlp/xlnet/preprocess_classification_data.py b/official/nlp/xlnet/preprocess_classification_data.py index 9b34ffef7c7ed66a87b8386e1675e14c11b0791d..e8d42fa4e61541fed4532caffcc012edcc8254bc 100644 --- a/official/nlp/xlnet/preprocess_classification_data.py +++ b/official/nlp/xlnet/preprocess_classification_data.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,16 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Script to pre-process classification data into tfrecords.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import collections import csv import os +# Import libraries from absl import app from absl import flags from absl import logging diff --git a/official/nlp/xlnet/preprocess_pretrain_data.py b/official/nlp/xlnet/preprocess_pretrain_data.py index 9bf5367611ca656e88c969e4711334911e9cedd0..3facc98f5941320379bd75688deeb626572db52d 100644 --- a/official/nlp/xlnet/preprocess_pretrain_data.py +++ b/official/nlp/xlnet/preprocess_pretrain_data.py @@ -1,5 +1,4 @@ -# -*- coding: utf-8 -*- -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,39 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Script to pre-process pre-training data into tfrecords.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +# -*- coding: utf-8 -*- +"""Script to pre-process pre-training data into tfrecords.""" import json import os import random +# Import libraries from absl import app from absl import flags -import absl.logging as _logging # pylint: disable=unused-import +from absl import logging import numpy as np - -import tensorflow.google as tf -from official.nlp.xlnet import preprocess_utils +import tensorflow.compat.v1 as tf import sentencepiece as spm +from official.nlp.xlnet import preprocess_utils + +FLAGS = flags.FLAGS special_symbols = { - "" : 0, - "" : 1, - "" : 2, - "" : 3, - "" : 4, - "" : 5, - "" : 6, - "" : 7, - "" : 8, + "": 0, + "": 1, + "": 2, + "": 3, + "": 4, + "": 5, + "": 6, + "": 7, + "": 8, } VOCAB_SIZE = 32000 @@ -92,6 +90,7 @@ def format_filename(prefix, bsz_per_host, seq_len, bi_data, suffix, def _create_data(idx, input_paths): + """Creates data.""" # Load sentence-piece model sp = spm.SentencePieceProcessor() sp.Load(FLAGS.sp_path) @@ -101,10 +100,10 @@ def _create_data(idx, input_paths): for input_path in input_paths: input_data, sent_ids = [], [] sent_id, line_cnt = True, 0 - tf.logging.info("Processing %s", input_path) + logging.info("Processing %s", input_path) for line in tf.gfile.Open(input_path): if line_cnt % 100000 == 0: - tf.logging.info("Loading line %d", line_cnt) + logging.info("Loading line %d", 
line_cnt) line_cnt += 1 if not line.strip(): @@ -125,7 +124,7 @@ def _create_data(idx, input_paths): sent_ids.extend([sent_id] * len(cur_sent)) sent_id = not sent_id - tf.logging.info("Finish with line %d", line_cnt) + logging.info("Finish with line %d", line_cnt) if line_cnt == 0: continue @@ -135,7 +134,7 @@ def _create_data(idx, input_paths): total_line_cnt += line_cnt input_shards.append((input_data, sent_ids)) - tf.logging.info("[Task %d] Total number line: %d", idx, total_line_cnt) + logging.info("[Task %d] Total number line: %d", idx, total_line_cnt) tfrecord_dir = os.path.join(FLAGS.save_dir, "tfrecords") @@ -145,8 +144,8 @@ def _create_data(idx, input_paths): np.random.seed(100 * FLAGS.task + FLAGS.pass_id) perm_indices = np.random.permutation(len(input_shards)) - tf.logging.info("Using perm indices %s for pass %d", - perm_indices.tolist(), FLAGS.pass_id) + logging.info("Using perm indices %s for pass %d", + perm_indices.tolist(), FLAGS.pass_id) input_data_list, sent_ids_list = [], [] prev_sent_id = None @@ -188,6 +187,7 @@ def _create_data(idx, input_paths): def create_data(_): + """Creates pretrain data.""" # Validate FLAGS assert FLAGS.bsz_per_host % FLAGS.num_core_per_host == 0 if not FLAGS.use_tpu: @@ -224,16 +224,16 @@ def create_data(_): # Interleavely split the work into FLAGS.num_task splits file_paths = sorted(tf.gfile.Glob(FLAGS.input_glob)) - tf.logging.info("Use glob: %s", FLAGS.input_glob) - tf.logging.info("Find %d files: %s", len(file_paths), file_paths) + logging.info("Use glob: %s", FLAGS.input_glob) + logging.info("Find %d files: %s", len(file_paths), file_paths) task_file_paths = file_paths[FLAGS.task::FLAGS.num_task] if not task_file_paths: - tf.logging.info("Exit: task %d has no file to process.", FLAGS.task) + logging.info("Exit: task %d has no file to process.", FLAGS.task) return - tf.logging.info("Task %d process %d files: %s", - FLAGS.task, len(task_file_paths), task_file_paths) + logging.info("Task %d process %d files: %s", + 
FLAGS.task, len(task_file_paths), task_file_paths) record_info = _create_data(FLAGS.task, task_file_paths) record_prefix = "record_info-{}-{}-{}".format( @@ -256,6 +256,7 @@ def create_data(_): def batchify(data, bsz_per_host, sent_ids=None): + """Creates batches.""" num_step = len(data) // bsz_per_host data = data[:bsz_per_host * num_step] data = data.reshape(bsz_per_host, num_step) @@ -273,9 +274,9 @@ def _split_a_and_b(data, sent_ids, begin_idx, tot_len, extend_target=False): data_len = data.shape[0] if begin_idx + tot_len >= data_len: - tf.logging.info("[_split_a_and_b] returns None: " - "begin_idx %d + tot_len %d >= data_len %d", - begin_idx, tot_len, data_len) + logging.info("[_split_a_and_b] returns None: " + "begin_idx %d + tot_len %d >= data_len %d", + begin_idx, tot_len, data_len) return None end_idx = begin_idx + 1 @@ -287,9 +288,9 @@ def _split_a_and_b(data, sent_ids, begin_idx, tot_len, extend_target=False): end_idx += 1 a_begin = begin_idx - if len(cut_points) == 0 or random.random() < 0.5: + if len(cut_points) == 0 or random.random() < 0.5: # pylint:disable=g-explicit-length-test label = 0 - if len(cut_points) == 0: + if len(cut_points) == 0: # pylint:disable=g-explicit-length-test a_end = end_idx else: a_end = random.choice(cut_points) @@ -324,9 +325,9 @@ def _split_a_and_b(data, sent_ids, begin_idx, tot_len, extend_target=False): if extend_target: if a_end >= data_len or b_end >= data_len: - tf.logging.info("[_split_a_and_b] returns None: " - "a_end %d or b_end %d >= data_len %d", - a_end, b_end, data_len) + logging.info("[_split_a_and_b] returns None: " + "a_end %d or b_end %d >= data_len %d", + a_end, b_end, data_len) return None a_target = data[a_begin + 1: a_end + 1] b_target = data[b_begin: b_end + 1] @@ -345,9 +346,7 @@ def _is_start_piece(piece): def _sample_mask(sp, seg, reverse=False, max_gram=5, goal_num_predict=None): - """Sample `goal_num_predict` tokens for partial prediction. 
- About `mask_beta` tokens are chosen in a context of `mask_alpha` tokens.""" - + """Samples `goal_num_predict` tokens for partial prediction.""" seg_len = len(seg) mask = np.array([False] * seg_len, dtype=np.bool) @@ -409,8 +408,7 @@ def _sample_mask(sp, seg, reverse=False, max_gram=5, goal_num_predict=None): def _sample_mask_ngram(sp, seg, reverse=False, max_gram=5, goal_num_predict=None): - """Sample `goal_num_predict` tokens for partial prediction. - About `mask_beta` tokens are chosen in a context of `mask_alpha` tokens.""" + """Sample `goal_num_predict` tokens for partial prediction.""" seg_len = len(seg) mask = np.array([False] * seg_len, dtype=np.bool) @@ -477,6 +475,7 @@ def _sample_mask_ngram(sp, seg, reverse=False, max_gram=5, def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, bi_data, sp): + """Creates TFRecords.""" data, sent_ids = data[0], data[1] num_core = FLAGS.num_core_per_host @@ -499,7 +498,7 @@ def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, else: data, sent_ids = batchify(data, bsz_per_host, sent_ids) - tf.logging.info("Raw data shape %s.", data.shape) + logging.info("Raw data shape %s.", data.shape) file_name = format_filename( prefix=basename, @@ -515,7 +514,7 @@ def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, ) save_path = os.path.join(save_dir, file_name) record_writer = tf.python_io.TFRecordWriter(save_path) - tf.logging.info("Start writing %s.", save_path) + logging.info("Start writing %s.", save_path) num_batch = 0 reuse_len = FLAGS.reuse_len @@ -530,7 +529,7 @@ def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, i = 0 while i + seq_len <= data_len: if num_batch % 500 == 0: - tf.logging.info("Processing batch %d", num_batch) + logging.info("Processing batch %d", num_batch) all_ok = True features = [] @@ -545,7 +544,7 @@ def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, tot_len=seq_len - reuse_len - 3, extend_target=True) if results is 
None: - tf.logging.info("Break out with seq idx %d", i) + logging.info("Break out with seq idx %d", i) all_ok = False break @@ -603,7 +602,7 @@ def create_tfrecords(save_dir, basename, data, bsz_per_host, seq_len, i += reuse_len record_writer.close() - tf.logging.info("Done writing %s. Num of batches: %d", save_path, num_batch) + logging.info("Done writing %s. Num of batches: %d", save_path, num_batch) return save_path, num_batch @@ -627,6 +626,8 @@ def _convert_example(example, use_bfloat16): def parse_files_to_dataset(parser, file_names, split, num_batch, num_hosts, host_id, num_core_per_host, bsz_per_core): + """Parses files to a dataset.""" + del num_batch # list of file pathes num_files = len(file_names) num_files_per_host = num_files // num_hosts @@ -635,7 +636,7 @@ def parse_files_to_dataset(parser, file_names, split, num_batch, num_hosts, if host_id == num_hosts - 1: my_end_file_id = num_files file_paths = file_names[my_start_file_id: my_end_file_id] - tf.logging.info("Host %d handles %d files", host_id, len(file_paths)) + logging.info("Host %d handles %d files", host_id, len(file_paths)) assert split == "train" dataset = tf.data.Dataset.from_tensor_slices(file_paths) @@ -660,9 +661,7 @@ def parse_files_to_dataset(parser, file_names, split, num_batch, num_hosts, def _local_perm(inputs, targets, is_masked, perm_size, seq_len): - """ - Sample a permutation of the factorization order, and create an - attention mask accordingly. + """Samples a permutation of the factorization order, and create a mask. Args: inputs: int64 Tensor in shape [seq_len], input ids. @@ -672,6 +671,10 @@ def _local_perm(inputs, targets, is_masked, perm_size, seq_len): perm_size: the length of longest permutation. Could be set to be reuse_len. Should not be larger than reuse_len or there will be data leaks. seq_len: int, sequence length. + + Returns: + The permutation mask, new targets, target mask, and new inputs. 
+ """ # Generate permutation indices @@ -729,7 +732,10 @@ def _local_perm(inputs, targets, is_masked, perm_size, seq_len): def get_dataset(params, num_hosts, num_core_per_host, split, file_names, num_batch, seq_len, reuse_len, perm_size, mask_alpha, mask_beta, use_bfloat16=False, num_predict=None): + """Gets the dataset.""" + del mask_alpha + del mask_beta bsz_per_core = params["batch_size"] if num_hosts > 1: host_id = params["context"].current_host @@ -824,7 +830,7 @@ def get_dataset(params, num_hosts, num_core_per_host, split, file_names, _convert_example(example, use_bfloat16) for k, v in example.items(): - tf.logging.info("%s: %s", k, v) + logging.info("%s: %s", k, v) return example @@ -858,6 +864,7 @@ def get_input_fn( num_passes=None, use_bfloat16=False, num_predict=None): + """Gets the input function.""" # Merge all record infos into a single one record_glob_base = format_filename( @@ -875,15 +882,14 @@ def get_input_fn( record_info = {"num_batch": 0, "filenames": []} tfrecord_dirs = tfrecord_dir.split(",") - tf.logging.info("Use the following tfrecord dirs: %s", tfrecord_dirs) + logging.info("Use the following tfrecord dirs: %s", tfrecord_dirs) for idx, record_dir in enumerate(tfrecord_dirs): record_glob = os.path.join(record_dir, record_glob_base) - tf.logging.info("[%d] Record glob: %s", idx, record_glob) + logging.info("[%d] Record glob: %s", idx, record_glob) record_paths = sorted(tf.gfile.Glob(record_glob)) - tf.logging.info("[%d] Num of record info path: %d", - idx, len(record_paths)) + logging.info("[%d] Num of record info path: %d", idx, len(record_paths)) cur_record_info = {"num_batch": 0, "filenames": []} @@ -893,7 +899,7 @@ def get_input_fn( fields = record_info_name.split(".")[0].split("-") pass_id = int(fields[-1]) if len(fields) == 5 and pass_id >= num_passes: - tf.logging.info("Skip pass %d: %s", pass_id, record_info_name) + logging.info("Skip pass %d: %s", pass_id, record_info_name) continue with tf.gfile.Open(record_info_path, "r") as fp: 
@@ -915,21 +921,19 @@ def get_input_fn( new_filenames.append(new_filename) cur_record_info["filenames"] = new_filenames - tf.logging.info("[Dir %d] Number of chosen batches: %s", - idx, cur_record_info["num_batch"]) - tf.logging.info("[Dir %d] Number of chosen files: %s", - idx, len(cur_record_info["filenames"])) - tf.logging.info(cur_record_info["filenames"]) + logging.info("[Dir %d] Number of chosen batches: %s", + idx, cur_record_info["num_batch"]) + logging.info("[Dir %d] Number of chosen files: %s", + idx, len(cur_record_info["filenames"])) + logging.info(cur_record_info["filenames"]) # add `cur_record_info` to global `record_info` record_info["num_batch"] += cur_record_info["num_batch"] record_info["filenames"] += cur_record_info["filenames"] - tf.logging.info("Total number of batches: %d", - record_info["num_batch"]) - tf.logging.info("Total number of files: %d", - len(record_info["filenames"])) - tf.logging.info(record_info["filenames"]) + logging.info("Total number of batches: %d", record_info["num_batch"]) + logging.info("Total number of files: %d", len(record_info["filenames"])) + logging.info(record_info["filenames"]) def input_fn(params): """docs.""" @@ -955,8 +959,8 @@ def get_input_fn( return input_fn, record_info -if __name__ == "__main__": - FLAGS = flags.FLAGS +def define_flags(): + """Defines relevant flags.""" flags.DEFINE_bool("use_tpu", True, help="whether to use TPUs") flags.DEFINE_integer("bsz_per_host", 32, help="batch size per host.") flags.DEFINE_integer("num_core_per_host", 8, help="num TPU cores per host.") @@ -994,5 +998,8 @@ if __name__ == "__main__": flags.DEFINE_integer("task", 0, help="The Task ID. 
This value is used when " "using multiple workers to identify each worker.") - tf.logging.set_verbosity(tf.logging.INFO) + +if __name__ == "__main__": + define_flags() + logging.set_verbosity(logging.INFO) app.run(create_data) diff --git a/official/nlp/xlnet/preprocess_squad_data.py b/official/nlp/xlnet/preprocess_squad_data.py index 59c8944697348f12b185399463978c170b4ee46b..e1d49565067c57611d8613a6d14e5e4bf221b1fc 100644 --- a/official/nlp/xlnet/preprocess_squad_data.py +++ b/official/nlp/xlnet/preprocess_squad_data.py @@ -1,5 +1,4 @@ -# coding=utf-8 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# coding=utf-8 """Script to pre-process SQUAD data into tfrecords.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import random +# Import libraries from absl import app from absl import flags from absl import logging diff --git a/official/nlp/xlnet/preprocess_utils.py b/official/nlp/xlnet/preprocess_utils.py index d0e8ae8398111ae73185a4594f1ab9d7dac7dd38..5c714a0c1fdd3a7cddd9c0a63fc09c80bc08627e 100644 --- a/official/nlp/xlnet/preprocess_utils.py +++ b/official/nlp/xlnet/preprocess_utils.py @@ -1,5 +1,4 @@ -# coding=utf-8 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# coding=utf-8 """Utilities for pre-processing.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import unicodedata import six - SPIECE_UNDERLINE = '▁' @@ -40,7 +36,7 @@ def printable_text(text): elif six.PY2: if isinstance(text, str): return text - elif isinstance(text, unicode): + elif isinstance(text, unicode): # pylint: disable=undefined-variable return text.encode('utf-8') else: raise ValueError('Unsupported string type: %s' % (type(text))) @@ -85,7 +81,7 @@ def encode_pieces(sp_model, text, return_unicode=True, sample=False): """Encodes pieces.""" # return_unicode is used only for py2 - if six.PY2 and isinstance(text, unicode): + if six.PY2 and isinstance(text, unicode): # pylint: disable=undefined-variable text = text.encode('utf-8') if not sample: @@ -95,8 +91,8 @@ def encode_pieces(sp_model, text, return_unicode=True, sample=False): new_pieces = [] for piece in pieces: if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit(): - cur_pieces = sp_model.EncodeAsPieces( - piece[:-1].replace(SPIECE_UNDERLINE, '')) + cur_pieces = sp_model.EncodeAsPieces(piece[:-1].replace( + SPIECE_UNDERLINE, '')) if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE: if len(cur_pieces[0]) == 1: cur_pieces = cur_pieces[1:] diff --git a/official/nlp/xlnet/run_classifier.py b/official/nlp/xlnet/run_classifier.py index 79a27f244d87617ea3cb34913154e7725cc94b1f..f2681e0ce8a714cb4f784430a86f076bdc356676 100644 --- a/official/nlp/xlnet/run_classifier.py +++ b/official/nlp/xlnet/run_classifier.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""XLNet classification finetuning runner in tf2.0.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""XLNet classification finetuning runner in tf2.0.""" import functools +# Import libraries from absl import app from absl import flags from absl import logging @@ -27,13 +23,13 @@ from absl import logging import numpy as np import tensorflow as tf # pylint: disable=unused-import +from official.common import distribute_utils from official.nlp.xlnet import common_flags from official.nlp.xlnet import data_utils from official.nlp.xlnet import optimization from official.nlp.xlnet import training_utils from official.nlp.xlnet import xlnet_config from official.nlp.xlnet import xlnet_modeling as modeling -from official.utils.misc import tpu_lib flags.DEFINE_integer("n_class", default=2, help="Number of classes.") flags.DEFINE_string( @@ -134,14 +130,9 @@ def get_metric_fn(): def main(unused_argv): del unused_argv - if FLAGS.strategy_type == "mirror": - strategy = tf.distribute.MirroredStrategy() - elif FLAGS.strategy_type == "tpu": - cluster_resolver = tpu_lib.tpu_initialize(FLAGS.tpu) - strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver) - else: - raise ValueError("The distribution strategy type is not supported: %s" % - FLAGS.strategy_type) + strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=FLAGS.strategy_type, + tpu_address=FLAGS.tpu) if strategy: logging.info("***** Number of 
cores used : %d", strategy.num_replicas_in_sync) diff --git a/official/nlp/xlnet/run_pretrain.py b/official/nlp/xlnet/run_pretrain.py index e136f4d12ab01d0b48c0d0765b8e3e8bbf8eedd7..80ab0bd4d1c500c92e2d97106fb3e3eab0d0b33e 100644 --- a/official/nlp/xlnet/run_pretrain.py +++ b/official/nlp/xlnet/run_pretrain.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,29 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""XLNet classification finetuning runner in tf2.0.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""XLNet pretraining runner in tf2.0.""" import functools import os +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf # pylint: disable=unused-import +from official.common import distribute_utils from official.nlp.xlnet import common_flags from official.nlp.xlnet import data_utils from official.nlp.xlnet import optimization from official.nlp.xlnet import training_utils from official.nlp.xlnet import xlnet_config from official.nlp.xlnet import xlnet_modeling as modeling -from official.utils.misc import tpu_lib flags.DEFINE_integer( "num_predict", @@ -76,17 +72,11 @@ def get_pretrainxlnet_model(model_config, run_config): def main(unused_argv): del unused_argv num_hosts = 1 - if FLAGS.strategy_type == "mirror": - strategy = tf.distribute.MirroredStrategy() - elif FLAGS.strategy_type == "tpu": - cluster_resolver = 
tpu_lib.tpu_initialize(FLAGS.tpu) - strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver) - topology = FLAGS.tpu_topology.split("x") - total_num_core = 2 * int(topology[0]) * int(topology[1]) - num_hosts = total_num_core // FLAGS.num_core_per_host - else: - raise ValueError("The distribution strategy type is not supported: %s" % - FLAGS.strategy_type) + strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=FLAGS.strategy_type, + tpu_address=FLAGS.tpu) + if FLAGS.strategy_type == "tpu": + num_hosts = strategy.extended.num_hosts if strategy: logging.info("***** Number of cores used : %d", strategy.num_replicas_in_sync) diff --git a/official/nlp/xlnet/run_squad.py b/official/nlp/xlnet/run_squad.py index 013893f1a289bb446dd67f33d9178903f706b2c8..a6126295ec1bd571abf04b90e2713eb43d1df002 100644 --- a/official/nlp/xlnet/run_squad.py +++ b/official/nlp/xlnet/run_squad.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,19 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""XLNet SQUAD finetuning runner in tf2.0.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""XLNet SQUAD finetuning runner in tf2.0.""" import functools import json import os import pickle +# Import libraries from absl import app from absl import flags from absl import logging @@ -31,6 +27,7 @@ from absl import logging import tensorflow as tf # pylint: disable=unused-import import sentencepiece as spm +from official.common import distribute_utils from official.nlp.xlnet import common_flags from official.nlp.xlnet import data_utils from official.nlp.xlnet import optimization @@ -38,7 +35,6 @@ from official.nlp.xlnet import squad_utils from official.nlp.xlnet import training_utils from official.nlp.xlnet import xlnet_config from official.nlp.xlnet import xlnet_modeling as modeling -from official.utils.misc import tpu_lib flags.DEFINE_string( "test_feature_path", default=None, help="Path to feature of test set.") @@ -216,14 +212,9 @@ def get_qaxlnet_model(model_config, run_config, start_n_top, end_n_top): def main(unused_argv): del unused_argv - if FLAGS.strategy_type == "mirror": - strategy = tf.distribute.MirroredStrategy() - elif FLAGS.strategy_type == "tpu": - cluster_resolver = tpu_lib.tpu_initialize(FLAGS.tpu) - strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver) - else: - raise ValueError("The distribution strategy type is not supported: %s" % - FLAGS.strategy_type) + strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=FLAGS.strategy_type, + tpu_address=FLAGS.tpu) if strategy: logging.info("***** Number of cores used : %d", strategy.num_replicas_in_sync) diff --git a/official/nlp/xlnet/squad_utils.py b/official/nlp/xlnet/squad_utils.py index 
efab6da6f80658213317e13dee86b09b2cb94c63..e8e6bad085448106028df5245a12a68a543fa50b 100644 --- a/official/nlp/xlnet/squad_utils.py +++ b/official/nlp/xlnet/squad_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + # coding=utf-8 """Utilities used in SQUAD task.""" from __future__ import absolute_import @@ -656,7 +656,7 @@ def convert_examples_to_features(examples, sp_model, max_seq_length, doc_stride, assert tok_start_position <= tok_end_position def _piece_to_id(x): - if six.PY2 and isinstance(x, unicode): + if six.PY2 and isinstance(x, unicode): # pylint: disable=undefined-variable x = x.encode("utf-8") return sp_model.PieceToId(x) diff --git a/official/nlp/xlnet/training_utils.py b/official/nlp/xlnet/training_utils.py index 293e4633d8f4ae0f00fc5fbabb3a3996827ced81..45afaa76d621046d37cb39d5c4acdd509f98c3da 100644 --- a/official/nlp/xlnet/training_utils.py +++ b/official/nlp/xlnet/training_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,26 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """XLNet training utils.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function import os import re +from typing import Any, Callable, Dict, Optional, Text from absl import logging - -# pytype: disable=attribute-error -# pylint: disable=g-bare-generic,unused-import import tensorflow as tf -from typing import Any, Callable, Dict, Text, Optional from official.nlp.bert import model_training_utils from official.nlp.xlnet import data_utils -from official.nlp.xlnet import xlnet_modeling as modeling + +# pytype: disable=attribute-error +# pylint: disable=g-bare-generic,unused-import _MIN_SUMMARY_STEPS = 10 @@ -213,8 +208,8 @@ def train( if input_meta_data["mem_len"] > 0: for _ in range(input_meta_data["n_layer"]): zeros = tf.zeros([ - input_meta_data["mem_len"], input_meta_data["batch_size_per_core"], + input_meta_data["mem_len"], input_meta_data["d_model"] ], dtype=tf.float32) diff --git a/official/nlp/xlnet/xlnet_config.py b/official/nlp/xlnet/xlnet_config.py index 7852eadf469476b4772533dce563366cd3478317..c0f51955b57289884fc522cc02c3d3db6404bf76 100644 --- a/official/nlp/xlnet/xlnet_config.py +++ b/official/nlp/xlnet/xlnet_config.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Utility functions used in XLNet model.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Utility functions used in XLNet model.""" import json import os @@ -38,12 +33,13 @@ def create_run_config(is_training, is_finetune, flags): clamp_len=flags.clamp_len) if not is_finetune: - kwargs.update(dict( - mem_len=flags.mem_len, - reuse_len=flags.reuse_len, - bi_data=flags.bi_data, - clamp_len=flags.clamp_len, - same_length=flags.same_length)) + kwargs.update( + dict( + mem_len=flags.mem_len, + reuse_len=flags.reuse_len, + bi_data=flags.bi_data, + clamp_len=flags.clamp_len, + same_length=flags.same_length)) return RunConfig(**kwargs) @@ -80,8 +76,10 @@ class XLNetConfig(object): assert FLAGS is not None or json_path is not None or args_dict is not None - self.keys = ['n_layer', 'd_model', 'n_head', 'd_head', 'd_inner', - 'ff_activation', 'untie_r', 'n_token'] + self.keys = [ + 'n_layer', 'd_model', 'n_head', 'd_head', 'd_inner', 'ff_activation', + 'untie_r', 'n_token' + ] if FLAGS is not None: self.init_from_flags(FLAGS) @@ -152,17 +150,17 @@ class RunConfig(object): init_method: str, the initialization scheme, either "normal" or "uniform". init_range: float, initialize the parameters with a uniform distribution in [-init_range, init_range]. Only effective when init="uniform". - init_std: float, initialize the parameters with a normal distribution - with mean 0 and stddev init_std. Only effective when init="normal". + init_std: float, initialize the parameters with a normal distribution with + mean 0 and stddev init_std. Only effective when init="normal". mem_len: int, the number of tokens to cache. - reuse_len: int, the number of tokens in the currect batch to be cached - and reused in the future. - bi_data: bool, whether to use bidirectional input pipeline. 
- Usually set to True during pretraining and False during finetuning. - clamp_len: int, clamp all relative distances larger than clamp_len. - -1 means no clamping. - same_length: bool, whether to use the same attention length - for each token. + reuse_len: int, the number of tokens in the currect batch to be cached and + reused in the future. + bi_data: bool, whether to use bidirectional input pipeline. Usually set to + True during pretraining and False during finetuning. + clamp_len: int, clamp all relative distances larger than clamp_len. -1 + means no clamping. + same_length: bool, whether to use the same attention length for each + token. use_cls_mask: bool, whether to introduce cls mask. """ diff --git a/official/nlp/xlnet/xlnet_modeling.py b/official/nlp/xlnet/xlnet_modeling.py index 3e16af8e9930ba4dabb8e92743769cf1ebb48585..b48aff4e795444c176cc862dcb98b01e76c39c7d 100644 --- a/official/nlp/xlnet/xlnet_modeling.py +++ b/official/nlp/xlnet/xlnet_modeling.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,35 +11,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Keras layers of XLNet model in TF 2.0.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Keras layers of XLNet model in TF 2.0.""" import copy -import numpy as np +import warnings import tensorflow as tf + +from official.nlp.modeling import networks from official.nlp.xlnet import data_utils def gelu(x): - """Gaussian Error Linear Unit. 
+ return tf.keras.activations.gelu(x, approximate=True) - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - Returns: - `x` with the GELU activation applied. - """ - cdf = 0.5 * (1.0 + tf.tanh( - (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) - return x * cdf +def _get_initializer(flags): + """Get variable initializer.""" + if flags.init_method == "uniform": + initializer = tf.keras.initializers.RandomUniform( + minval=-flags.init_range, maxval=flags.init_range) + elif flags.init_method == "normal": + initializer = tf.keras.initializers.RandomNormal(stddev=flags.init_std) + else: + raise ValueError("Initializer {} not supported".format(flags.init_method)) + return initializer def rel_shift(x, klen=-1): @@ -54,18 +51,6 @@ def rel_shift(x, klen=-1): return x -def _get_initializer(flags): - """Get variable intializer.""" - if flags.init_method == 'uniform': - initializer = tf.keras.initializers.RandomUniform( - minval=-flags.init_range, maxval=flags.init_range) - elif flags.init_method == 'normal': - initializer = tf.keras.initializers.RandomNormal(stddev=flags.init_std) - else: - raise ValueError('Initializer {} not supported'.format(flags.init_method)) - return initializer - - def _create_mask(qlen, mlen, dtype=tf.float32, same_length=False): """Creates attention mask when single-side context allowed only.""" attn_mask = tf.ones([qlen, qlen], dtype=dtype) @@ -102,27 +87,49 @@ def is_special_none_tensor(tensor): return tensor.shape.ndims == 0 and tensor.dtype == tf.int32 -class PositionalEmbedding(tf.keras.layers.Layer): - """Generates relative positional embeddings used in Transformer-XL and XLNet.""" +@tf.keras.utils.register_keras_serializable(package="Text") +class RelativePositionEncoding(tf.keras.layers.Layer): + """Creates a relative positional encoding. 
- def __init__(self, dim, **kwargs): - super(PositionalEmbedding, self).__init__(**kwargs) - self.dim = dim + This layer creates a relative positional encoding as described in + "Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context" + (https://arxiv.org/abs/1901.02860). - def build(self, unused_input_shapes): - """Constructs inversed frequency vector for positional embedding layer.""" - self.inv_freq = 1.0 / (10000.0**(tf.range(0, self.dim, 2.0) / self.dim)) - super(PositionalEmbedding, self).build(unused_input_shapes) + Rather than an absolute position embedding as in Transformer, this + formulation represents position as the relative distance between tokens using + sinusoidal positional embeddings. - def call(self, pos_seq, batch_size): - """Implements call() for the layer.""" - sinusoid_inp = tf.einsum('i,d->id', pos_seq, self.inv_freq) - pos_emb = tf.concat([tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)], -1) + Note: This layer is currently experimental. + + Attributes: + hidden_size: The dimensionality of the input embeddings. + """ + + def __init__(self, hidden_size, **kwargs): + super(RelativePositionEncoding, self).__init__(**kwargs) + self._hidden_size = hidden_size + self._inv_freq = 1.0 / (10000.0**( + tf.range(0, self._hidden_size, 2.0) / self._hidden_size)) + + def call(self, pos_seq, batch_size=None): + """Implements call() for the layer. + + Args: + pos_seq: A 1-D `Tensor` + batch_size: The optionally provided batch size that tiles the relative + positional encoding. + + Returns: + The relative positional encoding of shape: + [len(pos_seq), batch_size, hidden_size] if batch_size is provided, else + [len(pos_seq), 1, hidden_size]. 
+ """ + sinusoid_input = tf.einsum("i,d->id", pos_seq, self._inv_freq) + pos_emb = tf.concat([tf.sin(sinusoid_input), tf.cos(sinusoid_input)], -1) pos_emb = pos_emb[:, None, :] if batch_size is not None: pos_emb = tf.tile(pos_emb, [1, batch_size, 1]) - return pos_emb @@ -147,17 +154,17 @@ class RelativeAttention(tf.keras.layers.Layer): """Implements call() for the layer.""" # content based attention score - ac = tf.einsum('ibnd,jbnd->ijbn', q_head + r_w_bias, k_head_h) + ac = tf.einsum("ibnd,jbnd->ijbn", q_head + r_w_bias, k_head_h) # position based attention score - bd = tf.einsum('ibnd,jbnd->ijbn', q_head + r_r_bias, k_head_r) + bd = tf.einsum("ibnd,jbnd->ijbn", q_head + r_r_bias, k_head_r) bd = rel_shift(bd, klen=tf.shape(ac)[1]) # segment-based attention score if seg_mat is None: ef = 0 else: - ef = tf.einsum('ibnd,snd->isbn', q_head + r_s_bias, seg_embed) + ef = tf.einsum("ibnd,snd->isbn", q_head + r_s_bias, seg_embed) tgt_shape = tf.shape(bd) ef = tf.where( tf.broadcast_to(tf.expand_dims(seg_mat, 3), tgt_shape), @@ -174,7 +181,7 @@ class RelativeAttention(tf.keras.layers.Layer): attn_prob = self.attention_probs_dropout(attn_prob) # attention output - attn_vec = tf.einsum('ijbn,jbnd->ibnd', attn_prob, v_head_h) + attn_vec = tf.einsum("ijbn,jbnd->ibnd", attn_prob, v_head_h) return attn_vec @@ -193,29 +200,29 @@ class PositionwiseFF(tf.keras.layers.Layer): def build(self, unused_input_shapes): """Implements build() for the layer.""" - if self.activation_type == 'relu': + if self.activation_type == "relu": activation = tf.nn.relu - elif self.activation_type == 'gelu': + elif self.activation_type == "gelu": activation = gelu else: - raise (ValueError('Unsupported activation type {}'.format( + raise (ValueError("Unsupported activation type {}".format( self.activation_type))) self.inner_projection_layer = ( tf.keras.layers.Dense( units=self.d_inner, activation=activation, kernel_initializer=self.kernel_initializer, - name='layer_1')) + name="layer_1")) 
self.output_projection_layer = ( tf.keras.layers.Dense( units=self.d_model, kernel_initializer=self.kernel_initializer, - name='layer_2')) + name="layer_2")) self.output_dropout = tf.keras.layers.Dropout( - rate=self.dropout, name='drop_2') + rate=self.dropout, name="drop_2") self.output_layer_norm = ( tf.keras.layers.LayerNormalization( - name='LayerNorm', axis=-1, epsilon=1e-12)) + name="LayerNorm", axis=-1, epsilon=1e-12)) super(PositionwiseFF, self).build(unused_input_shapes) def call(self, inp): @@ -240,7 +247,7 @@ class EmbeddingLookup(tf.keras.layers.Layer): def build(self, unused_input_shapes): """Implements build() for the layer.""" self.lookup_table = self.add_weight( - 'lookup_table', + "lookup_table", shape=[self.n_token, self.d_embed], initializer=self.initializer, dtype=self.dtype) @@ -269,22 +276,22 @@ class RelativeMultiheadAttention(tf.keras.layers.Layer): self.scale = 1.0 / (self.d_head**0.5) self.output_layer_norm = tf.keras.layers.LayerNormalization( - name='LayerNorm', axis=-1, epsilon=1e-12) + name="LayerNorm", axis=-1, epsilon=1e-12) self.kh_projection_layer = self.add_weight( - 'k/kernel', + "k/kernel", shape=[self.d_model, self.n_head, self.d_head], initializer=self.initializer) self.vh_projection_layer = self.add_weight( - 'v/kernel', + "v/kernel", shape=[self.d_model, self.n_head, self.d_head], initializer=self.initializer) self.kr_projection_layer = self.add_weight( - 'r/kernel', + "r/kernel", shape=[self.d_model, self.n_head, self.d_head], initializer=self.initializer) self.qh_projection_layer = self.add_weight( - 'q/kernel', + "q/kernel", shape=[self.d_model, self.n_head, self.d_head], initializer=self.initializer) @@ -292,7 +299,7 @@ class RelativeMultiheadAttention(tf.keras.layers.Layer): dropout_att=self.dropout_att, scale=self.scale) self.proj_o = self.add_weight( - 'o/kernel', + "o/kernel", shape=[self.d_model, self.n_head, self.d_head], initializer=self.initializer) @@ -310,12 +317,12 @@ class 
RelativeMultiheadAttention(tf.keras.layers.Layer): cat = h # content heads - q_head_h = tf.einsum('ibh,hnd->ibnd', h, self.qh_projection_layer) - k_head_h = tf.einsum('ibh,hnd->ibnd', cat, self.kh_projection_layer) - v_head_h = tf.einsum('ibh,hnd->ibnd', cat, self.vh_projection_layer) + q_head_h = tf.einsum("ibh,hnd->ibnd", h, self.qh_projection_layer) + k_head_h = tf.einsum("ibh,hnd->ibnd", cat, self.kh_projection_layer) + v_head_h = tf.einsum("ibh,hnd->ibnd", cat, self.vh_projection_layer) # positional heads - k_head_r = tf.einsum('ibh,hnd->ibnd', r, self.kr_projection_layer) + k_head_r = tf.einsum("ibh,hnd->ibnd", r, self.kr_projection_layer) # core attention ops attn_vec_h = self.relative_attention_layer(q_head_h, k_head_h, v_head_h, @@ -324,21 +331,21 @@ class RelativeMultiheadAttention(tf.keras.layers.Layer): attn_mask_h) # post processing - output_h = tf.einsum('ibnd,hnd->ibh', attn_vec_h, self.proj_o) + output_h = tf.einsum("ibnd,hnd->ibh", attn_vec_h, self.proj_o) output_h = self.attention_dropout(output_h) output_h = self.output_layer_norm(output_h + h) output_g = None if g is not None: # enable two-stream attention # g-stream - q_head_g = tf.einsum('ibh,hnd->ibnd', g, self.qh_projection_layer) + q_head_g = tf.einsum("ibh,hnd->ibnd", g, self.qh_projection_layer) if target_mapping is not None: - q_head_g = tf.einsum('mbnd,mlb->lbnd', q_head_g, target_mapping) + q_head_g = tf.einsum("mbnd,mlb->lbnd", q_head_g, target_mapping) attn_vec_g = self.relative_attention_layer(q_head_g, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat, r_w_bias, r_r_bias, r_s_bias, attn_mask_g) - attn_vec_g = tf.einsum('lbnd,mlb->mbnd', attn_vec_g, target_mapping) + attn_vec_g = tf.einsum("lbnd,mlb->mbnd", attn_vec_g, target_mapping) else: attn_vec_g = self.relative_attention_layer(q_head_g, k_head_h, v_head_h, @@ -347,7 +354,7 @@ class RelativeMultiheadAttention(tf.keras.layers.Layer): attn_mask_g) # post processing - output_g = tf.einsum('ibnd,hnd->ibh', attn_vec_g, self.proj_o) + 
output_g = tf.einsum("ibnd,hnd->ibh", attn_vec_g, self.proj_o) output_g = self.attention_dropout(output_g) output_g = self.output_layer_norm(output_g + g) @@ -376,7 +383,7 @@ class TransformerXLModel(tf.keras.layers.Layer): untie_r=False, use_tpu=True, reuse_len=None, - ff_activation='relu', + ff_activation="relu", use_cls_mask=False, **kwargs): """Initializes TransformerXLModel. @@ -410,6 +417,9 @@ class TransformerXLModel(tf.keras.layers.Layer): """ super(TransformerXLModel, self).__init__(**kwargs) + warnings.warn( + "`TransformerXLModel` is deprecated, please use `XLNetBase` instead", + DeprecationWarning, stacklevel=2) self.n_token = n_token self.initializer = initializer @@ -441,7 +451,7 @@ class TransformerXLModel(tf.keras.layers.Layer): d_embed=self.d_model, initializer=self.initializer, dtype=self.tf_float, - name='word_embedding') + name="word_embedding") self.h_dropout = tf.keras.layers.Dropout(rate=self.dropout) self.g_dropout = tf.keras.layers.Dropout(rate=self.dropout) @@ -449,52 +459,52 @@ class TransformerXLModel(tf.keras.layers.Layer): if self.untie_r: self.r_w_bias = ( self.add_weight( - 'r_w_bias', + "r_w_bias", shape=[self.n_layer, self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer)) self.r_r_bias = ( self.add_weight( - 'r_r_bias', + "r_r_bias", shape=[self.n_layer, self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer)) self.r_s_bias = ( self.add_weight( - 'r_s_bias', + "r_s_bias", shape=[self.n_layer, self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer)) else: self.r_w_bias = ( self.add_weight( - 'r_w_bias', + "r_w_bias", shape=[self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer)) self.r_r_bias = ( self.add_weight( - 'r_r_bias', + "r_r_bias", shape=[self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer)) self.r_s_bias = ( self.add_weight( - 'r_s_bias', [self.n_head, self.d_head], + "r_s_bias", [self.n_head, self.d_head], 
dtype=self.tf_float, initializer=self.initializer)) self.seg_embed = self.add_weight( - 'seg_embed', [self.n_layer, 2, self.n_head, self.d_head], + "seg_embed", [self.n_layer, 2, self.n_head, self.d_head], dtype=self.tf_float, initializer=self.initializer) self.mask_emb = self.add_weight( - 'mask_emb/mask_emb', shape=[1, 1, self.d_model], dtype=self.tf_float) + "mask_emb/mask_emb", shape=[1, 1, self.d_model], dtype=self.tf_float) self.emb_dropout = tf.keras.layers.Dropout(rate=self.dropout) - self.fwd_position_embedding = PositionalEmbedding(self.d_model) - self.bwd_position_embedding = PositionalEmbedding(self.d_model) + self.fwd_position_embedding = RelativePositionEncoding(self.d_model) + self.bwd_position_embedding = RelativePositionEncoding(self.d_model) self.rel_multihead_layers = [] self.h_positionwise_ffn_layers = [] @@ -507,7 +517,7 @@ class TransformerXLModel(tf.keras.layers.Layer): d_head=self.d_head, dropout_att=self.dropout_att, kernel_initializer=self.initializer, - name='layer_%d/rel_attn' % (i))) + name="layer_%d/rel_attn" % (i))) self.h_positionwise_ffn_layers.append( PositionwiseFF( d_model=self.d_model, @@ -515,7 +525,7 @@ class TransformerXLModel(tf.keras.layers.Layer): dropout=self.dropout, kernel_initializer=self.initializer, activation_type=self.ff_activation, - name='layer_%d/ff' % (i))) + name="layer_%d/ff" % (i))) self.output_dropout = tf.keras.layers.Dropout(rate=self.dropout) @@ -533,25 +543,25 @@ class TransformerXLModel(tf.keras.layers.Layer): # Uses dict to feed inputs into call() in order to keep mems as a python # list. 
inputs = { - 'inp_k': inp_k, - 'seg_id': seg_id, - 'input_mask': input_mask, - 'mems': mems, - 'perm_mask': perm_mask, - 'target_mapping': target_mapping, - 'inp_q': inp_q + "inp_k": inp_k, + "seg_id": seg_id, + "input_mask": input_mask, + "mems": mems, + "perm_mask": perm_mask, + "target_mapping": target_mapping, + "inp_q": inp_q } return super(TransformerXLModel, self).__call__(inputs, **kwargs) def call(self, inputs): """Implements call() for the layer.""" - inp_k = inputs['inp_k'] - seg_id = inputs['seg_id'] - input_mask = inputs['input_mask'] - mems = inputs['mems'] - perm_mask = inputs['perm_mask'] - target_mapping = inputs['target_mapping'] - inp_q = inputs['inp_q'] + inp_k = inputs["inp_k"] + seg_id = inputs["seg_id"] + input_mask = inputs["input_mask"] + mems = inputs["mems"] + perm_mask = inputs["perm_mask"] + target_mapping = inputs["target_mapping"] + inp_q = inputs["inp_q"] new_mems = [] @@ -564,14 +574,14 @@ class TransformerXLModel(tf.keras.layers.Layer): ##### Attention mask # causal attention mask - if self.attn_type == 'uni': + if self.attn_type == "uni": attn_mask = _create_mask(qlen, mlen, self.tf_float, self.same_length) # pylint: enable=protected-access attn_mask = attn_mask[:, :, None, None] - elif self.attn_type == 'bi': + elif self.attn_type == "bi": attn_mask = None else: - raise ValueError('Unsupported attention type: {}'.format(self.attn_type)) + raise ValueError("Unsupported attention type: {}".format(self.attn_type)) # data mask: input mask & perm mask if input_mask is not None and perm_mask is not None: @@ -648,12 +658,12 @@ class TransformerXLModel(tf.keras.layers.Layer): if dtype is not None and dtype != tf.float32: freq_seq = tf.cast(freq_seq, dtype=self.dtype) - if self.attn_type == 'bi': + if self.attn_type == "bi": beg, end = klen, -qlen - elif self.attn_type == 'uni': + elif self.attn_type == "uni": beg, end = klen, -1 else: - raise ValueError('Unknown `attn_type` {}.'.format(self.attn_type)) + raise ValueError("Unknown 
`attn_type` {}.".format(self.attn_type)) if self.bi_data: fwd_pos_seq = tf.range(beg, end, -1.0) @@ -739,79 +749,82 @@ class PretrainingXLNetModel(tf.keras.Model): """ - def __init__(self, use_proj, xlnet_config, run_config, **kwargs): + def __init__(self, use_proj, xlnet_config, run_config, use_legacy_mask=True, + **kwargs): super(PretrainingXLNetModel, self).__init__(**kwargs) self.run_config = run_config self.initializer = _get_initializer(run_config) self.xlnet_config = copy.deepcopy(xlnet_config) + self._use_legacy_mask = use_legacy_mask - self.transformerxl_model = TransformerXLModel( - n_token=self.xlnet_config.n_token, + self.xlnet_model = networks.XLNetBase( + vocab_size=self.xlnet_config.n_token, initializer=self.initializer, - attn_type='bi', - n_layer=self.xlnet_config.n_layer, - d_model=self.xlnet_config.d_model, - n_head=self.xlnet_config.n_head, - d_head=self.xlnet_config.d_head, - d_inner=self.xlnet_config.d_inner, - ff_activation=self.xlnet_config.ff_activation, - untie_r=self.xlnet_config.untie_r, - is_training=self.run_config.is_training, - use_tpu=self.run_config.use_tpu, - dropout=self.run_config.dropout, - dropout_att=self.run_config.dropout_att, - mem_len=self.run_config.mem_len, - reuse_len=self.run_config.reuse_len, + attention_type="bi", + num_layers=self.xlnet_config.n_layer, + hidden_size=self.xlnet_config.d_model, + num_attention_heads=self.xlnet_config.n_head, + head_size=self.xlnet_config.d_head, + inner_size=self.xlnet_config.d_inner, + two_stream=True, + tie_attention_biases=not self.xlnet_config.untie_r, + inner_activation=self.xlnet_config.ff_activation, + dropout_rate=self.run_config.dropout, + attention_dropout_rate=self.run_config.dropout_att, + memory_length=self.run_config.mem_len, + reuse_length=self.run_config.reuse_len, bi_data=self.run_config.bi_data, - clamp_len=self.run_config.clamp_len, - same_length=self.run_config.same_length, + clamp_length=self.run_config.clamp_len, use_cls_mask=self.run_config.use_cls_mask, - 
name='transformer') + name="xlnet_model") + self.lmloss_layer = LMLossLayer( - n_token=self.xlnet_config.n_token, - d_model=self.xlnet_config.d_model, + vocab_size=self.xlnet_config.n_token, + hidden_size=self.xlnet_config.d_model, initializer=self.initializer, tie_weight=True, bi_data=self.run_config.bi_data, - use_tpu=self.run_config.use_tpu, + use_one_hot=self.run_config.use_tpu, use_proj=use_proj, - name='lm_loss') + name="lm_loss") def call(self, features): """Implements call() for the layer.""" - input_ids = tf.transpose(features['input_k'], [1, 0]) - inp_q = tf.transpose(features['input_q'], [1, 0]) - - seg_ids = tf.transpose(features['seg_id'], [1, 0]) - - perm_mask = tf.transpose(features['perm_mask'], [1, 2, 0]) - - target_mapping = tf.transpose(features['target_mapping'], [1, 2, 0]) + input_ids = features["input_ids"] + masked_tokens = features["input_q"] + seg_ids = features["seg_id"] + if self._use_legacy_mask: + # Legacy input mask assumes `real` values are 0 and `padding` + # values are 1. 
+ perm_mask = 1 - features["perm_mask"] + else: + perm_mask = features["perm_mask"] + target_mapping = features["target_mapping"] # target for LM loss - target = tf.transpose(features['target'], [1, 0]) + target = features["target"] # target mask for LM loss - tgt_mask = tf.transpose(features['target_mask'], [1, 0]) + tgt_mask = features["target_mask"] - mems = features.get('mems', None) + mems = features.get("mems", None) - transformerxl_output, self.new_mems, self.lookup_table = self.transformerxl_model( - input_ids, - seg_id=seg_ids, + model_output, self.new_mems = self.xlnet_model( + input_ids=input_ids, + segment_ids=seg_ids, input_mask=None, - mems=mems, - perm_mask=perm_mask, + state=mems, + permutation_mask=perm_mask, target_mapping=target_mapping, - inp_q=inp_q) + masked_tokens=masked_tokens) lm_loss, _ = self.lmloss_layer( - hidden=transformerxl_output, + hidden=model_output, target=target, - lookup_table=self.transformerxl_model.embedding_lookup.lookup_table, + lookup_table=self.xlnet_model.get_embedding_lookup_table(), target_mask=tgt_mask) self.add_loss(lm_loss) - return self.new_mems, transformerxl_output + return self.new_mems, model_output class ClassificationXLNetModel(tf.keras.Model): @@ -821,64 +834,73 @@ class ClassificationXLNetModel(tf.keras.Model): """ - def __init__(self, xlnet_config, run_config, n_class, summary_type, **kwargs): + def __init__(self, xlnet_config, run_config, n_class, summary_type, + use_legacy_mask=True, **kwargs): super(ClassificationXLNetModel, self).__init__(**kwargs) + warnings.warn( + "`ClassificationXLNetModel` is deprecated, please use `XLNetClassifier`" + "instead.", DeprecationWarning, stacklevel=2) self.run_config = run_config self.initializer = _get_initializer(run_config) self.xlnet_config = copy.deepcopy(xlnet_config) + self._use_legacy_mask = use_legacy_mask - self.transformerxl_model = TransformerXLModel( - n_token=self.xlnet_config.n_token, + self.xlnet_model = networks.XLNetBase( + 
vocab_size=self.xlnet_config.n_token, initializer=self.initializer, - attn_type='bi', - n_layer=self.xlnet_config.n_layer, - d_model=self.xlnet_config.d_model, - n_head=self.xlnet_config.n_head, - d_head=self.xlnet_config.d_head, - d_inner=self.xlnet_config.d_inner, - ff_activation=self.xlnet_config.ff_activation, - untie_r=self.xlnet_config.untie_r, - is_training=self.run_config.is_training, - use_tpu=self.run_config.use_tpu, - dropout=self.run_config.dropout, - dropout_att=self.run_config.dropout_att, - mem_len=self.run_config.mem_len, - reuse_len=self.run_config.reuse_len, + attention_type="bi", + num_layers=self.xlnet_config.n_layer, + hidden_size=self.xlnet_config.d_model, + num_attention_heads=self.xlnet_config.n_head, + head_size=self.xlnet_config.d_head, + inner_size=self.xlnet_config.d_inner, + two_stream=False, + tie_attention_biases=not self.xlnet_config.untie_r, + inner_activation=self.xlnet_config.ff_activation, + dropout_rate=self.run_config.dropout, + attention_dropout_rate=self.run_config.dropout_att, + memory_length=self.run_config.mem_len, + reuse_length=self.run_config.reuse_len, bi_data=self.run_config.bi_data, - clamp_len=self.run_config.clamp_len, - same_length=self.run_config.same_length, - name='transformer') + clamp_length=self.run_config.clamp_len, + use_cls_mask=False, + name="xlnet_model") self.summarization_layer = Summarization( - d_model=self.xlnet_config.d_model, - n_head=self.xlnet_config.n_head, - d_head=self.xlnet_config.d_head, - dropout=self.run_config.dropout, - dropout_att=self.run_config.dropout_att, + hidden_size=self.xlnet_config.d_model, + num_attention_heads=self.xlnet_config.n_head, + head_size=self.xlnet_config.d_head, + dropout_rate=self.run_config.dropout, + attention_dropout_rate=self.run_config.dropout_att, initializer=self.initializer, use_proj=True, summary_type=summary_type, - name='sequence_summary') + name="sequence_summary") self.cl_loss_layer = ClassificationLossLayer( - n_class=n_class, 
initializer=self.initializer, name='classification') + n_class=n_class, initializer=self.initializer, name="classification") def call(self, features): """Implements call() for the layer.""" - bsz_per_core = tf.shape(features['input_ids'])[0] - - input_ids = tf.transpose(features['input_ids'], [1, 0]) - seg_ids = tf.transpose(features['segment_ids'], [1, 0]) - input_mask = tf.transpose(features['input_mask'], [1, 0]) + batch_size_per_core = tf.shape(features["input_ids"])[0] + + input_ids = features["input_ids"] + segment_ids = features["segment_ids"] + if self._use_legacy_mask: + # Legacy input mask assumes `real` values are 0 and `padding` + # values are 1. + input_mask = 1 - features["input_mask"] + else: + input_mask = features["input_mask"] - label = tf.reshape(features['label_ids'], [bsz_per_core]) + label = tf.reshape(features["label_ids"], [batch_size_per_core]) - mems = features.get('mems', None) + mems = features.get("mems", None) - transformerxl_output, new_mems, self.lookup_table = ( - self.transformerxl_model(input_ids, seg_ids, input_mask, mems)) + attention_output, new_mems = ( + self.xlnet_model(input_ids, segment_ids, input_mask, mems)) - summary = self.summarization_layer(transformerxl_output) + summary = self.summarization_layer(attention_output) per_example_loss, logits = self.cl_loss_layer(hidden=summary, labels=label) self.add_loss(tf.keras.backend.mean(per_example_loss)) return new_mems, logits @@ -888,56 +910,57 @@ class LMLossLayer(tf.keras.layers.Layer): """Layer computing cross entropy loss for language modeling.""" def __init__(self, - n_token, - d_model, + vocab_size, + hidden_size, initializer, tie_weight=False, bi_data=True, - use_tpu=False, + use_one_hot=False, use_proj=False, **kwargs): """Constructs LMLoss layer. Args: - n_token: Number of tokens in vocabulary. - d_model: The dimension of model hidden state. + vocab_size: Number of tokens in vocabulary. + hidden_size: The dimension of model hidden state. 
initializer: Initializer used for parameters. tie_weight: Whether to share weights between embedding lookup layer and next-token prediction layer. bi_data: Whether to use bidirectional input pipeline. Usually set to True during pretraining and False during finetuning. - use_tpu: bool, whether to use TPU. + use_one_hot: bool, whether to use one hot encodings. This should be used + when TPUs are used. use_proj: bool, whether to add a projection layer before LM prediction. **kwargs: Other parameters. """ super(LMLossLayer, self).__init__(**kwargs) - self.n_token = n_token - self.d_model = d_model + self.vocab_size = vocab_size + self.hidden_size = hidden_size self.initializer = initializer self.tie_weight = tie_weight self.bi_data = bi_data - self.use_tpu = use_tpu + self.use_one_hot = use_one_hot self.use_proj = use_proj def build(self, unused_input_shapes): """Implements build() for the layer.""" if self.use_proj: self.proj_layer = tf.keras.layers.Dense( - units=self.d_model, + units=self.hidden_size, kernel_initializer=self.initializer, activation=gelu, - name='lm_projection/dense') + name="lm_projection/dense") self.proj_layer_norm = tf.keras.layers.LayerNormalization( - axis=-1, epsilon=1e-12, name='lm_projection/LayerNorm') + axis=-1, epsilon=1e-12, name="lm_projection/LayerNorm") if not self.tie_weight: self.softmax_w = self.add_weight( - 'weight', - shape=[self.n_token, self.d_model], + "weight", + shape=[self.vocab_size, self.hidden_size], initializer=self.initializer) self.softmax_b = self.add_weight( - 'bias', shape=[self.n_token], initializer=tf.zeros_initializer()) + "bias", shape=[self.vocab_size], initializer=tf.zeros_initializer()) super(LMLossLayer, self).build(unused_input_shapes) @@ -946,12 +969,12 @@ class LMLossLayer(tf.keras.layers.Layer): if self.use_proj: hidden = self.proj_layer_norm(self.proj_layer(hidden)) if self.tie_weight: - logits = tf.einsum('ibd,nd->ibn', hidden, lookup_table) + self.softmax_b + logits = tf.einsum("ibd,nd->ibn", 
hidden, lookup_table) + self.softmax_b else: - logits = tf.einsum('ibd,nd->ibn', hidden, self.softmax_w) + self.softmax_b + logits = tf.einsum("ibd,nd->ibn", hidden, self.softmax_w) + self.softmax_b - if self.use_tpu: - one_hot_target = tf.one_hot(target, self.n_token, dtype=logits.dtype) + if self.use_one_hot: + one_hot_target = tf.one_hot(target, self.vocab_size, dtype=logits.dtype) loss = -tf.reduce_sum(tf.nn.log_softmax(logits) * one_hot_target, -1) else: loss = tf.nn.sparse_softmax_cross_entropy_with_logits( @@ -966,36 +989,36 @@ class Summarization(tf.keras.layers.Layer): """The layer to pool the output from XLNet model into a vector.""" def __init__(self, - d_model, - n_head, - d_head, - dropout, - dropout_att, + hidden_size, + num_attention_heads, + head_size, + dropout_rate, + attention_dropout_rate, initializer, use_proj=True, - summary_type='last', + summary_type="last", **kwargs): """Constructs Summarization layer. Args: - d_model: int, the dimension of model hidden state. - n_head: int, the number of attention heads. - d_head: int, the dimension size of each attention head. - dropout: float, dropout rate. - dropout_att: float, dropout rate on attention probabilities. + hidden_size: int, the dimension of model hidden state. + num_attention_heads: int, the number of attention heads. + head_size: int, the dimension size of each attention head. + dropout_rate: float, dropout rate. + attention_dropout_rate: float, dropout rate on attention probabilities. initializer: Initializer used for parameters. use_proj: bool, whether to use projection layer for summarization. summary_type: Method used to summarize a sequence into a compact vector. **kwargs: Other parameters. 
""" super(Summarization, self).__init__(**kwargs) - self.d_model = d_model - self.n_head = n_head - self.d_head = d_head + self.hidden_size = hidden_size + self.num_attention_heads = num_attention_heads + self.head_size = head_size self.initializer = initializer - self.dropout = dropout - self.dropout_att = dropout_att + self.dropout_rate = dropout_rate + self.attention_dropout_rate = attention_dropout_rate self.use_proj = use_proj self.summary_type = summary_type @@ -1003,22 +1026,22 @@ class Summarization(tf.keras.layers.Layer): """Implements build() for the layer.""" if self.use_proj: self.proj_layer = tf.keras.layers.Dense( - units=self.d_model, + units=self.hidden_size, kernel_initializer=self.initializer, activation=tf.nn.tanh, - name='summary') - self.dropout_layer = tf.keras.layers.Dropout(rate=self.dropout) + name="summary") + self.dropout_layer = tf.keras.layers.Dropout(rate=self.dropout_rate) super(Summarization, self).build(unused_input_shapes) def call(self, inputs): """Implements call() for the layer.""" - if self.summary_type == 'last': - summary = inputs[-1] - elif self.summary_type == 'first': - summary = inputs[0] + if self.summary_type == "last": + summary = inputs[:, -1, :] + elif self.summary_type == "first": + summary = inputs[:, 0, :] else: - raise ValueError('Invalid summary type provided: %s' % self.summary_type) + raise ValueError("Invalid summary type provided: %s" % self.summary_type) if self.use_proj: summary = self.proj_layer(summary) summary = self.dropout_layer(summary) @@ -1044,7 +1067,7 @@ class ClassificationLossLayer(tf.keras.layers.Layer): def build(self, unused_input_shapes): """Implements build() for the layer.""" self.proj_layer = tf.keras.layers.Dense( - units=self.n_class, kernel_initializer=self.initializer, name='logit') + units=self.n_class, kernel_initializer=self.initializer, name="logit") super(ClassificationLossLayer, self).build(unused_input_shapes) @@ -1066,116 +1089,125 @@ class QAXLNetModel(tf.keras.Model): """ 
def __init__(self, xlnet_config, run_config, start_n_top, end_n_top, - **kwargs): + use_legacy_mask=True, **kwargs): super(QAXLNetModel, self).__init__(**kwargs) + warnings.warn( + "`QAXLNetModel` is deprecated, please use `XLNetSpanLabeler` instead.", + DeprecationWarning, stacklevel=2) self.run_config = run_config self.initializer = _get_initializer(run_config) self.xlnet_config = copy.deepcopy(xlnet_config) + self._use_legacy_mask = use_legacy_mask - self.transformerxl_model = TransformerXLModel( - n_token=self.xlnet_config.n_token, + self.xlnet_model = networks.XLNetBase( + vocab_size=self.xlnet_config.n_token, initializer=self.initializer, - attn_type='bi', - n_layer=self.xlnet_config.n_layer, - d_model=self.xlnet_config.d_model, - n_head=self.xlnet_config.n_head, - d_head=self.xlnet_config.d_head, - d_inner=self.xlnet_config.d_inner, - ff_activation=self.xlnet_config.ff_activation, - untie_r=self.xlnet_config.untie_r, - is_training=self.run_config.is_training, - use_tpu=self.run_config.use_tpu, - dropout=self.run_config.dropout, - dropout_att=self.run_config.dropout_att, - mem_len=self.run_config.mem_len, - reuse_len=self.run_config.reuse_len, + attention_type="bi", + num_layers=self.xlnet_config.n_layer, + hidden_size=self.xlnet_config.d_model, + num_attention_heads=self.xlnet_config.n_head, + head_size=self.xlnet_config.d_head, + inner_size=self.xlnet_config.d_inner, + tie_attention_biases=not self.xlnet_config.untie_r, + inner_activation=self.xlnet_config.ff_activation, + dropout_rate=self.run_config.dropout, + attention_dropout_rate=self.run_config.dropout_att, + two_stream=False, + memory_length=self.run_config.mem_len, + reuse_length=self.run_config.reuse_len, bi_data=self.run_config.bi_data, - clamp_len=self.run_config.clamp_len, - same_length=self.run_config.same_length, - name='transformer') + clamp_length=self.run_config.clamp_len, + use_cls_mask=False, + name="xlnet_model") self.qa_loss_layer = QALossLayer( - d_model=self.xlnet_config.d_model, + 
hidden_size=self.xlnet_config.d_model, start_n_top=start_n_top, end_n_top=end_n_top, initializer=self.initializer, - dropout=self.run_config.dropout) + dropout_rate=self.run_config.dropout, + name="qa_loss_layer") def call(self, features, training=False): """Implements call() for the layer.""" - input_ids = tf.transpose(features['input_ids'], [1, 0]) - seg_ids = tf.transpose(features['segment_ids'], [1, 0]) - input_mask = tf.transpose(features['input_mask'], [1, 0]) + input_ids = features["input_ids"] + segment_ids = features["segment_ids"] + if self._use_legacy_mask: + # Legacy input mask assumes `real` values are 0 and `padding` + # values are 1. + input_mask = 1 - features["input_mask"] + else: + input_mask = features["input_mask"] - cls_index = tf.reshape(features['cls_index'], [-1]) - p_mask = features['p_mask'] + cls_index = tf.reshape(features["cls_index"], [-1]) + p_mask = features["p_mask"] - transformerxl_output, new_mems, self.lookup_table = ( - self.transformerxl_model(input_ids, seg_ids, input_mask)) + attention_output, new_mems = ( + self.xlnet_model(input_ids, segment_ids, input_mask)) if training: loss, logits = self.qa_loss_layer( - hidden=transformerxl_output, + hidden=attention_output, p_mask=p_mask, cls_index=cls_index, - start_positions=features['start_positions'], - end_positions=features['end_positions'], - is_impossible=features['is_impossible']) + start_positions=features["start_positions"], + end_positions=features["end_positions"], + is_impossible=features["is_impossible"]) self.add_loss(loss) return new_mems, logits else: results = self.qa_loss_layer( - hidden=transformerxl_output, p_mask=p_mask, cls_index=cls_index) + hidden=attention_output, p_mask=p_mask, cls_index=cls_index) return results class QALossLayer(tf.keras.layers.Layer): """Layer computing position and regression loss for question answering task.""" - def __init__(self, d_model, start_n_top, end_n_top, initializer, dropout, - **kwargs): + def __init__(self, hidden_size, 
start_n_top, end_n_top, initializer, + dropout_rate, **kwargs): """Constructs Summarization layer. Args: - d_model: Int, the hidden size. + hidden_size: Int, the hidden size. start_n_top: Beam size for span start. end_n_top: Beam size for span end. initializer: Initializer used for parameters. - dropout: float, dropout rate. + dropout_rate: float, dropout rate. **kwargs: Other parameters. """ super(QALossLayer, self).__init__(**kwargs) - self.d_model = d_model + self.hidden_size = hidden_size self.start_n_top = start_n_top self.end_n_top = end_n_top self.initializer = initializer - self.dropout = dropout + self.dropout_rate = dropout_rate def build(self, unused_input_shapes): """Implements build() for the layer.""" self.start_logits_proj_layer = tf.keras.layers.Dense( - units=1, kernel_initializer=self.initializer, name='start_logits/dense') + units=1, kernel_initializer=self.initializer, name="start_logits/dense") self.end_logits_proj_layer0 = tf.keras.layers.Dense( - units=self.d_model, + units=self.hidden_size, kernel_initializer=self.initializer, activation=tf.nn.tanh, - name='end_logits/dense_0') + name="end_logits/dense_0") self.end_logits_proj_layer1 = tf.keras.layers.Dense( - units=1, kernel_initializer=self.initializer, name='end_logits/dense_1') + units=1, kernel_initializer=self.initializer, name="end_logits/dense_1") self.end_logits_layer_norm = tf.keras.layers.LayerNormalization( - axis=-1, epsilon=1e-12, name='end_logits/LayerNorm') + axis=-1, epsilon=1e-12, name="end_logits/LayerNorm") self.answer_class_proj_layer0 = tf.keras.layers.Dense( - units=self.d_model, + units=self.hidden_size, kernel_initializer=self.initializer, activation=tf.nn.tanh, - name='answer_class/dense_0') + name="answer_class/dense_0") self.answer_class_proj_layer1 = tf.keras.layers.Dense( units=1, kernel_initializer=self.initializer, use_bias=False, - name='answer_class/dense_1') - self.ans_feature_dropout = tf.keras.layers.Dropout(rate=self.dropout) + 
name="answer_class/dense_1") + self.ans_feature_dropout = tf.keras.layers.Dropout(rate=self.dropout_rate) super(QALossLayer, self).build(unused_input_shapes) def __call__(self, hidden, p_mask, cls_index, **kwargs): @@ -1186,20 +1218,21 @@ class QALossLayer(tf.keras.layers.Layer): """Implements call() for the layer.""" hidden, p_mask, cls_index, kwargs = inputs return_dict = {} - seq_len = tf.shape(hidden)[0] + seq_len = tf.shape(hidden)[1] + hidden = tf.transpose(hidden, [1, 0, 2]) start_logits = self.start_logits_proj_layer(hidden) start_logits = tf.transpose(tf.squeeze(start_logits, -1), [1, 0]) start_logits_masked = start_logits * (1 - p_mask) - 1e30 * p_mask start_log_probs = tf.nn.log_softmax(start_logits_masked, -1) if training: - start_positions = kwargs['start_positions'] - end_positions = kwargs['end_positions'] - is_impossible = kwargs['is_impossible'] + start_positions = kwargs["start_positions"] + end_positions = kwargs["end_positions"] + is_impossible = kwargs["is_impossible"] start_positions = tf.reshape(start_positions, [-1]) start_index = tf.one_hot( start_positions, depth=seq_len, axis=-1, dtype=tf.float32) - start_features = tf.einsum('lbh,bl->bh', hidden, start_index) + start_features = tf.einsum("lbh,bl->bh", hidden, start_index) start_features = tf.tile(start_features[None], [seq_len, 1, 1]) end_logits = self.end_logits_proj_layer0( tf.concat([hidden, start_features], axis=-1)) @@ -1217,16 +1250,16 @@ class QALossLayer(tf.keras.layers.Layer): start_log_probs, k=self.start_n_top) start_index = tf.one_hot( start_top_index, depth=seq_len, axis=-1, dtype=tf.float32) - start_features = tf.einsum('lbh,bkl->bkh', hidden, start_index) + start_features = tf.einsum("lbh,bkl->bkh", hidden, start_index) end_input = tf.tile(hidden[:, :, None], [1, 1, self.start_n_top, 1]) start_features = tf.tile(start_features[None], [seq_len, 1, 1, 1]) end_input = tf.concat([end_input, start_features], axis=-1) end_logits = self.end_logits_proj_layer0(end_input) - 
end_logits = tf.reshape(end_logits, [seq_len, -1, self.d_model]) + end_logits = tf.reshape(end_logits, [seq_len, -1, self.hidden_size]) end_logits = self.end_logits_layer_norm(end_logits) end_logits = tf.reshape(end_logits, - [seq_len, -1, self.start_n_top, self.d_model]) + [seq_len, -1, self.start_n_top, self.hidden_size]) end_logits = self.end_logits_proj_layer1(end_logits) end_logits = tf.reshape(end_logits, [seq_len, -1, self.start_n_top]) @@ -1242,29 +1275,29 @@ class QALossLayer(tf.keras.layers.Layer): [-1, self.start_n_top * self.end_n_top]) if training: - return_dict['start_log_probs'] = start_log_probs - return_dict['end_log_probs'] = end_log_probs + return_dict["start_log_probs"] = start_log_probs + return_dict["end_log_probs"] = end_log_probs else: - return_dict['start_top_log_probs'] = start_top_log_probs - return_dict['start_top_index'] = start_top_index - return_dict['end_top_log_probs'] = end_top_log_probs - return_dict['end_top_index'] = end_top_index + return_dict["start_top_log_probs"] = start_top_log_probs + return_dict["start_top_index"] = start_top_index + return_dict["end_top_log_probs"] = end_top_log_probs + return_dict["end_top_index"] = end_top_index # an additional layer to predict answerability # get the representation of CLS cls_index = tf.one_hot(cls_index, seq_len, axis=-1, dtype=tf.float32) - cls_feature = tf.einsum('lbh,bl->bh', hidden, cls_index) + cls_feature = tf.einsum("lbh,bl->bh", hidden, cls_index) # get the representation of START - start_p = tf.nn.softmax(start_logits_masked, axis=-1, name='softmax_start') - start_feature = tf.einsum('lbh,bl->bh', hidden, start_p) + start_p = tf.nn.softmax(start_logits_masked, axis=-1, name="softmax_start") + start_feature = tf.einsum("lbh,bl->bh", hidden, start_p) ans_feature = tf.concat([start_feature, cls_feature], -1) ans_feature = self.answer_class_proj_layer0(ans_feature) ans_feature = self.ans_feature_dropout(ans_feature) cls_logits = self.answer_class_proj_layer1(ans_feature) 
cls_logits = tf.squeeze(cls_logits, -1) - return_dict['cls_logits'] = cls_logits + return_dict["cls_logits"] = cls_logits if not training: return return_dict diff --git a/official/nlp/xlnet/xlnet_modeling_test.py b/official/nlp/xlnet/xlnet_modeling_test.py deleted file mode 100644 index dce887aebd77c75999091af9ec112f8d0d336eee..0000000000000000000000000000000000000000 --- a/official/nlp/xlnet/xlnet_modeling_test.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import logging -import numpy as np -import tensorflow as tf - -from official.nlp.xlnet import xlnet_modeling - - -class PositionalEmbeddingLayerTest(tf.test.TestCase): - - def test_positional_embedding(self): - """A low-dimensional example is tested. - - With len(pos_seq)=2 and d_model=4: - - pos_seq = [[1.], [0.]] - inv_freq = [1., 0.01] - pos_seq x inv_freq = [[1, 0.01], [0., 0.]] - pos_emb = [[sin(1.), sin(0.01), cos(1.), cos(0.01)], - [sin(0.), sin(0.), cos(0.), cos(0.)]] - = [[0.84147096, 0.00999983, 0.54030228, 0.99994999], - [0., 0., 1., 1.]] - """ - target = np.array([[[0.84147096, 0.00999983, 0.54030228, 0.99994999]], - [[0., 0., 1., 1.]]]) - d_model = 4 - pos_seq = tf.range(1, -1, -1.0) # [1., 0.] 
- pos_emb_layer = xlnet_modeling.PositionalEmbedding(d_model) - pos_emb = pos_emb_layer(pos_seq, batch_size=None).numpy().astype(float) - - logging.info(pos_emb) - self.assertAllClose(pos_emb, target) - -if __name__ == "__main__": - tf.test.main() diff --git a/official/pip_package/setup.py b/official/pip_package/setup.py index 7603146009739caa4fd6980bc23357b7dd2bfd90..6d24d5ce254305034fecb26230f4af207167407b 100644 --- a/official/pip_package/setup.py +++ b/official/pip_package/setup.py @@ -20,7 +20,7 @@ import sys from setuptools import find_packages from setuptools import setup -version = '2.2.0' +version = '2.4.0' project_name = 'tf-models-official' @@ -59,8 +59,10 @@ install_requires, dependency_links = _get_requirements() if project_name == 'tf-models-nightly': version += '.dev' + datetime.datetime.now().strftime('%Y%m%d') install_requires.append('tf-nightly') + install_requires.append('tensorflow-text-nightly') else: - install_requires.append('tensorflow>=2.2.0') + install_requires.append('tensorflow>=2.4.0') + install_requires.append('tensorflow-text>=2.4.0') print('install_requires: ', install_requires) print('dependency_links: ', dependency_links) @@ -76,9 +78,6 @@ setup( license='Apache 2.0', packages=find_packages(exclude=[ 'research*', - 'tutorials*', - 'samples*', - 'official.r1*', 'official.pip_package*', 'official.benchmark*', 'official.colab*', diff --git a/official/recommendation/README.md b/official/recommendation/README.md index 441bc128681c3189b53f7909b22c70fccf564414..ea2abfadcab2902025ff65e1797ab38646f79082 100644 --- a/official/recommendation/README.md +++ b/official/recommendation/README.md @@ -4,7 +4,7 @@ This is an implementation of the Neural Collaborative Filtering (NCF) framework NCF is a general framework for collaborative filtering of recommendations in which a neural network architecture is used to model user-item interactions. 
Unlike traditional models, NCF does not resort to Matrix Factorization (MF) with an inner product on latent features of users and items. It replaces the inner product with a multi-layer perceptron that can learn an arbitrary function from data. -Two instantiations of NCF are Generalized Matrix Factorization (GMF) and Multi-Layer Perceptron (MLP). GMF applies a linear kernel to model the latent feature interactions, and and MLP uses a nonlinear kernel to learn the interaction function from data. NeuMF is a fused model of GMF and MLP to better model the complex user-item interactions, and unifies the strengths of linearity of MF and non-linearity of MLP for modeling the user-item latent structures. NeuMF allows GMF and MLP to learn separate embeddings, and combines the two models by concatenating their last hidden layer. [neumf_model.py](neumf_model.py) defines the architecture details. +Two instantiations of NCF are Generalized Matrix Factorization (GMF) and Multi-Layer Perceptron (MLP). GMF applies a linear kernel to model the latent feature interactions, and MLP uses a nonlinear kernel to learn the interaction function from data. NeuMF is a fused model of GMF and MLP to better model the complex user-item interactions, and unifies the strengths of linearity of MF and non-linearity of MLP for modeling the user-item latent structures. NeuMF allows GMF and MLP to learn separate embeddings, and combines the two models by concatenating their last hidden layer. [neumf_model.py](neumf_model.py) defines the architecture details. Some abbreviations used the code base include: - NCF: Neural Collaborative Filtering diff --git a/official/recommendation/__init__.py b/official/recommendation/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/recommendation/__init__.py +++ b/official/recommendation/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/recommendation/constants.py b/official/recommendation/constants.py index 8e313bfa66a2133862e79dbad89f03421fee39c5..a7aae736c2dd36b0f5e321f741f6bb9b75f8e95c 100644 --- a/official/recommendation/constants.py +++ b/official/recommendation/constants.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Central location for NCF specific values.""" import sys @@ -65,7 +65,7 @@ CACHE_INVALIDATION_SEC = 3600 * 24 # == Data Generation =========================================================== # ============================================================================== CYCLES_TO_BUFFER = 3 # The number of train cycles worth of data to "run ahead" - # of the main training loop. +# of the main training loop. # Number of batches to run per epoch when using synthetic data. 
At high batch # sizes, we run for more batches than with real data, which is good since diff --git a/official/recommendation/create_ncf_data.py b/official/recommendation/create_ncf_data.py index 60267bcd5f77ec7cb2036cb2037efe9360d692ba..bc411cbd8b03380baf9ef0e3e9481a4b97a90b66 100644 --- a/official/recommendation/create_ncf_data.py +++ b/official/recommendation/create_ncf_data.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,19 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Binary to generate training/evaluation dataset for NCF model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Binary to generate training/evaluation dataset for NCF model.""" import json # pylint: disable=g-bad-import-order +# Import libraries from absl import app from absl import flags -import tensorflow.compat.v2 as tf +import tensorflow as tf # pylint: enable=g-bad-import-order from official.recommendation import movielens diff --git a/official/recommendation/data_pipeline.py b/official/recommendation/data_pipeline.py index 1b4dd33afe25df2468cdfcbb2c146392d7bec76e..22a096c6ed313009c57968fff4cebeaf6d3fb9d6 100644 --- a/official/recommendation/data_pipeline.py +++ b/official/recommendation/data_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Asynchronous data producer for the NCF pipeline.""" from __future__ import absolute_import @@ -41,7 +41,6 @@ from official.recommendation import popen_helper from official.recommendation import stat_utils from tensorflow.python.tpu.datasets import StreamingFilesDataset - SUMMARY_TEMPLATE = """General: {spacer}Num users: {num_users} {spacer}Num items: {num_items} @@ -74,25 +73,27 @@ class DatasetManager(object): num_train_epochs=None): # type: (bool, bool, int, typing.Optional[str], bool, int) -> None """Constructs a `DatasetManager` instance. + Args: is_training: Boolean of whether the data provided is training or - evaluation data. This determines whether to reuse the data - (if is_training=False) and the exact structure to use when storing and + evaluation data. This determines whether to reuse the data (if + is_training=False) and the exact structure to use when storing and yielding data. stream_files: Boolean indicating whether data should be serialized and written to file shards. batches_per_epoch: The number of batches in a single epoch. shard_root: The base directory to be used when stream_files=True. deterministic: Forgo non-deterministic speedups. (i.e. sloppy=True) - num_train_epochs: Number of epochs to generate. If None, then each - call to `get_dataset()` increments the number of epochs requested. + num_train_epochs: Number of epochs to generate. If None, then each call to + `get_dataset()` increments the number of epochs requested. 
""" self._is_training = is_training self._deterministic = deterministic self._stream_files = stream_files self._writers = [] - self._write_locks = [threading.RLock() for _ in - range(rconst.NUM_FILE_SHARDS)] if stream_files else [] + self._write_locks = [ + threading.RLock() for _ in range(rconst.NUM_FILE_SHARDS) + ] if stream_files else [] self._batches_per_epoch = batches_per_epoch self._epochs_completed = 0 self._epochs_requested = num_train_epochs if num_train_epochs else 0 @@ -103,8 +104,9 @@ class DatasetManager(object): @property def current_data_root(self): - subdir = (rconst.TRAIN_FOLDER_TEMPLATE.format(self._epochs_completed) - if self._is_training else rconst.EVAL_FOLDER) + subdir = ( + rconst.TRAIN_FOLDER_TEMPLATE.format(self._epochs_completed) + if self._is_training else rconst.EVAL_FOLDER) return os.path.join(self._shard_root, subdir) def buffer_reached(self): @@ -123,8 +125,8 @@ class DatasetManager(object): k: create_int_feature(v.astype(np.int64)) for k, v in data.items() } - return tf.train.Example( - features=tf.train.Features(feature=feature_dict)).SerializeToString() + return tf.train.Example(features=tf.train.Features( + feature=feature_dict)).SerializeToString() @staticmethod def deserialize(serialized_data, batch_size=None, is_training=True): @@ -134,8 +136,8 @@ class DatasetManager(object): serialized_data: A tensor containing serialized records. batch_size: The data arrives pre-batched, so batch size is needed to deserialize the data. - is_training: Boolean, whether data to deserialize to training data - or evaluation data. + is_training: Boolean, whether data to deserialize to training data or + evaluation data. 
""" def _get_feature_map(batch_size, is_training=True): @@ -171,13 +173,16 @@ class DatasetManager(object): valid_point_mask = tf.cast(features[rconst.VALID_POINT_MASK], tf.bool) fake_dup_mask = tf.zeros_like(users) return { - movielens.USER_COLUMN: users, - movielens.ITEM_COLUMN: items, - rconst.VALID_POINT_MASK: valid_point_mask, + movielens.USER_COLUMN: + users, + movielens.ITEM_COLUMN: + items, + rconst.VALID_POINT_MASK: + valid_point_mask, rconst.TRAIN_LABEL_KEY: - tf.reshape(tf.cast(features["labels"], tf.bool), - (batch_size, 1)), - rconst.DUPLICATE_MASK: fake_dup_mask + tf.reshape(tf.cast(features["labels"], tf.bool), (batch_size, 1)), + rconst.DUPLICATE_MASK: + fake_dup_mask } else: labels = tf.cast(tf.zeros_like(users), tf.bool) @@ -228,8 +233,10 @@ class DatasetManager(object): if self._stream_files: tf.io.gfile.makedirs(self.current_data_root) template = os.path.join(self.current_data_root, rconst.SHARD_TEMPLATE) - self._writers = [tf.io.TFRecordWriter(template.format(i)) - for i in range(rconst.NUM_FILE_SHARDS)] + self._writers = [ + tf.io.TFRecordWriter(template.format(i)) + for i in range(rconst.NUM_FILE_SHARDS) + ] def end_construction(self): if self._stream_files: @@ -273,8 +280,8 @@ class DatasetManager(object): Args: batch_size: The per-replica batch size of the dataset. - epochs_between_evals: How many epochs worth of data to yield. - (Generator mode only.) + epochs_between_evals: How many epochs worth of data to yield. (Generator + mode only.) """ self.increment_request_epoch() if self._stream_files: @@ -285,11 +292,13 @@ class DatasetManager(object): if not self._is_training: self._result_queue.put(epoch_data_dir) # Eval data is reused. 
- file_pattern = os.path.join( - epoch_data_dir, rconst.SHARD_TEMPLATE.format("*")) + file_pattern = os.path.join(epoch_data_dir, + rconst.SHARD_TEMPLATE.format("*")) dataset = StreamingFilesDataset( - files=file_pattern, worker_job=popen_helper.worker_job(), - num_parallel_reads=rconst.NUM_FILE_SHARDS, num_epochs=1, + files=file_pattern, + worker_job=popen_helper.worker_job(), + num_parallel_reads=rconst.NUM_FILE_SHARDS, + num_epochs=1, sloppy=not self._deterministic) map_fn = functools.partial( self.deserialize, @@ -298,8 +307,10 @@ class DatasetManager(object): dataset = dataset.map(map_fn, num_parallel_calls=16) else: - types = {movielens.USER_COLUMN: rconst.USER_DTYPE, - movielens.ITEM_COLUMN: rconst.ITEM_DTYPE} + types = { + movielens.USER_COLUMN: rconst.USER_DTYPE, + movielens.ITEM_COLUMN: rconst.ITEM_DTYPE + } shapes = { movielens.USER_COLUMN: tf.TensorShape([batch_size, 1]), movielens.ITEM_COLUMN: tf.TensorShape([batch_size, 1]) @@ -319,8 +330,7 @@ class DatasetManager(object): data_generator = functools.partial( self.data_generator, epochs_between_evals=epochs_between_evals) dataset = tf.data.Dataset.from_generator( - generator=data_generator, output_types=types, - output_shapes=shapes) + generator=data_generator, output_types=types, output_shapes=shapes) return dataset.prefetch(16) @@ -332,16 +342,17 @@ class DatasetManager(object): # Estimator passes batch_size during training and eval_batch_size during # eval. 
- param_batch_size = (params["batch_size"] if self._is_training else - params.get("eval_batch_size") or params["batch_size"]) + param_batch_size = ( + params["batch_size"] if self._is_training else + params.get("eval_batch_size") or params["batch_size"]) if batch_size != param_batch_size: raise ValueError("producer batch size ({}) differs from params batch " "size ({})".format(batch_size, param_batch_size)) - epochs_between_evals = (params.get("epochs_between_evals", 1) - if self._is_training else 1) - return self.get_dataset(batch_size=batch_size, - epochs_between_evals=epochs_between_evals) + epochs_between_evals = ( + params.get("epochs_between_evals", 1) if self._is_training else 1) + return self.get_dataset( + batch_size=batch_size, epochs_between_evals=epochs_between_evals) return input_fn @@ -405,15 +416,16 @@ class BaseDataConstructor(threading.Thread): (self._train_pos_count,) = self._train_pos_users.shape self._elements_in_epoch = (1 + num_train_negatives) * self._train_pos_count - self.train_batches_per_epoch = self._count_batches( - self._elements_in_epoch, train_batch_size, batches_per_train_step) + self.train_batches_per_epoch = self._count_batches(self._elements_in_epoch, + train_batch_size, + batches_per_train_step) # Evaluation if eval_batch_size % (1 + rconst.NUM_EVAL_NEGATIVES): raise ValueError("Eval batch size {} is not divisible by {}".format( eval_batch_size, 1 + rconst.NUM_EVAL_NEGATIVES)) - self._eval_users_per_batch = int( - eval_batch_size // (1 + rconst.NUM_EVAL_NEGATIVES)) + self._eval_users_per_batch = int(eval_batch_size // + (1 + rconst.NUM_EVAL_NEGATIVES)) self._eval_elements_in_epoch = num_users * (1 + rconst.NUM_EVAL_NEGATIVES) self.eval_batches_per_epoch = self._count_batches( self._eval_elements_in_epoch, eval_batch_size, batches_per_eval_step) @@ -450,12 +462,16 @@ class BaseDataConstructor(threading.Thread): multiplier = ("(x{} devices)".format(self._batches_per_train_step) if self._batches_per_train_step > 1 else "") summary 
= SUMMARY_TEMPLATE.format( - spacer=" ", num_users=self._num_users, num_items=self._num_items, + spacer=" ", + num_users=self._num_users, + num_items=self._num_items, train_pos_ct=self._train_pos_count, train_batch_size=self.train_batch_size, train_batch_ct=self.train_batches_per_epoch, - eval_pos_ct=self._num_users, eval_batch_size=self.eval_batch_size, - eval_batch_ct=self.eval_batches_per_epoch, multiplier=multiplier) + eval_pos_ct=self._num_users, + eval_batch_size=self.eval_batch_size, + eval_batch_ct=self.eval_batches_per_epoch, + multiplier=multiplier) return super(BaseDataConstructor, self).__str__() + "\n" + summary @staticmethod @@ -514,8 +530,9 @@ class BaseDataConstructor(threading.Thread): i: The index of the batch. This is used when stream_files=True to assign data to file shards. """ - batch_indices = self._current_epoch_order[i * self.train_batch_size: - (i + 1) * self.train_batch_size] + batch_indices = self._current_epoch_order[i * + self.train_batch_size:(i + 1) * + self.train_batch_size] (mask_start_index,) = batch_indices.shape batch_ind_mod = np.mod(batch_indices, self._train_pos_count) @@ -578,8 +595,9 @@ class BaseDataConstructor(threading.Thread): map_args = list(range(self.train_batches_per_epoch)) self._current_epoch_order = next(self._shuffle_iterator) - get_pool = (popen_helper.get_fauxpool if self.deterministic else - popen_helper.get_threadpool) + get_pool = ( + popen_helper.get_fauxpool + if self.deterministic else popen_helper.get_threadpool) with get_pool(6) as pool: pool.map(self._get_training_batch, map_args) self._train_dataset.end_construction() @@ -602,8 +620,8 @@ class BaseDataConstructor(threading.Thread): users: An array of users in a batch. (should be identical along axis 1) positive_items: An array (batch_size x 1) of positive item indices. negative_items: An array of negative item indices. - users_per_batch: How many users should be in the batch. This is passed - as an argument so that ncf_test.py can use this method. 
+ users_per_batch: How many users should be in the batch. This is passed as + an argument so that ncf_test.py can use this method. Returns: User, item, and duplicate_mask arrays. @@ -635,11 +653,14 @@ class BaseDataConstructor(threading.Thread): """ low_index = i * self._eval_users_per_batch high_index = (i + 1) * self._eval_users_per_batch - users = np.repeat(self._eval_pos_users[low_index:high_index, np.newaxis], - 1 + rconst.NUM_EVAL_NEGATIVES, axis=1) + users = np.repeat( + self._eval_pos_users[low_index:high_index, np.newaxis], + 1 + rconst.NUM_EVAL_NEGATIVES, + axis=1) positive_items = self._eval_pos_items[low_index:high_index, np.newaxis] - negative_items = (self.lookup_negative_items(negative_users=users[:, :-1]) - .reshape(-1, rconst.NUM_EVAL_NEGATIVES)) + negative_items = ( + self.lookup_negative_items(negative_users=users[:, :-1]).reshape( + -1, rconst.NUM_EVAL_NEGATIVES)) users, items, duplicate_mask = self._assemble_eval_batch( users, positive_items, negative_items, self._eval_users_per_batch) @@ -664,8 +685,9 @@ class BaseDataConstructor(threading.Thread): self._eval_dataset.start_construction() map_args = [i for i in range(self.eval_batches_per_epoch)] - get_pool = (popen_helper.get_fauxpool if self.deterministic else - popen_helper.get_threadpool) + get_pool = ( + popen_helper.get_fauxpool + if self.deterministic else popen_helper.get_threadpool) with get_pool(6) as pool: pool.map(self._get_eval_batch, map_args) self._eval_dataset.end_construction() @@ -677,12 +699,12 @@ class BaseDataConstructor(threading.Thread): # It isn't feasible to provide a foolproof check, so this is designed to # catch most failures rather than provide an exhaustive guard. 
if self._fatal_exception is not None: - raise ValueError("Fatal exception in the data production loop: {}" - .format(self._fatal_exception)) + raise ValueError("Fatal exception in the data production loop: {}".format( + self._fatal_exception)) - return ( - self._train_dataset.make_input_fn(self.train_batch_size) if is_training - else self._eval_dataset.make_input_fn(self.eval_batch_size)) + return (self._train_dataset.make_input_fn(self.train_batch_size) + if is_training else self._eval_dataset.make_input_fn( + self.eval_batch_size)) def increment_request_epoch(self): self._train_dataset.increment_request_epoch() @@ -714,8 +736,9 @@ class DummyConstructor(threading.Thread): # Estimator passes batch_size during training and eval_batch_size during # eval. - batch_size = (params["batch_size"] if is_training else - params.get("eval_batch_size") or params["batch_size"]) + batch_size = ( + params["batch_size"] if is_training else + params.get("eval_batch_size") or params["batch_size"]) num_users = params["num_users"] num_items = params["num_items"] @@ -795,6 +818,7 @@ class MaterializedDataConstructor(BaseDataConstructor): a pre-compute which is quadratic in problem size will still fit in memory. A more scalable lookup method is in the works. """ + def __init__(self, *args, **kwargs): super(MaterializedDataConstructor, self).__init__(*args, **kwargs) self._negative_table = None @@ -807,8 +831,8 @@ class MaterializedDataConstructor(BaseDataConstructor): self._train_pos_users[:-1])[:, 0] + 1 (upper_bound,) = self._train_pos_users.shape index_bounds = [0] + inner_bounds.tolist() + [upper_bound] - self._negative_table = np.zeros(shape=(self._num_users, self._num_items), - dtype=rconst.ITEM_DTYPE) + self._negative_table = np.zeros( + shape=(self._num_users, self._num_items), dtype=rconst.ITEM_DTYPE) # Set the table to the max value to make sure the embedding lookup will fail # if we go out of bounds, rather than just overloading item zero. 
@@ -825,7 +849,7 @@ class MaterializedDataConstructor(BaseDataConstructor): # call does not parallelize well. Multiprocessing incurs too much # serialization overhead to be worthwhile. for i in range(self._num_users): - positives = self._train_pos_items[index_bounds[i]:index_bounds[i+1]] + positives = self._train_pos_items[index_bounds[i]:index_bounds[i + 1]] negatives = np.delete(full_set, positives) self._per_user_neg_count[i] = self._num_items - positives.shape[0] self._negative_table[i, :self._per_user_neg_count[i]] = negatives @@ -848,6 +872,7 @@ class BisectionDataConstructor(BaseDataConstructor): it at which point the item id for the ith negative is a simply algebraic expression. """ + def __init__(self, *args, **kwargs): super(BisectionDataConstructor, self).__init__(*args, **kwargs) self.index_bounds = None @@ -855,7 +880,7 @@ class BisectionDataConstructor(BaseDataConstructor): self._total_negatives = None def _index_segment(self, user): - lower, upper = self.index_bounds[user:user+2] + lower, upper = self.index_bounds[user:user + 2] items = self._sorted_train_pos_items[lower:upper] negatives_since_last_positive = np.concatenate( @@ -877,11 +902,11 @@ class BisectionDataConstructor(BaseDataConstructor): self._sorted_train_pos_items = self._train_pos_items.copy() for i in range(self._num_users): - lower, upper = self.index_bounds[i:i+2] + lower, upper = self.index_bounds[i:i + 2] self._sorted_train_pos_items[lower:upper].sort() - self._total_negatives = np.concatenate([ - self._index_segment(i) for i in range(self._num_users)]) + self._total_negatives = np.concatenate( + [self._index_segment(i) for i in range(self._num_users)]) logging.info("Negative total vector built. 
Time: {:.1f} seconds".format( timeit.default_timer() - start_time)) @@ -912,8 +937,7 @@ class BisectionDataConstructor(BaseDataConstructor): use_shortcut = neg_item_choice >= self._total_negatives[right_index] output[use_shortcut] = ( self._sorted_train_pos_items[right_index] + 1 + - (neg_item_choice - self._total_negatives[right_index]) - )[use_shortcut] + (neg_item_choice - self._total_negatives[right_index]))[use_shortcut] if np.all(use_shortcut): # The bisection code is ill-posed when there are no elements. @@ -943,8 +967,7 @@ class BisectionDataConstructor(BaseDataConstructor): output[not_use_shortcut] = ( self._sorted_train_pos_items[right_index] - - (self._total_negatives[right_index] - neg_item_choice) - ) + (self._total_negatives[right_index] - neg_item_choice)) assert np.all(output >= 0) diff --git a/official/recommendation/data_preprocessing.py b/official/recommendation/data_preprocessing.py index 3d7a3f856a7d8de45ff00ff3a0e1a6e6eacadd3a..a1ab31718205570a920eab8387aaf469dbd8531b 100644 --- a/official/recommendation/data_preprocessing.py +++ b/official/recommendation/data_preprocessing.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Preprocess dataset and construct any necessary artifacts.""" from __future__ import absolute_import @@ -25,6 +25,7 @@ import time import timeit # pylint: disable=wrong-import-order + from absl import logging import numpy as np import pandas as pd @@ -37,10 +38,9 @@ from official.recommendation import constants as rconst from official.recommendation import data_pipeline from official.recommendation import movielens - -_EXPECTED_CACHE_KEYS = ( - rconst.TRAIN_USER_KEY, rconst.TRAIN_ITEM_KEY, rconst.EVAL_USER_KEY, - rconst.EVAL_ITEM_KEY, rconst.USER_MAP, rconst.ITEM_MAP) +_EXPECTED_CACHE_KEYS = (rconst.TRAIN_USER_KEY, rconst.TRAIN_ITEM_KEY, + rconst.EVAL_USER_KEY, rconst.EVAL_ITEM_KEY, + rconst.USER_MAP, rconst.ITEM_MAP) def read_dataframe( @@ -178,17 +178,20 @@ def _filter_index_sort(raw_rating_path: Text, eval_df, train_df = grouped.tail(1), grouped.apply(lambda x: x.iloc[:-1]) data = { - rconst.TRAIN_USER_KEY: train_df[movielens.USER_COLUMN] - .values.astype(rconst.USER_DTYPE), - rconst.TRAIN_ITEM_KEY: train_df[movielens.ITEM_COLUMN] - .values.astype(rconst.ITEM_DTYPE), - rconst.EVAL_USER_KEY: eval_df[movielens.USER_COLUMN] - .values.astype(rconst.USER_DTYPE), - rconst.EVAL_ITEM_KEY: eval_df[movielens.ITEM_COLUMN] - .values.astype(rconst.ITEM_DTYPE), - rconst.USER_MAP: user_map, - rconst.ITEM_MAP: item_map, - "create_time": time.time(), + rconst.TRAIN_USER_KEY: + train_df[movielens.USER_COLUMN].values.astype(rconst.USER_DTYPE), + rconst.TRAIN_ITEM_KEY: + train_df[movielens.ITEM_COLUMN].values.astype(rconst.ITEM_DTYPE), + rconst.EVAL_USER_KEY: + eval_df[movielens.USER_COLUMN].values.astype(rconst.USER_DTYPE), + rconst.EVAL_ITEM_KEY: + eval_df[movielens.ITEM_COLUMN].values.astype(rconst.ITEM_DTYPE), + rconst.USER_MAP: + user_map, + rconst.ITEM_MAP: + item_map, + "create_time": + time.time(), } logging.info("Writing raw data cache.") @@ -217,8 +220,8 @@ def 
instantiate_pipeline(dataset, for the input pipeline. deterministic: Tell the data constructor to produce deterministically. epoch_dir: Directory in which to store the training epochs. - generate_data_offline: Boolean, whether current pipeline is done offline - or while training. + generate_data_offline: Boolean, whether current pipeline is done offline or + while training. """ logging.info("Beginning data preprocessing.") @@ -258,8 +261,8 @@ def instantiate_pipeline(dataset, create_data_offline=generate_data_offline) run_time = timeit.default_timer() - st - logging.info("Data preprocessing complete. Time: {:.1f} sec." - .format(run_time)) + logging.info( + "Data preprocessing complete. Time: {:.1f} sec.".format(run_time)) print(producer) return num_users, num_items, producer diff --git a/official/recommendation/data_test.py b/official/recommendation/data_test.py index 9541ee3f8bb4c65fb1f69070fa3876ee51b6c191..31e0ae4d2113cde0191c36223537b63957ecbfa9 100644 --- a/official/recommendation/data_test.py +++ b/official/recommendation/data_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Test NCF data pipeline.""" from __future__ import absolute_import @@ -23,6 +23,7 @@ import hashlib import os import mock + import numpy as np import scipy.stats import tensorflow as tf @@ -32,7 +33,6 @@ from official.recommendation import data_preprocessing from official.recommendation import movielens from official.recommendation import popen_helper - DATASET = "ml-test" NUM_USERS = 1000 NUM_ITEMS = 2000 @@ -41,7 +41,6 @@ BATCH_SIZE = 2048 EVAL_BATCH_SIZE = 4000 NUM_NEG = 4 - END_TO_END_TRAIN_MD5 = "b218738e915e825d03939c5e305a2698" END_TO_END_EVAL_MD5 = "d753d0f3186831466d6e218163a9501e" FRESH_RANDOMNESS_MD5 = "63d0dff73c0e5f1048fbdc8c65021e22" @@ -136,8 +135,11 @@ class BaseTest(tf.test.TestCase): def _test_end_to_end(self, constructor_type): params = self.make_params(train_epochs=1) _, _, producer = data_preprocessing.instantiate_pipeline( - dataset=DATASET, data_dir=self.temp_data_dir, params=params, - constructor_type=constructor_type, deterministic=True) + dataset=DATASET, + data_dir=self.temp_data_dir, + params=params, + constructor_type=constructor_type, + deterministic=True) producer.start() producer.join() @@ -258,8 +260,11 @@ class BaseTest(tf.test.TestCase): train_epochs = 5 params = self.make_params(train_epochs=train_epochs) _, _, producer = data_preprocessing.instantiate_pipeline( - dataset=DATASET, data_dir=self.temp_data_dir, params=params, - constructor_type=constructor_type, deterministic=True) + dataset=DATASET, + data_dir=self.temp_data_dir, + params=params, + constructor_type=constructor_type, + deterministic=True) producer.start() @@ -298,8 +303,8 @@ class BaseTest(tf.test.TestCase): self.assertRegexpMatches(md5.hexdigest(), FRESH_RANDOMNESS_MD5) # The positive examples should appear exactly once each epoch - self.assertAllEqual(list(positive_counts.values()), - [train_epochs for _ in positive_counts]) + self.assertAllEqual( + 
list(positive_counts.values()), [train_epochs for _ in positive_counts]) # The threshold for the negatives is heuristic, but in general repeats are # expected, but should not appear too frequently. @@ -317,8 +322,8 @@ class BaseTest(tf.test.TestCase): # The frequency of occurance of a given negative pair should follow an # approximately binomial distribution in the limit that the cardinality of # the negative pair set >> number of samples per epoch. - approx_pdf = scipy.stats.binom.pmf(k=np.arange(train_epochs+1), - n=train_epochs, p=e_sample) + approx_pdf = scipy.stats.binom.pmf( + k=np.arange(train_epochs + 1), n=train_epochs, p=e_sample) # Tally the actual observed counts. count_distribution = [0 for _ in range(train_epochs + 1)] diff --git a/official/recommendation/movielens.py b/official/recommendation/movielens.py index 576519a316bb3e05d786ac737da19cb44d2b61c4..f50820e1fec2021c85fda4fe37e0bc9c78b9a249 100644 --- a/official/recommendation/movielens.py +++ b/official/recommendation/movielens.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Download and extract the MovieLens dataset from GroupLens website. Download the dataset, and perform basic preprocessing. 
@@ -27,6 +27,7 @@ import tempfile import zipfile # pylint: disable=g-bad-import-order +# Import libraries import numpy as np import pandas as pd import six diff --git a/official/recommendation/ncf_common.py b/official/recommendation/ncf_common.py index 8abc927bfa29c52d6c151023d281d7e4f6f52100..43d6a88f1231dc2948365b31fc230521dcdaa512 100644 --- a/official/recommendation/ncf_common.py +++ b/official/recommendation/ncf_common.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Common functionalities used by both Keras and Estimator implementations. 
-""" + +"""Common functionalities used by both Keras and Estimator implementations.""" from __future__ import absolute_import from __future__ import division @@ -22,20 +21,17 @@ from __future__ import print_function import json import os -# pylint: disable=g-bad-import-order -import numpy as np from absl import flags from absl import logging +import numpy as np import tensorflow as tf -# pylint: enable=g-bad-import-order +from official.common import distribute_utils from official.recommendation import constants as rconst from official.recommendation import data_pipeline from official.recommendation import data_preprocessing from official.recommendation import movielens from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils -from official.utils.misc import keras_utils FLAGS = flags.FLAGS @@ -56,7 +52,9 @@ def get_inputs(params): num_eval_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH else: num_users, num_items, producer = data_preprocessing.instantiate_pipeline( - dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params, + dataset=FLAGS.dataset, + data_dir=FLAGS.data_dir, + params=params, constructor_type=FLAGS.constructor_type, deterministic=FLAGS.seed is not None) num_train_steps = producer.train_batches_per_epoch @@ -94,7 +92,7 @@ def parse_flags(flags_obj): "beta2": flags_obj.beta2, "epsilon": flags_obj.epsilon, "match_mlperf": flags_obj.ml_perf, - "epochs_between_evals": FLAGS.epochs_between_evals, + "epochs_between_evals": flags_obj.epochs_between_evals, "keras_use_ctl": flags_obj.keras_use_ctl, "hr_threshold": flags_obj.hr_threshold, "stream_files": flags_obj.tpu is not None, @@ -108,16 +106,17 @@ def get_v1_distribution_strategy(params): """Returns the distribution strategy to use.""" if params["use_tpu"]: # Some of the networking libraries are quite chatty. 
- for name in ["googleapiclient.discovery", "googleapiclient.discovery_cache", - "oauth2client.transport"]: + for name in [ + "googleapiclient.discovery", "googleapiclient.discovery_cache", + "oauth2client.transport" + ]: logging.getLogger(name).setLevel(logging.ERROR) tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( tpu=params["tpu"], zone=params["tpu_zone"], project=params["tpu_gcp_project"], - coordinator_name="coordinator" - ) + coordinator_name="coordinator") logging.info("Issuing reset command to TPU to ensure a clean state.") tf.Session.reset(tpu_cluster_resolver.get_master()) @@ -126,18 +125,20 @@ def get_v1_distribution_strategy(params): # by reading the `TF_CONFIG` environment variable, and the coordinator # is used by StreamingFilesDataset. tf_config_env = { - "session_master": tpu_cluster_resolver.get_master(), - "eval_session_master": tpu_cluster_resolver.get_master(), - "coordinator": tpu_cluster_resolver.cluster_spec() - .as_dict()["coordinator"] + "session_master": + tpu_cluster_resolver.get_master(), + "eval_session_master": + tpu_cluster_resolver.get_master(), + "coordinator": + tpu_cluster_resolver.cluster_spec().as_dict()["coordinator"] } os.environ["TF_CONFIG"] = json.dumps(tf_config_env) - distribution = tf.distribute.experimental.TPUStrategy( + distribution = tf.distribute.TPUStrategy( tpu_cluster_resolver, steps_per_run=100) else: - distribution = distribution_utils.get_distribution_strategy( + distribution = distribute_utils.get_distribution_strategy( num_gpus=params["num_gpus"]) return distribution @@ -146,16 +147,21 @@ def get_v1_distribution_strategy(params): def define_ncf_flags(): """Add flags for running ncf_main.""" # Add common flags - flags_core.define_base(model_dir=True, clean=True, train_epochs=True, - epochs_between_evals=True, export_dir=False, - run_eagerly=True, stop_threshold=True, num_gpu=True, - distribution_strategy=True) + flags_core.define_base( + model_dir=True, + clean=True, + 
train_epochs=True, + epochs_between_evals=True, + export_dir=False, + run_eagerly=True, + stop_threshold=True, + num_gpu=True, + distribution_strategy=True) flags_core.define_performance( synthetic_data=True, dtype=True, fp16_implementation=True, loss_scale=True, - dynamic_loss_scale=True, enable_xla=True, ) flags_core.define_device(tpu=True) @@ -171,69 +177,82 @@ def define_ncf_flags(): dataset=movielens.ML_1M, train_epochs=2, batch_size=99000, - tpu=None - ) + tpu=None) # Add ncf-specific flags flags.DEFINE_boolean( - name="download_if_missing", default=True, help=flags_core.help_wrap( + name="download_if_missing", + default=True, + help=flags_core.help_wrap( "Download data to data_dir if it is not already present.")) flags.DEFINE_integer( - name="eval_batch_size", default=None, help=flags_core.help_wrap( + name="eval_batch_size", + default=None, + help=flags_core.help_wrap( "The batch size used for evaluation. This should generally be larger" "than the training batch size as the lack of back propagation during" "evaluation can allow for larger batch sizes to fit in memory. If not" "specified, the training batch size (--batch_size) will be used.")) flags.DEFINE_integer( - name="num_factors", default=8, + name="num_factors", + default=8, help=flags_core.help_wrap("The Embedding size of MF model.")) # Set the default as a list of strings to be consistent with input arguments flags.DEFINE_list( - name="layers", default=["64", "32", "16", "8"], + name="layers", + default=["64", "32", "16", "8"], help=flags_core.help_wrap( "The sizes of hidden layers for MLP. Example " "to specify different sizes of MLP layers: --layers=32,16,8,4")) flags.DEFINE_float( - name="mf_regularization", default=0., + name="mf_regularization", + default=0., help=flags_core.help_wrap( "The regularization factor for MF embeddings. 
The factor is used by " "regularizer which allows to apply penalties on layer parameters or " "layer activity during optimization.")) flags.DEFINE_list( - name="mlp_regularization", default=["0.", "0.", "0.", "0."], + name="mlp_regularization", + default=["0.", "0.", "0.", "0."], help=flags_core.help_wrap( "The regularization factor for each MLP layer. See mf_regularization " "help for more info about regularization factor.")) flags.DEFINE_integer( - name="num_neg", default=4, + name="num_neg", + default=4, help=flags_core.help_wrap( "The Number of negative instances to pair with a positive instance.")) flags.DEFINE_float( - name="learning_rate", default=0.001, + name="learning_rate", + default=0.001, help=flags_core.help_wrap("The learning rate.")) flags.DEFINE_float( - name="beta1", default=0.9, + name="beta1", + default=0.9, help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer.")) flags.DEFINE_float( - name="beta2", default=0.999, + name="beta2", + default=0.999, help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer.")) flags.DEFINE_float( - name="epsilon", default=1e-8, + name="epsilon", + default=1e-8, help=flags_core.help_wrap("epsilon hyperparameter for the Adam " "optimizer.")) flags.DEFINE_float( - name="hr_threshold", default=1.0, + name="hr_threshold", + default=1.0, help=flags_core.help_wrap( "If passed, training will stop when the evaluation metric HR is " "greater than or equal to hr_threshold. For dataset ml-1m, the " @@ -242,8 +261,10 @@ def define_ncf_flags(): "achieved by MLPerf implementation.")) flags.DEFINE_enum( - name="constructor_type", default="bisection", - enum_values=["bisection", "materialized"], case_sensitive=False, + name="constructor_type", + default="bisection", + enum_values=["bisection", "materialized"], + case_sensitive=False, help=flags_core.help_wrap( "Strategy to use for generating false negatives. 
materialized has a" "precompute that scales badly, but a faster per-epoch construction" @@ -265,7 +286,8 @@ def define_ncf_flags(): help=flags_core.help_wrap("Path to input meta data file.")) flags.DEFINE_bool( - name="ml_perf", default=False, + name="ml_perf", + default=False, help=flags_core.help_wrap( "If set, changes the behavior of the model slightly to match the " "MLPerf reference implementations here: \n" @@ -280,23 +302,26 @@ def define_ncf_flags(): "not stable.")) flags.DEFINE_bool( - name="output_ml_perf_compliance_logging", default=False, + name="output_ml_perf_compliance_logging", + default=False, help=flags_core.help_wrap( "If set, output the MLPerf compliance logging. This is only useful " "if one is running the model for MLPerf. See " "https://github.com/mlperf/policies/blob/master/training_rules.adoc" "#submission-compliance-logs for details. This uses sudo and so may " "ask for your password, as root access is needed to clear the system " - "caches, which is required for MLPerf compliance." 
- ) - ) + "caches, which is required for MLPerf compliance.")) flags.DEFINE_integer( - name="seed", default=None, help=flags_core.help_wrap( + name="seed", + default=None, + help=flags_core.help_wrap( "This value will be used to seed both NumPy and TensorFlow.")) - @flags.validator("eval_batch_size", "eval_batch_size must be at least {}" - .format(rconst.NUM_EVAL_NEGATIVES + 1)) + @flags.validator( + "eval_batch_size", + "eval_batch_size must be at least {}".format(rconst.NUM_EVAL_NEGATIVES + + 1)) def eval_size_check(eval_batch_size): return (eval_batch_size is None or int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES) diff --git a/official/recommendation/ncf_input_pipeline.py b/official/recommendation/ncf_input_pipeline.py index f6bc0158effe4d5ca3faf7afbef1548d42c35183..93f950bcee827d6ee43cb598c56aafc2ec455fc9 100644 --- a/official/recommendation/ncf_input_pipeline.py +++ b/official/recommendation/ncf_input_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,57 +11,49 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""NCF model input pipeline.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""NCF model input pipeline.""" import functools # pylint: disable=g-bad-import-order -import tensorflow.compat.v2 as tf +import tensorflow as tf # pylint: enable=g-bad-import-order from official.recommendation import constants as rconst -from official.recommendation import movielens from official.recommendation import data_pipeline - -NUM_SHARDS = 16 +from official.recommendation import movielens def create_dataset_from_tf_record_files(input_file_pattern, pre_batch_size, batch_size, - is_training=True): + is_training=True, + rebatch=False): """Creates dataset from (tf)records files for training/evaluation.""" + if pre_batch_size != batch_size: + raise ValueError("Pre-batch ({}) size is not equal to batch " + "size ({})".format(pre_batch_size, batch_size)) files = tf.data.Dataset.list_files(input_file_pattern, shuffle=is_training) - def make_dataset(files_dataset, shard_index): - """Returns dataset for sharded tf record files.""" - if pre_batch_size != batch_size: - raise ValueError("Pre-batch ({}) size is not equal to batch " - "size ({})".format(pre_batch_size, batch_size)) - files_dataset = files_dataset.shard(NUM_SHARDS, shard_index) - dataset = files_dataset.interleave( - tf.data.TFRecordDataset, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - decode_fn = functools.partial( - data_pipeline.DatasetManager.deserialize, - batch_size=pre_batch_size, - is_training=is_training) - dataset = dataset.map( - decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) - return dataset - - dataset = tf.data.Dataset.range(NUM_SHARDS) - map_fn = functools.partial(make_dataset, files) - dataset = dataset.interleave( - map_fn, - cycle_length=NUM_SHARDS, + dataset = files.interleave( + tf.data.TFRecordDataset, + cycle_length=16, 
num_parallel_calls=tf.data.experimental.AUTOTUNE) + decode_fn = functools.partial( + data_pipeline.DatasetManager.deserialize, + batch_size=pre_batch_size, + is_training=is_training) + dataset = dataset.map( + decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) + + if rebatch: + # A workaround for TPU Pod evaluation dataset. + # TODO (b/162341937) remove once it's fixed. + dataset = dataset.unbatch() + dataset = dataset.batch(pre_batch_size) + dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) return dataset @@ -134,8 +126,8 @@ def create_ncf_input_data(params, from tf record files. Must be specified when params["train_input_dataset"] is specified. strategy: Distribution strategy used for distributed training. If specified, - used to assert that evaluation batch size is correctly a multiple of - total number of devices used. + used to assert that evaluation batch size is correctly a multiple of total + number of devices used. Returns: (training dataset, evaluation dataset, train steps per epoch, @@ -162,12 +154,18 @@ def create_ncf_input_data(params, params["train_dataset_path"], input_meta_data["train_prebatch_size"], params["batch_size"], - is_training=True) + is_training=True, + rebatch=False) + + # Re-batch evaluation dataset for TPU Pods. + # TODO (b/162341937) remove once it's fixed. 
+ eval_rebatch = (params["use_tpu"] and strategy.num_replicas_in_sync > 8) eval_dataset = create_dataset_from_tf_record_files( params["eval_dataset_path"], input_meta_data["eval_prebatch_size"], params["eval_batch_size"], - is_training=False) + is_training=False, + rebatch=eval_rebatch) num_train_steps = int(input_meta_data["num_train_steps"]) num_eval_steps = int(input_meta_data["num_eval_steps"]) diff --git a/official/recommendation/ncf_keras_main.py b/official/recommendation/ncf_keras_main.py index a4f2376b3ccbe462c2d514f64305fa5a17714a2f..2590df4ce32037dec1f7542767b0ebbcdc089ef2 100644 --- a/official/recommendation/ncf_keras_main.py +++ b/official/recommendation/ncf_keras_main.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,38 +11,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """NCF framework to train and evaluate the NeuMF model. The NeuMF model assembles both MF and MLP models under the NCF framework. Check `neumf_model.py` for more details about the models. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import json import os # pylint: disable=g-bad-import-order + from absl import app from absl import flags from absl import logging -import tensorflow.compat.v2 as tf +import tensorflow as tf # pylint: enable=g-bad-import-order +from official.common import distribute_utils from official.recommendation import constants as rconst from official.recommendation import movielens from official.recommendation import ncf_common from official.recommendation import ncf_input_pipeline from official.recommendation import neumf_model from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils from official.utils.misc import model_helpers - FLAGS = flags.FLAGS @@ -50,9 +46,7 @@ def metric_fn(logits, dup_mask, match_mlperf): dup_mask = tf.cast(dup_mask, tf.float32) logits = tf.slice(logits, [0, 1], [-1, -1]) in_top_k, _, metric_weights, _ = neumf_model.compute_top_k_and_ndcg( - logits, - dup_mask, - match_mlperf) + logits, dup_mask, match_mlperf) metric_weights = tf.cast(metric_weights, tf.float32) return in_top_k, metric_weights @@ -152,9 +146,10 @@ class CustomEarlyStopping(tf.keras.callbacks.Callback): logs = logs or {} monitor_value = logs.get(self.monitor) if monitor_value is None: - logging.warning("Early stopping conditioned on metric `%s` " - "which is not available. Available metrics are: %s", - self.monitor, ",".join(list(logs.keys()))) + logging.warning( + "Early stopping conditioned on metric `%s` " + "which is not available. 
Available metrics are: %s", self.monitor, + ",".join(list(logs.keys()))) return monitor_value @@ -181,12 +176,9 @@ def _get_keras_model(params): logits = base_model.output - zeros = tf.keras.layers.Lambda( - lambda x: x * 0)(logits) + zeros = tf.keras.layers.Lambda(lambda x: x * 0)(logits) - softmax_logits = tf.keras.layers.concatenate( - [zeros, logits], - axis=-1) + softmax_logits = tf.keras.layers.concatenate([zeros, logits], axis=-1) # Custom training loop calculates loss and metric as a part of # training/evaluation step function. @@ -204,7 +196,8 @@ def _get_keras_model(params): movielens.ITEM_COLUMN: item_input, rconst.VALID_POINT_MASK: valid_pt_mask_input, rconst.DUPLICATE_MASK: dup_mask_input, - rconst.TRAIN_LABEL_KEY: label_input}, + rconst.TRAIN_LABEL_KEY: label_input + }, outputs=softmax_logits) keras_model.summary() @@ -223,18 +216,16 @@ def run_ncf(_): model_helpers.apply_clean(FLAGS) if FLAGS.dtype == "fp16" and FLAGS.fp16_implementation == "keras": - policy = tf.keras.mixed_precision.experimental.Policy( - "mixed_float16", - loss_scale=flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic")) - tf.keras.mixed_precision.experimental.set_policy(policy) + tf.keras.mixed_precision.set_global_policy("mixed_float16") - strategy = distribution_utils.get_distribution_strategy( + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=FLAGS.distribution_strategy, num_gpus=FLAGS.num_gpus, tpu_address=FLAGS.tpu) params = ncf_common.parse_flags(FLAGS) params["distribute_strategy"] = strategy + params["use_tpu"] = (FLAGS.distribution_strategy == "tpu") if params["use_tpu"] and not params["keras_use_ctl"]: logging.error("Custom training loop must be used when using TPUStrategy.") @@ -267,13 +258,12 @@ def run_ncf(_): "val_HR_METRIC", desired_value=FLAGS.hr_threshold) callbacks.append(early_stopping_callback) - (train_input_dataset, eval_input_dataset, - num_train_steps, num_eval_steps) = \ - (ncf_input_pipeline.create_ncf_input_data( - 
params, producer, input_meta_data, strategy)) + (train_input_dataset, eval_input_dataset, num_train_steps, + num_eval_steps) = ncf_input_pipeline.create_ncf_input_data( + params, producer, input_meta_data, strategy) steps_per_epoch = None if generate_input_online else num_train_steps - with distribution_utils.get_strategy_scope(strategy): + with distribute_utils.get_strategy_scope(strategy): keras_model = _get_keras_model(params) optimizer = tf.keras.optimizers.Adam( learning_rate=params["learning_rate"], @@ -286,12 +276,17 @@ def run_ncf(_): optimizer, loss_scale=flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic")) - elif FLAGS.dtype == "fp16" and params["keras_use_ctl"]: - # When keras_use_ctl is False, instead Model.fit() automatically applies - # loss scaling so we don't need to create a LossScaleOptimizer. - optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer( - optimizer, - tf.keras.mixed_precision.experimental.global_policy().loss_scale) + elif FLAGS.dtype == "fp16": + loss_scale = flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic") + # Note Model.compile automatically wraps the optimizer with a + # LossScaleOptimizer using dynamic loss scaling. We explicitly wrap it + # here for the case where a custom training loop or fixed loss scale is + # used. + if loss_scale == "dynamic": + optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer) + else: + optimizer = tf.keras.mixed_precision.LossScaleOptimizer( + optimizer, dynamic=False, initial_scale=loss_scale) if params["keras_use_ctl"]: train_loss, eval_results = run_ncf_custom_training( @@ -311,7 +306,8 @@ def run_ncf(_): if not FLAGS.ml_perf: # Create Tensorboard summary and checkpoint callbacks. 
summary_dir = os.path.join(FLAGS.model_dir, "summaries") - summary_callback = tf.keras.callbacks.TensorBoard(summary_dir) + summary_callback = tf.keras.callbacks.TensorBoard( + summary_dir, profile_batch=0) checkpoint_path = os.path.join(FLAGS.model_dir, "checkpoint") checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( checkpoint_path, save_weights_only=True) @@ -411,8 +407,7 @@ def run_ncf_custom_training(params, optimizer.apply_gradients(grads) return loss - per_replica_losses = strategy.run( - step_fn, args=(next(train_iterator),)) + per_replica_losses = strategy.run(step_fn, args=(next(train_iterator),)) mean_loss = strategy.reduce( tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) return mean_loss @@ -431,8 +426,7 @@ def run_ncf_custom_training(params, return hr_sum, hr_count per_replica_hr_sum, per_replica_hr_count = ( - strategy.run( - step_fn, args=(next(eval_iterator),))) + strategy.run(step_fn, args=(next(eval_iterator),))) hr_sum = strategy.reduce( tf.distribute.ReduceOp.SUM, per_replica_hr_sum, axis=None) hr_count = strategy.reduce( @@ -481,26 +475,27 @@ def run_ncf_custom_training(params, # Write train loss once in every 1000 steps. 
if train_summary_writer and step % 1000 == 0: with train_summary_writer.as_default(): - tf.summary.scalar("training_loss", train_loss/(step + 1), - step=current_step) + tf.summary.scalar( + "training_loss", train_loss / (step + 1), step=current_step) for c in callbacks: c.on_batch_end(current_step) train_loss /= num_train_steps - logging.info("Done training epoch %s, epoch loss=%s.", epoch + 1, + logging.info("Done training epoch %s, epoch loss=%.3f", epoch + 1, train_loss) eval_input_iterator = iter( strategy.experimental_distribute_dataset(eval_input_dataset)) - hr_sum = 0 - hr_count = 0 + + hr_sum = 0.0 + hr_count = 0.0 for _ in range(num_eval_steps): step_hr_sum, step_hr_count = eval_step(eval_input_iterator) hr_sum += step_hr_sum hr_count += step_hr_count - logging.info("Done eval epoch %s, hit_rate=%s.", epoch + 1, + logging.info("Done eval epoch %s, hit_rate=%.3f", epoch + 1, hr_sum / hr_count) if eval_summary_writer: with eval_summary_writer.as_default(): @@ -550,7 +545,7 @@ def build_stats(loss, eval_result, time_callback): if len(timestamp_log) > 1: stats["avg_exp_per_second"] = ( time_callback.batch_size * time_callback.log_steps * - (len(time_callback.timestamp_log)-1) / + (len(time_callback.timestamp_log) - 1) / (timestamp_log[-1].timestamp - timestamp_log[0].timestamp)) return stats diff --git a/official/recommendation/ncf_test.py b/official/recommendation/ncf_test.py index 5103283e0aa617b0042ca75f5d2e9572cecb1b68..b37d0c1dcc486e8badaff7d5e3c941625245bec2 100644 --- a/official/recommendation/ncf_test.py +++ b/official/recommendation/ncf_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests NCF.""" from __future__ import absolute_import @@ -38,74 +38,80 @@ class NcfTest(tf.test.TestCase): ncf_common.define_ncf_flags() def setUp(self): + super().setUp() self.top_k_old = rconst.TOP_K self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES rconst.NUM_EVAL_NEGATIVES = 2 def tearDown(self): + super().tearDown() rconst.NUM_EVAL_NEGATIVES = self.num_eval_negatives_old rconst.TOP_K = self.top_k_old _BASE_END_TO_END_FLAGS = ['-batch_size', '1044', '-train_epochs', '1'] - @unittest.mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100) + @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_no_dist_strat(self): integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), + ncf_keras_main.main, + tmp_root=self.get_temp_dir(), extra_flags=self._BASE_END_TO_END_FLAGS + ['-distribution_strategy', 'off']) - @unittest.mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100) + @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_dist_strat(self): integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), + ncf_keras_main.main, + tmp_root=self.get_temp_dir(), extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '0']) - @unittest.mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100) + @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_dist_strat_ctl(self): - flags = (self._BASE_END_TO_END_FLAGS + - ['-num_gpus', '0'] + - ['-keras_use_ctl', 'True']) + flags = ( + self._BASE_END_TO_END_FLAGS + ['-num_gpus', '0'] + + ['-keras_use_ctl', 'True']) integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), - extra_flags=flags) + ncf_keras_main.main, 
tmp_root=self.get_temp_dir(), extra_flags=flags) - @unittest.mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100) + @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_1_gpu_dist_strat_fp16(self): if context.num_gpus() < 1: self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(1, context.num_gpus())) + '{} GPUs are not available for this test. {} GPUs are available' + .format(1, context.num_gpus())) integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), - extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '1', - '--dtype', 'fp16']) + ncf_keras_main.main, + tmp_root=self.get_temp_dir(), + extra_flags=self._BASE_END_TO_END_FLAGS + + ['-num_gpus', '1', '--dtype', 'fp16']) - @unittest.mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100) + @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_1_gpu_dist_strat_ctl_fp16(self): if context.num_gpus() < 1: self.skipTest( - '{} GPUs are not available for this test. {} GPUs are available'. - format(1, context.num_gpus())) + '{} GPUs are not available for this test. {} GPUs are available' + .format(1, context.num_gpus())) integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), - extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '1', - '--dtype', 'fp16', - '--keras_use_ctl']) + ncf_keras_main.main, + tmp_root=self.get_temp_dir(), + extra_flags=self._BASE_END_TO_END_FLAGS + + ['-num_gpus', '1', '--dtype', 'fp16', '--keras_use_ctl']) @unittest.mock.patch.object(rconst, 'SYNTHETIC_BATCHES_PER_EPOCH', 100) def test_end_to_end_keras_2_gpu_fp16(self): if context.num_gpus() < 2: self.skipTest( - "{} GPUs are not available for this test. {} GPUs are available". - format(2, context.num_gpus())) + '{} GPUs are not available for this test. 
{} GPUs are available' + .format(2, context.num_gpus())) integration.run_synthetic( - ncf_keras_main.main, tmp_root=self.get_temp_dir(), - extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '2', - '--dtype', 'fp16']) + ncf_keras_main.main, + tmp_root=self.get_temp_dir(), + extra_flags=self._BASE_END_TO_END_FLAGS + + ['-num_gpus', '2', '--dtype', 'fp16']) + -if __name__ == "__main__": +if __name__ == '__main__': tf.test.main() diff --git a/official/recommendation/neumf_model.py b/official/recommendation/neumf_model.py index 48b09293af065a19db2dbfb1d44023439c2b9765..a088807560091aea9e4c72e7d2f87b25d39e41ce 100644 --- a/official/recommendation/neumf_model.py +++ b/official/recommendation/neumf_model.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Defines NeuMF model for NCF framework. 
Some abbreviations used in the code base: @@ -111,8 +111,7 @@ def neumf_model_fn(features, labels, mode, params): loss = tf.compat.v1.losses.sparse_softmax_cross_entropy( labels=labels, logits=softmax_logits, - weights=tf.cast(valid_pt_mask, tf.float32) - ) + weights=tf.cast(valid_pt_mask, tf.float32)) tf.identity(loss, name="cross_entropy") @@ -196,15 +195,19 @@ def construct_model(user_input: tf.Tensor, item_input: tf.Tensor, # GMF part mf_user_latent = tf.keras.layers.Lambda( - mf_slice_fn, name="embedding_user_mf")(embedding_user) + mf_slice_fn, name="embedding_user_mf")( + embedding_user) mf_item_latent = tf.keras.layers.Lambda( - mf_slice_fn, name="embedding_item_mf")(embedding_item) + mf_slice_fn, name="embedding_item_mf")( + embedding_item) # MLP part mlp_user_latent = tf.keras.layers.Lambda( - mlp_slice_fn, name="embedding_user_mlp")(embedding_user) + mlp_slice_fn, name="embedding_user_mlp")( + embedding_user) mlp_item_latent = tf.keras.layers.Lambda( - mlp_slice_fn, name="embedding_item_mlp")(embedding_item) + mlp_slice_fn, name="embedding_item_mlp")( + embedding_item) # Element-wise multiply mf_vector = tf.keras.layers.multiply([mf_user_latent, mf_item_latent]) @@ -225,8 +228,11 @@ def construct_model(user_input: tf.Tensor, item_input: tf.Tensor, # Final prediction layer logits = tf.keras.layers.Dense( - 1, activation=None, kernel_initializer="lecun_uniform", - name=movielens.RATING_COLUMN)(predict_vector) + 1, + activation=None, + kernel_initializer="lecun_uniform", + name=movielens.RATING_COLUMN)( + predict_vector) # Print model topology. 
model = tf.keras.models.Model([user_input, item_input], logits) @@ -263,8 +269,7 @@ def _get_estimator_spec_with_metrics(logits: tf.Tensor, return tf.estimator.EstimatorSpec( mode=tf.estimator.ModeKeys.EVAL, loss=cross_entropy, - eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights) - ) + eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights)) def compute_eval_loss_and_metrics_helper(logits: tf.Tensor, @@ -335,9 +340,13 @@ def compute_eval_loss_and_metrics_helper(logits: tf.Tensor, # Examples are provided by the eval Dataset in a structured format, so eval # labels can be reconstructed on the fly. - eval_labels = tf.reshape(shape=(-1,), tensor=tf.one_hot( - tf.zeros(shape=(logits_by_user.shape[0],), dtype=tf.int32) + - rconst.NUM_EVAL_NEGATIVES, logits_by_user.shape[1], dtype=tf.int32)) + eval_labels = tf.reshape( + shape=(-1,), + tensor=tf.one_hot( + tf.zeros(shape=(logits_by_user.shape[0],), dtype=tf.int32) + + rconst.NUM_EVAL_NEGATIVES, + logits_by_user.shape[1], + dtype=tf.int32)) eval_labels_float = tf.cast(eval_labels, tf.float32) @@ -346,13 +355,14 @@ def compute_eval_loss_and_metrics_helper(logits: tf.Tensor, # weights for the negative examples we compute a loss which is consistent with # the training data. 
(And provides apples-to-apples comparison) negative_scale_factor = num_training_neg / rconst.NUM_EVAL_NEGATIVES - example_weights = ( - (eval_labels_float + (1 - eval_labels_float) * negative_scale_factor) * - (1 + rconst.NUM_EVAL_NEGATIVES) / (1 + num_training_neg)) + example_weights = ((eval_labels_float + + (1 - eval_labels_float) * negative_scale_factor) * + (1 + rconst.NUM_EVAL_NEGATIVES) / (1 + num_training_neg)) # Tile metric weights back to logit dimensions - expanded_metric_weights = tf.reshape(tf.tile( - metric_weights[:, tf.newaxis], (1, rconst.NUM_EVAL_NEGATIVES + 1)), (-1,)) + expanded_metric_weights = tf.reshape( + tf.tile(metric_weights[:, tf.newaxis], + (1, rconst.NUM_EVAL_NEGATIVES + 1)), (-1,)) # ignore padded examples example_weights *= tf.cast(expanded_metric_weights, tf.float32) @@ -362,12 +372,15 @@ def compute_eval_loss_and_metrics_helper(logits: tf.Tensor, def metric_fn(top_k_tensor, ndcg_tensor, weight_tensor): return { - rconst.HR_KEY: tf.compat.v1.metrics.mean(top_k_tensor, - weights=weight_tensor, - name=rconst.HR_METRIC_NAME), - rconst.NDCG_KEY: tf.compat.v1.metrics.mean(ndcg_tensor, - weights=weight_tensor, - name=rconst.NDCG_METRIC_NAME) + rconst.HR_KEY: + tf.compat.v1.metrics.mean( + top_k_tensor, weights=weight_tensor, + name=rconst.HR_METRIC_NAME), + rconst.NDCG_KEY: + tf.compat.v1.metrics.mean( + ndcg_tensor, + weights=weight_tensor, + name=rconst.NDCG_METRIC_NAME) } return cross_entropy, metric_fn, in_top_k, ndcg, metric_weights @@ -405,27 +418,26 @@ def compute_top_k_and_ndcg(logits: tf.Tensor, # Determine the location of the first element in each row after the elements # are sorted. - sort_indices = tf.argsort( - logits_by_user, axis=1, direction="DESCENDING") + sort_indices = tf.argsort(logits_by_user, axis=1, direction="DESCENDING") # Use matrix multiplication to extract the position of the true item from the # tensor of sorted indices. 
This approach is chosen because both GPUs and TPUs # perform matrix multiplications very quickly. This is similar to np.argwhere. # However this is a special case because the target will only appear in # sort_indices once. - one_hot_position = tf.cast(tf.equal(sort_indices, rconst.NUM_EVAL_NEGATIVES), - tf.int32) + one_hot_position = tf.cast( + tf.equal(sort_indices, rconst.NUM_EVAL_NEGATIVES), tf.int32) sparse_positions = tf.multiply( - one_hot_position, tf.range(logits_by_user.shape[1])[tf.newaxis, :]) + one_hot_position, + tf.range(logits_by_user.shape[1])[tf.newaxis, :]) position_vector = tf.reduce_sum(sparse_positions, axis=1) in_top_k = tf.cast(tf.less(position_vector, rconst.TOP_K), tf.float32) - ndcg = tf.math.log(2.) / tf.math.log( - tf.cast(position_vector, tf.float32) + 2) + ndcg = tf.math.log(2.) / tf.math.log(tf.cast(position_vector, tf.float32) + 2) ndcg *= in_top_k # If a row is a padded row, all but the first element will be a duplicate. - metric_weights = tf.not_equal(tf.reduce_sum(duplicate_mask_by_user, axis=1), - rconst.NUM_EVAL_NEGATIVES) + metric_weights = tf.not_equal( + tf.reduce_sum(duplicate_mask_by_user, axis=1), rconst.NUM_EVAL_NEGATIVES) return in_top_k, ndcg, metric_weights, logits_by_user diff --git a/official/recommendation/popen_helper.py b/official/recommendation/popen_helper.py index dcdca4ced8e0b45294023c4675d16efd875694b7..c13c795e7833f536fedee381fb740ab76ab00ab8 100644 --- a/official/recommendation/popen_helper.py +++ b/official/recommendation/popen_helper.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Helper file for running the async data generation process in OSS.""" import contextlib diff --git a/official/recommendation/stat_utils.py b/official/recommendation/stat_utils.py index 658a2721e98a88d71dc2ac4562366283ffd2fc47..3f8c8050dad910bbadabe981b951ca8782c301f7 100644 --- a/official/recommendation/stat_utils.py +++ b/official/recommendation/stat_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Statistics utility functions of NCF.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os - import numpy as np @@ -37,8 +35,7 @@ def permutation(args): args: A size two tuple that will unpacked into the size of the permutation and the random seed. This form is used because starmap is not universally available. - - returns: + Returns: A NumPy array containing a random permutation. 
""" x, seed = args @@ -53,8 +50,11 @@ def permutation(args): def very_slightly_biased_randint(max_val_vector): sample_dtype = np.uint64 out_dtype = max_val_vector.dtype - samples = np.random.randint(low=0, high=np.iinfo(sample_dtype).max, - size=max_val_vector.shape, dtype=sample_dtype) + samples = np.random.randint( + low=0, + high=np.iinfo(sample_dtype).max, + size=max_val_vector.shape, + dtype=sample_dtype) return np.mod(samples, max_val_vector.astype(sample_dtype)).astype(out_dtype) @@ -88,5 +88,5 @@ def mask_duplicates(x, axis=1): # type: (np.ndarray, int) -> np.ndarray # Duplicate values will have a difference of zero. By definition the first # element is never a duplicate. - return np.where(diffs[np.arange(x.shape[0])[:, np.newaxis], - inv_x_sort_ind], 0, 1) + return np.where(diffs[np.arange(x.shape[0])[:, np.newaxis], inv_x_sort_ind], + 0, 1) diff --git a/official/requirements.txt b/official/requirements.txt index a29867a17bb3d8ba106bd6c9cd29743406222f19..74028adcb55626ea73c5ec728b207ad1e4819519 100644 --- a/official/requirements.txt +++ b/official/requirements.txt @@ -3,25 +3,26 @@ google-api-python-client>=1.6.7 google-cloud-bigquery>=0.31.0 kaggle>=1.3.9 numpy>=1.15.4 -oauth2client>=4.1.2 +oauth2client pandas>=0.22.0 psutil>=5.4.3 py-cpuinfo>=3.3.0 scipy>=0.19.1 tensorflow-hub>=0.6.0 -tensorflow-model-optimization>=0.2.1 +tensorflow-model-optimization>=0.4.1 tensorflow-datasets tensorflow-addons -dataclasses +dataclasses;python_version<"3.7" gin-config tf_slim>=1.1.0 Cython matplotlib -pyyaml +pyyaml>=5.1 # CV related dependencies opencv-python-headless Pillow --e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI +pycocotools # NLP related dependencies seqeval sentencepiece +sacrebleu diff --git a/official/staging/training/controller.py b/official/staging/training/controller.py deleted file mode 100644 index a07be66329ad49ba07dff300d66f153552e1c78f..0000000000000000000000000000000000000000 --- 
a/official/staging/training/controller.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A light weight utilities to train TF2 models.""" - -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import time - -from absl import logging - -import tensorflow.compat.v2 as tf -from typing import Callable, Dict, Optional, Text - -from official.staging.training import utils - - -class Controller(object): - """Class that facilitates training and evaluation of models.""" - - def __init__( - self, - strategy: Optional[tf.distribute.Strategy] = None, - train_fn: Optional[Callable[[tf.Tensor], - Optional[Dict[Text, tf.Tensor]]]] = None, - eval_fn: Optional[Callable[[tf.Tensor], - Optional[Dict[Text, tf.Tensor]]]] = None, - global_step: Optional[tf.Variable] = None, - # Train related - train_steps: Optional[int] = None, - steps_per_loop: Optional[int] = None, - summary_dir: Optional[Text] = None, - checkpoint_manager: Optional[tf.train.CheckpointManager] = None, - # summary related - summary_interval: Optional[int] = None, - # Evaluation related - eval_summary_dir: Optional[Text] = None, - eval_steps: Optional[int] = None, - eval_interval: Optional[int] = None): - """Constructs a `Controller` 
instance. - - Args: - strategy: An instance of `tf.distribute.Strategy`. - train_fn: A callable defined as `def train_fn(num_steps)`, which - `num_steps` indicates the number of steps to run for each loop. - eval_fn: A callable defined as `def eval_fn(num_steps)`, which `num_steps` - indicates the number of steps for one evaluation. - global_step: An integer `tf.Variable` indicating the global training step - number. Usually this can be obtained from `iterations` property of the - model's optimizer (e.g. `self.optimizer.iterations`), or users can - create their own global step variable as well. If the users create their - own global step variable, it is recommended to create the `tf.Variable` - inside strategy scope, and with - `aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA`. - train_steps: The total (maximum) number of training steps to perform. - steps_per_loop: The number of steps to run in each "inner loop" of - training (passed to the `num_steps` parameter of `train_fn`). - summary_dir: The directory to restore and write checkpoints and summaries. - If None, it will be set to `checkpoint_manager.directory`. - checkpoint_manager: An instance of `tf.train.CheckpointManager`. - summary_interval: Step interval for training summaries. Note that this - argument only applies to the summaries outside the training loop. If the - value is None, then training summaries are not enabled. - eval_summary_dir: The directory to write eval summaries. If None, it will - be set to `summary_dir`. - eval_steps: Number of steps to run evaluation. - eval_interval: Step interval for evaluation. If None, will skip evaluation - in the middle of training. Note that evaluation only happens outside the - training loop, which the loop iteration is specify by `steps_per_loop` - parameter. - - Raises: - ValueError: If both `train_fn` and `eval_fn` are None. - ValueError: If `train_fn` is not None and `train_steps` is None. 
- ValueError: If `steps_per_loop` is None when `train_fn` is provided. - ValueError: If `steps_per_loop` is not a positive integer. - """ - if train_fn is None and eval_fn is None: - raise ValueError("`train_fn` and `eval_fn` should not both be None") - - # TODO(rxsang): Support training until exhaustion by passing - # `train_steps=-1`. Currently it cannot be supported with a host training - # loop because break statements are not supported with distributed dataset. - if train_fn is not None: - if train_steps is None: - raise ValueError("`train_steps` is required when `train_fn` is " - "provided.") - if steps_per_loop is None: - raise ValueError("`steps_per_loop` is required when `train_fn is " - "provided.") - if not isinstance(steps_per_loop, int) or steps_per_loop < 1: - raise ValueError("`steps_per_loop` should be a positive integer") - if summary_interval is not None and summary_interval <= 0: - raise ValueError("`summary_interval` should be larger than 0") - - self.strategy = strategy or tf.distribute.get_strategy() - - self.train_fn = train_fn - self.eval_fn = eval_fn - self.global_step = global_step - self.checkpoint_manager = checkpoint_manager - - if self.train_fn is not None: - self.train_steps = train_steps - self.steps_per_loop = steps_per_loop - if summary_dir: - self.summary_dir = summary_dir - elif checkpoint_manager: - self.summary_dir = checkpoint_manager.directory - else: - self.summary_dir = None - - self.summary_interval = summary_interval - if self.summary_dir and self.summary_interval: - summary_writer = tf.summary.create_file_writer(self.summary_dir) - else: - summary_writer = None - # TODO(rxsang): Consider pass SummaryManager directly into Controller for - # maximum customizability. 
- self.summary_manager = utils.SummaryManager( - summary_writer, - tf.summary.scalar, - global_step=self.global_step, - summary_interval=self.summary_interval) - - if self.eval_fn is not None: - eval_summary_dir = eval_summary_dir or self.summary_dir - eval_summary_writer = tf.summary.create_file_writer( - eval_summary_dir) if eval_summary_dir else None - self.eval_summary_manager = utils.SummaryManager( - eval_summary_writer, tf.summary.scalar, global_step=self.global_step) - - self.eval_steps = eval_steps - self.eval_interval = eval_interval - - # Creates and initializes the interval triggers. - self.eval_trigger = utils.IntervalTrigger(self.eval_interval, - self.global_step.numpy()) # pytype: disable=attribute-error - - if self.global_step: - tf.summary.experimental.set_step(self.global_step) - - # Restores the model if needed. - if self.checkpoint_manager is not None: - model_restored = self._restore_model() - if not model_restored and self.checkpoint_manager.checkpoint_interval: - # If the model is not restored from a checkpoint, save an initial - # checkpoint. - ckpt_path = self.checkpoint_manager.save( - checkpoint_number=self.global_step) - logging.info("Saved checkpoins in %s", ckpt_path) - - def _restore_model(self, checkpoint_path=None): - """Restore or initialize the model. - - Args: - checkpoint_path: An optional string indicates the checkpoint path to - restore. If None, will restore from `self.checkpoint_manager`. - - Returns: - True if the latest checkpoint is found or restored. Otherwise False. - """ - with self.strategy.scope(): - # Checkpoint restoring should be inside scope. 
b/139450638 - if checkpoint_path is not None: - self.checkpoint_manager.checkpoint.restore(checkpoint_path) - return True - return self.checkpoint_manager.restore_or_initialize() - - def _evaluate_once(self, current_step): - """Runs the evaluation once.""" - logging.info("Start evaluation at step: %s", current_step) - - with self.eval_summary_manager.summary_writer.as_default(): - eval_outputs = self.eval_fn(self.eval_steps) - - if eval_outputs: - eval_outputs = tf.nest.map_structure(lambda x: x.numpy(), eval_outputs) - - info = "step: {} evaluation metric: {}".format( - current_step, eval_outputs) - self._log_info(info) - - self.eval_summary_manager.write_summaries(eval_outputs) - self.eval_summary_manager.flush() - - def _maybe_save_checkpoints(self, current_step, force_trigger=False): - if self.checkpoint_manager and self.checkpoint_manager.checkpoint_interval: - ckpt_path = self.checkpoint_manager.save( - checkpoint_number=current_step, check_interval=not force_trigger) - if ckpt_path is not None: - logging.info("Saved checkpoins in %s", ckpt_path) - - def _maybe_evaluate(self, current_step, force_trigger=False): - if self.eval_trigger(current_step, force_trigger): - self._evaluate_once(current_step) - - def _log_info(self, message): - """Logs `message` to the `info` log, and also prints to stdout.""" - logging.info(message) - print(message) - - def train(self, evaluate=True): - """Runs the training, with optional evaluation. - - This handles evaluation, gathering summaries, and saving checkpoints. - - Args: - evaluate: A boolean indicates whether to perform evaluation during - training. - - Raises: - RuntimeError: If `global_step` is not updated correctly in `train_fn`. 
- """ - if self.train_fn is None: - raise ValueError("`self.train_fn` is required when calling `train` " - "method.") - if self.global_step is None: - raise ValueError("`self.global_step` is required when calling `train` " - "method.") - if evaluate and self.eval_fn is None: - raise ValueError("`self.eval_fn` is required when calling `train` method " - "with `evaluate=True`") - - step_timer = _StepTimer(self.global_step) - current_step = self.global_step.numpy() - logging.info("Train at step %s of %s", current_step, self.train_steps) - while current_step < self.train_steps: - # Calculates steps to run for the next train loop. - steps_per_loop = min(self.train_steps - current_step, self.steps_per_loop) - logging.info("Entering training loop with %s steps, at step %s of %s", - steps_per_loop, current_step, self.train_steps) - current_step += steps_per_loop - steps_per_loop = tf.convert_to_tensor(steps_per_loop, dtype=tf.int32) - - with self.summary_manager.summary_writer.as_default(): - train_outputs = self.train_fn(steps_per_loop) - - # Updates and verifies the current step after a training loop finishes. - if current_step != self.global_step.numpy(): - raise RuntimeError("`self.train_fn` is not updating `global_step` " - "correctly, expected: %s, actual: %s" % - (current_step, self.global_step.numpy())) - - # Print information like metrics and steps_per_second after a training - # loop. 
- if train_outputs: - train_outputs = tf.nest.map_structure( - lambda x: x.numpy(), train_outputs) - steps_per_second = step_timer.steps_per_second() - info = "step: {} steps_per_second: {:.2f} {}".format( - current_step, steps_per_second, train_outputs) - self._log_info(info) - - train_outputs = train_outputs or {} - train_outputs["steps_per_second"] = steps_per_second - self.summary_manager.write_summaries(train_outputs) - - self._maybe_save_checkpoints(current_step) - - if evaluate: - self._maybe_evaluate(current_step) - - self.summary_manager.write_summaries(train_outputs, always_write=True) - self.summary_manager.flush() - self._maybe_save_checkpoints(current_step, force_trigger=True) - if evaluate: - self._maybe_evaluate(current_step, force_trigger=True) - - def evaluate(self, continuous=False, timeout_fn=None): - """Runs the evaluation. - - Args: - continuous: If `True`, will continously monitor the checkpoint directory - to evaluate on the latest checkpoint. If `False`, will do the evaluation - once. - timeout_fn: Optional callable to call after a timeout. If the function - returns True, then it means that no new checkpoints will be generated - and the iterator will exit. - - Raises: - ValueError: If no checkpoint found in `self.checkpoint_manager.directory`. 
- """ - if self.eval_fn is None: - raise ValueError("`self.eval_fn` should not be None to call " - "`evaluate()` method.") - - if not continuous and timeout_fn is not None: - raise ValueError("`timeout_fn` can be only passed when `continuous` is " - "True") - - if continuous: - for checkpoint_path in tf.train.checkpoints_iterator( - self.checkpoint_manager.directory, timeout_fn=timeout_fn): - self._restore_model(checkpoint_path) - self._evaluate_once(self.global_step.numpy()) - return - - latest_checkpoint = self.checkpoint_manager.latest_checkpoint - if not latest_checkpoint: - raise ValueError("no checkpoint found in dir %s" % - self.checkpoint_manager.directory) - self._restore_model() - self._evaluate_once(self.global_step.numpy()) - - -class _StepTimer(object): - """Utility class for measuring steps/second.""" - - def __init__(self, step): - self.step = step - self.start() - - def start(self): - self.last_iteration = self.step.numpy() - self.last_time = time.time() - - def steps_per_second(self, restart=True): - value = ((self.step.numpy() - self.last_iteration) / - (time.time() - self.last_time)) - if restart: - self.start() - return value diff --git a/official/staging/training/controller_test.py b/official/staging/training/controller_test.py deleted file mode 100644 index eeaa191c04d40fcc108ed7b00dec86d30d5a2a0b..0000000000000000000000000000000000000000 --- a/official/staging/training/controller_test.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for official.staging.training.controller.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import strategy_combinations -from official.staging.training import controller -from official.staging.training import standard_runnable - - -def all_strategy_combinations(): - """Gets combinations of distribution strategies.""" - return combinations.combine( - strategy=[ - strategy_combinations.one_device_strategy, - strategy_combinations.tpu_strategy, - strategy_combinations.one_device_strategy_gpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode="eager", - ) - - -def create_model(): - x = tf.keras.layers.Input(shape=(3,), name="input") - y = tf.keras.layers.Dense(4, name="dense")(x) - model = tf.keras.Model(x, y) - return model - - -def summaries_with_matching_keyword(keyword, summary_dir): - """Yields summary protos matching given keyword from event file.""" - event_paths = tf.io.gfile.glob(os.path.join(summary_dir, "events*")) - for event in tf.compat.v1.train.summary_iterator(event_paths[-1]): - if event.summary is not None: - for value in event.summary.value: - if keyword in value.tag: - tf.compat.v1.logging.error(event) - yield event.summary - - -def check_eventfile_for_keyword(keyword, summary_dir): - """Checks event files for the keyword.""" - return any(summaries_with_matching_keyword(keyword, summary_dir)) - - -def dataset_fn(ctx): - del ctx - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = tf.data.Dataset.from_tensor_slices((inputs, 
targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10, drop_remainder=True) - return dataset - - -class TestRunnable(standard_runnable.StandardTrainable, - standard_runnable.StandardEvaluable): - """Implements the training and evaluation APIs for the test model.""" - - def __init__(self): - standard_runnable.StandardTrainable.__init__(self) - standard_runnable.StandardEvaluable.__init__(self) - self.strategy = tf.distribute.get_strategy() - self.model = create_model() - self.optimizer = tf.keras.optimizers.RMSprop() - self.global_step = self.optimizer.iterations - self.train_loss = tf.keras.metrics.Mean("train_loss", dtype=tf.float32) - self.eval_loss = tf.keras.metrics.Mean("eval_loss", dtype=tf.float32) - - def build_train_dataset(self): - return self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) - - def train_step(self, iterator): - - def _replicated_step(inputs): - """Replicated training step.""" - inputs, targets = inputs - with tf.GradientTape() as tape: - outputs = self.model(inputs) - loss = tf.math.reduce_sum(outputs - targets) - grads = tape.gradient(loss, self.model.variables) - self.optimizer.apply_gradients(zip(grads, self.model.variables)) - self.train_loss.update_state(loss) - - self.strategy.run(_replicated_step, args=(next(iterator),)) - - def train_loop_end(self): - return { - "loss": self.train_loss.result(), - } - - def build_eval_dataset(self): - return self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) - - def eval_begin(self): - self.eval_loss.reset_states() - - def eval_step(self, iterator): - - def _replicated_step(inputs): - """Replicated evaluation step.""" - inputs, targets = inputs - outputs = self.model(inputs) - loss = tf.math.reduce_sum(outputs - targets) - self.eval_loss.update_state(loss) - - self.strategy.run(_replicated_step, args=(next(iterator),)) - - def eval_end(self): - return { - "eval_loss": self.eval_loss.result(), - } - - -class 
ControllerTest(tf.test.TestCase, parameterized.TestCase): - - def setUp(self): - super(ControllerTest, self).setUp() - self.model_dir = self.get_temp_dir() - - def test_no_checkpoint(self): - test_runnable = TestRunnable() - # No checkpoint manager and no strategy. - test_controller = controller.Controller( - train_fn=test_runnable.train, - eval_fn=test_runnable.evaluate, - global_step=test_runnable.global_step, - train_steps=10, - steps_per_loop=2, - summary_dir=os.path.join(self.model_dir, "summaries/train"), - summary_interval=2, - eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), - eval_steps=2, - eval_interval=5) - test_controller.train(evaluate=True) - self.assertEqual(test_runnable.global_step.numpy(), 10) - # Loss and accuracy values should be written into summaries. - self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( - "loss", os.path.join(self.model_dir, "summaries/train"))) - self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( - "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) - # No checkpoint, so global step starts from 0. - test_runnable.global_step.assign(0) - test_controller.train(evaluate=True) - self.assertEqual(test_runnable.global_step.numpy(), 10) - - def test_no_checkpoint_and_summaries(self): - test_runnable = TestRunnable() - # No checkpoint + summary directories. 
- test_controller = controller.Controller( - train_fn=test_runnable.train, - eval_fn=test_runnable.evaluate, - global_step=test_runnable.global_step, - train_steps=10, - steps_per_loop=2, - eval_steps=2, - eval_interval=5) - test_controller.train(evaluate=True) - self.assertEqual(test_runnable.global_step.numpy(), 10) - - @combinations.generate(all_strategy_combinations()) - def test_train_and_evaluate(self, strategy): - with strategy.scope(): - test_runnable = TestRunnable() - - checkpoint = tf.train.Checkpoint( - model=test_runnable.model, optimizer=test_runnable.optimizer) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - self.model_dir, - max_to_keep=None, - step_counter=test_runnable.global_step, - checkpoint_interval=10) - test_controller = controller.Controller( - strategy=strategy, - train_fn=test_runnable.train, - eval_fn=test_runnable.evaluate, - global_step=test_runnable.global_step, - train_steps=10, - steps_per_loop=2, - summary_dir=os.path.join(self.model_dir, "summaries/train"), - summary_interval=2, - checkpoint_manager=checkpoint_manager, - eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), - eval_steps=2, - eval_interval=5) - test_controller.train(evaluate=True) - - # Checkpoints are saved. - self.assertNotEmpty(tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*"))) - - # Loss and accuracy values should be written into summaries. 
- self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( - "loss", os.path.join(self.model_dir, "summaries/train"))) - self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( - "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) - - @combinations.generate(all_strategy_combinations()) - def test_train_only(self, strategy): - with strategy.scope(): - test_runnable = TestRunnable() - - checkpoint = tf.train.Checkpoint( - model=test_runnable.model, optimizer=test_runnable.optimizer) - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - self.model_dir, - max_to_keep=None, - step_counter=test_runnable.global_step, - checkpoint_interval=10) - test_controller = controller.Controller( - strategy=strategy, - train_fn=test_runnable.train, - global_step=test_runnable.global_step, - train_steps=10, - steps_per_loop=2, - summary_dir=os.path.join(self.model_dir, "summaries/train"), - summary_interval=2, - checkpoint_manager=checkpoint_manager, - eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), - ) - test_controller.train(evaluate=False) - - # Checkpoints are saved. - self.assertNotEmpty(tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*"))) - - # Only train summaries are written. 
- self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( - "loss", os.path.join(self.model_dir, "summaries/train"))) - self.assertFalse( - tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/eval"))) - - @combinations.generate(all_strategy_combinations()) - def test_evaluate_only(self, strategy): - with strategy.scope(): - test_runnable = TestRunnable() - - checkpoint = tf.train.Checkpoint(model=test_runnable.model) - checkpoint.save(os.path.join(self.model_dir, "ckpt")) - - checkpoint_manager = tf.train.CheckpointManager( - checkpoint, - self.model_dir, - max_to_keep=None, - step_counter=test_runnable.global_step) - test_controller = controller.Controller( - strategy=strategy, - eval_fn=test_runnable.evaluate, - global_step=test_runnable.global_step, - checkpoint_manager=checkpoint_manager, - summary_dir=os.path.join(self.model_dir, "summaries/train"), - eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), - eval_steps=2, - eval_interval=5) - test_controller.evaluate() - - # Only eval summaries are written - self.assertFalse( - tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/train"))) - self.assertNotEmpty( - tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( - "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/staging/training/grad_utils.py b/official/staging/training/grad_utils.py index efda2e7616e5ca841dae0877f951982371a44bba..48e7566ed9a78b0f93ac69de26c43664b05ea84b 100644 --- a/official/staging/training/grad_utils.py +++ b/official/staging/training/grad_utils.py @@ -14,14 +14,9 @@ # ============================================================================== """Some gradient util functions to help users writing custom training loop.""" -from __future__ import absolute_import -from __future__ 
import division -# from __future__ import google_type_annotations -from __future__ import print_function - from absl import logging -import tensorflow.compat.v2 as tf +import tensorflow as tf def _filter_grads(grads_and_vars): @@ -48,7 +43,8 @@ def _filter_grads(grads_and_vars): def _filter_and_allreduce_gradients(grads_and_vars, - allreduce_precision="float32"): + allreduce_precision="float32", + bytes_per_pack=0): """Filter None grads and then allreduce gradients in specified precision. This utils function is used when users intent to explicitly allreduce @@ -56,9 +52,11 @@ def _filter_and_allreduce_gradients(grads_and_vars, The allreduced gradients are then passed to optimizer.apply_gradients( experimental_aggregate_gradients=False). - Arguments: + Args: grads_and_vars: gradients and variables pairs. allreduce_precision: Whether to allreduce gradients in float32 or float16. + bytes_per_pack: A non-negative integer. Breaks collective operations into + packs of certain size. If it's zero, all gradients are in one pack. Returns: pairs of allreduced non-None gradients and variables. 
@@ -67,8 +65,10 @@ def _filter_and_allreduce_gradients(grads_and_vars, (grads, variables) = zip(*filtered_grads_and_vars) if allreduce_precision == "float16": grads = [tf.cast(grad, "float16") for grad in grads] - allreduced_grads = tf.distribute.get_replica_context().all_reduce( - tf.distribute.ReduceOp.SUM, grads) + hints = tf.distribute.experimental.CommunicationOptions( + bytes_per_pack=bytes_per_pack) + allreduced_grads = tf.distribute.get_strategy( # pylint: disable=protected-access + ).extended._replica_ctx_all_reduce(tf.distribute.ReduceOp.SUM, grads, hints) if allreduce_precision == "float16": allreduced_grads = [tf.cast(grad, "float32") for grad in allreduced_grads] return allreduced_grads, variables @@ -85,7 +85,8 @@ def minimize_using_explicit_allreduce(tape, loss, trainable_variables, pre_allreduce_callbacks=None, - post_allreduce_callbacks=None): + post_allreduce_callbacks=None, + allreduce_bytes_per_pack=0): """Minimizes loss for one step by updating `trainable_variables`. Minimizes loss for one step by updating `trainable_variables`. @@ -95,7 +96,7 @@ def minimize_using_explicit_allreduce(tape, For TPU and GPU training using FP32, explicit allreduce will aggregate gradients in FP32 format. - Arguments: + Args: tape: An instance of `tf.GradientTape`. optimizer: An instance of `tf.keras.optimizers.Optimizer`. loss: the loss tensor. @@ -103,17 +104,20 @@ def minimize_using_explicit_allreduce(tape, pre_allreduce_callbacks: A list of callback functions that takes gradients and model variables pairs as input, manipulate them, and returns a new gradients and model variables pairs. The callback functions will be - invoked in the list order and before gradients are allreduced. - With mixed precision training, the pre_allreduce_allbacks will be - applied on scaled_gradients. Default is no callbacks. + invoked in the list order and before gradients are allreduced. 
With + mixed precision training, the pre_allreduce_allbacks will be applied on + scaled_gradients. Default is no callbacks. post_allreduce_callbacks: A list of callback functions that takes gradients and model variables pairs as input, manipulate them, and returns a new gradients and model variables paris. The callback functions will be invoked in the list order and right before gradients are applied to variables for updates. Default is no callbacks. + allreduce_bytes_per_pack: A non-negative integer. Breaks collective + operations into packs of certain size. If it's zero, all gradients are + in one pack. """ if isinstance(optimizer, - tf.keras.mixed_precision.experimental.LossScaleOptimizer): + tf.keras.mixed_precision.LossScaleOptimizer): # FP16 GPU code path with tape: scaled_loss = optimizer.get_scaled_loss(loss) @@ -123,7 +127,9 @@ def minimize_using_explicit_allreduce(tape, grads_and_vars = _run_callbacks(pre_allreduce_callbacks, grads_and_vars) (allreduced_scaled_grads, filtered_training_vars) = _filter_and_allreduce_gradients( - grads_and_vars, allreduce_precision="float16") + grads_and_vars, + allreduce_precision="float16", + bytes_per_pack=allreduce_bytes_per_pack) allreduced_unscaled_grads = optimizer.get_unscaled_gradients( allreduced_scaled_grads) grads_and_vars = zip(allreduced_unscaled_grads, filtered_training_vars) @@ -135,7 +141,9 @@ def minimize_using_explicit_allreduce(tape, grads_and_vars = _run_callbacks(pre_allreduce_callbacks, grads_and_vars) (allreduced_grads, filtered_training_vars) = _filter_and_allreduce_gradients( - grads_and_vars, allreduce_precision="float32") + grads_and_vars, + allreduce_precision="float32", + bytes_per_pack=allreduce_bytes_per_pack) grads_and_vars = zip(allreduced_grads, filtered_training_vars) if post_allreduce_callbacks: grads_and_vars = _run_callbacks(post_allreduce_callbacks, grads_and_vars) diff --git a/official/staging/training/runnable.py b/official/staging/training/runnable.py deleted file mode 100644 index 
1af6eca06a337506a68d6329e0da16c9ca095e0a..0000000000000000000000000000000000000000 --- a/official/staging/training/runnable.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""An abstraction that users can easily handle their custom training loops.""" - -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import abc -import six -import tensorflow.compat.v2 as tf -from typing import Dict, Optional, Text - - -@six.add_metaclass(abc.ABCMeta) -class AbstractTrainable(tf.Module): - """An abstract class defining the APIs required for training.""" - - @abc.abstractmethod - def train(self, - num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """Implements model training with multiple steps. - - In training, it is common to break the total training steps into several - training loops, so users can do checkpointing, write summaries and run some - python callbacks. This is necessary for getting good performance in TPU - training, as the overhead for launching a multi worker tf.function may be - large in Eager mode. It is usually encouraged to create a host training loop - (e.g. using a `tf.range` wrapping `strategy.run` inside a - `tf.function`) in the TPU case. 
For the cases that don't require host - training loop to acheive peak performance, users can just implement a simple - python loop to drive each step. - - Args: - num_steps: A guideline for how many training steps to run. Note that it is - up to the model what constitutes a "step" (this may involve more than - one update to model parameters, e.g. if training a GAN). - - Returns: - The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. - """ - pass - - -@six.add_metaclass(abc.ABCMeta) -class AbstractEvaluable(tf.Module): - """An abstract class defining the APIs required for evaluation.""" - - @abc.abstractmethod - def evaluate( - self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """Implements model evaluation. - - Args: - num_steps: A guideline for how many evaluation steps to run. Note that it - is up to the model what constitutes a "step". Generally, it may be - desirable to support both a limited number of eval steps and iterating - over a full dataset (however many steps are required) when `num_steps` - is `None`. - - Returns: - The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. - """ - pass diff --git a/official/staging/training/standard_runnable.py b/official/staging/training/standard_runnable.py deleted file mode 100644 index 20dd66f28e44f7b799dff4af826dcb22bb13595a..0000000000000000000000000000000000000000 --- a/official/staging/training/standard_runnable.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""An abstraction that users can easily handle their custom training loops.""" - -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import abc -import six -import tensorflow.compat.v2 as tf -from typing import Dict, Optional, Text - -from official.staging.training import runnable -from official.staging.training import utils - - -@six.add_metaclass(abc.ABCMeta) -class StandardTrainable(runnable.AbstractTrainable): - """Implements the standard functionality of AbstractTrainable APIs.""" - - def __init__(self, use_tf_while_loop=True, use_tf_function=True): - if use_tf_while_loop and not use_tf_function: - raise ValueError("`use_tf_while_loop=True` and `use_tf_function=False` " - "is not supported") - self.use_tf_while_loop = use_tf_while_loop - self.use_tf_function = use_tf_function - self.train_dataset = None - self.train_iter = None - self.train_loop_fn = None - - @abc.abstractmethod - def build_train_dataset(self): - """Builds the training datasets. - - Returns: - A tf.nest-compatible structure of tf.data.Dataset or DistributedDataset. 
- """ - pass - - def train(self, - num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """See base class.""" - if self.train_dataset is None: - # Build train input dataset - self.train_dataset = self.build_train_dataset() - self.train_iter = tf.nest.map_structure(iter, self.train_dataset) - - if self.train_loop_fn is None: - train_fn = self.train_step - if self.use_tf_while_loop: - self.train_loop_fn = utils.create_tf_while_loop_fn(train_fn) - else: - if self.use_tf_function: - train_fn = tf.function(train_fn) - self.train_loop_fn = utils.create_loop_fn(train_fn) - - self.train_loop_begin() - self.train_loop_fn(self.train_iter, num_steps) - return self.train_loop_end() - - def train_loop_begin(self): - """Called once at the beginning of the training loop. - - This is a good place to reset metrics that accumulate values over multiple - steps of training. - """ - pass - - @abc.abstractmethod - def train_step(self, iterator): - """Implements one step of training. - - What a "step" consists of is up to the implementer. If using distribution - strategies, the call to this method should take place in the "cross-replica - context" for generality, to allow e.g. multiple iterator dequeues and calls - to `strategy.run`. - - Args: - iterator: A tf.nest-compatible structure of tf.data Iterator or - DistributedIterator. - """ - pass - - def train_loop_end(self) -> Optional[Dict[Text, tf.Tensor]]: - """Called at the end of the training loop. - - This is a good place to get metric results. The value returned from this - function will be returned as-is from the train() method. - - Returns: - The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. 
- """ - pass - - -@six.add_metaclass(abc.ABCMeta) -class StandardEvaluable(runnable.AbstractEvaluable): - """Implements the standard functionality of AbstractEvaluable APIs.""" - - def __init__(self, use_tf_function=True): - self.eval_use_tf_function = use_tf_function - self.eval_dataset = None - self.eval_loop_fn = None - - @abc.abstractmethod - def build_eval_dataset(self): - """Builds the evaluation datasets. - - Returns: - A tf.nest-compatible structure of tf.data.Dataset or DistributedDataset. - """ - pass - - def evaluate( - self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """See base class.""" - if self.eval_dataset is None: - # Build train input dataset - self.eval_dataset = self.build_eval_dataset() - - if self.eval_loop_fn is None: - eval_fn = self.eval_step - if self.eval_use_tf_function: - eval_fn = tf.function(eval_fn) - self.eval_loop_fn = utils.create_loop_fn(eval_fn) - - eval_iter = tf.nest.map_structure(iter, self.eval_dataset) - - self.eval_begin() - self.eval_loop_fn(eval_iter, num_steps) - return self.eval_end() - - def eval_begin(self): - """Called once at the beginning of the evaluation. - - This is a good place to reset metrics that accumulate values over the entire - evaluation. - """ - pass - - @abc.abstractmethod - def eval_step(self, iterator): - """Implements one step of evaluation. - - What a "step" consists of is up to the implementer. If using distribution - strategies, the call to this method should take place in the "cross-replica - context" for generality, to allow e.g. multiple iterator dequeues and calls - to `strategy.run`. - - Args: - iterator: A tf.nest-compatible structure of tf.data Iterator or - DistributedIterator. - """ - pass - - def eval_end(self) -> Optional[Dict[Text, tf.Tensor]]: - """Called at the end of the evaluation. - - This is a good place to get metric results. The value returned from this - function will be returned as-is from the evaluate() method. 
- - Returns: - The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. - """ - pass diff --git a/official/staging/training/utils.py b/official/staging/training/utils.py deleted file mode 100644 index 33fa368b7b966e449c8309e523cd31db73efb978..0000000000000000000000000000000000000000 --- a/official/staging/training/utils.py +++ /dev/null @@ -1,342 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Some layered modules/functions to help users writing custom training loop.""" - -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import abc -import inspect -import six - -import tensorflow.compat.v2 as tf - - -def create_loop_fn(step_fn): - """Creates a multiple steps function driven by the python while loop. - - Args: - step_fn: A function which takes `iterator` as input. - - Returns: - A callable defined as the `loop_fn` defination below. - """ - - def loop_fn(iterator, num_steps, state=None, reduce_fn=None): - """A loop function with multiple steps. - - Args: - iterator: A nested structure of tf.data `Iterator` or - `DistributedIterator`. - num_steps: The number of steps in the loop. If `num_steps==-1`, will - iterate until exausting the iterator. 
- state: An optional initial state before running the loop. - reduce_fn: a callable defined as `def reduce_fn(state, value)`, where - `value` is the outputs from `step_fn`. - - Returns: - The updated state. - """ - try: - step = 0 - # To make sure the OutOfRangeError exception can be handled well with - # async remote eager, we need to wrap the loop body in a `async_scope`. - with tf.experimental.async_scope(): - while (num_steps == -1 or step < num_steps): - outputs = step_fn(iterator) - if reduce_fn is not None: - state = reduce_fn(state, outputs) - step += 1 - return state - except (StopIteration, tf.errors.OutOfRangeError): - tf.experimental.async_clear_error() - return state - - return loop_fn - - -def create_tf_while_loop_fn(step_fn): - """Create a multiple steps function driven by tf.while_loop on the host. - - Args: - step_fn: A function which takes `iterator` as input. - - Returns: - A callable defined as the `loop_fn` defination below. - """ - - @tf.function - def loop_fn(iterator, num_steps): - """A loop function with multiple steps. - - Args: - iterator: A nested structure of tf.data `Iterator` or - `DistributedIterator`. - num_steps: The number of steps in the loop. Must be a tf.Tensor. - """ - if not isinstance(num_steps, tf.Tensor): - raise ValueError("`num_steps` should be an `tf.Tensor`. Python object " - "may cause retracing.") - - for _ in tf.range(num_steps): - step_fn(iterator) - - return loop_fn - - -def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs): - """A helper function to create distributed dataset. - - Args: - strategy: An instance of `tf.distribute.Strategy`. - dataset_or_fn: A instance of `tf.data.Dataset` or a function which takes an - `tf.distribute.InputContext` as input and returns a `tf.data.Dataset`. If - it is a function, it could optionally have an argument named - `input_context` which is `tf.distribute.InputContext` argument type. - *args: The list of arguments to be passed to dataset_or_fn. 
- **kwargs: Any keyword arguments to be passed. - - Returns: - A distributed Dataset. - """ - if strategy is None: - strategy = tf.distribute.get_strategy() - - if isinstance(dataset_or_fn, tf.data.Dataset): - return strategy.experimental_distribute_dataset(dataset_or_fn) - - if not callable(dataset_or_fn): - raise ValueError("`dataset_or_fn` should be either callable or an instance " - "of `tf.data.Dataset`") - - def dataset_fn(ctx): - """Wrapped dataset function for creating distributed dataset..""" - - # If `dataset_or_fn` is a function and has `input_context` as argument - # names, pass `ctx` as the value of `input_context` when calling - # `dataset_or_fn`. Otherwise `ctx` will not be used when calling - # `dataset_or_fn`. - if six.PY3: - argspec = inspect.getfullargspec(dataset_or_fn) - else: - argspec = inspect.getargspec(dataset_or_fn) - args_names = argspec.args - - if "input_context" in args_names: - kwargs["input_context"] = ctx - ds = dataset_or_fn(*args, **kwargs) - return ds - - return strategy.experimental_distribute_datasets_from_function(dataset_fn) - - -class SummaryManager(object): - """A class manages writing summaries.""" - - def __init__(self, - summary_writer, - summary_fn, - global_step=None, - summary_interval=None): - """Construct a summary manager object. - - Args: - summary_writer: A `tf.summary.SummaryWriter` instance for writing - summaries. - summary_fn: A callable defined as `def summary_fn(name, tensor, - step=None)`, which describes the summary operation. - global_step: A `tf.Variable` instance for checking the current global step - value, in case users want to save summaries every N steps. - summary_interval: An integer, indicates the minimum step interval between - two summaries. 
- """ - if summary_writer is not None: - self._summary_writer = summary_writer - self._enabled = True - else: - self._summary_writer = tf.summary.create_noop_writer() - self._enabled = False - self._summary_fn = summary_fn - - if global_step is None: - self._global_step = tf.summary.experimental.get_step() - else: - self._global_step = global_step - - if summary_interval is not None: - if self._global_step is None: - raise ValueError("`summary_interval` is not None, but no `global_step` " - "can be obtained ") - self._last_summary_step = self._global_step.numpy() - self._summary_interval = summary_interval - - @property - def summary_interval(self): - return self._summary_interval - - @property - def summary_writer(self): - """Returns the underlying summary writer.""" - return self._summary_writer - - def flush(self): - """Flush the underlying summary writer.""" - if self._enabled: - tf.summary.flush(self._summary_writer) - - def write_summaries(self, items, always_write=True): - """Write a bulk of summaries. - - Args: - items: a dictionary of `Tensors` for writing summaries. - always_write: An optional boolean. If `True`, the manager will always - write summaries unless the summaries have been written for the same - step. Otherwise the manager will only write the summaries if the - interval between summaries are larger than `summary_interval`. - - Returns: - A boolean indicates whether the summaries are written or not. - """ - # TODO(rxsang): Support writing summaries with nested structure, so users - # can split the summaries into different directories for nicer visualization - # in Tensorboard, like train and eval metrics. 
- if not self._enabled: - return False - - if self._summary_interval is not None: - current_step = self._global_step.numpy() - if current_step == self._last_summary_step: - return False - if not always_write and current_step < (self._last_summary_step + - self._summary_interval): - return False - self._last_summary_step = current_step - - with self._summary_writer.as_default(): - for name, tensor in items.items(): - self._summary_fn(name, tensor, step=self._global_step) - return True - - -@six.add_metaclass(abc.ABCMeta) -class Trigger(object): - """An abstract class representing a "trigger" for some event.""" - - @abc.abstractmethod - def __call__(self, value: float, force_trigger=False): - """Maybe trigger the event based on the given value. - - Args: - value: the value for triggering. - force_trigger: Whether the trigger is forced triggered. - - Returns: - `True` if the trigger is triggered on the given `value`, and - `False` otherwise. - """ - - @abc.abstractmethod - def reset(self): - """Reset states in the trigger.""" - - -class IntervalTrigger(Trigger): - """Triggers on every fixed interval.""" - - def __init__(self, interval, start=0): - """Constructs the IntervalTrigger. - - Args: - interval: The triggering interval. - start: An initial value for the trigger. - """ - self._interval = interval - self._last_trigger_value = start - - def __call__(self, value, force_trigger=False): - """Maybe trigger the event based on the given value. - - Args: - value: the value for triggering. - force_trigger: If True, the trigger will be forced triggered unless the - last trigger value is equal to `value`. - - Returns: - `True` if the trigger is triggered on the given `value`, and - `False` otherwise. 
- """ - if force_trigger and value != self._last_trigger_value: - self._last_trigger_value = value - return True - - if self._interval and self._interval > 0: - if value >= self._last_trigger_value + self._interval: - self._last_trigger_value = value - return True - return False - - def reset(self): - """See base class.""" - self._last_trigger_value = 0 - - -class EpochHelper(object): - """A Helper class to handle epochs in Customized Training Loop.""" - - def __init__(self, epoch_steps, global_step): - """Constructs the EpochHelper. - - Args: - epoch_steps: An integer indicates how many steps in an epoch. - global_step: A `tf.Variable` instance indicates the current global step. - """ - self._epoch_steps = epoch_steps - self._global_step = global_step - self._current_epoch = None - self._epoch_start_step = None - self._in_epoch = False - - def epoch_begin(self): - """Returns whether a new epoch should begin.""" - if self._in_epoch: - return False - current_step = self._global_step.numpy() - self._epoch_start_step = current_step - self._current_epoch = current_step // self._epoch_steps - self._in_epoch = True - return True - - def epoch_end(self): - """Returns whether the current epoch should end.""" - if not self._in_epoch: - raise ValueError("`epoch_end` can only be called inside an epoch") - current_step = self._global_step.numpy() - epoch = current_step // self._epoch_steps - - if epoch > self._current_epoch: - self._in_epoch = False - return True - return False - - @property - def batch_index(self): - """Index of the next batch within the current epoch.""" - return self._global_step.numpy() - self._epoch_start_step - - @property - def current_epoch(self): - return self._current_epoch diff --git a/official/utils/__init__.py b/official/utils/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/utils/__init__.py +++ b/official/utils/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/utils/docs/build_api_docs_lib.py b/official/utils/docs/build_api_docs_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..0bff8b0117770c5ea70b105d37aa06b20d4823b5 --- /dev/null +++ b/official/utils/docs/build_api_docs_lib.py @@ -0,0 +1,54 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Common library for API docs builder.""" + +import tensorflow as tf +from tensorflow_docs.api_generator import doc_controls + + +def hide_module_model_and_layer_methods(): + """Hide methods and properties defined in the base classes of Keras layers. + + We hide all methods and properties of the base classes, except: + - `__init__` is always documented. + - `call` is always documented, as it can carry important information for + complex layers. 
+ """ + module_contents = list(tf.Module.__dict__.items()) + model_contents = list(tf.keras.Model.__dict__.items()) + layer_contents = list(tf.keras.layers.Layer.__dict__.items()) + + for name, obj in module_contents + layer_contents + model_contents: + if name == '__init__': + # Always document __init__. + continue + + if name == 'call': + # Always document `call`. + if hasattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS): # pylint: disable=protected-access + delattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS) # pylint: disable=protected-access + continue + + # Otherwise, exclude from documentation. + if isinstance(obj, property): + obj = obj.fget + + if isinstance(obj, (staticmethod, classmethod)): + obj = obj.__func__ + + try: + doc_controls.do_not_doc_in_subclasses(obj) + except AttributeError: + pass diff --git a/official/utils/docs/build_nlp_api_docs.py b/official/utils/docs/build_nlp_api_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..25e3dda02ae2e42ef170d63133fb496dd39917fa --- /dev/null +++ b/official/utils/docs/build_nlp_api_docs.py @@ -0,0 +1,95 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Tool to generate api_docs for tensorflow_models/official library. 
+ +Example: + +$> pip install -U git+https://github.com/tensorflow/docs +$> python build_nlp_api_docs \ + --output_dir=/tmp/api_docs +""" + +import os + +from absl import app +from absl import flags +from absl import logging +from tensorflow_docs.api_generator import generate_lib +from tensorflow_docs.api_generator import public_api + +from official.nlp import modeling as tfnlp +import build_api_docs_lib + +FLAGS = flags.FLAGS + +flags.DEFINE_string('output_dir', None, 'Where to write the resulting docs to.') +flags.DEFINE_string( + 'code_url_prefix', + 'https://github.com/tensorflow/models/blob/master/official/nlp/modeling/', + 'The url prefix for links to code.') + +flags.DEFINE_bool('search_hints', True, + 'Include metadata search hints in the generated files') + +flags.DEFINE_string('site_path', '/api_docs/python', + 'Path prefix in the _toc.yaml') + +flags.DEFINE_bool('gen_report', False, + 'Generate an API report containing the health of the ' + 'docstrings of the public API.') + +PROJECT_SHORT_NAME = 'tfnlp' +PROJECT_FULL_NAME = 'TensorFlow Official Models - NLP Modeling Library' + + +def gen_api_docs(code_url_prefix, site_path, output_dir, gen_report, + project_short_name, project_full_name, search_hints): + """Generates api docs for the tensorflow docs package.""" + build_api_docs_lib.hide_module_model_and_layer_methods() + del tfnlp.layers.MultiHeadAttention + del tfnlp.layers.EinsumDense + + doc_generator = generate_lib.DocGenerator( + root_title=project_full_name, + py_modules=[(project_short_name, tfnlp)], + base_dir=os.path.dirname(tfnlp.__file__), + code_url_prefix=code_url_prefix, + search_hints=search_hints, + site_path=site_path, + gen_report=gen_report, + callbacks=[public_api.explicit_package_contents_filter], + ) + + doc_generator.build(output_dir) + logging.info('Output docs to: %s', output_dir) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + gen_api_docs( + 
code_url_prefix=FLAGS.code_url_prefix, + site_path=FLAGS.site_path, + output_dir=FLAGS.output_dir, + gen_report=FLAGS.gen_report, + project_short_name=PROJECT_SHORT_NAME, + project_full_name=PROJECT_FULL_NAME, + search_hints=FLAGS.search_hints) + + +if __name__ == '__main__': + flags.mark_flag_as_required('output_dir') + app.run(main) diff --git a/official/utils/docs/build_vision_api_docs.py b/official/utils/docs/build_vision_api_docs.py new file mode 100644 index 0000000000000000000000000000000000000000..095e04c693d58f813c6871e0d2e304609b9c88ae --- /dev/null +++ b/official/utils/docs/build_vision_api_docs.py @@ -0,0 +1,93 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Tool to generate api_docs for tensorflow_models/official library. 
+ +Example: + +$> pip install -U git+https://github.com/tensorflow/docs +$> python build_vision_api_docs \ + --output_dir=/tmp/api_docs +""" + +import os + +from absl import app +from absl import flags +from absl import logging +from tensorflow_docs.api_generator import generate_lib +from tensorflow_docs.api_generator import public_api + +import build_api_docs_lib +from official.vision.beta import modeling as tfvision + +FLAGS = flags.FLAGS + +flags.DEFINE_string('output_dir', None, 'Where to write the resulting docs to.') +flags.DEFINE_string( + 'code_url_prefix', + 'https://github.com/tensorflow/models/blob/master/official/vision/beta/modeling/', + 'The url prefix for links to code.') + +flags.DEFINE_bool('search_hints', True, + 'Include metadata search hints in the generated files') + +flags.DEFINE_string('site_path', 'tfvision/api_docs/python', + 'Path prefix in the _toc.yaml') + +flags.DEFINE_bool('gen_report', False, + 'Generate an API report containing the health of the ' + 'docstrings of the public API.') + +PROJECT_SHORT_NAME = 'tfvision' +PROJECT_FULL_NAME = 'TensorFlow Official Models - Vision Modeling Library' + + +def gen_api_docs(code_url_prefix, site_path, output_dir, gen_report, + project_short_name, project_full_name, search_hints): + """Generates api docs for the tensorflow docs package.""" + build_api_docs_lib.hide_module_model_and_layer_methods() + + doc_generator = generate_lib.DocGenerator( + root_title=project_full_name, + py_modules=[(project_short_name, tfvision)], + base_dir=os.path.dirname(tfvision.__file__), + code_url_prefix=code_url_prefix, + search_hints=search_hints, + site_path=site_path, + gen_report=gen_report, + callbacks=[public_api.explicit_package_contents_filter], + ) + + doc_generator.build(output_dir) + logging.info('Output docs to: %s', output_dir) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + gen_api_docs( + code_url_prefix=FLAGS.code_url_prefix, + 
site_path=FLAGS.site_path, + output_dir=FLAGS.output_dir, + gen_report=FLAGS.gen_report, + project_short_name=PROJECT_SHORT_NAME, + project_full_name=PROJECT_FULL_NAME, + search_hints=FLAGS.search_hints) + + +if __name__ == '__main__': + flags.mark_flag_as_required('output_dir') + app.run(main) diff --git a/official/utils/flags/README.md b/official/utils/flags/README.md index 18160f780a0928a2f28ab9a8e66433938179d581..beb3b2a1e1d8f60feceab52caf25b792902b178e 100644 --- a/official/utils/flags/README.md +++ b/official/utils/flags/README.md @@ -1,4 +1,9 @@ # Adding Abseil (absl) flags quickstart + +**WARNING** This module is deprecated. We no long use it in new models and +your projects should not depend on it. We will remove this module when +all models using it are deprecated which may take time. + ## Defining a flag absl flag definitions are similar to argparse, although they are defined on a global namespace. diff --git a/official/utils/flags/__init__.py b/official/utils/flags/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/utils/flags/__init__.py +++ b/official/utils/flags/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/utils/flags/_base.py b/official/utils/flags/_base.py index a143e078200eac12b0836eb32fa7c7d0416a8e66..b8e1dc09a9dc49f5a50c2e3640f9974a00edf042 100644 --- a/official/utils/flags/_base.py +++ b/official/utils/flags/_base.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Flags which will be nearly universal across models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Flags which will be nearly universal across models.""" from absl import flags import tensorflow as tf from official.utils.flags._conventions import help_wrap -def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, - epochs_between_evals=False, stop_threshold=False, - batch_size=True, num_gpu=False, hooks=False, export_dir=False, - distribution_strategy=False, run_eagerly=False): +def define_base(data_dir=True, + model_dir=True, + clean=False, + train_epochs=False, + epochs_between_evals=False, + stop_threshold=False, + batch_size=True, + num_gpu=False, + hooks=False, + export_dir=False, + distribution_strategy=False, + run_eagerly=False): """Register base flags. Args: @@ -35,8 +39,8 @@ def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, clean: Create a flag for removing the model_dir. train_epochs: Create a flag to specify the number of training epochs. epochs_between_evals: Create a flag to specify the frequency of testing. 
- stop_threshold: Create a flag to specify a threshold accuracy or other - eval metric which should trigger the end of training. + stop_threshold: Create a flag to specify a threshold accuracy or other eval + metric which should trigger the end of training. batch_size: Create a flag to specify the batch size. num_gpu: Create a flag to specify the number of GPUs used. hooks: Create a flag to specify hooks for logging. @@ -44,6 +48,7 @@ def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, distribution_strategy: Create a flag to specify which Distribution Strategy to use. run_eagerly: Create a flag to specify to run eagerly op by op. + Returns: A list of flags for core.py to marks as key flags. """ @@ -51,38 +56,48 @@ def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, if data_dir: flags.DEFINE_string( - name="data_dir", short_name="dd", default="/tmp", + name="data_dir", + short_name="dd", + default="/tmp", help=help_wrap("The location of the input data.")) key_flags.append("data_dir") if model_dir: flags.DEFINE_string( - name="model_dir", short_name="md", default="/tmp", + name="model_dir", + short_name="md", + default="/tmp", help=help_wrap("The location of the model checkpoint files.")) key_flags.append("model_dir") if clean: flags.DEFINE_boolean( - name="clean", default=False, + name="clean", + default=False, help=help_wrap("If set, model_dir will be removed if it exists.")) key_flags.append("clean") if train_epochs: flags.DEFINE_integer( - name="train_epochs", short_name="te", default=1, + name="train_epochs", + short_name="te", + default=1, help=help_wrap("The number of epochs used to train.")) key_flags.append("train_epochs") if epochs_between_evals: flags.DEFINE_integer( - name="epochs_between_evals", short_name="ebe", default=1, + name="epochs_between_evals", + short_name="ebe", + default=1, help=help_wrap("The number of training epochs to run between " "evaluations.")) 
key_flags.append("epochs_between_evals") if stop_threshold: flags.DEFINE_float( - name="stop_threshold", short_name="st", + name="stop_threshold", + short_name="st", default=None, help=help_wrap("If passed, training will stop at the earlier of " "train_epochs and when the evaluation metric is " @@ -90,7 +105,9 @@ def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, if batch_size: flags.DEFINE_integer( - name="batch_size", short_name="bs", default=32, + name="batch_size", + short_name="bs", + default=32, help=help_wrap("Batch size for training and evaluation. When using " "multiple gpus, this is the global batch size for " "all devices. For example, if the batch size is 32 " @@ -100,49 +117,52 @@ def define_base(data_dir=True, model_dir=True, clean=False, train_epochs=False, if num_gpu: flags.DEFINE_integer( - name="num_gpus", short_name="ng", + name="num_gpus", + short_name="ng", default=1, - help=help_wrap( - "How many GPUs to use at each worker with the " - "DistributionStrategies API. The default is 1.")) + help=help_wrap("How many GPUs to use at each worker with the " + "DistributionStrategies API. The default is 1.")) if run_eagerly: flags.DEFINE_boolean( - name="run_eagerly", default=False, + name="run_eagerly", + default=False, help="Run the model op by op without building a model function.") if hooks: flags.DEFINE_list( - name="hooks", short_name="hk", default="LoggingTensorHook", + name="hooks", + short_name="hk", + default="LoggingTensorHook", help=help_wrap( u"A list of (case insensitive) strings to specify the names of " u"training hooks. 
Example: `--hooks ProfilerHook," u"ExamplesPerSecondHook`\n See hooks_helper " - u"for details.") - ) + u"for details.")) key_flags.append("hooks") if export_dir: flags.DEFINE_string( - name="export_dir", short_name="ed", default=None, + name="export_dir", + short_name="ed", + default=None, help=help_wrap("If set, a SavedModel serialization of the model will " "be exported to this directory at the end of training. " - "See the README for more details and relevant links.") - ) + "See the README for more details and relevant links.")) key_flags.append("export_dir") if distribution_strategy: flags.DEFINE_string( - name="distribution_strategy", short_name="ds", default="mirrored", + name="distribution_strategy", + short_name="ds", + default="mirrored", help=help_wrap("The Distribution Strategy to use for training. " "Accepted values are 'off', 'one_device', " "'mirrored', 'parameter_server', 'collective', " "case insensitive. 'off' means not to use " "Distribution Strategy; 'default' means to choose " "from `MirroredStrategy` or `OneDeviceStrategy` " - "according to the number of GPUs.") - ) - + "according to the number of GPUs.")) return key_flags diff --git a/official/utils/flags/_benchmark.py b/official/utils/flags/_benchmark.py index 5aa01421c5f5c7fede94b971d6674267f232b6da..abbe0a0b1a0ff990b00a677d320d8dffe8d22459 100644 --- a/official/utils/flags/_benchmark.py +++ b/official/utils/flags/_benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Flags for benchmarking models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Flags for benchmarking models.""" from absl import flags @@ -25,7 +21,8 @@ from official.utils.flags._conventions import help_wrap def define_log_steps(): flags.DEFINE_integer( - name="log_steps", default=100, + name="log_steps", + default=100, help="Frequency with which to log timing information with TimeHistory.") return [] @@ -45,13 +42,16 @@ def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True): key_flags = [] flags.DEFINE_enum( - name="benchmark_logger_type", default="BaseBenchmarkLogger", + name="benchmark_logger_type", + default="BaseBenchmarkLogger", enum_values=["BaseBenchmarkLogger", "BenchmarkFileLogger"], help=help_wrap("The type of benchmark logger to use. Defaults to using " "BaseBenchmarkLogger which logs to STDOUT. Different " "loggers will require other flags to be able to work.")) flags.DEFINE_string( - name="benchmark_test_id", short_name="bti", default=None, + name="benchmark_test_id", + short_name="bti", + default=None, help=help_wrap("The unique test ID of the benchmark run. It could be the " "combination of key parameters. 
It is hardware " "independent and could be used compare the performance " @@ -63,34 +63,43 @@ def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True): if benchmark_log_dir: flags.DEFINE_string( - name="benchmark_log_dir", short_name="bld", default=None, - help=help_wrap("The location of the benchmark logging.") - ) + name="benchmark_log_dir", + short_name="bld", + default=None, + help=help_wrap("The location of the benchmark logging.")) if bigquery_uploader: flags.DEFINE_string( - name="gcp_project", short_name="gp", default=None, + name="gcp_project", + short_name="gp", + default=None, help=help_wrap( "The GCP project name where the benchmark will be uploaded.")) flags.DEFINE_string( - name="bigquery_data_set", short_name="bds", default="test_benchmark", + name="bigquery_data_set", + short_name="bds", + default="test_benchmark", help=help_wrap( "The Bigquery dataset name where the benchmark will be uploaded.")) flags.DEFINE_string( - name="bigquery_run_table", short_name="brt", default="benchmark_run", + name="bigquery_run_table", + short_name="brt", + default="benchmark_run", help=help_wrap("The Bigquery table name where the benchmark run " "information will be uploaded.")) flags.DEFINE_string( - name="bigquery_run_status_table", short_name="brst", + name="bigquery_run_status_table", + short_name="brst", default="benchmark_run_status", help=help_wrap("The Bigquery table name where the benchmark run " "status information will be uploaded.")) flags.DEFINE_string( - name="bigquery_metric_table", short_name="bmt", + name="bigquery_metric_table", + short_name="bmt", default="benchmark_metric", help=help_wrap("The Bigquery table name where the benchmark metric " "information will be uploaded.")) @@ -98,7 +107,7 @@ def define_benchmark(benchmark_log_dir=True, bigquery_uploader=True): @flags.multi_flags_validator( ["benchmark_logger_type", "benchmark_log_dir"], message="--benchmark_logger_type=BenchmarkFileLogger will require " - "--benchmark_log_dir being 
set") + "--benchmark_log_dir being set") def _check_benchmark_log_dir(flags_dict): benchmark_logger_type = flags_dict["benchmark_logger_type"] if benchmark_logger_type == "BenchmarkFileLogger": diff --git a/official/utils/flags/_conventions.py b/official/utils/flags/_conventions.py index e04448ab81fc6db7fd8ba1650b427320ff00c05e..a42ff42a2a1d5fc5791f9fb4865cf403f6218767 100644 --- a/official/utils/flags/_conventions.py +++ b/official/utils/flags/_conventions.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Central location for shared argparse convention definitions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Central location for shared argparse convention definitions.""" import sys import codecs @@ -25,13 +21,12 @@ import functools from absl import app as absl_app from absl import flags - # This codifies help string conventions and makes it easy to update them if # necessary. Currently the only major effect is that help bodies start on the # line after flags are listed. All flag definitions should wrap the text bodies # with help wrap when calling DEFINE_*. -_help_wrap = functools.partial(flags.text_wrap, length=80, indent="", - firstline_indent="\n") +_help_wrap = functools.partial( + flags.text_wrap, length=80, indent="", firstline_indent="\n") # Pretty formatting causes issues when utf-8 is not installed on a system. 
@@ -46,6 +41,7 @@ def _stdout_utf8(): if _stdout_utf8(): help_wrap = _help_wrap else: + def help_wrap(text, *args, **kwargs): return _help_wrap(text, *args, **kwargs).replace(u"\ufeff", u"") diff --git a/official/utils/flags/_device.py b/official/utils/flags/_device.py index d8974fc48d1fc77d227745191579df16b2e46bcc..9d76f48717d77d6b02be0dd622f46de76c2c03f3 100644 --- a/official/utils/flags/_device.py +++ b/official/utils/flags/_device.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Flags for managing compute devices. Currently only contains TPU flags.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Flags for managing compute devices. Currently only contains TPU flags.""" from absl import flags from absl import logging @@ -26,11 +22,13 @@ from official.utils.flags._conventions import help_wrap def require_cloud_storage(flag_names): """Register a validator to check directory flags. + Args: flag_names: An iterable of strings containing the names of flags to be checked. """ msg = "TPU requires GCS path for {}".format(", ".join(flag_names)) + @flags.multi_flags_validator(["tpu"] + flag_names, message=msg) def _path_check(flag_values): # pylint: disable=missing-docstring if flag_values["tpu"] is None: @@ -47,8 +45,10 @@ def require_cloud_storage(flag_names): def define_device(tpu=True): """Register device specific flags. + Args: tpu: Create flags to specify TPU operation. 
+ Returns: A list of flags for core.py to marks as key flags. """ @@ -57,7 +57,8 @@ def define_device(tpu=True): if tpu: flags.DEFINE_string( - name="tpu", default=None, + name="tpu", + default=None, help=help_wrap( "The Cloud TPU to use for training. This should be either the name " "used when creating the Cloud TPU, or a " @@ -66,20 +67,24 @@ def define_device(tpu=True): key_flags.append("tpu") flags.DEFINE_string( - name="tpu_zone", default=None, + name="tpu_zone", + default=None, help=help_wrap( "[Optional] GCE zone where the Cloud TPU is located in. If not " "specified, we will attempt to automatically detect the GCE " "project from metadata.")) flags.DEFINE_string( - name="tpu_gcp_project", default=None, + name="tpu_gcp_project", + default=None, help=help_wrap( "[Optional] Project name for the Cloud TPU-enabled project. If not " "specified, we will attempt to automatically detect the GCE " "project from metadata.")) - flags.DEFINE_integer(name="num_tpu_shards", default=8, - help=help_wrap("Number of shards (TPU chips).")) + flags.DEFINE_integer( + name="num_tpu_shards", + default=8, + help=help_wrap("Number of shards (TPU chips).")) return key_flags diff --git a/official/utils/flags/_distribution.py b/official/utils/flags/_distribution.py index ca331bf24affed5185273a19752d28a491ea3711..848e550cfed602cc692a975ab5e358fe2c638ddd 100644 --- a/official/utils/flags/_distribution.py +++ b/official/utils/flags/_distribution.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Flags related to distributed execution.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Flags related to distributed execution.""" from absl import flags import tensorflow as tf @@ -38,7 +34,8 @@ def define_distribution(worker_hosts=True, task_index=True): if worker_hosts: flags.DEFINE_string( - name='worker_hosts', default=None, + name='worker_hosts', + default=None, help=help_wrap( 'Comma-separated list of worker ip:port pairs for running ' 'multi-worker models with DistributionStrategy. The user would ' @@ -47,7 +44,8 @@ def define_distribution(worker_hosts=True, task_index=True): if task_index: flags.DEFINE_integer( - name='task_index', default=-1, + name='task_index', + default=-1, help=help_wrap('If multi-worker training, the task_index of this ' 'worker.')) diff --git a/official/utils/flags/_misc.py b/official/utils/flags/_misc.py index c6fa24b5ae7e29827967c5c6a1b78dc3613d40fe..744e3628bfdf9265a2132f4e607846687003e320 100644 --- a/official/utils/flags/_misc.py +++ b/official/utils/flags/_misc.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Misc flags.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Misc flags.""" from absl import flags @@ -37,7 +33,9 @@ def define_image(data_format=True): if data_format: flags.DEFINE_enum( - name="data_format", short_name="df", default=None, + name="data_format", + short_name="df", + default=None, enum_values=["channels_first", "channels_last"], help=help_wrap( "A flag to override the data format used in the model. " diff --git a/official/utils/flags/_performance.py b/official/utils/flags/_performance.py index cc5840f95e1ea26697951d1b78fe847526d5859b..5c05577beacfa280d9777b7387419f2b08c57167 100644 --- a/official/utils/flags/_performance.py +++ b/official/utils/flags/_performance.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,21 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Register flags for optimizing performance.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Register flags for optimizing performance.""" import multiprocessing -from absl import flags # pylint: disable=g-bad-import-order -import tensorflow as tf # pylint: disable=g-bad-import-order +from absl import flags # pylint: disable=g-bad-import-order +import tensorflow as tf # pylint: disable=g-bad-import-order from official.utils.flags._conventions import help_wrap - # Map string to TensorFlow dtype DTYPE_MAP = { "fp16": tf.float16, @@ -55,15 +50,21 @@ def get_loss_scale(flags_obj, default_for_fp16): return default_for_fp16 -def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, - synthetic_data=False, max_train_steps=False, dtype=False, - all_reduce_alg=False, num_packs=False, +def define_performance(num_parallel_calls=False, + inter_op=False, + intra_op=False, + synthetic_data=False, + max_train_steps=False, + dtype=False, + all_reduce_alg=False, + num_packs=False, tf_gpu_thread_mode=False, datasets_num_private_threads=False, datasets_num_parallel_batches=False, - dynamic_loss_scale=False, fp16_implementation=False, + fp16_implementation=False, loss_scale=False, - tf_data_experimental_slack=False, enable_xla=False, + tf_data_experimental_slack=False, + enable_xla=False, training_dataset_cache=False): """Register flags for specifying performance tuning arguments. @@ -72,8 +73,8 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, inter_op: Create a flag to allow specification of inter op threads. intra_op: Create a flag to allow specification of intra op threads. synthetic_data: Create a flag to allow the use of synthetic data. 
- max_train_steps: Create a flags to allow specification of maximum number - of training steps + max_train_steps: Create a flags to allow specification of maximum number of + training steps dtype: Create flags for specifying dtype. all_reduce_alg: If set forces a specific algorithm for multi-gpu. num_packs: If set provides number of packs for MirroredStrategy's cross @@ -81,9 +82,7 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, tf_gpu_thread_mode: gpu_private triggers us of private thread pool. datasets_num_private_threads: Number of private threads for datasets. datasets_num_parallel_batches: Determines how many batches to process in - parallel when using map and batch from tf.data. - dynamic_loss_scale: Allow the "loss_scale" flag to take on the value - "dynamic". Only valid if `dtype` is True. + parallel when using map and batch from tf.data. fp16_implementation: Create fp16_implementation flag. loss_scale: Controls the loss scaling, normally for mixed-precision training. Can only be turned on if dtype is also True. @@ -91,8 +90,8 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, `experimental_slack` option. enable_xla: Determines if XLA (auto clustering) is turned on. training_dataset_cache: Whether to cache the training dataset on workers. - Typically used to improve training performance when training data is in - remote storage and can fit into worker memory. + Typically used to improve training performance when training data is in + remote storage and can fit into worker memory. Returns: A list of flags for core.py to marks as key flags. 
@@ -101,7 +100,8 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, key_flags = [] if num_parallel_calls: flags.DEFINE_integer( - name="num_parallel_calls", short_name="npc", + name="num_parallel_calls", + short_name="npc", default=multiprocessing.cpu_count(), help=help_wrap("The number of records that are processed in parallel " "during input processing. This can be optimized per " @@ -111,20 +111,25 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, if inter_op: flags.DEFINE_integer( - name="inter_op_parallelism_threads", short_name="inter", default=0, + name="inter_op_parallelism_threads", + short_name="inter", + default=0, help=help_wrap("Number of inter_op_parallelism_threads to use for CPU. " - "See TensorFlow config.proto for details.") - ) + "See TensorFlow config.proto for details.")) if intra_op: flags.DEFINE_integer( - name="intra_op_parallelism_threads", short_name="intra", default=0, + name="intra_op_parallelism_threads", + short_name="intra", + default=0, help=help_wrap("Number of intra_op_parallelism_threads to use for CPU. " "See TensorFlow config.proto for details.")) if synthetic_data: flags.DEFINE_bool( - name="use_synthetic_data", short_name="synth", default=False, + name="use_synthetic_data", + short_name="synth", + default=False, help=help_wrap( "If set, use fake data (zeroes) instead of a real dataset. " "This mode is useful for performance debugging, as it removes " @@ -132,56 +137,53 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, if max_train_steps: flags.DEFINE_integer( - name="max_train_steps", short_name="mts", default=None, help=help_wrap( + name="max_train_steps", + short_name="mts", + default=None, + help=help_wrap( "The model will stop training if the global_step reaches this " "value. If not set, training will run until the specified number " "of epochs have run as usual. 
It is generally recommended to set " - "--train_epochs=1 when using this flag." - )) + "--train_epochs=1 when using this flag.")) if dtype: flags.DEFINE_enum( - name="dtype", short_name="dt", default="fp32", + name="dtype", + short_name="dt", + default="fp32", enum_values=DTYPE_MAP.keys(), help=help_wrap("The TensorFlow datatype used for calculations. " - "Variables may be cast to a higher precision on a " - "case-by-case basis for numerical stability.")) - - loss_scale_help_text = ( - "The amount to scale the loss by when the model is run. {}. Before " - "gradients are computed, the loss is multiplied by the loss scale, " - "making all gradients loss_scale times larger. To adjust for this, " - "gradients are divided by the loss scale before being applied to " - "variables. This is mathematically equivalent to training without " - "a loss scale, but the loss scale helps avoid some intermediate " - "gradients from underflowing to zero. If not provided the default " - "for fp16 is 128 and 1 for all other dtypes.{}" - ) - if dynamic_loss_scale: - loss_scale_help_text = loss_scale_help_text.format( - "This can be an int/float or the string 'dynamic'", - " The string 'dynamic' can be used to dynamically determine the " - "optimal loss scale during training, but currently this " - "significantly slows down performance") - loss_scale_validation_msg = ("loss_scale should be a positive int/float " - "or the string 'dynamic'.") - else: - loss_scale_help_text = loss_scale_help_text.format( - "This must be an int/float", "") - loss_scale_validation_msg = "loss_scale should be a positive int/float." 
+ "For 16-bit dtypes, variables and certain ops will " + "still be float32 for numeric stability.")) + if loss_scale: flags.DEFINE_string( - name="loss_scale", short_name="ls", default=None, - help=help_wrap(loss_scale_help_text)) - - @flags.validator(flag_name="loss_scale", - message=loss_scale_validation_msg) - def _check_loss_scale(loss_scale): # pylint: disable=unused-variable + name="loss_scale", + short_name="ls", + default=None, + help=help_wrap( + "The amount to scale the loss by when --dtype=fp16. This can be " + "an int/float or the string 'dynamic'. Before gradients are " + "computed, the loss is multiplied by the loss scale, making all " + "gradients loss_scale times larger. To adjust for this, " + "gradients are divided by the loss scale before being applied to " + "variables. This is mathematically equivalent to training " + "without a loss scale, but the loss scale helps avoid some " + "intermediate gradients from underflowing to zero. The default " + "is 'dynamic', which dynamic determines the optimal loss scale " + "during training.")) + + # pylint: disable=unused-variable + @flags.validator( + flag_name="loss_scale", + message="loss_scale should be a positive int/float or the string " + "'dynamic'.") + def _check_loss_scale(loss_scale): """Validator to check the loss scale flag is valid.""" if loss_scale is None: return True # null case is handled in get_loss_scale() - if loss_scale == "dynamic" and dynamic_loss_scale: + if loss_scale == "dynamic": return True try: @@ -190,20 +192,22 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, return False return loss_scale > 0 + # pylint: enable=unused-variable if fp16_implementation: flags.DEFINE_enum( - name="fp16_implementation", default="keras", + name="fp16_implementation", + default="keras", enum_values=("keras', 'graph_rewrite"), help=help_wrap( "When --dtype=fp16, how fp16 should be implemented. This has no " "impact on correctness. 
'keras' uses the " "tf.keras.mixed_precision API. 'graph_rewrite' uses the " - "tf.train.experimental.enable_mixed_precision_graph_rewrite " - "API.")) + "tf.compat.v1.mixed_precision." + "enable_mixed_precision_graph_rewrite API.")) - @flags.multi_flags_validator(["fp16_implementation", "dtype", - "loss_scale"]) + @flags.multi_flags_validator( + ["fp16_implementation", "dtype", "loss_scale"]) def _check_fp16_implementation(flags_dict): """Validator to check fp16_implementation flag is valid.""" if (flags_dict["fp16_implementation"] == "graph_rewrite" and @@ -214,7 +218,9 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, if all_reduce_alg: flags.DEFINE_string( - name="all_reduce_alg", short_name="ara", default=None, + name="all_reduce_alg", + short_name="ara", + default=None, help=help_wrap("Defines the algorithm to use for performing all-reduce." "When specified with MirroredStrategy for single " "worker, this controls " @@ -226,24 +232,26 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, if num_packs: flags.DEFINE_integer( - name="num_packs", default=1, + name="num_packs", + default=1, help=help_wrap("Sets `num_packs` in the cross device ops used in " "MirroredStrategy. For details, see " "tf.distribute.NcclAllReduce.")) if tf_gpu_thread_mode: flags.DEFINE_string( - name="tf_gpu_thread_mode", short_name="gt_mode", default=None, + name="tf_gpu_thread_mode", + short_name="gt_mode", + default=None, help=help_wrap( - "Whether and how the GPU device uses its own threadpool.") - ) + "Whether and how the GPU device uses its own threadpool.")) flags.DEFINE_integer( - name="per_gpu_thread_count", short_name="pgtc", default=0, - help=help_wrap( - "The number of threads to use for GPU. Only valid when " - "tf_gpu_thread_mode is not global.") - ) + name="per_gpu_thread_count", + short_name="pgtc", + default=0, + help=help_wrap("The number of threads to use for GPU. 
Only valid when " + "tf_gpu_thread_mode is not global.")) if datasets_num_private_threads: flags.DEFINE_integer( @@ -251,8 +259,7 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, default=None, help=help_wrap( "Number of threads for a private threadpool created for all" - "datasets computation..") - ) + "datasets computation..")) if datasets_num_parallel_batches: flags.DEFINE_integer( @@ -260,8 +267,7 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, default=None, help=help_wrap( "Determines how many batches to process in parallel when using " - "map and batch from tf.data.") - ) + "map and batch from tf.data.")) if training_dataset_cache: flags.DEFINE_boolean( @@ -270,20 +276,19 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False, help=help_wrap( "Determines whether to cache the training dataset on workers. " "Typically used to improve training performance when training " - "data is in remote storage and can fit into worker memory.") - ) + "data is in remote storage and can fit into worker memory.")) if tf_data_experimental_slack: flags.DEFINE_boolean( name="tf_data_experimental_slack", default=False, help=help_wrap( - "Whether to enable tf.data's `experimental_slack` option.") - ) + "Whether to enable tf.data's `experimental_slack` option.")) if enable_xla: flags.DEFINE_boolean( - name="enable_xla", default=False, + name="enable_xla", + default=False, help="Whether to enable XLA auto jit compilation") return key_flags diff --git a/official/utils/flags/core.py b/official/utils/flags/core.py index fa36944893a579fe5d4a65af9262651db0abc1ba..d864b957b30f901e751f365e118f07228e6cddf6 100644 --- a/official/utils/flags/core.py +++ b/official/utils/flags/core.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Public interface for flag definition. See _example.py for detailed instructions on defining flags. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - import sys + from six.moves import shlex_quote from absl import app as absl_app @@ -65,6 +62,7 @@ def register_key_flags_in_core(f): def core_fn(*args, **kwargs): key_flags = f(*args, **kwargs) [flags.declare_key_flag(fl) for fl in key_flags] # pylint: disable=expression-not-assigned + return core_fn @@ -80,16 +78,15 @@ define_performance = register_key_flags_in_core(_performance.define_performance) define_distribution = register_key_flags_in_core( _distribution.define_distribution) - help_wrap = _conventions.help_wrap - get_num_gpus = _base.get_num_gpus get_tf_dtype = _performance.get_tf_dtype get_loss_scale = _performance.get_loss_scale DTYPE_MAP = _performance.DTYPE_MAP require_cloud_storage = _device.require_cloud_storage + def _get_nondefault_flags_as_dict(): """Returns the nondefault flags as a dict from flag name to value.""" nondefault_flags = {} diff --git a/official/utils/flags/flags_test.py b/official/utils/flags/flags_test.py index e11a1642242bf134f3a9f1df0908f29b00cecf74..11bc2ab4ce0aa39f1148e5992880531c6f63cbe3 100644 --- a/official/utils/flags/flags_test.py +++ b/official/utils/flags/flags_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== import unittest @@ -22,12 +21,19 @@ from official.utils.flags import core as flags_core # pylint: disable=g-bad-imp def define_flags(): - flags_core.define_base(clean=True, num_gpu=False, stop_threshold=True, - hooks=True, train_epochs=True, - epochs_between_evals=True) + flags_core.define_base( + clean=True, + num_gpu=False, + stop_threshold=True, + hooks=True, + train_epochs=True, + epochs_between_evals=True) flags_core.define_performance( - num_parallel_calls=True, inter_op=True, intra_op=True, - dynamic_loss_scale=True, loss_scale=True, synthetic_data=True, + num_parallel_calls=True, + inter_op=True, + intra_op=True, + loss_scale=True, + synthetic_data=True, dtype=True) flags_core.define_image() flags_core.define_benchmark() @@ -41,8 +47,7 @@ class BaseTester(unittest.TestCase): define_flags() def test_default_setting(self): - """Test to ensure fields exist and defaults can be set. - """ + """Test to ensure fields exist and defaults can be set.""" defaults = dict( data_dir="dfgasf", @@ -54,8 +59,7 @@ class BaseTester(unittest.TestCase): num_parallel_calls=18, inter_op_parallelism_threads=5, intra_op_parallelism_threads=10, - data_format="channels_first" - ) + data_format="channels_first") flags_core.set_defaults(**defaults) flags_core.parse_flags() @@ -77,8 +81,7 @@ class BaseTester(unittest.TestCase): assert flags.FLAGS.get_flag_value(name=key, default=None) == value def test_booleans(self): - """Test to ensure boolean flags trigger as expected. 
- """ + """Test to ensure boolean flags trigger as expected.""" flags_core.parse_flags([__file__, "--use_synthetic_data"]) @@ -87,35 +90,33 @@ class BaseTester(unittest.TestCase): def test_parse_dtype_info(self): flags_core.parse_flags([__file__, "--dtype", "fp16"]) self.assertEqual(flags_core.get_tf_dtype(flags.FLAGS), tf.float16) - self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, - default_for_fp16=2), 2) + self.assertEqual( + flags_core.get_loss_scale(flags.FLAGS, default_for_fp16=2), 2) - flags_core.parse_flags( - [__file__, "--dtype", "fp16", "--loss_scale", "5"]) - self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, - default_for_fp16=2), 5) + flags_core.parse_flags([__file__, "--dtype", "fp16", "--loss_scale", "5"]) + self.assertEqual( + flags_core.get_loss_scale(flags.FLAGS, default_for_fp16=2), 5) flags_core.parse_flags( [__file__, "--dtype", "fp16", "--loss_scale", "dynamic"]) - self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, - default_for_fp16=2), "dynamic") + self.assertEqual( + flags_core.get_loss_scale(flags.FLAGS, default_for_fp16=2), "dynamic") flags_core.parse_flags([__file__, "--dtype", "fp32"]) self.assertEqual(flags_core.get_tf_dtype(flags.FLAGS), tf.float32) - self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, - default_for_fp16=2), 1) + self.assertEqual( + flags_core.get_loss_scale(flags.FLAGS, default_for_fp16=2), 1) flags_core.parse_flags([__file__, "--dtype", "fp32", "--loss_scale", "5"]) - self.assertEqual(flags_core.get_loss_scale(flags.FLAGS, - default_for_fp16=2), 5) - + self.assertEqual( + flags_core.get_loss_scale(flags.FLAGS, default_for_fp16=2), 5) with self.assertRaises(SystemExit): flags_core.parse_flags([__file__, "--dtype", "int8"]) with self.assertRaises(SystemExit): - flags_core.parse_flags([__file__, "--dtype", "fp16", - "--loss_scale", "abc"]) + flags_core.parse_flags( + [__file__, "--dtype", "fp16", "--loss_scale", "abc"]) def test_get_nondefault_flags_as_str(self): defaults = dict( @@ -123,8 +124,7 
@@ class BaseTester(unittest.TestCase): data_dir="abc", hooks=["LoggingTensorHook"], stop_threshold=1.5, - use_synthetic_data=False - ) + use_synthetic_data=False) flags_core.set_defaults(**defaults) flags_core.parse_flags() diff --git a/official/utils/hyperparams_flags.py b/official/utils/hyperparams_flags.py index 4b8150677e43b68a68b9234dd852f6df894ea849..e47bd8f066466f08502dd9a2757fb2afc078a508 100644 --- a/official/utils/hyperparams_flags.py +++ b/official/utils/hyperparams_flags.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Common flags for importing hyperparameters.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Common flags for importing hyperparameters.""" from absl import flags from official.utils.flags import core as flags_core diff --git a/official/utils/misc/__init__.py b/official/utils/misc/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/utils/misc/__init__.py +++ b/official/utils/misc/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/utils/misc/callstack_sampler.py b/official/utils/misc/callstack_sampler.py deleted file mode 100644 index 984f133e9c68a73569717bff47154110c718e3ce..0000000000000000000000000000000000000000 --- a/official/utils/misc/callstack_sampler.py +++ /dev/null @@ -1,62 +0,0 @@ -"""A simple Python callstack sampler.""" - -import contextlib -import datetime -import signal -import traceback - - -class CallstackSampler(object): - """A simple signal-based Python callstack sampler. - """ - - def __init__(self, interval=None): - self.stacks = [] - self.interval = 0.001 if interval is None else interval - - def _sample(self, signum, frame): - """Samples the current stack.""" - del signum - stack = traceback.extract_stack(frame) - formatted_stack = [] - formatted_stack.append(datetime.datetime.utcnow()) - for filename, lineno, function_name, text in stack: - formatted_frame = '{}:{}({})({})'.format(filename, lineno, function_name, - text) - formatted_stack.append(formatted_frame) - self.stacks.append(formatted_stack) - signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0) - - @contextlib.contextmanager - def profile(self): - signal.signal(signal.SIGVTALRM, self._sample) - signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, 0) - try: - yield - finally: - signal.setitimer(signal.ITIMER_VIRTUAL, 0) - - def save(self, fname): - with open(fname, 'w') as f: - for s in self.stacks: - for l in s: - f.write('%s\n' % l) - f.write('\n') - - -@contextlib.contextmanager -def callstack_sampling(filename, interval=None): - """Periodically samples the Python 
callstack. - - Args: - filename: the filename - interval: the sampling interval, in seconds. Defaults to 0.001. - - Yields: - nothing - """ - sampler = CallstackSampler(interval=interval) - with sampler.profile(): - yield - sampler.save(filename) - diff --git a/official/utils/misc/distribution_utils.py b/official/utils/misc/distribution_utils.py index e4823a9b1e6f5cb8d1ff4d7d86340d8656934a6e..77a3e79e64249975dc129e86187b7b3371ee871b 100644 --- a/official/utils/misc/distribution_utils.py +++ b/official/utils/misc/distribution_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,195 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Helper functions for running models in a distributed setting.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import os -import random -import string - -from absl import logging -import tensorflow.compat.v2 as tf - -from official.utils.misc import tpu_lib - - -def _collective_communication(all_reduce_alg): - """Return a CollectiveCommunication based on all_reduce_alg. - - Args: - all_reduce_alg: a string specifying which collective communication to pick, - or None. 
- - Returns: - tf.distribute.experimental.CollectiveCommunication object - - Raises: - ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] - """ - collective_communication_options = { - None: tf.distribute.experimental.CollectiveCommunication.AUTO, - "ring": tf.distribute.experimental.CollectiveCommunication.RING, - "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL - } - if all_reduce_alg not in collective_communication_options: - raise ValueError( - "When used with `multi_worker_mirrored`, valid values for " - "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( - all_reduce_alg)) - return collective_communication_options[all_reduce_alg] - - -def _mirrored_cross_device_ops(all_reduce_alg, num_packs): - """Return a CrossDeviceOps based on all_reduce_alg and num_packs. - - Args: - all_reduce_alg: a string specifying which cross device op to pick, or None. - num_packs: an integer specifying number of packs for the cross device op. - - Returns: - tf.distribute.CrossDeviceOps object or None. - - Raises: - ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"]. - """ - if all_reduce_alg is None: - return None - mirrored_all_reduce_options = { - "nccl": tf.distribute.NcclAllReduce, - "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce - } - if all_reduce_alg not in mirrored_all_reduce_options: - raise ValueError( - "When used with `mirrored`, valid values for all_reduce_alg are " - "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( - all_reduce_alg)) - cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] - return cross_device_ops_class(num_packs=num_packs) - - -def get_distribution_strategy(distribution_strategy="mirrored", - num_gpus=0, - all_reduce_alg=None, - num_packs=1, - tpu_address=None): - """Return a DistributionStrategy for running the model. - - Args: - distribution_strategy: a string specifying which distribution strategy to - use. 
Accepted values are "off", "one_device", "mirrored", - "parameter_server", "multi_worker_mirrored", and "tpu" -- case insensitive. - "off" means not to use Distribution Strategy; "tpu" means to use - TPUStrategy using `tpu_address`. - num_gpus: Number of GPUs to run this model. - all_reduce_alg: Optional. Specifies which algorithm to use when performing - all-reduce. For `MirroredStrategy`, valid values are "nccl" and - "hierarchical_copy". For `MultiWorkerMirroredStrategy`, valid values are - "ring" and "nccl". If None, DistributionStrategy will choose based on - device topology. - num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce` - or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`. - tpu_address: Optional. String that represents TPU to connect to. Must not - be None if `distribution_strategy` is set to `tpu`. - Returns: - tf.distribute.DistibutionStrategy object. - Raises: - ValueError: if `distribution_strategy` is "off" or "one_device" and - `num_gpus` is larger than 1; or `num_gpus` is negative or if - `distribution_strategy` is `tpu` but `tpu_address` is not specified. - """ - if num_gpus < 0: - raise ValueError("`num_gpus` can not be negative.") - distribution_strategy = distribution_strategy.lower() - if distribution_strategy == "off": - if num_gpus > 1: - raise ValueError( - "When {} GPUs are specified, distribution_strategy " - "flag cannot be set to `off`.".format(num_gpus)) - return None - - if distribution_strategy == "tpu": - # When tpu_address is an empty string, we communicate with local TPUs. 
- cluster_resolver = tpu_lib.tpu_initialize(tpu_address) - return tf.distribute.experimental.TPUStrategy(cluster_resolver) - - if distribution_strategy == "multi_worker_mirrored": - return tf.distribute.experimental.MultiWorkerMirroredStrategy( - communication=_collective_communication(all_reduce_alg)) - - if distribution_strategy == "one_device": - if num_gpus == 0: - return tf.distribute.OneDeviceStrategy("device:CPU:0") - if num_gpus > 1: - raise ValueError("`OneDeviceStrategy` can not be used for more than " - "one device.") - return tf.distribute.OneDeviceStrategy("device:GPU:0") - - if distribution_strategy == "mirrored": - if num_gpus == 0: - devices = ["device:CPU:0"] - else: - devices = ["device:GPU:%d" % i for i in range(num_gpus)] - return tf.distribute.MirroredStrategy( - devices=devices, - cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs)) - - if distribution_strategy == "parameter_server": - return tf.distribute.experimental.ParameterServerStrategy() - - raise ValueError( - "Unrecognized Distribution Strategy: %r" % distribution_strategy) - - -def configure_cluster(worker_hosts=None, task_index=-1): - """Set multi-worker cluster spec in TF_CONFIG environment variable. - - Args: - worker_hosts: comma-separated list of worker ip:port pairs. - - Returns: - Number of workers in the cluster. 
- """ - tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) - if tf_config: - num_workers = (len(tf_config["cluster"].get("chief", [])) + - len(tf_config["cluster"].get("worker", []))) - elif worker_hosts: - workers = worker_hosts.split(",") - num_workers = len(workers) - if num_workers > 1 and task_index < 0: - raise ValueError("Must specify task_index when number of workers > 1") - task_index = 0 if num_workers == 1 else task_index - os.environ["TF_CONFIG"] = json.dumps({ - "cluster": { - "worker": workers - }, - "task": {"type": "worker", "index": task_index} - }) - else: - num_workers = 1 - return num_workers - - -def get_strategy_scope(strategy): - if strategy: - strategy_scope = strategy.scope() - else: - strategy_scope = DummyContextManager() - - return strategy_scope - - -class DummyContextManager(object): - - def __enter__(self): - pass - - def __exit__(self, *args): - pass +"""Helper functions for running models in a distributed setting.""" +# pylint: disable=wildcard-import +from official.common.distribute_utils import * diff --git a/official/utils/misc/distribution_utils_test.py b/official/utils/misc/distribution_utils_test.py deleted file mode 100644 index 4fd7bff09daaf2f5c85af2a0e7b7efbd00dc42c1..0000000000000000000000000000000000000000 --- a/official/utils/misc/distribution_utils_test.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -""" Tests for distribution util functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow.compat.v2 as tf - -from official.utils.misc import distribution_utils - - -class GetDistributionStrategyTest(tf.test.TestCase): - """Tests for get_distribution_strategy.""" - def test_one_device_strategy_cpu(self): - ds = distribution_utils.get_distribution_strategy(num_gpus=0) - self.assertEquals(ds.num_replicas_in_sync, 1) - self.assertEquals(len(ds.extended.worker_devices), 1) - self.assertIn('CPU', ds.extended.worker_devices[0]) - - def test_one_device_strategy_gpu(self): - ds = distribution_utils.get_distribution_strategy(num_gpus=1) - self.assertEquals(ds.num_replicas_in_sync, 1) - self.assertEquals(len(ds.extended.worker_devices), 1) - self.assertIn('GPU', ds.extended.worker_devices[0]) - - def test_mirrored_strategy(self): - ds = distribution_utils.get_distribution_strategy(num_gpus=5) - self.assertEquals(ds.num_replicas_in_sync, 5) - self.assertEquals(len(ds.extended.worker_devices), 5) - for device in ds.extended.worker_devices: - self.assertIn('GPU', device) - - -if __name__ == "__main__": - tf.test.main() diff --git a/official/utils/misc/keras_utils.py b/official/utils/misc/keras_utils.py index 2cca51f1d24701802b0fd7cfc62a84306eedded2..a5b20c8a3ebc36387e2997f67b2d411894c5ca57 100644 --- a/official/utils/misc/keras_utils.py +++ b/official/utils/misc/keras_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Helper functions for the Keras implementations of models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Helper functions for the Keras implementations of models.""" import multiprocessing import os @@ -25,6 +21,19 @@ import time from absl import logging import tensorflow as tf +from tensorflow.python.eager import monitoring + +global_batch_size_gauge = monitoring.IntGauge( + '/tensorflow/training/global_batch_size', 'TF training global batch size') + +first_batch_time_gauge = monitoring.IntGauge( + '/tensorflow/training/first_batch', + 'TF training start/end time for first batch (unix epoch time in us.', + 'type') + +first_batch_start_time = first_batch_time_gauge.get_cell('start') +first_batch_end_time = first_batch_time_gauge.get_cell('end') + class BatchTimestamp(object): """A structure to store batch time stamp.""" @@ -60,6 +69,8 @@ class TimeHistory(tf.keras.callbacks.Callback): self.steps_in_epoch = 0 self.start_time = None + global_batch_size_gauge.get_cell().set(batch_size) + if logdir: self.summary_writer = tf.summary.create_file_writer(logdir) else: @@ -110,14 +121,18 @@ class TimeHistory(tf.keras.callbacks.Callback): def on_batch_begin(self, batch, logs=None): if not self.start_time: self.start_time = time.time() + if not first_batch_start_time.value(): + first_batch_start_time.set(int(self.start_time * 1000000)) # Record the timestamp of the first global step if not self.timestamp_log: - self.timestamp_log.append(BatchTimestamp(self.global_steps, - self.start_time)) + self.timestamp_log.append( + BatchTimestamp(self.global_steps, self.start_time)) def on_batch_end(self, batch, logs=None): """Records elapse time of the batch and calculates examples per second.""" + if not 
first_batch_end_time.value(): + first_batch_end_time.set(int(time.time() * 1000000)) self.steps_in_epoch = batch + 1 steps_since_last_log = self.global_steps - self.last_log_step if steps_since_last_log >= self.log_steps: @@ -167,12 +182,12 @@ def set_session_config(enable_xla=False): if enable_xla: tf.config.optimizer.set_jit(True) + # TODO(hongkuny): remove set_config_v2 globally. set_config_v2 = set_session_config -def set_gpu_thread_mode_and_count(gpu_thread_mode, - datasets_num_private_threads, +def set_gpu_thread_mode_and_count(gpu_thread_mode, datasets_num_private_threads, num_gpus, per_gpu_thread_count): """Set GPU thread mode and count, and adjust dataset threads count.""" cpu_count = multiprocessing.cpu_count() @@ -182,10 +197,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode, per_gpu_thread_count = per_gpu_thread_count or 2 os.environ['TF_GPU_THREAD_MODE'] = gpu_thread_mode os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count) - logging.info('TF_GPU_THREAD_COUNT: %s', - os.environ['TF_GPU_THREAD_COUNT']) - logging.info('TF_GPU_THREAD_MODE: %s', - os.environ['TF_GPU_THREAD_MODE']) + logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT']) + logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE']) # Limit data preprocessing threadpool to CPU cores minus number of total GPU # private threads and memory copy threads. 
@@ -193,7 +206,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode, num_runtime_threads = num_gpus if not datasets_num_private_threads: datasets_num_private_threads = min( - cpu_count - total_gpu_thread_count - num_runtime_threads, - num_gpus * 8) + cpu_count - total_gpu_thread_count - num_runtime_threads, num_gpus * 8) logging.info('Set datasets_num_private_threads to %s', datasets_num_private_threads) diff --git a/official/utils/misc/model_helpers.py b/official/utils/misc/model_helpers.py index 9a44e50ac46162821dcbfacc55b5b1e5c30eba8f..4c310588b39e32f23748772c64aa7ee9b4e987f2 100644 --- a/official/utils/misc/model_helpers.py +++ b/official/utils/misc/model_helpers.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Miscellaneous functions that can be called by models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Miscellaneous functions that can be called by models.""" import numbers @@ -58,9 +54,12 @@ def past_stop_threshold(stop_threshold, eval_metric): return False -def generate_synthetic_data( - input_shape, input_value=0, input_dtype=None, label_shape=None, - label_value=0, label_dtype=None): +def generate_synthetic_data(input_shape, + input_value=0, + input_dtype=None, + label_shape=None, + label_value=0, + label_dtype=None): """Create a repeating dataset with constant values. 
Args: diff --git a/official/utils/misc/model_helpers_test.py b/official/utils/misc/model_helpers_test.py index 9f2487e4223e7b46854db918114d2507fc891155..dd01c3431766d0ba00647ca2081c3f5687f2bfd5 100644 --- a/official/utils/misc/model_helpers_test.py +++ b/official/utils/misc/model_helpers_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Tests for Model Helper functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for Model Helper functions.""" import tensorflow as tf # pylint: disable=g-bad-import-order @@ -51,19 +47,19 @@ class PastStopThresholdTest(tf.test.TestCase): def test_past_stop_threshold_not_number(self): """Tests for error conditions.""" with self.assertRaises(ValueError): - model_helpers.past_stop_threshold("str", 1) + model_helpers.past_stop_threshold('str', 1) with self.assertRaises(ValueError): - model_helpers.past_stop_threshold("str", tf.constant(5)) + model_helpers.past_stop_threshold('str', tf.constant(5)) with self.assertRaises(ValueError): - model_helpers.past_stop_threshold("str", "another") + model_helpers.past_stop_threshold('str', 'another') with self.assertRaises(ValueError): model_helpers.past_stop_threshold(0, None) with self.assertRaises(ValueError): - model_helpers.past_stop_threshold(0.7, "str") + model_helpers.past_stop_threshold(0.7, 'str') with self.assertRaises(ValueError): model_helpers.past_stop_threshold(tf.constant(4), None) @@ -74,12 
+70,13 @@ class SyntheticDataTest(tf.test.TestCase): def test_generate_synethetic_data(self): input_element, label_element = tf.compat.v1.data.make_one_shot_iterator( - model_helpers.generate_synthetic_data(input_shape=tf.TensorShape([5]), - input_value=123, - input_dtype=tf.float32, - label_shape=tf.TensorShape([]), - label_value=456, - label_dtype=tf.int32)).get_next() + model_helpers.generate_synthetic_data( + input_shape=tf.TensorShape([5]), + input_value=123, + input_dtype=tf.float32, + label_shape=tf.TensorShape([]), + label_value=456, + label_dtype=tf.int32)).get_next() with self.session() as sess: for n in range(5): @@ -102,8 +99,13 @@ class SyntheticDataTest(tf.test.TestCase): def test_generate_nested_data(self): d = model_helpers.generate_synthetic_data( - input_shape={'a': tf.TensorShape([2]), - 'b': {'c': tf.TensorShape([3]), 'd': tf.TensorShape([])}}, + input_shape={ + 'a': tf.TensorShape([2]), + 'b': { + 'c': tf.TensorShape([3]), + 'd': tf.TensorShape([]) + } + }, input_value=1.1) element = tf.compat.v1.data.make_one_shot_iterator(d).get_next() @@ -121,5 +123,5 @@ class SyntheticDataTest(tf.test.TestCase): self.assertAllClose(inp['b']['d'], 1.1) -if __name__ == "__main__": +if __name__ == '__main__': tf.test.main() diff --git a/official/utils/misc/tpu_lib.py b/official/utils/misc/tpu_lib.py deleted file mode 100644 index 4d4cddb1c6b015091ed2da57df49277e3008c252..0000000000000000000000000000000000000000 --- a/official/utils/misc/tpu_lib.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Initializes TPU system for TF 2.0.""" - -import tensorflow as tf - - -def tpu_initialize(tpu_address): - """Initializes TPU for TF 2.0 training. - - Args: - tpu_address: string, bns address of master TPU worker. - - Returns: - A TPUClusterResolver. - """ - cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( - tpu=tpu_address) - if tpu_address not in ('', 'local'): - tf.config.experimental_connect_to_cluster(cluster_resolver) - tf.tpu.experimental.initialize_tpu_system(cluster_resolver) - return cluster_resolver diff --git a/official/utils/testing/__init__.py b/official/utils/testing/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/utils/testing/__init__.py +++ b/official/utils/testing/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/utils/testing/integration.py b/official/utils/testing/integration.py index b4809a4815cd76c637e2b319352a1d15ab89b87b..763de50bef6a7ade0c27c2deca8597649f276719 100644 --- a/official/utils/testing/integration.py +++ b/official/utils/testing/integration.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Helper code to run complete models from within python. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Helper code to run complete models from within python.""" import os import shutil @@ -31,7 +26,11 @@ from official.utils.flags import core as flags_core @flagsaver.flagsaver -def run_synthetic(main, tmp_root, extra_flags=None, synth=True, train_epochs=1, +def run_synthetic(main, + tmp_root, + extra_flags=None, + synth=True, + train_epochs=1, epochs_between_evals=1): """Performs a minimal run of a model. diff --git a/official/utils/testing/mock_task.py b/official/utils/testing/mock_task.py new file mode 100644 index 0000000000000000000000000000000000000000..3afc77da18ac9a216afee96eaed5caacf171ce65 --- /dev/null +++ b/official/utils/testing/mock_task.py @@ -0,0 +1,101 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mock task for testing.""" + +import dataclasses +import numpy as np +import tensorflow as tf + +from official.core import base_task +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.core import task_factory + + +class MockModel(tf.keras.Model): + + def __init__(self, network): + super().__init__() + self.network = network + + def call(self, inputs): + outputs = self.network(inputs) + self.add_loss(tf.reduce_mean(outputs)) + return outputs + + +@dataclasses.dataclass +class MockTaskConfig(cfg.TaskConfig): + pass + + +@task_factory.register_task_cls(MockTaskConfig) +class MockTask(base_task.Task): + """Mock task object for testing.""" + + def __init__(self, params=None, logging_dir=None, name=None): + super().__init__(params=params, logging_dir=logging_dir, name=name) + + def build_model(self, *arg, **kwargs): + inputs = tf.keras.layers.Input(shape=(2,), name="random", dtype=tf.float32) + outputs = tf.keras.layers.Dense( + 1, bias_initializer=tf.keras.initializers.Ones(), name="dense_0")( + inputs) + network = tf.keras.Model(inputs=inputs, outputs=outputs) + return MockModel(network) + + def build_metrics(self, training: bool = True): + del training + return [tf.keras.metrics.Accuracy(name="acc")] + + def validation_step(self, inputs, model: tf.keras.Model, metrics=None): + logs = super().validation_step(inputs, model, metrics) + logs["counter"] = tf.constant(1, dtype=tf.float32) + return logs + + def build_inputs(self, params): + + def generate_data(_): + x = tf.zeros(shape=(2,), 
dtype=tf.float32) + label = tf.zeros([1], dtype=tf.int32) + return x, label + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True) + + def aggregate_logs(self, state, step_outputs): + if state is None: + state = {} + for key, value in step_outputs.items(): + if key not in state: + state[key] = [] + state[key].append( + np.concatenate([np.expand_dims(v.numpy(), axis=0) for v in value])) + return state + + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + for k, v in aggregated_logs.items(): + aggregated_logs[k] = np.sum(np.stack(v, axis=0)) + return aggregated_logs + + +@exp_factory.register_config_factory("mock") +def mock_experiment() -> cfg.ExperimentConfig: + config = cfg.ExperimentConfig( + task=MockTaskConfig(), trainer=cfg.TrainerConfig()) + return config diff --git a/official/utils/testing/scripts/ci_sanity.sh b/official/utils/testing/scripts/ci_sanity.sh index 97d6bc290eff327f340088b960f910af2afa626b..0646c87a943c0f436f39fde8cf95ffae863b33a0 100755 --- a/official/utils/testing/scripts/ci_sanity.sh +++ b/official/utils/testing/scripts/ci_sanity.sh @@ -34,10 +34,10 @@ do_pylint() { # --incremental Performs check on only the python files changed in the # last non-merge git commit. 
- # Use this list to whitelist pylint errors - ERROR_WHITELIST="" + # Use this list to ALLOWLIST pylint errors + ERROR_ALLOWLIST="" - echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" + echo "ERROR_ALLOWLIST=\"${ERROR_ALLOWLIST}\"" PYLINT_BIN="python3 -m pylint" @@ -92,16 +92,16 @@ do_pylint() { N_ERRORS=0 while read -r LINE; do - IS_WHITELISTED=0 - for WL_REGEX in ${ERROR_WHITELIST}; do + IS_ALLOWLISTED=0 + for WL_REGEX in ${ERROR_ALLOWLIST}; do if echo ${LINE} | grep -q "${WL_REGEX}"; then - echo "Found a whitelisted error:" + echo "Found a ALLOWLISTed error:" echo " ${LINE}" - IS_WHITELISTED=1 + IS_ALLOWLISTED=1 fi done - if [[ ${IS_WHITELISTED} == "0" ]]; then + if [[ ${IS_ALLOWLISTED} == "0" ]]; then echo "${LINE}" >> ${NONWL_ERRORS_FILE} echo "" >> ${NONWL_ERRORS_FILE} ((N_ERRORS++)) @@ -116,7 +116,7 @@ do_pylint() { cat "${NONWL_ERRORS_FILE}" return 1 else - echo "PASS: No non-whitelisted pylint errors were found." + echo "PASS: No non-ALLOWLISTed pylint errors were found." return 0 fi } diff --git a/official/vision/__init__.py b/official/vision/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/__init__.py +++ b/official/vision/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/vision/beta/MODEL_GARDEN.md b/official/vision/beta/MODEL_GARDEN.md new file mode 100644 index 0000000000000000000000000000000000000000..42f7eea546bd2adcd95b2a6e17305a266a8ac180 --- /dev/null +++ b/official/vision/beta/MODEL_GARDEN.md @@ -0,0 +1,110 @@ +# TF Vision Model Garden + +## Introduction +TF Vision model garden provides a large collection of baselines and checkpoints for image classification, object detection, and instance segmentation. + + +## Image Classification +### ImageNet Baselines +#### ResNet models trained with vanilla settings: +* Models are trained from scratch with batch size 4096 and 1.6 initial learning rate. +* Linear warmup is applied for the first 5 epochs. +* Models trained with l2 weight regularization and ReLU activation. + +| model | resolution | epochs | Top-1 | Top-5 | download | +| ------------ |:-------------:|--------:|--------:|---------:|---------:| +| ResNet-50 | 224x224 | 90 | 76.1 | 92.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) | +| ResNet-50 | 224x224 | 200 | 77.1 | 93.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml) | +| ResNet-101 | 224x224 | 200 | 78.3 | 94.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml) | +| ResNet-152 | 224x224 | 200 | 78.7 | 94.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml) | + + +#### ResNet-RS models trained with settings including: +We support state-of-the-art [ResNet-RS](https://arxiv.org/abs/2103.07579) image classification models with features: + +* ResNet-RS architectural changes and Swish activation. 
(Note that ResNet-RS + adopts ReLU activation in the paper.) +* Regularization methods including Random Augment, 4e-5 weight decay, stochastic depth, label smoothing and dropout. +* New training methods including a 350-epoch schedule, cosine learning rate and + EMA. +* Configs are in this [directory](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification). + +model | resolution | params (M) | Top-1 | Top-5 | download +--------- | :--------: | -----: | ----: | ----: | -------: +ResNet-RS-50 | 160x160 | 35.7 | 79.1 | 94.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml) | +ResNet-RS-101 | 160x160 | 63.7 | 80.2 | 94.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml) | +ResNet-RS-101 | 192x192 | 63.7 | 81.3 | 95.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml) | +ResNet-RS-152 | 192x192 | 86.8 | 81.9 | 95.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml) | +ResNet-RS-152 | 224x224 | 86.8 | 82.5 | 96.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml) | +ResNet-RS-152 | 256x256 | 86.8 | 83.1 | 96.3 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml) | +ResNet-RS-200 | 256x256 | 93.4 | 83.5 | 96.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml) | +ResNet-RS-270 | 256x256 | 130.1 | 83.6 | 96.6 | 
[config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml) | +ResNet-RS-350 | 256x256 | 164.3 | 83.7 | 96.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml) | +ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i256.yaml) | + + +## Object Detection and Instance Segmentation +### Common Settings and Notes +* We provide models based on two detection frameworks, [RetinaNet](https://arxiv.org/abs/1708.02002) or [Mask R-CNN](https://arxiv.org/abs/1703.06870), and two backbones, [ResNet-FPN](https://arxiv.org/abs/1612.03144) or [SpineNet](https://arxiv.org/abs/1912.05027). +* Models are all trained on COCO train2017 and evaluated on COCO val2017. +* Training details: + * Models finetuned from ImageNet pretrained checkpoints adopt the 12 or 36 epochs schedule. Models trained from scratch adopt the 350 epochs schedule. + * The default training data augmentation implements horizontal flipping and scale jittering with a random scale between [0.5, 2.0]. + * Unless noted, all models are trained with l2 weight regularization and ReLU activation. + * We use batch size 256 and stepwise learning rate that decays at the last 30 and 10 epoch. + * We use square image as input by resizing the long side of an image to the target size then padding the short side with zeros. 
+ +### COCO Object Detection Baselines +#### RetinaNet (ImageNet pretrained) +| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | download | +| ------------ |:-------------:| ---------:|-----------:|--------:|--------:|-----------:| +| R50-FPN | 640x640 | 12 | 97.0 | 34.0 | 34.3 | config| +| R50-FPN | 640x640 | 36 | 97.0 | 34.0 | 37.3 | config| + +#### RetinaNet (Trained from scratch) with training features including: +* Stochastic depth with drop rate 0.2. +* Swish activation. + +| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | download | +| ------------ |:-------------:| ---------:|-----------:|--------:|---------:|-----------:| +| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)| +| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)| +| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml) \| [TB.dev](https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)| + +#### Mobile-size RetinaNet (Trained from scratch): + +backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | download +------------ | :--------: | -----: | --------: | ---------: | -----: | -------: +Mobile SpineNet-49 | 384x384 | 600 | 1.0 | 2.32 | 28.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml) | + + +### Instance Segmentation 
Baselines +#### Mask R-CNN (ImageNet pretrained) + + +#### Mask R-CNN (Trained from scratch) +| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | mask AP | download | +| ------------ |:-------------:| ---------:|-----------:|--------:|--------:|-----------:|-----------:| +| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | config | + + +## Video Classification +### Common Settings and Notes +* We provide models for video classification with two backbones: [SlowOnly](https://arxiv.org/abs/1812.03982) and 3D-ResNet (R3D) used in [Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800). +* Training and evaluation details: + * All models are trained from scratch with vision modality (RGB) for 200 epochs. + * We use batch size of 1024 and cosine learning rate decay with linear warmup in first 5 epochs. + * We follow [SlowFast](https://arxiv.org/abs/1812.03982) to perform 30-view evaluation. + +### Kinetics-400 Action Recognition Baselines +| model | input (frame x stride) | Top-1 | Top-5 | download | +| -------- |:----------------------:|--------:|--------:|---------:| +| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) | +| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) | +| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) | + +### Kinetics-600 Action Recognition Baselines +| model | input (frame x stride) | Top-1 | Top-5 | download | +| -------- |:----------------------:|--------:|--------:|---------:| +| SlowOnly | 8 x 8 | 77.3 | 93.6 | 
[config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) | +| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) | diff --git a/official/vision/beta/README.md b/official/vision/beta/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d8f84fd5bd65d3a28f22a9b98b251d9ba333ea9 --- /dev/null +++ b/official/vision/beta/README.md @@ -0,0 +1,2 @@ +This directory contains the new design of TF model garden vision framework. +Stay tuned. diff --git a/official/vision/beta/__init__.py b/official/vision/beta/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..91f07553490b4602e6a97aba939748b1a2dbef3e --- /dev/null +++ b/official/vision/beta/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Vision package definition.""" +# Lint as: python3 +# pylint: disable=unused-import +from official.vision.beta import configs +from official.vision.beta import tasks diff --git a/official/vision/beta/configs/__init__.py b/official/vision/beta/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..925339330799dcf09d804daf73ad957370e5f6d2 --- /dev/null +++ b/official/vision/beta/configs/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Configs package definition.""" + +from official.vision.beta.configs import image_classification +from official.vision.beta.configs import maskrcnn +from official.vision.beta.configs import retinanet +from official.vision.beta.configs import semantic_segmentation +from official.vision.beta.configs import video_classification diff --git a/official/vision/beta/configs/backbones.py b/official/vision/beta/configs/backbones.py new file mode 100644 index 0000000000000000000000000000000000000000..bcac8429a0a8d23896f86c1a3bf1a3c111db3676 --- /dev/null +++ b/official/vision/beta/configs/backbones.py @@ -0,0 +1,109 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Backbones configurations.""" +from typing import Optional, List + +# Import libraries +import dataclasses + +from official.modeling import hyperparams + + +@dataclasses.dataclass +class ResNet(hyperparams.Config): + """ResNet config.""" + model_id: int = 50 + depth_multiplier: float = 1.0 + stem_type: str = 'v0' + se_ratio: float = 0.0 + stochastic_depth_drop_rate: float = 0.0 + resnetd_shortcut: bool = False + replace_stem_max_pool: bool = False + + +@dataclasses.dataclass +class DilatedResNet(hyperparams.Config): + """DilatedResNet config.""" + model_id: int = 50 + output_stride: int = 16 + multigrid: Optional[List[int]] = None + stem_type: str = 'v0' + last_stage_repeats: int = 1 + se_ratio: float = 0.0 + stochastic_depth_drop_rate: float = 0.0 + + +@dataclasses.dataclass +class EfficientNet(hyperparams.Config): + """EfficientNet config.""" + model_id: str = 'b0' + se_ratio: float = 0.0 + stochastic_depth_drop_rate: float = 0.0 + + +@dataclasses.dataclass +class MobileNet(hyperparams.Config): + """Mobilenet config.""" + model_id: str = 'MobileNetV2' + filter_size_scale: float = 1.0 + stochastic_depth_drop_rate: float = 0.0 + + +@dataclasses.dataclass +class SpineNet(hyperparams.Config): + """SpineNet config.""" + model_id: str = '49' + stochastic_depth_drop_rate: float = 0.0 + + +@dataclasses.dataclass +class SpineNetMobile(hyperparams.Config): + """SpineNet config.""" + model_id: str = '49' + stochastic_depth_drop_rate: float = 0.0 + se_ratio: float = 0.2 + expand_ratio: int = 6 + + +@dataclasses.dataclass +class 
RevNet(hyperparams.Config): + """RevNet config.""" + # Specifies the depth of RevNet. + model_id: int = 56 + + +@dataclasses.dataclass +class Backbone(hyperparams.OneOfConfig): + """Configuration for backbones. + + Attributes: + type: 'str', type of backbone be used, one of the fields below. + resnet: resnet backbone config. + dilated_resnet: dilated resnet backbone for semantic segmentation config. + revnet: revnet backbone config. + efficientnet: efficientnet backbone config. + spinenet: spinenet backbone config. + spinenet_mobile: mobile spinenet backbone config. + mobilenet: mobilenet backbone config. + """ + type: Optional[str] = None + resnet: ResNet = ResNet() + dilated_resnet: DilatedResNet = DilatedResNet() + revnet: RevNet = RevNet() + efficientnet: EfficientNet = EfficientNet() + spinenet: SpineNet = SpineNet() + spinenet_mobile: SpineNetMobile = SpineNetMobile() + mobilenet: MobileNet = MobileNet() diff --git a/official/vision/beta/configs/backbones_3d.py b/official/vision/beta/configs/backbones_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..d23df73ed60d2714e86ba68b94c88f1f4295e10b --- /dev/null +++ b/official/vision/beta/configs/backbones_3d.py @@ -0,0 +1,107 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""3D Backbones configurations.""" +from typing import Optional, Tuple + +# Import libraries +import dataclasses + +from official.modeling import hyperparams + + +@dataclasses.dataclass +class ResNet3DBlock(hyperparams.Config): + """Configuration of a ResNet 3D block.""" + temporal_strides: int = 1 + temporal_kernel_sizes: Tuple[int, ...] = () + use_self_gating: bool = False + + +@dataclasses.dataclass +class ResNet3D(hyperparams.Config): + """ResNet config.""" + model_id: int = 50 + stem_type: str = 'v0' + stem_conv_temporal_kernel_size: int = 5 + stem_conv_temporal_stride: int = 2 + stem_pool_temporal_stride: int = 2 + block_specs: Tuple[ResNet3DBlock, ...] = () + stochastic_depth_drop_rate: float = 0.0 + se_ratio: float = 0.0 + + +@dataclasses.dataclass +class ResNet3D50(ResNet3D): + """Block specifications of the Resnet50 (3D) model.""" + model_id: int = 50 + block_specs: Tuple[ + ResNet3DBlock, ResNet3DBlock, ResNet3DBlock, ResNet3DBlock] = ( + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(3, 3, 3), + use_self_gating=True), + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(3, 1, 3, 1), + use_self_gating=True), + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(3, 1, 3, 1, 3, 1), + use_self_gating=True), + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(1, 3, 1), + use_self_gating=True)) + + +@dataclasses.dataclass +class ResNet3DRS(ResNet3D): + """Block specifications of the ResNet-RS (3D) model.""" + model_id: int = 50 + stem_type: str = 'v1' + stem_conv_temporal_kernel_size: int = 5 + stem_conv_temporal_stride: int = 2 + stem_pool_temporal_stride: int = 2 + stochastic_depth_drop_rate: float = 0.1 + se_ratio: float = 0.2 + block_specs: Tuple[ + ResNet3DBlock, ResNet3DBlock, ResNet3DBlock, ResNet3DBlock] = ( + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(1,), + use_self_gating=True), + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(1,), + use_self_gating=True), + 
ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(3,), + use_self_gating=True), + ResNet3DBlock(temporal_strides=1, + temporal_kernel_sizes=(3,), + use_self_gating=True)) + + +_RESNET3D50_DEFAULT_CFG = ResNet3D50() +_RESNET3DRS_DEFAULT_CFG = ResNet3DRS() + + +@dataclasses.dataclass +class Backbone3D(hyperparams.OneOfConfig): + """Configuration for backbones. + + Attributes: + type: 'str', type of backbone be used, one of the fields below. + resnet_3d: resnet3d backbone config. + resnet_3d_rs: resnet3d-rs backbone config. + """ + type: Optional[str] = None + resnet_3d: ResNet3D = _RESNET3D50_DEFAULT_CFG + resnet_3d_rs: ResNet3D = _RESNET3DRS_DEFAULT_CFG diff --git a/official/vision/beta/configs/common.py b/official/vision/beta/configs/common.py new file mode 100644 index 0000000000000000000000000000000000000000..1573873eb52b447c80c8eb20bbe975c5e9722a8a --- /dev/null +++ b/official/vision/beta/configs/common.py @@ -0,0 +1,74 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Common configurations.""" + +from typing import Optional +# Import libraries + +import dataclasses + +from official.core import config_definitions as cfg +from official.modeling import hyperparams + + +@dataclasses.dataclass +class RandAugment(hyperparams.Config): + """Configuration for RandAugment.""" + num_layers: int = 2 + magnitude: float = 10 + cutout_const: float = 40 + translate_const: float = 10 + + +@dataclasses.dataclass +class AutoAugment(hyperparams.Config): + """Configuration for AutoAugment.""" + augmentation_name: str = 'v0' + cutout_const: float = 100 + translate_const: float = 250 + + +@dataclasses.dataclass +class Augmentation(hyperparams.OneOfConfig): + """Configuration for input data augmentation. + + Attributes: + type: 'str', type of augmentation to be used, one of the fields below. + randaug: RandAugment config. + autoaug: AutoAugment config. + """ + type: Optional[str] = None + randaug: RandAugment = RandAugment() + autoaug: AutoAugment = AutoAugment() + + +@dataclasses.dataclass +class NormActivation(hyperparams.Config): + activation: str = 'relu' + use_sync_bn: bool = True + norm_momentum: float = 0.99 + norm_epsilon: float = 0.001 + + +@dataclasses.dataclass +class PseudoLabelDataConfig(cfg.DataConfig): + """Pseudo Label input config for training.""" + input_path: str = '' + data_ratio: float = 1.0 # Per-batch ratio of pseudo-labeled to labeled data. + aug_rand_hflip: bool = True + aug_type: Optional[ + Augmentation] = None # Choose from AutoAugment and RandAugment. + file_type: str = 'tfrecord' diff --git a/official/vision/beta/configs/decoders.py b/official/vision/beta/configs/decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..3d028316215e9221ad26b312b070b6d2636e83a4 --- /dev/null +++ b/official/vision/beta/configs/decoders.py @@ -0,0 +1,68 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Decoders configurations.""" +from typing import Optional, List + +# Import libraries +import dataclasses + +from official.modeling import hyperparams + + +@dataclasses.dataclass +class Identity(hyperparams.Config): + """Identity config.""" + pass + + +@dataclasses.dataclass +class FPN(hyperparams.Config): + """FPN config.""" + num_filters: int = 256 + use_separable_conv: bool = False + + +@dataclasses.dataclass +class NASFPN(hyperparams.Config): + """NASFPN config.""" + num_filters: int = 256 + num_repeats: int = 5 + use_separable_conv: bool = False + + +@dataclasses.dataclass +class ASPP(hyperparams.Config): + """ASPP config.""" + level: int = 4 + dilation_rates: List[int] = dataclasses.field(default_factory=list) + dropout_rate: float = 0.0 + num_filters: int = 256 + pool_kernel_size: Optional[List[int]] = None # Use global average pooling. + + +@dataclasses.dataclass +class Decoder(hyperparams.OneOfConfig): + """Configuration for decoders. + + Attributes: + type: 'str', type of decoder be used, one of the fields below. + fpn: fpn config. 
+ """ + type: Optional[str] = None + fpn: FPN = FPN() + nasfpn: NASFPN = NASFPN() + identity: Identity = Identity() + aspp: ASPP = ASPP() diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff1a0719e6f179c98674ba7723bd1695aaa90241 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_gpu.yaml @@ -0,0 +1,49 @@ +# MobileNetV2_1.0 ImageNet classification. 71.0% top-1 and 90.0% top-5 accuracy. +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'mobilenet' + mobilenet: + model_id: 'MobileNetV2' + filter_size_scale: 1.0 + dropout_rate: 0.2 + losses: + l2_weight_decay: 0.00001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 1024 # 128 * 8 + dtype: 'float16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 1024 # 128 * 8 + dtype: 'float16' + drop_remainder: false +trainer: + train_steps: 625500 # 500 epochs + validation_steps: 49 + validation_interval: 1251 + steps_per_loop: 1251 # NUM_EXAMPLES (1281167) // global_batch_size + summary_interval: 1251 + checkpoint_interval: 1251 + optimizer_config: + learning_rate: + type: 'exponential' + exponential: + initial_learning_rate: 0.064 # 0.008 * batch_size / 128 + decay_steps: 3127 # 2.5 * steps_per_epoch + decay_rate: 0.96 + staircase: true + warmup: + type: 'linear' + linear: + warmup_steps: 6255 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml 
b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5df9d6e74a44617b91625042f5821fe8c967d9f --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_mobilenetv2_tpu.yaml @@ -0,0 +1,48 @@ +# MobileNetV2_1.0 ImageNet classification. 72.72% top-1 and 91.05% top-5 accuracy. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'mobilenet' + mobilenet: + model_id: 'MobileNetV2' + filter_size_scale: 1.0 + dropout_rate: 0.2 + losses: + l2_weight_decay: 0.00001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 156000 # 500 epochs + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 # NUM_EXAMPLES (1281167) // global_batch_size + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + learning_rate: + type: 'exponential' + exponential: + initial_learning_rate: 0.256 # 0.008 * batch_size / 128 + decay_steps: 780 # 2.5 * steps_per_epoch + decay_rate: 0.96 + staircase: true + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_deeplab_tpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_deeplab_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d7d295963752ff64ee39e6d159eb99831da3306 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_deeplab_tpu.yaml @@ -0,0 +1,57 @@ +# Top-1 accuracy 81.6% on 
ImageNet +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'dilated_resnet' + dilated_resnet: + model_id: 101 + output_stride: 16 + stem_type: 'v1' + se_ratio: 0.25 + stochastic_depth_drop_rate: 0.2 + multigrid: [1, 2, 4] + last_stage_repeats: 1 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_policy: 'randaug' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2600f58faa595d2100bc0325c7c4fdc83c9517c1 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet101_tpu.yaml @@ -0,0 +1,50 @@ +# ResNet-101 ImageNet classification. 79.1% top-1 and 94.5% top-5 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 101 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 62400 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 62400 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c81953e2f6c06eb1d67a619d3106c7195da4e20 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet152_tpu.yaml @@ -0,0 +1,50 @@ +# ResNet-152 ImageNet classification. 79.4% top-1 and 94.7% top-5 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 152 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 62400 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 62400 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_deeplab_tpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_deeplab_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11bdafbc35d4c6f63625b5990de0167a78a7e6b0 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_deeplab_tpu.yaml @@ -0,0 +1,50 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'dilated_resnet' + dilated_resnet: + model_id: 50 + output_stride: 16 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + 
global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 62400 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 62400 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_gpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd6a4dc1618bfa4dbcd30410966ee365284b7cf8 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_gpu.yaml @@ -0,0 +1,48 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 2048 + dtype: 'float16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 2048 + dtype: 'float16' + drop_remainder: false +trainer: + train_steps: 56160 + validation_steps: 25 + validation_interval: 625 + steps_per_loop: 625 + summary_interval: 625 + checkpoint_interval: 625 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [18750, 37500, 50000] + values: [0.8, 0.08, 0.008, 0.0008] + warmup: + type: 'linear' + linear: + warmup_steps: 3125 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tfds_tpu.yaml 
b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tfds_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1506b48f903b0966e52e309b4be9f8cfc1faca6a --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tfds_tpu.yaml @@ -0,0 +1,56 @@ +# ResNet-50 ImageNet classification. 78.1% top-1 and 93.9% top-5 accuracy. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: '' + tfds_name: 'imagenet2012' + tfds_split: 'train' + sharding: true + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: '' + tfds_name: 'imagenet2012' + tfds_split: 'validation' + sharding: true + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 62400 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 62400 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fd10535aa8d39a89c35853dd21a2b3d4e7930d8 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnet50_tpu.yaml @@ -0,0 +1,50 @@ +# ResNet-50 ImageNet classification. 78.1% top-1 and 93.9% top-5 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + norm_activation: + activation: 'swish' + losses: + l2_weight_decay: 0.0001 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 62400 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 62400 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c9e7b80a02386093981e584440c8b29ce395b83 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i160.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-101 ImageNet classification. 80.2% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [160, 160, 3] + backbone: + type: 'resnet' + resnet: + model_id: 101 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml new file mode 100644 index 0000000000000000000000000000000000000000..576c48625055d0e61d948d732cc16d4e5020c131 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs101_i192.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-101 ImageNet classification. 81.3% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [192, 192, 3] + backbone: + type: 'resnet' + resnet: + model_id: 101 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1c8edc463f8ce3150f4414f90d88df28abd77e9 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i192.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-152 ImageNet classification. 81.9% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [192, 192, 3] + backbone: + type: 'resnet' + resnet: + model_id: 152 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ec14bae5ab2f20867498ff7ba46af855d61f73c --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i224.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-152 ImageNet classification. 82.5% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 152 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91b53d6217f7579b6764ddaf58795f0ee14f58dc --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs152_i256.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-152 ImageNet classification. 83.1% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'resnet' + resnet: + model_id: 152 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d76c010170070426745607aa2690799136c0665 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs200_i256.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-200 ImageNet classification. 83.5% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'resnet' + resnet: + model_id: 200 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.1 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7c6a644e2cbb784c18b008d26a2986eacbf98e6 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs270_i256.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-270 ImageNet classification. 83.6% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'resnet' + resnet: + model_id: 270 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.1 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b2d3fe261c21bb789096677e57f2a49d2b75d57 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-350 ImageNet classification. 83.7% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'resnet' + resnet: + model_id: 350 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.1 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i320.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i320.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36cdba7bb43cd0d41b46e5c87eead2a657afe651 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i320.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-350 ImageNet classification. 84.2% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [320, 320, 3] + backbone: + type: 'resnet' + resnet: + model_id: 350 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.1 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.4 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i320.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i320.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b02b7e006a8239fe86b3d036fbfb484dc0b0995 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i320.yaml @@ -0,0 +1,63 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [320, 320, 3] + backbone: + type: 'resnet' + resnet: + model_id: 420 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + 
stochastic_depth_drop_rate: 0.1 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.4 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 15 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a57f41f3908c51fb2a4249e80afe8ef8fff48c88 --- /dev/null +++ b/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs50_i160.yaml @@ -0,0 +1,64 @@ +# ResNet-RS-50 ImageNet classification. 79.1% top-1 accuracy. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + num_classes: 1001 + input_size: [160, 160, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + replace_stem_max_pool: true + resnetd_shortcut: true + se_ratio: 0.25 + stem_type: 'v1' + stochastic_depth_drop_rate: 0.0 + norm_activation: + activation: 'swish' + norm_momentum: 0.0 + use_sync_bn: false + dropout_rate: 0.25 + losses: + l2_weight_decay: 0.00004 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 4096 + dtype: 'bfloat16' + aug_type: + type: 'randaug' + randaug: + magnitude: 10 + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 4096 + dtype: 'bfloat16' + drop_remainder: false +trainer: + train_steps: 109200 + validation_steps: 13 + validation_interval: 312 + steps_per_loop: 312 + summary_interval: 312 + checkpoint_interval: 312 + optimizer_config: + ema: + average_decay: 0.9999 + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 + decay_steps: 109200 + warmup: + type: 'linear' + linear: + warmup_steps: 1560 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_casrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_casrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6cad50da74792030f46d0b6cc460b25550b6d122 --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_casrcnn_tpu.yaml @@ -0,0 +1,57 @@ +# Expect to reach: box mAP: 51.6%, mask mAP: 44.5% on COCO +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.5 + losses: + l2_weight_decay: 0.00004 + model: + anchor: 
+ anchor_size: 4.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [1280, 1280, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '143' + type: 'spinenet' + decoder: + type: 'identity' + detection_head: + cascade_class_ensemble: true + class_agnostic_bbox_pred: true + rpn_head: + num_convs: 2 + num_filters: 256 + roi_sampler: + cascade_iou_thresholds: [0.7] + foreground_iou_threshold: 0.6 + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + activation: 'swish' + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 162050 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [148160, 157420] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d5ec8ae4819f337c8ef035558467726e7f8925b --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_mrcnn_tpu.yaml @@ -0,0 +1,47 @@ +# Expect to reach: box mAP: 49.3%, mask mAP: 43.4% on COCO +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00004 + model: + anchor: + anchor_size: 4.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [1280, 1280, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '143' + type: 'spinenet' + decoder: + type: 'identity' + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 231000 + optimizer_config: + learning_rate: + type: 
'stepwise' + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_casrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_casrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49d17ddb9471dffb840455340768281b5b25b063 --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_casrcnn_tpu.yaml @@ -0,0 +1,56 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00004 + model: + anchor: + anchor_size: 3.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [640, 640, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '49' + type: 'spinenet' + decoder: + type: 'identity' + detection_head: + cascade_class_ensemble: true + class_agnostic_bbox_pred: true + rpn_head: + num_convs: 2 + num_filters: 256 + roi_sampler: + cascade_iou_thresholds: [0.7] + foreground_iou_threshold: 0.6 + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + activation: 'swish' + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 231000 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [219450, 226380] + values: [0.28, 0.028, 0.0028] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ac1ae428bf8806f1ebb60a4e07f0163d3b49854 --- /dev/null +++ 
b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_mrcnn_tpu.yaml @@ -0,0 +1,47 @@ +# Expect to reach: box mAP: 43.2%, mask mAP: 38.3% on COCO +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00004 + model: + anchor: + anchor_size: 3.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [640, 640, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '49' + type: 'spinenet' + decoder: + type: 'identity' + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 231000 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_casrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_casrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..612608333c3e02652d665335314ea6359cc5267d --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_casrcnn_tpu.yaml @@ -0,0 +1,56 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00004 + model: + anchor: + anchor_size: 3.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [1024, 1024, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '96' + type: 'spinenet' + decoder: + type: 'identity' + detection_head: + cascade_class_ensemble: true + 
class_agnostic_bbox_pred: true + rpn_head: + num_convs: 2 + num_filters: 256 + roi_sampler: + cascade_iou_thresholds: [0.7] + foreground_iou_threshold: 0.6 + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + activation: 'swish' + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 231000 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9609b7eee34d0c0dadba209c7dda173bd7a32562 --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_mrcnn_tpu.yaml @@ -0,0 +1,47 @@ +# Expect to reach: box mAP: 48.1%, mask mAP: 42.4% on COCO +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.1 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00004 + model: + anchor: + anchor_size: 3.0 + num_scales: 3 + min_level: 3 + max_level: 7 + input_size: [1024, 1024, 3] + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '96' + type: 'spinenet' + decoder: + type: 'identity' + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 231000 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml 
b/official/vision/beta/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..218f045105860a76069128b410aa92aa5abc4053 --- /dev/null +++ b/official/vision/beta/configs/experiments/maskrcnn/r50fpn_640_coco_scratch_tpu4x4.yaml @@ -0,0 +1,36 @@ +# Expect to reach: box mAP: 42.3%, mask mAP: 37.6% on COCO +task: + init_checkpoint: null + train_data: + global_batch_size: 256 + parser: + aug_rand_hflip: true + aug_scale_min: 0.5 + aug_scale_max: 2.0 + losses: + l2_weight_decay: 0.00008 + model: + anchor: + anchor_size: 3.0 + min_level: 3 + max_level: 7 + input_size: [640, 640, 3] + norm_activation: + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + detection_generator: + pre_nms_top_k: 1000 +trainer: + train_steps: 162050 + optimizer_config: + learning_rate: + type: 'stepwise' + stepwise: + boundaries: [148160, 157420] + values: [0.32, 0.032, 0.0032] + warmup: + type: 'linear' + linear: + warmup_steps: 2000 + warmup_learning_rate: 0.0067 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..438fe031a8bdb9e1daaadcc6b977aa1d8319586f --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml @@ -0,0 +1,58 @@ +# SpineNet-143 COCO detection with protocal C config. Expecting 50.0% AP. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 4.0e-05 + model: + anchor: + anchor_size: 4 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '143' + type: 'spinenet' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 256 + input_size: [1280, 1280, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.1 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 231000 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet190_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet190_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc0ea1f94ec47baf80bde094e193ad8d1349e551 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet190_tpu.yaml @@ -0,0 +1,57 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 4.0e-05 + model: + anchor: + anchor_size: 4 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '190' + type: 'spinenet' + decoder: + type: 'identity' + head: + num_convs: 7 + num_filters: 512 + input_size: [1280, 1280, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 
+ norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.1 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 231000 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..116dd008493838b6299d36d321b0a04bda02a34f --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_mobile_tpu.yaml @@ -0,0 +1,60 @@ +# --experiment_type=retinanet_spinenet_mobile_coco +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 3.0e-05 + model: + anchor: + anchor_size: 3 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet_mobile: + stochastic_depth_drop_rate: 0.2 + model_id: '49' + se_ratio: 0.2 + type: 'spinenet_mobile' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 48 + use_separable_conv: true + input_size: [384, 384, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.5 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + 
boundaries: [265650, 272580] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 277200 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..725e1fc9b84fa4516d5ef431f2dd335eb8d65bc1 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml @@ -0,0 +1,58 @@ +# SpineNet-49 COCO detection with protocal C config. Expecting 44.2% AP. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 4.0e-05 + model: + anchor: + anchor_size: 3 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '49' + type: 'spinenet' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 256 + input_size: [640, 640, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.1 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 231000 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml 
b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b89a6267596a129ca4e63996120405309508168 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49s_mobile_tpu.yaml @@ -0,0 +1,60 @@ +# --experiment_type=retinanet_spinenet_mobile_coco +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 3.0e-05 + model: + anchor: + anchor_size: 3 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet_mobile: + stochastic_depth_drop_rate: 0.2 + model_id: '49S' + se_ratio: 0.2 + type: 'spinenet_mobile' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 40 + use_separable_conv: true + input_size: [384, 384, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.5 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [265650, 272580] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 277200 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8c9bf3a0b1f6506c61a81319737cecb9794e17a8 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet49xs_mobile_tpu.yaml @@ -0,0 +1,60 @@ +# 
--experiment_type=retinanet_spinenet_mobile_coco +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 3.0e-05 + model: + anchor: + anchor_size: 3 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet_mobile: + stochastic_depth_drop_rate: 0.2 + model_id: '49XS' + se_ratio: 0.2 + type: 'spinenet_mobile' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 24 + use_separable_conv: true + input_size: [256, 256, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.5 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [265650, 272580] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 277200 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c75d667753fa859cfd585235f70c136ce349a488 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml @@ -0,0 +1,58 @@ +# SpineNet-96 COCO detection with protocol C config. Expecting 48.5% AP. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + losses: + l2_weight_decay: 4.0e-05 + model: + anchor: + anchor_size: 3 + aspect_ratios: [0.5, 1.0, 2.0] + num_scales: 3 + backbone: + spinenet: + stochastic_depth_drop_rate: 0.2 + model_id: '96' + type: 'spinenet' + decoder: + type: 'identity' + head: + num_convs: 4 + num_filters: 256 + input_size: [1024, 1024, 3] + max_level: 7 + min_level: 3 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + dtype: 'bfloat16' + global_batch_size: 256 + is_training: true + parser: + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.1 + validation_data: + dtype: 'bfloat16' + global_batch_size: 8 + is_training: false +trainer: + checkpoint_interval: 462 + optimizer_config: + learning_rate: + stepwise: + boundaries: [219450, 226380] + values: [0.32, 0.032, 0.0032] + type: 'stepwise' + warmup: + linear: + warmup_learning_rate: 0.0067 + warmup_steps: 2000 + steps_per_loop: 462 + train_steps: 231000 + validation_interval: 462 + validation_steps: 625 diff --git a/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml b/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f9a30a34438042eb164631284ff0efe5279f8c3 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tfds_tpu.yaml @@ -0,0 +1,34 @@ +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + annotation_file: '' # Can't use annotation file when tfds is used. 
+ losses: + l2_weight_decay: 0.0001 + model: + num_classes: 91 + max_level: 7 + min_level: 3 + input_size: [640, 640, 3] + norm_activation: + activation: relu + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + train_data: + tfds_name: 'coco/2017' + tfds_split: 'train' + drop_remainder: true + dtype: bfloat16 + global_batch_size: 256 + input_path: '' + is_training: true + shuffle_buffer_size: 1000 + validation_data: + tfds_name: 'coco/2017' + tfds_split: 'validation' + drop_remainder: true + dtype: bfloat16 + global_batch_size: 8 + input_path: '' + is_training: false diff --git a/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml b/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46b1f3cad7c298984ab1b398c1bf4669415b6890 --- /dev/null +++ b/official/vision/beta/configs/experiments/retinanet/resnet50fpn_coco_tpu4x4_benchmark.yaml @@ -0,0 +1,7 @@ +# Benchmarks runs on same instnace, change eval batch size to fit on 4x4 tpu +task: + validation_data: + global_batch_size: 32 +trainer: + validation_interval: 1560 + validation_steps: 156 diff --git a/official/vision/beta/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml b/official/vision/beta/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ffc7689d2f2b3741e065664cb63d265544e7d2d --- /dev/null +++ b/official/vision/beta/configs/experiments/semantic_segmentation/deeplabv3plus_resnet101_cityscapes_tfds_tpu.yaml @@ -0,0 +1,78 @@ +# Use your own cityscapes preprocessed dataset. 79% meanIoU. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'float32' +task: + model: + num_classes: 19 + input_size: [null, null, 3] + backbone: + type: 'dilated_resnet' + dilated_resnet: + model_id: 101 + output_stride: 16 + stem_type: 'v1' + se_ratio: 0.25 + stochastic_depth_drop_rate: 0.2 + multigrid: [1, 2, 4] + last_stage_repeats: 1 + decoder: + aspp: + pool_kernel_size: [512, 1024] + head: + feature_fusion: 'deeplabv3plus' + low_level: 2 + low_level_num_filters: 48 + norm_activation: + activation: 'swish' + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + losses: + top_k_percent_pixels: 1.0 # only backpropagate loss for the topk 100% pixels. + train_data: + output_size: [1024, 2048] + crop_size: [512, 1024] + input_path: '' + tfds_name: 'cityscapes/semantic_segmentation' + tfds_split: 'train' + is_training: true + global_batch_size: 16 + dtype: 'float32' + aug_rand_hflip: true + aug_scale_max: 2.0 + aug_scale_min: 0.5 + validation_data: + output_size: [1024, 2048] + input_path: '' + tfds_name: 'cityscapes/semantic_segmentation' + tfds_split: 'validation' + is_training: false + global_batch_size: 16 + dtype: 'float32' + drop_remainder: false + resize_eval_groundtruth: true +trainer: + optimizer_config: + learning_rate: + polynomial: + decay_steps: 90000 + initial_learning_rate: 0.01 + power: 0.9 + type: polynomial + optimizer: + sgd: + momentum: 0.9 + type: sgd + warmup: + linear: + name: linear + warmup_learning_rate: 0 + warmup_steps: 925 + type: linear + steps_per_loop: 185 + summary_interval: 185 + train_steps: 90000 + validation_interval: 185 + validation_steps: 31 + checkpoint_interval: 185 diff --git a/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d9158c2bd179b7d7284f063efaf2e97a26c50edf --- /dev/null +++ 
b/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml @@ -0,0 +1,88 @@ +# 3D ResNet-50 video classification on Kinetics-400. +# +# --experiment_type=video_classification_kinetics400 +# Expected accuracy: 77.0% top-1, 93.0% top-5. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + dropout_rate: 0.5 + norm_activation: + use_sync_bn: false + backbone: + resnet_3d: + block_specs: !!python/tuple + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + model_id: 50 + stem_conv_temporal_kernel_size: 5 + stem_conv_temporal_stride: 2 + stem_pool_temporal_stride: 1 + train_data: + name: kinetics400 + feature_shape: !!python/tuple + - 32 + - 224 + - 224 + - 3 + temporal_stride: 2 + global_batch_size: 1024 + dtype: 'bfloat16' + shuffle_buffer_size: 1024 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + aug_min_aspect_ratio: 0.5 + validation_data: + name: kinetics400 + feature_shape: !!python/tuple + - 32 + - 256 + - 256 + - 3 + temporal_stride: 2 + num_test_clips: 10 + num_test_crops: 3 + global_batch_size: 64 + dtype: 'bfloat16' + drop_remainder: false +trainer: + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.8 + decay_steps: 42104 + warmup: + linear: + warmup_steps: 1053 + train_steps: 42104 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml 
b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d68f539601c70c6b6a923d3087c5f4e7157ee2a --- /dev/null +++ b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml @@ -0,0 +1,98 @@ +# 3D ResNet-RS-50 video classification on Kinetics-400. +# +# --experiment_type=video_classification_kinetics400 +# Expected accuracy: 78.2% top-1 accuracy. +runtime: + mixed_precision_dtype: bfloat16 +task: + losses: + l2_weight_decay: 0.00004 + label_smoothing: 0.1 + one_hot: true + model: + aggregate_endpoints: false + backbone: + resnet_3d_rs: + model_id: 50 + stem_type: 'v1' + stem_conv_temporal_kernel_size: 5 + stem_conv_temporal_stride: 2 + stem_pool_temporal_stride: 1 + stochastic_depth_drop_rate: 0.1 + se_ratio: 0.25 + type: resnet_3d_rs + dropout_rate: 0.5 + model_type: video_classification + norm_activation: + activation: relu + norm_epsilon: 1.0e-05 + norm_momentum: 0.0 + use_sync_bn: false + train_data: + data_format: channels_last + drop_remainder: true + dtype: bfloat16 + feature_shape: !!python/tuple + - 32 + - 224 + - 224 + - 3 + file_type: sstable + global_batch_size: 1024 + is_training: true + min_image_size: 256 + name: kinetics400 + num_channels: 3 + num_classes: 400 + num_examples: 215570 + num_test_clips: 1 + num_test_crops: 1 + one_hot: true + temporal_stride: 2 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + aug_min_aspect_ratio: 0.5 + validation_data: + data_format: channels_last + drop_remainder: false + dtype: bfloat16 + feature_shape: !!python/tuple + - 32 + - 256 + - 256 + - 3 + file_type: sstable + global_batch_size: 64 + is_training: false + min_image_size: 256 + name: kinetics400 + num_channels: 3 + num_classes: 400 + num_examples: 17706 + num_test_clips: 10 + num_test_crops: 3 + one_hot: true + temporal_stride: 2 +trainer: + checkpoint_interval: 210 + max_to_keep: 3 + 
optimizer_config: + ema: + average_decay: 0.9999 + learning_rate: + cosine: + decay_steps: 73682 + initial_learning_rate: 0.8 + name: CosineDecay + type: cosine + warmup: + linear: + name: linear + warmup_learning_rate: 0 + warmup_steps: 1050 + type: linear + train_steps: 73682 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e6793a374e02743395ba86705d971f3e1204dc2 --- /dev/null +++ b/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml @@ -0,0 +1,88 @@ +# SlowOnly 16x4 video classification on Kinetics-400. +# +# --experiment_type=video_classification_kinetics400 +# Expected accuracy: 75.6% top-1, 92.1% top-5. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + dropout_rate: 0.5 + norm_activation: + use_sync_bn: false + backbone: + resnet_3d: + block_specs: !!python/tuple + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + model_id: 50 + stem_conv_temporal_kernel_size: 1 + stem_conv_temporal_stride: 1 + stem_pool_temporal_stride: 1 + train_data: + name: kinetics400 + feature_shape: !!python/tuple + - 16 + - 224 + - 224 + - 3 + temporal_stride: 4 + global_batch_size: 1024 + dtype: 'bfloat16' + shuffle_buffer_size: 1024 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + 
aug_min_aspect_ratio: 0.5 + validation_data: + name: kinetics400 + feature_shape: !!python/tuple + - 16 + - 256 + - 256 + - 3 + temporal_stride: 4 + num_test_clips: 10 + num_test_crops: 3 + global_batch_size: 64 + dtype: 'bfloat16' + drop_remainder: false +trainer: + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.8 + decay_steps: 42104 + warmup: + linear: + warmup_steps: 1053 + train_steps: 42104 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0bcd881ef37d55c4275cf2d5bd32eb09a6772a3 --- /dev/null +++ b/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml @@ -0,0 +1,88 @@ +# SlowOnly 8x8 video classification on Kinetics-400. +# +# --experiment_type=video_classification_kinetics400 +# Expected accuracy: 74.1% top-1, 91.4% top-5. 
+runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + dropout_rate: 0.5 + norm_activation: + use_sync_bn: false + backbone: + resnet_3d: + block_specs: !!python/tuple + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + model_id: 50 + stem_conv_temporal_kernel_size: 1 + stem_conv_temporal_stride: 1 + stem_pool_temporal_stride: 1 + train_data: + name: kinetics400 + feature_shape: !!python/tuple + - 8 + - 224 + - 224 + - 3 + temporal_stride: 8 + global_batch_size: 1024 + dtype: 'bfloat16' + shuffle_buffer_size: 1024 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + aug_min_aspect_ratio: 0.5 + validation_data: + name: kinetics400 + feature_shape: !!python/tuple + - 8 + - 256 + - 256 + - 3 + temporal_stride: 8 + num_test_clips: 10 + num_test_crops: 3 + global_batch_size: 64 + dtype: 'bfloat16' + drop_remainder: false +trainer: + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.8 + decay_steps: 42104 + warmup: + linear: + warmup_steps: 1053 + train_steps: 42104 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ceb38608d6da92b674ce8864f41c646f63ea4f1e --- /dev/null +++ b/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml @@ -0,0 +1,88 @@ +# 3D ResNet-50 video 
classification on Kinetics-600. +# +# --experiment_type=video_classification_kinetics600 +# Expected accuracy: 79.5% top-1, 94.8% top-5. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + dropout_rate: 0.5 + norm_activation: + use_sync_bn: false + backbone: + resnet_3d: + block_specs: !!python/tuple + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + model_id: 50 + stem_conv_temporal_kernel_size: 5 + stem_conv_temporal_stride: 2 + stem_pool_temporal_stride: 1 + train_data: + name: kinetics600 + feature_shape: !!python/tuple + - 32 + - 224 + - 224 + - 3 + temporal_stride: 2 + global_batch_size: 1024 + dtype: 'bfloat16' + shuffle_buffer_size: 1024 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + aug_min_aspect_ratio: 0.5 + validation_data: + name: kinetics600 + feature_shape: !!python/tuple + - 32 + - 256 + - 256 + - 3 + temporal_stride: 2 + num_test_clips: 10 + num_test_crops: 3 + global_batch_size: 64 + dtype: 'bfloat16' + drop_remainder: false +trainer: + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.8 + decay_steps: 71488 + warmup: + linear: + warmup_steps: 1787 + train_steps: 71488 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43f656ce3f0f501c3e49460c39e564ce5411618a --- /dev/null +++ 
b/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml @@ -0,0 +1,88 @@ +# SlowOnly 8x8 video classification on Kinetics-600. +# +# --experiment_type=video_classification_kinetics600 +# Expected accuracy: 77.3% top-1, 93.6% top-5. +runtime: + distribution_strategy: 'tpu' + mixed_precision_dtype: 'bfloat16' +task: + model: + dropout_rate: 0.5 + norm_activation: + use_sync_bn: false + backbone: + resnet_3d: + block_specs: !!python/tuple + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 1 + - 1 + - 1 + - 1 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + - temporal_kernel_sizes: !!python/tuple + - 3 + - 3 + - 3 + temporal_strides: 1 + use_self_gating: false + model_id: 50 + stem_conv_temporal_kernel_size: 1 + stem_conv_temporal_stride: 1 + stem_pool_temporal_stride: 1 + train_data: + name: kinetics600 + feature_shape: !!python/tuple + - 8 + - 224 + - 224 + - 3 + temporal_stride: 8 + global_batch_size: 1024 + dtype: 'bfloat16' + shuffle_buffer_size: 1024 + aug_max_area_ratio: 1.0 + aug_max_aspect_ratio: 2.0 + aug_min_area_ratio: 0.08 + aug_min_aspect_ratio: 0.5 + validation_data: + name: kinetics600 + feature_shape: !!python/tuple + - 8 + - 256 + - 256 + - 3 + temporal_stride: 8 + num_test_clips: 10 + num_test_crops: 3 + global_batch_size: 64 + dtype: 'bfloat16' + drop_remainder: false +trainer: + optimizer_config: + learning_rate: + cosine: + initial_learning_rate: 0.8 + decay_steps: 71488 + warmup: + linear: + warmup_steps: 1787 + train_steps: 71488 + steps_per_loop: 500 + summary_interval: 500 + validation_interval: 500 diff --git a/official/vision/beta/configs/image_classification.py b/official/vision/beta/configs/image_classification.py new file mode 100644 index 
0000000000000000000000000000000000000000..e80c85f87fd404fe8c2fab31ad29783c88139960 --- /dev/null +++ b/official/vision/beta/configs/image_classification.py @@ -0,0 +1,389 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Image classification configuration definition.""" +import os +from typing import List, Optional + +import dataclasses + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.modeling import optimization +from official.vision.beta.configs import common +from official.vision.beta.configs import backbones + + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + global_batch_size: int = 0 + is_training: bool = True + dtype: str = 'float32' + shuffle_buffer_size: int = 10000 + cycle_length: int = 10 + is_multilabel: bool = False + aug_rand_hflip: bool = True + aug_type: Optional[ + common.Augmentation] = None # Choose from AutoAugment and RandAugment. + file_type: str = 'tfrecord' + image_field_key: str = 'image/encoded' + label_field_key: str = 'image/class/label' + + # Keep for backward compatibility. + aug_policy: Optional[str] = None # None, 'autoaug', or 'randaug'. 
+ randaug_magnitude: Optional[int] = 10 + + +@dataclasses.dataclass +class ImageClassificationModel(hyperparams.Config): + """The model config.""" + num_classes: int = 0 + input_size: List[int] = dataclasses.field(default_factory=list) + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet()) + dropout_rate: float = 0.0 + norm_activation: common.NormActivation = common.NormActivation( + use_sync_bn=False) + # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification + add_head_batch_norm: bool = False + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + one_hot: bool = True + label_smoothing: float = 0.0 + l2_weight_decay: float = 0.0 + + +@dataclasses.dataclass +class Evaluation(hyperparams.Config): + top_k: int = 5 + + +@dataclasses.dataclass +class ImageClassificationTask(cfg.TaskConfig): + """The task config.""" + model: ImageClassificationModel = ImageClassificationModel() + train_data: DataConfig = DataConfig(is_training=True) + validation_data: DataConfig = DataConfig(is_training=False) + losses: Losses = Losses() + evaluation: Evaluation = Evaluation() + init_checkpoint: Optional[str] = None + init_checkpoint_modules: str = 'all' # all or backbone + model_output_keys: Optional[List[int]] = dataclasses.field( + default_factory=list) + + +@exp_factory.register_config_factory('image_classification') +def image_classification() -> cfg.ExperimentConfig: + """Image classification general.""" + return cfg.ExperimentConfig( + task=ImageClassificationTask(), + trainer=cfg.TrainerConfig(), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + +IMAGENET_TRAIN_EXAMPLES = 1281167 +IMAGENET_VAL_EXAMPLES = 50000 +IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord' + + +@exp_factory.register_config_factory('resnet_imagenet') +def image_classification_imagenet() -> cfg.ExperimentConfig: + """Image classification on imagenet with resnet.""" + 
train_batch_size = 4096 + eval_batch_size = 4096 + steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + config = cfg.ExperimentConfig( + task=ImageClassificationTask( + model=ImageClassificationModel( + num_classes=1001, + input_size=[224, 224, 3], + backbone=backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50)), + norm_activation=common.NormActivation( + norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size), + validation_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=90 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 30 * steps_per_epoch, 60 * steps_per_epoch, + 80 * steps_per_epoch + ], + 'values': [ + 0.1 * train_batch_size / 256, + 0.01 * train_batch_size / 256, + 0.001 * train_batch_size / 256, + 0.0001 * train_batch_size / 256, + ] + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('resnet_rs_imagenet') +def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig: + """Image classification on imagenet with resnet-rs.""" + train_batch_size = 4096 + eval_batch_size = 4096 + 
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + config = cfg.ExperimentConfig( + task=ImageClassificationTask( + model=ImageClassificationModel( + num_classes=1001, + input_size=[160, 160, 3], + backbone=backbones.Backbone( + type='resnet', + resnet=backbones.ResNet( + model_id=50, + stem_type='v1', + resnetd_shortcut=True, + replace_stem_max_pool=True, + se_ratio=0.25, + stochastic_depth_drop_rate=0.0)), + dropout_rate=0.25, + norm_activation=common.NormActivation( + norm_momentum=0.0, + norm_epsilon=1e-5, + use_sync_bn=False, + activation='swish')), + losses=Losses(l2_weight_decay=4e-5, label_smoothing=0.1), + train_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + aug_type=common.Augmentation( + type='randaug', randaug=common.RandAugment(magnitude=10))), + validation_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=350 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'ema': { + 'average_decay': 0.9999 + }, + 'learning_rate': { + 'type': 'cosine', + 'cosine': { + 'initial_learning_rate': 1.6, + 'decay_steps': 350 * steps_per_epoch + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config + + +@exp_factory.register_config_factory('revnet_imagenet') +def image_classification_imagenet_revnet() -> cfg.ExperimentConfig: + 
"""Returns a revnet config for image classification on imagenet.""" + train_batch_size = 4096 + eval_batch_size = 4096 + steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + + config = cfg.ExperimentConfig( + task=ImageClassificationTask( + model=ImageClassificationModel( + num_classes=1001, + input_size=[224, 224, 3], + backbone=backbones.Backbone( + type='revnet', revnet=backbones.RevNet(model_id=56)), + norm_activation=common.NormActivation( + norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False), + add_head_batch_norm=True), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size), + validation_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=90 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 30 * steps_per_epoch, 60 * steps_per_epoch, + 80 * steps_per_epoch + ], + 'values': [0.8, 0.08, 0.008, 0.0008] + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('mobilenet_imagenet') +def image_classification_imagenet_mobilenet() -> cfg.ExperimentConfig: + """Image classification on imagenet with mobilenet.""" + train_batch_size = 4096 + eval_batch_size = 4096 + 
steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + config = cfg.ExperimentConfig( + task=ImageClassificationTask( + model=ImageClassificationModel( + num_classes=1001, + dropout_rate=0.2, + input_size=[224, 224, 3], + backbone=backbones.Backbone( + type='mobilenet', + mobilenet=backbones.MobileNet( + model_id='MobileNetV2', filter_size_scale=1.0)), + norm_activation=common.NormActivation( + norm_momentum=0.997, norm_epsilon=1e-3, use_sync_bn=False)), + losses=Losses(l2_weight_decay=1e-5, label_smoothing=0.1), + train_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size), + validation_data=DataConfig( + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=500 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'rmsprop', + 'rmsprop': { + 'rho': 0.9, + 'momentum': 0.9, + 'epsilon': 0.002, + } + }, + 'learning_rate': { + 'type': 'exponential', + 'exponential': { + 'initial_learning_rate': + 0.008 * (train_batch_size // 128), + 'decay_steps': + int(2.5 * steps_per_epoch), + 'decay_rate': + 0.98, + 'staircase': + True + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + }, + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config diff --git a/official/vision/beta/configs/image_classification_test.py b/official/vision/beta/configs/image_classification_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a2e6f9a2ee1b28e75e146f5de65dd8fc91b3cc61 
--- /dev/null +++ b/official/vision/beta/configs/image_classification_test.py @@ -0,0 +1,48 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for image_classification.""" +# pylint: disable=unused-import +from absl.testing import parameterized +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.vision import beta +from official.vision.beta.configs import image_classification as exp_cfg + + +class ImageClassificationConfigTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + ('resnet_imagenet',), + ('resnet_rs_imagenet',), + ('revnet_imagenet',), + ('mobilenet_imagenet'), + ) + def test_image_classification_configs(self, config_name): + config = exp_factory.get_exp_config(config_name) + self.assertIsInstance(config, cfg.ExperimentConfig) + self.assertIsInstance(config.task, exp_cfg.ImageClassificationTask) + self.assertIsInstance(config.task.model, + exp_cfg.ImageClassificationModel) + self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig) + config.task.train_data.is_training = None + with self.assertRaises(KeyError): + config.validate() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/configs/maskrcnn.py b/official/vision/beta/configs/maskrcnn.py new file mode 100644 index 
0000000000000000000000000000000000000000..c412a785ababc1319cbc0d8b0bb882fc7982162b --- /dev/null +++ b/official/vision/beta/configs/maskrcnn.py @@ -0,0 +1,496 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Mask R-CNN configuration definition.""" + +import os +from typing import List, Optional + +import dataclasses + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.modeling import optimization +from official.vision.beta.configs import common +from official.vision.beta.configs import decoders +from official.vision.beta.configs import backbones + + +# pylint: disable=missing-class-docstring +@dataclasses.dataclass +class TfExampleDecoder(hyperparams.Config): + regenerate_source_id: bool = False + mask_binarize_threshold: Optional[float] = None + + +@dataclasses.dataclass +class TfExampleDecoderLabelMap(hyperparams.Config): + regenerate_source_id: bool = False + mask_binarize_threshold: Optional[float] = None + label_map: str = '' + + +@dataclasses.dataclass +class DataDecoder(hyperparams.OneOfConfig): + type: Optional[str] = 'simple_decoder' + simple_decoder: TfExampleDecoder = TfExampleDecoder() + label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap() + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + num_channels: int = 3 + match_threshold: float = 
0.5 + unmatched_threshold: float = 0.5 + aug_rand_hflip: bool = False + aug_scale_min: float = 1.0 + aug_scale_max: float = 1.0 + skip_crowd_during_training: bool = True + max_num_instances: int = 100 + rpn_match_threshold: float = 0.7 + rpn_unmatched_threshold: float = 0.3 + rpn_batch_size_per_im: int = 256 + rpn_fg_fraction: float = 0.5 + mask_crop_size: int = 112 + + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + global_batch_size: int = 0 + is_training: bool = False + dtype: str = 'bfloat16' + decoder: DataDecoder = DataDecoder() + parser: Parser = Parser() + shuffle_buffer_size: int = 10000 + file_type: str = 'tfrecord' + + +@dataclasses.dataclass +class Anchor(hyperparams.Config): + num_scales: int = 1 + aspect_ratios: List[float] = dataclasses.field( + default_factory=lambda: [0.5, 1.0, 2.0]) + anchor_size: float = 8.0 + + +@dataclasses.dataclass +class RPNHead(hyperparams.Config): + num_convs: int = 1 + num_filters: int = 256 + use_separable_conv: bool = False + + +@dataclasses.dataclass +class DetectionHead(hyperparams.Config): + num_convs: int = 4 + num_filters: int = 256 + use_separable_conv: bool = False + num_fcs: int = 1 + fc_dims: int = 1024 + class_agnostic_bbox_pred: bool = False # Has to be True for Cascade RCNN. + # If additional IoUs are passed in 'cascade_iou_thresholds' + # then ensemble the class probabilities from all heads. 
+ cascade_class_ensemble: bool = False + + +@dataclasses.dataclass +class ROIGenerator(hyperparams.Config): + pre_nms_top_k: int = 2000 + pre_nms_score_threshold: float = 0.0 + pre_nms_min_size_threshold: float = 0.0 + nms_iou_threshold: float = 0.7 + num_proposals: int = 1000 + test_pre_nms_top_k: int = 1000 + test_pre_nms_score_threshold: float = 0.0 + test_pre_nms_min_size_threshold: float = 0.0 + test_nms_iou_threshold: float = 0.7 + test_num_proposals: int = 1000 + use_batched_nms: bool = False + + +@dataclasses.dataclass +class ROISampler(hyperparams.Config): + mix_gt_boxes: bool = True + num_sampled_rois: int = 512 + foreground_fraction: float = 0.25 + foreground_iou_threshold: float = 0.5 + background_iou_high_threshold: float = 0.5 + background_iou_low_threshold: float = 0.0 + # IoU thresholds for additional FRCNN heads in Cascade mode. + # `foreground_iou_threshold` is the first threshold. + cascade_iou_thresholds: Optional[List[float]] = None + + +@dataclasses.dataclass +class ROIAligner(hyperparams.Config): + crop_size: int = 7 + sample_offset: float = 0.5 + + +@dataclasses.dataclass +class DetectionGenerator(hyperparams.Config): + apply_nms: bool = True + pre_nms_top_k: int = 5000 + pre_nms_score_threshold: float = 0.05 + nms_iou_threshold: float = 0.5 + max_num_detections: int = 100 + use_batched_nms: bool = False + + +@dataclasses.dataclass +class MaskHead(hyperparams.Config): + upsample_factor: int = 2 + num_convs: int = 4 + num_filters: int = 256 + use_separable_conv: bool = False + class_agnostic: bool = False + + +@dataclasses.dataclass +class MaskSampler(hyperparams.Config): + num_sampled_masks: int = 128 + + +@dataclasses.dataclass +class MaskROIAligner(hyperparams.Config): + crop_size: int = 14 + sample_offset: float = 0.5 + + +@dataclasses.dataclass +class MaskRCNN(hyperparams.Config): + num_classes: int = 0 + input_size: List[int] = dataclasses.field(default_factory=list) + min_level: int = 2 + max_level: int = 6 + anchor: Anchor = Anchor() 
+ include_mask: bool = True + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet()) + decoder: decoders.Decoder = decoders.Decoder( + type='fpn', fpn=decoders.FPN()) + rpn_head: RPNHead = RPNHead() + detection_head: DetectionHead = DetectionHead() + roi_generator: ROIGenerator = ROIGenerator() + roi_sampler: ROISampler = ROISampler() + roi_aligner: ROIAligner = ROIAligner() + detection_generator: DetectionGenerator = DetectionGenerator() + mask_head: Optional[MaskHead] = MaskHead() + mask_sampler: Optional[MaskSampler] = MaskSampler() + mask_roi_aligner: Optional[MaskROIAligner] = MaskROIAligner() + norm_activation: common.NormActivation = common.NormActivation( + norm_momentum=0.997, + norm_epsilon=0.0001, + use_sync_bn=True) + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + rpn_huber_loss_delta: float = 1. / 9. + frcnn_huber_loss_delta: float = 1. + l2_weight_decay: float = 0.0 + rpn_score_weight: float = 1.0 + rpn_box_weight: float = 1.0 + frcnn_class_weight: float = 1.0 + frcnn_box_weight: float = 1.0 + mask_weight: float = 1.0 + + +@dataclasses.dataclass +class MaskRCNNTask(cfg.TaskConfig): + model: MaskRCNN = MaskRCNN() + train_data: DataConfig = DataConfig(is_training=True) + validation_data: DataConfig = DataConfig(is_training=False) + losses: Losses = Losses() + init_checkpoint: Optional[str] = None + init_checkpoint_modules: str = 'all' # all or backbone + annotation_file: Optional[str] = None + per_category_metrics: bool = False + # If set, we only use masks for the specified class IDs. 
+ allowed_mask_class_ids: Optional[List[int]] = None + + +COCO_INPUT_PATH_BASE = 'coco' + + +@exp_factory.register_config_factory('fasterrcnn_resnetfpn_coco') +def fasterrcnn_resnetfpn_coco() -> cfg.ExperimentConfig: + """COCO object detection with Faster R-CNN.""" + steps_per_epoch = 500 + coco_val_samples = 5000 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=MaskRCNNTask( + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=MaskRCNN( + num_classes=91, + input_size=[1024, 1024, 3], + include_mask=False, + mask_head=None, + mask_sampler=None, + mask_roi_aligner=None), + losses=Losses(l2_weight_decay=0.00004), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=64, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=8)), + trainer=cfg.TrainerConfig( + train_steps=22500, + validation_steps=coco_val_samples // 8, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [15000, 20000], + 'values': [0.12, 0.012, 0.0012], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config + + +@exp_factory.register_config_factory('maskrcnn_resnetfpn_coco') +def 
maskrcnn_resnetfpn_coco() -> cfg.ExperimentConfig: + """COCO object detection with Mask R-CNN.""" + steps_per_epoch = 500 + coco_val_samples = 5000 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=MaskRCNNTask( + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=MaskRCNN( + num_classes=91, input_size=[1024, 1024, 3], include_mask=True), + losses=Losses(l2_weight_decay=0.00004), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=64, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=8)), + trainer=cfg.TrainerConfig( + train_steps=22500, + validation_steps=coco_val_samples // 8, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [15000, 20000], + 'values': [0.12, 0.012, 0.0012], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config + + +@exp_factory.register_config_factory('cascadercnn_resnetfpn_coco') +def cascadercnn_resnetfpn_coco() -> cfg.ExperimentConfig: + """COCO object detection with Cascade R-CNN.""" + steps_per_epoch = 500 + coco_val_samples = 5000 + + config = cfg.ExperimentConfig( + 
runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=MaskRCNNTask( + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=MaskRCNN( + num_classes=91, + input_size=[1024, 1024, 3], + include_mask=True, + roi_sampler=ROISampler(cascade_iou_thresholds=[0.6, 0.7]), + detection_head=DetectionHead( + class_agnostic_bbox_pred=True, cascade_class_ensemble=True)), + losses=Losses(l2_weight_decay=0.00004), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=64, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=8)), + trainer=cfg.TrainerConfig( + train_steps=22500, + validation_steps=coco_val_samples // 8, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [15000, 20000], + 'values': [0.12, 0.012, 0.0012], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config + + +@exp_factory.register_config_factory('maskrcnn_spinenet_coco') +def maskrcnn_spinenet_coco() -> cfg.ExperimentConfig: + """COCO object detection with Mask R-CNN with SpineNet backbone.""" + steps_per_epoch = 463 + coco_val_samples = 5000 + + config = cfg.ExperimentConfig( + 
runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=MaskRCNNTask( + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=MaskRCNN( + backbone=backbones.Backbone( + type='spinenet', spinenet=backbones.SpineNet(model_id='49')), + decoder=decoders.Decoder( + type='identity', identity=decoders.Identity()), + anchor=Anchor(anchor_size=3), + norm_activation=common.NormActivation(use_sync_bn=True), + num_classes=91, + input_size=[640, 640, 3], + min_level=3, + max_level=7, + include_mask=True), + losses=Losses(l2_weight_decay=0.00004), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=256, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=8)), + trainer=cfg.TrainerConfig( + train_steps=steps_per_epoch * 350, + validation_steps=coco_val_samples // 8, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + steps_per_epoch * 320, steps_per_epoch * 340 + ], + 'values': [0.28, 0.028, 0.0028], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 2000, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + return config diff --git a/official/vision/beta/configs/maskrcnn_test.py b/official/vision/beta/configs/maskrcnn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..be9be32b378356999a987c6e85195760c6317ce8 --- /dev/null +++ 
b/official/vision/beta/configs/maskrcnn_test.py @@ -0,0 +1,45 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for maskrcnn.""" +# pylint: disable=unused-import +from absl.testing import parameterized +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.vision import beta +from official.vision.beta.configs import maskrcnn as exp_cfg + + +class MaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + ('fasterrcnn_resnetfpn_coco',), + ('maskrcnn_resnetfpn_coco',), + ('maskrcnn_spinenet_coco',), + ) + def test_maskrcnn_configs(self, config_name): + config = exp_factory.get_exp_config(config_name) + self.assertIsInstance(config, cfg.ExperimentConfig) + self.assertIsInstance(config.task, exp_cfg.MaskRCNNTask) + self.assertIsInstance(config.task.model, exp_cfg.MaskRCNN) + self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig) + config.task.train_data.is_training = None + with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'): + config.validate() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/configs/retinanet.py b/official/vision/beta/configs/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..521faa4e628e2fc44edebd12c8a40e3d2c148b69 --- /dev/null +++ 
b/official/vision/beta/configs/retinanet.py @@ -0,0 +1,394 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""RetinaNet configuration definition.""" + +import os +from typing import List, Optional +import dataclasses + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.modeling import optimization +from official.vision.beta.configs import common +from official.vision.beta.configs import decoders +from official.vision.beta.configs import backbones + + +# pylint: disable=missing-class-docstring +@dataclasses.dataclass +class TfExampleDecoder(hyperparams.Config): + regenerate_source_id: bool = False + + +@dataclasses.dataclass +class TfExampleDecoderLabelMap(hyperparams.Config): + regenerate_source_id: bool = False + label_map: str = '' + + +@dataclasses.dataclass +class DataDecoder(hyperparams.OneOfConfig): + type: Optional[str] = 'simple_decoder' + simple_decoder: TfExampleDecoder = TfExampleDecoder() + label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap() + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + num_channels: int = 3 + match_threshold: float = 0.5 + unmatched_threshold: float = 0.5 + aug_rand_hflip: bool = False + aug_scale_min: float = 1.0 + aug_scale_max: float = 1.0 + skip_crowd_during_training: bool = True + max_num_instances: int = 100 + 
+ +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + global_batch_size: int = 0 + is_training: bool = False + dtype: str = 'bfloat16' + decoder: DataDecoder = DataDecoder() + parser: Parser = Parser() + shuffle_buffer_size: int = 10000 + file_type: str = 'tfrecord' + + +@dataclasses.dataclass +class Anchor(hyperparams.Config): + num_scales: int = 3 + aspect_ratios: List[float] = dataclasses.field( + default_factory=lambda: [0.5, 1.0, 2.0]) + anchor_size: float = 4.0 + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + focal_loss_alpha: float = 0.25 + focal_loss_gamma: float = 1.5 + huber_loss_delta: float = 0.1 + box_loss_weight: int = 50 + l2_weight_decay: float = 0.0 + + +@dataclasses.dataclass +class AttributeHead(hyperparams.Config): + name: str = '' + type: str = 'regression' + size: int = 1 + + +@dataclasses.dataclass +class RetinaNetHead(hyperparams.Config): + num_convs: int = 4 + num_filters: int = 256 + use_separable_conv: bool = False + attribute_heads: Optional[List[AttributeHead]] = None + + +@dataclasses.dataclass +class DetectionGenerator(hyperparams.Config): + apply_nms: bool = True + pre_nms_top_k: int = 5000 + pre_nms_score_threshold: float = 0.05 + nms_iou_threshold: float = 0.5 + max_num_detections: int = 100 + use_batched_nms: bool = False + + +@dataclasses.dataclass +class RetinaNet(hyperparams.Config): + num_classes: int = 0 + input_size: List[int] = dataclasses.field(default_factory=list) + min_level: int = 3 + max_level: int = 7 + anchor: Anchor = Anchor() + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet()) + decoder: decoders.Decoder = decoders.Decoder( + type='fpn', fpn=decoders.FPN()) + head: RetinaNetHead = RetinaNetHead() + detection_generator: DetectionGenerator = DetectionGenerator() + norm_activation: common.NormActivation = common.NormActivation() + + +@dataclasses.dataclass +class 
RetinaNetTask(cfg.TaskConfig): + model: RetinaNet = RetinaNet() + train_data: DataConfig = DataConfig(is_training=True) + validation_data: DataConfig = DataConfig(is_training=False) + losses: Losses = Losses() + init_checkpoint: Optional[str] = None + init_checkpoint_modules: str = 'all' # all or backbone + annotation_file: Optional[str] = None + per_category_metrics: bool = False + + +@exp_factory.register_config_factory('retinanet') +def retinanet() -> cfg.ExperimentConfig: + """RetinaNet general config.""" + return cfg.ExperimentConfig( + task=RetinaNetTask(), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + +COCO_INPUT_PATH_BASE = 'coco' +COCO_TRAIN_EXAMPLES = 118287 +COCO_VAL_EXAMPLES = 5000 + + +@exp_factory.register_config_factory('retinanet_resnetfpn_coco') +def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig: + """COCO object detection with RetinaNet.""" + train_batch_size = 256 + eval_batch_size = 8 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=RetinaNetTask( + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=RetinaNet( + num_classes=91, + input_size=[640, 640, 3], + min_level=3, + max_level=7), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + train_steps=72 * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // 
eval_batch_size, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 57 * steps_per_epoch, 67 * steps_per_epoch + ], + 'values': [ + 0.32 * train_batch_size / 256.0, + 0.032 * train_batch_size / 256.0, + 0.0032 * train_batch_size / 256.0 + ], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('retinanet_spinenet_coco') +def retinanet_spinenet_coco() -> cfg.ExperimentConfig: + """COCO object detection with RetinaNet using SpineNet backbone.""" + train_batch_size = 256 + eval_batch_size = 8 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + input_size = 640 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'), + task=RetinaNetTask( + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=RetinaNet( + backbone=backbones.Backbone( + type='spinenet', + spinenet=backbones.SpineNet( + model_id='49', stochastic_depth_drop_rate=0.2)), + decoder=decoders.Decoder( + type='identity', identity=decoders.Identity()), + anchor=Anchor(anchor_size=3), + norm_activation=common.NormActivation( + use_sync_bn=True, activation='swish'), + num_classes=91, + input_size=[input_size, input_size, 3], + min_level=3, + max_level=7), + losses=Losses(l2_weight_decay=4e-5), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.1, 
aug_scale_max=2.0)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + train_steps=500 * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 475 * steps_per_epoch, 490 * steps_per_epoch + ], + 'values': [ + 0.32 * train_batch_size / 256.0, + 0.032 * train_batch_size / 256.0, + 0.0032 * train_batch_size / 256.0 + ], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 2000, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('retinanet_spinenet_mobile_coco') +def retinanet_spinenet_mobile_coco() -> cfg.ExperimentConfig: + """COCO object detection with RetinaNet using Mobile SpineNet backbone.""" + train_batch_size = 256 + eval_batch_size = 8 + steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + input_size = 384 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='float32'), + task=RetinaNetTask( + annotation_file=os.path.join(COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=RetinaNet( + backbone=backbones.Backbone( + type='spinenet_mobile', + spinenet_mobile=backbones.SpineNetMobile( + model_id='49', stochastic_depth_drop_rate=0.2)), + decoder=decoders.Decoder( + type='identity', identity=decoders.Identity()), + head=RetinaNetHead(num_filters=48, use_separable_conv=True), + anchor=Anchor(anchor_size=3), + 
norm_activation=common.NormActivation( + use_sync_bn=True, activation='swish'), + num_classes=91, + input_size=[input_size, input_size, 3], + min_level=3, + max_level=7), + losses=Losses(l2_weight_decay=3e-5), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size, + parser=Parser( + aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size)), + trainer=cfg.TrainerConfig( + train_steps=600 * steps_per_epoch, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [ + 575 * steps_per_epoch, 590 * steps_per_epoch + ], + 'values': [ + 0.32 * train_batch_size / 256.0, + 0.032 * train_batch_size / 256.0, + 0.0032 * train_batch_size / 256.0 + ], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 2000, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config diff --git a/official/vision/beta/configs/retinanet_test.py b/official/vision/beta/configs/retinanet_test.py new file mode 100644 index 0000000000000000000000000000000000000000..8e6f9af39498dc71b51f41a4811db7b936685a20 --- /dev/null +++ b/official/vision/beta/configs/retinanet_test.py @@ -0,0 +1,45 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for retinanet.""" +# pylint: disable=unused-import +from absl.testing import parameterized +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.vision import beta +from official.vision.beta.configs import retinanet as exp_cfg + + +class MaskRCNNConfigTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + ('retinanet_resnetfpn_coco',), + ('retinanet_spinenet_coco',), + ('retinanet_spinenet_mobile_coco',), + ) + def test_maskrcnn_configs(self, config_name): + config = exp_factory.get_exp_config(config_name) + self.assertIsInstance(config, cfg.ExperimentConfig) + self.assertIsInstance(config.task, exp_cfg.RetinaNetTask) + self.assertIsInstance(config.task.model, exp_cfg.RetinaNet) + self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig) + config.task.train_data.is_training = None + with self.assertRaisesRegex(KeyError, 'Found inconsistncy between key'): + config.validate() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/configs/semantic_segmentation.py b/official/vision/beta/configs/semantic_segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..3a90a97e0790310b8a86216070d48cd18b1eb463 --- /dev/null +++ b/official/vision/beta/configs/semantic_segmentation.py @@ -0,0 +1,490 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Semantic segmentation configuration definition.""" +import os +from typing import List, Optional, Union + +import dataclasses +import numpy as np + +from official.core import exp_factory +from official.modeling import hyperparams +from official.modeling import optimization +from official.modeling.hyperparams import config_definitions as cfg +from official.vision.beta.configs import common +from official.vision.beta.configs import decoders +from official.vision.beta.configs import backbones + + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + output_size: List[int] = dataclasses.field(default_factory=list) + # If crop_size is specified, image will be resized first to + # output_size, then crop of size crop_size will be cropped. + crop_size: List[int] = dataclasses.field(default_factory=list) + input_path: str = '' + global_batch_size: int = 0 + is_training: bool = True + dtype: str = 'float32' + shuffle_buffer_size: int = 1000 + cycle_length: int = 10 + # If resize_eval_groundtruth is set to False, original image sizes are used + # for eval. In that case, groundtruth_padded_size has to be specified too to + # allow for batching the variable input sizes of images. 
+ resize_eval_groundtruth: bool = True + groundtruth_padded_size: List[int] = dataclasses.field(default_factory=list) + aug_scale_min: float = 1.0 + aug_scale_max: float = 1.0 + aug_rand_hflip: bool = True + drop_remainder: bool = True + file_type: str = 'tfrecord' + + +@dataclasses.dataclass +class SegmentationHead(hyperparams.Config): + """Segmentation head config.""" + level: int = 3 + num_convs: int = 2 + num_filters: int = 256 + prediction_kernel_size: int = 1 + upsample_factor: int = 1 + feature_fusion: Optional[str] = None # None, deeplabv3plus, or pyramid_fusion + # deeplabv3plus feature fusion params + low_level: int = 2 + low_level_num_filters: int = 48 + + +@dataclasses.dataclass +class SemanticSegmentationModel(hyperparams.Config): + """Semantic segmentation model config.""" + num_classes: int = 0 + input_size: List[int] = dataclasses.field(default_factory=list) + min_level: int = 3 + max_level: int = 6 + head: SegmentationHead = SegmentationHead() + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet()) + decoder: decoders.Decoder = decoders.Decoder(type='identity') + norm_activation: common.NormActivation = common.NormActivation() + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + label_smoothing: float = 0.0 + ignore_label: int = 255 + class_weights: List[float] = dataclasses.field(default_factory=list) + l2_weight_decay: float = 0.0 + use_groundtruth_dimension: bool = True + top_k_percent_pixels: float = 1.0 + + +@dataclasses.dataclass +class Evaluation(hyperparams.Config): + report_per_class_iou: bool = True + report_train_mean_iou: bool = True # Turning this off can speed up training. 
+ + +@dataclasses.dataclass +class SemanticSegmentationTask(cfg.TaskConfig): + """The model config.""" + model: SemanticSegmentationModel = SemanticSegmentationModel() + train_data: DataConfig = DataConfig(is_training=True) + validation_data: DataConfig = DataConfig(is_training=False) + losses: Losses = Losses() + evaluation: Evaluation = Evaluation() + train_input_partition_dims: List[int] = dataclasses.field( + default_factory=list) + eval_input_partition_dims: List[int] = dataclasses.field( + default_factory=list) + init_checkpoint: Optional[str] = None + init_checkpoint_modules: Union[ + str, List[str]] = 'all'  # all, backbone, and/or decoder + + +@exp_factory.register_config_factory('semantic_segmentation') +def semantic_segmentation() -> cfg.ExperimentConfig: + """Semantic segmentation general.""" + return cfg.ExperimentConfig( + task=SemanticSegmentationTask(), + trainer=cfg.TrainerConfig(), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + +# PASCAL VOC 2012 Dataset +PASCAL_TRAIN_EXAMPLES = 10582 +PASCAL_VAL_EXAMPLES = 1449 +PASCAL_INPUT_PATH_BASE = 'pascal_voc_seg' + + +@exp_factory.register_config_factory('seg_deeplabv3_pascal') +def seg_deeplabv3_pascal() -> cfg.ExperimentConfig: + """Image segmentation on imagenet with resnet deeplabv3.""" + train_batch_size = 16 + eval_batch_size = 8 + steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size + output_stride = 16 + aspp_dilation_rates = [12, 24, 36]  # [6, 12, 18] if output_stride = 16 + multigrid = [1, 2, 4] + stem_type = 'v1' + level = int(np.math.log2(output_stride)) + config = cfg.ExperimentConfig( + task=SemanticSegmentationTask( + model=SemanticSegmentationModel( + num_classes=21, + input_size=[None, None, 3], + backbone=backbones.Backbone( + type='dilated_resnet', dilated_resnet=backbones.DilatedResNet( + model_id=101, output_stride=output_stride, + multigrid=multigrid, stem_type=stem_type)), + decoder=decoders.Decoder( + type='aspp', 
aspp=decoders.ASPP( + level=level, dilation_rates=aspp_dilation_rates)), + head=SegmentationHead(level=level, num_convs=0), + norm_activation=common.NormActivation( + activation='swish', + norm_momentum=0.9997, + norm_epsilon=1e-3, + use_sync_bn=True)), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'), + # TODO(arashwan): test changing size to 513 to match deeplab. + output_size=[512, 512], + is_training=True, + global_batch_size=train_batch_size, + aug_scale_min=0.5, + aug_scale_max=2.0), + validation_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'), + output_size=[512, 512], + is_training=False, + global_batch_size=eval_batch_size, + resize_eval_groundtruth=False, + groundtruth_padded_size=[512, 512], + drop_remainder=False), + # resnet101 + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400', + init_checkpoint_modules='backbone'), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=45 * steps_per_epoch, + validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 0.007, + 'decay_steps': 45 * steps_per_epoch, + 'end_learning_rate': 0.0, + 'power': 0.9 + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('seg_deeplabv3plus_pascal') +def seg_deeplabv3plus_pascal() -> cfg.ExperimentConfig: + """Image segmentation on 
imagenet with resnet deeplabv3+.""" + train_batch_size = 16 + eval_batch_size = 8 + steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size + output_stride = 16 + aspp_dilation_rates = [6, 12, 18] + multigrid = [1, 2, 4] + stem_type = 'v1' + level = int(np.math.log2(output_stride)) + config = cfg.ExperimentConfig( + task=SemanticSegmentationTask( + model=SemanticSegmentationModel( + num_classes=21, + input_size=[None, None, 3], + backbone=backbones.Backbone( + type='dilated_resnet', dilated_resnet=backbones.DilatedResNet( + model_id=101, output_stride=output_stride, + stem_type=stem_type, multigrid=multigrid)), + decoder=decoders.Decoder( + type='aspp', + aspp=decoders.ASPP( + level=level, dilation_rates=aspp_dilation_rates)), + head=SegmentationHead( + level=level, + num_convs=2, + feature_fusion='deeplabv3plus', + low_level=2, + low_level_num_filters=48), + norm_activation=common.NormActivation( + activation='swish', + norm_momentum=0.9997, + norm_epsilon=1e-3, + use_sync_bn=True)), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'), + output_size=[512, 512], + is_training=True, + global_batch_size=train_batch_size, + aug_scale_min=0.5, + aug_scale_max=2.0), + validation_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'), + output_size=[512, 512], + is_training=False, + global_batch_size=eval_batch_size, + resize_eval_groundtruth=False, + groundtruth_padded_size=[512, 512], + drop_remainder=False), + # resnet101 + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400', + init_checkpoint_modules='backbone'), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=45 * steps_per_epoch, + validation_steps=PASCAL_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + 
optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 0.007, + 'decay_steps': 45 * steps_per_epoch, + 'end_learning_rate': 0.0, + 'power': 0.9 + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +@exp_factory.register_config_factory('seg_resnetfpn_pascal') +def seg_resnetfpn_pascal() -> cfg.ExperimentConfig: + """Image segmentation on imagenet with resnet-fpn.""" + train_batch_size = 256 + eval_batch_size = 32 + steps_per_epoch = PASCAL_TRAIN_EXAMPLES // train_batch_size + config = cfg.ExperimentConfig( + task=SemanticSegmentationTask( + model=SemanticSegmentationModel( + num_classes=21, + input_size=[512, 512, 3], + min_level=3, + max_level=7, + backbone=backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50)), + decoder=decoders.Decoder(type='fpn', fpn=decoders.FPN()), + head=SegmentationHead(level=3, num_convs=3), + norm_activation=common.NormActivation( + activation='swish', + use_sync_bn=True)), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'train_aug*'), + is_training=True, + global_batch_size=train_batch_size, + aug_scale_min=0.2, + aug_scale_max=1.5), + validation_data=DataConfig( + input_path=os.path.join(PASCAL_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=eval_batch_size, + resize_eval_groundtruth=False, + groundtruth_padded_size=[512, 512], + drop_remainder=False), + ), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=450 * steps_per_epoch, + validation_steps=PASCAL_VAL_EXAMPLES 
// eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 0.007, + 'decay_steps': 450 * steps_per_epoch, + 'end_learning_rate': 0.0, + 'power': 0.9 + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config + + +# Cityscapes Dataset (Download and process the dataset yourself) +CITYSCAPES_TRAIN_EXAMPLES = 2975 +CITYSCAPES_VAL_EXAMPLES = 500 +CITYSCAPES_INPUT_PATH_BASE = 'cityscapes' + + +@exp_factory.register_config_factory('seg_deeplabv3plus_cityscapes') +def seg_deeplabv3plus_cityscapes() -> cfg.ExperimentConfig: + """Image segmentation on imagenet with resnet deeplabv3+.""" + train_batch_size = 16 + eval_batch_size = 16 + steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size + output_stride = 16 + aspp_dilation_rates = [6, 12, 18] + multigrid = [1, 2, 4] + stem_type = 'v1' + level = int(np.math.log2(output_stride)) + config = cfg.ExperimentConfig( + task=SemanticSegmentationTask( + model=SemanticSegmentationModel( + # Cityscapes uses only 19 semantic classes for train/evaluation. + # The void (background) class is ignored in train and evaluation. 
+ num_classes=19, + input_size=[None, None, 3], + backbone=backbones.Backbone( + type='dilated_resnet', dilated_resnet=backbones.DilatedResNet( + model_id=101, output_stride=output_stride, + stem_type=stem_type, multigrid=multigrid)), + decoder=decoders.Decoder( + type='aspp', + aspp=decoders.ASPP( + level=level, dilation_rates=aspp_dilation_rates, + pool_kernel_size=[512, 1024])), + head=SegmentationHead( + level=level, + num_convs=2, + feature_fusion='deeplabv3plus', + low_level=2, + low_level_num_filters=48), + norm_activation=common.NormActivation( + activation='swish', + norm_momentum=0.99, + norm_epsilon=1e-3, + use_sync_bn=True)), + losses=Losses(l2_weight_decay=1e-4), + train_data=DataConfig( + input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, + 'train_fine**'), + crop_size=[512, 1024], + output_size=[1024, 2048], + is_training=True, + global_batch_size=train_batch_size, + aug_scale_min=0.5, + aug_scale_max=2.0), + validation_data=DataConfig( + input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'), + output_size=[1024, 2048], + is_training=False, + global_batch_size=eval_batch_size, + resize_eval_groundtruth=True, + drop_remainder=False), + # resnet101 + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/deeplab/deeplab_resnet101_imagenet/ckpt-62400', + init_checkpoint_modules='backbone'), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=500 * steps_per_epoch, + validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'polynomial', + 'polynomial': { + 'initial_learning_rate': 0.01, + 'decay_steps': 500 * steps_per_epoch, + 'end_learning_rate': 0.0, + 'power': 0.9 + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 5 * 
steps_per_epoch, + 'warmup_learning_rate': 0 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config diff --git a/official/vision/beta/configs/semantic_segmentation_test.py b/official/vision/beta/configs/semantic_segmentation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..686aaaaefff5a26393b730ba316e600c3f6b9d9e --- /dev/null +++ b/official/vision/beta/configs/semantic_segmentation_test.py @@ -0,0 +1,45 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for semantic_segmentation.""" + +# pylint: disable=unused-import +from absl.testing import parameterized +import tensorflow as tf + +from official.core import exp_factory +from official.modeling.hyperparams import config_definitions as cfg +from official.vision import beta +from official.vision.beta.configs import semantic_segmentation as exp_cfg + + +class ImageSegmentationConfigTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(('seg_deeplabv3_pascal',), + ('seg_deeplabv3plus_pascal',)) + def test_semantic_segmentation_configs(self, config_name): + config = exp_factory.get_exp_config(config_name) + self.assertIsInstance(config, cfg.ExperimentConfig) + self.assertIsInstance(config.task, exp_cfg.SemanticSegmentationTask) + self.assertIsInstance(config.task.model, + exp_cfg.SemanticSegmentationModel) + self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig) + config.task.train_data.is_training = None + with self.assertRaises(KeyError): + config.validate() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/configs/video_classification.py b/official/vision/beta/configs/video_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..1edbe22a74fcc0a2d75402f76e5e65c23a622758 --- /dev/null +++ b/official/vision/beta/configs/video_classification.py @@ -0,0 +1,314 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Video classification configuration definition."""
from typing import Optional, Tuple
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.beta.configs import backbones_3d
from official.vision.beta.configs import common


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building video classification datasets."""
  name: Optional[str] = None
  file_type: Optional[str] = 'tfrecord'
  compressed_input: bool = False
  split: str = 'train'
  variant_name: Optional[str] = None
  # Presumably (num_frames, height, width, channels) — confirm against the
  # dataset decoder before relying on the axis order.
  feature_shape: Tuple[int, ...] = (64, 224, 224, 3)
  temporal_stride: int = 1
  random_stride_range: int = 0
  num_test_clips: int = 1
  num_test_crops: int = 1
  num_classes: int = -1  # -1 means unset; dataset factories below fill it in.
  num_examples: int = -1  # -1 means unset; dataset factories below fill it in.
  global_batch_size: int = 128
  data_format: str = 'channels_last'
  dtype: str = 'float32'
  one_hot: bool = True
  shuffle_buffer_size: int = 64
  cache: bool = False
  input_path: str = ''
  is_training: bool = True
  cycle_length: int = 10
  drop_remainder: bool = True
  min_image_size: int = 256
  is_multilabel: bool = False
  output_audio: bool = False
  audio_feature: str = ''
  # (-1,) appears to act as an "unspecified shape" sentinel — confirm.
  audio_feature_shape: Tuple[int, ...] = (-1,)
  aug_min_aspect_ratio: float = 0.5
  aug_max_aspect_ratio: float = 2.0
  aug_min_area_ratio: float = 0.49
  aug_max_area_ratio: float = 1.0
  aug_type: Optional[str] = None  # 'autoaug', 'randaug', or None
  image_field_key: str = 'image/encoded'
  label_field_key: str = 'clip/label/index'


def kinetics400(is_training):
  """Generates Kinetics 400 dataset configs."""
  return DataConfig(
      name='kinetics400',
      num_classes=400,
      is_training=is_training,
      split='train' if is_training else 'valid',
      drop_remainder=is_training,
      num_examples=215570 if is_training else 17706,
      feature_shape=(64, 224, 224, 3) if is_training else (250, 224, 224, 3))


def kinetics600(is_training):
  """Generates Kinetics 600 dataset configs."""
  return DataConfig(
      name='kinetics600',
      num_classes=600,
      is_training=is_training,
      split='train' if is_training else 'valid',
      drop_remainder=is_training,
      num_examples=366016 if is_training else 27780,
      feature_shape=(64, 224, 224, 3) if is_training else (250, 224, 224, 3))


def kinetics700(is_training):
  """Generates Kinetics 700 dataset configs."""
  return DataConfig(
      name='kinetics700',
      num_classes=700,
      is_training=is_training,
      split='train' if is_training else 'valid',
      drop_remainder=is_training,
      num_examples=522883 if is_training else 33441,
      feature_shape=(64, 224, 224, 3) if is_training else (250, 224, 224, 3))


def kinetics700_2020(is_training):
  """Generates Kinetics 700 (2020 release) dataset configs."""
  # NOTE(review): `name` is 'kinetics700', same as kinetics700() above; only
  # the example counts differ — confirm this aliasing is intentional.
  return DataConfig(
      name='kinetics700',
      num_classes=700,
      is_training=is_training,
      split='train' if is_training else 'valid',
      drop_remainder=is_training,
      num_examples=535982 if is_training else 33640,
      feature_shape=(64, 224, 224, 3) if is_training else (250, 224, 224, 3))


@dataclasses.dataclass
class VideoClassificationModel(hyperparams.Config):
  """The model config."""
  model_type: str = 'video_classification'
  backbone: backbones_3d.Backbone3D = backbones_3d.Backbone3D(
      type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50())
  norm_activation: common.NormActivation = common.NormActivation(
      use_sync_bn=False)
  dropout_rate: float = 0.2
  aggregate_endpoints: bool = False


@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss configuration: label encoding, smoothing and weight decay."""
  one_hot: bool = True
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0


@dataclasses.dataclass
class Metrics(hyperparams.Config):
  """Metric configuration."""
  use_per_class_recall: bool = False


@dataclasses.dataclass
class VideoClassificationTask(cfg.TaskConfig):
  """The task config."""
  model: VideoClassificationModel = VideoClassificationModel()
  train_data: DataConfig = DataConfig(is_training=True, drop_remainder=True)
  validation_data: DataConfig = DataConfig(
      is_training=False, drop_remainder=False)
  losses: Losses = Losses()
  metrics: Metrics = Metrics()


def add_trainer(experiment: cfg.ExperimentConfig,
                train_batch_size: int,
                eval_batch_size: int,
                learning_rate: float = 1.6,
                train_epochs: int = 44,
                warmup_epochs: int = 5):
  """Adds and configures a trainer on the experiment config.

  Derives epoch-based step counts from the dataset sizes and installs an
  SGD + cosine-decay + linear-warmup trainer.

  Args:
    experiment: the experiment config to mutate. Its train and validation
      data configs must already carry positive `num_examples`.
    train_batch_size: global training batch size.
    eval_batch_size: global evaluation batch size.
    learning_rate: initial learning rate of the cosine schedule.
    train_epochs: total number of training epochs.
    warmup_epochs: number of linear warmup epochs.

  Returns:
    The same `experiment` instance with `experiment.trainer` replaced.

  Raises:
    ValueError: if either dataset's `num_examples` is not positive.
  """
  if experiment.task.train_data.num_examples <= 0:
    raise ValueError('Wrong train dataset size {!r}'.format(
        experiment.task.train_data))
  if experiment.task.validation_data.num_examples <= 0:
    raise ValueError('Wrong validation dataset size {!r}'.format(
        experiment.task.validation_data))
  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size
  steps_per_epoch = experiment.task.train_data.num_examples // train_batch_size
  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=experiment.task.validation_data.num_examples //
      eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'sgd',
              'sgd': {
                  'momentum': 0.9,
                  'nesterov': True,
              }
          },
          'learning_rate': {
              'type': 'cosine',
              'cosine': {
                  'initial_learning_rate': learning_rate,
                  'decay_steps': train_epochs * steps_per_epoch,
              }
          },
          'warmup': {
              'type': 'linear',
              'linear': {
                  'warmup_steps': warmup_epochs * steps_per_epoch,
                  'warmup_learning_rate': 0
              }
          }
      }))
  return experiment


@exp_factory.register_config_factory('video_classification')
def video_classification() -> cfg.ExperimentConfig:
  """Video classification general experiment (no dataset attached)."""
  return cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=VideoClassificationTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])


@exp_factory.register_config_factory('video_classification_kinetics400')
def video_classification_kinetics400() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 400 with resnet."""
  train_dataset = kinetics400(is_training=True)
  validation_dataset = kinetics400(is_training=False)
  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=train_dataset,
      validation_data=validation_dataset)
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(config, train_batch_size=1024, eval_batch_size=64)
  return config

def _kinetics_resnet3d_experiment(dataset_fn) -> cfg.ExperimentConfig:
  """Builds the shared ResNet-3D Kinetics experiment for one dataset.

  The kinetics600/700/700_2020 factories below were byte-identical except for
  the dataset helper they called; this helper removes that duplication.

  Args:
    dataset_fn: a callable such as `kinetics600` taking `is_training` and
      returning a `DataConfig`.

  Returns:
    A fully configured `cfg.ExperimentConfig` with the trainer attached.
  """
  task = VideoClassificationTask(
      model=VideoClassificationModel(
          backbone=backbones_3d.Backbone3D(
              type='resnet_3d', resnet_3d=backbones_3d.ResNet3D50()),
          norm_activation=common.NormActivation(
              norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=dataset_fn(is_training=True),
      validation_data=dataset_fn(is_training=False))
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=task,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
      ])
  add_trainer(config, train_batch_size=1024, eval_batch_size=64)
  return config


@exp_factory.register_config_factory('video_classification_kinetics600')
def video_classification_kinetics600() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 600 with resnet."""
  return _kinetics_resnet3d_experiment(kinetics600)


@exp_factory.register_config_factory('video_classification_kinetics700')
def video_classification_kinetics700() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 700 with resnet."""
  return _kinetics_resnet3d_experiment(kinetics700)


@exp_factory.register_config_factory('video_classification_kinetics700_2020')
def video_classification_kinetics700_2020() -> cfg.ExperimentConfig:
  """Video classification on Kinetics 700 2020 with resnet."""
  return _kinetics_resnet3d_experiment(kinetics700_2020)
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Tests for video_classification."""

# pylint: disable=unused-import
from absl.testing import parameterized
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import exp_factory
# NOTE(review): apparently imported for side effects (config registration),
# hence the unused-import disable above — confirm before removing.
from official.vision import beta
from official.vision.beta.configs import video_classification as exp_cfg


class VideoClassificationConfigTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke tests for registered video classification experiment configs."""

  @parameterized.parameters(('video_classification',),
                            ('video_classification_kinetics600',))
  def test_video_classification_configs(self, config_name):
    """Checks the config type hierarchy and that validation rejects None."""
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.VideoClassificationTask)
    self.assertIsInstance(config.task.model, exp_cfg.VideoClassificationModel)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    # The `is_training != None` restriction must make validation fail.
    config.task.train_data.is_training = None
    with self.assertRaises(KeyError):
      config.validate()


if __name__ == '__main__':
  tf.test.main()
# See the License for the specific language governing permissions and
# limitations under the License.

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Convert raw COCO dataset to TFRecord format.

This script follows the label map decoder format and supports detection
boxes, instance masks and captions.

Example usage:
    python create_coco_tf_record.py --logtostderr \
      --image_dir="${TRAIN_IMAGE_DIR}" \
      --image_info_file="${TRAIN_IMAGE_INFO_FILE}" \
      --object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
      --caption_annotations_file="${CAPTION_ANNOTATIONS_FILE}" \
      --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \
      --num_shards=100
"""

import collections
import json
import logging
import os

from absl import app
from absl import flags
import numpy as np

from pycocotools import mask
import tensorflow as tf

import multiprocessing as mp
from official.vision.beta.data import tfrecord_lib


flags.DEFINE_boolean(
    'include_masks', False, 'Whether to include instance segmentations masks '
    '(PNG encoded) in the result. default: False.')
flags.DEFINE_string('image_dir', '', 'Directory containing images.')
flags.DEFINE_string(
    'image_info_file', '', 'File containing image information. '
    'Tf Examples in the output files correspond to the image '
    'info entries in this file. If this file is not provided '
    'object_annotations_file is used if present. Otherwise, '
    'caption_annotations_file is used to get image info.')
flags.DEFINE_string(
    'object_annotations_file', '', 'File containing object '
    'annotations - boxes and instance masks.')
flags.DEFINE_string('caption_annotations_file', '', 'File containing image '
                    'captions.')
flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file')
flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.')

FLAGS = flags.FLAGS

logger = tf.get_logger()
logger.setLevel(logging.INFO)


def coco_segmentation_to_mask_png(segmentation, height, width, is_crowd):
  """Encodes a COCO mask segmentation as a PNG string.

  Args:
    segmentation: COCO polygon or RLE segmentation for one annotation.
    height: image height in pixels.
    width: image width in pixels.
    is_crowd: COCO `iscrowd` flag; non-crowd polygon masks are flattened
      across the per-polygon axis with a max-reduction.

  Returns:
    PNG-encoded bytes of the binary mask.
  """
  run_len_encoding = mask.frPyObjects(segmentation, height, width)
  binary_mask = mask.decode(run_len_encoding)
  if not is_crowd:
    binary_mask = np.amax(binary_mask, axis=2)

  return tfrecord_lib.encode_binary_mask_as_png(binary_mask)


def coco_annotations_to_lists(bbox_annotations, id_to_name_map,
                              image_height, image_width, include_masks):
  """Converts COCO box annotations to per-field feature lists.

  Boxes with non-positive size or extending past the image are skipped.

  Args:
    bbox_annotations: list of COCO object annotation dicts for one image.
    id_to_name_map: dict mapping category IDs to string names.
    image_height: image height in pixels.
    image_width: image width in pixels.
    include_masks: whether to also produce PNG-encoded instance masks.

  Returns:
    A (data, num_annotations_skipped) tuple, where data maps field names
    (xmin/xmax/ymin/ymax normalized to [0, 1], is_crowd, category_id,
    category_names, area, and optionally encoded_mask_png) to lists.
  """
  data = {k: [] for k in
          ['xmin', 'xmax', 'ymin', 'ymax', 'is_crowd',
           'category_id', 'category_names', 'area']}
  if include_masks:
    data['encoded_mask_png'] = []

  num_annotations_skipped = 0

  for object_annotations in bbox_annotations:
    (x, y, width, height) = tuple(object_annotations['bbox'])

    # Skip degenerate boxes and boxes that run past the image boundary.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    data['xmin'].append(float(x) / image_width)
    data['xmax'].append(float(x + width) / image_width)
    data['ymin'].append(float(y) / image_height)
    data['ymax'].append(float(y + height) / image_height)
    data['is_crowd'].append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    data['category_id'].append(category_id)
    data['category_names'].append(id_to_name_map[category_id].encode('utf8'))
    data['area'].append(object_annotations['area'])

    if include_masks:
      data['encoded_mask_png'].append(
          coco_segmentation_to_mask_png(object_annotations['segmentation'],
                                        image_height, image_width,
                                        object_annotations['iscrowd']))

  return data, num_annotations_skipped


def bbox_annotations_to_feature_dict(
    bbox_annotations, image_height, image_width, id_to_name_map,
    include_masks):
  """Converts COCO box annotations to an encoded tf.train feature dict."""
  data, num_skipped = coco_annotations_to_lists(
      bbox_annotations, id_to_name_map, image_height, image_width,
      include_masks)
  feature_dict = {
      'image/object/bbox/xmin':
          tfrecord_lib.convert_to_feature(data['xmin']),
      'image/object/bbox/xmax':
          tfrecord_lib.convert_to_feature(data['xmax']),
      'image/object/bbox/ymin':
          tfrecord_lib.convert_to_feature(data['ymin']),
      'image/object/bbox/ymax':
          tfrecord_lib.convert_to_feature(data['ymax']),
      'image/object/class/text':
          tfrecord_lib.convert_to_feature(data['category_names']),
      'image/object/class/label':
          tfrecord_lib.convert_to_feature(data['category_id']),
      'image/object/is_crowd':
          tfrecord_lib.convert_to_feature(data['is_crowd']),
      'image/object/area':
          tfrecord_lib.convert_to_feature(data['area']),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        tfrecord_lib.convert_to_feature(data['encoded_mask_png']))

  return feature_dict, num_skipped


def encode_caption_annotations(caption_annotations):
  """Extracts UTF-8 encoded caption strings from caption annotation dicts."""
  captions = []
  for caption_annotation in caption_annotations:
    captions.append(caption_annotation['caption'].encode('utf8'))

  return captions


def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      id_to_name_map=None,
                      caption_annotations=None,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    image_dir: directory containing the image files.
    bbox_annotations:
      list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
      u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
      coordinates in the official COCO dataset are given as [x, y, width,
      height] tuples using absolute coordinates where x, y represent the
      top-left (0-indexed) corner. This function converts to the format
      expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    id_to_name_map: a dict mapping category IDs to string names.
    caption_annotations:
      list of dict with keys: [u'id', u'image_id', u'str'].
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.io.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()

  feature_dict = tfrecord_lib.image_info_to_feature_dict(
      image_height, image_width, filename, image_id, encoded_jpg, 'jpg')

  num_annotations_skipped = 0
  if bbox_annotations:
    box_feature_dict, num_skipped = bbox_annotations_to_feature_dict(
        bbox_annotations, image_height, image_width, id_to_name_map,
        include_masks)
    num_annotations_skipped += num_skipped
    feature_dict.update(box_feature_dict)

  if caption_annotations:
    encoded_captions = encode_caption_annotations(caption_annotations)
    feature_dict.update(
        {'image/caption': tfrecord_lib.convert_to_feature(encoded_captions)})

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example, num_annotations_skipped


def _load_object_annotations(object_annotations_file):
  """Loads object annotation JSON file.

  Returns:
    A (img_to_obj_annotation, id_to_name_map) tuple: a defaultdict mapping
    image id to its annotation dicts, and a dict mapping category id to name.
  """
  with tf.io.gfile.GFile(object_annotations_file, 'r') as fid:
    obj_annotations = json.load(fid)

  images = obj_annotations['images']
  id_to_name_map = dict((element['id'], element['name']) for element in
                        obj_annotations['categories'])

  img_to_obj_annotation = collections.defaultdict(list)
  logging.info('Building bounding box index.')
  for annotation in obj_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_obj_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  for image in images:
    image_id = image['id']
    if image_id not in img_to_obj_annotation:
      missing_annotation_count += 1

  logging.info('%d images are missing bboxes.', missing_annotation_count)

  return img_to_obj_annotation, id_to_name_map


def _load_caption_annotations(caption_annotations_file):
  """Loads caption annotation JSON file into an image-id -> captions index."""
  with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid:
    caption_annotations = json.load(fid)

  img_to_caption_annotation = collections.defaultdict(list)
  logging.info('Building caption index.')
  for annotation in caption_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_caption_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  images = caption_annotations['images']
  for image in images:
    image_id = image['id']
    if image_id not in img_to_caption_annotation:
      missing_annotation_count += 1

  logging.info('%d images are missing captions.', missing_annotation_count)

  return img_to_caption_annotation


def _load_images_info(images_info_file):
  """Returns the 'images' list from an annotation/info JSON file."""
  with tf.io.gfile.GFile(images_info_file, 'r') as fid:
    info_dict = json.load(fid)
  return info_dict['images']


def generate_annotations(images, image_dir,
                         img_to_obj_annotation=None,
                         img_to_caption_annotation=None, id_to_name_map=None,
                         include_masks=False):
  """Generator yielding one create_tf_example argument tuple per image."""
  for image in images:
    object_annotation = (img_to_obj_annotation.get(image['id'], None) if
                         img_to_obj_annotation else None)

    caption_annotation = (img_to_caption_annotation.get(image['id'], None) if
                          img_to_caption_annotation else None)

    yield (image, image_dir, object_annotation, id_to_name_map,
           caption_annotation, include_masks)


def _create_tf_record_from_coco_annotations(images_info_file,
                                            image_dir,
                                            output_path,
                                            num_shards,
                                            object_annotations_file=None,
                                            caption_annotations_file=None,
                                            include_masks=False):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    images_info_file: JSON file containing image info. The number of
      tf.Examples in the output tf Record files is exactly equal to the
      number of image info entries in this file. This can be any of
      train/val/test annotation json files Eg.
      'image_info_test-dev2017.json',
      'instance_annotations_train2017.json',
      'caption_annotations_train2017.json', etc.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    num_shards: Number of output files to create.
    object_annotations_file: JSON file containing bounding box annotations.
    caption_annotations_file: JSON file containing caption annotations.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  logging.info('writing to output path: %s', output_path)

  images = _load_images_info(images_info_file)

  img_to_obj_annotation = None
  img_to_caption_annotation = None
  id_to_name_map = None
  if object_annotations_file:
    img_to_obj_annotation, id_to_name_map = (
        _load_object_annotations(object_annotations_file))
  if caption_annotations_file:
    img_to_caption_annotation = (
        _load_caption_annotations(caption_annotations_file))

  coco_annotations_iter = generate_annotations(
      images, image_dir, img_to_obj_annotation, img_to_caption_annotation,
      id_to_name_map=id_to_name_map, include_masks=include_masks)

  num_skipped = tfrecord_lib.write_tf_record_dataset(
      output_path, coco_annotations_iter, create_tf_example, num_shards)

  logging.info('Finished writing, skipped %d annotations.', num_skipped)


def main(_):
  assert FLAGS.image_dir, '`image_dir` missing.'
  assert (FLAGS.image_info_file or FLAGS.object_annotations_file or
          FLAGS.caption_annotations_file), ('All annotation files are '
                                            'missing.')
  if FLAGS.image_info_file:
    images_info_file = FLAGS.image_info_file
  elif FLAGS.object_annotations_file:
    images_info_file = FLAGS.object_annotations_file
  else:
    images_info_file = FLAGS.caption_annotations_file

  # Reuse the shared helper instead of reimplementing directory creation.
  directory = os.path.dirname(FLAGS.output_file_prefix)
  tfrecord_lib.check_and_make_dir(directory)

  _create_tf_record_from_coco_annotations(images_info_file, FLAGS.image_dir,
                                          FLAGS.output_file_prefix,
                                          FLAGS.num_shards,
                                          FLAGS.object_annotations_file,
                                          FLAGS.caption_annotations_file,
                                          FLAGS.include_masks)


if __name__ == '__main__':
  app.run(main)
"""Helper functions for creating TFRecord datasets."""

import hashlib
import io
import itertools

from absl import logging
import numpy as np
from PIL import Image
import tensorflow as tf

import multiprocessing as mp


def convert_to_feature(value, value_type=None):
  """Converts the given python object to a tf.train.Feature.

  Args:
    value: int, float, bytes or a list of them.
    value_type: optional, if specified, forces the feature to be of the given
      type. Otherwise, type is inferred automatically. Can be one of
      ['bytes', 'int64', 'float', 'bytes_list', 'int64_list', 'float_list']

  Returns:
    feature: A tf.train.Feature object.

  Raises:
    ValueError: if the element type cannot be inferred (including for an
      empty list with no explicit value_type), or value_type is unknown.
  """
  if value_type is None:

    # An empty list carries no element to inspect; require an explicit type
    # instead of failing with an opaque IndexError.
    if isinstance(value, list) and not value:
      raise ValueError(
          'value_type must be specified for an empty list; the element type '
          'cannot be inferred.')

    element = value[0] if isinstance(value, list) else value

    if isinstance(element, bytes):
      value_type = 'bytes'

    elif isinstance(element, (int, np.integer)):
      value_type = 'int64'

    elif isinstance(element, (float, np.floating)):
      value_type = 'float'

    else:
      raise ValueError('Cannot convert type {} to feature'.
                       format(type(element)))

    if isinstance(value, list):
      value_type = value_type + '_list'

  if value_type == 'int64':
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

  elif value_type == 'int64_list':
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

  elif value_type == 'float':
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

  elif value_type == 'float_list':
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

  elif value_type == 'bytes':
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  elif value_type == 'bytes_list':
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

  else:
    raise ValueError('Unknown value_type parameter - {}'.format(value_type))


def image_info_to_feature_dict(height, width, filename, image_id,
                               encoded_str, encoded_format):
  """Convert image information to a dict of features.

  The sha256 of the encoded image bytes is included under 'image/key/sha256'.
  """
  key = hashlib.sha256(encoded_str).hexdigest()

  return {
      'image/height': convert_to_feature(height),
      'image/width': convert_to_feature(width),
      'image/filename': convert_to_feature(filename.encode('utf8')),
      'image/source_id': convert_to_feature(str(image_id).encode('utf8')),
      'image/key/sha256': convert_to_feature(key.encode('utf8')),
      'image/encoded': convert_to_feature(encoded_str),
      'image/format': convert_to_feature(encoded_format.encode('utf8')),
  }


def encode_binary_mask_as_png(binary_mask):
  """PNG-encodes a binary mask array via PIL and returns the bytes."""
  pil_image = Image.fromarray(binary_mask)
  output_io = io.BytesIO()
  pil_image.save(output_io, format='PNG')
  return output_io.getvalue()


def write_tf_record_dataset(output_path, annotation_iterator,
                            process_func, num_shards,
                            use_multiprocessing=True, unpack_arguments=True):
  """Iterates over annotations, processes them and writes into TFRecords.

  Args:
    output_path: The prefix path to create TF record files.
    annotation_iterator: An iterator of tuples containing details about the
      dataset.
    process_func: A function which takes the elements from the tuples of
      annotation_iterator as arguments and returns a tuple of
      (tf.train.Example, int). The integer indicates the number of
      annotations that were skipped.
    num_shards: int, the number of shards to write for the dataset.
    use_multiprocessing:
      Whether or not to use multiple processes to write TF Records.
    unpack_arguments:
      Whether to unpack the tuples from annotation_iterator as individual
      arguments to the process func or to pass the returned value as it is.

  Returns:
    num_skipped: The total number of skipped annotations.
  """
  writers = [
      tf.io.TFRecordWriter(
          output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards))
      for i in range(num_shards)
  ]

  total_num_annotations_skipped = 0

  pool = None
  try:
    if use_multiprocessing:
      pool = mp.Pool()
      if unpack_arguments:
        tf_example_iterator = pool.starmap(process_func, annotation_iterator)
      else:
        tf_example_iterator = pool.imap(process_func, annotation_iterator)
    else:
      if unpack_arguments:
        tf_example_iterator = itertools.starmap(process_func,
                                                annotation_iterator)
      else:
        tf_example_iterator = map(process_func, annotation_iterator)

    for idx, (tf_example, num_annotations_skipped) in enumerate(
        tf_example_iterator):
      if idx % 100 == 0:
        logging.info('On image %d', idx)

      total_num_annotations_skipped += num_annotations_skipped
      # Round-robin examples across shards.
      writers[idx % num_shards].write(tf_example.SerializeToString())
  finally:
    # Always release the pool and flush/close every shard writer, even if
    # process_func raised part-way through.
    if pool is not None:
      pool.close()
      pool.join()
    for writer in writers:
      writer.close()

  logging.info('Finished writing, skipped %d annotations.',
               total_num_annotations_skipped)
  return total_num_annotations_skipped


def check_and_make_dir(directory):
  """Creates the directory if it doesn't exist."""
  if not tf.io.gfile.isdir(directory):
    tf.io.gfile.makedirs(directory)
FLAGS = flags.FLAGS


def process_sample(x):
  """Builds a (tf.train.Example, num_skipped) tuple from one feature."""
  features = tf.train.Features(feature={'x': x})
  return tf.train.Example(features=features), 0


def parse_function(example_proto):
  """Parses one serialized example into a {'x': int64 scalar} dict."""
  schema = {
      'x': tf.io.FixedLenFeature([], tf.int64, default_value=-1)
  }
  return tf.io.parse_single_example(example_proto, schema)


class TfrecordLibTest(parameterized.TestCase):
  """Unit tests for tfrecord_lib helpers."""

  def test_write_tf_record_dataset(self):
    samples = [(tfrecord_lib.convert_to_feature(i),) for i in range(17)]
    prefix = os.path.join(FLAGS.test_tmpdir, 'train')

    tfrecord_lib.write_tf_record_dataset(
        prefix, samples, process_sample, 3, use_multiprocessing=False)

    shards = tf.io.gfile.glob(prefix + '*')
    self.assertLen(shards, 3)

    # Every value written must come back exactly once across the shards.
    dataset = tf.data.TFRecordDataset(shards).map(parse_function)
    recovered = {record['x'] for record in dataset.as_numpy_iterator()}
    self.assertSetEqual(recovered, set(range(17)))

  def test_convert_to_feature_float(self):
    feature = tfrecord_lib.convert_to_feature(0.0)
    self.assertEqual(feature.float_list.value[0], 0.0)

  def test_convert_to_feature_int(self):
    feature = tfrecord_lib.convert_to_feature(0)
    self.assertEqual(feature.int64_list.value[0], 0)

  def test_convert_to_feature_bytes(self):
    feature = tfrecord_lib.convert_to_feature(b'123')
    self.assertEqual(feature.bytes_list.value[0], b'123')

  def test_convert_to_feature_float_list(self):
    feature = tfrecord_lib.convert_to_feature([0.0, 1.0])
    self.assertSequenceAlmostEqual(feature.float_list.value, [0.0, 1.0])

  def test_convert_to_feature_int_list(self):
    feature = tfrecord_lib.convert_to_feature([0, 1])
    self.assertSequenceAlmostEqual(feature.int64_list.value, [0, 1])

  def test_convert_to_feature_bytes_list(self):
    feature = tfrecord_lib.convert_to_feature([b'123', b'456'])
    self.assertSequenceAlmostEqual(feature.bytes_list.value, [b'123', b'456'])


if __name__ == '__main__':
  tf.test.main()
class Decoder(decoder.Decoder):
  """Decodes serialized tf.Examples for the classification task."""

  def __init__(self,
               image_field_key: str = 'image/encoded',
               label_field_key: str = 'image/class/label',
               is_multilabel: bool = False):
    # Multilabel examples carry a variable-length list of class ids, while
    # the single-label case stores exactly one int64 (default -1 if absent).
    if is_multilabel:
      label_feature = tf.io.VarLenFeature(dtype=tf.int64)
    else:
      label_feature = tf.io.FixedLenFeature((), tf.int64, default_value=-1)
    self._keys_to_features = {
        image_field_key:
            tf.io.FixedLenFeature((), tf.string, default_value=''),
        label_field_key:
            label_feature,
    }

  def decode(self,
             serialized_example: tf.train.Example) -> Dict[str, tf.Tensor]:
    """Parses one serialized example into the configured feature dict."""
    return tf.io.parse_single_example(
        serialized_example, self._keys_to_features)
+ """ + self._output_size = output_size + self._aug_rand_hflip = aug_rand_hflip + self._num_classes = num_classes + self._image_field_key = image_field_key + self._label_field_key = label_field_key + self._is_multilabel = is_multilabel + + if dtype == 'float32': + self._dtype = tf.float32 + elif dtype == 'float16': + self._dtype = tf.float16 + elif dtype == 'bfloat16': + self._dtype = tf.bfloat16 + else: + raise ValueError('dtype {!r} is not supported!'.format(dtype)) + if aug_type: + if aug_type.type == 'autoaug': + self._augmenter = augment.AutoAugment( + augmentation_name=aug_type.autoaug.augmentation_name, + cutout_const=aug_type.autoaug.cutout_const, + translate_const=aug_type.autoaug.translate_const) + elif aug_type.type == 'randaug': + self._augmenter = augment.RandAugment( + num_layers=aug_type.randaug.num_layers, + magnitude=aug_type.randaug.magnitude, + cutout_const=aug_type.randaug.cutout_const, + translate_const=aug_type.randaug.translate_const) + else: + raise ValueError('Augmentation policy {} not supported.'.format( + aug_type.type)) + else: + self._augmenter = None + + def _parse_train_data(self, decoded_tensors): + """Parses data for training.""" + label = tf.cast(decoded_tensors[self._label_field_key], dtype=tf.int32) + image_bytes = decoded_tensors[self._image_field_key] + image_shape = tf.image.extract_jpeg_shape(image_bytes) + + # Crops image. + # TODO(pengchong): support image format other than JPEG. + cropped_image = preprocess_ops.random_crop_image_v2( + image_bytes, image_shape) + image = tf.cond( + tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)), + lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape), + lambda: cropped_image) + + if self._aug_rand_hflip: + image = tf.image.random_flip_left_right(image) + + # Resizes image. + image = tf.image.resize( + image, self._output_size, method=tf.image.ResizeMethod.BILINEAR) + + # Apply autoaug or randaug. 
+ if self._augmenter is not None: + image = self._augmenter.distort(image) + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image, + offset=MEAN_RGB, + scale=STDDEV_RGB) + + # Convert image to self._dtype. + image = tf.image.convert_image_dtype(image, self._dtype) + + if self._is_multilabel: + if isinstance(label, tf.sparse.SparseTensor): + label = tf.sparse.to_dense(label) + label = tf.reduce_sum(tf.one_hot(label, self._num_classes), axis=0) + + return image, label + + def _parse_eval_data(self, decoded_tensors): + """Parses data for evaluation.""" + label = tf.cast(decoded_tensors[self._label_field_key], dtype=tf.int32) + image_bytes = decoded_tensors[self._image_field_key] + image_shape = tf.image.extract_jpeg_shape(image_bytes) + + # Center crops and resizes image. + image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape) + + image = tf.image.resize( + image, self._output_size, method=tf.image.ResizeMethod.BILINEAR) + + image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3]) + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image, + offset=MEAN_RGB, + scale=STDDEV_RGB) + + # Convert image to self._dtype. + image = tf.image.convert_image_dtype(image, self._dtype) + + if self._is_multilabel: + if isinstance(label, tf.sparse.SparseTensor): + label = tf.sparse.to_dense(label) + label = tf.reduce_sum(tf.one_hot(label, self._num_classes), axis=0) + + return image, label diff --git a/official/vision/beta/dataloaders/decoder.py b/official/vision/beta/dataloaders/decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f691b95152ebebb46e53cd258459164c99fa26 --- /dev/null +++ b/official/vision/beta/dataloaders/decoder.py @@ -0,0 +1,35 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
class Decoder(metaclass=abc.ABCMeta):
  """Decodes the raw data into tensors.

  Abstract interface: subclasses must implement `decode`.

  Note: the previous implementation set the Python 2 `__metaclass__`
  attribute, which is ignored in Python 3, so `@abc.abstractmethod` was
  never enforced. Declaring the metaclass via the class keyword restores
  the intended abstract-class behavior.
  """

  @abc.abstractmethod
  def decode(self, serialized_example):
    """Decodes the serialized example into tensors.

    Args:
      serialized_example: a serialized string tensor that encodes the data.

    Returns:
      decoded_tensors: a dict of Tensors.
    """
def calculate_batch_sizes(total_batch_size: int,
                          pseudo_label_ratio: float) -> Tuple[int, int]:
  """Splits a total batch size into labeled and pseudo-labeled parts.

  Args:
    total_batch_size: The total batch size for all data.
    pseudo_label_ratio: A non-negative float ratio of pseudo-labeled to
      labeled data in a batch.

  Returns:
    A (labeled_batch_size, pseudo_labeled_batch_size) tuple of ints that
    sums to `total_batch_size`.

  Raises:
    ValueError: If total_batch_size is negative.
    ValueError: If pseudo_label_ratio is negative.
  """
  if total_batch_size < 0:
    raise ValueError('Invalid total_batch_size: {}'.format(total_batch_size))
  if pseudo_label_ratio < 0.0:
    raise ValueError(
        'Invalid pseudo_label_ratio: {}'.format(pseudo_label_ratio))

  # A ratio r means r pseudo-labeled examples per labeled example, so the
  # pseudo-labeled share of the batch is r / (1 + r).
  pseudo_fraction = pseudo_label_ratio / (1.0 + pseudo_label_ratio)
  num_pseudo = int(round(total_batch_size * pseudo_fraction))
  return total_batch_size - num_pseudo, num_pseudo
class CombinationDatasetInputReader(input_reader.InputReader):
  """Combination dataset input reader."""

  def __init__(self,
               params: cfg.DataConfig,
               dataset_fn=tf.data.TFRecordDataset,
               pseudo_label_dataset_fn=tf.data.TFRecordDataset,
               decoder_fn: Optional[Callable[..., Any]] = None,
               sample_fn: Optional[Callable[..., Any]] = None,
               parser_fn: Optional[Callable[..., Any]] = None,
               transform_and_batch_fn: Optional[Callable[
                   [tf.data.Dataset, Optional[tf.distribute.InputContext]],
                   tf.data.Dataset]] = None,
               postprocess_fn: Optional[Callable[..., Any]] = None):
    """Initializes a CombinationDatasetInputReader instance.

    This class mixes a labeled and pseudo-labeled dataset. The params
    must contain "pseudo_label_data.input_path" to specify the
    pseudo-label dataset files and "pseudo_label_data.data_ratio"
    to specify a per-batch mixing ratio of pseudo-label examples to
    labeled dataset examples.

    Args:
      params: A config_definitions.DataConfig object.
      dataset_fn: A `tf.data.Dataset` that consumes the input files. For
        example, it can be `tf.data.TFRecordDataset`.
      pseudo_label_dataset_fn: A `tf.data.Dataset` that consumes the input
        files. For example, it can be `tf.data.TFRecordDataset`.
      decoder_fn: An optional `callable` that takes the serialized data string
        and decodes them into the raw tensor dictionary.
      sample_fn: An optional `callable` that takes a `tf.data.Dataset` object
        as input and outputs the transformed dataset. It performs sampling on
        the decoded raw tensors dict before the parser_fn.
      parser_fn: An optional `callable` that takes the decoded raw tensors dict
        and parse them into a dictionary of tensors that can be consumed by the
        model. It will be executed after decoder_fn.
      transform_and_batch_fn: An optional `callable` that takes a
        `tf.data.Dataset` object and an optional `tf.distribute.InputContext`
        as input, and returns a `tf.data.Dataset` object. It will be executed
        after `parser_fn` to transform and batch the dataset; if None, after
        `parser_fn` is executed, the dataset will be batched into per-replica
        batch size.
      postprocess_fn: An optional `callable` that processes batched tensors.
        It will be executed after batching.

    Raises:
      ValueError: If drop_remainder is False.
    """
    super().__init__(params=params,
                     dataset_fn=dataset_fn,
                     decoder_fn=decoder_fn,
                     sample_fn=sample_fn,
                     parser_fn=parser_fn,
                     transform_and_batch_fn=transform_and_batch_fn,
                     postprocess_fn=postprocess_fn)

    self._pseudo_label_file_pattern = params.pseudo_label_data.input_path
    self._pseudo_label_dataset_fn = pseudo_label_dataset_fn
    self._pseudo_label_data_ratio = params.pseudo_label_data.data_ratio
    # File matching is provided by the parent InputReader class.
    self._pseudo_label_matched_files = self._match_files(
        self._pseudo_label_file_pattern)
    # Mixing two streams requires full batches from both; a partial final
    # batch would break the fixed labeled/pseudo-labeled split.
    if not self._drop_remainder:
      raise ValueError(
          'Must use drop_remainder=True with CombinationDatasetInputReader')

  def read(
      self,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Generates a tf.data.Dataset object."""

    labeled_batch_size, pl_batch_size = calculate_batch_sizes(
        self._global_batch_size, self._pseudo_label_data_ratio)

    # NOTE(review): only the "labeled batch rounds to 0 while pseudo is
    # non-zero" case is rejected; a ratio that rounds pl_batch_size down to 0
    # passes through and yields an empty pseudo-labeled stream — confirm this
    # asymmetry is intended.
    if not labeled_batch_size and pl_batch_size:
      raise ValueError(
          'Invalid batch_size: {} and pseudo_label_data_ratio: {}, '
          'resulting in a 0 batch size for one of the datasets.'.format(
              self._global_batch_size, self._pseudo_label_data_ratio))

    labeled_dataset = self._read_decode_and_parse_dataset(
        matched_files=self._matched_files,
        dataset_fn=self._dataset_fn,
        batch_size=labeled_batch_size,
        input_context=input_context,
        tfds_builder=self._tfds_builder)

    pseudo_labeled_dataset = self._read_decode_and_parse_dataset(
        matched_files=self._pseudo_label_matched_files,
        dataset_fn=self._pseudo_label_dataset_fn,
        batch_size=pl_batch_size,
        input_context=input_context,
        tfds_builder=False)

    def concat_fn(d1, d2):
      # Concatenate along the batch axis so every output batch holds
      # labeled_batch_size labeled + pl_batch_size pseudo-labeled examples.
      return tf.nest.map_structure(
          lambda x1, x2: tf.concat([x1, x2], axis=0), d1, d2)

    dataset_concat = tf.data.Dataset.zip(
        (labeled_dataset, pseudo_labeled_dataset))
    dataset_concat = dataset_concat.map(
        concat_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    def maybe_map_fn(dataset, fn):
      # Apply `fn` only when configured; identity otherwise.
      return dataset if fn is None else dataset.map(
          fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    dataset_concat = maybe_map_fn(dataset_concat, self._postprocess_fn)
    dataset_concat = self._maybe_apply_data_service(dataset_concat,
                                                    input_context)

    if self._deterministic is not None:
      options = tf.data.Options()
      options.experimental_deterministic = self._deterministic
      dataset_concat = dataset_concat.with_options(options)

    return dataset_concat.prefetch(tf.data.experimental.AUTOTUNE)
def input_reader_generator(params: cfg.DataConfig,
                           **kwargs) -> core_input_reader.InputReader:
  """Instantiates an input reader class according to the params.

  Args:
    params: A config_definitions.DataConfig object.
    **kwargs: Additional arguments passed to input reader initialization.

  Returns:
    An InputReader object.
  """
  use_combination_reader = (
      params.is_training and params.get('pseudo_label_data', False))
  if not use_combination_reader:
    return core_input_reader.InputReader(params, **kwargs)
  # Training with pseudo-labels: pick the dataset fn matching the file type
  # of the pseudo-label data and mix the two streams per batch.
  pseudo_dataset_fn = dataset_fn_util.pick_dataset_fn(
      params.pseudo_label_data.file_type)
  return vision_input_reader.CombinationDatasetInputReader(
      params, pseudo_label_dataset_fn=pseudo_dataset_fn, **kwargs)
+ +"""Data parser and processing for Mask R-CNN.""" + +# Import libraries + +import tensorflow as tf + +from official.vision.beta.dataloaders import parser +from official.vision.beta.dataloaders import utils +from official.vision.beta.ops import anchor +from official.vision.beta.ops import box_ops +from official.vision.beta.ops import preprocess_ops + + +class Parser(parser.Parser): + """Parser to parse an image and its annotations into a dictionary of tensors.""" + + def __init__(self, + output_size, + min_level, + max_level, + num_scales, + aspect_ratios, + anchor_size, + rpn_match_threshold=0.7, + rpn_unmatched_threshold=0.3, + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5, + aug_rand_hflip=False, + aug_scale_min=1.0, + aug_scale_max=1.0, + skip_crowd_during_training=True, + max_num_instances=100, + include_mask=False, + mask_crop_size=112, + dtype='float32'): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: `Tensor` or `list` for [height, width] of output image. The + output_size should be divided by the largest feature stride 2^max_level. + min_level: `int` number of minimum level of the output feature pyramid. + max_level: `int` number of maximum level of the output feature pyramid. + num_scales: `int` number representing intermediate scales added + on each level. For instances, num_scales=2 adds one additional + intermediate anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: `list` of float numbers representing the aspect raito + anchors added on each level. The number indicates the ratio of width to + height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors + on each scale level. + anchor_size: `float` number representing the scale of size of the base + anchor to the feature stride 2^level. + rpn_match_threshold: + rpn_unmatched_threshold: + rpn_batch_size_per_im: + rpn_fg_fraction: + aug_rand_hflip: `bool`, if True, augment training with random + horizontal flip. 
+ aug_scale_min: `float`, the minimum scale applied to `output_size` for + data augmentation during training. + aug_scale_max: `float`, the maximum scale applied to `output_size` for + data augmentation during training. + skip_crowd_during_training: `bool`, if True, skip annotations labeled with + `is_crowd` equals to 1. + max_num_instances: `int` number of maximum number of instances in an + image. The groundtruth data will be padded to `max_num_instances`. + include_mask: a bool to indicate whether parse mask groundtruth. + mask_crop_size: the size which groundtruth mask is cropped to. + dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}. + """ + + self._max_num_instances = max_num_instances + self._skip_crowd_during_training = skip_crowd_during_training + + # Anchor. + self._output_size = output_size + self._min_level = min_level + self._max_level = max_level + self._num_scales = num_scales + self._aspect_ratios = aspect_ratios + self._anchor_size = anchor_size + + # Target assigning. + self._rpn_match_threshold = rpn_match_threshold + self._rpn_unmatched_threshold = rpn_unmatched_threshold + self._rpn_batch_size_per_im = rpn_batch_size_per_im + self._rpn_fg_fraction = rpn_fg_fraction + + # Data augmentation. + self._aug_rand_hflip = aug_rand_hflip + self._aug_scale_min = aug_scale_min + self._aug_scale_max = aug_scale_max + + # Mask. + self._include_mask = include_mask + self._mask_crop_size = mask_crop_size + + # Image output dtype. + self._dtype = dtype + + def _parse_train_data(self, data): + """Parses data for training. + + Args: + data: the decoded tensor dictionary from TfExampleDecoder. + + Returns: + image: image tensor that is preproessed to have normalized value and + dimension [output_size[0], output_size[1], 3] + labels: a dictionary of tensors used for training. The following describes + {key: value} pairs in the dictionary. + image_info: a 2D `Tensor` that encodes the information of the image and + the applied preprocessing. 
  def _parse_train_data(self, data):
    """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following
        describes {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width]],
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
          image that is fed to the network. The tensor is padded with -1 to
          the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    if self._include_mask:
      masks = data['groundtruth_instance_masks']

    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training:
      num_groundtruths = tf.shape(classes)[0]
      with tf.control_dependencies([num_groundtruths, is_crowds]):
        # When there are no is_crowd flags, keep every annotation.
        indices = tf.cond(
            tf.greater(tf.size(is_crowds), 0),
            lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
      classes = tf.gather(classes, indices)
      boxes = tf.gather(boxes, indices)
      if self._include_mask:
        masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
      if self._include_mask:
        image, boxes, masks = preprocess_ops.random_horizontal_flip(
            image, boxes, masks)
      else:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(
            image, boxes)

    # Converts boxes from normalized coordinates to pixel coordinates.
    # Now the coordinates of boxes are w.r.t. the original image.
    boxes = box_ops.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    # Now the coordinates of boxes are w.r.t the scaled image.
    # image_info rows: row 2 holds the scale factors, row 3 the crop offset.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    if self._include_mask:
      masks = tf.gather(masks, indices)
      # Transfer boxes to the original image space and do normalization.
      cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
      cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
      cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
      num_masks = tf.shape(masks)[0]
      masks = tf.image.crop_and_resize(
          tf.expand_dims(masks, axis=-1),
          cropped_boxes,
          box_indices=tf.range(num_masks, dtype=tf.int32),
          crop_size=[self._mask_crop_size, self._mask_crop_size],
          method='bilinear')
      masks = tf.squeeze(masks, axis=-1)

    # Assigns anchor targets.
    # Note that after the target assignment, box targets are absolute pixel
    # offsets w.r.t. the scaled image.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))
    anchor_labeler = anchor.RpnAnchorLabeler(
        self._rpn_match_threshold,
        self._rpn_unmatched_threshold,
        self._rpn_batch_size_per_im,
        self._rpn_fg_fraction)
    rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
        anchor_boxes, boxes,
        tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

    # Casts input image to self._dtype
    image = tf.cast(image, dtype=self._dtype)

    # Packs labels for model_fn outputs.
    labels = {
        'anchor_boxes':
            anchor_boxes,
        'image_info':
            image_info,
        'rpn_score_targets':
            rpn_score_targets,
        'rpn_box_targets':
            rpn_box_targets,
        'gt_boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                     self._max_num_instances,
                                                     -1),
        'gt_classes':
            preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                     self._max_num_instances,
                                                     -1),
    }
    if self._include_mask:
      labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
          masks, self._max_num_instances, -1)

    return image, labels
  def _parse_eval_data(self, data):
    """Parses data for evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A dictionary of {'images': image, 'labels': labels} where
        image: image tensor that is preprocessed to have normalized value and
          dimension [output_size[0], output_size[1], 3]
        labels: a dictionary of tensors used for training. The following
          describes {key: value} pairs in the dictionary.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
          image_info: a 2D `Tensor` that encodes the information of the image
            and the applied preprocessing. It is in the format of
            [[original_height, original_width], [scaled_height, scaled_width]],
          anchor_boxes: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor
            with shape [height_l, width_l, 4] representing anchor boxes at each
            level.
    """
    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Resizes and crops image. Eval uses a fixed scale (no scale jitter).
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()

    # Casts input image to self._dtype
    image = tf.cast(image, dtype=self._dtype)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'], image_shape)

    # Compute Anchor boxes.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))

    labels = {
        'image_info': image_info,
        'anchor_boxes': anchor_boxes,
    }

    # Raw (unscaled) groundtruths are kept for COCO-style evaluation.
    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(data['groundtruth_classes']),
        'boxes': boxes,
        'classes': data['groundtruth_classes'],
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)
    labels['groundtruths'] = groundtruths
    return image, labels
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The generic parser interface.""" + +import abc + + +class Parser(object): + """Parses data and produces tensors to be consumed by models.""" + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def _parse_train_data(self, decoded_tensors): + """Generates images and labels that are usable for model training. + + Args: + decoded_tensors: a dict of Tensors produced by the decoder. + + Returns: + images: the image tensor. + labels: a dict of Tensors that contains labels. + """ + pass + + @abc.abstractmethod + def _parse_eval_data(self, decoded_tensors): + """Generates images and labels that are usable for model evaluation. + + Args: + decoded_tensors: a dict of Tensors produced by the decoder. + + Returns: + images: the image tensor. + labels: a dict of Tensors that contains labels. + """ + pass + + def parse_fn(self, is_training): + """Returns a parse fn that reads and parses raw tensors from the decoder. + + Args: + is_training: a `bool` to indicate whether it is in training mode. + + Returns: + parse: a `callable` that takes the serialized examle and generate the + images, labels tuple where labels is a dict of Tensors that contains + labels. 
+ """ + def parse(decoded_tensors): + """Parses the serialized example data.""" + if is_training: + return self._parse_train_data(decoded_tensors) + else: + return self._parse_eval_data(decoded_tensors) + + return parse diff --git a/official/vision/beta/dataloaders/retinanet_input.py b/official/vision/beta/dataloaders/retinanet_input.py new file mode 100644 index 0000000000000000000000000000000000000000..fc665842676374d5c0a3599bc43f78572a126eca --- /dev/null +++ b/official/vision/beta/dataloaders/retinanet_input.py @@ -0,0 +1,278 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data parser and processing for RetinaNet. + +Parse image and ground truths in a dataset to training targets and package them +into (image, labels) tuple for RetinaNet. 
+""" + +# Import libraries +import tensorflow as tf + +from official.vision.beta.dataloaders import parser +from official.vision.beta.dataloaders import utils +from official.vision.beta.ops import anchor +from official.vision.beta.ops import box_ops +from official.vision.beta.ops import preprocess_ops + + +class Parser(parser.Parser): + """Parser to parse an image and its annotations into a dictionary of tensors.""" + + def __init__(self, + output_size, + min_level, + max_level, + num_scales, + aspect_ratios, + anchor_size, + match_threshold=0.5, + unmatched_threshold=0.5, + aug_rand_hflip=False, + aug_scale_min=1.0, + aug_scale_max=1.0, + use_autoaugment=False, + autoaugment_policy_name='v0', + skip_crowd_during_training=True, + max_num_instances=100, + dtype='bfloat16', + mode=None): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: `Tensor` or `list` for [height, width] of output image. The + output_size should be divided by the largest feature stride 2^max_level. + min_level: `int` number of minimum level of the output feature pyramid. + max_level: `int` number of maximum level of the output feature pyramid. + num_scales: `int` number representing intermediate scales added on each + level. For instances, num_scales=2 adds one additional intermediate + anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: `list` of float numbers representing the aspect raito + anchors added on each level. The number indicates the ratio of width to + height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors + on each scale level. + anchor_size: `float` number representing the scale of size of the base + anchor to the feature stride 2^level. + match_threshold: `float` number between 0 and 1 representing the + lower-bound threshold to assign positive labels for anchors. An anchor + with a score over the threshold is labeled positive. 
+ unmatched_threshold: `float` number between 0 and 1 representing the + upper-bound threshold to assign negative labels for anchors. An anchor + with a score below the threshold is labeled negative. + aug_rand_hflip: `bool`, if True, augment training with random horizontal + flip. + aug_scale_min: `float`, the minimum scale applied to `output_size` for + data augmentation during training. + aug_scale_max: `float`, the maximum scale applied to `output_size` for + data augmentation during training. + use_autoaugment: `bool`, if True, use the AutoAugment augmentation policy + during training. + autoaugment_policy_name: `string` that specifies the name of the + AutoAugment policy that will be used during training. + skip_crowd_during_training: `bool`, if True, skip annotations labeled with + `is_crowd` equals to 1. + max_num_instances: `int` number of maximum number of instances in an + image. The groundtruth data will be padded to `max_num_instances`. + dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}. + mode: a ModeKeys. Specifies if this is training, evaluation, prediction or + prediction with groundtruths in the outputs. + """ + self._mode = mode + self._max_num_instances = max_num_instances + self._skip_crowd_during_training = skip_crowd_during_training + + # Anchor. + self._output_size = output_size + self._min_level = min_level + self._max_level = max_level + self._num_scales = num_scales + self._aspect_ratios = aspect_ratios + self._anchor_size = anchor_size + self._match_threshold = match_threshold + self._unmatched_threshold = unmatched_threshold + + # Data augmentation. + self._aug_rand_hflip = aug_rand_hflip + self._aug_scale_min = aug_scale_min + self._aug_scale_max = aug_scale_max + + # Data Augmentation with AutoAugment. + self._use_autoaugment = use_autoaugment + self._autoaugment_policy_name = autoaugment_policy_name + + # Data type. 
+ self._dtype = dtype + + def _parse_train_data(self, data): + """Parses data for training and evaluation.""" + classes = data['groundtruth_classes'] + boxes = data['groundtruth_boxes'] + is_crowds = data['groundtruth_is_crowd'] + # Skips annotations with `is_crowd` = True. + if self._skip_crowd_during_training: + num_groundtrtuhs = tf.shape(input=classes)[0] + with tf.control_dependencies([num_groundtrtuhs, is_crowds]): + indices = tf.cond( + pred=tf.greater(tf.size(input=is_crowds), 0), + true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0], + false_fn=lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) + classes = tf.gather(classes, indices) + boxes = tf.gather(boxes, indices) + + # Gets original image and its size. + image = data['image'] + + image_shape = tf.shape(input=image)[0:2] + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image) + + # Flips image randomly during training. + if self._aug_rand_hflip: + image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes) + + # Converts boxes from normalized coordinates to pixel coordinates. + boxes = box_ops.denormalize_boxes(boxes, image_shape) + + # Resizes and crops image. + image, image_info = preprocess_ops.resize_and_crop_image( + image, + self._output_size, + padded_size=preprocess_ops.compute_padded_size(self._output_size, + 2**self._max_level), + aug_scale_min=self._aug_scale_min, + aug_scale_max=self._aug_scale_max) + image_height, image_width, _ = image.get_shape().as_list() + + # Resizes and crops boxes. + image_scale = image_info[2, :] + offset = image_info[3, :] + boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale, + image_info[1, :], offset) + # Filters out ground truth boxes that are all zeros. + indices = box_ops.get_non_empty_box_indices(boxes) + boxes = tf.gather(boxes, indices) + classes = tf.gather(classes, indices) + + # Assigns anchors. 
+ input_anchor = anchor.build_anchor_generator( + min_level=self._min_level, + max_level=self._max_level, + num_scales=self._num_scales, + aspect_ratios=self._aspect_ratios, + anchor_size=self._anchor_size) + anchor_boxes = input_anchor(image_size=(image_height, image_width)) + anchor_labeler = anchor.AnchorLabeler(self._match_threshold, + self._unmatched_threshold) + (cls_targets, box_targets, cls_weights, + box_weights) = anchor_labeler.label_anchors( + anchor_boxes, boxes, tf.expand_dims(classes, axis=1)) + + # Casts input image to desired data type. + image = tf.cast(image, dtype=self._dtype) + + # Packs labels for model_fn outputs. + labels = { + 'cls_targets': cls_targets, + 'box_targets': box_targets, + 'anchor_boxes': anchor_boxes, + 'cls_weights': cls_weights, + 'box_weights': box_weights, + 'image_info': image_info, + } + return image, labels + + def _parse_eval_data(self, data): + """Parses data for training and evaluation.""" + groundtruths = {} + classes = data['groundtruth_classes'] + boxes = data['groundtruth_boxes'] + + # Gets original image and its size. + image = data['image'] + image_shape = tf.shape(input=image)[0:2] + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image) + + # Converts boxes from normalized coordinates to pixel coordinates. + boxes = box_ops.denormalize_boxes(boxes, image_shape) + + # Resizes and crops image. + image, image_info = preprocess_ops.resize_and_crop_image( + image, + self._output_size, + padded_size=preprocess_ops.compute_padded_size(self._output_size, + 2**self._max_level), + aug_scale_min=1.0, + aug_scale_max=1.0) + image_height, image_width, _ = image.get_shape().as_list() + + # Resizes and crops boxes. + image_scale = image_info[2, :] + offset = image_info[3, :] + boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale, + image_info[1, :], offset) + # Filters out ground truth boxes that are all zeros. 
+ indices = box_ops.get_non_empty_box_indices(boxes) + boxes = tf.gather(boxes, indices) + classes = tf.gather(classes, indices) + + # Assigns anchors. + input_anchor = anchor.build_anchor_generator( + min_level=self._min_level, + max_level=self._max_level, + num_scales=self._num_scales, + aspect_ratios=self._aspect_ratios, + anchor_size=self._anchor_size) + anchor_boxes = input_anchor(image_size=(image_height, image_width)) + anchor_labeler = anchor.AnchorLabeler(self._match_threshold, + self._unmatched_threshold) + (cls_targets, box_targets, cls_weights, + box_weights) = anchor_labeler.label_anchors( + anchor_boxes, boxes, tf.expand_dims(classes, axis=1)) + + # Casts input image to desired data type. + image = tf.cast(image, dtype=self._dtype) + + # Sets up groundtruth data for evaluation. + groundtruths = { + 'source_id': data['source_id'], + 'height': data['height'], + 'width': data['width'], + 'num_detections': tf.shape(data['groundtruth_classes']), + 'image_info': image_info, + 'boxes': box_ops.denormalize_boxes( + data['groundtruth_boxes'], image_shape), + 'classes': data['groundtruth_classes'], + 'areas': data['groundtruth_area'], + 'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32), + } + groundtruths['source_id'] = utils.process_source_id( + groundtruths['source_id']) + groundtruths = utils.pad_groundtruths_to_fixed_size( + groundtruths, self._max_num_instances) + + # Packs labels for model_fn outputs. 
+ labels = { + 'cls_targets': cls_targets, + 'box_targets': box_targets, + 'anchor_boxes': anchor_boxes, + 'cls_weights': cls_weights, + 'box_weights': box_weights, + 'image_info': image_info, + 'groundtruths': groundtruths, + } + return image, labels diff --git a/official/vision/beta/dataloaders/segmentation_input.py b/official/vision/beta/dataloaders/segmentation_input.py new file mode 100644 index 0000000000000000000000000000000000000000..545fe7398dce6455245faa05b68134be1f4a2c26 --- /dev/null +++ b/official/vision/beta/dataloaders/segmentation_input.py @@ -0,0 +1,207 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Data parser and processing for segmentation datasets.""" + +import tensorflow as tf +from official.vision.beta.dataloaders import decoder +from official.vision.beta.dataloaders import parser +from official.vision.beta.ops import preprocess_ops + + +class Decoder(decoder.Decoder): + """A tf.Example decoder for segmentation task.""" + + def __init__(self): + self._keys_to_features = { + 'image/encoded': tf.io.FixedLenFeature((), tf.string, default_value=''), + 'image/height': tf.io.FixedLenFeature((), tf.int64, default_value=0), + 'image/width': tf.io.FixedLenFeature((), tf.int64, default_value=0), + 'image/segmentation/class/encoded': + tf.io.FixedLenFeature((), tf.string, default_value='') + } + + def decode(self, serialized_example): + return tf.io.parse_single_example( + serialized_example, self._keys_to_features) + + +class Parser(parser.Parser): + """Parser to parse an image and its annotations into a dictionary of tensors. + """ + + def __init__(self, + output_size, + crop_size=None, + resize_eval_groundtruth=True, + groundtruth_padded_size=None, + ignore_label=255, + aug_rand_hflip=False, + aug_scale_min=1.0, + aug_scale_max=1.0, + dtype='float32'): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: `Tensor` or `list` for [height, width] of output image. The + output_size should be divided by the largest feature stride 2^max_level. + crop_size: `Tensor` or `list` for [height, width] of the crop. If + specified a training crop of size crop_size is returned. This is useful + for cropping original images during training while evaluating on + original image sizes. + resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are + resized to output_size. + groundtruth_padded_size: `Tensor` or `list` for [height, width]. When + resize_eval_groundtruth is set to False, the groundtruth masks are + padded to this size. + ignore_label: `int` the pixel with ignore label will not used for training + and evaluation. 
+ aug_rand_hflip: `bool`, if True, augment training with random + horizontal flip. + aug_scale_min: `float`, the minimum scale applied to `output_size` for + data augmentation during training. + aug_scale_max: `float`, the maximum scale applied to `output_size` for + data augmentation during training. + dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}. + """ + self._output_size = output_size + self._crop_size = crop_size + self._resize_eval_groundtruth = resize_eval_groundtruth + if (not resize_eval_groundtruth) and (groundtruth_padded_size is None): + raise ValueError('groundtruth_padded_size ([height, width]) needs to be' + 'specified when resize_eval_groundtruth is False.') + self._groundtruth_padded_size = groundtruth_padded_size + self._ignore_label = ignore_label + + # Data augmentation. + self._aug_rand_hflip = aug_rand_hflip + self._aug_scale_min = aug_scale_min + self._aug_scale_max = aug_scale_max + + # dtype. + self._dtype = dtype + + def _prepare_image_and_label(self, data): + """Prepare normalized image and label.""" + image = tf.io.decode_image(data['image/encoded'], channels=3) + label = tf.io.decode_image(data['image/segmentation/class/encoded'], + channels=1) + height = data['image/height'] + width = data['image/width'] + image = tf.reshape(image, (height, width, 3)) + + label = tf.reshape(label, (1, height, width)) + label = tf.cast(label, tf.float32) + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image) + return image, label + + def _parse_train_data(self, data): + """Parses data for training and evaluation.""" + image, label = self._prepare_image_and_label(data) + + if self._crop_size: + + label = tf.reshape(label, [data['image/height'], data['image/width'], 1]) + # If output_size is specified, resize image, and label to desired + # output_size. 
+ if self._output_size: + image = tf.image.resize(image, self._output_size, method='bilinear') + label = tf.image.resize(label, self._output_size, method='nearest') + + image_mask = tf.concat([image, label], axis=2) + image_mask_crop = tf.image.random_crop(image_mask, + self._crop_size + [4]) + image = image_mask_crop[:, :, :-1] + label = tf.reshape(image_mask_crop[:, :, -1], [1] + self._crop_size) + + # Flips image randomly during training. + if self._aug_rand_hflip: + image, _, label = preprocess_ops.random_horizontal_flip( + image, masks=label) + + train_image_size = self._crop_size if self._crop_size else self._output_size + # Resizes and crops image. + image, image_info = preprocess_ops.resize_and_crop_image( + image, + train_image_size, + train_image_size, + aug_scale_min=self._aug_scale_min, + aug_scale_max=self._aug_scale_max) + + # Resizes and crops boxes. + image_scale = image_info[2, :] + offset = image_info[3, :] + + # Pad label and make sure the padded region assigned to the ignore label. + # The label is first offset by +1 and then padded with 0. + label += 1 + label = tf.expand_dims(label, axis=3) + label = preprocess_ops.resize_and_crop_masks( + label, image_scale, train_image_size, offset) + label -= 1 + label = tf.where(tf.equal(label, -1), + self._ignore_label * tf.ones_like(label), label) + label = tf.squeeze(label, axis=0) + valid_mask = tf.not_equal(label, self._ignore_label) + labels = { + 'masks': label, + 'valid_masks': valid_mask, + 'image_info': image_info, + } + + # Cast image as self._dtype + image = tf.cast(image, dtype=self._dtype) + + return image, labels + + def _parse_eval_data(self, data): + """Parses data for training and evaluation.""" + image, label = self._prepare_image_and_label(data) + # The label is first offset by +1 and then padded with 0. + label += 1 + label = tf.expand_dims(label, axis=3) + + # Resizes and crops image. 
+ image, image_info = preprocess_ops.resize_and_crop_image( + image, self._output_size, self._output_size) + + if self._resize_eval_groundtruth: + # Resizes eval masks to match input image sizes. In that case, mean IoU + # is computed on output_size not the original size of the images. + image_scale = image_info[2, :] + offset = image_info[3, :] + label = preprocess_ops.resize_and_crop_masks(label, image_scale, + self._output_size, offset) + else: + label = tf.image.pad_to_bounding_box( + label, 0, 0, self._groundtruth_padded_size[0], + self._groundtruth_padded_size[1]) + + label -= 1 + label = tf.where(tf.equal(label, -1), + self._ignore_label * tf.ones_like(label), label) + label = tf.squeeze(label, axis=0) + + valid_mask = tf.not_equal(label, self._ignore_label) + labels = { + 'masks': label, + 'valid_masks': valid_mask, + 'image_info': image_info + } + + # Cast image as self._dtype + image = tf.cast(image, dtype=self._dtype) + + return image, labels diff --git a/official/vision/beta/dataloaders/tf_example_decoder.py b/official/vision/beta/dataloaders/tf_example_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..5d588035c50bb1b6ad1c0ba678cc280aa89511e2 --- /dev/null +++ b/official/vision/beta/dataloaders/tf_example_decoder.py @@ -0,0 +1,172 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tensorflow Example proto decoder for object detection. 
+ +A decoder to decode string tensors containing serialized tensorflow.Example +protos for object detection. +""" +import tensorflow as tf + +from official.vision.beta.dataloaders import decoder + + +def _generate_source_id(image_bytes): + return tf.strings.as_string( + tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 63 - 1)) + + +class TfExampleDecoder(decoder.Decoder): + """Tensorflow Example proto decoder.""" + + def __init__(self, + include_mask=False, + regenerate_source_id=False, + mask_binarize_threshold=None): + self._include_mask = include_mask + self._regenerate_source_id = regenerate_source_id + self._keys_to_features = { + 'image/encoded': tf.io.FixedLenFeature((), tf.string), + 'image/source_id': tf.io.FixedLenFeature((), tf.string), + 'image/height': tf.io.FixedLenFeature((), tf.int64), + 'image/width': tf.io.FixedLenFeature((), tf.int64), + 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), + 'image/object/class/label': tf.io.VarLenFeature(tf.int64), + 'image/object/area': tf.io.VarLenFeature(tf.float32), + 'image/object/is_crowd': tf.io.VarLenFeature(tf.int64), + } + self._mask_binarize_threshold = mask_binarize_threshold + if include_mask: + self._keys_to_features.update({ + 'image/object/mask': tf.io.VarLenFeature(tf.string), + }) + + def _decode_image(self, parsed_tensors): + """Decodes the image and set its static shape.""" + image = tf.io.decode_image(parsed_tensors['image/encoded'], channels=3) + image.set_shape([None, None, 3]) + return image + + def _decode_boxes(self, parsed_tensors): + """Concat box coordinates in the format of [ymin, xmin, ymax, xmax].""" + xmin = parsed_tensors['image/object/bbox/xmin'] + xmax = parsed_tensors['image/object/bbox/xmax'] + ymin = parsed_tensors['image/object/bbox/ymin'] + ymax = 
parsed_tensors['image/object/bbox/ymax'] + return tf.stack([ymin, xmin, ymax, xmax], axis=-1) + + def _decode_classes(self, parsed_tensors): + return parsed_tensors['image/object/class/label'] + + def _decode_areas(self, parsed_tensors): + xmin = parsed_tensors['image/object/bbox/xmin'] + xmax = parsed_tensors['image/object/bbox/xmax'] + ymin = parsed_tensors['image/object/bbox/ymin'] + ymax = parsed_tensors['image/object/bbox/ymax'] + height = tf.cast(parsed_tensors['image/height'], dtype=tf.float32) + width = tf.cast(parsed_tensors['image/width'], dtype=tf.float32) + return tf.cond( + tf.greater(tf.shape(parsed_tensors['image/object/area'])[0], 0), + lambda: parsed_tensors['image/object/area'], + lambda: (xmax - xmin) * (ymax - ymin) * height * width) + + def _decode_masks(self, parsed_tensors): + """Decode a set of PNG masks to the tf.float32 tensors.""" + + def _decode_png_mask(png_bytes): + mask = tf.squeeze( + tf.io.decode_png(png_bytes, channels=1, dtype=tf.uint8), axis=-1) + mask = tf.cast(mask, dtype=tf.float32) + mask.set_shape([None, None]) + return mask + + height = parsed_tensors['image/height'] + width = parsed_tensors['image/width'] + masks = parsed_tensors['image/object/mask'] + return tf.cond( + pred=tf.greater(tf.size(input=masks), 0), + true_fn=lambda: tf.map_fn(_decode_png_mask, masks, dtype=tf.float32), + false_fn=lambda: tf.zeros([0, height, width], dtype=tf.float32)) + + def decode(self, serialized_example): + """Decode the serialized example. + + Args: + serialized_example: a single serialized tf.Example string. + + Returns: + decoded_tensors: a dictionary of tensors with the following fields: + - source_id: a string scalar tensor. + - image: a uint8 tensor of shape [None, None, 3]. + - height: an integer scalar tensor. + - width: an integer scalar tensor. + - groundtruth_classes: a int64 tensor of shape [None]. + - groundtruth_is_crowd: a bool tensor of shape [None]. + - groundtruth_area: a float32 tensor of shape [None]. 
+ - groundtruth_boxes: a float32 tensor of shape [None, 4]. + - groundtruth_instance_masks: a float32 tensor of shape + [None, None, None]. + - groundtruth_instance_masks_png: a string tensor of shape [None]. + """ + parsed_tensors = tf.io.parse_single_example( + serialized=serialized_example, features=self._keys_to_features) + for k in parsed_tensors: + if isinstance(parsed_tensors[k], tf.SparseTensor): + if parsed_tensors[k].dtype == tf.string: + parsed_tensors[k] = tf.sparse.to_dense( + parsed_tensors[k], default_value='') + else: + parsed_tensors[k] = tf.sparse.to_dense( + parsed_tensors[k], default_value=0) + + if self._regenerate_source_id: + source_id = _generate_source_id(parsed_tensors['image/encoded']) + else: + source_id = tf.cond( + tf.greater(tf.strings.length(parsed_tensors['image/source_id']), 0), + lambda: parsed_tensors['image/source_id'], + lambda: _generate_source_id(parsed_tensors['image/encoded'])) + image = self._decode_image(parsed_tensors) + boxes = self._decode_boxes(parsed_tensors) + classes = self._decode_classes(parsed_tensors) + areas = self._decode_areas(parsed_tensors) + is_crowds = tf.cond( + tf.greater(tf.shape(parsed_tensors['image/object/is_crowd'])[0], 0), + lambda: tf.cast(parsed_tensors['image/object/is_crowd'], dtype=tf.bool), + lambda: tf.zeros_like(classes, dtype=tf.bool)) + if self._include_mask: + masks = self._decode_masks(parsed_tensors) + + if self._mask_binarize_threshold is not None: + masks = tf.cast(masks > self._mask_binarize_threshold, tf.float32) + + decoded_tensors = { + 'source_id': source_id, + 'image': image, + 'height': parsed_tensors['image/height'], + 'width': parsed_tensors['image/width'], + 'groundtruth_classes': classes, + 'groundtruth_is_crowd': is_crowds, + 'groundtruth_area': areas, + 'groundtruth_boxes': boxes, + } + if self._include_mask: + decoded_tensors.update({ + 'groundtruth_instance_masks': masks, + 'groundtruth_instance_masks_png': parsed_tensors['image/object/mask'], + }) + return 
decoded_tensors diff --git a/official/vision/beta/dataloaders/tf_example_decoder_test.py b/official/vision/beta/dataloaders/tf_example_decoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ce140d5f60559e07e0f21c6846d208c3f1711887 --- /dev/null +++ b/official/vision/beta/dataloaders/tf_example_decoder_test.py @@ -0,0 +1,354 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tf_example_decoder.py.""" + +import io +# Import libraries +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.vision.beta.dataloaders import tf_example_decoder + + +DUMP_SOURCE_ID = b'123' + + +def _encode_image(image_array, fmt): + image = Image.fromarray(image_array) + with io.BytesIO() as output: + image.save(output, format=fmt) + return output.getvalue() + + +class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (100, 100, 0, True), + (100, 100, 1, True), + (100, 100, 2, True), + (100, 100, 0, False), + (100, 100, 1, False), + (100, 100, 2, False), + ) + def test_result_shape(self, + image_height, + image_width, + num_instances, + regenerate_source_id): + decoder = tf_example_decoder.TfExampleDecoder( + include_mask=True, regenerate_source_id=regenerate_source_id) + + image = _encode_image( + np.uint8(np.random.rand(image_height, image_width, 3) * 255), + fmt='JPEG') 
+ if num_instances == 0: + xmins = [] + xmaxs = [] + ymins = [] + ymaxs = [] + labels = [] + areas = [] + is_crowds = [] + masks = [] + else: + xmins = list(np.random.rand(num_instances)) + xmaxs = list(np.random.rand(num_instances)) + ymins = list(np.random.rand(num_instances)) + ymaxs = list(np.random.rand(num_instances)) + labels = list(np.random.randint(100, size=num_instances)) + areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width + for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)] + is_crowds = [0] * num_instances + masks = [] + for _ in range(num_instances): + mask = _encode_image( + np.uint8(np.random.rand(image_height, image_width) * 255), + fmt='PNG') + masks.append(mask) + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + 'image/source_id': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), + 'image/height': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_height]))), + 'image/width': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_width]))), + 'image/object/bbox/xmin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmins))), + 'image/object/bbox/xmax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmaxs))), + 'image/object/bbox/ymin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymins))), + 'image/object/bbox/ymax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymaxs))), + 'image/object/class/label': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=labels))), + 'image/object/is_crowd': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=is_crowds))), + 'image/object/area': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=areas))), + 'image/object/mask': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=masks))), + })).SerializeToString() + 
decoded_tensors = decoder.decode( + tf.convert_to_tensor(value=serialized_example)) + + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + + self.assertAllEqual( + (image_height, image_width, 3), results['image'].shape) + if not regenerate_source_id: + self.assertEqual(DUMP_SOURCE_ID, results['source_id']) + self.assertEqual(image_height, results['height']) + self.assertEqual(image_width, results['width']) + self.assertAllEqual( + (num_instances,), results['groundtruth_classes'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_is_crowd'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_area'].shape) + self.assertAllEqual( + (num_instances, 4), results['groundtruth_boxes'].shape) + self.assertAllEqual( + (num_instances, image_height, image_width), + results['groundtruth_instance_masks'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_instance_masks_png'].shape) + + def test_result_content(self): + decoder = tf_example_decoder.TfExampleDecoder(include_mask=True) + + image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] + image = _encode_image(np.uint8(image_content), fmt='PNG') + image_height = 4 + image_width = 4 + num_instances = 2 + xmins = [0, 0.25] + xmaxs = [0.5, 1.0] + ymins = [0, 0] + ymaxs = [0.5, 1.0] + labels = [3, 1] + areas = [ + 0.25 * image_height * image_width, 0.75 * image_height * image_width + ] + is_crowds = [1, 0] + mask_content = [[[255, 255, 0, 0], + [255, 255, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255]]] + masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': ( + 
tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + 'image/source_id': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), + 'image/height': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_height]))), + 'image/width': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_width]))), + 'image/object/bbox/xmin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmins))), + 'image/object/bbox/xmax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmaxs))), + 'image/object/bbox/ymin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymins))), + 'image/object/bbox/ymax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymaxs))), + 'image/object/class/label': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=labels))), + 'image/object/is_crowd': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=is_crowds))), + 'image/object/area': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=areas))), + 'image/object/mask': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=masks))), + })).SerializeToString() + decoded_tensors = decoder.decode( + tf.convert_to_tensor(value=serialized_example)) + + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + + self.assertAllEqual( + (image_height, image_width, 3), results['image'].shape) + self.assertAllEqual(image_content, results['image']) + self.assertEqual(DUMP_SOURCE_ID, results['source_id']) + self.assertEqual(image_height, results['height']) + self.assertEqual(image_width, results['width']) + self.assertAllEqual( + (num_instances,), results['groundtruth_classes'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_is_crowd'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_area'].shape) + self.assertAllEqual( + (num_instances, 4), results['groundtruth_boxes'].shape) + self.assertAllEqual( + (num_instances, 
image_height, image_width), + results['groundtruth_instance_masks'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_instance_masks_png'].shape) + self.assertAllEqual( + [3, 1], results['groundtruth_classes']) + self.assertAllEqual( + [True, False], results['groundtruth_is_crowd']) + self.assertNDArrayNear( + [0.25 * image_height * image_width, 0.75 * image_height * image_width], + results['groundtruth_area'], 1e-4) + self.assertNDArrayNear( + [[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]], + results['groundtruth_boxes'], 1e-4) + self.assertNDArrayNear( + mask_content, results['groundtruth_instance_masks'], 1e-4) + self.assertAllEqual( + masks, results['groundtruth_instance_masks_png']) + + def test_handling_missing_fields(self): + decoder = tf_example_decoder.TfExampleDecoder(include_mask=True) + + image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] + image = _encode_image(np.uint8(image_content), fmt='PNG') + image_height = 4 + image_width = 4 + num_instances = 2 + xmins = [0, 0.25] + xmaxs = [0.5, 1.0] + ymins = [0, 0] + ymaxs = [0.5, 1.0] + labels = [3, 1] + mask_content = [[[255, 255, 0, 0], + [255, 255, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255]]] + masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + 'image/source_id': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), + 'image/height': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_height]))), + 'image/width': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_width]))), + 
'image/object/bbox/xmin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmins))), + 'image/object/bbox/xmax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmaxs))), + 'image/object/bbox/ymin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymins))), + 'image/object/bbox/ymax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymaxs))), + 'image/object/class/label': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=labels))), + 'image/object/mask': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=masks))), + })).SerializeToString() + decoded_tensors = decoder.decode( + tf.convert_to_tensor(serialized_example)) + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + + self.assertAllEqual( + (image_height, image_width, 3), results['image'].shape) + self.assertAllEqual(image_content, results['image']) + self.assertEqual(DUMP_SOURCE_ID, results['source_id']) + self.assertEqual(image_height, results['height']) + self.assertEqual(image_width, results['width']) + self.assertAllEqual( + (num_instances,), results['groundtruth_classes'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_is_crowd'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_area'].shape) + self.assertAllEqual( + (num_instances, 4), results['groundtruth_boxes'].shape) + self.assertAllEqual( + (num_instances, image_height, image_width), + results['groundtruth_instance_masks'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_instance_masks_png'].shape) + self.assertAllEqual( + [3, 1], results['groundtruth_classes']) + self.assertAllEqual( + [False, False], results['groundtruth_is_crowd']) + self.assertNDArrayNear( + [0.25 * image_height * image_width, 0.75 * image_height * image_width], + results['groundtruth_area'], 1e-4) + self.assertNDArrayNear( + [[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]], + results['groundtruth_boxes'], 1e-4) + self.assertNDArrayNear( + 
mask_content, results['groundtruth_instance_masks'], 1e-4) + self.assertAllEqual( + masks, results['groundtruth_instance_masks_png']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/dataloaders/tf_example_label_map_decoder.py b/official/vision/beta/dataloaders/tf_example_label_map_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..14ebd2f831fd906e9d324f9b344750a2629384b5 --- /dev/null +++ b/official/vision/beta/dataloaders/tf_example_label_map_decoder.py @@ -0,0 +1,67 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tensorflow Example proto decoder for object detection. + +A decoder to decode string tensors containing serialized tensorflow.Example +protos for object detection. 
+""" +import csv +# Import libraries +import tensorflow as tf + +from official.vision.beta.dataloaders import tf_example_decoder + + +class TfExampleDecoderLabelMap(tf_example_decoder.TfExampleDecoder): + """Tensorflow Example proto decoder.""" + + def __init__(self, label_map, include_mask=False, regenerate_source_id=False, + mask_binarize_threshold=None): + super(TfExampleDecoderLabelMap, self).__init__( + include_mask=include_mask, regenerate_source_id=regenerate_source_id, + mask_binarize_threshold=mask_binarize_threshold) + self._keys_to_features.update({ + 'image/object/class/text': tf.io.VarLenFeature(tf.string), + }) + name_to_id = self._process_label_map(label_map) + self._name_to_id_table = tf.lookup.StaticHashTable( + tf.lookup.KeyValueTensorInitializer( + keys=tf.constant(list(name_to_id.keys()), dtype=tf.string), + values=tf.constant(list(name_to_id.values()), dtype=tf.int64)), + default_value=-1) + + def _process_label_map(self, label_map): + if label_map.endswith('.csv'): + name_to_id = self._process_csv(label_map) + else: + raise ValueError('The label map file is in incorrect format.') + return name_to_id + + def _process_csv(self, label_map): + name_to_id = {} + with tf.io.gfile.GFile(label_map, 'r') as f: + reader = csv.reader(f, delimiter=',') + for row in reader: + if len(row) != 2: + raise ValueError('Each row of the csv label map file must be in ' + '`id,name` format. 
length = {}'.format(len(row))) + id_index = int(row[0]) + name = row[1] + name_to_id[name] = id_index + return name_to_id + + def _decode_classes(self, parsed_tensors): + return self._name_to_id_table.lookup( + parsed_tensors['image/object/class/text']) diff --git a/official/vision/beta/dataloaders/tf_example_label_map_decoder_test.py b/official/vision/beta/dataloaders/tf_example_label_map_decoder_test.py new file mode 100644 index 0000000000000000000000000000000000000000..900826ba5dd890042195db9c3b2405132e3b44ce --- /dev/null +++ b/official/vision/beta/dataloaders/tf_example_label_map_decoder_test.py @@ -0,0 +1,268 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for tf_example_label_map_decoder.py.""" + +import io +import os +# Import libraries +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.vision.beta.dataloaders import tf_example_label_map_decoder + + +DUMP_SOURCE_ID = b'123' +LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2' + + +def _encode_image(image_array, fmt): + image = Image.fromarray(image_array) + with io.BytesIO() as output: + image.save(output, format=fmt) + return output.getvalue() + + +class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (100, 100, 0), + (100, 100, 1), + (100, 100, 2), + (100, 100, 0), + (100, 100, 1), + (100, 100, 2), + ) + def test_result_shape(self, image_height, image_width, num_instances): + label_map_dir = self.get_temp_dir() + label_map_name = 'label_map.csv' + label_map_path = os.path.join(label_map_dir, label_map_name) + with open(label_map_path, 'w') as f: + f.write(LABEL_MAP_CSV_CONTENT) + + decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap( + label_map_path, include_mask=True) + + image = _encode_image( + np.uint8(np.random.rand(image_height, image_width, 3) * 255), + fmt='JPEG') + if num_instances == 0: + xmins = [] + xmaxs = [] + ymins = [] + ymaxs = [] + labels = [] + areas = [] + is_crowds = [] + masks = [] + else: + xmins = list(np.random.rand(num_instances)) + xmaxs = list(np.random.rand(num_instances)) + ymins = list(np.random.rand(num_instances)) + ymaxs = list(np.random.rand(num_instances)) + labels = list(np.random.randint(100, size=num_instances)) + areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width + for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)] + is_crowds = [0] * num_instances + masks = [] + labels = [b'class_1'] * num_instances + for _ in range(num_instances): + mask = _encode_image( + np.uint8(np.random.rand(image_height, image_width) * 255), + fmt='PNG') + 
masks.append(mask) + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + 'image/source_id': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), + 'image/height': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_height]))), + 'image/width': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_width]))), + 'image/object/bbox/xmin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmins))), + 'image/object/bbox/xmax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmaxs))), + 'image/object/bbox/ymin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymins))), + 'image/object/bbox/ymax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymaxs))), + 'image/object/class/text': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=labels))), + 'image/object/is_crowd': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=is_crowds))), + 'image/object/area': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=areas))), + 'image/object/mask': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=masks))), + })).SerializeToString() + decoded_tensors = decoder.decode( + tf.convert_to_tensor(value=serialized_example)) + + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + + self.assertAllEqual( + (image_height, image_width, 3), results['image'].shape) + self.assertEqual(DUMP_SOURCE_ID, results['source_id']) + self.assertEqual(image_height, results['height']) + self.assertEqual(image_width, results['width']) + self.assertAllEqual( + (num_instances,), results['groundtruth_classes'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_is_crowd'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_area'].shape) + self.assertAllEqual( + (num_instances, 4), 
results['groundtruth_boxes'].shape) + self.assertAllEqual( + (num_instances, image_height, image_width), + results['groundtruth_instance_masks'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_instance_masks_png'].shape) + + def test_result_content(self): + label_map_dir = self.get_temp_dir() + label_map_name = 'label_map.csv' + label_map_path = os.path.join(label_map_dir, label_map_name) + with open(label_map_path, 'w') as f: + f.write(LABEL_MAP_CSV_CONTENT) + + decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap( + label_map_path, include_mask=True) + + image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] + image = _encode_image(np.uint8(image_content), fmt='PNG') + image_height = 4 + image_width = 4 + num_instances = 2 + xmins = [0, 0.25] + xmaxs = [0.5, 1.0] + ymins = [0, 0] + ymaxs = [0.5, 1.0] + labels = [b'class_2', b'class_0'] + areas = [ + 0.25 * image_height * image_width, 0.75 * image_height * image_width + ] + is_crowds = [1, 0] + mask_content = [[[255, 255, 0, 0], + [255, 255, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]], + [[0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255], + [0, 255, 255, 255]]] + masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + 'image/source_id': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), + 'image/height': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_height]))), + 'image/width': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=[image_width]))), + 'image/object/bbox/xmin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmins))), + 
'image/object/bbox/xmax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=xmaxs))), + 'image/object/bbox/ymin': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymins))), + 'image/object/bbox/ymax': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=ymaxs))), + 'image/object/class/text': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=labels))), + 'image/object/is_crowd': ( + tf.train.Feature( + int64_list=tf.train.Int64List(value=is_crowds))), + 'image/object/area': ( + tf.train.Feature( + float_list=tf.train.FloatList(value=areas))), + 'image/object/mask': ( + tf.train.Feature( + bytes_list=tf.train.BytesList(value=masks))), + })).SerializeToString() + decoded_tensors = decoder.decode( + tf.convert_to_tensor(value=serialized_example)) + + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + + self.assertAllEqual( + (image_height, image_width, 3), results['image'].shape) + self.assertAllEqual(image_content, results['image']) + self.assertEqual(DUMP_SOURCE_ID, results['source_id']) + self.assertEqual(image_height, results['height']) + self.assertEqual(image_width, results['width']) + self.assertAllEqual( + (num_instances,), results['groundtruth_classes'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_is_crowd'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_area'].shape) + self.assertAllEqual( + (num_instances, 4), results['groundtruth_boxes'].shape) + self.assertAllEqual( + (num_instances, image_height, image_width), + results['groundtruth_instance_masks'].shape) + self.assertAllEqual( + (num_instances,), results['groundtruth_instance_masks_png'].shape) + self.assertAllEqual( + [2, 0], results['groundtruth_classes']) + self.assertAllEqual( + [True, False], results['groundtruth_is_crowd']) + self.assertNDArrayNear( + [0.25 * image_height * image_width, 0.75 * image_height * image_width], + results['groundtruth_area'], 1e-4) + self.assertNDArrayNear( + [[0, 
0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]], + results['groundtruth_boxes'], 1e-4) + self.assertNDArrayNear( + mask_content, results['groundtruth_instance_masks'], 1e-4) + self.assertAllEqual( + masks, results['groundtruth_instance_masks_png']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/dataloaders/tfds_classification_decoders.py b/official/vision/beta/dataloaders/tfds_classification_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..36f6e28f734a41509944caa1dbfd09911f1a3acc --- /dev/null +++ b/official/vision/beta/dataloaders/tfds_classification_decoders.py @@ -0,0 +1,38 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""TFDS Classification decoders.""" + +import tensorflow as tf +from official.vision.beta.dataloaders import decoder + + +class ClassificationDecorder(decoder.Decoder): + """A tf.Example decoder for tfds classification datasets.""" + + def decode(self, serialized_example): + sample_dict = { + 'image/encoded': + tf.io.encode_jpeg(serialized_example['image'], quality=100), + 'image/class/label': + serialized_example['label'], + } + return sample_dict + + +TFDS_ID_TO_DECODER_MAP = { + 'cifar10': ClassificationDecorder, + 'cifar100': ClassificationDecorder, + 'imagenet2012': ClassificationDecorder, +} diff --git a/official/vision/beta/dataloaders/tfds_detection_decoders.py b/official/vision/beta/dataloaders/tfds_detection_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..fef7d2f24ef42d2f6f33c29ea35e516a71bbd345 --- /dev/null +++ b/official/vision/beta/dataloaders/tfds_detection_decoders.py @@ -0,0 +1,60 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFDS detection decoders.""" + +import tensorflow as tf +from official.vision.beta.dataloaders import decoder + + +class MSCOCODecoder(decoder.Decoder): + """A tf.Example decoder for tfds coco datasets.""" + + def decode(self, serialized_example): + """Decode the serialized example. + + Args: + serialized_example: a dictonary example produced by tfds. 
+ + Returns: + decoded_tensors: a dictionary of tensors with the following fields: + - source_id: a string scalar tensor. + - image: a uint8 tensor of shape [None, None, 3]. + - height: an integer scalar tensor. + - width: an integer scalar tensor. + - groundtruth_classes: a int64 tensor of shape [None]. + - groundtruth_is_crowd: a bool tensor of shape [None]. + - groundtruth_area: a float32 tensor of shape [None]. + - groundtruth_boxes: a float32 tensor of shape [None, 4]. + """ + + decoded_tensors = { + 'source_id': tf.strings.as_string(serialized_example['image/id']), + 'image': serialized_example['image'], + 'height': tf.cast(tf.shape(serialized_example['image'])[0], tf.int64), + 'width': tf.cast(tf.shape(serialized_example['image'])[1], tf.int64), + 'groundtruth_classes': serialized_example['objects']['label'], + 'groundtruth_is_crowd': serialized_example['objects']['is_crowd'], + 'groundtruth_area': tf.cast( + serialized_example['objects']['area'], tf.float32), + 'groundtruth_boxes': serialized_example['objects']['bbox'], + } + return decoded_tensors + + +TFDS_ID_TO_DECODER_MAP = { + 'coco/2017': MSCOCODecoder, + 'coco/2014': MSCOCODecoder, + 'coco': MSCOCODecoder +} diff --git a/official/vision/beta/dataloaders/tfds_segmentation_decoders.py b/official/vision/beta/dataloaders/tfds_segmentation_decoders.py new file mode 100644 index 0000000000000000000000000000000000000000..4b6985fcdbda28282821e3952ca3661bbaf096b4 --- /dev/null +++ b/official/vision/beta/dataloaders/tfds_segmentation_decoders.py @@ -0,0 +1,86 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFDS Semantic Segmentation decoders.""" + +import tensorflow as tf +from official.vision.beta.dataloaders import decoder + + +class CityScapesDecorder(decoder.Decoder): + """A tf.Example decoder for tfds cityscapes datasets.""" + + def __init__(self): + # Original labels to trainable labels map, 255 is the ignore class. + self._label_map = { + -1: 255, + 0: 255, + 1: 255, + 2: 255, + 3: 255, + 4: 255, + 5: 255, + 6: 255, + 7: 0, + 8: 1, + 9: 255, + 10: 255, + 11: 2, + 12: 3, + 13: 4, + 14: 255, + 15: 255, + 16: 255, + 17: 5, + 18: 255, + 19: 6, + 20: 7, + 21: 8, + 22: 9, + 23: 10, + 24: 11, + 25: 12, + 26: 13, + 27: 14, + 28: 15, + 29: 255, + 30: 255, + 31: 16, + 32: 17, + 33: 18, + } + + def decode(self, serialized_example): + # Convert labels according to the self._label_map + label = serialized_example['segmentation_label'] + for original_label in self._label_map: + label = tf.where(label == original_label, + self._label_map[original_label] * tf.ones_like(label), + label) + sample_dict = { + 'image/encoded': + tf.io.encode_jpeg(serialized_example['image_left'], quality=100), + 'image/height': serialized_example['image_left'].shape[0], + 'image/width': serialized_example['image_left'].shape[1], + 'image/segmentation/class/encoded': + tf.io.encode_png(label), + } + return sample_dict + + +TFDS_ID_TO_DECODER_MAP = { + 'cityscapes': CityScapesDecorder, + 'cityscapes/semantic_segmentation': CityScapesDecorder, + 'cityscapes/semantic_segmentation_extra': CityScapesDecorder, +} diff --git a/official/vision/beta/dataloaders/tfexample_utils.py 
b/official/vision/beta/dataloaders/tfexample_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b64d24ff35bfdb907d12a24d85fb8a111f424cee --- /dev/null +++ b/official/vision/beta/dataloaders/tfexample_utils.py @@ -0,0 +1,143 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Utility functions to create tf.Example and tf.SequnceExample for test. + +Example:video classification end-to-end test +i.e. from reading input file to train and eval. + +```python +class FooTrainTest(tf.test.TestCase): + + def setUp(self): + super(TrainTest, self).setUp() + + # Write the fake tf.train.SequenceExample to file for test. + data_dir = os.path.join(self.get_temp_dir(), 'data') + tf.io.gfile.makedirs(data_dir) + self._data_path = os.path.join(data_dir, 'data.tfrecord') + examples = [ + tfexample_utils.make_video_test_example( + image_shape=(36, 36, 3), + audio_shape=(20, 128), + label=random.randint(0, 100)) for _ in range(2) + ] + tfexample_utils.dump_to_tfrecord(self._data_path, tf_examples=examples) + + def test_foo(self): + dataset = tf.data.TFRecordDataset(self._data_path) + ... 
+ +``` + +""" +import io +from typing import Sequence, Union + +import numpy as np +from PIL import Image +import tensorflow as tf + +IMAGE_KEY = 'image/encoded' +CLASSIFICATION_LABEL_KEY = 'image/class/label' +LABEL_KEY = 'clip/label/index' +AUDIO_KEY = 'features/audio' + + +def make_image_bytes(shape: Sequence[int]): + """Generates image and return bytes in JPEG format.""" + random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8) + random_image = Image.fromarray(random_image) + with io.BytesIO() as buffer: + random_image.save(buffer, format='JPEG') + raw_image_bytes = buffer.getvalue() + return raw_image_bytes + + +def put_int64_to_context(seq_example: tf.train.SequenceExample, + label: int = 0, + key: str = LABEL_KEY): + """Puts int64 to SequenceExample context with key.""" + seq_example.context.feature[key].int64_list.value[:] = [label] + + +def put_bytes_list_to_feature(seq_example: tf.train.SequenceExample, + raw_image_bytes: bytes, + key: str = IMAGE_KEY, + repeat_num: int = 2): + """Puts bytes list to SequenceExample context with key.""" + for _ in range(repeat_num): + seq_example.feature_lists.feature_list.get_or_create( + key).feature.add().bytes_list.value[:] = [raw_image_bytes] + + +def put_float_list_to_feature(seq_example: tf.train.SequenceExample, + value: Sequence[Sequence[float]], key: str): + """Puts float list to SequenceExample context with key.""" + for s in value: + seq_example.feature_lists.feature_list.get_or_create( + key).feature.add().float_list.value[:] = s + + +def make_video_test_example(image_shape: Sequence[int] = (263, 320, 3), + audio_shape: Sequence[int] = (10, 256), + label: int = 42): + """Generates data for testing video models (inc. 
RGB, audio, & label).""" + raw_image_bytes = make_image_bytes(shape=image_shape) + random_audio = np.random.normal(size=audio_shape).tolist() + + seq_example = tf.train.SequenceExample() + put_int64_to_context(seq_example, label=label, key=LABEL_KEY) + put_bytes_list_to_feature( + seq_example, raw_image_bytes, key=IMAGE_KEY, repeat_num=4) + + put_float_list_to_feature(seq_example, value=random_audio, key=AUDIO_KEY) + return seq_example + + +def dump_to_tfrecord(record_file: str, + tf_examples: Sequence[Union[tf.train.Example, + tf.train.SequenceExample]]): + """Writes serialized Example to TFRecord file with path.""" + with tf.io.TFRecordWriter(record_file) as writer: + for tf_example in tf_examples: + writer.write(tf_example.SerializeToString()) + + +def _encode_image(image_array: np.ndarray, fmt: str) -> bytes: + """Util function to encode an image.""" + image = Image.fromarray(image_array) + with io.BytesIO() as output: + image.save(output, format=fmt) + return output.getvalue() + + +def create_classification_example( + image_height: int, + image_width: int, + is_multilabel: bool = False) -> tf.train.Example: + """Creates image and labels for image classification input pipeline.""" + image = _encode_image( + np.uint8(np.random.rand(image_height, image_width, 3) * 255), fmt='JPEG') + labels = [0, 1] if is_multilabel else [0] + serialized_example = tf.train.Example( + features=tf.train.Features( + feature={ + IMAGE_KEY: (tf.train.Feature( + bytes_list=tf.train.BytesList(value=[image]))), + CLASSIFICATION_LABEL_KEY: (tf.train.Feature( + int64_list=tf.train.Int64List(value=labels))), + })).SerializeToString() + return serialized_example diff --git a/official/vision/beta/dataloaders/utils.py b/official/vision/beta/dataloaders/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..51a62ebb00dc00885c33c0232f7b37d92ce598ab --- /dev/null +++ b/official/vision/beta/dataloaders/utils.py @@ -0,0 +1,65 @@ +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data loader utils.""" +from typing import Dict + +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import preprocess_ops + + +def process_source_id(source_id: tf.Tensor) -> tf.Tensor: + """Processes source_id to the right format. + + Args: + source_id: A `tf.Tensor` that contains the source ID. It can be empty. + + Returns: + A formatted source ID. + """ + if source_id.dtype == tf.string: + source_id = tf.cast(tf.strings.to_number(source_id), tf.int64) + with tf.control_dependencies([source_id]): + source_id = tf.cond( + pred=tf.equal(tf.size(input=source_id), 0), + true_fn=lambda: tf.cast(tf.constant(-1), tf.int64), + false_fn=lambda: tf.identity(source_id)) + return source_id + + +def pad_groundtruths_to_fixed_size(groundtruths: Dict[str, tf.Tensor], + size: int) -> Dict[str, tf.Tensor]: + """Pads the first dimension of groundtruths labels to the fixed size. + + Args: + groundtruths: A dictionary of {`str`: `tf.Tensor`} that contains groundtruth + annotations of `boxes`, `is_crowds`, `areas` and `classes`. + size: An `int` that specifies the expected size of the first dimension of + padded tensors. + + Returns: + A dictionary of the same keys as input and padded tensors as values. 
+ + """ + groundtruths['boxes'] = preprocess_ops.clip_or_pad_to_fixed_size( + groundtruths['boxes'], size, -1) + groundtruths['is_crowds'] = preprocess_ops.clip_or_pad_to_fixed_size( + groundtruths['is_crowds'], size, 0) + groundtruths['areas'] = preprocess_ops.clip_or_pad_to_fixed_size( + groundtruths['areas'], size, -1) + groundtruths['classes'] = preprocess_ops.clip_or_pad_to_fixed_size( + groundtruths['classes'], size, -1) + return groundtruths diff --git a/official/vision/beta/dataloaders/utils_test.py b/official/vision/beta/dataloaders/utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..98fec9b8ec9faaa6573c2c23711468d3ef4eb867 --- /dev/null +++ b/official/vision/beta/dataloaders/utils_test.py @@ -0,0 +1,63 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for dataloader utils functions.""" + +# Import libraries + +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.dataloaders import utils + + +class UtilsTest(tf.test.TestCase, parameterized.TestCase): + + def test_process_empty_source_id(self): + source_id = tf.constant([], dtype=tf.int64) + source_id = tf.strings.as_string(source_id) + self.assertEqual(-1, utils.process_source_id(source_id=source_id)) + + @parameterized.parameters( + ([128, 256], [128, 256]), + ([128, 32, 16], [128, 32, 16]), + ) + def test_process_source_id(self, source_id, expected_result): + source_id = tf.constant(source_id, dtype=tf.int64) + source_id = tf.strings.as_string(source_id) + self.assertSequenceAlmostEqual(expected_result, + utils.process_source_id(source_id=source_id)) + + @parameterized.parameters( + ([[10, 20, 30, 40]], [[100]], [[0]], 10), + ([[0.1, 0.2, 0.5, 0.6]], [[0.5]], [[1]], 2), + ) + def test_pad_groundtruths_to_fixed_size(self, boxes, area, classes, size): + groundtruths = {} + groundtruths['boxes'] = tf.constant(boxes) + groundtruths['is_crowds'] = tf.constant([[0]]) + groundtruths['areas'] = tf.constant(area) + groundtruths['classes'] = tf.constant(classes) + + actual_result = utils.pad_groundtruths_to_fixed_size( + groundtruths=groundtruths, size=size) + + # Check that the first dimension is padded to the expected size. + for key in actual_result: + pad_shape = actual_result[key].shape[0] + self.assertEqual(size, pad_shape) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/dataloaders/video_input.py b/official/vision/beta/dataloaders/video_input.py new file mode 100644 index 0000000000000000000000000000000000000000..53cc70593127844f86edc5b57c7584db070ec289 --- /dev/null +++ b/official/vision/beta/dataloaders/video_input.py @@ -0,0 +1,369 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Parser for video and label datasets.""" + +from typing import Dict, Optional, Tuple, Union + +from absl import logging +import tensorflow as tf + +from official.vision.beta.configs import video_classification as exp_cfg +from official.vision.beta.dataloaders import decoder +from official.vision.beta.dataloaders import parser +from official.vision.beta.ops import augment +from official.vision.beta.ops import preprocess_ops_3d + +IMAGE_KEY = 'image/encoded' +LABEL_KEY = 'clip/label/index' + + +def process_image(image: tf.Tensor, + is_training: bool = True, + num_frames: int = 32, + stride: int = 1, + random_stride_range: int = 0, + num_test_clips: int = 1, + min_resize: int = 256, + crop_size: int = 224, + num_crops: int = 1, + zero_centering_image: bool = False, + min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2, + min_area_ratio: float = 0.49, + max_area_ratio: float = 1.0, + augmenter: Optional[augment.ImageAugment] = None, + seed: Optional[int] = None) -> tf.Tensor: + """Processes a serialized image tensor. + + Args: + image: Input Tensor of shape [timesteps] and type tf.string of serialized + frames. + is_training: Whether or not in training mode. If True, random sample, crop + and left right flip is used. + num_frames: Number of frames per subclip. + stride: Temporal stride to sample frames. 
+ random_stride_range: An int indicating the min and max bounds to uniformly + sample different strides from the video. E.g., a value of 1 with stride=2 + will uniformly sample a stride in {1, 2, 3} for each video in a batch. + Only used during training for the purposes of frame-rate augmentation. + Defaults to 0, which disables random sampling. + num_test_clips: Number of test clips (1 by default). If more than 1, this + will sample multiple linearly spaced clips within each video at test time. + If 1, then a single clip in the middle of the video is sampled. The clips + are aggregated in the batch dimension. + min_resize: Frames are resized so that min(height, width) is min_resize. + crop_size: Final size of the frame after cropping the resized frames. Both + height and width are the same. + num_crops: Number of crops to perform on the resized frames. + zero_centering_image: If True, frames are normalized to values in [-1, 1]. + If False, values in [0, 1]. + min_aspect_ratio: The minimum aspect range for cropping. + max_aspect_ratio: The maximum aspect range for cropping. + min_area_ratio: The minimum area range for cropping. + max_area_ratio: The maximum area range for cropping. + augmenter: Image augmenter to distort each image. + seed: A deterministic seed to use when sampling. + + Returns: + Processed frames. Tensor of shape + [num_frames * num_test_clips, crop_size, crop_size, 3]. + """ + # Validate parameters. + if is_training and num_test_clips != 1: + logging.warning( + '`num_test_clips` %d is ignored since `is_training` is `True`.', + num_test_clips) + + if random_stride_range < 0: + raise ValueError('Random stride range should be >= 0, got {}'.format( + random_stride_range)) + + # Temporal sampler. + if is_training: + if random_stride_range > 0: + # Uniformly sample different frame-rates + stride = tf.random.uniform( + [], + tf.maximum(stride - random_stride_range, 1), + stride + random_stride_range, + dtype=tf.int32) + + # Sample random clip. 
+ image = preprocess_ops_3d.sample_sequence(image, num_frames, True, stride, + seed) + elif num_test_clips > 1: + # Sample linspace clips. + image = preprocess_ops_3d.sample_linspace_sequence(image, num_test_clips, + num_frames, stride) + else: + # Sample middle clip. + image = preprocess_ops_3d.sample_sequence(image, num_frames, False, stride) + + # Decode JPEG string to tf.uint8. + image = preprocess_ops_3d.decode_jpeg(image, 3) + + if is_training: + # Standard image data augmentation: random resized crop and random flip. + image = preprocess_ops_3d.random_crop_resize( + image, crop_size, crop_size, num_frames, 3, + (min_aspect_ratio, max_aspect_ratio), + (min_area_ratio, max_area_ratio)) + image = preprocess_ops_3d.random_flip_left_right(image, seed) + + if augmenter is not None: + image = augmenter.distort(image) + else: + # Resize images (resize happens only if necessary to save compute). + image = preprocess_ops_3d.resize_smallest(image, min_resize) + # Crop of the frames. + image = preprocess_ops_3d.crop_image(image, crop_size, crop_size, False, + num_crops) + + # Cast the frames in float32, normalizing according to zero_centering_image. + return preprocess_ops_3d.normalize_image(image, zero_centering_image) + + +def postprocess_image(image: tf.Tensor, + is_training: bool = True, + num_frames: int = 32, + num_test_clips: int = 1, + num_test_crops: int = 1) -> tf.Tensor: + """Processes a batched Tensor of frames. + + The same parameters used in process should be used here. + + Args: + image: Input Tensor of shape [batch, timesteps, height, width, 3]. + is_training: Whether or not in training mode. If True, random sample, crop + and left right flip is used. + num_frames: Number of frames per subclip. + num_test_clips: Number of test clips (1 by default). If more than 1, this + will sample multiple linearly spaced clips within each video at test time. + If 1, then a single clip in the middle of the video is sampled. 
The clips + are aggregated in the batch dimension. + num_test_crops: Number of test crops (1 by default). If more than 1, there + are multiple crops for each clip at test time. If 1, there is a single + central crop. The crops are aggregated in the batch dimension. + + Returns: + Processed frames. Tensor of shape + [batch * num_test_clips * num_test_crops, num_frames, height, width, 3]. + """ + num_views = num_test_clips * num_test_crops + if num_views > 1 and not is_training: + # In this case, multiple views are merged together in batch dimension which + # will be batch * num_views. + image = tf.reshape(image, [-1, num_frames] + image.shape[2:].as_list()) + + return image + + + def process_label(label: tf.Tensor, + one_hot_label: bool = True, + num_classes: Optional[int] = None) -> tf.Tensor: + """Processes label Tensor.""" + # Validate parameters. + if one_hot_label and not num_classes: + raise ValueError( + '`num_classes` should be given when requesting one hot label.') + + # Cast to tf.int32. + label = tf.cast(label, dtype=tf.int32) + + if one_hot_label: + # Replace label index by one hot representation. + label = tf.one_hot(label, num_classes) + if len(label.shape.as_list()) > 1: + label = tf.reduce_sum(label, axis=0) + if num_classes == 1: + # The trick for single label. + label = 1 - label + + return label + + + class Decoder(decoder.Decoder): + """A tf.Example decoder for classification task.""" + + def __init__(self, image_key: str = IMAGE_KEY, label_key: str = LABEL_KEY): + self._context_description = { + # One integer stored in context. + label_key: tf.io.VarLenFeature(tf.int64), + } + self._sequence_description = { + # Each image is a string encoding JPEG. 
+ image_key: tf.io.FixedLenSequenceFeature((), tf.string), + } + + def add_feature(self, feature_name: str, + feature_type: Union[tf.io.VarLenFeature, + tf.io.FixedLenFeature, + tf.io.FixedLenSequenceFeature]): + self._sequence_description[feature_name] = feature_type + + def add_context(self, feature_name: str, + feature_type: Union[tf.io.VarLenFeature, + tf.io.FixedLenFeature, + tf.io.FixedLenSequenceFeature]): + self._context_description[feature_name] = feature_type + + def decode(self, serialized_example): + """Parses a single tf.Example into image and label tensors.""" + result = {} + context, sequences = tf.io.parse_single_sequence_example( + serialized_example, self._context_description, + self._sequence_description) + result.update(context) + result.update(sequences) + for key, value in result.items(): + if isinstance(value, tf.SparseTensor): + result[key] = tf.sparse.to_dense(value) + return result + + +class Parser(parser.Parser): + """Parses a video and label dataset.""" + + def __init__(self, + input_params: exp_cfg.DataConfig, + image_key: str = IMAGE_KEY, + label_key: str = LABEL_KEY): + self._num_frames = input_params.feature_shape[0] + self._stride = input_params.temporal_stride + self._random_stride_range = input_params.random_stride_range + self._num_test_clips = input_params.num_test_clips + self._min_resize = input_params.min_image_size + self._crop_size = input_params.feature_shape[1] + self._num_crops = input_params.num_test_crops + self._one_hot_label = input_params.one_hot + self._num_classes = input_params.num_classes + self._image_key = image_key + self._label_key = label_key + self._dtype = tf.dtypes.as_dtype(input_params.dtype) + self._output_audio = input_params.output_audio + self._min_aspect_ratio = input_params.aug_min_aspect_ratio + self._max_aspect_ratio = input_params.aug_max_aspect_ratio + self._min_area_ratio = input_params.aug_min_area_ratio + self._max_area_ratio = input_params.aug_max_area_ratio + if self._output_audio: + 
self._audio_feature = input_params.audio_feature + self._audio_shape = input_params.audio_feature_shape + + self._augmenter = None + if input_params.aug_type is not None: + aug_type = input_params.aug_type + if aug_type == 'autoaug': + logging.info('Using AutoAugment.') + self._augmenter = augment.AutoAugment() + elif aug_type == 'randaug': + logging.info('Using RandAugment.') + self._augmenter = augment.RandAugment() + else: + raise ValueError('Augmentation policy {} is not supported.'.format( + aug_type)) + + def _parse_train_data( + self, decoded_tensors: Dict[str, tf.Tensor] + ) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]: + """Parses data for training.""" + # Process image and label. + image = decoded_tensors[self._image_key] + image = process_image( + image=image, + is_training=True, + num_frames=self._num_frames, + stride=self._stride, + random_stride_range=self._random_stride_range, + num_test_clips=self._num_test_clips, + min_resize=self._min_resize, + crop_size=self._crop_size, + min_aspect_ratio=self._min_aspect_ratio, + max_aspect_ratio=self._max_aspect_ratio, + min_area_ratio=self._min_area_ratio, + max_area_ratio=self._max_area_ratio, + augmenter=self._augmenter) + image = tf.cast(image, dtype=self._dtype) + + features = {'image': image} + + label = decoded_tensors[self._label_key] + label = process_label(label, self._one_hot_label, self._num_classes) + + if self._output_audio: + audio = decoded_tensors[self._audio_feature] + audio = tf.cast(audio, dtype=self._dtype) + # TODO(yeqing): synchronize audio/video sampling. Especially randomness. 
+ audio = preprocess_ops_3d.sample_sequence( + audio, self._audio_shape[0], random=False, stride=1) + audio = tf.ensure_shape(audio, self._audio_shape) + features['audio'] = audio + + return features, label + + def _parse_eval_data( + self, decoded_tensors: Dict[str, tf.Tensor] + ) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]: + """Parses data for evaluation.""" + image = decoded_tensors[self._image_key] + image = process_image( + image=image, + is_training=False, + num_frames=self._num_frames, + stride=self._stride, + num_test_clips=self._num_test_clips, + min_resize=self._min_resize, + crop_size=self._crop_size, + num_crops=self._num_crops) + image = tf.cast(image, dtype=self._dtype) + features = {'image': image} + + label = decoded_tensors[self._label_key] + label = process_label(label, self._one_hot_label, self._num_classes) + + if self._output_audio: + audio = decoded_tensors[self._audio_feature] + audio = tf.cast(audio, dtype=self._dtype) + audio = preprocess_ops_3d.sample_sequence( + audio, 20, random=False, stride=1) + audio = tf.ensure_shape(audio, self._audio_shape) + features['audio'] = audio + + return features, label + + +class PostBatchProcessor(object): + """Processes a video and label dataset which is batched.""" + + def __init__(self, input_params: exp_cfg.DataConfig): + self._is_training = input_params.is_training + + self._num_frames = input_params.feature_shape[0] + self._num_test_clips = input_params.num_test_clips + self._num_test_crops = input_params.num_test_crops + + def __call__(self, features: Dict[str, tf.Tensor], + label: tf.Tensor) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]: + """Parses a single tf.Example into image and label tensors.""" + for key in ['image']: + if key in features: + features[key] = postprocess_image( + image=features[key], + is_training=self._is_training, + num_frames=self._num_frames, + num_test_clips=self._num_test_clips, + num_test_crops=self._num_test_crops) + + return features, label diff --git 
a/official/vision/beta/dataloaders/video_input_test.py b/official/vision/beta/dataloaders/video_input_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4314fcfb92e8a8e736e5b2fc6fe222be52bf17e2 --- /dev/null +++ b/official/vision/beta/dataloaders/video_input_test.py @@ -0,0 +1,184 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +import io + +# Import libraries +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.vision.beta.configs import video_classification as exp_cfg +from official.vision.beta.dataloaders import video_input + + +AUDIO_KEY = 'features/audio' + + +def fake_seq_example(): + # Create fake data. 
+ random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8) + random_image = Image.fromarray(random_image) + label = 42 + with io.BytesIO() as buffer: + random_image.save(buffer, format='JPEG') + raw_image_bytes = buffer.getvalue() + + seq_example = tf.train.SequenceExample() + seq_example.feature_lists.feature_list.get_or_create( + video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [ + raw_image_bytes + ] + seq_example.feature_lists.feature_list.get_or_create( + video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [ + raw_image_bytes + ] + seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [ + label + ] + + random_audio = np.random.normal(size=(10, 256)).tolist() + for s in random_audio: + seq_example.feature_lists.feature_list.get_or_create( + AUDIO_KEY).feature.add().float_list.value[:] = s + return seq_example, label + + +class DecoderTest(tf.test.TestCase): + """A tf.SequenceExample decoder for the video classification task.""" + + def test_decoder(self): + decoder = video_input.Decoder() + + seq_example, label = fake_seq_example() + serialized_example = seq_example.SerializeToString() + + decoded_tensors = decoder.decode(tf.convert_to_tensor(serialized_example)) + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + self.assertCountEqual([video_input.IMAGE_KEY, video_input.LABEL_KEY], + results.keys()) + self.assertEqual(label, results[video_input.LABEL_KEY]) + + def test_decode_audio(self): + decoder = video_input.Decoder() + decoder.add_feature(AUDIO_KEY, tf.io.VarLenFeature(dtype=tf.float32)) + + seq_example, label = fake_seq_example() + serialized_example = seq_example.SerializeToString() + + decoded_tensors = decoder.decode(tf.convert_to_tensor(serialized_example)) + results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors) + self.assertCountEqual( + [video_input.IMAGE_KEY, video_input.LABEL_KEY, AUDIO_KEY], + results.keys()) + self.assertEqual(label, 
results[video_input.LABEL_KEY]) + self.assertEqual(results[AUDIO_KEY].shape, (10, 256)) + + +class VideoAndLabelParserTest(tf.test.TestCase): + + def test_video_input(self): + params = exp_cfg.kinetics600(is_training=True) + params.feature_shape = (2, 224, 224, 3) + params.min_image_size = 224 + + decoder = video_input.Decoder() + parser = video_input.Parser(params).parse_fn(params.is_training) + + seq_example, label = fake_seq_example() + + input_tensor = tf.constant(seq_example.SerializeToString()) + decoded_tensors = decoder.decode(input_tensor) + output_tensor = parser(decoded_tensors) + image_features, label = output_tensor + image = image_features['image'] + + self.assertAllEqual(image.shape, (2, 224, 224, 3)) + self.assertAllEqual(label.shape, (600,)) + + def test_video_audio_input(self): + params = exp_cfg.kinetics600(is_training=True) + params.feature_shape = (2, 224, 224, 3) + params.min_image_size = 224 + params.output_audio = True + params.audio_feature = AUDIO_KEY + params.audio_feature_shape = (15, 256) + + decoder = video_input.Decoder() + decoder.add_feature(params.audio_feature, + tf.io.VarLenFeature(dtype=tf.float32)) + parser = video_input.Parser(params).parse_fn(params.is_training) + + seq_example, label = fake_seq_example() + + input_tensor = tf.constant(seq_example.SerializeToString()) + decoded_tensors = decoder.decode(input_tensor) + output_tensor = parser(decoded_tensors) + features, label = output_tensor + image = features['image'] + audio = features['audio'] + + self.assertAllEqual(image.shape, (2, 224, 224, 3)) + self.assertAllEqual(label.shape, (600,)) + self.assertEqual(audio.shape, (15, 256)) + + def test_video_input_random_stride(self): + params = exp_cfg.kinetics600(is_training=True) + params.feature_shape = (2, 224, 224, 3) + params.min_image_size = 224 + + params.temporal_stride = 2 + params.random_stride_range = 1 + + decoder = video_input.Decoder() + parser = video_input.Parser(params).parse_fn(params.is_training) + + 
seq_example, label = fake_seq_example() + + input_tensor = tf.constant(seq_example.SerializeToString()) + decoded_tensors = decoder.decode(input_tensor) + output_tensor = parser(decoded_tensors) + image_features, label = output_tensor + image = image_features['image'] + + self.assertAllEqual(image.shape, (2, 224, 224, 3)) + self.assertAllEqual(label.shape, (600,)) + + def test_video_input_augmentation_returns_shape(self): + params = exp_cfg.kinetics600(is_training=True) + params.feature_shape = (2, 224, 224, 3) + params.min_image_size = 224 + + params.temporal_stride = 2 + params.aug_type = 'autoaug' + + decoder = video_input.Decoder() + parser = video_input.Parser(params).parse_fn(params.is_training) + + seq_example, label = fake_seq_example() + + input_tensor = tf.constant(seq_example.SerializeToString()) + decoded_tensors = decoder.decode(input_tensor) + output_tensor = parser(decoded_tensors) + image_features, label = output_tensor + image = image_features['image'] + + self.assertAllEqual(image.shape, (2, 224, 224, 3)) + self.assertAllEqual(label.shape, (600,)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/evaluation/__init__.py b/official/vision/beta/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/vision/beta/evaluation/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/beta/evaluation/coco_evaluator.py b/official/vision/beta/evaluation/coco_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..03793bdcd798568824cce827f6329f33a9dd6304 --- /dev/null +++ b/official/vision/beta/evaluation/coco_evaluator.py @@ -0,0 +1,336 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The COCO-style evaluator. + +The following snippet demonstrates the use of interfaces: + + evaluator = COCOEvaluator(...) + for _ in range(num_evals): + for _ in range(num_batches_per_eval): + predictions, groundtruth = predictor.predict(...) # pop a batch. + evaluator.update_state(groundtruths, predictions) + evaluator.result() # finish one full eval and reset states. + +See also: https://github.com/cocodataset/cocoapi/ +""" + +import atexit +import tempfile +# Import libraries +from absl import logging +import numpy as np +from pycocotools import cocoeval +import six +import tensorflow as tf + +from official.vision.beta.evaluation import coco_utils + + +class COCOEvaluator(object): + """COCO evaluation metric class.""" + + def __init__(self, + annotation_file, + include_mask, + need_rescale_bboxes=True, + per_category_metrics=False): + """Constructs COCO evaluation class. + + The class provides the interface to COCO metrics_fn. 
The + _update_op() takes detections from each image and push them to + self.detections. The _evaluate() loads a JSON file in COCO annotation format + as the groundtruths and runs COCO evaluation. + + Args: + annotation_file: a JSON file that stores annotations of the eval dataset. + If `annotation_file` is None, groundtruth annotations will be loaded + from the dataloader. + include_mask: a boolean to indicate whether or not to include the mask + eval. + need_rescale_bboxes: If true bboxes in `predictions` will be rescaled back + to absolute values (`image_info` is needed in this case). + per_category_metrics: Whether to return per category metrics. + """ + if annotation_file: + if annotation_file.startswith('gs://'): + _, local_val_json = tempfile.mkstemp(suffix='.json') + tf.io.gfile.remove(local_val_json) + + tf.io.gfile.copy(annotation_file, local_val_json) + atexit.register(tf.io.gfile.remove, local_val_json) + else: + local_val_json = annotation_file + self._coco_gt = coco_utils.COCOWrapper( + eval_type=('mask' if include_mask else 'box'), + annotation_file=local_val_json) + self._annotation_file = annotation_file + self._include_mask = include_mask + self._per_category_metrics = per_category_metrics + self._metric_names = [ + 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10', + 'ARmax100', 'ARs', 'ARm', 'ARl' + ] + self._required_prediction_fields = [ + 'source_id', 'num_detections', 'detection_classes', 'detection_scores', + 'detection_boxes' + ] + self._need_rescale_bboxes = need_rescale_bboxes + if self._need_rescale_bboxes: + self._required_prediction_fields.append('image_info') + self._required_groundtruth_fields = [ + 'source_id', 'height', 'width', 'classes', 'boxes' + ] + if self._include_mask: + mask_metric_names = ['mask_' + x for x in self._metric_names] + self._metric_names.extend(mask_metric_names) + self._required_prediction_fields.extend(['detection_masks']) + self._required_groundtruth_fields.extend(['masks']) + + 
self.reset_states() + + @property + def name(self): + return 'coco_metric' + + def reset_states(self): + """Resets internal states for a fresh run.""" + self._predictions = {} + if not self._annotation_file: + self._groundtruths = {} + + def result(self): + """Evaluates detection results, and reset_states.""" + metric_dict = self.evaluate() + # Cleans up the internal variables in order for a fresh eval next time. + self.reset_states() + return metric_dict + + def evaluate(self): + """Evaluates with detections from all images with COCO API. + + Returns: + coco_metric: float numpy array with shape [24] representing the + coco-style evaluation metrics (box and mask). + """ + if not self._annotation_file: + logging.info('There is no annotation_file in COCOEvaluator.') + gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset( + self._groundtruths) + coco_gt = coco_utils.COCOWrapper( + eval_type=('mask' if self._include_mask else 'box'), + gt_dataset=gt_dataset) + else: + logging.info('Using annotation file: %s', self._annotation_file) + coco_gt = self._coco_gt + coco_predictions = coco_utils.convert_predictions_to_coco_annotations( + self._predictions) + coco_dt = coco_gt.loadRes(predictions=coco_predictions) + image_ids = [ann['image_id'] for ann in coco_predictions] + + coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox') + coco_eval.params.imgIds = image_ids + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + coco_metrics = coco_eval.stats + + if self._include_mask: + mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm') + mcoco_eval.params.imgIds = image_ids + mcoco_eval.evaluate() + mcoco_eval.accumulate() + mcoco_eval.summarize() + mask_coco_metrics = mcoco_eval.stats + + if self._include_mask: + metrics = np.hstack((coco_metrics, mask_coco_metrics)) + else: + metrics = coco_metrics + + metrics_dict = {} + for i, name in enumerate(self._metric_names): + metrics_dict[name] = metrics[i].astype(np.float32) + + # Adds 
metrics per category. + if self._per_category_metrics: + metrics_dict.update(self._retrieve_per_category_metrics(coco_eval)) + + if self._include_mask: + metrics_dict.update(self._retrieve_per_category_metrics( + mcoco_eval, prefix='mask')) + + return metrics_dict + + def _retrieve_per_category_metrics(self, coco_eval, prefix=''): + """Retrieves per-category metrics and returns them in a dict. + + Args: + coco_eval: a cocoeval.COCOeval object containing evaluation data. + prefix: str, A string used to prefix metric names. + + Returns: + metrics_dict: A dictionary with per category metrics. + """ + + metrics_dict = {} + if prefix: + prefix = prefix + ' ' + + if hasattr(coco_eval, 'category_stats'): + for category_index, category_id in enumerate(coco_eval.params.catIds): + if self._annotation_file: + coco_category = self._coco_gt.cats[category_id] + # if 'name' is available use it, otherwise use `id` + category_display_name = coco_category.get('name', category_id) + else: + category_display_name = category_id + + metrics_dict[prefix + 'Precision mAP ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[0][category_index].astype(np.float32) + metrics_dict[prefix + 'Precision mAP ByCategory@50IoU/{}'.format( + category_display_name + )] = coco_eval.category_stats[1][category_index].astype(np.float32) + metrics_dict[prefix + 'Precision mAP ByCategory@75IoU/{}'.format( + category_display_name + )] = coco_eval.category_stats[2][category_index].astype(np.float32) + metrics_dict[prefix + 'Precision mAP ByCategory (small) /{}'.format( + category_display_name + )] = coco_eval.category_stats[3][category_index].astype(np.float32) + metrics_dict[prefix + 'Precision mAP ByCategory (medium) /{}'.format( + category_display_name + )] = coco_eval.category_stats[4][category_index].astype(np.float32) + metrics_dict[prefix + 'Precision mAP ByCategory (large) /{}'.format( + category_display_name + )] = 
coco_eval.category_stats[5][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR@1 ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[6][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR@10 ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[7][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR@100 ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[8][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR (small) ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[9][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR (medium) ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[10][category_index].astype(np.float32) + metrics_dict[prefix + 'Recall AR (large) ByCategory/{}'.format( + category_display_name + )] = coco_eval.category_stats[11][category_index].astype(np.float32) + + return metrics_dict + + def _process_predictions(self, predictions): + image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2)) + predictions['detection_boxes'] = ( + predictions['detection_boxes'].astype(np.float32)) + predictions['detection_boxes'] /= image_scale + if 'detection_outer_boxes' in predictions: + predictions['detection_outer_boxes'] = ( + predictions['detection_outer_boxes'].astype(np.float32)) + predictions['detection_outer_boxes'] /= image_scale + + def _convert_to_numpy(self, groundtruths, predictions): + """Converts tesnors to numpy arrays.""" + if groundtruths: + labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths) + numpy_groundtruths = {} + for key, val in labels.items(): + if isinstance(val, tuple): + val = np.concatenate(val) + numpy_groundtruths[key] = val + else: + numpy_groundtruths = groundtruths + + if predictions: + outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions) + numpy_predictions = {} + for 
key, val in outputs.items(): + if isinstance(val, tuple): + val = np.concatenate(val) + numpy_predictions[key] = val + else: + numpy_predictions = predictions + + return numpy_groundtruths, numpy_predictions + + def update_state(self, groundtruths, predictions): + """Update and aggregate detection results and groundtruth data. + + Args: + groundtruths: a dictionary of Tensors including the fields below. + See also different parsers under `../dataloader` for more details. + Required fields: + - source_id: a numpy array of int or string of shape [batch_size]. + - height: a numpy array of int of shape [batch_size]. + - width: a numpy array of int of shape [batch_size]. + - num_detections: a numpy array of int of shape [batch_size]. + - boxes: a numpy array of float of shape [batch_size, K, 4]. + - classes: a numpy array of int of shape [batch_size, K]. + Optional fields: + - is_crowds: a numpy array of int of shape [batch_size, K]. If the + field is absent, it is assumed that this instance is not crowd. + - areas: a numy array of float of shape [batch_size, K]. If the + field is absent, the area is calculated using either boxes or + masks depending on which one is available. + - masks: a numpy array of float of shape + [batch_size, K, mask_height, mask_width], + predictions: a dictionary of tensors including the fields below. + See different parsers under `../dataloader` for more details. + Required fields: + - source_id: a numpy array of int or string of shape [batch_size]. + - image_info [if `need_rescale_bboxes` is True]: a numpy array of + float of shape [batch_size, 4, 2]. + - num_detections: a numpy array of + int of shape [batch_size]. + - detection_boxes: a numpy array of float of shape [batch_size, K, 4]. + - detection_classes: a numpy array of int of shape [batch_size, K]. + - detection_scores: a numpy array of float of shape [batch_size, K]. + Optional fields: + - detection_masks: a numpy array of float of shape + [batch_size, K, mask_height, mask_width]. 
+ Raises: + ValueError: if the required prediction or groundtruth fields are not + present in the incoming `predictions` or `groundtruths`. + """ + groundtruths, predictions = self._convert_to_numpy(groundtruths, + predictions) + for k in self._required_prediction_fields: + if k not in predictions: + raise ValueError( + 'Missing the required key `{}` in predictions!'.format(k)) + if self._need_rescale_bboxes: + self._process_predictions(predictions) + for k, v in six.iteritems(predictions): + if k not in self._predictions: + self._predictions[k] = [v] + else: + self._predictions[k].append(v) + + if not self._annotation_file: + assert groundtruths + for k in self._required_groundtruth_fields: + if k not in groundtruths: + raise ValueError( + 'Missing the required key `{}` in groundtruths!'.format(k)) + for k, v in six.iteritems(groundtruths): + if k not in self._groundtruths: + self._groundtruths[k] = [v] + else: + self._groundtruths[k].append(v) diff --git a/official/vision/beta/evaluation/coco_utils.py b/official/vision/beta/evaluation/coco_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..35a88399c44023dbf59993d31286a4664d5ee531 --- /dev/null +++ b/official/vision/beta/evaluation/coco_utils.py @@ -0,0 +1,372 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Util functions related to pycocotools and COCO eval.""" + +import copy +import json + +# Import libraries +from absl import logging +import numpy as np +from PIL import Image +from pycocotools import coco +from pycocotools import mask as mask_api +import six +import tensorflow as tf + +from official.vision.beta.dataloaders import tf_example_decoder +from official.vision.beta.ops import box_ops +from official.vision.beta.ops import mask_ops + + +class COCOWrapper(coco.COCO): + """COCO wrapper class. + + This class wraps COCO API object, which provides the following additional + functionalities: + 1. Support string type image id. + 2. Support loading the groundtruth dataset using the external annotation + dictionary. + 3. Support loading the prediction results using the external annotation + dictionary. + """ + + def __init__(self, eval_type='box', annotation_file=None, gt_dataset=None): + """Instantiates a COCO-style API object. + + Args: + eval_type: either 'box' or 'mask'. + annotation_file: a JSON file that stores annotations of the eval dataset. + This is required if `gt_dataset` is not provided. + gt_dataset: the groundtruth eval datatset in COCO API format. + """ + if ((annotation_file and gt_dataset) or + ((not annotation_file) and (not gt_dataset))): + raise ValueError('One and only one of `annotation_file` and `gt_dataset` ' + 'needs to be specified.') + + if eval_type not in ['box', 'mask']: + raise ValueError('The `eval_type` can only be either `box` or `mask`.') + + coco.COCO.__init__(self, annotation_file=annotation_file) + self._eval_type = eval_type + if gt_dataset: + self.dataset = gt_dataset + self.createIndex() + + def loadRes(self, predictions): + """Loads result file and return a result api object. + + Args: + predictions: a list of dictionary each representing an annotation in COCO + format. The required fields are `image_id`, `category_id`, `score`, + `bbox`, `segmentation`. + + Returns: + res: result COCO api object. 
+ + Raises: + ValueError: if the set of image id from predctions is not the subset of + the set of image id of the groundtruth dataset. + """ + res = coco.COCO() + res.dataset['images'] = copy.deepcopy(self.dataset['images']) + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + + image_ids = [ann['image_id'] for ann in predictions] + if set(image_ids) != (set(image_ids) & set(self.getImgIds())): + raise ValueError('Results do not correspond to the current dataset!') + for ann in predictions: + x1, x2, y1, y2 = [ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2], + ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]] + if self._eval_type == 'box': + ann['area'] = ann['bbox'][2] * ann['bbox'][3] + ann['segmentation'] = [ + [x1, y1, x1, y2, x2, y2, x2, y1]] + elif self._eval_type == 'mask': + ann['area'] = mask_api.area(ann['segmentation']) + + res.dataset['annotations'] = copy.deepcopy(predictions) + res.createIndex() + return res + + +def convert_predictions_to_coco_annotations(predictions): + """Converts a batch of predictions to annotations in COCO format. + + Args: + predictions: a dictionary of lists of numpy arrays including the following + fields. K below denotes the maximum number of instances per image. + Required fields: + - source_id: a list of numpy arrays of int or string of shape + [batch_size]. + - num_detections: a list of numpy arrays of int of shape [batch_size]. + - detection_boxes: a list of numpy arrays of float of shape + [batch_size, K, 4], where coordinates are in the original image + space (not the scaled image space). + - detection_classes: a list of numpy arrays of int of shape + [batch_size, K]. + - detection_scores: a list of numpy arrays of float of shape + [batch_size, K]. + Optional fields: + - detection_masks: a list of numpy arrays of float of shape + [batch_size, K, mask_height, mask_width]. + + Returns: + coco_predictions: prediction in COCO annotation format. 
+ """ + coco_predictions = [] + num_batches = len(predictions['source_id']) + batch_size = predictions['source_id'][0].shape[0] + max_num_detections = predictions['detection_classes'][0].shape[1] + use_outer_box = 'detection_outer_boxes' in predictions + for i in range(num_batches): + predictions['detection_boxes'][i] = box_ops.yxyx_to_xywh( + predictions['detection_boxes'][i]) + if use_outer_box: + predictions['detection_outer_boxes'][i] = box_ops.yxyx_to_xywh( + predictions['detection_outer_boxes'][i]) + mask_boxes = predictions['detection_outer_boxes'] + else: + mask_boxes = predictions['detection_boxes'] + + for j in range(batch_size): + if 'detection_masks' in predictions: + image_masks = mask_ops.paste_instance_masks( + predictions['detection_masks'][i][j], + mask_boxes[i][j], + int(predictions['image_info'][i][j, 0, 0]), + int(predictions['image_info'][i][j, 0, 1])) + binary_masks = (image_masks > 0.0).astype(np.uint8) + encoded_masks = [ + mask_api.encode(np.asfortranarray(binary_mask)) + for binary_mask in list(binary_masks)] + for k in range(max_num_detections): + ann = {} + ann['image_id'] = predictions['source_id'][i][j] + ann['category_id'] = predictions['detection_classes'][i][j, k] + ann['bbox'] = predictions['detection_boxes'][i][j, k] + ann['score'] = predictions['detection_scores'][i][j, k] + if 'detection_masks' in predictions: + ann['segmentation'] = encoded_masks[k] + coco_predictions.append(ann) + + for i, ann in enumerate(coco_predictions): + ann['id'] = i + 1 + + return coco_predictions + + +def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None): + """Converts groundtruths to the dataset in COCO format. + + Args: + groundtruths: a dictionary of numpy arrays including the fields below. + Note that each element in the list represent the number for a single + example without batch dimension. K below denotes the actual number of + instances for each image. 
+ Required fields: + - source_id: a list of numpy arrays of int or string of shape + [batch_size]. + - height: a list of numpy arrays of int of shape [batch_size]. + - width: a list of numpy arrays of int of shape [batch_size]. + - num_detections: a list of numpy arrays of int of shape [batch_size]. + - boxes: a list of numpy arrays of float of shape [batch_size, K, 4], + where coordinates are in the original image space (not the + normalized coordinates). + - classes: a list of numpy arrays of int of shape [batch_size, K]. + Optional fields: + - is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If + th field is absent, it is assumed that this instance is not crowd. + - areas: a list of numy arrays of float of shape [batch_size, K]. If the + field is absent, the area is calculated using either boxes or + masks depending on which one is available. + - masks: a list of numpy arrays of string of shape [batch_size, K], + label_map: (optional) a dictionary that defines items from the category id + to the category name. If `None`, collect the category mappping from the + `groundtruths`. + + Returns: + coco_groundtruths: the groundtruth dataset in COCO format. + """ + source_ids = np.concatenate(groundtruths['source_id'], axis=0) + heights = np.concatenate(groundtruths['height'], axis=0) + widths = np.concatenate(groundtruths['width'], axis=0) + gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)} for i, h, w + in zip(source_ids, heights, widths)] + + gt_annotations = [] + num_batches = len(groundtruths['source_id']) + batch_size = groundtruths['source_id'][0].shape[0] + for i in range(num_batches): + max_num_instances = groundtruths['classes'][i].shape[1] + for j in range(batch_size): + num_instances = groundtruths['num_detections'][i][j] + if num_instances > max_num_instances: + logging.warning( + 'num_groundtruths is larger than max_num_instances, %d v.s. 
%d', + num_instances, max_num_instances) + num_instances = max_num_instances + for k in range(int(num_instances)): + ann = {} + ann['image_id'] = int(groundtruths['source_id'][i][j]) + if 'is_crowds' in groundtruths: + ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k]) + else: + ann['iscrowd'] = 0 + ann['category_id'] = int(groundtruths['classes'][i][j, k]) + boxes = groundtruths['boxes'][i] + ann['bbox'] = [ + float(boxes[j, k, 1]), + float(boxes[j, k, 0]), + float(boxes[j, k, 3] - boxes[j, k, 1]), + float(boxes[j, k, 2] - boxes[j, k, 0])] + if 'areas' in groundtruths: + ann['area'] = float(groundtruths['areas'][i][j, k]) + else: + ann['area'] = float( + (boxes[j, k, 3] - boxes[j, k, 1]) * + (boxes[j, k, 2] - boxes[j, k, 0])) + if 'masks' in groundtruths: + mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k])) + width, height = mask.size + np_mask = ( + np.array(mask.getdata()).reshape(height, width).astype(np.uint8)) + np_mask[np_mask > 0] = 255 + encoded_mask = mask_api.encode(np.asfortranarray(np_mask)) + ann['segmentation'] = encoded_mask + if 'areas' not in groundtruths: + ann['area'] = mask_api.area(encoded_mask) + gt_annotations.append(ann) + + for i, ann in enumerate(gt_annotations): + ann['id'] = i + 1 + + if label_map: + gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map] + else: + category_ids = [gt['category_id'] for gt in gt_annotations] + gt_categories = [{'id': i} for i in set(category_ids)] + + gt_dataset = { + 'images': gt_images, + 'categories': gt_categories, + 'annotations': copy.deepcopy(gt_annotations), + } + return gt_dataset + + +class COCOGroundtruthGenerator: + """Generates the groundtruth annotations from a single example.""" + + def __init__(self, file_pattern, num_examples, include_mask): + self._file_pattern = file_pattern + self._num_examples = num_examples + self._include_mask = include_mask + self._dataset_fn = tf.data.TFRecordDataset + + def _parse_single_example(self, example): + """Parses a single 
serialized tf.Example proto. + + Args: + example: a serialized tf.Example proto string. + + Returns: + A dictionary of groundtruth with the following fields: + source_id: a scalar tensor of int64 representing the image source_id. + height: a scalar tensor of int64 representing the image height. + width: a scalar tensor of int64 representing the image width. + boxes: a float tensor of shape [K, 4], representing the groundtruth + boxes in absolute coordinates with respect to the original image size. + classes: a int64 tensor of shape [K], representing the class labels of + each instances. + is_crowds: a bool tensor of shape [K], indicating whether the instance + is crowd. + areas: a float tensor of shape [K], indicating the area of each + instance. + masks: a string tensor of shape [K], containing the bytes of the png + mask of each instance. + """ + decoder = tf_example_decoder.TfExampleDecoder( + include_mask=self._include_mask) + decoded_tensors = decoder.decode(example) + + image = decoded_tensors['image'] + image_size = tf.shape(image)[0:2] + boxes = box_ops.denormalize_boxes( + decoded_tensors['groundtruth_boxes'], image_size) + groundtruths = { + 'source_id': tf.string_to_number( + decoded_tensors['source_id'], out_type=tf.int64), + 'height': decoded_tensors['height'], + 'width': decoded_tensors['width'], + 'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0], + 'boxes': boxes, + 'classes': decoded_tensors['groundtruth_classes'], + 'is_crowds': decoded_tensors['groundtruth_is_crowd'], + 'areas': decoded_tensors['groundtruth_area'], + } + if self._include_mask: + groundtruths.update({ + 'masks': decoded_tensors['groundtruth_instance_masks_png'], + }) + return groundtruths + + def _build_pipeline(self): + """Builds data pipeline to generate groundtruth annotations.""" + dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False) + dataset = dataset.interleave( + map_func=lambda filename: self._dataset_fn(filename).prefetch(1), + 
cycle_length=12, + num_parallel_calls=tf.data.experimental.AUTOTUNE) + + dataset = dataset.map(self._parse_single_example, + num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.batch(1, drop_remainder=False) + dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) + return dataset + + def __call__(self): + for groundtruth_result in self._build_pipeline(): + yield groundtruth_result + + +def scan_and_generator_annotation_file(file_pattern: str, + num_samples: int, + include_mask: bool, + annotation_file: str): + """Scans and generate the COCO-style annotation JSON file given a dataset.""" + groundtruth_generator = COCOGroundtruthGenerator( + file_pattern, num_samples, include_mask) + generate_annotation_file(groundtruth_generator, annotation_file) + + +def generate_annotation_file(groundtruth_generator, + annotation_file): + """Generates COCO-style annotation JSON file given a groundtruth generator.""" + groundtruths = {} + logging.info('Loading groundtruth annotations from dataset to memory...') + for groundtruth in groundtruth_generator(): + for k, v in six.iteritems(groundtruth): + if k not in groundtruths: + groundtruths[k] = [v] + else: + groundtruths[k].append(v) + gt_dataset = convert_groundtruths_to_coco_dataset(groundtruths) + + logging.info('Saving groundtruth annotations to the JSON file...') + with tf.io.gfile.GFile(annotation_file, 'w') as f: + f.write(json.dumps(gt_dataset)) + logging.info('Done saving the JSON file...') diff --git a/official/vision/beta/evaluation/segmentation_metrics.py b/official/vision/beta/evaluation/segmentation_metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..179cea3494336c5fcb6b7e707038544007e0336e --- /dev/null +++ b/official/vision/beta/evaluation/segmentation_metrics.py @@ -0,0 +1,233 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Metrics for segmentation."""

import tensorflow as tf

from official.vision import keras_cv


def _flatten_masks_and_predictions(y_true, y_pred, rescale_predictions):
  """Flattens groundtruth masks and predictions for IoU-style metrics.

  Shared preprocessing for `MeanIoU` and `PerClassIoU`: merges per-replica
  tuples, zeroes out ignored mask elements, optionally rescales predictions
  back to original image sizes, and flattens everything to 1-D.

  Args:
    y_true: `dict` with 'masks', 'valid_masks' and 'image_info' entries (see
      the `update_state` docstrings below for shapes).
    y_pred: Tensor [batch, width_p, height_p, num_classes], predicted masks.
    rescale_predictions: `bool`, whether to scale predictions back to original
      image sizes using y_true['image_info'].

  Returns:
    A tuple of (flatten_masks, flatten_predictions, flatten_valid_masks)
    tensors, flattened for `update_state` of the parent metric.
  """
  predictions = y_pred
  masks = y_true['masks']
  valid_masks = y_true['valid_masks']
  images_info = y_true['image_info']

  # Per-replica outputs arrive as tuples/lists; merge on the batch axis.
  if isinstance(predictions, (tuple, list)):
    predictions = tf.concat(predictions, axis=0)
    masks = tf.concat(masks, axis=0)
    valid_masks = tf.concat(valid_masks, axis=0)
    images_info = tf.concat(images_info, axis=0)

  # Ignore mask elements is set to zero for argmax op.
  masks = tf.where(valid_masks, masks, tf.zeros_like(masks))

  if rescale_predictions:
    # This part can only run on cpu/gpu due to dynamic image resizing.
    flatten_predictions = []
    flatten_masks = []
    flatten_valid_masks = []
    for mask, valid_mask, predicted_mask, image_info in zip(
        masks, valid_masks, predictions, images_info):

      # Undo the preprocessing scale, then crop away the padding to recover
      # the original image extent.
      rescale_size = tf.cast(
          tf.math.ceil(image_info[1, :] / image_info[2, :]), tf.int32)
      image_shape = tf.cast(image_info[0, :], tf.int32)
      offsets = tf.cast(image_info[3, :], tf.int32)

      predicted_mask = tf.image.resize(
          predicted_mask,
          rescale_size,
          method=tf.image.ResizeMethod.BILINEAR)

      predicted_mask = tf.image.crop_to_bounding_box(predicted_mask,
                                                     offsets[0], offsets[1],
                                                     image_shape[0],
                                                     image_shape[1])
      mask = tf.image.crop_to_bounding_box(mask, 0, 0, image_shape[0],
                                           image_shape[1])
      valid_mask = tf.image.crop_to_bounding_box(valid_mask, 0, 0,
                                                 image_shape[0],
                                                 image_shape[1])

      predicted_mask = tf.argmax(predicted_mask, axis=2)
      flatten_predictions.append(tf.reshape(predicted_mask, shape=[1, -1]))
      flatten_masks.append(tf.reshape(mask, shape=[1, -1]))
      flatten_valid_masks.append(tf.reshape(valid_mask, shape=[1, -1]))
    flatten_predictions = tf.concat(flatten_predictions, axis=1)
    flatten_masks = tf.concat(flatten_masks, axis=1)
    flatten_valid_masks = tf.concat(flatten_valid_masks, axis=1)

  else:
    predictions = tf.image.resize(
        predictions,
        tf.shape(masks)[1:3],
        method=tf.image.ResizeMethod.BILINEAR)
    predictions = tf.argmax(predictions, axis=3)
    flatten_predictions = tf.reshape(predictions, shape=[-1])
    flatten_masks = tf.reshape(masks, shape=[-1])
    flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])

  return flatten_masks, flatten_predictions, flatten_valid_masks


class MeanIoU(tf.keras.metrics.MeanIoU):
  """Mean IoU metric for semantic segmentation.

  This class utilizes tf.keras.metrics.MeanIoU to perform batched mean iou when
  both input images and groundtruth masks are resized to the same size
  (rescale_predictions=False). It also computes mean iou on groundtruth original
  sizes, in which case, each prediction is rescaled back to the original image
  size.
  """

  def __init__(
      self, num_classes, rescale_predictions=False, name=None, dtype=None):
    """Constructs Segmentation evaluator class.

    Args:
      num_classes: `int`, number of classes.
      rescale_predictions: `bool`, whether to scale back prediction to original
        image sizes. If True, y_true['image_info'] is used to rescale
        predictions.
      name: `str`, name of the metric instance.
      dtype: data type of the metric result.
    """
    self._rescale_predictions = rescale_predictions
    super(MeanIoU, self).__init__(
        num_classes=num_classes, name=name, dtype=dtype)

  def update_state(self, y_true, y_pred):
    """Updates metric state.

    Args:
      y_true: `dict`, dictionary with the following name, and key values.
        - masks: [batch, width, height, 1], groundtruth masks.
        - valid_masks: [batch, width, height, 1], valid elements in the mask.
        - image_info: [batch, 4, 2], a tensor that holds information about
          original and preprocessed images. Each entry is in the format of
          [[original_height, original_width], [input_height, input_width],
          [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
          desired_width] is the actual scaled image size, and [y_scale, x_scale]
          is the scaling factor, which is the ratio of scaled dimension /
          original dimension.
      y_pred: Tensor [batch, width_p, height_p, num_classes], predicated masks.
    """
    flatten_masks, flatten_predictions, flatten_valid_masks = (
        _flatten_masks_and_predictions(y_true, y_pred,
                                       self._rescale_predictions))

    super(MeanIoU, self).update_state(
        flatten_masks, flatten_predictions,
        tf.cast(flatten_valid_masks, tf.float32))


class PerClassIoU(keras_cv.metrics.PerClassIoU):
  """Per Class IoU metric for semantic segmentation.

  This class utilizes keras_cv.metrics.PerClassIoU to perform batched per class
  iou when both input images and groundtruth masks are resized to the same size
  (rescale_predictions=False). It also computes per class iou on groundtruth
  original sizes, in which case, each prediction is rescaled back to the
  original image size.
  """

  def __init__(
      self, num_classes, rescale_predictions=False, name=None, dtype=None):
    """Constructs Segmentation evaluator class.

    Args:
      num_classes: `int`, number of classes.
      rescale_predictions: `bool`, whether to scale back prediction to original
        image sizes. If True, y_true['image_info'] is used to rescale
        predictions.
      name: `str`, name of the metric instance.
      dtype: data type of the metric result.
    """
    self._rescale_predictions = rescale_predictions
    super(PerClassIoU, self).__init__(
        num_classes=num_classes, name=name, dtype=dtype)

  def update_state(self, y_true, y_pred):
    """Updates metric state.

    Args:
      y_true: `dict`, dictionary with the following name, and key values.
        - masks: [batch, width, height, 1], groundtruth masks.
        - valid_masks: [batch, width, height, 1], valid elements in the mask.
        - image_info: [batch, 4, 2], a tensor that holds information about
          original and preprocessed images. Each entry is in the format of
          [[original_height, original_width], [input_height, input_width],
          [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
          desired_width] is the actual scaled image size, and [y_scale, x_scale]
          is the scaling factor, which is the ratio of scaled dimension /
          original dimension.
      y_pred: Tensor [batch, width_p, height_p, num_classes], predicated masks.
    """
    flatten_masks, flatten_predictions, flatten_valid_masks = (
        _flatten_masks_and_predictions(y_true, y_pred,
                                       self._rescale_predictions))

    super(PerClassIoU, self).update_state(
        flatten_masks, flatten_predictions,
        tf.cast(flatten_valid_masks, tf.float32))
0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/vision/beta/losses/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/beta/losses/maskrcnn_losses.py b/official/vision/beta/losses/maskrcnn_losses.py new file mode 100644 index 0000000000000000000000000000000000000000..48fd01819261b7fc8d34111046e6b1fae606edd0 --- /dev/null +++ b/official/vision/beta/losses/maskrcnn_losses.py @@ -0,0 +1,312 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Losses for maskrcn model.""" + +# Import libraries +import tensorflow as tf + + +class RpnScoreLoss(object): + """Region Proposal Network score loss function.""" + + def __init__(self, rpn_batch_size_per_im): + self._rpn_batch_size_per_im = rpn_batch_size_per_im + self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy( + reduction=tf.keras.losses.Reduction.SUM, from_logits=True) + + def __call__(self, score_outputs, labels): + """Computes total RPN detection loss. + + Computes total RPN detection loss including box and score from all levels. + + Args: + score_outputs: an OrderDict with keys representing levels and values + representing scores in [batch_size, height, width, num_anchors]. + labels: the dictionary that returned from dataloader that includes + groundturth targets. + + Returns: + rpn_score_loss: a scalar tensor representing total score loss. + """ + with tf.name_scope('rpn_loss'): + levels = sorted(score_outputs.keys()) + + score_losses = [] + for level in levels: + score_losses.append( + self._rpn_score_loss( + score_outputs[level], + labels[level], + normalizer=tf.cast( + tf.shape(score_outputs[level])[0] * + self._rpn_batch_size_per_im, + dtype=tf.float32))) + + # Sums per level losses to total loss. + return tf.math.add_n(score_losses) + + def _rpn_score_loss(self, score_outputs, score_targets, normalizer=1.0): + """Computes score loss.""" + # score_targets has three values: + # (1) score_targets[i]=1, the anchor is a positive sample. + # (2) score_targets[i]=0, negative. + # (3) score_targets[i]=-1, the anchor is don't care (ignore). 
+ with tf.name_scope('rpn_score_loss'): + mask = tf.math.logical_or(tf.math.equal(score_targets, 1), + tf.math.equal(score_targets, 0)) + + score_targets = tf.math.maximum(score_targets, + tf.zeros_like(score_targets)) + + score_targets = tf.expand_dims(score_targets, axis=-1) + score_outputs = tf.expand_dims(score_outputs, axis=-1) + score_loss = self._binary_crossentropy( + score_targets, score_outputs, sample_weight=mask) + + score_loss /= normalizer + return score_loss + + +class RpnBoxLoss(object): + """Region Proposal Network box regression loss function.""" + + def __init__(self, huber_loss_delta: float): + # The delta is typically around the mean value of regression target. + # for instances, the regression targets of 512x512 input with 6 anchors on + # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2]. + self._huber_loss = tf.keras.losses.Huber( + delta=huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM) + + def __call__(self, box_outputs, labels): + """Computes total RPN detection loss. + + Computes total RPN detection loss including box and score from all levels. + + Args: + box_outputs: an OrderDict with keys representing levels and values + representing box regression targets in + [batch_size, height, width, num_anchors * 4]. + labels: the dictionary that returned from dataloader that includes + groundturth targets. + + Returns: + rpn_box_loss: a scalar tensor representing total box regression loss. + """ + with tf.name_scope('rpn_loss'): + levels = sorted(box_outputs.keys()) + + box_losses = [] + for level in levels: + box_losses.append(self._rpn_box_loss(box_outputs[level], labels[level])) + + # Sum per level losses to total loss. 
+ return tf.add_n(box_losses) + + def _rpn_box_loss(self, box_outputs, box_targets, normalizer=1.0): + """Computes box regression loss.""" + with tf.name_scope('rpn_box_loss'): + mask = tf.cast(tf.not_equal(box_targets, 0.0), dtype=tf.float32) + box_targets = tf.expand_dims(box_targets, axis=-1) + box_outputs = tf.expand_dims(box_outputs, axis=-1) + box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask) + # The loss is normalized by the sum of non-zero weights and additional + # normalizer provided by the function caller. Using + 0.01 here to avoid + # division by zero. + box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01) + return box_loss + + +class FastrcnnClassLoss(object): + """Fast R-CNN classification loss function.""" + + def __init__(self): + self._categorical_crossentropy = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.keras.losses.Reduction.SUM, from_logits=True) + + def __call__(self, class_outputs, class_targets): + """Computes the class loss (Fast-RCNN branch) of Mask-RCNN. + + This function implements the classification loss of the Fast-RCNN. + + The classification loss is softmax on all RoIs. + Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py # pylint: disable=line-too-long + + Args: + class_outputs: a float tensor representing the class prediction for each box + with a shape of [batch_size, num_boxes, num_classes]. + class_targets: a float tensor representing the class label for each box + with a shape of [batch_size, num_boxes]. + + Returns: + a scalar tensor representing total class loss. 
+ """ + with tf.name_scope('fast_rcnn_loss'): + batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list() + class_targets = tf.cast(class_targets, dtype=tf.int32) + class_targets_one_hot = tf.one_hot(class_targets, num_classes) + return self._fast_rcnn_class_loss(class_outputs, class_targets_one_hot, + normalizer=batch_size * num_boxes) + + def _fast_rcnn_class_loss(self, class_outputs, class_targets_one_hot, + normalizer=1.0): + """Computes classification loss.""" + with tf.name_scope('fast_rcnn_class_loss'): + class_loss = self._categorical_crossentropy(class_targets_one_hot, + class_outputs) + + class_loss /= normalizer + return class_loss + + +class FastrcnnBoxLoss(object): + """Fast R-CNN box regression loss function.""" + + def __init__(self, + huber_loss_delta: float, + class_agnostic_bbox_pred: bool = False): + """Initiate Faster RCNN box loss. + + Args: + huber_loss_delta: the delta is typically around the mean value of + regression target. for instances, the regression targets of 512x512 + input with 6 anchors on P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2]. + class_agnostic_bbox_pred: if True, class agnostic bounding box prediction + is performed. + """ + self._huber_loss = tf.keras.losses.Huber( + delta=huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM) + self._class_agnostic_bbox_pred = class_agnostic_bbox_pred + + def __call__(self, box_outputs, class_targets, box_targets): + """Computes the box loss (Fast-RCNN branch) of Mask-RCNN. + + This function implements the box regression loss of the Fast-RCNN. As the + `box_outputs` produces `num_classes` boxes for each RoI, the reference model + expands `box_targets` to match the shape of `box_outputs` and selects only + the target that the RoI has a maximum overlap. 
(Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py) # pylint: disable=line-too-long + Instead, this function selects the `box_outputs` by the `class_targets` so + that it doesn't expand `box_targets`. + + The box loss is smooth L1-loss on only positive samples of RoIs. + Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py # pylint: disable=line-too-long + + Args: + box_outputs: a float tensor representing the box prediction for each box + with a shape of [batch_size, num_boxes, num_classes * 4]. + class_targets: a float tensor representing the class label for each box + with a shape of [batch_size, num_boxes]. + box_targets: a float tensor representing the box label for each box + with a shape of [batch_size, num_boxes, 4]. + + Returns: + box_loss: a scalar tensor representing total box regression loss. + """ + with tf.name_scope('fast_rcnn_loss'): + class_targets = tf.cast(class_targets, dtype=tf.int32) + if not self._class_agnostic_bbox_pred: + box_outputs = self._assign_class_targets(box_outputs, class_targets) + + return self._fast_rcnn_box_loss(box_outputs, box_targets, class_targets) + + def _assign_class_targets(self, box_outputs, class_targets): + """Selects the box from `box_outputs` based on `class_targets`, with which the box has the maximum overlap.""" + (batch_size, num_rois, + num_class_specific_boxes) = box_outputs.get_shape().as_list() + num_classes = num_class_specific_boxes // 4 + box_outputs = tf.reshape(box_outputs, + [batch_size, num_rois, num_classes, 4]) + + box_indices = tf.reshape( + class_targets + tf.tile( + tf.expand_dims(tf.range(batch_size) * num_rois * num_classes, 1), + [1, num_rois]) + tf.tile( + tf.expand_dims(tf.range(num_rois) * num_classes, 0), + [batch_size, 1]), [-1]) + + box_outputs = tf.matmul( + tf.one_hot( + box_indices, + batch_size * num_rois * num_classes, + dtype=box_outputs.dtype), tf.reshape(box_outputs, [-1, 
4])) + box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4]) + + return box_outputs + + def _fast_rcnn_box_loss(self, box_outputs, box_targets, class_targets, + normalizer=1.0): + """Computes box regression loss.""" + with tf.name_scope('fast_rcnn_box_loss'): + mask = tf.tile(tf.expand_dims(tf.greater(class_targets, 0), axis=2), + [1, 1, 4]) + mask = tf.cast(mask, dtype=tf.float32) + box_targets = tf.expand_dims(box_targets, axis=-1) + box_outputs = tf.expand_dims(box_outputs, axis=-1) + box_loss = self._huber_loss(box_targets, box_outputs, sample_weight=mask) + # The loss is normalized by the number of ones in mask, + # additianal normalizer provided by the user and using 0.01 here to avoid + # division by 0. + box_loss /= normalizer * (tf.reduce_sum(mask) + 0.01) + return box_loss + + +class MaskrcnnLoss(object): + """Mask R-CNN instance segmentation mask loss function.""" + + def __init__(self): + self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy( + reduction=tf.keras.losses.Reduction.SUM, from_logits=True) + + def __call__(self, mask_outputs, mask_targets, select_class_targets): + """Computes the mask loss of Mask-RCNN. + + This function implements the mask loss of Mask-RCNN. As the `mask_outputs` + produces `num_classes` masks for each RoI, the reference model expands + `mask_targets` to match the shape of `mask_outputs` and selects only the + target that the RoI has a maximum overlap. (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py) # pylint: disable=line-too-long + Instead, this implementation selects the `mask_outputs` by the `class_targets` + so that it doesn't expand `mask_targets`. Note that the selection logic is + done in the post-processing of mask_rcnn_fn in mask_rcnn_architecture.py. + + Args: + mask_outputs: a float tensor representing the prediction for each mask, + with a shape of + [batch_size, num_masks, mask_height, mask_width]. 
+ mask_targets: a float tensor representing the binary mask of ground truth + labels for each mask with a shape of + [batch_size, num_masks, mask_height, mask_width]. + select_class_targets: a tensor with a shape of [batch_size, num_masks], + representing the foreground mask targets. + + Returns: + mask_loss: a float tensor representing total mask loss. + """ + with tf.name_scope('mask_rcnn_loss'): + (batch_size, num_masks, mask_height, + mask_width) = mask_outputs.get_shape().as_list() + + weights = tf.tile( + tf.reshape(tf.greater(select_class_targets, 0), + [batch_size, num_masks, 1, 1]), + [1, 1, mask_height, mask_width]) + weights = tf.cast(weights, dtype=tf.float32) + + mask_targets = tf.expand_dims(mask_targets, axis=-1) + mask_outputs = tf.expand_dims(mask_outputs, axis=-1) + mask_loss = self._binary_crossentropy(mask_targets, mask_outputs, + sample_weight=weights) + + # The loss is normalized by the number of 1's in weights and + # + 0.01 is used to avoid division by zero. + return mask_loss / (tf.reduce_sum(weights) + 0.01) diff --git a/official/vision/beta/losses/retinanet_losses.py b/official/vision/beta/losses/retinanet_losses.py new file mode 100644 index 0000000000000000000000000000000000000000..8baf2525e215bf95c8b202f8bafff0e0d7c67bc2 --- /dev/null +++ b/official/vision/beta/losses/retinanet_losses.py @@ -0,0 +1,206 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Losses used for detection models.""" + +# Import libraries +import tensorflow as tf + + +def focal_loss(logits, targets, alpha, gamma): + """Compute the focal loss between `logits` and the golden `target` values. + + Focal loss = -(1-pt)^gamma * log(pt) + where pt is the probability of being classified to the true class. + + Args: + logits: A float32 tensor of size + [batch, d_1, ..., d_k, n_classes]. + targets: A float32 tensor of size + [batch, d_1, ..., d_k, n_classes]. + alpha: A float32 scalar multiplying alpha to the loss from positive examples + and (1-alpha) to the loss from negative examples. + gamma: A float32 scalar modulating loss from hard and easy examples. + + Returns: + loss: A float32 Tensor of size + [batch, d_1, ..., d_k, n_classes] representing + normalized loss on the prediction map. + """ + with tf.name_scope('focal_loss'): + positive_label_mask = tf.equal(targets, 1.0) + cross_entropy = ( + tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=logits)) + probs = tf.sigmoid(logits) + probs_gt = tf.where(positive_label_mask, probs, 1.0 - probs) + # With small gamma, the implementation could produce NaN during back prop. + modulator = tf.pow(1.0 - probs_gt, gamma) + loss = modulator * cross_entropy + weighted_loss = tf.where(positive_label_mask, alpha * loss, + (1.0 - alpha) * loss) + + return weighted_loss + + +class FocalLoss(tf.keras.losses.Loss): + """Implements a Focal loss for classification problems. + + Reference: + [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002). + """ + + def __init__(self, + alpha, + gamma, + num_classes, + reduction=tf.keras.losses.Reduction.AUTO, + name=None): + """Initializes `FocalLoss`. + + Args: + alpha: The `alpha` weight factor for binary class imbalance. + gamma: The `gamma` focusing parameter to re-weight loss. + num_classes: Number of foreground classes. + reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. 
`AUTO` indicates that the reduction + option will be determined by the usage context. For almost all cases + this defaults to `SUM_OVER_BATCH_SIZE`. When used with + `tf.distribute.Strategy`, outside of built-in training loops such as + `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` + will raise an error. Please see this custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) for + more details. + name: Optional name for the op. Defaults to 'retinanet_class_loss'. + """ + self._num_classes = num_classes + self._alpha = alpha + self._gamma = gamma + super(FocalLoss, self).__init__(reduction=reduction, name=name) + + def call(self, y_true, y_pred): + """Invokes the `FocalLoss`. + + Args: + y_true: Ordered Dict with level to [batch, height, width, num_anchors]. + for example, + {3: tf.Tensor(shape=[32, 512, 512, 9], dtype=tf.float32), + 4: tf.Tensor([shape=32, 256, 256, 9, dtype=tf.float32])} + y_pred: Ordered Dict with level to [batch, height, width, num_anchors * + num_classes]. for example, + {3: tf.Tensor(shape=[32, 512, 512, 9], dtype=tf.int64), + 4: tf.Tensor(shape=[32, 256, 256, 9 * 21], dtype=tf.int64)} + + Returns: + Summed loss float `Tensor`. 
+ """ + flattened_cls_outputs = [] + flattened_labels = [] + batch_size = None + for level in y_pred.keys(): + cls_output = y_pred[level] + label = y_true[level] + if batch_size is None: + batch_size = cls_output.shape[0] or tf.shape(cls_output)[0] + flattened_cls_outputs.append( + tf.reshape(cls_output, [batch_size, -1, self._num_classes])) + flattened_labels.append(tf.reshape(label, [batch_size, -1])) + cls_outputs = tf.concat(flattened_cls_outputs, axis=1) + labels = tf.concat(flattened_labels, axis=1) + + cls_targets_one_hot = tf.one_hot(labels, self._num_classes) + return focal_loss( + tf.cast(cls_outputs, dtype=tf.float32), + tf.cast(cls_targets_one_hot, dtype=tf.float32), self._alpha, + self._gamma) + + def get_config(self): + config = { + 'alpha': self._alpha, + 'gamma': self._gamma, + 'num_classes': self._num_classes, + } + base_config = super(FocalLoss, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class RetinanetBoxLoss(tf.keras.losses.Loss): + """RetinaNet box Huber loss.""" + + def __init__(self, + delta, + reduction=tf.keras.losses.Reduction.AUTO, + name=None): + """Initializes `RetinanetBoxLoss`. + + Args: + delta: A float, the point where the Huber loss function changes from a + quadratic to linear. + reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the reduction + option will be determined by the usage context. For almost all cases + this defaults to `SUM_OVER_BATCH_SIZE`. When used with + `tf.distribute.Strategy`, outside of built-in training loops such as + `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` + will raise an error. Please see this custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) for + more details. + name: Optional name for the op. Defaults to 'retinanet_class_loss'. 
+ """ + self._huber_loss = tf.keras.losses.Huber( + delta=delta, reduction=tf.keras.losses.Reduction.NONE) + self._delta = delta + super(RetinanetBoxLoss, self).__init__(reduction=reduction, name=name) + + def call(self, y_true, y_pred): + """Computes box detection loss. + + Computes total detection loss including box and class loss from all levels. + + Args: + y_true: Ordered Dict with level to [batch, height, width, + num_anchors * 4] for example, + {3: tf.Tensor(shape=[32, 512, 512, 9 * 4], dtype=tf.float32), + 4: tf.Tensor([shape=32, 256, 256, 9 * 4, dtype=tf.float32])} + y_pred: Ordered Dict with level to [batch, height, width, + num_anchors * 4]. for example, + {3: tf.Tensor(shape=[32, 512, 512, 9 * 4], dtype=tf.int64), + 4: tf.Tensor(shape=[32, 256, 256, 9 * 4], dtype=tf.int64)} + + Returns: + an integer tensor representing total box regression loss. + """ + # Sums all positives in a batch for normalization and avoids zero + # num_positives_sum, which would lead to inf loss during training + + flattened_box_outputs = [] + flattened_labels = [] + batch_size = None + for level in y_pred.keys(): + box_output = y_pred[level] + label = y_true[level] + if batch_size is None: + batch_size = box_output.shape[0] or tf.shape(box_output)[0] + flattened_box_outputs.append(tf.reshape(box_output, [batch_size, -1, 4])) + flattened_labels.append(tf.reshape(label, [batch_size, -1, 4])) + box_outputs = tf.concat(flattened_box_outputs, axis=1) + labels = tf.concat(flattened_labels, axis=1) + loss = self._huber_loss(labels, box_outputs) + return loss + + def get_config(self): + config = { + 'delta': self._delta, + } + base_config = super(RetinanetBoxLoss, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/official/vision/beta/losses/segmentation_losses.py b/official/vision/beta/losses/segmentation_losses.py new file mode 100644 index 0000000000000000000000000000000000000000..f3cfec4aa645536d0b59051f16e226b8639ec9a0 --- /dev/null +++ 
b/official/vision/beta/losses/segmentation_losses.py
@@ -0,0 +1,89 @@
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Losses used for segmentation models."""

# Import libraries
import tensorflow as tf

# Small constant added to normalizers to avoid division by zero.
EPSILON = 1e-5


class SegmentationLoss:
  """Semantic segmentation loss."""

  def __init__(self, label_smoothing, class_weights, ignore_label,
               use_groundtruth_dimension, top_k_percent_pixels=1.0):
    """Initializes `SegmentationLoss`.

    Args:
      label_smoothing: float, amount of label smoothing applied to the one-hot
        labels.
      class_weights: optional list of per-class loss weights; falsy values
        (e.g. None or []) mean uniform weights of 1.
      ignore_label: integer label value whose pixels are excluded from the
        loss.
      use_groundtruth_dimension: if True, resize logits (bilinear) to the label
        spatial size; otherwise resize labels (nearest) to the logits size.
      top_k_percent_pixels: float in (0, 1]; when < 1, only this fraction of
        highest-loss pixels contributes to the loss (hard-pixel mining).
    """
    self._top_k_percent_pixels = top_k_percent_pixels
    self._class_weights = class_weights
    self._ignore_label = ignore_label
    self._use_groundtruth_dimension = use_groundtruth_dimension
    self._label_smoothing = label_smoothing

  def __call__(self, logits, labels):
    """Computes the weighted softmax cross-entropy segmentation loss.

    Args:
      logits: [batch, height, width, num_classes] float logits. The
        `num_classes` dimension must be statically known.
      labels: [batch, height', width', 1] integer label map.

    Returns:
      A scalar loss tensor.
    """
    _, height, width, num_classes = logits.get_shape().as_list()

    if self._use_groundtruth_dimension:
      # TODO(arashwan): Test using align corners to match deeplab alignment.
      logits = tf.image.resize(
          logits, tf.shape(labels)[1:3],
          method=tf.image.ResizeMethod.BILINEAR)
    else:
      labels = tf.image.resize(
          labels, (height, width),
          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    valid_mask = tf.not_equal(labels, self._ignore_label)
    normalizer = tf.reduce_sum(tf.cast(valid_mask, tf.float32)) + EPSILON
    # Assign pixel with ignore label to class 0 (background). The loss on the
    # pixel will later be masked out.
    labels = tf.where(valid_mask, labels, tf.zeros_like(labels))

    labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
    valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)
    onehot_labels = tf.one_hot(labels, num_classes)
    # Standard label smoothing: move mass from the hot class to all classes.
    onehot_labels = onehot_labels * (
        1 - self._label_smoothing) + self._label_smoothing / num_classes
    cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits)

    if not self._class_weights:
      class_weights = [1] * num_classes
    else:
      class_weights = self._class_weights

    if num_classes != len(class_weights):
      raise ValueError(
          'Length of class_weights should be {}'.format(num_classes))

    # Per-pixel weight looked up from the per-class weight of its label.
    weight_mask = tf.einsum('...y,y->...',
                            tf.one_hot(labels, num_classes, dtype=tf.float32),
                            tf.constant(class_weights, tf.float32))
    valid_mask *= weight_mask
    cross_entropy_loss *= tf.cast(valid_mask, tf.float32)

    if self._top_k_percent_pixels >= 1.0:
      loss = tf.reduce_sum(cross_entropy_loss) / normalizer
    else:
      # Hard-pixel mining: keep only the top-k fraction of per-pixel losses
      # and renormalize by the number of non-zero kept losses.
      cross_entropy_loss = tf.reshape(cross_entropy_loss, shape=[-1])
      top_k_pixels = tf.cast(
          self._top_k_percent_pixels *
          tf.cast(tf.size(cross_entropy_loss), tf.float32), tf.int32)
      top_k_losses, _ = tf.math.top_k(
          cross_entropy_loss, k=top_k_pixels, sorted=True)
      normalizer = tf.reduce_sum(
          tf.cast(tf.not_equal(top_k_losses, 0.0), tf.float32)) + EPSILON
      loss = tf.reduce_sum(top_k_losses) / normalizer

    return loss
diff --git a/official/vision/beta/modeling/__init__.py b/official/vision/beta/modeling/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3215829950349ce4201620a687e27ca57a61e437
--- /dev/null
+++ b/official/vision/beta/modeling/__init__.py
@@ -0,0 +1,21 @@
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Modeling package definition.""" + +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling import decoders +from official.vision.beta.modeling import heads +from official.vision.beta.modeling import layers diff --git a/official/vision/beta/modeling/backbones/__init__.py b/official/vision/beta/modeling/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..83e079504d4dab071898b4df11d6152d2352b744 --- /dev/null +++ b/official/vision/beta/modeling/backbones/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Backbones package definition.""" + +from official.vision.beta.modeling.backbones.efficientnet import EfficientNet +from official.vision.beta.modeling.backbones.mobilenet import MobileNet +from official.vision.beta.modeling.backbones.resnet import ResNet +from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D +from official.vision.beta.modeling.backbones.resnet_deeplab import DilatedResNet +from official.vision.beta.modeling.backbones.revnet import RevNet +from official.vision.beta.modeling.backbones.spinenet import SpineNet +from official.vision.beta.modeling.backbones.spinenet_mobile import SpineNetMobile diff --git a/official/vision/beta/modeling/backbones/efficientnet.py b/official/vision/beta/modeling/backbones/efficientnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0f8a55ff0ef2656d4d9a58cd2cfe34855772b89e --- /dev/null +++ b/official/vision/beta/modeling/backbones/efficientnet.py @@ -0,0 +1,318 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains definitions of EfficientNet Networks.""" + +import math +from typing import Any, List, Tuple + +# Import libraries + +import tensorflow as tf + +from official.modeling import hyperparams +from official.modeling import tf_utils +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.modeling.layers import nn_blocks +from official.vision.beta.modeling.layers import nn_layers + +layers = tf.keras.layers + +# The fixed EfficientNet-B0 architecture discovered by NAS. +# Each element represents a specification of a building block: +# (block_fn, block_repeats, kernel_size, strides, expand_ratio, in_filters, +# out_filters, is_output) +EN_B0_BLOCK_SPECS = [ + ('mbconv', 1, 3, 1, 1, 32, 16, False), + ('mbconv', 2, 3, 2, 6, 16, 24, True), + ('mbconv', 2, 5, 2, 6, 24, 40, True), + ('mbconv', 3, 3, 2, 6, 40, 80, False), + ('mbconv', 3, 5, 1, 6, 80, 112, True), + ('mbconv', 4, 5, 2, 6, 112, 192, False), + ('mbconv', 1, 3, 1, 6, 192, 320, True), +] + +SCALING_MAP = { + 'b0': dict(width_scale=1.0, depth_scale=1.0), + 'b1': dict(width_scale=1.0, depth_scale=1.1), + 'b2': dict(width_scale=1.1, depth_scale=1.2), + 'b3': dict(width_scale=1.2, depth_scale=1.4), + 'b4': dict(width_scale=1.4, depth_scale=1.8), + 'b5': dict(width_scale=1.6, depth_scale=2.2), + 'b6': dict(width_scale=1.8, depth_scale=2.6), + 'b7': dict(width_scale=2.0, depth_scale=3.1), +} + + +class BlockSpec(): + """A container class that specifies the block configuration for MnasNet.""" + + def __init__(self, block_fn: str, block_repeats: int, kernel_size: int, + strides: int, expand_ratio: float, in_filters: int, + out_filters: int, is_output: bool, width_scale: float, + depth_scale: float): + self.block_fn = block_fn + self.block_repeats = round_repeats(block_repeats, depth_scale) + self.kernel_size = kernel_size + self.strides = strides + self.expand_ratio = expand_ratio + self.in_filters = nn_layers.round_filters(in_filters, width_scale) + self.out_filters = 
nn_layers.round_filters(out_filters, width_scale) + self.is_output = is_output + + +def round_repeats(repeats: int, multiplier: float, skip: bool = False) -> int: + """Returns rounded number of filters based on depth multiplier.""" + if skip or not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +def block_spec_decoder(specs: List[Tuple[Any, ...]], width_scale: float, + depth_scale: float) -> List[BlockSpec]: + """Decodes and returns specs for a block.""" + decoded_specs = [] + for s in specs: + s = s + ( + width_scale, + depth_scale, + ) + decoded_specs.append(BlockSpec(*s)) + return decoded_specs + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class EfficientNet(tf.keras.Model): + """Creates an EfficientNet family model. + + This implements the EfficientNet model from: + Mingxing Tan, Quoc V. Le. + EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. + (https://arxiv.org/pdf/1905.11946) + """ + + def __init__(self, + model_id: str, + input_specs: tf.keras.layers.InputSpec = layers.InputSpec( + shape=[None, None, None, 3]), + se_ratio: float = 0.0, + stochastic_depth_drop_rate: float = 0.0, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: tf.keras.regularizers.Regularizer = None, + bias_regularizer: tf.keras.regularizers.Regularizer = None, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + **kwargs): + """Initializes an EfficientNet model. + + Args: + model_id: A `str` of model ID of EfficientNet. + input_specs: A `tf.keras.layers.InputSpec` of the input tensor. + se_ratio: A `float` of squeeze and excitation ratio for inverted + bottleneck blocks. + stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer. + kernel_initializer: A `str` for kernel initializer of convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. 
+ bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + Default to None. + activation: A `str` of name of the activation function. + use_sync_bn: If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + **kwargs: Additional keyword arguments to be passed. + """ + self._model_id = model_id + self._input_specs = input_specs + self._se_ratio = se_ratio + self._stochastic_depth_drop_rate = stochastic_depth_drop_rate + self._use_sync_bn = use_sync_bn + self._activation = activation + self._kernel_initializer = kernel_initializer + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + if use_sync_bn: + self._norm = layers.experimental.SyncBatchNormalization + else: + self._norm = layers.BatchNormalization + + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + # Build EfficientNet. + inputs = tf.keras.Input(shape=input_specs.shape[1:]) + width_scale = SCALING_MAP[model_id]['width_scale'] + depth_scale = SCALING_MAP[model_id]['depth_scale'] + + # Build stem. + x = layers.Conv2D( + filters=nn_layers.round_filters(32, width_scale), + kernel_size=3, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + + # Build intermediate blocks. 
+    # Endpoints keyed by str(level); 'endpoint_level' starts at 2 and is
+    # incremented at every block group flagged 'is_output'.
+    endpoints = {}
+    endpoint_level = 2
+    decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
+                                       depth_scale)
+
+    for i, specs in enumerate(decoded_specs):
+      x = self._block_group(
+          inputs=x, specs=specs, name='block_group_{}'.format(i))
+      if specs.is_output:
+        endpoints[str(endpoint_level)] = x
+        endpoint_level += 1
+
+    # Build output specs for downstream tasks.
+    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}
+
+    # Build the final conv for classification.
+    x = layers.Conv2D(
+        filters=nn_layers.round_filters(1280, width_scale),
+        kernel_size=1,
+        strides=1,
+        use_bias=False,
+        padding='same',
+        kernel_initializer=self._kernel_initializer,
+        kernel_regularizer=self._kernel_regularizer,
+        bias_regularizer=self._bias_regularizer)(
+            x)
+    x = self._norm(
+        axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
+            x)
+    endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)
+
+    super(EfficientNet, self).__init__(
+        inputs=inputs, outputs=endpoints, **kwargs)
+
+  def _block_group(self,
+                   inputs: tf.Tensor,
+                   specs: BlockSpec,
+                   name: str = 'block_group'):
+    """Creates one group of blocks for the EfficientNet model.
+
+    Args:
+      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
+      specs: The specifications for one inverted bottleneck block group.
+      name: A `str` name for the block.
+
+    Returns:
+      The output `tf.Tensor` of the block layer.
+    """
+    if specs.block_fn == 'mbconv':
+      block_fn = nn_blocks.InvertedBottleneckBlock
+    else:
+      raise ValueError('Block func {} not supported.'.format(specs.block_fn))
+
+    # The first block of the group applies the spec's strides and may change
+    # the channel count; repeated blocks keep shape (see loop below).
+    x = block_fn(
+        in_filters=specs.in_filters,
+        out_filters=specs.out_filters,
+        expand_ratio=specs.expand_ratio,
+        strides=specs.strides,
+        kernel_size=specs.kernel_size,
+        se_ratio=self._se_ratio,
+        stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
+        kernel_initializer=self._kernel_initializer,
+        kernel_regularizer=self._kernel_regularizer,
+        bias_regularizer=self._bias_regularizer,
+        activation=self._activation,
+        use_sync_bn=self._use_sync_bn,
+        norm_momentum=self._norm_momentum,
+        norm_epsilon=self._norm_epsilon)(
+            inputs)
+
+    for _ in range(1, specs.block_repeats):
+      x = block_fn(
+          in_filters=specs.out_filters,  # Set 'in_filters' to 'out_filters'.
+          out_filters=specs.out_filters,
+          expand_ratio=specs.expand_ratio,
+          strides=1,  # Fix strides to 1.
+          kernel_size=specs.kernel_size,
+          se_ratio=self._se_ratio,
+          stochastic_depth_drop_rate=self._stochastic_depth_drop_rate,
+          kernel_initializer=self._kernel_initializer,
+          kernel_regularizer=self._kernel_regularizer,
+          bias_regularizer=self._bias_regularizer,
+          activation=self._activation,
+          use_sync_bn=self._use_sync_bn,
+          norm_momentum=self._norm_momentum,
+          norm_epsilon=self._norm_epsilon)(
+              x)
+
+    return tf.identity(x, name=name)
+
+  def get_config(self):
+    # Note: 'input_specs' is not part of the serialized config; from_config
+    # therefore rebuilds the model with the default input specs.
+    config_dict = {
+        'model_id': self._model_id,
+        'se_ratio': self._se_ratio,
+        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
+        'kernel_initializer': self._kernel_initializer,
+        'kernel_regularizer': self._kernel_regularizer,
+        'bias_regularizer': self._bias_regularizer,
+        'activation': self._activation,
+        'use_sync_bn': self._use_sync_bn,
+        'norm_momentum': self._norm_momentum,
+        'norm_epsilon': self._norm_epsilon
+    }
+    return config_dict
+
+  @classmethod
+  def from_config(cls, config, custom_objects=None):
+    """Creates an EfficientNet instance from a config dictionary."""
+    return cls(**config)
+
+  @property
+  def output_specs(self):
+    """A dict of {level: TensorShape} pairs for the model output."""
+    return self._output_specs
+
+
+@factory.register_backbone_builder('efficientnet')
+def build_efficientnet(
+    input_specs: tf.keras.layers.InputSpec,
+    model_config: hyperparams.Config,
+    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
+  """Builds EfficientNet backbone from a config."""
+  backbone_type = model_config.backbone.type
+  backbone_cfg = model_config.backbone.get()
+  norm_activation_config = model_config.norm_activation
+  assert backbone_type == 'efficientnet', (f'Inconsistent backbone type '
+                                           f'{backbone_type}')
+
+  return EfficientNet(
+      model_id=backbone_cfg.model_id,
+      input_specs=input_specs,
+      stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate,
+      se_ratio=backbone_cfg.se_ratio,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
diff --git a/official/vision/beta/modeling/backbones/efficientnet_test.py b/official/vision/beta/modeling/backbones/efficientnet_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..00e35001e743fcacf64bf30dc85cf75a969934b7
--- /dev/null
+++ b/official/vision/beta/modeling/backbones/efficientnet_test.py
@@ -0,0 +1,104 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for EfficientNet."""
+
+# Import libraries
+from absl.testing import parameterized
+import tensorflow as tf
+
+from official.vision.beta.modeling.backbones import efficientnet
+
+
+class EfficientNetTest(parameterized.TestCase, tf.test.TestCase):
+  """Unit tests for the EfficientNet backbone."""
+
+  @parameterized.parameters(32, 224)
+  def test_network_creation(self, input_size):
+    """Test creation of EfficientNet family models."""
+    tf.keras.backend.set_image_data_format('channels_last')
+
+    network = efficientnet.EfficientNet(model_id='b0')
+
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    endpoints = network(inputs)
+
+    # Endpoint key 'N' is expected to hold the stride-2**N feature map.
+    self.assertAllEqual([1, input_size / 2**2, input_size / 2**2, 24],
+                        endpoints['2'].shape.as_list())
+    self.assertAllEqual([1, input_size / 2**3, input_size / 2**3, 40],
+                        endpoints['3'].shape.as_list())
+    self.assertAllEqual([1, input_size / 2**4, input_size / 2**4, 112],
+                        endpoints['4'].shape.as_list())
+    self.assertAllEqual([1, input_size / 2**5, input_size / 2**5, 320],
+                        endpoints['5'].shape.as_list())
+
+  @parameterized.parameters('b0', 'b3', 'b6')
+  def test_network_scaling(self, model_id):
+    """Test compound scaling."""
+    # Expected parameter counts for each model variant.
+    efficientnet_params = {
+        'b0': 4049564,
+        'b3': 10783528,
+        'b6': 40960136,
+    }
+    tf.keras.backend.set_image_data_format('channels_last')
+
+    input_size = 32
+    network = efficientnet.EfficientNet(model_id=model_id, se_ratio=0.25)
+    self.assertEqual(network.count_params(), efficientnet_params[model_id])
+
+    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
+    _ = network(inputs)
+
+  @parameterized.parameters(1, 3)
+  def test_input_specs(self, input_dim):
+    """Test different input feature dimensions."""
+    tf.keras.backend.set_image_data_format('channels_last')
+
+    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
+    network = efficientnet.EfficientNet(model_id='b0', input_specs=input_specs)
+
+    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
+    _ = network(inputs)
+
+  def test_serialize_deserialize(self):
+    # Create a network object that sets all of its config options.
+    kwargs = dict(
+        model_id='b0',
+        se_ratio=0.25,
+        stochastic_depth_drop_rate=None,
+        use_sync_bn=False,
+        kernel_initializer='VarianceScaling',
+        kernel_regularizer=None,
+        bias_regularizer=None,
+        activation='relu',
+        norm_momentum=0.99,
+        norm_epsilon=0.001,
+    )
+    network = efficientnet.EfficientNet(**kwargs)
+
+    expected_config = dict(kwargs)
+    self.assertEqual(network.get_config(), expected_config)
+
+    # Create another network object from the first object's config.
+    new_network = efficientnet.EfficientNet.from_config(network.get_config())
+
+    # Validate that the config can be forced to JSON.
+    _ = new_network.to_json()
+
+    # If the serialization was successful, the new config should match the old.
+    self.assertAllEqual(network.get_config(), new_network.get_config())
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/official/vision/beta/modeling/backbones/factory.py b/official/vision/beta/modeling/backbones/factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d24fa87135fc169e99f6d5d5adbcdf7ffd15f45
--- /dev/null
+++ b/official/vision/beta/modeling/backbones/factory.py
@@ -0,0 +1,102 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Backbone registers and factory method.
+
+One can register a new backbone model by the following two steps:
+
+1 Import the factory and register the build in the backbone file.
+2 Import the backbone class and add a build in __init__.py.
+
+```
+# my_backbone.py
+
+from modeling.backbones import factory
+
+class MyBackbone():
+  ...
+
+@factory.register_backbone_builder('my_backbone')
+def build_my_backbone():
+  return MyBackbone()
+
+# backbones/__init__.py adds import
+from modeling.backbones.my_backbone import MyBackbone
+```
+
+If one wants the MyBackbone class to be used only by a specific binary,
+then don't import the backbone module in backbones/__init__.py, but import it
+in the place that uses it.
+
+
+"""
+# Import libraries
+
+import tensorflow as tf
+
+from official.core import registry
+from official.modeling import hyperparams
+
+
+# Module-level registry mapping backbone type key -> builder callable.
+_REGISTERED_BACKBONE_CLS = {}
+
+
+def register_backbone_builder(key: str):
+  """Decorates a builder of backbone class.
+
+  The builder should be a Callable (a class or a function).
+  This decorator supports registration of backbone builder as follows:
+
+  ```
+  class MyBackbone(tf.keras.Model):
+    pass
+
+  @register_backbone_builder('mybackbone')
+  def builder(input_specs, config, l2_reg):
+    return MyBackbone(...)
+
+  # Builds a MyBackbone object.
+  my_backbone = build_backbone(input_specs, config, l2_reg)
+  ```
+
+  Args:
+    key: A `str` of key to look up the builder.
+
+  Returns:
+    A callable for use as a decorator that registers the decorated builder
+    under `key` in the backbone registry.
+  """
+  return registry.register(_REGISTERED_BACKBONE_CLS, key)
+
+
+def build_backbone(
+    input_specs: tf.keras.layers.InputSpec,
+    model_config: hyperparams.Config,
+    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
+  """Builds backbone from a config.
+
+  Args:
+    input_specs: A `tf.keras.layers.InputSpec` of input.
+    model_config: A `OneOfConfig` of model config.
+    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
+      None.
+
+  Returns:
+    A `tf.keras.Model` instance of the backbone.
+  """
+  # Dispatch on model_config.backbone.type to the registered builder.
+  backbone_builder = registry.lookup(_REGISTERED_BACKBONE_CLS,
+                                     model_config.backbone.type)
+
+  return backbone_builder(input_specs, model_config, l2_regularizer)
diff --git a/official/vision/beta/modeling/backbones/factory_test.py b/official/vision/beta/modeling/backbones/factory_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..07cd23a4fe8de0883bfde45efea0c03853052e85
--- /dev/null
+++ b/official/vision/beta/modeling/backbones/factory_test.py
@@ -0,0 +1,200 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for factory functions."""
+# Import libraries
+from absl.testing import parameterized
+import tensorflow as tf
+
+from tensorflow.python.distribute import combinations
+from official.vision.beta.configs import backbones as backbones_cfg
+from official.vision.beta.configs import backbones_3d as backbones_3d_cfg
+from official.vision.beta.configs import common as common_cfg
+from official.vision.beta.configs import retinanet as retinanet_cfg
+from official.vision.beta.modeling import backbones
+from official.vision.beta.modeling.backbones import factory
+
+
+class FactoryTest(tf.test.TestCase, parameterized.TestCase):
+  """Checks that factory-built backbones match directly constructed ones."""
+
+  @combinations.generate(
+      combinations.combine(model_id=[18, 34, 50, 101, 152],))
+  def test_resnet_creation(self, model_id):
+    """Test creation of ResNet models."""
+
+    network = backbones.ResNet(
+        model_id=model_id, se_ratio=0.0, norm_momentum=0.99, norm_epsilon=1e-5)
+
+    backbone_config = backbones_cfg.Backbone(
+        type='resnet',
+        resnet=backbones_cfg.ResNet(model_id=model_id, se_ratio=0.0))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
+        model_config=model_config)
+
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+
+    self.assertEqual(network_config, factory_network_config)
+
+  @combinations.generate(
+      combinations.combine(
+          model_id=['b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'],
+          se_ratio=[0.0, 0.25],
+      ))
+  def test_efficientnet_creation(self, model_id, se_ratio):
+    """Test creation of EfficientNet models."""
+
+    network = backbones.EfficientNet(
+        model_id=model_id,
+        se_ratio=se_ratio,
+        norm_momentum=0.99,
+        norm_epsilon=1e-5)
+
+    backbone_config = backbones_cfg.Backbone(
+        type='efficientnet',
+        efficientnet=backbones_cfg.EfficientNet(
+            model_id=model_id, se_ratio=se_ratio))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
+        model_config=model_config)
+
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+
+    self.assertEqual(network_config, factory_network_config)
+
+  @combinations.generate(
+      combinations.combine(
+          model_id=['MobileNetV1', 'MobileNetV2',
+                    'MobileNetV3Large', 'MobileNetV3Small',
+                    'MobileNetV3EdgeTPU'],
+          filter_size_scale=[1.0, 0.75],
+      ))
+  def test_mobilenet_creation(self, model_id, filter_size_scale):
+    """Test creation of Mobilenet models."""
+
+    network = backbones.MobileNet(
+        model_id=model_id,
+        filter_size_scale=filter_size_scale,
+        norm_momentum=0.99,
+        norm_epsilon=1e-5)
+
+    backbone_config = backbones_cfg.Backbone(
+        type='mobilenet',
+        mobilenet=backbones_cfg.MobileNet(
+            model_id=model_id, filter_size_scale=filter_size_scale))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
+        model_config=model_config)
+
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+
+    self.assertEqual(network_config, factory_network_config)
+
+  @combinations.generate(combinations.combine(model_id=['49'],))
+  def test_spinenet_creation(self, model_id):
+    """Test creation of SpineNet models."""
+    input_size = 128
+    min_level = 3
+    max_level = 7
+
+    input_specs = tf.keras.layers.InputSpec(
+        shape=[None, input_size, input_size, 3])
+    network = backbones.SpineNet(
+        input_specs=input_specs,
+        min_level=min_level,
+        max_level=max_level,
+        norm_momentum=0.99,
+        norm_epsilon=1e-5)
+
+    backbone_config = backbones_cfg.Backbone(
+        type='spinenet',
+        spinenet=backbones_cfg.SpineNet(model_id=model_id))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(
+            shape=[None, input_size, input_size, 3]),
+        model_config=model_config)
+
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+
+    self.assertEqual(network_config, factory_network_config)
+
+  @combinations.generate(
+      combinations.combine(model_id=[38, 56, 104],))
+  def test_revnet_creation(self, model_id):
+    """Test creation of RevNet models."""
+    network = backbones.RevNet(
+        model_id=model_id, norm_momentum=0.99, norm_epsilon=1e-5)
+
+    backbone_config = backbones_cfg.Backbone(
+        type='revnet',
+        revnet=backbones_cfg.RevNet(model_id=model_id))
+    norm_activation_config = common_cfg.NormActivation(
+        norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
+    model_config = retinanet_cfg.RetinaNet(
+        backbone=backbone_config, norm_activation=norm_activation_config)
+
+    factory_network = factory.build_backbone(
+        input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
+        model_config=model_config)
+
+    network_config = network.get_config()
+    factory_network_config = factory_network.get_config()
+
+    self.assertEqual(network_config, factory_network_config)
+
+  @combinations.generate(combinations.combine(model_type=['resnet_3d'],))
+  def test_resnet_3d_creation(self, model_type):
+    """Test creation of ResNet 3D models."""
+    backbone_cfg = backbones_3d_cfg.Backbone3D(type=model_type).get()
+    temporal_strides = []
+    temporal_kernel_sizes = []
+    for block_spec in backbone_cfg.block_specs:
+      temporal_strides.append(block_spec.temporal_strides)
+      temporal_kernel_sizes.append(block_spec.temporal_kernel_sizes)
+
+    _ = backbones.ResNet3D(
+        model_id=backbone_cfg.model_id,
+        temporal_strides=temporal_strides,
+        temporal_kernel_sizes=temporal_kernel_sizes,
+        norm_momentum=0.99,
+        norm_epsilon=1e-5)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/official/vision/beta/modeling/backbones/mobilenet.py b/official/vision/beta/modeling/backbones/mobilenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ac16510683b79786d14e3ac1893dec739a52f7a
--- /dev/null
+++ b/official/vision/beta/modeling/backbones/mobilenet.py
@@ -0,0 +1,786 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains definitions of MobileNet Networks."""
+
+from typing import Optional, Dict, Any, Tuple
+
+# Import libraries
+import dataclasses
+import tensorflow as tf
+from official.modeling import hyperparams
+from official.modeling import tf_utils
+from official.vision.beta.modeling.backbones import factory
+from official.vision.beta.modeling.layers import nn_blocks
+from official.vision.beta.modeling.layers import nn_layers
+
+layers = tf.keras.layers
+
+
+# pylint: disable=pointless-string-statement
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class Conv2DBNBlock(tf.keras.layers.Layer):
+  """A convolution block with batch normalization."""
+
+  def __init__(
+      self,
+      filters: int,
+      kernel_size: int = 3,
+      strides: int = 1,
+      use_bias: bool = False,
+      activation: str = 'relu6',
+      kernel_initializer: str = 'VarianceScaling',
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      use_normalization: bool = True,
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      **kwargs):
+    """A convolution block with batch normalization.
+
+    Args:
+      filters: An `int` number of filters of the convolution layer. (This
+        block builds a single Conv2D; see `build`.)
+      kernel_size: An `int` specifying the height and width of the 2D
+        convolution window.
+      strides: An `int` of block stride. If greater than 1, this block will
+        ultimately downsample the input.
+      use_bias: If True, use bias in the convolution layer.
+      activation: A `str` name of the activation function.
+      kernel_initializer: A `str` for kernel initializer of convolutional
+        layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      use_normalization: If True, use batch normalization.
+      use_sync_bn: If True, use synchronized batch normalization.
+      norm_momentum: A `float` of normalization momentum for the moving
+        average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    super(Conv2DBNBlock, self).__init__(**kwargs)
+    self._filters = filters
+    self._kernel_size = kernel_size
+    self._strides = strides
+    self._activation = activation
+    self._use_bias = use_bias
+    self._kernel_initializer = kernel_initializer
+    self._kernel_regularizer = kernel_regularizer
+    self._bias_regularizer = bias_regularizer
+    self._use_normalization = use_normalization
+    self._use_sync_bn = use_sync_bn
+    self._norm_momentum = norm_momentum
+    self._norm_epsilon = norm_epsilon
+
+    if use_sync_bn:
+      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
+    else:
+      self._norm = tf.keras.layers.BatchNormalization
+    if tf.keras.backend.image_data_format() == 'channels_last':
+      self._bn_axis = -1
+    else:
+      self._bn_axis = 1
+
+  def get_config(self):
+    config = {
+        'filters': self._filters,
+        'strides': self._strides,
+        'kernel_size': self._kernel_size,
+        'use_bias': self._use_bias,
+        'kernel_initializer': self._kernel_initializer,
+        'kernel_regularizer': self._kernel_regularizer,
+        'bias_regularizer': self._bias_regularizer,
+        'activation': self._activation,
+        'use_sync_bn': self._use_sync_bn,
+        'use_normalization': self._use_normalization,
+        'norm_momentum': self._norm_momentum,
+        'norm_epsilon': self._norm_epsilon
+    }
+    base_config = super(Conv2DBNBlock, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
+  def build(self, input_shape):
+    self._conv0 = tf.keras.layers.Conv2D(
+        filters=self._filters,
+        kernel_size=self._kernel_size,
+        strides=self._strides,
+        padding='same',
+        use_bias=self._use_bias,
+        kernel_initializer=self._kernel_initializer,
+        kernel_regularizer=self._kernel_regularizer,
+        bias_regularizer=self._bias_regularizer)
+    if self._use_normalization:
+      self._norm0 = self._norm(
+          axis=self._bn_axis,
+          momentum=self._norm_momentum,
+          epsilon=self._norm_epsilon)
+    self._activation_layer = tf_utils.get_activation(
+        self._activation, use_keras_layer=True)
+
+    super(Conv2DBNBlock, self).build(input_shape)
+
+  def call(self, inputs, training=None):
+    # conv -> (optional) batch norm -> activation.
+    x = self._conv0(inputs)
+    if self._use_normalization:
+      x = self._norm0(x)
+    return self._activation_layer(x)
+
+"""
+Architecture: https://arxiv.org/abs/1704.04861.
+
+"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision
+Applications" Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko,
+Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam
+"""
+MNV1_BLOCK_SPECS = {
+    'spec_name': 'MobileNetV1',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides',
+                          'filters', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 32, False),
+        ('depsepconv', 3, 1, 64, False),
+        ('depsepconv', 3, 2, 128, False),
+        ('depsepconv', 3, 1, 128, True),
+        ('depsepconv', 3, 2, 256, False),
+        ('depsepconv', 3, 1, 256, True),
+        ('depsepconv', 3, 2, 512, False),
+        ('depsepconv', 3, 1, 512, False),
+        ('depsepconv', 3, 1, 512, False),
+        ('depsepconv', 3, 1, 512, False),
+        ('depsepconv', 3, 1, 512, False),
+        ('depsepconv', 3, 1, 512, True),
+        ('depsepconv', 3, 2, 1024, False),
+        ('depsepconv', 3, 1, 1024, True),
+    ]
+}
+
+"""
+Architecture: https://arxiv.org/abs/1801.04381
+
+"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
+Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen
+"""
+MNV2_BLOCK_SPECS = {
+    'spec_name': 'MobileNetV2',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'expand_ratio', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 32, None, False),
+        ('invertedbottleneck', 3, 1, 16, 1., False),
+        ('invertedbottleneck', 3, 2, 24, 6., False),
+        ('invertedbottleneck', 3, 1, 24, 6., True),
+        ('invertedbottleneck', 3, 2, 32, 6., False),
+        ('invertedbottleneck', 3, 1, 32, 6., False),
+        ('invertedbottleneck', 3, 1, 32, 6., True),
+        ('invertedbottleneck', 3, 2, 64, 6., False),
+        ('invertedbottleneck', 3, 1, 64, 6., False),
+        ('invertedbottleneck', 3, 1, 64, 6., False),
+        ('invertedbottleneck', 3, 1, 64, 6., False),
+        ('invertedbottleneck', 3, 1, 96, 6., False),
+        ('invertedbottleneck', 3, 1, 96, 6., False),
+        ('invertedbottleneck', 3, 1, 96, 6., True),
+        ('invertedbottleneck', 3, 2, 160, 6., False),
+        ('invertedbottleneck', 3, 1, 160, 6., False),
+        ('invertedbottleneck', 3, 1, 160, 6., False),
+        ('invertedbottleneck', 3, 1, 320, 6., True),
+        ('convbn', 1, 1, 1280, None, False),
+    ]
+}
+
+"""
+Architecture: https://arxiv.org/abs/1905.02244
+
+"Searching for MobileNetV3"
+Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan,
+Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam
+"""
+MNV3Large_BLOCK_SPECS = {
+    'spec_name': 'MobileNetV3Large',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'activation', 'se_ratio', 'expand_ratio',
+                          'use_normalization', 'use_bias', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 16,
+         'hard_swish', None, None, True, False, False),
+        ('invertedbottleneck', 3, 1, 16,
+         'relu', None, 1., None, False, False),
+        ('invertedbottleneck', 3, 2, 24,
+         'relu', None, 4., None, False, False),
+        ('invertedbottleneck', 3, 1, 24,
+         'relu', None, 3., None, False, True),
+        ('invertedbottleneck', 5, 2, 40,
+         'relu', 0.25, 3., None, False, False),
+        ('invertedbottleneck', 5, 1, 40,
+         'relu', 0.25, 3., None, False, False),
+        ('invertedbottleneck', 5, 1, 40,
+         'relu', 0.25, 3., None, False, True),
+        ('invertedbottleneck', 3, 2, 80,
+         'hard_swish', None, 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 80,
+         'hard_swish', None, 2.5, None, False, False),
+        ('invertedbottleneck', 3, 1, 80,
+         'hard_swish', None, 2.3, None, False, False),
+        ('invertedbottleneck', 3, 1, 80,
+         'hard_swish', None, 2.3, None, False, False),
+        ('invertedbottleneck', 3, 1, 112,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 112,
+         'hard_swish', 0.25, 6., None, False, True),
+        ('invertedbottleneck', 5, 2, 160,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 160,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 160,
+         'hard_swish', 0.25, 6., None, False, True),
+        ('convbn', 1, 1, 960,
+         'hard_swish', None, None, True, False, False),
+        ('gpooling', None, None, None,
+         None, None, None, None, None, False),
+        ('convbn', 1, 1, 1280,
+         'hard_swish', None, None, False, True, False),
+    ]
+}
+
+MNV3Small_BLOCK_SPECS = {
+    'spec_name': 'MobileNetV3Small',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'activation', 'se_ratio', 'expand_ratio',
+                          'use_normalization', 'use_bias', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 16,
+         'hard_swish', None, None, True, False, False),
+        ('invertedbottleneck', 3, 2, 16,
+         'relu', 0.25, 1, None, False, True),
+        ('invertedbottleneck', 3, 2, 24,
+         'relu', None, 72. / 16, None, False, False),
+        ('invertedbottleneck', 3, 1, 24,
+         'relu', None, 88. / 24, None, False, True),
+        ('invertedbottleneck', 5, 2, 40,
+         'hard_swish', 0.25, 4., None, False, False),
+        ('invertedbottleneck', 5, 1, 40,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 40,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 48,
+         'hard_swish', 0.25, 3., None, False, False),
+        ('invertedbottleneck', 5, 1, 48,
+         'hard_swish', 0.25, 3., None, False, True),
+        ('invertedbottleneck', 5, 2, 96,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 96,
+         'hard_swish', 0.25, 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 96,
+         'hard_swish', 0.25, 6., None, False, True),
+        ('convbn', 1, 1, 576,
+         'hard_swish', None, None, True, False, False),
+        ('gpooling', None, None, None,
+         None, None, None, None, None, False),
+        ('convbn', 1, 1, 1024,
+         'hard_swish', None, None, False, True, False),
+    ]
+}
+
+"""
+The EdgeTPU version is taken from
+github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v3.py
+"""
+MNV3EdgeTPU_BLOCK_SPECS = {
+    'spec_name': 'MobileNetV3EdgeTPU',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'activation', 'se_ratio', 'expand_ratio',
+                          'use_residual', 'use_depthwise', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 32, 'relu', None, None, None, None, False),
+        ('invertedbottleneck', 3, 1, 16, 'relu', None, 1., True, False, False),
+        ('invertedbottleneck', 3, 2, 32, 'relu', None, 8., True, False, False),
+        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, False),
+        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, False),
+        ('invertedbottleneck', 3, 1, 32, 'relu', None, 4., True, False, True),
+        ('invertedbottleneck', 3, 2, 48, 'relu', None, 8., True, False, False),
+        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, False),
+        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, False),
+        ('invertedbottleneck', 3, 1, 48, 'relu', None, 4., True, False, True),
+        ('invertedbottleneck', 3, 2, 96, 'relu', None, 8., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 8., False, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 96, 'relu', None, 4., True, True, True),
+        ('invertedbottleneck', 5, 2, 160, 'relu', None, 8., True, True, False),
+        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 5, 1, 160, 'relu', None, 4., True, True, False),
+        ('invertedbottleneck', 3, 1, 192, 'relu', None, 8., True, True, True),
+        ('convbn', 1, 1, 1280, 'relu', None, None, None, None, False),
+    ]
+}
+
+"""
+Architecture: https://arxiv.org/pdf/2008.08178.pdf
+
+"Discovering Multi-Hardware Mobile Models via Architecture Search"
+Grace Chu, Okan Arikan, Gabriel Bender, Weijun Wang,
+Achille Brighton, Pieter-Jan Kindermans, Hanxiao Liu,
+Berkin Akin, Suyog Gupta, and Andrew Howard
+"""
+MNMultiMAX_BLOCK_SPECS = {
+    'spec_name': 'MobileNetMultiMAX',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'activation', 'expand_ratio',
+                          'use_normalization', 'use_bias', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
+        ('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, True),
+        ('invertedbottleneck', 5, 2, 64, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, False),
+        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, True),
+        ('invertedbottleneck', 5, 2, 128, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 4., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, True),
+        ('invertedbottleneck', 3, 2, 160, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 160, 'relu', 4., None, False, False),
+        ('invertedbottleneck', 3, 1, 160, 'relu', 5., None, False, False),
+        ('invertedbottleneck', 5, 1, 160, 'relu', 4., None, False, True),
+        ('convbn', 1, 1, 960, 'relu', None, True, False, False),
+        ('gpooling', None, None, None, None, None, None, None, False),
+        ('convbn', 1, 1, 1280, 'relu', None, False, True, False),
+    ]
+}
+
+MNMultiAVG_BLOCK_SPECS = {
+    'spec_name': 'MobileNetMultiAVG',
+    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters',
+                          'activation', 'expand_ratio',
+                          'use_normalization', 'use_bias', 'is_output'],
+    'block_specs': [
+        ('convbn', 3, 2, 32, 'relu', None, True, False, False),
+        ('invertedbottleneck', 3, 2, 32, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 32, 'relu', 2., None, False, True),
+        ('invertedbottleneck', 5, 2, 64, 'relu', 5., None, False, False),
+        ('invertedbottleneck', 3, 1, 64, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 64, 'relu', 2., None, False, False),
+        ('invertedbottleneck', 3, 1, 64, 'relu', 3., None, False, True),
+        ('invertedbottleneck', 5, 2, 128, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 128, 'relu', 3., None, False, False),
+        ('invertedbottleneck', 3, 1, 160, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 3, 1, 160, 'relu', 4., None, False, True),
+        ('invertedbottleneck', 3, 2, 192, 'relu', 6., None, False, False),
+        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, False),
+        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, False),
+        ('invertedbottleneck', 5, 1, 192, 'relu', 4., None, False, True),
+        ('convbn', 1, 1, 960, 'relu', None, True, False, False),
+        ('gpooling', None, None, None, None, None, None, None, False),
+        ('convbn', 1, 1, 1280, 'relu', None, False, True, False),
+    ]
+}
+
+SUPPORTED_SPECS_MAP = {
+    'MobileNetV1': MNV1_BLOCK_SPECS,
+    'MobileNetV2': MNV2_BLOCK_SPECS,
+    'MobileNetV3Large': MNV3Large_BLOCK_SPECS,
+    'MobileNetV3Small': MNV3Small_BLOCK_SPECS,
+    'MobileNetV3EdgeTPU': MNV3EdgeTPU_BLOCK_SPECS,
+    'MobileNetMultiMAX': MNMultiMAX_BLOCK_SPECS,
+    'MobileNetMultiAVG': MNMultiAVG_BLOCK_SPECS,
+}
+
+
+@dataclasses.dataclass
+class BlockSpec(hyperparams.Config):
+  """A container class that specifies the block configuration for MobileNet.
+
+  Fields not listed in a spec table's 'block_spec_schema' keep these defaults.
+  """
+
+  block_fn: str = 'convbn'
+  kernel_size: int = 3
+  strides: int = 1
+  filters: int = 32
+  use_bias: bool = False
+  use_normalization: bool = True
+  activation: str = 'relu6'
+  # Used for block type InvertedResConv.
+  expand_ratio: Optional[float] = 6.
+  # Used for block type InvertedResConv with SE.
+  se_ratio: Optional[float] = None
+  use_depthwise: bool = True
+  use_residual: bool = True
+  is_output: bool = True
+
+
+def block_spec_decoder(
+    specs: Dict[Any, Any],
+    filter_size_scale: float,
+    # Set to 1 for mobilenetv1.
+    divisible_by: int = 8,
+    finegrain_classification_mode: bool = True):
+  """Decodes specs for a block.
+
+  Args:
+    specs: A `dict` specification of block specs of a mobilenet version.
+    filter_size_scale: A `float` multiplier for the filter size for all
+      convolution ops. The value must be greater than zero. Typical usage will
+      be to set this value in (0, 1) to reduce the number of parameters or
+      computation cost of the model.
+    divisible_by: An `int` that ensures all inner dimensions are divisible by
+      this number.
+ finegrain_classification_mode: If True, the model will keep the last layer + large even for small multipliers, following + https://arxiv.org/abs/1801.04381. + + Returns: + A list of `BlockSpec` that defines structure of the base network. + """ + + spec_name = specs['spec_name'] + block_spec_schema = specs['block_spec_schema'] + block_specs = specs['block_specs'] + + if not block_specs: + raise ValueError( + 'The block spec cannot be empty for {} !'.format(spec_name)) + + if len(block_specs[0]) != len(block_spec_schema): + raise ValueError('The block spec values {} do not match with ' + 'the schema {}'.format(block_specs[0], block_spec_schema)) + + decoded_specs = [] + + for s in block_specs: + kw_s = dict(zip(block_spec_schema, s)) + decoded_specs.append(BlockSpec(**kw_s)) + + # This adjustment applies to V2 and V3 + if (spec_name != 'MobileNetV1' + and finegrain_classification_mode + and filter_size_scale < 1.0): + decoded_specs[-1].filters /= filter_size_scale # pytype: disable=annotation-type-mismatch + + for ds in decoded_specs: + if ds.filters: + ds.filters = nn_layers.round_filters(filters=ds.filters, + multiplier=filter_size_scale, + divisor=divisible_by, + min_depth=8) + + return decoded_specs + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class MobileNet(tf.keras.Model): + """Creates a MobileNet family model.""" + + def __init__( + self, + model_id: str = 'MobileNetV2', + filter_size_scale: float = 1.0, + input_specs: tf.keras.layers.InputSpec = layers.InputSpec( + shape=[None, None, None, 3]), + # The followings are for hyper-parameter tuning. + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + # The followings should be kept the same most of the times. 
+      output_stride: int = None,
+      min_depth: int = 8,
+      # divisible is not used in MobileNetV1.
+      divisible_by: int = 8,
+      stochastic_depth_drop_rate: float = 0.0,
+      regularize_depthwise: bool = False,
+      use_sync_bn: bool = False,
+      # finegrain is not used in MobileNetV1.
+      finegrain_classification_mode: bool = True,
+      **kwargs):
+    """Initializes a MobileNet model.
+
+    Args:
+      model_id: A `str` of MobileNet version. The supported values are
+        `MobileNetV1`, `MobileNetV2`, `MobileNetV3Large`, `MobileNetV3Small`,
+        `MobileNetV3EdgeTPU`, `MobileNetMultiMAX`, and `MobileNetMultiAVG`.
+      filter_size_scale: A `float` of multiplier for the filters (number of
+        channels) for all convolution ops. The value must be greater than zero.
+        Typical usage will be to set this value in (0, 1) to reduce the number
+        of parameters or computation cost of the model.
+      input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      kernel_initializer: A `str` for kernel initializer of convolutional
+        layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
+        Default to None.
+      output_stride: An `int` that specifies the requested ratio of input to
+        output spatial resolution. If not None, then we invoke atrous
+        convolution if necessary to prevent the network from reducing the
+        spatial resolution of activation maps. Allowed values are 8 (accurate
+        fully convolutional mode), 16 (fast fully convolutional mode), 32
+        (classification mode).
+      min_depth: An `int` of minimum depth (number of channels) for all
+        convolution ops. Enforced when filter_size_scale < 1, and not an active
+        constraint when filter_size_scale >= 1.
+      divisible_by: An `int` that ensures all inner dimensions are divisible by
+        this number.
+      stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer.
+      regularize_depthwise: If True, apply regularization on depthwise.
+      use_sync_bn: If True, use synchronized batch normalization.
+      finegrain_classification_mode: If True, the model will keep the last layer
+        large even for small multipliers, following
+        https://arxiv.org/abs/1801.04381.
+      **kwargs: Additional keyword arguments to be passed.
+    """
+    if model_id not in SUPPORTED_SPECS_MAP:
+      raise ValueError('The MobileNet version {} '
+                       'is not supported'.format(model_id))
+
+    if filter_size_scale <= 0:
+      raise ValueError('filter_size_scale is not greater than zero.')
+
+    if output_stride is not None:
+      if model_id == 'MobileNetV1':
+        if output_stride not in [8, 16, 32]:
+          raise ValueError('Only allowed output_stride values are 8, 16, 32.')
+      else:
+        if output_stride == 0 or (output_stride > 1 and output_stride % 2):
+          raise ValueError('Output stride must be None, 1 or a multiple of 2.')
+
+    self._model_id = model_id
+    self._input_specs = input_specs
+    self._filter_size_scale = filter_size_scale
+    self._min_depth = min_depth
+    self._output_stride = output_stride
+    self._divisible_by = divisible_by
+    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
+    self._regularize_depthwise = regularize_depthwise
+    self._kernel_initializer = kernel_initializer
+    self._kernel_regularizer = kernel_regularizer
+    self._bias_regularizer = bias_regularizer
+    self._use_sync_bn = use_sync_bn
+    self._norm_momentum = norm_momentum
+    self._norm_epsilon = norm_epsilon
+    self._finegrain_classification_mode = finegrain_classification_mode
+
+    inputs = tf.keras.Input(shape=input_specs.shape[1:])
+
+    block_specs = SUPPORTED_SPECS_MAP.get(model_id)
+    self._decoded_specs = block_spec_decoder(
+        specs=block_specs,
+        filter_size_scale=self._filter_size_scale,
+        divisible_by=self._get_divisible_by(),
+        finegrain_classification_mode=self._finegrain_classification_mode)
+
+    x, endpoints, next_endpoint_level = 
self._mobilenet_base(inputs=inputs) + + endpoints[str(next_endpoint_level)] = x + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints} + + super(MobileNet, self).__init__( + inputs=inputs, outputs=endpoints, **kwargs) + + def _get_divisible_by(self): + if self._model_id == 'MobileNetV1': + return 1 + else: + return self._divisible_by + + def _mobilenet_base(self, + inputs: tf.Tensor + ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor], int]: + """Builds the base MobileNet architecture. + + Args: + inputs: A `tf.Tensor` of shape `[batch_size, height, width, channels]`. + + Returns: + A tuple of output Tensor and dictionary that collects endpoints. + """ + + input_shape = inputs.get_shape().as_list() + if len(input_shape) != 4: + raise ValueError('Expected rank 4 input, was: %d' % len(input_shape)) + + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = inputs + endpoints = {} + endpoint_level = 2 + for i, block_def in enumerate(self._decoded_specs): + block_name = 'block_group_{}_{}'.format(block_def.block_fn, i) + # A small catch for gpooling block with None strides + if not block_def.strides: + block_def.strides = 1 + if (self._output_stride is not None and + current_stride == self._output_stride): + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. 
+ layer_stride = 1 + layer_rate = rate + rate *= block_def.strides + else: + layer_stride = block_def.strides + layer_rate = 1 + current_stride *= block_def.strides + + if block_def.block_fn == 'convbn': + + net = Conv2DBNBlock( + filters=block_def.filters, + kernel_size=block_def.kernel_size, + strides=block_def.strides, + activation=block_def.activation, + use_bias=block_def.use_bias, + use_normalization=block_def.use_normalization, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon + )(net) + + elif block_def.block_fn == 'depsepconv': + net = nn_blocks.DepthwiseSeparableConvBlock( + filters=block_def.filters, + kernel_size=block_def.kernel_size, + strides=block_def.strides, + activation=block_def.activation, + dilation_rate=layer_rate, + regularize_depthwise=self._regularize_depthwise, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon, + )(net) + + elif block_def.block_fn == 'invertedbottleneck': + use_rate = rate + if layer_rate > 1 and block_def.kernel_size != 1: + # We will apply atrous rate in the following cases: + # 1) When kernel_size is not in params, the operation then uses + # default kernel size 3x3. + # 2) When kernel_size is in params, and if the kernel_size is not + # equal to (1, 1) (there is no need to apply atrous convolution to + # any 1x1 convolution). 
+ use_rate = layer_rate + in_filters = net.shape.as_list()[-1] + net = nn_blocks.InvertedBottleneckBlock( + in_filters=in_filters, + out_filters=block_def.filters, + kernel_size=block_def.kernel_size, + strides=layer_stride, + expand_ratio=block_def.expand_ratio, + se_ratio=block_def.se_ratio, + expand_se_in_filters=True, + se_gating_activation='hard_sigmoid', + activation=block_def.activation, + use_depthwise=block_def.use_depthwise, + use_residual=block_def.use_residual, + dilation_rate=use_rate, + regularize_depthwise=self._regularize_depthwise, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon, + stochastic_depth_drop_rate=self._stochastic_depth_drop_rate, + divisible_by=self._get_divisible_by() + )(net) + + elif block_def.block_fn == 'gpooling': + net = layers.GlobalAveragePooling2D()(net) + net = layers.Reshape((1, 1, net.shape[1]))(net) + + else: + raise ValueError('Unknown block type {} for layer {}'.format( + block_def.block_fn, i)) + + net = tf.keras.layers.Activation('linear', name=block_name)(net) + + if block_def.is_output: + endpoints[str(endpoint_level)] = net + endpoint_level += 1 + + return net, endpoints, endpoint_level + + def get_config(self): + config_dict = { + 'model_id': self._model_id, + 'filter_size_scale': self._filter_size_scale, + 'min_depth': self._min_depth, + 'output_stride': self._output_stride, + 'divisible_by': self._divisible_by, + 'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate, + 'regularize_depthwise': self._regularize_depthwise, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'finegrain_classification_mode': 
self._finegrain_classification_mode, + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('mobilenet') +def build_mobilenet( + input_specs: tf.keras.layers.InputSpec, + model_config: hyperparams.Config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds MobileNet backbone from a config.""" + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'mobilenet', (f'Inconsistent backbone type ' + f'{backbone_type}') + + return MobileNet( + model_id=backbone_cfg.model_id, + filter_size_scale=backbone_cfg.filter_size_scale, + input_specs=input_specs, + stochastic_depth_drop_rate=backbone_cfg.stochastic_depth_drop_rate, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) diff --git a/official/vision/beta/modeling/backbones/mobilenet_test.py b/official/vision/beta/modeling/backbones/mobilenet_test.py new file mode 100644 index 0000000000000000000000000000000000000000..328bf35c71f8b742b6a512428407b3e07da5437f --- /dev/null +++ b/official/vision/beta/modeling/backbones/mobilenet_test.py @@ -0,0 +1,177 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for MobileNet.""" + +import itertools +# Import libraries + +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import mobilenet + + +class MobileNetTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + 'MobileNetV1', + 'MobileNetV2', + 'MobileNetV3Large', + 'MobileNetV3Small', + 'MobileNetV3EdgeTPU', + 'MobileNetMultiAVG', + 'MobileNetMultiMAX', + ) + def test_serialize_deserialize(self, model_id): + # Create a network object that sets all of its config options. + kwargs = dict( + model_id=model_id, + filter_size_scale=1.0, + stochastic_depth_drop_rate=None, + use_sync_bn=False, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + norm_momentum=0.99, + norm_epsilon=0.001, + output_stride=None, + min_depth=8, + divisible_by=8, + regularize_depthwise=False, + finegrain_classification_mode=True + ) + network = mobilenet.MobileNet(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = mobilenet.MobileNet.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. + _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. 
+ self.assertAllEqual(network.get_config(), new_network.get_config()) + + @parameterized.parameters( + itertools.product( + [1, 3], + [ + 'MobileNetV1', + 'MobileNetV2', + 'MobileNetV3Large', + 'MobileNetV3Small', + 'MobileNetV3EdgeTPU', + 'MobileNetMultiAVG', + 'MobileNetMultiMAX', + ], + )) + def test_input_specs(self, input_dim, model_id): + """Test different input feature dimensions.""" + tf.keras.backend.set_image_data_format('channels_last') + + input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim]) + network = mobilenet.MobileNet(model_id=model_id, input_specs=input_specs) + + inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1) + _ = network(inputs) + + @parameterized.parameters( + itertools.product( + [ + 'MobileNetV1', + 'MobileNetV2', + 'MobileNetV3Large', + 'MobileNetV3Small', + 'MobileNetV3EdgeTPU', + 'MobileNetMultiAVG', + 'MobileNetMultiMAX', + ], + [32, 224], + )) + def test_mobilenet_creation(self, model_id, + input_size): + """Test creation of MobileNet family models.""" + tf.keras.backend.set_image_data_format('channels_last') + + mobilenet_layers = { + # The number of filters of layers having outputs been collected + # for filter_size_scale = 1.0 + 'MobileNetV1': [128, 256, 512, 1024], + 'MobileNetV2': [24, 32, 96, 320], + 'MobileNetV3Small': [16, 24, 48, 96], + 'MobileNetV3Large': [24, 40, 112, 160], + 'MobileNetV3EdgeTPU': [32, 48, 96, 192], + 'MobileNetMultiMAX': [32, 64, 128, 160], + 'MobileNetMultiAVG': [32, 64, 160, 192], + } + + network = mobilenet.MobileNet(model_id=model_id, + filter_size_scale=1.0) + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + endpoints = network(inputs) + + for idx, num_filter in enumerate(mobilenet_layers[model_id]): + self.assertAllEqual( + [1, input_size / 2 ** (idx+2), input_size / 2 ** (idx+2), num_filter], + endpoints[str(idx+2)].shape.as_list()) + + @parameterized.parameters( + itertools.product( + [ + 'MobileNetV1', + 'MobileNetV2', + 
'MobileNetV3Large', + 'MobileNetV3Small', + 'MobileNetV3EdgeTPU', + 'MobileNetMultiAVG', + 'MobileNetMultiMAX', + ], + [1.0, 0.75], + )) + def test_mobilenet_scaling(self, model_id, + filter_size_scale): + """Test for creation of a MobileNet classifier.""" + mobilenet_params = { + ('MobileNetV1', 1.0): 3228864, + ('MobileNetV1', 0.75): 1832976, + ('MobileNetV2', 1.0): 2257984, + ('MobileNetV2', 0.75): 1382064, + ('MobileNetV3Large', 1.0): 4226432, + ('MobileNetV3Large', 0.75): 2731616, + ('MobileNetV3Small', 1.0): 1529968, + ('MobileNetV3Small', 0.75): 1026552, + ('MobileNetV3EdgeTPU', 1.0): 2849312, + ('MobileNetV3EdgeTPU', 0.75): 1737288, + ('MobileNetMultiAVG', 1.0): 3700576, + ('MobileNetMultiAVG', 0.75): 2345864, + ('MobileNetMultiMAX', 1.0): 3170720, + ('MobileNetMultiMAX', 0.75): 2041976, + } + + input_size = 224 + network = mobilenet.MobileNet(model_id=model_id, + filter_size_scale=filter_size_scale) + self.assertEqual(network.count_params(), + mobilenet_params[(model_id, filter_size_scale)]) + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + _ = network(inputs) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/backbones/resnet.py b/official/vision/beta/modeling/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..abb2900adb0c6b4a095101753ca6f3ec1cb31165 --- /dev/null +++ b/official/vision/beta/modeling/backbones/resnet.py @@ -0,0 +1,397 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of ResNet and ResNet-RS models.""" + +from typing import Callable, Optional + +# Import libraries +import tensorflow as tf + +from official.modeling import hyperparams +from official.modeling import tf_utils +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.modeling.layers import nn_blocks +from official.vision.beta.modeling.layers import nn_layers + +layers = tf.keras.layers + +# Specifications for different ResNet variants. +# Each entry specifies block configurations of the particular ResNet variant. +# Each element in the block configuration is in the following format: +# (block_fn, num_filters, block_repeats) +RESNET_SPECS = { + 18: [ + ('residual', 64, 2), + ('residual', 128, 2), + ('residual', 256, 2), + ('residual', 512, 2), + ], + 34: [ + ('residual', 64, 3), + ('residual', 128, 4), + ('residual', 256, 6), + ('residual', 512, 3), + ], + 50: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 4), + ('bottleneck', 256, 6), + ('bottleneck', 512, 3), + ], + 101: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 4), + ('bottleneck', 256, 23), + ('bottleneck', 512, 3), + ], + 152: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 8), + ('bottleneck', 256, 36), + ('bottleneck', 512, 3), + ], + 200: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 24), + ('bottleneck', 256, 36), + ('bottleneck', 512, 3), + ], + 270: [ + ('bottleneck', 64, 4), + ('bottleneck', 128, 29), + ('bottleneck', 256, 53), + ('bottleneck', 512, 4), + ], + 350: [ + ('bottleneck', 64, 4), + ('bottleneck', 128, 36), + ('bottleneck', 256, 72), + ('bottleneck', 512, 4), + ], + 420: [ + ('bottleneck', 64, 4), + ('bottleneck', 128, 44), + ('bottleneck', 256, 87), + ('bottleneck', 512, 4), + ], +} + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ResNet(tf.keras.Model): + """Creates ResNet and ResNet-RS family 
models.
+
+  This implements the Deep Residual Network from:
+  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
+  Deep Residual Learning for Image Recognition.
+  (https://arxiv.org/pdf/1512.03385) and
+  Irwan Bello, William Fedus, Xianzhi Du, Ekin D. Cubuk, Aravind Srinivas,
+  Tsung-Yi Lin, Jonathon Shlens, Barret Zoph.
+  Revisiting ResNets: Improved Training and Scaling Strategies.
+  (https://arxiv.org/abs/2103.07579).
+  """
+
+  def __init__(
+      self,
+      model_id: int,
+      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
+          shape=[None, None, None, 3]),
+      depth_multiplier: float = 1.0,
+      stem_type: str = 'v0',
+      resnetd_shortcut: bool = False,
+      replace_stem_max_pool: bool = False,
+      se_ratio: Optional[float] = None,
+      init_stochastic_depth_rate: float = 0.0,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_initializer: str = 'VarianceScaling',
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a ResNet model.
+
+    Args:
+      model_id: An `int` of the depth of ResNet backbone model.
+      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
+      depth_multiplier: A `float` of the depth multiplier to uniformly scale up
+        all layers in channel size. This argument is also referred to as
+        `width_multiplier` in (https://arxiv.org/abs/2103.07579).
+      stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to
+        `v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
+      resnetd_shortcut: A `bool` of whether to use ResNet-D shortcut in
+        downsampling blocks.
+      replace_stem_max_pool: A `bool` of whether to replace the max pool in stem
+        with a stride-2 conv.
+      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
+      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
+ activation: A `str` name of the activation function. + use_sync_bn: If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A small `float` added to variance to avoid dividing by zero. + kernel_initializer: A str for kernel initializer of convolutional layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + Default to None. + **kwargs: Additional keyword arguments to be passed. + """ + self._model_id = model_id + self._input_specs = input_specs + self._depth_multiplier = depth_multiplier + self._stem_type = stem_type + self._resnetd_shortcut = resnetd_shortcut + self._replace_stem_max_pool = replace_stem_max_pool + self._se_ratio = se_ratio + self._init_stochastic_depth_rate = init_stochastic_depth_rate + self._use_sync_bn = use_sync_bn + self._activation = activation + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + if use_sync_bn: + self._norm = layers.experimental.SyncBatchNormalization + else: + self._norm = layers.BatchNormalization + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + # Build ResNet. 
+ inputs = tf.keras.Input(shape=input_specs.shape[1:]) + + if stem_type == 'v0': + x = layers.Conv2D( + filters=int(64 * self._depth_multiplier), + kernel_size=7, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation, use_keras_layer=True)(x) + elif stem_type == 'v1': + x = layers.Conv2D( + filters=int(32 * self._depth_multiplier), + kernel_size=3, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation, use_keras_layer=True)(x) + x = layers.Conv2D( + filters=int(32 * self._depth_multiplier), + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation, use_keras_layer=True)(x) + x = layers.Conv2D( + filters=int(64 * self._depth_multiplier), + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation, use_keras_layer=True)(x) + else: + raise ValueError('Stem type {} not supported.'.format(stem_type)) + + if replace_stem_max_pool: + x = layers.Conv2D( + filters=int(64 * self._depth_multiplier), + 
kernel_size=3, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation, use_keras_layer=True)(x) + else: + x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x) + + endpoints = {} + for i, spec in enumerate(RESNET_SPECS[model_id]): + if spec[0] == 'residual': + block_fn = nn_blocks.ResidualBlock + elif spec[0] == 'bottleneck': + block_fn = nn_blocks.BottleneckBlock + else: + raise ValueError('Block fn `{}` is not supported.'.format(spec[0])) + x = self._block_group( + inputs=x, + filters=int(spec[1] * self._depth_multiplier), + strides=(1 if i == 0 else 2), + block_fn=block_fn, + block_repeats=spec[2], + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 2, 5), + name='block_group_l{}'.format(i + 2)) + endpoints[str(i + 2)] = x + + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints} + + super(ResNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs) + + def _block_group(self, + inputs: tf.Tensor, + filters: int, + strides: int, + block_fn: Callable[..., tf.keras.layers.Layer], + block_repeats: int = 1, + stochastic_depth_drop_rate: float = 0.0, + name: str = 'block_group'): + """Creates one group of blocks for the ResNet model. + + Args: + inputs: A `tf.Tensor` of size `[batch, channels, height, width]`. + filters: An `int` number of filters for the first convolution of the + layer. + strides: An `int` stride to use for the first convolution of the layer. + If greater than 1, this layer will downsample the input. + block_fn: The type of block group. Either `nn_blocks.ResidualBlock` or + `nn_blocks.BottleneckBlock`. + block_repeats: An `int` number of blocks contained in the layer. 
+ stochastic_depth_drop_rate: A `float` of drop rate of the current block + group. + name: A `str` name for the block. + + Returns: + The output `tf.Tensor` of the block layer. + """ + x = block_fn( + filters=filters, + strides=strides, + use_projection=True, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + se_ratio=self._se_ratio, + resnetd_shortcut=self._resnetd_shortcut, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + + for _ in range(1, block_repeats): + x = block_fn( + filters=filters, + strides=1, + use_projection=False, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + se_ratio=self._se_ratio, + resnetd_shortcut=self._resnetd_shortcut, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + x) + + return tf.keras.layers.Activation('linear', name=name)(x) + + def get_config(self): + config_dict = { + 'model_id': self._model_id, + 'depth_multiplier': self._depth_multiplier, + 'stem_type': self._stem_type, + 'resnetd_shortcut': self._resnetd_shortcut, + 'replace_stem_max_pool': self._replace_stem_max_pool, + 'activation': self._activation, + 'se_ratio': self._se_ratio, + 'init_stochastic_depth_rate': self._init_stochastic_depth_rate, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return 
cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('resnet') +def build_resnet( + input_specs: tf.keras.layers.InputSpec, + model_config: hyperparams.Config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds ResNet backbone from a config.""" + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'resnet', (f'Inconsistent backbone type ' + f'{backbone_type}') + + return ResNet( + model_id=backbone_cfg.model_id, + input_specs=input_specs, + depth_multiplier=backbone_cfg.depth_multiplier, + stem_type=backbone_cfg.stem_type, + resnetd_shortcut=backbone_cfg.resnetd_shortcut, + replace_stem_max_pool=backbone_cfg.replace_stem_max_pool, + se_ratio=backbone_cfg.se_ratio, + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) diff --git a/official/vision/beta/modeling/backbones/resnet_3d.py b/official/vision/beta/modeling/backbones/resnet_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..cecb4fa9aa6cf6e392ec40ffaba4067c6c2940b0 --- /dev/null +++ b/official/vision/beta/modeling/backbones/resnet_3d.py @@ -0,0 +1,452 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of 3D Residual Networks.""" +from typing import Callable, List, Tuple, Optional + +# Import libraries +import tensorflow as tf + +from official.modeling import hyperparams +from official.modeling import tf_utils +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.modeling.layers import nn_blocks_3d +from official.vision.beta.modeling.layers import nn_layers + +layers = tf.keras.layers + +RESNET_SPECS = { + 50: [ + ('bottleneck3d', 64, 3), + ('bottleneck3d', 128, 4), + ('bottleneck3d', 256, 6), + ('bottleneck3d', 512, 3), + ], + 101: [ + ('bottleneck3d', 64, 3), + ('bottleneck3d', 128, 4), + ('bottleneck3d', 256, 23), + ('bottleneck3d', 512, 3), + ], + 152: [ + ('bottleneck3d', 64, 3), + ('bottleneck3d', 128, 8), + ('bottleneck3d', 256, 36), + ('bottleneck3d', 512, 3), + ], + 200: [ + ('bottleneck3d', 64, 3), + ('bottleneck3d', 128, 24), + ('bottleneck3d', 256, 36), + ('bottleneck3d', 512, 3), + ], + 270: [ + ('bottleneck3d', 64, 4), + ('bottleneck3d', 128, 29), + ('bottleneck3d', 256, 53), + ('bottleneck3d', 512, 4), + ], + 300: [ + ('bottleneck3d', 64, 4), + ('bottleneck3d', 128, 36), + ('bottleneck3d', 256, 54), + ('bottleneck3d', 512, 4), + ], + 350: [ + ('bottleneck3d', 64, 4), + ('bottleneck3d', 128, 36), + ('bottleneck3d', 256, 72), + ('bottleneck3d', 512, 4), + ], +} + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ResNet3D(tf.keras.Model): + """Creates a 3D ResNet family model.""" + + def __init__( + self, + model_id: int, + temporal_strides: List[int], + 
temporal_kernel_sizes: List[Tuple[int]], + use_self_gating: List[int] = None, + input_specs: tf.keras.layers.InputSpec = layers.InputSpec( + shape=[None, None, None, None, 3]), + stem_type: str = 'v0', + stem_conv_temporal_kernel_size: int = 5, + stem_conv_temporal_stride: int = 2, + stem_pool_temporal_stride: int = 2, + init_stochastic_depth_rate: float = 0.0, + activation: str = 'relu', + se_ratio: Optional[float] = None, + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a 3D ResNet model. + + Args: + model_id: An `int` of depth of ResNet backbone model. + temporal_strides: A list of integers that specifies the temporal strides + for all 3d blocks. + temporal_kernel_sizes: A list of tuples that specifies the temporal kernel + sizes for all 3d blocks in different block groups. + use_self_gating: A list of booleans to specify applying self-gating module + or not in each block group. If None, self-gating is not applied. + input_specs: A `tf.keras.layers.InputSpec` of the input tensor. + stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to + `v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187). + stem_conv_temporal_kernel_size: An `int` of temporal kernel size for the + first conv layer. + stem_conv_temporal_stride: An `int` of temporal stride for the first conv + layer. + stem_pool_temporal_stride: An `int` of temporal stride for the first pool + layer. + init_stochastic_depth_rate: A `float` of initial stochastic depth rate. + activation: A `str` of name of the activation function. + se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer. + use_sync_bn: If True, use synchronized batch normalization. 
+ norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_initializer: A str for kernel initializer of convolutional layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + Default to None. + **kwargs: Additional keyword arguments to be passed. + """ + self._model_id = model_id + self._temporal_strides = temporal_strides + self._temporal_kernel_sizes = temporal_kernel_sizes + self._input_specs = input_specs + self._stem_type = stem_type + self._stem_conv_temporal_kernel_size = stem_conv_temporal_kernel_size + self._stem_conv_temporal_stride = stem_conv_temporal_stride + self._stem_pool_temporal_stride = stem_pool_temporal_stride + self._use_self_gating = use_self_gating + self._se_ratio = se_ratio + self._init_stochastic_depth_rate = init_stochastic_depth_rate + self._use_sync_bn = use_sync_bn + self._activation = activation + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + if use_sync_bn: + self._norm = layers.experimental.SyncBatchNormalization + else: + self._norm = layers.BatchNormalization + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + # Build ResNet3D backbone. + inputs = tf.keras.Input(shape=input_specs.shape[1:]) + + # Build stem. 
+ if stem_type == 'v0': + x = layers.Conv3D( + filters=64, + kernel_size=[stem_conv_temporal_kernel_size, 7, 7], + strides=[stem_conv_temporal_stride, 2, 2], + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + elif stem_type == 'v1': + x = layers.Conv3D( + filters=32, + kernel_size=[stem_conv_temporal_kernel_size, 3, 3], + strides=[stem_conv_temporal_stride, 2, 2], + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + x = layers.Conv3D( + filters=32, + kernel_size=[1, 3, 3], + strides=[1, 1, 1], + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + x = layers.Conv3D( + filters=64, + kernel_size=[1, 3, 3], + strides=[1, 1, 1], + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + else: + raise ValueError(f'Stem type {stem_type} not supported.') + + temporal_kernel_size = 1 if stem_pool_temporal_stride == 1 else 3 + x = layers.MaxPool3D( + pool_size=[temporal_kernel_size, 3, 3], + strides=[stem_pool_temporal_stride, 2, 2], + padding='same')( + 
x) + + # Build intermediate blocks and endpoints. + resnet_specs = RESNET_SPECS[model_id] + if len(temporal_strides) != len(resnet_specs) or len( + temporal_kernel_sizes) != len(resnet_specs): + raise ValueError( + 'Number of blocks in temporal specs should equal to resnet_specs.') + + endpoints = {} + for i, resnet_spec in enumerate(resnet_specs): + if resnet_spec[0] == 'bottleneck3d': + block_fn = nn_blocks_3d.BottleneckBlock3D + else: + raise ValueError('Block fn `{}` is not supported.'.format( + resnet_spec[0])) + + x = self._block_group( + inputs=x, + filters=resnet_spec[1], + temporal_kernel_sizes=temporal_kernel_sizes[i], + temporal_strides=temporal_strides[i], + spatial_strides=(1 if i == 0 else 2), + block_fn=block_fn, + block_repeats=resnet_spec[2], + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 2, 5), + use_self_gating=use_self_gating[i] if use_self_gating else False, + name='block_group_l{}'.format(i + 2)) + endpoints[str(i + 2)] = x + + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints} + + super(ResNet3D, self).__init__(inputs=inputs, outputs=endpoints, **kwargs) + + def _block_group(self, + inputs: tf.Tensor, + filters: int, + temporal_kernel_sizes: Tuple[int], + temporal_strides: int, + spatial_strides: int, + block_fn: Callable[ + ..., + tf.keras.layers.Layer] = nn_blocks_3d.BottleneckBlock3D, + block_repeats: int = 1, + stochastic_depth_drop_rate: float = 0.0, + use_self_gating: bool = False, + name: str = 'block_group'): + """Creates one group of blocks for the ResNet3D model. + + Args: + inputs: A `tf.Tensor` of size `[batch, channels, height, width]`. + filters: An `int` of number of filters for the first convolution of the + layer. + temporal_kernel_sizes: A tuple that specifies the temporal kernel sizes + for each block in the current group. + temporal_strides: An `int` of temporal strides for the first convolution + in this group. 
+ spatial_strides: An `int` stride to use for the first convolution of the + layer. If greater than 1, this layer will downsample the input. + block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`. + block_repeats: An `int` of number of blocks contained in the layer. + stochastic_depth_drop_rate: A `float` of drop rate of the current block + group. + use_self_gating: A `bool` that specifies whether to apply self-gating + module or not. + name: A `str` name for the block. + + Returns: + The output `tf.Tensor` of the block layer. + """ + if len(temporal_kernel_sizes) != block_repeats: + raise ValueError( + 'Number of elements in `temporal_kernel_sizes` must equal to `block_repeats`.' + ) + + # Only apply self-gating module in the last block. + use_self_gating_list = [False] * (block_repeats - 1) + [use_self_gating] + + x = block_fn( + filters=filters, + temporal_kernel_size=temporal_kernel_sizes[0], + temporal_strides=temporal_strides, + spatial_strides=spatial_strides, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + use_self_gating=use_self_gating_list[0], + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + + for i in range(1, block_repeats): + x = block_fn( + filters=filters, + temporal_kernel_size=temporal_kernel_sizes[i], + temporal_strides=1, + spatial_strides=1, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + use_self_gating=use_self_gating_list[i], + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + x) + + return 
tf.identity(x, name=name) + + def get_config(self): + config_dict = { + 'model_id': self._model_id, + 'temporal_strides': self._temporal_strides, + 'temporal_kernel_sizes': self._temporal_kernel_sizes, + 'stem_type': self._stem_type, + 'stem_conv_temporal_kernel_size': self._stem_conv_temporal_kernel_size, + 'stem_conv_temporal_stride': self._stem_conv_temporal_stride, + 'stem_pool_temporal_stride': self._stem_pool_temporal_stride, + 'use_self_gating': self._use_self_gating, + 'se_ratio': self._se_ratio, + 'init_stochastic_depth_rate': self._init_stochastic_depth_rate, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('resnet_3d') +def build_resnet3d( + input_specs: tf.keras.layers.InputSpec, + model_config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds ResNet 3d backbone from a config.""" + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + + # Flatten configs before passing to the backbone. 
+ temporal_strides = [] + temporal_kernel_sizes = [] + use_self_gating = [] + for block_spec in backbone_cfg.block_specs: + temporal_strides.append(block_spec.temporal_strides) + temporal_kernel_sizes.append(block_spec.temporal_kernel_sizes) + use_self_gating.append(block_spec.use_self_gating) + + return ResNet3D( + model_id=backbone_cfg.model_id, + temporal_strides=temporal_strides, + temporal_kernel_sizes=temporal_kernel_sizes, + use_self_gating=use_self_gating, + input_specs=input_specs, + stem_type=backbone_cfg.stem_type, + stem_conv_temporal_kernel_size=backbone_cfg + .stem_conv_temporal_kernel_size, + stem_conv_temporal_stride=backbone_cfg.stem_conv_temporal_stride, + stem_pool_temporal_stride=backbone_cfg.stem_pool_temporal_stride, + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + se_ratio=backbone_cfg.se_ratio, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + +@factory.register_backbone_builder('resnet_3d_rs') +def build_resnet3d_rs( + input_specs: tf.keras.layers.InputSpec, + model_config: hyperparams.Config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds ResNet-3D-RS backbone from a config.""" + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + + # Flatten configs before passing to the backbone. 
+ temporal_strides = [] + temporal_kernel_sizes = [] + use_self_gating = [] + for i, block_spec in enumerate(backbone_cfg.block_specs): + temporal_strides.append(block_spec.temporal_strides) + use_self_gating.append(block_spec.use_self_gating) + block_repeats_i = RESNET_SPECS[backbone_cfg.model_id][i][-1] + temporal_kernel_sizes.append(list(block_spec.temporal_kernel_sizes) * + block_repeats_i) + return ResNet3D( + model_id=backbone_cfg.model_id, + temporal_strides=temporal_strides, + temporal_kernel_sizes=temporal_kernel_sizes, + use_self_gating=use_self_gating, + input_specs=input_specs, + stem_type=backbone_cfg.stem_type, + stem_conv_temporal_kernel_size=backbone_cfg + .stem_conv_temporal_kernel_size, + stem_conv_temporal_stride=backbone_cfg.stem_conv_temporal_stride, + stem_pool_temporal_stride=backbone_cfg.stem_pool_temporal_stride, + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + se_ratio=backbone_cfg.se_ratio, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) diff --git a/official/vision/beta/modeling/backbones/resnet_3d_test.py b/official/vision/beta/modeling/backbones/resnet_3d_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ea40c8f4fddb45bb586ce2b733f32fb73ee63e55 --- /dev/null +++ b/official/vision/beta/modeling/backbones/resnet_3d_test.py @@ -0,0 +1,103 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for resnet.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import resnet_3d + + +class ResNet3DTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 50, 4, 'v0', False, 0.0), + (128, 50, 4, 'v1', False, 0.2), + (256, 50, 4, 'v1', True, 0.2), + ) + def test_network_creation(self, input_size, model_id, endpoint_filter_scale, + stem_type, se_ratio, init_stochastic_depth_rate): + """Test creation of ResNet3D family models.""" + tf.keras.backend.set_image_data_format('channels_last') + temporal_strides = [1, 1, 1, 1] + temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1), + (1, 3, 1)] + use_self_gating = [True, False, True, False] + + network = resnet_3d.ResNet3D( + model_id=model_id, + temporal_strides=temporal_strides, + temporal_kernel_sizes=temporal_kernel_sizes, + use_self_gating=use_self_gating, + stem_type=stem_type, + se_ratio=se_ratio, + init_stochastic_depth_rate=init_stochastic_depth_rate) + inputs = tf.keras.Input(shape=(8, input_size, input_size, 3), batch_size=1) + endpoints = network(inputs) + + self.assertAllEqual([ + 1, 2, input_size / 2**2, input_size / 2**2, 64 * endpoint_filter_scale + ], endpoints['2'].shape.as_list()) + self.assertAllEqual([ + 1, 2, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale + ], endpoints['3'].shape.as_list()) + self.assertAllEqual([ + 1, 2, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale + ], 
endpoints['4'].shape.as_list()) + self.assertAllEqual([ + 1, 2, input_size / 2**5, input_size / 2**5, 512 * endpoint_filter_scale + ], endpoints['5'].shape.as_list()) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + kwargs = dict( + model_id=50, + temporal_strides=[1, 1, 1, 1], + temporal_kernel_sizes=[(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1), + (1, 3, 1)], + stem_type='v0', + stem_conv_temporal_kernel_size=5, + stem_conv_temporal_stride=2, + stem_pool_temporal_stride=2, + se_ratio=0.0, + use_self_gating=None, + init_stochastic_depth_rate=0.0, + use_sync_bn=False, + activation='relu', + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + ) + network = resnet_3d.ResNet3D(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = resnet_3d.ResNet3D.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. + _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/backbones/resnet_deeplab.py b/official/vision/beta/modeling/backbones/resnet_deeplab.py new file mode 100644 index 0000000000000000000000000000000000000000..9f7b6a6d2e12e83bf2e805dbf66aceba67a75b51 --- /dev/null +++ b/official/vision/beta/modeling/backbones/resnet_deeplab.py @@ -0,0 +1,365 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of Residual Networks with Deeplab modifications.""" + +from typing import Callable, Optional, Tuple, List + +import numpy as np +import tensorflow as tf +from official.modeling import tf_utils +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.modeling.layers import nn_blocks +from official.vision.beta.modeling.layers import nn_layers + +layers = tf.keras.layers + +# Specifications for different ResNet variants. +# Each entry specifies block configurations of the particular ResNet variant. +# Each element in the block configuration is in the following format: +# (block_fn, num_filters, block_repeats) +RESNET_SPECS = { + 50: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 4), + ('bottleneck', 256, 6), + ('bottleneck', 512, 3), + ], + 101: [ + ('bottleneck', 64, 3), + ('bottleneck', 128, 4), + ('bottleneck', 256, 23), + ('bottleneck', 512, 3), + ], +} + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class DilatedResNet(tf.keras.Model): + """Creates a ResNet model with Deeplabv3 modifications. + + This backbone is suitable for semantic segmentation. This implements + Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam. + Rethinking Atrous Convolution for Semantic Image Segmentation. 
+ (https://arxiv.org/pdf/1706.05587) + """ + + def __init__( + self, + model_id: int, + output_stride: int, + input_specs: tf.keras.layers.InputSpec = layers.InputSpec( + shape=[None, None, None, 3]), + stem_type: str = 'v0', + se_ratio: Optional[float] = None, + init_stochastic_depth_rate: float = 0.0, + multigrid: Optional[Tuple[int]] = None, + last_stage_repeats: int = 1, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a ResNet model with DeepLab modification. + + Args: + model_id: An `int` specifies depth of ResNet backbone model. + output_stride: An `int` of output stride, ratio of input to output + resolution. + input_specs: A `tf.keras.layers.InputSpec` of the input tensor. + stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7 + conv by 3 3x3 convs. + se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer. + init_stochastic_depth_rate: A `float` of initial stochastic depth rate. + multigrid: A tuple of the same length as the number of blocks in the last + resnet stage. + last_stage_repeats: An `int` that specifies how many times last stage is + repeated. + activation: A `str` name of the activation function. + use_sync_bn: If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_initializer: A str for kernel initializer of convolutional layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + Default to None. + **kwargs: Additional keyword arguments to be passed. 
+ """ + self._model_id = model_id + self._output_stride = output_stride + self._input_specs = input_specs + self._use_sync_bn = use_sync_bn + self._activation = activation + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + if use_sync_bn: + self._norm = layers.experimental.SyncBatchNormalization + else: + self._norm = layers.BatchNormalization + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._stem_type = stem_type + self._se_ratio = se_ratio + self._init_stochastic_depth_rate = init_stochastic_depth_rate + + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + # Build ResNet. + inputs = tf.keras.Input(shape=input_specs.shape[1:]) + + if stem_type == 'v0': + x = layers.Conv2D( + filters=64, + kernel_size=7, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + elif stem_type == 'v1': + x = layers.Conv2D( + filters=64, + kernel_size=3, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + x = layers.Conv2D( + filters=64, + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + x 
= layers.Conv2D( + filters=128, + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + x) + x = tf_utils.get_activation(activation)(x) + else: + raise ValueError('Stem type {} not supported.'.format(stem_type)) + + x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x) + + normal_resnet_stage = int(np.math.log2(self._output_stride)) - 2 + + endpoints = {} + for i in range(normal_resnet_stage + 1): + spec = RESNET_SPECS[model_id][i] + if spec[0] == 'bottleneck': + block_fn = nn_blocks.BottleneckBlock + else: + raise ValueError('Block fn `{}` is not supported.'.format(spec[0])) + x = self._block_group( + inputs=x, + filters=spec[1], + strides=(1 if i == 0 else 2), + dilation_rate=1, + block_fn=block_fn, + block_repeats=spec[2], + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats), + name='block_group_l{}'.format(i + 2)) + endpoints[str(i + 2)] = x + + dilation_rate = 2 + for i in range(normal_resnet_stage + 1, 3 + last_stage_repeats): + spec = RESNET_SPECS[model_id][i] if i < 3 else RESNET_SPECS[model_id][-1] + if spec[0] == 'bottleneck': + block_fn = nn_blocks.BottleneckBlock + else: + raise ValueError('Block fn `{}` is not supported.'.format(spec[0])) + x = self._block_group( + inputs=x, + filters=spec[1], + strides=1, + dilation_rate=dilation_rate, + block_fn=block_fn, + block_repeats=spec[2], + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats), + multigrid=multigrid if i >= 3 else None, + name='block_group_l{}'.format(i + 2)) + dilation_rate *= 2 + + endpoints[str(normal_resnet_stage + 2)] = x + + self._output_specs = {l: endpoints[l].get_shape() for l in 
endpoints} + + super(DilatedResNet, self).__init__( + inputs=inputs, outputs=endpoints, **kwargs) + + def _block_group(self, + inputs: tf.Tensor, + filters: int, + strides: int, + dilation_rate: int, + block_fn: Callable[..., tf.keras.layers.Layer], + block_repeats: int = 1, + stochastic_depth_drop_rate: float = 0.0, + multigrid: Optional[List[int]] = None, + name: str = 'block_group'): + """Creates one group of blocks for the ResNet model. + + Deeplab applies strides at the last block. + + Args: + inputs: A `tf.Tensor` of size `[batch, channels, height, width]`. + filters: An `int` off number of filters for the first convolution of the + layer. + strides: An `int` of stride to use for the first convolution of the layer. + If greater than 1, this layer will downsample the input. + dilation_rate: An `int` of diluted convolution rates. + block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`. + block_repeats: An `int` of number of blocks contained in the layer. + stochastic_depth_drop_rate: A `float` of drop rate of the current block + group. + multigrid: A list of `int` or None. If specified, dilation rates for each + block is scaled up by its corresponding factor in the multigrid. + name: A `str` name for the block. + + Returns: + The output `tf.Tensor` of the block layer. + """ + if multigrid is not None and len(multigrid) != block_repeats: + raise ValueError('multigrid has to match number of block_repeats') + + if multigrid is None: + multigrid = [1] * block_repeats + + # TODO(arashwan): move striding at the of the block. 
+ x = block_fn( + filters=filters, + strides=strides, + dilation_rate=dilation_rate * multigrid[0], + use_projection=True, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + for i in range(1, block_repeats): + x = block_fn( + filters=filters, + strides=1, + dilation_rate=dilation_rate * multigrid[i], + use_projection=False, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + x) + + return tf.identity(x, name=name) + + def get_config(self): + config_dict = { + 'model_id': self._model_id, + 'output_stride': self._output_stride, + 'stem_type': self._stem_type, + 'se_ratio': self._se_ratio, + 'init_stochastic_depth_rate': self._init_stochastic_depth_rate, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('dilated_resnet') +def build_dilated_resnet( + input_specs: tf.keras.layers.InputSpec, + model_config, + l2_regularizer: 
tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds ResNet backbone from a config.""" + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'dilated_resnet', (f'Inconsistent backbone type ' + f'{backbone_type}') + + return DilatedResNet( + model_id=backbone_cfg.model_id, + output_stride=backbone_cfg.output_stride, + input_specs=input_specs, + stem_type=backbone_cfg.stem_type, + se_ratio=backbone_cfg.se_ratio, + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + multigrid=backbone_cfg.multigrid, + last_stage_repeats=backbone_cfg.last_stage_repeats, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) diff --git a/official/vision/beta/modeling/backbones/resnet_deeplab_test.py b/official/vision/beta/modeling/backbones/resnet_deeplab_test.py new file mode 100644 index 0000000000000000000000000000000000000000..53169a1fe06935b9be8aea203a68e5e17aa15451 --- /dev/null +++ b/official/vision/beta/modeling/backbones/resnet_deeplab_test.py @@ -0,0 +1,144 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

# Lint as: python3
"""Tests for resnet_deeplab models."""

import math

# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.modeling.backbones import resnet_deeplab


class ResNetTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the dilated (DeepLab-style) ResNet backbone."""

  @parameterized.parameters(
      (128, 50, 4, 8),
      (128, 101, 4, 8),
      (128, 50, 4, 16),
      (128, 101, 4, 16),
  )
  def test_network_creation(self, input_size, model_id,
                            endpoint_filter_scale, output_stride):
    """Test creation of ResNet models."""
    tf.keras.backend.set_image_data_format('channels_last')

    network = resnet_deeplab.DilatedResNet(model_id=model_id,
                                           output_stride=output_stride)
    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = network(inputs)
    print(endpoints)
    # The backbone emits a single endpoint keyed by log2(output_stride).
    # NOTE: use stdlib `math.log2` — the `np.math` alias is deprecated and
    # was removed in NumPy 2.0.
    self.assertAllEqual([
        1, input_size / output_stride, input_size / output_stride,
        512 * endpoint_filter_scale
    ], endpoints[str(int(math.log2(output_stride)))].shape.as_list())

  @parameterized.parameters(
      ('v0', None, 0.0),
      ('v1', None, 0.0),
      ('v1', 0.25, 0.0),
      ('v1', 0.25, 0.2),
  )
  def test_network_features(self, stem_type, se_ratio,
                            init_stochastic_depth_rate):
    """Test additional features of ResNet models."""
    input_size = 128
    model_id = 50
    endpoint_filter_scale = 4
    output_stride = 8

    tf.keras.backend.set_image_data_format('channels_last')

    network = resnet_deeplab.DilatedResNet(
        model_id=model_id,
        output_stride=output_stride,
        stem_type=stem_type,
        se_ratio=se_ratio,
        init_stochastic_depth_rate=init_stochastic_depth_rate)
    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = network(inputs)
    print(endpoints)
    # See note above: stdlib `math.log2` instead of the removed `np.math`.
    self.assertAllEqual([
        1, input_size / output_stride, input_size / output_stride,
        512 * endpoint_filter_scale
    ], endpoints[str(int(math.log2(output_stride)))].shape.as_list())

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.cloud_tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          use_sync_bn=[False, True],
      ))
  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
    """Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(64, 128, 128, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      network = resnet_deeplab.DilatedResNet(
          model_id=50, output_stride=8, use_sync_bn=use_sync_bn)
      _ = network(inputs)

  @parameterized.parameters(1, 3, 4)
  def test_input_specs(self, input_dim):
    """Test different input feature dimensions."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
    network = resnet_deeplab.DilatedResNet(
        model_id=50, output_stride=8, input_specs=input_specs)

    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
    _ = network(inputs)

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        model_id=50,
        output_stride=8,
        stem_type='v0',
        se_ratio=0.25,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = resnet_deeplab.DilatedResNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = resnet_deeplab.DilatedResNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Tests for resnet."""

# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.modeling.backbones import resnet


class ResNetTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the ResNet backbone family."""

  @parameterized.parameters(
      (128, 18, 1),
      (128, 34, 1),
      (128, 50, 4),
      (128, 101, 4),
      (128, 152, 4),
  )
  def test_network_creation(self, input_size, model_id,
                            endpoint_filter_scale):
    """Test creation of ResNet family models."""
    # Expected parameter counts per model depth.
    resnet_params = {
        18: 11190464,
        34: 21306048,
        50: 23561152,
        101: 42605504,
        152: 58295232,
    }
    tf.keras.backend.set_image_data_format('channels_last')

    network = resnet.ResNet(model_id=model_id)
    self.assertEqual(network.count_params(), resnet_params[model_id])

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = network(inputs)

    # Each level halves the spatial size and uses the listed base filters.
    base_filters = {2: 64, 3: 128, 4: 256, 5: 512}
    for level, filters in base_filters.items():
      expected_size = input_size / 2**level
      self.assertAllEqual(
          [1, expected_size, expected_size, filters * endpoint_filter_scale],
          endpoints[str(level)].shape.as_list())

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.cloud_tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          use_sync_bn=[False, True],
      ))
  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
    """Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(64, 128, 128, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      network = resnet.ResNet(model_id=50, use_sync_bn=use_sync_bn)
      _ = network(inputs)

  @parameterized.parameters(
      (128, 34, 1, 'v0', None, 0.0, 1.0, False, False),
      (128, 34, 1, 'v1', 0.25, 0.2, 1.25, True, True),
      (128, 50, 4, 'v0', None, 0.0, 1.5, False, False),
      (128, 50, 4, 'v1', 0.25, 0.2, 2.0, True, True),
  )
  def test_resnet_rs(self, input_size, model_id, endpoint_filter_scale,
                     stem_type, se_ratio, init_stochastic_depth_rate,
                     depth_multiplier, resnetd_shortcut, replace_stem_max_pool):
    """Test creation of ResNet family models."""
    tf.keras.backend.set_image_data_format('channels_last')
    network = resnet.ResNet(
        model_id=model_id,
        depth_multiplier=depth_multiplier,
        stem_type=stem_type,
        resnetd_shortcut=resnetd_shortcut,
        replace_stem_max_pool=replace_stem_max_pool,
        se_ratio=se_ratio,
        init_stochastic_depth_rate=init_stochastic_depth_rate)
    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    _ = network(inputs)

  @parameterized.parameters(1, 3, 4)
  def test_input_specs(self, input_dim):
    """Test different input feature dimensions."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
    network = resnet.ResNet(model_id=50, input_specs=input_specs)

    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
    _ = network(inputs)

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        model_id=50,
        depth_multiplier=1.0,
        stem_type='v0',
        se_ratio=None,
        resnetd_shortcut=False,
        replace_stem_max_pool=False,
        init_stochastic_depth_rate=0.0,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = resnet.ResNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = resnet.ResNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Contains definitions of RevNet."""

from typing import Any, Callable, Dict, Optional
# Import libraries
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.modeling.backbones import factory
from official.vision.beta.modeling.layers import nn_blocks


# Specifications for different RevNet variants.
# Each entry specifies block configurations of the particular RevNet variant.
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
REVNET_SPECS = {
    38: [
        ('residual', 32, 3),
        ('residual', 64, 3),
        ('residual', 112, 3),
    ],
    56: [
        ('bottleneck', 128, 2),
        ('bottleneck', 256, 2),
        ('bottleneck', 512, 3),
        ('bottleneck', 832, 2),
    ],
    104: [
        ('bottleneck', 128, 2),
        ('bottleneck', 256, 2),
        ('bottleneck', 512, 11),
        ('bottleneck', 832, 2),
    ],
}


@tf.keras.utils.register_keras_serializable(package='Vision')
class RevNet(tf.keras.Model):
  """Creates a Reversible ResNet (RevNet) family model.

  This implements:
    Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
    The Reversible Residual Network: Backpropagation Without Storing
    Activations.
    (https://arxiv.org/pdf/1707.04585.pdf)
  """

  def __init__(
      self,
      model_id: int,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a RevNet model.

    Args:
      model_id: An `int` of depth/id of ResNet backbone model.
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A str for kernel initializer of convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._model_id = model_id
    self._input_specs = input_specs
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization

    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    # Build RevNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    # Stem: 7x7/2 conv -> BN -> activation -> 3x3/2 max-pool.
    x = tf.keras.layers.Conv2D(
        filters=REVNET_SPECS[model_id][0][1],
        kernel_size=7, strides=2, use_bias=False, padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)(inputs)
    x = self._norm(
        axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)
    x = tf_utils.get_activation(activation)(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

    endpoints = {}
    for i, spec in enumerate(REVNET_SPECS[model_id]):
      if spec[0] == 'residual':
        inner_block_fn = nn_blocks.ResidualInner
      elif spec[0] == 'bottleneck':
        inner_block_fn = nn_blocks.BottleneckResidualInner
      else:
        raise ValueError('Block fn `{}` is not supported.'.format(spec[0]))

      # Reversible blocks split the channels into two halves, so the filter
      # count must be even.
      if spec[1] % 2 != 0:
        raise ValueError('Number of output filters must be even to ensure '
                         'splitting in channel dimension for reversible blocks')

      x = self._block_group(
          inputs=x,
          filters=spec[1],
          strides=(1 if i == 0 else 2),
          inner_block_fn=inner_block_fn,
          block_repeats=spec[2],
          batch_norm_first=(i != 0),  # Only skip on first block
          name='revblock_group_{}'.format(i + 2))
      endpoints[str(i + 2)] = x

    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    super(RevNet, self).__init__(inputs=inputs, outputs=endpoints, **kwargs)

  def _block_group(self,
                   inputs: tf.Tensor,
                   filters: int,
                   strides: int,
                   inner_block_fn: Callable[..., tf.keras.layers.Layer],
                   block_repeats: int,
                   batch_norm_first: bool,
                   name: str = 'revblock_group') -> tf.Tensor:
    """Creates one reversible block for RevNet model.

    Args:
      inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
      filters: An `int` number of filters for the first convolution of the
        layer.
      strides: An `int` stride to use for the first convolution of the layer. If
        greater than 1, this block group will downsample the input.
      inner_block_fn: Either `nn_blocks.ResidualInner` or
        `nn_blocks.BottleneckResidualInner`.
      block_repeats: An `int` number of blocks contained in this block group.
      batch_norm_first: A `bool` that specifies whether to apply
        BatchNormalization and activation layer before feeding into convolution
        layers.
      name: A `str` name for the block.

    Returns:
      The output `tf.Tensor` of the block layer.
    """
    x = inputs
    for i in range(block_repeats):
      is_first_block = i == 0
      # Only first residual layer in block gets downsampled
      curr_strides = strides if is_first_block else 1
      f = inner_block_fn(
          filters=filters // 2,
          strides=curr_strides,
          batch_norm_first=batch_norm_first and is_first_block,
          kernel_regularizer=self._kernel_regularizer)
      g = inner_block_fn(
          filters=filters // 2,
          strides=1,
          batch_norm_first=batch_norm_first and is_first_block,
          kernel_regularizer=self._kernel_regularizer)
      x = nn_blocks.ReversibleLayer(f, g)(x)

    return tf.identity(x, name=name)

  def get_config(self) -> Dict[str, Any]:
    """Returns the constructor arguments needed to rebuild this backbone."""
    config_dict = {
        'model_id': self._model_id,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
    }
    return config_dict

  @classmethod
  def from_config(cls,
                  config: Dict[str, Any],
                  custom_objects: Optional[Any] = None) -> tf.keras.Model:
    return cls(**config)

  @property
  def output_specs(self) -> Dict[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output.

    Keys are string levels (`str(i + 2)`), matching how endpoints are built
    in `__init__`; the previous `Dict[int, ...]` annotation was incorrect.
    """
    return self._output_specs


@factory.register_backbone_builder('revnet')
def build_revnet(
    input_specs: tf.keras.layers.InputSpec,
    model_config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
  """Builds RevNet backbone from a config."""
  backbone_type = model_config.backbone.type
  backbone_cfg = model_config.backbone.get()
  norm_activation_config = model_config.norm_activation
  assert backbone_type == 'revnet', (f'Inconsistent backbone type '
                                     f'{backbone_type}')

  return RevNet(
      model_id=backbone_cfg.model_id,
      input_specs=input_specs,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Tests for RevNet."""

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.beta.modeling.backbones import revnet


class RevNetTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the RevNet backbone."""

  @parameterized.parameters(
      (128, 56, 4),
      (128, 104, 4),
  )
  def test_network_creation(self, input_size, model_id,
                            endpoint_filter_scale):
    """Test creation of RevNet family models."""
    tf.keras.backend.set_image_data_format('channels_last')

    network = revnet.RevNet(model_id=model_id)
    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = network(inputs)
    network.summary()

    # Each level halves the spatial size; base filters follow REVNET_SPECS.
    base_filters = {2: 128, 3: 256, 4: 512, 5: 832}
    for level, filters in base_filters.items():
      expected_size = input_size / 2**level
      self.assertAllEqual(
          [1, expected_size, expected_size, filters * endpoint_filter_scale],
          endpoints[str(level)].shape.as_list())

  @parameterized.parameters(1, 3, 4)
  def test_input_specs(self, input_dim):
    """Test different input feature dimensions."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
    network = revnet.RevNet(model_id=56, input_specs=input_specs)

    inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
    _ = network(inputs)

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        model_id=56,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
    )
    network = revnet.RevNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = revnet.RevNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains definitions of SpineNet Networks."""

import math
from typing import Any, List, Optional, Tuple

# Import libraries

from absl import logging
import tensorflow as tf

from official.modeling import tf_utils
from official.vision.beta.modeling.backbones import factory
from official.vision.beta.modeling.layers import nn_blocks
from official.vision.beta.modeling.layers import nn_layers
from official.vision.beta.ops import spatial_transform_ops

layers = tf.keras.layers

# Base filter count per feature level, before `filter_size_scale` is applied.
FILTER_SIZE_MAP = {
    1: 32,
    2: 64,
    3: 128,
    4: 256,
    5: 256,
    6: 256,
    7: 256,
}

# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'bottleneck', (0, 1), False),
    (4, 'residual', (0, 1), False),
    (3, 'bottleneck', (2, 3), False),
    (4, 'bottleneck', (2, 4), False),
    (6, 'residual', (3, 5), False),
    (4, 'bottleneck', (3, 5), False),
    (5, 'residual', (6, 7), False),
    (7, 'residual', (6, 8), False),
    (5, 'bottleneck', (8, 9), False),
    (5, 'bottleneck', (8, 10), False),
    (4, 'bottleneck', (5, 10), True),
    (3, 'bottleneck', (4, 10), True),
    (5, 'bottleneck', (7, 12), True),
    (7, 'bottleneck', (5, 14), True),
    (6, 'bottleneck', (12, 14), True),
]

# Compound-scaling coefficients per SpineNet variant.
SCALING_MAP = {
    '49S': {
        'endpoints_num_filters': 128,
        'filter_size_scale': 0.65,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '49': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 1,
    },
    '96': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 0.5,
        'block_repeats': 2,
    },
    '143': {
        'endpoints_num_filters': 256,
        'filter_size_scale': 1.0,
        'resample_alpha': 1.0,
        'block_repeats': 3,
    },
    '190': {
        'endpoints_num_filters': 512,
        'filter_size_scale': 1.3,
        'resample_alpha': 1.0,
        'block_repeats': 4,
    },
}


class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level: int, block_fn: str, input_offsets: Tuple[int, int],
               is_output: bool):
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for SpineNet."""
  if not block_specs:
    block_specs = SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]


@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
  """Creates a SpineNet family model.

  This implements:
    Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
    Yin Cui, Quoc V. Le, Xiaodan Song.
    SpineNet: Learning Scale-Permuted Backbone for Recognition and
    Localization.
    (https://arxiv.org/abs/1912.05027)
  """

  def __init__(
      self,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, 640, 640, 3]),
      min_level: int = 3,
      max_level: int = 7,
      # NOTE(review): mutable/call default — the same BlockSpec list instance
      # is shared by every SpineNet constructed with the default; confirm this
      # is intended before changing the signature.
      block_specs: List[BlockSpec] = build_block_specs(),
      endpoints_num_filters: int = 256,
      resample_alpha: float = 0.5,
      block_repeats: int = 1,
      filter_size_scale: float = 1.0,
      init_stochastic_depth_rate: float = 0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      **kwargs):
    """Initializes a SpineNet model.

    Args:
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      min_level: An `int` of min level for output multiscale features.
      max_level: An `int` of max level for output multiscale features.
      block_specs: A list of block specifications for the SpineNet model
        discovered by NAS.
      endpoints_num_filters: An `int` of feature dimension for the output
        endpoints.
      resample_alpha: A `float` of resampling factor in cross-scale connections.
      block_repeats: An `int` of number of blocks contained in the layer.
      filter_size_scale: A `float` of multiplier for the filters (number of
        channels) for all convolution ops. The value must be greater than zero.
        Typical usage will be to set this value in (0, 1) to reduce the number
        of parameters or computation cost of the model.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      kernel_initializer: A str for kernel initializer of convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
        Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A small `float` added to variance to avoid dividing by zero.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._resample_alpha = resample_alpha
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._init_stochastic_depth_rate = init_stochastic_depth_rate
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if activation == 'relu':
      self._activation_fn = tf.nn.relu
    elif activation == 'swish':
      self._activation_fn = tf.nn.swish
    else:
      raise ValueError('Activation {} not implemented.'.format(activation))
    self._init_block_fn = 'bottleneck'
    self._num_init_blocks = 2

    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
+ inputs = tf.keras.Input(shape=input_specs.shape[1:]) + + net = self._build_stem(inputs=inputs) + net = self._build_scale_permuted_network( + net=net, input_width=input_specs.shape[2]) + endpoints = self._build_endpoints(net=net) + + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints} + super(SpineNet, self).__init__(inputs=inputs, outputs=endpoints) + + def _block_group(self, + inputs: tf.Tensor, + filters: int, + strides: int, + block_fn_cand: str, + block_repeats: int = 1, + stochastic_depth_drop_rate: Optional[float] = None, + name: str = 'block_group'): + """Creates one group of blocks for the SpineNet model.""" + block_fn_candidates = { + 'bottleneck': nn_blocks.BottleneckBlock, + 'residual': nn_blocks.ResidualBlock, + } + block_fn = block_fn_candidates[block_fn_cand] + _, _, _, num_filters = inputs.get_shape().as_list() + + if block_fn_cand == 'bottleneck': + use_projection = not (num_filters == (filters * 4) and strides == 1) + else: + use_projection = not (num_filters == filters and strides == 1) + + x = block_fn( + filters=filters, + strides=strides, + use_projection=use_projection, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + for _ in range(1, block_repeats): + x = block_fn( + filters=filters, + strides=1, + use_projection=False, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + x) + return tf.identity(x, name=name) + + def _build_stem(self, inputs): + """Builds 
SpineNet stem.""" + x = layers.Conv2D( + filters=64, + kernel_size=7, + strides=2, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x) + + net = [] + # Build the initial level 2 blocks. + for i in range(self._num_init_blocks): + x = self._block_group( + inputs=x, + filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale), + strides=1, + block_fn_cand=self._init_block_fn, + block_repeats=self._block_repeats, + name='stem_block_{}'.format(i + 1)) + net.append(x) + return net + + def _build_scale_permuted_network(self, + net, + input_width, + weighted_fusion=False): + """Builds scale-permuted network.""" + net_sizes = [int(math.ceil(input_width / 2**2))] * len(net) + net_block_fns = [self._init_block_fn] * len(net) + num_outgoing_connections = [0] * len(net) + + endpoints = {} + for i, block_spec in enumerate(self._block_specs): + # Find out specs for the target block. + target_width = int(math.ceil(input_width / 2**block_spec.level)) + target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] * + self._filter_size_scale) + target_block_fn = block_spec.block_fn + + # Resample then merge input0 and input1. 
+ parents = [] + input0 = block_spec.input_offsets[0] + input1 = block_spec.input_offsets[1] + + x0 = self._resample_with_alpha( + inputs=net[input0], + input_width=net_sizes[input0], + input_block_fn=net_block_fns[input0], + target_width=target_width, + target_num_filters=target_num_filters, + target_block_fn=target_block_fn, + alpha=self._resample_alpha) + parents.append(x0) + num_outgoing_connections[input0] += 1 + + x1 = self._resample_with_alpha( + inputs=net[input1], + input_width=net_sizes[input1], + input_block_fn=net_block_fns[input1], + target_width=target_width, + target_num_filters=target_num_filters, + target_block_fn=target_block_fn, + alpha=self._resample_alpha) + parents.append(x1) + num_outgoing_connections[input1] += 1 + + # Merge 0 outdegree blocks to the output block. + if block_spec.is_output: + for j, (j_feat, + j_connections) in enumerate(zip(net, num_outgoing_connections)): + if j_connections == 0 and (j_feat.shape[2] == target_width and + j_feat.shape[3] == x0.shape[3]): + parents.append(j_feat) + num_outgoing_connections[j] += 1 + + # pylint: disable=g-direct-tensorflow-import + if weighted_fusion: + dtype = parents[0].dtype + parent_weights = [ + tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format( + i, j)), dtype=dtype)) for j in range(len(parents))] + weights_sum = tf.add_n(parent_weights) + parents = [ + parents[i] * parent_weights[i] / (weights_sum + 0.0001) + for i in range(len(parents)) + ] + + # Fuse all parent nodes then build a new block. 
+ x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents)) + x = self._block_group( + inputs=x, + filters=target_num_filters, + strides=1, + block_fn_cand=target_block_fn, + block_repeats=self._block_repeats, + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 1, len(self._block_specs)), + name='scale_permuted_block_{}'.format(i + 1)) + + net.append(x) + net_sizes.append(target_width) + net_block_fns.append(target_block_fn) + num_outgoing_connections.append(0) + + # Save output feats. + if block_spec.is_output: + if block_spec.level in endpoints: + raise ValueError('Duplicate feats found for output level {}.'.format( + block_spec.level)) + if (block_spec.level < self._min_level or + block_spec.level > self._max_level): + raise ValueError('Output level is out of range [{}, {}]'.format( + self._min_level, self._max_level)) + endpoints[str(block_spec.level)] = x + + return endpoints + + def _build_endpoints(self, net): + """Matches filter size for endpoints before sharing conv layers.""" + endpoints = {} + for level in range(self._min_level, self._max_level + 1): + x = layers.Conv2D( + filters=self._endpoints_num_filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + net[str(level)]) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + endpoints[str(level)] = x + return endpoints + + def _resample_with_alpha(self, + inputs, + input_width, + input_block_fn, + target_width, + target_num_filters, + target_block_fn, + alpha=0.5): + """Matches resolution and feature dimension.""" + _, _, _, input_num_filters = inputs.get_shape().as_list() + if input_block_fn == 'bottleneck': + input_num_filters /= 4 + new_num_filters = int(input_num_filters * alpha) + + x = 
layers.Conv2D( + filters=new_num_filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + + # Spatial resampling. + if input_width > target_width: + x = layers.Conv2D( + filters=new_num_filters, + kernel_size=3, + strides=2, + padding='SAME', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + input_width /= 2 + while input_width > target_width: + x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x) + input_width /= 2 + elif input_width < target_width: + scale = target_width // input_width + x = spatial_transform_ops.nearest_upsampling(x, scale=scale) + + # Last 1x1 conv to match filter size. 
+ if target_block_fn == 'bottleneck': + target_num_filters *= 4 + x = layers.Conv2D( + filters=target_num_filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + return x + + def get_config(self): + config_dict = { + 'min_level': self._min_level, + 'max_level': self._max_level, + 'endpoints_num_filters': self._endpoints_num_filters, + 'resample_alpha': self._resample_alpha, + 'block_repeats': self._block_repeats, + 'filter_size_scale': self._filter_size_scale, + 'init_stochastic_depth_rate': self._init_stochastic_depth_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('spinenet') +def build_spinenet( + input_specs: tf.keras.layers.InputSpec, + model_config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds SpineNet backbone from a config.""" + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'spinenet', (f'Inconsistent backbone type ' + f'{backbone_type}') + + model_id = backbone_cfg.model_id + if model_id not in SCALING_MAP: + raise ValueError( + 'SpineNet-{} is not a valid architecture.'.format(model_id)) + scaling_params = 
SCALING_MAP[model_id] + + return SpineNet( + input_specs=input_specs, + min_level=model_config.min_level, + max_level=model_config.max_level, + endpoints_num_filters=scaling_params['endpoints_num_filters'], + resample_alpha=scaling_params['resample_alpha'], + block_repeats=scaling_params['block_repeats'], + filter_size_scale=scaling_params['filter_size_scale'], + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + kernel_regularizer=l2_regularizer, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon) diff --git a/official/vision/beta/modeling/backbones/spinenet_mobile.py b/official/vision/beta/modeling/backbones/spinenet_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..7612f9c9415706a9f58a9ddbea85af49dc8a5270 --- /dev/null +++ b/official/vision/beta/modeling/backbones/spinenet_mobile.py @@ -0,0 +1,529 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains definitions of Mobile SpineNet Networks.""" +import math +from typing import Any, List, Optional, Tuple + +# Import libraries + +from absl import logging +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.modeling.layers import nn_blocks +from official.vision.beta.modeling.layers import nn_layers +from official.vision.beta.ops import spatial_transform_ops + +layers = tf.keras.layers + +FILTER_SIZE_MAP = { + 0: 8, + 1: 16, + 2: 24, + 3: 40, + 4: 80, + 5: 112, + 6: 112, + 7: 112, +} + +# The fixed SpineNet architecture discovered by NAS. +# Each element represents a specification of a building block: +# (block_level, block_fn, (input_offset0, input_offset1), is_output). 
+SPINENET_BLOCK_SPECS = [ + (2, 'mbconv', (0, 1), False), + (2, 'mbconv', (1, 2), False), + (4, 'mbconv', (1, 2), False), + (3, 'mbconv', (3, 4), False), + (4, 'mbconv', (3, 5), False), + (6, 'mbconv', (4, 6), False), + (4, 'mbconv', (4, 6), False), + (5, 'mbconv', (7, 8), False), + (7, 'mbconv', (7, 9), False), + (5, 'mbconv', (9, 10), False), + (5, 'mbconv', (9, 11), False), + (4, 'mbconv', (6, 11), True), + (3, 'mbconv', (5, 11), True), + (5, 'mbconv', (8, 13), True), + (7, 'mbconv', (6, 15), True), + (6, 'mbconv', (13, 15), True), +] + +SCALING_MAP = { + '49': { + 'endpoints_num_filters': 48, + 'filter_size_scale': 1.0, + 'block_repeats': 1, + }, + '49S': { + 'endpoints_num_filters': 40, + 'filter_size_scale': 0.65, + 'block_repeats': 1, + }, + '49XS': { + 'endpoints_num_filters': 24, + 'filter_size_scale': 0.6, + 'block_repeats': 1, + }, +} + + +class BlockSpec(object): + """A container class that specifies the block configuration for SpineNet.""" + + def __init__(self, level: int, block_fn: str, input_offsets: Tuple[int, int], + is_output: bool): + self.level = level + self.block_fn = block_fn + self.input_offsets = input_offsets + self.is_output = is_output + + +def build_block_specs( + block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]: + """Builds the list of BlockSpec objects for SpineNet.""" + if not block_specs: + block_specs = SPINENET_BLOCK_SPECS + logging.info('Building SpineNet block specs: %s', block_specs) + return [BlockSpec(*b) for b in block_specs] + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class SpineNetMobile(tf.keras.Model): + """Creates a Mobile SpineNet family model. + + This implements: + [1] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan, + Yin Cui, Quoc V. Le, Xiaodan Song. + SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization. + (https://arxiv.org/abs/1912.05027). 
+ [2] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Yin Cui, Mingxing Tan, + Quoc Le, Xiaodan Song. + Efficient Scale-Permuted Backbone with Learned Resource Distribution. + (https://arxiv.org/abs/2010.11426). + """ + + def __init__( + self, + input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec( + shape=[None, 512, 512, 3]), + min_level: int = 3, + max_level: int = 7, + block_specs: List[BlockSpec] = build_block_specs(), + endpoints_num_filters: int = 256, + se_ratio: float = 0.2, + block_repeats: int = 1, + filter_size_scale: float = 1.0, + expand_ratio: int = 6, + init_stochastic_depth_rate=0.0, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + **kwargs): + """Initializes a Mobile SpineNet model. + + Args: + input_specs: A `tf.keras.layers.InputSpec` of the input tensor. + min_level: An `int` of min level for output mutiscale features. + max_level: An `int` of max level for output mutiscale features. + block_specs: The block specifications for the SpineNet model discovered by + NAS. + endpoints_num_filters: An `int` of feature dimension for the output + endpoints. + se_ratio: A `float` of Squeeze-and-Excitation ratio. + block_repeats: An `int` of number of blocks contained in the layer. + filter_size_scale: A `float` of multiplier for the filters (number of + channels) for all convolution ops. The value must be greater than zero. + Typical usage will be to set this value in (0, 1) to reduce the number + of parameters or computation cost of the model. + expand_ratio: An `integer` of expansion ratios for inverted bottleneck + blocks. + init_stochastic_depth_rate: A `float` of initial stochastic depth rate. + kernel_initializer: A str for kernel initializer of convolutional layers. 
+ kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + Default to None. + activation: A `str` name of the activation function. + use_sync_bn: If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A small `float` added to variance to avoid dividing by zero. + **kwargs: Additional keyword arguments to be passed. + """ + self._input_specs = input_specs + self._min_level = min_level + self._max_level = max_level + self._block_specs = block_specs + self._endpoints_num_filters = endpoints_num_filters + self._se_ratio = se_ratio + self._block_repeats = block_repeats + self._filter_size_scale = filter_size_scale + self._expand_ratio = expand_ratio + self._init_stochastic_depth_rate = init_stochastic_depth_rate + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activation = activation + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + if activation == 'relu': + self._activation_fn = tf.nn.relu + elif activation == 'swish': + self._activation_fn = tf.nn.swish + else: + raise ValueError('Activation {} not implemented.'.format(activation)) + self._num_init_blocks = 2 + + if use_sync_bn: + self._norm = layers.experimental.SyncBatchNormalization + else: + self._norm = layers.BatchNormalization + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + + # Build SpineNet. 
+ inputs = tf.keras.Input(shape=input_specs.shape[1:]) + + net = self._build_stem(inputs=inputs) + net = self._build_scale_permuted_network( + net=net, input_width=input_specs.shape[2]) + endpoints = self._build_endpoints(net=net) + + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints} + super().__init__(inputs=inputs, outputs=endpoints) + + def _block_group(self, + inputs: tf.Tensor, + in_filters: int, + out_filters: int, + strides: int, + expand_ratio: int = 6, + block_repeats: int = 1, + se_ratio: float = 0.2, + stochastic_depth_drop_rate: Optional[float] = None, + name: str = 'block_group'): + """Creates one group of blocks for the SpineNet model.""" + x = nn_blocks.InvertedBottleneckBlock( + in_filters=in_filters, + out_filters=out_filters, + strides=strides, + se_ratio=se_ratio, + expand_ratio=expand_ratio, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + for _ in range(1, block_repeats): + x = nn_blocks.InvertedBottleneckBlock( + in_filters=in_filters, + out_filters=out_filters, + strides=1, + se_ratio=se_ratio, + expand_ratio=expand_ratio, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon)( + inputs) + return tf.identity(x, name=name) + + def _build_stem(self, inputs): + """Builds SpineNet stem.""" + x = layers.Conv2D( + filters=int(FILTER_SIZE_MAP[0] * self._filter_size_scale), + kernel_size=3, + strides=2, + use_bias=False, + padding='same', + 
kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + inputs) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + + net = [] + stem_strides = [1, 2] + # Build the initial level 2 blocks. + for i in range(self._num_init_blocks): + x = self._block_group( + inputs=x, + in_filters=int(FILTER_SIZE_MAP[i] * self._filter_size_scale), + out_filters=int(FILTER_SIZE_MAP[i + 1] * self._filter_size_scale), + expand_ratio=self._expand_ratio, + strides=stem_strides[i], + se_ratio=self._se_ratio, + block_repeats=self._block_repeats, + name='stem_block_{}'.format(i + 1)) + net.append(x) + return net + + def _build_scale_permuted_network(self, + net, + input_width, + weighted_fusion=False): + """Builds scale-permuted network.""" + net_sizes = [ + int(math.ceil(input_width / 2)), + int(math.ceil(input_width / 2**2)) + ] + num_outgoing_connections = [0] * len(net) + + endpoints = {} + for i, block_spec in enumerate(self._block_specs): + # Find out specs for the target block. + target_width = int(math.ceil(input_width / 2**block_spec.level)) + target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] * + self._filter_size_scale) + + # Resample then merge input0 and input1. + parents = [] + input0 = block_spec.input_offsets[0] + input1 = block_spec.input_offsets[1] + + x0 = self._resample_with_sepconv( + inputs=net[input0], + input_width=net_sizes[input0], + target_width=target_width, + target_num_filters=target_num_filters) + parents.append(x0) + num_outgoing_connections[input0] += 1 + + x1 = self._resample_with_sepconv( + inputs=net[input1], + input_width=net_sizes[input1], + target_width=target_width, + target_num_filters=target_num_filters) + parents.append(x1) + num_outgoing_connections[input1] += 1 + + # Merge 0 outdegree blocks to the output block. 
+ if block_spec.is_output: + for j, (j_feat, + j_connections) in enumerate(zip(net, num_outgoing_connections)): + if j_connections == 0 and (j_feat.shape[2] == target_width and + j_feat.shape[3] == x0.shape[3]): + parents.append(j_feat) + num_outgoing_connections[j] += 1 + + # pylint: disable=g-direct-tensorflow-import + if weighted_fusion: + dtype = parents[0].dtype + parent_weights = [ + tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format( + i, j)), dtype=dtype)) for j in range(len(parents))] + weights_sum = tf.add_n(parent_weights) + parents = [ + parents[i] * parent_weights[i] / (weights_sum + 0.0001) + for i in range(len(parents)) + ] + + # Fuse all parent nodes then build a new block. + x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents)) + x = self._block_group( + inputs=x, + in_filters=target_num_filters, + out_filters=target_num_filters, + strides=1, + se_ratio=self._se_ratio, + expand_ratio=self._expand_ratio, + block_repeats=self._block_repeats, + stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate( + self._init_stochastic_depth_rate, i + 1, len(self._block_specs)), + name='scale_permuted_block_{}'.format(i + 1)) + + net.append(x) + net_sizes.append(target_width) + num_outgoing_connections.append(0) + + # Save output feats. 
+ if block_spec.is_output: + if block_spec.level in endpoints: + raise ValueError('Duplicate feats found for output level {}.'.format( + block_spec.level)) + if (block_spec.level < self._min_level or + block_spec.level > self._max_level): + raise ValueError('Output level is out of range [{}, {}]'.format( + self._min_level, self._max_level)) + endpoints[str(block_spec.level)] = x + + return endpoints + + def _build_endpoints(self, net): + """Matches filter size for endpoints before sharing conv layers.""" + endpoints = {} + for level in range(self._min_level, self._max_level + 1): + x = layers.Conv2D( + filters=self._endpoints_num_filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + net[str(level)]) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + endpoints[str(level)] = x + return endpoints + + def _resample_with_sepconv(self, inputs, input_width, target_width, + target_num_filters): + """Matches resolution and feature dimension.""" + x = inputs + # Spatial resampling. + if input_width > target_width: + while input_width > target_width: + x = layers.DepthwiseConv2D( + kernel_size=3, + strides=2, + padding='SAME', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + x = tf_utils.get_activation(self._activation_fn)(x) + input_width /= 2 + elif input_width < target_width: + scale = target_width // input_width + x = spatial_transform_ops.nearest_upsampling(x, scale=scale) + + # Last 1x1 conv to match filter size. 
+ x = layers.Conv2D( + filters=target_num_filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + x = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon)( + x) + return x + + def get_config(self): + config_dict = { + 'min_level': self._min_level, + 'max_level': self._max_level, + 'endpoints_num_filters': self._endpoints_num_filters, + 'se_ratio': self._se_ratio, + 'expand_ratio': self._expand_ratio, + 'block_repeats': self._block_repeats, + 'filter_size_scale': self._filter_size_scale, + 'init_stochastic_depth_rate': self._init_stochastic_depth_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + return config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self): + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs + + +@factory.register_backbone_builder('spinenet_mobile') +def build_spinenet_mobile( + input_specs: tf.keras.layers.InputSpec, + model_config, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds Mobile SpineNet backbone from a config.""" + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'spinenet_mobile', (f'Inconsistent backbone type ' + f'{backbone_type}') + + model_id = backbone_cfg.model_id + if model_id not in SCALING_MAP: + raise ValueError( + 'Mobile SpineNet-{} is not a valid architecture.'.format(model_id)) + scaling_params = 
SCALING_MAP[model_id] + + return SpineNetMobile( + input_specs=input_specs, + min_level=model_config.min_level, + max_level=model_config.max_level, + endpoints_num_filters=scaling_params['endpoints_num_filters'], + block_repeats=scaling_params['block_repeats'], + filter_size_scale=scaling_params['filter_size_scale'], + se_ratio=backbone_cfg.se_ratio, + expand_ratio=backbone_cfg.expand_ratio, + init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate, + kernel_regularizer=l2_regularizer, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon) diff --git a/official/vision/beta/modeling/backbones/spinenet_mobile_test.py b/official/vision/beta/modeling/backbones/spinenet_mobile_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3ff910a656d6d0d0ef798b83869151a459ef7357 --- /dev/null +++ b/official/vision/beta/modeling/backbones/spinenet_mobile_test.py @@ -0,0 +1,111 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for SpineNet.""" +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import spinenet_mobile + + +class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 0.6, 1, 0.0, 24), + (128, 0.65, 1, 0.2, 40), + (256, 1.0, 1, 0.2, 48), + ) + def test_network_creation(self, input_size, filter_size_scale, block_repeats, + se_ratio, endpoints_num_filters): + """Test creation of SpineNet models.""" + min_level = 3 + max_level = 7 + + tf.keras.backend.set_image_data_format('channels_last') + + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size, input_size, 3]) + model = spinenet_mobile.SpineNetMobile( + input_specs=input_specs, + min_level=min_level, + max_level=max_level, + endpoints_num_filters=endpoints_num_filters, + resample_alpha=se_ratio, + block_repeats=block_repeats, + filter_size_scale=filter_size_scale, + init_stochastic_depth_rate=0.2, + ) + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + endpoints = model(inputs) + + for l in range(min_level, max_level + 1): + self.assertIn(str(l), endpoints.keys()) + self.assertAllEqual( + [1, input_size / 2**l, input_size / 2**l, endpoints_num_filters], + endpoints[str(l)].shape.as_list()) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. 
+ kwargs = dict( + min_level=3, + max_level=7, + endpoints_num_filters=256, + se_ratio=0.2, + expand_ratio=6, + block_repeats=1, + filter_size_scale=1.0, + init_stochastic_depth_rate=0.2, + use_sync_bn=False, + activation='relu', + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + ) + network = spinenet_mobile.SpineNetMobile(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = spinenet_mobile.SpineNetMobile.from_config( + network.get_config()) + + # Validate that the config can be forced to JSON. + _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/backbones/spinenet_test.py b/official/vision/beta/modeling/backbones/spinenet_test.py new file mode 100644 index 0000000000000000000000000000000000000000..51061030f9779a7d5357040222d8a12ee5e47232 --- /dev/null +++ b/official/vision/beta/modeling/backbones/spinenet_test.py @@ -0,0 +1,129 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for SpineNet.""" +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import spinenet + + +class SpineNetTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 0.65, 1, 0.5, 128), + (256, 1.0, 1, 0.5, 256), + (384, 1.0, 2, 0.5, 256), + (512, 1.0, 3, 1.0, 256), + (640, 1.3, 4, 1.0, 384), + ) + def test_network_creation(self, input_size, filter_size_scale, block_repeats, + resample_alpha, endpoints_num_filters): + """Test creation of SpineNet models.""" + min_level = 3 + max_level = 7 + + tf.keras.backend.set_image_data_format('channels_last') + + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size, input_size, 3]) + model = spinenet.SpineNet( + input_specs=input_specs, + min_level=min_level, + max_level=max_level, + endpoints_num_filters=endpoints_num_filters, + resample_alpha=resample_alpha, + block_repeats=block_repeats, + filter_size_scale=filter_size_scale, + init_stochastic_depth_rate=0.2, + ) + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + endpoints = model(inputs) + + for l in range(min_level, max_level + 1): + self.assertIn(str(l), endpoints.keys()) + self.assertAllEqual( + [1, input_size / 2**l, input_size / 2**l, endpoints_num_filters], + endpoints[str(l)].shape.as_list()) + + @parameterized.parameters( + ((128, 128), (128, 128)), + ((128, 128), (256, 256)), + ((640, 640), (896, 1664)), + ) + def test_load_from_different_input_specs(self, input_size_1, input_size_2): + """Test loading checkpoints with different input size.""" + + def build_spinenet(input_size): + tf.keras.backend.set_image_data_format('channels_last') + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + model = spinenet.SpineNet( + input_specs=input_specs, + min_level=3, + max_level=7, + endpoints_num_filters=384, + resample_alpha=1.0, + block_repeats=2, + 
filter_size_scale=0.5) + return model + + model_1 = build_spinenet(input_size_1) + model_2 = build_spinenet(input_size_2) + + ckpt_1 = tf.train.Checkpoint(backbone=model_1) + ckpt_2 = tf.train.Checkpoint(backbone=model_2) + + ckpt_path = self.get_temp_dir() + '/ckpt' + ckpt_1.write(ckpt_path) + ckpt_2.restore(ckpt_path).expect_partial() + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + kwargs = dict( + min_level=3, + max_level=7, + endpoints_num_filters=256, + resample_alpha=0.5, + block_repeats=1, + filter_size_scale=1.0, + init_stochastic_depth_rate=0.2, + use_sync_bn=False, + activation='relu', + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + ) + network = spinenet.SpineNet(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = spinenet.SpineNet.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. + _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/classification_model.py b/official/vision/beta/modeling/classification_model.py new file mode 100644 index 0000000000000000000000000000000000000000..cde7ebcca596804db1c781ebb24b7bea405cf342 --- /dev/null +++ b/official/vision/beta/modeling/classification_model.py @@ -0,0 +1,122 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Build classification models.""" + +from typing import Any, Mapping, Optional +# Import libraries +import tensorflow as tf + +layers = tf.keras.layers + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ClassificationModel(tf.keras.Model): + """A classification class builder.""" + + def __init__( + self, + backbone: tf.keras.Model, + num_classes: int, + input_specs: tf.keras.layers.InputSpec = layers.InputSpec( + shape=[None, None, None, 3]), + dropout_rate: float = 0.0, + kernel_initializer: str = 'random_uniform', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + add_head_batch_norm: bool = False, + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + skip_logits_layer: bool = False, + **kwargs): + """Classification initialization function. + + Args: + backbone: a backbone network. + num_classes: `int` number of classes in classification task. + input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. + dropout_rate: `float` rate for dropout regularization. + kernel_initializer: kernel initializer for the dense layer. + kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to + None. + bias_regularizer: tf.keras.regularizers.Regularizer object. Default to + None. + add_head_batch_norm: `bool` whether to add a batch normalization layer + before pool. + use_sync_bn: `bool` if True, use synchronized batch normalization. 
+ norm_momentum: `float` normalization momentum for the moving average. + norm_epsilon: `float` small float added to variance to avoid dividing by + zero. + skip_logits_layer: `bool`, whether to skip the prediction layer. + **kwargs: keyword arguments to be passed. + """ + if use_sync_bn: + norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + norm = tf.keras.layers.BatchNormalization + axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1 + + inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name) + endpoints = backbone(inputs) + x = endpoints[max(endpoints.keys())] + + if add_head_batch_norm: + x = norm(axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x) + x = tf.keras.layers.GlobalAveragePooling2D()(x) + if not skip_logits_layer: + x = tf.keras.layers.Dropout(dropout_rate)(x) + x = tf.keras.layers.Dense( + num_classes, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer)( + x) + + super(ClassificationModel, self).__init__( + inputs=inputs, outputs=x, **kwargs) + self._config_dict = { + 'backbone': backbone, + 'num_classes': num_classes, + 'input_specs': input_specs, + 'dropout_rate': dropout_rate, + 'kernel_initializer': kernel_initializer, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'add_head_batch_norm': add_head_batch_norm, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + } + self._input_specs = input_specs + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._backbone = backbone + self._norm = norm + + @property + def checkpoint_items(self) -> Mapping[str, tf.keras.Model]: + """Returns a dictionary of items to be additionally checkpointed.""" + return dict(backbone=self.backbone) + + @property + def backbone(self) -> tf.keras.Model: + return self._backbone + + def get_config(self) -> 
Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/modeling/classification_model_test.py b/official/vision/beta/modeling/classification_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..54522f065210079474148b76b8d5727fbe343def --- /dev/null +++ b/official/vision/beta/modeling/classification_model_test.py @@ -0,0 +1,203 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for classification network.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling import classification_model + + +class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 50, 'relu'), + (128, 50, 'relu'), + (128, 50, 'swish'), + ) + def test_resnet_network_creation( + self, input_size, resnet_model_id, activation): + """Test for creation of a ResNet-50 classifier.""" + inputs = np.random.rand(2, input_size, input_size, 3) + + tf.keras.backend.set_image_data_format('channels_last') + + backbone = backbones.ResNet( + model_id=resnet_model_id, activation=activation) + self.assertEqual(backbone.count_params(), 23561152) + + num_classes = 1000 + model = classification_model.ClassificationModel( + backbone=backbone, + num_classes=num_classes, + dropout_rate=0.2, + ) + self.assertEqual(model.count_params(), 25610152) + + logits = model(inputs) + self.assertAllEqual([2, num_classes], logits.numpy().shape) + + def test_revnet_network_creation(self): + """Test for creation of a RevNet-56 classifier.""" + revnet_model_id = 56 + inputs = np.random.rand(2, 224, 224, 3) + + tf.keras.backend.set_image_data_format('channels_last') + + backbone = backbones.RevNet(model_id=revnet_model_id) + self.assertEqual(backbone.count_params(), 19473792) + + num_classes = 1000 + model = classification_model.ClassificationModel( + backbone=backbone, + num_classes=num_classes, + dropout_rate=0.2, + add_head_batch_norm=True, + ) + self.assertEqual(model.count_params(), 22816104) + + logits = model(inputs) + self.assertAllEqual([2, num_classes], logits.numpy().shape) + + @combinations.generate( + combinations.combine( + mobilenet_model_id=[ + 
'MobileNetV1', + 'MobileNetV2', + 'MobileNetV3Large', + 'MobileNetV3Small', + 'MobileNetV3EdgeTPU', + 'MobileNetMultiAVG', + 'MobileNetMultiMAX', + ], + filter_size_scale=[1.0, 0.75], + )) + def test_mobilenet_network_creation(self, mobilenet_model_id, + filter_size_scale): + """Test for creation of a MobileNet classifier.""" + mobilenet_params = { + ('MobileNetV1', 1.0): 4254889, + ('MobileNetV1', 0.75): 2602745, + ('MobileNetV2', 1.0): 3540265, + ('MobileNetV2', 0.75): 2664345, + ('MobileNetV3Large', 1.0): 5508713, + ('MobileNetV3Large', 0.75): 4013897, + ('MobileNetV3Small', 1.0): 2555993, + ('MobileNetV3Small', 0.75): 2052577, + ('MobileNetV3EdgeTPU', 1.0): 4131593, + ('MobileNetV3EdgeTPU', 0.75): 3019569, + ('MobileNetMultiAVG', 1.0): 4982857, + ('MobileNetMultiAVG', 0.75): 3628145, + ('MobileNetMultiMAX', 1.0): 4453001, + ('MobileNetMultiMAX', 0.75): 3324257, + } + + inputs = np.random.rand(2, 224, 224, 3) + + tf.keras.backend.set_image_data_format('channels_last') + + backbone = backbones.MobileNet( + model_id=mobilenet_model_id, filter_size_scale=filter_size_scale) + + num_classes = 1001 + model = classification_model.ClassificationModel( + backbone=backbone, + num_classes=num_classes, + dropout_rate=0.2, + ) + self.assertEqual(model.count_params(), + mobilenet_params[(mobilenet_model_id, filter_size_scale)]) + + logits = model(inputs) + self.assertAllEqual([2, num_classes], logits.numpy().shape) + + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + use_sync_bn=[False, True], + )) + def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): + """Test for sync bn on TPU and GPU devices.""" + inputs = np.random.rand(64, 128, 128, 3) + + tf.keras.backend.set_image_data_format('channels_last') + + with strategy.scope(): + backbone = backbones.ResNet(model_id=50, use_sync_bn=use_sync_bn) + + model = classification_model.ClassificationModel( + 
backbone=backbone, + num_classes=1000, + dropout_rate=0.2, + ) + _ = model(inputs) + + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.one_device_strategy_gpu, + ], + data_format=['channels_last', 'channels_first'], + input_dim=[1, 3, 4])) + def test_data_format_gpu(self, strategy, data_format, input_dim): + """Test for different data formats on GPU devices.""" + if data_format == 'channels_last': + inputs = np.random.rand(2, 128, 128, input_dim) + else: + inputs = np.random.rand(2, input_dim, 128, 128) + input_specs = tf.keras.layers.InputSpec(shape=inputs.shape) + + tf.keras.backend.set_image_data_format(data_format) + + with strategy.scope(): + backbone = backbones.ResNet(model_id=50, input_specs=input_specs) + + model = classification_model.ClassificationModel( + backbone=backbone, + num_classes=1000, + input_specs=input_specs, + ) + _ = model(inputs) + + def test_serialize_deserialize(self): + """Validate the classification net can be serialized and deserialized.""" + + tf.keras.backend.set_image_data_format('channels_last') + backbone = backbones.ResNet(model_id=50) + + model = classification_model.ClassificationModel( + backbone=backbone, num_classes=1000) + + config = model.get_config() + new_model = classification_model.ClassificationModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/decoders/__init__.py b/official/vision/beta/modeling/decoders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1678aacb488552ad96ef8cd595f94986b61774b7 --- /dev/null +++ b/official/vision/beta/modeling/decoders/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Decoders package definition.""" + +from official.vision.beta.modeling.decoders.aspp import ASPP +from official.vision.beta.modeling.decoders.fpn import FPN +from official.vision.beta.modeling.decoders.nasfpn import NASFPN diff --git a/official/vision/beta/modeling/decoders/aspp.py b/official/vision/beta/modeling/decoders/aspp.py new file mode 100644 index 0000000000000000000000000000000000000000..147793b697ceaee92bef7808a72e251c587b1ebe --- /dev/null +++ b/official/vision/beta/modeling/decoders/aspp.py @@ -0,0 +1,130 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder.""" +from typing import Any, List, Optional, Mapping + +# Import libraries +import tensorflow as tf + +from official.vision import keras_cv + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ASPP(tf.keras.layers.Layer): + """Creates an Atrous Spatial Pyramid Pooling (ASPP) layer.""" + + def __init__( + self, + level: int, + dilation_rates: List[int], + num_filters: int = 256, + pool_kernel_size: Optional[int] = None, + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + activation: str = 'relu', + dropout_rate: float = 0.0, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + interpolation: str = 'bilinear', + **kwargs): + """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer. + + Args: + level: An `int` level to apply ASPP. + dilation_rates: A `list` of dilation rates. + num_filters: An `int` number of output filters in ASPP. + pool_kernel_size: A `list` of [height, width] of pooling kernel size or + None. Pooling size is with respect to original image size, it will be + scaled down by 2**level. If None, global average pooling is used. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + activation: A `str` activation to be used in ASPP. + dropout_rate: A `float` rate for dropout regularization. + kernel_initializer: A `str` name of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + interpolation: A `str` of interpolation method. It should be one of + `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`, + `gaussian`, or `mitchellcubic`. 
+ **kwargs: Additional keyword arguments to be passed. + """ + super(ASPP, self).__init__(**kwargs) + self._config_dict = { + 'level': level, + 'dilation_rates': dilation_rates, + 'num_filters': num_filters, + 'pool_kernel_size': pool_kernel_size, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'activation': activation, + 'dropout_rate': dropout_rate, + 'kernel_initializer': kernel_initializer, + 'kernel_regularizer': kernel_regularizer, + 'interpolation': interpolation, + } + + def build(self, input_shape): + pool_kernel_size = None + if self._config_dict['pool_kernel_size']: + pool_kernel_size = [ + int(p_size // 2**self._config_dict['level']) + for p_size in self._config_dict['pool_kernel_size'] + ] + self.aspp = keras_cv.layers.SpatialPyramidPooling( + output_channels=self._config_dict['num_filters'], + dilation_rates=self._config_dict['dilation_rates'], + pool_kernel_size=pool_kernel_size, + use_sync_bn=self._config_dict['use_sync_bn'], + batchnorm_momentum=self._config_dict['norm_momentum'], + batchnorm_epsilon=self._config_dict['norm_epsilon'], + activation=self._config_dict['activation'], + dropout=self._config_dict['dropout_rate'], + kernel_initializer=self._config_dict['kernel_initializer'], + kernel_regularizer=self._config_dict['kernel_regularizer'], + interpolation=self._config_dict['interpolation']) + + def call(self, inputs: Mapping[str, tf.Tensor]) -> Mapping[str, tf.Tensor]: + """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input. + + The output of ASPP will be a dict of {`level`, `tf.Tensor`} even if only one + level is present. Hence, this will be compatible with the rest of the + segmentation model interfaces. + + Args: + inputs: A `dict` of `tf.Tensor` where + - key: A `str` of the level of the multilevel feature maps. + - values: A `tf.Tensor` of shape [batch, height_l, width_l, + filter_size]. 
+ + Returns: + A `dict` of `tf.Tensor` where + - key: A `str` of the level of the multilevel feature maps. + - values: A `tf.Tensor` of output of ASPP module. + """ + outputs = {} + level = str(self._config_dict['level']) + outputs[level] = self.aspp(inputs[level]) + return outputs + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/modeling/decoders/aspp_test.py b/official/vision/beta/modeling/decoders/aspp_test.py new file mode 100644 index 0000000000000000000000000000000000000000..68acd58cf5cb8570194d986a12e350a7bc76986f --- /dev/null +++ b/official/vision/beta/modeling/decoders/aspp_test.py @@ -0,0 +1,87 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for aspp.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import aspp + + +class ASPPTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (3, [6, 12, 18, 24], 128), + (3, [6, 12, 18], 128), + (3, [6, 12], 256), + (4, [6, 12, 18, 24], 128), + (4, [6, 12, 18], 128), + (4, [6, 12], 256), + ) + def test_network_creation(self, level, dilation_rates, num_filters): + """Test creation of ASPP.""" + + input_size = 256 + tf.keras.backend.set_image_data_format('channels_last') + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + + backbone = resnet.ResNet(model_id=50) + network = aspp.ASPP( + level=level, + dilation_rates=dilation_rates, + num_filters=num_filters) + + endpoints = backbone(inputs) + feats = network(endpoints) + + self.assertIn(str(level), feats) + self.assertAllEqual( + [1, input_size // 2**level, input_size // 2**level, num_filters], + feats[str(level)].shape.as_list()) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + kwargs = dict( + level=3, + dilation_rates=[6, 12], + num_filters=256, + pool_kernel_size=None, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + activation='relu', + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + interpolation='bilinear', + dropout_rate=0.2, + ) + network = aspp.ASPP(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = aspp.ASPP.from_config(network.get_config()) + + # If the serialization was successful, the new config should match the old. 
+ self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/decoders/factory.py b/official/vision/beta/modeling/decoders/factory.py new file mode 100644 index 0000000000000000000000000000000000000000..5c6551e6e170fef852b088a7667eb9d53a4fd17f --- /dev/null +++ b/official/vision/beta/modeling/decoders/factory.py @@ -0,0 +1,91 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Contains the factory method to create decoders.""" + +from typing import Mapping, Optional + +# Import libraries + +import tensorflow as tf + +from official.modeling import hyperparams +from official.vision.beta.modeling import decoders + + +def build_decoder( + input_specs: Mapping[str, tf.TensorShape], + model_config: hyperparams.Config, + l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None +) -> tf.keras.Model: + """Builds decoder from a config. + + Args: + input_specs: A `dict` of input specifications. A dictionary consists of + {level: TensorShape} from a backbone. + model_config: A OneOfConfig. Model config. + l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to + None. + + Returns: + A `tf.keras.Model` instance of the decoder. 
+ """ + decoder_type = model_config.decoder.type + decoder_cfg = model_config.decoder.get() + norm_activation_config = model_config.norm_activation + + if decoder_type == 'identity': + decoder = None + elif decoder_type == 'fpn': + decoder = decoders.FPN( + input_specs=input_specs, + min_level=model_config.min_level, + max_level=model_config.max_level, + num_filters=decoder_cfg.num_filters, + use_separable_conv=decoder_cfg.use_separable_conv, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + elif decoder_type == 'nasfpn': + decoder = decoders.NASFPN( + input_specs=input_specs, + min_level=model_config.min_level, + max_level=model_config.max_level, + num_filters=decoder_cfg.num_filters, + num_repeats=decoder_cfg.num_repeats, + use_separable_conv=decoder_cfg.use_separable_conv, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + elif decoder_type == 'aspp': + decoder = decoders.ASPP( + level=decoder_cfg.level, + dilation_rates=decoder_cfg.dilation_rates, + num_filters=decoder_cfg.num_filters, + pool_kernel_size=decoder_cfg.pool_kernel_size, + dropout_rate=decoder_cfg.dropout_rate, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + activation=norm_activation_config.activation, + kernel_regularizer=l2_regularizer) + else: + raise ValueError('Decoder {!r} not implement'.format(decoder_type)) + + return decoder diff --git a/official/vision/beta/modeling/decoders/fpn.py b/official/vision/beta/modeling/decoders/fpn.py new file mode 100644 index 
0000000000000000000000000000000000000000..b87e9a8d7617b1ed4536f0451bef4d51acfe3437 --- /dev/null +++ b/official/vision/beta/modeling/decoders/fpn.py @@ -0,0 +1,189 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains the definitions of Feature Pyramid Networks (FPN).""" +from typing import Any, Mapping, Optional + +# Import libraries +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.ops import spatial_transform_ops + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class FPN(tf.keras.Model): + """Creates a Feature Pyramid Network (FPN). + + This implemets the paper: + Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and + Serge Belongie. + Feature Pyramid Networks for Object Detection. + (https://arxiv.org/pdf/1612.03144) + """ + + def __init__( + self, + input_specs: Mapping[str, tf.TensorShape], + min_level: int = 3, + max_level: int = 7, + num_filters: int = 256, + use_separable_conv: bool = False, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a Feature Pyramid Network (FPN). 
+ + Args: + input_specs: A `dict` of input specifications. A dictionary consists of + {level: TensorShape} from a backbone. + min_level: An `int` of minimum level in FPN output feature maps. + max_level: An `int` of maximum level in FPN output feature maps. + num_filters: An `int` number of filters in FPN layers. + use_separable_conv: A `bool`. If True use separable convolution for + convolution in FPN layers. + activation: A `str` name of the activation function. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_initializer: A `str` name of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. + """ + self._config_dict = { + 'input_specs': input_specs, + 'min_level': min_level, + 'max_level': max_level, + 'num_filters': num_filters, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_initializer': kernel_initializer, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + if use_separable_conv: + conv2d = tf.keras.layers.SeparableConv2D + else: + conv2d = tf.keras.layers.Conv2D + if use_sync_bn: + norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + norm = tf.keras.layers.BatchNormalization + activation_fn = tf.keras.layers.Activation( + tf_utils.get_activation(activation)) + + # Build input feature pyramid. + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + # Get input feature pyramid from backbone. 
+ inputs = self._build_input_pyramid(input_specs, min_level) + backbone_max_level = min(int(max(inputs.keys())), max_level) + + # Build lateral connections. + feats_lateral = {} + for level in range(min_level, backbone_max_level + 1): + feats_lateral[str(level)] = conv2d( + filters=num_filters, + kernel_size=1, + padding='same', + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer)( + inputs[str(level)]) + + # Build top-down path. + feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]} + for level in range(backbone_max_level - 1, min_level - 1, -1): + feats[str(level)] = spatial_transform_ops.nearest_upsampling( + feats[str(level + 1)], 2) + feats_lateral[str(level)] + + # TODO(xianzhi): consider to remove bias in conv2d. + # Build post-hoc 3x3 convolution kernel. + for level in range(min_level, backbone_max_level + 1): + feats[str(level)] = conv2d( + filters=num_filters, + strides=1, + kernel_size=3, + padding='same', + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer)( + feats[str(level)]) + + # TODO(xianzhi): consider to remove bias in conv2d. + # Build coarser FPN levels introduced for RetinaNet. + for level in range(backbone_max_level + 1, max_level + 1): + feats_in = feats[str(level - 1)] + if level > backbone_max_level + 1: + feats_in = activation_fn(feats_in) + feats[str(level)] = conv2d( + filters=num_filters, + strides=2, + kernel_size=3, + padding='same', + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer)( + feats_in) + + # Apply batch norm layers. 
+ for level in range(min_level, max_level + 1): + feats[str(level)] = norm( + axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)( + feats[str(level)]) + + self._output_specs = { + str(level): feats[str(level)].get_shape() + for level in range(min_level, max_level + 1) + } + + super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs) + + def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape], + min_level: int): + assert isinstance(input_specs, dict) + if min(input_specs.keys()) > str(min_level): + raise ValueError( + 'Backbone min level should be less or equal to FPN min level') + + inputs = {} + for level, spec in input_specs.items(): + inputs[level] = tf.keras.Input(shape=spec[1:]) + return inputs + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self) -> Mapping[str, tf.TensorShape]: + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs diff --git a/official/vision/beta/modeling/decoders/fpn_test.py b/official/vision/beta/modeling/decoders/fpn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3920b4efca1a4985f46fd9efbb8bf9cee42d4783 --- /dev/null +++ b/official/vision/beta/modeling/decoders/fpn_test.py @@ -0,0 +1,87 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for FPN.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import fpn + + +class FPNTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (256, 3, 7, False), + (256, 3, 7, True), + ) + def test_network_creation(self, input_size, min_level, max_level, + use_separable_conv): + """Test creation of FPN.""" + tf.keras.backend.set_image_data_format('channels_last') + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + + backbone = resnet.ResNet(model_id=50) + network = fpn.FPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level, + use_separable_conv=use_separable_conv) + + endpoints = backbone(inputs) + feats = network(endpoints) + + for level in range(min_level, max_level + 1): + self.assertIn(str(level), feats) + self.assertAllEqual( + [1, input_size // 2**level, input_size // 2**level, 256], + feats[str(level)].shape.as_list()) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + kwargs = dict( + input_specs=resnet.ResNet(model_id=50).output_specs, + min_level=3, + max_level=7, + num_filters=256, + use_separable_conv=False, + use_sync_bn=False, + activation='relu', + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + ) + network = fpn.FPN(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(network.get_config(), expected_config) + + # Create another network object from the first object's config. + new_network = fpn.FPN.from_config(network.get_config()) + + # Validate that the config can be forced to JSON. 
+ _ = new_network.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/decoders/nasfpn.py b/official/vision/beta/modeling/decoders/nasfpn.py new file mode 100644 index 0000000000000000000000000000000000000000..2e94dcc3e41e072cd1b4fe8e935e8db9ce2d202d --- /dev/null +++ b/official/vision/beta/modeling/decoders/nasfpn.py @@ -0,0 +1,318 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of NAS-FPN.""" +from typing import Any, Mapping, List, Tuple, Optional + +# Import libraries +from absl import logging +import tensorflow as tf + +from official.vision.beta.ops import spatial_transform_ops + + +# The fixed NAS-FPN architecture discovered by NAS. +# Each element represents a specification of a building block: +# (block_level, combine_fn, (input_offset0, input_offset1), is_output). 
+NASFPN_BLOCK_SPECS = [ + (4, 'attention', (1, 3), False), + (4, 'sum', (1, 5), False), + (3, 'sum', (0, 6), True), + (4, 'sum', (6, 7), True), + (5, 'attention', (7, 8), True), + (7, 'attention', (6, 9), True), + (6, 'attention', (9, 10), True), +] + + +class BlockSpec(): + """A container class that specifies the block configuration for NAS-FPN.""" + + def __init__(self, level: int, combine_fn: str, + input_offsets: Tuple[int, int], is_output: bool): + self.level = level + self.combine_fn = combine_fn + self.input_offsets = input_offsets + self.is_output = is_output + + +def build_block_specs( + block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]: + """Builds the list of BlockSpec objects for NAS-FPN.""" + if not block_specs: + block_specs = NASFPN_BLOCK_SPECS + logging.info('Building NAS-FPN block specs: %s', block_specs) + return [BlockSpec(*b) for b in block_specs] + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class NASFPN(tf.keras.Model): + """Creates a NAS-FPN model. + + This implements the paper: + Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le. + NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection. + (https://arxiv.org/abs/1904.07392) + """ + + def __init__( + self, + input_specs: Mapping[str, tf.TensorShape], + min_level: int = 3, + max_level: int = 7, + block_specs: List[BlockSpec] = build_block_specs(), + num_filters: int = 256, + num_repeats: int = 5, + use_separable_conv: bool = False, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_initializer: str = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a NAS-FPN model. + + Args: + input_specs: A `dict` of input specifications. A dictionary consists of + {level: TensorShape} from a backbone. 
+ min_level: An `int` of minimum level in FPN output feature maps. + max_level: An `int` of maximum level in FPN output feature maps. + block_specs: a list of BlockSpec objects that specifies the NAS-FPN + network topology. By default, the previously discovered architecture is + used. + num_filters: An `int` number of filters in FPN layers. + num_repeats: number of repeats for feature pyramid network. + use_separable_conv: A `bool`. If True use separable convolution for + convolution in FPN layers. + activation: A `str` name of the activation function. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_initializer: A `str` name of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. 
+ """ + self._config_dict = { + 'input_specs': input_specs, + 'min_level': min_level, + 'max_level': max_level, + 'num_filters': num_filters, + 'num_repeats': num_repeats, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_initializer': kernel_initializer, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + self._min_level = min_level + self._max_level = max_level + self._block_specs = block_specs + self._num_repeats = num_repeats + self._conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + if self._config_dict['use_separable_conv']: + self._conv_kwargs = { + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + else: + self._conv_kwargs = { + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._norm_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 
'epsilon': self._config_dict['norm_epsilon'], + } + if activation == 'relu': + self._activation = tf.nn.relu + elif activation == 'swish': + self._activation = tf.nn.swish + else: + raise ValueError('Activation {} not implemented.'.format(activation)) + + # Gets input feature pyramid from backbone. + inputs = self._build_input_pyramid(input_specs, min_level) + + # Projects the input features. + feats = [] + for level in range(self._min_level, self._max_level + 1): + if str(level) in inputs.keys(): + feats.append(self._resample_feature_map( + inputs[str(level)], level, level, self._config_dict['num_filters'])) + else: + feats.append(self._resample_feature_map( + feats[-1], level - 1, level, self._config_dict['num_filters'])) + + # Repeatly builds the NAS-FPN modules. + for _ in range(self._num_repeats): + output_feats = self._build_feature_pyramid(feats) + feats = [output_feats[level] + for level in range(self._min_level, self._max_level + 1)] + + self._output_specs = { + str(level): output_feats[level].get_shape() + for level in range(min_level, max_level + 1) + } + output_feats = {str(level): output_feats[level] + for level in output_feats.keys()} + super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs) + + def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape], + min_level: int): + assert isinstance(input_specs, dict) + if min(input_specs.keys()) > str(min_level): + raise ValueError( + 'Backbone min level should be less or equal to FPN min level') + + inputs = {} + for level, spec in input_specs.items(): + inputs[level] = tf.keras.Input(shape=spec[1:]) + return inputs + + def _resample_feature_map(self, + inputs, + input_level, + target_level, + target_num_filters=256): + x = inputs + _, _, _, input_num_filters = x.get_shape().as_list() + if input_num_filters != target_num_filters: + x = self._conv_op( + filters=target_num_filters, + kernel_size=1, + padding='same', + **self._conv_kwargs)(x) + x = 
self._norm_op(**self._norm_kwargs)(x) + + if input_level < target_level: + stride = int(2 ** (target_level - input_level)) + x = tf.keras.layers.MaxPool2D( + pool_size=stride, strides=stride, padding='same')(x) + elif input_level > target_level: + scale = int(2 ** (input_level - target_level)) + x = spatial_transform_ops.nearest_upsampling(x, scale=scale) + + return x + + def _global_attention(self, feat0, feat1): + m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True) + m = tf.math.sigmoid(m) + return feat0 + feat1 * m + + def _build_feature_pyramid(self, feats): + num_output_connections = [0] * len(feats) + num_output_levels = self._max_level - self._min_level + 1 + feat_levels = list(range(self._min_level, self._max_level + 1)) + + for i, block_spec in enumerate(self._block_specs): + new_level = block_spec.level + + # Checks the range of input_offsets. + for input_offset in block_spec.input_offsets: + if input_offset >= len(feats): + raise ValueError( + 'input_offset ({}) is larger than num feats({})'.format( + input_offset, len(feats))) + input0 = block_spec.input_offsets[0] + input1 = block_spec.input_offsets[1] + + # Update graph with inputs. + node0 = feats[input0] + node0_level = feat_levels[input0] + num_output_connections[input0] += 1 + node0 = self._resample_feature_map(node0, node0_level, new_level) + node1 = feats[input1] + node1_level = feat_levels[input1] + num_output_connections[input1] += 1 + node1 = self._resample_feature_map(node1, node1_level, new_level) + + # Combine node0 and node1 to create new feat. + if block_spec.combine_fn == 'sum': + new_node = node0 + node1 + elif block_spec.combine_fn == 'attention': + if node0_level >= node1_level: + new_node = self._global_attention(node0, node1) + else: + new_node = self._global_attention(node1, node0) + else: + raise ValueError('unknown combine_fn `{}`.' + .format(block_spec.combine_fn)) + + # Add intermediate nodes that do not have any connections to output. 
+ if block_spec.is_output: + for j, (feat, feat_level, num_output) in enumerate( + zip(feats, feat_levels, num_output_connections)): + if num_output == 0 and feat_level == new_level: + num_output_connections[j] += 1 + + feat_ = self._resample_feature_map(feat, feat_level, new_level) + new_node += feat_ + + new_node = self._activation(new_node) + new_node = self._conv_op( + filters=self._config_dict['num_filters'], + kernel_size=(3, 3), + padding='same', + **self._conv_kwargs)(new_node) + new_node = self._norm_op(**self._norm_kwargs)(new_node) + + feats.append(new_node) + feat_levels.append(new_level) + num_output_connections.append(0) + + output_feats = {} + for i in range(len(feats) - num_output_levels, len(feats)): + level = feat_levels[i] + output_feats[level] = feats[i] + logging.info('Output feature pyramid: %s', output_feats) + return output_feats + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + @property + def output_specs(self) -> Mapping[str, tf.TensorShape]: + """A dict of {level: TensorShape} pairs for the model output.""" + return self._output_specs diff --git a/official/vision/beta/modeling/decoders/nasfpn_test.py b/official/vision/beta/modeling/decoders/nasfpn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c8101281591fcd401b796c6a20e72c7e9c4eaf3e --- /dev/null +++ b/official/vision/beta/modeling/decoders/nasfpn_test.py @@ -0,0 +1,59 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for NAS-FPN.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import nasfpn + + +class NASFPNTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (256, 3, 7, False), + (256, 3, 7, True), + ) + def test_network_creation(self, input_size, min_level, max_level, + use_separable_conv): + """Test creation of NAS-FPN.""" + tf.keras.backend.set_image_data_format('channels_last') + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + + num_filters = 256 + backbone = resnet.ResNet(model_id=50) + network = nasfpn.NASFPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level, + num_filters=num_filters, + use_separable_conv=use_separable_conv) + + endpoints = backbone(inputs) + feats = network(endpoints) + + for level in range(min_level, max_level + 1): + self.assertIn(str(level), feats) + self.assertAllEqual( + [1, input_size // 2**level, input_size // 2**level, num_filters], + feats[str(level)].shape.as_list()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/factory.py b/official/vision/beta/modeling/factory.py new file mode 100644 index 0000000000000000000000000000000000000000..a7fed5c8a0a97c388299c88d112e7c6f2ad6983b --- /dev/null +++ b/official/vision/beta/modeling/factory.py @@ -0,0 +1,334 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Factory methods to build models.""" + +# Import libraries + +import tensorflow as tf + +from official.vision.beta.configs import image_classification as classification_cfg +from official.vision.beta.configs import maskrcnn as maskrcnn_cfg +from official.vision.beta.configs import retinanet as retinanet_cfg +from official.vision.beta.configs import semantic_segmentation as segmentation_cfg +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling import classification_model +from official.vision.beta.modeling import maskrcnn_model +from official.vision.beta.modeling import retinanet_model +from official.vision.beta.modeling import segmentation_model +from official.vision.beta.modeling.decoders import factory as decoder_factory +from official.vision.beta.modeling.heads import dense_prediction_heads +from official.vision.beta.modeling.heads import instance_heads +from official.vision.beta.modeling.heads import segmentation_heads +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.modeling.layers import mask_sampler +from official.vision.beta.modeling.layers import roi_aligner +from official.vision.beta.modeling.layers import roi_generator +from official.vision.beta.modeling.layers import roi_sampler + + +def build_classification_model( + input_specs: tf.keras.layers.InputSpec, + model_config: classification_cfg.ImageClassificationModel, + 
l2_regularizer: tf.keras.regularizers.Regularizer = None, + skip_logits_layer: bool = False) -> tf.keras.Model: + """Builds the classification model.""" + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + norm_activation_config = model_config.norm_activation + model = classification_model.ClassificationModel( + backbone=backbone, + num_classes=model_config.num_classes, + input_specs=input_specs, + dropout_rate=model_config.dropout_rate, + kernel_regularizer=l2_regularizer, + add_head_batch_norm=model_config.add_head_batch_norm, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + skip_logits_layer=skip_logits_layer) + return model + + +def build_maskrcnn( + input_specs: tf.keras.layers.InputSpec, + model_config: maskrcnn_cfg.MaskRCNN, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds Mask R-CNN model.""" + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + decoder = decoder_factory.build_decoder( + input_specs=backbone.output_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + rpn_head_config = model_config.rpn_head + roi_generator_config = model_config.roi_generator + roi_sampler_config = model_config.roi_sampler + roi_aligner_config = model_config.roi_aligner + detection_head_config = model_config.detection_head + generator_config = model_config.detection_generator + norm_activation_config = model_config.norm_activation + num_anchors_per_location = ( + len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales) + + rpn_head = dense_prediction_heads.RPNHead( + min_level=model_config.min_level, + max_level=model_config.max_level, + num_anchors_per_location=num_anchors_per_location, + 
num_convs=rpn_head_config.num_convs, + num_filters=rpn_head_config.num_filters, + use_separable_conv=rpn_head_config.use_separable_conv, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + detection_head = instance_heads.DetectionHead( + num_classes=model_config.num_classes, + num_convs=detection_head_config.num_convs, + num_filters=detection_head_config.num_filters, + use_separable_conv=detection_head_config.use_separable_conv, + num_fcs=detection_head_config.num_fcs, + fc_dims=detection_head_config.fc_dims, + class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer, + name='detection_head') + if roi_sampler_config.cascade_iou_thresholds: + detection_head_cascade = [detection_head] + for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)): + detection_head = instance_heads.DetectionHead( + num_classes=model_config.num_classes, + num_convs=detection_head_config.num_convs, + num_filters=detection_head_config.num_filters, + use_separable_conv=detection_head_config.use_separable_conv, + num_fcs=detection_head_config.num_fcs, + fc_dims=detection_head_config.fc_dims, + class_agnostic_bbox_pred=detection_head_config + .class_agnostic_bbox_pred, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer, + name='detection_head_{}'.format(cascade_num + 1)) + detection_head_cascade.append(detection_head) + detection_head = 
detection_head_cascade + + roi_generator_obj = roi_generator.MultilevelROIGenerator( + pre_nms_top_k=roi_generator_config.pre_nms_top_k, + pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold, + pre_nms_min_size_threshold=( + roi_generator_config.pre_nms_min_size_threshold), + nms_iou_threshold=roi_generator_config.nms_iou_threshold, + num_proposals=roi_generator_config.num_proposals, + test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k, + test_pre_nms_score_threshold=( + roi_generator_config.test_pre_nms_score_threshold), + test_pre_nms_min_size_threshold=( + roi_generator_config.test_pre_nms_min_size_threshold), + test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold, + test_num_proposals=roi_generator_config.test_num_proposals, + use_batched_nms=roi_generator_config.use_batched_nms) + + roi_sampler_cascade = [] + roi_sampler_obj = roi_sampler.ROISampler( + mix_gt_boxes=roi_sampler_config.mix_gt_boxes, + num_sampled_rois=roi_sampler_config.num_sampled_rois, + foreground_fraction=roi_sampler_config.foreground_fraction, + foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold, + background_iou_high_threshold=( + roi_sampler_config.background_iou_high_threshold), + background_iou_low_threshold=( + roi_sampler_config.background_iou_low_threshold)) + roi_sampler_cascade.append(roi_sampler_obj) + # Initialize addtional roi simplers for cascade heads. 
+ if roi_sampler_config.cascade_iou_thresholds: + for iou in roi_sampler_config.cascade_iou_thresholds: + roi_sampler_obj = roi_sampler.ROISampler( + mix_gt_boxes=False, + num_sampled_rois=roi_sampler_config.num_sampled_rois, + foreground_iou_threshold=iou, + background_iou_high_threshold=iou, + background_iou_low_threshold=0.0, + skip_subsampling=True) + roi_sampler_cascade.append(roi_sampler_obj) + + roi_aligner_obj = roi_aligner.MultilevelROIAligner( + crop_size=roi_aligner_config.crop_size, + sample_offset=roi_aligner_config.sample_offset) + + detection_generator_obj = detection_generator.DetectionGenerator( + apply_nms=generator_config.apply_nms, + pre_nms_top_k=generator_config.pre_nms_top_k, + pre_nms_score_threshold=generator_config.pre_nms_score_threshold, + nms_iou_threshold=generator_config.nms_iou_threshold, + max_num_detections=generator_config.max_num_detections, + use_batched_nms=generator_config.use_batched_nms) + + if model_config.include_mask: + mask_head = instance_heads.MaskHead( + num_classes=model_config.num_classes, + upsample_factor=model_config.mask_head.upsample_factor, + num_convs=model_config.mask_head.num_convs, + num_filters=model_config.mask_head.num_filters, + use_separable_conv=model_config.mask_head.use_separable_conv, + activation=model_config.norm_activation.activation, + norm_momentum=model_config.norm_activation.norm_momentum, + norm_epsilon=model_config.norm_activation.norm_epsilon, + kernel_regularizer=l2_regularizer, + class_agnostic=model_config.mask_head.class_agnostic) + + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=( + model_config.mask_roi_aligner.crop_size * + model_config.mask_head.upsample_factor), + num_sampled_masks=model_config.mask_sampler.num_sampled_masks) + + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner( + crop_size=model_config.mask_roi_aligner.crop_size, + sample_offset=model_config.mask_roi_aligner.sample_offset) + else: + mask_head = None + mask_sampler_obj = None + 
mask_roi_aligner_obj = None + + model = maskrcnn_model.MaskRCNNModel( + backbone=backbone, + decoder=decoder, + rpn_head=rpn_head, + detection_head=detection_head, + roi_generator=roi_generator_obj, + roi_sampler=roi_sampler_cascade, + roi_aligner=roi_aligner_obj, + detection_generator=detection_generator_obj, + mask_head=mask_head, + mask_sampler=mask_sampler_obj, + mask_roi_aligner=mask_roi_aligner_obj, + class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred, + cascade_class_ensemble=detection_head_config.cascade_class_ensemble, + min_level=model_config.min_level, + max_level=model_config.max_level, + num_scales=model_config.anchor.num_scales, + aspect_ratios=model_config.anchor.aspect_ratios, + anchor_size=model_config.anchor.anchor_size) + return model + + +def build_retinanet( + input_specs: tf.keras.layers.InputSpec, + model_config: retinanet_cfg.RetinaNet, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds RetinaNet model.""" + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + decoder = decoder_factory.build_decoder( + input_specs=backbone.output_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + head_config = model_config.head + generator_config = model_config.detection_generator + norm_activation_config = model_config.norm_activation + num_anchors_per_location = ( + len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales) + + head = dense_prediction_heads.RetinaNetHead( + min_level=model_config.min_level, + max_level=model_config.max_level, + num_classes=model_config.num_classes, + num_anchors_per_location=num_anchors_per_location, + num_convs=head_config.num_convs, + num_filters=head_config.num_filters, + attribute_heads=[ + cfg.as_dict() for cfg in (head_config.attribute_heads or []) + ], + use_separable_conv=head_config.use_separable_conv, + 
activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + detection_generator_obj = detection_generator.MultilevelDetectionGenerator( + apply_nms=generator_config.apply_nms, + pre_nms_top_k=generator_config.pre_nms_top_k, + pre_nms_score_threshold=generator_config.pre_nms_score_threshold, + nms_iou_threshold=generator_config.nms_iou_threshold, + max_num_detections=generator_config.max_num_detections, + use_batched_nms=generator_config.use_batched_nms) + + model = retinanet_model.RetinaNetModel( + backbone, + decoder, + head, + detection_generator_obj, + min_level=model_config.min_level, + max_level=model_config.max_level, + num_scales=model_config.anchor.num_scales, + aspect_ratios=model_config.anchor.aspect_ratios, + anchor_size=model_config.anchor.anchor_size) + return model + + +def build_segmentation_model( + input_specs: tf.keras.layers.InputSpec, + model_config: segmentation_cfg.SemanticSegmentationModel, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds Segmentation model.""" + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + decoder = decoder_factory.build_decoder( + input_specs=backbone.output_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + head_config = model_config.head + norm_activation_config = model_config.norm_activation + + head = segmentation_heads.SegmentationHead( + num_classes=model_config.num_classes, + level=head_config.level, + num_convs=head_config.num_convs, + prediction_kernel_size=head_config.prediction_kernel_size, + num_filters=head_config.num_filters, + upsample_factor=head_config.upsample_factor, + feature_fusion=head_config.feature_fusion, + low_level=head_config.low_level, + 
low_level_num_filters=head_config.low_level_num_filters, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + model = segmentation_model.SegmentationModel(backbone, decoder, head) + return model diff --git a/official/vision/beta/modeling/factory_3d.py b/official/vision/beta/modeling/factory_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..3bf9e5a339d89912a707ca72f058faf4fb44fea9 --- /dev/null +++ b/official/vision/beta/modeling/factory_3d.py @@ -0,0 +1,100 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Factory methods to build models.""" + +# Import libraries +import tensorflow as tf + +from official.core import registry +from official.vision.beta.configs import video_classification as video_classification_cfg +from official.vision.beta.modeling import video_classification_model +from official.vision.beta.modeling import backbones + +_REGISTERED_MODEL_CLS = {} + + +def register_model_builder(key: str): + """Decorates a builder of model class. + + The builder should be a Callable (a class or a function). 
+ This decorator supports registration of backbone builder as follows: + + ``` + class MyModel(tf.keras.Model): + pass + + @register_backbone_builder('mybackbone') + def builder(input_specs, config, l2_reg): + return MyModel(...) + + # Builds a MyModel object. + my_backbone = build_backbone_3d(input_specs, config, l2_reg) + ``` + + Args: + key: the key to look up the builder. + + Returns: + A callable for use as class decorator that registers the decorated class + for creation from an instance of model class. + """ + return registry.register(_REGISTERED_MODEL_CLS, key) + + +def build_model( + model_type: str, + input_specs: tf.keras.layers.InputSpec, + model_config: video_classification_cfg.hyperparams.Config, + num_classes: int, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds backbone from a config. + + Args: + model_type: string name of model type. It should be consistent with + ModelConfig.model_type. + input_specs: tf.keras.layers.InputSpec. + model_config: a OneOfConfig. Model config. + num_classes: number of classes. + l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None. + + Returns: + tf.keras.Model instance of the backbone. 
+ """ + model_builder = registry.lookup(_REGISTERED_MODEL_CLS, model_type) + + return model_builder(input_specs, model_config, num_classes, l2_regularizer) + + +@register_model_builder('video_classification') +def build_video_classification_model( + input_specs: tf.keras.layers.InputSpec, + model_config: video_classification_cfg.VideoClassificationModel, + num_classes: int, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds the video classification model.""" + input_specs_dict = {'image': input_specs} + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + model = video_classification_model.VideoClassificationModel( + backbone=backbone, + num_classes=num_classes, + input_specs=input_specs_dict, + dropout_rate=model_config.dropout_rate, + aggregate_endpoints=model_config.aggregate_endpoints, + kernel_regularizer=l2_regularizer) + return model diff --git a/official/vision/beta/modeling/factory_test.py b/official/vision/beta/modeling/factory_test.py new file mode 100644 index 0000000000000000000000000000000000000000..79127f1b5b4d9f9c7d6a37eb6d1523082dadbf1f --- /dev/null +++ b/official/vision/beta/modeling/factory_test.py @@ -0,0 +1,132 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for factory.py.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.configs import backbones +from official.vision.beta.configs import backbones_3d +from official.vision.beta.configs import image_classification as classification_cfg +from official.vision.beta.configs import maskrcnn as maskrcnn_cfg +from official.vision.beta.configs import retinanet as retinanet_cfg +from official.vision.beta.configs import video_classification as video_classification_cfg +from official.vision.beta.modeling import factory +from official.vision.beta.modeling import factory_3d + + +class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + ('resnet', (224, 224), 5e-5), + ('resnet', (224, 224), None), + ('resnet', (None, None), 5e-5), + ('resnet', (None, None), None), + ) + def test_builder(self, backbone_type, input_size, weight_decay): + num_classes = 2 + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + model_config = classification_cfg.ImageClassificationModel( + num_classes=num_classes, + backbone=backbones.Backbone(type=backbone_type)) + l2_regularizer = ( + tf.keras.regularizers.l2(weight_decay) if weight_decay else None) + _ = factory.build_classification_model( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + +class MaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + ('resnet', (640, 640)), + ('resnet', (None, None)), + ) + def test_builder(self, backbone_type, input_size): + num_classes = 2 + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + model_config = maskrcnn_cfg.MaskRCNN( + num_classes=num_classes, + backbone=backbones.Backbone(type=backbone_type)) + l2_regularizer = tf.keras.regularizers.l2(5e-5) + _ = factory.build_maskrcnn( + 
input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + +class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + ('resnet', (640, 640), False), + ('resnet', (None, None), True), + ) + def test_builder(self, backbone_type, input_size, has_att_heads): + num_classes = 2 + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], 3]) + if has_att_heads: + attribute_heads_config = [ + retinanet_cfg.AttributeHead(name='att1'), + retinanet_cfg.AttributeHead( + name='att2', type='classification', size=2), + ] + else: + attribute_heads_config = None + model_config = retinanet_cfg.RetinaNet( + num_classes=num_classes, + backbone=backbones.Backbone(type=backbone_type), + head=retinanet_cfg.RetinaNetHead( + attribute_heads=attribute_heads_config)) + l2_regularizer = tf.keras.regularizers.l2(5e-5) + _ = factory.build_retinanet( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + if has_att_heads: + self.assertEqual(model_config.head.attribute_heads[0].as_dict(), + dict(name='att1', type='regression', size=1)) + self.assertEqual(model_config.head.attribute_heads[1].as_dict(), + dict(name='att2', type='classification', size=2)) + + +class VideoClassificationModelBuilderTest(parameterized.TestCase, + tf.test.TestCase): + + @parameterized.parameters( + ('resnet_3d', (8, 224, 224), 5e-5), + ('resnet_3d', (None, None, None), 5e-5), + ) + def test_builder(self, backbone_type, input_size, weight_decay): + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size[0], input_size[1], input_size[2], 3]) + model_config = video_classification_cfg.VideoClassificationModel( + backbone=backbones_3d.Backbone3D(type=backbone_type)) + l2_regularizer = ( + tf.keras.regularizers.l2(weight_decay) if weight_decay else None) + _ = factory_3d.build_video_classification_model( + input_specs=input_specs, + model_config=model_config, + num_classes=2, 
+ l2_regularizer=l2_regularizer) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/heads/__init__.py b/official/vision/beta/modeling/heads/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..881fc1120e85f5bc38c04e103e885285e22c7a8c --- /dev/null +++ b/official/vision/beta/modeling/heads/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Heads package definition.""" + +from official.vision.beta.modeling.heads.dense_prediction_heads import RetinaNetHead +from official.vision.beta.modeling.heads.dense_prediction_heads import RPNHead +from official.vision.beta.modeling.heads.instance_heads import DetectionHead +from official.vision.beta.modeling.heads.instance_heads import MaskHead +from official.vision.beta.modeling.heads.segmentation_heads import SegmentationHead diff --git a/official/vision/beta/modeling/heads/dense_prediction_heads.py b/official/vision/beta/modeling/heads/dense_prediction_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..a9da2d3b32fad7f5aa599a2ea149edaa41975145 --- /dev/null +++ b/official/vision/beta/modeling/heads/dense_prediction_heads.py @@ -0,0 +1,509 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains definitions of dense prediction heads."""
+
+from typing import Any, Dict, List, Mapping, Optional, Union
+
+# Import libraries
+
+import numpy as np
+import tensorflow as tf
+
+from official.modeling import tf_utils
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class RetinaNetHead(tf.keras.layers.Layer):
+  """Creates a RetinaNet head."""
+
+  def __init__(
+      self,
+      min_level: int,
+      max_level: int,
+      num_classes: int,
+      num_anchors_per_location: int,
+      num_convs: int = 4,
+      num_filters: int = 256,
+      attribute_heads: List[Dict[str, Any]] = None,
+      use_separable_conv: bool = False,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a RetinaNet head.
+
+    Args:
+      min_level: An `int` number of minimum feature level.
+      max_level: An `int` number of maximum feature level.
+      num_classes: An `int` number of classes to predict.
+      num_anchors_per_location: An `int` number of anchors per pixel
+        location.
+      num_convs: An `int` number that represents the number of the intermediate
+        conv layers before the prediction.
+      num_filters: An `int` number that represents the number of filters of the
+        intermediate conv layers.
+ attribute_heads: If not None, a list that contains a dict for each + additional attribute head. Each dict consists of 3 key-value pairs: + `name`, `type` ('regression' or 'classification'), and `size` (number + of predicted values for each instance). + use_separable_conv: A `bool` that indicates whether the separable + convolution layers is used. + activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. + """ + super(RetinaNetHead, self).__init__(**kwargs) + self._config_dict = { + 'min_level': min_level, + 'max_level': max_level, + 'num_classes': num_classes, + 'num_anchors_per_location': num_anchors_per_location, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'attribute_heads': attribute_heads, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the head.""" + conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + conv_kwargs = { + 'filters': self._config_dict['num_filters'], + 'kernel_size': 
3, + 'padding': 'same', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal( + stddev=0.01), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + # Class net. + self._cls_convs = [] + self._cls_norms = [] + for level in range( + self._config_dict['min_level'], self._config_dict['max_level'] + 1): + this_level_cls_norms = [] + for i in range(self._config_dict['num_convs']): + if level == self._config_dict['min_level']: + cls_conv_name = 'classnet-conv_{}'.format(i) + self._cls_convs.append(conv_op(name=cls_conv_name, **conv_kwargs)) + cls_norm_name = 'classnet-conv-norm_{}_{}'.format(level, i) + this_level_cls_norms.append(bn_op(name=cls_norm_name, **bn_kwargs)) + self._cls_norms.append(this_level_cls_norms) + + classifier_kwargs = { + 'filters': ( + self._config_dict['num_classes'] * + self._config_dict['num_anchors_per_location']), + 'kernel_size': 3, + 'padding': 'same', + 'bias_initializer': tf.constant_initializer(-np.log((1 - 0.01) / 0.01)), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + classifier_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal(stddev=1e-5), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + self._classifier = conv_op(name='scores', **classifier_kwargs) + + # Box net. 
+ self._box_convs = [] + self._box_norms = [] + for level in range( + self._config_dict['min_level'], self._config_dict['max_level'] + 1): + this_level_box_norms = [] + for i in range(self._config_dict['num_convs']): + if level == self._config_dict['min_level']: + box_conv_name = 'boxnet-conv_{}'.format(i) + self._box_convs.append(conv_op(name=box_conv_name, **conv_kwargs)) + box_norm_name = 'boxnet-conv-norm_{}_{}'.format(level, i) + this_level_box_norms.append(bn_op(name=box_norm_name, **bn_kwargs)) + self._box_norms.append(this_level_box_norms) + + box_regressor_kwargs = { + 'filters': 4 * self._config_dict['num_anchors_per_location'], + 'kernel_size': 3, + 'padding': 'same', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + box_regressor_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal( + stddev=1e-5), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs) + + # Attribute learning nets. + if self._config_dict['attribute_heads']: + self._att_predictors = {} + self._att_convs = {} + self._att_norms = {} + + for att_config in self._config_dict['attribute_heads']: + att_name = att_config['name'] + att_type = att_config['type'] + att_size = att_config['size'] + att_convs_i = [] + att_norms_i = [] + + # Build conv and norm layers. 
+ for level in range(self._config_dict['min_level'], + self._config_dict['max_level'] + 1): + this_level_att_norms = [] + for i in range(self._config_dict['num_convs']): + if level == self._config_dict['min_level']: + att_conv_name = '{}-conv_{}'.format(att_name, i) + att_convs_i.append(conv_op(name=att_conv_name, **conv_kwargs)) + att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i) + this_level_att_norms.append(bn_op(name=att_norm_name, **bn_kwargs)) + att_norms_i.append(this_level_att_norms) + self._att_convs[att_name] = att_convs_i + self._att_norms[att_name] = att_norms_i + + # Build the final prediction layer. + att_predictor_kwargs = { + 'filters': + (att_size * self._config_dict['num_anchors_per_location']), + 'kernel_size': 3, + 'padding': 'same', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if att_type == 'regression': + att_predictor_kwargs.update( + {'bias_initializer': tf.zeros_initializer()}) + elif att_type == 'classification': + att_predictor_kwargs.update({ + 'bias_initializer': + tf.constant_initializer(-np.log((1 - 0.01) / 0.01)) + }) + else: + raise ValueError( + 'Attribute head type {} not supported.'.format(att_type)) + + if not self._config_dict['use_separable_conv']: + att_predictor_kwargs.update({ + 'kernel_initializer': + tf.keras.initializers.RandomNormal(stddev=1e-5), + 'kernel_regularizer': + self._config_dict['kernel_regularizer'], + }) + + self._att_predictors[att_name] = conv_op( + name='{}_attributes'.format(att_name), **att_predictor_kwargs) + + super(RetinaNetHead, self).build(input_shape) + + def call(self, features: Mapping[str, tf.Tensor]): + """Forward pass of the RetinaNet head. + + Args: + features: A `dict` of `tf.Tensor` where + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor`, the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. 
+ + Returns: + scores: A `dict` of `tf.Tensor` which includes scores of the predictions. + - key: A `str` of the level of the multilevel predictions. + - values: A `tf.Tensor` of the box scores predicted from a particular + feature level, whose shape is + [batch, height_l, width_l, num_classes * num_anchors_per_location]. + boxes: A `dict` of `tf.Tensor` which includes coordinates of the + predictions. + - key: A `str` of the level of the multilevel predictions. + - values: A `tf.Tensor` of the box scores predicted from a particular + feature level, whose shape is + [batch, height_l, width_l, 4 * num_anchors_per_location]. + attributes: a dict of (attribute_name, attribute_prediction). Each + `attribute_prediction` is a dict of: + - key: `str`, the level of the multilevel predictions. + - values: `Tensor`, the box scores predicted from a particular feature + level, whose shape is + [batch, height_l, width_l, + attribute_size * num_anchors_per_location]. + Can be an empty dictionary if no attribute learning is required. + """ + scores = {} + boxes = {} + if self._config_dict['attribute_heads']: + attributes = { + att_config['name']: {} + for att_config in self._config_dict['attribute_heads'] + } + else: + attributes = {} + + for i, level in enumerate( + range(self._config_dict['min_level'], + self._config_dict['max_level'] + 1)): + this_level_features = features[str(level)] + + # class net. + x = this_level_features + for conv, norm in zip(self._cls_convs, self._cls_norms[i]): + x = conv(x) + x = norm(x) + x = self._activation(x) + scores[str(level)] = self._classifier(x) + + # box net. + x = this_level_features + for conv, norm in zip(self._box_convs, self._box_norms[i]): + x = conv(x) + x = norm(x) + x = self._activation(x) + boxes[str(level)] = self._box_regressor(x) + + # attribute nets. 
+      if self._config_dict['attribute_heads']:
+        for att_config in self._config_dict['attribute_heads']:
+          att_name = att_config['name']
+          x = this_level_features
+          for conv, norm in zip(self._att_convs[att_name],
+                                self._att_norms[att_name][i]):
+            x = conv(x)
+            x = norm(x)
+            x = self._activation(x)
+          attributes[att_name][str(level)] = self._att_predictors[att_name](x)
+
+    return scores, boxes, attributes
+
+  def get_config(self):
+    return self._config_dict
+
+  @classmethod
+  def from_config(cls, config):
+    return cls(**config)
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class RPNHead(tf.keras.layers.Layer):
+  """Creates a Region Proposal Network (RPN) head."""
+
+  def __init__(
+      self,
+      min_level: int,
+      max_level: int,
+      num_anchors_per_location: int,
+      num_convs: int = 1,
+      num_filters: int = 256,
+      use_separable_conv: bool = False,
+      activation: str = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a Region Proposal Network head.
+
+    Args:
+      min_level: An `int` number of minimum feature level.
+      max_level: An `int` number of maximum feature level.
+      num_anchors_per_location: An `int` number of anchors per pixel
+        location.
+      num_convs: An `int` number that represents the number of the intermediate
+        convolution layers before the prediction.
+      num_filters: An `int` number that represents the number of filters of the
+        intermediate convolution layers.
+      use_separable_conv: A `bool` that indicates whether the separable
+        convolution layers is used.
+      activation: A `str` that indicates which activation is used, e.g. 'relu',
+        'swish', etc.
+      use_sync_bn: A `bool` that indicates whether to use synchronized batch
+        normalization across different replicas.
+ norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. + """ + super(RPNHead, self).__init__(**kwargs) + self._config_dict = { + 'min_level': min_level, + 'max_level': max_level, + 'num_anchors_per_location': num_anchors_per_location, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape): + """Creates the variables of the head.""" + conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + conv_kwargs = { + 'filters': self._config_dict['num_filters'], + 'kernel_size': 3, + 'padding': 'same', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal( + stddev=0.01), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._convs = [] + self._norms = 
[] + for level in range( + self._config_dict['min_level'], self._config_dict['max_level'] + 1): + this_level_norms = [] + for i in range(self._config_dict['num_convs']): + if level == self._config_dict['min_level']: + conv_name = 'rpn-conv_{}'.format(i) + self._convs.append(conv_op(name=conv_name, **conv_kwargs)) + norm_name = 'rpn-conv-norm_{}_{}'.format(level, i) + this_level_norms.append(bn_op(name=norm_name, **bn_kwargs)) + self._norms.append(this_level_norms) + + classifier_kwargs = { + 'filters': self._config_dict['num_anchors_per_location'], + 'kernel_size': 1, + 'padding': 'valid', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + classifier_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal( + stddev=1e-5), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + self._classifier = conv_op(name='rpn-scores', **classifier_kwargs) + + box_regressor_kwargs = { + 'filters': 4 * self._config_dict['num_anchors_per_location'], + 'kernel_size': 1, + 'padding': 'valid', + 'bias_initializer': tf.zeros_initializer(), + 'bias_regularizer': self._config_dict['bias_regularizer'], + } + if not self._config_dict['use_separable_conv']: + box_regressor_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.RandomNormal( + stddev=1e-5), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + }) + self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs) + + super(RPNHead, self).build(input_shape) + + def call(self, features: Mapping[str, tf.Tensor]): + """Forward pass of the RPN head. + + Args: + features: A `dict` of `tf.Tensor` where + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor`, the feature map tensors, whose shape is [batch, + height_l, width_l, channels]. + + Returns: + scores: A `dict` of `tf.Tensor` which includes scores of the predictions. 
+ - key: A `str` of the level of the multilevel predictions. + - values: A `tf.Tensor` of the box scores predicted from a particular + feature level, whose shape is + [batch, height_l, width_l, num_classes * num_anchors_per_location]. + boxes: A `dict` of `tf.Tensor` which includes coordinates of the + predictions. + - key: A `str` of the level of the multilevel predictions. + - values: A `tf.Tensor` of the box scores predicted from a particular + feature level, whose shape is + [batch, height_l, width_l, 4 * num_anchors_per_location]. + """ + scores = {} + boxes = {} + for i, level in enumerate( + range(self._config_dict['min_level'], + self._config_dict['max_level'] + 1)): + x = features[str(level)] + for conv, norm in zip(self._convs, self._norms[i]): + x = conv(x) + x = norm(x) + x = self._activation(x) + scores[str(level)] = self._classifier(x) + boxes[str(level)] = self._box_regressor(x) + return scores, boxes + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/heads/dense_prediction_heads_test.py b/official/vision/beta/modeling/heads/dense_prediction_heads_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ee940c550e38700b3ee2d6b6d313a0b6448637d3 --- /dev/null +++ b/official/vision/beta/modeling/heads/dense_prediction_heads_test.py @@ -0,0 +1,148 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for dense_prediction_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling.heads import dense_prediction_heads + + +class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (False, False, False), + (False, True, False), + (True, False, True), + (True, True, True), + ) + def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads): + if has_att_heads: + attribute_heads = [dict(name='depth', type='regression', size=1)] + else: + attribute_heads = None + + retinanet_head = dense_prediction_heads.RetinaNetHead( + min_level=3, + max_level=4, + num_classes=3, + num_anchors_per_location=3, + num_convs=2, + num_filters=256, + attribute_heads=attribute_heads, + use_separable_conv=use_separable_conv, + activation='relu', + use_sync_bn=use_sync_bn, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + } + scores, boxes, attributes = retinanet_head(features) + self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 9]) + self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 9]) + self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12]) + self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12]) + if has_att_heads: + for att in attributes.values(): + self.assertAllEqual(att['3'].numpy().shape, [2, 128, 128, 3]) + self.assertAllEqual(att['4'].numpy().shape, [2, 64, 64, 3]) + + def test_serialize_deserialize(self): + retinanet_head = dense_prediction_heads.RetinaNetHead( + min_level=3, + max_level=7, + num_classes=3, + num_anchors_per_location=9, + num_convs=2, + num_filters=16, + attribute_heads=None, + use_separable_conv=False, 
+ activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + config = retinanet_head.get_config() + new_retinanet_head = ( + dense_prediction_heads.RetinaNetHead.from_config(config)) + self.assertAllEqual( + retinanet_head.get_config(), new_retinanet_head.get_config()) + + +class RpnHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (False, False), + (False, True), + (True, False), + (True, True), + ) + def test_forward(self, use_separable_conv, use_sync_bn): + rpn_head = dense_prediction_heads.RPNHead( + min_level=3, + max_level=4, + num_anchors_per_location=3, + num_convs=2, + num_filters=256, + use_separable_conv=use_separable_conv, + activation='relu', + use_sync_bn=use_sync_bn, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + } + scores, boxes = rpn_head(features) + self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 3]) + self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 3]) + self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12]) + self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12]) + + def test_serialize_deserialize(self): + rpn_head = dense_prediction_heads.RPNHead( + min_level=3, + max_level=7, + num_anchors_per_location=9, + num_convs=2, + num_filters=16, + use_separable_conv=False, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + config = rpn_head.get_config() + new_rpn_head = dense_prediction_heads.RPNHead.from_config(config) + self.assertAllEqual(rpn_head.get_config(), new_rpn_head.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/heads/instance_heads.py 
b/official/vision/beta/modeling/heads/instance_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..fd492dd22a6d30b727b6c1cc2c67979337329307 --- /dev/null +++ b/official/vision/beta/modeling/heads/instance_heads.py @@ -0,0 +1,444 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of instance prediction heads.""" + +from typing import List, Union, Optional +# Import libraries +import tensorflow as tf + +from official.modeling import tf_utils + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class DetectionHead(tf.keras.layers.Layer): + """Creates a detection head.""" + + def __init__( + self, + num_classes: int, + num_convs: int = 0, + num_filters: int = 256, + use_separable_conv: bool = False, + num_fcs: int = 2, + fc_dims: int = 1024, + class_agnostic_bbox_pred: bool = False, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a detection head. + + Args: + num_classes: An `int` for the number of classes. + num_convs: An `int` number that represents the number of the intermediate + convolution layers before the FC layers. 
+ num_filters: An `int` number that represents the number of filters of the + intermediate convolution layers. + use_separable_conv: A `bool` that indicates whether the separable + convolution layers is used. + num_fcs: An `int` number that represents the number of FC layers before + the predictions. + fc_dims: An `int` number that represents the number of dimension of the FC + layers. + class_agnostic_bbox_pred: `bool`, indicating whether bboxes should be + predicted for every class or not. + activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(DetectionHead, self).__init__(**kwargs) + self._config_dict = { + 'num_classes': num_classes, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'use_separable_conv': use_separable_conv, + 'num_fcs': num_fcs, + 'fc_dims': fc_dims, + 'class_agnostic_bbox_pred': class_agnostic_bbox_pred, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the head.""" + conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + conv_kwargs = { + 'filters': self._config_dict['num_filters'], + 'kernel_size': 3, + 'padding': 'same', + } + if self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + else: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + bn_op = 
(tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._convs = [] + self._conv_norms = [] + for i in range(self._config_dict['num_convs']): + conv_name = 'detection-conv_{}'.format(i) + self._convs.append(conv_op(name=conv_name, **conv_kwargs)) + bn_name = 'detection-conv-bn_{}'.format(i) + self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs)) + + self._fcs = [] + self._fc_norms = [] + for i in range(self._config_dict['num_fcs']): + fc_name = 'detection-fc_{}'.format(i) + self._fcs.append( + tf.keras.layers.Dense( + units=self._config_dict['fc_dims'], + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=1 / 3.0, mode='fan_out', distribution='uniform'), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer'], + name=fc_name)) + bn_name = 'detection-fc-bn_{}'.format(i) + self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs)) + + self._classifier = tf.keras.layers.Dense( + units=self._config_dict['num_classes'], + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + bias_initializer=tf.zeros_initializer(), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer'], + name='detection-scores') + + num_box_outputs = (4 if self._config_dict['class_agnostic_bbox_pred'] else + self._config_dict['num_classes'] * 4) + self._box_regressor = tf.keras.layers.Dense( + units=num_box_outputs, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), + bias_initializer=tf.zeros_initializer(), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer'], + name='detection-boxes') + + super(DetectionHead, self).build(input_shape) 
+ + def call(self, inputs: tf.Tensor, training: bool = None): + """Forward pass of box and class branches for the Mask-RCNN model. + + Args: + inputs: A `tf.Tensor` of the shape [batch_size, num_instances, roi_height, + roi_width, roi_channels], representing the ROI features. + training: a `bool` indicating whether it is in `training` mode. + + Returns: + class_outputs: A `tf.Tensor` of the shape + [batch_size, num_rois, num_classes], representing the class predictions. + box_outputs: A `tf.Tensor` of the shape + [batch_size, num_rois, num_classes * 4], representing the box + predictions. + """ + roi_features = inputs + _, num_rois, height, width, filters = roi_features.get_shape().as_list() + + x = tf.reshape(roi_features, [-1, height, width, filters]) + for conv, bn in zip(self._convs, self._conv_norms): + x = conv(x) + x = bn(x) + x = self._activation(x) + + _, _, _, filters = x.get_shape().as_list() + x = tf.reshape(x, [-1, num_rois, height * width * filters]) + + for fc, bn in zip(self._fcs, self._fc_norms): + x = fc(x) + x = bn(x) + x = self._activation(x) + + classes = self._classifier(x) + boxes = self._box_regressor(x) + return classes, boxes + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class MaskHead(tf.keras.layers.Layer): + """Creates a mask head.""" + + def __init__( + self, + num_classes: int, + upsample_factor: int = 2, + num_convs: int = 4, + num_filters: int = 256, + use_separable_conv: bool = False, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + class_agnostic: bool = False, + **kwargs): + """Initializes a mask head. + + Args: + num_classes: An `int` of the number of classes. 
+ upsample_factor: An `int` that indicates the upsample factor to generate + the final predicted masks. It should be >= 1. + num_convs: An `int` number that represents the number of the intermediate + convolution layers before the mask prediction layers. + num_filters: An `int` number that represents the number of filters of the + intermediate convolution layers. + use_separable_conv: A `bool` that indicates whether the separable + convolution layers are used. + activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + class_agnostic: A `bool`. If set, we use a single channel mask head that + is shared between all classes. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(MaskHead, self).__init__(**kwargs) + self._config_dict = { + 'num_classes': num_classes, + 'upsample_factor': upsample_factor, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'class_agnostic': class_agnostic + } + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the head.""" + conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + conv_kwargs = { + 'filters': self._config_dict['num_filters'], + 'kernel_size': 3, + 'padding': 'same', + } + if self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + else: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if 
self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + self._convs = [] + self._conv_norms = [] + for i in range(self._config_dict['num_convs']): + conv_name = 'mask-conv_{}'.format(i) + self._convs.append(conv_op(name=conv_name, **conv_kwargs)) + bn_name = 'mask-conv-bn_{}'.format(i) + self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs)) + + self._deconv = tf.keras.layers.Conv2DTranspose( + filters=self._config_dict['num_filters'], + kernel_size=self._config_dict['upsample_factor'], + strides=self._config_dict['upsample_factor'], + padding='valid', + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + bias_initializer=tf.zeros_initializer(), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer'], + name='mask-upsampling') + self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs) + + if self._config_dict['class_agnostic']: + num_filters = 1 + else: + num_filters = self._config_dict['num_classes'] + + conv_kwargs = { + 'filters': num_filters, + 'kernel_size': 1, + 'padding': 'valid', + } + if self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + else: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, 
mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + self._mask_regressor = conv_op(name='mask-logits', **conv_kwargs) + + super(MaskHead, self).build(input_shape) + + def call(self, inputs: List[tf.Tensor], training: bool = None): + """Forward pass of mask branch for the Mask-RCNN model. + + Args: + inputs: A `list` of two tensors where + inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances, + roi_height, roi_width, roi_channels], representing the ROI features. + inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances], + representing the classes of the ROIs. + training: A `bool` indicating whether it is in `training` mode. + + Returns: + mask_outputs: A `tf.Tensor` of shape + [batch_size, num_instances, roi_height * upsample_factor, + roi_width * upsample_factor], representing the mask predictions. 
+ """ + roi_features, roi_classes = inputs + batch_size, num_rois, height, width, filters = ( + roi_features.get_shape().as_list()) + if batch_size is None: + batch_size = tf.shape(roi_features)[0] + + x = tf.reshape(roi_features, [-1, height, width, filters]) + for conv, bn in zip(self._convs, self._conv_norms): + x = conv(x) + x = bn(x) + x = self._activation(x) + + x = self._deconv(x) + x = self._deconv_bn(x) + x = self._activation(x) + + logits = self._mask_regressor(x) + + mask_height = height * self._config_dict['upsample_factor'] + mask_width = width * self._config_dict['upsample_factor'] + + if self._config_dict['class_agnostic']: + logits = tf.reshape(logits, [-1, num_rois, mask_height, mask_width, 1]) + else: + logits = tf.reshape( + logits, + [-1, num_rois, mask_height, mask_width, + self._config_dict['num_classes']]) + + batch_indices = tf.tile( + tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois]) + mask_indices = tf.tile( + tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1]) + + if self._config_dict['class_agnostic']: + class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32) + else: + class_gather_indices = tf.cast(roi_classes, dtype=tf.int32) + + gather_indices = tf.stack( + [batch_indices, mask_indices, class_gather_indices], + axis=2) + mask_outputs = tf.gather_nd( + tf.transpose(logits, [0, 1, 4, 2, 3]), gather_indices) + return mask_outputs + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/heads/instance_heads_test.py b/official/vision/beta/modeling/heads/instance_heads_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2f87705ecae7e9a63e45410cf84e8546511540ab --- /dev/null +++ b/official/vision/beta/modeling/heads/instance_heads_test.py @@ -0,0 +1,135 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for instance_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling.heads import instance_heads + + +class DetectionHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (0, 0, False, False), + (0, 1, False, False), + (1, 0, False, False), + (1, 1, False, False), + ) + def test_forward(self, num_convs, num_fcs, use_separable_conv, use_sync_bn): + detection_head = instance_heads.DetectionHead( + num_classes=3, + num_convs=num_convs, + num_filters=16, + use_separable_conv=use_separable_conv, + num_fcs=num_fcs, + fc_dims=4, + activation='relu', + use_sync_bn=use_sync_bn, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + roi_features = np.random.rand(2, 10, 128, 128, 16) + scores, boxes = detection_head(roi_features) + self.assertAllEqual(scores.numpy().shape, [2, 10, 3]) + self.assertAllEqual(boxes.numpy().shape, [2, 10, 12]) + + def test_serialize_deserialize(self): + detection_head = instance_heads.DetectionHead( + num_classes=91, + num_convs=0, + num_filters=256, + use_separable_conv=False, + num_fcs=2, + fc_dims=1024, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + config = detection_head.get_config() + 
new_detection_head = instance_heads.DetectionHead.from_config(config) + self.assertAllEqual( + detection_head.get_config(), new_detection_head.get_config()) + + +class MaskHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (1, 1, False), + (1, 2, False), + (2, 1, False), + (2, 2, False), + ) + def test_forward(self, upsample_factor, num_convs, use_sync_bn): + mask_head = instance_heads.MaskHead( + num_classes=3, + upsample_factor=upsample_factor, + num_convs=num_convs, + num_filters=16, + use_separable_conv=False, + activation='relu', + use_sync_bn=use_sync_bn, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + roi_features = np.random.rand(2, 10, 14, 14, 16) + roi_classes = np.zeros((2, 10)) + masks = mask_head([roi_features, roi_classes]) + self.assertAllEqual( + masks.numpy().shape, + [2, 10, 14 * upsample_factor, 14 * upsample_factor]) + + def test_serialize_deserialize(self): + mask_head = instance_heads.MaskHead( + num_classes=3, + upsample_factor=2, + num_convs=1, + num_filters=256, + use_separable_conv=False, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + config = mask_head.get_config() + new_mask_head = instance_heads.MaskHead.from_config(config) + self.assertAllEqual( + mask_head.get_config(), new_mask_head.get_config()) + + def test_forward_class_agnostic(self): + mask_head = instance_heads.MaskHead( + num_classes=3, + class_agnostic=True + ) + roi_features = np.random.rand(2, 10, 14, 14, 16) + roi_classes = np.zeros((2, 10)) + masks = mask_head([roi_features, roi_classes]) + self.assertAllEqual(masks.numpy().shape, [2, 10, 28, 28]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/heads/segmentation_heads.py b/official/vision/beta/modeling/heads/segmentation_heads.py new file mode 100644 index 
0000000000000000000000000000000000000000..a14cfff2ad0faf07bb78533b7c84d489eff15644 --- /dev/null +++ b/official/vision/beta/modeling/heads/segmentation_heads.py @@ -0,0 +1,213 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of segmentation heads.""" +from typing import List, Union, Optional, Mapping +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.modeling.layers import nn_layers +from official.vision.beta.ops import spatial_transform_ops + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class SegmentationHead(tf.keras.layers.Layer): + """Creates a segmentation head.""" + + def __init__( + self, + num_classes: int, + level: Union[int, str], + num_convs: int = 2, + num_filters: int = 256, + prediction_kernel_size: int = 1, + upsample_factor: int = 1, + feature_fusion: Optional[str] = None, + low_level: int = 2, + low_level_num_filters: int = 48, + activation: str = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Initializes a segmentation head. + + Args: + num_classes: An `int` number of mask classification categories. The number + of classes does not include background class. 
+ level: An `int` or `str`, level to use to build segmentation head. + num_convs: An `int` number of stacked convolution before the last + prediction layer. + num_filters: An `int` number to specify the number of filters used. + Default is 256. + prediction_kernel_size: An `int` number to specify the kernel size of the + prediction layer. + upsample_factor: An `int` number to specify the upsampling factor to + generate finer mask. Default 1 means no upsampling is applied. + feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, or None. If + `deeplabv3plus`, features from decoder_features[level] will be fused + with low level feature maps from backbone. If `pyramid_fusion`, + multiscale features will be resized and fused at the target level. + low_level: An `int` of backbone level to be used for feature fusion. It is + used when feature_fusion is set to `deeplabv3plus`. + low_level_num_filters: An `int` of reduced number of filters for the low + level features before fusing it with higher level features. It is only + used when feature_fusion is set to `deeplabv3plus`. + activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. + use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(SegmentationHead, self).__init__(**kwargs) + + self._config_dict = { + 'num_classes': num_classes, + 'level': level, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'prediction_kernel_size': prediction_kernel_size, + 'upsample_factor': upsample_factor, + 'feature_fusion': feature_fusion, + 'low_level': low_level, + 'low_level_num_filters': low_level_num_filters, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]): + """Creates the variables of the segmentation head.""" + conv_op = tf.keras.layers.Conv2D + conv_kwargs = { + 'kernel_size': 3, + 'padding': 'same', + 'use_bias': False, + 'kernel_initializer': tf.keras.initializers.RandomNormal(stddev=0.01), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + } + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + if self._config_dict['feature_fusion'] == 'deeplabv3plus': + # Deeplabv3+ feature fusion layers. + self._dlv3p_conv = conv_op( + kernel_size=1, + padding='same', + use_bias=False, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + name='segmentation_head_deeplabv3p_fusion_conv', + filters=self._config_dict['low_level_num_filters']) + + self._dlv3p_norm = bn_op( + name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs) + + # Segmentation head layers. 
+ self._convs = [] + self._norms = [] + for i in range(self._config_dict['num_convs']): + conv_name = 'segmentation_head_conv_{}'.format(i) + self._convs.append( + conv_op( + name=conv_name, + filters=self._config_dict['num_filters'], + **conv_kwargs)) + norm_name = 'segmentation_head_norm_{}'.format(i) + self._norms.append(bn_op(name=norm_name, **bn_kwargs)) + + self._classifier = conv_op( + name='segmentation_output', + filters=self._config_dict['num_classes'], + kernel_size=self._config_dict['prediction_kernel_size'], + padding='same', + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer']) + + super(SegmentationHead, self).build(input_shape) + + def call(self, backbone_output: Mapping[str, tf.Tensor], + decoder_output: Mapping[str, tf.Tensor]): + """Forward pass of the segmentation head. + + Args: + backbone_output: A `dict` of tensors + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + decoder_output: A `dict` of tensors + - key: A `str` of the level of the multilevel features. + - values: A `tf.Tensor` of the feature map tensors, whose shape is + [batch, height_l, width_l, channels]. + Returns: + segmentation prediction mask: A `tf.Tensor` of the segmentation mask + scores predicted from input features. 
+ """ + if self._config_dict['feature_fusion'] == 'deeplabv3plus': + # deeplabv3+ feature fusion + x = decoder_output[str(self._config_dict['level'])] + y = backbone_output[str( + self._config_dict['low_level'])] + y = self._dlv3p_norm(self._dlv3p_conv(y)) + y = self._activation(y) + + x = tf.image.resize( + x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR) + x = tf.cast(x, dtype=y.dtype) + x = tf.concat([x, y], axis=self._bn_axis) + elif self._config_dict['feature_fusion'] == 'pyramid_fusion': + x = nn_layers.pyramid_feature_fusion(decoder_output, + self._config_dict['level']) + else: + x = decoder_output[str(self._config_dict['level'])] + + for conv, norm in zip(self._convs, self._norms): + x = conv(x) + x = norm(x) + x = self._activation(x) + if self._config_dict['upsample_factor'] > 1: + x = spatial_transform_ops.nearest_upsampling( + x, scale=self._config_dict['upsample_factor']) + + return self._classifier(x) + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/heads/segmentation_heads_test.py b/official/vision/beta/modeling/heads/segmentation_heads_test.py new file mode 100644 index 0000000000000000000000000000000000000000..58c8d5869a2aeab3af2a2dcb07504296a9ad6186 --- /dev/null +++ b/official/vision/beta/modeling/heads/segmentation_heads_test.py @@ -0,0 +1,58 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for segmentation_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling.heads import segmentation_heads + + +class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (2, 'pyramid_fusion'), + (3, 'pyramid_fusion'), + ) + def test_forward(self, level, feature_fusion): + head = segmentation_heads.SegmentationHead( + num_classes=10, level=level, feature_fusion=feature_fusion) + backbone_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + } + decoder_features = { + '3': np.random.rand(2, 128, 128, 16), + '4': np.random.rand(2, 64, 64, 16), + } + logits = head(backbone_features, decoder_features) + + if level in decoder_features: + self.assertAllEqual(logits.numpy().shape, [ + 2, decoder_features[str(level)].shape[1], + decoder_features[str(level)].shape[2], 10 + ]) + + def test_serialize_deserialize(self): + head = segmentation_heads.SegmentationHead(num_classes=10, level=3) + config = head.get_config() + new_head = segmentation_heads.SegmentationHead.from_config(config) + self.assertAllEqual(head.get_config(), new_head.get_config()) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/layers/__init__.py b/official/vision/beta/modeling/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4e74bf6083c023cc76432d0afb1f829658d53f44 --- /dev/null +++ b/official/vision/beta/modeling/layers/__init__.py @@ -0,0 +1,44 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Layers package definition.""" + +from official.vision.beta.modeling.layers.box_sampler import BoxSampler +from official.vision.beta.modeling.layers.detection_generator import DetectionGenerator +from official.vision.beta.modeling.layers.detection_generator import MultilevelDetectionGenerator +from official.vision.beta.modeling.layers.mask_sampler import MaskSampler +from official.vision.beta.modeling.layers.nn_blocks import BottleneckBlock +from official.vision.beta.modeling.layers.nn_blocks import BottleneckResidualInner +from official.vision.beta.modeling.layers.nn_blocks import DepthwiseSeparableConvBlock +from official.vision.beta.modeling.layers.nn_blocks import InvertedBottleneckBlock +from official.vision.beta.modeling.layers.nn_blocks import ResidualBlock +from official.vision.beta.modeling.layers.nn_blocks import ResidualInner +from official.vision.beta.modeling.layers.nn_blocks import ReversibleLayer +from official.vision.beta.modeling.layers.nn_blocks_3d import BottleneckBlock3D +from official.vision.beta.modeling.layers.nn_blocks_3d import SelfGating +from official.vision.beta.modeling.layers.nn_layers import CausalConvMixin +from official.vision.beta.modeling.layers.nn_layers import Conv2D +from official.vision.beta.modeling.layers.nn_layers import Conv3D +from official.vision.beta.modeling.layers.nn_layers import DepthwiseConv2D +from official.vision.beta.modeling.layers.nn_layers import GlobalAveragePool3D +from official.vision.beta.modeling.layers.nn_layers import PositionalEncoding +from 
official.vision.beta.modeling.layers.nn_layers import Scale +from official.vision.beta.modeling.layers.nn_layers import SpatialAveragePool3D +from official.vision.beta.modeling.layers.nn_layers import SqueezeExcitation +from official.vision.beta.modeling.layers.nn_layers import StochasticDepth +from official.vision.beta.modeling.layers.nn_layers import TemporalSoftmaxPool +from official.vision.beta.modeling.layers.roi_aligner import MultilevelROIAligner +from official.vision.beta.modeling.layers.roi_generator import MultilevelROIGenerator +from official.vision.beta.modeling.layers.roi_sampler import ROISampler diff --git a/official/vision/beta/modeling/layers/box_sampler.py b/official/vision/beta/modeling/layers/box_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..3dfefbc680ea94722c214878d9479a7137b4d060 --- /dev/null +++ b/official/vision/beta/modeling/layers/box_sampler.py @@ -0,0 +1,93 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains definitions of box sampler.""" + +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import sampling_ops + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class BoxSampler(tf.keras.layers.Layer): + """Creates a BoxSampler to sample positive and negative boxes.""" + + def __init__(self, + num_samples: int = 512, + foreground_fraction: float = 0.25, + **kwargs): + """Initializes a box sampler. + + Args: + num_samples: An `int` of the number of sampled boxes per image. + foreground_fraction: A `float` in [0, 1], what percentage of boxes should + be sampled from the positive examples. + **kwargs: Additional keyword arguments passed to Layer. + """ + self._config_dict = { + 'num_samples': num_samples, + 'foreground_fraction': foreground_fraction, + } + super(BoxSampler, self).__init__(**kwargs) + + def call(self, positive_matches: tf.Tensor, negative_matches: tf.Tensor, + ignored_matches: tf.Tensor): + """Samples and selects positive and negative instances. + + Args: + positive_matches: A `bool` tensor of shape [batch, N] where N is the + number of instances. For each element, `True` means the instance + corresponds to a positive example. + negative_matches: A `bool` tensor of shape [batch, N] where N is the + number of instances. For each element, `True` means the instance + corresponds to a negative example. + ignored_matches: A `bool` tensor of shape [batch, N] where N is the + number of instances. For each element, `True` means the instance should + be ignored. + + Returns: + A `tf.Tensor` of shape [batch_size, K], storing the indices of the + sampled examples, where K is `num_samples`. 
+ """ + sample_candidates = tf.logical_and( + tf.logical_or(positive_matches, negative_matches), + tf.logical_not(ignored_matches)) + + sampler = sampling_ops.BalancedPositiveNegativeSampler( + positive_fraction=self._config_dict['foreground_fraction'], + is_static=True) + + batch_size = sample_candidates.shape[0] + sampled_indicators = [] + for i in range(batch_size): + sampled_indicator = sampler.subsample( + sample_candidates[i], + self._config_dict['num_samples'], + positive_matches[i]) + sampled_indicators.append(sampled_indicator) + sampled_indicators = tf.stack(sampled_indicators) + _, selected_indices = tf.nn.top_k( + tf.cast(sampled_indicators, dtype=tf.int32), + k=self._config_dict['num_samples'], + sorted=True) + + return selected_indices + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/layers/detection_generator.py b/official/vision/beta/modeling/layers/detection_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..b069a199ea895e1ca3c8e1ace874f1bc9053fc50 --- /dev/null +++ b/official/vision/beta/modeling/layers/detection_generator.py @@ -0,0 +1,762 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains definitions of generators to generate the final detections.""" +from typing import List, Optional, Mapping +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import box_ops +from official.vision.beta.ops import nms + + +def _generate_detections_v1(boxes: tf.Tensor, + scores: tf.Tensor, + attributes: Optional[Mapping[str, + tf.Tensor]] = None, + pre_nms_top_k: int = 5000, + pre_nms_score_threshold: float = 0.05, + nms_iou_threshold: float = 0.5, + max_num_detections: int = 100): + """Generates the final detections given the model outputs. + + The implementation unrolls the batch dimension and process images one by one. + It required the batch dimension to be statically known and it is TPU + compatible. + + Args: + boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or + `[batch_size, N, 1, 4]` for box predictions on all feature levels. The + N is the number of total anchors on all levels. + scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which + stacks class probability on all feature levels. The N is the number of + total anchors on all levels. The num_classes is the number of classes + predicted by the model. Note that the class_outputs here is the raw score. + attributes: None or a dict of (attribute_name, attributes) pairs. Each + attributes is a `tf.Tensor` with shape + `[batch_size, N, num_classes, attribute_size]` or + `[batch_size, N, 1, attribute_size]` for attribute predictions on all + feature levels. The N is the number of total anchors on all levels. Can + be None if no attribute learning is required. + pre_nms_top_k: An `int` number of top candidate detections per class before + NMS. + pre_nms_score_threshold: A `float` representing the threshold for deciding + when to remove boxes based on score. + nms_iou_threshold: A `float` representing the threshold for deciding whether + boxes overlap too much with respect to IOU. 
+ max_num_detections: A scalar representing maximum number of boxes retained + over all classes. + + Returns: + nms_boxes: A `float` type `tf.Tensor` of shape + `[batch_size, max_num_detections, 4]` representing top detected boxes in + `[y1, x1, y2, x2]`. + nms_scores: A `float` type `tf.Tensor` of shape + `[batch_size, max_num_detections]` representing sorted confidence scores + for detected boxes. The values are between `[0, 1]`. + nms_classes: An `int` type `tf.Tensor` of shape + `[batch_size, max_num_detections]` representing classes for detected + boxes. + valid_detections: An `int` type `tf.Tensor` of shape `[batch_size]` only the + top `valid_detections` boxes are valid detections. + nms_attributes: None or a dict of (attribute_name, attributes). Each + attribute is a `float` type `tf.Tensor` of shape + `[batch_size, max_num_detections, attribute_size]` representing attribute + predictions for detected boxes. Can be an empty dict if no attribute + learning is required. + """ + with tf.name_scope('generate_detections'): + batch_size = scores.get_shape().as_list()[0] + nmsed_boxes = [] + nmsed_classes = [] + nmsed_scores = [] + valid_detections = [] + if attributes: + nmsed_attributes = {att_name: [] for att_name in attributes.keys()} + else: + nmsed_attributes = {} + + for i in range(batch_size): + (nmsed_boxes_i, nmsed_scores_i, nmsed_classes_i, valid_detections_i, + nmsed_att_i) = _generate_detections_per_image( + boxes[i], + scores[i], + attributes={ + att_name: att[i] for att_name, att in attributes.items() + } if attributes else {}, + pre_nms_top_k=pre_nms_top_k, + pre_nms_score_threshold=pre_nms_score_threshold, + nms_iou_threshold=nms_iou_threshold, + max_num_detections=max_num_detections) + nmsed_boxes.append(nmsed_boxes_i) + nmsed_scores.append(nmsed_scores_i) + nmsed_classes.append(nmsed_classes_i) + valid_detections.append(valid_detections_i) + if attributes: + for att_name in attributes.keys(): + 
def _generate_detections_per_image(
    boxes: tf.Tensor,
    scores: tf.Tensor,
    attributes: Optional[Mapping[str, tf.Tensor]] = None,
    pre_nms_top_k: int = 5000,
    pre_nms_score_threshold: float = 0.05,
    nms_iou_threshold: float = 0.5,
    max_num_detections: int = 100):
  """Generates the final detections per image given the model outputs.

  Args:
    boxes: A `tf.Tensor` with shape `[N, num_classes, 4]` or `[N, 1, 4]` of
      box predictions on all feature levels. The N is the number of total
      anchors on all levels.
    scores: A `tf.Tensor` with shape `[N, num_classes]`, which stacks class
      probability on all feature levels. The N is the number of total anchors
      on all levels. The num_classes is the number of classes predicted by the
      model. Note that the class_outputs here is the raw score.
    attributes: If not None, a dict of `tf.Tensor`. Each value is in shape
      `[N, num_classes, attribute_size]` or `[N, 1, attribute_size]` of
      attribute predictions on all feature levels. The N is the number of total
      anchors on all levels.
    pre_nms_top_k: An `int` number of top candidate detections per class before
      NMS.
    pre_nms_score_threshold: A `float` representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape `[max_num_detections, 4]`
      representing top detected boxes in `[y1, x1, y2, x2]`.
    nms_scores: A `float` tf.Tensor of shape `[max_num_detections]`
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: An `int` tf.Tensor of shape `[max_num_detections]`
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [1] only the top
      `valid_detections` boxes are valid detections.
    nms_attributes: None or a dict. Each value is a `float` tf.Tensor of shape
      `[max_num_detections, attribute_size]` representing attribute predictions
      for detected boxes. Can be an empty dict if `attributes` is None.
  """
  nmsed_boxes = []
  nmsed_scores = []
  nmsed_classes = []
  # When boxes are class-agnostic (`[N, 1, 4]`), num_classes_for_box is 1 and
  # the same box column is reused for every class below.
  num_classes_for_box = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]
  if attributes:
    nmsed_attributes = {att_name: [] for att_name in attributes.keys()}
  else:
    nmsed_attributes = {}

  # NMS is run independently per class; results are merged afterwards.
  for i in range(num_classes):
    boxes_i = boxes[:, min(num_classes_for_box - 1, i)]
    scores_i = scores[:, i]
    # Obtains pre_nms_top_k before running NMS.
    scores_i, indices = tf.nn.top_k(
        scores_i, k=tf.minimum(tf.shape(scores_i)[-1], pre_nms_top_k))
    boxes_i = tf.gather(boxes_i, indices)

    # Padded NMS returns exactly `max_num_detections` indices plus the count
    # of valid ones, which keeps shapes static (TPU compatible).
    (nmsed_indices_i,
     nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
         tf.cast(boxes_i, tf.float32),
         tf.cast(scores_i, tf.float32),
         max_num_detections,
         iou_threshold=nms_iou_threshold,
         score_threshold=pre_nms_score_threshold,
         pad_to_max_output_size=True,
         name='nms_detections_' + str(i))
    nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
    nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
    # Sets scores of invalid boxes to -1. The -1 sentinel both sinks padding
    # entries in the final top_k and drives the valid-detections count below.
    nmsed_scores_i = tf.where(
        tf.less(tf.range(max_num_detections), [nmsed_num_valid_i]),
        nmsed_scores_i, -tf.ones_like(nmsed_scores_i))
    nmsed_classes_i = tf.fill([max_num_detections], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
    nmsed_classes.append(nmsed_classes_i)
    if attributes:
      for att_name, att in attributes.items():
        num_classes_for_attr = att.get_shape().as_list()[1]
        att_i = att[:, min(num_classes_for_attr - 1, i)]
        # Reuse the same top_k/NMS selection computed for the boxes so that
        # attributes stay aligned with their detections.
        att_i = tf.gather(att_i, indices)
        nmsed_att_i = tf.gather(att_i, nmsed_indices_i)
        nmsed_attributes[att_name].append(nmsed_att_i)

  # Concats results from all classes and sort them.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
  nmsed_scores = tf.concat(nmsed_scores, axis=0)
  nmsed_classes = tf.concat(nmsed_classes, axis=0)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores, k=max_num_detections, sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices)
  nmsed_classes = tf.gather(nmsed_classes, indices)
  # Entries with score > -1 are real detections; the rest are padding.
  valid_detections = tf.reduce_sum(
      tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
  if attributes:
    for att_name in attributes.keys():
      nmsed_attributes[att_name] = tf.concat(nmsed_attributes[att_name], axis=0)
      nmsed_attributes[att_name] = tf.gather(nmsed_attributes[att_name],
                                             indices)

  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, nmsed_attributes
def _select_top_k_scores(scores_in: tf.Tensor, pre_nms_num_detections: int):
  """Selects top_k scores and indices for each class.

  Args:
    scores_in: A `tf.Tensor` with shape `[batch_size, N, num_classes]` stacking
      class logit outputs on all feature levels, where N is the number of total
      anchors on all levels.
    pre_nms_num_detections: Number of candidates before NMS.

  Returns:
    A pair of `tf.Tensor`s (scores, indices), each of shape
    `[batch_size, pre_nms_num_detections, num_classes]`.
  """
  batch_size, num_anchors, num_class = scores_in.get_shape().as_list()
  if batch_size is None:
    # Fall back to the dynamic batch size when it is not statically known.
    batch_size = tf.shape(scores_in)[0]

  # Move the class axis before the anchor axis and flatten so that top_k runs
  # over anchors for every (batch, class) row at once.
  per_class_rows = tf.reshape(
      tf.transpose(scores_in, perm=[0, 2, 1]), [-1, num_anchors])
  top_scores, top_indices = tf.nn.top_k(
      per_class_rows, k=pre_nms_num_detections, sorted=True)

  # Restore [batch, class, k] and transpose back to [batch, k, class].
  grouped_shape = [batch_size, num_class, pre_nms_num_detections]
  top_scores = tf.reshape(top_scores, grouped_shape)
  top_indices = tf.reshape(top_indices, grouped_shape)
  return (tf.transpose(top_scores, [0, 2, 1]),
          tf.transpose(top_indices, [0, 2, 1]))
def _generate_detections_v2(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100):
  """Generates the final detections given the model outputs.

  This implementation unrolls the classes dimension while using the
  tf.while_loop to implement the batched NMS, so that it can be parallelized
  at the batch dimension. It should give better performance compared to the v1
  implementation. It is TPU compatible.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]` of box predictions on all feature levels. The
      N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw
      score.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    nmsed_boxes = []
    nmsed_classes = []
    nmsed_scores = []
    valid_detections = []
    # num_classes_for_box is 1 for class-agnostic box regression.
    batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
    if batch_size is None:
      batch_size = tf.shape(boxes)[0]
    _, total_anchors, num_classes = scores.get_shape().as_list()
    # Selects top pre_nms_num scores and indices before NMS.
    scores, indices = _select_top_k_scores(
        scores, min(total_anchors, pre_nms_top_k))
    # NMS is unrolled per class; each iteration handles the whole batch.
    for i in range(num_classes):
      boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
      scores_i = scores[:, :, i]
      # Obtains pre_nms_top_k before running NMS.
      boxes_i = tf.gather(boxes_i, indices[:, :, i], batch_dims=1, axis=1)

      # Filter out scores.
      boxes_i, scores_i = box_ops.filter_boxes_by_scores(
          boxes_i, scores_i, min_score_threshold=pre_nms_score_threshold)

      # Scores are already sorted from the top_k above, which this padded NMS
      # variant relies on.
      (nmsed_scores_i, nmsed_boxes_i) = nms.sorted_non_max_suppression_padded(
          tf.cast(scores_i, tf.float32),
          tf.cast(boxes_i, tf.float32),
          max_num_detections,
          iou_threshold=nms_iou_threshold)
      nmsed_classes_i = tf.fill([batch_size, max_num_detections], i)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
    # Merge the per-class results and keep the overall top detections.
    nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
    nmsed_scores = tf.concat(nmsed_scores, axis=1)
    nmsed_classes = tf.concat(nmsed_classes, axis=1)
    nmsed_scores, indices = tf.nn.top_k(
        nmsed_scores, k=max_num_detections, sorted=True)
    nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
    nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)
    # Entries with score > -1 count as valid detections; padded entries were
    # given sentinel scores by the NMS above.
    valid_detections = tf.reduce_sum(
        input_tensor=tf.cast(tf.greater(nmsed_scores, -1), tf.int32), axis=1)
    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _generate_detections_batched(boxes: tf.Tensor, scores: tf.Tensor,
                                 pre_nms_score_threshold: float,
                                 nms_iou_threshold: float,
                                 max_num_detections: int):
  """Generates detected boxes with scores and classes for one-stage detector.

  Thin wrapper over `tf.image.combined_non_max_suppression`. Note that batched
  NMS is not supported on TPU currently.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]` of box predictions on all feature levels, where
      N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]` stacking
      raw class scores on all feature levels.
    pre_nms_score_threshold: A `float` score threshold; boxes scoring below it
      are removed.
    nms_iou_threshold: A `float` IoU threshold for suppressing overlapping
      boxes.
    max_num_detections: A `scalar` maximum number of boxes retained over all
      classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
      with top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      with sorted confidence scores in [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      with classes of detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size]; only the top
      `valid_detections` boxes are valid.
  """
  with tf.name_scope('generate_detections'):
    # The total cap and the per-class cap are intentionally the same value.
    detection_results = tf.image.combined_non_max_suppression(
        boxes,
        scores,
        max_output_size_per_class=max_num_detections,
        max_total_size=max_num_detections,
        iou_threshold=nms_iou_threshold,
        score_threshold=pre_nms_score_threshold,
        pad_per_class=False,
        clip_boxes=False)
    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
        detection_results)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionGenerator(tf.keras.layers.Layer):
  """Generates the final detected boxes with scores and classes."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               use_batched_nms: bool = False,
               **kwargs):
    """Initializes a detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression.
        If False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      use_batched_nms: A `bool` of whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'use_batched_nms': use_batched_nms,
    }
    super(DetectionGenerator, self).__init__(**kwargs)

  def __call__(self,
               raw_boxes: tf.Tensor,
               raw_scores: tf.Tensor,
               anchor_boxes: tf.Tensor,
               image_shape: tf.Tensor,
               regression_weights: Optional[List[float]] = None,
               bbox_per_class: bool = True):
    """Generates final detections.

    Args:
      raw_boxes: A `tf.Tensor` of shape of `[batch_size, K, num_classes * 4]`
        representing the class-specific box coordinates relative to anchors.
      raw_scores: A `tf.Tensor` of shape of `[batch_size, K, num_classes]`
        representing the class logits before applying score activation.
      anchor_boxes: A `tf.Tensor` of shape of `[batch_size, K, 4]` representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of `[batch_size, 2]` storing the
        image height and width w.r.t. the scaled image, i.e. the same image
        space as `box_outputs` and `anchor_boxes`.
      regression_weights: A list of four float numbers to scale coordinates.
      bbox_per_class: A `bool`. If True, perform per-class box regression.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` `tf.Tensor` of shape
          [batch, max_num_detections] representing sorted confidence scores
          for detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing scores of all the decoded boxes.
    """
    box_scores = tf.nn.softmax(raw_scores, axis=-1)

    # Removes the background class, which is assumed to occupy index 0.
    box_scores_shape = tf.shape(box_scores)
    box_scores_shape_list = box_scores.get_shape().as_list()
    batch_size = box_scores_shape[0]
    num_locations = box_scores_shape_list[1]
    num_classes = box_scores_shape_list[-1]

    box_scores = tf.slice(box_scores, [0, 0, 1], [-1, -1, -1])

    if bbox_per_class:
      # Per-class regression: drop the background box column and replicate the
      # anchors so each remaining class has its own anchor row.
      num_detections = num_locations * (num_classes - 1)
      raw_boxes = tf.reshape(raw_boxes,
                             [batch_size, num_locations, num_classes, 4])
      raw_boxes = tf.slice(raw_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
      anchor_boxes = tf.tile(
          tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
      raw_boxes = tf.reshape(raw_boxes, [batch_size, num_detections, 4])
      anchor_boxes = tf.reshape(anchor_boxes, [batch_size, num_detections, 4])

    # Box decoding.
    decoded_boxes = box_ops.decode_boxes(
        raw_boxes, anchor_boxes, weights=regression_weights)

    # Box clipping to the (scaled) image extent.
    decoded_boxes = box_ops.clip_boxes(
        decoded_boxes, tf.expand_dims(image_shape, axis=1))

    if bbox_per_class:
      decoded_boxes = tf.reshape(
          decoded_boxes, [batch_size, num_locations, num_classes - 1, 4])
    else:
      # Class-agnostic boxes still need a (size-1) class dimension for NMS.
      decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': decoded_boxes,
          'decoded_box_scores': box_scores,
      }

    if self._config_dict['use_batched_nms']:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_batched(
              decoded_boxes,
              box_scores,
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))
    else:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_v2(
              decoded_boxes,
              box_scores,
              self._config_dict['pre_nms_top_k'],
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
    }

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelDetectionGenerator(tf.keras.layers.Layer):
  """Generates detected boxes with scores and classes for one-stage detector."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               use_batched_nms: bool = False,
               **kwargs):
    """Initializes a multi-level detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression. If
        False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      use_batched_nms: A `bool` of whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'use_batched_nms': use_batched_nms,
    }
    super(MultilevelDetectionGenerator, self).__init__(**kwargs)

  def __call__(self,
               raw_boxes: Mapping[str, tf.Tensor],
               raw_scores: Mapping[str, tf.Tensor],
               anchor_boxes: tf.Tensor,
               image_shape: tf.Tensor,
               # Fix: the parameter defaults to None, so the annotation must
               # be Optional (PEP 484; implicit Optional is rejected by strict
               # type checkers).
               raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
    """Generates final detections.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tensors of shape `[batch, feature_h, feature_w,
        num_anchors * 4]`.
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape `[batch, feature_h, feature_w,
        num_anchors]`.
      anchor_boxes: A `tf.Tensor` of shape of [batch_size, K, 4] representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of [batch_size, 2] storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      raw_attributes: If not None, a `dict` of (attribute_name,
        attribute_prediction) pairs. `attribute_prediction` is a dict that
        contains keys representing FPN levels and values representing tensors
        of shape `[batch, feature_h, feature_w, num_anchors *
        attribute_size]`.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` tf.Tensor of shape
          [batch, max_num_detections] representing sorted confidence scores
          for detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
        `detection_attributes`: A dict. Values of the dict is a `float`
          tf.Tensor of shape [batch, max_num_detections, attribute_size]
          representing attribute predictions for detected boxes.
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing scores of all the decoded boxes.
        `decoded_box_attributes`: A dict. Values in the dict is a
          `float` tf.Tensor of shape [batch, num_raw_boxes, attribute_size]
          representing attribute predictions of all the decoded boxes.
    """
    # Collects outputs from all levels into a list.
    boxes = []
    scores = []
    if raw_attributes:
      attributes = {att_name: [] for att_name in raw_attributes.keys()}
    else:
      attributes = {}

    # Levels are assumed to be contiguous between min and max; each level's
    # tensors are keyed by the stringified level number.
    levels = list(raw_boxes.keys())
    min_level = int(min(levels))
    max_level = int(max(levels))
    for i in range(min_level, max_level + 1):
      raw_boxes_i = raw_boxes[str(i)]
      raw_scores_i = raw_scores[str(i)]
      batch_size = tf.shape(raw_boxes_i)[0]
      (_, feature_h_i, feature_w_i,
       num_anchors_per_locations_times_4) = raw_boxes_i.get_shape().as_list()
      num_locations = feature_h_i * feature_w_i
      num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
      num_classes = raw_scores_i.get_shape().as_list(
      )[-1] // num_anchors_per_locations

      # Applies score transformation and removes the implicit background
      # class (index 0).
      scores_i = tf.sigmoid(
          tf.reshape(raw_scores_i, [
              batch_size, num_locations * num_anchors_per_locations,
              num_classes
          ]))
      scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

      # Box decoding.
      # The anchor boxes are shared for all data in a batch.
      # One stage detector only supports class agnostic box regression.
      anchor_boxes_i = tf.reshape(
          anchor_boxes[str(i)],
          [batch_size, num_locations * num_anchors_per_locations, 4])
      raw_boxes_i = tf.reshape(
          raw_boxes_i,
          [batch_size, num_locations * num_anchors_per_locations, 4])
      boxes_i = box_ops.decode_boxes(raw_boxes_i, anchor_boxes_i)

      # Box clipping.
      boxes_i = box_ops.clip_boxes(
          boxes_i, tf.expand_dims(image_shape, axis=1))

      boxes.append(boxes_i)
      scores.append(scores_i)

      if raw_attributes:
        for att_name, raw_att in raw_attributes.items():
          attribute_size = raw_att[str(
              i)].get_shape().as_list()[-1] // num_anchors_per_locations
          att_i = tf.reshape(raw_att[str(i)], [
              batch_size, num_locations * num_anchors_per_locations,
              attribute_size
          ])
          attributes[att_name].append(att_i)

    boxes = tf.concat(boxes, axis=1)
    # The (size-1) class dimension is required by the NMS helpers.
    boxes = tf.expand_dims(boxes, axis=2)
    scores = tf.concat(scores, axis=1)

    if raw_attributes:
      for att_name in raw_attributes.keys():
        attributes[att_name] = tf.concat(attributes[att_name], axis=1)
        attributes[att_name] = tf.expand_dims(attributes[att_name], axis=2)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': boxes,
          'decoded_box_scores': scores,
          'decoded_box_attributes': attributes,
      }

    if self._config_dict['use_batched_nms']:
      if raw_attributes:
        raise ValueError('Attribute learning is not supported for batched NMS.')

      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_batched(
              boxes,
              scores,
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))
      # Batched NMS does not produce attributes; use an empty dict.
      nmsed_attributes = {}
    else:
      if raw_attributes:
        # Only the v1 (per-image) path supports attribute learning.
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, nmsed_attributes = (
            _generate_detections_v1(
                boxes,
                scores,
                attributes=attributes if raw_attributes else None,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections']))
      else:
        nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
            _generate_detections_v2(
                boxes, scores, self._config_dict['pre_nms_top_k'],
                self._config_dict['pre_nms_score_threshold'],
                self._config_dict['nms_iou_threshold'],
                self._config_dict['max_num_detections']))
        nmsed_attributes = {}
    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
        'detection_attributes': nmsed_attributes,
    }

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for detection_generator.py.""" +# Import libraries + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.ops import anchor + + +class SelectTopKScoresTest(tf.test.TestCase): + + def testSelectTopKScores(self): + pre_nms_num_boxes = 2 + scores_data = [[[0.2, 0.2], [0.1, 0.9], [0.5, 0.1], [0.3, 0.5]]] + scores_in = tf.constant(scores_data, dtype=tf.float32) + top_k_scores, top_k_indices = detection_generator._select_top_k_scores( + scores_in, pre_nms_num_detections=pre_nms_num_boxes) + expected_top_k_scores = np.array([[[0.5, 0.9], [0.3, 0.5]]], + dtype=np.float32) + + expected_top_k_indices = [[[2, 1], [3, 3]]] + + self.assertAllEqual(top_k_scores.numpy(), expected_top_k_scores) + self.assertAllEqual(top_k_indices.numpy(), expected_top_k_indices) + + +class DetectionGeneratorTest( + parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (True), + (False), + ) + def testDetectionsOutputShape(self, use_batched_nms): + max_num_detections = 100 + num_classes = 4 + pre_nms_top_k = 5000 + pre_nms_score_threshold = 0.01 + batch_size = 1 + kwargs = { + 'apply_nms': True, + 'pre_nms_top_k': pre_nms_top_k, + 'pre_nms_score_threshold': pre_nms_score_threshold, + 'nms_iou_threshold': 0.5, + 'max_num_detections': max_num_detections, + 'use_batched_nms': use_batched_nms, + } + generator = detection_generator.DetectionGenerator(**kwargs) + + cls_outputs_all = ( + np.random.rand(84, num_classes) - 0.5) * 3 # 
random 84x3 outputs. + box_outputs_all = np.random.rand(84, 4 * num_classes) # random 84 boxes. + anchor_boxes_all = np.random.rand(84, 4) # random 84 boxes. + class_outputs = tf.reshape( + tf.convert_to_tensor(cls_outputs_all, dtype=tf.float32), + [1, 84, num_classes]) + box_outputs = tf.reshape( + tf.convert_to_tensor(box_outputs_all, dtype=tf.float32), + [1, 84, 4 * num_classes]) + anchor_boxes = tf.reshape( + tf.convert_to_tensor(anchor_boxes_all, dtype=tf.float32), + [1, 84, 4]) + image_info = tf.constant( + [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], + dtype=tf.float32) + results = generator( + box_outputs, class_outputs, anchor_boxes, image_info[:, 1, :]) + boxes = results['detection_boxes'] + classes = results['detection_classes'] + scores = results['detection_scores'] + valid_detections = results['num_detections'] + + self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4)) + self.assertEqual(scores.numpy().shape, (batch_size, max_num_detections,)) + self.assertEqual(classes.numpy().shape, (batch_size, max_num_detections,)) + self.assertEqual(valid_detections.numpy().shape, (batch_size,)) + + def test_serialize_deserialize(self): + kwargs = { + 'apply_nms': True, + 'pre_nms_top_k': 1000, + 'pre_nms_score_threshold': 0.1, + 'nms_iou_threshold': 0.5, + 'max_num_detections': 10, + 'use_batched_nms': False, + } + generator = detection_generator.DetectionGenerator(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(generator.get_config(), expected_config) + + new_generator = ( + detection_generator.DetectionGenerator.from_config( + generator.get_config())) + + self.assertAllEqual(generator.get_config(), new_generator.get_config()) + + +class MultilevelDetectionGeneratorTest( + parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (True, False), + (False, False), + (False, True), + ) + def testDetectionsOutputShape(self, use_batched_nms, has_att_heads): + min_level = 4 + max_level = 6 + num_scales = 2 + 
max_num_detections = 100 + aspect_ratios = [1.0, 2.0,] + anchor_scale = 2.0 + output_size = [64, 64] + num_classes = 4 + pre_nms_top_k = 5000 + pre_nms_score_threshold = 0.01 + batch_size = 1 + kwargs = { + 'apply_nms': True, + 'pre_nms_top_k': pre_nms_top_k, + 'pre_nms_score_threshold': pre_nms_score_threshold, + 'nms_iou_threshold': 0.5, + 'max_num_detections': max_num_detections, + 'use_batched_nms': use_batched_nms, + } + + input_anchor = anchor.build_anchor_generator(min_level, max_level, + num_scales, aspect_ratios, + anchor_scale) + anchor_boxes = input_anchor(output_size) + cls_outputs_all = ( + np.random.rand(84, num_classes) - 0.5) * 3 # random 84x3 outputs. + box_outputs_all = np.random.rand(84, 4) # random 84 boxes. + class_outputs = { + '4': + tf.reshape( + tf.convert_to_tensor(cls_outputs_all[0:64], dtype=tf.float32), + [1, 8, 8, num_classes]), + '5': + tf.reshape( + tf.convert_to_tensor(cls_outputs_all[64:80], dtype=tf.float32), + [1, 4, 4, num_classes]), + '6': + tf.reshape( + tf.convert_to_tensor(cls_outputs_all[80:84], dtype=tf.float32), + [1, 2, 2, num_classes]), + } + box_outputs = { + '4': tf.reshape(tf.convert_to_tensor( + box_outputs_all[0:64], dtype=tf.float32), [1, 8, 8, 4]), + '5': tf.reshape(tf.convert_to_tensor( + box_outputs_all[64:80], dtype=tf.float32), [1, 4, 4, 4]), + '6': tf.reshape(tf.convert_to_tensor( + box_outputs_all[80:84], dtype=tf.float32), [1, 2, 2, 4]), + } + if has_att_heads: + att_outputs_all = np.random.rand(84, 1) # random attributes. 
+ att_outputs = { + 'depth': { + '4': + tf.reshape( + tf.convert_to_tensor( + att_outputs_all[0:64], dtype=tf.float32), + [1, 8, 8, 1]), + '5': + tf.reshape( + tf.convert_to_tensor( + att_outputs_all[64:80], dtype=tf.float32), + [1, 4, 4, 1]), + '6': + tf.reshape( + tf.convert_to_tensor( + att_outputs_all[80:84], dtype=tf.float32), + [1, 2, 2, 1]), + } + } + else: + att_outputs = None + image_info = tf.constant([[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], + dtype=tf.float32) + generator = detection_generator.MultilevelDetectionGenerator(**kwargs) + results = generator(box_outputs, class_outputs, anchor_boxes, + image_info[:, 1, :], att_outputs) + boxes = results['detection_boxes'] + classes = results['detection_classes'] + scores = results['detection_scores'] + valid_detections = results['num_detections'] + + self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4)) + self.assertEqual(scores.numpy().shape, (batch_size, max_num_detections,)) + self.assertEqual(classes.numpy().shape, (batch_size, max_num_detections,)) + self.assertEqual(valid_detections.numpy().shape, (batch_size,)) + if has_att_heads: + for att in results['detection_attributes'].values(): + self.assertEqual(att.numpy().shape, (batch_size, max_num_detections, 1)) + + def test_serialize_deserialize(self): + kwargs = { + 'apply_nms': True, + 'pre_nms_top_k': 1000, + 'pre_nms_score_threshold': 0.1, + 'nms_iou_threshold': 0.5, + 'max_num_detections': 10, + 'use_batched_nms': False, + } + generator = detection_generator.MultilevelDetectionGenerator(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(generator.get_config(), expected_config) + + new_generator = ( + detection_generator.MultilevelDetectionGenerator.from_config( + generator.get_config())) + + self.assertAllEqual(generator.get_config(), new_generator.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/layers/mask_sampler.py 
b/official/vision/beta/modeling/layers/mask_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..73d3caa32749bdd37488eefbcab6982273bbbed3 --- /dev/null +++ b/official/vision/beta/modeling/layers/mask_sampler.py @@ -0,0 +1,166 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of mask sampler.""" + +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import spatial_transform_ops + + +def _sample_and_crop_foreground_masks(candidate_rois: tf.Tensor, + candidate_gt_boxes: tf.Tensor, + candidate_gt_classes: tf.Tensor, + candidate_gt_indices: tf.Tensor, + gt_masks: tf.Tensor, + num_sampled_masks: int = 128, + mask_target_size: int = 28): + """Samples and creates cropped foreground masks for training. + + Args: + candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is the + number of candidate RoIs to be considered for mask sampling. It includes + both positive and negative RoIs. The `num_mask_samples_per_image` positive + RoIs will be sampled to create mask training targets. + candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing + the corresponding groundtruth boxes to the `candidate_rois`. + candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing the + corresponding groundtruth classes to the `candidate_rois`. 0 in the tensor + corresponds to the background class, i.e. 
negative RoIs. + candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the + corresponding groundtruth instance indices to the `candidate_gt_boxes`, + i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and + gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is + the superset of candidate_gt_boxes. + gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height, + mask_width] containing all the groundtruth masks which sample masks are + drawn from. + num_sampled_masks: An `int` that specifies the number of masks to sample. + mask_target_size: An `int` that specifies the final cropped mask size after + sampling. The output masks are resized w.r.t the sampled RoIs. + + Returns: + foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the + RoI that corresponds to the sampled foreground masks, where + K = num_mask_samples_per_image. + foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the + classes corresponding to the sampled foreground masks. + cropoped_foreground_masks: A `tf.Tensor` of shape of + [batch_size, K, mask_target_size, mask_target_size] storing the cropped + foreground masks used for training. 
+ """ + _, fg_instance_indices = tf.nn.top_k( + tf.cast(tf.greater(candidate_gt_classes, 0), dtype=tf.int32), + k=num_sampled_masks) + + fg_instance_indices_shape = tf.shape(fg_instance_indices) + batch_indices = ( + tf.expand_dims(tf.range(fg_instance_indices_shape[0]), axis=-1) * + tf.ones([1, fg_instance_indices_shape[-1]], dtype=tf.int32)) + + gather_nd_instance_indices = tf.stack( + [batch_indices, fg_instance_indices], axis=-1) + foreground_rois = tf.gather_nd( + candidate_rois, gather_nd_instance_indices) + foreground_boxes = tf.gather_nd( + candidate_gt_boxes, gather_nd_instance_indices) + foreground_classes = tf.gather_nd( + candidate_gt_classes, gather_nd_instance_indices) + foreground_gt_indices = tf.gather_nd( + candidate_gt_indices, gather_nd_instance_indices) + foreground_gt_indices = tf.where( + tf.equal(foreground_gt_indices, -1), + tf.zeros_like(foreground_gt_indices), + foreground_gt_indices) + + foreground_gt_indices_shape = tf.shape(foreground_gt_indices) + batch_indices = ( + tf.expand_dims(tf.range(foreground_gt_indices_shape[0]), axis=-1) * + tf.ones([1, foreground_gt_indices_shape[-1]], dtype=tf.int32)) + gather_nd_gt_indices = tf.stack( + [batch_indices, foreground_gt_indices], axis=-1) + foreground_masks = tf.gather_nd(gt_masks, gather_nd_gt_indices) + + cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box( + foreground_masks, foreground_boxes, foreground_rois, mask_target_size, + sample_offset=0.5) + + return foreground_rois, foreground_classes, cropped_foreground_masks + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class MaskSampler(tf.keras.layers.Layer): + """Samples and creates mask training targets.""" + + def __init__(self, mask_target_size: int, num_sampled_masks: int, **kwargs): + self._config_dict = { + 'mask_target_size': mask_target_size, + 'num_sampled_masks': num_sampled_masks, + } + super(MaskSampler, self).__init__(**kwargs) + + def call(self, candidate_rois: tf.Tensor, 
candidate_gt_boxes: tf.Tensor, + candidate_gt_classes: tf.Tensor, candidate_gt_indices: tf.Tensor, + gt_masks: tf.Tensor): + """Samples and creates mask targets for training. + + Args: + candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is + the number of candidate RoIs to be considered for mask sampling. It + includes both positive and negative RoIs. The + `num_mask_samples_per_image` positive RoIs will be sampled to create + mask training targets. + candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing + the corresponding groundtruth boxes to the `candidate_rois`. + candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing + the corresponding groundtruth classes to the `candidate_rois`. 0 in the + tensor corresponds to the background class, i.e. negative RoIs. + candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the + corresponding groundtruth instance indices to the `candidate_gt_boxes`, + i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i], + where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= + N, is the superset of candidate_gt_boxes. + gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height, + mask_width] containing all the groundtruth masks which sample masks are + drawn from. after sampling. The output masks are resized w.r.t the + sampled RoIs. + + Returns: + foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the + RoI that corresponds to the sampled foreground masks, where + K = num_mask_samples_per_image. + foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the + classes corresponding to the sampled foreground masks. + cropoped_foreground_masks: A `tf.Tensor` of shape of + [batch_size, K, mask_target_size, mask_target_size] storing the + cropped foreground masks used for training. 
+ """ + foreground_rois, foreground_classes, cropped_foreground_masks = ( + _sample_and_crop_foreground_masks( + candidate_rois, + candidate_gt_boxes, + candidate_gt_classes, + candidate_gt_indices, + gt_masks, + self._config_dict['num_sampled_masks'], + self._config_dict['mask_target_size'])) + return foreground_rois, foreground_classes, cropped_foreground_masks + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/layers/nn_blocks.py b/official/vision/beta/modeling/layers/nn_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..0df43c44421c0d5f6259acb3180288d9527b7c5f --- /dev/null +++ b/official/vision/beta/modeling/layers/nn_blocks.py @@ -0,0 +1,1276 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains common building blocks for neural networks.""" + +from typing import Any, Callable, Dict, List, Optional, Tuple, Union, Text + +# Import libraries +from absl import logging +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.modeling.layers import nn_layers + + +def _pad_strides(strides: int, axis: int) -> Tuple[int, int, int, int]: + """Converts int to len 4 strides (`tf.nn.avg_pool` uses length 4).""" + if axis == 1: + return (1, 1, strides, strides) + else: + return (1, strides, strides, 1) + + +def _maybe_downsample(x: tf.Tensor, + out_filter: int, + strides: int, + axis: int) -> tf.Tensor: + """Downsamples feature map and 0-pads tensor if in_filter != out_filter.""" + data_format = 'NCHW' if axis == 1 else 'NHWC' + strides = _pad_strides(strides, axis=axis) + + x = tf.nn.avg_pool(x, strides, strides, 'VALID', data_format=data_format) + + in_filter = x.shape[axis] + if in_filter < out_filter: + # Pad on channel dimension with 0s: half on top half on bottom. + pad_size = [(out_filter - in_filter) // 2, (out_filter - in_filter) // 2] + if axis == 1: + x = tf.pad(x, [[0, 0], pad_size, [0, 0], [0, 0]]) + else: + x = tf.pad(x, [[0, 0], [0, 0], [0, 0], pad_size]) + + return x + 0. + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ResidualBlock(tf.keras.layers.Layer): + """A residual block.""" + + def __init__(self, + filters, + strides, + use_projection=False, + se_ratio=None, + resnetd_shortcut=False, + stochastic_depth_drop_rate=None, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes a residual block with BN after convolutions. + + Args: + filters: An `int` number of filters for the first two convolutions. Note + that the third and final convolution will use 4 times as many filters. + strides: An `int` block stride. 
If greater than 1, this block will + ultimately downsample the input. + use_projection: A `bool` for whether this block should use a projection + shortcut (versus the default identity shortcut). This is usually `True` + for the first block of a block group, which may change the number of + filters and the resolution. + se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer. + resnetd_shortcut: A `bool` if True, apply the resnetd style modification + to the shortcut connection. Not implemented in residual blocks. + stochastic_depth_drop_rate: A `float` or None. if not None, drop rate for + the stochastic depth layer. + kernel_initializer: A `str` of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d. + Default to None. + activation: A `str` name of the activation function. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(ResidualBlock, self).__init__(**kwargs) + + self._filters = filters + self._strides = strides + self._use_projection = use_projection + self._se_ratio = se_ratio + self._resnetd_shortcut = resnetd_shortcut + self._use_sync_bn = use_sync_bn + self._activation = activation + self._stochastic_depth_drop_rate = stochastic_depth_drop_rate + self._kernel_initializer = kernel_initializer + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation_fn = tf_utils.get_activation(activation) + + def build(self, input_shape): + if self._use_projection: + self._shortcut = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=1, + strides=self._strides, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._conv1 = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=3, + strides=self._strides, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm1 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._conv2 = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=3, + strides=1, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm2 = 
self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1: + self._squeeze_excitation = nn_layers.SqueezeExcitation( + in_filters=self._filters, + out_filters=self._filters, + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + else: + self._squeeze_excitation = None + + if self._stochastic_depth_drop_rate: + self._stochastic_depth = nn_layers.StochasticDepth( + self._stochastic_depth_drop_rate) + else: + self._stochastic_depth = None + + super(ResidualBlock, self).build(input_shape) + + def get_config(self): + config = { + 'filters': self._filters, + 'strides': self._strides, + 'use_projection': self._use_projection, + 'se_ratio': self._se_ratio, + 'resnetd_shortcut': self._resnetd_shortcut, + 'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(ResidualBlock, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs, training=None): + shortcut = inputs + if self._use_projection: + shortcut = self._shortcut(shortcut) + shortcut = self._norm0(shortcut) + + x = self._conv1(inputs) + x = self._norm1(x) + x = self._activation_fn(x) + + x = self._conv2(x) + x = self._norm2(x) + + if self._squeeze_excitation: + x = self._squeeze_excitation(x) + + if self._stochastic_depth: + x = self._stochastic_depth(x, training=training) + + return self._activation_fn(x + shortcut) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class BottleneckBlock(tf.keras.layers.Layer): + 
"""A standard bottleneck block.""" + + def __init__(self, + filters, + strides, + dilation_rate=1, + use_projection=False, + se_ratio=None, + resnetd_shortcut=False, + stochastic_depth_drop_rate=None, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes a standard bottleneck block with BN after convolutions. + + Args: + filters: An `int` number of filters for the first two convolutions. Note + that the third and final convolution will use 4 times as many filters. + strides: An `int` block stride. If greater than 1, this block will + ultimately downsample the input. + dilation_rate: An `int` dilation_rate of convolutions. Default to 1. + use_projection: A `bool` for whether this block should use a projection + shortcut (versus the default identity shortcut). This is usually `True` + for the first block of a block group, which may change the number of + filters and the resolution. + se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer. + resnetd_shortcut: A `bool`. If True, apply the resnetd style modification + to the shortcut connection. + stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for + the stochastic depth layer. + kernel_initializer: A `str` of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d. + Default to None. + activation: A `str` name of the activation function. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(BottleneckBlock, self).__init__(**kwargs) + + self._filters = filters + self._strides = strides + self._dilation_rate = dilation_rate + self._use_projection = use_projection + self._se_ratio = se_ratio + self._resnetd_shortcut = resnetd_shortcut + self._use_sync_bn = use_sync_bn + self._activation = activation + self._stochastic_depth_drop_rate = stochastic_depth_drop_rate + self._kernel_initializer = kernel_initializer + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + + def build(self, input_shape): + if self._use_projection: + if self._resnetd_shortcut: + self._shortcut0 = tf.keras.layers.AveragePooling2D( + pool_size=2, strides=self._strides, padding='same') + self._shortcut1 = tf.keras.layers.Conv2D( + filters=self._filters * 4, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + else: + self._shortcut = tf.keras.layers.Conv2D( + filters=self._filters * 4, + kernel_size=1, + strides=self._strides, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._conv1 = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm1 = self._norm( + 
axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._activation1 = tf_utils.get_activation( + self._activation, use_keras_layer=True) + + self._conv2 = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=3, + strides=self._strides, + dilation_rate=self._dilation_rate, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm2 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._activation2 = tf_utils.get_activation( + self._activation, use_keras_layer=True) + + self._conv3 = tf.keras.layers.Conv2D( + filters=self._filters * 4, + kernel_size=1, + strides=1, + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm3 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._activation3 = tf_utils.get_activation( + self._activation, use_keras_layer=True) + + if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1: + self._squeeze_excitation = nn_layers.SqueezeExcitation( + in_filters=self._filters * 4, + out_filters=self._filters * 4, + se_ratio=self._se_ratio, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + else: + self._squeeze_excitation = None + + if self._stochastic_depth_drop_rate: + self._stochastic_depth = nn_layers.StochasticDepth( + self._stochastic_depth_drop_rate) + else: + self._stochastic_depth = None + self._add = tf.keras.layers.Add() + + super(BottleneckBlock, self).build(input_shape) + + def get_config(self): + config = { + 'filters': self._filters, + 'strides': self._strides, + 'dilation_rate': self._dilation_rate, + 'use_projection': self._use_projection, + 'se_ratio': 
self._se_ratio, + 'resnetd_shortcut': self._resnetd_shortcut, + 'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(BottleneckBlock, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs, training=None): + shortcut = inputs + if self._use_projection: + if self._resnetd_shortcut: + shortcut = self._shortcut0(shortcut) + shortcut = self._shortcut1(shortcut) + else: + shortcut = self._shortcut(shortcut) + shortcut = self._norm0(shortcut) + + x = self._conv1(inputs) + x = self._norm1(x) + x = self._activation1(x) + + x = self._conv2(x) + x = self._norm2(x) + x = self._activation2(x) + + x = self._conv3(x) + x = self._norm3(x) + + if self._squeeze_excitation: + x = self._squeeze_excitation(x) + + if self._stochastic_depth: + x = self._stochastic_depth(x, training=training) + + x = self._add([x, shortcut]) + return self._activation3(x) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class InvertedBottleneckBlock(tf.keras.layers.Layer): + """An inverted bottleneck block.""" + + def __init__(self, + in_filters, + out_filters, + expand_ratio, + strides, + kernel_size=3, + se_ratio=None, + stochastic_depth_drop_rate=None, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='relu', + se_inner_activation='relu', + se_gating_activation='sigmoid', + expand_se_in_filters=False, + depthwise_activation=None, + use_sync_bn=False, + dilation_rate=1, + divisible_by=1, + regularize_depthwise=False, + use_depthwise=True, + use_residual=True, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes an inverted bottleneck 
block with BN after convolutions. + + Args: + in_filters: An `int` number of filters of the input tensor. + out_filters: An `int` number of filters of the output tensor. + expand_ratio: An `int` of expand_ratio for an inverted bottleneck block. + strides: An `int` block stride. If greater than 1, this block will + ultimately downsample the input. + kernel_size: An `int` kernel_size of the depthwise conv layer. + se_ratio: A `float` or None. If not None, se ratio for the squeeze and + excitation layer. + stochastic_depth_drop_rate: A `float` or None. if not None, drop rate for + the stochastic depth layer. + kernel_initializer: A `str` of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d. + Default to None. + activation: A `str` name of the activation function. + se_inner_activation: A `str` name of squeeze-excitation inner activation. + se_gating_activation: A `str` name of squeeze-excitation gating + activation. + expand_se_in_filters: A `bool` of whether or not to expand in_filter in + squeeze and excitation layer. + depthwise_activation: A `str` name of the activation function for + depthwise only. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + dilation_rate: An `int` that specifies the dilation rate to use for. + divisible_by: An `int` that ensures all inner dimensions are divisible by + this number. + dilated convolution: An `int` to specify the same value for all spatial + dimensions. + regularize_depthwise: A `bool` of whether or not apply regularization on + depthwise. + use_depthwise: A `bool` of whether to uses fused convolutions instead of + depthwise. + use_residual: A `bool` of whether to include residual connection between + input and output. + norm_momentum: A `float` of normalization momentum for the moving average. 
+ norm_epsilon: A `float` added to variance to avoid dividing by zero. + **kwargs: Additional keyword arguments to be passed. + """ + super(InvertedBottleneckBlock, self).__init__(**kwargs) + + self._in_filters = in_filters + self._out_filters = out_filters + self._expand_ratio = expand_ratio + self._strides = strides + self._kernel_size = kernel_size + self._se_ratio = se_ratio + self._divisible_by = divisible_by + self._stochastic_depth_drop_rate = stochastic_depth_drop_rate + self._dilation_rate = dilation_rate + self._use_sync_bn = use_sync_bn + self._regularize_depthwise = regularize_depthwise + self._use_depthwise = use_depthwise + self._use_residual = use_residual + self._activation = activation + self._se_inner_activation = se_inner_activation + self._se_gating_activation = se_gating_activation + self._depthwise_activation = depthwise_activation + self._kernel_initializer = kernel_initializer + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._expand_se_in_filters = expand_se_in_filters + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + if not depthwise_activation: + self._depthwise_activation = activation + if regularize_depthwise: + self._depthsize_regularizer = kernel_regularizer + else: + self._depthsize_regularizer = None + + def build(self, input_shape): + expand_filters = self._in_filters + if self._expand_ratio > 1: + # First 1x1 conv for channel expansion. 
+ expand_filters = nn_layers.make_divisible( + self._in_filters * self._expand_ratio, self._divisible_by) + + expand_kernel = 1 if self._use_depthwise else self._kernel_size + expand_stride = 1 if self._use_depthwise else self._strides + + self._conv0 = tf.keras.layers.Conv2D( + filters=expand_filters, + kernel_size=expand_kernel, + strides=expand_stride, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._activation_layer = tf_utils.get_activation( + self._activation, use_keras_layer=True) + + if self._use_depthwise: + # Depthwise conv. + self._conv1 = tf.keras.layers.DepthwiseConv2D( + kernel_size=(self._kernel_size, self._kernel_size), + strides=self._strides, + padding='same', + depth_multiplier=1, + dilation_rate=self._dilation_rate, + use_bias=False, + depthwise_initializer=self._kernel_initializer, + depthwise_regularizer=self._depthsize_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm1 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._depthwise_activation_layer = tf_utils.get_activation( + self._depthwise_activation, use_keras_layer=True) + + # Squeeze and excitation. 
+ if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1: + logging.info('Use Squeeze and excitation.') + in_filters = self._in_filters + if self._expand_se_in_filters: + in_filters = expand_filters + self._squeeze_excitation = nn_layers.SqueezeExcitation( + in_filters=in_filters, + out_filters=expand_filters, + se_ratio=self._se_ratio, + divisible_by=self._divisible_by, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._se_inner_activation, + gating_activation=self._se_gating_activation) + else: + self._squeeze_excitation = None + + # Last 1x1 conv. + self._conv2 = tf.keras.layers.Conv2D( + filters=self._out_filters, + kernel_size=1, + strides=1, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm2 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + if self._stochastic_depth_drop_rate: + self._stochastic_depth = nn_layers.StochasticDepth( + self._stochastic_depth_drop_rate) + else: + self._stochastic_depth = None + self._add = tf.keras.layers.Add() + + super(InvertedBottleneckBlock, self).build(input_shape) + + def get_config(self): + config = { + 'in_filters': self._in_filters, + 'out_filters': self._out_filters, + 'expand_ratio': self._expand_ratio, + 'strides': self._strides, + 'kernel_size': self._kernel_size, + 'se_ratio': self._se_ratio, + 'divisible_by': self._divisible_by, + 'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'se_inner_activation': self._se_inner_activation, + 'se_gating_activation': self._se_gating_activation, + 'expand_se_in_filters': 
self._expand_se_in_filters, + 'depthwise_activation': self._depthwise_activation, + 'dilation_rate': self._dilation_rate, + 'use_sync_bn': self._use_sync_bn, + 'regularize_depthwise': self._regularize_depthwise, + 'use_depthwise': self._use_depthwise, + 'use_residual': self._use_residual, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(InvertedBottleneckBlock, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs, training=None): + shortcut = inputs + if self._expand_ratio > 1: + x = self._conv0(inputs) + x = self._norm0(x) + x = self._activation_layer(x) + else: + x = inputs + + if self._use_depthwise: + x = self._conv1(x) + x = self._norm1(x) + x = self._depthwise_activation_layer(x) + + if self._squeeze_excitation: + x = self._squeeze_excitation(x) + + x = self._conv2(x) + x = self._norm2(x) + + if (self._use_residual and + self._in_filters == self._out_filters and + self._strides == 1): + if self._stochastic_depth: + x = self._stochastic_depth(x, training=training) + x = self._add([x, shortcut]) + + return x + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ResidualInner(tf.keras.layers.Layer): + """Creates a single inner block of a residual. + + This corresponds to `F`/`G` functions in the RevNet paper: + Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse. + The Reversible Residual Network: Backpropagation Without Storing Activations. 
+ (https://arxiv.org/pdf/1707.04585.pdf) + """ + + def __init__( + self, + filters: int, + strides: int, + kernel_initializer: Union[str, Callable[ + ..., tf.keras.initializers.Initializer]] = 'VarianceScaling', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + activation: Union[str, Callable[..., tf.Tensor]] = 'relu', + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + batch_norm_first: bool = True, + **kwargs): + """Initializes a ResidualInner. + + Args: + filters: An `int` of output filter size. + strides: An `int` of stride size for convolution for the residual block. + kernel_initializer: A `str` or `tf.keras.initializers.Initializer` + instance for convolutional layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` for Conv2D. + activation: A `str` or `callable` instance of the activation function. + use_sync_bn: A `bool`. If True, use synchronized batch normalization. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + batch_norm_first: A `bool` of whether to apply activation and batch norm + before conv. + **kwargs: Additional keyword arguments to be passed. 
+ """ + super(ResidualInner, self).__init__(**kwargs) + + self.strides = strides + self.filters = filters + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self._kernel_regularizer = kernel_regularizer + self._activation = tf.keras.activations.get(activation) + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._batch_norm_first = batch_norm_first + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation_fn = tf_utils.get_activation(activation) + + def build(self, input_shape: tf.TensorShape): + if self._batch_norm_first: + self._batch_norm_0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._conv2d_1 = tf.keras.layers.Conv2D( + filters=self.filters, + kernel_size=3, + strides=self.strides, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + + self._batch_norm_1 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._conv2d_2 = tf.keras.layers.Conv2D( + filters=self.filters, + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + + super(ResidualInner, self).build(input_shape) + + def get_config(self) -> Dict[str, Any]: + config = { + 'filters': self.filters, + 'strides': self.strides, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'batch_norm_first': self._batch_norm_first, 
+    }
+    base_config = super(ResidualInner, self).get_config()
+    return dict(list(base_config.items()) + list(config.items()))
+
+  def call(
+      self, inputs: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor:
+    x = inputs
+    if self._batch_norm_first:
+      x = self._batch_norm_0(x, training=training)
+      x = self._activation_fn(x)
+    x = self._conv2d_1(x)
+
+    x = self._batch_norm_1(x, training=training)
+    x = self._activation_fn(x)
+    x = self._conv2d_2(x)
+    return x
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class BottleneckResidualInner(tf.keras.layers.Layer):
+  """Creates a single inner block of a bottleneck.
+
+  This corresponds to `F`/`G` functions in the RevNet paper:
+  Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
+  The Reversible Residual Network: Backpropagation Without Storing Activations.
+  (https://arxiv.org/pdf/1707.04585.pdf)
+  """
+
+  def __init__(
+      self,
+      filters: int,
+      strides: int,
+      kernel_initializer: Union[str, Callable[
+          ..., tf.keras.initializers.Initializer]] = 'VarianceScaling',
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      batch_norm_first: bool = True,
+      **kwargs):
+    """Initializes a BottleneckResidualInner.
+
+    Args:
+      filters: An `int` number of filters for the first two convolutions. The
+        last convolution expands this to `4 * filters`, which is therefore the
+        number of output channels of the bottleneck block.
+      strides: An `int` of stride size for convolution for the residual block.
+      kernel_initializer: A `str` or `tf.keras.initializers.Initializer`
+        instance for convolutional layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` for Conv2D.
+      activation: A `str` or `callable` instance of the activation function.
+      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
+ norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + batch_norm_first: A `bool` of whether to apply activation and batch norm + before conv. + **kwargs: Additional keyword arguments to be passed. + """ + super(BottleneckResidualInner, self).__init__(**kwargs) + + self.strides = strides + self.filters = filters + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self._kernel_regularizer = kernel_regularizer + self._activation = tf.keras.activations.get(activation) + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._batch_norm_first = batch_norm_first + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation_fn = tf_utils.get_activation(activation) + + def build(self, input_shape: tf.TensorShape): + if self._batch_norm_first: + self._batch_norm_0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._conv2d_1 = tf.keras.layers.Conv2D( + filters=self.filters, + kernel_size=1, + strides=self.strides, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + self._batch_norm_1 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._conv2d_2 = tf.keras.layers.Conv2D( + filters=self.filters, + kernel_size=3, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + self._batch_norm_2 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + self._conv2d_3 = 
tf.keras.layers.Conv2D( + filters=self.filters * 4, + kernel_size=1, + strides=1, + use_bias=False, + padding='same', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + + super(BottleneckResidualInner, self).build(input_shape) + + def get_config(self) -> Dict[str, Any]: + config = { + 'filters': self.filters, + 'strides': self.strides, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'batch_norm_first': self._batch_norm_first, + } + base_config = super(BottleneckResidualInner, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call( + self, inputs: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor: + x = inputs + if self._batch_norm_first: + x = self._batch_norm_0(x, training=training) + x = self._activation_fn(x) + x = self._conv2d_1(x) + + x = self._batch_norm_1(x, training=training) + x = self._activation_fn(x) + x = self._conv2d_2(x) + + x = self._batch_norm_2(x, training=training) + x = self._activation_fn(x) + x = self._conv2d_3(x) + + return x + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class ReversibleLayer(tf.keras.layers.Layer): + """Creates a reversible layer. + + Computes y1 = x1 + f(x2), y2 = x2 + g(y1), where f and g can be arbitrary + layers that are stateless, which in this case are `ResidualInner` layers. + """ + + def __init__(self, + f: tf.keras.layers.Layer, + g: tf.keras.layers.Layer, + manual_grads: bool = True, + **kwargs): + """Initializes a ReversibleLayer. + + Args: + f: A `tf.keras.layers.Layer` instance of `f` inner block referred to in + paper. Each reversible layer consists of two inner functions. For + example, in RevNet the reversible residual consists of two f/g inner + (bottleneck) residual functions. 
Where the input to the reversible layer + is x, the input gets partitioned in the channel dimension and the + forward pass follows (eq8): x = [x1; x2], z1 = x1 + f(x2), y2 = x2 + + g(z1), y1 = stop_gradient(z1). + g: A `tf.keras.layers.Layer` instance of `g` inner block referred to in + paper. Detailed explanation same as above as `f` arg. + manual_grads: A `bool` [Testing Only] of whether to manually take + gradients as in Algorithm 1 or defer to autograd. + **kwargs: Additional keyword arguments to be passed. + """ + super(ReversibleLayer, self).__init__(**kwargs) + + self._f = f + self._g = g + self._manual_grads = manual_grads + + if tf.keras.backend.image_data_format() == 'channels_last': + self._axis = -1 + else: + self._axis = 1 + + def get_config(self) -> Dict[str, Any]: + config = { + 'f': self._f, + 'g': self._g, + 'manual_grads': self._manual_grads, + } + base_config = super(ReversibleLayer, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _ckpt_non_trainable_vars(self): + self._f_non_trainable_vars = [ + v.read_value() for v in self._f.non_trainable_variables] + self._g_non_trainable_vars = [ + v.read_value() for v in self._g.non_trainable_variables] + + def _load_ckpt_non_trainable_vars(self): + for v, v_chkpt in zip( + self._f.non_trainable_variables, self._f_non_trainable_vars): + v.assign(v_chkpt) + for v, v_chkpt in zip( + self._g.non_trainable_variables, self._g_non_trainable_vars): + v.assign(v_chkpt) + + def call( + self, inputs: tf.Tensor, training: Optional[bool] = None) -> tf.Tensor: + + @tf.custom_gradient + def reversible( + x: tf.Tensor + ) -> Tuple[tf.Tensor, Callable[[Any], Tuple[List[tf.Tensor], + List[tf.Tensor]]]]: + """Implements Algorithm 1 in the RevNet paper. + + Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse. + The Reversible Residual Network: Backpropagation Without Storing + Activations. + (https://arxiv.org/pdf/1707.04585.pdf) + + Args: + x: An input `tf.Tensor. 
+ + Returns: + y: The output [y1; y2] in Algorithm 1. + grad_fn: A callable function that computes the gradients. + """ + with tf.GradientTape() as fwdtape: + fwdtape.watch(x) + x1, x2 = tf.split(x, num_or_size_splits=2, axis=self._axis) + f_x2 = self._f(x2, training=training) + x1_down = _maybe_downsample( + x1, f_x2.shape[self._axis], self._f.strides, self._axis) + z1 = f_x2 + x1_down + g_z1 = self._g(z1, training=training) + x2_down = _maybe_downsample( + x2, g_z1.shape[self._axis], self._f.strides, self._axis) + y2 = x2_down + g_z1 + + # Equation 8: https://arxiv.org/pdf/1707.04585.pdf + # Decouple y1 and z1 so that their derivatives are different. + y1 = tf.identity(z1) + y = tf.concat([y1, y2], axis=self._axis) + + irreversible = ( + (self._f.strides != 1 or self._g.strides != 1) + or (y.shape[self._axis] != inputs.shape[self._axis])) + + # Checkpointing moving mean/variance for batch normalization layers + # as they shouldn't be updated during the custom gradient pass of f/g. + self._ckpt_non_trainable_vars() + + def grad_fn(dy: tf.Tensor, + variables: Optional[List[tf.Variable]] = None, + ) -> Tuple[List[tf.Tensor], List[tf.Tensor]]: + """Given dy calculate (dy/dx)|_{x_{input}} using f/g.""" + if irreversible or not self._manual_grads: + grads_combined = fwdtape.gradient( + y, [x] + variables, output_gradients=dy) + dx = grads_combined[0] + grad_vars = grads_combined[1:] + else: + y1_nograd = tf.stop_gradient(y1) + y2_nograd = tf.stop_gradient(y2) + dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self._axis) + + # Index mapping from self.f/g.trainable_variables to grad_fn + # input `variables` kwarg so that we can reorder dwf + dwg + # variable gradient list to match `variables` order. 
+ f_var_refs = [v.ref() for v in self._f.trainable_variables] + g_var_refs = [v.ref() for v in self._g.trainable_variables] + fg_var_refs = f_var_refs + g_var_refs + self_to_var_index = [fg_var_refs.index(v.ref()) for v in variables] + + # Algorithm 1 in paper (line # documented in-line) + z1 = y1_nograd # line 2 + with tf.GradientTape() as gtape: + gtape.watch(z1) + g_z1 = self._g(z1, training=training) + x2 = y2_nograd - g_z1 # line 3 + + with tf.GradientTape() as ftape: + ftape.watch(x2) + f_x2 = self._f(x2, training=training) + x1 = z1 - f_x2 # pylint: disable=unused-variable # line 4 + + # Compute gradients + g_grads_combined = gtape.gradient( + g_z1, + [z1] + self._g.trainable_variables, + output_gradients=dy2) + dz1 = dy1 + g_grads_combined[0] # line 5 + dwg = g_grads_combined[1:] # line 9 + + f_grads_combined = ftape.gradient( + f_x2, + [x2] + self._f.trainable_variables, + output_gradients=dz1) + dx2 = dy2 + f_grads_combined[0] # line 6 + dwf = f_grads_combined[1:] # line 8 + dx1 = dz1 # line 7 + + # Pack the input and variable gradients. + dx = tf.concat([dx1, dx2], axis=self._axis) + grad_vars = dwf + dwg + # Reorder gradients (trainable_variables to variables kwarg order) + grad_vars = [grad_vars[i] for i in self_to_var_index] + + # Restore batch normalization moving mean/variance for correctness. 
+        self._load_ckpt_non_trainable_vars()
+
+        return dx, grad_vars  # grad_fn end
+
+      return y, grad_fn  # reversible end
+
+    activations = reversible(inputs)
+    return activations
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
+  """Creates a depthwise separable convolution block with batch normalization."""
+
+  def __init__(
+      self,
+      filters: int,
+      kernel_size: int = 3,
+      strides: int = 1,
+      regularize_depthwise=False,
+      activation: Text = 'relu6',
+      kernel_initializer: Text = 'VarianceScaling',
+      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      dilation_rate: int = 1,
+      use_sync_bn: bool = False,
+      norm_momentum: float = 0.99,
+      norm_epsilon: float = 0.001,
+      **kwargs):
+    """Initializes a depthwise separable convolution block with batch norm.
+
+    Args:
+      filters: An `int` number of output filters of the pointwise (1x1)
+        convolution that follows the depthwise convolution.
+      kernel_size: An `int` that specifies the height and width of the 2D
+        convolution window.
+      strides: An `int` of block stride. If greater than 1, this block will
+        ultimately downsample the input.
+      regularize_depthwise: A `bool`. If True, apply regularization on
+        depthwise.
+      activation: A `str` name of the activation function.
+      kernel_initializer: A `str` of kernel_initializer for convolutional
+        layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      dilation_rate: An `int` or tuple/list of 2 `int`, specifying the dilation
+        rate to use for dilated convolution. Can be a single integer to specify
+        the same value for all spatial dimensions.
+      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      **kwargs: Additional keyword arguments to be passed.
+ """ + super(DepthwiseSeparableConvBlock, self).__init__(**kwargs) + self._filters = filters + self._kernel_size = kernel_size + self._strides = strides + self._activation = activation + self._regularize_depthwise = regularize_depthwise + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._dilation_rate = dilation_rate + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation_fn = tf_utils.get_activation(activation) + if regularize_depthwise: + self._depthsize_regularizer = kernel_regularizer + else: + self._depthsize_regularizer = None + + def get_config(self): + config = { + 'filters': self._filters, + 'strides': self._strides, + 'regularize_depthwise': self._regularize_depthwise, + 'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(DepthwiseSeparableConvBlock, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + + self._dwconv0 = tf.keras.layers.DepthwiseConv2D( + kernel_size=self._kernel_size, + strides=self._strides, + padding='same', + depth_multiplier=1, + dilation_rate=self._dilation_rate, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._depthsize_regularizer, + use_bias=False) + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + 
epsilon=self._norm_epsilon) + + self._conv1 = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=1, + strides=1, + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer) + self._norm1 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + super(DepthwiseSeparableConvBlock, self).build(input_shape) + + def call(self, inputs, training=None): + x = self._dwconv0(inputs) + x = self._norm0(x) + x = self._activation_fn(x) + + x = self._conv1(x) + x = self._norm1(x) + return self._activation_fn(x) diff --git a/official/vision/beta/modeling/layers/nn_blocks_3d.py b/official/vision/beta/modeling/layers/nn_blocks_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..cf377530af094810757bea43dfaef83264e4b36b --- /dev/null +++ b/official/vision/beta/modeling/layers/nn_blocks_3d.py @@ -0,0 +1,286 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains common building blocks for 3D networks.""" +# Import libraries +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.modeling.layers import nn_layers + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class SelfGating(tf.keras.layers.Layer): + """Feature gating as used in S3D-G. 
+ + This implements the S3D-G network from: + Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu, Kevin Murphy. + Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video + Classification. + (https://arxiv.org/pdf/1712.04851.pdf) + """ + + def __init__(self, filters, **kwargs): + """Initializes a self-gating layer. + + Args: + filters: An `int` number of filters for the convolutional layer. + **kwargs: Additional keyword arguments to be passed. + """ + super(SelfGating, self).__init__(**kwargs) + self._filters = filters + + def build(self, input_shape): + self._spatial_temporal_average = tf.keras.layers.GlobalAveragePooling3D() + + # No BN and activation after conv. + self._transformer_w = tf.keras.layers.Conv3D( + filters=self._filters, + kernel_size=[1, 1, 1], + use_bias=True, + kernel_initializer=tf.keras.initializers.TruncatedNormal( + mean=0.0, stddev=0.01)) + + super(SelfGating, self).build(input_shape) + + def call(self, inputs): + x = self._spatial_temporal_average(inputs) + + x = tf.expand_dims(x, 1) + x = tf.expand_dims(x, 2) + x = tf.expand_dims(x, 3) + + x = self._transformer_w(x) + x = tf.nn.sigmoid(x) + + return tf.math.multiply(x, inputs) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class BottleneckBlock3D(tf.keras.layers.Layer): + """Creates a 3D bottleneck block.""" + + def __init__(self, + filters, + temporal_kernel_size, + temporal_strides, + spatial_strides, + stochastic_depth_drop_rate=0.0, + se_ratio=None, + use_self_gating=False, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes a 3D bottleneck block with BN after convolutions. + + Args: + filters: An `int` number of filters for the first two convolutions. Note + that the third and final convolution will use 4 times as many filters. 
+    temporal_kernel_size: An `int` of kernel size for the temporal
+      convolutional layer.
+    temporal_strides: An `int` of temporal stride for the temporal
+      convolutional layer.
+    spatial_strides: An `int` of spatial stride for the spatial convolutional
+      layer.
+    stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
+      the stochastic depth layer.
+    se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
+    use_self_gating: A `bool` of whether to apply self-gating module or not.
+    kernel_initializer: A `str` of kernel_initializer for convolutional
+      layers.
+    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+      Conv2D. Default to None.
+    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
+      Default to None.
+    activation: A `str` name of the activation function.
+    use_sync_bn: A `bool`. If True, use synchronized batch normalization.
+    norm_momentum: A `float` of normalization momentum for the moving average.
+    norm_epsilon: A `float` added to variance to avoid dividing by zero.
+    **kwargs: Additional keyword arguments to be passed.
+ """ + super(BottleneckBlock3D, self).__init__(**kwargs) + + self._filters = filters + self._temporal_kernel_size = temporal_kernel_size + self._spatial_strides = spatial_strides + self._temporal_strides = temporal_strides + self._stochastic_depth_drop_rate = stochastic_depth_drop_rate + self._use_self_gating = use_self_gating + self._se_ratio = se_ratio + self._use_sync_bn = use_sync_bn + self._activation = activation + self._kernel_initializer = kernel_initializer + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation_fn = tf_utils.get_activation(activation) + + def build(self, input_shape): + self._shortcut_maxpool = tf.keras.layers.MaxPool3D( + pool_size=[1, 1, 1], + strides=[ + self._temporal_strides, self._spatial_strides, self._spatial_strides + ]) + + self._shortcut_conv = tf.keras.layers.Conv3D( + filters=4 * self._filters, + kernel_size=1, + strides=[ + self._temporal_strides, self._spatial_strides, self._spatial_strides + ], + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._temporal_conv = tf.keras.layers.Conv3D( + filters=self._filters, + kernel_size=[self._temporal_kernel_size, 1, 1], + strides=[self._temporal_strides, 1, 1], + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm1 = self._norm( + axis=self._bn_axis, + 
momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._spatial_conv = tf.keras.layers.Conv3D( + filters=self._filters, + kernel_size=[1, 3, 3], + strides=[1, self._spatial_strides, self._spatial_strides], + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm2 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + self._expand_conv = tf.keras.layers.Conv3D( + filters=4 * self._filters, + kernel_size=[1, 1, 1], + strides=[1, 1, 1], + padding='same', + use_bias=False, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + self._norm3 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon) + + if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1: + self._squeeze_excitation = nn_layers.SqueezeExcitation( + in_filters=self._filters * 4, + out_filters=self._filters * 4, + se_ratio=self._se_ratio, + use_3d_input=True, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + else: + self._squeeze_excitation = None + + if self._stochastic_depth_drop_rate: + self._stochastic_depth = nn_layers.StochasticDepth( + self._stochastic_depth_drop_rate) + else: + self._stochastic_depth = None + + if self._use_self_gating: + self._self_gating = SelfGating(filters=4 * self._filters) + else: + self._self_gating = None + + super(BottleneckBlock3D, self).build(input_shape) + + def get_config(self): + config = { + 'filters': self._filters, + 'temporal_kernel_size': self._temporal_kernel_size, + 'temporal_strides': self._temporal_strides, + 'spatial_strides': self._spatial_strides, + 'use_self_gating': self._use_self_gating, + 'se_ratio': self._se_ratio, + 'stochastic_depth_drop_rate': 
self._stochastic_depth_drop_rate, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(BottleneckBlock3D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs, training=None): + in_filters = inputs.shape.as_list()[-1] + if in_filters == 4 * self._filters: + if self._temporal_strides == 1 and self._spatial_strides == 1: + shortcut = inputs + else: + shortcut = self._shortcut_maxpool(inputs) + else: + shortcut = self._shortcut_conv(inputs) + shortcut = self._norm0(shortcut) + + x = self._temporal_conv(inputs) + x = self._norm1(x) + x = self._activation_fn(x) + + x = self._spatial_conv(x) + x = self._norm2(x) + x = self._activation_fn(x) + + x = self._expand_conv(x) + x = self._norm3(x) + + # Apply self-gating, SE, stochastic depth. + if self._self_gating: + x = self._self_gating(x) + if self._squeeze_excitation: + x = self._squeeze_excitation(x) + if self._stochastic_depth: + x = self._stochastic_depth(x, training=training) + + # Apply activation before additional modules. + x = self._activation_fn(x + shortcut) + + return x diff --git a/official/vision/beta/modeling/layers/nn_blocks_3d_test.py b/official/vision/beta/modeling/layers/nn_blocks_3d_test.py new file mode 100644 index 0000000000000000000000000000000000000000..189c0e7cb7df4710b2e465af476ce43952097ca7 --- /dev/null +++ b/official/vision/beta/modeling/layers/nn_blocks_3d_test.py @@ -0,0 +1,59 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for nn_blocks_3d."""
+
+# Import libraries
+from absl.testing import parameterized
+import tensorflow as tf
+
+from official.vision.beta.modeling.layers import nn_blocks_3d
+
+
+class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
+
+  @parameterized.parameters(
+      (nn_blocks_3d.BottleneckBlock3D, 1, 1, 2, True, 0.2, 0.1),
+      (nn_blocks_3d.BottleneckBlock3D, 3, 2, 1, False, 0.0, 0.0),
+  )
+  def test_bottleneck_block_creation(self, block_fn, temporal_kernel_size,
+                                     temporal_strides, spatial_strides,
+                                     use_self_gating, se_ratio,
+                                     stochastic_depth):
+    # Feed a known-shape input through one block and check the output shape:
+    # channels stay at 4x filters, spatial dims shrink by spatial_strides.
+    temporal_size = 16
+    spatial_size = 128
+    filters = 256
+    inputs = tf.keras.Input(
+        shape=(temporal_size, spatial_size, spatial_size, filters * 4),
+        batch_size=1)
+    block = block_fn(
+        filters=filters,
+        temporal_kernel_size=temporal_kernel_size,
+        temporal_strides=temporal_strides,
+        spatial_strides=spatial_strides,
+        use_self_gating=use_self_gating,
+        se_ratio=se_ratio,
+        stochastic_depth_drop_rate=stochastic_depth)
+
+    features = block(inputs)
+
+    self.assertAllEqual([
+        1, temporal_size // temporal_strides, spatial_size // spatial_strides,
+        spatial_size // spatial_strides, filters * 4
+    ], features.shape.as_list())
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/official/vision/beta/modeling/layers/nn_blocks_test.py b/official/vision/beta/modeling/layers/nn_blocks_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..07e7ad16147f696782d5efbb662f9465bcdb7243
--- /dev/null
+++ b/official/vision/beta/modeling/layers/nn_blocks_test.py
@@
-0,0 +1,307 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for nn_blocks.""" + +from typing import Any, Iterable, Tuple +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.modeling.layers import nn_blocks + + +def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]: + """Returns the combinations of end-to-end tests to run.""" + return combinations.combine( + distribution=[ + strategy_combinations.default_strategy, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + ) + + +class NNBlocksTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (nn_blocks.ResidualBlock, 1, False, 0.0, None), + (nn_blocks.ResidualBlock, 2, True, 0.2, 0.25), + ) + def test_residual_block_creation(self, block_fn, strides, use_projection, + stochastic_depth_drop_rate, se_ratio): + input_size = 128 + filter_size = 256 + inputs = tf.keras.Input( + shape=(input_size, input_size, filter_size), batch_size=1) + block = block_fn( + filter_size, + strides, + use_projection=use_projection, + se_ratio=se_ratio, + stochastic_depth_drop_rate=stochastic_depth_drop_rate, + ) + + features = block(inputs) + + self.assertAllEqual( + [1, input_size // 
strides, input_size // strides, filter_size], + features.shape.as_list()) + + @parameterized.parameters( + (nn_blocks.BottleneckBlock, 1, False, 0.0, None), + (nn_blocks.BottleneckBlock, 2, True, 0.2, 0.25), + ) + def test_bottleneck_block_creation(self, block_fn, strides, use_projection, + stochastic_depth_drop_rate, se_ratio): + input_size = 128 + filter_size = 256 + inputs = tf.keras.Input( + shape=(input_size, input_size, filter_size * 4), batch_size=1) + block = block_fn( + filter_size, + strides, + use_projection=use_projection, + se_ratio=se_ratio, + stochastic_depth_drop_rate=stochastic_depth_drop_rate) + + features = block(inputs) + + self.assertAllEqual( + [1, input_size // strides, input_size // strides, filter_size * 4], + features.shape.as_list()) + + @parameterized.parameters( + (nn_blocks.InvertedBottleneckBlock, 1, 1, None, None), + (nn_blocks.InvertedBottleneckBlock, 6, 1, None, None), + (nn_blocks.InvertedBottleneckBlock, 1, 2, None, None), + (nn_blocks.InvertedBottleneckBlock, 1, 1, 0.2, None), + (nn_blocks.InvertedBottleneckBlock, 1, 1, None, 0.2), + ) + def test_invertedbottleneck_block_creation( + self, block_fn, expand_ratio, strides, se_ratio, + stochastic_depth_drop_rate): + input_size = 128 + in_filters = 24 + out_filters = 40 + inputs = tf.keras.Input( + shape=(input_size, input_size, in_filters), batch_size=1) + block = block_fn( + in_filters=in_filters, + out_filters=out_filters, + expand_ratio=expand_ratio, + strides=strides, + se_ratio=se_ratio, + stochastic_depth_drop_rate=stochastic_depth_drop_rate) + + features = block(inputs) + + self.assertAllEqual( + [1, input_size // strides, input_size // strides, out_filters], + features.shape.as_list()) + + +class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate(distribution_strategy_combinations()) + def test_shape(self, distribution): + bsz, h, w, c = 8, 32, 32, 32 + filters = 64 + strides = 2 + + input_tensor = tf.random.uniform(shape=[bsz, h, w, c]) 
+ with distribution.scope(): + test_layer = nn_blocks.ResidualInner(filters, strides) + + output = test_layer(input_tensor) + expected_output_shape = [bsz, h // strides, w // strides, filters] + self.assertEqual(expected_output_shape, output.shape.as_list()) + + +class BottleneckResidualInnerTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate(distribution_strategy_combinations()) + def test_shape(self, distribution): + bsz, h, w, c = 8, 32, 32, 32 + filters = 64 + strides = 2 + + input_tensor = tf.random.uniform(shape=[bsz, h, w, c]) + with distribution.scope(): + test_layer = nn_blocks.BottleneckResidualInner(filters, strides) + + output = test_layer(input_tensor) + expected_output_shape = [bsz, h // strides, w // strides, filters * 4] + self.assertEqual(expected_output_shape, output.shape.as_list()) + + +class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate(distribution_strategy_combinations()) + def test_downsampling_non_reversible_step(self, distribution): + bsz, h, w, c = 8, 32, 32, 32 + filters = 64 + strides = 2 + + input_tensor = tf.random.uniform(shape=[bsz, h, w, c]) + with distribution.scope(): + f = nn_blocks.ResidualInner( + filters=filters // 2, + strides=strides, + batch_norm_first=True) + g = nn_blocks.ResidualInner( + filters=filters // 2, + strides=1, + batch_norm_first=True) + test_layer = nn_blocks.ReversibleLayer(f, g) + test_layer.build(input_tensor.shape) + optimizer = tf.keras.optimizers.SGD(learning_rate=0.01) + + @tf.function + def step_fn(): + with tf.GradientTape() as tape: + output = test_layer(input_tensor, training=True) + grads = tape.gradient(output, test_layer.trainable_variables) + # Test applying gradients with optimizer works + optimizer.apply_gradients(zip(grads, test_layer.trainable_variables)) + + return output + + replica_output = distribution.run(step_fn) + outputs = distribution.experimental_local_results(replica_output) + + # Assert forward pass shape + 
expected_output_shape = [bsz, h // strides, w // strides, filters] + for output in outputs: + self.assertEqual(expected_output_shape, output.shape.as_list()) + + @combinations.generate(distribution_strategy_combinations()) + def test_reversible_step(self, distribution): + # Reversible layers satisfy: (a) strides = 1 (b) in_filter = out_filter + bsz, h, w, c = 8, 32, 32, 32 + filters = c + strides = 1 + + input_tensor = tf.random.uniform(shape=[bsz, h, w, c]) + with distribution.scope(): + f = nn_blocks.ResidualInner( + filters=filters // 2, + strides=strides, + batch_norm_first=False) + g = nn_blocks.ResidualInner( + filters=filters // 2, + strides=1, + batch_norm_first=False) + test_layer = nn_blocks.ReversibleLayer(f, g) + test_layer(input_tensor, training=False) # init weights + optimizer = tf.keras.optimizers.SGD(learning_rate=0.01) + + @tf.function + def step_fn(): + with tf.GradientTape() as tape: + output = test_layer(input_tensor, training=True) + grads = tape.gradient(output, test_layer.trainable_variables) + # Test applying gradients with optimizer works + optimizer.apply_gradients(zip(grads, test_layer.trainable_variables)) + + return output + + @tf.function + def fwd(): + test_layer(input_tensor) + + distribution.run(fwd) # Initialize variables + prev_variables = tf.identity_n(test_layer.trainable_variables) + replica_output = distribution.run(step_fn) + outputs = distribution.experimental_local_results(replica_output) + + # Assert variables values have changed values + for v0, v1 in zip(prev_variables, test_layer.trainable_variables): + self.assertNotAllEqual(v0, v1) + + # Assert forward pass shape + expected_output_shape = [bsz, h // strides, w // strides, filters] + for output in outputs: + self.assertEqual(expected_output_shape, output.shape.as_list()) + + @combinations.generate(distribution_strategy_combinations()) + def test_manual_gradients_correctness(self, distribution): + bsz, h, w, c = 8, 32, 32, 32 + filters = c + strides = 1 + + 
input_tensor = tf.random.uniform(shape=[bsz, h, w, c * 4]) # bottleneck + with distribution.scope(): + f_manual = nn_blocks.BottleneckResidualInner( + filters=filters // 2, + strides=strides, + batch_norm_first=False) + g_manual = nn_blocks.BottleneckResidualInner( + filters=filters // 2, + strides=1, + batch_norm_first=False) + manual_grad_layer = nn_blocks.ReversibleLayer(f_manual, g_manual) + manual_grad_layer(input_tensor, training=False) # init weights + + f_auto = nn_blocks.BottleneckResidualInner( + filters=filters // 2, + strides=strides, + batch_norm_first=False) + g_auto = nn_blocks.BottleneckResidualInner( + filters=filters // 2, + strides=1, + batch_norm_first=False) + auto_grad_layer = nn_blocks.ReversibleLayer( + f_auto, g_auto, manual_grads=False) + auto_grad_layer(input_tensor) # init weights + # Clone all weights (tf.keras.layers.Layer has no .clone()) + auto_grad_layer._f.set_weights(manual_grad_layer._f.get_weights()) + auto_grad_layer._g.set_weights(manual_grad_layer._g.get_weights()) + + @tf.function + def manual_fn(): + with tf.GradientTape() as tape: + output = manual_grad_layer(input_tensor, training=True) + grads = tape.gradient(output, manual_grad_layer.trainable_variables) + return grads + + @tf.function + def auto_fn(): + with tf.GradientTape() as tape: + output = auto_grad_layer(input_tensor, training=True) + grads = tape.gradient(output, auto_grad_layer.trainable_variables) + return grads + + manual_grads = distribution.run(manual_fn) + auto_grads = distribution.run(auto_fn) + + # Assert gradients calculated manually are close to that from autograd + for manual_grad, auto_grad in zip(manual_grads, auto_grads): + self.assertAllClose( + distribution.experimental_local_results(manual_grad), + distribution.experimental_local_results(auto_grad), + atol=5e-3, rtol=5e-3) + + # Verify that BN moving mean and variance is correct. 
+    for manual_var, auto_var in zip(
+        manual_grad_layer.non_trainable_variables,
+        auto_grad_layer.non_trainable_variables):
+      self.assertAllClose(manual_var, auto_var)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/official/vision/beta/modeling/layers/nn_layers.py b/official/vision/beta/modeling/layers/nn_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..f44b17a25ca899020da353f0e6b9bcf93a7974bc
--- /dev/null
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -0,0 +1,887 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains common building blocks for neural networks."""
+
+from typing import Callable, Dict, List, Optional, Tuple, Union
+
+from absl import logging
+import tensorflow as tf
+
+from official.modeling import tf_utils
+
+
+# Type annotations.
+States = Dict[str, tf.Tensor]
+Activation = Union[str, Callable]
+
+
+def make_divisible(value: float,
+                   divisor: int,
+                   min_value: Optional[float] = None
+                   ) -> int:
+  """This is to ensure that all layers have channels that are divisible by 8.
+
+  Args:
+    value: A `float` of original value.
+    divisor: An `int` of the divisor that needs to be checked upon.
+    min_value: A `float` of minimum value threshold.
+
+  Returns:
+    The adjusted value in `int` that is divisible against divisor.
+ """ + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_value < 0.9 * value: + new_value += divisor + return new_value + + +def round_filters(filters: int, + multiplier: float, + divisor: int = 8, + min_depth: Optional[int] = None, + skip: bool = False): + """Rounds number of filters based on width multiplier.""" + orig_f = filters + if skip or not multiplier: + return filters + + new_filters = make_divisible(value=filters * multiplier, + divisor=divisor, + min_value=min_depth) + + logging.info('round_filter input=%s output=%s', orig_f, new_filters) + return int(new_filters) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class SqueezeExcitation(tf.keras.layers.Layer): + """Creates a squeeze and excitation layer.""" + + def __init__(self, + in_filters, + out_filters, + se_ratio, + divisible_by=1, + use_3d_input=False, + kernel_initializer='VarianceScaling', + kernel_regularizer=None, + bias_regularizer=None, + activation='relu', + gating_activation='sigmoid', + **kwargs): + """Initializes a squeeze and excitation layer. + + Args: + in_filters: An `int` number of filters of the input tensor. + out_filters: An `int` number of filters of the output tensor. + se_ratio: A `float` or None. If not None, se ratio for the squeeze and + excitation layer. + divisible_by: An `int` that ensures all inner dimensions are divisible by + this number. + use_3d_input: A `bool` of whether input is 2D or 3D image. + kernel_initializer: A `str` of kernel_initializer for convolutional + layers. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default to None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d. + Default to None. + activation: A `str` name of the activation function. 
+ gating_activation: A `str` name of the activation function for final + gating function. + **kwargs: Additional keyword arguments to be passed. + """ + super(SqueezeExcitation, self).__init__(**kwargs) + + self._in_filters = in_filters + self._out_filters = out_filters + self._se_ratio = se_ratio + self._divisible_by = divisible_by + self._use_3d_input = use_3d_input + self._activation = activation + self._gating_activation = gating_activation + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + if tf.keras.backend.image_data_format() == 'channels_last': + if not use_3d_input: + self._spatial_axis = [1, 2] + else: + self._spatial_axis = [1, 2, 3] + else: + if not use_3d_input: + self._spatial_axis = [2, 3] + else: + self._spatial_axis = [2, 3, 4] + self._activation_fn = tf_utils.get_activation(activation) + self._gating_activation_fn = tf_utils.get_activation(gating_activation) + + def build(self, input_shape): + num_reduced_filters = make_divisible( + max(1, int(self._in_filters * self._se_ratio)), + divisor=self._divisible_by) + + self._se_reduce = tf.keras.layers.Conv2D( + filters=num_reduced_filters, + kernel_size=1, + strides=1, + padding='same', + use_bias=True, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + + self._se_expand = tf.keras.layers.Conv2D( + filters=self._out_filters, + kernel_size=1, + strides=1, + padding='same', + use_bias=True, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + + super(SqueezeExcitation, self).build(input_shape) + + def get_config(self): + config = { + 'in_filters': self._in_filters, + 'out_filters': self._out_filters, + 'se_ratio': self._se_ratio, + 'divisible_by': self._divisible_by, + 'use_3d_input': self._use_3d_input, + 'kernel_initializer': 
self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'activation': self._activation, + 'gating_activation': self._gating_activation, + } + base_config = super(SqueezeExcitation, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True) + x = self._activation_fn(self._se_reduce(x)) + x = self._gating_activation_fn(self._se_expand(x)) + return x * inputs + + +def get_stochastic_depth_rate(init_rate, i, n): + """Get drop connect rate for the ith block. + + Args: + init_rate: A `float` of initial drop rate. + i: An `int` of order of the current block. + n: An `int` total number of blocks. + + Returns: + Drop rate of the ith block. + """ + if init_rate is not None: + if init_rate < 0 or init_rate > 1: + raise ValueError('Initial drop rate must be within 0 and 1.') + rate = init_rate * float(i) / n + else: + rate = None + return rate + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class StochasticDepth(tf.keras.layers.Layer): + """Creates a stochastic depth layer.""" + + def __init__(self, stochastic_depth_drop_rate, **kwargs): + """Initializes a stochastic depth layer. + + Args: + stochastic_depth_drop_rate: A `float` of drop rate. + **kwargs: Additional keyword arguments to be passed. + + Returns: + A output `tf.Tensor` of which should have the same shape as input. 
+ """ + super(StochasticDepth, self).__init__(**kwargs) + self._drop_rate = stochastic_depth_drop_rate + + def get_config(self): + config = {'drop_rate': self._drop_rate} + base_config = super(StochasticDepth, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + if not training or self._drop_rate is None or self._drop_rate == 0: + return inputs + + keep_prob = 1.0 - self._drop_rate + batch_size = tf.shape(inputs)[0] + random_tensor = keep_prob + random_tensor += tf.random.uniform( + [batch_size] + [1] * (inputs.shape.rank - 1), dtype=inputs.dtype) + binary_tensor = tf.floor(random_tensor) + output = tf.math.divide(inputs, keep_prob) * binary_tensor + return output + + +@tf.keras.utils.register_keras_serializable(package='Vision') +def pyramid_feature_fusion(inputs, target_level): + """Fuses all feature maps in the feature pyramid at the target level. + + Args: + inputs: A dictionary containing the feature pyramid. The size of the input + tensor needs to be fixed. + target_level: An `int` of the target feature level for feature fusion. + + Returns: + A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width, + feature_channel]. + """ + # Convert keys to int. + pyramid_feats = {int(k): v for k, v in inputs.items()} + min_level = min(pyramid_feats.keys()) + max_level = max(pyramid_feats.keys()) + resampled_feats = [] + + for l in range(min_level, max_level + 1): + if l == target_level: + resampled_feats.append(pyramid_feats[l]) + else: + feat = pyramid_feats[l] + target_size = list(feat.shape[1:3]) + target_size[0] *= 2**(l - target_level) + target_size[1] *= 2**(l - target_level) + # Casts feat to float32 so the resize op can be run on TPU. 
+      feat = tf.cast(feat, tf.float32)
+      feat = tf.image.resize(
+          feat, size=target_size, method=tf.image.ResizeMethod.BILINEAR)
+      # Casts it back to be compatible with the rest of the operations.
+      feat = tf.cast(feat, pyramid_feats[l].dtype)
+      resampled_feats.append(feat)
+
+  return tf.math.add_n(resampled_feats)
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class Scale(tf.keras.layers.Layer):
+  """Scales the input by a trainable scalar weight.
+
+  This is useful for applying ReZero to layers, which improves convergence
+  speed. This implements the paper:
+
+  Thomas Bachlechner, Bodhisattwa Prasad Majumder, Huanru Henry Mao,
+  Garrison W. Cottrell, Julian McAuley.
+  ReZero is All You Need: Fast Convergence at Large Depth.
+  (https://arxiv.org/pdf/2003.04887.pdf).
+  """
+
+  def __init__(
+      self,
+      initializer: tf.keras.initializers.Initializer = 'ones',
+      regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
+      **kwargs):
+    """Initializes a scale layer.
+
+    Args:
+      initializer: A `str` of initializer for the scalar weight.
+      regularizer: A `tf.keras.regularizers.Regularizer` for the scalar weight.
+      **kwargs: Additional keyword arguments to be passed to this layer.
+
+    Returns:
+      A `tf.Tensor` of which should have the same shape as input.
+ """ + super(Scale, self).__init__(**kwargs) + + self._initializer = initializer + self._regularizer = regularizer + + self._scale = self.add_weight( + name='scale', + shape=[], + dtype=self.dtype, + initializer=self._initializer, + regularizer=self._regularizer, + trainable=True) + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'initializer': self._initializer, + 'regularizer': self._regularizer, + } + base_config = super(Scale, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + """Calls the layer with the given inputs.""" + scale = tf.cast(self._scale, inputs.dtype) + return scale * inputs + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class TemporalSoftmaxPool(tf.keras.layers.Layer): + """Creates a network layer corresponding to temporal softmax pooling. + + This is useful for multi-class logits (used in e.g., Charades). Modified from + AssembleNet Charades evaluation from: + + Michael S. Ryoo, AJ Piergiovanni, Mingxing Tan, Anelia Angelova. + AssembleNet: Searching for Multi-Stream Neural Connectivity in Video + Architectures. + (https://arxiv.org/pdf/1905.13209.pdf). + """ + + def call(self, inputs): + """Calls the layer with the given inputs.""" + assert inputs.shape.rank in (3, 4, 5) + frames = tf.shape(inputs)[1] + pre_logits = inputs / tf.sqrt(tf.cast(frames, inputs.dtype)) + activations = tf.nn.softmax(pre_logits, axis=1) + outputs = inputs * activations + return outputs + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class PositionalEncoding(tf.keras.layers.Layer): + """Creates a network layer that adds a sinusoidal positional encoding. + + Positional encoding is incremented across frames, and is added to the input. + The positional encoding is first weighted at 0 so that the network can choose + to ignore it. 
This implements: + + Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, + Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin. + Attention Is All You Need. + (https://arxiv.org/pdf/1706.03762.pdf). + """ + + def __init__(self, + initializer: tf.keras.initializers.Initializer = 'zeros', + cache_encoding: bool = False, + **kwargs): + """Initializes positional encoding. + + Args: + initializer: A `str` of initializer for weighting the positional encoding. + cache_encoding: A `bool`. If True, cache the positional encoding tensor + after calling build. Otherwise, rebuild the tensor for every call. + Setting this to False can be useful when we want to input a variable + number of frames, so the positional encoding tensor can change shape. + **kwargs: Additional keyword arguments to be passed to this layer. + + Returns: + A `tf.Tensor` of which should have the same shape as input. + """ + super(PositionalEncoding, self).__init__(**kwargs) + self._initializer = initializer + self._cache_encoding = cache_encoding + self._pos_encoding = None + self._rezero = Scale(initializer=initializer, name='rezero') + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'initializer': self._initializer, + 'cache_encoding': self._cache_encoding, + } + base_config = super(PositionalEncoding, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _positional_encoding(self, + num_positions: int, + hidden_size: int, + dtype: tf.DType = tf.float32): + """Creates a sequence of sinusoidal positional encoding vectors. + + Args: + num_positions: An `int` of number of positions (frames). + hidden_size: An `int` of number of channels used for the hidden vectors. + dtype: The dtype of the output tensor. + + Returns: + The positional encoding tensor with shape [num_positions, hidden_size]. 
+ """ + # Calling `tf.range` with `dtype=tf.bfloat16` results in an error, + # so we cast afterward. + positions = tf.cast(tf.range(num_positions)[:, tf.newaxis], dtype) + idx = tf.range(hidden_size)[tf.newaxis, :] + + power = tf.cast(2 * (idx // 2), dtype) + power /= tf.cast(hidden_size, dtype) + angles = 1. / tf.math.pow(10_000., power) + radians = positions * angles + + sin = tf.math.sin(radians[:, 0::2]) + cos = tf.math.cos(radians[:, 1::2]) + pos_encoding = tf.concat([sin, cos], axis=-1) + + return pos_encoding + + def _get_pos_encoding(self, input_shape): + """Calculates the positional encoding from the input shape.""" + frames = input_shape[1] + channels = input_shape[-1] + pos_encoding = self._positional_encoding(frames, channels, dtype=self.dtype) + pos_encoding = tf.reshape(pos_encoding, [1, frames, 1, 1, channels]) + return pos_encoding + + def build(self, input_shape): + """Builds the layer with the given input shape. + + Args: + input_shape: The input shape. + + Raises: + ValueError: If using 'channels_first' data format. + """ + if tf.keras.backend.image_data_format() == 'channels_first': + raise ValueError('"channels_first" mode is unsupported.') + + if self._cache_encoding: + self._pos_encoding = self._get_pos_encoding(input_shape) + + super(PositionalEncoding, self).build(input_shape) + + def call(self, inputs): + """Calls the layer with the given inputs.""" + if self._cache_encoding: + pos_encoding = self._pos_encoding + else: + pos_encoding = self._get_pos_encoding(tf.shape(inputs)) + pos_encoding = tf.cast(pos_encoding, inputs.dtype) + pos_encoding = tf.stop_gradient(pos_encoding) + pos_encoding = self._rezero(pos_encoding) + return inputs + pos_encoding + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class GlobalAveragePool3D(tf.keras.layers.Layer): + """Creates a global average pooling layer with causal mode. 
+ + Implements causal mode, which runs a cumulative sum (with `tf.cumsum`) across + frames in the time dimension, allowing the use of a stream buffer. Sums any + valid input state with the current input to allow state to accumulate over + several iterations. + """ + + def __init__(self, + keepdims: bool = False, + causal: bool = False, + **kwargs): + """Initializes a global average pool layer. + + Args: + keepdims: A `bool`. If True, keep the averaged dimensions. + causal: A `bool` of whether to run in causal mode with a cumulative sum + across frames. + **kwargs: Additional keyword arguments to be passed to this layer. + + Returns: + An output `tf.Tensor`. + """ + super(GlobalAveragePool3D, self).__init__(**kwargs) + + self._keepdims = keepdims + self._causal = causal + + self._frame_count = None + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'keepdims': self._keepdims, + 'causal': self._causal, + } + base_config = super(GlobalAveragePool3D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + """Builds the layer with the given input shape.""" + # Here we define strings that will uniquely reference the buffer states + # in the TF graph. These will be used for passing in a mapping of states + # for streaming mode. To do this, we can use a name scope. + with tf.name_scope('buffer') as state_name: + self._state_name = state_name + self._frame_count_name = state_name + '_frame_count' + + super(GlobalAveragePool3D, self).build(input_shape) + + def call(self, + inputs: tf.Tensor, + states: Optional[States] = None, + output_states: bool = True + ) -> Union[tf.Tensor, Tuple[tf.Tensor, States]]: + """Calls the layer with the given inputs. + + Args: + inputs: An input `tf.Tensor`. + states: A `dict` of states such that, if any of the keys match for this + layer, will overwrite the contents of the buffer(s). + output_states: A `bool`. 
If True, returns the output tensor and output + states. Returns just the output tensor otherwise. + + Returns: + An output `tf.Tensor` (and optionally the states if `output_states=True`). + If `causal=True`, the output tensor will have shape + `[batch_size, num_frames, 1, 1, channels]` if `keepdims=True`. We keep + the frame dimension in this case to simulate a cumulative global average + as if we are inputting one frame at a time. If `causal=False`, the output + is equivalent to `tf.keras.layers.GlobalAveragePooling3D` with shape + `[batch_size, 1, 1, 1, channels]` if `keepdims=True` (plus the optional + buffer stored in `states`). + + Raises: + ValueError: If using 'channels_first' data format. + """ + states = dict(states) if states is not None else {} + + if tf.keras.backend.image_data_format() == 'channels_first': + raise ValueError('"channels_first" mode is unsupported.') + + # Shape: [batch_size, 1, 1, 1, channels] + buffer = states.get(self._state_name, None) + if buffer is None: + buffer = tf.zeros_like(inputs[:, :1, :1, :1], dtype=inputs.dtype) + states[self._state_name] = buffer + + # Keep a count of frames encountered across input iterations in + # num_frames to be able to accurately take a cumulative average across + # all frames when running in streaming mode + num_frames = tf.shape(inputs)[1] + frame_count = states.get(self._frame_count_name, 0) + states[self._frame_count_name] = frame_count + num_frames + + if self._causal: + # Take a mean of spatial dimensions to make computation more efficient. 
+ x = tf.reduce_mean(inputs, axis=[2, 3], keepdims=True)
+ x = tf.cumsum(x, axis=1)
+ x = x + buffer
+
+ # The last frame will be the value of the next state
+ # Shape: [batch_size, 1, 1, 1, channels]
+ states[self._state_name] = x[:, -1:]
+
+ # In causal mode, the divisor increments by 1 for every frame to
+ # calculate cumulative averages instead of one global average
+ mean_divisors = tf.range(num_frames) + frame_count + 1
+ mean_divisors = tf.reshape(mean_divisors, [1, num_frames, 1, 1, 1])
+ mean_divisors = tf.cast(mean_divisors, x.dtype)
+
+ # Shape: [batch_size, num_frames, 1, 1, channels]
+ x = x / mean_divisors
+ else:
+ # In non-causal mode, we (optionally) sum across frames to take a
+ # cumulative average across input iterations rather than individual
+ # frames. If no buffer state is passed, this essentially becomes
+ # regular global average pooling.
+ # Shape: [batch_size, 1, 1, 1, channels]
+ x = tf.reduce_sum(inputs, axis=(1, 2, 3), keepdims=True)
+ x = x / tf.cast(tf.shape(inputs)[2] * tf.shape(inputs)[3], x.dtype)
+ x = x + buffer
+
+ # Shape: [batch_size, 1, 1, 1, channels]
+ states[self._state_name] = x
+
+ x = x / tf.cast(frame_count + num_frames, x.dtype)
+
+ if not self._keepdims:
+ x = tf.squeeze(x, axis=(1, 2, 3))
+
+ return (x, states) if output_states else x
+
+
+@tf.keras.utils.register_keras_serializable(package='Vision')
+class SpatialAveragePool3D(tf.keras.layers.Layer):
+ """Creates a global average pooling layer pooling across spatial dimensions."""
+
+ def __init__(self, keepdims: bool = False, **kwargs):
+ """Initializes a global average pool layer.
+
+ Args:
+ keepdims: A `bool`. If True, keep the averaged dimensions.
+ **kwargs: Additional keyword arguments to be passed to this layer.
+
+ Returns:
+ An output `tf.Tensor`.
+ """ + super(SpatialAveragePool3D, self).__init__(**kwargs) + self._keepdims = keepdims + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'keepdims': self._keepdims, + } + base_config = super(SpatialAveragePool3D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + """Builds the layer with the given input shape.""" + if tf.keras.backend.image_data_format() == 'channels_first': + raise ValueError('"channels_first" mode is unsupported.') + + super(SpatialAveragePool3D, self).build(input_shape) + + def call(self, inputs): + """Calls the layer with the given inputs.""" + if inputs.shape.rank != 5: + raise ValueError( + 'Input should have rank {}, got {}'.format(5, inputs.shape.rank)) + + return tf.reduce_mean(inputs, axis=(2, 3), keepdims=self._keepdims) + + +class CausalConvMixin: + """Mixin class to implement CausalConv for `tf.keras.layers.Conv` layers.""" + + @property + def use_buffered_input(self) -> bool: + return self._use_buffered_input + + @use_buffered_input.setter + def use_buffered_input(self, variable: bool): + self._use_buffered_input = variable + + def _compute_buffered_causal_padding(self, + inputs: Optional[tf.Tensor] = None, + use_buffered_input: bool = False, + time_axis: int = 1) -> List[List[int]]: + """Calculates padding for 'causal' option for conv layers. + + Args: + inputs: An optional input `tf.Tensor` to be padded. + use_buffered_input: A `bool`. If True, use 'valid' padding along the time + dimension. This should be set when applying the stream buffer. + time_axis: An `int` of the axis of the time dimension. + + Returns: + A list of paddings for `tf.pad`. 
+ """ + del inputs + + if tf.keras.backend.image_data_format() == 'channels_first': + raise ValueError('"channels_first" mode is unsupported.') + + kernel_size_effective = [ + (self.kernel_size[i] + + (self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1)) + for i in range(self.rank) + ] + pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)] + pad_beg = [pad_total[i] // 2 for i in range(self.rank)] + pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)] + padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)] + padding = [[0, 0]] + padding + [[0, 0]] + + if use_buffered_input: + padding[time_axis] = [0, 0] + else: + padding[time_axis] = [padding[time_axis][0] + padding[time_axis][1], 0] + return padding + + def _causal_validate_init(self): + """Validates the Conv layer initial configuration.""" + # Overriding this method is meant to circumvent unnecessary errors when + # using causal padding. + if (self.filters is not None + and self.filters % self.groups != 0): + raise ValueError( + 'The number of filters must be evenly divisible by the number of ' + 'groups. Received: groups={}, filters={}'.format( + self.groups, self.filters)) + + if not all(self.kernel_size): + raise ValueError('The argument `kernel_size` cannot contain 0(s). ' + 'Received: %s' % (self.kernel_size,)) + + def _buffered_spatial_output_shape(self, spatial_output_shape: List[int]): + """Computes the spatial output shape from the input shape.""" + # When buffer padding, use 'valid' padding across time. The output shape + # across time should be the input shape minus any padding, assuming + # the stride across time is 1. 
+ if self._use_buffered_input and spatial_output_shape[0] is not None: + padding = self._compute_buffered_causal_padding(use_buffered_input=False) + spatial_output_shape[0] -= sum(padding[1]) + return spatial_output_shape + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class Conv2D(tf.keras.layers.Conv2D, CausalConvMixin): + """Conv2D layer supporting CausalConv. + + Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`), + which applies causal padding to the temporal dimension, and same padding in + the spatial dimensions. + """ + + def __init__(self, *args, use_buffered_input=False, **kwargs): + """Initializes conv2d. + + Args: + *args: Arguments to be passed. + use_buffered_input: A `bool`. If True, the input is expected to be padded + beforehand. In effect, calling this layer will use 'valid' padding on + the temporal dimension to simulate 'causal' padding. + **kwargs: Additional keyword arguments to be passed. + + Returns: + An output `tf.Tensor` of the Conv2D operation. 
+ """ + super(Conv2D, self).__init__(*args, **kwargs) + self._use_buffered_input = use_buffered_input + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'use_buffered_input': self._use_buffered_input, + } + base_config = super(Conv2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def _compute_causal_padding(self, inputs): + """Computes causal padding dimensions for the given inputs.""" + return self._compute_buffered_causal_padding( + inputs, use_buffered_input=self._use_buffered_input) + + def _validate_init(self): + """Validates the Conv layer initial configuration.""" + self._causal_validate_init() + + def _spatial_output_shape(self, spatial_input_shape: List[int]): + """Computes the spatial output shape from the input shape.""" + shape = super(Conv2D, self)._spatial_output_shape(spatial_input_shape) + return self._buffered_spatial_output_shape(shape) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, CausalConvMixin): + """DepthwiseConv2D layer supporting CausalConv. + + Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`), + which applies causal padding to the temporal dimension, and same padding in + the spatial dimensions. + """ + + def __init__(self, *args, use_buffered_input=False, **kwargs): + """Initializes depthwise conv2d. + + Args: + *args: Arguments to be passed. + use_buffered_input: A `bool`. If True, the input is expected to be padded + beforehand. In effect, calling this layer will use 'valid' padding on + the temporal dimension to simulate 'causal' padding. + **kwargs: Additional keyword arguments to be passed. + + Returns: + An output `tf.Tensor` of the DepthwiseConv2D operation. 
+ """ + super(DepthwiseConv2D, self).__init__(*args, **kwargs) + self._use_buffered_input = use_buffered_input + + # Causal padding is unsupported by default for DepthwiseConv2D, + # so we resort to valid padding internally. However, we handle + # causal padding as a special case with `self._is_causal`, which is + # defined by the super class. + if self.padding == 'causal': + self.padding = 'valid' + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'use_buffered_input': self._use_buffered_input, + } + base_config = super(DepthwiseConv2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def call(self, inputs): + """Calls the layer with the given inputs.""" + if self._is_causal: + inputs = tf.pad(inputs, self._compute_causal_padding(inputs)) + return super(DepthwiseConv2D, self).call(inputs) + + def _compute_causal_padding(self, inputs): + """Computes causal padding dimensions for the given inputs.""" + return self._compute_buffered_causal_padding( + inputs, use_buffered_input=self._use_buffered_input) + + def _validate_init(self): + """Validates the Conv layer initial configuration.""" + self._causal_validate_init() + + def _spatial_output_shape(self, spatial_input_shape: List[int]): + """Computes the spatial output shape from the input shape.""" + shape = super(DepthwiseConv2D, self)._spatial_output_shape( + spatial_input_shape) + return self._buffered_spatial_output_shape(shape) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin): + """Conv3D layer supporting CausalConv. + + Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`), + which applies causal padding to the temporal dimension, and same padding in + the spatial dimensions. + """ + + def __init__(self, *args, use_buffered_input=False, **kwargs): + """Initializes conv3d. + + Args: + *args: Arguments to be passed. 
+ use_buffered_input: A `bool`. If True, the input is expected to be padded + beforehand. In effect, calling this layer will use 'valid' padding on + the temporal dimension to simulate 'causal' padding. + **kwargs: Additional keyword arguments to be passed. + + Returns: + An output `tf.Tensor` of the Conv3D operation. + """ + super(Conv3D, self).__init__(*args, **kwargs) + self._use_buffered_input = use_buffered_input + + def get_config(self): + """Returns a dictionary containing the config used for initialization.""" + config = { + 'use_buffered_input': self._use_buffered_input, + } + base_config = super(Conv3D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + """Builds the layer with the given input shape.""" + super(Conv3D, self).build(input_shape) + + # TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU + # in eager mode may produce incorrect output or cause a segfault. + # To avoid this issue, compile the op to TF graph using tf.function. 
+ self._convolution_op = tf.function( + self._convolution_op, experimental_compile=True) + + def _compute_causal_padding(self, inputs): + """Computes causal padding dimensions for the given inputs.""" + return self._compute_buffered_causal_padding( + inputs, use_buffered_input=self._use_buffered_input) + + def _validate_init(self): + """Validates the Conv layer initial configuration.""" + self._causal_validate_init() + + def _spatial_output_shape(self, spatial_input_shape: List[int]): + """Computes the spatial output shape from the input shape.""" + shape = super(Conv3D, self)._spatial_output_shape(spatial_input_shape) + return self._buffered_spatial_output_shape(shape) diff --git a/official/vision/beta/modeling/layers/nn_layers_test.py b/official/vision/beta/modeling/layers/nn_layers_test.py new file mode 100644 index 0000000000000000000000000000000000000000..50af2b100570623f75cff24be9b0759e39bb1ca7 --- /dev/null +++ b/official/vision/beta/modeling/layers/nn_layers_test.py @@ -0,0 +1,308 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for nn_layers.""" + +# Import libraries +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.beta.modeling.layers import nn_layers + + +class NNLayersTest(parameterized.TestCase, tf.test.TestCase): + + def test_scale(self): + scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.)) + output = scale(3.) + self.assertAllEqual(output, 30.) + + def test_temporal_softmax_pool(self): + inputs = tf.range(4, dtype=tf.float32) + 1. + inputs = tf.reshape(inputs, [1, 4, 1, 1, 1]) + layer = nn_layers.TemporalSoftmaxPool() + output = layer(inputs) + self.assertAllClose( + output, + [[[[[0.10153633]]], + [[[0.33481020]]], + [[[0.82801306]]], + [[[1.82021690]]]]]) + + def test_positional_encoding(self): + pos_encoding = nn_layers.PositionalEncoding( + initializer='ones', cache_encoding=False) + pos_encoding_cached = nn_layers.PositionalEncoding( + initializer='ones', cache_encoding=True) + + inputs = tf.ones([1, 4, 1, 1, 3]) + outputs = pos_encoding(inputs) + outputs_cached = pos_encoding_cached(inputs) + + expected = tf.constant( + [[[[[1.0000000, 1.0000000, 2.0000000]]], + [[[1.8414710, 1.0021545, 1.5403023]]], + [[[1.9092975, 1.0043088, 0.5838531]]], + [[[1.1411200, 1.0064633, 0.0100075]]]]]) + + self.assertEqual(outputs.shape, expected.shape) + self.assertAllClose(outputs, expected) + + self.assertEqual(outputs.shape, outputs_cached.shape) + self.assertAllClose(outputs, outputs_cached) + + inputs = tf.ones([1, 5, 1, 1, 3]) + _ = pos_encoding(inputs) + + def test_positional_encoding_bfloat16(self): + pos_encoding = nn_layers.PositionalEncoding(initializer='ones') + + inputs = tf.ones([1, 4, 1, 1, 3], dtype=tf.bfloat16) + outputs = pos_encoding(inputs) + + expected = tf.constant( + [[[[[1.0000000, 1.0000000, 2.0000000]]], + [[[1.8414710, 1.0021545, 1.5403023]]], + [[[1.9092975, 1.0043088, 0.5838531]]], + [[[1.1411200, 1.0064633, 0.0100075]]]]]) + + self.assertEqual(outputs.shape, expected.shape) 
+ self.assertAllClose(outputs, expected) + + def test_global_average_pool_basic(self): + pool = nn_layers.GlobalAveragePool3D(keepdims=True) + + inputs = tf.ones([1, 2, 3, 4, 1]) + outputs = pool(inputs, output_states=False) + + expected = tf.ones([1, 1, 1, 1, 1]) + + self.assertEqual(outputs.shape, expected.shape) + self.assertAllEqual(outputs, expected) + + def test_global_average_pool_keras(self): + pool = nn_layers.GlobalAveragePool3D(keepdims=False) + keras_pool = tf.keras.layers.GlobalAveragePooling3D() + + inputs = 10 * tf.random.normal([1, 2, 3, 4, 1]) + + outputs = pool(inputs, output_states=False) + keras_output = keras_pool(inputs) + + self.assertAllEqual(outputs.shape, keras_output.shape) + self.assertAllClose(outputs, keras_output) + + def test_stream_global_average_pool(self): + gap = nn_layers.GlobalAveragePool3D(keepdims=True, causal=False) + + inputs = tf.range(4, dtype=tf.float32) + 1. + inputs = tf.reshape(inputs, [1, 4, 1, 1, 1]) + inputs = tf.tile(inputs, [1, 1, 2, 2, 3]) + expected, _ = gap(inputs) + + for num_splits in [1, 2, 4]: + frames = tf.split(inputs, num_splits, axis=1) + states = {} + predicted = None + for frame in frames: + predicted, states = gap(frame, states=states) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + self.assertAllClose( + predicted, + [[[[[2.5, 2.5, 2.5]]]]]) + + def test_causal_stream_global_average_pool(self): + gap = nn_layers.GlobalAveragePool3D(keepdims=True, causal=True) + + inputs = tf.range(4, dtype=tf.float32) + 1. 
+ inputs = tf.reshape(inputs, [1, 4, 1, 1, 1]) + inputs = tf.tile(inputs, [1, 1, 2, 2, 3]) + expected, _ = gap(inputs) + + for num_splits in [1, 2, 4]: + frames = tf.split(inputs, num_splits, axis=1) + states = {} + predicted = [] + for frame in frames: + x, states = gap(frame, states=states) + predicted.append(x) + predicted = tf.concat(predicted, axis=1) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + self.assertAllClose( + predicted, + [[[[[1.0, 1.0, 1.0]]], + [[[1.5, 1.5, 1.5]]], + [[[2.0, 2.0, 2.0]]], + [[[2.5, 2.5, 2.5]]]]]) + + def test_spatial_average_pool(self): + pool = nn_layers.SpatialAveragePool3D(keepdims=True) + + inputs = tf.range(64, dtype=tf.float32) + 1. + inputs = tf.reshape(inputs, [1, 4, 4, 4, 1]) + + output = pool(inputs) + + self.assertEqual(output.shape, [1, 4, 1, 1, 1]) + self.assertAllClose( + output, + [[[[[8.50]]], + [[[24.5]]], + [[[40.5]]], + [[[56.5]]]]]) + + def test_conv2d_causal(self): + conv2d = nn_layers.Conv2D( + filters=3, + kernel_size=(3, 3), + strides=(1, 2), + padding='causal', + use_buffered_input=True, + kernel_initializer='ones', + use_bias=False, + ) + + inputs = tf.ones([1, 4, 2, 3]) + + paddings = [[0, 0], [2, 0], [0, 0], [0, 0]] + padded_inputs = tf.pad(inputs, paddings) + predicted = conv2d(padded_inputs) + + expected = tf.constant( + [[[[6.0, 6.0, 6.0]], + [[12., 12., 12.]], + [[18., 18., 18.]], + [[18., 18., 18.]]]]) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + conv2d.use_buffered_input = False + predicted = conv2d(inputs) + + self.assertFalse(conv2d.use_buffered_input) + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + def test_depthwise_conv2d_causal(self): + conv2d = nn_layers.DepthwiseConv2D( + kernel_size=(3, 3), + strides=(1, 1), + padding='causal', + use_buffered_input=True, + depthwise_initializer='ones', + use_bias=False, + ) + + inputs = tf.ones([1, 
2, 2, 3]) + + paddings = [[0, 0], [2, 0], [0, 0], [0, 0]] + padded_inputs = tf.pad(inputs, paddings) + predicted = conv2d(padded_inputs) + + expected = tf.constant( + [[[[2., 2., 2.], + [2., 2., 2.]], + [[4., 4., 4.], + [4., 4., 4.]]]]) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + conv2d.use_buffered_input = False + predicted = conv2d(inputs) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + def test_conv3d_causal(self): + conv3d = nn_layers.Conv3D( + filters=3, + kernel_size=(3, 3, 3), + strides=(1, 2, 2), + padding='causal', + use_buffered_input=True, + kernel_initializer='ones', + use_bias=False, + ) + + inputs = tf.ones([1, 2, 4, 4, 3]) + + paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]] + padded_inputs = tf.pad(inputs, paddings) + predicted = conv3d(padded_inputs) + + expected = tf.constant( + [[[[[12., 12., 12.], + [18., 18., 18.]], + [[18., 18., 18.], + [27., 27., 27.]]], + [[[24., 24., 24.], + [36., 36., 36.]], + [[36., 36., 36.], + [54., 54., 54.]]]]]) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + conv3d.use_buffered_input = False + predicted = conv3d(inputs) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + + def test_depthwise_conv3d_causal(self): + conv3d = nn_layers.Conv3D( + filters=3, + kernel_size=(3, 3, 3), + strides=(1, 2, 2), + padding='causal', + use_buffered_input=True, + kernel_initializer='ones', + use_bias=False, + groups=3, + ) + + inputs = tf.ones([1, 2, 4, 4, 3]) + + paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]] + padded_inputs = tf.pad(inputs, paddings) + predicted = conv3d(padded_inputs) + + expected = tf.constant( + [[[[[4.0, 4.0, 4.0], + [6.0, 6.0, 6.0]], + [[6.0, 6.0, 6.0], + [9.0, 9.0, 9.0]]], + [[[8.0, 8.0, 8.0], + [12., 12., 12.]], + [[12., 12., 12.], + [18., 18., 18.]]]]]) + + self.assertEqual(predicted.shape, 
expected.shape) + self.assertAllClose(predicted, expected) + + conv3d.use_buffered_input = False + predicted = conv3d(inputs) + + self.assertEqual(predicted.shape, expected.shape) + self.assertAllClose(predicted, expected) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/layers/roi_aligner.py b/official/vision/beta/modeling/layers/roi_aligner.py new file mode 100644 index 0000000000000000000000000000000000000000..6f9f55b604ee45a47f43c00c887021f6c0fe932d --- /dev/null +++ b/official/vision/beta/modeling/layers/roi_aligner.py @@ -0,0 +1,72 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains definitions of ROI aligner.""" + +from typing import Mapping +import tensorflow as tf + +from official.vision.beta.ops import spatial_transform_ops + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class MultilevelROIAligner(tf.keras.layers.Layer): + """Performs ROIAlign for the second stage processing.""" + + def __init__(self, crop_size: int = 7, sample_offset: float = 0.5, **kwargs): + """Initializes a ROI aligner. + + Args: + crop_size: An `int` of the output size of the cropped features. + sample_offset: A `float` in [0, 1] of the subpixel sample offset. + **kwargs: Additional keyword arguments passed to Layer. 
+ """ + self._config_dict = { + 'crop_size': crop_size, + 'sample_offset': sample_offset, + } + super(MultilevelROIAligner, self).__init__(**kwargs) + + def call(self, + features: Mapping[str, tf.Tensor], + boxes: tf.Tensor, + training: bool = None): + """Generates ROIs. + + Args: + features: A dictionary with key as pyramid level and value as features. + The features are in shape of + [batch_size, height_l, width_l, num_filters]. + boxes: A 3-D `tf.Tensor` of shape [batch_size, num_boxes, 4]. Each row + represents a box with [y1, x1, y2, x2] in un-normalized coordinates. + from grid point. + training: A `bool` of whether it is in training mode. + + Returns: + A 5-D `tf.Tensor` representing feature crop of shape + [batch_size, num_boxes, crop_size, crop_size, num_filters]. + """ + roi_features = spatial_transform_ops.multilevel_crop_and_resize( + features, + boxes, + output_size=self._config_dict['crop_size'], + sample_offset=self._config_dict['sample_offset']) + return roi_features + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/modeling/layers/roi_aligner_test.py b/official/vision/beta/modeling/layers/roi_aligner_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ce6b124fed4001b2aee82b7b01372caef1b540be --- /dev/null +++ b/official/vision/beta/modeling/layers/roi_aligner_test.py @@ -0,0 +1,42 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for roi_aligner.py.""" + +# Import libraries +import tensorflow as tf + +from official.vision.beta.modeling.layers import roi_aligner + + +class MultilevelROIAlignerTest(tf.test.TestCase): + + def test_serialize_deserialize(self): + kwargs = dict( + crop_size=7, + sample_offset=0.5, + ) + aligner = roi_aligner.MultilevelROIAligner(**kwargs) + + expected_config = dict(kwargs) + self.assertEqual(aligner.get_config(), expected_config) + + new_aligner = roi_aligner.MultilevelROIAligner.from_config( + aligner.get_config()) + + self.assertAllEqual(aligner.get_config(), new_aligner.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/layers/roi_generator.py b/official/vision/beta/modeling/layers/roi_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..b569a3ba3b92db936498fc8422c9b15ca914958f --- /dev/null +++ b/official/vision/beta/modeling/layers/roi_generator.py @@ -0,0 +1,313 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Contains definitions of ROI generator."""
+from typing import Optional, Mapping
+# Import libraries
+import tensorflow as tf
+
+from official.vision.beta.ops import box_ops
+from official.vision.beta.ops import nms
+
+
+def _multilevel_propose_rois(raw_boxes: Mapping[str, tf.Tensor],
+ raw_scores: Mapping[str, tf.Tensor],
+ anchor_boxes: Mapping[str, tf.Tensor],
+ image_shape: tf.Tensor,
+ pre_nms_top_k: int = 2000,
+ pre_nms_score_threshold: float = 0.0,
+ pre_nms_min_size_threshold: float = 0.0,
+ nms_iou_threshold: float = 0.7,
+ num_proposals: int = 1000,
+ use_batched_nms: bool = False,
+ decode_boxes: bool = True,
+ clip_boxes: bool = True,
+ apply_sigmoid_to_score: bool = True):
+ """Proposes RoIs given a group of candidates from different FPN levels.
+
+ The following describes the steps:
+ 1. For each individual level:
+ a. Apply sigmoid transform if specified.
+ b. Decode boxes if specified.
+ c. Clip boxes if specified.
+ d. Filter small boxes and those that fall outside the image if specified.
+ e. Apply pre-NMS filtering including pre-NMS top k and score thresholding.
+ f. Apply NMS.
+ 2. Aggregate post-NMS boxes from each level.
+ 3. Apply an overall top k to generate the final selected RoIs.
+
+ Args:
+ raw_boxes: A `dict` with keys representing FPN levels and values
+ representing box tensors of shape
+ [batch_size, feature_h, feature_w, num_anchors * 4].
+ raw_scores: A `dict` with keys representing FPN levels and values
+ representing logit tensors of shape
+ [batch_size, feature_h, feature_w, num_anchors].
+ anchor_boxes: A `dict` with keys representing FPN levels and values
+ representing anchor box tensors of shape
+ [batch_size, feature_h * feature_w * num_anchors, 4].
+ image_shape: A `tf.Tensor` of shape [batch_size, 2] where the last dimension
+ is [height, width] of the scaled image.
+ pre_nms_top_k: An `int` of top scoring RPN proposals *per level* to keep
+ before applying NMS. Default: 2000.
+ pre_nms_score_threshold: A `float` between 0 and 1 representing the minimal
+ box score to keep before applying NMS. This is often used as a
+ pre-filtering step for better performance. Default: 0, no filtering is
+ applied.
+ pre_nms_min_size_threshold: A `float` representing the minimal box size in
+ each side (w.r.t. the scaled image) to keep before applying NMS. This is
+ often used as a pre-filtering step for better performance. Default: 0, no
+ filtering is applied.
+ nms_iou_threshold: A `float` between 0 and 1 representing the IoU threshold
+ used for NMS. If 0.0, no NMS is applied. Default: 0.7.
+ num_proposals: An `int` of top scoring RPN proposals *in total* to keep
+ after applying NMS. Default: 1000.
+ use_batched_nms: A `bool` indicating whether NMS is applied in batch using
+ `tf.image.combined_non_max_suppression`. Currently only available in
+ CPU/GPU. Default is False.
+ decode_boxes: A `bool` indicating whether `raw_boxes` needs to be decoded
+ using `anchor_boxes`. If False, use `raw_boxes` directly and ignore
+ `anchor_boxes`. Default is True.
+ clip_boxes: A `bool` indicating whether boxes are first clipped to the
+ scaled image size before applying NMS. If False, no clipping is applied
+ and `image_shape` is ignored. Default is True.
+ apply_sigmoid_to_score: A `bool` indicating whether to apply sigmoid to
+ `raw_scores` before applying NMS. Default is True.
+
+ Returns:
+ selected_rois: A `tf.Tensor` of shape [batch_size, num_proposals, 4],
+ representing the box coordinates of the selected proposals w.r.t. the
+ scaled image.
+ selected_roi_scores: A `tf.Tensor` of shape [batch_size, num_proposals, 1],
+ representing the scores of the selected proposals.
+ """ + with tf.name_scope('multilevel_propose_rois'): + rois = [] + roi_scores = [] + image_shape = tf.expand_dims(image_shape, axis=1) + for level in sorted(raw_scores.keys()): + with tf.name_scope('level_%s' % level): + _, feature_h, feature_w, num_anchors_per_location = ( + raw_scores[level].get_shape().as_list()) + + num_boxes = feature_h * feature_w * num_anchors_per_location + this_level_scores = tf.reshape(raw_scores[level], [-1, num_boxes]) + this_level_boxes = tf.reshape(raw_boxes[level], [-1, num_boxes, 4]) + this_level_anchors = tf.cast( + tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]), + dtype=this_level_scores.dtype) + + if apply_sigmoid_to_score: + this_level_scores = tf.sigmoid(this_level_scores) + + if decode_boxes: + this_level_boxes = box_ops.decode_boxes( + this_level_boxes, this_level_anchors) + if clip_boxes: + this_level_boxes = box_ops.clip_boxes( + this_level_boxes, image_shape) + + if pre_nms_min_size_threshold > 0.0: + this_level_boxes, this_level_scores = box_ops.filter_boxes( + this_level_boxes, + this_level_scores, + image_shape, + pre_nms_min_size_threshold) + + this_level_pre_nms_top_k = min(num_boxes, pre_nms_top_k) + this_level_post_nms_top_k = min(num_boxes, num_proposals) + if nms_iou_threshold > 0.0: + if use_batched_nms: + this_level_rois, this_level_roi_scores, _, _ = ( + tf.image.combined_non_max_suppression( + tf.expand_dims(this_level_boxes, axis=2), + tf.expand_dims(this_level_scores, axis=-1), + max_output_size_per_class=this_level_pre_nms_top_k, + max_total_size=this_level_post_nms_top_k, + iou_threshold=nms_iou_threshold, + score_threshold=pre_nms_score_threshold, + pad_per_class=False, + clip_boxes=False)) + else: + if pre_nms_score_threshold > 0.0: + this_level_boxes, this_level_scores = ( + box_ops.filter_boxes_by_scores( + this_level_boxes, + this_level_scores, + pre_nms_score_threshold)) + this_level_boxes, this_level_scores = box_ops.top_k_boxes( + this_level_boxes, this_level_scores, 
                k=this_level_pre_nms_top_k)
            # Non-batched path: TPU-friendly NMS over score-sorted boxes.
            this_level_roi_scores, this_level_rois = (
                nms.sorted_non_max_suppression_padded(
                    this_level_scores,
                    this_level_boxes,
                    max_output_size=this_level_post_nms_top_k,
                    iou_threshold=nms_iou_threshold))
        else:
          # NMS disabled (iou threshold <= 0): keep the top-scoring boxes.
          this_level_rois, this_level_roi_scores = box_ops.top_k_boxes(
              this_level_boxes,
              this_level_scores,
              k=this_level_post_nms_top_k)

        rois.append(this_level_rois)
        roi_scores.append(this_level_roi_scores)

    # Aggregate the per-level survivors before the final overall top-k.
    all_rois = tf.concat(rois, axis=1)
    all_roi_scores = tf.concat(roi_scores, axis=1)

  with tf.name_scope('top_k_rois'):
    _, num_valid_rois = all_roi_scores.get_shape().as_list()
    overall_top_k = min(num_valid_rois, num_proposals)

    selected_rois, selected_roi_scores = box_ops.top_k_boxes(
        all_rois, all_roi_scores, k=overall_top_k)

  return selected_rois, selected_roi_scores


@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelROIGenerator(tf.keras.layers.Layer):
  """Proposes RoIs for the second stage processing."""

  def __init__(self,
               pre_nms_top_k: int = 2000,
               pre_nms_score_threshold: float = 0.0,
               pre_nms_min_size_threshold: float = 0.0,
               nms_iou_threshold: float = 0.7,
               num_proposals: int = 1000,
               test_pre_nms_top_k: int = 1000,
               test_pre_nms_score_threshold: float = 0.0,
               test_pre_nms_min_size_threshold: float = 0.0,
               test_nms_iou_threshold: float = 0.7,
               test_num_proposals: int = 1000,
               use_batched_nms: bool = False,
               **kwargs):
    """Initializes a ROI generator.

    The ROI generator transforms the raw predictions from RPN to ROIs.

    Args:
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      pre_nms_min_size_threshold: A `float` of the threshold of each side of the
        box (w.r.t. the scaled image). Proposals whose sides are below this
        threshold are thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      num_proposals: An `int` of the final number of proposals to generate.
      test_pre_nms_top_k: An `int` of the number of top scores proposals to be
        kept before applying NMS in testing.
      test_pre_nms_score_threshold: A `float` of the score threshold to apply
        before applying NMS in testing. Proposals whose scores are below this
        threshold are thrown away.
      test_pre_nms_min_size_threshold: A `float` of the threshold of each side
        of the box (w.r.t. the scaled image) in testing. Proposals whose sides
        are below this threshold are thrown away.
      test_nms_iou_threshold: A `float` in [0, 1] of the NMS IoU threshold in
        testing.
      test_num_proposals: An `int` of the final number of proposals to generate
        in testing.
      use_batched_nms: A `bool` of whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # Stored verbatim so get_config()/from_config() round-trip the layer.
    self._config_dict = {
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'pre_nms_min_size_threshold': pre_nms_min_size_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'num_proposals': num_proposals,
        'test_pre_nms_top_k': test_pre_nms_top_k,
        'test_pre_nms_score_threshold': test_pre_nms_score_threshold,
        'test_pre_nms_min_size_threshold': test_pre_nms_min_size_threshold,
        'test_nms_iou_threshold': test_nms_iou_threshold,
        'test_num_proposals': test_num_proposals,
        'use_batched_nms': use_batched_nms,
    }
    super(MultilevelROIGenerator, self).__init__(**kwargs)

  def call(self,
           raw_boxes: Mapping[str, tf.Tensor],
           raw_scores: Mapping[str, tf.Tensor],
           anchor_boxes: Mapping[str, tf.Tensor],
           image_shape: tf.Tensor,
           training: Optional[bool] = None):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Apply sigmoid transform if specified.
        b. Decode boxes if specified.
        c. Clip boxes if specified.
        d. Filter small boxes and those fall outside image if specified.
        e. Apply pre-NMS filtering including pre-NMS top k and score
           thresholding.
        f. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tenors of shape
        [batch, feature_h, feature_w, num_anchors * 4].
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape
        [batch, feature_h, feature_w, num_anchors].
      anchor_boxes: A `dict` with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch, feature_h * feature_w * num_anchors, 4].
      image_shape: A `tf.Tensor` of shape [batch, 2] where the last dimension
        are [height, width] of the scaled image.
      training: A `bool` that indicates whether it is in training mode.

    Returns:
      roi_boxes: A `tf.Tensor` of shape [batch, num_proposals, 4], the proposed
        ROIs in the scaled image coordinate.
      roi_scores: A `tf.Tensor` of shape [batch, num_proposals], scores of the
        proposed ROIs.
    """
    # Select the training or test hyperparameters depending on the mode; the
    # heavy lifting is delegated to the module-level helper.
    roi_boxes, roi_scores = _multilevel_propose_rois(
        raw_boxes,
        raw_scores,
        anchor_boxes,
        image_shape,
        pre_nms_top_k=(
            self._config_dict['pre_nms_top_k'] if training
            else self._config_dict['test_pre_nms_top_k']),
        pre_nms_score_threshold=(
            self._config_dict['pre_nms_score_threshold'] if training
            else self._config_dict['test_pre_nms_score_threshold']),
        pre_nms_min_size_threshold=(
            self._config_dict['pre_nms_min_size_threshold'] if training
            else self._config_dict['test_pre_nms_min_size_threshold']),
        nms_iou_threshold=(
            self._config_dict['nms_iou_threshold'] if training
            else self._config_dict['test_nms_iou_threshold']),
        num_proposals=(
            self._config_dict['num_proposals'] if training
            else self._config_dict['test_num_proposals']),
        use_batched_nms=self._config_dict['use_batched_nms'],
        decode_boxes=True,
        clip_boxes=True,
        apply_sigmoid_to_score=True)
    return roi_boxes, roi_scores

  def get_config(self):
    # Keras serialization hook: returns the kwargs needed to rebuild the layer.
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains definitions of ROI sampler."""
# Import libraries
import tensorflow as tf

from official.vision import keras_cv
from official.vision.beta.modeling.layers import box_sampler


@tf.keras.utils.register_keras_serializable(package='Vision')
class ROISampler(tf.keras.layers.Layer):
  """Samples ROIs and assigns targets to the sampled ROIs."""

  def __init__(self,
               mix_gt_boxes: bool = True,
               num_sampled_rois: int = 512,
               foreground_fraction: float = 0.25,
               foreground_iou_threshold: float = 0.5,
               background_iou_high_threshold: float = 0.5,
               background_iou_low_threshold: float = 0,
               skip_subsampling: bool = False,
               **kwargs):
    """Initializes a ROI sampler.

    Args:
      mix_gt_boxes: A `bool` of whether to mix the groundtruth boxes with
        proposed ROIs.
      num_sampled_rois: An `int` of the number of sampled ROIs per image.
      foreground_fraction: A `float` in [0, 1], what percentage of proposed ROIs
        should be sampled from the foreground boxes.
      foreground_iou_threshold: A `float` that represents the IoU threshold for
        a box to be considered as positive (if >= `foreground_iou_threshold`).
      background_iou_high_threshold: A `float` that represents the IoU threshold
        for a box to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`]).
      background_iou_low_threshold: A `float` that represents the IoU threshold
        for a box to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`])
      skip_subsampling: a bool that determines if we want to skip the sampling
        procedure that balances the fg/bg classes. Used for upper frcnn layers
        in cascade RCNN.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'mix_gt_boxes': mix_gt_boxes,
        'num_sampled_rois': num_sampled_rois,
        'foreground_fraction': foreground_fraction,
        'foreground_iou_threshold': foreground_iou_threshold,
        'background_iou_high_threshold': background_iou_high_threshold,
        'background_iou_low_threshold': background_iou_low_threshold,
        'skip_subsampling': skip_subsampling,
    }

    self._sim_calc = keras_cv.ops.IouSimilarity()
    # The matcher buckets each proposal's best IoU into four intervals; the
    # indicator values are interpreted by `call` below:
    #   (-inf, bg_low)   -> -3 (invalid)
    #   [bg_low, bg_high) -> -1 (negative/background)
    #   [bg_high, fg)     -> -2 (ignored)
    #   [fg, inf)         ->  1 (positive/foreground)
    self._box_matcher = keras_cv.ops.BoxMatcher(
        thresholds=[
            background_iou_low_threshold, background_iou_high_threshold,
            foreground_iou_threshold
        ],
        indicators=[-3, -1, -2, 1])
    self._target_gather = keras_cv.ops.TargetGather()

    self._sampler = box_sampler.BoxSampler(
        num_sampled_rois, foreground_fraction)
    super(ROISampler, self).__init__(**kwargs)

  def call(self, boxes: tf.Tensor, gt_boxes: tf.Tensor, gt_classes: tf.Tensor):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
    following algorithm to generate the final `num_samples_per_image` RoIs.
      1. Calculates the IoU between each proposal box and each gt_boxes.
      2. Assigns each proposed box with a groundtruth class and box by choosing
         the largest IoU overlap.
      3. Samples `num_samples_per_image` boxes from all proposed boxes, and
         returns box_targets, class_targets, and RoIs.

    Args:
      boxes: A `tf.Tensor` of shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      gt_boxes: A `tf.Tensor` of shape of [batch_size, MAX_NUM_INSTANCES, 4].
        The coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: A `tf.Tensor` with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the invalid
        classes.

    Returns:
      sampled_rois: A `tf.Tensor` of shape of [batch_size, K, 4], representing
        the coordinates of the sampled RoIs, where K is the number of the
        sampled RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: A `tf.Tensor` of shape of [batch_size, K, 4], storing
        the box coordinates of the matched groundtruth boxes of the sampled
        RoIs.
      sampled_gt_classes: A `tf.Tensor` of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: A `tf.Tensor` of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e.,
        gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
    """
    gt_boxes = tf.cast(gt_boxes, dtype=boxes.dtype)
    if self._config_dict['mix_gt_boxes']:
      # Append groundtruth boxes to the proposal pool so each gt box is
      # guaranteed a perfect-IoU match during training.
      boxes = tf.concat([boxes, gt_boxes], axis=1)

    # Padded (invalid) boxes are encoded with negative coordinates.
    boxes_invalid_mask = tf.less(
        tf.reduce_max(boxes, axis=-1, keepdims=True), 0.0)
    gt_invalid_mask = tf.less(
        tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
    similarity_matrix = self._sim_calc(boxes, gt_boxes, boxes_invalid_mask,
                                       gt_invalid_mask)
    matched_gt_indices, match_indicators = self._box_matcher(similarity_matrix)
    # Indicator semantics are fixed by the BoxMatcher config in __init__.
    positive_matches = tf.greater_equal(match_indicators, 0)
    negative_matches = tf.equal(match_indicators, -1)
    ignored_matches = tf.equal(match_indicators, -2)
    invalid_matches = tf.equal(match_indicators, -3)

    # Both unmatched (-1) and invalid (-3) proposals are treated as background:
    # class 0, all-zero box target, and gt index -1.
    background_mask = tf.expand_dims(
        tf.logical_or(negative_matches, invalid_matches), -1)
    gt_classes = tf.expand_dims(gt_classes, axis=-1)
    matched_gt_classes = self._target_gather(gt_classes, matched_gt_indices,
                                             background_mask)
    matched_gt_classes = tf.where(background_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)
    matched_gt_boxes = self._target_gather(gt_boxes, matched_gt_indices,
                                           tf.tile(background_mask, [1, 1, 4]))
    matched_gt_boxes = tf.where(background_mask,
                                tf.zeros_like(matched_gt_boxes),
                                matched_gt_boxes)
    matched_gt_indices = tf.where(
        tf.squeeze(background_mask, -1), -tf.ones_like(matched_gt_indices),
        matched_gt_indices)

    if self._config_dict['skip_subsampling']:
      # Upper cascade heads keep every box instead of re-balancing fg/bg
      # (see `skip_subsampling` in __init__).
      return (boxes, matched_gt_boxes, tf.squeeze(matched_gt_classes,
                                                  axis=-1), matched_gt_indices)

    sampled_indices = self._sampler(
        positive_matches, negative_matches, ignored_matches)

    sampled_rois = self._target_gather(boxes, sampled_indices)
    sampled_gt_boxes = self._target_gather(matched_gt_boxes, sampled_indices)
    sampled_gt_classes = tf.squeeze(self._target_gather(
        matched_gt_classes, sampled_indices), axis=-1)
    sampled_gt_indices = tf.squeeze(self._target_gather(
        tf.expand_dims(matched_gt_indices, -1), sampled_indices), axis=-1)
    return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
            sampled_gt_indices)

  def get_config(self):
    # Keras serialization hook: returns the kwargs needed to rebuild the layer.
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)

"""Mask R-CNN model."""

from typing import Any, List, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.vision.beta.ops import anchor
from official.vision.beta.ops import box_ops


@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskRCNNModel(tf.keras.Model):
  """The Mask R-CNN model."""

  def __init__(self,
               backbone: tf.keras.Model,
               decoder: tf.keras.Model,
               rpn_head: tf.keras.layers.Layer,
               detection_head: Union[tf.keras.layers.Layer,
                                     List[tf.keras.layers.Layer]],
               roi_generator: tf.keras.layers.Layer,
               roi_sampler: Union[tf.keras.layers.Layer,
                                  List[tf.keras.layers.Layer]],
               roi_aligner: tf.keras.layers.Layer,
               detection_generator: tf.keras.layers.Layer,
               mask_head: Optional[tf.keras.layers.Layer] = None,
               mask_sampler: Optional[tf.keras.layers.Layer] = None,
               mask_roi_aligner: Optional[tf.keras.layers.Layer] = None,
               class_agnostic_bbox_pred: bool = False,
               cascade_class_ensemble: bool = False,
               min_level: Optional[int] = None,
               max_level: Optional[int] = None,
               num_scales: Optional[int] = None,
               aspect_ratios: Optional[List[float]] = None,
               anchor_size: Optional[float] = None,
               **kwargs):
    """Initializes the Mask R-CNN model.

    Args:
      backbone: `tf.keras.Model`, the backbone network.
      decoder: `tf.keras.Model`, the decoder network.
      rpn_head: the RPN head.
      detection_head: the detection head or a list of heads.
      roi_generator: the ROI generator.
      roi_sampler: a single ROI sampler or a list of ROI samplers for cascade
        detection heads.
      roi_aligner: the ROI aligner.
      detection_generator: the detection generator.
      mask_head: the mask head.
      mask_sampler: the mask sampler.
      mask_roi_aligner: the ROI aligner for mask prediction.
      class_agnostic_bbox_pred: if True, perform class agnostic bounding box
        prediction. Needs to be `True` for Cascade RCNN models.
      cascade_class_ensemble: if True, ensemble classification scores over
        all detection heads.
      min_level: Minimum level in output feature maps.
      max_level: Maximum level in output feature maps.
      num_scales: A number representing intermediate scales added
        on each level. For instances, num_scales=2 adds one additional
        intermediate anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: A list representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
        on each scale level.
      anchor_size: A number representing the scale of size of the base
        anchor to the feature stride 2^level.
      **kwargs: keyword arguments to be passed.
    """
    super(MaskRCNNModel, self).__init__(**kwargs)
    # Raw constructor arguments kept for get_config()-based serialization.
    self._config_dict = {
        'backbone': backbone,
        'decoder': decoder,
        'rpn_head': rpn_head,
        'detection_head': detection_head,
        'roi_generator': roi_generator,
        'roi_sampler': roi_sampler,
        'roi_aligner': roi_aligner,
        'detection_generator': detection_generator,
        'mask_head': mask_head,
        'mask_sampler': mask_sampler,
        'mask_roi_aligner': mask_roi_aligner,
        'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
        'cascade_class_ensemble': cascade_class_ensemble,
        'min_level': min_level,
        'max_level': max_level,
        'num_scales': num_scales,
        'aspect_ratios': aspect_ratios,
        'anchor_size': anchor_size,
    }
    self.backbone = backbone
    self.decoder = decoder
    self.rpn_head = rpn_head
    # Normalize single head/sampler arguments to lists so the cascade loop in
    # `call` handles both the plain and the cascade configurations uniformly.
    if not isinstance(detection_head, (list, tuple)):
      self.detection_head = [detection_head]
    else:
      self.detection_head = detection_head
    self.roi_generator = roi_generator
    if not isinstance(roi_sampler, (list, tuple)):
      self.roi_sampler = [roi_sampler]
    else:
      self.roi_sampler = roi_sampler
    if len(self.roi_sampler) > 1 and not class_agnostic_bbox_pred:
      raise ValueError(
          '`class_agnostic_bbox_pred` needs to be True if multiple detection heads are specified.'
      )
    self.roi_aligner = roi_aligner
    self.detection_generator = detection_generator
    self._include_mask = mask_head is not None
    self.mask_head = mask_head
    if self._include_mask and mask_sampler is None:
      raise ValueError('`mask_sampler` is not provided in Mask R-CNN.')
    self.mask_sampler = mask_sampler
    if self._include_mask and mask_roi_aligner is None:
      raise ValueError('`mask_roi_aligner` is not provided in Mask R-CNN.')
    self.mask_roi_aligner = mask_roi_aligner
    # Weights for the regression losses for each FRCNN layer.
    # TODO(xianzhi): Make the weights configurable.
    self._cascade_layer_to_weights = [
        [10.0, 10.0, 5.0, 5.0],
        [20.0, 20.0, 10.0, 10.0],
        [30.0, 30.0, 15.0, 15.0],
    ]

  def call(self,
           images: tf.Tensor,
           image_shape: tf.Tensor,
           anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
           gt_boxes: Optional[tf.Tensor] = None,
           gt_classes: Optional[tf.Tensor] = None,
           gt_masks: Optional[tf.Tensor] = None,
           training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
    """Runs the Mask R-CNN forward pass.

    Args:
      images: A `tf.Tensor` of batched images of shape
        [batch, height, width, channels].
      image_shape: A `tf.Tensor` of shape [batch, 2] holding the scaled image
        [height, width]; used for ROI generation and box clipping.
      anchor_boxes: Optional multilevel anchor boxes keyed by level; generated
        on the fly from the anchor config when not provided.
      gt_boxes: Groundtruth boxes; only consumed when `training` is True.
      gt_classes: Groundtruth classes; only consumed when `training` is True.
      gt_masks: Groundtruth masks; only consumed when `training` is True and
        a mask head is configured.
      training: A `bool` indicating whether the model runs in training mode.

    Returns:
      A dict mapping output names to tensors: RPN outputs, per-cascade class/
      box outputs (and targets in training), and detection/mask outputs in
      inference.
    """
    model_outputs = {}

    # Feature extraction.
    features = self.backbone(images)
    if self.decoder:
      features = self.decoder(features)

    # Region proposal network.
    rpn_scores, rpn_boxes = self.rpn_head(features)

    model_outputs.update({
        'rpn_boxes': rpn_boxes,
        'rpn_scores': rpn_scores
    })

    # Generate anchor boxes for this batch if not provided.
    if anchor_boxes is None:
      _, image_height, image_width, _ = images.get_shape().as_list()
      anchor_boxes = anchor.Anchor(
          min_level=self._config_dict['min_level'],
          max_level=self._config_dict['max_level'],
          num_scales=self._config_dict['num_scales'],
          aspect_ratios=self._config_dict['aspect_ratios'],
          anchor_size=self._config_dict['anchor_size'],
          image_size=(image_height, image_width)).multilevel_boxes
      # Broadcast the single-image anchors across the batch dimension.
      for l in anchor_boxes:
        anchor_boxes[l] = tf.tile(
            tf.expand_dims(anchor_boxes[l], axis=0),
            [tf.shape(images)[0], 1, 1, 1])

    # Generate RoIs.
    current_rois, _ = self.roi_generator(rpn_boxes, rpn_scores, anchor_boxes,
                                         image_shape, training)

    next_rois = current_rois
    all_class_outputs = []
    for cascade_num in range(len(self.roi_sampler)):
      # In cascade RCNN we want the higher layers to have different regression
      # weights as the predicted deltas become smaller and smaller.
      regression_weights = self._cascade_layer_to_weights[cascade_num]
      current_rois = next_rois

      (class_outputs, box_outputs, model_outputs, matched_gt_boxes,
       matched_gt_classes, matched_gt_indices,
       current_rois) = self._run_frcnn_head(
           features=features,
           rois=current_rois,
           gt_boxes=gt_boxes,
           gt_classes=gt_classes,
           training=training,
           model_outputs=model_outputs,
           cascade_num=cascade_num,
           regression_weights=regression_weights)
      all_class_outputs.append(class_outputs)

      # Generate ROIs for the next cascade head if there is any.
      if cascade_num < len(self.roi_sampler) - 1:
        next_rois = box_ops.decode_boxes(
            tf.cast(box_outputs, tf.float32),
            current_rois,
            weights=regression_weights)
        next_rois = box_ops.clip_boxes(next_rois,
                                       tf.expand_dims(image_shape, axis=1))

    if not training:
      if self._config_dict['cascade_class_ensemble']:
        # Average the classification scores across all cascade heads.
        class_outputs = tf.add_n(all_class_outputs) / len(all_class_outputs)

      detections = self.detection_generator(
          box_outputs,
          class_outputs,
          current_rois,
          image_shape,
          regression_weights,
          bbox_per_class=(not self._config_dict['class_agnostic_bbox_pred']))
      model_outputs.update({
          'cls_outputs': class_outputs,
          'box_outputs': box_outputs,
      })
      if self.detection_generator.get_config()['apply_nms']:
        model_outputs.update({
            'detection_boxes': detections['detection_boxes'],
            'detection_scores': detections['detection_scores'],
            'detection_classes': detections['detection_classes'],
            'num_detections': detections['num_detections']
        })
      else:
        model_outputs.update({
            'decoded_boxes': detections['decoded_boxes'],
            'decoded_box_scores': detections['decoded_box_scores']
        })

    if not self._include_mask:
      return model_outputs

    if training:
      current_rois, roi_classes, roi_masks = self.mask_sampler(
          current_rois, matched_gt_boxes, matched_gt_classes,
          matched_gt_indices, gt_masks)
      # Mask targets are labels, not predictions — block gradient flow.
      roi_masks = tf.stop_gradient(roi_masks)

      model_outputs.update({
          'mask_class_targets': roi_classes,
          'mask_targets': roi_masks,
      })
    else:
      # At inference, predict masks only for the final (post-NMS) detections.
      current_rois = model_outputs['detection_boxes']
      roi_classes = model_outputs['detection_classes']

    # Mask RoI align.
    mask_roi_features = self.mask_roi_aligner(features, current_rois)

    # Mask head.
    raw_masks = self.mask_head([mask_roi_features, roi_classes])

    if training:
      model_outputs.update({
          'mask_outputs': raw_masks,
      })
    else:
      model_outputs.update({
          'detection_masks': tf.math.sigmoid(raw_masks),
      })
    return model_outputs

  def _run_frcnn_head(self, features, rois, gt_boxes, gt_classes, training,
                      model_outputs, cascade_num, regression_weights):
    """Runs the frcnn head that does both class and box prediction.

    Args:
      features: `list` of features from the feature extractor.
      rois: `list` of current rois that will be used to predict bbox refinement
        and classes from.
      gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
        This tensor might have paddings with a negative value.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
        classes. It is padded with -1s to indicate the invalid classes.
      training: `bool`, if model is training or being evaluated.
      model_outputs: `dict`, used for storing outputs used for eval and losses.
      cascade_num: `int`, the current frcnn layer in the cascade.
      regression_weights: `list`, weights used for l1 loss in bounding box
        regression.

    Returns:
      class_outputs: Class predictions for rois.
      box_outputs: Box predictions for rois. These are formatted for the
        regression loss and need to be converted before being used as rois
        in the next stage.
      model_outputs: Updated dict with predictions used for losses and eval.
      matched_gt_boxes: If `is_training` is true, then these give the gt box
        location of its positive match.
      matched_gt_classes: If `is_training` is true, then these give the gt class
        of the predicted box.
      matched_gt_indices: If `is_training` is true, then gives the index of
        the positive box match. Used for mask prediction.
      rois: The sampled rois used for this layer.
    """
    # Only used during training.
    matched_gt_boxes, matched_gt_classes, matched_gt_indices = (None, None,
                                                                None)
    if training and gt_boxes is not None:
      rois = tf.stop_gradient(rois)

      current_roi_sampler = self.roi_sampler[cascade_num]
      rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
          current_roi_sampler(rois, gt_boxes, gt_classes))
      # Create bounding box training targets.
      box_targets = box_ops.encode_boxes(
          matched_gt_boxes, rois, weights=regression_weights)
      # If the target is background, the box target is set to all 0s.
      box_targets = tf.where(
          tf.tile(
              tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
              [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
      # Cascade heads beyond the first get a `_<n>` suffix on their keys.
      model_outputs.update({
          'class_targets_{}'.format(cascade_num)
          if cascade_num else 'class_targets':
              matched_gt_classes,
          'box_targets_{}'.format(cascade_num)
          if cascade_num else 'box_targets':
              box_targets,
      })

    # Get roi features.
    roi_features = self.roi_aligner(features, rois)

    # Run frcnn head to get class and bbox predictions.
    current_detection_head = self.detection_head[cascade_num]
    class_outputs, box_outputs = current_detection_head(roi_features)

    model_outputs.update({
        'class_outputs_{}'.format(cascade_num)
        if cascade_num else 'class_outputs':
            class_outputs,
        'box_outputs_{}'.format(cascade_num) if cascade_num else 'box_outputs':
            box_outputs,
    })
    return (class_outputs, box_outputs, model_outputs, matched_gt_boxes,
            matched_gt_classes, matched_gt_indices, rois)

  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(
        backbone=self.backbone,
        rpn_head=self.rpn_head,
        detection_head=self.detection_head)
    if self.decoder is not None:
      items.update(decoder=self.decoder)
    if self._include_mask:
      items.update(mask_head=self.mask_head)

    return items

  def get_config(self) -> Mapping[str, Any]:
    # Keras serialization hook: returns the kwargs needed to rebuild the model.
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
+ +# Lint as: python3 +"""Tests for maskrcnn_model.py.""" + +import os +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.modeling import maskrcnn_model +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import fpn +from official.vision.beta.modeling.heads import dense_prediction_heads +from official.vision.beta.modeling.heads import instance_heads +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.modeling.layers import mask_sampler +from official.vision.beta.modeling.layers import roi_aligner +from official.vision.beta.modeling.layers import roi_generator +from official.vision.beta.modeling.layers import roi_sampler +from official.vision.beta.ops import anchor + + +class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): + + @combinations.generate( + combinations.combine( + include_mask=[True, False], + use_separable_conv=[True, False], + build_anchor_boxes=[True, False], + is_training=[True, False])) + def test_build_model(self, include_mask, use_separable_conv, + build_anchor_boxes, is_training): + num_classes = 3 + min_level = 3 + max_level = 7 + num_scales = 3 + aspect_ratios = [1.0] + anchor_size = 3 + resnet_model_id = 50 + num_anchors_per_location = num_scales * len(aspect_ratios) + image_size = 384 + images = np.random.rand(2, image_size, image_size, 3) + image_shape = np.array([[image_size, image_size], [image_size, image_size]]) + + if build_anchor_boxes: + anchor_boxes = anchor.Anchor( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=3, + image_size=(image_size, image_size)).multilevel_boxes + for l in anchor_boxes: + anchor_boxes[l] = tf.tile( + tf.expand_dims(anchor_boxes[l], 
axis=0), [2, 1, 1, 1]) + else: + anchor_boxes = None + + backbone = resnet.ResNet(model_id=resnet_model_id) + decoder = fpn.FPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level, + use_separable_conv=use_separable_conv) + rpn_head = dense_prediction_heads.RPNHead( + min_level=min_level, + max_level=max_level, + num_anchors_per_location=num_anchors_per_location, + num_convs=1) + detection_head = instance_heads.DetectionHead(num_classes=num_classes) + roi_generator_obj = roi_generator.MultilevelROIGenerator() + roi_sampler_obj = roi_sampler.ROISampler() + roi_aligner_obj = roi_aligner.MultilevelROIAligner() + detection_generator_obj = detection_generator.DetectionGenerator() + if include_mask: + mask_head = instance_heads.MaskHead( + num_classes=num_classes, upsample_factor=2) + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=28, num_sampled_masks=1) + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) + else: + mask_head = None + mask_sampler_obj = None + mask_roi_aligner_obj = None + model = maskrcnn_model.MaskRCNNModel( + backbone, + decoder, + rpn_head, + detection_head, + roi_generator_obj, + roi_sampler_obj, + roi_aligner_obj, + detection_generator_obj, + mask_head, + mask_sampler_obj, + mask_roi_aligner_obj, + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size) + + gt_boxes = np.array( + [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]], + [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]], + dtype=np.float32) + gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32) + if include_mask: + gt_masks = np.ones((2, 3, 100, 100)) + else: + gt_masks = None + + # Results will be checked in test_forward. 
+ _ = model( + images, + image_shape, + anchor_boxes, + gt_boxes, + gt_classes, + gt_masks, + training=is_training) + + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + include_mask=[True, False], + build_anchor_boxes=[True, False], + use_cascade_heads=[True, False], + training=[True, False], + )) + def test_forward(self, strategy, include_mask, build_anchor_boxes, training, + use_cascade_heads): + num_classes = 3 + min_level = 3 + max_level = 4 + num_scales = 3 + aspect_ratios = [1.0] + anchor_size = 3 + if use_cascade_heads: + cascade_iou_thresholds = [0.6] + class_agnostic_bbox_pred = True + cascade_class_ensemble = True + else: + cascade_iou_thresholds = None + class_agnostic_bbox_pred = False + cascade_class_ensemble = False + + image_size = (256, 256) + images = np.random.rand(2, image_size[0], image_size[1], 3) + image_shape = np.array([[224, 100], [100, 224]]) + with strategy.scope(): + if build_anchor_boxes: + anchor_boxes = anchor.Anchor( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size, + image_size=image_size).multilevel_boxes + else: + anchor_boxes = None + num_anchors_per_location = len(aspect_ratios) * num_scales + + input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) + backbone = resnet.ResNet(model_id=50, input_specs=input_specs) + decoder = fpn.FPN( + min_level=min_level, + max_level=max_level, + input_specs=backbone.output_specs) + rpn_head = dense_prediction_heads.RPNHead( + min_level=min_level, + max_level=max_level, + num_anchors_per_location=num_anchors_per_location) + detection_head = instance_heads.DetectionHead( + num_classes=num_classes, + class_agnostic_bbox_pred=class_agnostic_bbox_pred) + roi_generator_obj = roi_generator.MultilevelROIGenerator() + + roi_sampler_cascade = [] + roi_sampler_obj = roi_sampler.ROISampler() + 
roi_sampler_cascade.append(roi_sampler_obj) + if cascade_iou_thresholds: + for iou in cascade_iou_thresholds: + roi_sampler_obj = roi_sampler.ROISampler( + mix_gt_boxes=False, + foreground_iou_threshold=iou, + background_iou_high_threshold=iou, + background_iou_low_threshold=0.0, + skip_subsampling=True) + roi_sampler_cascade.append(roi_sampler_obj) + roi_aligner_obj = roi_aligner.MultilevelROIAligner() + detection_generator_obj = detection_generator.DetectionGenerator() + if include_mask: + mask_head = instance_heads.MaskHead( + num_classes=num_classes, upsample_factor=2) + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=28, num_sampled_masks=1) + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) + else: + mask_head = None + mask_sampler_obj = None + mask_roi_aligner_obj = None + model = maskrcnn_model.MaskRCNNModel( + backbone, + decoder, + rpn_head, + detection_head, + roi_generator_obj, + roi_sampler_obj, + roi_aligner_obj, + detection_generator_obj, + mask_head, + mask_sampler_obj, + mask_roi_aligner_obj, + class_agnostic_bbox_pred=class_agnostic_bbox_pred, + cascade_class_ensemble=cascade_class_ensemble, + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size) + + gt_boxes = np.array( + [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]], + [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]], + dtype=np.float32) + gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32) + if include_mask: + gt_masks = np.ones((2, 3, 100, 100)) + else: + gt_masks = None + + results = model( + images, + image_shape, + anchor_boxes, + gt_boxes, + gt_classes, + gt_masks, + training=training) + + self.assertIn('rpn_boxes', results) + self.assertIn('rpn_scores', results) + if training: + self.assertIn('class_targets', results) + self.assertIn('box_targets', results) + self.assertIn('class_outputs', results) + self.assertIn('box_outputs', results) + if 
include_mask: + self.assertIn('mask_outputs', results) + else: + self.assertIn('detection_boxes', results) + self.assertIn('detection_scores', results) + self.assertIn('detection_classes', results) + self.assertIn('num_detections', results) + if include_mask: + self.assertIn('detection_masks', results) + + @parameterized.parameters( + (False,), + (True,), + ) + def test_serialize_deserialize(self, include_mask): + input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) + backbone = resnet.ResNet(model_id=50, input_specs=input_specs) + decoder = fpn.FPN( + min_level=3, max_level=7, input_specs=backbone.output_specs) + rpn_head = dense_prediction_heads.RPNHead( + min_level=3, max_level=7, num_anchors_per_location=3) + detection_head = instance_heads.DetectionHead(num_classes=2) + roi_generator_obj = roi_generator.MultilevelROIGenerator() + roi_sampler_obj = roi_sampler.ROISampler() + roi_aligner_obj = roi_aligner.MultilevelROIAligner() + detection_generator_obj = detection_generator.DetectionGenerator() + if include_mask: + mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2) + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=28, num_sampled_masks=1) + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) + else: + mask_head = None + mask_sampler_obj = None + mask_roi_aligner_obj = None + model = maskrcnn_model.MaskRCNNModel( + backbone, + decoder, + rpn_head, + detection_head, + roi_generator_obj, + roi_sampler_obj, + roi_aligner_obj, + detection_generator_obj, + mask_head, + mask_sampler_obj, + mask_roi_aligner_obj, + min_level=3, + max_level=7, + num_scales=3, + aspect_ratios=[1.0], + anchor_size=3) + + config = model.get_config() + new_model = maskrcnn_model.MaskRCNNModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. 
+ self.assertAllEqual(model.get_config(), new_model.get_config()) + + @parameterized.parameters( + (False,), + (True,), + ) + def test_checkpoint(self, include_mask): + input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) + backbone = resnet.ResNet(model_id=50, input_specs=input_specs) + decoder = fpn.FPN( + min_level=3, max_level=7, input_specs=backbone.output_specs) + rpn_head = dense_prediction_heads.RPNHead( + min_level=3, max_level=7, num_anchors_per_location=3) + detection_head = instance_heads.DetectionHead(num_classes=2) + roi_generator_obj = roi_generator.MultilevelROIGenerator() + roi_sampler_obj = roi_sampler.ROISampler() + roi_aligner_obj = roi_aligner.MultilevelROIAligner() + detection_generator_obj = detection_generator.DetectionGenerator() + if include_mask: + mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2) + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=28, num_sampled_masks=1) + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) + else: + mask_head = None + mask_sampler_obj = None + mask_roi_aligner_obj = None + model = maskrcnn_model.MaskRCNNModel( + backbone, + decoder, + rpn_head, + detection_head, + roi_generator_obj, + roi_sampler_obj, + roi_aligner_obj, + detection_generator_obj, + mask_head, + mask_sampler_obj, + mask_roi_aligner_obj, + min_level=3, + max_level=7, + num_scales=3, + aspect_ratios=[1.0], + anchor_size=3) + expect_checkpoint_items = dict( + backbone=backbone, + decoder=decoder, + rpn_head=rpn_head, + detection_head=[detection_head]) + if include_mask: + expect_checkpoint_items['mask_head'] = mask_head + self.assertAllEqual(expect_checkpoint_items, model.checkpoint_items) + + # Test save and load checkpoints. 
+ ckpt = tf.train.Checkpoint(model=model, **model.checkpoint_items) + save_dir = self.create_tempdir().full_path + ckpt.save(os.path.join(save_dir, 'ckpt')) + + partial_ckpt = tf.train.Checkpoint(backbone=backbone) + partial_ckpt.restore(tf.train.latest_checkpoint( + save_dir)).expect_partial().assert_existing_objects_matched() + + if include_mask: + partial_ckpt_mask = tf.train.Checkpoint( + backbone=backbone, mask_head=mask_head) + partial_ckpt_mask.restore(tf.train.latest_checkpoint( + save_dir)).expect_partial().assert_existing_objects_matched() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/retinanet_model.py b/official/vision/beta/modeling/retinanet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c052426cf7aa4be46c8520f7dc4255340f5e0cb5 --- /dev/null +++ b/official/vision/beta/modeling/retinanet_model.py @@ -0,0 +1,205 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""RetinaNet.""" +from typing import Any, Mapping, List, Optional, Union + +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import anchor + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class RetinaNetModel(tf.keras.Model): + """The RetinaNet model class.""" + + def __init__(self, + backbone: tf.keras.Model, + decoder: tf.keras.Model, + head: tf.keras.layers.Layer, + detection_generator: tf.keras.layers.Layer, + min_level: Optional[int] = None, + max_level: Optional[int] = None, + num_scales: Optional[int] = None, + aspect_ratios: Optional[List[float]] = None, + anchor_size: Optional[float] = None, + **kwargs): + """RetinaNet initialization function. + + Args: + backbone: `tf.keras.Model` a backbone network. + decoder: `tf.keras.Model` a decoder network. + head: `RetinaNetHead`, the RetinaNet head. + detection_generator: the detection generator. + min_level: Minimum level in output feature maps. + max_level: Maximum level in output feature maps. + num_scales: A number representing intermediate scales added + on each level. For instance, num_scales=2 adds one additional + intermediate anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: A list representing the aspect ratio + anchors added on each level. The number indicates the ratio of width to + height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors + on each scale level. + anchor_size: A number representing the scale of size of the base + anchor to the feature stride 2^level. + **kwargs: keyword arguments to be passed. 
+ """ + super(RetinaNetModel, self).__init__(**kwargs) + self._config_dict = { + 'backbone': backbone, + 'decoder': decoder, + 'head': head, + 'detection_generator': detection_generator, + 'min_level': min_level, + 'max_level': max_level, + 'num_scales': num_scales, + 'aspect_ratios': aspect_ratios, + 'anchor_size': anchor_size, + } + self._backbone = backbone + self._decoder = decoder + self._head = head + self._detection_generator = detection_generator + + def call(self, + images: tf.Tensor, + image_shape: Optional[tf.Tensor] = None, + anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None, + training: bool = None) -> Mapping[str, tf.Tensor]: + """Forward pass of the RetinaNet model. + + Args: + images: `Tensor`, the input batched images, whose shape is + [batch, height, width, 3]. + image_shape: `Tensor`, the actual shape of the input images, whose shape + is [batch, 2] where the last dimension is [height, width]. Note that + this is the actual image shape excluding paddings. For example, images + in the batch may be resized into different shapes before padding to the + fixed size. + anchor_boxes: a dict of tensors which includes multilevel anchors. + - key: `str`, the level of the multilevel predictions. + - values: `Tensor`, the anchor coordinates of a particular feature + level, whose shape is [height_l, width_l, num_anchors_per_location]. + training: `bool`, indicating whether it is in training mode. + + Returns: + scores: a dict of tensors which includes scores of the predictions. + - key: `str`, the level of the multilevel predictions. + - values: `Tensor`, the box scores predicted from a particular feature + level, whose shape is + [batch, height_l, width_l, num_classes * num_anchors_per_location]. + boxes: a dict of tensors which includes coordinates of the predictions. + - key: `str`, the level of the multilevel predictions. 
+ - values: `Tensor`, the box coordinates predicted from a particular + feature level, whose shape is + [batch, height_l, width_l, 4 * num_anchors_per_location]. + attributes: a dict of (attribute_name, attribute_predictions). Each + attribute prediction is a dict that includes: + - key: `str`, the level of the multilevel predictions. + - values: `Tensor`, the attribute predictions from a particular + feature level, whose shape is + [batch, height_l, width_l, att_size * num_anchors_per_location]. + """ + # Feature extraction. + features = self.backbone(images) + if self.decoder: + features = self.decoder(features) + + # Dense prediction. `raw_attributes` can be empty. + raw_scores, raw_boxes, raw_attributes = self.head(features) + + if training: + outputs = { + 'cls_outputs': raw_scores, + 'box_outputs': raw_boxes, + } + if raw_attributes: + outputs.update({'att_outputs': raw_attributes}) + return outputs + else: + # Generate anchor boxes for this batch if not provided. + if anchor_boxes is None: + _, image_height, image_width, _ = images.get_shape().as_list() + anchor_boxes = anchor.Anchor( + min_level=self._config_dict['min_level'], + max_level=self._config_dict['max_level'], + num_scales=self._config_dict['num_scales'], + aspect_ratios=self._config_dict['aspect_ratios'], + anchor_size=self._config_dict['anchor_size'], + image_size=(image_height, image_width)).multilevel_boxes + for l in anchor_boxes: + anchor_boxes[l] = tf.tile( + tf.expand_dims(anchor_boxes[l], axis=0), + [tf.shape(images)[0], 1, 1, 1]) + + # Post-processing. 
+ final_results = self.detection_generator( + raw_boxes, raw_scores, anchor_boxes, image_shape, raw_attributes) + outputs = { + 'cls_outputs': raw_scores, + 'box_outputs': raw_boxes, + } + if self.detection_generator.get_config()['apply_nms']: + outputs.update({ + 'detection_boxes': final_results['detection_boxes'], + 'detection_scores': final_results['detection_scores'], + 'detection_classes': final_results['detection_classes'], + 'num_detections': final_results['num_detections'] + }) + else: + outputs.update({ + 'decoded_boxes': final_results['decoded_boxes'], + 'decoded_box_scores': final_results['decoded_box_scores'] + }) + + if raw_attributes: + outputs.update({ + 'att_outputs': raw_attributes, + 'detection_attributes': final_results['detection_attributes'], + }) + return outputs + + @property + def checkpoint_items( + self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]: + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict(backbone=self.backbone, head=self.head) + if self.decoder is not None: + items.update(decoder=self.decoder) + + return items + + @property + def backbone(self) -> tf.keras.Model: + return self._backbone + + @property + def decoder(self) -> tf.keras.Model: + return self._decoder + + @property + def head(self) -> tf.keras.layers.Layer: + return self._head + + @property + def detection_generator(self) -> tf.keras.layers.Layer: + return self._detection_generator + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/modeling/retinanet_model_test.py b/official/vision/beta/modeling/retinanet_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6c45cc6f7e2cdc8789e55596cbf1c152fadaa7f5 --- /dev/null +++ b/official/vision/beta/modeling/retinanet_model_test.py @@ -0,0 +1,294 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for RetinaNet models.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.modeling import retinanet_model +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import fpn +from official.vision.beta.modeling.heads import dense_prediction_heads +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.ops import anchor + + +class RetinaNetTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + { + 'use_separable_conv': True, + 'build_anchor_boxes': True, + 'is_training': False, + 'has_att_heads': False + }, + { + 'use_separable_conv': False, + 'build_anchor_boxes': True, + 'is_training': False, + 'has_att_heads': False + }, + { + 'use_separable_conv': False, + 'build_anchor_boxes': False, + 'is_training': False, + 'has_att_heads': False + }, + { + 'use_separable_conv': False, + 'build_anchor_boxes': False, + 'is_training': True, + 'has_att_heads': False + }, + { + 'use_separable_conv': False, + 'build_anchor_boxes': True, + 'is_training': True, + 'has_att_heads': True + }, + { + 'use_separable_conv': False, + 'build_anchor_boxes': True, + 'is_training': False, + 
'has_att_heads': True + }, + ) + def test_build_model(self, use_separable_conv, build_anchor_boxes, + is_training, has_att_heads): + num_classes = 3 + min_level = 3 + max_level = 7 + num_scales = 3 + aspect_ratios = [1.0] + anchor_size = 3 + fpn_num_filters = 256 + head_num_convs = 4 + head_num_filters = 256 + num_anchors_per_location = num_scales * len(aspect_ratios) + image_size = 384 + images = np.random.rand(2, image_size, image_size, 3) + image_shape = np.array([[image_size, image_size], [image_size, image_size]]) + + if build_anchor_boxes: + anchor_boxes = anchor.Anchor( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size, + image_size=(image_size, image_size)).multilevel_boxes + for l in anchor_boxes: + anchor_boxes[l] = tf.tile( + tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1]) + else: + anchor_boxes = None + + if has_att_heads: + attribute_heads = [dict(name='depth', type='regression', size=1)] + else: + attribute_heads = None + + backbone = resnet.ResNet(model_id=50) + decoder = fpn.FPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level, + num_filters=fpn_num_filters, + use_separable_conv=use_separable_conv) + head = dense_prediction_heads.RetinaNetHead( + min_level=min_level, + max_level=max_level, + num_classes=num_classes, + attribute_heads=attribute_heads, + num_anchors_per_location=num_anchors_per_location, + use_separable_conv=use_separable_conv, + num_convs=head_num_convs, + num_filters=head_num_filters) + generator = detection_generator.MultilevelDetectionGenerator( + max_num_detections=10) + model = retinanet_model.RetinaNetModel( + backbone=backbone, + decoder=decoder, + head=head, + detection_generator=generator, + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size) + + _ = model(images, image_shape, anchor_boxes, training=is_training) + + 
@combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + image_size=[ + (128, 128), + ], + training=[True, False], + has_att_heads=[True, False], + )) + def test_forward(self, strategy, image_size, training, has_att_heads): + """Test for creation of a R50-FPN RetinaNet.""" + tf.keras.backend.set_image_data_format('channels_last') + num_classes = 3 + min_level = 3 + max_level = 7 + num_scales = 3 + aspect_ratios = [1.0] + num_anchors_per_location = num_scales * len(aspect_ratios) + + images = np.random.rand(2, image_size[0], image_size[1], 3) + image_shape = np.array( + [[image_size[0], image_size[1]], [image_size[0], image_size[1]]]) + + with strategy.scope(): + anchor_gen = anchor.build_anchor_generator( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=3) + anchor_boxes = anchor_gen(image_size) + for l in anchor_boxes: + anchor_boxes[l] = tf.tile( + tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1]) + + backbone = resnet.ResNet(model_id=50) + decoder = fpn.FPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level) + + if has_att_heads: + attribute_heads = [dict(name='depth', type='regression', size=1)] + else: + attribute_heads = None + head = dense_prediction_heads.RetinaNetHead( + min_level=min_level, + max_level=max_level, + num_classes=num_classes, + attribute_heads=attribute_heads, + num_anchors_per_location=num_anchors_per_location) + generator = detection_generator.MultilevelDetectionGenerator( + max_num_detections=10) + model = retinanet_model.RetinaNetModel( + backbone=backbone, + decoder=decoder, + head=head, + detection_generator=generator) + + model_outputs = model( + images, + image_shape, + anchor_boxes, + training=training) + + if training: + cls_outputs = model_outputs['cls_outputs'] + box_outputs = model_outputs['box_outputs'] + for level in 
range(min_level, max_level + 1): + self.assertIn(str(level), cls_outputs) + self.assertIn(str(level), box_outputs) + self.assertAllEqual([ + 2, + image_size[0] // 2**level, + image_size[1] // 2**level, + num_classes * num_anchors_per_location + ], cls_outputs[str(level)].numpy().shape) + self.assertAllEqual([ + 2, + image_size[0] // 2**level, + image_size[1] // 2**level, + 4 * num_anchors_per_location + ], box_outputs[str(level)].numpy().shape) + if has_att_heads: + att_outputs = model_outputs['att_outputs'] + for att in att_outputs.values(): + self.assertAllEqual([ + 2, image_size[0] // 2**level, image_size[1] // 2**level, + 1 * num_anchors_per_location + ], att[str(level)].numpy().shape) + else: + self.assertIn('detection_boxes', model_outputs) + self.assertIn('detection_scores', model_outputs) + self.assertIn('detection_classes', model_outputs) + self.assertIn('num_detections', model_outputs) + self.assertAllEqual( + [2, 10, 4], model_outputs['detection_boxes'].numpy().shape) + self.assertAllEqual( + [2, 10], model_outputs['detection_scores'].numpy().shape) + self.assertAllEqual( + [2, 10], model_outputs['detection_classes'].numpy().shape) + self.assertAllEqual( + [2,], model_outputs['num_detections'].numpy().shape) + if has_att_heads: + self.assertIn('detection_attributes', model_outputs) + self.assertAllEqual( + [2, 10, 1], + model_outputs['detection_attributes']['depth'].numpy().shape) + + def test_serialize_deserialize(self): + """Validate the network can be serialized and deserialized.""" + num_classes = 3 + min_level = 3 + max_level = 7 + num_scales = 3 + aspect_ratios = [1.0] + num_anchors_per_location = num_scales * len(aspect_ratios) + + backbone = resnet.ResNet(model_id=50) + decoder = fpn.FPN( + input_specs=backbone.output_specs, + min_level=min_level, + max_level=max_level) + head = dense_prediction_heads.RetinaNetHead( + min_level=min_level, + max_level=max_level, + num_classes=num_classes, + num_anchors_per_location=num_anchors_per_location) + 
generator = detection_generator.MultilevelDetectionGenerator( + max_num_detections=10) + model = retinanet_model.RetinaNetModel( + backbone=backbone, + decoder=decoder, + head=head, + detection_generator=generator, + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=3) + + config = model.get_config() + new_model = retinanet_model.RetinaNetModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/segmentation_model.py b/official/vision/beta/modeling/segmentation_model.py new file mode 100644 index 0000000000000000000000000000000000000000..f21f89104f3c7b0532b638879ee131e9d4ccba40 --- /dev/null +++ b/official/vision/beta/modeling/segmentation_model.py @@ -0,0 +1,81 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Build segmentation models.""" +from typing import Any, Mapping, Union + +# Import libraries +import tensorflow as tf + +layers = tf.keras.layers + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class SegmentationModel(tf.keras.Model): + """A Segmentation class model. + + Input images are passed through backbone first. 
Decoder network is then + applied, and finally, segmentation head is applied on the output of the + decoder network. Layers such as ASPP should be part of decoder. Any feature + fusion is done as part of the segmentation head (i.e. deeplabv3+ feature + fusion is not part of the decoder, instead it is part of the segmentation + head). This way, different feature fusion techniques can be combined with + different backbones, and decoders. + """ + + def __init__(self, backbone: tf.keras.Model, decoder: tf.keras.Model, + head: tf.keras.layers.Layer, **kwargs): + """Segmentation initialization function. + + Args: + backbone: a backbone network. + decoder: a decoder network. E.g. FPN. + head: segmentation head. + **kwargs: keyword arguments to be passed. + """ + super(SegmentationModel, self).__init__(**kwargs) + self._config_dict = { + 'backbone': backbone, + 'decoder': decoder, + 'head': head, + } + self.backbone = backbone + self.decoder = decoder + self.head = head + + def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor: + backbone_features = self.backbone(inputs) + + if self.decoder: + decoder_features = self.decoder(backbone_features) + else: + decoder_features = backbone_features + + return self.head(backbone_features, decoder_features) + + @property + def checkpoint_items( + self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]: + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict(backbone=self.backbone, head=self.head) + if self.decoder is not None: + items.update(decoder=self.decoder) + return items + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/modeling/segmentation_model_test.py b/official/vision/beta/modeling/segmentation_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..dd2cdfb27b211eb0a6403729dfcbd5a187245521 
--- /dev/null +++ b/official/vision/beta/modeling/segmentation_model_test.py @@ -0,0 +1,85 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for segmentation network.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling import segmentation_model +from official.vision.beta.modeling.decoders import fpn +from official.vision.beta.modeling.heads import segmentation_heads + + +class SegmentationNetworkTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 2), + (128, 3), + (128, 4), + (256, 2), + (256, 3), + (256, 4), + ) + def test_segmentation_network_creation( + self, input_size, level): + """Test for creation of a segmentation network.""" + num_classes = 10 + inputs = np.random.rand(2, input_size, input_size, 3) + tf.keras.backend.set_image_data_format('channels_last') + backbone = backbones.ResNet(model_id=50) + + decoder = fpn.FPN( + input_specs=backbone.output_specs, min_level=2, max_level=7) + head = segmentation_heads.SegmentationHead(num_classes, level=level) + + model = segmentation_model.SegmentationModel( + backbone=backbone, + decoder=decoder, + head=head + ) + + logits = model(inputs) + self.assertAllEqual( + [2, input_size // (2**level), input_size // (2**level), num_classes], + logits.numpy().shape) + + 
def test_serialize_deserialize(self): + """Validate the network can be serialized and deserialized.""" + num_classes = 3 + backbone = backbones.ResNet(model_id=50) + decoder = fpn.FPN( + input_specs=backbone.output_specs, min_level=3, max_level=7) + head = segmentation_heads.SegmentationHead(num_classes, level=3) + model = segmentation_model.SegmentationModel( + backbone=backbone, + decoder=decoder, + head=head + ) + + config = model.get_config() + new_model = segmentation_model.SegmentationModel.from_config(config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/modeling/video_classification_model.py b/official/vision/beta/modeling/video_classification_model.py new file mode 100644 index 0000000000000000000000000000000000000000..34a2edeca0a0b4c7040c181f2d974e4c2eb10835 --- /dev/null +++ b/official/vision/beta/modeling/video_classification_model.py @@ -0,0 +1,113 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Build video classification models.""" +from typing import Any, Mapping, Optional, Union +import tensorflow as tf + +layers = tf.keras.layers + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class VideoClassificationModel(tf.keras.Model): + """A video classification class builder.""" + + def __init__( + self, + backbone: tf.keras.Model, + num_classes: int, + input_specs: Mapping[str, tf.keras.layers.InputSpec] = None, + dropout_rate: float = 0.0, + aggregate_endpoints: bool = False, + kernel_initializer: str = 'random_uniform', + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None, + **kwargs): + """Video Classification initialization function. + + Args: + backbone: a 3d backbone network. + num_classes: `int` number of classes in classification task. + input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. + dropout_rate: `float` rate for dropout regularization. + aggregate_endpoints: `bool` aggregate all end ponits or only use the + final end point. + kernel_initializer: kernel initializer for the dense layer. + kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to + None. + bias_regularizer: tf.keras.regularizers.Regularizer object. Default to + None. + **kwargs: keyword arguments to be passed. 
+ """ + if not input_specs: + input_specs = { + 'image': layers.InputSpec(shape=[None, None, None, None, 3]) + } + self._self_setattr_tracking = False + self._config_dict = { + 'backbone': backbone, + 'num_classes': num_classes, + 'input_specs': input_specs, + 'dropout_rate': dropout_rate, + 'aggregate_endpoints': aggregate_endpoints, + 'kernel_initializer': kernel_initializer, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + } + self._input_specs = input_specs + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._backbone = backbone + + inputs = { + k: tf.keras.Input(shape=v.shape[1:]) for k, v in input_specs.items() + } + endpoints = backbone(inputs['image']) + + if aggregate_endpoints: + pooled_feats = [] + for endpoint in endpoints.values(): + x_pool = tf.keras.layers.GlobalAveragePooling3D()(endpoint) + pooled_feats.append(x_pool) + x = tf.concat(pooled_feats, axis=1) + else: + x = endpoints[max(endpoints.keys())] + x = tf.keras.layers.GlobalAveragePooling3D()(x) + + x = tf.keras.layers.Dropout(dropout_rate)(x) + x = tf.keras.layers.Dense( + num_classes, kernel_initializer=kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer)( + x) + + super(VideoClassificationModel, self).__init__( + inputs=inputs, outputs=x, **kwargs) + + @property + def checkpoint_items( + self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]: + """Returns a dictionary of items to be additionally checkpointed.""" + return dict(backbone=self.backbone) + + @property + def backbone(self) -> tf.keras.Model: + return self._backbone + + def get_config(self) -> Mapping[str, Any]: + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/modeling/video_classification_model_test.py b/official/vision/beta/modeling/video_classification_model_test.py new 
file mode 100644 index 0000000000000000000000000000000000000000..7b06cf83bf19529c9d4378865ba323cfa892e708 --- /dev/null +++ b/official/vision/beta/modeling/video_classification_model_test.py @@ -0,0 +1,92 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for video classification network.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling import video_classification_model + + +class VideoClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (50, 8, 112, 'relu', False), + (50, 8, 112, 'swish', True), + ) + def test_resnet3d_network_creation(self, model_id, temporal_size, + spatial_size, activation, + aggregate_endpoints): + """Test for creation of a ResNet3D-50 classifier.""" + input_specs = tf.keras.layers.InputSpec( + shape=[None, temporal_size, spatial_size, spatial_size, 3]) + temporal_strides = [1, 1, 1, 1] + temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1), + (1, 3, 1)] + + tf.keras.backend.set_image_data_format('channels_last') + + backbone = backbones.ResNet3D( + model_id=model_id, + temporal_strides=temporal_strides, + temporal_kernel_sizes=temporal_kernel_sizes, + input_specs=input_specs, + activation=activation) + + num_classes = 1000 + model = 
video_classification_model.VideoClassificationModel( + backbone=backbone, + num_classes=num_classes, + input_specs={'image': input_specs}, + dropout_rate=0.2, + aggregate_endpoints=aggregate_endpoints, + ) + + inputs = np.random.rand(2, temporal_size, spatial_size, spatial_size, 3) + logits = model(inputs) + self.assertAllEqual([2, num_classes], logits.numpy().shape) + + def test_serialize_deserialize(self): + """Validate the classification network can be serialized and deserialized.""" + model_id = 50 + temporal_strides = [1, 1, 1, 1] + temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1), + (1, 3, 1)] + + backbone = backbones.ResNet3D( + model_id=model_id, + temporal_strides=temporal_strides, + temporal_kernel_sizes=temporal_kernel_sizes) + + model = video_classification_model.VideoClassificationModel( + backbone=backbone, num_classes=1000) + + config = model.get_config() + new_model = video_classification_model.VideoClassificationModel.from_config( + config) + + # Validate that the config can be forced to JSON. + _ = new_model.to_json() + + # If the serialization was successful, the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/__init__.py b/official/vision/beta/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/vision/beta/ops/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/beta/ops/anchor.py b/official/vision/beta/ops/anchor.py new file mode 100644 index 0000000000000000000000000000000000000000..330524ce870a54272024b2a7da227a649c3b15c3 --- /dev/null +++ b/official/vision/beta/ops/anchor.py @@ -0,0 +1,343 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Anchor box and labeler definition.""" + +import collections +# Import libraries +import tensorflow as tf +from official.vision import keras_cv +from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler +from official.vision.detection.utils.object_detection import box_list +from official.vision.detection.utils.object_detection import faster_rcnn_box_coder + + +class Anchor(object): + """Anchor class for anchor-based object detectors.""" + + def __init__(self, + min_level, + max_level, + num_scales, + aspect_ratios, + anchor_size, + image_size): + """Constructs multiscale anchors. 
+ + Args: + min_level: integer number of minimum level of the output feature pyramid. + max_level: integer number of maximum level of the output feature pyramid. + num_scales: integer number representing intermediate scales added + on each level. For instance, num_scales=2 adds one additional + intermediate anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: list of float numbers representing the aspect ratio anchors + added on each level. The number indicates the ratio of width to height. + For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each + scale level. + anchor_size: float number representing the scale of size of the base + anchor to the feature stride 2^level. + image_size: a list of integer numbers or Tensors representing + [height, width] of the input image size. The image_size should be divided + by the largest feature stride 2^max_level. + """ + self.min_level = min_level + self.max_level = max_level + self.num_scales = num_scales + self.aspect_ratios = aspect_ratios + self.anchor_size = anchor_size + self.image_size = image_size + self.boxes = self._generate_boxes() + + def _generate_boxes(self): + """Generates multiscale anchor boxes. + + Returns: + a Tensor of shape [N, 4], representing anchor boxes of all levels + concatenated together. 
+ """ + boxes_all = [] + for level in range(self.min_level, self.max_level + 1): + boxes_l = [] + for scale in range(self.num_scales): + for aspect_ratio in self.aspect_ratios: + stride = 2 ** level + intermidate_scale = 2 ** (scale / float(self.num_scales)) + base_anchor_size = self.anchor_size * stride * intermidate_scale + aspect_x = aspect_ratio ** 0.5 + aspect_y = aspect_ratio ** -0.5 + half_anchor_size_x = base_anchor_size * aspect_x / 2.0 + half_anchor_size_y = base_anchor_size * aspect_y / 2.0 + x = tf.range(stride / 2, self.image_size[1], stride) + y = tf.range(stride / 2, self.image_size[0], stride) + xv, yv = tf.meshgrid(x, y) + xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32) + yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32) + # Tensor shape Nx4. + boxes = tf.stack([yv - half_anchor_size_y, xv - half_anchor_size_x, + yv + half_anchor_size_y, xv + half_anchor_size_x], + axis=1) + boxes_l.append(boxes) + # Concat anchors on the same level to tensor shape NxAx4. + boxes_l = tf.stack(boxes_l, axis=1) + boxes_l = tf.reshape(boxes_l, [-1, 4]) + boxes_all.append(boxes_l) + return tf.concat(boxes_all, axis=0) + + def unpack_labels(self, labels): + """Unpacks an array of labels into multiscales labels.""" + unpacked_labels = collections.OrderedDict() + count = 0 + for level in range(self.min_level, self.max_level + 1): + feat_size_y = tf.cast(self.image_size[0] / 2 ** level, tf.int32) + feat_size_x = tf.cast(self.image_size[1] / 2 ** level, tf.int32) + steps = feat_size_y * feat_size_x * self.anchors_per_location + unpacked_labels[str(level)] = tf.reshape( + labels[count:count + steps], [feat_size_y, feat_size_x, -1]) + count += steps + return unpacked_labels + + @property + def anchors_per_location(self): + return self.num_scales * len(self.aspect_ratios) + + @property + def multilevel_boxes(self): + return self.unpack_labels(self.boxes) + + +class AnchorLabeler(object): + """Labeler for dense object detector.""" + + def __init__(self, + 
match_threshold=0.5, + unmatched_threshold=0.5): + """Constructs anchor labeler to assign labels to anchors. + + Args: + match_threshold: a float number between 0 and 1 representing the + lower-bound threshold to assign positive labels for anchors. An anchor + with a score over the threshold is labeled positive. + unmatched_threshold: a float number between 0 and 1 representing the + upper-bound threshold to assign negative labels for anchors. An anchor + with a score below the threshold is labeled negative. + """ + self.similarity_calc = keras_cv.ops.IouSimilarity() + self.target_gather = keras_cv.ops.TargetGather() + self.matcher = keras_cv.ops.BoxMatcher( + thresholds=[unmatched_threshold, match_threshold], + indicators=[-1, -2, 1], + force_match_for_each_col=True) + self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() + + def label_anchors(self, anchor_boxes, gt_boxes, gt_labels): + """Labels anchors with ground truth inputs. + + Args: + anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + gt_labels: An integer tensor with shape [N, 1] representing groundtruth + classes. + Returns: + cls_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors_per_location]. The height_l and + width_l represent the dimension of class logits at l-th level. + box_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors_per_location * 4]. The height_l + and width_l represent the dimension of bounding box regression output at + l-th level. 
+ cls_weights: A flattened Tensor with shape [batch_size, num_anchors], that + serves as masking / sample weight for classification loss. Its value + is 1.0 for positive and negative matched anchors, and 0.0 for ignored + anchors. + box_weights: A flattened Tensor with shape [batch_size, num_anchors], that + serves as masking / sample weight for regression loss. Its value is + 1.0 for positive matched anchors, and 0.0 for negative and ignored + anchors. + """ + flattened_anchor_boxes = [] + for anchors in anchor_boxes.values(): + flattened_anchor_boxes.append(tf.reshape(anchors, [-1, 4])) + flattened_anchor_boxes = tf.concat(flattened_anchor_boxes, axis=0) + similarity_matrix = self.similarity_calc(flattened_anchor_boxes, gt_boxes) + match_indices, match_indicators = self.matcher(similarity_matrix) + mask = tf.less_equal(match_indicators, 0) + cls_mask = tf.expand_dims(mask, -1) + cls_targets = self.target_gather(gt_labels, match_indices, cls_mask, -1) + box_mask = tf.tile(cls_mask, [1, 4]) + box_targets = self.target_gather(gt_boxes, match_indices, box_mask) + weights = tf.squeeze(tf.ones_like(gt_labels, dtype=tf.float32), -1) + box_weights = self.target_gather(weights, match_indices, mask) + ignore_mask = tf.equal(match_indicators, -2) + cls_weights = self.target_gather(weights, match_indices, ignore_mask) + box_targets_list = box_list.BoxList(box_targets) + anchor_box_list = box_list.BoxList(flattened_anchor_boxes) + box_targets = self.box_coder.encode(box_targets_list, anchor_box_list) + + # Unpacks labels into multi-level representations. 
+ cls_targets_dict = unpack_targets(cls_targets, anchor_boxes) + box_targets_dict = unpack_targets(box_targets, anchor_boxes) + + return cls_targets_dict, box_targets_dict, cls_weights, box_weights + + +class RpnAnchorLabeler(AnchorLabeler): + """Labeler for Region Proposal Network.""" + + def __init__(self, + match_threshold=0.7, + unmatched_threshold=0.3, + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5): + AnchorLabeler.__init__(self, match_threshold=match_threshold, + unmatched_threshold=unmatched_threshold) + self._rpn_batch_size_per_im = rpn_batch_size_per_im + self._rpn_fg_fraction = rpn_fg_fraction + + def _get_rpn_samples(self, match_results): + """Computes anchor labels. + + This function performs subsampling for foreground (fg) and background (bg) + anchors. + Args: + match_results: An integer tensor with shape [N] representing the + matching results of anchors. (1) match_results[i]>=0, + meaning that column i is matched with row match_results[i]. + (2) match_results[i]=-1, meaning that column i is not matched. + (3) match_results[i]=-2, meaning that column i is ignored. + Returns: + score_targets: an integer tensor with a shape of [N]. + (1) score_targets[i]=1, the anchor is a positive sample. + (2) score_targets[i]=0, negative. (3) score_targets[i]=-1, the anchor is + don't care (ignore). + """ + sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler( + positive_fraction=self._rpn_fg_fraction, is_static=False)) + # indicator includes both positive and negative labels. + # labels includes only positive labels. + # positives = indicator & labels. + # negatives = indicator & !labels. + # ignore = !indicator. 
+ indicator = tf.greater(match_results, -2) + labels = tf.greater(match_results, -1) + + samples = sampler.subsample( + indicator, self._rpn_batch_size_per_im, labels) + positive_labels = tf.where( + tf.logical_and(samples, labels), + tf.constant(2, dtype=tf.int32, shape=match_results.shape), + tf.constant(0, dtype=tf.int32, shape=match_results.shape)) + negative_labels = tf.where( + tf.logical_and(samples, tf.logical_not(labels)), + tf.constant(1, dtype=tf.int32, shape=match_results.shape), + tf.constant(0, dtype=tf.int32, shape=match_results.shape)) + ignore_labels = tf.fill(match_results.shape, -1) + + return (ignore_labels + positive_labels + negative_labels, + positive_labels, negative_labels) + + def label_anchors(self, anchor_boxes, gt_boxes, gt_labels): + """Labels anchors with ground truth inputs. + + Args: + anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + gt_labels: A integer tensor with shape [N, 1] representing groundtruth + classes. + Returns: + score_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors]. The height_l and width_l + represent the dimension of class logits at l-th level. + box_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors * 4]. The height_l and + width_l represent the dimension of bounding box regression output at + l-th level. 
+ """ + flattened_anchor_boxes = [] + for anchors in anchor_boxes.values(): + flattened_anchor_boxes.append(tf.reshape(anchors, [-1, 4])) + flattened_anchor_boxes = tf.concat(flattened_anchor_boxes, axis=0) + similarity_matrix = self.similarity_calc(flattened_anchor_boxes, gt_boxes) + match_indices, match_indicators = self.matcher(similarity_matrix) + box_mask = tf.tile(tf.expand_dims(tf.less_equal(match_indicators, 0), -1), + [1, 4]) + box_targets = self.target_gather(gt_boxes, match_indices, box_mask) + box_targets_list = box_list.BoxList(box_targets) + anchor_box_list = box_list.BoxList(flattened_anchor_boxes) + box_targets = self.box_coder.encode(box_targets_list, anchor_box_list) + + # Zero out the unmatched and ignored regression targets. + num_matches = match_indices.shape.as_list()[0] or tf.shape(match_indices)[0] + unmatched_ignored_box_targets = tf.zeros([num_matches, 4], dtype=tf.float32) + matched_anchors_mask = tf.greater_equal(match_indicators, 0) + # To broadcast matched_anchors_mask to the same shape as + # matched_reg_targets. + matched_anchors_mask = tf.tile( + tf.expand_dims(matched_anchors_mask, 1), + [1, tf.shape(box_targets)[1]]) + box_targets = tf.where(matched_anchors_mask, box_targets, + unmatched_ignored_box_targets) + + # score_targets contains the subsampled positive and negative anchors. + score_targets, _, _ = self._get_rpn_samples(match_indicators) + + # Unpacks labels. 
+ score_targets_dict = unpack_targets(score_targets, anchor_boxes) + box_targets_dict = unpack_targets(box_targets, anchor_boxes) + + return score_targets_dict, box_targets_dict + + +def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios, + anchor_size): + """Build anchor generator from levels.""" + anchor_sizes = collections.OrderedDict() + strides = collections.OrderedDict() + scales = [] + for scale in range(num_scales): + scales.append(2**(scale / float(num_scales))) + for level in range(min_level, max_level + 1): + stride = 2**level + strides[str(level)] = stride + anchor_sizes[str(level)] = anchor_size * stride + anchor_gen = keras_cv.ops.AnchorGenerator( + anchor_sizes=anchor_sizes, + scales=scales, + aspect_ratios=aspect_ratios, + strides=strides) + return anchor_gen + + +def unpack_targets(targets, anchor_boxes_dict): + """Unpacks an array of labels into multiscales labels.""" + unpacked_targets = collections.OrderedDict() + count = 0 + for level, anchor_boxes in anchor_boxes_dict.items(): + feat_size_shape = anchor_boxes.shape.as_list() + feat_size_y = feat_size_shape[0] + feat_size_x = feat_size_shape[1] + anchors_per_location = int(feat_size_shape[2] / 4) + steps = feat_size_y * feat_size_x * anchors_per_location + unpacked_targets[level] = tf.reshape(targets[count:count + steps], + [feat_size_y, feat_size_x, -1]) + count += steps + return unpacked_targets diff --git a/official/vision/beta/ops/anchor_test.py b/official/vision/beta/ops/anchor_test.py new file mode 100644 index 0000000000000000000000000000000000000000..777dd976bbf904f8691241bdb8b27c84f86d2eb8 --- /dev/null +++ b/official/vision/beta/ops/anchor_test.py @@ -0,0 +1,168 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for anchor.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from official.vision.beta.ops import anchor + + +class AnchorTest(parameterized.TestCase, tf.test.TestCase): + + # The set of parameters are tailored for the MLPerf configuration, where + # the number of anchors is 495132, rpn_batch_size_per_im=256, and + # rpn_fg_fraction=0.5. + @parameterized.parameters( + (512, 25, 25, 25, 25, (512, 512)), + (512, 25, 25, 25, 25, (512, 640)), + (512, 25, 25, 25, 25, (640, 512)), + (495132, 100, 100, 100, 100, (512, 512)), + (495132, 200, 100, 128, 100, (512, 512)), + (495132, 100, 120, 100, 120, (512, 512)), + (495132, 100, 200, 100, 156, (512, 512)), + (495132, 200, 200, 128, 128, (512, 512)), + ) + def testAnchorRpnSample(self, num_anchors, num_positives, + num_negatives, expected_positives, + expected_negatives, image_size): + match_results_np = np.empty([num_anchors]) + match_results_np.fill(-2) + match_results_np[:num_positives] = 0 + match_results_np[num_positives:num_positives + num_negatives] = -1 + match_results = tf.convert_to_tensor(value=match_results_np, dtype=tf.int32) + anchor_labeler = anchor.RpnAnchorLabeler( + match_threshold=0.7, + unmatched_threshold=0.3, + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5) + rpn_sample_op = anchor_labeler._get_rpn_samples(match_results) + labels = [v.numpy() for v in rpn_sample_op] + self.assertLen(labels[0], num_anchors) + positives = np.sum(np.array(labels[0]) == 1) + negatives = np.sum(np.array(labels[0]) == 0) + 
self.assertEqual(positives, expected_positives) + self.assertEqual(negatives, expected_negatives) + + @parameterized.parameters( + # Single scale anchor. + (5, 5, 1, [1.0], 2.0, + [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80]]), + # Multi scale anchor. + (5, 6, 1, [1.0], 2.0, + [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80], [-32, -32, 96, 96]]), + # # Multi aspect ratio anchor. + (6, 6, 1, [1.0, 4.0, 0.25], 2.0, + [[-32, -32, 96, 96], [-0, -96, 64, 160], [-96, -0, 160, 64]]), + + ) + def testAnchorGeneration(self, min_level, max_level, num_scales, + aspect_ratios, anchor_size, expected_boxes): + image_size = [64, 64] + anchors = anchor.Anchor(min_level, max_level, num_scales, aspect_ratios, + anchor_size, image_size) + boxes = anchors.boxes.numpy() + self.assertEqual(expected_boxes, boxes.tolist()) + + @parameterized.parameters( + # Single scale anchor. + (5, 5, 1, [1.0], 2.0, + [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80]]), + # Multi scale anchor. + (5, 6, 1, [1.0], 2.0, + [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80], [-32, -32, 96, 96]]), + # # Multi aspect ratio anchor. 
+ (6, 6, 1, [1.0, 4.0, 0.25], 2.0, + [[-32, -32, 96, 96], [-0, -96, 64, 160], [-96, -0, 160, 64]]), + + ) + def testAnchorGenerationWithImageSizeAsTensor(self, + min_level, + max_level, + num_scales, + aspect_ratios, + anchor_size, + expected_boxes): + image_size = tf.constant([64, 64], tf.int32) + anchors = anchor.Anchor(min_level, max_level, num_scales, aspect_ratios, + anchor_size, image_size) + boxes = anchors.boxes.numpy() + self.assertEqual(expected_boxes, boxes.tolist()) + + @parameterized.parameters( + (3, 6, 2, [1.0], 2.0), + ) + def testLabelAnchors(self, min_level, max_level, num_scales, + aspect_ratios, anchor_size): + input_size = [512, 512] + ground_truth_class_id = 2 + + # The matched anchors are the anchors used as ground truth and the anchors + # at the next octave scale on the same location. + expected_anchor_locations = [[0, 0, 0], [0, 0, 1]] + anchor_gen = anchor.build_anchor_generator(min_level, max_level, num_scales, + aspect_ratios, anchor_size) + anchor_boxes = anchor_gen(input_size) + anchor_labeler = anchor.AnchorLabeler() + + # Uses the first anchors as ground truth. The ground truth should map to + # two anchors with two intermediate scales at the same location. + gt_boxes = anchor_boxes['3'][0:1, 0, 0:4] + gt_classes = tf.constant([[ground_truth_class_id]], dtype=tf.float32) + (cls_targets, box_targets, _, + box_weights) = anchor_labeler.label_anchors( + anchor_boxes, gt_boxes, gt_classes) + + for k, v in cls_targets.items(): + cls_targets[k] = v.numpy() + for k, v in box_targets.items(): + box_targets[k] = v.numpy() + box_weights = box_weights.numpy() + + anchor_locations = np.vstack( + np.where(cls_targets[str(min_level)] > -1)).transpose() + self.assertAllClose(expected_anchor_locations, anchor_locations) + # Two anchor boxes on min_level got matched to the gt_boxes. 
+ self.assertAllClose(tf.reduce_sum(box_weights), 2) + + @parameterized.parameters( + (3, 7, [.5, 1., 2.], 2, 8, (256, 256)), + (3, 8, [1.], 3, 32, (512, 512)), + (3, 3, [1.], 2, 4, (32, 32)), + ) + def testEquivalentResult(self, min_level, max_level, aspect_ratios, + num_scales, anchor_size, image_size): + anchor_gen = anchor.build_anchor_generator( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size) + anchors = anchor_gen(image_size) + expected_anchor_gen = anchor.Anchor(min_level, max_level, num_scales, + aspect_ratios, anchor_size, image_size) + + expected_anchors = expected_anchor_gen.multilevel_boxes + for k in expected_anchors.keys(): + self.assertAllClose(expected_anchors[k], anchors[k]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/augment.py b/official/vision/beta/ops/augment.py new file mode 100644 index 0000000000000000000000000000000000000000..84a7e1e1a67f8fb26439a6367e52b8832ffb7a61 --- /dev/null +++ b/official/vision/beta/ops/augment.py @@ -0,0 +1,1256 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoAugment and RandAugment policies for enhanced image/video preprocessing. 

AutoAugment Reference: https://arxiv.org/abs/1805.09501
RandAugment Reference: https://arxiv.org/abs/1909.13719
"""
import math
from typing import Any, List, Iterable, Optional, Text, Tuple

import numpy as np
import tensorflow as tf

from tensorflow.python.keras.layers.preprocessing import image_preprocessing as image_ops


# This signifies the max integer that the controller RNN could predict for the
# augmentation scheme.
_MAX_LEVEL = 10.


def to_4d(image: tf.Tensor) -> tf.Tensor:
  """Converts an input Tensor to 4 dimensions.

  4D image => [N, H, W, C] or [N, C, H, W]
  3D image => [1, H, W, C] or [1, C, H, W]
  2D image => [1, H, W, 1]

  Args:
    image: The 2/3/4D input tensor.

  Returns:
    A 4D image tensor.

  Raises:
    `TypeError` if `image` is not a 2/3/4D tensor.

  """
  shape = tf.shape(image)
  original_rank = tf.rank(image)
  # Pad one leading dim when rank <= 3, and one trailing dim when rank == 2,
  # so a 2D image gains both a batch and a channel axis.
  left_pad = tf.cast(tf.less_equal(original_rank, 3), dtype=tf.int32)
  right_pad = tf.cast(tf.equal(original_rank, 2), dtype=tf.int32)
  new_shape = tf.concat(
      [
          tf.ones(shape=left_pad, dtype=tf.int32),
          shape,
          tf.ones(shape=right_pad, dtype=tf.int32),
      ],
      axis=0,
  )
  return tf.reshape(image, new_shape)


def from_4d(image: tf.Tensor, ndims: tf.Tensor) -> tf.Tensor:
  """Converts a 4D image back to `ndims` rank.

  Inverse of `to_4d`: drops the axes that `to_4d` added for the given
  original rank `ndims`.
  """
  shape = tf.shape(image)
  begin = tf.cast(tf.less_equal(ndims, 3), dtype=tf.int32)
  end = 4 - tf.cast(tf.equal(ndims, 2), dtype=tf.int32)
  new_shape = shape[begin:end]
  return tf.reshape(image, new_shape)


def _convert_translation_to_transform(translations: tf.Tensor) -> tf.Tensor:
  """Converts translations to a projective transform.

  The translation matrix looks like this:
    [[1 0 -dx]
     [0 1 -dy]
     [0 0 1]]

  Args:
    translations: The 2-element list representing [dx, dy], or a matrix of
      2-element lists representing [dx dy] to translate for each image. The
      shape must be static.

  Returns:
    The transformation matrix of shape (num_images, 8).

  Raises:
    `TypeError` if
      - the shape of `translations` is not known or
      - the shape of `translations` is not rank 1 or 2.

  """
  translations = tf.convert_to_tensor(translations, dtype=tf.float32)
  if translations.get_shape().ndims is None:
    raise TypeError('translations rank must be statically known')
  elif len(translations.get_shape()) == 1:
    # Promote a single [dx, dy] pair to a batch of one.
    translations = translations[None]
  elif len(translations.get_shape()) != 2:
    raise TypeError('translations should have rank 1 or 2.')
  num_translations = tf.shape(translations)[0]

  # Flattened rows of the 3x3 projective matrix (last entry implied as 1).
  return tf.concat(
      values=[
          tf.ones((num_translations, 1), tf.dtypes.float32),
          tf.zeros((num_translations, 1), tf.dtypes.float32),
          -translations[:, 0, None],
          tf.zeros((num_translations, 1), tf.dtypes.float32),
          tf.ones((num_translations, 1), tf.dtypes.float32),
          -translations[:, 1, None],
          tf.zeros((num_translations, 2), tf.dtypes.float32),
      ],
      axis=1,
  )


def _convert_angles_to_transform(angles: tf.Tensor, image_width: tf.Tensor,
                                 image_height: tf.Tensor) -> tf.Tensor:
  """Converts an angle or angles to a projective transform.

  Args:
    angles: A scalar to rotate all images, or a vector to rotate a batch of
      images. This must be a scalar.
    image_width: The width of the image(s) to be transformed.
    image_height: The height of the image(s) to be transformed.

  Returns:
    A tensor of shape (num_images, 8).

  Raises:
    `TypeError` if `angles` is not rank 0 or 1.

  """
  angles = tf.convert_to_tensor(angles, dtype=tf.float32)
  if len(angles.get_shape()) == 0:  # pylint:disable=g-explicit-length-test
    angles = angles[None]
  elif len(angles.get_shape()) != 1:
    raise TypeError('Angles should have a rank 0 or 1.')
  # Offsets keep the rotation centered on the image center.
  x_offset = ((image_width - 1) -
              (tf.math.cos(angles) * (image_width - 1) - tf.math.sin(angles) *
               (image_height - 1))) / 2.0
  y_offset = ((image_height - 1) -
              (tf.math.sin(angles) * (image_width - 1) + tf.math.cos(angles) *
               (image_height - 1))) / 2.0
  num_angles = tf.shape(angles)[0]
  return tf.concat(
      values=[
          tf.math.cos(angles)[:, None],
          -tf.math.sin(angles)[:, None],
          x_offset[:, None],
          tf.math.sin(angles)[:, None],
          tf.math.cos(angles)[:, None],
          y_offset[:, None],
          tf.zeros((num_angles, 2), tf.dtypes.float32),
      ],
      axis=1,
  )


def transform(image: tf.Tensor, transforms) -> tf.Tensor:
  """Prepares input data for `image_ops.transform`.

  Expands `image` to 4D, applies the projective transform(s) with nearest
  interpolation, and restores the original rank.
  """
  original_ndims = tf.rank(image)
  transforms = tf.convert_to_tensor(transforms, dtype=tf.float32)
  if transforms.shape.rank == 1:
    transforms = transforms[None]
  image = to_4d(image)
  image = image_ops.transform(
      images=image, transforms=transforms, interpolation='nearest')
  return from_4d(image, original_ndims)


def translate(image: tf.Tensor, translations) -> tf.Tensor:
  """Translates image(s) by provided vectors.

  Args:
    image: An image Tensor of type uint8.
    translations: A vector or matrix representing [dx dy].

  Returns:
    The translated version of the image.

  """
  transforms = _convert_translation_to_transform(translations)
  return transform(image, transforms=transforms)


def rotate(image: tf.Tensor, degrees: float) -> tf.Tensor:
  """Rotates the image by degrees either clockwise or counterclockwise.

  Args:
    image: An image Tensor of type uint8.
    degrees: Float, a scalar angle in degrees to rotate all images by. 
If
      degrees is positive the image will be rotated clockwise otherwise it will
      be rotated counterclockwise.

  Returns:
    The rotated version of image.

  """
  # Convert from degrees to radians.
  degrees_to_radians = math.pi / 180.0
  radians = tf.cast(degrees * degrees_to_radians, tf.float32)

  original_ndims = tf.rank(image)
  image = to_4d(image)

  image_height = tf.cast(tf.shape(image)[1], tf.float32)
  image_width = tf.cast(tf.shape(image)[2], tf.float32)
  transforms = _convert_angles_to_transform(
      angles=radians, image_width=image_width, image_height=image_height)
  # In practice, we should randomize the rotation degrees by flipping
  # it negatively half the time, but that's done on 'degrees' outside
  # of the function.
  image = transform(image, transforms=transforms)
  return from_4d(image, original_ndims)


def blend(image1: tf.Tensor, image2: tf.Tensor, factor: float) -> tf.Tensor:
  """Blend image1 and image2 using 'factor'.

  Factor can be above 0.0. A value of 0.0 means only image1 is used.
  A value of 1.0 means only image2 is used. A value between 0.0 and
  1.0 means we linearly interpolate the pixel values between the two
  images. A value greater than 1.0 "extrapolates" the difference
  between the two pixel values, and we clip the results to values
  between 0 and 255.

  Args:
    image1: An image Tensor of type uint8.
    image2: An image Tensor of type uint8.
    factor: A floating point value above 0.0.

  Returns:
    A blended image Tensor of type uint8.
  """
  # Fast paths: the degenerate factors need no arithmetic.
  if factor == 0.0:
    return tf.convert_to_tensor(image1)
  if factor == 1.0:
    return tf.convert_to_tensor(image2)

  image1 = tf.cast(image1, tf.float32)
  image2 = tf.cast(image2, tf.float32)

  difference = image2 - image1
  scaled = factor * difference

  # Do addition in float.
  temp = tf.cast(image1, tf.float32) + scaled

  # Interpolate
  if factor > 0.0 and factor < 1.0:
    # Interpolation means we always stay within 0 and 255.
    return tf.cast(temp, tf.uint8)

  # Extrapolate:
  #
  # We need to clip and then cast.
  return tf.cast(tf.clip_by_value(temp, 0.0, 255.0), tf.uint8)


def cutout(image: tf.Tensor, pad_size: int, replace: int = 0) -> tf.Tensor:
  """Apply cutout (https://arxiv.org/abs/1708.04552) to image.

  This operation applies a (2*pad_size x 2*pad_size) mask of zeros to
  a random location within `image`. The pixel values filled in will be of the
  value `replace`. The location where the mask will be applied is randomly
  chosen uniformly over the whole image.

  Args:
    image: An image Tensor of type uint8.
    pad_size: Specifies how big the zero mask that will be generated is that is
      applied to the image. The mask will be of size (2*pad_size x 2*pad_size).
    replace: What pixel value to fill in the image in the area that has the
      cutout mask applied to it.

  Returns:
    An image Tensor that is of type uint8.
  """
  if image.shape.rank not in [3, 4]:
    raise ValueError('Bad image rank: {}'.format(image.shape.rank))

  # Rank-4 inputs are videos; delegate to the 3D variant.
  if image.shape.rank == 4:
    return cutout_video(image, replace=replace)

  image_height = tf.shape(image)[0]
  image_width = tf.shape(image)[1]

  # Sample the center location in the image where the zero mask will be applied.
  cutout_center_height = tf.random.uniform(
      shape=[], minval=0, maxval=image_height, dtype=tf.int32)

  cutout_center_width = tf.random.uniform(
      shape=[], minval=0, maxval=image_width, dtype=tf.int32)

  # The mask may be clipped by the image border; pads shrink to compensate.
  lower_pad = tf.maximum(0, cutout_center_height - pad_size)
  upper_pad = tf.maximum(0, image_height - cutout_center_height - pad_size)
  left_pad = tf.maximum(0, cutout_center_width - pad_size)
  right_pad = tf.maximum(0, image_width - cutout_center_width - pad_size)

  cutout_shape = [
      image_height - (lower_pad + upper_pad),
      image_width - (left_pad + right_pad)
  ]
  padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
  # Zeros mark the cutout region; ones (from padding) keep the original pixels.
  mask = tf.pad(
      tf.zeros(cutout_shape, dtype=image.dtype),
      padding_dims,
      constant_values=1)
  mask = tf.expand_dims(mask, -1)
  mask = tf.tile(mask, [1, 1, 3])
  image = tf.where(
      tf.equal(mask, 0),
      tf.ones_like(image, dtype=image.dtype) * replace, image)
  return image


def cutout_video(image: tf.Tensor, replace: int = 0) -> tf.Tensor:
  """Apply cutout (https://arxiv.org/abs/1708.04552) to a video.

  This operation applies a random size 3D mask of zeros to a random location
  within `image`. The mask is padded. The pixel values filled in will be of the
  value `replace`. The location where the mask will be applied is randomly
  chosen uniformly over the whole image. The size of the mask is randomly
  sampled uniformly from [0.25*height, 0.5*height], [0.25*width, 0.5*width],
  and [1, 0.25*depth], which represent the height, width, and number of frames
  of the input video tensor respectively.

  Args:
    image: A video Tensor of type uint8.
    replace: What pixel value to fill in the image in the area that has the
      cutout mask applied to it.

  Returns:
    An video Tensor that is of type uint8.
  """
  image_depth = tf.shape(image)[0]
  image_height = tf.shape(image)[1]
  image_width = tf.shape(image)[2]

  # Sample the center location in the image where the zero mask will be applied.
  cutout_center_height = tf.random.uniform(
      shape=[], minval=0, maxval=image_height, dtype=tf.int32)

  cutout_center_width = tf.random.uniform(
      shape=[], minval=0, maxval=image_width, dtype=tf.int32)

  cutout_center_depth = tf.random.uniform(
      shape=[], minval=0, maxval=image_depth, dtype=tf.int32)

  # Half-extent of the mask per axis, sampled independently; the tf.maximum
  # guards keep minval < maxval for tiny inputs.
  pad_size_height = tf.random.uniform(
      shape=[],
      minval=tf.maximum(1, tf.cast(image_height / 4, tf.int32)),
      maxval=tf.maximum(2, tf.cast(image_height / 2, tf.int32)),
      dtype=tf.int32)
  pad_size_width = tf.random.uniform(
      shape=[],
      minval=tf.maximum(1, tf.cast(image_width / 4, tf.int32)),
      maxval=tf.maximum(2, tf.cast(image_width / 2, tf.int32)),
      dtype=tf.int32)
  pad_size_depth = tf.random.uniform(
      shape=[],
      minval=1,
      maxval=tf.maximum(2, tf.cast(image_depth / 4, tf.int32)),
      dtype=tf.int32)

  lower_pad = tf.maximum(0, cutout_center_height - pad_size_height)
  upper_pad = tf.maximum(
      0, image_height - cutout_center_height - pad_size_height)
  left_pad = tf.maximum(0, cutout_center_width - pad_size_width)
  right_pad = tf.maximum(0, image_width - cutout_center_width - pad_size_width)
  back_pad = tf.maximum(0, cutout_center_depth - pad_size_depth)
  forward_pad = tf.maximum(
      0, image_depth - cutout_center_depth - pad_size_depth)

  cutout_shape = [
      image_depth - (back_pad + forward_pad),
      image_height - (lower_pad + upper_pad),
      image_width - (left_pad + right_pad),
  ]
  padding_dims = [[back_pad, forward_pad],
                  [lower_pad, upper_pad],
                  [left_pad, right_pad]]
  mask = tf.pad(
      tf.zeros(cutout_shape, dtype=image.dtype),
      padding_dims,
      constant_values=1)
  mask = tf.expand_dims(mask, -1)
  mask = tf.tile(mask, [1, 1, 1, 3])
  image = tf.where(
      tf.equal(mask, 0),
      tf.ones_like(image, dtype=image.dtype) * replace, image)
  return image


def solarize(image: tf.Tensor, threshold: int = 128) -> tf.Tensor:
  """Solarize the input image(s)."""
  # For each pixel in the image, select the pixel
  # if the value is less than 
the threshold.
  # Otherwise, subtract 255 from the pixel.
  return tf.where(image < threshold, image, 255 - image)


def solarize_add(image: tf.Tensor,
                 addition: int = 0,
                 threshold: int = 128) -> tf.Tensor:
  """Additive solarize the input image(s)."""
  # For each pixel in the image less than threshold
  # we add 'addition' amount to it and then clip the
  # pixel value to be between 0 and 255. The value
  # of 'addition' is between -128 and 128.
  added_image = tf.cast(image, tf.int64) + addition
  added_image = tf.cast(tf.clip_by_value(added_image, 0, 255), tf.uint8)
  return tf.where(image < threshold, added_image, image)


def color(image: tf.Tensor, factor: float) -> tf.Tensor:
  """Equivalent of PIL Color."""
  # Blend toward the grayscale version; factor 0 is fully desaturated.
  degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
  return blend(degenerate, image, factor)


def contrast(image: tf.Tensor, factor: float) -> tf.Tensor:
  """Equivalent of PIL Contrast."""
  degenerate = tf.image.rgb_to_grayscale(image)
  # Cast before calling tf.histogram.
  degenerate = tf.cast(degenerate, tf.int32)

  # Compute the grayscale histogram, then compute the mean pixel value,
  # and create a constant image size of that value. Use that as the
  # blending degenerate target of the original image.
  hist = tf.histogram_fixed_width(degenerate, [0, 255], nbins=256)
  # NOTE(review): reduce_sum(hist) is the total pixel count, so this is
  # count/256, not the intensity mean the comment above describes. This
  # mirrors the upstream TensorFlow implementation — confirm before changing.
  mean = tf.reduce_sum(tf.cast(hist, tf.float32)) / 256.0
  degenerate = tf.ones_like(degenerate, dtype=tf.float32) * mean
  degenerate = tf.clip_by_value(degenerate, 0.0, 255.0)
  degenerate = tf.image.grayscale_to_rgb(tf.cast(degenerate, tf.uint8))
  return blend(degenerate, image, factor)


def brightness(image: tf.Tensor, factor: float) -> tf.Tensor:
  """Equivalent of PIL Brightness."""
  # Blend toward black; factor 0 is a fully black image.
  degenerate = tf.zeros_like(image)
  return blend(degenerate, image, factor)


def posterize(image: tf.Tensor, bits: int) -> tf.Tensor:
  """Equivalent of PIL Posterize."""
  # Keep only the `bits` most-significant bits of each channel.
  shift = 8 - bits
  return tf.bitwise.left_shift(tf.bitwise.right_shift(image, shift), shift)


def wrapped_rotate(image: tf.Tensor, degrees: float, replace: int) -> tf.Tensor:
  """Applies rotation with wrap/unwrap."""
  image = rotate(wrap(image), degrees=degrees)
  return unwrap(image, replace)


def translate_x(image: tf.Tensor, pixels: int, replace: int) -> tf.Tensor:
  """Equivalent of PIL Translate in X dimension."""
  image = translate(wrap(image), [-pixels, 0])
  return unwrap(image, replace)


def translate_y(image: tf.Tensor, pixels: int, replace: int) -> tf.Tensor:
  """Equivalent of PIL Translate in Y dimension."""
  image = translate(wrap(image), [0, -pixels])
  return unwrap(image, replace)


def shear_x(image: tf.Tensor, level: float, replace: int) -> tf.Tensor:
  """Equivalent of PIL Shearing in X dimension."""
  # Shear parallel to x axis is a projective transform
  # with a matrix form of:
  # [1  level
  #  0  1].
  image = transform(
      image=wrap(image), transforms=[1., level, 0., 0., 1., 0., 0., 0.])
  return unwrap(image, replace)


def shear_y(image: tf.Tensor, level: float, replace: int) -> tf.Tensor:
  """Equivalent of PIL Shearing in Y dimension."""
  # Shear parallel to y axis is a projective transform
  # with a matrix form of:
  # [1      0
  #  level  1].
+ image = transform( + image=wrap(image), transforms=[1., 0., 0., level, 1., 0., 0., 0.]) + return unwrap(image, replace) + + +def autocontrast(image: tf.Tensor) -> tf.Tensor: + """Implements Autocontrast function from PIL using TF ops. + + Args: + image: A 3D uint8 tensor. + + Returns: + The image after it has had autocontrast applied to it and will be of type + uint8. + """ + + def scale_channel(image: tf.Tensor) -> tf.Tensor: + """Scale the 2D image using the autocontrast rule.""" + # A possibly cheaper version can be done using cumsum/unique_with_counts + # over the histogram values, rather than iterating over the entire image. + # to compute mins and maxes. + lo = tf.cast(tf.reduce_min(image), tf.float32) + hi = tf.cast(tf.reduce_max(image), tf.float32) + + # Scale the image, making the lowest value 0 and the highest value 255. + def scale_values(im): + scale = 255.0 / (hi - lo) + offset = -lo * scale + im = tf.cast(im, tf.float32) * scale + offset + im = tf.clip_by_value(im, 0.0, 255.0) + return tf.cast(im, tf.uint8) + + result = tf.cond(hi > lo, lambda: scale_values(image), lambda: image) + return result + + # Assumes RGB for now. Scales each channel independently + # and then stacks the result. + s1 = scale_channel(image[..., 0]) + s2 = scale_channel(image[..., 1]) + s3 = scale_channel(image[..., 2]) + image = tf.stack([s1, s2, s3], -1) + + return image + + +def sharpness(image: tf.Tensor, factor: float) -> tf.Tensor: + """Implements Sharpness function from PIL using TF ops.""" + orig_image = image + image = tf.cast(image, tf.float32) + # Make image 4D for conv operation. + image = tf.expand_dims(image, 0) + # SMOOTH PIL Kernel. + if orig_image.shape.rank == 3: + kernel = tf.constant([[1, 1, 1], [1, 5, 1], [1, 1, 1]], + dtype=tf.float32, + shape=[3, 3, 1, 1]) / 13. + # Tile across channel dimension. 
+ kernel = tf.tile(kernel, [1, 1, 3, 1]) + strides = [1, 1, 1, 1] + degenerate = tf.nn.depthwise_conv2d( + image, kernel, strides, padding='VALID', dilations=[1, 1]) + elif orig_image.shape.rank == 4: + kernel = tf.constant([[1, 1, 1], [1, 5, 1], [1, 1, 1]], + dtype=tf.float32, + shape=[1, 3, 3, 1, 1]) / 13. + strides = [1, 1, 1, 1, 1] + # Run the kernel across each channel + channels = tf.split(image, 3, axis=-1) + degenerates = [ + tf.nn.conv3d(channel, kernel, strides, padding='VALID', + dilations=[1, 1, 1, 1, 1]) + for channel in channels + ] + degenerate = tf.concat(degenerates, -1) + else: + raise ValueError('Bad image rank: {}'.format(image.shape.rank)) + degenerate = tf.clip_by_value(degenerate, 0.0, 255.0) + degenerate = tf.squeeze(tf.cast(degenerate, tf.uint8), [0]) + + # For the borders of the resulting image, fill in the values of the + # original image. + mask = tf.ones_like(degenerate) + paddings = [[0, 0]] * (orig_image.shape.rank - 3) + padded_mask = tf.pad(mask, paddings + [[1, 1], [1, 1], [0, 0]]) + padded_degenerate = tf.pad(degenerate, paddings + [[1, 1], [1, 1], [0, 0]]) + result = tf.where(tf.equal(padded_mask, 1), padded_degenerate, orig_image) + + # Blend the final result. + return blend(result, orig_image, factor) + + +def equalize(image: tf.Tensor) -> tf.Tensor: + """Implements Equalize function from PIL using TF ops.""" + + def scale_channel(im, c): + """Scale the data in the channel to implement equalize.""" + im = tf.cast(im[..., c], tf.int32) + # Compute the histogram of the image channel. + histo = tf.histogram_fixed_width(im, [0, 255], nbins=256) + + # For the purposes of computing the step, filter out the nonzeros. + nonzero = tf.where(tf.not_equal(histo, 0)) + nonzero_histo = tf.reshape(tf.gather(histo, nonzero), [-1]) + step = (tf.reduce_sum(nonzero_histo) - nonzero_histo[-1]) // 255 + + def build_lut(histo, step): + # Compute the cumulative sum, shifting by step // 2 + # and then normalization by step. 
      lut = (tf.cumsum(histo) + (step // 2)) // step
      # Shift lut, prepending with 0.
      lut = tf.concat([[0], lut[:-1]], 0)
      # Clip the counts to be in range. This is done
      # in the C code for image.point.
      return tf.clip_by_value(lut, 0, 255)

    # If step is zero, return the original image. Otherwise, build
    # lut from the full histogram and step and then index from it.
    result = tf.cond(
        tf.equal(step, 0), lambda: im,
        lambda: tf.gather(build_lut(histo, step), im))

    return tf.cast(result, tf.uint8)

  # Assumes RGB for now. Scales each channel independently
  # and then stacks the result.
  s1 = scale_channel(image, 0)
  s2 = scale_channel(image, 1)
  s3 = scale_channel(image, 2)
  image = tf.stack([s1, s2, s3], -1)
  return image


def invert(image: tf.Tensor) -> tf.Tensor:
  """Inverts the image pixels."""
  image = tf.convert_to_tensor(image)
  return 255 - image


def wrap(image: tf.Tensor) -> tf.Tensor:
  """Returns 'image' with an extra channel set to all 1s."""
  # The extra all-ones channel acts as a validity mask that `unwrap` later
  # inspects: transforms leave 0s where no source pixel was mapped.
  shape = tf.shape(image)
  extended_channel = tf.expand_dims(tf.ones(shape[:-1], image.dtype), -1)
  extended = tf.concat([image, extended_channel], axis=-1)
  return extended


def unwrap(image: tf.Tensor, replace: int) -> tf.Tensor:
  """Unwraps an image produced by wrap.

  Where there is a 0 in the last channel for every spatial position,
  the rest of the three channels in that spatial dimension are grayed
  (set to 128). Operations like translate and shear on a wrapped
  Tensor will leave 0s in empty locations. Some transformations look
  at the intensity of values to do preprocessing, and we want these
  empty pixels to assume the 'average' value, rather than pure black.


  Args:
    image: A 3D Image Tensor with 4 channels.
    replace: A one or three value 1D tensor to fill empty pixels.

  Returns:
    image: A 3D image Tensor with 3 channels.
  """
  image_shape = tf.shape(image)
  # Flatten the spatial dimensions.
  flattened_image = tf.reshape(image, [-1, image_shape[-1]])

  # Find all pixels where the last channel is zero.
  alpha_channel = tf.expand_dims(flattened_image[..., 3], axis=-1)

  # Extend `replace` with a 1 for the validity channel so shapes line up.
  replace = tf.concat([replace, tf.ones([1], image.dtype)], 0)

  # Where they are zero, fill them in with 'replace'.
  flattened_image = tf.where(
      tf.equal(alpha_channel, 0),
      tf.ones_like(flattened_image, dtype=image.dtype) * replace,
      flattened_image)

  image = tf.reshape(flattened_image, image_shape)
  # Drop the validity channel, keeping only the first 3 channels.
  image = tf.slice(
      image,
      [0] * image.shape.rank,
      tf.concat([image_shape[:-1], [3]], -1))
  return image


def _randomly_negate_tensor(tensor):
  """With 50% prob turn the tensor negative."""
  should_flip = tf.cast(tf.floor(tf.random.uniform([]) + 0.5), tf.bool)
  final_tensor = tf.cond(should_flip, lambda: tensor, lambda: -tensor)
  return final_tensor


def _rotate_level_to_arg(level: float):
  """Maps a [0, _MAX_LEVEL] level to a signed rotation angle in degrees."""
  level = (level / _MAX_LEVEL) * 30.
  level = _randomly_negate_tensor(level)
  return (level,)


def _shrink_level_to_arg(level: float):
  """Converts level to ratio by which we shrink the image content."""
  if level == 0:
    return (1.0,)  # if level is zero, do not shrink the image
  # Maximum shrinking ratio is 2.9.
  level = 2. / (_MAX_LEVEL / level) + 0.9
  return (level,)


def _enhance_level_to_arg(level: float):
  """Maps a [0, _MAX_LEVEL] level to an enhancement factor in [0.1, 1.9]."""
  return ((level / _MAX_LEVEL) * 1.8 + 0.1,)


def _shear_level_to_arg(level: float):
  """Maps a [0, _MAX_LEVEL] level to a signed shear in [-0.3, 0.3]."""
  level = (level / _MAX_LEVEL) * 0.3
  # Flip level to negative with 50% chance.
  level = _randomly_negate_tensor(level)
  return (level,)


def _translate_level_to_arg(level: float, translate_const: float):
  """Maps a level to a signed translation scaled by `translate_const`."""
  level = (level / _MAX_LEVEL) * float(translate_const)
  # Flip level to negative with 50% chance.
  level = _randomly_negate_tensor(level)
  return (level,)


def _mult_to_arg(level: float, multiplier: float = 1.):
  """Maps a level to an integer argument scaled by `multiplier`."""
  return (int((level / _MAX_LEVEL) * multiplier),)


def _apply_func_with_prob(func: Any, image: tf.Tensor, args: Any, prob: float):
  """Apply `func` to image w/ `args` as input with probability `prob`."""
  assert isinstance(args, tuple)

  # Apply the function with probability `prob`.
  should_apply_op = tf.cast(
      tf.floor(tf.random.uniform([], dtype=tf.float32) + prob), tf.bool)
  augmented_image = tf.cond(should_apply_op, lambda: func(image, *args),
                            lambda: image)
  return augmented_image


def select_and_apply_random_policy(policies: Any, image: tf.Tensor):
  """Select a random policy from `policies` and apply it to `image`."""
  policy_to_select = tf.random.uniform([], maxval=len(policies), dtype=tf.int32)
  # Note that using tf.case instead of tf.conds would result in significantly
  # larger graphs and would even break export for some larger policies.
  for (i, policy) in enumerate(policies):
    # Only the selected branch actually transforms the image.
    image = tf.cond(
        tf.equal(i, policy_to_select),
        lambda selected_policy=policy: selected_policy(image),
        lambda: image)
  return image


# Registry mapping policy operation names to their implementation functions.
NAME_TO_FUNC = {
    'AutoContrast': autocontrast,
    'Equalize': equalize,
    'Invert': invert,
    'Rotate': wrapped_rotate,
    'Posterize': posterize,
    'Solarize': solarize,
    'SolarizeAdd': solarize_add,
    'Color': color,
    'Contrast': contrast,
    'Brightness': brightness,
    'Sharpness': sharpness,
    'ShearX': shear_x,
    'ShearY': shear_y,
    'TranslateX': translate_x,
    'TranslateY': translate_y,
    'Cutout': cutout,
}

# Functions that have a 'replace' parameter
REPLACE_FUNCS = frozenset({
    'Rotate',
    'TranslateX',
    'ShearX',
    'ShearY',
    'TranslateY',
    'Cutout',
})


def level_to_arg(cutout_const: float, translate_const: float):
  """Creates a dict mapping image operation names to their arguments."""

  no_arg = lambda level: ()
  posterize_arg = lambda level: _mult_to_arg(level, 4)
  solarize_arg = lambda level: _mult_to_arg(level, 256)
  solarize_add_arg = lambda level: _mult_to_arg(level, 110)
  cutout_arg = lambda level: _mult_to_arg(level, cutout_const)
  translate_arg = lambda level: _translate_level_to_arg(level, translate_const)

  args = {
      'AutoContrast': no_arg,
      'Equalize': no_arg,
      'Invert': no_arg,
      'Rotate': _rotate_level_to_arg,
      'Posterize': posterize_arg,
      'Solarize': solarize_arg,
      'SolarizeAdd': solarize_add_arg,
      'Color': _enhance_level_to_arg,
      'Contrast': _enhance_level_to_arg,
      'Brightness': _enhance_level_to_arg,
      'Sharpness': _enhance_level_to_arg,
      'ShearX': _shear_level_to_arg,
      'ShearY': _shear_level_to_arg,
      'Cutout': cutout_arg,
      'TranslateX': translate_arg,
      'TranslateY': translate_arg,
  }
  return args


def _parse_policy_info(name: Text, prob: float, level: float,
                       replace_value: List[int], cutout_const: float,
                       translate_const: float) -> Tuple[Any, float, Any]:
  """Return the function that corresponds to 
`name` and update `level` param.""" + func = NAME_TO_FUNC[name] + args = level_to_arg(cutout_const, translate_const)[name](level) + + if name in REPLACE_FUNCS: + # Add in replace arg if it is required for the function that is called. + args = tuple(list(args) + [replace_value]) + + return func, prob, args + + +class ImageAugment(object): + """Image augmentation class for applying image distortions.""" + + def distort(self, image: tf.Tensor) -> tf.Tensor: + """Given an image tensor, returns a distorted image with the same shape. + + Args: + image: `Tensor` of shape [height, width, 3] or + [num_frames, height, width, 3] representing an image or image sequence. + + Returns: + The augmented version of `image`. + """ + raise NotImplementedError() + + +class AutoAugment(ImageAugment): + """Applies the AutoAugment policy to images. + + AutoAugment is from the paper: https://arxiv.org/abs/1805.09501. + """ + + def __init__(self, + augmentation_name: Text = 'v0', + policies: Optional[Iterable[Iterable[Tuple[Text, float, + float]]]] = None, + cutout_const: float = 100, + translate_const: float = 250): + """Applies the AutoAugment policy to images. + + Args: + augmentation_name: The name of the AutoAugment policy to use. The + available options are `v0`, `test`, `reduced_cifar10`, `svhn` and + `reduced_imagenet`. `v0` is the policy used for all + of the results in the paper and was found to achieve the best results on + the COCO dataset. `v1`, `v2` and `v3` are additional good policies found + on the COCO dataset that have slight variation in what operations were + used during the search procedure along with how many operations are + applied in parallel to a single image (2 vs 3). Make sure to set + `policies` to `None` (the default) if you want to set options using + `augmentation_name`. 
+ policies: list of lists of tuples in the form `(func, prob, level)`, + `func` is a string name of the augmentation function, `prob` is the + probability of applying the `func` operation, `level` (or magnitude) is + the input argument for `func`. For example: + ``` + [[('Equalize', 0.9, 3), ('Color', 0.7, 8)], + [('Invert', 0.6, 5), ('Rotate', 0.2, 9), ('ShearX', 0.1, 2)], ...] + ``` + The outer-most list must be 3-d. The number of operations in a + sub-policy can vary from one sub-policy to another. + If you provide `policies` as input, any option set with + `augmentation_name` will get overriden as they are mutually exclusive. + cutout_const: multiplier for applying cutout. + translate_const: multiplier for applying translation. + + Raises: + ValueError if `augmentation_name` is unsupported. + """ + super(AutoAugment, self).__init__() + + self.augmentation_name = augmentation_name + self.cutout_const = float(cutout_const) + self.translate_const = float(translate_const) + self.available_policies = { + 'v0': self.policy_v0(), + 'test': self.policy_test(), + 'simple': self.policy_simple(), + 'reduced_cifar10': self.policy_reduced_cifar10(), + 'svhn': self.policy_svhn(), + 'reduced_imagenet': self.policy_reduced_imagenet(), + } + + if not policies: + if augmentation_name not in self.available_policies: + raise ValueError( + 'Invalid augmentation_name: {}'.format(augmentation_name)) + + self.policies = self.available_policies[augmentation_name] + + else: + self._check_policy_shape(policies) + self.policies = policies + + def _check_policy_shape(self, policies): + """Checks dimension and shape of the custom policy. + + Args: + policies: List of list of tuples in the form `(func, prob, level)`. Must + have shape of `(:, :, 3)`. + + Raises: + ValueError if the shape of `policies` is unexpected. + """ + in_shape = np.array(policies).shape + if len(in_shape) != 3 or in_shape[-1:] != (3,): + raise ValueError('Wrong shape detected for custom policy. 
Expected ' + '(:, :, 3) but got {}.'.format(in_shape)) + + def distort(self, image: tf.Tensor) -> tf.Tensor: + """Applies the AutoAugment policy to `image`. + + AutoAugment is from the paper: https://arxiv.org/abs/1805.09501. + + Args: + image: `Tensor` of shape [height, width, 3] representing an image. + + Returns: + A version of image that now has data augmentation applied to it based on + the `policies` pass into the function. + """ + input_image_type = image.dtype + + if input_image_type != tf.uint8: + image = tf.clip_by_value(image, 0.0, 255.0) + image = tf.cast(image, dtype=tf.uint8) + + replace_value = [128] * 3 + + # func is the string name of the augmentation function, prob is the + # probability of applying the operation and level is the parameter + # associated with the tf op. + + # tf_policies are functions that take in an image and return an augmented + # image. + tf_policies = [] + for policy in self.policies: + tf_policy = [] + assert_ranges = [] + # Link string name to the correct python function and make sure the + # correct argument is passed into that function. + for policy_info in policy: + _, prob, level = policy_info + assert_ranges.append(tf.Assert(tf.less_equal(prob, 1.), [prob])) + assert_ranges.append( + tf.Assert(tf.less_equal(level, int(_MAX_LEVEL)), [level])) + + policy_info = list(policy_info) + [ + replace_value, self.cutout_const, self.translate_const + ] + tf_policy.append(_parse_policy_info(*policy_info)) + # Now build the tf policy that will apply the augmentation procedue + # on image. 
+ def make_final_policy(tf_policy_): + + def final_policy(image_): + for func, prob, args in tf_policy_: + image_ = _apply_func_with_prob(func, image_, args, prob) + return image_ + + return final_policy + + with tf.control_dependencies(assert_ranges): + tf_policies.append(make_final_policy(tf_policy)) + + image = select_and_apply_random_policy(tf_policies, image) + image = tf.cast(image, dtype=input_image_type) + return image + + @staticmethod + def policy_v0(): + """Autoaugment policy that was used in AutoAugment Paper. + + Each tuple is an augmentation operation of the form + (operation, probability, magnitude). Each element in policy is a + sub-policy that will be applied sequentially on the image. + + Returns: + the policy. + """ + + policy = [ + [('Equalize', 0.8, 1), ('ShearY', 0.8, 4)], + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Color', 0.4, 1), ('Rotate', 0.6, 8)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('ShearX', 0.2, 9), ('Rotate', 0.6, 8)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Invert', 0.4, 9), ('Rotate', 0.6, 0)], + [('Equalize', 1.0, 9), ('ShearY', 0.6, 3)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Solarize', 0.2, 4), ('Rotate', 0.8, 9)], + [('Rotate', 1.0, 7), ('TranslateY', 0.8, 9)], + [('ShearX', 0.0, 0), ('Solarize', 0.8, 4)], + [('ShearY', 0.8, 0), ('Color', 0.6, 4)], + [('Color', 1.0, 0), ('Rotate', 0.6, 2)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)], + [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)], + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + [('Color', 0.8, 6), ('Rotate', 0.4, 5)], + ] + return policy + + @staticmethod + def policy_reduced_cifar10(): + 
"""Autoaugment policy for reduced CIFAR-10 dataset. + + Result is from the AutoAugment paper: https://arxiv.org/abs/1805.09501. + + Each tuple is an augmentation operation of the form + (operation, probability, magnitude). Each element in policy is a + sub-policy that will be applied sequentially on the image. + + Returns: + the policy. + """ + policy = [ + [('Invert', 0.1, 7), ('Contrast', 0.2, 6)], + [('Rotate', 0.7, 2), ('TranslateX', 0.3, 9)], + [('Sharpness', 0.8, 1), ('Sharpness', 0.9, 3)], + [('ShearY', 0.5, 8), ('TranslateY', 0.7, 9)], + [('AutoContrast', 0.5, 8), ('Equalize', 0.9, 2)], + [('ShearY', 0.2, 7), ('Posterize', 0.3, 7)], + [('Color', 0.4, 3), ('Brightness', 0.6, 7)], + [('Sharpness', 0.3, 9), ('Brightness', 0.7, 9)], + [('Equalize', 0.6, 5), ('Equalize', 0.5, 1)], + [('Contrast', 0.6, 7), ('Sharpness', 0.6, 5)], + [('Color', 0.7, 7), ('TranslateX', 0.5, 8)], + [('Equalize', 0.3, 7), ('AutoContrast', 0.4, 8)], + [('TranslateY', 0.4, 3), ('Sharpness', 0.2, 6)], + [('Brightness', 0.9, 6), ('Color', 0.2, 8)], + [('Solarize', 0.5, 2), ('Invert', 0.0, 3)], + [('Equalize', 0.2, 0), ('AutoContrast', 0.6, 0)], + [('Equalize', 0.2, 8), ('Equalize', 0.6, 4)], + [('Color', 0.9, 9), ('Equalize', 0.6, 6)], + [('AutoContrast', 0.8, 4), ('Solarize', 0.2, 8)], + [('Brightness', 0.1, 3), ('Color', 0.7, 0)], + [('Solarize', 0.4, 5), ('AutoContrast', 0.9, 3)], + [('TranslateY', 0.9, 9), ('TranslateY', 0.7, 9)], + [('AutoContrast', 0.9, 2), ('Solarize', 0.8, 3)], + [('Equalize', 0.8, 8), ('Invert', 0.1, 3)], + [('TranslateY', 0.7, 9), ('AutoContrast', 0.9, 1)], + ] + return policy + + @staticmethod + def policy_svhn(): + """Autoaugment policy for SVHN dataset. + + Result is from the AutoAugment paper: https://arxiv.org/abs/1805.09501. + + Each tuple is an augmentation operation of the form + (operation, probability, magnitude). Each element in policy is a + sub-policy that will be applied sequentially on the image. + + Returns: + the policy. 
+ """ + policy = [ + [('ShearX', 0.9, 4), ('Invert', 0.2, 3)], + [('ShearY', 0.9, 8), ('Invert', 0.7, 5)], + [('Equalize', 0.6, 5), ('Solarize', 0.6, 6)], + [('Invert', 0.9, 3), ('Equalize', 0.6, 3)], + [('Equalize', 0.6, 1), ('Rotate', 0.9, 3)], + [('ShearX', 0.9, 4), ('AutoContrast', 0.8, 3)], + [('ShearY', 0.9, 8), ('Invert', 0.4, 5)], + [('ShearY', 0.9, 5), ('Solarize', 0.2, 6)], + [('Invert', 0.9, 6), ('AutoContrast', 0.8, 1)], + [('Equalize', 0.6, 3), ('Rotate', 0.9, 3)], + [('ShearX', 0.9, 4), ('Solarize', 0.3, 3)], + [('ShearY', 0.8, 8), ('Invert', 0.7, 4)], + [('Equalize', 0.9, 5), ('TranslateY', 0.6, 6)], + [('Invert', 0.9, 4), ('Equalize', 0.6, 7)], + [('Contrast', 0.3, 3), ('Rotate', 0.8, 4)], + [('Invert', 0.8, 5), ('TranslateY', 0.0, 2)], + [('ShearY', 0.7, 6), ('Solarize', 0.4, 8)], + [('Invert', 0.6, 4), ('Rotate', 0.8, 4)], + [('ShearY', 0.3, 7), ('TranslateX', 0.9, 3)], + [('ShearX', 0.1, 6), ('Invert', 0.6, 5)], + [('Solarize', 0.7, 2), ('TranslateY', 0.6, 7)], + [('ShearY', 0.8, 4), ('Invert', 0.8, 8)], + [('ShearX', 0.7, 9), ('TranslateY', 0.8, 3)], + [('ShearY', 0.8, 5), ('AutoContrast', 0.7, 3)], + [('ShearX', 0.7, 2), ('Invert', 0.1, 5)], + ] + return policy + + @staticmethod + def policy_reduced_imagenet(): + """Autoaugment policy for reduced ImageNet dataset. + + Result is from the AutoAugment paper: https://arxiv.org/abs/1805.09501. + + Each tuple is an augmentation operation of the form + (operation, probability, magnitude). Each element in policy is a + sub-policy that will be applied sequentially on the image. + + Returns: + the policy. 
+ """ + policy = [ + [('Posterize', 0.4, 8), ('Rotate', 0.6, 9)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], + [('Posterize', 0.6, 7), ('Posterize', 0.6, 6)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], + [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], + [('Posterize', 0.8, 5), ('Equalize', 1.0, 2)], + [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], + [('Equalize', 0.6, 8), ('Posterize', 0.4, 6)], + [('Rotate', 0.8, 8), ('Color', 0.4, 0)], + [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], + [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Rotate', 0.8, 8), ('Color', 1.0, 2)], + [('Color', 0.8, 8), ('Solarize', 0.8, 7)], + [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], + [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], + [('Color', 0.4, 0), ('Equalize', 0.6, 3)], + [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], + [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], + [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], + [('Color', 0.6, 4), ('Contrast', 1.0, 8)], + [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)] + ] + return policy + + @staticmethod + def policy_simple(): + """Same as `policy_v0`, except with custom ops removed.""" + + policy = [ + [('Color', 0.4, 9), ('Equalize', 0.6, 3)], + [('Solarize', 0.8, 3), ('Equalize', 0.4, 7)], + [('Solarize', 0.4, 2), ('Solarize', 0.6, 2)], + [('Color', 0.2, 0), ('Equalize', 0.8, 8)], + [('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)], + [('Color', 0.6, 1), ('Equalize', 1.0, 2)], + [('Color', 0.4, 7), ('Equalize', 0.6, 0)], + [('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)], + [('Solarize', 0.6, 8), ('Color', 0.6, 9)], + [('Equalize', 0.8, 4), ('Equalize', 0.0, 8)], + [('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)], + [('Posterize', 0.8, 2), ('Solarize', 0.6, 10)], + [('Solarize', 0.6, 8), ('Equalize', 0.6, 1)], + ] + return policy + + @staticmethod + def 
policy_test(): + """Autoaugment test policy for debugging.""" + policy = [ + [('TranslateX', 1.0, 4), ('Equalize', 1.0, 10)], + ] + return policy + + +class RandAugment(ImageAugment): + """Applies the RandAugment policy to images. + + RandAugment is from the paper https://arxiv.org/abs/1909.13719, + """ + + def __init__(self, + num_layers: int = 2, + magnitude: float = 10., + cutout_const: float = 40., + translate_const: float = 100.): + """Applies the RandAugment policy to images. + + Args: + num_layers: Integer, the number of augmentation transformations to apply + sequentially to an image. Represented as (N) in the paper. Usually best + values will be in the range [1, 3]. + magnitude: Integer, shared magnitude across all augmentation operations. + Represented as (M) in the paper. Usually best values are in the range + [5, 10]. + cutout_const: multiplier for applying cutout. + translate_const: multiplier for applying translation. + """ + super(RandAugment, self).__init__() + + self.num_layers = num_layers + self.magnitude = float(magnitude) + self.cutout_const = float(cutout_const) + self.translate_const = float(translate_const) + self.available_ops = [ + 'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize', + 'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY', + 'TranslateX', 'TranslateY', 'Cutout', 'SolarizeAdd' + ] + + def distort(self, image: tf.Tensor) -> tf.Tensor: + """Applies the RandAugment policy to `image`. + + Args: + image: `Tensor` of shape [height, width, 3] representing an image. + + Returns: + The augmented version of `image`. 
+ """ + input_image_type = image.dtype + + if input_image_type != tf.uint8: + image = tf.clip_by_value(image, 0.0, 255.0) + image = tf.cast(image, dtype=tf.uint8) + + replace_value = [128] * 3 + min_prob, max_prob = 0.2, 0.8 + + for _ in range(self.num_layers): + op_to_select = tf.random.uniform([], + maxval=len(self.available_ops) + 1, + dtype=tf.int32) + + branch_fns = [] + for (i, op_name) in enumerate(self.available_ops): + prob = tf.random.uniform([], + minval=min_prob, + maxval=max_prob, + dtype=tf.float32) + func, _, args = _parse_policy_info(op_name, prob, self.magnitude, + replace_value, self.cutout_const, + self.translate_const) + branch_fns.append(( + i, + # pylint:disable=g-long-lambda + lambda selected_func=func, selected_args=args: selected_func( + image, *selected_args))) + # pylint:enable=g-long-lambda + + image = tf.switch_case( + branch_index=op_to_select, + branch_fns=branch_fns, + default=lambda: tf.identity(image)) + + image = tf.cast(image, dtype=input_image_type) + return image diff --git a/official/vision/beta/ops/augment_test.py b/official/vision/beta/ops/augment_test.py new file mode 100644 index 0000000000000000000000000000000000000000..14f40695d2e9db5ae7c0a3e106863580cf7bc72a --- /dev/null +++ b/official/vision/beta/ops/augment_test.py @@ -0,0 +1,258 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for autoaugment.""" + +from __future__ import absolute_import +from __future__ import division +# from __future__ import google_type_annotations +from __future__ import print_function + +import random +from absl.testing import parameterized + +import tensorflow as tf + +from official.vision.beta.ops import augment + + +def get_dtype_test_cases(): + return [ + ('uint8', tf.uint8), + ('int32', tf.int32), + ('float16', tf.float16), + ('float32', tf.float32), + ] + + +@parameterized.named_parameters(get_dtype_test_cases()) +class TransformsTest(parameterized.TestCase, tf.test.TestCase): + """Basic tests for fundamental transformations.""" + + def test_to_from_4d(self, dtype): + for shape in [(10, 10), (10, 10, 10), (10, 10, 10, 10)]: + original_ndims = len(shape) + image = tf.zeros(shape, dtype=dtype) + image_4d = augment.to_4d(image) + self.assertEqual(4, tf.rank(image_4d)) + self.assertAllEqual(image, augment.from_4d(image_4d, original_ndims)) + + def test_transform(self, dtype): + image = tf.constant([[1, 2], [3, 4]], dtype=dtype) + self.assertAllEqual( + augment.transform(image, transforms=[1] * 8), [[4, 4], [4, 4]]) + + def test_translate(self, dtype): + image = tf.constant( + [[1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1]], dtype=dtype) + translations = [-1, -1] + translated = augment.translate(image=image, translations=translations) + expected = [[1, 0, 1, 1], [0, 1, 0, 0], [1, 0, 1, 1], [1, 0, 1, 1]] + self.assertAllEqual(translated, expected) + + def test_translate_shapes(self, dtype): + translation = [0, 0] + for shape in [(3, 3), (5, 5), (224, 224, 3)]: + image = tf.zeros(shape, dtype=dtype) + self.assertAllEqual(image, augment.translate(image, translation)) + + def test_translate_invalid_translation(self, dtype): + image = tf.zeros((1, 1), dtype=dtype) + invalid_translation = [[[1, 1]]] + with self.assertRaisesRegex(TypeError, 'rank 1 or 2'): + _ = augment.translate(image, invalid_translation) + + def test_rotate(self, dtype): + image = 
tf.reshape(tf.cast(tf.range(9), dtype), (3, 3))
+    rotation = 90.
+    transformed = augment.rotate(image=image, degrees=rotation)
+    expected = [[2, 5, 8], [1, 4, 7], [0, 3, 6]]
+    self.assertAllEqual(transformed, expected)
+
+  def test_rotate_shapes(self, dtype):
+    degrees = 0.
+    for shape in [(3, 3), (5, 5), (224, 224, 3)]:
+      image = tf.zeros(shape, dtype=dtype)
+      self.assertAllEqual(image, augment.rotate(image, degrees))
+
+
+class AutoaugmentTest(tf.test.TestCase, parameterized.TestCase):
+
+  AVAILABLE_POLICIES = [
+      'v0',
+      'test',
+      'simple',
+      'reduced_cifar10',
+      'svhn',
+      'reduced_imagenet',
+  ]
+
+  def test_autoaugment(self):
+    """Smoke test to be sure there are no syntax errors."""
+    image = tf.zeros((224, 224, 3), dtype=tf.uint8)
+
+    for policy in self.AVAILABLE_POLICIES:
+      augmenter = augment.AutoAugment(augmentation_name=policy)
+      aug_image = augmenter.distort(image)
+
+      self.assertEqual((224, 224, 3), aug_image.shape)
+
+  def test_randaug(self):
+    """Smoke test to be sure there are no syntax errors."""
+    image = tf.zeros((224, 224, 3), dtype=tf.uint8)
+
+    augmenter = augment.RandAugment()
+    aug_image = augmenter.distort(image)
+
+    self.assertEqual((224, 224, 3), aug_image.shape)
+
+  def test_all_policy_ops(self):
+    """Smoke test to be sure all augmentation functions can execute."""
+
+    prob = 1
+    magnitude = 10
+    replace_value = [128] * 3
+    cutout_const = 100
+    translate_const = 250
+
+    image = tf.ones((224, 224, 3), dtype=tf.uint8)
+
+    for op_name in augment.NAME_TO_FUNC:
+      func, _, args = augment._parse_policy_info(op_name, prob, magnitude,
+                                                 replace_value, cutout_const,
+                                                 translate_const)
+      image = func(image, *args)
+
+    self.assertEqual((224, 224, 3), image.shape)
+
+  def test_autoaugment_video(self):
+    """Smoke test with video to be sure there are no syntax errors."""
+    image = tf.zeros((2, 224, 224, 3), dtype=tf.uint8)
+
+    for policy in
self.AVAILABLE_POLICIES: + augmenter = augment.AutoAugment(augmentation_name=policy) + aug_image = augmenter.distort(image) + + self.assertEqual((2, 224, 224, 3), aug_image.shape) + + def test_randaug_video(self): + """Smoke test with video to be sure there are no syntax errors.""" + image = tf.zeros((2, 224, 224, 3), dtype=tf.uint8) + + augmenter = augment.RandAugment() + aug_image = augmenter.distort(image) + + self.assertEqual((2, 224, 224, 3), aug_image.shape) + + def test_all_policy_ops_video(self): + """Smoke test to be sure all video augmentation functions can execute.""" + + prob = 1 + magnitude = 10 + replace_value = [128] * 3 + cutout_const = 100 + translate_const = 250 + + image = tf.ones((2, 224, 224, 3), dtype=tf.uint8) + + for op_name in augment.NAME_TO_FUNC: + func, _, args = augment._parse_policy_info(op_name, prob, magnitude, + replace_value, cutout_const, + translate_const) + image = func(image, *args) + + self.assertEqual((2, 224, 224, 3), image.shape) + + def _generate_test_policy(self): + """Generate a test policy at random.""" + op_list = list(augment.NAME_TO_FUNC.keys()) + size = 6 + prob = [round(random.uniform(0., 1.), 1) for _ in range(size)] + mag = [round(random.uniform(0, 10)) for _ in range(size)] + policy = [] + for i in range(0, size, 2): + policy.append([(op_list[i], prob[i], mag[i]), + (op_list[i + 1], prob[i + 1], mag[i + 1])]) + return policy + + def test_custom_policy(self): + """Test autoaugment with a custom policy.""" + image = tf.zeros((224, 224, 3), dtype=tf.uint8) + augmenter = augment.AutoAugment(policies=self._generate_test_policy()) + aug_image = augmenter.distort(image) + + self.assertEqual((224, 224, 3), aug_image.shape) + + @parameterized.named_parameters( + {'testcase_name': '_OutOfRangeProb', + 'sub_policy': ('Equalize', 1.1, 3), 'value': '1.1'}, + {'testcase_name': '_OutOfRangeMag', + 'sub_policy': ('Equalize', 0.9, 11), 'value': '11'}, + ) + def test_invalid_custom_sub_policy(self, sub_policy, value): + """Test 
autoaugment with out-of-range values in the custom policy.""" + image = tf.zeros((224, 224, 3), dtype=tf.uint8) + policy = self._generate_test_policy() + policy[0][0] = sub_policy + augmenter = augment.AutoAugment(policies=policy) + + with self.assertRaisesRegex( + tf.errors.InvalidArgumentError, + r'Expected \'tf.Tensor\(False, shape=\(\), dtype=bool\)\' to be true. ' + r'Summarized data: ({})'.format(value)): + augmenter.distort(image) + + def test_invalid_custom_policy_ndim(self): + """Test autoaugment with wrong dimension in the custom policy.""" + policy = [[('Equalize', 0.8, 1), ('Shear', 0.8, 4)], + [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)]] + policy = [[policy]] + + with self.assertRaisesRegex( + ValueError, + r'Expected \(:, :, 3\) but got \(1, 1, 2, 2, 3\).'): + augment.AutoAugment(policies=policy) + + def test_invalid_custom_policy_shape(self): + """Test autoaugment with wrong shape in the custom policy.""" + policy = [[('Equalize', 0.8, 1, 1), ('Shear', 0.8, 4, 1)], + [('TranslateY', 0.6, 3, 1), ('Rotate', 0.9, 3, 1)]] + + with self.assertRaisesRegex( + ValueError, + r'Expected \(:, :, 3\) but got \(2, 2, 4\)'): + augment.AutoAugment(policies=policy) + + def test_invalid_custom_policy_key(self): + """Test autoaugment with invalid key in the custom policy.""" + image = tf.zeros((224, 224, 3), dtype=tf.uint8) + policy = [[('AAAAA', 0.8, 1), ('Shear', 0.8, 4)], + [('TranslateY', 0.6, 3), ('Rotate', 0.9, 3)]] + augmenter = augment.AutoAugment(policies=policy) + + with self.assertRaisesRegex(KeyError, '\'AAAAA\''): + augmenter.distort(image) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/box_ops.py b/official/vision/beta/ops/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f37e92fb62ebad38e2747a18e1d0a60cf8a6b455 --- /dev/null +++ b/official/vision/beta/ops/box_ops.py @@ -0,0 +1,639 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Box related ops.""" + +# Import libraries +import numpy as np +import tensorflow as tf + + +EPSILON = 1e-8 +BBOX_XFORM_CLIP = np.log(1000. / 16.) + + +def yxyx_to_xywh(boxes): + """Converts boxes from ymin, xmin, ymax, xmax to xmin, ymin, width, height. + + Args: + boxes: a numpy array whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. + + Returns: + boxes: a numpy array whose shape is the same as `boxes` in new format. + + Raises: + ValueError: If the last dimension of boxes is not 4. + """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + boxes_ymin = boxes[..., 0] + boxes_xmin = boxes[..., 1] + boxes_width = boxes[..., 3] - boxes[..., 1] + boxes_height = boxes[..., 2] - boxes[..., 0] + new_boxes = np.stack( + [boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=-1) + + return new_boxes + + +def jitter_boxes(boxes, noise_scale=0.025): + """Jitter the box coordinates by some noise distribution. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. + noise_scale: a python float which specifies the magnitude of noise. The rule + of thumb is to set this between (0, 0.1]. The default value is found to + mimic the noisy detections best empirically. 
+ + Returns: + jittered_boxes: a tensor whose shape is the same as `boxes` representing + the jittered boxes. + + Raises: + ValueError: If the last dimension of boxes is not 4. + """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + with tf.name_scope('jitter_boxes'): + bbox_jitters = tf.random.normal(tf.shape(boxes), stddev=noise_scale) + ymin = boxes[..., 0:1] + xmin = boxes[..., 1:2] + ymax = boxes[..., 2:3] + xmax = boxes[..., 3:4] + width = xmax - xmin + height = ymax - ymin + new_center_x = (xmin + xmax) / 2.0 + bbox_jitters[..., 0:1] * width + new_center_y = (ymin + ymax) / 2.0 + bbox_jitters[..., 1:2] * height + new_width = width * tf.math.exp(bbox_jitters[..., 2:3]) + new_height = height * tf.math.exp(bbox_jitters[..., 3:4]) + jittered_boxes = tf.concat( + [new_center_y - new_height * 0.5, new_center_x - new_width * 0.5, + new_center_y + new_height * 0.5, new_center_x + new_width * 0.5], + axis=-1) + + return jittered_boxes + + +def normalize_boxes(boxes, image_shape): + """Converts boxes to the normalized coordinates. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. + image_shape: a list of two integers, a two-element vector or a tensor such + that all but the last dimensions are `broadcastable` to `boxes`. The last + dimension is 2, which represents [height, width]. + + Returns: + normalized_boxes: a tensor whose shape is the same as `boxes` representing + the normalized boxes. + + Raises: + ValueError: If the last dimension of boxes is not 4. 
+ """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + with tf.name_scope('normalize_boxes'): + if isinstance(image_shape, list) or isinstance(image_shape, tuple): + height, width = image_shape + else: + image_shape = tf.cast(image_shape, dtype=boxes.dtype) + height = image_shape[..., 0:1] + width = image_shape[..., 1:2] + + ymin = boxes[..., 0:1] / height + xmin = boxes[..., 1:2] / width + ymax = boxes[..., 2:3] / height + xmax = boxes[..., 3:4] / width + + normalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1) + return normalized_boxes + + +def denormalize_boxes(boxes, image_shape): + """Converts boxes normalized by [height, width] to pixel coordinates. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. + image_shape: a list of two integers, a two-element vector or a tensor such + that all but the last dimensions are `broadcastable` to `boxes`. The last + dimension is 2, which represents [height, width]. + + Returns: + denormalized_boxes: a tensor whose shape is the same as `boxes` representing + the denormalized boxes. + + Raises: + ValueError: If the last dimension of boxes is not 4. + """ + with tf.name_scope('denormalize_boxes'): + if isinstance(image_shape, list) or isinstance(image_shape, tuple): + height, width = image_shape + else: + image_shape = tf.cast(image_shape, dtype=boxes.dtype) + height, width = tf.split(image_shape, 2, axis=-1) + + ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1) + ymin = ymin * height + xmin = xmin * width + ymax = ymax * height + xmax = xmax * width + + denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1) + return denormalized_boxes + + +def clip_boxes(boxes, image_shape): + """Clips boxes to image boundaries. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. 
+ image_shape: a list of two integers, a two-element vector or a tensor such + that all but the last dimensions are `broadcastable` to `boxes`. The last + dimension is 2, which represents [height, width]. + + Returns: + clipped_boxes: a tensor whose shape is the same as `boxes` representing the + clipped boxes. + + Raises: + ValueError: If the last dimension of boxes is not 4. + """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + with tf.name_scope('clip_boxes'): + if isinstance(image_shape, list) or isinstance(image_shape, tuple): + height, width = image_shape + max_length = [height, width, height, width] + else: + image_shape = tf.cast(image_shape, dtype=boxes.dtype) + height, width = tf.unstack(image_shape, axis=-1) + max_length = tf.stack([height, width, height, width], axis=-1) + + clipped_boxes = tf.math.maximum(tf.math.minimum(boxes, max_length), 0.0) + return clipped_boxes + + +def compute_outer_boxes(boxes, image_shape, scale=1.0): + """Compute outer box encloses an object with a margin. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. + image_shape: a list of two integers, a two-element vector or a tensor such + that all but the last dimensions are `broadcastable` to `boxes`. The last + dimension is 2, which represents [height, width]. + scale: a float number specifying the scale of output outer boxes to input + `boxes`. + + Returns: + outer_boxes: a tensor whose shape is the same as `boxes` representing the + outer boxes. 
+ """ + if scale < 1.0: + raise ValueError( + 'scale is {}, but outer box scale must be greater than 1.0.'.format( + scale)) + centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0 + centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0 + box_height = (boxes[..., 2] - boxes[..., 0]) * scale + box_width = (boxes[..., 3] - boxes[..., 1]) * scale + outer_boxes = tf.stack( + [centers_y - box_height / 2.0, centers_x - box_width / 2.0, + centers_y + box_height / 2.0, centers_x + box_width / 2.0], + axis=1) + outer_boxes = clip_boxes(outer_boxes, image_shape) + return outer_boxes + + +def encode_boxes(boxes, anchors, weights=None): + """Encode boxes to targets. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. + anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`, + representing the coordinates of anchors in ymin, xmin, ymax, xmax order. + weights: None or a list of four float numbers used to scale coordinates. + + Returns: + encoded_boxes: a tensor whose shape is the same as `boxes` representing the + encoded box targets. + + Raises: + ValueError: If the last dimension of boxes is not 4. 
+ """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + with tf.name_scope('encode_boxes'): + boxes = tf.cast(boxes, dtype=anchors.dtype) + ymin = boxes[..., 0:1] + xmin = boxes[..., 1:2] + ymax = boxes[..., 2:3] + xmax = boxes[..., 3:4] + box_h = ymax - ymin + box_w = xmax - xmin + box_yc = ymin + 0.5 * box_h + box_xc = xmin + 0.5 * box_w + + anchor_ymin = anchors[..., 0:1] + anchor_xmin = anchors[..., 1:2] + anchor_ymax = anchors[..., 2:3] + anchor_xmax = anchors[..., 3:4] + anchor_h = anchor_ymax - anchor_ymin + anchor_w = anchor_xmax - anchor_xmin + anchor_yc = anchor_ymin + 0.5 * anchor_h + anchor_xc = anchor_xmin + 0.5 * anchor_w + + encoded_dy = (box_yc - anchor_yc) / anchor_h + encoded_dx = (box_xc - anchor_xc) / anchor_w + encoded_dh = tf.math.log(box_h / anchor_h) + encoded_dw = tf.math.log(box_w / anchor_w) + if weights: + encoded_dy *= weights[0] + encoded_dx *= weights[1] + encoded_dh *= weights[2] + encoded_dw *= weights[3] + + encoded_boxes = tf.concat( + [encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1) + return encoded_boxes + + +def decode_boxes(encoded_boxes, anchors, weights=None): + """Decode boxes. + + Args: + encoded_boxes: a tensor whose last dimension is 4 representing the + coordinates of encoded boxes in ymin, xmin, ymax, xmax order. + anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`, + representing the coordinates of anchors in ymin, xmin, ymax, xmax order. + weights: None or a list of four float numbers used to scale coordinates. + + Returns: + encoded_boxes: a tensor whose shape is the same as `boxes` representing the + decoded box targets. + """ + if encoded_boxes.shape[-1] != 4: + raise ValueError( + 'encoded_boxes.shape[-1] is {:d}, but must be 4.' 
+ .format(encoded_boxes.shape[-1])) + + with tf.name_scope('decode_boxes'): + encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype) + dy = encoded_boxes[..., 0:1] + dx = encoded_boxes[..., 1:2] + dh = encoded_boxes[..., 2:3] + dw = encoded_boxes[..., 3:4] + if weights: + dy /= weights[0] + dx /= weights[1] + dh /= weights[2] + dw /= weights[3] + dh = tf.math.minimum(dh, BBOX_XFORM_CLIP) + dw = tf.math.minimum(dw, BBOX_XFORM_CLIP) + + anchor_ymin = anchors[..., 0:1] + anchor_xmin = anchors[..., 1:2] + anchor_ymax = anchors[..., 2:3] + anchor_xmax = anchors[..., 3:4] + anchor_h = anchor_ymax - anchor_ymin + anchor_w = anchor_xmax - anchor_xmin + anchor_yc = anchor_ymin + 0.5 * anchor_h + anchor_xc = anchor_xmin + 0.5 * anchor_w + + decoded_boxes_yc = dy * anchor_h + anchor_yc + decoded_boxes_xc = dx * anchor_w + anchor_xc + decoded_boxes_h = tf.math.exp(dh) * anchor_h + decoded_boxes_w = tf.math.exp(dw) * anchor_w + + decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h + decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w + decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h + decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w + + decoded_boxes = tf.concat( + [decoded_boxes_ymin, decoded_boxes_xmin, + decoded_boxes_ymax, decoded_boxes_xmax], + axis=-1) + return decoded_boxes + + +def filter_boxes(boxes, scores, image_shape, min_size_threshold): + """Filter and remove boxes that are too small or fall outside the image. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. + scores: a tensor whose shape is the same as tf.shape(boxes)[:-1] + representing the original scores of the boxes. + image_shape: a tensor whose shape is the same as, or `broadcastable` to + `boxes` except the last dimension, which is 2, representing [height, + width] of the scaled image. + min_size_threshold: a float representing the minimal box size in each side + (w.r.t. 
the scaled image). Boxes whose sides are smaller than it will be
+      filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the position of the filtered boxes are filled with 0.
+    filtered_scores: a tensor whose shape is the same as 'scores' but with
+      the position of the filtered boxes filled with 0.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes'):
+    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height = image_shape[..., 0]
+      width = image_shape[..., 1]
+
+    ymin = boxes[..., 0]
+    xmin = boxes[..., 1]
+    ymax = boxes[..., 2]
+    xmax = boxes[..., 3]
+
+    h = ymax - ymin
+    w = xmax - xmin
+    yc = ymin + 0.5 * h
+    xc = xmin + 0.5 * w
+
+    min_size = tf.cast(
+        tf.math.maximum(min_size_threshold, 0.0), dtype=boxes.dtype)
+
+    filtered_size_mask = tf.math.logical_and(
+        tf.math.greater(h, min_size), tf.math.greater(w, min_size))
+    filtered_center_mask = tf.logical_and(
+        tf.math.logical_and(tf.math.greater(yc, 0.0), tf.math.less(yc, height)),
+        tf.math.logical_and(tf.math.greater(xc, 0.0), tf.math.less(xc, width)))
+    filtered_mask = tf.math.logical_and(
+        filtered_size_mask, filtered_center_mask)
+
+    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+    return filtered_boxes, filtered_scores
+
+
+def filter_boxes_by_scores(boxes, scores, min_score_threshold):
+  """Filter and remove boxes whose scores are smaller than the threshold.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
+      representing the original scores of the boxes.
+    min_score_threshold: a float representing the minimal box score threshold.
+      Boxes whose scores are smaller than it will be filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the position of the filtered boxes are filled with 0.
+    filtered_scores: a tensor whose shape is the same as 'scores' but with
+      the position of the filtered boxes filled with -1.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError('boxes.shape[1] is {:d}, but must be 4.'.format(
+        boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes_by_scores'):
+    filtered_mask = tf.math.greater(scores, min_score_threshold)
+    filtered_scores = tf.where(filtered_mask, scores, -tf.ones_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+
+    return filtered_boxes, filtered_scores
+
+
+def gather_instances(selected_indices, instances, *aux_instances):
+  """Gather instances by indices.
+
+  Args:
+    selected_indices: a Tensor of shape [batch, K] which indicates the selected
+      indices in instance dimension (2nd dimension).
+    instances: a Tensor of shape [batch, N, ...] where the 2nd dimension is
+      the instance dimension to be selected from.
+    *aux_instances: the additional Tensors whose shapes are in [batch, N, ...]
+      which are the tensors to be selected from using the `selected_indices`.
+
+  Returns:
+    selected_instances: the tensor of shape [batch, K, ...] which corresponds to
+      the selected instances of the `instances` tensor.
+    selected_aux_instances: the additional tensors of shape [batch, K, ...]
+      which corresponds to the selected instances of the `aux_instances`
+      tensors.
+ """ + batch_size = instances.shape[0] + if batch_size == 1: + selected_instances = tf.squeeze( + tf.gather(instances, selected_indices, axis=1), axis=1) + if aux_instances: + selected_aux_instances = [ + tf.squeeze( + tf.gather(a, selected_indices, axis=1), axis=1) + for a in aux_instances + ] + return tuple([selected_instances] + selected_aux_instances) + else: + return selected_instances + else: + indices_shape = tf.shape(selected_indices) + batch_indices = ( + tf.expand_dims(tf.range(indices_shape[0]), axis=-1) * + tf.ones([1, indices_shape[-1]], dtype=tf.int32)) + gather_nd_indices = tf.stack( + [batch_indices, selected_indices], axis=-1) + selected_instances = tf.gather_nd(instances, gather_nd_indices) + if aux_instances: + selected_aux_instances = [ + tf.gather_nd(a, gather_nd_indices) for a in aux_instances + ] + return tuple([selected_instances] + selected_aux_instances) + else: + return selected_instances + + +def top_k_boxes(boxes, scores, k): + """Sort and select top k boxes according to the scores. + + Args: + boxes: a tensor of shape [batch_size, N, 4] representing the coordinate of + the boxes. N is the number of boxes per image. + scores: a tensor of shsape [batch_size, N] representing the socre of the + boxes. + k: an integer or a tensor indicating the top k number. + + Returns: + selected_boxes: a tensor of shape [batch_size, k, 4] representing the + selected top k box coordinates. + selected_scores: a tensor of shape [batch_size, k] representing the selected + top k box scores. + """ + with tf.name_scope('top_k_boxes'): + selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True) + selected_boxes = gather_instances(top_k_indices, boxes) + return selected_boxes, selected_scores + + +def get_non_empty_box_indices(boxes): + """Get indices for non-empty boxes.""" + # Selects indices if box height or width is 0. 
+ height = boxes[:, 2] - boxes[:, 0] + width = boxes[:, 3] - boxes[:, 1] + indices = tf.where(tf.logical_and(tf.greater(height, 0), + tf.greater(width, 0))) + return indices[:, 0] + + +def bbox_overlap(boxes, gt_boxes): + """Calculates the overlap between proposal and ground truth boxes. + + Some `boxes` or `gt_boxes` may have been padded. The returned `iou` tensor + for these boxes will be -1. + + Args: + boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of + proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The + last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax] form. + gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4]. This + tensor might have paddings with a negative value. + + Returns: + iou: a tensor with as a shape of [batch_size, N, MAX_NUM_INSTANCES]. + """ + with tf.name_scope('bbox_overlap'): + bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split( + value=boxes, num_or_size_splits=4, axis=2) + gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split( + value=gt_boxes, num_or_size_splits=4, axis=2) + + # Calculates the intersection area. + i_xmin = tf.math.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1])) + i_xmax = tf.math.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1])) + i_ymin = tf.math.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1])) + i_ymax = tf.math.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1])) + i_area = ( + tf.math.maximum((i_xmax - i_xmin), 0) * + tf.math.maximum((i_ymax - i_ymin), 0)) + + # Calculates the union area. + bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min) + gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min) + # Adds a small epsilon to avoid divide-by-zero. + u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8 + + # Calculates IoU. + iou = i_area / u_area + + # Fills -1 for IoU entries between the padded ground truth boxes. 
+    gt_invalid_mask = tf.less(
+        tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
+    padding_mask = tf.logical_or(
+        tf.zeros_like(bb_x_min, dtype=tf.bool),
+        tf.transpose(gt_invalid_mask, [0, 2, 1]))
+    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
+
+    # Fills -1 for invalid (-1) boxes.
+    boxes_invalid_mask = tf.less(
+        tf.reduce_max(boxes, axis=-1, keepdims=True), 0.0)
+    iou = tf.where(boxes_invalid_mask, -tf.ones_like(iou), iou)
+
+    return iou
+
+
+def box_matching(boxes, gt_boxes, gt_classes):
+  """Match boxes to groundtruth boxes.
+
+  Given the proposal boxes and the groundtruth boxes and classes, perform the
+  groundtruth matching by taking the argmax of the IoU between boxes and
+  groundtruth boxes.
+
+  Args:
+    boxes: a tensor of shape of [batch_size, N, 4] representing the box
+      coordinates to be matched to groundtruth boxes.
+    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
+      the groundtruth box coordinates. It is padded with -1s to indicate the
+      invalid boxes.
+    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
+      classes. It is padded with -1s to indicate the invalid classes.
+
+  Returns:
+    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
+      the matched groundtruth box coordinates for each input box. If the box
+      does not overlap with any groundtruth boxes, the matched boxes of it
+      will be set to all 0s.
+    matched_gt_classes: a tensor of shape of [batch_size, N], representing
+      the matched groundtruth classes for each input box. If the box does not
+      overlap with any groundtruth boxes, the matched box classes of it will
+      be set to 0, which corresponds to the background class.
+    matched_gt_indices: a tensor of shape of [batch_size, N], representing
+      the indices of the matched groundtruth boxes in the original gt_boxes
+      tensor. If the box does not overlap with any groundtruth boxes, the
+      index of the matched groundtruth will be set to -1.
+ matched_iou: a tensor of shape of [batch_size, N], representing the IoU + between the box and its matched groundtruth box. The matched IoU is the + maximum IoU of the box and all the groundtruth boxes. + iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix + between boxes and the groundtruth boxes. The IoU between a box and the + invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1. + """ + # Compute IoU between boxes and gt_boxes. + # iou <- [batch_size, N, K] + iou = bbox_overlap(boxes, gt_boxes) + + # max_iou <- [batch_size, N] + # 0.0 -> no match to gt, or -1.0 match to no gt + matched_iou = tf.reduce_max(iou, axis=-1) + + # background_box_mask <- bool, [batch_size, N] + background_box_mask = tf.less_equal(matched_iou, 0.0) + + argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32) + + matched_gt_boxes, matched_gt_classes = gather_instances( + argmax_iou_indices, gt_boxes, gt_classes) + matched_gt_boxes = tf.where( + tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]), + tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype), + matched_gt_boxes) + matched_gt_classes = tf.where( + background_box_mask, + tf.zeros_like(matched_gt_classes), + matched_gt_classes) + + matched_gt_indices = tf.where( + background_box_mask, + -tf.ones_like(argmax_iou_indices), + argmax_iou_indices) + + return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, + matched_iou, iou) diff --git a/official/vision/beta/ops/mask_ops.py b/official/vision/beta/ops/mask_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..6109bfdb568d815875a3c5b2cdf58bab4b8ede4d --- /dev/null +++ b/official/vision/beta/ops/mask_ops.py @@ -0,0 +1,190 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions for segmentations.""" + +import math +# Import libraries +import cv2 +import numpy as np + + +def paste_instance_masks(masks, + detected_boxes, + image_height, + image_width): + """Paste instance masks to generate the image segmentation results. + + Args: + masks: a numpy array of shape [N, mask_height, mask_width] representing the + instance masks w.r.t. the `detected_boxes`. + detected_boxes: a numpy array of shape [N, 4] representing the reference + bounding boxes. + image_height: an integer representing the height of the image. + image_width: an integer representing the width of the image. + + Returns: + segms: a numpy array of shape [N, image_height, image_width] representing + the instance masks *pasted* on the image canvas. 
+ """ + + def expand_boxes(boxes, scale): + """Expands an array of boxes by a given scale.""" + # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227 # pylint: disable=line-too-long + # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form, + # whereas `boxes` here is in [x1, y1, w, h] form + w_half = boxes[:, 2] * .5 + h_half = boxes[:, 3] * .5 + x_c = boxes[:, 0] + w_half + y_c = boxes[:, 1] + h_half + + w_half *= scale + h_half *= scale + + boxes_exp = np.zeros(boxes.shape) + boxes_exp[:, 0] = x_c - w_half + boxes_exp[:, 2] = x_c + w_half + boxes_exp[:, 1] = y_c - h_half + boxes_exp[:, 3] = y_c + h_half + + return boxes_exp + + # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812 # pylint: disable=line-too-long + # To work around an issue with cv2.resize (it seems to automatically pad + # with repeated border values), we manually zero-pad the masks by 1 pixel + # prior to resizing back to the original image resolution. This prevents + # "top hat" artifacts. We therefore need to expand the reference boxes by an + # appropriate factor. + _, mask_height, mask_width = masks.shape + scale = max((mask_width + 2.0) / mask_width, + (mask_height + 2.0) / mask_height) + + ref_boxes = expand_boxes(detected_boxes, scale) + ref_boxes = ref_boxes.astype(np.int32) + padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32) + segms = [] + for mask_ind, mask in enumerate(masks): + im_mask = np.zeros((image_height, image_width), dtype=np.uint8) + # Process mask inside bounding boxes. 
+ padded_mask[1:-1, 1:-1] = mask[:, :] + + ref_box = ref_boxes[mask_ind, :] + w = ref_box[2] - ref_box[0] + 1 + h = ref_box[3] - ref_box[1] + 1 + w = np.maximum(w, 1) + h = np.maximum(h, 1) + + mask = cv2.resize(padded_mask, (w, h)) + mask = np.array(mask > 0.5, dtype=np.uint8) + + x_0 = min(max(ref_box[0], 0), image_width) + x_1 = min(max(ref_box[2] + 1, 0), image_width) + y_0 = min(max(ref_box[1], 0), image_height) + y_1 = min(max(ref_box[3] + 1, 0), image_height) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - ref_box[1]):(y_1 - ref_box[1]), + (x_0 - ref_box[0]):(x_1 - ref_box[0]) + ] + segms.append(im_mask) + + segms = np.array(segms) + assert masks.shape[0] == segms.shape[0] + return segms + + +def paste_instance_masks_v2(masks, + detected_boxes, + image_height, + image_width): + """Paste instance masks to generate the image segmentation (v2). + + Args: + masks: a numpy array of shape [N, mask_height, mask_width] representing the + instance masks w.r.t. the `detected_boxes`. + detected_boxes: a numpy array of shape [N, 4] representing the reference + bounding boxes. + image_height: an integer representing the height of the image. + image_width: an integer representing the width of the image. + + Returns: + segms: a numpy array of shape [N, image_height, image_width] representing + the instance masks *pasted* on the image canvas. + """ + _, mask_height, mask_width = masks.shape + + segms = [] + for i, mask in enumerate(masks): + box = detected_boxes[i, :] + xmin = box[0] + ymin = box[1] + xmax = xmin + box[2] + ymax = ymin + box[3] + + # Sample points of the cropped mask w.r.t. the image grid. + # Note that these coordinates may fall beyond the image. + # Pixel clipping will happen after warping. 
+ xmin_int = int(math.floor(xmin)) + xmax_int = int(math.ceil(xmax)) + ymin_int = int(math.floor(ymin)) + ymax_int = int(math.ceil(ymax)) + + alpha = box[2] / (1.0 * mask_width) + beta = box[3] / (1.0 * mask_height) + # pylint: disable=invalid-name + # Transformation from mask pixel indices to image coordinate. + M_mask_to_image = np.array( + [[alpha, 0, xmin], + [0, beta, ymin], + [0, 0, 1]], + dtype=np.float32) + # Transformation from image to cropped mask coordinate. + M_image_to_crop = np.array( + [[1, 0, -xmin_int], + [0, 1, -ymin_int], + [0, 0, 1]], + dtype=np.float32) + M = np.dot(M_image_to_crop, M_mask_to_image) + # Compensate the half pixel offset that OpenCV has in the + # warpPerspective implementation: the top-left pixel is sampled + # at (0,0), but we want it to be at (0.5, 0.5). + M = np.dot( + np.dot( + np.array([[1, 0, -0.5], + [0, 1, -0.5], + [0, 0, 1]], np.float32), + M), + np.array([[1, 0, 0.5], + [0, 1, 0.5], + [0, 0, 1]], np.float32)) + # pylint: enable=invalid-name + cropped_mask = cv2.warpPerspective( + mask.astype(np.float32), M, + (xmax_int - xmin_int, ymax_int - ymin_int)) + cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8) + + img_mask = np.zeros((image_height, image_width)) + x0 = max(min(xmin_int, image_width), 0) + x1 = max(min(xmax_int, image_width), 0) + y0 = max(min(ymin_int, image_height), 0) + y1 = max(min(ymax_int, image_height), 0) + img_mask[y0:y1, x0:x1] = cropped_mask[ + (y0 - ymin_int):(y1 - ymin_int), + (x0 - xmin_int):(x1 - xmin_int)] + + segms.append(img_mask) + + segms = np.array(segms) + return segms + diff --git a/official/vision/beta/ops/mask_ops_test.py b/official/vision/beta/ops/mask_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..09b7663294a7b56b82957f04a9390c1ad4824f5e --- /dev/null +++ b/official/vision/beta/ops/mask_ops_test.py @@ -0,0 +1,55 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Tests for mask_ops.py.""" + +# Import libraries +import numpy as np +import tensorflow as tf +from official.vision.beta.ops import mask_ops + + +class MaskUtilsTest(tf.test.TestCase): + + def testPasteInstanceMasks(self): + image_height = 10 + image_width = 10 + mask_height = 6 + mask_width = 6 + masks = np.random.randint(0, 255, (1, mask_height, mask_width)) + detected_boxes = np.array([[0.0, 2.0, mask_width, mask_height]]) + + _ = mask_ops.paste_instance_masks( + masks, detected_boxes, image_height, image_width) + + def testPasteInstanceMasksV2(self): + image_height = 10 + image_width = 10 + mask_height = 6 + mask_width = 6 + masks = np.random.randint(0, 255, (1, mask_height, mask_width)) + detected_boxes = np.array([[0.0, 2.0, mask_width, mask_height]]) + + image_masks = mask_ops.paste_instance_masks_v2( + masks, detected_boxes, image_height, image_width) + + self.assertNDArrayNear( + image_masks[:, 2:8, 0:6], + np.array(masks > 0.5, dtype=np.uint8), + 1e-5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/nms.py b/official/vision/beta/ops/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..945e7896d3b2ea0d3ee37dbd20125bc15125bd50 --- /dev/null +++ b/official/vision/beta/ops/nms.py @@ -0,0 +1,202 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tensorflow implementation of non max suppression.""" + +# Import libraries +import tensorflow as tf + +from official.vision.beta.ops import box_ops + + +NMS_TILE_SIZE = 512 + + +def _self_suppression(iou, _, iou_sum): + batch_size = tf.shape(iou)[0] + can_suppress_others = tf.cast( + tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]), iou.dtype) + iou_suppressed = tf.reshape( + tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype), + [batch_size, -1, 1]) * iou + iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2]) + return [ + iou_suppressed, + tf.reduce_any(iou_sum - iou_sum_new > 0.5), iou_sum_new + ] + + +def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx): + batch_size = tf.shape(boxes)[0] + new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0], + [batch_size, NMS_TILE_SIZE, 4]) + iou = box_ops.bbox_overlap(new_slice, box_slice) + ret_slice = tf.expand_dims( + tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype), + 2) * box_slice + return boxes, ret_slice, iou_threshold, inner_idx + 1 + + +def _suppression_loop_body(boxes, iou_threshold, output_size, idx): + """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE). + + Args: + boxes: a tensor with a shape of [batch_size, anchors, 4]. + iou_threshold: a float representing the threshold for deciding whether boxes + overlap too much with respect to IOU. 
+ output_size: an int32 tensor of size [batch_size]. Representing the number + of selected boxes for each batch. + idx: an integer scalar representing induction variable. + + Returns: + boxes: updated boxes. + iou_threshold: pass down iou_threshold to the next iteration. + output_size: the updated output_size. + idx: the updated induction variable. + """ + num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE + batch_size = tf.shape(boxes)[0] + + # Iterates over tiles that can possibly suppress the current tile. + box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0], + [batch_size, NMS_TILE_SIZE, 4]) + _, box_slice, _, _ = tf.while_loop( + lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, + _cross_suppression, [boxes, box_slice, iou_threshold, + tf.constant(0)]) + + # Iterates over the current tile to compute self-suppression. + iou = box_ops.bbox_overlap(box_slice, box_slice) + mask = tf.expand_dims( + tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape( + tf.range(NMS_TILE_SIZE), [-1, 1]), 0) + iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype) + suppressed_iou, _, _ = tf.while_loop( + lambda _iou, loop_condition, _iou_sum: loop_condition, _self_suppression, + [iou, tf.constant(True), + tf.reduce_sum(iou, [1, 2])]) + suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0 + box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2) + + # Uses box_slice to update the input boxes. + mask = tf.reshape( + tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1]) + boxes = tf.tile(tf.expand_dims( + box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape( + boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask) + boxes = tf.reshape(boxes, [batch_size, -1, 4]) + + # Updates output_size. 
+  output_size += tf.reduce_sum(
+      tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
+  return boxes, iou_threshold, output_size, idx + 1
+
+
+def sorted_non_max_suppression_padded(scores,
+                                      boxes,
+                                      max_output_size,
+                                      iou_threshold):
+  """A wrapper that handles non-maximum suppression.
+
+  Assumption:
+    * The boxes are sorted by scores unless the box is a dot (all coordinates
+      are zero).
+    * Boxes with higher scores can be used to suppress boxes with lower scores.
+
+  The overall design of the algorithm is to handle boxes tile-by-tile:
+
+  boxes = boxes.pad_to_multiple_of(tile_size)
+  num_tiles = len(boxes) // tile_size
+  output_boxes = []
+  for i in range(num_tiles):
+    box_tile = boxes[i*tile_size : (i+1)*tile_size]
+    for j in range(i - 1):
+      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
+      iou = bbox_overlap(box_tile, suppressing_tile)
+      # if the box is suppressed in iou, clear it to a dot
+      box_tile *= _update_boxes(iou)
+    # Iteratively handle the diagonal tile.
+    iou = _box_overlap(box_tile, box_tile)
+    iou_changed = True
+    while iou_changed:
+      # boxes that are not suppressed by anything else
+      suppressing_boxes = _get_suppressing_boxes(iou)
+      # boxes that are suppressed by suppressing_boxes
+      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
+      # clear iou to 0 for boxes that are suppressed, as they cannot be used
+      # to suppress other boxes any more
+      new_iou = _clear_iou(iou, suppressed_boxes)
+      iou_changed = (new_iou != iou)
+      iou = new_iou
+    # remaining boxes that can still suppress others, are selected boxes.
+    output_boxes.append(_get_suppressing_boxes(iou))
+    if len(output_boxes) >= max_output_size:
+      break
+
+  Args:
+    scores: a tensor with a shape of [batch_size, anchors].
+    boxes: a tensor with a shape of [batch_size, anchors, 4].
+    max_output_size: a scalar integer `Tensor` representing the maximum number
+      of boxes to be selected by non max suppression.
+ iou_threshold: a float representing the threshold for deciding whether boxes + overlap too much with respect to IOU. + + Returns: + nms_scores: a tensor with a shape of [batch_size, anchors]. It has same + dtype as input scores. + nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has + same dtype as input boxes. + """ + batch_size = tf.shape(boxes)[0] + num_boxes = tf.shape(boxes)[1] + pad = tf.cast( + tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE), + tf.int32) * NMS_TILE_SIZE - num_boxes + boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]]) + scores = tf.pad( + tf.cast(scores, tf.float32), [[0, 0], [0, pad]], constant_values=-1) + num_boxes += pad + + def _loop_cond(unused_boxes, unused_threshold, output_size, idx): + return tf.logical_and( + tf.reduce_min(output_size) < max_output_size, + idx < num_boxes // NMS_TILE_SIZE) + + selected_boxes, _, output_size, _ = tf.while_loop( + _loop_cond, _suppression_loop_body, [ + boxes, iou_threshold, + tf.zeros([batch_size], tf.int32), + tf.constant(0) + ]) + idx = num_boxes - tf.cast( + tf.nn.top_k( + tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) * + tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0], + tf.int32) + idx = tf.minimum(idx, num_boxes - 1) + idx = tf.reshape( + idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1]) + boxes = tf.reshape( + tf.gather(tf.reshape(boxes, [-1, 4]), idx), + [batch_size, max_output_size, 4]) + boxes = boxes * tf.cast( + tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape( + output_size, [-1, 1, 1]), boxes.dtype) + scores = tf.reshape( + tf.gather(tf.reshape(scores, [-1, 1]), idx), + [batch_size, max_output_size]) + scores = scores * tf.cast( + tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape( + output_size, [-1, 1]), scores.dtype) + return scores, boxes diff --git a/official/vision/beta/ops/preprocess_ops.py b/official/vision/beta/ops/preprocess_ops.py new file mode 100644 
index 0000000000000000000000000000000000000000..e9f8988f671da7f05d711a3a117e5ceb85ff8cd7 --- /dev/null +++ b/official/vision/beta/ops/preprocess_ops.py @@ -0,0 +1,557 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Preprocessing ops.""" + +import math +from six.moves import range +import tensorflow as tf + +from official.vision.beta.ops import box_ops + + +CENTER_CROP_FRACTION = 0.875 + + +def clip_or_pad_to_fixed_size(input_tensor, size, constant_values=0): + """Pads data to a fixed length at the first dimension. + + Args: + input_tensor: `Tensor` with any dimension. + size: `int` number for the first dimension of output Tensor. + constant_values: `int` value assigned to the paddings. + + Returns: + `Tensor` with the first dimension padded to `size`. + """ + input_shape = input_tensor.get_shape().as_list() + padding_shape = [] + + # Computes the padding length on the first dimension, clip input tensor if it + # is longer than `size`. + input_length = tf.shape(input_tensor)[0] + input_length = tf.clip_by_value(input_length, 0, size) + input_tensor = input_tensor[:input_length] + + padding_length = tf.maximum(0, size - input_length) + padding_shape.append(padding_length) + + # Copies shapes of the rest of input shape dimensions. + for i in range(1, len(input_shape)): + padding_shape.append(tf.shape(input_tensor)[i]) + + # Pads input tensor to the fixed first dimension. 
+ paddings = tf.cast(constant_values * tf.ones(padding_shape), + input_tensor.dtype) + padded_tensor = tf.concat([input_tensor, paddings], axis=0) + output_shape = input_shape + output_shape[0] = size + padded_tensor.set_shape(output_shape) + return padded_tensor + + +def normalize_image(image, + offset=(0.485, 0.456, 0.406), + scale=(0.229, 0.224, 0.225)): + """Normalizes the image to zero mean and unit variance.""" + with tf.name_scope('normalize_image'): + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + offset = tf.constant(offset) + offset = tf.expand_dims(offset, axis=0) + offset = tf.expand_dims(offset, axis=0) + image -= offset + + scale = tf.constant(scale) + scale = tf.expand_dims(scale, axis=0) + scale = tf.expand_dims(scale, axis=0) + image /= scale + return image + + +def compute_padded_size(desired_size, stride): + """Compute the padded size given the desired size and the stride. + + The padded size will be the smallest rectangle, such that each dimension is + the smallest multiple of the stride which is larger than the desired + dimension. For example, if desired_size = (100, 200) and stride = 32, + the output padded_size = (128, 224). + + Args: + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the target output image size. + stride: an integer, the stride of the backbone network. + + Returns: + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. 
+ """ + if isinstance(desired_size, list) or isinstance(desired_size, tuple): + padded_size = [int(math.ceil(d * 1.0 / stride) * stride) + for d in desired_size] + else: + padded_size = tf.cast( + tf.math.ceil( + tf.cast(desired_size, dtype=tf.float32) / stride) * stride, + tf.int32) + return padded_size + + +def resize_and_crop_image(image, + desired_size, + padded_size, + aug_scale_min=1.0, + aug_scale_max=1.0, + seed=1, + method=tf.image.ResizeMethod.BILINEAR): + """Resizes the input image to output size (RetinaNet style). + + Resize and pad images given the desired output size of the image and + stride size. + + Here are the preprocessing steps. + 1. For a given image, keep its aspect ratio and rescale the image to make it + the largest rectangle to be bounded by the rectangle specified by the + `desired_size`. + 2. Pad the rescaled image to the padded_size. + + Args: + image: a `Tensor` of shape [height, width, 3] representing an image. + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the desired actual output image size. + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. Padding will be applied + after scaling the image to the desired_size. + aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. 
It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.round(random_scale * desired_size) + else: + scaled_size = desired_size + + scale = tf.minimum( + scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) + scaled_size = tf.round(image_size * scale) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. 
+    if random_jittering:
+      max_offset = scaled_size - desired_size
+      max_offset = tf.where(
+          tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
+      offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed)
+      offset = tf.cast(offset, tf.int32)
+    else:
+      offset = tf.zeros((2,), tf.int32)
+
+    scaled_image = tf.image.resize(
+        image, tf.cast(scaled_size, tf.int32), method=method)
+
+    if random_jittering:
+      scaled_image = scaled_image[
+          offset[0]:offset[0] + desired_size[0],
+          offset[1]:offset[1] + desired_size[1], :]
+
+    output_image = tf.image.pad_to_bounding_box(
+        scaled_image, 0, 0, padded_size[0], padded_size[1])
+
+    image_info = tf.stack([
+        image_size,
+        tf.constant(desired_size, dtype=tf.float32),
+        image_scale,
+        tf.cast(offset, tf.float32)])
+    return output_image, image_info
+
+
+def resize_and_crop_image_v2(image,
+                             short_side,
+                             long_side,
+                             padded_size,
+                             aug_scale_min=1.0,
+                             aug_scale_max=1.0,
+                             seed=1,
+                             method=tf.image.ResizeMethod.BILINEAR):
+  """Resizes the input image to output size (Faster R-CNN style).
+
+  Resize and pad images given the specified short / long side length and the
+  stride size.
+
+  Here are the preprocessing steps.
+  1. For a given image, keep its aspect ratio and first try to rescale the short
+     side of the original image to `short_side`.
+  2. If the scaled image after 1 has a long side that exceeds `long_side`, keep
+     the aspect ratio and rescale the long side of the image to `long_side`.
+  3. Pad the rescaled image to the padded_size.
+
+  Args:
+    image: a `Tensor` of shape [height, width, 3] representing an image.
+    short_side: a scalar `Tensor` or `int` representing the desired short side
+      to be rescaled to.
+    long_side: a scalar `Tensor` or `int` representing the desired long side to
+      be rescaled to.
+    padded_size: a `Tensor` or `int` list/tuple of two elements representing
+      [height, width] of the padded output image size. Padding will be applied
+      after scaling the image to the desired_size.
+ aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image_v2'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + + scale_using_short_side = ( + short_side / tf.math.minimum(image_size[0], image_size[1])) + scale_using_long_side = ( + long_side / tf.math.maximum(image_size[0], image_size[1])) + + scaled_size = tf.math.round(image_size * scale_using_short_side) + scaled_size = tf.where( + tf.math.greater( + tf.math.maximum(scaled_size[0], scaled_size[1]), long_side), + tf.math.round(image_size * scale_using_long_side), + scaled_size) + desired_size = scaled_size + + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.math.round(random_scale * scaled_size) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. 
+ if random_jittering: + max_offset = scaled_size - desired_size + max_offset = tf.where( + tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) + offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed) + offset = tf.cast(offset, tf.int32) + else: + offset = tf.zeros((2,), tf.int32) + + scaled_image = tf.image.resize( + image, tf.cast(scaled_size, tf.int32), method=method) + + if random_jittering: + scaled_image = scaled_image[ + offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] + + output_image = tf.image.pad_to_bounding_box( + scaled_image, 0, 0, padded_size[0], padded_size[1]) + + image_info = tf.stack([ + image_size, + tf.cast(desired_size, dtype=tf.float32), + image_scale, + tf.cast(offset, tf.float32)]) + return output_image, image_info + + +def center_crop_image(image): + """Center crop a square shape slice from the input image. + + It crops a square shape slice from the image. The side of the actual crop + is 224 / 256 = 0.875 of the short side of the original image. References: + [1] Very Deep Convolutional Networks for Large-Scale Image Recognition + https://arxiv.org/abs/1409.1556 + [2] Deep Residual Learning for Image Recognition + https://arxiv.org/abs/1512.03385 + + Args: + image: a Tensor of shape [height, width, 3] representing the input image. + + Returns: + cropped_image: a Tensor representing the center cropped image. + """ + with tf.name_scope('center_crop_image'): + image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32) + crop_size = ( + CENTER_CROP_FRACTION * tf.math.minimum(image_size[0], image_size[1])) + crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32) + crop_size = tf.cast(crop_size, dtype=tf.int32) + cropped_image = image[ + crop_offset[0]:crop_offset[0] + crop_size, + crop_offset[1]:crop_offset[1] + crop_size, :] + return cropped_image + + +def center_crop_image_v2(image_bytes, image_shape): + """Center crop a square shape slice from the input image. 
+
+  It crops a square shape slice from the image. The side of the actual crop
+  is 224 / 256 = 0.875 of the short side of the original image. References:
+  [1] Very Deep Convolutional Networks for Large-Scale Image Recognition
+      https://arxiv.org/abs/1409.1556
+  [2] Deep Residual Learning for Image Recognition
+      https://arxiv.org/abs/1512.03385
+
+  This is a faster version of `center_crop_image` which takes the original
+  image bytes and image size as the inputs, and partially decode the JPEG
+  bytes according to the center crop.
+
+  Args:
+    image_bytes: a Tensor of type string representing the raw image bytes.
+    image_shape: a Tensor specifying the shape of the raw image.
+
+  Returns:
+    cropped_image: a Tensor representing the center cropped image.
+  """
+  with tf.name_scope('center_image_crop_v2'):
+    image_shape = tf.cast(image_shape, tf.float32)
+    crop_size = (
+        CENTER_CROP_FRACTION * tf.math.minimum(image_shape[0], image_shape[1]))
+    crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32)
+    crop_size = tf.cast(crop_size, dtype=tf.int32)
+    crop_window = tf.stack(
+        [crop_offset[0], crop_offset[1], crop_size, crop_size])
+    cropped_image = tf.image.decode_and_crop_jpeg(
+        image_bytes, crop_window, channels=3)
+    return cropped_image
+
+
+def random_crop_image(image,
+                      aspect_ratio_range=(3. / 4., 4. / 3.),
+                      area_range=(0.08, 1.0),
+                      max_attempts=10,
+                      seed=1):
+  """Randomly crop an arbitrary shaped slice from the input image.
+
+  Args:
+    image: a Tensor of shape [height, width, 3] representing the input image.
+    aspect_ratio_range: a list of floats. The cropped area of the image must
+      have an aspect ratio = width / height within this range.
+    area_range: a list of floats. The cropped area of the image must contain
+      a fraction of the input image within this range.
+    max_attempts: the number of attempts at generating a cropped region of the
+      image of the specified constraints. After max_attempts failures, return
+      the entire image.
+    seed: the seed of the random generator.
+
+  Returns:
+    cropped_image: a Tensor representing the random cropped image. Can be the
+      original image if max_attempts is exhausted.
+  """
+  with tf.name_scope('random_crop_image'):
+    crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
+        tf.shape(image),
+        tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
+        seed=seed,
+        min_object_covered=area_range[0],
+        aspect_ratio_range=aspect_ratio_range,
+        area_range=area_range,
+        max_attempts=max_attempts)
+    cropped_image = tf.slice(image, crop_offset, crop_size)
+    return cropped_image
+
+
+def random_crop_image_v2(image_bytes,
+                         image_shape,
+                         aspect_ratio_range=(3. / 4., 4. / 3.),
+                         area_range=(0.08, 1.0),
+                         max_attempts=10,
+                         seed=1):
+  """Randomly crop an arbitrary shaped slice from the input image.
+
+  This is a faster version of `random_crop_image` which takes the original
+  image bytes and image size as the inputs, and partially decode the JPEG
+  bytes according to the generated crop.
+
+  Args:
+    image_bytes: a Tensor of type string representing the raw image bytes.
+    image_shape: a Tensor specifying the shape of the raw image.
+    aspect_ratio_range: a list of floats. The cropped area of the image must
+      have an aspect ratio = width / height within this range.
+    area_range: a list of floats. The cropped area of the image must contain
+      a fraction of the input image within this range.
+    max_attempts: the number of attempts at generating a cropped region of the
+      image of the specified constraints. After max_attempts failures, return
+      the entire image.
+    seed: the seed of the random generator.
+
+  Returns:
+    cropped_image: a Tensor representing the random cropped image. Can be the
+      original image if max_attempts is exhausted.
+ """ + with tf.name_scope('random_crop_image_v2'): + crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( + image_shape, + tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), + seed=seed, + min_object_covered=area_range[0], + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts) + offset_y, offset_x, _ = tf.unstack(crop_offset) + crop_height, crop_width, _ = tf.unstack(crop_size) + crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width]) + cropped_image = tf.image.decode_and_crop_jpeg( + image_bytes, crop_window, channels=3) + return cropped_image + + +def resize_and_crop_boxes(boxes, + image_scale, + output_size, + offset): + """Resizes boxes to output size with scale and offset. + + Args: + boxes: `Tensor` of shape [N, 4] representing ground truth boxes. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. + offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + + Returns: + boxes: `Tensor` of shape [N, 4] representing the scaled boxes. + """ + with tf.name_scope('resize_and_crop_boxes'): + # Adjusts box coordinates based on image_scale and offset. + boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) + boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) + # Clips the boxes. + boxes = box_ops.clip_boxes(boxes, output_size) + return boxes + + +def resize_and_crop_masks(masks, + image_scale, + output_size, + offset): + """Resizes boxes to output size with scale and offset. + + Args: + masks: `Tensor` of shape [N, H, W, 1] representing ground truth masks. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. 
+ offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + + Returns: + masks: `Tensor` of shape [N, H, W, 1] representing the scaled masks. + """ + with tf.name_scope('resize_and_crop_masks'): + mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32) + # Pad masks to avoid empty mask annotations. + masks = tf.concat( + [tf.zeros([1, mask_size[0], mask_size[1], 1]), masks], axis=0) + + scaled_size = tf.cast(image_scale * mask_size, tf.int32) + scaled_masks = tf.image.resize( + masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + offset = tf.cast(offset, tf.int32) + scaled_masks = scaled_masks[ + :, + offset[0]:offset[0] + output_size[0], + offset[1]:offset[1] + output_size[1], + :] + + output_masks = tf.image.pad_to_bounding_box( + scaled_masks, 0, 0, output_size[0], output_size[1]) + # Remove padding. + output_masks = output_masks[1::] + return output_masks + + +def horizontal_flip_image(image): + """Flips image horizontally.""" + return tf.image.flip_left_right(image) + + +def horizontal_flip_boxes(normalized_boxes): + """Flips normalized boxes horizontally.""" + ymin, xmin, ymax, xmax = tf.split( + value=normalized_boxes, num_or_size_splits=4, axis=1) + flipped_xmin = tf.subtract(1.0, xmax) + flipped_xmax = tf.subtract(1.0, xmin) + flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1) + return flipped_boxes + + +def horizontal_flip_masks(masks): + """Flips masks horizontally.""" + return masks[:, :, ::-1] + + +def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=1): + """Randomly flips input image and bounding boxes.""" + with tf.name_scope('random_horizontal_flip'): + do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5) + + image = tf.cond( + do_flip, + lambda: horizontal_flip_image(image), + lambda: image) + + if normalized_boxes is not None: + normalized_boxes = tf.cond( + do_flip, + lambda: horizontal_flip_boxes(normalized_boxes), + lambda: normalized_boxes) + + if masks is 
not None: + masks = tf.cond( + do_flip, + lambda: horizontal_flip_masks(masks), + lambda: masks) + + return image, normalized_boxes, masks diff --git a/official/vision/beta/ops/preprocess_ops_3d.py b/official/vision/beta/ops/preprocess_ops_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..ad9d03029dc951996792022f410ca943b3d0f314 --- /dev/null +++ b/official/vision/beta/ops/preprocess_ops_3d.py @@ -0,0 +1,355 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Utils for processing video dataset features.""" + +from typing import Optional, Tuple +import tensorflow as tf + + +def _sample_or_pad_sequence_indices(sequence: tf.Tensor, + num_steps: int, + stride: int, + offset: tf.Tensor) -> tf.Tensor: + """Returns indices to take for sampling or padding sequences to fixed size.""" + sequence_length = tf.shape(sequence)[0] + sel_idx = tf.range(sequence_length) + + # Repeats sequence until num_steps are available in total. 
+ max_length = num_steps * stride + offset + num_repeats = tf.math.floordiv( + max_length + sequence_length - 1, sequence_length) + sel_idx = tf.tile(sel_idx, [num_repeats]) + + steps = tf.range(offset, offset + num_steps * stride, stride) + return tf.gather(sel_idx, steps) + + +def sample_linspace_sequence(sequence: tf.Tensor, + num_windows: int, + num_steps: int, + stride: int) -> tf.Tensor: + """Samples `num_windows` segments from sequence with linearly spaced offsets. + + The samples are concatenated in a single `tf.Tensor` in order to have the same + format structure per timestep (e.g. a single frame). If `num_steps` * `stride` + is bigger than the number of timesteps, the sequence is repeated. This + function can be used in evaluation in order to extract enough segments to span + the entire sequence. + + Args: + sequence: Any tensor where the first dimension is timesteps. + num_windows: Number of windows retrieved from the sequence. + num_steps: Number of steps (e.g. frames) to take. + stride: Distance to sample between timesteps. + + Returns: + A single `tf.Tensor` with first dimension `num_windows` * `num_steps`. The + tensor contains the concatenated list of `num_windows` tensors which offsets + have been linearly spaced from input. 
+ """ + sequence_length = tf.shape(sequence)[0] + max_offset = tf.maximum(0, sequence_length - num_steps * stride) + offsets = tf.linspace(0.0, tf.cast(max_offset, tf.float32), num_windows) + offsets = tf.cast(offsets, tf.int32) + + all_indices = [] + for i in range(num_windows): + all_indices.append(_sample_or_pad_sequence_indices( + sequence=sequence, + num_steps=num_steps, + stride=stride, + offset=offsets[i])) + + indices = tf.concat(all_indices, axis=0) + indices.set_shape((num_windows * num_steps,)) + return tf.gather(sequence, indices) + + +def sample_sequence(sequence: tf.Tensor, + num_steps: int, + random: bool, + stride: int, + seed: Optional[int] = None) -> tf.Tensor: + """Samples a single segment of size `num_steps` from a given sequence. + + If `random` is not `True`, this function will simply sample the central window + of the sequence. Otherwise, a random offset will be chosen in a way that the + desired `num_steps` might be extracted from the sequence. + + Args: + sequence: Any tensor where the first dimension is timesteps. + num_steps: Number of steps (e.g. frames) to take. + random: A boolean indicating whether to random sample the single window. If + `True`, the offset is randomized. If `False`, the middle frame minus half + of `num_steps` is the first frame. + stride: Distance to sample between timesteps. + seed: A deterministic seed to use when sampling. + + Returns: + A single `tf.Tensor` with first dimension `num_steps` with the sampled + segment. 
+ """ + sequence_length = tf.shape(sequence)[0] + + if random: + sequence_length = tf.cast(sequence_length, tf.float32) + frame_stride = tf.cast(stride, tf.float32) + max_offset = tf.cond( + sequence_length > (num_steps - 1) * frame_stride, + lambda: sequence_length - (num_steps - 1) * frame_stride, + lambda: sequence_length) + offset = tf.random.uniform( + (), + maxval=tf.cast(max_offset, dtype=tf.int32), + dtype=tf.int32, + seed=seed) + else: + offset = (sequence_length - num_steps * stride) // 2 + offset = tf.maximum(0, offset) + + indices = _sample_or_pad_sequence_indices( + sequence=sequence, + num_steps=num_steps, + stride=stride, + offset=offset) + indices.set_shape((num_steps,)) + + return tf.gather(sequence, indices) + + +def decode_jpeg(image_string: tf.Tensor, channels: int = 0) -> tf.Tensor: + """Decodes JPEG raw bytes string into a RGB uint8 Tensor. + + Args: + image_string: A `tf.Tensor` of type strings with the raw JPEG bytes where + the first dimension is timesteps. + channels: Number of channels of the JPEG image. Allowed values are 0, 1 and + 3. If 0, the number of channels will be calculated at runtime and no + static shape is set. + + Returns: + A Tensor of shape [T, H, W, C] of type uint8 with the decoded images. + """ + return tf.map_fn( + lambda x: tf.image.decode_jpeg(x, channels=channels), + image_string, back_prop=False, dtype=tf.uint8) + + +def crop_image(frames: tf.Tensor, + target_height: int, + target_width: int, + random: bool = False, + num_crops: int = 1, + seed: Optional[int] = None) -> tf.Tensor: + """Crops the image sequence of images. + + If requested size is bigger than image size, image is padded with 0. If not + random cropping, a central crop is performed if num_crops is 1. + + Args: + frames: A Tensor of dimension [timesteps, in_height, in_width, channels]. + target_height: Target cropped image height. + target_width: Target cropped image width. + random: A boolean indicating if crop should be randomized. 
+ num_crops: Number of crops (support 1 for central crop and 3 for 3-crop). + seed: A deterministic seed to use when random cropping. + + Returns: + A Tensor of shape [timesteps, out_height, out_width, channels] of type uint8 + with the cropped images. + """ + if random: + # Random spatial crop. + shape = tf.shape(frames) + # If a static_shape is available (e.g. when using this method from add_image + # method), it will be used to have an output tensor with static shape. + static_shape = frames.shape.as_list() + seq_len = shape[0] if static_shape[0] is None else static_shape[0] + channels = shape[3] if static_shape[3] is None else static_shape[3] + frames = tf.image.random_crop( + frames, (seq_len, target_height, target_width, channels), seed) + else: + if num_crops == 1: + # Central crop or pad. + frames = tf.image.resize_with_crop_or_pad(frames, target_height, + target_width) + + elif num_crops == 3: + # Three-crop evaluation. + shape = tf.shape(frames) + static_shape = frames.shape.as_list() + seq_len = shape[0] if static_shape[0] is None else static_shape[0] + height = shape[1] if static_shape[1] is None else static_shape[1] + width = shape[2] if static_shape[2] is None else static_shape[2] + channels = shape[3] if static_shape[3] is None else static_shape[3] + + size = tf.convert_to_tensor( + (seq_len, target_height, target_width, channels)) + + offset_1 = tf.broadcast_to([0, 0, 0, 0], [4]) + # pylint:disable=g-long-lambda + offset_2 = tf.cond( + tf.greater_equal(height, width), + true_fn=lambda: tf.broadcast_to([ + 0, tf.cast(height, tf.float32) / 2 - target_height // 2, 0, 0 + ], [4]), + false_fn=lambda: tf.broadcast_to([ + 0, 0, tf.cast(width, tf.float32) / 2 - target_width // 2, 0 + ], [4])) + offset_3 = tf.cond( + tf.greater_equal(height, width), + true_fn=lambda: tf.broadcast_to( + [0, tf.cast(height, tf.float32) - target_height, 0, 0], [4]), + false_fn=lambda: tf.broadcast_to( + [0, 0, tf.cast(width, tf.float32) - target_width, 0], [4])) + # 
pylint:disable=g-long-lambda + + crops = [] + for offset in [offset_1, offset_2, offset_3]: + offset = tf.cast(tf.math.round(offset), tf.int32) + crops.append(tf.slice(frames, offset, size)) + frames = tf.concat(crops, axis=0) + + else: + raise NotImplementedError( + f"Only 1-crop and 3-crop are supported. Found {num_crops!r}.") + + return frames + + +def resize_smallest(frames: tf.Tensor, + min_resize: int) -> tf.Tensor: + """Resizes frames so that min(`height`, `width`) is equal to `min_resize`. + + This function will not do anything if the min(`height`, `width`) is already + equal to `min_resize`. This allows to save compute time. + + Args: + frames: A Tensor of dimension [timesteps, input_h, input_w, channels]. + min_resize: Minimum size of the final image dimensions. + + Returns: + A Tensor of shape [timesteps, output_h, output_w, channels] of type + frames.dtype where min(output_h, output_w) = min_resize. + """ + shape = tf.shape(frames) + input_h = shape[1] + input_w = shape[2] + + output_h = tf.maximum(min_resize, (input_h * min_resize) // input_w) + output_w = tf.maximum(min_resize, (input_w * min_resize) // input_h) + + def resize_fn(): + frames_resized = tf.image.resize(frames, (output_h, output_w)) + return tf.cast(frames_resized, frames.dtype) + + should_resize = tf.math.logical_or(tf.not_equal(input_w, output_w), + tf.not_equal(input_h, output_h)) + frames = tf.cond(should_resize, resize_fn, lambda: frames) + + return frames + + +def random_crop_resize(frames: tf.Tensor, + output_h: int, + output_w: int, + num_frames: int, + num_channels: int, + aspect_ratio: Tuple[float, float], + area_range: Tuple[float, float]) -> tf.Tensor: + """First crops clip with jittering and then resizes to (output_h, output_w). + + Args: + frames: A Tensor of dimension [timesteps, input_h, input_w, channels]. + output_h: Resized image height. + output_w: Resized image width. + num_frames: Number of input frames per clip. + num_channels: Number of channels of the clip. 
+ aspect_ratio: Float tuple with the aspect range for cropping. + area_range: Float tuple with the area range for cropping. + Returns: + A Tensor of shape [timesteps, output_h, output_w, channels] of type + frames.dtype. + """ + shape = tf.shape(frames) + seq_len, _, _, channels = shape[0], shape[1], shape[2], shape[3] + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) + factor = output_w / output_h + aspect_ratio = (aspect_ratio[0] * factor, aspect_ratio[1] * factor) + sample_distorted_bbox = tf.image.sample_distorted_bounding_box( + shape[1:], + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=aspect_ratio, + area_range=area_range, + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bbox + offset_y, offset_x, _ = tf.unstack(bbox_begin) + target_height, target_width, _ = tf.unstack(bbox_size) + size = tf.convert_to_tensor(( + seq_len, target_height, target_width, channels)) + offset = tf.convert_to_tensor(( + 0, offset_y, offset_x, 0)) + frames = tf.slice(frames, offset, size) + frames = tf.cast( + tf.image.resize(frames, (output_h, output_w)), + frames.dtype) + frames.set_shape((num_frames, output_h, output_w, num_channels)) + return frames + + +def random_flip_left_right( + frames: tf.Tensor, + seed: Optional[int] = None) -> tf.Tensor: + """Flips all the frames with a probability of 50%. + + Args: + frames: A Tensor of shape [timesteps, input_h, input_w, channels]. + seed: A seed to use for the random sampling. + + Returns: + A Tensor of shape [timesteps, output_h, output_w, channels] eventually + flipped left right. 
+ """ + is_flipped = tf.random.uniform( + (), minval=0, maxval=2, dtype=tf.int32, seed=seed) + + frames = tf.cond(tf.equal(is_flipped, 1), + true_fn=lambda: tf.image.flip_left_right(frames), + false_fn=lambda: frames) + return frames + + +def normalize_image(frames: tf.Tensor, + zero_centering_image: bool, + dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor: + """Normalizes images. + + Args: + frames: A Tensor of numbers. + zero_centering_image: If True, results are in [-1, 1], if False, results are + in [0, 1]. + dtype: Type of output Tensor. + + Returns: + A Tensor of same shape as the input and of the given type. + """ + frames = tf.cast(frames, dtype) + if zero_centering_image: + return frames * (2.0 / 255.0) - 1.0 + else: + return frames / 255.0 diff --git a/official/vision/beta/ops/preprocess_ops_3d_test.py b/official/vision/beta/ops/preprocess_ops_3d_test.py new file mode 100644 index 0000000000000000000000000000000000000000..b2db9334b7d19bb6f6194219757b2f22e9c71fe0 --- /dev/null +++ b/official/vision/beta/ops/preprocess_ops_3d_test.py @@ -0,0 +1,159 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 + +import io +import itertools +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.vision.beta.ops import preprocess_ops_3d + + +class ParserUtilsTest(tf.test.TestCase): + + def setUp(self): + super().setUp() + # [[0, 1, ..., 119], [1, 2, ..., 120], ..., [119, 120, ..., 218]]. + self._frames = tf.stack([tf.range(i, i + 120) for i in range(90)]) + self._frames = tf.cast(self._frames, tf.uint8) + self._frames = self._frames[tf.newaxis, :, :, tf.newaxis] + self._frames = tf.broadcast_to(self._frames, (6, 90, 120, 3)) + + # Create an equivalent numpy array for assertions. + self._np_frames = np.array([range(i, i + 120) for i in range(90)]) + self._np_frames = self._np_frames[np.newaxis, :, :, np.newaxis] + self._np_frames = np.broadcast_to(self._np_frames, (6, 90, 120, 3)) + + def test_sample_linspace_sequence(self): + sequence = tf.range(100) + sampled_seq_1 = preprocess_ops_3d.sample_linspace_sequence( + sequence, 10, 10, 1) + sampled_seq_2 = preprocess_ops_3d.sample_linspace_sequence( + sequence, 7, 10, 1) + sampled_seq_3 = preprocess_ops_3d.sample_linspace_sequence( + sequence, 7, 5, 2) + sampled_seq_4 = preprocess_ops_3d.sample_linspace_sequence( + sequence, 101, 1, 1) + + self.assertAllEqual(sampled_seq_1, range(100)) + # [0, 1, 2, 3, 4, ..., 8, 9, 15, 16, ..., 97, 98, 99] + self.assertAllEqual( + sampled_seq_2, + [15 * i + j for i, j in itertools.product(range(7), range(10))]) + # [0, 2, 4, 6, 8, 15, 17, 19, ..., 96, 98] + self.assertAllEqual( + sampled_seq_3, + [15 * i + 2 * j for i, j in itertools.product(range(7), range(5))]) + self.assertAllEqual(sampled_seq_4, [0] + list(range(100))) + + def test_sample_sequence(self): + sequence = tf.range(100) + sampled_seq_1 = preprocess_ops_3d.sample_sequence(sequence, 10, False, 1) + sampled_seq_2 = preprocess_ops_3d.sample_sequence(sequence, 10, False, 2) + sampled_seq_3 = preprocess_ops_3d.sample_sequence(sequence, 10, True, 1) + + 
self.assertAllEqual(sampled_seq_1, range(45, 55)) + self.assertAllEqual(sampled_seq_2, range(40, 60, 2)) + + offset_3 = sampled_seq_3[0] + self.assertBetween(offset_3, 0, 99) + self.assertAllEqual(sampled_seq_3, range(offset_3, offset_3 + 10)) + + def test_decode_jpeg(self): + # Create a random RGB JPEG image. + random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8) + random_image = Image.fromarray(random_image) + with io.BytesIO() as buffer: + random_image.save(buffer, format='JPEG') + raw_image_bytes = buffer.getvalue() + + raw_image = tf.constant([raw_image_bytes, raw_image_bytes]) + decoded_image = preprocess_ops_3d.decode_jpeg(raw_image, 3) + + self.assertEqual(decoded_image.shape.as_list()[3], 3) + self.assertAllEqual(decoded_image.shape, (2, 263, 320, 3)) + + def test_crop_image(self): + cropped_image_1 = preprocess_ops_3d.crop_image(self._frames, 50, 70) + cropped_image_2 = preprocess_ops_3d.crop_image(self._frames, 200, 200) + cropped_image_3 = preprocess_ops_3d.crop_image(self._frames, 50, 70, True) + cropped_image_4 = preprocess_ops_3d.crop_image( + self._frames, 90, 90, False, 3) + + self.assertAllEqual(cropped_image_1.shape, (6, 50, 70, 3)) + self.assertAllEqual(cropped_image_1, self._np_frames[:, 20:70, 25:95, :]) + + self.assertAllEqual(cropped_image_2.shape, (6, 200, 200, 3)) + expected = np.pad( + self._np_frames, ((0, 0), (55, 55), (40, 40), (0, 0)), 'constant') + self.assertAllEqual(cropped_image_2, expected) + + self.assertAllEqual(cropped_image_3.shape, (6, 50, 70, 3)) + offset = cropped_image_3[0, 0, 0, 0] + expected = np.array([range(i, i + 70) for i in range(offset, offset + 50)]) + expected = expected[np.newaxis, :, :, np.newaxis] + expected = np.broadcast_to(expected, (6, 50, 70, 3)) + self.assertAllEqual(cropped_image_3, expected) + self.assertAllEqual(cropped_image_4.shape, (18, 90, 90, 3)) + + def test_resize_smallest(self): + resized_frames_1 = preprocess_ops_3d.resize_smallest(self._frames, 180) + 
resized_frames_2 = preprocess_ops_3d.resize_smallest(self._frames, 45) + resized_frames_3 = preprocess_ops_3d.resize_smallest(self._frames, 90) + resized_frames_4 = preprocess_ops_3d.resize_smallest( + tf.transpose(self._frames, (0, 2, 1, 3)), 45) + + self.assertAllEqual(resized_frames_1.shape, (6, 180, 240, 3)) + self.assertAllEqual(resized_frames_2.shape, (6, 45, 60, 3)) + self.assertAllEqual(resized_frames_3.shape, (6, 90, 120, 3)) + self.assertAllEqual(resized_frames_4.shape, (6, 60, 45, 3)) + + def test_random_crop_resize(self): + resized_frames_1 = preprocess_ops_3d.random_crop_resize( + self._frames, 256, 256, 6, 3, (0.5, 2), (0.3, 1)) + resized_frames_2 = preprocess_ops_3d.random_crop_resize( + self._frames, 224, 224, 6, 3, (0.5, 2), (0.3, 1)) + resized_frames_3 = preprocess_ops_3d.random_crop_resize( + self._frames, 256, 256, 6, 3, (0.8, 1.2), (0.3, 1)) + resized_frames_4 = preprocess_ops_3d.random_crop_resize( + self._frames, 256, 256, 6, 3, (0.5, 2), (0.1, 1)) + self.assertAllEqual(resized_frames_1.shape, (6, 256, 256, 3)) + self.assertAllEqual(resized_frames_2.shape, (6, 224, 224, 3)) + self.assertAllEqual(resized_frames_3.shape, (6, 256, 256, 3)) + self.assertAllEqual(resized_frames_4.shape, (6, 256, 256, 3)) + + def test_random_flip_left_right(self): + flipped_frames = preprocess_ops_3d.random_flip_left_right(self._frames) + + flipped = np.fliplr(self._np_frames[0, :, :, 0]) + flipped = flipped[np.newaxis, :, :, np.newaxis] + flipped = np.broadcast_to(flipped, (6, 90, 120, 3)) + self.assertTrue((flipped_frames == self._np_frames).numpy().all() or ( + flipped_frames == flipped).numpy().all()) + + def test_normalize_image(self): + normalized_images_1 = preprocess_ops_3d.normalize_image( + self._frames, False, tf.float32) + normalized_images_2 = preprocess_ops_3d.normalize_image( + self._frames, True, tf.float32) + + self.assertAllClose(normalized_images_1, self._np_frames / 255) + self.assertAllClose(normalized_images_2, self._np_frames * 2 / 255 - 1.0) 
+ + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/preprocess_ops_test.py b/official/vision/beta/ops/preprocess_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d501cbfc69e0ff8b0f0d22a442e3627364d4701b --- /dev/null +++ b/official/vision/beta/ops/preprocess_ops_test.py @@ -0,0 +1,230 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Tests for preprocess_ops.py.""" + +import io +# Import libraries +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.vision.beta.ops import preprocess_ops + + +def _encode_image(image_array, fmt): + image = Image.fromarray(image_array) + with io.BytesIO() as output: + image.save(output, format=fmt) + return output.getvalue() + + +class InputUtilsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + ([1], 10), + ([1, 2], 10), + ([1, 2, 3], 10), + ([11], 10), + ([12, 2], 10), + ([13, 2, 3], 10), + ) + def testPadToFixedSize(self, input_shape, output_size): + # Copies input shape to padding shape. 
+ clip_shape = input_shape[:] + clip_shape[0] = min(output_size, clip_shape[0]) + padding_shape = input_shape[:] + padding_shape[0] = max(output_size - input_shape[0], 0) + expected_outputs = np.concatenate( + [np.ones(clip_shape), np.zeros(padding_shape)], axis=0) + + data = tf.ones(input_shape) + output_data = preprocess_ops.clip_or_pad_to_fixed_size( + data, output_size, constant_values=0) + output_data = output_data.numpy() + self.assertAllClose(output_size, output_data.shape[0]) + self.assertAllClose(expected_outputs, output_data) + + @parameterized.parameters( + (100, 200, 100, 200, 32, 1.0, 1.0, 128, 224), + (100, 256, 128, 256, 32, 1.0, 1.0, 128, 256), + (200, 512, 200, 128, 32, 0.25, 0.25, 224, 128), + ) + def testResizeAndCropImageRectangluarCase(self, + input_height, + input_width, + desired_height, + desired_width, + stride, + scale_y, + scale_x, + output_height, + output_width): + image = tf.convert_to_tensor( + np.random.rand(input_height, input_width, 3)) + + desired_size = (desired_height, desired_width) + resized_image, image_info = preprocess_ops.resize_and_crop_image( + image, + desired_size=desired_size, + padded_size=preprocess_ops.compute_padded_size(desired_size, stride)) + resized_image_shape = tf.shape(resized_image) + + self.assertAllEqual( + [output_height, output_width, 3], + resized_image_shape.numpy()) + self.assertNDArrayNear( + [[input_height, input_width], + [desired_height, desired_width], + [scale_y, scale_x], + [0.0, 0.0]], + image_info.numpy(), + 1e-5) + + @parameterized.parameters( + (100, 200, 220, 220, 32, 1.1, 1.1, 224, 224), + (512, 512, 1024, 1024, 32, 2.0, 2.0, 1024, 1024), + ) + def testResizeAndCropImageSquareCase(self, + input_height, + input_width, + desired_height, + desired_width, + stride, + scale_y, + scale_x, + output_height, + output_width): + image = tf.convert_to_tensor( + np.random.rand(input_height, input_width, 3)) + + desired_size = (desired_height, desired_width) + resized_image, image_info = 
preprocess_ops.resize_and_crop_image( + image, + desired_size=desired_size, + padded_size=preprocess_ops.compute_padded_size(desired_size, stride)) + resized_image_shape = tf.shape(resized_image) + + self.assertAllEqual( + [output_height, output_width, 3], + resized_image_shape.numpy()) + self.assertNDArrayNear( + [[input_height, input_width], + [desired_height, desired_width], + [scale_y, scale_x], + [0.0, 0.0]], + image_info.numpy(), + 1e-5) + + @parameterized.parameters( + (100, 200, 100, 300, 32, 1.0, 1.0, 100, 200, 128, 320), + (200, 100, 100, 300, 32, 1.0, 1.0, 200, 100, 320, 128), + (100, 200, 80, 100, 32, 0.5, 0.5, 50, 100, 96, 128), + (200, 100, 80, 100, 32, 0.5, 0.5, 100, 50, 128, 96), + ) + def testResizeAndCropImageV2(self, + input_height, + input_width, + short_side, + long_side, + stride, + scale_y, + scale_x, + desired_height, + desired_width, + output_height, + output_width): + image = tf.convert_to_tensor( + np.random.rand(input_height, input_width, 3)) + image_shape = tf.shape(image)[0:2] + + desired_size = tf.where( + tf.greater(image_shape[0], image_shape[1]), + tf.constant([long_side, short_side], dtype=tf.int32), + tf.constant([short_side, long_side], dtype=tf.int32)) + resized_image, image_info = preprocess_ops.resize_and_crop_image_v2( + image, + short_side=short_side, + long_side=long_side, + padded_size=preprocess_ops.compute_padded_size(desired_size, stride)) + resized_image_shape = tf.shape(resized_image) + + self.assertAllEqual( + [output_height, output_width, 3], + resized_image_shape.numpy()) + self.assertNDArrayNear( + [[input_height, input_width], + [desired_height, desired_width], + [scale_y, scale_x], + [0.0, 0.0]], + image_info.numpy(), + 1e-5) + + @parameterized.parameters( + (400, 600), (600, 400), + ) + def testCenterCropImage(self, + input_height, + input_width): + image = tf.convert_to_tensor( + np.random.rand(input_height, input_width, 3)) + cropped_image = preprocess_ops.center_crop_image(image) + cropped_image_shape = 
tf.shape(cropped_image) + self.assertAllEqual([350, 350, 3], cropped_image_shape.numpy()) + + @parameterized.parameters( + (400, 600), (600, 400), + ) + def testCenterCropImageV2(self, + input_height, + input_width): + image_bytes = tf.constant( + _encode_image( + np.uint8(np.random.rand(input_height, input_width, 3) * 255), + fmt='JPEG'), + dtype=tf.string) + cropped_image = preprocess_ops.center_crop_image_v2( + image_bytes, tf.constant([input_height, input_width, 3], tf.int32)) + cropped_image_shape = tf.shape(cropped_image) + self.assertAllEqual([350, 350, 3], cropped_image_shape.numpy()) + + @parameterized.parameters( + (400, 600), (600, 400), + ) + def testRandomCropImage(self, + input_height, + input_width): + image = tf.convert_to_tensor( + np.random.rand(input_height, input_width, 3)) + _ = preprocess_ops.random_crop_image(image) + + @parameterized.parameters( + (400, 600), (600, 400), + ) + def testRandomCropImageV2(self, + input_height, + input_width): + image_bytes = tf.constant( + _encode_image( + np.uint8(np.random.rand(input_height, input_width, 3) * 255), + fmt='JPEG'), + dtype=tf.string) + _ = preprocess_ops.random_crop_image_v2( + image_bytes, tf.constant([input_height, input_width, 3], tf.int32)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/ops/sampling_ops.py b/official/vision/beta/ops/sampling_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..bd19e3ff727d4febe0a88653015c1f369f95e75d --- /dev/null +++ b/official/vision/beta/ops/sampling_ops.py @@ -0,0 +1,383 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to subsample minibatches by balancing positives and negatives. + +Subsamples minibatches based on a pre-specified positive fraction in range +[0,1]. The class presumes there are many more negatives than positive examples: +if the desired batch_size cannot be achieved with the pre-specified positive +fraction, it fills the rest with negative examples. If this is not sufficient +for obtaining the desired batch_size, it returns fewer examples. + +The main function to call is Subsample(self, indicator, labels). For convenience +one can also call SubsampleWeights(self, weights, labels) which is defined in +the minibatch_sampler base class. + +When is_static is True, it implements a method that guarantees static shapes. +It also ensures the length of output of the subsample is always batch_size, even +when number of examples set to True in indicator is less than batch_size. + +This is originally implemented in TensorFlow Object Detection API. +""" + +# Import libraries +import tensorflow as tf + + +def combined_static_and_dynamic_shape(tensor): + """Returns a list containing static and dynamic values for the dimensions. + + Returns a list of static and dynamic values for shape dimensions. This is + useful to preserve static shapes when available in reshape operation. + + Args: + tensor: A tensor of any type. + + Returns: + A list of size tensor.shape.ndims containing integers or a scalar tensor. 
+ """ + static_tensor_shape = tensor.shape.as_list() + dynamic_tensor_shape = tf.shape(input=tensor) + combined_shape = [] + for index, dim in enumerate(static_tensor_shape): + if dim is not None: + combined_shape.append(dim) + else: + combined_shape.append(dynamic_tensor_shape[index]) + return combined_shape + + +def indices_to_dense_vector(indices, + size, + indices_value=1., + default_value=0, + dtype=tf.float32): + """Creates dense vector with indices set to specific value and rest to zeros. + + This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. + """ + size = tf.cast(size, dtype=tf.int32) + zeros = tf.ones([size], dtype=dtype) * default_value + values = tf.ones_like(indices, dtype=dtype) * indices_value + + return tf.dynamic_stitch( + [tf.range(size), tf.cast(indices, dtype=tf.int32)], [zeros, values]) + + +def matmul_gather_on_zeroth_axis(params, indices, scope=None): + """Matrix multiplication based implementation of tf.gather on zeroth axis. + + TODO(rathodv, jonathanhuang): enable sparse matmul option. + + Args: + params: A float32 Tensor. The tensor from which to gather values. + Must be at least rank 1. + indices: A Tensor. Must be one of the following types: int32, int64. + Must be in range [0, params.shape[0]) + scope: A name for the operation (optional). + + Returns: + A Tensor. Has the same type as params. 
Values from params gathered + from indices given by indices, with shape indices.shape + params.shape[1:]. + """ + scope = scope or 'MatMulGather' + with tf.name_scope(scope): + params_shape = combined_static_and_dynamic_shape(params) + indices_shape = combined_static_and_dynamic_shape(indices) + params2d = tf.reshape(params, [params_shape[0], -1]) + indicator_matrix = tf.one_hot(indices, params_shape[0]) + gathered_result_flattened = tf.matmul(indicator_matrix, params2d) + return tf.reshape(gathered_result_flattened, + tf.stack(indices_shape + params_shape[1:])) + + +class BalancedPositiveNegativeSampler: + """Subsamples minibatches to a desired balance of positives and negatives.""" + + def __init__(self, positive_fraction=0.5, is_static=False): + """Constructs a minibatch sampler. + + Args: + positive_fraction: desired fraction of positive examples (scalar in [0,1]) + in the batch. + is_static: If True, uses an implementation with static shape guarantees. + + Raises: + ValueError: if positive_fraction < 0, or positive_fraction > 1 + """ + if positive_fraction < 0 or positive_fraction > 1: + raise ValueError('positive_fraction should be in range [0,1]. ' + 'Received: %s.' % positive_fraction) + self._positive_fraction = positive_fraction + self._is_static = is_static + + @staticmethod + def subsample_indicator(indicator, num_samples): + """Subsample indicator vector. + + Given a boolean indicator vector with M elements set to `True`, the function + assigns all but `num_samples` of these previously `True` elements to + `False`. If `num_samples` is greater than M, the original indicator vector + is returned. + + Args: + indicator: a 1-dimensional boolean tensor indicating which elements + are allowed to be sampled and which are not. 
+ num_samples: int32 scalar tensor + + Returns: + a boolean tensor with the same shape as input (indicator) tensor + """ + indices = tf.where(indicator) + indices = tf.random.shuffle(indices) + indices = tf.reshape(indices, [-1]) + + num_samples = tf.minimum(tf.size(input=indices), num_samples) + selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1])) + + selected_indicator = indices_to_dense_vector( + selected_indices, + tf.shape(input=indicator)[0]) + + return tf.equal(selected_indicator, 1) + + def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size): + """Counts the number of positives and negatives numbers to be sampled. + + Args: + sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains + the signed indices of the examples where the sign is based on the label + value. The examples that cannot be sampled are set to 0. It samples + at most sample_size*positive_fraction positive examples and remaining + from negative examples. + sample_size: Size of subsamples. + + Returns: + A tuple containing the number of positive and negative labels in the + subsample. + """ + input_length = tf.shape(input=sorted_indices_tensor)[0] + valid_positive_index = tf.greater(sorted_indices_tensor, + tf.zeros(input_length, tf.int32)) + num_sampled_pos = tf.reduce_sum( + input_tensor=tf.cast(valid_positive_index, tf.int32)) + max_num_positive_samples = tf.constant( + int(sample_size * self._positive_fraction), tf.int32) + num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos) + num_negative_samples = tf.constant(sample_size, + tf.int32) - num_positive_samples + + return num_positive_samples, num_negative_samples + + def _get_values_from_start_and_end(self, input_tensor, num_start_samples, + num_end_samples, total_num_samples): + """slices num_start_samples and last num_end_samples from input_tensor. + + Args: + input_tensor: An int32 tensor of shape [N] to be sliced. 
+ num_start_samples: Number of examples to be sliced from the beginning + of the input tensor. + num_end_samples: Number of examples to be sliced from the end of the + input tensor. + total_num_samples: Sum of is num_start_samples and num_end_samples. This + should be a scalar. + + Returns: + A tensor containing the first num_start_samples and last num_end_samples + from input_tensor. + + """ + input_length = tf.shape(input=input_tensor)[0] + start_positions = tf.less(tf.range(input_length), num_start_samples) + end_positions = tf.greater_equal( + tf.range(input_length), input_length - num_end_samples) + selected_positions = tf.logical_or(start_positions, end_positions) + selected_positions = tf.cast(selected_positions, tf.float32) + indexed_positions = tf.multiply(tf.cumsum(selected_positions), + selected_positions) + one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1, + total_num_samples, + dtype=tf.float32) + return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32), + one_hot_selector, axes=[0, 0]), tf.int32) + + def _static_subsample(self, indicator, batch_size, labels): + """Returns subsampled minibatch. + + Args: + indicator: boolean tensor of shape [N] whose True entries can be sampled. + N should be a complie time constant. + batch_size: desired batch size. This scalar cannot be None. + labels: boolean tensor of shape [N] denoting positive(=True) and negative + (=False) examples. N should be a complie time constant. + + Returns: + sampled_idx_indicator: boolean tensor of shape [N], True for entries which + are sampled. It ensures the length of output of the subsample is always + batch_size, even when number of examples set to True in indicator is + less than batch_size. + + Raises: + ValueError: if labels and indicator are not 1D boolean tensors. + """ + # Check if indicator and labels have a static size. 
+ if not indicator.shape.is_fully_defined(): + raise ValueError('indicator must be static in shape when is_static is' + 'True') + if not labels.shape.is_fully_defined(): + raise ValueError('labels must be static in shape when is_static is' + 'True') + if not isinstance(batch_size, int): + raise ValueError('batch_size has to be an integer when is_static is' + 'True.') + + input_length = tf.shape(input=indicator)[0] + + # Set the number of examples set True in indicator to be at least + # batch_size. + num_true_sampled = tf.reduce_sum( + input_tensor=tf.cast(indicator, tf.float32)) + additional_false_sample = tf.less_equal( + tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)), + batch_size - num_true_sampled) + indicator = tf.logical_or(indicator, additional_false_sample) + + # Shuffle indicator and label. Need to store the permutation to restore the + # order post sampling. + permutation = tf.random.shuffle(tf.range(input_length)) + indicator = matmul_gather_on_zeroth_axis( + tf.cast(indicator, tf.float32), permutation) + labels = matmul_gather_on_zeroth_axis( + tf.cast(labels, tf.float32), permutation) + + # index (starting from 1) when indicator is True, 0 when False + indicator_idx = tf.where( + tf.cast(indicator, tf.bool), tf.range(1, input_length + 1), + tf.zeros(input_length, tf.int32)) + + # Replace -1 for negative, +1 for positive labels + signed_label = tf.where( + tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32), + tf.scalar_mul(-1, tf.ones(input_length, tf.int32))) + # negative of index for negative label, positive index for positive label, + # 0 when indicator is False. 
+ signed_indicator_idx = tf.multiply(indicator_idx, signed_label) + sorted_signed_indicator_idx = tf.nn.top_k( + signed_indicator_idx, input_length, sorted=True).values + + [num_positive_samples, + num_negative_samples] = self._get_num_pos_neg_samples( + sorted_signed_indicator_idx, batch_size) + + sampled_idx = self._get_values_from_start_and_end( + sorted_signed_indicator_idx, num_positive_samples, + num_negative_samples, batch_size) + + # Shift the indices to start from 0 and remove any samples that are set as + # False. + sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32) + sampled_idx = tf.multiply( + tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32), + sampled_idx) + + sampled_idx_indicator = tf.cast( + tf.reduce_sum( + input_tensor=tf.one_hot(sampled_idx, depth=input_length), axis=0), + tf.bool) + + # project back the order based on stored permutations + reprojections = tf.one_hot(permutation, depth=input_length, + dtype=tf.float32) + return tf.cast(tf.tensordot( + tf.cast(sampled_idx_indicator, tf.float32), + reprojections, axes=[0, 0]), tf.bool) + + def subsample(self, indicator, batch_size, labels, scope=None): + """Returns subsampled minibatch. + + Args: + indicator: boolean tensor of shape [N] whose True entries can be sampled. + batch_size: desired batch size. If None, keeps all positive samples and + randomly selects negative samples so that the positive sample fraction + matches self._positive_fraction. It cannot be None is is_static is True. + labels: boolean tensor of shape [N] denoting positive(=True) and negative + (=False) examples. + scope: name scope. + + Returns: + sampled_idx_indicator: boolean tensor of shape [N], True for entries which + are sampled. + + Raises: + ValueError: if labels and indicator are not 1D boolean tensors. 
+ """ + if len(indicator.get_shape().as_list()) != 1: + raise ValueError('indicator must be 1 dimensional, got a tensor of ' + 'shape %s' % indicator.get_shape()) + if len(labels.get_shape().as_list()) != 1: + raise ValueError('labels must be 1 dimensional, got a tensor of ' + 'shape %s' % labels.get_shape()) + if labels.dtype != tf.bool: + raise ValueError('labels should be of type bool. Received: %s' % + labels.dtype) + if indicator.dtype != tf.bool: + raise ValueError('indicator should be of type bool. Received: %s' % + indicator.dtype) + scope = scope or 'BalancedPositiveNegativeSampler' + with tf.name_scope(scope): + if self._is_static: + return self._static_subsample(indicator, batch_size, labels) + + else: + # Only sample from indicated samples + negative_idx = tf.logical_not(labels) + positive_idx = tf.logical_and(labels, indicator) + negative_idx = tf.logical_and(negative_idx, indicator) + + # Sample positive and negative samples separately + if batch_size is None: + max_num_pos = tf.reduce_sum( + input_tensor=tf.cast(positive_idx, dtype=tf.int32)) + else: + max_num_pos = int(self._positive_fraction * batch_size) + sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) + num_sampled_pos = tf.reduce_sum( + input_tensor=tf.cast(sampled_pos_idx, tf.int32)) + if batch_size is None: + negative_positive_ratio = ( + 1 - self._positive_fraction) / self._positive_fraction + max_num_neg = tf.cast( + negative_positive_ratio * + tf.cast(num_sampled_pos, dtype=tf.float32), + dtype=tf.int32) + else: + max_num_neg = batch_size - num_sampled_pos + sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) + + return tf.logical_or(sampled_pos_idx, sampled_neg_idx) diff --git a/official/vision/beta/ops/spatial_transform_ops.py b/official/vision/beta/ops/spatial_transform_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..bcfdd6a73287732a2bf666fe5009965067d1d45b --- /dev/null +++ 
b/official/vision/beta/ops/spatial_transform_ops.py @@ -0,0 +1,529 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Spatial transform ops.""" + +import tensorflow as tf + +_EPSILON = 1e-8 + + +def _feature_bilinear_interpolation(features, kernel_y, kernel_x): + """Feature bilinear interpolation. + + The RoIAlign feature f can be computed by bilinear interpolation + of four neighboring feature points f0, f1, f2, and f3. + + f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T + [f10, f11]] + f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11 + f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11 + kernel_y = [hy, ly] + kernel_x = [hx, lx] + + Args: + features: The features are in shape of [batch_size, num_boxes, output_size * + 2, output_size * 2, num_filters]. + kernel_y: Tensor of size [batch_size, boxes, output_size, 2, 1]. + kernel_x: Tensor of size [batch_size, boxes, output_size, 2, 1]. + + Returns: + A 5-D tensor representing feature crop of shape + [batch_size, num_boxes, output_size, output_size, num_filters]. 
+ + """ + batch_size, num_boxes, output_size, _, num_filters = ( + features.get_shape().as_list()) + if batch_size is None: + batch_size = tf.shape(features)[0] + output_size = output_size // 2 + kernel_y = tf.reshape(kernel_y, [batch_size, num_boxes, output_size * 2, 1]) + kernel_x = tf.reshape(kernel_x, [batch_size, num_boxes, 1, output_size * 2]) + # Use implicit broadcast to generate the interpolation kernel. The + # multiplier `4` is for avg pooling. + interpolation_kernel = kernel_y * kernel_x * 4 + + # Interpolate the gathered features with computed interpolation kernels. + features *= tf.cast( + tf.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype) + features = tf.reshape( + features, + [batch_size * num_boxes, output_size * 2, output_size * 2, num_filters]) + features = tf.nn.avg_pool(features, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID') + features = tf.reshape( + features, [batch_size, num_boxes, output_size, output_size, num_filters]) + return features + + +def _compute_grid_positions(boxes, boundaries, output_size, sample_offset): + """Computes the grid position w.r.t. the corresponding feature map. + + Args: + boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the + information of each box w.r.t. the corresponding feature map. + boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left + corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float) + in terms of the number of pixels of the corresponding feature map size. + boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing + the boundary (in (y, x)) of the corresponding feature map for each box. + Any resampled grid points that go beyond the bounary will be clipped. + output_size: a scalar indicating the output crop size. + sample_offset: a float number in [0, 1] indicates the subpixel sample offset + from grid point. + + Returns: + kernel_y: Tensor of size [batch_size, boxes, output_size, 2, 1]. 
+ kernel_x: Tensor of size [batch_size, boxes, output_size, 2, 1]. + box_grid_y0y1: Tensor of size [batch_size, boxes, output_size, 2] + box_grid_x0x1: Tensor of size [batch_size, boxes, output_size, 2] + """ + batch_size, num_boxes, _ = boxes.get_shape().as_list() + if batch_size is None: + batch_size = tf.shape(boxes)[0] + box_grid_x = [] + box_grid_y = [] + for i in range(output_size): + box_grid_x.append(boxes[:, :, 1] + + (i + sample_offset) * boxes[:, :, 3] / output_size) + box_grid_y.append(boxes[:, :, 0] + + (i + sample_offset) * boxes[:, :, 2] / output_size) + box_grid_x = tf.stack(box_grid_x, axis=2) + box_grid_y = tf.stack(box_grid_y, axis=2) + + box_grid_y0 = tf.floor(box_grid_y) + box_grid_x0 = tf.floor(box_grid_x) + box_grid_x0 = tf.maximum(tf.cast(0., dtype=box_grid_x0.dtype), box_grid_x0) + box_grid_y0 = tf.maximum(tf.cast(0., dtype=box_grid_y0.dtype), box_grid_y0) + + box_grid_x0 = tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)) + box_grid_x1 = tf.minimum(box_grid_x0 + 1, + tf.expand_dims(boundaries[:, :, 1], -1)) + box_grid_y0 = tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)) + box_grid_y1 = tf.minimum(box_grid_y0 + 1, + tf.expand_dims(boundaries[:, :, 0], -1)) + + box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1) + box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1) + + # The RoIAlign feature f can be computed by bilinear interpolation of four + # neighboring feature points f0, f1, f2, and f3. 
+ # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T + # [f10, f11]] + # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11 + # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11 + ly = box_grid_y - box_grid_y0 + lx = box_grid_x - box_grid_x0 + hy = 1.0 - ly + hx = 1.0 - lx + kernel_y = tf.reshape( + tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1]) + kernel_x = tf.reshape( + tf.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1]) + return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 + + +def multilevel_crop_and_resize(features, + boxes, + output_size=7, + sample_offset=0.5): + """Crop and resize on multilevel feature pyramid. + + Generate the (output_size, output_size) set of pixels for each input box + by first locating the box into the correct feature level, and then cropping + and resizing it using the correspoding feature map of that level. + + Args: + features: A dictionary with key as pyramid level and value as features. The + features are in shape of [batch_size, height_l, width_l, num_filters]. + boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row represents + a box with [y1, x1, y2, x2] in un-normalized coordinates. + output_size: A scalar to indicate the output crop size. + sample_offset: a float number in [0, 1] indicates the subpixel sample offset + from grid point. + + Returns: + A 5-D tensor representing feature crop of shape + [batch_size, num_boxes, output_size, output_size, num_filters]. + """ + + with tf.name_scope('multilevel_crop_and_resize'): + levels = list(features.keys()) + min_level = int(min(levels)) + max_level = int(max(levels)) + batch_size, max_feature_height, max_feature_width, num_filters = ( + features[str(min_level)].get_shape().as_list()) + if batch_size is None: + batch_size = tf.shape(features[str(min_level)])[0] + _, num_boxes, _ = boxes.get_shape().as_list() + + # Stack feature pyramid into a features_all of shape + # [batch_size, levels, height, width, num_filters]. 
+ features_all = [] + feature_heights = [] + feature_widths = [] + for level in range(min_level, max_level + 1): + shape = features[str(level)].get_shape().as_list() + feature_heights.append(shape[1]) + feature_widths.append(shape[2]) + # Concat tensor of [batch_size, height_l * width_l, num_filters] for each + # levels. + features_all.append( + tf.reshape(features[str(level)], [batch_size, -1, num_filters])) + features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters]) + + # Calculate height_l * width_l for each level. + level_dim_sizes = [ + feature_widths[i] * feature_heights[i] + for i in range(len(feature_widths)) + ] + # level_dim_offsets is accumulated sum of level_dim_size. + level_dim_offsets = [0] + for i in range(len(feature_widths) - 1): + level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i]) + batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1] + level_dim_offsets = tf.constant(level_dim_offsets, tf.int32) + height_dim_sizes = tf.constant(feature_widths, tf.int32) + + # Assigns boxes to the right level. + box_width = boxes[:, :, 3] - boxes[:, :, 1] + box_height = boxes[:, :, 2] - boxes[:, :, 0] + areas_sqrt = tf.cast(tf.sqrt(box_height * box_width), tf.float32) + levels = tf.cast( + tf.math.floordiv( + tf.math.log(tf.divide(areas_sqrt, 224.0)), + tf.math.log(2.0)) + 4.0, + dtype=tf.int32) + # Maps levels between [min_level, max_level]. + levels = tf.minimum(max_level, tf.maximum(levels, min_level)) + + # Projects box location and sizes to corresponding feature levels. + scale_to_level = tf.cast( + tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)), + dtype=boxes.dtype) + boxes /= tf.expand_dims(scale_to_level, axis=2) + box_width /= scale_to_level + box_height /= scale_to_level + boxes = tf.concat([boxes[:, :, 0:2], + tf.expand_dims(box_height, -1), + tf.expand_dims(box_width, -1)], axis=-1) + + # Maps levels to [0, max_level-min_level]. 
+ levels -= min_level + level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32)) + boundary = tf.cast( + tf.concat([ + tf.expand_dims( + [[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1, + axis=-1), + tf.expand_dims( + [[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1, + axis=-1), + ], + axis=-1), boxes.dtype) + + # Compute grid positions. + kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions( + boxes, boundary, output_size, sample_offset) + + x_indices = tf.cast( + tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]), + dtype=tf.int32) + y_indices = tf.cast( + tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]), + dtype=tf.int32) + + batch_size_offset = tf.tile( + tf.reshape( + tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]), + [1, num_boxes, output_size * 2, output_size * 2]) + # Get level offset for each box. Each box belongs to one level. + levels_offset = tf.tile( + tf.reshape( + tf.gather(level_dim_offsets, levels), + [batch_size, num_boxes, 1, 1]), + [1, 1, output_size * 2, output_size * 2]) + y_indices_offset = tf.tile( + tf.reshape( + y_indices * tf.expand_dims(tf.gather(height_dim_sizes, levels), -1), + [batch_size, num_boxes, output_size * 2, 1]), + [1, 1, 1, output_size * 2]) + x_indices_offset = tf.tile( + tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]), + [1, 1, output_size * 2, 1]) + indices = tf.reshape( + batch_size_offset + levels_offset + y_indices_offset + x_indices_offset, + [-1]) + + # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar + # performance. + features_per_box = tf.reshape( + tf.gather(features_r2, indices), + [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters]) + + # Bilinear interpolation. 
+ features_per_box = _feature_bilinear_interpolation( + features_per_box, kernel_y, kernel_x) + return features_per_box + + +def _selective_crop_and_resize(features, + boxes, + box_levels, + boundaries, + output_size=7, + sample_offset=0.5, + use_einsum_gather=False): + """Crop and resize boxes on a set of feature maps. + + Given multiple features maps indexed by different levels, and a set of boxes + where each box is mapped to a certain level, it selectively crops and resizes + boxes from the corresponding feature maps to generate the box features. + + We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf, + figure 3 for reference). Specifically, for each feature map, we select an + (output_size, output_size) set of pixels corresponding to the box location, + and then use bilinear interpolation to select the feature value for each + pixel. + + For performance, we perform the gather and interpolation on all layers as a + single operation. In this op the multi-level features are first stacked and + gathered into [2*output_size, 2*output_size] feature points. Then bilinear + interpolation is performed on the gathered feature points to generate + [output_size, output_size] RoIAlign feature map. + + Here is the step-by-step algorithm: + 1. The multi-level features are gathered into a + [batch_size, num_boxes, output_size*2, output_size*2, num_filters] + Tensor. The Tensor contains four neighboring feature points for each + vertex in the output grid. + 2. Compute the interpolation kernel of shape + [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axis + can be seen as stacking 2x2 interpolation kernels for all vertices in the + output grid. + 3. Element-wise multiply the gathered features and interpolation kernel. + Then apply 2x2 average pooling to reduce spatial dimension to + output_size. 
+ + Args: + features: a 5-D tensor of shape [batch_size, num_levels, max_height, + max_width, num_filters] where cropping and resizing are based. + boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the + information of each box w.r.t. the corresponding feature map. + boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left + corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float) + in terms of the number of pixels of the corresponding feature map size. + box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing + the 0-based corresponding feature level index of each box. + boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing + the boundary (in (y, x)) of the corresponding feature map for each box. + Any resampled grid points that go beyond the bounary will be clipped. + output_size: a scalar indicating the output crop size. + sample_offset: a float number in [0, 1] indicates the subpixel sample offset + from grid point. + use_einsum_gather: use einsum to replace gather or not. Replacing einsum + with gather can improve performance when feature size is not large, einsum + is friendly with model partition as well. Gather's performance is better + when feature size is very large and there are multiple box levels. + + Returns: + features_per_box: a 5-D tensor of shape + [batch_size, num_boxes, output_size, output_size, num_filters] + representing the cropped features. 
+ """ + (batch_size, num_levels, max_feature_height, max_feature_width, + num_filters) = features.get_shape().as_list() + if batch_size is None: + batch_size = tf.shape(features)[0] + _, num_boxes, _ = boxes.get_shape().as_list() + + kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions( + boxes, boundaries, output_size, sample_offset) + x_indices = tf.cast( + tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]), + dtype=tf.int32) + y_indices = tf.cast( + tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]), + dtype=tf.int32) + + if use_einsum_gather: + # Blinear interpolation is done during the last two gathers: + # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T + # [f10, f11]] + # [[f00, f01], + # [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot) + # where [hy, ly] and [hx, lx] are the bilinear interpolation kernel. + y_indices = tf.cast( + tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size, 2]), + dtype=tf.int32) + x_indices = tf.cast( + tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size, 2]), + dtype=tf.int32) + + # shape is [batch_size, num_boxes, output_size, 2, height] + grid_y_one_hot = tf.one_hot( + tf.cast(y_indices, tf.int32), max_feature_height, dtype=kernel_y.dtype) + # shape is [batch_size, num_boxes, output_size, 2, width] + grid_x_one_hot = tf.one_hot( + tf.cast(x_indices, tf.int32), max_feature_width, dtype=kernel_x.dtype) + + # shape is [batch_size, num_boxes, output_size, height] + grid_y_weight = tf.reduce_sum( + tf.multiply(grid_y_one_hot, kernel_y), axis=-2) + # shape is [batch_size, num_boxes, output_size, width] + grid_x_weight = tf.reduce_sum( + tf.multiply(grid_x_one_hot, kernel_x), axis=-2) + + # Gather for y_axis. + # shape is [batch_size, num_boxes, output_size, width, features] + features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features, + tf.cast(grid_y_weight, features.dtype)) + # Gather for x_axis. 
+ # shape is [batch_size, num_boxes, output_size, output_size, features] + features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box, + tf.cast(grid_x_weight, features.dtype)) + else: + height_dim_offset = max_feature_width + level_dim_offset = max_feature_height * height_dim_offset + batch_dim_offset = num_levels * level_dim_offset + + batch_size_offset = tf.tile( + tf.reshape( + tf.range(batch_size) * batch_dim_offset, [batch_size, 1, 1, 1]), + [1, num_boxes, output_size * 2, output_size * 2]) + box_levels_offset = tf.tile( + tf.reshape(box_levels * level_dim_offset, + [batch_size, num_boxes, 1, 1]), + [1, 1, output_size * 2, output_size * 2]) + y_indices_offset = tf.tile( + tf.reshape(y_indices * height_dim_offset, + [batch_size, num_boxes, output_size * 2, 1]), + [1, 1, 1, output_size * 2]) + x_indices_offset = tf.tile( + tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]), + [1, 1, output_size * 2, 1]) + + indices = tf.reshape( + batch_size_offset + box_levels_offset + y_indices_offset + + x_indices_offset, [-1]) + + features = tf.reshape(features, [-1, num_filters]) + # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar + # performance. + features_per_box = tf.reshape( + tf.gather(features, indices), + [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters]) + features_per_box = _feature_bilinear_interpolation( + features_per_box, kernel_y, kernel_x) + + return features_per_box + + +def crop_mask_in_target_box(masks, + boxes, + target_boxes, + output_size, + sample_offset=0, + use_einsum=True): + """Crop masks in target boxes. + + Args: + masks: A tensor with a shape of [batch_size, num_masks, height, width]. + boxes: a float tensor representing box cooridnates that tightly enclose + masks with a shape of [batch_size, num_masks, 4] in un-normalized + coordinates. A box is represented by [ymin, xmin, ymax, xmax]. 
+ target_boxes: a float tensor representing target box cooridnates for masks + with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A + box is represented by [ymin, xmin, ymax, xmax]. + output_size: A scalar to indicate the output crop size. It currently only + supports to output a square shape outputs. + sample_offset: a float number in [0, 1] indicates the subpixel sample offset + from grid point. + use_einsum: Use einsum to replace gather in selective_crop_and_resize. + + Returns: + A 4-D tensor representing feature crop of shape + [batch_size, num_boxes, output_size, output_size]. + """ + with tf.name_scope('crop_mask_in_target_box'): + batch_size, num_masks, height, width = masks.get_shape().as_list() + if batch_size is None: + batch_size = tf.shape(masks)[0] + masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1]) + # Pad zeros on the boundary of masks. + masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4) + masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1]) + + # Projects target box locations and sizes to corresponding cropped + # mask coordinates. + gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split( + value=boxes, num_or_size_splits=4, axis=2) + bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split( + value=target_boxes, num_or_size_splits=4, axis=2) + y_transform = (bb_y_min - gt_y_min) * height / ( + gt_y_max - gt_y_min + _EPSILON) + 2 + x_transform = (bb_x_min - gt_x_min) * height / ( + gt_x_max - gt_x_min + _EPSILON) + 2 + h_transform = (bb_y_max - bb_y_min) * width / ( + gt_y_max - gt_y_min + _EPSILON) + w_transform = (bb_x_max - bb_x_min) * width / ( + gt_x_max - gt_x_min + _EPSILON) + + boundaries = tf.concat( + [tf.ones_like(y_transform) * ((height + 4) - 1), + tf.ones_like(x_transform) * ((width + 4) - 1)], + axis=-1) + boundaries = tf.cast(boundaries, dtype=y_transform.dtype) + + # Reshape tensors to have the right shape for selective_crop_and_resize. 
+    trasnformed_boxes = tf.concat(
+        [y_transform, x_transform, h_transform, w_transform], -1)
+    levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
+                     [batch_size, 1])
+
+    cropped_masks = _selective_crop_and_resize(
+        masks,
+        trasnformed_boxes,
+        levels,
+        boundaries,
+        output_size,
+        sample_offset=sample_offset,
+        use_einsum_gather=use_einsum)
+    cropped_masks = tf.squeeze(cropped_masks, axis=-1)
+
+  return cropped_masks
+
+
+def nearest_upsampling(data, scale):
+  """Nearest neighbor upsampling implementation.
+
+  Args:
+    data: A tensor with a shape of [batch, height_in, width_in, channels].
+    scale: An integer multiple to scale resolution of input data.
+
+  Returns:
+    data_up: A tensor with a shape of
+      [batch, height_in*scale, width_in*scale, channels]. Same dtype as input
+      data.
+  """
+  with tf.name_scope('nearest_upsampling'):
+    bs, _, _, c = data.get_shape().as_list()
+    shape = tf.shape(input=data)
+    h = shape[1]
+    w = shape[2]
+    bs = -1 if bs is None else bs
+    # Uses reshape to quickly upsample the input. The nearest pixel is selected
+    # via tiling.
+    data = tf.tile(
+        tf.reshape(data, [bs, h, 1, w, 1, c]), [1, 1, scale, 1, scale, 1])
+    return tf.reshape(data, [bs, h * scale, w * scale, c])
diff --git a/official/vision/beta/projects/README.md b/official/vision/beta/projects/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9c20f07fc608947e218602b696e03586109bdc8c
--- /dev/null
+++ b/official/vision/beta/projects/README.md
@@ -0,0 +1,3 @@
+Here are a few projects that are built on tf.vision. They are built and
+maintained by different parties. They can be used as examples of how to build
+your own projects based on tf.vision.
diff --git a/official/vision/beta/projects/__init__.py b/official/vision/beta/projects/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422
--- /dev/null
+++ b/official/vision/beta/projects/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/official/vision/beta/projects/assemblenet/README.md b/official/vision/beta/projects/assemblenet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb19541f9fc4ad8348948132d2d856bca6b714fa
--- /dev/null
+++ b/official/vision/beta/projects/assemblenet/README.md
@@ -0,0 +1,14 @@
+# AssembleNet and AssembleNet++
+
+This repository contains the official implementations of the following papers.
+
+[![Paper](http://img.shields.io/badge/Paper-arXiv.1905.13209-B3181B?logo=arXiv)](https://arxiv.org/abs/1905.13209)
+[AssembleNet: Searching for Multi-Stream Neural Connectivity in Video
+Architectures](https://arxiv.org/abs/1905.13209)
+
+[![Paper](http://img.shields.io/badge/Paper-arXiv.2008.08072-B3181B?logo=arXiv)](https://arxiv.org/abs/2008.08072)
+[AssembleNet++: Assembling Modality Representations via Attention
+Connections](https://arxiv.org/abs/2008.08072)
+
+**DISCLAIMER**: AssembleNet++ implementation is still under development.
+No support will be provided during the development phase.
diff --git a/official/vision/beta/projects/assemblenet/configs/assemblenet.py b/official/vision/beta/projects/assemblenet/configs/assemblenet.py new file mode 100644 index 0000000000000000000000000000000000000000..1453450ad39daf56a2a9623c0db79715edf13596 --- /dev/null +++ b/official/vision/beta/projects/assemblenet/configs/assemblenet.py @@ -0,0 +1,225 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Definitions for AssembleNet/++ structures. + +This structure is a `list` corresponding to a graph representation of the +network, where a node is a convolutional block and an edge specifies a +connection from one block to another. + +Each node itself (in the structure list) is a list with the following format: +[block_level, [list_of_input_blocks], number_filter, temporal_dilation, +spatial_stride]. [list_of_input_blocks] should be the list of node indexes whose +values are less than the index of the node itself. The 'stems' of the network +directly taking raw inputs follow a different node format: +[stem_type, temporal_dilation]. The stem_type is -1 for RGB stem and is -2 for +optical flow stem. The stem_type -3 is reserved for the object segmentation +input. + +In AssembleNet++lite, instead of passing a single `int` for number_filter, we +pass a list/tuple of three `int`s. They specify the number of channels to be +used for each layer in the inverted bottleneck modules. 
+ +The structure_weights specify the learned connection weights. +""" +from typing import List, Tuple +import dataclasses +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.vision.beta.configs import backbones_3d +from official.vision.beta.configs import common +from official.vision.beta.configs import video_classification + + +@dataclasses.dataclass +class BlockSpec(hyperparams.Config): + level: int = -1 + input_blocks: Tuple[int, ...] = tuple() + num_filters: int = -1 + temporal_dilation: int = 1 + spatial_stride: int = 1 + input_block_weight: Tuple[float, ...] = tuple() + + +def flat_lists_to_blocks(model_structures, model_edge_weights): + """Transforms the raw list structure configs to BlockSpec tuple.""" + blocks = [] + for node, edge_weights in zip(model_structures, model_edge_weights): + if node[0] < 0: + block = BlockSpec(level=node[0], temporal_dilation=node[1]) + else: + block = BlockSpec( + level=node[0], + input_blocks=node[1], + num_filters=node[2], + temporal_dilation=node[3], + spatial_stride=node[4]) + if edge_weights: + assert len(edge_weights[0]) == len(block.input_blocks), ( + f'{len(edge_weights[0])} != {len(block.input_blocks)} at block ' + f'{block} weight {edge_weights}') + block.input_block_weight = tuple(edge_weights[0]) + blocks.append(block) + return tuple(blocks) + + +def blocks_to_flat_lists(blocks: List[BlockSpec]): + """Transforms BlockSpec tuple to the raw list structure configs.""" + # pylint: disable=g-complex-comprehension + # pylint: disable=g-long-ternary + model_structure = [[ + b.level, + list(b.input_blocks), b.num_filters, b.temporal_dilation, + b.spatial_stride, 0 + ] if b.level >= 0 else [b.level, b.temporal_dilation] for b in blocks] + model_edge_weights = [ + [list(b.input_block_weight)] if b.input_block_weight else [] + for b in blocks + ] + return model_structure, model_edge_weights + + +# AssembleNet structure for 50/101 
layer models, found using evolution with the +# Moments-in-Time dataset. This is the structure used for the experiments in the +# AssembleNet paper. The learned connectivity weights are also provided. +asn50_structure = [[-1, 4], [-1, 4], [-2, 1], [-2, 1], [0, [1], 32, 1, 1, 0], + [0, [0], 32, 4, 1, 0], [0, [0, 1, 2, 3], 32, 1, 1, 0], + [0, [2, 3], 32, 2, 1, 0], [1, [0, 4, 5, 6, 7], 64, 2, 2, 0], + [1, [0, 2, 4, 7], 64, 1, 2, 0], [1, [0, 5, 7], 64, 4, 2, 0], + [1, [0, 5], 64, 1, 2, 0], [2, [4, 8, 10, 11], 256, 1, 2, 0], + [2, [8, 9], 256, 4, 2, 0], [3, [12, 13], 512, 2, 2, 0]] +asn101_structure = [[-1, 4], [-1, 4], [-2, 1], [-2, 1], [0, [1], 32, 1, 1, 0], + [0, [0], 32, 4, 1, 0], [0, [0, 1, 2, 3], 32, 1, 1, 0], + [0, [2, 3], 32, 2, 1, 0], [1, [0, 4, 5, 6, 7], 64, 2, 2, 0], + [1, [0, 2, 4, 7], 64, 1, 2, 0], [1, [0, 5, 7], 64, 4, 2, 0], + [1, [0, 5], 64, 1, 2, 0], [2, [4, 8, 10, 11], 192, 1, 2, 0], + [2, [8, 9], 192, 4, 2, 0], [3, [12, 13], 256, 2, 2, 0]] +asn_structure_weights = [ + [], [], [], [], [], [], + [[ + 0.13810564577579498, 0.8465337157249451, 0.3072969317436218, + 0.2867436408996582 + ]], [[0.5846117734909058, 0.6066334843635559]], + [[ + 0.16382087767124176, 0.8852924704551697, 0.4039595425128937, + 0.6823437809944153, 0.5331538319587708 + ]], + [[ + 0.028569204732775688, 0.10333596915006638, 0.7517264485359192, + 0.9260114431381226 + ]], [[0.28832191228866577, 0.7627848982810974, 0.404977947473526]], + [[0.23474831879138947, 0.7841425538063049]], + [[ + 0.27616503834724426, 0.9514784812927246, 0.6568767428398132, + 0.9547983407974243 + ]], [[0.5047007203102112, 0.8876819610595703]], + [[0.9892204403877258, 0.8454614877700806]] +] + + +# AssembleNet++ structure for 50 layer models, found with the Charades dataset. +# This is the model used in the experiments in the AssembleNet++ paper. +# Note that, in order the build AssembleNet++ with this structure, you also need +# to feed 'object segmentation input' to the network indicated as [-3, 4]. 
It's +# the 5th block in the architecture. +# If you don't plan to use the object input but want to still benefit from +# peer-attention in AssembleNet++ (with RGB and OF), please use the above +# AssembleNet-50 model instead with assemblenet_plus.py code. +full_asnp50_structure = [[-1, 2], [-1, 4], [-2, 2], [-2, 1], [-3, 4], + [0, [0, 1, 2, 3, 4], 32, 1, 1, 0], + [0, [0, 1, 4], 32, 4, 1, 0], + [0, [2, 3, 4], 32, 8, 1, 0], + [0, [2, 3, 4], 32, 1, 1, 0], + [1, [0, 1, 2, 4, 5, 6, 7, 8], 64, 4, 2, 0], + [1, [2, 3, 4, 7, 8], 64, 1, 2, 0], + [1, [0, 4, 5, 6, 7], 128, 8, 2, 0], + [2, [4, 11], 256, 8, 2, 0], + [2, [2, 3, 4, 5, 6, 7, 8, 10, 11], 256, 4, 2, 0], + [3, [12, 13], 512, 2, 2, 0]] +full_asnp_structure_weights = [[], [], [], [], [], [[0.6143830418586731, 0.7111759185791016, 0.19351491332054138, 0.1701001077890396, 0.7178536653518677]], [[0.5755624771118164, 0.5644599795341492, 0.7128658294677734]], [[0.26563042402267456, 0.3033692538738251, 0.8244096636772156]], [[0.07013848423957825, 0.07905343919992447, 0.8767927885055542]], [[0.5008697509765625, 0.5020178556442261, 0.49819135665893555, 0.5015180706977844, 0.4987695813179016, 0.4990265369415283, 0.499239057302475, 0.4974501430988312]], [[0.47034338116645813, 0.4694305658340454, 0.767791748046875, 0.5539310574531555, 0.4520096182823181]], [[0.2769702076911926, 0.8116549253463745, 0.597356915473938, 0.6585626602172852, 0.5915306210517883]], [[0.501274824142456, 0.5016682147979736]], [[0.0866393893957138, 0.08469288796186447, 0.9739039540290833, 0.058271341025829315, 0.08397126197814941, 0.10285478830337524, 0.18506969511508942, 0.23874442279338837, 0.9188644886016846]], [[0.4174623489379883, 0.5844835638999939]]] # pylint: disable=line-too-long + + +# AssembleNet++lite structure using inverted bottleneck blocks. By specifing +# the connection weights as [], the model could alos automatically learn the +# connection weights during its training. 
+asnp_lite_structure = [[-1, 1], [-2, 1], + [0, [0, 1], [27, 27, 12], 1, 2, 0], + [0, [0, 1], [27, 27, 12], 4, 2, 0], + [1, [0, 1, 2, 3], [54, 54, 24], 2, 2, 0], + [1, [0, 1, 2, 3], [54, 54, 24], 1, 2, 0], + [1, [0, 1, 2, 3], [54, 54, 24], 4, 2, 0], + [1, [0, 1, 2, 3], [54, 54, 24], 1, 2, 0], + [2, [0, 1, 2, 3, 4, 5, 6, 7], [152, 152, 68], 1, 2, 0], + [2, [0, 1, 2, 3, 4, 5, 6, 7], [152, 152, 68], 4, 2, 0], + [3, [2, 3, 4, 5, 6, 7, 8, 9], [432, 432, 192], 2, 2, 0]] +asnp_lite_structure_weights = [[], [], [[0.19914183020591736, 0.9278576374053955]], [[0.010816320776939392, 0.888792097568512]], [[0.9473835825920105, 0.6303419470787048, 0.1704932451248169, 0.05950307101011276]], [[0.9560931324958801, 0.7898273468017578, 0.36138781905174255, 0.07344610244035721]], [[0.9213919043540955, 0.13418640196323395, 0.8371981978416443, 0.07936054468154907]], [[0.9441559910774231, 0.9435100555419922, 0.7253988981246948, 0.13498817384243011]], [[0.9964852333068848, 0.8427878618240356, 0.8895476460456848, 0.11014710366725922, 0.6270533204078674, 0.44782018661499023, 0.61344975233078, 0.44898226857185364]], [[0.9970942735671997, 0.7105681896209717, 0.5078442096710205, 0.0951600968837738, 0.624282717704773, 0.8527252674102783, 0.8105692863464355, 0.7857823967933655]], [[0.6180334091186523, 0.11882413923740387, 0.06102970987558365, 0.04484326392412186, 0.05602221190929413, 0.052324872463941574, 0.9969874024391174, 0.9987731575965881]]] # pylint: disable=line-too-long + + +@dataclasses.dataclass +class AssembleNet(hyperparams.Config): + model_id: str = '50' + num_frames: int = 0 + combine_method: str = 'sigmoid' + blocks: Tuple[BlockSpec, ...] = tuple() + + +@dataclasses.dataclass +class Backbone3D(backbones_3d.Backbone3D): + """Configuration for backbones. + + Attributes: + type: 'str', type of backbone be used, on the of fields below. + resnet: resnet3d backbone config. 
+ """ + type: str = 'assemblenet' + assemblenet: AssembleNet = AssembleNet() + + +@dataclasses.dataclass +class AssembleNetModel(video_classification.VideoClassificationModel): + """The AssembleNet model config.""" + model_type: str = 'assemblenet' + backbone: Backbone3D = Backbone3D() + norm_activation: common.NormActivation = common.NormActivation( + norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=True) + max_pool_preditions: bool = False + + +@exp_factory.register_config_factory('assemblenet50_kinetics600') +def assemblenet_kinetics600() -> cfg.ExperimentConfig: + """Video classification on Videonet with assemblenet.""" + exp = video_classification.video_classification_kinetics600() + + feature_shape = (32, 224, 224, 3) + exp.task.train_data.global_batch_size = 1024 + exp.task.validation_data.global_batch_size = 32 + exp.task.train_data.feature_shape = feature_shape + exp.task.validation_data.feature_shape = (120, 224, 224, 3) + exp.task.train_data.dtype = 'bfloat16' + exp.task.validation_data.dtype = 'bfloat16' + + model = AssembleNetModel() + model.backbone.assemblenet.model_id = '50' + model.backbone.assemblenet.blocks = flat_lists_to_blocks( + asn50_structure, asn_structure_weights) + model.backbone.assemblenet.num_frames = feature_shape[0] + exp.task.model = model + + assert exp.task.model.backbone.assemblenet.num_frames > 0, ( + f'backbone num_frames ' + f'{exp.task.model.backbone.assemblenet}') + + return exp diff --git a/official/vision/beta/projects/assemblenet/modeling/assemblenet.py b/official/vision/beta/projects/assemblenet/modeling/assemblenet.py new file mode 100644 index 0000000000000000000000000000000000000000..e5de6b408ee5ce883f62792c4f0dc3d49d9c7eb0 --- /dev/null +++ b/official/vision/beta/projects/assemblenet/modeling/assemblenet.py @@ -0,0 +1,1073 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Contains definitions for the AssembleNet [1] models. + +Requires the AssembleNet architecture to be specified in +FLAGS.model_structure (and optionally FLAGS.model_edge_weights). +This structure is a list corresponding to a graph representation of the +network, where a node is a convolutional block and an edge specifies a +connection from one block to another as described in [1]. + +Each node itself (in the structure list) is a list with the following format: +[block_level, [list_of_input_blocks], number_filter, temporal_dilation, +spatial_stride]. [list_of_input_blocks] should be the list of node indexes whose +values are less than the index of the node itself. The 'stems' of the network +directly taking raw inputs follow a different node format: +[stem_type, temporal_dilation]. The stem_type is -1 for RGB stem and is -2 for +optical flow stem. + +Also note that the codes in this file could be used for one-shot differentiable +connection search by (1) giving an overly connected structure as +FLAGS.model_structure and by (2) setting FLAGS.model_edge_weights to be '[]'. +The 'agg_weights' variables will specify which connections are needed and which +are not, once trained. + +[1] Michael S. Ryoo, AJ Piergiovanni, Mingxing Tan, Anelia Angelova, + AssembleNet: Searching for Multi-Stream Neural Connectivity in Video + Architectures. 
ICLR 2020 + https://arxiv.org/abs/1905.13209 + +It uses (2+1)D convolutions for video representations. The main AssembleNet +takes a 4-D (N*T)HWC tensor as an input (i.e., the batch dim and time dim are +mixed), and it reshapes a tensor to NT(H*W)C whenever a 1-D temporal conv. is +necessary. This is to run this on TPU efficiently. +""" + +import functools +import math +from typing import Any, Mapping, List, Callable, Optional + +from absl import logging +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling import factory_3d as model_factory +from official.vision.beta.modeling.backbones import factory as backbone_factory +from official.vision.beta.projects.assemblenet.configs import assemblenet as cfg +from official.vision.beta.projects.assemblenet.modeling import rep_flow_2d_layer as rf + +layers = tf.keras.layers +intermediate_channel_size = [64, 128, 256, 512] + + +def fixed_padding(inputs, kernel_size): + """Pads the input along the spatial dimensions independently of input size. + + Args: + inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch, + height, width, channels]` depending on `data_format`. + kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d` + operations. Should be a positive integer. + + Returns: + A padded `Tensor` of the same `data_format` with size either intact + (if `kernel_size == 1`) or padded (if `kernel_size > 1`). 
+ """ + data_format = tf.keras.backend.image_data_format() + pad_total = kernel_size - 1 + pad_beg = pad_total // 2 + pad_end = pad_total - pad_beg + if data_format == 'channels_first': + padded_inputs = tf.pad( + inputs, [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]) + else: + padded_inputs = tf.pad( + inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) + + return padded_inputs + + +def reshape_temporal_conv1d_bn(inputs: tf.Tensor, + filters: int, + kernel_size: int, + num_frames: int = 32, + temporal_dilation: int = 1, + bn_decay: float = rf.BATCH_NORM_DECAY, + bn_epsilon: float = rf.BATCH_NORM_EPSILON, + use_sync_bn: bool = False): + """Performs 1D temporal conv. + + followed by batch normalization with reshaping. + + Args: + inputs: `Tensor` of size `[batch*time, height, width, channels]`. Only + supports 'channels_last' as the data format. + filters: `int` number of filters in the convolution. + kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d` + operations. Should be a positive integer. + num_frames: `int` number of frames in the input tensor. + temporal_dilation: `int` temporal dilatioin size for the 1D conv. + bn_decay: `float` batch norm decay parameter to use. + bn_epsilon: `float` batch norm epsilon parameter to use. + use_sync_bn: use synchronized batch norm for TPU. + + Returns: + A padded `Tensor` of the same `data_format` with size either intact + (if `kernel_size == 1`) or padded (if `kernel_size > 1`). 
+ """ + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + feature_shape = inputs.shape + + inputs = tf.reshape( + inputs, + [-1, num_frames, feature_shape[1] * feature_shape[2], feature_shape[3]]) + + if temporal_dilation == 1: + inputs = tf.keras.layers.Conv2D( + filters=filters, + kernel_size=(kernel_size, 1), + strides=1, + padding='SAME', + use_bias=False, + kernel_initializer=tf.keras.initializers.VarianceScaling())( + inputs=inputs) + else: + inputs = tf.keras.layers.Conv2D( + filters=filters, + kernel_size=(kernel_size, 1), + strides=1, + padding='SAME', + dilation_rate=(temporal_dilation, 1), + use_bias=False, + kernel_initializer=tf.keras.initializers.TruncatedNormal( + stddev=math.sqrt(2.0 / (kernel_size * feature_shape[3]))))( + inputs=inputs) + + num_channel = inputs.shape[3] + inputs = tf.reshape(inputs, + [-1, feature_shape[1], feature_shape[2], num_channel]) + inputs = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + inputs) + inputs = tf.nn.relu(inputs) + + return inputs + + +def conv2d_fixed_padding(inputs: tf.Tensor, filters: int, kernel_size: int, + strides: int): + """Strided 2-D convolution with explicit padding. + + The padding is consistent and is based only on `kernel_size`, not on the + dimensions of `inputs` (as opposed to using `tf.keras.layers.Conv2D` alone). + + Args: + inputs: `Tensor` of size `[batch, channels, height_in, width_in]`. + filters: `int` number of filters in the convolution. + kernel_size: `int` size of the kernel to be used in the convolution. + strides: `int` strides of the convolution. + + Returns: + A `Tensor` of shape `[batch, filters, height_out, width_out]`. 
+ """ + if strides > 1: + inputs = fixed_padding(inputs, kernel_size) + + return tf.keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=('SAME' if strides == 1 else 'VALID'), + use_bias=False, + kernel_initializer=tf.keras.initializers.VarianceScaling())( + inputs=inputs) + + +def conv3d_same_padding(inputs: tf.Tensor, + filters: int, + kernel_size: int, + strides: int, + temporal_dilation: int = 1, + do_2d_conv: bool = False): + """3D convolution layer wrapper. + + Uses conv3d function. + + Args: + inputs: 5D `Tensor` following the data_format. + filters: `int` number of filters in the convolution. + kernel_size: `int` size of the kernel to be used in the convolution. + strides: `int` strides of the convolution. + temporal_dilation: `int` temporal dilatioin size for the 1D conv. + do_2d_conv: `bool` indicating whether to do 2d conv. If false, do 3D conv. + + Returns: + A `Tensor` of shape `[batch, time_in, height_in, width_in, channels]`. + """ + if isinstance(kernel_size, int): + if do_2d_conv: + kernel_size = [1, kernel_size, kernel_size] + else: + kernel_size = [kernel_size, kernel_size, kernel_size] + + return tf.keras.layers.Conv3D( + filters=filters, + kernel_size=kernel_size, + strides=[1, strides, strides], + padding='SAME', + dilation_rate=[temporal_dilation, 1, 1], + use_bias=False, + kernel_initializer=tf.keras.initializers.VarianceScaling())( + inputs=inputs) + + +def bottleneck_block_interleave(inputs: tf.Tensor, + filters: int, + inter_filters: int, + strides: int, + use_projection: bool = False, + num_frames: int = 32, + temporal_dilation: int = 1, + bn_decay: float = rf.BATCH_NORM_DECAY, + bn_epsilon: float = rf.BATCH_NORM_EPSILON, + use_sync_bn: bool = False, + step=1): + """Interleaves a standard 2D residual module and (2+1)D residual module. + + Bottleneck block variant for residual networks with BN after convolutions. + + Args: + inputs: `Tensor` of size `[batch*time, channels, height, width]`. 
+ filters: `int` number of filters for the first conv. layer. The last conv. + layer will use 4 times as many filters. + inter_filters: `int` number of filters for the second conv. layer. + strides: `int` block stride. If greater than 1, this block will ultimately + downsample the input spatially. + use_projection: `bool` for whether this block should use a projection + shortcut (versus the default identity shortcut). This is usually `True` + for the first block of a block group, which may change the number of + filters and the resolution. + num_frames: `int` number of frames in the input tensor. + temporal_dilation: `int` temporal dilatioin size for the 1D conv. + bn_decay: `float` batch norm decay parameter to use. + bn_epsilon: `float` batch norm epsilon parameter to use. + use_sync_bn: use synchronized batch norm for TPU. + step: `int` to decide whether to put 2D module or (2+1)D module. + + Returns: + The output `Tensor` of the block. + """ + if strides > 1 and not use_projection: + raise ValueError('strides > 1 requires use_projections=True, otherwise the ' + 'inputs and shortcut will have shape mismatch') + shortcut = inputs + if use_projection: + # Projection shortcut only in first block within a group. Bottleneck blocks + # end with 4 times the number of filters. 
+ filters_out = 4 * filters + shortcut = conv2d_fixed_padding( + inputs=inputs, filters=filters_out, kernel_size=1, strides=strides) + shortcut = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + shortcut) + + if step % 2 == 1: + k = 3 + + inputs = reshape_temporal_conv1d_bn( + inputs=inputs, + filters=filters, + kernel_size=k, + num_frames=num_frames, + temporal_dilation=temporal_dilation, + bn_decay=bn_decay, + bn_epsilon=bn_epsilon, + use_sync_bn=use_sync_bn) + else: + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=1, strides=1) + inputs = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + inputs) + inputs = tf.nn.relu(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=inter_filters, kernel_size=3, strides=strides) + inputs = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + inputs) + inputs = tf.nn.relu(inputs) + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=4 * filters, kernel_size=1, strides=1) + inputs = rf.build_batch_norm( + init_zero=True, + bn_decay=bn_decay, + bn_epsilon=bn_epsilon, + use_sync_bn=use_sync_bn)( + inputs) + + return tf.nn.relu(inputs + shortcut) + + +def block_group(inputs: tf.Tensor, + filters: int, + block_fn: Callable[..., tf.Tensor], + blocks: int, + strides: int, + name, + block_level, + num_frames=32, + temporal_dilation=1): + """Creates one group of blocks for the AssembleNett model. + + Args: + inputs: `Tensor` of size `[batch*time, channels, height, width]`. + filters: `int` number of filters for the first convolution of the layer. + block_fn: `function` for the block to use within the model + blocks: `int` number of blocks contained in the layer. + strides: `int` stride to use for the first convolution of the layer. If + greater than 1, this layer will downsample the input. + name: `str` name for the Tensor output of the block layer. 
+ block_level: `int` block level in AssembleNet. + num_frames: `int` number of frames in the input tensor. + temporal_dilation: `int` temporal dilatioin size for the 1D conv. + + Returns: + The output `Tensor` of the block layer. + """ + # Only the first block per block_group uses projection shortcut and strides. + inputs = block_fn( + inputs, + filters, + intermediate_channel_size[block_level], + strides, + use_projection=True, + num_frames=num_frames, + temporal_dilation=temporal_dilation, + step=0) + + for i in range(1, blocks): + inputs = block_fn( + inputs, + filters, + intermediate_channel_size[block_level], + 1, + num_frames=num_frames, + temporal_dilation=temporal_dilation, + step=i) + + return tf.identity(inputs, name) + + +def spatial_resize_and_concat(inputs): + """Concatenates multiple different sized tensors channel-wise. + + Args: + inputs: A list of `Tensors` of size `[batch*time, channels, height, width]`. + + Returns: + The output `Tensor` after concatenation. + """ + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + # Do nothing if only 1 input + if len(inputs) == 1: + return inputs[0] + if data_format != 'channels_last': + return inputs + + # get smallest spatial size and largest channels + sm_size = [1000, 1000] + for inp in inputs: + # assume batch X height x width x channels + sm_size[0] = min(sm_size[0], inp.shape[1]) + sm_size[1] = min(sm_size[1], inp.shape[2]) + + for i in range(len(inputs)): + if inputs[i].shape[1] != sm_size[0] or inputs[i].shape[2] != sm_size[1]: + ratio = (inputs[i].shape[1] + 1) // sm_size[0] + inputs[i] = tf.keras.layers.MaxPool2D([ratio, ratio], + ratio, + padding='same')( + inputs[i]) + + return tf.concat(inputs, 3) + + +class _ApplyEdgeWeight(layers.Layer): + """Multiply weight on each input tensor. + + A weight is assigned for each connection (i.e., each input tensor). This layer + is used by the multi_connection_fusion to compute the weighted inputs. 
+ """ + + def __init__(self, + weights_shape, + index: int = None, + use_5d_mode: bool = False, + model_edge_weights: Optional[List[Any]] = None, + **kwargs): + """Constructor. + + Args: + weights_shape: shape of the weights. Should equals to [len(inputs)]. + index: `int` index of the block within the AssembleNet architecture. Used + for summation weight initial loading. + use_5d_mode: `bool` indicating whether the inputs are in 5D tensor or 4D. + model_edge_weights: AssembleNet model structure connection weights in the + string format. + **kwargs: pass through arguments. + """ + super(_ApplyEdgeWeight, self).__init__(**kwargs) + + self._weights_shape = weights_shape + self._index = index + self._use_5d_mode = use_5d_mode + self._model_edge_weights = model_edge_weights + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + def get_config(self): + config = { + 'weights_shape': self._weights_shape, + 'index': self._index, + 'use_5d_mode': self._use_5d_mode, + 'model_edge_weights': self._model_edge_weights, + } + base_config = super(_ApplyEdgeWeight, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape: tf.TensorShape): + if self._weights_shape[0] == 1: + self._edge_weights = 1.0 + return + + if self._index is None or not self._model_edge_weights: + self._edge_weights = self.add_weight( + shape=self._weights_shape, + initializer=tf.keras.initializers.TruncatedNormal( + mean=0.0, stddev=0.01), + trainable=True, + name='agg_weights') + else: + initial_weights_after_sigmoid = np.asarray( + self._model_edge_weights[self._index][0]).astype('float32') + # Initial_weights_after_sigmoid is never 0, as the initial weights are + # based the results of a successful connectivity search. + initial_weights = -np.log(1. / initial_weights_after_sigmoid - 1.) 
+ self._edge_weights = self.add_weight( + shape=self._weights_shape, + initializer=tf.constant_initializer(initial_weights), + trainable=False, + name='agg_weights') + + def call(self, + inputs: List[tf.Tensor], + training: bool = None) -> Mapping[Any, List[tf.Tensor]]: + use_5d_mode = self._use_5d_mode + dtype = inputs[0].dtype + assert len(inputs) > 1 + + if use_5d_mode: + h_channel_loc = 2 + else: + h_channel_loc = 1 + + # get smallest spatial size and largest channels + sm_size = [10000, 10000] + lg_channel = 0 + for inp in inputs: + # assume batch X height x width x channels + sm_size[0] = min(sm_size[0], inp.shape[h_channel_loc]) + sm_size[1] = min(sm_size[1], inp.shape[h_channel_loc + 1]) + lg_channel = max(lg_channel, inp.shape[-1]) + + # loads or creates weight variables to fuse multiple inputs + weights = tf.math.sigmoid(tf.cast(self._edge_weights, dtype)) + + # Compute weighted inputs. We group inputs with the same channels. + per_channel_inps = dict({0: []}) + for i, inp in enumerate(inputs): + if inp.shape[h_channel_loc] != sm_size[0] or inp.shape[h_channel_loc + 1] != sm_size[1]: # pylint: disable=line-too-long + assert sm_size[0] != 0 + ratio = (inp.shape[h_channel_loc] + 1) // sm_size[0] + if use_5d_mode: + inp = tf.keras.layers.MaxPool3D([1, ratio, ratio], [1, ratio, ratio], + padding='same')( + inp) + else: + inp = tf.keras.layers.MaxPool2D([ratio, ratio], ratio, + padding='same')( + inp) + + weights = tf.cast(weights, inp.dtype) + if inp.shape[-1] in per_channel_inps: + per_channel_inps[inp.shape[-1]].append(weights[i] * inp) + else: + per_channel_inps.update({inp.shape[-1]: [weights[i] * inp]}) + return per_channel_inps + + +def multi_connection_fusion(inputs: List[tf.Tensor], + index: int = None, + use_5d_mode: bool = False, + model_edge_weights: Optional[List[Any]] = None): + """Do weighted summation of multiple different sized tensors. 
+ + A weight is assigned for each connection (i.e., each input tensor), and their + summation weights are learned. Uses spatial max pooling and 1x1 conv. + to match their sizes. + + Args: + inputs: A `Tensor`. Either 4D or 5D, depending of use_5d_mode. + index: `int` index of the block within the AssembleNet architecture. Used + for summation weight initial loading. + use_5d_mode: `bool` indicating whether the inputs are in 5D tensor or 4D. + model_edge_weights: AssembleNet model structure connection weights in the + string format. + + Returns: + The output `Tensor` after concatenation. + """ + + if use_5d_mode: + h_channel_loc = 2 + conv_function = conv3d_same_padding + else: + h_channel_loc = 1 + conv_function = conv2d_fixed_padding + + # If only 1 input. + if len(inputs) == 1: + return inputs[0] + + # get smallest spatial size and largest channels + sm_size = [10000, 10000] + lg_channel = 0 + for inp in inputs: + # assume batch X height x width x channels + sm_size[0] = min(sm_size[0], inp.shape[h_channel_loc]) + sm_size[1] = min(sm_size[1], inp.shape[h_channel_loc + 1]) + lg_channel = max(lg_channel, inp.shape[-1]) + + per_channel_inps = _ApplyEdgeWeight( + weights_shape=[len(inputs)], + index=index, + use_5d_mode=use_5d_mode, + model_edge_weights=model_edge_weights)( + inputs) + + # Adding 1x1 conv layers (to match channel size) and fusing all inputs. + # We add inputs with the same channels first before applying 1x1 conv to save + # memory. 
+ inps = []
+ for key, channel_inps in per_channel_inps.items():
+ if len(channel_inps) < 1:
+ continue
+ if len(channel_inps) == 1:
+ if key == lg_channel:
+ inp = channel_inps[0]
+ else:
+ inp = conv_function(
+ channel_inps[0], lg_channel, kernel_size=1, strides=1)
+ inps.append(inp)
+ else:
+ if key == lg_channel:
+ inp = tf.add_n(channel_inps)
+ else:
+ inp = conv_function(
+ tf.add_n(channel_inps), lg_channel, kernel_size=1, strides=1)
+ inps.append(inp)
+
+ return tf.add_n(inps)
+
+
+def rgb_conv_stem(inputs,
+ num_frames,
+ filters,
+ temporal_dilation,
+ bn_decay: float = rf.BATCH_NORM_DECAY,
+ bn_epsilon: float = rf.BATCH_NORM_EPSILON,
+ use_sync_bn: bool = False):
+ """Layers for an RGB stem.
+
+ Args:
+ inputs: A `Tensor` of size `[batch*time, height, width, channels]`.
+ num_frames: `int` number of frames in the input tensor.
+ filters: `int` number of filters in the convolution.
+ temporal_dilation: `int` temporal dilation size for the 1D conv.
+ bn_decay: `float` batch norm decay parameter to use.
+ bn_epsilon: `float` batch norm epsilon parameter to use.
+ use_sync_bn: use synchronized batch norm for TPU.
+
+ Returns:
+ The output `Tensor`.
+ """ + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + if temporal_dilation < 1: + temporal_dilation = 1 + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=7, strides=2) + inputs = tf.identity(inputs, 'initial_conv') + inputs = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + inputs) + inputs = tf.nn.relu(inputs) + + inputs = reshape_temporal_conv1d_bn( + inputs=inputs, + filters=filters, + kernel_size=5, + num_frames=num_frames, + temporal_dilation=temporal_dilation, + bn_decay=bn_decay, + bn_epsilon=bn_epsilon, + use_sync_bn=use_sync_bn) + + inputs = tf.keras.layers.MaxPool2D( + pool_size=3, strides=2, padding='SAME')( + inputs=inputs) + inputs = tf.identity(inputs, 'initial_max_pool') + + return inputs + + +def flow_conv_stem(inputs, + filters, + temporal_dilation, + bn_decay: float = rf.BATCH_NORM_DECAY, + bn_epsilon: float = rf.BATCH_NORM_EPSILON, + use_sync_bn: bool = False): + """Layers for an optical flow stem. + + Args: + inputs: A `Tensor` of size `[batch*time, height, width, channels]`. + filters: `int` number of filters in the convolution. + temporal_dilation: `int` temporal dilatioin size for the 1D conv. + bn_decay: `float` batch norm decay parameter to use. + bn_epsilon: `float` batch norm epsilon parameter to use. + use_sync_bn: use synchronized batch norm for TPU. + + Returns: + The output `Tensor`. 
+ """ + + if temporal_dilation < 1: + temporal_dilation = 1 + + inputs = conv2d_fixed_padding( + inputs=inputs, filters=filters, kernel_size=7, strides=2) + inputs = tf.identity(inputs, 'initial_conv') + inputs = rf.build_batch_norm( + bn_decay=bn_decay, bn_epsilon=bn_epsilon, use_sync_bn=use_sync_bn)( + inputs) + inputs = tf.nn.relu(inputs) + + inputs = tf.keras.layers.MaxPool2D( + pool_size=2, strides=2, padding='SAME')( + inputs=inputs) + inputs = tf.identity(inputs, 'initial_max_pool') + + return inputs + + +def multi_stream_heads(streams, + final_nodes, + num_frames, + num_classes, + max_pool_preditions: bool = False): + """Layers for the classification heads. + + Args: + streams: A list of 4D `Tensors` following the data_format. + final_nodes: A list of `int` where classification heads will be added. + num_frames: `int` number of frames in the input tensor. + num_classes: `int` number of possible classes for video classification. + max_pool_preditions: Use max-pooling on predictions instead of mean + pooling on features. It helps if you have more than 32 frames. + + Returns: + The output `Tensor`. + """ + inputs = streams[final_nodes[0]] + num_channels = inputs.shape[-1] + + def _pool_and_reshape(net): + # The activation is 7x7 so this is a global average pool. 
+ net = tf.keras.layers.GlobalAveragePooling2D()(inputs=net) + net = tf.identity(net, 'final_avg_pool0') + + net = tf.reshape(net, [-1, num_frames, num_channels]) + if not max_pool_preditions: + net = tf.reduce_mean(net, 1) + return net + + outputs = _pool_and_reshape(inputs) + + for i in range(1, len(final_nodes)): + inputs = streams[final_nodes[i]] + + inputs = _pool_and_reshape(inputs) + + outputs = outputs + inputs + + if len(final_nodes) > 1: + outputs = outputs / len(final_nodes) + + outputs = tf.keras.layers.Dense( + units=num_classes, + kernel_initializer=tf.random_normal_initializer(stddev=.01))( + inputs=outputs) + outputs = tf.identity(outputs, 'final_dense0') + if max_pool_preditions: + pre_logits = outputs / np.sqrt(num_frames) + acts = tf.nn.softmax(pre_logits, axis=1) + outputs = tf.math.multiply(outputs, acts) + + outputs = tf.reduce_sum(outputs, 1) + + return outputs + + +class AssembleNet(tf.keras.Model): + """AssembleNet backbone.""" + + def __init__( + self, + block_fn, + num_blocks: List[int], + num_frames: int, + model_structure: List[Any], + input_specs: layers.InputSpec = layers.InputSpec( + shape=[None, None, None, None, 3]), + model_edge_weights: Optional[List[Any]] = None, + bn_decay: float = rf.BATCH_NORM_DECAY, + bn_epsilon: float = rf.BATCH_NORM_EPSILON, + use_sync_bn: bool = False, + combine_method: str = 'sigmoid', + **kwargs): + """Generator for AssembleNet v1 models. + + Args: + block_fn: `function` for the block to use within the model. Currently only + has `bottleneck_block_interleave as its option`. + num_blocks: list of 4 `int`s denoting the number of blocks to include in + each of the 4 block groups. Each group consists of blocks that take + inputs of the same resolution. + num_frames: the number of frames in the input tensor. + model_structure: AssembleNet model structure in the string format. + input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. + Dimension should be `[batch*time, height, width, channels]`. 
+ model_edge_weights: AssembleNet model structure connection weights in the + string format. + bn_decay: `float` batch norm decay parameter to use. + bn_epsilon: `float` batch norm epsilon parameter to use. + use_sync_bn: use synchronized batch norm for TPU. + combine_method: 'str' for the weighted summation to fuse different blocks. + **kwargs: pass through arguments. + """ + inputs = tf.keras.Input(shape=input_specs.shape[1:]) + data_format = tf.keras.backend.image_data_format() + + # Creation of the model graph. + logging.info('model_structure=%r', model_structure) + logging.info('model_structure=%r', model_structure) + logging.info('model_edge_weights=%r', model_edge_weights) + structure = model_structure + + original_num_frames = num_frames + assert num_frames > 0, f'Invalid num_frames {num_frames}' + + grouping = {-3: [], -2: [], -1: [], 0: [], 1: [], 2: [], 3: []} + for i in range(len(structure)): + grouping[structure[i][0]].append(i) + + stem_count = len(grouping[-3]) + len(grouping[-2]) + len(grouping[-1]) + + assert stem_count != 0 + stem_filters = 128 // stem_count + + original_inputs = inputs + if len(input_specs.shape) == 5: + first_dim = ( + input_specs.shape[0] * input_specs.shape[1] + if input_specs.shape[0] and input_specs.shape[1] else -1) + reshape_inputs = tf.reshape(inputs, (first_dim,) + input_specs.shape[2:]) + elif len(input_specs.shape) == 4: + reshape_inputs = original_inputs + else: + raise ValueError( + f'Expect input spec to be 4 or 5 dimensions {input_specs.shape}') + if grouping[-2]: + # Instead of loading optical flows as inputs from data pipeline, we are + # applying the "Representation Flow" to RGB frames so that we can compute + # the flow within TPU/GPU on fly. It's essentially optical flow since we + # do it with RGBs. 
+ axis = 3 if data_format == 'channels_last' else 1 + flow_inputs = rf.RepresentationFlow( + original_num_frames, + depth=reshape_inputs.shape.as_list()[axis], + num_iter=40, + bottleneck=1)( + reshape_inputs) + streams = [] + + for i in range(len(structure)): + with tf.name_scope('Node_' + str(i)): + if structure[i][0] == -1: + inputs = rgb_conv_stem( + reshape_inputs, + original_num_frames, + stem_filters, + temporal_dilation=structure[i][1], + bn_decay=bn_decay, + bn_epsilon=bn_epsilon, + use_sync_bn=use_sync_bn) + streams.append(inputs) + elif structure[i][0] == -2: + inputs = flow_conv_stem( + flow_inputs, + stem_filters, + temporal_dilation=structure[i][1], + bn_decay=bn_decay, + bn_epsilon=bn_epsilon, + use_sync_bn=use_sync_bn) + streams.append(inputs) + + else: + num_frames = original_num_frames + block_number = structure[i][0] + + combined_inputs = [] + if combine_method == 'concat': + combined_inputs = [ + streams[structure[i][1][j]] + for j in range(0, len(structure[i][1])) + ] + + combined_inputs = spatial_resize_and_concat(combined_inputs) + + else: + combined_inputs = [ + streams[structure[i][1][j]] + for j in range(0, len(structure[i][1])) + ] + + combined_inputs = multi_connection_fusion( + combined_inputs, index=i, model_edge_weights=model_edge_weights) + + graph = block_group( + inputs=combined_inputs, + filters=structure[i][2], + block_fn=block_fn, + blocks=num_blocks[block_number], + strides=structure[i][4], + name='block_group' + str(i), + block_level=structure[i][0], + num_frames=num_frames, + temporal_dilation=structure[i][3]) + + streams.append(graph) + + super(AssembleNet, self).__init__( + inputs=original_inputs, outputs=streams, **kwargs) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class AssembleNetModel(tf.keras.Model): + """An AssembleNet model builder.""" + + def __init__(self, + backbone, + num_classes, + num_frames: int, + model_structure: List[Any], + input_specs: Mapping[str, tf.keras.layers.InputSpec] = 
None, + max_pool_preditions: bool = False, + **kwargs): + if not input_specs: + input_specs = { + 'image': layers.InputSpec(shape=[None, None, None, None, 3]) + } + self._self_setattr_tracking = False + self._config_dict = { + 'backbone': backbone, + 'num_classes': num_classes, + 'num_frames': num_frames, + 'input_specs': input_specs, + 'model_structure': model_structure, + } + self._input_specs = input_specs + self._backbone = backbone + grouping = {-3: [], -2: [], -1: [], 0: [], 1: [], 2: [], 3: []} + for i in range(len(model_structure)): + grouping[model_structure[i][0]].append(i) + + inputs = { + k: tf.keras.Input(shape=v.shape[1:]) for k, v in input_specs.items() + } + streams = self._backbone(inputs['image']) + + outputs = multi_stream_heads( + streams, + grouping[3], + num_frames, + num_classes, + max_pool_preditions=max_pool_preditions) + + super(AssembleNetModel, self).__init__( + inputs=inputs, outputs=outputs, **kwargs) + + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + return dict(backbone=self.backbone) + + @property + def backbone(self): + return self._backbone + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) + + +ASSEMBLENET_SPECS = { + 26: { + 'block': bottleneck_block_interleave, + 'num_blocks': [2, 2, 2, 2] + }, + 38: { + 'block': bottleneck_block_interleave, + 'num_blocks': [2, 4, 4, 2] + }, + 50: { + 'block': bottleneck_block_interleave, + 'num_blocks': [3, 4, 6, 3] + }, + 68: { + 'block': bottleneck_block_interleave, + 'num_blocks': [3, 4, 12, 3] + }, + 77: { + 'block': bottleneck_block_interleave, + 'num_blocks': [3, 4, 15, 3] + }, + 101: { + 'block': bottleneck_block_interleave, + 'num_blocks': [3, 4, 23, 3] + }, +} + + +def assemblenet_v1(assemblenet_depth: int, + num_classes: int, + num_frames: int, + model_structure: List[Any], + input_specs: layers.InputSpec = layers.InputSpec( 
+ shape=[None, None, None, None, 3]), + model_edge_weights: Optional[List[Any]] = None, + max_pool_preditions: bool = False, + combine_method: str = 'sigmoid', + **kwargs): + """Returns the AssembleNet model for a given size and number of output classes.""" + + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + if assemblenet_depth not in ASSEMBLENET_SPECS: + raise ValueError('Not a valid assemblenet_depth:', assemblenet_depth) + + input_specs_dict = {'image': input_specs} + params = ASSEMBLENET_SPECS[assemblenet_depth] + backbone = AssembleNet( + block_fn=params['block'], + num_blocks=params['num_blocks'], + num_frames=num_frames, + model_structure=model_structure, + input_specs=input_specs, + model_edge_weights=model_edge_weights, + combine_method=combine_method, + **kwargs) + return AssembleNetModel( + backbone, + num_classes=num_classes, + num_frames=num_frames, + model_structure=model_structure, + input_specs=input_specs_dict, + max_pool_preditions=max_pool_preditions, + **kwargs) + + +@backbone_factory.register_backbone_builder('assemblenet') +def build_assemblenet_v1( + input_specs: tf.keras.layers.InputSpec, + model_config: cfg.Backbone3D, + l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: + """Builds assemblenet backbone.""" + del l2_regularizer + + backbone_type = model_config.backbone.type + backbone_cfg = model_config.backbone.get() + norm_activation_config = model_config.norm_activation + assert backbone_type == 'assemblenet' + + assemblenet_depth = int(backbone_cfg.model_id) + if assemblenet_depth not in ASSEMBLENET_SPECS: + raise ValueError('Not a valid assemblenet_depth:', assemblenet_depth) + model_structure, model_edge_weights = cfg.blocks_to_flat_lists( + backbone_cfg.blocks) + params = ASSEMBLENET_SPECS[assemblenet_depth] + block_fn = functools.partial( + params['block'], + use_sync_bn=norm_activation_config.use_sync_bn, + bn_decay=norm_activation_config.norm_momentum, + 
bn_epsilon=norm_activation_config.norm_epsilon) + backbone = AssembleNet( + block_fn=block_fn, + num_blocks=params['num_blocks'], + num_frames=backbone_cfg.num_frames, + model_structure=model_structure, + input_specs=input_specs, + model_edge_weights=model_edge_weights, + combine_method=backbone_cfg.combine_method, + use_sync_bn=norm_activation_config.use_sync_bn, + bn_decay=norm_activation_config.norm_momentum, + bn_epsilon=norm_activation_config.norm_epsilon) + logging.info('Number of parameters in AssembleNet backbone: %f M.', + backbone.count_params() / 10.**6) + return backbone + + +@model_factory.register_model_builder('assemblenet') +def build_assemblenet_model( + input_specs: tf.keras.layers.InputSpec, + model_config: cfg.AssembleNetModel, + num_classes: int, + l2_regularizer: tf.keras.regularizers.Regularizer = None): + """Builds assemblenet model.""" + input_specs_dict = {'image': input_specs} + backbone = build_assemblenet_v1(input_specs, model_config, l2_regularizer) + backbone_cfg = model_config.backbone.get() + model_structure, _ = cfg.blocks_to_flat_lists(backbone_cfg.blocks) + model = AssembleNetModel( + backbone, + num_classes=num_classes, + num_frames=backbone_cfg.num_frames, + model_structure=model_structure, + input_specs=input_specs_dict, + max_pool_preditions=model_config.max_pool_preditions) + return model diff --git a/official/vision/beta/projects/assemblenet/modeling/rep_flow_2d_layer.py b/official/vision/beta/projects/assemblenet/modeling/rep_flow_2d_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..d29968a668d6e6695b4442983239aeacd9b59b61 --- /dev/null +++ b/official/vision/beta/projects/assemblenet/modeling/rep_flow_2d_layer.py @@ -0,0 +1,405 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Contains definitions for 'Representation Flow' layer [1]. + +Representation flow layer is a generalization of optical flow extraction; the +layer could be inserted anywhere within a CNN to capture feature movements. This +is the version taking 4D tensor with the shape [batch*time, height, width, +channels], to make this run on TPU. + +[1] AJ Piergiovanni and Michael S. Ryoo, + Representation Flow for Action Recognition. CVPR 2019. +""" + +import numpy as np +import tensorflow as tf + +layers = tf.keras.layers +BATCH_NORM_DECAY = 0.99 +BATCH_NORM_EPSILON = 1e-5 + + +def build_batch_norm(init_zero: bool = False, + bn_decay: float = BATCH_NORM_DECAY, + bn_epsilon: float = BATCH_NORM_EPSILON, + use_sync_bn: bool = False): + """Performs a batch normalization followed by a ReLU. + + Args: + init_zero: `bool` if True, initializes scale parameter of batch + normalization with 0 instead of 1 (default). + bn_decay: `float` batch norm decay parameter to use. + bn_epsilon: `float` batch norm epsilon parameter to use. + use_sync_bn: use synchronized batch norm for TPU. + + Returns: + A normalized `Tensor` with the same `data_format`. 
+ """ + + if init_zero: + gamma_initializer = tf.zeros_initializer() + else: + gamma_initializer = tf.ones_initializer() + + data_format = tf.keras.backend.image_data_format() + assert data_format == 'channels_last' + + if data_format == 'channels_first': + axis = 1 + else: + axis = -1 + + if use_sync_bn: + batch_norm = layers.experimental.SyncBatchNormalization( + axis=axis, + momentum=bn_decay, + epsilon=bn_epsilon, + gamma_initializer=gamma_initializer) + else: + batch_norm = layers.BatchNormalization( + axis=axis, + momentum=bn_decay, + epsilon=bn_epsilon, + fused=True, + gamma_initializer=gamma_initializer) + + return batch_norm + + +def divergence(p1, p2, f_grad_x, f_grad_y, name): + """Computes the divergence value used with TV-L1 optical flow algorithm. + + Args: + p1: 'Tensor' input. + p2: 'Tensor' input in the next frame. + f_grad_x: 'Tensor' x gradient of F value used in TV-L1. + f_grad_y: 'Tensor' y gradient of F value used in TV-L1. + name: 'str' name for the variable scope. + + Returns: + A `Tensor` with the same `data_format` and shape as input. 
+ """ + data_format = tf.keras.backend.image_data_format() + df = 'NHWC' if data_format == 'channels_last' else 'NCHW' + + with tf.name_scope('divergence_' + name): + if data_format == 'channels_last': + p1 = tf.pad(p1[:, :, :-1, :], [[0, 0], [0, 0], [1, 0], [0, 0]]) + p2 = tf.pad(p2[:, :-1, :, :], [[0, 0], [1, 0], [0, 0], [0, 0]]) + else: + p1 = tf.pad(p1[:, :, :, :-1], [[0, 0], [0, 0], [0, 0], [1, 0]]) + p2 = tf.pad(p2[:, :, :-1, :], [[0, 0], [0, 0], [1, 0], [0, 0]]) + + grad_x = tf.nn.conv2d(p1, f_grad_x, [1, 1, 1, 1], 'SAME', data_format=df) + grad_y = tf.nn.conv2d(p2, f_grad_y, [1, 1, 1, 1], 'SAME', data_format=df) + return grad_x + grad_y + + +def forward_grad(x, f_grad_x, f_grad_y, name): + data_format = tf.keras.backend.image_data_format() + with tf.name_scope('forward_grad_' + name): + df = 'NHWC' if data_format == 'channels_last' else 'NCHW' + grad_x = tf.nn.conv2d(x, f_grad_x, [1, 1, 1, 1], 'SAME', data_format=df) + grad_y = tf.nn.conv2d(x, f_grad_y, [1, 1, 1, 1], 'SAME', data_format=df) + return grad_x, grad_y + + +def norm_img(x): + mx = tf.reduce_max(x) + mn = tf.reduce_min(x) + if mx == mn: + return x + else: + return 255 * (x - mn) / (mx - mn) + + +class RepresentationFlow(layers.Layer): + """Computes the representation flow motivated by TV-L1 optical flow.""" + + def __init__(self, + time: int, + depth: int, + num_iter: int = 20, + bottleneck: int = 32, + train_feature_grad: bool = False, + train_divergence: bool = False, + train_flow_grad: bool = False, + train_hyper: bool = False, + **kwargs): + """Constructor. + + Args: + time: 'int' number of frames in the input tensor. + depth: channel depth of the input tensor. + num_iter: 'int' number of iterations to use for the flow computation. + bottleneck: 'int' number of filters to be used for the flow computation. + train_feature_grad: Train image grad params. + train_divergence: train divergence params + train_flow_grad: train flow grad params. + train_hyper: train rep flow hyperparams. 
+ **kwargs: keyword arguments to be passed to the parent constructor. + + Returns: + A `Tensor` with the same `data_format` and shape as input. + """ + super(RepresentationFlow, self).__init__(**kwargs) + + self._time = time + self._depth = depth + self._num_iter = num_iter + self._bottleneck = bottleneck + self._train_feature_grad = train_feature_grad + self._train_divergence = train_divergence + self._train_flow_grad = train_flow_grad + self._train_hyper = train_hyper + + def get_config(self): + config = { + 'time': self._time, + 'num_iter': self._num_iter, + 'bottleneck': self._bottleneck, + 'train_feature_grad': self._train_feature_grad, + 'train_divergence': self._train_divergence, + 'train_flow_grad': self._train_flow_grad, + 'train_hyper': self._train_hyper, + } + base_config = super(RepresentationFlow, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape: tf.TensorShape): + img_grad = np.array([-0.5, 0, 0.5], dtype='float32') + img_grad_x = np.repeat( + np.reshape(img_grad, (1, 3, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.img_grad_x = self.add_weight( + shape=img_grad_x.shape, + initializer=tf.constant_initializer(img_grad_x), + trainable=self._train_feature_grad, + name='img_grad_x') + img_grad_y = np.repeat( + np.reshape(img_grad, (3, 1, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.img_grad_y = self.add_weight( + shape=img_grad_y.shape, + initializer=tf.constant_initializer(img_grad_y), + trainable=self._train_feature_grad, + name='img_grad_y') + + f_grad = np.array([-1, 1], dtype='float32') + f_grad_x = np.repeat( + np.reshape(f_grad, (1, 2, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.f_grad_x = self.add_weight( + shape=f_grad_x.shape, + initializer=tf.constant_initializer(f_grad_x), + trainable=self._train_divergence, + name='f_grad_x') + f_grad_y = np.repeat( + 
np.reshape(f_grad, (2, 1, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.f_grad_y = self.add_weight( + shape=f_grad_y.shape, + initializer=tf.constant_initializer(f_grad_y), + trainable=self._train_divergence, + name='f_grad_y') + + f_grad_x2 = np.repeat( + np.reshape(f_grad, (1, 2, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.f_grad_x2 = self.add_weight( + shape=f_grad_x2.shape, + initializer=tf.constant_initializer(f_grad_x2), + trainable=self._train_flow_grad, + name='f_grad_x2') + f_grad_y2 = np.repeat( + np.reshape(f_grad, (2, 1, 1, 1)), self._bottleneck, axis=2) * np.eye( + self._bottleneck, dtype='float32') + self.f_grad_y2 = self.add_weight( + shape=f_grad_y2.shape, + initializer=tf.constant_initializer(f_grad_y2), + trainable=self._train_flow_grad, + name='f_grad_y2') + + self.t = self.add_weight( + name='theta', + initializer=tf.constant_initializer(0.3), + trainable=self._train_hyper) + self.l = self.add_weight( + name='lambda', + initializer=tf.constant_initializer(0.15), + trainable=self._train_hyper) + self.a = self.add_weight( + name='tau', + initializer=tf.constant_initializer(0.25), + trainable=self._train_hyper) + self.t = tf.abs(self.t) + 1e-12 + self.l_t = self.l * self.t + self.taut = self.a / self.t + + self._bottleneck_conv2 = None + self._bottleneck_conv2 = None + if self._bottleneck > 1: + self._bottleneck_conv1 = layers.Conv2D( + filters=self._bottleneck, + kernel_size=1, + strides=1, + padding='same', + use_bias=False, + kernel_initializer=tf.keras.initializers.VarianceScaling(), + name='rf/bottleneck1') + self._bottleneck_conv2 = layers.Conv2D( + filters=self._depth, + kernel_size=1, + strides=1, + padding='same', + use_bias=False, + kernel_initializer=tf.keras.initializers.VarianceScaling(), + name='rf/bottleneck2') + self._batch_norm = build_batch_norm(init_zero=True) + + def call(self, inputs: tf.Tensor, training: bool = None) -> tf.Tensor: + 
"""Perform representation flows. + + Args: + inputs: list of `Tensors` of shape `[batch*time, height, width, + channels]`. + training: True for training phase. + + Returns: + A tensor of the same shape as the inputs. + """ + data_format = tf.keras.backend.image_data_format() + df = 'NHWC' if data_format == 'channels_last' else 'NCHW' + axis = 3 if data_format == 'channels_last' else 1 # channel axis + dtype = inputs.dtype + residual = inputs + depth = inputs.shape.as_list()[axis] + # assert depth == self._depth, f'rep_flow {depth} != {self._depth}' + + if self._bottleneck == 1: + inputs = tf.reduce_mean(inputs, axis=axis) + inputs = tf.expand_dims(inputs, -1) + elif depth != self._bottleneck: + inputs = self._bottleneck_conv1(inputs) + + input_shape = inputs.shape.as_list() + inp = norm_img(inputs) + inp = tf.reshape( + inp, + (-1, self._time, inputs.shape[1], inputs.shape[2], inputs.shape[3])) + inp = tf.ensure_shape( + inp, (None, self._time, input_shape[1], input_shape[2], input_shape[3])) + img1 = tf.reshape( + inp[:, :-1], (-1, tf.shape(inp)[2], tf.shape(inp)[3], tf.shape(inp)[4])) + img2 = tf.reshape( + inp[:, 1:], (-1, tf.shape(inp)[2], tf.shape(inp)[3], tf.shape(inp)[4])) + img1 = tf.ensure_shape( + img1, (None, inputs.shape[1], inputs.shape[2], inputs.shape[3])) + img2 = tf.ensure_shape( + img2, (None, inputs.shape[1], inputs.shape[2], inputs.shape[3])) + + u1 = tf.zeros_like(img1, dtype=dtype) + u2 = tf.zeros_like(img2, dtype=dtype) + + l_t = self.l_t + taut = self.taut + + grad2_x = tf.nn.conv2d( + img2, self.img_grad_x, [1, 1, 1, 1], 'SAME', data_format=df) + grad2_y = tf.nn.conv2d( + img2, self.img_grad_y, [1, 1, 1, 1], 'SAME', data_format=df) + + p11 = tf.zeros_like(img1, dtype=dtype) + p12 = tf.zeros_like(img1, dtype=dtype) + p21 = tf.zeros_like(img1, dtype=dtype) + p22 = tf.zeros_like(img1, dtype=dtype) + + gsqx = grad2_x**2 + gsqy = grad2_y**2 + + grad = gsqx + gsqy + 1e-12 + + rho_c = img2 - grad2_x * u1 - grad2_y * u2 - img1 + + for _ in 
range(self._num_iter): + rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12 + + v1 = tf.zeros_like(img1, dtype=dtype) + v2 = tf.zeros_like(img2, dtype=dtype) + + mask1 = rho < -l_t * grad + tmp11 = tf.where(mask1, l_t * grad2_x, + tf.zeros_like(grad2_x, dtype=dtype)) + tmp12 = tf.where(mask1, l_t * grad2_y, + tf.zeros_like(grad2_y, dtype=dtype)) + + mask2 = rho > l_t * grad + tmp21 = tf.where(mask2, -l_t * grad2_x, + tf.zeros_like(grad2_x, dtype=dtype)) + tmp22 = tf.where(mask2, -l_t * grad2_y, + tf.zeros_like(grad2_y, dtype=dtype)) + + mask3 = (~mask1) & (~mask2) & (grad > 1e-12) + tmp31 = tf.where(mask3, (-rho / grad) * grad2_x, + tf.zeros_like(grad2_x, dtype=dtype)) + tmp32 = tf.where(mask3, (-rho / grad) * grad2_y, + tf.zeros_like(grad2_y, dtype=dtype)) + + v1 = tmp11 + tmp21 + tmp31 + u1 + v2 = tmp12 + tmp22 + tmp32 + u2 + + u1 = v1 + self.t * divergence(p11, p12, self.f_grad_x, self.f_grad_y, + 'div_p1') + u2 = v2 + self.t * divergence(p21, p22, self.f_grad_x, self.f_grad_y, + 'div_p2') + + u1x, u1y = forward_grad(u1, self.f_grad_x2, self.f_grad_y2, 'u1') + u2x, u2y = forward_grad(u2, self.f_grad_x2, self.f_grad_y2, 'u2') + + p11 = (p11 + taut * u1x) / (1. + taut * tf.sqrt(u1x**2 + u1y**2 + 1e-12)) + p12 = (p12 + taut * u1y) / (1. + taut * tf.sqrt(u1x**2 + u1y**2 + 1e-12)) + p21 = (p21 + taut * u2x) / (1. + taut * tf.sqrt(u2x**2 + u2y**2 + 1e-12)) + p22 = (p22 + taut * u2y) / (1. 
+ taut * tf.sqrt(u2x**2 + u2y**2 + 1e-12)) + + u1 = tf.reshape(u1, (-1, self._time - 1, tf.shape(u1)[1], + tf.shape(u1)[2], tf.shape(u1)[3])) + u2 = tf.reshape(u2, (-1, self._time - 1, tf.shape(u2)[1], + tf.shape(u2)[2], tf.shape(u2)[3])) + flow = tf.concat([u1, u2], axis=axis + 1) + flow = tf.concat([ + flow, + tf.reshape( + flow[:, -1, :, :, :], + (-1, 1, tf.shape(u1)[2], tf.shape(u1)[3], tf.shape(u1)[4] * 2)) + ], + axis=1) + # padding: [bs, 1, w, h, 2*c] -> [bs, 1, w, h, 2*c] + # flow is [bs, t, w, h, 2*c] + flow = tf.reshape( + flow, (-1, tf.shape(u1)[2], tf.shape(u2)[3], tf.shape(u1)[4] * 2)) + # folwo is [bs*t, w, h, 2*c] + + if self._bottleneck == 1: + output_shape = residual.shape.as_list() + output_shape[-1] = self._bottleneck * 2 + flow = tf.ensure_shape(flow, output_shape) + return flow + else: + flow = self._bottleneck_conv2(flow) + + flow = self._batch_norm(flow) + flow = tf.ensure_shape(flow, residual.shape) + return tf.nn.relu(flow + residual) diff --git a/official/vision/beta/projects/assemblenet/train.py b/official/vision/beta/projects/assemblenet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a2018fdd1fae525101fb5a4cbeab9577e2129a04 --- /dev/null +++ b/official/vision/beta/projects/assemblenet/train.py @@ -0,0 +1,94 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Training driver.""" + +from absl import app +from absl import flags +from absl import logging +import gin + +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance +# pylint: disable=unused-import +from official.vision.beta.projects.assemblenet.configs import assemblenet as asn_configs +from official.vision.beta.projects.assemblenet.modeling import assemblenet as asn +# pylint: enable=unused-import + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + if 'train_and_eval' in FLAGS.mode: + assert (params.task.train_data.feature_shape == + params.task.validation_data.feature_shape), ( + f'train {params.task.train_data.feature_shape} != validate ' + f'{params.task.validation_data.feature_shape}') + + if 'assemblenet' in FLAGS.experiment: + if 'eval' in FLAGS.mode: + # Use the feature shape in validation_data for all jobs. The number of + # frames in train_data will be used to construct the Assemblenet model. 
+ params.task.model.backbone.assemblenet.num_frames = params.task.validation_data.feature_shape[ + 0] + shape = params.task.validation_data.feature_shape + else: + params.task.model.backbone.assemblenet.num_frames = params.task.train_data.feature_shape[ + 0] + shape = params.task.train_data.feature_shape + logging.info('mode %r num_frames %r feature shape %r', FLAGS.mode, + params.task.model.backbone.assemblenet.num_frames, shape) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + train_utils.save_gin_config(FLAGS.mode, model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/vision/beta/projects/assemblenet/train_test.py b/official/vision/beta/projects/assemblenet/train_test.py new file mode 100644 index 0000000000000000000000000000000000000000..210b0aaa54680ade3e92e2eaaae9af653b68a269 --- /dev/null +++ b/official/vision/beta/projects/assemblenet/train_test.py @@ -0,0 +1,104 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +import json +import os +import random + +from absl import flags +from absl import logging +from absl.testing import flagsaver +import tensorflow as tf + +from official.vision.beta.dataloaders import tfexample_utils +from official.vision.beta.projects.assemblenet import train as train_lib + +FLAGS = flags.FLAGS + + +class TrainTest(tf.test.TestCase): + + def setUp(self): + super(TrainTest, self).setUp() + self._model_dir = os.path.join(self.get_temp_dir(), 'model_dir') + tf.io.gfile.makedirs(self._model_dir) + + data_dir = os.path.join(self.get_temp_dir(), 'data') + tf.io.gfile.makedirs(data_dir) + self._data_path = os.path.join(data_dir, 'data.tfrecord') + # pylint: disable=g-complex-comprehension + examples = [ + tfexample_utils.make_video_test_example( + image_shape=(36, 36, 3), + audio_shape=(20, 128), + label=random.randint(0, 100)) for _ in range(2) + ] + # pylint: enable=g-complex-comprehension + tfexample_utils.dump_to_tfrecord(self._data_path, tf_examples=examples) + + def test_run(self): + saved_flag_values = flagsaver.save_flag_values() + train_lib.tfm_flags.define_flags() + FLAGS.mode = 'train' + FLAGS.model_dir = self._model_dir + FLAGS.experiment = 'assemblenet50_kinetics600' + logging.info('Test pipeline correctness.') + num_frames = 4 + + params_override = json.dumps({ + 'runtime': { + 'mixed_precision_dtype': 'float32', + }, + 'trainer': { + 'train_steps': 1, + 'validation_steps': 1, + }, + 'task': { + 'model': { + 'backbone': { + 'assemblenet': { + 'model_id': '26', + 'num_frames': num_frames, + }, + }, + }, + 
'train_data': { + 'input_path': self._data_path, + 'file_type': 'tfrecord', + 'feature_shape': [num_frames, 32, 32, 3], + 'global_batch_size': 2, + }, + 'validation_data': { + 'input_path': self._data_path, + 'file_type': 'tfrecord', + 'global_batch_size': 2, + 'feature_shape': [num_frames * 2, 32, 32, 3], + } + } + }) + FLAGS.params_override = params_override + + train_lib.main('unused_args') + + FLAGS.mode = 'eval' + + with train_lib.gin.unlock_config(): + train_lib.main('unused_args') + + flagsaver.restore_flag_values(saved_flag_values) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/deepmac_maskrcnn/README.md b/official/vision/beta/projects/deepmac_maskrcnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e241f2cf9b985174165413c5ba2ddf455098c26 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/README.md @@ -0,0 +1,119 @@ +# Mask R-CNN with deep mask heads + +This project brings insights from the DeepMAC model into the Mask-RCNN +architecture. Please see the paper +[The surprising impact of mask-head architecture on novel class segmentation](https://arxiv.org/abs/2104.00613) +for more details. + +## Code structure + +* This folder contains forks of a few Mask R-CNN files and repurposes them to + support deep mask heads. +* To see the benefits of using deep mask heads, it is important to train the + mask head with only groundtruth boxes. This is configured via the + `task.model.use_gt_boxes_for_masks` flag. +* Architecture of the mask head can be changed via the config value + `task.model.mask_head.convnet_variant`. Supported values are `"default"`, + `"hourglass20"`, `"hourglass52"`, and `"hourglass100"`. +* The flag `task.model.mask_head.class_agnostic` trains the model in class + agnostic mode and `task.allowed_mask_class_ids` controls which classes are + allowed to have masks during training. 
+* Majority of experiments and ablations from the paper are performed with the
+  [DeepMAC model](../../../../../research/object_detection/g3doc/deepmac.md)
+  in the Object Detection API code base.
+
+## Prerequisites
+
+### Prepare dataset
+
+Use [create_coco_tf_record.py](../../data/create_coco_tf_record.py) to create
+the COCO dataset. The data needs to be stored in a
+[Google cloud storage bucket](https://cloud.google.com/storage/docs/creating-buckets)
+so that it can be accessed by the TPU.
+
+### Start a TPU v3-32 instance
+
+See [TPU Quickstart](https://cloud.google.com/tpu/docs/quickstart) for
+instructions. An example command would look like:
+
+```shell
+ctpu up --name --zone --tpu-size=v3-32 --tf-version nightly
+```
+
+This model requires TF version `>= 2.5`. Currently, that is only available via a
+`nightly` build on Cloud.
+
+### Install requirements
+
+SSH into the TPU host with `gcloud compute ssh ` and execute the
+following.
+
+```shell
+$ git clone https://github.com/tensorflow/models.git
+$ cd models
+$ pip3 install -r official/requirements.txt
+```
+
+## Training Models
+
+The configurations can be found in the `configs/experiments` directory. You can
+launch a training job by executing.
+
+```shell
+$ export CONFIG=./official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50.yaml
+$ export MODEL_DIR="gs://"
+$ export ANNOTATION_FILE="gs://"
+$ export TRAIN_DATA="gs://"
+$ export EVAL_DATA="gs://"
+# Overrides to access data. These can also be changed in the config file.
+$ export OVERRIDES="task.validation_data.input_path=${EVAL_DATA},\
+task.train_data.input_path=${TRAIN_DATA},\
+task.annotation_file=${ANNOTATION_FILE},\
+runtime.distribution_strategy=tpu"
+
+$ python3 -m official.vision.beta.projects.deepmac_maskrcnn.train \
+  --logtostderr \
+  --mode=train_and_eval \
+  --experiment=deep_mask_head_rcnn_resnetfpn_coco \
+  --model_dir=$MODEL_DIR \
+  --config_file=$CONFIG \
+  --params_override=$OVERRIDES\
+  --tpu=
+```
+
+`CONFIG_FILE` can be any file in the `configs/experiments` directory.
+
+**Note:** The default eval batch size of 32 discards some samples during
+validation. For accurate validation statistics, launch a dedicated eval job on
+TPU `v3-8` and set batch size to 8.
+
+## Configurations
+
+In the following table, we report the Mask mAP of our models on the non-VOC
+classes when only training with masks for the VOC classes. Performance is
+measured on the `coco-val2017` set.
+
+Backbone   | Mask head    | Config name                              | Mask mAP
+:--------- | :----------- | :--------------------------------------- | -------:
+ResNet-50  | Default      | `deep_mask_head_rcnn_voc_r50.yaml`       | 25.9
+ResNet-50  | Hourglass-52 | `deep_mask_head_rcnn_voc_r50_hg52.yaml`  | 33.1
+ResNet-101 | Hourglass-52 | `deep_mask_head_rcnn_voc_r101_hg52.yaml` | 34.4
+
+## See also
+
+* [DeepMAC model](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/deepmac.md)
+  in the Object Detection API code base.
+* Project website - [git.io/deepmac](https://git.io/deepmac) + +## Citation + +``` +@misc{birodkar2021surprising, + title={The surprising impact of mask-head architecture on novel class segmentation}, + author={Vighnesh Birodkar and Zhichao Lu and Siyang Li and Vivek Rathod and Jonathan Huang}, + year={2021}, + eprint={2104.00613}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/official/vision/beta/projects/deepmac_maskrcnn/common/registry_imports.py b/official/vision/beta/projects/deepmac_maskrcnn/common/registry_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..0732d1a0be9d5728dc01f907db493c8ac1a3bd73 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/common/registry_imports.py @@ -0,0 +1,18 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Imports to configure Mask R-CNN with deep mask heads.""" + +# pylint: disable=unused-import +from official.vision.beta.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn diff --git a/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py b/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..4137b187579f839644eafec8e42633ad2f3236e8 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py @@ -0,0 +1,112 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Configuration for Mask R-CNN with deep mask heads.""" + +import os +from typing import Optional + +import dataclasses + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import optimization +from official.vision.beta.configs import maskrcnn as maskrcnn_config +from official.vision.beta.configs import retinanet as retinanet_config + + +@dataclasses.dataclass +class DeepMaskHead(maskrcnn_config.MaskHead): + convnet_variant: str = 'default' + + +@dataclasses.dataclass +class DeepMaskHeadRCNN(maskrcnn_config.MaskRCNN): + mask_head: Optional[DeepMaskHead] = DeepMaskHead() + use_gt_boxes_for_masks: bool = False + + +@dataclasses.dataclass +class DeepMaskHeadRCNNTask(maskrcnn_config.MaskRCNNTask): + """Configuration for the deep mask head R-CNN task.""" + model: DeepMaskHeadRCNN = DeepMaskHeadRCNN() + + +@exp_factory.register_config_factory('deep_mask_head_rcnn_resnetfpn_coco') +def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig: + """COCO object detection with Mask R-CNN with deep mask heads.""" + global_batch_size = 64 + steps_per_epoch = int(retinanet_config.COCO_TRAIN_EXAMPLES / + global_batch_size) + coco_val_samples = 5000 + + config = cfg.ExperimentConfig( + runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'), + task=DeepMaskHeadRCNNTask( + init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE, + 'instances_val2017.json'), + model=DeepMaskHeadRCNN( + num_classes=91, + input_size=[1024, 1024, 3], + include_mask=True), # pytype: disable=wrong-keyword-args + losses=maskrcnn_config.Losses(l2_weight_decay=0.00004), + train_data=maskrcnn_config.DataConfig( + input_path=os.path.join( + maskrcnn_config.COCO_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=global_batch_size, + parser=maskrcnn_config.Parser( + 
aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)), + validation_data=maskrcnn_config.DataConfig( + input_path=os.path.join( + maskrcnn_config.COCO_INPUT_PATH_BASE, 'val*'), + is_training=False, + global_batch_size=8)), # pytype: disable=wrong-keyword-args + trainer=cfg.TrainerConfig( + train_steps=22500, + validation_steps=coco_val_samples // 8, + validation_interval=steps_per_epoch, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'sgd', + 'sgd': { + 'momentum': 0.9 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [15000, 20000], + 'values': [0.12, 0.012, 0.0012], + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + 'warmup_steps': 500, + 'warmup_learning_rate': 0.0067 + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + return config diff --git a/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py b/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3ad0398da3a9a30e1dc1457327ee95cbd6c86588 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py @@ -0,0 +1,30 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Check that the config is set correctly.""" + +import tensorflow as tf + +from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn + + +class DeepMaskHeadRcnnConfigTest(tf.test.TestCase): + + def test_config(self): + config = deep_mask_head_rcnn.deep_mask_head_rcnn_resnetfpn_coco() + self.assertIsInstance(config.task, deep_mask_head_rcnn.DeepMaskHeadRCNNTask) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r101_hg52.yaml b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r101_hg52.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fea1c3b5e9e3b7320ff55d7383812ed088a4cdd4 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r101_hg52.yaml @@ -0,0 +1,31 @@ +task: + # VOC class taken from + # models/official/vision/detection/utils/class_utils.py + allowed_mask_class_ids: [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72] + per_category_metrics: true + model: + mask_head: + class_agnostic: true + convnet_variant: 'hourglass52' + num_filters: 64 + mask_roi_aligner: + crop_size: 32 + use_gt_boxes_for_masks: true + backbone: + type: 'resnet' + resnet: + model_id: 101 + init_checkpoint: 'gs://tf_model_garden/official/resnet101_imagenet/ckpt-62400' + train_data: + global_batch_size: 64 + validation_data: + global_batch_size: 32 + +trainer: + optimizer_config: + learning_rate: + stepwise: + boundaries: [50000, 65000] + type: 'stepwise' + train_steps: 70000 + validation_steps: 156 # 5000 / 32 diff --git a/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50.yaml 
b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4790a39729bbd934fc9445c788129cbed9d3c3b --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50.yaml @@ -0,0 +1,27 @@ +task: + # VOC class taken from + # models/official/vision/detection/utils/class_utils.py + allowed_mask_class_ids: [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72] + per_category_metrics: true + model: + mask_head: + class_agnostic: true + use_gt_boxes_for_masks: true + backbone: + type: 'resnet' + resnet: + model_id: 50 + init_checkpoint: 'gs://tf_model_garden/official/resnet50_imagenet/ckpt-28080' + train_data: + global_batch_size: 64 + validation_data: + global_batch_size: 32 + +trainer: + optimizer_config: + learning_rate: + stepwise: + boundaries: [50000, 65000] + type: 'stepwise' + train_steps: 70000 + validation_steps: 156 # 5000 / 32 diff --git a/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50_hg52.yaml b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50_hg52.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdda1cbbdcbf99e88e2ea83524f98333bf4ca111 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/configs/experiments/deep_mask_head_rcnn_voc_r50_hg52.yaml @@ -0,0 +1,31 @@ +task: + # VOC class taken from + # models/official/vision/detection/utils/class_utils.py + allowed_mask_class_ids: [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64, 67, 72] + per_category_metrics: true + model: + mask_head: + class_agnostic: true + convnet_variant: 'hourglass52' + num_filters: 64 + mask_roi_aligner: + crop_size: 32 + use_gt_boxes_for_masks: true + backbone: + type: 'resnet' + resnet: + model_id: 50 + init_checkpoint: 
'gs://tf_model_garden/official/resnet50_imagenet/ckpt-28080' + train_data: + global_batch_size: 64 + validation_data: + global_batch_size: 32 + +trainer: + optimizer_config: + learning_rate: + stepwise: + boundaries: [50000, 65000] + type: 'stepwise' + train_steps: 70000 + validation_steps: 156 # 5000 / 32 diff --git a/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/hourglass_network.py b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/hourglass_network.py new file mode 100644 index 0000000000000000000000000000000000000000..8b73140457940d9a45c8f545f0bd085bda0daa8c --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/hourglass_network.py @@ -0,0 +1,637 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""The Hourglass[1] network. + +[1]: https://arxiv.org/abs/1603.06937 +""" + + +import tensorflow as tf + +BATCH_NORM_EPSILON = 1e-5 +BATCH_NORM_MOMENTUM = 0.1 +BATCH_NORM_FUSED = True + + +class IdentityLayer(tf.keras.layers.Layer): + """A layer which passes through the input as it is.""" + + def call(self, inputs): + return inputs + + +def _get_padding_for_kernel_size(kernel_size): + if kernel_size == 7: + return (3, 3) + elif kernel_size == 3: + return (1, 1) + else: + raise ValueError('Padding for kernel size {} not known.'.format( + kernel_size)) + + +def batchnorm(): + try: + return tf.keras.layers.experimental.SyncBatchNormalization( + name='batchnorm', epsilon=1e-5, momentum=0.1) + except AttributeError: + return tf.keras.layers.BatchNormalization( + name='batchnorm', epsilon=1e-5, momentum=0.1, fused=BATCH_NORM_FUSED) + + +class ConvolutionalBlock(tf.keras.layers.Layer): + """Block that aggregates Convolution + Norm layer + ReLU.""" + + def __init__(self, kernel_size, out_channels, stride=1, relu=True, + padding='same'): + """Initializes the Convolutional block. + + Args: + kernel_size: int, convolution kernel size. + out_channels: int, the desired number of output channels. + stride: Integer, stride used in the convolution. + relu: bool, whether to use relu at the end of the layer. + padding: str, the padding scheme to use when kernel_size <= 1 + """ + super(ConvolutionalBlock, self).__init__() + + if kernel_size > 1: + padding = 'valid' + padding_size = _get_padding_for_kernel_size(kernel_size) + + # TODO(vighneshb) Explore if removing and using padding option in conv + # layer works. 
+ self.pad = tf.keras.layers.ZeroPadding2D(padding_size) + else: + self.pad = IdentityLayer() + + self.conv = tf.keras.layers.Conv2D( + filters=out_channels, kernel_size=kernel_size, use_bias=False, + strides=stride, padding=padding) + + self.norm = batchnorm() + + if relu: + self.relu = tf.keras.layers.ReLU() + else: + self.relu = IdentityLayer() + + def call(self, inputs): + net = self.pad(inputs) + net = self.conv(net) + net = self.norm(net) + return self.relu(net) + + +class SkipConvolution(ConvolutionalBlock): + """The skip connection layer for a ResNet.""" + + def __init__(self, out_channels, stride): + """Initializes the skip convolution layer. + + Args: + out_channels: int, the desired number of output channels. + stride: int, the stride for the layer. + """ + super(SkipConvolution, self).__init__( + out_channels=out_channels, kernel_size=1, stride=stride, relu=False) + + +class ResidualBlock(tf.keras.layers.Layer): + """A Residual block.""" + + def __init__(self, out_channels, skip_conv=False, kernel_size=3, stride=1, + padding='same'): + """Initializes the Residual block. + + Args: + out_channels: int, the desired number of output channels. + skip_conv: bool, whether to use a conv layer for skip connections. + kernel_size: int, convolution kernel size. + stride: Integer, stride used in the convolution. + padding: str, the type of padding to use. 
+ """ + + super(ResidualBlock, self).__init__() + self.conv_block = ConvolutionalBlock( + kernel_size=kernel_size, out_channels=out_channels, stride=stride) + + self.conv = tf.keras.layers.Conv2D( + filters=out_channels, kernel_size=kernel_size, use_bias=False, + strides=1, padding=padding) + self.norm = batchnorm() + + if skip_conv: + self.skip = SkipConvolution(out_channels=out_channels, + stride=stride) + else: + self.skip = IdentityLayer() + + self.relu = tf.keras.layers.ReLU() + + def call(self, inputs): + net = self.conv_block(inputs) + net = self.conv(net) + net = self.norm(net) + net_skip = self.skip(inputs) + return self.relu(net + net_skip) + + +class InputDownsampleBlock(tf.keras.layers.Layer): + """Block for the initial feature downsampling.""" + + def __init__(self, out_channels_initial_conv, out_channels_residual_block): + """Initializes the downsample block. + + Args: + out_channels_initial_conv: int, the desired number of output channels + in the initial conv layer. + out_channels_residual_block: int, the desired number of output channels + in the underlying residual block. + """ + + super(InputDownsampleBlock, self).__init__() + self.conv_block = ConvolutionalBlock( + kernel_size=7, out_channels=out_channels_initial_conv, stride=2, + padding='valid') + self.residual_block = ResidualBlock( + out_channels=out_channels_residual_block, stride=2, skip_conv=True) + + def call(self, inputs): + return self.residual_block(self.conv_block(inputs)) + + +class InputConvBlock(tf.keras.layers.Layer): + """Block for the initial feature convolution. + + This block is used in the hourglass network when we don't want to downsample + the input. + """ + + def __init__(self, out_channels_initial_conv, out_channels_residual_block): + """Initializes the downsample block. + + Args: + out_channels_initial_conv: int, the desired number of output channels + in the initial conv layer. 
+      out_channels_residual_block: int, the desired number of output channels
+        in the underlying residual block.
+    """
+
+    super(InputConvBlock, self).__init__()
+
+    self.conv_block = ConvolutionalBlock(
+        kernel_size=3, out_channels=out_channels_initial_conv, stride=1,
+        padding='valid')
+    self.residual_block = ResidualBlock(
+        out_channels=out_channels_residual_block, stride=1, skip_conv=True)
+
+  def call(self, inputs):
+    return self.residual_block(self.conv_block(inputs))
+
+
+def _make_repeated_residual_blocks(out_channels, num_blocks,
+                                   initial_stride=1, residual_channels=None,
+                                   initial_skip_conv=False):
+  """Stack Residual blocks one after the other.
+
+  Args:
+    out_channels: int, the desired number of output channels.
+    num_blocks: int, the number of residual blocks to be stacked.
+    initial_stride: int, the stride of the initial residual block.
+    residual_channels: int, the desired number of output channels in the
+      intermediate residual blocks. If not specified, we use out_channels.
+    initial_skip_conv: bool, if set, the first residual block uses a skip
+      convolution. This is useful when the number of channels in the input
+      are not the same as residual_channels.
+
+  Returns:
+    blocks: A list of residual blocks to be applied in sequence.
+
+  """
+
+  blocks = []
+
+  if residual_channels is None:
+    residual_channels = out_channels
+
+  for i in range(num_blocks - 1):
+    # Only use the stride at the first block so we don't repeatedly downsample
+    # the input
+    stride = initial_stride if i == 0 else 1
+
+    # If the stride is more than 1, we cannot use an identity layer for the
+    # skip connection and are forced to use a conv for the skip connection.
+ skip_conv = stride > 1 + + if i == 0 and initial_skip_conv: + skip_conv = True + + blocks.append( + ResidualBlock(out_channels=residual_channels, stride=stride, + skip_conv=skip_conv) + ) + + if num_blocks == 1: + # If there is only 1 block, the for loop above is not run, + # therefore we honor the requested stride in the last residual block + stride = initial_stride + # We are forced to use a conv in the skip connection if stride > 1 + skip_conv = stride > 1 + else: + stride = 1 + skip_conv = residual_channels != out_channels + + blocks.append(ResidualBlock(out_channels=out_channels, skip_conv=skip_conv, + stride=stride)) + + return blocks + + +def _apply_blocks(inputs, blocks): + net = inputs + + for block in blocks: + net = block(net) + + return net + + +class EncoderDecoderBlock(tf.keras.layers.Layer): + """An encoder-decoder block which recursively defines the hourglass network.""" + + def __init__(self, num_stages, channel_dims, blocks_per_stage, + stagewise_downsample=True, encoder_decoder_shortcut=True): + """Initializes the encoder-decoder block. + + Args: + num_stages: int, Number of stages in the network. At each stage we have 2 + encoder and 1 decoder blocks. The second encoder block downsamples the + input. + channel_dims: int list, the output channels dimensions of stages in + the network. `channel_dims[0]` is used to define the number of + channels in the first encoder block and `channel_dims[1]` is used to + define the number of channels in the second encoder block. The channels + in the recursive inner layers are defined using `channel_dims[1:]` + blocks_per_stage: int list, number of residual blocks to use at each + stage. `blocks_per_stage[0]` defines the number of blocks at the + current stage and `blocks_per_stage[1:]` is used at further stages. + stagewise_downsample: bool, whether or not to downsample before passing + inputs to the next stage. 
+ encoder_decoder_shortcut: bool, whether or not to use shortcut + connections between encoder and decoder. + """ + + super(EncoderDecoderBlock, self).__init__() + + out_channels = channel_dims[0] + out_channels_downsampled = channel_dims[1] + + self.encoder_decoder_shortcut = encoder_decoder_shortcut + + if encoder_decoder_shortcut: + self.merge_features = tf.keras.layers.Add() + self.encoder_block1 = _make_repeated_residual_blocks( + out_channels=out_channels, num_blocks=blocks_per_stage[0], + initial_stride=1) + + initial_stride = 2 if stagewise_downsample else 1 + self.encoder_block2 = _make_repeated_residual_blocks( + out_channels=out_channels_downsampled, + num_blocks=blocks_per_stage[0], initial_stride=initial_stride, + initial_skip_conv=out_channels != out_channels_downsampled) + + if num_stages > 1: + self.inner_block = [ + EncoderDecoderBlock(num_stages - 1, channel_dims[1:], + blocks_per_stage[1:], + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut) + ] + else: + self.inner_block = _make_repeated_residual_blocks( + out_channels=out_channels_downsampled, + num_blocks=blocks_per_stage[1]) + + self.decoder_block = _make_repeated_residual_blocks( + residual_channels=out_channels_downsampled, + out_channels=out_channels, num_blocks=blocks_per_stage[0]) + + self.upsample = tf.keras.layers.UpSampling2D(initial_stride) + + def call(self, inputs): + + if self.encoder_decoder_shortcut: + encoded_outputs = _apply_blocks(inputs, self.encoder_block1) + encoded_downsampled_outputs = _apply_blocks(inputs, self.encoder_block2) + inner_block_outputs = _apply_blocks( + encoded_downsampled_outputs, self.inner_block) + + decoded_outputs = _apply_blocks(inner_block_outputs, self.decoder_block) + upsampled_outputs = self.upsample(decoded_outputs) + + if self.encoder_decoder_shortcut: + return self.merge_features([encoded_outputs, upsampled_outputs]) + else: + return upsampled_outputs + + +class HourglassNetwork(tf.keras.Model): + 
"""The hourglass network.""" + + def __init__(self, num_stages, input_channel_dims, channel_dims_per_stage, + blocks_per_stage, num_hourglasses, initial_downsample=True, + stagewise_downsample=True, encoder_decoder_shortcut=True): + """Intializes the feature extractor. + + Args: + num_stages: int, Number of stages in the network. At each stage we have 2 + encoder and 1 decoder blocks. The second encoder block downsamples the + input. + input_channel_dims: int, the number of channels in the input conv blocks. + channel_dims_per_stage: int list, the output channel dimensions of each + stage in the hourglass network. + blocks_per_stage: int list, number of residual blocks to use at each + stage in the hourglass network + num_hourglasses: int, number of hourglas networks to stack + sequentially. + initial_downsample: bool, if set, downsamples the input by a factor of 4 + before applying the rest of the network. Downsampling is done with a 7x7 + convolution kernel, otherwise a 3x3 kernel is used. + stagewise_downsample: bool, whether or not to downsample before passing + inputs to the next stage. + encoder_decoder_shortcut: bool, whether or not to use shortcut + connections between encoder and decoder. 
+ """ + + super(HourglassNetwork, self).__init__() + + self.num_hourglasses = num_hourglasses + self.initial_downsample = initial_downsample + if initial_downsample: + self.downsample_input = InputDownsampleBlock( + out_channels_initial_conv=input_channel_dims, + out_channels_residual_block=channel_dims_per_stage[0] + ) + else: + self.conv_input = InputConvBlock( + out_channels_initial_conv=input_channel_dims, + out_channels_residual_block=channel_dims_per_stage[0] + ) + + self.hourglass_network = [] + self.output_conv = [] + for _ in range(self.num_hourglasses): + self.hourglass_network.append( + EncoderDecoderBlock( + num_stages=num_stages, channel_dims=channel_dims_per_stage, + blocks_per_stage=blocks_per_stage, + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut) + ) + self.output_conv.append( + ConvolutionalBlock(kernel_size=3, + out_channels=channel_dims_per_stage[0]) + ) + + self.intermediate_conv1 = [] + self.intermediate_conv2 = [] + self.intermediate_residual = [] + + for _ in range(self.num_hourglasses - 1): + self.intermediate_conv1.append( + ConvolutionalBlock( + kernel_size=1, out_channels=channel_dims_per_stage[0], relu=False) + ) + self.intermediate_conv2.append( + ConvolutionalBlock( + kernel_size=1, out_channels=channel_dims_per_stage[0], relu=False) + ) + self.intermediate_residual.append( + ResidualBlock(out_channels=channel_dims_per_stage[0]) + ) + + self.intermediate_relu = tf.keras.layers.ReLU() + + def call(self, inputs): + + if self.initial_downsample: + inputs = self.downsample_input(inputs) + else: + inputs = self.conv_input(inputs) + + outputs = [] + + for i in range(self.num_hourglasses): + + hourglass_output = self.hourglass_network[i](inputs) + + output = self.output_conv[i](hourglass_output) + outputs.append(output) + + if i < self.num_hourglasses - 1: + secondary_output = (self.intermediate_conv1[i](inputs) + + self.intermediate_conv2[i](output)) + secondary_output = 
self.intermediate_relu(secondary_output) + inputs = self.intermediate_residual[i](secondary_output) + + return outputs + + @property + def out_stride(self): + """The stride in the output image of the network.""" + return 4 + + @property + def num_feature_outputs(self): + """Ther number of feature outputs returned by the feature extractor.""" + return self.num_hourglasses + + +def _layer_depth(layer): + """Compute depth of Conv/Residual blocks or lists of them.""" + + if isinstance(layer, list): + return sum([_layer_depth(l) for l in layer]) + + elif isinstance(layer, ConvolutionalBlock): + return 1 + + elif isinstance(layer, ResidualBlock): + return 2 + + else: + raise ValueError('Unknown layer - {}'.format(layer)) + + +def _encoder_decoder_depth(network): + """Helper function to compute depth of encoder-decoder blocks.""" + + encoder_block2_layers = _layer_depth(network.encoder_block2) + decoder_block_layers = _layer_depth(network.decoder_block) + + if isinstance(network.inner_block[0], EncoderDecoderBlock): + + assert len(network.inner_block) == 1, 'Inner block is expected as length 1.' + inner_block_layers = _encoder_decoder_depth(network.inner_block[0]) + + return inner_block_layers + encoder_block2_layers + decoder_block_layers + + elif isinstance(network.inner_block[0], ResidualBlock): + return (encoder_block2_layers + decoder_block_layers + + _layer_depth(network.inner_block)) + + else: + raise ValueError('Unknown inner block type.') + + +def hourglass_depth(network): + """Helper function to verify depth of hourglass backbone.""" + + input_conv_layers = 3 # 1 ResidualBlock and 1 ConvBlock + + # Only intermediate_conv2 and intermediate_residual are applied before + # sending inputs to the later stages. 
+ intermediate_layers = ( + _layer_depth(network.intermediate_conv2) + + _layer_depth(network.intermediate_residual) + ) + + # network.output_conv is applied before sending input to the later stages + output_layers = _layer_depth(network.output_conv) + + encoder_decoder_layers = sum(_encoder_decoder_depth(net) for net in + network.hourglass_network) + + return (input_conv_layers + encoder_decoder_layers + intermediate_layers + + output_layers) + + +def hourglass_104(): + """The Hourglass-104 backbone. + + The architecture parameters are taken from [1]. + + Returns: + network: An HourglassNetwork object implementing the Hourglass-104 + backbone. + + [1]: https://arxiv.org/abs/1904.07850 + """ + + return HourglassNetwork( + input_channel_dims=128, + channel_dims_per_stage=[256, 256, 384, 384, 384, 512], + num_hourglasses=2, + num_stages=5, + blocks_per_stage=[2, 2, 2, 2, 2, 4], + ) + + +def single_stage_hourglass(input_channel_dims, channel_dims_per_stage, + blocks_per_stage, initial_downsample=True, + stagewise_downsample=True, + encoder_decoder_shortcut=True): + assert len(channel_dims_per_stage) == len(blocks_per_stage) + + return HourglassNetwork( + input_channel_dims=input_channel_dims, + channel_dims_per_stage=channel_dims_per_stage, + num_hourglasses=1, + num_stages=len(channel_dims_per_stage) - 1, + blocks_per_stage=blocks_per_stage, + initial_downsample=initial_downsample, + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut + ) + + +def hourglass_10(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[1, 1], + channel_dims_per_stage=[nc * 2, nc * 2]) + + +def hourglass_20(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, 
nc * 2, nc * 3]) + + +def hourglass_32(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[2, 2, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3]) + + +def hourglass_52(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[2, 2, 2, 2, 2, 4], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3, nc * 3, nc*4]) + + +def hourglass_100(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[4, 4, 4, 4, 4, 8], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3, nc * 3, nc*4]) + + +def hourglass_20_uniform_size(num_channels): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3], + initial_downsample=False, + stagewise_downsample=False) + + +def hourglass_20_no_shortcut(num_channels): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3], + initial_downsample=False, + encoder_decoder_shortcut=False) diff --git a/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads.py b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads.py new file mode 100644 index 0000000000000000000000000000000000000000..4b39cc321ed7d911372c4213782ca0c48a8891b2 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads.py @@ -0,0 +1,310 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Instance prediction heads.""" + +# Import libraries + +from absl import logging +import tensorflow as tf + +from official.modeling import tf_utils +from official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import hourglass_network + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class DeepMaskHead(tf.keras.layers.Layer): + """Creates a mask head.""" + + def __init__(self, + num_classes, + upsample_factor=2, + num_convs=4, + num_filters=256, + use_separable_conv=False, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + class_agnostic=False, + convnet_variant='default', + **kwargs): + """Initializes a mask head. + + Args: + num_classes: An `int` of the number of classes. + upsample_factor: An `int` that indicates the upsample factor to generate + the final predicted masks. It should be >= 1. + num_convs: An `int` number that represents the number of the intermediate + convolution layers before the mask prediction layers. + num_filters: An `int` number that represents the number of filters of the + intermediate convolution layers. + use_separable_conv: A `bool` that indicates whether the separable + convolution layers is used. + activation: A `str` that indicates which activation is used, e.g. 'relu', + 'swish', etc. 
+ use_sync_bn: A `bool` that indicates whether to use synchronized batch + normalization across different replicas. + norm_momentum: A `float` of normalization momentum for the moving average. + norm_epsilon: A `float` added to variance to avoid dividing by zero. + kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for + Conv2D. Default is None. + bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D. + class_agnostic: A `bool`. If set, we use a single channel mask head that + is shared between all classes. + convnet_variant: A `str` denoting the architecture of network used in the + head. Supported options are 'default', 'hourglass20', 'hourglass52' + and 'hourglass100'. + **kwargs: Additional keyword arguments to be passed. + """ + super(DeepMaskHead, self).__init__(**kwargs) + self._config_dict = { + 'num_classes': num_classes, + 'upsample_factor': upsample_factor, + 'num_convs': num_convs, + 'num_filters': num_filters, + 'use_separable_conv': use_separable_conv, + 'activation': activation, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'kernel_regularizer': kernel_regularizer, + 'bias_regularizer': bias_regularizer, + 'class_agnostic': class_agnostic, + 'convnet_variant': convnet_variant, + } + + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + self._activation = tf_utils.get_activation(activation) + + def _get_conv_op_and_kwargs(self): + conv_op = (tf.keras.layers.SeparableConv2D + if self._config_dict['use_separable_conv'] + else tf.keras.layers.Conv2D) + conv_kwargs = { + 'filters': self._config_dict['num_filters'], + 'kernel_size': 3, + 'padding': 'same', + } + if self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': 
tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + else: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + + return conv_op, conv_kwargs + + def _get_bn_op_and_kwargs(self): + + bn_op = (tf.keras.layers.experimental.SyncBatchNormalization + if self._config_dict['use_sync_bn'] + else tf.keras.layers.BatchNormalization) + bn_kwargs = { + 'axis': self._bn_axis, + 'momentum': self._config_dict['norm_momentum'], + 'epsilon': self._config_dict['norm_epsilon'], + } + + return bn_op, bn_kwargs + + def build(self, input_shape): + """Creates the variables of the head.""" + + conv_op, conv_kwargs = self._get_conv_op_and_kwargs() + + self._build_convnet_variant() + + self._deconv = tf.keras.layers.Conv2DTranspose( + filters=self._config_dict['num_filters'], + kernel_size=self._config_dict['upsample_factor'], + strides=self._config_dict['upsample_factor'], + padding='valid', + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + bias_initializer=tf.zeros_initializer(), + kernel_regularizer=self._config_dict['kernel_regularizer'], + bias_regularizer=self._config_dict['bias_regularizer'], + name='mask-upsampling') + + bn_op, bn_kwargs = self._get_bn_op_and_kwargs() + self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs) + + if self._config_dict['class_agnostic']: + num_filters = 1 + else: + num_filters = self._config_dict['num_classes'] 
+ + conv_kwargs = { + 'filters': num_filters, + 'kernel_size': 1, + 'padding': 'valid', + } + if self._config_dict['use_separable_conv']: + conv_kwargs.update({ + 'depthwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'pointwise_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'depthwise_regularizer': self._config_dict['kernel_regularizer'], + 'pointwise_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + else: + conv_kwargs.update({ + 'kernel_initializer': tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + 'bias_initializer': tf.zeros_initializer(), + 'kernel_regularizer': self._config_dict['kernel_regularizer'], + 'bias_regularizer': self._config_dict['bias_regularizer'], + }) + self._mask_regressor = conv_op(name='mask-logits', **conv_kwargs) + + super(DeepMaskHead, self).build(input_shape) + + def call(self, inputs, training=None): + """Forward pass of mask branch for the Mask-RCNN model. + + Args: + inputs: A `list` of two tensors where + inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances, + roi_height, roi_width, roi_channels], representing the ROI features. + inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances], + representing the classes of the ROIs. + training: A `bool` indicating whether it is in `training` mode. + + Returns: + mask_outputs: A `tf.Tensor` of shape + [batch_size, num_instances, roi_height * upsample_factor, + roi_width * upsample_factor], representing the mask predictions. 
+ """ + roi_features, roi_classes = inputs + batch_size, num_rois, height, width, filters = ( + roi_features.get_shape().as_list()) + if batch_size is None: + batch_size = tf.shape(roi_features)[0] + + x = tf.reshape(roi_features, [-1, height, width, filters]) + + x = self._call_convnet_variant(x) + + x = self._deconv(x) + x = self._deconv_bn(x) + x = self._activation(x) + + logits = self._mask_regressor(x) + + mask_height = height * self._config_dict['upsample_factor'] + mask_width = width * self._config_dict['upsample_factor'] + + if self._config_dict['class_agnostic']: + logits = tf.reshape(logits, [-1, num_rois, mask_height, mask_width, 1]) + else: + logits = tf.reshape( + logits, + [-1, num_rois, mask_height, mask_width, + self._config_dict['num_classes']]) + + batch_indices = tf.tile( + tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois]) + mask_indices = tf.tile( + tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1]) + + if self._config_dict['class_agnostic']: + class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32) + else: + class_gather_indices = tf.cast(roi_classes, dtype=tf.int32) + + gather_indices = tf.stack( + [batch_indices, mask_indices, class_gather_indices], + axis=2) + mask_outputs = tf.gather_nd( + tf.transpose(logits, [0, 1, 4, 2, 3]), gather_indices) + return mask_outputs + + def _build_convnet_variant(self): + + variant = self._config_dict['convnet_variant'] + if variant == 'default': + conv_op, conv_kwargs = self._get_conv_op_and_kwargs() + bn_op, bn_kwargs = self._get_bn_op_and_kwargs() + self._convs = [] + self._conv_norms = [] + for i in range(self._config_dict['num_convs']): + conv_name = 'mask-conv_{}'.format(i) + self._convs.append(conv_op(name=conv_name, **conv_kwargs)) + bn_name = 'mask-conv-bn_{}'.format(i) + self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs)) + + elif variant == 'hourglass20': + logging.info('Using hourglass 20 network.') + self._hourglass = hourglass_network.hourglass_20( + 
self._config_dict['num_filters'], initial_downsample=False) + + elif variant == 'hourglass52': + logging.info('Using hourglass 52 network.') + self._hourglass = hourglass_network.hourglass_52( + self._config_dict['num_filters'], initial_downsample=False) + + elif variant == 'hourglass100': + logging.info('Using hourglass 100 network.') + self._hourglass = hourglass_network.hourglass_100( + self._config_dict['num_filters'], initial_downsample=False) + + else: + raise ValueError('Unknown ConvNet variant - {}'.format(variant)) + + def _call_convnet_variant(self, x): + + variant = self._config_dict['convnet_variant'] + if variant == 'default': + for conv, bn in zip(self._convs, self._conv_norms): + x = conv(x) + x = bn(x) + x = self._activation(x) + return x + elif variant == 'hourglass20': + return self._hourglass(x)[-1] + elif variant == 'hourglass52': + return self._hourglass(x)[-1] + elif variant == 'hourglass100': + return self._hourglass(x)[-1] + else: + raise ValueError('Unknown ConvNet variant - {}'.format(variant)) + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads_test.py b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads_test.py new file mode 100644 index 0000000000000000000000000000000000000000..95947238f966cecc61c19ed997ddd282beca4914 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/modeling/heads/instance_heads_test.py @@ -0,0 +1,99 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for instance_heads.py.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import instance_heads as deep_instance_heads + + +class MaskHeadTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (1, 1, False), + (1, 2, False), + (2, 1, False), + (2, 2, False), + ) + def test_forward(self, upsample_factor, num_convs, use_sync_bn): + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=3, + upsample_factor=upsample_factor, + num_convs=num_convs, + num_filters=16, + use_separable_conv=False, + activation='relu', + use_sync_bn=use_sync_bn, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + roi_features = np.random.rand(2, 10, 14, 14, 16) + roi_classes = np.zeros((2, 10)) + masks = mask_head([roi_features, roi_classes]) + self.assertAllEqual( + masks.numpy().shape, + [2, 10, 14 * upsample_factor, 14 * upsample_factor]) + + def test_serialize_deserialize(self): + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=3, + upsample_factor=2, + num_convs=1, + num_filters=256, + use_separable_conv=False, + activation='relu', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_regularizer=None, + bias_regularizer=None, + ) + config = mask_head.get_config() + new_mask_head = deep_instance_heads.DeepMaskHead.from_config(config) + self.assertAllEqual( + mask_head.get_config(), new_mask_head.get_config()) 
+ + def test_forward_class_agnostic(self): + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=3, + class_agnostic=True + ) + roi_features = np.random.rand(2, 10, 14, 14, 16) + roi_classes = np.zeros((2, 10)) + masks = mask_head([roi_features, roi_classes]) + self.assertAllEqual(masks.numpy().shape, [2, 10, 28, 28]) + + def test_instance_head_hourglass(self): + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=3, + class_agnostic=True, + convnet_variant='hourglass20', + num_filters=32, + upsample_factor=2 + ) + roi_features = np.random.rand(2, 10, 16, 16, 16) + roi_classes = np.zeros((2, 10)) + masks = mask_head([roi_features, roi_classes]) + self.assertAllEqual(masks.numpy().shape, [2, 10, 32, 32]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py b/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c49e6d5120350b7c725e2a1a2c7be8a25db39787 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py @@ -0,0 +1,242 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Mask R-CNN model.""" + +# Import libraries + +from absl import logging +import tensorflow as tf + +from official.vision.beta.ops import box_ops + + +def resize_as(source, size): + + source = tf.transpose(source, (0, 2, 3, 1)) + source = tf.image.resize(source, (size, size)) + return tf.transpose(source, (0, 3, 1, 2)) + + +@tf.keras.utils.register_keras_serializable(package='Vision') +class DeepMaskRCNNModel(tf.keras.Model): + """The Mask R-CNN model.""" + + def __init__(self, + backbone, + decoder, + rpn_head, + detection_head, + roi_generator, + roi_sampler, + roi_aligner, + detection_generator, + mask_head=None, + mask_sampler=None, + mask_roi_aligner=None, + use_gt_boxes_for_masks=False, + **kwargs): + """Initializes the Mask R-CNN model. + + Args: + backbone: `tf.keras.Model`, the backbone network. + decoder: `tf.keras.Model`, the decoder network. + rpn_head: the RPN head. + detection_head: the detection head. + roi_generator: the ROI generator. + roi_sampler: the ROI sampler. + roi_aligner: the ROI aligner. + detection_generator: the detection generator. + mask_head: the mask head. + mask_sampler: the mask sampler. + mask_roi_aligner: the ROI alginer for mask prediction. + use_gt_boxes_for_masks: bool, if set, crop using groundtruth boxes + instead of proposals for training mask head + **kwargs: keyword arguments to be passed. 
+ """ + super(DeepMaskRCNNModel, self).__init__(**kwargs) + self._config_dict = { + 'backbone': backbone, + 'decoder': decoder, + 'rpn_head': rpn_head, + 'detection_head': detection_head, + 'roi_generator': roi_generator, + 'roi_sampler': roi_sampler, + 'roi_aligner': roi_aligner, + 'detection_generator': detection_generator, + 'mask_head': mask_head, + 'mask_sampler': mask_sampler, + 'mask_roi_aligner': mask_roi_aligner, + 'use_gt_boxes_for_masks': use_gt_boxes_for_masks + } + self.backbone = backbone + self.decoder = decoder + self.rpn_head = rpn_head + self.detection_head = detection_head + self.roi_generator = roi_generator + self.roi_sampler = roi_sampler + self.roi_aligner = roi_aligner + self.detection_generator = detection_generator + self._include_mask = mask_head is not None + self.mask_head = mask_head + if self._include_mask and mask_sampler is None: + raise ValueError('`mask_sampler` is not provided in Mask R-CNN.') + self.mask_sampler = mask_sampler + if self._include_mask and mask_roi_aligner is None: + raise ValueError('`mask_roi_aligner` is not provided in Mask R-CNN.') + self.mask_roi_aligner = mask_roi_aligner + + def call(self, + images, + image_shape, + anchor_boxes=None, + gt_boxes=None, + gt_classes=None, + gt_masks=None, + training=None): + model_outputs = {} + + # Feature extraction. + features = self.backbone(images) + if self.decoder: + features = self.decoder(features) + + # Region proposal network. + rpn_scores, rpn_boxes = self.rpn_head(features) + + model_outputs.update({ + 'rpn_boxes': rpn_boxes, + 'rpn_scores': rpn_scores + }) + + # Generate RoIs. + rois, _ = self.roi_generator( + rpn_boxes, rpn_scores, anchor_boxes, image_shape, training) + + if training: + rois = tf.stop_gradient(rois) + + rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = ( + self.roi_sampler(rois, gt_boxes, gt_classes)) + # Assign target for the 2nd stage classification. 
+ box_targets = box_ops.encode_boxes( + matched_gt_boxes, rois, weights=[10.0, 10.0, 5.0, 5.0]) + # If the target is background, the box target is set to all 0s. + box_targets = tf.where( + tf.tile( + tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1), + [1, 1, 4]), + tf.zeros_like(box_targets), + box_targets) + model_outputs.update({ + 'class_targets': matched_gt_classes, + 'box_targets': box_targets, + }) + + # RoI align. + roi_features = self.roi_aligner(features, rois) + + # Detection head. + raw_scores, raw_boxes = self.detection_head(roi_features) + + if training: + model_outputs.update({ + 'class_outputs': raw_scores, + 'box_outputs': raw_boxes, + }) + else: + # Post-processing. + detections = self.detection_generator( + raw_boxes, raw_scores, rois, image_shape) + model_outputs.update({ + 'detection_boxes': detections['detection_boxes'], + 'detection_scores': detections['detection_scores'], + 'detection_classes': detections['detection_classes'], + 'num_detections': detections['num_detections'], + }) + + if not self._include_mask: + return model_outputs + + if training: + if self._config_dict['use_gt_boxes_for_masks']: + mask_size = ( + self.mask_roi_aligner._config_dict['crop_size'] * # pylint:disable=protected-access + self.mask_head._config_dict['upsample_factor'] # pylint:disable=protected-access + ) + gt_masks = resize_as(source=gt_masks, size=mask_size) + + logging.info('Using GT class and mask targets.') + model_outputs.update({ + 'mask_class_targets': gt_classes, + 'mask_targets': gt_masks, + }) + else: + rois, roi_classes, roi_masks = self.mask_sampler( + rois, + matched_gt_boxes, + matched_gt_classes, + matched_gt_indices, + gt_masks) + roi_masks = tf.stop_gradient(roi_masks) + model_outputs.update({ + 'mask_class_targets': roi_classes, + 'mask_targets': roi_masks, + }) + + else: + rois = model_outputs['detection_boxes'] + roi_classes = model_outputs['detection_classes'] + + # Mask RoI align. 
+ if training and self._config_dict['use_gt_boxes_for_masks']: + logging.info('Using GT mask roi features.') + mask_roi_features = self.mask_roi_aligner(features, gt_boxes) + raw_masks = self.mask_head([mask_roi_features, gt_classes]) + + else: + mask_roi_features = self.mask_roi_aligner(features, rois) + raw_masks = self.mask_head([mask_roi_features, roi_classes]) + + # Mask head. + if training: + model_outputs.update({ + 'mask_outputs': raw_masks, + }) + else: + model_outputs.update({ + 'detection_masks': tf.math.sigmoid(raw_masks), + }) + return model_outputs + + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + items = dict( + backbone=self.backbone, + rpn_head=self.rpn_head, + detection_head=self.detection_head) + if self.decoder is not None: + items.update(decoder=self.decoder) + if self._include_mask: + items.update(mask_head=self.mask_head) + + return items + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model_test.py b/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..206b80e882bd91724179a9e1b31dd856e6bc1c19 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/modeling/maskrcnn_model_test.py @@ -0,0 +1,131 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for maskrcnn_model.py.""" + +# Import libraries + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling.backbones import resnet +from official.vision.beta.modeling.decoders import fpn +from official.vision.beta.modeling.heads import dense_prediction_heads +from official.vision.beta.modeling.heads import instance_heads +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.modeling.layers import mask_sampler +from official.vision.beta.modeling.layers import roi_aligner +from official.vision.beta.modeling.layers import roi_generator +from official.vision.beta.modeling.layers import roi_sampler +from official.vision.beta.ops import anchor +from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_model +from official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import instance_heads as deep_instance_heads + + +class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (False, False,), + (False, True,), + (True, False,), + (True, True,), + ) + def test_forward(self, use_gt_boxes_for_masks, training): + + num_classes = 3 + min_level = 3 + max_level = 4 + num_scales = 3 + aspect_ratios = [1.0] + image_size = (256, 256) + images = np.random.rand(2, image_size[0], image_size[1], 3) + image_shape = np.array([[224, 100], [100, 224]]) + anchor_boxes = anchor.Anchor( + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=3, + image_size=image_size).multilevel_boxes + num_anchors_per_location = len(aspect_ratios) * num_scales + + input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) + backbone = resnet.ResNet(model_id=50, input_specs=input_specs) + decoder = fpn.FPN( + min_level=min_level, + 
max_level=max_level, + input_specs=backbone.output_specs) + rpn_head = dense_prediction_heads.RPNHead( + min_level=min_level, + max_level=max_level, + num_anchors_per_location=num_anchors_per_location) + detection_head = instance_heads.DetectionHead( + num_classes=num_classes) + roi_generator_obj = roi_generator.MultilevelROIGenerator() + roi_sampler_obj = roi_sampler.ROISampler() + roi_aligner_obj = roi_aligner.MultilevelROIAligner() + detection_generator_obj = detection_generator.DetectionGenerator() + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=num_classes, upsample_factor=2) + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=28, num_sampled_masks=1) + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) + + model = maskrcnn_model.DeepMaskRCNNModel( + backbone, + decoder, + rpn_head, + detection_head, + roi_generator_obj, + roi_sampler_obj, + roi_aligner_obj, + detection_generator_obj, + mask_head, + mask_sampler_obj, + mask_roi_aligner_obj, + use_gt_boxes_for_masks=use_gt_boxes_for_masks) + + gt_boxes = tf.zeros((2, 16, 4), dtype=tf.float32) + gt_masks = tf.zeros((2, 16, 32, 32)) + gt_classes = tf.zeros((2, 16), dtype=tf.int32) + results = model(images, + image_shape, + anchor_boxes, + gt_boxes, + gt_classes, + gt_masks, + training=training) + + self.assertIn('rpn_boxes', results) + self.assertIn('rpn_scores', results) + if training: + self.assertIn('class_targets', results) + self.assertIn('box_targets', results) + self.assertIn('class_outputs', results) + self.assertIn('box_outputs', results) + self.assertIn('mask_outputs', results) + self.assertEqual(results['mask_targets'].shape, + results['mask_outputs'].shape) + else: + self.assertIn('detection_boxes', results) + self.assertIn('detection_scores', results) + self.assertIn('detection_classes', results) + self.assertIn('num_detections', results) + self.assertIn('detection_masks', results) + + +if __name__ == '__main__': + tf.test.main() diff --git 
a/official/vision/beta/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py b/official/vision/beta/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..514292d7aaa1b4a51af6cd6af5e201e5d7cf9f90 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py @@ -0,0 +1,189 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Mask R-CNN variant with support for deep mask heads.""" + +import tensorflow as tf + +from official.core import task_factory +from official.vision.beta.modeling import backbones +from official.vision.beta.modeling.decoders import factory as decoder_factory +from official.vision.beta.modeling.heads import dense_prediction_heads +from official.vision.beta.modeling.heads import instance_heads +from official.vision.beta.modeling.layers import detection_generator +from official.vision.beta.modeling.layers import mask_sampler +from official.vision.beta.modeling.layers import roi_aligner +from official.vision.beta.modeling.layers import roi_generator +from official.vision.beta.modeling.layers import roi_sampler +from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as deep_mask_head_rcnn_config +from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_model as deep_maskrcnn_model +from 
official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import instance_heads as deep_instance_heads +from official.vision.beta.tasks import maskrcnn + + +# Taken from modeling/factory.py +def build_maskrcnn(input_specs: tf.keras.layers.InputSpec, + model_config: deep_mask_head_rcnn_config.DeepMaskHeadRCNN, + l2_regularizer: tf.keras.regularizers.Regularizer = None): + """Builds Mask R-CNN model.""" + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + decoder = decoder_factory.build_decoder( + input_specs=backbone.output_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + rpn_head_config = model_config.rpn_head + roi_generator_config = model_config.roi_generator + roi_sampler_config = model_config.roi_sampler + roi_aligner_config = model_config.roi_aligner + detection_head_config = model_config.detection_head + generator_config = model_config.detection_generator + norm_activation_config = model_config.norm_activation + num_anchors_per_location = ( + len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales) + + rpn_head = dense_prediction_heads.RPNHead( + min_level=model_config.min_level, + max_level=model_config.max_level, + num_anchors_per_location=num_anchors_per_location, + num_convs=rpn_head_config.num_convs, + num_filters=rpn_head_config.num_filters, + use_separable_conv=rpn_head_config.use_separable_conv, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + detection_head = instance_heads.DetectionHead( + num_classes=model_config.num_classes, + num_convs=detection_head_config.num_convs, + num_filters=detection_head_config.num_filters, + use_separable_conv=detection_head_config.use_separable_conv, + num_fcs=detection_head_config.num_fcs, + 
fc_dims=detection_head_config.fc_dims, + activation=norm_activation_config.activation, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon, + kernel_regularizer=l2_regularizer) + + roi_generator_obj = roi_generator.MultilevelROIGenerator( + pre_nms_top_k=roi_generator_config.pre_nms_top_k, + pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold, + pre_nms_min_size_threshold=( + roi_generator_config.pre_nms_min_size_threshold), + nms_iou_threshold=roi_generator_config.nms_iou_threshold, + num_proposals=roi_generator_config.num_proposals, + test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k, + test_pre_nms_score_threshold=( + roi_generator_config.test_pre_nms_score_threshold), + test_pre_nms_min_size_threshold=( + roi_generator_config.test_pre_nms_min_size_threshold), + test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold, + test_num_proposals=roi_generator_config.test_num_proposals, + use_batched_nms=roi_generator_config.use_batched_nms) + + roi_sampler_obj = roi_sampler.ROISampler( + mix_gt_boxes=roi_sampler_config.mix_gt_boxes, + num_sampled_rois=roi_sampler_config.num_sampled_rois, + foreground_fraction=roi_sampler_config.foreground_fraction, + foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold, + background_iou_high_threshold=( + roi_sampler_config.background_iou_high_threshold), + background_iou_low_threshold=( + roi_sampler_config.background_iou_low_threshold)) + + roi_aligner_obj = roi_aligner.MultilevelROIAligner( + crop_size=roi_aligner_config.crop_size, + sample_offset=roi_aligner_config.sample_offset) + + detection_generator_obj = detection_generator.DetectionGenerator( + apply_nms=True, + pre_nms_top_k=generator_config.pre_nms_top_k, + pre_nms_score_threshold=generator_config.pre_nms_score_threshold, + nms_iou_threshold=generator_config.nms_iou_threshold, + 
max_num_detections=generator_config.max_num_detections, + use_batched_nms=generator_config.use_batched_nms) + + if model_config.include_mask: + mask_head = deep_instance_heads.DeepMaskHead( + num_classes=model_config.num_classes, + upsample_factor=model_config.mask_head.upsample_factor, + num_convs=model_config.mask_head.num_convs, + num_filters=model_config.mask_head.num_filters, + use_separable_conv=model_config.mask_head.use_separable_conv, + activation=model_config.norm_activation.activation, + norm_momentum=model_config.norm_activation.norm_momentum, + norm_epsilon=model_config.norm_activation.norm_epsilon, + kernel_regularizer=l2_regularizer, + class_agnostic=model_config.mask_head.class_agnostic, + convnet_variant=model_config.mask_head.convnet_variant) + + mask_sampler_obj = mask_sampler.MaskSampler( + mask_target_size=( + model_config.mask_roi_aligner.crop_size * + model_config.mask_head.upsample_factor), + num_sampled_masks=model_config.mask_sampler.num_sampled_masks) + + mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner( + crop_size=model_config.mask_roi_aligner.crop_size, + sample_offset=model_config.mask_roi_aligner.sample_offset) + else: + mask_head = None + mask_sampler_obj = None + mask_roi_aligner_obj = None + + model = deep_maskrcnn_model.DeepMaskRCNNModel( + backbone=backbone, + decoder=decoder, + rpn_head=rpn_head, + detection_head=detection_head, + roi_generator=roi_generator_obj, + roi_sampler=roi_sampler_obj, + roi_aligner=roi_aligner_obj, + detection_generator=detection_generator_obj, + mask_head=mask_head, + mask_sampler=mask_sampler_obj, + mask_roi_aligner=mask_roi_aligner_obj, + use_gt_boxes_for_masks=model_config.use_gt_boxes_for_masks) + return model + + +@task_factory.register_task_cls(deep_mask_head_rcnn_config.DeepMaskHeadRCNNTask) +class DeepMaskHeadRCNNTask(maskrcnn.MaskRCNNTask): + """Mask R-CNN with support for deep mask heads.""" + + def build_model(self): + """Build Mask R-CNN model.""" + + input_specs = 
tf.keras.layers.InputSpec( + shape=[None] + self.task_config.model.input_size) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. + # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = build_maskrcnn( + input_specs=input_specs, + model_config=self.task_config.model, + l2_regularizer=l2_regularizer) + return model diff --git a/official/vision/beta/projects/deepmac_maskrcnn/train.py b/official/vision/beta/projects/deepmac_maskrcnn/train.py new file mode 100644 index 0000000000000000000000000000000000000000..8e773615a3e99b20c64a1f61c74cc3ee866257a6 --- /dev/null +++ b/official/vision/beta/projects/deepmac_maskrcnn/train.py @@ -0,0 +1,72 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""TensorFlow Model Garden Vision training driver.""" + +from absl import app +from absl import flags +from absl import logging + +import gin + +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance +# pylint: disable=unused-import +from official.vision.beta.projects.deepmac_maskrcnn.common import registry_imports +# pylint: enable=unused-import + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + logging.info('Training with task %s', task) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + train_utils.save_gin_config(FLAGS.mode, model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/vision/beta/projects/example/README.md b/official/vision/beta/projects/example/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b2bf86f6c04974aba2586f46577494e286268daa --- /dev/null +++ b/official/vision/beta/projects/example/README.md @@ -0,0 +1,4 @@ +# Example Project + +This is a minimal example project to demonstrate how to use TF Model Garden's +building blocks to implement a new vision project from scratch. diff --git a/official/vision/beta/projects/keypoint/README.md b/official/vision/beta/projects/keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..05e7c03983d21190c5a2720070abe32bce17e472 --- /dev/null +++ b/official/vision/beta/projects/keypoint/README.md @@ -0,0 +1,3 @@ +# Keypoint Detection Models. 
+ +TBD diff --git a/official/vision/beta/projects/simclr/README.md b/official/vision/beta/projects/simclr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..91b4375bd60ff89b8677acad89390592f658f6c0 --- /dev/null +++ b/official/vision/beta/projects/simclr/README.md @@ -0,0 +1,78 @@ +# Simple Framework for Contrastive Learning + +[![Paper](http://img.shields.io/badge/Paper-arXiv.2002.05709-B3181B?logo=arXiv)](https://arxiv.org/abs/2002.05709) +[![Paper](http://img.shields.io/badge/Paper-arXiv.2006.10029-B3181B?logo=arXiv)](https://arxiv.org/abs/2006.10029) + +
+ SimCLR Illustration +
+
+ An illustration of SimCLR (from our blog here). +
+
+## Environment setup
+
+The code can be run on multiple GPUs or TPUs with different distribution
+strategies. See the TensorFlow distributed training
+[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
+of `tf.distribute`.
+
+The code is compatible with TensorFlow 2.4+. See requirements.txt for all
+prerequisites, and you can also install them using the following command: `pip
+install -r ./official/requirements.txt`
+
+## Pretraining
+To pretrain the model on ImageNet, try the following command:
+
+```
+python3 -m official.vision.beta.projects.simclr.train \
+  --mode=train_and_eval \
+  --experiment=simclr_pretraining \
+  --model_dir={MODEL_DIR} \
+  --config_file={CONFIG_FILE}
+```
+
+An example of the config file can be found [here](./configs/experiments/imagenet_simclr_pretrain_gpu.yaml).
+
+
+## Semi-supervised learning and fine-tuning the whole network
+
+You can access 1% and 10% ImageNet subsets used for semi-supervised learning via
+[tensorflow datasets](https://www.tensorflow.org/datasets/catalog/imagenet2012_subset).
+You can also find image IDs of these subsets in `imagenet_subsets/`.
+
+To fine-tune the whole network, refer to the following command:
+
+```
+python3 -m official.vision.beta.projects.simclr.train \
+  --mode=train_and_eval \
+  --experiment=simclr_finetuning \
+  --model_dir={MODEL_DIR} \
+  --config_file={CONFIG_FILE}
+```
+
+An example of the config file can be found [here](./configs/experiments/imagenet_simclr_finetune_gpu.yaml).
+ +## Cite + +[SimCLR paper](https://arxiv.org/abs/2002.05709): + +``` +@article{chen2020simple, + title={A Simple Framework for Contrastive Learning of Visual Representations}, + author={Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey}, + journal={arXiv preprint arXiv:2002.05709}, + year={2020} +} +``` + +[SimCLRv2 paper](https://arxiv.org/abs/2006.10029): + +``` +@article{chen2020big, + title={Big Self-Supervised Models are Strong Semi-Supervised Learners}, + author={Chen, Ting and Kornblith, Simon and Swersky, Kevin and Norouzi, Mohammad and Hinton, Geoffrey}, + journal={arXiv preprint arXiv:2006.10029}, + year={2020} +} +``` diff --git a/official/vision/beta/projects/simclr/common/registry_imports.py b/official/vision/beta/projects/simclr/common/registry_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..11a3b290811b168f010d96852d0661c6a384b576 --- /dev/null +++ b/official/vision/beta/projects/simclr/common/registry_imports.py @@ -0,0 +1,36 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""All necessary imports for registration.""" + +# pylint: disable=unused-import +from official.common import registry_imports +from official.vision.beta.projects.simclr.configs import simclr +from official.vision.beta.projects.simclr.losses import contrastive_losses +from official.vision.beta.projects.simclr.modeling import simclr_model +from official.vision.beta.projects.simclr.tasks import simclr as simclr_task diff --git a/official/vision/beta/projects/simclr/configs/experiments/cifar_simclr_pretrain.yaml b/official/vision/beta/projects/simclr/configs/experiments/cifar_simclr_pretrain.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5d5bd642efa5c32930a3c413d927c733af9bbf3a --- /dev/null +++ b/official/vision/beta/projects/simclr/configs/experiments/cifar_simclr_pretrain.yaml @@ -0,0 +1,79 @@ +# Cifar classification. 
+runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' + num_gpus: 16 +task: + model: + mode: 'pretrain' + input_size: [32, 32, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + backbone_trainable: true + projection_head: + proj_output_dim: 64 + num_proj_layers: 2 + ft_proj_idx: 1 + supervised_head: + num_classes: 10 + norm_activation: + use_sync_bn: true + norm_momentum: 0.9 + norm_epsilon: 0.00001 + loss: + projection_norm: true + temperature: 0.2 + evaluation: + top_k: 5 + one_hot: true + train_data: + tfds_name: 'cifar10' + tfds_split: 'train' + input_path: '' + is_training: true + global_batch_size: 512 + dtype: 'float16' + parser: + mode: 'pretrain' + aug_color_jitter_strength: 0.5 + aug_rand_blur: false + decoder: + decode_label: true + validation_data: + tfds_name: 'cifar10' + tfds_split: 'test' + input_path: '' + is_training: false + global_batch_size: 512 + dtype: 'float16' + drop_remainder: false + parser: + mode: 'pretrain' + decoder: + decode_label: true +trainer: + train_steps: 48000 # 500 epochs + validation_steps: 18 # NUM_EXAMPLES (10000) // global_batch_size + validation_interval: 96 + steps_per_loop: 96 # NUM_EXAMPLES (50000) // global_batch_size + summary_interval: 96 + checkpoint_interval: 96 + optimizer_config: + optimizer: + type: 'lars' + lars: + momentum: 0.9 + weight_decay_rate: 0.000001 + exclude_from_weight_decay: ['batch_normalization', 'bias'] + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 0.6 # 0.3 × BatchSize / 256 + decay_steps: 43200 # train_steps - warmup_steps + warmup: + type: 'linear' + linear: + warmup_steps: 4800 # 10% of total epochs diff --git a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_gpu.yaml b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c79c0af7f9027ecae8ea0a05fb31bd2465b70f77 --- 
/dev/null +++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_finetune_gpu.yaml @@ -0,0 +1,72 @@ +# ImageNet classification. +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' + num_gpus: 16 +task: + model: + mode: 'finetune' + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + backbone_trainable: true + projection_head: + proj_output_dim: 128 + num_proj_layers: 3 + ft_proj_idx: 1 + supervised_head: + num_classes: 1001 + zero_init: true + norm_activation: + use_sync_bn: false + norm_momentum: 0.9 + norm_epsilon: 0.00001 + loss: + label_smoothing: 0.0 + one_hot: true + evaluation: + top_k: 5 + one_hot: true + init_checkpoint: gs://tf_model_garden/official/simclr/r50_1x + init_checkpoint_modules: 'backbone_projection' + train_data: + tfds_name: 'imagenet2012_subset/10pct' + tfds_split: 'train' + input_path: '' + is_training: true + global_batch_size: 1024 + dtype: 'float16' + parser: + mode: 'finetune' + validation_data: + tfds_name: 'imagenet2012_subset/10pct' + tfds_split: 'validation' + input_path: '' + is_training: false + global_batch_size: 1024 + dtype: 'float16' + drop_remainder: false + parser: + mode: 'finetune' +trainer: + train_steps: 12500 # 100 epochs + validation_steps: 49 # NUM_EXAMPLES (50000) // global_batch_size + validation_interval: 125 + steps_per_loop: 125 # NUM_EXAMPLES (1281167) // global_batch_size + summary_interval: 125 + checkpoint_interval: 125 + optimizer_config: + optimizer: + type: 'lars' + lars: + momentum: 0.9 + weight_decay_rate: 0.0 + exclude_from_weight_decay: ['batch_normalization', 'bias'] + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 0.04 # 0.01 × BatchSize / 512 + decay_steps: 12500 # train_steps diff --git a/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_gpu.yaml b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_gpu.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..9e7c326c3d68cfe89500c38d6d0e7a676145bca8 --- /dev/null +++ b/official/vision/beta/projects/simclr/configs/experiments/imagenet_simclr_pretrain_gpu.yaml @@ -0,0 +1,73 @@ +# ImageNet classification. +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' + num_gpus: 16 +task: + model: + mode: 'pretrain' + input_size: [224, 224, 3] + backbone: + type: 'resnet' + resnet: + model_id: 50 + backbone_trainable: true + projection_head: + proj_output_dim: 128 + num_proj_layers: 3 + ft_proj_idx: 0 + supervised_head: + num_classes: 1001 + norm_activation: + use_sync_bn: true + norm_momentum: 0.9 + norm_epsilon: 0.00001 + loss: + projection_norm: true + temperature: 0.1 + evaluation: + top_k: 5 + one_hot: true + train_data: + input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 2048 + dtype: 'float16' + parser: + mode: 'pretrain' + decoder: + decode_label: true + validation_data: + input_path: '/readahead/200M/placer/prod/home/distbelief/imagenet-tensorflow/imagenet-2012-tfrecord/valid*' + is_training: false + global_batch_size: 2048 + dtype: 'float16' + drop_remainder: false + parser: + mode: 'pretrain' + decoder: + decode_label: true +trainer: + train_steps: 187200 # 300 epochs + validation_steps: 24 # NUM_EXAMPLES (50000) // global_batch_size + validation_interval: 624 + steps_per_loop: 624 # NUM_EXAMPLES (1281167) // global_batch_size + summary_interval: 624 + checkpoint_interval: 624 + optimizer_config: + optimizer: + type: 'lars' + lars: + momentum: 0.9 + weight_decay_rate: 0.000001 + exclude_from_weight_decay: ['batch_normalization', 'bias'] + learning_rate: + type: 'cosine' + cosine: + initial_learning_rate: 1.6 # 0.2 * BatchSize / 256 + decay_steps: 177840 # train_steps - warmup_steps + warmup: + type: 'linear' + linear: + warmup_steps: 9360 # 5% of total epochs 
diff --git a/official/vision/beta/projects/simclr/configs/simclr.py b/official/vision/beta/projects/simclr/configs/simclr.py new file mode 100644 index 0000000000000000000000000000000000000000..f3264b428238b5839441c2c0ea086b71d848d74e --- /dev/null +++ b/official/vision/beta/projects/simclr/configs/simclr.py @@ -0,0 +1,332 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""SimCLR configurations.""" +import os +from typing import List, Optional + +import dataclasses + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.modeling import optimization +from official.vision.beta.configs import backbones +from official.vision.beta.configs import common +from official.vision.beta.projects.simclr.modeling import simclr_model + + +@dataclasses.dataclass +class Decoder(hyperparams.Config): + decode_label: bool = True + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + """Parser config.""" + aug_rand_crop: bool = True + aug_rand_hflip: bool = True + aug_color_distort: bool = True + aug_color_jitter_strength: float = 1.0 + aug_color_jitter_impl: str = 'simclrv2' # 'simclrv1' or 'simclrv2' + aug_rand_blur: bool = True + parse_label: bool = True + test_crop: bool = True + mode: str = simclr_model.PRETRAIN + + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Training data config.""" + input_path: str = '' + global_batch_size: int = 0 + is_training: bool = True + dtype: str = 'float32' + shuffle_buffer_size: int = 10000 + cycle_length: int = 10 + # simclr specific configs + parser: Parser = Parser() + decoder: Decoder = Decoder() + + +@dataclasses.dataclass +class ProjectionHead(hyperparams.Config): + proj_output_dim: int = 128 + num_proj_layers: int = 3 + ft_proj_idx: int = 1 # layer of the projection head to use for fine-tuning. 
+ + +@dataclasses.dataclass +class SupervisedHead(hyperparams.Config): + num_classes: int = 1001 + zero_init: bool = False + + +@dataclasses.dataclass +class ContrastiveLoss(hyperparams.Config): + projection_norm: bool = True + temperature: float = 0.1 + l2_weight_decay: float = 0.0 + + +@dataclasses.dataclass +class ClassificationLosses(hyperparams.Config): + label_smoothing: float = 0.0 + one_hot: bool = True + l2_weight_decay: float = 0.0 + + +@dataclasses.dataclass +class Evaluation(hyperparams.Config): + top_k: int = 5 + one_hot: bool = True + + +@dataclasses.dataclass +class SimCLRModel(hyperparams.Config): + """SimCLR model config.""" + input_size: List[int] = dataclasses.field(default_factory=list) + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet()) + projection_head: ProjectionHead = ProjectionHead( + proj_output_dim=128, + num_proj_layers=3, + ft_proj_idx=1) + supervised_head: SupervisedHead = SupervisedHead(num_classes=1001) + norm_activation: common.NormActivation = common.NormActivation( + norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False) + mode: str = simclr_model.PRETRAIN + backbone_trainable: bool = True + + +@dataclasses.dataclass +class SimCLRPretrainTask(cfg.TaskConfig): + """SimCLR pretraining task config.""" + model: SimCLRModel = SimCLRModel(mode=simclr_model.PRETRAIN) + train_data: DataConfig = DataConfig( + parser=Parser(mode=simclr_model.PRETRAIN), is_training=True) + validation_data: DataConfig = DataConfig( + parser=Parser(mode=simclr_model.PRETRAIN), is_training=False) + loss: ContrastiveLoss = ContrastiveLoss() + evaluation: Evaluation = Evaluation() + init_checkpoint: Optional[str] = None + # all or backbone + init_checkpoint_modules: str = 'all' + + +@dataclasses.dataclass +class SimCLRFinetuneTask(cfg.TaskConfig): + """SimCLR fine tune task config.""" + model: SimCLRModel = SimCLRModel( + mode=simclr_model.FINETUNE, + supervised_head=SupervisedHead(num_classes=1001, zero_init=True)) 
+ train_data: DataConfig = DataConfig( + parser=Parser(mode=simclr_model.FINETUNE), is_training=True) + validation_data: DataConfig = DataConfig( + parser=Parser(mode=simclr_model.FINETUNE), is_training=False) + loss: ClassificationLosses = ClassificationLosses() + evaluation: Evaluation = Evaluation() + init_checkpoint: Optional[str] = None + # all, backbone_projection or backbone + init_checkpoint_modules: str = 'backbone_projection' + + +@exp_factory.register_config_factory('simclr_pretraining') +def simclr_pretraining() -> cfg.ExperimentConfig: + """Image classification general.""" + return cfg.ExperimentConfig( + task=SimCLRPretrainTask(), + trainer=cfg.TrainerConfig(), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + +@exp_factory.register_config_factory('simclr_finetuning') +def simclr_finetuning() -> cfg.ExperimentConfig: + """Image classification general.""" + return cfg.ExperimentConfig( + task=SimCLRFinetuneTask(), + trainer=cfg.TrainerConfig(), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + +IMAGENET_TRAIN_EXAMPLES = 1281167 +IMAGENET_VAL_EXAMPLES = 50000 +IMAGENET_INPUT_PATH_BASE = 'imagenet-2012-tfrecord' + + +@exp_factory.register_config_factory('simclr_pretraining_imagenet') +def simclr_pretraining_imagenet() -> cfg.ExperimentConfig: + """Image classification general.""" + train_batch_size = 4096 + eval_batch_size = 4096 + steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + return cfg.ExperimentConfig( + task=SimCLRPretrainTask( + model=SimCLRModel( + mode=simclr_model.PRETRAIN, + backbone_trainable=True, + input_size=[224, 224, 3], + backbone=backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50)), + projection_head=ProjectionHead( + proj_output_dim=128, + num_proj_layers=3, + ft_proj_idx=1), + supervised_head=SupervisedHead(num_classes=1001), + norm_activation=common.NormActivation( + 
norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=True)), + loss=ContrastiveLoss(), + evaluation=Evaluation(), + train_data=DataConfig( + parser=Parser(mode=simclr_model.PRETRAIN), + decoder=Decoder(decode_label=True), + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size), + validation_data=DataConfig( + parser=Parser(mode=simclr_model.PRETRAIN), + decoder=Decoder(decode_label=True), + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size), + ), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=500 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'lars', + 'lars': { + 'momentum': 0.9, + 'weight_decay_rate': 0.000001, + 'exclude_from_weight_decay': [ + 'batch_normalization', 'bias'] + } + }, + 'learning_rate': { + 'type': 'cosine', + 'cosine': { + # 0.2 * BatchSize / 256 + 'initial_learning_rate': 0.2 * train_batch_size / 256, + # train_steps - warmup_steps + 'decay_steps': 475 * steps_per_epoch + } + }, + 'warmup': { + 'type': 'linear', + 'linear': { + # 5% of total epochs + 'warmup_steps': 25 * steps_per_epoch + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) + + +@exp_factory.register_config_factory('simclr_finetuning_imagenet') +def simclr_finetuning_imagenet() -> cfg.ExperimentConfig: + """Image classification general.""" + train_batch_size = 1024 + eval_batch_size = 1024 + steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // train_batch_size + pretrain_model_base = '' + return cfg.ExperimentConfig( + task=SimCLRFinetuneTask( + model=SimCLRModel( + mode=simclr_model.FINETUNE, + backbone_trainable=True, + input_size=[224, 
224, 3], + backbone=backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50)), + projection_head=ProjectionHead( + proj_output_dim=128, + num_proj_layers=3, + ft_proj_idx=1), + supervised_head=SupervisedHead( + num_classes=1001, zero_init=True), + norm_activation=common.NormActivation( + norm_momentum=0.9, norm_epsilon=1e-5, use_sync_bn=False)), + loss=ClassificationLosses(), + evaluation=Evaluation(), + train_data=DataConfig( + parser=Parser(mode=simclr_model.FINETUNE), + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'train*'), + is_training=True, + global_batch_size=train_batch_size), + validation_data=DataConfig( + parser=Parser(mode=simclr_model.FINETUNE), + input_path=os.path.join(IMAGENET_INPUT_PATH_BASE, 'valid*'), + is_training=False, + global_batch_size=eval_batch_size), + init_checkpoint=pretrain_model_base, + # all, backbone_projection or backbone + init_checkpoint_modules='backbone_projection'), + trainer=cfg.TrainerConfig( + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + train_steps=60 * steps_per_epoch, + validation_steps=IMAGENET_VAL_EXAMPLES // eval_batch_size, + validation_interval=steps_per_epoch, + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'lars', + 'lars': { + 'momentum': 0.9, + 'weight_decay_rate': 0.0, + 'exclude_from_weight_decay': [ + 'batch_normalization', 'bias'] + } + }, + 'learning_rate': { + 'type': 'cosine', + 'cosine': { + # 0.01 × BatchSize / 512 + 'initial_learning_rate': 0.01 * train_batch_size / 512, + 'decay_steps': 60 * steps_per_epoch + } + } + })), + restrictions=[ + 'task.train_data.is_training != None', + 'task.validation_data.is_training != None' + ]) diff --git a/official/vision/beta/projects/simclr/configs/simclr_test.py b/official/vision/beta/projects/simclr/configs/simclr_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f232d7533f4effe5161611bc3e0862ab87c5f305 --- /dev/null 
+++ b/official/vision/beta/projects/simclr/configs/simclr_test.py @@ -0,0 +1,62 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for simclr.""" +# pylint: disable=unused-import +from absl.testing import parameterized + +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.vision.beta.projects.simclr.common import registry_imports # pylint: disable=unused-import +from official.vision.beta.projects.simclr.configs import simclr as exp_cfg + + +class SimCLRConfigTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + 'simclr_pretraining_imagenet', 'simclr_finetuning_imagenet') + def test_simclr_configs(self, config_name): + config = exp_factory.get_exp_config(config_name) + self.assertIsInstance(config, cfg.ExperimentConfig) + # Bug fix: this previously compared against 'simclr_pretrain_imagenet', + # which is not a registered/parameterized name, so the pretrain-task + # isinstance check was silently skipped for both parameter values. + if config_name == 'simclr_pretraining_imagenet': + self.assertIsInstance(config.task, exp_cfg.SimCLRPretrainTask) + elif config_name == 'simclr_finetuning_imagenet': + self.assertIsInstance(config.task, exp_cfg.SimCLRFinetuneTask) + self.assertIsInstance(config.task.model, + exp_cfg.SimCLRModel) + self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig) + config.task.train_data.is_training = None + with self.assertRaises(KeyError): + config.validate() + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/simclr/dataloaders/preprocess_ops.py b/official/vision/beta/projects/simclr/dataloaders/preprocess_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..29a3a3eadd98557598305def18feaeeff720f31a --- /dev/null +++ b/official/vision/beta/projects/simclr/dataloaders/preprocess_ops.py @@ -0,0 +1,363 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Preprocessing ops.""" +import functools +import tensorflow as tf + +CROP_PROPORTION = 0.875 # Standard for ImageNet. 
+ + +def random_apply(func, p, x): + """Randomly apply function func to x with probability p.""" + return tf.cond( + tf.less( + tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32), + tf.cast(p, tf.float32)), lambda: func(x), lambda: x) + + +def random_brightness(image, max_delta, impl='simclrv2'): + """A multiplicative vs additive change of brightness.""" + if impl == 'simclrv2': + factor = tf.random.uniform([], tf.maximum(1.0 - max_delta, 0), + 1.0 + max_delta) + image = image * factor + elif impl == 'simclrv1': + image = tf.image.random_brightness(image, max_delta=max_delta) + else: + raise ValueError('Unknown impl {} for random brightness.'.format(impl)) + return image + + +def to_grayscale(image, keep_channels=True): + image = tf.image.rgb_to_grayscale(image) + if keep_channels: + image = tf.tile(image, [1, 1, 3]) + return image + + +def color_jitter_nonrand(image, + brightness=0, + contrast=0, + saturation=0, + hue=0, + impl='simclrv2'): + """Distorts the color of the image (jittering order is fixed). + + Args: + image: The input image tensor. + brightness: A float, specifying the brightness for color jitter. + contrast: A float, specifying the contrast for color jitter. + saturation: A float, specifying the saturation for color jitter. + hue: A float, specifying the hue for color jitter. + impl: 'simclrv1' or 'simclrv2'. Whether to use simclrv1 or simclrv2's + version of random brightness. + + Returns: + The distorted image tensor. 
+ """ + with tf.name_scope('distort_color'): + def apply_transform(i, x, brightness, contrast, saturation, hue): + """Apply the i-th transformation.""" + if brightness != 0 and i == 0: + x = random_brightness(x, max_delta=brightness, impl=impl) + elif contrast != 0 and i == 1: + x = tf.image.random_contrast( + x, lower=1 - contrast, upper=1 + contrast) + elif saturation != 0 and i == 2: + x = tf.image.random_saturation( + x, lower=1 - saturation, upper=1 + saturation) + elif hue != 0: + x = tf.image.random_hue(x, max_delta=hue) + return x + + for i in range(4): + image = apply_transform(i, image, brightness, contrast, saturation, hue) + image = tf.clip_by_value(image, 0., 1.) + return image + + +def color_jitter_rand(image, + brightness=0, + contrast=0, + saturation=0, + hue=0, + impl='simclrv2'): + """Distorts the color of the image (jittering order is random). + + Args: + image: The input image tensor. + brightness: A float, specifying the brightness for color jitter. + contrast: A float, specifying the contrast for color jitter. + saturation: A float, specifying the saturation for color jitter. + hue: A float, specifying the hue for color jitter. + impl: 'simclrv1' or 'simclrv2'. Whether to use simclrv1 or simclrv2's + version of random brightness. + + Returns: + The distorted image tensor. 
+ """ + with tf.name_scope('distort_color'): + def apply_transform(i, x): + """Apply the i-th transformation.""" + + def brightness_foo(): + if brightness == 0: + return x + else: + return random_brightness(x, max_delta=brightness, impl=impl) + + def contrast_foo(): + if contrast == 0: + return x + else: + return tf.image.random_contrast(x, lower=1 - contrast, + upper=1 + contrast) + + def saturation_foo(): + if saturation == 0: + return x + else: + return tf.image.random_saturation( + x, lower=1 - saturation, upper=1 + saturation) + + def hue_foo(): + if hue == 0: + return x + else: + return tf.image.random_hue(x, max_delta=hue) + + x = tf.cond(tf.less(i, 2), + lambda: tf.cond(tf.less(i, 1), brightness_foo, contrast_foo), + lambda: tf.cond(tf.less(i, 3), saturation_foo, hue_foo)) + return x + + perm = tf.random.shuffle(tf.range(4)) + for i in range(4): + image = apply_transform(perm[i], image) + image = tf.clip_by_value(image, 0., 1.) + return image + + +def color_jitter(image, strength, random_order=True, impl='simclrv2'): + """Distorts the color of the image. + + Args: + image: The input image tensor. + strength: the floating number for the strength of the color augmentation. + random_order: A bool, specifying whether to randomize the jittering order. + impl: 'simclrv1' or 'simclrv2'. Whether to use simclrv1 or simclrv2's + version of random brightness. + + Returns: + The distorted image tensor. 
+ """ + brightness = 0.8 * strength + contrast = 0.8 * strength + saturation = 0.8 * strength + hue = 0.2 * strength + if random_order: + return color_jitter_rand( + image, brightness, contrast, saturation, hue, impl=impl) + else: + return color_jitter_nonrand( + image, brightness, contrast, saturation, hue, impl=impl) + + +def random_color_jitter(image, + p=1.0, + color_jitter_strength=1.0, + impl='simclrv2'): + """Perform random color jitter.""" + def _transform(image): + color_jitter_t = functools.partial( + color_jitter, strength=color_jitter_strength, impl=impl) + image = random_apply(color_jitter_t, p=0.8, x=image) + return random_apply(to_grayscale, p=0.2, x=image) + + return random_apply(_transform, p=p, x=image) + + +def gaussian_blur(image, kernel_size, sigma, padding='SAME'): + """Blurs the given image with separable convolution. + + + Args: + image: Tensor of shape [height, width, channels] and dtype float to blur. + kernel_size: Integer Tensor for the size of the blur kernel. This is should + be an odd number. If it is an even number, the actual kernel size will be + size + 1. + sigma: Sigma value for gaussian operator. + padding: Padding to use for the convolution. Typically 'SAME' or 'VALID'. + + Returns: + A Tensor representing the blurred image. + """ + radius = tf.cast(kernel_size / 2, dtype=tf.int32) + kernel_size = radius * 2 + 1 + x = tf.cast(tf.range(-radius, radius + 1), dtype=tf.float32) + blur_filter = tf.exp(-tf.pow(x, 2.0) / + (2.0 * tf.pow(tf.cast(sigma, dtype=tf.float32), 2.0))) + blur_filter /= tf.reduce_sum(blur_filter) + # One vertical and one horizontal filter. 
+ blur_v = tf.reshape(blur_filter, [kernel_size, 1, 1, 1]) + blur_h = tf.reshape(blur_filter, [1, kernel_size, 1, 1]) + num_channels = tf.shape(image)[-1] + blur_h = tf.tile(blur_h, [1, 1, num_channels, 1]) + blur_v = tf.tile(blur_v, [1, 1, num_channels, 1]) + expand_batch_dim = image.shape.ndims == 3 + if expand_batch_dim: + # Tensorflow requires batched input to convolutions, which we can fake with + # an extra dimension. + image = tf.expand_dims(image, axis=0) + blurred = tf.nn.depthwise_conv2d( + image, blur_h, strides=[1, 1, 1, 1], padding=padding) + blurred = tf.nn.depthwise_conv2d( + blurred, blur_v, strides=[1, 1, 1, 1], padding=padding) + if expand_batch_dim: + blurred = tf.squeeze(blurred, axis=0) + return blurred + + +def random_blur(image, height, width, p=0.5): + """Randomly blur an image. + + Args: + image: `Tensor` representing an image of arbitrary size. + height: Height of output image. + width: Width of output image. + p: probability of applying this transformation. + + Returns: + A preprocessed image `Tensor`. + """ + del width + + def _transform(image): + sigma = tf.random.uniform([], 0.1, 2.0, dtype=tf.float32) + return gaussian_blur( + image, kernel_size=height // 10, sigma=sigma, padding='SAME') + + return random_apply(_transform, p=p, x=image) + + +def distorted_bounding_box_crop(image, + bbox, + min_object_covered=0.1, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0), + max_attempts=100, + scope=None): + """Generates cropped_image using one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: `Tensor` of image data. + bbox: `Tensor` of bounding boxes arranged `[1, num_boxes, coords]` + where each coordinate is [0, 1) and the coordinates are arranged + as `[ymin, xmin, ymax, xmax]`. If num_boxes is 0 then use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. 
The cropped + area of the image must contain at least this fraction of any bounding + box supplied. + aspect_ratio_range: An optional list of `float`s. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `float`s. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional `str` for name scope. + Returns: + (cropped image `Tensor`, distorted bbox `Tensor`). + """ + with tf.name_scope(scope or 'distorted_bounding_box_crop'): + shape = tf.shape(image) + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + shape, + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + offset_y, offset_x, _ = tf.unstack(bbox_begin) + target_height, target_width, _ = tf.unstack(bbox_size) + image = tf.image.crop_to_bounding_box( + image, offset_y, offset_x, target_height, target_width) + + return image + + +def crop_and_resize(image, height, width): + """Make a random crop and resize it to height `height` and width `width`. + + Args: + image: Tensor representing the image. + height: Desired image height. + width: Desired image width. + + Returns: + A `height` x `width` x channels Tensor holding a random crop of `image`. + """ + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) + aspect_ratio = width / height + image = distorted_bounding_box_crop( + image, + bbox, + min_object_covered=0.1, + aspect_ratio_range=(3. / 4 * aspect_ratio, 4. / 3. 
* aspect_ratio), + area_range=(0.08, 1.0), + max_attempts=100, + scope=None) + return tf.image.resize([image], [height, width], + method=tf.image.ResizeMethod.BICUBIC)[0] + + +def random_crop_with_resize(image, height, width, p=1.0): + """Randomly crop and resize an image. + + Args: + image: `Tensor` representing an image of arbitrary size. + height: Height of output image. + width: Width of output image. + p: Probability of applying this transformation. + + Returns: + A preprocessed image `Tensor`. + """ + + def _transform(image): # pylint: disable=missing-docstring + image = crop_and_resize(image, height, width) + return image + + return random_apply(_transform, p=p, x=image) diff --git a/official/vision/beta/projects/simclr/dataloaders/simclr_input.py b/official/vision/beta/projects/simclr/dataloaders/simclr_input.py new file mode 100644 index 0000000000000000000000000000000000000000..5e7607af5170eacb489d294e4cb7297c746bec60 --- /dev/null +++ b/official/vision/beta/projects/simclr/dataloaders/simclr_input.py @@ -0,0 +1,242 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Data parser and processing for SimCLR. + +For pre-training: +- Preprocessing: + -> random cropping + -> resize back to the original size + -> random color distortions + -> random Gaussian blur (sequential) +- Each image need to be processed randomly twice + +```snippets + if train_mode == 'pretrain': + xs = [] + for _ in range(2): # Two transformations + xs.append(preprocess_fn_pretrain(image)) + image = tf.concat(xs, -1) + else: + image = preprocess_fn_finetune(image) +``` + +For fine-tuning: +typical image classification input +""" + +from typing import List + +import tensorflow as tf + +from official.vision.beta.dataloaders import decoder +from official.vision.beta.dataloaders import parser +from official.vision.beta.ops import preprocess_ops +from official.vision.beta.projects.simclr.dataloaders import preprocess_ops as simclr_preprocess_ops +from official.vision.beta.projects.simclr.modeling import simclr_model + + +class Decoder(decoder.Decoder): + """A tf.Example decoder for classification task.""" + + def __init__(self, decode_label=True): + self._decode_label = decode_label + + self._keys_to_features = { + 'image/encoded': tf.io.FixedLenFeature((), tf.string, default_value=''), + } + if self._decode_label: + self._keys_to_features.update({ + 'image/class/label': ( + tf.io.FixedLenFeature((), tf.int64, default_value=-1)) + }) + + def decode(self, serialized_example): + return tf.io.parse_single_example( + serialized_example, self._keys_to_features) + + +class 
TFDSDecoder(decoder.Decoder): + """A TFDS decoder for classification task.""" + + def __init__(self, decode_label=True): + self._decode_label = decode_label + + def decode(self, serialized_example): + sample_dict = { + 'image/encoded': tf.io.encode_jpeg( + serialized_example['image'], quality=100), + } + if self._decode_label: + sample_dict.update({ + 'image/class/label': serialized_example['label'], + }) + return sample_dict + + +class Parser(parser.Parser): + """Parser for SimCLR training.""" + + def __init__(self, + output_size: List[int], + aug_rand_crop: bool = True, + aug_rand_hflip: bool = True, + aug_color_distort: bool = True, + aug_color_jitter_strength: float = 1.0, + aug_color_jitter_impl: str = 'simclrv2', + aug_rand_blur: bool = True, + parse_label: bool = True, + test_crop: bool = True, + mode: str = simclr_model.PRETRAIN, + dtype: str = 'float32'): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: `Tensor` or `list` for [height, width] of output image. The + output_size should be divided by the largest feature stride 2^max_level. + aug_rand_crop: `bool`, if True, augment training with random cropping. + aug_rand_hflip: `bool`, if True, augment training with random + horizontal flip. + aug_color_distort: `bool`, if True augment training with color distortion. + aug_color_jitter_strength: `float`, the floating number for the strength + of the color augmentation + aug_color_jitter_impl: `str`, 'simclrv1' or 'simclrv2'. Define whether + to use simclrv1 or simclrv2's version of random brightness. + aug_rand_blur: `bool`, if True, augment training with random blur. + parse_label: `bool`, if True, parse label together with image. + test_crop: `bool`, if True, augment eval with center cropping. + mode: `str`, 'pretrain' or 'finetune'. Define training mode. + dtype: `str`, cast output image in dtype. It can be 'float32', 'float16', + or 'bfloat16'.
+ """ + self._output_size = output_size + self._aug_rand_crop = aug_rand_crop + self._aug_rand_hflip = aug_rand_hflip + self._aug_color_distort = aug_color_distort + self._aug_color_jitter_strength = aug_color_jitter_strength + self._aug_color_jitter_impl = aug_color_jitter_impl + self._aug_rand_blur = aug_rand_blur + self._parse_label = parse_label + self._mode = mode + self._test_crop = test_crop + if max(self._output_size[0], self._output_size[1]) <= 32: + self._test_crop = False + + if dtype == 'float32': + self._dtype = tf.float32 + elif dtype == 'float16': + self._dtype = tf.float16 + elif dtype == 'bfloat16': + self._dtype = tf.bfloat16 + else: + raise ValueError('dtype {!r} is not supported!'.format(dtype)) + + def _parse_one_train_image(self, image_bytes): + + image = tf.image.decode_jpeg(image_bytes, channels=3) + # This line convert the image to float 0.0 - 1.0 + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + if self._aug_rand_crop: + image = simclr_preprocess_ops.random_crop_with_resize( + image, self._output_size[0], self._output_size[1]) + + if self._aug_rand_hflip: + image = tf.image.random_flip_left_right(image) + + if self._aug_color_distort and self._mode == simclr_model.PRETRAIN: + image = simclr_preprocess_ops.random_color_jitter( + image=image, + color_jitter_strength=self._aug_color_jitter_strength, + impl=self._aug_color_jitter_impl) + + if self._aug_rand_blur and self._mode == simclr_model.PRETRAIN: + image = simclr_preprocess_ops.random_blur( + image, self._output_size[0], self._output_size[1]) + + image = tf.image.resize( + image, self._output_size, method=tf.image.ResizeMethod.BILINEAR) + image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3]) + + image = tf.clip_by_value(image, 0., 1.) + # Convert image to self._dtype. 
+ image = tf.image.convert_image_dtype(image, self._dtype) + + return image + + def _parse_train_data(self, decoded_tensors): + """Parses data for training.""" + image_bytes = decoded_tensors['image/encoded'] + + if self._mode == simclr_model.FINETUNE: + image = self._parse_one_train_image(image_bytes) + + elif self._mode == simclr_model.PRETRAIN: + # Transform each example twice using a combination of + # simple augmentations, resulting in 2N data points + xs = [] + for _ in range(2): + xs.append(self._parse_one_train_image(image_bytes)) + image = tf.concat(xs, -1) + + else: + raise ValueError('The mode {} is not supported by the Parser.' + .format(self._mode)) + + if self._parse_label: + label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32) + return image, label + + return image + + def _parse_eval_data(self, decoded_tensors): + """Parses data for evaluation.""" + image_bytes = decoded_tensors['image/encoded'] + image_shape = tf.image.extract_jpeg_shape(image_bytes) + + if self._test_crop: + image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape) + else: + image = tf.image.decode_jpeg(image_bytes, channels=3) + # This line convert the image to float 0.0 - 1.0 + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + image = tf.image.resize( + image, self._output_size, method=tf.image.ResizeMethod.BILINEAR) + image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3]) + + image = tf.clip_by_value(image, 0., 1.) + + # Convert image to self._dtype. 
+ image = tf.image.convert_image_dtype(image, self._dtype) + + if self._parse_label: + label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32) + return image, label + + return image diff --git a/official/vision/beta/projects/simclr/heads/simclr_head.py b/official/vision/beta/projects/simclr/heads/simclr_head.py new file mode 100644 index 0000000000000000000000000000000000000000..32032323a3aeab1a8a965357161c80daf2d25f0b --- /dev/null +++ b/official/vision/beta/projects/simclr/heads/simclr_head.py @@ -0,0 +1,215 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Dense prediction heads.""" + +from typing import Text, Optional + +import tensorflow as tf + +from official.vision.beta.projects.simclr.modeling.layers import nn_blocks + +regularizers = tf.keras.regularizers +layers = tf.keras.layers + + +@tf.keras.utils.register_keras_serializable(package='simclr') +class ProjectionHead(tf.keras.layers.Layer): + """Projection head.""" + + def __init__( + self, + num_proj_layers: int = 3, + proj_output_dim: Optional[int] = None, + ft_proj_idx: int = 0, + kernel_initializer: Text = 'VarianceScaling', + kernel_regularizer: Optional[regularizers.Regularizer] = None, + bias_regularizer: Optional[regularizers.Regularizer] = None, + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + **kwargs): + """The projection head used during pretraining of SimCLR. + + Args: + num_proj_layers: `int` number of Dense layers used. + proj_output_dim: `int` output dimension of projection head, i.e., output + dimension of the final layer. + ft_proj_idx: `int` index of layer to use during fine-tuning. 0 means no + projection head during fine tuning, -1 means the final layer. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + Default to None. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + Default to None. + use_sync_bn: if True, use synchronized batch normalization. + norm_momentum: `float` normalization omentum for the moving average. + norm_epsilon: `float` small float added to variance to avoid dividing by + zero. + **kwargs: keyword arguments to be passed. 
+ """ + super(ProjectionHead, self).__init__(**kwargs) + + assert proj_output_dim is not None or num_proj_layers == 0 + assert ft_proj_idx <= num_proj_layers, (num_proj_layers, ft_proj_idx) + + self._proj_output_dim = proj_output_dim + self._num_proj_layers = num_proj_layers + self._ft_proj_idx = ft_proj_idx + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._layers = [] + + def get_config(self): + config = { + 'proj_output_dim': self._proj_output_dim, + 'num_proj_layers': self._num_proj_layers, + 'ft_proj_idx': self._ft_proj_idx, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'use_normalization': self._use_normalization, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon + } + base_config = super(ProjectionHead, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + self._layers = [] + if self._num_proj_layers > 0: + intermediate_dim = int(input_shape[-1]) + for j in range(self._num_proj_layers): + if j != self._num_proj_layers - 1: + # for the middle layers, use bias and relu for the output. + layer = nn_blocks.DenseBN( + output_dim=intermediate_dim, + use_bias=True, + use_normalization=True, + activation='relu', + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon, + name='nl_%d' % j) + else: + # for the final layer, neither bias nor relu is used. 
+ layer = nn_blocks.DenseBN( + output_dim=self._proj_output_dim, + use_bias=False, + use_normalization=True, + activation=None, + kernel_regularizer=self._kernel_regularizer, + kernel_initializer=self._kernel_initializer, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon, + name='nl_%d' % j) + self._layers.append(layer) + super(ProjectionHead, self).build(input_shape) + + def call(self, inputs, training=None): + hiddens_list = [tf.identity(inputs, 'proj_head_input')] + + if self._num_proj_layers == 0: + proj_head_output = inputs + proj_finetune_output = inputs + else: + for j in range(self._num_proj_layers): + hiddens = self._layers[j](hiddens_list[-1], training) + hiddens_list.append(hiddens) + proj_head_output = tf.identity( + hiddens_list[-1], 'proj_head_output') + proj_finetune_output = tf.identity( + hiddens_list[self._ft_proj_idx], 'proj_finetune_output') + + # The first element is the output of the projection head. + # The second element is the input of the finetune head. + return proj_head_output, proj_finetune_output + + +@tf.keras.utils.register_keras_serializable(package='simclr') +class ClassificationHead(tf.keras.layers.Layer): + """Classification Head.""" + + def __init__( + self, + num_classes: int, + kernel_initializer: Text = 'random_uniform', + kernel_regularizer: Optional[regularizers.Regularizer] = None, + bias_regularizer: Optional[regularizers.Regularizer] = None, + name: Text = 'head_supervised', + **kwargs): + """The classification head used during pretraining or fine tuning. + + Args: + num_classes: `int` size of the output dimension or number of classes + for classification task. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + Default to None. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + Default to None. + name: `str`, name of the layer. 
+ **kwargs: keyword arguments to be passed. + """ + super(ClassificationHead, self).__init__(name=name, **kwargs) + self._num_classes = num_classes + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._name = name + + def get_config(self): + config = { + 'num_classes': self._num_classes, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + } + base_config = super(ClassificationHead, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + self._dense0 = layers.Dense( + units=self._num_classes, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=None) + super(ClassificationHead, self).build(input_shape) + + def call(self, inputs, training=None): + inputs = self._dense0(inputs) + return inputs diff --git a/official/vision/beta/projects/simclr/heads/simclr_head_test.py b/official/vision/beta/projects/simclr/heads/simclr_head_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ce98eecaea3122c5517c0c7e3521916ed42d206e --- /dev/null +++ b/official/vision/beta/projects/simclr/heads/simclr_head_test.py @@ -0,0 +1,117 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.simclr.heads import simclr_head + + +class ProjectionHeadTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (0, None), + (1, 128), + (2, 128), + ) + def test_head_creation(self, num_proj_layers, proj_output_dim): + test_layer = simclr_head.ProjectionHead( + num_proj_layers=num_proj_layers, + proj_output_dim=proj_output_dim) + + input_dim = 64 + x = tf.keras.Input(shape=(input_dim,)) + proj_head_output, proj_finetune_output = test_layer(x) + + proj_head_output_dim = input_dim + if num_proj_layers > 0: + proj_head_output_dim = proj_output_dim + self.assertAllEqual(proj_head_output.shape.as_list(), + [None, proj_head_output_dim]) + + if num_proj_layers > 0: + proj_finetune_output_dim = input_dim + self.assertAllEqual(proj_finetune_output.shape.as_list(), + [None, proj_finetune_output_dim]) + + @parameterized.parameters( + (0, None, 0), + (1, 128, 0), + (2, 128, 1), + (2, 128, 2), + ) + def test_outputs(self, num_proj_layers, proj_output_dim, ft_proj_idx): + test_layer = simclr_head.ProjectionHead( + 
num_proj_layers=num_proj_layers, + proj_output_dim=proj_output_dim, + ft_proj_idx=ft_proj_idx + ) + + input_dim = 64 + batch_size = 2 + inputs = np.random.rand(batch_size, input_dim) + proj_head_output, proj_finetune_output = test_layer(inputs) + + if num_proj_layers == 0: + self.assertAllClose(inputs, proj_head_output) + self.assertAllClose(inputs, proj_finetune_output) + else: + self.assertAllEqual(proj_head_output.shape.as_list(), + [batch_size, proj_output_dim]) + if ft_proj_idx == 0: + self.assertAllClose(inputs, proj_finetune_output) + elif ft_proj_idx < num_proj_layers: + self.assertAllEqual(proj_finetune_output.shape.as_list(), + [batch_size, input_dim]) + else: + self.assertAllEqual(proj_finetune_output.shape.as_list(), + [batch_size, proj_output_dim]) + + +class ClassificationHeadTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + 10, 20 + ) + def test_head_creation(self, num_classes): + test_layer = simclr_head.ClassificationHead(num_classes=num_classes) + + input_dim = 64 + x = tf.keras.Input(shape=(input_dim,)) + out_x = test_layer(x) + + self.assertAllEqual(out_x.shape.as_list(), + [None, num_classes]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/simclr/losses/contrastive_losses.py b/official/vision/beta/projects/simclr/losses/contrastive_losses.py new file mode 100644 index 0000000000000000000000000000000000000000..c178f001601da6614e3303b7186bc2d8168c57a9 --- /dev/null +++ b/official/vision/beta/projects/simclr/losses/contrastive_losses.py @@ -0,0 +1,157 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contrastive loss functions.""" + +import functools + +import tensorflow as tf + +LARGE_NUM = 1e9 + + +def cross_replica_concat(tensor: tf.Tensor, num_replicas: int) -> tf.Tensor: + """Reduce a concatenation of the `tensor` across multiple replicas. + + Args: + tensor: `tf.Tensor` to concatenate. + num_replicas: `int` number of replicas. + + Returns: + Tensor of the same rank as `tensor` with first dimension `num_replicas` + times larger. + """ + if num_replicas <= 1: + return tensor + + replica_context = tf.distribute.get_replica_context() + with tf.name_scope('cross_replica_concat'): + # This creates a tensor that is like the input tensor but has an added + # replica dimension as the outermost dimension. On each replica it will + # contain the local values and zeros for all other values that need to be + # fetched from other replicas. 
+ ext_tensor = tf.scatter_nd( + indices=[[replica_context.replica_id_in_sync_group]], + updates=[tensor], + shape=tf.concat([[num_replicas], tf.shape(tensor)], axis=0)) + + # As every value is only present on one replica and 0 in all others, adding + # them all together will result in the full tensor on all replicas. + ext_tensor = replica_context.all_reduce(tf.distribute.ReduceOp.SUM, + ext_tensor) + + # Flatten the replica dimension. + # The first dimension size will be: tensor.shape[0] * num_replicas + # Using [-1] trick to support also scalar input. + return tf.reshape(ext_tensor, [-1] + ext_tensor.shape.as_list()[2:]) + + +@tf.keras.utils.register_keras_serializable(package='simclr') +class ContrastiveLoss(object): + """Contrastive training loss function.""" + + def __init__(self, projection_norm: bool = True, temperature: float = 1.0): + """Initializes `ContrastiveLoss`. + + Args: + projection_norm: whether or not to use normalization on the hidden vector. + temperature: a `floating` number for temperature scaling. + """ + self._projection_norm = projection_norm + self._temperature = temperature + + def __call__(self, projection1: tf.Tensor, projection2: tf.Tensor): + """Compute the contrastive loss for contrastive learning. + + Note that projection2 is generated with the same batch (same order) of raw + images, but with different augmentation. More specifically: + image[i] -> random augmentation 1 -> projection -> projection1[i] + image[i] -> random augmentation 2 -> projection -> projection2[i] + + Args: + projection1: projection vector of shape (bsz, dim). + projection2: projection vector of shape (bsz, dim). + + Returns: + A loss scalar. + The logits for contrastive prediction task. + The labels for contrastive prediction task. + """ + # Get (normalized) hidden1 and hidden2. 
+ if self._projection_norm: + projection1 = tf.math.l2_normalize(projection1, -1) + projection2 = tf.math.l2_normalize(projection2, -1) + batch_size = tf.shape(projection1)[0] + + p1_local, p2_local = projection1, projection2 + # Gather projection1/projection2 across replicas and create local labels. + num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync + if num_replicas_in_sync > 1: + p1_global = cross_replica_concat(p1_local, num_replicas_in_sync) + p2_global = cross_replica_concat(p2_local, num_replicas_in_sync) + global_batch_size = tf.shape(p1_global)[0] + + replica_context = tf.distribute.get_replica_context() + replica_id = tf.cast( + tf.cast(replica_context.replica_id_in_sync_group, tf.uint32), + tf.int32) + labels_idx = tf.range(batch_size) + replica_id * batch_size + labels = tf.one_hot(labels_idx, global_batch_size * 2) + masks = tf.one_hot(labels_idx, global_batch_size) + else: + p1_global = p1_local + p2_global = p2_local + labels = tf.one_hot(tf.range(batch_size), batch_size * 2) + masks = tf.one_hot(tf.range(batch_size), batch_size) + + tb_matmul = functools.partial(tf.matmul, transpose_b=True) + + logits_aa = tb_matmul(p1_local, p1_global) / self._temperature + logits_aa = logits_aa - masks * LARGE_NUM + + logits_bb = tb_matmul(p2_local, p2_global) / self._temperature + logits_bb = logits_bb - masks * LARGE_NUM + + logits_ab = tb_matmul(p1_local, p2_global) / self._temperature + logits_ba = tb_matmul(p2_local, p1_global) / self._temperature + + loss_a_local = tf.nn.softmax_cross_entropy_with_logits( + labels, tf.concat([logits_ab, logits_aa], 1)) + loss_b_local = tf.nn.softmax_cross_entropy_with_logits( + labels, tf.concat([logits_ba, logits_bb], 1)) + loss_local = tf.reduce_mean(loss_a_local + loss_b_local) + + return loss_local, (logits_ab, labels) + + def get_config(self): + config = { + 'projection_norm': self._projection_norm, + 'temperature': self._temperature, + } + return config diff --git 
a/official/vision/beta/projects/simclr/losses/contrastive_losses_test.py b/official/vision/beta/projects/simclr/losses/contrastive_losses_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3f7feddee5d1c240f7f3db830303a2718e27240d --- /dev/null +++ b/official/vision/beta/projects/simclr/losses/contrastive_losses_test.py @@ -0,0 +1,93 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.simclr.losses import contrastive_losses + + +class ContrastiveLossesTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(1.0, 0.5) + def test_contrastive_loss_computation(self, temperature): + batch_size = 2 + project_dim = 16 + projection_norm = False + + p_1_arr = np.random.rand(batch_size, project_dim) + p_1 = tf.constant(p_1_arr, dtype=tf.float32) + p_2_arr = np.random.rand(batch_size, project_dim) + p_2 = tf.constant(p_2_arr, dtype=tf.float32) + + losses_obj = contrastive_losses.ContrastiveLoss( + projection_norm=projection_norm, + temperature=temperature) + comp_contrastive_loss = losses_obj( + projection1=p_1, + projection2=p_2) + + def _exp_sim(p1, p2): + return np.exp(np.matmul(p1, p2) / temperature) + + l11 = - np.log( + _exp_sim(p_1_arr[0], p_2_arr[0]) / + (_exp_sim(p_1_arr[0], p_1_arr[1]) + + _exp_sim(p_1_arr[0], p_2_arr[1]) + + _exp_sim(p_1_arr[0], p_2_arr[0])) + ) - np.log( + _exp_sim(p_1_arr[0], p_2_arr[0]) / + (_exp_sim(p_2_arr[0], p_2_arr[1]) + + _exp_sim(p_2_arr[0], p_1_arr[1]) + + _exp_sim(p_1_arr[0], p_2_arr[0])) + ) + + l22 = - np.log( + _exp_sim(p_1_arr[1], p_2_arr[1]) / + (_exp_sim(p_1_arr[1], p_1_arr[0]) + + _exp_sim(p_1_arr[1], p_2_arr[0]) + + _exp_sim(p_1_arr[1], p_2_arr[1])) + ) - np.log( + _exp_sim(p_1_arr[1], p_2_arr[1]) / + (_exp_sim(p_2_arr[1], p_2_arr[0]) + + _exp_sim(p_2_arr[1], p_1_arr[0]) + + _exp_sim(p_1_arr[1], p_2_arr[1])) + ) + + exp_contrastive_loss = (l11 + l22) / 2.0 + + self.assertAlmostEqual(comp_contrastive_loss[0].numpy(), + exp_contrastive_loss, places=5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/simclr/modeling/layers/nn_blocks.py b/official/vision/beta/projects/simclr/modeling/layers/nn_blocks.py new file mode 100644 index 
0000000000000000000000000000000000000000..65a9a7a2830b5bbaf41e73d173652dd75889a6dd --- /dev/null +++ b/official/vision/beta/projects/simclr/modeling/layers/nn_blocks.py @@ -0,0 +1,150 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Contains common building blocks for simclr neural networks.""" +from typing import Text, Optional + +import tensorflow as tf + +from official.modeling import tf_utils + +regularizers = tf.keras.regularizers + + +@tf.keras.utils.register_keras_serializable(package='simclr') +class DenseBN(tf.keras.layers.Layer): + """Modified Dense layer to help build simclr system. + + The layer is a standards combination of Dense, BatchNorm and Activation. 
+ """ + + def __init__( + self, + output_dim: int, + use_bias: bool = True, + use_normalization: bool = False, + use_sync_bn: bool = False, + norm_momentum: float = 0.99, + norm_epsilon: float = 0.001, + activation: Optional[Text] = 'relu', + kernel_initializer: Text = 'VarianceScaling', + kernel_regularizer: Optional[regularizers.Regularizer] = None, + bias_regularizer: Optional[regularizers.Regularizer] = None, + name='linear_layer', + **kwargs): + """Customized Dense layer. + + Args: + output_dim: `int` size of output dimension. + use_bias: if True, use biase in the dense layer. + use_normalization: if True, use batch normalization. + use_sync_bn: if True, use synchronized batch normalization. + norm_momentum: `float` normalization momentum for the moving average. + norm_epsilon: `float` small float added to variance to avoid dividing by + zero. + activation: `str` name of the activation function. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + Default to None. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + Default to None. + name: `str`, name of the layer. + **kwargs: keyword arguments to be passed. + """ + # Note: use_bias is ignored for the dense layer when use_bn=True. + # However, it is still used for batch norm. 
+ super(DenseBN, self).__init__(**kwargs) + self._output_dim = output_dim + self._use_bias = use_bias + self._use_normalization = use_normalization + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._activation = activation + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._name = name + + if use_sync_bn: + self._norm = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._norm = tf.keras.layers.BatchNormalization + if tf.keras.backend.image_data_format() == 'channels_last': + self._bn_axis = -1 + else: + self._bn_axis = 1 + if activation: + self._activation_fn = tf_utils.get_activation(activation) + else: + self._activation_fn = None + + def get_config(self): + config = { + 'output_dim': self._output_dim, + 'use_bias': self._use_bias, + 'activation': self._activation, + 'use_sync_bn': self._use_sync_bn, + 'use_normalization': self._use_normalization, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + } + base_config = super(DenseBN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def build(self, input_shape): + self._dense0 = tf.keras.layers.Dense( + self._output_dim, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_bias=self._use_bias and not self._use_normalization) + + if self._use_normalization: + self._norm0 = self._norm( + axis=self._bn_axis, + momentum=self._norm_momentum, + epsilon=self._norm_epsilon, + center=self._use_bias, + scale=True) + + super(DenseBN, self).build(input_shape) + + def call(self, inputs, training=None): + assert inputs.shape.ndims == 2, inputs.shape + x = 
self._dense0(inputs) + if self._use_normalization: + x = self._norm0(x) + if self._activation: + x = self._activation_fn(x) + return x diff --git a/official/vision/beta/projects/simclr/modeling/layers/nn_blocks_test.py b/official/vision/beta/projects/simclr/modeling/layers/nn_blocks_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e3e0da20c41aff3105a24a2cc1ef158d27d1525c --- /dev/null +++ b/official/vision/beta/projects/simclr/modeling/layers/nn_blocks_test.py @@ -0,0 +1,74 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from absl.testing import parameterized + +import tensorflow as tf + +from official.vision.beta.projects.simclr.modeling.layers import nn_blocks + + +class DenseBNTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters( + (64, True, True), + (64, True, False), + (64, False, True), + ) + def test_pass_through(self, output_dim, use_bias, use_normalization): + test_layer = nn_blocks.DenseBN( + output_dim=output_dim, + use_bias=use_bias, + use_normalization=use_normalization + ) + + x = tf.keras.Input(shape=(64,)) + out_x = test_layer(x) + + self.assertAllEqual(out_x.shape.as_list(), [None, output_dim]) + + # kernel of the dense layer + train_var_len = 1 + if use_normalization: + if use_bias: + # batch norm introduce two trainable variables + train_var_len += 2 + else: + # center is set to False if not use bias + train_var_len += 1 + else: + if use_bias: + # bias of dense layer + train_var_len += 1 + self.assertLen(test_layer.trainable_variables, train_var_len) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/simclr/modeling/simclr_model.py b/official/vision/beta/projects/simclr/modeling/simclr_model.py new file mode 100644 index 0000000000000000000000000000000000000000..06759f3b2e7b770af8189d92600b2600017e6779 --- /dev/null +++ b/official/vision/beta/projects/simclr/modeling/simclr_model.py @@ -0,0 +1,177 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Build simclr models.""" + +from typing import Optional +from absl import logging + +import tensorflow as tf + +layers = tf.keras.layers + +PRETRAIN = 'pretrain' +FINETUNE = 'finetune' + +PROJECTION_OUTPUT_KEY = 'projection_outputs' +SUPERVISED_OUTPUT_KEY = 'supervised_outputs' + + +@tf.keras.utils.register_keras_serializable(package='simclr') +class SimCLRModel(tf.keras.Model): + """A classification model based on SimCLR framework.""" + + def __init__(self, + backbone: tf.keras.models.Model, + projection_head: tf.keras.layers.Layer, + supervised_head: Optional[tf.keras.layers.Layer] = None, + input_specs=layers.InputSpec(shape=[None, None, None, 3]), + mode: str = PRETRAIN, + backbone_trainable: bool = True, + **kwargs): + """A classification model based on SimCLR framework. + + Args: + backbone: a backbone network. + projection_head: a projection head network. + supervised_head: a head network for supervised learning, e.g. + classification head. + input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. + mode: `str` indicates mode of training to be executed. + backbone_trainable: `bool` whether the backbone is trainable or not. 
+ **kwargs: keyword arguments to be passed. + """ + super(SimCLRModel, self).__init__(**kwargs) + self._config_dict = { + 'backbone': backbone, + 'projection_head': projection_head, + 'supervised_head': supervised_head, + 'input_specs': input_specs, + 'mode': mode, + 'backbone_trainable': backbone_trainable, + } + self._input_specs = input_specs + self._backbone = backbone + self._projection_head = projection_head + self._supervised_head = supervised_head + self._mode = mode + self._backbone_trainable = backbone_trainable + + # Set whether the backbone is trainable + self._backbone.trainable = backbone_trainable + + def call(self, inputs, training=None, **kwargs): + model_outputs = {} + + if training and self._mode == PRETRAIN: + num_transforms = 2 + else: + num_transforms = 1 + + # Split channels, and optionally apply extra batched augmentation. + # (bsz, h, w, c*num_transforms) -> [(bsz, h, w, c), ....] + features_list = tf.split(inputs, num_or_size_splits=num_transforms, axis=-1) + # (num_transforms * bsz, h, w, c) + features = tf.concat(features_list, 0) + + # Base network forward pass. + endpoints = self._backbone(features, training=training) + features = endpoints[max(endpoints.keys())] + projection_inputs = layers.GlobalAveragePooling2D()(features) + + # Add heads. + projection_outputs, supervised_inputs = self._projection_head( + projection_inputs, training) + + if self._supervised_head is not None: + if self._mode == PRETRAIN: + logging.info('Ignoring gradient from supervised outputs !') + # When performing pretraining and supervised_head together, we do not + # want information from supervised evaluation flowing back into + # pretraining network. So we put a stop_gradient. 
+ supervised_outputs = self._supervised_head( + tf.stop_gradient(supervised_inputs), training) + else: + supervised_outputs = self._supervised_head(supervised_inputs, training) + else: + supervised_outputs = None + + model_outputs.update({ + PROJECTION_OUTPUT_KEY: projection_outputs, + SUPERVISED_OUTPUT_KEY: supervised_outputs + }) + + return model_outputs + + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + if self._supervised_head is not None: + items = dict(backbone=self.backbone, + projection_head=self.projection_head, + supervised_head=self.supervised_head) + else: + items = dict(backbone=self.backbone, + projection_head=self.projection_head) + return items + + @property + def backbone(self): + return self._backbone + + @property + def projection_head(self): + return self._projection_head + + @property + def supervised_head(self): + return self._supervised_head + + @property + def mode(self): + return self._mode + + @mode.setter + def mode(self, value): + self._mode = value + + @property + def backbone_trainable(self): + return self._backbone_trainable + + @backbone_trainable.setter + def backbone_trainable(self, value): + self._backbone_trainable = value + self._backbone.trainable = value + + def get_config(self): + return self._config_dict + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/projects/simclr/modeling/simclr_model_test.py b/official/vision/beta/projects/simclr/modeling/simclr_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2a54e247252a0d6ac9e1f6a97f772651dbb74fdf --- /dev/null +++ b/official/vision/beta/projects/simclr/modeling/simclr_model_test.py @@ -0,0 +1,87 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf + +from official.vision.beta.modeling import backbones +from official.vision.beta.projects.simclr.heads import simclr_head +from official.vision.beta.projects.simclr.modeling import simclr_model + + +class SimCLRModelTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (128, 3, 0), + (128, 3, 1), + (128, 1, 0), + (128, 1, 1), + ) + def test_model_creation(self, project_dim, num_proj_layers, ft_proj_idx): + input_size = 224 + inputs = np.random.rand(2, input_size, input_size, 3) + input_specs = tf.keras.layers.InputSpec( + shape=[None, input_size, input_size, 3]) + + tf.keras.backend.set_image_data_format('channels_last') + + backbone = backbones.ResNet(model_id=50, activation='relu', + input_specs=input_specs) + projection_head = simclr_head.ProjectionHead( + proj_output_dim=project_dim, + num_proj_layers=num_proj_layers, + ft_proj_idx=ft_proj_idx + ) + num_classes = 10 + supervised_head = simclr_head.ClassificationHead( + num_classes=10 + ) + + model = simclr_model.SimCLRModel( + input_specs=input_specs, + backbone=backbone, + projection_head=projection_head, + supervised_head=supervised_head, + mode=simclr_model.PRETRAIN + ) + outputs = model(inputs) + projection_outputs = outputs[simclr_model.PROJECTION_OUTPUT_KEY] + supervised_outputs = outputs[simclr_model.SUPERVISED_OUTPUT_KEY] + + self.assertAllEqual(projection_outputs.shape.as_list(), + [2, project_dim]) + self.assertAllEqual([2, num_classes], + supervised_outputs.numpy().shape) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/simclr/tasks/simclr.py b/official/vision/beta/projects/simclr/tasks/simclr.py new file mode 100644 index 0000000000000000000000000000000000000000..b9501af0c1077c0af0c8ca80c12084e0a4f2bbc0 --- /dev/null +++ 
b/official/vision/beta/projects/simclr/tasks/simclr.py @@ -0,0 +1,640 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Image SimCLR task definition. 
+ +SimCLR training two different modes: +- pretrain +- fine-tuning + +For the above two different modes, the following components are different in +the task definition: +- training data format +- training loss +- projection_head and/or supervised_head +""" + +from typing import Dict, Optional + +from absl import logging +import tensorflow as tf + +from official.core import base_task +from official.core import config_definitions +from official.core import input_reader +from official.core import task_factory +from official.modeling import optimization +from official.modeling import performance +from official.modeling import tf_utils +from official.vision.beta.modeling import backbones +from official.vision.beta.projects.simclr.configs import simclr as exp_cfg +from official.vision.beta.projects.simclr.dataloaders import simclr_input +from official.vision.beta.projects.simclr.heads import simclr_head +from official.vision.beta.projects.simclr.losses import contrastive_losses +from official.vision.beta.projects.simclr.modeling import simclr_model + +OptimizationConfig = optimization.OptimizationConfig +RuntimeConfig = config_definitions.RuntimeConfig + + +@task_factory.register_task_cls(exp_cfg.SimCLRPretrainTask) +class SimCLRPretrainTask(base_task.Task): + """A task for image classification.""" + + def create_optimizer(self, optimizer_config: OptimizationConfig, + runtime_config: Optional[RuntimeConfig] = None): + """Creates an TF optimizer from configurations. + + Args: + optimizer_config: the parameters of the Optimization settings. + runtime_config: the parameters of the runtime. + + Returns: + A tf.optimizers.Optimizer object. + """ + if (optimizer_config.optimizer.type == 'lars' + and self.task_config.loss.l2_weight_decay > 0.0): + raise ValueError('The l2_weight_decay cannot be used together with lars ' + 'optimizer. 
Please set it to 0.') + + opt_factory = optimization.OptimizerFactory(optimizer_config) + optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) + # Configuring optimizer when loss_scale is set in runtime config. This helps + # avoiding overflow/underflow for float16 computations. + if runtime_config and runtime_config.loss_scale: + optimizer = performance.configure_optimizer( + optimizer, + use_float16=runtime_config.mixed_precision_dtype == 'float16', + loss_scale=runtime_config.loss_scale) + + return optimizer + + def build_model(self): + model_config = self.task_config.model + input_specs = tf.keras.layers.InputSpec( + shape=[None] + model_config.input_size) + + l2_weight_decay = self.task_config.loss.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. + # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + # Build backbone + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + # Build projection head + norm_activation_config = model_config.norm_activation + projection_head_config = model_config.projection_head + projection_head = simclr_head.ProjectionHead( + proj_output_dim=projection_head_config.proj_output_dim, + num_proj_layers=projection_head_config.num_proj_layers, + ft_proj_idx=projection_head_config.ft_proj_idx, + kernel_regularizer=l2_regularizer, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon) + + # Build supervised head + supervised_head_config = model_config.supervised_head + if supervised_head_config: + if supervised_head_config.zero_init: + s_kernel_initializer = 'zeros' + else: + s_kernel_initializer = 
'random_uniform' + supervised_head = simclr_head.ClassificationHead( + num_classes=supervised_head_config.num_classes, + kernel_initializer=s_kernel_initializer, + kernel_regularizer=l2_regularizer) + else: + supervised_head = None + + model = simclr_model.SimCLRModel( + input_specs=input_specs, + backbone=backbone, + projection_head=projection_head, + supervised_head=supervised_head, + mode=model_config.mode, + backbone_trainable=model_config.backbone_trainable) + + logging.info(model.get_config()) + + return model + + def initialize(self, model: tf.keras.Model): + """Loading pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. + if self.task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self.task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + else: + # A bare `assert "<string>"` is always truthy and never fires; raise instead. + raise ValueError("Only 'all' or 'backbone' can be used to initialize the model.")
+ + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, params, input_context=None): + input_size = self.task_config.model.input_size + + if params.tfds_name: + decoder = simclr_input.TFDSDecoder(params.decoder.decode_label) + else: + decoder = simclr_input.Decoder(params.decoder.decode_label) + + parser = simclr_input.Parser( + output_size=input_size[:2], + aug_rand_crop=params.parser.aug_rand_crop, + aug_rand_hflip=params.parser.aug_rand_hflip, + aug_color_distort=params.parser.aug_color_distort, + aug_color_jitter_strength=params.parser.aug_color_jitter_strength, + aug_color_jitter_impl=params.parser.aug_color_jitter_impl, + aug_rand_blur=params.parser.aug_rand_blur, + parse_label=params.parser.parse_label, + test_crop=params.parser.test_crop, + mode=params.parser.mode, + dtype=params.dtype) + + reader = input_reader.InputReader( + params, + dataset_fn=tf.data.TFRecordDataset, + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + labels, + model_outputs, + aux_losses=None) -> Dict[str, tf.Tensor]: + # Compute contrastive relative loss + con_losses_obj = contrastive_losses.ContrastiveLoss( + projection_norm=self.task_config.loss.projection_norm, + temperature=self.task_config.loss.temperature) + # The projection outputs from model has the size of + # (2 * bsz, project_dim) + projection_outputs = model_outputs[simclr_model.PROJECTION_OUTPUT_KEY] + projection1, projection2 = tf.split(projection_outputs, 2, 0) + contrast_loss, (contrast_logits, contrast_labels) = con_losses_obj( + projection1=projection1, + projection2=projection2) + + contrast_accuracy = tf.equal( + tf.argmax(contrast_labels, axis=1), tf.argmax(contrast_logits, axis=1)) + contrast_accuracy = tf.reduce_mean(tf.cast(contrast_accuracy, tf.float32)) + + contrast_prob = tf.nn.softmax(contrast_logits) + contrast_entropy = 
-tf.reduce_mean( + tf.reduce_sum(contrast_prob * tf.math.log(contrast_prob + 1e-8), -1)) + + model_loss = contrast_loss + + losses = { + 'contrast_loss': contrast_loss, + 'contrast_accuracy': contrast_accuracy, + 'contrast_entropy': contrast_entropy + } + + if self.task_config.model.supervised_head is not None: + outputs = model_outputs[simclr_model.SUPERVISED_OUTPUT_KEY] + labels = tf.concat([labels, labels], 0) + + if self.task_config.evaluation.one_hot: + sup_loss = tf.keras.losses.CategoricalCrossentropy( + from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(labels, + outputs) + else: + sup_loss = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(labels, + outputs) + sup_loss = tf.reduce_mean(sup_loss) + + label_acc = tf.equal(tf.argmax(labels, axis=1), + tf.argmax(outputs, axis=1)) + label_acc = tf.reduce_mean(tf.cast(label_acc, tf.float32)) + + model_loss = contrast_loss + sup_loss + + losses.update({ + 'accuracy': label_acc, + 'supervised_loss': sup_loss, + }) + + total_loss = model_loss + if aux_losses: + reg_loss = tf.reduce_sum(aux_losses) + total_loss = model_loss + reg_loss + + losses['total_loss'] = total_loss + + return losses + + def build_metrics(self, training=True): + + if training: + metrics = [] + metric_names = [ + 'total_loss', + 'contrast_loss', + 'contrast_accuracy', + 'contrast_entropy' + ] + if self.task_config.model.supervised_head: + metric_names.extend(['supervised_loss', 'accuracy']) + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + else: + k = self.task_config.evaluation.top_k + if self.task_config.evaluation.one_hot: + metrics = [ + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + tf.keras.metrics.TopKCategoricalAccuracy( + k=k, name='top_{}_accuracy'.format(k))] + else: + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=k, 
name='top_{}_accuracy'.format(k))] + return metrics + + def train_step(self, inputs, model, optimizer, metrics=None): + features, labels = inputs + if (self.task_config.model.supervised_head is not None + and self.task_config.evaluation.one_hot): + num_classes = self.task_config.model.supervised_head.num_classes + labels = tf.one_hot(labels, num_classes) + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. + losses = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + + scaled_loss = losses['total_loss'] / num_replicas + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + logging.info('Trainable variables:') + for var in tvars: + logging.info(var.name) + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient when LossScaleOptimizer is used. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: losses['total_loss']} + + for m in metrics: + m.update_state(losses[m.name]) + logs.update({m.name: m.result()}) + + return logs + + def validation_step(self, inputs, model, metrics=None): + if self.task_config.model.supervised_head is None: + # A bare `assert '<string>'` is always truthy and never fires; raise instead. + raise ValueError('Skipping eval during pretraining without supervised head.')
+ + features, labels = inputs + if self.task_config.evaluation.one_hot: + num_classes = self.task_config.model.supervised_head.num_classes + labels = tf.one_hot(labels, num_classes) + + outputs = model( + features, training=False)[simclr_model.SUPERVISED_OUTPUT_KEY] + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + + logs = {self.loss: 0} + + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + + return logs + + +@task_factory.register_task_cls(exp_cfg.SimCLRFinetuneTask) +class SimCLRFinetuneTask(base_task.Task): + """A task for image classification.""" + + def create_optimizer(self, optimizer_config: OptimizationConfig, + runtime_config: Optional[RuntimeConfig] = None): + """Creates an TF optimizer from configurations. + + Args: + optimizer_config: the parameters of the Optimization settings. + runtime_config: the parameters of the runtime. + + Returns: + A tf.optimizers.Optimizer object. + """ + if (optimizer_config.optimizer.type == 'lars' + and self.task_config.loss.l2_weight_decay > 0.0): + raise ValueError('The l2_weight_decay cannot be used together with lars ' + 'optimizer. Please set it to 0.') + + opt_factory = optimization.OptimizerFactory(optimizer_config) + optimizer = opt_factory.build_optimizer(opt_factory.build_learning_rate()) + # Configuring optimizer when loss_scale is set in runtime config. This helps + # avoiding overflow/underflow for float16 computations. 
+ if runtime_config and runtime_config.loss_scale: + optimizer = performance.configure_optimizer( + optimizer, + use_float16=runtime_config.mixed_precision_dtype == 'float16', + loss_scale=runtime_config.loss_scale) + + return optimizer + + def build_model(self): + model_config = self.task_config.model + input_specs = tf.keras.layers.InputSpec( + shape=[None] + model_config.input_size) + + l2_weight_decay = self.task_config.loss.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. + # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + backbone = backbones.factory.build_backbone( + input_specs=input_specs, + model_config=model_config, + l2_regularizer=l2_regularizer) + + norm_activation_config = model_config.norm_activation + projection_head_config = model_config.projection_head + projection_head = simclr_head.ProjectionHead( + proj_output_dim=projection_head_config.proj_output_dim, + num_proj_layers=projection_head_config.num_proj_layers, + ft_proj_idx=projection_head_config.ft_proj_idx, + kernel_regularizer=l2_regularizer, + use_sync_bn=norm_activation_config.use_sync_bn, + norm_momentum=norm_activation_config.norm_momentum, + norm_epsilon=norm_activation_config.norm_epsilon) + + supervised_head_config = model_config.supervised_head + if supervised_head_config.zero_init: + s_kernel_initializer = 'zeros' + else: + s_kernel_initializer = 'random_uniform' + supervised_head = simclr_head.ClassificationHead( + num_classes=supervised_head_config.num_classes, + kernel_initializer=s_kernel_initializer, + kernel_regularizer=l2_regularizer) + + model = simclr_model.SimCLRModel( + input_specs=input_specs, + backbone=backbone, + projection_head=projection_head, + supervised_head=supervised_head, + mode=model_config.mode, + 
backbone_trainable=model_config.backbone_trainable) + + logging.info(model.get_config()) + + return model + + def initialize(self, model: tf.keras.Model): + """Loading pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. + if self.task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self.task_config.init_checkpoint_modules == 'backbone_projection': + ckpt = tf.train.Checkpoint(backbone=model.backbone, + projection_head=model.projection_head) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + elif self.task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + else: + # A bare `assert "<string>"` never fires; raise, and list all valid options. + raise ValueError("Only 'all', 'backbone_projection' or 'backbone' can be used to initialize the model.")
+ + # If the checkpoint is from pretraining, reset the following parameters + model.backbone_trainable = self.task_config.model.backbone_trainable + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, params, input_context=None): + input_size = self.task_config.model.input_size + + if params.tfds_name: + decoder = simclr_input.TFDSDecoder(params.decoder.decode_label) + else: + decoder = simclr_input.Decoder(params.decoder.decode_label) + parser = simclr_input.Parser( + output_size=input_size[:2], + parse_label=params.parser.parse_label, + test_crop=params.parser.test_crop, + mode=params.parser.mode, + dtype=params.dtype) + + reader = input_reader.InputReader( + params, + dataset_fn=tf.data.TFRecordDataset, + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, labels, model_outputs, aux_losses=None): + """Sparse categorical cross entropy loss. + + Args: + labels: labels. + model_outputs: Output logits of the classifier. + aux_losses: auxiliarly loss tensors, i.e. `losses` in keras.Model. + + Returns: + The total loss tensor. 
+ """ + losses_config = self.task_config.loss + if losses_config.one_hot: + total_loss = tf.keras.losses.categorical_crossentropy( + labels, + model_outputs, + from_logits=True, + label_smoothing=losses_config.label_smoothing) + else: + total_loss = tf.keras.losses.sparse_categorical_crossentropy( + labels, model_outputs, from_logits=True) + + total_loss = tf_utils.safe_mean(total_loss) + if aux_losses: + total_loss += tf.add_n(aux_losses) + + return total_loss + + def build_metrics(self, training=True): + """Gets streaming metrics for training/validation.""" + k = self.task_config.evaluation.top_k + if self.task_config.evaluation.one_hot: + metrics = [ + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + tf.keras.metrics.TopKCategoricalAccuracy( + k=k, name='top_{}_accuracy'.format(k))] + else: + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=k, name='top_{}_accuracy'.format(k))] + return metrics + + def train_step(self, inputs, model, optimizer, metrics=None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + if self.task_config.loss.one_hot: + num_classes = self.task_config.model.supervised_head.num_classes + labels = tf.one_hot(labels, num_classes) + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model( + features, training=True)[simclr_model.SUPERVISED_OUTPUT_KEY] + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. 
+ loss = self.build_losses( + model_outputs=outputs, + labels=labels, aux_losses=model.losses) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance( + optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + logging.info('Trainable variables:') + for var in tvars: + logging.info(var.name) + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient before apply_gradients when LossScaleOptimizer is + # used. + if isinstance( + optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs + + def validation_step(self, inputs, model, metrics=None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + features, labels = inputs + if self.task_config.loss.one_hot: + num_classes = self.task_config.model.supervised_head.num_classes + labels = tf.one_hot(labels, num_classes) + + outputs = self.inference_step( + features, model)[simclr_model.SUPERVISED_OUTPUT_KEY] + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + loss = self.build_losses(model_outputs=outputs, + labels=labels, aux_losses=model.losses) + + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs diff --git a/official/vision/beta/projects/simclr/train.py b/official/vision/beta/projects/simclr/train.py new file mode 100644 index 0000000000000000000000000000000000000000..ca7c92262db6f49365b15f3b985109487a7a8065 --- /dev/null +++ b/official/vision/beta/projects/simclr/train.py @@ -0,0 +1,81 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow Model Garden Vision SimCLR training driver.""" +from absl import app +from absl import flags +import gin + +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance +from official.vision.beta.projects.simclr.common import registry_imports # pylint: disable=unused-import + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + print(FLAGS.experiment) + params = train_utils.parse_configuration(FLAGS) + + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/vision/beta/projects/yolo/README.md b/official/vision/beta/projects/yolo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0a1e27fbe9010f08d1a94361792d84335a97e235 --- /dev/null +++ b/official/vision/beta/projects/yolo/README.md @@ -0,0 +1,76 @@ +# YOLO Object Detectors, You Only Look Once + +[![Paper](http://img.shields.io/badge/Paper-arXiv.1804.02767-B3181B?logo=arXiv)](https://arxiv.org/abs/1804.02767) +[![Paper](http://img.shields.io/badge/Paper-arXiv.2004.10934-B3181B?logo=arXiv)](https://arxiv.org/abs/2004.10934) + +This repository is the unofficial implementation of the following papers. +However, we spent painstaking hours ensuring that every aspect that we +constructed was the exact same as the original paper and the original +repository. 
+
+* YOLOv3: An Incremental Improvement: [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767)
+
+* YOLOv4: Optimal Speed and Accuracy of Object Detection: [YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/abs/2004.10934)
+
+## Description
+
+Yolo v1, the original implementation, was released in 2015, providing a
+groundbreaking algorithm that would quickly process images and locate objects
+in a single pass through the detector. The original implementation used a
+backbone derived from state-of-the-art object classifiers of the time, like
+[GoogLeNet](https://arxiv.org/abs/1409.4842) and
+[VGG](https://arxiv.org/abs/1409.1556). More attention was given to the novel
+Yolo detection head that allowed for object detection with a single pass of an
+image. Though limited, the network could predict up to 90 bounding boxes per
+image, and was tested for about 80 classes per box. Also, the model could only
+make predictions at one scale. These attributes caused Yolo v1 to be more
+limited and less versatile, so as the years passed, the developers continued to
+update and develop this model.
+
+Yolo v3 and v4 serve as the most up-to-date and capable versions of the Yolo
+network group. These models use a custom backbone called Darknet53 that uses
+knowledge gained from the ResNet paper to improve its predictions. The new
+backbone also allows for objects to be detected at multiple scales. As for the
+new detection head, the model now predicts the bounding boxes using a set of
+anchor box priors (anchor boxes) as suggestions. The multiscale predictions in
+combination with the anchor boxes allow the network to make up to 1000
+object predictions on a single image. Finally, the new loss function forces the
+network to make better predictions by using Intersection over Union (IOU) to
+inform the model's confidence rather than relying on the mean squared error for
+the entire output.
+ +## Authors + +* Vishnu Samardh Banna ([@GitHub vishnubanna](https://github.com/vishnubanna)) +* Anirudh Vegesana ([@GitHub anivegesana](https://github.com/anivegesana)) +* Akhil Chinnakotla ([@GitHub The-Indian-Chinna](https://github.com/The-Indian-Chinna)) +* Tristan Yan ([@GitHub Tyan3001](https://github.com/Tyan3001)) +* Naveen Vivek ([@GitHub naveen-vivek](https://github.com/naveen-vivek)) + +## Table of Contents + +* [Our Goal](#our-goal) +* [Models in the library](#models-in-the-library) +* [References](#references) + + +## Our Goal + +Our goal with this model conversion is to provide implementations of the +Backbone and Yolo Head. We have built the model in such a way that the Yolo +head could be connected to a new, more powerful backbone if a person chose to. + +## Models in the library + +| Object Detectors | Classifiers | +| :--------------: | :--------------: | +| Yolo-v3 | Darknet53 | +| Yolo-v3 tiny | CSPDarknet53 | +| Yolo-v3 spp | +| Yolo-v4 | +| Yolo-v4 tiny | + +## Requirements + +[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0) +[![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/) diff --git a/official/vision/beta/projects/yolo/common/registry_imports.py b/official/vision/beta/projects/yolo/common/registry_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..4ee795b8cc964514d3a05e6f48ad3186b45f7b22 --- /dev/null +++ b/official/vision/beta/projects/yolo/common/registry_imports.py @@ -0,0 +1,21 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""All necessary imports for registration.""" + +# pylint: disable=unused-import +from official.common import registry_imports +from official.vision.beta.projects.yolo.configs import darknet_classification +from official.vision.beta.projects.yolo.modeling.backbones import darknet +from official.vision.beta.projects.yolo.tasks import image_classification diff --git a/official/vision/beta/projects/yolo/configs/backbones.py b/official/vision/beta/projects/yolo/configs/backbones.py new file mode 100644 index 0000000000000000000000000000000000000000..a79cb09e17e6e7e633e652dd4965866b3d0e80b8 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/backbones.py @@ -0,0 +1,34 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 + +"""Backbones configurations.""" + +import dataclasses + +from official.modeling import hyperparams + +from official.vision.beta.configs import backbones + + +@dataclasses.dataclass +class DarkNet(hyperparams.Config): + """DarkNet config.""" + model_id: str = "darknet53" + + +@dataclasses.dataclass +class Backbone(backbones.Backbone): + darknet: DarkNet = DarkNet() diff --git a/official/vision/beta/projects/yolo/configs/darknet_classification.py b/official/vision/beta/projects/yolo/configs/darknet_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..b33e149d484f850aa9fcada5f94612eca8bf754d --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/darknet_classification.py @@ -0,0 +1,70 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Lint as: python3
+"""Image classification with darknet configs."""
+
+from typing import List, Optional
+
+import dataclasses
+
+from official.core import config_definitions as cfg
+from official.core import exp_factory
+from official.modeling import hyperparams
+from official.vision.beta.configs import common
+from official.vision.beta.configs import image_classification as imc
+from official.vision.beta.projects.yolo.configs import backbones
+
+
+@dataclasses.dataclass
+class ImageClassificationModel(hyperparams.Config):
+  num_classes: int = 0
+  input_size: List[int] = dataclasses.field(default_factory=list)
+  backbone: backbones.Backbone = backbones.Backbone(
+      type='darknet', darknet=backbones.DarkNet())
+  dropout_rate: float = 0.0
+  norm_activation: common.NormActivation = common.NormActivation()
+  # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification
+  add_head_batch_norm: bool = False
+
+
+@dataclasses.dataclass
+class Losses(hyperparams.Config):
+  one_hot: bool = True
+  label_smoothing: float = 0.0
+  l2_weight_decay: float = 0.0
+
+
+@dataclasses.dataclass
+class ImageClassificationTask(cfg.TaskConfig):
+  """The model config."""
+  model: ImageClassificationModel = ImageClassificationModel()
+  train_data: imc.DataConfig = imc.DataConfig(is_training=True)
+  validation_data: imc.DataConfig = imc.DataConfig(is_training=False)
+  evaluation: imc.Evaluation = imc.Evaluation()
+  losses: Losses = Losses()
+  gradient_clip_norm: float = 0.0
+  logging_dir: Optional[str] = None
+
+
+@exp_factory.register_config_factory('darknet_classification')
+def image_classification() -> cfg.ExperimentConfig:
+  """Image classification general."""
+  return cfg.ExperimentConfig(
+      task=ImageClassificationTask(),
+      trainer=cfg.TrainerConfig(),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None'
+      ])
diff --git a/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53.yaml
b/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10dbdc5685583475106e4d9af37a4cabd59429b0 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53.yaml @@ -0,0 +1,51 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float32' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknet53' + norm_activation: + activation: 'mish' + losses: + l2_weight_decay: 0.0005 + one_hot: true + label_smoothing: 0.1 + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + drop_remainder: false +trainer: + train_steps: 1200000 # epochs: 120 + validation_steps: 400 # size of validation data + validation_interval: 10000 + steps_per_loop: 10000 + summary_interval: 10000 + checkpoint_interval: 10000 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'polynomial' + polynomial: + initial_learning_rate: 0.1 + end_learning_rate: 0.0001 + power: 4.0 + decay_steps: 1200000 + warmup: + type: 'linear' + linear: + warmup_steps: 1000 # learning rate rises from 0 to 0.1 over 1000 steps diff --git a/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml b/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b27ff015708a5152e559775a485764cafe01c5cf --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml @@ -0,0 +1,58 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' + num_gpus: 2 +task: + model: + num_classes: 1001 + input_size: 
[256, 256, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'cspdarknet53' + norm_activation: + activation: 'mish' + losses: + l2_weight_decay: 0.0005 + one_hot: true + train_data: + tfds_name: 'imagenet2012' + tfds_split: 'train' + tfds_data_dir: '~/tensorflow_datasets/' + is_training: true + global_batch_size: 16 # default = 128 + dtype: 'float16' + shuffle_buffer_size: 100 + validation_data: + tfds_name: 'imagenet2012' + tfds_split: 'validation' + tfds_data_dir: '~/tensorflow_datasets/' + is_training: true + global_batch_size: 16 # default = 128 + dtype: 'float16' + drop_remainder: false + shuffle_buffer_size: 100 +trainer: + train_steps: 9600000 # epochs: 120, 1200000 * 128/batchsize + validation_steps: 3200 # size of validation data, 400 * 128/batchsize + validation_interval: 10000 # 10000 + steps_per_loop: 10000 + summary_interval: 10000 + checkpoint_interval: 10000 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'polynomial' + polynomial: + initial_learning_rate: 0.0125 # 0.1 * batchsize/128, default = 0.1 + end_learning_rate: 0.0000125 # 0.0001 * batchsize/128, default = 0.0001 + power: 4.0 + decay_steps: 9592000 # 790000 * 128/batchsize, default = 800000 - 1000 = 799000 + warmup: + type: 'linear' + linear: + warmup_steps: 8000 # 0 to 0.1 over 1000 * 128/batchsize, default = 128 diff --git a/official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml b/official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3333b599e305a5ba1c2001fa02db4a04ac66ef9 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/darknet53.yaml @@ -0,0 +1,50 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float32' +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'darknet53' + norm_activation: + activation: 'mish' + losses: + 
l2_weight_decay: 0.0005 + one_hot: true + train_data: + input_path: 'imagenet-2012-tfrecord/train*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + validation_data: + input_path: 'imagenet-2012-tfrecord/valid*' + is_training: true + global_batch_size: 128 + dtype: 'float16' + drop_remainder: false +trainer: + train_steps: 800000 # epochs: 80 + validation_steps: 400 # size of validation data + validation_interval: 10000 + steps_per_loop: 10000 + summary_interval: 10000 + checkpoint_interval: 10000 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'polynomial' + polynomial: + initial_learning_rate: 0.1 + end_learning_rate: 0.0001 + power: 4.0 + decay_steps: 800000 + warmup: + type: 'linear' + linear: + warmup_steps: 1000 # learning rate rises from 0 to 0.1 over 1000 steps diff --git a/official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml b/official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f9fb2dfc6b32804f27560bb8cba68e325723435 --- /dev/null +++ b/official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml @@ -0,0 +1,58 @@ +runtime: + distribution_strategy: 'mirrored' + mixed_precision_dtype: 'float16' + loss_scale: 'dynamic' + num_gpus: 2 +task: + model: + num_classes: 1001 + input_size: [256, 256, 3] + backbone: + type: 'darknet' + darknet: + model_id: 'darknet53' + norm_activation: + activation: 'mish' + losses: + l2_weight_decay: 0.0005 + one_hot: true + train_data: + tfds_name: 'imagenet2012' + tfds_split: 'train' + tfds_data_dir: '~/tensorflow_datasets/' + is_training: true + global_batch_size: 16 # default = 128 + dtype: 'float16' + shuffle_buffer_size: 100 + validation_data: + tfds_name: 'imagenet2012' + tfds_split: 'validation' + tfds_data_dir: '~/tensorflow_datasets/' + is_training: true + global_batch_size: 16 # default = 128 + dtype: 'float16' + drop_remainder: 
false + shuffle_buffer_size: 100 +trainer: + train_steps: 6400000 # epochs: 80, 800000 * 128/batchsize + validation_steps: 3200 # size of validation data, 400 * 128/batchsize + validation_interval: 10000 # 10000 + steps_per_loop: 10000 + summary_interval: 10000 + checkpoint_interval: 10000 + optimizer_config: + optimizer: + type: 'sgd' + sgd: + momentum: 0.9 + learning_rate: + type: 'polynomial' + polynomial: + initial_learning_rate: 0.0125 # 0.1 * batchsize/128, default = 0.1 + end_learning_rate: 0.0000125 # 0.0001 * batchsize/128, default = 0.0001 + power: 4.0 + decay_steps: 6392000 # 790000 * 128/batchsize, default = 800000 - 1000 = 799000 + warmup: + type: 'linear' + linear: + warmup_steps: 8000 # 0 to 0.1 over 1000 * 128/batchsize, default = 128 diff --git a/official/vision/beta/projects/yolo/dataloaders/__init__.py b/official/vision/beta/projects/yolo/dataloaders/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a25710c222e3327cb20e000db5df5c5651c4a2cc --- /dev/null +++ b/official/vision/beta/projects/yolo/dataloaders/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/official/vision/beta/projects/yolo/dataloaders/classification_tfds_decoder.py b/official/vision/beta/projects/yolo/dataloaders/classification_tfds_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..0abe984fd6d92ecb8a117db4301128ebb7701ed7 --- /dev/null +++ b/official/vision/beta/projects/yolo/dataloaders/classification_tfds_decoder.py @@ -0,0 +1,34 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFDS Classification decoder.""" + +import tensorflow as tf +from official.vision.beta.dataloaders import decoder + + +class Decoder(decoder.Decoder): + """A tf.Example decoder for classification task.""" + + def __init__(self): + return + + def decode(self, serialized_example): + sample_dict = { + 'image/encoded': + tf.io.encode_jpeg(serialized_example['image'], quality=100), + 'image/class/label': + serialized_example['label'], + } + return sample_dict diff --git a/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input.py b/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input.py new file mode 100644 index 0000000000000000000000000000000000000000..c3bc171e725b5c38188a707badeb77ce8270d145 --- /dev/null +++ b/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input.py @@ -0,0 +1,319 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Detection Data parser and processing for YOLO. + +Parse image and ground truths in a dataset to training targets and package them +into (image, labels) tuple for RetinaNet. +""" + +import tensorflow as tf + +from official.vision.beta.dataloaders import parser +from official.vision.beta.ops import box_ops +from official.vision.beta.ops import preprocess_ops +from official.vision.beta.projects.yolo.ops import box_ops as yolo_box_ops +from official.vision.beta.projects.yolo.ops import preprocess_ops as yolo_preprocess_ops + + +class Parser(parser.Parser): + """Parser to parse an image and its annotations into a dictionary of tensors.""" + + def __init__(self, + output_size, + num_classes, + fixed_size=True, + jitter_im=0.1, + jitter_boxes=0.005, + use_tie_breaker=True, + min_level=3, + max_level=5, + masks=None, + max_process_size=608, + min_process_size=320, + max_num_instances=200, + random_flip=True, + aug_rand_saturation=True, + aug_rand_brightness=True, + aug_rand_zoom=True, + aug_rand_hue=True, + anchors=None, + seed=10, + dtype=tf.float32): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: a `Tuple` for (width, height) of input image. + num_classes: a `Tensor` or `int` for the number of classes. + fixed_size: a `bool` if True all output images have the same size. 
+      jitter_im: a `float` representing a pixel value that is the maximum jitter
+        applied to the image for data augmentation during training.
+      jitter_boxes: a `float` representing a pixel value that is the maximum
+        jitter applied to the bounding box for data augmentation during
+        training.
+      use_tie_breaker: boolean value for whether or not to use the tie_breaker.
+      min_level: `int` number of minimum level of the output feature pyramid.
+      max_level: `int` number of maximum level of the output feature pyramid.
+      masks: a `Tensor`, `List` or `numpy.ndarray` for anchor masks.
+      max_process_size: an `int` for maximum image width and height.
+      min_process_size: an `int` for minimum image width and height.
+      max_num_instances: an `int` number of maximum number of instances in an
+        image.
+      random_flip: a `bool` if True, augment training with random horizontal
+        flip.
+      aug_rand_saturation: `bool`, if True, augment training with random
+        saturation.
+      aug_rand_brightness: `bool`, if True, augment training with random
+        brightness.
+      aug_rand_zoom: `bool`, if True, augment training with random zoom.
+      aug_rand_hue: `bool`, if True, augment training with random hue.
+      anchors: a `Tensor`, `List` or `numpy.ndarray` for bounding box priors.
+      seed: an `int` for the seed used by tf.random.
+      dtype: a `tf.dtypes.DType` object that represents the dtype the outputs
+        will be casted to. The available types are tf.float32, tf.float16, or
+        tf.bfloat16.
+ """ + self._net_down_scale = 2**max_level + + self._num_classes = num_classes + self._image_w = (output_size[0] // + self._net_down_scale) * self._net_down_scale + self._image_h = (output_size[1] // + self._net_down_scale) * self._net_down_scale + + self._max_process_size = max_process_size + self._min_process_size = min_process_size + self._fixed_size = fixed_size + + self._anchors = anchors + self._masks = { + key: tf.convert_to_tensor(value) for key, value in masks.items() + } + self._use_tie_breaker = use_tie_breaker + + self._jitter_im = 0.0 if jitter_im is None else jitter_im + self._jitter_boxes = 0.0 if jitter_boxes is None else jitter_boxes + self._max_num_instances = max_num_instances + self._random_flip = random_flip + + self._aug_rand_saturation = aug_rand_saturation + self._aug_rand_brightness = aug_rand_brightness + self._aug_rand_zoom = aug_rand_zoom + self._aug_rand_hue = aug_rand_hue + + self._seed = seed + self._dtype = dtype + + def _build_grid(self, raw_true, width, batch=False, use_tie_breaker=False): + mask = self._masks + for key in self._masks.keys(): + if not batch: + mask[key] = yolo_preprocess_ops.build_grided_gt( + raw_true, self._masks[key], width // 2**int(key), + raw_true['bbox'].dtype, use_tie_breaker) + else: + mask[key] = yolo_preprocess_ops.build_batch_grided_gt( + raw_true, self._masks[key], width // 2**int(key), + raw_true['bbox'].dtype, use_tie_breaker) + return mask + + def _parse_train_data(self, data): + """Generates images and labels that are usable for model training. + + Args: + data: a dict of Tensors produced by the decoder. + Returns: + images: the image tensor. + labels: a dict of Tensors that contains labels. 
+ """ + + shape = tf.shape(data['image']) + image = data['image'] / 255 + boxes = data['groundtruth_boxes'] + width = shape[0] + height = shape[1] + + image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio( + image, + boxes, + width=width, + height=height, + target_dim=self._max_process_size) + + image_shape = tf.shape(image)[:2] + + if self._random_flip: + image, boxes, _ = preprocess_ops.random_horizontal_flip( + image, boxes, seed=self._seed) + + randscale = self._image_w // self._net_down_scale + + if not self._fixed_size: + do_scale = tf.greater( + tf.random.uniform([], minval=0, maxval=1, seed=self._seed), 0.5) + if do_scale: + # This scales the image to a random multiple of net_down_scale + # between 320 to 608 + randscale = tf.random.uniform( + [], + minval=self._min_process_size // self._net_down_scale, + maxval=self._max_process_size // self._net_down_scale, + seed=self._seed, + dtype=tf.int32) * self._net_down_scale + + if self._jitter_boxes != 0.0: + boxes = box_ops.denormalize_boxes(boxes, image_shape) + boxes = box_ops.jitter_boxes(boxes, 0.025) + boxes = box_ops.normalize_boxes(boxes, image_shape) + + # YOLO loss function uses x-center, y-center format + boxes = yolo_box_ops.yxyx_to_xcycwh(boxes) + + if self._jitter_im != 0.0: + image, boxes = yolo_preprocess_ops.random_translate( + image, boxes, self._jitter_im, seed=self._seed) + + if self._aug_rand_zoom: + image, boxes = yolo_preprocess_ops.resize_crop_filter( + image, + boxes, + default_width=self._image_w, + default_height=self._image_h, + target_width=randscale, + target_height=randscale) + image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False) + + if self._aug_rand_brightness: + image = tf.image.random_brightness( + image=image, max_delta=.1) # Brightness + if self._aug_rand_saturation: + image = tf.image.random_saturation( + image=image, lower=0.75, upper=1.25) # Saturation + if self._aug_rand_hue: + image = tf.image.random_hue(image=image, max_delta=.3) # Hue + image = 
tf.clip_by_value(image, 0.0, 1.0) + # Find the best anchor for the ground truth labels to maximize the iou + best_anchors = yolo_preprocess_ops.get_best_anchor( + boxes, self._anchors, width=self._image_w, height=self._image_h) + + # Padding + boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes, + self._max_num_instances, 0) + classes = preprocess_ops.clip_or_pad_to_fixed_size( + data['groundtruth_classes'], self._max_num_instances, -1) + best_anchors = preprocess_ops.clip_or_pad_to_fixed_size( + best_anchors, self._max_num_instances, 0) + area = preprocess_ops.clip_or_pad_to_fixed_size(data['groundtruth_area'], + self._max_num_instances, 0) + is_crowd = preprocess_ops.clip_or_pad_to_fixed_size( + tf.cast(data['groundtruth_is_crowd'], tf.int32), + self._max_num_instances, 0) + + labels = { + 'source_id': data['source_id'], + 'bbox': tf.cast(boxes, self._dtype), + 'classes': tf.cast(classes, self._dtype), + 'area': tf.cast(area, self._dtype), + 'is_crowd': is_crowd, + 'best_anchors': tf.cast(best_anchors, self._dtype), + 'width': width, + 'height': height, + 'num_detections': tf.shape(data['groundtruth_classes'])[0], + } + + if self._fixed_size: + grid = self._build_grid( + labels, self._image_w, use_tie_breaker=self._use_tie_breaker) + labels.update({'grid_form': grid}) + + return image, labels + + def _parse_eval_data(self, data): + """Generates images and labels that are usable for model training. + + Args: + data: a dict of Tensors produced by the decoder. + Returns: + images: the image tensor. + labels: a dict of Tensors that contains labels. 
+ """ + + shape = tf.shape(data['image']) + image = data['image'] / 255 + boxes = data['groundtruth_boxes'] + width = shape[0] + height = shape[1] + + image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio( + image, boxes, width=width, height=height, target_dim=self._image_w) + boxes = yolo_box_ops.yxyx_to_xcycwh(boxes) + + # Find the best anchor for the ground truth labels to maximize the iou + best_anchors = yolo_preprocess_ops.get_best_anchor( + boxes, self._anchors, width=self._image_w, height=self._image_h) + boxes = yolo_preprocess_ops.pad_max_instances(boxes, + self._max_num_instances, 0) + classes = yolo_preprocess_ops.pad_max_instances(data['groundtruth_classes'], + self._max_num_instances, 0) + best_anchors = yolo_preprocess_ops.pad_max_instances( + best_anchors, self._max_num_instances, 0) + area = yolo_preprocess_ops.pad_max_instances(data['groundtruth_area'], + self._max_num_instances, 0) + is_crowd = yolo_preprocess_ops.pad_max_instances( + tf.cast(data['groundtruth_is_crowd'], tf.int32), + self._max_num_instances, 0) + + labels = { + 'source_id': data['source_id'], + 'bbox': tf.cast(boxes, self._dtype), + 'classes': tf.cast(classes, self._dtype), + 'area': tf.cast(area, self._dtype), + 'is_crowd': is_crowd, + 'best_anchors': tf.cast(best_anchors, self._dtype), + 'width': width, + 'height': height, + 'num_detections': tf.shape(data['groundtruth_classes'])[0], + } + + grid = self._build_grid( + labels, + self._image_w, + batch=False, + use_tie_breaker=self._use_tie_breaker) + labels.update({'grid_form': grid}) + return image, labels + + def _postprocess_fn(self, image, label): + randscale = self._image_w // self._net_down_scale + if not self._fixed_size: + do_scale = tf.greater( + tf.random.uniform([], minval=0, maxval=1, seed=self._seed), 0.5) + if do_scale: + # This scales the image to a random multiple of net_down_scale + # between 320 to 608 + randscale = tf.random.uniform( + [], + minval=self._min_process_size // self._net_down_scale, + 
maxval=self._max_process_size // self._net_down_scale, + seed=self._seed, + dtype=tf.int32) * self._net_down_scale + width = randscale + image = tf.image.resize(image, (width, width)) + grid = self._build_grid( + label, width, batch=True, use_tie_breaker=self._use_tie_breaker) + label.update({'grid_form': grid}) + return image, label + + def postprocess_fn(self, is_training=True): + return self._postprocess_fn if not self._fixed_size and is_training else None diff --git a/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input_test.py b/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input_test.py new file mode 100644 index 0000000000000000000000000000000000000000..53be13d6fae831b1b925e765de6a5943deac8995 --- /dev/null +++ b/official/vision/beta/projects/yolo/dataloaders/yolo_detection_input_test.py @@ -0,0 +1,103 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test case for YOLO detection dataloader configuration definition.""" +from absl.testing import parameterized +import dataclasses +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import input_reader +from official.modeling import hyperparams +from official.vision.beta.dataloaders import tfds_detection_decoders +from official.vision.beta.projects.yolo.dataloaders import yolo_detection_input + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + """Dummy configuration for parser.""" + output_size: int = (416, 416) + num_classes: int = 80 + fixed_size: bool = True + jitter_im: float = 0.1 + jitter_boxes: float = 0.005 + min_process_size: int = 320 + max_process_size: int = 608 + max_num_instances: int = 200 + random_flip: bool = True + seed: int = 10 + shuffle_buffer_size: int = 10000 + + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + tfds_name: str = 'coco/2017' + tfds_split: str = 'train' + global_batch_size: int = 10 + is_training: bool = True + dtype: str = 'float16' + decoder = None + parser: Parser = Parser() + shuffle_buffer_size: int = 10 + + +class YoloDetectionInputTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(('training', True), ('testing', False)) + def test_yolo_input(self, is_training): + params = DataConfig(is_training=is_training) + + decoder = tfds_detection_decoders.MSCOCODecoder() + anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0], + [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0], + [348.0, 340.0]] + masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]} + + parser = yolo_detection_input.Parser( + output_size=params.parser.output_size, + num_classes=params.parser.num_classes, + fixed_size=params.parser.fixed_size, + jitter_im=params.parser.jitter_im, + jitter_boxes=params.parser.jitter_boxes, + min_process_size=params.parser.min_process_size, 
+ max_process_size=params.parser.max_process_size, + max_num_instances=params.parser.max_num_instances, + random_flip=params.parser.random_flip, + seed=params.parser.seed, + anchors=anchors, + masks=masks) + postprocess_fn = parser.postprocess_fn(is_training=is_training) + + reader = input_reader.InputReader(params, + dataset_fn=tf.data.TFRecordDataset, + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn( + params.is_training)) + dataset = reader.read(input_context=None).batch(10).take(1) + if postprocess_fn: + image, _ = postprocess_fn( + *tf.data.experimental.get_single_element(dataset)) + else: + image, _ = tf.data.experimental.get_single_element(dataset) + print(image.shape) + self.assertAllEqual(image.shape, (10, 10, 416, 416, 3)) + self.assertTrue( + tf.reduce_all(tf.math.logical_and(image >= 0, image <= 1))) + + +if __name__ == '__main__': + tf.test.main() + diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..10a614d80d5d81c21a5245ec91a87d98853d2a31 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py @@ -0,0 +1,445 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +"""Contains definitions of Darknet Backbone Networks. 
+ + The models are inspired by ResNet, and CSPNet + +Residual networks (ResNets) were proposed in: +[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun + Deep Residual Learning for Image Recognition. arXiv:1512.03385 + +Cross Stage Partial networks (CSPNets) were proposed in: +[1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang Chen, + Jun-Wei Hsieh + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. + arXiv:1911.11929 + + +DarkNets Are used mainly for Object detection in: +[1] Joseph Redmon, Ali Farhadi + YOLOv3: An Incremental Improvement. arXiv:1804.02767 + +[2] Alexey Bochkovskiy, Chien-Yao Wang, Hong-Yuan Mark Liao + YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv:2004.10934 +""" +import collections + +import tensorflow as tf + +from official.vision.beta.modeling.backbones import factory +from official.vision.beta.projects.yolo.modeling.layers import nn_blocks + + +class BlockConfig(object): + """Get layer config to make code more readable. 
+ + Args: + layer: string layer name + stack: the type of layer ordering to use for this specific level + repetitions: integer for the number of times to repeat block + bottelneck: boolean for does this stack have a bottle neck layer + filters: integer for the output depth of the level + pool_size: integer the pool_size of max pool layers + kernel_size: optional integer, for convolution kernel size + strides: integer or tuple to indicate convolution strides + padding: the padding to apply to layers in this stack + activation: string for the activation to use for this stack + route: integer for what level to route from to get the next input + output_name: the name to use for this output + is_output: is this layer an output in the default model + """ + + def __init__(self, layer, stack, reps, bottleneck, filters, pool_size, + kernel_size, strides, padding, activation, route, output_name, + is_output): + self.layer = layer + self.stack = stack + self.repetitions = reps + self.bottleneck = bottleneck + self.filters = filters + self.kernel_size = kernel_size + self.pool_size = pool_size + self.strides = strides + self.padding = padding + self.activation = activation + self.route = route + self.output_name = output_name + self.is_output = is_output + + +def build_block_specs(config): + specs = [] + for layer in config: + specs.append(BlockConfig(*layer)) + return specs + + +class LayerFactory(object): + """Class for quick look up of default layers. + + Used by darknet to connect, introduce or exit a level. Used in place of an if + condition or switch to make adding new layers easier and to reduce redundant + code. 
+ """ + + def __init__(self): + self._layer_dict = { + "ConvBN": (nn_blocks.ConvBN, self.conv_bn_config_todict), + "MaxPool": (tf.keras.layers.MaxPool2D, self.maxpool_config_todict) + } + + def conv_bn_config_todict(self, config, kwargs): + dictvals = { + "filters": config.filters, + "kernel_size": config.kernel_size, + "strides": config.strides, + "padding": config.padding + } + dictvals.update(kwargs) + return dictvals + + def darktiny_config_todict(self, config, kwargs): + dictvals = {"filters": config.filters, "strides": config.strides} + dictvals.update(kwargs) + return dictvals + + def maxpool_config_todict(self, config, kwargs): + return { + "pool_size": config.pool_size, + "strides": config.strides, + "padding": config.padding, + "name": kwargs["name"] + } + + def __call__(self, config, kwargs): + layer, get_param_dict = self._layer_dict[config.layer] + param_dict = get_param_dict(config, kwargs) + return layer(**param_dict) + + +# model configs +LISTNAMES = [ + "default_layer_name", "level_type", "number_of_layers_in_level", + "bottleneck", "filters", "kernal_size", "pool_size", "strides", "padding", + "default_activation", "route", "level/name", "is_output" +] + +# pylint: disable=line-too-long +CSPDARKNET53 = { + "list_names": LISTNAMES, + "splits": {"backbone_split": 106, + "neck_split": 138}, + "backbone": [ + ["ConvBN", None, 1, False, 32, None, 3, 1, "same", "mish", -1, 0, False], + ["DarkRes", "csp", 1, True, 64, None, None, None, None, "mish", -1, 1, False], + ["DarkRes", "csp", 2, False, 128, None, None, None, None, "mish", -1, 2, False], + ["DarkRes", "csp", 8, False, 256, None, None, None, None, "mish", -1, 3, True], + ["DarkRes", "csp", 8, False, 512, None, None, None, None, "mish", -1, 4, True], + ["DarkRes", "csp", 4, False, 1024, None, None, None, None, "mish", -1, 5, True], + ] +} + +DARKNET53 = { + "list_names": LISTNAMES, + "splits": {"backbone_split": 76}, + "backbone": [ + ["ConvBN", None, 1, False, 32, None, 3, 1, "same", "leaky", -1, 
0, False], + ["DarkRes", "residual", 1, True, 64, None, None, None, None, "leaky", -1, 1, False], + ["DarkRes", "residual", 2, False, 128, None, None, None, None, "leaky", -1, 2, False], + ["DarkRes", "residual", 8, False, 256, None, None, None, None, "leaky", -1, 3, True], + ["DarkRes", "residual", 8, False, 512, None, None, None, None, "leaky", -1, 4, True], + ["DarkRes", "residual", 4, False, 1024, None, None, None, None, "leaky", -1, 5, True], + ] +} + +CSPDARKNETTINY = { + "list_names": LISTNAMES, + "splits": {"backbone_split": 28}, + "backbone": [ + ["ConvBN", None, 1, False, 32, None, 3, 2, "same", "leaky", -1, 0, False], + ["ConvBN", None, 1, False, 64, None, 3, 2, "same", "leaky", -1, 1, False], + ["CSPTiny", "csp_tiny", 1, False, 64, None, 3, 2, "same", "leaky", -1, 2, False], + ["CSPTiny", "csp_tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], + ["CSPTiny", "csp_tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], + ["ConvBN", None, 1, False, 512, None, 3, 1, "same", "leaky", -1, 5, True], + ] +} + +DARKNETTINY = { + "list_names": LISTNAMES, + "splits": {"backbone_split": 14}, + "backbone": [ + ["ConvBN", None, 1, False, 16, None, 3, 1, "same", "leaky", -1, 0, False], + ["DarkTiny", "tiny", 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False], + ["DarkTiny", "tiny", 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False], + ["DarkTiny", "tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], + ["DarkTiny", "tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], + ["DarkTiny", "tiny", 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False], + ["DarkTiny", "tiny", 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True], + ] +} +# pylint: enable=line-too-long + +BACKBONES = { + "darknettiny": DARKNETTINY, + "darknet53": DARKNET53, + "cspdarknet53": CSPDARKNET53, + "cspdarknettiny": CSPDARKNETTINY +} + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class Darknet(tf.keras.Model): + 
"""Darknet backbone.""" + + def __init__( + self, + model_id="darknet53", + input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]), + min_level=None, + max_level=5, + activation=None, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer="glorot_uniform", + kernel_regularizer=None, + bias_regularizer=None, + **kwargs): + + layer_specs, splits = Darknet.get_model_config(model_id) + + self._model_name = model_id + self._splits = splits + self._input_shape = input_specs + self._registry = LayerFactory() + + # default layer look up + self._min_size = min_level + self._max_size = max_level + self._output_specs = None + + self._kernel_initializer = kernel_initializer + self._bias_regularizer = bias_regularizer + self._norm_momentum = norm_momentum + self._norm_epislon = norm_epsilon + self._use_sync_bn = use_sync_bn + self._activation = activation + self._kernel_regularizer = kernel_regularizer + + self._default_dict = { + "kernel_initializer": self._kernel_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "norm_momentum": self._norm_momentum, + "norm_epsilon": self._norm_epislon, + "use_sync_bn": self._use_sync_bn, + "activation": self._activation, + "name": None + } + + inputs = tf.keras.layers.Input(shape=self._input_shape.shape[1:]) + output = self._build_struct(layer_specs, inputs) + super().__init__(inputs=inputs, outputs=output, name=self._model_name) + + @property + def input_specs(self): + return self._input_shape + + @property + def output_specs(self): + return self._output_specs + + @property + def splits(self): + return self._splits + + def _build_struct(self, net, inputs): + endpoints = collections.OrderedDict() + stack_outputs = [inputs] + for i, config in enumerate(net): + if config.stack is None: + x = self._build_block(stack_outputs[config.route], + config, + name=f"{config.layer}_{i}") + stack_outputs.append(x) + elif config.stack == "residual": + x 
= self._residual_stack(stack_outputs[config.route], + config, + name=f"{config.layer}_{i}") + stack_outputs.append(x) + elif config.stack == "csp": + x = self._csp_stack(stack_outputs[config.route], + config, + name=f"{config.layer}_{i}") + stack_outputs.append(x) + elif config.stack == "csp_tiny": + x_pass, x = self._csp_tiny_stack(stack_outputs[config.route], + config, name=f"{config.layer}_{i}") + stack_outputs.append(x_pass) + elif config.stack == "tiny": + x = self._tiny_stack(stack_outputs[config.route], + config, + name=f"{config.layer}_{i}") + stack_outputs.append(x) + if (config.is_output and self._min_size is None): + endpoints[str(config.output_name)] = x + elif self._min_size is not None and config.output_name >= self._min_size and config.output_name <= self._max_size: + endpoints[str(config.output_name)] = x + + self._output_specs = {l: endpoints[l].get_shape() for l in endpoints.keys()} + return endpoints + + def _get_activation(self, activation): + if self._activation is None: + return activation + else: + return self._activation + + def _csp_stack(self, inputs, config, name): + if config.bottleneck: + csp_filter_scale = 1 + residual_filter_scale = 2 + scale_filters = 1 + else: + csp_filter_scale = 2 + residual_filter_scale = 1 + scale_filters = 2 + self._default_dict["activation"] = self._get_activation(config.activation) + self._default_dict["name"] = f"{name}_csp_down" + x, x_route = nn_blocks.CSPRoute(filters=config.filters, + filter_scale=csp_filter_scale, + downsample=True, + **self._default_dict)(inputs) + for i in range(config.repetitions): + self._default_dict["name"] = f"{name}_{i}" + x = nn_blocks.DarkResidual(filters=config.filters // scale_filters, + filter_scale=residual_filter_scale, + **self._default_dict)(x) + + self._default_dict["name"] = f"{name}_csp_connect" + output = nn_blocks.CSPConnect(filters=config.filters, + filter_scale=csp_filter_scale, + **self._default_dict)([x, x_route]) + self._default_dict["activation"] = 
self._activation + self._default_dict["name"] = None + return output + + def _csp_tiny_stack(self, inputs, config, name): + self._default_dict["activation"] = self._get_activation(config.activation) + self._default_dict["name"] = f"{name}_csp_tiny" + x, x_route = nn_blocks.CSPTiny(filters=config.filters, + **self._default_dict)(inputs) + self._default_dict["activation"] = self._activation + self._default_dict["name"] = None + return x, x_route + + def _tiny_stack(self, inputs, config, name): + x = tf.keras.layers.MaxPool2D(pool_size=2, + strides=config.strides, + padding="same", + data_format=None, + name=f"{name}_tiny/pool")(inputs) + self._default_dict["activation"] = self._get_activation(config.activation) + self._default_dict["name"] = f"{name}_tiny/conv" + x = nn_blocks.ConvBN( + filters=config.filters, + kernel_size=(3, 3), + strides=(1, 1), + padding="same", + **self._default_dict)( + x) + self._default_dict["activation"] = self._activation + self._default_dict["name"] = None + return x + + def _residual_stack(self, inputs, config, name): + self._default_dict["activation"] = self._get_activation(config.activation) + self._default_dict["name"] = f"{name}_residual_down" + x = nn_blocks.DarkResidual(filters=config.filters, + downsample=True, + **self._default_dict)(inputs) + for i in range(config.repetitions - 1): + self._default_dict["name"] = f"{name}_{i}" + x = nn_blocks.DarkResidual(filters=config.filters, + **self._default_dict)(x) + self._default_dict["activation"] = self._activation + self._default_dict["name"] = None + return x + + def _build_block(self, inputs, config, name): + x = inputs + i = 0 + self._default_dict["activation"] = self._get_activation(config.activation) + while i < config.repetitions: + self._default_dict["name"] = f"{name}_{i}" + layer = self._registry(config, self._default_dict) + x = layer(x) + i += 1 + self._default_dict["activation"] = self._activation + self._default_dict["name"] = None + return x + + @staticmethod + def 
get_model_config(name):
+    name = name.lower()
+    backbone = BACKBONES[name]["backbone"]
+    splits = BACKBONES[name]["splits"]
+    return build_block_specs(backbone), splits
+
+  @property
+  def model_id(self):
+    return self._model_name
+
+  @classmethod
+  def from_config(cls, config, custom_objects=None):
+    return cls(**config)
+
+  def get_config(self):
+    layer_config = {
+        "model_id": self._model_name,
+        "min_level": self._min_size,
+        "max_level": self._max_size,
+        "kernel_initializer": self._kernel_initializer,
+        "kernel_regularizer": self._kernel_regularizer,
+        "bias_regularizer": self._bias_regularizer,
+        "norm_momentum": self._norm_momentum,
+        "norm_epsilon": self._norm_epislon,
+        "use_sync_bn": self._use_sync_bn,
+        "activation": self._activation
+    }
+    return layer_config
+
+
+@factory.register_backbone_builder("darknet")
+def build_darknet(
+    input_specs: tf.keras.layers.InputSpec,
+    model_config,
+    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
+  """Builds darknet backbone."""
+
+  backbone_cfg = model_config.backbone.get()
+  norm_activation_config = model_config.norm_activation
+  model = Darknet(
+      model_id=backbone_cfg.model_id,
+      input_specs=input_specs,
+      activation=norm_activation_config.activation,
+      use_sync_bn=norm_activation_config.use_sync_bn,
+      norm_momentum=norm_activation_config.norm_momentum,
+      norm_epsilon=norm_activation_config.norm_epsilon,
+      kernel_regularizer=l2_regularizer)
+  return model
diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..76c595f2dd7d05086e419ef2cc54a334b68de503
--- /dev/null
+++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py
@@ -0,0 +1,117 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for resnet.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.projects.yolo.modeling.backbones import darknet + + +class DarkNetTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + (224, "darknet53", 2, 1), + (224, "darknettiny", 1, 2), + (224, "cspdarknettiny", 1, 1), + (224, "cspdarknet53", 2, 1), + ) + def test_network_creation(self, input_size, model_id, + endpoint_filter_scale, scale_final): + """Test creation of ResNet family models.""" + tf.keras.backend.set_image_data_format("channels_last") + + network = darknet.Darknet(model_id=model_id, min_level=3, max_level=5) + self.assertEqual(network.model_id, model_id) + + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) + endpoints = network(inputs) + + self.assertAllEqual( + [1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale], + endpoints["3"].shape.as_list()) + self.assertAllEqual( + [1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale], + endpoints["4"].shape.as_list()) + self.assertAllEqual([ + 1, input_size / 2**5, input_size / 2**5, + 512 * endpoint_filter_scale * scale_final + ], endpoints["5"].shape.as_list()) + + @combinations.generate( + combinations.combine( 
+          strategy=[
+              strategy_combinations.cloud_tpu_strategy,
+              strategy_combinations.one_device_strategy_gpu,
+          ],
+          use_sync_bn=[False, True],
+      ))
+  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
+    """Test for sync bn on TPU and GPU devices."""
+    inputs = np.random.rand(1, 224, 224, 3)
+
+    tf.keras.backend.set_image_data_format("channels_last")
+
+    with strategy.scope():
+      network = darknet.Darknet(model_id="darknet53", min_level=3, max_level=5)
+      _ = network(inputs)
+
+  @parameterized.parameters(1, 3, 4)
+  def test_input_specs(self, input_dim):
+    """Test different input feature dimensions."""
+    tf.keras.backend.set_image_data_format("channels_last")
+
+    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
+    network = darknet.Darknet(
+        model_id="darknet53", min_level=3, max_level=5, input_specs=input_specs)
+
+    inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1)
+    _ = network(inputs)
+
+  def test_serialize_deserialize(self):
+    # Create a network object that sets all of its config options.
+    kwargs = dict(
+        model_id="darknet53",
+        min_level=3,
+        max_level=5,
+        use_sync_bn=False,
+        activation="relu",
+        norm_momentum=0.99,
+        norm_epsilon=0.001,
+        kernel_initializer="VarianceScaling",
+        kernel_regularizer=None,
+        bias_regularizer=None,
+    )
+    network = darknet.Darknet(**kwargs)
+
+    expected_config = dict(kwargs)
+    self.assertEqual(network.get_config(), expected_config)
+
+    # Create another network object from the first object's config.
+    new_network = darknet.Darknet.from_config(network.get_config())
+
+    # Validate that the config can be forced to JSON.
+    _ = new_network.to_json()
+
+    # If the serialization was successful, the new config should match the old.
+ self.assertAllEqual(network.get_config(), new_network.get_config()) + + +if __name__ == "__main__": + tf.test.main() diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..8bc6a78078a1fb794e28ccf389ca7849b6ce48e3 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py @@ -0,0 +1,821 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +"""Contains common building blocks for yolo neural networks.""" + +from typing import Callable, List +import tensorflow as tf +from official.modeling import tf_utils + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class Identity(tf.keras.layers.Layer): + + def call(self, inputs): + return inputs + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class ConvBN(tf.keras.layers.Layer): + """Modified Convolution layer to match that of the DarkNet Library. + + The Layer is a standards combination of Conv BatchNorm Activation, + however, the use of bias in the conv is determined by the use of batch norm. + + Cross Stage Partial networks (CSPNets) were proposed in: + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang + Chen, Jun-Wei Hsieh. + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. 
+ arXiv:1911.11929 + """ + + def __init__(self, + filters=1, + kernel_size=(1, 1), + strides=(1, 1), + padding="same", + dilation_rate=(1, 1), + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + activation="leaky", + leaky_alpha=0.1, + **kwargs): + """Initializes ConvBN layer. + + Args: + filters: integer for output depth, or the number of features to learn + kernel_size: integer or tuple for the shape of the weight matrix or kernel + to learn. + strides: integer of tuple how much to move the kernel after each kernel + use padding: string 'valid' or 'same', if same, then pad the image, else + do not. + padding: `str`, padding method for conv layers. + dilation_rate: tuple to indicate how much to modulate kernel weights and + how many pixels in a feature map to skip. + kernel_initializer: string to indicate which function to use to initialize + weights. + bias_initializer: string to indicate which function to use to initialize + bias. + kernel_regularizer: string to indicate which function to use to + regularizer weights. + bias_regularizer: string to indicate which function to use to regularizer + bias. + use_bn: boolean for whether to use batch normalization. + use_sync_bn: boolean for whether sync batch normalization. + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + activation: string or None for activation function to use in layer, + if None activation is replaced by linear. + leaky_alpha: float to use as alpha if activation function is leaky. 
+ **kwargs: Keyword Arguments + """ + # convolution params + self._filters = filters + self._kernel_size = kernel_size + self._strides = strides + self._padding = padding + self._dilation_rate = dilation_rate + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + + # batch normalization params + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + + if tf.keras.backend.image_data_format() == "channels_last": + # format: (batch_size, height, width, channels) + self._bn_axis = -1 + else: + # format: (batch_size, channels, width, height) + self._bn_axis = 1 + + # activation params + self._activation = activation + self._leaky_alpha = leaky_alpha + + super(ConvBN, self).__init__(**kwargs) + + def build(self, input_shape): + use_bias = not self._use_bn + + self.conv = tf.keras.layers.Conv2D( + filters=self._filters, + kernel_size=self._kernel_size, + strides=self._strides, + padding=self._padding, + dilation_rate=self._dilation_rate, + use_bias=use_bias, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + + if self._use_bn: + if self._use_sync_bn: + self.bn = tf.keras.layers.experimental.SyncBatchNormalization( + momentum=self._norm_moment, + epsilon=self._norm_epsilon, + axis=self._bn_axis) + else: + self.bn = tf.keras.layers.BatchNormalization( + momentum=self._norm_moment, + epsilon=self._norm_epsilon, + axis=self._bn_axis) + else: + self.bn = Identity() + + if self._activation == "leaky": + self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha) + elif self._activation == "mish": + self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) + else: + self._activation_fn = 
tf_utils.get_activation(self._activation) + + def call(self, x): + x = self.conv(x) + x = self.bn(x) + x = self._activation_fn(x) + return x + + def get_config(self): + # used to store/share parameters to reconstruct the model + layer_config = { + "filters": self._filters, + "kernel_size": self._kernel_size, + "strides": self._strides, + "padding": self._padding, + "dilation_rate": self._dilation_rate, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "kernel_regularizer": self._kernel_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_moment": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._activation, + "leaky_alpha": self._leaky_alpha + } + layer_config.update(super(ConvBN, self).get_config()) + return layer_config + + def __repr__(self): + return repr(self.get_config()) + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class DarkResidual(tf.keras.layers.Layer): + """DarkNet block with Residual connection for Yolo v3 Backbone. + """ + + def __init__(self, + filters=1, + filter_scale=2, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + activation="leaky", + leaky_alpha=0.1, + sc_activation="linear", + downsample=False, + **kwargs): + """Initializes DarkResidual. + + Args: + filters: integer for output depth, or the number of features to learn. + filter_scale: `int`, scale factor for number of filters. 
+ kernel_initializer: string to indicate which function to use to initialize + weights + bias_initializer: string to indicate which function to use to initialize + bias + kernel_regularizer: string to indicate which function to use to + regularizer weights + bias_regularizer: string to indicate which function to use to regularizer + bias + use_bn: boolean for whether to use batch normalization + use_sync_bn: boolean for whether sync batch normalization. + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + activation: string for activation function to use in conv layers. + leaky_alpha: float to use as alpha if activation function is leaky + sc_activation: string for activation function to use in layer + downsample: boolean for if image input is larger than layer output, set + downsample to True so the dimensions are forced to match + **kwargs: Keyword Arguments + """ + # downsample + self._downsample = downsample + + # ConvBN params + self._filters = filters + self._filter_scale = filter_scale + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._bias_regularizer = bias_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._kernel_regularizer = kernel_regularizer + + # normal params + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + + # activation params + self._conv_activation = activation + self._leaky_alpha = leaky_alpha + self._sc_activation = sc_activation + + super().__init__(**kwargs) + + def build(self, input_shape): + self._dark_conv_args = { + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_momentum": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._conv_activation, + "kernel_regularizer": 
self._kernel_regularizer, + "leaky_alpha": self._leaky_alpha + } + if self._downsample: + self._dconv = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=(2, 2), + padding="same", + **self._dark_conv_args) + else: + self._dconv = Identity() + + self._conv1 = ConvBN( + filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + padding="same", + **self._dark_conv_args) + + self._conv2 = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=(1, 1), + padding="same", + **self._dark_conv_args) + + self._shortcut = tf.keras.layers.Add() + if self._sc_activation == "leaky": + self._activation_fn = tf.keras.layers.LeakyReLU( + alpha=self._leaky_alpha) + elif self._sc_activation == "mish": + self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) + else: + self._activation_fn = tf_utils.get_activation(self._sc_activation) + super().build(input_shape) + + def call(self, inputs): + shortcut = self._dconv(inputs) + x = self._conv1(shortcut) + x = self._conv2(x) + x = self._shortcut([x, shortcut]) + return self._activation_fn(x) + + def get_config(self): + # used to store/share parameters to reconstruct the model + layer_config = { + "filters": self._filters, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_moment": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._conv_activation, + "leaky_alpha": self._leaky_alpha, + "sc_activation": self._sc_activation, + "downsample": self._downsample + } + layer_config.update(super().get_config()) + return layer_config + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class CSPTiny(tf.keras.layers.Layer): + """A Small size convolution block proposed in the CSPNet. 
+
+  The layer uses shortcuts, routing (concatenation), and feature grouping
+  in order to improve gradient variability and allow for high efficiency, low
+  power residual learning for small networks.
+
+  Cross Stage Partial networks (CSPNets) were proposed in:
+  [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang
+      Chen, Jun-Wei Hsieh
+      CSPNet: A New Backbone that can Enhance Learning Capability of CNN.
+      arXiv:1911.11929
+  """
+
+  def __init__(self,
+               filters=1,
+               kernel_initializer="glorot_uniform",
+               bias_initializer="zeros",
+               kernel_regularizer=None,
+               bias_regularizer=None,
+               use_bn=True,
+               use_sync_bn=False,
+               group_id=1,
+               groups=2,
+               norm_momentum=0.99,
+               norm_epsilon=0.001,
+               activation="leaky",
+               downsample=True,
+               leaky_alpha=0.1,
+               **kwargs):
+    """Initializes CSPTiny.
+
+    Args:
+      filters: integer for output depth, or the number of features to learn
+      kernel_initializer: string to indicate which function to use to initialize
+        weights
+      bias_initializer: string to indicate which function to use to initialize
+        bias
+      kernel_regularizer: string to indicate which function to use to
+        regularizer weights
+      bias_regularizer: string to indicate which function to use to regularizer
+        bias
+      use_bn: boolean for whether to use batch normalization
+      use_sync_bn: boolean for whether sync batch normalization statistics of
+        all batch norm layers to the models global statistics (across all input
+        batches)
+      group_id: integer for which group of features to pass through the csp tiny
+        stack.
+ groups: integer for how many splits there should be in the convolution + feature stack output + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + activation: string or None for activation function to use in layer, + if None activation is replaced by linear + downsample: boolean for if image input is larger than layer output, set + downsample to True so the dimensions are forced to match + leaky_alpha: float to use as alpha if activation function is leaky + **kwargs: Keyword Arguments + """ + + # ConvBN params + self._filters = filters + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._bias_regularizer = bias_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._kernel_regularizer = kernel_regularizer + self._groups = groups + self._group_id = group_id + self._downsample = downsample + + # normal params + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + + # activation params + self._conv_activation = activation + self._leaky_alpha = leaky_alpha + + super().__init__(**kwargs) + + def build(self, input_shape): + self._dark_conv_args = { + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_momentum": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._conv_activation, + "kernel_regularizer": self._kernel_regularizer, + "leaky_alpha": self._leaky_alpha + } + self._convlayer1 = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=(1, 1), + padding="same", + **self._dark_conv_args) + + self._convlayer2 = ConvBN( + filters=self._filters // 2, + kernel_size=(3, 3), + strides=(1, 1), + padding="same", + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + 
bias_regularizer=self._bias_regularizer, + kernel_regularizer=self._kernel_regularizer, + use_bn=self._use_bn, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_moment, + norm_epsilon=self._norm_epsilon, + activation=self._conv_activation, + leaky_alpha=self._leaky_alpha) + + self._convlayer3 = ConvBN( + filters=self._filters // 2, + kernel_size=(3, 3), + strides=(1, 1), + padding="same", + **self._dark_conv_args) + + self._convlayer4 = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding="same", + **self._dark_conv_args) + + self._maxpool = tf.keras.layers.MaxPool2D( + pool_size=2, strides=2, padding="same", data_format=None) + + super().build(input_shape) + + def call(self, inputs): + x1 = self._convlayer1(inputs) + x1_group = tf.split(x1, self._groups, axis=-1)[self._group_id] + x2 = self._convlayer2(x1_group) # grouping + x3 = self._convlayer3(x2) + x4 = tf.concat([x3, x2], axis=-1) # csp partial using grouping + x5 = self._convlayer4(x4) + x = tf.concat([x1, x5], axis=-1) # csp connect + if self._downsample: + x = self._maxpool(x) + return x, x5 + + def get_config(self): + # used to store/share parameters to reconsturct the model + layer_config = { + "filters": self._filters, + "strides": self._strides, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_moment": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._conv_activation, + "leaky_alpha": self._leaky_alpha, + "sc_activation": self._sc_activation, + } + layer_config.update(super().get_config()) + return layer_config + + +@tf.keras.utils.register_keras_serializable(package="yolo") +class CSPRoute(tf.keras.layers.Layer): + """Down sampling layer to take the place of down sampleing. + + It is applied in Residual networks. 
This is the first of 2 layers needed to + convert any Residual Network model to a CSPNet. At the start of a new level + change, this CSPRoute layer creates a learned identity that will act as a + cross stage connection, that is used to inform the inputs to the next stage. + It is called cross stage partial because the number of filters required in + every intermitent Residual layer is reduced by half. The sister layer will + take the partial generated by this layer and concatnate it with the output of + the final residual layer in the stack to create a fully feature level output. + This concatnation merges the partial blocks of 2 levels as input to the next + allowing the gradients of each level to be more unique, and reducing the + number of parameters required by each level by 50% while keeping accuracy + consistent. + + Cross Stage Partial networks (CSPNets) were proposed in: + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang + Chen, Jun-Wei Hsieh. + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. + arXiv:1911.11929 + """ + + def __init__(self, + filters, + filter_scale=2, + activation="mish", + downsample=True, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes CSPRoute. + + Args: + filters: integer for output depth, or the number of features to learn + filter_scale: integer dicating (filters//2) or the number of filters in + the partial feature stack. + activation: string for activation function to use in layer + downsample: down_sample the input. + kernel_initializer: string to indicate which function to use to initialize + weights. + bias_initializer: string to indicate which function to use to initialize + bias. + kernel_regularizer: string to indicate which function to use to + regularizer weights. 
+ bias_regularizer: string to indicate which function to use to regularizer + bias. + use_bn: boolean for whether to use batch normalization. + use_sync_bn: boolean for whether sync batch normalization. + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + **kwargs: Keyword Arguments + """ + + super().__init__(**kwargs) + # Layer params. + self._filters = filters + self._filter_scale = filter_scale + self._activation = activation + + # Convoultion params. + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + self._downsample = downsample + + def build(self, input_shape): + self._dark_conv_args = { + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_momentum": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._activation, + "kernel_regularizer": self._kernel_regularizer, + } + if self._downsample: + self._conv1 = ConvBN(filters=self._filters, + kernel_size=(3, 3), + strides=(2, 2), + **self._dark_conv_args) + else: + self._conv1 = ConvBN(filters=self._filters, + kernel_size=(3, 3), + strides=(1, 1), + **self._dark_conv_args) + self._conv2 = ConvBN(filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + **self._dark_conv_args) + + self._conv3 = ConvBN(filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + **self._dark_conv_args) + + def call(self, inputs): + x = self._conv1(inputs) + y = self._conv2(x) + x = self._conv3(x) + return (x, y) + + 
+@tf.keras.utils.register_keras_serializable(package="yolo") +class CSPConnect(tf.keras.layers.Layer): + """Sister Layer to the CSPRoute layer. + + Merges the partial feature stacks generated by the CSPDownsampling layer, + and the finaly output of the residual stack. Suggested in the CSPNet paper. + + Cross Stage Partial networks (CSPNets) were proposed in: + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang + Chen, Jun-Wei Hsieh. + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. + arXiv:1911.11929 + """ + + def __init__(self, + filters, + filter_scale=2, + activation="mish", + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes CSPConnect. + + Args: + filters: integer for output depth, or the number of features to learn. + filter_scale: integer dicating (filters//2) or the number of filters in + the partial feature stack. + activation: string for activation function to use in layer. + kernel_initializer: string to indicate which function to use to initialize + weights. + bias_initializer: string to indicate which function to use to initialize + bias. + kernel_regularizer: string to indicate which function to use to + regularizer weights. + bias_regularizer: string to indicate which function to use to regularizer + bias. + use_bn: boolean for whether to use batch normalization. + use_sync_bn: boolean for whether sync batch normalization. + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + **kwargs: Keyword Arguments + """ + super().__init__(**kwargs) + # layer params. + self._filters = filters + self._filter_scale = filter_scale + self._activation = activation + + # Convoultion params. 
+ self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + + def build(self, input_shape): + self._dark_conv_args = { + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_momentum": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "activation": self._activation, + "kernel_regularizer": self._kernel_regularizer, + } + self._conv1 = ConvBN(filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + **self._dark_conv_args) + self._concat = tf.keras.layers.Concatenate(axis=-1) + self._conv2 = ConvBN(filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + **self._dark_conv_args) + + def call(self, inputs): + x_prev, x_csp = inputs + x = self._conv1(x_prev) + x = self._concat([x, x_csp]) + x = self._conv2(x) + return x + + +class CSPStack(tf.keras.layers.Layer): + """CSP full stack. + + Combines the route and the connect in case you dont want to just quickly wrap + an existing callable or list of layers to make it a cross stage partial. + Added for ease of use. you should be able to wrap any layer stack with a CSP + independent of wether it belongs to the Darknet family. if filter_scale = 2, + then the blocks in the stack passed into the the CSP stack should also have + filters = filters/filter_scale. + + Cross Stage Partial networks (CSPNets) were proposed in: + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang + Chen, Jun-Wei Hsieh + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. 
+ arXiv:1911.11929 + """ + + def __init__(self, + filters, + model_to_wrap=None, + filter_scale=2, + activation="mish", + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + downsample=True, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + **kwargs): + """Initializes CSPStack. + + Args: + filters: integer for output depth, or the number of features to learn. + model_to_wrap: callable Model or a list of callable objects that will + process the output of CSPRoute, and be input into CSPConnect. List will + be called sequentially. + filter_scale: integer dicating (filters//2) or the number of filters in + the partial feature stack. + activation: string for activation function to use in layer. + kernel_initializer: string to indicate which function to use to initialize + weights. + bias_initializer: string to indicate which function to use to initialize + bias. + kernel_regularizer: string to indicate which function to use to + regularizer weights. + bias_regularizer: string to indicate which function to use to regularizer + bias. + downsample: down_sample the input. + use_bn: boolean for whether to use batch normalization + use_sync_bn: boolean for whether sync batch normalization. + norm_momentum: float for moment to use for batch normalization + norm_epsilon: float for batch normalization epsilon + **kwargs: Keyword Arguments + """ + super().__init__(**kwargs) + # Layer params. + self._filters = filters + self._filter_scale = filter_scale + self._activation = activation + self._downsample = downsample + + # Convoultion params. 
+ self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + self._norm_moment = norm_momentum + self._norm_epsilon = norm_epsilon + + if model_to_wrap is not None: + if isinstance(model_to_wrap, Callable): + self._model_to_wrap = [model_to_wrap] + elif isinstance(model_to_wrap, List): + self._model_to_wrap = model_to_wrap + else: + raise ValueError("The input to the CSPStack must be a list of layers" + "that we can iterate through, or \n a callable") + else: + self._model_to_wrap = [] + + def build(self, input_shape): + self._dark_conv_args = { + "filters": self._filters, + "filter_scale": self._filter_scale, + "activation": self._activation, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "bias_regularizer": self._bias_regularizer, + "use_bn": self._use_bn, + "use_sync_bn": self._use_sync_bn, + "norm_momentum": self._norm_moment, + "norm_epsilon": self._norm_epsilon, + "kernel_regularizer": self._kernel_regularizer, + } + self._route = CSPRoute(downsample=self._downsample, **self._dark_conv_args) + self._connect = CSPConnect(**self._dark_conv_args) + return + + def call(self, inputs): + x, x_route = self._route(inputs) + for layer in self._model_to_wrap: + x = layer(x) + x = self._connect([x, x_route]) + return x diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5df28a4f3fb1377b2a74c825e779d8b25b7d481b --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py @@ -0,0 +1,285 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.yolo.modeling.layers import nn_blocks + + +class CSPConnectTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("same", 224, 224, 64, 1), + ("downsample", 224, 224, 64, 2)) + def test_pass_through(self, width, height, filters, mod): + x = tf.keras.Input(shape=(width, height, filters)) + test_layer = nn_blocks.CSPRoute(filters=filters, filter_scale=mod) + test_layer2 = nn_blocks.CSPConnect(filters=filters, filter_scale=mod) + outx, px = test_layer(x) + outx = test_layer2([outx, px]) + print(outx) + print(outx.shape.as_list()) + self.assertAllEqual( + outx.shape.as_list(), + [None, np.ceil(width // 2), + np.ceil(height // 2), (filters)]) + + @parameterized.named_parameters(("same", 224, 224, 64, 1), + ("downsample", 224, 224, 128, 2)) + def test_gradient_pass_though(self, filters, width, height, mod): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + test_layer = nn_blocks.CSPRoute(filters, filter_scale=mod) + path_layer = nn_blocks.CSPConnect(filters, filter_scale=mod) + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width // 2)), + int(np.ceil(height // 2)), + 
filters), + dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat, x_prev = test_layer(x) + x_hat = path_layer([x_hat, x_prev]) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + + +class CSPRouteTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("same", 224, 224, 64, 1), + ("downsample", 224, 224, 64, 2)) + def test_pass_through(self, width, height, filters, mod): + x = tf.keras.Input(shape=(width, height, filters)) + test_layer = nn_blocks.CSPRoute(filters=filters, filter_scale=mod) + outx, _ = test_layer(x) + print(outx) + print(outx.shape.as_list()) + self.assertAllEqual( + outx.shape.as_list(), + [None, np.ceil(width // 2), + np.ceil(height // 2), (filters / mod)]) + + @parameterized.named_parameters(("same", 224, 224, 64, 1), + ("downsample", 224, 224, 128, 2)) + def test_gradient_pass_though(self, filters, width, height, mod): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + test_layer = nn_blocks.CSPRoute(filters, filter_scale=mod) + path_layer = nn_blocks.CSPConnect(filters, filter_scale=mod) + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width // 2)), + int(np.ceil(height // 2)), + filters), + dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat, x_prev = test_layer(x) + x_hat = path_layer([x_hat, x_prev]) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + + +class CSPStackTest(tf.test.TestCase, parameterized.TestCase): + + def build_layer( + self, layer_type, filters, filter_scale, count, stack_type, downsample): + if 
stack_type is not None: + layers = [] + if layer_type == "residual": + for _ in range(count): + layers.append( + nn_blocks.DarkResidual( + filters=filters // filter_scale, filter_scale=filter_scale)) + else: + for _ in range(count): + layers.append(nn_blocks.ConvBN(filters=filters)) + + if stack_type == "model": + layers = tf.keras.Sequential(layers=layers) + else: + layers = None + + stack = nn_blocks.CSPStack( + filters=filters, + filter_scale=filter_scale, + downsample=downsample, + model_to_wrap=layers) + return stack + + @parameterized.named_parameters( + ("no_stack", 224, 224, 64, 2, "residual", None, 0, True), + ("residual_stack", 224, 224, 64, 2, "residual", "list", 2, True), + ("conv_stack", 224, 224, 64, 2, "conv", "list", 3, False), + ("callable_no_scale", 224, 224, 64, 1, "residual", "model", 5, False)) + def test_pass_through(self, width, height, filters, mod, layer_type, + stack_type, count, downsample): + x = tf.keras.Input(shape=(width, height, filters)) + test_layer = self.build_layer(layer_type, filters, mod, count, stack_type, + downsample) + outx = test_layer(x) + print(outx) + print(outx.shape.as_list()) + if downsample: + self.assertAllEqual(outx.shape.as_list(), + [None, width // 2, height // 2, filters]) + else: + self.assertAllEqual(outx.shape.as_list(), [None, width, height, filters]) + + @parameterized.named_parameters( + ("no_stack", 224, 224, 64, 2, "residual", None, 0, True), + ("residual_stack", 224, 224, 64, 2, "residual", "list", 2, True), + ("conv_stack", 224, 224, 64, 2, "conv", "list", 3, False), + ("callable_no_scale", 224, 224, 64, 1, "residual", "model", 5, False)) + def test_gradient_pass_though(self, width, height, filters, mod, layer_type, + stack_type, count, downsample): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + + if not downsample: + y = 
tf.Variable( + initial_value=init( + shape=(1, width, height, filters), dtype=tf.float32)) + else: + y = tf.Variable( + initial_value=init( + shape=(1, width // 2, height // 2, filters), dtype=tf.float32)) + test_layer = self.build_layer(layer_type, filters, mod, count, stack_type, + downsample) + + with tf.GradientTape() as tape: + x_hat = test_layer(x) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + + +class ConvBNTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ("valid", (3, 3), "valid", (1, 1)), ("same", (3, 3), "same", (1, 1)), + ("downsample", (3, 3), "same", (2, 2)), ("test", (1, 1), "valid", (1, 1))) + def test_pass_through(self, kernel_size, padding, strides): + if padding == "same": + pad_const = 1 + else: + pad_const = 0 + x = tf.keras.Input(shape=(224, 224, 3)) + test_layer = nn_blocks.ConvBN( + filters=64, + kernel_size=kernel_size, + padding=padding, + strides=strides, + trainable=False) + outx = test_layer(x) + print(outx.shape.as_list()) + test = [ + None, + int((224 - kernel_size[0] + (2 * pad_const)) / strides[0] + 1), + int((224 - kernel_size[1] + (2 * pad_const)) / strides[1] + 1), 64 + ] + print(test) + self.assertAllEqual(outx.shape.as_list(), test) + + @parameterized.named_parameters(("filters", 3)) + def test_gradient_pass_though(self, filters): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + with tf.device("/CPU:0"): + test_layer = nn_blocks.ConvBN(filters, kernel_size=(3, 3), padding="same") + + init = tf.random_normal_initializer() + x = tf.Variable(initial_value=init(shape=(1, 224, 224, + 3), dtype=tf.float32)) + y = tf.Variable( + initial_value=init(shape=(1, 224, 224, filters), dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat = test_layer(x) + grad_loss = loss(x_hat, y) + grad = 
tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + self.assertNotIn(None, grad) + + +class DarkResidualTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("same", 224, 224, 64, False), + ("downsample", 223, 223, 32, True), + ("oddball", 223, 223, 32, False)) + def test_pass_through(self, width, height, filters, downsample): + mod = 1 + if downsample: + mod = 2 + x = tf.keras.Input(shape=(width, height, filters)) + test_layer = nn_blocks.DarkResidual(filters=filters, downsample=downsample) + outx = test_layer(x) + print(outx) + print(outx.shape.as_list()) + self.assertAllEqual( + outx.shape.as_list(), + [None, np.ceil(width / mod), + np.ceil(height / mod), filters]) + + @parameterized.named_parameters(("same", 64, 224, 224, False), + ("downsample", 32, 223, 223, True), + ("oddball", 32, 223, 223, False)) + def test_gradient_pass_though(self, filters, width, height, downsample): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + test_layer = nn_blocks.DarkResidual(filters, downsample=downsample) + + if downsample: + mod = 2 + else: + mod = 1 + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width / mod)), + int(np.ceil(height / mod)), + filters), + dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat = test_layer(x) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + +if __name__ == "__main__": + tf.test.main() diff --git a/official/vision/beta/projects/yolo/ops/__init__.py b/official/vision/beta/projects/yolo/ops/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..a25710c222e3327cb20e000db5df5c5651c4a2cc --- /dev/null +++ b/official/vision/beta/projects/yolo/ops/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/official/vision/beta/projects/yolo/ops/box_ops.py b/official/vision/beta/projects/yolo/ops/box_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0d49177d656906f063f1f0963a1f485be689cdff --- /dev/null +++ b/official/vision/beta/projects/yolo/ops/box_ops.py @@ -0,0 +1,297 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bounding box utils.""" + +import math + +import tensorflow as tf + + +def yxyx_to_xcycwh(box: tf.Tensor): + """Converts boxes from ymin, xmin, ymax, xmax. + + to x_center, y_center, width, height. 
+ + Args: + box: `Tensor` whose shape is [..., 4] and represents the coordinates + of boxes in ymin, xmin, ymax, xmax. + + Returns: + `Tensor` whose shape is [..., 4] and contains the new format. + + Raises: + ValueError: If the last dimension of box is not 4 or if box's dtype isn't + a floating point type. + """ + with tf.name_scope('yxyx_to_xcycwh'): + ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1) + x_center = (xmax + xmin) / 2 + y_center = (ymax + ymin) / 2 + width = xmax - xmin + height = ymax - ymin + box = tf.concat([x_center, y_center, width, height], axis=-1) + return box + + +def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False): + """Converts boxes from x_center, y_center, width, height. + + to ymin, xmin, ymax, xmax. + + Args: + box: a `Tensor` whose shape is [..., 4] and represents the coordinates + of boxes in x_center, y_center, width, height. + split_min_max: bool, whether or not to split x, y min and max values. + + Returns: + box: a `Tensor` whose shape is [..., 4] and contains the new format. + + Raises: + ValueError: If the last dimension of box is not 4 or if box's dtype isn't + a floating point type. + """ + with tf.name_scope('xcycwh_to_yxyx'): + xy, wh = tf.split(box, 2, axis=-1) + xy_min = xy - wh / 2 + xy_max = xy + wh / 2 + x_min, y_min = tf.split(xy_min, 2, axis=-1) + x_max, y_max = tf.split(xy_max, 2, axis=-1) + box = tf.concat([y_min, x_min, y_max, x_max], axis=-1) + if split_min_max: + box = tf.split(box, 2, axis=-1) + return box + + +def xcycwh_to_xyxy(box: tf.Tensor, split_min_max: bool = False): + """Converts boxes from x_center, y_center, width, height to. + + xmin, ymin, xmax, ymax. + + Args: + box: box: a `Tensor` whose shape is [..., 4] and represents the + coordinates of boxes in x_center, y_center, width, height. + split_min_max: bool, whether or not to split x, y min and max values. + + Returns: + box: a `Tensor` whose shape is [..., 4] and contains the new format. 
+ + Raises: + ValueError: If the last dimension of box is not 4 or if box's dtype isn't + a floating point type. + """ + with tf.name_scope('xcycwh_to_yxyx'): + xy, wh = tf.split(box, 2, axis=-1) + xy_min = xy - wh / 2 + xy_max = xy + wh / 2 + box = (xy_min, xy_max) + if not split_min_max: + box = tf.concat(box, axis=-1) + return box + + +def center_distance(center_1: tf.Tensor, center_2: tf.Tensor): + """Calculates the squared distance between two points. + + This function is mathematically equivalent to the following code, but has + smaller rounding errors. + + tf.norm(center_1 - center_2, axis=-1)**2 + + Args: + center_1: a `Tensor` whose shape is [..., 2] and represents a point. + center_2: a `Tensor` whose shape is [..., 2] and represents a point. + + Returns: + dist: a `Tensor` whose shape is [...] and value represents the squared + distance between center_1 and center_2. + + Raises: + ValueError: If the last dimension of either center_1 or center_2 is not 2. + """ + with tf.name_scope('center_distance'): + dist = (center_1[..., 0] - center_2[..., 0])**2 + (center_1[..., 1] - + center_2[..., 1])**2 + return dist + + +def compute_iou(box1, box2, yxyx=False): + """Calculates the intersection of union between box1 and box2. + + Args: + box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of + boxes in x_center, y_center, width, height. + box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of + boxes in x_center, y_center, width, height. + yxyx: `bool`, whether or not box1, and box2 are in yxyx format. + + Returns: + iou: a `Tensor` whose shape is [...] and value represents the intersection + over union. + + Raises: + ValueError: If the last dimension of either box1 or box2 is not 4. 
+ """ + # Get box corners + with tf.name_scope('iou'): + if not yxyx: + box1 = xcycwh_to_yxyx(box1) + box2 = xcycwh_to_yxyx(box2) + + b1mi, b1ma = tf.split(box1, 2, axis=-1) + b2mi, b2ma = tf.split(box2, 2, axis=-1) + intersect_mins = tf.math.maximum(b1mi, b2mi) + intersect_maxes = tf.math.minimum(b1ma, b2ma) + intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, + tf.zeros_like(intersect_mins)) + intersection = tf.reduce_prod( + intersect_wh, axis=-1) # intersect_wh[..., 0] * intersect_wh[..., 1] + + box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1)) + box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1)) + union = box1_area + box2_area - intersection + + iou = intersection / (union + 1e-7) + iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0) + return iou + + +def compute_giou(box1, box2): + """Calculates the generalized intersection of union between box1 and box2. + + Args: + box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of + boxes in x_center, y_center, width, height. + box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of + boxes in x_center, y_center, width, height. + + Returns: + iou: a `Tensor` whose shape is [...] and value represents the generalized + intersection over union. + + Raises: + ValueError: If the last dimension of either box1 or box2 is not 4. 
+  """
+  with tf.name_scope('giou'):
+    # get box corners
+    box1 = xcycwh_to_yxyx(box1)
+    box2 = xcycwh_to_yxyx(box2)
+
+    # compute IOU; overlap width/height are clamped at zero so disjoint
+    # boxes contribute no negative intersection
+    intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
+    intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
+    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
+                                   tf.zeros_like(intersect_mins))
+    intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
+
+    box1_area = tf.math.abs(
+        tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
+    box2_area = tf.math.abs(
+        tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
+    union = box1_area + box2_area - intersection
+
+    # divide_no_nan maps the 0/0 case (both boxes degenerate) to 0
+    iou = tf.math.divide_no_nan(intersection, union)
+    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
+
+    # find the smallest box to encompass both box1 and box2
+    c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
+    c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
+    c = tf.math.abs(tf.reduce_prod(c_mins - c_maxes, axis=-1))
+
+    # compute giou: GIoU = IoU - (C - union) / C, where C is the area of
+    # the smallest enclosing box; the penalty term is subtracted
+    giou = iou - tf.math.divide_no_nan((c - union), c)
+  return iou, giou
+
+
+def compute_diou(box1, box2):
+  """Calculates the distance intersection of union between box1 and box2.
+
+  Args:
+    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
+      boxes in x_center, y_center, width, height.
+    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
+      boxes in x_center, y_center, width, height.
+
+  Returns:
+    iou: a `Tensor` whose shape is [...] with the plain intersection over
+      union (returned so callers can reuse it).
+    diou: a `Tensor` whose shape is [...] and value represents the distance
+      intersection over union.
+
+  Raises:
+    ValueError: If the last dimension of either box1 or box2 is not 4.
+ """ + with tf.name_scope('diou'): + # compute center distance + dist = center_distance(box1[..., 0:2], box2[..., 0:2]) + + # get box corners + box1 = xcycwh_to_yxyx(box1) + box2 = xcycwh_to_yxyx(box2) + + # compute IOU + intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2]) + intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4]) + intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, + tf.zeros_like(intersect_mins)) + intersection = intersect_wh[..., 0] * intersect_wh[..., 1] + + box1_area = tf.math.abs( + tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1)) + box2_area = tf.math.abs( + tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1)) + union = box1_area + box2_area - intersection + + iou = tf.math.divide_no_nan(intersection, union) + iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0) + + # compute max diagnal of the smallest enclosing box + c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2]) + c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4]) + + diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1) + + regularization = tf.math.divide_no_nan(dist, diag_dist) + diou = iou + regularization + return iou, diou + + +def compute_ciou(box1, box2): + """Calculates the complete intersection of union between box1 and box2. + + Args: + box1: a `Tensor` whose shape is [..., 4] and represents the coordinates + of boxes in x_center, y_center, width, height. + box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of + boxes in x_center, y_center, width, height. + + Returns: + iou: a `Tensor` whose shape is [...] and value represents the complete + intersection over union. + + Raises: + ValueError: If the last dimension of either box1 or box2 is not 4. 
+ """ + with tf.name_scope('ciou'): + # compute DIOU and IOU + iou, diou = compute_diou(box1, box2) + + # computer aspect ratio consistency + arcterm = ( + tf.math.atan(tf.math.divide_no_nan(box1[..., 2], box1[..., 3])) - + tf.math.atan(tf.math.divide_no_nan(box2[..., 2], box2[..., 3])))**2 + v = 4 * arcterm / (math.pi)**2 + + # compute IOU regularization + a = tf.math.divide_no_nan(v, ((1 - iou) + v)) + ciou = diou + v * a + return iou, ciou diff --git a/official/vision/beta/projects/yolo/ops/box_ops_test.py b/official/vision/beta/projects/yolo/ops/box_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..b10e53235b731bbe839551d473831d9f28fe94b0 --- /dev/null +++ b/official/vision/beta/projects/yolo/ops/box_ops_test.py @@ -0,0 +1,56 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.yolo.ops import box_ops + + +class InputUtilsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters((1), (4)) + def test_box_conversions(self, num_boxes): + boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4)) + expected_shape = np.array([num_boxes, 4]) + xywh_box = box_ops.yxyx_to_xcycwh(boxes) + yxyx_box = box_ops.xcycwh_to_yxyx(boxes) + xyxy_box = box_ops.xcycwh_to_xyxy(boxes) + self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape) + self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape) + self.assertAllEqual(tf.shape(xyxy_box).numpy(), expected_shape) + + @parameterized.parameters((1), (5), (7)) + def test_ious(self, num_boxes): + boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4)) + expected_shape = np.array([ + num_boxes, + ]) + expected_iou = np.ones([ + num_boxes, + ]) + iou = box_ops.compute_iou(boxes, boxes) + _, giou = box_ops.compute_giou(boxes, boxes) + _, ciou = box_ops.compute_ciou(boxes, boxes) + _, diou = box_ops.compute_diou(boxes, boxes) + self.assertAllEqual(tf.shape(iou).numpy(), expected_shape) + self.assertArrayNear(iou, expected_iou, 0.001) + self.assertArrayNear(giou, expected_iou, 0.001) + self.assertArrayNear(ciou, expected_iou, 0.001) + self.assertArrayNear(diou, expected_iou, 0.001) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yolo/ops/preprocess_ops.py b/official/vision/beta/projects/yolo/ops/preprocess_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..562b4fe0a90099f6a8fd240f92fb56b33ba8523f --- /dev/null +++ b/official/vision/beta/projects/yolo/ops/preprocess_ops.py @@ -0,0 +1,524 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Yolo preprocess ops.""" + +import tensorflow as tf +import tensorflow_addons as tfa + +from official.vision.beta.projects.yolo.ops import box_ops + + +def resize_crop_filter(image, boxes, default_width, default_height, + target_width, target_height): + """Apply zooming to the image and boxes. + + Args: + image: a `Tensor` representing the image. + boxes: a `Tensor` represeting the boxes. + default_width: a `Tensor` representing the width of the image. + default_height: a `Tensor` representing the height of the image. + target_width: a `Tensor` representing the desired width of the image. + target_height: a `Tensor` representing the desired height of the image. + Returns: + images: a `Tensor` representing the augmented image. + boxes: a `Tensor` representing the augmented boxes. 
+ """ + with tf.name_scope('resize_crop_filter'): + image = tf.image.resize(image, (target_width, target_height)) + image = tf.image.resize_with_crop_or_pad(image, + target_height=default_height, + target_width=default_width) + + default_width = tf.cast(default_width, boxes.dtype) + default_height = tf.cast(default_height, boxes.dtype) + target_width = tf.cast(target_width, boxes.dtype) + target_height = tf.cast(target_height, boxes.dtype) + + aspect_change_width = target_width / default_width + aspect_change_height = target_height / default_height + + x, y, width, height = tf.split(boxes, 4, axis=-1) + x = (x - 0.5) * target_width / default_width + 0.5 + y = (y - 0.5) * target_height / default_height + 0.5 + width = width * aspect_change_width + height = height * aspect_change_height + boxes = tf.concat([x, y, width, height], axis=-1) + return image, boxes + + +def random_translate(image, box, t, seed=None): + """Randomly translate the image and boxes. + + Args: + image: a `Tensor` representing the image. + box: a `Tensor` represeting the boxes. + t: an `int` representing the translation factor + seed: an optional seed for tf.random operations + Returns: + image: a `Tensor` representing the augmented image. + box: a `Tensor` representing the augmented boxes. + """ + t_x = tf.random.uniform(minval=-t, + maxval=t, + shape=(), + dtype=tf.float32, + seed=seed) + t_y = tf.random.uniform(minval=-t, + maxval=t, + shape=(), + dtype=tf.float32, + seed=seed) + box = translate_boxes(box, t_x, t_y) + image = translate_image(image, t_x, t_y) + return image, box + + +def translate_boxes(box, translate_x, translate_y): + """Randomly translate the boxes. + + Args: + box: a `Tensor` represeitng the boxes. + translate_x: a `Tensor` represting the translation on the x-axis. + translate_y: a `Tensor` represting the translation on the y-axis. + Returns: + box: a `Tensor` representing the augmented boxes. 
+ """ + with tf.name_scope('translate_boxs'): + x = box[..., 0] + translate_x + y = box[..., 1] + translate_y + box = tf.stack([x, y, box[..., 2], box[..., 3]], axis=-1) + box.set_shape([None, 4]) + return box + + +def translate_image(image, translate_x, translate_y): + """Randomly translate the image. + + Args: + image: a `Tensor` representing the image. + translate_x: a `Tensor` represting the translation on the x-axis. + translate_y: a `Tensor` represting the translation on the y-axis. + Returns: + box: a `Tensor` representing the augmented boxes. + """ + with tf.name_scope('translate_image'): + if (translate_x != 0 and translate_y != 0): + image_jitter = tf.convert_to_tensor([translate_x, translate_y]) + image_jitter.set_shape([2]) + image = tfa.image.translate( + image, image_jitter * tf.cast(tf.shape(image)[1], tf.float32)) + return image + + +def pad_max_instances(value, instances, pad_value=0, pad_axis=0): + """Pads tensors to max number of instances.""" + shape = tf.shape(value) + dim1 = shape[pad_axis] + take = tf.math.reduce_min([instances, dim1]) + value, _ = tf.split(value, [take, -1], + axis=pad_axis) # value[:instances, ...] + pad = tf.convert_to_tensor([tf.math.reduce_max([instances - dim1, 0])]) + nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0) + pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype)) + value = tf.concat([value, pad_tensor], axis=pad_axis) + return value + + +def fit_preserve_aspect_ratio(image, + boxes, + width=None, + height=None, + target_dim=None): + """Resizes the image while peserving the image aspect ratio. + + Args: + image: a `Tensor` representing the image. + boxes: a `Tensor` representing the boxes. + width: int for the image width. + height: int for the image height. + target_dim: list or a Tensor of height and width. + Returns: + image: a `Tensor` representing the image. + box: a `Tensor` representing the boxes. 
+ """ + if width is None or height is None: + shape = tf.shape(image) + if tf.shape(shape)[0] == 4: + width = shape[1] + height = shape[2] + else: + width = shape[0] + height = shape[1] + + clipper = tf.math.maximum(width, height) + if target_dim is None: + target_dim = clipper + + pad_width = clipper - width + pad_height = clipper - height + image = tf.image.pad_to_bounding_box(image, pad_width // 2, pad_height // 2, + clipper, clipper) + + boxes = box_ops.yxyx_to_xcycwh(boxes) + x, y, w, h = tf.split(boxes, 4, axis=-1) + + y *= tf.cast(width / clipper, tf.float32) + x *= tf.cast(height / clipper, tf.float32) + + y += tf.cast((pad_width / clipper) / 2, tf.float32) + x += tf.cast((pad_height / clipper) / 2, tf.float32) + + h *= tf.cast(width / clipper, tf.float32) + w *= tf.cast(height / clipper, tf.float32) + + boxes = tf.concat([x, y, w, h], axis=-1) + + boxes = box_ops.xcycwh_to_yxyx(boxes) + image = tf.image.resize(image, (target_dim, target_dim)) + return image, boxes + + +def get_best_anchor(y_true, anchors, width=1, height=1): + """Gets the correct anchor that is assoiciated with each box using IOU. + + Args: + y_true: tf.Tensor[] for the list of bounding boxes in the yolo format + anchors: list or tensor for the anchor boxes to be used in prediction + found via Kmeans + width: int for the image width + height: int for the image height + + Returns: + tf.Tensor: y_true with the anchor associated with each ground truth + box known. 
+ """ + with tf.name_scope('get_anchor'): + width = tf.cast(width, dtype=tf.float32) + height = tf.cast(height, dtype=tf.float32) + + # split the boxes into center and width height + anchor_xy = y_true[..., 0:2] + + # scale thhe boxes + anchors = tf.convert_to_tensor(anchors, dtype=tf.float32) + anchors_x = anchors[..., 0] / width + anchors_y = anchors[..., 1] / height + anchors = tf.stack([anchors_x, anchors_y], axis=-1) + k = tf.shape(anchors)[0] + + # build a matrix of anchor boxes of shape [num_anchors, num_boxes, 4] + anchors = tf.transpose(anchors, perm=[1, 0]) + anchor_xy = tf.tile(tf.expand_dims(anchor_xy, axis=-1), + [1, 1, tf.shape(anchors)[-1]]) + anchors = tf.tile(tf.expand_dims(anchors, axis=0), + [tf.shape(anchor_xy)[0], 1, 1]) + + # stack the xy so, each anchor is asscoaited once with each center from + # the ground truth input + anchors = tf.concat([anchor_xy, anchors], axis=1) + anchors = tf.transpose(anchors, perm=[2, 0, 1]) + + # copy the gt n times so that each anchor from above can be compared to + # input ground truth to shape: [num_anchors, num_boxes, 4] + truth_comp = tf.tile(tf.expand_dims(y_true[..., 0:4], axis=-1), + [1, 1, tf.shape(anchors)[0]]) + truth_comp = tf.transpose(truth_comp, perm=[2, 0, 1]) + + # compute intersection over union of the boxes, and take the argmax of + # comuted iou for each box. thus each box is associated with the + # largest interection over union + iou_raw = box_ops.compute_iou(truth_comp, anchors) + values, indexes = tf.math.top_k(tf.transpose(iou_raw, perm=[1, 0]), + k=tf.cast(k, dtype=tf.int32), + sorted=True) + ind_mask = tf.cast(values > 0.213, dtype=indexes.dtype) + + # pad the indexs such that all values less than the thresh are -1 + # add one, multiply the mask to zeros all the bad locations + # subtract 1 makeing all the bad locations 0. 
+ iou_index = tf.concat([ + tf.keras.backend.expand_dims(indexes[..., 0], axis=-1), + ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1 + ], + axis=-1) + iou_index = iou_index[..., :6] + + return tf.cast(iou_index, dtype=tf.float32) + + +def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker): + """Converts ground truth for use in loss functions. + + Args: + y_true: tf.Tensor[] ground truth + [box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box] + mask: list of the anchor boxes choresponding to the output, + ex. [1, 2, 3] tells this layer to predict only the first 3 + anchors in the total. + size: The dimensions of this output, for regular, it progresses + from 13, to 26, to 52. + dtype: The expected output dtype. + use_tie_breaker: boolean value for wether or not to use the tie_breaker. + + Returns: + tf.Tensor[] of shape [size, size, #of_anchors, 4, 1, num_classes] + """ + # unpack required components from the input ground truth + boxes = tf.cast(y_true['bbox'], dtype) + classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1) + anchors = tf.cast(y_true['best_anchors'], dtype) + + # get the number of boxes in the ground truth boxs + num_boxes = tf.shape(boxes)[0] + # get the number of anchor boxes used for this anchor scale + len_masks = tf.shape(mask)[0] + + # init a fixed memeory size grid for this prediction scale + # [size, size, # of anchors, 1 + 1 + number of anchors per scale] + full = tf.zeros([size, size, len_masks, 6], dtype=dtype) + # init a grid to use to track which locations have already + # been used before (for the tie breaker) + depth_track = tf.zeros((size, size, len_masks), dtype=tf.int32) + + # rescale the x and y centers to the size of the grid [size, size] + x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32) + y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32) + + # init all the tensorArrays to be used in storeing the index + # and the values to be used to update both 
depth_track and full + update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True) + update = tf.TensorArray(dtype, size=0, dynamic_size=True) + + # init constants and match data types before entering loop + i = 0 + anchor_id = 0 + const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype) + mask = tf.cast(mask, dtype=dtype) + rand_update = 0.0 + + for box_id in range(num_boxes): + # If the width or height of the box is zero, skip it. + # After pre processing, if the box is not in the i image bounds anymore, + # skip it. + if tf.keras.backend.all(tf.math.equal( + boxes[box_id, 2:4], 0)) or tf.keras.backend.any( + tf.math.less(boxes[box_id, 0:2], 0.0)) or tf.keras.backend.any( + tf.math.greater_equal(boxes[box_id, 0:2], 1.0)): + continue + if use_tie_breaker: + for anchor_id in range(tf.shape(anchors)[-1]): + index = tf.math.equal(anchors[box_id, anchor_id], mask) + if tf.keras.backend.any(index): + # using the boolean index mask to determine exactly which + # anchor box was used + p = tf.cast( + tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)), + dtype=tf.int32) + # determine if the index was used or not + used = depth_track[y[box_id], x[box_id], p] + # defualt used upadte value + uid = 1 + + # if anchor_id is 0, this is the best matched anchor for this box + # with the highest IOU + if anchor_id == 0: + # write the box to the update list + # create random numbr to trigger a replacment if the cell + # is used already + if tf.math.equal(used, 1): + rand_update = tf.random.uniform([], maxval=1) + else: + rand_update = 1.0 + + if rand_update > 0.5: + # write the box to the update list + update_index = update_index.write(i, [y[box_id], x[box_id], p]) + value = tf.concat([boxes[box_id], const, classes[box_id]], + axis=-1) + update = update.write(i, value) + + # if used is 2, this cell is filled with a non-optimal box + # if used is 0, the cell in the ground truth is not yet consumed + # in either case you can replace that cell with a new box, as long + # as 
it is not consumed by an optimal box with anchor_id = 0 + elif tf.math.equal(used, 2) or tf.math.equal(used, 0): + uid = 2 + # write the box to the update list + update_index = update_index.write(i, [y[box_id], x[box_id], p]) + value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1) + update = update.write(i, value) + + depth_track = tf.tensor_scatter_nd_update( + depth_track, [(y[box_id], x[box_id], p)], [uid]) + i += 1 + else: + index = tf.math.equal(anchors[box_id, 0], mask) + # if any there is an index match + if tf.keras.backend.any(index): + # find the index + p = tf.cast( + tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)), + dtype=tf.int32) + # update the list of used boxes + update_index = update_index.write(i, [y[box_id], x[box_id], p]) + value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1) + update = update.write(i, value) + i += 1 + + # if the size of the update list is not 0, do an update, other wise, + # no boxes and pass an empty grid + if tf.math.greater(update_index.size(), 0): + update_index = update_index.stack() + update = update.stack() + full = tf.tensor_scatter_nd_update(full, update_index, update) + return full + + +def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker): + """Converts ground truth for use in loss functions. + + Args: + y_true: tf.Tensor[] ground truth + [batch, box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box] + mask: list of the anchor boxes choresponding to the output, + ex. [1, 2, 3] tells this layer to predict only the first 3 anchors + in the total. 
+ size: the dimensions of this output, for regular, it progresses from + 13, to 26, to 52 + dtype: expected output datatype + use_tie_breaker: boolean value for wether or not to use the tie + breaker + + Returns: + tf.Tensor[] of shape [batch, size, size, #of_anchors, 4, 1, num_classes] + """ + # unpack required components from the input ground truth + boxes = tf.cast(y_true['bbox'], dtype) + classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1) + anchors = tf.cast(y_true['best_anchors'], dtype) + + # get the batch size + batches = tf.shape(boxes)[0] + # get the number of boxes in the ground truth boxs + num_boxes = tf.shape(boxes)[1] + # get the number of anchor boxes used for this anchor scale + len_masks = tf.shape(mask)[0] + + # init a fixed memeory size grid for this prediction scale + # [batch, size, size, # of anchors, 1 + 1 + number of anchors per scale] + full = tf.zeros([batches, size, size, len_masks, 1 + 4 + 1], dtype=dtype) + # init a grid to use to track which locations have already + # been used before (for the tie breaker) + depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32) + + # rescale the x and y centers to the size of the grid [size, size] + x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32) + y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32) + + # init all the tensorArrays to be used in storeing the index and the values + # to be used to update both depth_track and full + update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True) + update = tf.TensorArray(dtype, size=0, dynamic_size=True) + + # init constants and match data types before entering loop + i = 0 + anchor_id = 0 + const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype) + mask = tf.cast(mask, dtype=dtype) + rand_update = 0.0 + + for batch in range(batches): + for box_id in range(num_boxes): + # if the width or height of the box is zero, skip it + if 
tf.keras.backend.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)): + continue + # after pre processing, if the box is not in the image bounds anymore + # skip the box + if tf.keras.backend.any(tf.math.less( + boxes[batch, box_id, 0:2], 0.0)) or tf.keras.backend.any( + tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0)): + continue + if use_tie_breaker: + for anchor_id in range(tf.shape(anchors)[-1]): + index = tf.math.equal(anchors[batch, box_id, anchor_id], mask) + if tf.keras.backend.any(index): + # using the boolean index mask to determine exactly which anchor + # box was used + p = tf.cast(tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)), + dtype=tf.int32) + # determine if the index was used or not + used = depth_track[batch, y[batch, box_id], x[batch, box_id], p] + # defualt used upadte value + uid = 1 + + # if anchor_id is 0, this is the best matched anchor for this box + # with the highest IOU + if anchor_id == 0: + # create random number to trigger a replacment if the cell + # is used already + if tf.math.equal(used, 1): + rand_update = tf.random.uniform([], maxval=1) + else: + rand_update = 1.0 + + if rand_update > 0.5: + # write the box to the update list + update_index = update_index.write( + i, [batch, y[batch, box_id], x[batch, box_id], p]) + value = tf.concat( + [boxes[batch, box_id], const, classes[batch, box_id]], + axis=-1) + update = update.write(i, value) + + # if used is 2, this cell is filled with a non-optimal box + # if used is 0, the cell in the ground truth is not yet consumed + # in either case you can replace that cell with a new box, as long + # as it is not consumed by an optimal box with anchor_id = 0 + elif tf.math.equal(used, 2) or tf.math.equal(used, 0): + uid = 2 + # write the box to the update list + update_index = update_index.write( + i, [batch, y[batch, box_id], x[batch, box_id], p]) + value = ([boxes[batch, box_id], const, classes[batch, box_id]]) + update = update.write(i, value) + + # update the used index for 
where and how the box was placed + depth_track = tf.tensor_scatter_nd_update( + depth_track, [(batch, y[batch, box_id], x[batch, box_id], p)], + [uid]) + i += 1 + else: + index = tf.math.equal(anchors[batch, box_id, 0], mask) + if tf.keras.backend.any(index): + # if any there is an index match + p = tf.cast( + tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)), + dtype=tf.int32) + # write the box to the update list + update_index = update_index.write( + i, [batch, y[batch, box_id], x[batch, box_id], p]) + value = tf.concat( + [boxes[batch, box_id], const, classes[batch, box_id]], axis=-1) + update = update.write(i, value) + i += 1 + + # if the size of the update list is not 0, do an update, other wise, + # no boxes and pass an empty grid + if tf.math.greater(update_index.size(), 0): + update_index = update_index.stack() + update = update.stack() + full = tf.tensor_scatter_nd_update(full, update_index, update) + return full + diff --git a/official/vision/beta/projects/yolo/ops/preprocess_ops_test.py b/official/vision/beta/projects/yolo/ops/preprocess_ops_test.py new file mode 100644 index 0000000000000000000000000000000000000000..11973d9787454985653b2cbfe04f022a0fb3eb61 --- /dev/null +++ b/official/vision/beta/projects/yolo/ops/preprocess_ops_test.py @@ -0,0 +1,67 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.yolo.ops import preprocess_ops + + +class PreprocessOpsTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters((416, 416, 5, 300, 300), (100, 200, 6, 50, 50)) + def test_resize_crop_filter(self, default_width, default_height, num_boxes, + target_width, target_height): + image = tf.convert_to_tensor( + np.random.rand(default_width, default_height, 3)) + boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4)) + resized_image, resized_boxes = preprocess_ops.resize_crop_filter( + image, boxes, default_width, default_height, target_width, + target_height) + resized_image_shape = tf.shape(resized_image) + resized_boxes_shape = tf.shape(resized_boxes) + self.assertAllEqual([default_height, default_width, 3], + resized_image_shape.numpy()) + self.assertAllEqual([num_boxes, 4], resized_boxes_shape.numpy()) + + @parameterized.parameters((7, 7., 5.), (25, 35., 45.)) + def test_translate_boxes(self, num_boxes, translate_x, translate_y): + boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4)) + translated_boxes = preprocess_ops.translate_boxes( + boxes, translate_x, translate_y) + translated_boxes_shape = tf.shape(translated_boxes) + self.assertAllEqual([num_boxes, 4], translated_boxes_shape.numpy()) + + @parameterized.parameters((100, 200, 75., 25.), (400, 600, 25., 75.)) + def test_translate_image(self, image_height, image_width, translate_x, + translate_y): + image = tf.convert_to_tensor(np.random.rand(image_height, image_width, 4)) + translated_image = preprocess_ops.translate_image( + image, translate_x, translate_y) + translated_image_shape = tf.shape(translated_image) + self.assertAllEqual([image_height, image_width, 4], + translated_image_shape.numpy()) + + @parameterized.parameters(([1, 2], 20, 0), ([13, 2, 4], 15, 0)) + def test_pad_max_instances(self, input_shape, instances, pad_axis): + expected_output_shape = 
input_shape + expected_output_shape[pad_axis] = instances + output = preprocess_ops.pad_max_instances( + np.ones(input_shape), instances, pad_axis=pad_axis) + self.assertAllEqual(expected_output_shape, tf.shape(output).numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yolo/tasks/image_classification.py b/official/vision/beta/projects/yolo/tasks/image_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..2dd3ce3a71c0d10dd0ce7bec6e9b356116e75a25 --- /dev/null +++ b/official/vision/beta/projects/yolo/tasks/image_classification.py @@ -0,0 +1,114 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@task_factory.register_task_cls(exp_cfg.ImageClassificationTask)
class ImageClassificationTask(image_classification.ImageClassificationTask):
  """Image classification task wired to the YOLO project's TFDS decoder."""

  def build_inputs(self, params, input_context=None):
    """Builds the classification input pipeline.

    Args:
      params: the data config describing this input pipeline. When
        `params.tfds_name` is set, the TFDS decoder from this project is used;
        otherwise the generic TFRecord classification decoder is used.
      input_context: optional `tf.distribute.InputContext` for sharding.

    Returns:
      A `tf.data.Dataset` of parsed classification examples.
    """
    model_cfg = self.task_config.model
    example_decoder = (
        cli.Decoder() if params.tfds_name else classification_input.Decoder())
    example_parser = classification_input.Parser(
        output_size=model_cfg.input_size[:2],
        num_classes=model_cfg.num_classes,
        dtype=params.dtype)
    reader = input_reader.InputReader(
        params,
        dataset_fn=tf.data.TFRecordDataset,
        decoder_fn=example_decoder.decode,
        parser_fn=example_parser.parse_fn(params.is_training))
    return reader.read(input_context=input_context)

  def train_step(self, inputs, model, optimizer, metrics=None):
    """Runs one forward/backward pass and applies gradients.

    Args:
      inputs: a (features, labels) tuple for this step.
      model: the Keras model defining the forward pass.
      optimizer: the optimizer used to apply gradients.
      metrics: an optional nested structure of metric objects.

    Returns:
      A dictionary of logs keyed by loss/metric name.
    """
    features, labels = inputs
    if self.task_config.losses.one_hot:
      labels = tf.one_hot(labels, self.task_config.model.num_classes)

    replica_count = tf.distribute.get_strategy().num_replicas_in_sync
    uses_loss_scaling = isinstance(
        optimizer, tf.keras.mixed_precision.LossScaleOptimizer)

    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Under mixed_float16 / mixed_bfloat16 policies the model may emit
      # half-precision tensors; cast back to float32 before computing losses.
      outputs = tf.nest.map_structure(
          lambda t: tf.cast(t, tf.float32), outputs)

      # Per-replica loss; the gradient allreduce sums across replicas inside
      # the optimizer, so divide here to get the global mean.
      loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      scaled_loss = loss / replica_count

      # LossScaleOptimizer scales the loss for float16 numerical stability.
      if uses_loss_scaling:
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    train_vars = model.trainable_variables
    grads = tape.gradient(scaled_loss, train_vars)
    # Undo the loss scaling on the gradients before applying them.
    if uses_loss_scaling:
      grads = optimizer.get_unscaled_gradients(grads)

    # Optional global-norm gradient clipping.
    if self.task_config.gradient_clip_norm > 0:
      grads, _ = tf.clip_by_global_norm(
          grads, self.task_config.gradient_clip_norm)
    optimizer.apply_gradients(list(zip(grads, train_vars)))

    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
      logs.update({m.name: m.result() for m in metrics})
    elif model.compiled_metrics:
      self.process_compiled_metrics(model.compiled_metrics, labels, outputs)
      logs.update({m.name: m.result() for m in model.metrics})
    return logs
# Lint as: python3
"""TensorFlow Model Garden Vision training driver.

Example usage:
  python3 -m official.vision.beta.projects.yolo.train \
    --mode=train_and_eval \
    --experiment=darknet_classification \
    --model_dir=training_dir \
    --config_file=official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml
"""

from absl import app
from absl import flags
import gin

from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.core import task_factory
from official.core import train_lib
from official.core import train_utils
from official.modeling import performance
from official.vision.beta.projects.yolo.common import registry_imports  # pylint: disable=unused-import

FLAGS = flags.FLAGS


def main(_):
  """Parses configs/flags and runs the requested experiment."""
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
  params = train_utils.parse_configuration(FLAGS)

  model_dir = FLAGS.model_dir
  if 'train' in FLAGS.mode:
    # Pure eval modes do not output yaml files. Otherwise continuous eval job
    # may race against the train job for writing the same file.
    train_utils.serialize_config(params, model_dir)

  # Sets the mixed_precision policy. 'mixed_float16' / 'mixed_bfloat16' can
  # significantly speed up models by using float16 on GPUs and bfloat16 on
  # TPUs; loss scaling takes effect only when the dtype is float16.
  if params.runtime.mixed_precision_dtype:
    performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype)

  distribution_strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)
  # The task must be constructed under the strategy scope so its variables
  # are created on the correct devices.
  with distribution_strategy.scope():
    task = task_factory.get_task(params.task, logging_dir=model_dir)

  train_lib.run_experiment(
      distribution_strategy=distribution_strategy,
      task=task,
      mode=FLAGS.mode,
      params=params,
      model_dir=model_dir)

  train_utils.save_gin_config(FLAGS.mode, model_dir)


if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(main)
+This code has been tested with Tensorflow 2.4.0. Going forward, +we will continue to target the latest released version of Tensorflow. + +Please verify that you have Python 3.6+ and Tensorflow 2.4.0 or higher +installed by running the following commands: + +```sh +python --version +python -c 'import tensorflow as tf; print(tf.__version__)' +``` + +Refer to the [instructions here][4] +for using the model in this repo. Make sure to add the models folder to your +Python path. + +[1]: https://research.google.com/youtube8m/ +[2]: https://github.com/google/youtube-8m +[3]: https://www.tensorflow.org/install/ +[4]: +https://github.com/tensorflow/models/tree/master/official#running-the-models + +#### Using GPUs + +If your Tensorflow installation has GPU support +(which should have been provided with `pip install tensorflow` for any version +above Tensorflow 1.15), this code will make use of all of your compatible GPUs. +You can verify your installation by running + +``` +tf.config.list_physical_devices('GPU') +``` + +This will print out something like the following for each of your compatible +GPUs. + +``` +I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] +Found device 0 with properties: +pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0 +coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB +deviceMemoryBandwidth: 681.88GiB/s +... +``` + +### Train and inference +Train video-level model on frame-level features and inference at segment-level. + +#### Train using the config file. +Create a YAML or JSON file for specifying the parameters to be overridden. +Working examples can be found in yt8m/experiments directory. 
Setting `segment_labels=True` in your configuration forces
the segment-level labels to be used in the evaluation/validation phase.
If set to `False`, video-level labels are used for inference.
+train | step: 22785 | steps/sec: 0.4 | output: + {'learning_rate': 0.0049961056, + 'model_loss': 0.0012011167, + 'total_loss': 0.0013538885, + 'training_loss': 0.0013538885} + +``` + +and the following for evaluation: + +``` +eval | step: 22785 | running 2172 steps of evaluation... +eval | step: 22785 | eval time: 1663.4 | output: + {'avg_hit_at_one': 0.5572835238737471, + 'avg_perr': 0.557277077999072, + 'gap': 0.768825760186494, + 'map': 0.19354554465020685, + 'model_loss': 0.0005052475, + 'total_loss': 0.0006564412, + 'validation_loss': 0.0006564412} +``` diff --git a/official/vision/beta/projects/yt8m/__init__.py b/official/vision/beta/projects/yt8m/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/vision/beta/projects/yt8m/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/beta/projects/yt8m/configs/__init__.py b/official/vision/beta/projects/yt8m/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d620c7f1faddc7b962861e4c77f16c3f915d332 --- /dev/null +++ b/official/vision/beta/projects/yt8m/configs/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Configs package definition.""" + +from official.vision.beta.projects.yt8m.configs import yt8m diff --git a/official/vision/beta/projects/yt8m/configs/yt8m.py b/official/vision/beta/projects/yt8m/configs/yt8m.py new file mode 100644 index 0000000000000000000000000000000000000000..df1e588272cfddc442050da2c72fae0aecb46ce9 --- /dev/null +++ b/official/vision/beta/projects/yt8m/configs/yt8m.py @@ -0,0 +1,157 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Video classification configuration definition."""
from typing import Optional, Tuple
from absl import flags
import dataclasses

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization

FLAGS = flags.FLAGS

YT8M_TRAIN_EXAMPLES = 3888919
YT8M_VAL_EXAMPLES = 1112356
# 2/frame -> frame-level features; 3/frame -> segment-level features.
YT8M_TRAIN_PATH = 'gs://youtube8m-ml/2/frame/train/train*.tfrecord'
YT8M_VAL_PATH = 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord'


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building YT8M datasets."""
  name: Optional[str] = 'yt8m'
  split: Optional[str] = None
  # Per-feature dimensionality; parallel to `feature_names`.
  feature_sizes: Tuple[int, ...] = (1024, 128)
  feature_names: Tuple[str, ...] = ('rgb', 'audio')
  segment_size: int = 1
  # If True, read segment-level labels instead of video-level labels.
  segment_labels: bool = False
  temporal_stride: int = 1
  max_frames: int = 300
  num_frames: int = 300  # set smaller to allow random sample (Parser)
  num_classes: int = 3862
  num_devices: int = 1
  input_path: str = ''
  is_training: bool = True
  random_seed: int = 123
  num_examples: int = -1


def yt8m(is_training):
  """Returns the YT8M dataset config for the train or validation split."""
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
      segment_labels=False,
      segment_size=5,
      is_training=is_training,
      split='train' if is_training else 'valid',
      num_examples=YT8M_TRAIN_EXAMPLES if is_training else YT8M_VAL_EXAMPLES,
      input_path=YT8M_TRAIN_PATH if is_training else YT8M_VAL_PATH)


@dataclasses.dataclass
class YT8MModel(hyperparams.Config):
  """The model config."""
  cluster_size: int = 2048
  hidden_size: int = 2048
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  is_training: bool = True
  activation: str = 'relu6'
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'


@dataclasses.dataclass
class Losses(hyperparams.Config):
  """The loss config."""
  name: str = 'binary_crossentropy'
  from_logits: bool = False
  label_smoothing: float = 0.0


@dataclasses.dataclass
class YT8MTask(cfg.TaskConfig):
  """The task config."""
  model: YT8MModel = YT8MModel()
  train_data: DataConfig = yt8m(is_training=True)
  validation_data: DataConfig = yt8m(is_training=False)
  losses: Losses = Losses()
  gradient_clip_norm: float = 1.0
  num_readers: int = 8
  top_k: int = 20
  top_n: Optional[int] = None


def add_trainer(
    experiment: cfg.ExperimentConfig,
    train_batch_size: int,
    eval_batch_size: int,
    learning_rate: float = 0.005,
    train_epochs: int = 44,
):
  """Adds and configures a trainer in the experiment config.

  Args:
    experiment: the experiment config to mutate in place.
    train_batch_size: global training batch size; must be positive.
    eval_batch_size: global evaluation batch size; must be positive.
    learning_rate: initial learning rate for the exponential schedule.
    train_epochs: number of epochs to train for.

  Returns:
    The mutated `experiment`.

  Raises:
    ValueError: if either batch size is not positive.
  """
  # BUGFIX: the original checks compared the module-level constants
  # YT8M_TRAIN_EXAMPLES / YT8M_VAL_EXAMPLES against zero, which can never
  # fail. Validate the caller-supplied batch sizes instead, since they drive
  # the steps-per-epoch arithmetic below (and would divide by zero).
  if train_batch_size <= 0:
    raise ValueError(
        'train_batch_size must be positive, got {!r}'.format(train_batch_size))
  if eval_batch_size <= 0:
    raise ValueError(
        'eval_batch_size must be positive, got {!r}'.format(eval_batch_size))
  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size
  steps_per_epoch = YT8M_TRAIN_EXAMPLES // train_batch_size
  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=YT8M_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_epoch,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'adam',
              'adam': {}
          },
          'learning_rate': {
              'type': 'exponential',
              'exponential': {
                  'initial_learning_rate': learning_rate,
                  'decay_rate': 0.95,
                  'decay_steps': 1500000,
              }
          },
      }))
  return experiment


@exp_factory.register_config_factory('yt8m_experiment')
def yt8m_experiment() -> cfg.ExperimentConfig:
  """Video classification general experiment config."""
  exp_config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YT8MTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
          'task.train_data.feature_sizes != None',
          'task.train_data.feature_names != None',
      ])

  return add_trainer(exp_config, train_batch_size=512, eval_batch_size=512)
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
  """Dequantizes a feature from the byte format to the float format.

  Args:
    feat_vector: the input 1-d vector of quantized byte values (0..255).
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    A float vector which has the same shape as feat_vector.
  """
  assert max_quantized_value > min_quantized_value
  quantized_range = max_quantized_value - min_quantized_value
  scalar = quantized_range / 255.0
  # Half-bucket offset so values land at bucket centers.
  bias = (quantized_range / 512.0) + min_quantized_value
  return feat_vector * scalar + bias


def MakeSummary(name, value):
  """Creates a TF1-style Summary proto with the given name and scalar value."""
  # BUGFIX: `tf.Summary` was removed in TF 2.x; the proto lives under
  # `tf.compat.v1`. These helpers target the TF1 `add_summary` API, so the
  # summary_writer passed to the Add*Summary functions below is presumably a
  # `tf.compat.v1.summary.FileWriter` — confirm against callers.
  summary = tf.compat.v1.Summary()
  val = summary.value.add()
  val.tag = str(name)
  val.simple_value = float(value)
  return summary


def AddGlobalStepSummary(summary_writer,
                         global_step_val,
                         global_step_info_dict,
                         summary_scope="Eval"):
  """Adds the global_step summary to the Tensorboard.

  Args:
    summary_writer: TF1-style summary writer (supports `add_summary`).
    global_step_val: an int value of the global step.
    global_step_info_dict: a dictionary of the evaluation metrics calculated
      for a mini-batch; must contain "hit_at_one", "perr" and "loss".
    summary_scope: Train or Eval.

  Returns:
    A string summarizing this global step.
  """
  this_hit_at_one = global_step_info_dict["hit_at_one"]
  this_perr = global_step_info_dict["perr"]
  this_loss = global_step_info_dict["loss"]
  # -1 sentinel means throughput was not measured for this batch.
  examples_per_second = global_step_info_dict.get("examples_per_second", -1)

  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Hit@1", this_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Perr", this_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("GlobalStep/" + summary_scope + "_Loss", this_loss),
      global_step_val)

  if examples_per_second != -1:
    summary_writer.add_summary(
        MakeSummary("GlobalStep/" + summary_scope + "_Example_Second",
                    examples_per_second), global_step_val)

  summary_writer.flush()
  info = (
      "global_step {0} | Batch Hit@1: {1:.3f} | Batch PERR: {2:.3f} | Batch "
      "Loss: {3:.3f} | Examples_per_sec: {4:.3f}").format(
          global_step_val, this_hit_at_one, this_perr, this_loss,
          examples_per_second)
  return info


def AddEpochSummary(summary_writer,
                    global_step_val,
                    epoch_info_dict,
                    summary_scope="Eval"):
  """Adds the epoch summary to the Tensorboard.

  Args:
    summary_writer: TF1-style summary writer (supports `add_summary`).
    global_step_val: an int value of the global step.
    epoch_info_dict: a dictionary of the evaluation metrics calculated for the
      whole epoch; must contain "epoch_id", "avg_hit_at_one", "avg_perr",
      "avg_loss", "aps" and "gap".
    summary_scope: Train or Eval.

  Returns:
    A string summarizing this epoch.
  """
  epoch_id = epoch_info_dict["epoch_id"]
  avg_hit_at_one = epoch_info_dict["avg_hit_at_one"]
  avg_perr = epoch_info_dict["avg_perr"]
  avg_loss = epoch_info_dict["avg_loss"]
  aps = epoch_info_dict["aps"]
  gap = epoch_info_dict["gap"]
  mean_ap = numpy.mean(aps)

  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Hit@1", avg_hit_at_one),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Perr", avg_perr),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_Avg_Loss", avg_loss),
      global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_MAP", mean_ap), global_step_val)
  summary_writer.add_summary(
      MakeSummary("Epoch/" + summary_scope + "_GAP", gap), global_step_val)
  summary_writer.flush()

  info = ("epoch/eval number {0} | Avg_Hit@1: {1:.3f} | Avg_PERR: {2:.3f} "
          "| MAP: {3:.3f} | GAP: {4:.3f} | Avg_Loss: {5:3f} | num_classes: {6}"
         ).format(epoch_id, avg_hit_at_one, avg_perr, mean_ap, gap, avg_loss,
                  len(aps))
  return info


def GetListOfFeatureNamesAndSizes(feature_names, feature_sizes):
  """Extracts the list of feature names and the dimensionality of each.

  Args:
    feature_names: string containing comma separated list of feature names
    feature_sizes: string containing comma separated list of feature sizes

  Returns:
    List of the feature names and list of the dimensionality of each feature.
    Elements in the first/second list are strings/integers.
  """
  # NOTE: the original shadowed the arguments with the comprehension loop
  # variables; use distinct names for clarity (behavior unchanged).
  list_of_feature_names = [name.strip() for name in feature_names.split(",")]
  list_of_feature_sizes = [int(size) for size in feature_sizes.split(",")]
  if len(list_of_feature_names) != len(list_of_feature_sizes):
    # Deliberately best-effort: log the mismatch but still return both lists,
    # matching the original behavior.
    logging.error(
        "length of the feature names (=%r) != length of feature "
        "sizes (=%r)", str(len(list_of_feature_names)),
        str(len(list_of_feature_sizes)))

  return list_of_feature_names, list_of_feature_sizes


def ClipGradientNorms(gradients_to_variables, max_norm):
  """Clips each gradient to the given max norm.

  Args:
    gradients_to_variables: A list of gradient to variable pairs (tuples).
    max_norm: the maximum norm value.

  Returns:
    A list of clipped gradient to variable pairs.
  """
  clipped_grads_and_vars = []
  for grad, var in gradients_to_variables:
    if grad is not None:
      # IndexedSlices (sparse gradients) must be clipped on their values and
      # re-wrapped; dense tensors can be clipped directly.
      if isinstance(grad, tf.IndexedSlices):
        tmp = tf.clip_by_norm(grad.values, max_norm)
        grad = tf.IndexedSlices(tmp, grad.indices, grad.dense_shape)
      else:
        grad = tf.clip_by_norm(grad, max_norm)
    clipped_grads_and_vars.append((grad, var))
  return clipped_grads_and_vars


def CombineGradients(tower_grads):
  """Calculates the combined gradient for each shared variable across towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer list
      is over towers. The inner list is over the gradient calculation for each
      tower.

  Returns:
    List of pairs of (gradient, variable) where the gradient has been summed
    across all towers.
  """
  # Drop (None, var) entries so tf.stack below only sees real gradients.
  filtered_grads = [
      [x for x in grad_list if x[0] is not None] for grad_list in tower_grads
  ]
  final_grads = []
  for i in range(len(filtered_grads[0])):
    grads = [filtered_grads[t][i] for t in range(len(filtered_grads))]
    grad = tf.stack([x[0] for x in grads], 0)
    grad = tf.reduce_sum(grad, 0)
    # The variable reference is taken from the first tower; all towers share
    # the same variable at position i.
    final_grads.append((
        grad,
        filtered_grads[0][i][1],
    ))

  return final_grads
from typing import Dict

import tensorflow as tf
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.projects.yt8m.dataloaders import utils


def resize_axis(tensor, axis, new_size, fill_value=0):
  """Truncates or pads a tensor to new_size on a given axis.

  Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
  size increases, the padding will be performed at the end, using fill_value.

  Args:
    tensor: The tensor to be resized.
    axis: An integer representing the dimension to be sliced.
    new_size: An integer or 0d tensor representing the new value for
      tensor.shape[axis].
    fill_value: Value to use to fill any new entries in the tensor. Will be
      cast to the type of tensor.

  Returns:
    The resized tensor, with static shape set on `axis`.
  """
  tensor = tf.convert_to_tensor(tensor)
  shape = tf.unstack(tf.shape(tensor))

  # Amount of padding needed on `axis`; zero when truncating.
  pad_shape = shape[:]
  pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

  # Clamp the slice length so truncation never reads past the end.
  shape[axis] = tf.minimum(shape[axis], new_size)
  shape = tf.stack(shape)

  resized = tf.concat([
      tf.slice(tensor, tf.zeros_like(shape), shape),
      tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
  ], axis)

  # Update static shape so downstream ops see new_size on `axis`.
  new_shape = tensor.shape.as_list()  # A copy is being made.
  new_shape[axis] = new_size
  resized = tf.ensure_shape(resized, new_shape)
  return resized


def _process_segment_and_label(video_matrix, num_frames, contexts,
                               segment_labels, segment_size,
                               num_classes) -> Dict[str, tf.Tensor]:
  """Processes a batched Tensor of frames.

  The same parameters used in process should be used here.

  Args:
    video_matrix: different features concatenated into one matrix.
    num_frames: Number of frames per subclip.
    contexts: context information extracted from decoder.
    segment_labels: if we read segment labels instead.
    segment_size: the segment_size used for reading segments.
    num_classes: a positive integer for the number of classes.

  Returns:
    output: dictionary containing batch information with keys "video_ids",
    "video_matrix", "labels", "num_frames", and (segment mode only)
    "label_weights".
  """
  # Partition frame-level feature matrix to segment-level feature matrix.
  if segment_labels:
    start_times = contexts["segment_start_times"].values
    # Here we assume all the segments that started at the same start time has
    # the same segment_size.
    uniq_start_times, seg_idxs = tf.unique(start_times, out_idx=tf.dtypes.int64)
    # Range gather matrix, e.g., [[0,1,2],[1,2,3]] for segment_size == 3.
    range_mtx = tf.expand_dims(
        uniq_start_times, axis=-1) + tf.expand_dims(
            tf.range(0, segment_size, dtype=tf.int64), axis=0)
    # Shape: [num_segment, segment_size, feature_dim].
    batch_video_matrix = tf.gather_nd(video_matrix,
                                      tf.expand_dims(range_mtx, axis=-1))
    num_segment = tf.shape(batch_video_matrix)[0]
    # Repeat the video id once per segment so ids align with segments.
    batch_video_ids = tf.reshape(
        tf.tile([contexts["id"]], [num_segment]), (num_segment,))
    batch_frames = tf.reshape(
        tf.tile([segment_size], [num_segment]), (num_segment,))
    batch_frames = tf.cast(tf.expand_dims(batch_frames, 1), tf.float32)

    # For segment labels, all labels are not exhaustively rated. So we only
    # evaluate the rated labels.

    # Label indices for each segment, shape: [num_segment, 2].
    label_indices = tf.stack([seg_idxs, contexts["segment_labels"].values],
                             axis=-1)
    label_values = contexts["segment_scores"].values
    sparse_labels = tf.sparse.SparseTensor(label_indices, label_values,
                                           (num_segment, num_classes))
    batch_labels = tf.sparse.to_dense(sparse_labels, validate_indices=False)

    # Weight of 1.0 on each rated (segment, class) pair; unrated pairs stay 0.
    sparse_label_weights = tf.sparse.SparseTensor(
        label_indices, tf.ones_like(label_values, dtype=tf.float32),
        (num_segment, num_classes))
    batch_label_weights = tf.sparse.to_dense(
        sparse_label_weights, validate_indices=False)
  else:
    # Process video-level labels: multi-hot bool vector over all classes.
    label_indices = contexts["labels"].values
    sparse_labels = tf.sparse.SparseTensor(
        tf.expand_dims(label_indices, axis=-1),
        tf.ones_like(contexts["labels"].values, dtype=tf.bool), (num_classes,))
    labels = tf.sparse.to_dense(
        sparse_labels, default_value=False, validate_indices=False)

    # convert to batch format (single-element batch).
    batch_video_ids = tf.expand_dims(contexts["id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    # NOTE(review): here `num_frames` keeps its integer dtype, while the
    # segment branch casts batch_frames to float32 — confirm consumers
    # tolerate both dtypes.
    batch_frames = tf.expand_dims(num_frames, 0)
    batch_label_weights = None

  output_dict = {
      "video_ids": batch_video_ids,
      "video_matrix": batch_video_matrix,
      "labels": batch_labels,
      "num_frames": batch_frames,
  }
  if batch_label_weights is not None:
    output_dict["label_weights"] = batch_label_weights

  return output_dict


def _get_video_matrix(features, feature_size, max_frames, max_quantized_value,
                      min_quantized_value):
  """Decodes features from an input string and quantizes it.

  Args:
    features: raw feature values, as a byte string of quantized uint8 values.
    feature_size: length of each frame feature vector.
    max_frames: number of frames (rows) in the output feature_matrix.
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    feature_matrix: matrix of all frame-features, padded/truncated to
      max_frames rows.
    num_frames: number of real (pre-padding) frames in the sequence, capped
      at max_frames.
  """
  decoded_features = tf.reshape(
      tf.cast(tf.io.decode_raw(features, tf.uint8), tf.float32),
      [-1, feature_size])

  num_frames = tf.math.minimum(tf.shape(decoded_features)[0], max_frames)
  # Map byte-quantized values back into the float range.
  feature_matrix = utils.Dequantize(decoded_features, max_quantized_value,
                                    min_quantized_value)
  feature_matrix = resize_axis(feature_matrix, 0, max_frames)
  return feature_matrix, num_frames


def _concat_features(features, feature_names, feature_sizes, max_frames,
                     max_quantized_value, min_quantized_value):
  """Loads (potentially) different types of features and concatenates them.

  Args:
    features: raw feature values.
    feature_names: list of feature names.
    feature_sizes: list of features sizes.
    max_frames: number of frames in the sequence.
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    video_matrix: different features concatenated into one matrix.
    num_frames: the number of frames in the video, taken from the first
      feature and capped at max_frames.
  """

  num_features = len(feature_names)
  assert num_features > 0, "No feature selected: feature_names is empty!"

  assert len(feature_names) == len(feature_sizes), (
      "length of feature_names (={}) != length of feature_sizes (={})".format(
          len(feature_names), len(feature_sizes)))

  num_frames = -1  # the number of frames in the video
  feature_matrices = [None] * num_features  # an array of different features
  for feature_index in range(num_features):
    feature_matrix, num_frames_in_this_feature = _get_video_matrix(
        features[feature_names[feature_index]], feature_sizes[feature_index],
        max_frames, max_quantized_value, min_quantized_value)
    # Only the first feature's frame count is kept; later features are
    # assumed to have the same number of frames.
    if num_frames == -1:
      num_frames = num_frames_in_this_feature

    feature_matrices[feature_index] = feature_matrix

  # cap the number of frames at max_frames
  num_frames = tf.minimum(num_frames, max_frames)

  # concatenate different features along the feature dimension
  video_matrix = tf.concat(feature_matrices, 1)

  return video_matrix, num_frames


class Decoder(decoder.Decoder):
  """A tf.SequenceExample decoder for the YT8M classification task."""

  def __init__(
      self,
      input_params: exp_cfg.DataConfig,
  ):
    # Segment mode reads per-segment sparse labels; video mode reads a single
    # sparse "labels" feature. Both always read the string "id" context.
    self._segment_labels = input_params.segment_labels
    self._feature_names = input_params.feature_names
    self._context_features = {
        "id": tf.io.FixedLenFeature([], tf.string),
    }
    if self._segment_labels:
      self._context_features.update({
          # There is no need to read end-time given we always assume the
          # segment has the same size.
          "segment_labels": tf.io.VarLenFeature(tf.int64),
          "segment_start_times": tf.io.VarLenFeature(tf.int64),
          "segment_scores": tf.io.VarLenFeature(tf.float32)
      })
    else:
      self._context_features.update({"labels": tf.io.VarLenFeature(tf.int64)})

    # One byte-string sequence feature per configured feature name
    # (e.g. "rgb", "audio").
    self._sequence_features = {
        feature_name: tf.io.FixedLenSequenceFeature([], dtype=tf.string)
        for feature_name in self._feature_names
    }

  def decode(self, serialized_example):
    """Parses a single tf.SequenceExample into context and feature tensors."""

    contexts, features = tf.io.parse_single_sequence_example(
        serialized_example,
        context_features=self._context_features,
        sequence_features=self._sequence_features)

    return {"contexts": contexts, "features": features}
_process_segment_and_label(self.video_matrix, self.num_frames, + decoded_tensors["contexts"], + self._segment_labels, + self._segment_size, + self._num_classes) + return output_dict + + def _parse_eval_data(self, decoded_tensors): + """Parses data for evaluation.""" + # loads (potentially) different types of features and concatenates them + self.video_matrix, self.num_frames = _concat_features( + decoded_tensors["features"], self._feature_names, self._feature_sizes, + self._max_frames, self._max_quantized_value, self._min_quantized_value) + output_dict = _process_segment_and_label(self.video_matrix, self.num_frames, + decoded_tensors["contexts"], + self._segment_labels, + self._segment_size, + self._num_classes) + return output_dict # batched + + def parse_fn(self, is_training): + """Returns a parse fn that reads and parses raw tensors from the decoder. + + Args: + is_training: a `bool` to indicate whether it is in training mode. + + Returns: + parse: a `callable` that takes the serialized example and generate the + images, labels tuple where labels is a dict of Tensors that contains + labels. 
+ """ + + def parse(decoded_tensors): + """Parses the serialized example data.""" + if is_training: + return self._parse_train_data(decoded_tensors) + else: + return self._parse_eval_data(decoded_tensors) + + return parse + + +class PostBatchProcessor(): + """Processes a video and label dataset which is batched.""" + + def __init__(self, input_params: exp_cfg.DataConfig): + self.segment_labels = input_params.segment_labels + self.num_classes = input_params.num_classes + self.segment_size = input_params.segment_size + + def post_fn(self, batched_tensors): + """Processes batched Tensors.""" + video_ids = batched_tensors["video_ids"] + video_matrix = batched_tensors["video_matrix"] + labels = batched_tensors["labels"] + num_frames = batched_tensors["num_frames"] + label_weights = None + + if self.segment_labels: + # [batch x num_segment x segment_size x num_features] + # -> [batch * num_segment x segment_size x num_features] + video_ids = tf.reshape(video_ids, [-1]) + video_matrix = tf.reshape(video_matrix, [-1, self.segment_size, 1152]) + labels = tf.reshape(labels, [-1, self.num_classes]) + num_frames = tf.reshape(num_frames, [-1, 1]) + + label_weights = tf.reshape(batched_tensors["label_weights"], + [-1, self.num_classes]) + + else: + video_matrix = tf.squeeze(video_matrix) + labels = tf.squeeze(labels) + + batched_tensors = { + "video_ids": video_ids, + "video_matrix": video_matrix, + "labels": labels, + "num_frames": num_frames, + } + + if label_weights is not None: + batched_tensors["label_weights"] = label_weights + + return batched_tensors + + +class TransformBatcher(): + """Performs manual batching on input dataset.""" + + def __init__(self, input_params: exp_cfg.DataConfig): + self._segment_labels = input_params.segment_labels + self._global_batch_size = input_params.global_batch_size + self._is_training = input_params.is_training + + def batch_fn(self, dataset, input_context): + """Add padding when segment_labels is true.""" + per_replica_batch_size = 
input_context.get_per_replica_batch_size( + self._global_batch_size) if input_context else self._global_batch_size + if not self._segment_labels: + dataset = dataset.batch(per_replica_batch_size, drop_remainder=True) + else: + # add padding + pad_shapes = { + "video_ids": [None], + "video_matrix": [None, None, None], + "labels": [None, None], + "num_frames": [None, None], + "label_weights": [None, None] + } + pad_values = { + "video_ids": None, + "video_matrix": 0.0, + "labels": -1.0, + "num_frames": 0.0, + "label_weights": 0.0 + } + dataset = dataset.padded_batch( + per_replica_batch_size, + padded_shapes=pad_shapes, + drop_remainder=True, + padding_values=pad_values) + return dataset diff --git a/official/vision/beta/projects/yt8m/eval_utils/average_precision_calculator.py b/official/vision/beta/projects/yt8m/eval_utils/average_precision_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..9bf1123793d5ee8f726687a9681936c7d49553bf --- /dev/null +++ b/official/vision/beta/projects/yt8m/eval_utils/average_precision_calculator.py @@ -0,0 +1,273 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Calculate or keep track of the interpolated average precision. + +It provides an interface for calculating interpolated average precision for an +entire list or the top-n ranked items. 
For the definition of the
+(non-)interpolated average precision:
+http://trec.nist.gov/pubs/trec15/appendices/CE.MEASURES06.pdf
+
+Example usages:
+1) Use it as a static function call to directly calculate average precision for
+a short ranked list in the memory.
+
+```
+import random
+
+p = np.array([random.random() for _ in xrange(10)])
+a = np.array([random.choice([0, 1]) for _ in xrange(10)])
+
+ap = average_precision_calculator.AveragePrecisionCalculator.ap(p, a)
+```
+
+2) Use it as an object for long ranked list that cannot be stored in memory or
+the case where partial predictions can be observed at a time (Tensorflow
+predictions). In this case, we first call the function accumulate many times
+to process parts of the ranked list. After processing all the parts, we call
+peek_ap_at_n.
+```
+p1 = np.array([random.random() for _ in xrange(5)])
+a1 = np.array([random.choice([0, 1]) for _ in xrange(5)])
+p2 = np.array([random.random() for _ in xrange(5)])
+a2 = np.array([random.choice([0, 1]) for _ in xrange(5)])
+
+# interpolated average precision at 10 using 1000 break points
+calculator = average_precision_calculator.AveragePrecisionCalculator(10)
+calculator.accumulate(p1, a1)
+calculator.accumulate(p2, a2)
+ap3 = calculator.peek_ap_at_n()
+```
+"""
+
+import heapq
+import numbers
+import random
+
+import numpy
+
+
+class AveragePrecisionCalculator(object):
+  """Calculate the average precision and average precision at n."""
+
+  def __init__(self, top_n=None):
+    """Construct an AveragePrecisionCalculator to calculate average precision.
+
+    This class is used to calculate the average precision for a single label.
+
+    Args:
+      top_n: A positive Integer specifying the average precision at n, or None
+        to use all provided data points.
+
+    Raises:
+      ValueError: An error occurred when the top_n is not a positive integer.
+ """ + if not ((isinstance(top_n, int) and top_n >= 0) or top_n is None): + raise ValueError("top_n must be a positive integer or None.") + + self._top_n = top_n # average precision at n + self._total_positives = 0 # total number of positives have seen + self._heap = [] # max heap of (prediction, actual) + + @property + def heap_size(self): + """Gets the heap size maintained in the class.""" + return len(self._heap) + + @property + def num_accumulated_positives(self): + """Gets the number of positive samples that have been accumulated.""" + return self._total_positives + + def accumulate(self, predictions, actuals, num_positives=None): + """Accumulate the predictions and their ground truth labels. + + After the function call, we may call peek_ap_at_n to actually calculate + the average precision. + Note predictions and actuals must have the same shape. + + Args: + predictions: a list storing the prediction scores. + actuals: a list storing the ground truth labels. Any value larger than 0 + will be treated as positives, otherwise as negatives. num_positives = If + the 'predictions' and 'actuals' inputs aren't complete, then it's + possible some true positives were missed in them. In that case, you can + provide 'num_positives' in order to accurately track recall. + num_positives: number of positive examples. + + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. 
+ """ + if len(predictions) != len(actuals): + raise ValueError("the shape of predictions and actuals does not match.") + + if num_positives is not None: + if not isinstance(num_positives, numbers.Number) or num_positives < 0: + raise ValueError( + "'num_positives' was provided but it was a negative number.") + + if num_positives is not None: + self._total_positives += num_positives + else: + self._total_positives += numpy.size( + numpy.where(numpy.array(actuals) > 1e-5)) + topk = self._top_n + heap = self._heap + + for i in range(numpy.size(predictions)): + if topk is None or len(heap) < topk: + heapq.heappush(heap, (predictions[i], actuals[i])) + else: + if predictions[i] > heap[0][0]: # heap[0] is the smallest + heapq.heappop(heap) + heapq.heappush(heap, (predictions[i], actuals[i])) + + def clear(self): + """Clear the accumulated predictions.""" + self._heap = [] + self._total_positives = 0 + + def peek_ap_at_n(self): + """Peek the non-interpolated average precision at n. + + Returns: + The non-interpolated average precision at n (default 0). + If n is larger than the length of the ranked list, + the average precision will be returned. + """ + if self.heap_size <= 0: + return 0 + predlists = numpy.array(list(zip(*self._heap))) + + ap = self.ap_at_n( + predlists[0], + predlists[1], + n=self._top_n, + total_num_positives=self._total_positives) + return ap + + @staticmethod + def ap(predictions, actuals): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. 
+ + Raises: + ValueError: An error occurred when the format of the input is not the + numpy 1-D array or the shape of predictions and actuals does not match. + """ + return AveragePrecisionCalculator.ap_at_n(predictions, actuals, n=None) + + @staticmethod + def ap_at_n(predictions, actuals, n=20, total_num_positives=None): + """Calculate the non-interpolated average precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + actuals: a numpy 1-D array storing the ground truth labels. Any value + larger than 0 will be treated as positives, otherwise as negatives. + n: the top n items to be considered in ap@n. + total_num_positives : (optionally) you can specify the number of total + positive in the list. If specified, it will be used in calculation. + + Returns: + The non-interpolated average precision at n. + If n is larger than the length of the ranked list, + the average precision will be returned. + + Raises: + ValueError: An error occurred when + 1) the format of the input is not the numpy 1-D array; + 2) the shape of predictions and actuals does not match; + 3) the input n is not a positive integer. + """ + if len(predictions) != len(actuals): + raise ValueError("the shape of predictions and actuals does not match.") + + if n is not None: + if not isinstance(n, int) or n <= 0: + raise ValueError("n must be 'None' or a positive integer." + " It was '%s'." 
% n) + + ap = 0.0 + + predictions = numpy.array(predictions) + actuals = numpy.array(actuals) + + # add a shuffler to avoid overestimating the ap + predictions, actuals = AveragePrecisionCalculator._shuffle( + predictions, actuals) + sortidx = sorted( + range(len(predictions)), key=lambda k: predictions[k], reverse=True) + + if total_num_positives is None: + numpos = numpy.size(numpy.where(actuals > 0)) + else: + numpos = total_num_positives + + if numpos == 0: + return 0 + + if n is not None: + numpos = min(numpos, n) + delta_recall = 1.0 / numpos + poscount = 0.0 + + # calculate the ap + r = len(sortidx) + if n is not None: + r = min(r, n) + for i in range(r): + if actuals[sortidx[i]] > 0: + poscount += 1 + ap += poscount / (i + 1) * delta_recall + return ap + + @staticmethod + def _shuffle(predictions, actuals): + random.seed(0) + suffidx = random.sample(range(len(predictions)), len(predictions)) + predictions = predictions[suffidx] + actuals = actuals[suffidx] + return predictions, actuals + + @staticmethod + def _zero_one_normalize(predictions, epsilon=1e-7): + """Normalize the predictions to the range between 0.0 and 1.0. + + For some predictions like SVM predictions, we need to normalize them before + calculate the interpolated average precision. The normalization will not + change the rank in the original list and thus won't change the average + precision. + + Args: + predictions: a numpy 1-D array storing the sparse prediction scores. + epsilon: a small constant to avoid denominator being zero. + + Returns: + The normalized prediction. 
+ """ + denominator = numpy.max(predictions) - numpy.min(predictions) + ret = (predictions - numpy.min(predictions)) / numpy.max( + denominator, epsilon) + return ret diff --git a/official/vision/beta/projects/yt8m/eval_utils/eval_util.py b/official/vision/beta/projects/yt8m/eval_utils/eval_util.py new file mode 100644 index 0000000000000000000000000000000000000000..b42660121d88b67ea77ecbfa4bcd6e5de6a66402 --- /dev/null +++ b/official/vision/beta/projects/yt8m/eval_utils/eval_util.py @@ -0,0 +1,271 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides functions to help with evaluating models.""" +import numpy as np +import tensorflow as tf +from official.vision.beta.projects.yt8m.eval_utils import \ + average_precision_calculator as ap_calculator +from official.vision.beta.projects.yt8m.eval_utils import \ + mean_average_precision_calculator as map_calculator + + +def flatten(l): + """Merges a list of lists into a single list.""" + # pylint: disable=g-complex-comprehension + return [item for sublist in l for item in sublist] + # pylint: enable=g-complex-comprehension + + +def calculate_hit_at_one(predictions, actuals): + """Performs a local (numpy) calculation of the hit at one. + + Args: + predictions: Matrix containing the outputs of the model. Dimensions are + 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x + 'num_classes'. 
+ + Returns: + float: The average hit at one across the entire batch. + """ + top_prediction = np.argmax(predictions, 1) + hits = actuals[np.arange(actuals.shape[0]), top_prediction] + return np.average(hits) + + +def calculate_precision_at_equal_recall_rate(predictions, actuals): + """Performs a local (numpy) calculation of the PERR. + + Args: + predictions: Matrix containing the outputs of the model. Dimensions are + 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x + 'num_classes'. + + Returns: + float: The average precision at equal recall rate across the entire batch. + """ + aggregated_precision = 0.0 + num_videos = actuals.shape[0] + for row in np.arange(num_videos): + num_labels = int(np.sum(actuals[row])) + top_indices = np.argpartition(predictions[row], -num_labels)[-num_labels:] + item_precision = 0.0 + for label_index in top_indices: + if predictions[row][label_index] > 0: + item_precision += actuals[row][label_index] + item_precision /= top_indices.size + aggregated_precision += item_precision + aggregated_precision /= num_videos + return aggregated_precision + + +def calculate_gap(predictions, actuals, top_k=20): + """Performs a local (numpy) calculation of the global average precision. + + Only the top_k predictions are taken for each of the videos. + + Args: + predictions: Matrix containing the outputs of the model. Dimensions are + 'batch' x 'num_classes'. + actuals: Matrix containing the ground truth labels. Dimensions are 'batch' x + 'num_classes'. + top_k: How many predictions to use per video. + + Returns: + float: The global average precision. 
+ """ + gap_calculator = ap_calculator.AveragePrecisionCalculator() + sparse_predictions, sparse_labels, num_positives = top_k_by_class( + predictions, actuals, top_k) + gap_calculator.accumulate( + flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) + return gap_calculator.peek_ap_at_n() + + +def top_k_by_class(predictions, labels, k=20): + """Extracts the top k predictions for each video, sorted by class. + + Args: + predictions: A numpy matrix containing the outputs of the model. Dimensions + are 'batch' x 'num_classes'. + labels: A numpy matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + k: the top k non-zero entries to preserve in each prediction. + + Returns: + A tuple (predictions,labels, true_positives). 'predictions' and 'labels' + are lists of lists of floats. 'true_positives' is a list of scalars. The + length of the lists are equal to the number of classes. The entries in the + predictions variable are probability predictions, and + the corresponding entries in the labels variable are the ground truth for + those predictions. The entries in 'true_positives' are the number of true + positives for each class in the ground truth. + + Raises: + ValueError: An error occurred when the k is not a positive integer. 
+ """ + if k <= 0: + raise ValueError("k must be a positive integer.") + k = min(k, predictions.shape[1]) + num_classes = predictions.shape[1] + prediction_triplets = [] + for video_index in range(predictions.shape[0]): + prediction_triplets.extend( + top_k_triplets(predictions[video_index], labels[video_index], k)) + out_predictions = [[] for _ in range(num_classes)] + out_labels = [[] for _ in range(num_classes)] + for triplet in prediction_triplets: + out_predictions[triplet[0]].append(triplet[1]) + out_labels[triplet[0]].append(triplet[2]) + out_true_positives = [np.sum(labels[:, i]) for i in range(num_classes)] + + return out_predictions, out_labels, out_true_positives + + +def top_k_triplets(predictions, labels, k=20): + """Get the top_k for a 1-d numpy array. + + Args: + predictions: A numpy matrix containing the outputs of the model. Dimensions + are 'batch' x 'num_classes'. + labels: A numpy matrix containing the ground truth labels. + Dimensions are 'batch' x 'num_classes'. + k: The number top predictions to pick. + Returns: + a sparse list of tuples in (prediction, class) format. + """ + m = len(predictions) + k = min(k, m) + indices = np.argpartition(predictions, -k)[-k:] + return [(index, predictions[index], labels[index]) for index in indices] + + +class EvaluationMetrics(object): + """A class to store the evaluation metrics.""" + + def __init__(self, num_class, top_k, top_n): + """Construct an EvaluationMetrics object to store the evaluation metrics. + + Args: + num_class: A positive integer specifying the number of classes. + top_k: A positive integer specifying how many predictions are considered + per video. + top_n: A positive Integer specifying the average precision at n, or None + to use all provided data points. + + Raises: + ValueError: An error occurred when MeanAveragePrecisionCalculator cannot + not be constructed. 
+ """ + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.map_calculator = map_calculator.MeanAveragePrecisionCalculator( + num_class, top_n=top_n) + self.global_ap_calculator = ap_calculator.AveragePrecisionCalculator() + self.top_k = top_k + self.num_examples = 0 + self.num_class = num_class + + def accumulate(self, predictions, labels): + """Accumulate the metrics calculated locally for this mini-batch. + + Args: + predictions: A numpy matrix containing the outputs of the model. + Dimensions are 'batch' x 'num_classes'. + labels: A numpy matrix containing the ground truth labels. Dimensions are + 'batch' x 'num_classes'. + + Returns: + dictionary: A dictionary storing the metrics for the mini-batch. + + Raises: + ValueError: An error occurred when the shape of predictions and actuals + does not match. + """ + predictions, labels = self._convert_to_numpy( + predictions=predictions[0], groundtruths=labels[0]) + batch_size = labels.shape[0] + mean_hit_at_one = calculate_hit_at_one(predictions, labels) + mean_perr = calculate_precision_at_equal_recall_rate(predictions, labels) + + # Take the top 20 predictions. + sparse_predictions, sparse_labels, num_positives = top_k_by_class( + predictions, labels, self.top_k) + self.map_calculator.accumulate(sparse_predictions, sparse_labels, + num_positives) + self.global_ap_calculator.accumulate( + flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives)) + + self.num_examples += batch_size + self.sum_hit_at_one += mean_hit_at_one * batch_size + self.sum_perr += mean_perr * batch_size + + return {"hit_at_one": mean_hit_at_one, "perr": mean_perr} + + def get(self): + """Calculate the evaluation metrics for the whole epoch. + + Raises: + ValueError: If no examples were accumulated. + + Returns: + dictionary: a dictionary storing the evaluation metrics for the epoch. The + dictionary has the fields: avg_hit_at_one, avg_perr, and + aps (default nan). 
+ """ + if self.num_examples <= 0: + raise ValueError("total_sample must be positive.") + avg_hit_at_one = self.sum_hit_at_one / self.num_examples + avg_perr = self.sum_perr / self.num_examples + + aps = self.map_calculator.peek_map_at_n() + mean_ap = sum(aps) / self.num_class + gap = self.global_ap_calculator.peek_ap_at_n() + + epoch_info_dict = { + "avg_hit_at_one": avg_hit_at_one, + "avg_perr": avg_perr, + "map": mean_ap, + "gap": gap + } + return epoch_info_dict + + def clear(self): + """Clear the evaluation metrics and reset the EvaluationMetrics object.""" + self.sum_hit_at_one = 0.0 + self.sum_perr = 0.0 + self.map_calculator.clear() + self.global_ap_calculator.clear() + self.num_examples = 0 + + @property + def name(self): + return "avg_prec_metric" + + def _convert_to_numpy(self, groundtruths, predictions): + """Converts tesnors to numpy arrays.""" + if groundtruths is not None: + labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths) + else: + labels = groundtruths + + if predictions is not None: + outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions) + else: + outputs = predictions + + labels = labels * 1 + return outputs, labels diff --git a/official/vision/beta/projects/yt8m/eval_utils/mean_average_precision_calculator.py b/official/vision/beta/projects/yt8m/eval_utils/mean_average_precision_calculator.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6a5d6ceaa1f2bff921c7f4f8ce3f66ff991597 --- /dev/null +++ b/official/vision/beta/projects/yt8m/eval_utils/mean_average_precision_calculator.py @@ -0,0 +1,115 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Calculate the mean average precision. + +It provides an interface for calculating mean average precision +for an entire list or the top-n ranked items. + +Example usages: +We first call the function accumulate many times to process parts of the ranked +list. After processing all the parts, we call peek_map_at_n +to calculate the mean average precision. + +``` +import random + +p = np.array([[random.random() for _ in xrange(50)] for _ in xrange(1000)]) +a = np.array([[random.choice([0, 1]) for _ in xrange(50)] + for _ in xrange(1000)]) + +# mean average precision for 50 classes. +calculator = mean_average_precision_calculator.MeanAveragePrecisionCalculator( + num_class=50) +calculator.accumulate(p, a) +aps = calculator.peek_map_at_n() +``` +""" + +from official.vision.beta.projects.yt8m.eval_utils import \ + average_precision_calculator + + +class MeanAveragePrecisionCalculator(object): + """This class is to calculate mean average precision.""" + + def __init__(self, num_class, filter_empty_classes=True, top_n=None): + """Construct a calculator to calculate the (macro) average precision. + + Args: + num_class: A positive Integer specifying the number of classes. + filter_empty_classes: whether to filter classes without any positives. + top_n: A positive Integer specifying the average precision at n, or None + to use all provided data points. + + Raises: + ValueError: An error occurred when num_class is not a positive integer; + or the top_n_array is not a list of positive integers. 
+ """ + if not isinstance(num_class, int) or num_class <= 1: + raise ValueError("num_class must be a positive integer.") + + self._ap_calculators = [] # member of AveragePrecisionCalculator + self._num_class = num_class # total number of classes + self._filter_empty_classes = filter_empty_classes + for _ in range(num_class): + self._ap_calculators.append( + average_precision_calculator.AveragePrecisionCalculator(top_n=top_n)) + + def accumulate(self, predictions, actuals, num_positives=None): + """Accumulate the predictions and their ground truth labels. + + Args: + predictions: A list of lists storing the prediction scores. The outer + dimension corresponds to classes. + actuals: A list of lists storing the ground truth labels. The dimensions + should correspond to the predictions input. Any value larger than 0 will + be treated as positives, otherwise as negatives. + num_positives: If provided, it is a list of numbers representing the + number of true positives for each class. If not provided, the number of + true positives will be inferred from the 'actuals' array. + + Raises: + ValueError: An error occurred when the shape of predictions and actuals + does not match. + """ + if not num_positives: + num_positives = [None for i in range(self._num_class)] + + calculators = self._ap_calculators + for i in range(self._num_class): + calculators[i].accumulate(predictions[i], actuals[i], num_positives[i]) + + def clear(self): + for calculator in self._ap_calculators: + calculator.clear() + + def is_empty(self): + return ([calculator.heap_size for calculator in self._ap_calculators + ] == [0 for _ in range(self._num_class)]) + + def peek_map_at_n(self): + """Peek the non-interpolated mean average precision at n. + + Returns: + An array of non-interpolated average precision at n (default 0) for each + class. 
+ """ + aps = [] + for i in range(self._num_class): + if (not self._filter_empty_classes or + self._ap_calculators[i].num_accumulated_positives > 0): + ap = self._ap_calculators[i].peek_ap_at_n() + aps.append(ap) + return aps diff --git a/official/vision/beta/projects/yt8m/experiments/yt8m.yaml b/official/vision/beta/projects/yt8m/experiments/yt8m.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c099f23f90b3c94b874e60713b63de6ebd1c1c3a --- /dev/null +++ b/official/vision/beta/projects/yt8m/experiments/yt8m.yaml @@ -0,0 +1,56 @@ +# yt8m config file +task: + model: + cluster_size: 8192 + hidden_size: 1024 + add_batch_norm: true + sample_random_frames: true + is_training: true + activation: "sigmoid" + pooling_method: "max" + yt8m_agg_classifier_model: "MoeModel" + train_data: + name: 'yt8m' + split: 'train' + feature_sizes: !!python/tuple + - 1024 + - 128 + feature_names: !!python/tuple + - "rgb" + - "audio" + segment_size: 1 + segment_labels: false + temporal_stride: 1 + max_frames: 300 + num_frames: 300 + num_classes: 3862 + num_devices: 1 + input_path: 'gs://youtube8m-ml/2/frame/train/train*.tfrecord' + is_training: true + random_seed: 123 + validation_data: + name: 'yt8m' + split: 'train' + feature_sizes: !!python/tuple + - 1024 + - 128 + feature_names: !!python/tuple + - "rgb" + - "audio" + segment_size: 1 + segment_labels: true + temporal_stride: 1 + max_frames: 300 + num_frames: 300 + num_classes: 3862 + num_devices: 1 + input_path: 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord' + is_training: false + random_seed: 123 + losses: + name: 'binary_crossentropy' + from_logits: false + label_smoothing: 0.0 + gradient_clip_norm: 1.0 + num_readers: 8 + top_k: 20 diff --git a/official/vision/beta/projects/yt8m/experiments/yt8m_test.yaml b/official/vision/beta/projects/yt8m/experiments/yt8m_test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9a7ef94cc73373db1c6015d4a0b665a16fba4d90 --- /dev/null +++ 
b/official/vision/beta/projects/yt8m/experiments/yt8m_test.yaml @@ -0,0 +1,31 @@ +# yt8m test config file +task: + model: + cluster_size: 2048 + hidden_size: 2048 + add_batch_norm: true + sample_random_frames: true + is_training: true + activation: "relu6" + pooling_method: "average" + yt8m_agg_classifier_model: "MoeModel" + train_data: + segment_labels: false + temporal_stride: 1 + num_devices: 1 + input_path: 'gs://youtube8m-ml/2/frame/train/train*.tfrecord' + num_examples: 8000 + validation_data: + segment_size: 5 + segment_labels: true + temporal_stride: 1 + num_devices: 1 + input_path: 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord' + num_examples: 2000 + losses: + name: 'binary_crossentropy' + from_logits: false + label_smoothing: 0.0 + gradient_clip_norm: 1.0 + num_readers: 8 + top_k: 20 diff --git a/official/vision/beta/projects/yt8m/modeling/__init__.py b/official/vision/beta/projects/yt8m/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e419af524b5f349fe04abfa820c3cb51b777d422 --- /dev/null +++ b/official/vision/beta/projects/yt8m/modeling/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Contains model definitions."""

import tensorflow as tf

layers = tf.keras.layers
regularizers = tf.keras.regularizers
# Default number of mixtures (excluding the dummy 'expert') used by MoeModel
# when the caller does not specify one.
moe_num_mixtures = 2


class LogisticModel():
  """Logistic model with L2 regularization."""

  def create_model(self, model_input, vocab_size, l2_penalty=1e-8):
    """Creates a logistic model.

    Args:
      model_input: 'batch' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      l2_penalty: L2 weight regularization ratio.

    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    classifier = layers.Dense(
        vocab_size,
        activation=tf.nn.sigmoid,
        kernel_regularizer=regularizers.l2(l2_penalty))
    return {"predictions": classifier(model_input)}


class MoeModel():
  """A softmax over a mixture of logistic models (with L2 regularization)."""

  def create_model(self,
                   model_input,
                   vocab_size,
                   num_mixtures=None,
                   l2_penalty=1e-8):
    """Creates a Mixture of (Logistic) Experts model.

    The model consists of a per-class softmax distribution over a
    configurable number of logistic classifiers. One of the classifiers
    in the mixture is not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.

    Returns:
      A dictionary with a tensor containing the probability predictions
      of the model in the 'predictions' key. The dimensions of the tensor
      are batch_size x num_classes.
    """
    num_mixtures = num_mixtures or moe_num_mixtures

    gates = layers.Dense(
        vocab_size * (num_mixtures + 1),
        activation=None,
        bias_initializer=None,
        kernel_regularizer=regularizers.l2(l2_penalty))(
            model_input)
    experts = layers.Dense(
        vocab_size * num_mixtures,
        activation=None,
        kernel_regularizer=regularizers.l2(l2_penalty))(
            model_input)

    # Per-(example, label) softmax over the real mixtures plus the implicit
    # dummy expert: (batch_size * vocab_size) x (num_mixtures + 1).
    gating_distribution = tf.nn.softmax(
        tf.reshape(gates, [-1, num_mixtures + 1]))
    # Per-(example, label) expert predictions:
    # (batch_size * vocab_size) x num_mixtures.
    expert_distribution = tf.nn.sigmoid(
        tf.reshape(experts, [-1, num_mixtures]))

    # Drop the dummy expert's gate and mix the real experts.
    mixed = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    return {"predictions": tf.reshape(mixed, [-1, vocab_size])}
+ +"""YT8M model definition.""" + +import tensorflow as tf +from official.modeling import tf_utils +from official.vision.beta.projects.yt8m.configs import yt8m as yt8m_cfg +from official.vision.beta.projects.yt8m.modeling import yt8m_agg_models +from official.vision.beta.projects.yt8m.modeling import yt8m_model_utils as utils + +layers = tf.keras.layers + + +class YT8MModel(tf.keras.Model): + """A YT8M model class builder.""" + + def __init__(self, + input_params: yt8m_cfg.YT8MModel, + num_frames=30, + num_classes=3862, + input_specs=layers.InputSpec(shape=[None, None, 1152]), + **kwargs): + """YT8M initialization function. + + Args: + input_params: model configuration parameters + num_frames: `int` number of frames in a single input. + num_classes: `int` number of classes in dataset. + input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. + [batch_size x num_frames x num_features] + **kwargs: keyword arguments to be passed. + """ + + self._self_setattr_tracking = False + self._config_dict = { + "input_specs": input_specs, + "num_classes": num_classes, + "num_frames": num_frames, + "input_params": input_params + } + self._num_classes = num_classes + self._input_specs = input_specs + self._act_fn = tf_utils.get_activation(input_params.activation) + self._is_training = input_params.is_training + + # [batch_size x num_frames x num_features] + feature_size = input_specs.shape[-1] + # shape 'excluding' batch_size + model_input = tf.keras.Input(shape=self._input_specs.shape[1:]) + reshaped_input = tf.reshape(model_input, [-1, feature_size]) + tf.summary.histogram("input_hist", model_input) + + # configure model + if input_params.add_batch_norm: + reshaped_input = layers.BatchNormalization( + name="input_bn", scale=True, center=True, + trainable=self._is_training)( + reshaped_input) + + # activation = reshaped input * cluster weights + activation = layers.Dense( + input_params.cluster_size, + kernel_initializer=tf.random_normal_initializer( + stddev=1 / 
tf.sqrt(tf.cast(feature_size, tf.float32))))( + reshaped_input) + + if input_params.add_batch_norm: + activation = layers.BatchNormalization( + name="cluster_bn", + scale=True, + center=True, + trainable=self._is_training)( + activation) + + else: + cluster_biases = tf.Variable( + tf.random_normal_initializer(stddev=1 / tf.math.sqrt(feature_size))( + shape=[input_params.cluster_size]), + name="cluster_biases") + tf.summary.histogram("cluster_biases", cluster_biases) + activation += cluster_biases + + activation = self._act_fn(activation) + tf.summary.histogram("cluster_output", activation) + + activation = tf.reshape(activation, + [-1, num_frames, input_params.cluster_size]) + activation = utils.FramePooling(activation, input_params.pooling_method) + + # activation = activation * hidden1_weights + activation = layers.Dense( + input_params.hidden_size, + kernel_initializer=tf.random_normal_initializer( + stddev=1 / + tf.sqrt(tf.cast(input_params.cluster_size, tf.float32))))( + activation) + + if input_params.add_batch_norm: + activation = layers.BatchNormalization( + name="hidden1_bn", + scale=True, + center=True, + trainable=self._is_training)( + activation) + + else: + hidden1_biases = tf.Variable( + tf.random_normal_initializer(stddev=0.01)( + shape=[input_params.hidden_size]), + name="hidden1_biases") + + tf.summary.histogram("hidden1_biases", hidden1_biases) + activation += hidden1_biases + + activation = self._act_fn(activation) + tf.summary.histogram("hidden1_output", activation) + + aggregated_model = getattr(yt8m_agg_models, + input_params.yt8m_agg_classifier_model) + output = aggregated_model().create_model( + model_input=activation, vocab_size=self._num_classes) + + super().__init__( + inputs=model_input, outputs=output.get("predictions"), **kwargs) + + @property + def checkpoint_items(self): + """Returns a dictionary of items to be additionally checkpointed.""" + return dict() + + def get_config(self): + return self._config_dict + + @classmethod + def 
from_config(cls, config): + return cls(**config) diff --git a/official/vision/beta/projects/yt8m/modeling/yt8m_model_test.py b/official/vision/beta/projects/yt8m/modeling/yt8m_model_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f7bd9d9974f70300c36066e1db39a4568da9d207 --- /dev/null +++ b/official/vision/beta/projects/yt8m/modeling/yt8m_model_test.py @@ -0,0 +1,63 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for yt8m network.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from official.vision.beta.projects.yt8m.configs import yt8m as yt8m_cfg +from official.vision.beta.projects.yt8m.modeling import yt8m_model + + +class YT8MNetworkTest(parameterized.TestCase, tf.test.TestCase): + """Class for testing yt8m network.""" + + # test_yt8m_network_creation arbitrary params + @parameterized.parameters((32, 1152)) # 1152 = 1024 + 128 + def test_yt8m_network_creation(self, num_frames, feature_dims): + """Test for creation of a YT8M Model. + + Args: + num_frames: number of frames. + feature_dims: indicates total dimension size of the features. 
+ """ + input_specs = tf.keras.layers.InputSpec(shape=[num_frames, feature_dims]) + + num_classes = 3862 + model = yt8m_model.YT8MModel( + input_params=yt8m_cfg.YT8MTask.model, + num_frames=num_frames, + num_classes=num_classes, + input_specs=input_specs) + + # batch = 2 -> arbitrary value for test + inputs = np.random.rand(2 * num_frames, feature_dims) + logits = model(inputs) + self.assertAllEqual([2, num_classes], logits.numpy().shape) + + def test_serialize_deserialize(self): + model = yt8m_model.YT8MModel(input_params=yt8m_cfg.YT8MTask.model) + + config = model.get_config() + new_model = yt8m_model.YT8MModel.from_config(config) + + # If the serialization was successful, + # the new config should match the old. + self.assertAllEqual(model.get_config(), new_model.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yt8m/modeling/yt8m_model_utils.py b/official/vision/beta/projects/yt8m/modeling/yt8m_model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cebebb449d4979250383013b20734cffa3215901 --- /dev/null +++ b/official/vision/beta/projects/yt8m/modeling/yt8m_model_utils.py @@ -0,0 +1,95 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains a collection of util functions for model construction.""" +import tensorflow as tf + + +def SampleRandomSequence(model_input, num_frames, num_samples): + """Samples a random sequence of frames of size num_samples. + + Args: + model_input: tensor of shape [batch_size x max_frames x feature_size] + num_frames: tensor of shape [batch_size x 1] + num_samples: a scalar indicating the number of samples + + Returns: + reshaped model_input in [batch_size x 'num_samples' x feature_size] + """ + + batch_size = tf.shape(model_input)[0] + frame_index_offset = tf.tile( + tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1]) + max_start_frame_index = tf.maximum(num_frames - num_samples, 0) + start_frame_index = tf.cast( + tf.multiply( + tf.random_uniform([batch_size, 1]), + tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32) + frame_index = tf.minimum(start_frame_index + frame_index_offset, + tf.cast(num_frames - 1, tf.int32)) + batch_index = tf.tile( + tf.expand_dims(tf.range(batch_size), 1), [1, num_samples]) + index = tf.stack([batch_index, frame_index], 2) + return tf.gather_nd(model_input, index) + + +def SampleRandomFrames(model_input, num_frames, num_samples): + """Samples a random set of frames of size num_samples. + + Args: + model_input: tensor of shape [batch_size x max_frames x feature_size] + num_frames: tensor of shape [batch_size x 1] + num_samples (int): a scalar indicating the number of samples + + Returns: + reshaped model_input in [batch_size x 'num_samples' x feature_size] + """ + batch_size = tf.shape(model_input)[0] + frame_index = tf.cast( + tf.multiply( + tf.random.uniform([batch_size, num_samples]), + tf.tile(tf.cast(num_frames, tf.float32), [1, num_samples])), tf.int32) + batch_index = tf.tile( + tf.expand_dims(tf.range(batch_size), 1), [1, num_samples]) + index = tf.stack([batch_index, frame_index], 2) + return tf.gather_nd(model_input, index) + + +def FramePooling(frames, method): + """Pools over the frames of a video. 
+ + Args: + frames: tensor of shape [batch_size, num_frames, feature_size]. + method: string indicating pooling method, one of: "average", "max", + "attention", or "none". + + Returns: + tensor of shape [batch_size, feature_size] for average, max, or + attention pooling, and shape [batch_size*num_frames, feature_size] + for none pooling. + Raises: + ValueError: if method is other than "average", "max", "attention", or + "none". + """ + if method == "average": + reduced = tf.reduce_mean(frames, 1) + elif method == "max": + reduced = tf.reduce_max(frames, 1) + elif method == "none": + feature_size = frames.shape_as_list()[2] + reduced = tf.reshape(frames, [-1, feature_size]) + else: + raise ValueError("Unrecognized pooling method: %s" % method) + + return reduced diff --git a/official/vision/beta/projects/yt8m/tasks/__init__.py b/official/vision/beta/projects/yt8m/tasks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..226844ec2ff4b9581458306aea4c574f1b2f8230 --- /dev/null +++ b/official/vision/beta/projects/yt8m/tasks/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tasks package definition.""" +from official.vision.beta.projects.yt8m.tasks import yt8m_task diff --git a/official/vision/beta/projects/yt8m/tasks/yt8m_task.py b/official/vision/beta/projects/yt8m/tasks/yt8m_task.py new file mode 100644 index 0000000000000000000000000000000000000000..ab79c732613535357f207cda5583f4090f0d0d50 --- /dev/null +++ b/official/vision/beta/projects/yt8m/tasks/yt8m_task.py @@ -0,0 +1,282 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Video classification task definition.""" +from absl import logging +import tensorflow as tf + +from official.core import base_task +from official.core import input_reader +from official.core import task_factory +from official.modeling import tf_utils +from official.vision.beta.projects.yt8m.configs import yt8m as yt8m_cfg +from official.vision.beta.projects.yt8m.dataloaders import yt8m_input +from official.vision.beta.projects.yt8m.eval_utils import eval_util +from official.vision.beta.projects.yt8m.modeling import yt8m_model_utils as utils +from official.vision.beta.projects.yt8m.modeling.yt8m_model import YT8MModel + + +@task_factory.register_task_cls(yt8m_cfg.YT8MTask) +class YT8MTask(base_task.Task): + """A task for video classification.""" + + def build_model(self): + """Builds model for YT8M Task.""" + train_cfg = self.task_config.train_data + common_input_shape = [None, sum(train_cfg.feature_sizes)] + + # [batch_size x num_frames x num_features] + input_specs = tf.keras.layers.InputSpec(shape=[None] + common_input_shape) + logging.info('Build model input %r', common_input_shape) + + # Model configuration. + model_config = self.task_config.model + model = YT8MModel( + input_params=model_config, + input_specs=input_specs, + num_frames=train_cfg.num_frames, + num_classes=train_cfg.num_classes) + return model + + def build_inputs(self, params: yt8m_cfg.DataConfig, input_context=None): + """Builds input. 
+ + Args: + params: configuration for input data + input_context: indicates information about the compute replicas and input + pipelines + + Returns: + dataset: dataset fetched from reader + """ + + decoder = yt8m_input.Decoder(input_params=params) + decoder_fn = decoder.decode + parser = yt8m_input.Parser(input_params=params) + parser_fn = parser.parse_fn(params.is_training) + postprocess = yt8m_input.PostBatchProcessor(input_params=params) + postprocess_fn = postprocess.post_fn + transform_batch = yt8m_input.TransformBatcher(input_params=params) + batch_fn = transform_batch.batch_fn + + reader = input_reader.InputReader( + params, + dataset_fn=tf.data.TFRecordDataset, + decoder_fn=decoder_fn, + parser_fn=parser_fn, + postprocess_fn=postprocess_fn, + transform_and_batch_fn=batch_fn) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, labels, model_outputs, aux_losses=None): + """Sigmoid Cross Entropy. + + Args: + labels: tensor containing truth labels. + model_outputs: output logits of the classifier. + aux_losses: tensor containing auxiliarly loss tensors, i.e. `losses` in + keras.Model. + + Returns: + Tensors: The total loss, model loss tensors. + """ + losses_config = self.task_config.losses + model_loss = tf.keras.losses.binary_crossentropy( + labels, + model_outputs, + from_logits=losses_config.from_logits, + label_smoothing=losses_config.label_smoothing) + + model_loss = tf_utils.safe_mean(model_loss) + total_loss = model_loss + if aux_losses: + total_loss += tf.add_n(aux_losses) + + return total_loss, model_loss + + def build_metrics(self, training=True): + """Gets streaming metrics for training/validation. + + metric: mAP/gAP + top_k: A positive integer specifying how many predictions are considered + per video. + top_n: A positive Integer specifying the average precision at n, or None + to use all provided data points. + Args: + training: bool value, true for training mode, false for eval/validation. 
+ + Returns: + list of strings that indicate metrics to be used + """ + metrics = [] + metric_names = ['total_loss', 'model_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + + if not training: # Cannot run in train step. + num_classes = self.task_config.validation_data.num_classes + top_k = self.task_config.top_k + top_n = self.task_config.top_n + self.avg_prec_metric = eval_util.EvaluationMetrics( + num_classes, top_k=top_k, top_n=top_n) + + return metrics + + def train_step(self, inputs, model, optimizer, metrics=None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. output_dict = { + "video_ids": batch_video_ids, + "video_matrix": batch_video_matrix, + "labels": batch_labels, + "num_frames": batch_frames, } + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + a dictionary of logs. + """ + features, labels = inputs['video_matrix'], inputs['labels'] + num_frames = inputs['num_frames'] + + # Normalize input features. + feature_dim = len(features.shape) - 1 + features = tf.nn.l2_normalize(features, feature_dim) + + # sample random frames / random sequence + num_frames = tf.cast(num_frames, tf.float32) + sample_frames = self.task_config.train_data.num_frames + if self.task_config.model.sample_random_frames: + features = utils.SampleRandomFrames(features, num_frames, sample_frames) + else: + features = utils.SampleRandomSequence(features, num_frames, sample_frames) + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. 
+ outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss + loss, model_loss = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance(optimizer, + tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient before apply_gradients when LossScaleOptimizer is + # used. + if isinstance(optimizer, + tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + + # Apply gradient clipping. + if self.task_config.gradient_clip_norm > 0: + grads, _ = tf.clip_by_global_norm(grads, + self.task_config.gradient_clip_norm) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: loss} + + all_losses = {'total_loss': loss, 'model_loss': model_loss} + + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + logs.update({m.name: m.result()}) + + return logs + + def validation_step(self, inputs, model, metrics=None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. output_dict = { + "video_ids": batch_video_ids, + "video_matrix": batch_video_matrix, + "labels": batch_labels, + "num_frames": batch_frames, } + model: the model, forward definition + metrics: a nested structure of metrics objects. + + Returns: + a dictionary of logs. + """ + features, labels = inputs['video_matrix'], inputs['labels'] + num_frames = inputs['num_frames'] + + # Normalize input features. 
+ feature_dim = len(features.shape) - 1 + features = tf.nn.l2_normalize(features, feature_dim) + + # sample random frames (None, 5, 1152) -> (None, 30, 1152) + sample_frames = self.task_config.validation_data.num_frames + if self.task_config.model.sample_random_frames: + features = utils.SampleRandomFrames(features, num_frames, sample_frames) + else: + features = utils.SampleRandomSequence(features, num_frames, sample_frames) + + outputs = self.inference_step(features, model) + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + if self.task_config.validation_data.segment_labels: + # workaround to ignore the unrated labels. + outputs *= inputs['label_weights'] + # remove padding + outputs = outputs[~tf.reduce_all(labels == -1, axis=1)] + labels = labels[~tf.reduce_all(labels == -1, axis=1)] + loss, model_loss = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + + logs = {self.loss: loss} + + all_losses = {'total_loss': loss, 'model_loss': model_loss} + + logs.update({self.avg_prec_metric.name: (labels, outputs)}) + + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + logs.update({m.name: m.result()}) + return logs + + def inference_step(self, inputs, model): + """Performs the forward step.""" + return model(inputs, training=False) + + def aggregate_logs(self, state=None, step_logs=None): + if state is None: + state = self.avg_prec_metric + self.avg_prec_metric.accumulate( + labels=step_logs[self.avg_prec_metric.name][0], + predictions=step_logs[self.avg_prec_metric.name][1]) + return state + + def reduce_aggregated_logs(self, aggregated_logs): + avg_prec_metrics = self.avg_prec_metric.get() + self.avg_prec_metric.clear() + return avg_prec_metrics diff --git a/official/vision/beta/projects/yt8m/train.py b/official/vision/beta/projects/yt8m/train.py new file mode 100644 index 0000000000000000000000000000000000000000..097ef1d19e3507c06c7b43497e7a47269e599a7b --- /dev/null +++ 
"""YT8M model training driver."""

from absl import app
from absl import flags
import gin

from official.common import distribute_utils
from official.common import flags as tfm_flags
from official.core import task_factory
from official.core import train_lib
from official.core import train_utils
from official.modeling import performance
# pylint: disable=unused-import
from official.vision.beta.projects.yt8m.configs import yt8m
from official.vision.beta.projects.yt8m.tasks import yt8m_task
# pylint: enable=unused-import

FLAGS = flags.FLAGS


def main(_):
  """Parses flags/gin config, builds the task, and runs the experiment."""
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
  params = train_utils.parse_configuration(FLAGS)
  model_dir = FLAGS.model_dir

  # Only training modes serialize the resolved config: a pure-eval job must
  # not race the training job writing the same yaml file.
  if 'train' in FLAGS.mode:
    train_utils.serialize_config(params, model_dir)

  # Mixed precision ('mixed_float16' on GPU, 'mixed_bfloat16' on TPU) can
  # significantly speed up the model; loss_scale only applies to float16.
  if params.runtime.mixed_precision_dtype:
    performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype)

  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=params.runtime.distribution_strategy,
      all_reduce_alg=params.runtime.all_reduce_alg,
      num_gpus=params.runtime.num_gpus,
      tpu_address=params.runtime.tpu)
  with strategy.scope():
    task = task_factory.get_task(params.task, logging_dir=model_dir)

  train_lib.run_experiment(
      distribution_strategy=strategy,
      task=task,
      mode=FLAGS.mode,
      params=params,
      model_dir=model_dir)


if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(main)
# Lint as: python3
"""Detection input and model functions for serving/inference."""

import tensorflow as tf

from official.vision.beta import configs
from official.vision.beta.modeling import factory
from official.vision.beta.ops import anchor
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.serving import export_base


# ImageNet channel means / standard deviations scaled to [0, 255] pixels.
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)


class DetectionModule(export_base.ExportModule):
  """Detection Module."""

  def _build_model(self):
    """Builds and returns the detection model to export.

    Returns:
      A Mask R-CNN or RetinaNet keras model per the task config.

    Raises:
      ValueError: if the batch size is unset, non-batched NMS is requested,
        or the configured model type is unsupported.
    """
    # Fixed: these ValueErrors were constructed but never raised, so invalid
    # configurations silently fell through.
    if self._batch_size is None:
      raise ValueError("batch_size can't be None for detection models")
    if not self.params.task.model.detection_generator.use_batched_nms:
      raise ValueError('Only batched_nms is supported.')
    input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
                                            self._input_image_size + [3])

    if isinstance(self.params.task.model, configs.maskrcnn.MaskRCNN):
      model = factory.build_maskrcnn(
          input_specs=input_specs, model_config=self.params.task.model)
    elif isinstance(self.params.task.model, configs.retinanet.RetinaNet):
      model = factory.build_retinanet(
          input_specs=input_specs, model_config=self.params.task.model)
    else:
      raise ValueError('Detection module not implemented for {} model.'.format(
          type(self.params.task.model)))

    return model

  def _build_inputs(self, image):
    """Builds detection model inputs for serving.

    Args:
      image: a single decoded image tensor.

    Returns:
      Tuple of (normalized/resized image, per-level anchor boxes, image_info).
    """
    model_params = self.params.task.model
    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image,
                                           offset=MEAN_RGB,
                                           scale=STDDEV_RGB)

    # Resize without scale jitter; pad to a multiple of the coarsest stride.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._input_image_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._input_image_size, 2**model_params.max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)

    input_anchor = anchor.build_anchor_generator(
        min_level=model_params.min_level,
        max_level=model_params.max_level,
        num_scales=model_params.anchor.num_scales,
        aspect_ratios=model_params.anchor.aspect_ratios,
        anchor_size=model_params.anchor.anchor_size)
    anchor_boxes = input_anchor(image_size=(self._input_image_size[0],
                                            self._input_image_size[1]))

    return image, anchor_boxes, image_info

  def serve(self, images: tf.Tensor):
    """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]

    Returns:
      Tensor holding detection output logits.
    """
    model_params = self.params.task.model
    # Preprocessing is pinned to CPU so the accelerator only runs the model.
    with tf.device('cpu:0'):
      images = tf.cast(images, dtype=tf.float32)

      # Tensor Specs for map_fn outputs (images, anchor_boxes, and image_info).
      images_spec = tf.TensorSpec(shape=self._input_image_size + [3],
                                  dtype=tf.float32)

      # Channels per anchor location: scales * aspect ratios * 4 box coords.
      num_anchors = model_params.anchor.num_scales * len(
          model_params.anchor.aspect_ratios) * 4
      anchor_shapes = []
      for level in range(model_params.min_level, model_params.max_level + 1):
        anchor_level_spec = tf.TensorSpec(
            shape=[
                self._input_image_size[0] // 2**level,
                self._input_image_size[1] // 2**level, num_anchors
            ],
            dtype=tf.float32)
        anchor_shapes.append((str(level), anchor_level_spec))

      image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)

      images, anchor_boxes, image_info = tf.nest.map_structure(
          tf.identity,
          tf.map_fn(
              self._build_inputs,
              elems=images,
              fn_output_signature=(images_spec, dict(anchor_shapes),
                                   image_info_spec),
              parallel_iterations=32))

      input_image_shape = image_info[:, 1, :]

    # To overcome keras.Model extra limitation to save a model with layers that
    # have multiple inputs, we use `model.call` here to trigger the forward
    # path. Note that, this disables some keras magics happens in `__call__`.
    detections = self.model.call(
        images=images,
        image_shape=input_image_shape,
        anchor_boxes=anchor_boxes,
        training=False)

    if self.params.task.model.detection_generator.apply_nms:
      final_outputs = {
          'detection_boxes': detections['detection_boxes'],
          'detection_scores': detections['detection_scores'],
          'detection_classes': detections['detection_classes'],
          'num_detections': detections['num_detections']
      }
    else:
      final_outputs = {
          'decoded_boxes': detections['decoded_boxes'],
          'decoded_box_scores': detections['decoded_box_scores'],
          'cls_outputs': detections['cls_outputs'],
          'box_outputs': detections['box_outputs']
      }

    if 'detection_masks' in detections.keys():
      final_outputs['detection_masks'] = detections['detection_masks']

    final_outputs.update({'image_info': image_info})
    return final_outputs
+ +# Lint as: python3 +"""Test for image detection export lib.""" + +import io +import os + +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.vision.beta.serving import detection + + +class DetectionExportTest(tf.test.TestCase, parameterized.TestCase): + + def _get_detection_module(self, experiment_name): + params = exp_factory.get_exp_config(experiment_name) + params.task.model.backbone.resnet.model_id = 18 + params.task.model.detection_generator.use_batched_nms = True + detection_module = detection.DetectionModule( + params, batch_size=1, input_image_size=[640, 640]) + return detection_module + + def _export_from_module(self, module, input_type, save_directory): + signatures = module.get_inference_signatures( + {input_type: 'serving_default'}) + tf.saved_model.save(module, save_directory, signatures=signatures) + + def _get_dummy_input(self, input_type, batch_size, image_size): + """Get dummy input for the given input type.""" + h, w = image_size + + if input_type == 'image_tensor': + return tf.zeros((batch_size, h, w, 3), dtype=np.uint8) + elif input_type == 'image_bytes': + image = Image.fromarray(np.zeros((h, w, 3), dtype=np.uint8)) + byte_io = io.BytesIO() + image.save(byte_io, 'PNG') + return [byte_io.getvalue() for b in range(batch_size)] + elif input_type == 'tf_example': + image_tensor = tf.zeros((h, w, 3), dtype=tf.uint8) + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).numpy() + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[encoded_jpeg])), + })).SerializeToString() + return [example for b in range(batch_size)] + + @parameterized.parameters( + ('image_tensor', 'fasterrcnn_resnetfpn_coco', [384, 384]), + ('image_bytes', 'fasterrcnn_resnetfpn_coco', 
[640, 640]), + ('tf_example', 'fasterrcnn_resnetfpn_coco', [640, 640]), + ('image_tensor', 'maskrcnn_resnetfpn_coco', [640, 640]), + ('image_bytes', 'maskrcnn_resnetfpn_coco', [640, 384]), + ('tf_example', 'maskrcnn_resnetfpn_coco', [640, 640]), + ('image_tensor', 'retinanet_resnetfpn_coco', [640, 640]), + ('image_bytes', 'retinanet_resnetfpn_coco', [640, 640]), + ('tf_example', 'retinanet_resnetfpn_coco', [384, 640]), + ('image_tensor', 'retinanet_resnetfpn_coco', [384, 384]), + ('image_bytes', 'retinanet_spinenet_coco', [640, 640]), + ('tf_example', 'retinanet_spinenet_coco', [640, 384]), + ) + def test_export(self, input_type, experiment_name, image_size): + tmp_dir = self.get_temp_dir() + module = self._get_detection_module(experiment_name) + + self._export_from_module(module, input_type, tmp_dir) + + self.assertTrue(os.path.exists(os.path.join(tmp_dir, 'saved_model.pb'))) + self.assertTrue( + os.path.exists(os.path.join(tmp_dir, 'variables', 'variables.index'))) + self.assertTrue( + os.path.exists( + os.path.join(tmp_dir, 'variables', + 'variables.data-00000-of-00001'))) + + imported = tf.saved_model.load(tmp_dir) + detection_fn = imported.signatures['serving_default'] + + images = self._get_dummy_input( + input_type, batch_size=1, image_size=image_size) + + processed_images, anchor_boxes, image_info = module._build_inputs( + tf.zeros((224, 224, 3), dtype=tf.uint8)) + image_shape = image_info[1, :] + image_shape = tf.expand_dims(image_shape, 0) + processed_images = tf.expand_dims(processed_images, 0) + for l, l_boxes in anchor_boxes.items(): + anchor_boxes[l] = tf.expand_dims(l_boxes, 0) + + expected_outputs = module.model( + images=processed_images, + image_shape=image_shape, + anchor_boxes=anchor_boxes, + training=False) + outputs = detection_fn(tf.constant(images)) + + self.assertAllClose(outputs['num_detections'].numpy(), + expected_outputs['num_detections'].numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git 
a/official/vision/beta/serving/export_base.py b/official/vision/beta/serving/export_base.py new file mode 100644 index 0000000000000000000000000000000000000000..08472b55670359e65bf37c581a12675022172f2f --- /dev/null +++ b/official/vision/beta/serving/export_base.py @@ -0,0 +1,179 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Base class for model export.""" + +import abc +from typing import Dict, List, Mapping, Optional, Text + +import tensorflow as tf + +from official.core import export_base +from official.modeling.hyperparams import config_definitions as cfg + + +class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta): + """Base Export Module.""" + + def __init__(self, + params: cfg.ExperimentConfig, + *, + batch_size: int, + input_image_size: List[int], + num_channels: int = 3, + model: Optional[tf.keras.Model] = None): + """Initializes a module for export. + + Args: + params: Experiment params. + batch_size: The batch size of the model input. Can be `int` or None. + input_image_size: List or Tuple of size of the input image. For 2D image, + it is [height, width]. + num_channels: The number of the image channels. + model: A tf.keras.Model instance to be exported. 
+ """ + self.params = params + self._batch_size = batch_size + self._input_image_size = input_image_size + self._num_channels = num_channels + if model is None: + model = self._build_model() # pylint: disable=assignment-from-none + super().__init__(params=params, model=model) + + def _decode_image(self, encoded_image_bytes: str) -> tf.Tensor: + """Decodes an image bytes to an image tensor. + + Use `tf.image.decode_image` to decode an image if input is expected to be 2D + image; otherwise use `tf.io.decode_raw` to convert the raw bytes to tensor + and reshape it to desire shape. + + Args: + encoded_image_bytes: An encoded image string to be decoded. + + Returns: + A decoded image tensor. + """ + if len(self._input_image_size) == 2: + # Decode an image if 2D input is expected. + image_tensor = tf.image.decode_image( + encoded_image_bytes, channels=self._num_channels) + image_tensor.set_shape((None, None, self._num_channels)) + else: + # Convert raw bytes into a tensor and reshape it, if not 2D input. + image_tensor = tf.io.decode_raw(encoded_image_bytes, out_type=tf.uint8) + image_tensor = tf.reshape(image_tensor, + self._input_image_size + [self._num_channels]) + return image_tensor + + def _decode_tf_example( + self, tf_example_string_tensor: tf.train.Example) -> tf.Tensor: + """Decodes a TF Example to an image tensor. + + Args: + tf_example_string_tensor: A tf.train.Example of encoded image and other + information. + + Returns: + A decoded image tensor. 
+ """ + keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)} + parsed_tensors = tf.io.parse_single_example( + serialized=tf_example_string_tensor, features=keys_to_features) + image_tensor = self._decode_image(parsed_tensors['image/encoded']) + return image_tensor + + def _build_model(self, **kwargs): + """Returns a model built from the params.""" + return None + + @tf.function + def inference_from_image_tensors( + self, inputs: tf.Tensor) -> Mapping[str, tf.Tensor]: + return self.serve(inputs) + + @tf.function + def inference_from_image_bytes(self, inputs: tf.Tensor): + with tf.device('cpu:0'): + images = tf.nest.map_structure( + tf.identity, + tf.map_fn( + self._decode_image, + elems=inputs, + fn_output_signature=tf.TensorSpec( + shape=[None] * len(self._input_image_size) + + [self._num_channels], + dtype=tf.uint8), + parallel_iterations=32)) + images = tf.stack(images) + return self.serve(images) + + @tf.function + def inference_from_tf_example(self, + inputs: tf.Tensor) -> Mapping[str, tf.Tensor]: + with tf.device('cpu:0'): + images = tf.nest.map_structure( + tf.identity, + tf.map_fn( + self._decode_tf_example, + elems=inputs, + # Height/width of the shape of input images is unspecified (None) + # at the time of decoding the example, but the shape will + # be adjusted to conform to the input layer of the model, + # by _run_inference_on_image_tensors() below. + fn_output_signature=tf.TensorSpec( + shape=[None] * len(self._input_image_size) + + [self._num_channels], + dtype=tf.uint8), + parallel_iterations=32)) + images = tf.stack(images) + return self.serve(images) + + def get_inference_signatures(self, function_keys: Dict[Text, Text]): + """Gets defined function signatures. + + Args: + function_keys: A dictionary with keys as the function to create signature + for and values as the signature keys when returns. 
+ + Returns: + A dictionary with key as signature key and value as concrete functions + that can be used for tf.saved_model.save. + """ + signatures = {} + for key, def_name in function_keys.items(): + if key == 'image_tensor': + input_signature = tf.TensorSpec( + shape=[self._batch_size] + [None] * len(self._input_image_size) + + [self._num_channels], + dtype=tf.uint8) + signatures[ + def_name] = self.inference_from_image_tensors.get_concrete_function( + input_signature) + elif key == 'image_bytes': + input_signature = tf.TensorSpec( + shape=[self._batch_size], dtype=tf.string) + signatures[ + def_name] = self.inference_from_image_bytes.get_concrete_function( + input_signature) + elif key == 'serve_examples' or key == 'tf_example': + input_signature = tf.TensorSpec( + shape=[self._batch_size], dtype=tf.string) + signatures[ + def_name] = self.inference_from_tf_example.get_concrete_function( + input_signature) + else: + raise ValueError('Unrecognized `input_type`') + return signatures diff --git a/official/vision/beta/serving/export_saved_model.py b/official/vision/beta/serving/export_saved_model.py new file mode 100644 index 0000000000000000000000000000000000000000..51966e91dda8212c9fd016928c6a4dccbddcca0d --- /dev/null +++ b/official/vision/beta/serving/export_saved_model.py @@ -0,0 +1,103 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +r"""Vision models export binary for serving/inference. + +To export a trained checkpoint in saved_model format (shell script): + +EXPERIMENT_TYPE = XX +CHECKPOINT_PATH = XX +EXPORT_DIR_PATH = XX +export_saved_model --experiment=${EXPERIMENT_TYPE} \ + --export_dir=${EXPORT_DIR_PATH}/ \ + --checkpoint_path=${CHECKPOINT_PATH} \ + --batch_size=2 \ + --input_image_size=224,224 + +To serve (python): + +export_dir_path = XX +input_type = XX +input_images = XX +imported = tf.saved_model.load(export_dir_path) +model_fn = imported.signatures['serving_default'] +output = model_fn(input_images) +""" + +from absl import app +from absl import flags + +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.modeling import hyperparams +from official.vision.beta.serving import export_saved_model_lib + +FLAGS = flags.FLAGS + + +flags.DEFINE_string( + 'experiment', None, 'experiment type, e.g. retinanet_resnetfpn_coco') +flags.DEFINE_string('export_dir', None, 'The export directory.') +flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path.') +flags.DEFINE_multi_string( + 'config_file', + default=None, + help='YAML/JSON files which specifies overrides. The override order ' + 'follows the order of args. Note that each file ' + 'can be used as an override template to override the default parameters ' + 'specified in Python. 
If the same parameter is specified in both ' + '`--config_file` and `--params_override`, `config_file` will be used ' + 'first, followed by params_override.') +flags.DEFINE_string( + 'params_override', '', + 'The JSON/YAML file or string which specifies the parameter to be overriden' + ' on top of `config_file` template.') +flags.DEFINE_integer( + 'batch_size', None, 'The batch size.') +flags.DEFINE_string( + 'input_type', 'image_tensor', + 'One of `image_tensor`, `image_bytes`, `tf_example`.') +flags.DEFINE_string( + 'input_image_size', '224,224', + 'The comma-separated string of two integers representing the height,width ' + 'of the input to the model.') + + +def main(_): + + params = exp_factory.get_exp_config(FLAGS.experiment) + for config_file in FLAGS.config_file or []: + params = hyperparams.override_params_dict( + params, config_file, is_strict=True) + if FLAGS.params_override: + params = hyperparams.override_params_dict( + params, FLAGS.params_override, is_strict=True) + + params.validate() + params.lock() + + export_saved_model_lib.export_inference_graph( + input_type=FLAGS.input_type, + batch_size=FLAGS.batch_size, + input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')], + params=params, + checkpoint_path=FLAGS.checkpoint_path, + export_dir=FLAGS.export_dir, + export_checkpoint_subdir='checkpoint', + export_saved_model_subdir='saved_model') + + +if __name__ == '__main__': + app.run(main) diff --git a/official/vision/beta/serving/export_saved_model_lib.py b/official/vision/beta/serving/export_saved_model_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..ec391846515618a46fb94fa85c450915b5c00f64 --- /dev/null +++ b/official/vision/beta/serving/export_saved_model_lib.py @@ -0,0 +1,113 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +r"""Vision models export utility function for serving/inference.""" + +import os +from typing import Optional, List + +import tensorflow as tf + +from official.core import config_definitions as cfg +from official.core import export_base +from official.core import train_utils +from official.vision.beta import configs +from official.vision.beta.serving import detection +from official.vision.beta.serving import image_classification +from official.vision.beta.serving import semantic_segmentation + + +def export_inference_graph( + input_type: str, + batch_size: Optional[int], + input_image_size: List[int], + params: cfg.ExperimentConfig, + checkpoint_path: str, + export_dir: str, + num_channels: Optional[int] = 3, + export_module: Optional[export_base.ExportModule] = None, + export_checkpoint_subdir: Optional[str] = None, + export_saved_model_subdir: Optional[str] = None): + """Exports inference graph for the model specified in the exp config. + + Saved model is stored at export_dir/saved_model, checkpoint is saved + at export_dir/checkpoint, and params is saved at export_dir/params.yaml. + + Args: + input_type: One of `image_tensor`, `image_bytes`, `tf_example`. + batch_size: 'int', or None. + input_image_size: List or Tuple of height and width. + params: Experiment params. + checkpoint_path: Trained checkpoint path or directory. + export_dir: Export directory path. + num_channels: The number of input image channels. + export_module: Optional export module to be used instead of using params + to create one. 
If None, the params will be used to create an export + module. + export_checkpoint_subdir: Optional subdirectory under export_dir + to store checkpoint. + export_saved_model_subdir: Optional subdirectory under export_dir + to store saved model. + """ + + if export_checkpoint_subdir: + output_checkpoint_directory = os.path.join( + export_dir, export_checkpoint_subdir) + else: + output_checkpoint_directory = export_dir + + if export_saved_model_subdir: + output_saved_model_directory = os.path.join( + export_dir, export_saved_model_subdir) + else: + output_saved_model_directory = export_dir + + # TODO(arashwan): Offers a direct path to use ExportModule with Task objects. + if not export_module: + if isinstance(params.task, + configs.image_classification.ImageClassificationTask): + export_module = image_classification.ClassificationModule( + params=params, + batch_size=batch_size, + input_image_size=input_image_size, + num_channels=num_channels) + elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance( + params.task, configs.maskrcnn.MaskRCNNTask): + export_module = detection.DetectionModule( + params=params, + batch_size=batch_size, + input_image_size=input_image_size, + num_channels=num_channels) + elif isinstance(params.task, + configs.semantic_segmentation.SemanticSegmentationTask): + export_module = semantic_segmentation.SegmentationModule( + params=params, + batch_size=batch_size, + input_image_size=input_image_size, + num_channels=num_channels) + else: + raise ValueError('Export module not implemented for {} task.'.format( + type(params.task))) + + export_base.export( + export_module, + function_keys=[input_type], + export_savedmodel_dir=output_saved_model_directory, + checkpoint_path=checkpoint_path, + timestamped=False) + + ckpt = tf.train.Checkpoint(model=export_module.model) + ckpt.save(os.path.join(output_checkpoint_directory, 'ckpt')) + train_utils.serialize_config(params, export_dir) diff --git 
a/official/vision/beta/serving/export_tfhub.py b/official/vision/beta/serving/export_tfhub.py new file mode 100644 index 0000000000000000000000000000000000000000..8d8af0899034065f9750ea1ab51e23dea40bb915 --- /dev/null +++ b/official/vision/beta/serving/export_tfhub.py @@ -0,0 +1,105 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""A script to export the image classification as a TF-Hub SavedModel.""" + +# Import libraries +from absl import app +from absl import flags + +import tensorflow as tf + +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.modeling import hyperparams +from official.vision.beta.modeling import factory + + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'experiment', None, 'experiment type, e.g. resnet_imagenet') +flags.DEFINE_string( + 'checkpoint_path', None, 'Checkpoint path.') +flags.DEFINE_string( + 'export_path', None, 'The export directory.') +flags.DEFINE_multi_string( + 'config_file', + None, + 'A YAML/JSON files which specifies overrides. The override order ' + 'follows the order of args. Note that each file ' + 'can be used as an override template to override the default parameters ' + 'specified in Python. 
If the same parameter is specified in both ' + '`--config_file` and `--params_override`, `config_file` will be used ' + 'first, followed by params_override.') +flags.DEFINE_string( + 'params_override', '', + 'The JSON/YAML file or string which specifies the parameter to be overriden' + ' on top of `config_file` template.') +flags.DEFINE_integer( + 'batch_size', None, 'The batch size.') +flags.DEFINE_string( + 'input_image_size', + '224,224', + 'The comma-separated string of two integers representing the height,width ' + 'of the input to the model.') +flags.DEFINE_boolean( + 'skip_logits_layer', + False, + 'Whether to skip the prediction layer and only output the feature vector.') + + +def export_model_to_tfhub(params, + batch_size, + input_image_size, + skip_logits_layer, + checkpoint_path, + export_path): + """Export an image classification model to TF-Hub.""" + input_specs = tf.keras.layers.InputSpec(shape=[batch_size] + + input_image_size + [3]) + + model = factory.build_classification_model( + input_specs=input_specs, + model_config=params.task.model, + l2_regularizer=None, + skip_logits_layer=skip_logits_layer) + checkpoint = tf.train.Checkpoint(model=model) + checkpoint.restore(checkpoint_path).assert_existing_objects_matched() + model.save(export_path, include_optimizer=False, save_format='tf') + + +def main(_): + params = exp_factory.get_exp_config(FLAGS.experiment) + for config_file in FLAGS.config_file or []: + params = hyperparams.override_params_dict( + params, config_file, is_strict=True) + if FLAGS.params_override: + params = hyperparams.override_params_dict( + params, FLAGS.params_override, is_strict=True) + params.validate() + params.lock() + + export_model_to_tfhub( + params=params, + batch_size=FLAGS.batch_size, + input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')], + skip_logits_layer=FLAGS.skip_logits_layer, + checkpoint_path=FLAGS.checkpoint_path, + export_path=FLAGS.export_path) + + +if __name__ == '__main__': + app.run(main) 
diff --git a/official/vision/beta/serving/image_classification.py b/official/vision/beta/serving/image_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..739e07ab28e4bcfb247e92c9622715bf6e849398 --- /dev/null +++ b/official/vision/beta/serving/image_classification.py @@ -0,0 +1,82 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Detection input and model functions for serving/inference.""" + +import tensorflow as tf + +from official.vision.beta.modeling import factory +from official.vision.beta.ops import preprocess_ops +from official.vision.beta.serving import export_base + + +MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255) +STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255) + + +class ClassificationModule(export_base.ExportModule): + """classification Module.""" + + def _build_model(self): + input_specs = tf.keras.layers.InputSpec( + shape=[self._batch_size] + self._input_image_size + [3]) + + return factory.build_classification_model( + input_specs=input_specs, + model_config=self.params.task.model, + l2_regularizer=None) + + def _build_inputs(self, image): + """Builds classification model inputs for serving.""" + # Center crops and resizes image. 
+ image = preprocess_ops.center_crop_image(image) + + image = tf.image.resize( + image, self._input_image_size, method=tf.image.ResizeMethod.BILINEAR) + + image = tf.reshape( + image, [self._input_image_size[0], self._input_image_size[1], 3]) + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image, + offset=MEAN_RGB, + scale=STDDEV_RGB) + return image + + def serve(self, images): + """Cast image to float and run inference. + + Args: + images: uint8 Tensor of shape [batch_size, None, None, 3] + Returns: + Tensor holding classification output logits. + """ + with tf.device('cpu:0'): + images = tf.cast(images, dtype=tf.float32) + + images = tf.nest.map_structure( + tf.identity, + tf.map_fn( + self._build_inputs, elems=images, + fn_output_signature=tf.TensorSpec( + shape=self._input_image_size + [3], dtype=tf.float32), + parallel_iterations=32 + ) + ) + + logits = self.inference_step(images) + probs = tf.nn.softmax(logits) + + return {'logits': logits, 'probs': probs} diff --git a/official/vision/beta/serving/image_classification_test.py b/official/vision/beta/serving/image_classification_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a88646f3a2a4ad6c92dc21607f83886271aef078 --- /dev/null +++ b/official/vision/beta/serving/image_classification_test.py @@ -0,0 +1,112 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Test for image classification export lib.""" + +import io +import os + +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.vision.beta.serving import image_classification + + +class ImageClassificationExportTest(tf.test.TestCase, parameterized.TestCase): + + def _get_classification_module(self): + params = exp_factory.get_exp_config('resnet_imagenet') + params.task.model.backbone.resnet.model_id = 18 + classification_module = image_classification.ClassificationModule( + params, batch_size=1, input_image_size=[224, 224]) + return classification_module + + def _export_from_module(self, module, input_type, save_directory): + signatures = module.get_inference_signatures( + {input_type: 'serving_default'}) + tf.saved_model.save(module, + save_directory, + signatures=signatures) + + def _get_dummy_input(self, input_type): + """Get dummy input for the given input type.""" + + if input_type == 'image_tensor': + return tf.zeros((1, 224, 224, 3), dtype=np.uint8) + elif input_type == 'image_bytes': + image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8)) + byte_io = io.BytesIO() + image.save(byte_io, 'PNG') + return [byte_io.getvalue()] + elif input_type == 'tf_example': + image_tensor = tf.zeros((224, 224, 3), dtype=tf.uint8) + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).numpy() + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[encoded_jpeg])), + })).SerializeToString() + return [example] + + @parameterized.parameters( + {'input_type': 'image_tensor'}, + {'input_type': 'image_bytes'}, + {'input_type': 'tf_example'}, + ) + def test_export(self, input_type='image_tensor'): + tmp_dir = self.get_temp_dir() + module = 
self._get_classification_module() + # Test that the model restores any attrs that are trackable objects + # (eg: tables, resource variables, keras models/layers, tf.hub modules). + module.model.test_trackable = tf.keras.layers.InputLayer(input_shape=(4,)) + + self._export_from_module(module, input_type, tmp_dir) + + self.assertTrue(os.path.exists(os.path.join(tmp_dir, 'saved_model.pb'))) + self.assertTrue(os.path.exists( + os.path.join(tmp_dir, 'variables', 'variables.index'))) + self.assertTrue(os.path.exists( + os.path.join(tmp_dir, 'variables', 'variables.data-00000-of-00001'))) + + imported = tf.saved_model.load(tmp_dir) + classification_fn = imported.signatures['serving_default'] + + images = self._get_dummy_input(input_type) + processed_images = tf.nest.map_structure( + tf.stop_gradient, + tf.map_fn( + module._build_inputs, + elems=tf.zeros((1, 224, 224, 3), dtype=tf.uint8), + fn_output_signature=tf.TensorSpec( + shape=[224, 224, 3], dtype=tf.float32))) + expected_logits = module.model(processed_images, training=False) + expected_prob = tf.nn.softmax(expected_logits) + out = classification_fn(tf.constant(images)) + + # The imported model should contain any trackable attrs that the original + # model had. + self.assertTrue(hasattr(imported.model, 'test_trackable')) + self.assertAllClose(out['logits'].numpy(), expected_logits.numpy()) + self.assertAllClose(out['probs'].numpy(), expected_prob.numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/serving/semantic_segmentation.py b/official/vision/beta/serving/semantic_segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..0650886c84513a2837b6c5d1aa0b2cffba5573ac --- /dev/null +++ b/official/vision/beta/serving/semantic_segmentation.py @@ -0,0 +1,81 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Semantic segmentation input and model functions for serving/inference.""" + +import tensorflow as tf + +from official.vision.beta.modeling import factory +from official.vision.beta.ops import preprocess_ops +from official.vision.beta.serving import export_base + + +MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255) +STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255) + + +class SegmentationModule(export_base.ExportModule): + """Segmentation Module.""" + + def _build_model(self): + input_specs = tf.keras.layers.InputSpec( + shape=[self._batch_size] + self._input_image_size + [3]) + + return factory.build_segmentation_model( + input_specs=input_specs, + model_config=self.params.task.model, + l2_regularizer=None) + + def _build_inputs(self, image): + """Builds classification model inputs for serving.""" + + # Normalizes image with mean and std pixel values. + image = preprocess_ops.normalize_image(image, + offset=MEAN_RGB, + scale=STDDEV_RGB) + + image, _ = preprocess_ops.resize_and_crop_image( + image, + self._input_image_size, + padded_size=self._input_image_size, + aug_scale_min=1.0, + aug_scale_max=1.0) + return image + + def serve(self, images): + """Cast image to float and run inference. + + Args: + images: uint8 Tensor of shape [batch_size, None, None, 3] + Returns: + Tensor holding classification output logits. 
+ """ + with tf.device('cpu:0'): + images = tf.cast(images, dtype=tf.float32) + + images = tf.nest.map_structure( + tf.identity, + tf.map_fn( + self._build_inputs, elems=images, + fn_output_signature=tf.TensorSpec( + shape=self._input_image_size + [3], dtype=tf.float32), + parallel_iterations=32 + ) + ) + + masks = self.inference_step(images) + masks = tf.image.resize(masks, self._input_image_size, method='bilinear') + + return dict(predicted_masks=masks) diff --git a/official/vision/beta/serving/semantic_segmentation_test.py b/official/vision/beta/serving/semantic_segmentation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f45802f90a93fcca58781896aa47a3dbcc6169fe --- /dev/null +++ b/official/vision/beta/serving/semantic_segmentation_test.py @@ -0,0 +1,105 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Test for semantic segmentation export lib.""" + +import io +import os + +from absl.testing import parameterized +import numpy as np +from PIL import Image +import tensorflow as tf + +from official.common import registry_imports # pylint: disable=unused-import +from official.core import exp_factory +from official.vision.beta.serving import semantic_segmentation + + +class SemanticSegmentationExportTest(tf.test.TestCase, parameterized.TestCase): + + def _get_segmentation_module(self): + params = exp_factory.get_exp_config('seg_deeplabv3_pascal') + params.task.model.backbone.dilated_resnet.model_id = 50 + segmentation_module = semantic_segmentation.SegmentationModule( + params, batch_size=1, input_image_size=[112, 112]) + return segmentation_module + + def _export_from_module(self, module, input_type, save_directory): + signatures = module.get_inference_signatures( + {input_type: 'serving_default'}) + tf.saved_model.save(module, save_directory, signatures=signatures) + + def _get_dummy_input(self, input_type): + """Get dummy input for the given input type.""" + + if input_type == 'image_tensor': + return tf.zeros((1, 112, 112, 3), dtype=np.uint8) + elif input_type == 'image_bytes': + image = Image.fromarray(np.zeros((112, 112, 3), dtype=np.uint8)) + byte_io = io.BytesIO() + image.save(byte_io, 'PNG') + return [byte_io.getvalue()] + elif input_type == 'tf_example': + image_tensor = tf.zeros((112, 112, 3), dtype=tf.uint8) + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).numpy() + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + tf.train.Feature( + bytes_list=tf.train.BytesList(value=[encoded_jpeg])), + })).SerializeToString() + return [example] + + @parameterized.parameters( + {'input_type': 'image_tensor'}, + {'input_type': 'image_bytes'}, + {'input_type': 'tf_example'}, + ) + def test_export(self, input_type='image_tensor'): + tmp_dir = self.get_temp_dir() + module = 
self._get_segmentation_module() + + self._export_from_module(module, input_type, tmp_dir) + + self.assertTrue(os.path.exists(os.path.join(tmp_dir, 'saved_model.pb'))) + self.assertTrue( + os.path.exists(os.path.join(tmp_dir, 'variables', 'variables.index'))) + self.assertTrue( + os.path.exists( + os.path.join(tmp_dir, 'variables', + 'variables.data-00000-of-00001'))) + + imported = tf.saved_model.load(tmp_dir) + segmentation_fn = imported.signatures['serving_default'] + + images = self._get_dummy_input(input_type) + processed_images = tf.nest.map_structure( + tf.stop_gradient, + tf.map_fn( + module._build_inputs, + elems=tf.zeros((1, 112, 112, 3), dtype=tf.uint8), + fn_output_signature=tf.TensorSpec( + shape=[112, 112, 3], dtype=tf.float32))) + expected_output = tf.image.resize( + module.model(processed_images, training=False), [112, 112], + method='bilinear') + out = segmentation_fn(tf.constant(images)) + self.assertAllClose(out['predicted_masks'].numpy(), expected_output.numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/tasks/__init__.py b/official/vision/beta/tasks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8410d0d5b44fad9fa2627a24773ebe02c5df19cb --- /dev/null +++ b/official/vision/beta/tasks/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tasks package definition.""" + +from official.vision.beta.tasks import image_classification +from official.vision.beta.tasks import maskrcnn +from official.vision.beta.tasks import retinanet +from official.vision.beta.tasks import semantic_segmentation +from official.vision.beta.tasks import video_classification diff --git a/official/vision/beta/tasks/image_classification.py b/official/vision/beta/tasks/image_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..0db37d711d3a4cb4d6f31a9aeb3bf699dff22961 --- /dev/null +++ b/official/vision/beta/tasks/image_classification.py @@ -0,0 +1,254 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Image classification task definition.""" +from typing import Any, Optional, List, Tuple +from absl import logging +import tensorflow as tf + +from official.common import dataset_fn +from official.core import base_task +from official.core import task_factory +from official.modeling import tf_utils +from official.vision.beta.configs import image_classification as exp_cfg +from official.vision.beta.dataloaders import classification_input +from official.vision.beta.dataloaders import input_reader_factory +from official.vision.beta.dataloaders import tfds_classification_decoders +from official.vision.beta.modeling import factory + + +@task_factory.register_task_cls(exp_cfg.ImageClassificationTask) +class ImageClassificationTask(base_task.Task): + """A task for image classification.""" + + def build_model(self): + """Builds classification model.""" + input_specs = tf.keras.layers.InputSpec( + shape=[None] + self.task_config.model.input_size) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. + # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = factory.build_classification_model( + input_specs=input_specs, + model_config=self.task_config.model, + l2_regularizer=l2_regularizer) + return model + + def initialize(self, model: tf.keras.Model): + """Loads pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. 
+ if self.task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self.task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + else: + raise ValueError( + "Only 'all' or 'backbone' can be used to initialize the model.") + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, + params: exp_cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Builds classification input.""" + + num_classes = self.task_config.model.num_classes + input_size = self.task_config.model.input_size + image_field_key = self.task_config.train_data.image_field_key + label_field_key = self.task_config.train_data.label_field_key + + if params.tfds_name: + if params.tfds_name in tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP: + decoder = tfds_classification_decoders.TFDS_ID_TO_DECODER_MAP[ + params.tfds_name]() + else: + raise ValueError('TFDS {} is not supported'.format(params.tfds_name)) + else: + decoder = classification_input.Decoder( + image_field_key=image_field_key, label_field_key=label_field_key) + + parser = classification_input.Parser( + output_size=input_size[:2], + num_classes=num_classes, + image_field_key=image_field_key, + label_field_key=label_field_key, + aug_rand_hflip=params.aug_rand_hflip, + aug_type=params.aug_type, + dtype=params.dtype) + + reader = input_reader_factory.input_reader_generator( + params, + dataset_fn=dataset_fn.pick_dataset_fn(params.file_type), + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + labels: tf.Tensor, + model_outputs: tf.Tensor, + aux_losses: Optional[Any] = 
None): + """Builds sparse categorical cross entropy loss. + + Args: + labels: Input groundtruth labels. + model_outputs: Output logits of the classifier. + aux_losses: The auxiliarly loss tensors, i.e. `losses` in tf.keras.Model. + + Returns: + The total loss tensor. + """ + losses_config = self.task_config.losses + if losses_config.one_hot: + total_loss = tf.keras.losses.categorical_crossentropy( + labels, + model_outputs, + from_logits=True, + label_smoothing=losses_config.label_smoothing) + else: + total_loss = tf.keras.losses.sparse_categorical_crossentropy( + labels, model_outputs, from_logits=True) + + total_loss = tf_utils.safe_mean(total_loss) + if aux_losses: + total_loss += tf.add_n(aux_losses) + + return total_loss + + def build_metrics(self, training: bool = True): + """Gets streaming metrics for training/validation.""" + k = self.task_config.evaluation.top_k + if self.task_config.losses.one_hot: + metrics = [ + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + tf.keras.metrics.TopKCategoricalAccuracy( + k=k, name='top_{}_accuracy'.format(k))] + else: + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=k, name='top_{}_accuracy'.format(k))] + return metrics + + def train_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics: Optional[List[Any]] = None): + """Does forward and backward. + + Args: + inputs: A tuple of of input tensors of (features, labels). + model: A tf.keras.Model instance. + optimizer: The optimizer for this training step. + metrics: A nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + features, labels = inputs + if self.task_config.losses.one_hot: + labels = tf.one_hot(labels, self.task_config.model.num_classes) + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. + loss = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance( + optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient before apply_gradients when LossScaleOptimizer is + # used. + if isinstance( + optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, labels, outputs) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs + + def validation_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + metrics: Optional[List[Any]] = None): + """Runs validatation step. + + Args: + inputs: A tuple of of input tensors of (features, labels). + model: A tf.keras.Model instance. + metrics: A nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + features, labels = inputs + if self.task_config.losses.one_hot: + labels = tf.one_hot(labels, self.task_config.model.num_classes) + + outputs = self.inference_step(features, model) + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + loss = self.build_losses(model_outputs=outputs, labels=labels, + aux_losses=model.losses) + + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, labels, outputs) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs + + def inference_step(self, inputs: tf.Tensor, model: tf.keras.Model): + """Performs the forward step.""" + return model(inputs, training=False) diff --git a/official/vision/beta/tasks/maskrcnn.py b/official/vision/beta/tasks/maskrcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..4e290dac652b321826c0337cd5508882dcaec53f --- /dev/null +++ b/official/vision/beta/tasks/maskrcnn.py @@ -0,0 +1,375 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""RetinaNet task definition.""" +from typing import Any, Optional, List, Tuple, Mapping + +from absl import logging +import tensorflow as tf +from official.common import dataset_fn +from official.core import base_task +from official.core import task_factory +from official.vision.beta.configs import maskrcnn as exp_cfg +from official.vision.beta.dataloaders import input_reader_factory +from official.vision.beta.dataloaders import maskrcnn_input +from official.vision.beta.dataloaders import tf_example_decoder +from official.vision.beta.dataloaders import tf_example_label_map_decoder +from official.vision.beta.evaluation import coco_evaluator +from official.vision.beta.losses import maskrcnn_losses +from official.vision.beta.modeling import factory + + +def zero_out_disallowed_class_ids(batch_class_ids: tf.Tensor, + allowed_class_ids: List[int]): + """Zero out IDs of classes not in allowed_class_ids. + + Args: + batch_class_ids: A [batch_size, num_instances] int tensor of input + class IDs. + allowed_class_ids: A python list of class IDs which we want to allow. + + Returns: + filtered_class_ids: A [batch_size, num_instances] int tensor with any + class ID not in allowed_class_ids set to 0. + """ + + allowed_class_ids = tf.constant(allowed_class_ids, + dtype=batch_class_ids.dtype) + + match_ids = (batch_class_ids[:, :, tf.newaxis] == + allowed_class_ids[tf.newaxis, tf.newaxis, :]) + + match_ids = tf.reduce_any(match_ids, axis=2) + return tf.where(match_ids, batch_class_ids, tf.zeros_like(batch_class_ids)) + + +@task_factory.register_task_cls(exp_cfg.MaskRCNNTask) +class MaskRCNNTask(base_task.Task): + """A single-replica view of training procedure. + + Mask R-CNN task provides artifacts for training/evalution procedures, + including loading/iterating over Datasets, initializing the model, calculating + the loss, post-processing, and customized metrics with reduction. 
+ """ + + def build_model(self): + """Build Mask R-CNN model.""" + + input_specs = tf.keras.layers.InputSpec( + shape=[None] + self.task_config.model.input_size) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. + # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = factory.build_maskrcnn( + input_specs=input_specs, + model_config=self.task_config.model, + l2_regularizer=l2_regularizer) + return model + + def initialize(self, model: tf.keras.Model): + """Loading pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. 
+ if self.task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self.task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + else: + raise ValueError( + "Only 'all' or 'backbone' can be used to initialize the model.") + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, + params: exp_cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Build input dataset.""" + decoder_cfg = params.decoder.get() + if params.decoder.type == 'simple_decoder': + decoder = tf_example_decoder.TfExampleDecoder( + include_mask=self._task_config.model.include_mask, + regenerate_source_id=decoder_cfg.regenerate_source_id, + mask_binarize_threshold=decoder_cfg.mask_binarize_threshold) + elif params.decoder.type == 'label_map_decoder': + decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap( + label_map=decoder_cfg.label_map, + include_mask=self._task_config.model.include_mask, + regenerate_source_id=decoder_cfg.regenerate_source_id, + mask_binarize_threshold=decoder_cfg.mask_binarize_threshold) + else: + raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type)) + + parser = maskrcnn_input.Parser( + output_size=self.task_config.model.input_size[:2], + min_level=self.task_config.model.min_level, + max_level=self.task_config.model.max_level, + num_scales=self.task_config.model.anchor.num_scales, + aspect_ratios=self.task_config.model.anchor.aspect_ratios, + anchor_size=self.task_config.model.anchor.anchor_size, + dtype=params.dtype, + rpn_match_threshold=params.parser.rpn_match_threshold, + rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold, + 
rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im, + rpn_fg_fraction=params.parser.rpn_fg_fraction, + aug_rand_hflip=params.parser.aug_rand_hflip, + aug_scale_min=params.parser.aug_scale_min, + aug_scale_max=params.parser.aug_scale_max, + skip_crowd_during_training=params.parser.skip_crowd_during_training, + max_num_instances=params.parser.max_num_instances, + include_mask=self._task_config.model.include_mask, + mask_crop_size=params.parser.mask_crop_size) + + reader = input_reader_factory.input_reader_generator( + params, + dataset_fn=dataset_fn.pick_dataset_fn(params.file_type), + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + outputs: Mapping[str, Any], + labels: Mapping[str, Any], + aux_losses: Optional[Any] = None): + """Build Mask R-CNN losses.""" + params = self.task_config + cascade_ious = params.model.roi_sampler.cascade_iou_thresholds + + rpn_score_loss_fn = maskrcnn_losses.RpnScoreLoss( + tf.shape(outputs['box_outputs'])[1]) + rpn_box_loss_fn = maskrcnn_losses.RpnBoxLoss( + params.losses.rpn_huber_loss_delta) + rpn_score_loss = tf.reduce_mean( + rpn_score_loss_fn( + outputs['rpn_scores'], labels['rpn_score_targets'])) + rpn_box_loss = tf.reduce_mean( + rpn_box_loss_fn( + outputs['rpn_boxes'], labels['rpn_box_targets'])) + + frcnn_cls_loss_fn = maskrcnn_losses.FastrcnnClassLoss() + frcnn_box_loss_fn = maskrcnn_losses.FastrcnnBoxLoss( + params.losses.frcnn_huber_loss_delta, + params.model.detection_head.class_agnostic_bbox_pred) + + # Final cls/box losses are computed as an average of all detection heads. 
+ frcnn_cls_loss = 0.0 + frcnn_box_loss = 0.0 + num_det_heads = 1 if cascade_ious is None else 1 + len(cascade_ious) + for cas_num in range(num_det_heads): + frcnn_cls_loss_i = tf.reduce_mean( + frcnn_cls_loss_fn( + outputs['class_outputs_{}' + .format(cas_num) if cas_num else 'class_outputs'], + outputs['class_targets_{}' + .format(cas_num) if cas_num else 'class_targets'])) + frcnn_box_loss_i = tf.reduce_mean( + frcnn_box_loss_fn( + outputs['box_outputs_{}'.format(cas_num + ) if cas_num else 'box_outputs'], + outputs['class_targets_{}' + .format(cas_num) if cas_num else 'class_targets'], + outputs['box_targets_{}'.format(cas_num + ) if cas_num else 'box_targets'])) + frcnn_cls_loss += frcnn_cls_loss_i + frcnn_box_loss += frcnn_box_loss_i + frcnn_cls_loss /= num_det_heads + frcnn_box_loss /= num_det_heads + + if params.model.include_mask: + mask_loss_fn = maskrcnn_losses.MaskrcnnLoss() + mask_class_targets = outputs['mask_class_targets'] + if self._task_config.allowed_mask_class_ids is not None: + # Classes with ID=0 are ignored by mask_loss_fn in loss computation. 
+ mask_class_targets = zero_out_disallowed_class_ids( + mask_class_targets, self._task_config.allowed_mask_class_ids) + + mask_loss = tf.reduce_mean( + mask_loss_fn( + outputs['mask_outputs'], + outputs['mask_targets'], + mask_class_targets)) + else: + mask_loss = 0.0 + + model_loss = ( + params.losses.rpn_score_weight * rpn_score_loss + + params.losses.rpn_box_weight * rpn_box_loss + + params.losses.frcnn_class_weight * frcnn_cls_loss + + params.losses.frcnn_box_weight * frcnn_box_loss + + params.losses.mask_weight * mask_loss) + + total_loss = model_loss + if aux_losses: + reg_loss = tf.reduce_sum(aux_losses) + total_loss = model_loss + reg_loss + + losses = { + 'total_loss': total_loss, + 'rpn_score_loss': rpn_score_loss, + 'rpn_box_loss': rpn_box_loss, + 'frcnn_cls_loss': frcnn_cls_loss, + 'frcnn_box_loss': frcnn_box_loss, + 'mask_loss': mask_loss, + 'model_loss': model_loss, + } + return losses + + def build_metrics(self, training: bool = True): + """Build detection metrics.""" + metrics = [] + if training: + metric_names = [ + 'total_loss', + 'rpn_score_loss', + 'rpn_box_loss', + 'frcnn_cls_loss', + 'frcnn_box_loss', + 'mask_loss', + 'model_loss' + ] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + + else: + self.coco_metric = coco_evaluator.COCOEvaluator( + annotation_file=self._task_config.annotation_file, + include_mask=self._task_config.model.include_mask, + per_category_metrics=self._task_config.per_category_metrics) + + return metrics + + def train_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics: Optional[List[Any]] = None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + images, labels = inputs + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model( + images, + image_shape=labels['image_info'][:, 1, :], + anchor_boxes=labels['anchor_boxes'], + gt_boxes=labels['gt_boxes'], + gt_classes=labels['gt_classes'], + gt_masks=(labels['gt_masks'] if self.task_config.model.include_mask + else None), + training=True) + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. + losses = self.build_losses( + outputs=outputs, labels=labels, aux_losses=model.losses) + scaled_loss = losses['total_loss'] / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient when LossScaleOptimizer is used. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: losses['total_loss']} + + if metrics: + for m in metrics: + m.update_state(losses[m.name]) + + return logs + + def validation_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + metrics: Optional[List[Any]] = None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + images, labels = inputs + + outputs = model( + images, + anchor_boxes=labels['anchor_boxes'], + image_shape=labels['image_info'][:, 1, :], + training=False) + + logs = {self.loss: 0} + coco_model_outputs = { + 'detection_boxes': outputs['detection_boxes'], + 'detection_scores': outputs['detection_scores'], + 'detection_classes': outputs['detection_classes'], + 'num_detections': outputs['num_detections'], + 'source_id': labels['groundtruths']['source_id'], + 'image_info': labels['image_info'] + } + if self.task_config.model.include_mask: + coco_model_outputs.update({ + 'detection_masks': outputs['detection_masks'], + }) + logs.update({ + self.coco_metric.name: (labels['groundtruths'], coco_model_outputs) + }) + return logs + + def aggregate_logs(self, state=None, step_outputs=None): + if state is None: + self.coco_metric.reset_states() + state = self.coco_metric + self.coco_metric.update_state( + step_outputs[self.coco_metric.name][0], + step_outputs[self.coco_metric.name][1]) + return state + + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + return self.coco_metric.result() diff --git a/official/vision/beta/tasks/retinanet.py b/official/vision/beta/tasks/retinanet.py new file mode 100644 index 0000000000000000000000000000000000000000..a1dca4205c5b83ac8b1d9cde2a7299ea55b6e14f --- /dev/null +++ b/official/vision/beta/tasks/retinanet.py @@ -0,0 +1,308 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""RetinaNet task definition.""" +from typing import Any, Optional, List, Tuple, Mapping + +from absl import logging +import tensorflow as tf +from official.common import dataset_fn +from official.core import base_task +from official.core import task_factory +from official.vision import keras_cv +from official.vision.beta.configs import retinanet as exp_cfg +from official.vision.beta.dataloaders import input_reader_factory +from official.vision.beta.dataloaders import retinanet_input +from official.vision.beta.dataloaders import tf_example_decoder +from official.vision.beta.dataloaders import tfds_detection_decoders +from official.vision.beta.dataloaders import tf_example_label_map_decoder +from official.vision.beta.evaluation import coco_evaluator +from official.vision.beta.modeling import factory + + +@task_factory.register_task_cls(exp_cfg.RetinaNetTask) +class RetinaNetTask(base_task.Task): + """A single-replica view of training procedure. + + RetinaNet task provides artifacts for training/evalution procedures, including + loading/iterating over Datasets, initializing the model, calculating the loss, + post-processing, and customized metrics with reduction. + """ + + def build_model(self): + """Build RetinaNet model.""" + + input_specs = tf.keras.layers.InputSpec( + shape=[None] + self.task_config.model.input_size) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. 
+ # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = factory.build_retinanet( + input_specs=input_specs, + model_config=self.task_config.model, + l2_regularizer=l2_regularizer) + return model + + def initialize(self, model: tf.keras.Model): + """Loading pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. + if self.task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self.task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + else: + raise ValueError( + "Only 'all' or 'backbone' can be used to initialize the model.") + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, + params: exp_cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Build input dataset.""" + + if params.tfds_name: + if params.tfds_name in tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP: + decoder = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP[ + params.tfds_name]() + else: + raise ValueError('TFDS {} is not supported'.format(params.tfds_name)) + else: + decoder_cfg = params.decoder.get() + if params.decoder.type == 'simple_decoder': + decoder = tf_example_decoder.TfExampleDecoder( + regenerate_source_id=decoder_cfg.regenerate_source_id) + elif params.decoder.type == 'label_map_decoder': + decoder = 
tf_example_label_map_decoder.TfExampleDecoderLabelMap( + label_map=decoder_cfg.label_map, + regenerate_source_id=decoder_cfg.regenerate_source_id) + else: + raise ValueError('Unknown decoder type: {}!'.format( + params.decoder.type)) + + parser = retinanet_input.Parser( + output_size=self.task_config.model.input_size[:2], + min_level=self.task_config.model.min_level, + max_level=self.task_config.model.max_level, + num_scales=self.task_config.model.anchor.num_scales, + aspect_ratios=self.task_config.model.anchor.aspect_ratios, + anchor_size=self.task_config.model.anchor.anchor_size, + dtype=params.dtype, + match_threshold=params.parser.match_threshold, + unmatched_threshold=params.parser.unmatched_threshold, + aug_rand_hflip=params.parser.aug_rand_hflip, + aug_scale_min=params.parser.aug_scale_min, + aug_scale_max=params.parser.aug_scale_max, + skip_crowd_during_training=params.parser.skip_crowd_during_training, + max_num_instances=params.parser.max_num_instances) + + reader = input_reader_factory.input_reader_generator( + params, + dataset_fn=dataset_fn.pick_dataset_fn(params.file_type), + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + outputs: Mapping[str, Any], + labels: Mapping[str, Any], + aux_losses: Optional[Any] = None): + """Build RetinaNet losses.""" + params = self.task_config + cls_loss_fn = keras_cv.losses.FocalLoss( + alpha=params.losses.focal_loss_alpha, + gamma=params.losses.focal_loss_gamma, + reduction=tf.keras.losses.Reduction.SUM) + box_loss_fn = tf.keras.losses.Huber( + params.losses.huber_loss_delta, reduction=tf.keras.losses.Reduction.SUM) + + # Sums all positives in a batch for normalization and avoids zero + # num_positives_sum, which would lead to inf loss during training + cls_sample_weight = labels['cls_weights'] + box_sample_weight = labels['box_weights'] + num_positives = tf.reduce_sum(box_sample_weight) + 
1.0 + cls_sample_weight = cls_sample_weight / num_positives + box_sample_weight = box_sample_weight / num_positives + y_true_cls = keras_cv.losses.multi_level_flatten( + labels['cls_targets'], last_dim=None) + y_true_cls = tf.one_hot(y_true_cls, params.model.num_classes) + y_pred_cls = keras_cv.losses.multi_level_flatten( + outputs['cls_outputs'], last_dim=params.model.num_classes) + y_true_box = keras_cv.losses.multi_level_flatten( + labels['box_targets'], last_dim=4) + y_pred_box = keras_cv.losses.multi_level_flatten( + outputs['box_outputs'], last_dim=4) + + cls_loss = cls_loss_fn( + y_true=y_true_cls, y_pred=y_pred_cls, sample_weight=cls_sample_weight) + box_loss = box_loss_fn( + y_true=y_true_box, y_pred=y_pred_box, sample_weight=box_sample_weight) + + model_loss = cls_loss + params.losses.box_loss_weight * box_loss + + total_loss = model_loss + if aux_losses: + reg_loss = tf.reduce_sum(aux_losses) + total_loss = model_loss + reg_loss + + return total_loss, cls_loss, box_loss, model_loss + + def build_metrics(self, training: bool = True): + """Build detection metrics.""" + metrics = [] + metric_names = ['total_loss', 'cls_loss', 'box_loss', 'model_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + + if not training: + if self.task_config.validation_data.tfds_name and self.task_config.annotation_file: + raise ValueError( + "Can't evaluate using annotation file when TFDS is used.") + self.coco_metric = coco_evaluator.COCOEvaluator( + annotation_file=self.task_config.annotation_file, + include_mask=False, + per_category_metrics=self.task_config.per_category_metrics) + + return metrics + + def train_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics: Optional[List[Any]] = None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. 
+ optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. + loss, cls_loss, box_loss, model_loss = self.build_losses( + outputs=outputs, labels=labels, aux_losses=model.losses) + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient when LossScaleOptimizer is used. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: loss} + + all_losses = { + 'total_loss': loss, + 'cls_loss': cls_loss, + 'box_loss': box_loss, + 'model_loss': model_loss, + } + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + logs.update({m.name: m.result()}) + + return logs + + def validation_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + metrics: Optional[List[Any]] = None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + features, labels = inputs + + outputs = model(features, anchor_boxes=labels['anchor_boxes'], + image_shape=labels['image_info'][:, 1, :], + training=False) + loss, cls_loss, box_loss, model_loss = self.build_losses( + outputs=outputs, labels=labels, aux_losses=model.losses) + logs = {self.loss: loss} + + all_losses = { + 'total_loss': loss, + 'cls_loss': cls_loss, + 'box_loss': box_loss, + 'model_loss': model_loss, + } + + coco_model_outputs = { + 'detection_boxes': outputs['detection_boxes'], + 'detection_scores': outputs['detection_scores'], + 'detection_classes': outputs['detection_classes'], + 'num_detections': outputs['num_detections'], + 'source_id': labels['groundtruths']['source_id'], + 'image_info': labels['image_info'] + } + logs.update({self.coco_metric.name: (labels['groundtruths'], + coco_model_outputs)}) + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + logs.update({m.name: m.result()}) + return logs + + def aggregate_logs(self, state=None, step_outputs=None): + if state is None: + self.coco_metric.reset_states() + state = self.coco_metric + self.coco_metric.update_state(step_outputs[self.coco_metric.name][0], + step_outputs[self.coco_metric.name][1]) + return state + + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + return self.coco_metric.result() diff --git a/official/vision/beta/tasks/semantic_segmentation.py b/official/vision/beta/tasks/semantic_segmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..dd01f78a6db40351a816aaae03b2268b3803cc56 --- /dev/null +++ b/official/vision/beta/tasks/semantic_segmentation.py @@ -0,0 +1,287 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Image segmentation task definition.""" +from typing import Any, Optional, List, Tuple, Mapping, Union + +from absl import logging +import tensorflow as tf +from official.common import dataset_fn +from official.core import base_task +from official.core import task_factory +from official.vision.beta.configs import semantic_segmentation as exp_cfg +from official.vision.beta.dataloaders import input_reader_factory +from official.vision.beta.dataloaders import segmentation_input +from official.vision.beta.dataloaders import tfds_segmentation_decoders +from official.vision.beta.evaluation import segmentation_metrics +from official.vision.beta.losses import segmentation_losses +from official.vision.beta.modeling import factory + + +@task_factory.register_task_cls(exp_cfg.SemanticSegmentationTask) +class SemanticSegmentationTask(base_task.Task): + """A task for semantic segmentation.""" + + def build_model(self): + """Builds segmentation model.""" + input_specs = tf.keras.layers.InputSpec( + shape=[None] + self.task_config.model.input_size) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. 
+ # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = factory.build_segmentation_model( + input_specs=input_specs, + model_config=self.task_config.model, + l2_regularizer=l2_regularizer) + return model + + def initialize(self, model: tf.keras.Model): + """Loads pretrained checkpoint.""" + if not self.task_config.init_checkpoint: + return + + ckpt_dir_or_file = self.task_config.init_checkpoint + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + # Restoring checkpoint. + if 'all' in self.task_config.init_checkpoint_modules: + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + else: + ckpt_items = {} + if 'backbone' in self.task_config.init_checkpoint_modules: + ckpt_items.update(backbone=model.backbone) + if 'decoder' in self.task_config.init_checkpoint_modules: + ckpt_items.update(decoder=model.decoder) + + ckpt = tf.train.Checkpoint(**ckpt_items) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) + + def build_inputs(self, + params: exp_cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Builds classification input.""" + + ignore_label = self.task_config.losses.ignore_label + + if params.tfds_name: + if params.tfds_name in tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP: + decoder = tfds_segmentation_decoders.TFDS_ID_TO_DECODER_MAP[ + params.tfds_name]() + else: + raise ValueError('TFDS {} is not supported'.format(params.tfds_name)) + else: + decoder = segmentation_input.Decoder() + + parser = segmentation_input.Parser( + output_size=params.output_size, + 
crop_size=params.crop_size, + ignore_label=ignore_label, + resize_eval_groundtruth=params.resize_eval_groundtruth, + groundtruth_padded_size=params.groundtruth_padded_size, + aug_scale_min=params.aug_scale_min, + aug_scale_max=params.aug_scale_max, + aug_rand_hflip=params.aug_rand_hflip, + dtype=params.dtype) + + reader = input_reader_factory.input_reader_generator( + params, + dataset_fn=dataset_fn.pick_dataset_fn(params.file_type), + decoder_fn=decoder.decode, + parser_fn=parser.parse_fn(params.is_training)) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + labels: Mapping[str, tf.Tensor], + model_outputs: Union[Mapping[str, tf.Tensor], tf.Tensor], + aux_losses: Optional[Any] = None): + """Segmentation loss. + + Args: + labels: labels. + model_outputs: Output logits of the classifier. + aux_losses: auxiliarly loss tensors, i.e. `losses` in keras.Model. + + Returns: + The total loss tensor. + """ + loss_params = self._task_config.losses + segmentation_loss_fn = segmentation_losses.SegmentationLoss( + loss_params.label_smoothing, + loss_params.class_weights, + loss_params.ignore_label, + use_groundtruth_dimension=loss_params.use_groundtruth_dimension, + top_k_percent_pixels=loss_params.top_k_percent_pixels) + + total_loss = segmentation_loss_fn(model_outputs, labels['masks']) + + if aux_losses: + total_loss += tf.add_n(aux_losses) + + return total_loss + + def build_metrics(self, training: bool = True): + """Gets streaming metrics for training/validation.""" + metrics = [] + if training and self.task_config.evaluation.report_train_mean_iou: + metrics.append(segmentation_metrics.MeanIoU( + name='mean_iou', + num_classes=self.task_config.model.num_classes, + rescale_predictions=False, + dtype=tf.float32)) + else: + self.iou_metric = segmentation_metrics.PerClassIoU( + name='per_class_iou', + num_classes=self.task_config.model.num_classes, + rescale_predictions=not self.task_config.validation_data + 
.resize_eval_groundtruth, + dtype=tf.float32) + + return metrics + + def train_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics: Optional[List[Any]] = None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + + input_partition_dims = self.task_config.train_input_partition_dims + if input_partition_dims: + strategy = tf.distribute.get_strategy() + features = strategy.experimental_split_to_logical_devices( + features, input_partition_dims) + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. + loss = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient before apply_gradients when LossScaleOptimizer is + # used. 
+ if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = {self.loss: loss} + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + + return logs + + def validation_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + metrics: Optional[List[Any]] = None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + + input_partition_dims = self.task_config.eval_input_partition_dims + if input_partition_dims: + strategy = tf.distribute.get_strategy() + features = strategy.experimental_split_to_logical_devices( + features, input_partition_dims) + + outputs = self.inference_step(features, model) + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + + if self.task_config.validation_data.resize_eval_groundtruth: + loss = self.build_losses(model_outputs=outputs, labels=labels, + aux_losses=model.losses) + else: + loss = 0 + + logs = {self.loss: loss} + logs.update({self.iou_metric.name: (labels, outputs)}) + + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + + return logs + + def inference_step(self, inputs: tf.Tensor, model: tf.keras.Model): + """Performs the forward step.""" + return model(inputs, training=False) + + def aggregate_logs(self, state=None, step_outputs=None): + if state is None: + self.iou_metric.reset_states() + state = self.iou_metric + self.iou_metric.update_state(step_outputs[self.iou_metric.name][0], + step_outputs[self.iou_metric.name][1]) + return state + + def reduce_aggregated_logs(self, aggregated_logs, global_step=None): + result = {} + ious = self.iou_metric.result() + # TODO(arashwan): support 
loading class name from a label map file. + if self.task_config.evaluation.report_per_class_iou: + for i, value in enumerate(ious.numpy()): + result.update({'iou/{}'.format(i): value}) + # Computes mean IoU + result.update({'mean_iou': tf.reduce_mean(ious).numpy()}) + return result diff --git a/official/vision/beta/tasks/video_classification.py b/official/vision/beta/tasks/video_classification.py new file mode 100644 index 0000000000000000000000000000000000000000..125d5ba4bd71ea6972be0ddf9c85aaeb567ee720 --- /dev/null +++ b/official/vision/beta/tasks/video_classification.py @@ -0,0 +1,301 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Video classification task definition.""" +from typing import Any, Optional, List, Tuple + +from absl import logging +import tensorflow as tf +from official.core import base_task +from official.core import task_factory +from official.modeling import tf_utils +from official.vision.beta.configs import video_classification as exp_cfg +from official.vision.beta.dataloaders import input_reader_factory +from official.vision.beta.dataloaders import video_input +from official.vision.beta.modeling import factory_3d + + +@task_factory.register_task_cls(exp_cfg.VideoClassificationTask) +class VideoClassificationTask(base_task.Task): + """A task for video classification.""" + + def build_model(self): + """Builds video classification model.""" + common_input_shape = [ + d1 if d1 == d2 else None + for d1, d2 in zip(self.task_config.train_data.feature_shape, + self.task_config.validation_data.feature_shape) + ] + input_specs = tf.keras.layers.InputSpec(shape=[None] + common_input_shape) + logging.info('Build model input %r', common_input_shape) + + l2_weight_decay = self.task_config.losses.l2_weight_decay + # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss. 
+ # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2) + # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss) + l2_regularizer = (tf.keras.regularizers.l2( + l2_weight_decay / 2.0) if l2_weight_decay else None) + + model = factory_3d.build_model( + self.task_config.model.model_type, + input_specs=input_specs, + model_config=self.task_config.model, + num_classes=self.task_config.train_data.num_classes, + l2_regularizer=l2_regularizer) + return model + + def _get_dataset_fn(self, params): + if params.file_type == 'tfrecord': + return tf.data.TFRecordDataset + else: + raise ValueError('Unknown input file type {!r}'.format(params.file_type)) + + def _get_decoder_fn(self, params): + decoder = video_input.Decoder( + image_key=params.image_field_key, label_key=params.label_field_key) + if self.task_config.train_data.output_audio: + assert self.task_config.train_data.audio_feature, 'audio feature is empty' + decoder.add_feature(self.task_config.train_data.audio_feature, + tf.io.VarLenFeature(dtype=tf.float32)) + return decoder.decode + + def build_inputs(self, + params: exp_cfg.DataConfig, + input_context: Optional[tf.distribute.InputContext] = None): + """Builds classification input.""" + + parser = video_input.Parser( + input_params=params, + image_key=params.image_field_key, + label_key=params.label_field_key) + postprocess_fn = video_input.PostBatchProcessor(params) + + reader = input_reader_factory.input_reader_generator( + params, + dataset_fn=self._get_dataset_fn(params), + decoder_fn=self._get_decoder_fn(params), + parser_fn=parser.parse_fn(params.is_training), + postprocess_fn=postprocess_fn) + + dataset = reader.read(input_context=input_context) + + return dataset + + def build_losses(self, + labels: Any, + model_outputs: Any, + aux_losses: Optional[Any] = None): + """Sparse categorical cross entropy loss. + + Args: + labels: labels. + model_outputs: Output logits of the classifier. + aux_losses: auxiliarly loss tensors, i.e. 
`losses` in keras.Model. + + Returns: + The total loss tensor. + """ + all_losses = {} + losses_config = self.task_config.losses + total_loss = None + if self.task_config.train_data.is_multilabel: + entropy = -tf.reduce_mean( + tf.reduce_sum(model_outputs * tf.math.log(model_outputs + 1e-8), -1)) + total_loss = tf.keras.losses.binary_crossentropy( + labels, model_outputs, from_logits=False) + all_losses.update({ + 'class_loss': total_loss, + 'entropy': entropy, + }) + else: + if losses_config.one_hot: + total_loss = tf.keras.losses.categorical_crossentropy( + labels, + model_outputs, + from_logits=False, + label_smoothing=losses_config.label_smoothing) + else: + total_loss = tf.keras.losses.sparse_categorical_crossentropy( + labels, model_outputs, from_logits=False) + + total_loss = tf_utils.safe_mean(total_loss) + all_losses.update({ + 'class_loss': total_loss, + }) + if aux_losses: + all_losses.update({ + 'reg_loss': aux_losses, + }) + total_loss += tf.add_n(aux_losses) + all_losses[self.loss] = total_loss + + return all_losses + + def build_metrics(self, training: bool = True): + """Gets streaming metrics for training/validation.""" + if self.task_config.losses.one_hot: + metrics = [ + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + tf.keras.metrics.TopKCategoricalAccuracy(k=1, name='top_1_accuracy'), + tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='top_5_accuracy') + ] + if self.task_config.train_data.is_multilabel: + metrics.append( + tf.keras.metrics.AUC( + curve='ROC', + multi_label=self.task_config.train_data.is_multilabel, + name='ROC-AUC')) + metrics.append( + tf.keras.metrics.RecallAtPrecision( + 0.95, name='RecallAtPrecision95')) + metrics.append( + tf.keras.metrics.AUC( + curve='PR', + multi_label=self.task_config.train_data.is_multilabel, + name='PR-AUC')) + if self.task_config.metrics.use_per_class_recall: + for i in range(self.task_config.train_data.num_classes): + metrics.append( + tf.keras.metrics.Recall(class_id=i, 
name=f'recall-{i}')) + else: + metrics = [ + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=1, name='top_1_accuracy'), + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=5, name='top_5_accuracy') + ] + return metrics + + def process_metrics(self, metrics: List[Any], labels: Any, + model_outputs: Any): + """Process and update metrics. + + Called when using custom training loop API. + + Args: + metrics: a nested structure of metrics objects. The return of function + self.build_metrics. + labels: a tensor or a nested structure of tensors. + model_outputs: a tensor or a nested structure of tensors. For example, + output of the keras model built by self.build_model. + """ + for metric in metrics: + metric.update_state(labels, model_outputs) + + def train_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + optimizer: tf.keras.optimizers.Optimizer, + metrics: Optional[List[Any]] = None): + """Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + + num_replicas = tf.distribute.get_strategy().num_replicas_in_sync + with tf.GradientTape() as tape: + outputs = model(features, training=True) + # Casting output layer as float32 is necessary when mixed_precision is + # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32. + outputs = tf.nest.map_structure( + lambda x: tf.cast(x, tf.float32), outputs) + + # Computes per-replica loss. 
+ if self.task_config.train_data.is_multilabel: + outputs = tf.math.sigmoid(outputs) + else: + outputs = tf.math.softmax(outputs) + all_losses = self.build_losses( + model_outputs=outputs, labels=labels, aux_losses=model.losses) + loss = all_losses[self.loss] + # Scales loss as the default gradients allreduce performs sum inside the + # optimizer. + scaled_loss = loss / num_replicas + + # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # scaled for numerical stability. + if isinstance( + optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + + tvars = model.trainable_variables + grads = tape.gradient(scaled_loss, tvars) + # Scales back gradient before apply_gradients when LossScaleOptimizer is + # used. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + grads = optimizer.get_unscaled_gradients(grads) + optimizer.apply_gradients(list(zip(grads, tvars))) + + logs = all_losses + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs + + def validation_step(self, + inputs: Tuple[Any, Any], + model: tf.keras.Model, + metrics: Optional[List[Any]] = None): + """Validatation step. + + Args: + inputs: a dictionary of input tensors. + model: the keras.Model. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. 
+ """ + features, labels = inputs + + outputs = self.inference_step(features, model) + outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs) + logs = self.build_losses(model_outputs=outputs, labels=labels, + aux_losses=model.losses) + + if metrics: + self.process_metrics(metrics, labels, outputs) + logs.update({m.name: m.result() for m in metrics}) + elif model.compiled_metrics: + self.process_compiled_metrics(model.compiled_metrics, labels, outputs) + logs.update({m.name: m.result() for m in model.metrics}) + return logs + + def inference_step(self, features: tf.Tensor, model: tf.keras.Model): + """Performs the forward step.""" + outputs = model(features, training=False) + if self.task_config.train_data.is_multilabel: + outputs = tf.math.sigmoid(outputs) + else: + outputs = tf.math.softmax(outputs) + num_test_clips = self.task_config.validation_data.num_test_clips + num_test_crops = self.task_config.validation_data.num_test_crops + num_test_views = num_test_clips * num_test_crops + if num_test_views > 1: + # Averaging output probabilities across multiples views. + outputs = tf.reshape(outputs, [-1, num_test_views, outputs.shape[-1]]) + outputs = tf.reduce_mean(outputs, axis=1) + return outputs diff --git a/official/vision/beta/train.py b/official/vision/beta/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d922972e4269f08e7ec181f32388c8afd772bc0f --- /dev/null +++ b/official/vision/beta/train.py @@ -0,0 +1,69 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""TensorFlow Model Garden Vision training driver.""" + +from absl import app +from absl import flags +import gin + +# pylint: disable=unused-import +from official.common import registry_imports +# pylint: enable=unused-import +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance + +FLAGS = flags.FLAGS + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + distribution_strategy = distribute_utils.get_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + all_reduce_alg=params.runtime.all_reduce_alg, + num_gpus=params.runtime.num_gpus, + tpu_address=params.runtime.tpu) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + + train_utils.save_gin_config(FLAGS.mode, model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/vision/beta/train_spatial_partitioning.py b/official/vision/beta/train_spatial_partitioning.py new file mode 100644 index 0000000000000000000000000000000000000000..9ccd562f40e8633a54a0d1954594be14ffd41bea --- /dev/null +++ b/official/vision/beta/train_spatial_partitioning.py @@ -0,0 +1,136 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""TensorFlow Model Garden Vision training driver with spatial partitioning.""" + +from absl import app +from absl import flags +import gin +import numpy as np +import tensorflow as tf + +from official.common import registry_imports # pylint: disable=unused-import +from official.common import distribute_utils +from official.common import flags as tfm_flags +from official.core import task_factory +from official.core import train_lib +from official.core import train_utils +from official.modeling import performance + + +FLAGS = flags.FLAGS + + +def get_computation_shape_for_model_parallelism(input_partition_dims): + """Return computation shape to be used for TPUStrategy spatial partition.""" + num_logical_devices = np.prod(input_partition_dims) + if num_logical_devices == 1: + return [1, 1, 1, 1] + if num_logical_devices == 2: + return [1, 1, 1, 2] + if num_logical_devices == 4: + return [1, 2, 1, 2] + if num_logical_devices == 8: + return [2, 2, 1, 2] + if num_logical_devices == 16: + return [4, 2, 1, 2] + + +def create_distribution_strategy(distribution_strategy, + tpu_address, + input_partition_dims=None, + num_gpus=None): + """Creates distribution strategy to use for computation.""" + + if input_partition_dims is not None: + if distribution_strategy != 'tpu': + raise ValueError('Spatial partitioning is only supported ' + 'for TPUStrategy.') + + # When `input_partition_dims` is specified create custom TPUStrategy + # instance with computation shape for model parallelism. 
+ resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu=tpu_address) + if tpu_address not in ('', 'local'): + tf.config.experimental_connect_to_cluster(resolver) + + topology = tf.tpu.experimental.initialize_tpu_system(resolver) + num_replicas = resolver.get_tpu_system_metadata().num_cores // np.prod( + input_partition_dims) + device_assignment = tf.tpu.experimental.DeviceAssignment.build( + topology, + num_replicas=num_replicas, + computation_shape=input_partition_dims) + return tf.distribute.TPUStrategy( + resolver, experimental_device_assignment=device_assignment) + + return distribute_utils.get_distribution_strategy( + distribution_strategy=distribution_strategy, + tpu_address=tpu_address, + num_gpus=num_gpus) + + +def main(_): + gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) + params = train_utils.parse_configuration(FLAGS) + model_dir = FLAGS.model_dir + if 'train' in FLAGS.mode: + # Pure eval modes do not output yaml files. Otherwise continuous eval job + # may race against the train job for writing the same file. + train_utils.serialize_config(params, model_dir) + + # Sets mixed_precision policy. Using 'mixed_float16' or 'mixed_bfloat16' + # can have significant impact on model speeds by utilizing float16 in case of + # GPUs, and bfloat16 in the case of TPUs. 
+ loss_scale takes effect only when + # dtype is float16 + if params.runtime.mixed_precision_dtype: + performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + + input_partition_dims = None + if FLAGS.mode == 'train_and_eval': + if np.prod(params.task.train_input_partition_dims) != np.prod( + params.task.eval_input_partition_dims): + raise ValueError('Train and eval input partition dims can not be ' + 'partitioned on the same node') + else: + input_partition_dims = get_computation_shape_for_model_parallelism( + params.task.train_input_partition_dims) + elif FLAGS.mode == 'train': + if params.task.train_input_partition_dims: + input_partition_dims = get_computation_shape_for_model_parallelism( + params.task.train_input_partition_dims) + elif FLAGS.mode == 'eval' or FLAGS.mode == 'continuous_eval': + if params.task.eval_input_partition_dims: + input_partition_dims = get_computation_shape_for_model_parallelism( + params.task.eval_input_partition_dims) + + distribution_strategy = create_distribution_strategy( + distribution_strategy=params.runtime.distribution_strategy, + num_gpus=params.runtime.num_gpus, + input_partition_dims=input_partition_dims, + tpu_address=params.runtime.tpu) + with distribution_strategy.scope(): + task = task_factory.get_task(params.task, logging_dir=model_dir) + + train_lib.run_experiment( + distribution_strategy=distribution_strategy, + task=task, + mode=FLAGS.mode, + params=params, + model_dir=model_dir) + +if __name__ == '__main__': + tfm_flags.define_flags() + app.run(main) diff --git a/official/vision/detection/README.md b/official/vision/detection/README.md index d6cb5d4645f11681c450ab8c4ed33b38eba74b6b..2633f86d5dc4feed71ba170b92e9ffb66021652d 100644 --- a/official/vision/detection/README.md +++ b/official/vision/detection/README.md @@ -1,7 +1,7 @@ # Object Detection Models on TensorFlow 2 -**Note**: This repository is still under construction. -More features and instructions will be added soon. 
+**WARNING**: This repository will be deprecated and replaced by the solid +implementations inside vision/beta/. ## Prerequsite To get started, download the code from TensorFlow models GitHub repository or diff --git a/official/vision/detection/__init__.py b/official/vision/detection/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/__init__.py +++ b/official/vision/detection/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/detection/configs/__init__.py b/official/vision/detection/configs/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/configs/__init__.py +++ b/official/vision/detection/configs/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + diff --git a/official/vision/detection/configs/base_config.py b/official/vision/detection/configs/base_config.py index 4505da517bb500a6fe32f5849971fc92e2d726e7..32b8bcc1be551c249cafeab6706ae3bc58cc2d08 100644 --- a/official/vision/detection/configs/base_config.py +++ b/official/vision/detection/configs/base_config.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Base config template.""" diff --git a/official/vision/detection/configs/factory.py b/official/vision/detection/configs/factory.py index d60ea1e01133fdfffd76ad54daf4ee20ed1e46e0..58530518b7f6cbbb33e244c8f746fead7822add4 100644 --- a/official/vision/detection/configs/factory.py +++ b/official/vision/detection/configs/factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Factory to provide model configs.""" from official.modeling.hyperparams import params_dict from official.vision.detection.configs import maskrcnn_config +from official.vision.detection.configs import olnmask_config from official.vision.detection.configs import retinanet_config from official.vision.detection.configs import shapemask_config @@ -28,6 +29,9 @@ def config_generator(model): elif model == 'mask_rcnn': default_config = maskrcnn_config.MASKRCNN_CFG restrictions = maskrcnn_config.MASKRCNN_RESTRICTIONS + elif model == 'olnmask': + default_config = olnmask_config.OLNMASK_CFG + restrictions = olnmask_config.OLNMASK_RESTRICTIONS elif model == 'shapemask': default_config = shapemask_config.SHAPEMASK_CFG restrictions = shapemask_config.SHAPEMASK_RESTRICTIONS diff --git a/official/vision/detection/configs/maskrcnn_config.py b/official/vision/detection/configs/maskrcnn_config.py index 70c9b31448d3d83754c439c87ce9f0d0a04f88c9..e421fb4e7174b09c576423efe0cdbd5622d82304 100644 --- a/official/vision/detection/configs/maskrcnn_config.py +++ b/official/vision/detection/configs/maskrcnn_config.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Config template to train Mask R-CNN.""" from official.modeling.hyperparams import params_dict @@ -52,7 +52,6 @@ MASKRCNN_CFG.override({ 'anchor_size': 8, }, 'rpn_head': { - 'anchors_per_location': 3, 'num_convs': 2, 'num_filters': 256, 'use_separable_conv': False, diff --git a/official/vision/detection/configs/olnmask_config.py b/official/vision/detection/configs/olnmask_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2888cc2ad87a6b6fbf2c0364337ace8d9ac5a30a --- /dev/null +++ b/official/vision/detection/configs/olnmask_config.py @@ -0,0 +1,143 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Config template to train Object Localization Network (OLN).""" + +from official.modeling.hyperparams import params_dict +from official.vision.detection.configs import base_config + + +# pylint: disable=line-too-long +OLNMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG) +OLNMASK_CFG.override({ + 'type': 'olnmask', + 'eval': { + 'type': 'oln_xclass_box', + 'use_category': False, + 'seen_class': 'voc', + 'num_images_to_visualize': 0, + }, + 'architecture': { + 'parser': 'olnmask_parser', + 'min_level': 2, + 'max_level': 6, + 'include_rpn_class': False, + 'include_frcnn_class': False, + 'include_frcnn_box': True, + 'include_mask': False, + 'mask_target_size': 28, + 'num_classes': 2, + }, + 'olnmask_parser': { + 'output_size': [640, 640], + 'num_channels': 3, + 'rpn_match_threshold': 0.7, + 'rpn_unmatched_threshold': 0.3, + 'rpn_batch_size_per_im': 256, + 'rpn_fg_fraction': 0.5, + 'aug_rand_hflip': True, + 'aug_scale_min': 0.5, + 'aug_scale_max': 2.0, + 'skip_crowd_during_training': True, + 'max_num_instances': 100, + 'mask_crop_size': 112, + # centerness targets. + 'has_centerness': True, + 'rpn_center_match_iou_threshold': 0.3, + 'rpn_center_unmatched_iou_threshold': 0.1, + 'rpn_num_center_samples_per_im': 256, + # class manipulation. 
+ 'class_agnostic': True, + 'train_class': 'voc', + }, + 'anchor': { + 'num_scales': 1, + 'aspect_ratios': [1.0], + 'anchor_size': 8, + }, + 'rpn_head': { + 'num_convs': 2, + 'num_filters': 256, + 'use_separable_conv': False, + 'use_batch_norm': False, + # RPN-Centerness learning { + 'has_centerness': True, # } + }, + 'frcnn_head': { + 'num_convs': 0, + 'num_filters': 256, + 'use_separable_conv': False, + 'num_fcs': 2, + 'fc_dims': 1024, + 'use_batch_norm': False, + 'has_scoring': True, + }, + 'mrcnn_head': { + 'num_convs': 4, + 'num_filters': 256, + 'use_separable_conv': False, + 'use_batch_norm': False, + 'has_scoring': False, + }, + 'rpn_score_loss': { + 'rpn_batch_size_per_im': 256, + }, + 'rpn_box_loss': { + 'huber_loss_delta': 1.0 / 9.0, + }, + 'frcnn_box_loss': { + 'huber_loss_delta': 1.0, + }, + 'frcnn_box_score_loss': { + 'ignore_threshold': 0.3, + }, + 'roi_proposal': { + 'rpn_pre_nms_top_k': 2000, + 'rpn_post_nms_top_k': 2000, + 'rpn_nms_threshold': 0.7, + 'rpn_score_threshold': 0.0, + 'rpn_min_size_threshold': 0.0, + 'test_rpn_pre_nms_top_k': 2000, + 'test_rpn_post_nms_top_k': 2000, + 'test_rpn_nms_threshold': 0.7, + 'test_rpn_score_threshold': 0.0, + 'test_rpn_min_size_threshold': 0.0, + 'use_batched_nms': False, + }, + 'roi_sampling': { + 'num_samples_per_image': 512, + 'fg_fraction': 0.25, + 'fg_iou_thresh': 0.5, + 'bg_iou_thresh_hi': 0.5, + 'bg_iou_thresh_lo': 0.0, + 'mix_gt_boxes': True, + }, + 'mask_sampling': { + 'num_mask_samples_per_image': 128, # Typically = `num_samples_per_image` * `fg_fraction`. 
+ }, + 'postprocess': { + 'use_batched_nms': False, + 'max_total_size': 100, + 'nms_iou_threshold': 0.5, + 'score_threshold': 0.00, + 'pre_nms_num_boxes': 2000, + }, +}, is_strict=False) + + +OLNMASK_RESTRICTIONS = [ + # 'anchor.aspect_ratios == [1.0]', + # 'anchor.scales == 1', +] +# pylint: enable=line-too-long diff --git a/official/vision/detection/configs/retinanet_config.py b/official/vision/detection/configs/retinanet_config.py index 579e30d083aacf138a2f9baffe1be7713ad21583..fb55a8a3bbedd1a0f54719c2385087edf2733853 100644 --- a/official/vision/detection/configs/retinanet_config.py +++ b/official/vision/detection/configs/retinanet_config.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Config template to train Retinanet.""" from official.modeling.hyperparams import params_dict @@ -39,7 +39,6 @@ RETINANET_CFG.override({ 'max_num_instances': 100, }, 'retinanet_head': { - 'anchors_per_location': 9, 'num_convs': 4, 'num_filters': 256, 'use_separable_conv': False, diff --git a/official/vision/detection/configs/shapemask_config.py b/official/vision/detection/configs/shapemask_config.py index 0914c492e15f65e5ba66701f27ca0d88d13698ff..aef823275c17f12b91b9c30f3cb3ca5b45b4e2cf 100644 --- a/official/vision/detection/configs/shapemask_config.py +++ b/official/vision/detection/configs/shapemask_config.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Config to train shapemask on COCO.""" from official.modeling.hyperparams import params_dict from official.vision.detection.configs import base_config -SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(resnet\d+/)conv2d(|_([1-9]|10))\/' +SHAPEMASK_RESNET_FROZEN_VAR_PREFIX = r'(conv2d(|_([1-9]|10))|batch_normalization(|_([1-9]|10)))\/' SHAPEMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG) SHAPEMASK_CFG.override({ @@ -62,7 +62,6 @@ SHAPEMASK_CFG.override({ 'upsample_factor': 4, }, 'retinanet_head': { - 'anchors_per_location': 9, 'num_convs': 4, 'num_filters': 256, 'use_separable_conv': False, diff --git a/official/vision/detection/dataloader/__init__.py b/official/vision/detection/dataloader/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/dataloader/__init__.py +++ b/official/vision/detection/dataloader/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + diff --git a/official/vision/detection/dataloader/anchor.py b/official/vision/detection/dataloader/anchor.py index f46f7480062e75cec55d48ff683dcad8301e4994..c0abaadb476286ffaef9d88bd1d3fde7358d77f4 100644 --- a/official/vision/detection/dataloader/anchor.py +++ b/official/vision/detection/dataloader/anchor.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Anchor box and labeler definition.""" from __future__ import absolute_import @@ -19,42 +19,39 @@ from __future__ import division from __future__ import print_function import collections + import tensorflow as tf +from official.vision import keras_cv +from official.vision.detection.utils import box_utils from official.vision.detection.utils.object_detection import argmax_matcher from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler from official.vision.detection.utils.object_detection import box_list from official.vision.detection.utils.object_detection import faster_rcnn_box_coder -from official.vision.detection.utils.object_detection import region_similarity_calculator from official.vision.detection.utils.object_detection import target_assigner class Anchor(object): """Anchor class for anchor-based object detectors.""" - def __init__(self, - min_level, - max_level, - num_scales, - aspect_ratios, - anchor_size, - image_size): + def __init__(self, min_level, max_level, num_scales, 
aspect_ratios, + anchor_size, image_size): """Constructs multiscale anchors. Args: min_level: integer number of minimum level of the output feature pyramid. max_level: integer number of maximum level of the output feature pyramid. - num_scales: integer number representing intermediate scales added - on each level. For instances, num_scales=2 adds one additional - intermediate anchor scales [2^0, 2^0.5] on each level. - aspect_ratios: list of float numbers representing the aspect raito anchors + num_scales: integer number representing intermediate scales added on each + level. For instances, num_scales=2 adds one additional intermediate + anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: list of float numbers representing the aspect ratio anchors added on each level. The number indicates the ratio of width to height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each scale level. anchor_size: float number representing the scale of size of the base anchor to the feature stride 2^level. - image_size: a list of integer numbers or Tensors representing - [height, width] of the input image size.The image_size should be divided - by the largest feature stride 2^max_level. + image_size: a list of integer numbers or Tensors representing [height, + width] of the input image size.The image_size should be divisible by the + largest feature stride 2^max_level. 
""" self.min_level = min_level self.max_level = max_level @@ -76,11 +73,11 @@ class Anchor(object): boxes_l = [] for scale in range(self.num_scales): for aspect_ratio in self.aspect_ratios: - stride = 2 ** level - intermidate_scale = 2 ** (scale / float(self.num_scales)) - base_anchor_size = self.anchor_size * stride * intermidate_scale - aspect_x = aspect_ratio ** 0.5 - aspect_y = aspect_ratio ** -0.5 + stride = 2**level + intermediate_scale = 2**(scale / float(self.num_scales)) + base_anchor_size = self.anchor_size * stride * intermediate_scale + aspect_x = aspect_ratio**0.5 + aspect_y = aspect_ratio**-0.5 half_anchor_size_x = base_anchor_size * aspect_x / 2.0 half_anchor_size_y = base_anchor_size * aspect_y / 2.0 x = tf.range(stride / 2, self.image_size[1], stride) @@ -89,8 +86,10 @@ class Anchor(object): xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32) yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32) # Tensor shape Nx4. - boxes = tf.stack([yv - half_anchor_size_y, xv - half_anchor_size_x, - yv + half_anchor_size_y, xv + half_anchor_size_x], + boxes = tf.stack([ + yv - half_anchor_size_y, xv - half_anchor_size_x, + yv + half_anchor_size_y, xv + half_anchor_size_x + ], axis=1) boxes_l.append(boxes) # Concat anchors on the same level to tensor shape NxAx4. 
@@ -104,11 +103,11 @@ class Anchor(object): unpacked_labels = collections.OrderedDict() count = 0 for level in range(self.min_level, self.max_level + 1): - feat_size_y = tf.cast(self.image_size[0] / 2 ** level, tf.int32) - feat_size_x = tf.cast(self.image_size[1] / 2 ** level, tf.int32) + feat_size_y = tf.cast(self.image_size[0] / 2**level, tf.int32) + feat_size_x = tf.cast(self.image_size[1] / 2**level, tf.int32) steps = feat_size_y * feat_size_x * self.anchors_per_location - unpacked_labels[level] = tf.reshape( - labels[count:count + steps], [feat_size_y, feat_size_x, -1]) + unpacked_labels[level] = tf.reshape(labels[count:count + steps], + [feat_size_y, feat_size_x, -1]) count += steps return unpacked_labels @@ -124,10 +123,7 @@ class Anchor(object): class AnchorLabeler(object): """Labeler for dense object detector.""" - def __init__(self, - anchor, - match_threshold=0.5, - unmatched_threshold=0.5): + def __init__(self, anchor, match_threshold=0.5, unmatched_threshold=0.5): """Constructs anchor labeler to assign labels to anchors. Args: @@ -139,7 +135,7 @@ class AnchorLabeler(object): upper-bound threshold to assign negative labels for anchors. An anchor with a score below the threshold is labeled negative. """ - similarity_calc = region_similarity_calculator.IouSimilarity() + similarity_calc = keras_cv.ops.IouSimilarity() matcher = argmax_matcher.ArgMaxMatcher( match_threshold, unmatched_threshold=unmatched_threshold, @@ -161,6 +157,7 @@ class AnchorLabeler(object): For each row, it stores [y0, x0, y1, x1] for four corners of a box. gt_labels: A integer tensor with shape [N, 1] representing groundtruth classes. + Returns: cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. 
The values are tensor with @@ -205,11 +202,14 @@ class AnchorLabeler(object): class RpnAnchorLabeler(AnchorLabeler): """Labeler for Region Proposal Network.""" - def __init__(self, anchor, match_threshold=0.7, - unmatched_threshold=0.3, rpn_batch_size_per_im=256, + def __init__(self, + anchor, + match_threshold=0.7, + unmatched_threshold=0.3, + rpn_batch_size_per_im=256, rpn_fg_fraction=0.5): - AnchorLabeler.__init__(self, anchor, match_threshold=0.7, - unmatched_threshold=0.3) + AnchorLabeler.__init__( + self, anchor, match_threshold=0.7, unmatched_threshold=0.3) self._rpn_batch_size_per_im = rpn_batch_size_per_im self._rpn_fg_fraction = rpn_fg_fraction @@ -219,11 +219,12 @@ class RpnAnchorLabeler(AnchorLabeler): This function performs subsampling for foreground (fg) and background (bg) anchors. Args: - match_results: A integer tensor with shape [N] representing the - matching results of anchors. (1) match_results[i]>=0, - meaning that column i is matched with row match_results[i]. - (2) match_results[i]=-1, meaning that column i is not matched. - (3) match_results[i]=-2, meaning that column i is ignored. + match_results: A integer tensor with shape [N] representing the matching + results of anchors. (1) match_results[i]>=0, meaning that column i is + matched with row match_results[i]. (2) match_results[i]=-1, meaning that + column i is not matched. (3) match_results[i]=-2, meaning that column i + is ignored. + Returns: score_targets: a integer tensor with the a shape of [N]. (1) score_targets[i]=1, the anchor is a positive sample. 
@@ -241,8 +242,7 @@ class RpnAnchorLabeler(AnchorLabeler): indicator = tf.greater(match_results, -2) labels = tf.greater(match_results, -1) - samples = sampler.subsample( - indicator, self._rpn_batch_size_per_im, labels) + samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels) positive_labels = tf.where( tf.logical_and(samples, labels), tf.constant(2, dtype=tf.int32, shape=match_results.shape), @@ -253,8 +253,8 @@ class RpnAnchorLabeler(AnchorLabeler): tf.constant(0, dtype=tf.int32, shape=match_results.shape)) ignore_labels = tf.fill(match_results.shape, -1) - return (ignore_labels + positive_labels + negative_labels, - positive_labels, negative_labels) + return (ignore_labels + positive_labels + negative_labels, positive_labels, + negative_labels) def label_anchors(self, gt_boxes, gt_labels): """Labels anchors with ground truth inputs. @@ -264,6 +264,7 @@ class RpnAnchorLabeler(AnchorLabeler): For each row, it stores [y0, x0, y1, x1] for four corners of a box. gt_labels: A integer tensor with shape [N, 1] representing groundtruth classes. + Returns: score_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with @@ -290,3 +291,168 @@ class RpnAnchorLabeler(AnchorLabeler): box_targets_dict = self._anchor.unpack_labels(box_targets) return score_targets_dict, box_targets_dict + + +class OlnAnchorLabeler(RpnAnchorLabeler): + """Labeler for Region Proposal Network.""" + + def __init__(self, + anchor, + match_threshold=0.7, + unmatched_threshold=0.3, + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5, + has_centerness=False, + center_match_iou_threshold=0.3, + center_unmatched_iou_threshold=0.1, + num_center_samples_per_im=256): + """Constructs rpn anchor labeler to assign labels and centerness to anchors. + + Args: + anchor: an instance of class Anchors. + match_threshold: a float number between 0 and 1 representing the + lower-bound threshold to assign positive labels for anchors. 
An anchor + with a score over the threshold is labeled positive. + unmatched_threshold: a float number between 0 and 1 representing the + upper-bound threshold to assign negative labels for anchors. An anchor + with a score below the threshold is labeled negative. + rpn_batch_size_per_im: number of anchors that are sampled per image. + rpn_fg_fraction: + has_centerness: whether to include centerness target creation. An anchor + is paired with one centerness score. + center_match_iou_threshold: a float number between 0 and 1 representing + the lower-bound threshold to sample foreground anchors for centerness + regression. An anchor with a score over the threshold is sampled as + foreground sample for centerness regression. We sample mostly from the + foreground region (255 out of 256 samples). That is, we sample 255 vs 1 + (foreground vs background) anchor points to learn centerness regression. + center_unmatched_iou_threshold: a float number between 0 and 1 + representing the lower-bound threshold to sample background anchors for + centerness regression. An anchor with a score over the threshold is + sampled as foreground sample for centerness regression. We sample very + sparsely from the background region (1 out of 256 samples). That is, we + sample 255 vs 1 (foreground vs background) anchor points to learn + centerness regression. + num_center_samples_per_im: number of anchor points per image that are + sampled as centerness targets. 
+ """ + super(OlnAnchorLabeler, self).__init__( + anchor, match_threshold=match_threshold, + unmatched_threshold=unmatched_threshold, + rpn_batch_size_per_im=rpn_batch_size_per_im, + rpn_fg_fraction=rpn_fg_fraction) + similarity_calc = keras_cv.ops.IouSimilarity() + matcher = argmax_matcher.ArgMaxMatcher( + match_threshold, + unmatched_threshold=unmatched_threshold, + negatives_lower_than_unmatched=True, + force_match_for_each_row=True) + box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() + if has_centerness: + center_matcher = argmax_matcher.ArgMaxMatcher( + center_match_iou_threshold, + unmatched_threshold=center_match_iou_threshold, + negatives_lower_than_unmatched=True, + force_match_for_each_row=True,) + else: + center_matcher = None + + self._target_assigner = target_assigner.OlnTargetAssigner( + similarity_calc, matcher, box_coder, + center_matcher=center_matcher) + self._num_center_samples_per_im = num_center_samples_per_im + self._center_unmatched_iou_threshold = center_unmatched_iou_threshold + self._rpn_batch_size_per_im = rpn_batch_size_per_im + self._rpn_fg_fraction = rpn_fg_fraction + + def label_anchors_lrtb(self, gt_boxes, gt_labels): + """Labels anchors with ground truth inputs. + + Args: + gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. + For each row, it stores [y0, x0, y1, x1] for four corners of a box. + gt_labels: A integer tensor with shape [N, 1] representing groundtruth + classes. + + Returns: + score_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors]. The height_l and width_l + represent the dimension of class logits at l-th level. + box_targets_dict: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, num_anchors * 4]. The height_l and + width_l represent the dimension of bounding box regression output at + l-th level. 
+ lrtb_targets_dict: Same structure as box_targets_dict, except the regression + targets are converted from xyhw to lrtb format. Ordered dictionary with + keys [min_level, min_level+1, ..., max_level]. The values are tensor + with shape [height_l, width_l, num_anchors * 4]. The height_l and + width_l represent the dimension of bounding box regression output at + l-th level. + center_targets_dict: Same structure as score_targets_dict, except the + scores are centerness values ranging from 0 to 1. Ordered dictionary + with keys [min_level, min_level+1, ..., max_level]. The values are + tensor with shape [height_l, width_l, num_anchors]. The height_l and + width_l represent the dimension of class logits at l-th level. + """ + gt_box_list = box_list.BoxList(gt_boxes) + anchor_box_list = box_list.BoxList(self._anchor.boxes) + + # cls_targets, cls_weights, box_weights are not used. + (_, _, box_targets, _, matches, + matched_gt_box_list, matched_anchors_mask, + center_matched_gt_box_list, center_matched_anchors_mask, + matched_ious) = self._target_assigner.assign( + anchor_box_list, gt_box_list, gt_labels) + # Box lrtb_targets. + lrtb_targets, _ = box_utils.encode_boxes_lrtb( + matched_gt_box_list.data['boxes'], + anchor_box_list.data['boxes'], + weights=[1.0, 1.0, 1.0, 1.0]) + lrtb_sanity = tf.logical_and( + tf.greater(tf.reduce_min(lrtb_targets, -1), 0.), + matched_anchors_mask) + # To broadcast lrtb_sanity to the same shape as lrtb_targets. + lrtb_sanity = tf.tile(tf.expand_dims(lrtb_sanity, 1), + [1, tf.shape(lrtb_targets)[1]]) + lrtb_targets = tf.where(lrtb_sanity, + lrtb_targets, + tf.zeros_like(lrtb_targets)) + # RPN anchor-gtbox iou values. + iou_targets = tf.where(tf.greater(matched_ious, 0.0), + matched_ious, + tf.zeros_like(matched_ious)) + # Centerness_targets. + _, center_targets = box_utils.encode_boxes_lrtb( + center_matched_gt_box_list.data['boxes'], + anchor_box_list.data['boxes'], + weights=[1.0, 1.0, 1.0, 1.0]) + # Positive-negative centerness sampler. 
+ num_center_samples_per_im = self._num_center_samples_per_im + center_pos_neg_sampler = ( + balanced_positive_negative_sampler.BalancedPositiveNegativeSampler( + positive_fraction=(1.- 1./num_center_samples_per_im), + is_static=False)) + center_pos_neg_indicator = tf.logical_or( + center_matched_anchors_mask, + tf.less(iou_targets, self._center_unmatched_iou_threshold)) + center_pos_labels = center_matched_anchors_mask + center_samples = center_pos_neg_sampler.subsample( + center_pos_neg_indicator, num_center_samples_per_im, center_pos_labels) + is_valid = center_samples + center_targets = tf.where(is_valid, + center_targets, + (-1) * tf.ones_like(center_targets)) + + # score_targets contains the subsampled positive and negative anchors. + score_targets, _, _ = self._get_rpn_samples(matches.match_results) + + # Unpacks labels. + score_targets_dict = self._anchor.unpack_labels(score_targets) + box_targets_dict = self._anchor.unpack_labels(box_targets) + lrtb_targets_dict = self._anchor.unpack_labels(lrtb_targets) + center_targets_dict = self._anchor.unpack_labels(center_targets) + + return (score_targets_dict, box_targets_dict, + lrtb_targets_dict, center_targets_dict) diff --git a/official/vision/detection/dataloader/factory.py b/official/vision/detection/dataloader/factory.py index 1e13aec222f529d97ee9c502d408648b9d091e5b..3d1e8574a497747463dadd792efc2ccadba7e3ed 100644 --- a/official/vision/detection/dataloader/factory.py +++ b/official/vision/detection/dataloader/factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Model architecture factory.""" from __future__ import absolute_import @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from official.vision.detection.dataloader import maskrcnn_parser +from official.vision.detection.dataloader import olnmask_parser from official.vision.detection.dataloader import retinanet_parser from official.vision.detection.dataloader import shapemask_parser @@ -69,6 +70,38 @@ def parser_generator(params, mode): mask_crop_size=parser_params.mask_crop_size, use_bfloat16=params.architecture.use_bfloat16, mode=mode) + elif params.architecture.parser == 'olnmask_parser': + anchor_params = params.anchor + parser_params = params.olnmask_parser + parser_fn = olnmask_parser.Parser( + output_size=parser_params.output_size, + min_level=params.architecture.min_level, + max_level=params.architecture.max_level, + num_scales=anchor_params.num_scales, + aspect_ratios=anchor_params.aspect_ratios, + anchor_size=anchor_params.anchor_size, + rpn_match_threshold=parser_params.rpn_match_threshold, + rpn_unmatched_threshold=parser_params.rpn_unmatched_threshold, + rpn_batch_size_per_im=parser_params.rpn_batch_size_per_im, + rpn_fg_fraction=parser_params.rpn_fg_fraction, + aug_rand_hflip=parser_params.aug_rand_hflip, + aug_scale_min=parser_params.aug_scale_min, + aug_scale_max=parser_params.aug_scale_max, + skip_crowd_during_training=parser_params.skip_crowd_during_training, + max_num_instances=parser_params.max_num_instances, + include_mask=params.architecture.include_mask, + mask_crop_size=parser_params.mask_crop_size, + use_bfloat16=params.architecture.use_bfloat16, + mode=mode, + has_centerness=parser_params.has_centerness, + rpn_center_match_iou_threshold=( + parser_params.rpn_center_match_iou_threshold), + rpn_center_unmatched_iou_threshold=( + parser_params.rpn_center_unmatched_iou_threshold), + rpn_num_center_samples_per_im=( + 
parser_params.rpn_num_center_samples_per_im), + class_agnostic=parser_params.class_agnostic, + train_class=parser_params.train_class,) elif params.architecture.parser == 'shapemask_parser': anchor_params = params.anchor parser_params = params.shapemask_parser diff --git a/official/vision/detection/dataloader/input_reader.py b/official/vision/detection/dataloader/input_reader.py index 6e65243f6863ccadb45704b3ed487aec3b8ab21a..d0f9b7abefb5a49bec7bd8ed65b005c56acdc68f 100644 --- a/official/vision/detection/dataloader/input_reader.py +++ b/official/vision/detection/dataloader/input_reader.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Data loader and input processing.""" from __future__ import absolute_import @@ -91,7 +91,8 @@ class InputFn(object): dataset = dataset.repeat() dataset = dataset.interleave( - map_func=self._dataset_fn, cycle_length=32, + map_func=self._dataset_fn, + cycle_length=32, num_parallel_calls=tf.data.experimental.AUTOTUNE) if self._is_training: diff --git a/official/vision/detection/dataloader/maskrcnn_parser.py b/official/vision/detection/dataloader/maskrcnn_parser.py index 35db6f1478236d347839625d397fc918478694c4..7df1d547cbab068f68fc750927c945eefddec48e 100644 --- a/official/vision/detection/dataloader/maskrcnn_parser.py +++ b/official/vision/detection/dataloader/maskrcnn_parser.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Data parser and processing for Mask R-CNN.""" import tensorflow as tf diff --git a/official/vision/detection/dataloader/mode_keys.py b/official/vision/detection/dataloader/mode_keys.py index 020382b2486ca25a41f0c3eb88b1f2038c538e7e..d6fdd9008bd4491ebec171d25c14d517ca3647c6 100644 --- a/official/vision/detection/dataloader/mode_keys.py +++ b/official/vision/detection/dataloader/mode_keys.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Standard names for input dataloader modes. The following standard keys are defined: diff --git a/official/vision/detection/dataloader/olnmask_parser.py b/official/vision/detection/dataloader/olnmask_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..5f05f7387c7eca91ba4b42ed87bbd5a3a3926a73 --- /dev/null +++ b/official/vision/detection/dataloader/olnmask_parser.py @@ -0,0 +1,327 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data parser and processing for Mask R-CNN.""" + +import tensorflow as tf + +from official.vision.detection.dataloader import anchor +from official.vision.detection.dataloader.maskrcnn_parser import Parser as MaskrcnnParser +from official.vision.detection.utils import box_utils +from official.vision.detection.utils import class_utils +from official.vision.detection.utils import input_utils + + +class Parser(MaskrcnnParser): + """Parser to parse an image and its annotations into a dictionary of tensors.""" + + def __init__(self, + output_size, + min_level, + max_level, + num_scales, + aspect_ratios, + anchor_size, + rpn_match_threshold=0.7, + rpn_unmatched_threshold=0.3, + rpn_batch_size_per_im=256, + rpn_fg_fraction=0.5, + aug_rand_hflip=False, + aug_scale_min=1.0, + aug_scale_max=1.0, + skip_crowd_during_training=True, + max_num_instances=100, + include_mask=False, + mask_crop_size=112, + use_bfloat16=True, + mode=None, + # for centerness learning. + has_centerness=False, + rpn_center_match_iou_threshold=0.3, + rpn_center_unmatched_iou_threshold=0.1, + rpn_num_center_samples_per_im=256, + # for class manipulation. + class_agnostic=False, + train_class='all', + ): + """Initializes parameters for parsing annotations in the dataset. + + Args: + output_size: `Tensor` or `list` for [height, width] of output image. The + output_size should be divided by the largest feature stride 2^max_level. + min_level: `int` number of minimum level of the output feature pyramid. + max_level: `int` number of maximum level of the output feature pyramid. 
+ num_scales: `int` number representing intermediate scales added + on each level. For instances, num_scales=2 adds one additional + intermediate anchor scales [2^0, 2^0.5] on each level. + aspect_ratios: `list` of float numbers representing the aspect ratio + anchors added on each level. The number indicates the ratio of width to + height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors + on each scale level. + anchor_size: `float` number representing the scale of size of the base + anchor to the feature stride 2^level. + rpn_match_threshold: + rpn_unmatched_threshold: + rpn_batch_size_per_im: + rpn_fg_fraction: + aug_rand_hflip: `bool`, if True, augment training with random + horizontal flip. + aug_scale_min: `float`, the minimum scale applied to `output_size` for + data augmentation during training. + aug_scale_max: `float`, the maximum scale applied to `output_size` for + data augmentation during training. + skip_crowd_during_training: `bool`, if True, skip annotations labeled with + `is_crowd` equals to 1. + max_num_instances: `int` number of maximum number of instances in an + image. The groundtruth data will be padded to `max_num_instances`. + include_mask: a bool to indicate whether parse mask groundtruth. + mask_crop_size: the size which groundtruth mask is cropped to. + use_bfloat16: `bool`, if True, cast output image to tf.bfloat16. + mode: a ModeKeys. Specifies if this is training, evaluation, prediction + or prediction with groundtruths in the outputs. + has_centerness: whether to create centerness targets + rpn_center_match_iou_threshold: iou threshold for valid centerness samples + , set to 0.3 by default. + rpn_center_unmatched_iou_threshold: iou threshold for invalid centerness + samples, set to 0.1 by default. + rpn_num_center_samples_per_im: number of centerness samples per image, + 256 by default. + class_agnostic: whether to merge class ids into one foreground(=1) class, + False by default. 
+ train_class: 'all' or 'voc' or 'nonvoc', 'all' by default. + """ + super(Parser, self).__init__( + output_size=output_size, + min_level=min_level, + max_level=max_level, + num_scales=num_scales, + aspect_ratios=aspect_ratios, + anchor_size=anchor_size, + rpn_match_threshold=rpn_match_threshold, + rpn_unmatched_threshold=rpn_unmatched_threshold, + rpn_batch_size_per_im=rpn_batch_size_per_im, + rpn_fg_fraction=rpn_fg_fraction, + aug_rand_hflip=aug_rand_hflip, + aug_scale_min=aug_scale_min, + aug_scale_max=aug_scale_max, + skip_crowd_during_training=skip_crowd_during_training, + max_num_instances=max_num_instances, + include_mask=include_mask, + mask_crop_size=mask_crop_size, + use_bfloat16=use_bfloat16, + mode=mode,) + + # Centerness target assigning. + self._has_centerness = has_centerness + self._rpn_center_match_iou_threshold = rpn_center_match_iou_threshold + self._rpn_center_unmatched_iou_threshold = ( + rpn_center_unmatched_iou_threshold) + self._rpn_num_center_samples_per_im = rpn_num_center_samples_per_im + + # Class manipulation. + self._class_agnostic = class_agnostic + self._train_class = train_class + + def _parse_train_data(self, data): + """Parses data for training. + + Args: + data: the decoded tensor dictionary from TfExampleDecoder. + + Returns: + image: image tensor that is preprocessed to have normalized value and + dimension [output_size[0], output_size[1], 3] + labels: a dictionary of tensors used for training. The following describes + {key: value} pairs in the dictionary. + image_info: a 2D `Tensor` that encodes the information of the image and + the applied preprocessing. It is in the format of + [[original_height, original_width], [scaled_height, scaled_width], + anchor_boxes: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, 4] representing anchor boxes at each level. + rpn_score_targets: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. 
The values are tensor with + shape [height_l, width_l, anchors_per_location]. The height_l and + width_l represent the dimension of class logits at l-th level. + rpn_box_targets: ordered dictionary with keys + [min_level, min_level+1, ..., max_level]. The values are tensor with + shape [height_l, width_l, anchors_per_location * 4]. The height_l and + width_l represent the dimension of bounding box regression output at + l-th level. + gt_boxes: Groundtruth bounding box annotations. The box is represented + in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled + image that is fed to the network. The tensor is padded with -1 to + the fixed dimension [self._max_num_instances, 4]. + gt_classes: Groundtruth classes annotations. The tensor is padded + with -1 to the fixed dimension [self._max_num_instances]. + gt_masks: groundtruth masks cropped by the bounding box and + resized to a fixed size determined by mask_crop_size. + """ + classes = data['groundtruth_classes'] + boxes = data['groundtruth_boxes'] + if self._include_mask: + masks = data['groundtruth_instance_masks'] + + is_crowds = data['groundtruth_is_crowd'] + # Skips annotations with `is_crowd` = True. + if self._skip_crowd_during_training and self._is_training: + num_groundtruths = tf.shape(classes)[0] + with tf.control_dependencies([num_groundtruths, is_crowds]): + indices = tf.cond( + tf.greater(tf.size(is_crowds), 0), + lambda: tf.where(tf.logical_not(is_crowds))[:, 0], + lambda: tf.cast(tf.range(num_groundtruths), tf.int64)) + classes = tf.gather(classes, indices) + boxes = tf.gather(boxes, indices) + if self._include_mask: + masks = tf.gather(masks, indices) + + # Gets original image and its size. + image = data['image'] + image_shape = tf.shape(image)[0:2] + + # Normalizes image with mean and std pixel values. + image = input_utils.normalize_image(image) + + # Flips image randomly during training. 
+ if self._aug_rand_hflip: + if self._include_mask: + image, boxes, masks = input_utils.random_horizontal_flip( + image, boxes, masks) + else: + image, boxes = input_utils.random_horizontal_flip( + image, boxes) + + # Converts boxes from normalized coordinates to pixel coordinates. + # Now the coordinates of boxes are w.r.t. the original image. + boxes = box_utils.denormalize_boxes(boxes, image_shape) + + # Resizes and crops image. + image, image_info = input_utils.resize_and_crop_image( + image, + self._output_size, + padded_size=input_utils.compute_padded_size( + self._output_size, 2 ** self._max_level), + aug_scale_min=self._aug_scale_min, + aug_scale_max=self._aug_scale_max) + image_height, image_width, _ = image.get_shape().as_list() + + # Resizes and crops boxes. + # Now the coordinates of boxes are w.r.t the scaled image. + image_scale = image_info[2, :] + offset = image_info[3, :] + boxes = input_utils.resize_and_crop_boxes( + boxes, image_scale, image_info[1, :], offset) + + # Filters out ground truth boxes that are all zeros. + indices = box_utils.get_non_empty_box_indices(boxes) + boxes = tf.gather(boxes, indices) + classes = tf.gather(classes, indices) + if self._include_mask: + masks = tf.gather(masks, indices) + # Transfer boxes to the original image space and do normalization. + cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) + cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) + cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape) + num_masks = tf.shape(masks)[0] + masks = tf.image.crop_and_resize( + tf.expand_dims(masks, axis=-1), + cropped_boxes, + box_indices=tf.range(num_masks, dtype=tf.int32), + crop_size=[self._mask_crop_size, self._mask_crop_size], + method='bilinear') + masks = tf.squeeze(masks, axis=-1) + + # Class manipulation. + # Filter out novel split classes from training. 
+ if self._train_class != 'all': + valid_classes = tf.cast( + class_utils.coco_split_class_ids(self._train_class), + dtype=classes.dtype) + match = tf.reduce_any(tf.equal( + tf.expand_dims(valid_classes, 1), + tf.expand_dims(classes, 0)), 0) + # kill novel split classes and boxes. + boxes = tf.gather(boxes, tf.where(match)[:, 0]) + classes = tf.gather(classes, tf.where(match)[:, 0]) + if self._include_mask: + masks = tf.gather(masks, tf.where(match)[:, 0]) + + # Assigns anchor targets. + # Note that after the target assignment, box targets are absolute pixel + # offsets w.r.t. the scaled image. + input_anchor = anchor.Anchor( + self._min_level, + self._max_level, + self._num_scales, + self._aspect_ratios, + self._anchor_size, + (image_height, image_width)) + anchor_labeler = anchor.OlnAnchorLabeler( + input_anchor, + self._rpn_match_threshold, + self._rpn_unmatched_threshold, + self._rpn_batch_size_per_im, + self._rpn_fg_fraction, + # for centerness target. + self._has_centerness, + self._rpn_center_match_iou_threshold, + self._rpn_center_unmatched_iou_threshold, + self._rpn_num_center_samples_per_im,) + + if self._has_centerness: + rpn_score_targets, _, rpn_lrtb_targets, rpn_center_targets = ( + anchor_labeler.label_anchors_lrtb( + gt_boxes=boxes, + gt_labels=tf.cast( + tf.expand_dims(classes, axis=-1), dtype=tf.float32))) + else: + rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors( + boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32)) + # For base rpn, dummy placeholder for centerness target. + rpn_center_targets = rpn_score_targets.copy() + + # If bfloat16 is used, casts input image to tf.bfloat16. + if self._use_bfloat16: + image = tf.cast(image, dtype=tf.bfloat16) + + inputs = { + 'image': image, + 'image_info': image_info, + } + # Packs labels for model_fn outputs. 
+ labels = { + 'anchor_boxes': input_anchor.multilevel_boxes, + 'image_info': image_info, + 'rpn_score_targets': rpn_score_targets, + 'rpn_box_targets': (rpn_lrtb_targets if self._has_centerness + else rpn_box_targets), + 'rpn_center_targets': rpn_center_targets, + } + # If class_agnostic, convert to binary classes. + if self._class_agnostic: + classes = tf.where(tf.greater(classes, 0), + tf.ones_like(classes), + tf.zeros_like(classes)) + + inputs['gt_boxes'] = input_utils.pad_to_fixed_size(boxes, + self._max_num_instances, + -1) + inputs['gt_classes'] = input_utils.pad_to_fixed_size( + classes, self._max_num_instances, -1) + if self._include_mask: + inputs['gt_masks'] = input_utils.pad_to_fixed_size( + masks, self._max_num_instances, -1) + + return inputs, labels diff --git a/official/vision/detection/dataloader/retinanet_parser.py b/official/vision/detection/dataloader/retinanet_parser.py index d226a6da7e2fc2e650ad6ecdfb5a431d13df97a3..8e9c3397ed304ec505e2030ddd5eb825273d2ff0 100644 --- a/official/vision/detection/dataloader/retinanet_parser.py +++ b/official/vision/detection/dataloader/retinanet_parser.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Data parser and processing. Parse image and ground truths in a dataset to training targets and package them @@ -79,9 +79,9 @@ class Parser(object): output_size should be divided by the largest feature stride 2^max_level. min_level: `int` number of minimum level of the output feature pyramid. 
max_level: `int` number of maximum level of the output feature pyramid. - num_scales: `int` number representing intermediate scales added - on each level. For instances, num_scales=2 adds one additional - intermediate anchor scales [2^0, 2^0.5] on each level. + num_scales: `int` number representing intermediate scales added on each + level. For instances, num_scales=2 adds one additional intermediate + anchor scales [2^0, 2^0.5] on each level. aspect_ratios: `list` of float numbers representing the aspect raito anchors added on each level. The number indicates the ratio of width to height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors @@ -94,8 +94,8 @@ class Parser(object): unmatched_threshold: `float` number between 0 and 1 representing the upper-bound threshold to assign negative labels for anchors. An anchor with a score below the threshold is labeled negative. - aug_rand_hflip: `bool`, if True, augment training with random - horizontal flip. + aug_rand_hflip: `bool`, if True, augment training with random horizontal + flip. aug_scale_min: `float`, the minimum scale applied to `output_size` for data augmentation during training. aug_scale_max: `float`, the maximum scale applied to `output_size` for @@ -109,8 +109,8 @@ class Parser(object): max_num_instances: `int` number of maximum number of instances in an image. The groundtruth data will be padded to `max_num_instances`. use_bfloat16: `bool`, if True, cast output image to tf.bfloat16. - mode: a ModeKeys. Specifies if this is training, evaluation, prediction - or prediction with groundtruths in the outputs. + mode: a ModeKeys. Specifies if this is training, evaluation, prediction or + prediction with groundtruths in the outputs. 
""" self._mode = mode self._max_num_instances = max_num_instances @@ -232,8 +232,8 @@ class Parser(object): image, image_info = input_utils.resize_and_crop_image( image, self._output_size, - padded_size=input_utils.compute_padded_size( - self._output_size, 2 ** self._max_level), + padded_size=input_utils.compute_padded_size(self._output_size, + 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() @@ -241,22 +241,21 @@ class Parser(object): # Resizes and crops boxes. image_scale = image_info[2, :] offset = image_info[3, :] - boxes = input_utils.resize_and_crop_boxes( - boxes, image_scale, image_info[1, :], offset) + boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, + image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchors. - input_anchor = anchor.Anchor( - self._min_level, self._max_level, self._num_scales, - self._aspect_ratios, self._anchor_size, (image_height, image_width)) - anchor_labeler = anchor.AnchorLabeler( - input_anchor, self._match_threshold, self._unmatched_threshold) + input_anchor = anchor.Anchor(self._min_level, self._max_level, + self._num_scales, self._aspect_ratios, + self._anchor_size, (image_height, image_width)) + anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, + self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( - boxes, - tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) + boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. 
if self._use_bfloat16: @@ -292,8 +291,8 @@ class Parser(object): image, image_info = input_utils.resize_and_crop_image( image, self._output_size, - padded_size=input_utils.compute_padded_size( - self._output_size, 2 ** self._max_level), + padded_size=input_utils.compute_padded_size(self._output_size, + 2**self._max_level), aug_scale_min=1.0, aug_scale_max=1.0) image_height, image_width, _ = image.get_shape().as_list() @@ -301,22 +300,21 @@ class Parser(object): # Resizes and crops boxes. image_scale = image_info[2, :] offset = image_info[3, :] - boxes = input_utils.resize_and_crop_boxes( - boxes, image_scale, image_info[1, :], offset) + boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, + image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchors. - input_anchor = anchor.Anchor( - self._min_level, self._max_level, self._num_scales, - self._aspect_ratios, self._anchor_size, (image_height, image_width)) - anchor_labeler = anchor.AnchorLabeler( - input_anchor, self._match_threshold, self._unmatched_threshold) + input_anchor = anchor.Anchor(self._min_level, self._max_level, + self._num_scales, self._aspect_ratios, + self._anchor_size, (image_height, image_width)) + anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, + self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( - boxes, - tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) + boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: @@ -324,18 +322,24 @@ class Parser(object): # Sets up groundtruth data for evaluation. 
groundtruths = { - 'source_id': data['source_id'], - 'num_groundtrtuhs': tf.shape(data['groundtruth_classes']), - 'image_info': image_info, - 'boxes': box_utils.denormalize_boxes( - data['groundtruth_boxes'], image_shape), - 'classes': data['groundtruth_classes'], - 'areas': data['groundtruth_area'], - 'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32), + 'source_id': + data['source_id'], + 'num_groundtrtuhs': + tf.shape(data['groundtruth_classes']), + 'image_info': + image_info, + 'boxes': + box_utils.denormalize_boxes(data['groundtruth_boxes'], image_shape), + 'classes': + data['groundtruth_classes'], + 'areas': + data['groundtruth_area'], + 'is_crowds': + tf.cast(data['groundtruth_is_crowd'], tf.int32), } groundtruths['source_id'] = process_source_id(groundtruths['source_id']) - groundtruths = pad_groundtruths_to_fixed_size( - groundtruths, self._max_num_instances) + groundtruths = pad_groundtruths_to_fixed_size(groundtruths, + self._max_num_instances) # Packs labels for model_fn outputs. labels = { @@ -361,8 +365,8 @@ class Parser(object): image, image_info = input_utils.resize_and_crop_image( image, self._output_size, - padded_size=input_utils.compute_padded_size( - self._output_size, 2 ** self._max_level), + padded_size=input_utils.compute_padded_size(self._output_size, + 2**self._max_level), aug_scale_min=1.0, aug_scale_max=1.0) image_height, image_width, _ = image.get_shape().as_list() @@ -372,9 +376,9 @@ class Parser(object): image = tf.cast(image, dtype=tf.bfloat16) # Compute Anchor boxes. - input_anchor = anchor.Anchor( - self._min_level, self._max_level, self._num_scales, - self._aspect_ratios, self._anchor_size, (image_height, image_width)) + input_anchor = anchor.Anchor(self._min_level, self._max_level, + self._num_scales, self._aspect_ratios, + self._anchor_size, (image_height, image_width)) labels = { 'anchor_boxes': input_anchor.multilevel_boxes, @@ -384,8 +388,8 @@ class Parser(object): # in labels. 
if self._mode == ModeKeys.PREDICT_WITH_GT: # Converts boxes from normalized coordinates to pixel coordinates. - boxes = box_utils.denormalize_boxes( - data['groundtruth_boxes'], image_shape) + boxes = box_utils.denormalize_boxes(data['groundtruth_boxes'], + image_shape) groundtruths = { 'source_id': data['source_id'], 'num_detections': tf.shape(data['groundtruth_classes']), @@ -395,8 +399,8 @@ class Parser(object): 'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32), } groundtruths['source_id'] = process_source_id(groundtruths['source_id']) - groundtruths = pad_groundtruths_to_fixed_size( - groundtruths, self._max_num_instances) + groundtruths = pad_groundtruths_to_fixed_size(groundtruths, + self._max_num_instances) labels['groundtruths'] = groundtruths # Computes training objective for evaluation loss. @@ -404,18 +408,17 @@ class Parser(object): image_scale = image_info[2, :] offset = image_info[3, :] - boxes = input_utils.resize_and_crop_boxes( - boxes, image_scale, image_info[1, :], offset) + boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, + image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) # Assigns anchors. 
- anchor_labeler = anchor.AnchorLabeler( - input_anchor, self._match_threshold, self._unmatched_threshold) + anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, + self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( - boxes, - tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) + boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) labels['cls_targets'] = cls_targets labels['box_targets'] = box_targets labels['num_positives'] = num_positives diff --git a/official/vision/detection/dataloader/shapemask_parser.py b/official/vision/detection/dataloader/shapemask_parser.py index 3bc368c0ef290291405157b772ed523f3725e0a3..c0e79e071692adcb8f1328563ccb9bd40734df80 100644 --- a/official/vision/detection/dataloader/shapemask_parser.py +++ b/official/vision/detection/dataloader/shapemask_parser.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Data parser and processing. Parse image and ground truths in a dataset to training targets and package them @@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors. arXiv:1904.03239. 
""" - import tensorflow as tf from official.vision.detection.dataloader import anchor diff --git a/official/vision/detection/dataloader/tf_example_decoder.py b/official/vision/detection/dataloader/tf_example_decoder.py index f719a9168a4d3106600fffcc47c14cc90f3cadc7..e6472a36b9a31a8e8a98cecf10a6abf8ccb03985 100644 --- a/official/vision/detection/dataloader/tf_example_decoder.py +++ b/official/vision/detection/dataloader/tf_example_decoder.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tensorflow Example proto decoder for object detection. diff --git a/official/vision/detection/evaluation/__init__.py b/official/vision/detection/evaluation/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/evaluation/__init__.py +++ b/official/vision/detection/evaluation/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + diff --git a/official/vision/detection/evaluation/coco_evaluator.py b/official/vision/detection/evaluation/coco_evaluator.py index dc56a9332784dd66d5393bbf0d4c996fe5141c6d..108290bb7bef6633c4be579b8ca8c929b34213cb 100644 --- a/official/vision/detection/evaluation/coco_evaluator.py +++ b/official/vision/detection/evaluation/coco_evaluator.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """The COCO-style evaluator. The following snippet demonstrates the use of interfaces: @@ -31,9 +31,11 @@ from __future__ import division from __future__ import print_function import atexit +import copy import tempfile -import numpy as np + from absl import logging +import numpy as np from pycocotools import cocoeval import six import tensorflow as tf @@ -197,22 +199,21 @@ class COCOEvaluator(object): """Update and aggregate detection results and groundtruth data. Args: - predictions: a dictionary of numpy arrays including the fields below. - See different parsers under `../dataloader` for more details. + predictions: a dictionary of numpy arrays including the fields below. See + different parsers under `../dataloader` for more details. Required fields: - source_id: a numpy array of int or string of shape [batch_size]. - image_info [if `need_rescale_bboxes` is True]: a numpy array of float of shape [batch_size, 4, 2]. - - num_detections: a numpy array of - int of shape [batch_size]. 
+ - num_detections: a numpy array of int of shape [batch_size]. - detection_boxes: a numpy array of float of shape [batch_size, K, 4]. - detection_classes: a numpy array of int of shape [batch_size, K]. - detection_scores: a numpy array of float of shape [batch_size, K]. Optional fields: - - detection_masks: a numpy array of float of shape - [batch_size, K, mask_height, mask_width]. - groundtruths: a dictionary of numpy arrays including the fields below. - See also different parsers under `../dataloader` for more details. + - detection_masks: a numpy array of float of shape [batch_size, K, + mask_height, mask_width]. + groundtruths: a dictionary of numpy arrays including the fields below. See + also different parsers under `../dataloader` for more details. Required fields: - source_id: a numpy array of int or string of shape [batch_size]. - height: a numpy array of int of shape [batch_size]. @@ -222,12 +223,12 @@ class COCOEvaluator(object): - classes: a numpy array of int of shape [batch_size, K]. Optional fields: - is_crowds: a numpy array of int of shape [batch_size, K]. If the - field is absent, it is assumed that this instance is not crowd. - - areas: a numy array of float of shape [batch_size, K]. If the - field is absent, the area is calculated using either boxes or - masks depending on which one is available. - - masks: a numpy array of float of shape - [batch_size, K, mask_height, mask_width], + field is absent, it is assumed that this instance is not crowd. + - areas: a numy array of float of shape [batch_size, K]. If the field + is absent, the area is calculated using either boxes or masks + depending on which one is available. 
+ - masks: a numpy array of float of shape [batch_size, K, mask_height, + mask_width], Raises: ValueError: if the required prediction or groundtruth fields are not @@ -258,6 +259,278 @@ class COCOEvaluator(object): self._groundtruths[k].append(v) +class OlnXclassEvaluator(COCOEvaluator): + """COCO evaluation metric class.""" + + def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True, + use_category=True, seen_class='all'): + """Constructs COCO evaluation class. + + The class provides the interface to metrics_fn in TPUEstimator. The + _update_op() takes detections from each image and push them to + self.detections. The _evaluate() loads a JSON file in COCO annotation format + as the groundtruths and runs COCO evaluation. + + Args: + annotation_file: a JSON file that stores annotations of the eval dataset. + If `annotation_file` is None, groundtruth annotations will be loaded + from the dataloader. + include_mask: a boolean to indicate whether or not to include the mask + eval. + need_rescale_bboxes: If true bboxes in `predictions` will be rescaled back + to absolute values (`image_info` is needed in this case). + use_category: if `False`, treat all object in all classes in one + foreground category. 
+ seen_class: 'all' or 'voc' or 'nonvoc' + """ + super(OlnXclassEvaluator, self).__init__( + annotation_file=annotation_file, + include_mask=include_mask, + need_rescale_bboxes=need_rescale_bboxes) + self._use_category = use_category + self._seen_class = seen_class + self._seen_class_ids = class_utils.coco_split_class_ids(seen_class) + self._metric_names = [ + 'AP', 'AP50', 'AP75', + 'APs', 'APm', 'APl', + 'ARmax10', 'ARmax20', 'ARmax50', 'ARmax100', 'ARmax200', + 'ARmax10s', 'ARmax10m', 'ARmax10l' + ] + if self._seen_class != 'all': + self._metric_names.extend([ + 'AP_seen', 'AP50_seen', 'AP75_seen', + 'APs_seen', 'APm_seen', 'APl_seen', + 'ARmax10_seen', 'ARmax20_seen', 'ARmax50_seen', + 'ARmax100_seen', 'ARmax200_seen', + 'ARmax10s_seen', 'ARmax10m_seen', 'ARmax10l_seen', + + 'AP_novel', 'AP50_novel', 'AP75_novel', + 'APs_novel', 'APm_novel', 'APl_novel', + 'ARmax10_novel', 'ARmax20_novel', 'ARmax50_novel', + 'ARmax100_novel', 'ARmax200_novel', + 'ARmax10s_novel', 'ARmax10m_novel', 'ARmax10l_novel', + ]) + if self._include_mask: + mask_metric_names = ['mask_' + x for x in self._metric_names] + self._metric_names.extend(mask_metric_names) + self._required_prediction_fields.extend(['detection_masks']) + self._required_groundtruth_fields.extend(['masks']) + + self.reset() + + def evaluate(self): + """Evaluates with detections from all images with COCO API. + + Returns: + coco_metric: float numpy array with shape [24] representing the + coco-style evaluation metrics (box and mask). 
+ """ + if not self._annotation_file: + logging.info('Thre is no annotation_file in COCOEvaluator.') + gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset( + self._groundtruths) + coco_gt = coco_utils.COCOWrapper( + eval_type=('mask' if self._include_mask else 'box'), + gt_dataset=gt_dataset) + else: + logging.info('Using annotation file: %s', self._annotation_file) + coco_gt = self._coco_gt + + coco_predictions = coco_utils.convert_predictions_to_coco_annotations( + self._predictions) + coco_dt = coco_gt.loadRes(predictions=coco_predictions) + image_ids = [ann['image_id'] for ann in coco_predictions] + # Class manipulation: 'all' split samples -> ignored_split = 0. + for idx, ann in enumerate(coco_gt.dataset['annotations']): + coco_gt.dataset['annotations'][idx]['ignored_split'] = 0 + coco_eval = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt, coco_dt, iou_type='bbox') + coco_eval.params.maxDets = [10, 20, 50, 100, 200] + coco_eval.params.imgIds = image_ids + coco_eval.params.useCats = 0 if not self._use_category else 1 + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + coco_metrics = coco_eval.stats + + if self._include_mask: + mcoco_eval = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt, coco_dt, iou_type='segm') + mcoco_eval.params.maxDets = [10, 20, 50, 100, 200] + mcoco_eval.params.imgIds = image_ids + mcoco_eval.params.useCats = 0 if not self._use_category else 1 + mcoco_eval.evaluate() + mcoco_eval.accumulate() + mcoco_eval.summarize() + mask_coco_metrics = mcoco_eval.stats + + if self._include_mask: + metrics = np.hstack((coco_metrics, mask_coco_metrics)) + else: + metrics = coco_metrics + + if self._seen_class != 'all': + # for seen class eval, samples of novel_class are ignored. 
+ coco_gt_seen = copy.deepcopy(coco_gt) + for idx, ann in enumerate(coco_gt.dataset['annotations']): + if ann['category_id'] in self._seen_class_ids: + coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 0 + else: + coco_gt_seen.dataset['annotations'][idx]['ignored_split'] = 1 + coco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt_seen, coco_dt, iou_type='bbox') + coco_eval_seen.params.maxDets = [10, 20, 50, 100, 200] + coco_eval_seen.params.imgIds = image_ids + coco_eval_seen.params.useCats = 0 if not self._use_category else 1 + coco_eval_seen.evaluate() + coco_eval_seen.accumulate() + coco_eval_seen.summarize() + coco_metrics_seen = coco_eval_seen.stats + if self._include_mask: + mcoco_eval_seen = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt_seen, coco_dt, iou_type='segm') + mcoco_eval_seen.params.maxDets = [10, 20, 50, 100, 200] + mcoco_eval_seen.params.imgIds = image_ids + mcoco_eval_seen.params.useCats = 0 if not self._use_category else 1 + mcoco_eval_seen.evaluate() + mcoco_eval_seen.accumulate() + mcoco_eval_seen.summarize() + mask_coco_metrics_seen = mcoco_eval_seen.stats + + # for novel class eval, samples of seen_class are ignored. 
+ coco_gt_novel = copy.deepcopy(coco_gt) + for idx, ann in enumerate(coco_gt.dataset['annotations']): + if ann['category_id'] in self._seen_class_ids: + coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 1 + else: + coco_gt_novel.dataset['annotations'][idx]['ignored_split'] = 0 + coco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt_novel, coco_dt, iou_type='bbox') + coco_eval_novel.params.maxDets = [10, 20, 50, 100, 200] + coco_eval_novel.params.imgIds = image_ids + coco_eval_novel.params.useCats = 0 if not self._use_category else 1 + coco_eval_novel.evaluate() + coco_eval_novel.accumulate() + coco_eval_novel.summarize() + coco_metrics_novel = coco_eval_novel.stats + if self._include_mask: + mcoco_eval_novel = cocoeval.OlnCOCOevalXclassWrapper( + coco_gt_novel, coco_dt, iou_type='segm') + mcoco_eval_novel.params.maxDets = [10, 20, 50, 100, 200] + mcoco_eval_novel.params.imgIds = image_ids + mcoco_eval_novel.params.useCats = 0 if not self._use_category else 1 + mcoco_eval_novel.evaluate() + mcoco_eval_novel.accumulate() + mcoco_eval_novel.summarize() + mask_coco_metrics_novel = mcoco_eval_novel.stats + + # Combine all splits. + if self._include_mask: + metrics = np.hstack(( + coco_metrics, coco_metrics_seen, coco_metrics_novel, + mask_coco_metrics, mask_coco_metrics_seen, mask_coco_metrics_novel)) + else: + metrics = np.hstack(( + coco_metrics, coco_metrics_seen, coco_metrics_novel)) + + # Cleans up the internal variables in order for a fresh eval next time. + self.reset() + + metrics_dict = {} + for i, name in enumerate(self._metric_names): + metrics_dict[name] = metrics[i].astype(np.float32) + return metrics_dict + + +class OlnXdataEvaluator(OlnXclassEvaluator): + """COCO evaluation metric class.""" + + def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True, + use_category=True, seen_class='all'): + """Constructs COCO evaluation class. + + The class provides the interface to metrics_fn in TPUEstimator. 
The + _update_op() takes detections from each image and push them to + self.detections. The _evaluate() loads a JSON file in COCO annotation format + as the groundtruths and runs COCO evaluation. + + Args: + annotation_file: a JSON file that stores annotations of the eval dataset. + If `annotation_file` is None, groundtruth annotations will be loaded + from the dataloader. + include_mask: a boolean to indicate whether or not to include the mask + eval. + need_rescale_bboxes: If true bboxes in `predictions` will be rescaled back + to absolute values (`image_info` is needed in this case). + use_category: if `False`, treat all object in all classes in one + foreground category. + seen_class: 'all' or 'voc' or 'nonvoc' + """ + super(OlnXdataEvaluator, self).__init__( + annotation_file=annotation_file, + include_mask=include_mask, + need_rescale_bboxes=need_rescale_bboxes, + use_category=False, + seen_class='all') + + def evaluate(self): + """Evaluates with detections from all images with COCO API. + + Returns: + coco_metric: float numpy array with shape [24] representing the + coco-style evaluation metrics (box and mask). + """ + if not self._annotation_file: + logging.info('Thre is no annotation_file in COCOEvaluator.') + gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset( + self._groundtruths) + coco_gt = coco_utils.COCOWrapper( + eval_type=('mask' if self._include_mask else 'box'), + gt_dataset=gt_dataset) + else: + logging.info('Using annotation file: %s', self._annotation_file) + coco_gt = self._coco_gt + coco_predictions = coco_utils.convert_predictions_to_coco_annotations( + self._predictions) + coco_dt = coco_gt.loadRes(predictions=coco_predictions) + image_ids = [ann['image_id'] for ann in coco_predictions] + # Class manipulation: 'all' split samples -> ignored_split = 0. 
+ for idx, _ in enumerate(coco_gt.dataset['annotations']): + coco_gt.dataset['annotations'][idx]['ignored_split'] = 0 + coco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt, iou_type='bbox') + coco_eval.params.maxDets = [10, 20, 50, 100, 200] + coco_eval.params.imgIds = image_ids + coco_eval.params.useCats = 0 if not self._use_category else 1 + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + coco_metrics = coco_eval.stats + + if self._include_mask: + mcoco_eval = cocoeval.OlnCOCOevalWrapper(coco_gt, coco_dt, + iou_type='segm') + mcoco_eval.params.maxDets = [10, 20, 50, 100, 200] + mcoco_eval.params.imgIds = image_ids + mcoco_eval.params.useCats = 0 if not self._use_category else 1 + mcoco_eval.evaluate() + mcoco_eval.accumulate() + mcoco_eval.summarize() + mask_coco_metrics = mcoco_eval.stats + + if self._include_mask: + metrics = np.hstack((coco_metrics, mask_coco_metrics)) + else: + metrics = coco_metrics + + # Cleans up the internal variables in order for a fresh eval next time. + self.reset() + + metrics_dict = {} + for i, name in enumerate(self._metric_names): + metrics_dict[name] = metrics[i].astype(np.float32) + return metrics_dict + + class ShapeMaskCOCOEvaluator(COCOEvaluator): """COCO evaluation metric class for ShapeMask.""" @@ -318,8 +591,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator): metrics = np.hstack((coco_metrics, mcoco_eval.stats)) else: mask_coco_metrics = mcoco_eval.category_stats - val_catg_idx = np.isin(mcoco_eval.params.catIds, - self._eval_categories) + val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories) # Gather the valid evaluation of the eval categories. 
if np.any(val_catg_idx): mean_val_metrics = [] diff --git a/official/vision/detection/evaluation/coco_utils.py b/official/vision/detection/evaluation/coco_utils.py index 8155d1fbb89ac143eb7cc03457a6645a5b5ab505..0f289d354bacfc97e48c2b7d5af9fb6f72feae77 100644 --- a/official/vision/detection/evaluation/coco_utils.py +++ b/official/vision/detection/evaluation/coco_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Util functions related to pycocotools and COCO eval.""" from __future__ import absolute_import @@ -237,7 +237,7 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None): (boxes[j, k, 3] - boxes[j, k, 1]) * (boxes[j, k, 2] - boxes[j, k, 0])) if 'masks' in groundtruths: - mask = Image.open(six.StringIO(groundtruths['masks'][i][j, k])) + mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k])) width, height = mask.size np_mask = ( np.array(mask.getdata()).reshape(height, width).astype(np.uint8)) diff --git a/official/vision/detection/evaluation/factory.py b/official/vision/detection/evaluation/factory.py index 4d44bf177071a97b663b41410a05d59d59f04456..fcc543bfd00b72c08540088f74d89e410569d020 100644 --- a/official/vision/detection/evaluation/factory.py +++ b/official/vision/detection/evaluation/factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Evaluator factory.""" from __future__ import absolute_import @@ -29,6 +29,18 @@ def evaluator_generator(params): elif params.type == 'box_and_mask': evaluator = coco_evaluator.COCOEvaluator( annotation_file=params.val_json_file, include_mask=True) + elif params.type == 'oln_xclass_box': + evaluator = coco_evaluator.OlnXclassEvaluator( + annotation_file=params.val_json_file, include_mask=False, + use_category=False, seen_class=params.seen_class,) + elif params.type == 'oln_xclass_box_and_mask': + evaluator = coco_evaluator.OlnXclassEvaluator( + annotation_file=params.val_json_file, include_mask=True, + use_category=False, seen_class=params.seen_class,) + elif params.type == 'oln_xdata_box': + evaluator = coco_evaluator.OlnXdataEvaluator( + annotation_file=params.val_json_file, include_mask=False, + use_category=False, seen_class='all',) elif params.type == 'shapemask_box_and_mask': evaluator = coco_evaluator.ShapeMaskCOCOEvaluator( mask_eval_class=params.mask_eval_class, diff --git a/official/vision/detection/executor/__init__.py b/official/vision/detection/executor/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/executor/__init__.py +++ b/official/vision/detection/executor/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/detection/executor/detection_executor.py b/official/vision/detection/executor/detection_executor.py index 26ff028cf67d6df37e5a0af31bc2e54844231fcd..91c557b6c37eb8de06706bf76fd896aee4683725 100644 --- a/official/vision/detection/executor/detection_executor.py +++ b/official/vision/detection/executor/detection_executor.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """An executor class for running model on TensorFlow 2.0.""" from __future__ import absolute_import @@ -22,7 +22,7 @@ from __future__ import print_function from absl import logging import tensorflow as tf -from official.modeling.training import distributed_executor as executor +from official.vision.detection.executor import distributed_executor as executor from official.vision.detection.utils.object_detection import visualization_utils @@ -63,10 +63,9 @@ class DetectionDistributedExecutor(executor.DistributedExecutor): trainable_variables) logging.info('Filter trainable variables from %d to %d', len(model.trainable_variables), len(trainable_variables)) - _update_state = lambda labels, outputs: None + update_state_fn = lambda labels, outputs: None if isinstance(metric, tf.keras.metrics.Metric): - _update_state = lambda labels, outputs: metric.update_state( - labels, outputs) + update_state_fn = metric.update_state else: logging.error('Detection: train metric is not an instance of ' 'tf.keras.metrics.Metric.') @@ -82,10 +81,11 @@ class DetectionDistributedExecutor(executor.DistributedExecutor): for k, v in all_losses.items(): losses[k] = tf.reduce_mean(v) per_replica_loss = losses['total_loss'] / strategy.num_replicas_in_sync - _update_state(labels, outputs) + update_state_fn(labels, outputs) grads = tape.gradient(per_replica_loss, trainable_variables) - optimizer.apply_gradients(zip(grads, trainable_variables)) + clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0) + optimizer.apply_gradients(zip(clipped_grads, trainable_variables)) return losses return _replicated_step diff --git a/official/modeling/training/distributed_executor.py b/official/vision/detection/executor/distributed_executor.py similarity index 93% rename from official/modeling/training/distributed_executor.py rename to official/vision/detection/executor/distributed_executor.py index 
11451260cdca52a9c9f4019010123c4d2b40e99e..8f8c861c99f2094756ca90052769bb2a8f2e1fe6 100644 --- a/official/modeling/training/distributed_executor.py +++ b/official/vision/detection/executor/distributed_executor.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Custom training loop for running TensorFlow 2.0 models.""" from __future__ import absolute_import @@ -31,7 +31,7 @@ import tensorflow as tf from typing import Optional, Dict, List, Text, Callable, Union, Iterator, Any from official.modeling.hyperparams import params_dict from official.utils import hyperparams_flags -from official.utils.misc import distribution_utils +from official.common import distribute_utils from official.utils.misc import keras_utils FLAGS = flags.FLAGS @@ -63,8 +63,8 @@ def metrics_as_dict(metric): """Puts input metric(s) into a list. Args: - metric: metric(s) to be put into the list. `metric` could be a object, a - list or a dict of tf.keras.metrics.Metric or has the `required_method`. + metric: metric(s) to be put into the list. `metric` could be an object, a + list, or a dict of tf.keras.metrics.Metric or has the `required_method`. Returns: A dictionary of valid metrics. @@ -108,7 +108,7 @@ class SummaryWriter(object): def __init__(self, model_dir: Text, name: Text): """Inits SummaryWriter with paths. - Arguments: + Args: model_dir: the model folder path. name: the summary subfolder name. 
""" @@ -133,15 +133,9 @@ class SummaryWriter(object): class DistributedExecutor(object): - """Interface to train and eval models with tf.distribute.Strategy. - """ + """Interface to train and eval models with tf.distribute.Strategy.""" - def __init__(self, - strategy, - params, - model_fn, - loss_fn, - is_multi_host=False): + def __init__(self, strategy, params, model_fn, loss_fn, is_multi_host=False): """Constructor. Args: @@ -213,8 +207,7 @@ class DistributedExecutor(object): # across workers. Since Dataset instance cannot be cloned in eager mode, # we instead pass callable that returns a dataset. if self._is_multi_host: - return iter( - strategy.experimental_distribute_datasets_from_function(input_fn)) + return iter(strategy.distribute_datasets_from_function(input_fn)) else: input_data = input_fn() return iter(strategy.experimental_distribute_dataset(input_data)) @@ -293,8 +286,7 @@ class DistributedExecutor(object): raise ValueError('steps should be an Tensor. Python object may cause ' 'retracing.') - per_replica_losses = strategy.run( - replicated_step, args=(next(iterator),)) + per_replica_losses = strategy.run(replicated_step, args=(next(iterator),)) for _ in tf.range(num_steps - 1): per_replica_losses = strategy.run( replicated_step, args=(next(iterator),)) @@ -351,7 +343,8 @@ class DistributedExecutor(object): train_input_fn: (params: dict) -> tf.data.Dataset training data input function. eval_input_fn: (Optional) same type as train_input_fn. If not None, will - trigger evaluting metric on eval data. If None, will not run eval step. + trigger evaluating metric on eval data. If None, will not run the eval + step. model_dir: the folder path for model checkpoints. total_steps: total training steps. iterations_per_loop: train steps per loop. After each loop, this job will @@ -367,6 +360,7 @@ class DistributedExecutor(object): available checkpoints. If `False`, will do the evaluation once after the final step. save_config: bool. 
Whether to save params to model_dir. + Returns: The training loss and eval metrics. """ @@ -476,16 +470,15 @@ class DistributedExecutor(object): # Step-0 operations if current_step == 0 and not latest_checkpoint_file: - _save_checkpoint( - checkpoint, model_dir, checkpoint_name.format(step=current_step)) + _save_checkpoint(checkpoint, model_dir, + checkpoint_name.format(step=current_step)) if test_step: eval_iterator = self._get_input_iterator(eval_input_fn, strategy) - eval_metric_result = self._run_evaluation( - test_step, current_step, eval_metric, eval_iterator) - logging.info( - 'Step: %s evalation metric = %s.', current_step, eval_metric_result) - test_summary_writer( - metrics=eval_metric_result, step=optimizer.iterations) + eval_metric_result = self._run_evaluation(test_step, current_step, + eval_metric, eval_iterator) + logging.info('Step: %s evalation metric = %s.', current_step, + eval_metric_result) + test_summary_writer(metrics=eval_metric_result, step=optimizer.iterations) reset_states(eval_metric) logging.info('Training started') @@ -518,8 +511,7 @@ class DistributedExecutor(object): else: train_metric_result.update({'learning_rate': optimizer.lr.numpy()}) logging.info('Train Step: %d/%d / loss = %s / training metric = %s', - current_step, total_steps, train_loss, - train_metric_result) + current_step, total_steps, train_loss, train_metric_result) train_summary_writer( metrics=train_metric_result, step=optimizer.iterations) @@ -560,8 +552,7 @@ class DistributedExecutor(object): eval_metric_result = self._run_evaluation(test_step, current_step, eval_metric, eval_iterator) logging.info('Final evaluation metric = %s.', eval_metric_result) - test_summary_writer( - metrics=eval_metric_result, step=optimizer.iterations) + test_summary_writer(metrics=eval_metric_result, step=optimizer.iterations) self.train_summary_writer.close() self.eval_summary_writer.close() @@ -672,7 +663,7 @@ class DistributedExecutor(object): raise ValueError('if `eval_metric_fn` is 
specified, ' 'eval_metric_fn must be a callable.') - old_phrase = tf.keras.backend.learning_phase() + old_phase = tf.keras.backend.learning_phase() tf.keras.backend.set_learning_phase(0) params = self._params strategy = self._strategy @@ -695,10 +686,10 @@ class DistributedExecutor(object): reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path) current_step = reader.get_tensor( 'optimizer/iter/.ATTRIBUTES/VARIABLE_VALUE') - logging.info( - 'Checkpoint file %s found and restoring from ' - 'checkpoint', checkpoint_path) - checkpoint.restore(checkpoint_path) + logging.info('Checkpoint file %s found and restoring from ' + 'checkpoint', checkpoint_path) + status = checkpoint.restore(checkpoint_path) + status.expect_partial().assert_existing_objects_matched() self.global_train_step = model.optimizer.iterations eval_iterator = self._get_input_iterator(eval_input_fn, strategy) @@ -709,7 +700,7 @@ class DistributedExecutor(object): summary_writer(metrics=eval_metric_result, step=current_step) reset_states(eval_metric) - tf.keras.backend.set_learning_phase(old_phrase) + tf.keras.backend.set_learning_phase(old_phase) return eval_metric_result, current_step def predict(self): @@ -753,18 +744,18 @@ class ExecutorBuilder(object): """ def __init__(self, strategy_type=None, strategy_config=None): - _ = distribution_utils.configure_cluster( - strategy_config.worker_hosts, strategy_config.task_index) + _ = distribute_utils.configure_cluster(strategy_config.worker_hosts, + strategy_config.task_index) """Constructor. Args: strategy_type: string. One of 'tpu', 'mirrored', 'multi_worker_mirrored'. - If None. User is responsible to set the strategy before calling + If None, the user is responsible to set the strategy before calling build_executor(...). strategy_config: necessary config for constructing the proper Strategy. Check strategy_flags_dict() for examples of the structure. 
""" - self._strategy = distribution_utils.get_distribution_strategy( + self._strategy = distribute_utils.get_distribution_strategy( distribution_strategy=strategy_type, num_gpus=strategy_config.num_gpus, all_reduce_alg=strategy_config.all_reduce_alg, diff --git a/official/vision/detection/main.py b/official/vision/detection/main.py index 542be3a1dcc73f82719af2d60dc9abd210787931..6bfdd2906ca67b95bc4086d542066929f0539c85 100644 --- a/official/vision/detection/main.py +++ b/official/vision/detection/main.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,34 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Main function to train various object detection models.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Main function to train various object detection models.""" import functools import pprint -# pylint: disable=g-bad-import-order -import tensorflow as tf - from absl import app from absl import flags from absl import logging -# pylint: enable=g-bad-import-order +import tensorflow as tf +from official.common import distribute_utils from official.modeling.hyperparams import params_dict -from official.modeling.training import distributed_executor as executor from official.utils import hyperparams_flags from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils from official.vision.detection.configs import 
factory as config_factory from official.vision.detection.dataloader import input_reader from official.vision.detection.dataloader import mode_keys as ModeKeys +from official.vision.detection.executor import distributed_executor as executor from official.vision.detection.executor.detection_executor import DetectionDistributedExecutor from official.vision.detection.modeling import factory as model_factory @@ -48,7 +40,9 @@ flags_core.define_log_steps() flags.DEFINE_bool('enable_xla', default=False, help='Enable XLA for GPU') flags.DEFINE_string( - 'mode', default='train', help='Mode to run: `train` or `eval`.') + 'mode', + default='train', + help='Mode to run: `train`, `eval` or `eval_once`.') flags.DEFINE_string( 'model', default='retinanet', @@ -76,9 +70,7 @@ def run_executor(params, """Runs the object detection model on distribution strategy defined by the user.""" if params.architecture.use_bfloat16: - policy = tf.compat.v2.keras.mixed_precision.experimental.Policy( - 'mixed_bfloat16') - tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy) + tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16') model_builder = model_factory.model_generator(params) @@ -86,9 +78,9 @@ def run_executor(params, strategy = prebuilt_strategy else: strategy_config = params.strategy_config - distribution_utils.configure_cluster(strategy_config.worker_hosts, - strategy_config.task_index) - strategy = distribution_utils.get_distribution_strategy( + distribute_utils.configure_cluster(strategy_config.worker_hosts, + strategy_config.task_index) + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=params.strategy_type, num_gpus=strategy_config.num_gpus, all_reduce_alg=strategy_config.all_reduce_alg, diff --git a/official/vision/detection/modeling/__init__.py b/official/vision/detection/modeling/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- 
a/official/vision/detection/modeling/__init__.py +++ b/official/vision/detection/modeling/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/detection/modeling/architecture/__init__.py b/official/vision/detection/modeling/architecture/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/modeling/architecture/__init__.py +++ b/official/vision/detection/modeling/architecture/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/detection/modeling/architecture/factory.py b/official/vision/detection/modeling/architecture/factory.py index 403d815eaafd91feb999d13b034c864d99804963..f39949d26ffd0f1ba3ac195b4c059744b6c99579 100644 --- a/official/vision/detection/modeling/architecture/factory.py +++ b/official/vision/detection/modeling/architecture/factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Model architecture factory.""" from __future__ import absolute_import @@ -77,11 +77,13 @@ def multilevel_features_generator(params): def retinanet_head_generator(params): """Generator function for RetinaNet head architecture.""" head_params = params.retinanet_head + anchors_per_location = params.anchor.num_scales * len( + params.anchor.aspect_ratios) return heads.RetinanetHead( params.architecture.min_level, params.architecture.max_level, params.architecture.num_classes, - head_params.anchors_per_location, + anchors_per_location, head_params.num_convs, head_params.num_filters, head_params.use_separable_conv, @@ -91,10 +93,29 @@ def retinanet_head_generator(params): def rpn_head_generator(params): """Generator function for RPN head architecture.""" head_params = params.rpn_head + anchors_per_location = params.anchor.num_scales * len( + params.anchor.aspect_ratios) return heads.RpnHead( params.architecture.min_level, params.architecture.max_level, - head_params.anchors_per_location, + anchors_per_location, + head_params.num_convs, + head_params.num_filters, + head_params.use_separable_conv, + params.norm_activation.activation, + head_params.use_batch_norm, + norm_activation=norm_activation_generator(params.norm_activation)) + + +def oln_rpn_head_generator(params): + """Generator function for OLN-proposal (OLN-RPN) head architecture.""" + head_params = params.rpn_head + anchors_per_location = params.anchor.num_scales * len( + params.anchor.aspect_ratios) + return 
heads.OlnRpnHead( + params.architecture.min_level, + params.architecture.max_level, + anchors_per_location, head_params.num_convs, head_params.num_filters, head_params.use_separable_conv, @@ -118,6 +139,21 @@ def fast_rcnn_head_generator(params): norm_activation=norm_activation_generator(params.norm_activation)) +def oln_box_score_head_generator(params): + """Generator function for Scoring Fast R-CNN head architecture.""" + head_params = params.frcnn_head + return heads.OlnBoxScoreHead( + params.architecture.num_classes, + head_params.num_convs, + head_params.num_filters, + head_params.use_separable_conv, + head_params.num_fcs, + head_params.fc_dims, + params.norm_activation.activation, + head_params.use_batch_norm, + norm_activation=norm_activation_generator(params.norm_activation)) + + def mask_rcnn_head_generator(params): """Generator function for Mask R-CNN head architecture.""" head_params = params.mrcnn_head @@ -132,6 +168,20 @@ def mask_rcnn_head_generator(params): norm_activation=norm_activation_generator(params.norm_activation)) +def oln_mask_score_head_generator(params): + """Generator function for Scoring Mask R-CNN head architecture.""" + head_params = params.mrcnn_head + return heads.OlnMaskScoreHead( + params.architecture.num_classes, + params.architecture.mask_target_size, + head_params.num_convs, + head_params.num_filters, + head_params.use_separable_conv, + params.norm_activation.activation, + head_params.use_batch_norm, + norm_activation=norm_activation_generator(params.norm_activation)) + + def shapeprior_head_generator(params): """Generator function for shape prior head architecture.""" head_params = params.shapemask_head diff --git a/official/vision/detection/modeling/architecture/fpn.py b/official/vision/detection/modeling/architecture/fpn.py index b968dc2e152eb66e2df7ca7673b506c123b59d0f..3cfb56dbdec6c6b09e4cc7f6bbd70b054f6cbc10 100644 --- a/official/vision/detection/modeling/architecture/fpn.py +++ 
b/official/vision/detection/modeling/architecture/fpn.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Feature Pyramid Networks. Feature Pyramid Networks were proposed in: @@ -28,7 +28,6 @@ import functools import tensorflow as tf -from tensorflow.python.keras import backend from official.vision.detection.modeling.architecture import nn_ops from official.vision.detection.ops import spatial_transform_ops @@ -120,7 +119,7 @@ class Fpn(object): 'The minimum backbone level %d should be '%(min(input_levels)) + 'less or equal to FPN minimum level %d.:'%(self._min_level)) backbone_max_level = min(max(input_levels), self._max_level) - with backend.get_graph().as_default(), tf.name_scope('fpn'): + with tf.name_scope('fpn'): # Adds lateral connections. feats_lateral = {} for level in range(self._min_level, backbone_max_level + 1): diff --git a/official/vision/detection/modeling/architecture/heads.py b/official/vision/detection/modeling/architecture/heads.py index 7f6954aecbbef8e8807345e643555ba222b0e1b9..8eb89892d67bd33541c6586cb035cdffbdc31ad8 100644 --- a/official/vision/detection/modeling/architecture/heads.py +++ b/official/vision/detection/modeling/architecture/heads.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Classes to build various prediction heads in all supported models.""" from __future__ import absolute_import @@ -22,7 +22,7 @@ import functools import numpy as np import tensorflow as tf -from tensorflow.python.keras import backend + from official.vision.detection.modeling.architecture import nn_ops from official.vision.detection.ops import spatial_transform_ops @@ -30,17 +30,17 @@ from official.vision.detection.ops import spatial_transform_ops class RpnHead(tf.keras.layers.Layer): """Region Proposal Network head.""" - def __init__(self, - min_level, - max_level, - anchors_per_location, - num_convs=2, - num_filters=256, - use_separable_conv=False, - activation='relu', - use_batch_norm=True, - norm_activation=nn_ops.norm_activation_builder( - activation='relu')): + def __init__( + self, + min_level, + max_level, + anchors_per_location, + num_convs=2, + num_filters=256, + use_separable_conv=False, + activation='relu', + use_batch_norm=True, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): """Initialize params to build Region Proposal Network head. Args: @@ -56,9 +56,11 @@ class RpnHead(tf.keras.layers.Layer): is used. activation: activation function. Support 'relu' and 'swish'. use_batch_norm: 'bool', indicating whether batchnorm layers are added. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. 
""" + super().__init__(autocast=False) + self._min_level = min_level self._max_level = max_level self._anchors_per_location = anchors_per_location @@ -122,12 +124,12 @@ class RpnHead(tf.keras.layers.Layer): return scores, bboxes - def __call__(self, features, is_training=None): + def call(self, features, is_training=None): scores_outputs = {} box_outputs = {} - with backend.get_graph().as_default(), tf.name_scope('rpn_head'): + with tf.name_scope('rpn_head'): for level in range(self._min_level, self._max_level + 1): scores_output, box_output = self._shared_rpn_heads( features[level], self._anchors_per_location, level, is_training) @@ -136,20 +138,144 @@ class RpnHead(tf.keras.layers.Layer): return scores_outputs, box_outputs +class OlnRpnHead(tf.keras.layers.Layer): + """Region Proposal Network for Object Localization Network (OLN).""" + + def __init__( + self, + min_level, + max_level, + anchors_per_location, + num_convs=2, + num_filters=256, + use_separable_conv=False, + activation='relu', + use_batch_norm=True, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): + """Initialize params to build Region Proposal Network head. + + Args: + min_level: `int` number of minimum feature level. + max_level: `int` number of maximum feature level. + anchors_per_location: `int` number of number of anchors per pixel + location. + num_convs: `int` number that represents the number of the intermediate + conv layers before the prediction. + num_filters: `int` number that represents the number of filters of the + intermediate conv layers. + use_separable_conv: `bool`, indicating whether the separable conv layers + is used. + activation: activation function. Support 'relu' and 'swish'. + use_batch_norm: 'bool', indicating whether batchnorm layers are added. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. 
+ """ + self._min_level = min_level + self._max_level = max_level + self._anchors_per_location = anchors_per_location + if activation == 'relu': + self._activation_op = tf.nn.relu + elif activation == 'swish': + self._activation_op = tf.nn.swish + else: + raise ValueError('Unsupported activation `{}`.'.format(activation)) + self._use_batch_norm = use_batch_norm + + if use_separable_conv: + self._conv2d_op = functools.partial( + tf.keras.layers.SeparableConv2D, + depth_multiplier=1, + bias_initializer=tf.zeros_initializer()) + else: + self._conv2d_op = functools.partial( + tf.keras.layers.Conv2D, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + bias_initializer=tf.zeros_initializer()) + + self._rpn_conv = self._conv2d_op( + num_filters, + kernel_size=(3, 3), + strides=(1, 1), + activation=(None if self._use_batch_norm else self._activation_op), + padding='same', + name='rpn') + self._rpn_class_conv = self._conv2d_op( + anchors_per_location, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='rpn-class') + self._rpn_box_conv = self._conv2d_op( + 4 * anchors_per_location, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='rpn-box-lrtb') + self._rpn_center_conv = self._conv2d_op( + anchors_per_location, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='rpn-centerness') + + self._norm_activations = {} + if self._use_batch_norm: + for level in range(self._min_level, self._max_level + 1): + self._norm_activations[level] = norm_activation(name='rpn-l%d-bn' % + level) + + def _shared_rpn_heads(self, features, anchors_per_location, level, + is_training): + """Shared RPN heads.""" + features = self._rpn_conv(features) + if self._use_batch_norm: + # The batch normalization layers are not shared between levels. 
+ features = self._norm_activations[level]( + features, is_training=is_training) + # Feature L2 normalization for training stability + features = tf.math.l2_normalize( + features, + axis=-1, + name='rpn-norm',) + # Proposal classification scores + scores = self._rpn_class_conv(features) + # Proposal bbox regression deltas + bboxes = self._rpn_box_conv(features) + # Proposal centerness scores + centers = self._rpn_center_conv(features) + + return scores, bboxes, centers + + def __call__(self, features, is_training=None): + + scores_outputs = {} + box_outputs = {} + center_outputs = {} + + with tf.name_scope('rpn_head'): + for level in range(self._min_level, self._max_level + 1): + scores_output, box_output, center_output = self._shared_rpn_heads( + features[level], self._anchors_per_location, level, is_training) + scores_outputs[level] = scores_output + box_outputs[level] = box_output + center_outputs[level] = center_output + return scores_outputs, box_outputs, center_outputs + + class FastrcnnHead(tf.keras.layers.Layer): """Fast R-CNN box head.""" - def __init__(self, - num_classes, - num_convs=0, - num_filters=256, - use_separable_conv=False, - num_fcs=2, - fc_dims=1024, - activation='relu', - use_batch_norm=True, - norm_activation=nn_ops.norm_activation_builder( - activation='relu')): + def __init__( + self, + num_classes, + num_convs=0, + num_filters=256, + use_separable_conv=False, + num_fcs=2, + fc_dims=1024, + activation='relu', + use_batch_norm=True, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): """Initialize params to build Fast R-CNN box head. Args: @@ -166,9 +292,11 @@ class FastrcnnHead(tf.keras.layers.Layer): layers. activation: activation function. Support 'relu' and 'swish'. use_batch_norm: 'bool', indicating whether batchnorm layers are added. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. 
+ norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. """ + super(FastrcnnHead, self).__init__(autocast=False) + self._num_classes = num_classes self._num_convs = num_convs @@ -206,7 +334,8 @@ class FastrcnnHead(tf.keras.layers.Layer): strides=(1, 1), padding='same', dilation_rate=(1, 1), - activation=(None if self._use_batch_norm else self._activation_op), + activation=(None + if self._use_batch_norm else self._activation_op), name='conv_{}'.format(i))) if self._use_batch_norm: self._conv_bn_ops.append(self._norm_activation()) @@ -217,7 +346,8 @@ class FastrcnnHead(tf.keras.layers.Layer): self._fc_ops.append( tf.keras.layers.Dense( units=self._fc_dims, - activation=(None if self._use_batch_norm else self._activation_op), + activation=(None + if self._use_batch_norm else self._activation_op), name='fc{}'.format(i))) if self._use_batch_norm: self._fc_bn_ops.append(self._norm_activation(fused=False)) @@ -233,12 +363,12 @@ class FastrcnnHead(tf.keras.layers.Layer): bias_initializer=tf.zeros_initializer(), name='box-predict') - def __call__(self, roi_features, is_training=None): + def call(self, roi_features, is_training=None): """Box and class branches for the Mask-RCNN model. Args: - roi_features: A ROI feature tensor of shape - [batch_size, num_rois, height_l, width_l, num_filters]. + roi_features: A ROI feature tensor of shape [batch_size, num_rois, + height_l, width_l, num_filters]. is_training: `boolean`, if True if model is in training mode. Returns: @@ -249,7 +379,8 @@ class FastrcnnHead(tf.keras.layers.Layer): predictions. """ - with backend.get_graph().as_default(), tf.name_scope('fast_rcnn_head'): + with tf.name_scope( + 'fast_rcnn_head'): # reshape inputs beofre FC. 
_, num_rois, height, width, filters = roi_features.get_shape().as_list() @@ -272,19 +403,163 @@ class FastrcnnHead(tf.keras.layers.Layer): return class_outputs, box_outputs +class OlnBoxScoreHead(tf.keras.layers.Layer): + """Box head of Object Localization Network (OLN).""" + + def __init__( + self, + num_classes, + num_convs=0, + num_filters=256, + use_separable_conv=False, + num_fcs=2, + fc_dims=1024, + activation='relu', + use_batch_norm=True, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): + """Initialize params to build OLN box head. + + Args: + num_classes: a integer for the number of classes. + num_convs: `int` number that represents the number of the intermediate + conv layers before the FC layers. + num_filters: `int` number that represents the number of filters of the + intermediate conv layers. + use_separable_conv: `bool`, indicating whether the separable conv layers + is used. + num_fcs: `int` number that represents the number of FC layers before the + predictions. + fc_dims: `int` number that represents the number of dimension of the FC + layers. + activation: activation function. Support 'relu' and 'swish'. + use_batch_norm: 'bool', indicating whether batchnorm layers are added. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. 
+ """ + self._num_classes = num_classes + + self._num_convs = num_convs + self._num_filters = num_filters + if use_separable_conv: + self._conv2d_op = functools.partial( + tf.keras.layers.SeparableConv2D, + depth_multiplier=1, + bias_initializer=tf.zeros_initializer()) + else: + self._conv2d_op = functools.partial( + tf.keras.layers.Conv2D, + kernel_initializer=tf.keras.initializers.VarianceScaling( + scale=2, mode='fan_out', distribution='untruncated_normal'), + bias_initializer=tf.zeros_initializer()) + + self._num_fcs = num_fcs + self._fc_dims = fc_dims + if activation == 'relu': + self._activation_op = tf.nn.relu + elif activation == 'swish': + self._activation_op = tf.nn.swish + else: + raise ValueError('Unsupported activation `{}`.'.format(activation)) + self._use_batch_norm = use_batch_norm + self._norm_activation = norm_activation + + self._conv_ops = [] + self._conv_bn_ops = [] + for i in range(self._num_convs): + self._conv_ops.append( + self._conv2d_op( + self._num_filters, + kernel_size=(3, 3), + strides=(1, 1), + padding='same', + dilation_rate=(1, 1), + activation=(None + if self._use_batch_norm else self._activation_op), + name='conv_{}'.format(i))) + if self._use_batch_norm: + self._conv_bn_ops.append(self._norm_activation()) + + self._fc_ops = [] + self._fc_bn_ops = [] + for i in range(self._num_fcs): + self._fc_ops.append( + tf.keras.layers.Dense( + units=self._fc_dims, + activation=(None + if self._use_batch_norm else self._activation_op), + name='fc{}'.format(i))) + if self._use_batch_norm: + self._fc_bn_ops.append(self._norm_activation(fused=False)) + + self._class_predict = tf.keras.layers.Dense( + self._num_classes, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + bias_initializer=tf.zeros_initializer(), + name='class-predict') + self._box_predict = tf.keras.layers.Dense( + self._num_classes * 4, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), + bias_initializer=tf.zeros_initializer(), + 
name='box-predict') + self._score_predict = tf.keras.layers.Dense( + 1, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), + bias_initializer=tf.zeros_initializer(), + name='score-predict') + + def __call__(self, roi_features, is_training=None): + """Box and class branches for the Mask-RCNN model. + + Args: + roi_features: A ROI feature tensor of shape [batch_size, num_rois, + height_l, width_l, num_filters]. + is_training: `boolean`, if True if model is in training mode. + + Returns: + class_outputs: a tensor with a shape of + [batch_size, num_rois, num_classes], representing the class predictions. + box_outputs: a tensor with a shape of + [batch_size, num_rois, num_classes * 4], representing the box + predictions. + """ + + with tf.name_scope('fast_rcnn_head'): + # reshape inputs beofre FC. + _, num_rois, height, width, filters = roi_features.get_shape().as_list() + + net = tf.reshape(roi_features, [-1, height, width, filters]) + for i in range(self._num_convs): + net = self._conv_ops[i](net) + if self._use_batch_norm: + net = self._conv_bn_ops[i](net, is_training=is_training) + + filters = self._num_filters if self._num_convs > 0 else filters + net = tf.reshape(net, [-1, num_rois, height * width * filters]) + + for i in range(self._num_fcs): + net = self._fc_ops[i](net) + if self._use_batch_norm: + net = self._fc_bn_ops[i](net, is_training=is_training) + + class_outputs = self._class_predict(net) + box_outputs = self._box_predict(net) + score_outputs = self._score_predict(net) + return class_outputs, box_outputs, score_outputs + + class MaskrcnnHead(tf.keras.layers.Layer): """Mask R-CNN head.""" - def __init__(self, - num_classes, - mask_target_size, - num_convs=4, - num_filters=256, - use_separable_conv=False, - activation='relu', - use_batch_norm=True, - norm_activation=nn_ops.norm_activation_builder( - activation='relu')): + def __init__( + self, + num_classes, + mask_target_size, + num_convs=4, + num_filters=256, + 
use_separable_conv=False, + activation='relu', + use_batch_norm=True, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): """Initialize params to build Fast R-CNN head. Args: @@ -298,9 +573,10 @@ class MaskrcnnHead(tf.keras.layers.Layer): is used. activation: activation function. Support 'relu' and 'swish'. use_batch_norm: 'bool', indicating whether batchnorm layers are added. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. """ + super(MaskrcnnHead, self).__init__(autocast=False) self._num_classes = num_classes self._mask_target_size = mask_target_size @@ -334,7 +610,8 @@ class MaskrcnnHead(tf.keras.layers.Layer): strides=(1, 1), padding='same', dilation_rate=(1, 1), - activation=(None if self._use_batch_norm else self._activation_op), + activation=(None + if self._use_batch_norm else self._activation_op), name='mask-conv-l%d' % i)) self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose( self._num_filters, @@ -347,14 +624,22 @@ class MaskrcnnHead(tf.keras.layers.Layer): bias_initializer=tf.zeros_initializer(), name='conv5-mask') - def __call__(self, roi_features, class_indices, is_training=None): + with tf.name_scope('mask_head'): + self._mask_conv2d_op = self._conv2d_op( + self._num_classes, + kernel_size=(1, 1), + strides=(1, 1), + padding='valid', + name='mask_fcn_logits') + + def call(self, roi_features, class_indices, is_training=None): """Mask branch for the Mask-RCNN model. Args: - roi_features: A ROI feature tensor of shape - [batch_size, num_rois, height_l, width_l, num_filters]. - class_indices: a Tensor of shape [batch_size, num_rois], indicating - which class the ROI is. + roi_features: A ROI feature tensor of shape [batch_size, num_rois, + height_l, width_l, num_filters]. 
+ class_indices: a Tensor of shape [batch_size, num_rois], indicating which + class the ROI is. is_training: `boolean`, if True if model is in training mode. Returns: @@ -368,61 +653,54 @@ class MaskrcnnHead(tf.keras.layers.Layer): boxes is not 4. """ - with backend.get_graph().as_default(): - with tf.name_scope('mask_head'): - _, num_rois, height, width, filters = roi_features.get_shape().as_list() - net = tf.reshape(roi_features, [-1, height, width, filters]) - - for i in range(self._num_convs): - net = self._conv2d_ops[i](net) - if self._use_batch_norm: - net = self._norm_activation()(net, is_training=is_training) + with tf.name_scope('mask_head'): + _, num_rois, height, width, filters = roi_features.get_shape().as_list() + net = tf.reshape(roi_features, [-1, height, width, filters]) - net = self._mask_conv_transpose(net) + for i in range(self._num_convs): + net = self._conv2d_ops[i](net) if self._use_batch_norm: net = self._norm_activation()(net, is_training=is_training) - mask_outputs = self._conv2d_op( - self._num_classes, - kernel_size=(1, 1), - strides=(1, 1), - padding='valid', - name='mask_fcn_logits')( - net) - mask_outputs = tf.reshape(mask_outputs, [ - -1, num_rois, self._mask_target_size, self._mask_target_size, - self._num_classes - ]) - - with tf.name_scope('masks_post_processing'): - # TODO(pengchong): Figure out the way not to use the static inferred - # batch size. - batch_size, num_masks = class_indices.get_shape().as_list() - mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3]) - # Contructs indices for gather. 
- batch_indices = tf.tile( - tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks]) - mask_indices = tf.tile( - tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1]) - gather_indices = tf.stack( - [batch_indices, mask_indices, class_indices], axis=2) - mask_outputs = tf.gather_nd(mask_outputs, gather_indices) + net = self._mask_conv_transpose(net) + if self._use_batch_norm: + net = self._norm_activation()(net, is_training=is_training) + + mask_outputs = self._mask_conv2d_op(net) + mask_outputs = tf.reshape(mask_outputs, [ + -1, num_rois, self._mask_target_size, self._mask_target_size, + self._num_classes + ]) + + with tf.name_scope('masks_post_processing'): + # TODO(pengchong): Figure out the way not to use the static inferred + # batch size. + batch_size, num_masks = class_indices.get_shape().as_list() + mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3]) + # Constructs indices for gather. + batch_indices = tf.tile( + tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks]) + mask_indices = tf.tile( + tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1]) + gather_indices = tf.stack( + [batch_indices, mask_indices, class_indices], axis=2) + mask_outputs = tf.gather_nd(mask_outputs, gather_indices) return mask_outputs class RetinanetHead(object): """RetinaNet head.""" - def __init__(self, - min_level, - max_level, - num_classes, - anchors_per_location, - num_convs=4, - num_filters=256, - use_separable_conv=False, - norm_activation=nn_ops.norm_activation_builder( - activation='relu')): + def __init__( + self, + min_level, + max_level, + num_classes, + anchors_per_location, + num_convs=4, + num_filters=256, + use_separable_conv=False, + norm_activation=nn_ops.norm_activation_builder(activation='relu')): """Initialize params to build RetinaNet head. Args: @@ -435,8 +713,8 @@ class RetinanetHead(object): num_filters: `int` number of filters used in the head architecture. 
use_separable_conv: `bool` to indicate whether to use separable convoluation. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. """ self._min_level = min_level self._max_level = max_level @@ -552,7 +830,7 @@ class RetinanetHead(object): """Returns outputs of RetinaNet head.""" class_outputs = {} box_outputs = {} - with backend.get_graph().as_default(), tf.name_scope('retinanet_head'): + with tf.name_scope('retinanet_head'): for level in range(self._min_level, self._max_level + 1): features = fpn_features[level] @@ -597,12 +875,8 @@ class RetinanetHead(object): class ShapemaskPriorHead(object): """ShapeMask Prior head.""" - def __init__(self, - num_classes, - num_downsample_channels, - mask_crop_size, - use_category_for_mask, - shape_prior_path): + def __init__(self, num_classes, num_downsample_channels, mask_crop_size, + use_category_for_mask, shape_prior_path): """Initialize params to build RetinaNet head. Args: @@ -629,12 +903,12 @@ class ShapemaskPriorHead(object): Args: fpn_features: a dictionary of FPN features. - boxes: a float tensor of shape [batch_size, num_instances, 4] - representing the tight gt boxes from dataloader/detection. + boxes: a float tensor of shape [batch_size, num_instances, 4] representing + the tight gt boxes from dataloader/detection. outer_boxes: a float tensor of shape [batch_size, num_instances, 4] representing the loose gt boxes from dataloader/detection. - classes: a int Tensor of shape [batch_size, num_instances] - of instance classes. + classes: a int Tensor of shape [batch_size, num_instances] of instance + classes. is_training: training mode or not. Returns: @@ -644,7 +918,7 @@ class ShapemaskPriorHead(object): detection_priors: A float Tensor of shape [batch_size * num_instances, mask_size, mask_size, 1]. 
""" - with backend.get_graph().as_default(), tf.name_scope('prior_mask'): + with tf.name_scope('prior_mask'): batch_size, num_instances, _ = boxes.get_shape().as_list() outer_boxes = tf.cast(outer_boxes, tf.float32) boxes = tf.cast(boxes, tf.float32) @@ -655,8 +929,9 @@ class ShapemaskPriorHead(object): shape_priors = self._get_priors() # Get uniform priors for each outer box. - uniform_priors = tf.ones([batch_size, num_instances, self._mask_crop_size, - self._mask_crop_size]) + uniform_priors = tf.ones([ + batch_size, num_instances, self._mask_crop_size, self._mask_crop_size + ]) uniform_priors = spatial_transform_ops.crop_mask_in_target_box( uniform_priors, boxes, outer_boxes, self._mask_crop_size) @@ -665,8 +940,9 @@ class ShapemaskPriorHead(object): tf.cast(instance_features, tf.float32), uniform_priors, classes) instance_priors = tf.gather(shape_priors, classes) - instance_priors *= tf.expand_dims(tf.expand_dims( - tf.cast(prior_distribution, tf.float32), axis=-1), axis=-1) + instance_priors *= tf.expand_dims( + tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1), + axis=-1) instance_priors = tf.reduce_sum(instance_priors, axis=2) detection_priors = spatial_transform_ops.crop_mask_in_target_box( instance_priors, boxes, outer_boxes, self._mask_crop_size) @@ -685,8 +961,10 @@ class ShapemaskPriorHead(object): # If prior path does not exist, do not use priors, i.e., pirors equal to # uniform empty 32x32 patch. self._num_clusters = 1 - priors = tf.zeros([self._mask_num_classes, self._num_clusters, - self._mask_crop_size, self._mask_crop_size]) + priors = tf.zeros([ + self._mask_num_classes, self._num_clusters, self._mask_crop_size, + self._mask_crop_size + ]) return priors def _classify_shape_priors(self, features, uniform_priors, classes): @@ -696,12 +974,12 @@ class ShapemaskPriorHead(object): category. Args: - features: A float Tensor of shape [batch_size, num_instances, - mask_size, mask_size, num_channels]. 
+ features: A float Tensor of shape [batch_size, num_instances, mask_size, + mask_size, num_channels]. uniform_priors: A float Tensor of shape [batch_size, num_instances, mask_size, mask_size] representing the uniform detection priors. - classes: A int Tensor of shape [batch_size, num_instances] - of detection class ids. + classes: A int Tensor of shape [batch_size, num_instances] of detection + class ids. Returns: prior_distribution: A float Tensor of shape @@ -716,10 +994,11 @@ class ShapemaskPriorHead(object): features = tf.reduce_mean(features, axis=(2, 3)) logits = tf.keras.layers.Dense( self._mask_num_classes * self._num_clusters, - kernel_initializer=tf.random_normal_initializer(stddev=0.01))(features) - logits = tf.reshape(logits, - [batch_size, num_instances, - self._mask_num_classes, self._num_clusters]) + kernel_initializer=tf.random_normal_initializer(stddev=0.01), + name='classify-shape-prior-fc')(features) + logits = tf.reshape( + logits, + [batch_size, num_instances, self._mask_num_classes, self._num_clusters]) if self._use_category_for_mask: logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2) logits = tf.squeeze(logits, axis=2) @@ -749,8 +1028,8 @@ class ShapemaskCoarsemaskHead(object): use_category_for_mask: use class information in mask branch. num_convs: `int` number of stacked convolution before the last prediction layer. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. 
""" self._mask_num_classes = num_classes if use_category_for_mask else 1 self._use_category_for_mask = use_category_for_mask @@ -766,13 +1045,15 @@ class ShapemaskCoarsemaskHead(object): self._class_norm_activation = [] for i in range(self._num_convs): - self._class_conv.append(tf.keras.layers.Conv2D( - self._num_downsample_channels, - kernel_size=(3, 3), - bias_initializer=tf.zeros_initializer(), - kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), - padding='same', - name='coarse-mask-class-%d' % i)) + self._class_conv.append( + tf.keras.layers.Conv2D( + self._num_downsample_channels, + kernel_size=(3, 3), + bias_initializer=tf.zeros_initializer(), + kernel_initializer=tf.keras.initializers.RandomNormal( + stddev=0.01), + padding='same', + name='coarse-mask-class-%d' % i)) self._class_norm_activation.append( norm_activation(name='coarse-mask-class-%d-bn' % i)) @@ -797,17 +1078,17 @@ class ShapemaskCoarsemaskHead(object): mask_crop_size, mask_crop_size, num_downsample_channels]. This is the instance feature crop. detection_priors: a float Tensor of shape [batch_size, num_instances, - mask_crop_size, mask_crop_size, 1]. This is the detection prior for - the instance. - classes: a int Tensor of shape [batch_size, num_instances] - of instance classes. + mask_crop_size, mask_crop_size, 1]. This is the detection prior for the + instance. + classes: a int Tensor of shape [batch_size, num_instances] of instance + classes. is_training: a bool indicating whether in training mode. Returns: mask_outputs: instance mask prediction as a float Tensor of shape [batch_size, num_instances, mask_size, mask_size]. """ - with backend.get_graph().as_default(), tf.name_scope('coarse_mask'): + with tf.name_scope('coarse_mask'): # Transform detection priors to have the same dimension as features. 
detection_priors = tf.expand_dims(detection_priors, axis=-1) detection_priors = self._coarse_mask_fc(detection_priors) @@ -817,8 +1098,8 @@ class ShapemaskCoarsemaskHead(object): # Gather the logits with right input class. if self._use_category_for_mask: mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3]) - mask_logits = tf.gather(mask_logits, tf.expand_dims(classes, -1), - batch_dims=2) + mask_logits = tf.gather( + mask_logits, tf.expand_dims(classes, -1), batch_dims=2) mask_logits = tf.squeeze(mask_logits, axis=2) else: mask_logits = mask_logits[..., 0] @@ -838,16 +1119,17 @@ class ShapemaskCoarsemaskHead(object): """ (batch_size, num_instances, height, width, num_channels) = features.get_shape().as_list() - features = tf.reshape(features, [batch_size * num_instances, height, width, - num_channels]) + features = tf.reshape( + features, [batch_size * num_instances, height, width, num_channels]) for i in range(self._num_convs): features = self._class_conv[i](features) - features = self._class_norm_activation[i](features, - is_training=is_training) + features = self._class_norm_activation[i]( + features, is_training=is_training) mask_logits = self._class_predict(features) - mask_logits = tf.reshape(mask_logits, [batch_size, num_instances, height, - width, self._mask_num_classes]) + mask_logits = tf.reshape( + mask_logits, + [batch_size, num_instances, height, width, self._mask_num_classes]) return mask_logits @@ -904,8 +1186,8 @@ class ShapemaskFinemaskHead(object): activation=None, padding='same', name='fine-mask-class-%d' % i)) - self._fine_class_bn.append(norm_activation( - name='fine-mask-class-%d-bn' % i)) + self._fine_class_bn.append( + norm_activation(name='fine-mask-class-%d-bn' % i)) self._class_predict_conv = tf.keras.layers.Conv2D( self._mask_num_classes, @@ -923,14 +1205,13 @@ class ShapemaskFinemaskHead(object): https://arxiv.org/pdf/1904.03239.pdf Args: - features: a float Tensor of shape - [batch_size, num_instances, mask_crop_size, 
mask_crop_size, - num_downsample_channels]. This is the instance feature crop. - mask_logits: a float Tensor of shape - [batch_size, num_instances, mask_crop_size, mask_crop_size] indicating - predicted mask logits. - classes: a int Tensor of shape [batch_size, num_instances] - of instance classes. + features: a float Tensor of shape [batch_size, num_instances, + mask_crop_size, mask_crop_size, num_downsample_channels]. This is the + instance feature crop. + mask_logits: a float Tensor of shape [batch_size, num_instances, + mask_crop_size, mask_crop_size] indicating predicted mask logits. + classes: a int Tensor of shape [batch_size, num_instances] of instance + classes. is_training: a bool indicating whether in training mode. Returns: @@ -939,7 +1220,7 @@ class ShapemaskFinemaskHead(object): """ # Extract the foreground mean features # with tf.variable_scope('fine_mask', reuse=tf.AUTO_REUSE): - with backend.get_graph().as_default(), tf.name_scope('fine_mask'): + with tf.name_scope('fine_mask'): mask_probs = tf.nn.sigmoid(mask_logits) # Compute instance embedding for hard average. 
binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype) @@ -957,8 +1238,8 @@ class ShapemaskFinemaskHead(object): mask_logits = self.decoder_net(features, is_training) if self._use_category_for_mask: mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3]) - mask_logits = tf.gather(mask_logits, - tf.expand_dims(classes, -1), batch_dims=2) + mask_logits = tf.gather( + mask_logits, tf.expand_dims(classes, -1), batch_dims=2) mask_logits = tf.squeeze(mask_logits, axis=2) else: mask_logits = mask_logits[..., 0] @@ -979,8 +1260,8 @@ class ShapemaskFinemaskHead(object): """ (batch_size, num_instances, height, width, num_channels) = features.get_shape().as_list() - features = tf.reshape(features, [batch_size * num_instances, height, width, - num_channels]) + features = tf.reshape( + features, [batch_size * num_instances, height, width, num_channels]) for i in range(self._num_convs): features = self._fine_class_conv[i](features) features = self._fine_class_bn[i](features, is_training=is_training) @@ -991,9 +1272,8 @@ class ShapemaskFinemaskHead(object): # Predict per-class instance masks. mask_logits = self._class_predict_conv(features) - mask_logits = tf.reshape(mask_logits, - [batch_size, num_instances, - height * self.up_sample_factor, - width * self.up_sample_factor, - self._mask_num_classes]) + mask_logits = tf.reshape(mask_logits, [ + batch_size, num_instances, height * self.up_sample_factor, + width * self.up_sample_factor, self._mask_num_classes + ]) return mask_logits diff --git a/official/vision/detection/modeling/architecture/identity.py b/official/vision/detection/modeling/architecture/identity.py index acc90c4d5efddcac50eb95b1229c3c5500917445..778297f8919f8a90875c69ce1f11ef5dfd9fc95f 100644 --- a/official/vision/detection/modeling/architecture/identity.py +++ b/official/vision/detection/modeling/architecture/identity.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Identity Fn that forwards the input features.""" from __future__ import absolute_import diff --git a/official/vision/detection/modeling/architecture/nn_blocks.py b/official/vision/detection/modeling/architecture/nn_blocks.py index c94a079f9a4ce4081c478bc373d381070ddfaf96..7a59bfc68a32c50814b11445c85a9add01048229 100644 --- a/official/vision/detection/modeling/architecture/nn_blocks.py +++ b/official/vision/detection/modeling/architecture/nn_blocks.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Contains common building blocks for neural networks.""" from __future__ import absolute_import diff --git a/official/vision/detection/modeling/architecture/nn_ops.py b/official/vision/detection/modeling/architecture/nn_ops.py index 8b3617d6c5b23dd31a9f891985dcf8361ff1e177..76a33d98d0037361e1607d95ed275043fa41d364 100644 --- a/official/vision/detection/modeling/architecture/nn_ops.py +++ b/official/vision/detection/modeling/architecture/nn_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Neural network operations commonly shared by the architectures.""" from __future__ import absolute_import @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools + import tensorflow as tf @@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer): GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization layer. init_zero: `bool` if True, initializes scale parameter of batch - normalization with 0. If False, initialize it with 1. + normalization with 0. If False, initialize it with 1. fused: `bool` fused option in batch normalziation. use_actiation: `bool`, whether to add the optional activation layer after the batch normalization layer. diff --git a/official/vision/detection/modeling/architecture/resnet.py b/official/vision/detection/modeling/architecture/resnet.py index abbc7213ea971f0cb014d770e7e0c1707855fb08..6f76e880ed701e17795454c252d97d9a876d6d16 100644 --- a/official/vision/detection/modeling/architecture/resnet.py +++ b/official/vision/detection/modeling/architecture/resnet.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Contains definitions for the post-activation form of Residual Networks. Residual networks (ResNets) were proposed in: @@ -23,27 +23,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl import logging import tensorflow as tf -from tensorflow.python.keras import backend from official.vision.detection.modeling.architecture import nn_ops + # TODO(b/140112644): Refactor the code with Keras style, i.e. build and call. class Resnet(object): """Class to build ResNet family model.""" - def __init__(self, - resnet_depth, - activation='relu', - norm_activation=nn_ops.norm_activation_builder( - activation='relu'), - data_format='channels_last'): + def __init__( + self, + resnet_depth, + activation='relu', + norm_activation=nn_ops.norm_activation_builder(activation='relu'), + data_format='channels_last'): """ResNet initialization function. Args: resnet_depth: `int` depth of ResNet backbone model. - norm_activation: an operation that includes a normalization layer - followed by an optional activation layer. + norm_activation: an operation that includes a normalization layer followed + by an optional activation layer. data_format: `str` either "channels_first" for `[batch, channels, height, width]` or "channels_last for `[batch, height, width, channels]`. 
""" @@ -58,24 +57,45 @@ class Resnet(object): self._data_format = data_format model_params = { - 10: {'block': self.residual_block, 'layers': [1, 1, 1, 1]}, - 18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]}, - 34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]}, - 50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]}, - 101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]}, - 152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]}, - 200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]} + 10: { + 'block': self.residual_block, + 'layers': [1, 1, 1, 1] + }, + 18: { + 'block': self.residual_block, + 'layers': [2, 2, 2, 2] + }, + 34: { + 'block': self.residual_block, + 'layers': [3, 4, 6, 3] + }, + 50: { + 'block': self.bottleneck_block, + 'layers': [3, 4, 6, 3] + }, + 101: { + 'block': self.bottleneck_block, + 'layers': [3, 4, 23, 3] + }, + 152: { + 'block': self.bottleneck_block, + 'layers': [3, 8, 36, 3] + }, + 200: { + 'block': self.bottleneck_block, + 'layers': [3, 24, 36, 3] + } } if resnet_depth not in model_params: valid_resnet_depths = ', '.join( [str(depth) for depth in sorted(model_params.keys())]) raise ValueError( - 'The resnet_depth should be in [%s]. Not a valid resnet_depth:'%( - valid_resnet_depths), self._resnet_depth) + 'The resnet_depth should be in [%s]. Not a valid resnet_depth:' % + (valid_resnet_depths), self._resnet_depth) params = model_params[resnet_depth] - self._resnet_fn = self.resnet_v1_generator( - params['block'], params['layers']) + self._resnet_fn = self.resnet_v1_generator(params['block'], + params['layers']) def __call__(self, inputs, is_training=None): """Returns the ResNet model for a given size and number of output classes. @@ -90,18 +110,17 @@ class Resnet(object): The values are corresponding feature hierarchy in ResNet with shape [batch_size, height_l, width_l, num_filters]. 
""" - with backend.get_graph().as_default(): - with tf.name_scope('resnet%s' % self._resnet_depth): - return self._resnet_fn(inputs, is_training) + with tf.name_scope('resnet%s' % self._resnet_depth): + return self._resnet_fn(inputs, is_training) def fixed_padding(self, inputs, kernel_size): """Pads the input along the spatial dimensions independently of input size. Args: - inputs: `Tensor` of size `[batch, channels, height, width]` or - `[batch, height, width, channels]` depending on `data_format`. + inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch, + height, width, channels]` depending on `data_format`. kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d` - operations. Should be a positive integer. + operations. Should be a positive integer. Returns: A padded `Tensor` of the same `data_format` with size either intact @@ -160,14 +179,15 @@ class Resnet(object): Args: inputs: `Tensor` of size `[batch, channels, height, width]`. filters: `int` number of filters for the first two convolutions. Note that - the third and final convolution will use 4 times as many filters. + the third and final convolution will use 4 times as many filters. strides: `int` block stride. If greater than 1, this block will ultimately - downsample the input. + downsample the input. use_projection: `bool` for whether this block should use a projection - shortcut (versus the default identity shortcut). This is usually - `True` for the first block of a block group, which may change the - number of filters and the resolution. + shortcut (versus the default identity shortcut). This is usually `True` + for the first block of a block group, which may change the number of + filters and the resolution. is_training: `bool` if True, the model is in training mode. + Returns: The output `Tensor` of the block. 
""" @@ -185,8 +205,9 @@ class Resnet(object): inputs = self.conv2d_fixed_padding( inputs=inputs, filters=filters, kernel_size=3, strides=1) - inputs = self._norm_activation(use_activation=False, init_zero=True)( - inputs, is_training=is_training) + inputs = self._norm_activation( + use_activation=False, init_zero=True)( + inputs, is_training=is_training) return self._activation_op(inputs + shortcut) @@ -201,13 +222,13 @@ class Resnet(object): Args: inputs: `Tensor` of size `[batch, channels, height, width]`. filters: `int` number of filters for the first two convolutions. Note that - the third and final convolution will use 4 times as many filters. + the third and final convolution will use 4 times as many filters. strides: `int` block stride. If greater than 1, this block will ultimately - downsample the input. + downsample the input. use_projection: `bool` for whether this block should use a projection - shortcut (versus the default identity shortcut). This is usually - `True` for the first block of a block group, which may change the - number of filters and the resolution. + shortcut (versus the default identity shortcut). This is usually `True` + for the first block of a block group, which may change the number of + filters and the resolution. is_training: `bool` if True, the model is in training mode. Returns: @@ -233,8 +254,9 @@ class Resnet(object): inputs = self.conv2d_fixed_padding( inputs=inputs, filters=4 * filters, kernel_size=1, strides=1) - inputs = self._norm_activation(use_activation=False, init_zero=True)( - inputs, is_training=is_training) + inputs = self._norm_activation( + use_activation=False, init_zero=True)( + inputs, is_training=is_training) return self._activation_op(inputs + shortcut) @@ -248,7 +270,7 @@ class Resnet(object): block_fn: `function` for the block to use within the model blocks: `int` number of blocks contained in the layer. strides: `int` stride to use for the first convolution of the layer. 
If - greater than 1, this layer will downsample the input. + greater than 1, this layer will downsample the input. name: `str`name for the Tensor output of the block layer. is_training: `bool` if True, the model is in training mode. @@ -256,8 +278,8 @@ class Resnet(object): The output `Tensor` of the block layer. """ # Only the first block per block_group uses projection shortcut and strides. - inputs = block_fn(inputs, filters, strides, use_projection=True, - is_training=is_training) + inputs = block_fn( + inputs, filters, strides, use_projection=True, is_training=is_training) for _ in range(1, blocks): inputs = block_fn(inputs, filters, 1, is_training=is_training) @@ -269,7 +291,7 @@ class Resnet(object): Args: block_fn: `function` for the block to use within the model. Either - `residual_block` or `bottleneck_block`. + `residual_block` or `bottleneck_block`. layers: list of 4 `int`s denoting the number of blocks to include in each of the 4 block groups. Each group consists of blocks that take inputs of the same resolution. 
@@ -293,17 +315,37 @@ class Resnet(object): inputs = tf.identity(inputs, 'initial_max_pool') c2 = self.block_group( - inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0], - strides=1, name='block_group1', is_training=is_training) + inputs=inputs, + filters=64, + block_fn=block_fn, + blocks=layers[0], + strides=1, + name='block_group1', + is_training=is_training) c3 = self.block_group( - inputs=c2, filters=128, block_fn=block_fn, blocks=layers[1], - strides=2, name='block_group2', is_training=is_training) + inputs=c2, + filters=128, + block_fn=block_fn, + blocks=layers[1], + strides=2, + name='block_group2', + is_training=is_training) c4 = self.block_group( - inputs=c3, filters=256, block_fn=block_fn, blocks=layers[2], - strides=2, name='block_group3', is_training=is_training) + inputs=c3, + filters=256, + block_fn=block_fn, + blocks=layers[2], + strides=2, + name='block_group3', + is_training=is_training) c5 = self.block_group( - inputs=c4, filters=512, block_fn=block_fn, blocks=layers[3], - strides=2, name='block_group4', is_training=is_training) + inputs=c4, + filters=512, + block_fn=block_fn, + blocks=layers[3], + strides=2, + name='block_group4', + is_training=is_training) return {2: c2, 3: c3, 4: c4, 5: c5} return model diff --git a/official/vision/detection/modeling/architecture/spinenet.py b/official/vision/detection/modeling/architecture/spinenet.py index a11d11d5b52b1c754037558c4c5030ff7b9b4417..291203d9c25c3b5d4d4ee07e993ec44a611ef508 100644 --- a/official/vision/detection/modeling/architecture/spinenet.py +++ b/official/vision/detection/modeling/architecture/spinenet.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Lint as: python3 # ============================================================================== """Implementation of SpineNet model. @@ -24,7 +25,6 @@ import math from absl import logging import tensorflow as tf -from tensorflow.python.keras import backend from official.modeling import tf_utils from official.vision.detection.modeling.architecture import nn_blocks @@ -486,21 +486,20 @@ class SpineNetBuilder(object): self._norm_epsilon = norm_epsilon def __call__(self, inputs, is_training=None): - with backend.get_graph().as_default(): - model = SpineNet( - input_specs=self._input_specs, - min_level=self._min_level, - max_level=self._max_level, - block_specs=self._block_specs, - endpoints_num_filters=self._endpoints_num_filters, - resample_alpha=self._resample_alpha, - block_repeats=self._block_repeats, - filter_size_scale=self._filter_size_scale, - kernel_initializer=self._kernel_initializer, - kernel_regularizer=self._kernel_regularizer, - bias_regularizer=self._bias_regularizer, - activation=self._activation, - use_sync_bn=self._use_sync_bn, - norm_momentum=self._norm_momentum, - norm_epsilon=self._norm_epsilon) - return model(inputs) + model = SpineNet( + input_specs=self._input_specs, + min_level=self._min_level, + max_level=self._max_level, + block_specs=self._block_specs, + endpoints_num_filters=self._endpoints_num_filters, + resample_alpha=self._resample_alpha, + block_repeats=self._block_repeats, + filter_size_scale=self._filter_size_scale, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon) + return model(inputs) diff --git 
a/official/vision/detection/modeling/base_model.py b/official/vision/detection/modeling/base_model.py index 8d18f12f5b7c52ca02334c4c685b70d353de83c5..0558e1db5530f3b85dd8ce9acf5d4c3b23146bc6 100644 --- a/official/vision/detection/modeling/base_model.py +++ b/official/vision/detection/modeling/base_model.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Base Model definition.""" from __future__ import absolute_import @@ -21,6 +21,7 @@ from __future__ import print_function import abc import functools import re + import tensorflow as tf from official.vision.detection.modeling import checkpoint_utils from official.vision.detection.modeling import learning_rates @@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix): # frozen_variable_prefix: a regex string specifing the prefix pattern of # the frozen variables' names. filtered_variables = [ - v for v in variables - if not frozen_variable_prefix or + v for v in variables if not frozen_variable_prefix or not re.match(frozen_variable_prefix, v.name) ] return filtered_variables @@ -60,9 +60,7 @@ class Model(object): self._use_bfloat16 = params.architecture.use_bfloat16 if params.architecture.use_bfloat16: - policy = tf.compat.v2.keras.mixed_precision.experimental.Policy( - 'mixed_bfloat16') - tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy) + tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16') # Optimization. 
self._optimizer_fn = optimizers.OptimizerFactory(params.train.optimizer) @@ -115,8 +113,8 @@ class Model(object): def weight_decay_loss(self, trainable_variables): reg_variables = [ v for v in trainable_variables - if self._regularization_var_regex is None - or re.match(self._regularization_var_regex, v.name) + if self._regularization_var_regex is None or + re.match(self._regularization_var_regex, v.name) ] return self._l2_weight_decay * tf.add_n( diff --git a/official/vision/detection/modeling/checkpoint_utils.py b/official/vision/detection/modeling/checkpoint_utils.py index 1bb798396a714cbbc1a36309c99ceaa636a30354..fc0c09b7fcfee32b84db139b60880c018503d84a 100644 --- a/official/vision/detection/modeling/checkpoint_utils.py +++ b/official/vision/detection/modeling/checkpoint_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x + +"""Util functions for loading checkpoints. + +Especially for loading Tensorflow 1.x checkpoint to Tensorflow 2.x (keras) model. 
""" @@ -20,18 +22,19 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import re + from absl import logging import tensorflow as tf def _build_assignment_map(keras_model, - prefix='', - skip_variables_regex=None, - var_to_shape_map=None): + prefix='', + skip_variables_regex=None, + var_to_shape_map=None): """Compute an assignment mapping for loading older checkpoints into a Keras + model. Variable names are remapped from the original TPUEstimator model to the new Keras name. @@ -48,12 +51,15 @@ def _build_assignment_map(keras_model, """ assignment_map = {} - - checkpoint_names = None + checkpoint_names = [] if var_to_shape_map: - checkpoint_names = list(filter( - lambda x: not x.endswith('Momentum') and not x.endswith( - 'global_step'), var_to_shape_map.keys())) + checkpoint_names = list( + filter( + lambda x: not x.endswith('Momentum') and not x.endswith( + 'global_step'), var_to_shape_map.keys())) + + logging.info('Number of variables in the checkpoint %d', + len(checkpoint_names)) for var in keras_model.variables: var_name = var.name @@ -84,7 +90,7 @@ def _build_assignment_map(keras_model, logging.info('Error removing the match_name: %s', match_names) logging.info('Exception: %s', e) raise - logging.info('Found variable in checkpoint: %d', len(assignment_map)) + logging.info('Found matching variable in checkpoint: %d', len(assignment_map)) return assignment_map @@ -95,14 +101,15 @@ def _get_checkpoint_map(checkpoint_path): def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None): """Returns scaffold function to restore parameters from v1 checkpoint. + Args: checkpoint_path: path of the checkpoint folder or file. Example 1: '/path/to/model_dir/' Example 2: '/path/to/model.ckpt-22500' prefix: prefix in the variable name to be remove for alignment with names in the checkpoint. - skip_regex: regular expression to math the names of variables that - do not need to be assign. 
+ skip_regex: regular expression to math the names of variables that do not + need to be assign. Returns: Callable[tf.kears.Model] -> void. Fn to load v1 checkpoint to keras model. @@ -125,7 +132,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None): var_to_shape_map=var_to_shape_map) if not vars_to_load: raise ValueError('Variables to load is empty.') - tf.compat.v1.train.init_from_checkpoint(checkpoint_path, - vars_to_load) + tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load) return _restore_checkpoint_fn diff --git a/official/vision/detection/modeling/factory.py b/official/vision/detection/modeling/factory.py index b140416dfdba90420f99a8bcb3b07cc04a63cc3e..c1393bcce047bce63cfae6cd9c963b783816d355 100644 --- a/official/vision/detection/modeling/factory.py +++ b/official/vision/detection/modeling/factory.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Factory to build detection model.""" from official.vision.detection.modeling import maskrcnn_model +from official.vision.detection.modeling import olnmask_model from official.vision.detection.modeling import retinanet_model from official.vision.detection.modeling import shapemask_model @@ -26,6 +27,8 @@ def model_generator(params): model_fn = retinanet_model.RetinanetModel(params) elif params.type == 'mask_rcnn': model_fn = maskrcnn_model.MaskrcnnModel(params) + elif params.type == 'olnmask': + model_fn = olnmask_model.OlnMaskModel(params) elif params.type == 'shapemask': model_fn = shapemask_model.ShapeMaskModel(params) else: diff --git a/official/vision/detection/modeling/learning_rates.py b/official/vision/detection/modeling/learning_rates.py index ecc24ffadb073c79f71725b1adcb61cbd83127cd..7c1cc147942af63064ae174baeeb0d5ead3a5d3e 100644 --- a/official/vision/detection/modeling/learning_rates.py +++ b/official/vision/detection/modeling/learning_rates.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Learning rate schedule.""" from __future__ import absolute_import @@ -25,7 +25,8 @@ import tensorflow as tf from official.modeling.hyperparams import params_dict -class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): +class StepLearningRateWithLinearWarmup( + tf.keras.optimizers.schedules.LearningRateSchedule): """Class to generate learning rate tensor.""" def __init__(self, total_steps, params): @@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat return {'_params': self._params.as_dict()} -class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): +class CosineLearningRateWithLinearWarmup( + tf.keras.optimizers.schedules.LearningRateSchedule): """Class to generate learning rate tensor.""" def __init__(self, total_steps, params): diff --git a/official/vision/detection/modeling/losses.py b/official/vision/detection/modeling/losses.py index 4b993061b3c51c9ae6456d84a79f7fea5d74c77e..02e2632ae60c9da49f58c1239964d2f1104b52f8 100644 --- a/official/vision/detection/modeling/losses.py +++ b/official/vision/detection/modeling/losses.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Losses used for detection models.""" from __future__ import absolute_import @@ -195,6 +195,149 @@ class RpnBoxLoss(object): return box_loss +class OlnRpnCenterLoss(object): + """Object Localization Network RPN centerness regression loss function.""" + + def __init__(self): + self._l1_loss = tf.keras.losses.MeanAbsoluteError( + reduction=tf.keras.losses.Reduction.SUM) + + def __call__(self, center_outputs, labels): + """Computes total RPN centerness regression loss. + + Computes total RPN centerness score regression loss from all levels. + + Args: + center_outputs: an OrderDict with keys representing levels and values + representing anchor centerness regression targets in + [batch_size, height, width, num_anchors * 4]. + labels: the dictionary that returned from dataloader that includes + groundturth targets. + + Returns: + rpn_center_loss: a scalar tensor representing total centerness regression + loss. + """ + with tf.name_scope('rpn_loss'): + # Normalizer. + levels = sorted(center_outputs.keys()) + num_valid = 0 + # 00, neg=0, ign=-1. + mask_ = tf.cast(tf.logical_and( + tf.greater(center_targets[level][..., 0], 0.0), + tf.greater(tf.reduce_min(labels[level], -1), 0.0)), tf.float32) + normalizer += tf.reduce_sum(mask_) + normalizer += 1e-8 + # iou_loss over multi levels. + iou_losses = [] + for level in levels: + iou_losses.append( + self._rpn_iou_loss( + box_outputs[level], labels[level], + center_weight=center_targets[level][..., 0], + normalizer=normalizer)) + # Sum per level losses to total loss. + return tf.add_n(iou_losses) + + def _rpn_iou_loss(self, box_outputs, box_targets, + center_weight=None, normalizer=1.0): + """Computes box regression loss.""" + # for instances, the regression targets of 512x512 input with 6 anchors on + # P2-P6 pyramid is about [0.1, 0.1, 0.2, 0.2]. 
+ with tf.name_scope('rpn_iou_loss'): + mask = tf.logical_and( + tf.greater(center_weight, 0.0), + tf.greater(tf.reduce_min(box_targets, -1), 0.0)) + + pred_left = box_outputs[..., 0] + pred_right = box_outputs[..., 1] + pred_top = box_outputs[..., 2] + pred_bottom = box_outputs[..., 3] + + gt_left = box_targets[..., 0] + gt_right = box_targets[..., 1] + gt_top = box_targets[..., 2] + gt_bottom = box_targets[..., 3] + + inter_width = (tf.minimum(pred_left, gt_left) + + tf.minimum(pred_right, gt_right)) + inter_height = (tf.minimum(pred_top, gt_top) + + tf.minimum(pred_bottom, gt_bottom)) + inter_area = inter_width * inter_height + union_area = ((pred_left + pred_right) * (pred_top + pred_bottom) + + (gt_left + gt_right) * (gt_top + gt_bottom) - + inter_area) + iou = inter_area / (union_area + 1e-8) + mask_ = tf.cast(mask, tf.float32) + iou = tf.clip_by_value(iou, clip_value_min=1e-8, clip_value_max=1.0) + neg_log_iou = -tf.math.log(iou) + iou_loss = tf.reduce_sum(neg_log_iou * mask_) + iou_loss /= normalizer + return iou_loss + + class FastrcnnClassLoss(object): """Fast R-CNN classification loss function.""" @@ -317,6 +460,47 @@ class FastrcnnBoxLoss(object): return box_loss +class OlnBoxScoreLoss(object): + """Object Localization Network Box-Iou scoring function.""" + + def __init__(self, params): + self._ignore_threshold = params.ignore_threshold + self._l1_loss = tf.keras.losses.MeanAbsoluteError( + reduction=tf.keras.losses.Reduction.SUM) + + def __call__(self, score_outputs, score_targets): + """Computes the class loss (Fast-RCNN branch) of Mask-RCNN. + + This function implements the classification loss of the Fast-RCNN. + + The classification loss is softmax on all RoIs. + Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py # pylint: disable=line-too-long + + Args: + score_outputs: a float tensor representing the class prediction for each box + with a shape of [batch_size, num_boxes, num_classes]. 
+ score_targets: a float tensor representing the class label for each box + with a shape of [batch_size, num_boxes]. + + Returns: + a scalar tensor representing total score loss. + """ + with tf.name_scope('fast_rcnn_loss'): + score_outputs = tf.squeeze(score_outputs, -1) + + mask = tf.greater(score_targets, self._ignore_threshold) + num_valid = tf.reduce_sum(tf.cast(mask, tf.float32)) + score_targets = tf.maximum(score_targets, tf.zeros_like(score_targets)) + score_outputs = tf.sigmoid(score_outputs) + score_targets = tf.expand_dims(score_targets, -1) + score_outputs = tf.expand_dims(score_outputs, -1) + mask = tf.cast(mask, dtype=tf.float32) + score_loss = self._l1_loss(score_targets, score_outputs, + sample_weight=mask) + score_loss /= (num_valid + 1e-10) + return score_loss + + class MaskrcnnLoss(object): """Mask R-CNN instance segmentation mask loss function.""" @@ -449,7 +633,7 @@ class RetinanetBoxLoss(object): num_positives: number of positive examples in the minibatch. Returns: - an integar tensor representing total box regression loss. + an integer tensor representing total box regression loss. """ # Sums all positives in a batch for normalization and avoids zero # num_positives_sum, which would lead to inf loss during training @@ -457,7 +641,6 @@ class RetinanetBoxLoss(object): box_losses = [] for level in box_outputs.keys(): - # Onehot encoding for classification labels. box_targets_l = labels[level] box_losses.append( self.box_loss(box_outputs[level], box_targets_l, num_positives_sum)) diff --git a/official/vision/detection/modeling/maskrcnn_model.py b/official/vision/detection/modeling/maskrcnn_model.py index e5cbe7d56ba7d82836ef58df201aa74779cb2f69..e9e6bb2697d7f78d4d01c9dceb8e0997376aecab 100644 --- a/official/vision/detection/modeling/maskrcnn_model.py +++ b/official/vision/detection/modeling/maskrcnn_model.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Model defination for the Mask R-CNN Model.""" from __future__ import absolute_import @@ -20,7 +20,6 @@ from __future__ import print_function import tensorflow as tf -from tensorflow.python.keras import backend from official.vision.detection.dataloader import anchor from official.vision.detection.dataloader import mode_keys from official.vision.detection.evaluation import factory as eval_factory @@ -118,9 +117,7 @@ class MaskrcnnModel(base_model.Model): box_targets = tf.where( tf.tile( tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1), - [1, 1, 4]), - tf.zeros_like(box_targets), - box_targets) + [1, 1, 4]), tf.zeros_like(box_targets), box_targets) model_outputs.update({ 'class_targets': matched_gt_classes, 'box_targets': box_targets, @@ -183,9 +180,7 @@ class MaskrcnnModel(base_model.Model): mask_outputs), }) else: - model_outputs.update({ - 'detection_masks': tf.nn.sigmoid(mask_outputs) - }) + model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)}) return model_outputs @@ -297,14 +292,13 @@ class MaskrcnnModel(base_model.Model): def build_model(self, params, mode): if self._keras_model is None: input_layers = self.build_input_layers(self._params, mode) - with backend.get_graph().as_default(): - outputs = self.model_outputs(input_layers, mode) + outputs = self.model_outputs(input_layers, mode) - model = tf.keras.models.Model( - inputs=input_layers, outputs=outputs, name='maskrcnn') - assert model is not None, 'Fail to build tf.keras.Model.' 
- model.optimizer = self.build_optimizer() - self._keras_model = model + model = tf.keras.models.Model( + inputs=input_layers, outputs=outputs, name='maskrcnn') + assert model is not None, 'Fail to build tf.keras.Model.' + model.optimizer = self.build_optimizer() + self._keras_model = model return self._keras_model @@ -312,8 +306,8 @@ class MaskrcnnModel(base_model.Model): required_output_fields = ['class_outputs', 'box_outputs'] for field in required_output_fields: if field not in outputs: - raise ValueError('"%s" is missing in outputs, requried %s found %s' - %(field, required_output_fields, outputs.keys())) + raise ValueError('"%s" is missing in outputs, requried %s found %s' % + (field, required_output_fields, outputs.keys())) predictions = { 'image_info': labels['image_info'], 'num_detections': outputs['num_detections'], diff --git a/official/vision/detection/modeling/olnmask_model.py b/official/vision/detection/modeling/olnmask_model.py new file mode 100644 index 0000000000000000000000000000000000000000..60d59c1bd12bd25f8c2ed0e30bc32c7dad4cbdcf --- /dev/null +++ b/official/vision/detection/modeling/olnmask_model.py @@ -0,0 +1,432 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Model defination for the Object Localization Network (OLN) Model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from official.vision.detection.dataloader import anchor +from official.vision.detection.dataloader import mode_keys +from official.vision.detection.modeling import losses +from official.vision.detection.modeling.architecture import factory +from official.vision.detection.modeling.maskrcnn_model import MaskrcnnModel +from official.vision.detection.ops import postprocess_ops +from official.vision.detection.ops import roi_ops +from official.vision.detection.ops import spatial_transform_ops +from official.vision.detection.ops import target_ops +from official.vision.detection.utils import box_utils + + +class OlnMaskModel(MaskrcnnModel): + """OLN-Mask model function.""" + + def __init__(self, params): + super(OlnMaskModel, self).__init__(params) + + self._params = params + + # Different heads and layers. + self._include_rpn_class = params.architecture.include_rpn_class + self._include_mask = params.architecture.include_mask + self._include_frcnn_class = params.architecture.include_frcnn_class + self._include_frcnn_box = params.architecture.include_frcnn_box + self._include_centerness = params.rpn_head.has_centerness + self._include_box_score = (params.frcnn_head.has_scoring and + params.architecture.include_frcnn_box) + self._include_mask_score = (params.mrcnn_head.has_scoring and + params.architecture.include_mask) + + # Architecture generators. 
+ self._backbone_fn = factory.backbone_generator(params) + self._fpn_fn = factory.multilevel_features_generator(params) + self._rpn_head_fn = factory.rpn_head_generator(params) + if self._include_centerness: + self._rpn_head_fn = factory.oln_rpn_head_generator(params) + else: + self._rpn_head_fn = factory.rpn_head_generator(params) + self._generate_rois_fn = roi_ops.OlnROIGenerator(params.roi_proposal) + self._sample_rois_fn = target_ops.ROIScoreSampler(params.roi_sampling) + self._sample_masks_fn = target_ops.MaskSampler( + params.architecture.mask_target_size, + params.mask_sampling.num_mask_samples_per_image) + + if self._include_box_score: + self._frcnn_head_fn = factory.oln_box_score_head_generator(params) + else: + self._frcnn_head_fn = factory.fast_rcnn_head_generator(params) + + if self._include_mask: + if self._include_mask_score: + self._mrcnn_head_fn = factory.oln_mask_score_head_generator(params) + else: + self._mrcnn_head_fn = factory.mask_rcnn_head_generator(params) + + # Loss function. + self._rpn_score_loss_fn = losses.RpnScoreLoss(params.rpn_score_loss) + self._rpn_box_loss_fn = losses.RpnBoxLoss(params.rpn_box_loss) + if self._include_centerness: + self._rpn_iou_loss_fn = losses.OlnRpnIoULoss() + self._rpn_center_loss_fn = losses.OlnRpnCenterLoss() + self._frcnn_class_loss_fn = losses.FastrcnnClassLoss() + self._frcnn_box_loss_fn = losses.FastrcnnBoxLoss(params.frcnn_box_loss) + if self._include_box_score: + self._frcnn_box_score_loss_fn = losses.OlnBoxScoreLoss( + params.frcnn_box_score_loss) + if self._include_mask: + self._mask_loss_fn = losses.MaskrcnnLoss() + + self._generate_detections_fn = postprocess_ops.OlnDetectionGenerator( + params.postprocess) + + self._transpose_input = params.train.transpose_input + assert not self._transpose_input, 'Transpose input is not supportted.' 
+ + def build_outputs(self, inputs, mode): + is_training = mode == mode_keys.TRAIN + model_outputs = {} + + image = inputs['image'] + _, image_height, image_width, _ = image.get_shape().as_list() + backbone_features = self._backbone_fn(image, is_training) + fpn_features = self._fpn_fn(backbone_features, is_training) + + # rpn_centerness. + if self._include_centerness: + rpn_score_outputs, rpn_box_outputs, rpn_center_outputs = ( + self._rpn_head_fn(fpn_features, is_training)) + model_outputs.update({ + 'rpn_center_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + rpn_center_outputs), + }) + object_scores = rpn_center_outputs + else: + rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn( + fpn_features, is_training) + object_scores = None + model_outputs.update({ + 'rpn_score_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + rpn_score_outputs), + 'rpn_box_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + rpn_box_outputs), + }) + input_anchor = anchor.Anchor(self._params.architecture.min_level, + self._params.architecture.max_level, + self._params.anchor.num_scales, + self._params.anchor.aspect_ratios, + self._params.anchor.anchor_size, + (image_height, image_width)) + rpn_rois, rpn_roi_scores = self._generate_rois_fn( + rpn_box_outputs, + rpn_score_outputs, + input_anchor.multilevel_boxes, + inputs['image_info'][:, 1, :], + is_training, + is_box_lrtb=self._include_centerness, + object_scores=object_scores, + ) + if (not self._include_frcnn_class and + not self._include_frcnn_box and + not self._include_mask): + # if not is_training: + # For direct RPN detection, + # use dummy box_outputs = (dy,dx,dh,dw = 0,0,0,0) + box_outputs = tf.zeros_like(rpn_rois) + box_outputs = tf.concat([box_outputs, box_outputs], -1) + boxes, scores, classes, valid_detections = self._generate_detections_fn( + box_outputs, rpn_roi_scores, rpn_rois, + inputs['image_info'][:, 1:2, :], + is_single_fg_score=True, # if no_background, 
no softmax is applied. + keep_nms=True) + model_outputs.update({ + 'num_detections': valid_detections, + 'detection_boxes': boxes, + 'detection_classes': classes, + 'detection_scores': scores, + }) + return model_outputs + + # ---- OLN-Proposal finishes here. ---- + + if is_training: + rpn_rois = tf.stop_gradient(rpn_rois) + rpn_roi_scores = tf.stop_gradient(rpn_roi_scores) + + # Sample proposals. + (rpn_rois, rpn_roi_scores, matched_gt_boxes, matched_gt_classes, + matched_gt_indices) = ( + self._sample_rois_fn(rpn_rois, rpn_roi_scores, inputs['gt_boxes'], + inputs['gt_classes'])) + # Create bounding box training targets. + box_targets = box_utils.encode_boxes( + matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0]) + # If the target is background, the box target is set to all 0s. + box_targets = tf.where( + tf.tile( + tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1), + [1, 1, 4]), tf.zeros_like(box_targets), box_targets) + model_outputs.update({ + 'class_targets': matched_gt_classes, + 'box_targets': box_targets, + }) + # Create Box-IoU targets. 
{ + box_ious = box_utils.bbox_overlap( + rpn_rois, inputs['gt_boxes']) + matched_box_ious = tf.reduce_max(box_ious, 2) + model_outputs.update({ + 'box_iou_targets': matched_box_ious,}) # } + + roi_features = spatial_transform_ops.multilevel_crop_and_resize( + fpn_features, rpn_rois, output_size=7) + + if not self._include_box_score: + class_outputs, box_outputs = self._frcnn_head_fn( + roi_features, is_training) + else: + class_outputs, box_outputs, score_outputs = self._frcnn_head_fn( + roi_features, is_training) + model_outputs.update({ + 'box_score_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + score_outputs),}) + model_outputs.update({ + 'class_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + class_outputs), + 'box_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + box_outputs), + }) + + # Add this output to train to make the checkpoint loadable in predict mode. + # If we skip it in train mode, the heads will be out-of-order and checkpoint + # loading will fail. + if not self._include_frcnn_box: + box_outputs = tf.zeros_like(box_outputs) # dummy zeros. + + if self._include_box_score: + score_outputs = tf.cast(tf.squeeze(score_outputs, -1), + rpn_roi_scores.dtype) + + # box-score = (rpn-centerness * box-iou)^(1/2) + # TR: rpn_roi_scores: b,1000, score_outputs: b,512 + # TS: rpn_roi_scores: b,1000, score_outputs: b,1000 + box_scores = tf.pow( + rpn_roi_scores * tf.sigmoid(score_outputs), 1/2.) 
+ + if not self._include_frcnn_class: + boxes, scores, classes, valid_detections = self._generate_detections_fn( + box_outputs, + box_scores, + rpn_rois, + inputs['image_info'][:, 1:2, :], + is_single_fg_score=True, + keep_nms=True,) + else: + boxes, scores, classes, valid_detections = self._generate_detections_fn( + box_outputs, class_outputs, rpn_rois, + inputs['image_info'][:, 1:2, :], + keep_nms=True,) + model_outputs.update({ + 'num_detections': valid_detections, + 'detection_boxes': boxes, + 'detection_classes': classes, + 'detection_scores': scores, + }) + + # ---- OLN-Box finishes here. ---- + + if not self._include_mask: + return model_outputs + + if is_training: + rpn_rois, classes, mask_targets = self._sample_masks_fn( + rpn_rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices, + inputs['gt_masks']) + mask_targets = tf.stop_gradient(mask_targets) + + classes = tf.cast(classes, dtype=tf.int32) + + model_outputs.update({ + 'mask_targets': mask_targets, + 'sampled_class_targets': classes, + }) + else: + rpn_rois = boxes + classes = tf.cast(classes, dtype=tf.int32) + + mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize( + fpn_features, rpn_rois, output_size=14) + + mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes, is_training) + + if is_training: + model_outputs.update({ + 'mask_outputs': + tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), + mask_outputs), + }) + else: + model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)}) + + return model_outputs + + def build_loss_fn(self): + if self._keras_model is None: + raise ValueError('build_loss_fn() must be called after build_model().') + + filter_fn = self.make_filter_trainable_variables_fn() + trainable_variables = filter_fn(self._keras_model.trainable_variables) + + def _total_loss_fn(labels, outputs): + if self._include_rpn_class: + rpn_score_loss = self._rpn_score_loss_fn(outputs['rpn_score_outputs'], + labels['rpn_score_targets']) + else: + 
rpn_score_loss = 0.0 + if self._include_centerness: + rpn_center_loss = self._rpn_center_loss_fn( + outputs['rpn_center_outputs'], labels['rpn_center_targets']) + rpn_box_loss = self._rpn_iou_loss_fn( + outputs['rpn_box_outputs'], labels['rpn_box_targets'], + labels['rpn_center_targets']) + else: + rpn_center_loss = 0.0 + rpn_box_loss = self._rpn_box_loss_fn( + outputs['rpn_box_outputs'], labels['rpn_box_targets']) + + if self._include_frcnn_class: + frcnn_class_loss = self._frcnn_class_loss_fn( + outputs['class_outputs'], outputs['class_targets']) + else: + frcnn_class_loss = 0.0 + if self._include_frcnn_box: + frcnn_box_loss = self._frcnn_box_loss_fn( + outputs['box_outputs'], outputs['class_targets'], + outputs['box_targets']) + else: + frcnn_box_loss = 0.0 + if self._include_box_score: + box_score_loss = self._frcnn_box_score_loss_fn( + outputs['box_score_outputs'], outputs['box_iou_targets']) + else: + box_score_loss = 0.0 + + if self._include_mask: + mask_loss = self._mask_loss_fn(outputs['mask_outputs'], + outputs['mask_targets'], + outputs['sampled_class_targets']) + else: + mask_loss = 0.0 + + model_loss = ( + rpn_score_loss + rpn_box_loss + rpn_center_loss + + frcnn_class_loss + frcnn_box_loss + box_score_loss + + mask_loss) + + l2_regularization_loss = self.weight_decay_loss(trainable_variables) + total_loss = model_loss + l2_regularization_loss + return { + 'total_loss': total_loss, + 'loss': total_loss, + 'fast_rcnn_class_loss': frcnn_class_loss, + 'fast_rcnn_box_loss': frcnn_box_loss, + 'fast_rcnn_box_score_loss': box_score_loss, + 'mask_loss': mask_loss, + 'model_loss': model_loss, + 'l2_regularization_loss': l2_regularization_loss, + 'rpn_score_loss': rpn_score_loss, + 'rpn_box_loss': rpn_box_loss, + 'rpn_center_loss': rpn_center_loss, + } + + return _total_loss_fn + + def build_input_layers(self, params, mode): + is_training = mode == mode_keys.TRAIN + input_shape = ( + params.olnmask_parser.output_size + + [params.olnmask_parser.num_channels]) + 
if is_training: + batch_size = params.train.batch_size + input_layer = { + 'image': + tf.keras.layers.Input( + shape=input_shape, + batch_size=batch_size, + name='image', + dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), + 'image_info': + tf.keras.layers.Input( + shape=[4, 2], + batch_size=batch_size, + name='image_info', + ), + 'gt_boxes': + tf.keras.layers.Input( + shape=[params.olnmask_parser.max_num_instances, 4], + batch_size=batch_size, + name='gt_boxes'), + 'gt_classes': + tf.keras.layers.Input( + shape=[params.olnmask_parser.max_num_instances], + batch_size=batch_size, + name='gt_classes', + dtype=tf.int64), + } + if self._include_mask: + input_layer['gt_masks'] = tf.keras.layers.Input( + shape=[ + params.olnmask_parser.max_num_instances, + params.olnmask_parser.mask_crop_size, + params.olnmask_parser.mask_crop_size + ], + batch_size=batch_size, + name='gt_masks') + else: + batch_size = params.eval.batch_size + input_layer = { + 'image': + tf.keras.layers.Input( + shape=input_shape, + batch_size=batch_size, + name='image', + dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), + 'image_info': + tf.keras.layers.Input( + shape=[4, 2], + batch_size=batch_size, + name='image_info', + ), + } + return input_layer + + def build_model(self, params, mode): + if self._keras_model is None: + input_layers = self.build_input_layers(self._params, mode) + outputs = self.model_outputs(input_layers, mode) + + model = tf.keras.models.Model( + inputs=input_layers, outputs=outputs, name='olnmask') + assert model is not None, 'Fail to build tf.keras.Model.' 
+ model.optimizer = self.build_optimizer() + self._keras_model = model + + return self._keras_model diff --git a/official/vision/detection/modeling/optimizers.py b/official/vision/detection/modeling/optimizers.py index fd51bb59f579b3de027cba26ef3bee0e67d0c74f..8b098c9f6456f77e720af387ec3a31ddb4ff2947 100644 --- a/official/vision/detection/modeling/optimizers.py +++ b/official/vision/detection/modeling/optimizers.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Optimizers.""" from __future__ import absolute_import diff --git a/official/vision/detection/modeling/retinanet_model.py b/official/vision/detection/modeling/retinanet_model.py index 270cd14d27f16d5d0c4f5f0e8c902091a964f7d5..7a0a307c27ceb035b4f4c752ccbb1cd4ead4da29 100644 --- a/official/vision/detection/modeling/retinanet_model.py +++ b/official/vision/detection/modeling/retinanet_model.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Model defination for the RetinaNet Model.""" from __future__ import absolute_import @@ -20,7 +20,6 @@ from __future__ import print_function import tensorflow as tf -from tensorflow.python.keras import backend from official.vision.detection.dataloader import mode_keys from official.vision.detection.evaluation import factory as eval_factory from official.vision.detection.modeling import base_model @@ -52,18 +51,15 @@ class RetinanetModel(base_model.Model): # Predict function. self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator( - params.architecture.min_level, - params.architecture.max_level, + params.architecture.min_level, params.architecture.max_level, params.postprocess) self._transpose_input = params.train.transpose_input assert not self._transpose_input, 'Transpose input is not supported.' # Input layer. - input_shape = ( - params.retinanet_parser.output_size + - [params.retinanet_parser.num_channels]) self._input_layer = tf.keras.layers.Input( - shape=input_shape, name='', + shape=(None, None, params.retinanet_parser.num_channels), + name='', dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32) def build_outputs(self, inputs, mode): @@ -120,14 +116,13 @@ class RetinanetModel(base_model.Model): def build_model(self, params, mode=None): if self._keras_model is None: - with backend.get_graph().as_default(): - outputs = self.model_outputs(self._input_layer, mode) + outputs = self.model_outputs(self._input_layer, mode) - model = tf.keras.models.Model( - inputs=self._input_layer, outputs=outputs, name='retinanet') - assert model is not None, 'Fail to build tf.keras.Model.' - model.optimizer = self.build_optimizer() - self._keras_model = model + model = tf.keras.models.Model( + inputs=self._input_layer, outputs=outputs, name='retinanet') + assert model is not None, 'Fail to build tf.keras.Model.' 
+ model.optimizer = self.build_optimizer() + self._keras_model = model return self._keras_model @@ -144,8 +139,8 @@ class RetinanetModel(base_model.Model): raise ValueError('"%s" is missing in outputs, requried %s found %s', field, required_label_fields, labels.keys()) boxes, scores, classes, valid_detections = self._generate_detections_fn( - outputs['box_outputs'], outputs['cls_outputs'], - labels['anchor_boxes'], labels['image_info'][:, 1:2, :]) + outputs['box_outputs'], outputs['cls_outputs'], labels['anchor_boxes'], + labels['image_info'][:, 1:2, :]) # Discards the old output tensors to save memory. The `cls_outputs` and # `box_outputs` are pretty big and could potentiall lead to memory issue. outputs = { diff --git a/official/vision/detection/modeling/shapemask_model.py b/official/vision/detection/modeling/shapemask_model.py index 174187ed02ae7a7617f259974d64b1906a3d16e0..d197ec2fa38c167f616c0e60c5951bfb12ff94fb 100644 --- a/official/vision/detection/modeling/shapemask_model.py +++ b/official/vision/detection/modeling/shapemask_model.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Model definition for the ShapeMask Model.""" from __future__ import absolute_import @@ -20,7 +20,6 @@ from __future__ import print_function import tensorflow as tf -from tensorflow.python.keras import backend from official.vision.detection.dataloader import anchor from official.vision.detection.dataloader import mode_keys from official.vision.detection.evaluation import factory as eval_factory @@ -61,13 +60,11 @@ class ShapeMaskModel(base_model.Model): params.shapemask_loss.shape_prior_loss_weight) self._coarse_mask_loss_weight = ( params.shapemask_loss.coarse_mask_loss_weight) - self._fine_mask_loss_weight = ( - params.shapemask_loss.fine_mask_loss_weight) + self._fine_mask_loss_weight = (params.shapemask_loss.fine_mask_loss_weight) # Predict function. self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator( - params.architecture.min_level, - params.architecture.max_level, + params.architecture.min_level, params.architecture.max_level, params.postprocess) def build_outputs(self, inputs, mode): @@ -79,10 +76,8 @@ class ShapeMaskModel(base_model.Model): else: anchor_boxes = anchor.Anchor( self._params.architecture.min_level, - self._params.architecture.max_level, - self._params.anchor.num_scales, - self._params.anchor.aspect_ratios, - self._params.anchor.anchor_size, + self._params.architecture.max_level, self._params.anchor.num_scales, + self._params.anchor.aspect_ratios, self._params.anchor.anchor_size, images.get_shape().as_list()[1:3]).multilevel_boxes batch_size = tf.shape(images)[0] @@ -96,8 +91,7 @@ class ShapeMaskModel(base_model.Model): fpn_features, is_training=is_training) valid_boxes, valid_scores, valid_classes, valid_detections = ( - self._generate_detections_fn(box_outputs, cls_outputs, - anchor_boxes, + self._generate_detections_fn(box_outputs, cls_outputs, anchor_boxes, inputs['image_info'][:, 1:2, :])) image_size = 
images.get_shape().as_list()[1:3] @@ -124,22 +118,18 @@ class ShapeMaskModel(base_model.Model): return boxes, classes, outer_boxes boxes, classes, outer_boxes = SampledBoxesLayer()( - inputs, valid_boxes, valid_classes, - valid_outer_boxes, training=is_training) - - instance_features, prior_masks = self._shape_prior_head_fn(fpn_features, - boxes, - outer_boxes, - classes, - is_training) - coarse_mask_logits = self._coarse_mask_fn(instance_features, - prior_masks, - classes, - is_training) - fine_mask_logits = self._fine_mask_fn(instance_features, - coarse_mask_logits, - classes, - is_training) + inputs, + valid_boxes, + valid_classes, + valid_outer_boxes, + training=is_training) + + instance_features, prior_masks = self._shape_prior_head_fn( + fpn_features, boxes, outer_boxes, classes, is_training) + coarse_mask_logits = self._coarse_mask_fn(instance_features, prior_masks, + classes, is_training) + fine_mask_logits = self._fine_mask_fn(instance_features, coarse_mask_logits, + classes, is_training) model_outputs = { 'cls_outputs': cls_outputs, @@ -177,18 +167,15 @@ class ShapeMaskModel(base_model.Model): labels['num_positives']) # Adds Shapemask model losses. 
- shape_prior_loss = self._shapemask_prior_loss_fn( - outputs['prior_masks'], - labels['mask_targets'], - labels['mask_is_valid']) - coarse_mask_loss = self._shapemask_loss_fn( - outputs['coarse_mask_logits'], - labels['mask_targets'], - labels['mask_is_valid']) - fine_mask_loss = self._shapemask_loss_fn( - outputs['fine_mask_logits'], - labels['fine_mask_targets'], - labels['mask_is_valid']) + shape_prior_loss = self._shapemask_prior_loss_fn(outputs['prior_masks'], + labels['mask_targets'], + labels['mask_is_valid']) + coarse_mask_loss = self._shapemask_loss_fn(outputs['coarse_mask_logits'], + labels['mask_targets'], + labels['mask_is_valid']) + fine_mask_loss = self._shapemask_loss_fn(outputs['fine_mask_logits'], + labels['fine_mask_targets'], + labels['mask_is_valid']) model_loss = ( cls_loss + self._box_loss_weight * box_loss + @@ -222,64 +209,67 @@ class ShapeMaskModel(base_model.Model): if is_training: batch_size = params.train.batch_size input_layer = { - 'image': tf.keras.layers.Input( - shape=input_shape, - batch_size=batch_size, - name='image', - dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), - 'image_info': tf.keras.layers.Input( - shape=[4, 2], - batch_size=batch_size, - name='image_info'), - 'mask_classes': tf.keras.layers.Input( - shape=[params.shapemask_parser.num_sampled_masks], - batch_size=batch_size, - name='mask_classes', - dtype=tf.int64), - 'mask_outer_boxes': tf.keras.layers.Input( - shape=[params.shapemask_parser.num_sampled_masks, 4], - batch_size=batch_size, - name='mask_outer_boxes', - dtype=tf.float32), - 'mask_boxes': tf.keras.layers.Input( - shape=[params.shapemask_parser.num_sampled_masks, 4], - batch_size=batch_size, - name='mask_boxes', - dtype=tf.float32), + 'image': + tf.keras.layers.Input( + shape=input_shape, + batch_size=batch_size, + name='image', + dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), + 'image_info': + tf.keras.layers.Input( + shape=[4, 2], batch_size=batch_size, name='image_info'), + 
'mask_classes': + tf.keras.layers.Input( + shape=[params.shapemask_parser.num_sampled_masks], + batch_size=batch_size, + name='mask_classes', + dtype=tf.int64), + 'mask_outer_boxes': + tf.keras.layers.Input( + shape=[params.shapemask_parser.num_sampled_masks, 4], + batch_size=batch_size, + name='mask_outer_boxes', + dtype=tf.float32), + 'mask_boxes': + tf.keras.layers.Input( + shape=[params.shapemask_parser.num_sampled_masks, 4], + batch_size=batch_size, + name='mask_boxes', + dtype=tf.float32), } else: batch_size = params.eval.batch_size input_layer = { - 'image': tf.keras.layers.Input( - shape=input_shape, - batch_size=batch_size, - name='image', - dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), - 'image_info': tf.keras.layers.Input( - shape=[4, 2], - batch_size=batch_size, - name='image_info'), + 'image': + tf.keras.layers.Input( + shape=input_shape, + batch_size=batch_size, + name='image', + dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32), + 'image_info': + tf.keras.layers.Input( + shape=[4, 2], batch_size=batch_size, name='image_info'), } return input_layer def build_model(self, params, mode): if self._keras_model is None: input_layers = self.build_input_layers(self._params, mode) - with backend.get_graph().as_default(): - outputs = self.model_outputs(input_layers, mode) + outputs = self.model_outputs(input_layers, mode) - model = tf.keras.models.Model( - inputs=input_layers, outputs=outputs, name='shapemask') - assert model is not None, 'Fail to build tf.keras.Model.' - model.optimizer = self.build_optimizer() - self._keras_model = model + model = tf.keras.models.Model( + inputs=input_layers, outputs=outputs, name='shapemask') + assert model is not None, 'Fail to build tf.keras.Model.' 
+ model.optimizer = self.build_optimizer() + self._keras_model = model return self._keras_model def post_processing(self, labels, outputs): - required_output_fields = ['num_detections', 'detection_boxes', - 'detection_classes', 'detection_masks', - 'detection_scores'] + required_output_fields = [ + 'num_detections', 'detection_boxes', 'detection_classes', + 'detection_masks', 'detection_scores' + ] for field in required_output_fields: if field not in outputs: diff --git a/official/vision/detection/ops/__init__.py b/official/vision/detection/ops/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/ops/__init__.py +++ b/official/vision/detection/ops/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/detection/ops/nms.py b/official/vision/detection/ops/nms.py index bc516e5991a824b1d2f8e0261750cde2481fda2f..a81ff1e8fcd44ddd35dcf1a3bf7a9dad1831c76f 100644 --- a/official/vision/detection/ops/nms.py +++ b/official/vision/detection/ops/nms.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tensorflow implementation of non max suppression.""" from __future__ import absolute_import @@ -22,7 +22,6 @@ import tensorflow as tf from official.vision.detection.utils import box_utils - NMS_TILE_SIZE = 512 @@ -106,9 +105,7 @@ def _suppression_loop_body(boxes, iou_threshold, output_size, idx): return boxes, iou_threshold, output_size, idx + 1 -def sorted_non_max_suppression_padded(scores, - boxes, - max_output_size, +def sorted_non_max_suppression_padded(scores, boxes, max_output_size, iou_threshold): """A wrapper that handles non-maximum suppression. @@ -177,19 +174,18 @@ def sorted_non_max_suppression_padded(scores, idx < num_boxes // NMS_TILE_SIZE) selected_boxes, _, output_size, _ = tf.while_loop( - _loop_cond, _suppression_loop_body, [ - boxes, iou_threshold, - tf.zeros([batch_size], tf.int32), - tf.constant(0) - ]) + _loop_cond, _suppression_loop_body, + [boxes, iou_threshold, + tf.zeros([batch_size], tf.int32), + tf.constant(0)]) idx = num_boxes - tf.cast( tf.nn.top_k( tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) * tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0], tf.int32) idx = tf.minimum(idx, num_boxes - 1) - idx = tf.reshape( - idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1]) + idx = tf.reshape(idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), + [-1]) boxes = tf.reshape( tf.gather(tf.reshape(boxes, [-1, 4]), idx), [batch_size, max_output_size, 4]) diff --git a/official/vision/detection/ops/postprocess_ops.py b/official/vision/detection/ops/postprocess_ops.py index 2cb06c34ab114d171f30cb52e69d8dc73996e302..ba0f3c40664381c4fa76e4d617721edccbaab7d7 100644 --- a/official/vision/detection/ops/postprocess_ops.py +++ b/official/vision/detection/ops/postprocess_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The 
TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Post-processing model outputs to generate detection.""" from __future__ import absolute_import @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools + import tensorflow as tf from official.vision.detection.ops import nms @@ -202,15 +203,14 @@ def _generate_detections_per_image(boxes, scores_i, k=tf.minimum(tf.shape(input=scores_i)[-1], pre_nms_num_boxes)) boxes_i = tf.gather(boxes_i, indices) - (nmsed_indices_i, - nmsed_num_valid_i) = tf.image.non_max_suppression_padded( - tf.cast(boxes_i, tf.float32), - tf.cast(scores_i, tf.float32), - max_total_size, - iou_threshold=nms_iou_threshold, - score_threshold=score_threshold, - pad_to_max_output_size=True, - name='nms_detections_' + str(i)) + (nmsed_indices_i, nmsed_num_valid_i) = tf.image.non_max_suppression_padded( + tf.cast(boxes_i, tf.float32), + tf.cast(scores_i, tf.float32), + max_total_size, + iou_threshold=nms_iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True, + name='nms_detections_' + str(i)) nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i) nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i) # Sets scores of invalid boxes to -1. 
@@ -235,11 +235,8 @@ def _generate_detections_per_image(boxes, return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections -def _generate_detections_batched(boxes, - scores, - max_total_size, - nms_iou_threshold, - score_threshold): +def _generate_detections_batched(boxes, scores, max_total_size, + nms_iou_threshold, score_threshold): """Generates detected boxes with scores and classes for one-stage detector. The function takes output of multi-level ConvNets and anchor boxes and @@ -247,19 +244,20 @@ def _generate_detections_batched(boxes, supported on TPU currently. Args: - boxes: a tensor with shape [batch_size, N, num_classes, 4] or - [batch_size, N, 1, 4], which box predictions on all feature levels. The N - is the number of total anchors on all levels. - scores: a tensor with shape [batch_size, N, num_classes], which - stacks class probability on all feature levels. The N is the number of - total anchors on all levels. The num_classes is the number of classes - predicted by the model. Note that the class_outputs here is the raw score. + boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size, + N, 1, 4], which box predictions on all feature levels. The N is the number + of total anchors on all levels. + scores: a tensor with shape [batch_size, N, num_classes], which stacks class + probability on all feature levels. The N is the number of total anchors on + all levels. The num_classes is the number of classes predicted by the + model. Note that the class_outputs here is the raw score. max_total_size: a scalar representing maximum number of boxes retained over all classes. nms_iou_threshold: a float representing the threshold for deciding whether boxes overlap too much with respect to IOU. score_threshold: a float representing the threshold for deciding when to remove boxes based on score. + Returns: nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4] representing top detected boxes in [y1, x1, y2, x2]. 
@@ -285,22 +283,24 @@ def _generate_detections_batched(boxes, max_total_size=max_total_size, iou_threshold=nms_iou_threshold, score_threshold=score_threshold, - pad_per_class=False,) + pad_per_class=False, + ) # De-normalizes box cooridinates. nmsed_boxes *= normalizer nmsed_classes = tf.cast(nmsed_classes, tf.int32) return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections -class MultilevelDetectionGenerator(object): +class MultilevelDetectionGenerator(tf.keras.layers.Layer): """Generates detected boxes with scores and classes for one-stage detector.""" def __init__(self, min_level, max_level, params): self._min_level = min_level self._max_level = max_level self._generate_detections = generate_detections_factory(params) + super(MultilevelDetectionGenerator, self).__init__(autocast=False) - def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape): + def call(self, box_outputs, class_outputs, anchor_boxes, image_shape): # Collects outputs from all levels into a list. boxes = [] scores = [] @@ -338,13 +338,14 @@ class MultilevelDetectionGenerator(object): return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections -class GenericDetectionGenerator(object): +class GenericDetectionGenerator(tf.keras.layers.Layer): """Generates the final detected boxes with scores and classes.""" def __init__(self, params): + super(GenericDetectionGenerator, self).__init__(autocast=False) self._generate_detections = generate_detections_factory(params) - def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape): + def call(self, box_outputs, class_outputs, anchor_boxes, image_shape): """Generate final detections. 
Args: @@ -382,16 +383,13 @@ class GenericDetectionGenerator(object): box_outputs = tf.reshape( box_outputs, tf.stack([batch_size, num_locations, num_classes, 4], axis=-1)) - box_outputs = tf.slice( - box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1]) + box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1]) anchor_boxes = tf.tile( tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1]) - box_outputs = tf.reshape( - box_outputs, - tf.stack([batch_size, num_detections, 4], axis=-1)) + box_outputs = tf.reshape(box_outputs, + tf.stack([batch_size, num_detections, 4], axis=-1)) anchor_boxes = tf.reshape( - anchor_boxes, - tf.stack([batch_size, num_detections, 4], axis=-1)) + anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1)) # Box decoding. decoded_boxes = box_utils.decode_boxes( @@ -411,3 +409,89 @@ class GenericDetectionGenerator(object): nmsed_classes += 1 return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections + + +class OlnDetectionGenerator(GenericDetectionGenerator): + """Generates the final detected boxes with scores and classes.""" + + def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape, + is_single_fg_score=False, keep_nms=True): + """Generate final detections for Object Localization Network (OLN). + + Args: + box_outputs: a tensor of shape of [batch_size, K, num_classes * 4] + representing the class-specific box coordinates relative to anchors. + class_outputs: a tensor of shape of [batch_size, K, num_classes] + representing the class logits before applying score activiation. + anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the + corresponding anchor boxes w.r.t `box_outputs`. + image_shape: a tensor of shape of [batch_size, 2] storing the image height + and width w.r.t. the scaled image, i.e. the same image space as + `box_outputs` and `anchor_boxes`. + is_single_fg_score: a Bool indicator of whether class_outputs includes the + background scores concatenated or not. 
By default, class_outputs is a + concatenation of both scores for the foreground and background. That is, + scores_without_bg=False. + keep_nms: a Bool indicator of whether to perform NMS or not. + + Returns: + nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4] + representing top detected boxes in [y1, x1, y2, x2]. + nms_scores: `float` Tensor of shape [batch_size, max_total_size] + representing sorted confidence scores for detected boxes. The values are + between [0, 1]. + nms_classes: `int` Tensor of shape [batch_size, max_total_size] + representing classes for detected boxes. + valid_detections: `int` Tensor of shape [batch_size] only the top + `valid_detections` boxes are valid detections. + """ + if is_single_fg_score: + # Concatenates dummy background scores. + dummy_bg_scores = tf.zeros_like(class_outputs) + class_outputs = tf.stack([dummy_bg_scores, class_outputs], -1) + else: + class_outputs = tf.nn.softmax(class_outputs, axis=-1) + + # Removes the background class. + class_outputs_shape = tf.shape(class_outputs) + batch_size = class_outputs_shape[0] + num_locations = class_outputs_shape[1] + num_classes = class_outputs_shape[-1] + num_detections = num_locations * (num_classes - 1) + + class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1]) + box_outputs = tf.reshape( + box_outputs, + tf.stack([batch_size, num_locations, num_classes, 4], axis=-1)) + box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1]) + anchor_boxes = tf.tile( + tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1]) + box_outputs = tf.reshape(box_outputs, + tf.stack([batch_size, num_detections, 4], axis=-1)) + anchor_boxes = tf.reshape( + anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1)) + + # Box decoding. For RPN outputs, box_outputs are all zeros. 
+ decoded_boxes = box_utils.decode_boxes( + box_outputs, anchor_boxes, weights=[10.0, 10.0, 5.0, 5.0]) + + # Box clipping + decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape) + + decoded_boxes = tf.reshape( + decoded_boxes, + tf.stack([batch_size, num_locations, num_classes - 1, 4], axis=-1)) + + if keep_nms: + nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = ( + self._generate_detections(decoded_boxes, class_outputs)) + # Adds 1 to offset the background class which has index 0. + nmsed_classes += 1 + else: + nmsed_boxes = decoded_boxes[:, :, 0, :] + nmsed_scores = class_outputs[:, :, 0] + nmsed_classes = tf.cast(tf.ones_like(nmsed_scores), tf.int32) + valid_detections = tf.cast( + tf.reduce_sum(tf.ones_like(nmsed_scores), axis=-1), tf.int32) + + return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections diff --git a/official/vision/detection/ops/roi_ops.py b/official/vision/detection/ops/roi_ops.py index a21bc7b2882de39b12bc76dacd37047fabac1766..a198d0ee204996e695f0e58bacada4bf3154ac98 100644 --- a/official/vision/detection/ops/roi_ops.py +++ b/official/vision/detection/ops/roi_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """ROI-related ops.""" from __future__ import absolute_import @@ -56,8 +56,8 @@ def multilevel_propose_rois(rpn_boxes, rpn_scores: a dict with keys representing FPN levels and values representing logit tensors of shape [batch_size, feature_h, feature_w, num_anchors]. 
anchor_boxes: a dict with keys representing FPN levels and values - representing anchor box tensors of shape - [batch_size, feature_h, feature_w, num_anchors * 4]. + representing anchor box tensors of shape [batch_size, feature_h, + feature_w, num_anchors * 4]. image_shape: a tensor of shape [batch_size, 2] where the last dimension are [height, width] of the scaled image. rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to @@ -112,17 +112,14 @@ def multilevel_propose_rois(rpn_boxes, this_level_scores = tf.sigmoid(this_level_scores) if decode_boxes: - this_level_boxes = box_utils.decode_boxes( - this_level_boxes, this_level_anchors) + this_level_boxes = box_utils.decode_boxes(this_level_boxes, + this_level_anchors) if clip_boxes: - this_level_boxes = box_utils.clip_boxes( - this_level_boxes, image_shape) + this_level_boxes = box_utils.clip_boxes(this_level_boxes, image_shape) if rpn_min_size_threshold > 0.0: this_level_boxes, this_level_scores = box_utils.filter_boxes( - this_level_boxes, - this_level_scores, - image_shape, + this_level_boxes, this_level_scores, image_shape, rpn_min_size_threshold) this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k) @@ -142,8 +139,9 @@ def multilevel_propose_rois(rpn_boxes, else: if rpn_score_threshold > 0.0: this_level_boxes, this_level_scores = ( - box_utils.filter_boxes_by_scores( - this_level_boxes, this_level_scores, rpn_score_threshold)) + box_utils.filter_boxes_by_scores(this_level_boxes, + this_level_scores, + rpn_score_threshold)) this_level_boxes, this_level_scores = box_utils.top_k_boxes( this_level_boxes, this_level_scores, k=this_level_pre_nms_top_k) this_level_roi_scores, this_level_rois = ( @@ -154,9 +152,7 @@ def multilevel_propose_rois(rpn_boxes, iou_threshold=rpn_nms_threshold)) else: this_level_rois, this_level_roi_scores = box_utils.top_k_boxes( - this_level_rois, - this_level_scores, - k=this_level_post_nms_top_k) + this_level_rois, this_level_scores, k=this_level_post_nms_top_k) 
rois.append(this_level_rois) roi_scores.append(this_level_roi_scores) @@ -174,7 +170,7 @@ def multilevel_propose_rois(rpn_boxes, return selected_rois, selected_roi_scores -class ROIGenerator(object): +class ROIGenerator(tf.keras.layers.Layer): """Proposes RoIs for the second stage processing.""" def __init__(self, params): @@ -189,8 +185,9 @@ class ROIGenerator(object): self._test_rpn_score_threshold = params.test_rpn_score_threshold self._test_rpn_min_size_threshold = params.test_rpn_min_size_threshold self._use_batched_nms = params.use_batched_nms + super(ROIGenerator, self).__init__(autocast=False) - def __call__(self, boxes, scores, anchor_boxes, image_shape, is_training): + def call(self, boxes, scores, anchor_boxes, image_shape, is_training): """Generates RoI proposals. Args: @@ -199,8 +196,8 @@ class ROIGenerator(object): scores: a dict with keys representing FPN levels and values representing logit tensors of shape [batch_size, feature_h, feature_w, num_anchors]. anchor_boxes: a dict with keys representing FPN levels and values - representing anchor box tensors of shape - [batch_size, feature_h, feature_w, num_anchors * 4]. + representing anchor box tensors of shape [batch_size, feature_h, + feature_w, num_anchors * 4]. image_shape: a tensor of shape [batch_size, 2] where the last dimension are [height, width] of the scaled image. 
is_training: a bool indicating whether it is in training or inference @@ -220,18 +217,252 @@ class ROIGenerator(object): scores, anchor_boxes, image_shape, - rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k if is_training - else self._test_rpn_pre_nms_top_k), - rpn_post_nms_top_k=(self._rpn_post_nms_top_k if is_training - else self._test_rpn_post_nms_top_k), - rpn_nms_threshold=(self._rpn_nms_threshold if is_training - else self._test_rpn_nms_threshold), - rpn_score_threshold=(self._rpn_score_threshold if is_training - else self._test_rpn_score_threshold), - rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training - else self._test_rpn_min_size_threshold), + rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k + if is_training else self._test_rpn_pre_nms_top_k), + rpn_post_nms_top_k=(self._rpn_post_nms_top_k + if is_training else self._test_rpn_post_nms_top_k), + rpn_nms_threshold=(self._rpn_nms_threshold + if is_training else self._test_rpn_nms_threshold), + rpn_score_threshold=(self._rpn_score_threshold if is_training else + self._test_rpn_score_threshold), + rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training else + self._test_rpn_min_size_threshold), decode_boxes=True, clip_boxes=True, use_batched_nms=self._use_batched_nms, apply_sigmoid_to_score=True) return proposed_rois, proposed_roi_scores + + +class OlnROIGenerator(ROIGenerator): + """Proposes RoIs for the second stage processing.""" + + def __call__(self, boxes, scores, anchor_boxes, image_shape, is_training, + is_box_lrtb=False, object_scores=None): + """Generates RoI proposals. + + Args: + boxes: a dict with keys representing FPN levels and values representing + box tenors of shape [batch_size, feature_h, feature_w, num_anchors * 4]. + scores: a dict with keys representing FPN levels and values representing + logit tensors of shape [batch_size, feature_h, feature_w, num_anchors]. 
+ anchor_boxes: a dict with keys representing FPN levels and values + representing anchor box tensors of shape [batch_size, feature_h, + feature_w, num_anchors * 4]. + image_shape: a tensor of shape [batch_size, 2] where the last dimension + are [height, width] of the scaled image. + is_training: a bool indicating whether it is in training or inference + mode. + is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,top, + bottom) format. + object_scores: another objectness score (e.g., centerness). In OLN, we use + object_scores=centerness as a replacement of the scores at each level. + A dict with keys representing FPN levels and values representing logit + tensors of shape [batch_size, feature_h, feature_w, num_anchors]. + + Returns: + proposed_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4], + representing the box coordinates of the proposed RoIs w.r.t. the + scaled image. + proposed_roi_scores: a tensor of shape + [batch_size, rpn_post_nms_top_k, 1], representing the scores of the + proposed RoIs. 
+ + """ + proposed_rois, proposed_roi_scores = self.oln_multilevel_propose_rois( + boxes, + scores, + anchor_boxes, + image_shape, + rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k + if is_training else self._test_rpn_pre_nms_top_k), + rpn_post_nms_top_k=(self._rpn_post_nms_top_k + if is_training else self._test_rpn_post_nms_top_k), + rpn_nms_threshold=(self._rpn_nms_threshold + if is_training else self._test_rpn_nms_threshold), + rpn_score_threshold=(self._rpn_score_threshold if is_training else + self._test_rpn_score_threshold), + rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training else + self._test_rpn_min_size_threshold), + decode_boxes=True, + clip_boxes=True, + use_batched_nms=self._use_batched_nms, + apply_sigmoid_to_score=True, + is_box_lrtb=is_box_lrtb, + rpn_object_scores=object_scores,) + return proposed_rois, proposed_roi_scores + + def oln_multilevel_propose_rois(self, + rpn_boxes, + rpn_scores, + anchor_boxes, + image_shape, + rpn_pre_nms_top_k=2000, + rpn_post_nms_top_k=1000, + rpn_nms_threshold=0.7, + rpn_score_threshold=0.0, + rpn_min_size_threshold=0.0, + decode_boxes=True, + clip_boxes=True, + use_batched_nms=False, + apply_sigmoid_to_score=True, + is_box_lrtb=False, + rpn_object_scores=None,): + """Proposes RoIs given a group of candidates from different FPN levels. + + The following describes the steps: + 1. For each individual level: + a. Adjust scores for each level if specified by rpn_object_scores. + b. Apply sigmoid transform if specified. + c. Decode boxes (either of xyhw or left-right-top-bottom format) if + specified. + d. Clip boxes if specified. + e. Filter small boxes and those fall outside image if specified. + f. Apply pre-NMS filtering including pre-NMS top k and score + thresholding. + g. Apply NMS. + 2. Aggregate post-NMS boxes from each level. + 3. Apply an overall top k to generate the final selected RoIs. 
+ + Args: + rpn_boxes: a dict with keys representing FPN levels and values + representing box tenors of shape [batch_size, feature_h, feature_w, + num_anchors * 4]. + rpn_scores: a dict with keys representing FPN levels and values + representing logit tensors of shape [batch_size, feature_h, feature_w, + num_anchors]. + anchor_boxes: a dict with keys representing FPN levels and values + representing anchor box tensors of shape [batch_size, feature_h, + feature_w, num_anchors * 4]. + image_shape: a tensor of shape [batch_size, 2] where the last dimension + are [height, width] of the scaled image. + rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to + keep before applying NMS. Default: 2000. + rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total* to + keep after applying NMS. Default: 1000. + rpn_nms_threshold: a float between 0 and 1 representing the IoU threshold + used for NMS. If 0.0, no NMS is applied. Default: 0.7. + rpn_score_threshold: a float between 0 and 1 representing the minimal box + score to keep before applying NMS. This is often used as a pre-filtering + step for better performance. If 0, no filtering is applied. Default: 0. + rpn_min_size_threshold: a float representing the minimal box size in each + side (w.r.t. the scaled image) to keep before applying NMS. This is + often used as a pre-filtering step for better performance. If 0, no + filtering is applied. Default: 0. + decode_boxes: a boolean indicating whether `rpn_boxes` needs to be decoded + using `anchor_boxes`. If False, use `rpn_boxes` directly and ignore + `anchor_boxes`. Default: True. + clip_boxes: a boolean indicating whether boxes are first clipped to the + scaled image size before appliying NMS. If False, no clipping is applied + and `image_shape` is ignored. Default: True. + use_batched_nms: a boolean indicating whether NMS is applied in batch + using `tf.image.combined_non_max_suppression`. Currently only available + in CPU/GPU. 
Default: False. + apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to + `rpn_scores` before applying NMS. Default: True. + is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,top, + bottom) format. + rpn_object_scores: a predicted objectness score (e.g., centerness). In + OLN, we use object_scores=centerness as a replacement of the scores at + each level. A dict with keys representing FPN levels and values + representing logit tensors of shape [batch_size, feature_h, feature_w, + num_anchors]. + + Returns: + selected_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4], + representing the box coordinates of the selected proposals w.r.t. the + scaled image. + selected_roi_scores: a tensor of shape [batch_size, rpn_post_nms_top_k, + 1],representing the scores of the selected proposals. + """ + with tf.name_scope('multilevel_propose_rois'): + rois = [] + roi_scores = [] + image_shape = tf.expand_dims(image_shape, axis=1) + for level in sorted(rpn_scores.keys()): + with tf.name_scope('level_%d' % level): + _, feature_h, feature_w, num_anchors_per_location = ( + rpn_scores[level].get_shape().as_list()) + + num_boxes = feature_h * feature_w * num_anchors_per_location + this_level_scores = tf.reshape(rpn_scores[level], [-1, num_boxes]) + this_level_boxes = tf.reshape(rpn_boxes[level], [-1, num_boxes, 4]) + this_level_anchors = tf.cast( + tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]), + dtype=this_level_scores.dtype) + + if rpn_object_scores: + this_level_object_scores = rpn_object_scores[level] + this_level_object_scores = tf.reshape(this_level_object_scores, + [-1, num_boxes]) + this_level_object_scores = tf.cast(this_level_object_scores, + this_level_scores.dtype) + this_level_scores = this_level_object_scores + + if apply_sigmoid_to_score: + this_level_scores = tf.sigmoid(this_level_scores) + + if decode_boxes: + if is_box_lrtb: # Box in left-right-top-bottom format. 
+ this_level_boxes = box_utils.decode_boxes_lrtb( + this_level_boxes, this_level_anchors) + else: # Box in standard x-y-h-w format. + this_level_boxes = box_utils.decode_boxes( + this_level_boxes, this_level_anchors) + + if clip_boxes: + this_level_boxes = box_utils.clip_boxes( + this_level_boxes, image_shape) + + if rpn_min_size_threshold > 0.0: + this_level_boxes, this_level_scores = box_utils.filter_boxes( + this_level_boxes, this_level_scores, image_shape, + rpn_min_size_threshold) + + this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k) + this_level_post_nms_top_k = min(num_boxes, rpn_post_nms_top_k) + if rpn_nms_threshold > 0.0: + if use_batched_nms: + this_level_rois, this_level_roi_scores, _, _ = ( + tf.image.combined_non_max_suppression( + tf.expand_dims(this_level_boxes, axis=2), + tf.expand_dims(this_level_scores, axis=-1), + max_output_size_per_class=this_level_pre_nms_top_k, + max_total_size=this_level_post_nms_top_k, + iou_threshold=rpn_nms_threshold, + score_threshold=rpn_score_threshold, + pad_per_class=False, + clip_boxes=False)) + else: + if rpn_score_threshold > 0.0: + this_level_boxes, this_level_scores = ( + box_utils.filter_boxes_by_scores(this_level_boxes, + this_level_scores, + rpn_score_threshold)) + this_level_boxes, this_level_scores = box_utils.top_k_boxes( + this_level_boxes, this_level_scores, + k=this_level_pre_nms_top_k) + this_level_roi_scores, this_level_rois = ( + nms.sorted_non_max_suppression_padded( + this_level_scores, + this_level_boxes, + max_output_size=this_level_post_nms_top_k, + iou_threshold=rpn_nms_threshold)) + else: + this_level_rois, this_level_roi_scores = box_utils.top_k_boxes( + this_level_rois, this_level_scores, k=this_level_post_nms_top_k) + + rois.append(this_level_rois) + roi_scores.append(this_level_roi_scores) + + all_rois = tf.concat(rois, axis=1) + all_roi_scores = tf.concat(roi_scores, axis=1) + + with tf.name_scope('top_k_rois'): + _, num_valid_rois = all_roi_scores.get_shape().as_list() + 
overall_top_k = min(num_valid_rois, rpn_post_nms_top_k) + + selected_rois, selected_roi_scores = box_utils.top_k_boxes( + all_rois, all_roi_scores, k=overall_top_k) + + return selected_rois, selected_roi_scores diff --git a/official/vision/detection/ops/spatial_transform_ops.py b/official/vision/detection/ops/spatial_transform_ops.py index ae60d20f0e8c8454bd7972e851c33b6dca56ed90..4b7d7ecde48ca8dd1eeb4f7356a1642583b1754d 100644 --- a/official/vision/detection/ops/spatial_transform_ops.py +++ b/official/vision/detection/ops/spatial_transform_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Functions to performa spatial transformation for Tensor.""" from __future__ import absolute_import @@ -20,7 +20,6 @@ from __future__ import print_function import tensorflow as tf - _EPSILON = 1e-8 @@ -30,6 +29,7 @@ def nearest_upsampling(data, scale): Args: data: A tensor with a shape of [batch, height_in, width_in, channels]. scale: An integer multiple to scale resolution of input data. + Returns: data_up: A tensor with a shape of [batch, height_in*scale, width_in*scale, channels]. Same dtype as input @@ -382,8 +382,7 @@ def multilevel_crop_and_resize(features, boxes, output_size=7): areas_sqrt = tf.sqrt(box_height * box_width) levels = tf.cast( tf.math.floordiv( - tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + - 4.0, + tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0, dtype=tf.int32) # Maps levels between [min_level, max_level]. 
levels = tf.minimum(max_level, tf.maximum(levels, min_level)) @@ -395,9 +394,12 @@ def multilevel_crop_and_resize(features, boxes, output_size=7): boxes /= tf.expand_dims(scale_to_level, axis=2) box_width /= scale_to_level box_height /= scale_to_level - boxes = tf.concat([boxes[:, :, 0:2], - tf.expand_dims(box_height, -1), - tf.expand_dims(box_width, -1)], axis=-1) + boxes = tf.concat([ + boxes[:, :, 0:2], + tf.expand_dims(box_height, -1), + tf.expand_dims(box_width, -1) + ], + axis=-1) # Maps levels to [0, max_level-min_level]. levels -= min_level @@ -464,12 +466,12 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels, Args: - features: a float tensor of shape [batch_size, num_levels, - max_feature_size, max_feature_size, num_downsample_channels]. - level_boxes: a float Tensor of the level boxes to crop from. - [batch_size, num_instances, 4]. + features: a float tensor of shape [batch_size, num_levels, max_feature_size, + max_feature_size, num_downsample_channels]. + level_boxes: a float Tensor of the level boxes to crop from. [batch_size, + num_instances, 4]. detection_prior_levels: an int Tensor of instance assigned level of shape - [batch_size, num_instances]. + [batch_size, num_instances]. min_mask_level: minimum FPN level to crop mask feature from. mask_crop_size: an int of mask crop size. @@ -478,8 +480,8 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels, mask_crop_size, mask_crop_size, num_downsample_channels]. This is the instance feature crop. 
""" - (batch_size, num_levels, max_feature_size, - _, num_downsample_channels) = features.get_shape().as_list() + (batch_size, num_levels, max_feature_size, _, + num_downsample_channels) = features.get_shape().as_list() _, num_of_instances, _ = level_boxes.get_shape().as_list() level_boxes = tf.cast(level_boxes, tf.int32) assert num_of_instances == detection_prior_levels.get_shape().as_list()[1] @@ -503,32 +505,25 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels, indices = tf.reshape( tf.tile( tf.reshape( - tf.range(batch_size) * batch_dim_size, - [batch_size, 1, 1, 1]), - [1, num_of_instances, - mask_crop_size, mask_crop_size]) + - tf.tile( - tf.reshape(levels * level_dim_size, - [batch_size, num_of_instances, 1, 1]), - [1, 1, mask_crop_size, mask_crop_size]) + - tf.tile( - tf.reshape(y_indices * height_dim_size, - [batch_size, num_of_instances, - mask_crop_size, 1]), - [1, 1, 1, mask_crop_size]) + + tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]), + [1, num_of_instances, mask_crop_size, mask_crop_size]) + tf.tile( + tf.reshape(levels * level_dim_size, + [batch_size, num_of_instances, 1, 1]), + [1, 1, mask_crop_size, mask_crop_size]) + tf.tile( + tf.reshape(y_indices * height_dim_size, + [batch_size, num_of_instances, mask_crop_size, 1]), + [1, 1, 1, mask_crop_size]) + tf.tile( tf.reshape(x_indices, - [batch_size, num_of_instances, - 1, mask_crop_size]), + [batch_size, num_of_instances, 1, mask_crop_size]), [1, 1, mask_crop_size, 1]), [-1]) - features_r2 = tf.reshape(features, - [-1, num_downsample_channels]) + features_r2 = tf.reshape(features, [-1, num_downsample_channels]) crop_features = tf.reshape( - tf.gather(features_r2, indices), - [batch_size * num_of_instances, - mask_crop_size, mask_crop_size, - num_downsample_channels]) + tf.gather(features_r2, indices), [ + batch_size * num_of_instances, mask_crop_size, mask_crop_size, + num_downsample_channels + ]) return crop_features @@ -546,9 +541,9 @@ def 
crop_mask_in_target_box(masks, boxes: a float tensor representing box cooridnates that tightly enclose masks with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A box is represented by [ymin, xmin, ymax, xmax]. - target_boxes: a float tensor representing target box cooridnates for - masks with a shape of [batch_size, num_masks, 4] in un-normalized - coordinates. A box is represented by [ymin, xmin, ymax, xmax]. + target_boxes: a float tensor representing target box cooridnates for masks + with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A + box is represented by [ymin, xmin, ymax, xmax]. output_size: A scalar to indicate the output crop size. It currently only supports to output a square shape outputs. sample_offset: a float number in [0, 1] indicates the subpixel sample offset @@ -561,10 +556,10 @@ def crop_mask_in_target_box(masks, """ with tf.name_scope('crop_mask_in_target_box'): batch_size, num_masks, height, width = masks.get_shape().as_list() - masks = tf.reshape(masks, [batch_size*num_masks, height, width, 1]) + masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1]) # Pad zeros on the boundary of masks. masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4) - masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1]) + masks = tf.reshape(masks, [batch_size, num_masks, height + 4, width + 4, 1]) # Projects target box locations and sizes to corresponding cropped # mask coordinates. 
@@ -572,10 +567,10 @@ def crop_mask_in_target_box(masks, value=boxes, num_or_size_splits=4, axis=2) bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split( value=target_boxes, num_or_size_splits=4, axis=2) - y_transform = (bb_y_min - gt_y_min) * height / ( - gt_y_max - gt_y_min + _EPSILON) + 2 - x_transform = (bb_x_min - gt_x_min) * height / ( - gt_x_max - gt_x_min + _EPSILON) + 2 + y_transform = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min + + _EPSILON) + 2 + x_transform = (bb_x_min - gt_x_min) * height / (gt_x_max - gt_x_min + + _EPSILON) + 2 h_transform = (bb_y_max - bb_y_min) * width / ( gt_y_max - gt_y_min + _EPSILON) w_transform = (bb_x_max - bb_x_min) * width / ( @@ -592,8 +587,8 @@ def crop_mask_in_target_box(masks, # Reshape tensors to have the right shape for selective_crop_and_resize. trasnformed_boxes = tf.concat( [y_transform, x_transform, h_transform, w_transform], -1) - levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]), - [batch_size, 1]) + levels = tf.tile( + tf.reshape(tf.range(num_masks), [1, num_masks]), [batch_size, 1]) cropped_masks = selective_crop_and_resize( masks, diff --git a/official/vision/detection/ops/target_ops.py b/official/vision/detection/ops/target_ops.py index 2a7d6856511f846365041527f2532c8f2b376244..c129e853be0d1ef63ab474506d59024a08ab13ff 100644 --- a/official/vision/detection/ops/target_ops.py +++ b/official/vision/detection/ops/target_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Target and sampling related ops.""" from __future__ import absolute_import @@ -87,18 +87,16 @@ def box_matching(boxes, gt_boxes, gt_classes): matched_gt_boxes) matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices) - matched_gt_classes = tf.where( - background_box_mask, - tf.zeros_like(matched_gt_classes), - matched_gt_classes) + matched_gt_classes = tf.where(background_box_mask, + tf.zeros_like(matched_gt_classes), + matched_gt_classes) - matched_gt_indices = tf.where( - background_box_mask, - -tf.ones_like(argmax_iou_indices), - argmax_iou_indices) + matched_gt_indices = tf.where(background_box_mask, + -tf.ones_like(argmax_iou_indices), + argmax_iou_indices) - return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, - matched_iou, iou) + return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou, + iou) def assign_and_sample_proposals(proposed_boxes, @@ -121,22 +119,21 @@ def assign_and_sample_proposals(proposed_boxes, returns box_targets, class_targets, and RoIs. Args: - proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number - of proposals before groundtruth assignment. The last dimension is the - box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] - format. - gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. - The coordinates of gt_boxes are in the pixel coordinates of the scaled - image. This tensor might have padding of values -1 indicating the invalid - box coordinates. + proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number of + proposals before groundtruth assignment. The last dimension is the box + coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format. + gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The + coordinates of gt_boxes are in the pixel coordinates of the scaled image. 
+ This tensor might have padding of values -1 indicating the invalid box + coordinates. gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This tensor might have paddings with values of -1 indicating the invalid classes. num_samples_per_image: a integer represents RoI minibatch size per image. mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before sampling proposals. - fg_fraction: a float represents the target fraction of RoI minibatch that - is labeled foreground (i.e., class > 0). + fg_fraction: a float represents the target fraction of RoI minibatch that is + labeled foreground (i.e., class > 0). fg_iou_thresh: a float represents the IoU overlap threshold for an RoI to be considered foreground (if >= fg_iou_thresh). bg_iou_thresh_hi: a float represents the IoU overlap threshold for an RoI to @@ -163,8 +160,8 @@ def assign_and_sample_proposals(proposed_boxes, else: boxes = proposed_boxes - (matched_gt_boxes, matched_gt_classes, matched_gt_indices, - matched_iou, _) = box_matching(boxes, gt_boxes, gt_classes) + (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou, + _) = box_matching(boxes, gt_boxes, gt_classes) positive_match = tf.greater(matched_iou, fg_iou_thresh) negative_match = tf.logical_and( @@ -173,10 +170,12 @@ def assign_and_sample_proposals(proposed_boxes, ignored_match = tf.less(matched_iou, 0.0) # re-assign negatively matched boxes to the background class. 
- matched_gt_classes = tf.where( - negative_match, tf.zeros_like(matched_gt_classes), matched_gt_classes) - matched_gt_indices = tf.where( - negative_match, tf.zeros_like(matched_gt_indices), matched_gt_indices) + matched_gt_classes = tf.where(negative_match, + tf.zeros_like(matched_gt_classes), + matched_gt_classes) + matched_gt_indices = tf.where(negative_match, + tf.zeros_like(matched_gt_indices), + matched_gt_indices) sample_candidates = tf.logical_and( tf.logical_or(positive_match, negative_match), @@ -189,8 +188,9 @@ def assign_and_sample_proposals(proposed_boxes, batch_size, _ = sample_candidates.get_shape().as_list() sampled_indicators = [] for i in range(batch_size): - sampled_indicator = sampler.subsample( - sample_candidates[i], num_samples_per_image, positive_match[i]) + sampled_indicator = sampler.subsample(sample_candidates[i], + num_samples_per_image, + positive_match[i]) sampled_indicators.append(sampled_indicator) sampled_indicators = tf.stack(sampled_indicators) _, sampled_indices = tf.nn.top_k( @@ -206,10 +206,8 @@ def assign_and_sample_proposals(proposed_boxes, sampled_rois = tf.gather_nd(boxes, gather_nd_indices) sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices) - sampled_gt_classes = tf.gather_nd( - matched_gt_classes, gather_nd_indices) - sampled_gt_indices = tf.gather_nd( - matched_gt_indices, gather_nd_indices) + sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices) + sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices) return (sampled_rois, sampled_gt_boxes, sampled_gt_classes, sampled_gt_indices) @@ -237,8 +235,8 @@ def sample_and_crop_foreground_masks(candidate_rois, candidate_gt_indices: a tensor of shape [batch_size, N], storing the corresponding groundtruth instance indices to the `candidate_gt_boxes`, i.e. 
gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and - gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is the - superset of candidate_gt_boxes. + gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is + the superset of candidate_gt_boxes. gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width] containing all the groundtruth masks which sample masks are drawn from. num_mask_samples_per_image: an integer which specifies the number of masks @@ -266,33 +264,35 @@ def sample_and_crop_foreground_masks(candidate_rois, tf.expand_dims(tf.range(fg_instance_indices_shape[0]), axis=-1) * tf.ones([1, fg_instance_indices_shape[-1]], dtype=tf.int32)) - gather_nd_instance_indices = tf.stack( - [batch_indices, fg_instance_indices], axis=-1) - foreground_rois = tf.gather_nd( - candidate_rois, gather_nd_instance_indices) - foreground_boxes = tf.gather_nd( - candidate_gt_boxes, gather_nd_instance_indices) - foreground_classes = tf.gather_nd( - candidate_gt_classes, gather_nd_instance_indices) - foreground_gt_indices = tf.gather_nd( - candidate_gt_indices, gather_nd_instance_indices) + gather_nd_instance_indices = tf.stack([batch_indices, fg_instance_indices], + axis=-1) + foreground_rois = tf.gather_nd(candidate_rois, gather_nd_instance_indices) + foreground_boxes = tf.gather_nd(candidate_gt_boxes, + gather_nd_instance_indices) + foreground_classes = tf.gather_nd(candidate_gt_classes, + gather_nd_instance_indices) + foreground_gt_indices = tf.gather_nd(candidate_gt_indices, + gather_nd_instance_indices) foreground_gt_indices_shape = tf.shape(foreground_gt_indices) batch_indices = ( tf.expand_dims(tf.range(foreground_gt_indices_shape[0]), axis=-1) * tf.ones([1, foreground_gt_indices_shape[-1]], dtype=tf.int32)) - gather_nd_gt_indices = tf.stack( - [batch_indices, foreground_gt_indices], axis=-1) + gather_nd_gt_indices = tf.stack([batch_indices, foreground_gt_indices], + axis=-1) foreground_masks = 
tf.gather_nd(gt_masks, gather_nd_gt_indices) cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box( - foreground_masks, foreground_boxes, foreground_rois, mask_target_size, + foreground_masks, + foreground_boxes, + foreground_rois, + mask_target_size, sample_offset=0.5) return foreground_rois, foreground_classes, cropped_foreground_masks -class ROISampler(object): +class ROISampler(tf.keras.layers.Layer): """Samples RoIs and creates training targets.""" def __init__(self, params): @@ -302,17 +302,17 @@ class ROISampler(object): self._bg_iou_thresh_hi = params.bg_iou_thresh_hi self._bg_iou_thresh_lo = params.bg_iou_thresh_lo self._mix_gt_boxes = params.mix_gt_boxes + super(ROISampler, self).__init__(autocast=False) - def __call__(self, rois, gt_boxes, gt_classes): + def call(self, rois, gt_boxes, gt_classes): """Sample and assign RoIs for training. Args: - rois: a tensor of shape of [batch_size, N, 4]. N is the number - of proposals before groundtruth assignment. The last dimension is the - box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] - format. - gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. - The coordinates of gt_boxes are in the pixel coordinates of the scaled + rois: a tensor of shape of [batch_size, N, 4]. N is the number of + proposals before groundtruth assignment. The last dimension is the box + coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format. + gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The + coordinates of gt_boxes are in the pixel coordinates of the scaled image. This tensor might have padding of values -1 indicating the invalid box coordinates. gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. 
class ROIScoreSampler(ROISampler):
  """Samples RoIs together with their scores and creates training targets.

  Extends `ROISampler` by carrying the RPN confidence score of each proposal
  through the groundtruth assignment and subsampling steps, so downstream
  consumers receive a per-RoI score alongside each sampled RoI.

  NOTE(review): this class overrides `__call__` directly (with an extra
  `roi_scores` argument) rather than `call`, which bypasses the
  `tf.keras.layers.Layer.__call__` machinery inherited through `ROISampler` —
  confirm this is intentional.
  """

  def __call__(self, rois, roi_scores, gt_boxes, gt_classes):
    """Samples and assigns RoIs and their scores for training.

    Args:
      rois: a tensor of shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment. The last dimension is the box
        coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      roi_scores: a tensor of shape of [batch_size, N], the confidence score
        of each proposal in `rois` (e.g. its RPN classification or centerness
        score).
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_roi_scores: a tensor of shape of [batch_size, K], the scores of
        the sampled RoIs.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] =
        sampled_gt_boxes[:, i].
    """
    (sampled_rois, sampled_roi_scores, sampled_gt_boxes, sampled_gt_classes,
     sampled_gt_indices) = (
         self.assign_and_sample_proposals_and_scores(
             rois,
             roi_scores,
             gt_boxes,
             gt_classes,
             num_samples_per_image=self._num_samples_per_image,
             mix_gt_boxes=self._mix_gt_boxes,
             fg_fraction=self._fg_fraction,
             fg_iou_thresh=self._fg_iou_thresh,
             bg_iou_thresh_hi=self._bg_iou_thresh_hi,
             bg_iou_thresh_lo=self._bg_iou_thresh_lo))
    return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
            sampled_gt_classes, sampled_gt_indices)

  def assign_and_sample_proposals_and_scores(self,
                                             proposed_boxes,
                                             proposed_scores,
                                             gt_boxes,
                                             gt_classes,
                                             num_samples_per_image=512,
                                             mix_gt_boxes=True,
                                             fg_fraction=0.25,
                                             fg_iou_thresh=0.5,
                                             bg_iou_thresh_hi=0.5,
                                             bg_iou_thresh_lo=0.0):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses
    the following algorithm to generate the final `num_samples_per_image`
    RoIs:
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposed box with a groundtruth class and box by choosing
       the largest IoU overlap.
    3. Samples `num_samples_per_image` boxes from all proposed boxes, and
       returns box_targets, class_targets, and RoIs.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
        of proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      proposed_scores: a tensor of shape of [batch_size, N]. N is the number
        of proposals before groundtruth assignment. It is the rpn scores for
        all proposed boxes which can be either their classification or
        centerness scores.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: an integer represents RoI minibatch size per
        image.
      mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
        before sampling proposals.
      fg_fraction: a float represents the target fraction of RoI minibatch
        that is labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float represents the IoU overlap threshold for an RoI
        to be considered foreground (if >= fg_iou_thresh).
      bg_iou_thresh_hi: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).
      bg_iou_thresh_lo: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_roi_scores: a tensor of shape of [batch_size, K], representing
        the confidence score of the sampled RoIs, where K is the number of the
        sampled RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] =
        sampled_gt_boxes[:, i].
    """
    with tf.name_scope('sample_proposals_and_scores'):
      if mix_gt_boxes:
        boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
        # Groundtruth boxes mixed into the proposal set get a perfect score
        # of 1; padded gt rows (-1 coords) are later ignored via matched_iou<0.
        gt_scores = tf.ones_like(gt_boxes[:, :, 0])
        scores = tf.concat([proposed_scores, gt_scores], axis=1)
      else:
        boxes = proposed_boxes
        scores = proposed_scores

      (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
       _) = box_matching(boxes, gt_boxes, gt_classes)

      positive_match = tf.greater(matched_iou, fg_iou_thresh)
      negative_match = tf.logical_and(
          tf.greater_equal(matched_iou, bg_iou_thresh_lo),
          tf.less(matched_iou, bg_iou_thresh_hi))
      # matched_iou < 0 marks proposals matched only to invalid (padded) gt.
      ignored_match = tf.less(matched_iou, 0.0)

      # re-assign negatively matched boxes to the background class.
      matched_gt_classes = tf.where(negative_match,
                                    tf.zeros_like(matched_gt_classes),
                                    matched_gt_classes)
      matched_gt_indices = tf.where(negative_match,
                                    tf.zeros_like(matched_gt_indices),
                                    matched_gt_indices)

      sample_candidates = tf.logical_and(
          tf.logical_or(positive_match, negative_match),
          tf.logical_not(ignored_match))

      sampler = (
          balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
              positive_fraction=fg_fraction, is_static=True))

      batch_size, _ = sample_candidates.get_shape().as_list()
      sampled_indicators = []
      for i in range(batch_size):
        sampled_indicator = sampler.subsample(sample_candidates[i],
                                              num_samples_per_image,
                                              positive_match[i])
        sampled_indicators.append(sampled_indicator)
      sampled_indicators = tf.stack(sampled_indicators)
      # top_k over the 0/1 indicators selects the indices of sampled boxes.
      _, sampled_indices = tf.nn.top_k(
          tf.cast(sampled_indicators, dtype=tf.int32),
          k=num_samples_per_image,
          sorted=True)

      sampled_indices_shape = tf.shape(sampled_indices)
      batch_indices = (
          tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
          tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
      gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

      sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
      sampled_roi_scores = tf.gather_nd(scores, gather_nd_indices)
      sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
      sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices)
      sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices)

      return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
              sampled_gt_classes, sampled_gt_indices)
""" foreground_rois, foreground_classes, cropped_foreground_masks = ( - sample_and_crop_foreground_masks( - candidate_rois, - candidate_gt_boxes, - candidate_gt_classes, - candidate_gt_indices, - gt_masks, - self._num_mask_samples_per_image, - self._mask_target_size)) + sample_and_crop_foreground_masks(candidate_rois, candidate_gt_boxes, + candidate_gt_classes, + candidate_gt_indices, gt_masks, + self._num_mask_samples_per_image, + self._mask_target_size)) return foreground_rois, foreground_classes, cropped_foreground_masks diff --git a/official/vision/detection/utils/__init__.py b/official/vision/detection/utils/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/utils/__init__.py +++ b/official/vision/detection/utils/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/detection/utils/box_utils.py b/official/vision/detection/utils/box_utils.py index 4c2ebf5781f44363b090f3e272101d6014f2edd0..27f051c4d0a6ad6c6808f606f1fe5248e816a93e 100644 --- a/official/vision/detection/utils/box_utils.py +++ b/official/vision/detection/utils/box_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility functions for bounding box processing.""" from __future__ import absolute_import @@ -115,8 +115,8 @@ def normalize_boxes(boxes, image_shape): """Converts boxes to the normalized coordinates. Args: - boxes: a tensor whose last dimension is 4 representing the coordinates - of boxes in ymin, xmin, ymax, xmax order. + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. image_shape: a list of two integers, a two-element vector or a tensor such that all but the last dimensions are `broadcastable` to `boxes`. The last dimension is 2, which represents [height, width]. @@ -153,8 +153,8 @@ def denormalize_boxes(boxes, image_shape): """Converts boxes normalized by [height, width] to pixel coordinates. Args: - boxes: a tensor whose last dimension is 4 representing the coordinates - of boxes in ymin, xmin, ymax, xmax order. + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. image_shape: a list of two integers, a two-element vector or a tensor such that all but the last dimensions are `broadcastable` to `boxes`. The last dimension is 2, which represents [height, width]. @@ -187,8 +187,8 @@ def clip_boxes(boxes, image_shape): """Clips boxes to image boundaries. Args: - boxes: a tensor whose last dimension is 4 representing the coordinates - of boxes in ymin, xmin, ymax, xmax order. + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. image_shape: a list of two integers, a two-element vector or a tensor such that all but the last dimensions are `broadcastable` to `boxes`. 
The last dimension is 2, which represents [height, width]. @@ -255,8 +255,8 @@ def encode_boxes(boxes, anchors, weights=None): """Encode boxes to targets. Args: - boxes: a tensor whose last dimension is 4 representing the coordinates - of boxes in ymin, xmin, ymax, xmax order. + boxes: a tensor whose last dimension is 4 representing the coordinates of + boxes in ymin, xmin, ymax, xmax order. anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`, representing the coordinates of anchors in ymin, xmin, ymax, xmax order. weights: None or a list of four float numbers used to scale coordinates. @@ -302,9 +302,8 @@ def encode_boxes(boxes, anchors, weights=None): encoded_dh *= weights[2] encoded_dw *= weights[3] - encoded_boxes = tf.concat( - [encoded_dy, encoded_dx, encoded_dh, encoded_dw], - axis=-1) + encoded_boxes = tf.concat([encoded_dy, encoded_dx, encoded_dh, encoded_dw], + axis=-1) return encoded_boxes @@ -359,11 +358,162 @@ def decode_boxes(encoded_boxes, anchors, weights=None): decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0 decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0 - decoded_boxes = tf.concat( + decoded_boxes = tf.concat([ + decoded_boxes_ymin, decoded_boxes_xmin, decoded_boxes_ymax, + decoded_boxes_xmax + ], + axis=-1) + return decoded_boxes + + +def encode_boxes_lrtb(boxes, anchors, weights=None): + """Encode boxes to targets on lrtb (=left,right,top,bottom) format. + + Args: + boxes: a tensor whose last dimension is 4 representing the coordinates + of boxes in ymin, xmin, ymax, xmax order. + anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`, + representing the coordinates of anchors in ymin, xmin, ymax, xmax order. + weights: None or a list of four float numbers used to scale coordinates. + + Returns: + encoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing + the encoded box targets. 
The box targets encode the left, right, top, + bottom distances from an anchor location to the four borders of the + matched groundtruth bounding box. + center_targets: centerness targets defined by the left, right, top, and + bottom distance targets. The centerness is defined as the deviation of the + anchor location from the groundtruth object center. Formally, centerness = + sqrt(min(left, right)/max(left, right)*min(top, bottom)/max(top, bottom)). + + Raises: + ValueError: If the last dimension of boxes is not 4. + """ + if boxes.shape[-1] != 4: + raise ValueError( + 'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1])) + + with tf.name_scope('encode_boxes_lrtb'): + boxes = tf.cast(boxes, dtype=anchors.dtype) + ymin = boxes[..., 0:1] + xmin = boxes[..., 1:2] + ymax = boxes[..., 2:3] + xmax = boxes[..., 3:4] + # box_h = ymax - ymin + 1.0 + # box_w = xmax - xmin + 1.0 + box_h = ymax - ymin + box_w = xmax - xmin + + anchor_ymin = anchors[..., 0:1] + anchor_xmin = anchors[..., 1:2] + anchor_ymax = anchors[..., 2:3] + anchor_xmax = anchors[..., 3:4] + # anchor_h = anchor_ymax - anchor_ymin + 1.0 + # anchor_w = anchor_xmax - anchor_xmin + 1.0 + anchor_h = anchor_ymax - anchor_ymin + anchor_w = anchor_xmax - anchor_xmin + anchor_yc = anchor_ymin + 0.5 * anchor_h + anchor_xc = anchor_xmin + 0.5 * anchor_w + + box_h += EPSILON + box_w += EPSILON + anchor_h += EPSILON + anchor_w += EPSILON + + left = (anchor_xc - xmin) / anchor_w + right = (xmax - anchor_xc) / anchor_w + top = (anchor_yc - ymin) / anchor_h + bottom = (ymax - anchor_yc) / anchor_h + + # Create centerness target. { + lrtb_targets = tf.concat([left, right, top, bottom], axis=-1) + valid_match = tf.greater(tf.reduce_min(lrtb_targets, -1), 0.0) + + # Centerness score. 
+ left_right = tf.concat([left, right], axis=-1) + + left_right = tf.where(tf.stack([valid_match, valid_match], -1), + left_right, tf.zeros_like(left_right)) + top_bottom = tf.concat([top, bottom], axis=-1) + top_bottom = tf.where(tf.stack([valid_match, valid_match], -1), + top_bottom, tf.zeros_like(top_bottom)) + center_targets = tf.sqrt( + (tf.reduce_min(left_right, -1) / + (tf.reduce_max(left_right, -1) + EPSILON)) * + (tf.reduce_min(top_bottom, -1) / + (tf.reduce_max(top_bottom, -1) + EPSILON))) + center_targets = tf.where(valid_match, + center_targets, + tf.zeros_like(center_targets)) + if weights: + left *= weights[0] + right *= weights[1] + top *= weights[2] + bottom *= weights[3] + + encoded_boxes_lrtb = tf.concat( + [left, right, top, bottom], + axis=-1) + + return encoded_boxes_lrtb, center_targets + + +def decode_boxes_lrtb(encoded_boxes_lrtb, anchors, weights=None): + """Decode boxes. + + Args: + encoded_boxes_lrtb: a tensor whose last dimension is 4 representing the + coordinates of encoded boxes in left, right, top, bottom order. + anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`, + representing the coordinates of anchors in ymin, xmin, ymax, xmax order. + weights: None or a list of four float numbers used to scale coordinates. + + Returns: + decoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing + the decoded box targets in lrtb (=left,right,top,bottom) format. The box + decoded box coordinates represent the left, right, top, and bottom + distances from an anchor location to the four borders of the matched + groundtruth bounding box. + """ + if encoded_boxes_lrtb.shape[-1] != 4: + raise ValueError( + 'encoded_boxes_lrtb.shape[-1] is {:d}, but must be 4.' 
+ .format(encoded_boxes_lrtb.shape[-1])) + + with tf.name_scope('decode_boxes_lrtb'): + encoded_boxes_lrtb = tf.cast(encoded_boxes_lrtb, dtype=anchors.dtype) + left = encoded_boxes_lrtb[..., 0:1] + right = encoded_boxes_lrtb[..., 1:2] + top = encoded_boxes_lrtb[..., 2:3] + bottom = encoded_boxes_lrtb[..., 3:4] + if weights: + left /= weights[0] + right /= weights[1] + top /= weights[2] + bottom /= weights[3] + + anchor_ymin = anchors[..., 0:1] + anchor_xmin = anchors[..., 1:2] + anchor_ymax = anchors[..., 2:3] + anchor_xmax = anchors[..., 3:4] + + anchor_h = anchor_ymax - anchor_ymin + anchor_w = anchor_xmax - anchor_xmin + anchor_yc = anchor_ymin + 0.5 * anchor_h + anchor_xc = anchor_xmin + 0.5 * anchor_w + anchor_h += EPSILON + anchor_w += EPSILON + + decoded_boxes_ymin = anchor_yc - top * anchor_h + decoded_boxes_xmin = anchor_xc - left * anchor_w + decoded_boxes_ymax = anchor_yc + bottom * anchor_h + decoded_boxes_xmax = anchor_xc + right * anchor_w + + decoded_boxes_lrtb = tf.concat( [decoded_boxes_ymin, decoded_boxes_xmin, decoded_boxes_ymax, decoded_boxes_xmax], axis=-1) - return decoded_boxes + return decoded_boxes_lrtb def filter_boxes(boxes, scores, image_shape, min_size_threshold): @@ -546,6 +696,6 @@ def get_non_empty_box_indices(boxes): # Selects indices if box height or width is 0. height = boxes[:, 2] - boxes[:, 0] width = boxes[:, 3] - boxes[:, 1] - indices = tf.where(tf.logical_and(tf.greater(height, 0), - tf.greater(width, 0))) + indices = tf.where( + tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))) return indices[:, 0] diff --git a/official/vision/detection/utils/class_utils.py b/official/vision/detection/utils/class_utils.py index cce9cf982bbbce7b90ee44e67ebe65997b7a91da..cbf806f11070736c17de79dd63240e9a626808d9 100644 --- a/official/vision/detection/utils/class_utils.py +++ b/official/vision/detection/utils/class_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility functions for handling dataset object categories.""" diff --git a/official/vision/detection/utils/dataloader_utils.py b/official/vision/detection/utils/dataloader_utils.py index da82203511da50393a352bf75ee56f25c6626c05..9569d7713c2177d233bcdc21934edb6ffbe0fefd 100644 --- a/official/vision/detection/utils/dataloader_utils.py +++ b/official/vision/detection/utils/dataloader_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility functions for dataloader.""" import tensorflow as tf diff --git a/official/vision/detection/utils/input_utils.py b/official/vision/detection/utils/input_utils.py index 6010dc8973f387318c4553d3014ccf495cf01fc6..7f5502eeefd1a05c05d3beb9d5b2bd5975e4c18d 100644 --- a/official/vision/detection/utils/input_utils.py +++ b/official/vision/detection/utils/input_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility functions for input processing.""" import math + import tensorflow as tf from official.vision.detection.utils import box_utils @@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride): [height, width] of the padded output image size. """ if isinstance(desired_size, list) or isinstance(desired_size, tuple): - padded_size = [int(math.ceil(d * 1.0 / stride) * stride) - for d in desired_size] + padded_size = [ + int(math.ceil(d * 1.0 / stride) * stride) for d in desired_size + ] else: padded_size = tf.cast( - tf.math.ceil( - tf.cast(desired_size, dtype=tf.float32) / stride) * stride, + tf.math.ceil(tf.cast(desired_size, dtype=tf.float32) / stride) * stride, tf.int32) return padded_size @@ -158,8 +159,8 @@ def resize_and_crop_image(image, else: scaled_size = desired_size - scale = tf.minimum( - scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) + scale = tf.minimum(scaled_size[0] / image_size[0], + scaled_size[1] / image_size[1]) scaled_size = tf.round(image_size * scale) # Computes 2D image_scale. @@ -169,9 +170,8 @@ def resize_and_crop_image(image, # desired_size. 
if random_jittering: max_offset = scaled_size - desired_size - max_offset = tf.where(tf.less(max_offset, 0), - tf.zeros_like(max_offset), - max_offset) + max_offset = tf.where( + tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) offset = max_offset * tf.random.uniform([ 2, ], 0, 1, seed=seed) @@ -191,9 +191,9 @@ def resize_and_crop_image(image, image_info = tf.stack([ image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) + tf.cast(desired_size, dtype=tf.float32), image_scale, + tf.cast(offset, tf.float32) + ]) return output_image, image_info @@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image, image, tf.cast(scaled_size, tf.int32), method=method) if random_jittering: - scaled_image = scaled_image[ - offset[0]:offset[0] + desired_size[0], - offset[1]:offset[1] + desired_size[1], :] + scaled_image = scaled_image[offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] - output_image = tf.image.pad_to_bounding_box( - scaled_image, 0, 0, padded_size[0], padded_size[1]) + output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, + padded_size[0], padded_size[1]) image_info = tf.stack([ image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) + tf.cast(desired_size, dtype=tf.float32), image_scale, + tf.cast(offset, tf.float32) + ]) return output_image, image_info -def resize_and_crop_boxes(boxes, - image_scale, - output_size, - offset): +def resize_and_crop_boxes(boxes, image_scale, output_size, offset): """Resizes boxes to output size with scale and offset. Args: @@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes, return boxes -def resize_and_crop_masks(masks, - image_scale, - output_size, - offset): +def resize_and_crop_masks(masks, image_scale, output_size, offset): """Resizes boxes to output size with scale and offset. 
Args: diff --git a/official/vision/detection/utils/mask_utils.py b/official/vision/detection/utils/mask_utils.py index 637d0484f4b48213c4b323be6e0c88f9fa19ebcc..926c829b81b35b11ca53a5a3d351d0ebca36205e 100644 --- a/official/vision/detection/utils/mask_utils.py +++ b/official/vision/detection/utils/mask_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,21 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Utility functions for segmentations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import math + import numpy as np import cv2 -def paste_instance_masks(masks, - detected_boxes, - image_height, - image_width): +def paste_instance_masks(masks, detected_boxes, image_height, image_width): """Paste instance masks to generate the image segmentation results. 
Args: @@ -95,10 +93,8 @@ def paste_instance_masks(masks, y_0 = min(max(ref_box[1], 0), image_height) y_1 = min(max(ref_box[3] + 1, 0), image_height) - im_mask[y_0:y_1, x_0:x_1] = mask[ - (y_0 - ref_box[1]):(y_1 - ref_box[1]), - (x_0 - ref_box[0]):(x_1 - ref_box[0]) - ] + im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]), + (x_0 - ref_box[0]):(x_1 - ref_box[0])] segms.append(im_mask) segms = np.array(segms) @@ -106,10 +102,7 @@ def paste_instance_masks(masks, return segms -def paste_instance_masks_v2(masks, - detected_boxes, - image_height, - image_width): +def paste_instance_masks_v2(masks, detected_boxes, image_height, image_width): """Paste instance masks to generate the image segmentation (v2). Args: @@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks, beta = box[3] / (1.0 * mask_height) # pylint: disable=invalid-name # Transformation from mask pixel indices to image coordinate. - M_mask_to_image = np.array( - [[alpha, 0, xmin], - [0, beta, ymin], - [0, 0, 1]], - dtype=np.float32) + M_mask_to_image = np.array([[alpha, 0, xmin], [0, beta, ymin], [0, 0, 1]], + dtype=np.float32) # Transformation from image to cropped mask coordinate. M_image_to_crop = np.array( - [[1, 0, -xmin_int], - [0, 1, -ymin_int], - [0, 0, 1]], - dtype=np.float32) + [[1, 0, -xmin_int], [0, 1, -ymin_int], [0, 0, 1]], dtype=np.float32) M = np.dot(M_image_to_crop, M_mask_to_image) # Compensate the half pixel offset that OpenCV has in the # warpPerspective implementation: the top-left pixel is sampled # at (0,0), but we want it to be at (0.5, 0.5). 
M = np.dot( np.dot( - np.array([[1, 0, -0.5], - [0, 1, -0.5], - [0, 0, 1]], np.float32), - M), - np.array([[1, 0, 0.5], - [0, 1, 0.5], - [0, 0, 1]], np.float32)) + np.array([[1, 0, -0.5], [0, 1, -0.5], [0, 0, 1]], np.float32), M), + np.array([[1, 0, 0.5], [0, 1, 0.5], [0, 0, 1]], np.float32)) # pylint: enable=invalid-name cropped_mask = cv2.warpPerspective( - mask.astype(np.float32), M, - (xmax_int - xmin_int, ymax_int - ymin_int)) + mask.astype(np.float32), M, (xmax_int - xmin_int, ymax_int - ymin_int)) cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8) img_mask = np.zeros((image_height, image_width)) @@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks, x1 = max(min(xmax_int, image_width), 0) y0 = max(min(ymin_int, image_height), 0) y1 = max(min(ymax_int, image_height), 0) - img_mask[y0:y1, x0:x1] = cropped_mask[ - (y0 - ymin_int):(y1 - ymin_int), - (x0 - xmin_int):(x1 - xmin_int)] + img_mask[y0:y1, x0:x1] = cropped_mask[(y0 - ymin_int):(y1 - ymin_int), + (x0 - xmin_int):(x1 - xmin_int)] segms.append(img_mask) segms = np.array(segms) return segms - diff --git a/official/vision/detection/utils/object_detection/__init__.py b/official/vision/detection/utils/object_detection/__init__.py index 85c94f4b6bd7567796755895505a320405a40777..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/detection/utils/object_detection/__init__.py +++ b/official/vision/detection/utils/object_detection/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + diff --git a/official/vision/detection/utils/object_detection/argmax_matcher.py b/official/vision/detection/utils/object_detection/argmax_matcher.py index 3f8b051bfb08a72846482c0da9c79d1b98418c38..c92b69edce2fb3734f4b90bdb83c75a30e69d43b 100644 --- a/official/vision/detection/utils/object_detection/argmax_matcher.py +++ b/official/vision/detection/utils/object_detection/argmax_matcher.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Argmax matcher implementation. diff --git a/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py b/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py index f969182b05a29167649d5c022a667b3f768f0143..e71dd1727ed590d92b2248b7070561c4246d275a 100644 --- a/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py +++ b/official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Class to subsample minibatches by balancing positives and negatives. @@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): Args: input_tensor: An int32 tensor of shape [N] to be sliced. - num_start_samples: Number of examples to be sliced from the beginning - of the input tensor. - num_end_samples: Number of examples to be sliced from the end of the - input tensor. + num_start_samples: Number of examples to be sliced from the beginning of + the input tensor. + num_end_samples: Number of examples to be sliced from the end of the input + tensor. total_num_samples: Sum of is num_start_samples and num_end_samples. This should be a scalar. @@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): tf.range(input_length), input_length - num_end_samples) selected_positions = tf.logical_or(start_positions, end_positions) selected_positions = tf.cast(selected_positions, tf.float32) - indexed_positions = tf.multiply(tf.cumsum(selected_positions), - selected_positions) - one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1, - total_num_samples, - dtype=tf.float32) - return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32), - one_hot_selector, axes=[0, 0]), tf.int32) + indexed_positions = tf.multiply( + tf.cumsum(selected_positions), selected_positions) + one_hot_selector = tf.one_hot( + tf.cast(indexed_positions, tf.int32) - 1, + total_num_samples, + dtype=tf.float32) + return tf.cast( + tf.tensordot( + tf.cast(input_tensor, tf.float32), one_hot_selector, axes=[0, 0]), + tf.int32) def _static_subsample(self, indicator, batch_size, labels): """Returns subsampled minibatch. 
@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): sorted_signed_indicator_idx = tf.nn.top_k( signed_indicator_idx, input_length, sorted=True).values - [num_positive_samples, - num_negative_samples] = self._get_num_pos_neg_samples( - sorted_signed_indicator_idx, batch_size) + [num_positive_samples, num_negative_samples + ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx, batch_size) sampled_idx = self._get_values_from_start_and_end( - sorted_signed_indicator_idx, num_positive_samples, - num_negative_samples, batch_size) + sorted_signed_indicator_idx, num_positive_samples, num_negative_samples, + batch_size) # Shift the indices to start from 0 and remove any samples that are set as # False. @@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): tf.bool) # project back the order based on stored permutations - reprojections = tf.one_hot(permutation, depth=input_length, - dtype=tf.float32) - return tf.cast(tf.tensordot( - tf.cast(sampled_idx_indicator, tf.float32), - reprojections, axes=[0, 0]), tf.bool) + reprojections = tf.one_hot( + permutation, depth=input_length, dtype=tf.float32) + return tf.cast( + tf.tensordot( + tf.cast(sampled_idx_indicator, tf.float32), + reprojections, + axes=[0, 0]), tf.bool) def subsample(self, indicator, batch_size, labels, scope=None): """Returns subsampled minibatch. @@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): randomly selects negative samples so that the positive sample fraction matches self._positive_fraction. It cannot be None is is_static is True. labels: boolean tensor of shape [N] denoting positive(=True) and negative - (=False) examples. + (=False) examples. scope: name scope. 
Returns: diff --git a/official/vision/detection/utils/object_detection/box_coder.py b/official/vision/detection/utils/object_detection/box_coder.py index f20ac956dfbce1fa69d1b9e6f5b023b704e1ec8a..c58eead30d4912b8c947fc532cb5b71dc5138233 100644 --- a/official/vision/detection/utils/object_detection/box_coder.py +++ b/official/vision/detection/utils/object_detection/box_coder.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Base box coder. @@ -32,7 +31,6 @@ from abc import abstractproperty import tensorflow as tf - # Box coder types. FASTER_RCNN = 'faster_rcnn' KEYPOINT = 'keypoint' @@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors): """ encoded_boxes.get_shape().assert_has_rank(3) if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static(): - raise ValueError('The number of anchors inferred from encoded_boxes' - ' and anchors are inconsistent: shape[1] of encoded_boxes' - ' %s should be equal to the number of anchors: %s.' % - (encoded_boxes.get_shape()[1].value, - anchors.num_boxes_static())) + raise ValueError( + 'The number of anchors inferred from encoded_boxes' + ' and anchors are inconsistent: shape[1] of encoded_boxes' + ' %s should be equal to the number of anchors: %s.' 
% + (encoded_boxes.get_shape()[1].value, anchors.num_boxes_static())) decoded_boxes = tf.stack([ box_coder.decode(boxes, anchors).get() diff --git a/official/vision/detection/utils/object_detection/box_list.py b/official/vision/detection/utils/object_detection/box_list.py index 113fab8c197194f1cd0099d5a177cd9f1fb6e64c..f5d4443c81b22f1586f6691c5c4e309de3046f9c 100644 --- a/official/vision/detection/utils/object_detection/box_list.py +++ b/official/vision/detection/utils/object_detection/box_list.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Bounding Box List definition. @@ -126,8 +125,8 @@ class BoxList(object): it returns the box coordinates. Args: - field: this optional string parameter can be used to specify - a related field to be accessed. + field: this optional string parameter can be used to specify a related + field to be accessed. Returns: a tensor representing the box collection or an associated field. @@ -192,8 +191,8 @@ class BoxList(object): """Retrieves specified fields as a dictionary of tensors. Args: - fields: (optional) list of fields to return in the dictionary. - If None (default), all fields are returned. + fields: (optional) list of fields to return in the dictionary. If None + (default), all fields are returned. Returns: tensor_dict: A dictionary of tensors specified by fields. 
diff --git a/official/vision/detection/utils/object_detection/box_list_ops.py b/official/vision/detection/utils/object_detection/box_list_ops.py index 9f1b06e28d588eb05c9ea8596b44d08690481eae..ef2fcdc2d6b79d292949b70fa84bc1bcdcea58d7 100644 --- a/official/vision/detection/utils/object_detection/box_list_ops.py +++ b/official/vision/detection/utils/object_detection/box_list_ops.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Bounding Box List operations. @@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None): Args: boxlist: a BoxList holding M_in boxes. - window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] - of the window + window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of + the window scope: name scope. 
Returns: @@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None): value=boxlist.get(), num_or_size_splits=4, axis=1) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) coordinate_violations = tf.concat([ - tf.less(y_min, win_y_min), tf.less(x_min, win_x_min), - tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max) + tf.less(y_min, win_y_min), + tf.less(x_min, win_x_min), + tf.greater(y_max, win_y_max), + tf.greater(x_max, win_x_max) ], 1) valid_indices = tf.reshape( tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) @@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None): Args: boxlist: a BoxList holding M_in boxes. - window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] - of the window + window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of + the window scope: name scope. Returns: @@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None): value=boxlist.get(), num_or_size_splits=4, axis=1) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) coordinate_violations = tf.concat([ - tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max), - tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min) + tf.greater_equal(y_min, win_y_max), + tf.greater_equal(x_min, win_x_max), + tf.less_equal(y_max, win_y_min), + tf.less_equal(x_max, win_x_min) ], 1) valid_indices = tf.reshape( tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) @@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None): unions = ( tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections) return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) + tf.equal(intersections, 0.0), tf.zeros_like(intersections), + tf.truediv(intersections, unions)) def matched_iou(boxlist1, boxlist2, scope=None): @@ -295,8 +298,8 @@ def 
matched_iou(boxlist1, boxlist2, scope=None): areas2 = area(boxlist2) unions = areas1 + areas2 - intersections return tf.where( - tf.equal(intersections, 0.0), - tf.zeros_like(intersections), tf.truediv(intersections, unions)) + tf.equal(intersections, 0.0), tf.zeros_like(intersections), + tf.truediv(intersections, unions)) def ioa(boxlist1, boxlist2, scope=None): @@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None): return tf.truediv(intersections, areas) -def prune_non_overlapping_boxes( - boxlist1, boxlist2, min_overlap=0.0, scope=None): +def prune_non_overlapping_boxes(boxlist1, + boxlist2, + min_overlap=0.0, + scope=None): """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. For each box in boxlist1, we want its IOA to be more than minoverlap with @@ -331,7 +336,7 @@ def prune_non_overlapping_boxes( boxlist1: BoxList holding N boxes. boxlist2: BoxList holding M boxes. min_overlap: Minimum required overlap between boxes, to count them as - overlapping. + overlapping. scope: name scope. 
Returns: @@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None): """ with tf.name_scope(scope, 'PruneSmallBoxes'): height, width = height_width(boxlist) - is_valid = tf.logical_and(tf.greater_equal(width, min_side), - tf.greater_equal(height, min_side)) + is_valid = tf.logical_and( + tf.greater_equal(width, min_side), tf.greater_equal(height, min_side)) return gather(boxlist, tf.reshape(tf.where(is_valid), [-1])) @@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None): with tf.name_scope(scope, 'ChangeCoordinateFrame'): win_height = window[2] - window[0] win_width = window[3] - window[1] - boxlist_new = scale(box_list.BoxList( - boxlist.get() - [window[0], window[1], window[0], window[1]]), - 1.0 / win_height, 1.0 / win_width) + boxlist_new = scale( + box_list.BoxList(boxlist.get() - + [window[0], window[1], window[0], window[1]]), + 1.0 / win_height, 1.0 / win_width) boxlist_new = _copy_extra_fields(boxlist_new, boxlist) return boxlist_new @@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None): with tf.name_scope(scope, 'SqDist'): sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) - innerprod = tf.matmul(boxlist1.get(), boxlist2.get(), - transpose_a=False, transpose_b=True) + innerprod = tf.matmul( + boxlist1.get(), boxlist2.get(), transpose_a=False, transpose_b=True) return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod -def boolean_mask(boxlist, indicator, fields=None, scope=None, - use_static_shapes=False, indicator_sum=None): +def boolean_mask(boxlist, + indicator, + fields=None, + scope=None, + use_static_shapes=False, + indicator_sum=None): """Select boxes from BoxList according to indicator and return new BoxList. 
`boolean_mask` returns the subset of boxes that are marked as "True" by the @@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None, raise ValueError('`indicator_sum` must be a of type int') selected_positions = tf.cast(indicator, dtype=tf.float32) indexed_positions = tf.cast( - tf.multiply( - tf.cumsum(selected_positions), selected_positions), + tf.multiply(tf.cumsum(selected_positions), selected_positions), dtype=tf.int32) one_hot_selector = tf.one_hot( indexed_positions - 1, indicator_sum, dtype=tf.float32) @@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None): Args: boxlists: list of BoxList objects - fields: optional list of fields to also concatenate. By default, all - fields from the first BoxList in the list are included in the - concatenation. + fields: optional list of fields to also concatenate. By default, all fields + from the first BoxList in the list are included in the concatenation. scope: name scope. Returns: @@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): Args: image: an image tensor with shape [height, width, 3] boxlist: a BoxList - normalized: (boolean) specify whether corners are to be interpreted - as absolute coordinates in image space or normalized with respect to the + normalized: (boolean) specify whether corners are to be interpreted as + absolute coordinates in image space or normalized with respect to the image size. scope: name scope. 
@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): with tf.name_scope(scope, 'VisualizeBoxesInImage'): if not normalized: height, width, _ = tf.unstack(tf.shape(image)) - boxlist = scale(boxlist, - 1.0 / tf.cast(height, tf.float32), + boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32), 1.0 / tf.cast(width, tf.float32)) corners = tf.expand_dims(boxlist.get(), 0) image = tf.expand_dims(image, 0) @@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None): if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1: raise ValueError('Scores should have rank 1 or have shape ' 'consistent with [None, 1]') - high_score_indices = tf.cast(tf.reshape( - tf.where(tf.greater(scores, thresh)), - [-1]), tf.int32) + high_score_indices = tf.cast( + tf.reshape(tf.where(tf.greater(scores, thresh)), [-1]), tf.int32) return gather(boxlist, high_score_indices) @@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None): if not boxlist.has_field('scores'): raise ValueError('input boxlist must have \'scores\' field') selected_indices = tf.image.non_max_suppression( - boxlist.get(), boxlist.get_field('scores'), - max_output_size, iou_threshold=thresh) + boxlist.get(), + boxlist.get_field('scores'), + max_output_size, + iou_threshold=thresh) return gather(boxlist, selected_indices) @@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): return boxlist_to_copy_to -def to_normalized_coordinates(boxlist, height, width, - check_range=True, scope=None): +def to_normalized_coordinates(boxlist, + height, + width, + check_range=True, + scope=None): """Converts absolute box coordinates to normalized coordinates in [0, 1]. 
Usually one uses the dynamic shape of the image or conv-layer tensor: @@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width, if check_range: max_val = tf.reduce_max(boxlist.get()) - max_assert = tf.Assert(tf.greater(max_val, 1.01), - ['max value is lower than 1.01: ', max_val]) + max_assert = tf.Assert( + tf.greater(max_val, 1.01), + ['max value is lower than 1.01: ', max_val]) with tf.control_dependencies([max_assert]): width = tf.identity(width) @@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist, height: Maximum value for height of absolute box coordinates. width: Maximum value for width of absolute box coordinates. check_range: If True, checks if the coordinates are normalized or not. - maximum_normalized_coordinate: Maximum coordinate value to be considered - as normalized, default to 1.1. + maximum_normalized_coordinate: Maximum coordinate value to be considered as + normalized, default to 1.1. scope: name scope. Returns: @@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist, if check_range: box_maximum = tf.reduce_max(boxlist.get()) max_assert = tf.Assert( - tf.greater_equal(maximum_normalized_coordinate, box_maximum), - ['maximum box coordinate value is larger ' - 'than %f: ' % maximum_normalized_coordinate, box_maximum]) + tf.greater_equal(maximum_normalized_coordinate, box_maximum), [ + 'maximum box coordinate value is larger ' + 'than %f: ' % maximum_normalized_coordinate, box_maximum + ]) with tf.control_dependencies([max_assert]): width = tf.identity(width) @@ -924,13 +937,15 @@ def refine_boxes(pool_boxes, if not pool_boxes.has_field('scores'): raise ValueError('pool_boxes must have a \'scores\' field') - nms_boxes = non_max_suppression( - pool_boxes, nms_iou_thresh, nms_max_detections) + nms_boxes = non_max_suppression(pool_boxes, nms_iou_thresh, + nms_max_detections) return box_voting(nms_boxes, pool_boxes, voting_iou_thresh) def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): - """Performs box voting as described 
in S. Gidaris and N. Komodakis, ICCV 2015. + """Performs box voting as described in S. Gidaris and N. + + Komodakis, ICCV 2015. Performs box voting as described in 'Object detection via a multi-region & semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For @@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): # match to any boxes in pool_boxes. For such boxes without any matches, we # should return the original boxes without voting. match_assert = tf.Assert( - tf.reduce_all(tf.greater(num_matches, 0)), - ['Each box in selected_boxes must match with at least one box ' - 'in pool_boxes.']) + tf.reduce_all(tf.greater(num_matches, 0)), [ + 'Each box in selected_boxes must match with at least one box ' + 'in pool_boxes.' + ]) scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) scores_assert = tf.Assert( @@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): return averaged_boxes -def get_minimal_coverage_box(boxlist, - default_box=None, - scope=None): +def get_minimal_coverage_box(boxlist, default_box=None, scope=None): """Creates a single bounding box which covers all boxes in the boxlist. Args: @@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist, boxlist: A boxlist containing N boxes in normalized coordinates. num_boxes_to_sample: A positive integer containing the number of boxes to sample. - stddev: Standard deviation. This is used to draw random offsets for the - box corners from a normal distribution. The offset is multiplied by the - box size so will be larger in terms of pixels for larger boxes. + stddev: Standard deviation. This is used to draw random offsets for the box + corners from a normal distribution. The offset is multiplied by the box + size so will be larger in terms of pixels for larger boxes. scope: Name scope. 
Returns: @@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist, """ with tf.name_scope(scope, 'SampleBoxesByJittering'): num_boxes = boxlist.num_boxes() - box_indices = tf.random_uniform( - [num_boxes_to_sample], - minval=0, - maxval=num_boxes, - dtype=tf.int32) + box_indices = tf.random_uniform([num_boxes_to_sample], + minval=0, + maxval=num_boxes, + dtype=tf.int32) sampled_boxes = tf.gather(boxlist.get(), box_indices) sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0] sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1] diff --git a/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py b/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py index 235df4ede474e89687a17413e81e60aa21772e23..0ce22d5ecf1fa5585127325a68ffa325e552b7d3 100644 --- a/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py +++ b/official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Faster RCNN box coder. @@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder): """Constructor for FasterRcnnBoxCoder. Args: - scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. - If set to None, does not perform scaling. For Faster RCNN, - the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. + scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. If + set to None, does not perform scaling. 
For Faster RCNN, the open-source + implementation recommends using [10.0, 10.0, 5.0, 5.0]. """ if scale_factors: assert len(scale_factors) == 4 diff --git a/official/vision/detection/utils/object_detection/matcher.py b/official/vision/detection/utils/object_detection/matcher.py index 4a025d5e7118ee20f136c8a31b4c183de11f1e7f..1586830970437a19566bf430c18e8ca2b7e47a62 100644 --- a/official/vision/detection/utils/object_detection/matcher.py +++ b/official/vision/detection/utils/object_detection/matcher.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Matcher interface and Match class. @@ -49,9 +48,9 @@ class Match(object): Args: match_results: Integer tensor of shape [N] with (1) match_results[i]>=0, - meaning that column i is matched with row match_results[i]. - (2) match_results[i]=-1, meaning that column i is not matched. - (3) match_results[i]=-2, meaning that column i is ignored. + meaning that column i is matched with row match_results[i]. (2) + match_results[i]=-1, meaning that column i is not matched. (3) + match_results[i]=-2, meaning that column i is ignored. 
Raises: ValueError: if match_results does not have rank 1 or is not an @@ -168,8 +167,7 @@ class Match(object): def _reshape_and_cast(self, t): return tf.cast(tf.reshape(t, [-1]), tf.int32) - def gather_based_on_match(self, input_tensor, unmatched_value, - ignored_value): + def gather_based_on_match(self, input_tensor, unmatched_value, ignored_value): """Gathers elements from `input_tensor` based on match results. For columns that are matched to a row, gathered_tensor[col] is set to @@ -190,16 +188,15 @@ class Match(object): The shape of the gathered tensor is [match_results.shape[0]] + input_tensor.shape[1:]. """ - input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]), - input_tensor], axis=0) + input_tensor = tf.concat( + [tf.stack([ignored_value, unmatched_value]), input_tensor], axis=0) gather_indices = tf.maximum(self.match_results + 2, 0) gathered_tensor = tf.gather(input_tensor, gather_indices) return gathered_tensor class Matcher(object): - """Abstract base class for matcher. - """ + """Abstract base class for matcher.""" __metaclass__ = ABCMeta def match(self, similarity_matrix, scope=None, **params): @@ -212,8 +209,8 @@ class Matcher(object): similarity_matrix: Float tensor of shape [N, M] with pairwise similarity where higher value means more similar. scope: Op scope name. Defaults to 'Match' if None. - **params: Additional keyword arguments for specific implementations of - the Matcher. + **params: Additional keyword arguments for specific implementations of the + Matcher. Returns: A Match object with the results of matching. @@ -230,8 +227,8 @@ class Matcher(object): Args: similarity_matrix: Float tensor of shape [N, M] with pairwise similarity where higher value means more similar. - **params: Additional keyword arguments for specific implementations of - the Matcher. + **params: Additional keyword arguments for specific implementations of the + Matcher. 
Returns: match_results: Integer tensor of shape [M]: match_results[i]>=0 means diff --git a/official/vision/detection/utils/object_detection/minibatch_sampler.py b/official/vision/detection/utils/object_detection/minibatch_sampler.py index b9f529ab5976ca56f014788c1263e5887fde0444..63e99d85237b203c11f91d6dab5e11c954d89422 100644 --- a/official/vision/detection/utils/object_detection/minibatch_sampler.py +++ b/official/vision/detection/utils/object_detection/minibatch_sampler.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Base minibatch sampler module. @@ -53,8 +52,8 @@ class MinibatchSampler(object): Args: indicator: boolean tensor of shape [N] whose True entries can be sampled. batch_size: desired batch size. - **params: additional keyword arguments for specific implementations of - the MinibatchSampler. + **params: additional keyword arguments for specific implementations of the + MinibatchSampler. Returns: sample_indicator: boolean tensor of shape [N] whose True entries have been @@ -72,8 +71,8 @@ class MinibatchSampler(object): is returned. Args: - indicator: a 1-dimensional boolean tensor indicating which elements - are allowed to be sampled and which are not. + indicator: a 1-dimensional boolean tensor indicating which elements are + allowed to be sampled and which are not. 
num_samples: int32 scalar tensor Returns: diff --git a/official/vision/detection/utils/object_detection/ops.py b/official/vision/detection/utils/object_detection/ops.py index bbfc1ae9353604986ad3f1f06a4f8e2e72bb5ca0..052e90a61009f3ada2ee77c0f1c8ad114143e5b7 100644 --- a/official/vision/detection/utils/object_detection/ops.py +++ b/official/vision/detection/utils/object_detection/ops.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """A module for helper tensorflow ops. @@ -37,7 +36,7 @@ def indices_to_dense_vector(indices, Args: indices: 1d Tensor with integer indices which are to be set to - indices_values. + indices_values. size: scalar with size (integer) of output Tensor. indices_value: values of elements specified by indices in the output vector default_value: values of other elements in the output vector. @@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None): TODO(rathodv, jonathanhuang): enable sparse matmul option. Args: - params: A float32 Tensor. The tensor from which to gather values. - Must be at least rank 1. - indices: A Tensor. Must be one of the following types: int32, int64. - Must be in range [0, params.shape[0]) + params: A float32 Tensor. The tensor from which to gather values. Must be at + least rank 1. + indices: A Tensor. Must be one of the following types: int32, int64. Must be + in range [0, params.shape[0]) scope: A name for the operation (optional). 
Returns: diff --git a/official/vision/detection/utils/object_detection/preprocessor.py b/official/vision/detection/utils/object_detection/preprocessor.py index 55da5d2dfafda816be7dcb2d334a3a0711e0b699..c6678bf28d61d4ec666d11cf0cac81d398241936 100644 --- a/official/vision/detection/utils/object_detection/preprocessor.py +++ b/official/vision/detection/utils/object_detection/preprocessor.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Preprocess images and bounding boxes for detection. We perform two sets of operations in preprocessing stage: @@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes): """Left-right flip the boxes. Args: - boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. + boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes + are in normalized form meaning their coordinates vary between [0, 1]. Each + row is in the form of [ymin, xmin, ymax, xmax]. Returns: Flipped boxes. @@ -69,8 +68,8 @@ def _flip_masks_left_right(masks): """Left-right flip masks. Args: - masks: rank 3 float32 tensor with shape - [num_instances, height, width] representing instance masks. + masks: rank 3 float32 tensor with shape [num_instances, height, width] + representing instance masks. 
Returns: flipped masks: rank 3 float32 tensor with shape @@ -79,7 +78,9 @@ def _flip_masks_left_right(masks): return masks[:, :, ::-1] -def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation, +def keypoint_flip_horizontal(keypoints, + flip_point, + flip_permutation, scope=None): """Flips the keypoints horizontally around the flip_point. @@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation, flip_point: (float) scalar tensor representing the x coordinate to flip the keypoints around. flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. This specifies the mapping from original keypoint indices - to the flipped keypoint indices. This is used primarily for keypoints - that are not reflection invariant. E.g. Suppose there are 3 keypoints + permutation. This specifies the mapping from original keypoint indices to + the flipped keypoint indices. This is used primarily for keypoints that + are not reflection invariant. E.g. Suppose there are 3 keypoints representing ['head', 'right_eye', 'left_eye'], then a logical choice for flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' and 'right_eye' after a horizontal flip. @@ -190,19 +191,16 @@ def random_horizontal_flip(image, Args: image: rank 3 float32 tensor with shape [height, width, channels]. - boxes: (optional) rank 2 float32 tensor with shape [N, 4] - containing the bounding boxes. - Boxes are in normalized form meaning their coordinates vary - between [0, 1]. - Each row is in the form of [ymin, xmin, ymax, xmax]. - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. The masks - are of the same height, width as the input `image`. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x - normalized coordinates. + boxes: (optional) rank 2 float32 tensor with shape [N, 4] containing the + bounding boxes. 
Boxes are in normalized form meaning their coordinates + vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax]. + masks: (optional) rank 3 float32 tensor with shape [num_instances, height, + width] containing instance masks. The masks are of the same height, width + as the input `image`. + keypoints: (optional) rank 3 float32 tensor with shape [num_instances, + num_keypoints, 2]. The keypoints are in y-x normalized coordinates. keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip - permutation. + permutation. seed: random seed Returns: @@ -369,20 +367,19 @@ def resize_to_range(image, Args: image: A 3D tensor of shape [height, width, channels] - masks: (optional) rank 3 float32 tensor with shape - [num_instances, height, width] containing instance masks. + masks: (optional) rank 3 float32 tensor with shape [num_instances, height, + width] containing instance masks. min_dimension: (optional) (scalar) desired size of the smaller image - dimension. - max_dimension: (optional) (scalar) maximum allowed size - of the larger image dimension. + dimension. + max_dimension: (optional) (scalar) maximum allowed size of the larger image + dimension. method: (optional) interpolation method used in resizing. Defaults to - BILINEAR. - align_corners: bool. If true, exactly align all 4 corners of the input - and output. Defaults to False. - pad_to_max_dimension: Whether to resize the image and pad it with zeros - so the resulting image is of the spatial size - [max_dimension, max_dimension]. If masks are included they are padded - similarly. + BILINEAR. + align_corners: bool. If true, exactly align all 4 corners of the input and + output. Defaults to False. + pad_to_max_dimension: Whether to resize the image and pad it with zeros so + the resulting image is of the spatial size [max_dimension, max_dimension]. + If masks are included they are padded similarly. 
Returns: Note that the position of the resized_image_shape changes based on whether @@ -410,8 +407,8 @@ def resize_to_range(image, new_image = tf.image.resize(image, new_size[:-1], method=method) if pad_to_max_dimension: - new_image = tf.image.pad_to_bounding_box( - new_image, 0, 0, max_dimension, max_dimension) + new_image = tf.image.pad_to_bounding_box(new_image, 0, 0, max_dimension, + max_dimension) result = [new_image] if masks is not None: @@ -422,8 +419,8 @@ def resize_to_range(image, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) new_masks = tf.squeeze(new_masks, 3) if pad_to_max_dimension: - new_masks = tf.image.pad_to_bounding_box( - new_masks, 0, 0, max_dimension, max_dimension) + new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0, max_dimension, + max_dimension) result.append(new_masks) result.append(new_size) @@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): Args: image: A 3D float32 tensor of shape [height, width, channels]. boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding - boxes in normalized coordinates. Each row is of the form - [ymin, xmin, ymax, xmax]. - keypoints: (optional) rank 3 float32 tensor with shape - [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized - coordinates. + boxes in normalized coordinates. Each row is of the form [ymin, xmin, + ymax, xmax]. + keypoints: (optional) rank 3 float32 tensor with shape [num_instances, + num_keypoints, 2]. The keypoints are in y-x normalized coordinates. Returns: image: unchanged input image. 
diff --git a/official/vision/detection/utils/object_detection/region_similarity_calculator.py b/official/vision/detection/utils/object_detection/region_similarity_calculator.py index 0af2ce495ad53c9df0f8d2eb79f7431b02ab430e..9b26b4c65f9c0dde20b1d2d1916b0c3aa3d9e23f 100644 --- a/official/vision/detection/utils/object_detection/region_similarity_calculator.py +++ b/official/vision/detection/utils/object_detection/region_similarity_calculator.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Region Similarity Calculators for BoxLists. diff --git a/official/vision/detection/utils/object_detection/shape_utils.py b/official/vision/detection/utils/object_detection/shape_utils.py index e30b62b7acc15b7f9f98b6c27b1a22efaf2998a8..6bf7c49d0e1d0eb9f10524e7889ba74c461bfc50 100644 --- a/official/vision/detection/utils/object_detection/shape_utils.py +++ b/official/vision/detection/utils/object_detection/shape_utils.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== """Utils used to manipulate tensor shapes.""" @@ -42,7 +41,8 @@ def assert_shape_equal(shape_a, shape_b): all(isinstance(dim, int) for dim in shape_b)): if shape_a != shape_b: raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b)) - else: return tf.no_op() + else: + return tf.no_op() else: return tf.assert_equal(shape_a, shape_b) @@ -87,9 +87,7 @@ def pad_or_clip_nd(tensor, output_shape): if shape is not None else -1 for i, shape in enumerate(output_shape) ] clipped_tensor = tf.slice( - tensor, - begin=tf.zeros(len(clip_size), dtype=tf.int32), - size=clip_size) + tensor, begin=tf.zeros(len(clip_size), dtype=tf.int32), size=clip_size) # Pad tensor if the shape of clipped tensor is smaller than the expected # shape. @@ -99,10 +97,7 @@ def pad_or_clip_nd(tensor, output_shape): for i, shape in enumerate(output_shape) ] paddings = tf.stack( - [ - tf.zeros(len(trailing_paddings), dtype=tf.int32), - trailing_paddings - ], + [tf.zeros(len(trailing_paddings), dtype=tf.int32), trailing_paddings], axis=1) padded_tensor = tf.pad(tensor=clipped_tensor, paddings=paddings) output_static_shape = [ diff --git a/official/vision/detection/utils/object_detection/target_assigner.py b/official/vision/detection/utils/object_detection/target_assigner.py index c04448efb052b45da65366b26e7d773b62015773..43aeace3751d4932e69de2c4b4d79372c6e849aa 100644 --- a/official/vision/detection/utils/object_detection/target_assigner.py +++ b/official/vision/detection/utils/object_detection/target_assigner.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """Base target assigner module. @@ -31,35 +30,39 @@ Note that TargetAssigners only operate on detections from a single image at a time, so any logic for applying a TargetAssigner to multiple images must be handled externally. """ + import tensorflow as tf from official.vision.detection.utils.object_detection import box_list from official.vision.detection.utils.object_detection import shape_utils - KEYPOINTS_FIELD_NAME = 'keypoints' class TargetAssigner(object): """Target assigner to compute classification and regression targets.""" - def __init__(self, similarity_calc, matcher, box_coder, - negative_class_weight=1.0, unmatched_cls_target=None): + def __init__(self, + similarity_calc, + matcher, + box_coder, + negative_class_weight=1.0, + unmatched_cls_target=None): """Construct Object Detection Target Assigner. Args: similarity_calc: a RegionSimilarityCalculator matcher: Matcher used to match groundtruth to anchors. - box_coder: BoxCoder used to encode matching groundtruth boxes with - respect to anchors. + box_coder: BoxCoder used to encode matching groundtruth boxes with respect + to anchors. negative_class_weight: classification weight to be associated to negative anchors (default: 1.0). The weight must be in [0., 1.]. unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] - which is consistent with the classification target for each - anchor (and can be empty for scalar targets). This shape must thus be - compatible with the groundtruth labels that are passed to the "assign" - function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). - If set to None, unmatched_cls_target is set to be [0] for each anchor. + which is consistent with the classification target for each anchor (and + can be empty for scalar targets). 
This shape must thus be compatible + with the groundtruth labels that are passed to the "assign" function + (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None, + unmatched_cls_target is set to be [0] for each anchor. Raises: ValueError: if similarity_calc is not a RegionSimilarityCalculator or @@ -78,8 +81,12 @@ class TargetAssigner(object): def box_coder(self): return self._box_coder - def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, - groundtruth_weights=None, **params): + def assign(self, + anchors, + groundtruth_boxes, + groundtruth_labels=None, + groundtruth_weights=None, + **params): """Assign classification and regression targets to each anchor. For a given set of anchors and groundtruth detections, match anchors @@ -93,16 +100,16 @@ class TargetAssigner(object): Args: anchors: a BoxList representing N anchors groundtruth_boxes: a BoxList representing M groundtruth boxes - groundtruth_labels: a tensor of shape [M, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set - to None, groundtruth_labels assumes a binary problem where all - ground_truth boxes get a positive label (of 1). + groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for + each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty + (corresponding to scalar inputs). When set to None, groundtruth_labels + assumes a binary problem where all ground_truth boxes get a positive + label (of 1). groundtruth_weights: a float tensor of shape [M] indicating the weight to assign to all anchors match to a particular groundtruth box. The weights must be in [0., 1.]. If None, all weights are set to 1. - **params: Additional keyword arguments for specific implementations of - the Matcher. + **params: Additional keyword arguments for specific implementations of the + Matcher. 
Returns: cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], @@ -125,16 +132,15 @@ class TargetAssigner(object): raise ValueError('groundtruth_boxes must be an BoxList') if groundtruth_labels is None: - groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(), - 0)) + groundtruth_labels = tf.ones( + tf.expand_dims(groundtruth_boxes.num_boxes(), 0)) groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) unmatched_shape_assert = shape_utils.assert_shape_equal( shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:], shape_utils.combined_static_and_dynamic_shape( self._unmatched_cls_target)) labels_and_box_shapes_assert = shape_utils.assert_shape_equal( - shape_utils.combined_static_and_dynamic_shape( - groundtruth_labels)[:1], + shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1], shape_utils.combined_static_and_dynamic_shape( groundtruth_boxes.get())[:1]) @@ -145,11 +151,10 @@ class TargetAssigner(object): groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32) with tf.control_dependencies( [unmatched_shape_assert, labels_and_box_shapes_assert]): - match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes, - anchors) + match_quality_matrix = self._similarity_calc( + groundtruth_boxes.get(), anchors.get()) match = self._matcher.match(match_quality_matrix, **params) - reg_targets = self._create_regression_targets(anchors, - groundtruth_boxes, + reg_targets = self._create_regression_targets(anchors, groundtruth_boxes, match) cls_targets = self._create_classification_targets(groundtruth_labels, match) @@ -210,8 +215,8 @@ class TargetAssigner(object): match.match_results) # Zero out the unmatched and ignored regression targets. 
- unmatched_ignored_reg_targets = tf.tile( - self._default_regression_target(), [match_results_shape[0], 1]) + unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(), + [match_results_shape[0], 1]) matched_anchors_mask = match.matched_column_indicator() # To broadcast matched_anchors_mask to the same shape as # matched_reg_targets. @@ -233,7 +238,7 @@ class TargetAssigner(object): Returns: default_target: a float32 tensor with shape [1, box_code_dimension] """ - return tf.constant([self._box_coder.code_size*[0]], tf.float32) + return tf.constant([self._box_coder.code_size * [0]], tf.float32) def _create_classification_targets(self, groundtruth_labels, match): """Create classification targets for each anchor. @@ -243,11 +248,11 @@ class TargetAssigner(object): to anything are given the target self._unmatched_cls_target Args: - groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] - with labels for each of the ground_truth boxes. The subshape - [d_1, ... d_k] can be empty (corresponding to scalar labels). - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. + groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k] with + labels for each of the ground_truth boxes. The subshape [d_1, ... d_k] + can be empty (corresponding to scalar labels). + match: a matcher.Match object that provides a matching between anchors and + groundtruth boxes. Returns: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the @@ -267,8 +272,8 @@ class TargetAssigner(object): negative anchor. Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. + match: a matcher.Match object that provides a matching between anchors and + groundtruth boxes. groundtruth_weights: a float tensor of shape [M] indicating the weight to assign to all anchors match to a particular groundtruth box. 
@@ -278,9 +283,7 @@ class TargetAssigner(object): return match.gather_based_on_match( groundtruth_weights, ignored_value=0., unmatched_value=0.) - def _create_classification_weights(self, - match, - groundtruth_weights): + def _create_classification_weights(self, match, groundtruth_weights): """Create classification weights for each anchor. Positive (matched) anchors are associated with a weight of @@ -291,8 +294,8 @@ class TargetAssigner(object): the case in object detection). Args: - match: a matcher.Match object that provides a matching between anchors - and groundtruth boxes. + match: a matcher.Match object that provides a matching between anchors and + groundtruth boxes. groundtruth_weights: a float tensor of shape [M] indicating the weight to assign to all anchors match to a particular groundtruth box. @@ -312,3 +315,209 @@ class TargetAssigner(object): BoxCoder object. """ return self._box_coder + + +class OlnTargetAssigner(TargetAssigner): + """Target assigner to compute classification and regression targets.""" + + def __init__(self, + similarity_calc, + matcher, + box_coder, + negative_class_weight=1.0, + unmatched_cls_target=None, + center_matcher=None): + """Construct Object Detection Target Assigner. + + Args: + similarity_calc: a RegionSimilarityCalculator + matcher: Matcher used to match groundtruth to anchors. + box_coder: BoxCoder used to encode matching groundtruth boxes with respect + to anchors. + negative_class_weight: classification weight to be associated to negative + anchors (default: 1.0). The weight must be in [0., 1.]. + unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each anchor (and + can be empty for scalar targets). This shape must thus be compatible + with the groundtruth labels that are passed to the "assign" function + (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None, + unmatched_cls_target is set to be [0] for each anchor. 
+ center_matcher: Matcher used to match groundtruth to anchors to sample and + assign the regression targets of centerness to each anchor. + + Raises: + ValueError: if similarity_calc is not a RegionSimilarityCalculator or + if matcher is not a Matcher or if box_coder is not a BoxCoder + """ + super(OlnTargetAssigner, self).__init__( + similarity_calc=similarity_calc, + matcher=matcher, + box_coder=box_coder, + negative_class_weight=negative_class_weight, + unmatched_cls_target=unmatched_cls_target) + + # centerness-matcher with independent sampling IoU threshold. + self._center_matcher = center_matcher + + def assign(self, + anchors, + groundtruth_boxes, + groundtruth_labels=None, + groundtruth_weights=None, + **params): + """Assign classification and regression targets to each anchor. + + For a given set of anchors and groundtruth detections, match anchors + to groundtruth_boxes and assign classification and regression targets to + each anchor as well as weights based on the resulting match (specifying, + e.g., which anchors should not contribute to training loss). + + Anchors that are not matched to anything are given a classification target + of self._unmatched_cls_target which can be specified via the constructor. + + Args: + anchors: a BoxList representing N anchors + groundtruth_boxes: a BoxList representing M groundtruth boxes + groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for + each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty + (corresponding to scalar inputs). When set to None, groundtruth_labels + assumes a binary problem where all ground_truth boxes get a positive + label (of 1). + groundtruth_weights: a float tensor of shape [M] indicating the weight to + assign to all anchors match to a particular groundtruth box. The weights + must be in [0., 1.]. If None, all weights are set to 1. + **params: Additional keyword arguments for specific implementations of the + Matcher. 
+ + Returns: + cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], + where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels + which has shape [num_gt_boxes, d_1, d_2, ... d_k]. + cls_weights: a float32 tensor with shape [num_anchors] + reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] + reg_weights: a float32 tensor with shape [num_anchors] + match: a matcher.Match object encoding the match between anchors and + groundtruth boxes, with rows corresponding to groundtruth boxes + and columns corresponding to anchors. + matched_gt_boxlist: a BoxList object with data of float32 tensor with + shape [num_anchors, box_dimension] which encodes the coordinates of the + matched groundtruth boxes. + matched_anchors_mask: a Bool tensor with shape [num_anchors] which + indicates whether an anchor is matched or not. + center_matched_gt_boxlist: a BoxList object with data of float32 tensor + with shape [num_anchors, box_dimension] which encodes the coordinates of + the groundtruth boxes matched for centerness target assignment. + center_matched_anchors_mask: a Boolean tensor with shape [num_anchors] + which indicates whether an anchor is matched or not for centerness + target assignment. + matched_ious: a float32 tensor with shape [num_anchors] which encodes the + ious between each anchor and the matched groundtruth boxes. 
+ + Raises: + ValueError: if anchors or groundtruth_boxes are not of type + box_list.BoxList + """ + if not isinstance(anchors, box_list.BoxList): + raise ValueError('anchors must be an BoxList') + if not isinstance(groundtruth_boxes, box_list.BoxList): + raise ValueError('groundtruth_boxes must be an BoxList') + + if groundtruth_labels is None: + groundtruth_labels = tf.ones( + tf.expand_dims(groundtruth_boxes.num_boxes(), 0)) + groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) + unmatched_shape_assert = shape_utils.assert_shape_equal( + shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:], + shape_utils.combined_static_and_dynamic_shape( + self._unmatched_cls_target)) + labels_and_box_shapes_assert = shape_utils.assert_shape_equal( + shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1], + shape_utils.combined_static_and_dynamic_shape( + groundtruth_boxes.get())[:1]) + + if groundtruth_weights is None: + num_gt_boxes = groundtruth_boxes.num_boxes_static() + if not num_gt_boxes: + num_gt_boxes = groundtruth_boxes.num_boxes() + groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32) + with tf.control_dependencies( + [unmatched_shape_assert, labels_and_box_shapes_assert]): + match_quality_matrix = self._similarity_calc( + groundtruth_boxes.get(), anchors.get()) + match = self._matcher.match(match_quality_matrix, **params) + reg_targets, matched_gt_boxlist, matched_anchors_mask = ( + self._create_regression_targets(anchors, + groundtruth_boxes, + match)) + cls_targets = self._create_classification_targets(groundtruth_labels, + match) + reg_weights = self._create_regression_weights(match, groundtruth_weights) + cls_weights = self._create_classification_weights(match, + groundtruth_weights) + # Match for creation of centerness regression targets. 
+ if self._center_matcher is not None: + center_match = self._center_matcher.match( + match_quality_matrix, **params) + center_matched_gt_boxes = center_match.gather_based_on_match( + groundtruth_boxes.get(), + unmatched_value=tf.zeros(4), + ignored_value=tf.zeros(4)) + center_matched_gt_boxlist = box_list.BoxList(center_matched_gt_boxes) + center_matched_anchors_mask = center_match.matched_column_indicator() + + num_anchors = anchors.num_boxes_static() + if num_anchors is not None: + reg_targets = self._reset_target_shape(reg_targets, num_anchors) + cls_targets = self._reset_target_shape(cls_targets, num_anchors) + reg_weights = self._reset_target_shape(reg_weights, num_anchors) + cls_weights = self._reset_target_shape(cls_weights, num_anchors) + + if self._center_matcher is not None: + matched_ious = tf.reduce_max(match_quality_matrix, 0) + return (cls_targets, cls_weights, reg_targets, reg_weights, match, + matched_gt_boxlist, matched_anchors_mask, + center_matched_gt_boxlist, center_matched_anchors_mask, + matched_ious) + else: + return (cls_targets, cls_weights, reg_targets, reg_weights, match) + + def _create_regression_targets(self, anchors, groundtruth_boxes, match): + """Returns a regression target for each anchor. 
+ + Args: + anchors: a BoxList representing N anchors + groundtruth_boxes: a BoxList representing M groundtruth_boxes + match: a matcher.Match object + + Returns: + reg_targets: a float32 tensor with shape [N, box_code_dimension] + """ + matched_gt_boxes = match.gather_based_on_match( + groundtruth_boxes.get(), + unmatched_value=tf.zeros(4), + ignored_value=tf.zeros(4)) + matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) + if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME): + groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME) + matched_keypoints = match.gather_based_on_match( + groundtruth_keypoints, + unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]), + ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:])) + matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints) + matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors) + match_results_shape = shape_utils.combined_static_and_dynamic_shape( + match.match_results) + + # Zero out the unmatched and ignored regression targets. + unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(), + [match_results_shape[0], 1]) + matched_anchors_mask = match.matched_column_indicator() + # To broadcast matched_anchors_mask to the same shape as + # matched_reg_targets. 
+ matched_anchors_mask_tiled = tf.tile( + tf.expand_dims(matched_anchors_mask, 1), + [1, tf.shape(matched_reg_targets)[1]]) + reg_targets = tf.where(matched_anchors_mask_tiled, + matched_reg_targets, + unmatched_ignored_reg_targets) + return reg_targets, matched_gt_boxlist, matched_anchors_mask diff --git a/official/vision/detection/utils/object_detection/visualization_utils.py b/official/vision/detection/utils/object_detection/visualization_utils.py index db4af8089df673cd5c57c4a020b5d7e8f03846c9..d063325b37efe509160bd32186d1b47e304e4f27 100644 --- a/official/vision/detection/utils/object_detection/visualization_utils.py +++ b/official/vision/detection/utils/object_detection/visualization_utils.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """A set of functions that are used for visualization. @@ -21,9 +20,11 @@ The functions do not return a value, instead they modify the image itself. """ import collections import functools + from absl import logging # Set headless-friendly backend. 
-import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements +import matplotlib +matplotlib.use('Agg') # pylint: disable=multiple-statements import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top import numpy as np import PIL.Image as Image @@ -36,7 +37,6 @@ import tensorflow as tf from official.vision.detection.utils import box_utils from official.vision.detection.utils.object_detection import shape_utils - _TITLE_LEFT_MARGIN = 10 _TITLE_TOP_MARGIN = 10 STANDARD_COLORS = [ @@ -99,9 +99,9 @@ def visualize_images_with_bounding_boxes(images, box_outputs, step, summary_writer): """Records subset of evaluation images with bounding boxes.""" if not isinstance(images, list): - logging.warning('visualize_images_with_bounding_boxes expects list of ' - 'images but received type: %s and value: %s', - type(images), images) + logging.warning( + 'visualize_images_with_bounding_boxes expects list of ' + 'images but received type: %s and value: %s', type(images), images) return image_shape = tf.shape(images[0]) @@ -140,11 +140,11 @@ def draw_bounding_box_on_image_array(image, xmax: xmax of bounding box. color: color to draw bounding box. Default is red. thickness: line thickness. Default value is 4. - display_str_list: list of strings to display in box - (each to be shown on its own line). - use_normalized_coordinates: If True (default), treat coordinates - ymin, xmin, ymax, xmax as relative to the image. Otherwise treat - coordinates as absolute. + display_str_list: list of strings to display in box (each to be shown on its + own line). + use_normalized_coordinates: If True (default), treat coordinates ymin, xmin, + ymax, xmax as relative to the image. Otherwise treat coordinates as + absolute. """ image_pil = Image.fromarray(np.uint8(image)).convert('RGB') draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, @@ -180,11 +180,11 @@ def draw_bounding_box_on_image(image, xmax: xmax of bounding box. color: color to draw bounding box. 
Default is red. thickness: line thickness. Default value is 4. - display_str_list: list of strings to display in box - (each to be shown on its own line). - use_normalized_coordinates: If True (default), treat coordinates - ymin, xmin, ymax, xmax as relative to the image. Otherwise treat - coordinates as absolute. + display_str_list: list of strings to display in box (each to be shown on its + own line). + use_normalized_coordinates: If True (default), treat coordinates ymin, xmin, + ymax, xmax as relative to the image. Otherwise treat coordinates as + absolute. """ draw = ImageDraw.Draw(image) im_width, im_height = image.size @@ -193,8 +193,10 @@ def draw_bounding_box_on_image(image, ymin * im_height, ymax * im_height) else: (left, right, top, bottom) = (xmin, xmax, ymin, ymax) - draw.line([(left, top), (left, bottom), (right, bottom), - (right, top), (left, top)], width=thickness, fill=color) + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), + (left, top)], + width=thickness, + fill=color) try: font = ImageFont.truetype('arial.ttf', 24) except IOError: @@ -215,15 +217,13 @@ def draw_bounding_box_on_image(image, for display_str in display_str_list[::-1]: text_width, text_height = font.getsize(display_str) margin = np.ceil(0.05 * text_height) - draw.rectangle( - [(left, text_bottom - text_height - 2 * margin), (left + text_width, - text_bottom)], - fill=color) - draw.text( - (left + margin, text_bottom - text_height - margin), - display_str, - fill='black', - font=font) + draw.rectangle([(left, text_bottom - text_height - 2 * margin), + (left + text_width, text_bottom)], + fill=color) + draw.text((left + margin, text_bottom - text_height - margin), + display_str, + fill='black', + font=font) text_bottom -= text_height - 2 * margin @@ -236,15 +236,13 @@ def draw_bounding_boxes_on_image_array(image, Args: image: a numpy array object. - boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 
- The coordinates are in normalized format between [0, 1]. + boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). The + coordinates are in normalized format between [0, 1]. color: color to draw bounding box. Default is red. thickness: line thickness. Default value is 4. - display_str_list_list: list of list of strings. - a list of strings for each bounding box. - The reason to pass a list of strings for a - bounding box is that it might contain - multiple labels. + display_str_list_list: list of list of strings. a list of strings for each + bounding box. The reason to pass a list of strings for a bounding box is + that it might contain multiple labels. Raises: ValueError: if boxes is not a [N, 4] array @@ -264,15 +262,13 @@ def draw_bounding_boxes_on_image(image, Args: image: a PIL.Image object. - boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). - The coordinates are in normalized format between [0, 1]. + boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). The + coordinates are in normalized format between [0, 1]. color: color to draw bounding box. Default is red. thickness: line thickness. Default value is 4. - display_str_list_list: list of list of strings. - a list of strings for each bounding box. - The reason to pass a list of strings for a - bounding box is that it might contain - multiple labels. + display_str_list_list: list of list of strings. a list of strings for each + bounding box. The reason to pass a list of strings for a bounding box is + that it might contain multiple labels. 
Raises: ValueError: if boxes is not a [N, 4] array @@ -319,8 +315,9 @@ def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints, **kwargs) -def _visualize_boxes_and_masks_and_keypoints( - image, boxes, classes, scores, masks, keypoints, category_index, **kwargs): +def _visualize_boxes_and_masks_and_keypoints(image, boxes, classes, scores, + masks, keypoints, category_index, + **kwargs): return visualize_boxes_and_labels_on_image_array( image, boxes, @@ -374,8 +371,8 @@ def draw_bounding_boxes_on_image_tensors(images, max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. min_score_thresh: Minimum score threshold for visualization. Default 0.2. use_normalized_coordinates: Whether to assume boxes and kepoints are in - normalized coordinates (as opposed to absolute coordiantes). - Default is True. + normalized coordinates (as opposed to absolute coordiantes). Default is + True. Returns: 4D image tensor of type uint8, with boxes drawn on top. @@ -432,17 +429,15 @@ def draw_bounding_boxes_on_image_tensors(images, _visualize_boxes, category_index=category_index, **visualization_keyword_args) - elems = [ - true_shapes, original_shapes, images, boxes, classes, scores - ] + elems = [true_shapes, original_shapes, images, boxes, classes, scores] def draw_boxes(image_and_detections): """Draws boxes on image.""" true_shape = image_and_detections[0] original_shape = image_and_detections[1] if true_image_shape is not None: - image = shape_utils.pad_or_clip_nd( - image_and_detections[2], [true_shape[0], true_shape[1], 3]) + image = shape_utils.pad_or_clip_nd(image_and_detections[2], + [true_shape[0], true_shape[1], 3]) if original_image_spatial_shape is not None: image_and_detections[2] = _resize_original_image(image, original_shape) @@ -500,7 +495,8 @@ def draw_keypoints_on_image(image, for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y): draw.ellipse([(keypoint_x - radius, keypoint_y - radius), (keypoint_x + radius, keypoint_y + 
radius)], - outline=color, fill=color) + outline=color, + fill=color) def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): @@ -508,8 +504,8 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): Args: image: uint8 numpy array with shape (img_height, img_height, 3) - mask: a uint8 numpy array of shape (img_height, img_height) with - values between either 0 or 1. + mask: a uint8 numpy array of shape (img_height, img_height) with values + between either 0 or 1. color: color to draw the keypoints with. Default is red. alpha: transparency value between 0 and 1. (default: 0.4) @@ -531,7 +527,7 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): solid_color = np.expand_dims( np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') - pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L') + pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L') pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) np.copyto(image, np.array(pil_image.convert('RGB'))) @@ -565,21 +561,20 @@ def visualize_boxes_and_labels_on_image_array( boxes: a numpy array of shape [N, 4] classes: a numpy array of shape [N]. Note that class indices are 1-based, and match the keys in the label map. - scores: a numpy array of shape [N] or None. If scores=None, then - this function assumes that the boxes to be plotted are groundtruth - boxes and plot all boxes as black with no classes or scores. + scores: a numpy array of shape [N] or None. If scores=None, then this + function assumes that the boxes to be plotted are groundtruth boxes and + plot all boxes as black with no classes or scores. category_index: a dict containing category dictionaries (each holding category index `id` and category name `name`) keyed by category indices. 
instance_masks: a numpy array of shape [N, image_height, image_width] with values ranging between 0 and 1, can be None. instance_boundaries: a numpy array of shape [N, image_height, image_width] with values ranging between 0 and 1, can be None. - keypoints: a numpy array of shape [N, num_keypoints, 2], can - be None - use_normalized_coordinates: whether boxes is to be interpreted as - normalized coordinates or not. - max_boxes_to_draw: maximum number of boxes to visualize. If None, draw - all boxes. + keypoints: a numpy array of shape [N, num_keypoints, 2], can be None + use_normalized_coordinates: whether boxes is to be interpreted as normalized + coordinates or not. + max_boxes_to_draw: maximum number of boxes to visualize. If None, draw all + boxes. min_score_thresh: minimum score threshold for a box to be visualized agnostic_mode: boolean (default: False) controlling whether to evaluate in class-agnostic mode or not. This mode will display scores but ignore @@ -624,32 +619,25 @@ def visualize_boxes_and_labels_on_image_array( display_str = str(class_name) if not skip_scores: if not display_str: - display_str = '{}%'.format(int(100*scores[i])) + display_str = '{}%'.format(int(100 * scores[i])) else: - display_str = '{}: {}%'.format(display_str, int(100*scores[i])) + display_str = '{}: {}%'.format(display_str, int(100 * scores[i])) box_to_display_str_map[box].append(display_str) if agnostic_mode: box_to_color_map[box] = 'DarkOrange' else: - box_to_color_map[box] = STANDARD_COLORS[ - classes[i] % len(STANDARD_COLORS)] + box_to_color_map[box] = STANDARD_COLORS[classes[i] % + len(STANDARD_COLORS)] # Draw all boxes onto image. 
for box, color in box_to_color_map.items(): ymin, xmin, ymax, xmax = box if instance_masks is not None: draw_mask_on_image_array( - image, - box_to_instance_masks_map[box], - color=color - ) + image, box_to_instance_masks_map[box], color=color) if instance_boundaries is not None: draw_mask_on_image_array( - image, - box_to_instance_boundaries_map[box], - color='red', - alpha=1.0 - ) + image, box_to_instance_boundaries_map[box], color='red', alpha=1.0) draw_bounding_box_on_image_array( image, ymin, @@ -681,13 +669,15 @@ def add_cdf_image_summary(values, name): values: a 1-D float32 tensor containing the values. name: name for the image summary. """ + def cdf_plot(values): """Numpy function to plot CDF.""" normalized_values = values / np.sum(values) sorted_values = np.sort(normalized_values) cumulative_values = np.cumsum(sorted_values) - fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32) - / cumulative_values.size) + fraction_of_examples = ( + np.arange(cumulative_values.size, dtype=np.float32) / + cumulative_values.size) fig = plt.figure(frameon=False) ax = fig.add_subplot('111') ax.plot(fraction_of_examples, cumulative_values) @@ -695,8 +685,9 @@ def add_cdf_image_summary(values, name): ax.set_xlabel('fraction of examples') fig.canvas.draw() width, height = fig.get_size_inches() * fig.get_dpi() - image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape( - 1, int(height), int(width), 3) + image = np.fromstring( + fig.canvas.tostring_rgb(), + dtype='uint8').reshape(1, int(height), int(width), 3) return image cdf_plot = tf.compat.v1.py_func(cdf_plot, [values], tf.uint8) @@ -725,8 +716,8 @@ def add_hist_image_summary(values, bins, name): fig.canvas.draw() width, height = fig.get_size_inches() * fig.get_dpi() image = np.fromstring( - fig.canvas.tostring_rgb(), dtype='uint8').reshape( - 1, int(height), int(width), 3) + fig.canvas.tostring_rgb(), + dtype='uint8').reshape(1, int(height), int(width), 3) return image hist_plot = 
tf.compat.v1.py_func(hist_plot, [values, bins], tf.uint8) diff --git a/official/vision/image_classification/README.md b/official/vision/image_classification/README.md index eb061d5b5f3284255bdb484cfbbb20bb3e157268..78bfe1f27e6543cb999698c9fec9e039d1ce9955 100644 --- a/official/vision/image_classification/README.md +++ b/official/vision/image_classification/README.md @@ -43,7 +43,7 @@ builder to 'records' or 'tfds' in the configurations. Note: These models will **not** work with TPUs on Colab. You can train image classification models on Cloud TPUs using -[tf.distribute.experimental.TPUStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/TPUStrategy?version=nightly). +[tf.distribute.TPUStrategy](https://www.tensorflow.org/api_docs/python/tf.distribute.TPUStrategy?version=nightly). If you are not familiar with Cloud TPUs, it is strongly recommended that you go through the [quickstart](https://cloud.google.com/tpu/docs/quickstart) to learn how to diff --git a/official/vision/image_classification/__init__.py b/official/vision/image_classification/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/image_classification/__init__.py +++ b/official/vision/image_classification/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/official/vision/image_classification/augment.py b/official/vision/image_classification/augment.py index b6ef23a229c80bcc1fec92d431996688dc34eaad..d6acd0813875250cbfb49696cd01df541b39893a 100644 --- a/official/vision/image_classification/augment.py +++ b/official/vision/image_classification/augment.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """AutoAugment and RandAugment policies for enhanced image preprocessing. AutoAugment Reference: https://arxiv.org/abs/1805.09501 @@ -24,6 +24,7 @@ from __future__ import division from __future__ import print_function import math + import tensorflow as tf from typing import Any, Dict, List, Optional, Text, Tuple @@ -120,10 +121,8 @@ def _convert_translation_to_transform(translations: tf.Tensor) -> tf.Tensor: ) -def _convert_angles_to_transform( - angles: tf.Tensor, - image_width: tf.Tensor, - image_height: tf.Tensor) -> tf.Tensor: +def _convert_angles_to_transform(angles: tf.Tensor, image_width: tf.Tensor, + image_height: tf.Tensor) -> tf.Tensor: """Converts an angle or angles to a projective transform. 
Args: @@ -173,9 +172,7 @@ def transform(image: tf.Tensor, transforms) -> tf.Tensor: transforms = transforms[None] image = to_4d(image) image = image_ops.transform( - images=image, - transforms=transforms, - interpolation='nearest') + images=image, transforms=transforms, interpolation='nearest') return from_4d(image, original_ndims) @@ -216,9 +213,8 @@ def rotate(image: tf.Tensor, degrees: float) -> tf.Tensor: image_height = tf.cast(tf.shape(image)[1], tf.float32) image_width = tf.cast(tf.shape(image)[2], tf.float32) - transforms = _convert_angles_to_transform(angles=radians, - image_width=image_width, - image_height=image_height) + transforms = _convert_angles_to_transform( + angles=radians, image_width=image_width, image_height=image_height) # In practice, we should randomize the rotation degrees by flipping # it negatively half the time, but that's done on 'degrees' outside # of the function. @@ -279,11 +275,10 @@ def cutout(image: tf.Tensor, pad_size: int, replace: int = 0) -> tf.Tensor: Args: image: An image Tensor of type uint8. - pad_size: Specifies how big the zero mask that will be generated is that - is applied to the image. The mask will be of size - (2*pad_size x 2*pad_size). - replace: What pixel value to fill in the image in the area that has - the cutout mask applied to it. + pad_size: Specifies how big the zero mask that will be generated is that is + applied to the image. The mask will be of size (2*pad_size x 2*pad_size). + replace: What pixel value to fill in the image in the area that has the + cutout mask applied to it. Returns: An image Tensor that is of type uint8. @@ -293,30 +288,30 @@ def cutout(image: tf.Tensor, pad_size: int, replace: int = 0) -> tf.Tensor: # Sample the center location in the image where the zero mask will be applied. 
cutout_center_height = tf.random.uniform( - shape=[], minval=0, maxval=image_height, - dtype=tf.int32) + shape=[], minval=0, maxval=image_height, dtype=tf.int32) cutout_center_width = tf.random.uniform( - shape=[], minval=0, maxval=image_width, - dtype=tf.int32) + shape=[], minval=0, maxval=image_width, dtype=tf.int32) lower_pad = tf.maximum(0, cutout_center_height - pad_size) upper_pad = tf.maximum(0, image_height - cutout_center_height - pad_size) left_pad = tf.maximum(0, cutout_center_width - pad_size) right_pad = tf.maximum(0, image_width - cutout_center_width - pad_size) - cutout_shape = [image_height - (lower_pad + upper_pad), - image_width - (left_pad + right_pad)] + cutout_shape = [ + image_height - (lower_pad + upper_pad), + image_width - (left_pad + right_pad) + ] padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]] mask = tf.pad( tf.zeros(cutout_shape, dtype=image.dtype), - padding_dims, constant_values=1) + padding_dims, + constant_values=1) mask = tf.expand_dims(mask, -1) mask = tf.tile(mask, [1, 1, 3]) image = tf.where( tf.equal(mask, 0), - tf.ones_like(image, dtype=image.dtype) * replace, - image) + tf.ones_like(image, dtype=image.dtype) * replace, image) return image @@ -398,8 +393,8 @@ def shear_x(image: tf.Tensor, level: float, replace: int) -> tf.Tensor: # with a matrix form of: # [1 level # 0 1]. - image = transform(image=wrap(image), - transforms=[1., level, 0., 0., 1., 0., 0., 0.]) + image = transform( + image=wrap(image), transforms=[1., level, 0., 0., 1., 0., 0., 0.]) return unwrap(image, replace) @@ -409,8 +404,8 @@ def shear_y(image: tf.Tensor, level: float, replace: int) -> tf.Tensor: # with a matrix form of: # [1 0 # level 1]. 
- image = transform(image=wrap(image), - transforms=[1., 0., 0., level, 1., 0., 0., 0.]) + image = transform( + image=wrap(image), transforms=[1., 0., 0., level, 1., 0., 0., 0.]) return unwrap(image, replace) @@ -460,9 +455,9 @@ def sharpness(image: tf.Tensor, factor: float) -> tf.Tensor: # Make image 4D for conv operation. image = tf.expand_dims(image, 0) # SMOOTH PIL Kernel. - kernel = tf.constant( - [[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=tf.float32, - shape=[3, 3, 1, 1]) / 13. + kernel = tf.constant([[1, 1, 1], [1, 5, 1], [1, 1, 1]], + dtype=tf.float32, + shape=[3, 3, 1, 1]) / 13. # Tile across channel dimension. kernel = tf.tile(kernel, [1, 1, 3, 1]) strides = [1, 1, 1, 1] @@ -484,6 +479,7 @@ def sharpness(image: tf.Tensor, factor: float) -> tf.Tensor: def equalize(image: tf.Tensor) -> tf.Tensor: """Implements Equalize function from PIL using TF ops.""" + def scale_channel(im, c): """Scale the data in the channel to implement equalize.""" im = tf.cast(im[:, :, c], tf.int32) @@ -507,9 +503,9 @@ def equalize(image: tf.Tensor) -> tf.Tensor: # If step is zero, return the original image. Otherwise, build # lut from the full histogram and step and then index from it. - result = tf.cond(tf.equal(step, 0), - lambda: im, - lambda: tf.gather(build_lut(histo, step), im)) + result = tf.cond( + tf.equal(step, 0), lambda: im, + lambda: tf.gather(build_lut(histo, step), im)) return tf.cast(result, tf.uint8) @@ -582,7 +578,7 @@ def _randomly_negate_tensor(tensor): def _rotate_level_to_arg(level: float): - level = (level/_MAX_LEVEL) * 30. + level = (level / _MAX_LEVEL) * 30. level = _randomly_negate_tensor(level) return (level,) @@ -597,18 +593,18 @@ def _shrink_level_to_arg(level: float): def _enhance_level_to_arg(level: float): - return ((level/_MAX_LEVEL) * 1.8 + 0.1,) + return ((level / _MAX_LEVEL) * 1.8 + 0.1,) def _shear_level_to_arg(level: float): - level = (level/_MAX_LEVEL) * 0.3 + level = (level / _MAX_LEVEL) * 0.3 # Flip level to negative with 50% chance. 
level = _randomly_negate_tensor(level) return (level,) def _translate_level_to_arg(level: float, translate_const: float): - level = (level/_MAX_LEVEL) * float(translate_const) + level = (level / _MAX_LEVEL) * float(translate_const) # Flip level to negative with 50% chance. level = _randomly_negate_tensor(level) return (level,) @@ -618,20 +614,15 @@ def _mult_to_arg(level: float, multiplier: float = 1.): return (int((level / _MAX_LEVEL) * multiplier),) -def _apply_func_with_prob(func: Any, - image: tf.Tensor, - args: Any, - prob: float): +def _apply_func_with_prob(func: Any, image: tf.Tensor, args: Any, prob: float): """Apply `func` to image w/ `args` as input with probability `prob`.""" assert isinstance(args, tuple) # Apply the function with probability `prob`. should_apply_op = tf.cast( tf.floor(tf.random.uniform([], dtype=tf.float32) + prob), tf.bool) - augmented_image = tf.cond( - should_apply_op, - lambda: func(image, *args), - lambda: image) + augmented_image = tf.cond(should_apply_op, lambda: func(image, *args), + lambda: image) return augmented_image @@ -709,11 +700,8 @@ def level_to_arg(cutout_const: float, translate_const: float): return args -def _parse_policy_info(name: Text, - prob: float, - level: float, - replace_value: List[int], - cutout_const: float, +def _parse_policy_info(name: Text, prob: float, level: float, + replace_value: List[int], cutout_const: float, translate_const: float) -> Tuple[Any, float, Any]: """Return the function that corresponds to `name` and update `level` param.""" func = NAME_TO_FUNC[name] @@ -969,8 +957,9 @@ class RandAugment(ImageAugment): min_prob, max_prob = 0.2, 0.8 for _ in range(self.num_layers): - op_to_select = tf.random.uniform( - [], maxval=len(self.available_ops) + 1, dtype=tf.int32) + op_to_select = tf.random.uniform([], + maxval=len(self.available_ops) + 1, + dtype=tf.int32) branch_fns = [] for (i, op_name) in enumerate(self.available_ops): @@ -978,11 +967,8 @@ class RandAugment(ImageAugment): minval=min_prob, 
maxval=max_prob, dtype=tf.float32) - func, _, args = _parse_policy_info(op_name, - prob, - self.magnitude, - replace_value, - self.cutout_const, + func, _, args = _parse_policy_info(op_name, prob, self.magnitude, + replace_value, self.cutout_const, self.translate_const) branch_fns.append(( i, @@ -991,9 +977,10 @@ class RandAugment(ImageAugment): image, *selected_args))) # pylint:enable=g-long-lambda - image = tf.switch_case(branch_index=op_to_select, - branch_fns=branch_fns, - default=lambda: tf.identity(image)) + image = tf.switch_case( + branch_index=op_to_select, + branch_fns=branch_fns, + default=lambda: tf.identity(image)) image = tf.cast(image, dtype=input_image_type) return image diff --git a/official/vision/image_classification/augment_test.py b/official/vision/image_classification/augment_test.py index 76bdb2b7b9db4fc109f39674c68ae0c1169f3f12..dceb14eeaddb1ce21fd40b4c40b6c463b62f64fb 100644 --- a/official/vision/image_classification/augment_test.py +++ b/official/vision/image_classification/augment_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for autoaugment.""" from __future__ import absolute_import @@ -49,24 +49,15 @@ class TransformsTest(parameterized.TestCase, tf.test.TestCase): def test_transform(self, dtype): image = tf.constant([[1, 2], [3, 4]], dtype=dtype) - self.assertAllEqual(augment.transform(image, transforms=[1]*8), - [[4, 4], [4, 4]]) + self.assertAllEqual( + augment.transform(image, transforms=[1] * 8), [[4, 4], [4, 4]]) def test_translate(self, dtype): image = tf.constant( - [[1, 0, 1, 0], - [0, 1, 0, 1], - [1, 0, 1, 0], - [0, 1, 0, 1]], - dtype=dtype) + [[1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1]], dtype=dtype) translations = [-1, -1] - translated = augment.translate(image=image, - translations=translations) - expected = [ - [1, 0, 1, 1], - [0, 1, 0, 0], - [1, 0, 1, 1], - [1, 0, 1, 1]] + translated = augment.translate(image=image, translations=translations) + expected = [[1, 0, 1, 1], [0, 1, 0, 0], [1, 0, 1, 1], [1, 0, 1, 1]] self.assertAllEqual(translated, expected) def test_translate_shapes(self, dtype): @@ -85,9 +76,7 @@ class TransformsTest(parameterized.TestCase, tf.test.TestCase): image = tf.reshape(tf.cast(tf.range(9), dtype), (3, 3)) rotation = 90. 
transformed = augment.rotate(image=image, degrees=rotation) - expected = [[2, 5, 8], - [1, 4, 7], - [0, 3, 6]] + expected = [[2, 5, 8], [1, 4, 7], [0, 3, 6]] self.assertAllEqual(transformed, expected) def test_rotate_shapes(self, dtype): @@ -129,15 +118,13 @@ class AutoaugmentTest(tf.test.TestCase): image = tf.ones((224, 224, 3), dtype=tf.uint8) for op_name in augment.NAME_TO_FUNC: - func, _, args = augment._parse_policy_info(op_name, - prob, - magnitude, - replace_value, - cutout_const, + func, _, args = augment._parse_policy_info(op_name, prob, magnitude, + replace_value, cutout_const, translate_const) image = func(image, *args) self.assertEqual((224, 224, 3), image.shape) + if __name__ == '__main__': tf.test.main() diff --git a/official/vision/image_classification/callbacks.py b/official/vision/image_classification/callbacks.py index 985d0c60cc0b866e10ad350986c004e4ea4ac161..033a2dd714f596e5cabbac4b0205099831fdb16c 100644 --- a/official/vision/image_classification/callbacks.py +++ b/official/vision/image_classification/callbacks.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + +# Lint as: python3 """Common modules for callbacks.""" from __future__ import absolute_import from __future__ import division @@ -21,37 +21,46 @@ from __future__ import print_function import os from typing import Any, List, MutableMapping, Text + from absl import logging import tensorflow as tf +from official.modeling import optimization from official.utils.misc import keras_utils -from official.vision.image_classification import optimizer_factory - - -def get_callbacks(model_checkpoint: bool = True, - include_tensorboard: bool = True, - time_history: bool = True, - track_lr: bool = True, - write_model_weights: bool = True, - apply_moving_average: bool = False, - initial_step: int = 0, - batch_size: int = 0, - log_steps: int = 0, - model_dir: str = None) -> List[tf.keras.callbacks.Callback]: + + +def get_callbacks( + model_checkpoint: bool = True, + include_tensorboard: bool = True, + time_history: bool = True, + track_lr: bool = True, + write_model_weights: bool = True, + apply_moving_average: bool = False, + initial_step: int = 0, + batch_size: int = 0, + log_steps: int = 0, + model_dir: str = None, + backup_and_restore: bool = False) -> List[tf.keras.callbacks.Callback]: """Get all callbacks.""" model_dir = model_dir or '' callbacks = [] if model_checkpoint: ckpt_full_path = os.path.join(model_dir, 'model.ckpt-{epoch:04d}') - callbacks.append(tf.keras.callbacks.ModelCheckpoint( - ckpt_full_path, save_weights_only=True, verbose=1)) + callbacks.append( + tf.keras.callbacks.ModelCheckpoint( + ckpt_full_path, save_weights_only=True, verbose=1)) + if backup_and_restore: + backup_dir = os.path.join(model_dir, 'tmp') + callbacks.append( + tf.keras.callbacks.experimental.BackupAndRestore(backup_dir)) if include_tensorboard: callbacks.append( CustomTensorBoard( log_dir=model_dir, track_lr=track_lr, initial_step=initial_step, - write_images=write_model_weights)) + 
write_images=write_model_weights, + profile_batch=0)) if time_history: callbacks.append( keras_utils.TimeHistory( @@ -61,13 +70,14 @@ def get_callbacks(model_checkpoint: bool = True, if apply_moving_average: # Save moving average model to a different file so that # we can resume training from a checkpoint - ckpt_full_path = os.path.join( - model_dir, 'average', 'model.ckpt-{epoch:04d}') - callbacks.append(AverageModelCheckpoint( - update_weights=False, - filepath=ckpt_full_path, - save_weights_only=True, - verbose=1)) + ckpt_full_path = os.path.join(model_dir, 'average', + 'model.ckpt-{epoch:04d}') + callbacks.append( + AverageModelCheckpoint( + update_weights=False, + filepath=ckpt_full_path, + save_weights_only=True, + verbose=1)) callbacks.append(MovingAverageCallback()) return callbacks @@ -162,7 +172,7 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard): class MovingAverageCallback(tf.keras.callbacks.Callback): - """A Callback to be used with a `MovingAverage` optimizer. + """A Callback to be used with a `ExponentialMovingAverage` optimizer. Applies moving average weights to the model during validation time to test and predict on the averaged weights rather than the current model weights. @@ -175,16 +185,14 @@ class MovingAverageCallback(tf.keras.callbacks.Callback): **kwargs: Any additional callback arguments. 
""" - def __init__(self, - overwrite_weights_on_train_end: bool = False, - **kwargs): + def __init__(self, overwrite_weights_on_train_end: bool = False, **kwargs): super(MovingAverageCallback, self).__init__(**kwargs) self.overwrite_weights_on_train_end = overwrite_weights_on_train_end def set_model(self, model: tf.keras.Model): super(MovingAverageCallback, self).set_model(model) assert isinstance(self.model.optimizer, - optimizer_factory.MovingAverage) + optimization.ExponentialMovingAverage) self.model.optimizer.shadow_copy(self.model) def on_test_begin(self, logs: MutableMapping[Text, Any] = None): @@ -204,44 +212,35 @@ class AverageModelCheckpoint(tf.keras.callbacks.ModelCheckpoint): Taken from tfa.callbacks.AverageModelCheckpoint. Attributes: - update_weights: If True, assign the moving average weights - to the model, and save them. If False, keep the old - non-averaged weights, but the saved model uses the - average weights. - See `tf.keras.callbacks.ModelCheckpoint` for the other args. + update_weights: If True, assign the moving average weights to the model, and + save them. If False, keep the old non-averaged weights, but the saved + model uses the average weights. See `tf.keras.callbacks.ModelCheckpoint` + for the other args. 
""" - def __init__( - self, - update_weights: bool, - filepath: str, - monitor: str = 'val_loss', - verbose: int = 0, - save_best_only: bool = False, - save_weights_only: bool = False, - mode: str = 'auto', - save_freq: str = 'epoch', - **kwargs): + def __init__(self, + update_weights: bool, + filepath: str, + monitor: str = 'val_loss', + verbose: int = 0, + save_best_only: bool = False, + save_weights_only: bool = False, + mode: str = 'auto', + save_freq: str = 'epoch', + **kwargs): self.update_weights = update_weights - super().__init__( - filepath, - monitor, - verbose, - save_best_only, - save_weights_only, - mode, - save_freq, - **kwargs) + super().__init__(filepath, monitor, verbose, save_best_only, + save_weights_only, mode, save_freq, **kwargs) def set_model(self, model): - if not isinstance(model.optimizer, optimizer_factory.MovingAverage): - raise TypeError( - 'AverageModelCheckpoint is only used when training' - 'with MovingAverage') + if not isinstance(model.optimizer, optimization.ExponentialMovingAverage): + raise TypeError('AverageModelCheckpoint is only used when training' + 'with MovingAverage') return super().set_model(model) def _save_model(self, epoch, logs): - assert isinstance(self.model.optimizer, optimizer_factory.MovingAverage) + assert isinstance(self.model.optimizer, + optimization.ExponentialMovingAverage) if self.update_weights: self.model.optimizer.assign_average_vars(self.model.variables) diff --git a/official/vision/image_classification/classifier_trainer.py b/official/vision/image_classification/classifier_trainer.py index c4b87ad6068d3d1beda0e4f0dec20f363466f7f8..ab6fbaea960e7d894d69e213e95c313d7fe9893c 100644 --- a/official/vision/image_classification/classifier_trainer.py +++ b/official/vision/image_classification/classifier_trainer.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Runs an Image Classification model.""" import os @@ -23,11 +23,10 @@ from absl import app from absl import flags from absl import logging import tensorflow as tf - +from official.common import distribute_utils from official.modeling import hyperparams from official.modeling import performance from official.utils import hyperparams_flags -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils from official.vision.image_classification import callbacks as custom_callbacks from official.vision.image_classification import dataset_factory @@ -41,7 +40,7 @@ from official.vision.image_classification.resnet import resnet_model def get_models() -> Mapping[str, tf.keras.Model]: """Returns the mapping from model type name to Keras model.""" - return { + return { 'efficientnet': efficientnet_model.EfficientNet.from_name, 'resnet': resnet_model.resnet50, } @@ -55,7 +54,7 @@ def get_dtype_map() -> Mapping[str, tf.dtypes.DType]: 'float16': tf.float16, 'fp32': tf.float32, 'bf16': tf.bfloat16, - } + } def _get_metrics(one_hot: bool) -> Mapping[Text, Any]: @@ -63,22 +62,28 @@ def _get_metrics(one_hot: bool) -> Mapping[Text, Any]: if one_hot: return { # (name, metric_fn) - 'acc': tf.keras.metrics.CategoricalAccuracy(name='accuracy'), - 'accuracy': tf.keras.metrics.CategoricalAccuracy(name='accuracy'), - 'top_1': tf.keras.metrics.CategoricalAccuracy(name='accuracy'), - 'top_5': tf.keras.metrics.TopKCategoricalAccuracy( - k=5, - name='top_5_accuracy'), + 'acc': + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), 
+ 'accuracy': + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + 'top_1': + tf.keras.metrics.CategoricalAccuracy(name='accuracy'), + 'top_5': + tf.keras.metrics.TopKCategoricalAccuracy( + k=5, name='top_5_accuracy'), } else: return { # (name, metric_fn) - 'acc': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), - 'accuracy': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), - 'top_1': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), - 'top_5': tf.keras.metrics.SparseTopKCategoricalAccuracy( - k=5, - name='top_5_accuracy'), + 'acc': + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + 'accuracy': + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + 'top_1': + tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'), + 'top_5': + tf.keras.metrics.SparseTopKCategoricalAccuracy( + k=5, name='top_5_accuracy'), } @@ -94,8 +99,7 @@ def get_image_size_from_model( def _get_dataset_builders(params: base_configs.ExperimentConfig, strategy: tf.distribute.Strategy, - one_hot: bool - ) -> Tuple[Any, Any]: + one_hot: bool) -> Tuple[Any, Any]: """Create and return train and validation dataset builders.""" if one_hot: logging.warning('label_smoothing > 0, so datasets will be one hot encoded.') @@ -107,9 +111,7 @@ def _get_dataset_builders(params: base_configs.ExperimentConfig, image_size = get_image_size_from_model(params) - dataset_configs = [ - params.train_dataset, params.validation_dataset - ] + dataset_configs = [params.train_dataset, params.validation_dataset] builders = [] for config in dataset_configs: @@ -171,8 +173,7 @@ def _get_params_from_flags(flags_obj: flags.FlagValues): }, } - overriding_configs = (flags_obj.config_file, - flags_obj.params_override, + overriding_configs = (flags_obj.config_file, flags_obj.params_override, flags_overrides) pp = pprint.PrettyPrinter() @@ -190,8 +191,7 @@ def _get_params_from_flags(flags_obj: flags.FlagValues): return params -def resume_from_checkpoint(model: 
tf.keras.Model, - model_dir: str, +def resume_from_checkpoint(model: tf.keras.Model, model_dir: str, train_steps: int) -> int: """Resumes from the latest checkpoint, if possible. @@ -226,10 +226,8 @@ def resume_from_checkpoint(model: tf.keras.Model, def initialize(params: base_configs.ExperimentConfig, dataset_builder: dataset_factory.DatasetBuilder): """Initializes backend related initializations.""" - keras_utils.set_session_config( - enable_xla=params.runtime.enable_xla) - performance.set_mixed_precision_policy(dataset_builder.dtype, - get_loss_scale(params)) + keras_utils.set_session_config(enable_xla=params.runtime.enable_xla) + performance.set_mixed_precision_policy(dataset_builder.dtype) if tf.config.list_physical_devices('GPU'): data_format = 'channels_first' else: @@ -244,7 +242,8 @@ def initialize(params: base_configs.ExperimentConfig, per_gpu_thread_count=params.runtime.per_gpu_thread_count, gpu_thread_mode=params.runtime.gpu_thread_mode, num_gpus=params.runtime.num_gpus, - datasets_num_private_threads=params.runtime.dataset_num_private_threads) # pylint:disable=line-too-long + datasets_num_private_threads=params.runtime + .dataset_num_private_threads) # pylint:disable=line-too-long if params.runtime.batchnorm_spatial_persistent: os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1' @@ -253,9 +252,7 @@ def define_classifier_flags(): """Defines common flags for image classification.""" hyperparams_flags.initialize_common_flags() flags.DEFINE_string( - 'data_dir', - default=None, - help='The location of the input data.') + 'data_dir', default=None, help='The location of the input data.') flags.DEFINE_string( 'mode', default=None, @@ -278,8 +275,7 @@ def define_classifier_flags(): help='The interval of steps between logging of batch level stats.') -def serialize_config(params: base_configs.ExperimentConfig, - model_dir: str): +def serialize_config(params: base_configs.ExperimentConfig, model_dir: str): """Serializes and saves the experiment 
config.""" params_save_path = os.path.join(model_dir, 'params.yaml') logging.info('Saving experiment configuration to %s', params_save_path) @@ -293,18 +289,17 @@ def train_and_eval( """Runs the train and eval path using compile/fit.""" logging.info('Running train and eval.') - distribution_utils.configure_cluster( - params.runtime.worker_hosts, - params.runtime.task_index) + distribute_utils.configure_cluster(params.runtime.worker_hosts, + params.runtime.task_index) # Note: for TPUs, strategy and scope should be created before the dataset - strategy = strategy_override or distribution_utils.get_distribution_strategy( + strategy = strategy_override or distribute_utils.get_distribution_strategy( distribution_strategy=params.runtime.distribution_strategy, all_reduce_alg=params.runtime.all_reduce_alg, num_gpus=params.runtime.num_gpus, tpu_address=params.runtime.tpu) - strategy_scope = distribution_utils.get_strategy_scope(strategy) + strategy_scope = distribute_utils.get_strategy_scope(strategy) logging.info('Detected %d devices.', strategy.num_replicas_in_sync if strategy else 1) @@ -313,8 +308,9 @@ def train_and_eval( one_hot = label_smoothing and label_smoothing > 0 builders = _get_dataset_builders(params, strategy, one_hot) - datasets = [builder.build(strategy) - if builder else None for builder in builders] + datasets = [ + builder.build(strategy) if builder else None for builder in builders + ] # Unpack datasets and builders based on train/val/test splits train_builder, validation_builder = builders # pylint: disable=unbalanced-tuple-unpacking @@ -341,6 +337,10 @@ def train_and_eval( base_learning_rate=learning_rate, params=params.model.optimizer.as_dict(), model=model) + optimizer = performance.configure_optimizer( + optimizer, + use_float16=train_builder.dtype == 'float16', + loss_scale=get_loss_scale(params)) metrics_map = _get_metrics(one_hot) metrics = [metrics_map[metric] for metric in params.train.metrics] @@ -351,16 +351,16 @@ def train_and_eval( 
label_smoothing=params.model.loss.label_smoothing) else: loss_obj = tf.keras.losses.SparseCategoricalCrossentropy() - model.compile(optimizer=optimizer, - loss=loss_obj, - metrics=metrics, - experimental_steps_per_execution=steps_per_loop) + model.compile( + optimizer=optimizer, + loss=loss_obj, + metrics=metrics, + steps_per_execution=steps_per_loop) initial_epoch = 0 if params.train.resume_checkpoint: - initial_epoch = resume_from_checkpoint(model=model, - model_dir=params.model_dir, - train_steps=train_steps) + initial_epoch = resume_from_checkpoint( + model=model, model_dir=params.model_dir, train_steps=train_steps) callbacks = custom_callbacks.get_callbacks( model_checkpoint=params.train.callbacks.enable_checkpoint_and_export, @@ -371,7 +371,8 @@ def train_and_eval( initial_step=initial_epoch * train_steps, batch_size=train_builder.global_batch_size, log_steps=params.train.time_history.log_steps, - model_dir=params.model_dir) + model_dir=params.model_dir, + backup_and_restore=params.train.callbacks.enable_backup_and_restore) serialize_config(params=params, model_dir=params.model_dir) @@ -399,9 +400,7 @@ def train_and_eval( validation_dataset, steps=validation_steps, verbose=2) # TODO(dankondratyuk): eval and save final test accuracy - stats = common.build_stats(history, - validation_output, - callbacks) + stats = common.build_stats(history, validation_output, callbacks) return stats diff --git a/official/vision/image_classification/classifier_trainer_test.py b/official/vision/image_classification/classifier_trainer_test.py index 244425feef76bf89d4de939cb8a1914a6f0f47c6..06227c154427db3057269f9e9250a179a52264c9 100644 --- a/official/vision/image_classification/classifier_trainer_test.py +++ b/official/vision/image_classification/classifier_trainer_test.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,14 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Unit tests for the classifier trainer models.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy import functools import json @@ -29,6 +28,7 @@ import sys from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional, Tuple from absl import flags +from absl.testing import flagsaver from absl.testing import parameterized import tensorflow as tf @@ -36,9 +36,7 @@ from tensorflow.python.distribute import combinations from tensorflow.python.distribute import strategy_combinations from official.utils.flags import core as flags_core from official.vision.image_classification import classifier_trainer -from official.vision.image_classification import dataset_factory -from official.vision.image_classification import test_utils -from official.vision.image_classification.configs import base_configs + classifier_trainer.define_classifier_flags() @@ -48,7 +46,7 @@ def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]: return combinations.combine( distribution=[ strategy_combinations.default_strategy, - strategy_combinations.tpu_strategy, + strategy_combinations.cloud_tpu_strategy, strategy_combinations.one_device_strategy_gpu, strategy_combinations.mirrored_strategy_with_two_gpus, ], @@ -56,7 +54,6 @@ def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]: 'efficientnet', 'resnet', ], - mode='eager', dataset=[ 'imagenet', ], @@ -99,33 +96,7 @@ def basic_params_override(dtype: str = 'float32') -> MutableMapping[str, Any]: } -def 
get_trivial_model(num_classes: int) -> tf.keras.Model: - """Creates and compiles trivial model for ImageNet dataset.""" - model = test_utils.trivial_model(num_classes=num_classes) - lr = 0.01 - optimizer = tf.keras.optimizers.SGD(learning_rate=lr) - loss_obj = tf.keras.losses.SparseCategoricalCrossentropy() - model.compile(optimizer=optimizer, - loss=loss_obj, - run_eagerly=True) - return model - - -def get_trivial_data() -> tf.data.Dataset: - """Gets trivial data in the ImageNet size.""" - def generate_data(_) -> tf.data.Dataset: - image = tf.zeros(shape=(224, 224, 3), dtype=tf.float32) - label = tf.zeros([1], dtype=tf.int32) - return image, label - - dataset = tf.data.Dataset.range(1) - dataset = dataset.repeat() - dataset = dataset.map(generate_data, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - dataset = dataset.prefetch(buffer_size=1).batch(1) - return dataset - - +@flagsaver.flagsaver def run_end_to_end(main: Callable[[Any], None], extra_flags: Optional[Iterable[str]] = None, model_dir: Optional[str] = None): @@ -154,7 +125,7 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): # Some parameters are not defined as flags (e.g. cannot run # classifier_train.py --batch_size=...) by design, so use # "--params_override=..." 
instead - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path base_flags = [ '--data_dir=not_used', '--model_type=' + model, @@ -165,11 +136,10 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): '--mode=train_and_eval', ] - run = functools.partial(classifier_trainer.run, - strategy_override=distribution) - run_end_to_end(main=run, - extra_flags=train_and_eval_flags, - model_dir=model_dir) + run = functools.partial( + classifier_trainer.run, strategy_override=distribution) + run_end_to_end( + main=run, extra_flags=train_and_eval_flags, model_dir=model_dir) @combinations.generate( combinations.combine( @@ -180,7 +150,6 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): 'efficientnet', 'resnet', ], - mode='eager', dataset='imagenet', dtype='float16', )) @@ -189,7 +158,7 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): # Some parameters are not defined as flags (e.g. cannot run # classifier_train.py --batch_size=...) by design, so use # "--params_override=..." 
instead - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path base_flags = [ '--data_dir=not_used', '--model_type=' + model, @@ -209,35 +178,31 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): get_params_override(export_params) ] - run = functools.partial(classifier_trainer.run, - strategy_override=distribution) - run_end_to_end(main=run, - extra_flags=train_and_eval_flags, - model_dir=model_dir) - run_end_to_end(main=run, - extra_flags=export_flags, - model_dir=model_dir) + run = functools.partial( + classifier_trainer.run, strategy_override=distribution) + run_end_to_end( + main=run, extra_flags=train_and_eval_flags, model_dir=model_dir) + run_end_to_end(main=run, extra_flags=export_flags, model_dir=model_dir) self.assertTrue(os.path.exists(export_path)) @combinations.generate( combinations.combine( - distribution=[ - strategy_combinations.tpu_strategy, - ], - model=[ - 'efficientnet', - 'resnet', - ], - mode='eager', - dataset='imagenet', - dtype='bfloat16', - )) + distribution=[ + strategy_combinations.cloud_tpu_strategy, + ], + model=[ + 'efficientnet', + 'resnet', + ], + dataset='imagenet', + dtype='bfloat16', + )) def test_tpu_train(self, distribution, model, dataset, dtype): """Test train_and_eval and export for Keras classifier models.""" # Some parameters are not defined as flags (e.g. cannot run # classifier_train.py --batch_size=...) by design, so use # "--params_override=..." 
instead - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path base_flags = [ '--data_dir=not_used', '--model_type=' + model, @@ -248,16 +213,15 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): '--mode=train_and_eval', ] - run = functools.partial(classifier_trainer.run, - strategy_override=distribution) - run_end_to_end(main=run, - extra_flags=train_and_eval_flags, - model_dir=model_dir) + run = functools.partial( + classifier_trainer.run, strategy_override=distribution) + run_end_to_end( + main=run, extra_flags=train_and_eval_flags, model_dir=model_dir) @combinations.generate(distribution_strategy_combinations()) def test_end_to_end_invalid_mode(self, distribution, model, dataset): """Test the Keras EfficientNet model with `strategy`.""" - model_dir = self.get_temp_dir() + model_dir = self.create_tempdir().full_path extra_flags = [ '--data_dir=not_used', '--mode=invalid_mode', @@ -266,122 +230,11 @@ class ClassifierTest(tf.test.TestCase, parameterized.TestCase): get_params_override(basic_params_override()), ] - run = functools.partial(classifier_trainer.run, - strategy_override=distribution) + run = functools.partial( + classifier_trainer.run, strategy_override=distribution) with self.assertRaises(ValueError): run_end_to_end(main=run, extra_flags=extra_flags, model_dir=model_dir) -class UtilTests(parameterized.TestCase, tf.test.TestCase): - """Tests for individual utility functions within classifier_trainer.py.""" - - @parameterized.named_parameters( - ('efficientnet-b0', 'efficientnet', 'efficientnet-b0', 224), - ('efficientnet-b1', 'efficientnet', 'efficientnet-b1', 240), - ('efficientnet-b2', 'efficientnet', 'efficientnet-b2', 260), - ('efficientnet-b3', 'efficientnet', 'efficientnet-b3', 300), - ('efficientnet-b4', 'efficientnet', 'efficientnet-b4', 380), - ('efficientnet-b5', 'efficientnet', 'efficientnet-b5', 456), - ('efficientnet-b6', 'efficientnet', 'efficientnet-b6', 528), - ('efficientnet-b7', 'efficientnet', 
'efficientnet-b7', 600), - ('resnet', 'resnet', '', None), - ) - def test_get_model_size(self, model, model_name, expected): - config = base_configs.ExperimentConfig( - model_name=model, - model=base_configs.ModelConfig( - model_params={ - 'model_name': model_name, - }, - ) - ) - size = classifier_trainer.get_image_size_from_model(config) - self.assertEqual(size, expected) - - @parameterized.named_parameters( - ('dynamic', 'dynamic', None, 'dynamic'), - ('scalar', 128., None, 128.), - ('float32', None, 'float32', 1), - ('float16', None, 'float16', 128), - ) - def test_get_loss_scale(self, loss_scale, dtype, expected): - config = base_configs.ExperimentConfig( - runtime=base_configs.RuntimeConfig( - loss_scale=loss_scale), - train_dataset=dataset_factory.DatasetConfig(dtype=dtype)) - ls = classifier_trainer.get_loss_scale(config, fp16_default=128) - self.assertEqual(ls, expected) - - @parameterized.named_parameters( - ('float16', 'float16'), - ('bfloat16', 'bfloat16') - ) - def test_initialize(self, dtype): - config = base_configs.ExperimentConfig( - runtime=base_configs.RuntimeConfig( - run_eagerly=False, - enable_xla=False, - per_gpu_thread_count=1, - gpu_thread_mode='gpu_private', - num_gpus=1, - dataset_num_private_threads=1, - ), - train_dataset=dataset_factory.DatasetConfig(dtype=dtype), - model=base_configs.ModelConfig(), - ) - - class EmptyClass: - pass - fake_ds_builder = EmptyClass() - fake_ds_builder.dtype = dtype - fake_ds_builder.config = EmptyClass() - classifier_trainer.initialize(config, fake_ds_builder) - - def test_resume_from_checkpoint(self): - """Tests functionality for resuming from checkpoint.""" - # Set the keras policy - policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16') - tf.keras.mixed_precision.experimental.set_policy(policy) - - # Get the model, datasets, and compile it. 
- model = get_trivial_model(10) - - # Create the checkpoint - model_dir = self.get_temp_dir() - train_epochs = 1 - train_steps = 10 - ds = get_trivial_data() - callbacks = [ - tf.keras.callbacks.ModelCheckpoint( - os.path.join(model_dir, 'model.ckpt-{epoch:04d}'), - save_weights_only=True) - ] - model.fit( - ds, - callbacks=callbacks, - epochs=train_epochs, - steps_per_epoch=train_steps) - - # Test load from checkpoint - clean_model = get_trivial_model(10) - weights_before_load = copy.deepcopy(clean_model.get_weights()) - initial_epoch = classifier_trainer.resume_from_checkpoint( - model=clean_model, - model_dir=model_dir, - train_steps=train_steps) - self.assertEqual(initial_epoch, 1) - self.assertNotAllClose(weights_before_load, clean_model.get_weights()) - - tf.io.gfile.rmtree(model_dir) - - def test_serialize_config(self): - """Tests functionality for serializing data.""" - config = base_configs.ExperimentConfig() - model_dir = self.get_temp_dir() - classifier_trainer.serialize_config(params=config, model_dir=model_dir) - saved_params_path = os.path.join(model_dir, 'params.yaml') - self.assertTrue(os.path.exists(saved_params_path)) - tf.io.gfile.rmtree(model_dir) - if __name__ == '__main__': tf.test.main() diff --git a/official/vision/image_classification/classifier_trainer_util_test.py b/official/vision/image_classification/classifier_trainer_util_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d3624c286fdc716e4a09df56fbb8157fa35602aa --- /dev/null +++ b/official/vision/image_classification/classifier_trainer_util_test.py @@ -0,0 +1,166 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Unit tests for the classifier trainer models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import os + +from absl.testing import parameterized +import tensorflow as tf + +from official.vision.image_classification import classifier_trainer +from official.vision.image_classification import dataset_factory +from official.vision.image_classification import test_utils +from official.vision.image_classification.configs import base_configs + + +def get_trivial_model(num_classes: int) -> tf.keras.Model: + """Creates and compiles trivial model for ImageNet dataset.""" + model = test_utils.trivial_model(num_classes=num_classes) + lr = 0.01 + optimizer = tf.keras.optimizers.SGD(learning_rate=lr) + loss_obj = tf.keras.losses.SparseCategoricalCrossentropy() + model.compile(optimizer=optimizer, loss=loss_obj, run_eagerly=True) + return model + + +def get_trivial_data() -> tf.data.Dataset: + """Gets trivial data in the ImageNet size.""" + + def generate_data(_) -> tf.data.Dataset: + image = tf.zeros(shape=(224, 224, 3), dtype=tf.float32) + label = tf.zeros([1], dtype=tf.int32) + return image, label + + dataset = tf.data.Dataset.range(1) + dataset = dataset.repeat() + dataset = dataset.map( + generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.prefetch(buffer_size=1).batch(1) + return dataset + + +class UtilTests(parameterized.TestCase, tf.test.TestCase): + """Tests for individual utility functions within 
classifier_trainer.py.""" + + @parameterized.named_parameters( + ('efficientnet-b0', 'efficientnet', 'efficientnet-b0', 224), + ('efficientnet-b1', 'efficientnet', 'efficientnet-b1', 240), + ('efficientnet-b2', 'efficientnet', 'efficientnet-b2', 260), + ('efficientnet-b3', 'efficientnet', 'efficientnet-b3', 300), + ('efficientnet-b4', 'efficientnet', 'efficientnet-b4', 380), + ('efficientnet-b5', 'efficientnet', 'efficientnet-b5', 456), + ('efficientnet-b6', 'efficientnet', 'efficientnet-b6', 528), + ('efficientnet-b7', 'efficientnet', 'efficientnet-b7', 600), + ('resnet', 'resnet', '', None), + ) + def test_get_model_size(self, model, model_name, expected): + config = base_configs.ExperimentConfig( + model_name=model, + model=base_configs.ModelConfig( + model_params={ + 'model_name': model_name, + },)) + size = classifier_trainer.get_image_size_from_model(config) + self.assertEqual(size, expected) + + @parameterized.named_parameters( + ('dynamic', 'dynamic', None, 'dynamic'), + ('scalar', 128., None, 128.), + ('float32', None, 'float32', 1), + ('float16', None, 'float16', 128), + ) + def test_get_loss_scale(self, loss_scale, dtype, expected): + config = base_configs.ExperimentConfig( + runtime=base_configs.RuntimeConfig(loss_scale=loss_scale), + train_dataset=dataset_factory.DatasetConfig(dtype=dtype)) + ls = classifier_trainer.get_loss_scale(config, fp16_default=128) + self.assertEqual(ls, expected) + + @parameterized.named_parameters(('float16', 'float16'), + ('bfloat16', 'bfloat16')) + def test_initialize(self, dtype): + config = base_configs.ExperimentConfig( + runtime=base_configs.RuntimeConfig( + run_eagerly=False, + enable_xla=False, + per_gpu_thread_count=1, + gpu_thread_mode='gpu_private', + num_gpus=1, + dataset_num_private_threads=1, + ), + train_dataset=dataset_factory.DatasetConfig(dtype=dtype), + model=base_configs.ModelConfig(), + ) + + class EmptyClass: + pass + + fake_ds_builder = EmptyClass() + fake_ds_builder.dtype = dtype + 
fake_ds_builder.config = EmptyClass() + classifier_trainer.initialize(config, fake_ds_builder) + + def test_resume_from_checkpoint(self): + """Tests functionality for resuming from checkpoint.""" + # Set the keras policy + tf.keras.mixed_precision.set_global_policy('mixed_bfloat16') + + # Get the model, datasets, and compile it. + model = get_trivial_model(10) + + # Create the checkpoint + model_dir = self.create_tempdir().full_path + train_epochs = 1 + train_steps = 10 + ds = get_trivial_data() + callbacks = [ + tf.keras.callbacks.ModelCheckpoint( + os.path.join(model_dir, 'model.ckpt-{epoch:04d}'), + save_weights_only=True) + ] + model.fit( + ds, + callbacks=callbacks, + epochs=train_epochs, + steps_per_epoch=train_steps) + + # Test load from checkpoint + clean_model = get_trivial_model(10) + weights_before_load = copy.deepcopy(clean_model.get_weights()) + initial_epoch = classifier_trainer.resume_from_checkpoint( + model=clean_model, model_dir=model_dir, train_steps=train_steps) + self.assertEqual(initial_epoch, 1) + self.assertNotAllClose(weights_before_load, clean_model.get_weights()) + + tf.io.gfile.rmtree(model_dir) + + def test_serialize_config(self): + """Tests functionality for serializing data.""" + config = base_configs.ExperimentConfig() + model_dir = self.create_tempdir().full_path + classifier_trainer.serialize_config(params=config, model_dir=model_dir) + saved_params_path = os.path.join(model_dir, 'params.yaml') + self.assertTrue(os.path.exists(saved_params_path)) + tf.io.gfile.rmtree(model_dir) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/image_classification/configs/__init__.py b/official/vision/image_classification/configs/__init__.py index 931c2ef11db4a949e6c2e95bca44e36bac1241e9..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/image_classification/configs/__init__.py +++ b/official/vision/image_classification/configs/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + diff --git a/official/vision/image_classification/configs/base_configs.py b/official/vision/image_classification/configs/base_configs.py index 11fcb5305660ec71153ebfc12631f455a3464115..6ae085260e13ebb1623867c88525c99f8b5d2698 100644 --- a/official/vision/image_classification/configs/base_configs.py +++ b/official/vision/image_classification/configs/base_configs.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,22 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Definitions for high level configuration groups..""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - +# Lint as: python3 +"""Definitions for high level configuration groups..""" from typing import Any, List, Mapping, Optional import dataclasses +from official.core import config_definitions from official.modeling import hyperparams -from official.modeling.hyperparams import config_definitions +from official.modeling.hyperparams import config_definitions as legacy_cfg -CallbacksConfig = config_definitions.CallbacksConfig -TensorboardConfig = config_definitions.TensorboardConfig +CallbacksConfig = legacy_cfg.CallbacksConfig +TensorboardConfig = legacy_cfg.TensorboardConfig RuntimeConfig = config_definitions.RuntimeConfig @@ -79,7 +75,7 @@ class TrainConfig(hyperparams.Config): callbacks: An instance of CallbacksConfig. metrics: An instance of MetricsConfig. tensorboard: An instance of TensorboardConfig. - set_epoch_loop: Whether or not to set `experimental_steps_per_execution` to + set_epoch_loop: Whether or not to set `steps_per_execution` to equal the number of training steps in `model.compile`. This reduces the number of callbacks run per epoch which significantly improves end-to-end TPU training time. diff --git a/official/vision/image_classification/configs/configs.py b/official/vision/image_classification/configs/configs.py index 8a79a1cd9b563a554614b9d4f2f0b93acf016791..413de5f542aed3f25e2f84d194a0e60c2c862da6 100644 --- a/official/vision/image_classification/configs/configs.py +++ b/official/vision/image_classification/configs/configs.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Configuration utils for image classification experiments.""" from __future__ import absolute_import from __future__ import division @@ -37,7 +37,6 @@ class EfficientNetImageNetConfig(base_configs.ExperimentConfig): train: A `TrainConfig` instance. evaluation: An `EvalConfig` instance. model: A `ModelConfig` instance. - """ export: base_configs.ExportConfig = base_configs.ExportConfig() runtime: base_configs.RuntimeConfig = base_configs.RuntimeConfig() @@ -49,16 +48,15 @@ class EfficientNetImageNetConfig(base_configs.ExperimentConfig): resume_checkpoint=True, epochs=500, steps=None, - callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True, - enable_tensorboard=True), + callbacks=base_configs.CallbacksConfig( + enable_checkpoint_and_export=True, enable_tensorboard=True), metrics=['accuracy', 'top_5'], time_history=base_configs.TimeHistoryConfig(log_steps=100), - tensorboard=base_configs.TensorboardConfig(track_lr=True, - write_model_weights=False), + tensorboard=base_configs.TensorboardConfig( + track_lr=True, write_model_weights=False), set_epoch_loop=False) evaluation: base_configs.EvalConfig = base_configs.EvalConfig( - epochs_between_evals=1, - steps=None) + epochs_between_evals=1, steps=None) model: base_configs.ModelConfig = \ efficientnet_config.EfficientNetModelConfig() @@ -82,16 +80,15 @@ class ResNetImagenetConfig(base_configs.ExperimentConfig): resume_checkpoint=True, epochs=90, steps=None, - callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True, - enable_tensorboard=True), + 
callbacks=base_configs.CallbacksConfig( + enable_checkpoint_and_export=True, enable_tensorboard=True), metrics=['accuracy', 'top_5'], time_history=base_configs.TimeHistoryConfig(log_steps=100), - tensorboard=base_configs.TensorboardConfig(track_lr=True, - write_model_weights=False), + tensorboard=base_configs.TensorboardConfig( + track_lr=True, write_model_weights=False), set_epoch_loop=False) evaluation: base_configs.EvalConfig = base_configs.EvalConfig( - epochs_between_evals=1, - steps=None) + epochs_between_evals=1, steps=None) model: base_configs.ModelConfig = resnet_config.ResNetModelConfig() @@ -109,10 +106,8 @@ def get_config(model: str, dataset: str) -> base_configs.ExperimentConfig: if dataset not in dataset_model_config_map: raise KeyError('Invalid dataset received. Received: {}. Supported ' 'datasets include: {}'.format( - dataset, - ', '.join(dataset_model_config_map.keys()))) + dataset, ', '.join(dataset_model_config_map.keys()))) raise KeyError('Invalid model received. Received: {}. 
Supported models for' '{} include: {}'.format( - model, - dataset, + model, dataset, ', '.join(dataset_model_config_map[dataset].keys()))) diff --git a/official/vision/image_classification/configs/examples/resnet/imagenet/gpu.yaml b/official/vision/image_classification/configs/examples/resnet/imagenet/gpu.yaml index 56844b81db70fbd5e8291a4c1c2eb60e3c488088..2037d6b5d1c39b9ff898eaf49ec7a68e3987356b 100644 --- a/official/vision/image_classification/configs/examples/resnet/imagenet/gpu.yaml +++ b/official/vision/image_classification/configs/examples/resnet/imagenet/gpu.yaml @@ -40,8 +40,6 @@ model: momentum: 0.9 decay: 0.9 epsilon: 0.001 - learning_rate: - name: 'piecewise_constant_with_warmup' loss: label_smoothing: 0.1 train: diff --git a/official/vision/image_classification/configs/examples/resnet/imagenet/tpu.yaml b/official/vision/image_classification/configs/examples/resnet/imagenet/tpu.yaml index ae975c16251ac0a23877bf8f6804cdea6b2baadf..0a3030333bb42ce59e67cfbe12a12be877ab19d0 100644 --- a/official/vision/image_classification/configs/examples/resnet/imagenet/tpu.yaml +++ b/official/vision/image_classification/configs/examples/resnet/imagenet/tpu.yaml @@ -43,8 +43,6 @@ model: epsilon: 0.001 moving_average_decay: 0. lookahead: False - learning_rate: - name: 'piecewise_constant_with_warmup' loss: label_smoothing: 0.1 train: diff --git a/official/vision/image_classification/dataset_factory.py b/official/vision/image_classification/dataset_factory.py index e9dad1268a7bed86f622f80ca28f4d485a0fab31..463de95c77ec1e4eb7b6187181a2fe54121bb6a1 100644 --- a/official/vision/image_classification/dataset_factory.py +++ b/official/vision/image_classification/dataset_factory.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Dataset utilities for vision tasks using TFDS and tf.data.Dataset.""" from __future__ import absolute_import from __future__ import division @@ -21,6 +21,7 @@ from __future__ import print_function import os from typing import Any, List, Optional, Tuple, Mapping, Union + from absl import logging from dataclasses import dataclass import tensorflow as tf @@ -30,7 +31,6 @@ from official.modeling.hyperparams import base_config from official.vision.image_classification import augment from official.vision.image_classification import preprocessing - AUGMENTERS = { 'autoaugment': augment.AutoAugment, 'randaugment': augment.RandAugment, @@ -42,8 +42,8 @@ class AugmentConfig(base_config.Config): """Configuration for image augmenters. Attributes: - name: The name of the image augmentation to use. Possible options are - None (default), 'autoaugment', or 'randaugment'. + name: The name of the image augmentation to use. Possible options are None + (default), 'autoaugment', or 'randaugment'. params: Any paramaters used to initialize the augmenter. """ name: Optional[str] = None @@ -68,17 +68,17 @@ class DatasetConfig(base_config.Config): 'tfds' (load using TFDS), 'records' (load from TFRecords), or 'synthetic' (generate dummy synthetic data without reading from files). split: The split of the dataset. Usually 'train', 'validation', or 'test'. - image_size: The size of the image in the dataset. This assumes that - `width` == `height`. Set to 'infer' to infer the image size from TFDS - info. This requires `name` to be a registered dataset in TFDS. - num_classes: The number of classes given by the dataset. Set to 'infer' - to infer the image size from TFDS info. 
This requires `name` to be a + image_size: The size of the image in the dataset. This assumes that `width` + == `height`. Set to 'infer' to infer the image size from TFDS info. This + requires `name` to be a registered dataset in TFDS. + num_classes: The number of classes given by the dataset. Set to 'infer' to + infer the image size from TFDS info. This requires `name` to be a registered dataset in TFDS. - num_channels: The number of channels given by the dataset. Set to 'infer' - to infer the image size from TFDS info. This requires `name` to be a + num_channels: The number of channels given by the dataset. Set to 'infer' to + infer the image size from TFDS info. This requires `name` to be a registered dataset in TFDS. - num_examples: The number of examples given by the dataset. Set to 'infer' - to infer the image size from TFDS info. This requires `name` to be a + num_examples: The number of examples given by the dataset. Set to 'infer' to + infer the image size from TFDS info. This requires `name` to be a registered dataset in TFDS. batch_size: The base batch size for the dataset. use_per_replica_batch_size: Whether to scale the batch size based on @@ -143,6 +143,9 @@ class ImageNetConfig(DatasetConfig): # Note: for large datasets like ImageNet, using records is faster than tfds builder: str = 'records' image_size: int = 224 + num_channels: int = 3 + num_examples: int = 1281167 + num_classes: int = 1000 batch_size: int = 128 @@ -267,8 +270,14 @@ class DatasetBuilder: @property def info(self) -> tfds.core.DatasetInfo: """The TFDS dataset info, if available.""" - if self.builder_info is None: - self.builder_info = tfds.builder(self.config.name).info + try: + if self.builder_info is None: + self.builder_info = tfds.builder(self.config.name).info + except ConnectionError as e: + logging.error('Failed to use TFDS to load info. 
Please set dataset info ' + '(image_size, num_channels, num_examples, num_classes) in ' + 'the dataset config.') + raise e return self.builder_info def build(self, strategy: tf.distribute.Strategy = None) -> tf.data.Dataset: @@ -284,19 +293,19 @@ class DatasetBuilder: """ if strategy: if strategy.num_replicas_in_sync != self.config.num_devices: - logging.warn('Passed a strategy with %d devices, but expected' - '%d devices.', - strategy.num_replicas_in_sync, - self.config.num_devices) - dataset = strategy.experimental_distribute_datasets_from_function( - self._build) + logging.warn( + 'Passed a strategy with %d devices, but expected' + '%d devices.', strategy.num_replicas_in_sync, + self.config.num_devices) + dataset = strategy.distribute_datasets_from_function(self._build) else: dataset = self._build() return dataset - def _build(self, input_context: tf.distribute.InputContext = None - ) -> tf.data.Dataset: + def _build( + self, + input_context: tf.distribute.InputContext = None) -> tf.data.Dataset: """Construct a dataset end-to-end and return it. Args: @@ -328,8 +337,7 @@ class DatasetBuilder: logging.info('Using TFDS to load data.') - builder = tfds.builder(self.config.name, - data_dir=self.config.data_dir) + builder = tfds.builder(self.config.name, data_dir=self.config.data_dir) if self.config.download: builder.download_and_prepare() @@ -380,8 +388,8 @@ class DatasetBuilder: dataset = tf.data.Dataset.range(1) dataset = dataset.repeat() - dataset = dataset.map(generate_data, - num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.map( + generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE) return dataset def pipeline(self, dataset: tf.data.Dataset) -> tf.data.Dataset: @@ -393,14 +401,14 @@ class DatasetBuilder: Returns: A TensorFlow dataset outputting batched images and labels. 
""" - if (self.config.builder != 'tfds' and self.input_context - and self.input_context.num_input_pipelines > 1): + if (self.config.builder != 'tfds' and self.input_context and + self.input_context.num_input_pipelines > 1): dataset = dataset.shard(self.input_context.num_input_pipelines, self.input_context.input_pipeline_id) - logging.info('Sharding the dataset: input_pipeline_id=%d ' - 'num_input_pipelines=%d', - self.input_context.num_input_pipelines, - self.input_context.input_pipeline_id) + logging.info( + 'Sharding the dataset: input_pipeline_id=%d ' + 'num_input_pipelines=%d', self.input_context.num_input_pipelines, + self.input_context.input_pipeline_id) if self.is_training and self.config.builder == 'records': # Shuffle the input files. @@ -429,8 +437,8 @@ class DatasetBuilder: preprocess = self.parse_record else: preprocess = self.preprocess - dataset = dataset.map(preprocess, - num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.map( + preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE) if self.input_context and self.config.num_devices > 1: if not self.config.use_per_replica_batch_size: @@ -444,11 +452,11 @@ class DatasetBuilder: # The batch size of the dataset will be multiplied by the number of # replicas automatically when strategy.distribute_datasets_from_function # is called, so we use local batch size here. 
- dataset = dataset.batch(self.local_batch_size, - drop_remainder=self.is_training) + dataset = dataset.batch( + self.local_batch_size, drop_remainder=self.is_training) else: - dataset = dataset.batch(self.global_batch_size, - drop_remainder=self.is_training) + dataset = dataset.batch( + self.global_batch_size, drop_remainder=self.is_training) # Prefetch overlaps in-feed with training dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) @@ -470,24 +478,15 @@ class DatasetBuilder: def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: """Parse an ImageNet record from a serialized string Tensor.""" keys_to_features = { - 'image/encoded': - tf.io.FixedLenFeature((), tf.string, ''), - 'image/format': - tf.io.FixedLenFeature((), tf.string, 'jpeg'), - 'image/class/label': - tf.io.FixedLenFeature([], tf.int64, -1), - 'image/class/text': - tf.io.FixedLenFeature([], tf.string, ''), - 'image/object/bbox/xmin': - tf.io.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymin': - tf.io.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/xmax': - tf.io.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymax': - tf.io.VarLenFeature(dtype=tf.float32), - 'image/object/class/label': - tf.io.VarLenFeature(dtype=tf.int64), + 'image/encoded': tf.io.FixedLenFeature((), tf.string, ''), + 'image/format': tf.io.FixedLenFeature((), tf.string, 'jpeg'), + 'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1), + 'image/class/text': tf.io.FixedLenFeature([], tf.string, ''), + 'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32), + 'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32), + 'image/object/class/label': tf.io.VarLenFeature(dtype=tf.int64), } parsed = tf.io.parse_single_example(record, keys_to_features) @@ -502,8 +501,8 @@ class DatasetBuilder: return image, label - def preprocess(self, image: 
tf.Tensor, label: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor]: + def preprocess(self, image: tf.Tensor, + label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: """Apply image preprocessing and augmentation to the image and label.""" if self.is_training: image = preprocessing.preprocess_for_train( diff --git a/official/vision/image_classification/efficientnet/__init__.py b/official/vision/image_classification/efficientnet/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/image_classification/efficientnet/__init__.py +++ b/official/vision/image_classification/efficientnet/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/image_classification/efficientnet/common_modules.py b/official/vision/image_classification/efficientnet/common_modules.py index 9c9c2097d2398ec78cae5e1265478f804860f944..e3657bd862b4dca1cb678e14de8e0bd7568eceee 100644 --- a/official/vision/image_classification/efficientnet/common_modules.py +++ b/official/vision/image_classification/efficientnet/common_modules.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Common modeling utilities.""" from __future__ import absolute_import from __future__ import division @@ -79,7 +79,7 @@ def get_batch_norm(batch_norm_type: Text) -> tf.keras.layers.BatchNormalization: Args: batch_norm_type: The type of batch normalization layer implementation. `tpu` - will use `TpuBatchNormalization`. + will use `TpuBatchNormalization`. Returns: An instance of `tf.keras.layers.BatchNormalization`. @@ -95,8 +95,10 @@ def count_params(model, trainable_only=True): if not trainable_only: return model.count_params() else: - return int(np.sum([tf.keras.backend.count_params(p) - for p in model.trainable_weights])) + return int( + np.sum([ + tf.keras.backend.count_params(p) for p in model.trainable_weights + ])) def load_weights(model: tf.keras.Model, @@ -107,8 +109,8 @@ def load_weights(model: tf.keras.Model, Args: model: the model to load weights into model_weights_path: the path of the model weights - weights_format: the model weights format. One of 'saved_model', 'h5', - or 'checkpoint'. + weights_format: the model weights format. One of 'saved_model', 'h5', or + 'checkpoint'. """ if weights_format == 'saved_model': loaded_model = tf.keras.models.load_model(model_weights_path) diff --git a/official/vision/image_classification/efficientnet/efficientnet_config.py b/official/vision/image_classification/efficientnet/efficientnet_config.py index a758cc63c944463ebf184eaeae26cebd5935031a..47cfd740221d3581db585e90bc6df0711c289019 100644 --- a/official/vision/image_classification/efficientnet/efficientnet_config.py +++ b/official/vision/image_classification/efficientnet/efficientnet_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Configuration definitions for EfficientNet losses, learning rates, and optimizers.""" from __future__ import absolute_import from __future__ import division diff --git a/official/vision/image_classification/efficientnet/efficientnet_model.py b/official/vision/image_classification/efficientnet/efficientnet_model.py index ab81fc25d1200557c99f77424d34c74cf8774d84..e5f2c2c69fdea82288f286971395b6f9aec3f500 100644 --- a/official/vision/image_classification/efficientnet/efficientnet_model.py +++ b/official/vision/image_classification/efficientnet/efficientnet_model.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Contains definitions for EfficientNet model. [1] Mingxing Tan, Quoc V. 
Le @@ -64,11 +64,11 @@ class ModelConfig(base_config.Config): # (input_filters, output_filters, kernel_size, num_repeat, # expand_ratio, strides, se_ratio) # pylint: disable=bad-whitespace - BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25), - BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25), - BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25), - BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25), - BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25), + BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25), + BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25), + BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25), + BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25), + BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25), BlockConfig.from_args(112, 192, 5, 4, 6, (2, 2), 0.25), BlockConfig.from_args(192, 320, 3, 1, 6, (1, 1), 0.25), # pylint: enable=bad-whitespace @@ -128,8 +128,7 @@ DENSE_KERNEL_INITIALIZER = { } -def round_filters(filters: int, - config: ModelConfig) -> int: +def round_filters(filters: int, config: ModelConfig) -> int: """Round number of filters based on width coefficient.""" width_coefficient = config.width_coefficient min_depth = config.min_depth @@ -189,21 +188,24 @@ def conv2d_block(inputs: tf.Tensor, init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER}) else: conv2d = tf.keras.layers.Conv2D - init_kwargs.update({'filters': conv_filters, - 'kernel_initializer': CONV_KERNEL_INITIALIZER}) + init_kwargs.update({ + 'filters': conv_filters, + 'kernel_initializer': CONV_KERNEL_INITIALIZER + }) x = conv2d(**init_kwargs)(inputs) if use_batch_norm: bn_axis = 1 if data_format == 'channels_first' else -1 - x = batch_norm(axis=bn_axis, - momentum=bn_momentum, - epsilon=bn_epsilon, - name=name + '_bn')(x) + x = batch_norm( + axis=bn_axis, + momentum=bn_momentum, + epsilon=bn_epsilon, + name=name + '_bn')( + x) if activation is not None: - x = tf.keras.layers.Activation(activation, - name=name + '_activation')(x) + x = 
tf.keras.layers.Activation(activation, name=name + '_activation')(x) return x @@ -235,42 +237,43 @@ def mb_conv_block(inputs: tf.Tensor, if block.fused_conv: # If we use fused mbconv, skip expansion and use regular conv. - x = conv2d_block(x, - filters, - config, - kernel_size=block.kernel_size, - strides=block.strides, - activation=activation, - name=prefix + 'fused') + x = conv2d_block( + x, + filters, + config, + kernel_size=block.kernel_size, + strides=block.strides, + activation=activation, + name=prefix + 'fused') else: if block.expand_ratio != 1: # Expansion phase kernel_size = (1, 1) if use_depthwise else (3, 3) - x = conv2d_block(x, - filters, - config, - kernel_size=kernel_size, - activation=activation, - name=prefix + 'expand') + x = conv2d_block( + x, + filters, + config, + kernel_size=kernel_size, + activation=activation, + name=prefix + 'expand') # Depthwise Convolution if use_depthwise: - x = conv2d_block(x, - conv_filters=None, - config=config, - kernel_size=block.kernel_size, - strides=block.strides, - activation=activation, - depthwise=True, - name=prefix + 'depthwise') + x = conv2d_block( + x, + conv_filters=None, + config=config, + kernel_size=block.kernel_size, + strides=block.strides, + activation=activation, + depthwise=True, + name=prefix + 'depthwise') # Squeeze and Excitation phase if use_se: assert block.se_ratio is not None assert 0 < block.se_ratio <= 1 - num_reduced_filters = max(1, int( - block.input_filters * block.se_ratio - )) + num_reduced_filters = max(1, int(block.input_filters * block.se_ratio)) if data_format == 'channels_first': se_shape = (filters, 1, 1) @@ -280,53 +283,51 @@ def mb_conv_block(inputs: tf.Tensor, se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x) se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se) - se = conv2d_block(se, - num_reduced_filters, - config, - use_bias=True, - use_batch_norm=False, - activation=activation, - name=prefix + 'se_reduce') - se = 
conv2d_block(se, - filters, - config, - use_bias=True, - use_batch_norm=False, - activation='sigmoid', - name=prefix + 'se_expand') + se = conv2d_block( + se, + num_reduced_filters, + config, + use_bias=True, + use_batch_norm=False, + activation=activation, + name=prefix + 'se_reduce') + se = conv2d_block( + se, + filters, + config, + use_bias=True, + use_batch_norm=False, + activation='sigmoid', + name=prefix + 'se_expand') x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite') # Output phase - x = conv2d_block(x, - block.output_filters, - config, - activation=None, - name=prefix + 'project') + x = conv2d_block( + x, block.output_filters, config, activation=None, name=prefix + 'project') # Add identity so that quantization-aware training can insert quantization # ops correctly. - x = tf.keras.layers.Activation(tf_utils.get_activation('identity'), - name=prefix + 'id')(x) + x = tf.keras.layers.Activation( + tf_utils.get_activation('identity'), name=prefix + 'id')( + x) - if (block.id_skip - and all(s == 1 for s in block.strides) - and block.input_filters == block.output_filters): + if (block.id_skip and all(s == 1 for s in block.strides) and + block.input_filters == block.output_filters): if drop_connect_rate and drop_connect_rate > 0: # Apply dropconnect # The only difference between dropout and dropconnect in TF is scaling by # drop_connect_rate during training. See: # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612 - x = tf.keras.layers.Dropout(drop_connect_rate, - noise_shape=(None, 1, 1, 1), - name=prefix + 'drop')(x) + x = tf.keras.layers.Dropout( + drop_connect_rate, noise_shape=(None, 1, 1, 1), name=prefix + 'drop')( + x) x = tf.keras.layers.add([x, inputs], name=prefix + 'add') return x -def efficientnet(image_input: tf.keras.layers.Input, - config: ModelConfig): +def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig): """Creates an EfficientNet graph given the model parameters. 
This function is wrapped by the `EfficientNet` class to make a tf.keras.Model. @@ -357,19 +358,18 @@ def efficientnet(image_input: tf.keras.layers.Input, # Happens on GPU/TPU if available. x = tf.keras.layers.Permute((3, 1, 2))(x) if rescale_input: - x = preprocessing.normalize_images(x, - num_channels=input_channels, - dtype=dtype, - data_format=data_format) + x = preprocessing.normalize_images( + x, num_channels=input_channels, dtype=dtype, data_format=data_format) # Build stem - x = conv2d_block(x, - round_filters(stem_base_filters, config), - config, - kernel_size=[3, 3], - strides=[2, 2], - activation=activation, - name='stem') + x = conv2d_block( + x, + round_filters(stem_base_filters, config), + config, + kernel_size=[3, 3], + strides=[2, 2], + activation=activation, + name='stem') # Build blocks num_blocks_total = sum( @@ -391,10 +391,7 @@ def efficientnet(image_input: tf.keras.layers.Input, x = mb_conv_block(x, block, config, block_prefix) block_num += 1 if block.num_repeat > 1: - block = block.replace( - input_filters=block.output_filters, - strides=[1, 1] - ) + block = block.replace(input_filters=block.output_filters, strides=[1, 1]) for block_idx in range(block.num_repeat - 1): drop_rate = drop_connect_rate * float(block_num) / num_blocks_total @@ -404,11 +401,12 @@ def efficientnet(image_input: tf.keras.layers.Input, block_num += 1 # Build top - x = conv2d_block(x, - round_filters(top_base_filters, config), - config, - activation=activation, - name='top') + x = conv2d_block( + x, + round_filters(top_base_filters, config), + config, + activation=activation, + name='top') # Build classifier x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x) @@ -419,7 +417,8 @@ def efficientnet(image_input: tf.keras.layers.Input, kernel_initializer=DENSE_KERNEL_INITIALIZER, kernel_regularizer=tf.keras.regularizers.l2(weight_decay), bias_regularizer=tf.keras.regularizers.l2(weight_decay), - name='logits')(x) + name='logits')( + x) x = 
tf.keras.layers.Activation('softmax', name='probs')(x) return x @@ -439,8 +438,7 @@ class EfficientNet(tf.keras.Model): Args: config: (optional) the main model parameters to create the model - overrides: (optional) a dict containing keys that can override - config + overrides: (optional) a dict containing keys that can override config """ overrides = overrides or {} config = config or ModelConfig() @@ -457,9 +455,7 @@ class EfficientNet(tf.keras.Model): # Cast to float32 in case we have a different model dtype output = tf.cast(output, tf.float32) - logging.info('Building model %s with params %s', - model_name, - self.config) + logging.info('Building model %s with params %s', model_name, self.config) super(EfficientNet, self).__init__( inputs=image_input, outputs=output, name=model_name) @@ -477,8 +473,8 @@ class EfficientNet(tf.keras.Model): Args: model_name: the predefined model name model_weights_path: the path to the weights (h5 file or saved model dir) - weights_format: the model weights format. One of 'saved_model', 'h5', - or 'checkpoint'. + weights_format: the model weights format. One of 'saved_model', 'h5', or + 'checkpoint'. overrides: (optional) a dict containing keys that can override config Returns: @@ -498,8 +494,7 @@ class EfficientNet(tf.keras.Model): model = cls(config=config, overrides=overrides) if model_weights_path: - common_modules.load_weights(model, - model_weights_path, - weights_format=weights_format) + common_modules.load_weights( + model, model_weights_path, weights_format=weights_format) return model diff --git a/official/vision/image_classification/efficientnet/tfhub_export.py b/official/vision/image_classification/efficientnet/tfhub_export.py index 3be8608a5cfc25442f5f936b4052f90b89c6cfce..691e568fa0a32abaa536deff6ee8a13621c1ee7e 100644 --- a/official/vision/image_classification/efficientnet/tfhub_export.py +++ b/official/vision/image_classification/efficientnet/tfhub_export.py @@ -1,4 +1,4 @@ -# Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A script to export TF-Hub SavedModel.""" from __future__ import absolute_import @@ -30,10 +30,8 @@ from official.vision.image_classification.efficientnet import efficientnet_model FLAGS = flags.FLAGS -flags.DEFINE_string("model_name", None, - "EfficientNet model name.") -flags.DEFINE_string("model_path", None, - "File path to TF model checkpoint.") +flags.DEFINE_string("model_name", None, "EfficientNet model name.") +flags.DEFINE_string("model_path", None, "File path to TF model checkpoint.") flags.DEFINE_string("export_path", None, "TF-Hub SavedModel destination path to export.") @@ -65,5 +63,6 @@ def main(argv): export_tfhub(FLAGS.model_path, FLAGS.export_path, FLAGS.model_name) + if __name__ == "__main__": app.run(main) diff --git a/official/vision/image_classification/learning_rate.py b/official/vision/image_classification/learning_rate.py index 1c78b04bc6297a08a8bc7823dccc00f464e05ad4..72f7e95187521eeebefa1e698ca5382f10642e88 100644 --- a/official/vision/image_classification/learning_rate.py +++ b/official/vision/image_classification/learning_rate.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + +# Lint as: python3 """Learning rate utilities for vision tasks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from typing import Any, List, Mapping +from typing import Any, Mapping, Optional import numpy as np import tensorflow as tf @@ -29,32 +29,39 @@ BASE_LEARNING_RATE = 0.1 class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule): """A wrapper for LearningRateSchedule that includes warmup steps.""" - def __init__( - self, - lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule, - warmup_steps: int): + def __init__(self, + lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule, + warmup_steps: int, + warmup_lr: Optional[float] = None): """Add warmup decay to a learning rate schedule. Args: lr_schedule: base learning rate scheduler warmup_steps: number of warmup steps - + warmup_lr: an optional field for the final warmup learning rate. This + should be provided if the base `lr_schedule` does not contain this + field. 
""" super(WarmupDecaySchedule, self).__init__() self._lr_schedule = lr_schedule self._warmup_steps = warmup_steps + self._warmup_lr = warmup_lr def __call__(self, step: int): lr = self._lr_schedule(step) if self._warmup_steps: - initial_learning_rate = tf.convert_to_tensor( - self._lr_schedule.initial_learning_rate, name="initial_learning_rate") + if self._warmup_lr is not None: + initial_learning_rate = tf.convert_to_tensor( + self._warmup_lr, name="initial_learning_rate") + else: + initial_learning_rate = tf.convert_to_tensor( + self._lr_schedule.initial_learning_rate, + name="initial_learning_rate") dtype = initial_learning_rate.dtype global_step_recomp = tf.cast(step, dtype) warmup_steps = tf.cast(self._warmup_steps, dtype) warmup_lr = initial_learning_rate * global_step_recomp / warmup_steps - lr = tf.cond(global_step_recomp < warmup_steps, - lambda: warmup_lr, + lr = tf.cond(global_step_recomp < warmup_steps, lambda: warmup_lr, lambda: lr) return lr @@ -62,65 +69,11 @@ class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule): config = self._lr_schedule.get_config() config.update({ "warmup_steps": self._warmup_steps, + "warmup_lr": self._warmup_lr, }) return config -# TODO(b/149030439) - refactor this with -# tf.keras.optimizers.schedules.PiecewiseConstantDecay + WarmupDecaySchedule. -class PiecewiseConstantDecayWithWarmup( - tf.keras.optimizers.schedules.LearningRateSchedule): - """Piecewise constant decay with warmup schedule.""" - - def __init__(self, - batch_size: int, - epoch_size: int, - warmup_epochs: int, - boundaries: List[int], - multipliers: List[float]): - """Piecewise constant decay with warmup. - - Args: - batch_size: The training batch size used in the experiment. - epoch_size: The size of an epoch, or the number of examples in an epoch. - warmup_epochs: The number of warmup epochs to apply. - boundaries: The list of floats with strictly increasing entries. 
- multipliers: The list of multipliers/learning rates to use for the - piecewise portion. The length must be 1 less than that of boundaries. - - """ - super(PiecewiseConstantDecayWithWarmup, self).__init__() - if len(boundaries) != len(multipliers) - 1: - raise ValueError("The length of boundaries must be 1 less than the " - "length of multipliers") - - base_lr_batch_size = 256 - steps_per_epoch = epoch_size // batch_size - - self._rescaled_lr = BASE_LEARNING_RATE * batch_size / base_lr_batch_size - self._step_boundaries = [float(steps_per_epoch) * x for x in boundaries] - self._lr_values = [self._rescaled_lr * m for m in multipliers] - self._warmup_steps = warmup_epochs * steps_per_epoch - - def __call__(self, step: int): - """Compute learning rate at given step.""" - def warmup_lr(): - return self._rescaled_lr * ( - step / tf.cast(self._warmup_steps, tf.float32)) - def piecewise_lr(): - return tf.compat.v1.train.piecewise_constant( - tf.cast(step, tf.float32), self._step_boundaries, self._lr_values) - return tf.cond(step < self._warmup_steps, warmup_lr, piecewise_lr) - - def get_config(self) -> Mapping[str, Any]: - return { - "rescaled_lr": self._rescaled_lr, - "step_boundaries": self._step_boundaries, - "lr_values": self._lr_values, - "warmup_steps": self._warmup_steps, - } - - class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): """Class to generate learning rate tensor.""" diff --git a/official/vision/image_classification/learning_rate_test.py b/official/vision/image_classification/learning_rate_test.py index 272d2935fd7f1e6a7f1810e9247c4ef505021fde..6c33ed24b8e46b8ecb58005a1f528e62a66f0005 100644 --- a/official/vision/image_classification/learning_rate_test.py +++ b/official/vision/image_classification/learning_rate_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests for learning_rate.""" from __future__ import absolute_import @@ -37,52 +37,13 @@ class LearningRateTests(tf.test.TestCase): decay_steps=decay_steps, decay_rate=decay_rate) lr = learning_rate.WarmupDecaySchedule( - lr_schedule=base_lr, - warmup_steps=warmup_steps) + lr_schedule=base_lr, warmup_steps=warmup_steps) for step in range(warmup_steps - 1): config = lr.get_config() self.assertEqual(config['warmup_steps'], warmup_steps) - self.assertAllClose(self.evaluate(lr(step)), - step / warmup_steps * initial_lr) - - def test_piecewise_constant_decay_with_warmup(self): - """Basic computational test for piecewise constant decay with warmup.""" - boundaries = [1, 2, 3] - warmup_epochs = boundaries[0] - learning_rate_multipliers = [1.0, 0.1, 0.001] - expected_keys = [ - 'rescaled_lr', 'step_boundaries', 'lr_values', 'warmup_steps', - ] - - expected_lrs = [0.0, 0.1, 0.1] - - lr = learning_rate.PiecewiseConstantDecayWithWarmup( - batch_size=256, - epoch_size=256, - warmup_epochs=warmup_epochs, - boundaries=boundaries[1:], - multipliers=learning_rate_multipliers) - - step = 0 - - config = lr.get_config() - self.assertAllInSet(list(config.keys()), expected_keys) - - for boundary, expected_lr in zip(boundaries, expected_lrs): - for _ in range(step, boundary): - self.assertAllClose(self.evaluate(lr(step)), expected_lr) - step += 1 - - def test_piecewise_constant_decay_invalid_boundaries(self): - with self.assertRaisesRegex(ValueError, - 'The length of boundaries must be 1 less '): - learning_rate.PiecewiseConstantDecayWithWarmup( - batch_size=256, - 
epoch_size=256, - warmup_epochs=1, - boundaries=[1, 2], - multipliers=[1, 2]) + self.assertAllClose( + self.evaluate(lr(step)), step / warmup_steps * initial_lr) def test_cosine_decay_with_warmup(self): """Basic computational test for cosine decay with warmup.""" diff --git a/official/vision/image_classification/mnist_main.py b/official/vision/image_classification/mnist_main.py index 1470c02d05b431e95de3c5807b68678a96d2b520..3eba80b06a9215cb5dc4d3b13facb2f2a4f3058c 100644 --- a/official/vision/image_classification/mnist_main.py +++ b/official/vision/image_classification/mnist_main.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Runs a simple model on the MNIST dataset.""" from __future__ import absolute_import from __future__ import division @@ -19,14 +19,14 @@ from __future__ import print_function import os +# Import libraries from absl import app from absl import flags from absl import logging import tensorflow as tf import tensorflow_datasets as tfds - +from official.common import distribute_utils from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils from official.utils.misc import model_helpers from official.vision.image_classification.resnet import common @@ -81,12 +81,15 @@ def run(flags_obj, datasets_override=None, strategy_override=None): Returns: Dictionary of training and eval stats. """ - strategy = strategy_override or distribution_utils.get_distribution_strategy( + # Start TF profiler server. 
+ tf.profiler.experimental.server.start(flags_obj.profiler_port) + + strategy = strategy_override or distribute_utils.get_distribution_strategy( distribution_strategy=flags_obj.distribution_strategy, num_gpus=flags_obj.num_gpus, tpu_address=flags_obj.tpu) - strategy_scope = distribution_utils.get_strategy_scope(strategy) + strategy_scope = distribute_utils.get_strategy_scope(strategy) mnist = tfds.builder('mnist', data_dir=flags_obj.data_dir) if flags_obj.download: @@ -154,8 +157,10 @@ def define_mnist_flags(): distribution_strategy=True) flags_core.define_device() flags_core.define_distribution() - flags.DEFINE_bool('download', False, + flags.DEFINE_bool('download', True, 'Whether to download data to `--data_dir`.') + flags.DEFINE_integer('profiler_port', 9012, + 'Port to start profiler server on.') FLAGS.set_default('batch_size', 1024) diff --git a/official/vision/image_classification/mnist_test.py b/official/vision/image_classification/mnist_test.py index c05efcfe5d68fbbb3c181c19b59444db1abe5702..c94396a444294b37259ba849bd8ea2f6f76997d0 100644 --- a/official/vision/image_classification/mnist_test.py +++ b/official/vision/image_classification/mnist_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Test the Keras MNIST model on GPU.""" from __future__ import absolute_import @@ -29,15 +29,16 @@ from official.utils.testing import integration from official.vision.image_classification import mnist_main +mnist_main.define_mnist_flags() + + def eager_strategy_combinations(): return combinations.combine( distribution=[ strategy_combinations.default_strategy, - strategy_combinations.tpu_strategy, + strategy_combinations.cloud_tpu_strategy, strategy_combinations.one_device_strategy_gpu, - ], - mode="eager", - ) + ],) class KerasMnistTest(tf.test.TestCase, parameterized.TestCase): @@ -47,7 +48,6 @@ class KerasMnistTest(tf.test.TestCase, parameterized.TestCase): @classmethod def setUpClass(cls): # pylint: disable=invalid-name super(KerasMnistTest, cls).setUpClass() - mnist_main.define_mnist_flags() def tearDown(self): super(KerasMnistTest, self).tearDown() @@ -58,7 +58,8 @@ class KerasMnistTest(tf.test.TestCase, parameterized.TestCase): """Test Keras MNIST model with `strategy`.""" extra_flags = [ - "-train_epochs", "1", + "-train_epochs", + "1", # Let TFDS find the metadata folder automatically "--data_dir=" ] @@ -72,14 +73,15 @@ class KerasMnistTest(tf.test.TestCase, parameterized.TestCase): tf.data.Dataset.from_tensor_slices(dummy_data), ) - run = functools.partial(mnist_main.run, - datasets_override=datasets, - strategy_override=distribution) + run = functools.partial( + mnist_main.run, + datasets_override=datasets, + strategy_override=distribution) integration.run_synthetic( main=run, synth=False, - tmp_root=self.get_temp_dir(), + tmp_root=self.create_tempdir().full_path, extra_flags=extra_flags) diff --git a/official/vision/image_classification/optimizer_factory.py b/official/vision/image_classification/optimizer_factory.py index 29b19e22daf2605ba430506c8b2d6545b1cc0074..e3eaba944b5c22fb2543972e33eca0a1256f062d 100644 --- 
a/official/vision/image_classification/optimizer_factory.py +++ b/official/vision/image_classification/optimizer_factory.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,243 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Optimizer factory for vision tasks.""" from __future__ import absolute_import from __future__ import division # from __future__ import google_type_annotations from __future__ import print_function -from typing import Any, Dict, Text, List +from typing import Any, Dict, Text from absl import logging import tensorflow as tf import tensorflow_addons as tfa +from official.modeling import optimization from official.vision.image_classification import learning_rate from official.vision.image_classification.configs import base_configs # pylint: disable=protected-access -class MovingAverage(tf.keras.optimizers.Optimizer): - """Optimizer that computes a moving average of the variables. - - Empirically it has been found that using the moving average of the trained - parameters of a deep network is better than using its trained parameters - directly. This optimizer allows you to compute this moving average and swap - the variables at save time so that any code outside of the training loop - will use by default the average values instead of the original ones. 
- - Example of usage for training: - ```python - opt = tf.keras.optimizers.SGD(learning_rate) - opt = MovingAverage(opt) - - opt.shadow_copy(model) - ``` - - At test time, swap the shadow variables to evaluate on the averaged weights: - ```python - opt.swap_weights() - # Test eval the model here - opt.swap_weights() - ``` - """ - - def __init__(self, - optimizer: tf.keras.optimizers.Optimizer, - average_decay: float = 0.99, - start_step: int = 0, - dynamic_decay: bool = True, - name: Text = 'moving_average', - **kwargs): - """Construct a new MovingAverage optimizer. - - Args: - optimizer: `tf.keras.optimizers.Optimizer` that will be - used to compute and apply gradients. - average_decay: float. Decay to use to maintain the moving averages - of trained variables. - start_step: int. What step to start the moving average. - dynamic_decay: bool. Whether to change the decay based on the number - of optimizer updates. Decay will start at 0.1 and gradually increase - up to `average_decay` after each optimizer update. This behavior is - similar to `tf.train.ExponentialMovingAverage` in TF 1.x. - name: Optional name for the operations created when applying - gradients. Defaults to "moving_average". - **kwargs: keyword arguments. Allowed to be {`clipnorm`, - `clipvalue`, `lr`, `decay`}. 
- """ - super(MovingAverage, self).__init__(name, **kwargs) - self._optimizer = optimizer - self._average_decay = average_decay - self._start_step = tf.constant(start_step, tf.float32) - self._dynamic_decay = dynamic_decay - - def shadow_copy(self, model: tf.keras.Model): - """Creates shadow variables for the given model weights.""" - for var in model.weights: - self.add_slot(var, 'average', initializer='zeros') - self._average_weights = [ - self.get_slot(var, 'average') for var in model.weights - ] - self._model_weights = model.weights - - @property - def has_shadow_copy(self): - """Whether this optimizer has created shadow variables.""" - return self._model_weights is not None - - def _create_slots(self, var_list): - self._optimizer._create_slots(var_list=var_list) # pylint: disable=protected-access - - def apply_gradients(self, grads_and_vars, name: Text = None): - result = self._optimizer.apply_gradients(grads_and_vars, name) - self.update_average(self._optimizer.iterations) - return result - - @tf.function - def update_average(self, step: tf.Tensor): - step = tf.cast(step, tf.float32) - if step < self._start_step: - decay = tf.constant(0., tf.float32) - elif self._dynamic_decay: - decay = step - self._start_step - decay = tf.minimum(self._average_decay, (1. + decay) / (10. + decay)) - else: - decay = self._average_decay - - def _apply_moving(v_moving, v_normal): - diff = v_moving - v_normal - v_moving.assign_sub(tf.cast(1. - decay, v_moving.dtype) * diff) - return v_moving - - def _update(strategy, v_moving_and_v_normal): - for v_moving, v_normal in v_moving_and_v_normal: - strategy.extended.update(v_moving, _apply_moving, args=(v_normal,)) - - ctx = tf.distribute.get_replica_context() - return ctx.merge_call(_update, args=(zip(self._average_weights, - self._model_weights),)) - - def swap_weights(self): - """Swap the average and moving weights. - - This is a convenience method to allow one to evaluate the averaged weights - at test time. 
Loads the weights stored in `self._average` into the model, - keeping a copy of the original model weights. Swapping twice will return - the original weights. - """ - if tf.distribute.in_cross_replica_context(): - strategy = tf.distribute.get_strategy() - strategy.run(self._swap_weights, args=()) - else: - raise ValueError('Swapping weights must occur under a ' - 'tf.distribute.Strategy') - - @tf.function - def _swap_weights(self): - def fn_0(a, b): - a.assign_add(b) - return a - def fn_1(b, a): - b.assign(a - b) - return b - def fn_2(a, b): - a.assign_sub(b) - return a - - def swap(strategy, a_and_b): - """Swap `a` and `b` and mirror to all devices.""" - for a, b in a_and_b: - strategy.extended.update(a, fn_0, args=(b,)) # a = a + b - strategy.extended.update(b, fn_1, args=(a,)) # b = a - b - strategy.extended.update(a, fn_2, args=(b,)) # a = a - b - - ctx = tf.distribute.get_replica_context() - return ctx.merge_call( - swap, args=(zip(self._average_weights, self._model_weights),)) - - def assign_average_vars(self, var_list: List[tf.Variable]): - """Assign variables in var_list with their respective averages. - - Args: - var_list: List of model variables to be assigned to their average. - Returns: - assign_op: The op corresponding to the assignment operation of - variables to their average. 
- """ - assign_op = tf.group([ - var.assign(self.get_slot(var, 'average')) for var in var_list - if var.trainable - ]) - return assign_op - - def _create_hypers(self): - self._optimizer._create_hypers() # pylint: disable=protected-access - - def _prepare(self, var_list): - return self._optimizer._prepare(var_list=var_list) # pylint: disable=protected-access - - @property - def iterations(self): - return self._optimizer.iterations - - @iterations.setter - def iterations(self, variable): - self._optimizer.iterations = variable - - @property - def weights(self): - # return self._weights + self._optimizer.weights - return self._optimizer.weights - - @property - def lr(self): - return self._optimizer._get_hyper('learning_rate') - - @lr.setter - def lr(self, lr): - self._optimizer._set_hyper('learning_rate', lr) - - @property - def learning_rate(self): - return self._optimizer._get_hyper('learning_rate') - - @learning_rate.setter - def learning_rate(self, learning_rate): # pylint: disable=redefined-outer-name - self._optimizer._set_hyper('learning_rate', learning_rate) - - def _resource_apply_dense(self, grad, var): - return self._optimizer._resource_apply_dense(grad, var) - - def _resource_apply_sparse(self, grad, var, indices): - return self._optimizer._resource_apply_sparse(grad, var, indices) - - def _resource_apply_sparse_duplicate_indices(self, grad, var, indices): - return self._optimizer._resource_apply_sparse_duplicate_indices( - grad, var, indices) - - def get_config(self): - config = { - 'optimizer': tf.keras.optimizers.serialize(self._optimizer), - 'average_decay': self._average_decay, - 'start_step': self._start_step, - 'dynamic_decay': self._dynamic_decay, - } - base_config = super(MovingAverage, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - optimizer = tf.keras.optimizers.deserialize( - config.pop('optimizer'), - custom_objects=custom_objects, - ) - 
return cls(optimizer, **config) - - def build_optimizer( optimizer_name: Text, base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule, @@ -256,15 +39,15 @@ def build_optimizer( """Build the optimizer based on name. Args: - optimizer_name: String representation of the optimizer name. Examples: - sgd, momentum, rmsprop. + optimizer_name: String representation of the optimizer name. Examples: sgd, + momentum, rmsprop. base_learning_rate: `tf.keras.optimizers.schedules.LearningRateSchedule` base learning rate. - params: String -> Any dictionary representing the optimizer params. - This should contain optimizer specific parameters such as - `base_learning_rate`, `decay`, etc. + params: String -> Any dictionary representing the optimizer params. This + should contain optimizer specific parameters such as `base_learning_rate`, + `decay`, etc. model: The `tf.keras.Model`. This is used for the shadow copy if using - `MovingAverage`. + `ExponentialMovingAverage`. Returns: A tf.keras.Optimizer. 
@@ -279,43 +62,47 @@ def build_optimizer( if optimizer_name == 'sgd': logging.info('Using SGD optimizer') nesterov = params.get('nesterov', False) - optimizer = tf.keras.optimizers.SGD(learning_rate=base_learning_rate, - nesterov=nesterov) + optimizer = tf.keras.optimizers.SGD( + learning_rate=base_learning_rate, nesterov=nesterov) elif optimizer_name == 'momentum': logging.info('Using momentum optimizer') nesterov = params.get('nesterov', False) - optimizer = tf.keras.optimizers.SGD(learning_rate=base_learning_rate, - momentum=params['momentum'], - nesterov=nesterov) + optimizer = tf.keras.optimizers.SGD( + learning_rate=base_learning_rate, + momentum=params['momentum'], + nesterov=nesterov) elif optimizer_name == 'rmsprop': logging.info('Using RMSProp') rho = params.get('decay', None) or params.get('rho', 0.9) momentum = params.get('momentum', 0.9) epsilon = params.get('epsilon', 1e-07) - optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate, - rho=rho, - momentum=momentum, - epsilon=epsilon) + optimizer = tf.keras.optimizers.RMSprop( + learning_rate=base_learning_rate, + rho=rho, + momentum=momentum, + epsilon=epsilon) elif optimizer_name == 'adam': logging.info('Using Adam') beta_1 = params.get('beta_1', 0.9) beta_2 = params.get('beta_2', 0.999) epsilon = params.get('epsilon', 1e-07) - optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon) + optimizer = tf.keras.optimizers.Adam( + learning_rate=base_learning_rate, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon) elif optimizer_name == 'adamw': logging.info('Using AdamW') weight_decay = params.get('weight_decay', 0.01) beta_1 = params.get('beta_1', 0.9) beta_2 = params.get('beta_2', 0.999) epsilon = params.get('epsilon', 1e-07) - optimizer = tfa.optimizers.AdamW(weight_decay=weight_decay, - learning_rate=base_learning_rate, - beta_1=beta_1, - beta_2=beta_2, - epsilon=epsilon) + optimizer = tfa.optimizers.AdamW( + 
weight_decay=weight_decay, + learning_rate=base_learning_rate, + beta_1=beta_1, + beta_2=beta_2, + epsilon=epsilon) else: raise ValueError('Unknown optimizer %s' % optimizer_name) @@ -327,11 +114,11 @@ def build_optimizer( moving_average_decay = params.get('moving_average_decay', 0.) if moving_average_decay is not None and moving_average_decay > 0.: if model is None: - raise ValueError('`model` must be provided if using `MovingAverage`.') + raise ValueError( + '`model` must be provided if using `ExponentialMovingAverage`.') logging.info('Including moving average decay.') - optimizer = MovingAverage( - optimizer=optimizer, - average_decay=moving_average_decay) + optimizer = optimization.ExponentialMovingAverage( + optimizer=optimizer, average_decay=moving_average_decay) optimizer.shadow_copy(model) return optimizer @@ -358,41 +145,38 @@ def build_learning_rate(params: base_configs.LearningRateConfig, if lr_multiplier and lr_multiplier > 0: # Scale the learning rate based on the batch size and a multiplier base_lr *= lr_multiplier * batch_size - logging.info('Scaling the learning rate based on the batch size ' - 'multiplier. New base_lr: %f', base_lr) + logging.info( + 'Scaling the learning rate based on the batch size ' + 'multiplier. New base_lr: %f', base_lr) if decay_type == 'exponential': - logging.info('Using exponential learning rate with: ' - 'initial_learning_rate: %f, decay_steps: %d, ' - 'decay_rate: %f', base_lr, decay_steps, decay_rate) + logging.info( + 'Using exponential learning rate with: ' + 'initial_learning_rate: %f, decay_steps: %d, ' + 'decay_rate: %f', base_lr, decay_steps, decay_rate) lr = tf.keras.optimizers.schedules.ExponentialDecay( initial_learning_rate=base_lr, decay_steps=decay_steps, decay_rate=decay_rate, staircase=params.staircase) - elif decay_type == 'piecewise_constant_with_warmup': - logging.info('Using Piecewise constant decay with warmup. 
' - 'Parameters: batch_size: %d, epoch_size: %d, ' - 'warmup_epochs: %d, boundaries: %s, multipliers: %s', - batch_size, params.examples_per_epoch, - params.warmup_epochs, params.boundaries, - params.multipliers) - lr = learning_rate.PiecewiseConstantDecayWithWarmup( - batch_size=batch_size, - epoch_size=params.examples_per_epoch, - warmup_epochs=params.warmup_epochs, - boundaries=params.boundaries, - multipliers=params.multipliers) + elif decay_type == 'stepwise': + steps_per_epoch = params.examples_per_epoch // batch_size + boundaries = [boundary * steps_per_epoch for boundary in params.boundaries] + multipliers = [batch_size * multiplier for multiplier in params.multipliers] + logging.info( + 'Using stepwise learning rate. Parameters: ' + 'boundaries: %s, values: %s', boundaries, multipliers) + lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay( + boundaries=boundaries, values=multipliers) elif decay_type == 'cosine_with_warmup': lr = learning_rate.CosineDecayWithWarmup( batch_size=batch_size, total_steps=train_epochs * train_steps, warmup_steps=warmup_steps) if warmup_steps > 0: - if decay_type not in [ - 'piecewise_constant_with_warmup', 'cosine_with_warmup' - ]: + if decay_type not in ['cosine_with_warmup']: logging.info('Applying %d warmup steps to the learning rate', warmup_steps) - lr = learning_rate.WarmupDecaySchedule(lr, warmup_steps) + lr = learning_rate.WarmupDecaySchedule( + lr, warmup_steps, warmup_lr=base_lr) return lr diff --git a/official/vision/image_classification/optimizer_factory_test.py b/official/vision/image_classification/optimizer_factory_test.py index a620728482f66febe402c20e2f01717f6a1393e5..a98d23d9bff0f7d339b1f5d5d648b18c04c4e08c 100644 --- a/official/vision/image_classification/optimizer_factory_test.py +++ b/official/vision/image_classification/optimizer_factory_test.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Tests for optimizer_factory.""" from __future__ import absolute_import @@ -35,10 +35,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): return model @parameterized.named_parameters( - ('sgd', 'sgd', 0., False), - ('momentum', 'momentum', 0., False), - ('rmsprop', 'rmsprop', 0., False), - ('adam', 'adam', 0., False), + ('sgd', 'sgd', 0., False), ('momentum', 'momentum', 0., False), + ('rmsprop', 'rmsprop', 0., False), ('adam', 'adam', 0., False), ('adamw', 'adamw', 0., False), ('momentum_lookahead', 'momentum', 0., True), ('sgd_ema', 'sgd', 0.999, False), @@ -84,17 +82,13 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase): train_steps = 1 lr = optimizer_factory.build_learning_rate( - params=params, - batch_size=batch_size, - train_steps=train_steps) + params=params, batch_size=batch_size, train_steps=train_steps) self.assertTrue( issubclass( type(lr), tf.keras.optimizers.schedules.LearningRateSchedule)) - @parameterized.named_parameters( - ('exponential', 'exponential'), - ('piecewise_constant_with_warmup', 'piecewise_constant_with_warmup'), - ('cosine_with_warmup', 'cosine_with_warmup')) + @parameterized.named_parameters(('exponential', 'exponential'), + ('cosine_with_warmup', 'cosine_with_warmup')) def test_learning_rate_with_decay_and_warmup(self, lr_decay_type): """Basic smoke test for syntax.""" params = base_configs.LearningRateConfig( diff --git a/official/vision/image_classification/preprocessing.py b/official/vision/image_classification/preprocessing.py index 
3f2019189d4e5f9c269a67276531b4344ede7e32..dece1fbc1199a137e5ee86ce7312b39cdeac27c9 100644 --- a/official/vision/image_classification/preprocessing.py +++ b/official/vision/image_classification/preprocessing.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Preprocessing functions for images.""" from __future__ import absolute_import @@ -379,12 +379,12 @@ def preprocess_for_train(image_bytes: tf.Tensor, """ images = decode_crop_and_flip(image_bytes=image_bytes) images = resize_image(images, height=image_size, width=image_size) + if augmenter is not None: + images = augmenter.distort(images) if mean_subtract: images = mean_image_subtraction(image_bytes=images, means=MEAN_RGB) if standardize: images = standardize_image(image_bytes=images, stddev=STDDEV_RGB) - if augmenter is not None: - images = augmenter.distort(images) if dtype is not None: images = tf.image.convert_image_dtype(images, dtype) diff --git a/official/vision/image_classification/resnet/__init__.py b/official/vision/image_classification/resnet/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e419af524b5f349fe04abfa820c3cb51b777d422 100644 --- a/official/vision/image_classification/resnet/__init__.py +++ b/official/vision/image_classification/resnet/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/official/vision/image_classification/resnet/common.py b/official/vision/image_classification/resnet/common.py index a9a64aa4064978863332a8024f4e46d64b9baaef..a034ba7dd0be5b2b2536727137497c84519001a5 100644 --- a/official/vision/image_classification/resnet/common.py +++ b/official/vision/image_classification/resnet/common.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Common util functions and classes used by both keras cifar and imagenet.""" from __future__ import absolute_import from __future__ import division @@ -22,7 +22,6 @@ import os from absl import flags import tensorflow as tf -from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2 import tensorflow_model_optimization as tfmot from official.utils.flags import core as flags_core from official.utils.misc import keras_utils @@ -30,7 +29,7 @@ from official.utils.misc import keras_utils FLAGS = flags.FLAGS BASE_LEARNING_RATE = 0.1 # This matches Jing's version. 
TRAIN_TOP_1 = 'training_accuracy_top_1' -LR_SCHEDULE = [ # (multiplier, epoch to start) tuples +LR_SCHEDULE = [ # (multiplier, epoch to start) tuples (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80) ] @@ -39,8 +38,14 @@ class PiecewiseConstantDecayWithWarmup( tf.keras.optimizers.schedules.LearningRateSchedule): """Piecewise constant decay with warmup schedule.""" - def __init__(self, batch_size, epoch_size, warmup_epochs, boundaries, - multipliers, compute_lr_on_cpu=True, name=None): + def __init__(self, + batch_size, + epoch_size, + warmup_epochs, + boundaries, + multipliers, + compute_lr_on_cpu=True, + name=None): super(PiecewiseConstantDecayWithWarmup, self).__init__() if len(boundaries) != len(multipliers) - 1: raise ValueError('The length of boundaries must be 1 less than the ' @@ -77,14 +82,16 @@ class PiecewiseConstantDecayWithWarmup( def _get_learning_rate(self, step): """Compute learning rate at given step.""" with tf.name_scope('PiecewiseConstantDecayWithWarmup'): + def warmup_lr(step): return self.rescaled_lr * ( tf.cast(step, tf.float32) / tf.cast(self.warmup_steps, tf.float32)) + def piecewise_lr(step): - return tf.compat.v1.train.piecewise_constant( - step, self.step_boundaries, self.lr_values) - return tf.cond(step < self.warmup_steps, - lambda: warmup_lr(step), + return tf.compat.v1.train.piecewise_constant(step, self.step_boundaries, + self.lr_values) + + return tf.cond(step < self.warmup_steps, lambda: warmup_lr(step), lambda: piecewise_lr(step)) def get_config(self): @@ -101,13 +108,12 @@ class PiecewiseConstantDecayWithWarmup( def get_optimizer(learning_rate=0.1): """Returns optimizer to use.""" # The learning_rate is overwritten at the beginning of each step by callback. 
- return gradient_descent_v2.SGD(learning_rate=learning_rate, momentum=0.9) + return tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9) -def get_callbacks( - pruning_method=None, - enable_checkpoint_and_export=False, - model_dir=None): +def get_callbacks(pruning_method=None, + enable_checkpoint_and_export=False, + model_dir=None): """Returns common callbacks.""" time_callback = keras_utils.TimeHistory( FLAGS.batch_size, @@ -117,23 +123,23 @@ def get_callbacks( if FLAGS.enable_tensorboard: tensorboard_callback = tf.keras.callbacks.TensorBoard( - log_dir=FLAGS.model_dir, - profile_batch=FLAGS.profile_steps) + log_dir=FLAGS.model_dir, profile_batch=FLAGS.profile_steps) callbacks.append(tensorboard_callback) is_pruning_enabled = pruning_method is not None if is_pruning_enabled: callbacks.append(tfmot.sparsity.keras.UpdatePruningStep()) if model_dir is not None: - callbacks.append(tfmot.sparsity.keras.PruningSummaries( - log_dir=model_dir, profile_batch=0)) + callbacks.append( + tfmot.sparsity.keras.PruningSummaries( + log_dir=model_dir, profile_batch=0)) if enable_checkpoint_and_export: if model_dir is not None: ckpt_full_path = os.path.join(model_dir, 'model.ckpt-{epoch:04d}') callbacks.append( - tf.keras.callbacks.ModelCheckpoint(ckpt_full_path, - save_weights_only=True)) + tf.keras.callbacks.ModelCheckpoint( + ckpt_full_path, save_weights_only=True)) return callbacks @@ -182,28 +188,30 @@ def build_stats(history, eval_output, callbacks): return stats -def define_keras_flags( - dynamic_loss_scale=True, - model=False, - optimizer=False, - pretrained_filepath=False): +def define_keras_flags(model=False, + optimizer=False, + pretrained_filepath=False): """Define flags for Keras models.""" - flags_core.define_base(clean=True, num_gpu=True, run_eagerly=True, - train_epochs=True, epochs_between_evals=True, - distribution_strategy=True) - flags_core.define_performance(num_parallel_calls=False, - synthetic_data=True, - dtype=True, - all_reduce_alg=True, - 
num_packs=True, - tf_gpu_thread_mode=True, - datasets_num_private_threads=True, - dynamic_loss_scale=dynamic_loss_scale, - loss_scale=True, - fp16_implementation=True, - tf_data_experimental_slack=True, - enable_xla=True, - training_dataset_cache=True) + flags_core.define_base( + clean=True, + num_gpu=True, + run_eagerly=True, + train_epochs=True, + epochs_between_evals=True, + distribution_strategy=True) + flags_core.define_performance( + num_parallel_calls=False, + synthetic_data=True, + dtype=True, + all_reduce_alg=True, + num_packs=True, + tf_gpu_thread_mode=True, + datasets_num_private_threads=True, + loss_scale=True, + fp16_implementation=True, + tf_data_experimental_slack=True, + enable_xla=True, + training_dataset_cache=True) flags_core.define_image() flags_core.define_benchmark() flags_core.define_distribution() @@ -214,23 +222,33 @@ def define_keras_flags( # TODO(b/135607288): Remove this flag once we understand the root cause of # slowdown when setting the learning phase in Keras backend. 
flags.DEFINE_boolean( - name='set_learning_phase_to_train', default=True, + name='set_learning_phase_to_train', + default=True, help='If skip eval, also set Keras learning phase to 1 (training).') flags.DEFINE_boolean( - name='explicit_gpu_placement', default=False, + name='explicit_gpu_placement', + default=False, help='If not using distribution strategy, explicitly set device scope ' 'for the Keras training loop.') - flags.DEFINE_boolean(name='use_trivial_model', default=False, - help='Whether to use a trivial Keras model.') - flags.DEFINE_boolean(name='report_accuracy_metrics', default=True, - help='Report metrics during training and evaluation.') - flags.DEFINE_boolean(name='use_tensor_lr', default=True, - help='Use learning rate tensor instead of a callback.') flags.DEFINE_boolean( - name='enable_tensorboard', default=False, + name='use_trivial_model', + default=False, + help='Whether to use a trivial Keras model.') + flags.DEFINE_boolean( + name='report_accuracy_metrics', + default=True, + help='Report metrics during training and evaluation.') + flags.DEFINE_boolean( + name='use_tensor_lr', + default=True, + help='Use learning rate tensor instead of a callback.') + flags.DEFINE_boolean( + name='enable_tensorboard', + default=False, help='Whether to enable Tensorboard callback.') flags.DEFINE_string( - name='profile_steps', default=None, + name='profile_steps', + default=None, help='Save profiling data to model dir at given range of global steps. The ' 'value must be a comma separated pair of positive integers, specifying ' 'the first and last step to profile. For example, "--profile_steps=2,4" ' @@ -238,24 +256,27 @@ def define_keras_flags( 'Note that profiler has a non-trivial performance overhead, and the ' 'output file can be gigantic if profiling many steps.') flags.DEFINE_integer( - name='train_steps', default=None, + name='train_steps', + default=None, help='The number of steps to run for training. 
If it is larger than ' '# batches per epoch, then use # batches per epoch. This flag will be ' 'ignored if train_epochs is set to be larger than 1. ') flags.DEFINE_boolean( - name='batchnorm_spatial_persistent', default=True, + name='batchnorm_spatial_persistent', + default=True, help='Enable the spacial persistent mode for CuDNN batch norm kernel.') flags.DEFINE_boolean( - name='enable_get_next_as_optional', default=False, + name='enable_get_next_as_optional', + default=False, help='Enable get_next_as_optional behavior in DistributedIterator.') flags.DEFINE_boolean( - name='enable_checkpoint_and_export', default=False, + name='enable_checkpoint_and_export', + default=False, help='Whether to enable a checkpoint callback and export the savedmodel.') - flags.DEFINE_string( - name='tpu', default='', help='TPU address to connect to.') + flags.DEFINE_string(name='tpu', default='', help='TPU address to connect to.') flags.DEFINE_integer( name='steps_per_loop', - default=500, + default=None, help='Number of steps per training loop. Only training step happens ' 'inside the loop. Callbacks will not be called inside. Will be capped at ' 'steps per epoch.') @@ -270,20 +291,20 @@ def define_keras_flags( flags.DEFINE_string('model', 'resnet50_v1.5', 'Name of model preset. (mobilenet, resnet50_v1.5)') if optimizer: - flags.DEFINE_string('optimizer', 'resnet50_default', - 'Name of optimizer preset. ' - '(mobilenet_default, resnet50_default)') + flags.DEFINE_string( + 'optimizer', 'resnet50_default', 'Name of optimizer preset. ' + '(mobilenet_default, resnet50_default)') # TODO(kimjaehong): Replace as general hyper-params not only for mobilenet. 
- flags.DEFINE_float('initial_learning_rate_per_sample', 0.00007, - 'Initial value of learning rate per sample for ' - 'mobilenet_default.') + flags.DEFINE_float( + 'initial_learning_rate_per_sample', 0.00007, + 'Initial value of learning rate per sample for ' + 'mobilenet_default.') flags.DEFINE_float('lr_decay_factor', 0.94, 'Learning rate decay factor for mobilenet_default.') flags.DEFINE_float('num_epochs_per_decay', 2.5, 'Number of epochs per decay for mobilenet_default.') if pretrained_filepath: - flags.DEFINE_string('pretrained_filepath', '', - 'Pretrained file path.') + flags.DEFINE_string('pretrained_filepath', '', 'Pretrained file path.') def get_synth_data(height, width, num_channels, num_classes, dtype): @@ -317,23 +338,31 @@ def get_synth_data(height, width, num_channels, num_classes, dtype): def define_pruning_flags(): """Define flags for pruning methods.""" - flags.DEFINE_string('pruning_method', None, - 'Pruning method.' - 'None (no pruning) or polynomial_decay.') + flags.DEFINE_string( + 'pruning_method', None, 'Pruning method.' 
+ 'None (no pruning) or polynomial_decay.') flags.DEFINE_float('pruning_initial_sparsity', 0.0, 'Initial sparsity for pruning.') flags.DEFINE_float('pruning_final_sparsity', 0.5, 'Final sparsity for pruning.') - flags.DEFINE_integer('pruning_begin_step', 0, - 'Begin step for pruning.') - flags.DEFINE_integer('pruning_end_step', 100000, - 'End step for pruning.') - flags.DEFINE_integer('pruning_frequency', 100, - 'Frequency for pruning.') + flags.DEFINE_integer('pruning_begin_step', 0, 'Begin step for pruning.') + flags.DEFINE_integer('pruning_end_step', 100000, 'End step for pruning.') + flags.DEFINE_integer('pruning_frequency', 100, 'Frequency for pruning.') + +def define_clustering_flags(): + """Define flags for clustering methods.""" + flags.DEFINE_string('clustering_method', None, + 'None (no clustering) or selective_clustering ' + '(cluster last three Conv2D layers of the model).') -def get_synth_input_fn(height, width, num_channels, num_classes, - dtype=tf.float32, drop_remainder=True): + +def get_synth_input_fn(height, + width, + num_channels, + num_classes, + dtype=tf.float32, + drop_remainder=True): """Returns an input function that returns a dataset with random data. This input_fn returns a data set that iterates over a set of random data and @@ -355,14 +384,16 @@ def get_synth_input_fn(height, width, num_channels, num_classes, An input_fn that can be used in place of a real one to return a dataset that can be used for iteration. """ + # pylint: disable=unused-argument def input_fn(is_training, data_dir, batch_size, *args, **kwargs): """Returns dataset filled with random data.""" - inputs, labels = get_synth_data(height=height, - width=width, - num_channels=num_channels, - num_classes=num_classes, - dtype=dtype) + inputs, labels = get_synth_data( + height=height, + width=width, + num_channels=num_channels, + num_classes=num_classes, + dtype=dtype) # Cast to float32 for Keras model. 
labels = tf.cast(labels, dtype=tf.float32) data = tf.data.Dataset.from_tensors((inputs, labels)).repeat() diff --git a/official/vision/image_classification/resnet/imagenet_preprocessing.py b/official/vision/image_classification/resnet/imagenet_preprocessing.py index f1490c22d8d769f32a6f6a1c6d29455519e8743a..86ba3ed98084987ea5d63edf8fd5f515d58fba93 100644 --- a/official/vision/image_classification/resnet/imagenet_preprocessing.py +++ b/official/vision/image_classification/resnet/imagenet_preprocessing.py @@ -1,4 +1,4 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Provides utilities to preprocess images. Training images are sampled using the provided bounding boxes, and subsequently @@ -36,6 +36,7 @@ from __future__ import division from __future__ import print_function import os + from absl import logging import tensorflow as tf @@ -78,17 +79,17 @@ def process_record_dataset(dataset, is_training: A boolean denoting whether the input is for training. batch_size: The number of samples per batch. shuffle_buffer: The buffer size to use when shuffling records. A larger - value results in better randomness, but smaller values reduce startup - time and use less memory. + value results in better randomness, but smaller values reduce startup time + and use less memory. parse_record_fn: A function that takes a raw record and returns the corresponding (image, label) pair. dtype: Data type to use for images/features. 
- datasets_num_private_threads: Number of threads for a private - threadpool created for all datasets computation. + datasets_num_private_threads: Number of threads for a private threadpool + created for all datasets computation. drop_remainder: A boolean indicates whether to drop the remainder of the batches. If True, the batch dimension will be static. - tf_data_experimental_slack: Whether to enable tf.data's - `experimental_slack` option. + tf_data_experimental_slack: Whether to enable tf.data's `experimental_slack` + option. Returns: Dataset of (image, label) pairs ready for iteration. @@ -99,8 +100,8 @@ def process_record_dataset(dataset, options.experimental_threading.private_threadpool_size = ( datasets_num_private_threads) dataset = dataset.with_options(options) - logging.info( - 'datasets_num_private_threads: %s', datasets_num_private_threads) + logging.info('datasets_num_private_threads: %s', + datasets_num_private_threads) if is_training: # Shuffles records before repeating to respect epoch boundaries. @@ -134,11 +135,13 @@ def get_filenames(is_training, data_dir): if is_training: return [ os.path.join(data_dir, 'train-%05d-of-01024' % i) - for i in range(_NUM_TRAIN_FILES)] + for i in range(_NUM_TRAIN_FILES) + ] else: return [ os.path.join(data_dir, 'validation-%05d-of-00128' % i) - for i in range(128)] + for i in range(128) + ] def parse_example_proto(example_serialized): @@ -165,8 +168,8 @@ def parse_example_proto(example_serialized): image/encoded: Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. + example_serialized: scalar Tensor tf.string containing a serialized Example + protocol buffer. Returns: image_buffer: Tensor tf.string containing the contents of a JPEG file. @@ -177,22 +180,24 @@ def parse_example_proto(example_serialized): """ # Dense features in Example proto. 
feature_map = { - 'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.io.FixedLenFeature([], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string, - default_value=''), + 'image/encoded': + tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), + 'image/class/label': + tf.io.FixedLenFeature([], dtype=tf.int64, default_value=-1), + 'image/class/text': + tf.io.FixedLenFeature([], dtype=tf.string, default_value=''), } sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) # Sparse features in Example proto. - feature_map.update( - {k: sparse_float32 for k in [ + feature_map.update({ + k: sparse_float32 for k in [ 'image/object/bbox/xmin', 'image/object/bbox/ymin', - 'image/object/bbox/xmax', 'image/object/bbox/ymax']}) + 'image/object/bbox/xmax', 'image/object/bbox/ymax' + ] + }) - features = tf.io.parse_single_example(serialized=example_serialized, - features=feature_map) + features = tf.io.parse_single_example( + serialized=example_serialized, features=feature_map) label = tf.cast(features['image/class/label'], dtype=tf.int32) xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) @@ -218,8 +223,8 @@ def parse_record(raw_record, is_training, dtype): through preprocessing steps (cropping, flipping, and so on). Args: - raw_record: scalar Tensor tf.string containing a serialized - Example protocol buffer. + raw_record: scalar Tensor tf.string containing a serialized Example protocol + buffer. is_training: A boolean denoting whether the input is for training. dtype: data type to use for images/features. @@ -240,8 +245,9 @@ def parse_record(raw_record, is_training, dtype): # Subtract one so that labels are in [0, 1000), and cast to float32 for # Keras model. 
- label = tf.cast(tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1, - dtype=tf.float32) + label = tf.cast( + tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1, + dtype=tf.float32) return image, label @@ -262,12 +268,14 @@ def get_parse_record_fn(use_keras_image_data_format=False): Returns: Function to use for parsing the records. """ + def parse_record_fn(raw_record, is_training, dtype): image, label = parse_record(raw_record, is_training, dtype) if use_keras_image_data_format: if tf.keras.backend.image_data_format() == 'channels_first': image = tf.transpose(image, perm=[2, 0, 1]) return image, label + return parse_record_fn @@ -295,11 +303,11 @@ def input_fn(is_training, `tf.distribute.Strategy`. drop_remainder: A boolean indicates whether to drop the remainder of the batches. If True, the batch dimension will be static. - tf_data_experimental_slack: Whether to enable tf.data's - `experimental_slack` option. + tf_data_experimental_slack: Whether to enable tf.data's `experimental_slack` + option. training_dataset_cache: Whether to cache the training dataset on workers. - Typically used to improve training performance when training data is in - remote storage and can fit into worker memory. + Typically used to improve training performance when training data is in + remote storage and can fit into worker memory. filenames: Optional field for providing the file names of the TFRecords. Returns: @@ -357,8 +365,8 @@ def _decode_crop_and_flip(image_buffer, bbox, num_channels): Args: image_buffer: scalar string Tensor representing the raw JPEG image buffer. bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. + where each coordinate is [0, 1) and the coordinates are arranged as [ymin, + xmin, ymax, xmax]. num_channels: Integer depth of the image buffer for decoding. 
Returns: @@ -414,8 +422,8 @@ def _central_crop(image, crop_height, crop_width): crop_top = amount_to_be_cropped_h // 2 amount_to_be_cropped_w = (width - crop_width) crop_left = amount_to_be_cropped_w // 2 - return tf.slice( - image, [crop_top, crop_left, 0], [crop_height, crop_width, -1]) + return tf.slice(image, [crop_top, crop_left, 0], + [crop_height, crop_width, -1]) def _mean_image_subtraction(image, means, num_channels): @@ -463,8 +471,8 @@ def _smallest_size_at_least(height, width, resize_min): Args: height: an int32 scalar tensor indicating the current height. width: an int32 scalar tensor indicating the current width. - resize_min: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. + resize_min: A python integer or scalar `Tensor` indicating the size of the + smallest side after resize. Returns: new_height: an int32 scalar tensor indicating the new height. @@ -490,8 +498,8 @@ def _aspect_preserving_resize(image, resize_min): Args: image: A 3-D image `Tensor`. - resize_min: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. + resize_min: A python integer or scalar `Tensor` indicating the size of the + smallest side after resize. Returns: resized_image: A 3-D tensor containing the resized image. @@ -520,12 +528,17 @@ def _resize_image(image, height, width): dimensions have the shape [height, width]. """ return tf.compat.v1.image.resize( - image, [height, width], method=tf.image.ResizeMethod.BILINEAR, + image, [height, width], + method=tf.image.ResizeMethod.BILINEAR, align_corners=False) -def preprocess_image(image_buffer, bbox, output_height, output_width, - num_channels, is_training=False): +def preprocess_image(image_buffer, + bbox, + output_height, + output_width, + num_channels, + is_training=False): """Preprocesses the given image. 
Preprocessing includes decoding, cropping, and resizing for both training @@ -535,8 +548,8 @@ def preprocess_image(image_buffer, bbox, output_height, output_width, Args: image_buffer: scalar string Tensor representing the raw JPEG image buffer. bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. + where each coordinate is [0, 1) and the coordinates are arranged as [ymin, + xmin, ymax, xmax]. output_height: The height of the image after preprocessing. output_width: The width of the image after preprocessing. num_channels: Integer depth of the image buffer for decoding. diff --git a/official/vision/image_classification/resnet/resnet_config.py b/official/vision/image_classification/resnet/resnet_config.py index a746257f02b85eddfc72192b9474638b92378644..e39db3955f9fe9c312ea307c8ac3196d45447cf3 100644 --- a/official/vision/image_classification/resnet/resnet_config.py +++ b/official/vision/image_classification/resnet/resnet_config.py @@ -1,5 +1,4 @@ -# Lint as: python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,28 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + +# Lint as: python3 """Configuration definitions for ResNet losses, learning rates, and optimizers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from typing import Any, Mapping - import dataclasses from official.modeling.hyperparams import base_config from official.vision.image_classification.configs import base_configs -_RESNET_LR_SCHEDULE = [ # (multiplier, epoch to start) tuples - (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80) -] -_RESNET_LR_BOUNDARIES = list(p[1] for p in _RESNET_LR_SCHEDULE[1:]) -_RESNET_LR_MULTIPLIERS = list(p[0] for p in _RESNET_LR_SCHEDULE) -_RESNET_LR_WARMUP_EPOCHS = _RESNET_LR_SCHEDULE[0][1] - - @dataclasses.dataclass class ResNetModelConfig(base_configs.ModelConfig): """Configuration for the ResNet model.""" @@ -56,8 +46,10 @@ class ResNetModelConfig(base_configs.ModelConfig): moving_average_decay=None) learning_rate: base_configs.LearningRateConfig = ( base_configs.LearningRateConfig( - name='piecewise_constant_with_warmup', + name='stepwise', + initial_lr=0.1, examples_per_epoch=1281167, - warmup_epochs=_RESNET_LR_WARMUP_EPOCHS, - boundaries=_RESNET_LR_BOUNDARIES, - multipliers=_RESNET_LR_MULTIPLIERS)) + boundaries=[30, 60, 80], + warmup_epochs=5, + scale_by_batch_size=1. / 256., + multipliers=[0.1 / 256, 0.01 / 256, 0.001 / 256, 0.0001 / 256])) diff --git a/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py b/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py index c128dc0b99535d806634b42b99a2e56211c567ca..a66461df17a3fe5fc0d75969e99920310a694e71 100644 --- a/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py +++ b/official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,23 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""Runs a ResNet model on the ImageNet dataset using custom training loops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Runs a ResNet model on the ImageNet dataset using custom training loops.""" import math +import os + +# Import libraries from absl import app from absl import flags from absl import logging +import orbit import tensorflow as tf - +from official.common import distribute_utils from official.modeling import performance -from official.staging.training import controller from official.utils.flags import core as flags_core -from official.utils.misc import distribution_utils from official.utils.misc import keras_utils from official.utils.misc import model_helpers from official.vision.image_classification.resnet import common @@ -87,15 +85,6 @@ def get_num_train_iterations(flags_obj): return train_steps, train_epochs, eval_steps -def _steps_to_run(steps_in_current_epoch, steps_per_epoch, steps_per_loop): - """Calculates steps to run on device.""" - if steps_per_loop <= 0: - raise ValueError('steps_per_loop should be positive integer.') - if steps_per_loop == 1: - return steps_per_loop - return min(steps_per_loop, steps_per_epoch - steps_in_current_epoch) - - def run(flags_obj): """Run ResNet ImageNet training and eval loop using custom training loops. @@ -108,8 +97,7 @@ def run(flags_obj): Returns: Dictionary of training and eval stats. 
""" - keras_utils.set_session_config( - enable_xla=flags_obj.enable_xla) + keras_utils.set_session_config() performance.set_mixed_precision_policy(flags_core.get_tf_dtype(flags_obj)) if tf.config.list_physical_devices('GPU'): @@ -121,14 +109,13 @@ def run(flags_obj): datasets_num_private_threads=flags_obj.datasets_num_private_threads) common.set_cudnn_batchnorm_mode() - # TODO(anj-s): Set data_format without using Keras. data_format = flags_obj.data_format if data_format is None: data_format = ('channels_first' if tf.config.list_physical_devices('GPU') else 'channels_last') tf.keras.backend.set_image_data_format(data_format) - strategy = distribution_utils.get_distribution_strategy( + strategy = distribute_utils.get_distribution_strategy( distribution_strategy=flags_obj.distribution_strategy, num_gpus=flags_obj.num_gpus, all_reduce_alg=flags_obj.all_reduce_alg, @@ -137,7 +124,14 @@ def run(flags_obj): per_epoch_steps, train_epochs, eval_steps = get_num_train_iterations( flags_obj) - steps_per_loop = min(flags_obj.steps_per_loop, per_epoch_steps) + if flags_obj.steps_per_loop is None: + steps_per_loop = per_epoch_steps + elif flags_obj.steps_per_loop > per_epoch_steps: + steps_per_loop = per_epoch_steps + logging.warn('Setting steps_per_loop to %d to respect epoch boundary.', + steps_per_loop) + else: + steps_per_loop = flags_obj.steps_per_loop logging.info( 'Training %d epochs, each epoch has %d steps, ' @@ -148,14 +142,14 @@ def run(flags_obj): flags_obj.batch_size, flags_obj.log_steps, logdir=flags_obj.model_dir if flags_obj.enable_tensorboard else None) - with distribution_utils.get_strategy_scope(strategy): + with distribute_utils.get_strategy_scope(strategy): runnable = resnet_runnable.ResnetRunnable(flags_obj, time_callback, per_epoch_steps) eval_interval = flags_obj.epochs_between_evals * per_epoch_steps checkpoint_interval = ( - per_epoch_steps if flags_obj.enable_checkpoint_and_export else None) - summary_interval = per_epoch_steps if 
flags_obj.enable_tensorboard else None + steps_per_loop * 5 if flags_obj.enable_checkpoint_and_export else None) + summary_interval = steps_per_loop if flags_obj.enable_tensorboard else None checkpoint_manager = tf.train.CheckpointManager( runnable.checkpoint, @@ -164,20 +158,25 @@ def run(flags_obj): step_counter=runnable.global_step, checkpoint_interval=checkpoint_interval) - resnet_controller = controller.Controller( - strategy, - runnable.train, - runnable.evaluate if not flags_obj.skip_eval else None, + resnet_controller = orbit.Controller( + strategy=strategy, + trainer=runnable, + evaluator=runnable if not flags_obj.skip_eval else None, global_step=runnable.global_step, steps_per_loop=steps_per_loop, - train_steps=per_epoch_steps * train_epochs, checkpoint_manager=checkpoint_manager, summary_interval=summary_interval, - eval_steps=eval_steps, - eval_interval=eval_interval) + summary_dir=flags_obj.model_dir, + eval_summary_dir=os.path.join(flags_obj.model_dir, 'eval')) time_callback.on_train_begin() - resnet_controller.train(evaluate=not flags_obj.skip_eval) + if not flags_obj.skip_eval: + resnet_controller.train_and_evaluate( + train_steps=per_epoch_steps * train_epochs, + eval_steps=eval_steps, + eval_interval=eval_interval) + else: + resnet_controller.train(steps=per_epoch_steps * train_epochs) time_callback.on_train_end() stats = build_stats(runnable, time_callback) diff --git a/official/vision/image_classification/resnet/resnet_model.py b/official/vision/image_classification/resnet/resnet_model.py index 10f1233356ece188cce51ec254f0064739cd6f41..17d124bb20a649885623277f5e32c5de05703819 100644 --- a/official/vision/image_classification/resnet/resnet_model.py +++ b/official/vision/image_classification/resnet/resnet_model.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """ResNet50 model for Keras. Adapted from tf.keras.applications.resnet50.ResNet50(). @@ -28,18 +28,14 @@ from __future__ import division from __future__ import print_function import tensorflow as tf - -from tensorflow.python.keras import backend -from tensorflow.python.keras import initializers -from tensorflow.python.keras import models -from tensorflow.python.keras import regularizers from official.vision.image_classification.resnet import imagenet_preprocessing layers = tf.keras.layers def _gen_l2_regularizer(use_l2_regularizer=True, l2_weight_decay=1e-4): - return regularizers.l2(l2_weight_decay) if use_l2_regularizer else None + return tf.keras.regularizers.L2( + l2_weight_decay) if use_l2_regularizer else None def identity_block(input_tensor, @@ -66,7 +62,7 @@ def identity_block(input_tensor, Output tensor for the block. """ filters1, filters2, filters3 = filters - if backend.image_data_format() == 'channels_last': + if tf.keras.backend.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 @@ -154,7 +150,7 @@ def conv_block(input_tensor, Output tensor for the block. """ filters1, filters2, filters3 = filters - if backend.image_data_format() == 'channels_last': + if tf.keras.backend.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 @@ -253,7 +249,7 @@ def resnet50(num_classes, # Hub image modules expect inputs in the range [0, 1]. This rescales these # inputs to the range expected by the trained model. 
x = layers.Lambda( - lambda x: x * 255.0 - backend.constant( + lambda x: x * 255.0 - tf.keras.backend.constant( # pylint: disable=g-long-lambda imagenet_preprocessing.CHANNEL_MEANS, shape=[1, 1, 3], dtype=x.dtype), @@ -262,7 +258,7 @@ def resnet50(num_classes, else: x = img_input - if backend.image_data_format() == 'channels_first': + if tf.keras.backend.image_data_format() == 'channels_first': x = layers.Permute((3, 1, 2))(x) bn_axis = 1 else: # channels_last @@ -315,7 +311,8 @@ def resnet50(num_classes, x = layers.GlobalAveragePooling2D()(x) x = layers.Dense( num_classes, - kernel_initializer=initializers.RandomNormal(stddev=0.01), + kernel_initializer=tf.compat.v1.keras.initializers.random_normal( + stddev=0.01), kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer), bias_regularizer=_gen_l2_regularizer(use_l2_regularizer), name='fc1000')( @@ -326,4 +323,4 @@ def resnet50(num_classes, x = layers.Activation('softmax', dtype='float32')(x) # Create model. - return models.Model(img_input, x, name='resnet50') + return tf.keras.Model(img_input, x, name='resnet50') diff --git a/official/vision/image_classification/resnet/resnet_runnable.py b/official/vision/image_classification/resnet/resnet_runnable.py index 473b18daf7aaf02bfb1dc86110b3ae0fd2704359..6fa40b98cb76377a9b11e3f4a35b38094eca5cc5 100644 --- a/official/vision/image_classification/resnet/resnet_runnable.py +++ b/official/vision/image_classification/resnet/resnet_runnable.py @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,36 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Runs a ResNet model on the ImageNet dataset using custom training loops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Runs a ResNet model on the ImageNet dataset using custom training loops.""" +import orbit import tensorflow as tf from official.modeling import performance from official.staging.training import grad_utils -from official.staging.training import standard_runnable -from official.staging.training import utils from official.utils.flags import core as flags_core from official.vision.image_classification.resnet import common from official.vision.image_classification.resnet import imagenet_preprocessing from official.vision.image_classification.resnet import resnet_model -class ResnetRunnable(standard_runnable.StandardTrainable, - standard_runnable.StandardEvaluable): +class ResnetRunnable(orbit.StandardTrainer, orbit.StandardEvaluator): """Implements the training and evaluation APIs for Resnet model.""" def __init__(self, flags_obj, time_callback, epoch_steps): - standard_runnable.StandardTrainable.__init__(self, - flags_obj.use_tf_while_loop, - flags_obj.use_tf_function) - standard_runnable.StandardEvaluable.__init__(self, - flags_obj.use_tf_function) - self.strategy = tf.distribute.get_strategy() self.flags_obj = flags_obj self.dtype = flags_core.get_tf_dtype(flags_obj) @@ -54,7 +42,7 @@ class ResnetRunnable(standard_runnable.StandardTrainable, self.strategy.num_replicas_in_sync)) # As auto rebatching is not supported in - # `experimental_distribute_datasets_from_function()` API, which is + # `distribute_datasets_from_function()` API, which is # required when cloning dataset to multiple workers in eager mode, # we use per-replica batch size. 
self.batch_size = int(batch_size / self.strategy.num_replicas_in_sync) @@ -107,11 +95,8 @@ class ResnetRunnable(standard_runnable.StandardTrainable, # Handling epochs. self.epoch_steps = epoch_steps - self.epoch_helper = utils.EpochHelper(epoch_steps, self.global_step) - - def build_train_dataset(self): - """See base class.""" - return utils.make_distributed_dataset( + self.epoch_helper = orbit.utils.EpochHelper(epoch_steps, self.global_step) + train_dataset = orbit.utils.make_distributed_dataset( self.strategy, self.input_fn, is_training=True, @@ -122,17 +107,26 @@ class ResnetRunnable(standard_runnable.StandardTrainable, .datasets_num_private_threads, dtype=self.dtype, drop_remainder=True) - - def build_eval_dataset(self): - """See base class.""" - return utils.make_distributed_dataset( - self.strategy, - self.input_fn, - is_training=False, - data_dir=self.flags_obj.data_dir, - batch_size=self.batch_size, - parse_record_fn=imagenet_preprocessing.parse_record, - dtype=self.dtype) + orbit.StandardTrainer.__init__( + self, + train_dataset, + options=orbit.StandardTrainerOptions( + use_tf_while_loop=flags_obj.use_tf_while_loop, + use_tf_function=flags_obj.use_tf_function)) + if not flags_obj.skip_eval: + eval_dataset = orbit.utils.make_distributed_dataset( + self.strategy, + self.input_fn, + is_training=False, + data_dir=self.flags_obj.data_dir, + batch_size=self.batch_size, + parse_record_fn=imagenet_preprocessing.parse_record, + dtype=self.dtype) + orbit.StandardEvaluator.__init__( + self, + eval_dataset, + options=orbit.StandardEvaluatorOptions( + use_tf_function=flags_obj.use_tf_function)) def train_loop_begin(self): """See base class.""" @@ -173,7 +167,8 @@ class ResnetRunnable(standard_runnable.StandardTrainable, tape, self.optimizer, loss, self.model.trainable_variables) self.train_loss.update_state(loss) self.train_accuracy.update_state(labels, logits) - + if self.flags_obj.enable_xla: + step_fn = tf.function(step_fn, jit_compile=True) 
self.strategy.run(step_fn, args=(next(iterator),)) def train_loop_end(self): diff --git a/official/vision/image_classification/resnet/tfhub_export.py b/official/vision/image_classification/resnet/tfhub_export.py index ff1f124a1d67c93b9deee453a23cf71133bb6434..3f79a791304a8092bd5af808693a156d027a6ed1 100644 --- a/official/vision/image_classification/resnet/tfhub_export.py +++ b/official/vision/image_classification/resnet/tfhub_export.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """A script to export TF-Hub SavedModel.""" from __future__ import absolute_import @@ -21,6 +21,7 @@ from __future__ import print_function import os +# Import libraries from absl import app from absl import flags diff --git a/official/vision/image_classification/test_utils.py b/official/vision/image_classification/test_utils.py index a6dc91dc775ce25950a8918450548c19992eb2c4..8d7180c9d4e10c3241c4d6dd31d2cd013439df7a 100644 --- a/official/vision/image_classification/test_utils.py +++ b/official/vision/image_classification/test_utils.py @@ -1,4 +1,4 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,28 +11,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== + """Test utilities for image classification tasks.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.keras import backend -from tensorflow.python.keras import layers -from tensorflow.python.keras import models +import tensorflow as tf def trivial_model(num_classes): """Trivial model for ImageNet dataset.""" input_shape = (224, 224, 3) - img_input = layers.Input(shape=input_shape) + img_input = tf.keras.layers.Input(shape=input_shape) - x = layers.Lambda(lambda x: backend.reshape(x, [-1, 224 * 224 * 3]), - name='reshape')(img_input) - x = layers.Dense(1, name='fc1')(x) - x = layers.Dense(num_classes, name='fc1000')(x) - x = layers.Activation('softmax', dtype='float32')(x) + x = tf.keras.layers.Lambda( + lambda x: tf.keras.backend.reshape(x, [-1, 224 * 224 * 3]), + name='reshape')(img_input) + x = tf.keras.layers.Dense(1, name='fc1')(x) + x = tf.keras.layers.Dense(num_classes, name='fc1000')(x) + x = tf.keras.layers.Activation('softmax', dtype='float32')(x) - return models.Model(img_input, x, name='trivial') + return tf.keras.models.Model(img_input, x, name='trivial') diff --git a/official/vision/keras_cv/LICENSE b/official/vision/keras_cv/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..0b1ba4429805c3f80ea21528070e0791c484021b --- /dev/null +++ b/official/vision/keras_cv/LICENSE @@ -0,0 +1,203 @@ +Copyright 2020 The TensorFlow Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2015, The TensorFlow Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/official/vision/keras_cv/README.md b/official/vision/keras_cv/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1132d521cd78605417b4c5a301d61d790a0410e5 --- /dev/null +++ b/official/vision/keras_cv/README.md @@ -0,0 +1,13 @@ +# keras-cv + +## Losses + +* [FocalLoss](losses/focal_loss.py) implements Focal loss as described in + ["Focal Loss for Dense Object Detection"](https://arxiv.org/abs/1708.02002). + + +## Ops + +Ops are used in data pipeline for pre-compute labels, weights. + +* [IOUSimilarity](ops/iou_similarity.py) implements Intersection-Over-Union. 
diff --git a/official/vision/keras_cv/__init__.py b/official/vision/keras_cv/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6d448259a474eaee1b54d332df0e658221f21587 --- /dev/null +++ b/official/vision/keras_cv/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-CV package definition.""" +# pylint: disable=wildcard-import +from official.vision.keras_cv import layers +from official.vision.keras_cv import losses +from official.vision.keras_cv import metrics +from official.vision.keras_cv import ops diff --git a/official/vision/keras_cv/contributing.md b/official/vision/keras_cv/contributing.md new file mode 100644 index 0000000000000000000000000000000000000000..d9efe9b0691c8fdb580513d52d6a35271c096bbb --- /dev/null +++ b/official/vision/keras_cv/contributing.md @@ -0,0 +1,21 @@ +## Contributing to KerasCV + +Patches to KerasCV are welcome! + +The source-of-truth repository lives under +[TF Model Garden Vision](https://github.com/tensorflow/models/official/vision/keras_cv), +and is mirrored as a read-only repository under +[keras-team/keras-cv](https://github.com/keras-team/keras-cv). +Contributions should be made as PRs to the TF Model Garden repository. +This is to ensure the codebase is rigorously tested with state-of-art models +on different accelerators. 
+In the long run, we will move development to the current repository `keras-team/keras-cv`. + +## :heavy_check_mark: Contributor checklist + +1. Ensure you have signed the [Contributor License Agreement](https://cla.developers.google.com/about/google-individual?csw=1). + * All code contributors are required to sign a Contributor License Agreement. + * Please read this [troubleshooting guide](Contributor-License-Agreements#troubleshooting-clas) + if you encounter an issue. +2. Please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute). +3. Check if your changes are consistent with the [TensorFlow coding style](https://www.tensorflow.org/community/contribute/code_style). diff --git a/official/vision/keras_cv/layers/__init__.py b/official/vision/keras_cv/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db606e5ada05387059ccbdcd5774e3c76484b0ec --- /dev/null +++ b/official/vision/keras_cv/layers/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Keras-CV layers package definition.""" +from official.vision.keras_cv.layers.deeplab import SpatialPyramidPooling diff --git a/official/vision/keras_cv/layers/deeplab.py b/official/vision/keras_cv/layers/deeplab.py new file mode 100644 index 0000000000000000000000000000000000000000..40b820c20862aa56cb3194d5d110120ac62dfaf7 --- /dev/null +++ b/official/vision/keras_cv/layers/deeplab.py @@ -0,0 +1,193 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Layers for DeepLabV3.""" + +import tensorflow as tf + + +@tf.keras.utils.register_keras_serializable(package='keras_cv') +class SpatialPyramidPooling(tf.keras.layers.Layer): + """Implements the Atrous Spatial Pyramid Pooling. + + Reference: + [Rethinking Atrous Convolution for Semantic Image Segmentation]( + https://arxiv.org/pdf/1706.05587.pdf) + """ + + def __init__( + self, + output_channels, + dilation_rates, + pool_kernel_size=None, + use_sync_bn=False, + batchnorm_momentum=0.99, + batchnorm_epsilon=0.001, + activation='relu', + dropout=0.5, + kernel_initializer='glorot_uniform', + kernel_regularizer=None, + interpolation='bilinear', + **kwargs): + """Initializes `SpatialPyramidPooling`. + + Args: + output_channels: Number of channels produced by SpatialPyramidPooling. + dilation_rates: A list of integers for parallel dilated conv. + pool_kernel_size: A list of integers or None. 
If None, global average + pooling is applied, otherwise an average pooling of pool_kernel_size + is applied. + use_sync_bn: A bool, whether or not to use sync batch normalization. + batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to + 0.99. + batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults to + 0.001. + activation: A `str` for type of activation to be used. Defaults to 'relu'. + dropout: A float for the dropout rate before output. Defaults to 0.5. + kernel_initializer: Kernel initializer for conv layers. Defaults to + `glorot_uniform`. + kernel_regularizer: Kernel regularizer for conv layers. Defaults to None. + interpolation: The interpolation method for upsampling. Defaults to + `bilinear`. + **kwargs: Other keyword arguments for the layer. + """ + super(SpatialPyramidPooling, self).__init__(**kwargs) + + self.output_channels = output_channels + self.dilation_rates = dilation_rates + self.use_sync_bn = use_sync_bn + self.batchnorm_momentum = batchnorm_momentum + self.batchnorm_epsilon = batchnorm_epsilon + self.activation = activation + self.dropout = dropout + self.kernel_initializer = tf.keras.initializers.get(kernel_initializer) + self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + self.interpolation = interpolation + self.input_spec = tf.keras.layers.InputSpec(ndim=4) + self.pool_kernel_size = pool_kernel_size + + def build(self, input_shape): + height = input_shape[1] + width = input_shape[2] + channels = input_shape[3] + + self.aspp_layers = [] + + if self.use_sync_bn: + bn_op = tf.keras.layers.experimental.SyncBatchNormalization + else: + bn_op = tf.keras.layers.BatchNormalization + + if tf.keras.backend.image_data_format() == 'channels_last': + bn_axis = -1 + else: + bn_axis = 1 + + conv_sequential = tf.keras.Sequential([ + tf.keras.layers.Conv2D( + filters=self.output_channels, kernel_size=(1, 1), + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + 
use_bias=False), + bn_op( + axis=bn_axis, + momentum=self.batchnorm_momentum, + epsilon=self.batchnorm_epsilon), + tf.keras.layers.Activation(self.activation) + ]) + self.aspp_layers.append(conv_sequential) + + for dilation_rate in self.dilation_rates: + conv_sequential = tf.keras.Sequential([ + tf.keras.layers.Conv2D( + filters=self.output_channels, kernel_size=(3, 3), + padding='same', kernel_regularizer=self.kernel_regularizer, + kernel_initializer=self.kernel_initializer, + dilation_rate=dilation_rate, use_bias=False), + bn_op(axis=bn_axis, momentum=self.batchnorm_momentum, + epsilon=self.batchnorm_epsilon), + tf.keras.layers.Activation(self.activation)]) + self.aspp_layers.append(conv_sequential) + + if self.pool_kernel_size is None: + pool_sequential = tf.keras.Sequential([ + tf.keras.layers.GlobalAveragePooling2D(), + tf.keras.layers.Reshape((1, 1, channels))]) + else: + pool_sequential = tf.keras.Sequential([ + tf.keras.layers.AveragePooling2D(self.pool_kernel_size)]) + + pool_sequential.add( + tf.keras.Sequential([ + tf.keras.layers.Conv2D( + filters=self.output_channels, + kernel_size=(1, 1), + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + use_bias=False), + bn_op( + axis=bn_axis, + momentum=self.batchnorm_momentum, + epsilon=self.batchnorm_epsilon), + tf.keras.layers.Activation(self.activation), + tf.keras.layers.experimental.preprocessing.Resizing( + height, + width, + interpolation=self.interpolation, + dtype=tf.float32) + ])) + + self.aspp_layers.append(pool_sequential) + + self.projection = tf.keras.Sequential([ + tf.keras.layers.Conv2D( + filters=self.output_channels, kernel_size=(1, 1), + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer, + use_bias=False), + bn_op( + axis=bn_axis, + momentum=self.batchnorm_momentum, + epsilon=self.batchnorm_epsilon), + tf.keras.layers.Activation(self.activation), + tf.keras.layers.Dropout(rate=self.dropout)]) + + def call(self, 
inputs, training=None): + if training is None: + training = tf.keras.backend.learning_phase() + result = [] + for layer in self.aspp_layers: + result.append(tf.cast(layer(inputs, training=training), inputs.dtype)) + result = tf.concat(result, axis=-1) + result = self.projection(result, training=training) + return result + + def get_config(self): + config = { + 'output_channels': self.output_channels, + 'dilation_rates': self.dilation_rates, + 'pool_kernel_size': self.pool_kernel_size, + 'use_sync_bn': self.use_sync_bn, + 'batchnorm_momentum': self.batchnorm_momentum, + 'batchnorm_epsilon': self.batchnorm_epsilon, + 'activation': self.activation, + 'dropout': self.dropout, + 'kernel_initializer': tf.keras.initializers.serialize( + self.kernel_initializer), + 'kernel_regularizer': tf.keras.regularizers.serialize( + self.kernel_regularizer), + 'interpolation': self.interpolation, + } + base_config = super(SpatialPyramidPooling, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/official/vision/keras_cv/layers/deeplab_test.py b/official/vision/keras_cv/layers/deeplab_test.py new file mode 100644 index 0000000000000000000000000000000000000000..858382ebf1b52ecbadb5bbf1605631f39f4b6f0e --- /dev/null +++ b/official/vision/keras_cv/layers/deeplab_test.py @@ -0,0 +1,53 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for ASPP.""" + +import tensorflow as tf + +from tensorflow.python.keras import keras_parameterized +from official.vision.keras_cv.layers import deeplab + + +@keras_parameterized.run_all_keras_modes +class DeeplabTest(keras_parameterized.TestCase): + + @keras_parameterized.parameterized.parameters( + (None,), + ([32, 32],), + ) + def test_aspp(self, pool_kernel_size): + inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32) + layer = deeplab.SpatialPyramidPooling(output_channels=256, + dilation_rates=[6, 12, 18], + pool_kernel_size=None) + output = layer(inputs) + self.assertAllEqual([None, 64, 64, 256], output.shape) + + def test_aspp_invalid_shape(self): + inputs = tf.keras.Input(shape=(64, 64), dtype=tf.float32) + layer = deeplab.SpatialPyramidPooling(output_channels=256, + dilation_rates=[6, 12, 18]) + with self.assertRaises(ValueError): + _ = layer(inputs) + + def test_config_with_custom_name(self): + layer = deeplab.SpatialPyramidPooling(256, [5], name='aspp') + config = layer.get_config() + layer_1 = deeplab.SpatialPyramidPooling.from_config(config) + self.assertEqual(layer_1.name, layer.name) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/keras_cv/losses/__init__.py b/official/vision/keras_cv/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dd95f0a0249a696516a9cdcc2271d922fbf27d50 --- /dev/null +++ b/official/vision/keras_cv/losses/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class FocalLoss(tf.keras.losses.Loss):
  """Implements a Focal loss for classification problems.

  Reference:
    [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002).
  """

  def __init__(self,
               alpha,
               gamma,
               reduction=tf.keras.losses.Reduction.AUTO,
               name=None):
    """Initializes `FocalLoss`.

    Args:
      alpha: The `alpha` weight factor for binary class imbalance.
      gamma: The `gamma` focusing parameter to re-weight loss.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
        https://www.tensorflow.org/tutorials/distribute/custom_training) for
        more details.
      name: Optional name for the op. Defaults to None, in which case the
        Keras base class derives a name from the class name.
    """
    self._alpha = alpha
    self._gamma = gamma
    super(FocalLoss, self).__init__(reduction=reduction, name=name)

  def call(self, y_true, y_pred):
    """Invokes the `FocalLoss`.

    Args:
      y_true: A tensor of size [batch, num_anchors, num_classes] containing
        binary {0, 1} class targets.
      y_pred: A tensor of size [batch, num_anchors, num_classes] of logits
        (sigmoid is applied internally).

    Returns:
      A float `Tensor` of element-wise weighted focal loss, same shape as
      the inputs. The configured `reduction` is applied by the Keras base
      class, not here.
    """
    with tf.name_scope('focal_loss'):
      y_true = tf.cast(y_true, dtype=tf.float32)
      y_pred = tf.cast(y_pred, dtype=tf.float32)
      positive_label_mask = tf.equal(y_true, 1.0)
      cross_entropy = (
          tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,
                                                  logits=y_pred))
      probs = tf.sigmoid(y_pred)
      # Probability assigned to the ground-truth class.
      probs_gt = tf.where(positive_label_mask, probs, 1.0 - probs)
      # With small gamma, the implementation could produce NaN during back
      # prop.
      modulator = tf.pow(1.0 - probs_gt, self._gamma)
      loss = modulator * cross_entropy
      # alpha weighting for positives, (1 - alpha) for negatives.
      weighted_loss = tf.where(positive_label_mask, self._alpha * loss,
                               (1.0 - self._alpha) * loss)

      return weighted_loss

  def get_config(self):
    """Returns the serializable config, merged with the base-class config."""
    config = {
        'alpha': self._alpha,
        'gamma': self._gamma,
    }
    base_config = super(FocalLoss, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
def multi_level_flatten(multi_level_inputs, last_dim=None):
  """Flattens a multi-level input.

  Args:
    multi_level_inputs: Ordered Dict with level to [batch, d1, ..., dm].
    last_dim: Whether the output should be [batch_size, None], or
      [batch_size, None, last_dim]. Defaults to `None`.

  Returns:
    Concatenated output [batch_size, None], or [batch_size, None, dm]
  """
  flattened = []
  batch_size = None
  for single_input in multi_level_inputs.values():
    if batch_size is None:
      # Prefer the static batch dimension; fall back to the dynamic shape
      # when it is unknown at graph-construction time.
      batch_size = single_input.shape[0] or tf.shape(single_input)[0]
    target_shape = ([batch_size, -1] if last_dim is None else
                    [batch_size, -1, last_dim])
    flattened.append(tf.reshape(single_input, target_shape))
  return tf.concat(flattened, axis=1)
class PerClassIoU(tf.keras.metrics.Metric):
  """Computes the per-class Intersection-Over-Union metric.

  Mean Intersection-Over-Union is a common evaluation metric for semantic
  image segmentation, which first computes the IOU for each semantic class.
  IOU is defined as follows:
    IOU = true_positive / (true_positive + false_positive + false_negative).
  The predictions are accumulated in a confusion matrix, weighted by
  `sample_weight` and the metric is then calculated from it.

  If `sample_weight` is `None`, weights default to 1.
  Use `sample_weight` of 0 to mask values.

  Example:

  >>> # cm = [[1, 1],
  >>> #       [1, 1]]
  >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
  >>> # iou = true_positives / (sum_row + sum_col - true_positives)
  >>> # result = [1 / (2 + 2 - 1), 1 / (2 + 2 - 1)] = [0.33, 0.33]
  >>> m = PerClassIoU(num_classes=2)
  >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1])
  >>> m.result().numpy()
  [0.33333334, 0.33333334]
  """

  def __init__(self, num_classes, name=None, dtype=None):
    """Initializes `PerClassIoU`.

    Args:
      num_classes: The possible number of labels the prediction task can
        have. This value must be provided, since a confusion matrix of
        dimension = [num_classes, num_classes] will be allocated.
      name: (Optional) string name of the metric instance.
      dtype: (Optional) data type of the metric result.
    """
    super(PerClassIoU, self).__init__(name=name, dtype=dtype)
    self.num_classes = num_classes

    # Variable to accumulate the predictions in the confusion matrix.
    self.total_cm = self.add_weight(
        'total_confusion_matrix',
        shape=(num_classes, num_classes),
        initializer=tf.compat.v1.zeros_initializer)

  def update_state(self, y_true, y_pred, sample_weight=None):
    """Accumulates the confusion matrix statistics.

    Args:
      y_true: The ground truth values.
      y_pred: The predicted values.
      sample_weight: Optional weighting of each example. Defaults to 1. Can
        be a `Tensor` whose rank is either 0, or the same rank as `y_true`,
        and must be broadcastable to `y_true`.

    Returns:
      The op that adds this batch's confusion matrix into `total_cm`
      (note: not the per-class IOU; call `result()` for that).
    """
    y_true = tf.cast(y_true, self._dtype)
    y_pred = tf.cast(y_pred, self._dtype)

    # Flatten the input if its rank > 1.
    if y_pred.shape.ndims > 1:
      y_pred = tf.reshape(y_pred, [-1])

    if y_true.shape.ndims > 1:
      y_true = tf.reshape(y_true, [-1])

    if sample_weight is not None:
      sample_weight = tf.cast(sample_weight, self._dtype)
      if sample_weight.shape.ndims > 1:
        sample_weight = tf.reshape(sample_weight, [-1])

    # Accumulate the prediction to current confusion matrix.
    current_cm = tf.math.confusion_matrix(
        y_true,
        y_pred,
        self.num_classes,
        weights=sample_weight,
        dtype=self._dtype)
    return self.total_cm.assign_add(current_cm)

  def result(self):
    """Computes the per-class IOU from the accumulated confusion matrix."""
    sum_over_row = tf.cast(
        tf.reduce_sum(self.total_cm, axis=0), dtype=self._dtype)
    sum_over_col = tf.cast(
        tf.reduce_sum(self.total_cm, axis=1), dtype=self._dtype)
    true_positives = tf.cast(
        tf.linalg.tensor_diag_part(self.total_cm), dtype=self._dtype)

    # sum_over_row + sum_over_col =
    #     2 * true_positives + false_positives + false_negatives.
    denominator = sum_over_row + sum_over_col - true_positives

    # divide_no_nan yields 0 for classes that never appeared.
    return tf.math.divide_no_nan(true_positives, denominator)

  def reset_states(self):
    """Clears the accumulated confusion matrix."""
    tf.keras.backend.set_value(
        self.total_cm, np.zeros((self.num_classes, self.num_classes)))

  def get_config(self):
    """Returns the serializable config, merged with the base-class config."""
    config = {'num_classes': self.num_classes}
    base_config = super(PerClassIoU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
class MeanIoUTest(tf.test.TestCase):
  """Behavioral tests for `iou.PerClassIoU`."""

  def test_config(self):
    metric = iou.PerClassIoU(num_classes=2, name='per_class_iou')
    self.assertEqual(metric.name, 'per_class_iou')
    self.assertEqual(metric.num_classes, 2)

    # The metric must round-trip through get_config/from_config.
    restored = iou.PerClassIoU.from_config(metric.get_config())
    self.assertEqual(restored.name, 'per_class_iou')
    self.assertEqual(restored.num_classes, 2)

  def test_unweighted(self):
    labels = [0, 0, 1, 1]
    predictions = [0, 1, 0, 1]

    metric = iou.PerClassIoU(num_classes=2)
    result = metric(labels, predictions)

    # cm = [[1, 1],
    #       [1, 1]]
    # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
    # iou = true_positives / (sum_row + sum_col - true_positives)
    self.assertAllClose([1 / (2 + 2 - 1), 1 / (2 + 2 - 1)], result,
                        atol=1e-3)

  def test_weighted(self):
    labels = tf.constant([0, 0, 1, 1])
    predictions = tf.constant([0, 1, 0, 1], dtype=tf.float32)
    weights = tf.constant([0.2, 0.3, 0.4, 0.1])

    metric = iou.PerClassIoU(num_classes=2)
    result = metric(labels, predictions, sample_weight=weights)

    # cm = [[0.2, 0.3],
    #       [0.4, 0.1]]
    # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5],
    # true_positives = [0.2, 0.1]
    # iou = true_positives / (sum_row + sum_col - true_positives)
    self.assertAllClose(
        [0.2 / (0.6 + 0.5 - 0.2), 0.1 / (0.4 + 0.5 - 0.1)], result,
        atol=1e-3)

  def test_multi_dim_input(self):
    labels = tf.constant([[0, 0], [1, 1]])
    predictions = tf.constant([[0, 1], [0, 1]], dtype=tf.float32)
    weights = tf.constant([[0.2, 0.3], [0.4, 0.1]])

    metric = iou.PerClassIoU(num_classes=2)
    result = metric(labels, predictions, sample_weight=weights)

    # Identical expectation as the weighted flat case: the metric flattens
    # rank>1 inputs internally.
    self.assertAllClose(
        [0.2 / (0.6 + 0.5 - 0.2), 0.1 / (0.4 + 0.5 - 0.1)], result,
        atol=1e-3)

  def test_zero_valid_entries(self):
    # With no updates, divide_no_nan maps every class to 0.
    metric = iou.PerClassIoU(num_classes=2)
    self.assertAllClose(metric.result(), [0, 0], atol=1e-3)

  def test_zero_and_non_zero_entries(self):
    labels = tf.constant([1])
    predictions = tf.constant([1], dtype=tf.float32)

    metric = iou.PerClassIoU(num_classes=2)
    result = metric(labels, predictions)

    # cm = [[0, 0],
    #       [0, 1]]
    # sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1]
    # iou = true_positives / (sum_row + sum_col - true_positives)
    self.assertAllClose([0, 1 / (1 + 1 - 1)], result, atol=1e-3)


if __name__ == '__main__':
  tf.test.main()
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Keras-CV layers package definition.""" +from official.vision.keras_cv.ops.anchor_generator import AnchorGenerator +from official.vision.keras_cv.ops.box_matcher import BoxMatcher +from official.vision.keras_cv.ops.iou_similarity import IouSimilarity +from official.vision.keras_cv.ops.target_gather import TargetGather diff --git a/official/vision/keras_cv/ops/anchor_generator.py b/official/vision/keras_cv/ops/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..c15b178da09913cd080bf54ccd2ee261f17751d6 --- /dev/null +++ b/official/vision/keras_cv/ops/anchor_generator.py @@ -0,0 +1,182 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Multi scale anchor generator definition.""" + +import tensorflow as tf + + +# (TODO/tanzheny): consider having customized anchor offset. +class _SingleAnchorGenerator: + """Utility to generate anchors for a single feature map. + + Example: + ```python + anchor_gen = _SingleAnchorGenerator(32, [.5, 1., 2.], stride=16) + anchors = anchor_gen([512, 512, 3]) + ``` + """ + + def __init__(self, + anchor_size, + scales, + aspect_ratios, + stride, + clip_boxes=False): + """Constructs single scale anchor. + + Args: + anchor_size: A single int represents the base anchor size. 
The anchor + height will be `anchor_size / sqrt(aspect_ratio)`, anchor width will be + `anchor_size * sqrt(aspect_ratio)`. + scales: A list/tuple, or a list/tuple of a list/tuple of positive + floats representing the actual anchor size to the base `anchor_size`. + aspect_ratios: a list/tuple of positive floats representing the ratio of + anchor width to anchor height. + stride: A single int represents the anchor stride size between center of + each anchor. + clip_boxes: Boolean to represent whether the anchor coordinates should be + clipped to the image size. Defaults to `True`. + Input shape: the size of the image, `[H, W, C]` + Output shape: the size of anchors, `[(H / stride) * (W / stride), 4]` + """ + self.anchor_size = anchor_size + self.scales = scales + self.aspect_ratios = aspect_ratios + self.stride = stride + self.clip_boxes = clip_boxes + + def __call__(self, image_size): + image_height = tf.cast(image_size[0], tf.float32) + image_width = tf.cast(image_size[1], tf.float32) + + k = len(self.scales) * len(self.aspect_ratios) + aspect_ratios_sqrt = tf.cast(tf.sqrt(self.aspect_ratios), dtype=tf.float32) + anchor_size = tf.cast(self.anchor_size, tf.float32) + + # [K] + anchor_heights = [] + anchor_widths = [] + for scale in self.scales: + anchor_size_t = anchor_size * scale + anchor_height = anchor_size_t / aspect_ratios_sqrt + anchor_width = anchor_size_t * aspect_ratios_sqrt + anchor_heights.append(anchor_height) + anchor_widths.append(anchor_width) + anchor_heights = tf.concat(anchor_heights, axis=0) + anchor_widths = tf.concat(anchor_widths, axis=0) + half_anchor_heights = tf.reshape(0.5 * anchor_heights, [1, 1, k]) + half_anchor_widths = tf.reshape(0.5 * anchor_widths, [1, 1, k]) + + stride = tf.cast(self.stride, tf.float32) + # [W] + cx = tf.range(0.5 * stride, image_width, stride) + # [H] + cy = tf.range(0.5 * stride, image_height, stride) + # [H, W] + cx_grid, cy_grid = tf.meshgrid(cx, cy) + # [H, W, 1] + cx_grid = tf.expand_dims(cx_grid, axis=-1) + 
cy_grid = tf.expand_dims(cy_grid, axis=-1) + + # [H, W, K, 1] + y_min = tf.expand_dims(cy_grid - half_anchor_heights, axis=-1) + y_max = tf.expand_dims(cy_grid + half_anchor_heights, axis=-1) + x_min = tf.expand_dims(cx_grid - half_anchor_widths, axis=-1) + x_max = tf.expand_dims(cx_grid + half_anchor_widths, axis=-1) + + if self.clip_boxes: + y_min = tf.maximum(tf.minimum(y_min, image_height), 0.) + y_max = tf.maximum(tf.minimum(y_max, image_height), 0.) + x_min = tf.maximum(tf.minimum(x_min, image_width), 0.) + x_max = tf.maximum(tf.minimum(x_max, image_width), 0.) + + # [H, W, K, 4] + result = tf.concat([y_min, x_min, y_max, x_max], axis=-1) + shape = result.shape.as_list() + # [H, W, K * 4] + return tf.reshape(result, [shape[0], shape[1], shape[2] * shape[3]]) + + +class AnchorGenerator(): + """Utility to generate anchors for a multiple feature maps. + + Example: + ```python + anchor_gen = AnchorGenerator([32, 64], [.5, 1., 2.], + strides=[16, 32]) + anchors = anchor_gen([512, 512, 3]) + ``` + + """ + + def __init__(self, + anchor_sizes, + scales, + aspect_ratios, + strides, + clip_boxes=False): + """Constructs multiscale anchors. + + Args: + anchor_sizes: A list of int represents the anchor size for each scale. The + anchor height will be `anchor_size / sqrt(aspect_ratio)`, anchor width + will be `anchor_size * sqrt(aspect_ratio)` for each scale. + scales: A list/tuple, or a list/tuple of a list/tuple of positive + floats representing the actual anchor size to the base `anchor_size`. + aspect_ratios: A list/tuple, or a list/tuple of a list/tuple of positive + floats representing the ratio of anchor width to anchor height. + strides: A list/tuple of ints represent the anchor stride size between + center of anchors at each scale. + clip_boxes: Boolean to represents whether the anchor coordinates should be + clipped to the image size. Defaults to `False`. 
+ Input shape: the size of the image, `[H, W, C]` + Output shape: the size of anchors concat on each level, `[(H / + strides) * (W / strides), K * 4]` + """ + # aspect_ratio is a single list that is the same across all levels. + aspect_ratios = maybe_map_structure_for_anchor(aspect_ratios, anchor_sizes) + scales = maybe_map_structure_for_anchor(scales, anchor_sizes) + if isinstance(anchor_sizes, dict): + self.anchor_generators = {} + for k in anchor_sizes.keys(): + self.anchor_generators[k] = _SingleAnchorGenerator( + anchor_sizes[k], scales[k], aspect_ratios[k], strides[k], + clip_boxes) + elif isinstance(anchor_sizes, (list, tuple)): + self.anchor_generators = [] + for anchor_size, scale_list, ar_list, stride in zip( + anchor_sizes, scales, aspect_ratios, strides): + self.anchor_generators.append( + _SingleAnchorGenerator(anchor_size, scale_list, ar_list, stride, + clip_boxes)) + + def __call__(self, image_size): + anchor_generators = tf.nest.flatten(self.anchor_generators) + results = [anchor_gen(image_size) for anchor_gen in anchor_generators] + return tf.nest.pack_sequence_as(self.anchor_generators, results) + + +def maybe_map_structure_for_anchor(params, anchor_sizes): + """broadcast the params to match anchor_sizes.""" + if all(isinstance(param, (int, float)) for param in params): + if isinstance(anchor_sizes, (tuple, list)): + return [params] * len(anchor_sizes) + elif isinstance(anchor_sizes, dict): + return tf.nest.map_structure(lambda _: params, anchor_sizes) + else: + raise ValueError("the structure of `anchor_sizes` must be a tuple, " + "list, or dict, given {}".format(anchor_sizes)) + else: + return params diff --git a/official/vision/keras_cv/ops/anchor_generator_test.py b/official/vision/keras_cv/ops/anchor_generator_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5de6cb905c2fb01b1110638bc02d25b16f2d4e1e --- /dev/null +++ b/official/vision/keras_cv/ops/anchor_generator_test.py @@ -0,0 +1,137 @@ +# Copyright 2021 The 
TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for anchor_generator.py.""" + +from absl.testing import parameterized +import tensorflow as tf +from official.vision.keras_cv.ops import anchor_generator + + +class AnchorGeneratorTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + # Single scale anchor. + (5, [1.0], [[[-16., -16., 48., 48.], [-16., 16., 48., 80.]], + [[16., -16., 80., 48.], [16., 16., 80., 80.]]]), + # # Multi aspect ratio anchor. + (6, [1.0, 4.0, 0.25], + [[[-32., -32., 96., 96., 0., -96., 64., 160., -96., 0., 160., 64.]]]), + ) + def testAnchorGeneration(self, level, aspect_ratios, expected_boxes): + image_size = [64, 64] + anchor_size = 2**(level + 1) + stride = 2**level + anchor_gen = anchor_generator._SingleAnchorGenerator( + anchor_size=anchor_size, + scales=[1.], + aspect_ratios=aspect_ratios, + stride=stride, + clip_boxes=False) + anchors = anchor_gen(image_size).numpy() + self.assertAllClose(expected_boxes, anchors) + + @parameterized.parameters( + # Single scale anchor. + (5, [1.0], [[[0., 0., 48., 48.], [0., 16., 48., 64.]], + [[16., 0., 64., 48.], [16., 16., 64., 64.]]]), + # # Multi aspect ratio anchor. 
+ (6, [1.0, 4.0, 0.25 + ], [[[0., 0., 64., 64., 0., 0., 64., 64., 0., 0., 64., 64.]]]), + ) + def testAnchorGenerationClipped(self, level, aspect_ratios, expected_boxes): + image_size = [64, 64] + anchor_size = 2**(level + 1) + stride = 2**level + anchor_gen = anchor_generator._SingleAnchorGenerator( + anchor_size=anchor_size, + scales=[1.], + aspect_ratios=aspect_ratios, + stride=stride, + clip_boxes=True) + anchors = anchor_gen(image_size).numpy() + self.assertAllClose(expected_boxes, anchors) + + +class MultiScaleAnchorGeneratorTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters( + # Multi scale anchor. + (5, 6, [[1.0], [1.0]], [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80], + [-32, -32, 96, 96]]),) + def testAnchorGeneration(self, min_level, max_level, aspect_ratios, + expected_boxes): + image_size = [64, 64] + levels = range(min_level, max_level + 1) + anchor_sizes = [2**(level + 1) for level in levels] + strides = [2**level for level in levels] + anchor_gen = anchor_generator.AnchorGenerator( + anchor_sizes=anchor_sizes, + scales=[1.], + aspect_ratios=aspect_ratios, + strides=strides) + anchors = anchor_gen(image_size) + anchors = [tf.reshape(anchor, [-1, 4]) for anchor in anchors] + anchors = tf.concat(anchors, axis=0).numpy() + self.assertAllClose(expected_boxes, anchors) + + @parameterized.parameters( + # Multi scale anchor. 
+ (5, 6, [[1.0], [1.0]], [[-16, -16, 48, 48], [-16, 16, 48, 80], + [16, -16, 80, 48], [16, 16, 80, 80], + [-32, -32, 96, 96]]),) + def testAnchorGenerationClipped(self, min_level, max_level, aspect_ratios, + expected_boxes): + image_size = [64, 64] + levels = range(min_level, max_level + 1) + anchor_sizes = [2**(level + 1) for level in levels] + strides = [2**level for level in levels] + anchor_gen = anchor_generator.AnchorGenerator( + anchor_sizes=anchor_sizes, + scales=[1.], + aspect_ratios=aspect_ratios, + strides=strides, + clip_boxes=False) + anchors = anchor_gen(image_size) + anchors = [tf.reshape(anchor, [-1, 4]) for anchor in anchors] + anchors = tf.concat(anchors, axis=0).numpy() + self.assertAllClose(expected_boxes, anchors) + + @parameterized.parameters( + # Multi scale anchor. + (5, 6, [1.0], { + '5': [[[-16., -16., 48., 48.], [-16., 16., 48., 80.]], + [[16., -16., 80., 48.], [16., 16., 80., 80.]]], + '6': [[[-32, -32, 96, 96]]] + }),) + def testAnchorGenerationDict(self, min_level, max_level, aspect_ratios, + expected_boxes): + image_size = [64, 64] + levels = range(min_level, max_level + 1) + anchor_sizes = dict((str(level), 2**(level + 1)) for level in levels) + strides = dict((str(level), 2**level) for level in levels) + anchor_gen = anchor_generator.AnchorGenerator( + anchor_sizes=anchor_sizes, + scales=[1.], + aspect_ratios=aspect_ratios, + strides=strides, + clip_boxes=False) + anchors = anchor_gen(image_size) + for k in expected_boxes.keys(): + self.assertAllClose(expected_boxes[k], anchors[k].numpy()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/keras_cv/ops/box_matcher.py b/official/vision/keras_cv/ops/box_matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..d788577d2f9701146252b52bd6ac0b738937143b --- /dev/null +++ b/official/vision/keras_cv/ops/box_matcher.py @@ -0,0 +1,191 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Box matcher implementation.""" + + +import tensorflow as tf + + +class BoxMatcher: + """Matcher based on highest value. + + This class computes matches from a similarity matrix. Each column is matched + to a single row. + + To support object detection target assignment this class enables setting both + positive_threshold (upper threshold) and negative_threshold (lower thresholds) + defining three categories of similarity which define whether examples are + positive, negative, or ignored, for example: + (1) thresholds=[negative_threshold, positive_threshold], and + indicators=[negative_value, ignore_value, positive_value]: The similarity + metrics below negative_threshold will be assigned with negative_value, + the metrics between negative_threshold and positive_threshold will be + assigned ignore_value, and the metrics above positive_threshold will be + assigned positive_value. + (2) thresholds=[negative_threshold, positive_threshold], and + indicators=[ignore_value, negative_value, positive_value]: The similarity + metric below negative_threshold will be assigned with ignore_value, + the metrics between negative_threshold and positive_threshold will be + assigned negative_value, and the metrics above positive_threshold will be + assigned positive_value. + """ + + def __init__(self, thresholds, indicators, force_match_for_each_col=False): + """Construct BoxMatcher. 
+ + Args: + thresholds: A list of thresholds to classify boxes into + different buckets. The list needs to be sorted, and will be prepended + with -Inf and appended with +Inf. + indicators: A list of values to assign for each bucket. len(`indicators`) + must equal to len(`thresholds`) + 1. + force_match_for_each_col: If True, ensures that each column is matched to + at least one row (which is not guaranteed otherwise if the + positive_threshold is high). Defaults to False. If True, all force + matched row will be assigned to `indicators[-1]`. + + Raises: + ValueError: If `threshold` not sorted, + or len(indicators) != len(threshold) + 1 + """ + if not all([lo <= hi for (lo, hi) in zip(thresholds[:-1], thresholds[1:])]): + raise ValueError('`threshold` must be sorted, got {}'.format(thresholds)) + self.indicators = indicators + if len(indicators) != len(thresholds) + 1: + raise ValueError('len(`indicators`) must be len(`thresholds`) + 1, got ' + 'indicators {}, thresholds {}'.format( + indicators, thresholds)) + thresholds = thresholds[:] + thresholds.insert(0, -float('inf')) + thresholds.append(float('inf')) + self.thresholds = thresholds + self._force_match_for_each_col = force_match_for_each_col + + def __call__(self, similarity_matrix): + """Tries to match each column of the similarity matrix to a row. + + Args: + similarity_matrix: A float tensor of shape [N, M] representing any + similarity metric. + + Returns: + A integer tensor of shape [N] with corresponding match indices for each + of M columns, for positive match, the match result will be the + corresponding row index, for negative match, the match will be + `negative_value`, for ignored match, the match result will be + `ignore_value`. 
+ """ + squeeze_result = False + if len(similarity_matrix.shape) == 2: + squeeze_result = True + similarity_matrix = tf.expand_dims(similarity_matrix, axis=0) + + static_shape = similarity_matrix.shape.as_list() + num_rows = static_shape[1] or tf.shape(similarity_matrix)[1] + batch_size = static_shape[0] or tf.shape(similarity_matrix)[0] + + def _match_when_rows_are_empty(): + """Performs matching when the rows of similarity matrix are empty. + + When the rows are empty, all detections are false positives. So we return + a tensor of -1's to indicate that the columns do not match to any rows. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + with tf.name_scope('empty_gt_boxes'): + matches = tf.zeros([batch_size, num_rows], dtype=tf.int32) + match_labels = -tf.ones([batch_size, num_rows], dtype=tf.int32) + return matches, match_labels + + def _match_when_rows_are_non_empty(): + """Performs matching when the rows of similarity matrix are non empty. + + Returns: + matches: int32 tensor indicating the row each column matches to. + """ + # Matches for each column + with tf.name_scope('non_empty_gt_boxes'): + matches = tf.argmax(similarity_matrix, axis=-1, output_type=tf.int32) + + # Get logical indices of ignored and unmatched columns as tf.int64 + matched_vals = tf.reduce_max(similarity_matrix, axis=-1) + matched_indicators = tf.zeros([batch_size, num_rows], tf.int32) + + match_dtype = matched_vals.dtype + for (ind, low, high) in zip(self.indicators, self.thresholds[:-1], + self.thresholds[1:]): + low_threshold = tf.cast(low, match_dtype) + high_threshold = tf.cast(high, match_dtype) + mask = tf.logical_and( + tf.greater_equal(matched_vals, low_threshold), + tf.less(matched_vals, high_threshold)) + matched_indicators = self._set_values_using_indicator( + matched_indicators, mask, ind) + + if self._force_match_for_each_col: + # [batch_size, M], for each col (groundtruth_box), find the best + # matching row (anchor). 
+ force_match_column_ids = tf.argmax( + input=similarity_matrix, axis=1, output_type=tf.int32) + # [batch_size, M, N] + force_match_column_indicators = tf.one_hot( + force_match_column_ids, depth=num_rows) + # [batch_size, N], for each row (anchor), find the largest column + # index for groundtruth box + force_match_row_ids = tf.argmax( + input=force_match_column_indicators, axis=1, output_type=tf.int32) + # [batch_size, N] + force_match_column_mask = tf.cast( + tf.reduce_max(force_match_column_indicators, axis=1), + tf.bool) + # [batch_size, N] + final_matches = tf.where(force_match_column_mask, force_match_row_ids, + matches) + final_matched_indicators = tf.where( + force_match_column_mask, self.indicators[-1] * + tf.ones([batch_size, num_rows], dtype=tf.int32), + matched_indicators) + return final_matches, final_matched_indicators + else: + return matches, matched_indicators + + num_gt_boxes = similarity_matrix.shape.as_list()[-1] or tf.shape( + similarity_matrix)[-1] + result_match, result_matched_indicators = tf.cond( + pred=tf.greater(num_gt_boxes, 0), + true_fn=_match_when_rows_are_non_empty, + false_fn=_match_when_rows_are_empty) + + if squeeze_result: + result_match = tf.squeeze(result_match, axis=0) + result_matched_indicators = tf.squeeze(result_matched_indicators, axis=0) + + return result_match, result_matched_indicators + + def _set_values_using_indicator(self, x, indicator, val): + """Set the indicated fields of x to val. + + Args: + x: tensor. + indicator: boolean with same shape as x. + val: scalar with value to set. + + Returns: + modified tensor. 
+ """ + indicator = tf.cast(indicator, x.dtype) + return tf.add(tf.multiply(x, 1 - indicator), val * indicator) diff --git a/official/vision/keras_cv/ops/box_matcher_test.py b/official/vision/keras_cv/ops/box_matcher_test.py new file mode 100644 index 0000000000000000000000000000000000000000..baf4fe1136051b6bc175f9380177bab5b0e2c7ca --- /dev/null +++ b/official/vision/keras_cv/ops/box_matcher_test.py @@ -0,0 +1,78 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for box_matcher.py.""" + +import tensorflow as tf + +from official.vision.keras_cv.ops import box_matcher + + +class BoxMatcherTest(tf.test.TestCase): + + def test_box_matcher_unbatched(self): + sim_matrix = tf.constant( + [[0.04, 0, 0, 0], + [0, 0, 1., 0]], + dtype=tf.float32) + + fg_threshold = 0.5 + bg_thresh_hi = 0.2 + bg_thresh_lo = 0.0 + + matcher = box_matcher.BoxMatcher( + thresholds=[bg_thresh_lo, bg_thresh_hi, fg_threshold], + indicators=[-3, -2, -1, 1]) + match_indices, match_indicators = matcher(sim_matrix) + positive_matches = tf.greater_equal(match_indicators, 0) + negative_matches = tf.equal(match_indicators, -2) + + self.assertAllEqual( + positive_matches.numpy(), [False, True]) + self.assertAllEqual( + negative_matches.numpy(), [True, False]) + self.assertAllEqual( + match_indices.numpy(), [0, 2]) + self.assertAllEqual( + match_indicators.numpy(), [-2, 1]) + + def test_box_matcher_batched(self): + sim_matrix = tf.constant( + [[[0.04, 0, 0, 0], + [0, 0, 1., 0]]], + dtype=tf.float32) + + fg_threshold = 0.5 + bg_thresh_hi = 0.2 + bg_thresh_lo = 0.0 + + matcher = box_matcher.BoxMatcher( + thresholds=[bg_thresh_lo, bg_thresh_hi, fg_threshold], + indicators=[-3, -2, -1, 1]) + match_indices, match_indicators = matcher(sim_matrix) + positive_matches = tf.greater_equal(match_indicators, 0) + negative_matches = tf.equal(match_indicators, -2) + + self.assertAllEqual( + positive_matches.numpy(), [[False, True]]) + self.assertAllEqual( + negative_matches.numpy(), [[True, False]]) + self.assertAllEqual( + match_indices.numpy(), [[0, 2]]) + self.assertAllEqual( + match_indicators.numpy(), [[-2, 1]]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/keras_cv/ops/iou_similarity.py b/official/vision/keras_cv/ops/iou_similarity.py new file mode 100644 index 0000000000000000000000000000000000000000..beb69da73d40a781995dde50e80a8d5fae21ae16 --- /dev/null +++ b/official/vision/keras_cv/ops/iou_similarity.py @@ -0,0 +1,164 @@ +# 
Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Region Similarity Calculators.""" + +import tensorflow as tf + + +def area(box): + """Computes area of boxes. + + B: batch_size + N: number of boxes + + Args: + box: a float Tensor with [N, 4], or [B, N, 4]. + + Returns: + a float Tensor with [N], or [B, N] + """ + with tf.name_scope('Area'): + y_min, x_min, y_max, x_max = tf.split( + value=box, num_or_size_splits=4, axis=-1) + return tf.squeeze((y_max - y_min) * (x_max - x_min), axis=-1) + + +def intersection(gt_boxes, boxes): + """Compute pairwise intersection areas between boxes. + + B: batch_size + N: number of groundtruth boxes. + M: number of anchor boxes. + + Args: + gt_boxes: a float Tensor with [N, 4], or [B, N, 4] + boxes: a float Tensor with [M, 4], or [B, M, 4] + + Returns: + a float Tensor with shape [N, M] or [B, N, M] representing pairwise + intersections. 
+ """ + with tf.name_scope('Intersection'): + y_min1, x_min1, y_max1, x_max1 = tf.split( + value=gt_boxes, num_or_size_splits=4, axis=-1) + y_min2, x_min2, y_max2, x_max2 = tf.split( + value=boxes, num_or_size_splits=4, axis=-1) + + boxes_rank = len(boxes.shape) + perm = [1, 0] if boxes_rank == 2 else [0, 2, 1] + # [N, M] or [B, N, M] + y_min_max = tf.minimum(y_max1, tf.transpose(y_max2, perm)) + y_max_min = tf.maximum(y_min1, tf.transpose(y_min2, perm)) + x_min_max = tf.minimum(x_max1, tf.transpose(x_max2, perm)) + x_max_min = tf.maximum(x_min1, tf.transpose(x_min2, perm)) + + intersect_heights = y_min_max - y_max_min + intersect_widths = x_min_max - x_max_min + zeros_t = tf.cast(0, intersect_heights.dtype) + intersect_heights = tf.maximum(zeros_t, intersect_heights) + intersect_widths = tf.maximum(zeros_t, intersect_widths) + return intersect_heights * intersect_widths + + +def iou(gt_boxes, boxes): + """Computes pairwise intersection-over-union between box collections. + + Args: + gt_boxes: a float Tensor with [N, 4]. + boxes: a float Tensor with [M, 4]. + + Returns: + a Tensor with shape [N, M] representing pairwise iou scores. + """ + with tf.name_scope('IOU'): + intersections = intersection(gt_boxes, boxes) + gt_boxes_areas = area(gt_boxes) + boxes_areas = area(boxes) + boxes_rank = len(boxes_areas.shape) + boxes_axis = 1 if (boxes_rank == 2) else 0 + gt_boxes_areas = tf.expand_dims(gt_boxes_areas, -1) + boxes_areas = tf.expand_dims(boxes_areas, boxes_axis) + unions = gt_boxes_areas + boxes_areas + unions = unions - intersections + return tf.where( + tf.equal(intersections, 0.0), tf.zeros_like(intersections), + tf.truediv(intersections, unions)) + + +class IouSimilarity: + """Class to compute similarity based on Intersection over Union (IOU) metric. 
+ + """ + + def __init__(self, mask_val=-1): + self.mask_val = mask_val + + def __call__(self, boxes_1, boxes_2, boxes_1_masks=None, boxes_2_masks=None): + """Compute pairwise IOU similarity between ground truth boxes and anchors. + + B: batch_size + N: Number of groundtruth boxes. + M: Number of anchor boxes. + + Args: + boxes_1: a float Tensor with M or B * M boxes. + boxes_2: a float Tensor with N or B * N boxes, the rank must be less than + or equal to rank of `boxes_1`. + boxes_1_masks: a boolean Tensor with M or B * M boxes. Optional. + boxes_2_masks: a boolean Tensor with N or B * N boxes. Optional. + + Returns: + A Tensor with shape [M, N] or [B, M, N] representing pairwise + iou scores, anchor per row and groundtruth_box per colulmn. + + Input shape: + boxes_1: [N, 4], or [B, N, 4] + boxes_2: [M, 4], or [B, M, 4] + boxes_1_masks: [N, 1], or [B, N, 1] + boxes_2_masks: [M, 1], or [B, M, 1] + + Output shape: + [M, N], or [B, M, N] + """ + boxes_1_rank = len(boxes_1.shape) + boxes_2_rank = len(boxes_2.shape) + if boxes_1_rank < 2 or boxes_1_rank > 3: + raise ValueError( + '`groudtruth_boxes` must be rank 2 or 3, got {}'.format(boxes_1_rank)) + if boxes_2_rank < 2 or boxes_2_rank > 3: + raise ValueError( + '`anchors` must be rank 2 or 3, got {}'.format(boxes_2_rank)) + if boxes_1_rank < boxes_2_rank: + raise ValueError('`groundtruth_boxes` is unbatched while `anchors` is ' + 'batched is not a valid use case, got groundtruth_box ' + 'rank {}, and anchors rank {}'.format( + boxes_1_rank, boxes_2_rank)) + + result = iou(boxes_1, boxes_2) + if boxes_1_masks is None and boxes_2_masks is None: + return result + background_mask = None + mask_val_t = tf.cast(self.mask_val, result.dtype) * tf.ones_like(result) + perm = [1, 0] if boxes_2_rank == 2 else [0, 2, 1] + if boxes_1_masks is not None and boxes_2_masks is not None: + background_mask = tf.logical_or(boxes_1_masks, + tf.transpose(boxes_2_masks, perm)) + elif boxes_1_masks is not None: + background_mask = 
boxes_1_masks + else: + background_mask = tf.logical_or( + tf.zeros(tf.shape(boxes_2)[:-1], dtype=tf.bool), + tf.transpose(boxes_2_masks, perm)) + return tf.where(background_mask, mask_val_t, result) diff --git a/official/vision/keras_cv/ops/iou_similarity_test.py b/official/vision/keras_cv/ops/iou_similarity_test.py new file mode 100644 index 0000000000000000000000000000000000000000..137f75646a286e302924f3d1db6538fb82a5baba --- /dev/null +++ b/official/vision/keras_cv/ops/iou_similarity_test.py @@ -0,0 +1,76 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for iou_similarity.py.""" + +import tensorflow as tf + +from official.vision.keras_cv.ops import iou_similarity + + +class BoxMatcherTest(tf.test.TestCase): + + def test_similarity_unbatched(self): + boxes = tf.constant( + [ + [0, 0, 1, 1], + [5, 0, 10, 5], + ], + dtype=tf.float32) + + gt_boxes = tf.constant( + [ + [0, 0, 5, 5], + [0, 5, 5, 10], + [5, 0, 10, 5], + [5, 5, 10, 10], + ], + dtype=tf.float32) + + sim_calc = iou_similarity.IouSimilarity() + sim_matrix = sim_calc(boxes, gt_boxes) + + self.assertAllClose( + sim_matrix.numpy(), + [[0.04, 0, 0, 0], + [0, 0, 1., 0]]) + + def test_similarity_batched(self): + boxes = tf.constant( + [[ + [0, 0, 1, 1], + [5, 0, 10, 5], + ]], + dtype=tf.float32) + + gt_boxes = tf.constant( + [[ + [0, 0, 5, 5], + [0, 5, 5, 10], + [5, 0, 10, 5], + [5, 5, 10, 10], + ]], + dtype=tf.float32) + + sim_calc = iou_similarity.IouSimilarity() + sim_matrix = sim_calc(boxes, gt_boxes) + + self.assertAllClose( + sim_matrix.numpy(), + [[[0.04, 0, 0, 0], + [0, 0, 1., 0]]]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/keras_cv/ops/target_gather.py b/official/vision/keras_cv/ops/target_gather.py new file mode 100644 index 0000000000000000000000000000000000000000..e9cbbe4c62d1cd53e621bd4970a19f623a35e357 --- /dev/null +++ b/official/vision/keras_cv/ops/target_gather.py @@ -0,0 +1,103 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Definition of target gather, which gathers targets from indices.""" + +import tensorflow as tf + + +class TargetGather: + """Targer gather for dense object detector.""" + + def __call__(self, labels, match_indices, mask=None, mask_val=0.0): + """Labels anchors with ground truth inputs. + + B: batch_size + N: number of groundtruth boxes. + + Args: + labels: An integer tensor with shape [N, dims] or [B, N, ...] representing + groundtruth labels. + match_indices: An integer tensor with shape [M] or [B, M] representing + match label index. + mask: An boolean tensor with shape [M, dims] or [B, M,...] representing + match labels. + mask_val: An integer to fill in for mask. + + Returns: + target: An integer Tensor with shape [M] or [B, M] + Raises: + ValueError: If `labels` is higher than rank 3. + """ + if len(labels.shape) <= 2: + return self._gather_unbatched(labels, match_indices, mask, mask_val) + elif len(labels.shape) == 3: + return self._gather_batched(labels, match_indices, mask, mask_val) + else: + raise ValueError("`TargetGather` does not support `labels` with rank " + "larger than 3, got {}".format(len(labels.shape))) + + def _gather_unbatched(self, labels, match_indices, mask, mask_val): + """Gather based on unbatched labels and boxes.""" + num_gt_boxes = tf.shape(labels)[0] + + def _assign_when_rows_empty(): + if len(labels.shape) > 1: + mask_shape = [match_indices.shape[0], labels.shape[-1]] + else: + mask_shape = [match_indices.shape[0]] + return tf.cast(mask_val, labels.dtype) * tf.ones( + mask_shape, dtype=labels.dtype) + + def _assign_when_rows_not_empty(): + targets = tf.gather(labels, match_indices) + if mask is None: + return targets + else: + masked_targets = tf.cast(mask_val, labels.dtype) * tf.ones_like( + mask, dtype=labels.dtype) + return tf.where(mask, masked_targets, targets) + + return tf.cond(tf.greater(num_gt_boxes, 0), + _assign_when_rows_not_empty, + _assign_when_rows_empty) + + def _gather_batched(self, labels, match_indices, mask, 
mask_val): + """Gather based on batched labels.""" + batch_size = labels.shape[0] + if batch_size == 1: + if mask is not None: + result = self._gather_unbatched( + tf.squeeze(labels, axis=0), tf.squeeze(match_indices, axis=0), + tf.squeeze(mask, axis=0), mask_val) + else: + result = self._gather_unbatched( + tf.squeeze(labels, axis=0), tf.squeeze(match_indices, axis=0), + None, mask_val) + return tf.expand_dims(result, axis=0) + else: + indices_shape = tf.shape(match_indices) + indices_dtype = match_indices.dtype + batch_indices = (tf.expand_dims( + tf.range(indices_shape[0], dtype=indices_dtype), axis=-1) * + tf.ones([1, indices_shape[-1]], dtype=indices_dtype)) + gather_nd_indices = tf.stack( + [batch_indices, match_indices], axis=-1) + targets = tf.gather_nd(labels, gather_nd_indices) + if mask is None: + return targets + else: + masked_targets = tf.cast(mask_val, labels.dtype) * tf.ones_like( + mask, dtype=labels.dtype) + return tf.where(mask, masked_targets, targets) diff --git a/official/vision/keras_cv/ops/target_gather_test.py b/official/vision/keras_cv/ops/target_gather_test.py new file mode 100644 index 0000000000000000000000000000000000000000..cf5693b12b1092ecf88db9cca5bb6c81a6d5e508 --- /dev/null +++ b/official/vision/keras_cv/ops/target_gather_test.py @@ -0,0 +1,77 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for target_gather.py.""" + +import tensorflow as tf + +from official.vision.keras_cv.ops import target_gather + + +class TargetGatherTest(tf.test.TestCase): + + def test_target_gather_batched(self): + gt_boxes = tf.constant( + [[ + [0, 0, 5, 5], + [0, 5, 5, 10], + [5, 0, 10, 5], + [5, 5, 10, 10], + ]], + dtype=tf.float32) + gt_classes = tf.constant([[[2], [10], [3], [-1]]], dtype=tf.int32) + + labeler = target_gather.TargetGather() + + match_indices = tf.constant([[0, 2]], dtype=tf.int32) + match_indicators = tf.constant([[-2, 1]]) + mask = tf.less_equal(match_indicators, 0) + cls_mask = tf.expand_dims(mask, -1) + matched_gt_classes = labeler(gt_classes, match_indices, cls_mask) + box_mask = tf.tile(cls_mask, [1, 1, 4]) + matched_gt_boxes = labeler(gt_boxes, match_indices, box_mask) + + self.assertAllEqual( + matched_gt_classes.numpy(), [[[0], [3]]]) + self.assertAllClose( + matched_gt_boxes.numpy(), [[[0, 0, 0, 0], [5, 0, 10, 5]]]) + + def test_target_gather_unbatched(self): + gt_boxes = tf.constant( + [ + [0, 0, 5, 5], + [0, 5, 5, 10], + [5, 0, 10, 5], + [5, 5, 10, 10], + ], + dtype=tf.float32) + gt_classes = tf.constant([[2], [10], [3], [-1]], dtype=tf.int32) + + labeler = target_gather.TargetGather() + + match_indices = tf.constant([0, 2], dtype=tf.int32) + match_indicators = tf.constant([-2, 1]) + mask = tf.less_equal(match_indicators, 0) + cls_mask = tf.expand_dims(mask, -1) + matched_gt_classes = labeler(gt_classes, match_indices, cls_mask) + box_mask = tf.tile(cls_mask, [1, 4]) + matched_gt_boxes = labeler(gt_boxes, match_indices, box_mask) + + self.assertAllEqual( + matched_gt_classes.numpy(), [[0], [3]]) + self.assertAllClose( + matched_gt_boxes.numpy(), [[0, 0, 0, 0], [5, 0, 10, 5]]) + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/keras_cv/requirements.txt b/official/vision/keras_cv/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6bad10388ecb1eefd890a797d833976a5e631541 --- 
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Setup script for the keras-cv package."""

import os

version = '0.0.1'


def parse_requirements(lines):
  """Parses requirements.txt-style lines (internal helper).

  Args:
    lines: An iterable of strings, one per requirements-file line.

  Returns:
    A `(install_requires, dependency_links)` tuple of lists of strings:
    plain package names, and targets of `-e <url>` editable-install lines.
  """
  install_requires = []
  dependency_links = []
  for line in lines:
    package_name = line.strip()
    # Skip empty lines and comments starting with "#".
    if not package_name or package_name[0] == '#':
      continue
    if package_name.startswith('-e '):
      dependency_links.append(package_name[3:].strip())
    else:
      install_requires.append(package_name)
  return install_requires, dependency_links


def _get_requirements():
  """Reads and parses the requirements.txt file next to this script."""
  requirements_path = os.path.join(
      os.path.dirname(__file__), 'requirements.txt')
  with open(requirements_path, 'r') as f:
    return parse_requirements(f)


def main():
  """Assembles package metadata and invokes setuptools."""
  # Imported here (not at module level) so this file can be imported for
  # testing without setuptools installed and without side effects.
  from setuptools import find_packages
  from setuptools import setup

  install_requires, dependency_links = _get_requirements()
  install_requires.append('tf-nightly')
  install_requires.append('tensorflow-datasets')

  setup(
      name='keras-cv',
      version=version,
      description='Keras Computer Vision Library',
      url='https://github.com/keras-team/keras-cv',
      author='The Keras authors',
      author_email='keras-team@google.com',
      license='Apache License 2.0',
      install_requires=install_requires,
      classifiers=[
          'Programming Language :: Python',
          'Programming Language :: Python :: 3.6',
          'Operating System :: Unix',
          'Operating System :: Microsoft :: Windows',
          'Operating System :: MacOS',
          'Intended Audience :: Science/Research',
          'Topic :: Scientific/Engineering',
          'Topic :: Software Development'
      ],
      packages=find_packages(exclude=('tests',)),
      exclude_package_data={'': ['*_test.py',],},
      dependency_links=dependency_links,
      python_requires='>=3.6',
  )


if __name__ == '__main__':
  main()
+Orbit is a flexible, lightweight library designed to make it easy to write +[custom training loops][custom_training] in TensorFlow 2. Orbit handles common +model training tasks such as saving checkpoints, running model evaluations, and +setting up summary writing, while giving users full control over implementing +the inner training loop. It integrates with `tf.distribute` seamlessly and +supports running on different device types (CPU, GPU, and TPU). The core code is +intended to be easy to read and fork. + +[custom_training]: https://www.tensorflow.org/tutorials/distribute/custom_training diff --git a/orbit/__init__.py b/orbit/__init__.py index 6e7bbe53cd6328f5d5bc9387619c567b4a88b98c..a97bb719d7aaa385fe12a670b83b83707412738d 100644 --- a/orbit/__init__.py +++ b/orbit/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + +"""Defines exported symbols for the `orbit` package.""" from orbit import utils + from orbit.controller import Controller -from orbit.runner import * -from orbit.standard_runner import * + +from orbit.runner import AbstractEvaluator +from orbit.runner import AbstractTrainer + +from orbit.standard_runner import StandardEvaluator +from orbit.standard_runner import StandardEvaluatorOptions +from orbit.standard_runner import StandardTrainer +from orbit.standard_runner import StandardTrainerOptions diff --git a/orbit/controller.py b/orbit/controller.py index 6e2840f0470464193f30102d7e4cce9828306463..5242a7a7e42cace0c4ccde22ef7582fff98e9374 100644 --- a/orbit/controller.py +++ b/orbit/controller.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,105 +11,152 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""A light weight utilities to train TF2 models.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Provides a `Controller` class for managing the outer training loop.""" +import pprint import time -from typing import Callable, Optional, Text, Union + +from typing import Callable, Optional, Union from absl import logging + from orbit import runner from orbit import utils import tensorflow as tf -def _log_info(message: Text): +def _log(message: str): """Logs `message` to the `info` log, and also prints to stdout.""" logging.info(message) print(message) -def _validate_interval(interval: Optional[int], steps_per_loop: Optional[int], - interval_name: str): - if interval and steps_per_loop and (interval % steps_per_loop != 0): - raise ValueError("The {} interval ({}) must be a multiple " - "of the steps_per_loop ({})".format( - interval_name, interval, steps_per_loop)) +logging.ABSLLogger.register_frame_to_skip(__file__, _log.__name__) + + +def _format_output(output, indent=4): + """Formats `output`, either on one line, or indented across multiple lines.""" + formatted = pprint.pformat(output) + lines = formatted.splitlines() + if len(lines) == 1: + return formatted + lines = [" " * indent + line for line in lines] + return "\n" + "\n".join(lines) + +class Controller: + """Class that controls the outer loop of model training and evaluation. -class Controller(object): - """Class that facilitates training and evaluation of models.""" + Orbit divides training and evaluation into "inner" and "outer" loops. Inner + loops are implemented by users in the form of `AbstractTrainer` and + `AbstractEvaluator` subclasses, and define how to run a given number of + training or evaluation steps. 
The outer loop is provided by this `Controller`, + and interleaves calls to the user provided inner loops with additional actions + such as saving checkpoints, running evaluations, and writing summaries + (depending on the arguments passed to `Controller.__init__` and the method + being called). + + There are four top-level "outer loops" provided: + + - `train`, which trains until a specified number of global steps is reached; + - `evaluate`, for one-off model evaluation; + - `train_and_evaluate`, for interleaved training and evaluation; + - `evaluate_continuously`, for monitoring a given directory and running + evaluations on new model checkpoints. + + While this class attempts to provide out-of-the-box solutions for common + training and evaluation use cases, the internal details and method + implementations are also intended to be simple enough to make subclassing or + other custom outer loop implementations easy to achieve. + """ def __init__( self, - strategy: Optional[tf.distribute.Strategy] = None, + *, # Makes all args keyword only. + global_step: tf.Variable, trainer: Optional[runner.AbstractTrainer] = None, evaluator: Optional[runner.AbstractEvaluator] = None, - global_step: Optional[tf.Variable] = None, + strategy: Optional[tf.distribute.Strategy] = None, # Train related steps_per_loop: Optional[int] = None, checkpoint_manager: Optional[tf.train.CheckpointManager] = None, # Summary related summary_interval: Optional[int] = None, - summary_dir: Optional[Text] = None, + summary_dir: Optional[str] = None, # Evaluation related - eval_summary_dir: Optional[Text] = None): - """Constructs a `Controller` instance. + eval_summary_dir: Optional[str] = None): + """Initializes a `Controller` instance. + + Note that if `checkpoint_manager` is provided and there are checkpoints in + the associated model directory, the model will be restored from the most + recent checkpoint during this `__init__` method. Args: - strategy: An instance of `tf.distribute.Strategy`. 
- trainer: An instance of `orbit.AbstractTrainer`, which represents model - training details. - evaluator: An instance of `orbit.AbstractEvaluator`, which represents - model evaluation details. - global_step: An integer `tf.Variable` indicating the global training step - number. Usually this can be obtained from `iterations` property of the - model's optimizer (e.g. `self.optimizer.iterations`), or users can - create their own global step variable as well. If the users create their - own global step variable, it is recommended to create the `tf.Variable` - inside strategy scope, and with - `aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA`. - steps_per_loop: The number of steps to run in each "inner loop" of - training (passed to the `num_steps` parameter of `trainer.train`). - checkpoint_manager: An instance of `tf.train.CheckpointManager`. + global_step: An integer `tf.Variable` storing the global training step + number. Usually this can be obtained from the `iterations` property of + the model's optimizer (e.g. `trainer.optimizer.iterations`). In cases + where multiple optimizers are used, or if one model "step" corresponds + to more than one update to model parameters, users can create and + increment their own global step variable as well. In this case it is + recommended to create the `tf.Variable` inside the distribution strategy + scope, with `aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA` (see + also `orbit.utils.create_global_step()`). + trainer: An instance of `orbit.AbstractTrainer`, which implements the + inner training loop. + evaluator: An instance of `orbit.AbstractEvaluator`, which implements + evaluation. + strategy: An instance of `tf.distribute.Strategy`. If not provided, the + strategy will be initialized from the current in-scope strategy using + `tf.distribute.get_strategy()`. + steps_per_loop: The number of steps to run in each inner loop of training + (passed as the `num_steps` parameter of `trainer.train`). 
+ checkpoint_manager: An instance of `tf.train.CheckpointManager`. If + provided and there are checkpoints in the associated model directory, + the model will be restored from the most recent checkpoint inside this + `__init__` method. If not provided, the `Controller` will not + automatically save to or restore from checkpoints. summary_interval: Step interval for training summaries. Note that this - argument only applies to the summaries inside `trainer.train` function. - Summaries outside like "steps_per_second" and outputs from - `trainer.train` function will always be enabled. If set, the value - should be divisible by steps_per_loop. - summary_dir: The directory to restore and write checkpoints and summaries. - If None, it will be set to `checkpoint_manager.directory`. - eval_summary_dir: The directory to write eval summaries. If None, it will - be set to `summary_dir`. + argument only applies to `tf.summary` calls inside the `trainer.train` + function. Summaries written by the `Controller` (specifically + "steps_per_second" and output from the `trainer.train` method) will + always be enabled unless the `summary_dir` parameter is `None`. If set, + the value must be divisible by `steps_per_loop`. + summary_dir: The directory to write summaries to. To use the same + directory as for checkpointing, pass `checkpoint_manager.directory`. If + `None`, no training summaries will be written. + eval_summary_dir: The directory to write eval summaries to. If `None`, it + will be set to `summary_dir`. If both `summary_dir` and + `eval_summary_dir` are `None`, no eval summaries will be written. Raises: - ValueError: If both `trainer` and `evaluator` are None. + ValueError: If both `trainer` and `evaluator` are `None`. ValueError: If `steps_per_loop` is not a positive integer. - ValueError: If `summary_interval` is not a positive integer or it cannot - be divisible by `steps_per_loop`. 
+ ValueError: If `summary_interval` is not a positive integer or is not + divisible by `steps_per_loop`. """ if trainer is None and evaluator is None: - raise ValueError("`trainer` and `evaluator` should not both be None") - + raise ValueError("`trainer` and `evaluator` should not both be `None`.") if trainer is not None: if steps_per_loop is None: - raise ValueError("`steps_per_loop` is required when `trainer` is " - "provided.") - - if not isinstance(steps_per_loop, int) or steps_per_loop < 1: - raise ValueError("`steps_per_loop` should be a positive integer") + raise ValueError( + "`steps_per_loop` is required when `trainer` is provided.") + elif not isinstance(steps_per_loop, int) or steps_per_loop < 1: + raise ValueError( + f"`steps_per_loop` ({steps_per_loop}) must be a positive integer.") if summary_interval is not None: if summary_interval <= 0: - raise ValueError("`summary_interval` should be larger than 0") - _validate_interval( - summary_interval, steps_per_loop, interval_name="summary") + raise ValueError( + f"`summary_interval` ({summary_interval}) must be larger than 0.") + elif summary_interval % steps_per_loop != 0: + raise ValueError( + f"`summary interval` ({summary_interval}) must be a multiple " + f"of `steps_per_loop` ({steps_per_loop}).") + + if not isinstance(global_step, tf.Variable): + raise ValueError("`global_step` must be a `tf.Variable`.") self.trainer = trainer self.evaluator = evaluator @@ -119,9 +166,6 @@ class Controller(object): self.global_step = global_step self.checkpoint_manager = checkpoint_manager - if summary_dir is None and checkpoint_manager: - summary_dir = checkpoint_manager.directory - if self.trainer is not None: self.step_timer = None self.steps_per_loop = steps_per_loop @@ -129,7 +173,6 @@ class Controller(object): self.summary_manager = utils.SummaryManager( summary_dir, tf.summary.scalar, global_step=self.global_step) - eval_summary_writer = None if self.evaluator is not None: eval_summary_dir = eval_summary_dir or 
summary_dir if eval_summary_dir == summary_dir and self.trainer is not None: @@ -140,171 +183,141 @@ class Controller(object): self.eval_summary_manager = utils.SummaryManager( eval_summary_dir, tf.summary.scalar, global_step=self.global_step) - if self.global_step is not None: - tf.summary.experimental.set_step(self.global_step) + tf.summary.experimental.set_step(self.global_step) # Restores the model if needed. - # TODO(momernick): We probably only want to do this on certain occasions? if self.checkpoint_manager is not None: - checkpoint_interval = self.checkpoint_manager.checkpoint_interval - _validate_interval( - checkpoint_interval, steps_per_loop, interval_name="checkpoint") - - model_restored = self.restore_checkpoint() - if not model_restored and checkpoint_interval: - # If the model is not restored from a checkpoint, save an initial - # checkpoint. - self.save_checkpoint() + restored_path = self.restore_checkpoint() + if restored_path: + _log(f"restored from checkpoint: {restored_path}") def train(self, steps: int, checkpoint_at_completion: bool = True): - """Runs training. + """Runs training until the specified global step count has been reached. - This method calls the `train` method on the Trainable object until the - global step count is equal to `steps`. It will optionally save checkpoints, - if a CheckpointManager was passed to the Controller instance's `__init__`. + This method makes calls to `self.trainer.train()` until the global step + count is equal to `steps`. It will additionally save checkpoints (if a + `CheckpointManager` was passed to `Controller.__init__`) and summarize + training output (if `summary_dir` is set). Args: steps: The global step count to train up to. checkpoint_at_completion: Whether to save a checkpoint when this method - returns. Defaults to True (write the checkpoint). This is always - triggered, regardless of the checkpointing interval. + returns (regardless of the checkpointing interval). Defaults to `True`. 
""" - if self.trainer is None: - raise ValueError("`self.trainer` is required when calling `train` " - "method.") - if self.global_step is None: - raise ValueError("`self.global_step` is required when calling `train` " - "method.") + self._require("trainer", for_method="train") # TODO(momernick): Support steps=None or -1 (training to exhaustion). - current_step = self.global_step.numpy() # This is an expensive access. + current_step = self.global_step.numpy() # Cache, since this is expensive. + _log(f"train | step: {current_step: 6d} | training until step {steps}...") while current_step < steps: - logging.info("Train at step %s of %s", current_step, steps) # Calculates steps to run for the next train loop. num_steps = min(steps - current_step, self.steps_per_loop) self._train_n_steps(num_steps) self._maybe_save_checkpoint() - current_step = self.global_step.numpy() # This is an expensive access. + current_step = self.global_step.numpy() if checkpoint_at_completion: - self.save_checkpoint() + self._maybe_save_checkpoint(check_interval=False) - def evaluate(self, steps: int = None): - """Runs evaluation. + def evaluate(self, steps: int = -1) -> Optional[runner.Output]: + """Runs evaluation for the given number of steps. - This method calls the `evaluate` method on the Evaluator object for `steps` - steps, then writes the returned summaries (if any). + This method calls `self.evaluator.evaluate(steps)`, then writes the returned + summaries (if any). Args: - steps: The number of steps to evaluate for. + steps: The number of evaluation steps to run. The value `-1` is reserved + as a special sentinel to indicate a "complete" evaluation that runs + until the underlying dataset is exhausted. Support for this is dependent + on the specific `evaluator` being used. - Raises: - ValueError: If no checkpoint found in `self.checkpoint_manager.directory`. - ValueError: If `evaluator` is not provided. 
+ Returns: + The evaluation results as a dictionary mapping names to NumPy values. + Raises: + ValueError: If `evaluator` was not provided to `Controller.__init__`. + ValueError: If no checkpoint is present in `checkpoint_manager.directory`. + ValueError: If `steps` is not a positive value or -1. """ - if self.evaluator is None: - raise ValueError("`evaluator` must be provided to call `evaluate()` " - "method.") + self._require("evaluator", for_method="evaluate") - steps = steps or -1 - current_step = self.global_step.numpy() if steps > 0: - logging.info("Running %s steps of evaluation at train step: %s", steps, - current_step) - steps = tf.convert_to_tensor(steps, dtype=tf.int32) + steps_msg = f"running {steps} steps of evaluation..." + elif steps == -1: + steps_msg = "running complete evaluation..." else: - logging.info("Evaluating at train step: %s", current_step) + raise ValueError(f"`steps` ({steps}) should be > 0, or == -1.") - with self.eval_summary_manager.summary_writer.as_default(): - eval_outputs = self.evaluator.evaluate(steps) + current_step = self.global_step.numpy() + _log(f" eval | step: {current_step: 6d} | {steps_msg}") - if eval_outputs: - eval_outputs = tf.nest.map_structure(utils.get_value, eval_outputs) + start = time.time() + with self.eval_summary_manager.summary_writer().as_default(): + steps_tensor = tf.convert_to_tensor(steps, dtype=tf.int32) + eval_output = self.evaluator.evaluate(steps_tensor) + eval_output = tf.nest.map_structure(utils.get_value, eval_output or {}) + elapsed = time.time() - start - info = "step: {} evaluation metric: {}".format( - current_step, eval_outputs) - _log_info(info) + _log(f" eval | step: {current_step: 6d} | " + f"eval time: {elapsed: 6.1f} sec | " + f"output: {_format_output(eval_output)}") - self.eval_summary_manager.write_summaries(eval_outputs) + self.eval_summary_manager.write_summaries(eval_output) self.eval_summary_manager.flush() - def restore_checkpoint(self, checkpoint_path: Text = None): - 
"""Restore or initialize the model. - - Args: - checkpoint_path: An optional string indicates the checkpoint path to - restore. If None, will restore from `self.checkpoint_manager`. - - Returns: - The path to the restored checkpoint if a restore happened, or None - if no restore occurred. - """ - with self.strategy.scope(): - # Checkpoint restoring should be inside scope. b/139450638 - if checkpoint_path is not None: - self.checkpoint_manager.checkpoint.restore(checkpoint_path) - return checkpoint_path - return self.checkpoint_manager.restore_or_initialize() - - def save_checkpoint(self): - """Checkpoint the model. - - This method will write a checkpoint containing the current state of the - model. - - Raises: - ValueError: if no CheckpointManager was provided to this Controller's - init args. - """ - self._maybe_save_checkpoint(force_trigger=True) + return eval_output def train_and_evaluate(self, - train_steps: int = None, - eval_steps: int = None, - eval_interval: int = None): - """Train and evaluate in an interleaved manner. + train_steps: int, + eval_steps: int = -1, + eval_interval: Optional[int] = None) -> None: + """Runs interleaved training and evaluation. - This method will train the model until the global step count equals - `train_steps`, running an evaluation for `eval_steps` every `eval_interval` - training steps. In addition, this method will run a final evaluation at the - end of the training sequence. + This method interleaves calls to `self.train()` and `self.evaluate()`, + training the model until the global step count equals `train_steps`, and + running an evaluation for `eval_steps` every `eval_interval` training steps. + In addition, this method will run a final evaluation at the end of the + training sequence. Args: train_steps: The global step count to train up to. - eval_steps: The number of steps to run during an evaluation. If None, - this method will evaluate over the entire evaluation dataset. 
- eval_interval: The number of training steps to run between evalutions. - Must be a multiple of the controller's `steps_per_loop` init arg. If - None, evaluation will only be performed after training is complete. + eval_steps: The number of steps to run during an evaluation. If -1, this + method will evaluate over the entire evaluation dataset. + eval_interval: The number of training steps to run between evaluations. If + set, training will always stop every `eval_interval` steps, even if this + results in a shorter inner loop than specified by `steps_per_loop` + setting. If None, evaluation will only be performed after training is + complete. Raises: ValueError: If eval_interval is not a multiple of self.steps_per_loop. """ - _validate_interval(eval_interval, self.steps_per_loop, interval_name="eval") + self._require("trainer", for_method="train_and_evaluate") + self._require("evaluator", for_method="train_and_evaluate") - current_step = self.global_step.numpy() # This is an expensive access. + current_step = self.global_step.numpy() # Cache, since this is expensive. eval_interval = eval_interval or (train_steps - current_step) while current_step < train_steps: interval = min(train_steps - current_step, eval_interval) num_steps = current_step + interval self.train(steps=num_steps, checkpoint_at_completion=False) self.evaluate(steps=eval_steps) - current_step = self.global_step.numpy() # This is an expensive access. - self.save_checkpoint() + current_step = self.global_step.numpy() + self._maybe_save_checkpoint(check_interval=False) def evaluate_continuously(self, - steps: int = None, + steps: int = -1, timeout: Optional[Union[int, float]] = None, timeout_fn: Optional[Callable[[], bool]] = None): - """Monitor a directory and evaluate on checkpoints in it. + """Continuously monitors a directory and evaluates new checkpoints in it. 
This method continuously monitors a directory as specified by this Controller's CheckpointManager init arg and runs evaluation on the checkpoints found there. Args: - steps: The number of steps to run when evaluating. + steps: The number of steps to run when evaluating. If -1, this method will + evaluate over the entire evaluation dataset. timeout: The maximum number of seconds to wait between checkpoints. See tf.train.checkpoints_iterator documentation. timeout_fn: Optional callable to call after a timeout. If the function @@ -314,8 +327,10 @@ class Controller(object): Raises: ValueError: If no checkpoint found in `self.checkpoint_manager.directory`. ValueError: If `evaluator` was not provided as a controller init arg. - """ + self._require("evaluator", for_method="evaluate_continuously") + self._require("checkpoint_manager", for_method="evaluate_continuously") + for checkpoint_path in tf.train.checkpoints_iterator( self.checkpoint_manager.directory, timeout=timeout, @@ -323,63 +338,110 @@ class Controller(object): self.restore_checkpoint(checkpoint_path) self.evaluate(steps) + def restore_checkpoint(self, checkpoint_path: str = None): + """Restores the model from a checkpoint. + + Args: + checkpoint_path: An optional string specifying the checkpoint path to + restore from. If `None`, will restore from the most recent checkpoint + (or initialize the model using a custom `init_fn` if no checkpoints can + be found) using `self.checkpoint_manager.restore_or_initialize()`. + + Returns: + The path to the restored checkpoint if a restore happened, or `None` if no + restore occurred. + """ + self._require("checkpoint_manager", for_method="restore_checkpoint") + + with self.strategy.scope(): + # Checkpoint restoring should be inside scope (b/139450638). 
+ if checkpoint_path is not None: + _log(f"restoring model from {checkpoint_path}...") + self.checkpoint_manager.checkpoint.restore(checkpoint_path) + else: + _log("restoring or initializing model...") + checkpoint_path = self.checkpoint_manager.restore_or_initialize() + + if checkpoint_path is not None: + _log(f"restored model from {checkpoint_path}.") + else: + _log("initialized model.") + + return checkpoint_path + + def save_checkpoint(self): + """Saves the model to a checkpoint. + + This method will save a checkpoint containing the current state of the + model. + + Raises: + ValueError: If no `checkpoint_manager` was provided to + `Controller.__init__`. + """ + self._require("checkpoint_manager", for_method="save_checkpoint") + self._maybe_save_checkpoint(check_interval=False) + def _train_n_steps(self, num_steps: int): - """Run training for `num_steps`. + """Runs training for `num_steps` steps. - It will also write training outputs to summaries if there is any. + Also prints/logs updates about training progress, and summarizes training + output (if output is returned from `self.trainer.train()`, and if + `self.summary_dir` is set). Args: - num_steps: An integer indicates how many steps to run for this training - loop. + num_steps: An integer specifying how many steps of training to run. Raises: - RuntimeError: If `global_step` is not updated correctly in - `trainer.train`. + RuntimeError: If `global_step` is not properly incremented by `num_steps` + after calling `self.trainer.train(num_steps)`. """ if not self.step_timer: self.step_timer = StepTimer(self.global_step) - - # Calculates steps to run for the next train loop. current_step = self.global_step.numpy() - logging.info("Entering training loop at step %s of %s", current_step, - num_steps) - current_step += num_steps - num_steps = tf.convert_to_tensor(num_steps, dtype=tf.int32) - with self.summary_manager.summary_writer.as_default(): - # Create a lambda that returns true when summaries should be written. 
+ with self.summary_manager.summary_writer().as_default(): should_record = False # Allows static optimization in no-summary cases. if self.summary_interval: + # Create a predicate to determine when summaries should be written. should_record = lambda: (self.global_step % self.summary_interval == 0) with tf.summary.record_if(should_record): - train_outputs = self.trainer.train(num_steps) - - # Updates and verifies the current step after a training loop finishes. - if current_step != self.global_step.numpy(): - raise RuntimeError("`trainer.train` function is not updating " - "`global_step` correctly, expected: %s, actual: %s" % - (current_step, self.global_step.numpy())) - - # Print information like metrics and steps_per_second after a training - # loop. - if train_outputs: - train_outputs = tf.nest.map_structure(utils.get_value, train_outputs) - - train_outputs = train_outputs or {} + num_steps_tensor = tf.convert_to_tensor(num_steps, dtype=tf.int32) + train_output = self.trainer.train(num_steps_tensor) + train_output = tf.nest.map_structure(utils.get_value, train_output or {}) + + # Verify that global_step was updated properly, then update current_step. + expected_step = current_step + num_steps + if self.global_step.numpy() != expected_step: + message = ( + f"`trainer.train({num_steps})` did not update `global_step` by " + f"{num_steps}. 
Old value was {current_step}, expected updated value " + f"to be {expected_step}, but it was {self.global_step.numpy()}.") + logging.warning(message) + return + + current_step = expected_step steps_per_second = self.step_timer.steps_per_second() - info = "step: {} steps_per_second: {:.2f} {}".format( - current_step, steps_per_second, train_outputs) - _log_info(info) + _log(f"train | step: {current_step: 6d} | " + f"steps/sec: {steps_per_second: 6.1f} | " + f"output: {_format_output(train_output)}") - train_outputs["steps_per_second"] = steps_per_second - self.summary_manager.write_summaries(train_outputs) + train_output["steps_per_second"] = steps_per_second + self.summary_manager.write_summaries(train_output) + self.summary_manager.flush() - def _maybe_save_checkpoint(self, force_trigger: bool = False): - """Save checkpoints if necessary. + def _maybe_save_checkpoint(self, check_interval: bool = True): + """Conditionally saves a checkpoint. + + A checkpoint is saved if a `CheckpointManager` is available, and if the + required number of steps has elapsed since the last checkpoint was saved + (although this condition can be disabled by setting `check_interval=False`). Args: - force_trigger: A boolean indicates whether to force saving checkpoints - regardless of the checkpoint interval. + check_interval: Whether to check if the checkpoint interval has fully + elapsed. If `False`, a checkpoint is saved regardless of the elapsed + steps since the most recent checkpoint, unless no `checkpoint_manager` + was provided to `Controller.__init__`. Returns: A boolean indicating whether a checkpoint was saved. 
@@ -387,14 +449,21 @@ class Controller(object): if self.checkpoint_manager and self.checkpoint_manager.checkpoint_interval: ckpt_path = self.checkpoint_manager.save( checkpoint_number=self.global_step.numpy(), - check_interval=not force_trigger) + check_interval=check_interval) if ckpt_path is not None: - logging.info("Saved checkpoints in %s", ckpt_path) + _log(f"saved checkpoint to {ckpt_path}.") return True return False + def _require(self, attribute, for_method): + """Utility method to raise an error if the given `attribute` is not set.""" + if getattr(self, attribute, None) is None: + raise ValueError( + f"`{attribute}` is not set. Pass `{attribute}` to " + f"`Controller.__init__` before calling `{for_method}()`.") + -class StepTimer(object): +class StepTimer: """Utility class for measuring steps/second.""" def __init__(self, step): diff --git a/orbit/controller_test.py b/orbit/controller_test.py index 74c7d4194d4b6d0e0866b299200c7174eefad407..b4620b83bd7164e820424edb97ac670a3bf141ed 100644 --- a/orbit/controller_test.py +++ b/orbit/controller_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""Tests for orbit.controller.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +"""Tests for orbit.controller.""" import os + from absl import logging from absl.testing import parameterized + import numpy as np + from orbit import controller +from orbit import runner from orbit import standard_runner import tensorflow as tf @@ -36,19 +36,15 @@ def create_model(): def summaries_with_matching_keyword(keyword, summary_dir): - """Yields summary protos matching given keyword from event file.""" + """Returns summary protos matching given keyword from event file.""" + matches = [] event_paths = tf.io.gfile.glob(os.path.join(summary_dir, "events*")) for event in tf.compat.v1.train.summary_iterator(event_paths[-1]): if event.summary is not None: for value in event.summary.value: if keyword in value.tag: - logging.info(event) - yield event.summary - - -def check_eventfile_for_keyword(keyword, summary_dir): - """Checks event files for the keyword.""" - return any(summaries_with_matching_keyword(keyword, summary_dir)) + matches.append(event.summary) + return matches def dataset_fn(ctx): @@ -73,12 +69,8 @@ class TestRunner(standard_runner.StandardTrainer, self.train_loss = tf.keras.metrics.Mean("train_loss", dtype=tf.float32) self.eval_loss = tf.keras.metrics.Mean("eval_loss", dtype=tf.float32) self.return_numpy = return_numpy - train_dataset = ( - self.strategy.experimental_distribute_datasets_from_function(dataset_fn) - ) - eval_dataset = ( - self.strategy.experimental_distribute_datasets_from_function(dataset_fn) - ) + train_dataset = self.strategy.distribute_datasets_from_function(dataset_fn) + eval_dataset = self.strategy.distribute_datasets_from_function(dataset_fn) standard_runner.StandardTrainer.__init__(self, train_dataset) standard_runner.StandardEvaluator.__init__(self, eval_dataset) @@ -103,8 +95,7 @@ class 
TestRunner(standard_runner.StandardTrainer, } def build_eval_dataset(self): - return self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) + return self.strategy.distribute_datasets_from_function(dataset_fn) def eval_begin(self): self.eval_loss.reset_states() @@ -133,8 +124,7 @@ class TestEvaluator(standard_runner.StandardEvaluator): def __init__(self): self.strategy = tf.distribute.get_strategy() self.model = create_model() - eval_dataset = self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) + eval_dataset = self.strategy.distribute_datasets_from_function(dataset_fn) standard_runner.StandardEvaluator.__init__(self, eval_dataset) def eval_reduce(self, state, output): @@ -165,6 +155,61 @@ class TestEvaluator(standard_runner.StandardEvaluator): } +class TestEvaluatorNoOutput(runner.AbstractEvaluator): + + def evaluate(self, num_steps): + pass + + +class TestEvaluatorWithNestedSummary(standard_runner.StandardEvaluator): + """Implements the training and evaluation APIs for the test model.""" + + def __init__(self): + self.strategy = tf.distribute.get_strategy() + self.model = create_model() + dataset = self.strategy.distribute_datasets_from_function(dataset_fn) + dataset2 = self.strategy.distribute_datasets_from_function(dataset_fn) + self.loss = tf.keras.metrics.Mean("loss", dtype=tf.float32) + self.accuracy = tf.keras.metrics.CategoricalAccuracy( + "accuracy", dtype=tf.float32) + self.loss2 = tf.keras.metrics.Mean("loss", dtype=tf.float32) + self.accuracy2 = tf.keras.metrics.CategoricalAccuracy( + "accuracy", dtype=tf.float32) + standard_runner.StandardEvaluator.__init__( + self, eval_dataset={ + "dataset": dataset, + "dataset2": dataset2 + }) + + def eval_step(self, iterator): + + def _replicated_step(loss, accuracy, inputs): + """Replicated evaluation step.""" + inputs, targets = inputs + outputs = self.model(inputs) + loss.update_state(tf.keras.losses.MSE(targets, outputs)) + accuracy.update_state(targets, outputs) + + 
self.strategy.run( + lambda inputs: _replicated_step(self.loss, self.accuracy, inputs), + args=(next(iterator["dataset"]),)) + self.strategy.run( + lambda inputs: _replicated_step(self.loss2, self.accuracy2, inputs), + args=(next(iterator["dataset2"]),)) + + def eval_end(self): + return { + "dataset": { + "loss": self.loss.result(), + "accuracy": self.accuracy.result() + }, + "dataset2": { + "loss": self.loss2.result(), + "accuracy": self.accuracy2.result() + }, + } + + class TestTrainerWithSummaries(standard_runner.StandardTrainer): """A Trainer model with summaries for testing purposes.""" @@ -174,15 +219,15 @@ class TestTrainerWithSummaries(standard_runner.StandardTrainer): self.optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1) self.global_step = self.optimizer.iterations self.train_loss = tf.keras.metrics.Mean("train_loss", dtype=tf.float32) - train_dataset = ( - self.strategy.experimental_distribute_datasets_from_function(dataset_fn) - ) + train_dataset = self.strategy.distribute_datasets_from_function(dataset_fn) standard_runner.StandardTrainer.__init__( - self, train_dataset, use_tpu_summary_optimization=True) + self, + train_dataset, + options=standard_runner.StandardTrainerOptions( + use_tpu_summary_optimization=True)) def build_train_dataset(self): - return self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) + return self.strategy.distribute_datasets_from_function(dataset_fn) def train_step(self, iterator): @@ -203,7 +248,7 @@ class TestTrainerWithSummaries(standard_runner.StandardTrainer): class ControllerTest(tf.test.TestCase, parameterized.TestCase): def setUp(self): - super(ControllerTest, self).setUp() + super().setUp() self.model_dir = self.get_temp_dir() def test_no_checkpoint(self): @@ -222,13 +267,13 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Loss and accuracy values should be written into summaries. 
self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "loss", os.path.join(self.model_dir, "summaries/train"))) self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) # No checkpoint, so global step starts from 0. test_runner.global_step.assign(0) @@ -248,6 +293,76 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): train_steps=10, eval_steps=2, eval_interval=6) self.assertEqual(test_runner.global_step, 10) + def test_has_checkpoint_no_summaries(self): + test_runner = TestRunner() + # Has checkpoint, but no summary directories. + checkpoint = tf.train.Checkpoint(model=test_runner.model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + self.model_dir, + max_to_keep=None, + step_counter=test_runner.global_step) + test_controller = controller.Controller( + trainer=test_runner, + evaluator=test_runner, + global_step=test_runner.global_step, + checkpoint_manager=checkpoint_manager, + steps_per_loop=2) + test_controller.train_and_evaluate( + train_steps=10, eval_steps=2, eval_interval=6) + self.assertEqual(test_runner.global_step, 10) + + # No summaries are saved. + self.assertEmpty(tf.io.gfile.glob( + os.path.join(checkpoint_manager.directory, "events.*"))) + + def test_has_checkpoint_eval_summary_only(self): + test_runner = TestRunner() + # Has checkpoint, but no summary directories. 
+ checkpoint = tf.train.Checkpoint(model=test_runner.model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + self.model_dir, + max_to_keep=None, + step_counter=test_runner.global_step) + test_controller = controller.Controller( + trainer=test_runner, + evaluator=test_runner, + global_step=test_runner.global_step, + checkpoint_manager=checkpoint_manager, + eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), + steps_per_loop=2) + test_controller.train_and_evaluate( + train_steps=10, eval_steps=2, eval_interval=6) + self.assertEqual(test_runner.global_step, 10) + + # Training summaries are not saved. + self.assertEmpty(tf.io.gfile.glob( + os.path.join(checkpoint_manager.directory, "events.*"))) + # Evaluation summaries are saved. + self.assertNotEmpty(tf.io.gfile.glob( + os.path.join(self.model_dir, "summaries/eval/events.*"))) + + def test_restore_from_most_recent_checkpoint(self): + test_runner = TestRunner() + checkpoint = tf.train.Checkpoint(model=test_runner.model) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + self.model_dir, + max_to_keep=None, + step_counter=test_runner.global_step, + checkpoint_interval=5) + test_controller = controller.Controller( + trainer=test_runner, + global_step=test_runner.global_step, + checkpoint_manager=checkpoint_manager, + eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"), + steps_per_loop=5) + test_controller.train(20) + self.assertLen(checkpoint_manager.checkpoints, 4) + restored_path = test_controller.restore_checkpoint() + self.assertEqual(restored_path, checkpoint_manager.checkpoints[-1]) + @parameterized.named_parameters(("return_numpy", True), ("return_tensor", False)) def test_train_and_evaluate(self, return_numpy): @@ -278,13 +393,13 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Loss and accuracy values should be written into summaries. 
self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "loss", os.path.join(self.model_dir, "summaries/train"))) self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) def test_train_only(self): @@ -314,8 +429,8 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Only train summaries are written. self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "loss", os.path.join(self.model_dir, "summaries/train"))) self.assertFalse( tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/eval"))) @@ -336,16 +451,17 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): checkpoint_manager=checkpoint_manager, summary_dir=os.path.join(self.model_dir, "summaries/train"), eval_summary_dir=os.path.join(self.model_dir, "summaries/eval")) - test_controller.evaluate(steps=2) + eval_results = test_controller.evaluate(steps=2) # Only eval summaries are written self.assertFalse( tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/train"))) self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) + self.assertIn("eval_loss", eval_results) # Tests continuous eval with timeout and timeout_fn. 
done_file = os.path.join(self.model_dir, "summaries/eval/Done") @@ -426,8 +542,8 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Only train summaries are written. self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/train"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "loss", os.path.join(self.model_dir, "summaries/train"))) self.assertFalse( tf.io.gfile.exists(os.path.join(self.model_dir, "summaries/eval"))) @@ -456,12 +572,12 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Loss and accuracy values should be written into summaries. self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries"))) - self.assertTrue( - check_eventfile_for_keyword("loss", - os.path.join(self.model_dir, "summaries"))) - self.assertTrue( - check_eventfile_for_keyword("eval_loss", - os.path.join(self.model_dir, "summaries"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "loss", os.path.join(self.model_dir, "summaries"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "eval_loss", os.path.join(self.model_dir, "summaries"))) def test_early_stop_on_eval_loss(self): test_runner = TestRunner() @@ -504,7 +620,7 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): self.assertLess(test_runner.global_step, 10) - def test_evaluate_with_loss_outputs(self): + def test_evaluate_with_loss_output(self): test_evaluator = TestEvaluator() checkpoint = tf.train.Checkpoint(model=test_evaluator.model) @@ -521,10 +637,17 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): # Only eval summaries are written self.assertNotEmpty( tf.io.gfile.listdir(os.path.join(self.model_dir, "summaries/eval"))) - self.assertTrue( - check_eventfile_for_keyword( + self.assertNotEmpty( + summaries_with_matching_keyword( "eval_loss", os.path.join(self.model_dir, "summaries/eval"))) + def test_evaluate_with_no_output(self): 
+ test_controller = controller.Controller( + evaluator=TestEvaluatorNoOutput(), + global_step=tf.Variable(0, dtype=tf.int64), + eval_summary_dir=os.path.join(self.model_dir, "summaries/eval")) + self.assertEqual(test_controller.evaluate(steps=5), {}) + def test_train_and_evaluate_reset_datasets(self): test_runner = TestRunner() @@ -538,16 +661,68 @@ class ControllerTest(tf.test.TestCase, parameterized.TestCase): train_steps=10, eval_steps=2, eval_interval=6) train_dataset = ( - test_runner.strategy.experimental_distribute_datasets_from_function( - dataset_fn)) + test_runner.strategy.distribute_datasets_from_function(dataset_fn)) eval_dataset = ( - test_runner.strategy.experimental_distribute_datasets_from_function( - dataset_fn)) + test_runner.strategy.distribute_datasets_from_function(dataset_fn)) test_runner.train_dataset = train_dataset test_runner.eval_dataset = eval_dataset test_controller.train_and_evaluate( train_steps=10, eval_steps=2, eval_interval=6) + def test_eval_and_checkpoint_interval(self): + test_runner = TestRunner() + + checkpoint = tf.train.Checkpoint( + model=test_runner.model, optimizer=test_runner.optimizer) + checkpoint_manager = tf.train.CheckpointManager( + checkpoint, + self.model_dir, + max_to_keep=None, + step_counter=test_runner.global_step, + checkpoint_interval=5) + test_controller = controller.Controller( + trainer=test_runner, + evaluator=test_runner, + global_step=test_runner.global_step, + steps_per_loop=10, + checkpoint_manager=checkpoint_manager, + summary_dir=self.model_dir) + test_controller.train_and_evaluate( + train_steps=10, eval_steps=2, eval_interval=5) + + # Expect 3 checkpoints to be saved at step: 5, 10. + self.assertLen( + tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt-*.data*")), 2) + # Expect evaluation is performed 2 times at step: 5, 10. 
+ self.assertLen( + summaries_with_matching_keyword("eval_loss", self.model_dir), 2) + + def test_evaluate_with_nested_summaries(self): + test_evaluator = TestEvaluatorWithNestedSummary() + test_controller = controller.Controller( + evaluator=test_evaluator, + global_step=tf.Variable(0, dtype=tf.int64), + eval_summary_dir=self.model_dir) + test_controller.evaluate(steps=5) + + self.assertNotEmpty( + tf.io.gfile.listdir(os.path.join(self.model_dir, "dataset"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "loss", os.path.join(self.model_dir, "dataset"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "accuracy", os.path.join(self.model_dir, "dataset"))) + + self.assertNotEmpty( + tf.io.gfile.listdir(os.path.join(self.model_dir, "dataset2"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "loss", os.path.join(self.model_dir, "dataset2"))) + self.assertNotEmpty( + summaries_with_matching_keyword( + "accuracy", os.path.join(self.model_dir, "dataset2"))) + if __name__ == "__main__": tf.test.main() diff --git a/orbit/examples/__init__.py b/orbit/examples/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a4d9cc3a1b148e5c8c153f2f2357d0475e7a43b6 --- /dev/null +++ b/orbit/examples/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/orbit/examples/single_task/__init__.py b/orbit/examples/single_task/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a4d9cc3a1b148e5c8c153f2f2357d0475e7a43b6 --- /dev/null +++ b/orbit/examples/single_task/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/orbit/examples/single_task/single_task_evaluator.py b/orbit/examples/single_task/single_task_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..0dcbae063a6282cbf76b8247bdcea43ee9c26c42 --- /dev/null +++ b/orbit/examples/single_task/single_task_evaluator.py @@ -0,0 +1,86 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""An evaluator object that can evaluate models with a single output.""" +import orbit +import tensorflow as tf + + +class SingleTaskEvaluator(orbit.StandardEvaluator): + """Evaluates a single-output model on a given dataset. + + This evaluator will handle running a model with one output on a single + dataset, and will apply the output of that model to one or more + `tf.keras.metrics.Metric` objects. + """ + + def __init__(self, + eval_dataset, + label_key, + model, + metrics, + evaluator_options=None): + """Initializes a `SingleTaskEvaluator` instance. + + If the `SingleTaskEvaluator` should run its model under a distribution + strategy, it should be created within that strategy's scope. + + Arguments: + eval_dataset: A `tf.data.Dataset` or `DistributedDataset` that contains a + string-keyed dict of `Tensor`s. + label_key: The key corresponding to the label value in feature + dictionaries dequeued from `eval_dataset`. This key will be removed from + the dictionary before it is passed to the model. + model: A `tf.Module` or Keras `Model` object to evaluate. + metrics: A single `tf.keras.metrics.Metric` object, or a list of + `tf.keras.metrics.Metric` objects. + evaluator_options: An optional `orbit.StandardEvaluatorOptions` object. + """ + + self.label_key = label_key + self.model = model + self.metrics = metrics if isinstance(metrics, list) else [metrics] + + # Capture the strategy from the containing scope. + self.strategy = tf.distribute.get_strategy() + + super(SingleTaskEvaluator, self).__init__( + eval_dataset=eval_dataset, options=evaluator_options) + + def eval_begin(self): + """Actions to take once before every eval loop.""" + for metric in self.metrics: + metric.reset_states() + + def eval_step(self, iterator): + """One eval step. Called multiple times per eval loop by the superclass.""" + + def step_fn(inputs): + # Extract the target value and delete it from the input dict, so that + # the model never sees it. 
+ target = inputs.pop(self.label_key) + output = self.model(inputs) + for metric in self.metrics: + metric.update_state(target, output) + + # This is needed to handle distributed computation. + self.strategy.run(step_fn, args=(next(iterator),)) + + def eval_end(self): + """Actions to take once after an eval loop.""" + with self.strategy.scope(): + # Export the metrics. + metrics = {metric.name: metric.result() for metric in self.metrics} + + return metrics diff --git a/orbit/examples/single_task/single_task_evaluator_test.py b/orbit/examples/single_task/single_task_evaluator_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c074da0fb9de8e054451ac1a07f729c26ff08613 --- /dev/null +++ b/orbit/examples/single_task/single_task_evaluator_test.py @@ -0,0 +1,65 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the single_task_evaluator.""" +import orbit +from orbit.examples.single_task import single_task_evaluator +from orbit.examples.single_task import single_task_trainer + +import tensorflow as tf +import tensorflow_datasets as tfds + + +class SingleTaskEvaluatorTest(tf.test.TestCase): + + def test_single_task_evaluation(self): + + iris = tfds.load('iris') + train_ds = iris['train'].batch(32) + + model = tf.keras.Sequential([ + tf.keras.Input(shape=(4,), name='features'), + tf.keras.layers.Dense(10, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.relu), + tf.keras.layers.Dense(3) + ]) + + trainer = single_task_trainer.SingleTaskTrainer( + train_ds, + label_key='label', + model=model, + loss_fn=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=tf.keras.optimizers.SGD(learning_rate=0.01)) + + evaluator = single_task_evaluator.SingleTaskEvaluator( + train_ds, + label_key='label', + model=model, + metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) + + controller = orbit.Controller( + trainer=trainer, + evaluator=evaluator, + steps_per_loop=100, + global_step=trainer.optimizer.iterations) + + controller.train(train_ds.cardinality().numpy()) + controller.evaluate() + accuracy = evaluator.metrics[0].result().numpy() + + self.assertGreater(0.925, accuracy) + + +if __name__ == '__main__': + tf.test.main() diff --git a/orbit/examples/single_task/single_task_trainer.py b/orbit/examples/single_task/single_task_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..f9b29185a760ea391581ee08be94ff1f9df79932 --- /dev/null +++ b/orbit/examples/single_task/single_task_trainer.py @@ -0,0 +1,140 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A trainer object that can train models with a single output.""" + +import orbit +import tensorflow as tf + + +class SingleTaskTrainer(orbit.StandardTrainer): + """Trains a single-output model on a given dataset. + + This trainer will handle running a model with one output on a single + dataset. It will apply the provided loss function to the model's output + to calculate gradients and will apply them via the provided optimizer. It will + also supply the output of that model to one or more `tf.keras.metrics.Metric` + objects. + """ + + def __init__(self, + train_dataset, + label_key, + model, + loss_fn, + optimizer, + metrics=None, + trainer_options=None): + """Initializes a `SingleTaskTrainer` instance. + + If the `SingleTaskTrainer` should run its model under a distribution + strategy, it should be created within that strategy's scope. + + This trainer will also calculate metrics during training. The loss metric + is calculated by default, but other metrics can be passed to the `metrics` + arg. + + Arguments: + train_dataset: A `tf.data.Dataset` or `DistributedDataset` that contains a + string-keyed dict of `Tensor`s. + label_key: The key corresponding to the label value in feature + dictionaries dequeued from `train_dataset`. This key will be removed + from the dictionary before it is passed to the model. + model: A `tf.Module` or Keras `Model` object to evaluate. It must accept a + `training` kwarg. + loss_fn: A per-element loss function of the form (target, output). 
The + output of this loss function will be reduced via `tf.reduce_mean` to + create the final loss. We recommend using the functions in the + `tf.keras.losses` package or `tf.keras.losses.Loss` objects with + `reduction=tf.keras.losses.reduction.NONE`. + optimizer: A `tf.keras.optimizers.Optimizer` instance. + metrics: A single `tf.keras.metrics.Metric` object, or a list of + `tf.keras.metrics.Metric` objects. + trainer_options: An optional `orbit.utils.StandardTrainerOptions` object. + """ + self.label_key = label_key + self.model = model + self.loss_fn = loss_fn + self.optimizer = optimizer + + # Capture the strategy from the containing scope. + self.strategy = tf.distribute.get_strategy() + + # We always want to report training loss. + self.train_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32) + + # We need self.metrics to be an iterable later, so we handle that here. + if metrics is None: + self.metrics = [] + elif isinstance(metrics, list): + self.metrics = metrics + else: + self.metrics = [metrics] + + super(SingleTaskTrainer, self).__init__( + train_dataset=train_dataset, options=trainer_options) + + def train_loop_begin(self): + """Actions to take once, at the beginning of each train loop.""" + self.train_loss.reset_states() + for metric in self.metrics: + metric.reset_states() + + def train_step(self, iterator): + """A train step. Called multiple times per train loop by the superclass.""" + + def train_fn(inputs): + with tf.GradientTape() as tape: + # Extract the target value and delete it from the input dict, so that + # the model never sees it. + target = inputs.pop(self.label_key) + + # Get the outputs of the model. + output = self.model(inputs, training=True) + + # Get the average per-batch loss and scale it down by the number of + # replicas. This ensures that we don't end up multiplying our loss by + # the number of workers - gradients are summed, not averaged, across + # replicas during the apply_gradients call. 
+ # Note, the reduction of loss is explicitly handled and scaled by + # num_replicas_in_sync. Recommend to use a plain loss function. + # If you're using tf.keras.losses.Loss object, you may need to set + # reduction argument explicitly. + loss = tf.reduce_mean(self.loss_fn(target, output)) + scaled_loss = loss / self.strategy.num_replicas_in_sync + + # Get the gradients by applying the loss to the model's trainable + # variables. + gradients = tape.gradient(scaled_loss, self.model.trainable_variables) + + # Apply the gradients via the optimizer. + self.optimizer.apply_gradients( + list(zip(gradients, self.model.trainable_variables))) + + # Update metrics. + self.train_loss.update_state(loss) + for metric in self.metrics: + metric.update_state(target, output) + + # This is needed to handle distributed computation. + self.strategy.run(train_fn, args=(next(iterator),)) + + def train_loop_end(self): + """Actions to take once after a training loop.""" + with self.strategy.scope(): + # Export the metrics. + metrics = {metric.name: metric.result() for metric in self.metrics} + metrics[self.train_loss.name] = self.train_loss.result() + + return metrics diff --git a/orbit/examples/single_task/single_task_trainer_test.py b/orbit/examples/single_task/single_task_trainer_test.py new file mode 100644 index 0000000000000000000000000000000000000000..cba34f7b05485d891c9b1c6da4e02522df1f772a --- /dev/null +++ b/orbit/examples/single_task/single_task_trainer_test.py @@ -0,0 +1,60 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the single_task_trainer.""" +import orbit +from orbit.examples.single_task import single_task_trainer + +import tensorflow as tf +import tensorflow_datasets as tfds + + +class SingleTaskTrainerTest(tf.test.TestCase): + + def test_single_task_training(self): + iris = tfds.load('iris') + train_ds = iris['train'].batch(32).repeat() + + model = tf.keras.Sequential([ + tf.keras.Input(shape=(4,), name='features'), + tf.keras.layers.Dense(10, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.relu), + tf.keras.layers.Dense(3), + tf.keras.layers.Softmax(), + ]) + + trainer = single_task_trainer.SingleTaskTrainer( + train_ds, + label_key='label', + model=model, + loss_fn=tf.keras.losses.sparse_categorical_crossentropy, + optimizer=tf.keras.optimizers.SGD(learning_rate=0.01)) + + controller = orbit.Controller( + trainer=trainer, + steps_per_loop=100, + global_step=trainer.optimizer.iterations) + + controller.train(1) + start_loss = trainer.train_loss.result().numpy() + controller.train(500) + end_loss = trainer.train_loss.result().numpy() + + # Assert that the model has trained 'significantly' - that the loss + # has dropped by over 50%. + self.assertLess(end_loss, start_loss / 2) + + +if __name__ == '__main__': + tf.test.main() diff --git a/orbit/runner.py b/orbit/runner.py index 36931e464c87d98ce1fd3163877e38d2ed569912..b0377c5218cac2f60ceacdc66721bac5b149c68b 100644 --- a/orbit/runner.py +++ b/orbit/runner.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,69 +11,73 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== -"""An abstraction that users can easily handle their custom training loops.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""Provides AbstractTrainer/Evaluator base classes, defining train/eval APIs.""" import abc -from typing import Dict, Optional, Text -import six + +from typing import Dict, Optional, Union + +import numpy as np import tensorflow as tf -@six.add_metaclass(abc.ABCMeta) -class AbstractTrainer(tf.Module): - """An abstract class defining the APIs required for training.""" +Output = Dict[str, Union[tf.Tensor, float, np.number, np.ndarray, 'Output']] # pytype: disable=not-supported-yet + + +class AbstractTrainer(tf.Module, metaclass=abc.ABCMeta): + """An abstract class defining the API required for training.""" @abc.abstractmethod - def train(self, - num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """Implements model training with multiple steps. - - In training, it is common to break the total training steps into several - training loops, so users can do checkpointing, write summaries and run some - python callbacks. This is necessary for getting good performance in TPU - training, as the overhead for launching a multi worker tf.function may be - large in Eager mode. It is usually encouraged to create a host training loop - (e.g. using a `tf.range` wrapping `strategy.run` inside a - `tf.function`) in the TPU case. For the cases that don't require host - training loop to acheive peak performance, users can just implement a simple - python loop to drive each step. + def train(self, num_steps: tf.Tensor) -> Optional[Output]: + """Implements `num_steps` steps of training. 
+ + This method will be called by the `Controller` to perform the "inner loop" + of training. This inner loop amortizes the cost of bookkeeping associated + with checkpointing, evaluation, and writing summaries. Additionally, the + inner loop can be implemented (if desired) using TensorFlow's looping + constructs (e.g. a `for` loop over a `tf.range` inside a `tf.function`), + which can be necessary for getting optimal performance when running on TPU. + For cases that don't require peak performance, a simple Python loop can be + used instead for simplicity. Args: - num_steps: A guideline for how many training steps to run. Note that it is - up to the model what constitutes a "step" (this may involve more than - one update to model parameters, e.g. if training a GAN). + num_steps: The number of training steps to run. Note that it is up to the + model what constitutes a "step", which may involve more than one update + to model parameters (e.g., if training a GAN). Returns: - The function may return a dictionary of `Tensors` or numpy arrays, which - will be written to logs and as TensorBoard summaries. + Either `None`, or a dictionary mapping names to `Tensor`s or NumPy values. + If a dictionary is returned, it will be written to logs and as TensorBoard + summaries. The dictionary may also be nested, which will generate a + hierarchy of summary directories. """ pass -@six.add_metaclass(abc.ABCMeta) -class AbstractEvaluator(tf.Module): - """An abstract class defining the APIs required for evaluation.""" +class AbstractEvaluator(tf.Module, metaclass=abc.ABCMeta): + """An abstract class defining the API required for evaluation.""" @abc.abstractmethod - def evaluate( - self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """Implements model evaluation. + def evaluate(self, num_steps: tf.Tensor) -> Optional[Output]: + """Implements `num_steps` steps of evaluation. + + This method will be called by the `Controller` to perform an evaluation. 
The + `num_steps` parameter specifies the number of steps of evaluation to run, + which is specified by the user when calling one of the `Controller`'s + evaluation methods. A special sentinel value of `-1` is reserved to indicate + evaluation should run until the underlying data source is exhausted. Args: - num_steps: A guideline for how many evaluation steps to run. Note that it - is up to the model what constitutes a "step". Generally, it may be - desirable to support both a limited number of eval steps and iterating - over a full dataset (however many steps are required) when `num_steps` - is `None`. + num_steps: The number of evaluation steps to run. Note that it is up to + the model what constitutes a "step". Evaluations may also want to + support "complete" evaluations when `num_steps == -1`, running until a + given data source is exhausted. Returns: - The function may return a dictionary of `Tensors` or numpy arrays, which - will be written to logs and as TensorBoard summaries. + Either `None`, or a dictionary mapping names to `Tensor`s or NumPy values. + If a dictionary is returned, it will be written to logs and as TensorBoard + summaries. The dictionary may also be nested, which will generate a + hierarchy of summary directories. """ pass diff --git a/orbit/standard_runner.py b/orbit/standard_runner.py index c0162e9e5ab0954b7d96d33917f47c0a39a93037..ac03707a0f7e0becd87147b1669dca196b1bff72 100644 --- a/orbit/standard_runner.py +++ b/orbit/standard_runner.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,91 +11,146 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== -"""An abstraction that users can easily handle their custom training loops.""" -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function +"""AbstractTrainer/Evaluator subclasses with added functionality. + +The classes in this module provide some additional structure to the bare +`AbstractTrainer`/`AbstractEvaluator` APIs. + +Both `StandardTrainer` and `StandardEvaluator` split the train/eval loops into +"begin", "step", and "end" methods, and provide an implementation of the loop +itself that makes calls to the relevant step method. + +`StandardTrainer` supports running the loop using the TF while loop construct +for added performance (particularly on TPUs). It additionally provides some +functionality to make writing summaries from inside a model more performant when +running on TPUs. + +These classes are intended to work well in common settings, however there may +be use cases these classes don't support (for instance, `StandardEvaluator` in +particular doesn't support running full evaluations over multiple different eval +datasets). Users are encouraged to simply fall back to custom `AbstractTrainer` +and `AbstractEvaluator` subclasses in these cases. +""" import abc -from typing import Any, Dict, Optional, Text + +from typing import Any, Optional + +import dataclasses + from orbit import runner -from orbit import utils -import six +from orbit.utils import loop_fns + import tensorflow as tf -@six.add_metaclass(abc.ABCMeta) -class StandardTrainer(runner.AbstractTrainer): - """Implements the standard functionality of AbstractTrainer APIs.""" +@dataclasses.dataclass(frozen=True) +class StandardTrainerOptions: + """Advanced options for `orbit.StandardTrainer`. 
- def __init__(self, - train_dataset, - use_tf_while_loop=True, - use_tf_function=True, - use_tpu_summary_optimization=False): - """Construct a `StandardTrainer` object. + Attributes: + use_tf_function: A boolean indicating whether to apply `tf.function` to the + training loop. This will only affect the body of the loop (involving + `train_step`); `train_loop_begin` and `train_loop_end` will always be run + in eager mode. + use_tf_while_loop: A boolean indicating whether to run the training loop + using a `tf.while_loop`. If `True`, `use_tf_function` must also be `True`. + use_tpu_summary_optimization: A boolean indicating whether to enable a + performance optimization for summaries in TPUs. Writing summaries + conditionally with outside compilation on TPUs can be extremely slow. If + `True`, this optimization creates two `tf.function`s with two XLA programs + (one with summary calls, and one without). The program with summaries runs + only for one step when summaries should be recorded. + """ + use_tf_function: bool = True + use_tf_while_loop: bool = True + use_tpu_summary_optimization: bool = False + + +class StandardTrainer(runner.AbstractTrainer, metaclass=abc.ABCMeta): + """Implements standard functionality on top of the AbstractTrainer API. + + This class structures the training "inner loop" roughly as follows: + + train_loop_begin() + for _ in range(num_steps): + train_step(train_iterator) + return train_loop_end() + + Calls to `train_loop_begin` and `train_loop_end` are always done in eager + mode, while the loop/`train_step` may be implemented using `tf.while` and/or + `tf.function`, as determined by the `options` passed to `__init__`. + """ + + def __init__(self, train_dataset, options: StandardTrainerOptions = None): + """Initializes the `StandardTrainer` instance. Args: - train_dataset: A tf.nest-compatible structure of tf.data.Dataset or - DistributedDataset. 
- use_tf_while_loop: A boolean indicates whether to wrap the train step with - a `tf.while_loop`. - use_tf_function: A boolean indicates whether a `tf.function` will be used. - If False, training will run on pure eager mode. - use_tpu_summary_optimization: A boolean indicates whether to enable the - performance optimization for summaries in TPUs. In TPUs, writing - summaries with outside compilation inside train step is slow. If True, - it creates two `tf.function` with two XLA programs: one with summaries - and one without, and run the program with summaries (slow one) only if - necessary. + train_dataset: A `tf.nest`-compatible structure of `tf.data.Dataset` or + `DistributedDataset`. + options: An `orbit.StandardTrainerOptions` instance. """ - if use_tf_while_loop and not use_tf_function: + options = options or StandardTrainerOptions() + if options.use_tf_while_loop and not options.use_tf_function: raise ValueError("`use_tf_while_loop=True` and `use_tf_function=False` " "is not supported") - if use_tpu_summary_optimization and not use_tf_while_loop: + if options.use_tpu_summary_optimization and not options.use_tf_while_loop: raise ValueError("`use_tpu_summary_optimization=True` and " "`use_tf_while_loop=False` is not supported") - self._use_tf_while_loop = use_tf_while_loop - self._use_tf_function = use_tf_function + + self._train_options = options self._train_dataset = train_dataset self._train_iter = None self._train_loop_fn = None - self._use_tpu_summary_optimization = use_tpu_summary_optimization - def train(self, - num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """See base class.""" + def create_train_loop_fn(self): + """Creates a training loop from the current step function and options. + + Returns: + The train loop function, i.e. wrapper of multiple train steps. 
+ """ + train_step_fn = self.train_step + if self._train_options.use_tf_while_loop: + loop_fn = loop_fns.create_tf_while_loop_fn(train_step_fn) + if self._train_options.use_tpu_summary_optimization: + loop_fn = loop_fns.LoopFnWithSummaries(loop_fn) + else: + loop_fn = tf.function(loop_fn) + else: + if self._train_options.use_tf_function: + train_step_fn = tf.function(train_step_fn) + loop_fn = loop_fns.create_loop_fn(train_step_fn) + return loop_fn + + def train(self, num_steps: tf.Tensor) -> Optional[runner.Output]: + """Implements `num_steps` steps of training. + + Args: + num_steps: The number of training steps to run. This corresponds directly + to the number of calls made to `train_step`. + + Returns: + The output of `train_loop_end`. + """ self.train_loop_begin() + if self._train_loop_fn is None: + self._train_loop_fn = self.create_train_loop_fn() + if self._train_iter is None: self._train_iter = tf.nest.map_structure(iter, self.train_dataset) - if self._train_loop_fn is None: - train_fn = self.train_step - if self._use_tf_while_loop: - self._train_loop_fn = utils.create_tf_while_loop_fn(train_fn) - if self._use_tpu_summary_optimization: - self._train_loop_fn = utils.train_function_with_summaries( - self._train_loop_fn) - else: - self._train_loop_fn = tf.function(self._train_loop_fn) - else: - if self._use_tf_function: - train_fn = tf.function(train_fn) - self._train_loop_fn = utils.create_loop_fn(train_fn) - self._train_loop_fn(self._train_iter, num_steps) return self.train_loop_end() def train_loop_begin(self): """Called once at the beginning of the training loop. - This method is called before dataset iterators creation. - This is a good place to reset metrics that accumulate values over multiple - steps of training. + This method is always called in eager mode, and is a good place to reset + metrics that accumulate values over multiple steps of training. + + Note that this method is called before dataset iterator creation. 
""" pass @@ -103,79 +158,187 @@ class StandardTrainer(runner.AbstractTrainer): def train_step(self, iterator): """Implements one step of training. - What a "step" consists of is up to the implementer. If using distribution - strategies, the call to this method should take place in the "cross-replica + What a "step" consists of is up to the implementer. When using distribution + strategies, the call to this method takes place in the "cross-replica context" for generality, to allow e.g. multiple iterator dequeues and calls to `strategy.run`. + Note that if `use_tf_function=True`, all the code inside `train_step` should + be compatible with `tf.function` tracing (and in particular, any state + modifications involving `self` should be avoided). In some cases, non- + `tf.function` compatible code can be moved to `train_loop_begin` or + `train_loop_end`, which always execute eagerly. + Args: - iterator: A tf.nest-compatible structure of tf.data Iterator or - DistributedIterator. + iterator: A `tf.nest`-compatible structure of `tf.data.Iterator` or + `DistributedIterator`. The structure of this input matches the structure + of `train_dataset` as passed to `__init__`. """ pass - def train_loop_end(self) -> Optional[Dict[Text, tf.Tensor]]: - """Called at the end of the training loop. + def train_loop_end(self) -> Optional[runner.Output]: + """Called once at the end of the training loop. - This is a good place to get metric results. The value returned from this - function will be returned as-is from the train() method. + This method is always called in eager mode, and is a good place to get + metric results. The value returned from this function will be returned as-is + from the `train` method implementation provided by `StandardTrainer`. Returns: The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. + written to logs and as TensorBoard summaries. 
It can also be a + nested dictionary, yielding a hierarchy of summary directories. """ pass @property def train_dataset(self): - """Returns the train_dataset instance.""" + """The current training dataset.""" return self._train_dataset @train_dataset.setter def train_dataset(self, train_dataset): - """Set a new train dataset and replace with the existing one. + """Sets a new training dataset, replacing the current one. - Any unfinished work in the previous dataset will be discarded. + Any unprocessed examples in the current dataset are discarded. Args: - train_dataset: A tf.nest-compatible structure of tf.data.Dataset or - DistributedDataset. + train_dataset: A `tf.nest`-compatible structure of `tf.data.Dataset` or + `DistributedDataset`. """ self._train_dataset = train_dataset self._train_iter = None -@six.add_metaclass(abc.ABCMeta) -class StandardEvaluator(runner.AbstractEvaluator): - """Implements the standard functionality of AbstractEvaluator APIs.""" - - def __init__(self, eval_dataset, use_tf_function=True): - """Construct a `StandardEvaluator` object. +@dataclasses.dataclass(frozen=True) +class StandardEvaluatorOptions: + """Advanced options for the `orbit.StandardEvaluator`. + + Attributes: + use_tf_function: A boolean indicating whether to apply `tf.function` to the + evaluation loop. This will only affect the body of the loop (involving + `eval_step`); `eval_loop_begin` and `eval_loop_end` will always be run + in eager mode. + use_tf_while_loop: A boolean indicating whether to run the evaluation loop + using a `tf.while_loop`. If `True`, `use_tf_function` must also be `True`. + recreate_iterator_for_each_eval: A boolean indicating whether to recreate a + new iterator for the evaluation dataset before each round of evaluation, + which implies each round of evaluation starts from the beginning of + the evaluation dataset. For example, the evaluation dataset is + `[1, 2, 3, 4]`, batch size is 1 and evaluation steps is 2. 
If `True`, the + data to be evaluated is [1, 2] every time. If `False`, the iterator + state is maintained between calls to `StandardEvaluator.evaluate()`. + """ + use_tf_function: bool = True + use_tf_while_loop: bool = False + recreate_iterator_for_each_eval: bool = True + + +class StandardEvaluator(runner.AbstractEvaluator, metaclass=abc.ABCMeta): + """Implements the standard functionality of AbstractEvaluator APIs. + + This class structures evaluation roughly as follows: + + state = eval_begin() + for _ in range(num_steps): + step_outputs = eval_step(eval_iterator) + state = eval_reduce(state, step_outputs) + return eval_end(state) + + Calls to `eval_begin` and `eval_end` are always done in eager + mode, while `eval_step` may be compiled with `tf.function` as determined by + the `options` passed to `__init__`. `eval_reduce` is in eager mode if + `use_tf_while_loop=False` in `StandardEvaluatorOptions`, but in graph mode if + `use_tf_while_loop=True`. + + This class does not support completely evaluating multiple different datasets + (i.e., where every example of each dataset should be processed, as opposed to + running for a fixed number of evaluation steps). A custom `AbstractEvaluator` + is recommended in this case. + """ + + def __init__(self, eval_dataset, options: StandardEvaluatorOptions = None): + """Initializes the `StandardEvaluator` instance. Args: - eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or - DistributedDataset. - use_tf_function: A boolean indicates whether a `tf.function` will be used. - If False, evaluation will run on pure eager mode. + eval_dataset: A `tf.nest`-compatible structure of `tf.data.Dataset` or + `DistributedDataset`. + options: An `orbit.StandardEvaluatorOptions` instance. 
""" - self._eval_use_tf_function = use_tf_function + options = options or StandardEvaluatorOptions() + if options.use_tf_while_loop and not options.use_tf_function: + raise ValueError("`use_tf_while_loop=True` and `use_tf_function=False` " + "is not supported") + + self._eval_options = options self._eval_dataset = eval_dataset + self._eval_iter = None self._eval_loop_fn = None - def evaluate( - self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]: - """See base class.""" + def create_eval_loop_fn(self, has_state: bool): + """Creates an eval loop from the current step function and options. + + Args: + has_state: If the step function has state, state will be kept in the loop. + + Returns: + The eval loop function, i.e. wrapper of multiple eval steps. + """ + eval_step_fn = self.eval_step + if self._eval_options.use_tf_while_loop: + # TODO(b/176126742): tf.while_loop doesn't support `None` as a loop input + # even when it is not used inside the loop. To workaround this limitation, + # we have to build two tf.functions for it. + if has_state: + loop_fn = loop_fns.create_tf_while_loop_fn_with_state(eval_step_fn) + else: + loop_fn = loop_fns.create_tf_while_loop_fn(eval_step_fn) + loop_fn = tf.function(loop_fn) + else: + if self._eval_options.use_tf_function: + eval_step_fn = tf.function(eval_step_fn) + loop_fn = loop_fns.create_loop_fn(eval_step_fn) + return loop_fn + + def evaluate(self, num_steps: tf.Tensor) -> Optional[runner.Output]: + """Implements `num_steps` steps of evaluation. + + Args: + num_steps: The number of evaluation steps to run. When this is -1, + evaluation proceeds until a call to `eval_step` raises a `StopIteration` + or `tf.errors.OutOfRangeError`. + + Returns: + The output of `self.eval_end()`. + + Raises: + ValueError: If `options.use_tf_while_loop` is `True` and `num_steps` is + unspecified. 
+ """ + if self._eval_options.use_tf_while_loop and num_steps == -1: + raise ValueError("Looping until exhausted is not supported if " + "`options.use_tf_while_loop` is `True`") + outputs = self.eval_begin() # pylint: disable=assignment-from-no-return - eval_iter = tf.nest.map_structure(iter, self._eval_dataset) + has_state = outputs is not None if self._eval_loop_fn is None: - eval_fn = self.eval_step - if self._eval_use_tf_function: - eval_fn = tf.function(eval_fn) - self._eval_loop_fn = utils.create_loop_fn(eval_fn) + self._eval_loop_fn = self.create_eval_loop_fn(has_state) + + # If `recreate_iterator_for_each_eval` is `True`, `self._eval_iter` is + # always None. + if self._eval_iter is None: + eval_iter = tf.nest.map_structure(iter, self.eval_dataset) + if not self._eval_options.recreate_iterator_for_each_eval: + self._eval_iter = eval_iter + else: + eval_iter = self._eval_iter + + if self._eval_options.use_tf_while_loop and not has_state: + self._eval_loop_fn(eval_iter, num_steps) + else: + outputs = self._eval_loop_fn( + eval_iter, num_steps, state=outputs, reduce_fn=self.eval_reduce) - outputs = self._eval_loop_fn( - eval_iter, num_steps, state=outputs, reduce_fn=self.eval_reduce) if outputs is None: return self.eval_end() else: @@ -184,12 +347,13 @@ class StandardEvaluator(runner.AbstractEvaluator): def eval_begin(self) -> Any: """Called once at the beginning of the evaluation. - This method is called before dataset iterators creation. - This is a good place to reset metrics that accumulate values over the entire - evaluation. + This method is always called in eager mode, and is a good place to reset + metrics that accumulate values over the course of evaluation. + + Note that this method is called before dataset iterator creation. Returns: - An output which is passed as `state` argument into `eval_reduce` function. + An value to pass as the `state` argument to `eval_reduce`. 
""" pass @@ -197,14 +361,20 @@ class StandardEvaluator(runner.AbstractEvaluator): def eval_step(self, iterator) -> Any: """Implements one step of evaluation. - What a "step" consists of is up to the implementer. If using distribution - strategies, the call to this method should take place in the "cross-replica + What a "step" consists of is up to the implementer. When using distribution + strategies, the call to this method takes place in the "cross-replica context" for generality, to allow e.g. multiple iterator dequeues and calls to `strategy.run`. + Note that if `use_tf_function=True`, all the code inside `eval_step` should + be compatible with `tf.function` tracing (and in particular, any state + modifications involving `self` should be avoided). In some cases, non- + `tf.function` compatible code can be moved to `eval_loop_begin`, + `eval_reduce`, or `eval_loop_end`, which always execute eagerly. + Args: - iterator: A tf.nest-compatible structure of tf.data Iterator or - DistributedIterator. + iterator: A `tf.nest`-compatible structure of `tf.data.Iterator` or + `DistributedIterator`. Returns: An output which is passed as `step_outputs` argument into `eval_reduce` @@ -212,50 +382,62 @@ class StandardEvaluator(runner.AbstractEvaluator): """ pass - def eval_end(self, *args) -> Optional[Dict[Text, tf.Tensor]]: + def eval_end(self, *args) -> Optional[runner.Output]: """Called at the end of the evaluation. - This is a good place to get metric results. The value returned from this - function will be returned as-is from the evaluate() method. + Called once at the end of evaluation. + + This method is always called in eager mode, and is a good place to get + metric results. The value returned from this function will be returned as-is + from the `evaluate` method implementation provided by `StandardEvaluator`. Args: - *args: the outputs from `eval_reduce` for the last eval step. 
+ *args: The outputs from `eval_reduce` for the last eval step, if they are + non-`None` (if they are `None`, nothing is passed). Returns: The function may return a dictionary of `Tensors`, which will be - written to logs and as TensorBoard summaries. + written to logs and as TensorBoard summaries. It can also be a + nested dictionary, yielding a hierarchy of summary directories. """ pass - def eval_reduce(self, state=None, step_outputs=None) -> Any: - """A function to do the reduction on the evaluation outputs per step. + def eval_reduce(self, + state: Any = None, + step_outputs: Optional[runner.Output] = None) -> Any: + """A function to perform per-step reduction on the evaluation outputs. - This is useful for passing states throughout evaluation. E.g. it can be used - to maintain the output losses from all the evaluation steps, and compute the - mean loss in `eval_end` function. + This is useful for passing state throughout evaluation, especially in cases + where maintaining or accumulating state is hard to accomplish using + `tf.metrics.Metric` or other `tf.Variable`-based approaches. For instance, + it can be used to easily accumulate all per-example losses from the full + evaluation for subsequent processing in `eval_end()`. Args: - state: A maintained state throughout the evaluation. + state: A state maintained throughout the evaluation. step_outputs: Outputs from the current evaluation step. Returns: - An output which is passed as `state` argument into `eval_reduce` function - for the next step. After evaluation is finished, the output from last step - will be passed into `eval_end` function. + An output which is passed as the `state` argument to this function for the + next step. After evaluation is finished, the output from last step will be + passed to `eval_end`. 
""" pass @property def eval_dataset(self): - """Returns the train_datase instance.""" + """The current evaluation dataset.""" return self._eval_dataset @eval_dataset.setter def eval_dataset(self, eval_dataset): - """Set a new eval dataset and replace with the existing one. + """Sets a new eval dataset, replacing the current one. + + Any unprocessed examples in the current dataset are discarded. Args: - eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or - DistributedDataset. + eval_dataset: A `tf.nest`-compatible structure of `tf.data.Dataset` or + `DistributedDataset`. """ self._eval_dataset = eval_dataset + self._eval_iter = None diff --git a/orbit/standard_runner_test.py b/orbit/standard_runner_test.py index 5854a4f963d046f667f273d4fc618dc3a315fc52..ef1335e79c1a85143e9c0cdc35a6271288590f0d 100644 --- a/orbit/standard_runner_test.py +++ b/orbit/standard_runner_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. +# Copyright 2021 The Orbit Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================== + """Tests for orbit.standard_runner.""" -# pylint: disable=g-bad-import-order + +from absl.testing import parameterized from orbit import standard_runner +from orbit import utils import tensorflow as tf @@ -33,63 +35,118 @@ def dataset_fn(input_context=None): return dataset -class TestRunner(standard_runner.StandardTrainer, - standard_runner.StandardEvaluator): - """Implements the training and evaluation APIs for tests.""" +class TestTrainer(standard_runner.StandardTrainer): + """A StandardTrainer subclass for tests.""" - def __init__(self): + def __init__(self, options=None): self.strategy = tf.distribute.get_strategy() - self.global_step = tf.Variable( - 0, - trainable=False, - dtype=tf.int64, - name='global_step', - aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) - standard_runner.StandardTrainer.__init__(self, train_dataset=None) - standard_runner.StandardEvaluator.__init__(self, eval_dataset=None) + self.global_step = utils.create_global_step() + dataset = self.strategy.distribute_datasets_from_function(dataset_fn) + super().__init__(train_dataset=dataset, options=options) def train_loop_begin(self): - self.train_dataset = ( - self.strategy.experimental_distribute_datasets_from_function(dataset_fn) - ) + self.global_step.assign(0) def train_step(self, iterator): - def _replicated_step(_): + def replica_step(_): self.global_step.assign_add(1) - self.strategy.run(_replicated_step, args=(next(iterator),)) + self.strategy.run(replica_step, args=(next(iterator),)) def train_loop_end(self): return self.global_step.numpy() + +class TestEvaluator(standard_runner.StandardEvaluator): + """A StandardEvaluator subclass for tests.""" + + def __init__(self, options=None): + self.strategy = tf.distribute.get_strategy() + self.global_step = utils.create_global_step() + dataset = self.strategy.distribute_datasets_from_function(dataset_fn) + super().__init__(eval_dataset=dataset, 
options=options) + def eval_begin(self): - self.eval_dataset = self.strategy.experimental_distribute_datasets_from_function( - dataset_fn) + self.global_step.assign(0) def eval_step(self, iterator): - def _replicated_step(_): + def replica_step(_): self.global_step.assign_add(1) - self.strategy.run(_replicated_step, args=(next(iterator),)) + self.strategy.run(replica_step, args=(next(iterator),)) def eval_end(self): return self.global_step.numpy() -class StandardRunnerTest(tf.test.TestCase): +class TestEvaluatorWithOutputsAggregation(standard_runner.StandardEvaluator): + """A StandardEvaluator subclass for tests.""" + + def __init__(self, options=None): + self.strategy = tf.distribute.get_strategy() + dataset = self.strategy.distribute_datasets_from_function( + lambda _: tf.data.Dataset.range(10)) + super().__init__(eval_dataset=dataset, options=options) + + def eval_begin(self): + return tf.constant((0.0,)) + + def eval_reduce(self, state, step_outputs): + state = tf.concat([state, step_outputs], 0) + return state + + def eval_step(self, iterator): + + def replica_step(x): + x = tf.cast(x, tf.float32) + return tf.reduce_sum(x) + + return self.strategy.experimental_local_results( + self.strategy.run(replica_step, args=(next(iterator),))) + + def eval_end(self, outputs): + return tf.reduce_sum(outputs) + + +class StandardRunnerTest(parameterized.TestCase): + + def test_default_trainer(self): + trainer = TestTrainer() + self.assertEqual(trainer.train(tf.constant(10)), 10) + + def test_trainer_with_tpu_summary_optimization(self): + options = standard_runner.StandardTrainerOptions( + use_tpu_summary_optimization=True) + trainer = TestTrainer(options) + self.assertEqual(trainer.train(tf.constant(10)), 10) + + @parameterized.named_parameters(("use_tf_while_loop", True), ("", False)) + def test_default_evaluator(self, use_tf_while_loop): + options = standard_runner.StandardEvaluatorOptions( + use_tf_while_loop=use_tf_while_loop) + evaluator = TestEvaluator(options) + 
self.assertEqual(evaluator.evaluate(tf.constant(10)), 10) - def test_train(self): - test_runner = TestRunner() - self.assertEqual( - test_runner.train(tf.convert_to_tensor(10, dtype=tf.int32)), 10) + @parameterized.named_parameters(("use_tf_while_loop", True), ("", False)) + def test_evaluator_with_outputs_aggregation(self, use_tf_while_loop): + options = standard_runner.StandardEvaluatorOptions( + use_tf_while_loop=use_tf_while_loop) + evaluator = TestEvaluatorWithOutputsAggregation(options) + self.assertEqual(evaluator.evaluate(tf.constant(10)), 45) - def test_eval(self): - test_runner = TestRunner() - self.assertEqual( - test_runner.evaluate(tf.convert_to_tensor(10, dtype=tf.int32)), 10) + @parameterized.named_parameters( + ("recreate_iterator_for_each_eval", True, 10, 10), + ("not_recreate_iterator_for_each_eval", False, 10, 35)) + def test_evaluator_with_repeat_dataset(self, recreate_iterator_for_each_eval, + sum_for_1st_time, sum_for_2nd_time): + options = standard_runner.StandardEvaluatorOptions( + recreate_iterator_for_each_eval=recreate_iterator_for_each_eval) + evaluator = TestEvaluatorWithOutputsAggregation(options) + self.assertEqual(evaluator.evaluate(tf.constant(5)), sum_for_1st_time) + self.assertEqual(evaluator.evaluate(tf.constant(5)), sum_for_2nd_time) -if __name__ == '__main__': +if __name__ == "__main__": tf.test.main() diff --git a/orbit/utils.py b/orbit/utils.py deleted file mode 100644 index e63de82a943d49fc4ca78231a4d45c4b7380739d..0000000000000000000000000000000000000000 --- a/orbit/utils.py +++ /dev/null @@ -1,394 +0,0 @@ -# Copyright 2020 The Orbit Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Some layered modules/functions to help users writing custom training loop.""" - -from __future__ import absolute_import -from __future__ import division -# from __future__ import google_type_annotations -from __future__ import print_function - -import abc -import contextlib -import functools -import inspect - -import numpy as np -import six -import tensorflow as tf - - -def create_loop_fn(step_fn): - """Creates a multiple steps function driven by the python while loop. - - Args: - step_fn: A function which takes `iterator` as input. - - Returns: - A callable defined as the `loop_fn` defination below. - """ - - def loop_fn(iterator, num_steps, state=None, reduce_fn=None): - """A loop function with multiple steps. - - Args: - iterator: A nested structure of tf.data `Iterator` or - `DistributedIterator`. - num_steps: The number of steps in the loop. If `num_steps==-1`, will - iterate until exausting the iterator. - state: An optional initial state before running the loop. - reduce_fn: a callable defined as `def reduce_fn(state, value)`, where - `value` is the outputs from `step_fn`. - - Returns: - The updated state. - """ - try: - step = 0 - # To make sure the OutOfRangeError exception can be handled well with - # async remote eager, we need to wrap the loop body in a `async_scope`. 
- with tf.experimental.async_scope(): - while (num_steps == -1 or step < num_steps): - outputs = step_fn(iterator) - if reduce_fn is not None: - state = reduce_fn(state, outputs) - step += 1 - return state - except (StopIteration, tf.errors.OutOfRangeError): - tf.experimental.async_clear_error() - return state - - return loop_fn - - -def create_tf_while_loop_fn(step_fn): - """Create a multiple steps function driven by tf.while_loop on the host. - - Args: - step_fn: A function which takes `iterator` as input. - - Returns: - A callable defined as the `loop_fn` defination below. - """ - - def loop_fn(iterator, num_steps): - """A loop function with multiple steps. - - Args: - iterator: A nested structure of tf.data `Iterator` or - `DistributedIterator`. - num_steps: The number of steps in the loop. Must be a tf.Tensor. - """ - if not isinstance(num_steps, tf.Tensor): - raise ValueError("`num_steps` should be an `tf.Tensor`. Python object " - "may cause retracing.") - - for _ in tf.range(num_steps): - step_fn(iterator) - - return loop_fn - - -def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs): - """A helper function to create distributed dataset. - - Args: - strategy: An instance of `tf.distribute.Strategy`. - dataset_or_fn: A instance of `tf.data.Dataset` or a function which takes an - `tf.distribute.InputContext` as input and returns a `tf.data.Dataset`. If - it is a function, it could optionally have an argument named - `input_context` which is `tf.distribute.InputContext` argument type. - *args: The list of arguments to be passed to dataset_or_fn. - **kwargs: Any keyword arguments to be passed. - - Returns: - A distributed Dataset. 
- """ - if strategy is None: - strategy = tf.distribute.get_strategy() - - if isinstance(dataset_or_fn, tf.data.Dataset): - return strategy.experimental_distribute_dataset(dataset_or_fn) - - if not callable(dataset_or_fn): - raise ValueError("`dataset_or_fn` should be either callable or an instance " - "of `tf.data.Dataset`") - - def dataset_fn(ctx): - """Wrapped dataset function for creating distributed dataset..""" - - # If `dataset_or_fn` is a function and has `input_context` as argument - # names, pass `ctx` as the value of `input_context` when calling - # `dataset_or_fn`. Otherwise `ctx` will not be used when calling - # `dataset_or_fn`. - if six.PY3: - argspec = inspect.getfullargspec(dataset_or_fn) - else: - argspec = inspect.getargspec(dataset_or_fn) # pylint: disable=deprecated-method - args_names = argspec.args - - if "input_context" in args_names: - kwargs["input_context"] = ctx - ds = dataset_or_fn(*args, **kwargs) - return ds - - return strategy.experimental_distribute_datasets_from_function(dataset_fn) - - -class SummaryManager(object): - """A class manages writing summaries.""" - - def __init__(self, summary_dir, summary_fn, global_step=None): - """Construct a summary manager object. - - Args: - summary_dir: the directory to write summaries. - summary_fn: A callable defined as `def summary_fn(name, tensor, - step=None)`, which describes the summary operation. - global_step: A `tf.Variable` instance for the global step. 
- """ - self._enabled = (summary_dir is not None) - self._summary_dir = summary_dir - self._summary_fn = summary_fn - self._summary_writer = None - - if global_step is None: - self._global_step = tf.summary.experimental.get_step() - else: - self._global_step = global_step - - @property - def summary_writer(self): - """Returns the underlying summary writer.""" - if self._summary_writer is not None: - return self._summary_writer - if self._enabled: - self._summary_writer = tf.summary.create_file_writer(self._summary_dir) - else: - self._summary_writer = tf.summary.create_noop_writer() - return self._summary_writer - - def flush(self): - """Flush the underlying summary writer.""" - if self._enabled: - tf.summary.flush(self.summary_writer) - - def write_summaries(self, items): - """Write a bulk of summaries. - - Args: - items: a dictionary of `Tensors` for writing summaries. - """ - # TODO(rxsang): Support writing summaries with nested structure, so users - # can split the summaries into different directories for nicer visualization - # in Tensorboard, like train and eval metrics. - if not self._enabled: - return - - with self.summary_writer.as_default(): - for name, tensor in items.items(): - self._summary_fn(name, tensor, step=self._global_step) - - -@six.add_metaclass(abc.ABCMeta) -class Trigger(object): - """An abstract class representing a "trigger" for some event.""" - - @abc.abstractmethod - def __call__(self, value: float, force_trigger=False): - """Maybe trigger the event based on the given value. - - Args: - value: the value for triggering. - force_trigger: Whether the trigger is forced triggered. - - Returns: - `True` if the trigger is triggered on the given `value`, and - `False` otherwise. - """ - - @abc.abstractmethod - def reset(self): - """Reset states in the trigger.""" - - -class IntervalTrigger(Trigger): - """Triggers on every fixed interval.""" - - def __init__(self, interval, start=0): - """Constructs the IntervalTrigger. 
- - Args: - interval: The triggering interval. - start: An initial value for the trigger. - """ - self._interval = interval - self._last_trigger_value = start - - def __call__(self, value, force_trigger=False): - """Maybe trigger the event based on the given value. - - Args: - value: the value for triggering. - force_trigger: If True, the trigger will be forced triggered unless the - last trigger value is equal to `value`. - - Returns: - `True` if the trigger is triggered on the given `value`, and - `False` otherwise. - """ - if force_trigger and value != self._last_trigger_value: - self._last_trigger_value = value - return True - - if self._interval and self._interval > 0: - if value >= self._last_trigger_value + self._interval: - self._last_trigger_value = value - return True - return False - - def reset(self): - """See base class.""" - self._last_trigger_value = 0 - - -class EpochHelper(object): - """A Helper class to handle epochs in Customized Training Loop.""" - - def __init__(self, epoch_steps, global_step): - """Constructs the EpochHelper. - - Args: - epoch_steps: An integer indicates how many steps in an epoch. - global_step: A `tf.Variable` instance indicates the current global step. 
- """ - self._epoch_steps = epoch_steps - self._global_step = global_step - self._current_epoch = None - self._epoch_start_step = None - self._in_epoch = False - - def epoch_begin(self): - """Returns whether a new epoch should begin.""" - if self._in_epoch: - return False - current_step = self._global_step.numpy() - self._epoch_start_step = current_step - self._current_epoch = current_step // self._epoch_steps - self._in_epoch = True - return True - - def epoch_end(self): - """Returns whether the current epoch should end.""" - if not self._in_epoch: - raise ValueError("`epoch_end` can only be called inside an epoch") - current_step = self._global_step.numpy() - epoch = current_step // self._epoch_steps - - if epoch > self._current_epoch: - self._in_epoch = False - return True - return False - - @property - def batch_index(self): - """Index of the next batch within the current epoch.""" - return self._global_step.numpy() - self._epoch_start_step - - @property - def current_epoch(self): - return self._current_epoch - - -@contextlib.contextmanager -def _soft_device_placement(): - """Context manager for soft device placement, allowing summaries on CPU.""" - original_setting = tf.config.get_soft_device_placement() - try: - tf.config.set_soft_device_placement(True) - yield - finally: - tf.config.set_soft_device_placement(original_setting) - - -def train_function_with_summaries(*args, **kwargs): - """Utility function to support TPU summaries via multiple `tf.function`s. - - This permits interleaving summaries inside TPU-compatible code, but without - any performance impact on steps that do not write summaries. - - Usage is as a decorator, similar to `tf.function`, and any `tf.function` - arguments will be passed through if supplied: - - @trainer.train_function_with_summaries - def train(self, num_steps): - ... 
- - The decorated function is assumed to be a loop method accepting a `num_steps` - parameter, as for instance would be called within the `Controller`'s outer - train loop. The implementation here assumes that `summary_frequency` is - divisible by `steps_per_loop`. The decorated method should accept two - arguments, `self` and `num_steps`. - - Two `tf.function` versions of `train_fn` are created: one inside a summary - writer scope with soft device placement enabled (used on steps that require - summary writing), and one with no summary writer present and soft device - placement disabled (used on all other steps). - - Args: - *args: Arguments to pass through to `tf.function`. - **kwargs: Keyword arguments to pass through to `tf.function`. - - Returns: - If the first argument is a callable, returns the decorated callable. - Otherwise, returns a decorator. - """ - - def decorator(train_fn): - # TODO(dhr): Validate the signature of train_fn? - - train_fn_with_summaries = tf.function(train_fn, *args, **kwargs) - train_fn_without_summaries = tf.function(train_fn, *args, **kwargs) - - @functools.wraps(train_fn) - def wrapper(self, num_steps): - if tf.summary.should_record_summaries(): - with _soft_device_placement(): - output = train_fn_with_summaries(self, tf.constant(1)) - num_steps -= 1 - if num_steps >= 1: - with tf.summary.record_if(False): - output = train_fn_without_summaries(self, num_steps) - return output - - return wrapper - - if args and callable(args[0]): - train_fn, args = args[0], args[1:] - return decorator(train_fn) - return decorator - - -def get_value(x) -> np.ndarray: - """Returns the value of a variable/tensor. - - Args: - x: input variable. - - Returns: - A Numpy array. 
- """ - if not tf.is_tensor(x): - return x - return x.numpy() diff --git a/orbit/utils/__init__.py b/orbit/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3eeb67c4a284d238e260e9587c4cfda1aab13a9a --- /dev/null +++ b/orbit/utils/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines exported symbols for the `orbit.utils` package.""" + +from orbit.utils.common import create_global_step +from orbit.utils.common import get_value +from orbit.utils.common import make_distributed_dataset + +from orbit.utils.epoch_helper import EpochHelper + +from orbit.utils.loop_fns import create_loop_fn +from orbit.utils.loop_fns import create_tf_while_loop_fn +from orbit.utils.loop_fns import LoopFnWithSummaries + +from orbit.utils.summary_manager import SummaryManager + +from orbit.utils.tpu_summaries import OptionalSummariesFunction diff --git a/orbit/utils/common.py b/orbit/utils/common.py new file mode 100644 index 0000000000000000000000000000000000000000..63ee020afe2e0ae7c923d2211cf1750b141f54ef --- /dev/null +++ b/orbit/utils/common.py @@ -0,0 +1,100 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Some layered modules/functions to help users writing custom training loop.""" + +import inspect + +import tensorflow as tf + + +def create_global_step() -> tf.Variable: + """Creates a `tf.Variable` suitable for use as a global step counter. + + Creating and managing a global step variable may be necessary for + `AbstractTrainer` subclasses that perform multiple parameter updates per + `Controller` "step", or use different optimizers on different steps. + + In these cases, an `optimizer.iterations` property generally can't be used + directly, since it would correspond to parameter updates instead of iterations + in the `Controller`'s training loop. Such use cases should simply call + `step.assign_add(1)` at the end of each step. + + Returns: + A non-trainable scalar `tf.Variable` of dtype `tf.int64`, with only the + first replica's value retained when synchronizing across replicas in + a distributed setting. + """ + return tf.Variable( + 0, + dtype=tf.int64, + name="global_step", + trainable=False, + aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA) + + +def make_distributed_dataset(strategy, dataset_or_fn, *args, **kwargs): + """A utility function to help create a `tf.distribute.DistributedDataset`. + + Args: + strategy: An instance of `tf.distribute.Strategy`. + dataset_or_fn: A instance of `tf.data.Dataset`, or a "dataset function" + returning a `tf.data.Dataset`. If it is a function, it may optionally have + an argument named `input_context` which will be passed a + `tf.distribute.InputContext` instance. 
+ *args: Any positional arguments to pass through to `dataset_or_fn`. + **kwargs: Any keyword arguments to pass through to `dataset_or_fn`. + + Returns: + A distributed Dataset. + """ + if strategy is None: + strategy = tf.distribute.get_strategy() + + if isinstance(dataset_or_fn, tf.data.Dataset): + return strategy.experimental_distribute_dataset(dataset_or_fn) + + if not callable(dataset_or_fn): + raise ValueError("`dataset_or_fn` should be either callable or an instance " + "of `tf.data.Dataset`.") + + def dataset_fn(input_context): + """Wraps `dataset_or_fn` for strategy.distribute_datasets_from_function.""" + + # If `dataset_or_fn` is a function and has an argument named + # `input_context`, pass through the given `input_context`. Otherwise + # `input_context` will be ignored. + argspec = inspect.getfullargspec(dataset_or_fn) + arg_names = argspec.args + + if "input_context" in arg_names: + kwargs["input_context"] = input_context + return dataset_or_fn(*args, **kwargs) + + return strategy.distribute_datasets_from_function(dataset_fn) + + +def get_value(x): + """Returns input values, converting any TensorFlow values to NumPy values. + + Args: + x: The input. May be a `tf.Tensor` or `tf.Variable`. + + Returns: + If the input is a TensorFlow `Tensor`, returns the `Tensor`'s equivalent + NumPy value. Otherwise, just returns the input. + """ + if not tf.is_tensor(x): + return x + return x.numpy() diff --git a/orbit/utils/common_test.py b/orbit/utils/common_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1a68e7c66b20b0814d2618de190128a0dcaa0387 --- /dev/null +++ b/orbit/utils/common_test.py @@ -0,0 +1,34 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for orbit.utils.common.""" + +from orbit.utils import common + +import tensorflow as tf + + +class UtilsTest(tf.test.TestCase): + + def test_create_global_step(self): + step = common.create_global_step() + self.assertEqual(step.name, "global_step:0") + self.assertEqual(step.dtype, tf.int64) + self.assertEqual(step, 0) + step.assign_add(1) + self.assertEqual(step, 1) + + +if __name__ == "__main__": + tf.test.main() diff --git a/orbit/utils/epoch_helper.py b/orbit/utils/epoch_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..10c11324ae8371b290c9973ae008902cd76fb9eb --- /dev/null +++ b/orbit/utils/epoch_helper.py @@ -0,0 +1,65 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Provides a utility class for training in epochs.""" + +import tensorflow as tf + + +class EpochHelper: + """A helper class handle bookkeeping of epochs in custom training loops.""" + + def __init__(self, epoch_steps: int, global_step: tf.Variable): + """Initializes the `EpochHelper` instance. + + Args: + epoch_steps: An integer indicating how many steps are in an epoch. + global_step: A `tf.Variable` providing the current global step. + """ + self._epoch_steps = epoch_steps + self._global_step = global_step + self._current_epoch = None + self._epoch_start_step = None + self._in_epoch = False + + def epoch_begin(self): + """Returns whether a new epoch should begin.""" + if self._in_epoch: + return False + current_step = self._global_step.numpy() + self._epoch_start_step = current_step + self._current_epoch = current_step // self._epoch_steps + self._in_epoch = True + return True + + def epoch_end(self): + """Returns whether the current epoch should end.""" + if not self._in_epoch: + raise ValueError("`epoch_end` can only be called inside an epoch.") + current_step = self._global_step.numpy() + epoch = current_step // self._epoch_steps + + if epoch > self._current_epoch: + self._in_epoch = False + return True + return False + + @property + def batch_index(self): + """Index of the next batch within the current epoch.""" + return self._global_step.numpy() - self._epoch_start_step + + @property + def current_epoch(self): + return self._current_epoch diff --git a/orbit/utils/loop_fns.py b/orbit/utils/loop_fns.py new file mode 100644 index 0000000000000000000000000000000000000000..6e326246942261941ecd5ec8d830b050fa00aed1 --- /dev/null +++ b/orbit/utils/loop_fns.py @@ -0,0 +1,192 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for creating loop functions.""" + +from orbit.utils import tpu_summaries + +import tensorflow as tf + + +def create_loop_fn(step_fn): + """Creates a loop function driven by a Python `while` loop. + + Args: + step_fn: A function taking a nested structure of `tf.data.Iterator` or + `DistributedIterator`. There are no constraints on the return value of the + function (except that it must be compatible with any `reduce_fn` provided + to the returned `loop_fn`). + + Returns: + A loop function taking required `iterator` and `num_steps` parameters, as + well as optional `state` and `reduce_fn` parameters for accumulating state + over multiple iterations of the loop. See the `loop_fn` definition below for + additional details. + """ + + def loop_fn(iterator, num_steps, state=None, reduce_fn=None): + """Makes `num_steps` calls to `step_fn(iterator)`. + + Additionally, state may be accumulated across iterations of the loop. + Conceptually, state accumulation is handled roughly as follows: + + for _ in range(num_steps): + step_outputs = step_fn(iterator) + state = reduce_fn(state, step_outputs) + return state + + However, the implementation is slightly more complicated in order to support + looping until the iterator is exhausted (when `num_steps == -1`) and to + properly catch exceptions when running under async remote eager (as is the + case in TPU training setups involving separate coordinator/worker machines). + + Args: + iterator: A nested structure of `tf.data.Iterator` or + `DistributedIterator`. + num_steps: The number of steps in the loop. 
If `num_steps == -1`, will + iterate until exausting the iterator. + state: An optional initial state before running the loop. + reduce_fn: A callable taking two inputs, `state` and `value`, where + `state` is the previous output from `reduce_fn`, and `value` is the + output from `step_fn`. + + Returns: + The final state returned by `reduce_fn`, or `None` if `state` and + `reduce_fn` are not provided. + """ + try: + step = 0 + # To make sure the OutOfRangeError exception can be handled well under + # async remote eager, we need to wrap the loop body in `async_scope`. + with tf.experimental.async_scope(): + while num_steps == -1 or step < num_steps: + outputs = step_fn(iterator) + if reduce_fn is not None: + state = reduce_fn(state, outputs) + step += 1 + return state + except (StopIteration, tf.errors.OutOfRangeError): + tf.experimental.async_clear_error() + return state + + return loop_fn + + +def create_tf_while_loop_fn(step_fn): + """Creates a loop function compatible with TF's AutoGraph loop conversion. + + Args: + step_fn: A function taking a nested structure of `tf.data.Iterator` or + `DistributedIterator`. Currently, any return values are ignored. + + Returns: + A loop function taking required `iterator` and `num_steps` parameters. If + called inside a `tf.function`, the loop will be converted by AutoGraph into + a `tf.while_loop` construct. See the `loop_fn` definition below for + additional details. + """ + + def loop_fn(iterator, num_steps): + """Makes `num_steps` calls to `step_fn(iterator)`. + + Args: + iterator: A nested structure of `tf.data.Iterator` or + `DistributedIterator`. + num_steps: The number of steps in the loop. Should be passed as a + `tf.Tensor`. Iterating until iterator exhaustion is not supported. + """ + if not isinstance(num_steps, tf.Tensor): + raise ValueError( + "`num_steps` should be a `tf.Tensor`. 
Passing a Python value can " + "cause unnecessary retracing when wrapped by `tf.function`.") + + for _ in tf.range(num_steps): + # Clear out the outer name scope so the ops created inside `tf.while_loop` + # don't get "while/" as name prefix. + with tf.name_scope(""): + step_fn(iterator) + + return loop_fn + + +def create_tf_while_loop_fn_with_state(step_fn): + """Creates a TF while loop function with state. + + This function is similar to `create_tf_while_loop_fn`, but allowing a `state` + to be accumulated over multiple iterations of the loop. Note that the + structure of the `state` cannot be changed across iterations. + + Args: + step_fn: A function taking a nested structure of `tf.data.Iterator` or + `DistributedIterator`. Currently, any return values are ignored. + + Returns: + A loop function taking required `iterator`, `num_steps`, `state` and + `reduce_fn` parameters. If called inside a `tf.function`, the loop will be + converted by AutoGraph into a `tf.while_loop` construct. See the `loop_fn` + definition below for additional details. + """ + + def loop_fn_with_state(iterator, num_steps, state, reduce_fn): + """Makes `num_steps` calls to `step_fn(iterator)`. + + Args: + iterator: A nested structure of `tf.data.Iterator` or + `DistributedIterator`. + num_steps: The number of steps in the loop. Should be passed as a + `tf.Tensor`. Iterating until iterator exhaustion is not supported. + state: An initial state before running the loop. + reduce_fn: A callable taking two inputs, `state` and `value`, where + `state` is the previous output from `reduce_fn`, and `value` is the + output from `step_fn`. + + Returns: + The final state returned by `reduce_fn`. + """ + if not isinstance(num_steps, tf.Tensor): + raise ValueError( + "`num_steps` should be a `tf.Tensor`. 
Passing a Python value can " + "cause unnecessary retracing when wrapped by `tf.function`.") + + for _ in tf.range(num_steps): + # Clear out the outer name scope so the ops created inside `tf.while_loop` + # don't get "while/" as name prefix. + with tf.name_scope(""): + # Relax the shapes within the loop, so the shape of `state` can change + # across iterations. This is useful to aggregate outputs from each step + # and concat to `state`. + tf.autograph.experimental.set_loop_options( + shape_invariants=[(t, tf.TensorShape([None] * t.shape.rank)) + for t in tf.nest.flatten(state) + if tf.is_tensor(t)]) + outputs = step_fn(iterator) + state = reduce_fn(state, outputs) + return state + + return loop_fn_with_state + + +class LoopFnWithSummaries(tpu_summaries.OptionalSummariesFunction): + """Implements a two-program approach for optimizing summaries on TPU. + + This version works with the result of `create_tf_while_loop_fn`. + """ + + def __call__(self, iterator, num_steps): + if tf.summary.should_record_summaries(): + output = self.with_summaries(iterator, tf.constant(1)) + num_steps -= 1 + if num_steps >= 1: + output = self.without_summaries(iterator, num_steps) + return output diff --git a/orbit/utils/summary_manager.py b/orbit/utils/summary_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..63a44940f533a33f39d78edaf936dcd1c8354648 --- /dev/null +++ b/orbit/utils/summary_manager.py @@ -0,0 +1,110 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provides a utility class for managing summary writing.""" + +import os + +import tensorflow as tf + + +class SummaryManager: + """A utility class for managing summary writing.""" + + def __init__(self, summary_dir, summary_fn, global_step=None): + """Initializes the `SummaryManager` instance. + + Args: + summary_dir: The directory in which to write summaries. If `None`, all + summary writing operations provided by this class are no-ops. + summary_fn: A callable defined accepting `name`, `value`, and `step` + parameters, making calls to `tf.summary` functions to write summaries. + global_step: A `tf.Variable` containing the global step value. + """ + self._enabled = summary_dir is not None + self._summary_dir = summary_dir + self._summary_fn = summary_fn + self._summary_writers = {} + + if global_step is None: + self._global_step = tf.summary.experimental.get_step() + else: + self._global_step = global_step + + def summary_writer(self, relative_path=""): + """Returns the underlying summary writer for a specific subdirectory. + + Args: + relative_path: The current path in which to write summaries, relative to + the summary directory. By default it is empty, which corresponds to the + root directory. + """ + if self._summary_writers and relative_path in self._summary_writers: + return self._summary_writers[relative_path] + if self._enabled: + self._summary_writers[relative_path] = tf.summary.create_file_writer( + os.path.join(self._summary_dir, relative_path)) + else: + self._summary_writers[relative_path] = tf.summary.create_noop_writer() + return self._summary_writers[relative_path] + + def flush(self): + """Flushes the underlying summary writers.""" + if self._enabled: + tf.nest.map_structure(tf.summary.flush, self._summary_writers) + + def write_summaries(self, summary_dict): + """Writes summaries for the given dictionary of values. 
+ + This recursively creates subdirectories for any nested dictionaries + provided in `summary_dict`, yielding a hierarchy of directories which will + then be reflected in the TensorBoard UI as different colored curves. + + For example, users may evaluate on multiple datasets and return + `summary_dict` as a nested dictionary: + + { + "dataset1": { + "loss": loss1, + "accuracy": accuracy1 + }, + "dataset2": { + "loss": loss2, + "accuracy": accuracy2 + }, + } + + This will create two subdirectories, "dataset1" and "dataset2", inside the + summary root directory. Each directory will contain event files including + both "loss" and "accuracy" summaries. + + Args: + summary_dict: A dictionary of values. If any value in `summary_dict` is + itself a dictionary, then the function will create a subdirectory with + name given by the corresponding key. This is performed recursively. Leaf + values are then summarized using the summary writer instance specific to + the parent relative path. + """ + if not self._enabled: + return + self._write_summaries(summary_dict) + + def _write_summaries(self, summary_dict, relative_path=""): + for name, value in summary_dict.items(): + if isinstance(value, dict): + self._write_summaries( + value, relative_path=os.path.join(relative_path, name)) + else: + with self.summary_writer(relative_path).as_default(): + self._summary_fn(name, value, step=self._global_step) diff --git a/orbit/utils/tpu_summaries.py b/orbit/utils/tpu_summaries.py new file mode 100644 index 0000000000000000000000000000000000000000..3501c7aa8041f977082e9d224d4105810b78f64c --- /dev/null +++ b/orbit/utils/tpu_summaries.py @@ -0,0 +1,145 @@ +# Copyright 2021 The Orbit Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains utilities for TPU summary optimization.""" + +import contextlib +import functools + +import tensorflow as tf + + +@contextlib.contextmanager +def _soft_device_placement(): + """Context manager for soft device placement, allowing summaries on CPU.""" + original_setting = tf.config.get_soft_device_placement() + try: + tf.config.set_soft_device_placement(True) + yield + finally: + tf.config.set_soft_device_placement(original_setting) + + +class OptionalSummariesFunction: + """Wrapper that provides versions of a function with and without summaries. + + This is a utility class for implementing optimized summary recording via a + two-function approach, specifically important for TPUs. Two `tf.function` + versions of a given `function` are created: one with soft device placement + enabled (for use on steps that require summary writing), and one with summary + writing and soft device placement entirely disabled (for use on all other + steps). This removes any performance impact of summaries on steps where they + aren't recorded (b/148418718). + + This class can be used as a base class to implement summary optimizations for + a function with a specific signature. 
For example, to implement efficient TPU
+  summaries for a standard `train()` method (as in `orbit.AbstractTrainer`):
+
+      class TrainFunctionWithSummaries(orbit.utils.OptionalSummariesFunction):
+        '''Implements a two-program approach for summaries on TPU.'''
+
+        def __call__(self, num_steps):
+          if tf.summary.should_record_summaries():
+            output = self.with_summaries(tf.constant(1))
+            num_steps -= 1
+          if num_steps >= 1:
+            output = self.without_summaries(num_steps)
+          return output
+
+  This can be used directly or to implement a decorator:
+
+      def train_function_with_summaries(function=None, **kwargs):
+        if function is not None:
+          return TrainFunctionWithSummaries(function, **kwargs)
+        return functools.partial(TrainFunctionWithSummaries, **kwargs)
+
+  The decorator can be applied directly to `train()` methods:
+
+      @train_function_with_summaries
+      def train(self, num_steps):
+        ...
+
+  A similar approach can be implemented for functions with different
+  signatures.
+
+  Note: The above approach assumes that the frequency of summary writing is
+  based on a step interval that is divisible by the number of steps executed
+  in each call to the `train()` function. This is enforced by the
+  `orbit.Controller`.
+
+  This wrapper properly handles instance methods (see `__get__`).
+
+  Attributes:
+    with_summaries: A wrapped version of the underlying function with summaries
+      enabled (using whatever the active predicate is for
+      `tf.summary.record_if`), and placed inside a "soft device placement"
+      context to enable summary recording on TPU.
+    without_summaries: A wrapped version of the underlying function with all
+      summary recording disabled.
+  """
+
+  def __init__(self, function, **tf_function_kwargs):
+    """Constructs an instance wrapping the given `function`.
+
+    The given `function` is wrapped twice: Once in a "soft device placement"
+    context (allowing summaries to also run on TPU), and once with summary
+    recording entirely disabled.
+
+    Both of these versions are compiled via `tf.function` (optionally using any
+    supplied `tf.function` settings), and made available as attributes.
+
+    Args:
+      function: The underlying function to wrap.
+      **tf_function_kwargs: Additional arguments to pass to `tf.function`.
+    """
+
+    @tf.function(**tf_function_kwargs)
+    @functools.wraps(function)
+    def with_summaries(*args, **kwargs):
+      with _soft_device_placement():
+        return function(*args, **kwargs)
+
+    @tf.function(**tf_function_kwargs)
+    @functools.wraps(function)
+    def without_summaries(*args, **kwargs):
+      with tf.summary.record_if(False):
+        return function(*args, **kwargs)
+
+    self.with_summaries = with_summaries
+    self.without_summaries = without_summaries
+
+  def __get__(self, instance, owner):
+    """Allows this class to be used to wrap methods as well as free functions.
+
+    For `tf.function` to work properly in all cases (e.g., when an
+    input_signature is specified), any `tf.function`-converted methods must be
+    properly bound to an instance if they are called as an instance method.
+
+    This is done by implementing this `__get__` method of the descriptor
+    protocol, and forwarding to the `__get__` method on the underlying
+    `tf.function`s.
+
+    Args:
+      instance: The instance to bind to.
+      owner: The class type of the instance.
+
+    Returns:
+      A new bound instance of `OptionalSummariesFunction`.
+    """
+    new = object.__new__(self.__class__)
+    # pytype: disable=attribute-error  # See b/162476201.
+    new.with_summaries = self.with_summaries.__get__(instance, owner)
+    new.without_summaries = self.without_summaries.__get__(instance, owner)
+    # pytype: enable=attribute-error
+    return new
diff --git a/orbit/utils/tpu_summaries_test.py b/orbit/utils/tpu_summaries_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..4aa0d0820fa8c501d7db339b568d56fd7dc1bf28
--- /dev/null
+++ b/orbit/utils/tpu_summaries_test.py
@@ -0,0 +1,120 @@
+# Copyright 2021 The Orbit Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for orbit.utils.tpu_summaries.""" + +import functools +import os + +from orbit.utils import common +from orbit.utils import tpu_summaries + +import tensorflow as tf + + +class TrainFunctionWithSummaries(tpu_summaries.OptionalSummariesFunction): + """Implements a two-program approach for summaries on TPU.""" + + def __call__(self, num_steps): + if tf.summary.should_record_summaries(): + output = self.with_summaries(tf.constant(1)) + num_steps -= 1 + if num_steps >= 1: + output = self.without_summaries(num_steps) + return output + + +def train_function_with_summaries(function=None, **kwargs): + if function is not None: + return TrainFunctionWithSummaries(function, **kwargs) + return functools.partial(TrainFunctionWithSummaries, **kwargs) + + +class DummyTrainer(tf.Module): + + def __init__(self): + self.step_counter = common.create_global_step() + + @train_function_with_summaries + def train_with_tpu_summary_optimization(self, num_steps): + for _ in tf.range(num_steps): + tf.summary.scalar("step", self.step_counter, step=self.step_counter) + self.step_counter.assign_add(1) + return self.step_counter + + @train_function_with_summaries( + input_signature=[tf.TensorSpec((), dtype=tf.int32)]) + def train_with_tpu_summary_optimization_and_input_signature(self, num_steps): + for _ in tf.range(num_steps): + tf.summary.scalar("step", self.step_counter, step=self.step_counter) + self.step_counter.assign_add(1) + return 
self.step_counter + + def train_with_tpu_summary_optimization_no_decorator(self, num_steps): + for _ in tf.range(num_steps): + tf.summary.scalar("step", self.step_counter, step=self.step_counter) + self.step_counter.assign_add(1) + return self.step_counter + + +class TpuSummariesTest(tf.test.TestCase): + + def setUp(self): + super().setUp() + self.trainer = DummyTrainer() + + def _get_events_from_logdir(self, logdir): + event_files = tf.io.gfile.listdir(logdir) + self.assertLen(event_files, 1) + path = os.path.join(logdir, event_files[0]) + events = list(tf.compat.v1.train.summary_iterator(path)) + return [event for event in events if event.WhichOneof("what") == "summary"] + + def _validate_tpu_summary_optimization(self, function, *args, **kwargs): + logdir = self.get_temp_dir() + with tf.summary.create_file_writer(logdir).as_default(): + with tf.summary.record_if(lambda: self.trainer.step_counter % 20 == 0): + for _ in range(4): + output = function(tf.constant(10), *args, **kwargs) + events = self._get_events_from_logdir(logdir) + self.assertLen(events, 2) + self.assertEqual(events[0].step, 0) + self.assertEqual(events[1].step, 20) + return output + + def test_train_with_tpu_summary_optimization(self): + output = self._validate_tpu_summary_optimization( + self.trainer.train_with_tpu_summary_optimization) + self.assertEqual(output, self.trainer.step_counter.numpy()) + + def test_train_with_tpu_summary_optimization_no_decorator(self): + optimized = train_function_with_summaries( + self.trainer.train_with_tpu_summary_optimization_no_decorator) + output = self._validate_tpu_summary_optimization(optimized) + self.assertEqual(output, self.trainer.step_counter.numpy()) + + def test_train_with_tpu_summary_optimization_and_input_signature(self): + output = self._validate_tpu_summary_optimization( + self.trainer.train_with_tpu_summary_optimization_and_input_signature) + self.assertEqual(output, self.trainer.step_counter.numpy()) + function = 
self.trainer.train_with_tpu_summary_optimization_and_input_signature + expected = (tf.TensorSpec((), dtype=tf.int32),) + input_signature = function.with_summaries.input_signature + self.assertEqual(input_signature, expected) + input_signature = function.without_summaries.input_signature + self.assertEqual(input_signature, expected) + + +if __name__ == "__main__": + tf.test.main() diff --git a/research/README.md b/research/README.md index f9e84fb86f44b687c6a9c221fa72cd461e84c01e..204955b3e082469533885a9b9860b134ee51f46f 100644 --- a/research/README.md +++ b/research/README.md @@ -7,14 +7,17 @@ This directory contains code implementations and pre-trained models of published The research models are maintained by their respective authors. ## Table of Contents -- [Modeling Libraries and Models](#modeling-libraries-and-models) -- [Models and Implementations](#models-and-implementations) - * [Computer Vision](#computer-vision) - * [Natural Language Processing](#natural-language-processing) - * [Audio and Speech](#audio-and-speech) - * [Reinforcement Learning](#reinforcement-learning) - * [Others](#others) -- [Archived Models and Implementations](#warning-archived-models-and-implementations) (:no_entry_sign: No longer maintained) +- [TensorFlow Research Models](#tensorflow-research-models) + - [Table of Contents](#table-of-contents) + - [Modeling Libraries and Models](#modeling-libraries-and-models) + - [Models and Implementations](#models-and-implementations) + - [Computer Vision](#computer-vision) + - [Natural Language Processing](#natural-language-processing) + - [Audio and Speech](#audio-and-speech) + - [Reinforcement Learning](#reinforcement-learning) + - [Others](#others) + - [Old Models and Implementations in TensorFlow 1](#old-models-and-implementations-in-tensorflow-1) + - [Contributions](#contributions) ## Modeling Libraries and Models @@ -49,6 +52,7 @@ The research models are maintained by their respective authors. 
| Directory | Paper(s) | Conference | Maintainer(s) | |-----------|----------|------------|---------------| | [audioset](audioset) | [1] [Audio Set: An ontology and human-labeled dataset for audio events](https://research.google/pubs/pub45857/)
[2] [CNN Architectures for Large-Scale Audio Classification](https://research.google/pubs/pub45611/) | ICASSP 2017 | plakal, dpwe | +| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | ICLR 2016 | yhliang2018 | ### Reinforcement Learning @@ -64,58 +68,9 @@ The research models are maintained by their respective authors. | [lfads](lfads) | [LFADS - Latent Factor Analysis via Dynamical Systems](https://arxiv.org/abs/1608.06315) | | jazcollins, sussillo | | [rebar](rebar) | [REBAR: Low-variance, unbiased gradient estimates for discrete latent variable models](https://arxiv.org/abs/1703.07370) | NIPS 2017 | gjtucker | ---- - -## :warning: Archived Models and Implementations - -The following research models are no longer maintained. +### Old Models and Implementations in TensorFlow 1 -**Note**: We will remove archived models from the master branch in June, 2020. -After removal, you will still be able to access archived models in the archive branch. - -| Directory | Paper(s) | Conference | Maintainer(s) | -|-----------|----------|------------|---------------| -| [adv_imagenet_models](adv_imagenet_models) | [1] [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)
[2] [Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204) | [1] ICLR 2017
[2] ICLR 2018 | alexeykurakin | -| [adversarial_crypto](adversarial_crypto) | [Learning to Protect Communications with Adversarial Neural Cryptography](https://arxiv.org/abs/1610.06918) | | dave-andersen | -| [adversarial_logit_pairing](adversarial_logit_pairing) | [Adversarial Logit Pairing](https://arxiv.org/abs/1803.06373) | | alexeykurakin | -| [autoencoder](autoencoder) | Various autoencoders | | snurkabill | -| [brain_coder](brain_coder) | [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526) | | danabo, mnorouzi | -| [cognitive_mapping_and_planning](cognitive_mapping_and_planning) | [Cognitive Mapping and Planning for Visual Navigation](https://arxiv.org/abs/1702.03920) | CVPR 2017 | s-gupta | -| [compression](compression) | [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) | CVPR 2017 | nmjohn | -| [deep_contextual_bandits](deep_contextual_bandits) | [Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks for Thompson Sampling](https://arxiv.org/abs/1802.09127) | ICLR 2018 | rikel | -| [deep_speech](deep_speech) | [Deep Speech 2](https://arxiv.org/abs/1512.02595) | ICLR 2016 | yhliang2018 | -| [domain_adaptation](domain_adaptation) | [1] [Domain Separation Networks](https://arxiv.org/abs/1608.06019)
[2] [Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks](https://arxiv.org/abs/1612.05424) | NIPS 2016 | bousmalis, dmrd | -| [feelvos](feelvos)| [FEELVOS](https://arxiv.org/abs/1902.09513) | CVPR 2019 | pvoigtlaender, yuningchai, aquariusjay | -| [fivo](fivo)| [Filtering variational objectives for training generative sequence models](https://arxiv.org/abs/1705.09279) | NIPS 2017 | dieterichlawson | -| [global_objectives](global_objectives) | [Scalable Learning of Non-Decomposable Objectives](https://arxiv.org/abs/1608.04802) | AISTATS 2017 | mackeya-google | -| [im2txt](im2txt) | [Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning Challenge](https://arxiv.org/abs/1609.06647) | TPAMI 2016 | cshallue | -| [inception](inception) | [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) | CVPR 2016 | shlens, vincentvanhoucke | -| [keypointnet](keypointnet) | [KeypointNet](https://arxiv.org/abs/1807.03146) | | mnorouzi | -| [learned_optimizer](learned_optimizer) | [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813) | ICML 2017 | olganw, nirum | -| [learning_to_remember_rare_events](learning_to_remember_rare_events) | [Learning to Remember Rare Events](https://arxiv.org/abs/1703.03129) | ICLR 2017| lukaszkaiser, ofirnachum | -| [learning_unsupervised_learning](learning_unsupervised_learning) | [Meta-Learning Update Rules for Unsupervised Representation Learning](https://arxiv.org/abs/1804.00222) | ICLR 2019 | lukemetz, nirum | -| [lexnet_nc](lexnet_nc) | [Olive Oil is Made of Olives, Baby Oil is Made for Babies: Interpreting Noun Compounds using Paraphrases in a Neural Model](https://arxiv.org/abs/1803.08073) | NAACL 2018 | vered1986, waterson | -| [lm_1b](lm_1b) | [Exploring the Limits of Language Modeling](https://arxiv.org/abs/1602.02410) | | oriolvinyals, panyx0718 | -| [lm_commonsense](lm_commonsense) | [A Simple Method for Commonsense 
Reasoning](https://arxiv.org/abs/1806.02847) | | thtrieu | -| [maskgan](maskgan)| [MaskGAN: Better Text Generation via Filling in the](https://arxiv.org/abs/1801.07736) | ICLR 2018 | liamb315, a-dai | -| [namignizer](namignizer)| Namignizer | | knathanieltucker | -| [neural_gpu](neural_gpu)| [Neural GPUs Learn Algorithms](https://arxiv.org/abs/1511.08228) | | lukaszkaiser | -| [neural_programmer](neural_programmer) | [Learning a Natural Language Interface with Neural Programmer](https://arxiv.org/abs/1611.08945) | ICLR 2017 | arvind2505 | -| [next_frame_prediction](next_frame_prediction) | [Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks](https://arxiv.org/abs/1607.02586) | NIPS 2016 | panyx0718 | -| [ptn](ptn) | [Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision](https://arxiv.org/abs/1612.00814) | NIPS 2016 | xcyan, arkanath, hellojas, honglaklee | -| [qa_kg](qa_kg) | [Learning to Reason: End-to-End Module Networks for Visual Question Answering](https://arxiv.org/abs/1704.05526) | ICCV 2017 | yuyuz | -| [real_nvp](real_nvp) | [Density estimation using Real NVP](https://arxiv.org/abs/1605.08803) | ICLR 2017 | laurent-dinh | -| [sentiment_analysis](sentiment_analysis)| [Effective Use of Word Order for Text Categorization with Convolutional Neural Networks](https://arxiv.org/abs/1412.1058) | NAACL HLT 2015 | sculd | -| [seq2species](seq2species) | [Seq2Species: A deep learning approach to pattern recognition for short DNA sequences](https://doi.org/10.1101/353474) | | apbusia, depristo | -| [skip_thoughts](skip_thoughts) | [Skip-Thought Vectors](https://arxiv.org/abs/1506.06726) | | cshallue | -| [steve](steve) | [Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion](https://arxiv.org/abs/1807.01675) | NeurIPS 2018 | buckman-google | -| [street](street) | [End-to-End Interpretation of the French Street Name Signs 
Dataset](https://arxiv.org/abs/1702.03970) | ECCV 2016 | theraysmith | -| [struct2depth](struct2depth)| [Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos](https://arxiv.org/abs/1811.06152) | AAAI 2019 | aneliaangelova | -| [swivel](swivel) | [Swivel: Improving Embeddings by Noticing What's Missing](https://arxiv.org/abs/1602.02215) | | waterson | -| [tcn](tcn) | [Time-Contrastive Networks: Self-Supervised Learning from Video](https://arxiv.org/abs/1704.06888) | ICRA 2018 | coreylynch, sermanet | -| [textsum](textsum)| [A Neural Attention Model for Abstractive Sentence Summarization](https://arxiv.org/abs/1509.00685) | EMNLP 2015 | panyx0718, peterjliu | -| [transformer](transformer) | [Spatial Transformer Network](https://arxiv.org/abs/1506.02025) | NIPS 2015 | daviddao| -| [video_prediction](video_prediction) | [Unsupervised Learning for Physical Interaction through Video Prediction](https://arxiv.org/abs/1605.07157) | NIPS 2016 | cbfinn | +:warning: If you are looking for old models, please visit the [Archive branch](https://github.com/tensorflow/models/tree/archive/research). 
--- diff --git a/research/a3c_blogpost/README.md b/research/a3c_blogpost/README.md deleted file mode 100644 index 55e390e703db361fbc4b1d89bb3baff9abb30dac..0000000000000000000000000000000000000000 --- a/research/a3c_blogpost/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# A3C Blog Post -In order to run this code, you will need the following prerequisites: - -* [OpenAI Gym](https://github.com/openai/gym) - `pip install gym` -* [pyglet](https://bitbucket.org/pyglet/pyglet/wiki/Home) - `pip install pyglet` -* [TensorFlow](https://www.tensorflow.org/install/) - `pip install tensorflow==2.2.0` diff --git a/research/a3c_blogpost/a3c_cartpole.py b/research/a3c_blogpost/a3c_cartpole.py deleted file mode 100644 index 62fdcf84929d76b9ccae564db77320d15e774002..0000000000000000000000000000000000000000 --- a/research/a3c_blogpost/a3c_cartpole.py +++ /dev/null @@ -1,366 +0,0 @@ -import os -os.environ["CUDA_VISIBLE_DEVICES"] = "" - -import threading -import gym -import multiprocessing -import numpy as np -from queue import Queue -import argparse -import matplotlib.pyplot as plt - - -import tensorflow as tf -from tensorflow.python import keras -from tensorflow.python.keras import layers - -parser = argparse.ArgumentParser(description='Run A3C algorithm on the game ' - 'Cartpole.') -parser.add_argument('--algorithm', default='a3c', type=str, - help='Choose between \'a3c\' and \'random\'.') -parser.add_argument('--train', dest='train', action='store_true', - help='Train our model.') -parser.add_argument('--lr', default=0.001, - help='Learning rate for the shared optimizer.') -parser.add_argument('--update-freq', default=20, type=int, - help='How often to update the global model.') -parser.add_argument('--max-eps', default=1000, type=int, - help='Global maximum number of episodes to run.') -parser.add_argument('--gamma', default=0.99, - help='Discount factor of rewards.') -parser.add_argument('--save-dir', default='/tmp/', type=str, - help='Directory in which you desire to save the 
model.') -args = parser.parse_args() - -class ActorCriticModel(keras.Model): - def __init__(self, state_size, action_size): - super(ActorCriticModel, self).__init__() - self.state_size = state_size - self.action_size = action_size - self.dense1 = layers.Dense(100, activation='relu') - self.policy_logits = layers.Dense(action_size) - self.dense2 = layers.Dense(100, activation='relu') - self.values = layers.Dense(1) - - def call(self, inputs): - # Forward pass - x = self.dense1(inputs) - logits = self.policy_logits(x) - v1 = self.dense2(inputs) - values = self.values(v1) - return logits, values - -def record(episode, - episode_reward, - worker_idx, - global_ep_reward, - result_queue, - total_loss, - num_steps): - """Helper function to store score and print statistics. - - Arguments: - episode: Current episode - episode_reward: Reward accumulated over the current episode - worker_idx: Which thread (worker) - global_ep_reward: The moving average of the global reward - result_queue: Queue storing the moving average of the scores - total_loss: The total loss accumualted over the current episode - num_steps: The number of steps the episode took to complete - """ - if global_ep_reward == 0: - global_ep_reward = episode_reward - else: - global_ep_reward = global_ep_reward * 0.99 + episode_reward * 0.01 - print( - f"Episode: {episode} | " - f"Moving Average Reward: {int(global_ep_reward)} | " - f"Episode Reward: {int(episode_reward)} | " - f"Loss: {int(total_loss / float(num_steps) * 1000) / 1000} | " - f"Steps: {num_steps} | " - f"Worker: {worker_idx}" - ) - result_queue.put(global_ep_reward) - return global_ep_reward - - -class RandomAgent: - """Random Agent that will play the specified game - - Arguments: - env_name: Name of the environment to be played - max_eps: Maximum number of episodes to run agent for. 
- """ - def __init__(self, env_name, max_eps): - self.env = gym.make(env_name) - self.max_episodes = max_eps - self.global_moving_average_reward = 0 - self.res_queue = Queue() - - def run(self): - reward_avg = 0 - for episode in range(self.max_episodes): - done = False - self.env.reset() - reward_sum = 0.0 - steps = 0 - while not done: - # Sample randomly from the action space and step - _, reward, done, _ = self.env.step(self.env.action_space.sample()) - steps += 1 - reward_sum += reward - # Record statistics - self.global_moving_average_reward = record(episode, - reward_sum, - 0, - self.global_moving_average_reward, - self.res_queue, 0, steps) - - reward_avg += reward_sum - final_avg = reward_avg / float(self.max_episodes) - print("Average score across {} episodes: {}".format(self.max_episodes, final_avg)) - return final_avg - - -class MasterAgent(): - def __init__(self): - self.game_name = 'CartPole-v0' - save_dir = args.save_dir - self.save_dir = save_dir - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - env = gym.make(self.game_name) - self.state_size = env.observation_space.shape[0] - self.action_size = env.action_space.n - self.opt = tf.compat.v1.train.AdamOptimizer(args.lr, use_locking=True) - print(self.state_size, self.action_size) - - self.global_model = ActorCriticModel(self.state_size, self.action_size) # global network - self.global_model(tf.convert_to_tensor(np.random.random((1, self.state_size)), dtype=tf.float32)) - - def train(self): - if args.algorithm == 'random': - random_agent = RandomAgent(self.game_name, args.max_eps) - random_agent.run() - return - - res_queue = Queue() - - workers = [Worker(self.state_size, - self.action_size, - self.global_model, - self.opt, res_queue, - i, game_name=self.game_name, - save_dir=self.save_dir) for i in range(multiprocessing.cpu_count())] - - for i, worker in enumerate(workers): - print("Starting worker {}".format(i)) - worker.start() - - moving_average_rewards = [] # record episode reward to 
plot - while True: - reward = res_queue.get() - if reward is not None: - moving_average_rewards.append(reward) - else: - break - [w.join() for w in workers] - - plt.plot(moving_average_rewards) - plt.ylabel('Moving average ep reward') - plt.xlabel('Step') - plt.savefig(os.path.join(self.save_dir, - '{} Moving Average.png'.format(self.game_name))) - plt.show() - - def play(self): - env = gym.make(self.game_name).unwrapped - state = env.reset() - model = self.global_model - model_path = os.path.join(self.save_dir, 'model_{}.h5'.format(self.game_name)) - print('Loading model from: {}'.format(model_path)) - model.load_weights(model_path) - done = False - step_counter = 0 - reward_sum = 0 - - try: - while not done: - env.render(mode='rgb_array') - policy, value = model(tf.convert_to_tensor(state[None, :], dtype=tf.float32)) - policy = tf.nn.softmax(policy) - action = np.argmax(policy) - state, reward, done, _ = env.step(action) - reward_sum += reward - print("{}. Reward: {}, action: {}".format(step_counter, reward_sum, action)) - step_counter += 1 - except KeyboardInterrupt: - print("Received Keyboard Interrupt. 
Shutting down.") - finally: - env.close() - - -class Memory: - def __init__(self): - self.states = [] - self.actions = [] - self.rewards = [] - - def store(self, state, action, reward): - self.states.append(state) - self.actions.append(action) - self.rewards.append(reward) - - def clear(self): - self.states = [] - self.actions = [] - self.rewards = [] - - -class Worker(threading.Thread): - # Set up global variables across different threads - global_episode = 0 - # Moving average reward - global_moving_average_reward = 0 - best_score = 0 - save_lock = threading.Lock() - - def __init__(self, - state_size, - action_size, - global_model, - opt, - result_queue, - idx, - game_name='CartPole-v0', - save_dir='/tmp'): - super(Worker, self).__init__() - self.state_size = state_size - self.action_size = action_size - self.result_queue = result_queue - self.global_model = global_model - self.opt = opt - self.local_model = ActorCriticModel(self.state_size, self.action_size) - self.worker_idx = idx - self.game_name = game_name - self.env = gym.make(self.game_name).unwrapped - self.save_dir = save_dir - self.ep_loss = 0.0 - - def run(self): - total_step = 1 - mem = Memory() - while Worker.global_episode < args.max_eps: - current_state = self.env.reset() - mem.clear() - ep_reward = 0. - ep_steps = 0 - self.ep_loss = 0 - - time_count = 0 - done = False - while not done: - logits, _ = self.local_model( - tf.convert_to_tensor(current_state[None, :], - dtype=tf.float32)) - probs = tf.nn.softmax(logits) - - action = np.random.choice(self.action_size, p=probs.numpy()[0]) - new_state, reward, done, _ = self.env.step(action) - if done: - reward = -1 - ep_reward += reward - mem.store(current_state, action, reward) - - if time_count == args.update_freq or done: - # Calculate gradient wrt to local model. 
We do so by tracking the - # variables involved in computing the loss by using tf.GradientTape - with tf.GradientTape() as tape: - total_loss = self.compute_loss(done, - new_state, - mem, - args.gamma) - self.ep_loss += total_loss - # Calculate local gradients - grads = tape.gradient(total_loss, self.local_model.trainable_weights) - # Push local gradients to global model - self.opt.apply_gradients(zip(grads, - self.global_model.trainable_weights)) - # Update local model with new weights - self.local_model.set_weights(self.global_model.get_weights()) - - mem.clear() - time_count = 0 - - if done: # done and print information - Worker.global_moving_average_reward = \ - record(Worker.global_episode, ep_reward, self.worker_idx, - Worker.global_moving_average_reward, self.result_queue, - self.ep_loss, ep_steps) - # We must use a lock to save our model and to print to prevent data races. - if ep_reward > Worker.best_score: - with Worker.save_lock: - print("Saving best model to {}, " - "episode score: {}".format(self.save_dir, ep_reward)) - self.global_model.save_weights( - os.path.join(self.save_dir, - 'model_{}.h5'.format(self.game_name)) - ) - Worker.best_score = ep_reward - Worker.global_episode += 1 - ep_steps += 1 - - time_count += 1 - current_state = new_state - total_step += 1 - self.result_queue.put(None) - - def compute_loss(self, - done, - new_state, - memory, - gamma=0.99): - if done: - reward_sum = 0. 
# terminal - else: - reward_sum = self.local_model( - tf.convert_to_tensor(new_state[None, :], - dtype=tf.float32))[-1].numpy()[0] - - # Get discounted rewards - discounted_rewards = [] - for reward in memory.rewards[::-1]: # reverse buffer r - reward_sum = reward + gamma * reward_sum - discounted_rewards.append(reward_sum) - discounted_rewards.reverse() - - logits, values = self.local_model( - tf.convert_to_tensor(np.vstack(memory.states), - dtype=tf.float32)) - # Get our advantages - advantage = tf.convert_to_tensor(np.array(discounted_rewards)[:, None], - dtype=tf.float32) - values - # Value loss - value_loss = advantage ** 2 - - # Calculate our policy loss - policy = tf.nn.softmax(logits) - entropy = tf.nn.softmax_cross_entropy_with_logits(labels=policy, logits=logits) - - policy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=memory.actions, - logits=logits) - policy_loss *= tf.stop_gradient(advantage) - policy_loss -= 0.01 * entropy - total_loss = tf.reduce_mean((0.5 * value_loss + policy_loss)) - return total_loss - - -if __name__ == '__main__': - print(args) - master = MasterAgent() - if args.train: - master.train() - else: - master.play() - diff --git a/research/adv_imagenet_models/README.md b/research/adv_imagenet_models/README.md deleted file mode 100644 index 6129f7347effe09ef0272de9ac42d4872726fcd1..0000000000000000000000000000000000000000 --- a/research/adv_imagenet_models/README.md +++ /dev/null @@ -1,91 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Adversarially trained ImageNet models - -Pre-trained ImageNet models from the following papers: - -* [Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236) -* [Ensemble Adversarial Training: 
Attacks and Defenses](https://arxiv.org/abs/1705.07204) - -## Contact - -Author: Alexey Kurakin, -github: [AlexeyKurakin](https://github.com/AlexeyKurakin) - -## Pre-requesites and installation - -Ensure that you have installed TensorFlow 1.1 or greater -([instructions](https://www.tensorflow.org/install/)). - -You also need copy of ImageNet dataset if you want to run provided example. -Follow -[Preparing the dataset](https://github.com/tensorflow/models/tree/master/research/slim#Data) -instructions in TF-Slim library to get and preprocess ImageNet data. - -## Available models - -Following pre-trained models are available: - -Network Architecture | Adversarial training | Checkpoint ----------------------|----------------------|---------------- -Inception v3 | Step L.L. | [adv_inception_v3_2017_08_18.tar.gz](http://download.tensorflow.org/models/adv_inception_v3_2017_08_18.tar.gz) -Inception v3 | Step L.L. on ensemble of 3 models | [ens3_adv_inception_v3_2017_08_18.tar.gz](http://download.tensorflow.org/models/ens3_adv_inception_v3_2017_08_18.tar.gz) -Inception v3 | Step L.L. on ensemble of 4 models| [ens4_adv_inception_v3_2017_08_18.tar.gz](http://download.tensorflow.org/models/ens4_adv_inception_v3_2017_08_18.tar.gz) -Inception ResNet v2 | Step L.L. | [adv_inception_resnet_v2_2017_12_18.tar.gz](http://download.tensorflow.org/models/adv_inception_resnet_v2_2017_12_18.tar.gz) -Inception ResNet v2 | Step L.L. on ensemble of 3 models | [ens_adv_inception_resnet_v2_2017_08_18.tar.gz](http://download.tensorflow.org/models/ens_adv_inception_resnet_v2_2017_08_18.tar.gz) - -All checkpoints are compatible with -[TF-Slim](https://github.com/tensorflow/models/tree/master/research/slim) -implementation of Inception v3 and Inception Resnet v2. - -## How to evaluate models on ImageNet test data - -Python script `eval_on_adversarial.py` allow you to evaluate provided models -on white-box adversarial examples generated from ImageNet test set. 
- -Usage is following: - -```bash -# ${MODEL_NAME} - type of network architecture, -# either "inception_v3" or "inception_resnet_v2" -# ${CHECKPOINT_PATH} - path to model checkpoint -# ${DATASET_DIR} - directory with ImageNet test set -# ${ADV_METHOD} - which method to use to generate adversarial images, -# supported method: -# "none" - use clean images from the dataset -# "stepll" - one step towards least likely class method (StepLL), -# see https://arxiv.org/abs/1611.01236 for details -# "stepllnoise" - RAND+StepLL method from https://arxiv.org/abs/1705.07204 -# ${ADV_EPS} - size of adversarial perturbation, ignored when method is none -python eval_on_adversarial.py \ - --model_name=${MODEL_NAME} \ - --checkpoint_path=${CHECKPOINT_PATH} \ - --dataset_dir=${DATASET_DIR} \ - --batch_size=50 \ - --adversarial_method=${ADV_METHOD} \ - --adversarial_eps=${ADV_EPS} -``` - -Below is an example how to evaluate one of the models on RAND+StepLL adversarial -examples: - -```bash -# Download checkpoint -CHECKPOINT_DIR=/tmp/checkpoints -mkdir ${CHECKPOINT_DIR} -wget http://download.tensorflow.org/models/ens_adv_inception_resnet_v2_2017_08_18.tar.gz -tar -xvf ens_adv_inception_resnet_v2_2017_08_18.tar.gz -mv ens_adv_inception_resnet_v2.ckpt* ${CHECKPOINT_DIR} -rm ens_adv_inception_resnet_v2_2017_08_18.tar.gz - -# Run evaluation -python eval_on_adversarial.py \ - --model_name=inception_v3 \ - --checkpoint_path=${CHECKPOINT_DIR}/ens_adv_inception_resnet_v2.ckpt \ - --dataset_dir=${DATASET_DIR} \ - --batch_size=50 \ - --adversarial_method=stepllnoise \ - --adversarial_eps=16 -``` diff --git a/research/adv_imagenet_models/eval_on_adversarial.py b/research/adv_imagenet_models/eval_on_adversarial.py deleted file mode 100644 index f9188845c6c4e10484f9b24797d9ece3b730ffb0..0000000000000000000000000000000000000000 --- a/research/adv_imagenet_models/eval_on_adversarial.py +++ /dev/null @@ -1,331 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Script which evaluates model on adversarial examples.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import imagenet -import inception_resnet_v2 - -import tensorflow as tf -from tensorflow.contrib.slim.nets import inception - - -slim = tf.contrib.slim - -tf.app.flags.DEFINE_integer( - 'batch_size', 50, 'The number of samples in each batch.') - -tf.app.flags.DEFINE_integer( - 'max_num_batches', None, - 'Max number of batches to evaluate by default use all.') - -tf.app.flags.DEFINE_string( - 'master', '', 'The address of the TensorFlow master to use.') - -tf.app.flags.DEFINE_string( - 'checkpoint_path', '/tmp/tfmodel/', - 'The directory where the model was written to or an absolute path to a ' - 'checkpoint file.') - -tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - -tf.app.flags.DEFINE_string( - 'split_name', 'validation', 'The name of the train/test split.') - -tf.app.flags.DEFINE_string( - 'dataset_dir', None, 'The directory where the dataset files are stored.') - -tf.app.flags.DEFINE_string( - 'model_name', 'inception_v3', - 'Name of the model to use, either "inception_v3" or "inception_resnet_v2"') - -tf.app.flags.DEFINE_float( - 'moving_average_decay', None, - 'The decay to use 
for the moving average.' - 'If left as None, then moving averages are not used.') - -tf.app.flags.DEFINE_string( - 'adversarial_method', 'none', - 'What kind of adversarial examples to use for evaluation. ' - 'Could be one of: "none", "stepll", "stepllnoise".') - -tf.app.flags.DEFINE_float( - 'adversarial_eps', 0.0, - 'Size of adversarial perturbation in range [0, 255].') - - -FLAGS = tf.app.flags.FLAGS - - -IMAGE_SIZE = 299 -NUM_CLASSES = 1001 - - -def preprocess_for_eval(image, height, width, - central_fraction=0.875, scope=None): - """Prepare one image for evaluation. - - If height and width are specified it would output an image with that size by - applying resize_bilinear. - If central_fraction is specified it would crop the central fraction of the - input image. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it would converted to tf.float32 assuming that the range - is [0, MAX], where MAX is largest positive representable number for - int(8/16/32) data type (see `tf.image.convert_image_dtype` for details) - height: integer - width: integer - central_fraction: Optional Float, fraction of the image to crop. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. - """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. 
- image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - - -def create_model(x, reuse=None): - """Create model graph. - - Args: - x: input images - reuse: reuse parameter which will be passed to underlying variable scopes. - Should be None first call and True every subsequent call. - - Returns: - (logits, end_points) - tuple of model logits and enpoints - - Raises: - ValueError: if model type specified by --model_name flag is invalid. - """ - if FLAGS.model_name == 'inception_v3': - with slim.arg_scope(inception.inception_v3_arg_scope()): - return inception.inception_v3( - x, num_classes=NUM_CLASSES, is_training=False, reuse=reuse) - elif FLAGS.model_name == 'inception_resnet_v2': - with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope()): - return inception_resnet_v2.inception_resnet_v2( - x, num_classes=NUM_CLASSES, is_training=False, reuse=reuse) - else: - raise ValueError('Invalid model name: %s' % (FLAGS.model_name)) - - -def step_target_class_adversarial_images(x, eps, one_hot_target_class): - """Base code for one step towards target class methods. 
- - Args: - x: source images - eps: size of adversarial perturbation - one_hot_target_class: one hot encoded target classes for all images - - Returns: - tensor with adversarial images - """ - logits, end_points = create_model(x, reuse=True) - cross_entropy = tf.losses.softmax_cross_entropy(one_hot_target_class, - logits, - label_smoothing=0.1, - weights=1.0) - cross_entropy += tf.losses.softmax_cross_entropy(one_hot_target_class, - end_points['AuxLogits'], - label_smoothing=0.1, - weights=0.4) - x_adv = x - eps * tf.sign(tf.gradients(cross_entropy, x)[0]) - x_adv = tf.clip_by_value(x_adv, -1.0, 1.0) - return tf.stop_gradient(x_adv) - - -def stepll_adversarial_images(x, eps): - """One step towards least likely class (Step L.L.) adversarial examples. - - This method is an alternative to FGSM which does not use true classes. - Method is described in the "Adversarial Machine Learning at Scale" paper, - https://arxiv.org/abs/1611.01236 - - Args: - x: source images - eps: size of adversarial perturbation - - Returns: - adversarial images - """ - logits, _ = create_model(x, reuse=True) - least_likely_class = tf.argmin(logits, 1) - one_hot_ll_class = tf.one_hot(least_likely_class, NUM_CLASSES) - return step_target_class_adversarial_images(x, eps, one_hot_ll_class) - - -def stepllnoise_adversarial_images(x, eps): - """Step L.L. with noise method. - - This is an imporvement of Step L.L. method. This method is better against - adversarially trained models which learn to mask gradient. 
- Method is described in the section "New randomized one shot attack" of - "Ensemble Adversarial Training: Attacks and Defenses" paper, - https://arxiv.org/abs/1705.07204 - - Args: - x: source images - eps: size of adversarial perturbation - - Returns: - adversarial images - """ - logits, _ = create_model(x, reuse=True) - least_likely_class = tf.argmin(logits, 1) - one_hot_ll_class = tf.one_hot(least_likely_class, NUM_CLASSES) - x_noise = x + eps / 2 * tf.sign(tf.random_normal(x.shape)) - return step_target_class_adversarial_images(x_noise, eps / 2, - one_hot_ll_class) - - -def get_input_images(dataset_images): - """Gets input images for the evaluation. - - Args: - dataset_images: tensor with dataset images - - Returns: - tensor with input images, which is either dataset images or adversarial - images. - - Raises: - ValueError: if adversarial method specified by --adversarial_method flag - is invalid. - """ - # adversarial_eps defines max difference of values of pixels if - # pixels are in range [0, 255]. However values of dataset pixels are - # in range [-1, 1], so converting epsilon. 
- eps = FLAGS.adversarial_eps / 255 * 2.0 - - if FLAGS.adversarial_method == 'stepll': - return stepll_adversarial_images(dataset_images, eps) - elif FLAGS.adversarial_method == 'stepllnoise': - return stepllnoise_adversarial_images(dataset_images, eps) - elif FLAGS.adversarial_method == 'none': - return dataset_images - else: - raise ValueError('Invalid adversarial method: %s' - % (FLAGS.adversarial_method)) - - -def main(_): - if not FLAGS.dataset_dir: - raise ValueError('You must supply the dataset directory with --dataset_dir') - - tf.logging.set_verbosity(tf.logging.INFO) - with tf.Graph().as_default(): - tf_global_step = tf.train.get_or_create_global_step() - - ################### - # Prepare dataset # - ################### - dataset = imagenet.get_split(FLAGS.split_name, FLAGS.dataset_dir) - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - shuffle=False, - common_queue_capacity=2 * FLAGS.batch_size, - common_queue_min=FLAGS.batch_size) - [dataset_image, label] = provider.get(['image', 'label']) - dataset_image = preprocess_for_eval(dataset_image, IMAGE_SIZE, IMAGE_SIZE) - dataset_images, labels = tf.train.batch( - [dataset_image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) - - ######################################## - # Define the model and input exampeles # - ######################################## - create_model(tf.placeholder(tf.float32, shape=dataset_images.shape)) - input_images = get_input_images(dataset_images) - logits, _ = create_model(input_images, reuse=True) - - if FLAGS.moving_average_decay > 0: - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, tf_global_step) - variables_to_restore = variable_averages.variables_to_restore( - slim.get_model_variables()) - variables_to_restore[tf_global_step.op.name] = tf_global_step - else: - variables_to_restore = slim.get_variables_to_restore() - - ###################### - # 
Define the metrics # - ###################### - predictions = tf.argmax(logits, 1) - labels = tf.squeeze(labels) - names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ - 'Accuracy': slim.metrics.streaming_accuracy(predictions, labels), - 'Recall_5': slim.metrics.streaming_sparse_recall_at_k( - logits, tf.reshape(labels, [-1, 1]), 5), - }) - - ###################### - # Run evaluation # - ###################### - if FLAGS.max_num_batches: - num_batches = FLAGS.max_num_batches - else: - # This ensures that we make a single pass over all of the data. - num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) - - if tf.gfile.IsDirectory(FLAGS.checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - else: - checkpoint_path = FLAGS.checkpoint_path - - tf.logging.info('Evaluating %s' % checkpoint_path) - - top1_accuracy, top5_accuracy = slim.evaluation.evaluate_once( - master=FLAGS.master, - checkpoint_path=checkpoint_path, - logdir=None, - summary_op=None, - num_evals=num_batches, - eval_op=list(names_to_updates.values()), - final_op=[names_to_values['Accuracy'], names_to_values['Recall_5']], - variables_to_restore=variables_to_restore) - - print('Top1 Accuracy: ', top1_accuracy) - print('Top5 Accuracy: ', top5_accuracy) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/adv_imagenet_models/imagenet.py b/research/adv_imagenet_models/imagenet.py deleted file mode 100644 index 26c4c7a388a234f647e446951a0765d1c53184cb..0000000000000000000000000000000000000000 --- a/research/adv_imagenet_models/imagenet.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes. - -Some images have one or more bounding boxes associated with the label of the -image. See details here: http://image-net.org/download-bboxes - -WARNING: Don't use for object detection, in this case all the bounding boxes -of the image belong to just one class. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tensorflow as tf - -slim = tf.contrib.slim - -_FILE_PATTERN = '%s-*' - -_SPLITS_TO_SIZES = { - 'train': 1281167, - 'validation': 50000, -} - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A color image of varying height and width.', - 'label': 'The label id of the image, integer between 0 and 999', - 'label_text': 'The text of the label.', - 'object/bbox': 'A list of bounding boxes.', - 'object/label': 'A list of labels, one per each object.', -} - -_NUM_CLASSES = 1001 - - -def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading ImageNet. - - Args: - split_name: A train/test split name. - dataset_dir: The base directory of the dataset sources. - file_pattern: The file pattern to use when matching the dataset sources. - It is assumed that the pattern contains a '%s' string so that the split - name can be inserted. - reader: The TensorFlow reader type. - - Returns: - A `Dataset` namedtuple. 
- - Raises: - ValueError: if `split_name` is not a valid train/test split. - """ - if split_name not in _SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. - if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature( - (), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature( - (), tf.string, default_value='jpeg'), - 'image/class/label': tf.FixedLenFeature( - [], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature( - [], dtype=tf.string, default_value=''), - 'image/object/bbox/xmin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/xmax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/class/label': tf.VarLenFeature( - dtype=tf.int64), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), - 'label': slim.tfexample_decoder.Tensor('image/class/label'), - 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), - 'object/bbox': slim.tfexample_decoder.BoundingBox( - ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), - 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=_SPLITS_TO_SIZES[split_name], - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - num_classes=_NUM_CLASSES) diff --git a/research/adv_imagenet_models/inception_resnet_v2.py b/research/adv_imagenet_models/inception_resnet_v2.py deleted 
file mode 100644 index 2f690e8d2f70ecde9a55f40375a7f74cd25651c7..0000000000000000000000000000000000000000 --- a/research/adv_imagenet_models/inception_resnet_v2.py +++ /dev/null @@ -1,358 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains the definition of the Inception Resnet V2 architecture. - -As described in http://arxiv.org/abs/1602.07261. 
- - Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -slim = tf.contrib.slim - - -def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 35x35 resnet block.""" - with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 17x17 resnet block.""" - with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], - scope='Conv2d_0b_1x7') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], - scope='Conv2d_0c_7x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - 
activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 8x8 resnet block.""" - with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], - scope='Conv2d_0b_1x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], - scope='Conv2d_0c_3x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def inception_resnet_v2_base(inputs, - final_endpoint='Conv2d_7b_1x1', - output_stride=16, - align_feature_maps=False, - scope=None): - """Inception model from http://arxiv.org/abs/1602.07261. - - Constructs an Inception Resnet v2 network from inputs to the given final - endpoint. This method can construct the network up to the final inception - block Conv2d_7b_1x1. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', - 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] - output_stride: A scalar that specifies the requested ratio of input to - output spatial resolution. Only supports 8 and 16. - align_feature_maps: When true, changes all the VALID paddings in the network - to SAME padding so that the feature maps are aligned. - scope: Optional variable_scope. 
- - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or if the output_stride is not 8 or 16, or if the output_stride is 8 and - we request an end point after 'PreAuxLogits'. - """ - if output_stride != 8 and output_stride != 16: - raise ValueError('output_stride must be 8 or 16.') - - padding = 'SAME' if align_feature_maps else 'VALID' - - end_points = {} - - def add_and_check_final(name, net): - end_points[name] = net - return name == final_endpoint - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # 149 x 149 x 32 - net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, - scope='Conv2d_1a_3x3') - if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points - - # 147 x 147 x 32 - net = slim.conv2d(net, 32, 3, padding=padding, - scope='Conv2d_2a_3x3') - if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points - # 147 x 147 x 64 - net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') - if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points - # 73 x 73 x 64 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_3a_3x3') - if add_and_check_final('MaxPool_3a_3x3', net): return net, end_points - # 73 x 73 x 80 - net = slim.conv2d(net, 80, 1, padding=padding, - scope='Conv2d_3b_1x1') - if add_and_check_final('Conv2d_3b_1x1', net): return net, end_points - # 71 x 71 x 192 - net = slim.conv2d(net, 192, 3, padding=padding, - scope='Conv2d_4a_3x3') - if add_and_check_final('Conv2d_4a_3x3', net): return net, end_points - # 35 x 35 x 192 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_5a_3x3') - if add_and_check_final('MaxPool_5a_3x3', net): return net, end_points - - # 35 x 
35 x 320 - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', - scope='AvgPool_0a_3x3') - tower_pool_1 = slim.conv2d(tower_pool, 64, 1, - scope='Conv2d_0b_1x1') - net = tf.concat( - [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) - - if add_and_check_final('Mixed_5b', net): return net, end_points - # TODO(alemi): Register intermediate endpoints - net = slim.repeat(net, 10, block35, scale=0.17) - - # 17 x 17 x 1088 if output_stride == 8, - # 33 x 33 x 1088 if output_stride == 16 - use_atrous = output_stride == 8 - - with tf.variable_scope('Mixed_6a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, - scope='Conv2d_0b_3x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, - stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) - - if add_and_check_final('Mixed_6a', net): return net, end_points - - # TODO(alemi): register intermediate endpoints - with 
slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): - net = slim.repeat(net, 20, block17, scale=0.10) - if add_and_check_final('PreAuxLogits', net): return net, end_points - - if output_stride == 8: - # TODO(gpapan): Properly support output_stride for the rest of the net. - raise ValueError('output_stride==8 is only supported up to the ' - 'PreAuxlogits end_point for now.') - - # 8 x 8 x 2080 - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d(net, 3, stride=2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat( - [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - - if add_and_check_final('Mixed_7a', net): return net, end_points - - # TODO(alemi): register intermediate endpoints - net = slim.repeat(net, 9, block8, scale=0.20) - net = block8(net, activation_fn=None) - - # 8 x 8 x 1536 - net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') - if add_and_check_final('Conv2d_7b_1x1', net): return net, end_points - - raise ValueError('final_endpoint (%s) not recognized', final_endpoint) - - -def inception_resnet_v2(inputs, num_classes=1001, is_training=True, - dropout_keep_prob=0.8, - reuse=None, - scope='InceptionResnetV2', - create_aux_logits=True): - """Creates the Inception Resnet V2 model. 
- - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - create_aux_logits: Whether to include the auxilliary logits. - - Returns: - logits: the logits outputs of the model. - end_points: the set of end_points from the inception model. - """ - end_points = {} - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs, num_classes], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - - net, end_points = inception_resnet_v2_base(inputs, scope=scope) - - if create_aux_logits: - with tf.variable_scope('AuxLogits'): - aux = end_points['PreAuxLogits'] - aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', - scope='Conv2d_1a_3x3') - aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') - aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a_5x5') - aux = slim.flatten(aux) - aux = slim.fully_connected(aux, num_classes, activation_fn=None, - scope='Logits') - end_points['AuxLogits'] = aux - - with tf.variable_scope('Logits'): - net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', - scope='AvgPool_1a_8x8') - net = slim.flatten(net) - - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='Dropout') - - end_points['PreLogitsFlatten'] = net - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') - - return logits, end_points -inception_resnet_v2.default_image_size = 299 - - -def inception_resnet_v2_arg_scope(weight_decay=0.00004, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001): - 
"""Returns the scope with the default parameters for inception_resnet_v2. - - Args: - weight_decay: the weight decay for weights variables. - batch_norm_decay: decay for the moving average of batch_norm momentums. - batch_norm_epsilon: small float added to variance to avoid dividing by zero. - - Returns: - a arg_scope with the parameters needed for inception_resnet_v2. - """ - # Set weight_decay for weights in conv2d and fully_connected layers. - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_regularizer=slim.l2_regularizer(weight_decay)): - - batch_norm_params = { - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon, - } - # Set activation_fn and parameters for batch_norm. - with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params) as scope: - return scope diff --git a/research/adversarial_crypto/README.md b/research/adversarial_crypto/README.md deleted file mode 100644 index 3822def1325b8d4eb1fd31335f2f8ce053ff747a..0000000000000000000000000000000000000000 --- a/research/adversarial_crypto/README.md +++ /dev/null @@ -1,62 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Learning to Protect Communications with Adversarial Neural Cryptography - -This is a slightly-updated model used for the paper -["Learning to Protect Communications with Adversarial Neural -Cryptography"](https://arxiv.org/abs/1610.06918). - -> We ask whether neural networks can learn to use secret keys to protect -> information from other neural networks. 
Specifically, we focus on ensuring -> confidentiality properties in a multiagent system, and we specify those -> properties in terms of an adversary. Thus, a system may consist of neural -> networks named Alice and Bob, and we aim to limit what a third neural -> network named Eve learns from eavesdropping on the communication between -> Alice and Bob. We do not prescribe specific cryptographic algorithms to -> these neural networks; instead, we train end-to-end, adversarially. -> We demonstrate that the neural networks can learn how to perform forms of -> encryption and decryption, and also how to apply these operations -> selectively in order to meet confidentiality goals. - -This code allows you to train encoder/decoder/adversary network triplets -and evaluate their effectiveness on randomly generated input and key -pairs. - -## Prerequisites - -The only software requirements for running the encoder and decoder is having -TensorFlow installed. - -Requires TensorFlow r0.12 or later. - -## Training and evaluating - -After installing TensorFlow and ensuring that your paths are configured -appropriately: - -``` -python train_eval.py -``` - -This will begin training a fresh model. If and when the model becomes -sufficiently well-trained, it will reset the Eve model multiple times -and retrain it from scratch, outputting the accuracy thus obtained -in each run. - -## Model differences from the paper - -The model has been simplified slightly from the one described in -the paper - the convolutional layer width was reduced by a factor -of two. In the version in the paper, there was a nonlinear unit -after the fully-connected layer; that nonlinear has been removed -here. These changes improve the robustness of training. The -initializer for the convolution layers has switched to the -`tf.contrib.layers default` of `xavier_initializer` instead of -a simpler `truncated_normal`. - -## Contact information - -This model repository is maintained by David G. 
Andersen -([dave-andersen](https://github.com/dave-andersen)). diff --git a/research/adversarial_crypto/train_eval.py b/research/adversarial_crypto/train_eval.py deleted file mode 100644 index df7a00ad50f2ec01b37d8c162309a928207088d6..0000000000000000000000000000000000000000 --- a/research/adversarial_crypto/train_eval.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Adversarial training to learn trivial encryption functions, -from the paper "Learning to Protect Communications with -Adversarial Neural Cryptography", Abadi & Andersen, 2016. - -https://arxiv.org/abs/1610.06918 - -This program creates and trains three neural networks, -termed Alice, Bob, and Eve. Alice takes inputs -in_m (message), in_k (key) and outputs 'ciphertext'. - -Bob takes inputs in_k, ciphertext and tries to reconstruct -the message. - -Eve is an adversarial network that takes input ciphertext -and also tries to reconstruct the message. - -The main function attempts to train these networks and then -evaluates them, all on random plaintext and key values. 
- -""" - -# TensorFlow Python 3 compatibility -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import signal -import sys -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf - -flags = tf.app.flags - -flags.DEFINE_float('learning_rate', 0.0008, 'Constant learning rate') -flags.DEFINE_integer('batch_size', 4096, 'Batch size') - -FLAGS = flags.FLAGS - -# Input and output configuration. -TEXT_SIZE = 16 -KEY_SIZE = 16 - -# Training parameters. -ITERS_PER_ACTOR = 1 -EVE_MULTIPLIER = 2 # Train Eve 2x for every step of Alice/Bob -# Train until either max loops or Alice/Bob "good enough": -MAX_TRAINING_LOOPS = 850000 -BOB_LOSS_THRESH = 0.02 # Exit when Bob loss < 0.02 and Eve > 7.7 bits -EVE_LOSS_THRESH = 7.7 - -# Logging and evaluation. -PRINT_EVERY = 200 # In training, log every 200 steps. -EVE_EXTRA_ROUNDS = 2000 # At end, train eve a bit more. -RETRAIN_EVE_ITERS = 10000 # Retrain eve up to ITERS*LOOPS times. -RETRAIN_EVE_LOOPS = 25 # With an evaluation each loop -NUMBER_OF_EVE_RESETS = 5 # And do this up to 5 times with a fresh eve. -# Use EVAL_BATCHES samples each time we check accuracy. -EVAL_BATCHES = 1 - - -def batch_of_random_bools(batch_size, n): - """Return a batch of random "boolean" numbers. - - Args: - batch_size: Batch size dimension of returned tensor. - n: number of entries per batch. - - Returns: - A [batch_size, n] tensor of "boolean" numbers, where each number is - preresented as -1 or 1. - """ - - as_int = tf.random.uniform( - [batch_size, n], minval=0, maxval=2, dtype=tf.int32) - expanded_range = (as_int * 2) - 1 - return tf.cast(expanded_range, tf.float32) - - -class AdversarialCrypto(object): - """Primary model implementation class for Adversarial Neural Crypto. - - This class contains the code for the model itself, - and when created, plumbs the pathways from Alice to Bob and - Eve, creates the optimizers and loss functions, etc. 
- - Attributes: - eve_loss: Eve's loss function. - bob_loss: Bob's loss function. Different units from eve_loss. - eve_optimizer: A tf op that runs Eve's optimizer. - bob_optimizer: A tf op that runs Bob's optimizer. - bob_reconstruction_loss: Bob's message reconstruction loss, - which is comparable to eve_loss. - reset_eve_vars: Execute this op to completely reset Eve. - """ - - def get_message_and_key(self): - """Generate random pseudo-boolean key and message values.""" - - batch_size = tf.compat.v1.placeholder_with_default(FLAGS.batch_size, shape=[]) - - in_m = batch_of_random_bools(batch_size, TEXT_SIZE) - in_k = batch_of_random_bools(batch_size, KEY_SIZE) - return in_m, in_k - - def model(self, collection, message, key=None): - """The model for Alice, Bob, and Eve. If key=None, the first fully connected layer - takes only the message as inputs. Otherwise, it uses both the key - and the message. - - Args: - collection: The graph keys collection to add new vars to. - message: The input message to process. - key: The input key (if any) to use. - """ - - if key is not None: - combined_message = tf.concat(axis=1, values=[message, key]) - else: - combined_message = message - - # Ensure that all variables created are in the specified collection. - with tf.contrib.framework.arg_scope( - [tf.contrib.layers.fully_connected, tf.contrib.layers.conv2d], - variables_collections=[collection]): - - fc = tf.contrib.layers.fully_connected( - combined_message, - TEXT_SIZE + KEY_SIZE, - biases_initializer=tf.constant_initializer(0.0), - activation_fn=None) - - # Perform a sequence of 1D convolutions (by expanding the message out to 2D - # and then squeezing it back down). 
- fc = tf.expand_dims(fc, 2) # 2D - fc = tf.expand_dims(fc, 3) # 3D -- conv2d needs a depth - # 2,1 -> 1,2 - conv = tf.contrib.layers.conv2d( - fc, 2, 2, 2, 'SAME', activation_fn=tf.nn.sigmoid) - # 1,2 -> 1, 2 - conv = tf.contrib.layers.conv2d( - conv, 2, 1, 1, 'SAME', activation_fn=tf.nn.sigmoid) - # 1,2 -> 1, 1 - conv = tf.contrib.layers.conv2d( - conv, 1, 1, 1, 'SAME', activation_fn=tf.nn.tanh) - conv = tf.squeeze(conv, 3) - conv = tf.squeeze(conv, 2) - return conv - - def __init__(self): - in_m, in_k = self.get_message_and_key() - encrypted = self.model('alice', in_m, in_k) - decrypted = self.model('bob', encrypted, in_k) - eve_out = self.model('eve', encrypted, None) - - self.reset_eve_vars = tf.group( - *[w.initializer for w in tf.compat.v1.get_collection('eve')]) - - optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) - - # Eve's goal is to decrypt the entire message: - eve_bits_wrong = tf.reduce_sum( - tf.abs((eve_out + 1.0) / 2.0 - (in_m + 1.0) / 2.0), [1]) - self.eve_loss = tf.reduce_sum(eve_bits_wrong) - self.eve_optimizer = optimizer.minimize( - self.eve_loss, var_list=tf.compat.v1.get_collection('eve')) - - # Alice and Bob want to be accurate... - self.bob_bits_wrong = tf.reduce_sum( - tf.abs((decrypted + 1.0) / 2.0 - (in_m + 1.0) / 2.0), [1]) - # ... and to not let Eve do better than guessing. - self.bob_reconstruction_loss = tf.reduce_sum(self.bob_bits_wrong) - bob_eve_error_deviation = tf.abs(float(TEXT_SIZE) / 2.0 - eve_bits_wrong) - # 7-9 bits wrong is OK too, so we squish the error function a bit. - # Without doing this, we often tend to hang out at 0.25 / 7.5 error, - # and it seems bad to have continued, high communication error. - bob_eve_loss = tf.reduce_sum( - tf.square(bob_eve_error_deviation) / (TEXT_SIZE / 2)**2) - - # Rescale the losses to [0, 1] per example and combine. 
- self.bob_loss = (self.bob_reconstruction_loss / TEXT_SIZE + bob_eve_loss) - - self.bob_optimizer = optimizer.minimize( - self.bob_loss, - var_list=(tf.compat.v1.get_collection('alice') + tf.compat.v1.get_collection('bob'))) - - -def doeval(s, ac, n, itercount): - """Evaluate the current network on n batches of random examples. - - Args: - s: The current TensorFlow session - ac: an instance of the AdversarialCrypto class - n: The number of iterations to run. - itercount: Iteration count label for logging. - - Returns: - Bob and Eve's loss, as a percent of bits incorrect. - """ - - bob_loss_accum = 0 - eve_loss_accum = 0 - for _ in xrange(n): - bl, el = s.run([ac.bob_reconstruction_loss, ac.eve_loss]) - bob_loss_accum += bl - eve_loss_accum += el - bob_loss_percent = bob_loss_accum / (n * FLAGS.batch_size) - eve_loss_percent = eve_loss_accum / (n * FLAGS.batch_size) - print('%10d\t%20.2f\t%20.2f'%(itercount, bob_loss_percent, eve_loss_percent)) - sys.stdout.flush() - return bob_loss_percent, eve_loss_percent - - -def train_until_thresh(s, ac): - for j in xrange(MAX_TRAINING_LOOPS): - for _ in xrange(ITERS_PER_ACTOR): - s.run(ac.bob_optimizer) - for _ in xrange(ITERS_PER_ACTOR * EVE_MULTIPLIER): - s.run(ac.eve_optimizer) - if j % PRINT_EVERY == 0: - bob_avg_loss, eve_avg_loss = doeval(s, ac, EVAL_BATCHES, j) - if (bob_avg_loss < BOB_LOSS_THRESH and eve_avg_loss > EVE_LOSS_THRESH): - print('Target losses achieved.') - return True - return False - - -def train_and_evaluate(): - """Run the full training and evaluation loop.""" - - ac = AdversarialCrypto() - init = tf.compat.v1.global_variables_initializer() - - with tf.compat.v1.Session() as s: - s.run(init) - print('# Batch size: ', FLAGS.batch_size) - print('# %10s\t%20s\t%20s'%("Iter","Bob_Recon_Error","Eve_Recon_Error")) - - if train_until_thresh(s, ac): - for _ in xrange(EVE_EXTRA_ROUNDS): - s.run(ac.eve_optimizer) - print('Loss after eve extra training:') - doeval(s, ac, EVAL_BATCHES * 2, 0) - for _ in 
xrange(NUMBER_OF_EVE_RESETS): - print('Resetting Eve') - s.run(ac.reset_eve_vars) - eve_counter = 0 - for _ in xrange(RETRAIN_EVE_LOOPS): - for _ in xrange(RETRAIN_EVE_ITERS): - eve_counter += 1 - s.run(ac.eve_optimizer) - doeval(s, ac, EVAL_BATCHES, eve_counter) - doeval(s, ac, EVAL_BATCHES, eve_counter) - - -def main(unused_argv): - # Exit more quietly with Ctrl-C. - signal.signal(signal.SIGINT, signal.SIG_DFL) - train_and_evaluate() - - -if __name__ == '__main__': - tf.compat.v1.app.run() diff --git a/research/adversarial_logit_pairing/README.md b/research/adversarial_logit_pairing/README.md deleted file mode 100644 index d3f576836c4e0fb28eee9882906b18d88a90c564..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/README.md +++ /dev/null @@ -1,281 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Adversarial logit pairing - -This directory contains implementation of -[Adversarial logit pairing](https://arxiv.org/abs/1803.06373) paper as well as -few models pre-trained on ImageNet and Tiny ImageNet. - -Please contact [Alexey Kurakin](https://github.com/AlexeyKurakin) regarding -this code. - -## Pre-requesites - -Code dependencies: - -* TensorFlow 1.8 and Python 2.7 (other versions may work, but were not tested) -* [Abseil Python](https://github.com/abseil/abseil-py). -* Script which converts Tiny Imagenet dataset into TFRecord format also - depends on [Pandas](https://pandas.pydata.org/). - -## Datasets - -To use this code you need to download datasets. You only need to download -those datasets which you're going to use. Following list of datasets is -supported: - -* [ImageNet](http://www.image-net.org/). 
Follow - [Preparing the datasets](https://github.com/tensorflow/models/tree/master/research/slim#Data) - instructions in TF-Slim documentation to download and convert ImageNet dataset - to TFRecord format. - -* [Tiny ImageNet](https://tiny-imagenet.herokuapp.com/). - To obtain Tiny ImageNet dataset do following: - - ``` - # Download zip archive with TinyImagenet - curl -O http://cs231n.stanford.edu/tiny-imagenet-200.zip - - # Extract archive - unzip tiny-imagenet-200.zip - - # Convert dataset to TFRecord format - mkdir tiny-imagenet-tfrecord - python tiny_imagenet_converter/converter.py \ - --input_dir=tiny-imagenet-200 \ - --output_dir=tiny-imagenet-tfrecord - ``` - -## Running the code - -NOTE: Provided code supports distributed training on multiple machines, -and all provided checkpoints were trained in a distributed way. However it is -beyond the scope of this document to describe how to do distributed training. -Readed should refer to -[other material](https://www.tensorflow.org/deploy/distributed) to learn -about it. - -### Training - -Following command runs training: - -``` -# Following arguments has to be specified for training: -# - MAX_NUMBER_OF_TRAINING_STEPS - maximum number of training steps, -# omit this flag or set it to -1 to have unlimited number of training steps. -# - MODEL_NAME - name of the model, now only "resnet_v2_50" is supported. -# - MOVING_AVG_DECAY - decay rate for exponential moving average of the -# trainable variables. Training with exponential moving average usually -# leads to better accuracy. Default of 0.9999. -1 disable exponential moving -# average. Default works well, so typically you set it only if you want -# to disable this feature. -# - HYPERPARAMETERS - string with hyperparameters, -# see model_lib.py for full list of hyperparameters. -# - DATASET - dataset, either "imagenet" or "tiny_imagenet". -# - IMAGE_SIZE - size of the image (single number). -# - OUTPUT_DIRECTORY - directory where to write results. 
-# - IMAGENET_DIR - directory with ImageNet dataset in TFRecord format. -# - TINY_IMAGENET_DIR - directory with Tiny ImageNet dataset in TFRecord format. -# -# Note that only one of IMAGENET_DIR or TINY_IMAGENET_DIR has to be provided -# depending on which dataset you use. -# -python train.py \ - --max_steps="${MAX_NUMBER_OF_TRAINING_STEPS}" \ - --model_name="${MODEL_NAME}" \ - --moving_average_decay="${MOVING_AVG_DECAY}" \ - --hparams="${HYPERPARAMETERS}" \ - --dataset="${DATASET}" \ - --dataset_image_size="${IMAGE_SIZE}" \ - --output_dir="${OUTPUT_DIRECTORY}" \ - --imagenet_data_dir="${IMAGENET_DIR}" \ - --tiny_imagenet_data_dir="${TINY_IMAGENET_DIR}" -``` - -Full list of training hyperparameters could be found in `model_lib.py`. -These hyperparameters control learning rate schedule, optimizer, weight decay, -label smoothing and adversarial training. - -Adversarial training is controlled by following hyperparameters: - -* `train_adv_method` - method which is used to craft adversarial examples during - training. Could be one of the following: - - * `clean` - perform regular training with clean examples; - * `pgd_EPS_STEP_NITER` - use non targeted PGD with maximum size of - perturbation equal to `EPS`, step size equal to `STEP` - and number of iterations equal to `NITER`. Size of perturbation and step - size are expected to be integers between 1 and 255. - * `pgdll_EPS_STEP_NITER` - use targeted PGD, where target class is least - likely prediction of the network. - * `pgdrnd_EPS_STEP_NITER` - use targeted PGD, where target class is chosen - randomly. - -* `train_lp_weight` - weight of adversarial logit pairing loss. If zero or - negarive, then no logit pairing is performed and training is done using - mixed minibatch PGD. If positive then adversarial logit pairing term is added - to the loss. 
- -Below is example of how to run training with adversarial logit pairing on -ImageNet 64x64: - -``` -python train.py \ - --model_name="resnet_v2_50" \ - --hparams="train_adv_method=pgdll_16_2_10,train_lp_weight=0.5" \ - --dataset="imagenet" \ - --dataset_image_size=64 \ - --output_dir="/tmp/adv_train" \ - --imagenet_data_dir="${IMAGENET_DIR}" -``` - -### Fine tuning - -Provided trainin script could be used to fine tune pre-trained checkpoint. -Following command does this: - -``` -# Fine tuning adds following additional arguments: -# - SCOPES_DO_NOT_LOAD_FROM_CHECKPOINT - comma separates list of scopes of -# variables, which should not be loadeded from checkpoint (and default -# initialization should be used instead). -# SCOPES_DO_NOT_LOAD_FROM_CHECKPOINT should be either same or a subset of -# LIST_OF_SCOPES_OF_TRAINABLE_VARS. -# - LIST_OF_SCOPES_OF_TRAINABLE_VARS - comma separated list of scopes of -# trainable variables. Only variables which are prefixed with these scopes -# will be trained. -# - PATH_TO_PRETRAINED_CHECKPOINT - directory with pretrained checkpoint which -# is used as initialization for fine tuning. 
-# -python train.py \ - --max_steps="${MAX_NUMBER_OF_TRAINING_STEPS}" \ - --model_name="${MODEL_NAME}" \ - --moving_average_decay="${MOVING_AVG_DECAY}" \ - --hparams="${HYPERPARAMETERS}" \ - --dataset="${DATASET}" \ - --dataset_image_size="${IMAGE_SIZE}" \ - --output_dir="${OUTPUT_DIRECTORY}" \ - --imagenet_data_dir="${IMAGENET_DIR}" \ - --tiny_imagenet_data_dir="${TINY_IMAGENET_DIR}" \ - --finetune_exclude_pretrained_scopes="${SCOPES_DO_NOT_LOAD_FROM_CHECKPOINT}" \ - --finetune_trainable_scopes="${LIST_OF_SCOPES_OF_TRAINABLE_VARS}" \ - --finetune_checkpoint_path="${PATH_TO_PRETRAINED_CHECKPOINT}" -``` - -Below is an example of how to fine tune last few layers of the model on -Tiny Imagenet dataset: - -``` -python train.py \ - --model_name="resnet_v2_50" \ - --hparams="train_adv_method=pgdll_16_2_10,train_lp_weight=0.5,learning_rate=0.02" \ - --dataset="tiny_imagenet" \ - --dataset_image_size=64 \ - --output_dir="/tmp/adv_finetune" \ - --tiny_imagenet_data_dir="${TINY_IMAGENET_DIR}" \ - --finetune_exclude_pretrained_scopes="resnet_v2_50/logits" \ - --finetune_trainable_scopes="resnet_v2_50/logits,resnet_v2_50/postnorm" \ - --finetune_checkpoint_path="/tmp/adv_train" -``` - -### Evaluation - -Following command runs evaluation: - -``` -# Following arguments should be provided for eval: -# - TRAINING_DIRECTORY - directory where training checkpoints are saved. -# - TRAINABLE_SCOPES - when loading checkpoint which was obtained by fine tuning -# this argument should be the same as LIST_OF_SCOPES_OF_TRAINABLE_VARS -# during training. Otherwise it should be empty. -# This is needed to properly load exponential moving average variables. -# If exponential moving averages are disabled then this flag could be -# omitted. -# - EVAL_SUBDIR_NAME - name of the subdirectory inside TRAINING_DIRECTORY -# where evaluation code will be saving event files. -# - DATASET - name of the dataset. -# - IMAGE_SIZE - size of the image in the dataset. 
-# - DATSET_SPLIT_NAME - name of the split in the dataset, -# either 'train' or 'validation'. Default is 'validation'. -# - MODEL_NAME - name of the model. -# - MOVING_AVG_DECAY - decay rate for exponential moving average. -# - ADV_METHOD_FOR_EVAL - should be "clean" to evaluate on clean example or -# description of the adversarial method to evaluate on adversarial examples. -# - HYPERPARAMETERS - hyperparameters, only "eval_batch_size" matters for eval -# - NUMBER_OF_EXAMPLES - how many examples from the dataset use for evaluation, -# specify -1 to use all examples. -# - EVAL_ONCE - if True then evaluate only once, otherwise keep evaluation -# running repeatedly on new checkpoints. Repeated evaluation might be useful -# when running concurrent with training. -# - IMAGENET_DIR - directory with ImageNet dataset in TFRecord format. -# - TINY_IMAGENET_DIR - directory with Tiny ImageNet dataset in TFRecord format. -# -python eval.py \ - --train_dir="${TRAINING_DIRECTORY} \ - --trainable_scopes="${TRAINABLE_SCOPES}" \ - --eval_name="${EVAL_SUBDIR_NAME}" \ - --dataset="${DATASET}" \ - --dataset_image_size="${IMAGE_SIZE}" \ - --split_name="${DATSET_SPLIT_NAME}" \ - --model_name="${MODEL_NAME}" \ - --moving_average_decay="${MOVING_AVG_DECAY}" \ - --adv_method="${ADV_METHOD_FOR_EVAL}" \ - --hparams="${HYPERPARAMETERS}" \ - --num_examples="${NUMBER_OF_EXAMPLES}" \ - --eval_once="${EVAL_ONCE}" \ - --imagenet_data_dir="${IMAGENET_DIR}" \ - --tiny_imagenet_data_dir="${TINY_IMAGENET_DIR}" -``` - -Example of running evaluation on 10000 of clean examples from ImageNet -training set: - -``` -python eval.py \ - --train_dir=/tmp/adv_train \ - --dataset=imagenet \ - --dataset_image_size=64 \ - --split_name=train \ - --adv_method=clean \ - --hparams="eval_batch_size=50" \ - --num_examples=10000 \ - --eval_once=True \ - --imagenet_data_dir="${IMAGENET_DIR}" -``` - -Example of running evaluatin on adversarial images generated from Tiny ImageNet -validation set using fine-tuned 
checkpoint: - -``` -python eval.py \ - --train_dir=tmp/adv_finetune \ - --trainable_scopes="resnet_v2_50/logits,resnet_v2_50/postnorm" \ - --dataset=tiny_imagenet \ - --dataset_image_size=64 \ - --adv_method=pgdrnd_16_2_10 \ - --hparams="eval_batch_size=50" \ - --eval_once=True \ - --tiny_imagenet_data_dir="${TINY_IMAGENET_DIR}" -``` - -### Pre-trained models - -Following set of pre-trained checkpoints released with this code: - -| Model | Dataset | Accuracy on
clean images | Accuracy on
`pgdll_16_1_20` | Accuracy on
`pgdll_16_2_10` | -| ----------- | ------------ | --------------- | --------------------------- | -------------- | -| [Baseline ResNet-v2-50](http://download.tensorflow.org/models/adversarial_logit_pairing/imagenet64_base_2018_06_26.ckpt.tar.gz) | ImageNet 64x64 | 60.5% | 1.8% | 3.5% | -| [ALP-trained ResNet-v2-50](http://download.tensorflow.org/models/adversarial_logit_pairing/imagenet64_alp025_2018_06_26.ckpt.tar.gz) | ImageNet 64x64 | 55.7% | 27.5% | 27.8% | -| [Baseline ResNet-v2-50](http://download.tensorflow.org/models/adversarial_logit_pairing/tiny_imagenet_base_2018_06_26.ckpt.tar.gz) | Tiny ImageNet | 69.2% | 0.1% | 0.3% | -| [ALP-trained ResNet-v2-50](http://download.tensorflow.org/models/adversarial_logit_pairing/tiny_imagenet_alp05_2018_06_26.ckpt.tar.gz) | Tiny ImageNet | 72.0% | 41.3% | 40.8% | - - -* All provided checkpoints were initially trained with exponential moving - average. However for ease of use they were re-saved without it. - So to load and use provided checkpoints you need to specify - `--moving_average_decay=-1` flag. -* All ALP models were trained with `pgdll_16_2_10` adversarial examples. -* All Tiny Imagenet models were obtained by fine tuning corresponding - ImageNet 64x64 models. ALP-trained models were fine tuned with ALP. diff --git a/research/adversarial_logit_pairing/adversarial_attack.py b/research/adversarial_logit_pairing/adversarial_attack.py deleted file mode 100644 index 804bd64bcf4444007638f9802a83973ee68eb3cf..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/adversarial_attack.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Library with adversarial attacks. - -This library designed to be self-contained and have no dependencies other -than TensorFlow. It only contains PGD / Iterative FGSM attacks, -see https://arxiv.org/abs/1706.06083 and https://arxiv.org/abs/1607.02533 -for details. - -For wider set of adversarial attacks refer to Cleverhans library: -https://github.com/tensorflow/cleverhans -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def generate_pgd_common(x, - bounds, - model_fn, - attack_params, - one_hot_labels, - perturbation_multiplier): - """Common code for generating PGD adversarial examples. - - Args: - x: original examples. - bounds: tuple with bounds of image values, bounds[0] < bounds[1]. - model_fn: model function with signature model_fn(images). - attack_params: parameters of the attack. - one_hot_labels: one hot label vector to use in the loss. - perturbation_multiplier: multiplier of adversarial perturbation, - either +1.0 or -1.0. - - Returns: - Tensor with adversarial examples. - - Raises: - ValueError: if attack parameters are invalid. 
- """ - # parse attack_params - # Format of attack_params: 'EPS_STEP_NITER' - # where EPS - epsilon, STEP - step size, NITER - number of iterations - params_list = attack_params.split('_') - if len(params_list) != 3: - raise ValueError('Invalid parameters of PGD attack: %s' % attack_params) - epsilon = int(params_list[0]) - step_size = int(params_list[1]) - niter = int(params_list[2]) - - # rescale epsilon and step size to image bounds - epsilon = float(epsilon) / 255.0 * (bounds[1] - bounds[0]) - step_size = float(step_size) / 255.0 * (bounds[1] - bounds[0]) - - # clipping boundaries - clip_min = tf.maximum(x - epsilon, bounds[0]) - clip_max = tf.minimum(x + epsilon, bounds[1]) - - # compute starting point - start_x = x + tf.random_uniform(tf.shape(x), -epsilon, epsilon) - start_x = tf.clip_by_value(start_x, clip_min, clip_max) - - # main iteration of PGD - loop_vars = [0, start_x] - - def loop_cond(index, _): - return index < niter - - def loop_body(index, adv_images): - logits = model_fn(adv_images) - loss = tf.reduce_sum( - tf.nn.softmax_cross_entropy_with_logits_v2( - labels=one_hot_labels, - logits=logits)) - perturbation = step_size * tf.sign(tf.gradients(loss, adv_images)[0]) - new_adv_images = adv_images + perturbation_multiplier * perturbation - new_adv_images = tf.clip_by_value(new_adv_images, clip_min, clip_max) - return index + 1, new_adv_images - - with tf.control_dependencies([start_x]): - _, result = tf.while_loop( - loop_cond, - loop_body, - loop_vars, - back_prop=False, - parallel_iterations=1) - return result - - -def generate_pgd_ll(x, bounds, model_fn, attack_params): - # pylint: disable=g-doc-args - """Generats targeted PGD adversarial examples with least likely target class. - - See generate_pgd_common for description of arguments. - - Returns: - Tensor with adversarial examples. 
- """ - # pylint: enable=g-doc-args - - # compute one hot least likely class - logits = model_fn(x) - num_classes = tf.shape(logits)[1] - one_hot_labels = tf.one_hot(tf.argmin(model_fn(x), axis=1), num_classes) - - return generate_pgd_common(x, bounds, model_fn, attack_params, - one_hot_labels=one_hot_labels, - perturbation_multiplier=-1.0) - - -def generate_pgd_rand(x, bounds, model_fn, attack_params): - # pylint: disable=g-doc-args - """Generats targeted PGD adversarial examples with random target class. - - See generate_pgd_common for description of arguments. - - Returns: - Tensor with adversarial examples. - """ - # pylint: enable=g-doc-args - - # compute one hot random class - logits = model_fn(x) - batch_size = tf.shape(logits)[0] - num_classes = tf.shape(logits)[1] - random_labels = tf.random_uniform(shape=[batch_size], - minval=0, - maxval=num_classes, - dtype=tf.int32) - one_hot_labels = tf.one_hot(random_labels, num_classes) - - return generate_pgd_common(x, bounds, model_fn, attack_params, - one_hot_labels=one_hot_labels, - perturbation_multiplier=-1.0) - - -def generate_pgd(x, bounds, model_fn, attack_params): - # pylint: disable=g-doc-args - """Generats non-targeted PGD adversarial examples. - - See generate_pgd_common for description of arguments. - - Returns: - tensor with adversarial examples. - """ - # pylint: enable=g-doc-args - - # compute one hot predicted class - logits = model_fn(x) - num_classes = tf.shape(logits)[1] - one_hot_labels = tf.one_hot(tf.argmax(model_fn(x), axis=1), num_classes) - - return generate_pgd_common(x, bounds, model_fn, attack_params, - one_hot_labels=one_hot_labels, - perturbation_multiplier=1.0) - - -def generate_adversarial_examples(x, bounds, model_fn, attack_description): - """Generates adversarial examples. - - Args: - x: original examples. - bounds: tuple with bounds of image values, bounds[0] < bounds[1] - model_fn: model function with signature model_fn(images). 
- attack_description: string which describes an attack, see notes below for - details. - - Returns: - Tensor with adversarial examples. - - Raises: - ValueError: if attack description is invalid. - - - Attack description could be one of the following strings: - - "clean" - no attack, return original images. - - "pgd_EPS_STEP_NITER" - non-targeted PGD attack. - - "pgdll_EPS_STEP_NITER" - tageted PGD attack with least likely target class. - - "pgdrnd_EPS_STEP_NITER" - targetd PGD attack with random target class. - - Meaning of attack parameters is following: - - EPS - maximum size of adversarial perturbation, between 0 and 255. - - STEP - step size of one iteration of PGD, between 0 and 255. - - NITER - number of iterations. - """ - if attack_description == 'clean': - return x - idx = attack_description.find('_') - if idx < 0: - raise ValueError('Invalid value of attack description %s' - % attack_description) - attack_name = attack_description[:idx] - attack_params = attack_description[idx+1:] - if attack_name == 'pgdll': - return generate_pgd_ll(x, bounds, model_fn, attack_params) - elif attack_name == 'pgdrnd': - return generate_pgd_rand(x, bounds, model_fn, attack_params) - elif attack_name == 'pgd': - return generate_pgd(x, bounds, model_fn, attack_params) - else: - raise ValueError('Invalid value of attack description %s' - % attack_description) - diff --git a/research/adversarial_logit_pairing/datasets/__init__.py b/research/adversarial_logit_pairing/datasets/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/adversarial_logit_pairing/datasets/dataset_factory.py b/research/adversarial_logit_pairing/datasets/dataset_factory.py deleted file mode 100644 index 01c36d4ff4710e1742e989b20a3daef75a6922e1..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/datasets/dataset_factory.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2018 Google Inc. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Library which creates datasets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datasets import imagenet_input -from datasets import tiny_imagenet_input - - -def get_dataset(dataset_name, split, batch_size, image_size, is_training): - """Returns dataset. - - Args: - dataset_name: name of the dataset, "imagenet" or "tiny_imagenet". - split: name of the split, "train" or "validation". - batch_size: size of the minibatch. - image_size: size of the one side of the image. Output images will be - resized to square shape image_size*image_size. - is_training: if True then training preprocessing is done, otherwise eval - preprocessing is done. - - Raises: - ValueError: if dataset_name is invalid. - - Returns: - dataset: instance of tf.data.Dataset with the dataset. - num_examples: number of examples in given split of the dataset. - num_classes: number of classes in the dataset. - bounds: tuple with bounds of image values. All returned image pixels - are between bounds[0] and bounds[1]. 
- """ - if dataset_name == 'tiny_imagenet': - dataset = tiny_imagenet_input.tiny_imagenet_input( - split, batch_size, image_size, is_training) - num_examples = tiny_imagenet_input.num_examples_per_epoch(split) - num_classes = 200 - bounds = (-1, 1) - elif dataset_name == 'imagenet': - dataset = imagenet_input.imagenet_input( - split, batch_size, image_size, is_training) - num_examples = imagenet_input.num_examples_per_epoch(split) - num_classes = 1001 - bounds = (-1, 1) - else: - raise ValueError('Invalid dataset %s' % dataset_name) - return dataset, num_examples, num_classes, bounds diff --git a/research/adversarial_logit_pairing/datasets/imagenet_input.py b/research/adversarial_logit_pairing/datasets/imagenet_input.py deleted file mode 100644 index 0b210b8ce11f3dbf1f14482b1b4f3a95da02a48a..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/datasets/imagenet_input.py +++ /dev/null @@ -1,255 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Imagenet input.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from absl import flags -import tensorflow as tf - -FLAGS = flags.FLAGS - - -flags.DEFINE_string('imagenet_data_dir', None, - 'Directory with Imagenet dataset in TFRecord format.') - - -def _decode_and_random_crop(image_buffer, bbox, image_size): - """Randomly crops image and then scales to target size.""" - with tf.name_scope('distorted_bounding_box_crop', - values=[image_buffer, bbox]): - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.image.extract_jpeg_shape(image_buffer), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.75, 1.33], - area_range=[0.08, 1.0], - max_attempts=10, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, _ = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. 
- offset_y, offset_x, _ = tf.unstack(bbox_begin) - target_height, target_width, _ = tf.unstack(bbox_size) - crop_window = tf.stack([offset_y, offset_x, target_height, target_width]) - image = tf.image.decode_and_crop_jpeg(image_buffer, crop_window, channels=3) - image = tf.image.convert_image_dtype( - image, dtype=tf.float32) - - image = tf.image.resize_bicubic([image], - [image_size, image_size])[0] - - return image - - -def _decode_and_center_crop(image_buffer, image_size): - """Crops to center of image with padding then scales to target size.""" - shape = tf.image.extract_jpeg_shape(image_buffer) - image_height = shape[0] - image_width = shape[1] - - padded_center_crop_size = tf.cast( - 0.875 * tf.cast(tf.minimum(image_height, image_width), tf.float32), - tf.int32) - - offset_height = ((image_height - padded_center_crop_size) + 1) // 2 - offset_width = ((image_width - padded_center_crop_size) + 1) // 2 - crop_window = tf.stack([offset_height, offset_width, - padded_center_crop_size, padded_center_crop_size]) - image = tf.image.decode_and_crop_jpeg(image_buffer, crop_window, channels=3) - image = tf.image.convert_image_dtype( - image, dtype=tf.float32) - - image = tf.image.resize_bicubic([image], - [image_size, image_size])[0] - - return image - - -def _normalize(image): - """Rescale image to [-1, 1] range.""" - return tf.multiply(tf.subtract(image, 0.5), 2.0) - - -def image_preprocessing(image_buffer, bbox, image_size, is_training): - """Does image decoding and preprocessing. - - Args: - image_buffer: string tensor with encoded image. - bbox: bounding box of the object at the image. - image_size: image size. - is_training: whether to do training or eval preprocessing. - - Returns: - Tensor with the image. 
- """ - if is_training: - image = _decode_and_random_crop(image_buffer, bbox, image_size) - image = _normalize(image) - image = tf.image.random_flip_left_right(image) - else: - image = _decode_and_center_crop(image_buffer, image_size) - image = _normalize(image) - image = tf.reshape(image, [image_size, image_size, 3]) - return image - - -def imagenet_parser(value, image_size, is_training): - """Parse an ImageNet record from a serialized string Tensor. - - Args: - value: encoded example. - image_size: size of the output image. - is_training: if True then do training preprocessing, - otherwise do eval preprocessing. - - Returns: - image: tensor with the image. - label: true label of the image. - """ - keys_to_features = { - 'image/encoded': - tf.FixedLenFeature((), tf.string, ''), - 'image/format': - tf.FixedLenFeature((), tf.string, 'jpeg'), - 'image/class/label': - tf.FixedLenFeature([], tf.int64, -1), - 'image/class/text': - tf.FixedLenFeature([], tf.string, ''), - 'image/object/bbox/xmin': - tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymin': - tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/xmax': - tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymax': - tf.VarLenFeature(dtype=tf.float32), - 'image/object/class/label': - tf.VarLenFeature(dtype=tf.int64), - } - - parsed = tf.parse_single_example(value, keys_to_features) - - image_buffer = tf.reshape(parsed['image/encoded'], shape=[]) - - xmin = tf.expand_dims(parsed['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(parsed['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(parsed['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(parsed['image/object/bbox/ymax'].values, 0) - # Note that ordering is (y, x) - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. 
- bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - image = image_preprocessing( - image_buffer=image_buffer, - bbox=bbox, - image_size=image_size, - is_training=is_training - ) - - # Labels are in [1, 1000] range - label = tf.cast( - tf.reshape(parsed['image/class/label'], shape=[]), dtype=tf.int32) - - return image, label - - -def imagenet_input(split, batch_size, image_size, is_training): - """Returns ImageNet dataset. - - Args: - split: name of the split, "train" or "validation". - batch_size: size of the minibatch. - image_size: size of the one side of the image. Output images will be - resized to square shape image_size*image_size. - is_training: if True then training preprocessing is done, otherwise eval - preprocessing is done. - - Raises: - ValueError: if name of the split is incorrect. - - Returns: - Instance of tf.data.Dataset with the dataset. - """ - if split.lower().startswith('train'): - file_pattern = os.path.join(FLAGS.imagenet_data_dir, 'train-*') - elif split.lower().startswith('validation'): - file_pattern = os.path.join(FLAGS.imagenet_data_dir, 'validation-*') - else: - raise ValueError('Invalid split: %s' % split) - - dataset = tf.data.Dataset.list_files(file_pattern, shuffle=is_training) - - if is_training: - dataset = dataset.repeat() - - def fetch_dataset(filename): - return tf.data.TFRecordDataset(filename, buffer_size=8*1024*1024) - - # Read the data from disk in parallel - dataset = dataset.apply( - tf.data.experimental.parallel_interleave( - fetch_dataset, cycle_length=4, sloppy=True)) - dataset = dataset.shuffle(1024) - - # Parse, preprocess, and batch the data in parallel - dataset = dataset.apply( - tf.data.experimental.map_and_batch( - lambda value: imagenet_parser(value, image_size, is_training), - batch_size=batch_size, - num_parallel_batches=4, - drop_remainder=True)) - - def set_shapes(images, labels): - """Statically set the batch_size dimension.""" - images.set_shape(images.get_shape().merge_with( - 
tf.TensorShape([batch_size, None, None, None]))) - labels.set_shape(labels.get_shape().merge_with( - tf.TensorShape([batch_size]))) - return images, labels - - # Assign static batch size dimension - dataset = dataset.map(set_shapes) - - # Prefetch overlaps in-feed with training - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset - - -def num_examples_per_epoch(split): - """Returns the number of examples in the data set. - - Args: - split: name of the split, "train" or "validation". - - Raises: - ValueError: if split name is incorrect. - - Returns: - Number of example in the split. - """ - if split.lower().startswith('train'): - return 1281167 - elif split.lower().startswith('validation'): - return 50000 - else: - raise ValueError('Invalid split: %s' % split) diff --git a/research/adversarial_logit_pairing/datasets/tiny_imagenet_input.py b/research/adversarial_logit_pairing/datasets/tiny_imagenet_input.py deleted file mode 100644 index 6d216d53ed0bd9f6e7a5770510cedc7f3d9f0a42..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/datasets/tiny_imagenet_input.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tiny imagenet input.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from absl import flags -import tensorflow as tf - -FLAGS = flags.FLAGS - - -flags.DEFINE_string('tiny_imagenet_data_dir', None, - 'Directory with Tiny Imagenet dataset in TFRecord format.') - - -def tiny_imagenet_parser(value, image_size, is_training): - """Parses tiny imagenet example. - - Args: - value: encoded example. - image_size: size of the image. - is_training: if True then do training preprocessing (which includes - random cropping), otherwise do eval preprocessing. - - Returns: - image: tensor with the image. - label: true label of the image. - """ - keys_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, ''), - 'label/tiny_imagenet': tf.FixedLenFeature([], tf.int64, -1), - } - - parsed = tf.parse_single_example(value, keys_to_features) - - image_buffer = tf.reshape(parsed['image/encoded'], shape=[]) - image = tf.image.decode_image(image_buffer, channels=3) - image = tf.image.convert_image_dtype( - image, dtype=tf.float32) - - # Crop image - if is_training: - bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=tf.constant([0.0, 0.0, 1.0, 1.0], - dtype=tf.float32, - shape=[1, 1, 4]), - min_object_covered=0.5, - aspect_ratio_range=[0.75, 1.33], - area_range=[0.5, 1.0], - max_attempts=20, - use_image_if_no_bounding_boxes=True) - image = tf.slice(image, bbox_begin, bbox_size) - - # resize image - image = tf.image.resize_bicubic([image], [image_size, image_size])[0] - - # Rescale image to [-1, 1] range. 
- image = tf.multiply(tf.subtract(image, 0.5), 2.0) - - image = tf.reshape(image, [image_size, image_size, 3]) - - # Labels are in [0, 199] range - label = tf.cast( - tf.reshape(parsed['label/tiny_imagenet'], shape=[]), dtype=tf.int32) - - return image, label - - -def tiny_imagenet_input(split, batch_size, image_size, is_training): - """Returns Tiny Imagenet Dataset. - - Args: - split: name of the split, "train" or "validation". - batch_size: size of the minibatch. - image_size: size of the one side of the image. Output images will be - resized to square shape image_size*image_size. - is_training: if True then training preprocessing is done, otherwise eval - preprocessing is done.instance of tf.data.Dataset with the dataset. - - Raises: - ValueError: if name of the split is incorrect. - - Returns: - Instance of tf.data.Dataset with the dataset. - """ - if split.lower().startswith('train'): - filepath = os.path.join(FLAGS.tiny_imagenet_data_dir, 'train.tfrecord') - elif split.lower().startswith('validation'): - filepath = os.path.join(FLAGS.tiny_imagenet_data_dir, 'validation.tfrecord') - else: - raise ValueError('Invalid split: %s' % split) - - dataset = tf.data.TFRecordDataset(filepath, buffer_size=8*1024*1024) - - if is_training: - dataset = dataset.shuffle(10000) - dataset = dataset.repeat() - - dataset = dataset.apply( - tf.data.experimental.map_and_batch( - lambda value: tiny_imagenet_parser(value, image_size, is_training), - batch_size=batch_size, - num_parallel_batches=4, - drop_remainder=True)) - - def set_shapes(images, labels): - """Statically set the batch_size dimension.""" - images.set_shape(images.get_shape().merge_with( - tf.TensorShape([batch_size, None, None, None]))) - labels.set_shape(labels.get_shape().merge_with( - tf.TensorShape([batch_size]))) - return images, labels - - # Assign static batch size dimension - dataset = dataset.map(set_shapes) - - dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - - return dataset - - -def 
num_examples_per_epoch(split): - """Returns the number of examples in the data set. - - Args: - split: name of the split, "train" or "validation". - - Raises: - ValueError: if split name is incorrect. - - Returns: - Number of example in the split. - """ - if split.lower().startswith('train'): - return 100000 - elif split.lower().startswith('validation'): - return 10000 - else: - raise ValueError('Invalid split: %s' % split) diff --git a/research/adversarial_logit_pairing/eval.py b/research/adversarial_logit_pairing/eval.py deleted file mode 100644 index 504cc0b0bcf52edff9e7aaa2c0d051079ba521aa..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/eval.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Program which runs evaluation of Imagenet 64x64 and TinyImagenet models.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from absl import app -from absl import flags - -import tensorflow as tf - -import adversarial_attack -import model_lib -from datasets import dataset_factory - -FLAGS = flags.FLAGS - - -flags.DEFINE_string('train_dir', None, - 'Training directory. 
If specified then this program ' - 'runs in continuous evaluation mode.') - -flags.DEFINE_string('checkpoint_path', None, - 'Path to the file with checkpoint. If specified then ' - 'this program evaluates only provided checkpoint one time.') - -flags.DEFINE_string('output_file', None, - 'Name of output file. Used only in single evaluation mode.') - -flags.DEFINE_string('eval_name', 'default', 'Name for eval subdirectory.') - -flags.DEFINE_string('master', '', 'Tensorflow master.') - -flags.DEFINE_string('model_name', 'resnet_v2_50', 'Name of the model.') - -flags.DEFINE_string('adv_method', 'clean', - 'Method which is used to generate adversarial examples.') - -flags.DEFINE_string('dataset', 'imagenet', - 'Dataset: "tiny_imagenet" or "imagenet".') - -flags.DEFINE_integer('dataset_image_size', 64, - 'Size of the images in the dataset.') - -flags.DEFINE_string('hparams', '', 'Hyper parameters.') - -flags.DEFINE_string('split_name', 'validation', 'Name of the split.') - -flags.DEFINE_float('moving_average_decay', 0.9999, - 'The decay to use for the moving average.') - -flags.DEFINE_integer('eval_interval_secs', 120, - 'The frequency, in seconds, with which evaluation is run.') - -flags.DEFINE_integer( - 'num_examples', -1, - 'If positive - maximum number of example to use for evaluation.') - -flags.DEFINE_bool('eval_once', False, - 'If true then evaluate model only once.') - -flags.DEFINE_string('trainable_scopes', None, - 'If set then it defines list of variable scopes for ' - 'trainable variables.') - - -def main(_): - if not FLAGS.train_dir and not FLAGS.checkpoint_path: - print('Either --train_dir or --checkpoint_path flags has to be provided.') - if FLAGS.train_dir and FLAGS.checkpoint_path: - print('Only one of --train_dir or --checkpoint_path should be provided.') - params = model_lib.default_hparams() - params.parse(FLAGS.hparams) - tf.logging.info('User provided hparams: %s', FLAGS.hparams) - tf.logging.info('All hyper parameters: %s', params) - batch_size = 
params.eval_batch_size - graph = tf.Graph() - with graph.as_default(): - # dataset - dataset, num_examples, num_classes, bounds = dataset_factory.get_dataset( - FLAGS.dataset, - FLAGS.split_name, - batch_size, - FLAGS.dataset_image_size, - is_training=False) - dataset_iterator = dataset.make_one_shot_iterator() - images, labels = dataset_iterator.get_next() - if FLAGS.num_examples > 0: - num_examples = min(num_examples, FLAGS.num_examples) - - # setup model - global_step = tf.train.get_or_create_global_step() - model_fn_two_args = model_lib.get_model(FLAGS.model_name, num_classes) - model_fn = lambda x: model_fn_two_args(x, is_training=False) - if not FLAGS.adv_method or FLAGS.adv_method == 'clean': - logits = model_fn(images) - else: - adv_examples = adversarial_attack.generate_adversarial_examples( - images, bounds, model_fn, FLAGS.adv_method) - logits = model_fn(adv_examples) - - # update trainable variables if fine tuning is used - model_lib.filter_trainable_variables(FLAGS.trainable_scopes) - - # Setup the moving averages - if FLAGS.moving_average_decay and (FLAGS.moving_average_decay > 0): - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, global_step) - variables_to_restore = variable_averages.variables_to_restore( - tf.contrib.framework.get_model_variables()) - variables_to_restore[global_step.op.name] = global_step - else: - variables_to_restore = tf.contrib.framework.get_variables_to_restore() - - # Setup evaluation metric - with tf.name_scope('Eval'): - names_to_values, names_to_updates = ( - tf.contrib.metrics.aggregate_metric_map({ - 'Accuracy': tf.metrics.accuracy(labels, tf.argmax(logits, 1)), - 'Top5': tf.metrics.recall_at_k(tf.to_int64(labels), logits, 5) - })) - - for name, value in names_to_values.iteritems(): - tf.summary.scalar(name, value) - - # Run evaluation - num_batches = int(num_examples / batch_size) - if FLAGS.train_dir: - output_dir = os.path.join(FLAGS.train_dir, FLAGS.eval_name) - if not 
tf.gfile.Exists(output_dir): - tf.gfile.MakeDirs(output_dir) - tf.contrib.training.evaluate_repeatedly( - FLAGS.train_dir, - master=FLAGS.master, - scaffold=tf.train.Scaffold( - saver=tf.train.Saver(variables_to_restore)), - eval_ops=names_to_updates.values(), - eval_interval_secs=FLAGS.eval_interval_secs, - hooks=[ - tf.contrib.training.StopAfterNEvalsHook(num_batches), - tf.contrib.training.SummaryAtEndHook(output_dir), - tf.train.LoggingTensorHook(names_to_values, at_end=True), - ], - max_number_of_evaluations=1 if FLAGS.eval_once else None) - else: - result = tf.contrib.training.evaluate_once( - FLAGS.checkpoint_path, - master=FLAGS.master, - scaffold=tf.train.Scaffold( - saver=tf.train.Saver(variables_to_restore)), - eval_ops=names_to_updates.values(), - final_ops=names_to_values, - hooks=[ - tf.contrib.training.StopAfterNEvalsHook(num_batches), - tf.train.LoggingTensorHook(names_to_values, at_end=True), - ]) - if FLAGS.output_file: - with tf.gfile.Open(FLAGS.output_file, 'a') as f: - f.write('%s,%.3f,%.3f\n' - % (FLAGS.eval_name, result['Accuracy'], result['Top5'])) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/adversarial_logit_pairing/model_lib.py b/research/adversarial_logit_pairing/model_lib.py deleted file mode 100644 index 1499a378ea1ba6511122ebe54ceed1226d38d649..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/model_lib.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Library with common functions for training and eval.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six - -import tensorflow as tf - -from tensorflow.contrib.slim.nets import resnet_v2 - - -def default_hparams(): - """Returns default hyperparameters.""" - return tf.contrib.training.HParams( - # Batch size for training and evaluation. - batch_size=32, - eval_batch_size=50, - - # General training parameters. - weight_decay=0.0001, - label_smoothing=0.1, - - # Parameters of the adversarial training. - train_adv_method='clean', # adversarial training method - train_lp_weight=0.0, # Weight of adversarial logit pairing loss - - # Parameters of the optimizer. - optimizer='rms', # possible values are: 'rms', 'momentum', 'adam' - momentum=0.9, # momentum - rmsprop_decay=0.9, # Decay term for RMSProp - rmsprop_epsilon=1.0, # Epsilon term for RMSProp - - # Parameters of learning rate schedule. - lr_schedule='exp_decay', # Possible values: 'exp_decay', 'step', 'fixed' - learning_rate=0.045, - lr_decay_factor=0.94, # Learning exponential decay - lr_num_epochs_per_decay=2.0, # Number of epochs per lr decay - lr_list=[1.0 / 6, 2.0 / 6, 3.0 / 6, - 4.0 / 6, 5.0 / 6, 1.0, 0.1, 0.01, - 0.001, 0.0001], - lr_decay_epochs=[1, 2, 3, 4, 5, 30, 60, 80, - 90]) - - -def get_lr_schedule(hparams, examples_per_epoch, replicas_to_aggregate=1): - """Returns TensorFlow op which compute learning rate. - - Args: - hparams: hyper parameters. - examples_per_epoch: number of training examples per epoch. - replicas_to_aggregate: number of training replicas running in parallel. - - Raises: - ValueError: if learning rate schedule specified in hparams is incorrect. - - Returns: - learning_rate: tensor with learning rate. 
- steps_per_epoch: number of training steps per epoch. - """ - global_step = tf.train.get_or_create_global_step() - steps_per_epoch = float(examples_per_epoch) / float(hparams.batch_size) - if replicas_to_aggregate > 0: - steps_per_epoch /= replicas_to_aggregate - - if hparams.lr_schedule == 'exp_decay': - decay_steps = long(steps_per_epoch * hparams.lr_num_epochs_per_decay) - learning_rate = tf.train.exponential_decay( - hparams.learning_rate, - global_step, - decay_steps, - hparams.lr_decay_factor, - staircase=True) - elif hparams.lr_schedule == 'step': - lr_decay_steps = [long(epoch * steps_per_epoch) - for epoch in hparams.lr_decay_epochs] - learning_rate = tf.train.piecewise_constant( - global_step, lr_decay_steps, hparams.lr_list) - elif hparams.lr_schedule == 'fixed': - learning_rate = hparams.learning_rate - else: - raise ValueError('Invalid value of lr_schedule: %s' % hparams.lr_schedule) - - if replicas_to_aggregate > 0: - learning_rate *= replicas_to_aggregate - - return learning_rate, steps_per_epoch - - -def get_optimizer(hparams, learning_rate): - """Returns optimizer. - - Args: - hparams: hyper parameters. - learning_rate: learning rate tensor. - - Raises: - ValueError: if type of optimizer specified in hparams is incorrect. - - Returns: - Instance of optimizer class. - """ - if hparams.optimizer == 'rms': - optimizer = tf.train.RMSPropOptimizer(learning_rate, - hparams.rmsprop_decay, - hparams.momentum, - hparams.rmsprop_epsilon) - elif hparams.optimizer == 'momentum': - optimizer = tf.train.MomentumOptimizer(learning_rate, - hparams.momentum) - elif hparams.optimizer == 'adam': - optimizer = tf.train.AdamOptimizer(learning_rate) - else: - raise ValueError('Invalid value of optimizer: %s' % hparams.optimizer) - return optimizer - - -RESNET_MODELS = {'resnet_v2_50': resnet_v2.resnet_v2_50} - - -def get_model(model_name, num_classes): - """Returns function which creates model. - - Args: - model_name: Name of the model. 
- num_classes: Number of classes. - - Raises: - ValueError: If model_name is invalid. - - Returns: - Function, which creates model when called. - """ - if model_name.startswith('resnet'): - def resnet_model(images, is_training, reuse=tf.AUTO_REUSE): - with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()): - resnet_fn = RESNET_MODELS[model_name] - logits, _ = resnet_fn(images, num_classes, is_training=is_training, - reuse=reuse) - logits = tf.reshape(logits, [-1, num_classes]) - return logits - return resnet_model - else: - raise ValueError('Invalid model: %s' % model_name) - - -def filter_trainable_variables(trainable_scopes): - """Keep only trainable variables which are prefixed with given scopes. - - Args: - trainable_scopes: either list of trainable scopes or string with comma - separated list of trainable scopes. - - This function removes all variables which are not prefixed with given - trainable_scopes from collection of trainable variables. - Useful during network fine tuning, when you only need to train subset of - variables. 
- """ - if not trainable_scopes: - return - if isinstance(trainable_scopes, six.string_types): - trainable_scopes = [scope.strip() for scope in trainable_scopes.split(',')] - trainable_scopes = {scope for scope in trainable_scopes if scope} - if not trainable_scopes: - return - trainable_collection = tf.get_collection_ref( - tf.GraphKeys.TRAINABLE_VARIABLES) - non_trainable_vars = [ - v for v in trainable_collection - if not any([v.op.name.startswith(s) for s in trainable_scopes]) - ] - for v in non_trainable_vars: - trainable_collection.remove(v) diff --git a/research/adversarial_logit_pairing/tiny_imagenet_converter/converter.py b/research/adversarial_logit_pairing/tiny_imagenet_converter/converter.py deleted file mode 100644 index 4fdccc32071f8c677bb1395e324c6b94aa7e85af..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/tiny_imagenet_converter/converter.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright 2018 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Converts Tiny Imagenet dataset into TFRecord format. 
- -As an output this program generates following files in TFRecord format: -- train.tfrecord -- validation.tfrecord -- test.tfrecord - -Generated train and validation files will contain tf.Example entries with -following features: -- image/encoded - encoded image -- image/format - image format -- label/wnid - label WordNet ID -- label/imagenet - imagenet label [1 ... 1000] -- label/tiny_imagenet - tiny imagenet label [0 ... 199] -- bbox/xmin -- bbox/ymin -- bbox/xmax -- bbox/ymax - -Test file will contain entries with 'image/encoded' and 'image/format' features. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple -import os -import random - -from absl import app -from absl import flags -from absl import logging - -import pandas as pd - -import tensorflow as tf - - -FLAGS = flags.FLAGS - -flags.DEFINE_string('input_dir', '', 'Input directory') -flags.DEFINE_string('output_dir', '', 'Output directory') - -flags.DEFINE_string('imagenet_synsets_path', '', - 'Optional path to /imagenet_lsvrc_2015_synsets.txt') - - -ImageMetadata = namedtuple('ImageMetadata', ['label', 'x1', 'y1', 'x2', 'y2']) - - -class WnIdToNodeIdConverter(object): - """Converts WordNet IDs to numerical labels.""" - - def __init__(self, wnids_path, background_class): - self._wnid_to_node_id = {} - self._node_id_to_wnid = {} - with tf.gfile.Open(wnids_path) as f: - wnids_sequence = [wnid.strip() for wnid in f.readlines() if wnid.strip()] - node_id_offset = 1 if background_class else 0 - for i, label in enumerate(wnids_sequence): - self._wnid_to_node_id[label] = i + node_id_offset - self._node_id_to_wnid[i + node_id_offset] = label - - def to_node_id(self, wnid): - return self._wnid_to_node_id[wnid] - - def to_wnid(self, node_id): - return self._node_id_to_wnid[node_id] - - def all_wnids(self): - return self._wnid_to_node_id.keys() - - -def read_tiny_imagenet_annotations(annotations_filename, - 
images_dir, - one_label=None): - """Reads one file with Tiny Imagenet annotations.""" - result = [] - if one_label: - column_names = ['filename', 'x1', 'y1', 'x2', 'y2'] - else: - column_names = ['filename', 'label', 'x1', 'y1', 'x2', 'y2'] - with tf.gfile.Open(annotations_filename) as f: - data = pd.read_csv(f, sep='\t', names=column_names) - for row in data.itertuples(): - label = one_label if one_label else getattr(row, 'label') - full_filename = os.path.join(images_dir, getattr(row, 'filename')) - result.append((full_filename, - ImageMetadata(label=label, - x1=getattr(row, 'x1'), - y1=getattr(row, 'y1'), - x2=getattr(row, 'x2'), - y2=getattr(row, 'y2')))) - return result - - -def read_validation_annotations(validation_dir): - """Reads validation data annotations.""" - return read_tiny_imagenet_annotations( - os.path.join(validation_dir, 'val_annotations.txt'), - os.path.join(validation_dir, 'images')) - - -def read_training_annotations(training_dir): - """Reads training data annotations.""" - result = [] - sub_dirs = tf.gfile.ListDirectory(training_dir) - for sub_dir in sub_dirs: - if not sub_dir.startswith('n'): - logging.warning('Found non-class directory in training dir: %s', sub_dir) - continue - sub_dir_results = read_tiny_imagenet_annotations( - os.path.join(training_dir, sub_dir, sub_dir + '_boxes.txt'), - os.path.join(training_dir, sub_dir, 'images'), - one_label=sub_dir) - result.extend(sub_dir_results) - return result - - -def read_test_annotations(test_dir): - """Reads test data annotations.""" - files = tf.gfile.ListDirectory(os.path.join(test_dir, 'images')) - return [(os.path.join(test_dir, 'images', f), None) - for f in files if f.endswith('.JPEG')] - - -def get_image_format(filename): - """Returns image format from filename.""" - filename = filename.lower() - if filename.endswith('jpeg') or filename.endswith('jpg'): - return 'jpeg' - elif filename.endswith('png'): - return 'png' - else: - raise ValueError('Unrecognized file format: %s' % 
filename) - - -class TinyImagenetWriter(object): - """Helper class which writes Tiny Imagenet dataset into TFRecord file.""" - - def __init__(self, tiny_imagenet_wnid_conveter, imagenet_wnid_converter): - self.tiny_imagenet_wnid_conveter = tiny_imagenet_wnid_conveter - self.imagenet_wnid_converter = imagenet_wnid_converter - - def write_tf_record(self, - annotations, - output_file): - """Generates TFRecord file from given list of annotations.""" - with tf.python_io.TFRecordWriter(output_file) as writer: - for image_filename, image_metadata in annotations: - with tf.gfile.Open(image_filename) as f: - image_buffer = f.read() - image_format = get_image_format(image_filename) - features = { - 'image/encoded': tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image_buffer])), - 'image/format': tf.train.Feature( - bytes_list=tf.train.BytesList(value=[image_format])) - } - if image_metadata: - # bounding box features - features['bbox/xmin'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[image_metadata.x1])) - features['bbox/ymin'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[image_metadata.y1])) - features['bbox/xmax'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[image_metadata.x2])) - features['bbox/ymax'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[image_metadata.y2])) - # tiny imagenet label, from [0, 200) iterval - tiny_imagenet_label = self.tiny_imagenet_wnid_conveter.to_node_id( - image_metadata.label) - features['label/wnid'] = tf.train.Feature( - bytes_list=tf.train.BytesList(value=image_metadata.label)) - features['label/tiny_imagenet'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[tiny_imagenet_label])) - # full imagenet label, from [1, 1001) interval - if self.imagenet_wnid_converter: - imagenet_label = self.imagenet_wnid_converter.to_node_id( - image_metadata.label) - features['label/imagenet'] = tf.train.Feature( - int64_list=tf.train.Int64List(value=[imagenet_label])) - example = 
tf.train.Example(features=tf.train.Features(feature=features)) - writer.write(example.SerializeToString()) - - -def main(_): - assert FLAGS.input_dir, 'Input directory must be provided' - assert FLAGS.output_dir, 'Output directory must be provided' - - # Create WordNet ID conveters for tiny imagenet and possibly for imagenet - tiny_imagenet_wnid_conveter = WnIdToNodeIdConverter( - os.path.join(FLAGS.input_dir, 'wnids.txt'), - background_class=False) - if FLAGS.imagenet_synsets_path: - imagenet_wnid_converter = WnIdToNodeIdConverter(FLAGS.imagenet_synsets_path, - background_class=True) - else: - imagenet_wnid_converter = None - - # read tiny imagenet annotations - train_annotations = read_training_annotations( - os.path.join(FLAGS.input_dir, 'train')) - random.shuffle(train_annotations) - val_annotations = read_validation_annotations( - os.path.join(FLAGS.input_dir, 'val')) - test_filenames = read_test_annotations(os.path.join(FLAGS.input_dir, 'test')) - - # Generate TFRecord files - writer = TinyImagenetWriter(tiny_imagenet_wnid_conveter, - imagenet_wnid_converter) - tf.logging.info('Converting %d training images', len(train_annotations)) - writer.write_tf_record(train_annotations, - os.path.join(FLAGS.output_dir, 'train.tfrecord')) - tf.logging.info('Converting %d validation images ', len(val_annotations)) - writer.write_tf_record(val_annotations, - os.path.join(FLAGS.output_dir, 'validation.tfrecord')) - tf.logging.info('Converting %d test images', len(test_filenames)) - writer.write_tf_record(test_filenames, - os.path.join(FLAGS.output_dir, 'test.tfrecord')) - tf.logging.info('All files are converted') - - -if __name__ == '__main__': - app.run(main) diff --git a/research/adversarial_logit_pairing/train.py b/research/adversarial_logit_pairing/train.py deleted file mode 100644 index dd20969f8d09c59f7d294ee34a9e41bd44f86b39..0000000000000000000000000000000000000000 --- a/research/adversarial_logit_pairing/train.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2018 
Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Program which train models.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import app -from absl import flags - -import tensorflow as tf - -import adversarial_attack -import model_lib -from datasets import dataset_factory - -FLAGS = flags.FLAGS - - -flags.DEFINE_integer('max_steps', -1, 'Number of steps to stop at.') - -flags.DEFINE_string('output_dir', None, - 'Training directory where checkpoints will be saved.') - -flags.DEFINE_integer('ps_tasks', 0, 'Number of parameter servers.') - -flags.DEFINE_integer('task', 0, 'Task ID for running distributed training.') - -flags.DEFINE_string('master', '', 'Tensorflow master.') - -flags.DEFINE_string('model_name', 'resnet_v2_50', 'Name of the model.') - -flags.DEFINE_string('dataset', 'imagenet', - 'Dataset: "tiny_imagenet" or "imagenet".') - -flags.DEFINE_integer('dataset_image_size', 64, - 'Size of the images in the dataset.') - -flags.DEFINE_integer('num_summary_images', 3, - 'Number of images to display in Tensorboard.') - -flags.DEFINE_integer( - 'save_summaries_steps', 100, - 'The frequency with which summaries are saved, in steps.') - -flags.DEFINE_integer( - 'save_summaries_secs', None, - 'The frequency with which summaries are saved, in seconds.') - 
-flags.DEFINE_integer( - 'save_model_steps', 500, - 'The frequency with which the model is saved, in steps.') - -flags.DEFINE_string('hparams', '', 'Hyper parameters.') - -flags.DEFINE_integer('replicas_to_aggregate', 1, - 'Number of gradients to collect before param updates.') - -flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas.') - -flags.DEFINE_float('moving_average_decay', 0.9999, - 'The decay to use for the moving average.') - -# Flags to control fine tuning - -flags.DEFINE_string('finetune_checkpoint_path', None, - 'Path to checkpoint for fine tuning. ' - 'If None then no fine tuning is done.') - -flags.DEFINE_string('finetune_exclude_pretrained_scopes', '', - 'Variable scopes to exclude when loading checkpoint for ' - 'fine tuning.') - -flags.DEFINE_string('finetune_trainable_scopes', None, - 'If set then it defines list of variable scopes for ' - 'trainable variables.') - - -def _get_finetuning_init_fn(variable_averages): - """Returns an init functions, used for fine tuning.""" - if not FLAGS.finetune_checkpoint_path: - return None - - if tf.train.latest_checkpoint(FLAGS.output_dir): - return None - - if tf.gfile.IsDirectory(FLAGS.finetune_checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(FLAGS.finetune_checkpoint_path) - else: - checkpoint_path = FLAGS.finetune_checkpoint_path - - if not checkpoint_path: - tf.logging.warning('Not doing fine tuning, can not find checkpoint in %s', - FLAGS.finetune_checkpoint_path) - return None - - tf.logging.info('Fine-tuning from %s', checkpoint_path) - - if FLAGS.finetune_exclude_pretrained_scopes: - exclusions = { - scope.strip() - for scope in FLAGS.finetune_exclude_pretrained_scopes.split(',') - } - else: - exclusions = set() - - filtered_model_variables = [ - v for v in tf.contrib.framework.get_model_variables() - if not any([v.op.name.startswith(e) for e in exclusions]) - ] - - if variable_averages: - variables_to_restore = {} - for v in filtered_model_variables: - # 
variables_to_restore[variable_averages.average_name(v)] = v - if v in tf.trainable_variables(): - variables_to_restore[variable_averages.average_name(v)] = v - else: - variables_to_restore[v.op.name] = v - else: - variables_to_restore = {v.op.name: v for v in filtered_model_variables} - - assign_fn = tf.contrib.framework.assign_from_checkpoint_fn( - checkpoint_path, - variables_to_restore) - if assign_fn: - return lambda _, sess: assign_fn(sess) - else: - return None - - -def main(_): - assert FLAGS.output_dir, '--output_dir has to be provided' - if not tf.gfile.Exists(FLAGS.output_dir): - tf.gfile.MakeDirs(FLAGS.output_dir) - params = model_lib.default_hparams() - params.parse(FLAGS.hparams) - tf.logging.info('User provided hparams: %s', FLAGS.hparams) - tf.logging.info('All hyper parameters: %s', params) - batch_size = params.batch_size - graph = tf.Graph() - with graph.as_default(): - with tf.device(tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks)): - # dataset - dataset, examples_per_epoch, num_classes, bounds = ( - dataset_factory.get_dataset( - FLAGS.dataset, - 'train', - batch_size, - FLAGS.dataset_image_size, - is_training=True)) - dataset_iterator = dataset.make_one_shot_iterator() - images, labels = dataset_iterator.get_next() - one_hot_labels = tf.one_hot(labels, num_classes) - - # set up model - global_step = tf.train.get_or_create_global_step() - model_fn = model_lib.get_model(FLAGS.model_name, num_classes) - if params.train_adv_method == 'clean': - logits = model_fn(images, is_training=True) - adv_examples = None - else: - model_fn_eval_mode = lambda x: model_fn(x, is_training=False) - adv_examples = adversarial_attack.generate_adversarial_examples( - images, bounds, model_fn_eval_mode, params.train_adv_method) - all_examples = tf.concat([images, adv_examples], axis=0) - logits = model_fn(all_examples, is_training=True) - one_hot_labels = tf.concat([one_hot_labels, one_hot_labels], axis=0) - - # update trainable variables if fine tuning is used 
- model_lib.filter_trainable_variables( - FLAGS.finetune_trainable_scopes) - - # set up losses - total_loss = tf.losses.softmax_cross_entropy( - onehot_labels=one_hot_labels, - logits=logits, - label_smoothing=params.label_smoothing) - tf.summary.scalar('loss_xent', total_loss) - - if params.train_lp_weight > 0: - images1, images2 = tf.split(logits, 2) - loss_lp = tf.losses.mean_squared_error( - images1, images2, weights=params.train_lp_weight) - tf.summary.scalar('loss_lp', loss_lp) - total_loss += loss_lp - - if params.weight_decay > 0: - loss_wd = ( - params.weight_decay - * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) - ) - tf.summary.scalar('loss_wd', loss_wd) - total_loss += loss_wd - - # Setup the moving averages: - if FLAGS.moving_average_decay and (FLAGS.moving_average_decay > 0): - with tf.name_scope('moving_average'): - moving_average_variables = tf.contrib.framework.get_model_variables() - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, global_step) - else: - moving_average_variables = None - variable_averages = None - - # set up optimizer and training op - learning_rate, steps_per_epoch = model_lib.get_lr_schedule( - params, examples_per_epoch, FLAGS.replicas_to_aggregate) - - optimizer = model_lib.get_optimizer(params, learning_rate) - - optimizer = tf.train.SyncReplicasOptimizer( - opt=optimizer, - replicas_to_aggregate=FLAGS.replicas_to_aggregate, - total_num_replicas=FLAGS.worker_replicas, - variable_averages=variable_averages, - variables_to_average=moving_average_variables) - - train_op = tf.contrib.training.create_train_op( - total_loss, optimizer, - update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - - tf.summary.image('images', images[0:FLAGS.num_summary_images]) - if adv_examples is not None: - tf.summary.image('adv_images', adv_examples[0:FLAGS.num_summary_images]) - tf.summary.scalar('total_loss', total_loss) - tf.summary.scalar('learning_rate', learning_rate) - 
tf.summary.scalar('current_epoch', - tf.to_double(global_step) / steps_per_epoch) - - # Training - is_chief = FLAGS.task == 0 - - scaffold = tf.train.Scaffold( - init_fn=_get_finetuning_init_fn(variable_averages)) - hooks = [ - tf.train.LoggingTensorHook({'total_loss': total_loss, - 'global_step': global_step}, - every_n_iter=1), - tf.train.NanTensorHook(total_loss), - ] - chief_only_hooks = [ - tf.train.SummarySaverHook(save_steps=FLAGS.save_summaries_steps, - save_secs=FLAGS.save_summaries_secs, - output_dir=FLAGS.output_dir, - scaffold=scaffold), - tf.train.CheckpointSaverHook(FLAGS.output_dir, - save_steps=FLAGS.save_model_steps, - scaffold=scaffold), - ] - - if FLAGS.max_steps > 0: - hooks.append( - tf.train.StopAtStepHook(last_step=FLAGS.max_steps)) - - # hook for sync replica training - hooks.append(optimizer.make_session_run_hook(is_chief)) - - with tf.train.MonitoredTrainingSession( - master=FLAGS.master, - is_chief=is_chief, - checkpoint_dir=FLAGS.output_dir, - scaffold=scaffold, - hooks=hooks, - chief_only_hooks=chief_only_hooks, - save_checkpoint_secs=None, - save_summaries_steps=None, - save_summaries_secs=None) as session: - while not session.should_stop(): - session.run([train_op]) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/adversarial_text/README.md b/research/adversarial_text/README.md index 36b5d657615882908719c32ecdabcd75c2668382..643ed8b556fc03a99116163e2d19509057aa34a8 100644 --- a/research/adversarial_text/README.md +++ b/research/adversarial_text/README.md @@ -54,7 +54,7 @@ $ PRETRAIN_DIR=/tmp/models/imdb_pretrain $ python pretrain.py \ --train_dir=$PRETRAIN_DIR \ --data_dir=$IMDB_DATA_DIR \ - --vocab_size=86934 \ + --vocab_size=87007 \ --embedding_dims=256 \ --rnn_cell_size=1024 \ --num_candidate_samples=1024 \ @@ -83,7 +83,7 @@ $ python train_classifier.py \ --train_dir=$TRAIN_DIR \ --pretrained_model_dir=$PRETRAIN_DIR \ --data_dir=$IMDB_DATA_DIR \ - --vocab_size=86934 \ + --vocab_size=87007 \ --embedding_dims=256 
\ --rnn_cell_size=1024 \ --cl_num_layers=1 \ @@ -111,7 +111,7 @@ $ python evaluate.py \ --run_once \ --num_examples=25000 \ --data_dir=$IMDB_DATA_DIR \ - --vocab_size=86934 \ + --vocab_size=87007 \ --embedding_dims=256 \ --rnn_cell_size=1024 \ --batch_size=256 \ diff --git a/research/attention_ocr/README.md b/research/attention_ocr/README.md index b7b2b5eb9ec2de79d547033446cf7155070c27c2..f2042e573fa5a1cecc3cd620c38e45be89c78dc1 100644 --- a/research/attention_ocr/README.md +++ b/research/attention_ocr/README.md @@ -1,4 +1,4 @@ -## Attention-based Extraction of Structured Information from Street View Imagery +# Attention-based Extraction of Structured Information from Street View Imagery [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/attention-based-extraction-of-structured/optical-character-recognition-on-fsns-test)](https://paperswithcode.com/sota/optical-character-recognition-on-fsns-test?p=attention-based-extraction-of-structured) [![Paper](http://img.shields.io/badge/paper-arXiv.1704.03549-B3181B.svg)](https://arxiv.org/abs/1704.03549) @@ -7,14 +7,20 @@ *A TensorFlow model for real-world image text extraction problems.* This folder contains the code needed to train a new Attention OCR model on the -[FSNS dataset][FSNS] dataset to transcribe street names in France. You can -also use it to train it on your own data. +[FSNS dataset][FSNS] to transcribe street names in France. You can also train the code on your own data. More details can be found in our paper: ["Attention-based Extraction of Structured Information from Street View Imagery"](https://arxiv.org/abs/1704.03549) +## Description + +* Paper presents a model based on ConvNets, RNN's and a novel attention mechanism. +Achieves **84.2%** on FSNS beating the previous benchmark (**72.46%**). Also studies +the speed/accuracy tradeoff that results from using CNN feature extractors of +different depths. 
+ ## Contacts Authors @@ -22,7 +28,18 @@ Authors * Zbigniew Wojna (zbigniewwojna@gmail.com) * Alexander Gorban (gorban@google.com) -Maintainer: Xavier Gibert [@xavigibert](https://github.com/xavigibert) +Maintainer + +* Xavier Gibert ([@xavigibert](https://github.com/xavigibert)) + +## Table of Contents + +* [Requirements](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#requirements) +* [Dataset](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#dataset) +* [How to use this code](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#how-to-use-this-code) +* [Using your own image data](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#using-your-own-image-data) +* [How to use a pre-trained model](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#how-to-use-a-pre-trained-model) +* [Disclaimer](https://github.com/tensorflow/models/blob/master/research/attention_ocr/README.md#disclaimer) ## Requirements @@ -49,6 +66,42 @@ cd .. [TF]: https://www.tensorflow.org/install/ [FSNS]: https://github.com/tensorflow/models/tree/master/research/street +## Dataset + +The French Street Name Signs (FSNS) dataset is split into subsets, +each of which is composed of multiple files. Note that these datasets +are very large. The approximate sizes are: + +* Train: 512 files of 300MB each. +* Validation: 64 files of 40MB each. +* Test: 64 files of 50MB each. +* The datasets download includes a directory `testdata` that contains +some small datasets that are big enough to test that models can +actually learn something. +* Total: around 158GB + +The download paths are in the following list: + +``` +https://download.tensorflow.org/data/fsns-20160927/charset_size=134.txt +https://download.tensorflow.org/data/fsns-20160927/test/test-00000-of-00064 +... 
+https://download.tensorflow.org/data/fsns-20160927/test/test-00063-of-00064 +https://download.tensorflow.org/data/fsns-20160927/testdata/arial-32-00000-of-00001 +https://download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001 +https://download.tensorflow.org/data/fsns-20160927/testdata/mnist-sample-00000-of-00001 +https://download.tensorflow.org/data/fsns-20160927/testdata/numbers-16-00000-of-00001 +https://download.tensorflow.org/data/fsns-20160927/train/train-00000-of-00512 +... +https://download.tensorflow.org/data/fsns-20160927/train/train-00511-of-00512 +https://download.tensorflow.org/data/fsns-20160927/validation/validation-00000-of-00064 +... +https://download.tensorflow.org/data/fsns-20160927/validation/validation-00063-of-00064 +``` + +All URLs are stored in the [research/street](https://github.com/tensorflow/models/tree/master/research/street) +repository in the text file `python/fsns_urls.txt`. + ## How to use this code To run all unit tests: @@ -80,7 +133,7 @@ tar xf attention_ocr_2017_08_09.tar.gz python train.py --checkpoint=model.ckpt-399731 ``` -## How to use your own image data to train the model +## Using your own image data You need to define a new dataset. 
There are two options: diff --git a/research/attention_ocr/python/common_flags.py b/research/attention_ocr/python/common_flags.py index 86eb355ed85ceff81c10751119bc3c46ffef59b1..60aa49ffea84e6f7cc893715309755a3ca3e4b6f 100644 --- a/research/attention_ocr/python/common_flags.py +++ b/research/attention_ocr/python/common_flags.py @@ -17,7 +17,7 @@ import logging import sys -from tensorflow.python.platform import flags +from tensorflow.compat.v1 import flags import datasets import model diff --git a/research/attention_ocr/python/data_provider.py b/research/attention_ocr/python/data_provider.py index fb7feed7c4018deb2da9d53223c8045fba88ffc3..7a5a2d40cee1d431380b8958439e1d47b7f4e509 100644 --- a/research/attention_ocr/python/data_provider.py +++ b/research/attention_ocr/python/data_provider.py @@ -56,14 +56,14 @@ def augment_image(image): Returns: Distorted Tensor image of the same shape. """ - with tf.variable_scope('AugmentImage'): + with tf.compat.v1.variable_scope('AugmentImage'): height = image.get_shape().dims[0].value width = image.get_shape().dims[1].value # Random crop cut from the street sign image, resized to the same size. # Assures that the crop is covers at least 0.8 area of the input image. bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box( - tf.shape(image), + image_size=tf.shape(input=image), bounding_boxes=tf.zeros([0, 0, 4]), min_object_covered=0.8, aspect_ratio_range=[0.8, 1.2], @@ -74,7 +74,7 @@ def augment_image(image): # Randomly chooses one of the 4 interpolation methods distorted_image = inception_preprocessing.apply_with_random_selector( distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method), + lambda x, method: tf.image.resize(x, [height, width], method), num_cases=4) distorted_image.set_shape([height, width, 3]) @@ -99,9 +99,10 @@ def central_crop(image, crop_size): Returns: A tensor of shape [crop_height, crop_width, channels]. 
""" - with tf.variable_scope('CentralCrop'): + with tf.compat.v1.variable_scope('CentralCrop'): target_width, target_height = crop_size - image_height, image_width = tf.shape(image)[0], tf.shape(image)[1] + image_height, image_width = tf.shape( + input=image)[0], tf.shape(input=image)[1] assert_op1 = tf.Assert( tf.greater_equal(image_height, target_height), ['image_height < target_height', image_height, target_height]) @@ -129,7 +130,7 @@ def preprocess_image(image, augment=False, central_crop_size=None, A float32 tensor of shape [H x W x 3] with RGB values in the required range. """ - with tf.variable_scope('PreprocessImage'): + with tf.compat.v1.variable_scope('PreprocessImage'): image = tf.image.convert_image_dtype(image, dtype=tf.float32) if augment or central_crop_size: if num_towers == 1: @@ -182,7 +183,7 @@ def get_data(dataset, image_orig, augment, central_crop_size, num_towers=dataset.num_of_views) label_one_hot = slim.one_hot_encoding(label, dataset.num_char_classes) - images, images_orig, labels, labels_one_hot = (tf.train.shuffle_batch( + images, images_orig, labels, labels_one_hot = (tf.compat.v1.train.shuffle_batch( [image, image_orig, label, label_one_hot], batch_size=batch_size, num_threads=shuffle_config.num_batching_threads, diff --git a/research/attention_ocr/python/datasets/fsns.py b/research/attention_ocr/python/datasets/fsns.py index 99ea684212c5e0c24737957c558fd76ba306f6e3..ab6d0f28b1369a8e5945d57c2f102733638058d0 100644 --- a/research/attention_ocr/python/datasets/fsns.py +++ b/research/attention_ocr/python/datasets/fsns.py @@ -72,7 +72,7 @@ def read_charset(filename, null_character=u'\u2591'): """ pattern = re.compile(r'(\d+)\t(.+)') charset = {} - with tf.gfile.GFile(filename) as f: + with tf.io.gfile.GFile(filename) as f: for i, line in enumerate(f): m = pattern.match(line) if m is None: @@ -96,9 +96,9 @@ class _NumOfViewsHandler(slim.tfexample_decoder.ItemHandler): self._num_of_views = num_of_views def tensors_to_item(self, 
keys_to_tensors): - return tf.to_int64( + return tf.cast( self._num_of_views * keys_to_tensors[self._original_width_key] / - keys_to_tensors[self._width_key]) + keys_to_tensors[self._width_key], dtype=tf.int64) def get_split(split_name, dataset_dir=None, config=None): @@ -133,19 +133,19 @@ def get_split(split_name, dataset_dir=None, config=None): zero = tf.zeros([1], dtype=tf.int64) keys_to_features = { 'image/encoded': - tf.FixedLenFeature((), tf.string, default_value=''), + tf.io.FixedLenFeature((), tf.string, default_value=''), 'image/format': - tf.FixedLenFeature((), tf.string, default_value='png'), + tf.io.FixedLenFeature((), tf.string, default_value='png'), 'image/width': - tf.FixedLenFeature([1], tf.int64, default_value=zero), + tf.io.FixedLenFeature([1], tf.int64, default_value=zero), 'image/orig_width': - tf.FixedLenFeature([1], tf.int64, default_value=zero), + tf.io.FixedLenFeature([1], tf.int64, default_value=zero), 'image/class': - tf.FixedLenFeature([config['max_sequence_length']], tf.int64), + tf.io.FixedLenFeature([config['max_sequence_length']], tf.int64), 'image/unpadded_class': - tf.VarLenFeature(tf.int64), + tf.io.VarLenFeature(tf.int64), 'image/text': - tf.FixedLenFeature([1], tf.string, default_value=''), + tf.io.FixedLenFeature([1], tf.string, default_value=''), } items_to_handlers = { 'image': @@ -171,7 +171,7 @@ def get_split(split_name, dataset_dir=None, config=None): config['splits'][split_name]['pattern']) return slim.dataset.Dataset( data_sources=file_pattern, - reader=tf.TFRecordReader, + reader=tf.compat.v1.TFRecordReader, decoder=decoder, num_samples=config['splits'][split_name]['size'], items_to_descriptions=config['items_to_descriptions'], diff --git a/research/attention_ocr/python/datasets/fsns_test.py b/research/attention_ocr/python/datasets/fsns_test.py index 4daedfbd12a58b6635cefed2bdc02bc84fc2c9ef..2f5f3afc78e9f73ec8ce8a295b9664bff8eeda67 100644 --- a/research/attention_ocr/python/datasets/fsns_test.py +++ 
b/research/attention_ocr/python/datasets/fsns_test.py @@ -22,8 +22,9 @@ from tensorflow.contrib import slim from datasets import fsns from datasets import unittest_utils +from tensorflow.compat.v1 import flags -FLAGS = tf.flags.FLAGS +FLAGS = flags.FLAGS def get_test_split(): @@ -91,7 +92,7 @@ class FsnsTest(tf.test.TestCase): image_tf, label_tf = provider.get(['image', 'label']) with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) + sess.run(tf.compat.v1.global_variables_initializer()) with slim.queues.QueueRunners(sess): image_np, label_np = sess.run([image_tf, label_tf]) diff --git a/research/attention_ocr/python/datasets/testdata/fsns/download_data.py b/research/attention_ocr/python/datasets/testdata/fsns/download_data.py index 559e3195f2156af3be97395b5bc8c0d8ea62f174..126ef58060bbfee11d94f756853fd88cdab8f28a 100644 --- a/research/attention_ocr/python/datasets/testdata/fsns/download_data.py +++ b/research/attention_ocr/python/datasets/testdata/fsns/download_data.py @@ -10,7 +10,8 @@ KEEP_NUM_RECORDS = 5 print('Downloading %s ...' % URL) urllib.request.urlretrieve(URL, DST_ORIG) -print('Writing %d records from %s to %s ...' % (KEEP_NUM_RECORDS, DST_ORIG, DST)) +print('Writing %d records from %s to %s ...' 
% + (KEEP_NUM_RECORDS, DST_ORIG, DST)) with tf.io.TFRecordWriter(DST) as writer: - for raw_record in itertools.islice(tf.python_io.tf_record_iterator(DST_ORIG), KEEP_NUM_RECORDS): + for raw_record in itertools.islice(tf.compat.v1.python_io.tf_record_iterator(DST_ORIG), KEEP_NUM_RECORDS): writer.write(raw_record) diff --git a/research/attention_ocr/python/demo_inference.py b/research/attention_ocr/python/demo_inference.py index d5fcf2515b85412aad272749cc50f5e81752b35d..8c6531d844f8711f1bb5d51ea39678340196cbd3 100644 --- a/research/attention_ocr/python/demo_inference.py +++ b/research/attention_ocr/python/demo_inference.py @@ -19,7 +19,7 @@ import numpy as np import PIL.Image import tensorflow as tf -from tensorflow.python.platform import flags +from tensorflow.compat.v1 import flags from tensorflow.python.training import monitored_session import common_flags @@ -49,7 +49,7 @@ def load_images(file_pattern, batch_size, dataset_name): for i in range(batch_size): path = file_pattern % i print("Reading %s" % path) - pil_image = PIL.Image.open(tf.gfile.GFile(path, 'rb')) + pil_image = PIL.Image.open(tf.io.gfile.GFile(path, 'rb')) images_actual_data[i, ...] 
= np.asarray(pil_image) return images_actual_data @@ -58,12 +58,13 @@ def create_model(batch_size, dataset_name): width, height = get_dataset_image_size(dataset_name) dataset = common_flags.create_dataset(split_name=FLAGS.split_name) model = common_flags.create_model( - num_char_classes=dataset.num_char_classes, - seq_length=dataset.max_sequence_length, - num_views=dataset.num_of_views, - null_code=dataset.null_code, - charset=dataset.charset) - raw_images = tf.placeholder(tf.uint8, shape=[batch_size, height, width, 3]) + num_char_classes=dataset.num_char_classes, + seq_length=dataset.max_sequence_length, + num_views=dataset.num_of_views, + null_code=dataset.null_code, + charset=dataset.charset) + raw_images = tf.compat.v1.placeholder( + tf.uint8, shape=[batch_size, height, width, 3]) images = tf.map_fn(data_provider.preprocess_image, raw_images, dtype=tf.float32) endpoints = model.create_base(images, labels_one_hot=None) @@ -76,9 +77,9 @@ def run(checkpoint, batch_size, dataset_name, image_path_pattern): images_data = load_images(image_path_pattern, batch_size, dataset_name) session_creator = monitored_session.ChiefSessionCreator( - checkpoint_filename_with_path=checkpoint) + checkpoint_filename_with_path=checkpoint) with monitored_session.MonitoredSession( - session_creator=session_creator) as sess: + session_creator=session_creator) as sess: predictions = sess.run(endpoints.predicted_text, feed_dict={images_placeholder: images_data}) return [pr_bytes.decode('utf-8') for pr_bytes in predictions.tolist()] @@ -87,10 +88,10 @@ def run(checkpoint, batch_size, dataset_name, image_path_pattern): def main(_): print("Predicted strings:") predictions = run(FLAGS.checkpoint, FLAGS.batch_size, FLAGS.dataset_name, - FLAGS.image_path_pattern) + FLAGS.image_path_pattern) for line in predictions: print(line) if __name__ == '__main__': - tf.app.run() + tf.compat.v1.app.run() diff --git a/research/attention_ocr/python/demo_inference_test.py 
b/research/attention_ocr/python/demo_inference_test.py index 457fb5ab9ef5dbcb326585c2dc8281ee23d319d1..91b8603383289092dd1fb4a06243cb59028102be 100644 --- a/research/attention_ocr/python/demo_inference_test.py +++ b/research/attention_ocr/python/demo_inference_test.py @@ -4,6 +4,7 @@ import os import demo_inference import tensorflow as tf from tensorflow.python.training import monitored_session +from tensorflow.compat.v1 import flags _CHECKPOINT = 'model.ckpt-399731' _CHECKPOINT_URL = 'http://download.tensorflow.org/models/attention_ocr_2017_08_09.tar.gz' @@ -14,12 +15,13 @@ class DemoInferenceTest(tf.test.TestCase): super(DemoInferenceTest, self).setUp() for suffix in ['.meta', '.index', '.data-00000-of-00001']: filename = _CHECKPOINT + suffix - self.assertTrue(tf.gfile.Exists(filename), + self.assertTrue(tf.io.gfile.exists(filename), msg='Missing checkpoint file %s. ' 'Please download and extract it from %s' % (filename, _CHECKPOINT_URL)) self._batch_size = 32 - tf.flags.FLAGS.dataset_dir = os.path.join(os.path.dirname(__file__), 'datasets/testdata/fsns') + flags.FLAGS.dataset_dir = os.path.join( + os.path.dirname(__file__), 'datasets/testdata/fsns') def test_moving_variables_properly_loaded_from_a_checkpoint(self): batch_size = 32 @@ -30,15 +32,15 @@ class DemoInferenceTest(tf.test.TestCase): images_data = demo_inference.load_images(image_path_pattern, batch_size, dataset_name) tensor_name = 'AttentionOcr_v1/conv_tower_fn/INCE/InceptionV3/Conv2d_2a_3x3/BatchNorm/moving_mean' - moving_mean_tf = tf.get_default_graph().get_tensor_by_name( - tensor_name + ':0') - reader = tf.train.NewCheckpointReader(_CHECKPOINT) + moving_mean_tf = tf.compat.v1.get_default_graph().get_tensor_by_name( + tensor_name + ':0') + reader = tf.compat.v1.train.NewCheckpointReader(_CHECKPOINT) moving_mean_expected = reader.get_tensor(tensor_name) session_creator = monitored_session.ChiefSessionCreator( - checkpoint_filename_with_path=_CHECKPOINT) + checkpoint_filename_with_path=_CHECKPOINT) 
with monitored_session.MonitoredSession( - session_creator=session_creator) as sess: + session_creator=session_creator) as sess: moving_mean_np = sess.run(moving_mean_tf, feed_dict={images_placeholder: images_data}) @@ -50,38 +52,38 @@ class DemoInferenceTest(tf.test.TestCase): 'fsns', image_path_pattern) self.assertEqual([ - u'Boulevard de Lunel░░░░░░░░░░░░░░░░░░░', - 'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░', - 'Rue de Port Maria░░░░░░░░░░░░░░░░░░░░', - 'Avenue Charles Gounod░░░░░░░░░░░░░░░░', - 'Rue de l‘Aurore░░░░░░░░░░░░░░░░░░░░░░', - 'Rue de Beuzeville░░░░░░░░░░░░░░░░░░░░', - 'Rue d‘Orbey░░░░░░░░░░░░░░░░░░░░░░░░░░', - 'Rue Victor Schoulcher░░░░░░░░░░░░░░░░', - 'Rue de la Gare░░░░░░░░░░░░░░░░░░░░░░░', - 'Rue des Tulipes░░░░░░░░░░░░░░░░░░░░░░', - 'Rue André Maginot░░░░░░░░░░░░░░░░░░░░', - 'Route de Pringy░░░░░░░░░░░░░░░░░░░░░░', - 'Rue des Landelles░░░░░░░░░░░░░░░░░░░░', - 'Rue des Ilettes░░░░░░░░░░░░░░░░░░░░░░', - 'Avenue de Maurin░░░░░░░░░░░░░░░░░░░░░', - 'Rue Théresa░░░░░░░░░░░░░░░░░░░░░░░░░░', # GT='Rue Thérésa' - 'Route de la Balme░░░░░░░░░░░░░░░░░░░░', - 'Rue Hélène Roederer░░░░░░░░░░░░░░░░░░', - 'Rue Emile Bernard░░░░░░░░░░░░░░░░░░░░', - 'Place de la Mairie░░░░░░░░░░░░░░░░░░░', - 'Rue des Perrots░░░░░░░░░░░░░░░░░░░░░░', - 'Rue de la Libération░░░░░░░░░░░░░░░░░', - 'Impasse du Capcir░░░░░░░░░░░░░░░░░░░░', - 'Avenue de la Grand Mare░░░░░░░░░░░░░░', - 'Rue Pierre Brossolette░░░░░░░░░░░░░░░', - 'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░', - 'Rue du Docteur Mourre░░░░░░░░░░░░░░░░', - 'Rue d‘Ortheuil░░░░░░░░░░░░░░░░░░░░░░░', - 'Rue des Sarments░░░░░░░░░░░░░░░░░░░░░', - 'Rue du Centre░░░░░░░░░░░░░░░░░░░░░░░░', - 'Impasse Pierre Mourgues░░░░░░░░░░░░░░', - 'Rue Marcel Dassault░░░░░░░░░░░░░░░░░░' + u'Boulevard de Lunel░░░░░░░░░░░░░░░░░░░', + 'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░', + 'Rue de Port Maria░░░░░░░░░░░░░░░░░░░░', + 'Avenue Charles Gounod░░░░░░░░░░░░░░░░', + 'Rue de l‘Aurore░░░░░░░░░░░░░░░░░░░░░░', + 'Rue de Beuzeville░░░░░░░░░░░░░░░░░░░░', + 'Rue 
d‘Orbey░░░░░░░░░░░░░░░░░░░░░░░░░░', + 'Rue Victor Schoulcher░░░░░░░░░░░░░░░░', + 'Rue de la Gare░░░░░░░░░░░░░░░░░░░░░░░', + 'Rue des Tulipes░░░░░░░░░░░░░░░░░░░░░░', + 'Rue André Maginot░░░░░░░░░░░░░░░░░░░░', + 'Route de Pringy░░░░░░░░░░░░░░░░░░░░░░', + 'Rue des Landelles░░░░░░░░░░░░░░░░░░░░', + 'Rue des Ilettes░░░░░░░░░░░░░░░░░░░░░░', + 'Avenue de Maurin░░░░░░░░░░░░░░░░░░░░░', + 'Rue Théresa░░░░░░░░░░░░░░░░░░░░░░░░░░', # GT='Rue Thérésa' + 'Route de la Balme░░░░░░░░░░░░░░░░░░░░', + 'Rue Hélène Roederer░░░░░░░░░░░░░░░░░░', + 'Rue Emile Bernard░░░░░░░░░░░░░░░░░░░░', + 'Place de la Mairie░░░░░░░░░░░░░░░░░░░', + 'Rue des Perrots░░░░░░░░░░░░░░░░░░░░░░', + 'Rue de la Libération░░░░░░░░░░░░░░░░░', + 'Impasse du Capcir░░░░░░░░░░░░░░░░░░░░', + 'Avenue de la Grand Mare░░░░░░░░░░░░░░', + 'Rue Pierre Brossolette░░░░░░░░░░░░░░░', + 'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░', + 'Rue du Docteur Mourre░░░░░░░░░░░░░░░░', + 'Rue d‘Ortheuil░░░░░░░░░░░░░░░░░░░░░░░', + 'Rue des Sarments░░░░░░░░░░░░░░░░░░░░░', + 'Rue du Centre░░░░░░░░░░░░░░░░░░░░░░░░', + 'Impasse Pierre Mourgues░░░░░░░░░░░░░░', + 'Rue Marcel Dassault░░░░░░░░░░░░░░░░░░' ], predictions) diff --git a/research/attention_ocr/python/eval.py b/research/attention_ocr/python/eval.py index ec68ad50bc25cd8528f4e9fd7976adad72782641..108227ba91cc71313cf5dbec25faa5729b5cee48 100644 --- a/research/attention_ocr/python/eval.py +++ b/research/attention_ocr/python/eval.py @@ -21,7 +21,7 @@ python eval.py import tensorflow as tf from tensorflow.contrib import slim from tensorflow import app -from tensorflow.python.platform import flags +from tensorflow.compat.v1 import flags import data_provider import common_flags @@ -45,8 +45,8 @@ flags.DEFINE_integer('number_of_steps', None, def main(_): - if not tf.gfile.Exists(FLAGS.eval_log_dir): - tf.gfile.MakeDirs(FLAGS.eval_log_dir) + if not tf.io.gfile.exists(FLAGS.eval_log_dir): + tf.io.gfile.makedirs(FLAGS.eval_log_dir) dataset = common_flags.create_dataset(split_name=FLAGS.split_name) model = 
common_flags.create_model(dataset.num_char_classes, @@ -62,7 +62,7 @@ def main(_): eval_ops = model.create_summaries( data, endpoints, dataset.charset, is_training=False) slim.get_or_create_global_step() - session_config = tf.ConfigProto(device_count={"GPU": 0}) + session_config = tf.compat.v1.ConfigProto(device_count={"GPU": 0}) slim.evaluation.evaluation_loop( master=FLAGS.master, checkpoint_dir=FLAGS.train_log_dir, diff --git a/research/attention_ocr/python/inception_preprocessing.py b/research/attention_ocr/python/inception_preprocessing.py index a4827f2cab742340da2d8d4972c41b35c9862a1e..b61b895021c230cb22f327b077255e28914a924e 100644 --- a/research/attention_ocr/python/inception_preprocessing.py +++ b/research/attention_ocr/python/inception_preprocessing.py @@ -38,7 +38,7 @@ def apply_with_random_selector(x, func, num_cases): The result of func(x, sel), where func receives the value of the selector as a python integer, but sel is sampled dynamically. """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + sel = tf.random.uniform([], maxval=num_cases, dtype=tf.int32) # Pass the real x only to one of the func calls. return control_flow_ops.merge([ func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) @@ -64,7 +64,7 @@ def distort_color(image, color_ordering=0, fast_mode=True, scope=None): Raises: ValueError: if color_ordering not in [0, 3] """ - with tf.name_scope(scope, 'distort_color', [image]): + with tf.compat.v1.name_scope(scope, 'distort_color', [image]): if fast_mode: if color_ordering == 0: image = tf.image.random_brightness(image, max_delta=32. / 255.) 
@@ -131,7 +131,7 @@ def distorted_bounding_box_crop(image, Returns: A tuple, a 3-D Tensor cropped_image and the distorted bbox """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + with tf.compat.v1.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): # Each bounding box has shape [1, num_boxes, box coords] and # the coordinates are ordered [ymin, xmin, ymax, xmax]. @@ -143,7 +143,7 @@ def distorted_bounding_box_crop(image, # bounding box. If no box is supplied, then we assume the bounding box is # the entire image. sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), + image_size=tf.shape(input=image), bounding_boxes=bbox, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, @@ -188,7 +188,7 @@ def preprocess_for_train(image, Returns: 3-D float Tensor of distorted image used for training with range [-1, 1]. """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.compat.v1.name_scope(scope, 'distort_image', [image, height, width, bbox]): if bbox is None: bbox = tf.constant( [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) @@ -198,7 +198,7 @@ def preprocess_for_train(image, # the coordinates are ordered [ymin, xmin, ymax, xmax]. 
image_with_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), bbox) - tf.summary.image('image_with_bounding_boxes', image_with_box) + tf.compat.v1.summary.image('image_with_bounding_boxes', image_with_box) distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) # Restore the shape since the dynamic slice based upon the bbox_size loses @@ -206,8 +206,8 @@ def preprocess_for_train(image, distorted_image.set_shape([None, None, 3]) image_with_distorted_box = tf.image.draw_bounding_boxes( tf.expand_dims(image, 0), distorted_bbox) - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) + tf.compat.v1.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) # This resizing operation may distort the images because the aspect # ratio is not respected. We select a resize method in a round robin @@ -218,11 +218,11 @@ def preprocess_for_train(image, num_resize_cases = 1 if fast_mode else 4 distorted_image = apply_with_random_selector( distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method=method), + lambda x, method: tf.image.resize(x, [height, width], method=method), num_cases=num_resize_cases) - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) + tf.compat.v1.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) # Randomly flip the image horizontally. 
distorted_image = tf.image.random_flip_left_right(distorted_image) @@ -233,8 +233,8 @@ def preprocess_for_train(image, lambda x, ordering: distort_color(x, ordering, fast_mode), num_cases=4) - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) + tf.compat.v1.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) distorted_image = tf.subtract(distorted_image, 0.5) distorted_image = tf.multiply(distorted_image, 2.0) return distorted_image @@ -265,7 +265,7 @@ def preprocess_for_eval(image, Returns: 3-D float Tensor of prepared image. """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): + with tf.compat.v1.name_scope(scope, 'eval_image', [image, height, width]): if image.dtype != tf.float32: image = tf.image.convert_image_dtype(image, dtype=tf.float32) # Crop the central region of the image with an area containing 87.5% of @@ -276,8 +276,8 @@ def preprocess_for_eval(image, if height and width: # Resize the image to the specified height and width. image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear( - image, [height, width], align_corners=False) + image = tf.image.resize( + image, [height, width], method=tf.image.ResizeMethod.BILINEAR) image = tf.squeeze(image, [0]) image = tf.subtract(image, 0.5) image = tf.multiply(image, 2.0) diff --git a/research/attention_ocr/python/metrics.py b/research/attention_ocr/python/metrics.py index 9e2a6a7579812583dc60546f97976f05befe07ff..0bd6c23d009848877441d97981b6514072979a54 100644 --- a/research/attention_ocr/python/metrics.py +++ b/research/attention_ocr/python/metrics.py @@ -34,20 +34,21 @@ def char_accuracy(predictions, targets, rej_char, streaming=False): a update_ops for execution and value tensor whose value on evaluation returns the total character accuracy. 
""" - with tf.variable_scope('CharAccuracy'): + with tf.compat.v1.variable_scope('CharAccuracy'): predictions.get_shape().assert_is_compatible_with(targets.get_shape()) - targets = tf.to_int32(targets) + targets = tf.cast(targets, dtype=tf.int32) const_rej_char = tf.constant(rej_char, shape=targets.get_shape()) - weights = tf.to_float(tf.not_equal(targets, const_rej_char)) - correct_chars = tf.to_float(tf.equal(predictions, targets)) - accuracy_per_example = tf.div( - tf.reduce_sum(tf.multiply(correct_chars, weights), 1), - tf.reduce_sum(weights, 1)) + weights = tf.cast(tf.not_equal(targets, const_rej_char), dtype=tf.float32) + correct_chars = tf.cast(tf.equal(predictions, targets), dtype=tf.float32) + accuracy_per_example = tf.compat.v1.div( + tf.reduce_sum(input_tensor=tf.multiply( + correct_chars, weights), axis=1), + tf.reduce_sum(input_tensor=weights, axis=1)) if streaming: return tf.contrib.metrics.streaming_mean(accuracy_per_example) else: - return tf.reduce_mean(accuracy_per_example) + return tf.reduce_mean(input_tensor=accuracy_per_example) def sequence_accuracy(predictions, targets, rej_char, streaming=False): @@ -66,25 +67,26 @@ def sequence_accuracy(predictions, targets, rej_char, streaming=False): returns the total sequence accuracy. 
""" - with tf.variable_scope('SequenceAccuracy'): + with tf.compat.v1.variable_scope('SequenceAccuracy'): predictions.get_shape().assert_is_compatible_with(targets.get_shape()) - targets = tf.to_int32(targets) + targets = tf.cast(targets, dtype=tf.int32) const_rej_char = tf.constant( rej_char, shape=targets.get_shape(), dtype=tf.int32) include_mask = tf.not_equal(targets, const_rej_char) - include_predictions = tf.to_int32( - tf.where(include_mask, predictions, - tf.zeros_like(predictions) + rej_char)) - correct_chars = tf.to_float(tf.equal(include_predictions, targets)) + include_predictions = tf.cast( + tf.compat.v1.where(include_mask, predictions, + tf.zeros_like(predictions) + rej_char), dtype=tf.int32) + correct_chars = tf.cast( + tf.equal(include_predictions, targets), dtype=tf.float32) correct_chars_counts = tf.cast( - tf.reduce_sum(correct_chars, reduction_indices=[1]), dtype=tf.int32) + tf.reduce_sum(input_tensor=correct_chars, axis=[1]), dtype=tf.int32) target_length = targets.get_shape().dims[1].value target_chars_counts = tf.constant( target_length, shape=correct_chars_counts.get_shape()) - accuracy_per_example = tf.to_float( - tf.equal(correct_chars_counts, target_chars_counts)) + accuracy_per_example = tf.cast( + tf.equal(correct_chars_counts, target_chars_counts), dtype=tf.float32) if streaming: return tf.contrib.metrics.streaming_mean(accuracy_per_example) else: - return tf.reduce_mean(accuracy_per_example) + return tf.reduce_mean(input_tensor=accuracy_per_example) diff --git a/research/attention_ocr/python/metrics_test.py b/research/attention_ocr/python/metrics_test.py index 5560ec2c898fe7674715ec54daa08ba9e7471adf..3e83194523eb3eba904e7225e580841e7f6e3a3f 100644 --- a/research/attention_ocr/python/metrics_test.py +++ b/research/attention_ocr/python/metrics_test.py @@ -38,8 +38,8 @@ class AccuracyTest(tf.test.TestCase): A session object that should be used as a context manager. 
""" with self.cached_session() as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.local_variables_initializer()) yield sess def _fake_labels(self): @@ -55,7 +55,7 @@ class AccuracyTest(tf.test.TestCase): return incorrect def test_sequence_accuracy_identical_samples(self): - labels_tf = tf.convert_to_tensor(self._fake_labels()) + labels_tf = tf.convert_to_tensor(value=self._fake_labels()) accuracy_tf = metrics.sequence_accuracy(labels_tf, labels_tf, self.rej_char) @@ -66,9 +66,9 @@ class AccuracyTest(tf.test.TestCase): def test_sequence_accuracy_one_char_difference(self): ground_truth_np = self._fake_labels() - ground_truth_tf = tf.convert_to_tensor(ground_truth_np) + ground_truth_tf = tf.convert_to_tensor(value=ground_truth_np) prediction_tf = tf.convert_to_tensor( - self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0)))) + value=self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0)))) accuracy_tf = metrics.sequence_accuracy(prediction_tf, ground_truth_tf, self.rej_char) @@ -80,9 +80,9 @@ class AccuracyTest(tf.test.TestCase): def test_char_accuracy_one_char_difference_with_padding(self): ground_truth_np = self._fake_labels() - ground_truth_tf = tf.convert_to_tensor(ground_truth_np) + ground_truth_tf = tf.convert_to_tensor(value=ground_truth_np) prediction_tf = tf.convert_to_tensor( - self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0)))) + value=self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0)))) accuracy_tf = metrics.char_accuracy(prediction_tf, ground_truth_tf, self.rej_char) diff --git a/research/attention_ocr/python/model.py b/research/attention_ocr/python/model.py index 48d2fc842934172170e71291b6c72ded96546136..b489f964e9d756c90af901fa00da49553b17052a 100644 --- a/research/attention_ocr/python/model.py +++ b/research/attention_ocr/python/model.py @@ -92,8 +92,8 @@ class CharsetMapper(object): Args: ids: a 
tensor with shape [batch_size, max_sequence_length] """ - return tf.reduce_join( - self.table.lookup(tf.to_int64(ids)), reduction_indices=1) + return tf.strings.reduce_join( + inputs=self.table.lookup(tf.cast(ids, dtype=tf.int64)), axis=1) def get_softmax_loss_fn(label_smoothing): @@ -110,7 +110,7 @@ def get_softmax_loss_fn(label_smoothing): def loss_fn(labels, logits): return (tf.nn.softmax_cross_entropy_with_logits( - logits=logits, labels=labels)) + logits=logits, labels=tf.stop_gradient(labels))) else: def loss_fn(labels, logits): @@ -140,7 +140,7 @@ def get_tensor_dimensions(tensor): raise ValueError( 'Incompatible shape: len(tensor.get_shape().dims) != 4 (%d != 4)' % len(tensor.get_shape().dims)) - batch_size = tf.shape(tensor)[0] + batch_size = tf.shape(input=tensor)[0] height = tensor.get_shape().dims[1].value width = tensor.get_shape().dims[2].value num_features = tensor.get_shape().dims[3].value @@ -161,8 +161,8 @@ def lookup_indexed_value(indices, row_vecs): A tensor of shape (batch, ) formed by row_vecs[i, indices[i]]. """ gather_indices = tf.stack((tf.range( - tf.shape(row_vecs)[0], dtype=tf.int32), tf.cast(indices, tf.int32)), - axis=1) + tf.shape(input=row_vecs)[0], dtype=tf.int32), tf.cast(indices, tf.int32)), + axis=1) return tf.gather_nd(row_vecs, gather_indices) @@ -181,7 +181,7 @@ def max_char_logprob_cumsum(char_log_prob): so the same function can be used regardless whether use_length_predictions is true or false. """ - max_char_log_prob = tf.reduce_max(char_log_prob, reduction_indices=2) + max_char_log_prob = tf.reduce_max(input_tensor=char_log_prob, axis=2) # For an input array [a, b, c]) tf.cumsum returns [a, a + b, a + b + c] if # exclusive set to False (default). return tf.cumsum(max_char_log_prob, axis=1, exclusive=False) @@ -203,7 +203,7 @@ def find_length_by_null(predicted_chars, null_code): A [batch, ] tensor which stores the sequence length for each sample. 
""" return tf.reduce_sum( - tf.cast(tf.not_equal(null_code, predicted_chars), tf.int32), axis=1) + input_tensor=tf.cast(tf.not_equal(null_code, predicted_chars), tf.int32), axis=1) def axis_pad(tensor, axis, before=0, after=0, constant_values=0.0): @@ -248,7 +248,8 @@ def null_based_length_prediction(chars_log_prob, null_code): element #seq_length - is the probability of length=seq_length. predicted_length is a tensor with shape [batch]. """ - predicted_chars = tf.to_int32(tf.argmax(chars_log_prob, axis=2)) + predicted_chars = tf.cast( + tf.argmax(input=chars_log_prob, axis=2), dtype=tf.int32) # We do right pad to support sequences with seq_length elements. text_log_prob = max_char_logprob_cumsum( axis_pad(chars_log_prob, axis=1, after=1)) @@ -334,9 +335,9 @@ class Model(object): """ mparams = self._mparams['conv_tower_fn'] logging.debug('Using final_endpoint=%s', mparams.final_endpoint) - with tf.variable_scope('conv_tower_fn/INCE'): + with tf.compat.v1.variable_scope('conv_tower_fn/INCE'): if reuse: - tf.get_variable_scope().reuse_variables() + tf.compat.v1.get_variable_scope().reuse_variables() with slim.arg_scope(inception.inception_v3_arg_scope()): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): @@ -372,7 +373,7 @@ class Model(object): def sequence_logit_fn(self, net, labels_one_hot): mparams = self._mparams['sequence_logit_fn'] # TODO(gorban): remove /alias suffixes from the scopes. 
- with tf.variable_scope('sequence_logit_fn/SQLR'): + with tf.compat.v1.variable_scope('sequence_logit_fn/SQLR'): layer_class = sequence_layers.get_layer_class(mparams.use_attention, mparams.use_autoregression) layer = layer_class(net, labels_one_hot, self._params, mparams) @@ -392,7 +393,7 @@ class Model(object): ] xy_flat_shape = (batch_size, 1, height * width, num_features) nets_for_merge = [] - with tf.variable_scope('max_pool_views', values=nets_list): + with tf.compat.v1.variable_scope('max_pool_views', values=nets_list): for net in nets_list: nets_for_merge.append(tf.reshape(net, xy_flat_shape)) merged_net = tf.concat(nets_for_merge, 1) @@ -413,10 +414,11 @@ class Model(object): Returns: A tensor of shape [batch_size, seq_length, features_size]. """ - with tf.variable_scope('pool_views_fn/STCK'): + with tf.compat.v1.variable_scope('pool_views_fn/STCK'): net = tf.concat(nets, 1) - batch_size = tf.shape(net)[0] - image_size = net.get_shape().dims[1].value * net.get_shape().dims[2].value + batch_size = tf.shape(input=net)[0] + image_size = net.get_shape().dims[1].value * \ + net.get_shape().dims[2].value feature_size = net.get_shape().dims[3].value return tf.reshape(net, tf.stack([batch_size, image_size, feature_size])) @@ -438,11 +440,13 @@ class Model(object): with shape [batch_size x seq_length]. 
""" log_prob = utils.logits_to_log_prob(chars_logit) - ids = tf.to_int32(tf.argmax(log_prob, axis=2), name='predicted_chars') + ids = tf.cast(tf.argmax(input=log_prob, axis=2), + name='predicted_chars', dtype=tf.int32) mask = tf.cast( slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool) all_scores = tf.nn.softmax(chars_logit) - selected_scores = tf.boolean_mask(all_scores, mask, name='char_scores') + selected_scores = tf.boolean_mask( + tensor=all_scores, mask=mask, name='char_scores') scores = tf.reshape( selected_scores, shape=(-1, self._params.seq_length), @@ -499,7 +503,7 @@ class Model(object): images = tf.subtract(images, 0.5) images = tf.multiply(images, 2.5) - with tf.variable_scope(scope, reuse=reuse): + with tf.compat.v1.variable_scope(scope, reuse=reuse): views = tf.split( value=images, num_or_size_splits=self._params.num_views, axis=2) logging.debug('Views=%d single view: %s', len(views), views[0]) @@ -566,7 +570,7 @@ class Model(object): # multiple losses including regularization losses. self.sequence_loss_fn(endpoints.chars_logit, data.labels) total_loss = slim.losses.get_total_loss() - tf.summary.scalar('TotalLoss', total_loss) + tf.compat.v1.summary.scalar('TotalLoss', total_loss) return total_loss def label_smoothing_regularization(self, chars_labels, weight=0.1): @@ -605,7 +609,7 @@ class Model(object): A Tensor with shape [batch_size] - the log-perplexity for each sequence. 
""" mparams = self._mparams['sequence_loss_fn'] - with tf.variable_scope('sequence_loss_fn/SLF'): + with tf.compat.v1.variable_scope('sequence_loss_fn/SLF'): if mparams.label_smoothing > 0: smoothed_one_hot_labels = self.label_smoothing_regularization( chars_labels, mparams.label_smoothing) @@ -625,7 +629,7 @@ class Model(object): shape=(batch_size, seq_length), dtype=tf.int64) known_char = tf.not_equal(chars_labels, reject_char) - weights = tf.to_float(known_char) + weights = tf.cast(known_char, dtype=tf.float32) logits_list = tf.unstack(chars_logits, axis=1) weights_list = tf.unstack(weights, axis=1) @@ -635,7 +639,7 @@ class Model(object): weights_list, softmax_loss_function=get_softmax_loss_fn(mparams.label_smoothing), average_across_timesteps=mparams.average_across_timesteps) - tf.losses.add_loss(loss) + tf.compat.v1.losses.add_loss(loss) return loss def create_summaries(self, data, endpoints, charset, is_training): @@ -665,13 +669,14 @@ class Model(object): # tf.summary.text(sname('text/pr'), pr_text) # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:]) # tf.summary.text(sname('text/gt'), gt_text) - tf.summary.image(sname('image'), data.images, max_outputs=max_outputs) + tf.compat.v1.summary.image( + sname('image'), data.images, max_outputs=max_outputs) if is_training: - tf.summary.image( + tf.compat.v1.summary.image( sname('image/orig'), data.images_orig, max_outputs=max_outputs) - for var in tf.trainable_variables(): - tf.summary.histogram(var.op.name, var) + for var in tf.compat.v1.trainable_variables(): + tf.compat.v1.summary.histogram(var.op.name, var) return None else: @@ -700,7 +705,8 @@ class Model(object): for name, value in names_to_values.items(): summary_name = 'eval/' + name - tf.summary.scalar(summary_name, tf.Print(value, [value], summary_name)) + tf.compat.v1.summary.scalar( + summary_name, tf.compat.v1.Print(value, [value], summary_name)) return list(names_to_updates.values()) def create_init_fn_to_restore(self, @@ -733,9 +739,9 
@@ class Model(object): logging.info('variables_to_restore:\n%s', utils.variables_to_restore().keys()) logging.info('moving_average_variables:\n%s', - [v.op.name for v in tf.moving_average_variables()]) + [v.op.name for v in tf.compat.v1.moving_average_variables()]) logging.info('trainable_variables:\n%s', - [v.op.name for v in tf.trainable_variables()]) + [v.op.name for v in tf.compat.v1.trainable_variables()]) if master_checkpoint: assign_from_checkpoint(utils.variables_to_restore(), master_checkpoint) diff --git a/research/attention_ocr/python/model_export.py b/research/attention_ocr/python/model_export.py index 7c5f72e68a0b7a36260f0c0eec81078654952284..c4606003ae6f277ab9ac02f8b54c37146e1ee0dc 100644 --- a/research/attention_ocr/python/model_export.py +++ b/research/attention_ocr/python/model_export.py @@ -25,7 +25,7 @@ import os import tensorflow as tf from tensorflow import app from tensorflow.contrib import slim -from tensorflow.python.platform import flags +from tensorflow.compat.v1 import flags import common_flags import model_export_lib @@ -42,7 +42,8 @@ flags.DEFINE_integer( 'image_height', None, 'Image height used during training(or crop height if used)' ' If not set, the dataset default is used instead.') -flags.DEFINE_string('work_dir', '/tmp', 'A directory to store temporary files.') +flags.DEFINE_string('work_dir', '/tmp', + 'A directory to store temporary files.') flags.DEFINE_integer('version_number', 1, 'Version number of the model') flags.DEFINE_bool( 'export_for_serving', True, @@ -116,7 +117,7 @@ def export_model(export_dir, image_height = crop_image_height or dataset_image_height if export_for_serving: - images_orig = tf.placeholder( + images_orig = tf.compat.v1.placeholder( tf.string, shape=[batch_size], name='tf_example') images_orig_float = model_export_lib.generate_tfexample_image( images_orig, @@ -126,22 +127,23 @@ def export_model(export_dir, name='float_images') else: images_shape = (batch_size, image_height, image_width, image_depth) - 
images_orig = tf.placeholder( + images_orig = tf.compat.v1.placeholder( tf.uint8, shape=images_shape, name='original_image') images_orig_float = tf.image.convert_image_dtype( images_orig, dtype=tf.float32, name='float_images') endpoints = model.create_base(images_orig_float, labels_one_hot=None) - sess = tf.Session() - saver = tf.train.Saver(slim.get_variables_to_restore(), sharded=True) + sess = tf.compat.v1.Session() + saver = tf.compat.v1.train.Saver( + slim.get_variables_to_restore(), sharded=True) saver.restore(sess, get_checkpoint_path()) - tf.logging.info('Model restored successfully.') + tf.compat.v1.logging.info('Model restored successfully.') # Create model signature. if export_for_serving: input_tensors = { - tf.saved_model.signature_constants.CLASSIFY_INPUTS: images_orig + tf.saved_model.CLASSIFY_INPUTS: images_orig } else: input_tensors = {'images': images_orig} @@ -163,21 +165,21 @@ def export_model(export_dir, dataset.max_sequence_length)): output_tensors['attention_mask_%d' % i] = t signature_outputs = model_export_lib.build_tensor_info(output_tensors) - signature_def = tf.saved_model.signature_def_utils.build_signature_def( + signature_def = tf.compat.v1.saved_model.signature_def_utils.build_signature_def( signature_inputs, signature_outputs, - tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME) + tf.saved_model.CLASSIFY_METHOD_NAME) # Save model. 
- builder = tf.saved_model.builder.SavedModelBuilder(export_dir) + builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_dir) builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], + sess, [tf.saved_model.SERVING], signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def }, - main_op=tf.tables_initializer(), + main_op=tf.compat.v1.tables_initializer(), strip_default_attrs=True) builder.save() - tf.logging.info('Model has been exported to %s' % export_dir) + tf.compat.v1.logging.info('Model has been exported to %s' % export_dir) return signature_def diff --git a/research/attention_ocr/python/model_export_lib.py b/research/attention_ocr/python/model_export_lib.py index 3c68dc1fd45f4a0d887e408eac491cea5be1055e..d5d141be2a88f07dbf7b4c1396dd585e8d4d5490 100644 --- a/research/attention_ocr/python/model_export_lib.py +++ b/research/attention_ocr/python/model_export_lib.py @@ -36,7 +36,7 @@ def normalize_image(image, original_minval, original_maxval, target_minval, Returns: image: image which is the same shape as input image. 
""" - with tf.name_scope('NormalizeImage', values=[image]): + with tf.compat.v1.name_scope('NormalizeImage', values=[image]): original_minval = float(original_minval) original_maxval = float(original_maxval) target_minval = float(target_minval) @@ -68,16 +68,17 @@ def generate_tfexample_image(input_example_strings, A tensor with shape [batch_size, height, width, channels] of type float32 with values in the range [0..1] """ - batch_size = tf.shape(input_example_strings)[0] + batch_size = tf.shape(input=input_example_strings)[0] images_shape = tf.stack( [batch_size, image_height, image_width, image_channels]) tf_example_image_key = 'image/encoded' feature_configs = { tf_example_image_key: - tf.FixedLenFeature( + tf.io.FixedLenFeature( image_height * image_width * image_channels, dtype=tf.float32) } - feature_tensors = tf.parse_example(input_example_strings, feature_configs) + feature_tensors = tf.io.parse_example( + serialized=input_example_strings, features=feature_configs) float_images = tf.reshape( normalize_image( feature_tensors[tf_example_image_key], @@ -97,11 +98,11 @@ def attention_ocr_attention_masks(num_characters): names = ['%s/Softmax:0' % (prefix)] for i in range(1, num_characters): names += ['%s_%d/Softmax:0' % (prefix, i)] - return [tf.get_default_graph().get_tensor_by_name(n) for n in names] + return [tf.compat.v1.get_default_graph().get_tensor_by_name(n) for n in names] def build_tensor_info(tensor_dict): return { - k: tf.saved_model.utils.build_tensor_info(t) + k: tf.compat.v1.saved_model.utils.build_tensor_info(t) for k, t in tensor_dict.items() } diff --git a/research/attention_ocr/python/model_export_test.py b/research/attention_ocr/python/model_export_test.py index 19f73f905bf90e60633f812669b4554ac72e3ba4..4dc6688ca469e5b738983a19028f77a9a7b042b8 100644 --- a/research/attention_ocr/python/model_export_test.py +++ b/research/attention_ocr/python/model_export_test.py @@ -19,6 +19,7 @@ import os import numpy as np from absl.testing import flagsaver 
import tensorflow as tf +from tensorflow.compat.v1 import flags import common_flags import model_export @@ -29,7 +30,7 @@ _CHECKPOINT_URL = ( def _clean_up(): - tf.gfile.DeleteRecursively(tf.test.get_temp_dir()) + tf.io.gfile.rmtree(tf.compat.v1.test.get_temp_dir()) def _create_tf_example_string(image): @@ -47,17 +48,18 @@ class AttentionOcrExportTest(tf.test.TestCase): for suffix in ['.meta', '.index', '.data-00000-of-00001']: filename = _CHECKPOINT + suffix self.assertTrue( - tf.gfile.Exists(filename), + tf.io.gfile.exists(filename), msg='Missing checkpoint file %s. ' 'Please download and extract it from %s' % (filename, _CHECKPOINT_URL)) - tf.flags.FLAGS.dataset_name = 'fsns' - tf.flags.FLAGS.checkpoint = _CHECKPOINT - tf.flags.FLAGS.dataset_dir = os.path.join( + flags.FLAGS.dataset_name = 'fsns' + flags.FLAGS.checkpoint = _CHECKPOINT + flags.FLAGS.dataset_dir = os.path.join( os.path.dirname(__file__), 'datasets/testdata/fsns') tf.test.TestCase.setUp(self) _clean_up() - self.export_dir = os.path.join(tf.test.get_temp_dir(), 'exported_model') + self.export_dir = os.path.join( + tf.compat.v1.test.get_temp_dir(), 'exported_model') self.minimal_output_signature = { 'predictions': 'AttentionOcr_v1/predicted_chars:0', 'scores': 'AttentionOcr_v1/predicted_scores:0', @@ -93,10 +95,10 @@ class AttentionOcrExportTest(tf.test.TestCase): size=self.dataset.image_shape).astype('uint8'), } signature_def = graph_def.signature_def[ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] if serving: input_name = signature_def.inputs[ - tf.saved_model.signature_constants.CLASSIFY_INPUTS].name + tf.saved_model.CLASSIFY_INPUTS].name # Model for serving takes input: inputs['inputs'] = 'tf_example:0' feed_dict = { input_name: [ @@ -126,11 +128,11 @@ class AttentionOcrExportTest(tf.test.TestCase): export_for_serving: True if the model was exported for Serving. This affects how input is fed into the model. 
""" - tf.reset_default_graph() - sess = tf.Session() - graph_def = tf.saved_model.loader.load( + tf.compat.v1.reset_default_graph() + sess = tf.compat.v1.Session() + graph_def = tf.compat.v1.saved_model.loader.load( sess=sess, - tags=[tf.saved_model.tag_constants.SERVING], + tags=[tf.saved_model.SERVING], export_dir=self.export_dir) feed_dict = self.create_input_feed(graph_def, export_for_serving) results = sess.run(self.minimal_output_signature, feed_dict=feed_dict) diff --git a/research/attention_ocr/python/model_test.py b/research/attention_ocr/python/model_test.py index ce9b439ca840c59682f88d82c585509d624f6411..6632a38358ad17d4f37888f86d4881362a402c20 100644 --- a/research/attention_ocr/python/model_test.py +++ b/research/attention_ocr/python/model_test.py @@ -52,7 +52,7 @@ class ModelTest(tf.test.TestCase): self.num_char_classes) self.length_logit_shape = (self.batch_size, self.seq_length + 1) # Placeholder knows image dimensions, but not batch size. - self.input_images = tf.placeholder( + self.input_images = tf.compat.v1.placeholder( tf.float32, shape=(None, self.image_height, self.image_width, 3), name='input_node') @@ -89,8 +89,8 @@ class ModelTest(tf.test.TestCase): with self.test_session() as sess: endpoints_tf = ocr_model.create_base( images=self.input_images, labels_one_hot=None) - sess.run(tf.global_variables_initializer()) - tf.tables_initializer().run() + sess.run(tf.compat.v1.global_variables_initializer()) + tf.compat.v1.tables_initializer().run() endpoints = sess.run( endpoints_tf, feed_dict={self.input_images: self.fake_images}) @@ -127,7 +127,7 @@ class ModelTest(tf.test.TestCase): ocr_model = self.create_model() conv_tower = ocr_model.conv_tower_fn(self.input_images) - sess.run(tf.global_variables_initializer()) + sess.run(tf.compat.v1.global_variables_initializer()) conv_tower_np = sess.run( conv_tower, feed_dict={self.input_images: self.fake_images}) @@ -141,9 +141,9 @@ class ModelTest(tf.test.TestCase): ocr_model = self.create_model() 
ocr_model.create_base(images=self.input_images, labels_one_hot=None) with self.test_session() as sess: - tfprof_root = tf.profiler.profile( + tfprof_root = tf.compat.v1.profiler.profile( sess.graph, - options=tf.profiler.ProfileOptionBuilder + options=tf.compat.v1.profiler.ProfileOptionBuilder .trainable_variables_parameter()) model_size_bytes = 4 * tfprof_root.total_parameters @@ -163,9 +163,9 @@ class ModelTest(tf.test.TestCase): summaries = ocr_model.create_summaries( data, endpoints, charset, is_training=False) with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.tables_initializer().run() + sess.run(tf.compat.v1.global_variables_initializer()) + sess.run(tf.compat.v1.local_variables_initializer()) + tf.compat.v1.tables_initializer().run() sess.run(summaries) # just check it is runnable def test_sequence_loss_function_without_label_smoothing(self): @@ -188,7 +188,7 @@ class ModelTest(tf.test.TestCase): Returns: a list of tensors with encoded image coordinates in them. 
""" - batch_size = tf.shape(net)[0] + batch_size = tf.shape(input=net)[0] _, h, w, _ = net.shape.as_list() h_loc = [ tf.tile( @@ -200,7 +200,8 @@ class ModelTest(tf.test.TestCase): h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2) w_loc = [ tf.tile( - tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w), + tf.contrib.layers.one_hot_encoding( + tf.constant([i]), num_classes=w), [h, 1]) for i in range(w) ] w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2) @@ -272,8 +273,8 @@ class ModelTest(tf.test.TestCase): endpoints_tf = ocr_model.create_base( images=self.fake_images, labels_one_hot=None) - sess.run(tf.global_variables_initializer()) - tf.tables_initializer().run() + sess.run(tf.compat.v1.global_variables_initializer()) + tf.compat.v1.tables_initializer().run() endpoints = sess.run(endpoints_tf) self.assertEqual(endpoints.predicted_text.shape, (self.batch_size,)) @@ -289,7 +290,7 @@ class CharsetMapperTest(tf.test.TestCase): charset_mapper = model.CharsetMapper(charset) with self.test_session() as sess: - tf.tables_initializer().run() + tf.compat.v1.tables_initializer().run() text = sess.run(charset_mapper.get_text(ids)) self.assertAllEqual(text, [b'hello', b'world']) diff --git a/research/attention_ocr/python/sequence_layers.py b/research/attention_ocr/python/sequence_layers.py index 82e0de1b026623e01e100182da72c5995d0de45b..15c4b1c3f9451ed54d92a734dcdb4f58e1162f89 100644 --- a/research/attention_ocr/python/sequence_layers.py +++ b/research/attention_ocr/python/sequence_layers.py @@ -111,12 +111,12 @@ class SequenceLayerBase(object): self._mparams = method_params self._net = net self._labels_one_hot = labels_one_hot - self._batch_size = tf.shape(net)[0] + self._batch_size = tf.shape(input=net)[0] # Initialize parameters for char logits which will be computed on the fly # inside an LSTM decoder. 
self._char_logits = {} - regularizer = slim.l2_regularizer(self._mparams.weight_decay) + regularizer = tf.keras.regularizers.l2(0.5 * (self._mparams.weight_decay)) self._softmax_w = slim.model_variable( 'softmax_w', [self._mparams.num_lstm_units, self._params.num_char_classes], @@ -124,7 +124,7 @@ class SequenceLayerBase(object): regularizer=regularizer) self._softmax_b = slim.model_variable( 'softmax_b', [self._params.num_char_classes], - initializer=tf.zeros_initializer(), + initializer=tf.compat.v1.zeros_initializer(), regularizer=regularizer) @abc.abstractmethod @@ -203,8 +203,8 @@ class SequenceLayerBase(object): A tensor with shape [batch_size, num_char_classes] """ if char_index not in self._char_logits: - self._char_logits[char_index] = tf.nn.xw_plus_b(inputs, self._softmax_w, - self._softmax_b) + self._char_logits[char_index] = tf.compat.v1.nn.xw_plus_b(inputs, self._softmax_w, + self._softmax_b) return self._char_logits[char_index] def char_one_hot(self, logit): @@ -216,7 +216,7 @@ class SequenceLayerBase(object): Returns: A tensor with shape [batch_size, num_char_classes] """ - prediction = tf.argmax(logit, axis=1) + prediction = tf.argmax(input=logit, axis=1) return slim.one_hot_encoding(prediction, self._params.num_char_classes) def get_input(self, prev, i): @@ -244,10 +244,10 @@ class SequenceLayerBase(object): Returns: A tensor with shape [batch_size, seq_length, num_char_classes]. 
""" - with tf.variable_scope('LSTM'): + with tf.compat.v1.variable_scope('LSTM'): first_label = self.get_input(prev=None, i=0) decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1) - lstm_cell = tf.contrib.rnn.LSTMCell( + lstm_cell = tf.compat.v1.nn.rnn_cell.LSTMCell( self._mparams.num_lstm_units, use_peepholes=False, cell_clip=self._mparams.lstm_state_clip_value, @@ -259,9 +259,9 @@ class SequenceLayerBase(object): loop_function=self.get_input, cell=lstm_cell) - with tf.variable_scope('logits'): + with tf.compat.v1.variable_scope('logits'): logits_list = [ - tf.expand_dims(self.char_logit(logit, i), dim=1) + tf.expand_dims(self.char_logit(logit, i), axis=1) for i, logit in enumerate(lstm_outputs) ] diff --git a/research/attention_ocr/python/sequence_layers_test.py b/research/attention_ocr/python/sequence_layers_test.py index fd41e2d824c014084129707631d45de334ec741b..29be1875b2aff0bd35da5c16628543e14051a04e 100644 --- a/research/attention_ocr/python/sequence_layers_test.py +++ b/research/attention_ocr/python/sequence_layers_test.py @@ -29,13 +29,13 @@ import sequence_layers def fake_net(batch_size, num_features, feature_size): return tf.convert_to_tensor( - np.random.uniform(size=(batch_size, num_features, feature_size)), + value=np.random.uniform(size=(batch_size, num_features, feature_size)), dtype=tf.float32) def fake_labels(batch_size, seq_length, num_char_classes): labels_np = tf.convert_to_tensor( - np.random.randint( + value=np.random.randint( low=0, high=num_char_classes, size=(batch_size, seq_length))) return slim.one_hot_encoding(labels_np, num_classes=num_char_classes) diff --git a/research/attention_ocr/python/train.py b/research/attention_ocr/python/train.py index fa91fb73b412287889f05d0af5875e269f1ce367..9b59fd380601b33d901f35a3784c3c02b0fe6f14 100644 --- a/research/attention_ocr/python/train.py +++ b/research/attention_ocr/python/train.py @@ -23,7 +23,7 @@ import logging import tensorflow as tf from tensorflow.contrib import slim 
from tensorflow import app -from tensorflow.python.platform import flags +from tensorflow.compat.v1 import flags from tensorflow.contrib.tfprof import model_analyzer import data_provider @@ -96,16 +96,16 @@ def get_training_hparams(): def create_optimizer(hparams): """Creates optimized based on the specified flags.""" if hparams.optimizer == 'momentum': - optimizer = tf.train.MomentumOptimizer( + optimizer = tf.compat.v1.train.MomentumOptimizer( hparams.learning_rate, momentum=hparams.momentum) elif hparams.optimizer == 'adam': - optimizer = tf.train.AdamOptimizer(hparams.learning_rate) + optimizer = tf.compat.v1.train.AdamOptimizer(hparams.learning_rate) elif hparams.optimizer == 'adadelta': - optimizer = tf.train.AdadeltaOptimizer(hparams.learning_rate) + optimizer = tf.compat.v1.train.AdadeltaOptimizer(hparams.learning_rate) elif hparams.optimizer == 'adagrad': - optimizer = tf.train.AdagradOptimizer(hparams.learning_rate) + optimizer = tf.compat.v1.train.AdagradOptimizer(hparams.learning_rate) elif hparams.optimizer == 'rmsprop': - optimizer = tf.train.RMSPropOptimizer( + optimizer = tf.compat.v1.train.RMSPropOptimizer( hparams.learning_rate, momentum=hparams.momentum) return optimizer @@ -154,14 +154,14 @@ def train(loss, init_fn, hparams): def prepare_training_dir(): - if not tf.gfile.Exists(FLAGS.train_log_dir): + if not tf.io.gfile.exists(FLAGS.train_log_dir): logging.info('Create a new training directory %s', FLAGS.train_log_dir) - tf.gfile.MakeDirs(FLAGS.train_log_dir) + tf.io.gfile.makedirs(FLAGS.train_log_dir) else: if FLAGS.reset_train_dir: logging.info('Reset the training directory %s', FLAGS.train_log_dir) - tf.gfile.DeleteRecursively(FLAGS.train_log_dir) - tf.gfile.MakeDirs(FLAGS.train_log_dir) + tf.io.gfile.rmtree(FLAGS.train_log_dir) + tf.io.gfile.makedirs(FLAGS.train_log_dir) else: logging.info('Use already existing training directory %s', FLAGS.train_log_dir) @@ -169,7 +169,7 @@ def prepare_training_dir(): def calculate_graph_metrics(): 
param_stats = model_analyzer.print_model_analysis( - tf.get_default_graph(), + tf.compat.v1.get_default_graph(), tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS) return param_stats.total_parameters @@ -186,7 +186,7 @@ def main(_): # If ps_tasks is zero, the local device is used. When using multiple # (non-local) replicas, the ReplicaDeviceSetter distributes the variables # across the different devices. - device_setter = tf.train.replica_device_setter( + device_setter = tf.compat.v1.train.replica_device_setter( FLAGS.ps_tasks, merge_devices=True) with tf.device(device_setter): data = data_provider.get_data( diff --git a/research/attention_ocr/python/utils.py b/research/attention_ocr/python/utils.py index aa71f91b7ffe9629dd762f81510e2a9e95d48ce7..5d282f72874856ecdac6dd3932badbbe9d915a9a 100644 --- a/research/attention_ocr/python/utils.py +++ b/research/attention_ocr/python/utils.py @@ -37,16 +37,16 @@ def logits_to_log_prob(logits): probabilities. """ - with tf.variable_scope('log_probabilities'): + with tf.compat.v1.variable_scope('log_probabilities'): reduction_indices = len(logits.shape.as_list()) - 1 max_logits = tf.reduce_max( - logits, reduction_indices=reduction_indices, keep_dims=True) + input_tensor=logits, axis=reduction_indices, keepdims=True) safe_logits = tf.subtract(logits, max_logits) sum_exp = tf.reduce_sum( - tf.exp(safe_logits), - reduction_indices=reduction_indices, - keep_dims=True) - log_probs = tf.subtract(safe_logits, tf.log(sum_exp)) + input_tensor=tf.exp(safe_logits), + axis=reduction_indices, + keepdims=True) + log_probs = tf.subtract(safe_logits, tf.math.log(sum_exp)) return log_probs @@ -91,7 +91,7 @@ def ConvertAllInputsToTensors(func): """ def FuncWrapper(*args): - tensors = [tf.convert_to_tensor(a) for a in args] + tensors = [tf.convert_to_tensor(value=a) for a in args] return func(*tensors) return FuncWrapper diff --git a/research/audioset/vggish/README.md b/research/audioset/vggish/README.md index 
0be0ae86687b81f2074d9011f4c16b5402a138bf..d20e5587af44de4bc029ad1820cd625b6648f0cd 100644 --- a/research/audioset/vggish/README.md +++ b/research/audioset/vggish/README.md @@ -16,17 +16,14 @@ VGGish depends on the following Python packages: * [`numpy`](http://www.numpy.org/) * [`resampy`](http://resampy.readthedocs.io/en/latest/) -* [`tensorflow`](http://www.tensorflow.org/) (currently, only TF v1.x) +* [`tensorflow`](http://www.tensorflow.org/) * [`tf_slim`](https://github.com/google-research/tf-slim) * [`six`](https://pythonhosted.org/six/) * [`soundfile`](https://pysoundfile.readthedocs.io/) These are all easily installable via, e.g., `pip install numpy` (as in the -sample installation session below). - -Any reasonably recent version of these packages shold work. Note that we currently only support -TensorFlow v1.x due to a [`tf_slim` limitation](https://github.com/google-research/tf-slim/pull/1). -TensorFlow v1.15 (the latest version as of Jan 2020) has been tested to work. +sample installation session below). Any reasonably recent version of these +packages shold work. VGGish also requires downloading two data files: @@ -60,7 +57,7 @@ Here's a sample installation and test session: $ sudo python -m pip install --upgrade pip wheel # Install all dependences. -$ sudo pip install numpy resampy tensorflow==1.15 tf_slim six soundfile +$ sudo pip install numpy resampy tensorflow tf_slim six soundfile # Clone TensorFlow models repo into a 'models' directory. $ git clone https://github.com/tensorflow/models.git @@ -129,7 +126,10 @@ changes we made: fully connected layer. This acts as a compact embedding layer. The model definition provided here defines layers up to and including the -128-wide embedding layer. +128-wide embedding layer. Note that the embedding layer does not include +a final non-linear activation, so the embedding value is pre-activation. 
+When training a model stacked on top of VGGish, you should send the +embedding through a non-linearity of your choice before adding more layers. ### Input: Audio Features @@ -150,14 +150,7 @@ VGGish was trained with audio features computed as follows: where each example covers 64 mel bands and 96 frames of 10 ms each. We provide our own NumPy implementation that produces features that are very -similar to those produced by our internal production code. This results in -embedding outputs that are closely match the embeddings that we have already -released. Note that these embeddings will *not* be bit-for-bit identical to the -released embeddings due to small differences between the feature computation -code paths, and even between two different installations of VGGish with -different underlying libraries and hardware. However, we expect that the -embeddings will be equivalent in the context of a downstream classification -task. +similar to those produced by our internal production code. ### Output: Embeddings diff --git a/research/audioset/vggish/vggish_export_tfhub.py b/research/audioset/vggish/vggish_export_tfhub.py new file mode 100644 index 0000000000000000000000000000000000000000..c3956f2365aa3d8475a9d001343b4b9260bc4462 --- /dev/null +++ b/research/audioset/vggish/vggish_export_tfhub.py @@ -0,0 +1,126 @@ +"""Exports VGGish as a SavedModel for publication to TF Hub. + +The exported SavedModel accepts a 1-d float32 Tensor of arbitrary shape +containing an audio waveform (assumed to be mono 16 kHz samples in the [-1, +1] +range) and returns a 2-d float32 batch of 128-d VGGish embeddings, one per +0.96s example generated from the waveform. + +Requires pip-installing tensorflow_hub. + +Usage: + vggish_export_tfhub.py +""" + +import sys +sys.path.append('..') # Lets us import yamnet modules from sibling directory. 
+ +import numpy as np +import resampy +import tensorflow as tf +assert tf.version.VERSION >= '2.0.0', ( + 'Need at least TF 2.0, you have TF v{}'.format(tf.version.VERSION)) +import tensorflow_hub as tfhub + +import vggish_input +import vggish_params +import vggish_slim +from yamnet import features as yamnet_features +from yamnet import params as yamnet_params + + +def vggish_definer(variables, checkpoint_path): + """Defines VGGish with variables tracked and initialized from a checkpoint.""" + reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path) + + def var_tracker(next_creator, **kwargs): + """Variable creation hook that assigns initial values from a checkpoint.""" + var_name = kwargs['name'] + var_value = reader.get_tensor(var_name) + kwargs.update({'initial_value': var_value}) + var = next_creator(**kwargs) + variables.append(var) + return var + + def waveform_to_features(waveform): + """Creates VGGish features using the YAMNet feature extractor.""" + params = yamnet_params.Params( + sample_rate=vggish_params.SAMPLE_RATE, + stft_window_seconds=vggish_params.STFT_WINDOW_LENGTH_SECONDS, + stft_hop_seconds=vggish_params.STFT_HOP_LENGTH_SECONDS, + mel_bands=vggish_params.NUM_MEL_BINS, + mel_min_hz=vggish_params.MEL_MIN_HZ, + mel_max_hz=vggish_params.MEL_MAX_HZ, + log_offset=vggish_params.LOG_OFFSET, + patch_window_seconds=vggish_params.EXAMPLE_WINDOW_SECONDS, + patch_hop_seconds=vggish_params.EXAMPLE_HOP_SECONDS) + log_mel_spectrogram, features = yamnet_features.waveform_to_log_mel_spectrogram_patches( + waveform, params) + return features + + def define_vggish(waveform): + with tf.variable_creator_scope(var_tracker): + features = waveform_to_features(waveform) + return vggish_slim.define_vggish_slim(features, training=False) + + return define_vggish + + +class VGGish(tf.Module): + """A TF2 Module wrapper around VGGish.""" + def __init__(self, checkpoint_path): + super().__init__() + self._variables = [] + self._vggish_fn = tf.compat.v1.wrap_function( + 
vggish_definer(self._variables, checkpoint_path), + signature=(tf.TensorSpec(shape=[None], dtype=tf.float32),)) + + @tf.function(input_signature=(tf.TensorSpec(shape=[None], dtype=tf.float32),)) + def __call__(self, waveform): + return self._vggish_fn(waveform) + + +def check_model(model_fn): + """Applies vggish_smoke_test's sanity check to an instance of VGGish.""" + num_secs = 3 + freq = 1000 + sr = 44100 + t = np.arange(0, num_secs, 1 / sr) + waveform = np.sin(2 * np.pi * freq * t) + + waveform = resampy.resample(waveform, sr, vggish_params.SAMPLE_RATE) + embeddings = model_fn(waveform) + + expected_embedding_mean = -0.0333 + expected_embedding_std = 0.380 + rel_error = 0.1 + np.testing.assert_allclose( + [np.mean(embeddings), np.std(embeddings)], + [expected_embedding_mean, expected_embedding_std], + rtol=rel_error) + + +def main(args): + # Create a TF2 wrapper around VGGish. + vggish_checkpoint_path = args[0] + vggish = VGGish(vggish_checkpoint_path) + check_model(vggish) + + # Make TF-Hub export. + vggish_tfhub_export_path = args[1] + tf.saved_model.save(vggish, vggish_tfhub_export_path) + + # Check export in TF2. + model = tfhub.load(vggish_tfhub_export_path) + check_model(model) + + # Check export in TF1. 
+ with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess: + model = tfhub.load(vggish_tfhub_export_path) + sess.run(tf.compat.v1.global_variables_initializer()) + def run_model(waveform): + embeddings = model(waveform) + return sess.run(embeddings) + check_model(run_model) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/research/audioset/vggish/vggish_inference_demo.py b/research/audioset/vggish/vggish_inference_demo.py index 6d9d631b36d8eeac68ea23b59bd0938b5dbbd30c..2294698f5611050de271045bc62fc249e632d925 100644 --- a/research/audioset/vggish/vggish_inference_demo.py +++ b/research/audioset/vggish/vggish_inference_demo.py @@ -50,7 +50,6 @@ import numpy as np import six import soundfile import tensorflow.compat.v1 as tf -tf.disable_v2_behavior() import vggish_input import vggish_params @@ -89,7 +88,7 @@ def main(_): num_secs = 5 freq = 1000 sr = 44100 - t = np.linspace(0, num_secs, int(num_secs * sr)) + t = np.arange(0, num_secs, 1 / sr) x = np.sin(2 * np.pi * freq * t) # Convert to signed 16-bit samples. samples = np.clip(x * 32768, -32768, 32767).astype(np.int16) diff --git a/research/audioset/vggish/vggish_slim.py b/research/audioset/vggish/vggish_slim.py index 0a838c4b8e2619b2573c490f546044b113f3bb55..84a8aac3986b06eb42ee96de16f6ec34c2d3465f 100644 --- a/research/audioset/vggish/vggish_slim.py +++ b/research/audioset/vggish/vggish_slim.py @@ -31,28 +31,31 @@ https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py """ import tensorflow.compat.v1 as tf -tf.disable_v2_behavior() import tf_slim as slim import vggish_params as params -def define_vggish_slim(training=False): +def define_vggish_slim(features_tensor=None, training=False): """Defines the VGGish TensorFlow model. All ops are created in the current default graph, under the scope 'vggish/'. 
- The input is a placeholder named 'vggish/input_features' of type float32 and - shape [batch_size, num_frames, num_bands] where batch_size is variable and - num_frames and num_bands are constants, and [num_frames, num_bands] represents - a log-mel-scale spectrogram patch covering num_bands frequency bands and - num_frames time frames (where each frame step is usually 10ms). This is - produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET). - The output is an op named 'vggish/embedding' which produces the activations of - a 128-D embedding layer, which is usually the penultimate layer when used as - part of a full model with a final classifier layer. + The input is either a tensor passed in via the optional 'features_tensor' + argument or a placeholder created below named 'vggish/input_features'. The + input is expected to have dtype float32 and shape [batch_size, num_frames, + num_bands] where batch_size is variable and num_frames and num_bands are + constants, and [num_frames, num_bands] represents a log-mel-scale spectrogram + patch covering num_bands frequency bands and num_frames time frames (where + each frame step is usually 10ms). This is produced by computing the stabilized + log(mel-spectrogram + params.LOG_OFFSET). The output is a tensor named + 'vggish/embedding' which produces the pre-activation values of a 128-D + embedding layer, which is usually the penultimate layer when used as part of a + full model with a final classifier layer. Args: + features_tensor: If not None, the tensor containing the input features. + If None, a placeholder input is created. training: If true, all parameters are marked trainable. Returns: @@ -76,11 +79,13 @@ def define_vggish_slim(training=False): kernel_size=[2, 2], stride=2, padding='SAME'), \ tf.variable_scope('vggish'): # Input: a batch of 2-D log-mel-spectrogram patches. 
- features = tf.placeholder( - tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS), - name='input_features') + if features_tensor is None: + features_tensor = tf.placeholder( + tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS), + name='input_features') # Reshape to 4-D so that we can convolve a batch with conv2d(). - net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1]) + net = tf.reshape(features_tensor, + [-1, params.NUM_FRAMES, params.NUM_BANDS, 1]) # The VGG stack of alternating convolutions and max-pools. net = slim.conv2d(net, 64, scope='conv1') @@ -96,7 +101,8 @@ def define_vggish_slim(training=False): net = slim.flatten(net) net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1') # The embedding layer. - net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2') + net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2', + activation_fn=None) return tf.identity(net, name='embedding') diff --git a/research/audioset/vggish/vggish_smoke_test.py b/research/audioset/vggish/vggish_smoke_test.py index f27e583aee473c6a04a5af20fd101c7a54871e94..82a644a91e3ba8af3f47b71c7de0c3943ffe156f 100644 --- a/research/audioset/vggish/vggish_smoke_test.py +++ b/research/audioset/vggish/vggish_smoke_test.py @@ -33,7 +33,6 @@ from __future__ import print_function import numpy as np import tensorflow.compat.v1 as tf -tf.disable_v2_behavior() import vggish_input import vggish_params @@ -54,7 +53,7 @@ rel_error = 0.1 # Up to 10% num_secs = 3 freq = 1000 sr = 44100 -t = np.linspace(0, num_secs, int(num_secs * sr)) +t = np.arange(0, num_secs, 1 / sr) x = np.sin(2 * np.pi * freq * t) # Produce a batch of log mel spectrogram examples. 
@@ -77,8 +76,8 @@ with tf.Graph().as_default(), tf.Session() as sess: [embedding_batch] = sess.run([embedding_tensor], feed_dict={features_tensor: input_batch}) print('VGGish embedding: ', embedding_batch[0]) - expected_embedding_mean = 0.131 - expected_embedding_std = 0.238 + expected_embedding_mean = -0.0333 + expected_embedding_std = 0.380 np.testing.assert_allclose( [np.mean(embedding_batch), np.std(embedding_batch)], [expected_embedding_mean, expected_embedding_std], @@ -88,8 +87,8 @@ with tf.Graph().as_default(), tf.Session() as sess: pproc = vggish_postprocess.Postprocessor(pca_params_path) postprocessed_batch = pproc.postprocess(embedding_batch) print('Postprocessed VGGish embedding: ', postprocessed_batch[0]) -expected_postprocessed_mean = 123.0 -expected_postprocessed_std = 75.0 +expected_postprocessed_mean = 122.0 +expected_postprocessed_std = 93.5 np.testing.assert_allclose( [np.mean(postprocessed_batch), np.std(postprocessed_batch)], [expected_postprocessed_mean, expected_postprocessed_std], diff --git a/research/audioset/vggish/vggish_train_demo.py b/research/audioset/vggish/vggish_train_demo.py index d8be0f1774549b0b0ec4bdbcf840a16696fa6322..7a968b171106e360861ed794e6aeab512d6abac4 100644 --- a/research/audioset/vggish/vggish_train_demo.py +++ b/research/audioset/vggish/vggish_train_demo.py @@ -49,7 +49,6 @@ from random import shuffle import numpy as np import tensorflow.compat.v1 as tf -tf.disable_v2_behavior() import tf_slim as slim import vggish_input @@ -95,7 +94,7 @@ def _get_examples_batch(): # Make a waveform for each class. num_seconds = 5 sr = 44100 # Sampling rate. - t = np.linspace(0, num_seconds, int(num_seconds * sr)) # Time axis. + t = np.arange(0, num_seconds, 1 / sr) # Time axis # Random sine wave. freq = np.random.uniform(100, 1000) sine = np.sin(2 * np.pi * freq * t) @@ -129,14 +128,15 @@ def _get_examples_batch(): def main(_): with tf.Graph().as_default(), tf.Session() as sess: # Define VGGish. 
- embeddings = vggish_slim.define_vggish_slim(FLAGS.train_vggish) + embeddings = vggish_slim.define_vggish_slim(training=FLAGS.train_vggish) # Define a shallow classification model and associated training ops on top # of VGGish. with tf.variable_scope('mymodel'): - # Add a fully connected layer with 100 units. + # Add a fully connected layer with 100 units. Add an activation function + # to the embeddings since they are pre-activation. num_units = 100 - fc = slim.fully_connected(embeddings, num_units) + fc = slim.fully_connected(tf.nn.relu(embeddings), num_units) # Add a classifier layer at the end, consisting of parallel logistic # classifiers, one per class. This allows for multi-class tasks. @@ -146,19 +146,16 @@ def main(_): # Add training ops. with tf.variable_scope('train'): - global_step = tf.Variable( - 0, name='global_step', trainable=False, - collections=[tf.GraphKeys.GLOBAL_VARIABLES, - tf.GraphKeys.GLOBAL_STEP]) + global_step = tf.train.create_global_step() # Labels are assumed to be fed as a batch multi-hot vectors, with # a 1 in the position of each positive class label, and 0 elsewhere. - labels = tf.placeholder( + labels_input = tf.placeholder( tf.float32, shape=(None, _NUM_CLASSES), name='labels') # Cross-entropy label loss. xent = tf.nn.sigmoid_cross_entropy_with_logits( - logits=logits, labels=labels, name='xent') + logits=logits, labels=labels_input, name='xent') loss = tf.reduce_mean(xent, name='loss_op') tf.summary.scalar('loss', loss) @@ -166,29 +163,22 @@ def main(_): optimizer = tf.train.AdamOptimizer( learning_rate=vggish_params.LEARNING_RATE, epsilon=vggish_params.ADAM_EPSILON) - optimizer.minimize(loss, global_step=global_step, name='train_op') + train_op = optimizer.minimize(loss, global_step=global_step) # Initialize all variables in the model, and then load the pre-trained # VGGish checkpoint. 
sess.run(tf.global_variables_initializer()) vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint) - # Locate all the tensors and ops we need for the training loop. - features_tensor = sess.graph.get_tensor_by_name( - vggish_params.INPUT_TENSOR_NAME) - labels_tensor = sess.graph.get_tensor_by_name('mymodel/train/labels:0') - global_step_tensor = sess.graph.get_tensor_by_name( - 'mymodel/train/global_step:0') - loss_tensor = sess.graph.get_tensor_by_name('mymodel/train/loss_op:0') - train_op = sess.graph.get_operation_by_name('mymodel/train/train_op') - # The training loop. + features_input = sess.graph.get_tensor_by_name( + vggish_params.INPUT_TENSOR_NAME) for _ in range(FLAGS.num_batches): (features, labels) = _get_examples_batch() - [num_steps, loss, _] = sess.run( - [global_step_tensor, loss_tensor, train_op], - feed_dict={features_tensor: features, labels_tensor: labels}) - print('Step %d: loss %g' % (num_steps, loss)) + [num_steps, loss_value, _] = sess.run( + [global_step, loss, train_op], + feed_dict={features_input: features, labels_input: labels}) + print('Step %d: loss %g' % (num_steps, loss_value)) if __name__ == '__main__': tf.app.run() diff --git a/research/audioset/yamnet/README.md b/research/audioset/yamnet/README.md index 983724c0d3b526a288f564d5c90b8f8330ac14ee..4f3caddfd0fc258421ec9eaff5c565e4909a9c37 100644 --- a/research/audioset/yamnet/README.md +++ b/research/audioset/yamnet/README.md @@ -18,12 +18,8 @@ YAMNet depends on the following Python packages: * [`pysoundfile`](https://pysoundfile.readthedocs.io/) These are all easily installable via, e.g., `pip install numpy` (as in the -example command sequence below). - -Any reasonably recent version of these packages should work. TensorFlow should -be at least version 1.8 to ensure Keras support is included. Note that while -the code works fine with TensorFlow v1.x or v2.x, we explicitly enable v1.x -behavior. +example command sequence below). 
Any reasonably recent version of these +packages should work. YAMNet also requires downloading the following data file: diff --git a/research/audioset/yamnet/export.py b/research/audioset/yamnet/export.py new file mode 100644 index 0000000000000000000000000000000000000000..87bb00612648c894deb7a94f161381a482742984 --- /dev/null +++ b/research/audioset/yamnet/export.py @@ -0,0 +1,213 @@ +"""Exports YAMNet as: TF2 SavedModel, TF-Lite model, TF-JS model. + +The exported models all accept as input: +- 1-d float32 Tensor of arbitrary shape containing an audio waveform + (assumed to be mono 16 kHz samples in the [-1, +1] range) +and return as output: +- a 2-d float32 Tensor of shape [num_frames, num_classes] containing + predicted class scores for each frame of audio extracted from the input. +- a 2-d float32 Tensor of shape [num_frames, embedding_size] containing + embeddings of each frame of audio. +- a 2-d float32 Tensor of shape [num_spectrogram_frames, num_mel_bins] + containing the log mel spectrogram of the entire waveform. +The SavedModels will also contain (as an asset) a class map CSV file that maps +class indices to AudioSet class names and Freebase MIDs. The path to the class +map is available as the 'class_map_path()' method of the restored model. + +Requires pip-installing tensorflow_hub and tensorflowjs. + +Usage: + export.py +and the various exports will be created in subdirectories of the output directory. +Assumes that it will be run in the yamnet source directory from where it loads +the class map. Skips an export if the corresponding directory already exists. 
+""" + +import os +import sys +import tempfile +import time + +import numpy as np +import tensorflow as tf +assert tf.version.VERSION >= '2.0.0', ( + 'Need at least TF 2.0, you have TF v{}'.format(tf.version.VERSION)) +import tensorflow_hub as tfhub +from tensorflowjs.converters import tf_saved_model_conversion_v2 as tfjs_saved_model_converter + +import params as yamnet_params +import yamnet + + +def log(msg): + print('\n=====\n{} | {}\n=====\n'.format(time.asctime(), msg), flush=True) + + +class YAMNet(tf.Module): + "''A TF2 Module wrapper around YAMNet.""" + def __init__(self, weights_path, params): + super().__init__() + self._yamnet = yamnet.yamnet_frames_model(params) + self._yamnet.load_weights(weights_path) + self._class_map_asset = tf.saved_model.Asset('yamnet_class_map.csv') + + @tf.function + def class_map_path(self): + return self._class_map_asset.asset_path + + @tf.function(input_signature=(tf.TensorSpec(shape=[None], dtype=tf.float32),)) + def __call__(self, waveform): + return self._yamnet(waveform) + + +def check_model(model_fn, class_map_path, params): + yamnet_classes = yamnet.class_names(class_map_path) + + """Applies yamnet_test's sanity checks to an instance of YAMNet.""" + def clip_test(waveform, expected_class_name, top_n=10): + predictions, embeddings, log_mel_spectrogram = model_fn(waveform) + clip_predictions = np.mean(predictions, axis=0) + top_n_indices = np.argsort(clip_predictions)[-top_n:] + top_n_scores = clip_predictions[top_n_indices] + top_n_class_names = yamnet_classes[top_n_indices] + top_n_predictions = list(zip(top_n_class_names, top_n_scores)) + assert expected_class_name in top_n_class_names, ( + 'Did not find expected class {} in top {} predictions: {}'.format( + expected_class_name, top_n, top_n_predictions)) + + clip_test( + waveform=np.zeros((int(3 * params.sample_rate),), dtype=np.float32), + expected_class_name='Silence') + + np.random.seed(51773) # Ensure repeatability. 
+ clip_test( + waveform=np.random.uniform(-1.0, +1.0, + (int(3 * params.sample_rate),)).astype(np.float32), + expected_class_name='White noise') + + clip_test( + waveform=np.sin(2 * np.pi * 440 * + np.arange(0, 3, 1 / params.sample_rate), dtype=np.float32), + expected_class_name='Sine wave') + + +def make_tf2_export(weights_path, export_dir): + if os.path.exists(export_dir): + log('TF2 export already exists in {}, skipping TF2 export'.format( + export_dir)) + return + + # Create a TF2 Module wrapper around YAMNet. + log('Building and checking TF2 Module ...') + params = yamnet_params.Params() + yamnet = YAMNet(weights_path, params) + check_model(yamnet, yamnet.class_map_path(), params) + log('Done') + + # Make TF2 SavedModel export. + log('Making TF2 SavedModel export ...') + tf.saved_model.save(yamnet, export_dir) + log('Done') + + # Check export with TF-Hub in TF2. + log('Checking TF2 SavedModel export in TF2 ...') + model = tfhub.load(export_dir) + check_model(model, model.class_map_path(), params) + log('Done') + + # Check export with TF-Hub in TF1. + log('Checking TF2 SavedModel export in TF1 ...') + with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess: + model = tfhub.load(export_dir) + sess.run(tf.compat.v1.global_variables_initializer()) + def run_model(waveform): + return sess.run(model(waveform)) + check_model(run_model, model.class_map_path().eval(), params) + log('Done') + + +def make_tflite_export(weights_path, export_dir): + if os.path.exists(export_dir): + log('TF-Lite export already exists in {}, skipping TF-Lite export'.format( + export_dir)) + return + + # Create a TF-Lite compatible Module wrapper around YAMNet. + log('Building and checking TF-Lite Module ...') + params = yamnet_params.Params(tflite_compatible=True) + yamnet = YAMNet(weights_path, params) + check_model(yamnet, yamnet.class_map_path(), params) + log('Done') + + # Make TF-Lite SavedModel export. 
+ log('Making TF-Lite SavedModel export ...') + saved_model_dir = os.path.join(export_dir, 'saved_model') + os.makedirs(saved_model_dir) + tf.saved_model.save(yamnet, saved_model_dir) + log('Done') + + # Check that the export can be loaded and works. + log('Checking TF-Lite SavedModel export in TF2 ...') + model = tf.saved_model.load(saved_model_dir) + check_model(model, model.class_map_path(), params) + log('Done') + + # Make a TF-Lite model from the SavedModel. + log('Making TF-Lite model ...') + tflite_converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) + tflite_model = tflite_converter.convert() + tflite_model_path = os.path.join(export_dir, 'yamnet.tflite') + with open(tflite_model_path, 'wb') as f: + f.write(tflite_model) + log('Done') + + # Check the TF-Lite export. + log('Checking TF-Lite model ...') + interpreter = tf.lite.Interpreter(tflite_model_path) + audio_input_index = interpreter.get_input_details()[0]['index'] + scores_output_index = interpreter.get_output_details()[0]['index'] + embeddings_output_index = interpreter.get_output_details()[1]['index'] + spectrogram_output_index = interpreter.get_output_details()[2]['index'] + def run_model(waveform): + interpreter.resize_tensor_input(audio_input_index, [len(waveform)], strict=True) + interpreter.allocate_tensors() + interpreter.set_tensor(audio_input_index, waveform) + interpreter.invoke() + return (interpreter.get_tensor(scores_output_index), + interpreter.get_tensor(embeddings_output_index), + interpreter.get_tensor(spectrogram_output_index)) + check_model(run_model, 'yamnet_class_map.csv', params) + log('Done') + + return saved_model_dir + + +def make_tfjs_export(tflite_saved_model_dir, export_dir): + if os.path.exists(export_dir): + log('TF-JS export already exists in {}, skipping TF-JS export'.format( + export_dir)) + return + + # Make a TF-JS model from the TF-Lite SavedModel export. 
+ log('Making TF-JS model ...') + os.makedirs(export_dir) + tfjs_saved_model_converter.convert_tf_saved_model( + tflite_saved_model_dir, export_dir) + log('Done') + + +def main(args): + weights_path = args[0] + output_dir = args[1] + + tf2_export_dir = os.path.join(output_dir, 'tf2') + make_tf2_export(weights_path, tf2_export_dir) + + tflite_export_dir = os.path.join(output_dir, 'tflite') + tflite_saved_model_dir = make_tflite_export(weights_path, tflite_export_dir) + + tfjs_export_dir = os.path.join(output_dir, 'tfjs') + make_tfjs_export(tflite_saved_model_dir, tfjs_export_dir) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/research/audioset/yamnet/features.py b/research/audioset/yamnet/features.py index 98661124787c1b3f672185483c5715edb375cb2a..9b1cf7775db8cd29b154a6703da033566284bced 100644 --- a/research/audioset/yamnet/features.py +++ b/research/audioset/yamnet/features.py @@ -19,61 +19,147 @@ import numpy as np import tensorflow as tf -def waveform_to_log_mel_spectrogram(waveform, params): - """Compute log mel spectrogram of a 1-D waveform.""" +def waveform_to_log_mel_spectrogram_patches(waveform, params): + """Compute log mel spectrogram patches of a 1-D waveform.""" with tf.name_scope('log_mel_features'): # waveform has shape [<# samples>] # Convert waveform into spectrogram using a Short-Time Fourier Transform. # Note that tf.signal.stft() uses a periodic Hann window by default. 
window_length_samples = int( - round(params.SAMPLE_RATE * params.STFT_WINDOW_SECONDS)) + round(params.sample_rate * params.stft_window_seconds)) hop_length_samples = int( - round(params.SAMPLE_RATE * params.STFT_HOP_SECONDS)) + round(params.sample_rate * params.stft_hop_seconds)) fft_length = 2 ** int(np.ceil(np.log(window_length_samples) / np.log(2.0))) num_spectrogram_bins = fft_length // 2 + 1 - magnitude_spectrogram = tf.abs(tf.signal.stft( - signals=waveform, - frame_length=window_length_samples, - frame_step=hop_length_samples, - fft_length=fft_length)) + if params.tflite_compatible: + magnitude_spectrogram = _tflite_stft_magnitude( + signal=waveform, + frame_length=window_length_samples, + frame_step=hop_length_samples, + fft_length=fft_length) + else: + magnitude_spectrogram = tf.abs(tf.signal.stft( + signals=waveform, + frame_length=window_length_samples, + frame_step=hop_length_samples, + fft_length=fft_length)) # magnitude_spectrogram has shape [<# STFT frames>, num_spectrogram_bins] # Convert spectrogram into log mel spectrogram. 
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix( - num_mel_bins=params.MEL_BANDS, + num_mel_bins=params.mel_bands, num_spectrogram_bins=num_spectrogram_bins, - sample_rate=params.SAMPLE_RATE, - lower_edge_hertz=params.MEL_MIN_HZ, - upper_edge_hertz=params.MEL_MAX_HZ) + sample_rate=params.sample_rate, + lower_edge_hertz=params.mel_min_hz, + upper_edge_hertz=params.mel_max_hz) mel_spectrogram = tf.matmul( magnitude_spectrogram, linear_to_mel_weight_matrix) - log_mel_spectrogram = tf.math.log(mel_spectrogram + params.LOG_OFFSET) - # log_mel_spectrogram has shape [<# STFT frames>, MEL_BANDS] + log_mel_spectrogram = tf.math.log(mel_spectrogram + params.log_offset) + # log_mel_spectrogram has shape [<# STFT frames>, params.mel_bands] - return log_mel_spectrogram - - -def spectrogram_to_patches(spectrogram, params): - """Break up a spectrogram into a stack of fixed-size patches.""" - with tf.name_scope('feature_patches'): - # Frame spectrogram (shape [<# STFT frames>, MEL_BANDS]) into patches - # (the input examples). - # Only complete frames are emitted, so if there is less than - # PATCH_WINDOW_SECONDS of waveform then nothing is emitted + # Frame spectrogram (shape [<# STFT frames>, params.mel_bands]) into patches + # (the input examples). Only complete frames are emitted, so if there is + # less than params.patch_window_seconds of waveform then nothing is emitted # (to avoid this, zero-pad before processing). 
- hop_length_samples = int( - round(params.SAMPLE_RATE * params.STFT_HOP_SECONDS)) - spectrogram_sr = params.SAMPLE_RATE / hop_length_samples + spectrogram_hop_length_samples = int( + round(params.sample_rate * params.stft_hop_seconds)) + spectrogram_sample_rate = params.sample_rate / spectrogram_hop_length_samples patch_window_length_samples = int( - round(spectrogram_sr * params.PATCH_WINDOW_SECONDS)) + round(spectrogram_sample_rate * params.patch_window_seconds)) patch_hop_length_samples = int( - round(spectrogram_sr * params.PATCH_HOP_SECONDS)) + round(spectrogram_sample_rate * params.patch_hop_seconds)) features = tf.signal.frame( - signal=spectrogram, + signal=log_mel_spectrogram, frame_length=patch_window_length_samples, frame_step=patch_hop_length_samples, axis=0) - # features has shape [<# patches>, <# STFT frames in an patch>, MEL_BANDS] + # features has shape [<# patches>, <# STFT frames in an patch>, params.mel_bands] + + return log_mel_spectrogram, features + + +def pad_waveform(waveform, params): + """Pads waveform with silence if needed to get an integral number of patches.""" + # In order to produce one patch of log mel spectrogram input to YAMNet, we + # need at least one patch window length of waveform plus enough extra samples + # to complete the final STFT analysis window. + min_waveform_seconds = ( + params.patch_window_seconds + + params.stft_window_seconds - params.stft_hop_seconds) + min_num_samples = tf.cast(min_waveform_seconds * params.sample_rate, tf.int32) + num_samples = tf.shape(waveform)[0] + num_padding_samples = tf.maximum(0, min_num_samples - num_samples) + + # In addition, there might be enough waveform for one or more additional + # patches formed by hopping forward. If there are more samples than one patch, + # round up to an integral number of hops. 
+ num_samples = tf.maximum(num_samples, min_num_samples) + num_samples_after_first_patch = num_samples - min_num_samples + hop_samples = tf.cast(params.patch_hop_seconds * params.sample_rate, tf.int32) + num_hops_after_first_patch = tf.cast(tf.math.ceil( + tf.cast(num_samples_after_first_patch, tf.float32) / + tf.cast(hop_samples, tf.float32)), tf.int32) + num_padding_samples += ( + hop_samples * num_hops_after_first_patch - num_samples_after_first_patch) + + padded_waveform = tf.pad(waveform, [[0, num_padding_samples]], + mode='CONSTANT', constant_values=0.0) + return padded_waveform + + +def _tflite_stft_magnitude(signal, frame_length, frame_step, fft_length): + """TF-Lite-compatible version of tf.abs(tf.signal.stft()).""" + def _hann_window(): + return tf.reshape( + tf.constant( + (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(0, 1.0, 1.0 / frame_length)) + ).astype(np.float32), + name='hann_window'), [1, frame_length]) + + def _dft_matrix(dft_length): + """Calculate the full DFT matrix in NumPy.""" + # See https://en.wikipedia.org/wiki/DFT_matrix + omega = (0 + 1j) * 2.0 * np.pi / float(dft_length) + # Don't include 1/sqrt(N) scaling, tf.signal.rfft doesn't apply it. + return np.exp(omega * np.outer(np.arange(dft_length), np.arange(dft_length))) + + def _rdft(framed_signal, fft_length): + """Implement real-input Discrete Fourier Transform by matmul.""" + # We are right-multiplying by the DFT matrix, and we are keeping only the + # first half ("positive frequencies"). So discard the second half of rows, + # but transpose the array for right-multiplication. The DFT matrix is + # symmetric, so we could have done it more directly, but this reflects our + # intention better. 
+ complex_dft_matrix_kept_values = _dft_matrix(fft_length)[:( + fft_length // 2 + 1), :].transpose() + real_dft_matrix = tf.constant( + np.real(complex_dft_matrix_kept_values).astype(np.float32), + name='real_dft_matrix') + imag_dft_matrix = tf.constant( + np.imag(complex_dft_matrix_kept_values).astype(np.float32), + name='imaginary_dft_matrix') + signal_frame_length = tf.shape(framed_signal)[-1] + half_pad = (fft_length - signal_frame_length) // 2 + padded_frames = tf.pad( + framed_signal, + [ + # Don't add any padding in the frame dimension. + [0, 0], + # Pad before and after the signal within each frame. + [half_pad, fft_length - signal_frame_length - half_pad] + ], + mode='CONSTANT', + constant_values=0.0) + real_stft = tf.matmul(padded_frames, real_dft_matrix) + imag_stft = tf.matmul(padded_frames, imag_dft_matrix) + return real_stft, imag_stft + + def _complex_abs(real, imag): + return tf.sqrt(tf.add(real * real, imag * imag)) - return features + framed_signal = tf.signal.frame(signal, frame_length, frame_step) + windowed_signal = framed_signal * _hann_window() + real_stft, imag_stft = _rdft(windowed_signal, fft_length) + stft_magnitude = _complex_abs(real_stft, imag_stft) + return stft_magnitude diff --git a/research/audioset/yamnet/inference.py b/research/audioset/yamnet/inference.py index 1aa015550933c8696e56f92bdedd4de61ac518cb..88509b0f0e2ead6da92c05ceb4180f5f7d89a409 100644 --- a/research/audioset/yamnet/inference.py +++ b/research/audioset/yamnet/inference.py @@ -23,17 +23,16 @@ import resampy import soundfile as sf import tensorflow as tf -import params +import params as yamnet_params import yamnet as yamnet_model def main(argv): - assert argv + assert argv, 'Usage: inference.py ...' 
- graph = tf.Graph() - with graph.as_default(): - yamnet = yamnet_model.yamnet_frames_model(params) - yamnet.load_weights('yamnet.h5') + params = yamnet_params.Params() + yamnet = yamnet_model.yamnet_frames_model(params) + yamnet.load_weights('yamnet.h5') yamnet_classes = yamnet_model.class_names('yamnet_class_map.csv') for file_name in argv: @@ -41,24 +40,22 @@ def main(argv): wav_data, sr = sf.read(file_name, dtype=np.int16) assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype waveform = wav_data / 32768.0 # Convert to [-1.0, +1.0] + waveform = waveform.astype('float32') # Convert to mono and the sample rate expected by YAMNet. if len(waveform.shape) > 1: waveform = np.mean(waveform, axis=1) - if sr != params.SAMPLE_RATE: - waveform = resampy.resample(waveform, sr, params.SAMPLE_RATE) + if sr != params.sample_rate: + waveform = resampy.resample(waveform, sr, params.sample_rate) # Predict YAMNet classes. - # Second output is log-mel-spectrogram array (used for visualizations). - # (steps=1 is a work around for Keras batching limitations.) - with graph.as_default(): - scores, _ = yamnet.predict(np.reshape(waveform, [1, -1]), steps=1) + scores, embeddings, spectrogram = yamnet(waveform) # Scores is a matrix of (time_frames, num_classes) classifier scores. # Average them along time to get an overall classifier output for the clip. prediction = np.mean(scores, axis=0) # Report the highest-scoring classes and their scores. 
top5_i = np.argsort(prediction)[::-1][:5] - print(file_name, ':\n' + + print(file_name, ':\n' + '\n'.join(' {:12s}: {:.3f}'.format(yamnet_classes[i], prediction[i]) for i in top5_i)) diff --git a/research/audioset/yamnet/params.py b/research/audioset/yamnet/params.py index 5d848ad71695f2fdb29eddea5b7c135509fa5fe2..306c94218d942e1232d50e05a975f1cba7ca6ad3 100644 --- a/research/audioset/yamnet/params.py +++ b/research/audioset/yamnet/params.py @@ -15,28 +15,37 @@ """Hyperparameters for YAMNet.""" -# The following hyperparameters (except PATCH_HOP_SECONDS) were used to train YAMNet, +from dataclasses import dataclass + +# The following hyperparameters (except patch_hop_seconds) were used to train YAMNet, # so expect some variability in performance if you change these. The patch hop can # be changed arbitrarily: a smaller hop should give you more patches from the same # clip and possibly better performance at a larger computational cost. -SAMPLE_RATE = 16000 -STFT_WINDOW_SECONDS = 0.025 -STFT_HOP_SECONDS = 0.010 -MEL_BANDS = 64 -MEL_MIN_HZ = 125 -MEL_MAX_HZ = 7500 -LOG_OFFSET = 0.001 -PATCH_WINDOW_SECONDS = 0.96 -PATCH_HOP_SECONDS = 0.48 +@dataclass(frozen=True) # Instances of this class are immutable. 
+class Params: + sample_rate: float = 16000.0 + stft_window_seconds: float = 0.025 + stft_hop_seconds: float = 0.010 + mel_bands: int = 64 + mel_min_hz: float = 125.0 + mel_max_hz: float = 7500.0 + log_offset: float = 0.001 + patch_window_seconds: float = 0.96 + patch_hop_seconds: float = 0.48 + + @property + def patch_frames(self): + return int(round(self.patch_window_seconds / self.stft_hop_seconds)) + + @property + def patch_bands(self): + return self.mel_bands -PATCH_FRAMES = int(round(PATCH_WINDOW_SECONDS / STFT_HOP_SECONDS)) -PATCH_BANDS = MEL_BANDS -NUM_CLASSES = 521 -CONV_PADDING = 'same' -BATCHNORM_CENTER = True -BATCHNORM_SCALE = False -BATCHNORM_EPSILON = 1e-4 -CLASSIFIER_ACTIVATION = 'sigmoid' + num_classes: int = 521 + conv_padding: str = 'same' + batchnorm_center: bool = True + batchnorm_scale: bool = False + batchnorm_epsilon: float = 1e-4 + classifier_activation: str = 'sigmoid' -FEATURES_LAYER_NAME = 'features' -EXAMPLE_PREDICTIONS_LAYER_NAME = 'predictions' + tflite_compatible: bool = False diff --git a/research/audioset/yamnet/yamnet.py b/research/audioset/yamnet/yamnet.py index ce36ff8cc462bc3a37bcaacd615d7c997d46f6ef..cac7f87d99e1c03991f36d4afde807353e886dfd 100644 --- a/research/audioset/yamnet/yamnet.py +++ b/research/audioset/yamnet/yamnet.py @@ -22,53 +22,52 @@ import tensorflow as tf from tensorflow.keras import Model, layers import features as features_lib -import params -def _batch_norm(name): +def _batch_norm(name, params): def _bn_layer(layer_input): return layers.BatchNormalization( name=name, - center=params.BATCHNORM_CENTER, - scale=params.BATCHNORM_SCALE, - epsilon=params.BATCHNORM_EPSILON)(layer_input) + center=params.batchnorm_center, + scale=params.batchnorm_scale, + epsilon=params.batchnorm_epsilon)(layer_input) return _bn_layer -def _conv(name, kernel, stride, filters): +def _conv(name, kernel, stride, filters, params): def _conv_layer(layer_input): output = layers.Conv2D(name='{}/conv'.format(name), filters=filters, 
kernel_size=kernel, strides=stride, - padding=params.CONV_PADDING, + padding=params.conv_padding, use_bias=False, activation=None)(layer_input) - output = _batch_norm(name='{}/conv/bn'.format(name))(output) + output = _batch_norm('{}/conv/bn'.format(name), params)(output) output = layers.ReLU(name='{}/relu'.format(name))(output) return output return _conv_layer -def _separable_conv(name, kernel, stride, filters): +def _separable_conv(name, kernel, stride, filters, params): def _separable_conv_layer(layer_input): output = layers.DepthwiseConv2D(name='{}/depthwise_conv'.format(name), kernel_size=kernel, strides=stride, depth_multiplier=1, - padding=params.CONV_PADDING, + padding=params.conv_padding, use_bias=False, activation=None)(layer_input) - output = _batch_norm(name='{}/depthwise_conv/bn'.format(name))(output) + output = _batch_norm('{}/depthwise_conv/bn'.format(name), params)(output) output = layers.ReLU(name='{}/depthwise_conv/relu'.format(name))(output) output = layers.Conv2D(name='{}/pointwise_conv'.format(name), filters=filters, kernel_size=(1, 1), strides=1, - padding=params.CONV_PADDING, + padding=params.conv_padding, use_bias=False, activation=None)(output) - output = _batch_norm(name='{}/pointwise_conv/bn'.format(name))(output) + output = _batch_norm('{}/pointwise_conv/bn'.format(name), params)(output) output = layers.ReLU(name='{}/pointwise_conv/relu'.format(name))(output) return output return _separable_conv_layer @@ -93,47 +92,46 @@ _YAMNET_LAYER_DEFS = [ ] -def yamnet(features): +def yamnet(features, params): """Define the core YAMNet mode in Keras.""" net = layers.Reshape( - (params.PATCH_FRAMES, params.PATCH_BANDS, 1), - input_shape=(params.PATCH_FRAMES, params.PATCH_BANDS))(features) + (params.patch_frames, params.patch_bands, 1), + input_shape=(params.patch_frames, params.patch_bands))(features) for (i, (layer_fun, kernel, stride, filters)) in enumerate(_YAMNET_LAYER_DEFS): - net = layer_fun('layer{}'.format(i + 1), kernel, stride, 
filters)(net) - net = layers.GlobalAveragePooling2D()(net) - logits = layers.Dense(units=params.NUM_CLASSES, use_bias=True)(net) - predictions = layers.Activation( - name=params.EXAMPLE_PREDICTIONS_LAYER_NAME, - activation=params.CLASSIFIER_ACTIVATION)(logits) - return predictions + net = layer_fun('layer{}'.format(i + 1), kernel, stride, filters, params)(net) + embeddings = layers.GlobalAveragePooling2D()(net) + logits = layers.Dense(units=params.num_classes, use_bias=True)(embeddings) + predictions = layers.Activation(activation=params.classifier_activation)(logits) + return predictions, embeddings -def yamnet_frames_model(feature_params): +def yamnet_frames_model(params): """Defines the YAMNet waveform-to-class-scores model. Args: - feature_params: An object with parameter fields to control the feature - calculation. + params: An instance of Params containing hyperparameters. Returns: - A model accepting (1, num_samples) waveform input and emitting a - (num_patches, num_classes) matrix of class scores per time frame as - well as a (num_spectrogram_frames, num_mel_bins) spectrogram feature - matrix. + A model accepting (num_samples,) waveform input and emitting: + - predictions: (num_patches, num_classes) matrix of class scores per time frame + - embeddings: (num_patches, embedding size) matrix of embeddings per time frame + - log_mel_spectrogram: (num_spectrogram_frames, num_mel_bins) spectrogram feature matrix """ - waveform = layers.Input(batch_shape=(1, None)) - # Store the intermediate spectrogram features to use in visualization. 
- spectrogram = features_lib.waveform_to_log_mel_spectrogram( - tf.squeeze(waveform, axis=0), feature_params) - patches = features_lib.spectrogram_to_patches(spectrogram, feature_params) - predictions = yamnet(patches) - frames_model = Model(name='yamnet_frames', - inputs=waveform, outputs=[predictions, spectrogram]) + waveform = layers.Input(batch_shape=(None,), dtype=tf.float32) + waveform_padded = features_lib.pad_waveform(waveform, params) + log_mel_spectrogram, features = features_lib.waveform_to_log_mel_spectrogram_patches( + waveform_padded, params) + predictions, embeddings = yamnet(features, params) + frames_model = Model( + name='yamnet_frames', inputs=waveform, + outputs=[predictions, embeddings, log_mel_spectrogram]) return frames_model def class_names(class_map_csv): """Read the class name definition file and return a list of strings.""" + if tf.is_tensor(class_map_csv): + class_map_csv = class_map_csv.numpy() with open(class_map_csv) as csv_file: reader = csv.reader(csv_file) next(reader) # Skip header diff --git a/research/audioset/yamnet/yamnet_test.py b/research/audioset/yamnet/yamnet_test.py index c3f64859949ce4bc7cc83529334a9e29da0d0124..d0d16da8082217a495f8eeb7e68d56956dd09058 100644 --- a/research/audioset/yamnet/yamnet_test.py +++ b/research/audioset/yamnet/yamnet_test.py @@ -23,46 +23,46 @@ import yamnet class YAMNetTest(tf.test.TestCase): - _yamnet_graph = None + _params = None _yamnet = None _yamnet_classes = None @classmethod def setUpClass(cls): - super(YAMNetTest, cls).setUpClass() - cls._yamnet_graph = tf.Graph() - with cls._yamnet_graph.as_default(): - cls._yamnet = yamnet.yamnet_frames_model(params) - cls._yamnet.load_weights('yamnet.h5') - cls._yamnet_classes = yamnet.class_names('yamnet_class_map.csv') + super().setUpClass() + cls._params = params.Params() + cls._yamnet = yamnet.yamnet_frames_model(cls._params) + cls._yamnet.load_weights('yamnet.h5') + cls._yamnet_classes = yamnet.class_names('yamnet_class_map.csv') def 
clip_test(self, waveform, expected_class_name, top_n=10): """Run the model on the waveform, check that expected class is in top-n.""" - with YAMNetTest._yamnet_graph.as_default(): - prediction = np.mean(YAMNetTest._yamnet.predict( - np.reshape(waveform, [1, -1]), steps=1)[0], axis=0) - top_n_class_names = YAMNetTest._yamnet_classes[ - np.argsort(prediction)[-top_n:]] - self.assertIn(expected_class_name, top_n_class_names) + predictions, embeddings, log_mel_spectrogram = YAMNetTest._yamnet(waveform) + clip_predictions = np.mean(predictions, axis=0) + top_n_indices = np.argsort(clip_predictions)[-top_n:] + top_n_scores = clip_predictions[top_n_indices] + top_n_class_names = YAMNetTest._yamnet_classes[top_n_indices] + top_n_predictions = list(zip(top_n_class_names, top_n_scores)) + self.assertIn(expected_class_name, top_n_class_names, + 'Did not find expected class {} in top {} predictions: {}'.format( + expected_class_name, top_n, top_n_predictions)) def testZeros(self): self.clip_test( - waveform=np.zeros((1, int(3 * params.SAMPLE_RATE))), + waveform=np.zeros((int(3 * YAMNetTest._params.sample_rate),)), expected_class_name='Silence') def testRandom(self): np.random.seed(51773) # Ensure repeatability. 
self.clip_test( waveform=np.random.uniform(-1.0, +1.0, - (1, int(3 * params.SAMPLE_RATE))), + (int(3 * YAMNetTest._params.sample_rate),)), expected_class_name='White noise') def testSine(self): self.clip_test( - waveform=np.reshape( - np.sin(2 * np.pi * 440 * np.linspace( - 0, 3, int(3 *params.SAMPLE_RATE))), - [1, -1]), + waveform=np.sin(2 * np.pi * 440 * + np.arange(0, 3, 1 / YAMNetTest._params.sample_rate)), expected_class_name='Sine wave') diff --git a/research/audioset/yamnet/yamnet_visualization.ipynb b/research/audioset/yamnet/yamnet_visualization.ipynb index 49e2186f2c7df022903f0c74e3937947663dabea..db08acfbc98e13ecb44995a4c95871885117a4de 100644 --- a/research/audioset/yamnet/yamnet_visualization.ipynb +++ b/research/audioset/yamnet/yamnet_visualization.ipynb @@ -1,198 +1,274 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright 2019 The TensorFlow Authors All Rights Reserved.\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# http://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License.\n", - "# ==============================================================================" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Visualization of the YAMNet audio event classification model.\n", - "# See https://github.com/tensorflow/models/tree/master/research/audioset/yamnet/" - ] - }, - { - "cell_type": 
"code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Imports.\n", - "import numpy as np\n", - "import soundfile as sf\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import params\n", - "import yamnet as yamnet_model\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sample rate = 16000\n" - ] + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "colab": { + "name": "yamnet_visualization.ipynb", + "provenance": [] } - ], - "source": [ - "# Read in the audio.\n", - "# You can get this example waveform via:\n", - "# curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav\n", - "\n", - "wav_file_name = 'speech_whistling2.wav'\n", - "\n", - "wav_data, sr = sf.read(wav_file_name, dtype=np.int16)\n", - "waveform = wav_data / 32768.0\n", - "# The graph is designed for a sampling rate of 16 kHz, but higher rates \n", - "# should work too.\n", - "params.SAMPLE_RATE = sr\n", - "print(\"Sample rate =\", params.SAMPLE_RATE)" - ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From /Users/dpwe/google/vggish/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1635: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using 
Keras pass *_constraint arguments to layers.\n" - ] - } - ], - "source": [ - "# Set up the YAMNet model.\n", - "class_names = yamnet_model.class_names('yamnet_class_map.csv')\n", - "params.PATCH_HOP_SECONDS = 0.1 # 10 Hz scores frame rate.\n", - "graph = tf.Graph()\n", - "with graph.as_default():\n", - " yamnet = yamnet_model.yamnet_frames_model(params)\n", - " yamnet.load_weights('yamnet.h5')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "metadata": { + "id": "xcZmKVHusxQT" + }, + "source": [ + "# Copyright 2019 The TensorFlow Authors All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ], + "execution_count": 1, + "outputs": [] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:When passing input data as arrays, do not specify `steps_per_epoch`/`steps` argument. 
Please use `batch_size` instead.\n" - ] - } - ], - "source": [ - "# Run the model.\n", - "with graph.as_default():\n", - " scores, spectrogram = yamnet.predict(np.reshape(waveform, [1, -1]), steps=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "metadata": { + "id": "0sDpjNbksxQa" + }, + "source": [ + "# Visualization of the YAMNet audio event classification model.\n", + "# See https://github.com/tensorflow/models/tree/master/research/audioset/yamnet/\n", + "#\n", + "# This notebook can be run in Google Colab at https://colab.research.google.com\n", + "# by either downloading this ipynb and uploading it, or by looking up the\n", + "# notebook directly on GitHub in Colab's \"Open notebook\" dialog." + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "hnI3jFRHs-N7", + "outputId": "24b5696f-e4cb-4d49-bddc-40ab3ef211b9", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "# Install required packages.\n", + "!pip install soundfile\n", + "!git clone https://github.com/tensorflow/models.git\n", + "%cd models/research/audioset/yamnet\n", + "\n", + "# Download YAMNet data\n", + "!curl -O https://storage.googleapis.com/audioset/yamnet.h5\n", + "\n", + "# Download audio for testing\n", + "!curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav\n", + "\n", + "!ls -l" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting soundfile\n", + " Downloading https://files.pythonhosted.org/packages/eb/f2/3cbbbf3b96fb9fa91582c438b574cff3f45b29c772f94c400e2c99ef5db9/SoundFile-0.10.3.post1-py2.py3-none-any.whl\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.6/dist-packages (from soundfile) (1.14.3)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi>=1.0->soundfile) (2.20)\n", + "Installing collected 
packages: soundfile\n", + "Successfully installed soundfile-0.10.3.post1\n", + "Cloning into 'models'...\n", + "remote: Enumerating objects: 67, done.\u001b[K\n", + "remote: Counting objects: 100% (67/67), done.\u001b[K\n", + "remote: Compressing objects: 100% (65/65), done.\u001b[K\n", + "remote: Total 46144 (delta 26), reused 43 (delta 2), pack-reused 46077\u001b[K\n", + "Receiving objects: 100% (46144/46144), 551.17 MiB | 32.01 MiB/s, done.\n", + "Resolving deltas: 100% (31621/31621), done.\n", + "/content/models/research/audioset/yamnet\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 14.5M 100 14.5M 0 0 17.0M 0 --:--:-- --:--:-- --:--:-- 17.0M\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 153k 100 153k 0 0 1314k 0 --:--:-- --:--:-- --:--:-- 1314k\n", + "total 15296\n", + "-rw-r--r-- 1 root root 7816 Oct 22 17:31 export.py\n", + "-rw-r--r-- 1 root root 7490 Oct 22 17:31 features.py\n", + "-rw-r--r-- 1 root root 2307 Oct 22 17:31 inference.py\n", + "-rw-r--r-- 1 root root 1847 Oct 22 17:31 params.py\n", + "-rw-r--r-- 1 root root 5012 Oct 22 17:31 README.md\n", + "-rw-r--r-- 1 root root 157484 Oct 22 17:31 speech_whistling2.wav\n", + "-rw-r--r-- 1 root root 14096 Oct 22 17:31 yamnet_class_map.csv\n", + "-rw-r--r-- 1 root root 15296092 Oct 22 17:31 yamnet.h5\n", + "-rw-r--r-- 1 root root 5549 Oct 22 17:31 yamnet.py\n", + "-rw-r--r-- 1 root root 2564 Oct 22 17:31 yamnet_test.py\n", + "-rw-r--r-- 1 root root 140923 Oct 22 17:31 yamnet_visualization.ipynb\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "M0woGtbhsxQg" + }, + "source": [ + "# Imports.\n", + "import numpy as np\n", + "import soundfile as sf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import params as yamnet_params\n", + "import yamnet as yamnet_model\n", + "import tensorflow as tf" + ], 
+ "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "jt2v3i94sxQl" + }, + "source": [ + "# Read in the audio.\n", + "wav_file_name = 'speech_whistling2.wav'\n", + "wav_data, sr = sf.read(wav_file_name, dtype=np.int16)\n", + "waveform = wav_data / 32768.0" + ], + "execution_count": 5, + "outputs": [] + }, { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAArgAAAHSCAYAAAAHR7iOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd5wcdd3A8c93Ztv1VNIhgYQSCBAIvUqHKIgFwQKoyOOjqDw8iqGKSLWL5REEUVARBAsYivQeILSQAqT3cmnXt838nj9mbrN3ubK7t+3uvu/X65Ldqb/9zc7sd37zK2KMQSmllFJKqYHCKnUClFJKKaWUyicNcJVSSiml1ICiAa5SSimllBpQNMBVSimllFIDiga4SimllFJqQNEAVymllFJKDSiBUidAlY8RI0aYiRMnljoZSimllFK9evPNNzcbY0Z2NU8DXJUyceJE5s6dW+pkKKWUUkr1SkRWdjdPqygopZRSSqkBRQNcpZRSSik1oGiAq5RSSimlBhQNcJXKQkssycRZs3ngjdWlTopSSimluqEBrlJZ2NAYBeC3zy8tcUqUUkop1R0NcJVSSiml1ICiAW6ZE5HTROQDEVkiIrO6mH+hiNSLyDv+30Vp8y4QkcX+3wXFTblSSimlVGloP7hlTERs4NfAycAa4A0RedgYs7DTovcbYy7ptO4w4HvADMAAb/rrbitC0pVSSimlSkZLcMvbocASY8wyY0wc+CtwVobrngo8aYzZ6ge1TwKnFSidg9ai9Y1cePfrxJJOqZOilFJKKZ8GuOVtHJDeXH+NP62zT4rIPBF5UEQmZLOuiFwsInNFZG59fX2+0j1gGdPx/ayH5vHcB/W8vWp7aRKklFJKqZ1ogNv/PQJMNMbsj1dK+8dsVjbG3GGMmWGMmTFyZJfDOSvgoj/OZcYNT+6YIN5/765pAOB27VVBKaUGBGMMD7yxmpZYstRJUX2gAW55WwtMSHs/3p+WYozZYoyJ+W/vBA7OdF2VuacWbWRzc5xVW1sAWFbf0mH+ss0tXa2mlFKqH3l16RbO//3rXP7QPC74/eulTo7qAw1wy9sbwBQRmSQiIeBc4OH0BURkTNrbM4FF/usngFNEZKiIDAVO8aepPuiuKsLKLa1FTolSSql8O+93c3hx8WYA5q7UNtn9mfaiUMaMMUkRuQQvMLWB3xtjFojI9cBcY8zDwDdF5EwgCWwFLvTX3SoiP8ALkgGuN8ZsLfqHUEoppZQqMg1wy5wx5lHg0U7Trk17fQVwRTfr/h74fUETOMi8sULvEZQaDFZuaeHOF5dz3Zn7YltS6uSoEvnH22s4e/r4UidD5UCrKCiVhc3N8VInQSlVBF//y1vcO2clC9Y1lDopqoT+5/53S50ElSMNcJVSSqluNLQlSp2Eotjnmse55bH3AWiNa+8Bqv/TAFepXvzzbe18QqnBZv7aRgC+cNfgaEnflnD47fNLeXz+eqZe+wTz1gy+vr3//taaUidB5ZEGuEr14tL730m9XrKpuYQpUUoVw+qtg7dXlK/+6S0A5q3JvmqG6xpM59Fw+pE/v7aq1ElQeaQBrlJKKZXmTe0eiljSzXjZR95dx8RZ
s9n9yke57eklBUxVYXV33B23/wbtg5kGuEoppVSa9Kc2g0FXAdxtTy/OeP1v3Pd26vXPnvowL2kqJ399Q0t2+yMNcJXqwZJNTaVOglKqiAZjad321p17hxksjesyccuj75c6CSoHGuAq1YPP3zk4GpgopTyvLd+y07R9rnm8BCkpnu8+NC+v27vv9f5X4tnTjU1TTHuV6I80wFWqB04/bjChlMpBF6d8W8IpfjqK6KlFm/K6vSv+/l5et1cMC9c1ljoJKs80wFVKKaXUoLZYq6MNODpUr1I9qG+KlToJSqkiWra5pcvpq7e2MmFYZZFTU/7i3fS2MHHWbIZWBtnWmmDOFScyui5S5JRl57IHdMSygUZLcJXqxuvLt5Y6CUqpIrv6n/O7nH7MD58tckpK77VlO9dH7mzPqx/rdt62Vq+h2jurtdu1cvX68q28sWJg/tZpgKtUN/I9qMOTCzeScDLvW1IppUrpM3fMyct2ogm97pWrc25/lU//9tVSJ6MgNMBVqhv5HI/9hQ/r+co9c/n5AOwjUimlenLp/e9wXp6CZVUY0QwbUt47Z2WHfo/LmQa4SnXj509l3tF5b7a2eP1MLqvvun6fUkqVgttLv7+rtnQ/bHE2w/K+mkF1B1Vc6cf+3dXbe13+l08v5pp/zueRd9exsTGamr6+oa0g6esrDXCV6kZLHktw2z02fwMvLq7P+3aVUioXu1/5aI/zo8nuS/ba69hmqi1ent2tZRKor9wy8AonrvzHju7cPnPHnC5vdra1xDn4B0/y2d/N4SdP7ngC+YN/L2T11la+eu+bHHHzM7y0eDMAtzz2Pjc9uqjwic+ABrhKdSNfXeA2tCY6DP3ZfiFQqr97+N11PLlwY1YleeVsS/Pg6jWlux4Q0iWd7o/tj57IboSvfa4tzwEzMhm87ot/eKPwCfG1xJKs3tp9yXm+/PWN1R3eX/fIgp2WeWFxPVta4ryytGMJ/L/nreeYHz7L4ws2APD5u15j4qzZ/Pb5pdzxwrLCJToLGuCWORE5TUQ+EJElIjKri/mXichCEZknIk+LyG5p8xwRecf/e7i4KR/4Mn0ss2IA3vmrwaehLdGhhKehLcE373ubr9wzl7/NXVPClOXPwTc81eP85ABrJNpTDwjtnv+w+ydO972+utt53Yn1UCJcKm+u7L2Xh2JWL9v3e09wzA+f5ebHilsSes+rKzvcrG5pjvGtv77Twxrdu+/1Vby8ZDNzSlg1RQPcMiYiNvBr4HRgKnCeiEzttNjbwAxjzP7Ag8AP0+a1GWMO9P/OLEqiB5F/vr2u1ElQqiga2hIc8P3/cOsT77NkUzN7XPkoB3z/P6n5l+d5qNdy9ci8wXfO3/r4+3kdenevq8uvFDeT+qdAUXrBSb+Juv35ZazZVviS3HTpNy1f+/NbOW/nir+/x+fufI1z75jDs+/nd6S8TGmAW94OBZYYY5YZY+LAX4Gz0hcwxjxrjGk/A+YA44ucxkHr1sezezzXzgCPvLuOibNms701nt9EKVUA7QHO7c8v46SfPo+TyTPdAeh/7h+cgwF0NfTu3+ZmX3pbrjLt3WZbS+Gv1+d26m1i3pqGgu8zXXq93Nfy1Bd8Mat3pNMAt7yNA9KvImv8ad35MpD+zCkiInNFZI6IfLyrFUTkYn+ZufX12vipGIwx3PnScgCWdzNqklLlYmNjlFsey+1mrj8ZKPWIM5Xt5+1cJ/Q7Dw6cUvuWDBu/da6zWghzO1WX6Espal9kUm0jGxNnzebZD4pbkqsB7gAhIp8HZgA/Spu8mzFmBvBZ4Ociskfn9YwxdxhjZhhjZowcObJIqVV5a8GmVAHd/8YqDrvp6VInoyj+/Fr+HsP3B9mWwn/2Tu3H9qdPFqYf80XrG5k4azZH3/pMQbafrcZogk/+3yt53+4X736jqE9/NMAtb2uBCWnvx/vTOhCRk4CrgDONMalmwMaYtf7/y4DngOmFTOxAYYzJa2vq15Z3rGT/5MKN
qVa7IpK3/SiVT45r+O5DOz+a7s6f5qwsYGoKr7shegeqbPulXb01f32dPvP+xrxtayA4/RcvArBmW/H6k+0p0Nz/uv90O6+v/l3Eeuwa4Ja3N4ApIjJJRELAuUCH3hBEZDpwO15wuylt+lARCfuvRwBHAQuLlvJ+7IG5q3ttTZ2phONy06MdH++u2NLKe2uLW69KqWzdMDu7y0UxA8RVW1oHXZWCfMulIK2hLbt+b7vz5EINcEutMU/HMlu59sqQCw1wy5gxJglcAjwBLAIeMMYsEJHrRaS9V4QfAdXA3zp1B7YPMFdE3gWeBW4xxmiAm4GXl+SvWxNXf4RVP3X3yyuyXqcYQec/317LsT96lv99YHA2+MqXtTmUFv7Fr8bx1qq+1c/MpXuxwez9DY153+Yvns7fSJ3lSgPcMmeMedQYs6cxZg9jzI3+tGuNMQ/7r08yxozq3B2YMeYVY8w0Y8wB/v93lfJzlKMF6xp45N3Sdvvz8V+/POg6l1d9k3RcJs6azT2vrijYPlpzHMXv1sc/yHNKdvZ/zy0F4O9vry3JuZPJ4Aj9wfMfZt/gp73nmE/8Jv/1M1X3Yon8f+f+8MqKvG+z3GiAqwatmbe9xDfue3un6fmsFiv0vrHFm5rzt0M1oD25cCOTr/I6Srn2XzuPOpQv67bnVhfwt88vzXNKdvbBxqbU60I0hOnND3PsHrDcNLbldhOjXRvm197X9D7YxkB7Eji/SFX0NMBVqpN8Nvsy9H5h2tAQ5ehbn+lyaEbXNWxsjOYxRSpTry7dwsRZs5k4azYNraWpr5auJZbkK/fM7TBt5m0vFmRf5TKISUNbgleWbCaedLn50UXsf90THeav2FLcTvCBVBd//V22jczaHXj9k3lOSWmVui53NIPS2YdL/KQx3z76y5dYuK6Rx+ev7zA6Yr5pgKtUAWVy7fz722tZs62NY374LBsadgSzq7a0ctjNT3PYTU/nXKKmcnfe73Z0i3TA9YVrVZyJv7+1hn2/98RO0xesy3/dPIBfPbukINvN1sX3zOWzd77GH15Zzu0vLKMxmlupo1LdSX8iUGyZjoyWS334cnfGbS/y1T+9xe5XPlqwfWiAqwa9znfwxe66K71lcvroaJ++/RXqm7w6hn+as7LkJQ2DSbnVs7yshwZVxhiMMRzzw2e4+bFFBS0RKbb2kZQ690SiVL58sKF0Ae73Hi5cNaP+pDFamCdkGuCqQe/+tNFp/jZ3Nau6qCqQia6qGGQifRz0f7y9NtWIZlvLjpP+N88t5ZF563Pavsre7V3UJb3/jY4DAcxZtqXDsJal8rMnP+SG2YtYvbWN259fVtASkUz0NcDe1Bjln2+vpSWWWWmtPt1QffFKlr3mvLR4c972/ZcsBhfZ48pHmbdme+8LZqC94KRc7H/dfzIuzc5GIO9bVKofSC81fe6Deuau3May+mbeWpX7BeR/H3iXB756RIdpx//ouay3c+vj7+O4LvFOJ3y5XZQGsp90MWLRdx96j6OnjOSoW57hpH124alFXiv0C46YyF6jawqWlt5G/vnL66vZ3Kk3gZeXbOaoySMKlqaeXPiHN3jhw3oW33g6QTv7MpRD/ZHTpo2ry2j5Xz+7hN2GV/KloyYRyGF/qjS2t8YZUhkqdTKybsC1tL6Zo6cU/9xyXMOZv3qZFbfM7PO2nv+wPg8pyq8pVz2Wl8+WTq8GalBK70Pz8QUbePDNNX0KbgFeX7F1p2kbcmwg9uP/7Bxg/fGVFTQV6FGO2qGnEpqjbvGG0mwPbgHWNRS2BLG3ngI6B7cAn7vztUIlp1cv+D+e/3qnbw1jMh0M5c+vreKmR9/nvtdzH2o3XwMYqMy9sjR//Y33RWvCyWr5Gx9dVKCUFE8hSkvLkQa4alBaX+CgpBBWbW3VOltF8Pm7sgsOb32scPVDk47LO6vz81iy2L79t+wHYljShy7z5q3Jveuh/ng96O++9ue3Sp0EAGZnWfWr3Orn5+LnT+1c
gFIOHnsvv9XwNMBVKo/a4k7qR7oQjcK2NGsflOXm/QI1UokmnFSft4PFST99Pud1//bmmpzXXV7fktXyA6khnyqNQjWsysTGxvKs7nbp/fkdxlfr4KpBKdMGLNn6xn1vpR5ff+rg8Xnf/kDr8LvclFMn9g/38RF/rgZjbx0PvbU2q+Vnv7eejx0wtiBpeXFxPf94ay0/OeeAgvXoMhiPcbm5/pGFpU5C2YnluXRcS3DVoFSoDuJfTmuR+2AfSpS68+Lizazckl1pk8rMqi2tOXdiX4jSmMsfmpf3bWZiWx4HtegvdcafWrQxq+Ufn7+hIOlIOi5fuOt1/v72Wj72q5cKln9PL8p+mN5C6K+Bdm8NPzOR6+9Df82zUtAAVw06W1sKV0rXlmWDhVwcl0PPDKp3x/7o2ZzX3f+60g4E0ZW3Vm0rdRJ4MY9dKpWT2XmuK9juhcU7WrfPX9vItAJ9r5ZvLo+b5LV+F28X/fGNflXX/PkPS3eDkI/gupzls/qPBrhq0HktxyEq1cD13AflUaKVT7lWcXh/Q2FGR+vNsvrcG5gNFF/6w9ydpq0qwNOmP766Iu/bzMXdL69g+eYWnlq0iY//+uVSJydjXR2nYlnfMLCHbs9nPVwNcJVSg96Fd79R6iTk3b1zVua03i157BXi0SxKOk/4Se4NzNo9Pr//DoayuJshY4/90bPEkk6X3cHlqlwaq9710nI+8uPnSp2MnJSqoeFzZdiHbT49/G7+2h5ogKtUP3Tmr14qdRJUJ9vyWPUlH48hc91GX7rb6uzlJcWtovDVP2Xe9ZQxhtZ4YRqb5uLkn73Q7by9rn6cGTc8RXOeGscWoyrVQLfXNaXp4eT/nl1Skv0W07/eWZuqvtIXGuCqkmmJJdmSx1KJwWTemgYd2azMXHD360ycNZuJs2Zz76sruPIf7/H68p0H/8hErsNFdzZx1uySNkrZ1ppg4qzZ/LSLkeFKbcpVjzH12idyrpP/k/98wMRZs/Ocqp7t970nUq9/9cxi3lxZ+nrWg1XCMSUpxd04CK773/rrO6lBdfpCA1zVrRWbW3bq1Pr15Vs71GF9c+U2bnp0ERf9cW6HH9LGaIIr//Fet62AF61vZN/vPcHBNzxFWzyz0oTZ89Zz82N9H0VmoNRhOuHHzw2aEWn6g/SSz2v+tYC/vLaKc25/FfBGypo4a3aPAe+HG5t4YO5qAO55dUXe0vXRX77ExFmzSWbwXVmyqTB9+t729GJ++/zSgmy7s4mzZjPtuid6XS7pByd/zrEqxy+fWZLa37o+ljZlG2RPnDWbH//nw15Huetvin3DEO1jSfbuVz6ap5RkznFNn3rx6E+j9vX15lwD3DInIqeJyAciskREZnUxPywi9/vzXxORiWnzrvCnfyAip2az318/u4Tjf/wce179GK8s2cyTCzcycdZszrn9VT5zxxyuf2Qh/563jk/+3yvc8cIynlq0kScWeF3tOK7huw/O4y+vreJ3Lywj6bjc+vj7nHfHHE766fO8uLie03/xYmpf+1z7OG+v2sYdLywllnT8E3g9Nz26iA0NURatb2RbS5yv/+Utbn9+GXtf8xhf+sMb/OudtTmNKnP9vwdG/4NNsST3vprbj7PaoRDduaX79bNLOOD7Xmv4c25/lQfeWL3TMsYYTvnZC1z+4DyMMdz98oq87X/BOq/RWG8jtN329GJO+mn3j8n7Kr1u7+9eWMZGfxjrx+dvyHtg0xRN8q931qZK1Hv6ofxJHkqXj7zlmdS+cilVvT2L4D/XG4VowslLMF5o7fmYyQ1ZX/Vl5Lx2xhieWLCBWDLzYLmvN5Jf/dObOa/7jfve7tO+i2nSFY8ycdbsnBuGi/apVr5ExAY+BE4G1gBvAOcZYxamLfM1YH9jzFdF5FzgbGPMZ0RkKnAfcCgwFngK2NMY0+1ZOGPGDDN37ty8/9iMqg0XdOSUV2adQChgUR0OEAnaO81vr7dWHQ6wvqGNI27u+6OPcjKm
LsLZ08cxc/8x7D6imoqQjTGmYJ3EF1MxPkexS40Anvv28UwcUZV6/1/3zk3dIBbD0pvOwLY65msp8mHpTWewRwlKwRbfeDpTCjxK3HmH7sq0cXV8fPpYKkMdx1R6b00DH/Pr0f/14sM59445Oe9n79E1PPrNY7Csns+TUhzfvvrZZw7gxH1GEQ5YvLp0C4dOGrZTXvZFIfLkmCkjuPfLh7F2exujayMdzrNtLXGm/yC3vra78sZVJ/Hu6u28u2Y7mxpjrNneyjdOmMJ3H5pHXUWQhy85eqd1+uP3AOCpy45l5m0vEUu6PP+d49l1WCUigoi8aYyZ0dU6GuCWMRE5ArjOGHOq//4KAGPMzWnLPOEv86qIBIANwEhgVvqy6ct1t7/d99nfXPf7h7n2XwsK9ZFUidVEAjRFd26ostvwSlamdUf0+cN35YMNTQRti+2tCSJBi8WbmlPrjqgOYQxs6fRoddyQig6NA0ZUhzlwwhDeWb2drS0xdh9ZTUXQZuWWFuoqg6zeumPZ/cbVMn9t111URYIWgrDv2Fp2HVbJ0s0tfLChkbOnj2PPUTW0xh2qwwHqm2I4xlARtFnf0Ma7qxs4YMIQltY3M3F4JXNXbmNZfQv7j69j3fZoXlumK1Wuzp4+jn+8nd1obf3F6NoIk3ep5rBJw5i3tiF1UzyiOsSUXWpYt72N3UZUEUs4VIRsEkmXWNJl/rpGHslji31VGitv/agGuP2RiHwKOM0Yc5H//gvAYcaYS9KWme8vs8Z/vxQ4DLgOmGOM+ZM//S7gMWPMg532cTFwMUBo9OSDx1zw84J/LqWUUkqpvuopwM1fWb/ql4wxdwB3ABww/WDzzNUncfANT5U4VbkZWRPmkIlDSTqGYVUhXl++lS0t8X5VqT4fRtdGCActDp80nFG1YSIhm7dWbmPPUTUctvtw7n11BdGEy0tLNnPxsbszc9oYkq4hmnBYtL6R8UMrqKsIARAKCI1tSba3xVmwtpHV21o5bs9dGFMXoTGaoDXuUBmyWbOtjYa2BLsOq6QxmqA5msS2hCmjathzVDUv+SNaja6LIAit8STjhlRQ3xxjzbY2wgGL6bsOYdH6Jhasa2Dd9ijhgEXccfn0wRMYXh3Cdb2SmZpIgIAtbG2OM7w6jMEQtC2GVAQJB202NUZxDbjGkHQMCcdFBCJBm3Xb21i9tZW9RtcSTTic//vXS3iklCq833zuIA6cMIQj89AqvVwct+dIDpwwhGjS4bgpIxERRtdFSPrnelM0SWUoQDhgEU062CKEAzahgEU4YBEOWqze2sapPy9cnXNVehrglre1wIS09+P9aV0ts8avolAHbMlw3Q6CtjC8Osyz3z6+T51vd35M/cJ3PtJhGNSLjp7EnS8t73Ld/z5+D1ZsbuExv5XohUdO5A+vrOhxf8tvPiOrepr9tQ5Sd+6+8BCO2GN4l/WPu3LcniO7nXfU5BHdzjt7etZJS9l7dG1Gyx2827Dcd+KrHlnd7bw9R9V0eL/ilpkl+T6suGVmh/cbG6McdtPTBd/v9z42le8/spD3f3DaTt+XYuTDMVNGpIbvHVIZ5J1rTynKfqeOqWXher+x3eG7csPHpxV8vx/ecDqhQNftuF3XpFrg9/U7eOsnpzFt3BCmjs3sHOtPltx4OgG7MG3h9xpdw5F7DOeVpfkb2fI//3Mse4ys3ql+e7r31jTwyLx13PHCsj7vL/060hpPsnJLK1N2qSbpGkK21WW97P76+7filpk0RhNsaowyeZcd13G5tft1tIpCGfMD1g+BE/GC0zeAzxpjFqQt83VgWlojs08YY84RkX2Bv7CjkdnTwJRMGpmB18djezc40PVF+M7zZ3DRPd7yL3znI+w6vDI17+lFG/nyH+futO4/vnYkB04YwhMLNnZoCXrvlw9l3JAKdveDE2MMS+ubmbxLDS8v2cyaba1896H3Oux/j5FVPHzJ0VSFs7tP+8RvXuatVf1n3POe/PScA/jEQeNL
nYx+beG6Rs647cXeF8zR45cew2dun5N6ktDdj3b7ObLilpkcdcszeenoPN0vzj2Qsw4c1+38QucDeJ8t6bisb4gyYZh3vWiOJTv075ov737vlFTvFZ1vKKAwP/RHTR7Ony86POv1Hp+/PuNBKl678sQON0PZ3OC/vWob03cdWvZBzm8/fzCn7Te64PvZ3BxjRh+fWHb13epN+g1OMfcL8MunF+el55Bi6umz9tTITEtwy5gxJikilwBPADbwe2PMAhG5HphrjHkYuAu4V0SWAFuBc/11F4jIA8BCIAl8vafgtrP/PWUvLjt5TxKOweDdBP3wk/tz+UPzuHrmPkwdW8uRe+wo7UsPbgFO3GdUh/fPfft4IkGb0XURAE7ddxSTd6lmyaZmZk4bwzFTOpYqikjqLq29VPGEvUcxvCrUa2vh3kwcXjUgAtyLjp6kwW0eFLrka+/Rtbx9zck8+8Gmnc6LdItvPD01+ti+Y2vzFuCef8Ru3PPqSs6YNqbH5aaOrS1oifbb15wMQMC2UsEteL2b5HO/VSGbaz46lbqKYEZBwI8+tT/feXBezvs7fq+R/OycAxlaFcp5G4fvPjzjZUfVRth7dA3vb/C6msrm6dX0XYdmnbZiuvDIiVx35r5F29+I6nAf18/tmPf1N+zuCw/Jed0vHzOp3wS4uQbx7TTALXPGmEeBRztNuzbtdRT4dDfr3gjcmOu+RYRQYMeJeM4hEzj7oHEEM3xkFLItzjxwLECHLpHat/3zzxzIR3/5EkdP6f6xeLqRNX27GBXLmLpIUQaTuPqjUwu+D5W7v1x0GIdO8qpcWJb0GNwCBG2L9loDZx44lv8s7Hu3YecduivXn7Uf15+1X5+3lasvHL4bP/h48fa/4PrTMlrutvOm87MnP+Ts6eNyCnAfv/QYfvzEB/zu/Bl97spuSGV2gdLjlx7LhoYodRXBPu233BQzuM2HuVefXPR9njFtNB/Ze5ec148EMqvKNhBogKuy0jm4fXnWCbR0Mz76hzee3uO29htXx5wrTmRUbXED16lja/l7AbvMeeQbR/f5sZfqf5bffAZX/3M+Xzp6Env0UA84Ex/dfyyX/KXvHbLf/Ilpfd5GX2Ua3I6ti7CuiKMMnnnAWM48YGzO6+89upY7L8i9JK2zV684odc+ul+edULqdfvTMFUavzj3wFInISd9LT0uhr6W3LbTkcxUn4wbUrFTw51sjK6LFH1Agi8dNalg277kI5MZmmVpjCq9SZ2eMORCRLjx7Gl9Dm7zJddS0wPG1+UtDdncvH7jxCl93t871xa/RC1fxtRVdDn9xL134bUrT+Tf3ziacUO6XkYVX0/12Qvp6MndNxJWHWmAqwadQt7BfvvUvbAt4U9fPoxnv318wfbzzP8eV7BtD0aPX3pMqZOQd5+YntsP8HdO3TtvacjmHMjHk5xsH/WXm+ouGsz+4rzpjKqNsN+4/N14fDMPNxP5cMyUEbx33SmlTkbWPn1w6c8KhToAACAASURBVNo+7DmqPG6gC+Wpy/L326YBrlIFcPSUEQzrQ6OTnnxi+rhUbxMqP8J9rJf2xKXH5ikl+ZNt7yLt8lmvM5thVT+yV+71CgeKv3/tyJ2mdRX09tWuwyp7X6gIzp4+jpqI1xgwX4+li+GHn9q/ZPvef/yQku27GCbvkr/fNg1wlSqQIte8UH3Ul5KkvUbnXk2nO9eUqBFhT314FlKxqyqlO+vA7Orinp1j6XhvpnT6cX/qssLcOB20a3kESf21F5hSfle761tZ7UxzSqkCKdQlMJsuhVTmaiLBsqqq8KWjJpZkv4V68lDOeuvhorMzswyIMyUifPuUPQGvGlJ6h/b5NLyqf/RIM5D99JwDSp2EAU8DXKXyZPzQjg1ACnGXP7QyyKdn9M9Sj/5gchlV/RARLj9tr5zWHd6HILWUrfP3LkBJeCY+tn/PfQR31rmkNZ8uOWEKK26ZWdBqSHWVA6t7sf6oc9/vCs7J82+bBrhK
5UnnlviFKMG96expJX08NtDlMizotwrYYOdrx0/Oab37Ls5+NK1y0NMw0r35yjG5946S7Tk1fmh51GFVfbd/lr2G7DGy7z2uQGn7dZ+WxwaL+XTVzPxWy9IAV6kCKUQcqrFt4c3YrfvRnt66ZuduqC4+dvdCJof7cwhW+9J1X76k99maqf89JfsS60MneoNpHDk5swFjlEr3ySzrAZdrcJiNLxyxW6mT0KV8D1yiAz0olSfGdHwvBSjD1ZKjwnvwv4/sctjYf3/jaIZVhVKtvTc2Rlm0vjHn3goydVgvda4fvuQobpi9iNeXbwXgr2VQevvQfx+ZU5+tuTSguf+/Dqct4WTVY4Mqva56jCiF06eN5nsPL8h4+Qkl6oEiYAlvXHVSXrZ16tTRXE7uw1MXwvF75b/KhpbgqkHpl+dNL/g+8l3a+revHpHXvjBVdjrn/ajaCMcXqWurq2fu0+28/cbW8bvzZwBw31cOL3kjxGU3ncHBPZSC96a9RDZTIqLBbT900K65f0fyyvS+SLqLjinsE5uuhAIWS246g6F5agBaW1F+58sfvnho3repAa4alD7WhyE6u1MRys8Y358+eDz3fGnnkz0fo22pzLx7bXl1Pv/lo7uvX2pZQl2F15foEXuUvoeNvg6kcsikMgl81KBQE8nusXg+H6Pf6d+Y9iaQ5677yq0dx4c3nF6Q7WqAq1SavnTgfcsnpvV5/7vUhPnRpw/g2LTGNrWR8rvbHujKrZW5iLD85jN2mh7KoVFcufvWiXvyk08fwILvn1rqpKgufP0je5Q6CXmVr4KJXBw5ObMb0sMmZfdUoz+45RPT2Ht0DctvPqNgffsOvKujUjk668CxfRqCcXh17q1iT99vNAAn7L3jkfcfvngIP/zk/vzPyV6/mDUa6BZVe3+kI2vCvHl1fuq+9YWIVwfvof8+IjVtwfUDLwgMBSw+efD4gtdtVrnJ51DOg12mVWvOmTGhwCkprqAtnHvorjx+6bEFLU3WK4hSvukThuT1ZMtmRKjqcIBXZp3QoeuY9PqdXzwq9y6QVG4uOWEKl5xQuC7AcjGyJszImnDBhzX9r+N25/bnlxV0H5n42vF78JvnlpY6GR383+cOKnUS8qIyZNMad3Ja95Spo/jPwo15TpHqTl+r/ZSbFy7/SFH2oyW4SvmOy3ODoaBt8c0s+kgdO6SC4AB85Kz6n0+VyRCq3z5lL9665mSuP2vfLuf/+xtHFzlFsM+Y2qLvsxD60ufwbwZIkF8OXsmgO70jy6BufT6Nqcu+h5Vc6K+pUsDuI6sK0ogr0xGPLizRsKxKdSXXJxn/fXx+62daljCsKsTnDuu6385S9CpSqm6i8u28Q3fNed1cBkRJd9I+xel9pD8Ym0F3etk2hMvE2dPH5X2b5UYD3DIlIsNE5EkRWez/v1PTYhE5UEReFZEFIjJPRD6TNu8PIrJcRN7x/w4s7idQmfrmiVPYd6x2/6XKR66jNRVqVDfbEu6+8JAO0849JL/1Emednlnd0myqHpWzyTkMN9xdSXq2tLvD0juzAD0JZeLuLx7S+0J5ogFu+ZoFPG2MmQI87b/vrBU43xizL3Aa8HMRGZI2/zvGmAP9v3cKn+T+K/0n65sn5DY8ajb2Hl36kaaU6k6uJbiRYOFapH8krQFmTTjABUdOzOv2B1pDnt5kUnLY2flHTMzLvncfmX1wXQ5mThtT6iTkzW7DS/Mkopg9QmiAW77OAv7ov/4j8PHOCxhjPjTGLPZfrwM2AfkfDmSQueyUvTLqomjfsbnXxbv2o1O59CS/tKvzEGhKqW6FbIv3vn9q3uvCDoxy2cK5/LSOwyjPueLEnLfVl4FASml4dX4GWugs308jMtFTlbxClu4Wc1AWDXDL1yhjzHr/9QZgVE8Li8ihQAhIb3J8o1914Wci0mUfViJysYjMFZG59fX1eUl4f/GXrxzGD7p55JZJF0WHTepDxX/xOu//1MHjuejY4o+Mo1Rvrjwj
u+6gRtXm3k1ephZdfxrzriuvQTgGi+kTOgalo+siOW8rl2GcC+kjGQ4T+5kCBaI3nd33PtSz1dNTmtuKMNJnMWiAW0Ii8pSIzO/i76z05Ywxhh4GFBSRMcC9wBeNMa4/+Qpgb+AQYBjw3a7WNcbcYYyZYYyZMXLk4Cr8PXKPEalhTXN5JFsR6tvpUxMJ8uNPH0BtARoQKNVXFx+7BytumclVZ3Q/THC6H5y1X4FT5HXKX6hqEPkcoaq/+Oj+mT1yP3z3YRy++8AbbKBdpj1KFKqthGUJK26Z2WX3f7/6bGmCzXw/IXnqsuMK3r1hZxrglpAx5iRjzH5d/P0L2OgHru0B7KautiEitcBs4CpjzJy0ba83nhhwN5D/gZ6VUgPeVzJ8wnDy1B4fMpW9TPoarSrhqFeFcEWGNy9/vfiIvPURPr8MR6gbVdt7afSFea7z3Z3Ojf/aC2GK5Ut+n+uPfjN/XfAdOGFITo0a+0oD3PL1MHCB//oC4F+dFxCREPAP4B5jzIOd5rUHx4JXf3d+QVPbT2ntV6V69/Y1J/e6TLmNb5+L3kqYMg0I+4tMqgp0rnvbF/913O5Ul+EIdafuO7rXZb58dHEG2+ncfdfQysLU++3Od0/3jnc+z+d/fv2ovG0rGxrglq9bgJNFZDFwkv8eEZkhInf6y5wDHAtc2EV3YH8WkfeA94ARwA3FTX7/sOuwSiYMq+Daj07Net1DJg7cR3ZKpRtateNH9qBdh+w0f0QfhqnuTz53WO59x5arP190WI/zJ+exx4PvnJK/YDmfMim9Tx9lspC+lDZq5RWn713UbuleveIEwoEdTyke+9Yxfdre5aftxfs/OK2vycqZBrhlyhizxRhzojFmil+VYas/fa4x5iL/9Z+MMcG0rsBS3YEZY04wxkzzqzx83hjTXMrPU64iQZsXLz+BY7uog3XMlBE9rtuXOkqibbZVP9NeR/CSTt3oRYIWT112bIlSVVwDoZS6s6Mm93ydO76HER6/ckx2pZp9HSCilArZBV66ipDNm1efxMxpY/iv4/I7cEpnd10wo8P7ziOM7TOmtsenGvuP775O8tnTx/HVY/coWr51pfyeFShVJk7aZxQvLt7c7fxM7qyrI3qKqYHloF2HErItLj9tLybvUt1jAKT6v1Cg+6D0nBkT+N2Ly3vdxndP25u9x2jf35kaXh3m10UYDnm34X0bvfPhS47mZ09+yC+eXgzAGdNG85vPHZyPpOWF/voqlaNMHssev+dIhlQG2d6aKEKKlCq8IZUhPrzx9FInQ5WBKaMyC1pPnjqqJI2MVM9GZNiv74pbZhJPurQlHA74/n9S08Abnnv+2gaumrlP2Q3g0X+fFyjVD4jIoBshSan+7L6vHF7qJAwoj33rGA1uy9SQtAZsnasrdBYKWNRVBHnyf47tMMhHJGhz14WHlF1wC1qCq1TBDbxae0oNPmP7MLBBf3Xdx3pvfLv/+DrmrWnYafq73zuFpOMyfJA0QOzvTtwns27+Mi21LwdagqtUN4ZU5qnj9y4i3Bqtm6tUWequHdk9Xx58XYlfeFTvjcge+K8jupxeVxHsV8HtlEFayvzM/x7HU5cdV+pkFIQGuEp1Y+a0MZySh87rO/eYcN6hu7LfuMKMiKOU6pvKLgZzOPOAsUzepf+UXBVTKVvJ59Md5/f8iH6g2n1k9YCtQqIBrlLdCNhWXi56nUuETtm3f4/4pNRAZnVRhHvzJ6aVICWqmCaN6L5HgSJ2RavySANcpQpMr41K9R9dlWZVleHoW/n0vS7q2s7cf0zO2/vOqeU5oEOuPrxBew3pjzTAVarABmDf8EoNWJGgXdTRo8rBFw7fbadp++dYjerDG07n6x+Z3PuC/Uh/HqBiMNOjplSBdfXIUylVvgZqo5vudBXAXXDkxIzX/+JR3rJXnrF3jwNDKFVMA/u5i1J5dtI+u/DUok1ZraPhrVL9S3p9zHsHWe8Jd3zhYE7Zd3RW61z70alceOTE
Po+MVWqfPGg8D721ptTJUHmit1pK9WLckB3jc08dm/1ju5Ondvyx0IBXKVVuHvzqEVSHAxyxx/Cs1xWRfh/cAlx/1r6lToLKIw1wlerFy7NO6NP608bXce4hOpqZUv3RxAEQuGVixsRhzP/+qdRE8tT/dz800BsTDjZ6NJXKgpa+KjU4/ONrRzJuSAW71A6+EcyUGgg0wFVKKaU6mb7r0FInQZWBWafvXeokqBxpFQWllFJKqS589bg9Sp0ElSMNcJUqgk8cNL7USVBKKdWL939wWqmToPJEA9wyJSLDRORJEVns/9/l8zIRcUTkHf/v4bTpk0TkNRFZIiL3i0ioeKlXnR06aRjHTBkBeC2OlVJKlZ9I0E4FuecfsfMAGKr/0AC3fM0CnjbGTAGe9t93pc0Yc6D/d2ba9FuBnxljJgPbgC8XNrmqN+2tsesqBm8rZaWUKneRoM2KW2Zy/Vn7lTopqg80wC1fZwF/9F//Efh4piuKV0R4AvBgLuurwrhq5j78/sIZHDhhSKmTopRSSg1oGuCWr1HGmPX+6w3AqG6Wi4jIXBGZIyLtQexwYLsxJum/XwOM62plEbnYX39ufX193hI/UFX3oZ/ESNDmhL27O4xKKaWUyhcNcEtIRJ4Skfld/J2VvpwxxgCmm83sZoyZAXwW+LmIZNXk0xhzhzFmhjFmxsiRI3P7IIPIyVO9AHWXmjAAe46qBjqOdqaUUkqp0tJ+cEvIGHNSd/NEZKOIjDHGrBeRMcCmbrax1v9/mYg8B0wHHgKGiEjAL8UdD6zN+wcYhNrbh0WCNgAPX3I0H//1y9zzpcE1Xr1SSilVzrQEt3w9DFzgv74A+FfnBURkqIiE/dcjgKOAhX6J77PAp3paX/VdJGjz+KXH6mhHSimlVBnRALd83QKcLCKLgZP894jIDBG5019mH2CuiLyLF9DeYoxZ6M/7LnCZiCzBq5N7V1FTr5RSSilVIlpFoUwZY7YAJ3YxfS5wkf/6FWBaN+svA/S5eZ789eLD2dgYLXUylFJKKZUBDXCVysDhuw8HYOWWlhKnRCmllFK90QBXqSxMGFrJ5w7blfOPmFjqpCillFKqGxrgKpUFyxJuPLvLWiFKKaWUKhPayEwppZRSSg0oGuAqpZRSSqkBRQNcpZRSSik1oGiAq5RSSimlBhTxBr1SCkSkCfig1OkoUyOAzaVORJnSvOme5k33NG+6p3nTPc2b7g3GvNnNGDOyqxnai4JK94ExZkapE1GORGSu5k3XNG+6p3nTPc2b7mnedE/zpnuaNx1pFQWllFJKKTWgaICrlFJKKaUGFA1wVbo7Sp2AMqZ50z3Nm+5p3nRP86Z7mjfd07zpnuZNGm1kppRSSimlBhQtwVVKKaWUUgOKBrgKABE5TUQ+EJElIjKr1OkpFBH5vYhsEpH5adOGiciTIrLY/3+oP11E5DY/T+aJyEFp61zgL79YRC5Im36wiLznr3ObiEhxP2FuRGSCiDwrIgtFZIGIfMufrnkjEhGR10XkXT9vvu9PnyQir/mf534RCfnTw/77Jf78iWnbusKf/oGInJo2vV+ffyJii8jbIvJv/73mDSAiK/zv/DsiMtefNujPKQARGSIiD4rI+yKySESO0LwBEdnL/760/zWKyKWaNzkwxujfIP8DbGApsDsQAt4FppY6XQX6rMcCBwHz06b9EJjlv54F3Oq/PgN4DBDgcOA1f/owYJn//1D/9VB/3uv+suKve3qpP3OG+TIGOMh/XQN8CEzVvDH46a32XweB1/zP8QBwrj/9t8B/+6+/BvzWf30ucL//eqp/boWBSf45Zw+E8w+4DPgL8G//veaN97lWACM6TRv055Sf9j8CF/mvQ8AQzZud8sgGNgC7ad5k/6cluArgUGCJMWaZMSYO/BU4q8RpKghjzAvA1k6Tz8K72OL///G06fcYzxxgiIiMAU4FnjTGbDXGbAOeBE7z59UaY+YY7ypyT9q2ypoxZr0x5i3/dROw
CBiH5g3+Z2z23wb9PwOcADzoT++cN+159iBwol9CchbwV2NMzBizHFiCd+716/NPRMYDM4E7/feC5k1PBv05JSJ1eIUNdwEYY+LGmO1o3nR2IrDUGLMSzZusaYCrwAtkVqe9X+NPGyxGGWPW+683AKP8193lS0/T13QxvV/xHxtPxyup1Lwh9Qj+HWAT3g/FUmC7MSbpL5L+eVJ54M9vAIaTfZ71Fz8HLgdc//1wNG/aGeA/IvKmiFzsT9Nzyiulrwfu9qu23CkiVWjedHYucJ//WvMmSxrgKpXGv6MdtF2LiEg18BBwqTGmMX3eYM4bY4xjjDkQGI9Xqrh3iZNUFkTko8AmY8ybpU5LmTraGHMQcDrwdRE5Nn3mID6nAnhVxf7PGDMdaMF77J4yiPMGAL/e+pnA3zrPG+x5kykNcBXAWmBC2vvx/rTBYqP/2Ab//03+9O7ypafp47uY3i+ISBAvuP2zMebv/mTNmzT+Y9RngSPwHgW2D3ee/nlSeeDPrwO2kH2e9QdHAWeKyAq86gMnAL9A8wYAY8xa//9NwD/wbo70nPJKDdcYY17z3z+IF/Bq3uxwOvCWMWaj/17zJksa4CqAN4Ap4rV8DuE9Fnm4xGkqpoeB9hamFwD/Spt+vt9K9XCgwX9E9ARwiogM9VuyngI84c9rFJHD/XqF56dtq6z56b0LWGSM+WnaLM0bkZEiMsR/XQGcjFdH+VngU/5infOmPc8+BTzjl7g8DJwrXk8Ck4ApeI09+u35Z4y5whgz3hgzES/dzxhjPofmDSJSJSI17a/xzoX56DmFMWYDsFpE9vInnQgsRPMm3XnsqJ4AmjfZ66rlmf4Nvj+8lpgf4tUtvKrU6Sng57wPWA8k8EoRvoxXB/BpYDHwFDDMX1aAX/t58h4wI207X8JrCLME+GLa9Bl4P2JLgV/hD6ZS7n/A0XiPvOYB7/h/Z2jeGID9gbf9vJkPXOtP3x0vCFuC9xgx7E+P+O+X+PN3T9vWVf7n/4C0lssD4fwDjmdHLwqDPm/8PHjX/1vQnnY9p1JpPxCY659X/8Rr6a9546W9Cu/JRl3aNM2bLP90JDOllFJKKTWgaBUFpZRSSik1oGiAq5RSSimlBhQNcJVSSiml1ICiAa5SSimllBpQNMBVSimllFIDiga4SimllFJqQNEAVymllFJKDSga4CqllFJKqQFFA1yllFJKKTWgBEqdAFU+QlbEhGtHeAO2+qyE671wXbAscF1MwEaSjjddBIzp9D/e4IE+Y3lvxDUYSxADuGbHMsZ4227nuhjb8pYDjHjrem/8fywLI+3bdUEEI5L22k9b2jpuyEql2Q2AG/K23b5MqCpBdSBGWJIExCEsSVw/kRExRI0QEUPMQMwEqZQ4jj+/yY3Q5oRIGBtjIJbodGq54t1OumAFXNyEBY7syCYDJuD9Lw6I6/9v6HA8MF5eiJuWx277wfLXS7r+5xJ/BX9BS0hGBDfkb9sBK2FS+SyOQRzj56v/2pYOx8FfEnHcDsc4tQ92LOul3ez4XvjHFpHUd6nzegDGtsAY7/tiW96+UnmQ9l1r3wad8siSjvtMW9X733jLtH+nDbhhm0SVt7wJ4iU+7buBZbxjmNqH/yVvz3u3m33hHRMrSWpZcb1jaCXb89vbnRuwcCq87TghvO+LGLBM6qOIfzBcV7CsHTsRAceRHV9546VJkl4araR/vJPeOpJ0/Tzw3xtIVNk4Fd76gXASyzIELQdjwMVCMCQcG9f/ItiWi2sEWwyuEVwjmB1fEi8NjoAjWEmwEv53N3UcwIomcMNBnIjgVLuEg0ksST+YYIvBAEnXwvL31b6Ma4Ska+G6lre/pIWVoMM5Yce873b7dSl984kqwY0Y7ICLJW4q/bblbSDo/59wvWtHwrFT+Q/e/sQFK95+nL1jKgkHY9v+8d7xPW+/LrlBwYl409yQSR1ry/L+nLTvU/vX2XW97yr+sbUS/rFPguWA+Psm/XqJf/31zwc3
ZJGoAoKufxq6/uc1JB0L23IxCELHY+C4HeeJQCJpp9Y3RgjYLq7rrRm0XeJJO/UdtcXFcS0c10LE4LrS4fQ0RpCY95m865+XXiuedu6L96USx/996HR98jeU+qwdfmtI2wZ+HhoDtoUT8Y5TMgImaMA2HX/OnPbrp38N8L/Xdsw/5gmDtI8I61+fd+zT/9//fTO2pI6hGEOixsYJgwS9fEw/vwFs2+A4aRnlChL3zic7bnZcU/z9GxHE7PidNfjpSb8eGsACN2h5nzliEDGY9n0GXO+9/52zLO94tc8Xy5uXzrZd7/tJ+6XZYInBaZ+WtNKuv35eOuIdD8sgjnedEv/zWEn/czmdrpMJP+6wrdTvRFPr+s3GmJF0QQNclVJh13DAMd/ESuz49obXNYMtWE1tuNURpC2OM7wae0uzt0AwAIkkhIIQi3vvnfYgxr+AV4UBsNoSuJEAknSRtjjYfsCZdDBVEdrPGmmN4dZWeBdrvIuC1Zbw5jsuknRwaysxQe/CZLXGMUEbE7SxGtswwYD3Puz/wDgGXJfWCVVeemyhbYRF027+jwsgjrDrwWs5YsRyJoXrGRloZEpwM00mCMBewSRLEjaTgw7LExZLEyM5KLyOJuOdQs+07M27TROoj1YTcwKs2Di8Q94mW4JI2MHELSqHttFaX0WgwU6dyBhIDHUhKYQahECLEN5usBI7TnZjgR0zBNtcAq0ujh+w21HvpHfDFoEWh+C2NoxlQcCCpItb4aXRDdlsmRqhZYIhtF0INRqq1zmpoCe0PYHd4uUlSRerJYpbFUEcxz8ONtji/fA0tHrHuv2Gov0GxSL1XoxB4knv++HPl2jMOz6VYe87EArS4WopglMTRpIuVlsCpyqE3RL3tgHesv53zlSEvG3AjhsuYzCVESQax4SDfposSDo7vl+JJCYURFqjmErve9c2aSgbZ4QAiI5xMCEXCTtgBBOzsGsSOM3BVDLtqiSuI5i4d6NitdqpeSZgsKIWiEFcwW4TIlsg0OLtP9hmCLa6ROrj2E1RjG0jrkt8RCVb9/HOleZdDU6Viwm4WJVJQhHv89u29wsQjQaJRLwIx7ZcLDE0NVdgB7x8cBwLpzVAsD5IoEWIbDaEGwyRrd52wluiOBVB7Na498NiDPUz6th6gLf94ZO2URuJMrKiGdcIrckQAXFZ21RHPOl91ppIjLZEgJpwnJZ4iHjSpi26I49cx8ZpChLYbhPZLFSvdQm2uanriyQNle9vJDp5F7ZPDrH9mCiTx9QTDiRT2wiIQ2UgQdJYbI9VUBmI05IIUxHwPnvUCVDfUkVTSwQnYcPmMJF6i0CLf95VwpAlDuEGB0ka3KBgpwVNGw6P0LpPlLohrf7n8dI/vNLbwIiI9399WzUihrUNdQC0NnvHyWwLEWi2qF4FwVao2JIk2JgkuKEBd4h3vbFa45iA/0MfsHDDAaIjw2yZ6p2XrbslkYokYhkqquJUR2I0tkZSeRAOJr28bQl7gUJSkDabynXeNiNbDZGtLqGGJKGGOG7Awm5LpNZ3KoJYCQc3aNMyPsLGwwTGRgkGHSrC3vlTVxFlc3MVdRVRHP+mpf1mAqAxGqY2EsMxQtBysS2X9dtqqaqIefns2IyobqEpFsZxhTE1TazcNpSq9u2Ho2xtq6ShOUIkkqC1JYIdcFKnfjIeILgqTNUaCDUbQs0uri1UrotiJb3vpBuwsOJJrIZW3JpK7/pUW+FdYwAsC4klUr9HbjiIuG7qGuF9ofzzNOkg8QTukGqa9qgBYOveNm3jk9i1CQLBJIl4ANt2STR51wW7KokTtSFuEWiwqVsMgZihckMC2y8IstqSWLHkjn22B56V3vfFqQ6Da7CSLhJPsv7YoTTu6RDcpc1LXsBBBNpavH3W1rbR1FyRuitz2gJULAsR3gZ1KxLYUdf/XfD274Qt7JhLstLGjrkYSwg0x5GEixvxvm9W0sUN2bSODrN1qk10SpRwRYKYv8+6oS1EQgni
SZvWaJhIKEFrNETcnx+sjOM6dioYN0BtdRstbWH/WNpUVMWpCCVobIlgXCG5NZIKxq2Y4FS5BBpsTMC7zgUbbOxWIeBlA5HN3o1wZLvjXSebY5iAhb1xu7fPmkqcmgjiuDw59/sr6YYGuGqHYJCtewWJbDFUbPV+KJun1FGxybuIIYKpDHsX7Arvy47llbA5VSGsoJ26w3IrQ6kAVeJJJJbERIJYbQnvYh8MIM2t3jZsG+OXxEnSReIJcHdcuKy2GCZgewGNazDBAG4kgNXqX0QSSUyll57k8KpUiVx7yXGwMYYkvR8ASXgn99Z9wpiJrakSiGQ0SFUwTlAcqqwYw+1mQuJi+xeoVtdhrO0ANuMDSdY6ccYHKgiKd8F8J9pMlR1ncbQCxwhDaltpag2TiPsXlYqk/+PklTYRcknWQnCbH6Q7IHHBx+sy2AAAIABJREFUjgnBZiHQ7JXMhJpcLD92c0KCuBCrsUlUWBgbAlGD8S/aRgRJGuyKIMa2sOIOBCysmJ+PcQesiBfUt5dwOKR+7MXxLrrSGsNUhv38djGhHQGyJF3sZu/7YII2Ejd+aat/9YomIBjAVIaQ1oR3oxMM7LjQR8I7Sl9DwR3H0fF/xGoiuAELsS0v2K0MYrfEdwSw8YT3edt/qIzxbo5aoqnti+NAPIG4Lm5tpVdaGdgRgBJPINGYd4NlCU5NBS1jghh/ESsmmCFewGEcwRjbK6EIpYpLsCwXEcEB6upaicaDRLd7QYldlcCN29AcwG4R3KAh0AbBVi8PAlFDsNHBijt+SaqFUxUiOixAbIi3i2RdkmBtHNcIlZUxjBGCtoPtl4hVhuMYI9iWS0NLBRXhOMFQklDQO9auEVqSFslqF0laJCuFQBTidQH/WHs/Rm4w4j3ZMH7J31DvnKoOx3Bci42tNdSEYoSsJNXBGNRAyPb2sS1WScByqQ7FqA7F2NhcQ3MyQmWV9/1oaQxgN1tU1AvBJoPlQGh7kuBm77yPj6oiMXaYVzLjeMF7SyJETcg7llEnyLZ4JVujwi6VTSRcm22xSra0VDKyuiV1OKPxIBWRBI1NYSRovO+0X1gV2WoINToEt8cI1DcSHz8MN2SlnkYEm8CqD9EcdGmLBYk1h5GAy+Y13oHYOKYxld/rNw4hEE5iXAu31cvH6tU24e2GuqVxL29dSFQHkF1qU+lzgxGMbRFoinklj0mX8PYElRvav5MB3GCA6NgkLS1B4kNsktEgps2b3xZxsAKudzOVtAhstwk2C1Xr2r9PLpGtCay4iyQcLNfdUdIF3jXMGOykS7A5RGh7kOgIG4KOFzwB8WSAeNxmfUsddbWtxBIBKsMJ2uLB9tOMuGPT7AcxgYBDMhGg2c/IRFswVcpriWF9kxc0bve3v7WxEhEIh5MkkzZuwsKJ2Yh/YxhotKhaC5WbXYLNLpENLbSOryZZFSDQ5n8W15CsDRMA3FAAcYJINJm69pigeDfhoQA4Bqs1hgkHMCHvM4jrIlHv++0Mq8Zq8wpC7Lh/49kCbcY7v23bJWEg0RBGKpKpPJBWGxM0qZJGJ+QV5DhB/wY+bpGsCWOHAljN0VQpPu2/hY7rl+5bJIdVULXRpW20Rdz2rh1xQKqTRCrjRFtDbK+vRmI2xvbWt1ot7Dg4FZCosojUR8EY3PbCnoAgrsGOuditCe+JiX+DIM6OGwVJOASirpe+piCJgEv1EO+8TLoWW7ZXe8e1NUg8HMCN2YifBidpe088K5LE4wEwQmNzBTVV3nnb7ERIJi22NFVDU5DgyDZM+7UTcIICQZfkUENwWwA3IRgL3LCBaPtTDQg1GkLbk5iAEB9RiROyCEW8Y2k3Rf3P1KkouROtg6uUUkoppQYULcEdAERkCHAnsB9eedyXgA+A+4GJwArgHGPMtp62YwTsKCDQMtq/I4yDscPYsRBWwqsHE9kcTZXIWW0J3MowknD9+rkWYGFFk16VBUg9qsMYknUVWPEk4ECVd3dvQoHUIzxx
vMdGJmzj2t7dnIX3aM+Eg97jcxHcoI2p8UvMLMsreYwmMHUVYAlW3Csh8zMIEwogSQO2kKjxSzyNEAz4Jbhi2NpWybbqSqImSIsbZrUJUGl5pVHLkpU0uRWp93NbdsdmKXG/2G9VfASbYtXUN3iPMmNNYa+eVlN7SSOYCoMVE+LRKgi7Xv1I/xYzUi8EWoVgM4S3ud6jr3VRTNDCbvEeN8aHhAk1xElWB706XEnXq6IZS6Y+p1MZ8OvSOrhBy6/LZvnH0qFqvYOxbEKNfrWEhkSqbrI4xqvm0dLmVU3wj1mqDnXSTdXnMpVhjC2YCq9UPlnnHUu71d5xjCIhrPZHhjsqkeJWRbDiSdxQACua9B/R+3fibnsdY++7ZrcmvOoF7aU0lRFv+VgcUx3BjQS95YN+KXNlECsmUFsFiaRXWuKX6KTqqaWX5jpeiX6iWjABv4RiWIKg7RKOJIhFg7i2wXEsxK8eYAe9x4i4gCs0NVfgNAS94wm48TASE+yoEGwSKjYbgs0mVUIcaHVIVtpYTgBpS+BUescz2OpSUe8di2RlAKfVxq12iIrBdS3axKSqEIYjCVxXiMcDWGJodsJghDbHryJgBNMawEoIoUYh2GyorE8SaPXOiUBzAjfglWDbMQe7IUrz2KG4fjWM+spqwsEEFcEk9a1VJB2bgO3QGgtR6T92dlyLXWu3sS1WiWsEEUMg6KQe39McxFh4TxravCo2bsjCqYv430cXN2xjOYZA1JDYHqaxIk7S9UpPm6NhLMtlSEWULdEqmmJhLDEkEgHWbfOqCoSCSeKxIHZlDIlZBBsswtv9Ulwg3OBibCG6SwUVjvfo1liSKhUcujhOvDZEMlpBbEwCCbhePUe/1C4aDxKPBWiNehX2k9EgsjVIMOYf64BXx7dlTNB/nNrmlab51d/BeyRskt5TLowh0BBFEi7V6/wvhASJDvfrP7ZYJOwwJAWrza/WkPCfAjTaWAkIb/WevISa/cfiCYMkXJyIjRW3vXMl6WD80i6JJ3FqIiSrQwSbEtSsChAfEiQas5CIlw8tsQi2X8q2bbNX+upUW951DLDCDvFYkFA4QSwaIpnwqp84Ie8zhKriRNtCOA1BiPj1lysSmNWV3rbCBlPpELf+n703+ZVly9K8fms31nhzutu9iIwmI0uZRSGVEAimCKn+AGZMASExqzH8CUxrWkJCDJggJsyYpMS0RAIlRBVQCWRF896L257GG2t2x2BtM7+RiizEgCzlky/p6b57jh8/7ma2t1/71rd+X6Oqbd2W7Enfoz8KZi7072cwut/7Y8R/ORNv6j4fkqqQGdzLWD8TDKnT77uPB+Q0kN/cgSmk2x4zJ4qv/udsMVXFNKdJ97rnM81G946bXwnzrWPKwnQv5NkiSSizvke7DXDW2RA7qVLaPVUbwLmqvFb34OIMsnHYc4Sc9fMQtavEm45S9+b2S2DzfQvo7wi3qtTP1mNsQTaRGM3a3ncnwY5w8+uImQtp4y5+c9R+h9cugYRM8doJK96sCq7kAkn3dTtA6RM5GIZztVFEs86JqGKdEZe1AwmUUfe6CXBNYj41iM08P9bPeVNIwVBGC5tE/G6j14S9qK3mxSEZ3FGwg8WO6sLoP1TP9qQdH/XOF+wcKXL552ratUgupP5iifp9dVVwfxj1D4D/rpTyrwD/GvC/Af8p8KellD8G/rT+/VrXuta1rnWta13rB19XBfdveInILfBvA/8BQCllBmYR+XeBf6c+7L8E/nvgP/kXPldM3PwqkhtRn0ytIuCPUSe/h7ia/QGICTN9ZeK3FlIi7zdI9QUtKprMEXua1in6Um+vcmMp1StaliGm47wOkeVNo1+rd7+p3hkvd6TxtirI2wYK2CHoMNz95vKyDhPkTG4d7pTYfmsprmfe1mGOpjC9cnyadvxjfsbHds8790xXR5XvzJl/Pr/mkDse7IkpO/705V9dn/8ldozJk5PB+QRRsC+O5qmq0BFOv4jYaGge
BSmmemz1jrX7kkit4M+F/v3E+KZhvmtoHi8DKjZk4s5jT2E9XmnTML2qilgomJB1Gn/j1kldU4fQimR2vzxh5w0UcEPChLSqp8UKeAvGYI4DpW3g6ynlxWe7Pr76r3ft7xAVcuswox634i2iI9f699ZhTuM6YBhvVM1dfHTmcMYcRf25jVM/bUqUvg71WEvpHJKzDhT2jVIf6muyx0kH3kSvo2JtnZYWOFUvuXfqAZ4DZFWl3/xPJ57+RK+Xz3uPfWo532aKhfZZSJ27TPZmVpiDq96x/tGsvjx30glnE1Rlaw6JIkJZdltRBdEMkbzxev1XP/T+14tf2jG+MgzvBD47xEDcJ502Bs60mFEH2MJdxp4NxRZM0O/HXaZ5MTTPOmBmIutQIugEtZ108CpuHWa07L4LlP9BFZGnv7PntE9Il3R6OokSCraBuRJCQrA8HXpSsIiBNBvMs8cdqho56aT59vtM/0n9xiYWiqtrYojqX+wc7XNm82vHoew5bvUY+C4yH3uOrse1kTg7rMvEwVWKBczekUfL8VOLZKF/L7ihXAZUjZA6o17JH2/xh0huDLYqatN9T3EwfxPYvzpRitC4iK9q/eenHc4n7vdnhtbr0NXtwOk3qnLGjVCcHudiBInd2vEwi+8y6rpUP2kkbRukwMsf6rE+/BzCNzOui6TO0m5mcjLETe00JYPvA/2PZl6+bJnfCc17h2T9/uZDBmkqmcFgpkwjsvo+49tdvaYScecJW0gPke3dwOvqZf5y7mld4jQ2uN1I6xKti6S9+jJfzh1vb46UIhyawLYJDHu/enTnyeGbyPbHI9ZkxtljbSb90UGPQbD0vXrKp62j6wIhuJUQMP9qi5lFFdDWMN947JQJD5c9PO4b7BBV4fVW173I2qkrnb/sB40jbfyFIAAUb8i7ltw4bO2cuJDIdS9qvwTu/pnw8UbITw10CbZx3drSbGGXaT4b/EEVR3dO2ClfXoM1uJP6RklFPytDWbuZYi32FIi3LUWE6cEzvIGwW/bUAgbSwYO7SLOudgK1G1LW/due4zpkDWAmu64pCQkzzvpZG+pmpc9C7hynt5bxTcY0CefTOsDatOpBniZPaRK+0YG7XDtcdjcTg8U3EWsz/m5gHD3e1/mbLOrT3UWMyaQm0fhEGC5qq9zMlPcd06tM+0XnSbKBsNOjPd0JzXMh/ayle0qYUPAvMzLVIdqtr3vm1YP7Q69fAB+B/0JE/mcR+c9FZAu8K6V8Xx/zW+Dd7/thEfmPReTPROTP5jT8Nb3ka13rWte61rWuda3//+qq4P7NLwf8G8DfL6X8IxH5B/wlO0IppYjI773VKaX8Q+AfAtz6NwWB4ZVh86F69U6J1NmVYQtVFawYmtI3SjsoRb20jcOcJiSl9e4q9/Vuy9Rp7VSQki/Ypq+4hmnbqqqI+imhchwNkLMqsUaq0lvvz1LBvkzkjdIDyFlfV50klpjJm0ZVq9NMqriUsC2k2yrLucJDf8abxIdxz8bMvHPP/GZW3NcXs6Mzgc4EDhUW+u1wx12jNwVDUnUnnh0Rh3SJBIxdVV8HnX6WAsXB5ttC9tB/rn61Y2ZGfVnj64bNt8NlCnfB90ZH2nrMFAmbnmKEuHXkqogVVzBzIrcWSQV7DuudPUC6aXBPI81T0EnvrApD3Olxbj+eMceJ9PpG1e7G/Q5n0j1O5N6TbntVZkNSJcUI7qneHM0BY8yKaMvW6+TwdNlqxOVVlTZjVD9upWCwbS/nLWdK6/X8VeSXmQJyGCneqTqREmw7SvXgLiqFPJ+U1zyHC0HBX15DbhxmQYeJMN/6lXLw8L8IsQc7GnKjPkv/DN1n/f74qnJdIzz874Hx3mLnrOxalPcaK89WkqKpkpdVAU4t+EPCPg9QeZVh32DmTGor+ukpMz1YbBWd066oarnRJxFTSNaTXyd49uS2UGxZFV6ykLoCT8LNL2cQ7cTkZdp78eCNMy4rTzN7Q7jRn+9/K5zagtlF
mnZmnjzOTziXV0Znni0ZSxmtelZHS7kLzO0yGW8pTtnLqRXinad5TviD7h1x3+CfRlzMxL7yQHeRvlIYQKfKmz7QNBHpAjEZfBMZPldgr8+YLpGjwb4Yui8Zfy7rmnBDRqJ6oO2gHk5JhXCjJyt7IfaFZj+zqWikh82w4rHcwwupiCqSJpOiw9sLoQBbLvxOgdQbzFSn2KvalL3BhISvfuBS2avzbWUe9xeWrG0SpQjh7OlvK03i3BAOjbJibaYUVbxSbT6FrZCdoTllzJSRrF2UFUmc1NNu5kRurHaNiqLkDlOz/u4pWlofmYIjZ8NxaHEVO2dt5sPLTnm3LvHxpcX7uFJonNfXPVQlb9dPxGTWa2W7mZijJWdD0yTGoSFOTj3PgNhCboTzu4buMeIPcb1OF7VdhkDptPtSUIyg7gtV/Wy9dpky2I/PkG60A7QgDHNWLvJhIt202u3qmrX7lDrL+RtDkUxpE7ZT9XKqymOZLLjMfCuYWQiTMLx2NKfM5lgV2qRIxFwMubGYGNeOJaD7lNHZgmINh58apm8CVP/z5mbEmEIIlpyM8qWDodzUeZb3HfONAawiwvYNTUj6eQjayTTgpkS86+rnwIyERKzdSnPW9ZcbyK8D3iecS3SNfr1xiSk4SnPB9XX9TFuvhV078eW04XxucS7TuMiIJ4bqx24D1ub12hlH7Wwu59p57QqFu4CcHPOtos6ksO4/qlBD+6Jc9uaLetuXj3wTdR2viLi/oq4K7t/8+g3wm1LKP6p//2/Qf/C+F5EfAdQ/P/xLen3Xuta1rnWta13rWn+tdVVw/4ZXKeW3IvJrEfnbpZT/A/h7wD+t//37wH9W//xv/1+fzFimW0PYCPNO732OP9IwgunO4MaC6y4AaYD5rqH9NKoalYqqo5W7t0yNS9Q8sLRRNc6MARIX+Lm9pLy4l1Gnjb290AFS0WnQjIZOnIMmsTQXcLWUsia3FG/Va1tVxbRrVxUy7VqmB8fwTkj7eEmgmoXHsWffjPzdm+94cCdu7IitP3hIHXNx/KH/yHcADXyc93x31mnujPDhtOPm9YnTuSWd3SW9DJ0idmchNwV5Ec7vFoVJH5AaYbw32LngzgV519PnjMRMeKiEglPAf/9C2bQ0H47k3us08eK/GqNOT3tH3vj1eCyBG/akXFr/6Qjs8F/OqoqP6m+VkCht3RKM0eM6JORY1dnGr77bYlVtNseJ3PpVgRVnwOi0sEyRvGv1TrtyatOrHbl+vzQXBq7U8IDilIgRXm31eniekHHGzLVj0LVITBrW0Hpy12CfT18B1bsaAlF5uyFSjEfCV3f6C7f3qw6CpELYLKSH+jCjFBE76Z+rArspzI0qtZs//4J/u2d806rPs5ZkCBtla5q56H81UGPxj+ebHvN8RoZAMwTSbUfqq+I/53VC3Q6aPleswez1OKTBrSlApc2QhLKJxMX3PhlyUxjfFA4/bfBD+SqJDrrHgj1H5YoeA2nrmfcXvSNuoTSZHIVuH9Y0o2n0SFXtms1M00QOjxtKqGt4Nuu0tBsEM4GdNeGrfYy4U1w933ZMqizd9sy3ltTr14dKYdjsJ+5ea6BMzIZNOxOSYZw90lfP46JYW31/807wp4sH18wZNybmG78merVfJsKu8oBzoXk2nA4NHwaPa+PvpBCKwDzbauku6o21BlvXrUQ915sPmf7jrGQIb7DncOFFVwKMmWaKteTekVrL7td1nzSGefCEaiITp77I8VzV1dFizpby7DCAPwjt46X70z4mciP4Q8K/6IxDETBLSl3MFBHMHPGlwI88JCEGy5fPSqzoX59JydA0cVUQjSnrZL2xiZINcbJsb0f1WSZDSpdrxrlMCJ6UCuepwdvENOp7SD6uam6KjnD2MBtK/ZBwSZRJXMM4UqM0mJVKAciuWTuHadusZJal41MwyKRdJeWzL/zcqhIbQxENwHHPum8WZ9YgicNPG17+JCKbiPN5DTszixfWZeViR11b
843QfYHu/XRREjNAwk5gRf3ZCz8eNHTDDkquKVYpBu7ZEd+oQjtNfgVwlKTpYdZ/xZDdR4prkARxY3HnxPyqx78sczD6mUEq2BPgDMUobWKZUzBzJHaOzcfM8xdPNJ55H5kqkaBkfY8lGCVH1DrWNf7FbilJcG1kHBpG8ZQs6lEGJShMVj//MuAK5mQp95UGdHJIMJiglBl/FGJXkw6/6jObuWBSof08YT8+Y7b9JTSqqvpp6fz9FXX9B+4Po/4+8F+JSAP838B/iP7T6r8Wkf8I+CXw7/1LfH3Xuta1rnWta13rWn9tdf0H7g+gSin/GPg3f8+3/t7/pyeyhse/I8Q+c/ij+qVR1SsQ2kdD98XQf05kv9ABCtPrjvizDe6sHjOzseTGaI440H4YiHet+v9ywWw9RWT1G+oTgTtZTOfXCfxc1cTiDdka3DkQth1mUi/ZorS13x/IvSduPSbkylY1q28TwH54Qu52pE2DO2WaZ0P5paN9qgrtz+H2j0d+sf28khO+DfeEqjDc2TMf454/C3pgjqnlEFtNdwLO0ROiJSbD/c2ZL3lLng2lVRXBBEfqC7lPzFEn5rtPwnS7KLngT0XJEqIpNekPdzSHtN55S+8wZ526tyGp9y+VdaJ8OYeS88qLlCXmeCkR8qbRKeK9ptItnmQ36oRy3Hpc9ZQR86qUpF2r08IxayJv/Z6ERG7rnXSpFIaXA2XbXyZ8l6jekFTdHWeMCLn3OhlbVRSskHatTrrXiem861aVxpxHVWdDVB/urOe61N9fWospZY3ixBiYZlWdq5JdNurzlVkVbfftF8x4w3z7dfoUtE+Xw+YGTSAD6D5prro7F+UFF/V6Lp2I2AkmFppDJVMI2OErWCXgfvtEenNLfLXDBPUC2+OEaxYOrmX3beH4B4JJ4H4rFGM55cr8nPQacr9WJdcECHuDHeq52hT93Rk2nyIS9e9LpKc7hVUZW9ZN95hWEgTZIMkz3zqenj2lzZqslgT7z5V3Oe8ycTJQfewShe1fOMKCva7r0wT1+5qUCbdeedRovKhUBaZ5TriTx/5Fx3ynL+KMcjbDsUGaxDFucH2NSJ6W2LmisbXf2qpy6/Hf/1K7DrmxzHuPpII/hNWD7I8Lt9TTPhWGo6PYQnryxIcZ+WpN9ZuZxkUOx5549Mhs8HV5dJ9VpW5elEghKSvDu3VrRDYZ9UFOkbJ3Gj0tsu6hd39eCBvD6bnBRDj/OJO2GZnM8hahQO4y/sXQHPRabF4ql3nOSBbcOWA/HyibTrtbdXtdffijUmSaY6b93jO/vqjtw2MPSZittn3swTI/fLVvtEVPaDCc3m/BFcp+ouR6vZ4dk9XHZGOZUSXQvNR5h9f1UEwWc3A0p5pe1VzoAe2zriN/WhIs6591bzDPZyUltF6/Zyu5YnmNMa/e/9L4dX9aOoW5sdoFqY9d97Z6zRdD5fMqx9X4TAxulRXL+w7Juv7tpOl73ZdIuPFKB0L3H0lF96tY6TA1sVHXQlJFvzFIyNz+RcCfHMMbVSbH14W0yZhZyPtKDHhqcHVdd6NSUdxUsLNSSfzLjHk8rqeqbDvyrtHPQbjMJSxJZnX/d0PD7lee6aGQZo+tMdvT64Rso15zHzuloQyyzgM0T/D0dyOxCGWwmMGQ+3wh6QSdNbGjofskzPuCnYUQapcvg38R+vfKv04tnN9pTG/7WLs/U53fsUJuLent3e+k88XbVveU8Ss//O+pqwf3Wte61rWuda1rXetaP6i6KrjXWit7Q9wUyn1YvW3JWsysvspidaL8+ReO7kud+vwQyV7vpIdXqg7YQZXIVAkC85seOyWKqBrrzjPz7cU7U5ys/kU7Cqm1SCnErnpLQ8akQm6tToK3Vl9rvyR0bdXnm4pOgr/eYIZI2jX15xPp4Wb9/+7DGck9cXNJ0JruHM9jxym1DKnhm/YZL4lPQRmSk/d8P98Ss6W3M39+eMvOT8yVRfn+vOdw6ui6wHFoKdFgZoMM
y2vUZKt8ksoyFOIG2pottyQvuVG9hM1RFSF3DJcs8ZDBGU3nsYKZAmnXrsqhm6N6WGO+/Mx44UDmjXrY0qZdGZvOGcKN3rm7w4SMATvGmoSjfra439fXqKpH8VbV4blODO+ar/LOC4RA3m1WhVRVkqrCjJqqJF2jNI7DSN536sUFChYJmebjSROMMheeLSAEJSBQvYXGYEKEmjdPUvVWlRtNVFsTfCZVpCSX9ZiRM/lhT+o9219Xr7EA9BSB8UGVNv8546oKe/d/ZlIj2LmQdg3DO+0KLIqlPxXsWK9ZK/hzJLUWV5Wp1FrCHzyQWkvz4QSm+giNwR2r11iE/lPEDXqd+1NWZbh6Hue9UjO6L6rMSlSP9+LzTUfBTgV/ovKQwZ6TJsOhHEl7CuSiilPc19SrKlwef57h7aQ+wMkikyEMHnGZ8LpyaveqdDopzKP68E4/T7hD5Yp+1tSl/nOs0/CJlFk5uIj6iuPWkXpD8wTj20Kp6VqMljA4zGjU2ysF+djjJ0320nMJkgQ3gjmgtIS5rPtLbqQqRoHU1q5PzqqaA80zNHvD9leG84/0QOWjg11VD23hfGw5546SBffkcEdZ1Sr/UuieM/55xswR83wm/8Gd+rpDPRe9Tv7nTauT/70ndRe6ybwzhJ0w3xck6j4rk1FvNSh9JYI/GLIDM+k5XWYcTMy4Q9TO1xyUEb3tSTUBLHVO94zbnrD3NC+R9skx37MEaEEQJAqlK2BVRZQXj5nr58B9qCQbAa+T+uHcrOqnOTjyLiE+w7P+XNoncj2OzhTlF0dRgkNXkCDrmmk/G4pR9Th2ulZy61T5XmY66hqXIZBu2kv6Ye0wmSmoRzNWis4wk3ct5qD+f0OdM3Dqw5XzpB2fei1sfxsp/6NjvnOkBoYfp7o2l+evaZMnve7aJ1VQzZwvKnlGiUIipIetvv7TiMkX9XTxjUpWPnJ7yEheOgswPVhSV2g/e9ygnw3L54Nkvb7b54Q/Ruwp6HuqHl9EtHvQWk0qFMEOug9/zTPXz49M81KYHrRLm5vldwgcPXYQTBQK0H3isre0wuZXjmKckloEcrSkLl9+vqgqH7Z67Ma3CXdciEK6L4SdkHrl3TbP0H/JNM/6RrsPZ00pq3ubzJHSOH1PQLbK8V7mS/6quiq417rWta51rWtd61rX+kHVVcG91qUKNE+GsbVInZjsPhmcht3gT+qZefrbrP7ZYjxuyLhzxp3BnyLZG9zp4ueTUgg75bX6Q9Jc7AKpW1ihekdsgvIr3Un9Vc1TVfWcwT0POnVvhPm+RWKm/1A9YrngXsbV57T4QpdaUmbSRr2GxQi5MTTPl8n6uLNsm5mQLbEYUjEcUoept70bM7OzEwOe99MPzNK7AAAgAElEQVSem2bgGFrGpKrCcWwpWRAplAIl6nTowiW1I8RdUfUHCLeq1C78Qn9URdfMQvelECdNxpICdqjvpdU7ePW8tjqJ36jaDaypP/YUVM1ovSrvi/IREsWoYqZczLzyUPXne9zRrcqtMiXzRQFNGRknMEpGKI2rVIx4IWd4S7rpMHNS221lctqhciLPI6YqCwtZwRzGrxReQ9q3kJSLLClRrEVKZYruOvW15Qy5IKVQ2gYZqkGsNJqSFhO0jR7fkCBEZLnbzxmcVYXXWoox2POsCWigqk0qnH5sSR2kBuKmTmCjftzYq1+s3el5ihtzUZJCxs7KdjVBJ5pN0L/rA4S51aSm+e22enTV87b4BacHhx2VDGJiIWwN52+EtAg1GZKH8zdS/W6CO+nfl9dokiAlY+ZMuHHYUdbXYFLBeIudEvNtg3+ZSb2jGHt5/tmqF9UUSqOe7nK8eDvj5ChRkKbm1NsC4aKojK8K229VpWteZp3sN4J/r+lYy3r1uZB9ixsK2YFU5dL4rIlOryacT6RoybeBMFrMs/5svouYZ0fcqLpmZjDPhdM3+n07q19xvmvIjdA8Vc92PVd2CNjQYGdI+0T7MFBmt3pwBdju
J46PG2SwxH0ie0P/vipuQ8Efkvrmbztk3yrRJZfV225G7bjYU1BvuDPr+gOYbxxhB3GfcQdD7i7+W72m9Y/hXab4QrGW7pMQtrU7FBwe3efKbqOq5RBW7312SnWgFLw1TPcN2YIJsqpu2qEr+P1MnK2qqdtImqt/s1GCAKbgPnviPoErquai/mCk6GP6RHmIECzGp/Ut2D6uFIJSQAxffUZUgkIruHPBHabLeqz7mzmeSa/2FK/ryhzHNSETID5sK9u5ek+L171poS2cRvAOGWbKplWVNxWm18v8ALQvmaEmfJVGfaX2pOcx9Zoy5k+CO6naXIS/1DFT7nbeaIcq915nAhafr1fFOPULxUO96PM3uu7GN0rckQTTq0yIgjvIOg9QrHqXyfqfhHRRpdH9wz6dkdTpaypFf+dwYaLLMFO8I3V2JcOE20zZLtQJIAl2csS+IBnG16LKLOqbztWaHO4jEoweq3otlCT4T564zwSp1+wukkLtsol2XRDwBwg7Pe52Lhx+psdlfNjTvmT67056fOtcjqlKt/v4op24r+Zsfl9dFdxrXeta17rWta51rWv9oOqq4F7rUgbm+4wZDbZ6r+ykE9rNoawKa+7z6iGb94KdhLgRus+BYlSxKa1ZJ8dz9dHYBO4cVD3pL5Pl2RtMLKTG4M6JcOuxQ8bPqvol15C2LcUZcqP/uVO6KLOtJbze6HR2Vq+ue55WGoM5DOSbjXq/6pTrvDfMe8PTn1RP41t97Ck2vG6PHFPLr4d7XjcqX/+v4cfc+QFnMl4yoRg2LnCOevf/sD0zDJo2BGC6RNy4SqCA5kWIoNnfs6yTsosiN73KlC4jwRD2hrC1uKHQdMLmfVmPkzsGVQhz9VXlsqoHlKLpY95Qiv4pKa884eIt5jzSfZ85/q0bmkqQ0GQckGrqU19XvvhnF+Uzob8zXpLIFqV3UZFJCTsZ9dTmTN5W8kL1vKbbG8xx1DSiTas+uE27+mOJqm6ouqzett9VSFR9WNLq1kS1hbhh7crFLcYg84V3u9AoyFXd3bRKWTCQW0+syXlhb3n+hSVuYXpVFaimYE/1PVQv2Xwn+DN0j4mwNcRuAegK840ntoIbDZ3IqqIC2LGm8JSyTvWrz1iT6ZYKW8O8Vw/pvNd0tbitalZQP7ydIHaFsCs03qze1KlTqoHthfnOrSrg4lv0L7NOJxunZIWsnvdqKcedDGmn3N3i6mVgEzkbclheM5iuqreg6q1A3NTp/rMhtXD6xle+qZJV0vbiQdSUQs2U1+4NpNo1yUFV4TxbTbo6O2Vqtol8W1+CzeQ2w6YgSYkSqTH4YWEcF9xp8QaypnmV5bgb4fzGMLwBfzuRoqVp43rJLEzY3f2ZoW3Voxvb9TgNrw3ZefzO4s6q3JcilF4uxzxl0taTW8VLSMikxqzn3g2F8bU+PlslGKgaapbLifAqYY9WVdeNqmquvkfdc5XxmzftSg4olRNenLmwYo3OAMSt+l7jT1RNdy5Vq7xgbKUHAKlSFkpUcojpI6lLOJdJs8Usqp3LuKZ2WZJBpCC2YO2S0gYlG/rdxCkLZaivrW5dsS/ETth+r2r39G6LOwQlKNQOVrrf11RDp92bYVLm+a5GupWCPQZV0Id53bdk1O5Ovt3W1Lm6N4SEjCPmlXLGx4fKgd8X8k5VeZKQXT2RrhDbzNka2k/auXGVab0c62WfMseReLfRDlPtdAHIlIh37brH5tuG2Bvmej1PrxJY1Icuuhbi3jHfLR1V3ZNT69h9B3Z0pG2jnSLUnxxf7XTWYaMkBTMEZY1P9cNIBBknTFRKy3yb4SbA2a3vs33vyA7yzdLldJhYO313SeklXcK6TKpEk+VakLYQbgz2NmBsgiKE55ZS96YyC+Gm1L0C7Cwcfg6b9+arNaNfP/7hThnaTwH3NF6IQN5RYoL0u3Sav1xXBfda17rWta51rWtd61o/qLoquNdaq4jgfnRmfuwwNZWkGEgtTEbYfFQ6gnsx
OL3xx4TCeC+0zzrNnRqDHRPzrbuwcmdNp8lOsKMj9srJW6apTVT1RjPiLanROzn/cvHzqT8QMIIdEqkzSK6K286ROuWC+kMkNwZo17vaZerfzJHiLc9/a8PxJ4bpVSHViW2ZDGN0xGKYsi6Lh+bMj6v56TfTPafYcufPOJP4J5++4ZvtYVVsQ9Kc9XH0vHt44f3nW+wgpGrvGt4Vii2UVzP5sVmnlxcPVNkmTJso0TCLxQSVzeykhAdQRqSkQu4d9hyQKVBaT7irGeOzqtpx11yYh41b8+I1i129r3bY6ePHSFsnV+2YlYH5eCLvek2mq7xBABFBth3xttU0rqBqWG6aVU2XoipCbhySkk6/WktZVJbqnY0PvXp6XU0fqpSEsrG4pwkzBQhR79TNhWlszyNYoylgx1ET1L7yXBdvKF2j/r1KJyjWqnq9wEtBPbkTlNaRt+oZXq7X1BriFs1I96rcymgwX2FB/QFM0rQyO5mVfas/LySvlIXl7xRh8xtdNGnj1WPoLf4xIseBfKv+6WVNSFYCw/mdx8yF1Ovk+TJ1Hm4zpUvkjaX0CTlaYl/Wyfp4k0hHRxp13TXPqSYQ1uut+lD945nSVR+j6VblszglCpQMvo3Mg6ftEuPZrv5U66pa2STSS6OK1z4gXyq9JF28lXFjaZ6UQ7uq/6X68/ee8WFZEFAWJanNbF+dmUZP18/0tyc+P+6wLq0qspiCzEbVoUq9kKjpaQCupmEVVxncc1IvfmWsZq8+xLjLkA03+zM33cQYq4KZDc+HHnxiux2xJvM42dXPmBpV191YuymprBSSpeznA3CjbO7OIijneDkXcSOkVveHtEtKF2gKtnYK0j4j20jyGZkszSfLfAdurB7cWDDRwkOvKmvI0FbeLiDREu573HFWysxW1cfzTyOvt+qDf+jPDNEzRoe3iVyE1l7mGD4etjQu0fpILoK3iZQN5/nigfQ2MUdHSobb7cA4+3XvKUVo/WXm4ThvYb7QKNxZyRhxY2k/T+t+Yg6jqtLotSJzxCb1lebbre4vdW9YfiY3FpGW0lrsYQJ7IRyUzmnXp3pTsd2aehc7w/ha17E5q99bmkLOy2yHEi3cKBQH2QnzbUP3cWB+qHvwzmPGpK81ZexhhBDJd9v1tekDhXTTEDeW0Bvm27ru2gxZVt6zGChuJpl6nD87iiiD20yZ+b5VUsuh+q33DXZKun/Wyr0mUJrz5WsyB6U/GDQFcHCXdMDRMt/nugcAUZTLfLiQP7CFEgxFCq5NxKNfk+r8dibv9PWnQ4fZBcxoyM1iuIbsdb4gO/X0pl1mwK4KbtiKzkbM0Bx0jgGRS5LZixJvVnrEX1FXBfda17rWta51rWtd61o/qLoquNdaS1IhBr2LWqZbh7eZ5sXQv79wJx/+acGf6525WSgIhdTq5HtuDc1LWhUKO2YQo/5dJ+RWsENGLjf0yvIsYKZC8zRfvFbLa8sFEOw5VDVXVoW2eZpJG4c9R+JWPYU6Pf/V81e/W+odx58aTr8IuH3AVEksDZbD0PGd3JK3gjOZh+bMd5NmtX93vuVn2y/8argHYI6WL+OG41Sz1rMhHR1mGxFgsx057lpKv0ym6u9pukgZW1KB4liPkZwttIl2OzPOHdkV7AB2Kiu/UDLYg7IbixWk8mVXhbV6Gu2o6qpUZu2aCGccuXHYw0T7OGHGSG5UWQOwh5G070j3W1WAncEep3UKOVelzx41Ha04gxSrfrJqWlRPrCaQyTBT+gZzni4nIiXKrr8oviHp46tHV45x9dbSOH2f00VJKq7mxz+fwet7IV84lJL1Tn9JNJIxYMaZ0reXx6QC3l2mq6vavaR82Tmz/U7wR0N2Rid9b8C/LBejdi7srH+e35rKw1zOgz7MjeorjL1B0iWZz8zK6TXHGXMaKF2jiW+tV5oCELadJjoVT9wK7qx8yfGhrsNZMJNDklBGUdVnUCUQwD9Z3EmVnuagvsbsZX3PxWXsORBf9epd
9xYTCrEK7c2jUIwjGMg+UWa7dity0OOYBwdJkE0El2m+98SdYfP9wn7W39++ZExQr7g/z+SuZt4bMFOm/TRA6ZjuGvU7LgQDl4nRkIJhto4YdW8KT91len9wmALmbJSgkNBUxJqE5I+Z+aHDTkl95eeAfZku10JNsmqeDMPO8Xi45VEKLGpTEkhCOHnssyPdR+Rkid3i8dVjMt0IzbOe2/lOKS9uoZ/c7Qg3SqoAiz1OdEYTmpbrqRjLWSzFFXJblEm7bH8J5EtDaTLmfNGklnPlaypYai3t51GJJlZI1VNuT0HXblBl0Q2q9JvJ8Ok7NX9+2WzJo0XOFnmYa6IXLJtPem6Yny3PP57gsYH7GQ4e9nWTffH19VnMKLzfbOE2YKp/NY1WjyWoStdkihfaj5dOoZkhbAwSG/yxquCblumtemS7354uqYaVJCBTQJZzOUX1Ie87zDiTfAelkG4X73OCVCjeYA6jnvtymWGYb4XTzyNmH9RzXvfd9o2qheOhJd6CCZbSQfdRP7dS91W3MqBdsjEiU9Ium3cXFq/VDkbatbq3nBImFPxhoX7omp4nQ94mzMliz4Krl1JqtHsUO/Xmkwt+SsqEBVKnjPhijHYsRZQlmxYILqsX1x1nui8N850ht5myiKFSsIMlPgQIRvcYW9bUOTMaymTAFXIwZJ+VoFKJGyF069qwZ0OSgtQOC0DzfEnxA30/xWg63PIYOxaaQ6H/HNfOS+4uHFxpXaXrXN7X76urgnuta13rWte61rWuda0fVF0V3Gv9TjX/rNeJ/nplbH5rMJNyXJe0MX8u9O9VlQs7R/M8E7ee1Jk1R5wMqb8oFGbKNFMme0P7aca/f7koaOeRcrsj916NeUaVs8WrZ8eIeRmIr3fMdw2pNbghV6+tcibdIZCXHPBcVK2pE+O5dZhJp0pTY1R1jobNZsJWj9gLG87HlnH0fDlu+NnDI8fQ8rOtRo0Zyfzq9MBvT3v+6PYzw6TEhGFSlaRpovL9TOH9lxuaNlC6RHOrx8n7RIyGMDvKNzMlGMzJrlPxAOWxIb3R6dnmRciNqjSLRxYg3bTY43TJWbcWd9Q7cjMn0rbBPQ3KS2ycKoNrFnl9jn2rWehJj1OuTMZ40+E/HpHjmfTNvaofmwb/vR4D2XSX89N6VYSWhBm7qKFFGbPWQq/+2HTb6/moZcYZCalO/yZkmnXCGTDDRN61YCqdIRVK7y+UBavPbcJA2nbqNYtJ1RjAHA4UZzEhKp0hJuUAzwEzXGgTpW91knqKGNBjNS2T5r6qeoXUC/OtcmXXrHVRtUeS+mvjVlPEFl506lXtlWLw50L7pGl/YafHuXmeCbctpnOUN0r3kFQoTtaEOYDYW5qD+t7DtqYD1UlkO4pyOUuheTL4F+qarWv0RVO93KAMziVJael62HMkdY7UWmyKlE69774SB9xgmUDVy2OD383Ms1IKxNXuzaxUgzI4yOoLliSrn9BMmi8vudT0OFFO8pJQGGv6mwh2yoQtxE1Zux2uiYiA9Xmdxt/tR44CubJJKULeJtyTcn4lqYq2//MDAKc/3JGzJgLal4nSuXUKH+D0k47xjTDfZXAFGQxlm5atibu3B1I2ynF9U5iCY+4c3U90zR0/b+j/omF4I/SfLdO9Q5ImyOXqyY5v+tU7L3nxfqq/Wt8DzDcQbxM33xwYRv87tIpydtBHrM/kLAQvxB2Y+v32JZObyhi3AtaSW0vY6DHq5lRJDg3uGHj5Wct0nzGvJ1VhgfLcKHf7YVbmcDJ0XeB8rP7XPpHqeSn7iLWFtIm4ei3kuxmiIWfBvpvwpmBMYR7reQoGsw34NhLmug52M1Ne2N2GIeueJ8UStwY7FdrP0DxOdV0apeB4ZU6798+121T3+Zsec54xY0SGCWOt8rBrchihzmEUeyHEpLx298xccM+WvA/qP4+GEg3j4gnPAm0GY/HPQvucib0gya+KpHuZyK1DpkTeeErvKz1kiSITZL54tIvA
vDe4sa550S6MRJQ/7XXd+EPtAD7r+Y6daBLkmJQRX/f47ldn8rYm2O1bioimqR2H1a+ad73u+QU2HzPzjWV4J8hTTbQ8Ct0X4dBZSpexL1Y7Q/WjqriCfzFkqzSgYgRzsuveRIHS6PxC3uhnrUyyesZTp92u6VWm/WLYfZs5v1PiytL98gH8kLHnWGcsMuY8kd/s6rkAPPDV58rvq6uCe61rXeta17rWta51rR9UXRXca62lKgvqrak3Y9MDSIDUy+qZzdZwMeyAiR47asJP2Kk/1k55nfjOndHJ9FYny3PbEPavVkXMzBl/DBRrSJ1dvbwmLBPdHhczYe+JvSF7oX1KK9NzTQkygpmKqoKVmACsaS9x35Ibo4lKg/7yTaPKYKzTxD/aHziFhpAt59Dwf6XXgE4Ix2x46M+8hI673ZnHw4Z50Lve6djqtOnJ4/cT58ceXGHbX/ynMTaIKRRTsNugvM/FlyboHXIRZKwsxsoFXbyhYW/pfzWQty0S9PXmjV8JAqn3mDGoL2zb6nN2DvtJ1ay835B2DfY4E7s6jXqeVh/TkghUbneYl0ETwlJW7iw6jYsxOsGaEjSe3DWqAlb105xnigjmPGlS0GnEhrSyaPO2pVhL3njs8wBuUfmrF/r1DdOrDjsmVVQFJUYsU+k5K7f2douZo6r/i7IMqthYA41X1cJZpS5sWiQtk9mD0hcySKgKdHNRTv0xMd4Z4kYY3tTkHq/8UVC6R+rUJ5mbUifXoftS/YZVMPLnwu5XZ+xf/Jbyo9fra4x3XY12knVqHCBjcecLr3R85ekeE93HiY//+la5m6eqolmQKJigynzcXLLkQb2oqRXsrClrbsw0T2Fdk6nTRDI7Jc2qHy40Ev0FqrSowVVTy0DV24WiUCrBgG2kRMEePakthBs9V5tvLXEjnF87/FCQvWXz3Ygdl2slqMRiTO1aFKZ7Ie8uXQ1rM42PnE5dPf2CMYW8SDNZ07eCKZA9zYuquOef1Y5ALNgpE3c6Sb6wiJduRvbCdF+wPzmTzh65m9ls5nUiPGXDMHqcy3T9xPHUkc6OY1X17KPX6fozjHeG5phxQ6kT6hefoYkZWVIUW4eEvKpV2aLHwWUK9VjLhSahPmtLmgx2Uo6tHYX2sar5U8G/JJpPJ0rriVtf/c4LokXTz1LvMCET9oJ5Paq/tq3XW/Uae59wLmFMYRwanL90j0KplBSbyVHwXVw9us4CPuFvxrUrdj63q1/b3swYKcRo1zSzIOoFB5BsaA7aKckWmnOhOQRVV+vlYL8cwWz0c2LjyXfb393nJ01ry71H7Ea7SM7oBD56jIuxmmjmVN0tfYOp1+PmU8ZEw3PuCG+i+suT4J9qJ3EQhp9EJKlnOOyE9lmfd7mexreb+jlqcC+jemFzXpPMzByIN50y3Z0wvHaaRFg/JsZXBRN1z5HBUtpM6iEuar4R7KzXuARNkYx3vSbVAWXxt3eOIqryFmMQ73SdUUkz3mCGSDE6+1J8Wf3kcVt4eRtXH3rqMxKE9suSnFd53Bb8wRBuA2U0Sl2oJV2iJEOxCfvkkCjrMSoCYa+dKTsqS9qdiqYv1kZdc8gUEYo3uMOsarg3+Ef1Q+fOkXqP/RcLuFcF91rXuta1rnWta13rWj+suiq411qrOPWiFVsoS0KNNZhJMElov0DqlNsYqwJq58J86zCbqj7N1OQkszI5s9cMeFO9d8XBdGdXBcPW6UsbMvOtI7ZC+5IIXfWYfQlISviXGTeoD8uegjIGQRNttr3erZeifqRksY9qisy7Doze/cXNDjdCyELjEn989xGAf/L5G16qSnTTjjxPHS9Dx5PUr3UT3iZCskzRMUeLMYUyXnzGGCAJ8XOPOxnimxlXbzG3zUwpguknOh8ZZk/ZCrlKauPkmY8N8XOHOxnsKJgIYS8c/0ClueaQGX56g50zfo6Qiqa6jV/hIowh3jf4D0dK61XVbfSuPm2r568my2m8ULkwZBvNac/bFvOS
kZQwkyhxYKmclR3b9Ze0MzS5B1AFddtBNjrN3XjiXY/7oucidQ5/mnDfHcgPe1UVtj2pX1iWqjrZc6gqV/0d8eJhM89npXbcbqHZg9WvAZTjCQaD3OxVZRah2EtaGEDZaE67nEby/U7V21yYXrXruew/J07eIkkIt4m4Fcwr9V0OjzWJa9ZJZn9QlWlRbptjVastDO86zKufEzYGGxYFOJEbgxky3fuhJioFyuvdqvSErYNSOL2znF9vsHMh7ETPBxBuCnZSJTc79cWZIISqfk5ZamoTuElJDmSvU/aswhvmPJM7hxkCzcvlOnr/b1nk7cSmmxmOLWIKbV9Zm3kZh47kLLRtJATL3GTKZNf3ML4y2BHMrGzRuDc0zw5TFTV7zMRdhx0jsbekRkibdKEo1GSt4dRRsiCmMH3pkSTr6y99Irw0SFQVOOzBn+DwU71m+49Z31dSGkn3YSTu/OoZj50jdYV4bMAUmjZiTWY467qPzuB9IsyOj4cbGC32fNnb3FBJMpWK4saidJTK3QWQWDBTwn4+Et/eYIZI2vrV73x+o3SI5nvP+fkWXs+Uk8MeK2t3n1QBC4bug8Gf9Dl33+la8c8z/vtHwo/vdd0kpx2VvEjEYMZIqelp46v1MscsfmojlOB0TytC+OWWtM20b3XdzpNHBFyj/twcDHG2axeuBIPfzqSavtY3gdFmTHNRgPt+5uVxg/3sSdtMPrqVVCAJpjtoHwvNMdM8zfjHgdy4dUYg/OgOd5iwjwfMzYa4b3GVFgNKbaHRjlZpdP0Qy7qnkLJSXLpWSQKVpGLPcz0GHalRJrLpdKYi164EaNoaUgg3RRP3vDDvVH1euo22KvN2jOq1zeXSpQKdBQieeevIXrtEYScEtSIT7itdJgtlvciFcLckxhnCWeg+FeJW/d51oej3vdU9tRTSxuFOQbt8+ZI+qZ2TmnYn4I7gDobwUM+V1Q5jyUIenLJ5xTC+0d/hD+qVNhOE+0w5OczDvI4oUCDPlWkbDfxopHzbI3XfiJuCPyiZYXpVCHvtRvUfy5oQmK3BTQU7WczsahdC2d26prLuAfbSJfl9dVVwr3Wta13rWte61rWu9YOqq4J7rbU031wI24wsfppBE3fsoOpttpD3YKuS5EZVZ0qvrL3maaZ5UjVY6hRx2FtlA2b1K+bG0BzyOv1qUsENqmq5c1ZlSgQ7XTw9ufNKD5girvOE+/6SzLLpmN9ect6LN/jjTN6oClMaB9WDO+8tYQfxzUzjIh9Hnco8jQ3Tc8e37pbW6/OmZHRyHDgdOx7uTsRkCMkynBqsT8r/A2S0tB8t09tE89mQOqAIh6oEpSzM0dI4eBk6jBT6JnCqHN04VwZlFOxZE8y6T4WwFdYgncVzXPmNpbX4L8PqQTNj0HSsT2dVY0PCUr2zgB30+4isPmUJUX2qsCq5EpKSELyF+NWd//NZ1QLvMOeATDOlbbD5ouSWtlHv1LbFfnohvdpjDyMyVvbiS51gdlZVy76BUjCjqixmCnS/OSgftvHkTavUhbgklTlovFpYHw+UviXtu0uizR+8U4X6+aTEhZpqZk6j8nZBlZ1eCQvmNEFNxJMqwcatYbw3HH4B4S4qc7VYUvVEupPBDYKZIPWlZtKzkkdMKiQvpEbIzuIHVRvcsSYtecHMdfq8dZTek95sSL1dz3HslRVpohIbzKLWtgt/FRCI+0wRQ+oL7WdZk/Hmu4w7CcUJ2Rb6p0TzeSDcdvVaiOq9jQlzVuKIhMx839RrDHIwnM8bcJluOxOjIc5upRyIzViXGYaGHAy+i8yTBb/QSzKSdS1kX/3Au0taUWq2+BdNN9PHq9qbdCkTRkfymRwNvg/6ooaaWrYof2d9Pnc2um5GPTb9Rz2+7XPCf/ukyukUNWEvF8QsKYr6+BIFt4/EYCmFS1Ja9YnmbDSxaTbktmDGuv+dBX+sPthjWbng7hRXIkbxynJO9/WNUVnIF4EOCthZ8CfhtLO6rywKnssQ
LGYW5ntVv+zMqiIDMCk7vDiD/3BYPaa6JuuFmQqpppjlzw3cBcpCYthPRJ8JsyNHgdcBBss4XIzdtkmEY4PdBshCAfXhAiEa4uTIPlMKPE89fT8TK8A1Rsvp3GKbTLpNkISyi5R6LQXRvc9EnTko3hBvOv3cWGcMnHaQukb3tiFc1FrQvcQYzNNRuzQvJ90ratLZMhOgx9TCHMCYlZk9vDI6a5JFE/omAz6v1yNOZyaKgAmVVnIquHNezyVG50Hs5wN50yFJExlL7aItaWxSZ1jsCEV0nwcwgyFv6vN9xSFent9MgjsX/JBxp2Hl5egAACAASURBVLju42lf1/VhpDRC9hY7JvXsHkaKtxdeujNIzMSbltQK41t+h+aDFB0JKUAGtw/E50bfP5BHS7yNmJOuPf8wEieH1M/CXFNQTZvIsZIT7gP+qNdS/0FILfijIWyL2s9HmG8uyXaSwHyA8cFi5oxJBYmFtK+zI3PETBEZv4Lp/566KrjXuta1rnWta13rWtf6QdVVwb3WpUqheRTCjaFUcdSNQvMs+BPK68yF1CjfEmD7PmBCZrrzNcXMkv8f9t7c17ZtW+/6tV6Mas65yr332eeee+4rjCwbExGQkAAhWEREIESARIqEkJEzEgIiIEKyRECGRMR/4JDATrGEHw8/33vuKfZexVyzGFUvCFofY+5jvfeEE1vveDbp6qy71yzGHKP3Ptf4+td+nxf8Max3jPVrItZFuRGo9ouSVt7jPCv31gjuFLRz3xpVmKD45zK5csRdw3TnsX1a77xz0ru85I16z8aoKt7iLY3aSZq94fhrw/Ax4OrIj0+3fB81qcx8qrDAua45U2NsJvaWpe3cNIGn5+2qeDAbkvlCzZr0nMy9MH6ISBtgsqtf0QjMs+W8b3GNehePNOvdrn12VEflmdpRu7/rQ6L7nFYl3I6J0FmSCP23O9wpIjGvHjKJGYkBOfXQNcqqjXFVD+xpIoGqSXcNYu3PE76GWdmzhW1LzkoeKHfJkjLxvsP0qt5ijHYQj/HC2h001Sy5SukGIpowNGibsEEJCOHuHjNF7A8vSm04lgFVeWQYiY87ZVqOsybWTOqxpamI21o/y90W83rEfn5FNpp2lJ1V1abyK+sye4s5DysNQs5FUR4nZJwwcsv4cUsqnOdYCdEXNfDJkkqW/cId7X5UJq0/ZuadENpy6IeFLqDJUoh61P0hYse0djqbKSL9xFyYjrE2xNaqIlfGm+szAajmTHKG8UG7/c0XgkWsM/ZsSJVyeLOF7nt9/vmjEheqfSFAFGa0W46hLx3nC13CGGLjOH5d0tYC+N9Va4rRuPGkNmE2M2lYzqMnCqRdwO4dc+1pPllNI0M7/e0Eu98m6ueZ6mVgfNcy3RYO6ltQ4kfnqF4ntr9VFm8IOl7Dxwn5bYvxmTkYZDSYIKvaqedaE7AkCtUB6udM+xRXUgNGGP7wQdMVnyE1qmQtqrHrE90PltNvMikJabT6/uU6SK3+224zEIIlbYR0cpi3Rc1HmcmHTPUWmTdG0/eC8sFBu9ZT55m3DjslzCgc/3BDqMu1HjOb7+D4GyF0meqTI7aZuFt85/o/f1DmcqphqjRpDmCbYPo3v8W/qaKXds3KydbnyzruxjtV+qoXyxxk9cCOACbj25kUHJiMFA8mgFTKwZUqEkeLWH3s9FY4uYOB20SaDeNosVViDnbdAUuTxdWBEAybxzOnl1bnx2ZJQrNkkxnuBTca6s+aSll/OquSC5BZCQSI7kIsPG5g3aHJm5bUesw0K1mh7FCZH5/JD7c63mPS3ZKUyIvxM2uCln8TQqhIbUZGWWVAsxdSrSo9uaixWSk+/qjv7Y76XRYfdyWF0arK3JTzmBIyBvzbhAlKy3h9cMy7spsZRC3sN7P6V6NgR8GWHQM76O5ONkJsLPPO0/xwXv3eufbMu0pJQ89fJEiW6w9Km4htR6wN/TthfIzkNnLzrvSsZMFIJgNz4/A+0AOxqLEB
aB97pp0mGYbBIzZhFu98G5SUkTXlMBbWsqy7S3qOzRlI6qeft5nkL5QYk7W3oNknMMLc6t8V7qSfof4cCNtKe1H+kroquNe61rWuda1rXeta1/pF1VXBvdZayalCgNdEH6CwWWF4UEad0hQydipewc5Qv0TaHwZiq52hsTFk61c/je0ToTWYkKleZ+ZdSSwrysJsPNXToGlOVjPazZSYb1UdyFZI5S7ZDkHv/sYv7tycwZ4mjOjdtnnrmb+6We9YszOrt9H2aD52rDGD4OblzrgoVQ/Kg4yCqrNfWJNI+juSILNgJrN6sdxBHxsfAraO+CowvlZMJZf7qfeYzx66RBgqzZrfxIsSdBaqF/V0mRn8OWlG+X7GP+mddbjvME7Vp1ir4i0xr2qUp/jLqp0qqyGRugpbCAZ4p3zbriY2lrBx+DfHfKveqPrHM+ZwVvVjCOrdFVnPHeOgz9805NYXwkYA90UXb+EwkhJMM2LVb5uLgiuNKsZmUv9r7hp9v8Ufm9LaCZ26as2Mz0WhRQR7KuprcuS21mFWGI8cTohz5NstTDO58ZjzCP2AuJ/TILC2dOcK/nXADoXkEDx2zLTPQqiFeae+u8UTLlm7fbMFd850PylrdUkzSlZoniOpUsXBjgkzBsZ3zWXI9g2xVtVWPZuR2Fz0Bjskpp0n1qoGj/eZ1CXMobAsgbhJVJ8s7mwITSa2mdOv9PmxydQvQuigfUogMHzs1t2A9vOBvC2qd+WYHprCr9bnb34H053Qf5XUb+my+gP9xfhpR2F+CEhvMaNootqBlaeZKtj+LtF8VlV/YYTWr3qtY23xU8KOkXnncaOqY9VrmZPnWpUtAWYht5EE5DphP13mDUY7uusXVW/9KSgXm8Jt7Wf17gk/U2/1GIThsXgHRwtBkGBovtfPOd0ZJAmHO6/zPgr2bFYlfd4q/zh6wZ8CEi12TNR/9kS60db4+aHFxEi1V0Z02FW4Pq0q8bwxiqR9gfFe2caLogdgX52OkwT1kxAaXYsXCk0ovloP2H2vfkwRKKqh6WfCbUO2Bn9OdL8X+g+CZMO8XdL9gMkQvUVMJh29slbrCwUhTwa7dySfyW1imuvVI5p9htEivdGAsC4yPFXUz3oeh1/NRGvIJ8f54DGzkLaBWBTiahDqFx3r+j3kCBtL2Oz0XKHqqD1NxG2NOc/EbaXkHKnKnFFygdmfMMsOk3cXikHllX1bO6g9jDOpqy7fMwaGBzVEp0rV29SkVQbMRucBPpOOUpTUzLQz+OMyniy2n0mNV197SeQ0L/oASZnUNSRnCK1e19OvM/G2DKggSBswLmFr9SiHXpm5oCzebLQXZr7RNLK4rdbvw9g46h8OhLtWiTVPJ13D5+mi8pY1un4ZkegxoxAb4bAv60ESGNQTnatEvCk7hIvc30SGU6XEFJchCDlZpDwuzQbjMmkwuCdPtrobMd0t6+dCY9J1KrZJmeKbSHKXeZ2d/u1gR00qNSGtc1dixp1m5b3/JXVVcK91rWtd61rXuta1rvWLqquCe61LiTC+i0gVYSw8w0+amDPdCrZXz5E/q98MVF0lZqa7imxFKQovUdOoSrqV7YOmkxWCwrwxVPuw+gGXMkMAqyk3sXUrZcEOUdWez0dSV5GdXHxTKDkhVVa9WRmkrTQJ7YvXl2nGese7kMm2pf9KPT9rJ7KUJKgoMBloVPUiFDXq7DAnq0zRXeEuznJJa6szpqQB3ezOOJuY4gZ3Lv7WZ4vthdGoWhy7DEe3eqtCl6mfNd88VoI/Juy0SDR6Hv3vX/DGaFdwyszvt7jDSCxZ6xIzsfW4t0HZwEYwOTN/fQOAe+mR4ku1gzJiJWeq1yLbWSHtutVT+892qObaI/2oHldxiGSmxw7/MmAWFm8hKsg5g7XKpnQWub/V1/AOgvJt5eWNfLvDvJ5WCoJ5PpC7BnMeWJK/8qZVEzPAHJSoME4wTGANedutPmJKqlm2qrITM7nxyNystAicRQ5n5eQ2
tXJox6C+VIDUIjdelYbKUL8JdkpUz6pQHP6oJYuO/VzajZtPI+evSyfznAkbVV0kgX+bCkWkfISNWeeOmfR8Vc89WYRUVLf5xtN+mgnbpfvZEDq7KoexhnB2a/JPcxaSVWUECh0gsXrlkytUkmXX5Js77DmoIpL1GOebiuZVx9qpsiSnnrjpIVE9WaaHqApT2bWITUZGQ/PJYsvmgR3zqmQ//F8z/TvHdKvkguSE5mlevcipdZAy7mkgfdioevt2mZOhBUwmdRGZDO6TV/VqlpXiUh1g3ujzTFGnzRhX9rEMAXM4aXocUH0+Y54PTH/4Xs+bQPXqGa0QndHPtgv0f1iUy8nQ/OQws8MdheF9YvtPhfGhTApRgobkTPQGdwrEzjF/fUcqPQLaH2DBqMIHFP+8HuN4ozzU4b3uDEgS6hdDaC8+4+wyqdKfY6Oe6/TFt3f9POO/f9U5Nyf1pZaxka3OeVX5PNUpEw/C2wdlnuvFKO/1Wqn/0yd93rGowGW3KruMGQ1xc+EV63koSv8uwGhhNEgQxoWtmlUZlCTYg6ZemaNbmcl2EFyf2fyQcOdI6Cx2SLg+rsQR//sXsrOYyiE5Y4+T9hgUj/yS2rXu5qS0riE6YDWVUWKGedZ1KiT6d6poHv4Y8q/P1PVMnQwpCc5FbGGZD4Nnfqv1mMtx94+G7e8j472+RnLC7v+NmGHWnojzqHPuUHbRjIFtAwJmToTWsfszof+weHSVvGJHUY/9LEiV1+8ZE9Wfn7zgXpQBLPES52XPE9kY7NtI3NW6jo9B15+xeMIbr4+rHO1TYnivnvFwV3Yj97Z4jTP+2TEnwYyXpDKZdNckV2kdN2RIL8uOayZNmriWBapXQ5gvu6F21FRUs7CjP1vMDOnV4EqrhTvrbqau4wnbz0yP7eqtD7taaSHbq4J7rWtd61rXuta1rnWtf4XqquBeay0JmeYHhyS3qk2b3ydiLVSvGX9W760d05qIhBGyCPXTsHpeU2Xxb9OaMGOmSPNJc9jt8xEz3iApY/dFXhLRlBlnCXfKt/ULsxX1krqXoIlkKekdfcqXzv2onZZuP6jf6TiUu/Qid3mnvFfvlJ0YW9xJMJE1OcWE8vNsVo4niZX9J4MhV5kUL6w+hPWuNkVV0ORseb85cZor0iZSP5cO0sVyJ5SkG8EdZGWnulPxWt4Z3DkXH7NgnGBv1BtlzhPz4+aCRnSGsKvV0wx0nw8Yb4m7GpsSMs6kyq00C3MsqmgvWBElTSzEBDTpzJ6UcalKX6EoxKLWb1uoKyUtzBGS4M4z5nBeObWEqO+REjlEpRvMgdxe7rQlJVV2ffFie01QA8g3G+TUr8SD3NXKqiyJNeY8kVNCKn1ubmtVc50qdAubV/ry3zloqhpcvHg567jdduoPtpa89auP2PYz821FaITkhFgLiMG9qrzgz7UmYUmmfg7E1vD8t7qVotA/GJKH5iWTxVA/GcycVvVBUlYP2WHS3HqnqW+588ROP/fw4Ehe/c/JCvU+0z5f1Ki5VWrCvBXsoKlmsYHh8eIp98fL42Ol/MwlccmMUakj51lVJqu7JqEtfu4+c3LCfKPsTzOpz1Jmo8lGFNb1rOPWnSBsYd4Ibgm16wy+Vy+5nZR7baa47qxITMgYdRfh84nh3R2hhfHdwgguiYpJyHVi7qKyou8mUkkdDK2SGpKD468N0QuSKqoXHU/zQ0P2BkTw37+S7rfkm81KaDFzxARPvI1rR//7+wO1LUpRMhx+VROLosfgOQ/NqjI3n4Xz10L3A4z3DhMd7pyU2VpOv//dE/OvH7EvA7b1OueMwZaErmpnmW9kXR/mm6Q7BPUCDNa0vHmTVZHPquQuSvlyPSVE6MeLqlXmTPYWiQl7HHHnWn3ITl9n8+3hMi8lU7uISCZEwxQcU6G8hMmRRovUkRSFejPxZXkfSUloqhkjMAZLzrJSZFIyqoTuRuZ3lhgs+blaySTDV5H6yTLcG/KjwZ8y/pSpfjqt
HO/cNcRtrb7qWcdNqjWlCyBuKlVF3yl7mzmQdq0qmADWqIrpLMREeNxi+xl/LvN+sIxnR7DK8k3REiZLLkp7ngzmaHFn3UHwR2Vg+1Nck/FcBkIitQ73+UjuaiU5POgOVqo0PS1VFneakZQZbwxmPZ2qziYPZFEe7CTUr2XeTvqe9T5ix6g8632/sn51TdSf3ac3ZJyVBd7Uq7JtDj3xYUvYeHZ/NtI/toQOfFHrQ5exZyHVkKpM+52j+yHz+jfLOJl17aEMM/equxPL3wwk8Edh3hUiRZeVdFJ2QyUrP7x+EapX3Vna/KREoGX9SU6wU6b5qcf0M6n1uMO0/o1RBhXm+HNSxD9bVwX3Wte61rWuda1rXetav6i6KrjXWstMgeqgLMBFHcgW6r36Ue2YcKeIGePatSlzRE49abchdR677zHBIcOod66od9OESD71kBO2qZEvvVFBWa0yjLhP6rHMbY0Uxt3CZDVHlYbMoIlEpqRjMQdV+SqviS21J4sgi2JXvEepq8gCzWsiNobxPq8Kqu3VM2T64hV2kNuI6fQYkjeYV0+8CUidkGdPbPLqgYxNuZv/eCYhjMHhn5wmTqFJLaGF1CZybyGpQlVCtPAH5ZwmLzSo18qORclayAQi2DGSnMF9eiO+26lvufgqw/sdZL2OaVMTHzeYkLB96bB93Ok5FGUEy1FVzsWn5j+rQin9SG4qVdW/oF3IaSBvGuQcEGOQpwNyu1WVolwbvGjS0DiDNXA4kUNQ5RfU/7qkCYGqwf24HkOqK/BO8+In9bExjOSS1EPKyrAtbF+9eBZz0N2A7KyOvUVJNkY/j3fKBwZNW2tq5DyomlN5VYOK8tF/3THvLOONKhWhVf9YtVcv8/m9xUyaMDbuDHZeEqwKoeBZfdTtU0AihE53RPIiJB9mVfhAWbTOIN6SrcGV7v/61ZAqUZLG28x0X60+VoDQWE2SOmSqU1ZmcTL4S/AUEsANWbvQE/hjWP162SiNIm4qjBXMW6+JQUXhPX+lnrvsM9lmpgeQ0WAHIdRfdN4bmO4T47tM9WxxZwjlUu3/2CoD9xw0ySwo3zo86o6EPc3gDGFTYabIcKdqT+wunkKqpB3dTcS4RIyC+22zztv+60j2Gbe32EEY3ikfVj7qA7qfItNtp4QWd6+qdeeQWd9j3nnq18ypN7TvNN0wZ+E861iYgyVlQSTjXMJ0mdOjw78sahcrH9wEmHYGOySqT6fVp0nKmuaXM6G20DmmnV/X2OFBmHa6ixTqTNxGpI34WudtmC2T9+AyqbJUrwY7QfWm16r5POPeBvKm1V2ft554v1mvtdmfSXcbVbFfR0LbMrxTKseisNY+EJKhcgEBGhfYJ0NdVOS6DrADIxlrEl2l43Tf68V2JjElS0oGYxPO6HufJpX57ndnpmAZZ4cxmWwS6XaGkuQoQTh/nWmehOpNk7rsWZneiwq3zE+zPyvrt6j/y9phj+P6HZG2DWKMMrRN2VncbVTBXYg654nsLd1vlXCw+foWM1eEzhN2pbO/SSsVpHrRI6ledfevec00n/U8+HP5Pox5lQ3jwwYzBFXQyw6VlN6J5ATxVulAfaYcIsnqrs10o3NBku7uLd77+pDVx94H7Fl3ObO1KwuYnDXRriROZqspotkYJJXvU2f1u1UqTr+q8efM5sfMeKOf8+2v6c6kmYSwSYVsAX5f1FWfmduMGXQ9kKhhkEs/iczw8f8c+d2/WzHfZkiq+i7+2rDRJDilBWWqY2K8Kbux5c8CSSXRzhlS4zD9rGP706s+oKl1TR+uCu61rnWta13rWte61rX+FaqrgnutL0rY/D6RnHZpg3Yy2jHR/u5QuuhnTRArahXTrKkww6h3S86u3ZrpXpOasreY46j51l2DOfXkriHcKSfS/7hXla2t1TOZ88/zu0NCjr12zHsHlaaSLdjXdLfBfn4jbRq9kxX5uYLcqG80ixIXmqfAtPWMj+o3AvUaJZ/JXcTUkXzwyGhI
iwo8XxiISCbtYmEF612rP6jyi8kYModzrepdyfjW7lj9faoy1YthekhrJ3RyUD+rnzG06uet9qq0TXd6HqqXfDkfMal6C/jPqkCkrlpVROWrZmLrVgUXihoeM+b1UJLKEskuKWBGlYZphtoj40R8f7sq6RJKylnxleW2VgU2y8r8lH5S1aCRVVmR3RZOX/itF//rrOpzbuv1WptTXzzZkdzVetee80UNM8rVpVfFiph0XCxd8yHqtXbqO8zG6M8hXpLMQiSfzlAtfuJAtobhKz0PsTJEr4rrtFO1pv+Q+e3XhbfZq6LrBuWRIqo6zV05huKRc6eoSV2V0W7w9dqIMjuzeoqXayZLuhJQP6mffHhfY8fIcGcxUVMEQb23Zsrq0Zsz01ZZz1LarZVmgCZnzVnVsJjWzn5Qz+JS6bbDDDPho078VEL8sigpoXo22FGVmlgoEDqhwR1lZegmB7kouHZQr3D3hV/eHqeLwtV6pYA0lnnrGB+E+T4iS0e/zYjJmNsJshCPyqKd34WVbiKFTWuCJprFGmIjK6dbE68SZk4rP9qO8UJyqC3+qIlwx08bZd2OZvUTmlkINxFcQlzWJEP5Qo0/q4ru+oykzOaHeR3ncafnN942OhZFyN5oqpwo61ivlWHzfSY0gvyknN7+g2O61/FqT4bmzRDaTNwlzATVPq/UiOHRU/2YdN6J6M7HnJQRC6RtqzsFlSrXsRL1/DvL2eq8nZ4d2WWO7yZy75C5+KvdFzzewRDuAuZkebkJeq6m4k81WXnpkyFtg5JofEYKReYpGsLRQxS63zqmD0lJH+U6VnvBnWD7XaR5XpRGo3O8fJ/QOPVve7cST5JY5VmD8q7HL3Z3ctadqnP5oth2Os73Z1W3+wmZ7WUH65RJz4IUFnLYZGSS9RgX5rmdWP3s062jeZ70+0iPiNhV2PNE7CrMcSLXFokXPrikjDvrmpNqo9708h6hVTaz62F8zDq2o74fwLTRZNHqPJMqi2QIjy3+02k9Z8v4y67sZKa0+pRB17+w3RI6y9wJ/Tv9bjp/XIgIGZmF+U5JGeMHmG8FWxTY2Ol3YHa6HogoISZWeoxVLzz9GzUmQLSZ+T5SPdkLJ7zKmFGYN+APEGoheWHzQ1i97eONLWltDpvyRcVdeOilcl3xl9VVwb3Wta51rWtd61rXutYvqq4K7i+kRMQC/wD4Luf8t0Xkj4D/DXgE/iHwn+acp7/sNUiJ5mlmeOdXgTYb8Ce9ozYvR+14P+hdNADWXpTXYdTfB+VQLp2dyRtS7XBPFz9t6qq1Kz3XXpW5pB49SVnvupdOemeUb+osqfXIEFTpWvh/MRdWKsiUSJsa+SJxCWfJInr37wxmztrxnoTcqHcpWvVO2Tqy2/bI7ZlTX3O77deX+fTjrSqJRw+VpvnEzaIAW5qfLNtu4DhXiJS0mQ+qNEwPBr+3yGRI7yfmVKvXsCgksYO+M6pM/WCwTjj+2uGPmeZFj3F8rImV0DzPpPoBcqb63fN6HsRbZAykribVyh51b+NKo8hCyU/P5F2HHHtVYRel9TzqdbTqXWPxapXBkFqvaT7eYnPWx4qo0rKe70qz143B7I/QtRfaAcDLHu5vVb0NUX1yxQML6OMKD9YcR2g8MkyYqShj3qkim72OM8dKyIDC2U1K5GDsVzUn1/7Cy3x6hZuterjeNOknbmvGW/394TeG6U6z2F2vSqQdLxxHN+i5dEdVcLNRxXxR1KYboX3KhI0lOcH1CQlp5UKvytIU9JhyZr5XZWLx2Fb7iWnn2f+x4/lv7FSxm2X1bCcLblYFZbhTRTA0Zk0qG2+F9pOqyskJtjbKUC0K8Oa7YfXwpdrh9gOp9asXLnQQthnaiDlZ+m8C1En9pr7MO4F81DQmOwrTjXKyF8qImYTpVjh/VeHGRGgspjerfz9VlmzUdxxbx/AucfebVz5sj2VaG24rnX87P3IKFSEZpuR4HfR8HYaafvBMpqEdiuc9XxTW5IT5
1uAGo/77p6C7Ig+FTBJV0ZQEbjsTZ0NODnev3r4YDUyF63o05C5i3yyxXlR3pUa0P45kr+mC7qDq3aKWS9BdBvd8Im0b0n2N7SO2+IBD41dP8flj8dV+EuxYdhwikGDze+H0jQGj49J80VA+fbUjOaH+3JO3HXZ/ulBkjJDutsgUGL++4fzBYieYQLnfQNzqtWXvkbsZXj3pJiBjWTtMJviIzMpNZtTj8K9lvLSZbFV1x2akK5zckoiZkiC9JW8D800mVQk32UvnfYbmKWPnwkv+/qg7ULsaYxfYOJiXt1XFkzkglXK1AfX8GyVmmJcjuWt0bVjWABFle7/syQ93+hrngfBBCQfNa2S8NZhZd94W3/x6jGXHIsvy76qizxu3snrFWGwfVJWdI/G2ITYWdyge3AyIJrVlIwx3ltDIei3njWivhqf0vej7jXeXBK/shPlekwfNlHAv/UrCyc7ortCpL5QYo+u3lQtpIeguZ6zLurFVBXlhZodtxu8N4Q5kNsgkqrpOZdckCG4STSHrdAdv8dkCzFkT/sJO+b0SdN1adjPnbcYZMEFIldJAkhNe/9ivJBqkKNln/b7BCAkQs/Ctx+IxvpBE/ry6Kri/nPovgX/0xf//74H/Ief8rwEvwH/+L+WornWta13rWte61rX+BddVwf0FlIj8GvgPgP8O+K9ERIB/D/iPy0P+V+C/Bf7nv/SFYsQfZ84f/XpHGVrl3GIMcjwTv36HOfYXX1RdAfbSpT7O5E55tSsFoa7xn47qvfRW77pK4hVo1/7CK0w3LeY0QluvqmBqHHYKpE49kxggZtJGzX7qx1U2a/YWcxrXO1qAtKnVs2WAnOk/eGIlmCBIUXDrbmY4VsSDp/cBYzIxWD79oHf3ZCBKUTAE92YgsSYqKZwRrElMwWFMglFI+3JnveSZV4ntzUDvMt4k5r0qm+5mQraZMDrSs/5bHAVp4Lyk3CyyOjDdemWr3u9+fpta+LaL2ogI9lA6TVdSgVevcqMEBbNf2lsjeZqRulq7U5cksuUUSMqkch3FqgInMSPLa3inirsIWEO66TAvB6CoWTc7HU+Ff5udVZVloWrkrKSGQucgZxgnTWADiIl4t0ViRAbdScjTDHvleYpzmrT2doSmXlV/+93nNREup4jMlTJDvSPtWk3emxbWJPg37Q62I9z8WeTtDy3utKj1qlhoMpWmz2UjK4HADlA/j8w3FWbOuMOsnunX02VMblvSrsGeJpJRv1nybo04cAAAIABJREFUQiwcyPmmY9wZJMD8WOgJov4/YO1AHu6V9BArTaVbu7E9DO+E+kU7s10fNBWoqFHTfYXp3Ko6x01Fat2qEI+PierrE483J6b3jtrpXN74iVAG89vQMN5YTruGCKTeYc4WCct7gGTh8K3BTobd7yLj3YZ6X7ranYA02HNgvPOQhZAMPx3Vu7/fd9TtTO0DtQ+Ms6OrJ+ZosaVL/9u7V576jr1L9HTUP/18vp3fG9rn4r/PYAf1PubiBxzvPCZoYlIG9Z92F2k0B6N+0jqRg6wqrz0WT/GoCuvwoabaB8iqTNsxYoqyKP1MeGx1Z2mcsWdLqi3DY/F0z5loBInqmzazpiou6VUITO8Tw68y7qDrzrQV3FDGa8ycv6rwp6RUjDFgUtJ1ePkczpB9rQlhY2Z8EFJ9WU/cwRCrTGoTnB1yO+vyXBRcM2iSWdhGcpehSkhvmUuqY24S5mxJTYK+ED7e7Jr6mPcVZoaYhPmrCQZL3CT9PECsc+kb0O758eOW+scTZgjrupc6S7q/UVUyJNKmId5U2DedFLFrMJtadxrbWkkLQ1j9pzIp2UVudjr3jcA0Y8tOot0pqQQjq/fa3sykJ71O40Ok/mwZPign1vWWm386qN95ER6zMq5JCfs2ELc1/jyrPxgI77ZkJ9g+loQ5i+8z03b5HlGahqbj6fxX9bNcp6Nw+spRHQz+nIi1wT+ldbzb/Yl0u1Ez+jgh2eu6VxjzAMYZ
3E9vyLt3jPdC8pnhMa9499iW3hCbtc9E9HtLgr6JG4XhY5kjNpMThR2vv5+6pPMjAVaJDLHJFx77rGM7eV0Dphv9ufvx4iv354Q/RkxMjPcV9fOoPSaF4GIah/3xFRZV+i+oq4L7y6j/Efg7XFpxHoHXnPPSXfQ74Js/74ki8l+IyD8QkX8w5eHPe8i1rnWta13rWte61l+puiq4f8VLRP428FPO+R+KyL/zz/v8nPPfA/4ewK19l0Pn6X4KuEMhEFjBP5+VVWot9tOrqnNz+f2ixunBoLdzercsxTfp+0m9uaceO87acR+SduujvktN28nYH15U6W1qUunyXnyfUjpnZUnSMkvqSUWunHavnsqd3qxd+HosgewMcVtjpkholoxsCN/rY6aqgk3EnA1T7MhdgN5ixvIeTaJ6sqrQFH9UtZdVUfNHGB5hKgk+/bHGu7wyFLNRhYIonPatkhiyA1dSeJbu7Mkw3Sl3OHlDVcnK2nVnZfiG1lLtZ7ITwm2tzM1SqaswL0ds6caPj7vVp5QaVShkCmCFdNupJ62otfEPvsLue1KrSWAyTqp0FfXUnEbmxw3++UyqHOmmxX7aqydu6dLNGRlmUqdkBJkjabfBfH7RX+82q8or+6BMW7ikjFFU48W3m9LPOme1QzwqySOhxA4RxF/SjiREpG1Wjq96IO06ZuX2hvxWFF8a5Psnun5i+rX68rZ/diZs1G887ywSoX7JtE+qWswbQ/0aOb93xaOe8YeZVBdO5lF3PewYdX4IhG21ek+zMcSbSo8/ZeLGY2JCkhCbomh5JQFko5QAyapumUWMdzDcC5ufEq9/bFX9SoIt96jjY2LzO8N0K/i3TKwN3e/7lb9r+0AqSnuqjPphRWif9To+d4ldN+JNoqpHahuI2ZAQTIFVvt8ccSbx2rbELByGmvO5Jh70WtiDZbxT66J/E4Y7g0RVgEBpFfPGEKsKBG7/BPr9PYdfFzrIi0XeWt6+SqRO/Z9vo1DtDeOjXoufohIBwjYroKOwf79UPwGa54Ad4trt7gpFIRt4+41Tv+dzjZmEZDJh+Wo8OOxgiDvAQDh6JF4UYkTJEfrZdKxIa5CXjFvoJrsGmVXlx1rc60BuvdIUgPpV/dPTrSBByRjzNq/Hnh3Uny/eUH8CO11S6cjgxoTrI6G1SG0xjVs76t1h1B2RMVC1XpVi55h3ghkXnzCIlVWplRdLbLXbHYoPVaB6sYQ2IwdL3ERY09YEexbs4AhdwoxmTYnUYzDUT8LZQjZWz3OT1td3Z/V9S8iEzmJCZnzfgZHVV+4PE+G+VYU6JOWgW8EUWo7MUVO8KlUtzWFApnn15+N0xylvLTLMunYYs35PnL721G+Z/kNJ53uzxMkoNaWcg2zBjII/anJgFik7HyUBcD+RaoMbZ90925ddqLKGute+0DQsWMGfPMOdYd7p77sfE/2jKeq8rh3z7pK4iVGfbraG5inoPO4qzLmo2HdbVYut0TU1RO2VCBH7VvyrcyBtCzO57FYN79PKsaVOultZFHl7NqrOF7HU9eCOSvUwwZYEvnz5riw6W2r1uTkb7FlWxq0ZzZpuNt6X8durmusK+10iHH9V0T7pWjB8qKmf5pV+kssO4fr98RfU9Q/cv/r1bwP/oYj8+0AD3AD/E3AnIq6ouL8GvvuXeIzXuta1rnWta13rWv/C6voH7l/xyjn/XeDvAhQF97/OOf8nIvK/A/8RSlL4z4D/4//Hi+EPE8kb/A+aGKIM2axqXEqqis0zuMtdMTGpl7GRS7LINJM+3ANg3s6q1ha1V449uak1Hxv0LnwIyP6od9mVJ5XMcQDpg6azGO3cJQFz0DtTVHmQOeLmqN7bcodsn1Sly11Dah32ODI9doRG/YkI3P4/+h7Do2F4p2k1boDzR0PcakoSsCqxALFL5Cbh3zzjffGHeeWAhuIPFKsd5f6tdJbeQGw0Hcf+VCFJO4+/7AHNXcS9OpLLuLOqBGZkNRJJonSI
C9OtR5IqfP7z5VXMWw91RUa9y+oH1LteexzVi2ctZioEhNpiWvXqmfNMbitVvGunP/fTRZ0Vwb/0MM2YnMnJruo7fulsjvq8WZWDLAK1U5oCrKq7Pq5WisEXqWQSEzln5dkC6aYjbmvcSyFuLGpuQr142atXd1FpvFvv7hcFyzy96Wv7L/iY3kNdkT49Ye5u4XjGDLvy2gn/+UzcKYP2/HVN+xTxBx1vc1eRvFC/ReyQGO8c4b2/+KyNIKmCrGQFe55JlVFFCt0VWXyFsa6pP51X3/KitofWMDwYxgdNyUuen3kmSYI/CeO9ZXxMuLM+7/wrfYw7L3n2aEdyJfQfm4tPuA/rfLR9ILZK3Wh/VEWk+9OW58MDT3XGPfaEp5b6J8vwB9PKNjX3RTU6eNyrJTUZMwjSLl5mTQesn8EfM5sfA+4UL15yA2aMTHc1EhKxtYTGMRV/a7asqpHdOzCZ2CYmA+6kj2k+C7HWdKlYq0fZny+eRjdkqlelrtj9QNzVpFrVagA7J6YbCDdJk9u6iH1xxLbMKZuJD7P6cBNIb0HymlAYGvVcNy8JOyTmrdXOdyOaLFjGmz1rEpOO2ax9Bu913jXPgeQNh994YpcV8WvzSjiQSFGNhfpFcL0m59nSub+k1JEzsXO6XntD6Ip33qrP2VSO7HSM9R+Uo7xwuDWJUTC9qpfJF1b37YWfWr2qAn//jzLPfwvss2Uuv89tZH4ATFbKxDZCE7GfS1JZ2YW6+RMd02GTkbOylZfPGFodE/4YVLVNefUxgzKjl3OXNjXmeVBaTlkrhKJWpkSuK8x5uFBVAMTp7t9x0NTLyhO/uiN2JcHwnSF5VZOHj7pjIBH8cZHSSyJlp3MqWTh/1HRMW+ZVFt0ls4Mj54w5J/3uPC/fi0kpNr7THaJOGB6F8UGf338QskvETcK/qDpavQnzTSEQ3CgvPTbC2x9UbH7U5Dy3+FtHJcosCW8yzcoIt1bZ50B8f0euLcevLW9/I5JtprofuNlcLIoxCXO0iGRCsPRvDakwj8NDxm1nDOBcJCXB2szYl50bl3A+4gFrEyEYjMkYo5/hfKhBoG+r9TveDjouX8uYtaPyvbNx+D7jzol567BFqTYHpV/Y0wS/5S+sqwf3l1v/Ddpw9ieoJ/d/+Zd8PNe61rWuda1rXeta/0LqquD+girn/PeBv19+/lPg3/rnej5gjgNG5JIi9nogDyPS1KoMzrP6Gxd26uEMlSeVdKule57KX7r2N23x/dSq3izdxbHIIKNy+ai88korh8yRuNXh6U6jeoCdJvXkxiPZXVJMFiGj9phB88WTt5jCXg03Dbaflc0aE+O9EOtM7DLuXDyP5aXmXWZ60FQzfCJL8Va92rVT1h8McRayu6TbZFM4s8Dr6wb7Q40/aDoPgJ2E0FiGb2fkZDCj4GBVgrIAJ4c/CqkSbK93sd2niD8uiUcJM6mPMLSW5vuz3qIuyqG1qg4UNXBJ1FkZtSKEXY09Kf9UpvAzhqxMsxr+5oBMmbTr1Dv9Zd63d+pZK15brKafybEorG2NZK/XYNMiPz4hHx4uPu2SqGSGSTmdzpKb6uKlyllpGo83pRsZ/cyFYZu9XTuS9TOLfoaFs5szvL4hm05JHFb0PQqtQQdKIoeINIJ885G4aTCHM1/K6blxxM4xPKpS/vatw4TiV8yQnCF0QnKW0ArVPtO86nUab4VpZ6kOkdNXHr7ybL6fmO4uXFPXJyUbGIGYsU+v5Nst6bEp1zoz3sP47YRvZ4xNNFVQLitgTKbvK+p6xgZLKP9eV/oZp8lx3FdUT5bQCdNRcL3QPC9KU0N26mXLJbWs++mSMtY8aVf0+dvA3HskCDd/mgkbT9hdKAPxzat33Gv6kRsuJ7HaqxoosWTOv0yahpcu7GgzBJrvZ86/2THeGvU4ljkVukz9JOtr68mDtAu4U/FcW5hvVFHNAs2zzkVfkp+SFeadw5+icq4PI2ay65oxt40q41GQ3UwerfKG
m6IAvxnitzNp1DlrZmV2Lr54PV7ovh8wU8S/WUxQaoNZ/IK1hZAwv/ukDGhjSDftyhfv31dMW4M/g/8nhukWhq/DSmj5cm2JNdz+adTUvDVtLeHeBmJX0f7jT8y/usceR8xY1NOUsS9nwuOGZGVNlPJH+WJNE6Zbnf79NwEzKDc5dfom7tUy3SfaHw3P/7qOGS6N85iDMojjVr3kVAmz99TPxW/dqBrff4D5Rrv+3Ukuux6OVfkeHiv8MWL7iPnT7y/++rbGTDMyB+w4kxvtvVjOg0Sl6KjfeCJ1DTJO69qSvdPvKGO0v2DTInPi8K2uHeM9ZKu81/Z7x7zJhMfAXPiv4TEo23c0zBtLtRfmrfJjfSGsaKJfIuwqYm1xfYU9zdCWa3EeSfdb5psKd5rZ/zXD/DfOtG3h5Iru6hnJhK8MKYnO+TK/4/ctsdbrdvqVMO88/pCxk54jf87ULzPVp5N+D1hTdlcrTFGysze4H/eEbou5mxBRdXUq61uIprTZWDbtiKsScpuxtqxvo0ckk5JgTCZnISWh2+q67G1kjpaclX8MEGaLL2tTs5mYJ0e4mTX502VCFOZ7u+5a2LOhfjble0PIYmg/zZcEu13DfFOpx/kvqauCe61rXeta17rWta51rV9UXRXca10q59UjSfH0xI+P2P3p4mXq7mCYoC9+nboiO1UoSFn9l2MgNzXmuXhgbzaFX2sgRlLnCduK6rPKmxKTdrQCctSu/vSbD5j54r9CRNVDY4ibGttrFvdS7vOw3q1lK7inI2lXOJCiJAXJmlZWvbWETr204/1FGcoG0lY7e/MuYFwizYvEANmDOwjDh0SuE7a/JEs1T6rqHI8N/p/WuLP+/9CV4zuVTtgg+P3lPRfv5LzJ+KOsfrvtd2ntCF8+2PjgaD5nzBipRiUJSEjqkwVMiOqZxihvU4S48aogAKl2mClipqCKqzEXFaT83p6mlUWrL0phHZcKUf2zzhRFZFKfV/HG5sarV7F4fGW3gX7UMQLk9ovXqjz5UDy4ZTzlx7tCzJjUR9s4zOtJs+NRTy4pYY5nHRPDSA4RbhQUKedBx1lKpMbpa4WE7I/ku+KJnGZNQXs7wLsHPR8l3Qeg/3qDHVSlnDaqqMcGxk5/X73B3P38+k53wvignzHW6ilrPwmnb4TqDeo3S/edfsbUFCV4Stgp6Dn2DqYZV963f++wE8jRkp4docqaPLUMxzpBEMYbwZhEVQXCbL9QeBOxTpjgNOEvKmFgulm8p7KmNNkhq+9x1iRBgPYpYYIhtpbpTrC9+jaTS1Sf9finqiTxmUxyhupV1D9eVFzJ6Hiddf70XzXYITGU89T9qHPYFU5zNoIJrN3cqcqFB5uJ26ipYbcT4a0idIuHOdN8Nsy7jB0KS9YJVdn1mDtlkyYrpK4iebsqyADHbyrGd6ruShZV6BL4l8L0fB/wNpEL5UTTnNSvCnDzTxKStcu7+64HI4StJ3QWiTqH/H7CDUGVyH6EEJi/uV2JGdPGMG+F4V2GrJ5LczarP9YOmvhkBx1bh28c3edE6Mq1DkLY7Kj2gfT5GbtrtYP/7bLTkbaFt30KxNbSfFbVdLrR3/cf9fzFOuOfLXZSPqqE8j3QZrid6bOn2hvcC0x3GSnqZuoiprfIaHRMverBL9dJqTD6mtkoVzX39ovEObA9mEkT75IVXEwwjuSH2/VzYMxKeSEm9eUuOzM5k7d6ztNti/vpDb7c3QlRd2+6Bjmckf0Rebzh+G1Zj//mgboKeBtJydBI5r7riX9Q/N5u5qfjlmHyDNuKoanwb8J0dxmz7ScIjaP9bGg/B53jn99I2+K/b2riFwl3kiAePafp8l2WzxaJQnYZ2QR2Nz1tpWv4i8kMdUv1VBTdWb2rzZM+t3pT9Vb6URmx3qmCXZjnoDti4atbqn3m9LnW+flmePuNfo/YV+Vh520gfN+RnSab9Xfl+zgI/qD++vE+UT8bbA/Hv7ZcB1VgU63jJ1slLJzL
zs/uHzvSDdBk7VvZaCqo31vdOUWJMNNNZvOdEkP8KWlCXFHjDeC9Xf9u+IvqquBe61rXuta1rnWta13rF1VXBfdaP6+cIabVd2mPvWZYl872XLy1eVcUsy+UL6y5vIY1pHflzjslBKcJM95hcsbP6ZJU1lXI2GDGGXk7IdZgjuOq9mVXmH5ZM7Xd01EVimPxAnq3HjfO6l29KJkBtGM/bWrl6BpD90mZfy9bVX6gdClbsEeDCWDaonpSJNrSNbuot6BqyuLFM8W+mo5eO5Kj+qSWZClJRfURVT6qV6F+EuaioMw3GX9Srm5yejzVPq6kBID6JawcQHuaNKHr8xvxvZ5nmYLyHaew+pj9S1g78yVnUuOIuxppPabXzm5zKD4m6co5tnptigd2eX72ltiqOiQhKfliEnJdqWIA5K4ulICoqWO3GyU2FJXFvOq1y5tW3+vxTq9vviRqYQQ7R5gD9kUV5aVT2jy9Ed/fYfcnfc2mRqaZ2JVUqLcT+eFWu+QT2JczPO9VhV7u9o360sRt9N9m9SxPt3qt543h9JWOzf6rxUOYV7WerIlg/qi+wukuE24j+JKyU0fSYOk/OqpXPY7zO0usmnIdoPk8EzaOalJCyOpXdxcObrXPhPYLZcfmtZs6W8EdLN3/7Tj+QUJOQmoy1WvpdG4zsku4E9SvqpqZGW7+ybiOBZkuOyS2nxk+blZ/5vCgKUrupP63VMPpm0R+mJFj8RP2FnsyhPdzmROmzJOLHzFZVYNtn/DnQGgszateS9frf2NX4c4RszOY6ZIOaD9pV7tkIYuqheHosSezKov1s+AP6iXefK9UAH9KTJuiur1Eqr1SLMLGl3lrvqBVQPU40NQzbz/skNEwP4aSzARyLqq4ZFINac6rCrlcyyzK8Dx/0+Lfos7x4sEGiK3DDI78Bx+wb6rif7k7lRzMW11fTID+q/Qz6SlVRQnzKE/bCaEWXBFok4rKmDkhv/5IqhzETNwVX+ZLT+gqMJCsYe403W28EWRTzsNG38NMwvSQCNuEPxi63+n4O/2xkiTyXWBODgmqMC/Kpe2VCR0eAzHqc9zpssNlwrKmCanK5KhEnKmogmZWRXC6sZrQ1RrsdyN883ElXsgcSF1JqvMtsuxiuUUG1rV+/e+i3i5JZsW3K/2oJKDi61/6IFISchasyVRuxpvEa9+Qkr7+j9OO4anF7a0uISflTqv3e6GXgDsrM9sNuluWK0/cqYqcyq6bPSub3Z1h+ydeucfod8qy2+KPwvDOcvqx5tCW8zQKBpAguAHqp0yzT9hxeb7RNXSjNIfYemw/Yw7DSi2SY0+6vaP7lJh+a5Wtu81lIAFZiUB58koWMZl5mzGlX0UKC97MSh0iQdiyzkmJQryNIBn32WN7nddz8fiOD5nmk/rZh3fQfSeEja43y3edJF23koNmn7RfwBmGP3ws463syMYvdnn/nLoquNe61rWuda1rXeta1/pF1VXBvdbP66cn7UAvldtaVdq+J4cAXUPuGr1LRP08ch61Q93Z4uvUBCuZVLVLXUWuwMREtlYVm7eBcKOKlnsb1rvsfLNR2sHTK6CZ9OFhgxtmpQSEwhUcxtUbKqceRJO5zGmEBPG2XbuYF0+mxIicEq5vmTaWaq9pLPoiIEsjv4P5uUGyrApGtupDc2dD8kJssqaYlRvI6UbvvO3JYAf1Z1YHVuanCXrn6/YWM+l7hI2+LihjMgv0X2XcUTu1553FH+J6G5qcIDEx3zWY86ykiE27Uimmxw53nLCvZ8KuECuMYIYvkn5EyM5gUlZ/6rEnLZn1IanHdpwwMRLf7VSBMIVN+HJC5vLYpN3hMqtCsXBoU+PU55gzYavXWU79xQdnjP5cfLDxbot9OaxEDDPpuMneIskoz3fTYI5FZa68HpM1yIwqMnNQxaaMH+agXcuuXPcvPcQsPuFUjqVChpl4266eSDtl7CxMNxcKwLzLl652B6HJDB8yqVa1zfRmxW2yd/jx0qGO
qJKv6UTqRZScia3jvNlQv85k2SIxM28LYzbCXKm3bvv7yPFjIYgUkcW8avf43KkvHMDvzfp7fxAkG9wJzATVKamPvCiX5hyxZ90FSLVlbhqmG7tmwVdvmVgL1VtmfFSvK0COypvV8aQ+QUajyVV+yZEvBIKTdsZLzJiYIWaq/XTJpO8D41cdyWlnf7JCdcwX/nWEeSuQLdNt4ffuHdl+6UvXLmt/1MfHTnDnRP2ia48dgqql2WpamxNcHxnvL+zlufdMx0p3I6qkSpZc+K757KBJ2IMlA/YsmNLt/fLXheYp43opCWp6bSWxKo+xNshdje0D4a7FTFGVtuLFXnjFJqon30y60xMb/cyhy9hZ1dvlvMQGwheecH9OHL9taBtLbAz+qGq5ngP9rG4/8vbXb3j7I1XLUsU6RiUKYaM+3NRFpNd0vOGxPCAJMprCABbC6nm+jPHhVzMU76h/K2p+GTduMKsfN7vCB88C/x97b/IzS5amef3OYKMPn3/THSIiMyIqK7ursquoQkAjIdESEjuWSPSaP4klvWTHmgWbXkILqWl1Q5FkDZmVU8Sdv8Enm87E4j1ufhOqa0+Wv1Lo3vjcP3M3s2PHrj3neX/PCY/tRcmOhWJcG+pHz3S3wAx+9hLHZSWK5BhkHGuNW5UUT1kV73vUdpJ7SGTuIzkptamRlRqiMNpVNxIWFe17+V7TLxeMQG8hrMI5hTKvSBTPhjJ7xEOTWP464VvF4v2ZKlLsA9olyocOgnBoheKi5/EYK4M5yM2mOMo2ymf5rOkKqsdMB1kK31n6Ms73oeKQx4tPLN959BixXV4hcxH9dCBeL9H7QQgt26PMk6d77LKZr8FyL551MyiKnYwTv4T6k+zPtFL4VtR1dYLQZPVWByAKG3haQ/NGvqNbpUwCMud+E85M+GkjKX22F2KF7RJWIDxyvSPbLA55Lu4j2iW0C/P8FRor512ft/931UXBvdSlLnWpS13qUpe61O9VXRTcS/1uBVFI53Sp7H9MyxZCELVUa8z0mTp6Sk3Z9tBUxCtJnzLHnHS0KDCdhxDQkwOrhWeaPbJhVZGMnhVX8+5hTkEDUZ1SWxFrK4lHtcU+Iwod4F9fC+9VZT/v4PP2cmd+YaEpUbsj8X5D/XFgWrZUDwqbeZfjjXQwxyoR64TpNWHj6X6YlaOjwCiVScJ69KLi1h9z/rYVxSUsItFqUdCC5G2D+Ii6V4bqSeGWCd+IAuKzSqMSVM8KlRTFDuyYqJ49xMR4nfmpUTxP0SrCuhJ/rtXE9qxGxdKg1k32RUsaUKxzwowL2OcOv2nRXc5ir0owJ6qDGI2TbVC7I2Y/yhNzVohTXaIPA6mpiKUQCojxdzi0yZz9jSokwqJCuYDfyKqA3Wnh4H7mn4pXi1mBJSX48ED48VewrDCHEX9VUebVgGQ1qsv+7LKAcSJuVjNJIlUluh+Jixrz4Zl4tSCuG/Fl589TLqB8IG4aYmnwty3jteXwZaYDrMid8ZF0M2FsxKiEd5kTuS1E1VGATpSfbO4WzsqkU9SfRI1p3id0gPIQiTZ3Wr+TcVk+O7QL6MNEWFdMm5LuPn9Gq/At1B8T3Z3Grc5qPyBqSspEh5RTvEbptJffFxW3OCbcQtF+iiSlmNYylkqE5hBKjXZJViVK6G+yB7gS1mn/QtLDUpNBs0486gDh9YQyieK3tSjtZRKm7gnE4hVJK0nOcwbjLLZPcyKcBXyrKQ6B/rbg+IW8f7rJ7NWjdN1rB/HFSPIavbWijma1WA8Kt5Lkv1DJqsh0ZWaSw3RlsV2gfifElrAohSNdZa/xrUI9FRinJGmwCbATWguAXnjiaOS6v0qo0WDfn7u9i14xXitUTAy3muKQJNlqZFZQm08et7SEyhALRXHwTBtLKPN3uBP1dPFbhVtBMcgx9NLmgD0qzCjd6MVBcfwqSaJZZmz7GkIlqVfxVVbhajmuAMN9Q7F3TLcN/a1iuI8sfqNnNi0gbOME/grwCq4c
bqHAn05mPg+lfB+3SsQ2Ees8N9gEZQSv0ZNmuguYo57HwvDKi0czSoe8WyWK/Xk8T7cB32ghd1QwPJYUx0TSJXYQmbd6DhRHL6rtfkJNnvKDE84wzD521Y/EdY0uC9TuQPjqPl8zeYnFGmF4lwUqRKqt/Hz9C810pRjuE/aDwKHdOgpdA1h8p5jWmbO8l/mJYa/BAAAgAElEQVS1eYjYPs4reckoUkjo7ZG4bklthZo8xYOcrFgVKKUITYH2EV9nFTzvQnGA4V6uIzMq6k+JUCtufyoq9uMfV/hG1NPl20j1cZB+jNyDoLedpJZtO+KqFu75037ukQDwm4bdDyt8C75R+ObcTwDiI+7vwfYw3p7vdeX2NK/LamYMCt/ItWezFxnAX+XEzkExvBTaUNJnNV85xfGriGs1xRGmK/H83v3FyO6Hsh+3/9eB4UVN+TQRaqH/KB/RY+7FyPSZtDyvNv9ddVFwL3WpS13qUpe61KUu9XtVFwX3UufKaWJ8eoLX9/OPw9VCErKMwrx5kM71rIal3QFlLamtRUmbnChkWjHd5aerBMZH4mZBqC0YUWqKx+yrdKL8YTX6MEBTowY3J2yZY+72H/yZfWvO3hs9iQdT5xSsVFjscyfMVIQuEGuLVgrVT6RFia9V5tDKNo5f5/1pA6oz8Gpk0UwzVzRcK+J3LemHAyoolIb4WIqfCCifFKFBOJoqMW3ElzduZPuxFL+vf5E/xyR8K2oRQPPOoEdR5oxL+EoxfVlIStZBPmO8UpipoTj4zCuN4qfNHlv7cU+4yXSLEAmNpfxwnJVud9NSvt1hugl312IPdvZJA+Aj7qbFPg+oE/dYfSYbOk+yBnfdYA8TjEG8s0YT6zyVpIQ9OkhJPJ6VJZUWk1mnc4qZD6RC/p5KO5M4lAuo5QL7eCS2FbEpKJ6HswJhjXh36wL7/lkIDimJ/xvEm60U5uOzrDgMTpQb99l+KkUq7HzsDv94yeOfgL/NHMjWQ1K07YjVkdFZ+m2NOsg+1h800yaJb7CQFC0Vmb2pKPGg1Z/SuePYJ/R4IiBoVEziT+4n3Ms1KftQTx40kLHgsg84ltB/4VE5D36qAtUHgx3ArU782LPqN13J97JHSdzr7izlMVLuMt/3yqJiQk+JfmOZ1opppdj/OHtXD4ZkEqmQ/0wTCINBFZHpPquXOZnIbWSbJ49myL5K3ypUJd/PjLB4m1Ax4RfnRDgVE+OV4fCVwq0TyaRZHR2/9DBqKBJVpppMvSEt/XzdRAtxlSi3huFefL+hUITi9BmJxXcydvSHJ9KXd9B7hj+RL9n94YRpPMFr6nbCO4PfJBjl99ernr4sCN7QbEYOu4b4pJleZcrKQyEs1CtR4qKR461iYryRc3n1C0f3qhJffatwi5JQKD7+x9nfagK61/gWputEqBObnykOf5BXNTR4r9C9ZnztKFYjfdlSPZyoNcJptp0QV8wkqYfFTr5j/7qm3Iq/e/uPEuZVT3y/wC1ltQpArxxKJ9bLnufnBWnSqNHAQs5xsxrodIMqIkMhaq39WMwe3vJmYDqUrO4PTFeW2ka6Q0XK47Vaj4z7CnSSbU8av4CwlH3cvNqz/e6KaSW0kqTF+5nM2Q/qa0v7QY4vqqR6N+Ukw3ycyoJkDKkWdTS1Faob5tfV4AjXLebQy/y4qlEhzuSQUOXeiWehI/SvEnER5nSt4VZj8y0rlOAXQlOpntLMr3YLS7E7paQF/HXFZzMosS1ICh5/0uRVokT1KMo9wLSOxGWQ61xp+hdybb/9z6p5Tij3zEryeFdTNBazyxx554nrFtVP6G6i8JHUVLjbBfY5L+8oxeEHiv4PJuynglBHhhfndD49iWIbKkVYnxATirE4M+FjI570WEfCCvhgZs94shG1Cky1YXl/5PDYwqhndrTyEBYyjtKjmX3c7/5plRMUYf/tAtcqklKYIRBLIzz3E9ToxH8v/v5/wl4U3Etd6lKXutSlLnWpS/1e1UXBvdS5
UkKVJSmmOd1K9ZN0Bbsg3cVWlL1T17sqCukSzV6j1Fbo5wPh9TUxP/EVe4cKgViV6OnEvixmn1syGrMfpOs/RHkCPZEZAL+psYdJVNqcZa8mT8rdseb9M2nZoj4bzn7Tok9P7l2QbVsjSUONQUUY7hLjq8+UPUDtLSqBsYHCBMbcgdy0I/tlpK0c3acW3RtRI/IjolsJMxYFaeXxWJIyMwfXrZR03F85Umdp749YE9k5IUWEWuPW4iNMz8L9DJX6HWVQBVFmVEyYwTPc1VRPafYyp7Yi1FYUTaB87InLEnJyU/HUo4aRuKzo7wv0TcHqr55J2RuqJofpjXT++oAaHSoM8++n9QJ8oHjshHQxOVJTihc3K7DFuy3u5RVKZYVwcOjtkXC3zvuQUAehKijnSdsd6mZz9nzDnKYXayspd4czhUEde1HoP2zzQVGox+3MZUYp8eIpJSsNzhPLAhY1+sOTvKepSfsjOqu4oRRFXR1l/ISsOh22BcppMAl7lK7wU1WPopqVWzlnKkqaHUiXcCiFerH5RaC/1rhGk/JXjKXCHgMqFCirGe5L8VzWavbZ+lYUmukqkqqE3Wr0oIlt9qC1nlAbtj+WBKjpOjBt1Jx+lYpI9dGKctxLN7LtP2OvGjAnS+IQGTdG+JR3IlENqhHfZetJTlNWjr6zUETaW1n2cJPFPYsqR5Gg9ZJRv5dzObyOKKcwnUY7hVsoVNAzqcG9LKgfA+PK0H87oQYjXs5K9nF93RGi5vjcMPUFZeNg5agXE0NW04uDZlg7Dj+SlZP0wc5+ZDkQ4BcF4aaCVwvKp5HptmbIC1SvvnjiMFQcdzXGRO5WR45Twf4gCu+L5YHv/BXXq46UFFNtGV8aylX2fC8c/VNF/d5iuxMPFMJGUexkP7d/0GDHxLhWuTteMdwm2h+ICbU/luhDTWjArQPYxPbHZmbxYhLFQ4FbR7ARYxL+1jGm7LdtlXhyexmHbgmhNIxXsg/GwXhTMVwbFt8+Y1Ri/6IhvRopq/P8F4KitIF2OXLV9rx5c8PmWryjV83AsXI871pWX25JSXH8eE3xUsbL1bLnU19QmEAqFHXhGWyBziq/MRFlI8urnsOugSoSgsI28vrd8oj+YeQpXAsNxCTGG7m+xs2ZXgIGMyVCqfHtFeWzw5x452NPakpCW8x0n7Ro5lWqVFjhEFclaXIko3Gbiu7+rEy6hVArxJ/6GRsWoUTUbwt8IxzzpMVbb6aI7uUzipCkv+HY419cYTqHGh0+r6zpweFXFcOt+JDdreAj3FW+GDWYnUFFhV9E9CSeZJ85uaZX2E5WC7p7zfrXETXF8wraZomKkVQX8z0cpYQ8kd8zXRX0X3ruXuz4xDoTM+J8n0mdJl55Um/QR0NsoqxKXWWJN0riXygjOKFhDN+M8/xZXI24p5rqtmdRTRxoMZ2WOZScigekRWCwCZP5urGAIr+nu1MUXWLcaFa/dvjWEJblvHKsJi+9Fcezt/jvqouCe6lLXepSl7rUpS51qd+ruii4l/rdihE2q5nhmJaZpzpOqBPDNCeVAWA0qa1JWnyFqhvFj1sZ7PGcEY7WqDGIH9RHyo/HmaWbrKShJKVIVy2hMhirOSekqaz4lqiQsB934r3JXyF8cYvuJlQ/ijKYEvbTfk5Co7Dgo/guS4tfGGKRu7TX8gS4Wgw8PayorgeGh4apK6Rr/oM81u43ol5120ZSfGrpnD35h8qdcE9DawgLhR515qXmXZiENWmLgL5xfLHZsR1qVJ/TYaKifJaOWciMSwV2nyi6zJJUolxMK0P1pGf/2SkRzi8KklboPoCWrv6k1cwKjLpA9SX96wXTSmOmRPfNmvqdKHIqp3/NXFvn4dihmpzv3hSgSsy7J1g2pFbGQiwM9lm2Ea4XWfEX7nCsC4iLs8qslDAat0eICbVakj4+kL56nc+Vgcnh7pfi803pd9Td1NbiBS5E3T3xbOcktaqAYSSFiKorUt+j6hKSnjtu42Yh
FJCnHdxcUe0iq1+amVl8/MoQrbA5p1vpBtf+MxasURTHMy0AJV3IJw9bqISAUBzBtYp6G8UP24U8FiLl91viVUusLNWjY7gt8JWie5UTiVbyubGOqIXHIwleMStKsbOkNoo/MCIc2iYK7QNgNEw3ATMY2vdCSZhWZlZx7SB+2FgoFr/a091tmO4960LO01gH0qSJk0HpRP/UUG8GptGyauRYv39swSSwefUiQUoKctIfLpMIJjk2binKk3aZdTnK5x++UmzuD+x2DdbGOTmqHwqqrDDaymNMFJKCTrCUnw8moRtPzN3+9YMi6cRwc0rvU4zXtaTauUS1zqs3L+X3v10/8hfDa0nWyvXlesexEb/iohj58d0nfD7ZpQn07cihl3nhq7sntuuaT/GGEajfG5QRTu2JknD8gahUy++SpB1amF47rms5jsePLRrof+BQtayU6a/Guc3A9QXTSydt6qNhVCU/+eYN4w/k9v23b+5wqwrfaNp3iXGjsB3zXGJGiVsbrxSreuTj04qwDlid+OGNrGr85vGaH9w9M3rLzaLj1WLH4abiKh8HpRJN4Yirjhg1hQ2s/skDRV4GCFHzox984L4+8LZbc98cWJQTRsvrlfE8Ni2buueDjhid+PRxNR/zw1Ty5/dv+KvC8/7fvUQlUfrSXhHzHGp7IYn4WtHfKapnhVto2vd59Sjzs2NlcOsSuyjEt5mvaxUjuChzY1NijhPdV+286hFLpOt/UCx/KSsxdj3hh7ya2RmGF17m8bcG2wtJ5fi6pOjkPbaLoBqKGIXnHCOptJ+tXJaE2lDuYPjJgAamu8989wtPKEU5TYvAlNXSU2knXu1yL/OTWxmqpzQTIlJlMB+OMDnCq2vcuqJ6u0Mdetyrq7yfivJmICbhaScr1KB0Wv2xCbyiuO+ZDqUwkIvz6k9KoA6WtMiMdh0xZSQamQCb2uHqAu8N/VSgTCK9HBmPMo+rUYONMGlJKdTCFrbdmcSQKogOSIr+RYntI9OmoHw+TbKR0BTE4nOH8/+3LgrupS51qUtd6lKXutSlfq/qouBe6lwKUe6skU53kG73Re42bSvxxoZIzOqo0lr8mKMTVVcpwqJEj2H2gqoxEBaldMbX5aw6zk+1ucs/toV0lx+mvI3srfJpTl4xvTzB+et27uDVPs6d+GqYSHVmpJ68nKUWHmdVEhYlvsm8w7uEymrR/eKID4YXqwPfq0SMihgVbnHy8eYkmYUn2SjpVIM5BR6hJ+Ge2oNsr/kgiu7pdRWEE1rayI9ffATgl8938/tNLx3z402i2EoyU2hE9aqfZSOu1agkXFUzeOlIX1WzBw2ke9t0E+o4EDcLYmkoTt2zPhJXNSp7aqNRKJ8Iy6x0a43O6VZMnrBZUXTDTKPQ+4FUWMLdFXrfiYJ6UlJPTNvSkowS9VVzTpM7ldUzz1g9bknrjZAP3IlzW6KOPdqv0M8H+Wylzkl3hUEPUSgSKUnKXoxzqp18hgU/5JWGnK5nFOpzn29ZkPZCc6i2AZTi+CqrdFvwDYQ2YbeG0Ea0U3PHeRxEaddeVFw9wnid0KekMwPFXlT74VqzfBvEp+eyF/pjN6f96Zj3BVGQ4ktR9Yoi4EyJPhjMjSfoJFzVMSv+gxFCw9KTvEVPighzIlIsREWMlawG2GNCa5hW8vu2T9SPnv6+ZHi9xC8U9c0wK25p0sJDVaBrLx69oEjhM3pJ44mdxdSioCqVZiUVEM/wIuCuwUyGYp+JD3lIFF2kvzH0XwVeLTp2uwZ3KLGL/IYs51zf7UlJMTpL0TqMjlQLmZ+GrqFqHNNYEIMo4OVWuLIgKyt6hNgCneL4Sjyc2OzvT6KKv3r9xD//4b/hT+vfcm+ObDLst1VyXI8xsU+W3/oN/7b7hn/57o8AuK2PNNZx/cc9UzSEH2t6VzB6w3Ev0qPSQn9wV4UkU0UoFo6PD6Jg2q0lLCN3X2x5udzzB8tPNMbxOIlv82lq+MXjHVfNwOAtu67G
BcMfX70DoHcF3w+3hFqx/UOZA9wVc4qU7RXJiPK37RrcsWB5f+S//fH/xp81vwZg8e3ElR5Z6Egepjx/WfIQ5Dt0qeIYK55Dy9a31NrxVfnArTnIcdAdrfYUeVJ2KFw6j4WQ/99lk/n3/pp/+9XX/PXhhYzXpPi2/cTDuKD5T75j9Jb9UHF4URPymEqdRQ8aPSiqZ1kdc63GTHJdh8pASvjWyGqi19AU83znF5b618/n/gwf6W/0TDA48Z0T0N8pUh3R5qxc6l4TFgHTa6a1cJ5PKv05ES7l+40mWo19GvH3a0nrBA4/bNE+SXpXGfCTodieucppmUg5DU6XIad3qpk3bEa5p/g2E4BSwi0LioNcM6Gx6GWN8iX6OFIgq1oKZvKQdokQNE+PS8ykMAfFdCWMZRAPt91apiTqsaoDyWlUTnQzz5bYiMorv6CJvSHl/oDDoUbbSFEEdp8WspLTJvQyk0eMpVg43LYiVQFzKCQ9tD0r1faoKIDFO8d4bSnfTgz31dxXE1c12oWLgnupS13qUpe61KUudal/WHVRcC/1WSniZildpyf/qxaCQmoq9K6DcZIEmPeP8vqiRcWI6gZSPxB/+FJUxOMwe0NjaYTV6jxaa1JOcineSyd8skswCj0F9K4TlmOMkjYDqKZEvfkE99fi19Va8smz8qvefiK9viOVFrU7omLEfXlzTlJrCsq/fU+aHLp6QdLgVhCuHeSO7+/qDc4ZtkVNCJoYNHUzEZfnJ0TzyxoXLSpKLnfz/pzUEyrpnvZXCeUUfgH1h7NPMxYQm4DWkZgUv3y4IewL1KnRNYpP0fSZnFBnJu6QZpWg/eAxY8A3Jj/N5vS1MqvAYxC/mZJkJb0fYFl+lvJlxYs4RpqHQPUoHcgzRzJG8AHzIKqt7hzxZjUnodm9EAxiY0G3mKcjdANpfUfK70lGYXcDYVVhjpmEEYVJCcK5TW0h42W1ICmFMka+G6COw8yQTPuDJJ75MKvxyjmhI5B5siFCU88pRvF6RVJg3zySxglVV8TCoI4DKifbmZ0GnRVd5yl2E/bgcG07n0vtFOko6V5h0uK7yyp+qCWtqjgK3zZpBdmDDdJJr52cU9slolFEqxhfnZSmFaHWtN93xNLw/OOa6UoShU6817EvUL3GjIrwrhUF5yxwgE3ogyYZQ/1BE0vx56nTe04BVGNmxRYwVmf1lATT2uJaBYgnfbPseHhe5uOsSa2nqD1+tCw3HeNYiCduOpFFJgadZrhEnAyqM6hNXv25DSwWE/2xZBor/EJhOjWzRA9fGewxYa5HRm+FvtCE2WeslKIpRbENUdOPBcZG9ruGxSqvSpgkynFQcux+5Ogea/EFA7612E5R7hTTSlR1t1IzT/j/fPMF46Hi65snbsyBW91REE+2TQYSLs1BVWx0x4tiN5+Gp6HFJ8111dH5khA1UzCkZFGnc6AjIVn8MpC0ltS1982cxlY+KaakuV8c+PPNd7wsdlTa8W1OW/vkV3zdPtIYx6+6W/5ies3P397zlBO+nDdUbwr8IhIrIX5Ew8zoBuG6Vs+Jw1+vKQPUrzx3dsdGy8lolWehoyjWSoZPqyZuchTjSZF9jhVDKjAkauVo84BaKE+rwCjFlBJGsu3yn1JBKULyODQbc+QP6/cccut+Hwo+TCuWxYhPGk3iqAu0iYTs5aaMRECPhlDJipc9ChEEMhs1icc9aUUs9cxaBrBH6byfy0fMCO37rDq3iuatlgS/L4V/Ox1L4f0i7FY9aTmPyqCCktTKYyLlk10cAqGWpM5kNeFuhXYBdZAehfHqRu4FRjFtK3RnICdUAgy2wg6yOuRDRXGQ+0Eczvch24mvWgVZiVGZFAIQKk0RZYXT7IfMgC+gNMS8UuQWhvCsaL63ed6QFaeTF9n2kiBYv7MMLz3pIP0uaSaTKPBgBkNoogyWBHp3+uekRXkYFgXKySpQ6i20IpHrvcFPNXrM/molPHGirJzJRyTKHRxfFSQDw32F
Gc6rXX5RUL4/oqqLgnupS13qUpe61KUudal/QHVRcC/1OxVLQ7iuKZ5y96zzqIdn8TWeyAk+CP8WxAfZDUJS2Cwxn3aSFGXNrOwopUiVQTlPWIlKZj/tia141PS2EyZqZp36+zWmm2bfpd4eSW5CdQPxxFN1YfZtKmtxq4ri4Sj+W62F3ZpOySqK8PoGNXhCWxKtdGyqo6V6lH06mgb7UPBxldUfBZOJxKweJK/QWlJezCg529Eys1G1F3+U+aDmJ9r6KdLnYzZthBUaguYv37yUX4unTHNo3wlTt3oSf5V24p00E+IRBcwQsNuRpGqSNpghyhP7KcUmJEJrxWe8qii/f0I78U7LlwSUwu4nTK8xh5FUGFSXCQQnj6rz6H2W2cYJruWYx0VDuKrxjcFoBSzQhxE9ecJCPsOMWW3VSogV6xq7G2Ymo35+xhhFXLakpkA/H0lNNavEOvtl9eBJX74Q9dVH9KfMvS0KSEloCUEUZz4bO0mB2faiYhtNWrXi/z70JP+ZV7nOxyQEQmOx+2n2S5sBmofA8aXBDDB9DsAF7EFRPyTMJIrDdAXrX0XxdgLuoBluFLEQpcUtFKDzn+JBMy7x9JMlm7/pSSo3yN9GYpc7jR9KioOMMT0olDOSKpY7qlOZ/ywSbind+dWTIpwuy/w+HWQ1wDei3lbPmdJQKqalxvaJaSHq8b6vMTb7vasAvcFNwgF2zmJMxK4GfCYOnGgKSiVJrfKKVEbIr5ftREqwWA0cFYSPp3F4Sq0TJStMhsdDy+LVkT+6f8+rWviwt+UBTaLWDq0itfJ0sUSryNbLPPL+izU/391xdfeJ3hdEFN2q4DDIZ3VNha0chw8L6UZPCnPUFFkxm0ILK89fv33Bfz/+M9rC0bmCmM7nfHSWlBRaR0obqEzgzYN0pLunSuYLr7A7I77zSXzYZZc9sEcRr7ST9Ltxo2QeySJ0uU+oqPir717y3fZKKAUmYLP/c/SGRTVRW89hrNjvGopfVzzk46md4uo3idBokpLtxUJoHgDth0i59YRGUz9pXKP49GrNv7D/DJ0lf6MjlfGUJuCjpjYOq+P8uo8arRI+ajpfolWi92dPe6kDJq9OdU788DEpbPZ0K5XmbY3eMgVDiIrJy7xw3NVwkO2ZQ1ZeHeig2LzL19VKUe5k1cqMCTsk2g+TpDUCqh9xL68onmVVKrYl5qmb5z89THn1T+4TcdmweDsx3mT1sxC2dTJyzUWrMuc17+OjZrqOUEbSqHFX4nEHNR9rO1iKYwSjUF4Ux6QU8atbAIpjonuhcUtQvWH5S03/Ks2ecdsrymfFtBaFNlq5F5w8un4BtjulvCUOlWHzi3DuqygV08sFegyEZSUM3MPIeN/O81u0oCfhehdHmNYJMyg48WkNhCYSyyQs7M6QbidSL2ppcZD5Ai2UkqQiyQglCJDVJp1QvQGTMAdRxVMmBiUr85hCYfcGFHJP7c4rTMnIKutwl33HwVD0GpdXVG0XGb9YUr0/8vfVRcG91KUudalLXepSl7rU71VdFNxLzaWUEr/sFOf0F1ISj+PJB2kMFHb2wigfSEUmGPRCMFCjQ/XjWZWbPBGIVwvCssTuR+KyQR9EJUxVCU/bmXeqc850WmZDTj+hVivSogGl8OuaVGiUy0zPys4cQPd6I0+z8TN24BTRvSM2Bf3LksOXiuE+oiKU2U437a2w+I7iNwplwk0anVNWbKfkiRZR8IpD9tVmEaP+mIglNA8R20f6G0vRR7rsE3arhKk9brRokwiTQY8alQ/zcCeKX/s2oZ0wLMtdIhqhSJz3xaNSotw67H4ktAWhsfN+JqVIpcI+j4TrBfbTYU6Ei6uGZLUk+uwH9K6ThLGc561CIC4rtNHEVmgXSiv0Pj8lay3Egxdr8buNgen1GtM5Uu7QTQHM9oh+2BGvlpDTc9Qoj+apriQdL0TpelYKNbmZmKG3R1JZCO3BR/kO/TR7cEmJNExQV/jb5XxcTnQN83RE
DVn9ryvUscdoLXSNrOCq1ZJkDWmzJFmNbw1Q4k5JY4WivxdKgfKieh6/ks5eEOatWyqCl+Qjt0r4WmU/qyh0dhBFfbgRf6xbKuGRAtNaUW0T/Z1CBUm52r2A+ts93S4zhxcBtStw1158bEm8eCeygPIQ2ghFJJZGErsacKt8TRwlOcgtRR0yXU5Jy+ppsUsM98JNjYWsQBzfL6CW1YL/6B//ilf1ni+qZwrtqZWn0g5DJGRd5BBq9qHmECrGaPk0Lvk4LFlYWVHofcHH4wJrIvevHvjb8QXqscC9OtFRFMVWo54Lhr3lJ3/6G/7L25/xRfE0n9eIZpV9ooUKlAQKFWZv53NseLe5olCBnw1f8L98+hF/tPnAXz1Ld/7d8shV2fM3+p5lPaJVYtfXc0JhXYjnNyXFm7fX2PclYRFFiQbpIB/NzNq1H0vh/eaO8nqvMcMpZVA816dEwkIAAyzfiMJWvx+ZNiWLd4rxSlM/yrH2rSZaAz9t6G5r6gfFcZNmxrYZFM9KzlEyUBYJ5RVVTs6zHRR9pH6O2C6SrCTlzQmFPmHGICrYPhBfFzS/Knm7fXGWuM52XfSkxFuZgGxxTCph94ZkksyDSdS2E9PYLyLKy8qWuw6ohYfdZ9SSIL5SokIHGcban32fVS9+WhVlbLv1qScBysOJYwvtJzkvzdsBvyrQQ5C+DCBeL4SW4qOsTIVEuG5nvjU+wG4ru7peENsC4yLj6sRMTvhW5etekVYe+2jxGzkR1a8sbq0k7TLIMdNO4ZZnBvawEb74cN8wXRnMmKieHNPm1GwhymT3tcPsLONN/tk603qcns+7duKF1YPGZQ6t8sLY7n7gaX9jsT18+tOK6unkOQd7ZWg/eEKtMVMk3pckLWMCYLxSlI/ihXcrSW5zV+cxf6pk5Dwnm+CpROVVoVCJ79etA6iE3VpimWbSA0nUae0Bp8Rrnpg958mk2U8dGyie5L22O/vGzSgq8fDKU7+19C8Vo5OVMwAdEscbi1us4d/x762LgnupS13qUpe61KUudanfq7oouJeaK6UkNIPDhNpL12daNqSUUJMDsnrb9aiyPP1SVnSVME+HkTRNqPUK/SQSRrwSSkIyFnvyR6Ykym0uVVWkGKEk/FEAACAASURBVEl1ifm4JS7bufuflEirVjy30WL6CD24jahd7rbF9JJig0aeVp/H2VtabI+kqkRNCtdq/EKeQE2n6F+e1VEVpcs6KVFsUZpid85B106enpOB8VpUm1Lsgrilotwn6apXiuFWMa0t3ev8xHk3crXq6IYKN1nSqFl+J94kOUiiVoRSvLdJSc55UlBus8fWR/y6Ro8hq0iZaziE/KdHFZpYWcy7B+L9hlTZmR4Q2hLtRTnVnSFcr4ilOVMUlHwZtT2gQ4u/WWBdgGNW2tdL1OQwO6EjpMJSPPWofsKkk1GtIK4a1PtHUmUoPh6IywrzIAcqVaUoyimhu0n82mUxpw2dmLfKR9SbT6hFI+llmcucDgdQoiSbvkR/2pJW7TnJrCzm5Ly4bEUdftqRxhG1zBKt86hhxL9Ys/+6IVSKpK147hByRfeF+N7KR81UKKJJs1o/rYRZK+8Vxat/cSZmjNeiZpheY48w/HBCHQ2rv80sZQP2vfgxh1sl18O3B765eeRnx1fyHjTjbUAvHfFYiB+vCnxuB1Y2wmBEPaki/jahhuwtLYOk6WlY/GjL5CzDx4ZkcupdI97CPqcoVY/CDx3vZdt/fvUd/8Xy/2alJwyJQklHfEDN6unn6kiXDM+x4p3fYLLZ78Ev+Z8+/BnXVUdIiv2rik92xd1tnhcSPD2sSE6YuzEp/s3+G/5V/JFs05d0vqTUAZ80m7JnYeU8f1GJJ7tQgTFZNImf7l+TkuIvn1/w8USDUPDOrpjGgr4TL2Y4WklTApxJ6Px3rUSJ01tNaOS4hKTQg0btDCp7FNu3Crc6KeHic9aTJA7aIQGaykP9LNeV7SPKJ+xzR6zEc1h9CvOq
w/GbJavvPdtvLPWDEFrqDwq3zuc5QPsxiX/zpRZm9uqsGi6/D6KA9ZFi7xjuSpJm9uZPmwodovBXj45yZSh2GtCz9zMhKwJm0KKcDprpNmK3WR0tskJ9zOSYk2c8r2rVH82sxtbvLdNaC4kkK92k7C29jhAVxss1VD2euc0A9ij9D/pJVjyKg6TdAdhBrrHq0WMfj+J59mFelUi2RE9B5r/nDgpLbIp57mEYZSWoqYlW072umBaa45dZhV7mvo0kzHJ9NPjrs2//+I1HTZJSaTvF+IUjfuGp/rqZ5/HxWjFeW9r3SUg4rSJU5ZzQ5RtZPSw3I/pNwXgviYWniymtPFOpJQ1QJerlxLCrMI8ncz0MLyLFZqTTifpNQbGH4xcn5rFQI8ZrQ3evKXeiKMdCMa7lQ05Jd4evA1w5muXIop64rvt5X33ShKhxmRPfjeXsS5+cZdmMVNbL9aESRqX5vT4YSit+bpAUO2D2ZveuoBsLuqcG03q8l+tyumY+Dg7xQbcvjozrEve2ymMvjxdr6F4pfHOhKFzqUpe61KUudalLXeofUF0U3Ev9TiWtQGvCnXQJm+eD+BlDIO0GVNui2obU5ae99VL8tN0oT9NVibJWEtCq7KndHkiuRk2OuGjQ270osrnCuiYV13PSiukGwrrCdCdqQ0Qfe+mUz3xevR8o9MnfWmJ2gzytHybxBys1vze1NUyOVDX4RtQCFTJfMD9Zm0GJj64XtdaMknl+quIAzcfIpz8TL66elOTK5wdr5WHx1jFdWQ7fiDLrKggvREUpbKApPPtjTfQKQu7OPXW2lqJONJ8SoZFtj2tN+zFw+IF4ketHj+k9uvfExgoRotRU32WecGXRoxM2rBG+YqqMkCXIPtWUQJXit60LtIvorXhs47oVtm5Vog4delnJ+zNxIFVW+LEhzL5e9dyD0XKMgWS1+GbrKn+++KPD7SqPhU7Oo/PEZY2apOM5ZZ5hKJboYRIe8vUa9kdUiKT1Ih9nP1My9PYo6ufoZgJEqgswGvW8l3FXFSTvRb09+Xh9IO72HL/6lsNX0k0c8zkHUdBVSIRNxAU1d8XPaUXx1Aks59+vIs6pswqjAAV+Lb7HdtNT3Xu2NzLm07YklJJHP97I++vKCQXAZUUiKH7yp7/hdSMm8bXtqbSn0Dn9L2l2vubDuOJX2xtc0GyaYX5dq8TfvH1BAl6vd7ho+NunGn+fZb+PBcmK59D2immTx3Lex192d1zbr9n6FpcMdZYLt75hmQ/Uygy0emRjOp5Di1GRIRbsY15ZiRafNH/zfM9xLHHBkAbDoc+KzWgpvi9RQY7vz37+JT8LX6GzCh3XHnUw2E7jbr0wb21E6YQ+0STe1IRVQFWRly+f+fCwJj1UVA96PlflM7CS/Sv34ps+sU9DKd53lPCPQ+aChqzWlp8M9SfFtIaxiDTvfze5Soezxzbm+at+joRK0byVOdJtKordiOpHyg+KsK4x+1ESA4H210fGFw1mEJrCiW9qhuzpvpbPC6WMRdvL66cKlaLcebQXZvniV3vpU8h9EvW7I/ppT1o0JK1Z/NwRiiviB8X2R1nVuxMPrZ5kHiqOCt9oSWREjt10lYhWOu+LoyjVbpm794uE7SQR60SZQQt5AHJX/FWaVznKZ4VbMTORQ5S/b34uVINoEtEqql2YexDk3gTlQ0dcN6jeQWEJaxlP9rknaY0Kkm6ougFzum8gRKDU1pKCNXhUEM9qOCVoRYhfDsTRUK4mYtAQFXUzzcfamogPmqEv0cCruy1vo6L+uYz50/aSVUQj4823Cl/nDSgZPDFo/CZy9YMtr9c77mpZ1bgvD9wWR1ZmoNKOQ6j5zXjD//H4JQDvnte09UhTeKa14WO4ZrxV1O9l3hjupW8DrYhlzPc1RbFXmP48ZmMBP/kPfsN/fvtz/rj+nld2O+9jSWRI8k/DgCKgGWKBy4bskDS35kCRzcIxabSKDClTMIgUKhCTnt8DzNscUsFHv+a3
7gYXLV0sGaOlDyXvBrlP/GZ3TW09RkemhWHbNHT7CvVLOZCHryOpTMS3f/8/YS8K7qUudalLXepSl7rUpX6v6qLgXupcSfyjSYHODNpkDSgrZALnSH2PahpYZUWtG+QpSalZZSNGUmkl+QxIXQdVAUoR2wI1ltLlesqVLgzGR9Tg0YdOmLr6M7OhRlipTYF5/yxP5NacOblTyF6sSnydMeKvW/RnOdU6JdyqFAWkU0SbKPaKUMvT++I7xXQlOdzGiadOJen4BaieEq5VVE9CUKieJc1qVnCDHAPXCksxWnDLRLWQp/+XV3vePcvTqTIJNUo2/EnBIEpX8bQWRcktJF1mXGvseOo+lQ7h2BboKaCOE0lrUiMKrXKSlGSehC2rUhKh6USneD7gX1zN7zWjF4ZxJ+eJZUsqDLFcYN72koSWzycgqmpVijqwrM+KaE5Ag8yaTIk0jJhPW6av71AhoQdRfNNCUseU88I/nhzpcIRvv5Df72UlQDiVNWZ3EAV6yjnm9xvxdheWVFjUMJKGEXUaj4+DfL4XFYes3qAUcSVKuHYetVriGsVwnyi20jk93MpxLvZq9hP6daD5rcWtIraXMdl9ESUrftCgE6mMpOJ3WbkkUE4RqoR7bPnqD97yxVrU2J92X9F9E0m/LdAThAZ271bs0oryMY/ZPzzy55vv+Ef1WxZ6YqV76kwxAGY/7HNseXezoVCejemIWbP43l3zP6s/warIp34hDfELz3ol5/qJFeahIFaJqUzENoovNXdC/6+/+ZZ/pb5lOJTgNWbhCLsSu55QmWfqnivMlZjWUwRTBGIwskIBlI0jRoX5mfhhY5Fojwr3JAe33ipJg5sS05WieVfglkKlADCZCWw7RcJKN3dmBvv2dByg+U1BaBOfPrwgVYn2rZ49j7aH4iBpgKcEKJKamcTaZTUygm4UqhYlP9RnBVh7qB/BLxTFPjHeKMoseC3fhMwTjlRPnmlj8bWi3Me5u98eHLp3cu2ECDGhHrfwlRieU6Up9o72kyEUSuYAKzxbEAWw2kbcQqO9+PyL7pxwWD9JwiGAOYwwOUxhZg44Xq7PVNqZZqKDKLUnJXvaQLHThCahoqi0JidqgRwj3yiMk7nOtfLaia3qW6HImE4Rq0T1LP0Kp/lVeUX5JKqtmSTdis/mz8WbyLRWuJVF+wRIGqNv9JlDuw+oKTHdtqiU0FYTazMzYN39guLDAX/donzEvn2Se0n2ncf7TV7hShAjtguAZvOnnwD4D++/47Y4cpfxF5V2bEw3UzxqJcfujb/mp92X/PJ4y8FVvIkK/xNZBXPHAjUa/FL2eXglnl57OKVNKuxR4T7WpGXgv/r6p/zTxS94YcQnvNITOu9wnQ/ufmn51+03chC+kmS7f/38NX/58SV65VAmMaS8gtYGiuVECJqqdnivce9b3FVETSefrvz5x+t3/KftL3hpDqx0JN9m2MeCgGJIBbVybPTAwhz4fIqbk/6SJiqFJrHRZ6XbJVkZG5Ih5MaBk5q7UI5VOcy0FIOovy5ZPni5R/5i/ZK/ObzA6sDz1HJVDXynr9i/zFSehYejZdr8Lvnh/10XBfdSl7rUpS51qUtd6lK/V3VRcC91LqWwhwk9+pkrG69aUVqtQcUo3tsYhTV6+rX9EapS1N+6QD93oNuzgnCzEQqDD+jOkZqs4ObPKB47YmnFg5uSMFhjwl3LU6ndT6QSSEm8weZ31bLyVx9F5UsJ1Y+kpiJpNVMG3LqkjJFiP9E8GUItHZjTlfjGQNQM24tyerINlVvx5YEoDUkLp7baR1yrZyXlVL7RTKuchqPAX3viIJfYR71g0Yz0Y4kfLcaJCpQDmYil+HyjFfWmOED7EBiuDPqYE402Ft9oyp0nlhr7HNH7jnAjCplxARWy+rmoUL0jLOVYA6jSYo7iUY6LrHYbLYo84K4byt98Im6WpKuVvHbsJc0MSCGitCaeVBGrUT0yNp5zp/KJjHF3DYcOMwZiaYht
9shqRfFhP/t8CRFe3s3EjbRsUJy9vPLD9JlaLJ5qNYykVYNytXjrTlxma0iHI6ooSM4RF6ICA7/Ldl4v8Y0wHWsnDM5pIy9rj6guzwZ/4+l/4NFLR5+7fWMTKa8HpmNJuZi4WfYUJlDk1Ka75kDnS6yO/PZ5w/PbNW/3KzaNKOLFesQ914Q6oZ0ilIn1X1l8Az57P6dDyc+P93w/bCi1Z2lGKu1xuV37yvYscxSWIXFlPB/9mk6ilXjv1nzZPvM0tXz/3Y1IGV6xIw84pyi3Cu2VkDucrGaoTrbvdgtIUB4UsYIwaMygiV2NOnmR64T6bUOyCQUEA3av0FmV08capWQsJwMqKsrndKZ/OLlOmodIeVB0LzUqnjvrQy3vUUHUxFDJdywOiiHTH1CJopNt6yDJaOU2EbJ/3jdQPUO5TZJQ5aDosk9Rfp3iGIlWtl8cRMFcfC+vjzeKaSUrNvWDqJLNe1Ez4cRzTZS7gO0DsdI07xwYhTnm6yarqamy6H2PORq5TvxpBUq4re3bkVAb7GgZNnqmMGivsUNCxciErOhUTw57OFNmQlNgjk6up7IQ3nW+JnQECps55QWpMBSHgDGK/jb7JgeZ71zuTQgVlLs0e32Pr4SuQJLXtAM9nhXc6kHjrpKwTEs5Lkkn6o8nTq68t9irmYZQ7JjpA7FQNA8pq6rgF2Zm+YYye7KNotpNQqKIMkckrbA78YSHtiRVhcxxVoMX0ovOFJi4avI9YoLnHeVVQyhLfnL9EYD/+uZ/58fFE/fGUqkCjcIoTRflOD/Gicdo2ceGFxmg/rPdK9r1wBcb+f/eFWwzZ9mPFl0G4mTgKPPidC18Ydsr4p2nUIF3fsNjkDm8UIF9qNmGlqUZWOmeW3tgY/IqG+Jh3U4NhQkMJhEOxcyvVp3BhQq7NQwvNWnS2FGWPU7KLRHcOvJ9v+Ff6n/Cztdoldg7ud8+TQ2Pg6yI1dZxVx/ZFD1re6YsdFGoCu+GNVolrIpUWe6PKPauEub0VBOy8fpEV9lUPY1xWBUpdGBjOwIaQ2TMS2fPrmUIlm2/4v1uxTAUhH1x7svpSsonLeSLv6cuCu6lLnWpS13qUpe61KV+r+qi4P7/vJRSPwD+B+Al4lb6Fyml/04pdQP8j8A3wK+A/yal9PTv245Uwn7cEdctYSVKkNkNxKogLi0mJZRSoqCd0q2sDKFUlajtHliRhhGWLXGZ1SKjhLQwOvT+SLxaiJc2Uxb8/Rr7eASjQWv0rkeVBTp3GftlSfHYiY+yssTSkAqN3WcFoyxQxx5VV9KZ7yPlm/P2lY+icAwePSWSlhQeX0LInMZQi/fVTKJQnBJj3PrMZz110+ug0S5Jp/SDPDlPa8PiuyPD9ZJprUglkvCykKfa/s0S/6LHmEgaDLFI9C/PST4KUXOLvWTUV0+J/trgW8UpTqj95DF9RLlIrIwkwe2P2FNK2KqFmIirCvPhGf/6GnOY0IfMiG3KrOwoVB9FVcqJXgCxkKQ6vT2C84QX11AV6MxEVqMjXC8wn3bE69X/w96b9Mq2relZzyhnEdVaa6+19z7VPbdwWkliQNBCQkJI/AB69BANJHfoINEw4hfQ8g+wRIMGHSSQANG0RAPZsoRkLGQneZ15M8+95+5ylVHNahQ0vhEzzk0y06JjK4/ik7bOPjtizZjFmCPWfMf7PS9MUbZhLFyJd4qUUfsjWSmoK8z9FrVuZz8iReVHKdLdBv1yJK4qzHRWV7PVxEWFe/9EXi1QuwP5lcir6sRkVko8wdaAE0VXxpoRFWvhhOQAQlFISf4dyH1PvloVf6aiv804r9BlE92bRFwmSFBd9bT1SOMn6pLAtfEdG9+zcR2v3Y4pG/7B/c/5evEspwCF15HKBMJa0w2OlDSPhzM5hCzJaNMyg4b+JmM7NSdDVb91/KPwC8gKNZY0IJ3hxBUN5XzahGkDdT0xDpbpeE6PMnXE+QCTxj0ZqmdFLPdE9KJUmo45HSg0+nc85emk
xB2BJ0P1lOlvNbaISf1dxj+LmTLW4resH+Tvct+VNKox43eZ4UrjD5mwPa/A6DHjDgl3hFBJl/xwdSYV1I9Zkq1WCjNA6hTN54wuPt9slKSHLRR2K/fQtFLzPtbHjN8nmg8Dh69rqueIHs++vVjrOfFpXIt6Oa5kmyDKL1kYrNVzYvF+pHvty30pBIPF+xG7nwo/W2O3/ZwECKIsmscXSYp8fY069KAU5lFUv+wsqvLEhcPuRuxhgtzM9IBqGyHB4mNHtZIkNf9pT1yWOfrxgAo1eteRljVhXWO6Se5BQO87uU/Kilq2mmQVOmTqp7JSV8m1d4dy3Y1QG07UA7TMjaKwZ6alJE3V92fKQvWdMKRB5tLF97B8J/fM9luZ6FKhVEiPg5w/gFDDq//zmem2JbSGxW+O6ONIWNdChQHMrkc9bcnLllxXYBRxWaELTzguPaRChYmJfL1GDSN5K6tLyjty44XcohT2t4+gl9wXtfJX42sSmnexY1X8pMfk+BBvAPjHx2/5k8Md9/2Cx64lJsVu3xCj5rtR3qN0ZuotbB16UOjJYyf5PgGZ31WUVaLdleN/+/5fZ3uoSQXnM20r3GokZUUaDdpHmmZkXVZ/vIks3MjH3ZLd/QLVGdr3ZibATAuwnaShjX2FHoXkkDUs35VrbeHlZ5p/+E9+j3/o/oas2pgsHneAU6LZpNBt4I9GI8zt/XluyXVEuQTPTnpkXMLsC2XhKkBSqE6jgiJdT9CbmeHt7w3JQ7gbZR6bFPiE3p1/Hc0uo65G0mCov/PUo3wnnnznAIuPiWHz53of/lxdFNy//hWA/zLn/AfAvwv850qpPwD+K+Dv55x/D/j75f8vdalLXepSl7rUpX70dVFw/5pXzvk98L78faeU+kPgK+A/Av6D8rb/Dvjfgb/zV29NEW+WxKVnakunc0iEpZf0LKUIX96IB28lMo36f/4M/fpWOtatdLbrpp5TyQDM/YukmRUFQT9shclYuvtjY7HWoLpBOLkleevERs1a6AHq0BF+esu4KvxWX16/XWF2jtw48QcPhdFatqOV/Pz4qimKjahNsRF1CIQ1mW3x/2VRJI4/neYzYwZHWolnNxtFrEtH9ea0j+L1JYvSYTs4LjPplMe+iCiVsVbSqFKVCEqj+x96+eTpWmtJuEoO6vs8+w1jbWnuE+2HiHvsRU21BvbFGzVOswc2e4d5OoJWwq8FwqZCdwH3eY86dMS7K/KmmYkV/uEHft7PL6iTh/pFPKwsW0kI2yxEET+OqG4QEoI/RREZ8tUKvT+C1uTak7WeVWSM/h1PrQpRVgnKeNL7gXjdokMiV14IETnPPmC0himQN0vx/TaVqDpFQVa9+IqFA5yE6nBzBbsD+bq858WQnaHaJUiasIpUf+PAdSvn8a458GX7ws+az9yYA6sCkOyLvHliwnoVeY4tf9h9yVPfUFkZTO93a3bHCu8D42iZBsuwN1D4rv7BUBevozsIlWNawPUv4+zt3H+t0ZMjtGef+HiVsIeiyg3y89M6k7Uj7VtYZKqTNT6rWb3xJXlKfq68vlSsfp3w+8TUCLd5+S7QXxcPbqvoW1FNT77UaSnq6VhSvKonRfWQCQtZdchGFL8f8jazKulKEdqPMoe8/QeiXPZvGpIV3qrbRxafJKFPj2Uflgrb5VnxGjfir8367P10u4ztwG/FC7t4J/t3sgs2DxG/k875+n7CDAm77WdfevV9x/RaxsXxtsEfxJ9rC0Bk2CgWHxLHNxq/ldUgHTKLj4Va0GdRWkPCPu3QV0uyM0INCWcGaHr7StIe+wDeEZYV5kQpUYq48OIrNZr+dYVKefY625eJ4ZVnvCmrU9sR9bTFKCGipI3cl7mp0Pserht0H2YKiipEkXizJjUW9+GFsHiFHhOukzG5eC/Haga59vm0slTmx+oxo6fSK3BMgMbvYFyWzvyiyIaFon7MRC++2uGqsMqXMpe1nxLDWpMtLH8bmNpTUppiumvRfURb
hT4MqH5ErSp0XxTaVY2JCRWiePT3Haqy88oMCfTTXnz8x15U3EWNjhIJlxqP/vBA+OkbbKG16AH+6JfCmP27H28Jk8FVgbGshLg6YF3xBQfN1Dn0k8NvNdEL1UNZqAov2O0zw7UQJm7+mbCJu1dniHY2Svo6EjS/tRy+u0M5oNAmfIb8YKm24lUerjODa3g4NQggvvFYZ3yUFMXNnyQOXxSf6yh+6sXHxPgs+6RSScU8zfPbxPo7cEfL1MJ4k2ASrz2Ae3Ci5u8UsbaEhSQl+qfi6Z4kyU9FOc7kIGzOUHnzZMVzHxTNR0U/evyLOqd2KpkjwsrgngyxyfiPllhnTPEJJ5+x7xoZdzETnaK5T4yrkx9bEtuq57/ag3v5BfdHVEqpnwL/NvCPgDfll1+AD4iF4S/6mb8N/G2AmvYvesulLnWpS13qUpe61F+ruvyC+yMppdQS+B+B/yLnvFXq7E3JOWd1glf+uco5/z3g7wFs3F0e7oQheCy8udgsiF6xeD8QlxVx4dBjIjl5wvV1dU6xqjy5MsRmg5qieERBVDxnUFMg3q5RQ0QNI7oog3bphX5QezBKnjSdJZSEGrcdyN4S19dkJSlTOmSyKyk5TqMmj34+CGvSaBgSeSF0AMaJ6fWS5DShklSZ5CG1EYrvKL9YVJKEs/EmY/cKVSX0vRzntMhkIz5JMwgL0+34HQ9QNo7hRpS1/jajBwVfiKTWNCN95wk6Y5YTOSpyUoSjnGdzFC9SNuJtVFnSfrrXalaTpqVCJU3WFe7gsMco/rRWVFu968Vf2wemN2tMH8Tr91mgnS4kprsF5kk6rfWhJzTLmSMJwscNVzXZXgtlAeBGlKK4rFBjEFZuZbCfe+HtHjriW1EY9H4kVwada0mSGyfxGp4U2KYm3K3R/YT++EhetsLiLX7C7Cy6m1BTFFU3Z9LNcu4It796T77ZnNWx0zgv/5+9g9evYHck12VcOQvWoo4iXyqtyZVhaoR/+/anD/z7b/+Er71Y1DfmwJU5sihy53Nc8Bxbftm/La93fJpWGBKfxyX/4Fe/oGkH/vC38rqxkeGhYbAZ+2gxEbQF/1RUnoMoY2aUbv/1d4FpoWk+9oSycqKDI3rYfqupH+R9WWvMCU3sTp3oMt5OSv+J+iFpV+Jt9LtcOM38Dh92WGtRRh4jh7eGxfvE4r2oYU9/02N64Z+ufh3Yf21RUZTY9XfFd77UuGOelRmVES9s6bxXSZiyJ46zO5zY2nItq4eBbDTDjcceAgELSrH6vvgfX8u913yeGK8ssdKYUTyyJyU6VuIF9dvyGceM7UX1Aajve/R+RA0jsCJVhvFVi3sqq0S1k3kkQ1W4s/VTnr3IKLknk5U/3ZuK0Ciq50KAOQTcxy25rUjLFrXviK9FMTSFH61ylu79IZFqW7yiEFbrcq0T5hgYXjnsPmKGjD0EYiXnKZYxEStF1SWhgTT1+R7QXvzz3pIXQrOZrhvsroz3Y096tSa1QhiIGyFk2Kdu9rcev6xRqXhu5zGS5/EiG4L6KdFvNOvfTIwrw3B1wiAoopWViNAqSELBGNdljrZw/EKRnMYM4m1OVs1KvPQ0GNxjIR4sKpTVZKUI14VfXVi/4zfX2IdOVO6UCTeL8npg+MVrkteQ1phJVEv7A+pO3qxkjjIaFSLt58j0p3J+uzcanWHyDlwW/+ijo68KK/3XhvBNpHrQqAxZCfPXHs9e5VjLyohKYPqI6SOLMdG/Ol1DoYqERpTX/kYTPdiykmcPMFyJEpu1/PHPalbzT6sLI4rldxkzJkKtivdZKDz1UyJUcm7NmNFTuf/LilzzsUOHWjzvSRFWalbqQVThZDPVI/S3ChGwzUwWSQ7MoJlWwpK3Rxj27sx0z3LPxKoo/oeT/1heDq3MDc33lmkj29CjfO5pG8mrOeWz/Rw5vjIc3pj5vg61wkya5uEHO/4X1MWD+yMopZRDfrn973PO/1P5549KqS/K618An/5V7d+lLnWp
S13qUpe61L/Muii4f81LiVT73wJ/mHP+uz946X8B/lPgvyn//Z//hRszlmFj8FvmpB/bF6NYBQAAIABJREFUydOpGhO7ny3E9/JyzgdXbUsGUuuFfhANmUTYVLhTUpm3ZKdJ60YYmFqTlh7zUBTcjy9ko8neFqXXyc8Uz9DJS5YLS9V24mM97cNw47FG42MkFTXTjNPss0vrmtAYbB9BGVKV8U+K2GqyLftYJengvlaEtnirnt3sCRpfRczBiHe3EgWpesl0t8VX+SIdntELT1JlGG8iDMWjWyucDzTVSEqK4VikNn168gblINeptK7DeCUe4SC2WKoHRX8jPMPqJaPHSDZavL+A9Rb7sGd6syY2BtMHUWBOHrVxwhwDaSmKiIrF81h4muG6wfQBFTLmpZPzl5if/GPjiv/LYPrI9HaD+7xn+skdpjv7lVVIhCvx9vo/3ZGX7ZymhlJCcvCW+NWt/F0p7Is8uodNgzmOhKtG/MzHcSZnAOS7G1Q/oA4dOUSSt+hlez6mlFDHQVL1hhG0FuJHXcGjUA5Yr8QLWRTN1k2EpPk4iaL26+GG1ozsQ8Xz1PJ5WPKb5yuskfO4PdSSJd9blEmoJ0+8b1CFyagPilqLSuuORTVVZ5Vx2CgM4B8zUxJlrr6fmNYe08t4dPtIXhvMCK/+sOf+bzXU9zBIszb2KIxXlU6kgqL27M63s9tJt/u4Eb941ueEtuo5yyBVku6lInR3DtvJMVaFHuB3meHasPw+8Px7roxVuSeW7yb2XznMANUusfvaYIY8q1n1c2bYKJJV+IN8vop55l/HWtjX9hCFbaoUdj8xXst4rrYRFcHtJ+HD9rlwbM+kBbcXdXvxKTCsDTpkxqWm+c1Q9lVjcpbVJatxzz3TVS1jH9CfnrDGkCuDSo7m88jhi2pWod0hYwahQEiqWcYMnD2qKYOzpQehUGC6ILSPeErxSkL88I7pVlK4wsKeexKCEEzsMZGcZvHPPhBv1sTCp66/34JSjHcLSLLKEjeLmcKgYyTdrdG7o9AFtJLzWSovGlF9c6GoWI3dj8R1xbgpnv0TuEIhPQhWxtXJ/68H8Lsk16PL6CkBBj2djwHEk2m6Mz9cn+EoUIZctjLmho2Zz5EdyjZP5IdtR1pW6CkSi+KvBqG2xMpgGicCX8zoKB8Sl17GZsokr3HbkfG6wpeVqFw52WYfhNvuHe6Q8Fs9j5WwyjTvNdM6C4d5OKeQqQj1J4M7wHAjCq89KNz2fG9nI35X02e5l5ViuLYz+7n9FHDPA8evWqrniWQqDl+qmYKQnNzDq+8jh9eaaSUe/XFTzudY+jVGZE7ZJ0yfcHs5B4cvK5JTjGu5d82Qad/19G8q6nuZo8PSkRX4cu80H0UB7st3mdtK4lpy4m1XUZUVjUL1eE6Ma039Sc0Saf1wJgJNy7Jy00HzWQgn04qzp7yTfdcT6ChUiWQpXvrzeFJJvLeh0lTbRPdKz9SN9rPMISdu8l9Wl19w//rXvwf8J8D/rZT6v8q//dfIL7b/g1LqPwO+A/7jf0X7d6lLXepSl7rUpS71L7Uuv+D+Na+c8//B7Jr6/9R/+P9rY1rR3WqGKzUnbIVa4Y6J/bcN/ZXCHRTJKaqX4of69hb73MuTckjEhWNaOdw+MN3KRsx+xOx64qqmv62xx4iOiXwn3cvu/TM4S1aK4aaiQrpNT2pWrsSbZ5/PKt945Wk/iFw13HjsfqT/Yok9BFETvZPOZOSJVSV58pcub0X/OsFqomrkqXY0Dl5EocxVIg9GOKWbkjY0aPxW0d9F3NZg++JRK0JJey9P0LtvHYOVDtTpSgkrEPA2oFQmZ0XlA0OucauBaV8UlKYoFUmUv5yR7vnMrCKHVpS76BTjykDKuP00Pxlnr8m1w3QTyWlUH4hXtfBxEQ+t7ifp1m4c402N244z81iFRKwt7uOWtKpFbeuHHyhNCRXE16aHQFh50rL63dSxlFDHiVRZsJq8WYrnsigzqZFr
ofrAdCdeyNg6cqEq2G1P2NQyhnYT2Vvsc0dalBSxpcd8+AxNQ359jT70pHUjSXiA/bwl3shnuvc92TthN3uHqotX3BoICTNkTK/59acb7vcL9rtCBjEZsiL2BkaNigq700zXpXP+IHzHZquE31qL4nBi2CYriqrbAwqa+8TxtZ5XRUQZyYRaFC+3i0IgMBRlDLKxoupHOL6pROHoM/7lnNBlelFik5Ox4XfnVQ13hOhE6XF7UWh/6LVGyViqdhG7n6ifNDqK4gPQfDyy/XmL7TPjQpMqIRigzgQDHSTpKvqi0r5k8RgWhmwyRcXuMtVzEFas0zI2AFOS9vxjR1h6QmNI3swqjenB70bxOjpF9Aq/j/Q3BrcvqwpeocfM8c7S3Eeqx4FsKtxTMfOFWDzYGrMfUccB68w8HvN6KYl8xdc/LS065PkY/VaO2e2zUAE2htVvBnR/WvkxhKsGPUb0y5G0qNH9SKo98UoUWPvckTYL9P0LyV9Rvdtjy8oFQGod9ulI1gtshtzWZKfnBC9AaCWxBc28OqJi8Z7ue1RMhLs1qTboIaLGRCyrWfbQo6Y830dh4XAvA8noWYEzU2b5LrB/a/H7TD7KOLUHOc+mWOhFactMCzlPiw/Fi7wLbL/12INc7+5G4w555hEnL8pfrIpi5xTVNs7H6LeRWGshUEyRcLMQFTwk6SUAdD+SFzX2ENDHUVbrQhJPM/Kd4Z9EoXe7gB4jto/nJMfjIF+UKZGuFnPPQnNfVi2eFS+/EMpO9aQ4vpGkwVOy5XCTad+rQvoQlrpK8ue0atHeyzG57STzYs7YPkEZjtWnoyjoxyipe/vEuDe4cp7HtWL1myge01HU8GzAP5/vORTooAi13Hf1bsIUSs3qTwPDqxq/07ISNyTc+yd0v5oTQMPKUz30uKNFJU9/rcVXXij5p+8ZPckKUPsgZJKCBiZU0n8Sa5lHdBaucVqcqQ06ntXm4ZrfIZ9MC1F8sz35kcVXrCf5I9uQtMDqJVE/yLV0B8e0KCrzIf0L1Vu4eHAvdalLXepSl7rUpS71I6uLgnupubKSDk7U+akVBcc3ophMK+nSXr7P9KV7Vk+Z5BeERhN+shB1L2TGKzc/YaWrmuo4ir82Z45vHGbKuH1JqDm0HL9ZYcZEbDTjdUX1bs/4WhSKbBtiZeSp/2Ege/07fq2sYbypMX0kLCzmMIlSMXcfS5f0tNRMrWTWk8H6yKKRJ9/hvsEMGtMrdKcJ64gKkiIF4kUerxLZiWIWG+iv9exdG9aS1BKbzFhlcpUw6xGt5RhD0ty0HUplYtKkW4VSmTD84BactPhCbYakSL6kQZ18bAamtXCB6xfoXznplD3KG8LKkW4X2OOEjgnVD2TXzolHYeUxVuM+vDCtK3QQz19sZB+qD3sIEVW8rWY3oIaR8KakiMVMWHnsbmRaV7jdKIlkTjPdtOU9CftwQGUwLz2pcaC1+C1BvLZaSxpZBr09kvxqVlmI4lszY1GKjyOME6oovOGuhZ9/Kd3grccoJQpO8fJReVEqQyItGlEt8kL28+bEyp3Qxx7brameIP5Jw9E3+JPfeiNd0K6wZnVQRJ+xz2XMB/Gl1Y/iU01GERtOFjXcTq5VWMDit4lkoX8lfrbTdcxa7rf6JWKGxLQSduS4Ll7lfFaH3D7i9kq838XzSKto7oXZWj9mhiuFPWRcd/LJRbY/EfLB8l3E7QNhYWZfe6gVfidpdiiF30XsbmK8EjVMeKuJaanJGg6vDe19YveVmZWY7tYV9VaUvWqXOLR6VvvMkFBJxrQeE7GxVO+2xE3xSw+RtNEMt43YgVNmvLKzihxaTTZe/ME3RW3MlO774t3sMv6QOd5qzEqTbUX1cPaDp7ZC5czwusU9D2A0apg4/kT81tXjQNaK7DTLXz7z+O/csPrNQCweVpU0/ZUpXkhRwrNW85qZ/7Sn/3JF/XQkbVpZJXL6d9TybDWxccSf3JKsIq4q7Oct+vQevWD4cs20
NOgxM26uSO48B4erBhUqUTJzJlXS0xALOzrVZVVkCow3Hhq5zs2HIp96R3IG1Y3i3/WafFPjn4ZZ4upu/cwbNmPGdonQaKbCZVZR1MVYadqPET3B1GpMGY/DtcUOkhoXao3tc1mxOPs23THRX9uy2lFWMYpX93hnWbyfhBiRoL/1rH75LNSJkmhJ8Ge121vUMIHW4mcGqoeeuPRErwmNKMHmGGbmdlo3mPePxNeFEJPEu3ryfe6/rKgeRXHUk5ANzMDsj1VJMS3ES6qns6dURRiLJ9xMhvoxoscovQpDmSNc+a5yhtha3G4iVYb6fiRWFc1nGbP7rzyhVjMP23biV55/XstnL55kXskahlcVvhCF3P0R/3JiDzuS16SrJal1UMab7oW+o0Jm2AgJJbRnf76KosiaUby/WZ3nLBnPcuy2E/U1+aJonxjcnjOFpCjXkhp5uq+lV+Xk/xded1GJT771KB7hbBTuqUdFOZ/1UH5nOAbQimn5V/8Ke1FwL3WpS13qUpe61KUu9aOqi4J7qR+UIraZ6kHNvM1TN+zJQ5MtHG/17D2tthplM7uvLLaXDur2c2T3laW9L0+Vu8jx51eMS8PUKsyUGWpNqE+exJUw7oIRVmaj4csloTk9f4lqdHhtmBamdItHum8KR3LKTAuNfx4Iiwo0jBsvPESgvzbUT5FxKU/Gw00mrQN5sGyzKI9XX215eWmZciXq2aDJVZo7aMcvJsnMVtB9EXEvmnFVfIkI19AdMrHKYkqqIjkqvroTBm0uUm9jp/m/Q7SkVJigbmJXlTQvnRmea6IW1erkT20/KvqFPNUe3mhsB+OVpTy8M64N0SmqrcZtAypl4QaXJ3fTBezDgbSspUP+N0+E1+uZLwmQVjVh6TFH2c/py5s50Uw/HclrT6ossdbo0RSqgp6f7v3zRGqlO9tU0iWt+zB3zqdWOJ3pqsYeJqa3G/H97uTxX40BcxghOXQfhJKQ0qzwxsqgR6E0JKdRMdHfeBa/FiJHapz4gUvndWocaV1LelXx6aohkNoaMyaaxyQEDw2nsCC7l+5gPSlMV5jGAdy+dHgHmJbiPbVdPvtfy2msHxK7nwgrM7Qy5rIRlQTEd1a9ZPwuzcpMcnKdTFEoUKJgytjStA/xd5Sg6Mt9sBePn/mkmFo1UxDGQmCAfPayKzV3WyfnSFZRPQlz2HTimzZj+fmritBq+itFbES5ipXch6HgpadWvLHt50R0iu5GGNZD6fhWwVC/JKpHSRLLWtF/s5mVJNU6YiXeX/9hz3i3IFaaw+tCzDDQPCiyF76oOwghwR7PSvW4VPOcYiZ5XU+W2KzKedJUD4OoltcVtrFMSzufW3eQ8aT6SLiWuSDUhmllyva1qFRFLbN9Zve1Z/1dke9iIiwNh19ckaycf/eSMYd+9q4TxTcZlo7qcRAO67JhvJL7vbuTbv6sJU1Kj/L+9s9E8k9Lj4qJ8bqWbvxCmzjNb3Fpqd/L+NeT8GX1JAmUAKpxuMcjad3Iyk3MJKPQ247h1bWM+T6JIjtmQvFbK59xp5DEVWG9Zhg2mqkFf5BUNxBVu/oUSFbRf6GEjNLlmYGsI4wrg+3EgymrFYboTymNYIZ47hF47YmrSs5JmeC000wrR2hWuO0oSVrbDvKpYSSVMVq84lNkuqrx96JkZ6sJX71C9xPhusW9e5KVwUIFcV0ia4iNZrgSmsDp2sNpNUUoAfYoymVWFO4yZW7INL89CHN4SNiXDtNZhldyrcPSkYzCdYG49uQa6vtp3gfbJXQUYsW40nO65WkshVYVX7xi9dtJVk5rhZ5kJ12IkLLM4X1kWmti6yDnmXSTaitjyYpPWodc5jQ5huaTqM7DtaL5KH70rOR4Ze4Q9rQZyupiFtZzc1988U7ILWbIHN/ISkT9kOfve3vgB8dViEMr4feexoNS4PZJeLhG0b9eUj0OMzFI5UzO4Lbn1Zq/qC4K7qUudalLXepSl7rUpX5UdVFwLzWXJO6I
Sjtz9ybF1a8iLz8z8uR6EH/NyX8UK0V/ZUgVjF7RfE4c3pj56RbkST1Z8XAlpwhJldx1ed2MevbgjEtN8xg5vraz188MmfpxgjeG/lqx+BCZWi0eQsTrNy3Fd6aCdN6GhRFfLHB8o5gWBj2JShcWicVNx6rp52PXKvOSFoRVxD0JQQGl6b6RJ0S/GBkf5Sm8ejTyZBvPT+7RU/xoCjUp1LMjNYltLxL4bXvkuj7SmIkpGd4f17x0NX1XSA+nnHObiApIilwlsJlcUm5MB/W9nLtxLYpKshozFIUjCDfQ9IUzer0gNhpUVX4+EG4WxNaii6Kqhoi7FxpFriv6uwY9JYxWqCjd/fbzQQ4yZ+xuJFuNfx5JlcHsJ3Sw+OfCHa2sKHRaFMlUGcxuwO5ERQmv16LYPolfToWEjkk4ooDaH0k3a0w3SSrbFIUteujLtRYSRDYat+uFmzuk2adnukm8uYcBfejItQVnUCGh+pKs54S/6j93kGqi85gpE2s5j/VB0b3OhFqupzsoss5zh++4yTSfxCNNx5w0N3tHa4V/YfZpZiXX7cRWBVj9WcfxK0kTsp2Zr5l7KcxnZ2jHxLCp2X9pUBEWn+L8GW6fqZ8SbhdIXggNOjJ7dE80g/opCWljiiSrZg+u6RP1p272LuthQr8coaRwJa9JBpbvI8PaFJVIvIAnJUY8dTJnmEnS98aN5NQDXP3ziD0KeWNaOSEANHq+r8nQfBIPbFjX2N2IipmpdP+rBNVTYFqZwqEtfs5toilMT7exdNeG6kUUIr8XJXL+DCWkg2wgJ4UeIroyNJ9lxSA5je4j09qBUrOiNS5lLJhRxkByojgNG1ndmlby1RkW10yNnH8zio+4mqIQEk60iUPHdNeixySsY2dwD4dZgU1WEVpN9RRk/0IiLNy8cmI/vpCbivS2lQ79XSAsrHC9EfV0vFvgXgb0lLCHhDmMM6Uhrr2sZHgjqzjbnlRbiHFeEbDHiJ4yY5kzTwlltiRk5bVi/WcT/Y2Vc2rVPBZB1HyQFRshhMD1H5/n1/7Go0Omv1LUz+Idd/uInorGtodsFbqbIGfhQCtRomNZMTC7nnxXY/qiUHYBc+ypHkti2xTRU0KPqfhGhXusytyhvCW2FvN0IL1qhTrh1Oyd12OGBSzeJ/ZfF/ZtFvURZJVOT5nNrxLuIB7l0MiY0+P5XIRNhS1scRUz09Kf+0WU9KikygqRpvB6TeGAx9+/of31QQgSXy1QUVYv5mtZyc8LySGhvHwP2oOszAxfbXC7Uebwg3ia9ZQgpHOSo1K4lBlvanQUIoF/0TPxQk/Q3yj8LnP1xz39rSTlne57PWXqg3x/x9MqrFXUT4VH7DUqS7/LaZsqQvNQvpf24sFVCTmOIKq+SufUQzNkklfUnwamdYUZEmHp5u+t0DpZPfqjvzq/6qLgXupSl7rUpS51qUtd6kdVFwX3UufKwnW1+3Ni0lTD4++LIpuKj9DtIZ0ejTJ0d+IdU0GemvUk3c0nnuXxtahdthNmqDvAeH32NHZ38qRXP2b6G4UOQlsw5anY9onnn1cMN+KJnNriAS6pTcc3jmGtsMeKWCncwosX8Ors71JJujzHTcLedWidGIOZPbBTNOSjQbWRdNDol9JVGmUb02BRSUEhMIRWfGoz89Mqjre6JBgp4s2EX57aSiFkjSYzJcOV60iNQqtMW8l7joPH+0D3yyvSFz1UEaXEenXygO1+Jiq73auSBy7qzf5t4YpO0HwOjGuLShm9sgxrgyp+QtuVpCkDdDB8e4PpAuqTPNmPX1+TrcIcE9PSobtJ1KTbYs5KmdhYUZ5yxh6CKARjon8jMot/EmU3NJppUaOnjB4rcsmTn5YW/zKKh8oosi2e2rWozNlcSYfvS8f0zbWwU0NCFYajmhKpdoSFpf5+EG+hL6xd4Pj1AjNk7P2O+Ep8mCc1ONenz9AlMS+iY6Z5DER/7v5XQXzo/W2WJKMjDK9gWhd1tBZOrttlqhfx
ncf67Fv3h0S/0WQrfrTV95Fhc1YVk4PjVzXDSlO/iBJkhiS+t7u27KO8V0+gTWa4VpjxvA1h5mZJSSoJZe54VqFVEu9c9IrxqqJ66DFTYlyVNDKvSF46/nUXUJ/25EUzK7wnGkp9P3J83czd28mdVT2VhY/qi9o2rrX4kO1pHxXV5yOxcdg+cnzjC1f75NuE0BjQ0p3vdoFpZWkeSsrYKCsxbhfRYyZbxfHOoEP5OeT8umOmfpgYri3+JTBcO0KZe+qnyHDtcAfhjobWYXcT/evCfo6ga0Pycg3sMRIag9+fTrSouNNSPs/thQ5wosicVPoTScJ2iuG2keMuHlhYzrxXFTOhNeipJp3GdIKp0bidItaO+pMMpP4Lue/qd5njzza4l4l0K2lkodHnVbRaOMXJ1rKSZRQ6VFSP03wtOVFM9iPJC5kgLq8Y1rJfw1q66XXxMdsuz4SE03lCidfZHhLZFi7xTq6VakXxNxE2//zA8asWcwyz9xTOqlyoRIVXMc+ebxXF420WFfpZKCzDq6oo0qIMZi9UkHSi6GhFfH2NKt7S/usV1YMw2ZNVNO9GSXArPQxZSfd9vF4I+eVNI/dZOcxxJauAfifKcnLQfMyYsnLjDqnQH4TZq5LMt2FhqB7LyotShJWTnoOU6L9aoadEfy3jpXo5+0b9y8i08nKObuU8Ra84/mQxq5v1/ZHui3b2v7YfRkn1OwRUTIDFduHcb3JjJY0yJkLx0ZM8OmZsGW/TppJzMUSiM0yvLaE5p4h1d+fv8GltZ+/1aU5yW0lQi15xWClUkLngNMajF99ttmWeCuCO6cywPQqhYWqVJAKGcxLjD+c3MyRMNxEWFv8y0b39wVjqpe+m//kd/Cl/aV0U3Etd6lKXutSlLnWpS/2o6qLgXupcSpV86IwZzvzDbEoi0iBKUXOfGdfFh3dIqGxQpZsyG7DbPPtvQTpoc5Jcb+XFb+Nf5L8gyljWivqxdKZqaD9G6nclqez1QtTgLD6e0Cjq54TtCiAWg99npqWQDbJpxNt4UpoexC/WvZG0qNWiZ1WNhKQZozyWpqzQo6Z6faTL0GmH7RT1p9J5/95iBkl50SVhpn5ONB9E/cyqJnpF9SDPjN2VIkbNFys5hpvqwNoOJBRDsmiVcToSkyhJMSsW1chuHWEyaBfRJhM6K15cIKwSqExYWsxeo8OJvVjOwpjFO6coSoR0OZ+UmPrzIPzMyjBu5LiaPpJenWgUCdNFUsmrj61H5TyrA/7Tgdgs545eM0RI4n+NlSiDeoxkp7GHSFgY/MtEaC2xLopbrajvE2qUdBr5Nys+XGC6qvCPPTiLe+yYbhrpgC/KojsGCBm7mxjerjBdEJW5bD/Ueh4XceHwf/wRtCYvm5nEcFJ7k5eO61BrklP455Li9RQZFxp3VIRalIjlr5nHvGkkgezqVxPJKZIzsMvzeFv8tiMrUaxjpZha8ZWfKAsnj6g/ZGxRNvSYyPZMJ8laiZpYy5hPXjrOm8cyFqqS9KcMeowkp/G7iVCYxm43lWSwMh7fNthDnBVHlTLmMAp1IkMOgbRuZslDpVyoKKV7Xol66w6ZxQc5v+PalDS4RKw1i3cjejqnDekfKIDj2lI9R9IrQ138s6YP6CnJeNMePYmn+MS21KOoOLHWhFbjtxF30LhjmpVq22eiR7rqU0aPifZdR2jdPKbBUn0QXq2ekvgwi9ev+nik/6IVNXGI2JeB7svFTC7RIaOHTPWcqB8jw0ZIEtVWLuawEUKLjBVFcy/nvX/bUn0u3s8pUn84EFYV2WlylusbFufPkPMdS1e//p0x3f1kIwSNyoii3EfcdmT3beEJJ1HFxrXB7UVdHVeaptxf2WlMH+hfN4zXnqzF/2q6MK+SqSTXy+8mYqUxY6K6H2fvqJ4c9fc77KsG3QXM04HpzXp+XW2M0ASmTHYGMyTGjZ+/B9xB+Lvtp4TtEtXDSFi62acs5IcsKWudl3nIKFHx+tM8L+NajxFy
Rh8Gws2CbP38evIG/9Axvmrk/rYa/d1HGW9KoV52xK/vhMtcadrvj+y/lS8iO2RSEMJE9SgkBrKojwDDSpdkLsvyN73MaVlU+RPn23//hH22oLUwfL2WlL/SL6JH8caqGNEvB8LiTu7n5rTyIisfekpUjwOpsthDRIWifj51qJUwkVUf0GsvSaHqlHBY5gNvsLuRZBTjRlLnbOmTcC8DYelxzz3VQogiemImk3SIett+KH0QCeqXhC29IO1HuX9jo+dkOpRcY9kH+TwzyMqpKp7204qDfwmMa4OeFFGVNMWjfPecvO1ZK/zjKMxnLb0D1dPEVJjH40rmg5NP/S+ri4J7qUtd6lKXutSlLnWpH1VdFNxLzXVSycaNIhUladxI/nZohRRge+nQPvFxH/7AEVrpKu9vQO+li1ySTwqrcgXJZerP4l2d1uK/DIvSwT0qUpXZ/kIyrpMX1t7Tv1EStJL4t8JSsfuJxh4gtIbjXWELNgrTn1h+wsVEMTNqlZH0sdBm2MjT59+6fo/VkftBfG7/7P4N5u0RYxLaJdSbgfFocQ9yi7TvRMnKC8qTqeStRy/7MCsVO5hWoA+G3/+971k6eWo2KqNVYm0GNrYjZs337povGmFdTlnzPLbknygOg+e4r4iTRtkE7tQSDkpnUhNJo5rVtVN3+eGtpn7MRAcoqJ6LMqDOT7mxMgwbMzMvp41Hj6KQbH8qKvSJXZqNYriu8M+Fn1hIBSoW/6xRpMoxXnvcVrbRfdGiQhZ6Q5YUuf7W0Xwq2/CW41cNeqxJXhGdov0wcvhSBpQ7JKZNhbUafRwZ15ZpoWfFzQwajCjF09oQK43tRC0+lR4TeCdKwje32PdP4sMrKsd45UUdqcVPPK707+TJJ6Pwh0Q2Bj1lYiXX/DSeTszb2a+qFMv3I8c3cn5CK6ooWfxoZDjemll9XX7fEysjKmRRIPzzwPCqRp083wvN7itDWCiGqLEHGd/Vc0kHvDXE2nB4a9j8ScSMgXFMfysXAAAgAElEQVTlzp7GKcLComKm+X7H9vevaN71xLokzgXQLwfi8oZpbamGm5mbCqK4miHz+K/VdHeKaZnxL+L9nukQWjzhOiSOa4fbBhk/ZR/9c2D38yXuKExP489cVBBlMYeEHgN6smSrhI08qzgyfqOXsR69xg6J452ZFbGsFN0rTTYLms8jeozobUeqCge3kQTEXDlJRlPyOarwW+PSS4d2K8rpcNvQX5t5PyXdUc/jqv2U2H/p6V6VFQkvvlszFcVqSvghsvtpg4qFuhITetczfbnEHYOoald+9uBWL1F87Dee9rdHwsoTKz37fkH4qMmKWta99jSfx3m8wmkOULTfvYhS3QgzFkQhtkE4u2NRIevPA8kbqtL5rqOo38O1bLu/cfR3Fas/epLXp7qkYUlqpF553GNP/7Yt50ERGk39GNl/I977ZjuBPq0EZtw+kI1FT5n+dYXpzxxoKJSLMTLdtkxLUQGzURy/ks+oHifMfiRcVdjtQKo95qVn/zcF+dN86DH7QVZsCkUhVRrzA+99ur1muG2EI2wVsXFUzyUJsjV018L2/qFPfGrPJ3rcSL9If+uFfKKUpM9dlTS1X2dyiKixZ/j6DSrmkvoln2G3A6mxxLpGXzeS1HUM6LKSmJWQPaaFRkWPPcgqx1ToFqkR2kesLWlTYfoonPGjrBZU3jBtKvznA3FVU306EJu1cIeLH9o/9NjdQNhUwpfeR4w7pyS6gy4MW8fy3Ug2iv1Xdu4x6G8dfisEjsX7if2XDn/IHMr8FytmRnhWWXpv2rPH9zRPVtvI8U7Y7cqLr/w0p6qcGa88ZkzoKc29AuH2xMEVNf+Hvty/qC4K7qUudalLXepSl7rUpX5UdVFwL3WunDGdqLcn0S8bCAvwz6IS6GKHCiU8RkXx2UQvyta0LikvjpmBZ0aILUwrhe1hLOqWOfFdewBRfZvPuahmZ95mrBWxkqfp5MQTOTXgdicGH6SVov2Q
xder5I8/FGWz+H9UyuSouG2PfOhF4TkGeSI89p6UNPv3S0mxWk4QFLp4kZvPie3PNP5Fjnm4EZ9Wd1u8gMc8kyOiz3A7cFMd+P2F+L++9E88hiX/dP8lf3Z8xTftE0O0NMVA240tVkWsTliduLo6cOgqrE0YLWrV9qklDwYSmEGRlXSodq9K92oN0Qkvdf+FITRqVn5AUnSOd+Jpcp2wKGOt5s796OUaDGtN8z5i73cMt7fz8NCHgf5nC8wo16h7W+OfA9X9cO4mL8lUDOJxi43B9qKQAfO1TUaYyONKYSY3f4Z/EQXPPZZO+pBRhdUI4HYj43VF1MVz20pHtSo817ooyeSM7QLZaMZvXmG6aU5Ty1qRjZYx8rnDXlv8NjKUTudkJX2s2kayVpguYfs4H0N/ZfCHH6SQWVFeZq/ejcX0kvMO0jE+rvQ5pWeKmJTRoyIsrPggYyY5IS9A8Z42Vsa2ESUiOWZOaXMfRd10isOXjuo5iTdzL+NpeFVjBuFskrN0gO96TnpHbBxpsyCrwuq8acUTW/idw81G8umdIrS5pLLJvbz9pnhkAyx/MwkZY5/o7zy2zyx/+QzA+GZJcuJBPjGAbX9Wo+qPQTyT3z2RnSEsHMNXDW53TtZTURTO6DXTUtia2YhPEcBMSTrwPfQ3jsVhIq1qYlXGR6Pn1QZ7TOLv1rIyAUL1aH67I3y7xu5GDt+0uGOeE+FsFxjXjnGpMX0ktkIoOV1Ld5Tr3v7xE/03G7LTJCUe7Xl81E6UwqdB/OYhi3JWxosQMRIqZ2IrCXPT0szzn5BRDG1hHqsyVpa/Ldfq2kEuXe+vFmSri5p9VvP1047aG9zBERojtJshoidVtuGpxpH+2sysVT3l2YscWofdj5huYrxy2EMirv3M4l2+i3R3HpRcY/8iSYpuW3jDlcG9eyH5m6KkM8/TMhYGxmuPfe6Y7haynS5AEva6jIVErowkmylFXMu84fayD9PKobuAPg5Uj2ZONcu1n+eEXAkT2/QBvy0e9fJdt/jVC8P6msUHIXKYPtPfmHk1UweoniRpzO0j9jDRvalldalsY/riSuae+z3Ja+wx4j8eGF+Lzzd5K95jgAjTyqAH4YqDKJdm23P8dkO2CvfUEa7OdJPhpsIdgnDSjax4hHWNnWTeM9szV9p92pEbWaH4YSJcaqyQU6Y0c6ZDpbHFEL36TWD/hSVbWc1qvt/RvbqW1RtAdTK31u/7mfTRX6l5RaF6EVKPjlDdSx/IoTJU23S+JxWYLuG3soJWPY2MG3tOdDv1z6RMbIqn+7qifS8ysu4CaGZ++F9WFwX3Upe61KUudalLXepSP6q6KLiXmku8QnD4Js3sVXsQ351KkuQ0rRTTSniyAP5FE2phgZpePGLjRjqbZ8Uqg3uRJ7z+9tQpCsmXp7QoPLz6Xvw7hy81bnferxO5oH7IHL4S5TKXrm4QCoOelDB0o3h+UXAs6uq0FO+wiuLN/enqgUOo+Kef3/KLm3sAvn31xC9/+aXkm29G4qjRg5738eHfUoQ20nyQzllhopbEKpjTfUIrfN/1lVATXJG8r8yRO7ul0hP/64d/k5Xr+ecvd3y9FLXr17trxmjoJ0tKGgeEyaJUYBxFQjBVhCoSj1ZYrEgm+Ek9cDs5J7FSoliXfz91fPc3lmkhCTXRw7hUuCOzslltRZUcF6IsZr8RNal4IvXUkKySruCSGBUqX7Lf5Vr7bWRaGuxRlGN7THRrMye+yfXOhfQg/99vzMw+HV45TJ+FBRnBdpF4Y+dkJYoGmUsql0rQvXYzocCWPPnpbkGyGjtM9G9r7N6Isoyovio62fZCPHixPvt8p4UwPbtr6ZhfHCJk5q54qDneWuqXktSVC8u1KHY6lrz2Vgl5o1G0n8JMF+i+aLBdksSfsk+7v7FkXGraezmQ3dcG02WmpSpKoKSInZTQ0GqGlezztFCopKlinpXJ0Ioq1N01pJ9fiTL/zRXuWY5B+MWRbDXmGEiV
4fhFgzuI2pUMJflM5oDsZIUAxdk7+pwZrxwq5TPnN0NcVj+41qK+i1daxle/OflXG5a/PnL8g7fERktSmlV0d24eS7IzQhnIWvz9OmSGQrQY1gZ3EB51qBXjTS3vnWkRZa6pDKaPTGtP86sHpuWrMo40cVWLGr5wolyOeU5oci+J+mPH1C6EG6zk2vp96R+YgFQSpPYTw3VF8qrQOQqLNwohY7yqyAb880S2zGM2eo01cl+h4Pja4bo0j6fqoWdaiarpt5OoceZ8X56SHOW6V6JoZmZyiXIadbchtg41JSzFm2xFTQbxGce6or9VcK/RIbN8NzC+EeVR7g9DqIVbPW4cpo8zpQPAjELByFqhim+yvy7X8iUQ7lZU7/akb1fYfWTc2HkOiI0V3/GrBd2tl3HdWrJR+JfCub2rMIN4hcebGnMMjNd+VqrdbmK8azCDx30+EKvClG6LB1drUi37TRTvuDkGpnUZ862sQFTfvxDra/SQsJ2av8eqF1Eju1emqODnazYU1ng2wh/uf3I1K/H24GdiTP+mwh4T48pQRVF+Y6MZkX2wXSTWsoron4RZfHxbzUx5lRRp0CQnCWi7n7Ysf9OTFmVedDKnhIUl/vRaWNKtqNQzh9sbUmWw2wH7dCT+/Aqj80xySDbPaaWp0mQvKzTjqpAc9gkVMtN1I2q8Br8/35NmythOPL16klQ6dWfm+c+MmWGtyeYEzIZxY3H7yFC8zPXngeMX9bxilbWak+QA7MOBtG5Ezf4r6qLgXupSl7rUpS51qUtd6kdVFwX3UucKkeQhLuMZC9AZ/EshK1hRp/q3cSYkTEkRq0zYJOzOkHXxFl5F/Msprz1jg6ibWYvaOq4z6HMylB5E8Qq1YtxkVFDzE2d/l3EvSry4PqNrhd+e09ZiJVzabCAVX1dyMEpzLclDUEJycB8d4x9Yfn/5gSvXsbBCOfgnT1/hb3rG5wrnAzlbUp1IJ9OiTzApxo0wA6PPVKOaOZJ6gmmhCItEWECrMnd+x7HgJv7x8Vtu7Z4pG143O0Iy/GT1RFUU3reLLSEZtEqEbBij4fVyj1KZT3vxOT39diPe4CRKeX+X8c9qZqc29wkU9Ncav8tMjfhhc/Grdq80zX1iXCrGjZzfZNXcATysNNNK0X5KdHeiZvldUQBKnbi640r8n6ktfsiiFrcfpVN6Wohf0r+URLtynqIX1dgdEkEp6kfhGlcPch36N5K+My5FdaiMXM+TijItdfF9Z8yQxFP8AzVYR4PtEqO32KModuNC1MFQFLHowGfxX6qcGZei7p78X8OVwh4yyUvqXncr6VonFeaU8pWsKGnNQ6S7PfvHyDL+hivF8ntRsseVwQ6F9dtqhrXwU6eVePzGlfg2j3emHKeivhf/sT1mhhs5xydVfmoUdsjkEVyXON4a/E7Nvk+VoX9dMa4Vqvj1slH0NyWVTin0KPuWvKO/tjJWyiGYSRTjrAvHOp8T3GxXxttDYthobJ8ZV4r1d4Fpaeb0KrcPQlooiVX+sef4dTuvDg0bhfqqkeSjBIungVh5oaAgxIxkITkrHthJlFphXZcBqUqinxHf36z4/ID3a/9f9t49WLKzPO/9vWutXn3d19l7ZjSjEYMAgwQISQyEW2LZiX0gcQyxqQDOceAcKOyqkGPw8fElSZXjOKecHGIT42sBAWwfbJJwCwe7bJwQ2QkIw4ABSdwRQhKSRjOz9+xL39btPX+8X180zGh6C81lb72/qqndvfrr1V+vtbrn6+d7vucdKIPl4EWMhezw0jgHevQ6WUfI2nVT8LLScr0xpTtbSBCF/mqNxnpB40w58bU3LM82m4/RqGZV7ep2TYx8wvl8St42b2syqFCBrBNR36jGn4mkm5OmEduH05BYM/WZy0rzJSt0D9ZpnsoZ7KuNPxP2+Rhl2UJt2zb3V0w9jcMqfxXLVtZIqFKxKochxSPrCMP5iKIBg2Vh/p6KrSP1cR+j3PKwyzSi+e0ttr5ngaJV
GyvAGpniHg8KymZCtphSNmRcUS7pRagI+VzNZiJ6SvOhjGwhVNarWebtcLlmqTiZqfbDxXg8SxbllvWbWvAMRTsBtZkp+9yaZ72xJlQH56yaX16N1xhA8PSH5I4qS6jSmHiUm90yRTtf7RANq7FC3lgPn/uwbiDObAYqyiuiMmKwEE/WrESW75yvNGicyiibMf0DDeqjJJpYyOZjsk5kyQk6SaUByOeSsSqcLabovrplbceTtInhYkKZCnnHUm/6++t0vmYzM/lSg/qJbfL5uVDZz/zrta2SpB9yatuxzR4VymB/K1QGncxAde4dUDQbViXzUEJ/eQ5RfZhnvL+/bklCSxGt0yXxYPL/jIRqn82TOWUzHs/+jj6TWUjFGeWKt07abEXZiMaZ7UUrIRlUZJ2YWrcyZbtuCShgMybxcGpa8Dy4gus4juM4juPsKVzBdSaUJXlHkWYJW/bLeuR1LZsgOXSvKaBREW0GdXa+RDKBekV0OraV/A1TZ0e+yygzJahKTWXRCqpmRdQPnp96RW0zokxDSoLAYEUpW+EXY6pIGRMPhaQvpsi2TZEF8/6WDauw1l8RoswqpQ32hbdVV8omJFvm9f3y+n7mkgHtZEhRWR++vbFAWUZE7YIij6my2BTmcvSzMyLZME+mlOatjArGK2zBPIpSmv/19HqH3qGUa+qnAfhK7yD39JcZlglZFVOp0KkN2SpM4c2qhEqFBKuqllcxzSQnL2MWmvbrfLg/Ic9j6s2cvjaBmKIjVHGoQLMSUdvW8ar+kco4klctV1TGP2uTvlI0ZewFLJqm0if9ytSdyvyoI6Vo62jLqrVt5OQtU9fSLfslnretzcbRUTax+bnzjnkkR7/Mh/OWvxtlFTQjugdj5u8xDyiYulmmluRRNoUqCRWSiknFpaxj1az6y7GtbO5WDEJiwWj1e9EUaltK0Tb/b39fMlaZo9JU5eFCTOe+jOHhhDiTsQ94dICizI5hWRNQZbAazlUnYrgUUQyUVqFIYUp486QpQRtPrFGEKn1FQ0i75nnO43Dg1VQyjWLqW5brWt+ozNvdGXntrHpQWbe0hyizbo0UsVrPjkm6EdSnwwnD+Wic32lKlY6TNrKGfQZHKk2cqan0JVZ1rim0TpZjL3WyXdJbiUn6SrYgNE5DpkL9jOW9jo61Rqa2SwXDxZjmQ/lYHc07QY2VhPYDGVtPbCM6qbxXJYzVqmRoObBlOpkVSfoVw8WY/rxQ6ymDRaF9wrzLIxW5CvXu7bZ5X7OOUN8cZXBbDrRVVqoYLibknWTsj5XK8m01hrwVZjXqlm0Kppb1VyI695dkc9G44lY/JG7UtyqrJDasGM5H49zjeFBR2574A7sHatR6FVGubB1tEBWM+wBWaU4qu9ZqXfs7WnVetlNa92yy/ZQFsnkhGVjCQn1rUmFLKvvMJ4NqvB5gpISnm6V51ms2+zBK6BDVsTIYZ6aCpxvB47xgPtxRBnbeSYiGJbVCKeYb1NcLhksJjdO2+n+wL6VoRiTbQhn8zv196fhc5u2Y9j3bdK/p2LWZV4hCfd2eXzZiokHJYF9qx29ola10JRmrvFIqqFWsrBJTw/NWNFYWq8TWYFR1AbHPTePBIVVIW4hKJelm5HMpwwMd0oe6DA+2x8om2Gch2RqitSa1bknejmk8ZBfb5pM6tB7KLIsbSLYyS+tYhEb47tDE8nZFdZy3HOU6rjCYbpnP1P6fkzDbKOP8ao1sjUmtW5F3zLee9CuqeFIdsIotd7ms2VqIqFCGh2xmJpuPEW3TX0nG3wWo2rqDkJeetyyFIh4UbF9dDzN5jGdWBqspta75cPOWIInNStTPhOtt3hTrqLDPskaWRT1S+6VS6pumSkdZRfdgajOBzZAqUpus9ymamCd5NaFKonE1teFyjc49PaKhZYoPl+z7ezSbGOUha/0CuILrOI7jOI7j7ClcwXUmxKai1uoFWRayVRWKvmWv5guKdAo0i9AkKEWdnGqrhiQV
+WJFvB1ZZidQBP9q0VKqhtK6P2K4pORzFczlVGq/rJPtiKKpJAPL3Kx1BSmgWAom3LSyDD8Nv5DFkhpGfUi6EZIHVSyB+qmQR9sIvs+mrVaNY6FoKpu9Bvd0l6mYKCgrnS73Zwl5kVCupSS9iLJTjb3G6amQ41oKSdfUgnigFEEl0djyfJO+KaL5do1v9xc5UDPD2OH6Gco04nTeHr9mJEq/TKfum//WjrtQk4pcI7Lw07pdy1gfNCmriDxLiE8kZAsVUWZ9yOanqtHNmUeyd3CiwMZD8+rlHSEe2K/yeACdUGWsdzA1ZakZ0Vwr2D6Y0HmgHNf/zkMOapVYhmcVx5SpWK5tMjrXmMrUA+lqWPXO2LOosamr2XxM3hKGi9DvxuPVsYPlCKqQddwxb/Vo1S2MvKHQ22/3298qGC7GNDYm6kJUmtLZvSo11Sysuh+pfsMFoazFVAlsHk3JFoXapo79r/HQ0gtqXSVvCfUNU5tGq4DztpDNQaqWCDB3X0ZZi+ntD3XSF20Wo3FKxjMSSV/Hq5CTvvl78zkhKk0xSwZKf1809p1HOWxfFdsK7U7ISi0mXuPGesFgOSE9PWDjujmbZUlN0bH3YNnApnzaSufBXDSuxjbt3zRvJfRWE5ohxWG4UiNvmyIW5RN1p3mqelg+tVSmgtY3lKwtLD7UZbhk5vftqy11ICoto7ZoSPDU2r6SgamYcabjRInRsbJrKRr7E0ce5cGieeBH77NK7JrprUbUN5S8aarY6Dja/ipTdkN1NplaUR6V0D1Ut6zh0q61wcKkklkt5B33l2MaG5Y9PFiKxmpUrW8+01pfg0fT3leVRgxC1aV4aNes+XMjsjlTwkcKVNI3L3M8VJrrpb3WekUaMp17B+s0E7vWiqYEJdkqewH0Vurj66OqCXk7RpqTPhJB4+SA7WtaFIsx9U1TzaSaJFpomOhprNvnJc5s9fsoOxWBspFQWx/Qv7ptSme3sjxSYLDQML92YVXYpLLrejSzYyvyk0kSRC1iuJCQhipiZRqRnupRHKqHz1oc0jNgsBy+E0MWepTZ3yzMDtVC2oYlPVT09tdIy8ryfDf7REv2H1FViygbCfm8fd8Uiw2bEQoKb9GOSM8U9A93aDzUt4qK/cr6jc0yFE2rBNk8mdM/2Ar+Un2YshhvD9HllN6BlNq2zQ4UDXs83SwtZ76ApFcy2BeuySAi95dsFmaUcV3bhqjSsce3t5KY915MXU23KvJ2NM6m1tg+I6NrOx4SMnChESop1rq2JkCyAo3NY5x3ovHnOmtHRGWY3SiguW656oOlyXlorNvrjvzJ2f6I5unw//F2TraYgsh4vxpP/g+wWRehsW7KdG9/Qt6yhIhsLiRWbCrZgn1+iobNEjXOlNRPhVzlNKa/vzZJWjkPruA6juM4juM4ewpXcJ0JYv5MEaV20n7V5gdyNIqICqFsT6qGaC34bRTiXmS5kWlFORcaRDr2yJoqWDJcFiSH6lCGVgLNUIEmVoig6kdorOSR+djqJ8Lq2KtzqqZStiuioSm5VX3Sl2gYvHOjSishC7dshZ+9wTc7UjlW57ocam2wlrW4d2sRgE4tY3Vhm81Bna1egmZKNJBJikIEcd8SI6LCFMqoZFx5KhqaT7lKdPycB3rz3B4dBiCRitV0i2acU5OSWCoilE4wNNakpBYVxMEoWo9y4lBZZhCU7vVmmweH85wetimriC1tUXUKqmLkbYqobUf0DyqNk0Lej8jbpqAD1NeFKLdzls/ZL/RRAsD0eyhrQn85sYzZfYll7QJVKpRNiPJRNSc7BlQTH2StC4Nly0gVNYW7dbIcrwqvdc2HXdaF/or1u78q4wNZtKD1gNI7KGOVs0qjyap1NS9YNh8FJcdW/45Uv+GCKX1Smr+rTKNQXcmUBwir1nu2EnqwTxguma9tVHnPPL/mL+7vm6zsHSuLdbveal2laJk/rGwKWdh/0VTigXnpyoaQ59A6VY1VGsuJNCWjaJia
N1gwpX2kskQ5dK8KCnCilHXb1joxep+2Anvjujnz5So010qmJiWoahGNtZLuwYQ4h36Tsce2tqm0TpnHMG+PPI0TlXm4EJkqHLztRVBGkUkVMdSuiaJp57OqQTnXGKujRdNmNWrdKuRe2uzB6DhqFPZRs0qFrYdKNI7Hvs3NIwnN0xXpljLYF1E/UzFYjqhtTSqJlXWhsa5sH46of6sgbyXUtnXsaZz7tgZ/bWSK05mSrBWPj1O6WdJfia2637Za/mw9JmuPDmTwtYeJlvqZgt7+dJLiEN6nqVUSfKJ2/EaKWDycqGFlavnNUanjVI3aZsHWkZSiLrROmR+6dl9Jb39YBxFbtTKN7Xg1T5V0DyRsX52O9z9cFqJNu47TrZK4X7J1JFTmqwmD1YZV8gqJJ6P80nGWeMc+E2VqqqCUStLNxxmx9bUh20eaJD1LACiaViEvKke5y5avnbfMY9/fFxPljP3aUlmaxHAuAsxr3X5gSG9/On58eKBtal/wmuetGslgcq5HvvxYlHRbx2rfKAUmyoO3X8x7LSXkB+bH12vRiiBKyFsRrRP5OCd4tDK/TIXeAfOlR0WDohUR9yvyju1fCvOZj/ozWI5pnipAJt74uZMZVT0Zr12o5xVZKxnn2OZzsfn110v6q4mp/+HzYdcztB8sx4kCyXZO0U7GlRyzgzGdBwq2rq5ZSkVTGCwJw7DeJMoA7FwUdaFe2TU72JeMq/uNZvny5abNvpweUDRb4zUKlhCh9FatAmhtsyTaN5nVKOsSUhcs07msR0A8Tkko2gnDxYj2/TlVElPV7HvC+kbIYRfyZkRzvWTrkB3fKp3MrMRZxWA5sTUCIdu8ioUoZLYPV1K7HuqPrNG6gus4juM4juPsKVzBdSYoUIUKWiPrVVIRD4S8o2gcUgUqgeB/jZOKqhIYhtSBuRztJRBUWYDGSSFbEfL5kmgYUWsUZGsNaov2876oxWglVLXKlN/tBK0p404UAiUQg+RClYac3KBcNk5bYoJUo1/ZlmQwzsIMf6uaEg+FRpKzmPRYTHok4Sfj7Q9dRZqUJFEFtYoqFdL1mPh06EJLgwpj/uCyDqiMPY39VVN3R1nByWZMPS4YlvYRWy9STgzmSKRkUNZYaWyzWOszDObGZpzTijJqUUkUfkpHMlGpAVpRxmKtT7eos9jss3Ztn0igykMahUDvKqFoK7VNYbgY1MyRHzG2f/U1pXvIkiDyTmS+VyAeKGVTxsevcboim5/UGJcShkv2QqITFS/vBJ8zlucaZ+aHbqyZSrdxNJkoZmeUpKe2WjiGzn3K1hNkXHs87gvZvCnNGlm7oiU0To88uqaA1bbMl5Zu5PRW44nfMKQGRLlStCPq67Z6t7FWjj22o+MxVoZSU7Zq2yMJA2qDSU5j0YgoGtFYRQFTqsfV/voVKhNJL87sGOTzlkla1WCwOKmk1l82v6WKHVuNbDVyvGHKMUC6XTJcSsjqoyQTBSYeNeZMEdEk+F/nhXhQjdWZkRdTJSjFLfPTjlSaxpmKwaJVkOut2qruKNfxsTGlNlz7Tbvm44GpKKPkkMbJIWvXt6hvKt2rgqKePPx6mbuvMhU8M5VQ40kltDI1pb+qhWzNlcQya0dev4Ylg0SFjvtdptAc6th7l3ciU5tjU7WTvtJ+IKd3oDZuX6Yj77jNTozUIAjV2kaqdm+UmAHN9Wp8rpKBXYdVIubv3NTxbEBRt89Y0leGC0KS2bHL2pMc7yoxH6NloJpS2rmnN1ZHRaHWt5SI2nYJJHQP1iiDulrrhzzkvnnauwfMh9kPlRqlDMkpiVVei7KKshGP1dnhXGwV1LYqmqdKBsshHaMdTRIoUtABdL6dkc8nlGlE61uDcZXDbLkRsldtVXsVm5I/8p5qBO0Hc/orCa0HM/rLsanywdOtYm2r1JJUVKB3oD7JyR1YwoXGYQZuIYBB+rsAACAASURBVFQBbAiLd9n/E92DKZmaAkj4m27bdWyfOwVqdv5O
F3QP1hiupERBeRSFxoM9+vvmKVoxrXu32XrSHO17e+E9NOnvi0lPl2RzcfBOR5OqdtuWYWwr+M3Dn/RKWIrH14PkFfl8aucjgqRbjPN5IfhmtyuytnlsR37w6aqfZcPSEzS2ymTZfDyeRZISevsTywoeKo21gv5KOp4tbd8XZgoadnzyTkTzZIaUCVEeZgSXa0S5sn0ote/XtmUqjxXa1NYdxJl9djQZ9XM0k2W5yclQGayakpr0ob86GU4mAyWfj8mb4Ty2mCRqhFmcMg3Vzgq174e2kG6OZncsE18q++zGQ5tNK5qWadw8WUDHvPePhCu4juM4juM4zp7CFVznYUQl5IXAgWy8bbC/omqWSLNEIqVSGSsqC3N9to9WJGrKryrQKInScuz93HpSZYruyMNXCfF8jgYVUPPIlOFEidOKSoJSG36VUqtof7VG92hpOYeJqcmNk/bLvXeV+X2TnjD3bdi+xry+cd/2HxWQL1RUdcivMlX1TNGiHQ/HlcTSpGTta8u0jm6O+zk8ko1zcKUfUTaF9n0R+RwkXRisTDy4eaci3YggUuprYmkFVcxK3coKFWlMv6zRjHMeGnS44+RVLDQHnOmZwbVUYbnVR0TJypgDrS2yKiESpZXYuWjHGblG3Le9SFFFJGlJ1k0hJF5ILhQLFZKNqsaZbzM9M1lNbaot1NdNLcybMlnV3oPOfRXbV0fEfVPC081JlZ3BsmUKR6XtP+nar/p8frK6P5s3VS7vCPWtknRbOH19TFUfZTIKi18v2XxCElZ9m8d0++qRGmUV6NKNkEwxtNcY9TEqzNNJO2Lhaz3Kpqk+44pymYRqa5jKs12RDIP/OnhgywYMYxnnBUshNB/SsYqssZC3GFdcSvoV3QMxVRL6WAUVp2E+2+1DifnKgorTfAg2nmzXb+sB68dwScYVmPKOqYjx0BSbdMsyLzWavA+Nhfb9FcMlU+Q1VdJTEYMV20d9zdoWsb0HTWDrSI2lUNFouFSjCokVUWEKXX1NLesWUzuLhlUJG1XAqmpC87R9HraeYCv1pYL+qs18zN9dMffNLuvXdcLnrkHeFvPWlyFLM47G1dQap5TuAUuGWPxGnxPHmpZN3Jqo7f1VYfGugu6B2CrIPTg5jvEgpBrss3M1Oka1ruXj2vUQlPYIBvsiFr+WsXFtOlbEGmcqylRY+Oo2+fUdU4gXI4pQ3GogsaVNdO2cdg9YVaf6VvhM9JXhYsT8PQWDpZiyZorqdIZtM8x0JANTWqVUNImonw45tnXziff32XfI/N0l0aCgWrI3mm7l1M9EZKFyVpyZQjnKwS0aEel6RtKL6e2vUzQZZyuPrhXUrrPBQkR+Vj6oqFImEnzQpkQPFyy9YjTzUaZQ7BeqWp3mKatwly+3qK2Zuplf06ZKhKJt6SdRYaraKB0gGShxr0CjhOG+mqmUwQcLMAxJHs3TFb0Vu12EBBGA9rcHnHlKy1IWuspgRWidUPqrEd2rwmr64Lcezgtlat7+4UJE2Ri/U6sYl0KZmtIcDyyRwK6niu0n2jVQNITNp8xR61ZsPjkk24SvKCmhvlVQ2xgwXGmOPbjjSoWYpzseWs52nE2u6bKVhMzZ4DteqNn/eaNZxBQ639zm1I3ztB8qyTrRw/zc8QCaJ+xYtE8UdK9KyTqTyn2th6qQJKLM3ZszWLYKdaPZgmzRZhctpUEtg7xXUKYNtg/ZcSwbowxaS4rpHkotFWj03SOwdXVMY81mLqK8CtUiw4xW0/ojlVA0GCeTjI5f0bJrd6Tij73YMjp2EqpFQnd/QnOtsnUNZchxBtr3DRg8s4VGsN2K7fslFbqLEq6nlCgz5feRcAXXcRzHcRzH2VO4gus8DCns52bSCBVszjSIcoE5JU4qiqGt3h79GhNRmvWMbr+OiFJ2axApaaeg3wg/KxslDGPiboTuH5Jv1Il6EeW8qUWSKFpEpA8mZPsiSCtTTsMvwqReki2FX5dANDDf3GB/kAeCChfl
Qn8VC+EVqG2H1aLzaikKNSWqVdy/toCqkFcRaRwyFCuhmi/odU1V0U4BwxhphT5uxUgl9A+GFfJDIJry+YbKLGVpqt9wEe4+sY99je742KZRwZmsyaCssbnVoqwiNteCelAK2ULCcLtOa6HP/fctk85ltBoZSWzHsTtI6W82kH4M7QIthGStNl6dGuVCtlDROBmRL1g/kz7jCjT9/aacSAnptmVULn25x0PPsT5IaWqTFLbSHzUPZPNUNTrZ4+pttS01j1fbfnlHoTpVrTuqYGbe21rXKl9V6UT92DoSUzQZpx1Ew8m5ztum6g732ftBYeGunI1rTcJtnpzkn3aPNGnf2wdJx+rKSLEra6buaSQ0Tudk8wmDpTBjEIMMgFpIJZBQASt4bk05NFXKcltNIRkpvEm/ZPNIQm3LlGoV6Nyfs3V1bfwek75Zx1unSjavMY/rSM3SCJonbRW4lLB9OEYKu54aQUHtryT0V0fZzxVUpv5EYVV681TF/De2Wb9+zs5puOYH+0ylGSxEwXdp5yjKlM4DBetPtj7O31OSt8zf2l81pa3WregeSMbHIMphsKxIZf00P2x7rKbX1wsGixFb+2HublOmt69Ox8cp3VbyOcu7JKjEUcH4XFeJqU39ZftO0Qgap3IGi+n4XI6UHWrB+7dg52M0YzAy2NfXzdO3dWSS3wumjpe1mP6h0YrxIZtH2mNPY5Sbj3M4b/m2UpnCNc76TYP6n1Ukffs8FY1oLA1VNfMOShG+EsUUrqSrzH3LgpfXrm/ZqvOmjBNHhvtb1Hp2rot2Yl7XylaFN9Yq0q3SEhmAratTBqt1at3ScsBzob88UUdH+bBFy3zkeRsa60p93R4fLkQ01i3FYzhv/sqsE1FfL9HIpMH+AUvJyDuw+A3zoIpCsWRS9yjrt7dqKQJJX2mvF3SD13nhGxnDlTT4NiMap3LKtDbOty6a5hUfLESk2+YfTh8qxxXjsoWUsm7XzGDRjvPou2pa/ew3JlUT082SratrY99mMjQ/rvlYLfd1sBSPK521vrXJ+g1LIfUgfD42y/G5XvjmkO3DKZpA0Y5BGkSF+VzBvK+1no6zakdMX9PZXDxWIe3Yx2hkqvToehocMKV663BM54ESDZXJwPZz5ntaVs1sI6O32gqJMJPrMc4h6it5JwaxtBlG6wrqYW1AbKp5Y62kd7hpWeBB/ZRy4kmvde37skpkfBzSbZshyTo2g9FfqVHWhSIffeZstk8jmLu7z2B/HRWYv8e+nNa/p05/KbZkjQ37nmuuVWSdqYgXtcpymkD7nh5nntYJ+x710SoEVql9v0aoZVWH/+t6By1nfJRTfz5cwXUcx3Ecx3H2FK7g7nFE5MXArwMx8A5V/TeP2F5B1mpUB00ekGFEPABpFszP9VjbXASF+avMpFaUEapCtt4gaudIWhHVKpr1jEEzqKFhlT8Hh1SbNZqrPWpJSTO1n4Qn1+aI5kuyJEEGMXRKVKfSDyohfso2NRXy+cRW8ccV2g01yrOgImbm9Wk9aL69kco8XFYaJyKG+yrKuEbRq1MsbFOpEIUSMfONIc3DOWe6TXrDJkmjJGrlZGtm8NJ2iWzHlG2ltmH+RY2UOJv4W6WA2tB+ZecLJWlS8ldfudb6GFdcfXCd7WGKiBJFFXFUkbbtJ2l+oknRjmkt9Ol3bel8kcVIU+kNQy31yFRVjRXZtqQKFcv4BfPElg1Tr2obluta605yJKWAtFsxjCN6ByKKBmw9sTk+95aDaopV0lP6+y1Hd/QeBytKsm2KQdmUsa+0tm37hlBhaF7Iww/yhbtLojwepy7UtpThPiEOVcXmvp0z2JdO1KjGSPFRokxontZxTiWYf6scqXqAJqZy5sEDF2e2Gr4XVphncxFZxyo9FSOrXWTKpnZNdVz6SmnVyUbe0Y1qrJ5lHcuzbK5VIcMTkm41XsEdZ5Z32tufkHdGypJSXzOP7tbhhCizxIT2iTz0KWW4ZMpE696Sh54d
KiVtgx4N13TI8S1aStyLSHqCJkq6FRTLjZLB/iZxZuesSkxtGa0IzzodBotCfVNDCkXFQzfWxl7ofi+iSoX6RsnmEyKquhIPo7ESv3BXwfr3JFR1aJyCzr0V24fN+zfy2G5cm1K0hDK1ZIyiaZm4o3MzWDaVrKwJ609tMFyG/upUqkdkvvyiZakYVco4/QBMzV67LmX5KwVnnpRQNuz66O+LxokX/X0y9uFJpXZNbkw8kduHYuobyuY1Ce0HK7qHG+aDfNA6UV/LOfPkOmXD/Ob1zYqtZjxOKFj6Ssbpp6emlmGfbVG1yBLMuzpcFJa/krN9KCFv2+dj6asFWpvoR/UzJVtXR5ayoKYGJn3bZ962pIYy5BlXibB1dW3swTWPecX6U1Jbjd6f+OvBZqjKphIV0ThvtEp0/JmURBksRcRBWR8sRdQ3lc0nJBMPo0CZqinjAulWSX9/ffwdGufK9lUx+Rwsfq2kvxIR9wrAztf21SnNkwVF3dr2V2uWh7sy8kpD60TG6Wc00ASSru1vpPBqbB7P1omMrJOGdBarFjlcCB7aTNm+BloPmNc/n4uJs4nannViFu8a0j2Yms9z3jKwR/mvxTOWSLcqeqvxOL2kt7829reeubZOrR9mrpZjdNXSJkazASqCivn7NTJVtVcL/txw2Q6WIhpBOc/mJFQSs8pf9rmtTB2vbE1AmZr3tRYm+oqG5SkvfDNn89omVQKt0yXdJKw3ORBZakfP1PhRYsIo8z2fs//DO/cIw2Wo7hO6B2LzwAaxMw2fncZ6yXAhRiqlt1/G3veFb9rtqiYUbVtPEhWWMQ5htrJXMf/NAWvXtSy9Y1vpHRh5fMO6Dg3pCUOrcjg6znPfLhnORbRO2uyWxjaTpHFEHrzx8do20Bm/XpWESoZhDUK2XFI2I9r3eorC4xYRiYHfAl4CXA+8SkSuv7y9chzHcRzHubi4gru3eS7wdVW9C0BE3gu8FPjieZ9RQdWoaAcPbn9ZyBeFffM9Fpt99JCQFQnDzC6dffNdNvsNon6ENgVZq7H81NPU4tJycYH6woB20/w5a1uL1JKSg3Nb9Av72ZvWC+K4YqCC1ktanSF5Ho9TFuIvdFh40QliUYZFQqVQVRFnhvaTMFmvTSo4qVWn0oixbzfKrFoUEpFVllfbSYccaZ9hPjEpMRIl15gH5+Z5cG7eDoUKazX7+V5VQjaXoJvmM8vnQk3t8B7TM/ard/7uivXrBJ0rOLy8wephS1FYTnvklSUpfLtrHuBmmhOH55+q1xEgG1qOY7QVU+VCv5FSfMN+yZYHMySpoBubalsKVatCs1Gdc/OuZnOWKJEtWiWw0er9pG+Zg0XLVr9qzFixBVNBi5YpUEXDfrlXqZo/DygbFfPfsJXvRUvHla5q2xPPY9Ey9bVsmu957amWgRsPwmvMC9mCksTC6udzzlxbCxmI4fKrB/VJoaorG9dGpBvRWCEpG5PVwelGQbZo1Y4mvk4BNTXMqiCZSlLVJ5XMNLIc4MZpW7FfpkLzVEmZBv9pv4LI/HTtB3OGiwnte3v0brLzMNgX0zhtFcKWv9Rl48kthovReFV70bD3WSXmQZ37ljJcgPUnWwNNTPGRCta/JyHvhKqAZcQwqNWj/MdyroRYKTsR9ZPxOE9z63BCum0e4bxlislgKWIjrAhf/PI2998yx2DJHts+EpHPhyxroHtVRNKz1eAaT/ozUnc2rk3IFm2GYLiolGlE/UxoL1Pq5RIgVmlMQs7uSP2s6kBl12DehnyuompM+iClkGxNKoXFIRtz5K/VxBTNtacmlE3I22oBJxXjPODRiu5sTtDEZhKm1c1swbJnR8etv89Wv48qR+VNmz0oGgTlM6LW1bHPc+PaGlXNXqdo2L5GK8PBlC17/Zi8I2Rzo4plEcPF2rhNfyV4Wtv22SoaQutB28lwIaW/PKpaJ2MldzTzojGcubbGcNkU7PSMKWujPhTtCm1UDPeZF7i2JZYNHK73pS/n
rF1Xs5mWyny23QOm1meh8mSZKsVcRboesX1VYudSJp8Zqew7L59TNp4UE/ehd6jxsOqA9U0JqR2jlAkZp1XM3VuyfbWtfB8umle4aEHSn1xLotC9qhYqZQX/bc2SAcBev5iv6MYR6RlhOBfRXCvZOjKpZNbbn9p13FWyjlDr6dj7WSVCNh+bH7wECnvdUa4zCci2+Wbzjp3npK8M50beVJulWPhaj/XrWuHcTc4DWF510rcqXEXL1P04B41HSQ5hBkPNR99fjtj/kW9w5vtspi/v2HXQX05MJW4Lgyom/DdlFTpDSkLZFAjfcUUrqP2pfe9uXx0h2AxG3hYaZ6nEZR3iQUy2GK7rRaVK7XO51hQQy5pHNOT9TmbNLHVF2Hxi05JkIqiXk1zmqgajWPCiOVGxx7najZh0U9m62rKae4cbIYNbx7Ngmzfut/SROmEWCBqnlXw+fNE3S8pocv2cD1dw9zaHgXun7t8Xto0RkdeLyHEROZ5V/UvaOcdxHMdxnIuBqD7yKjRn9yIiLwderKqvC/d/HPgbqvqG87Q/CXSBU5eul85FYgU/j3sBP497Az+Pux8/h1cmT1DV1XM94BaFvc23gSNT968O286Jqq6KyHFVPXbRe+ZcVPw87g38PO4N/Dzufvwc7j7corC3+TTwFBF5ooikwCuBD1/mPjmO4ziO41xUXMHdw6hqISJvAP4Miwl7p6reeZm75TiO4ziOc1HxAe4eR1X/BPiTHTzlbRerL84lxc/j3sDP497Az+Pux8/hLsMXmTmO4ziO4zh7CvfgOo7jOI7jOHsKH+A6juM4juM4ewof4DoAiMiLReQrIvJ1Efn5y90f5/yIyDtF5CERuWNq27KI/LmIfC38XQrbRUTeGs7rF0Tk5svXc2caETkiIv9dRL4oIneKyE+F7X4udxEi0hCRT4nI58N5/KWw/Yki8lfhfP3HkGSDiNTD/a+Hx49ezv47E0QkFpG/FpGPhPt+DncxPsB1EJEY+C3gJcD1wKtE5PrL2yvnEXg38OKztv088N9U9SnAfwv3wc7pU8K/1wO/c4n66FyYAvg/VfV64HnAPwmfOz+Xu4sh8P2q+izgRuDFIvI84N8Cb1HVJwPrwGtD+9cC62H7W0I758rgp4AvTd33c7iL8QGuA/Bc4OuqepeqZsB7gZde5j4550FV/xJYO2vzS4HfC7d/D3jZ1PbfV+OTwKKIXHVpeuo8Eqr6gKp+Ntzewv5jPYyfy11FOB/b4W4t/FPg+4H3he1nn8fR+X0f8LdFRC5Rd53zICJXA38PeEe4L/g53NX4ANcB+0/13qn794Vtzu7hgKo+EG4/CBwIt/3c7gLCFOdNwF/h53LXEaa2Pwc8BPw58A3gjKoWocn0uRqfx/D4BrDv0vbYOQf/HvhZoAr39+HncFfjA1zH2WOoZf95/t8uQUQ6wPuBN6rq5vRjfi53B6paquqNWDn05wJPu8xdcnaAiPwQ8JCqfuZy98V57PABrgPwbeDI1P2rwzZn93BiNF0d/j4Utvu5vYIRkRo2uH2Pqn4gbPZzuUtR1TPAfweej1lIRsWUps/V+DyGxxeA05e4q87DeSHwwyJyN2bR+37g1/FzuKvxAa4D8GngKWHFaAq8EvjwZe6TszM+DLw63H418F+mtv/jsAL/ecDG1PS3cxkJnr3/AHxJVX9t6iE/l7sIEVkVkcVwuwn8AOan/u/Ay0Ozs8/j6Py+HPiYesWly4qq/oKqXq2qR7H//z6mqv8IP4e7Gq9k5gAgIn8X8yDFwDtV9f++zF1yzoOI/BFwC7ACnAB+EfgQ8J+Aa4BvAf9QVdfCIOo3sdSFHvC/qerxy9Fv5+GIyIuA/wHczsT3988wH66fy12CiNyALTiKMdHoP6nqvxKRazE1cBn4a+B/VdWhiDSAP8A812vAK1X1rsvTe+dsROQW4GdU9Yf8HO5ufIDrOI7jOI7j7CncouA4juM4juPsKXyA6ziO4ziO4+wpfIDr
OI7jOI7j7Cl8gOs4juM4juPsKXyA6ziO4ziO4+wpfIDrOI7jOI7j7Cl8gOs4juM4juPsKXyA6ziO4ziO4+wpfIDrOI7jOI7j7CmSy90B58phZWVFjx49erm74ewxqqq6cKMpzpw5M3PbBx98cOa2R44c2VE/drLvJJn9q3R9fX3mtvV6fea2AGmaztx2a2tr5ra7seJlo9HYUft2uz1z2yzLZm67tLQ0c9sHHnhg5rYAO/m+3sk1utNjd9dds1ep7XQ6M7ddXl6eue3JkydnbgsQRbPre6dOnZq57U6+73bSB4D9+/fP3PbAgQM72vdu5TOf+cwpVV0912M+wHXGHD16lOPHvbS989iyvb29o/Yf+chHZm77K7/yKzO3/bVf+7Ud9ePNb37zzG138h/xhz70oZnbPulJT5q5LexsEH/rrbfO3HY4HM7cdqc/aHYy8CrLcua2T3ziE3fUj+c85zkzt7333ntnbvuKV7xi5ra//Mu/PHNbgLe+9a0zt93J4Oi6667bUT9e+cpXztz2ec973sxtf+zHfmzmtm9/+9tnbgs7+/H4zne+c+a23W535rbNZnPmtgBvetObZm77xje+cUf73q2IyLfO95hbFBzHcRzHcZw9hQ9wHyUi8hYReePU/T8TkXdM3f9VEflpETmnHCUi7xCR6x9h/68RkUNT928VkWPh9p+IyOJj804cx3Ecx3H2Fj7AffR8HHgBgIhEwArw9KnHXwCc1xCnqq9T1S8+wv5fAxw61wOq+ndVdXajouM4juM4zuMIH+A+ej4BPD/cfjpwB7AlIksiUgeuAz4LdETkfSLyZRF5j4gITBRZEYlF5N0icoeI3C4ibxKRlwPHgPeIyOdE5GFGHRG5W0RWROSoiHxJRN4uIneKyEdHbUXkOSLyhfD8N4vIHZfouDiO4ziO41xWfID7KFHV+4FCRK7B1NrbgL/CBr3HgNuBDLgJeCNwPXAt8MKzdnUjcFhVn6GqzwTeparvA44D/0hVb1TV/iN05SnAb6nq04EzwI+G7e8CfkJVbwTOuzJDRF4vIsdF5PhOV6E6juM4juNcifgA97vjE9jgdjTAvW3q/sdDm0+p6n2qWgGfA46etY+7gGtF5DdE5MXA5g778E1V/Vy4/RngaPDnzqnqbWH7H57vyar6NlU9pqrHVlfPmbThOI7jOI6zq/AB7nfHyIf7TMyi8ElMwX0BNvgFmM7XKTkrmk1V14FnAbcCPwm8g53xiPt3HMdxHMd5vOED3O+OTwA/BKypaqmqa8AiNsj9xCM+MyAiK0Ckqu8H/gVwc3hoC5h7NJ0KC9C2RORvhE2zhxQ6juM4juPsclzt++64HUtP+MOztnVU9VRYT3YhDgPvCkkMAL8Q/r4b+F0R6TNZzLYTXgu8XUQq4C+AjUexD8dxHMdxnF2HD3C/C1S1BObP2vaaqdu3YtaD0f03TN2+ZeppN3MWQdF9/9SmW6YeOxpungKeMbX93021v1NVbwAQkZ/HFq05juM4juPseXyAu3f5eyLyC9g5/haWq+s4juM4jrPnEVW93H1wrhCOHTumx4+70OtcXnbynfTBD35w5rYf+tCHdtSPG264Yea2H/jAB2Zue/PN3zFhc16+8IUvzNwW4NChc9aGOSc/+7M/O3Pbm266aea2M1qzxmxtbc3cdmNjdqfV3//7f39H/Xje8543c9uXvexlM7f9zd/8zZnb7uQ6Ajh16tTMbb/+9a/P3PZ1r3vdjvpx5513ztz2JS95ycxtT5w4MXPbnVxHAAcPHpy57U6u/wcffHDmtp/61Kdmbgs7u/53ejx2KyLyGVU9dq7HfJGZ4ziO4ziOs6fwAe5FRETeIiJvnLr/ZyLyjqn7vyoiPy0iHznP898hItc/wv5fIyKHpu7fKiLn/CXjOI7jOI7zeMEHuBeXUU4uISVhBSvrO+IFQHq+J6vq61T1i4+w/9cAs89JOo7jOI7jPA7wAe7F5RNMIr6ejhWD2BKRJRGp
A9cBnwU6IvI+EfmyiLxHgoltpMiKSCwi7xaRO0TkdhF5k4i8HCsJ/B4R+ZyINKdfWER+UERuE5HPish/FpHOJXvXjuM4juM4lxFPUbiIqOr9IlKIyDVMyvkexga9G1hmbgbchA2A78dU3xcC/3NqVzcCh1X1GQAisqiqZ0TkDcDPqOrxsJ3wdwUrGvF3VLUrIj8H/DTwry7yW3Ycx3Ecx7nsuIJ78fkENrgdDXBvm7r/8dDmU6p6n6pWwOeAo2ft4y7gWhH5DRF5MbB5gdd8HnA98HER+RzwauAJ52ooIq8XkeMicvzkyZM7fnOO4ziO4zhXGj7AvfiMfLjPxCwKn8QU3BcwKec7nGpfcpayrqrrwLOwohE/CbyDR0aAP1fVG8O/61X1tedqqKpvU9VjqnpsdXV1R2/McRzHcRznSsQHuBefTwA/BKypaqmqa8AiNsj9xCM+MxAsB1GobvYvmFQ+2wLmzvGUTwIvFJEnh+e3ReR7vru34TiO4ziOsztwD+7F53YsPeEPz9rWUdVTM4aiHwbeFZIYAH4h/H038Lsi0meymA1VPSkirwH+KCxmAxsYf/XRvgnHcRzHcZzdgg9wLzKqWgLzZ217zdTtWzHrwej+G6Zu3zL1tO8ofxQU3fdPbbpl6rGPAc95lN12HMdxHMfZtbhFwXEcx3Ecx9lT+ADXcRzHcRzH2VO4RcFxnCuKGX3pADz3uc+due1gMNhRP170ohfN3HZ5eXnmtjfddNPMbT/96U/P3Bag05m9nsvRo0dnbquqM7fdyfkDiKLZdZZ6vX7hRoGdHAuAJzzhnEmK52Qn19JO2p44cWLmtgC33XbbzG2/8Y1vzNx2c/NCSZQPZyf93klaz6lTp2Zuu7KyMnNbodzZ/wAAHaZJREFU2Nl5WVxcnLnt85///As3Chw+fHjmtgB/8Rd/saP2j3dcwXUcx3Ecx3H2FD7AvQIQkZeJiIrI08L9oyJyx+Xul+M4juM4zm7EB7hXBq/CSvO+6tE8WUTcauI4juM4jhPwAe5lRkQ6wIuA1wKvPMfjR0Xkf4jIZ8O/F4Ttt4TtHwa+GNp9WUTeLSJfFZH3iMjfEZGPi8jXRGR2s6LjOI7jOM4uxge4l5+XAn+qql8FTovIs896/CHgB1T1ZuAVwFunHrsZ+ClVHVUpezLwq8DTwr8fwwbPPwP8s4v3FhzHcRzHca4cfIB7+XkV8N5w+718p02hBrxdRG4H/jNw/dRjn1LVb07d/6aq3q6qFXAn8N/Ulj/fDhw914uLyOtF5LiIHD958uR3/24cx3Ecx3EuM+7dvIyIyDLw/cAzRUSBGFDgt6aavQk4ATwL+0EynW3SPWuXw6nb1dT9ivOca1V9G/A2gGPHjs2eBeQ4juM4jnOF4gru5eXlwB+o6hNU9aiqHgG+CRyZarMAPBBU2R/HBsGO4ziO4zjOefAB7uXlVcAHz9r2fuAXpu7/NvBqEfk85qs9W7V1HMdxHMdxpnCLwmVEVb/vHNveytRCMlX9GnDDVJOfC9tvBW6danc38Iyp+68532OO4ziO4zh7GVdwHcdxHMdxnD2F7KTGuLO3OXbsmB4/fvxyd+O7Io5ntyhXVXURe7K3eelLXzpz21/8xV/c0b5vuOGGCzcKRNGV8Rt9J9+jV0qfHcdxdjsi8hlVPXaux/yb1nEcx3Ecx9lT+ADXcRzHcRzH2VP4APcSICL/XETuFJEviMjnRORvXILXvFtEVi726ziO4ziO41xpeIrCRUZEng/8EHCzqg7DoDO9zN1yHMdxHMfZs7iCe/G5CjilqkMAVT2lqvcHhfX/EZHbReRTIvJkABFZFZH3i8inw78Xhu1tEXlnaPvXIvLSsD0WkX8nIncEhfifTr32PxWRz4bXeNqlfuOO4ziO4ziXAx/gXnw+ChwRka+KyG+LyPdOPbahqs8EfhP492HbrwNvUdXnAD8KvCNs/+fAx1T1ucD3AW8WkTbweuAocKOq3gC8Z2r/p1T1
ZuB3gJ+5OG/PcRzHcRznysItChcZVd0WkWcDfxMbmP5HEfn58PAfTf19S7j9d4DrRWS0i3kR6QA/CPywiIwGqg3gmtD+d1W1CK+3NvXyHwh/PwP8yLn6JyKvxwbJXHPNNY/2bTqO4ziO41wx+AD3EqCqJVZ17FYRuR149eih6WbhbwQ8T1UH0/sQG/H+qKp+5aztj/TSw/C35DznWlXfBrwNLAf3Qu/FcRzHcRznSsctChcZEXmqiDxlatONwLfC7VdM/b0t3P4oMPbRisiN4eafYZ5aCdtvCtv/HPgJEUnC9uXH/E04juM4juPsInyAe/HpAL8nIl8UkS8A1wP/Mjy2FLb9FPCmsO3/AI6FBWNfBH4ybP9loAZ8QUTuDPfBPLr3hO2fB37sYr8hx3Ecx3GcKxm3KFxkVPUzwAvO3h6E2Der6s+d1f4UE2V3ensf+IlzbC+Anw7/prcfnbp9HLjl0fTfcRzHcRxnt+EKruM4juM4jrOncAX3MjGtsDqPHWVZXu4uOGexvr6+o/bvec97Ltwo8OIXv3in3ZmZ06dPz9x2ZWX2ooE7abtTLrDo1HkEdvLdEcfxReyJc6Xh18buxBVcx3Ecx3EcZ0/hA1zHcRzHcRxnT+ED3EuMiLxMRHSW0rki8g4Ruf4xeM2jInLHd7sfx3Ecx3Gc3YAPcC89rwL+Z/j7iKjq61T1ixe/S47jOI7jOHsHH+BeQkLJ3RcBrwVeGbbdIiK3isj7ROTLIvKeqWIOt4rIsXB7W0TeLCJ3ish/FZHnhsfvEpEfDm2Oisj/EJHPhn/fEU/mOI7jOI6z1/EB7qXlpcCfqupXgdMi8uyw/SbgjVgRiGuBF57juW3gY6r6dGAL+NfADwD/APhXoc1DwA+o6s1Ylu5bL9QhEXm9iBwXkeMnT5589O/McRzHcRznCsEHuJeWVwHvDbffy8Sm8ClVvU9VK+BzwNFzPDcD/jTcvh34C1XNw+1R+xrwdhG5HfjP2ID5EVHVt6nqMVU9trq6uvN35DiO4ziOc4XhObiXCBFZBr4feKaIKBADCvwxMJxqWnLu85Krqobb1eg5qlqJyKj9m4ATwLOwHy+Dx/p9OI7jOI7jXOm4gnvpeDnwB6r6BFU9qqpHgG8Cf/MxfI0F4IGgBP84Noh2HMdxHMd5XOED3EvHq4APnrXt/cyQprADfht4tYh8Hnga0H0M9+04juM4jrMrcIvCJUJVv+8c297KWQvBVPUNU7dvmbrdmbr9L896Tif8/Rpww9RDPxe23w0847vovuM4juM4zq7BFVzHcRzHcRxnT+EKruM4F5V+v7+j9uvr6zO3jePZbeYLCws76sdkTeeFSdN0R/uelRCJfVHYyfu7mP24Ung8vEfn0bGTz4pz5eAKruM4juM4jrOn8AGu4ziO4ziOs6fwAe4uQERKEfmciHx+ugSviBwSkffNuI93i8jLL25PHcdxHMdxLj/uwd0d9FX1RgAR+V+AXwG+V1Xvx/J1H4aIJKpaXOI+Oo7jOI7jXBH4AHf3MQ+sA4jIUeAjqvoMEXkN8CNAB4hF5BbgN4AfAO7FSv06juM4juPseXyAuztoisjngAZwFVby91zcDNygqmsi8iPAU4HrgQPAF4F3nv0EEXk98HqAa6655iJ03XEcx3Ec59LiHtzdQV9Vb1TVpwEvBn5fzp1p8+equhZu/y3gj1S1DFaGj51rx6r6NlU9pqrHVldXL07vHcdxHMdxLiE+wN1lqOptwApwrtGol+Z1HMdxHOdxjw9wdxki8jQgBk5foOlfAq8QkVhErgK+o1Sw4ziO4zjOXsQ9uLuDkQcXQIBXq2p5gco7H8S8ul8E7gFuu7hddBzHcRzHuTLwAe4uQFXPWY9UVe8GnhFuvxt499RjCrzh4vfOcRzHcRznysItCo7jOI7jOM6ewhVcx3EuKmma7qh9q9WauW0Uzf4b3SY1ZqeqqpnbXsAudEWyG/t8MfHj4ZyPnXzPOFcOftYcx3Ec
x3GcPcXjboArIm8RkTdO3f8zEXnH1P1fFZGfFpFDIvK+sO0WEfnIefZ3t4isPAb9+mER+fnvdj9hX7eKyLHHYl+O4ziO4zi7jcfdABf4OPACABGJsEzZp089/gLgE6p6v6q+/FJ1SlU/rKr/5lK9nuM4juM4zl7l8TjA/QTw/HD76cAdwJaILIlIHbgO+KyIHBWRO85+sojsE5GPisidQfk9p3FLRH5HRI6Hdr80tf1uEfklEfmsiNwecm0RkdeIyG+G2+8Oz/+kiNwVFOR3isiXROTdF3qNqcfjsK87wmu96dEeNMdxHMdxnN3C426AG8rWFiJyDabW3gb8FTboPQbcrqrZI+ziF4H/qapPx7JmrzlPu3+uqseAG4DvFZEbph47pao3A78D/Mx5nr8U+vQm4MPAW7AB+TNF5MYZXgPgRuCwqj5DVZ8JvOsR3pfjOI7jOM6e4HE3wA18Ahvcjga4t03d//gFnvu3gP8XQFX/GFg/T7t/KCKfBf4aG5heP/XYB8LfzwBHz/P8/y9k2d4OnFDV21W1Au6ces4jvQbAXcC1IvIbIvJiYPPsFxGR1wcV+PjJkyfP0xXHcRzHcZzdw+N1gDvy4T4Tsyh8ElNLX4ANfr8rROSJmDL7t1X1BuCPgcZUk2H4W3L+qLZRm2rq9uh+MsNroKrrwLOAW4GfBN7BWajq21T1mKoeW11dnfk9Oo7jOI7jXKk8Xge4nwB+CFhT1VJV14BFbJB7oQHuXwI/BiAiL8GsBGczD3SBDRE5ALzkser4Tl4jpDtEqvp+4F8AN1+EfjiO4ziO41xRPF4LPdyOpSf84VnbOqp66gLP/SXgj0TkTmwwfM/ZDVT18yLy18CXgXu5sO1hx8z4GoeBd4W0CIBfeKz74TiO4ziOc6UhO63u4+xdjh07psePH7/c3XD2GKdOXeg348P54Ac/OHPbl7989iS/Tqezo37spN/tdnvmtnNzczO39epal46d/F/o5+XxxU6qGnrVs0uLiHwmLLb/DvxMOI7jOI7jOHsKH+A6juM4juM4e4rHqwfX2aP4VNKjZyfHbnPzOxLnHpP9Avz4j//4zG13MlW8vb29o34sLi7O3HY4HF640aNomyQ7+4q+WFPncRxflP1eTHZqvyuKYua2Ozkvbmd4OLvRCrKT77Cd9PlKeX97Ff8f3nEcx3Ecx9lTXDEDXBHZmbwyed5Pisg/Psf2c5bavZyEMr0r4fajer+O4ziO4zjOI7PrLQqq+ruXuw8XA7G5CwnVyxzHcRzHcZwZuWIU3BEicouI3Coi7xORL4vIe8JgDxH5NyLyRRH5goj8u7DtX4rIz4TbzxaRz4vI54F/MrXPWETeLCKfDs/9iQv0IRaRd4vIHSJyu4i8KWy/VUTeEkrbfklEniMiHxCRr4nIv556/odE5DMicqeIvH4H7/2oiHxFRH4fq7B2REReFfpwh4j826m259u+Hd7rnSLyX0XkuaHfd4nID8/aF8dxHMdxnN3Klarg3gQ8HbgfK2DwQhH5EvAPgKepqorIuVaAvAt4g6r+pYi8eWr7a4ENVX2OiNSBj4vIR1X1m+d5/RuBw6r6DICzXitT1WMi8lPAfwGeDawB3xCRt6jqaeB/V9U1EWkCnxaR94fts/AU4NWq+kkROQT82/Aa68BHReRlwKfOtV1VPwS0gY+p6v8lIh8E/jXwA8D1wO8BH56xH47jOI7jOLuSK07BDXxKVe8L0/OfA44CG8AA+A8i8iNAb/oJYRC6qKp/GTb9wdTDPwj8Y/n/27vzaDur+ozj34ckkACSCMQsyhRAMDIGuCIqIqJWVCpgGeoKlaiFoqaiDBpalkZbKy1VKIhCRAbRpSKDRGwZjBEpS4ZMECJBNEotUAgVQhhMIHn6x7tvOBxucs9J7s0Z7vNZ66573v3u992/c3aGffbZZ/+k+cCdwFZUA8k1WQzsLOkCSYcBtV8Z7x0gLgAW2n7U9vJyzfbl3CfLLPIdpWxtbdV7
yPYd5fEbgJ/bXmL7ReC7wMFrKQdYAdxYE+Ottl8oj8fXNybppDIjPXvJkiVNhBkRERHRntp1gFu7j85KYHgZyB0AXA0czkuDuEYI+DvbE8vPTrZvXlNl208C+wA/B04GLukjtlV1ca4Chks6BHgn8Cbb+wDzgJFNxPpsE3X78oJf2odldYzlzcIrZuxtT7fdY7tn7Nix69l0REREROu16wD3FSRtDoy2/R/Ap6kGoKvZfgp4StJBpWhSzembgI9JGlHutZukzcrjRX20tTWwke1rgLOA/ZoIdTTwpO3nJE0ADmzi2np3AW+TtLWkYcAHgVvXUh4REREx5LXrGty+vAq4XtJIqhnZU/uo82HgUkkGamdoL6H6eH5u+cLaEuDIMpDta6flbYHLJPW+ATiziThvBE4ua4YfoFqmsE5sPyppKjCrxPkT29cDrKk8IiIiYqhTs9leuomkw4GdbZ/f6ljaQU9Pj2fPnt3qMNZLMpmtu8HKZLZixYqm4thiiy0arttMJqDnnnuu/0o1Ro5sfGVRM9nJmrlvMpmtu2Qya0+dmMmsmT8bzfxdaZfn18kkzbHd09e5TprBHXC2b2h1DBERERExsIb0ADe6z7Jlyxqu28yMJcDKlSsbrrvppps2XPeJJ55ouO6YMX3tjrdmzcwQNDMr1Wwc7WCTTTYZtHuPGjVq0O4d66bZ2bERI0YMUiRRqxNnLZv9JCXaQz6jjYiIiIiukgFuRERERHSV9R7gSrKkr9Qcny5p2vredw1tjZH08ZrjP5N09WC0tSGVtMBHtzqOiIiIiG4wEDO4y4EPlC23miZp+NqO64wBVg9wbT9iOwPDiIiIiFhtIAa4LwLTqZIvvIykv5B0p6R5kn4qaVwpnybpSkm3A1dKmixphqSfATMlbS5ppqS5khZIOqLc8mxgF0nzJZ0jabyk+8o9R0q6rNSfJ+ntpXyypGsl3SjpQUn/2t8TkvRzSeeWFLb3S3pDuceDkv6ppt6pku4rP58qZePLNd+UtFDSzZJGlXMTJd0h6V5J10l6dR9tv6PEv0DSpZI2KeXvlbRI0hxJ50u6oea1PL3m+vskjS+Pj5d0V3m9Li5JISIiIiK62kCtwb0QmCRpdF35fwEH2t4X+D7wmZpzuwPvtP3BcrwfcLTttwF/Ao6yvR/wduArJUHDVOC3Jd3uGXVtfQKw7b2oMntdUZJCAEwEjgP2Ao6TtH0Dz2lF2VvtIuD6cv89gcmStpK0P1ViiTdSZSs7UdK+5dpdgQtt7wE8BfxlKf828FnbewMLgM/XNljivRw4rjyP4VQZ2EYCFwPvsb0/0G9OXUmvL8/5LbYnUqU8nrT2qyIiIiI634AMcG0/TTV4+2Tdqe2AmyQtAM4A9qg5N8P28zXHt9j+Y3ks4J8l3Qv8lCqz2Lh+wjgI+E6JZxHwELBbOTfT9lLbfwJ+BezYwNOaUX4vABbaftT2cmAxsH1p7zrbz9p+BrgWeGu55ne255fHc4DxZfA/xnZvSt0rgIPr2nxdufbXdXUmAItt/66Uf6+B+N8B7A/cLWl+Od65vpKkk8pM9ewlS5Y0cNuIiIiI9jaQuyicB3wU2Kym7ALga2U28m+B2hQ+z9ZdX3s8iWqWcv8y+/hY3bXNqk0ztJLG9v/tvWZV3fWrGrh+XdpbVy/y8n7sfZ0EXFFmuyfafp3tafUX255uu8d2z9ix/U4MR0RERLS9ARvgltnXq6gGub1GAw+Xxyc0cbvRwOO2XyhraXtnXJcBr1rDNbdRPoKXtBuwA/DA2hqR9G1JBzQRV317R0raVNJmwFGlrE+2lwJPSuqd5f1r4Na6ag9Qzfa+tq7OA8DOvWtrqZYe9Po91fIOJO0H7FTKZwJHS3pNObelpEZmriMiIiI62kDvg/sVoHY3hWnADyXNARpP1wTfBXrK0oYPAYsAbP8fcHv5ItU5ddd8HdioXPMDYHJZUrA2ewOPNBHXarbnUq2XvQu4E7jE9rx+LjsBOKcs
vZgIfLHunn+iWtf7w/I8VgEXlaUcHwduLK/lMmBpuewaYEtJC4EpwK/LvX4FnAXcXNq7BdhmXZ5rRERERCeR7VbH0BKStgC+ZfuYVsfSCEmb236mfNnuQuBB2+cOZBs9PT2ePXv2QN5yg1u6dGn/lYqk6n25ZtJRJjVtRES0mqQ5ZUOAVxiymcxsP90pg9vixPJlsYVUSzgubnE8EREREW1pML/8FAOozNYO6IxtNxo9un6nuva3ww47tDqEQdXMzHez9TfaqPH36MuX97di6eU23njjQYlj2LBsRx0RMdiG7AxuRERERHSnDHAjIiIioqv0O8CVtLKkel0o6R5Jp0na4APjkub2vTXH75c0dRDbG7D7l9S/fS6CHkiSxkj6+GC3ExEREdHOGhmoPl8SBewBvAt4D3UpZjeQicDqAa7tGbbPHqzGBvv+g2QM1XZiEREREUNWUzOxth8HTgKmqDJS0mWSFkiaV5IyIGmypB9JukXS7yVNkXRqqXOHpC1LvV0k3ShpjqTbJE0o5ceUvW7vkfQLSRtT7Rl7XJlNPq608bVSf5yk60r9eyS9uT52Sd8oKWkXSvpCTfnvJX1B0tzyPCbUPIfe+19err9D0mJJh0i6VNL9ki7vr42a88PKve4rbX16ba+3pGmSrpT0S0kPSjqx5twZku6WdG9NW2cDu5TX6BxJ25TXb35p8619txQRERHRPZreRcH2YknDgNcAx1dF3qsMDG9WlUUMYE9gX6rUsb8BPmt7X0nnUiVvOA+YDpxs+0FJb6RK1nAo8Dng3bYfljTG9gpJnwN6bE+BagBaE9b5wK22jyqxbd5H6P9g+4/l/ExJe9u+t5x7wvZ+5eP904G/6eP6VwNvAt4PzADeUurdLWmi7fn9tAHVLPS2tvcsz6GRTU33Bg6kSoE8T9JPymu7K3AAVUreGZIOBqYCe5b0xkg6DbjJ9pdKTK/YnFXSSVRvWrr+2/wRERExNKzvWtqDgO8A2F4EPAT0DnBn2V5mewlV1q0fl/IFVOloNwfeTJW1az7Vvq69mbZuBy4vM5aN7KlzKPCNEsfKkha33rGS5gLzgD2A3WvOXVt+zwHGr6GNH7vKirEAeMz2AturqPal7b1mbW0ALKZKuXuBpMOApxt4btfbft72E8AsqkHtn5efecBcYALVgLfe3cCHJU0D9rK9rL6C7em2e2z3jB07toFwIiIiItpb0zO4knYGVgKP91O1dtPJVTXHq0q7GwFP9c421rJ9cpnRfR8wR9L+zcZZF/NOVDOzb7D9ZFlWMLKPWFey5tekNv765za8gTYo5fsA7wZOBo4FPtJP+PWp5kw1a/tl2y9L9iBpfF17vygzu++jesPwVdvf7qe9iIiIiI7W1AyupLHARcDXymzmbcCkcm43YAfggUbuZftp4HeSjinXqwz+kLSL7Tttfw5YAmwPLANetYbbzQQ+Vq4dJql+t/8tgGeBpZLGUX1RbqD124akrYGNbF8DnAXsV8qnSJqyhvseoWqt81bAIVSzsjcBHymz4EjaVtJrqHuNJO1INdv8TeCS3vYiIiIiulkjM7ijyhKCEcCLwJXAV8u5rwPfkLSgnJtse7mkRtufVK4/q9z/+8A9wDmSdqWaqZxZyv4bmFpi+XLdfU4Bpkv6KNUs7MeAX/aetH2PpHnAIuAPVEsgBlSDbWwLXKaXtlk7s/yesJaY7qVamrA18I+2HwEekfR64JfltX4GON72byXdLuk+4D+B+4AzJL1Q6nxofZ9nRERERLtTNREbrSTpBuADtlfUlU8DnrH9bxsijp6eHs+ePXtDNBVDSFL1vlxS9UZEDAxJc2z3mWeg6TW4MfBsH97qGCIiIiK6RWZwYzVJS6h2wug2WwNPtDqIWC/pw86W/ut86cPO1q39t6PtPreAygA3up6k2Wv6CCM6Q/qws6X/Ol/6sLMNxf5b331wIyIiIiLaSga4EREREdFVMsCNoWB6qwOI9ZY+7Gzpv86XPuxs
Q67/sgY3IiIiIrpKZnAjIiIioqtkgBtdTdJhkh6Q9BtJU1sdT/RP0qWSHi8Z+XrLtpR0i6QHy+9XtzLGWDNJ20uaJelXkhZKOqWUpw87QEkNf5eke0r/faGU7yTpzvJv6Q8kNZ4JJVpC0jBJ80oyqSHXhxngRteSNAy4EHgPsDvwQUm7tzaqaMDlwGF1ZVOBmbZ3pUrfnTcr7etF4DTbuwMHAp8of+/Sh51hOXCo7X2AicBhkg4E/gU41/ZrgSeBj7YwxmjMKcD9NcdDqg8zwI1udgDwG9uLSxrk7wNHtDim6IftXwB/rCs+AriiPL4COHKDBhUNs/2o7bnl8TKq/2C3JX3YEVx5phyOKD8GDgWuLuXpvzYnaTvgfcAl5VgMsT7MADe62bbAH2qO/6eURecZZ/vR8vh/gXGtDCYaI2k8sC9wJ+nDjlE+2p4PPA7cAvwWeMr2i6VK/i1tf+cBnwFWleOtGGJ9mAFuRHQUV1u/ZPuXNidpc+Aa4FO2n649lz5sb7ZX2p4IbEf1SdiEFocUTZB0OPC47TmtjqWVhrc6gIhB9DCwfc3xdqUsOs9jkrax/aikbahmlqJNSRpBNbj9ru1rS3H6sMPYfkrSLOBNwBhJw8sMYP4tbW9vAd4v6b3ASGAL4N8ZYn2YGdzoZncDu5Zvjm4M/BUwo8UxxbqZAZxQHp8AXN/CWGItylq/bwH32/5qzan0YQeQNFbSmPJ4FPAuqnXUs4CjS7X0Xxuzfabt7WyPp/p/72e2JzHE+jCJHqKrlXew5wHDgEttf6nFIUU/JH0POATYGngM+DzwI+AqYAfgIeBY2/VfRIs2IOkg4DZgAS+t//t7qnW46cM2J2lvqi8gDaOaBLvK9hcl7Uz1Rd0tgXnA8baXty7SaISkQ4DTbR8+1PowA9yIiIiI6CpZohARERERXSUD3IiIiIjoKhngRkRERERXyQA3IiIiIrpKBrgRERER0VUywI2IiIiIrpIBbkRERER0lQxwIyIiIqKr/D9dM3cJQ9siswAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" + "cell_type": "code", + "metadata": { + "id": "HiLKl_rVUqHJ", + "outputId": "1a7c0344-6dd0-4228-9e62-611ad8847f84", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "# The graph is designed for a sampling rate of 16 kHz, but higher rates should work too.\n", + "# We also generate scores at a 10 Hz frame rate.\n", + "params = yamnet_params.Params(sample_rate=sr, patch_hop_seconds=0.1)\n", + "print(\"Sample rate =\", params.sample_rate)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Sample rate = 16000\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bHNJU9JUsxQs" + }, + "source": [ + "# Set up the YAMNet model.\n", + "class_names = yamnet_model.class_names('yamnet_class_map.csv')\n", + "yamnet = yamnet_model.yamnet_frames_model(params)\n", + "yamnet.load_weights('yamnet.h5')" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XCrhG2WrsxQx" + }, + "source": [ + "# Run the model.\n", + "scores, embeddings, spectrogram = yamnet(waveform)\n", + "scores = scores.numpy()\n", + "spectrogram = spectrogram.numpy()" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "XN67xLQasxQ2", + "outputId": "9b0744bd-bc2f-4996-c9d9-c7ad2e08725a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 483 + } + }, + "source": [ + "# Visualize the results.\n", + "plt.figure(figsize=(10, 8))\n", + "\n", + "# Plot the waveform.\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(waveform)\n", + "plt.xlim([0, len(waveform)])\n", + "# Plot the log-mel spectrogram (returned by the model).\n", + "plt.subplot(3, 1, 2)\n", + "plt.imshow(spectrogram.T, aspect='auto', interpolation='nearest', origin='bottom')\n", + "\n", + "# Plot and label the model output scores for the top-scoring classes.\n", + "mean_scores = np.mean(scores, axis=0)\n", + "top_N = 10\n", + "top_class_indices = 
np.argsort(mean_scores)[::-1][:top_N]\n", + "plt.subplot(3, 1, 3)\n", + "plt.imshow(scores[:, top_class_indices].T, aspect='auto', interpolation='nearest', cmap='gray_r')\n", + "# Compensate for the patch_window_seconds (0.96s) context window to align with spectrogram.\n", + "patch_padding = (params.patch_window_seconds / 2) / params.patch_hop_seconds\n", + "plt.xlim([-patch_padding, scores.shape[0] + patch_padding])\n", + "# Label the top_N classes.\n", + "yticks = range(0, top_N, 1)\n", + "plt.yticks(yticks, [class_names[top_class_indices[x]] for x in yticks])\n", + "_ = plt.ylim(-0.5 + np.array([top_N, 0]))\n" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAArgAAAHSCAYAAAAHR7iOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd5wcdd3A8c93ZtvVXBrpIYGEEkggEHqVDlEQC4IFUJHHR1F5eBRDFZES7GJ5BEEUVATBAoYiobcAoYWEAOmk9+Tqtpnf88fM3e1dbu9297bd3ff9yr2yO/U3v92Z/c5vfkWMMSillFJKKdVfWKVOgFJKKaWUUvmkAa5SSimllOpXNMBVSimllFL9iga4SimllFKqX9EAVymllFJK9Ssa4CqllFJKqX4lUOoEqPIxbNgwM2HChFInQymllFKqR6+//voWY8zwruZpgKvaTJgwgfnz55c6GUoppZRSPRKRVenmaRUFpZRSSinVr2iAq5RSSiml+hUNcJVSSimlVL+iAa5SWWiKJZkwaw73v7a61ElRSimlVBoa4CqVhQ31UQB+++yyEqdEKaWUUulogKuUUkoppfoVDXDLnIicJiLvi8hSEZnVxfwLRWSziLzl/12UMu8CEVni/11Q3JQrpZRSSpWG9oNbxkTEBn4NnAysAV4TkYeMMe92WvQ+Y8wlndYdAnwPmAEY4HV/3e1FSLpSSimlVMloCW55OxRYaoxZboyJA38Fzspw3VOBJ4wx2/yg9gngtAKlc8BavL6eC+96lVjSKXVSlFJKKeXTALe8jQFSm+uv8ad19kkRWSAiD4jIuGzWFZGLRWS+iMzfvHlzvtLdbxnT8f2sBxfwzPubefPDHaVJkFJKKaV2oQFu3/cwMMEYMw2vlPaP2axsjLndGDPDGDNj+PAuh3NWwEV/nM+MG55onyDef2+v2QnAbdqrglJK9QvGGO5/bTVNsWSpk6J6QQPc8rYWGJfyfqw/rY0xZqsxJua/vQM4ONN1VebmLt7IlsY4H25rAmD55qYO85dvaepqNaWUUn3Iy8u2cv7vX+XyBxdwwe9fLXVyVC9ogFveXgMmi8hEEQkB5wIPpS4gIqNS3p4JLPZfPw6cIiKDRWQwcIo/TfVCuqoIq7Y2FzklSiml8u28383j+SVbAJi/Sttk92Xai0IZM8YkReQSvMDUBn5vjFkkItcD840xDwHfFJEzgSSwDbjQX3ebiPwAL0gGuN4Ys63oB6GUUkopVW
Qa4JY5Y8wjwCOdpl2b8voK4Io06/4e+H1BEzjAvLZS7xGUGghWbW3ijudXcN2Z+2FbUurkqBL5x5trOHv62FInQ+VAqygolYUtjfFSJ0EpVQRf/8sb3DNvFYvW7Sx1UlQJ/c99b5c6CSpHGuAqpZRSaexsSZQ6CUWx7zWPMfvR9wBojmvvAarv0wBXqR78803tfEKpgWbh2noAvnDnwGhJ35Jw+O2zy3hs4XqmXPs4C9YMvL69//7GmlInQeWRBrhK9eDS+95qe710U2MJU6KUKobV2wZuryhf/dMbACxYk33VDNc1mM6j4fQhf37lw1InQeWRBrhKKaVUite1eyhiSTfjZR9+ex0TZs1hjysf4dYnlxYwVYWV7nN33L4btA9kGuAqpZRSKVKf2gwEXQVwtz65JOP1v3Hvm22vfzb3g7ykqZz89TUt2e2LNMBVqhtLNzWUOglKqSIaiKV1O5p37R1moDSuy8TsR94rdRJUDjTAVaobn79jYDQwUUp5XlmxdZdp+17zWAlSUjzffXBBXrd376t9r8Szuxubhpj2KtEXaYCrVDecPtxgQimVgy5O+ZaEU/x0FNHcxZvyur0r/v5OXrdXDO+uqy91ElSeaYCrlFJKqQFtiVZH63d0qF6lurG5IVbqJCilimj5lqYup6/e1sy4IZVFTk35i6fpbWHCrDkMrgyyvTnBvCtOZOSgSJFTlp3L7tcRy/obLcFVKo1XV2wrdRKUUkV29T8Xdjn9mB8+XeSUlN4ry3etj9zZXlc/mnbe9mavodpbq7XbtXL16optvLayf/7WaYCrVBr5HtThiXc3knAy71tSKaVK6TO3z8vLdqIJve6Vq3Nue5lP//blUiejIDTAVSqNfI7H/twHm/nK3fP5eT/sI1Ippbpz6X1vcV6egmVVGNEMG1LeM29Vh36Py5kGuEql8fO5mXd03pNtTV4/k8s3d12/TymlSsHtod/fD7emH7Y4m2F5X86guoMqrtTP/u3VO3pc/pdPLuGafy7k4bfXsbE+2jZ9/c6WgqSvtzTAVSqNpjyW4LZ6dOEGnl+yOe/bVUqpXOxx5SPdzo8m05fstdaxzVRLvDy7W8skUF+1tf8VTlz5j/bu3D5z+7wub3a2N8U5+AdP8NnfzeMnT7Q/gfzBv99l9bZmvnrP6xxx81O8sGQLALMffY+bHllc+MRnQANcpdLIVxe4O5sTHYb+bL0QKNXXPfT2Op54d2NWJXnlbGvjwOo1JV0PCKmSTvrP9kePZzfC177XlueAGZkMXvfFP7xW+IT4mmJJVm9LX3KeL399bXWH99c9vGiXZZ5bspmtTXFeWtaxBP7fC9ZzzA+f5rFFGwD4/J2vMGHWHH777DJuf2554RKdBQ1wy5yInCYi74vIUhGZ1cX8y0TkXRFZICJPisjuKfMcEXnL/3uouCnv/zJ9LLOyH975q4FnZ0uiQwnPzpYE37z3Tb5y93z+Nn9NCVOWPwffMLfb+cl+1ki0ux4QWj37QfonTve+ujrtvHRi3ZQIl8rrq3ru5aGY1cv2+97jHPPDp7n50eKWhN798qoON6tbG2N8669vdbNGeve++iEvLt3CvBJWTdEAt4yJiA38GjgdmAKcJyJTOi32JjDDGDMNeAD4Ycq8FmPMgf7fmUVJ9ADyzzfXlToJShXFzpYEB3z/P9zy+Hss3dTInlc+wgHf/0/b/MvzPNRruXp4wcA752957L28Dr2799XlV4qbSf1ToCi94KTeRN327HLWbC98SW6q1JuWr/35jZy3c8Xf3+Fzd7zCubfP4+n38jtSXqY0wC1vhwJLjTHLjTFx4K/AWakLGGOeNsa0ngHzgLFFTuOAdctj2T2ea2WAh99ex4RZc9jRHM9vopQqgNYA57Znl3PST5/FyeSZbj/0P/cNzMEAuhp692/zsy+9LVeZ9m6zvanw1+tzO/U2sWDNzoLvM1VqvdxX8tQXfDGrd6TSALe8jQFSryJr/GnpfBlIfeYUEZ
H5IjJPRD7e1QoicrG/zPzNm7XxUzEYY7jjhRUArEgzapJS5WJjfZTZj+Z2M9eX9Jd6xJnK9ng71wn9zgP9p9S+KcPGb53rrBbC/E7VJXpTitobmVTbyMaEWXN4+v3iluRqgNtPiMjngRnAj1Im726MmQF8Fvi5iOzZeT1jzO3GmBnGmBnDhw8vUmpV3lqwKVVA9732IYfd9GSpk1EUf34lf4/h+4JsS+E/e4f2Y/vTJwrTj/ni9fVMmDWHo295qiDbz1Z9NMEn/++lvG/3i3e9VtSnPxrglre1wLiU92P9aR2IyEnAVcCZxpi2ZsDGmLX+/8uBZ4DphUxsf2GMyWtr6ldWdKxk/8S7G9ta7YpI3vajVD45ruG7D+76aDqdP81bVcDUFF66IXr7q2z7pV29LX99nT713sa8bas/OP0XzwOwZnvx+pPtLtCcdt1/0s7rrX8XsR67Brjl7TVgsohMFJEQcC7QoTcEEZkO3IYX3G5KmT5YRML+62HAUcC7RUt5H3b//NU9tqbOVMJxuemRjo93V25t5p21xa1XpVS2bpiT3eWimAHih1ubB1yVgnzLpSBtZ0t2/d6m88S7GuCWWn2ePsts5dorQy40wC1jxpgkcAnwOLAYuN8Ys0hErheR1l4RfgRUA3/r1B3YvsB8EXkbeBqYbYzRADcDLy7NX7cmrv4Iqz7qrhdXZr1OMYLOf765lmN/9DT/e//AbPCVL2tzKC38i1+N440Pe1c/M5fuxQay9zbU532bv3gyfyN1lisNcMucMeYRY8xexpg9jTE3+tOuNcY85L8+yRgzonN3YMaYl4wxU40xB/j/31nK4yhHi9bt5OG3S9vtz8d//eKA61xe9U7ScZkwaw53v7yyYPtoznEUv1seez/PKdnV/z2zDIC/v7m2JOdOJoMj9AXPfpB9g5/WnmM+8Zv8189U6cUS+f/O/eGllXnfZrnRAFcNWDNvfYFv3PvmLtPzWS1W6HljSzY15m+Hql974t2NTLrK6yjl2n/tOupQvqzbkVtdwN8+uyzPKdnV+xsb2l4XoiFMT36YY/eA5aa+JbebGO3aML/2uabnwTb625PAhUWqoqcBrlKd5LPZl6HnC9OGnVGOvuWpLodmdF3DxvpoHlOkMvXysq1MmDWHCbPmsLO5NPXVUjXFknzl7vkdps289fmC7KtcBjHZ2ZLgpaVbiCddbn5kMdOue7zD/JVbi9sJPtDWxV9fl20js1YHXv9EnlNSWqWuyx3NoHT2oRI/acy3j/7yBd5dV89jC9d3GB0x3zTAVaqAMrl2/v3NtazZ3sIxP3yaDTvbg9kPtzZz2M1PcthNT+ZcoqZyd97v2rtFOuD6wrUqzsTf31jDft97fJfpi9blv24ewK+eXlqQ7Wbr4rvn89k7XuEPL63gtueWUx/NrdRRqXRSnwgUW6Yjo+VSH77cnXHr83z1T2+wx5WPFGwfGuCqAa/zHXyxu+5KbZmcOjrap297ic0NXh3DP81bVfKShoGk3OpZXtZNgypjDMYYjvnhU9z86OKClogUW+tISp17IlEqX97fULoA93sPFa6aUV9SHy3MEzINcNWAd1/K6DR/m7+aD7uoKpCJrqoYZCJ1HPR/vLm2rRHN9qb2k/43zyzj4QXrc9q+yt5tXdQlve+1jgMBzFu+tcOwlqXysyc+4IY5i1m9rYXbnl1e0BKRTPQ2wN5UH+Wfb66lKZZZaa0+3VC98VKWvea8sGRL3vb9lywGF9nzykdYsGZHzwtmoLXgpFxMu+4/GZdmZyOQ9y0q1Qeklpo+8/5m5q/azvLNjbzxYe4XkP+9/23u/+oRHaYd/6Nnst7OLY+9h+O6xDud8OV2UerPftLFiEXfffAdjp48nKNmP8VJ++7G3MVeK/QLjpjA3iNrCpaWnkb++curq9nSqTeBF5du4ahJwwqWpu5c+IfXeO6DzSy58XSCdvZlKIf6I6dNHTMoo+V//fRSdh9ayZeOmkggh/2p0tjRHK
euMlTqZGTdgGvZ5kaOnlz8c8txDWf+6kVWzp7Z6209+8HmPKQovyZf9Wheji2VXg3UgJTah+ZjizbwwOtrehXcAry6ctsu0zbk2EDsx//ZNcD640sraSjQoxzVrrsSmqNme0Nptga3AOt2FrYEsaeeAjoHtwCfu+OVQiWnR8/5P57/eqt3DWMyHQzlz698yE2PvMe9r+Y+1G6+BjBQmXtpWf76G++N5oST1fI3PrK4QCkpnkKUlpYjDXDVgLS+wEFJIXy4rVnrbBXB5+/MLji85dHC1Q9NOi5vrc7PY8li+/bfsh+IYWkvusxbsCb3rof64vWgr/van98odRIAmJNl1a9yq5+fi5/P3bUApRw8+k5+q+FpgKtUHrXEnbYf6UI0CtvaqH1Qlpv3CtRIJZpw2vq8HShO+umzOa/7t9fX5Lzuis1NWS3fnxryqdIoVMOqTGysL8/qbpfel99hfLUOrhqQMm3Akq1v3PtG2+PrTx08Nu/b728dfpebcurE/qFePuLP1UDsrePBN9Zmtfycd9bzsQNGFyQtzy/ZzD/eWMtPzjmgYD26DMTPuNxc//C7pU5C2YnluXRcS3DVgFSoDuJfTGmR+0AvSpTSeX7JFlZtza60SWXmw63NOXdiX4jSmMsfXJD3bWZiex4HtegrdcbnLt6Y1fKPLdxQkHQkHZcv3Pkqf39zLR/71QsFy78nF2c/TG8h9NVAu6eGn5nI9fehr+ZZKWiAqwacbU2FK6VrybLBQi6Oy6FnBtWzY3/0dM7rTruutANBdOWND7eXOgk8n8culcrJnDzXFWz13JL21u0L19YztUDfqxVbyuMmea3fxdtFf3ytT9U1f/aD0t0g5CO4Lmf5rP6jAa4acF7JcYhK1X898355lGjlU65VHN7bUJjR0XqyfHPuDcz6iy/9Yf4u0z4swNOmP768Mu/bzMVdL65kxZYm5i7exMd//WKpk5Oxrj6nYlm/s38P3Z7Perga4CqlBrwL73qt1EnIu3vmrcppvdl57BXikSxKOk/4Se4NzFo9trDvDoayJM2Qscf+6GliSafL7uByVS6NVe98YQUf+fEzpU5GTkrV0PCZMuzDNp8eejt/bQ80wFWqDzrzVy+UOgmqk+15rPqSj8eQuW6jN91tdfbi0uJWUfjqnzLvesoYQ3O8MI1Nc3Hyz55LO2/vqx9jxg1zacxT49hiVKXq7/a+pjQ9nPzf00tLst9i+tdba9uqr/SGBriqZJpiSbbmsVRiIFmwZqeObFZmLrjrVSbMmsOEWXO45+WVXPmPd3h1xa6Df2Qi1+GiO5swa05JG6Vsb04wYdYcftrFyHClNvmqR5ly7eM518n/yX/eZ8KsOXlOVff2/97jba9/9dQSXl9V+nrWA1XCMSUpxd04AK773/rrW22D6vSGBrgqrZVbmnbp1PrVFds61GF9fdV2bnpkMRf9cX6HH9L6aIIr//FO2lbAi9fXs9/3HufgG+bSEs+sNGHOgvXc/GjvR5HpL3WYTvjxMwNmRJq+ILXk85p/LeIvr3zIObe9DHgjZU2YNafbgPeDjQ3cP381AHe/vDJv6froL19gwqw5JDP4rizdVJg+fW99cgm/fXZZQbbd2YRZc5h63eM9Lpf0g5M/51iV45dPLW3b37peljZlG2RPmDWHH//ngx5Huetrin3DEO1lSfYeVz6Sp5RkznFNr3rx6Euj9vX25lwD3DInIqeJyPsislREZnUxPywi9/nzXxGRCSnzrvCnvy8ip2az318/vZTjf/wMe139KC8t3cIT725kwqw5nHPby3zm9nlc//C7/HvBOj75fy9x+3PLmbt4I48v8rracVzDdx9YwF9e+ZDfPbecpONyy2Pvcd7t8zjpp8/y/JLNnP6L59v2te+1j/Hmh9u5/bllxJKOfwKv56ZHFrNhZ5TF6+vZ3hTn6395g9ueXc4+1zzKl/7wGv96a21Oo8pc/+/+0f9gQyzJPS/n9uOs2hWiO7dUv356KQ
d832sNf85tL3P/a6t3WcYYwyk/e47LH1iAMYa7XlyZt/0vWuc1GutphLZbn1zCST9N/5i8t1Lr9v7uueVs9IexfmzhhrwHNg3RJP96a21biXp3P5Q/yUPp8pGzn2rbVy6lqrdlEfzneqMQTTh5CcYLrTUfM7kh663ejJzXyhjD44s2EEtmHiz39kbyq396Ped1v3Hvm73adzFNvOIRJsyak3PDcNE+1cqXiNjAB8DJwBrgNeA8Y8y7Kct8DZhmjPmqiJwLnG2M+YyITAHuBQ4FRgNzgb2MMWnPwhkzZpj58+fn/cdmRG24oCOnvDTrBEIBi+pwgEjQ3mV+a7216nCA9TtbOOLm3j/6KCejBkU4e/oYZk4bxR7DqqkI2RhjCtZJfDEV4ziKXWoE8My3j2fCsKq29/91z/y2G8RiWHbTGdhWx3wtRT4su+kM9ixBKdiSG09ncoFHiTvv0PFMHTOIj08fTWWo45hK76zZycf8evR/vfhwzr19Xs772WdkDY988xgsq/vzpBSfb2/97DMHcOK+IwgHLF5etpVDJw7ZJS97oxB5cszkYdzz5cNYu6OFkbWRDufZ9qY403+QW1/bXXntqpN4e/UO3l6zg031MdbsaOYbJ0zmuw8uYFBFkIcuOXqXdfri9wBg7mXHMvPWF4glXZ79zvGMH1KJiCAirxtjZnS1jga4ZUxEjgCuM8ac6r+/AsAYc3PKMo/7y7wsIgFgAzAcmJW6bOpy6fa3x77TzHW/f4hr/7WoUIekSqwmEqAhumtDld2HVrIqpTuizx8+nvc3NBC0LXY0J4gELZZsamxbd1h1CGNga6dHq2PqKjo0DhhWHebAcXW8tXoH25pi7DG8moqgzaqtTQyqDLJ6W/uy+4+pZeHarruoigQtBGG/0bWMH1LJsi1NvL+hnrOnj2GvETU0xx2qwwE2N8RwjKEiaLN+Zwtvr97JAePqWLa5kQlDK5m/ajvLNzcxbewg1u2I5rVlulLl6uzpY/jHm9mN1tZXjKyNMGm3ag6bOIQFa3e23RQPqw4xebca1u1oYfdhVcQSDhUhm0TSJZZ0Wbiunofz2GJflcaqWz6qAW5fJCKfAk4zxlzkv/8CcJgx5pKUZRb6y6zx3y8DDgOuA+YZY/7kT78TeNQY80CnfVwMXAwQGjnp4FEX/Lzgx6WUUkop1VvdBbj5K+tXfZIx5nbgdoADph9snrr6JA6+YW6JU5Wb4TVhDpkwmKRjGFIV4tUV29jaFO9TlerzYWRthHDQ4vCJQxlRGyYSsnlj1Xb2GlHDYXsM5Z6XVxJNuLywdAsXH7sHM6eOIukaogmHxevrGTu4gkEVIQBCAaG+JcmOljiL1tazenszx+21G6MGRaiPJmiOO1SGbNZsb2FnS4LxQyqpjyZojCaxLWHyiBr2GlHNC/6IViMHRRCE5niSMXUVbG6MsWZ7C+GAxfTxdSxe38CidTtZtyNKOGARd1w+ffA4hlaHcF2vZKYmEiBgC9sa4wytDmMwBG2Luoog4aDNpvoorgHXGJKOIeG4iEAkaLNuRwurtzWz98haogmH83//agk/KaUK7zefO4gDx9VxZB5apZeL4/YazoHj6ogmHY6bPBwRYeSgCEn/XG+IJqkMBQgHLKJJB1uEcMAmFLAIByzCQYvV21o49eeFq3OuSk8D3PK2FhiX8n6sP62rZdb4VRQGAVszXLeDoC0MrQ7z9LeP71Xn250fUz/3nY90GAb1oqMncscLK7pc97+P35OVW5p41G8leuGRE/jDSyu73d+Km8/Iqp5mX62DlM5dFx7CEXsO7bL+cVeO22t42nlHTRqWdt7Z07NOWpt9RtZmtNzBuw/JfSe+6uHVaeftNaKmw/uVs2eW5PuwcvbMDu831kc57KYnC77f731sCt9/+F3e+8Fpu3xfipEPx0we1jZ8b11lkLeuPaUo+50yqpZ31/uN7Q4fzw0fn1rw/X5ww+mEAl2343Zd09YCv7ffwVs+OZWpY+
qYMjqzc6wvWXrj6QTswrSF33tkDUfuOZSXluVvZMv//M+x7Dm8epf67aneWbOThxes4/bnlvd6f6nXkeZ4klVbm5m8WzVJ1xCyrS7rZffV37+Vs2dSH02wqT7KpN3ar+NyS/p1tIpCGfMD1g+AE/GC09eAzxpjFqUs83Vgakojs08YY84Rkf2Av9DeyOxJYHImjczA6+OxtRsc6PoifMf5M7jobm/5577zEcYPrWyb9+TijXz5j/N3WfcfXzuSA8fV8fiijR1agt7z5UMZU1fBHn5wYoxh2eZGJu1Ww4tLt7BmezPfffCdDvvfc3gVD11yNFXh7O7TPvGbF3njw74z7nl3fnrOAXzioLGlTkaf9u66es649fmeF8zRY5cew2dum9f2JCHdj3brObJy9kyOmv1UXjo6T/WLcw/krAPHpJ1f6HwA79iSjsv6nVHGDfGuF42xZIf+XfPl7e+d0tZ7RecbCijMD/1Rk4by54sOz3q9xxauz3iQileuPLHDzVA2N/hvfrid6eMHl32Q89vPH8xp+48s+H62NMaY0csnll19t3qSeoNTzP0C/PLJJXnpOaSYujvW7hqZaQluGTPGJEXkEuBxwAZ+b4xZJCLXA/ONMQ8BdwL3iMhSYBtwrr/uIhG5H3gXSAJf7y647ex/T9mby07ei4RjMHg3QT/85DQuf3ABV8/clymjazlyz/bSvtTgFuDEfUd0eP/Mt48nErQZOSgCwKn7jWDSbtUs3dTIzKmjOGZyx1JFEWm7S2stVTxhnxEMrQr12Fq4JxOGVvWLAPeioydqcJsHhS752mdkLW9eczJPv79pl/Mi1ZIbT28bfWy/0bV5C3DPP2J37n55FWdMHdXtclNG1xa0RPvNa04GIGBbbcEteL2b5HO/VSGbaz46hUEVwYyCgB99ahrfeWBBzvs7fu/h/OycAxlcFcp5G4fvMTTjZUfURthnZA3vbfC6msrm6dX08YOzTlsxXXjkBK47c7+i7W9YdbiX6+f2mff2N+yuCw/Jed0vHzOxzwS4uQbxrTTALXPGmEeARzpNuzbldRT4dJp1bwRuzHXfIkIo0H4innPIOM4+aAzBDB8ZhWyLMw8cDdChS6TWbf/8Mwfy0V++wNGT0z8WTzW8pncXo2IZNShSlMEkrv7olILvQ+XuLxcdxqETvSoXliXdBrcAQduitdbAmQeO5j/v9r7bsPMOHc/1Z+3P9Wft3+tt5eoLh+/ODz5evP0vuv60jJa79bzp/OyJDzh7+picAtzHLj2GHz/+Pr87f0avu7Krq8wuUHrs0mPZsDPKoIpgr/ZbbooZ3ObD/KtPLvo+z5g6ko/ss1vO60cCmVVl6w80wFVZ6RzcvjjrBJrSjI/+wY2nd7ut/ccMYt4VJzKitriB65TRtfy9gF3mPPyNo3v92Ev1PStuPoOr/7mQLx09kT27qQeciY9OG80lf+l9h+w3f2Jqr7fRW5kGt6MHRVhXxFEGzzxgNGceMDrn9fcZWcsdF+RektbZy1ec0GMf3S/OOqHtdevTMFUavzj3wFInISe9LT0uht6W3LbSkcxUr4ypq9il4U42Rg6KFH1Agi8dNbFg277kI5MYnGVpjCq9iZ2eMORCRLjx7Km9Dm7zJddS0wPGDspbGrK5ef3GiZN7vb+3ri1+iVq+jBpU0eX0E/fZjVeuPJF/f+NoxtR1vYwqvu7qsxfS0ZPSNxJWHWmAqwacQt7BfvvUvbEt4U9fPoynv318wfbz1P8eV7BtD0SPXXpMqZOQd5+YntsP8HdO3SdvacjmHMjHk5xsH/WXm+ouGsz+4rzpjKiNsP+Y/N14fDMPNxP5cMzkYbxz3SmlTuem1/gAACAASURBVEbWPn1w6do+7DWiPG6gC2XuZfn7bdMAV6kCOHryMIb0otFJdz4xfUxbbxMqP8K9rJf2+KXH5ikl+ZNt7yKt8lmvM5thVT+yd+71CvuLv3/tyF2mdRX09tb4IZU9L1QEZ08fQ03Eaw
yYr8fSxfDDT00r2b6nja0r2b6LYdJu+ftt0wBXqQIpcs0L1Uu9KUnae2Tu1XTSuaZEjQi768OzkIpdVSnVWQdmVxf37BxLx3syudOP+9zLCnPjdND48giS+movMKX8rqbrW1ntSnNKqQIp1CUwmy6FVOZqIsGyqqrwpaMmlGS/hXryUM566uGiszOzDIgzJSJ8+5S9AK8aUmqH9vk0tKpv9EjTn/30nANKnYR+TwNcpfJk7OCODUAKcZc/uDLIp2f0zVKPvmBSGVX9EBEuP23vnNYd2osgtZSt8/cpQEl4Jj42rfs+gjvrXNKaT5ecMJmVs2cWtBrSoMr+1b1YX9S573cF5+T5t00DXKXypHNL/EKU4N509tSSPh7r73IZFvRbBWyw87XjJ+W03r0XZz+aVjnobhjpnnzlmNx7R8n2nBo7uDzqsKrem5ZlryF7Du99jytQ2n7dp+axwWI+XTUzv9WyNMBVqkAKEYdqbFt4M3ZPP9rTG9fs2g3VxcfuUcjkcF8OwWpvuu7Ll9Q+WzP1v6dkX2J96ARvMI0jJ2U2YIxSqT6ZZT3gcg0Os/GFI3YvdRK6lO+BS3SgB6XyxJiO76UAZbhaclR4D/z3kV0OG/vvbxzNkKpQW2vvjfVRFq+vz7m3gkwd1kOd64cuOYob5izm1RXbAPhrGZTePvjfR+bUZ2suDWju+6/DaUk4WfXYoEqvqx4jSuH0qSP53kOLMl5+XIl6oAhYwmtXnZSXbZ06ZSSXk/vw1IVw/N75r7KhJbhqQPrledMLvo98l7b+7atH5LUvTJWdznk/ojbC8UXq2urqmfumnbf/6EH87vwZANz7lcNL3ghx+U1ncHA3peA9aS2RzZSIaHDbBx00PvfvSF6ZnhdJddExhX1i05VQwGLpTWcwOE8NQGsryu98+cMXD837NjXAVQPSx3oxRGc6FaH8jPH96YPHcveXdj3Z8zHalsrM29eWV+fzXz46ff1SyxIGVXh9iR6xZ+l72OjtQCqHTCyTwEcNCDWR7B6L5/Mx+h3+jWlPAnnuuq/c2nF8cMPpBdmuBrhKpehNB96zPzG11/vfrSbMjz59AMemNLapjZTf3XZ/V26tzEWEFTefscv0UA6N4srdt07ci598+gAWff/UUidFdeHrH9mz1EnIq3wVTOTiyEmZ3ZAeNjG7pxp9wexPTGWfkTWsuPmMgvXt2/+ujkrl6KwDR/dqCMah1bm3ij19/5EAnLBP+yPvP3zxEH74yWn8z8lev5g1GugWVWt/pMNrwrx+dX7qvvWGiFcH78H/PqJt2qLr+18QGApYfPLgsQWv26xyk8+hnAe6TKvWnDNjXIFTUlxBWzj30PE8dumxBS1N1iuIUr7p4+ryerJlMyJUdTjAS7NO6NB1TGr9zi8elXsXSCo3l5wwmUtOKFwXYLkYXhNmeE244MOa/tdxe3Dbs8sLuo9MfO34PfnNM8tKnYwO/u9zB5U6CXlRGbJpjjs5rXvKlBH8592NeU6RSqe31X7KzXOXf6Qo+9ESXKV8x+W5wVDQtvhmFn2kjq6rINgPHzmrvudTZTKE6rdP2Zs3rjmZ68/ar8v5//7G0UVOEew7qrbo+yyE3vQ5/Jt+EuSXg5cy6E7vyDKoW59PowZl38NKLvTXVClgj+FVBWnElemIRxeWaFhWpbqS65OM/z4+v/UzLUsYUhXic4d13W9nKXoVKVU3Ufl23qHjc143lwFRUp20b3F6H+kLRmfQnV62DeEycfb0MXnfZrnRALdMicgQEXlCRJb4/+/StFhEDhSRl0VkkYgsEJHPpMz7g4isEJG3/L8Di3sEKlPfPHEy+43W7r9U+ch1tKZCjepmW8JdFx7SYdq5h+S3XuKs0zOrW5pN1aNyNimH4YbTlaRnS7s7LL0zC9CTUCbu+uIhPS+UJxrglq9ZwJPGmMnAk/77zpqB840x+wGnAT8XkbqU+d8xxhzo/71V+CT3Xak/Wd88IbfhUbOxz8jSjzSlVD
q5luBGgoVrkf6RlAaYNeEAFxw5Ia/b728NeXqSSclhZ+cfMSEv+95jePbBdTmYOXVUqZOQN7sPLc2TiGL2CKEBbvk6C/ij//qPwMc7L2CM+cAYs8R/vQ7YBOR/OJAB5rJT9s6oi6L9RudeF+/aj07h0pP80q7OQ6AppdIK2RbvfP/UvNeF7R/lsoVz+Wkdh1Ged8WJOW+rNwOBlNLQ6vwMtNBZvp9GZKK7KnmFLN0t5qAsGuCWrxHGmPX+6w3AiO4WFpFDgRCQ2uT4Rr/qws9EpMs+rETkYhGZLyLzN2/enJeE9xV/+cph/CDNI7dMuig6bGIvKv6L13n/pw4ey0XHFn9kHKV6cuUZ2XUHNaI2927yMrX4+tNYcF15DcIxUEwf1zEoHTkokvO2chnGuZA+kuEwsZ8pUCB609m970M9W909pbm1CCN9FoMGuCUkInNFZGEXf2elLmeMMXQzoKCIjALuAb5ojHH9yVcA+wCHAEOA73a1rjHmdmPMDGPMjOHDB1bh75F7Dmsb1jSXR7IVod6dPjWRID/+9AHUFqABgVK9dfGxe7Jy9kyuOiP9MMGpfnDW/gVOkdcpf6GqQeRzhKq+4qPTMnvkfvgeQzh8j/432ECrTHuUKFRbCcsSVs6e2WX3f7/6bGmCzXw/IZl72XEF796wMw1wS8gYc5IxZv8u/v4FbPQD19YAdlNX2xCRWmAOcJUxZl7KttcbTwy4C8j/QM9KqX7vKxk+YTh5SrcPmcpeJn2NVpVw1KtCuCLDm5e/XnxE3voIX1iGI9SNqO25NPrCPNf5Tqdz47/WQphi+ZLf5/oj38xfF3wHjqvLqVFjb2mAW74eAi7wX18A/KvzAiISAv4B3G2MeaDTvNbgWPDq7y4saGr7KK39qlTP3rzm5B6XKbfx7XPRUwlTpgFhX5FJVYHOdW9747+O24PqMhyh7tT9Rva4zJePLs5gO5277xpcWZh6v+l893Tv887n+fzPrx+Vt21lQwPc8jUbOFlElgAn+e8RkRkicoe/zDnAscCFXXQH9mcReQd4BxgG3FDc5PcN44dUMm5IBdd+dErW6x4yof8+slMq1eCq9h/Zg8bX7TJ/WC+Gqe5LPndY7n3Hlqs/X3RYt/Mn5bHHg++ckr9gOZ8yKb1PHWWykL6UMmrlFafvU9Ru6V6+4gTCgfanFI9+65hebe/y0/bmvR+c1ttk5UwD3DJljNlqjDnRGDPZr8qwzZ8+3xhzkf/6T8aYYEpXYG3dgRljTjDGTPWrPHzeGNNYyuMpV5GgzfOXn8CxXdTBOmbysG7X7U0dJdE226qPaa0jeEmnbvQiQYu5lx1bolQVV38ope7sqEndX+eO72aEx68ck12pZm8HiCilQnaBl6oiZPP61Scxc+oo/uu4/A6c0tmdF8zo8L7zCGP7jqrt9qnGtLHp6ySfPX0MXz12z6LlW1fK71mBUmXipH1H8PySLWnnZ3JnXR3RU0z1LweNH0zItrj8tL2ZtFt1twGQ6vtCgfRB6TkzxvG751f0uI3vnrYP+4zSvr8zNbQ6zK+LMBzy7kN7N3rnQ5cczc+e+IBfPLkEgDOmjuQ3nzs4H0nLC/31VSpHmTyWPX6v4dRVBtnRnChCipQqvLrKEB/ceHqpk6HKwOQRmQWtJ08ZUZJGRqp7wzLs13fl7JnEky4tCYcDvv+ftmngDc+9cO1Orpq5b9kN4NF3nxco1QeIyIAbIUmpvuzerxxe6iT0K49+6xgNbstUXUoDts7VFToLBSwGVQR54n+O7TDIRyRoc+eFh5RdcAtagqtUwfW/WntKDTyjezGwQV913cd6bnw7bewgFqzZucv0t793CknHZegAaYDY1524b2bd/GVaal8OtARXqTTqKvPU8XsXEW6N1s1Vqiyla0d295cHXlfiFx7VcyOy+//riC6nD6oI9qngdvIALWV+6n+PY+5lx5U6GQWhAa5SacycOopT8tB5feceE847dDz7jynMiDhKqd6p7G
IwhzMPGM2k3fpOyVUxlbKVfD7dfn73j+j7qz2GV/fbKiQa4CqVRsC28nLR61widMp+fXvEJ6X6M6uLItybPzG1BClRxTRxWPoeBYrYFa3KIw1wlSowvTYq1Xd0VZpVVYajb+XT97qoaztz2qict/edU8tzQIdcfXCD9hrSF2mAq1SB9cO+4ZXqtyJBu6ijR5WDLxy++y7TpuVYjeqDG07n6x+Z1POCfUhfHqBiINNPTakC6+qRp1KqfPXXRjfpdBXAXXDkhIzX/+JR3rJXnrFPtwNDKFVM/fu5i1J5dtK+uzF38aas1tHwVqm+JbU+5j0DrPeE279wMKfsNzKrda796BQuPHJCr0fGKrVPHjSWB99YU+pkqDzRWy2lejCmrn187imjs39sd/KUjj8WGvAqpcrNA189gupwgCP2HJr1uiLS54NbgOvP2q/USVB5pAGuUj14cdYJvVp/6thBnHuIjmamVF80oR8EbpmYMWEIC79/KjWRPPX/3Qf198aEA41+mkplQUtflRoY/vG1IxlTV8FutQNvBDOl+gMNcJVSSqlOpo8fXOokqDIw6/R9Sp0ElSOtoqCUUkop1YWvHrdnqZOgcqQBrlJF8ImDxpY6CUoppXrw3g9OK3USVJ5ogFumRGSIiDwhIkv8/7t8XiYijoi85f89lDJ9ooi8IiJLReQ+EQkVL/Wqs0MnDuGYycMAr8WxUkqp8hMJ2m1B7vlH7DoAhuo7NMAtX7OAJ40xk4En/fddaTHGHOj/nZky/RbgZ8aYScB24MuFTa7qSWtr7EEVA7eVslJKlbtI0Gbl7Jlcf9b+pU6K6gUNcMvXWcAf/dd/BD6e6YriFRGeADyQy/qqMK6auS+/v3AGB46rK3VSlFJKqX5NA9zyNcIYs95/vQEYkWa5iIjMF5F5ItIaxA4Fdhhjkv77NcCYrlYWkYv99edv3rw5b4nvr6p70U9iJGhzwj7pPkallFJK5YsGuCUkInNFZGEXf2elLmeMMYBJs5ndjTEzgM8CPxeRrJp8GmNuN8bMMMbMGD58eG4HMoCcPMULUHerCQOw14hqoONoZ0oppZQqLe0Ht4SMMSelmyciG0VklDFmvYiMAjal2cZa///lIvIMMB14EKgTkYBfijsWWJv3AxiAWtuHRYI2AA9dcjQf//WL3P2lgTVevVJKKVXOtAS3fD0EXOC/vgD4V+cFRGSwiIT918OAo4B3/RLfp4FPdbe+6r1I0OaxS4/V0Y6UUkqpMqIBbvmaDZwsIkuAk/z3iMgMEbnDX2ZfYL6IvI0X0M42xrzrz/sucJmILMWrk3tnUVOvlFJKKVUiWkWhTBljtgIndjF9PnCR//olYGqa9ZcD+tw8T/568eFsrI+WOhlKKaWUyoAGuEpl4PA9hgKwamtTiVOilFJKqZ5ogKtUFsYNruRzh43n/CMmlDopSimllEpDA1ylsmBZwo1nd1krRCmllFJlQhuZKaWUUkqpfkUDXKWUUkop1a9ogKuUUkoppfoVDXCVUkoppVS/It6gV0qBiDQA75c6HWVqGLCl1IkoU5o36WnepKd5k57mTXqaN+kNxLzZ3RgzvKsZ2ouCSvW+MWZGqRNRjkRkvuZN1zRv0tO8SU/zJj3Nm/Q0b9LTvOlIqygopZRSSql+RQNcpZRSSinVr2iAq1LdXuoElDHNm/Q0b9LTvElP8yY9zZv0NG/S07xJoY3MlFJKKaVUv6IluEoppZRSql/RAFcBICKnicj7IrJURGaVOj2FIiK/F5FNIrIwZdoQEXlCRJb4/w/2p4uI3OrnyQIROShlnQv85ZeIyAUp0w8WkXf8dW4VESnuEeZGRMaJyNMi8q6ILBKRb/nTNW9EIiLyqoi87efN9/3pE0XkFf947hORkD897L9f6s+fkLKtK/zp74vIqSnT+/T5JyK2iLwpIv/232veACKy0v/OvyUi8/1pA/6cAhCROhF5QETeE5HFInKE5g2IyN7+96X1r15ELtW8yYExRv
8G+B9gA8uAPYAQ8DYwpdTpKtCxHgscBCxMmfZDYJb/ehZwi//6DOBRQIDDgVf86UOA5f7/g/3Xg/15r/rLir/u6aU+5gzzZRRwkP+6BvgAmKJ5Y/DTW+2/DgKv+MdxP3CuP/23wH/7r78G/NZ/fS5wn/96in9uhYGJ/jln94fzD7gM+Avwb/+95o13XCuBYZ2mDfhzyk/7H4GL/NchoE7zZpc8soENwO6aN9n/aQmuAjgUWGqMWW6MiQN/Bc4qcZoKwhjzHLCt0+Sz8C62+P9/PGX63cYzD6gTkVHAqcATxphtxpjtwBPAaf68WmPMPONdRe5O2VZZM8asN8a84b9uABYDY9C8wT/GRv9t0P8zwAnAA/70znnTmmcPACf6JSRnAX81xsSMMSuApXjnXp8+/0RkLDATuMN/L2jedGfAn1MiMgivsOFOAGNM3BizA82bzk4ElhljVqF5kzUNcBV4gczqlPdr/GkDxQhjzHr/9QZghP86Xb50N31NF9P7FP+x8XS8kkrNG9oewb8FbML7oVgG7DDGJP1FUo+nLQ/8+TuBoWSfZ33Fz4HLAdd/PxTNm1YG+I+IvC4iF/vT9JzySuk3A3f5VVvuEJEqNG86Oxe413+teZMlDXCVSuHf0Q7YrkVEpBp4ELjUGFOfOm8g540xxjHGHAiMxStV3KfESSoLIvJRYJMx5vVSp6VMHW2MOQg4Hfi6iBybOnMAn1MBvKpi/2eMmQ404T12bzOA8wYAv976mcDfOs8b6HmTKQ1wFcBaYFzK+7H+tIFio//YBv//Tf70dPnS3fSxXUzvE0QkiBfc/tkY83d/suZNCv8x6tPAEXiPAluHO089nrY88OcPAraSfZ71BUcBZ4rISrzqAycAv0DzBgBjzFr//03AP/BujvSc8koN1xhjXvHfP4AX8GretDsdeMMYs9F/r3mTJQ1wFcBrwGTxWj6H8B6LPFTiNBXTQ0BrC9MLgH+lTD/fb6V6OLDTf0T0OHCKiAz2W7KeAjzuz6sXkcP9eoXnp2yrrPnpvRNYbIz5acoszRuR4SJS57+uAE7Gq6P8NPApf7HOedOaZ58CnvJLXB4CzhWvJ4GJwGS8xh599vwzxlxhjBlrjJmAl+6njDGfQ/MGEakSkZrW13jnwkL0nMIYswFYLSJ7+5NOBN5F8ybVebRXTwDNm+x11fJM/wbeH15LzA/w6hZeVer0FPA47wXWAwm8UoQv49UBfBJYAswFhvjLCvBrP0/eAWakbOdLeA1hlgJfTJk+A+9HbBnwK/zBVMr9Dzga75HXAuAt/+8MzRsDMA1408+bhcC1/vQ98IKwpXiPEcP+9Ij/fqk/f4+UbV3lH//7pLRc7g/nH3A87b0oDPi88fPgbf9vUWva9ZxqS/uBwHz/vPonXkt/zRsv7VV4TzYGpUzTvMnyT0cyU0oppZRS/YpWUVBKKaWUUv2KBrhKKaWUUqpf0QBXKaWUUkr1KxrgKqWUUkqpfkUDXKWUUkop1a9ogKuUUkoppfoVDXCVUkoppVS/ogGuUkoppZTqVzTAVUoppZRS/Uqg1AlQ5SNkVZhw7VBvwFbASrjtM10XLAuMwdgWknS9AQJbB8KzBFzjTUthrPZ7KDEGIyAGaB1Bz4CxxZsG3nzHYOz2DYnbabQ9Pw2IIK63TUQQxwWRtu20vsYY3JDtbwxc/1vvhvzlgFBlgppAlJA4BMQhLElvGYSIGKLG+z9mIGaCVEoc1z/YejdCixMiYWyMgXgy0HZ4uP4OLAOuYAVc3IQFbsox2+0HLw6IC+J4edO6TGs+i+vnh6RMF3960u147MZ4n4sIyYjghvzt4v1vJQxivPz2Pjtvndb8NpaXRi+PxPvXmsepIyCm5nm6z6w1zZblvbZ23YaxxD9m4+3bNd4yrYu1ft9a1zV+BrRO8z/rts89NZ2p01u/y4AbsklUCSaIl9mtX1fTui/T8TNM/UxTXwuQcrqI/1qSrZ+Z92clXe/73XoeAG7Awq
kQnJC/D/H+F6s9fwRwXcHyp3mHYnAcK/Vr7n2vkgIuWEn/c04aJNl6UvufhS9ZaeNUQiCUxLa8RAcs189eb8MJx8Y1gm25uP6HbFsG1xUMguvngTGAI94f3v6thH/s/udlRZO44QBOxFvGqXYJB5NY0p4mW1wMguN6H4YlBscIlhhcIyRdC9e1vP0lrfZ8drzjs2P+d9o/N9rOHSBRJbgVBtt2sMRg/OMJpBw7GBKu3Xbs+Hnfti8XrHjr52m844s7mIC1y/feCGAJbkBwIoIbMmC3zcWyvL/WPGz9yrr+sbd9D13x8jIJluMdnzgp+2r9nou0HbcbskhU+fODLmIZLDHYliHpWNiWl89CxzQ7bsd5IpBI2v7n7k0PWA6O8T6DoO0ST9pt301bXBzXwnEtRLxja/+O+scZl7Y8FMdLrxV3O6QDYxDH9X5vXIOxrPbfkNZ89s/r1msV6UZmdQ0mYOGGbZIRMEEDtulw7oiAcaT9nPe/z3bMz1rTfs3ENe3Xp9bPqZUl3u9X2zXbkKixcSL++4CL+N/31s/dtg2O0/G6InHBSoIdN961xT9u41/rWl9L61e9NT2Ssh0BN2h5xxzxvgPGsbADTts55xrBbT2/XAvjirdcyqZaD9O23bZzz/K/T63nKYDxzxFcvHx0pO16Bl5+etcD79oArdcot8N1SZIOWJb3O+8nRJIOJhbf5aNtYPsWY8zwXT90DXBVigq7hmnHfqstsI2sb/RmWBbS2IKpiiDRBMkhVQS2NkIwAAkvEDShIBJPQMDucLF1q8Jt25eWBCYSQJIu0hIH24Kkg1sTabtYGxGspihubQU4BmzBavHPhKQfnTkObk0lJmhhNScwQRsTtLAbY5iAd4KZoI0bCiCOi7iGpnHeld7Y0DLUW6Zhd3BDBnGF8Qet5ejhyxgf2srwQD2Tg1u8ZUyQvYNJliZsJgUdViQsliWGc1B4Hc3Gu+g/0bQvbzeMY3O0mpgTYNWmIW0X8mSzd4pZYQc3blM1uIWmTVUE6m3vR9lAfLB3XJIUgjstgk1CaIfBSnhBgrEgEPPyJ9BiCDQ7uCHvGOyYixOyCDYlCW6P+sco3o+C4+JGgrghi61TIjSNM4S2e+kKNkDNuiSShNCOOHZTAhP01rEavSu6Wx1Gkq6Xp5aFEbB3tmCCAcRtjeD8i7llYUT8AAqseLL987ItpDmKCQYwlWGkJe59X1J+jIwITk3YCwBbErjVIazGOJJItn+f/O+bqQgh0YT3A5h0vBueSv/XIxaHcKhtm+I43o9RIokJB70kN0e95V1Dy8TBbDw4SHS0gwm6SMRPswETs7GrEzhNQTBgVyUxLrhxP0JxBKvJBgETMFjR1psAwfY+CsJbhWCTIdBiCDYbIptjBBpimKDddkMSG1bBtn3DNI43OFUOBAxWRZJwRaItf2zbJdoSIlIRxxghaDuIGOobKrEDXppdxyLZHCC4JUigSYhsMYR3GiLbHMJbvQQ5lQHspkRbnm45qJZtB7gMnbCduooWAIZGmnCNEHWCBMRhdf1g4kmbQRVRmuJeHtZGYjTFQySSNs3RkH9aWjj1IQI7vPyJbBWq17oEWgxWwmAlDZXvbaRl0nB2TPLW2Xl0lMmjNxGxk7gIAXGoDsaIuwHq495nWhmI05gIE7GTRJ0Am5uqaGyOkEzYsNm7voS3WASbIFkBdctcQjuTWEmDG7TaAicxhvWHR2jZN0pdXRPV4TgtiSAihmGVTQAMCTdjicvG5lpEDGt3DgKguTGM2eGlOdBoUf0hBJsgss0h2JgktG4nTl0lVrP/mbVehwIWbjhAdHiIrVMCNI9PIpVJ/9QxVFbHqArHaWjxjiMUcIgnbVqavPfG8QJrabGoXGcT3mao2OYSqncI1ns/9sYSrBb/hrwigJVwcYMWTWMr2Hiof5qOjhIMJYmEEgyqiLKlsYrBlS0kXAvbD3RabyAaomFqIjEcIwQtl6DtsG67lw/VFTGSjsWw6iYaYm
ESjsWomgZWbR9MdcS7btSGYmxrqaS+KUI4nKC5OUwg4OC6QjLuXQ+Dq8NUrYFgI4QaXYwNleui7dcbwIo7WDubcWsrsBpjuLUVSDyJCVjtvwmJJISCuJGAF/TH/OkpBSskHSSewKmrpnHParbtY9MyNoldkyAQ9PItmQhgWS6JhhB2VRInakPcIlBvM2iJfw7GoGpjAivuYkeTWNGk9zsGHQJrUxkmWR1CXIMkXay4w/pj66jfyztPQ7s1Y9sulmVo9j/n2poWGhor2gLfZEuAihUhwtth0MoEdtTF2IIdc3DC/s1GzCFZEcCOORhLCDQlkISDGw60XVvccICWESG2TrGJT24hXJEg2hSibnATFSEvr6KJAC2xEJFQgqaWMPHmIKHKBE7SxvJv/Fp/z2prmmlqCZOIBaisjhEJJqlvirRlQXJrhXcjGxOcSpdAvY0JGJwq/wZyh02gWQhEIbLZO9bIDpfI5ph3XQKwBXvjDkxNJU5tBEm6uCGbwOYGnCXL6WyueWDVLhN9GuCqdsEA2/cKENlmiGx3aJzkXdQim2IE8AIGKkLYzXFMZdgLRv1Ax60OITEbSTiI6+JWhsAxSDyJxP0gOBzEaklgAhYmGMBqbMYE/B96/1bRiie9QNlEvNctMS8wigTbgyorgFsRwG7yAiC30vvRTQ6ubLvQuGEbYwnB+jiSdAnVJ7HiLm7IYtu+ftA9sQlLDMlokJpgjKA4VFkxdrMbCPlFcLYxNLsOo20HsBkbSLLBiTE2UEFQvAvN69FGagJRlsWGU+YI/AAAIABJREFU4RihrraZ+qYIyUQAu8IPPhIWJCzvjj3kkqyB4HbbDwb9C3pUCDUIgSYvsA03eKU0TkjaSgTjNRbJCsG1hUDUxQ14AZaTtLEqvHwwAQsrlsTY3v9WXDBWBDfYfveP5d05i/H2YcWT0OzgVoYxFd4PuTgGNxzADdlYSRe7wfsBM0EbWu++XdcrbWiJIqEgbkUQKxb3Ps+AHwgag6nwf7AtC0JBTEUQaY63BcGmJoIJWDi2YAk4FQGsxjg4TtuPlgnYHbbpVkWwGlu8gNe/oEssAa7BranwfixtG2yQeAKJ+j9Gllf67wyuoHFUABMAiQlWXbK9ZMURjMH7YQ5ZXgmT5WJEMEFvX3XDm4jGg7RsryBQlcBJWNAQxG4WXO+jaAtsA1HjfwcdL12WhVPl5XNsSJB4HTiDkgRrY7iuRWWll9dB28sf2zJUheM4rkXAdtjZVEFFOE4onCDYGuAaodkRktU2krRIVnqBdmyQjbjtN5puwMIJWW0lyNbgONXhWFtp5eaWaqqCcUJWkspAnDE1O4kEEmyNVrWV8lYF41QF42xqqsZJ2lRUxmhqiGA3WUS2eN+xUINBHAjvSBDc1kJseCWJ0YPBEqzWe5+AQ3MiRHUwRtQJsjNRwbZoFSOr6ok53s9TzAmwpbGKYdXe+RpLBKmMxNnRUI2E/ODMLwWMbDOE6h1CO+PYWxpIjKlrCwgMQqgBYptCNARcmqMhYk0hrIDL5tWDvc90VD0ihopQgvUb6wiEkxjXwm0JUL3a205oh6FuWZz4oADiGpKVNrJbTfu5AZiAYDfGvVJdxwu4KzfYQAA36B1XdFSSxuYA8boAiWgA0xKgJeIgAReT8AO0hEVgp02wQaha532Pwtv871HMvzG2BUn43xPjleJZrkWw0SG00/siRofZBIIODY0VJBybeDzAuuY6amuaiSW89FRF4rTEg7iuEHdsGv2gOxBwSCS842owYeLNIRKO3fbd3NhYg4hhR2MlANvcKkQM4XACx7Fw4zaxmI00BQg0eMdVuVao2OIQbHSJbGimZWwVyeogdovjPSUCErVhgnhPWSTpIrGEVxAi0hYEE4qA62I1xzGhACbkB3cpJX3OkGqs5oBXIJIwBJugxXg3GLbt7ysOifoQUuF4BYYttnfDm1Ko7PhP/NyQhRUXkjVhrEgAuz7anh4A13tKYyW9oDQ+pILKjS
4tI7xlYrYXBEp1kkhlnGhziB1bqpGojQl4O7Sabey4d8OWqLIJb44ixiuNFdvfl2OwYw5W1PFKQP1rIIb2m4SEgx31nky5DUEStqF6UAtJ12Lzjuq2JCeaQ8RCQdy4jVgGJ+k9jQwEvc84Hg+AEeobKqmpbqEhaZNM2mxtiECD9x0LDmvBhB0wghMQCLok6wzB7QHchP+UxwYnYrBjghOGcL0hvMOLCeJDvUDZDVkEIwEC/u8NjsGKJZHGZrKldXCVUkoppVS/oiW4/YCI1AF3APvjVcf5EvA+cB8wAVgJnGOM2d7ddox4j2GMQNMI7w4SwLXD2PEQVtyrHxje4j3utJoTXkkttJXcErBwxfZKblviuHVVHSryJAdFvNIHHExVBW4o0PY4D4Cki6mrxg0HwLKwaH/MJ8HWerSCCVokayPYluWXZiRI1lW2bcaKe4+GjIDxSyCNLSRq2irA4boW4UicZCzAlpYqtldXEg0FqXcjRI13V1ppxVierKTBraDS8u4oX2naE1hG3K+isCY+lA3RWjbt9O6IYw1hr85cg91eAlBhsGJCbE01RFzE8R7nV6wXEs1+tYFGiGx3sWOGqnUx3KBFoClBfHCY4A5v38nqkFfqmvRKauyYd4xOZbCt5EMc1ytJco1XlzPmUL3ewVg2ofr2R2mh+gSOXy3BBG2sphaoDmNa6wP6dXhb641iDG5lGGzBBFof98dxasPYzZZXVUTwSnGjybbqK/j1pd2KIBJzMGHbq65iC5JSn1Vc01Y/125JIo5XmtNa/cAELCSawK0OQ0XQS1MwgFsZ8vYHUFsFiSSScDChgFff1vVLf9t2JOC4uEGbZJXgBgxmSALbcglHvNLiWDSIaxuSSRuxjVdnzTK4jtVW77a+sQJnRwhxhGQihBWzsKNCsMGrHgAQajReaXuz45X0OUGsmP/UwX/aEGh2qdgkJCsCOM02bnWSqP+4uC3JQDiS8ErX4gEsMTQkvXxxHP9xuCu4TUGshBCqF4KNhsotDoFmh0CD/zjb/4ysmEOgPkrj6ME4DUE2V1QTDnrHXhVKsDVZScKxsS2XaDxIpV96PLZmBwDbY5Xt9XEDjvcIvzHg1zn10hxo8arXOCELqQljJ1zcsI0kTVsVjsT2CDsiCRKuRUNLBMtyqauIsrmlmoaYV4JoiSGeCLB+Ry3hYJJYNIhV6SIxm8BOb2ehHQbLgfBOg7EhOjxCRdIlWRHAtKXHoW5JnHhNiGS0gtjohFda6lhIhff9iSUCxGNBmgJeSVQyGkS2BwlGpa3uvh2DplFBwjtcIpujuEGr7RrXWpIqjuBUBBEDdkMUSThUr7MxEiA2tP1ztZps4lbYq+7SYmES3vcx0OB9X604hLd5T3BCja5X1SPh4kTs/2fvTX5lW9P0rt/7NauJZjenuzczK6sqKbkBCdEIpgjJfwAzpoCQmDGGP4GppxYSYsAEMWHGxBJTSyVseWCDjSlXZt6b95527x0Rq/s6Bu+3VpwsLKic2FQqXunq6J4TO2K1X+z1vM/7ezBL2u4bSYnSeiQk0rEl7j3+Ejn8Ujd6efBMi0HaxGXpMY3+7JePx80MnrNhOrWYJrHMnrYLTGNDjIZUVbrUWJpdYJo8l6c9dGqo9F0k/1rX39xmyi4xmxa56H6IATsY3LleM0th936pHRGDP0f855F4111nP4rak+zLDE7Xl9g5/KcLclI1L719QERI970q2lbVXVOu/lh7VkuQfR5pdp7jL4Xl3jEnYX7U7cmLhSRqS9oHuBgoBjvL5vftnrJaAc7aHYs7i12E1B2wdf2RmJEpYsdAPLaquOZC9yXS/7BapyzxPlMWw+IcxhbERWK8+tfdRbsvx18l7JJ1fa/7U9zqedf3NktUy1PW+9uEdPXpJrV/2AnoEykaxqElJ0Hq7EcOBrmoeixW74d08kgSqoaK84ll8IgpPH0+IDYzR0+ZLNQuZfzNTq+H+r7mxUFZ96XaKiagQP++4OZSZ0
7UY+zq9RxNvdYOrXqne4cdArjf/dfVm4L7+1F/G/hfSil/E/i3gH8M/NfA3y2l/DXg79b/v9WtbnWrW93qVrf6va+bgvtXvETkHvgPgP8UoJSyAIuI/EfAf1hf9t8D/yvwX/2/vlfM3P0ykhoh+68nMQV/ikgu2Kma/JekCtvLX5hqdBaJiXzXU2yvSlpVXmWJ2MtSp14N2EJp7eYVBfQJXGR76s67hmIFc1alECDuHG6IUCDet/rEv2/Ue1bfxwxL9eSCPc1IVj+pGzL77+qTsO2ZDy00heWN5cNy4B/wh3xoj3zjngHoTODBDPwyvOY59byyF0Kx/N2Xf2Pb5ZfYMSVPTgbrsk7ePjuaZ52CBbj8ImAGR/tRoBhMBH/SgZH4VBXcIdP/ODO9bVkePM2XZRuSiYc64DIEPV5LIu0c82tVPMxXwxlx5+pEMdgpIgYOfz5gQr8pynZMyJKxSYkUuQFxFnOaKF01kFbiwW9NLFOV1Or3Ssd2U69Sazclo3j7laoVKb3HDPXcY0l3bb2G6tP+84Ax6s0urdW/j4nSt1dfY6ODJPY0kbtGH89LwZ7mjbpRRJCvPXrGIOOkvt91e5Y6oJYLb//+wJe/3vP56LFPLcN9HaqwhebZkDqPrSSEdap4fevUFvrPBooOHJkFbCj4c6Y5rUN4kJ3U7ogOp+ReB/+uKkvh+OuICY7ptWF85+GjBwvpsKp0wlBazCTYUQgPGTsaii2Y6m+L+4w/GdonVZBNhOyF2Fuy03vHzhmJmbh32Nlz+D5S/tTx9K8fuRyrqtclHW7KAsFgDoFp8cRg+XJSlS4Gi0hVf5487izYWVWn3Q96TvuPEZNUcSzO6NohQu4s7Yt+Vv9rx4kj532k6QLzuedse1wbSdX3aWwhTg6ksDSeNFrOH1soQv9jVbzGgkmFYiB2htQI5Wc7mpdIalaPe2J+9GQPy08Cx1cXShEaF/HVi/n5eY91icfjwNh6chHkYeD86zvirqp9ztA86+S+id2mEEsGs25zzISdw00J2TWQC89/7Dj/EYRvdM30fSD2lrYPOiC4s5Qk+C7S/0Rf8/Jlx/Kt0PzokWzZvc8U4yuVoapiS6ap+IXwoKq+WTLh4Aj7em++WtjfT7w5XPg89HQ+cpka7D7TVg937wPpeOF56Hl7PFOKcGkC+2ZhOOqaMMwNy2Jpmkj7swmRwhwc1mbiL6oCFw37fiFmw7xz9F1gCU67D7/c123W45day3LvsFOGVz0A4ahrnR1j7dZZvddFu0mlcZQ39/X+huIcqXe42mUq3pKNUWUdtGPSeVxIZCd0nwMP/0R4fyek6lGmzchOtz8tlnJMNJ8s/nQlorghq+c1rENdpXqsvyIbiOj6kgzWCuWuBYHp0TG9rQNkh0wx6otNLw24lUgkm3JfrF7TUgp2rENt50WV2u76a1vudO7FjEEpEyFRrN3W1dw7hjeW6W3GNhnn4+Y7Xv9cFkduEr6JhGDJ2eAOiRjcNoTnXKZ5iIxjQ9NEHRiMFvYFU73YuU20PrKMekzNXSb/2DG/yjRfKrXG6jkLB2F5EJrnwunnLe1T3pR7/xIwIZF6D5WmU6yQHw/wK36nuim4f/XrF8AH4L8Tkb8vIv+tiOyBb0opv6mv+QH45l/0wyLyX4jIn4rIny7pdzdx3+pWt7rVrW51q1v9/61uCu5f/XLAvwv8l6WUvycif5u/YEcopRRZx8P/QpVS/g7wdwDu/btSjGK09u8TbqgqXbtyCFcvLZgxkLtGp+/1jSiNTtzbi5ILSIXS+yvTtnIMyXljY+pEuSgSDEj7RhUf0Ce4quhKzNiKrypGNjbipujtGmUCrniUnb6PhEQ+NBRrsJdlUz0Awr6Q7hTL9KofsFJ4Px052HlTcH+9vObJ7mkk8taduOSWXITfTPfceTUSjkmVnjB4AiBdIhWYeoMd9BnSPamqmh3svytkD/2njD9nVjCmZJjfNOy+GzYFG8AER9zXp+IpEh47ih
HCzlGckJ2lOam/EcCkgh0jeVV0jy3+aaJ9CoS92z6rOEM4OLoPI+Yyk14dVOluVhVBGYT+y0juPem+JzuDiepf1Q8T3NMIS8BaUU81iirb1NngVDE15rpPc0TmtHm4y75BQt4wY7mx2KD4n9VfK8ukCsYcMTFR9p2qOV8hgcww6HU2xytxwevnl6p8rOiwIhDuPX4ovPqHQuzBTlVp8HqummfoPhWm16rGS4RX/7tuz/Ro1R/XCDYUYqdKrUlsHZDsRXFVreA/BuzzqDJwbgl3ValaMqk1dE+Z6dVX3ve+bH429lGnm8+e/CbBiyO1leUZr/dl6hSCevfny28xWDcMnxHcFKvvLZO9sNwJ/Q/CZSUSHALNLrIsDn+XcC4pAiwYUkWklckiu6gevIfA0llFAlnZeJ+5Febe0j4n/CkSDp7macbETOwrX7YB2Ud2h1kbBAXaXaDxEdPrgQjJ0rSB4eOO4jO2egntydB91m32QyY7JYuYUPDnouppvHYgwp0neSH1heawsGsCIRne7AZilWGbV4lUhGFucDYxB69s3AK4st07q6oXe6PK/Jwxc6JU1VCWTBMWilWlPu08y72QuutYfi6Cq17YMHp29yPjpSWcmu01yiMVnT5vYdkL2Rn8pWBX/FkqSooQPc/qy1T6R3Nauy/KK74s+t5TcLQ+Mi1+Y+6eR8V5WZP5cDqQs+Bc4jIdcFXlNSbjvWKjLmOD94ljPxOS2d7nsJuZg6MUoW1V9YuzU/5r5aGmRhi+aWi/RJqXq5fYjBEzVWpK63VNZ/2+cUhW9NXXXSX78QXyofKlBSkJahcQIB07/R6oimD0hsu3Rk9iU9XMThXMZXLk2YIthPuMXSxh0vtrfGPxF8P+HDaWdQbFK65fgwJYo/5/0VmCYoTTzw3zt7pf0mT2d6p+x2hJ0SiL1hu402s+ve9Y7gxgOU6FYDxN9Rhv37kimJCI973iHceIWSLxzmNXjFrReyy/WWiqertrA84qjk4v+vVQCn0XaFzi0M58vuwYBu38eJ9oXGSkIQaLb/S9rF1p8DBOlebjdGahZKE8Brg4wtoZqy8OR+UE21FJCgi0nyuecr2HYtb9WhnTw+oI/svXTcH9q1+/Bn5dSvl79f//J/QX3h9F5CcA9c/3/4q271a3utWtbnWrW93qX2rdFNy/4lVK+UFEfiUif6OU8n8Afwv4R/W//wT4b+qf//P/55tZw3xniTthORjOP1kVRLAPFjdlfKeKBaZnefC0n/Spyj6Pqgj17TbNjwPilSWY66S/QrrVv7R5JavKa19m8qEBMdi5QvxdTTOpSp0dg07lNzqRTUan7+1VIcytwz2N6hHNBawSCKZXjuGb6ks7RvUZLsLT1HPwM//2/a+5tyN3dcy7kcRT2rHg+JPmR74Lj/ykMXxYjnw/3G+H7uOw5+71hWFoiaMyA4spqrIBbhSyL/ggjO9W36B6BefHFWyuzFT5pqevkPDldY8dIs1vXuoxbGneX8i9x71URXNS4sC672nfbslkxYC7BIoI/sMFUA+c+zQiOWOn9kocgN9KYzLjgjmP6l+tvlsxqhLZ+jSdO6UYiNNJcjssyBI14GNVcC8T8fVBFYdhoexb9eOWQln5tc5glkh4s1OV8WWGecEs4crlzXVKvW1U2X0eVHHedRoIARryUIoGQtjKxCxKZ/it5CeoaVAQdrIpC+ufJui0/Bq2EXequpsA+3/6Wa+Nd0emtw2p+UpB7YELm7pmF5BY1cRUSMcO+zxi5kj7XlWWeNcRe4tZ9DPUO60s3VSZl+YQSGNVoG0mt+r1pk/EVfGYDaUpjG8Lpz9ocJMqIwDtl/U6jIS7FndeSLuG5bj6tqG0tfsRDf1xwdQ0o3nS6el2F/DVk3d+2lEqv7kEA6ZgR8HM6kUGVbu7Lwl3Wf37CQmJ+NAx39v6ubpdw6lld5x5fHsCVLVdQfTOZobZK6M01dwtl0EM4VCZu5fauZj1c5Z7t6U8NZ9rAM
pelb/m2XA5NbyfHK6NzMFvnH5jCvPsEGFL4YrGYCaz+eklwe59pv8QqlJqcUNQL3hdz3LrlEXtDLnzpNZw/JXyj8Ok3ZjwrgY92YxpEuOlpUwWM1jys3qdjRT8i6F9gu5TpvuSyI3gT18FPVSlXn3r2gWROeFLQX5SPaZRCIvj48cdu7cXUjIUr2SQWNOnjCmMY6O852yIs2V/P5GzbOpsSjW9zCf1bubCeWrxNjHV/UrekJKml8VgCaOHxVCKxeaVkcwWxJEb9bGuRIpUw4HsGBC0q/c1paJYrmz16vVfWeqS9RhrAE31KD/rOonTeYbTL1pOf10DN3zlvK5BBsYW7UxkISdPagrLvf5b9wX697OuQVmVSzPH3+byipC7RgM3xkhptNvoRu3iAaR3i95TlZRSklCyYP1X6v4xkl2DJIg7gxtged0p131LScyQM3ZN9jRKsJHCxkimdfQfM8+fG4I0zHeRpVPWcanEhrJY3D5sYWynaPhs9psfHGAcWiZpKFkIwRIXS6nkiS3B0RfMxVIeAunskGiQILiT4M/rnMY1wQzAVpJC+2nGftCuqdn32sldImQlBJlwTZ/8Xer2C+7vR/2XwP8gIg3wfwH/GarO/48i8p8Dfw78x/8Kt+9Wt7rVrW51q1vd6l9a3X7B/T2oUso/AP69f8E//a3f6X2s4elvQtxlTr9gy+A2M9UjY2m/GPrPSfl+sTC/1qft8PN+88HZ3pFawc6F9sNAuK8JJV4VFdl5Td9qqwJkBH+pPsvObZP4uXVkbzSicBTiTi9XO2siWRFof7hQOkfaeyTk7clenFHPFeA+vJDv96Sdx18yzXP1Hv3S0TzB+Y8K9+3EL/af8KJPvt8FTTYKxfJgBz7EI386/GsAPMeeU2y5azTadIgNc3DEZHi4G/hS9qTFQJORqnLFvlD6xJIESdB9FJZ70Sf7y+qPVApA2BvSH+/xp6S+3c5S2qqwtpYSVNkmlWvGtzFfeV713yXmzYtVRMi7Bjvo8UnHFjsspM7hZlXE097jztfHa8n61JyOLTIrNcOgykH5SlFJe4cpmhxnTyN516lSu/p0nbIZzahpYmZl4gZ9T732NKo3eYMbaw75XmMaV+9V6VolMjRGFQpjyDu/qc+6PZqEV6wyc8mq7JRde2WUTkpz8L/5glmOzPeaQjV5aJ6/oodQo5GnQvdRk7/cUJTDC+r/GzPO6uS+iYXmpJPC6/FxY1VSMvgfn4lvjsTXe1UkVg7ueSE3lriz7L8vXH6q3NP+R6HUtLxLbrGzqoju106T1QKEo8WOVR3pC9kq1rT/lDBRmbBmzkodgc0jut4v7ZeEZLh8a5Aav7bcW748P1LajL+bSclg/3nHfMiEuao+99d46d2fOeIVQY1dVkoJyFJY7j0mKrM5Vc91+6w/78+eMrYsD5lBYGks4dQibeL8Wd/U9VGT5WarXOfRsvvekhv9DIDYCcc/n8iNIRwdJhb8KWz7C+AvkWI9zVNhuDgwhZgb4uOCqV7nUqDfLTQucr50hFODBINfNHYZdF1sXhImVT60aHJi6vYbq9RegsbKNh32suAFciM8/tNC2Ok2XZ4bJMLw00zeZWQ2m58x9zXa9MXgz2CnQvuSMUtGssEOEftJ1e686yjVg7vur8mB1Dqas75P+4NjeW3AZYbPO8gw2wJFed0A5dXqEa2KZjRc3u/BZfKhqsXZEEfHYouqnOHKYJaXqky/qd7XyWJODj8o8zs11/WqeQY3ZtylJrB9xZG1z7quls6rEj4FXQ+sIKGyXb+650vra7eobLzcYuTqv0y6zhSjcx+l+vzFFI17BozLhGAVZvC+U5X+k8HOqtiDxjKHo8deNDWNVOoaFpE6r1IaT+k0LhlnNAUyZO7/LOAvus3j+5bpbSH2GROEXOkl6dlvMxvdpCQUOxfMomu5f1mwXy7bfudDT963OqNg68wEenxype6YMeLGwuGXlulVIS8eM3vim7RFRgOkjy1mVq
XZzEL7BE//ZlwDKymDw4yGvKtybaje6MnQf9Crdjkq0SUuja53J2H3g/JuY10yx3ca09t+0X0DvYdza4nvtCMqlaYQ7zQt1U5RO8Hhur1/2bp5cG91q1vd6la3utWtbvV7VTcF91ZbFWeI+0J5CJQkJLem6RhVF53+9/zHju5zYfc+bNPixQjjK0dzyar8iZA6YX7Tq2cXQAzZQnOJLPf6tJ/9ytyt6uyUSJ2msqSqikmsk/1mVaos2QqpN9jQq3KbC8UbwqtdfZ+oKtWSSa80YcwETR7aJqB36jdd7h3Pc8cltYyp4aftE7a+6GM4MHvP++XInD29Xfhn5zfsXGBKug8/XO44XTr6fuEyNeQomNkgg2aW6/YIqVVPrKrR0H65qgPA5pn056wK1DkiKf82S3idpJ0i6diqX3VRr9/qeV1JEmbSJKC0azRZZ98Q+6+UD2cId5oPL3PATKk+KVdV1VvS6/01KclbVYaXuJEL0r7VHPRUMCGRD50SDOw13YmYkLq9K8PRnCbyUZVegNLodrcfBuJdd1VerXJtQdXrdf9UzYmURhPBViUYIxRf97GCa1fuLfarNLOcyTUx7/CrSjag21KdxleG4sAPBTdlHv6ZEgdMKKSqjgzvGvxFt7O5ZOykil6pyWWgXQp3ieTWsvz0gdxamg8X3a71ODuDOy8gDbuPBT9aYif4IRO7qpRnSzhCdoX2s6oeJhTcINsEd24EOxX8oEqVKswFNwTiTq9VO0aMU5VnpThQ4PyHGfmm+qqjUBaLzIYwelW33kT8cd5OqZNCmB05Oy5/VHAvlu6zcnC7z1WNS9V3m2xlAev5DDtH7Cu14BnGd1C6DKMlDE69xEE2/zAfevyiVAspes/YCfwJ/LkqplUpzo1OZzdPesyLFVz1lMuSaV4C7dFw+KVh+LaAFNLFUQ5VvTQwnFuG3FGy4J4c7lJ9ozUFsH3ONM8LZkmY54H8swe9/lMm1fvLGCHvW03s6/1Gb1kOV9/w/KDpaxiQRVQxf7JIBH++ckPtrPtdrPpW7XnWOYZFt9mUQikd8a6rXNRMue8JR4+vhILmybE8oobdCBKE0mvq1KrKyYvDLEJ8UMWVJOAzmKI+WqgJjY68T4jP8Owxi5AOmXKoDHKT9fVJNrKHqeez+bx27Ur9N4sb9VzZS0BiuXK4UY/rmmgF2hkxU1RfLlWt7BrMtKiaeZm1yxTS5ost1mImVV1lyRx+iPC/OeYHR21aMP4s6fUWlefsLoI/673RPl3XVVuZzpRSkx4h3u00aQuQy4TkTN41pP16zErt7tTjnA3+AtMrS+oK7SePG/W6Xr8PJBfsoteaP0fcJeg+7a7c8dJavcad+m/tEJVY9DW73Ap2yfiXwvRK/fHZowmS57omTILU89N91PUkt7D7c0cx1TfcV/pFsuQuQ5Z6TRbCoZ4sgfldwp0NdlSlNhyFuBPa56rcv0D3OdM+J7r3I6lSliTmLZkvN/aqws/KHA6PnXYn/ozfqW4K7q1udatb3epWt7rVrX6v6qbg3uq3qnkyTK1FFkP7SZ9/fLX9uItmyD/9jUxuDcV4/HjlUPoB3CWRveCG6k8rEA7rhD74k/p39em9smwzW4pJcap4FSs0TwvFG9zTpLzTyiNcHlpMKvgPC5Iy9mXeGKcrq3fLaZ+TTpd2SnAookoqQPOsikM4Wk3xKcKYPaFYzqny/ySxMwu91Sf0j8uBg5/W50FeAAAgAElEQVQ5h3ZTcE9Tqx40qczKYPBn9VGu/shwLDpJWmC51yf/YgV3ls2/aBeh/Vxq/rlA0SQkWnslEsRMahskKvtSSlHV2oA9L9s+r5683HpN3qnK2fpkbxdlDBYjLK963LlmmHujCjzqwZVKc5BxAVEyQmmcTmujKrHkrDnxh069tjVVKddzYscFM0zqjxPBDDOUgjlNqtQCsuj0tFIaNG2rOIMU9c9CpR7kXNUToXQNMs1Iaa4Ttquqa6tCGmLlg+Ztqro4C0
7VPTuETekxqXD+aSVRdJAaCHsDGNwEsVM1R/9OqQ5xp15wEwpmyaTeYkL5yg9dSDtHEUitx86Z5e2eIl/5c4t2H6ZXTlX8opPFy94wfHudPpYEpYPxG1X07CS4AcZv6zkdBUl67E0oLEcln6SuvW7PIpg5Eh5a/It6sIupHZPqFxejXsnSqFKTT16Vqslvk9emSeSoaYQS1Ds6vSrsv5dNYfL1esQKzY/jlUOcC7nRc+rGQnHKBjVNIr00yOsZ79Pmjyz3C/PkMM+O/BAwT564K7iL2ZjB8gyXb51SFJbC8uBJjdA+xa8m3FXlMqHBLJDuIu3jBMFtyr2RQr8LnJ96ZLTEo65X3XuDq2tdc1LVPty3yHFVEgu5s0p+gcrdniAmcEbXgrmw3MmmeMW7hHux5D4hs9m2AYHx3erpLBRn1bO/N1tSmsyJctSFI+9bzBiwo17LdgxbJ2F5rJxpW1XbVlOziin440xc3NXHvyukYBCf9Tzbgvvkicd0ZQD7SvAw6sGly5THCMFiviIS2C4qHaB6dPPqe63dMxNrd2NIuOdrEqGUgnnRwKH0+kj2mtRlz3NVYKOud/X1Zq5M6+Kv8xeXCbxTDz5A32w+3fCmUVX2pTC+E6Y363Gu1JPBKUtawA2Cv5TNV40R5SrHvHWx8s5XekpVQ2vnJHuLLJm8W9nbV8/p5VtheqtJiJJgfp1ZguBPQqsgAYpVCo9k/XwJaevkrdez/TIgx24jxZTGbWlnZlo7Y47UXgkg4SFT9lHV9fVenh2p1+/h6Y2qu6nR+3IlJMTHiERD8VlV/Sy4D55wlwhSlXJf4BBJwVNA1yLAn2FZaScven++/KFjenWgOWV23w16TOeqgouml7n3Sg6i8ZWqkDZgw1+2bgrurW51q1vd6la3utWtfq/qpuDe6loCy0NW5uMsV4pCAH8q2FDITshdIXbqr3H1qTTsDN2noBOqIiQvuDFtSU4AJauCggC9xY25+m8LuebFuyGx3HvcmDBLUqbioaU4Ia0T4K2oT3WO5NYR3uz0CTCXLc1r5e2a00i+212fqlNmOej7LAfD0183hLf65HgKHe+6E+fU8t34AMCrZuBT2PPgR5zJGPTp/msP7qv9wDg25KyMSNsn4s5hguBfrod3epsxs5D2WX1rrT69lzXdKAjhqNP0dio0nbD7UVmR/lzViZCwNUXH5qL+11I23xIAWf2zEjNmivpEf5npf1M4/YkSA0z1lc13qqZDgxvU57V5vKSmyMHGmtx8basnOKv/VkgYI5ghqAdt315ZjNYQ74+aLGQMpW+QYSbvWsz61B4SqXeYRXmx6/ZLzDBXmc47SJnSN6rmhASNoziDeR7q9hRNMAt5U4cpRZW0XlXD0jdKORAh7Rxx54gHy/MvLFExwcyv1JNXfMEMpqrvSr5YOxrtcyLsDLETpBjmO0NqBTcVapMAu+RNSZWEJgutvvXVY+oMoSo9YW9YjsqWDMdrKlg4FEyA7Iv6JLtCOBaaJ0Ouq3h6LJioiWzzvb0Kglnz3UGnlbMV7JyQXEid0dz7QUiH6rk/BopNiClYm8m9IUWDSMGstsI1PW0xIBD3GXsxpFaVVFDPYvYGu+hU96oqFnNdE7IT3AWSKeTF6sT+bMmmkCv3F1uwbSLdq7qc2wyuIOnKL06N4Eb1tPrL2oEQzHJN+MvWUGxmeGuZ3hb8/UxOBt9cp7ONycRoOT4ODE2HSCGllmJgfGO2bW4OBjdUqkGB1DtV2ioDPO48udWLyQSlvmSv2zi9qeelCMUV7MnqtZYsBQivEvZcz8VFiFVd82NNk6z7vKYArhzW3FiyN7jtvpMrZWIP7SdD+tmwsV9zFowJxHj1pqcklKhrguki6dukanpNsDM+U1zGre+R6nVhNb1qfV8pQnecOWehjE7XmHLlHqcO/G+0Uzd9s9N5g5CQELaZCYkZWmWKy6jJjunQIgXM2h3IqFq5Ul2mZVvvV+W11G6OGRfkdcfyyjPfC+
FYyIfVu18gGoorFJ+JLQzOKD1lxbpfdCYiN1bXXcCeZ+JDzwqRXbtbZomE+27zzob7htjXDuQdzK8TxRZY134pxDvLUpno3UdTu42Ow/eiTN19o/tRlen4eq8e5Z3XZMgxICkh87J1tGSYkXjg8jMh3GU4Bhgd2EL7o95f2St3t+4lElSxRcD0+vfW6v0pgLiMMYVwb7D3C8ZcddXw3KravwjhTr+n4g71YAPnP4L+R12bYgK7GM5/vFeP8pN+ln+akDHqeh8TxIT78KKdud+xbgrurW51q1vd6la3utWtfq/qpuDeaqsigv/phflLh1kcNZ6d1Kq3tf+gDEF30mQVEwrToz6Ztc+qzuRGk87C3pK9TnCuilV2gp0cqVOFIDv1NOFk8wgud47UCFIK/qX6Dzv1S65qjRtzfY+GuNeJc5PAn9P2WdAiIWPr5L9ZEtnDy5/0nP9Ad2x+VUid+t+m6IjFMCZPNsJDZdz+vPvMr6ZXXGLLgx9wJvEPP/2Un+xfSGu6T9Yc9mnyfPvqhR8+321JVOM3q4pQKK8W8pdGn9wRTICyS5h2pRYYFgETHM2zkGZYHhxuzJsylDuPGQJ2DuTWEx9azKITqPFQub9VXciNVU9rQTm5lwk3VlVpUQW8fc466V4K9vOFfOw25TPd9+pfNgL7jnjfYqd49fgCyTfK3c1Fk5taC1HUo1bV3nRoVVkthfjQIalgnPmt15TG4Z8nJT8EfXovVsidww41iao1mgR2nkm7Rp/Oo/p/aVXNylaVWQFK61S9XpXAylGUUqBxxH275ZzH1hD3sKyZ6U3BXgwyXnPm/UnV0FBVXruoYmsixFbP98qAze3qKTTsvxuJO49/mcne4r9EzHkkP9Q3KoVs1Y9erHD5xmOXQux1An2teJ8pXSLtDHQZuVjlK9fdi3eJeHZ67TmhfU7YWYkTq8cRAfd5pLTKIS6mw43amVkl31LAN5Fl8nRdYFzspuZu5YX00qhEsgvI50aVrsKmzsa9pX0KZGcoVpViciEcPdPjV0QLUc4mbWL/amSZHV2/0N2rVP756YB1iRwMYgsEQ0G9pHNdf0zU1DjzVSKW5ILMSX2SgF2UoWxiIRwKkiz3dxeO7cKcarJasjyfepwTjocRYzJfFu2O5CqYhqPgJvXYm1QwU8LErL7qj8qm5d0dxQip+o7jTkk0YSdbumGxhXQo6o31er2lY0b2kVRTrWS2NJ8My71ga5qaxEJ57DaFUkKG1mLHgAmZ+NhjzwvZmY25S4bx55E3+4lX/cAYPUuyWJPJdWFtra5DH897GhdpqiLrTCbWtW5Y9Fi2LjEFR0qG+/3ItHhcvT5yga4m3pUinMMOFrv5WgGS1+uj+7ioclvAXGZll6+i5hKx1d+f7/fqsW3dNl8BlSRgGnLjsKdZ6S2lUFr//+hoYTpMKsROGN9UP+1YX3MfoInk7MCAe7aaPun0ewpgufd0HybmVy3m0CiftVTiwctUL45IftiT6vpYRPnecWc2csjyUNQLnQXfB1IyiEDxmbS2SD7W+ZehaBfosaVY9enG6vs2c9L1cj0WnSO7ZiPT6PUTsUvW73JbKKNDukSZDctjXeu8zoQQhbTL2j0oot2UlRMsCddGwlnTzOxuwRyUthRO2hkz+4hMVn26aLeJAslCrtd8OiTG4pACYa9UIbMI7SlvvwMUEfKx3XjIedfCrsU+nfld66bg3upWt7rVrW51q1vd6veqbgrurbaSXIjBKeMuw1Qnef2Lof9xfQ08/iPwgybDmK+4fSu3NjWG5kXpBXbKWxoTDvXStkZVydWHZ9kMiXbOtE9powQA6m/KRYkCVEXXGMySaELG7CxuSMS9w1TmbrZCBR9sT/Kpd5x+bhj+WP/B3y2YIqTBcRo7fjB35CI0Jm0K7nfzA9+Pd/zh/gu/HB+xUlii5fO052Wqns4ipIvD7iNGCvvdzOnQqrc2r8pZoekieWqhqG+yGFRBqApud5gZl57slFbh5l
L3Hexlue7LqlJmpSBQlT9Tj4+EpP7Hxio/2Ip6Ic9C80Xfx0yR3DmapwV3monHlvRqXxmzerzcadan6c5hlog7C6ZOapeq6NgxKFnBmzrF7JBxofS/rSQQE/nQX+kW61SwXb2GM7lrdEq4cUpBWNac95V+YLCniWIt9jLrtnoLyWzJYWvCkUxB2Zd9e+X3rsvd6terioGdE3Yx7L4vuMofLc7gT+qXa15QP9qiPlgb9OeGd6ru2qVcfXqTKkSh/ypVqK3T9SLYYcGcRkqvzE6A3HrsGIn7FndJVSVWQoKpk8jT64LMgpn0/mQ0kEXV2qqO+C8Wf9G0teakym32lfKwTl6POoUuMSPeYEImdZb2s4BUH7CB4hNlMWvYGjkY8uC261l2EVym+cETD4b+N0o0cJcr79MsBQkZfwmqZIpgUqb7OCFFzcXToyE75dEal0nJEBfLYh0hVDJKgeWpBa8KlClgLhazXKfyi1FKgT8n5lctdlF/rEkL9rwqVZYs+jPNk2E6OD6f7/kMSFtfU6fLw8VjXhz5IW5K+XYvA8udTrybJbM8NJhUsEMkPaoqH+4a/Mui5IbzTGd03TsIYPR6HsSSXaG01U9s9V7nSwN1qt+MV7926sCc0VSw1tB+mrZrDGPInccOC7l1G2PZTV+lys2Gj7+558tuTxotMlrkcdn8o1LT3NJLw/xsyT+dKF8a5HGhvFT5+hjg5Dn5jBksZhZ+3O3hPmBc7fyM1+uEAvhM8YX2g926gnYuxN6w3Hv8RVSF7j3Tux39b1S5T3ed+vhF5zRYAma2ui5UGkw+dtucgZRCfqhKb1V+AVVXK9lFYmG+F4Y/jNi7gFm7FnV7u3cD40tHvAcTrLKX39ddsarIZ2+UOGM0ZUuWuHWHcBZznjCD0c7Qsa0+84Sp64Y/OcziMEmYZ0PZR+TicINQRXNSqx2j2Gl3UwCZteuW1lmCmvK5koKKtRsPfCNIAO4SaD+3LA+iXYgOvS7HmrK5C+qlzygXuSlK9ZjNVY1dDLlRbjKLISx1OCCzpa8l0XONaCJkbq73yzqLUqzBTnq/uqnQnArdp6h867Xz2NW5itZXrm/92V3H71o3BfdWt7rVrW51q1vd6la/V3VTcG91rQL+n/TkV5nsoP+hqj6zPm2l+kTWDJn+x5l48PhnVeni3mkqTc0Wl1KIndVp/6VOnC7qZ+o+LvgfX1SFHCby/YFSOYIUnRQuzlC8wYwRf5oIbw6Emn6WWoMdVaG0c8KfArm1G1MXdHJZVWWHnSLx0JBanfiWqPvV9wvWZF7oGS8Nv5k8H097/vj1Z16CPi3+4f4LRgr//PyaH4cDf3L/iXFWYsJU/Wje68SpSOGHz3f4JlLaTHM/09QJ7RgtYXHkbxdKFOTiiIeqHjypOpLeJjCF5kVITeWuPlfV+qBP7fZclcs6Ob1O6abe456qD8yKTvoOQfPJq1KZjp0yiKmUiSUSjy3xrsN/PCOngfTt4+btSzuP/80TZtdt5yV3rqqvqyqm2e8rZ1ZVk0YJBfe9flZVk8wYKMHoRH1IyLRQ7lXxkmGCQ1cVtgKpkHeNJjZ9peCaUyA/Nuo9iwkRwZxOW3qZhKierZSUfbkEzFg5kVUBKFUdtqjiq2k5HsnX1KLUCcs9uAH1iAos99rZWO+DeCi4i+AGJRc0L0Ax+KHQ1ong7A1h72ieA8t9g50d+bXyS83qq7aGuFu7DAZ/LrgB4l6u09lO+cip1+nw5kkV5uwg1e7Hmuzlx8rOjNd7YU1ayv16nxZSZ8mNwV0ybrTM6zqQYTk3+MPCPHvEgHGZvCj3FqpntkC4z0gUlnvdPlsTmYDqKTZkr10XEwslClg2Xmzce+Je1VHXJEQKrkm/5fc9HCYu0pHOOlGf9wn75Cr3d91HuPs/T1z+6IBkTQG055nS+q0TJLlw+YOe8a2wPFQSw2gpu7j5j1+9OxGrH9e81W7N0nm6P1g4f9Lz1v/zhvGN0H8yTI9dJTckjD
fM+3rNJ117ViJIscLwjUeKdgVAPdMPP3lhmBpSMuRoKIND+oCt3NmcGxYPciiYYGheMqkx+FPUextVxXKr9JVuUTpGPjS408Lp57q2LI8J83ohnTz5qVHF7HHB+URKdXK/CwznFukjMav6VY4RY1B2KmBdpjwsqugXsN8seFMwprBMq9fdYA6Bpo0ss/5dc1iYSo+9rOdCyF+AYgl7g10K3Ueh/XJl4mZvEasdCP/ji3rWU6kJiJW+MkVknDFWdOJeKpd4Tpvyx0qDySvVQ5P3yl3YwvJyVmbvODqVItsEYmlehPalkjF6gyRH9oI/LTrjsESlWdT0tVK5vaAsdhMzCWWSL8f6fTrpNqW2KA9YAFc2jy1owl/zUpSiUjSZ0z1PFGfofqXr/ErRSMe2Mp4Dcpkou1ZnKdCZDUph9yER7qzylZ88/izatQHOnaX0GXty5DYjWdcb/2LIde1NO+0ImYul1GuzNBms/pseIE3wtJNs6XXzq0zzxXD4Xl9z+caQOu2imEU52G5QBdpUjrEZFsKbg86COIfktSP5u/+6elNwb3WrW93qVre61a1u9XtVNwX3Vlup4qIcwFIM86P+vYkQe536R1QxkFLTpWJNq5o08SccHMWq9xYRUme25LDUCnYppLYhHF+przEU3OmaJpVaQ24Nkio/8tAguRAO1/z65IXmSRXaYmoalRHcnDYfj5nj9jQtIcGxITdKYjA1Ec1IYdcEnUoHvj2euISGOTlCVXH+WXqDt4lchMdu5Gnpud+PfDntWGo++5RaCEK8eJq7meGpB1/Y95smRggWpCA2Y5tClOr3q/YyUB+YTJblWHBWeatmzoSjw/1SPcFp32DDrLHpvcfETG5VpV7TfdJeFdTSFdynM/nYkQ4N9ryQ2quPyQwLLl8V1nJ/wJwmSluXhQLFO3Lv9TifajJT4yltZYK2TtPLpqBP4cOi6WmXCRuuSV153yqrc9dgn0c9N7B5Z+ObI/PrDjclTSeSytSNGdI6Vi2k+z0yJ1X+972qzSnp9DRQ2gZzHsFa9SXeKX/VvAw6oV23R0KiNE49yiiBY3owxF31vL4tyof0yiH1p6sqkZvVmKr3Rve5kKpF0Q+Fwy9H/J/9oOfiJ280be6hq9ntQtxZ2s/zdt5No0pxMTC99vRfEt2HhQ//zu4rBqdOdEtQagMZYq+Z8dv9m9Z7TNUmOxWaJ2WE5r76WUWwU8JOETsWwnFH7up91a8bhLJtJ1+9scp+RoDKs5R9pASDOXtym4nHgj9Z4k4Y31QP92gIR8vu+0k/cwx6XRrZzrt/URJCrt0MazPeJS5Du/nycxMRqZPeueAPC0EKIXua764UheHne02MmrKuQ05Yjld2pqRC8rA8FtzPBpbBYx4W+t28qZgxWcbJ433i2M9chpY4Os6jwz6tHm5wI0z3huZScGPGhLKlKOr2FExQj7zeI1UZs2wcW3wmFSFMTtfVi1Mv7mTXkKlKxNA/2y8FNxeal0jzcSDX+zTtPRKrElYKxRhlSsesTGXAvF4QKTpBX33GTROxVpmmANPY4GrHKWShFFHucBRcW9sBUjAC1iWa+7h5WMex2Sbu3d2CmEwMVtPMkiGYoql3dUnwJ1Uyi9V7xp+ieltLwb6M9bMq37d3m7e2eIuZ/yI5YKcdP2tV9RPR7t+ldrSc1XmF1mOnSP/RIdHwkruNgU40SBL8k8GOwvgHeo8Vc03hap/1HEsuTG+7qnTKbyexVVqMzIl012qHyxm9J+rt5SaYX2uHJTdFyQNtImWINfmtGL2P1+9BM0fiQ6/K9VdKZu78dj0VZxDvKhN5Zdwa7BTrdisdhiTEfWF5W49j9XvnPiNBaD8bzIJ2VupH+RdDuA+UUVnBSEHapN7lNT30WdnvEvXWDcdKXRqvDGl/KZRROy7tSf262Rv8adnS4Iq3uC+afJh6j63XNO5312NvCu6tbnWrW93qVre61a1+r+qm4N5qq+KqN80W8i5dPV6LYK
JOdacO4k4Io6qxy71eQmav/ltZYH5wSKPcy+Tl6iNdrklA84PVfPbKDTXVp7vcO1Ir6jXrLO0X9Zg2LwE3XlVeNwTsy4SMM3nfY6xBCuSVq+oM5ulCOfQUY/BfJuJuj525ZmS7xF97+MA//vwNzxf1zj10I09TvxESnuk4djPeJkKyzNGxRIu1mTJVykNlDJKE5VOHvVjSmwVnM/vmShKwptC6yBgcea/bOc6euXIE4+cOd1YPk0kQDsL5Zw3tKTP+XI17Jijzllw9yFMEZyhGiI+6D837M7n15N4hjSetiUfGbL5PqWSKNd9cUlJ1+DRd6QJj0DSZ+vrSOPJ9Xz3AbPtuXyZImbxrdeI3JUrjiQ+qFvsvI6lz+MuM++4z+dVRc9z3/aYsrnQFO8YtwazI6qtbFYJB1eD7Hak9KNv3eaCcL9tkutwfkPozK30BIB+6Td2Xy0R63KuHLhfm15pUtfuUuLg1CU8IbyMxCubVwlR90rLIpqr6k/KcUwPN+TqtPn3TMr/+Iz2nO4MJqrqlxuCmRP/jqGlHNS8+vN4jUoh75UNe3lmG1z1mKYSqHmnyXcEu+hnFQXKqKK9ebrJ6UiWDnYXSg2SvanG+8nTNECidw4yB5llVnB//fYO8U8Vr1wbGS4tIoe1DTfMySBtJ9d7pusCyOEKbKJP6cefXSnWwy6qqFpajoXmpCuN5ITy02CGQajcmN9XDV1XElAzD6LWbUf2+06ceScpzLrtEODUQtfsRNPQKd4GXnzt2HzLNc9RruBS69yPxoOeuOCF2Quoy8dIgpuAbVSHH2W+f730iLI73p44yWexFqSf2K6KBJtIp9UV53oLIlQFsloT7dCG8PdTJd48fMuPr6zXZfO8Znu7hzUK5uMrBTUg0SFXKu/eVOR4Lh+8jzfOC/+GZ8JOHqw81F0xISPWamzkqQcUZ5tf8VonLYIQSHMYUShHCn6sPPh0y3buFefKIsPlzSzTEqs6KaKJVc1iI0eJcom8C8+IwTe0EAYd+4fnLHvPRkw6ZfOqUflEV3OUBui8Ff8k0TwFXuzq584Sf3gNgXxbs0wk57kh3Da7SYWRctlSrjaSyMoHnhOSMRCW5ANvri6zdvo7sNX3PdCtTN5HPntQVTVszRZO4LtfkweWg3mEbCiYW7QyNSlFYl8NiDBiQJWCCZzl4shfCTrZ7Oe4gPNbPzdQunvrtw8PVl78MQv+xrgtJ11z5ig5RKuM87BzuEtXzHZWoYS9r97DV73HRe8S9GMLrSLEFu6sKbiUJlTYDhultwp8MqWFLMw0PWa/R14se6oImD4JiVwD5dqJ83yFFiLuCPxmKKcyvC0sVyk0Uug/KgJ6swc5gZ4NZrsmLxRql6NSUToz+LmFPE1cC8l+ubgrurW51q1vd6la3utWtfq/qpuDe6loZ3GAI+6RP+lNVQ0X9UqmrnqSjKkR5umbKI1LzpAPNs3oNTSxwtJvap/4kKI3Q1OQSqfzI1QvphoydVZ0xc50k7j32POPqlGXuGk3wGgzsOpZ3X2W+V0qAvcyUXbt5PeOxZTla4h7SO1XOvE18mvecp5bpqeM7e0/rA0bY8tnD4rhcOh7vL8RkCMkyXhpck7aJcpkt7QfL9C7SfLJ18lU4DR2l7vwcHI1LzLHFSqH1kcvcEGaHDFUdiYIdBTtD/7EQ9tV3+ZX6piekUBpH83lUWsIYKN7iPw56fGpKlUE9anYMZK/nQapSLktEQlRCwcpODF+TEFAl1hlM9cSVxmOGBZkCpU4NSy6qlraaAJT2De7jifT6iKt58TLOuBejSqyzymjsG6SqTaBc3v67iDlPlNZfCQopbSoyrqowXy6UviUdW8quhe7dlr1uni+q3LaNXoPnWfcTKF1VdGLCXhYk131IhdgZpkfD6Rf6UeEhQqNe9DQ43MXgBsEs13QxSdU7a6n8ZyF5Id9Z3LjCGwv+kur0dlbFt7WwcyxvVXHX6Wz9M3l933Cnn5WrSJJa9S8WgX
jIFDGkPtN+suqNR4kG6tUVii20T4n208Ty0OKGepznqHn1Q1X7QmZ+bNQLVxWZYdCs+u64kJIhzKrQir2mmQ1DS46GpgvMswVXqndeiN16utRHGPaVUdrs8S/LNWEK5X2aWUh7CLMjuazvuwsbg5epVd9fQe+VAraej1VhMomq3iaa756I7+42JXOlgpTqmZYklCjYQyRFw4Ijh6vWY62me7k2EhZDbgtmli2Fy5+VKuMv6k2WXPBDJPZuU/v8UyQ97rZrwISEZK8c2LpfdhH8Rbgc9fhRp+lZ9JgAzI8Ff9ZrYUujmxdl39ZulX9/3uYNSqtT85IKcW83kkD+3MB9oERDd5wJLrPMnhwF3tYu0+gYhwYRsE0iXDxuF4lZNoXStokc9ZpYubdPs6PvwnZtxGg5XzpMk0j3ylcth0gpENZOS9JUNjvpmp0OLcUIJmZi9WK6nKFV4oodIhKSekv7RpVSwD6dKbsOebnovreNzgDEtKm6OIvMC1j9PhhfV699kY1/W4JRT/QecBmiASnY5XpdNJeCH/QeLgJujNjPZ8quReo6JilvMwzK49U1x01sJz7uBTMa8i4hxVCkbB2M67WhPGs3Ztwlqoe7Um/WdbU0tnpsExhRbnkldrDyxWMm3DWk/5u9N/e5LFvTvH7vGjMrQzgAACAASURBVPZwhm+MyMi8N2/dGhpR0Ng4OICwoIWFBUIYSLhIqAVqDwcDC7CQWsLAQ8LiP2gTo9oFhKq7uqry3syMiG8435n23mvCeNfeJxK1WnUxqlTJeaW8ETfifOfsYa11Yj/reX9PI5y/0rVDD+5iBc9VkfXbiZB0NzE7SDeRcrrMVX8/ECeHsYU01vdv0jJ3BIj3Eb9v6D8KqQV3NMR1xtYdCTtAuNFeAJOg/1gY7i0mlIWLb1ImrRvMlLBjQkb9vpHxwvb9q9ZVwb3Wta51rWtd61rXutbPqq4K7rWWUr6mEG4MZTK4quD6neCO6u2TXEit4M6w+jEs6SzTnatd3Fb5rIcAVmhei/JnQZ9OhcX3JwXsKWoHbH2JP0XltlrBnjSRhqz+z7ht62c1uLN2wZdSMFPSrtkxYmbrkTFQPTxzotP+W8PwTcTXjuAfn2/4Pt4hnxtsgXPXcKZBbNE0nnrMpks8v6y1S7gAwTB9kdYjk/I/7dkwvU9IH2Gy5C+Sj0KwnHY9rouUIuxRT7J99viantW8qedJoqZB9Z/V62XHdOGkiuH0qy3+GDUDvV4vSRFzmvmIHaW1yg5sPeYwaWP8FImVTVucWVK+ZIqQ8sWPO/vZYtaEnpoQZM4BhqjEgjkNbP6Z86T3aWX08wXM/MQ96uen2zX57l6fzH94IVdqA6g6LEMgPW6Rc1jSicz+RGkvvEdzmEi3a+zrAff0Cn2nim315JXGq8dQRI/rNFK8Q07DolgQIvK6R+SG8es1uRFSI2QvyqgE5MkuDEgToP9R/W7+UJbO9NTXMXsoZKu56to1fEnzsmPCDJoZb4bKd0TnyUwFmTuzVfU12FDUy/soS168ifUzm6Lqjy+4k6HYQv+D/t35a3Anod0VVZIbfX93jOqnBr2Xq0a9i8aQOsvxG1WB/W+qwp1UlR5XntxnzDqQzw45Xrr7803E7BxT6+k+aiKXHVWx3f6lHnP3EmleRsbHjvHW4vfKZY4rS/Oqx7P9S2XxhuhJ34zIdz3GFaakRBE9d6CI7hpZlu5yf1CCBUD/lLCj+hSHP3ggNYb2WVm089qSvcGfC6vvDcdfq/80Do4QZCE2mDYSJsdqNRKjJa8jqTjM3uGPdWxkveftWyKsLdmCCco6nneQUu+JG4+ZMnZM7H+9UsLFCOvf6DEffqXd7M0nR+wLaZtUBjPgD9WvuS6kVn2gR2OR3DA+fEuzD8tuDJtO1zqZt8oEExLD3QWx0bwYQmgQYAD1H/eBnJwSKlCFOw8WadQYKj4TR4vYjO90zZzeWk25ugnkYBgni/GZmBLT7GMOBtdGcrCsH0
8cX3uQgmsT+W2mecBwL9jR0n2eiBtP+/FEvG0XL2a86ShO57I9R+h93XlKF/Vx3VN6jwkRGSdNOXzdk+9vFs89Sb8HZr+uFFUSm50Qo4753BVk0jlsd5bclkufQbn8qgzchNtPFGdIDxuKNZh516uq6JIKMkX824SJqtwPDzUpcFOQqGNObiZKUKXdDmb5/rJnqTumQuotYevofjwp47judsZtQ/aG5vWnHnsZ07L2xr4ntUbZz4+J0iduHo/zV/FSU+uU5w7EYIlA/3gmbCuNIQtx8IjLiBRsHylZFXCzrp91mHfEYLxTnrs7gWSzXMOwKRSvVJhSVClud3qt41bPKzuHP2bap0FZ+3Xn9sueir9qXRXca13rWte61rWuda1r/azqquBea6nsjHaQNhk5X7h9kmF8oGa/C91zwU5FeZ4v+vTW/zCQqgctdUKx6pd057R0TEuE5nUibDXRJwsU62mezkiqT/bWqK90Sky36qMsVvDHiDnrk1yzDwsrtRjBHkNld8riFw3f3GhClTXL0607gzkZYlKToBkEF7Tzu5jCGI0elPDTLPUCJRnt3M6CBEGCWbrF3UHVvXQfsV3E+8S480yx46mycs2Thz4ThwaJQlknzN5hT0L7oh/lj0WVtGNWP/Mu4J+OxPsVxl8IFKkVinWYWMjO4OfksXZdb5h2oKZVg3s9UazFvJ0pq5bU16f/9Qr/NjHdNnQfT5i3SOk9cg5LqlwRwZSiSUFvZ/K6g94rb3GcI6RqJ/MXnlyZAsaKcikBhgFpG1XZp4Q5jpRVp59X/bGSM3hNScsrr8zdJErBmCkKpwmZgnYS9y1S77/sj+Dq+LndwBSgrX7h83jx8OYLTxeoOwwjdnBIbHBjoXu+MJunrY4FO5a6E6CeWH/U6736WHBjJnkdo91zIjdSfeQ1zWhMjI8t0GDPHamt+fGpLKl/uZ0TjjLT1hBboVjltc4pQfJW1Y1VpvlskWS047uH0zf1/veF5kUIK+ifMsXA+esVJhbc573+/Kavn+mY7tX3qKqiJrUBnD9Un6Gb1WK7KKjhsd6vk8WOgnsx+IP6WlMD2+8y/edZLWbhhbavSpEwoyqtofJp7ajqTbMT8rkjbGuEVjCUVb0+opxQO3j8XipnG9qXQv+kr3HHhDsFyOrrXljQhkVVRSC2wvBYr9dodU4nof9ex8h01DVgf+fVv5oEezJIhFCnV/tSyF6UGpNUpW/+8oWy6UkPen1NKpryKBC2DW7ISDFMX6TTta8w3ilfPFQShnlVjvjsnW0/q5dxZh+HtVGKxx7crrKxb2Ymq0POgXSryqc/Fda/rZzar1QxjOt6bydD8lY5t4vyJso2LZrqZV8duSmUPjGFSmKRon7oyeh3hEBaJc5PG5qXurb8IpBsIR89p7dG2dHbSBwcTSVRtK/13jiUcb4yxNUGe874Q01NO07ETYM7B9KmITkDa485x4W6YnZHylT5yt5pT4F3P6UN9B5zDuRVo98pRr/PirAwrWUScqdc1mREz9Fn0tGTuurTNTCtDf4tLezx1NrKt76kytmXo64XtQck9pZihOO3elPTbYIoSB8xtuDbURXSs1083O6k47xYmLb6GXHbYoe4kGfaHw7Eu57cWtyTjgUTJv3s9sJ/bl4mJHnMoKrw/q1XFvJ46f0oTSZtp4V9TJcYTg2lvgZbICqjWW4mcjQYW/TXp/odZwt5k5TEVHTcxpXuQCzrWBTyKpGdxZ20x8SdM3bM+H2lgkQlBBEz9hhI6wb/25ergnuta13rWte61rWuda1rXRXca11KYHyXEJ9htPQf9WmyeS1Mt4I7ayezOxWat7QoUaC+2GJFKQrVd+fGhD3Hn3hwc2OJK0OzT/jjJZFmSacxBok1iScVZEjk1uI/HZfs7WwNuIKh+kZbq0/QpSCrmrAWMuZUu/jHiPWOdylTbM/5q0pnaFSRM6IsU5LAZKBPF4NSNOSTw5wsbhDiTfVEBlm8fdrhLpDhZn
OmcYlPcY07GeRJnzrtIIymeqtWBY5WM7tXBZ4vSTnJVwV39tfFhP/tC25+eq1qZHhc4/YjaeU1n33lsbuhnq+ety2F6cMW/3JmzmOflcX5fjSvU0386rVL1ZlLt2pVOot3yHlEOk8xFkmF8FAZt7tRqQhZ+YtyGrVr+TxdlNP7W32PVMkZL2+U+xvlFK+rmv6yp3Qt5jToU3cpsO5Vja4qLykhgyZzYQ35ZkWxFll36tsG5ScGUU5w12BC1I5iZ5FjTUgqBRolTcgUMeegaXk3zaKG58bQvglmKrTPI29/0CtpYO5kRz3k3aeJ0zctEgphbchOFTr/VjudRcdFXF8U+EW1fKqGX6OKarhp6D9HwtrWsWWIq9qRHTW1LJ4dpja9u5OOwTmBTA7K6HXzaVrtSi8C0y/v9PKcwsL/9W+BcONpd4nkLamKPvYkhAdViqeHqjYFIfVF/ZdA+8nWznAwozJhH/+PwPmdW9jYkgvZCd1T0HWgc+o1fj5R3qscWoyh2es8nCkteZWQyWA/1p0EqyxuOwj+AHENzb6qmrXsmLQLPyTM24l4o+Oq/XzGvKh6Pf3+OwCaV89ohWSNeiG3gfPv1/k2GdqPDhMc/iAM7zObvzCq+C19AtS1zODOSk+IH25JrV2UxdIYisiy9vlDwsTCeOMuaXnvilJSitC8GtKg9Itil+Z7JZN0RT2LdTp1zwH//duS3rekNZLA1rTGOs/n3Ya413Mpbt46g/zaIDcT+FzvhVAOXq99UNbyTLiQucsfKKIqMNtIGQ2MSs+YZrZrQekEGexBk6/koD0aMx3CnmHzQ8KdMnFlcEPGnpXx7X+rW1rFO0xT/alH5aHPiZczUWJe9y/IDSBlfV1dE0xQhZeYGd553v4Q+OWJrgvYulOXs2BtxtrMOHqmXau7dfnytsOjYfPbxHjvyE7Y/rl2+RMz5jTjPAQ5nJTdOq9tIZN6w+bPK/v8K0GycrrtUPnWQaD9wkeb9RolL7QvCZOUeALatzJfA7sfSJu2rnuVhjOGRcGd53v3VBjeC/LZE+8S7s2S5s8zBf/sCEmQyWji3Pz95mcMB4tPPb+0FFvIU2VeV9N082qIkxI3zKiJZhKF7Artp0s/Q/bKdnbnmmxmK9+97tBOjx12SKSbZvnez3cbzO7I71pXBfda17rWta51rWtd61o/q7oquNdaSlKh+9Eh2UGG9feVa9hqDro/q/fWjpn2eVR1quZQt8/j4sssraV5C6rYhUT3qSoEVrBPB+x4A7moh0xUbZv9mvFO88JtZbsSsyZhbdqFB2umtPietBtcsLtBO2QPKl/ZmJXjaivntXXKUkzawQkg+5lhqr8SzZLLvXhwrapWpSnkWfw0gGgaC6CdvkZ9iV9tDhymlrxJtH/hFwWgiP6MFPX++oOhuNpNWh8zhzvBHwupU59mcQZ702NOE+FxfblRpWgq07YlbB2rzweKN6RKmXBZ6QelcepRrZnsZpguj7TGIFMkt4649rhjAFMoIsgXhARiIm96pG2WruRiNXEMNF1Mor5uObYQkPVKCQxA6dtlfMl5UuJBqp7bc1U6NyvkNCw+q7JqlXVpZVHiZ+WVUtSDOwRYGU0q+jK1qKvqbCoLe/dLhaesOnBK+8id+sHNOVJuG3J3UVpjKzgB93rGnzpVEqXQvuh5pc7y9Hd7mn1heDBkr/5MxNB9oa65Iak3ORXap4Ak9U5Lmj24DXHlGB4sydfMeKuK/kwJAIi9UgTiWlVgO0LsYKhz0A3Q7C+vT412MWuWfZ2b3mKPQceCCKl3pF65rrMqFrfqtzMBii9INJQ2a7pY5VliNBkpbFRRtYOmtrlzXsgqZsq4VDCT7qZIzJUNCk1lNp/f3eo5vM+UtlB8hiyUNhOrB1dOFrmbyKeO2F/4wMdfivqDAcme5mViuu/wlQXa/OaVdL+Grc4de4rIlDHJk24Tbh0owIeHNxqrnxWS5fiLhpAspYAMDaehoxjo6rpx/EZY/V
A0sTFZVSF79Uk23z0DMH37gH87YzuvlBcr2M7TbC3TzcXTicB0o97P3GYkCf6t9kKg15Wi42H2Z5ox6a7Guc65dVvnpdUdmFSw+xF31y682Fn9XX+raraRQuMS1mRC0teMwTNNlhQsaVSaQk5Cu76kMQJ4rz7dvtGdnrEyw3Ouc6eqoWUjhEdLSgaeWyQI41d6zN2T5Xxv4NHgjwV3LDSflWVbVqp8pm27pC3KVMidQ1IhrZtFrYzvb/R7JETKutM/n1nb865XysTHNeYclSt7toxnR7B5WRZyNoTJUZJQJos52spZlkUFl6xe72JVAZeQyJ3HvZ3JdefQjIFyf6NrryjJxB0jkqG5qV77CUB93Iri1gRCMwnt66w6FyRDt1PPuj1H3O5M/sJbm1eNfo9+2iNToLQNed0hzizfg+lhQ1p7bv5iYHjsiT34oyOuysJ1VkpHof+NY/Vj4eVf0WMprujOJkAR3KulWKUkSNHek7gt+Le6bvTaQ2KirmFpVWifhfZFlsTS1SfdyYi97nbZqdB+GjDDRXV2x4gZ5l27jDmOpG2nbOPfsa4K7rWuda1rXeta17rWtX5WdVVwr7WUGRN+r6pB9he/YbdTH58di9IMpowZKo9wZq9ue/Kqwb4NlJyR06hPlY1XhQ8opzPkgu2bpUuYmCjtRcnznyKkRFm1mFNNrikFe5wWFU6mykg9BwgRY4x28Tuz8FA1xssgNQQ7d8pHbV8zqap0432hOPUcmknTZSjqGSt99TutIrkxyKvXnPguIU8NqS2LBzB1+rTtP5wpRRijw392tcO6nmYPucuUkyo9qa08wL0w3lffp6vKaYZ2rB23nVM6xDizAA3u0574bqtPuQbi+y1UHjBAXrekbo1EffJPDxvsYax+1tmnNep1zwX/fIIMMoxI21xoAwDGYI6DKgOnqDnnuzekqmJl1cJpVKJB1yjBYN3D4aTdzQBdo17ZqiTrQWZkuPAb8/0GvKuJNcr2NdNEXreX+34eKV2jKU5QfbXDxfuLppTN+eWECax6b0vbXJTk0wDRKmkha5f18M2K6Qt1TT2h1YO+23J6b7ATjLfCtK2paUFVOJOge86kRuifUk2Rqj7UpGqi3wdSP7OVi3KIv8iUd8dI+2rIjXrXml1gfPDILIw7iJ2+3h80RUuKUk0WvyY1XW0ouHNBcsYdVDGed1rscVLCgFXiiJkSJhhOH3Tcg6q2xRbGB5DRYAchPiiPevamT3eZ8V2hebK4k6pAuz+wbL7LS2pa9kY9wE4IDyvcMVCsJd34ZayOd8qCzbXLmur/lzZh3ZyMJbi/7MgWwjeJ4gruzWIHGN5VNVQcfO1Yf0xMt+rfL+Ze2car2uUdlN7QvBbkbOjfj3ibyEU4TnoRQ7SkIsr6NAWzGjk8OtyzI86bKEXnqImFcav+0fbT6SdeULcfIau3Oq08YauEmfODIWzrvYraxZ/XCbOKtG0kBsfk/OJ9zCdL82IxEzRvhf5zwL6N5HUPdX02b2fi40b9x7sT5W5FESWEpE7V0OFdIa9UeW19ICZL4yICi3qds0HaAm2Ejaq81mRWTcBUn+XLqceZzBQtKQvWFJyp6XaTju/77YkxOMbgVMktUG4nGFtmkPLpm0L3JPg3HavulDD7s2LPV5cBbXdn8raDqvxTCvYw/mQtKetOPa/OqKq67VXBneVZI9iT7giu/uLI+usbJDaklSdu514H3TUgs9AgmldVVmdVdaaDmFPSNajudMWH9aI4zkxwKYX4bkP2Bsm64+bOdR1LdccvF6YbIa5KVYdlme/Nvqh3fUjYY6D4y/ktcz2o5xrvdOdLRKk2Y74kmY2RuGk4fdPgToXVj8rxfvsjlvluR9F0RKtUi2annO3Qg6mee3tWVTYbfb0J8M3/PvLdv9UQbi4pbCZoumNca5+KxMoFP+h1ntfOeQ7ZMVOsUXZwZXVbK5jPO02jrP0fbgyL5/x3qauCe61rXeta17rWta51rZ9VXR
Xca11KYPObTPbqu22OcxpTpv/uQG6demUapyrfFBa1T84TRoRizKLG5ruNKq3HcX578rrHHAbyqiPe9bhPe2UZdjP9QJOzGKMqjCFp97sxyjmE+nsL50C+W2M/v5G36r+aPWdyVmZq6VpV9Yyql/1TImxqzvsDxJUqb8UXyiph2kTZe6SmuCVnICi1Yfbn5m2qf6/v49+qolez2PenFskQNwWpnafFAbaQm0LzahgftKM5W0P7fPHkxV6q/7JyF+8a2peLB05JBHlRDBa6RCo/eVyVlEm9W7yy8z0zLwd9QSmKjjBGFdBx0u7bxiupAPW3SVAftZkiedWqx6xtlFsLFAx50+ufOwOm1b/LeVF5OZwRIyBC3qz0HosoC7eqmOYwqF84ZvWzWUHOswJTT8xZVXFXnXp450S1lBZVtzirefD1vEhJx01KyE59n8wqdYiIMZw+9ORG/XCzHzpsVLEY3hW++3ca7FC7wkeIc666FNq3QljJ4pfzx0i2X6SIhawKjq2Z8bmop3imPsDFn/s8kp1hfNdgxsh42y2UktQIYVO9bAlsKEwbVYfm7mY7FrIHO2nHtTupmpzbL/iR64s6lm9X2CFy/rpVf/EsrhhNdWqfVLXODtLmp1qIO8jC0M0eSl99uJ0sXvnsBAvYQ/X7rj1uN5I6S9yoqjo8COE+qTpnQGzB3iiPMx7rbkwWwruoNIdKK5GoPsFUw7pSr8QLTZJTjrQyuQ227oaYU8C0luZQsGfD/uNGO+Un7WoHfc94G8EVxGXKVFV2q8oUgN8X/Fnv6/qHC3EkrZuF3qD+b1Wxc6NJjXbI2NGw+r4ec6ds2uwNw1eO8T5ijpbuzRArGSNtMiZAs9NUw+HR0/xYKQGzV34KmCkqQ3zbaX9Cp2tnmsfhQcjWcTQrxhdHdoX9u4lyckqSAO2cr30FZjSkOz2el21c2L1MRpe9AudgKJuof+YzctJr9TlaTbXKQv+XjvhVxkR9/3ZX18yDfte0z6HOWyGve00j63SdN+eoJIWq7JVGv1/MaVrWqNJV730pUHcUy2ZFvl1h5vme9V5JUC++P6p6PAVBol6fuC4QBImCCfNYAH8q2Dmt89bRPmtanQHS2lROq0eOc/+JUm2kfqY7RopTmoapc7lEmLayJIKOj5psJkk/DyCshHZnsMeB0lrIhfCwovl0XEgS87jLrde5k8qyXlD9/WmzIa4sYSUM74TshPPXVUmu9z3cJbCF8StlYbuj8msRJRyAUj2kems1RVH4/K+1mAipEjbCQ6L5bHW9aIru/FTiSWqr39cL6x8ixQrTjaEYIXdW1/rlvofFh63XtNG1fqbp/A51VXCvda1rXeta17rWta71s6qrgvszKRGxwJ8Avyml/D0R+QPgfwEegX8M/MellOlf9B6kTPcSOT867eSehbOTPg3al6N2vo+DqmLeXRTTKcBpVA9sZaLm1lG8Jbc6zNyzepRK35JXHrcb9D1gUYIXnu1poHhVgMuqo1hLnr10Q/UVJlUK81rTg4j58pr5KdcaSttgTpOqySFfOs2L+q7KuoAp2Caz3Zwxt0cOJ32CvN+qCvDjD3dKVjg4aDKly6R1Vaq8pftk2fQjx9BgTCFaCF8FpvvKwXyzyGTIX01MpSX3WZNyVonTek56E/rvLcUJ+28dzaHQviSGd83SQd0+R/Kv7tUr9t2zqhPeKhFh5gS3DnLBv43aZW5EFdsM5UZVVTmcVDUvRf3SIS5kgbn72ExJCRe9V1XSzd7TuNx3nKF4Q0bJBRiQ/Zmy7hclr/Qt7PZwu9XPiQkxZkn/mV9TRCid03vVOBgn5djOZAVnkVKQENVzlpJ6zbyjmNnD6eBwgtYD1Z9mBfP0RrnZLK+RtyMiSqIYbw373zNMt2XxmLqzKpJ20o5pO9S327N0BGPUp658U0P/VAhrp77u85zco8laUnR8zkq3lEK4v6gU2Rv860jceF7/0PH8x1tV7aqalH0lfgT9/XBnKAKpM5
g4K0xC/0kV5eyses0FUmNY/6aSNE7q58utw+4HSusZbwxxpTsOANInzNFz/jZC9SVKoxn0c+rVzEidbtU/qOxVYboTTh90HNqxkDqLHSwyJpI3WKt+49mPPL7L3P3qla82B1Ix3Dba/X3rB/ZR5dlchCF5Xoee/dByHjxBOtzZXbz8RderYoXzjcUN6pHsngKl+o/DgyriqVG1zG0CKRhKdrgH/dycrCY8TUbn+iph95bcVNY1Ohb6HyeyV2XWHwJp5cmtReq9KBb88wm56Zh8gzsrTSK1fiEaFGFR07qPgh00PU0yrH5bvaq/NFWVZvFnTh82pMbQfq7HvFldaCZAut9ixsD4zZbTVxe/OAIkIW6SKtavHnM/UV7rurGtaqyB1CQIqpYvqi3qT40rVcqxBbEFVlH//jzTFAQZDGWTCNtCaTJM2ntQrbx0zwUT1IvefX+AVEjbFmtl8c6apzfKutfdtzFA4xYyjnyRamVeD7reDJHSVDVzDLrmANzd6Lg+j4QPt7SvmfHWYGJVbmH5vpOk90WKXq/ZK6tzGeLaYkIhGYM7J1Vrp7wo96mzeCuQlXUd10qyGW8NsXr6JSkJJfZUioKqxQiMd/N3gTKkw70SPGxQmlBuHKUm85lTwBxP+v3nzBffu34hDhRR9bSIpuWZKNizEDaZZlfV67uCTAaZ6k6mFyUhjLIwttMqV9+s7nhMgGxQ/229qRK0HyBK0esqyuZOzYUAkp2w+0NPsy/LsWmimVnoPQBiRYk43oHTdFP7clVw//9c/znwf37x//9b4L8rpfwd4AX4T/9Gjupa17rWta51rWtd66+5rgruz6BE5Fvg3wP+G+C/EH0U+reB/7C+5H8G/mvgf/wXvlFKuEMgf1B+a+j1+acD9T4dTqRfPGL2g3ql2uan/sjTAFNYPJJmSsTW4T8f5gNV9cg1y1MxU1B+4VTZottOVaZOc7xz56r/019+prJz86b9gqagiWb2WP2/jaue31a7M2ui1OkrT2qqxyoK0ifaPjAcGtLeMzQRkUKqbMcfv9cEKGL1axVwO6eqXiUkFLXS4W1ijE47sEch7dyifFCgNJnNduDsMl4KYdfibiakqghxdOTnmnzWCLGD9JVbvFsALTDdeEwo2IfN4sPTDtrZRHm5TnY/qj8NyJ2DuQu3bVQN3Z2UZBEC0jSLfxrUx4yzVWEv2rE7K7zzo3HM2LEyi53yezGGvF1ha4IUWGWRiiws2+KsemDne1oKpfqoZX+CTX9JLave63SvZAgZgnbXTgF5O4C1Fy/e0yt0LcVaSu+xv32CGCkpKSEC4DyCd+Rtz/jQKtt5Ar8XTL08doSbP0/sft8qm7i58GkvdAVVskxQckHzPBFuPAQuueq5YF91FyBvO9JNhzlMFC+Lspi9IfaGabti2qqyND6ogjOPHzPp5/hTYbhXn7YeU1mUvexheBSa10pmOCey19eM91VVXbslEUlWDalzZAfjY6L9Ro/zcXtkem/pnM7J3gViMezHlnGr9+Kw6UgC+eSQs63HDDwL+1+Z5Zi338F4t6LZJYoTJjrcKTLdzTImpGz4eNiw263oVhONS3RNYAz6mr4JhGSxe+M/9AAAIABJREFUJvPr+xc+nda8+cRJ1rQfZyVPr+fpvaF7zstYs4N+LkB2hunOVeVQKEA5O2QdLylNUVSxbDXBzW8mQmhxB7vMd5Pg/FVDs1NlPnuLGSM25oV2Eu578rrFnAPOW1JrGB69eoPNhZaSfVXlm6ocijKBz7/Qe+TerK7Fa8ENeq9PHxr8MZP6mlZldEcgr+btB0N2LXFlsaPOr+FByDW5yr1ZclvIfaYcPXJT6QCmUEaLDDW5bp3I66S9B1WdDTe6e2WOltzWPy+6QxVu6kDcNZggpAzxwwSjJa8yZm+W9Kxs69wYE8PXa7ofT9pXkAp5Tu972KpamzN53RI3De4wkTqHqTt19kX7N/K6RUbd2ZOx7vrUHoA52ZEQcc9H4toh2W
ry3VRJCLcJuw3k3DA1ifbJcv5Kvaaunvv2L0adO6WompmykkrehoVBbl8C5hwIDysl3gyasFfELRSFsKlpc7H6uovO+ewh1LYFfxCOHyzt3uBOmdwa3FNBSsLWRK90u1Ye+FRj56IygNNdVXQB/3HP+K5hvBeyLwyPSl/JXWacp46pflzR7yh5NbhJGL7+gjtri/KpnZrlwypjj2ZZE0GpDKmta2KdUsWrv3qs5JDcQP+j+sn9KeMO2icw3Tc0tdcktg0mOEzrsR9fVZVv1pdEy9+hrgruz6P+e+C/5LJh9wi8llLmEfEd8Mt/3g+KyH8mIn8iIn8yleGf95JrXeta17rWta51rb9VdVVw/5aXiPw94GMp5R+LyL/5u/58KeUfAv8Q4Na+K6l3rD4G/D4sHkn/fELOIziH/bhTT+cUVDH8kpkKC9dUQkR2ET/7aABzPGOnQN6uNMVpCpV1Z5eucvfjqyq9fUveNGA00csM4dI1HPNCTCjWUBplTdrjtPhQJShLV6YIzhA3jSrK3UUVa14gft8yNQ2sE+ZkGHKPrBLlPHMEDbnLNE+qVORq7Wx2mqgEyiUdHmEIjlKE86HFO/U7ZX/x5JGE464HKeQi6sGNBqkepjIZpjtlDufG4BtZuILda+Xi9obmLZCtId60+E9VHVz5RSk0WRWsdL9GrJD6RpWNkBaVN9+uVPEYRuLvf8DuzqSuwYxB7zWoyiiCOY6E92vc05nSOdK2w33e18/tFk8tMVJ6rySGlMg3Kz2epzfKZkVpHcVa7Ouhqg5lSbADlC6QNaWMlCnblSYZVZ6uhKTe3FKUc2kNOKc7BvM47L/4fcx1jBa4u6HsDstnCWB+fGZ1nhi/vWPz5yfS2l/8mlurDMyXQv+cCCtD+5o4v3eLN705ZvxBO9X9Ud/UjppKtTAy1w4ztBSrfl8pBUmeuPZLx7PNidSJelSnojSJjHIl6yOqHZVmMNwLq4+Z3R/WVKEs6g8GxsfM+jtDuBHYF1IrrL4fSN0XNA1XPZbeIN6CQPeSKX1m0+t9dybjbaK1kVQMGcFI4d3qSFMl7ud+RS7C29ByPrXEg8fuLdPdxWfp9sJwZzAJ7GiUBLE2pKaZBVNu/hSG13vevo34VwtvPbsPmddV9YACu9HQ7AzjY+JjfI87CWFTEClLJ7gdqn+6vm/7rAzR7C8ajj1p9/bb79VErJcWMwnZWhZt6M3r7gtKkwiHRrmlsoi8eh/uDCbpGKG3tC8F9/mgzFaUuVvqjofbnTGdJzeG7lV3ZkCZyiaqJzNsy+L97D4bTNDjDuuCPyo1w4aCFE2ws+dEqkpnbi2mc8pa3U/IpIzy5s0tYyw7R9wIZrCYBCUq91uiYCr3Na60810q79i+WGJfkL0lrWuPRJOh+jjN4EjrjBmF7C67THZv6J6Fk3UUU/QadwUzsaRnxZVyhONKfcvjV6uqahbcvip5972mtp0S5jThRDDn8JN+g7nnwxwG7bK3VnedrMC6XughqIorQl61HL9paN4K5/cXaoR7s6RJsCez+HHtKPgDy/1CIPaW7A3NblKq0BgpItjduQ4OZU671wGM9pxgDM3RM1R/7bQVVj8WhkfBDoUG/U6atl+STCCudKfm7ilhz7q2mtOk6XzU9dKYZb2VoDtVdn/hnedtR7aCnfQ7a/gqYc+meuvr9+nJkjtVZM3Zkn3tQdibxYMr0ergFNH7Xa9J7jPmNB+0+ml1d1M02WyE8Z5FenNnVXTtmJEEx1809E86+4b3qoJ3z5N+30NN3AyYMZLvNvyudf0H7t/++jeAf19E/l3UTXAD/A/AnYi4quJ+C/zmb/AYr3Wta13rWte61rX+2ur6D9y/5VVK+QfAPwCoCu7fL6X8RyLyvwL/AUpS+E+A/+2v8n5ur7QB/8Nu8TVSiiaNlKI+nymAcz9JFineUXouHs4Qye9uMW+nRYFD1A9oDmdK3+h/1iJDxOzVV4S1FO80wQolJvy/Ga/z+5sQwa
6RkHBBU8+KnU2vFvu0J686Su9wh4npsSd1X6Yfwc0/geHRML6D5sVgB8P5a0PaVMXCX1Qi0G7S0mb8zjPVBDJ9ItUkIGMyxqlHy++F6ab+XJ+RKNiPmuKWVlkZuaIJQwDu1S2MQX9Q1U67eC+qWDHCdKN+5GIE/1TPdj+oVwlUOWgcuVFvoDlMYOWnSThRVA3tO8xpInea767Ui0299pMybUVwT2dV5UtRskUdG8VbStsjo6qrqq5HVd4bXV7KqlPVP4NJUX1jb8dL6hyo6lrK0gmeb1akTYt7PZNn/nH1VRZroQOzPyslwbsLl7fxCyfSPr9pB3arme3L5zWe8vSC3N4gxzN22Gii29OJtJkz5ROnb1r650TzFol9Q26E9i2pWgiMt5b4ThXdYgRJVaGNZVFMU+NVnTIXPmxqLe3H04VnKUK2QugNw4NhvL/kw8+eRdBxEA+G8d4yPmbsrIb9oirBp5ptXxW47IXzh0792vV4MPr37pyIvfo7Vx8nVn/W87x/AOBzl/EPA/Gpp/1oGX49qc/27uLPTnuP21lSV1W/TpU6Owrts77GHQvrH5UNLLnobsCYmO6axVcee0PqHNPBkO2lq9y+OjCXDu7RgDsa2s/Kvm1e9dfZF+tPRTnBA7S7pMlQbwNp0144wAJmykw3ELeZYgvlNmFeHbmvc9wW0n2CyShp4GTAKCkiVdXKDap626EQNgY76lwM7zfLPDWnoHPOW+UnHyd439E+B3xVlfe/50l9oZgCVuekZL0/pe4AtC+CO6m6a6ai4/QYIENc2fm01Me9smQruFPEhEy2ZlkPz+8FE0pN6FM/pR00UWveZWpfDNON7kA0rwaJcPd/wcvfBVt7A8JtpnSJcI+SZ/aWtMlIl7CfdE2Ydx1u/tQwPFRl+KjK3+wXT72eoz9EXTNyQeKFVwy65pmT7srJ6aA7DjEhEey8jqWs6YenUdXbnDXVLoM56o5WaXRXKX19R1x55cF6ZdAOX8+0E2Us+8OFlJF6vbizx/T4daNjKBSQRpO4hrqjdKrrT6/HIrnosfieYkU5tI/63uNDYXgP2WfyKuNftc/B72VJBQs3eu9TL7z9umH9o1GOsjPYmQtcxxYoNaJY7Y8wLwfSB+0dyd5y/May+2NNvWwfzmxXOmnm9oeQ9ARTMpx2PbkzxMeC30zYmbHtNAXPmMJ49jifcL7SlSr/PQSLtRljCse9TpZz/0XMIpqI1lghrixmpO5YqT/Z12sY1g57DJj9QPxwq99fpSx9Or9LXT24P9/6r9CGsz9FPbn/09/w8VzrWte61rWuda1r/bXUVcH9GVUp5R8B/6j+/p8C//rv+PPYw6ge2NYj1bNYhgHpOlXAQlCfYynI/qTJLUBZtZipJlTNnMJSyJvu4gdat8oNrCkrc0c89ouUssarEhhSZXWOMKrXaPY1ls4jeHLbLMSA3DnMEElVHcneYPqWdNNqIo6t3eR3QmqqerQuuJNZVKOwKUz3RRUNf0nq8q92SfLxb0bTxhwXRqmFGufOy8sG+0NLs9dEmLlLN7WW4fcmOBjsKIB6E4sROH6RNtSosmBHWH1Sj6cdM2bKy3ml3tD9cNLrIarMklk6Z0tNjdIMc4tIJm5b7GlaPK9mihCUGyljBM/FF3ujXOE5Vx0RxFnySlPKZAyLl9cMk46DvkWKVwV402E+vlDe3et9rsliZWWVaBGiKq9dsyQSUYqq/o83qtiXgpkSZVbBqJ6zYpfzJqgaDCDPO32b7RqkUeKDtcrOjZXnG6sC0LfwzVekTYvZD5dx1Xpi9TUOj6qS7791mFj9185Uxmy9p72SFLrXzHArTFtDs8+cPzhOH3RQrX8IjLfqe7TnjDtrVzU5Yz9VH/N2TXnscGNhvIPxVxN+NWFMoW+qElx3B86nlrYL2GBJySBA16qiM46ew1tD82SJa2E6CPYM/XMGqUzZSmbIFhCh/zhRROieCrPecfpVJpw9kuDmzwpx7Yk3CaSQdl
WRMTpPzKRKIEU9fvOOA0BzLLQvU03fK7qjMET67wPHX+suwXijXmIThbjK2CerqYHh4nnV+xrh6ClW/aqxqtRfpgC6U1G1bGNxx6QUkcOImWZiQyH1TlXxDNJHymirR7TO750l/SqQJ4MZNdUqOyUdzEqeCbD6fsRMCf9mVHk0cuE3g3oif/sMt1uwRn3rx8T5vWeqqXD+CP5gmG5h+CZiRtFb8MV5pxZu/ywT+7r7FYumwa0b+n+i2zfh61vcYcKOulPhdmfi/UrTwfxMWdH1RQr4N2G60/M5fxswtd+gmEJeZdzOMt1luo+Gl39Vx4qZNxIEzMEhCfXlZoEmI69+uRepg+QhvK+Kr4A76rnleivsqJ8/PHr8IeGGhP+nP4D3S4qVTAEJETNO2qsx++9jXtYECUmpKutO0yuHSakr06WHRMZJObkhcfh2zXivCV1mFPrv9YDCphAfImEU4rug92E0hDdLU9PXwkZwpzrOjODGTNg2pNbgzjov3DFA3yCnkXS/Jtw0uGNk90eG8C+rotz3k/rHAWsy4YMlZyElA6kmaP7Qa/pXgeMvhGmr7Fg7eXxltLcvQXswrOi5ekfuPSblZXfI/7gjrtbYuwkjBRGIyTBFuwB4QrBsVgPOZLg9Y21mqnMmzwl/pqABlcJqM9K4SEh63PNrRPS9vE90q4kYLPE2gBTd1QRSEk4PFpJgj4bmxdA9V+iw6DH3n5REkbcd4aah250p9v/bP1WvCu61rnWta13rWte61rV+VnVVcK91qVIuyWQipK9VgTO7kyqsKVPW98g4KfO28ep7QhULTb1qkEmVNfO81+SsOdHKiCo5q4a49jSfT6ruDBdvH6cBeR5J376/+EVhSagBSOtGVdnqrbOf98uT2vzU7l5OqhiLqFpZCk6EZt8RK2swrzLTfe2QNpA32hFcthHjqmI6Kfu2OFVAhq8SpSnksyzKb/ukRIX9ocf/RasdxkE7hX21FhcDRKNqgMyKkxBXBX+sT8BJ/Wvr36pSY8dMMcLw4Og/q0pnx4Qd1W9sYlbVImdVvutVMGOAA6RNgz0GVbdDQsaEOdR8dmMonaoiuXOLz6msL+laquQ3ykkMSfPunVIrZq+1TIGyXV3UlVnF366RodIYnKV86cVqvKaNNR7OigDIj3fKNB6mmsbTYF6PSMoLjYFctFsaYJxUkb1ZY47nyziMiXzryN7qDsHuQL7bqho0p/vsDvD+HhkTEpMmSn2zUgJCrbAWUiOkDsZe8HuIvXa9x9XlVMY7YXyw6gcdhP6TcPil0LzVsfGmKWKpV3aymZImxIlcjjlE7JA4vXeYAHKw5JcVsSnMoWlYKG2CaCiAMZm2DYRgiZXZbG0mtQmJbkll0pQ1WXYbZoXVDQV/CNrt74T+KSvnGU1jmu4Eezac3wvZZZrPlsm7ZWcDUe6lPwjFKfdZMrTPRVOzAAqcP7TYITM8WFYfI6YxS5f8fDwmqDcve009KlJI66IeWMDfjYRdS1oVhrbQfjaErXp/zUIJEJpDJvb6ucUKadVQGrMQWgAOv2yYHmviXhFN4yqa0AUwvY94lyih+kknPVnJcPPP8uW8vmpZ/eZEMULaNEoDSC3NTs/NDhFxTpnLMRJ/cUPqDGFtCJvZF1sWRdWcDcVq93luwM5N+S0cfmHpP2fiSlPr4u9vaHYRPqvZ2a07ijHYfZ1LK6VUuEMgzizzT7qGTbdw+kavXeoK/tktu0zFaVpb6jPcBgYamp2hfYHxrnbTT0JeJ+RskdHoGHvVf0bMqWAIlCLkRv3FpSmUweoaONvpzzXprlWfsMRCGSd4uP1ibBj9PjlPSko4Tbr7Uwp59soD6bbHfXpTikDfqk93prEAhBOyOyD+lv2vBP54T9tEvFVfKUAvhbt+IP3a0LnAp+Oa89gwrBvOtd+g2anybc9C9xlS5+g/G/rPdU4D9vMbedtTVi25pvWZqMSAdNQvjGMlZJSTU0KHK5h1YLMdWLU6fl5s5t
z2Su9BExUnI3RP4N90gvlPJ11j20bTGYcJeq/r8KjHEz7c0uwKx08dRLBvhtffm7C13wOgrBMvP6zAFfyr4XyXIYHfG01yBMb7TPNs8Cc4/FHkCNiTIbcFmdcWqzs6p01m86eOvAVb6Rnz2Ch9xu+sfnZRz/HmN+rF9UedX/YcNcUScHuLDNoXJKffHWN6VXCvda1rXeta17rWta71s6qrgnutn5QU7f6U84g9qoxQGq8qbO1Ul5Qpm1XNvv7CMDYnaeWiiSrvbtWzWf/avhzVe1kKvnbsp1WDCUnTyEC76yt7NXeNdv5HTbMpToerezrq5x6SPrnmQkkZnMWevkjiGiI2JFV8B82k7z9l9QwCL2ujPM2sT5/2oIkypg+LP2kSrwqvKwxfZUozd8KXRRUzQbu/095Dr+8X86UjWq+r/s90X2hehO5JCQvhNuOP9Sl9rMlGqSyd4MVA+5IuXfCgSVi9xz7tie+2mCkp07Yq3jPNwD2d1YuaCrlzpG1LqelHctYubzkOiPR6352tnbmzUlWWTvB421clV9OaZPGtNshpIHc3NREna8rcTb+oCJIS5vWgtIC1+nt5uFWfWFE5PW8aiii/UUJc1FtyxtREtPTuBrNT1ZW2QYwhrRrs25F8X3EVNcvevZ7geQdNowxMY5ZENHFWlTU3d4Z7wtpw/HBh8p6/qoSCvlA8zCqeP1w6/ae7ot5UnzFtIo+W89eO5lUWH+rpnSU1PZRC/zkQN57mOel1mhPniqaWZa+e3tRduv6znbuqCxiDPVhW//eKw68zHFWFa16r8toXZJvxR2heLylrt/9sXDzks9IEYM6R4esVuRHOD2bpfncnoRhDauH4babcB+TQIIPFVr94fBeWuVAEsIVsldvZPtUUrnPGnRKps7SvGXuqfuK1V48sYDYGE9Sr3n6++OFB1UKAcGgwJ4OJQvusajoirL4vlNm6f8yEtaF7STRvgeQNcV19hNXzmp0Qe2geB1bdxOsPW2Q0hMe4KNNycuSkfODcFXJQz34xslxDRL2kp1+uaN4iRVj812lW7cZI+PV73E5VJxMzDOpnDRXnWSxIhPOHvPBPc6OKWJ4BNkbHRmoEGdXbKkXnIb/4oK9pHORM3PS43Vl5ziIkJxcF9yUz3QgShXivjG4zCeNDhkqMcXtD/53l9IcBJku5jYTkkGiW62xHwZ51hyC+C+RqqnVHWfy1JunuQbPTnoKS1Js93eXLmjkJ043FnTKxN/S/GSm//LDsFoESCSiF4nvdFWwc4owqlOGyRpm6ZpSureqlpmzOPG+moLslpWASpJkMYAre6pi0Ung9d+Rs+HHaMjx3uFeHsQVXx/zMm25fCv6k5I7uKWGHi4JbumYhd0guuFMke4M7w/pPdXCHjSO3eo0kamrZ+Gg5/tiy7y5KuS3qTbeD7oy0u4ydCrmt43nTwEYZ52nlMOdWaTp9gznod3e8vWP1KTN9ZytrVwkaZKVa6PXReZtNIWwK5mRqQh8L6715UT5w3IAkHUfpNiov+aDn5QaDGSFEYXwo9J9052h4hPVv6lxe6zozs76bV13j2nNZehuKN4y/fsAO6fLvjqy9Cr9rXRXca13rWte61rWuda1r/azqquBe66f18RmpKlvpqocpJeR41qSqriWvOvKmUU/nSZ+SJSSKterh7fzi2cyrhjI3Xmft7kwrj30bSTct9m1U9bCqWeVmTbGCeXrDAPFhjRuCqn2Vk1ha9YCWtlH/pQjpfqXEhfo+6bbXrmZjFsaiPU64oSdUfqR/06SWItpRK6gyE1+6Ra1Z/aXTmO9ela3sVCmzoyzqbLjRp3F7NOoLnJRjO3sg9aAFt3OYqtIGowpO82qWbvHT1wW/1y79aWtp9jXH3JmFETndtbSniewNsuqQlJkee9zRYV/UXxsfVpVcIOoFDKkmwgllVg29XXjEs39aziMmJuI7DQ63NS3O7E5IUD8zOVfKRU3Gar1yizunfses3l/3Nui9Af
XCGbPkpktMpPs19vmw8HRlTNgpqoJcLERINz3mC76vjDWJrUQlMKSk4+7mklNeXKM+7JSR5uL7LZ2/KNOzt28MhHdbYqcJYiYI081PvapxWzCDUjNiVxjeF3J38aGas9G33Tn8WFW+2hQMdVyMWdWJotzX+O2K5jWCqCIhuRA3moqVG+XIbn6bOH5tF8LDCTBJfbxxpb5NTdQzy2f5vZq73RHsBP6YVYmpbFQAewqk3pNbq/zZG4uJhfatENvqeX2D4RFyW+dbUi+yhC8Sq0aDTGZR7dxJNHFrZPHFSipIKvjdpJSBMTK873/S3V+szpVZXQtrQYpjur34oe2rozj1c473euH9XpXCaSX18zPti3qZJSTIjtwYshPsWd8r3teO+cHxemyQyagKnQSZFdMukU4OuozZK5/UnQQThdd/qbJpn8GdC24oIHpP5vSvVNU1bjvsORLv+pqgaPV4mgvT2kRlF5tJ72XqCrEv2MjlRfV1sddr076BP2UOv+roq9KfOos/RFJnMWNV0/Yjb39ny+4Pqi8/KJFBiipwcV2wA5RVQk4zRUEZrWSQ8f9h702eJduy9K7f7k7n3W3iRsTrMvNVZUrVqEoygcAYIDPMmDFhAnP+JIZoyIwxDDUEGWYyIQqpVE1WVma9fO9FfxtvTrc7Bmv7uS/BDFMyI/FllhYRN/26Hz9nn+3Pv/Wt31dmMZIirPOiXqooxzp+7sUPbjLuoBfFG8A8aUL3nODo9gqyInSiWILss9FB2mqa+8j8osOMEXMYFwZ6bEpSmNEorQibCvc4oo8TeioJjutO9vxzy81o+dw6p20C6eW1UA1WFe27zPzLFWOG3uWFd67i2YecsA+WKlJ8yrD5u0Ld6RTdO1Fqq0NC+0T1aZC9czq3PwzZafQYSbWRzpsWJnTontnG81Wmvhfl1K/APSl0VMv5yabMb2T5DFm9jZhZuiCL3/exJ151MpeQKvRTL52tmEnr8hleOqzVPpOcEE+qJ4ffQPPxvMIUfg1hJWtRe0iVrJkzt1iYv+A30H5n8FtRkk2vqfbl3C+kIcW8K2thkH3V9iUxroyA+LXQgqpjxswZO8j5BOlOKK1kXT+OkmbmwzNp6beoi4J7qUtd6lKXutSlLnWp36m6KLiXei6lIKcl+UWd1b4YyZsOYkKfBrLRmNn/RpIZMRVFsCZedeR1jTnNhJXDlGQaQkRPXpReJylbcVOTjVrSWQDMu0fS3RVZKbKW/PDU2uUbXuwsFsjG4D+7Qhd/aupElYPi8eknlLPy8/2JeHdF837CF4W6+aSIjWIqiWSpypKZPmjSlTxn/6OA6fXzZPZasthDm2nfn5m7or7EVSJZjTtIYo+ZxTcFMLwyVA+ihsT2eYpZJagfyunPokyZKVM/BlSG6Uq8XGefZbbC1CRnsJpYPLXJGVTh18posag1sbFYH7EPvahJZ69zUcDRWryvMQsrcn8SJRzEoxsjVA51GsldLZxbZzBnNbR437JRi1qgYiKuqsVLF65azF6Ta4POmVx+N+1Wy2NUzvD+nvj3vhDl+TgTtjVuigvfV/WTpN9VDjV7+f1hJjcOXfx2aV1jP+xJ2460FRWfmMTbWygK8aoVT/Jty3RtOXxhFl/kdFfe1+2ENhmjE9EbpsdKcus1CxTUfbRoL8q49jJZPe2ge5cX5aM6JrJRdG/l+KqngJ4i5jTJdQTmK0d/Z/CdIrbQfMwMLzR+/cwNRRU1JUP1BPNWkeqMmZ4n7kMnKq47ZfxK0X46+3eft/nYiLKpfRKmcqUYbxSxVgvndXgpKnXuovj1QlGWPptQxRNsv2mXeyZbFpZt1uqZtewN2lvckIXnrCB0GneMDLdlovxzVZTDhD0pSQ70kF7O5EJ10E9W/OhWFEC/lrS/WLMQUaadwY4adhbbJ5q3gi+Jq4pU2NixlveqHirh2zaSwMXBkufis1wF4mRQJpF2Mv1v38vUtxvPE+WiaI23GnfM4kOcIDSG7qPsG3
5jiLUm1Qp3iMxbI+f6hVrUzdW3Cr8BPYh66leiFptJLd5rd1ScvsxUj6KQhwZCLX7K/vWzYTk2DndMTC8a3CEwXzcMt5rphazn1a81sYVoIa7LfbkFvIKSUBfWGoIotNoLBcGeFH6TSCURLbWRbAsnPGi018y3Ed1rSWQDptdeiBwJSDIpbw+yH/mbcg82mpPXhAaaT7qwZcGODc1jSXY8RcKmwh5m9BypPvalw/Pc8VPjRNxu0ZVD7Y/EL++WTp86U0pGL3tYyjRPifwLzbxVDHdQfSiPSQq/S6jZsPquJFBqIeecpcnmU8IOkiaXDeSk0E8n2Wu6wuP2EfexJzVWVO1OupmxeU5xy0au63j3zOPt3mVCo7j9c9kn7v+wJrRyz6/fJJqPI/o4kzq30DKwZlG8w6aivj+SpxmVMvFa7s/DjyrZV5qyP5QZAtvDeHdWWRXTC0najE2iehI6RmgzOj7PIthe/MAoCFt5vJ4Uw6uz517uZZlPgeNXmfpe404w74rveYLbfzNx+FHFzb89Mr5sqR8rsz4xAAAgAElEQVRm6T4U1vv5c/5Mm0mrVhjt/y/k2IuCe6lLXepSl7rUpS51qd+puii4l/qNUtaS7x9Rr14sP0u7Tr6NG4V5c4+qKzIJtT9CIRvkriFdr4v3NpK0YnrRFs9XUeyuVsTGLmpfdT+gfSQau/BrzXESf+XkwRUqglHoIci3YpCs9kUlSsJnTUkmbUvCjX0cSLsOPQZSZTFKoceZuHKEpvitlHyTPf04iUJb/Gjq1cSqFVXjnDATv+3gq0EmSDWkh4q5sCGrB/mGLGOvmfkqYwbFvFPESo4ntJmwkZz3bDKhFeWoeWuWSVUVJec8VorjF47QKtwxM+802ova5w6RbEVRy1ZjxoD7cCTcPE+YqiCJTdWHk6jcNy3V2wOm9/gb+WZvj/ZZPY0Rf9NhnwaUs8LVhYWJTIhghKRgjrOc77qYFo2W/PUM+uRROWN6Xzy59vma1kbU4yCKsB48uZK1ACWRaN1h70+kVU1qLPZxEs5judakRGod9u2jvH4CrKStLevn/ZNMUo9efMs+LP68s5eXkuf+9NOO+z+BcDtj20AGunLdrU7MwdA/taijoXkv6U46qIUfqUNRyW1ht1aK9mNeuMnnx5hJWLPEjHscxfv7ck0y5w6Awq/UotLO27MSIglXcn4Uc5WpPhjMBCCTzGZ8nsr3O1GO5zLRPtwa3ClR7SPzrhAkEpg5MVw5pq3CbxSHnwXMUS8KbnZZ2LZtII4WVSX8HTj3TGDwV9IiOHs1RSFSqJqF82pm6N6ASsJwJRlUgnlnOX5Zpqq3mWzk9ebPA8waXKJunkkm02DIaw97R3aSZFY9GcYXz56+5BSzE59z921PVgrz4ZFU3WBH6VqMNw39z2ZsE0hR0bSeEDRBZ3ldYLMeGCtH8IZ2N3A6NKTHhvm1J3105R6Ue1tF6epkI35jf63Y/UJea3gtXQ3fKXyrSU7x4T/IYCN6KKpYp5iupGu0+0vF8fcjUUGIannM/DpQbSfGbzvSvYYse805TQtEFTNTwu1n+s8aqifxdO9/ljCvRd6P71f4VekebSVdarseeHrqSPNZ6TSw9nSbiZNpUTYxugxVwn4o906G+mZgOtZs7o74K0trI/2xJk/yPPV2YjrWKJ1FGZ80YQVpHbl+LYDoh2930oVwMGol8w0G2o9wLPv86r3MI2QF9duTdJS8kHPO93uunSSbdbVwUkNCjzPhqsMWkgA5kzYNKiSik26FCkJ5sEUM7V9l0ipCVEw3evEcp0q8z1CoBzFjh0hYGdx+FnXRR+J14fL2osbH1oFS3P9RU7jJmeZTuaZbRBVfR5TXGIQ3rQO8/U+KEhxEbZ23xRt9W2Mbiz1MS5cybTvUMKP7GRcSua3xtyvs07Dcp8evFMPvTZiPFalJjC9FndezdBBB9pm4Lfd2gun8MdskzLHsUU0mrqH+YIhNJjvpfvjGsH4h3ZLjfS
fe/NL1SV1itJn0YBb1GuDdP6lpPmUOX6/wnQJVyZxCVfbwPgHCr9b9JDMeq4bU2h+G/P171UXBvdSlLnWpS13qUpe61O9UXRTcSz1XzpKKkoWbqs9M2VTYhEkJOzRnMBacW9iiuatRcyA1FebpROx2JKdxB78QAGIniVp4iG1Rcq3GHqbntKGYSJsO3U8oHxfVUMWIKezMXDvx8jqDfvMo3tH/y1sJV614eVISBdka0JrY2YVROt1mps+e+bLqaCArjI1UVn5+OLZ03cR+lVg1ntOHTpQOnUlF8fIbmTxGZdgEQraAkanwTXlbTYadJ/eW7u6ENYl9WBEbvUzup1rUYFQWlSGVyecIZiy+spTRQ2B60VA9ZFGou5rUGBLPHFd3PxJXNSpn3MOIGmfSqma4K9SCG8fm509lQjViBi+TwDGK6gnovqQGbVaoGHH3vaQO9WFJJsvOiBL+7onwcksqaXV6DOin4oO83YiqehpkwtkHeHhC3VwtqmpWavF0p0p8x/ooj19oDM5i3z3JG9QK/bAXHrNSi78Wo5ep21Rb8rrFvH8QksNBjkc5K/7lWtR0dTKEklh3fCrnxyvx4PXPlIv6Xkv605NerhcJmo/i404VzFax+0VkvC682FbhV4ZUKWyf5NxYzfCiWtinoREaQegADfNOeMv2SaOLQpq6iG4DaW/Y/xTMmJmvE/OVJCGBsJrrj0bUqeE8nXyeZlfL+gGwY2K8MkzXmfZFz6DahcZgVoHkNVUVGE4OLKxue+bJ4p8KWUWJskcbUSqTjo7xdUJ5JZ51QD8o/Ap0FPbl/KpMzG8sw9clCW80cg2ayPaqJ2XF8bFlHhxVW3z5G0+7mhgODnvUTBvP8aelG/Je9p9Yi5KpciauHLE28KrDPc3MN9L9GO7g9WcPHMea06HBmMTL7ZHTXPF0lMe82hz49vGKq3VPzoq5tkyvPPVmIq3kmIf7hua9xfaingvlQFHtM/vfkw6JmTK+KPF+oxhvM91XB4ZThTo0ZU+AsItgMvufGfG1moz7vsIXioRyCa0z/tYzZ0fsFNWDTKCfVbF5o4i1Ydq1aA/TTcVwbVh9/YjV8jz7ly351URdB5TKhKCpbKRdzezu5P76/vsbrq56du1IW3ke9x2ba7nfju9vAKhe9WxXIx8HtySBtZVnNBVmXVLcTELZxHorCniuFSlqXOu5W8k9aH6U+RSucXtNMkLHcHvFdPXM0wWDmSFWjrDaUj969BAw80gq9JXUOeGADzN51Up3yRnsfly6TMpLAuO8qxju9EIiCatMLGMLsRWaBsD4had+44itMMvP92l1SpgpCZkmZnQ/o/uR8HKLLnMmevL4mw4zBsKmYrwRD7K/Dajyxvw2gQZzMJIct05or2U9rJ99sXZQ6Aj9C832mySd0drCdaGvxExu3NIpy0pIJbl2+J3sq8PngbuXez7kHYxahNE6kU+GtCvkmcGgT4bURrmnrjw5KtCZWJUPyyDs3PHHE6q3VLuJ+aGhvhlYlfS1o+owg8aeSkcTyOvI5PKyJ3gguYztDckq7JAZrwzbbzyhMKTjukKFjJ68zFGcJlJdyVwNv11dFNxLXepSl7rUpS51qUv9TtVFwb3Ub1ZMsBPZ8cwjJOYlDeqcFoPRwvwred/ZKFTO6HEmt5Lk4vozm7QoOnNhGsaE62dh5BpJpjl/1Yq7Vr6lWlHOslGkzpF1tXh57cfjkkIVX1+L0jzNoj6XL5z241Em7o0RJqyzksjVPacl2ZMibSY2q5GHTxua64HhY8fcO2IobMh3NftrA1Xi9NRIqk8jXi19npQvHMCwsqQuYWa1sHOf/bUKUwV04/n8as/T2IjfNynqx+KFasSHGTrxnVWHLPxAlRd/kt8Y6gclrGAfyU5LcpFWS9pZ1koSvbQio4itQw2O4bOOaXP2Ryr6H21p3/aoCUkMaoRpe2bKMk7QNuTWkZTDvnuEVUvq6oWwkZwWv/NVURWCEAtiV0ESeUSNwo
JMV2tRdXOGzZr86QG+eFnWj0FNCv9ijTme43OKutsVOeDMQkxJ1qnWqHESFbgwL3MIqKYm9yOqkfWQVy3xqsMW75p62JOut9T7zOaXGhUzpy9Kck956/NtxBy1eJ3rLLzW0zNX8lzuKLxIO8h0vDsqfKcXeoaOwmjVc6L+7mlZ3829ZywkgVjD8CoTt0GUlDqh1oGAW5LDUq1IgyWvJCeepNGTIjV5eUzWMN8kzGBo30N0inmjcYN4NGUdil919asj/e2O+S6wtZGxTmRf/LSFIjA8tDRXI362rJuJd5868ZmD/JkUmEzOCuokPlb1vOa1Fz+ujvJ3M2VSpTh+qbi+k3S6p6cO6yIpGobRUddyAVwTsFaOefIarTNsPJPNomQXwsLZ15h1ZrwR3up43aCS+NmbR7Oo1tOrwNfbe/7t9FoUKiBlxWebPdtGTJdrN/HT24+EIts1NnDqJo5Dzdd393LM24b3+ZYJqN8ZspEOTaoUx6+KQn7SrL+TKflkYf7Mc9OOnD50S/jj8COPbgI5KcxuwiCM3vnVM1GG0TBR8Uc/+Z7pK8vfvnmBX9fEVtO9ldearyhUCYWZMypr5p1i00x8eJC9PG4iVmd+dPPArx+u+OrFI1Ow3Kx6Xq/EF3u8qbnqRM1dVTNpAzkrKhvZ/PEnuS4mEZPmZ1++57Y58abfctce6ZzHGdkQnY48dC27euSDiWgFnz5uyFmxn+Xz4k9ffM9f2cC7P3slnao2Y49CxrBn66wRlXt8oake5b7q3mn0LPue3DuGeeuwK4c9+SX1SvkZVWYIcuPQxxn/RUuy4qsdXifMoNj8qtzv1xm3m/CDQw2G6WVARYX93jx3QRScPnO43mL7CLrFxQwpP88tVBYdUpn1MFQHGP9oRAPzi+e9Q688cbTokyGvAj7Z35AbtZfUPXeUPcSvDdWj7K+pKLb2wx41eeLra/y2on5zgGPAv94Ry+dFdTMQk0I/CUtaD6KQZ5dFlQWqu57pWAsP2p0T4kAdLXlVPuR0BpsxVSLbmab2zLUjBMMwF065SeSXE9PJSQqhK3tCZlHBzaCwpzP3+XmOYbirsIVXPV056kcPMRE7K8z3lJdr/tvURcG91KUudalLXepSl7rU71RdFNxLPZdS4qm1ZeL9TD/oRLXNbYU6jaiYSG2FGtXi7dNjWJiqaV2hpygpLHNYWK3uvie1bkk9U1Mkrco38aIW65AwR6EdmCnKhGyRPM68W3ImXEuCjQolWUspmGZU8V3lygkFoNIkq1FtRVhXhEYzFSbfeJdQSXO3OpGS5sX6xPcqE6MmpTLlvY6okzANcxfITrx/atSLWqw9zDthG4ashY9b0o3OKm+qMs5kfvbyAwC/fLrFniSj/OzBna4z1ZMohbEV9at+TPhOLykx4seNkn61qdBjISGkvPCGVT+Rdh2p0ti9eJnTplmULDhzSzNhXWG0Qvd+8bP5KxnLr/qRtJWknOwM8cUWvR/QRRE/vy4xkSoj6rwvPrzT9BtLKxtDaqwovI8H0vUKPU5wzp03BtWPqLDBFLZkVgqV0kLGUFNRbVOGJOlruS5dhjPxIRVFxRqS1mCUKLxakeuiNBx7VEpUT4GsLKfXmmovintsiwL3aIhdoSbUmeSEU6n9sxqhJ5ivMrpkubuDQs8wXSvsm/KYOZfEo57cOEnpSZlsf8AwrRTp1YirAt5U6KPFVIG4y4SSVsWsZc1lRV4FUlCYWZEQRqqsMZga8QaHTni4WcO0MbiijtSHwHDnGD5b4deK5mbE6KLehnP8WsY0iWQyKSlSVCiVMV2QlC/ANKKiKpXJsXCKJ03uIl7smpjZyCT4Rs7bqs+M14bhi8jrohQ+7Tv8scKtnlXL69sjOSsmL6/lVjNWJ+rOM/aWuvXMkyNFRf+6kEyeJJ3L7ZUwaTtRNU+vFNqXdW89CUVKms8/e+C/+up/40+aX3NnTlwV6b5TigScUuaQLb8OV/zr/if887d/wG0j/tHaBn
Z/MBKyJv5M03vH5C3HQ7NwgofBMl856gchO7jO8/7TFrO3S3rW3eePvFwd+enmA7UO3M8rnnzDzz/dsWtFUR6DZd83hKT5w91bpmD59XhLPDqefv98uTJ+K2lYdhQaQWjhqW/xvayz9d2J/+Zn/yv/uP0VzdeenZ5Y6YR/3hJ4/KLiU1zR55oxOT7FNcfY4FTky0oU3Ftz5Fb3dDrgyqbkUfj8rJXF8m9fsBzfhWv+9Zc/5m9Od4Qkj/u6+8inacXqP/w1Q3Acxprjy4YUFUNZY3ow6ElRPYJphKFsZkusV0vQW+hkP0hRE7OFmAkrS/vNTNhJ50f7BCEx3mj85jlNDQVjUVVzk9CmdF0GRVwJD33ePqecxUot51vFTHQaW+ZI1IPsd+Fug9lPnL7coYN04GwVCd7gHuV8JJfJKyAqshFVNGlQQYkPFtCTQeXzOi7vde2wB08sXlWzaqCu0KcJB6TaoaF090qHLWoe7teYWaGPhUl8UoQVmCd5nim1oMUHn4NGmYR5dKQf+JIBmBVpMOQ2cjo1aJtwLrL/VAg+XkMbMBtP7C2u8+LZrxL6eH7vEDtJD3Unee7V28B4bejeFNrJXYWak3xm+STJcH0gOf2DKZN/v7oouJe61KUudalLXepSl/qdqouCe6mlFBCv1uKVVWrxWSof5Zvivhevq7Po9w9CLzjnzvcjDCPxRy8hZcxxlgn72mDG8pXZB/FFaUitxb7fg12D1qIAA/rQF5UuCcu0rdBvP5FvrxYvL1oLW1VrzJuPxM9ekBuL2Z8W1dl/cY05eWJnqX7xHkLAuDuS+QHZ4DrA0fHt4xXeGw4uiHobNXVTWIPrwub7ZYtPVji4Adr3elHyYi2qQNjK9H3ooPnA4r8DyG3EmETKil9+uiEeZDpdFX8igB1VSR8r/NRR2Krd+7B4KGOrGe+q5Ru6qjRmkun8rJ4VBn2aYO3Q/SxTtjmjp0T3SRSC+kEmkkVRTKgYsR9FsT2nyqWbjXBnD4MkmDVOrtXDUdRXIG5r8e4ajd2PxFWFOc1Czihkg9zVsoY6h+pH8qqVr9bGLBPA+iQ/1z6SDyd0Kr9vtKTYADQ1GRYf99n/Ha9XoEV1tt/dg/dQV0LZ6CfUaUAf9HNevbXgA27vsaeA79qSva5Ip2c1NHiNjgrTC/sRJf7AHypAHESh82u5jjpK/vq5s5Ft5vTaEes1sdF03w8kZ3j4abOk+4QWUSVHixqMpNy97UhN+s0b1IA5KaIxtO81sQI9/0CjKG9Pz8LhTU7Su/QsKXkAeiuJaWBIDq7WPZ+eVqhZk7tn/2uYDevdwDxbyDDOjqadGYt0plUmq0zyBtUb9NUMN5HVeqI/ynWZx5qwUmUiHI5fSGKVvRmZglz3tHeoLpKSBpVpK/Fy+mgYJlcuV2K/b+nWE5gs1IaoqFsPvydro39owGRCZ7G9wu1LStj83CFh1vzv333BdKz50fUDN0WNdDyf5zFnfF6aM1zpnpdOPKqfRlGqQtZc1z1jdPhoGIMlZYUyGV3OT0yKuI5MSmMGRXjfkl2ie1DMRcV80Z34R1ff8srtqbXn61pxH9Z81T6wtnJ//bK/5c/mL/ibNy+5Hzp8MNTfO8Iqk+oycX/UwhG+Bh6EjVw9wfGvt5wtlc2rwAu7Z6NHOhVY6SRqtXpWuTo1c6PnRZF9TDVjdhgynZbjaVRkpQKdAqMUc84YMpAWRTcBUSliDng0V+bET5t3DNExJOmwvZ83rN1EQroDg3cYG4neCZ0D8UfryZBquYbuVNTJ4nuVtS7dk+QUJI0yYE9hSQlcbo0YMRO07zKhU3RvNMnA+HlpsQXFdKqwH5xQDSZNXEVQGpWeUyQluU5jj5HYSCJndprwQj5UVJC0z3F3TXJyvPO+Rp/M0oWrHjWjrbGTfJaEVOOOChKk8fk/yexJ0jB1RPypSdTpVJcrljP+uhU27hxIrdBDslH40hkNTxXtd/Z5vuAon0lmkKQ6gPqdZXoVyE
fxAeekl3OiC9s4tUl8uFmhDpZ0sOgA48oJcaZUxpJXHn00hLlBz4rUsJBospOb69wpc3vF6bWVa1EIP2aU/XNe1dTvT8JLt5pkf1uGwkXBvdSlLnWpS13qUpe61O9YXRTcS/1G5dowXTe4pxF1LKrq46OoXuYHyVbWiheyF69Y7hrS1Rr78VCm+ItbRrNMfRofSNuSpPXxSO5q9FMvnMKixgGEF2vxk+aM3vdk71H9SHqxXR6jfETlANYStxX200BuqsWLqbIomclo4mc3Mm26csLeK56m+WipHzQn02I/Od6tG3BCLTj7sZLXZK/RWjLD9Sx8wmSfv5WK2gpmEmaiStA8JAajl7QzXCJGzV9+/0pUnqhwB8kgPyvB9SP4Tiah/UqUt/opYceI3RdPq2rI2mDGRKw1bi/pYbG1xFWZyt9WuO+fUD49kzCUwh09dhDFwhwnUTjHefG44gPqOCysQTV5uF6T1g1h2xA7i9EKrlaiECP+triqhL+oNVkrcmUJ20b4xiCpZO+ewCjSpiU1DvPYC22j+LNV8cuqMZA/f0FyRtSQT/uFpkDO8vecIUTUVNaNVphzeo9WoBR53RFXDn0chO0bAjTPa0yFSGwt9jhLutcEzX2kf1XOxQhivD5fY0XzKaNnYZ4CTFew/VWS63XUTDeKWEH9mJlX57Oo8StFNhbtM/d/uOLq54OorWdh8SaReof6VOFOQuDQk0J5I4oHQgnIRXHJLuHXmmyFm3ymgiQjKWoqQmgQz3CQ4zn7B+e1xg2yvmILx7HGmIyvI4zy3v2swWS8t2id6TYTPhrWzbOvWuvE6diAV+JLj4qq9aSkWG1kTzgpiB+L6q7Fh5usIsyGh5PsA+vPjvz9F+953Ry4rY44FalLS6MpUnmfKrRKHGPD2y+2/PXTS67v3tOHilhunmEjPs6hq3B14PR+JZtAVujC4KweDHPoYBP46zcv+e+mf0rnPIN3xPysDk3ekrNC60TjApWJfH+/xT8UVdDIBLo9GLKWxLZswPWiMgK0oSj6ITNfiV/ajJpqnyXpCfirb1/x3dOOmDTORJyNjN6yaSaqQiQ4zRWHfYv7pubTukbPit03EJtnPrM7ZlJJ6OreJ+qnSGg1zYNaUhs/vtryz+w/RauM0YnWeqyWjlJjCsNViQKd8jNh4uhrrE4MwS0/q3TElN8dgyNlRcpqYe4qlZfnnoIlJI2PmjlYToUBzEGezxxlvkAH8bpevc34QnqpnopCPWXsmGnfe6r3J9Q4EV5uy2Nm1ByIXYV97EmrWrpWWqML9zo3NWlds3rrGa+tnKsG0KCmc1cHOHu+7zX+KkGVyJNm3j0nfoGcUzta7KnMBPi0dGxQmvmLa6pTpn+p8WtQg2b9K81Q/OJhJUmX1ZN4YlWEJGj1RVUNK1lPwYinOtaG3S+iECcKIWF6ucJMibiuhUV+nJledqiYl86hnoRsYnvhtZtRkVv5zDl3iOZKEjj1aOB2Ig0Wd9CELi97VMosKZw6KFKTiG2WuYCyZZqDdBLyUMufJqOQ++RcapTUOu2ly+Q34oM2E+hYUkiHjF9rbJ8YP1vTvOuZbxpSdVFwL3WpS13qUpe61KUu9f/zuii4l3ourUjOoOcontjiwaVtRAXTkhJ1zgJXIf5GEpUeZ1FRZ4+aZpkan+LyLSrtVsR1hdnPpE2DPozkxsHDHkoyDRSW6lAe08+o9Yq0ahfFIm4bktNon7CVFR9pzvjPd0simlo8WhE1eHLr6F9WnD5XjC+L0pDB7WE+2MLnE1pCrDNz8R7pXkgHxTqGOyjcUf59Vs7aD5lYQfspSTLLjbATkwVfkmlsHZlHi7GJMFn0rNEehhfPCtzqTcb4vKQinUkHAGoqXrEM1d5j9jOmc8TWomf5/1Ilx+yeRtKuw306CTt23YqH9pwaB+h9T3yxFXpBEKVXa03qKvHLgvht9+KJdqNH3W1QMaHmwPxaPGdmKGzjlNGHHnMfSLsVNHZZP3rw5K
ZCDbOQLyor6WOzX96XeTySK7dkxisv6URotfjt8DO0tRA06MpzB8z9STjNIB7spkadBqxRqHEmh4had+TCTs5Xa7LVhM4AlSisFoY78wPFFqpH6L/ImF6uuV8Xz1wZGg5rISuEThKYzCge3PFG/LEgv2OmzLwV3vH4QrGPLWaC4U5erP29Pad9Q1pF1MExXwfxtWUWtY+sUBFil8DmZc3ERnLtQagBttf4dSasMrYXf2LWalHDhjvhpyYnXuLjuzXUkX/yB7/kZX0E4PP6EacDjQrU2mNIRDR9qngKct77VDFEx/284sOwprWeMVreH9fY0v24e/2Jv51forxjeu1RXmP3Gh4rhqLg/YM/+Tv+89u/4HP3IGsYzZXuiSgaJcqiIeNUxJB5TC3vrzYYMn8xfs7//FFQAn//6j0/f7rj5ebIxo38XN2xbia0yuwHUQ2n0dG4SEqKnBVv3l5j3lXEVRIVGlBtJE9GWLteYz5WIgTrTHMofvyp0EwmsL1c21gLs3T9/ZlqAu27kfmqIr1TTFtDex/wnSaVjkn+dy2nm4b6XnHcZekkjIp79ezzzgacy2ivqD9K98kOieYxYfvSZXIKd4wkKymCQlmxuEPm9JksxPZXFW/2L6XDlFnYpOKR/IHXW8t7FXW6+Hyz+NOh8Fm7JFzvUeGvI3rlyfvn/ZsoRACVpPORlZyvbKEa5HncSX6mAvit/GkHOYfnuY72kxxX92bEb5zMclhNuloJPx1QvSRaqpgIV52w0kNEH/bL/pM3K2Lr0HNi3sh9FdrzfV588OuIfbCEXaT7O4vfKvRRUi/PxAbt1eK1H680nc+Mdw3TzmCn4md98ExXjqxg3sDwY4/ZG6abfN5aCLuImjX+TOGZxQ9rJk0u7OdshIPbfxXovrHYUfHpT2rqh1w89OB2mu59IDYaPWfGu5qswZ0S0+7ctQAzQFiLcuyvErn6wfUGchamdTYZHiqUFq6zyuB3zxxc82RJVZb1koUkpIKi3KakStZwrkS9xSUSkBpwD+XztFzneSf0mdjC+CrQvLUML8sMSemWqQjDjSasNtg+XTy4l7rUpS51qUtd6lKXutRFwb3UcyWZXrRHjzr2Mu2OiFpq9uKrdRb6QXizKcu/QfyTvYdpJk8TbDfoM8/07N3VGvs4yjR7hNxWoujVFbnQD6gr7Ic9adNKQlnK5HWH8s9Z3mbwmAHmq5p024mK6IxQBMq3PPs4ovqJat8L/3SWJJywzotKZ3vF8DI/M2tnQMmEKUqO2T2Jwmq9MAmzhelG1AcnYUz4tcIdskypKhhvFfPW0H+W0XfiR9xueoapYp4tedasvtVkI89ji1oTK/HgoqA6JvHNPs2SjFOYjmaKJUmuJJKNQZJ9gibV8i3ZvH0gvdiRaydZ6Z0TVmRtyEPhDt+sSZXB+EhShTO7F3pBuBGJ0voIp4G8XaEmjzmMwjB2Bvcg70sNM+acgrZuUW8/kc8GzwMAACAASURBVKsd7uORtC7H/OkgCq6XroDuRZnNlUPFohAoBSmJevvmI6prJcFsmsnHYmzUCjVM6KbCfHwibzrUUJLMziqv0aRNi5oC+v5AnmbUqhV/cSE/hJdbDj9uCLUia0O24vPrPxdGJUD1IIl3yYB24hVLlXg6zVgOZ1YML0uC2HUi1Rk9atxJMX4lirLqDZtfSNqVmWSNjDfi1dZfi2L69c09f376TFSf24hee9LJooImrcriOFv8bILRCJu3ToSbjJrkms6VqCJKKba//8jkLcP7jmwMsT0rwc/sz+Y+EzrDfAd/uv2O/2z97wDY6LmopglDJqIw5P+bGtJnw2OqeRuuMCrxKaz5H9//Q65rMbnHrDi9rnhvt9zdHohJ8fBpI8zdHySJ/avDT/gX6fcZo6MPFVYlEoqtkxO9shNGZV5VexrtGZPDqMT/sf9iOZa/fHzJh8c1WmesTcyTZegLZaNwVdWk8UaukVzA4pN90ouvOmWFmjRqb1CNrIXue4
XfKApMQZTWOYuaOiZAowM0jxFTeMM6ZOzTKPfcqGg+TOghEH+yZv2drPn9Twzqkyjz7QehPsjfM+4kr92/kg6S38ixrr9L6JAxQ8IdRDqbXlSStLf3zFcVOgh72Zw81ab4Sg8G8YOLrzJ2CT1KF8mU8zHfRtyTIbmMGcH2xeerYHxRKC5Vpv4o6xmVad5a5p3G+KIAljVWPWrma0kV0xGyg/rTc7cKRP3OqnBup0x1zEQnfkyQ+6p+CNjHAbOfZK9QimwrdOkyxdZh70+SVNk69MMRNUxCaDl3Ba2m/6zGrzSnz5V8BpTjPN/vujeE6+L5/rF0UNSosb1i+kLu5fR5pPrrlmSEdT1dW7p3mdCwqKqhrkCBbxXTXaLaTejv10x3cUkcREHeBHythRihMs16ZtzXmIfn/yQbXiXc1USvM80bhzvA6TOFLftPSAhX+k683WaW8zdtNfNVeesnOP04wtbTbSbWzcRVM5T7s5AWkmYu/td+knM2zo51O9E6WWM+afSPMk4npmgW3/jZKw7Q2kI08RW9d5zGiv6xxbaBEOvlnM830iUAMIOme3li2lb4t3U5ZkXWimQ1/WtFbA3Vk+zHK367uii4l7rUpS51qUtd6lKX+p2qi4J7qaXO085ZK+KLLeahKGc5Q4zkw4zqGuhaGEbydk1alwSyfpYc8MqhrJHHVw79dEJ5UfLU5EnrBv14JK/Fyxe2jUzx6+fvWqYfiZsG3XtUjJKk1dZLopnKoI8jTivCpsLuR+G1HueFlZuVQhktiVuzJ20aQkvhAZbniYAT+oGK4g1KFuwo/FB5kPjC2g+ZT/9QvLZmYS/KQ3SA9ZvAtDPsf2Jk6rWC9HLCFT/iqvIcTg0pSGKUEnQkqX5Oxmo/iv82W5i2mu5j5PRVS33/TD/Qoye2jrhyRKdpvttL/vngUeF8QFqS5ypLrivh3eZMXjvh08KSKKefTuKZnSO5dqhDjz6TF3KW8372zMYsSiigjsdysTS5a8jGoOdCKiivffbyxps1+kmoCfhAXNfijfZx4eDG6xV68ELNuNrCaUAde9JmhQ4/UHlzxjydxJs7e0msa9xC+FBPR3kOZyFG1GZF1krS9/YiuR+//AnHL0WdSla8lKEVr29cy2vNScl0vM2L/zVZQOfFjx3WCRVEcc8K0BA3ER0M3ZWoJPVdYH/TkR4rYm2o9jDeyLrqGlGGTr4izQai4o/+wTd81u7RKrG1I07J8TgdiVmzDw3vpw2/erohJs22GXElLk+rzN+8vSNnxWfbPVO0/O19S7ib4YMcdLLCAbWDYroSZY0Mv+xfcG1/DMBT6PDZ0BSawSE2dHpmZ3tWhYe60SOPscOoxJgch9TgkyWh+PnjnbyvqcJHQx4Mh75hniz2+0r8mGWw+s//5gv+PH6JHgxp51EHi+k14YVf5BdlE7rwb/P3DXETUU3k1csnPtzLNH36VFF/lPQn+wh5LT55d3j2TLfvMql6VhFjLf8TMkZR7j8Zmo+KeQuTi7TvnpVvVZpM6+9lmv38HprHRKgV7duReSfn2e1nVD9SvZc9zhwmcutYfXNkupO9z4waklrYyWZUTNeyv52pF2aWfckOZbq+VnR7SXjU5V5e/epI2NVko2je9piHA3nVko1i/YsnOT/uimTh6fc14wtZt2ameDZLUldjaN8rklXMO5khcCewp8z5Yvi1pPrZXuF3kuZnRvHZ1o+6rDHxWJ5nJupHUafN+OxnNQNc/UKoBsILV1R74XmfuxVZK+qPg+wXUyBrQ9w02MdhoRaomCEldD9Kt6mtYfJC9SksXDUGdBSVNXbSsUufj6TZUG8KzzsYSIq2m8kZrEnEpBn66tws4/XtE99HRf2LhtAJyzXZc4ph2RO6QmhQQJaEtbBL7L584rOttABetQdeVEeubc/GjNTac4wN30w3/Nm9dCXePW3Y1jOryjNuLR/SNdOtonlnFu9+1uLVzS6hR4VKimqvMEP5bEPugT/6k2/4T2
//hj9svuO1lfVQkZjLNU1ZE1FENGNyeAwxa27Ncdl/zo/TKhU2csKpSMp6eYwprdFTdozZ8SFs+bW/wSfLlGWf72PFlCzfDzu+2V/TOY9SGb8yPLbSMe4PNeqXDacfJfHzOst0LVSh37YuCu6lLnWpS13qUpe61KV+p+qi4F7qN0shXsc5L1PnnFPN/Incj6iuEV9sP6LPpAWlSEWVVSmJgnccyKd+4ZhmU6b0xzL56iO50hC0MFcBfRhEEdRnyKwmdQ25sdh3j/I8TQ3WyOT+nCAlstXo4ywJWEC47siuTD3nTNhUoob0imRLispREdrM+lvFtPtBGpUv0D/km3D9kAkd1PcaexKuKOpZwVVRFDzfKUJT+H6rTN15Xu3kq/27pw1KgdLimVwyxgVBCMC0FXUprArdYaMxk/iiVTwnmTmhXMwRYxSpqVAxSsLPY+E+doU1mTO5cpjHI+HVTo61qKrGR9SpXJ9NSzaGvOswpwF9OJtM5XXVYRAfM5BXjTAmz+ulTC2bUQgJjBPmY2b+0YvlmPUcyatG/NY+YJ4G1OzJx5789Ws5npOHEMltTVxV2GNPrivU7Ekv5Nj141GUWWdR+xnGCTYr9P34PDEdgnj12vr5Z+sW5QN6K+SH0CrGFxm3l4nq6Vb+ni0L3DhuItW3Fr9J2AH6z5MwIEe9XLBcJXL1/DsgE8CxzvgHUSO+/PotX+6e+Df9V/Q/TuRvHWaWKe7HN6I+PrKl+mRQPzvyj66+5e81b1jpmY0eFhXV8OyHfUwdb2+ucCpwZfrltb/31/xP6k+xKvFxENnSrAK77Yn7LK9lPjlSlZldJnUJNSuoE//LN1/zL9TXsg5PFTkoTBeI+wq3m0Bl/EOD2T3LKDkqjJMUshQ0deHg6r9YL49JLtOdFP7B0jwJBcDMeUlx695U+HVhdA6iftpeAU7Uc4pSpSC2CaOg/bUjdJYP718u3NDujSjyMokvXFg7iLfznOIWVsLftCeZ0FadWmgIZ4+ySkLKqO8hdJrqkBlvFNX+mZDgVxo7ZupH6dqERlMfItloXGGHC7mlFl95zuiHPaG7JVd28c62HzXJKaatrL3qIOp6/fTMUVYho0PG9cIyrh+eUw31sfi8pxnjjNxfMUJMpMqiy54KonQqrejeZuYrcE9aOKZJfJoAq1G4yfYknSTtC1O5nCco6nIrFA47yFqvHxR+JZP353ugvlf4bel2lQ5HcrD6vrBXNwq/NuiQSShMktdUWeEOpSMxJ+bbFjIYq4mNsNfnuxXVe+kghesOFWvsG0nWzFaTXu6EAnQm6uRciBOGmz/9wD+++5Yr2/PCHZfzU2vPrTnS6Wmhd3wfrvnz/gv+rr8BYD835KSIf3TEHyuYNWGjICjG18++XnvUmEl4r/5jTd4E/osf/zn/0eoXALw0BzZ6RhfFsymtgcPa8i+7n8jzfAkfw4Z/+fhj/vLDK/Tao0xmzDW5K12dlScnTV17YtT4tx1hK/e0LbSKrOCPd2/4j7tf8Moc2ejElOGQHL60IMbsaJTnVg805og704rO2AdgzJqkFJrMrZ6IJe0OhfwJiyLsVGSlPJtqXOgopmQDjtnhs+X9esMvtq/4+fElVkf2c8u2ls+db/WOw2uL6gL0lnkn1Ib64bfXYy8K7qUudalLXepSl7rUpX6n6qLgXmoppRT25OXbb0ikXVFkfRTFNETyMEBMqKLYqZIWQ13JZHztUE8D2QobNd9cCYEBICZJmelq8d2GJAlk9Q88uDmLypsy/rrBHWYo6kQ8J5lpUWtUhuqbj2UaP6PGSdRdKGlqGr911CljDzPNgyU0huFVSXXaFVaoFuUndCWvG8lyB/HIJicqUvc2Ux0SvtNFVXk+d7HV+M3Zf5UJN4E0WT4cREnrmolxdvjRYryiepLXi5V4fAGyFSXHHaG9F5ahO2WmK0tsy7fjfSRVBref0PuBeLNCHSMqJbkGIMlioyesK7Qz5NqgjzNWqyXZTA0eZQ
yqbfHXLdXffSJdrclXG3JJodOnAcaJnBLolVw3q+XaFZEXa1BPx0WlTy+u0ccePQVSXfy1jQKjcO+P4gk+9qIuv7rFPMpJTKsGDUXZF1WenBc/MYgyrYaJtG6hrSXp68xlLutQVY48e1JXYfaFDjEHoXFs5FqERljHtZfkqflKJr3JYJ/kd8JNYPgyYDaePtSkJlHfDEyninolytmL9YDRicpEbpuTEAB04tunHQ/fi+r89rDhuhuothPzQ0NshGmaXGb7V3J+fPHzTYeavznd8d14RaXDb3hwfTbs7MC6IBwMmZ0J3Mc1hyhq8Tu/5YvukSff8t23NyJfRMWT6qD4zqu9QoXSKShqsxoMfv88n+wOso7TaDCjIg6iotEk1LfyWskKJzMasEeFskDfoNUP1rMGsvB/s1ELZaT9lJfH9K/Ej14/yGueVUMzqsUHH5uMOyrGWwVa6AUqCmf13Olxe2ECh1bu3YUj7WVaH4pnNkN1En9pbBTuKCrm6js5nvFGOjnVE9T3itAhk/KteiZZxEy1j5g+YCtN93YkG4U5zQuLWuUsSYGHAX00MpsQk3QzCge3ezMRG4OZLOOVpn6K6KAxU6Iu/OMJ6eLUDx5z8qiUia3D9PMzgaRyJKvlGLKQbfQ4k2u3vJY7SpdnuHWYAao9+FTYysVyX+2zdCtey7VSuXiUPQvZwCfpZPmtXIdUFYVdZ9oPcsx+JX5ldxDiSNbyetlAdM8EjzPH16807iSc01SpxV/r9p5Um3IutfiF97MkUpaOkjmVuYsQ5TOnn6B0i/RYFO6HPW7bkCrHT68+8l9e/yt+5h64M5ZayfNoFEZp+jRzn2buk+WQWj47fxAAfxVf0W1HPr/aM11bHoeGcagIs0EXjnKaDZwqpuuEymB6TX4x4VTkQ5DPr/u4plGex9jxFDvWZmSjB27tcenImOJ1fZpbKhuYjCOcHNQJ1ZfEwaCxe8P4SpFng5lFATeDWvzifpv4brjin6s/5lgu9CnUfJo67ke555XKtNbzojmytRNbK/MDU7KLyvt23KJVptKBWkdSVhyCPN/R12VtmOXYb+qe1ni0ytQ6LM8JMiswJcv9vGJOhnfDhnf7DdMo1yIcHbrXqFNF9SBcb3d87iL8NnVRcC91qUtd6lKXutSlLvU7VRcF9//jpZT6CvjvgVeIzvLPcs7/rVLqBvgfgJ8AvwL+65zzw//Tc+Wcse/3pG1H3NTYvahFqXWkxmJyRiklStqxB2tksh3EL/l4gKsNDCOsWuGgak3aiupznpJPuxXm/SO5coS7DfahXxTcbA16P6Bqi/KOsHa4+0E8vo0s1+QMyWmhJjiLGiZUV5NXzUIJcG8eoXLCkG0degqYOYE2i0obasnWjo0iOlEqQiPq0Dnpxu+E1XjOm1dRY7z44ZpP8kTz1tB92/+f7L1Jr21Zup71jGoWq9zlOSdORGRGZt5rXxsuErSQkBASP4AePUQDyR06SDRA/AJa/ABLNGjQQQIJJDctIRkjuUFhWdyU73VmOjMiTrmrVc5iVDS+seaKBCTfbNjWDdYnhU5EnLnXmmuOMcfa8x3v97x01wv0UpFqsC+WPA9078WPGF8dMSaRB+FMdq+FsQoQRSjH7oRxWD9n+mtNmElrfvsQzxQFn4i1Ru961O6AHb3winOe1Fn7aYP/4hq7H9HHUdQOq4VH3BWPYEriiV6vRf2xBr2R5LP0WiCKsV6gtwblA/FqjnnYEq+Xkj5WuLO5NrBeFDX/KElIbY152qMXhaNstdA5akdWini/Rm+OMseeyvkUxSvOHO79C3k5R+0OpNsrin1LfNpKCSXBmknRzdZAKo/3rhVGM8WP6yzERO4H8rWMhe3Ff93fSVe9GYQ5GedpYiTX6555O9C4QPM6sK46rquOpet5VVqmE4q/9/mP+Gr+QkLRmECtA2Gp6W6Lmp40j4eZiNBZ1E6/kCz4/rak3B0V2iuqd45/EH8BSaFGRa5+YNA2GXzRI1
zCtIG2HRkGiz8WrEMG00RcFSBo3LOhelHE2mGKV5VUuvIfS1d+K3xJHZnS107Z9Sgjc/FOYQ/Q3yuqjZoOirWoq82jeM91ECJBsQ1T7TLDlcIdit8Z8d+6Qyw+W4i1EY7wWvzQ9ZOwgsflmYeaOkX7OaOCqHv2KF7Vaie7ICCcWHsUtbbaJ2YfRvZf1dQvUe77UrHRhEbjxsywEqXLL9Wk8lYbmYumh+YlM38/cnxVEWaKUBTl+XuP3QvL2R4jdtsTZxWERJ6JCqVedigg3V+JqqiU8KCdnXZa8txhDwF7CKjcoH0WkkDKtB9l7a0WTnaqPh2Iyxr9fECFRvob5uK1D6sG03mSMpIOac66VTpRZaxChUzzLOtHckJIiNWZBpGcwutcEs5EhW0eM355Xg+bB1H/579T2F56FZKFxXeweCf38vanZeemKurvKOSCWKtJLb77317wtzPCzDD/9og+euKqLrsoovap5y15MSPPavmeWVToTnamVOm10JuD7AZdLVG9h5ctyllSUwnVBYQn/u4J1JynYcavx1ckNO9ix/IHrfl9NrwLN/wfx5/yq8M9T8OMp26Gj3INd/uWlDS//XyNUuB7CzuHHtSU9GZH+f6wO6FjqAD7q4q/892/wvYg45WzYtzWuOVATpo4GkwVadqRVVvG3UTmbuTjbsHucY46GmbvjRA/Tt8Xvex2jH2NHpWQHLT4nAsynu3PNH//H/41/r7NqE6DQdLSfphm5jW6DfzSv0XZRN6XH24S2sm6ml+E75tdQh8M6SpAEs63OjGtrzyUBNDq0QhJ6H4U7nW5PlQJvZPE0Gwz+nokDobmtxVVGYrls6zLALOPiWEt27U/aHX4S9dFwf2rXwH4T3POfxP4N4H/WCn1N4H/HPi7Oec/Bv5u+e9LXepSl7rUpS51qR99XRTcv+KVc34PvC//vlNK/RL4Evj3gH+nHPbfAP8z8J/9s14v3iwIy4rQmomz6JeV5IArRfjyBlJGLRvUP/4t+v5WfjAlsFY8lG2DSolUO8zDVjircoKiwD1upeO1tsTWYvZGVA4gz2rUcUANoCorDNOQUIcO/81dOR+L9kXxu1titj2plg7bEyUAEMqDUhAS421DaLT4L5uTeiVq0XAtKkb1LIpOcnD82bkDWQ8OvxC/bjJKvHs7GNclU16DX0lmvfFge8VhkUhbB6XjVSkwhYmbmkRQGt0r0ExZ3tlSlD1RSZqHTH+rCI1h9iCPr+2HgeplIF7PMEajjr3wYOtzFnyuHPZFVPHsDGFdY/qAe9jL8UC8W5HWLWhF9diRirppPm8mxRSA7R7mM+lOXs9JrRWubkkFU4VFizWkq6VQMIyGupq8dPownL20WnqHVRDl68Sq1MeRsG5RIZGbShSYnFHDePZn+0BaL9AvO/HgvuzI6wWqH8m6jIWVXHrdeSg83XRbYzaaVPyI1S5B1sKs/cWB61nH69mOt+2Wn7YPANyYA0sjSlKfHHM9ElFUKvJSJPdfdm957lta6/l+t2bf1VQuMHrL2JfdhqChN1QPhiaK39QeFG5/5rNe/3nEt4rDlxrtHXGWMUc5drySwTBHNfGa/TqTtSPsZ+RZpirzR2XQY4XbQaXPcAczSNoewPJ3onCGVuby4l2gvzaE2TmNyQxQ7TM6ipLafBYVr3qRjHiQznrTQ96fPewqIupvYbbqmJl/FOXli/91R/e6JVtRjd1ePtfsszBlu9HgF0I+0DGjPQxXxTf8IqqiSqIKn7yfKmTm7+S9xqUQGtqniNsFVEy0jwEzRMy2SENWU3/X4V8JTeN4Z3AH8eqe0unGNcw/ZA6vNWzPFIPZx1gUS9AxoWLCfN6jbhbite2DJPVR0tNeXUu6Yx/AWcLNHLPtC22mKuMlNJb+VY1KonJWG89w6xiu6+ka2pdB+M5Gka7mZAW6qc9r5nWL6gN216NChJSIq5bUOOxH8ZCG+Q3GJ2xnmL8XVcwMMo4nBRdEhW+e5PrHClwn5/
jD1MaTIuvnivpJuNDJKfqr4g1dyNo1+yQqebKw/D4SZppYyDPj3VxSGJ3GHEYYRljWmONIXJUExJiEp641et+hG3umUmzOxBh17CEl0rJBpxWpqTAfHvHfvAbAdQPEiB7hl3/+Jf/k4x1htFSNZzye101bB1wVCMHgO4d6cridntLOUICF+rPC7uV7Qwe4/bOACnJMfytG72RErVYJwjvL4bf3qPJWqcm4DDxa4db2MFxnBtvy6NfT+aikCE2mipKauP514vBGY8p0Nr0onOOLAsRLXu2T8IO3cn8tf6uwR0uYSdoiHlKlcQ922rGptorQWvI8k+tE9WzQXuEXCVVQQSrI91NYJ0nyexZ+sQrQfpT1uR8qqq3Mp6yEddwtDO7ZCH8YqD5ZYp0xnSJVGfO+pXnKqCjkE4DZQ2JYlqTPlGleoNpGhqsfTNS/ZF1+wf0RlVLqG+BfB/4B8Lr88gvwAbEw/H/9zN8C/hZAw+yf/0le6lKXutSlLnWpS/1zrssvuD+SUkotgP8e+E9yzlulzoaVnHNW6pQh8/uVc/7bwN8GWLv73N+36Jg5vLaEVn7hjZVi/j4TlzVh5konsKaqK9KJuarFh5srS7hfoXwSP1hdTZ28ykfi7RI1BtQQ0JsDZi5KxIl+gNaSN+4sflXjtiOpsqTV9TnJLGZUSCSnwWmUd5iXoyRaqdMxibSYoYaR8GpOcnrygJ2eXHMbiTaTt7ZkpiuGmyxd4cWjpB8cYZ7IWjq7zZAZr0XBHdbna5xMxXAtClt/l0VtezMym4nK0ncVShnswpOiIs8UobOST178Sdlk+nsgQbVRdK/k6d4vFF0qXjrdFA9jRIUKZpUopEZNLGH/eoXpg3BBP2+pQmK8n2OekngAAb0fSHdu8tIqH/FXDcleY44/iIy5WRPnNboPZGdItcF+3k3jpQ4d4c0V5jiSnSHnRlLkfMA8ls5ZH6CuCK9W6M5jPgmzUh178rJQL6xG9wE9BvFu50y8XZC1wv3moxxzvZo6x7NS0tQeJIGN9mY6n1w58eY6C9ag95JNn0pqWmjF7/nmp4/8229+xU/qR5a648qck7oAXuKclzjjV/0rFmbg0Yvk+lT+/Hu/+iPa2cCfvXuDsYn+oaW3GftkMSffsBVCgD0UP6LPhEax+l3Az2RM248DdmEx3hJqxe6nopTGWk1+StMXmocBtxF1t3kQf+zJ12g7UU1iA24nCuxJxT01MY8rRXKa9imyf2OYv4/M30ee/1o9Zdy7fWb5bWD3lUVF8eeufpsYFwr3QyIBQBa+rO1EQTKDsGdP5Q4yXslq6qeBbDTDjXhPy5mRFSy+G+leiVrUPghfNvbls/uzJzfW4gettoVlXc7H9sK2rR8HzH5ADQHUglRp/I2sUW4zkBtHchqVM9VOSBDNc566+1Hijc9W/uleV/hW07xEbPks7sOONKtJyxa97QivpDv+5AuFos6OkdxY/LJQYd4u0SFjjnKf9jcV7hDRQ8YdA7E2hHlR/ovf174I8zo3FcpHUiNkhFwn0mIxnXO4brG7AXXsSbcrIS0c/JmEk8E+d7Qhc3hbo5Io71MKH6LWnlIVs4LmWUguy289fikDPlwZ+VkrtIQTIze2MrdkrIXEkJzMh3qTyVbUOONP3mtN9STXIc4rtC1M1esWPZx34cavrnFPx8L4zYTrGXqIDD+7K69jUGmFHkW5dKeekKsl5lAUbqNRMTL7nPC/cXSvJfFurBy50A9U0KRHR9dkZr8z+K8j9ZMWT365Pqliou6kWnZDVMqYIWFKj4QZE/2tI9eyC+JbxfpXif5GTwmIphd6y3AlXufTOFQbNfUAZH0iVygWv8uYMREahenPSmf9LGuESuJzNl7u1ZwzzcfiYw4NsbaopPALNfWfnEguIMznfC9/l5Vh/r2sNabXjKtyn+3KOR+crCW5UDHqMw/eHuX83U5oJipD+70VWtGu7OaNoAfpNUnu/HlnnxPdrUy+wyth3MdGob
2mfYyYPmKGCwf3/5ellHLIL7f/bc75fyj/+6NS6ovy918An/5lnd+lLnWpS13qUpe61L/Iuii4f8VLiVT7XwO/zDn/Vz/4q/8J+A+B/7L8+T/+M1/MWsa1odpGwlzhTmkoWtKodt/MxOezUeiQUfPZ6QGM1DjMridHgwLCusb5SK7Pylk20smvtCIvauxDxn7eFsW2eCidxnhHbqy87754RudOVBHAdlEUvJAYbsXrWYUkncwnxWqUtLS4XBJag+kTWRliI8k7AN1Mk10gVQm0eP7CLJHRsCmcxU4x3kbMURMbeWLWA9SbRHcnz4f1RpJ9Yl34uRn8TUSNhlwSkqyLzJuRXVKETrqF0ZLrrgqjNDdZOui1+CxVLH62R8VwU16ng2ojCW7ZavyqwlYG+3jAvylJXY0Rz7RWf/PlGgAAIABJREFU4o0ePeboJ78rILSJnDGHAX8zwxw9KiTsppvGgpyhcDfJWZidfcS/WeMeBGTqf3KH7oIoLCERrlqyVlS/+UReFstLJcp6NppUW/LbW+nuVmA3Mr5h3WCOnnAlRAhz9KTaEmuDuReqg+pGUX1DINeGPG9Jy0YU/eJHPKn22WhhNDc1PG3I64XwnRH1FwUz50lZ8X684nf5lkZ79oXtuA0tH/sl375c4Wxke2iIwRAGgzJl1j9XxIeWPM9wVDRaPGlCIDjfVm53Tu5qnjMk2RVpJwqHw/YRd4gkIzzLm18OPP6rDY1Ygulv5HVjK74+HRCyxw/8kSAe1cNC4VfiE0SLola/FOW1dCPrQRTe7t7hukS9SYxFynP7TH9tWH4XeP7jwje20iW/+1LuZTOIl3n3lcH0ovzVL5lhrUi2vM4hi4c+ZtlRqS3ZKtw+EuuyI6HA7QPDjROCQMzYnSfWmlAUZePFdzteCbc2NIrZp8i40qgStxQazeJbUYhJkMt72ZeBcFV2G3xEf3rGWU2qxLPffvYcvqhwXUlAPGjskHE7GScdMnbIJKvEJwtCbkmJbAxKa3TvxbceM9qXnZ/tkVw7/K3cA35mURnZcSj3lz1GklXMf/lJeh+alvbbLVkp/P1JeZW+grSeYR53mBAJ90v0tiO31TSfU13Wz0UrO2S5AqsnioI9eOKyYVyfd7kQ66Yk+CFzqrsTpbzdJXQUP7UkO5ZjvFyTSEmF6wpHN58Z4mRR78gyb+yQ6dcGHfOk7mufJo++2fWkeS3/z+kz/SBnYq0xs0qUuJTQQyIuKtm9Q1Th5BR2F/HrSqgxrSPNa+Ffg1ynyuEOEbeVa+IXmea9ZlzJ66Ra1EV7EEW6+WhweyGdTIQEL7zlaie+7WqfMH1Rb4tUOFxbkoHZJ0+1GTm+baleAtlUHN6ePe7JyX27/C5yfCUMdbeH8YR6H2V8lJd7RN4rY/ee41tZx6MT1bzayRydvevpXjc0D0KaOI1xtRc1tP0kO4L9nSTz6aEo7pV8LhVVUWMz9UtmXMnPAFOaYPMoOxx+IeuP7aD5XO7BuWJcyrjbjjONIwpRAuRnbVd2mAJTz0usFXXZlelu5b/bz2m6T7JWvzcP/7J1+QX3r379W8B/APwjpdT/Wf7ff4H8YvvfKaX+I+C3wL//L+n8LnWpS13qUpe61KX+hdblF9y/4pVz/l/4Pb3o9+rf/YNeSyu6O01/JQk+oSlKzDGz+2lbmJYQnaXeJsZv7rDP4vUZr2vqKCqqX1rcIeDvZtj9OHEN46phuKsxXUSHLAzcjxtwdnoCHm8aKsSTafpIrixZa9xzJ8obEFc141VF/WHHcFthD57+iwX2EDDdCcLpSDOHX1hUEs9X1pLR3b8qXrmlp248o3GTYpurDL08yQP4q4QeNG6rGe4i1dZgBvGrlUAZ2gd5PN19YxiMwu0U/gq0Szgjf9e4QEyKugoMqcEtB/yhIjeJePIzJulEz4mJE2o78RyePJTJgV+Kh8zuPKRMqgy5cZijKBbJatTgieuWvBCmse68sHIbV8arwe09cVGjx0hqrX
gLl80UQq6Pw6TM6pDAKMwQ8ctKfLKI4q72URThcUA1lqw0eb04K8FAnIkKrIfAeDfDPfWkmTurONuBuG7wS4fbeVJlsC8dal4RT3zf948wa8j3V+IhXrWk2mI/74g34ovNRuPe9+LL1Vr8uE0talvx79ohY3rNt5+vedjP2e8btM5kIHZlSRx14Vlq/HVEHzU6KNqNmsYi1uJ50146xfUgbNGsoH2QOXZ8pQlzUUaqraiPOoj6efKUZ6NQPpGsFaUwwvF1RbJg+hOfVRFb8eLWL+IZ9TNJDdPhxNMt3rlRVFgVmRKNJp/lUd7b7QPNsxaP7ZBZferY/kxgp7bP+Lkm1pp6UzzaSVR/VwgJ0YlSW28ysZb3zkZUvBNFoX4JuH0gOU2qDKYwTN1TTywKU2wNqdLESriqbufFB+vUuct7n+luNG4v/GkzZo73htlDpH4Sv3jWNXbTobwopNkYzH5E96P43EF2EVYyL1VM6AB+YWU3qlynepuLYibXuF8blt+OmD5MKmm4atBjxGw60kL86amxhKtmYofH9RzzsCFWa5r3e6wxoCG2TrjfQNZzYbi2kuyYqqJK9gMqFYa0VtOuiApz1KFHJVk7Y3NS08WnG2cOe+ghZ+xLR5xXk6fXbUdyrSYlbPEucXhtcHtRx2XdENX9hIZNVuG6TJibaY4tPkTcPrL9icMdZKz7G409nP3QsRKGcKgVOsp1rLeJWCmqrdyDsTFko9E+EW7kOmif0F2QtQpJN7SHgN6PpJkkwcVFJTtET3Kdx6sat4/oPmBqI0SL/QmgXKg1V3PUvidZxewh0bzA5hea2HDezXstSq2KMF5n2g/iczfD2beqkijTWcv9LZ8nke3ZS2r6jO0S9acOrMYe5e+rfWLcF+X+IOro6tso60FRQpM5J2jqIPNQBaH2JKtodiPmMLL8jVzD4bah2st4miHh3r+gxyUohV/KzVM/DdijRaWK/loRWkX9LCzd07hrD2FRmMcLYb7HCqqd+PlP64fJoMZMmonvV0VZa06/fQzXcm1igjDPEzPZ7WXnSdaWUy+CqO+hEeW2efTowqt2B4ufm6nXRE6AaXfkD6mLB/dSl7rUpS51qUtd6lI/qroouJf6QSmGNaDlSfak+hxei3riF9KxPX+fGdYaPRriK1HOYqs5fj2fPHfDlcMeIuNVTVO68pOVrtTulUP7jNsnzKHl+PVienoLrYbrmubDgfFuhrctsTGiFj0Wn2VVFIXipRyua2wfCXM7EQBO6kVsNCrBuNCEtvjEytO2dZF5OzA8tphBYXpF7DVhFSdfrPKS7T1eJXIlGfaphe5aT0+u40qTlXSOpqtIrhN26VE6EQr9YNV0KJXJWZHv5efCYMFBPr2XUmSTRempVfG7yZP9yZuVtaLeQH9jaWPGHgN+6ch3s0nBVTGjupF0Nycua/zCYY3GftyQT9zZkMlGE1pD/eEg6maMorQVBUQNI+HNFSom/LLC7Ubx/B7OjOBkNeG6RYWMfTpAytiXntS6SaFMrcVuBvFN9x6VMnp3JNfLM2EjJPGGlm5o03sYPdooxruSmf6LL9D7kThzaKUk0ShmUWnj5AYnLRqyMdPTe7qRlKOTT9d2ifpZE38141jNcJ3CrxMqghtPFI7iSasydiPpdypC/czkBc9GTd3Cbld4yHNYfJcmysBwm6k2hQ1ZuqWbTcQMkXFlpzH1KyeqSMqopKj2kXGviKWbXntRXdoH8f+1T4m+pISdFdPI7idCPli8i7h9wM9t6XaWq+F2UfyRWv7d7TzjVYX2aWJn+rkma1Gf24fE/q2BDP2dI1bnTvlUlOjDzGDGhB0SKukzf3dMhNbQvNsJ43gMZFMz3rdnekfKDFcWM2b8TJNNhYrQ3eipo19l6cJPSRRDd8h0d5pxqUlW1P3maYQEaSbK1XDXUm2EoawHuS+OP1lSP41kLSSJ5Z+/8Phv3LD8dpiuj0qa/sowLsXvq+Np3BTVJ2Gv9l8ucc8dcS
07CKIIl/lXmM2ptaSf3JGtIi5q7Odd4YDP6d8Wr/zCoH3Gr9ckJ2tuuGpRsRZf+2n+VpLcGJY1upW5rnxkuCnM3VZ2p9oPHVRCWVDdKLsCRRUer2uq5wGnFP2dIyfZfTBjnvzHvtUExC85rGTuzT8mOcdC/NC+jNcgvtBYS2d/mJ83EZuXjD0m+muDCgrjhWMaa0N3L3N+9sETZ9JX0d9WLP58Q27spNTK53LYl15U2SGQtcbPLPVjP3lMU6WJraEeE+boyVqTVy3m/ROxJDLq4wgx4XaRahM4vK2oH2G4OafuCSFH/LEqKmFU59//vjitC+O6dPc/RfSYiK3BFPKDyuIJzk5LKuPOE2tD8zgSahmv9rNn/2VFaNS0S2o62Tk53V9Zy7k1z5JKlrWiv6uonBaqBOA2Ggf4lSNWmni1IDbi9Tal30CFJL0qa+FEh5l4rlUZexCf7LiW79Jk5Xs/Ofnc5rRb1ZZkulG+l6ZjOB8Dogq7fSa2he6ihKpyYvdWu8y4lN8RdJQdg2TAvfRTCmls59RPHnsMsoOxcLS/fmL4+oo/tC4K7qUudalLXepSl7rUpX5UdVFwL3UuVbwzj9JteeJrQlExSldsdycpI9VOT96s3ZdW/IJa0ol2X1raB021ixx+Lk9efqHxM3kClKdXTaoWHF8ZVCgpU4dMbDX9mzmxPUEZJXXIz9vpXKpdovt6hfaZMNdUmxE/t1OHsL+qp3Sd5iXiF4owE85tWpXu2tGw27dcvd2y3bb43IgPdlTixQXsXuPfeCgqa/eFdOL6paIq/kTfKqpDJlXy+akSKSm+efVCKnJWzoq5G0lZUS8DYzLkrKhsYF//gG6gMsNLQ9IQ55lsDO1HRT8/q4aH10a6m68szirGlSE5RbU9JXX5ohaJmmu7iH06SHZ9UR+r754J9yvMUHxqi4awcJgfKEf+7bVcg02HX1XE2hAbUe6xxW/XaLJXVMeBNKvwVzWmtuJxLMqZ8klIDArCVY05eMLrNbEx2N2Zw2sORd0dhLmrgqhjJ3VNe0O6bsmlo324ccx/dyC37pxgl4WjnBpLXFWY7ShkjjGQm6Jej4nmKQnVQ0t6lS2JXFPHdAf9XUIHhdsrVJB7I1VnP3RoxadGgvYxsftaztPPxesm45XF73YQ2ka1SxPH8kQbEH5sUbCLUhYaTfuYJrU4OiVeay3+UNsn5p8UfqYmBW5cSVa9JU/jKilUcXqvbBXueUSHRF3Yq3pMjNf1pNKdvHphfuKMymcNMzWde/tZPkd/I2l8w1rhg6HeJJqnUzSfqNvd1ytUhjizxFrIB81nUaGG+xmq1hzvjXh4HxW5Et+hPZSUsoXGHkWp9gtV1hDp7h8Xp7nhCI34WOvHgVRpxqsa01j8Qr7m/ExjD0b8s33AX7dlDhvGpZ7eK7RqShXUfWb/VcXqt8Pk6fRzQ/ijNdkoTJeoNgl9HDE5T8egFH5hqZ5GzGEgLVrCVU13X01s4KxV8RQnslLMf7clzipUSIyF3Zu1mkgTqTKEhaN5vy+fOU1jqn0mLBzMHe7pSFq1kq5YlOVsFWbXM9w2orjONGYUKszJZ60rUcjHpdwLKpdxnRncodAhAmSdmX0KZKvYf+Gm9LoTIUHFzLjU2E52NLKRuRmd7HgAwjXthb6S7yviqpb71mpUoe74pSPOLG47QlDoXY/KM/HiToo7oDLay25h/dARrSZ9fTft5vnrlurdi/RhWGE21zoRZpqxBIdVWwgNpJm8ptsLKcAez9x0lHhsVZQdsPbdQXjDQ8S+iCfYHC39XUNYVHLN+0CsHVHpiZqSrcZ1wvWtdolxoWWnziJzCCESmD7j54rF957Yiic+tIbKn9XiMHfYY8SvivqdQR89qfizx5uGZGSnRwfx9PoFtJ+EAQ0wXCtmH/OURBhr8c7Ghkl5VQF0IWM0D0INGtdCFOpeqemY+kmIDfYoCnRyZ6IGlGTE50ysznSIWAlhZyjkkOqpx6+qaf1w25
H+m2uSVX/wL6wXBfdSl7rUpS51qUtd6lI/qroouJeaSiV5qksW4vqsZq1/Hdn8zBAWokSlMmtipTi+EtUw1pAqYdcdXpsptWVcmen4YaVJTrxfZDAmE0Y9+XIAxoWifYLjK4dKwvernwK8MvTXcszig+Sau11ED4m0MKAKm7f4eMJMM6wM3WuFn1uMz4xXEBaJ+bVIcMu2R5fH2E2ck5YR92ywT2d/bfe1p5qPjE+NMAWfDe1HyYg/ddfGWrxpwi1UqI0jNYlt33DTilJ13RxpjSdmxfvjmk3X0B1rvDOMRzEzaZdEgE6KXCWwmdSLEtI8nNU+vwK0EtLEIOchyTPnBKB0NSe0hqwqTB8J1zPCzKInrypoH9EPO3Jb0d836JDRvZquYTYa9yi+Wrf3ZKOpXsRTZvfyaK+CpXoeitIn/kuVMqk+e3ntri9d3wb3Ih30ygvz9MQW1buOeLMQ76FR0g2vNXo/YNYlm77zkni29YSrBjOIMmw68fMBmKNH7zpyZYmVJBjpXlLWTup+/dChckty4gWPjaZ+UnSvMrE+XR+FPWiyzsWjlmk/K5nXRcG1feG49tINXG35f/FMms9qIiEsf9tzeFuLatcZbH/uCnabgeQMs5AZ1jX7Lw0qwuzTKSEp4/aZ9jnh9pHkFH5eKAiFD6kq2QExzwm782gfSU5U6tN7NZ/6SWXUfUBvArxaijpcfMOL95FhZdg3ogwmp0iNeFJPOwAql/fN4ttPS8X1X0TsMaHKro5fWvHQtWrqQG8+DWSrCauSLrb36JjxswqVoH4O+KXB7dRENgChGzQPnnFt6a+F7hAaTbU/3+8qFbWzls+SE+ghoova136W62b6jF9VoiAdxKM4FpX35Escnfimh7WkLo0ri5/LjkZoFX6mxSOZzuxbcp643ePdHDMm/MqRK4197IiVJlkmpbx+DphB3j/MLcSM+7ghtzWxEjUrVcINDnOLGSJZi+pdbcZp3PUxYvYjaAjLmly7Qq0IVNuS5tU6CJFUKewhon1iXJ3TyU5le+nwX//TQH9jxGtp1XRd4HT+wrWtt4lQK65+dU4AHG4cOmSGK0X9cmLGCvu4OpRLZTWqD6iUcGUMlY/k1kzUneG2Rg2JMJedJXPsqZ564RmfPvsYhI5hNdXLIJSJyghR4rH0ftzOCK9WpOqU+pXwc8P8fWL/1VnjU7kokK3M+/WvM+4QpS8E8ShX+4Qp/SJ+3eB2ReI88ZgXxZiqRGFPtVyn5JQo0YDZdMS/fs3s24Pwk7+co6KsQ6f1I+zK91mW802VfN+5Y2B4Kw0Zdic7MeY4EmYG5aPsnB2HSbp028xwXUt/wSFSb4SEpAOy+4Iwaa9+1dPfVuLlbWS83YFpTYi13FvZQvMs84isCbMzVQbku716TIS9qMCne/j0XennJdFukHstVor200hYVpOPOSwqYS3PLMlpZv/4E+mbW9IPqDx/2boouJe61KUudalLXepSl/pR1UXBvdS5siQSub0w7XyJOn/6E1FkxU+ocInJo9XdFTZtc+r415JwchRVq3t15lm6o/ic7FF8j26v6O7Fr9M8yRNwf6PQQbKozZgxfWbz84r+5vyk6GdafEU7OL520vF8rIiVws1Luo9WDGtFaApb1irGdcLedRhdVJ9oiEkxBks+GtQ8kA4avVGT6kxU+MGKHyiIAhBmUG/OvM/khB8seekQbwLVvKgH5ZFck4lZsbIDqZXoqbbyHIeKqipd3n9xRf6iRzURVCYnRbaZ3c/U1HXu9tLZbwZhWh7eGLSH2eeAL135IWX0wkpKz1LjOoMesvBWS2b68JMbTB8w3cD41ZWkAR0jYemmxC8VEv5mJrngbfFPJrCHcFZMx0T3uqV6GUm1IbaaMK/FIzkWH+F1i59bqo0kPmUtHlk9RvxKjN7ZauHt7o8MX19j96N0Uxs1KWSxsYS5pd0Nooi5Odkqjl/N0MX/N3vYE+6WonwcRd1J85r8g1QnHURlbJ5Eid
CjeOqaR0V/J69jepmn/Z2ot7ERtdLt8uRZzBpROwZwh8SwFoUuOcXy+3KdV0IOSQ4Ob2vGpabeJGKrzz7ZDP19OzGBtZd7bbhSmLFoEKlwZksXey4JZe4oSubp/lUJYqWFE/3Yo8eEX9oz/aAyolwPEfPxhTxvwRT/cVFZ6oeRw6t26uROrsz1fPbkuX0SgslKo6N4VpNRNJ878VsDpk8cXztCo3BHWVvizJKVeLcB3E7jl4bmMYqytjCyMzPmye97vBNqSmy1KGFH4WYO15ZqIyc9XFvxkz5HhmshuGSjxMO5E09w/0qUrNhYGbcuYY/SBX9SgkUxy/iFmVKg/EwxrM/q0amDPDmInaa/a2g+53N6FNLdn5Xs9ITWopa13H+JSRGsdpqxMTSfimL5dkHzfebws7XcK0B/V5GskuuVReUTL6nGF/9xMgodKponL77eEx9378mF3RsbS/jyinEp9Al3zLKrtdAT23jaCSgEHe0z7iBe69NYuF1Et4VwEWH9T44c37aYo2e4PfcSnNS5WEO9FUKJHvPkCY61xiwqzNMBcma4lTXDHgK5eHBVUT61l0S8+GqN7j3dl0vqx6KUX9dkq2jejaLmag26eF8LGzs5Rfe6EaqLyoxLQ1aK6pAmikK24kvVXjr7jc9CfmjOOwmzzwE/11RPIyjxCBMSKiX6LxfTNRyuDfUG3FbmpnsZCMtqol5wWxMroQ6RReVsHnqOb9vpOs8/jITG4I5h2hGxXSQ6zXAj99e8j6iQ8VeNKLi5RoWM05pxXeaiVpghkZzh8MoKB3crKZyn+912MC7d9J2ejfC8q12aKC77pZqIEqkS77iOhbpQbg0VZD0a5+V+H8HP1MTHBiE2ZC09FFkrISx0njBvcRtZXLo3ZceuT4S5pv/5HbHR07X5Q+qi4F7qUpe61KUudalLXepHVRcF91LnUtItrmLGjOcn12xKTncvvprmQXxa9pBQ5fHt9HSXjag9E8PyB2lKegRViwJabeRPv5QnueZJjklVSYr5HGnf7Rlez4m1sP2ax6K8Nop6k7BdBAzuIN3P41KRbMnpruXpsHmUP4+vhTG7XvQsavGL+WgYoyGmhPKaZj7SJUWnHKYvfs1PFhWF+xhmTPnr9Uti9uHUYiq+yvpRPnS8ghQ1Xyy3XFXiwV3ZgYRiKNJwbQK7oSZmxbyW19mtAniNtgljE75z5CoTluM0RHFpMXuNOj0Re1FLhisz8Uez1tS7iA6iKNQPI9mK8jOuz7d8+yGQblcon7Cd+LySUaS2HJNF8as+HYitPY/jECbvbGxdudaRUJJ7xB8dphSlWGsZs4eIHgN6FAUiNmd+6HhViyrjLNVTx3jTigqmmdJsVJCkq+HNAtMFUfUaQ2g09Q/oD3HmqP/JRzCavJhJSpQ9P8snJzsEsVGSsvSSaZ4Tfq6mBLnYiDqx/K10C4dW6B/rX3uSK+PsDNUuYfvM7PselLSIh/rss2yekyT2tKJ+u4MwQsNcT4pZMopsROmwB+GFto9FASuqRfNcmKNDImthSyan5Dq3cg+6nS++tRPxo8Xtxbd5GjtzGKEtrNEQSKtW+MSF5CAHCROTolTaQ2bxITCsDHY4KdyR2BgW70aMrxjnCvP/yIr3K0P9Esm3hvbBY3rxCGatyEYUJj1Gqm0W0sEoqk1sNH6mp9Sr6qCxXSI0uqhR4mdUSX4eYPYuEGdWEveUpflwoHu7QIdEmNkybzPNpyP9m9mU/mQ3A/3bGbnwa1Uovv+XTPsUGNaSGlhvI31RcZuXSDwKBaZ5kA737s2M5qGfaB7NhwNhWQuHNmXQ4pn+YWqaGQSym60uc1lz/MkKlfOZEpCFglJtPLufNrKD0ol/1u1jmc9KOvHHRHIa0wf6Vw3DrZvGwu4jtouYUdZ37bPMnVpP/trmQXy82jvad3vsdSu7PE8H/OsC4tYwJuEWa59JVsbEX9WTf9QeIqnSzD4lXJeoH31JlMyS+gUoD7G16MZh+4
gvKYmmD1Nipdt59CjcZn0Y8Dcz8lJU0FTJWNSPQobItdzj+ncfMVqhXnbErwQ4bkaZU/Pvjux/OpO+BSPfVaedw6xKx/4xCVFDKfprw+LbntgUVncGHRWpMVTfvWA3jqyU8HsLbzhbISNonzHHUXalNgfi/G4aC1kLMskobJ+onkZiLeM58aqfe9SyxvQBNXjUyknfg1LEcr/rUbjlbjuSjdB0dMi4TRIGNAi//Lmnnhv8TGN84WYrMEW5n38U5T+l8r3aK2YffTnXMudrUWyzkvH1rSQAmgFSKjtPITMuJNWt3kTGlZFUtFrUYJC13PYRvxB/fv00CEtaA4WbXj97wtzgl0bWgCy7UtP69AfURcG91KUudalLXepSl7rUj6ouCu6lpjolT41r8eSNq+Jr/CD8wlgz8XFjDU9/w+ELn7X9pBiuJQkmtKfkpoxfQnLlmM/C0hyvxQ8aFtKhHuvM5hfy3tVWuIOpUjz/6booS8K/3BfOqD0Ki/J4Xzo+h4wOSli3hT+KhmqTyUaYfXGWYe1JGf70+p0cojJP44z/6+EN7s0RYxKmSvCmxx/l1rAPjtl7YX2GuXhfVZL89VgVT1V5y2oH4xL03vLX/+h7ZnbEFEqDVokr27M04h37tr/hi3bLkAwvo3RM8xM4DBWHXUMYDcomlIvkrNCmKKY5kor/FiXK3PGNpn7KU7IMCtSL8HltUSZCUW/9rCTnjJlxXdGMkd03zZQX3zwJLQGkG7p+8dKBDZNvKhtNqs/HVLvI8W0rLM5WC4mj0nS3JbXokydWluOXM8zYSDe5U8w+Dhy+kGvoDolxXWGtxhxH/MowzjV2ODNdsynM1pXF1BrbRfxcl89TjqkcYWbQX9/h3j+LwqIVfu3EOwekRhMrzbDUkyohvjNRVUCU71iL57TaZNyueBPND3xgCmbvPcfXwuskS9dzrNTEmOzuNM1TYv5dLx7lRk9/Vz3LTsJw20DM+Llh/9YIr7mk/Ey+2JdIdyc+6MNrw9WvRvQopIJTCqAOiaDkPJvvduz+ZI3bDMSmnegHenMgLK7xqxo93Exd335lJh/z0580dK8UfiEpbJJupCYWtrxXplsJASU6aF4S9Ytn9/MF9ijnMywNtjrzUZPT6JTRY5zOOVuNOXjGlRXPbCtjkzWTKmaGTHdrqPaZpKC71SQzp/3sJwXX7HpSvRQWrlOk1hFmGtRZ0dY+ExcVekiEuaQjjvct/bXBFtIFCvpCFdA+037ypLcV3Y2ZegliJ+q7HYSgoMco6mqsaQqBRO96/Ns5totkpxmvHNko3CZOc3W4dszedfiFI9UaP5ddGNsnVCwKboTjq4r2s/+9XTGA2W+3gCSsK5GxAAAgAElEQVSrhUbjl0KeYSMq+Tg/d+U3n0dhBL+Ip1OPkfFaXne4kfu0v5dEMTUmUispY8N1jV44qmdZt/rXM6ID3xrap8jhK6GvtNuRPCtKZ8y4fSAbi/aZ7lWF7dPveShzEh61v52Juhvk3jt+2VI/i3po9qOQCrbC2Lbbnt0fr5l9GCZCS9aitmathNzS1GStSfdX9HcnUkeY/NjVSyDMDcdrLZ7643nXIevzzssp+Wu4qyYKSlLiIx6uHNV3gA/o0XP86vW0o0USr6zbDqTGyS7VdStq7WknKmbpDXAKPzeoWGEPAR3SlG4YWyeJlo0lritsH1F9QB976qJej+uK+vORsKypPx4I7VLG/balehRft9sOhHUtlJdCX5EUUT3txhxfWebvPMYIz9720N05qm2cxmzxPrB/a3GHLOtdLeucqMEnj7B4butt4vC67FRuE8f27J9NlWYoyXtkJEVxTOiQp50x0wf6OwdZduyOb2qaJ6H3/KF1UXAvdalLXepSl7rUpS71o6qLgnupc+WMPYr6evLTgrDr6hdR8E6KUpiJ71YV/01yRcFcicKarfypR8mxBvBLISGMV5Sca0lMQzGpE82DdLDGSnK6YyOduFn/IPt6kGQlV1h7og
Zm1JzpkS0rUcD8QrrYSZkcNXezIx968ZOlrDiGikNXE6Nm/2EBOqPnAcJJGYTZ58TmZ5pqI3nx/Y2iecpCTkA6R20nqUCpyqj7gftmz5/MP/DabQB4iTP+0e4rfh3v+Hr2TBcdrfF00VGVi+p0wqjM1dWBQ1fjXESrzPZ5RhzKYGQw48nzBP2tkCKclQQ5gMMbQ2ilOzpZ8AtLdy+epxP5IRvpZO/v25IkIx3xs/cR+yBJSf2d+Mb0YaD/mXA9tc90b5qpe735PIgvrJLUJDPIdQitnlSxMCtd8CWxJjrFuFRof+46r7bi09PPJ4KDMBXtIWELa3K8rsnGlNcU36KOmWp35v+qnLHHSLaa8etb8bD5k++zpHkpRf3QSRf+NtJfi5qXGlEcQOab7ZJ08s4Mw5UWhfcHKlSygBYVqL+x2D4zrAvjdHtSMbV0HAfx4upRE+aibJ14tMkpSekZEqG1MqdLXvxpzo8rQ/sQJpLA/m1F8xLFR1ooAcNNLWzgSsl1OCT0rqeBiWyQ1nPx2mfwNy26j5jDQH+zJs3UdD6hzRMxxAyw+9qgAyy/K/71ucUdEt1dhR0yi7/YML6aEx1QVLBTwprpM+PK0n7sRbF994R1NzKOc8vw5Qy3K+MeodoESSIrlAA/kx2UZMD683UZbiz2KJ89LRpireX6GEVWCntM1M/D5Okcryv83NJ+vyfMl7i95/BVK0SBcq62i+i1xc81povSnZ5yUatkLOwxMf/VM/3Xa1GdVCHIVGeCQWyXVM8jqTFon4i1xXZJSAonFnUWH2q2mnFhiLWifQiMK0NVmKmxthNHdfH9yHDtii834e8KK9eoSc3WIWGedzS1wR7clAaZrUIPEeUT43VFNcq8d7Wa0ii1lyTGOLPYg0d3gXTlcMdEKN5X0yfm7xPdvZu84NVGEsnctoxFbajebYjVNWEmNIqsZEzajyf6QYV96Rjv50Lu6aKk9h3VRA1IVVHflSLMHagKt4/4pUP38l76OFI9aVItHOFcV0IDqtzE/DZ9wG010cnuyfw3W8blFbMPQuIA4f92N2bq+G+ehfxg9xF7kLnZv67RQ0bljH8t6Xz26UCshS0MUH8+MtzPSJWZEvRMzPilnXaiqpcRMthtz/GnK7n3nzvCVTN9D443FfYQUVp2O/UxEFc1KkbMri9rVEu4anAfhGVuelnnZC0rbPXeo30qfGkhjVgSi+8Chy/stI5lq2i/29PdXhELNcV2kfa9gIu7L4RIMVzJTk69ySQj92L7cE65i7Wm2olfXhLuEvVWMRTOff2SChtfTWxulc49MyD9GLP3g4yxUtjeCXv9B30Wf9m6KLiXutSlLnWpS13qUpf6UdVFwb3UVCpLOtPhq0SyTB3l2ZXO2yAqrF+AXyV5Km7KU5cCPYjC6Ve5+HTFZ+O25XU0dPcI09MiamcSj1/zIOcQazi81VMqVJiJCtw8Zg5fnl/npOiOq4z2iv6mvHdh92Yt/ke/EN+wjvKz3ywfOQRRI/7s4TU/v37kp7fP/PlfvEVFhV55kteooTAmHTz8a4o4jzQfjKhay0yPot6U9yppP2GWcTvF+mrP0vbiuzVCUbi3W5yK/J2Pf8rS9fzF5p6fLJ/5p9sbQpL36kZHypK3HYJB68wYNaaOqLqkvHRWrrlShBmQRTlPjol1SmbyXFZbURf9XIlfshzjF9IxfEojik7h5wq/dKRKAtqTFT6iGlvhu1qNOyT8TBHrky9X/nS7iF8Y7DGRCk2hvz35CIvifPK7lsfqYa1pH+Vz9TeiQBy/nqMimC7S35iSsnTma57GXiU43lt0BNOls3/8fl7U0Ej3psEdhNTgZxoV5VxtF4mzivpFUorskBnn4vuM1+fEOHdIqJxpHnqgobsz0mVcvHTkkvTk1MTsDK1wdX1RzuafAn5u6N40mEKqOCl4u18s5ZwXivYxiv+ty3QL8edp8kRRsF0mzIRhaoZMmMGQNM1zZrwWJTzMNNXLSHffkn
++Jhvov15TbSTpCEB7S7YKu/ek2tB90WAP4g89sZ+zknv/lEcPohLWG/EfypjCsDLn9MPCgNUlMUnuZV0+s2JYa2LVsvhdx/FvvJlYsNKJD929oy7UBJVlt+jkH40lUW1cKcaVwh1y4f8qhuu6jJfcEyePcKqNdGuvKprfCKIlLG9IRhGWNaHRhLkkbukxT1zeajPSfgyEb2bS0a6EZOEOTJQIlTLD2xV27xmvqpKQJeSD09hmqxivKrJRwoi2ckyq1PS5Tt7m4ytRd2MlXeXjcjapztUmyO6DVeI9HxLjQsbSz+SzV3tRP2NjyFGj7tdCKPDnrvPQGHLxuvZXhtDUwjB/0JOCO383MryaExuDHoSSYYbMuHKT+njawTBjptrKuWmfyFbT37vpnMP9kubDgePXS8xBVLusFbE9k1X83Yz+zpX5bORabT39fZlLQ8aMieGmxh4jw40c63ae4U62Bc1QU30+iIIL5Jkwr2NjMYX5TRKPpzl6/KoiNQ7bZ5rvt8TmunyehOs00RVub8j0t4bYmmnnJ1lRocelJhnxhfZfr0s/iYyJPVSYIdG9brDHiF8KhUPlPM15qLBdZGzmQhJ4Hsm1oXtTE+rT+qMlpdMZ3M6z+2nL4tueNKsklbHMw9BawjdXmDEVFm4u9JtCeqnFw9w8H4k/vxKfcasx9gc9G4VSkCtLtU8MS43bS8Kev26mtSVrqHaZYS33o+2EnHK6L+w+wn0ltIYxS3KpORMogEL/SAxXhubzyPGLekrUO63hodVon0QdXzbYg4y9HvUfrMheFNxLXepSl7rUpS51qUv9qOqi4F7qXDESK4jLKC2jnTxZVRsYr9TkC+y+CKWDXxGbwqZdZ8zOiId1VIR1pNpYwlwIByBqLEoUV78UwkFohUwQ2xN/VOFX8jMqQncvndyxURONIdaKagPDLaS6eH4Nv+flzU7S0lJ1Sh7KuE+OMVn+ZPEBgJvqwMIM/O/PX1PfdAzPDVXtGXHEuuSNNxlchqDwa8mfj3WmflTo8eRdAz9XhFkizDIz4NYdGJLjHx5/Iu9lD/hseN3uSFnzk+UztQ68XWwYY/GBLSMhGfpoeb2UtLNP+wXP36/RixK5E4VV3N9lqhdJkGofEllBf31KSCo0gyjUg/5W0z4kxoViXJ8VymQloUnSjRSzT4n+1k4+a7eL0okOE1N3XIgHNBTww+l6t58kOSrMtaTlbM6KsvZCxnDHjDsmstK0TwnfaupH8XR2rxtRKBfik6uNjFus1JTYpOMpxzxzvBdvX9CgojmnylXivfQrh5+r/5u9d4+25KrvOz+/ep3nffbth7ol0RIGCwFCyA0BTBLZE2dw4hgnZo3BHgeyzMJeK/bYeDx+TLLGDk7GniE2CbETL2CAxIPjJGCIx8mK7cSR7RhsaAggiTdCIKFWq7vv+zzr8Zs/fvvUOWq3pHMl9eNe/T5r3XXqVO1TZ1ftqrr7fPdvf39EeRQyMVGre3nXMtPlXTtHVSKMlk0ZnNQ57SmDtZjmBZvtbbF5GjL3WJnWujkb2HuhTC1Grfv1iir4fY4XTAXL2xGjxZjmhindpo6HtigsW1fRtZEMKS0T4GhVLFc9VveiFTL/jE3RHaxFVLtSfxdq2bpGi4JUEVVsyutoNa09kuPglVpmEaOVpJ7RHeVTtVhDbF2kUDaVGCEZQutCxSioLcmwYrwgLH21YNyNGa02SHuFqetBOItypbExonfCRgBGS0J0omkxwuG4Ghs5ZSMlbwvJKLL4wYmv60QZ6iv5gtRzAuIRVLHNup+oP3Yiqf18Te2zdsiP24jEpGw8im20opORDJXGuKxVzKKdmFe0wnAtpbFR0Ny0Ea1Jxri8EwVF0uJQi4bQvlBSxZAvmixWdGKKhlgcutjxNLZLykyIe3YvZ1nE7omMvG0KGoDkJdlOiEcF+scymudzhquWjU7FnmlUEIdHAhYyb2pomOWPWKzw5P6sMrG2Uxu9GS/GlA2bT7D4NW
uM3esz83Adq2W8yyJaX99l+7mL5BMv4cI8zrPtkmhYUrYSxsuWrW5y7yR9AYnJF7ohQ6LSOjdmvJhOZ8qPKkYrKXkrIspLohxGyzZyN2l3i0UmnE/z4Z6M6kxGG5rrJXqsayMJuTI60g7XXlXH8sqoJGqYZ6yN3phyn691a0eLvDv1OK4SQTMhHhOy+U0zvI2WbE6HRhAFP9fmBcs6Bhanm20WaGz1H3cj84JXrWOCzbPWnhsqMF5OqZKMMp3OEwBlvJzUz78yE4ZHGnS/NCRfNlU1O9sjX1hguBLT2LJrK9upgrtMUrd7VFYMj7Qo2uGZkArdB4bkLVPBNYLd4zHD1a6NuPYtVnx4pFG7QwxXIloXbE5CFdvImSi0zhe1Mi2VPesRu0eSkTJatONpn5uOAExGZIpOQjxUqq6Q9qp6lKDM7Jk2Or5INLKHSTSejujsBVdwHcdxHMdxnAOFK7jOlLIk7ypRq0C3s+mv/9RcC6Ic+jcW0CyR7ZRiqUDGoVCjJL5gqkDZVIjUVKCR1PspMzUlTKFqVURDoWoo8XZE2Zh8l5oauaaU7QrNKooyIRpJnV2sbGBxRqmaL2/TYnSHa/arGyDZguGqqa3ahHRH0BQ+t3GEhcRmobbinFxjHtpepCxi4m5BkceU4wiC7yylwFhIti3+VkqIR5aXe6IoVVgdpDJl6sJGl9HxhBONDb44OArA/YNDjMqEQiMqFRbSEb0yY1ha3C1ApBWFRpQa0YgL8jJmpT1gfDRhPA6Ze1o5A1pAHFQtU/LSXa3rY7O6TZms0BB3NW0HsCw2eSsiKtV8ixOb9T6JqwQoOqYc7Jw0p4XOVkHeTqhiU4kBRotC0RG2T1oDFq3gh9iNao/JKLf87/E4/BJvmtPDwgMFVWMSTyi1m0PREqrEvEnNqcPqPO5GpP2K0arlUU97FaOlqFZvwY4z3bV4VSlhcCjEwJVKFeLbhksxCw+O2T0RE42F1nrJ5Lf+JKuT+TKa28DwcMa4GzFcieo61WUSoX2uYOumlMImGlM0TQ22a15strGaWqaRZT8bHo7JtoMypEreMbW2sVWF2M9J3nbq/aQDJSq0zvC1e9zU98l5nmSomtStaAY/2VTq4xocNreLtFdRNKVW/9NeyWBtEqOsjJeE5gUYV0Jjw/yq40FFOpnl3bVsTMPlmPY5m/mfdxuWe16sTTtncnZOtkHt2WFx3KZaTTw4R6upqZph/8VyzGjRjmm0bO3VOWuxy8kg7GNy3yXTayPvCtm2Ml4QMgkOG8GVYqJmFU2ps59ViXly2wzuqFbyxssJ/bWI7pmScTcy94FcGa7EdTYmqUwJHweHDFFzimjsTjMO9o+a+0CUK9snzSt2kqVqcF0rtJc9W9OevTa2K8pWSuvBHXafvVTfX8kwNh/TnZBlq7J7fOJSoiJUaUTeFrIdUyCrRILLQRhhayW1M040Bo2VbNscFIZL01GabKsw15NxSVRWFItNGhtF7TbQvJAzPGTuDOmu2LkbVQwOpfU1VnRiOg/06d3QDtdkBapkG+M6K1g8VIaHzBEiHlYkwxKNYsaLaR3HLGGUxWbcK3nbHEuqBOIwSjC5V1SE1sN98vD5uJdTLFgs7/hoh+xczzIg9u3mTfoVyc6IKrW2SHolRSem+ciQ7Wd3aD2SE4dZ/snuuP4uXY5MrU+C2qpQxdM45qqw7F/pTslo2RxRJvUrZ+KXNQpt2KvIu/YMSwcV43gam17Gdu1VqZDtVkipDK/rTkcttMNgLZ7e/2qxsuZ+EtwzBKJBwehEox61qxIYHm6Q9YJimgpF29aXDWhuKuNFU6rrZ11uz6LhoZjGdomUlt1Po6n7Qf9YSmPHRubKlJB1L/xPCN7Yg8OJzdEYKMOVhO7XBsTjFCqtrzE7jtieZa1JxrrRoz3I58QVXMdxHMdxHOdA4QquMyWKLfd8VjJulEzcRYu+qZfjJSXq5lTjGBIl7haUO8FvL6nIlyvinQgpoOqaqlm0tX
Za6Hw9YrSq5AsVLOZUpCQ7EUXLZmTC5NeeEOVQrFSQVhZ7q9RxVwD5kqKJEu/ar+fRqv0CbZ2bxumWzel368C8PXcGTb7WW33UYR/q9DkzThgPU6r1JkkvolyY+KEq2YVJhh6bTV3FpkAU7WnMYjw0X18pIe8lfG2wylq6y3WZWS1cl23xyHiBSKZq46DMIOsRBRmq0ogqBBolUlJobH65Sc7G0JSGsorIxwnR2YR8sSIaC+OF6QxUsGxqrQsVvWMRVSYWs5iZyhUHL88yxAc2HynoHcuQwmbXNtdLesfseLtnSsadpHay0MQUFI2n8XZlw8573hbyBYuHlspmuE9+/VeJKcRFC/KFmLwtjJYg6cV1HPNwNQoKg8UIJgPqmbhRPpnFDL0jVrelr+YMl83VYLwY1XGfZUPoHQvqWWEz75MBjBeFKsw+rhLYOpkxWhayLaW/FtfZ8ibKa9E21wBUGXdCzGbXVPqJ1233wTFVEtM/kjBatpGL5vngRhCaIx2EWckDm7U8XhCktGt24gM5OBQFVRB615kzQ97lUfGsGkFjo2C4mpBdGLB5ywJVZsputjvxmzWVJxnAcNnU23whItuaxhaDIiJ2fzSgfzimfb6kt2ZOG3adW3tP7rfWhSp4Uke1YipVUHq6QuNsj9HyEjvXm5o2qfNoNaFo2vmoUrtHiqapyXV8dmmxdemgMm/j0u6n8cL0+hkuW+x7tmsxtcMVoX/Y2n6SuVBD/C11JjDIFxNTl3RaZymhf7xZxxnHY/Mujkch9rpvGbcsrrGiaJlyXzSlfkaVDfOTrmKpj8ueN1mdDa5MIYnsHI8XhOam2ujCQKdxzCOluVExWI1obVSk2wWDYw2aSVRfY0VLGC3EVDE013MGaw2q2JTXYAqChrj3omWz/JuPjNi9scVoOSbbnsTFVkgVYkhDdsPmRkVUTLMAJrsWR4+YC0W6OaR/fYd0tyTtTa6xgtFSw853kVE2giqea50l0Wbx23Xc2CqoUsui2NjM69n9jfM9escziy3vhFGWCssY15he8/HYlOjJvZltF5RNcxgAGBxJbS5IJkTbA6LVpqmUraTOChblSrTUMjeVxZS8E9HYKhmc6NB8JPjyLmXEg4qyYc+CshUxWjLv6eHRoLhX5hiQtyPiXEl2x4wOZQyOpPX50djiebPtsvbUTfoVg0NxPWoxGflIB1r7Wsuutc/kWh2EZxJCUOYr8k7EzvVJPVI3Wp5mu4tHdr7KVGisV6RmX8t4wdT4KhZaGwV5N2LUjMg7Ut+nUk29f3evixmuWBs0NyrLhofFJtv+IpoXLFPdeDkNz6pwrTbF7onEXJLyWGhuVGgU0T8SRlFaIGo+6Nm2ki+Zyl62p77Xzc2K5nnLvNc/Yu4qo0MNsq1J0Pn8uILrOI7jOI7jHChcwXWmCBCBiJKeS8mP2i8mjUxFKztl/YtIkwpViPrBL7YRQVZRLCqiApFSJRavWDTt199oVZAc9LoxVII2S/LY4nWrkF9bEyWPTRXIziaMT+RUTaXsVLU3rShUWQViqpdUMvX0m8RrLkDR0Vq10ViJcji8sMvxtqmq6+M2D+4s00nHrC322MkabPcTNFWiEO9bNe28xEOhbChSCFk/xM+Fu2eSWU1jDfHBwpn+IvdGx0kiO/bD2S7dZESEEktFhNKNR6RSkgapKkaJpCKVkjgcyFBTNlodHh5Z9rULow6VCtvaolooqIqIKo1IexH9Y/bzv3VOyAcRRccU9MaGebOikC+Epq6guWEuAPHI1PYqFYarca3SDQ4llE1TCYsWDPIkZHWaqnRSmnKQ9kxFN9XTVIfJzNkyE5KexQgXDWG4ZirRcE2QcBKLNnTOKP2jFiudd4Uyi9GEWtVobCrV0iQ21WbbyyD4MhbT+hStMCtdJuraTN70cJ0PDwnjFXOZSPqWkSkqprHFg0NR7SOahix1omFGf1CqRisJRdtUurKlFiMuwQkk1Kd1oSIqTUWKh6ZslE3LBD
RR6apMaK5XFuN+TMz5I1bKZpiVDLTPwngpQRQ2b1moPaZb62XtmQpQpRGt9YLdY4mNgjTtgNMQ79vaNiUob1tsYzxUc9FYnPHBbVhMexHitlXMjUG0osgmymJQLVPzlZXK6huPIAsq+GghMiWnK8SDEHeopjJpUOk6j5RUscXb7Vyf0LpQke0og0MRjc2Juj/1cC4aQmNT6R0Xmpslu2F2f7arxGOl+/Xg5tGOGHeE5mZFOROP2NgOzheRxWonA9vvRL0Gi2cvQ5K9bLOgd6Rh12GgaNk5NTUeErVzaC4X0/trkr2PoMRno4psp2R8vT2sRg2hfb6ibAnJgyWDI6md55W0jjOezFTvH03Yub5hz5oVe+5NYhKT3ZJkULFzQ2z38JEGolDGUrsxTHxMq9QcXybHne2Udcxr0ivIF1Oy9TG9G1rE/YK8E1E0o9rHVMqMoi2kOxbnHo8r+odi4px6/oNUMF5MLV4Yi7HunBkzOJJNZ+UfbVM07foqG2IuGkOt4/DtPJvUnO6aCp6ijJYTG/3oTeKPLR48KiE/uohGEo41re/T9tmCMsRyVqmpvf0jibkbBE/0oh2TDCqKrjksDJfjepRhsGqfbZ8vLJa2Aa1zOVUzIW9bHPbkWh21zT1Dupatq7lRMVgLMdRB5R0vRJRN6JwtTY3OlbRfULSSuszusZjOmYLdEyllU8JIgjA8ND3PYO1QNkL8/qZ5h5eN6NEjeqstpFIaF4aULYuJVxGS8Hzrh4yc2XZBfCiqs1FWM97Yjc2cshEDwb2mkzBcjuk+NGaUTM6t/UU50/u1FdHcrNg9PnU7KZNpHPtwNbZ7aDh1/LDRlYrxYmYZNxsR405EuuuZzBzHcRzHcZxnOK7gOo+mgiJPkBiiNHjQjTKKrkKsaBmZR26qxHFFFbJUMTJ7BOkWaD+ByBS71jlhfChkz1osicYRabNgvN4kXR5RjM3wtEqCRBAp9BKqRNEogkKgBOKpmlVlipSCFELjgjA6pFBZ3O54eWampUyyG4VYuZHQiAtWQ4DSctInkYp7zx0jiSuSuISsosqFbD3M9j1vSjAVRLkphWUGqMVvAgwPm7pbpSEeaiemleQMypTh2OK3zgyWSKRkWKasNXdZzfqMq4RWNKYR5Mc0KolQFuJhHZcL0I7GHAp1HpQpS80hF24eEgFaKJUo/euEohMU0+2Y0ZL55UppMcOaQHPdlC8wFSBvRwxXzRmgaJkXLGqqI9gMbo2EqIThiinlE0PViaKTd0y9zBdsJvxwVWium1q3fTKo8mLqa9I3xVRj6D6o7DxL6ri0ZGBZqqLCPE41FsoWNNaDHyxBKd4xhTjbzs07tmlxhxPPyzhX8k5Ec8PiM1vrJf21qSoNpiCKmqtHEk/jbqPh1LcRLKasaCakYQZw0uNRSt5kFjtANLbjHy9Cuks9ojBaskxqw9WgjIjU2dgm13O2FdTj3ZLRSkLVCJn6Eq3P93jBFMN4bLPIG1sWXxwPFY3CDO40Ig8xc5qYo8TkuJtboU2XI5oXSpvNnNr5kipkiHuUSmltkQxNKdQYmufGrN9q13O2rfSui0xJTE0xkhK6D1YMwv0ejU29rWKIYqHI7Doo06kDSX8tIRkGxawZ4pFLrbM/ASGO3NwjpBszCmrzaGnqf9w5k9M/klJmcXBlUMuWmMqMMmQZwer46L7WmbmaG2HEZDUypT2yEZrBkdTcDTKLW4YQVzk0l4d4bK4v+YLUMeST/WkcXBD6SvdrA/PIrbSu82hZ6tnlvWOpzX8YKFIpaShTNoX+UYslHqxFwd2DcA2EuNiROTRYjLd5nTZ2KlrnTdEDG4UYd2xb2TClvftQznghrj14218dEhWWOazMoFhIa+eFMqjFUTtGI+g8nDNYS2g9UtRxs63zkwBtU4zLDEaLNpLSP5qFuP9wXywndRz0eEmIRkLRVJbuG9M/NgkuhkYYech2Jq4pcXBrmM64b14o6B1LGa5ZFjEUWg/3GRyy4a
qiE9N+sMfuTV06Dw5AmgxWY1rrlmlscg6rVMwBZTeMDubmJz65VpN+iayYQhuVFXlQGDUy9duOK3hcdyw2Pu+E+QpKPRI1eV82LdNfFAtVbN7KtQtKCf0jae193dgsGBxKqVKl+yD1dVg0TS3NuxGtczlSxUR5RX/VzmGcQ+94alnkOsGxZGT/wybnNsptjkaVWL2ksjj+cXfqSjRYs6x8yQAbAQkuHuPFpI6Dj0c2cpfumud6MrDviXKtXSaiMRQdi7/VyM63xcJP/xcMDkXkrba5s3QT+ofNeWUy0hy5CtEAACAASURBVLAXXMF1HMdxHMdxDhSu4DpTwkzWshQ4OqrVo9HhiqpdIs2SKFbKytYvLQzo3WQqr6qQjxNUgVZBlFaUbWXn5mrqKRuB5qCVEC/mqApaBJU2ZCmLs4qqD1IImiqSVXS+mNI7WSIhI5q2K4ig+UjM4JhSZUrch87XYffGoDTkEA/MLzVfrKiA/FjBqExYz82wtBOPaEQFSVyx/qVVOie3guoLoxvCT9cKpB9TtoTOgxHjBfOtHK5N1ZqiW5FtRmikNNZNLR6VCUcWdhiHQqMyoREXPDLscu/5Yyy3hmz0WxRVxGrbrA3iqGJYJBxt71AE2aCdjFlIRoxCUN7XdlYpqogkLRn3MsgFySPyJVPHweJuVST4WppiFY/sF3ljY+KTGvxmU6APCw9W7Fwf1TPwwXw5qxhGKyH+uDRVNelTxzrnixqy9kC2CUUXsp2KbBcu3BpiszJFKmH5yyXbz7LYUFOh1GbeY4rFeMmy5hFZfammMV0AWc9m9S5/sU/ZStAEohB7OevXKGoz7uORKY9SQdmCUTJ1Y9AIokJonVOSoc2IL9rUil7SM1/K3tGYKonrmeJlY6oI7l4Xk+5aZrH2I7D1bKHKlM4ZarVmuCJk26Zk6uS41M7RZNawzRa3OnQfqriwItaGDSU7b2VGh6CxTp1dCbHj2Lk+YflLFvg5WkltFnfL1HtSU+3b50qGS9YWZRPKNKmzDpWp0LpQUjwrqq9nKWF42DIMLtyvLNzfZ+OWDv3rGnWs6mhF6jh0jSyLW/OC0j86dVpY/vKQs6eallmvbSrM4LCwdF9J72g4rmUhedhi1+OhnYfhoeDMMTk/Ym0/Wo6JCnPEQEzpWfmiXRxbN2eUmcU8lhksfWnAxi1t4lFFsTTxWgaVmKhU0p4pdL2jpj5nljiQZKCMliMWHigYLse1qjpRqcBGOMYLpp5LaU4QVWKZnoqg8lapMFg1f9Gl+0uiYYGuZKQ7OY1NO67xQgKVEo1NsW5sm/tIYz0n6VmZ/pEGRcti3KsMy/iodo1PnRZmdCo1JX60aMc1yXI3XApeyC2L8R4cEao0o3V+mnUvX22TrvfJb2xTJRbLmreDkh/UtaJpx530SzRKGa2kZLvmpjGJrx0tmsrbulAxWIuCG4g5P3S+bhLu5nNaoV3Nv7x9tqJ/OKJ3XVor5c0NG6UoM4vnHy3GFlOuwrgzebZAmZnSHA8rylZEMqzYualTj7CUjYidZy+Q9kq2nt2eiSeGxk4YPdsaMVprUQXv4wlFK6r9ffNOEka+oGjGFO24/p85XkrraxWxei18pcf52xdonyvJO4+Oi00G0Do7YvM5LdpnC3rXpWG0w9qy80hp7iGFsvBAYYp3ZKMEo2XCcdlzPO0rcV+J+zll1qB3PAux96ADu+eTvrJ7PLO4+7GNbO6csBu+tV4xjsxz17KBSh1/X4V7uWgGhVWnbkeNbWWwGtXtFeeWZXLy+fGS/a/oH4lprofRxYZQlqb+dh4ccOGFbbQBeSeuM72VmTBahtGyeStPRoJm22VeXMF1HMdxHMdxDhSu4DqPIioAhaRRkG81putiJUlLipFlxEFMcWw1TOnsDRpEUUW+m0GkZJ0xg0aFtEoY2q/tqB/BkRH5VoNoEFEuFEhaoXlCdsYuxfEhc2OgtJmhSVYyXn507E00tLiu4ZHSfqIVQpoLw8PTzE
YIpLvCeNHiczWtiNKKh9aX0PArNK8isrikrIRqoaC3G372dou6ztIukJ0EKYX+UVO1qpHtfxLLJ5WpT1JZPGfvBHz1kVUONXt1nbOoYDBuMa4StnbaFGXM9kYbioh82Y59uJvRWRry0IOrZAtjOq0RSVzRG2b0t0PdBjHSLtFSSC6kRLnFf46XK5rn7PdqvqjEA1MJGpsV/SOmokSlku5OfV5XPtfn7Es6RKW5G0SFqZQThWOwFlmmq8gUQY0h2Q0xXyFrl5SmBCa9oHR1he2TMUlP6xnlVZiRvn1DUmfEm2SEm43lrVKLt5XCVKrl+3I2b06nsX2YYrh7Q4vuAwPAFInmZlXPeK4SkMJUhOaFgnwxZrQiwbs47CSF1lm7iKUyr91kGLJ1TbJDxSFWt7IZvslA2bnBFNu8O2l46J4paiUkGdhgROt8xfaN05nFUXCaaJ3Teub9zom4Vp1N7SoZrMUMDkeIKpoqlNSeoPHQFLHFL/dYv7Vrqlg1yS4UZuUvmmtBtqOMO6bIdB4u2Xx2wuLXbKSl17ZRjcHhoPD3TKWOh1OlfLQaYtxLc7rYLtqWaWujqNX97Zth4X5TVnauz0iGSrZrsdiTUQJRcy6R8EzRkClpsDqdlY9A63zOaDmjsVmZW0UJJGGkAFOsynTq6ACQbVrM5M4NSbh+7HNpr6LMYgbHmuYUcn7MzvVtO4dj8zFNe5YRbLBianPZNIUKppnn4lGIgx1Qz/afxFWPw3fFhdW/aNl9v/DVYR2jrNE0U1oVw+hIi6Rf2iz9SaauytxnWusVaXAzyK/PGB7OSHplfT1HuZhPdLgeVGzfk3s575ja2diweNbGhjl3jBaljrMcd4XmRolGCYOj1F7Ly18uGXcnmbGUfKVV+/z2D1s8swyUxoZdrP2jKctfHlu8a/Azbp3PKbO09rSeqHujJXOkGKxFtB8picdaK51lFpxXls2b20ZU7HxOHCyKZlRnB0x3SkaLCY1tUwkn1yFqcaitdWW0EpP2K1pf3WF023J9rUxGXbJta8ulr4zZPWHPjrw7mSfQQIqK5rrSP2I+2sOVaZbACVFhLkH5QhyeFfIoNV0juxbKTMw/VyzjYPeMedHCZJRJ2Xiubc+2xgwOt80/vkV9HUY5RAOLr9WgClvmvXDoCZRxiKtfL+mfaAcHiKi+xixm3xwcysxGaYqmxQePluP6ek4GynDN1PMit+sjCe0CsHj/kMGRBggsfG3ExnMb5g898Qsn+D2vV/Z8nDwiS5szYM9r2PjGTjjfFusrVYhJFmoPeI1tpGpw1HzF4yhk4+wkTL0Y5sMVXMdxHMdxHOdA4QruAUdEXgX8UyAG3qWqv/jYhU2FlPUMPVohw0necEFaBQvdAetbFgC0eGyHvIyoqhBLt94i6uZIWhFlJa3GmGGrgebT31BydES5ndI+0iNLShppwfmNBVgcM07DrNhhDM0STUAipaqE5Lm7ZArjhensWokV3U2RXOpMLvEY2g8HZ4NDFvOnq0rrbMToEJRxQtFrUCztAlCpEImy1BrSOpGz2WvRH7eIGwVRy+Ss8UYTbZfQiy0mcttme1exEo2nSl5UEGanCvlSSRZX/Nnnb0YSUx+vP7pBb5wSCUSiJHFJozNmfLZNntt57iwNGfQyUKHIY2hBb5gRRdPYJyKgF1wqIlPakp7F5k28adMt83ZNgpIQFaZsVXFEP8Q+lg3YvinkYR+asqixxXMNDgcluAvROGJ0SEl2LXavaFn8ZbZt35X0gvISWWxk3rW6Lt5fEY8nKov5NQ7XbGYtCt2HCstFP8lAFjLOFZ2KaCS0Lug0NjD4Wdrs5bDLNKq3VWOpnQ4GwdNxtBAx7mZUsSlcTBwYQp3zrrDyhZJxx/KmN7aqWvUA89Ysgz/teCEi7RVkO2qxvZNY56bQP5yQdwXZVhrrFqu5ezyu4/ZAaJ/NGS9Y5jSNYOHBkkfuSGoVM+nB1snElPSQ/S/uRyR9QUP8erorZNslwy
NNU8JCeyUD6DxgUuf4eV2Ls96ymNfWhYpzL0qoMqUf/KrLTOhuF+zcmFA1lGhsTgjL9xVsPDepr43meVh4oGL3hGVtGiXC1s1ZHYtapUra15BBy1Tq4arN0p8onRvPbTJaJYysaK0GFW2p/YbLzFRBMBV7/ZaUlS8UbN2cBL9ZqBrmi5vtavAnDqdW7VoEc6Io2sLudTGNLWX7xpjOwxW9E816BKHzcEVzPWfzGxqmYm0r6XbFbiuur5vVz+dceH5ax/+aY0pQW8N+xsvCyucLdo/HFB2hzGDlCyVVGtWx19lWid5g51bUZsSnA4shn8aG2khFlQg716d1zHs0Vjafk4VzNVXSohzyBaVoK9GZqI4Xj3Jqz/EqgdFKZOqiWAw4mOvF9o0W/25qoAZFXMh27aYaHrEMZVGu9K6LGS/Ayhcr+msRSX+S1zJl5/qM9rmCsmGz/geHU1Pc1sIzvIT22THnX9CkSkyp7V1nZScqJmJltroZomHG/ihklQv3zs4NSuthy5CYd+1cFg1h3BWW7rORw/4x87sdhbjsMospXrBMtlPRPxzU2chUwf4Rc27YuslGHESV4UoYaVmzzGKT5xFi51Uj6mu+DJkQy1QYrkS188Z4cep/rBE0NyDdKhkvxDaHozRf7knGuKxnmQHzrrD0lYLtm1uWhXO9smsILJa9sFGMibtM0YSipejCZARA6DwgjFbMaaR3NCIemTo8UfcBWhsWgy+VxV6XGcT3S60EV6mQF/Z8jwobBUAtq9/iV+zA1m9pUaXmN90/mlmmytSu7dwGSCwbZMdGQLpfLxkvRLTPlWzfmNTxx1GhaByRtyG+sAt06nt54uISFTbHZbxaUrYiOg9GRGPqWN+94AruAUZEYuBXgW8HbgVeJyK3Xt1aOY7jOI7jXF5cwT3YvBT4kqreByAivwm8GvjMY35CoWpWtJo5g+A/mi8Lhxb7LLcG6HFhXCSMxgndxRHbA4sNjQYR2hZkPWX1G7dJ4xIipbk8pNuyX4GVwvrOMllScqS7y6BIyRo5cVwxCAqlNko63SHjccgj/ukFVl75MAL1zFlVoayEjdECyWaQi8TqPjw09UwdrZjKmm2CSkReCmWzoptZfW7obLKc9olEGVUJjyws8NDCUqir7WcjKyjLiPFCQrWdmY9f135tVsF/NNuyX8WLX61Yf56g3YITq1scPbHDambqWq4RvaLBmf4iZSU004JI4HzWnIQrMRqmaCVEOzFVLgwaGcWXu5THxrUSTC+2GNUYqpaiI1PDRmum7gAkfWG8Yt6c6Y6pslFhKm85mV0bh5mpYdZ93jZHhaIpdQxclSqDo0LZVBbuM7/foq1oSr2fdDeoCy1TF8q2qSAb3xjXonMyNFUgX7TY2MOfztm6KbFzGX79mw/jxPtT2bo5Itsyb88yxEcmfZv93tgqbYbt0NSKKpnG8pYNm+Vts6tNpagaplb1r7P9NNa1zm7WOl9QpQnJwGKNLfsStB/OGS8ndB7oM3hxl8GhhNaFgt6xhJXPWZtuPbvNeMmUj7Jhx6ixxaEufC3MKF+CzW/IqBJTNqWCjeck5llcx6lFlpVKg7NB15xHyk5E4/wkwx/sHk/IdjXMcLd2Gq0I299gKsjK53t8/S93GaxaW+7cEAWXC6V/zI4rGdjMcE2mMX1pT9m8OWE0E+s+Xla2sohs0/ySNQrftxoKRObtG5Wm4gzWIspGmFG+EBT3jjmMlK0qZPMSkp1HZwqbZNRSoVbsN55r6m3R1vrWnuy3bIVrNyjYMhPHHOWT60zqTGCDQ0IU1LXRYkTRykxRa0I5hiqJSHtau0Ns3pyag0Mrqtu0DMpW/V2x+c3mXVMXpbBrabyc1HHVgzWbq5B3wj0VMlcNlyKGqxMV3NSzojHxZZagMCb1eS5bim4KRUeCh2iFtiqGh6bZ+9JtYbRk9V39XM7681LS3XDOQrx4/6h9Pl+w+7roVqSbEb3r4lq1VLH7Ryq1TJBduw/jIf
SON8L9Fe6r7Yi8JTQKreOXJyMtiw8UlnVtrIyXhWTX7vN4ILUKLhX0rjPnC00gLu18jJahCNkW80XLcpdt2X3ZWi/ZviEx1fhIWu8n7VlcfNK31yqVWj21+4vav1xjQkyqMlyK63j6shG8ghfEYslbEctfGrB+S2t6TyxJPXpQpkIyqBisJhRtcwMBG0WsYiEZWvy1ZaKrGKxGHP0PXwNg/c6TjLt23MPVuI7XHlXmYgPWTppN3W7iYXCMaFdoFu7TCnrXm5PDznGrRzy2Z2QdU94QkoEEJxwYryhVqmy0BZWpo46KPWOSfvBPVxjnwvZNzXAdSnDDsJjqasbLWmd8cFVs/W4zJttWdq43J5LeCdtPnFu9866wdfvh+hmtMyM/zQtqc2daJWVk103an8YD7wVXcA82J4AHZt4/GNbViMibROS0iJwel4MrWjnHcRzHcZzLgajuPTuEsz8QkdcAr1LVN4b33w/8BVX94ccofw7oAeevXC2dp5k1vP32O96G+xtvv/2Pt+H+4VmqevhSGzxE4WDzdeCGmffXh3WXRFUPi8hpVT112WvmXBa8/fY/3ob7G2+//Y+34cHAQxQONh8DniMiN4lIBrwW+O2rXCfHcRzHcZzLiiu4BxhVLUTkh4HfxWzC3q2q917lajmO4ziO41xWvIN7wFHV/wj8xz185B2Xqy7OFcHbb//jbbi/8fbb/3gbHgB8kpnjOI7jOI5zoPAYXMdxHMdxHOdA4R1cx3Ecx3Ec50DhHVwHABF5lYh8XkS+JCI/fbXr41waEXm3iDwiIvfMrFsVkd8XkS+G15WwXkTk7aFNPy0id1y9mjsAInKDiPxXEfmMiNwrIj8a1nsb7hNEpCkiHxWRT4U2/Adh/U0i8mehrf5NcK5BRBrh/ZfC9pNXs/6OISKxiPx3Efmd8N7b74DhHVwHEYmBXwW+HbgVeJ2I3Hp1a+U8Bu8FXnXRup8G/ouqPgf4L+E9WHs+J/y9CfgXV6iOzmNTAP+rqt4KvAz4u+Fe8zbcP4yAb1XVFwG3A68SkZcB/xfwNlX9BmAD+IFQ/geAjbD+baGcc/X5UeCzM++9/Q4Y3sF1AF4KfElV71PVMfCbwKuvcp2cS6CqfwSsX7T61cC/DMv/EviumfX/So0/BZZF5LorU1PnUqjqGVX9RFjewf7BnsDbcN8Q2mI3vE3DnwLfCrw/rL+4DSdt+37gfxARuULVdS6BiFwP/HXgXeG94O134PAOrgP2D/aBmfcPhnXO/uCoqp4Jyw8DR8Oyt+s1TBjqfDHwZ3gb7ivC8PYngUeA3we+DGyqahGKzLZT3YZh+xZw6MrW2LmIfwL8JFCF94fw9jtweAfXcQ4Qar5/7v13jSMiXeADwI+p6vbsNm/Dax9VLVX1diz9+UuBW65ylZw5EZHvAB5R1Y9f7bo4lxfv4DoAXwdumHl/fVjn7A/OToatw+sjYb236zWIiKRY5/Z9qvpbYbW34T5EVTeB/wq8HAsfmSRPmm2nug3D9iXgwhWuqjPlm4HvFJH7sXC8bwX+Kd5+Bw7v4DoAHwOeE2aRZsBrgd++ynVy5ue3gdeH5dcD/35m/d8OM/FfBmzNDIM7V4EQu/f/AJ9V1V+e2eRtuE8QkcMishyWW8C3YbHU/xV4TSh2cRtO2vY1wB+oZ1i6aqjqz6jq9ap6Evtf9weq+n14+x04PJOZA4CI/DUsLikG3q2q/+gqV8m5BCLyr4E7gTXgLPCzwIeAfwvcCHwV+J9UdT10pn4Fc13oA39HVU9fjXo7hoi8Evhj4G6m8X//OxaH6224DxCR27BJRzEmEv1bVX2LiNyMKYKrwH8H/mdVHYlIE/h1LN56HXitqt53dWrvzCIidwI/oarf4e138PAOruM4juM4jnOg8BAFx3Ecx3Ec50DhHVzHcRzHcRznQOEdXMdxHMdxHOdA4R1cx3Ecx3Ec50DhHVzHcRzHcRznQOEdXMdxHMdxHOdA4R1cx3Ecx3Ec50
DhHVzHcRzHcRznQOEdXMdxHMdxHOdAkVztCjjXDmtra3ry5MmrXQ3nAFBV1RMXCmxubu5p3w8//PDcZW+44YbLst8kmf/RubGxMXfZRqMxd9ksy+Yuu7OzM3fZ/Zbdstlszl220+nMXXY8Hs9ddmVlZe6yZ86cmbvsXp7He7km93LO7rtvb1lpu93u3GVXV1fnLnvu3Lm5y0bR/Nrd+fPn5y67l+faXupw5MiRucuWZTl32ePHj89ddr/y8Y9//LyqHr7UNu/gOjUnT57k9GlPc+88dXZ3d+cu+zu/8zt72vcv/MIvzF32l3/5l+cu+9a3vnXusnv5x/yhD31o7rLPfvaz5y67l877XXfdNXfZ0Wg0d9m9/MPfSwdsL//Eb7rpprnLvuQlL5m77AMPPDB32e/5nu+Zu+zP//zPz1327W9/+9xl99JJet7znjd32de+9rVzlwV42cteNnfZ7/3e75277Dvf+c65y+7lh+K73/3uucv2er25y7ZarbnLvvnNb5677Pr6+txl3/KWt8xddr8iIl99rG0eouA4juM4juMcKLyD+yQRkbeJyI/NvP9dEXnXzPtfEpEfF5FLylMi8i4RufVx9v8GETk+8/4uETkVlv+jiCw/PUfiOI7jOI5zsPAO7pPnT4BXAIhIBKwBz5/Z/grgMYPkVPWNqvqZx9n/G4BLBtCo6l9T1b0FLjqO4ziO4zxD8A7uk+fDwMvD8vOBe4AdEVkRkQbwPOATQFdE3i8inxOR94mIwFSRFZFYRN4rIveIyN0i8mYReQ1wCnifiHxSRB4VzCMi94vImoicFJHPisg7ReReEfm9SVkReYmIfDp8/q0ics8VOi+O4ziO4zhXFe/gPklU9SGgEJEbMbX2I8CfYZ3eU8DdwBh4MfBjwK3AzcA3X7Sr24ETqvoCVX0h8B5VfT9wGvg+Vb1dVQePU5XnAL+qqs8HNoHvDuvfA/ygqt4OPOaMDRF5k4icFpHTe5ml6jiO4ziOc63iHdynxoexzu2kg/uRmfd/Esp8VFUfVNUK+CRw8qJ93AfcLCL/TEReBWzvsQ5fUdVPhuWPAydDfO6Cqn4krP+Nx/qwqr5DVU+p6qnDhy/ptOE4juM4jrOv8A7uU2MSh/tCLEThTzEF9xVY5xdg1nOn5CJrNlXdAF4E3AX8EPAu9sbj7t9xHMdxHOeZhndwnxofBr4DWFfVUlXXgWWsk/vhx/1kQETWgEhVPwD8feCOsGkHWHgylQoT0HZE5C+EVXszMnQcx3Ecx9nHuNr31Lgbc0/4jYvWdVX1fJhP9kScAN4TnBgAfia8vhf4NREZMJ3Mthd+AHiniFTAHwJbT2IfjuM4juM4+w7v4D4FVLUEFi9a94aZ5buw0IPJ+x+eWb5z5mN3cBFB0f3AzKo7Z7adDIvngRfMrP/HM+XvVdXbAETkp7FJa47jOI7jOAce7+AeXP66iPwM1sZfxXx1HcdxHMdxDjyiqle7Ds41wqlTp/T0aRd6nSvLXp9BH/zgB+cu+6EPfWjusrfddtvcZX/rt35r7rJ33PHnBmgek09/+tNzlz1+/JJ5YC7JT/7kT85d9sUvfvHcZecMwwJgZ2dn7rJbW/NHVP2Nv/E35i77spe9bO6y3/Vd3zV32V/5lV+Zu+xerp3z58/PXfZLX/rS3GXf+MY3zl323nvvnbsswLd/+7fPXfbs2bNzl93L9XPs2LG5y+7len/44YfnLvvRj3507rJ7ud673e7cZc+cOTN32f2KiHxcVU9daptPMnMcx3Ecx3EOFN7BvYyIyNtE5Mdm3v+uiLxr5v0viciPi8jvPMbn3yUitz7O/t8gIsdn3t8lIpf8JeM4juM4jvNMwTu4l5eJTy7BJWENS+s74RVA9lgfVtU3qupnHmf/bwDmH6d0HMdxHMd5BuAd3MvLh5lafD0fSwaxIyIrItIAngd8AuiKyPtF5HMi8j4JgW0TRVZEYhF5r4jcIyJ3i8ibReQ1WErg94nIJ0
WkNfvFIvJXReQjIvIJEfl3IjJ/4I7jOI7jOM4+xl0ULiOq+pCIFCJyI9N0viewTu8W5pk7Bl6MdYAfwlTfbwb+28yubgdOqOoLAERkWVU3ReSHgZ9Q1dNhPeF1DUsa8VdUtSciPwX8OPCWy3zIjuM4juM4Vx1XcC8/H8Y6t5MO7kdm3v9JKPNRVX1QVSvgk8DJi/ZxH3CziPwzEXkVsP0E3/ky4FbgT0Tkk8DrgWddqqCIvElETovI6XPnzu354BzHcRzHca41vIN7+ZnE4b4QC1H4U0zBfQXTdL6jmfIlFynrqroBvAhLGvFDwLt4fAT4fVW9Pfzdqqo/cKmCqvoOVT2lqqcOHz68pwNzHMdxHMe5FvEO7uXnw8B3AOuqWqrqOrCMdXI//LifDISQgyhkN/v7TDOf7QALl/jInwLfLCLfED7fEZHnPrXDcBzHcRzH2R94DO7l527MPeE3LlrXVdXzcxqlnwDeE5wYAH4mvL4X+DURGTCdzIaqnhORNwD/OkxmA+sYf+HJHoTjOI7jOM5+wTu4lxlVLYHFi9a9YWb5Liz0YPL+h2eW75z52J9LhxQU3Q/MrLpzZtsfAC95ktV2HMdxHMfZt3iIguM4juM4jnOg8A6u4ziO4ziOc6DwEAXHca4qc8ah17z0pS+du+xwOJy77Ctf+cq5y66urs5d9sUvfvHcZT/2sY/NXbbbnT93y8mTJ+cuq6pzl91L20XR/HpKo9F44kKBvZyHZz3rkm6Jl2Qv185eyp49e3bush/5yEfmLvvlL3957rLb20/kNDllL/UF2Isbz/nz5+cuu7a2NnfZvbTH8vLy3GVf/vKXP3GhwIkTJ+Yu+4d/+Idzlz127NjcZZ/puILrOI7jOI7jHCi8g3sNICLfJSIqIreE9ydF5J6rXS/HcRzHcZz9iHdwrw1eh6Xmfd2T+bCIeKiJ4ziO4zhOwDu4VxkR6QKvBH4AeO0ltp8UkT8WkU+Ev1eE9XeG9b8NfCa8/0MR+fcicp+I/KKIfJ+IfFRE7haRZ1/ZI3Mcx3Ecx7k6eAf36vNq4D+p6heACyLyTRdtfwT4NlW9A/ge4O0z2+4AflRVJ1nKXoSl8n0e8P3Ac1X1pVhq3x+5jMfgOI7jOI5zzeAd3KvP64DfDMu/yZ8PU0iBd4rI3cC/A26d2fZRVf3KzPuPqeoZVR0BXwZ+L6y/Gzh5qS8XkTeJyGkROX3u3LmndiSO4ziO4zjX6MAKeQAAHFtJREFUAB67eRURkVXgW4EXiogCMaDAr84UezNwFlNnI2DW/6R30S5HM8vVzPuKx2hrVX0H8A6AU6dOze8P5DiO4ziOc43iCu7V5TXAr6vqs1T1pKreAHwFuGGmzBJwRlUrLOwgvgr1dBzHcRzH2Td4B/fq8jrggxet+wDwMzPv/znwehH5FHALf161dRzHcRzHcWbwEIWriKp+yyXWvZ2ZiWSq+kXgtpkiPxXW3wXcNVPu4vd3PtY2x3Ecx3Gcg4wruI7jOI7jOM6BQvaSd9w52Jw6dUpPnz59tavxpInj+cOTq6q6jDU5mLz61a+eu+zP/uzPzl32tttue+JCM0TR1f9dvpfn5rVQX8dxnIOIiHxcVU9daps/eR3HcRzHcZwDhXdwHcdxHMdxnAOFd3CvACLy90TkXhH5tIh8UkT+whX4zvtFZO1yf4/jOI7jOM61hrsoXGZE5OXAdwB3qOoodDqzq1wtx3Ecx3GcA4sruJef64DzIX0uqnpeVR8KCuv/LSJ3i8hHReQbAETksIh8QEQ+Fv6+OazviMi7Q9n/LiKvDutjEfnHInJPUIh/ZOa7f0REPhG+45YrfeCO4ziO4zhXA+/gXn5+D7hBRL4gIv9cRP7yzLYtVX0h8CvAPwnr/inwNlV9CfDdwLvC+r8H/IGqvhT4FuCtItIB3gScBG5X1duA983s/7yq3gH8C+AnLs/hOY7jOI7jXFt4iMJlRlV3ReSbgL+IdUz/jYj8dNj8r2de3x
aW/wpwq4hMdrEoIl3grwLfKSKTjmoTuDGU/zVVLcL3rc98/W+F148Df+tS9RORN2GdZG688cYne5iO4ziO4zjXDN7BvQKoaollErtLRO4GXj/ZNFssvEbAy1R1OLsPsR7vd6vq5y9a/3hfPQqvJY/R1qr6DuAdYD64T3QsjuM4juM41zoeonCZEZFvFJHnzKy6HfhqWP6emdePhOXfA+o4WhG5PSz+LhZTK2H9i8P63wd+UESSsH71aT8Ix3Ecx3GcfYR3cC8/XeBfishnROTTwK3Az4VtK2HdjwJvDuv+F+BUmDD2GeCHwvqfB1Lg0yJyb3gPFqP7tbD+U8D3Xu4DchzHcRzHuZbxEIXLjKp+HHjFxeuDEPtWVf2pi8qfZ6rszq4fAD94ifUF8OPhb3b9yZnl08CdT6b+juM4juM4+w1XcB3HcRzHcZwDhSu4V4lZhdV5eijL8mpXwQlsbGzMXfZ973vfExea4VWvetVeqzMXFy5cmLvs2tr8SQL3UnYvPMEEU+cS7OUZEcfxZayJc63g18TBxRVcx3Ecx3Ec50DhHVzHcRzHcRznQOEd3CuMiHyXiOg8qXNF5F0icuvT8J0nReSep7ofx3Ecx3Gc/YB3cK88rwP+W3h9XFT1jar6mctfJcdxHMdxnIODd3CvICHl7iuBHwBeG9bdKSJ3icj7ReRzIvK+mWQOd4nIqbC8KyJvFZF7ReQ/i8hLw/b7ROQ7Q5mTIvLHIvKJ8Pfn7Mkcx3Ecx3EOOt7BvbK8GvhPqvoF4IKIfFNY/2Lgx7AkEDcD33yJz3aAP1DV5wM7wD8Evg34m8BbQplHgG9T1TswL923P1GFRORNInJaRE6fO3fuyR+Z4ziO4zjONYJ3cK8srwN+Myz/JtMwhY+q6oOqWgGfBE5e4rNj4D+F5buBP1TVPCxPyqfAO0XkbuDfYR3mx0VV36Gqp1T11OHDh/d+RI7jOI7jONcY7oN7hRCRVeBbgReKiAIxoMB/AEYzRUsu3S65qmpYriafUdVKRCbl3wycBV6E/XgZPt3H4TiO4ziOc63jCu6V4zXAr6vqs1T1pKreAHwF+ItP43csAWeCEvz9WCfacRzHcRznGYV3cK8crwM+eNG6DzCHm8Ie+OfA60XkU8AtQO9p3LfjOI7jOM6+wEMUrhCq+i2XWPd2LpoIpqo/PLN858xyd2b55y76TDe8fhG4bWbTT4X19wMveArVdxzHcRzH2Te4gus4juM4juMcKFzBdRznaWcwGMxddmNjY0/7juP5Q8uXlpbmLjudw/nEZFk2d9m9ECywn3b2cmyXqw7XAgf52Jwnx17uDWd/4Qqu4ziO4ziOc6DwDq7jOI7jOI5zoPAO7j5AREoR+aSIfGo2Ba+IHBeR98+5jzrtr+M4juM4zkHGY3D3BwNVvR1ARP5H4BeAv6yqD2H+uo9CRBJVLa5wHR3HcRzHca4JvIO7/1gENgBE5CTwO6r6AhF5A/C3gC4Qi8irgPdgWc0+B7SuRmUdx3Ecx3GuNN7B3R+0ROSTQBO4Dkv5eynuAG5T1XUR+XGgr6rPE5HbgE9c6gMi8ibgTQA33njj019zx3Ecx3GcK4zH4O4PBqp6u6reArwK+Fdyab+b31fV9bD8l4D/F0BVPw18+lI7VtV3qOopVT11+PDhy1F3x3Ecx3GcK4p3cPcZqvoRYA24VG/UU/M6juM4jvOMxzu4+wwRuQWIgQtPUPSPgO8Nn3kBj07h6ziO4ziOc2DxGNz9wSQGF0CA16tq+QRZef4F8B4R+SzwWeDjl7mOjuM4juM41wTewd0HqOolc5Oq6v3AC8Lye4H3zmwbAK+9/LVzHMdxHMe5tvAQBcdxHMdxHOdA4Qqu4zhPO1mWzV223W7vad9RNP/vclWdu2xVVXOXfYLwoGuO/Vbfy4WfB+di9vI8cfYX3rKO4ziO4zjOgeIZ18EVkbeJyI/NvP9dEXnXzPtfEpEfF5HvFJGfDut+TkR+4hL7Oiki9z
xN9XqLiPyVp2lfu0/HfhzHcRzHcfYjz7gOLvAnwCsARCTCPGWfP7P9FcCHVfW3VfUXr1SlVPX/UNX/fKW+z3Ecx3Ec56DyTOzgfhh4eVh+PnAPsCMiKyLSAJ4HfEJE3iAiv3Lxh0Xkm0TkUyLyKeDvXuoLRKQrIv9FRD4hIneLyKvD+pMi8lkReaeI3CsivycirbDtvSLymrB8v4j8goh8UkROi8gdQWn+soj80ON9x0X1uE5E/ijs5x4R+YtP+ew5juM4juNc4zzjOriq+hBQiMiNmFr7EeDPsE7vKeBuVR0/zi7eA/yIqr7occoMgb+pqncA3wL80kxq3ecAv6qqzwc2ge9+jH18TVVvB/4Ys/96DfAy4B/M8R0Tvhf43bCfFwGfxHEcx3Ec54DzTHVR+DDWuX0F8MvAibC8hYUwXBIRWQaWVfWPwqpfB779UkWB/1NE/hJQhf0fDdu+oqqTjubHgZOP8XW/HV7vBrqquoMpzaNQj95jfMfDM/v4GPBuEUmBD8187+wxvQl4E8CNN974WIfuOI7jOI6zb3jGKbiBSRzuC7EQhT/FFNxXYJ3fp8r3AYeBbwrq6VmgGbaNZsqVPPaPjEm56qLPVOEzj/cdAISO+F8Cvg68V0T+9sVfoqrvUNVTqnrq8OHD8x+h4ziO4zjONcoztYP7YeA7gHVVLVV1HVjGOrmP2cFV1U1gU0ReGVZ932MUXQIeUdVcRL4FeNbTV/X5v0NEngWcVdV3Au8C7rgM9XAcx3Ecx7mmeKaGKNyNuSf8xkXruqp6/gk++3ewYX8Ffu8xyrwP+P9E5G7gNPC5p1jfJ/sddwL/m4jkwC7w5xRcx3Ecx3Gcg4bsJdOPc7A5deqUnj59+mpXwzkAnD//RL8Tp3zwgx/c075f85rXzF222+3OXXYvde50OnOXXVhYmLusZ9q6vOzl/523xTODvWQw9Kxn1x4i8nFVPXWpbd5ajuM4juM4zoHCO7iO4ziO4zjOgeKZGoPrHEB8qGnv7OWcbW9vX5b9fv/3f//cZWFvQ8e7u/NnrV5eXp677Gg0euJCT6Jsksz/SL5cQ+hxHF+W/V4u9hJ2UBTF3GWvhbbYj+y3MJC9PKv2Ut/LdWz+f25+ntlH7ziO4ziO4xw4rpkOrojML7U8+nM/dCl/15AW956nXrOnj5CCdy0sP6njdRzHcRzHcR6ffR+ioKq/drXrcDkIaXdFVecfj3Acx3Ecx3GuHQV3gojcKSJ3icj7ReRzIvK+0NlDRH5RRD4jIp8WkX8c1v2ciPxEWP4mEfmUiHwK+Lsz+4xF5K0i8rHw2R98gjrEIvJeEblHRO4WkTeH9XeJyNtE5LSIfFZEXiIivyUiXxSRfzjz+Q+JyMdF5N6QCnfeYz8pIp8XkX/1/7d391F2VfUZx78PIRA0kgiMLBrACIQVEWGAgYKgjVQRlaLiS0qDotBSqCi+IEVrNdhl0UUVqvgWEaLU+gYCUVvBIkLKApIJSUgooBJQQSRJgRAEQ5k8/ePsgevlTubeZCb3zs3zWWvW3LPPPmf/7tmTyb57ztk/qgxru5W4B+OYWeppiPIZkq6XdJWkFeV6zZK0oNTbs9lYIiIiIsaqTp3BPQB4CfBbqrS6h0u6A3gTMN22JTV6IuQS4HTbN0g6r6b8ZGCN7YMlbQvcKOka2/cM0X4vMMX2vgB1bT1pu0/SGcBVwEHAQ8Ddks63/b/ASbYfkrQdsFDS5aW8GdOAE23fLOnNJZb9qRJTLJR0A1VK4UbllLIXl5hWABfZPqTE+x7gfU3GERERETEmddwMbrHA9n3lz/NLgKnAGuAPwNckHQc8XntAGYROtj040Lu0ZvdRwDskLQFuAXakGkgOZQWwh6TPSzoaqH18fF75vgy43fYDtteVY3Yr+95bZpFvLmUbaqver2zfXF4fAXyrpBN+ELgeOHgD5QALa2K6m2eyrS2juo5/RNIpZUa6f9WqVS2EGREREdGZOnWAW7uuzgCwte
2ngEOAy4BjgB+3cD4B77HdW75eZHuoNLvYfphqJvRnwKnARQ1iW18X53pga0kzgFcBh9neH1gMTGgh1t+3ULeR+phq433WjL3tObb7bPf19PRsYtMRERER7depA9xnkTQRmGT7P4D3Uw1An2b7EeARSUeUolk1u68GTpM0vpxrb0nPLa/vbNDWTsBWti8HPgoc2EKok4CHbT8uaTpwaAvH1psPzCz3BPcArwAWbKA8IiIiYovXqffgNvI84CpJE6hmZD/QoM67gIslmWf+NA/VDOxU4NbywNoq4I1lINtoNeYpwCWSBj8AfLiFOH8MnFruGb6L6jaFjXUFcBiwFDBwlu3fSRqqfPomtBURERHRFdRK1pFuI+kYYA/bn2t3LJ2gr6/P/f397Q5joyXDS+tGK5PZk08+2XTd7bffvum60FqGoMcff3z4SsWECc3fSdRKdrJWztsJ2bOSyazSCX0xFo21TGat/Ey08m8jmcw2D0mLbPc12jeWZnBHnO0ftjuGiIiIiBhZW/QAN7rL2rVrm67byqfggYGBpus+5znPabru6tWrm647eXKjVfEaa2XmoJVZqlZi6BTbbrvtqJx3u+22G5XzRuta+XkfP378KEYS0Bmzsq1o5XdgJ9gSZmVHSq5URERERHSVDHAjIiIioqts8gBXkiV9pmb7TEmzN/W8Q7Q1VdJf1Wz3SRrzD4iVFMANb5KOiIiIiNaMxAzuOuC4suRWyyRtvaHtOlOBpwe4tvttv3dj2o2IiIiI7jQSA9yngDlUyRf+iKS/kHSLpMWS/kvSzqV8tqRLJd0IXNpge6qk+ZJuLV8vK6f8FPBySUskvV/SDEk/LOfcQdKVkm6TdLOk/WraurjMkq6QNOyAWNK9ks4t7fRLOlDS1ZLulnRqqSNJ50laLmmZpJmlfEZp6zJJd0r6Zll7F0l/Xq7FshLTs56AkXR82b9c0qdryk+W9HNJCyR9VdKFpXyupLfU1Hus5vWHJC0s1+Sc4d53RERERDcYqXtwvwDMkjSprvy/gUNtHwB8GzirZt8+wKtsH99geyXwatsHAjOBwdsQzgbml3S759e1dQ6w2PZ+wEeAb9Tsmw68hirV78cHM5oN49e2e6myhs0F3kKVlWxwoHgc0EuVUe1VwHmSdin7DgDeV97THsDhJUHFXGCm7ZdSrWBxWm2Dkv4E+DRwZDn3wZLeWMr/sbR/eHk/GyTpKGBaec+9wEGSXtHE+46IiIgY00ZkfQzbj0r6BvBe4ImaXbsC3ykDv22Ae2r2zbP9xBDb44ELJfUCA8DeTYRxBPDmEs9PJe0oaXAF+R/ZXgesk7QS2Bm4b5jzzSvflwETba8F1kpaJ2lyae9btgeAByVdDxwMPAossH0fgKQlVLdWrAXusf3zct6vA+8GLqhp82DgZ7ZXlWO/SZWGF+B62w+V8u81cU2OKl+Ly/ZEqgHvDbWVJJ0CnAKw++67D3PKiIiIiM43kqsoXACcDDy3puzzwIVlxvJvgdqUPr+vO752+/3Ag1Szo31Ug+NNUZt2aIDmBvaDx6yvO359E8dvTHsb6ylKP5bUwoPXSsC5Zba71/Zetr9Wf7DtObb7bPf19PSMYpgRERERm8eIDXDL7OJ3qQa5gyYB95fXJ7ZwuknAA7bXA28HBvPjrQWeN8Qx84FZUN0HC6y2vcHcopKulTSlhbjq25spaZykHqqZ1gUbqH8XMFXSXmX77cD1dXUWAH8maSdJ44DjS52Fpfz55SG8N9cccy9wUHl9LNXsN8DVwEmSJgJImiLpBRvxPiMiIiLGlJFeB/czQO1qCrOB70laBDSftgm+CJwoaSnV/aaDs7u3AQOSlkqqf6htNtV9prdRPYy2wQF1me3cC3iohbhqXVHiWQr8FDjL9u+Gqmz7D8C7qK7HMqqZ4C/X1XmA6j7j68p5F9m+yvb9wD9TDYBvpBrUrimHfZVq8LsUOIxyrWxfA/w7cFNp7zKG/nAQERER0TVku90xtI
WkfYGTbH+g3bE0Q9JE24+VGdwrgIttXzGSbfT19bm/v38kT7lZrVmzZvhKRVL1VlpJU5n0tBER0UkkLbLdMI/AFpvJzPbysTK4LWaXB9aWUz2sd2Wb44mIiIjoSKP58FOMINtntjuGTjdpUv0qdZ2tm1etaGXWu5W6AFtt1fzn8nXr1g1fqdhmm+afZW0lhnHjxg1fKSIiRtQWO4MbEREREd0pA9yIiIiI6CrDDnAlDZSUtbeX1Qs+WFYg2Kwk9Up6Xc32sZLOHsX2Ruz8JXVvw5ugR5KkyZL+brTbiYiIiOhkzQxUnyiJAl4CvBp4LfDx0Q2roV7g6QGu7Xm2PzVajY32+UfJZCAD3IiIiNiitTQTa3slVVrX01WZIOkSScskLZb0SgBJ75R0paSfSLpX0umSPlDq3Cxph1JvT0k/lrRI0nxJ00v5WyUtLzPGN0jaBvgEVWKFJZJmljYuLPV3lnRFqb9U0svqY5f0JUn9ZSb6nJryeyWdI+nW8j6m17yHwfPPLcffLGmFpBmSLpZ0h6S5w7VRs39cOdfy0lb9Wr719WdLulTSTZJ+IelvavZ9SNJCSbfVtPUpYM9yjc6TtEu5fktKmy9vopsjIiIixrSWV1GwvaJk2XoBcEJV5JeWgeE1kvYuVfcFDqBKz/tL4O9tHyDpfOAdVKl95wCn2v6FpD+lSvBwJPAx4DW275c02faTkj4G9Nk+HaoBaE1YnwOut/2mEtvEBqH/g+2Hyv5rJe1n+7ayb7XtA8uf988E/rrB8c+nSqRwLDAPOLzUWyip1/aSYdqAahZ6iu19y3toZnHT/YBDqVIgL5b0o3JtpwGHUKXknSfpFVRJIva13VvO/0HgatufLDE9a5FWSadQfWjp6qf6IyIiYsuxqffSHgH8G4DtO4FfAYMD3Otsr7W9iirr1g9K+TKqlLUTgZdRZfZaAnwF2KXUuRGYW2Ysm1lj50jgSyWOAduNVvx/m6RbgcXAS4B9avZ9v3xfBEwdoo0fuMqKsQx40Paykkr49ppjNtQGwApgD0mfl3Q0sMFUwsVVtp+wvZoqw9khwFHlazFwK1W2t2kNjl0IvEvSbOClttfWV7A9x3af7b6enp4mwomIiIjobC3P4EraAxgAVg5TtXYByvU12+tLu1sBjwzONtayfWqZ0X09sEjSQa3GWRfzi6hmZg+2/XC5rWBCg1gHGPqa1MZf/962bqINSvn+wGuAU4G3AScNE359qjlTzdqea/srde9zal17N5SZ3ddTfWD4rO1vDNNeRERExJjW0gyupB7gy8CFZTZzPjCr7Nsb2B24q5lz2X4UuEfSW8vxKoM/JO1p+xbbHwNWAbsBa4HnDXG6a4HTyrHjJNWv+L898HtgjaSdqR6UG2nDtiFpJ2Ar25cDHwUOLOWnSzp9iPO+QdW9zjsCM6hmZa8GTiqz4EiaIukF1F0jSS+kmm3+KnDRYHsRERER3ayZGdztyi0E44GngEuBz5Z9XwS+JGlZ2fdO2+skNdv+rHL8R8v5vw0sBc6TNI1qpvLaUvZr4OwSy7l15zkDmCPpZKpZ2NOAmwZ32l4qaTFwJ/AbqlsgRlSTbUwBLtEzy6x9uHyfvoGYbqO6NWEn4J9s/xb4raQXAzeVa/0YcILtuyXdKGk58J9UaX0/JOn/Sp13bOr7jIiIiOh0qiZio50k/RA4zvaTdeWzgcds/8vmiKOvr8/9/f2bo6nocknV+4yk6o2IGB2SFtlumGeg5XtwY+TZPqbdMURERER0i8zgxtMkraJaCaOb7ASsbncQsVHSd2NT+m1sSr+NTVt6v73QdsMloDLAja4mqX+oP19EZ0vfjU3pt7Ep/TY2pd+Gtqnr4EZEREREdJQMcCMiIiKiq2SAG91uTrsDiI2Wvhub0m9jU/ptbEq/DSH34EZEREREV8kMbkRERER0lQxwo2tJOlrSXZJ+KensdscTjUm6WNLKkoFvsGwHST+R9Ivy/fntjDGeTdJukq6T9D
+Sbpd0RilP33W4kv59gaSlpe/OKeUvknRL+Z35HUnNZz+JzUbSOEmLS5Ko9NsQMsCNriRpHPAF4LXAPsDxkvZpb1QxhLnA0XVlZwPX2p5Gla47H1A6z1PAB23vAxwKvLv8G0vfdb51wJG29wd6gaMlHQp8Gjjf9l7Aw8DJbYwxhnYGcEfNdvqtgQxwo1sdAvzS9oqSAvnbwBvaHFM0YPsG4KG64jcAXy+vvw68cbMGFcOy/YDtW8vrtVT/4U4hfdfxXHmsbI4vXwaOBC4r5em7DiRpV+D1wEVlW6TfGsoAN7rVFOA3Ndv3lbIYG3a2/UB5/Ttg53YGExsmaSpwAHAL6bsxofyZewmwEvgJcDfwiO2nSpX8zuxMFwBnAevL9o6k3xrKADciOpqrpV6y3EuHkjQRuBx4n+1Ha/el7zqX7QHbvcCuVH/xmt7mkGIYko4BVtpe1O5YxoKt2x1AxCi5H9itZnvXUhZjw4OSdrH9gKRdqGaZosNIGk81uP2m7e+X4vTdGGL7EUnXAYcBkyVtXWYD8zuz8xwOHCvpdcAEYHvgX0m/NZQZ3OhWC4Fp5enSbYC/BOa1OaZo3jzgxPL6ROCqNsYSDZR7/74G3GH7szW70ncdTlKPpMnl9XbAq6nuob4OeEuplr7rMLY/bHtX21Op/k/7qe1ZpN8aSqKH6FrlU+4FwDjgYtufbHNI0YCkbwEzgJ2AB4GPA1cC3wV2B34FvM12/YNo0UaSjgDmA8t45n7Aj1Ddh5u+62CS9qN6GGkc1UTXd21/QtIeVA/k7gAsBk6wva59kcZQJM0AzrR9TPqtsQxwIyIiIqKr5BaFiIiIiOgqGeBGRERERFfJADciIiIiukoGuBERERHRVTLAjYiIiIiukgFuRERERHSVDHAjIiIioqtkgBsRERERXeX/AT5DbvTc/nXuAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" } - ], - "source": [ - "# Visualize the results.\n", - "plt.figure(figsize=(10, 8))\n", - "\n", - "# Plot the waveform.\n", - "plt.subplot(3, 1, 1)\n", - "plt.plot(waveform)\n", - "plt.xlim([0, len(waveform)])\n", - "# Plot the log-mel spectrogram (returned by the model).\n", - "plt.subplot(3, 1, 2)\n", - "plt.imshow(spectrogram.T, aspect='auto', interpolation='nearest', origin='bottom')\n", - "\n", - "# Plot and label the model output scores for the top-scoring classes.\n", - "mean_scores = np.mean(scores, axis=0)\n", - "top_N = 10\n", - "top_class_indices = np.argsort(mean_scores)[::-1][:top_N]\n", - "plt.subplot(3, 1, 3)\n", - "plt.imshow(scores[:, top_class_indices].T, aspect='auto', interpolation='nearest', cmap='gray_r')\n", - "# Compensate for the PATCH_WINDOW_SECONDS (0.96 s) context window to align with spectrogram.\n", - "patch_padding = (params.PATCH_WINDOW_SECONDS / 2) / params.PATCH_HOP_SECONDS\n", - "plt.xlim([-patch_padding, scores.shape[0] + patch_padding])\n", - "# Label the top_N classes.\n", - "yticks = range(0, top_N, 1)\n", - "plt.yticks(yticks, [class_names[top_class_indices[x]] for x in yticks])\n", - "_ = plt.ylim(-0.5 + np.array([top_N, 0]))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git 
a/research/autoencoder/AdditiveGaussianNoiseAutoencoderRunner.py b/research/autoencoder/AdditiveGaussianNoiseAutoencoderRunner.py deleted file mode 100644 index 8d8ee08654985250ac61415df96889b4a4cf5f1b..0000000000000000000000000000000000000000 --- a/research/autoencoder/AdditiveGaussianNoiseAutoencoderRunner.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import sklearn.preprocessing as prep -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -from autoencoder_models.DenoisingAutoencoder import AdditiveGaussianNoiseAutoencoder - -mnist = input_data.read_data_sets('MNIST_data', one_hot=True) - - -def standard_scale(X_train, X_test): - preprocessor = prep.StandardScaler().fit(X_train) - X_train = preprocessor.transform(X_train) - X_test = preprocessor.transform(X_test) - return X_train, X_test - - -def get_random_block_from_data(data, batch_size): - start_index = np.random.randint(0, len(data) - batch_size) - return data[start_index:(start_index + batch_size)] - - -X_train, X_test = standard_scale(mnist.train.images, mnist.test.images) - -n_samples = int(mnist.train.num_examples) -training_epochs = 20 -batch_size = 128 -display_step = 1 - -autoencoder = AdditiveGaussianNoiseAutoencoder( - n_input=784, - n_hidden=200, - transfer_function=tf.nn.softplus, - optimizer=tf.train.AdamOptimizer(learning_rate = 0.001), - scale=0.01) - -for epoch in range(training_epochs): - avg_cost = 0. 
- total_batch = int(n_samples / batch_size) - # Loop over all batches - for i in range(total_batch): - batch_xs = get_random_block_from_data(X_train, batch_size) - - # Fit training using batch data - cost = autoencoder.partial_fit(batch_xs) - # Compute average loss - avg_cost += cost / n_samples * batch_size - - # Display logs per epoch step - if epoch % display_step == 0: - print("Epoch:", '%d,' % (epoch + 1), - "Cost:", "{:.9f}".format(avg_cost)) - -print("Total cost: " + str(autoencoder.calc_total_cost(X_test))) diff --git a/research/autoencoder/AutoencoderRunner.py b/research/autoencoder/AutoencoderRunner.py deleted file mode 100644 index 7f1ab2ecd5a91c12960714ea79a864631e634f8c..0000000000000000000000000000000000000000 --- a/research/autoencoder/AutoencoderRunner.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import sklearn.preprocessing as prep -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -from autoencoder_models.Autoencoder import Autoencoder - -mnist = input_data.read_data_sets('MNIST_data', one_hot=True) - - -def standard_scale(X_train, X_test): - preprocessor = prep.StandardScaler().fit(X_train) - X_train = preprocessor.transform(X_train) - X_test = preprocessor.transform(X_test) - return X_train, X_test - - -def get_random_block_from_data(data, batch_size): - start_index = np.random.randint(0, len(data) - batch_size) - return data[start_index:(start_index + batch_size)] - - -X_train, X_test = standard_scale(mnist.train.images, mnist.test.images) - -n_samples = int(mnist.train.num_examples) -training_epochs = 20 -batch_size = 128 -display_step = 1 - -autoencoder = Autoencoder(n_layers=[784, 200], - transfer_function = tf.nn.softplus, - optimizer = tf.train.AdamOptimizer(learning_rate = 0.001)) - -for epoch in range(training_epochs): - avg_cost = 0. 
- total_batch = int(n_samples / batch_size) - # Loop over all batches - for i in range(total_batch): - batch_xs = get_random_block_from_data(X_train, batch_size) - - # Fit training using batch data - cost = autoencoder.partial_fit(batch_xs) - # Compute average loss - avg_cost += cost / n_samples * batch_size - - # Display logs per epoch step - if epoch % display_step == 0: - print("Epoch:", '%d,' % (epoch + 1), - "Cost:", "{:.9f}".format(avg_cost)) - -print("Total cost: " + str(autoencoder.calc_total_cost(X_test))) diff --git a/research/autoencoder/MaskingNoiseAutoencoderRunner.py b/research/autoencoder/MaskingNoiseAutoencoderRunner.py deleted file mode 100644 index b776302e286ff740ba7b8e6f679a54b23944df12..0000000000000000000000000000000000000000 --- a/research/autoencoder/MaskingNoiseAutoencoderRunner.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import sklearn.preprocessing as prep -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -from autoencoder_models.DenoisingAutoencoder import MaskingNoiseAutoencoder - -mnist = input_data.read_data_sets('MNIST_data', one_hot=True) - - -def standard_scale(X_train, X_test): - preprocessor = prep.StandardScaler().fit(X_train) - X_train = preprocessor.transform(X_train) - X_test = preprocessor.transform(X_test) - return X_train, X_test - - -def get_random_block_from_data(data, batch_size): - start_index = np.random.randint(0, len(data) - batch_size) - return data[start_index:(start_index + batch_size)] - - -X_train, X_test = standard_scale(mnist.train.images, mnist.test.images) - -n_samples = int(mnist.train.num_examples) -training_epochs = 100 -batch_size = 128 -display_step = 1 - -autoencoder = MaskingNoiseAutoencoder( - n_input=784, - n_hidden=200, - transfer_function=tf.nn.softplus, - optimizer=tf.train.AdamOptimizer(learning_rate=0.001), - dropout_probability=0.95) 
- -for epoch in range(training_epochs): - avg_cost = 0. - total_batch = int(n_samples / batch_size) - for i in range(total_batch): - batch_xs = get_random_block_from_data(X_train, batch_size) - - cost = autoencoder.partial_fit(batch_xs) - - avg_cost += cost / n_samples * batch_size - - if epoch % display_step == 0: - print("Epoch:", '%d,' % (epoch + 1), - "Cost:", "{:.9f}".format(avg_cost)) - -print("Total cost: " + str(autoencoder.calc_total_cost(X_test))) diff --git a/research/autoencoder/README.md b/research/autoencoder/README.md deleted file mode 100644 index cba7b3b66f59ac9e3810ee1b98d67133296aea25..0000000000000000000000000000000000000000 --- a/research/autoencoder/README.md +++ /dev/null @@ -1,3 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) diff --git a/research/autoencoder/VariationalAutoencoderRunner.py b/research/autoencoder/VariationalAutoencoderRunner.py deleted file mode 100644 index f5ce0045f3c6dfdd357cd874f8ee24df0d8cb3d9..0000000000000000000000000000000000000000 --- a/research/autoencoder/VariationalAutoencoderRunner.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import sklearn.preprocessing as prep -import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data - -from autoencoder_models.VariationalAutoencoder import VariationalAutoencoder - -mnist = input_data.read_data_sets('MNIST_data', one_hot=True) - - -def min_max_scale(X_train, X_test): - preprocessor = prep.MinMaxScaler().fit(X_train) - X_train = preprocessor.transform(X_train) - X_test = preprocessor.transform(X_test) - return X_train, X_test - - -def 
get_random_block_from_data(data, batch_size): - start_index = np.random.randint(0, len(data) - batch_size) - return data[start_index:(start_index + batch_size)] - - -X_train, X_test = min_max_scale(mnist.train.images, mnist.test.images) - -n_samples = int(mnist.train.num_examples) -training_epochs = 20 -batch_size = 128 -display_step = 1 - -autoencoder = VariationalAutoencoder( - n_input=784, - n_hidden=200, - optimizer=tf.train.AdamOptimizer(learning_rate = 0.001)) - -for epoch in range(training_epochs): - avg_cost = 0. - total_batch = int(n_samples / batch_size) - # Loop over all batches - for i in range(total_batch): - batch_xs = get_random_block_from_data(X_train, batch_size) - - # Fit training using batch data - cost = autoencoder.partial_fit(batch_xs) - # Compute average loss - avg_cost += cost / n_samples * batch_size - - # Display logs per epoch step - if epoch % display_step == 0: - print("Epoch:", '%d,' % (epoch + 1), - "Cost:", "{:.9f}".format(avg_cost)) - -print("Total cost: " + str(autoencoder.calc_total_cost(X_test))) diff --git a/research/autoencoder/__init__.py b/research/autoencoder/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/autoencoder/autoencoder_models/Autoencoder.py b/research/autoencoder/autoencoder_models/Autoencoder.py deleted file mode 100644 index 788a14642306ece056fc53a85ba8c60d87d31826..0000000000000000000000000000000000000000 --- a/research/autoencoder/autoencoder_models/Autoencoder.py +++ /dev/null @@ -1,91 +0,0 @@ -import numpy as np -import tensorflow as tf - - -class Autoencoder(object): - - def __init__(self, n_layers, transfer_function=tf.nn.softplus, optimizer=tf.train.AdamOptimizer()): - self.n_layers = n_layers - self.transfer = transfer_function - - network_weights = self._initialize_weights() - self.weights = network_weights - - # model - self.x = tf.placeholder(tf.float32, [None, self.n_layers[0]]) - self.hidden_encode = 
[] - h = self.x - for layer in range(len(self.n_layers)-1): - h = self.transfer( - tf.add(tf.matmul(h, self.weights['encode'][layer]['w']), - self.weights['encode'][layer]['b'])) - self.hidden_encode.append(h) - - self.hidden_recon = [] - for layer in range(len(self.n_layers)-1): - h = self.transfer( - tf.add(tf.matmul(h, self.weights['recon'][layer]['w']), - self.weights['recon'][layer]['b'])) - self.hidden_recon.append(h) - self.reconstruction = self.hidden_recon[-1] - - # cost - self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0)) - self.optimizer = optimizer.minimize(self.cost) - - init = tf.global_variables_initializer() - self.sess = tf.Session() - self.sess.run(init) - - - def _initialize_weights(self): - all_weights = dict() - initializer = tf.contrib.layers.xavier_initializer() - # Encoding network weights - encoder_weights = [] - for layer in range(len(self.n_layers)-1): - w = tf.Variable( - initializer((self.n_layers[layer], self.n_layers[layer + 1]), - dtype=tf.float32)) - b = tf.Variable( - tf.zeros([self.n_layers[layer + 1]], dtype=tf.float32)) - encoder_weights.append({'w': w, 'b': b}) - # Recon network weights - recon_weights = [] - for layer in range(len(self.n_layers)-1, 0, -1): - w = tf.Variable( - initializer((self.n_layers[layer], self.n_layers[layer - 1]), - dtype=tf.float32)) - b = tf.Variable( - tf.zeros([self.n_layers[layer - 1]], dtype=tf.float32)) - recon_weights.append({'w': w, 'b': b}) - all_weights['encode'] = encoder_weights - all_weights['recon'] = recon_weights - return all_weights - - def partial_fit(self, X): - cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict={self.x: X}) - return cost - - def calc_total_cost(self, X): - return self.sess.run(self.cost, feed_dict={self.x: X}) - - def transform(self, X): - return self.sess.run(self.hidden_encode[-1], feed_dict={self.x: X}) - - def generate(self, hidden=None): - if hidden is None: - hidden = 
np.random.normal(size=self.weights['encode'][-1]['b']) - return self.sess.run(self.reconstruction, feed_dict={self.hidden_encode[-1]: hidden}) - - def reconstruct(self, X): - return self.sess.run(self.reconstruction, feed_dict={self.x: X}) - - def getWeights(self): - raise NotImplementedError - return self.sess.run(self.weights) - - def getBiases(self): - raise NotImplementedError - return self.sess.run(self.weights) - diff --git a/research/autoencoder/autoencoder_models/DenoisingAutoencoder.py b/research/autoencoder/autoencoder_models/DenoisingAutoencoder.py deleted file mode 100644 index 22b5dcb44a4079b80bfcfc16e3dcda5b21ca8c1b..0000000000000000000000000000000000000000 --- a/research/autoencoder/autoencoder_models/DenoisingAutoencoder.py +++ /dev/null @@ -1,129 +0,0 @@ -import tensorflow as tf - -class AdditiveGaussianNoiseAutoencoder(object): - def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus, optimizer = tf.train.AdamOptimizer(), - scale = 0.1): - self.n_input = n_input - self.n_hidden = n_hidden - self.transfer = transfer_function - self.scale = tf.placeholder(tf.float32) - self.training_scale = scale - network_weights = self._initialize_weights() - self.weights = network_weights - - # model - self.x = tf.placeholder(tf.float32, [None, self.n_input]) - self.hidden = self.transfer(tf.add(tf.matmul(self.x + scale * tf.random_normal((n_input,)), - self.weights['w1']), - self.weights['b1'])) - self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2']) - - # cost - self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0)) - self.optimizer = optimizer.minimize(self.cost) - - init = tf.global_variables_initializer() - self.sess = tf.Session() - self.sess.run(init) - - def _initialize_weights(self): - all_weights = dict() - all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden], - initializer=tf.contrib.layers.xavier_initializer()) - all_weights['b1'] = 
tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32)) - all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32)) - all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32)) - return all_weights - - def partial_fit(self, X): - cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict = {self.x: X, - self.scale: self.training_scale - }) - return cost - - def calc_total_cost(self, X): - return self.sess.run(self.cost, feed_dict = {self.x: X, - self.scale: self.training_scale - }) - - def transform(self, X): - return self.sess.run(self.hidden, feed_dict = {self.x: X, - self.scale: self.training_scale - }) - - def generate(self, hidden=None): - if hidden is None: - hidden = self.sess.run(tf.random_normal([1, self.n_hidden])) - return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden}) - - def reconstruct(self, X): - return self.sess.run(self.reconstruction, feed_dict = {self.x: X, - self.scale: self.training_scale - }) - - def getWeights(self): - return self.sess.run(self.weights['w1']) - - def getBiases(self): - return self.sess.run(self.weights['b1']) - - -class MaskingNoiseAutoencoder(object): - def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus, optimizer = tf.train.AdamOptimizer(), - dropout_probability = 0.95): - self.n_input = n_input - self.n_hidden = n_hidden - self.transfer = transfer_function - self.dropout_probability = dropout_probability - self.keep_prob = tf.placeholder(tf.float32) - - network_weights = self._initialize_weights() - self.weights = network_weights - - # model - self.x = tf.placeholder(tf.float32, [None, self.n_input]) - self.hidden = self.transfer(tf.add(tf.matmul(tf.nn.dropout(self.x, self.keep_prob), self.weights['w1']), - self.weights['b1'])) - self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2']) - - # cost - self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 
2.0)) - self.optimizer = optimizer.minimize(self.cost) - - init = tf.global_variables_initializer() - self.sess = tf.Session() - self.sess.run(init) - - def _initialize_weights(self): - all_weights = dict() - all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden], - initializer=tf.contrib.layers.xavier_initializer()) - all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32)) - all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32)) - all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32)) - return all_weights - - def partial_fit(self, X): - cost, opt = self.sess.run((self.cost, self.optimizer), - feed_dict = {self.x: X, self.keep_prob: self.dropout_probability}) - return cost - - def calc_total_cost(self, X): - return self.sess.run(self.cost, feed_dict = {self.x: X, self.keep_prob: 1.0}) - - def transform(self, X): - return self.sess.run(self.hidden, feed_dict = {self.x: X, self.keep_prob: 1.0}) - - def generate(self, hidden=None): - if hidden is None: - hidden = self.sess.run(tf.random_normal([1, self.n_hidden])) - return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden}) - - def reconstruct(self, X): - return self.sess.run(self.reconstruction, feed_dict = {self.x: X, self.keep_prob: 1.0}) - - def getWeights(self): - return self.sess.run(self.weights['w1']) - - def getBiases(self): - return self.sess.run(self.weights['b1']) diff --git a/research/autoencoder/autoencoder_models/VariationalAutoencoder.py b/research/autoencoder/autoencoder_models/VariationalAutoencoder.py deleted file mode 100644 index 3c2556ab89c2d32be0af5e61099aa12f91c1f176..0000000000000000000000000000000000000000 --- a/research/autoencoder/autoencoder_models/VariationalAutoencoder.py +++ /dev/null @@ -1,70 +0,0 @@ -import tensorflow as tf - -class VariationalAutoencoder(object): - - def __init__(self, n_input, n_hidden, optimizer = tf.train.AdamOptimizer()): - self.n_input = 
n_input - self.n_hidden = n_hidden - - network_weights = self._initialize_weights() - self.weights = network_weights - - # model - self.x = tf.placeholder(tf.float32, [None, self.n_input]) - self.z_mean = tf.add(tf.matmul(self.x, self.weights['w1']), self.weights['b1']) - self.z_log_sigma_sq = tf.add(tf.matmul(self.x, self.weights['log_sigma_w1']), self.weights['log_sigma_b1']) - - # sample from gaussian distribution - eps = tf.random_normal(tf.stack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32) - self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps)) - - self.reconstruction = tf.add(tf.matmul(self.z, self.weights['w2']), self.weights['b2']) - - # cost - reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0), 1) - latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq - - tf.square(self.z_mean) - - tf.exp(self.z_log_sigma_sq), 1) - self.cost = tf.reduce_mean(reconstr_loss + latent_loss) - self.optimizer = optimizer.minimize(self.cost) - - init = tf.global_variables_initializer() - self.sess = tf.Session() - self.sess.run(init) - - def _initialize_weights(self): - all_weights = dict() - all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden], - initializer=tf.contrib.layers.xavier_initializer()) - all_weights['log_sigma_w1'] = tf.get_variable("log_sigma_w1", shape=[self.n_input, self.n_hidden], - initializer=tf.contrib.layers.xavier_initializer()) - all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32)) - all_weights['log_sigma_b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32)) - all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32)) - all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32)) - return all_weights - - def partial_fit(self, X): - cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict={self.x: X}) - return cost - - def calc_total_cost(self, X): - 
return self.sess.run(self.cost, feed_dict = {self.x: X}) - - def transform(self, X): - return self.sess.run(self.z_mean, feed_dict={self.x: X}) - - def generate(self, hidden = None): - if hidden is None: - hidden = self.sess.run(tf.random_normal([1, self.n_hidden])) - return self.sess.run(self.reconstruction, feed_dict={self.z: hidden}) - - def reconstruct(self, X): - return self.sess.run(self.reconstruction, feed_dict={self.x: X}) - - def getWeights(self): - return self.sess.run(self.weights['w1']) - - def getBiases(self): - return self.sess.run(self.weights['b1']) - diff --git a/research/autoencoder/autoencoder_models/__init__.py b/research/autoencoder/autoencoder_models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/brain_coder/README.md b/research/brain_coder/README.md deleted file mode 100644 index 3e2a1656d8f145569266c19c64b41779ccbf308c..0000000000000000000000000000000000000000 --- a/research/brain_coder/README.md +++ /dev/null @@ -1,34 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Brain Coder - -*Authors: Daniel Abolafia, Mohammad Norouzi, Quoc Le* - -Brain coder is a code synthesis experimental environment. We provide code that reproduces the results from our recent paper [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526). See single_task/README.md for details on how to build and reproduce those experiments. 
- -## Installation - -First install dependencies seperately: - -* [bazel](https://docs.bazel.build/versions/master/install.html) -* [TensorFlow](https://www.tensorflow.org/install/) -* [scipy](https://www.scipy.org/install.html) -* [absl-py](https://github.com/abseil/abseil-py) - -Note: even if you already have these dependencies installed, make sure they are -up-to-date to avoid unnecessary debugging. - - -## Building - -Use bazel from the top-level repo directory. - -For example: - -```bash -bazel build single_task:run -``` - -View README.md files in subdirectories for more details. diff --git a/research/brain_coder/WORKSPACE b/research/brain_coder/WORKSPACE deleted file mode 100644 index 7c07b5325e71a1684fb38089adeaaa9f4f00a775..0000000000000000000000000000000000000000 --- a/research/brain_coder/WORKSPACE +++ /dev/null @@ -1,5 +0,0 @@ -git_repository( - name = "subpar", - remote = "https://github.com/google/subpar", - tag = "1.0.0", -) diff --git a/research/brain_coder/common/BUILD b/research/brain_coder/common/BUILD deleted file mode 100644 index b5f79c25096ca574d4a133f871343eedd985b25e..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/BUILD +++ /dev/null @@ -1,106 +0,0 @@ -licenses(["notice"]) - -package(default_visibility = [ - "//:__subpackages__", -]) - -py_library( - name = "bf", - srcs = ["bf.py"], -) - -py_test( - name = "bf_test", - srcs = ["bf_test.py"], - deps = [ - ":bf", - # tensorflow dep - ], -) - -py_library( - name = "config_lib", - srcs = ["config_lib.py"], -) - -py_test( - name = "config_lib_test", - srcs = ["config_lib_test.py"], - deps = [ - ":config_lib", - # tensorflow dep - ], -) - -py_library( - name = "reward", - srcs = ["reward.py"], -) - -py_test( - name = "reward_test", - srcs = ["reward_test.py"], - deps = [ - ":reward", - # numpy dep - # tensorflow dep - ], -) - -py_library( - name = "rollout", - srcs = ["rollout.py"], - deps = [ - ":utils", - # numpy dep - # scipy dep - ], -) - -py_test( - name = 
"rollout_test", - srcs = ["rollout_test.py"], - deps = [ - ":rollout", - # numpy dep - # tensorflow dep - ], -) - -py_library( - name = "schedules", - srcs = ["schedules.py"], - deps = [":config_lib"], -) - -py_test( - name = "schedules_test", - srcs = ["schedules_test.py"], - deps = [ - ":config_lib", - ":schedules", - # numpy dep - # tensorflow dep - ], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - deps = [ - # file dep - # absl dep /logging - # numpy dep - # tensorflow dep - ], -) - -py_test( - name = "utils_test", - srcs = ["utils_test.py"], - deps = [ - ":utils", - # numpy dep - # tensorflow dep - ], -) diff --git a/research/brain_coder/common/bf.py b/research/brain_coder/common/bf.py deleted file mode 100644 index f049c45258f7b78a25b5492108b2f8b37c8a55cd..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/bf.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""BrainF**k interpreter. 
- -Language info: https://en.wikipedia.org/wiki/Brainfuck - -Based on public implementation: -https://github.com/pocmo/Python-Brainfuck/blob/master/brainfuck.py -""" - -from collections import namedtuple -import time - - -EvalResult = namedtuple( - 'EvalResult', ['output', 'success', 'failure_reason', 'steps', 'time', - 'memory', 'program_trace']) - - -ExecutionSnapshot = namedtuple( - 'ExecutionSnapshot', - ['codeptr', 'codechar', 'memptr', 'memval', 'memory', 'next_input', - 'output_buffer']) - - -class Status(object): - SUCCESS = 'success' - TIMEOUT = 'timeout' - STEP_LIMIT = 'step-limit' - SYNTAX_ERROR = 'syntax-error' - - -CHARS = INT_TO_CHAR = ['>', '<', '+', '-', '[', ']', '.', ','] -CHAR_TO_INT = dict([(c, i) for i, c in enumerate(INT_TO_CHAR)]) - - -class LookAheadIterator(object): - """Same API as Python iterator, with additional peek method.""" - - def __init__(self, iterable): - self._it = iter(iterable) - self._current_element = None - self._done = False - self._preload_next() - - def _preload_next(self): - try: - self._current_element = self._it.next() - except StopIteration: - self._done = True - - def next(self): - if self._done: - raise StopIteration - element = self._current_element - self._preload_next() - return element - - def peek(self, default_value=None): - if self._done: - if default_value is None: - raise StopIteration - return default_value - return self._current_element - - -def buildbracemap(code): - """Build jump map. - - Args: - code: List or string or BF chars. - - Returns: - bracemap: dict mapping open and close brace positions in the code to their - destination jumps. Specifically, positions of matching open/close braces - if they exist. - correct_syntax: True if all braces match. False if there are unmatched - braces in the code. Even if there are unmatched braces, a bracemap will - be built, and unmatched braces will map to themselves. 
- """ - bracestack, bracemap = [], {} - - correct_syntax = True - for position, command in enumerate(code): - if command == '[': - bracestack.append(position) - if command == ']': - if not bracestack: # Unmatched closing brace. - bracemap[position] = position # Don't jump to any position. - correct_syntax = False - continue - start = bracestack.pop() - bracemap[start] = position - bracemap[position] = start - if bracestack: # Unmatched opening braces. - for pos in bracestack: - bracemap[pos] = pos # Don't jump to any position. - correct_syntax = False - return bracemap, correct_syntax - - -def evaluate(code, input_buffer=None, init_memory=None, base=256, timeout=1.0, - max_steps=None, require_correct_syntax=True, output_memory=False, - debug=False): - """Execute BF code. - - Args: - code: String or list of BF characters. Any character not in CHARS will be - ignored. - input_buffer: A list of ints which will be used as the program's input - stream. Each read op "," will read an int from this list. 0's will be - read once the end of the list is reached, or if no input buffer is - given. - init_memory: A list of ints. Memory for first k positions will be - initialized to this list (where k = len(init_memory)). Memory positions - are initialized to 0 by default. - base: Integer base for the memory. When a memory value is incremented to - `base` it will overflow to 0. When a memory value is decremented to -1 - it will underflow to `base` - 1. - timeout: Time limit for program execution in seconds. Set to None to - disable. - max_steps: Execution step limit. An execution step is the execution of one - operation (code character), even if that op has been executed before. - Execution exits when this many steps are reached. Set to None to - disable. Disabled by default. - require_correct_syntax: If True, unmatched braces will cause `evaluate` to - return without executing the code. The failure reason will be - `Status.SYNTAX_ERROR`. 
If False, unmatched braces are ignored - and execution will continue. - output_memory: If True, the state of the memory at the end of execution is - returned. - debug: If True, then a full program trace will be returned. - - Returns: - EvalResult namedtuple containing - output: List of ints which were written out by the program with the "." - operation. - success: Boolean. Whether execution completed successfully. - failure_reason: One of the attributes of `Status`. Gives extra info - about why execution was not successful. - steps: Number of execution steps the program ran for. - time: Amount of time in seconds the program ran for. - memory: If `output_memory` is True, a list of memory cells up to the last - one written to. otherwise, None. - """ - input_iter = ( - LookAheadIterator(input_buffer) if input_buffer is not None - else LookAheadIterator([])) - - # Null memory value. This is the value of an empty memory. Also the value - # returned by the read operation when the input buffer is empty, or the - # end of the buffer is reached. - null_value = 0 - - code = list(code) - bracemap, correct_syntax = buildbracemap(code) # will modify code list - if require_correct_syntax and not correct_syntax: - return EvalResult([], False, Status.SYNTAX_ERROR, 0, 0.0, - [] if output_memory else None, [] if debug else None) - - output_buffer = [] - - codeptr, cellptr = 0, 0 - - cells = list(init_memory) if init_memory else [0] - - program_trace = [] if debug else None - success = True - reason = Status.SUCCESS - start_time = time.time() - steps = 0 - while codeptr < len(code): - command = code[codeptr] - - if debug: - # Add step to program trace. 
- program_trace.append(ExecutionSnapshot( - codeptr=codeptr, codechar=command, memptr=cellptr, - memval=cells[cellptr], memory=list(cells), - next_input=input_iter.peek(null_value), - output_buffer=list(output_buffer))) - - if command == '>': - cellptr += 1 - if cellptr == len(cells): cells.append(null_value) - - if command == '<': - cellptr = 0 if cellptr <= 0 else cellptr - 1 - - if command == '+': - cells[cellptr] = cells[cellptr] + 1 if cells[cellptr] < (base - 1) else 0 - - if command == '-': - cells[cellptr] = cells[cellptr] - 1 if cells[cellptr] > 0 else (base - 1) - - if command == '[' and cells[cellptr] == 0: codeptr = bracemap[codeptr] - if command == ']' and cells[cellptr] != 0: codeptr = bracemap[codeptr] - - if command == '.': output_buffer.append(cells[cellptr]) - if command == ',': cells[cellptr] = next(input_iter, null_value) - - codeptr += 1 - steps += 1 - - if timeout is not None and time.time() - start_time > timeout: - success = False - reason = Status.TIMEOUT - break - if max_steps is not None and steps >= max_steps: - success = False - reason = Status.STEP_LIMIT - break - - if debug: - # Add step to program trace. 
- command = code[codeptr] if codeptr < len(code) else '' - program_trace.append(ExecutionSnapshot( - codeptr=codeptr, codechar=command, memptr=cellptr, - memval=cells[cellptr], memory=list(cells), - next_input=input_iter.peek(null_value), - output_buffer=list(output_buffer))) - - return EvalResult( - output=output_buffer, - success=success, - failure_reason=reason, - steps=steps, - time=time.time() - start_time, - memory=cells if output_memory else None, - program_trace=program_trace) - - diff --git a/research/brain_coder/common/bf_test.py b/research/brain_coder/common/bf_test.py deleted file mode 100644 index 2cbf505601a96ec1fc819f1d01fe551f2fae4a5d..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/bf_test.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.bf.""" - -import tensorflow as tf - -from common import bf # brain coder - - -class BfTest(tf.test.TestCase): - - def assertCorrectOutput(self, target_output, eval_result): - self.assertEqual(target_output, eval_result.output) - self.assertTrue(eval_result.success) - self.assertEqual(bf.Status.SUCCESS, eval_result.failure_reason) - - def testBasicOps(self): - self.assertCorrectOutput( - [3, 1, 2], - bf.evaluate('+++.--.+.')) - self.assertCorrectOutput( - [1, 1, 2], - bf.evaluate('+.<.>++.')) - self.assertCorrectOutput( - [0], - bf.evaluate('+,.')) - self.assertCorrectOutput( - [ord(char) for char in 'Hello World!\n'], - bf.evaluate( - '>++++++++[-<+++++++++>]<.>>+>-[+]++>++>+++[>[->+++<<+++>]<<]>-----' - '.>->+++..+++.>-.<<+[>[+>+]>>]<--------------.>>.+++.------.-------' - '-.>+.>+.')) - - def testBase(self): - self.assertCorrectOutput( - [1, 4], - bf.evaluate('+.--.', base=5, input_buffer=[])) - - def testInputBuffer(self): - self.assertCorrectOutput( - [2, 3, 4], - bf.evaluate('>,[>,]<[.<]', input_buffer=[4, 3, 2])) - - def testBadChars(self): - 
self.assertCorrectOutput( - [2, 3, 4], - bf.evaluate('>,[>,]hello----.[[[[[>+.', - input_buffer=[], - base=10, - require_correct_syntax=False)) - - eval_result = bf.evaluate( - '+++.]]]]>----.[[[[[>+.', - input_buffer=[], - base=10, - require_correct_syntax=True) - self.assertEqual([], eval_result.output) - self.assertFalse(eval_result.success) - self.assertEqual(bf.Status.SYNTAX_ERROR, - eval_result.failure_reason) - - def testTimeout(self): - er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=0.1) - self.assertEqual( - ([1], False, bf.Status.TIMEOUT), - (er.output, er.success, er.failure_reason)) - self.assertTrue(0.07 < er.time < 0.21) - - er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=0.1) - self.assertEqual( - ([1, 0], True, bf.Status.SUCCESS), - (er.output, er.success, er.failure_reason)) - self.assertTrue(er.time < 0.15) - - def testMaxSteps(self): - er = bf.evaluate('+.[].', base=5, input_buffer=[], timeout=None, - max_steps=100) - self.assertEqual( - ([1], False, bf.Status.STEP_LIMIT, 100), - (er.output, er.success, er.failure_reason, er.steps)) - - er = bf.evaluate('+.[-].', base=5, input_buffer=[], timeout=None, - max_steps=100) - self.assertEqual( - ([1, 0], True, bf.Status.SUCCESS), - (er.output, er.success, er.failure_reason)) - self.assertTrue(er.steps < 100) - - def testOutputMemory(self): - er = bf.evaluate('+>++>+++>++++.', base=256, input_buffer=[], - output_memory=True) - self.assertEqual( - ([4], True, bf.Status.SUCCESS), - (er.output, er.success, er.failure_reason)) - self.assertEqual([1, 2, 3, 4], er.memory) - - def testProgramTrace(self): - es = bf.ExecutionSnapshot - er = bf.evaluate(',[.>,].', base=256, input_buffer=[2, 1], debug=True) - self.assertEqual( - [es(codeptr=0, codechar=',', memptr=0, memval=0, memory=[0], - next_input=2, output_buffer=[]), - es(codeptr=1, codechar='[', memptr=0, memval=2, memory=[2], - next_input=1, output_buffer=[]), - es(codeptr=2, codechar='.', memptr=0, memval=2, memory=[2], - 
next_input=1, output_buffer=[]), - es(codeptr=3, codechar='>', memptr=0, memval=2, memory=[2], - next_input=1, output_buffer=[2]), - es(codeptr=4, codechar=',', memptr=1, memval=0, memory=[2, 0], - next_input=1, output_buffer=[2]), - es(codeptr=5, codechar=']', memptr=1, memval=1, memory=[2, 1], - next_input=0, output_buffer=[2]), - es(codeptr=2, codechar='.', memptr=1, memval=1, memory=[2, 1], - next_input=0, output_buffer=[2]), - es(codeptr=3, codechar='>', memptr=1, memval=1, memory=[2, 1], - next_input=0, output_buffer=[2, 1]), - es(codeptr=4, codechar=',', memptr=2, memval=0, memory=[2, 1, 0], - next_input=0, output_buffer=[2, 1]), - es(codeptr=5, codechar=']', memptr=2, memval=0, memory=[2, 1, 0], - next_input=0, output_buffer=[2, 1]), - es(codeptr=6, codechar='.', memptr=2, memval=0, memory=[2, 1, 0], - next_input=0, output_buffer=[2, 1]), - es(codeptr=7, codechar='', memptr=2, memval=0, memory=[2, 1, 0], - next_input=0, output_buffer=[2, 1, 0])], - er.program_trace) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/common/config_lib.py b/research/brain_coder/common/config_lib.py deleted file mode 100644 index 733fa202f2e500f964beff2111cb7445fa66a9e1..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/config_lib.py +++ /dev/null @@ -1,337 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Objects for storing configuration and passing config into binaries. - -Config class stores settings and hyperparameters for models, data, and anything -else that may be specific to a particular run. 
-""" - -import ast -import itertools -from six.moves import xrange - - -class Config(dict): - """Stores model configuration, hyperparameters, or dataset parameters.""" - - def __getattr__(self, attr): - return self[attr] - - def __setattr__(self, attr, value): - self[attr] = value - - def pretty_str(self, new_lines=True, indent=2, final_indent=0): - prefix = (' ' * indent) if new_lines else '' - final_prefix = (' ' * final_indent) if new_lines else '' - kv = ['%s%s=%s' % (prefix, k, - (repr(v) if not isinstance(v, Config) - else v.pretty_str(new_lines=new_lines, - indent=indent+2, - final_indent=indent))) - for k, v in self.items()] - if new_lines: - return 'Config(\n%s\n%s)' % (',\n'.join(kv), final_prefix) - else: - return 'Config(%s)' % ', '.join(kv) - - def _update_iterator(self, *args, **kwargs): - """Convert mixed input into an iterator over (key, value) tuples. - - Follows the dict.update call signature. - - Args: - *args: (Optional) Pass a dict or iterable of (key, value) 2-tuples as - an unnamed argument. Only one unnamed argument allowed. - **kwargs: (Optional) Pass (key, value) pairs as named arguments, where the - argument name is the key and the argument value is the value. - - Returns: - An iterator over (key, value) tuples given in the input. - - Raises: - TypeError: If more than one unnamed argument is given. - """ - if len(args) > 1: - raise TypeError('Expected at most 1 unnamed arguments, got %d' - % len(args)) - obj = args[0] if args else dict() - if isinstance(obj, dict): - return itertools.chain(obj.items(), kwargs.items()) - # Assume obj is an iterable of 2-tuples. - return itertools.chain(obj, kwargs.items()) - - def make_default(self, keys=None): - """Convert OneOf objects into their default configs. - - Recursively calls into Config objects. - - Args: - keys: Iterable of key names to check. If None, all keys in self will be - used. - """ - if keys is None: - keys = self.keys() - for k in keys: - # Replace OneOf with its default value. 
- if isinstance(self[k], OneOf): - self[k] = self[k].default() - # Recursively call into all Config objects, even those that came from - # OneOf objects in the previous code line (for nested OneOf objects). - if isinstance(self[k], Config): - self[k].make_default() - - def update(self, *args, **kwargs): - """Same as dict.update except nested Config objects are updated. - - Args: - *args: (Optional) Pass a dict or list of (key, value) 2-tuples as unnamed - argument. - **kwargs: (Optional) Pass (key, value) pairs as named arguments, where the - argument name is the key and the argument value is the value. - """ - key_set = set(self.keys()) - for k, v in self._update_iterator(*args, **kwargs): - if k in key_set: - key_set.remove(k) # This key is updated so exclude from make_default. - if k in self and isinstance(self[k], Config) and isinstance(v, dict): - self[k].update(v) - elif k in self and isinstance(self[k], OneOf) and isinstance(v, dict): - # Replace OneOf with the chosen config. - self[k] = self[k].update(v) - else: - self[k] = v - self.make_default(key_set) - - def strict_update(self, *args, **kwargs): - """Same as Config.update except keys and types are not allowed to change. - - If a given key is not already in this instance, an exception is raised. If a - given value does not have the same type as the existing value for the same - key, an exception is raised. Use this method to catch config mistakes. - - Args: - *args: (Optional) Pass a dict or list of (key, value) 2-tuples as unnamed - argument. - **kwargs: (Optional) Pass (key, value) pairs as named arguments, where the - argument name is the key and the argument value is the value. - - Raises: - TypeError: If more than one unnamed argument is given. - TypeError: If new value type does not match existing type. - KeyError: If a given key is not already defined in this instance. 
- """ - key_set = set(self.keys()) - for k, v in self._update_iterator(*args, **kwargs): - if k in self: - key_set.remove(k) # This key is updated so exclude from make_default. - if isinstance(self[k], Config): - if not isinstance(v, dict): - raise TypeError('dict required for Config value, got %s' % type(v)) - self[k].strict_update(v) - elif isinstance(self[k], OneOf): - if not isinstance(v, dict): - raise TypeError('dict required for OneOf value, got %s' % type(v)) - # Replace OneOf with the chosen config. - self[k] = self[k].strict_update(v) - else: - if not isinstance(v, type(self[k])): - raise TypeError('Expecting type %s for key %s, got type %s' - % (type(self[k]), k, type(v))) - self[k] = v - else: - raise KeyError( - 'Key %s does not exist. New key creation not allowed in ' - 'strict_update.' % k) - self.make_default(key_set) - - @staticmethod - def from_str(config_str): - """Inverse of Config.__str__.""" - parsed = ast.literal_eval(config_str) - assert isinstance(parsed, dict) - - def _make_config(dictionary): - for k, v in dictionary.items(): - if isinstance(v, dict): - dictionary[k] = _make_config(v) - return Config(**dictionary) - return _make_config(parsed) - - @staticmethod - def parse(key_val_string): - """Parse hyperparameter string into Config object. - - Format is 'key=val,key=val,...' - Values can be any python literal, or another Config object encoded as - 'c(key=val,key=val,...)'. - c(...) expressions can be arbitrarily nested. - - Example: - 'a=1,b=3e-5,c=[1,2,3],d="hello world",e={"a":1,"b":2},f=c(x=1,y=[10,20])' - - Args: - key_val_string: The hyperparameter string. - - Returns: - Config object parsed from the input string. 
- """ - if not key_val_string.strip(): - return Config() - def _pair_to_kv(pair): - split_index = pair.find('=') - key, val = pair[:split_index].strip(), pair[split_index+1:].strip() - if val.startswith('c(') and val.endswith(')'): - val = Config.parse(val[2:-1]) - else: - val = ast.literal_eval(val) - return key, val - return Config(**dict([_pair_to_kv(pair) - for pair in _comma_iterator(key_val_string)])) - - -class OneOf(object): - """Stores branching config. - - In some cases there may be options which each have their own set of config - params. For example, if specifying config for an environment, each environment - can have custom config options. OneOf is a way to organize branching config. - - Usage example: - one_of = OneOf( - [Config(a=1, b=2), - Config(a=2, c='hello'), - Config(a=3, d=10, e=-10)], - a=1) - config = one_of.strict_update(Config(a=3, d=20)) - config == {'a': 3, 'd': 20, 'e': -10} - """ - - def __init__(self, choices, **kwargs): - """Constructor. - - Usage: OneOf([Config(...), Config(...), ...], attribute=default_value) - - Args: - choices: An iterable of Config objects. When update/strict_update is - called on this OneOf, one of these Config will be selected. - **kwargs: Give exactly one config attribute to branch on. The value of - this attribute during update/strict_update will determine which - Config is used. - - Raises: - ValueError: If kwargs does not contain exactly one entry. Should give one - named argument which is used as the attribute to condition on. - """ - if len(kwargs) != 1: - raise ValueError( - 'Incorrect usage. Must give exactly one named argument. The argument ' - 'name is the config attribute to condition on, and the argument ' - 'value is the default choice. Got %d named arguments.' % len(kwargs)) - key, default_value = kwargs.items()[0] - self.key = key - self.default_value = default_value - - # Make sure each choice is a Config object. 
- for config in choices: - if not isinstance(config, Config): - raise TypeError('choices must be a list of Config objects. Got %s.' - % type(config)) - - # Map value for key to the config with that value. - self.value_map = {config[key]: config for config in choices} - self.default_config = self.value_map[self.default_value] - - # Make sure there are no duplicate values. - if len(self.value_map) != len(choices): - raise ValueError('Multiple choices given for the same value of %s.' % key) - - # Check that the default value is valid. - if self.default_value not in self.value_map: - raise ValueError( - 'Default value is not an available choice. Got %s=%s. Choices are %s.' - % (key, self.default_value, self.value_map.keys())) - - def default(self): - return self.default_config - - def update(self, other): - """Choose a config and update it. - - If `other` is a Config, one of the config choices is selected and updated. - Otherwise `other` is returned. - - Args: - other: Will update chosen config with this value by calling `update` on - the config. - - Returns: - The chosen config after updating it, or `other` if no config could be - selected. - """ - if not isinstance(other, Config): - return other - if self.key not in other or other[self.key] not in self.value_map: - return other - target = self.value_map[other[self.key]] - target.update(other) - return target - - def strict_update(self, config): - """Choose a config and update it. - - `config` must be a Config object. `config` must have the key used to select - among the config choices, and that key must have a value which one of the - config choices has. - - Args: - config: A Config object. the chosen config will be update by calling - `strict_update`. - - Returns: - The chosen config after updating it. - - Raises: - TypeError: If `config` is not a Config instance. - ValueError: If `config` does not have the branching key in its key set. 
- ValueError: If the value of the config's branching key is not one of the - valid choices. - """ - if not isinstance(config, Config): - raise TypeError('Expecting Config instance, got %s.' % type(config)) - if self.key not in config: - raise ValueError( - 'Branching key %s required but not found in %s' % (self.key, config)) - if config[self.key] not in self.value_map: - raise ValueError( - 'Value %s for key %s is not a possible choice. Choices are %s.' - % (config[self.key], self.key, self.value_map.keys())) - target = self.value_map[config[self.key]] - target.strict_update(config) - return target - - -def _next_comma(string, start_index): - """Finds the position of the next comma not used in a literal collection.""" - paren_count = 0 - for i in xrange(start_index, len(string)): - c = string[i] - if c == '(' or c == '[' or c == '{': - paren_count += 1 - elif c == ')' or c == ']' or c == '}': - paren_count -= 1 - if paren_count == 0 and c == ',': - return i - return -1 - - -def _comma_iterator(string): - index = 0 - while 1: - next_index = _next_comma(string, index) - if next_index == -1: - yield string[index:] - return - yield string[index:next_index] - index = next_index + 1 diff --git a/research/brain_coder/common/config_lib_test.py b/research/brain_coder/common/config_lib_test.py deleted file mode 100644 index cdc96f92d2428f06e780930979662fdfda92e3f5..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/config_lib_test.py +++ /dev/null @@ -1,425 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.config_lib.""" - -import tensorflow as tf - -from common import config_lib # brain coder - - -class ConfigLibTest(tf.test.TestCase): - - def testConfig(self): - config = config_lib.Config(hello='world', foo='bar', num=123, f=56.7) - self.assertEqual('world', config.hello) - self.assertEqual('bar', config['foo']) - config.hello = 'everyone' - config['bar'] = 
9000 - self.assertEqual('everyone', config['hello']) - self.assertEqual(9000, config.bar) - self.assertEqual(5, len(config)) - - def testConfigUpdate(self): - config = config_lib.Config(a=1, b=2, c=3) - config.update({'b': 10, 'd': 4}) - self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4}, config) - - config = config_lib.Config(a=1, b=2, c=3) - config.update(b=10, d=4) - self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4}, config) - - config = config_lib.Config(a=1, b=2, c=3) - config.update({'e': 5}, b=10, d=4) - self.assertEqual({'a': 1, 'b': 10, 'c': 3, 'd': 4, 'e': 5}, config) - - config = config_lib.Config( - a=1, - b=2, - x=config_lib.Config( - l='a', - y=config_lib.Config(m=1, n=2), - z=config_lib.Config( - q=config_lib.Config(a=10, b=20), - r=config_lib.Config(s=1, t=2)))) - config.update(x={'y': {'m': 10}, 'z': {'r': {'s': 5}}}) - self.assertEqual( - config_lib.Config( - a=1, b=2, - x=config_lib.Config( - l='a', - y=config_lib.Config(m=10, n=2), - z=config_lib.Config( - q=config_lib.Config(a=10, b=20), - r=config_lib.Config(s=5, t=2)))), - config) - - config = config_lib.Config( - foo='bar', - num=100, - x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)), - y=config_lib.Config(qrs=5, tuv=10), - d={'a': 1, 'b': 2}, - l=[1, 2, 3]) - config.update( - config_lib.Config( - foo='hat', - num=50.5, - x={'a': 5, 'z': -10}, - y=config_lib.Config(wxyz=-1)), - d={'a': 10, 'c': 20}, - l=[3, 4, 5, 6]) - self.assertEqual( - config_lib.Config( - foo='hat', - num=50.5, - x=config_lib.Config(a=5, b=2, z=-10, - c=config_lib.Config(h=10, i=20, j=30)), - y=config_lib.Config(qrs=5, tuv=10, wxyz=-1), - d={'a': 10, 'c': 20}, - l=[3, 4, 5, 6]), - config) - self.assertTrue(isinstance(config.x, config_lib.Config)) - self.assertTrue(isinstance(config.x.c, config_lib.Config)) - self.assertTrue(isinstance(config.y, config_lib.Config)) - - config = config_lib.Config( - foo='bar', - num=100, - x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)), - 
y=config_lib.Config(qrs=5, tuv=10), - d={'a': 1, 'b': 2}, - l=[1, 2, 3]) - config.update( - config_lib.Config( - foo=1234, - num='hello', - x={'a': 5, 'z': -10, 'c': {'h': -5, 'k': 40}}, - y=[1, 2, 3, 4], - d='stuff', - l={'a': 1, 'b': 2})) - self.assertEqual( - config_lib.Config( - foo=1234, - num='hello', - x=config_lib.Config(a=5, b=2, z=-10, - c=config_lib.Config(h=-5, i=20, j=30, k=40)), - y=[1, 2, 3, 4], - d='stuff', - l={'a': 1, 'b': 2}), - config) - self.assertTrue(isinstance(config.x, config_lib.Config)) - self.assertTrue(isinstance(config.x.c, config_lib.Config)) - self.assertTrue(isinstance(config.y, list)) - - def testConfigStrictUpdate(self): - config = config_lib.Config(a=1, b=2, c=3) - config.strict_update({'b': 10, 'c': 20}) - self.assertEqual({'a': 1, 'b': 10, 'c': 20}, config) - - config = config_lib.Config(a=1, b=2, c=3) - config.strict_update(b=10, c=20) - self.assertEqual({'a': 1, 'b': 10, 'c': 20}, config) - - config = config_lib.Config(a=1, b=2, c=3, d=4) - config.strict_update({'d': 100}, b=10, a=20) - self.assertEqual({'a': 20, 'b': 10, 'c': 3, 'd': 100}, config) - - config = config_lib.Config( - a=1, - b=2, - x=config_lib.Config( - l='a', - y=config_lib.Config(m=1, n=2), - z=config_lib.Config( - q=config_lib.Config(a=10, b=20), - r=config_lib.Config(s=1, t=2)))) - config.strict_update(x={'y': {'m': 10}, 'z': {'r': {'s': 5}}}) - self.assertEqual( - config_lib.Config( - a=1, b=2, - x=config_lib.Config( - l='a', - y=config_lib.Config(m=10, n=2), - z=config_lib.Config( - q=config_lib.Config(a=10, b=20), - r=config_lib.Config(s=5, t=2)))), - config) - - config = config_lib.Config( - foo='bar', - num=100, - x=config_lib.Config(a=1, b=2, c=config_lib.Config(h=10, i=20, j=30)), - y=config_lib.Config(qrs=5, tuv=10), - d={'a': 1, 'b': 2}, - l=[1, 2, 3]) - config.strict_update( - config_lib.Config( - foo='hat', - num=50, - x={'a': 5, 'c': {'h': 100}}, - y=config_lib.Config(tuv=-1)), - d={'a': 10, 'c': 20}, - l=[3, 4, 5, 6]) - self.assertEqual( - 
config_lib.Config( - foo='hat', - num=50, - x=config_lib.Config(a=5, b=2, - c=config_lib.Config(h=100, i=20, j=30)), - y=config_lib.Config(qrs=5, tuv=-1), - d={'a': 10, 'c': 20}, - l=[3, 4, 5, 6]), - config) - - def testConfigStrictUpdateFail(self): - config = config_lib.Config(a=1, b=2, c=3, x=config_lib.Config(a=1, b=2)) - with self.assertRaises(KeyError): - config.strict_update({'b': 10, 'c': 20, 'd': 50}) - with self.assertRaises(KeyError): - config.strict_update(b=10, d=50) - with self.assertRaises(KeyError): - config.strict_update(x={'c': 3}) - with self.assertRaises(TypeError): - config.strict_update(a='string') - with self.assertRaises(TypeError): - config.strict_update(x={'a': 'string'}) - with self.assertRaises(TypeError): - config.strict_update(x=[1, 2, 3]) - - def testConfigFromStr(self): - config = config_lib.Config.from_str("{'c': {'d': 5}, 'b': 2, 'a': 1}") - self.assertEqual( - {'c': {'d': 5}, 'b': 2, 'a': 1}, config) - self.assertTrue(isinstance(config, config_lib.Config)) - self.assertTrue(isinstance(config.c, config_lib.Config)) - - def testConfigParse(self): - config = config_lib.Config.parse( - 'hello="world",num=1234.5,lst=[10,20.5,True,"hi",("a","b","c")],' - 'dct={9:10,"stuff":"qwerty","subdict":{1:True,2:False}},' - 'subconfig=c(a=1,b=[1,2,[3,4]],c=c(f="f",g="g"))') - self.assertEqual( - {'hello': 'world', 'num': 1234.5, - 'lst': [10, 20.5, True, 'hi', ('a', 'b', 'c')], - 'dct': {9: 10, 'stuff': 'qwerty', 'subdict': {1: True, 2: False}}, - 'subconfig': {'a': 1, 'b': [1, 2, [3, 4]], 'c': {'f': 'f', 'g': 'g'}}}, - config) - self.assertTrue(isinstance(config, config_lib.Config)) - self.assertTrue(isinstance(config.subconfig, config_lib.Config)) - self.assertTrue(isinstance(config.subconfig.c, config_lib.Config)) - self.assertFalse(isinstance(config.dct, config_lib.Config)) - self.assertFalse(isinstance(config.dct['subdict'], config_lib.Config)) - self.assertTrue(isinstance(config.lst[4], tuple)) - - def testConfigParseErrors(self): - with 
self.assertRaises(SyntaxError): - config_lib.Config.parse('a=[1,2,b="hello"') - with self.assertRaises(SyntaxError): - config_lib.Config.parse('a=1,b=c(x="a",y="b"') - with self.assertRaises(SyntaxError): - config_lib.Config.parse('a=1,b=c(x="a")y="b"') - with self.assertRaises(SyntaxError): - config_lib.Config.parse('a=1,b=c(x="a"),y="b",') - - def testOneOf(self): - def make_config(): - return config_lib.Config( - data=config_lib.OneOf( - [config_lib.Config(task=1, a='hello'), - config_lib.Config(task=2, a='world', b='stuff'), - config_lib.Config(task=3, c=1234)], - task=2), - model=config_lib.Config(stuff=1)) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=1,a="hi")')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=1, a='hi'), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=2,a="hi")')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=2, a='hi', b='stuff'), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=3)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=3, c=1234), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=2, a='world', b='stuff'), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=4,d=9999)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=4, d=9999), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=5')) - self.assertEqual( - config_lib.Config( - data=5, - 
model=config_lib.Config(stuff=2)), - config) - - def testOneOfStrict(self): - def make_config(): - return config_lib.Config( - data=config_lib.OneOf( - [config_lib.Config(task=1, a='hello'), - config_lib.Config(task=2, a='world', b='stuff'), - config_lib.Config(task=3, c=1234)], - task=2), - model=config_lib.Config(stuff=1)) - - config = make_config() - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=1,a="hi")')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=1, a='hi'), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=2,a="hi")')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=2, a='hi', b='stuff'), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=3)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=3, c=1234), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config(task=2, a='world', b='stuff'), - model=config_lib.Config(stuff=2)), - config) - - def testNestedOneOf(self): - def make_config(): - return config_lib.Config( - data=config_lib.OneOf( - [config_lib.Config(task=1, a='hello'), - config_lib.Config( - task=2, - a=config_lib.OneOf( - [config_lib.Config(x=1, y=2), - config_lib.Config(x=-1, y=1000, z=4)], - x=1)), - config_lib.Config(task=3, c=1234)], - task=2), - model=config_lib.Config(stuff=1)) - - config = make_config() - config.update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=2,a=c(x=-1,z=8))')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config( - task=2, - a=config_lib.Config(x=-1, y=1000, z=8)), - model=config_lib.Config(stuff=2)), - config) - - config = 
make_config() - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=2,a=c(x=-1,z=8))')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config( - task=2, - a=config_lib.Config(x=-1, y=1000, z=8)), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.update(config_lib.Config.parse('model=c(stuff=2)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config( - task=2, - a=config_lib.Config(x=1, y=2)), - model=config_lib.Config(stuff=2)), - config) - - config = make_config() - config.strict_update(config_lib.Config.parse('model=c(stuff=2)')) - self.assertEqual( - config_lib.Config( - data=config_lib.Config( - task=2, - a=config_lib.Config(x=1, y=2)), - model=config_lib.Config(stuff=2)), - config) - - def testOneOfStrictErrors(self): - def make_config(): - return config_lib.Config( - data=config_lib.OneOf( - [config_lib.Config(task=1, a='hello'), - config_lib.Config(task=2, a='world', b='stuff'), - config_lib.Config(task=3, c=1234)], - task=2), - model=config_lib.Config(stuff=1)) - - config = make_config() - with self.assertRaises(TypeError): - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=[1,2,3]')) - - config = make_config() - with self.assertRaises(KeyError): - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=3,c=5678,d=9999)')) - - config = make_config() - with self.assertRaises(ValueError): - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=c(task=4,d=9999)')) - - config = make_config() - with self.assertRaises(TypeError): - config.strict_update(config_lib.Config.parse( - 'model=c(stuff=2),data=5')) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/common/reward.py b/research/brain_coder/common/reward.py deleted file mode 100644 index 87e01c9c52e1ee22f2745dce12bc5e2726711ff7..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/reward.py +++ /dev/null @@ 
-1,390 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Reward functions, distance functions, and reward managers.""" - -from abc import ABCMeta -from abc import abstractmethod -from math import log - - -# All sequences here are assumed to be lists of ints bounded -# between 0 and `base`-1 (inclusive). - - -################################# -### Scalar Distance Functions ### -################################# - - -def abs_diff(a, b, base=0): - """Absolute value of difference between scalars. - - abs_diff is symmetric, i.e. `a` and `b` are interchangeable. - - Args: - a: First argument. An int. - b: Seconds argument. An int. - base: Dummy argument so that the argument signature matches other scalar - diff functions. abs_diff is the same in all bases. - - Returns: - abs(a - b). - """ - del base # Unused. - return abs(a - b) - - -def mod_abs_diff(a, b, base): - """Shortest distance between `a` and `b` in the modular integers base `base`. - - The smallest distance between a and b is returned. - Example: mod_abs_diff(1, 99, 100) ==> 2. It is not 98. - - mod_abs_diff is symmetric, i.e. `a` and `b` are interchangeable. - - Args: - a: First argument. An int. - b: Seconds argument. An int. - base: The modulo base. A positive int. - - Returns: - Shortest distance. - """ - diff = abs(a - b) - if diff >= base: - diff %= base - return min(diff, (-diff) + base) - - -############################### -### List Distance Functions ### -############################### - - -def absolute_distance(pred, target, base, scalar_diff_fn=abs_diff): - """Asymmetric list distance function. - - List distance is the sum of element-wise distances, like Hamming distance, but - where `pred` can be longer or shorter than `target`. For each position in both - `pred` and `target`, distance between those elements is computed with - `scalar_diff_fn`. 
For missing or extra elements in `pred`, the maximum - distance is assigned, which is equal to `base`. - - Distance is 0 when `pred` and `target` are identical, and will be a positive - integer when they are not. - - Args: - pred: Prediction list. Distance from this list is computed. - target: Target list. Distance to this list is computed. - base: The integer base to use. For example, a list of chars would use base - 256. - scalar_diff_fn: Element-wise distance function. - - Returns: - List distance between `pred` and `target`. - """ - d = 0 - for i, target_t in enumerate(target): - if i >= len(pred): - d += base # A missing slot is worth the max distance. - else: - # Add element-wise distance for this slot. - d += scalar_diff_fn(pred[i], target_t, base) - if len(pred) > len(target): - # Each extra slot is worth the max distance. - d += (len(pred) - len(target)) * base - return d - - -def log_absolute_distance(pred, target, base): - """Asymmetric list distance function that uses log distance. - - A list distance which computes sum of element-wise distances, similar to - `absolute_distance`. Unlike `absolute_distance`, this scales the resulting - distance to be a float. - - Element-wise distance are log-scale. Distance between two list changes - relatively less for elements that are far apart, but changes a lot (goes to 0 - faster) when values get close together. - - Args: - pred: List of ints. Computes distance from this list to the target. - target: List of ints. This is the "correct" list which the prediction list - is trying to match. - base: Integer base. - - Returns: - Float distance normalized so that when `pred` is at most as long as `target` - the distance is between 0.0 and 1.0. Distance grows unboundedly large - as `pred` grows past `target` in length. - """ - if not target: - length_normalizer = 1.0 - if not pred: - # Distance between [] and [] is 0.0 since they are equal. 
- return 0.0 - else: - length_normalizer = float(len(target)) - # max_dist is the maximum element-wise distance, before taking log and - # scaling. Since we use `mod_abs_diff`, it would be (base // 2), but we add - # 1 to it so that missing or extra positions get the maximum penalty. - max_dist = base // 2 + 1 - - # The log-distance will be scaled by a factor. - # Note: +1 is added to the numerator and denominator to avoid log(0). This - # only has a translational effect, i.e. log(dist + 1) / log(max_dist + 1). - factor = log(max_dist + 1) - - d = 0.0 # Total distance to be computed. - for i, target_t in enumerate(target): - if i >= len(pred): - # Assign the max element-wise distance for missing positions. This is 1.0 - # after scaling. - d += 1.0 - else: - # Add the log-dist divided by a scaling factor. - d += log(mod_abs_diff(pred[i], target_t, base) + 1) / factor - if len(pred) > len(target): - # Add the max element-wise distance for each extra position. - # Since max dist after scaling is 1, this is just the difference in list - # lengths. - d += (len(pred) - len(target)) - return d / length_normalizer # Normalize again by the target length. - - -######################## -### Reward Functions ### -######################## - -# Reward functions assign reward based on program output. -# Warning: only use these functions as the terminal rewards in episodes, i.e. -# for the "final" programs. - - -def absolute_distance_reward(pred, target, base, scalar_diff_fn=abs_diff): - """Reward function based on absolute_distance function. - - Maximum reward, 1.0, is given when the lists are equal. Reward is scaled - so that 0.0 reward is given when `pred` is the empty list (assuming `target` - is not empty). Reward can go negative when `pred` is longer than `target`. - - This is an asymmetric reward function, so which list is the prediction and - which is the target matters. - - Args: - pred: Prediction sequence. This should be the sequence outputted by the - generated code. 
List of ints n, where 0 <= n < base. - target: Target sequence. The correct sequence that the generated code needs - to output. List of ints n, where 0 <= n < base. - base: Base of the computation. - scalar_diff_fn: Element-wise distance function. - - Returns: - Reward computed based on `pred` and `target`. A float. - """ - unit_dist = float(base * len(target)) - if unit_dist == 0: - unit_dist = base - dist = absolute_distance(pred, target, base, scalar_diff_fn=scalar_diff_fn) - return (unit_dist - dist) / unit_dist - - -def absolute_mod_distance_reward(pred, target, base): - """Same as `absolute_distance_reward` but `mod_abs_diff` scalar diff is used. - - Args: - pred: Prediction sequence. This should be the sequence outputted by the - generated code. List of ints n, where 0 <= n < base. - target: Target sequence. The correct sequence that the generated code needs - to output. List of ints n, where 0 <= n < base. - base: Base of the computation. - - Returns: - Reward computed based on `pred` and `target`. A float. - """ - return absolute_distance_reward(pred, target, base, mod_abs_diff) - - -def absolute_log_distance_reward(pred, target, base): - """Compute reward using `log_absolute_distance`. - - Maximum reward, 1.0, is given when the lists are equal. Reward is scaled - so that 0.0 reward is given when `pred` is the empty list (assuming `target` - is not empty). Reward can go negative when `pred` is longer than `target`. - - This is an asymmetric reward function, so which list is the prediction and - which is the target matters. - - This reward function has the nice property that much more reward is given - for getting the correct value (at each position) than for there being any - value at all. For example, in base 100, lets say pred = [1] * 1000 - and target = [10] * 1000. A lot of reward would be given for being 80% - accurate (worst element-wise distance is 50, distances here are 9) using - `absolute_distance`. 
`log_absolute_distance` on the other hand will give - greater and greater reward increments the closer each predicted value gets to - the target. That makes the reward given for accuracy somewhat independant of - the base. - - Args: - pred: Prediction sequence. This should be the sequence outputted by the - generated code. List of ints n, where 0 <= n < base. - target: Target sequence. The correct sequence that the generated code needs - to output. List of ints n, where 0 <= n < base. - base: Base of the computation. - - Returns: - Reward computed based on `pred` and `target`. A float. - """ - return 1.0 - log_absolute_distance(pred, target, base) - - -####################### -### Reward Managers ### -####################### - -# Reward managers assign reward to many code attempts throughout an episode. - - -class RewardManager(object): - """Reward managers administer reward across an episode. - - Reward managers are used for "editor" environments. These are environments - where the agent has some way to edit its code over time, and run its code - many time in the same episode, so that it can make incremental improvements. - - Reward managers are instantiated with a target sequence, which is the known - correct program output. The manager is called on the output from a proposed - code, and returns reward. If many proposal outputs are tried, reward may be - some stateful function that takes previous tries into account. This is done, - in part, so that an agent cannot accumulate unbounded reward just by trying - junk programs as often as possible. So reward managers should not give the - same reward twice if the next proposal is not better than the last. - """ - __metaclass__ = ABCMeta - - def __init__(self, target, base, distance_fn=absolute_distance): - self._target = list(target) - self._base = base - self._distance_fn = distance_fn - - @abstractmethod - def __call__(self, sequence): - """Call this reward manager like a function to get reward. 
- - Calls to reward manager are stateful, and will take previous sequences - into account. Repeated calls with the same sequence may produce different - rewards. - - Args: - sequence: List of integers (each between 0 and base - 1). This is the - proposal sequence. Reward will be computed based on the distance - from this sequence to the target (distance function and target are - given in the constructor), as well as previous sequences tried during - the lifetime of this object. - - Returns: - Float value. The reward received from this call. - """ - return 0.0 - - -class DeltaRewardManager(RewardManager): - """Simple reward manager that assigns reward for the net change in distance. - - Given some (possibly asymmetric) list distance function, gives reward for - relative changes in prediction distance to the target. - - For example, if on the first call the distance is 3.0, the change in distance - is -3 (from starting distance of 0). That relative change will be scaled to - produce a negative reward for this step. On the next call, the distance is 2.0 - which is a +1 change, and that will be scaled to give a positive reward. - If the final call has distance 0 (the target is achieved), that is another - positive change of +2. The total reward across all 3 calls is then 0, which is - the highest posible episode total. - - Reward is scaled so that the maximum element-wise distance is worth 1.0. - Maximum total episode reward attainable is 0. - """ - - def __init__(self, target, base, distance_fn=absolute_distance): - super(DeltaRewardManager, self).__init__(target, base, distance_fn) - self._last_diff = 0 - - def _diff(self, seq): - return self._distance_fn(seq, self._target, self._base) - - def _delta_reward(self, seq): - # Reward is relative to previous sequence diff. - # Reward is scaled so that maximum token difference is worth 1.0. - # Reward = (last_diff - this_diff) / self.base. 
- # Reward is positive if this sequence is closer to the target than the - # previous sequence, and negative if this sequence is further away. - diff = self._diff(seq) - reward = (self._last_diff - diff) / float(self._base) - self._last_diff = diff - return reward - - def __call__(self, seq): - return self._delta_reward(seq) - - -class FloorRewardManager(RewardManager): - """Assigns positive reward for each step taken closer to the target. - - Given some (possibly asymmetric) list distance function, gives reward for - whenever a new episode minimum distance is reached. No reward is given if - the distance regresses to a higher value, so that the sum of rewards - for the episode is positive. - - Reward is scaled so that the maximum element-wise distance is worth 1.0. - Maximum total episode reward attainable is len(target). - - If the prediction sequence is longer than the target, a reward of -1 is given. - Subsequence predictions which are also longer get 0 reward. The -1 penalty - will be canceled out with a +1 reward when a prediction is given which is at - most the length of the target. - """ - - def __init__(self, target, base, distance_fn=absolute_distance): - super(FloorRewardManager, self).__init__(target, base, distance_fn) - self._last_diff = 0 - self._min_diff = self._max_diff() - self._too_long_penality_given = False - - def _max_diff(self): - return self._distance_fn([], self._target, self._base) - - def _diff(self, seq): - return self._distance_fn(seq, self._target, self._base) - - def _delta_reward(self, seq): - # Reward is only given if this sequence is closer to the target than any - # previous sequence. - # Reward is scaled so that maximum token difference is worth 1.0 - # Reward = (min_diff - this_diff) / self.base - # Reward is always positive. 
- diff = self._diff(seq) - if diff < self._min_diff: - reward = (self._min_diff - diff) / float(self._base) - self._min_diff = diff - else: - reward = 0.0 - return reward - - def __call__(self, seq): - if len(seq) > len(self._target): # Output is too long. - if not self._too_long_penality_given: - self._too_long_penality_given = True - reward = -1.0 - else: - reward = 0.0 # Don't give this penalty more than once. - return reward - - reward = self._delta_reward(seq) - if self._too_long_penality_given: - reward += 1.0 # Return the subtracted reward. - self._too_long_penality_given = False - return reward - diff --git a/research/brain_coder/common/reward_test.py b/research/brain_coder/common/reward_test.py deleted file mode 100644 index 38a1d4ace38cbc945362e52adb90cc9dd62f1be7..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/reward_test.py +++ /dev/null @@ -1,311 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.reward.""" - -from math import log -import numpy as np -import tensorflow as tf - -from common import reward # brain coder - - -class RewardTest(tf.test.TestCase): - - def testAbsDiff(self): - self.assertEqual(5, reward.abs_diff(15, 20)) - self.assertEqual(5, reward.abs_diff(20, 15)) - - def testModAbsDiff(self): - self.assertEqual(5, reward.mod_abs_diff(15, 20, 25)) - self.assertEqual(5, reward.mod_abs_diff(20, 15, 25)) - self.assertEqual(2, reward.mod_abs_diff(1, 24, 25)) - self.assertEqual(2, reward.mod_abs_diff(24, 1, 25)) - - self.assertEqual(0, reward.mod_abs_diff(0, 0, 5)) - self.assertEqual(1, reward.mod_abs_diff(0, 1, 5)) - self.assertEqual(2, reward.mod_abs_diff(0, 2, 5)) - self.assertEqual(2, reward.mod_abs_diff(0, 3, 5)) - self.assertEqual(1, reward.mod_abs_diff(0, 4, 5)) - - self.assertEqual(0, reward.mod_abs_diff(-1, 4, 5)) - self.assertEqual(1, reward.mod_abs_diff(-5, 4, 5)) - self.assertEqual(1, reward.mod_abs_diff(-7, 4, 5)) 
- self.assertEqual(1, reward.mod_abs_diff(13, 4, 5)) - self.assertEqual(1, reward.mod_abs_diff(15, 4, 5)) - - def testAbsoluteDistance_AbsDiffMethod(self): - self.assertEqual( - 4, - reward.absolute_distance([0], [4], 5, scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([4], [4], 5, scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([], [], 5, scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1], [], 5, scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([], [1], 5, scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([1, 2, 3], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 1, - reward.absolute_distance([1, 2, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 1, - reward.absolute_distance([1, 2, 2], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1, 2], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1, 2, 3, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - self.assertEqual( - 6, - reward.absolute_distance([4, 4, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.abs_diff)) - - def testAbsoluteDistance_ModDiffMethod(self): - self.assertEqual( - 1, - reward.absolute_distance([0], [4], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([4], [4], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([], [], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1], [], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([], [1], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 0, - reward.absolute_distance([1, 2, 3], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - 
self.assertEqual( - 1, - reward.absolute_distance([1, 2, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 1, - reward.absolute_distance([1, 2, 2], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1, 2], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([1, 2, 3, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - self.assertEqual( - 5, - reward.absolute_distance([4, 4, 4], [1, 2, 3], 5, - scalar_diff_fn=reward.mod_abs_diff)) - - def testLogAbsoluteDistance(self): - def log_diff(diff, base): - return log(diff + 1) / log(base // 2 + 2) - - self.assertEqual( - log_diff(1, 5), - reward.log_absolute_distance([0], [4], 5)) - self.assertEqual( - log_diff(2, 5), - reward.log_absolute_distance([1], [4], 5)) - self.assertEqual( - log_diff(2, 5), - reward.log_absolute_distance([2], [4], 5)) - self.assertEqual( - log_diff(1, 5), - reward.log_absolute_distance([3], [4], 5)) - self.assertEqual( - log_diff(3, 5), # max_dist = base // 2 + 1 = 3 - reward.log_absolute_distance([], [4], 5)) - self.assertEqual( - 0 + log_diff(3, 5), # max_dist = base // 2 + 1 = 3 - reward.log_absolute_distance([4, 4], [4], 5)) - self.assertEqual( - 0, - reward.log_absolute_distance([4], [4], 5)) - self.assertEqual( - 0, - reward.log_absolute_distance([], [], 5)) - self.assertEqual( - 1, - reward.log_absolute_distance([1], [], 5)) - self.assertEqual( - 1, - reward.log_absolute_distance([], [1], 5)) - - self.assertEqual( - 0, - reward.log_absolute_distance([1, 2, 3], [1, 2, 3], 5)) - self.assertEqual( - log_diff(1, 5) / 3, # divided by target length. 
- reward.log_absolute_distance([1, 2, 4], [1, 2, 3], 5)) - self.assertEqual( - log_diff(1, 5) / 3, - reward.log_absolute_distance([1, 2, 2], [1, 2, 3], 5)) - self.assertEqual( - log_diff(3, 5) / 3, # max_dist - reward.log_absolute_distance([1, 2], [1, 2, 3], 5)) - self.assertEqual( - log_diff(3, 5) / 3, # max_dist - reward.log_absolute_distance([1, 2, 3, 4], [1, 2, 3], 5)) - # Add log differences for each position. - self.assertEqual( - (log_diff(2, 5) + log_diff(2, 5) + log_diff(1, 5)) / 3, - reward.log_absolute_distance([4, 4, 4], [1, 2, 3], 5)) - - def testAbsoluteDistanceReward(self): - self.assertEqual( - 1, - reward.absolute_distance_reward([1, 2, 3], [1, 2, 3], 5)) - self.assertEqual( - 1 - 1 / (5 * 3.), # 1 - distance / (base * target_len) - reward.absolute_distance_reward([1, 2, 4], [1, 2, 3], 5)) - self.assertEqual( - 1 - 1 / (5 * 3.), - reward.absolute_distance_reward([1, 2, 2], [1, 2, 3], 5)) - self.assertTrue(np.isclose( - 1 - 5 / (5 * 3.), - reward.absolute_distance_reward([1, 2], [1, 2, 3], 5))) - self.assertTrue(np.isclose( - 1 - 5 / (5 * 3.), - reward.absolute_distance_reward([1, 2, 3, 4], [1, 2, 3], 5))) - # Add log differences for each position. 
- self.assertEqual( - 1 - (3 + 2 + 1) / (5 * 3.), - reward.absolute_distance_reward([4, 4, 4], [1, 2, 3], 5)) - self.assertEqual( - 1, - reward.absolute_distance_reward([], [], 5)) - - def testAbsoluteModDistanceReward(self): - self.assertEqual( - 1, - reward.absolute_mod_distance_reward([1, 2, 3], [1, 2, 3], 5)) - self.assertEqual( - 1 - 1 / (5 * 3.), # 1 - distance / (base * target_len) - reward.absolute_mod_distance_reward([1, 2, 4], [1, 2, 3], 5)) - self.assertEqual( - 1 - 1 / (5 * 3.), - reward.absolute_mod_distance_reward([1, 2, 2], [1, 2, 3], 5)) - self.assertTrue(np.isclose( - 1 - 5 / (5 * 3.), - reward.absolute_mod_distance_reward([1, 2], [1, 2, 3], 5))) - self.assertTrue(np.isclose( - 1 - 5 / (5 * 3.), - reward.absolute_mod_distance_reward([1, 2, 3, 4], [1, 2, 3], 5))) - # Add log differences for each position. - self.assertTrue(np.isclose( - 1 - (2 + 2 + 1) / (5 * 3.), - reward.absolute_mod_distance_reward([4, 4, 4], [1, 2, 3], 5))) - self.assertTrue(np.isclose( - 1 - (1 + 2 + 2) / (5 * 3.), - reward.absolute_mod_distance_reward([0, 1, 2], [4, 4, 4], 5))) - self.assertEqual( - 1, - reward.absolute_mod_distance_reward([], [], 5)) - - def testAbsoluteLogDistanceReward(self): - def log_diff(diff, base): - return log(diff + 1) / log(base // 2 + 2) - - self.assertEqual( - 1, - reward.absolute_log_distance_reward([1, 2, 3], [1, 2, 3], 5)) - self.assertEqual( - 1 - log_diff(1, 5) / 3, # divided by target length. - reward.absolute_log_distance_reward([1, 2, 4], [1, 2, 3], 5)) - self.assertEqual( - 1 - log_diff(1, 5) / 3, - reward.absolute_log_distance_reward([1, 2, 2], [1, 2, 3], 5)) - self.assertEqual( - 1 - log_diff(3, 5) / 3, # max_dist - reward.absolute_log_distance_reward([1, 2], [1, 2, 3], 5)) - self.assertEqual( - 1 - log_diff(3, 5) / 3, # max_dist - reward.absolute_log_distance_reward([1, 2, 3, 4], [1, 2, 3], 5)) - # Add log differences for each position. 
- self.assertEqual( - 1 - (log_diff(2, 5) + log_diff(2, 5) + log_diff(1, 5)) / 3, - reward.absolute_log_distance_reward([4, 4, 4], [1, 2, 3], 5)) - self.assertEqual( - 1 - (log_diff(1, 5) + log_diff(2, 5) + log_diff(2, 5)) / 3, - reward.absolute_log_distance_reward([0, 1, 2], [4, 4, 4], 5)) - self.assertEqual( - 1, - reward.absolute_log_distance_reward([], [], 5)) - - def testDeltaRewardManager(self): - reward_manager = reward.DeltaRewardManager( - [1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance) - self.assertEqual(-3, reward_manager([1])) - self.assertEqual(0, reward_manager([1])) - self.assertEqual(4 / 5., reward_manager([1, 3])) - self.assertEqual(-4 / 5, reward_manager([1])) - self.assertEqual(3, reward_manager([1, 2, 3, 4])) - self.assertEqual(-1, reward_manager([1, 2, 3])) - self.assertEqual(0, reward_manager([1, 2, 3, 4, 3])) - self.assertEqual(-1, reward_manager([1, 2, 3, 4, 3, 2])) - self.assertEqual(2, reward_manager([1, 2, 3, 4])) - self.assertEqual(0, reward_manager([1, 2, 3, 4])) - self.assertEqual(0, reward_manager([1, 2, 3, 4])) - - def testFloorRewardMananger(self): - reward_manager = reward.FloorRewardManager( - [1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance) - self.assertEqual(1, reward_manager([1])) - self.assertEqual(0, reward_manager([1])) - self.assertEqual(4 / 5., reward_manager([1, 3])) - self.assertEqual(0, reward_manager([1])) - self.assertEqual(1 / 5., reward_manager([1, 2])) - self.assertEqual(0, reward_manager([0, 1])) - self.assertEqual(0, reward_manager([])) - self.assertEqual(0, reward_manager([1, 2])) - self.assertEqual(2, reward_manager([1, 2, 3, 4])) - self.assertEqual(0, reward_manager([1, 2, 3])) - self.assertEqual(-1, reward_manager([1, 2, 3, 4, 3])) - self.assertEqual(0, reward_manager([1, 2, 3, 4, 3, 2])) - self.assertEqual(1, reward_manager([1, 2, 3, 4])) - self.assertEqual(0, reward_manager([1, 2, 3, 4])) - self.assertEqual(0, reward_manager([1, 2, 3, 4])) - - reward_manager = 
reward.FloorRewardManager( - [1, 2, 3, 4], base=5, distance_fn=reward.absolute_distance) - self.assertEqual(1, reward_manager([1])) - self.assertEqual(-1, reward_manager([1, 0, 0, 0, 0, 0])) - self.assertEqual(0, reward_manager([1, 2, 3, 4, 0, 0])) - self.assertEqual(0, reward_manager([1, 2, 3, 4, 0])) - self.assertEqual(1, reward_manager([])) - self.assertEqual(0, reward_manager([])) - self.assertEqual(0, reward_manager([1])) - self.assertEqual(1, reward_manager([1, 2])) - self.assertEqual(-1, reward_manager([1, 2, 3, 4, 0, 0])) - self.assertEqual(0, reward_manager([1, 1, 1, 1, 1])) - self.assertEqual(1 + 2, reward_manager([1, 2, 3, 4])) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/common/rollout.py b/research/brain_coder/common/rollout.py deleted file mode 100644 index e377aa662db640dfa907de83d32875cc096c4295..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/rollout.py +++ /dev/null @@ -1,306 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Utilities related to computing training batches from episode rollouts. - -Implementations here are based on code from Open AI: -https://github.com/openai/universe-starter-agent/blob/master/a3c.py. -""" - -from collections import namedtuple -import numpy as np -import scipy.signal - -from common import utils # brain coder - - -class Rollout(object): - """Holds a rollout for an episode. - - A rollout is a record of the states observed in some environment and actions - taken by the agent to arrive at those states. Other information includes - rewards received after each action, values estimated for each state, whether - the rollout concluded the episide, and total reward received. Everything - should be given in time order. - - At each time t, the agent sees state s_t, takes action a_t, and then receives - reward r_t. The agent may optionally estimate a state value V(s_t) for each - state. 
- - For an episode of length T: - states = [s_0, ..., s_(T-1)] - actions = [a_0, ..., a_(T-1)] - rewards = [r_0, ..., r_(T-1)] - values = [V(s_0), ..., V(s_(T-1))] - - Note that there is an extra state s_T observed after taking action a_(T-1), - but this is not included in the rollout. - - Rollouts have an `terminated` attribute which is True when the rollout is - "finalized", i.e. it holds a full episode. terminated will be False when - time steps are still being added to it. - """ - - def __init__(self): - self.states = [] - self.actions = [] - self.rewards = [] - self.values = [] - self.total_reward = 0.0 - self.terminated = False - - def add(self, state, action, reward, value=0.0, terminated=False): - """Add the next timestep to this rollout. - - Args: - state: The state observed at the start of this timestep. - action: The action taken after observing the given state. - reward: The reward received for taking the given action. - value: The value estimated for the given state. - terminated: Whether this timestep ends the episode. - - Raises: - ValueError: If this.terminated is already True, meaning that the episode - has already ended. - """ - if self.terminated: - raise ValueError( - 'Trying to add timestep to an already terminal rollout.') - self.states += [state] - self.actions += [action] - self.rewards += [reward] - self.values += [value] - self.terminated = terminated - self.total_reward += reward - - def add_many(self, states, actions, rewards, values=None, terminated=False): - """Add many timesteps to this rollout. - - Arguments are the same as `add`, but are lists of equal size. - - Args: - states: The states observed. - actions: The actions taken. - rewards: The rewards received. - values: The values estimated for the given states. - terminated: Whether this sequence ends the episode. - - Raises: - ValueError: If the lengths of all the input lists are not equal. 
- ValueError: If this.terminated is already True, meaning that the episode - has already ended. - """ - if len(states) != len(actions): - raise ValueError( - 'Number of states and actions must be the same. Got %d states and ' - '%d actions' % (len(states), len(actions))) - if len(states) != len(rewards): - raise ValueError( - 'Number of states and rewards must be the same. Got %d states and ' - '%d rewards' % (len(states), len(rewards))) - if values is not None and len(states) != len(values): - raise ValueError( - 'Number of states and values must be the same. Got %d states and ' - '%d values' % (len(states), len(values))) - if self.terminated: - raise ValueError( - 'Trying to add timesteps to an already terminal rollout.') - self.states += states - self.actions += actions - self.rewards += rewards - self.values += values if values is not None else [0.0] * len(states) - self.terminated = terminated - self.total_reward += sum(rewards) - - def extend(self, other): - """Append another rollout to this rollout.""" - assert not self.terminated - self.states.extend(other.states) - self.actions.extend(other.actions) - self.rewards.extend(other.rewards) - self.values.extend(other.values) - self.terminated = other.terminated - self.total_reward += other.total_reward - - -def discount(x, gamma): - """Returns discounted sums for each value in x, with discount factor gamma. - - This can be used to compute the return (discounted sum of rewards) at each - timestep given a sequence of rewards. See the definitions for return and - REINFORCE in section 3 of https://arxiv.org/pdf/1602.01783.pdf. - - Let g^k mean gamma ** k. - For list [x_0, ..., x_N], the following list of discounted sums is computed: - [x_0 + g^1 * x_1 + g^2 * x_2 + ... g^N * x_N, - x_1 + g^1 * x_2 + g^2 * x_3 + ... g^(N-1) * x_N, - x_2 + g^1 * x_3 + g^2 * x_4 + ... g^(N-2) * x_N, - ..., - x_(N-1) + g^1 * x_N, - x_N] - - Args: - x: List of numbers [x_0, ..., x_N]. - gamma: Float between 0 and 1 (inclusive). 
This is the discount factor. - - Returns: - List of discounted sums. - """ - return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1] - - -def discounted_advantage_and_rewards(rewards, values, gamma, lambda_=1.0): - """Compute advantages and returns (discounted sum of rewards). - - For an episode of length T, rewards = [r_0, ..., r_(T-1)]. - Each reward r_t is observed after taking action a_t at state s_t. A final - state s_T is observed but no reward is given at this state since no action - a_T is taken (otherwise there would be a new state s_(T+1)). - - `rewards` and `values` are for a single episode. Return R_t is the discounted - sum of future rewards starting at time t, where `gamma` is the discount - factor. - R_t = r_t + gamma * r_(t+1) + gamma**2 * r_(t+2) + ... - + gamma**(T-1-t) * r_(T-1) - - Advantage A(a_t, s_t) is approximated by computing A(a_t, s_t) = R_t - V(s_t) - where V(s_t) is an approximation of the value at that state, given in the - `values` list. Returns R_t are needed for all REINFORCE algorithms. Advantage - is used for the advantage actor critic variant of REINFORCE. - See algorithm S3 in https://arxiv.org/pdf/1602.01783.pdf. - - Additionally another parameter `lambda_` controls the bias-variance tradeoff. - See "Generalized Advantage Estimation": https://arxiv.org/abs/1506.02438. - lambda_ = 1 reduces to regular advantage. - 0 <= lambda_ < 1 trades off variance for bias, with lambda_ = 0 being the - most biased. - - Bootstrapping is also supported. If an episode does not end in a terminal - state (either because the episode was ended early, or the environment does not - have end states), the true return cannot be computed from the rewards alone. - However, it can be estimated by computing the value (an approximation of - return) of the last state s_T. Thus the `values` list will have an extra item: - values = [V(s_0), ..., V(s_(T-1)), V(s_T)]. - - Args: - rewards: List of observed rewards [r_0, ..., r_(T-1)]. 
- values: List of estimated values [V(s_0), ..., V(s_(T-1))] with an optional - extra V(s_T) item. - gamma: Discount factor. Number between 0 and 1. 1 means no discount. - If not 1, gamma is typically near 1, like 0.99. - lambda_: Bias-variance tradeoff factor. Between 0 and 1. - - Returns: - empirical_values: Returns at each timestep. - generalized_advantage: Avantages at each timestep. - - Raises: - ValueError: If shapes of `rewards` and `values` are not rank 1. - ValueError: If len(values) not in (len(rewards), len(rewards) + 1). - """ - rewards = np.asarray(rewards, dtype=np.float32) - values = np.asarray(values, dtype=np.float32) - if rewards.ndim != 1: - raise ValueError('Single episode only. rewards must be rank 1.') - if values.ndim != 1: - raise ValueError('Single episode only. values must be rank 1.') - if len(values) == len(rewards): - # No bootstrapping. - values = np.append(values, 0) - empirical_values = discount(rewards, gamma) - elif len(values) == len(rewards) + 1: - # With bootstrapping. - # Last value is for the terminal state (final state after last action was - # taken). - empirical_values = discount(np.append(rewards, values[-1]), gamma)[:-1] - else: - raise ValueError('values should contain the same number of items or one ' - 'more item than rewards') - delta = rewards + gamma * values[1:] - values[:-1] - generalized_advantage = discount(delta, gamma * lambda_) - - # empirical_values is the discounted sum of rewards into the future. - # generalized_advantage is the target for each policy update. - return empirical_values, generalized_advantage - - -"""Batch holds a minibatch of episodes. - -Let bi = batch_index, i.e. the index of each episode in the minibatch. -Let t = time. - -Attributes: - states: States for each timestep in each episode. Indexed by states[bi, t]. - actions: Actions for each timestep in each episode. Indexed by actions[bi, t]. 
- discounted_adv: Advantages (computed by discounted_advantage_and_rewards) - for each timestep in each episode. Indexed by discounted_adv[bi, t]. - discounted_r: Returns (discounted sum of rewards computed by - discounted_advantage_and_rewards) for each timestep in each episode. - Indexed by discounted_r[bi, t]. - total_rewards: Total reward for each episode, i.e. sum of rewards across all - timesteps (not discounted). Indexed by total_rewards[bi]. - episode_lengths: Number of timesteps in each episode. If an episode has - N actions, N rewards, and N states, then its length is N. Indexed by - episode_lengths[bi]. - batch_size: Number of episodes in this minibatch. An integer. - max_time: Maximum episode length in the batch. An integer. -""" # pylint: disable=pointless-string-statement -Batch = namedtuple( - 'Batch', - ['states', 'actions', 'discounted_adv', 'discounted_r', 'total_rewards', - 'episode_lengths', 'batch_size', 'max_time']) - - -def process_rollouts(rollouts, gamma, lambda_=1.0): - """Convert a batch of rollouts into tensors ready to be fed into a model. - - Lists from each episode are stacked into 2D tensors and padded with 0s up to - the maximum timestep in the batch. - - Args: - rollouts: A list of Rollout instances. - gamma: The discount factor. A number between 0 and 1 (inclusive). See gamma - argument in discounted_advantage_and_rewards. - lambda_: See lambda_ argument in discounted_advantage_and_rewards. - - Returns: - Batch instance. states, actions, discounted_adv, and discounted_r are - numpy arrays with shape (batch_size, max_episode_length). episode_lengths - is a list of ints. total_rewards is a list of floats (total reward in each - episode). batch_size and max_time are ints. - - Raises: - ValueError: If any of the rollouts are not terminal. 
- """ - for ro in rollouts: - if not ro.terminated: - raise ValueError('Can only process terminal rollouts.') - - episode_lengths = [len(ro.states) for ro in rollouts] - batch_size = len(rollouts) - max_time = max(episode_lengths) - - states = utils.stack_pad([ro.states for ro in rollouts], 0, max_time) - actions = utils.stack_pad([ro.actions for ro in rollouts], 0, max_time) - - discounted_rewards = [None] * batch_size - discounted_adv = [None] * batch_size - for i, ro in enumerate(rollouts): - disc_r, disc_adv = discounted_advantage_and_rewards( - ro.rewards, ro.values, gamma, lambda_) - discounted_rewards[i] = disc_r - discounted_adv[i] = disc_adv - discounted_rewards = utils.stack_pad(discounted_rewards, 0, max_time) - discounted_adv = utils.stack_pad(discounted_adv, 0, max_time) - - total_rewards = [sum(ro.rewards) for ro in rollouts] - - return Batch(states=states, - actions=actions, - discounted_adv=discounted_adv, - discounted_r=discounted_rewards, - total_rewards=total_rewards, - episode_lengths=episode_lengths, - batch_size=batch_size, - max_time=max_time) diff --git a/research/brain_coder/common/rollout_test.py b/research/brain_coder/common/rollout_test.py deleted file mode 100644 index 5be4cb0fafd8a2e94004c17b41e189d989a3a851..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/rollout_test.py +++ /dev/null @@ -1,129 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.rollout.""" - -import numpy as np -import tensorflow as tf - -from common import rollout as rollout_lib # brain coder - - -class RolloutTest(tf.test.TestCase): - - def MakeRollout(self, states, actions, rewards, values=None, terminated=True): - rollout = rollout_lib.Rollout() - rollout.add_many( - states=states, actions=actions, rewards=rewards, values=values, - terminated=terminated) - return rollout - - def testDiscount(self): - discounted = np.array([1.0 / 2 ** n for n in 
range(4, -1, -1)]) - discounted[:2] += [1.0 / 2 ** n for n in range(1, -1, -1)] - - self.assertTrue(np.array_equal( - rollout_lib.discount([0.0, 1.0, 0.0, 0.0, 1.0], 0.50), - discounted)) - self.assertTrue(np.array_equal( - rollout_lib.discount(np.array([0.0, 1.0, 0.0, 0.0, 1.0]), 0.50), - discounted)) - - def testDiscountedAdvantageAndRewards(self): - # lambda=1, No bootstrapping. - values = [0.1, 0.5, 0.5, 0.25] - (empirical_values, - generalized_advantage) = rollout_lib.discounted_advantage_and_rewards( - [0.0, 0.0, 0.0, 1.0], - values, - gamma=0.75, - lambda_=1.0) - expected_discounted_r = ( - np.array([1.0 * 0.75 ** n for n in range(3, -1, -1)])) - expected_adv = expected_discounted_r - values - self.assertTrue(np.array_equal(empirical_values, expected_discounted_r)) - self.assertTrue(np.allclose(generalized_advantage, expected_adv)) - - # lambda=1, With bootstrapping. - values = [0.1, 0.5, 0.5, 0.25, 0.75] - (empirical_values, - generalized_advantage) = rollout_lib.discounted_advantage_and_rewards( - [0.0, 0.0, 0.0, 1.0], - values, - gamma=0.75, - lambda_=1.0) - expected_discounted_r = ( - np.array([0.75 * 0.75 ** n for n in range(4, 0, -1)]) - + np.array([1.0 * 0.75 ** n for n in range(3, -1, -1)])) - expected_adv = expected_discounted_r - values[:-1] - self.assertTrue(np.array_equal(empirical_values, expected_discounted_r)) - self.assertTrue(np.allclose(generalized_advantage, expected_adv)) - - # lambda=0.5, With bootstrapping. 
- values = [0.1, 0.5, 0.5, 0.25, 0.75] - rewards = [0.0, 0.0, 0.0, 1.0] - l = 0.5 # lambda - g = 0.75 # gamma - (empirical_values, - generalized_advantage) = rollout_lib.discounted_advantage_and_rewards( - rewards, - values, - gamma=g, - lambda_=l) - expected_discounted_r = ( - np.array([0.75 * g ** n for n in range(4, 0, -1)]) - + np.array([1.0 * g ** n for n in range(3, -1, -1)])) - expected_adv = [0.0] * len(values) - for t in range(3, -1, -1): - delta_t = rewards[t] + g * values[t + 1] - values[t] - expected_adv[t] = delta_t + g * l * expected_adv[t + 1] - expected_adv = expected_adv[:-1] - self.assertTrue(np.array_equal(empirical_values, expected_discounted_r)) - self.assertTrue(np.allclose(generalized_advantage, expected_adv)) - - def testProcessRollouts(self): - g = 0.95 - rollouts = [ - self.MakeRollout( - states=[3, 6, 9], - actions=[1, 2, 3], - rewards=[1.0, -1.0, 0.5], - values=[0.5, 0.5, 0.1]), - self.MakeRollout( - states=[10], - actions=[5], - rewards=[1.0], - values=[0.5])] - batch = rollout_lib.process_rollouts(rollouts, gamma=g) - - self.assertEqual(2, batch.batch_size) - self.assertEqual(3, batch.max_time) - self.assertEqual([3, 1], batch.episode_lengths) - self.assertEqual([0.5, 1.0], batch.total_rewards) - self.assertEqual( - [[3, 6, 9], [10, 0, 0]], - batch.states.tolist()) - self.assertEqual( - [[1, 2, 3], [5, 0, 0]], - batch.actions.tolist()) - - rew1, rew2 = rollouts[0].rewards, rollouts[1].rewards - expected_discounted_rewards = [ - [rew1[0] + g * rew1[1] + g * g * rew1[2], - rew1[1] + g * rew1[2], - rew1[2]], - [rew2[0], 0.0, 0.0]] - expected_advantages = [ - [dr - v - for dr, v - in zip(expected_discounted_rewards[0], rollouts[0].values)], - [expected_discounted_rewards[1][0] - rollouts[1].values[0], 0.0, 0.0]] - self.assertTrue( - np.allclose(expected_discounted_rewards, batch.discounted_r)) - self.assertTrue( - np.allclose(expected_advantages, batch.discounted_adv)) - - -if __name__ == '__main__': - tf.test.main() diff --git 
a/research/brain_coder/common/schedules.py b/research/brain_coder/common/schedules.py deleted file mode 100644 index fff2481e536d65f154ad2d9dc3972657d860abf8..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/schedules.py +++ /dev/null @@ -1,301 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Schedule functions for controlling hparams over time.""" - -from abc import ABCMeta -from abc import abstractmethod -import math - -from common import config_lib # brain coder - - -class Schedule(object): - """Schedule is a function which sets a hyperparameter's value over time. - - For example, a schedule can be used to decay an hparams, or oscillate it over - time. - - This object is constructed with an instance of config_lib.Config (will be - specific to each class implementation). For example if this is a decay - schedule, the config may specify the rate of decay and decay start time. Then - the object instance is called like a function, mapping global step (an integer - counting how many calls to the train op have been made) to the hparam value. - - Properties of a schedule function f(t): - 0) Domain of t is the non-negative integers (t may be 0). - 1) Range of f is the reals. - 2) Schedule functions can assume that they will be called in time order. This - allows schedules to be stateful. - 3) Schedule functions should be deterministic. Two schedule instances with the - same config must always give the same value for each t, and regardless of - what t's it was previously called on. Users may call f(t) on arbitrary - (positive) time jumps. Essentially, multiple schedule instances used in - replica training will behave the same. - 4) Duplicate successive calls on the same time are allowed. - """ - __metaclass__ = ABCMeta - - @abstractmethod - def __init__(self, config): - """Construct this schedule with a config specific to each class impl. 
- - Args: - config: An instance of config_lib.Config. - """ - pass - - @abstractmethod - def __call__(self, global_step): - """Map `global_step` to a value. - - `global_step` is an integer counting how many calls to the train op have - been made across all replicas (hence why it is global). Implementations - may assume calls to be made in time order, i.e. `global_step` now >= - previous `global_step` values. - - Args: - global_step: Non-negative integer. - - Returns: - Hparam value at this step. A number. - """ - pass - - -class ConstSchedule(Schedule): - """Constant function. - - config: - const: Constant value at every step. - - f(t) = const. - """ - - def __init__(self, config): - super(ConstSchedule, self).__init__(config) - self.const = config.const - - def __call__(self, global_step): - return self.const - - -class LinearDecaySchedule(Schedule): - """Linear decay function. - - config: - initial: Decay starts from this value. - final: Decay ends at this value. - start_time: Step when decay starts. Constant before it. - end_time: When decay ends. Constant after it. - - f(t) is a linear function when start_time <= t <= end_time, with slope of - (final - initial) / (end_time - start_time). f(t) = initial - when t <= start_time. f(t) = final when t >= end_time. - - If start_time == end_time, this becomes a step function. - """ - - def __init__(self, config): - super(LinearDecaySchedule, self).__init__(config) - self.initial = config.initial - self.final = config.final - self.start_time = config.start_time - self.end_time = config.end_time - - if self.end_time < self.start_time: - raise ValueError('start_time must be before end_time.') - - # Linear interpolation. 
- self._time_diff = float(self.end_time - self.start_time) - self._diff = float(self.final - self.initial) - self._slope = ( - self._diff / self._time_diff if self._time_diff > 0 else float('inf')) - - def __call__(self, global_step): - if global_step <= self.start_time: - return self.initial - if global_step > self.end_time: - return self.final - return self.initial + (global_step - self.start_time) * self._slope - - -class ExponentialDecaySchedule(Schedule): - """Exponential decay function. - - See https://en.wikipedia.org/wiki/Exponential_decay. - - Use this decay function to decay over orders of magnitude. For example, to - decay learning rate from 1e-2 to 1e-6. Exponential decay will decay the - exponent linearly. - - config: - initial: Decay starts from this value. - final: Decay ends at this value. - start_time: Step when decay starts. Constant before it. - end_time: When decay ends. Constant after it. - - f(t) is an exponential decay function when start_time <= t <= end_time. The - decay rate and amplitude are chosen so that f(t) = initial when - t = start_time, and f(t) = final when t = end_time. f(t) is constant for - t < start_time or t > end_time. initial and final must be positive values. - - If start_time == end_time, this becomes a step function. - """ - - def __init__(self, config): - super(ExponentialDecaySchedule, self).__init__(config) - self.initial = config.initial - self.final = config.final - self.start_time = config.start_time - self.end_time = config.end_time - - if self.initial <= 0 or self.final <= 0: - raise ValueError('initial and final must be positive numbers.') - - # Linear interpolation in log space. 
- self._linear_fn = LinearDecaySchedule( - config_lib.Config( - initial=math.log(self.initial), - final=math.log(self.final), - start_time=self.start_time, - end_time=self.end_time)) - - def __call__(self, global_step): - return math.exp(self._linear_fn(global_step)) - - -class SmootherstepDecaySchedule(Schedule): - """Smootherstep decay function. - - A sigmoidal like transition from initial to final values. A smoother - transition than linear and exponential decays, hence the name. - See https://en.wikipedia.org/wiki/Smoothstep. - - config: - initial: Decay starts from this value. - final: Decay ends at this value. - start_time: Step when decay starts. Constant before it. - end_time: When decay ends. Constant after it. - - f(t) is fully defined here: - https://en.wikipedia.org/wiki/Smoothstep#Variations. - - f(t) is smooth, as in its first-derivative exists everywhere. - """ - - def __init__(self, config): - super(SmootherstepDecaySchedule, self).__init__(config) - self.initial = config.initial - self.final = config.final - self.start_time = config.start_time - self.end_time = config.end_time - - if self.end_time < self.start_time: - raise ValueError('start_time must be before end_time.') - - self._time_diff = float(self.end_time - self.start_time) - self._diff = float(self.final - self.initial) - - def __call__(self, global_step): - if global_step <= self.start_time: - return self.initial - if global_step > self.end_time: - return self.final - x = (global_step - self.start_time) / self._time_diff - - # Smootherstep - return self.initial + x * x * x * (x * (x * 6 - 15) + 10) * self._diff - - -class HardOscillatorSchedule(Schedule): - """Hard oscillator function. - - config: - high: Max value of the oscillator. Value at constant plateaus. - low: Min value of the oscillator. Value at constant valleys. - start_time: Global step when oscillation starts. Constant before this. - period: Width of one oscillation, i.e. 
number of steps over which the - oscillation takes place. - transition_fraction: Fraction of the period spent transitioning between high - and low values. 50% of this time is spent rising, and 50% of this time - is spent falling. 50% of the remaining time is spent constant at the - high value, and 50% of the remaining time is spent constant at the low - value. transition_fraction = 1.0 means the entire period is spent - rising and falling. transition_fraction = 0.0 means no time is spent - rising and falling, i.e. the function jumps instantaneously between - high and low. - - f(t) = high when t < start_time. - f(t) is periodic when t >= start_time, with f(t + period) = f(t). - f(t) is linear with positive slope when rising, and negative slope when - falling. At the start of the period t0, f(t0) = high and begins to descend. - At the middle of the period f is low and is constant until the ascension - begins. f then rises from low to high and is constant again until the period - repeats. - - Note: when transition_fraction is 0, f starts the period low and ends high. 
- """ - - def __init__(self, config): - super(HardOscillatorSchedule, self).__init__(config) - self.high = config.high - self.low = config.low - self.start_time = config.start_time - self.period = float(config.period) - self.transition_fraction = config.transition_fraction - self.half_transition_fraction = config.transition_fraction / 2.0 - - if self.transition_fraction < 0 or self.transition_fraction > 1.0: - raise ValueError('transition_fraction must be between 0 and 1.0') - if self.period <= 0: - raise ValueError('period must be positive') - - self._slope = ( - float(self.high - self.low) / self.half_transition_fraction - if self.half_transition_fraction > 0 else float('inf')) - - def __call__(self, global_step): - if global_step < self.start_time: - return self.high - period_pos = ((global_step - self.start_time) / self.period) % 1.0 - if period_pos >= 0.5: - # ascending - period_pos -= 0.5 - if period_pos < self.half_transition_fraction: - return self.low + period_pos * self._slope - else: - return self.high - else: - # descending - if period_pos < self.half_transition_fraction: - return self.high - period_pos * self._slope - else: - return self.low - - -_NAME_TO_CONFIG = { - 'const': ConstSchedule, - 'linear_decay': LinearDecaySchedule, - 'exp_decay': ExponentialDecaySchedule, - 'smooth_decay': SmootherstepDecaySchedule, - 'hard_osc': HardOscillatorSchedule, -} - - -def make_schedule(config): - """Schedule factory. - - Given `config` containing a `fn` property, a Schedule implementation is - instantiated with `config`. See `_NAME_TO_CONFIG` for `fn` options. - - Args: - config: Config with a `fn` option that specifies which Schedule - implementation to use. `config` is passed into the constructor. - - Returns: - A Schedule impl instance. 
- """ - schedule_class = _NAME_TO_CONFIG[config.fn] - return schedule_class(config) diff --git a/research/brain_coder/common/schedules_test.py b/research/brain_coder/common/schedules_test.py deleted file mode 100644 index b17022f45a833fb3aa219fd06225f77fbd1b1055..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/schedules_test.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.schedules.""" - -from math import exp -from math import sqrt -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from common import config_lib # brain coder -from common import schedules # brain coder - - -class SchedulesTest(tf.test.TestCase): - - def ScheduleTestHelper(self, config, schedule_subtype, io_values): - """Run common checks for schedules. - - Args: - config: Config object which is passed into schedules.make_schedule. - schedule_subtype: The expected schedule type to be instantiated. - io_values: List of (input, output) pairs. Must be in ascending input - order. No duplicate inputs. - """ - - # Check that make_schedule makes the correct type. - f = schedules.make_schedule(config) - self.assertTrue(isinstance(f, schedule_subtype)) - - # Check that multiple instances returned from make_schedule behave the same. - fns = [schedules.make_schedule(config) for _ in xrange(3)] - - # Check that all the inputs map to the right outputs. - for i, o in io_values: - for f in fns: - f_out = f(i) - self.assertTrue( - np.isclose(o, f_out), - 'Wrong value at input %d. Expected %s, got %s' % (i, o, f_out)) - - # Check that a subset of the io_values are still correct. 
- f = schedules.make_schedule(config) - subseq = [io_values[i**2] for i in xrange(int(sqrt(len(io_values))))] - if subseq[-1] != io_values[-1]: - subseq.append(io_values[-1]) - for i, o in subseq: - f_out = f(i) - self.assertTrue( - np.isclose(o, f_out), - 'Wrong value at input %d. Expected %s, got %s' % (i, o, f_out)) - - # Check duplicate calls. - f = schedules.make_schedule(config) - for i, o in io_values: - for _ in xrange(3): - f_out = f(i) - self.assertTrue( - np.isclose(o, f_out), - 'Duplicate calls at input %d are not equal. Expected %s, got %s' - % (i, o, f_out)) - - def testConstSchedule(self): - self.ScheduleTestHelper( - config_lib.Config(fn='const', const=5), - schedules.ConstSchedule, - [(0, 5), (1, 5), (10, 5), (20, 5), (100, 5), (1000000, 5)]) - - def testLinearDecaySchedule(self): - self.ScheduleTestHelper( - config_lib.Config(fn='linear_decay', initial=2, final=0, start_time=10, - end_time=20), - schedules.LinearDecaySchedule, - [(0, 2), (1, 2), (10, 2), (11, 1.8), (15, 1), (19, 0.2), (20, 0), - (100000, 0)]) - - # Test step function. - self.ScheduleTestHelper( - config_lib.Config(fn='linear_decay', initial=2, final=0, start_time=10, - end_time=10), - schedules.LinearDecaySchedule, - [(0, 2), (1, 2), (10, 2), (11, 0), (15, 0)]) - - def testExponentialDecaySchedule(self): - self.ScheduleTestHelper( - config_lib.Config(fn='exp_decay', initial=exp(-1), final=exp(-6), - start_time=10, end_time=20), - schedules.ExponentialDecaySchedule, - [(0, exp(-1)), (1, exp(-1)), (10, exp(-1)), (11, exp(-1/2. - 1)), - (15, exp(-5/2. - 1)), (19, exp(-9/2. - 1)), (20, exp(-6)), - (100000, exp(-6))]) - - # Test step function. 
- self.ScheduleTestHelper( - config_lib.Config(fn='exp_decay', initial=exp(-1), final=exp(-6), - start_time=10, end_time=10), - schedules.ExponentialDecaySchedule, - [(0, exp(-1)), (1, exp(-1)), (10, exp(-1)), (11, exp(-6)), - (15, exp(-6))]) - - def testSmootherstepDecaySchedule(self): - self.ScheduleTestHelper( - config_lib.Config(fn='smooth_decay', initial=2, final=0, start_time=10, - end_time=20), - schedules.SmootherstepDecaySchedule, - [(0, 2), (1, 2), (10, 2), (11, 1.98288), (15, 1), (19, 0.01712), - (20, 0), (100000, 0)]) - - # Test step function. - self.ScheduleTestHelper( - config_lib.Config(fn='smooth_decay', initial=2, final=0, start_time=10, - end_time=10), - schedules.SmootherstepDecaySchedule, - [(0, 2), (1, 2), (10, 2), (11, 0), (15, 0)]) - - def testHardOscillatorSchedule(self): - self.ScheduleTestHelper( - config_lib.Config(fn='hard_osc', high=2, low=0, start_time=100, - period=10, transition_fraction=0.5), - schedules.HardOscillatorSchedule, - [(0, 2), (1, 2), (10, 2), (100, 2), (101, 1.2), (102, 0.4), (103, 0), - (104, 0), (105, 0), (106, 0.8), (107, 1.6), (108, 2), (109, 2), - (110, 2), (111, 1.2), (112, 0.4), (115, 0), (116, 0.8), (119, 2), - (120, 2), (100001, 1.2), (100002, 0.4), (100005, 0), (100006, 0.8), - (100010, 2)]) - - # Test instantaneous step. 
- self.ScheduleTestHelper( - config_lib.Config(fn='hard_osc', high=2, low=0, start_time=100, - period=10, transition_fraction=0), - schedules.HardOscillatorSchedule, - [(0, 2), (1, 2), (10, 2), (99, 2), (100, 0), (104, 0), (105, 2), - (106, 2), (109, 2), (110, 0)]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/common/utils.py b/research/brain_coder/common/utils.py deleted file mode 100644 index fa5f1c50768986ee10eee6120a0bca392b1d9d0e..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/utils.py +++ /dev/null @@ -1,558 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Configuration class.""" - -import bisect -from collections import deque -import cPickle -import heapq -import random - -from absl import logging -import numpy as np -import six -from six.moves import xrange -import tensorflow as tf - - -def tuple_to_record(tuple_, record_type): - return record_type(**dict(zip(record_type.__slots__, tuple_))) - - -def make_record(type_name, attributes, defaults=None): - """Factory for mutable record classes. - - A record acts just like a collections.namedtuple except slots are writable. - One exception is that record classes are not equivalent to tuples or other - record classes of the same length. - - Note, each call to `make_record` produces a unique type. Two calls will make - different types even if `type_name` is the same each time. - - Args: - type_name: Name of the record type to create. - attributes: List of names of each record attribute. The order of the list - is preserved. - defaults: (optional) default values for attributes. A dict mapping attribute - names to values. - - Returns: - A new record type. - - Raises: - ValueError: If, - `defaults` is not a dict, - `attributes` contains duplicate names, - `defaults` keys are not contained in `attributes`. 
- """ - if defaults is None: - defaults = {} - if not isinstance(defaults, dict): - raise ValueError('defaults must be a dict.') - attr_set = set(attributes) - if len(attr_set) < len(attributes): - raise ValueError('No duplicate attributes allowed.') - if not set(defaults.keys()).issubset(attr_set): - raise ValueError('Default attributes must be given in the attributes list.') - - class RecordClass(object): - """A record type. - - Acts like mutable tuple with named slots. - """ - __slots__ = list(attributes) - _defaults = dict(defaults) - - def __init__(self, *args, **kwargs): - if len(args) > len(self.__slots__): - raise ValueError('Too many arguments. %s has length %d.' - % (type(self).__name__, len(self.__slots__))) - for attr, val in self._defaults.items(): - setattr(self, attr, val) - for i, arg in enumerate(args): - setattr(self, self.__slots__[i], arg) - for attr, val in kwargs.items(): - setattr(self, attr, val) - for attr in self.__slots__: - if not hasattr(self, attr): - raise ValueError('Required attr "%s" is not set.' % attr) - - def __len__(self): - return len(self.__slots__) - - def __iter__(self): - for attr in self.__slots__: - yield getattr(self, attr) - - def __getitem__(self, index): - return getattr(self, self.__slots__[index]) - - def __setitem__(self, index, value): - return setattr(self, self.__slots__[index], value) - - def __eq__(self, other): - # Types must be equal as well as values. - return (isinstance(other, type(self)) - and all(a == b for a, b in zip(self, other))) - - def __str__(self): - return '%s(%s)' % ( - type(self).__name__, - ', '.join(attr + '=' + str(getattr(self, attr)) - for attr in self.__slots__)) - - def __repr__(self): - return str(self) - - RecordClass.__name__ = type_name - return RecordClass - - -# Making minibatches. -def stack_pad(tensors, pad_axes=None, pad_to_lengths=None, dtype=np.float32, - pad_value=0): - """Stack tensors along 0-th dim and pad them to be the same shape. 
- - Args: - tensors: Any list of iterables (python list, numpy array, etc). Can be 1D - or multi-D iterables. - pad_axes: An int or list of ints. Axes to pad along. - pad_to_lengths: Length in each dimension. If pad_axes was an int, this is an - int or None. If pad_axes was a list of ints, this is a list of mixed int - and None types with the same length, or None. A None length means the - maximum length among the given tensors is used. - dtype: Type of output numpy array. Defaults to np.float32. - pad_value: Value to use for padding. Defaults to 0. - - Returns: - Numpy array containing the tensors stacked along the 0-th dimension and - padded along the specified dimensions. - - Raises: - ValueError: If the tensors do not have equal shapes along non-padded - dimensions. - """ - tensors = [np.asarray(t) for t in tensors] - max_lengths = [max(l) for l in zip(*[t.shape for t in tensors])] - same_axes = dict(enumerate(max_lengths)) - if pad_axes is None: - pad_axes = [] - if isinstance(pad_axes, six.integer_types): - if pad_to_lengths is not None: - max_lengths[pad_axes] = pad_to_lengths - del same_axes[pad_axes] - else: - if pad_to_lengths is None: - pad_to_lengths = [None] * len(pad_axes) - for i, l in zip(pad_axes, pad_to_lengths): - if l is not None: - max_lengths[i] = l - del same_axes[i] - same_axes_items = same_axes.items() - dest = np.full([len(tensors)] + max_lengths, pad_value, dtype=dtype) - for i, t in enumerate(tensors): - for j, l in same_axes_items: - if t.shape[j] != l: - raise ValueError( - 'Tensor at index %d does not have size %d along axis %d' - % (i, l, j)) - dest[[i] + [slice(0, d) for d in t.shape]] = t - return dest - - -class RandomQueue(deque): - - def __init__(self, capacity): - super(RandomQueue, self).__init__([], capacity) - self.capacity = capacity - - def random_sample(self, sample_size): - idx = np.random.choice(len(self), sample_size) - return [self[i] for i in idx] - - def push(self, item): - # Append to right. 
Oldest element will be popped from left. - self.append(item) - - -class MPQItemContainer(object): - """Class for holding an item with its score. - - Defines a comparison function for use in the heap-queue. - """ - - def __init__(self, score, item, extra_data): - self.item = item - self.score = score - self.extra_data = extra_data - - def __cmp__(self, other): - assert isinstance(other, type(self)) - return cmp(self.score, other.score) - - def __iter__(self): - """Allows unpacking like a tuple.""" - yield self.score - yield self.item - yield self.extra_data - - def __repr__(self): - """String representation of this item. - - `extra_data` is not included in the representation. We are assuming that - `extra_data` is not easily interpreted by a human (if it was, it should be - hashable, like a string or tuple). - - Returns: - String representation of `self`. - """ - return str((self.score, self.item)) - - def __str__(self): - return repr(self) - - -class MaxUniquePriorityQueue(object): - """A maximum priority queue where duplicates are not added. - - The top items by score remain in the queue. When the capacity is reached, - the lowest scored item in the queue will be dropped. - - This implementation differs from a typical priority queue, in that the minimum - score is popped, instead of the maximum. Largest scores remain stuck in the - queue. This is useful for accumulating the best known items from a population. - - The items used to determine uniqueness must be hashable, but additional - non-hashable data may be stored with each item. - """ - - def __init__(self, capacity): - self.capacity = capacity - self.heap = [] - self.unique_items = set() - - def push(self, score, item, extra_data=None): - """Push an item onto the queue. - - If the queue is at capacity, the item with the smallest score will be - dropped. Note that it is assumed each item has exactly one score. The same - item with a different score will still be dropped. 
- - Args: - score: Number used to prioritize items in the queue. Largest scores are - kept in the queue. - item: A hashable item to be stored. Duplicates of this item will not be - added to the queue. - extra_data: An extra (possible not hashable) data to store with the item. - """ - if item in self.unique_items: - return - if len(self.heap) >= self.capacity: - _, popped_item, _ = heapq.heappushpop( - self.heap, MPQItemContainer(score, item, extra_data)) - self.unique_items.add(item) - self.unique_items.remove(popped_item) - else: - heapq.heappush(self.heap, MPQItemContainer(score, item, extra_data)) - self.unique_items.add(item) - - def pop(self): - """Pop the item with the lowest score. - - Returns: - score: Item's score. - item: The item that was popped. - extra_data: Any extra data stored with the item. - """ - if not self.heap: - return () - score, item, extra_data = heapq.heappop(self.heap) - self.unique_items.remove(item) - return score, item, extra_data - - def get_max(self): - """Peek at the item with the highest score. - - Returns: - Same as `pop`. - """ - if not self.heap: - return () - score, item, extra_data = heapq.nlargest(1, self.heap)[0] - return score, item, extra_data - - def get_min(self): - """Peek at the item with the lowest score. - - Returns: - Same as `pop`. - """ - if not self.heap: - return () - score, item, extra_data = heapq.nsmallest(1, self.heap)[0] - return score, item, extra_data - - def random_sample(self, sample_size): - """Randomly select items from the queue. - - This does not modify the queue. - - Items are drawn from a uniform distribution, and not weighted by score. - - Args: - sample_size: Number of random samples to draw. The same item can be - sampled multiple times. - - Returns: - List of sampled items (of length `sample_size`). Each element in the list - is a tuple: (item, extra_data). 
- """ - idx = np.random.choice(len(self.heap), sample_size) - return [(self.heap[i].item, self.heap[i].extra_data) for i in idx] - - def iter_in_order(self): - """Iterate over items in the queue from largest score to smallest. - - Yields: - item: Hashable item. - extra_data: Extra data stored with the item. - """ - for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap): - yield item, extra_data - - def __len__(self): - return len(self.heap) - - def __iter__(self): - for _, item, _ in self.heap: - yield item - - def __repr__(self): - return '[' + ', '.join(repr(c) for c in self.heap) + ']' - - def __str__(self): - return repr(self) - - -class RouletteWheel(object): - """Randomly samples stored objects proportionally to their given weights. - - Stores objects and weights. Acts like a roulette wheel where each object is - given a slice of the roulette disk proportional to its weight. - - This can be used as a replay buffer where past experiences are sampled - proportionally to their weights. A good choice of "weight" for reinforcement - learning is exp(reward / temperature) where temperature -> inf makes the - distribution more uniform and temperature -> 0 makes the distribution more - peaky. - - To prevent experiences from being overweighted by appearing in the replay - buffer multiple times, a "unique mode" is supported where duplicate - experiences are ignored. In unique mode, weights can be quickly retrieved from - keys. - """ - - def __init__(self, unique_mode=False, save_file=None): - """Construct empty RouletteWheel. - - If `save_file` is not None, and the file already exists on disk, whatever - is in the file will be loaded into this instance. This allows jobs using - RouletteWheel to resume after preemption. - - Args: - unique_mode: If True, puts this RouletteWheel into unique mode, where - objects are added with hashable keys, so that duplicates are ignored. - save_file: Optional file path to save to. 
Must be a string containing - an absolute path to a file, or None. File will be Python pickle - format. - """ - self.unique_mode = unique_mode - self.objects = [] - self.weights = [] - self.partial_sums = [] - if self.unique_mode: - self.keys_to_weights = {} - self.save_file = save_file - self.save_to_disk_buffer = [] - - if save_file is not None and tf.gfile.Exists(save_file): - # Load from disk. - with tf.gfile.OpenFast(save_file, 'r') as f: - count = 0 - while 1: - try: - obj, weight, key = cPickle.load(f) - except EOFError: - break - else: - self.add(obj, weight, key) - count += 1 - logging.info('Loaded %d samples from disk.', count) - # Clear buffer since these items are already on disk. - self.save_to_disk_buffer = [] - - def __iter__(self): - return iter(zip(self.objects, self.weights)) - - def __len__(self): - return len(self.objects) - - def is_empty(self): - """Returns whether there is anything in the roulette wheel.""" - return not self.partial_sums - - @property - def total_weight(self): - """Total cumulative weight across all objects.""" - if self.partial_sums: - return self.partial_sums[-1] - return 0.0 - - def has_key(self, key): - if self.unique_mode: - RuntimeError('has_key method can only be called in unique mode.') - return key in self.keys_to_weights - - def get_weight(self, key): - if self.unique_mode: - RuntimeError('get_weight method can only be called in unique mode.') - return self.keys_to_weights[key] - - def add(self, obj, weight, key=None): - """Add one object and its weight to the roulette wheel. - - Args: - obj: Any object to be stored. - weight: A non-negative float. The given object will be drawn with - probability proportional to this weight when sampling. - key: This argument is only used when in unique mode. To allow `obj` to - be an unhashable type, like list, a separate hashable key is given. - Each `key` should be unique to each `obj`. `key` is used to check if - `obj` has been added to the roulette wheel before. 
- - Returns: - True if the object was added, False if it was not added due to it being - a duplicate (this only happens in unique mode). - - Raises: - ValueError: If `weight` is negative. - ValueError: If `key` is not given when in unique mode, or if `key` is - given when not in unique mode. - """ - if weight < 0: - raise ValueError('Weight must be non-negative') - if self.unique_mode: - if key is None: - raise ValueError( - 'Hashable key required for objects when unique mode is enabled.') - if key in self.keys_to_weights: - # Weight updates are not allowed. Ignore the given value of `weight`. - return False - self.keys_to_weights[key] = weight - elif key is not None: - raise ValueError( - 'key argument should not be used when unique mode is disabled.') - self.objects.append(obj) - self.weights.append(weight) - self.partial_sums.append(self.total_weight + weight) - if self.save_file is not None: - # Record new item in buffer. - self.save_to_disk_buffer.append((obj, weight, key)) - return True - - def add_many(self, objs, weights, keys=None): - """Add many object and their weights to the roulette wheel. - - Arguments are the same as the `add` method, except each is a list. Lists - must all be the same length. - - Args: - objs: List of objects to be stored. - weights: List of non-negative floats. See `add` method. - keys: List of hashable keys. This argument is only used when in unique - mode. See `add` method. - - Returns: - Number of objects added. This number will be less than the number of - objects provided if we are in unique mode and some keys are already - in the roulette wheel. - - Raises: - ValueError: If `keys` argument is provided when unique_mode == False, or - is not provided when unique_mode == True. - ValueError: If any of the lists are not the same length. - ValueError: If any of the weights are negative. - """ - if keys is not None and not self.unique_mode: - raise ValueError('Not in unique mode. 
Do not provide keys.') - elif keys is None and self.unique_mode: - raise ValueError('In unique mode. You must provide hashable keys.') - if keys and len(objs) != len(keys): - raise ValueError('Number of objects does not equal number of keys.') - if len(objs) != len(weights): - raise ValueError('Number of objects does not equal number of weights.') - return sum([self.add(obj, weights[i], key=keys[i] if keys else None) - for i, obj in enumerate(objs)]) - - def sample(self): - """Spin the roulette wheel. - - Randomly select an object with probability proportional to its weight. - - Returns: - object: The selected object. - weight: The weight of the selected object. - - Raises: - RuntimeError: If the roulette wheel is empty. - """ - if self.is_empty(): - raise RuntimeError('Trying to sample from empty roulette wheel.') - spin = random.random() * self.total_weight - - # Binary search. - i = bisect.bisect_right(self.partial_sums, spin) - if i == len(self.partial_sums): - # This should not happen since random.random() will always be strictly - # less than 1.0, and the last partial sum equals self.total_weight(). - # However it may happen due to rounding error. In that case it is easy to - # handle this, just select the last object. - i -= 1 - - return self.objects[i], self.weights[i] - - def sample_many(self, count): - """Spin the roulette wheel `count` times and return the results.""" - if self.is_empty(): - raise RuntimeError('Trying to sample from empty roulette wheel.') - return [self.sample() for _ in xrange(count)] - - def incremental_save(self, log_info=False): - """Write new entries to disk. - - This performs an append operation on the `save_file` given in the - constructor. Any entries added since the last call to `incremental_save` - will be appended to the file. - - If a new RouletteWheel is constructed with the same `save_file`, all the - entries written there will be automatically loaded into the instance. - This is useful when a job resumes after preemption. 
- - Args: - log_info: If True, info about this operation will be logged. - - Raises: - RuntimeError: If `save_file` given in the constructor is None. - """ - if self.save_file is None: - raise RuntimeError('Cannot call incremental_save. `save_file` is None.') - if log_info: - logging.info('Saving %d new samples to disk.', - len(self.save_to_disk_buffer)) - with tf.gfile.OpenFast(self.save_file, 'a') as f: - for entry in self.save_to_disk_buffer: - cPickle.dump(entry, f) - # Clear the buffer. - self.save_to_disk_buffer = [] diff --git a/research/brain_coder/common/utils_test.py b/research/brain_coder/common/utils_test.py deleted file mode 100644 index 569c2877d17bf7707616029cdd2a5eac55df7f60..0000000000000000000000000000000000000000 --- a/research/brain_coder/common/utils_test.py +++ /dev/null @@ -1,382 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for common.utils. -""" - -from collections import Counter -import random -import tempfile -import numpy as np -import tensorflow as tf - -from common import utils # brain coder - - -class UtilsTest(tf.test.TestCase): - - def testStackPad(self): - # 1D. - tensors = [[1, 2, 3], [4, 5, 6, 7, 8], [9]] - result = utils.stack_pad(tensors, pad_axes=0, pad_to_lengths=6) - self.assertTrue(np.array_equal( - result, - np.asarray([[1, 2, 3, 0, 0, 0], - [4, 5, 6, 7, 8, 0], - [9, 0, 0, 0, 0, 0]], dtype=np.float32))) - - # 3D. - tensors = [[[[1, 2, 3], [4, 5, 6]]], - [[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2]], [[3, 4, 5]]]] - result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[2, 2]) - self.assertTrue(np.array_equal( - result, - np.asarray([[[[1, 2, 3], [4, 5, 6]], - [[0, 0, 0], [0, 0, 0]]], - [[[7, 8, 9], [0, 1, 2]], - [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2], [0, 0, 0]], - [[3, 4, 5], [0, 0, 0]]]], dtype=np.float32))) - - def testStackPadNoAxes(self): - # 2D. 
- tensors = [[[1, 2, 3], [4, 5, 6]], - [[7, 8, 9], [1, 2, 3]], - [[4, 5, 6], [7, 8, 9]]] - result = utils.stack_pad(tensors) - self.assertTrue(np.array_equal( - result, - np.asarray(tensors))) - - def testStackPadNoneLength(self): - # 1D. - tensors = [[1, 2, 3], [4, 5, 6, 7, 8], [9]] - result = utils.stack_pad(tensors, pad_axes=0, pad_to_lengths=None) - self.assertTrue(np.array_equal( - result, - np.asarray([[1, 2, 3, 0, 0], - [4, 5, 6, 7, 8], - [9, 0, 0, 0, 0]], dtype=np.float32))) - - # 3D. - tensors = [[[[1, 2, 3], [4, 5, 6]]], - [[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2]], [[3, 4, 5]]]] - result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=None) - self.assertTrue(np.array_equal( - result, - np.asarray([[[[1, 2, 3], [4, 5, 6]], - [[0, 0, 0], [0, 0, 0]]], - [[[7, 8, 9], [0, 1, 2]], - [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2], [0, 0, 0]], - [[3, 4, 5], [0, 0, 0]]]], dtype=np.float32))) - - # 3D with partial pad_to_lengths. - tensors = [[[[1, 2, 3], [4, 5, 6]]], - [[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2]], [[3, 4, 5]]]] - result = utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[None, 3]) - self.assertTrue(np.array_equal( - result, - np.asarray([[[[1, 2, 3], [4, 5, 6], [0, 0, 0]], - [[0, 0, 0], [0, 0, 0], [0, 0, 0]]], - [[[7, 8, 9], [0, 1, 2], [0, 0, 0]], - [[3, 4, 5], [6, 7, 8], [0, 0, 0]]], - [[[0, 1, 2], [0, 0, 0], [0, 0, 0]], - [[3, 4, 5], [0, 0, 0], [0, 0, 0]]]], dtype=np.float32))) - - def testStackPadValueError(self): - # 3D. - tensors = [[[[1, 2, 3], [4, 5, 6]]], - [[[7, 8, 9], [0, 1, 2]], [[3, 4, 5], [6, 7, 8]]], - [[[0, 1, 2]], [[3, 4, 5]]], - [[[1, 2, 3, 4]]]] - - # Not all tensors have the same shape along axis 2. 
- with self.assertRaises(ValueError): - utils.stack_pad(tensors, pad_axes=[0, 1], pad_to_lengths=[2, 2]) - - def testRecord(self): - my_record = utils.make_record('my_record', ['a', 'b', 'c'], {'b': 55}) - inst = my_record(a=1, b=2, c=3) - self.assertEqual(1, inst.a) - self.assertEqual(2, inst.b) - self.assertEqual(3, inst.c) - self.assertEqual(1, inst[0]) - self.assertEqual(2, inst[1]) - self.assertEqual(3, inst[2]) - self.assertEqual([1, 2, 3], list(iter(inst))) - self.assertEqual(3, len(inst)) - - inst.b = 999 - self.assertEqual(999, inst.b) - self.assertEqual(999, inst[1]) - - inst2 = my_record(1, 999, 3) - self.assertTrue(inst == inst2) - inst2[1] = 3 - self.assertFalse(inst == inst2) - - inst3 = my_record(a=1, c=3) - inst.b = 55 - self.assertEqual(inst, inst3) - - def testRecordUnique(self): - record1 = utils.make_record('record1', ['a', 'b', 'c']) - record2 = utils.make_record('record2', ['a', 'b', 'c']) - self.assertNotEqual(record1(1, 2, 3), record2(1, 2, 3)) - self.assertEqual(record1(1, 2, 3), record1(1, 2, 3)) - - def testTupleToRecord(self): - my_record = utils.make_record('my_record', ['a', 'b', 'c']) - inst = utils.tuple_to_record((5, 6, 7), my_record) - self.assertEqual(my_record(5, 6, 7), inst) - - def testRecordErrors(self): - my_record = utils.make_record('my_record', ['a', 'b', 'c'], {'b': 10}) - - with self.assertRaises(ValueError): - my_record(c=5) # Did not provide required argument 'a'. - with self.assertRaises(ValueError): - my_record(1, 2, 3, 4) # Too many arguments. 
- - def testRandomQueue(self): - np.random.seed(567890) - queue = utils.RandomQueue(5) - queue.push(5) - queue.push(6) - queue.push(7) - queue.push(8) - queue.push(9) - queue.push(10) - self.assertTrue(5 not in queue) - sample = queue.random_sample(1000) - self.assertEqual(1000, len(sample)) - self.assertEqual([6, 7, 8, 9, 10], sorted(np.unique(sample).tolist())) - - def testMaxUniquePriorityQueue(self): - queue = utils.MaxUniquePriorityQueue(5) - queue.push(1.0, 'string 1') - queue.push(-0.5, 'string 2') - queue.push(0.5, 'string 3') - self.assertEqual((-0.5, 'string 2', None), queue.pop()) - queue.push(0.1, 'string 4') - queue.push(1.5, 'string 5') - queue.push(0.0, 'string 6') - queue.push(0.2, 'string 7') - self.assertEqual((1.5, 'string 5', None), queue.get_max()) - self.assertEqual((0.1, 'string 4', None), queue.get_min()) - self.assertEqual( - [('string 5', None), ('string 1', None), ('string 3', None), - ('string 7', None), ('string 4', None)], - list(queue.iter_in_order())) - - def testMaxUniquePriorityQueue_Duplicates(self): - queue = utils.MaxUniquePriorityQueue(5) - queue.push(0.0, 'string 1') - queue.push(0.0, 'string 2') - queue.push(0.0, 'string 3') - self.assertEqual((0.0, 'string 1', None), queue.pop()) - self.assertEqual((0.0, 'string 2', None), queue.pop()) - self.assertEqual((0.0, 'string 3', None), queue.pop()) - self.assertEqual(0, len(queue)) - queue.push(0.1, 'string 4') - queue.push(1.5, 'string 5') - queue.push(0.3, 'string 6') - queue.push(0.2, 'string 7') - queue.push(0.0, 'string 8') - queue.push(1.5, 'string 5') - queue.push(1.5, 'string 5') - self.assertEqual((1.5, 'string 5', None), queue.get_max()) - self.assertEqual((0.0, 'string 8', None), queue.get_min()) - self.assertEqual( - [('string 5', None), ('string 6', None), ('string 7', None), - ('string 4', None), ('string 8', None)], - list(queue.iter_in_order())) - - def testMaxUniquePriorityQueue_ExtraData(self): - queue = utils.MaxUniquePriorityQueue(5) - queue.push(1.0, 'string 
1', [1, 2, 3]) - queue.push(0.5, 'string 2', [4, 5, 6]) - queue.push(0.5, 'string 3', [7, 8, 9]) - queue.push(0.5, 'string 2', [10, 11, 12]) - self.assertEqual((0.5, 'string 2', [4, 5, 6]), queue.pop()) - self.assertEqual((0.5, 'string 3', [7, 8, 9]), queue.pop()) - self.assertEqual((1.0, 'string 1', [1, 2, 3]), queue.pop()) - self.assertEqual(0, len(queue)) - queue.push(0.5, 'string 2', [10, 11, 12]) - self.assertEqual((0.5, 'string 2', [10, 11, 12]), queue.pop()) - - def testRouletteWheel(self): - random.seed(12345678987654321) - r = utils.RouletteWheel() - self.assertTrue(r.is_empty()) - with self.assertRaises(RuntimeError): - r.sample() # Cannot sample when empty. - self.assertEqual(0, r.total_weight) - self.assertEqual(True, r.add('a', 0.1)) - self.assertFalse(r.is_empty()) - self.assertEqual(0.1, r.total_weight) - self.assertEqual(True, r.add('b', 0.01)) - self.assertEqual(0.11, r.total_weight) - self.assertEqual(True, r.add('c', 0.5)) - self.assertEqual(True, r.add('d', 0.1)) - self.assertEqual(True, r.add('e', 0.05)) - self.assertEqual(True, r.add('f', 0.03)) - self.assertEqual(True, r.add('g', 0.001)) - self.assertEqual(0.791, r.total_weight) - self.assertFalse(r.is_empty()) - - # Check that sampling is correct. - obj, weight = r.sample() - self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight)) - self.assertTrue((obj, weight) in r) - for obj, weight in r.sample_many(100): - self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight)) - self.assertTrue((obj, weight) in r) - - # Check that sampling distribution is correct. 
- n = 1000000 - c = Counter(r.sample_many(n)) - for obj, w in r: - estimated_w = c[(obj, w)] / float(n) * r.total_weight - self.assertTrue( - np.isclose(w, estimated_w, atol=1e-3), - 'Expected %s, got %s, for object %s' % (w, estimated_w, obj)) - - def testRouletteWheel_AddMany(self): - random.seed(12345678987654321) - r = utils.RouletteWheel() - self.assertTrue(r.is_empty()) - with self.assertRaises(RuntimeError): - r.sample() # Cannot sample when empty. - self.assertEqual(0, r.total_weight) - count = r.add_many( - ['a', 'b', 'c', 'd', 'e', 'f', 'g'], - [0.1, 0.01, 0.5, 0.1, 0.05, 0.03, 0.001]) - self.assertEqual(7, count) - self.assertFalse(r.is_empty()) - self.assertEqual(0.791, r.total_weight) - - # Adding no items is allowed. - count = r.add_many([], []) - self.assertEqual(0, count) - self.assertFalse(r.is_empty()) - self.assertEqual(0.791, r.total_weight) - - # Check that sampling is correct. - obj, weight = r.sample() - self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight)) - self.assertTrue((obj, weight) in r) - for obj, weight in r.sample_many(100): - self.assertTrue(isinstance(weight, float), 'Type: %s' % type(weight)) - self.assertTrue((obj, weight) in r) - - # Check that sampling distribution is correct. 
- n = 1000000 - c = Counter(r.sample_many(n)) - for obj, w in r: - estimated_w = c[(obj, w)] / float(n) * r.total_weight - self.assertTrue( - np.isclose(w, estimated_w, atol=1e-3), - 'Expected %s, got %s, for object %s' % (w, estimated_w, obj)) - - def testRouletteWheel_AddZeroWeights(self): - r = utils.RouletteWheel() - self.assertEqual(True, r.add('a', 0)) - self.assertFalse(r.is_empty()) - self.assertEqual(4, r.add_many(['b', 'c', 'd', 'e'], [0, 0.1, 0, 0])) - self.assertEqual( - [('a', 0.0), ('b', 0.0), ('c', 0.1), ('d', 0.0), ('e', 0.0)], - list(r)) - - def testRouletteWheel_UniqueMode(self): - random.seed(12345678987654321) - r = utils.RouletteWheel(unique_mode=True) - self.assertEqual(True, r.add([1, 2, 3], 1, 'a')) - self.assertEqual(True, r.add([4, 5], 0.5, 'b')) - self.assertEqual(False, r.add([1, 2, 3], 1.5, 'a')) - self.assertEqual( - [([1, 2, 3], 1.0), ([4, 5], 0.5)], - list(r)) - self.assertEqual(1.5, r.total_weight) - self.assertEqual( - 2, - r.add_many( - [[5, 6, 2, 3], [1, 2, 3], [8], [1, 2, 3]], - [0.1, 0.2, 0.1, 2.0], - ['c', 'a', 'd', 'a'])) - self.assertEqual( - [([1, 2, 3], 1.0), ([4, 5], 0.5), ([5, 6, 2, 3], 0.1), ([8], 0.1)], - list(r)) - self.assertTrue(np.isclose(1.7, r.total_weight)) - self.assertEqual(0, r.add_many([], [], [])) # Adding no items is allowed. - with self.assertRaises(ValueError): - # Key not given. - r.add([7, 8, 9], 2.0) - with self.assertRaises(ValueError): - # Keys not given. 
- r.add_many([[7, 8, 9], [10]], [2.0, 2.0]) - self.assertEqual(True, r.has_key('a')) - self.assertEqual(True, r.has_key('b')) - self.assertEqual(False, r.has_key('z')) - self.assertEqual(1.0, r.get_weight('a')) - self.assertEqual(0.5, r.get_weight('b')) - - r = utils.RouletteWheel(unique_mode=False) - self.assertEqual(True, r.add([1, 2, 3], 1)) - self.assertEqual(True, r.add([4, 5], 0.5)) - self.assertEqual(True, r.add([1, 2, 3], 1.5)) - self.assertEqual( - [([1, 2, 3], 1.0), ([4, 5], 0.5), ([1, 2, 3], 1.5)], - list(r)) - self.assertEqual(3, r.total_weight) - self.assertEqual( - 4, - r.add_many( - [[5, 6, 2, 3], [1, 2, 3], [8], [1, 2, 3]], - [0.1, 0.2, 0.1, 0.2])) - self.assertEqual( - [([1, 2, 3], 1.0), ([4, 5], 0.5), ([1, 2, 3], 1.5), - ([5, 6, 2, 3], 0.1), ([1, 2, 3], 0.2), ([8], 0.1), ([1, 2, 3], 0.2)], - list(r)) - self.assertTrue(np.isclose(3.6, r.total_weight)) - with self.assertRaises(ValueError): - # Key is given. - r.add([7, 8, 9], 2.0, 'a') - with self.assertRaises(ValueError): - # Keys are given. 
- r.add_many([[7, 8, 9], [10]], [2.0, 2.0], ['a', 'b']) - - def testRouletteWheel_IncrementalSave(self): - f = tempfile.NamedTemporaryFile() - r = utils.RouletteWheel(unique_mode=True, save_file=f.name) - entries = [ - ([1, 2, 3], 0.1, 'a'), - ([4, 5], 0.2, 'b'), - ([6], 0.3, 'c'), - ([7, 8, 9, 10], 0.25, 'd'), - ([-1, -2], 0.15, 'e'), - ([-3, -4, -5], 0.5, 'f')] - - self.assertTrue(r.is_empty()) - for i in range(0, len(entries), 2): - r.add(*entries[i]) - r.add(*entries[i + 1]) - r.incremental_save() - - r2 = utils.RouletteWheel(unique_mode=True, save_file=f.name) - self.assertEqual(i + 2, len(r2)) - count = 0 - for j, (obj, weight) in enumerate(r2): - self.assertEqual(entries[j][0], obj) - self.assertEqual(entries[j][1], weight) - self.assertEqual(weight, r2.get_weight(entries[j][2])) - count += 1 - self.assertEqual(i + 2, count) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/BUILD b/research/brain_coder/single_task/BUILD deleted file mode 100644 index 47e91b12b8ba40a2a9916a89375fbb773758d7cf..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/BUILD +++ /dev/null @@ -1,244 +0,0 @@ -licenses(["notice"]) - -package(default_visibility = [ - "//learning/brain/research/neural_coder:__subpackages__", -]) - -load("@subpar//:subpar.bzl", "par_binary") - -par_binary( - name = "run", - srcs = ["run.py"], - deps = [ - ":defaults", - ":ga_train", - ":pg_train", - # absl dep :app - # absl dep /flags - # absl dep /logging - ], -) - -par_binary( - name = "tune", - srcs = ["tune.py"], - deps = [ - ":defaults", - ":run", - # file dep - # absl dep :app - # absl dep /flags - # absl dep /logging - # numpy dep - # tensorflow dep - ], -) - -py_library( - name = "ga_train", - srcs = ["ga_train.py"], - deps = [ - ":data", - ":defaults", - ":ga_lib", - ":results_lib", - # file dep - # absl dep /flags - # absl dep /logging - # numpy dep - # tensorflow dep - "//common:utils", # project - ], -) - -py_library( - name 
= "ga_lib", - srcs = ["ga_lib.py"], - deps = [ - ":misc", - # absl dep /flags - # absl dep /logging - # numpy dep - "//common:bf", # project - "//common:utils", # project - ], -) - -py_test( - name = "ga_train_test", - srcs = ["ga_train_test.py"], - deps = [ - ":defaults", - ":run", - # absl dep /flags - # tensorflow dep - ], -) - -py_library( - name = "pg_train", - srcs = ["pg_train.py"], - deps = [ - ":data", - ":defaults", - ":pg_agent", - ":results_lib", - # file dep - # absl dep /flags - # absl dep /logging - # tensorflow dep - # tensorflow internal dep # build_cleaner: keep - ], -) - -py_library( - name = "pg_agent", - srcs = ["pg_agent.py"], - deps = [ - ":misc", - # file dep - # absl dep /logging - # numpy dep - # tensorflow dep - "//common:rollout", # project - "//common:utils", # project - ], -) - -py_test( - name = "pg_agent_test", - srcs = ["pg_agent_test.py"], - deps = [ - ":data", - ":defaults", - ":misc", - ":pg_agent", - ":pg_train", - # absl dep /logging - # numpy dep - # tensorflow dep - "//common:utils", # project - ], -) - -py_library( - name = "defaults", - srcs = ["defaults.py"], - deps = [ - # absl dep /logging - "//common:config_lib", # project - ], -) - -py_library( - name = "misc", - srcs = ["misc.py"], -) - -py_library( - name = "data", - srcs = ["data.py"], - deps = [ - ":code_tasks", - # absl dep /logging - ], -) - -py_library( - name = "code_tasks", - srcs = ["code_tasks.py"], - deps = [ - ":misc", - ":test_tasks", - # absl dep /logging - # numpy dep - "//common:bf", # project - "//common:reward", # project - ], -) - -py_test( - name = "code_tasks_test", - srcs = ["code_tasks_test.py"], - deps = [ - ":code_tasks", - ":defaults", - # numpy dep - # tensorflow dep - ], -) - -py_library( - name = "test_tasks", - srcs = ["test_tasks.py"], - deps = [ - ":misc", - "//common:reward", # project - ], -) - -py_test( - name = "test_tasks_test", - srcs = ["test_tasks_test.py"], - deps = [ - ":misc", - ":test_tasks", - # numpy dep - # tensorflow dep 
- ], -) - -py_test( - name = "pg_train_test", - size = "large", - srcs = ["pg_train_test.py"], - deps = [ - ":defaults", - ":run", - # absl dep /logging - # tensorflow dep - ], -) - -py_library( - name = "results_lib", - srcs = ["results_lib.py"], - deps = [ - # file dep - # tensorflow dep - ], -) - -py_test( - name = "results_lib_test", - srcs = ["results_lib_test.py"], - deps = [ - ":results_lib", - # tensorflow dep - ], -) - -par_binary( - name = "aggregate_experiment_results", - srcs = ["aggregate_experiment_results.py"], - deps = [ - ":misc", - ":results_lib", - # file dep - # absl dep :app - # absl dep /flags - # numpy dep - # tensorflow dep - ], -) - -par_binary( - name = "aggregate_tuning_results", - srcs = ["aggregate_tuning_results.py"], - deps = [ - # file dep - # absl dep :app - # absl dep /flags - # tensorflow dep - ], -) diff --git a/research/brain_coder/single_task/README.md b/research/brain_coder/single_task/README.md deleted file mode 100644 index 69eaabcc6ccabada838a0a2a3f12fd7eed69744c..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/README.md +++ /dev/null @@ -1,192 +0,0 @@ -# Experiments for ICLR 2018 paper. - -[Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526). - -Runs policy gradient (REINFORCE), priority queue training, genetic algorithm, -and uniform random search. - -Run all examples below out of your top-level repo directory, i.e. where your git -clone resides. - - -## Just tell me how to run something and see results -```bash -# These tasks are the fastest to learn. 'echo' and 'count-down' are very -# easy. run_eval_tasks.py will do most of the work to run all the jobs. -# Should take between 10 and 30 minutes. - -# How many repetitions each experiment will run. In the paper, we use 25. Less -# reps means faster experiments, but noisier results. -REPS=25 - -# Extra description in the job names for these experiments. 
Use this description -# to distinguish between multiple runs of the same experiment. -DESC="demo" - -# The tasks to run. -TASKS="reverse echo-second-seq" - -# The model types and max NPE. -EXPS=( pg-20M topk-20M ga-20M rand-20M ) - -# Where training data is saved. This is chosen by launch_training.sh. Custom -# implementations of launch_training.sh may use different locations. -MODELS_DIR="/tmp/models" - -# Run run_eval_tasks.py for each experiment name in EXPS. -for exp in "${EXPS[@]}" -do - ./single_task/run_eval_tasks.py \ - --exp "$exp" --tasks $TASKS --desc "$DESC" --reps $REPS -done - -# During training or after completion, run this to aggregate results into a -# table. This is also useful for seeing how much progress has been made. -# Make sure the arguments here match the settings used above. -# Note: This can take a few minutes because it reads from every experiment -# directory. -bazel run single_task:aggregate_experiment_results -- \ - --models_dir="$MODELS_DIR" \ - --max_npe="20M" \ - --task_list="$TASKS" \ - --model_types="[('pg', '$DESC'), ('topk', '$DESC'), ('ga', '$DESC'), - ('rand', '$DESC')]" \ - --csv_file="/tmp/results_table.csv" -``` - - -## Reproduce tuning results in paper -```bash -bazel build -c opt single_task:tune.par - -# PG and TopK Tuning. 
-MAX_NPE=5000000 -CONFIG=" -env=c(task_cycle=['reverse-tune','remove-tune']), -agent=c( - algorithm='pg', - grad_clip_threshold=50.0,param_init_factor=0.5,entropy_beta=0.05,lr=1e-5, - optimizer='rmsprop',ema_baseline_decay=0.99,topk_loss_hparam=0.0,topk=0, - replay_temperature=1.0,alpha=0.0,eos_token=False), -timestep_limit=50,batch_size=64" - -./single_task/launch_tuning.sh \ - --job_name="iclr_pg_gridsearch.reverse-remove" \ - --config="$CONFIG" \ - --max_npe="$MAX_NPE" \ - --num_workers_per_tuner=1 \ - --num_ps_per_tuner=0 \ - --num_tuners=1 \ - --num_repetitions=50 \ - --hparam_space_type="pg" \ - --stop_on_success=true -./single_task/launch_tuning.sh \ - --job_name="iclr_pg_topk_gridsearch.reverse-remove" \ - --config="$CONFIG" \ - --max_npe="$MAX_NPE" \ - --num_workers_per_tuner=1 \ - --num_ps_per_tuner=0 \ - --num_tuners=1 \ - --num_repetitions=50 \ - --hparam_space_type="pg-topk" \ - --fixed_hparams="topk=10" \ - --stop_on_success=true -./single_task/launch_tuning.sh \ - --job_name="iclr_topk_gridsearch.reverse-remove" \ - --config="$CONFIG" \ - --max_npe="$MAX_NPE" \ - --num_workers_per_tuner=1 \ - --num_ps_per_tuner=0 \ - --num_tuners=1 \ - --num_repetitions=50 \ - --hparam_space_type="topk" \ - --fixed_hparams="topk=10" \ - --stop_on_success=true - -# GA Tuning. -CONFIG=" -env=c(task_cycle=['reverse-tune','remove-char-tune']), -agent=c(algorithm='ga'), -timestep_limit=50" -./single_task/launch_tuning.sh \ - --job_name="iclr_ga_gridsearch.reverse-remove" \ - --config="$CONFIG" \ - --max_npe="$MAX_NPE" \ - --num_workers_per_tuner=25 \ - --num_ps_per_tuner=0 \ - --num_tuners=1 \ - --num_repetitions=50 \ - --hparam_space_type="ga" \ - --stop_on_success=true - -# Aggregate tuning results. Run after tuning jobs complete. 
-bazel run -c opt single_task:aggregate_tuning_results -- \ - --tuning_dir="$MODELS_DIR/iclr_pg_gridsearch.reverse-remove" -bazel run -c opt single_task:aggregate_tuning_results -- \ - --tuning_dir="$MODELS_DIR/iclr_pg_topk_gridsearch.reverse-remove" -bazel run -c opt single_task:aggregate_tuning_results -- \ - --tuning_dir="$MODELS_DIR/iclr_topk_gridsearch.reverse-remove" -bazel run -c opt single_task:aggregate_tuning_results -- \ - --tuning_dir="$MODELS_DIR/iclr_ga_gridsearch.reverse-remove" -``` - -## Reproduce eval results in paper -```bash -DESC="v0" # Description for each experiment. "Version 0" is a good default. -EXPS=( pg-5M topk-5M ga-5M rand-5M pg-20M topk-20M ga-20M rand-20M ) -for exp in "${EXPS[@]}" -do - ./single_task/run_eval_tasks.py \ - --exp "$exp" --iclr_tasks --desc "$DESC" -done -``` - -## Run single experiment -```bash -EXP="topk-20M" # Learning algorithm + max-NPE -TASK="reverse" # Coding task -DESC="v0" # Description for each experiment. "Version 0" is a good default. -./single_task/run_eval_tasks.py \ - --exp "$EXP" --task "$TASK" --desc "$DESC" -``` - -## Fetch eval results into a table -```bash -# These arguments should match the settings you used to run the experiments. -MODELS_DIR="/tmp/models" -MAX_NPE="20M" -DESC="v0" # Same description used in the experiments. -# MODEL_TYPES specifies each model type and the description used in their -# experiments. -MODEL_TYPES="[('pg', '$DESC'), ('topk', '$DESC'), - ('ga', '$DESC'), ('rand', '$DESC')]" -TASKS="" # Empty string will default to all ICLR tasks. -# To specify custom task list, give task names separated by spaces. Example: -# TASKS="reverse remove-char" -bazel run single_task:aggregate_experiment_results -- \ - --models_dir="$MODELS_DIR" \ - --max_npe="$MAX_NPE" \ - --task_list="$TASKS" \ - --model_types="$MODEL_TYPES" \ - --csv_file="/tmp/results_table.csv" -``` - -## Reproduce shortest code examples in paper -```bash -# Maximum NPE is higher here. 
We only do 1 repetition, and the algorithm needs -# time to simplify its solution. -MODELS_DIR="/tmp/models" -NPE="500M" -DESC="short-code" -./single_task/run_eval_tasks.py \ - --exp "simpl-$NPE" --desc "$DESC" --iclr_tasks --reps 1 - -# Aggregate best code strings. Run after training completes. -TASKS="" # Empty string. Will default to all ICLR tasks. -bazel run single_task:aggregate_experiment_results -- \ - --models_dir="$MODELS_DIR" \ - --max_npe="$NPE" \ - --task_list="$TASKS" \ - --model_types="[('topk', '$DESC')]" \ - --data=code -``` diff --git a/research/brain_coder/single_task/aggregate_experiment_results.py b/research/brain_coder/single_task/aggregate_experiment_results.py deleted file mode 100644 index f106253004b3bbe1ff32443c41b8999b1c9e96f6..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/aggregate_experiment_results.py +++ /dev/null @@ -1,380 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -r"""This script crawls experiment directories for results and aggregates them. 
- -Usage example: - -MODELS_DIR="/tmp/models" -bazel run single_task:aggregate_experiment_results -- \ - --models_dir="$MODELS_DIR" \ - --max_npe="20M" \ - --task_list="add echo" \ - --model_types="[('topk', 'v0'), ('ga', 'v0')]" \ - --csv_file=/tmp/results_table.csv -""" - -import ast -from collections import namedtuple -import csv -import os -import re -import StringIO -import sys - -from absl import app -from absl import flags -import numpy as np -import tensorflow as tf - -from single_task import misc # brain coder -from single_task import results_lib # brain coder - -DEFAULT_MODELS = [('pg', 'v0'), ('topk', 'v0'), ('ga', 'v0'), ('rand', 'v0')] -DEFAULT_TASKS = [ - 'reverse', 'remove-char', 'count-char', 'add', 'bool-logic', 'print-hello', - 'echo-twice', 'echo-thrice', 'copy-reverse', 'zero-cascade', 'cascade', - 'shift-left', 'shift-right', 'riffle', 'unriffle', 'middle-char', - 'remove-last', 'remove-last-two', 'echo-alternating', 'echo-half', 'length', - 'echo-second-seq', 'echo-nth-seq', 'substring', 'divide-2', 'dedup'] - -FLAGS = flags.FLAGS -flags.DEFINE_string( - 'models_dir', '', - 'Absolute path where results folders are found.') -flags.DEFINE_string( - 'exp_prefix', 'bf_rl_iclr', - 'Prefix for all experiment folders.') -flags.DEFINE_string( - 'max_npe', '5M', - 'String representation of max NPE of the experiments.') -flags.DEFINE_spaceseplist( - 'task_list', DEFAULT_TASKS, - 'List of task names separated by spaces. If empty string, defaults to ' - '`DEFAULT_TASKS`. These are the rows of the results table.') -flags.DEFINE_string( - 'model_types', str(DEFAULT_MODELS), - 'String representation of a python list of 2-tuples, each a model_type + ' - 'job description pair. Descriptions allow you to choose among different ' - 'runs of the same experiment. These are the columns of the results table.') -flags.DEFINE_string( - 'csv_file', '/tmp/results_table.csv', - 'Where to write results table. 
Format is CSV.') -flags.DEFINE_enum( - 'data', 'success_rates', ['success_rates', 'code'], - 'What type of data to aggregate.') - - -def make_csv_string(table): - """Convert 2D list to CSV string.""" - s = StringIO.StringIO() - writer = csv.writer(s) - writer.writerows(table) - value = s.getvalue() - s.close() - return value - - -def process_results(metrics): - """Extract useful information from given metrics. - - Args: - metrics: List of results dicts. These should have been written to disk by - training jobs. - - Returns: - Dict mapping stats names to values. - - Raises: - ValueError: If max_npe or max_global_repetitions values are inconsistant - across dicts in the `metrics` list. - """ - count = len(metrics) - success_count = 0 - total_npe = 0 # Counting NPE across all runs. - success_npe = 0 # Counting NPE in successful runs only. - max_npe = 0 - max_repetitions = 0 - for metric_dict in metrics: - if not max_npe: - max_npe = metric_dict['max_npe'] - elif max_npe != metric_dict['max_npe']: - raise ValueError( - 'Invalid experiment. Different reps have different max-NPE settings.') - if not max_repetitions: - max_repetitions = metric_dict['max_global_repetitions'] - elif max_repetitions != metric_dict['max_global_repetitions']: - raise ValueError( - 'Invalid experiment. Different reps have different num-repetition ' - 'settings.') - if metric_dict['found_solution']: - success_count += 1 - success_npe += metric_dict['npe'] - total_npe += metric_dict['npe'] - stats = {} - stats['max_npe'] = max_npe - stats['max_repetitions'] = max_repetitions - stats['repetitions'] = count - stats['successes'] = success_count # successful reps - stats['failures'] = count - success_count # failed reps - stats['success_npe'] = success_npe - stats['total_npe'] = total_npe - if success_count: - # Only successful runs counted. 
- stats['avg_success_npe'] = stats['success_npe'] / float(success_count) - else: - stats['avg_success_npe'] = 0.0 - if count: - stats['success_rate'] = success_count / float(count) - stats['avg_total_npe'] = stats['total_npe'] / float(count) - else: - stats['success_rate'] = 0.0 - stats['avg_total_npe'] = 0.0 - - return stats - - -ProcessedResults = namedtuple('ProcessedResults', ['metrics', 'processed']) - - -def get_results_for_experiment( - models_dir, task_name, model_type='pg', max_npe='5M', desc='v0', - name_prefix='bf_rl_paper', extra_desc=''): - """Get and process results for a given experiment. - - An experiment is a set of runs with the same hyperparameters and environment. - It is uniquely specified by a (task_name, model_type, max_npe) triple, as - well as an optional description. - - We assume that each experiment has a folder with the same name as the job that - ran the experiment. The name is computed by - "%name_prefix%.%desc%-%max_npe%_%task_name%". - - Args: - models_dir: Parent directory containing experiment folders. - task_name: String name of task (the coding env). See code_tasks.py or - run_eval_tasks.py - model_type: Name of the algorithm, such as 'pg', 'topk', 'ga', 'rand'. - max_npe: String SI unit representation of the maximum NPE threshold for the - experiment. For example, "5M" means 5 million. - desc: Description. - name_prefix: Prefix of job names. Normally leave this as default. - extra_desc: Optional extra description at the end of the job name. - - Returns: - ProcessedResults namedtuple instance, containing - metrics: Raw dicts read from disk. - processed: Stats computed by `process_results`. - - Raises: - ValueError: If max_npe in the metrics does not match NPE in the experiment - folder name. - """ - folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe, - task_name) - if extra_desc: - folder += '.' 
+ extra_desc - - results = results_lib.Results(os.path.join(models_dir, folder)) - metrics, _ = results.read_all() - processed = process_results(metrics) - if (not np.isclose(processed['max_npe'], misc.si_to_int(max_npe)) - and processed['repetitions']): - raise ValueError( - 'Invalid experiment. Max-NPE setting does not match expected max-NPE ' - 'in experiment name.') - return ProcessedResults(metrics=metrics, processed=processed) - - -BestCodeResults = namedtuple( - 'BestCodeResults', - ['code', 'reward', 'npe', 'folder', 'finished', 'error']) - - -class BestCodeResultError(object): - success = 0 - no_solution_found = 1 - experiment_does_not_exist = 2 - - -def get_best_code_for_experiment( - models_dir, task_name, model_type='pg', max_npe='5M', desc=0, - name_prefix='bf_rl_paper', extra_desc=''): - """Like `get_results_for_experiment`, but fetches the code solutions.""" - folder = name_prefix + '.{0}.{1}-{2}_{3}'.format(desc, model_type, max_npe, - task_name) - if extra_desc: - folder += '.' + extra_desc - - log_dir = os.path.join(models_dir, folder, 'logs') - search_regex = r'^solutions_([0-9])+\.txt$' - try: - all_children = tf.gfile.ListDirectory(log_dir) - except tf.errors.NotFoundError: - return BestCodeResults( - code=None, reward=0.0, npe=0, folder=folder, finished=False, - error=BestCodeResultError.experiment_does_not_exist) - solution_files = [ - fname for fname in all_children if re.search(search_regex, fname)] - max_reward = 0.0 - npe = 0 - best_code = None - for fname in solution_files: - with tf.gfile.FastGFile(os.path.join(log_dir, fname), 'r') as reader: - results = [ast.literal_eval(entry) for entry in reader] - for res in results: - if res['reward'] > max_reward: - best_code = res['code'] - max_reward = res['reward'] - npe = res['npe'] - error = ( - BestCodeResultError.success if best_code - else BestCodeResultError.no_solution_found) - try: - # If there is a status.txt file, check if it contains the status of the job. 
- with tf.gfile.FastGFile(os.path.join(log_dir, 'status.txt'), 'r') as f: - # Job is done, so mark this experiment as finished. - finished = f.read().lower().strip() == 'done' - except tf.errors.NotFoundError: - # No status file has been written, so the experiment is not done. No need to - # report an error here, because we do not require that experiment jobs write - # out a status.txt file until they have finished. - finished = False - return BestCodeResults( - code=best_code, reward=max_reward, npe=npe, folder=folder, - finished=finished, error=error) - - -def make_results_table( - models=None, - tasks=None, - max_npe='5M', - name_prefix='bf_rl_paper', - extra_desc='', - models_dir='/tmp'): - """Creates a table of results: algorithm + version by tasks. - - Args: - models: The table columns. A list of (algorithm, desc) tuples. - tasks: The table rows. List of task names. - max_npe: String SI unit representation of the maximum NPE threshold for the - experiment. For example, "5M" means 5 million. All entries in the table - share the same max-NPE. - name_prefix: Name prefix used in logging directory for the experiment. - extra_desc: Extra description added to name of logging directory for the - experiment. - models_dir: Parent directory containing all experiment folders. - - Returns: - A 2D list holding the table cells. 
- """ - if models is None: - models = DEFAULT_MODELS - if tasks is None: - tasks = DEFAULT_TASKS - model_results = {} - for model_type, desc in models: - model_results[model_type] = { - tname: get_results_for_experiment( - models_dir, tname, model_type, max_npe, desc, - name_prefix=name_prefix, extra_desc=extra_desc - ).processed - for tname in tasks} - - def info(stats): - return [str(stats['repetitions']), - '%.2f' % stats['success_rate'], - str(int(stats['avg_total_npe']))] - - rows = [['max NPE: ' + max_npe] - + misc.flatten([['{0} ({1})'.format(m, d), '', ''] - for m, d in models])] - rows.append( - [''] + misc.flatten([['reps', 'success rate', 'avg NPE'] - for _ in models])) - for tname in tasks: - rows.append( - [tname] - + misc.flatten([info(model_results[model][tname]) - for model, _ in models])) - - return rows - - -def print_results_table(results_table): - """Print human readable results table to stdout.""" - print('') - print('=== Results Table ===') - print('Format: # reps [success rate, avg total NPE]') - - def info_str(info_row): - # num_runs (success_rate, avg_total_npe) - if not info_row[0]: - return '0' - return '%s [%s, %s]' % (str(info_row[0]).ljust(2), info_row[1], info_row[2]) - - nc = len(results_table[0]) # num cols - out_table = [ - [results_table[0][0]] + [results_table[0][i] for i in range(1, nc, 3)]] - for row in results_table[2:]: - out_table.append([row[0]] + [info_str(row[i:i+3]) for i in range(1, nc, 3)]) - - nc = len(out_table[0]) # num cols - col_widths = [max(len(row[col]) for row in out_table) for col in range(nc)] - - table_string = '' - for row in out_table: - table_string += ''.join( - [row[c].ljust(col_widths[c] + 2) for c in range(nc)]) + '\n' - - print(table_string) - - -def main(argv): - del argv # Unused. 
- - name_prefix = FLAGS.exp_prefix - print('Experiments prefix: %s' % name_prefix) - - model_types = ast.literal_eval(FLAGS.model_types) - - if FLAGS.data == 'success_rates': - results_table = make_results_table( - models=model_types, tasks=FLAGS.task_list, max_npe=FLAGS.max_npe, - models_dir=FLAGS.models_dir, - name_prefix=name_prefix, extra_desc='') - with tf.gfile.FastGFile(FLAGS.csv_file, 'w') as f: - f.write(make_csv_string(results_table)) - - print_results_table(results_table) - else: - # Best code - print('* = experiment is still running') - print('') - print('=== Best Synthesized Code ===') - for model_type, desc in model_types: - print('%s (%s)' % (model_type, desc)) - sys.stdout.flush() - for tname in FLAGS.task_list: - res = get_best_code_for_experiment( - FLAGS.models_dir, tname, model_type, FLAGS.max_npe, desc, - name_prefix=name_prefix, extra_desc='') - unfinished_mark = '' if res.finished else ' *' - tname += unfinished_mark - if res.error == BestCodeResultError.success: - print(' %s' % tname) - print(' %s' % res.code) - print(' R=%.6f, NPE=%s' % (res.reward, misc.int_to_si(res.npe))) - elif res.error == BestCodeResultError.experiment_does_not_exist: - print(' Experiment does not exist. Check arguments.') - print(' Experiment folder: %s' % res.folder) - break - else: - print(' %s' % tname) - print(' (none)') - sys.stdout.flush() - - -if __name__ == '__main__': - app.run(main) diff --git a/research/brain_coder/single_task/aggregate_tuning_results.py b/research/brain_coder/single_task/aggregate_tuning_results.py deleted file mode 100644 index bb2e008ce583afbea8acabfe1ed8ccf264698f5e..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/aggregate_tuning_results.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -r"""After running tuning, use this script to aggregate the results. 
- -Usage: - -OUT_DIR="" -bazel run -c opt single_task:aggregate_tuning_results -- \ - --alsologtostderr \ - --tuning_dir="$OUT_DIR" -""" - -import ast -import os - -from absl import app -from absl import flags -import tensorflow as tf - - -FLAGS = flags.FLAGS -flags.DEFINE_string( - 'tuning_dir', '', - 'Absolute path where results tuning trial folders are found.') - - -def main(argv): - del argv # Unused. - - try: - trial_dirs = tf.gfile.ListDirectory(FLAGS.tuning_dir) - except tf.errors.NotFoundError: - print('Tuning directory %s does not exist.' % (FLAGS.tuning_dir,)) - return - - metrics = [] - for trial_dir in trial_dirs: - tuning_results_file = os.path.join( - FLAGS.tuning_dir, trial_dir, 'tuning_results.txt') - if tf.gfile.Exists(tuning_results_file): - with tf.gfile.FastGFile(tuning_results_file, 'r') as reader: - for line in reader: - metrics.append(ast.literal_eval(line.replace(': nan,', ': 0.0,'))) - - if not metrics: - print('No trials found.') - return - - num_trials = [m['num_trials'] for m in metrics] - assert all(n == num_trials[0] for n in num_trials) - num_trials = num_trials[0] - print('Found %d completed trials out of %d' % (len(metrics), num_trials)) - - # Sort by objective descending. 
- sorted_trials = sorted(metrics, key=lambda m: -m['objective']) - - for i, metrics in enumerate(sorted_trials): - hparams = metrics['hparams'] - keys = sorted(hparams.keys()) - print( - str(i).ljust(4) + ': ' - + '{0:.2f}'.format(metrics['objective']).ljust(10) - + '[' - + ','.join(['{}={}'.format(k, hparams[k]).ljust(24) for k in keys]) - + ']') - - -if __name__ == '__main__': - app.run(main) diff --git a/research/brain_coder/single_task/code_tasks.py b/research/brain_coder/single_task/code_tasks.py deleted file mode 100644 index 27cc7ecd1c76f2d765692ce0a94acd1df04ff681..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/code_tasks.py +++ /dev/null @@ -1,1381 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tasks for RL.""" - -import abc -import copy -import itertools -import random - -from absl import logging -import numpy as np -from six.moves import xrange - -from common import bf # brain coder -from common import reward as r # brain coder -from single_task import misc # brain coder -from single_task import test_tasks # brain coder - - -MAX_EXECUTION_STEPS = 5000 - - -def make_task(task_name, override_kwargs=None, max_code_length=100, - require_correct_syntax=False, - do_code_simplification=False, - correct_bonus=2.0, code_length_bonus=1.0): - """Make tasks with setting from paper.""" - logging.info('Making paper-config task.') - n = 16 # Number of test cases. 
- task_mapping = { - 'print-hello': ( - PrintTask, dict(base=27, fixed_string=[8, 5, 12, 12, 15])), - 'print': (PrintIntTask, dict(base=256, fixed_string=[1, 2, 3, 4, 5])), - 'echo': (EchoTask, dict(base=27, min_length=1, max_length=6)), - 'remove-char': ( - RemoveCharTask, dict(base=256, n=n, min_len=1, max_len=6)), - 'reverse': ( - ReverseTask, dict(base=256, n=n, min_len=1, max_len=6)), - 'reverse-tune': ( - ReverseTaskV2, dict(base=256, reward_type='static-bylen')), - 'remove-char-tune': (RemoveCharTaskV2, dict(base=27)), - 'prefix': (CommonPrefixTask, dict(base=27)), - 'find': (FindSubStrTask, dict(base=27)), - 'sort3': (SortFixedTaskV2, dict(base=27, n=150, length=3)), - 'count-char': (CountCharTaskV2, dict(n=n, max_len=6)), - 'bool-logic': (BooleanLogicTask, dict()), - 'add': (AddTask, dict(n=9)), - 'echo-twice': (EchoTwiceTask, dict(n=n)), - 'echo-thrice': (EchoThriceTask, dict(n=n)), - 'copy-reverse': (CopyReverseTask, dict(n=n)), - 'zero-cascade': (EchoZeroCascadeTask, dict(n=n)), - 'cascade': (EchoCascadeTask, dict(n=n)), - 'shift-left': (ShiftLeftTask, dict(n=n)), - 'shift-right': (ShiftRightTask, dict(n=n)), - 'riffle': (RiffleTask, dict(n=n)), - 'unriffle': (UnriffleTask, dict(n=n)), - 'middle-char': (MiddleCharTask, dict(n=n)), - 'remove-last': (RemoveLastTask, dict(n=n)), - 'remove-last-two': (RemoveLastTwoTask, dict(n=n)), - 'echo-alternating': (EchoAlternatingTask, dict(n=n)), - 'echo-half': (EchoHalfTask, dict(n=n)), - 'length': (LengthTask, dict(n=n)), - 'echo-second-seq': (EchoSecondSequenceTask, dict(n=n)), - 'echo-nth-seq': (EchoNthSequenceTask, dict(n=n)), - 'substring': (SubstringTask, dict(n=n)), - 'divide-2': (Divide2Task, dict(n=n)), - 'dedup': (DedupTask, dict(n=n)), - 'remove-target-char': (RemoveTargetCharTask, dict(n=n)), - 'list-index': (ListIndexTask, dict(n=n)), - 'fib': (FibonacciTask, dict()), - 'count-down': (BottlesOfBeerTask, dict()), - 'split': (SplitTask, dict()), - 'trim-left': (TrimLeftTask, dict()), - 'circle-route': ( - 
JudgeRouteCircleTask, dict(n=100, max_len=32)), - 'multiply': (MultiplyTask, dict(n=100)), - 'divmod': (DivModTask, dict(n=100)), - } - - if task_name not in task_mapping: - # Test tasks. - if task_name == 'test-hill-climb': - return test_tasks.BasicTaskManager(test_tasks.HillClimbingTask()) - raise ValueError('Unknown task type "%s"' % task_name) - task_cls, kwargs = task_mapping[task_name] - - if override_kwargs: - if not isinstance(override_kwargs, dict): - raise ValueError( - 'override_kwargs must be a dict, got: %s', override_kwargs) - kwargs.update(override_kwargs) - - task = task_cls(**kwargs) - - reward_fn = r.absolute_distance_reward - # reward_fn = r.absolute_mod_distance_reward - # reward_fn = r.absolute_log_distance_reward - logging.info('Using reward function: %s', reward_fn.__name__) - - # We want reward with and without code simplification to be scaled the same - # way. Without code simplification, give the maximum code length bonus - # every time. - min_code_length = 0.0 if do_code_simplification else max_code_length - - return MultiIOTaskManager( - task=task, correct_bonus=correct_bonus, - code_length_bonus=code_length_bonus, - max_code_length=max_code_length, min_code_length=min_code_length, - reward_fn=reward_fn, require_correct_syntax=require_correct_syntax) - - -def concat(lists): - if not lists: - return [] - l = lists[0] - for k in lists[1:]: - l += k - return l - - -def concat_join(lists, sep): - if not lists: - return [] - l = lists[0] - for k in lists[1:]: - l += [sep] + k - return l - - -def clipped_linear(x, x0, y0, slope, y_range): - min_y, max_y = y_range - return min(max(slope * (x - x0) + y0, min_y), max_y) - - -class MultiIOTaskManager(object): - """Supports tasks which test the code with multiple I/O examples.""" - - def __init__(self, task, max_code_length=32, min_code_length=0, - max_execution_steps=MAX_EXECUTION_STEPS, correct_bonus=1.0, - code_length_bonus=1.0, failure_reward=-2.0, reward_fn=None, - 
require_correct_syntax=False): - assert isinstance(task, BaseTask) - self.task = task - self.max_code_length = max_code_length - self.min_code_length = min_code_length - self.max_execution_steps = max_execution_steps - self.require_correct_syntax = require_correct_syntax - self.correct_bonus = correct_bonus - self.code_length_bonus = code_length_bonus - self.failure_reward = failure_reward - self.time_penalty = ( - 1.0 / (max_code_length - min_code_length) - if max_code_length > min_code_length else 0.0) - if reward_fn is None: - self.reward_fn = r.absolute_distance_reward - else: - self.reward_fn = reward_fn - self.input_type = ( - task.input_type if hasattr(task, 'input_type') else misc.IOType.integer) - self.output_type = ( - task.output_type if hasattr(task, 'output_type') - else misc.IOType.integer) - self._compute_best_reward() - - def _compute_best_reward(self): - io_seqs = self.task.make_io_set() - reward = 0.0 - for _, output_seq in io_seqs: - reward += self.reward_fn(output_seq, output_seq, self.task.base) - reward += self.correct_bonus - reward += self.code_length_bonus # Bonus for shortest code. - self.best_reward = reward - self.good_reward = 0.75 * reward - logging.info('Known best reward: %.4f', self.best_reward) - - def _score_batch(self, code_strings): - return [self._score_code(code) for code in code_strings] - - def _score_code(self, code): - """Run test cases on code and compute reward. - - Args: - code: A single BF code string. - - Returns: - misc.RewardInfo namedtuple instance containing reward and code execution - information, including inputs, expected outputs, code outputs, input - and output types, and reason for the reward obtained. - """ - # Get list of 2-tuples, each containing an input sequence and an output - # sequence. 
- io_seqs = self.task.make_io_set() - terminal_reward = 0.0 - results = [] - reason = 'correct' - for input_seq, output_seq in io_seqs: - eval_result = bf.evaluate( - code, input_buffer=input_seq, timeout=0.1, - max_steps=self.max_execution_steps, - base=self.task.base, - require_correct_syntax=self.require_correct_syntax) - result, success = eval_result.output, eval_result.success - if not success: - # Code execution timed out. - terminal_reward = self.failure_reward - results = [] - reason = eval_result.failure_reason - break - else: - terminal_reward += self.reward_fn(result, output_seq, self.task.base) - if result == output_seq: - terminal_reward += self.correct_bonus # Bonus for correct answer. - - # Only add additional reward for shorter code. Subtracting reward - # interferes with the main objective. Only optimize for length once - # any solution is found. - if self.min_code_length == self.max_code_length: - terminal_reward += self.code_length_bonus - else: - terminal_reward += self.code_length_bonus * clipped_linear( - x=len(code), x0=self.min_code_length, y0=1.0, - slope=-self.time_penalty, y_range=(0.0, 1.0)) - - # reason remains 'correct' if it is already - elif reason == 'correct': - reason = 'wrong' - results.append(result) - - # Return list of rewards, one for each char in the code. All are 0 except - # for the terminal reward. - terminal_reward /= self.best_reward - return misc.RewardInfo( - episode_rewards=[0.0] * (len(code) - 1) + [terminal_reward], - input_case=misc.IOTuple(i for i, o in io_seqs), - correct_output=misc.IOTuple(o for i, o in io_seqs), - code_output=misc.IOTuple(results), - input_type=self.input_type, - output_type=self.output_type, - reason=reason) - - def rl_batch(self, batch_size): - """Produces list of reward functions. 
One for each program in the batch.""" - return [self._score_code] * batch_size - - -def conditional_overwrite(current_value, new_value, allowed_overwrite_values): - if current_value in allowed_overwrite_values: - return new_value - return current_value - - -class BaseTask(object): - """A coding task. - - All coding tasks should inherit this class. - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, base=256): - self.base = base # All tasks must set the integer base that the expect. - - @abc.abstractmethod - def make_io_set(self): - """Generate a set of test cases for the task. - - Returns: - List of tuples, where each tuple is (input_case, output_case). - input_case and output_case are lists of integers. - """ - pass - - -# ============================================================================== -# ICLR tasks. -# ============================================================================== - - -class PrintTask(BaseTask): - """Print string coding task. - - Code needs to output a fixed string (given as a hyperparameter to the - task constructor). Program input is ignored. - """ - - def __init__(self, base, fixed_string=None): - super(type(self), self).__init__() - self.base = base # base includes EOS - self.eos = 0 - if fixed_string: - self.fixed_string = fixed_string - else: - self.fixed_string = [1, 2, 3, 0] # ABC - self.min_length = self.max_length = len(self.fixed_string) - - def make_io_set(self): - return [(list(), list(self.fixed_string))] - - -class RemoveCharTaskV2(BaseTask): - """Remove character coding task (version 2). - - Code needs to pipe input to output, but with all the 'A' (value 1) chars - removed. 'A' appears exactly once in each input. - - Test cases are hard-coded. 
- """ - - def __init__(self, base): - super(type(self), self).__init__() - self.base = base - self.eos = 0 - self.remove_char = 1 - assert base >= 27 - - def make_io_set(self): - rm = self.remove_char - return [ - ([rm, 0], [0]), - ([20, rm, 0], [20, 0]), - ([rm, 13, 0], [13, 0]), - ([6, rm, 17, 0], [6, 17, 0]), - ([rm, 11, 24, 0], [11, 24, 0]), - ([2, 16, 21, rm, 0], [2, 16, 21, 0]), - ([18, rm, 12, 26, 7, 0], [18, 12, 26, 7, 0]), - ([9, 10, 22, rm, 4, 0], [9, 10, 22, 4, 0])] - - -class RemoveCharTask(BaseTask): - """Remove character coding task. - - Code needs to pipe input to output, but with all the 'A' (value 1) chars - removed. 'A' appears at least once in each input. - - Test cases are dynamically generated, allowing for the number of test cases - to be a hyperparameter. - """ - - def __init__(self, base, n, min_len, max_len): - super(type(self), self).__init__() - self.base = base - self.eos = 0 - self.remove_char = 1 - assert base >= 27 - self._io_pairs = self._make_io_examples(n, min_len, max_len) - - def _make_io_examples(self, n, min_len, max_len): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [] - for _ in xrange(n): - length = rand.randrange(min_len, max_len + 1) - rm_char_pos = rand.randrange(0, length) - input_seq = [rand.randrange(1, self.base) for _ in xrange(length)] - input_seq[rm_char_pos] = self.remove_char - output_seq = list(input_seq) - del output_seq[rm_char_pos] - output_seq.append(0) - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class ReverseTaskV2(BaseTask): - """Reverse string coding task (version 2). - - Code needs to pipe input to output, but in reverse order. - - Stochastic test case = new test case randomly generated for every run of - `make_io_set`, i.e. different test cases every time code is scored. 
- - Task supports different types of test cases: - rand-one: Code is scored on one stochastic test case. - rand-many: Code is scored on 5 stochastic test cases. - static-bylen: Code is scored on 5 static test cases. There is one test - case for string lengths 1 through 5. - rand-bylen: Code is scored on 5 stochastic test cases, where there is one - test case for string lengths 1 through 5. - """ - - def __init__(self, base, reward_type): - super(type(self), self).__init__() - self.base = base # base includes EOS - assert base >= 27 - self.eos = 0 - self.io_pair_fn = { - # One random example at a time. - 'rand-one': lambda: self._io_rand(1), - # K randomy examples at a time (any lengths). - 'rand-many': lambda: self._io_rand(5), - # Static examples, one for each length. - 'static-bylen': self._io_static_by_len, - # Random examples, one for each length. - 'rand-bylen': self._io_rand_by_len}[reward_type] - - def _make_io_examples(self, sequences): - outputs = [list(i) for i in sequences] - for o in outputs: - o.reverse() - o.append(0) - inputs = [i + [0] for i in sequences] - return zip(inputs, outputs) - - def _io_rand(self, k): - inputs = [(np.random.choice(26, random.randrange(1, 6)) + 1).tolist() - for _ in xrange(k)] - return self._make_io_examples(inputs) - - def _io_rand_by_len(self, k=5): - inputs = [(np.random.choice(26, length) + 1).tolist() - for length in xrange(1, k + 1)] - return self._make_io_examples(inputs) - - def _io_static_by_len(self): - return [ - ([7, 0], [7, 0]), - ([6, 2, 0], [2, 6, 0]), - ([5, 1, 10, 0], [10, 1, 5, 0]), - ([8, 6, 5, 15, 0], [15, 5, 6, 8, 0]), - ([10, 12, 5, 2, 7, 0], [7, 2, 5, 12, 10, 0])] - - def make_io_set(self): - return self.io_pair_fn() - - -class ReverseTask(BaseTask): - """Reverse string coding task. - - Code needs to pipe input to output, but in reverse order. - - Test cases are dynamically generated, allowing for the number of test cases - to be a hyperparameter. 
- """ - - def __init__(self, base, n, min_len, max_len): - super(type(self), self).__init__() - self.base = base # base includes EOS - assert base >= 27 - self.eos = 0 - self._io_pairs = self._make_io_examples(n, min_len, max_len) - - def _make_io_examples(self, n, min_len, max_len): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [] - for _ in xrange(n): - length = rand.randrange(min_len, max_len + 1) - input_seq = [rand.randrange(1, self.base) for _ in xrange(length)] - output_seq = list(input_seq) - output_seq.reverse() - output_seq.append(0) - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class CommonPrefixTask(BaseTask): - """Common prefix coding task. - - Code needs to output the common prefix between two input lists. Input lists - are variable length, where each list ends with a 0. A common prefix is a - sequence which both lists start with. 
- """ - - def __init__(self, base): - super(type(self), self).__init__() - assert base >= 27 - self.base = base - self.eos = 0 - - def make_io_set(self): - return [ - ([12, 24, 18, 0, 12, 5, 0], [12, 0]), - ([1, 2, 3, 0, 1, 2, 17, 14, 0], [1, 2, 0]), - ([15, 2, 1, 9, 2, 0, 15, 2, 1, 25, 8, 14, 0], [15, 2, 1, 0]), - ([14, 9, 7, 8, 6, 16, 0, 14, 9, 7, 8, 8, 6, 8, 26, 0], - [14, 9, 7, 8, 0]), - ([12, 4, 16, 22, 1, 17, 0, 12, 4, 16, 22, 1, 8, 10, 0], - [12, 4, 16, 22, 1, 0])] - - -class CountCharTask(BaseTask): - - def __init__(self): - super(type(self), self).__init__() - self.base = 27 - self.eos = 0 - self.char = 1 - self.input_type = misc.IOType.string - self.output_type = misc.IOType.integer - - def make_io_set(self): - return [ - ([10, 0], [0]), - ([1, 0], [1]), - ([1, 1, 0], [2]), - ([11, 1, 0], [1]), - ([1, 24, 0], [1]), - ([13, 6, 0], [0]), - ([9, 2, 7, 0], [0]), - ([1, 24, 11, 0], [1]), - ([19, 1, 1, 0], [2]), - ([1, 6, 1, 0], [2]), - ([22, 16, 17, 9, 0], [0]), - ([1, 1, 1, 19, 0], [3]), - ([1, 1, 1, 1, 0], [4]), - ([9, 4, 19, 11, 5, 0], [0]), - ([24, 11, 26, 1, 15, 0], [1]), - ([1, 1, 20, 1, 1, 0], [4]), - ([1, 1, 1, 1, 1, 0], [5])] - - -class CountCharTaskV2(BaseTask): - """Count char coding task (version 2). - - Code must output the number of occurances of character 'A' (value 1) in an - input string. - - Test cases are dynamically generated, allowing for the number of test cases - to be a hyperparameter. - """ - - def __init__(self, n, max_len): - super(type(self), self).__init__() - self.base = 27 - self.eos = 0 - self.char = 1 - self.other_chars = [c for c in xrange(self.base) - if c not in (self.eos, self.char)] - self.input_type = misc.IOType.string - self.output_type = misc.IOType.integer - self._io_pairs = self._make_io_examples(n, max_len) - - def _make_io_examples(self, n, max_len): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. 
- io_examples = [] - io_examples.append(([10, 0], [0])) - io_examples.append(([1, 0], [1])) - io_examples.append(([1, 1, 0], [2])) - io_examples.append(([9, 4, 19, 11, 5, 0], [0])) - io_examples.append(([24, 11, 26, 1, 15, 0], [1])) - for _ in xrange(n - 5): - length = rand.randrange(2, max_len + 1) - num_chars = rand.randrange(0, max_len + 1) - input_seq = [self.char] * num_chars + [0] * (length - num_chars) - rand.shuffle(input_seq) - for i in xrange(len(input_seq)): - if not input_seq[i]: - input_seq[i] = self.other_chars[rand.randrange(len(self.other_chars))] - output_seq = [num_chars] - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class AddTask(BaseTask): - """Addition coding task. - - Code needs to read in two integers and output their sum mod the BF base, - followed by a terminating 0. - """ - - def __init__(self, n=16): - super(type(self), self).__init__() - self.base = 256 - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - self._io_pairs = self._make_io_examples(n) - - def _make_io_examples(self, n): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [ - ([4, 0], [4, 0]), - ([0, 5], [5, 0]), - ([1, 2], [3, 0]), - ([67, 21], [88, 0]), - ([55, 56], [111, 0]), - ([128, 33], [161, 0]), - ([221, 251], [216, 0]), - ([130, 127], [1, 0]), - ([255, 1], [0, 0])] - extra_examples = max(n - len(io_examples), 0) - for _ in xrange(extra_examples): - a = rand.randrange(256) - b = rand.randrange(256) - input_seq = [a, b] - output_seq = [(a + b) % 256, 0] - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class BooleanLogicTask(BaseTask): - """Boolean logic (truth table) coding task. - - Code needs to memorize a boolean truth table. 
Specifically, it must encode a - mapping from triple of bools to a single bool. - """ - - def __init__(self): - super(type(self), self).__init__() - self.base = 2 - self.input_type = misc.IOType.boolean - self.output_type = misc.IOType.boolean - # X(~Z) + (~Y)(~Z) + (~X)YZ - self._truth_fn = ( - lambda x, y, z: # pylint: disable=g-long-lambda - (x and not z) or (not y and not z) or (not x and y and z)) - self._test_cases = [ - ([x, y, z], [int(self._truth_fn(x, y, z))]) - for x, y, z in itertools.product(range(2), range(2), range(2))] - - def make_io_set(self): - return copy.deepcopy(self._test_cases) - - -# ------------------------------------------------------------------------------ -# The following tasks are generated from known BF solutions. This guarantees -# that each task can be solved within the maximum code length, and maximum -# execution steps. -# ------------------------------------------------------------------------------ - - -def default_input_fn_factory(min_length=1, max_length=6, base=256): - def _input_gen(rand): - l = rand.randrange(min_length, max_length + 1) - return [rand.randrange(base) for _ in xrange(l)] - return _input_gen - - -class KnownCodeBaseTask(BaseTask): - """These tasks generate their test cases from a known BF solution. - - This ensures that each task has a solution which is under the max character - length, and that it solves the test cases under the max number of execution - steps. - """ - - def __init__(self, code_solution, make_input_fn, n=100, base=256, - max_steps=5000, seed=6849275409234): - super(KnownCodeBaseTask, self).__init__() - # Make sure known solution is less than the code length used in experiments. 
- assert len(code_solution) < 100 - self.code_solution = code_solution - self.make_input_fn = make_input_fn - self.n = n - self.base = base - self.max_steps = max_steps - self.seed = seed - self._test_cases = list(self._test_case_generator(code_solution)) - - def _test_case_generator(self, code_solution): - rand = random.Random(self.seed) - for _ in xrange(self.n): - input_case = self.make_input_fn(rand) - result = bf.evaluate( - code_solution, input_buffer=input_case, max_steps=self.max_steps, - base=self.base, require_correct_syntax=False) - if not result.success: - raise RuntimeError( - 'Program must succeed. Failed on input: %s' % input_case) - yield input_case, result.output - - def make_io_set(self): - return copy.deepcopy(self._test_cases) - - -class EchoTwiceTask(KnownCodeBaseTask): - """Echo twice.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,.[>,.]<[<]>[.>].', - default_input_fn_factory(), - **kwargs) - - -class EchoThriceTask(KnownCodeBaseTask): - """Echo three times.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,.[>,.]<[<]>[.>].<[<]>[.>].', - default_input_fn_factory(), - **kwargs) - - -class CopyReverseTask(KnownCodeBaseTask): - """Echo forwards, backwards, and then forwards again.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,.[>,.]<[.<].>[.>].', - default_input_fn_factory(), - **kwargs) - - -class EchoZeroCascadeTask(KnownCodeBaseTask): - """Print k-th char with k zeros inbetween (1-indexed).""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - ',[.>[->+>.<<]>+[-<+>]<<,]', - default_input_fn_factory(), - **kwargs) - - -class EchoCascadeTask(KnownCodeBaseTask): - """Print k-th char k times (1-indexed).""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - ',>>+<<[>>[-<+>]<[->+<<.>]>+<<,].', - default_input_fn_factory(base=20), - **kwargs) - - -class ShiftLeftTask(KnownCodeBaseTask): - """Circulate shift input 
left.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - ',>,[.,]<.,.', - default_input_fn_factory(), - **kwargs) - - -class ShiftRightTask(KnownCodeBaseTask): - """Circular shift input right.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,[>,]<.[-]<[<]>[.>].', - default_input_fn_factory(), - **kwargs) - - -class RiffleTask(KnownCodeBaseTask): - """Shuffle like a deck of cards. - - For input of length N, output values in the following index order: - N-1, 0, N-2, 1, N-3, 2, ... - """ - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,[>,]<[.[-]<[<]>.[-]>[>]<]', - default_input_fn_factory(base=20, max_length=8), - **kwargs) - - -class UnriffleTask(KnownCodeBaseTask): - """Inverse of riffle.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,[>,[.[-]],]<[.<].', - default_input_fn_factory(base=20, max_length=8), - **kwargs) - - -class MiddleCharTask(KnownCodeBaseTask): - """Print middle char if length is odd, or 0 if even.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,[>,]<<[[>]<[,<[<]>,>[>]][>]<<]>.', - default_input_fn_factory(max_length=10), - **kwargs) - - -class RemoveLastTask(KnownCodeBaseTask): - """Remove last character.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - ',>,[[<.[-]>[-<+>]],].', - default_input_fn_factory(base=20), - **kwargs) - - -class RemoveLastTwoTask(KnownCodeBaseTask): - """Remove last two characters.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - ',>,>,[[<<.[-]>[-<+>]>[-<+>]],].', - default_input_fn_factory(base=10), - **kwargs) - - -class EchoAlternatingTask(KnownCodeBaseTask): - # Print even numbered chars first (0-indexed), then odd numbered chars - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>,[.,>,]<<[<]>[.>].', - default_input_fn_factory(base=20, max_length=8), - **kwargs) - - -class EchoHalfTask(KnownCodeBaseTask): - 
"""Echo only first half of the input (round down when odd lengthed).""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>>+>,[[<]>+[>],]<[<]>-[-[-<<+>]<[>]>]<<[->+<]>[[>]>.,<+[<]>-].', - default_input_fn_factory(base=20, max_length=9), - **kwargs) - - -class LengthTask(KnownCodeBaseTask): - """Print length of the input sequence.""" - - def __init__(self, **kwargs): - super(type(self), self).__init__( - '>+>,[[<]>+[>],]<[<]>-.', - default_input_fn_factory(max_length=14), - **kwargs) - - -class EchoSecondSequenceTask(KnownCodeBaseTask): - """Echo second sequence. Sequences are separated by 0.""" - - def __init__(self, **kwargs): - def echo_second_gen(rand): - l = rand.randrange(1, 6) - x = [rand.randrange(256) for _ in xrange(l)] - l = rand.randrange(1, 6) - y = [rand.randrange(256) for _ in xrange(l)] - return x + [0] + y + [0] - super(type(self), self).__init__( - ',[,],[.,].', - echo_second_gen, - **kwargs) - - -class EchoNthSequenceTask(KnownCodeBaseTask): - """Echo n-th sequence (1-indexed). Sequences are separated by 0.""" - - def __init__(self, **kwargs): - def echo_nth_gen(rand): - k = rand.randrange(1, 7) - n = rand.randrange(1, k + 1) - x = [] - for _ in xrange(k): - l = rand.randrange(0, 4) - x += [rand.randrange(256) for _ in xrange(l)] + [0] - return [n] + x - super(type(self), self).__init__( - ',-[->,[,]<],[.,].', - echo_nth_gen, - **kwargs) - - -class SubstringTask(KnownCodeBaseTask): - """Echo substring. - - First two inputs are i and l, where i is the starting index (0-indexed) - and l is the length of the substring. 
- """ - - def __init__(self, **kwargs): - def substring_gen(rand): - l = rand.randrange(2, 16) - i, j = sorted([rand.randrange(l), rand.randrange(l)]) - n = j - i - x = [rand.randrange(256) for _ in xrange(l)] + [0] - return [i, n] + x - super(type(self), self).__init__( - '>,<,>[->,<]>,<<[->>.,<<]', - substring_gen, - **kwargs) - - -class Divide2Task(KnownCodeBaseTask): - """Divide by 2 (integer floor division).""" - - def __init__(self, **kwargs): - def int_input_gen(rand): - return [rand.randrange(256)] - super(type(self), self).__init__( - ',[-[->>+<]>[<]<]>>.', - int_input_gen, - **kwargs) - - -class DedupTask(KnownCodeBaseTask): - """Deduplicate adjacent duplicate chars.""" - - def __init__(self, **kwargs): - def dedup_input_gen(rand): - np_random = np.random.RandomState(rand.randrange(2147483647)) - num_unique = rand.randrange(1, 5) - unique = np_random.choice(6, num_unique, replace=False) + 1 - return [v for v in unique for _ in xrange(rand.randrange(1, 5))] + [0] - super(type(self), self).__init__( - '>>,.[[-<+<+>>],[-<->]<[[-<->]<.>]<[->>+<<]>>]', - dedup_input_gen, - **kwargs) - - -# ============================================================================== -# Extra tasks. -# ============================================================================== - - -class PrintIntTask(BaseTask): - """Print integer coding task. - - Code needs to output a fixed single value (given as a hyperparameter to the - task constructor). Program input is ignored. - """ - - def __init__(self, base, fixed_string): - super(type(self), self).__init__() - self.base = base - self.eos = 0 - self.fixed_string = fixed_string - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def make_io_set(self): - return [(list(), list(self.fixed_string))] - - -class EchoTask(BaseTask): - """Echo string coding task. - - Code needs to pipe input to putput (without any modifications). 
- """ - - def __init__(self, base, min_length=1, max_length=5): - super(type(self), self).__init__() - self.base = base # base includes EOS - self.eos = 0 - self.min_length = min_length - self.max_length = max_length - self._io_pairs = self._make_io_examples(25) - - def _make_io_examples(self, n): - # Test cases are fixed, but varied. - np_random = np.random.RandomState(1234567890) - io_pairs = [] - for _ in xrange(n): - length = np_random.randint(self.min_length, self.max_length + 1) - input_seq = np_random.randint(1, self.base, length).tolist() + [self.eos] - output_seq = list(input_seq) - io_pairs.append((input_seq, output_seq)) - return io_pairs - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class JudgeRouteCircleTask(BaseTask): - """Judge route circle coding task. - - Code needs to determine if the given route makes a closed loop. - Encoding: U = 1, R = 2, D = 3, L = 4. - - Based on - https://leetcode.com/problems/judge-route-circle/description/ - """ - base = 256 - input_type = misc.IOType.integer - output_type = misc.IOType.integer - - def __init__(self, n, max_len=12): - super(type(self), self).__init__() - self.eos = 0 - self._io_pairs = self._make_io_examples(n, max_len) - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def _solve(self, input_seq): - assert input_seq[-1] == 0 - pos = [0, 0] # (x, y) - for move in input_seq[:-1]: - assert 0 < move <= 4 - if move & 1 == 0: # Left or Right. - pos[0] += 3 - move # Add or subtract 1. - else: - pos[1] += 2 - move # Add or subtract 1. - return [int(not pos[0] and not pos[1])] - - def _make_io_examples(self, n, max_len): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. 
- io_examples = [] - io_examples.append(([0], [1])) - io_examples.append(([4, 2, 0], [1])) - io_examples.append(([2, 4, 0], [1])) - io_examples.append(([3, 1, 0], [1])) - io_examples.append(([1, 3, 0], [1])) - io_examples.append(([1, 0], [0])) - io_examples.append(([2, 0], [0])) - io_examples.append(([3, 0], [0])) - io_examples.append(([4, 0], [0])) - for _ in xrange(n): - is_true = rand.randrange(2) - length = rand.randrange(1, max_len + 1) - if is_true: - # Make a true case. - length = (length >> 1) << 1 # Make even. - partition = (rand.randrange(length + 1) >> 1) << 1 - a = partition >> 1 - b = (length - partition) >> 1 - counts = {1: a, 2: b, 3: a, 4: b} - else: - # Make a false case. - partitions = ( - [0] - + sorted([rand.randrange(length + 1) for _ in range(3)]) - + [length]) - counts = {n: partitions[n] - partitions[n - 1] for n in range(1, 5)} - if counts[1] == counts[3] and counts[2] == counts[4]: - # By chance we sampled a true case. Make it false by exchanging - # one count between even and odd pairs. - base = 1 + 2 * rand.randrange(2) - a, b = (base, base + 1) if rand.randrange(2) else (base + 1, base) - if counts[a] == length or counts[b] == 0: - # If counts are at their extreme values, then swap who gets - # incremented and decremented. - a, b = b, a - counts[a] += 1 - counts[b] -= 1 - assert counts[a] <= length and counts[b] >= 0 - assert sum(counts.values()) == length - input_seq = [n for n in xrange(1, 5) for _ in xrange(counts[n])] - rand.shuffle(input_seq) - input_seq += [0] - output_seq = self._solve(input_seq) - assert output_seq[0] == is_true - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class MultiplyTask(BaseTask): - """Multiply coding task. - - Code needs to multiple two ints. - - Solution: - http://robl.co/brief-look-at-brainfuck/ - ,>,><<[->[->+>+<<]>>[-<<+>>]<<<]>>. 
- """ - base = 512 - input_type = misc.IOType.integer - output_type = misc.IOType.integer - - def __init__(self, n): - super(type(self), self).__init__() - self.eos = 0 - self._io_pairs = self._make_io_examples(n) - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def _factors(self, n): - return set(i for i in range(1, int(n**0.5) + 1) if n % i == 0) - - def _make_io_examples(self, n): - """Generate test cases for the task.""" - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [] - for _ in xrange(n): - n = rand.randrange(self.base) - if n == 0: - a, b = 0, rand.randrange(self.base) - else: - f = list(self._factors(n)) - a = f[rand.randrange(len(f))] - b = n // a - if rand.randrange(2): - a, b = b, a - io_examples.append(([a, b], [n])) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class DivModTask(BaseTask): - """Divmod coding task. - - Code needs to take the quotient and remainder of two ints. - - Solution: - http://robl.co/brief-look-at-brainfuck/ - ,>,><<[>[->+>+<<]>[-<<-[>]>>>[<[-<->]<[>]>>[[-]>>+<]>-<]<<]>>>+<<[-<<+>>]<<<]> - >>>>[-<<<<<+>>>>>]<<<<<.>.> - """ - base = 512 - input_type = misc.IOType.integer - output_type = misc.IOType.integer - - def __init__(self, n): - super(type(self), self).__init__() - self.eos = 0 - self._io_pairs = self._make_io_examples(n) - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def _make_io_examples(self, n): - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [] - for _ in xrange(n): - n = rand.randrange(0, self.base) - k = rand.randrange(1, self.base) # Divisor cannot be 0. 
- io_examples.append(([n, k], list(divmod(n, k)))) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class FibonacciTask(BaseTask): - - def __init__(self): - super(type(self), self).__init__() - self.base = 256 - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def make_io_set(self): - return [ - ([0], [0, 1]), - ([1], [1, 1]), - ([2], [1, 2]), - ([3], [2, 3]), - ([4], [3, 5]), - ([5], [5, 8]), - ([6], [8, 13]), - ([7], [13, 21]), - ([8], [21, 34]), - ([9], [34, 55]), - ([10], [55, 89]), - ([11], [89, 144]), - ([12], [144, 233]), - ([13], [233, 121])] - - -class FindSubStrTask(BaseTask): - """Find sub-string coding task. - - Code needs to output a bool: True if the input string contains a hard-coded - substring, 'AB' (values [1, 2]). - """ - - def __init__(self, base): - super(type(self), self).__init__() - assert base >= 27 - self.base = base - self.eos = 0 - self.find_str = [1, 2] - self.input_type = misc.IOType.string - self.output_type = misc.IOType.boolean - - def make_io_set(self): - return [ - ([1, 1, 23, 0], [0]), - ([21, 3, 2, 0], [0]), - ([2, 1, 19, 0], [0]), - ([2, 24, 15, 3, 0], [0]), - ([24, 6, 10, 16, 4, 0], [0]), - ([1, 2, 12, 0], [1]), - ([7, 1, 2, 0], [1]), - ([1, 2, 11, 3, 0], [1]), - ([1, 1, 2, 18, 0], [1]), - ([7, 25, 1, 2, 0], [1]), - ([3, 1, 2, 11, 8, 0], [1]), - ([15, 16, 20, 1, 2, 0], [1])] - - -class SortFixedTask(BaseTask): - """Sort list coding task. - - Code needs to output a sorted input list. The task consists of lists of the - same length L, where L is provided to this task's constructor as a - hyperparameter. - """ - - def __init__(self, base, length=3): - super(type(self), self).__init__() - assert base >= 27 - self.base = base - self.eos = 0 - self.length = length - assert length == 3 # More lengths will be supported. 
- - def make_io_set(self): - if self.length == 3: - return [ - ([1, 20, 6], [1, 6, 20]), - ([13, 6, 7], [6, 7, 13]), - ([24, 2, 23], [2, 23, 24]), - ([16, 12, 3], [3, 12, 16]), - ([11, 24, 4], [4, 11, 24]), - ([10, 1, 19], [1, 10, 19])] - - -class SortFixedTaskV2(BaseTask): - """Sort list coding task (version 2). - - Code needs to output a sorted input list. The task consists of lists of the - same length L, where L is provided to this task's constructor as a - hyperparameter. - - Test cases are dynamically generated, allowing for the number of test cases - to be a hyperparameter. - """ - - def __init__(self, base, n, length=3): - super(type(self), self).__init__() - assert base >= 27 - self.base = base - self.eos = 0 - self._io_pairs = self._make_io_examples(n, length) - self.input_type = misc.IOType.integer - self.output_type = misc.IOType.integer - - def _make_io_examples(self, n, length): - rand = random.Random(6849275409234) # Test cases are fixed, but varied. - io_examples = [] - for _ in xrange(n): - input_seq = [rand.randrange(1, self.base) for _ in xrange(length)] - output_seq = sorted(input_seq) - io_examples.append((input_seq, output_seq)) - return io_examples - - def make_io_set(self): - return copy.deepcopy(self._io_pairs) - - -class RemoveTargetCharTask(KnownCodeBaseTask): - """Remove target character from string, where first input is the target. - - Target can appear multiple times. 
- """ - - def __init__(self, **kwargs): - def randrange_hole(rand, a, hole, b): - x = rand.randrange(a, b - 1) - if x >= hole: - return x + 1 - return x - def remove_target_char_gen(rand): - char = rand.randrange(1, 6) - l = rand.randrange(1, 8) - input_seq = [randrange_hole(rand, 1, char, 256) for _ in xrange(l)] - idx = range(l) - rand.shuffle(idx) - num_targets = rand.randrange(0, l) - for pos in idx[:num_targets]: - input_seq[pos] = char - return [char] + input_seq + [0] - super(type(self), self).__init__( - ',>>>,[<<<[->+>+<<]>>[->->+<<]>[>[-<+>]<.[-]]>[-]<<<[-<+>]>>,].', - remove_target_char_gen, - **kwargs) - - -class ListIndexTask(KnownCodeBaseTask): - """Echo i-th value in the given list.""" - - def __init__(self, **kwargs): - def array_index_gen(rand): - l = rand.randrange(1, 16) - i = rand.randrange(l) - return [i] + [rand.randrange(256) for _ in xrange(l)] + [0] - super(type(self), self).__init__( - ',[->,<]>,.', - array_index_gen, - **kwargs) - - -# ============================================================================== -# Tasks based on primaryobjects paper. -# ============================================================================== - - -def string2tokens(string): - return [ord(c) for c in string] - - -def stringlist2tokens(strings): - return [string2tokens(string) for string in strings] - - -def string2tokens_b27(string): - return [ord(c.lower()) - ord('a') + 1 for c in string] - - -def stringlist2tokens_b27(strings): - return [string2tokens_b27(string) for string in strings] - - -class BottlesOfBeerTask(BaseTask): - """Bottles of beer coding task. - - This is a counting task. Code needs to read in an int N and then output - every int from N to 0, each separated by a 0. 
- """ - base = 256 - input_type = misc.IOType.integer - output_type = misc.IOType.integer - - def make_io_set(self): - return [ - ([1], [1, 0]), - ([2], [2, 0, 1, 0]), - ([3], [3, 0, 2, 0, 1, 0]), - ([4], [4, 0, 3, 0, 2, 0, 1, 0]), - ([5], [5, 0, 4, 0, 3, 0, 2, 0, 1, 0]), - ([6], [6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0])] - - -class SplitTask(BaseTask): - """Split coding task. - - Code needs to pipe input strings to output, but insert a 0 after every 3 - characters. This is in essence splitting the string into intervals of length - 3. - """ - base = 28 - input_type = misc.IOType.string - output_type = misc.IOType.integer - - def _splicer(self, lst, insert, interval=3): - for i, item in enumerate(lst): - yield item - if (i + 1) % interval == 0 and i < len(lst) - 1: - yield insert - - def __init__(self): - super(type(self), self).__init__() - inputs = stringlist2tokens_b27( - ['hello', 'orange', 'spaghetti', 'wins', 'one']) - targets = [list(self._splicer(i, 27)) for i in inputs] - self._test_cases = list(zip(inputs, targets)) - - def make_io_set(self): - return copy.deepcopy(self._test_cases) - - -class TrimLeftTask(BaseTask): - """Trim left coding task. - - Code needs to pipe input strings to output, but remove everything before the - first quotation char ("). 
- """ - base = 256 - input_type = misc.IOType.integer - output_type = misc.IOType.integer - - def __init__(self): - super(type(self), self).__init__() - inputs = stringlist2tokens( - ['a "inside" over', 'xy "test" rights', 'ca6 "foresting" service', - 'abc"def"yz.', 'A"B"']) - targets = stringlist2tokens( - ['"inside" over', '"test" rights', '"foresting" service', '"def"yz.', - '"B"']) - self._test_cases = list(zip(inputs, targets)) - - def make_io_set(self): - return copy.deepcopy(self._test_cases) diff --git a/research/brain_coder/single_task/code_tasks_test.py b/research/brain_coder/single_task/code_tasks_test.py deleted file mode 100644 index d3260a1a56ec0f7c36363d558122f7f7e49198e6..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/code_tasks_test.py +++ /dev/null @@ -1,108 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for code_tasks.""" - -import numpy as np -import tensorflow as tf - -from single_task import code_tasks # brain coder -from single_task import defaults # brain coder - - -def pad(string, pad_length, pad_char): - return string + pad_char * (pad_length - len(string)) - - -class CodeTasksTest(tf.test.TestCase): - - def assertClose(self, a, b): - self.assertTrue( - np.isclose(a, b, atol=1e-4), - 'Expecting approximately equal values. 
Got: %s, %s' % (a, b)) - - def testMultiIOTaskManager(self): - maxlen = 100 - padchr = '[' - task = code_tasks.make_paper_task( - 'print', timestep_limit=maxlen, do_code_simplification=False) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose( - r(pad('++++++++.---.+++++++...', maxlen, padchr)).episode_rewards[-1], - 0.2444) - self.assertClose( - r(pad('++++++++.---.+++++++..+++.', - maxlen, padchr)).episode_rewards[-1], - 1.0) - - task = code_tasks.make_paper_task( - 'print', timestep_limit=maxlen, do_code_simplification=True) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose( - r('++++++++.---.+++++++...').episode_rewards[-1], - 0.2444) - self.assertClose( - r('++++++++.---.+++++++..+++.').episode_rewards[-1], - 0.935) - self.assertClose( - r(pad('++++++++.---.+++++++..+++.', - maxlen, padchr)).episode_rewards[-1], - 0.75) - - task = code_tasks.make_paper_task( - 'reverse', timestep_limit=maxlen, do_code_simplification=False) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose( - r(pad('>,>,>,.<.<.<.', maxlen, padchr)).episode_rewards[-1], - 0.1345) - self.assertClose( - r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1], - 1.0) - - task = code_tasks.make_paper_task( - 'reverse', timestep_limit=maxlen, do_code_simplification=True) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose(r('>,>,>,.<.<.<.').episode_rewards[-1], 0.1324) - self.assertClose(r(',[>,]+[,<.]').episode_rewards[-1], 0.9725) - self.assertClose( - r(pad(',[>,]+[,<.]', maxlen, padchr)).episode_rewards[-1], - 0.75) - - def testMakeTask(self): - maxlen = 100 - padchr = '[' - config = defaults.default_config_with_updates( - 'env=c(config_for_iclr=False,fixed_string=[8,5,12,12,15])') - task = code_tasks.make_task(config.env, 'print', timestep_limit=maxlen) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose( - r('++++++++.---.+++++++...').episode_rewards[-1], - 0.2444) - self.assertClose( - 
r('++++++++.---.+++++++..+++.').episode_rewards[-1], - 0.935) - self.assertClose( - r(pad('++++++++.---.+++++++..+++.', - maxlen, padchr)).episode_rewards[-1], - 0.75) - - def testKnownCodeBaseTask(self): - maxlen = 100 - padchr = '[' - task = code_tasks.make_paper_task( - 'shift-left', timestep_limit=maxlen, do_code_simplification=False) - reward_fns = task.rl_batch(1) - r = reward_fns[0] - self.assertClose( - r(pad(',>,[.,]<.,.', maxlen, padchr)).episode_rewards[-1], - 1.0) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/data.py b/research/brain_coder/single_task/data.py deleted file mode 100644 index 8f34464f5a3e1c403b0f253f1520920c303b0819..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/data.py +++ /dev/null @@ -1,89 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Manage data for pretraining and RL tasks.""" - -import ast -from collections import namedtuple - -from absl import logging - -from single_task import code_tasks # brain coder - - -RLBatch = namedtuple('RLBatch', ['reward_fns', 'batch_size', 'good_reward']) - - -class DataManager(object): - """Interface between environment and model.""" - - def __init__(self, global_config, run_number=None, - do_code_simplification=False): - """Constructs a DataManager. - - Args: - global_config: A config_lib.Config instance containing all config. See - config in defaults.py. - run_number: Which run this is (of the same experiment). This should be set - when a task cycle is defined in the config. A task cycle is a list of - tasks to cycle through repeatedly, and the selected task is a function - of the run number, i.e. 0-th run, 1-st run, 2-nd run, etc... - This can be None if only a single task is set in the config. 
- do_code_simplification: When global_config.env.config_for_iclr is True, - use this option to create code simplification (code golf) tasks, vs - fixed length coding tasks. If True, a task with code simplification - reward will be constructed. - - Raises: - ValueError: If global_config.env.task and global_config.env.task_cycle - are both set, or both not set. Only one should be given. - ValueError: If global_config.env.task_cycle is set but run_number is None. - """ - env_config = global_config.env - self.batch_size = global_config.batch_size - - if env_config.task_cycle: - if env_config.task: - raise ValueError('Do not set both `task` and `task_cycle`.') - if run_number is None: - raise ValueError('Do not use task_cycle for single-run experiment.') - index = run_number % len(env_config.task_cycle) - self.task_name = env_config.task_cycle[index] - logging.info('run_number: %d, task_cycle index: %d', run_number, index) - logging.info('task_cycle: %s', env_config.task_cycle) - elif env_config.task: - self.task_name = env_config.task - else: - raise ValueError('Either `task` or `task_cycle` must be set.') - logging.info('Task for this run: "%s"', self.task_name) - - logging.info('config_for_iclr=True; do_code_simplification=%s', - do_code_simplification) - self.rl_task = code_tasks.make_task( - task_name=self.task_name, - override_kwargs=ast.literal_eval(env_config.task_kwargs), - max_code_length=global_config.timestep_limit, - require_correct_syntax=env_config.correct_syntax, - do_code_simplification=do_code_simplification, - correct_bonus=env_config.task_manager_config.correct_bonus, - code_length_bonus=env_config.task_manager_config.code_length_bonus) - - def sample_rl_batch(self): - """Create reward functions from the current task. - - Returns: - RLBatch namedtuple instance, which holds functions and information for - a minibatch of episodes. - * reward_fns: A reward function for each episode. Maps code string to - reward. 
- * batch_size: Number of episodes in this minibatch. - * good_reward: Estimated threshold of rewards which indicate the algorithm - is starting to solve the task. This is a heuristic that tries to - reduce the amount of stuff written to disk. - """ - reward_fns = self.rl_task.rl_batch(self.batch_size) - return RLBatch( - reward_fns=reward_fns, - batch_size=self.batch_size, - good_reward=self.rl_task.good_reward) diff --git a/research/brain_coder/single_task/defaults.py b/research/brain_coder/single_task/defaults.py deleted file mode 100644 index d9bd8b942532dfffcf06d90d331e58725c4d82a9..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/defaults.py +++ /dev/null @@ -1,82 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Default configuration for agent and environment.""" - -from absl import logging - -from common import config_lib # brain coder - - -def default_config(): - return config_lib.Config( - agent=config_lib.OneOf( - [config_lib.Config( - algorithm='pg', - policy_lstm_sizes=[35,35], - # Set value_lstm_sizes to None to share weights with policy. - value_lstm_sizes=[35,35], - obs_embedding_size=10, - grad_clip_threshold=10.0, - param_init_factor=1.0, - lr=5e-5, - pi_loss_hparam=1.0, - vf_loss_hparam=0.5, - entropy_beta=1e-2, - regularizer=0.0, - softmax_tr=1.0, # Reciprocal temperature. - optimizer='rmsprop', # 'adam', 'sgd', 'rmsprop' - topk=0, # Top-k unique codes will be stored. - topk_loss_hparam=0.0, # off policy loss multiplier. - # Uniformly sample this many episodes from topk buffer per batch. - # If topk is 0, this has no effect. - topk_batch_size=1, - # Exponential moving average baseline for REINFORCE. - # If zero, A2C is used. - # If non-zero, should be close to 1, like .99, .999, etc. - ema_baseline_decay=0.99, - # Whether agent can emit EOS token. If true, agent can emit EOS - # token which ends the episode early (ends the sequence). 
- # If false, agent must emit tokens until the timestep limit is - # reached. e.g. True means variable length code, False means fixed - # length code. - # WARNING: Making this false slows things down. - eos_token=False, - replay_temperature=1.0, - # Replay probability. 1 = always replay, 0 = always on policy. - alpha=0.0, - # Whether to normalize importance weights in each minibatch. - iw_normalize=True), - config_lib.Config( - algorithm='ga', - crossover_rate=0.99, - mutation_rate=0.086), - config_lib.Config( - algorithm='rand')], - algorithm='pg', - ), - env=config_lib.Config( - # If True, task-specific settings are not needed. - task='', # 'print', 'echo', 'reverse', 'remove', ... - task_cycle=[], # If non-empty, reptitions will cycle through tasks. - task_kwargs='{}', # Python dict literal. - task_manager_config=config_lib.Config( - # Reward recieved per test case. These bonuses will be scaled - # based on how many test cases there are. - correct_bonus=2.0, # Bonus for code getting correct answer. - code_length_bonus=1.0), # Maximum bonus for short code. - correct_syntax=False, - ), - batch_size=64, - timestep_limit=32) - - -def default_config_with_updates(config_string, do_logging=True): - if do_logging: - logging.info('Config string: "%s"', config_string) - config = default_config() - config.strict_update(config_lib.Config.parse(config_string)) - if do_logging: - logging.info('Config:\n%s', config.pretty_str()) - return config diff --git a/research/brain_coder/single_task/ga_lib.py b/research/brain_coder/single_task/ga_lib.py deleted file mode 100644 index fadb96482b21a5c65c0d6d6cf4a3aec3b5708235..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/ga_lib.py +++ /dev/null @@ -1,472 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Genetic algorithm for BF tasks. - -Inspired by https://github.com/primaryobjects/AI-Programmer. 
-GA function code borrowed from https://github.com/DEAP/deap. -""" - -from collections import namedtuple -import random - -from absl import flags -from absl import logging -import numpy as np -from six.moves import xrange - -from common import bf # brain coder -from common import utils # brain coder -from single_task import misc # brain coder - -FLAGS = flags.FLAGS - -# Saving reward of previous programs saves computation if a program appears -# again. -USE_REWARD_CACHE = True # Disable this if GA is using up too much memory. -GENES = bf.CHARS -MAX_PROGRAM_STEPS = 500 -STEP_BONUS = True - -ALPHANUM_CHARS = ( - ['_'] + - [chr(ord('a') + i_) for i_ in range(26)] + - [chr(ord('A') + i_) for i_ in range(26)] + - [chr(ord('0') + i_) for i_ in range(10)]) - -Result = namedtuple( - 'Result', - ['reward', 'inputs', 'code_outputs', 'target_outputs', 'type_in', - 'type_out', 'base', 'correct']) - - -class IOType(object): - string = 'string' - integer = 'integer' - - -class CustomType(object): - - def __init__(self, to_str_fn): - self.to_str_fn = to_str_fn - - def __call__(self, obj): - return self.to_str_fn(obj) - - -def tokens_list_repr(tokens, repr_type, base): - """Make human readable representation of program IO.""" - if isinstance(repr_type, CustomType): - return repr_type(tokens) - elif repr_type == IOType.string: - chars = ( - [ALPHANUM_CHARS[t] for t in tokens] if base < len(ALPHANUM_CHARS) - else [chr(t) for t in tokens]) - return ''.join(chars) - elif repr_type == IOType.integer: - return str(tokens) - raise ValueError('No such representation type "%s"', repr_type) - - -def io_repr(result): - """Make human readable representation of test cases.""" - inputs = ','.join( - tokens_list_repr(tokens, result.type_in, result.base) - for tokens in result.inputs) - code_outputs = ','.join( - tokens_list_repr(tokens, result.type_out, result.base) - for tokens in result.code_outputs) - target_outputs = ','.join( - tokens_list_repr(tokens, result.type_out, result.base) - for 
tokens in result.target_outputs) - return inputs, target_outputs, code_outputs - - -def make_task_eval_fn(task_manager): - """Returns a wrapper that converts an RL task into a GA task. - - Args: - task_manager: Is a task manager object from code_tasks.py - - Returns: - A function that takes as input a single list of a code chars, and outputs - a Result namedtuple instance containing the reward and information about - code execution. - """ - def to_data_list(single_or_tuple): - if isinstance(single_or_tuple, misc.IOTuple): - return list(single_or_tuple) - return [single_or_tuple] - - def to_ga_type(rl_type): - if rl_type == misc.IOType.string: - return IOType.string - return IOType.integer - - # Wrapper function. - def evalbf(bf_chars): - result = task_manager._score_code(''.join(bf_chars)) - reward = sum(result.episode_rewards) - correct = result.reason == 'correct' - return Result( - reward=reward, - inputs=to_data_list(result.input_case), - code_outputs=to_data_list(result.code_output), - target_outputs=to_data_list(result.correct_output), - type_in=to_ga_type(result.input_type), - type_out=to_ga_type(result.output_type), - correct=correct, - base=task_manager.task.base) - - return evalbf - - -def debug_str(individual, task_eval_fn): - res = task_eval_fn(individual) - input_str, target_output_str, code_output_str = io_repr(res) - return ( - ''.join(individual) + - ' | ' + input_str + - ' | ' + target_output_str + - ' | ' + code_output_str + - ' | ' + str(res.reward) + - ' | ' + str(res.correct)) - - -def mutate_single(code_tokens, mutation_rate): - """Mutate a single code string. - - Args: - code_tokens: A string/list/Individual of BF code chars. Must end with EOS - symbol '_'. - mutation_rate: Float between 0 and 1 which sets the probability of each char - being mutated. - - Returns: - An Individual instance containing the mutated code string. - - Raises: - ValueError: If `code_tokens` does not end with EOS symbol. 
- """ - if len(code_tokens) <= 1: - return code_tokens - if code_tokens[-1] == '_': - # Do this check to ensure that the code strings have not been corrupted. - raise ValueError('`code_tokens` must end with EOS symbol.') - else: - cs = Individual(code_tokens) - eos = [] - mutated = False - for pos in range(len(cs)): - if random.random() < mutation_rate: - mutated = True - new_char = GENES[random.randrange(len(GENES))] - x = random.random() - if x < 0.25 and pos != 0 and pos != len(cs) - 1: - # Insertion mutation. - if random.random() < 0.50: - # Shift up. - cs = cs[:pos] + [new_char] + cs[pos:-1] - else: - # Shift down. - cs = cs[1:pos] + [new_char] + cs[pos:] - elif x < 0.50: - # Deletion mutation. - if random.random() < 0.50: - # Shift down. - cs = cs[:pos] + cs[pos + 1:] + [new_char] - else: - # Shift up. - cs = [new_char] + cs[:pos] + cs[pos + 1:] - elif x < 0.75: - # Shift rotate mutation (position invariant). - if random.random() < 0.50: - # Shift down. - cs = cs[1:] + [cs[0]] - else: - # Shift up. - cs = [cs[-1]] + cs[:-1] - else: - # Replacement mutation. - cs = cs[:pos] + [new_char] + cs[pos + 1:] - assert len(cs) + len(eos) == len(code_tokens) - if mutated: - return Individual(cs + eos) - else: - return Individual(code_tokens) - - -def crossover(parent1, parent2): - """Performs crossover mating between two code strings. - - Crossover mating is where a random position is selected, and the chars - after that point are swapped. The resulting new code strings are returned. - - Args: - parent1: First code string. - parent2: Second code string. - - Returns: - A 2-tuple of children, i.e. the resulting code strings after swapping. 
- """ - max_parent, min_parent = ( - (parent1, parent2) if len(parent1) > len(parent2) - else (parent2, parent1)) - pos = random.randrange(len(max_parent)) - if pos >= len(min_parent): - child1 = max_parent[:pos] - child2 = min_parent + max_parent[pos:] - else: - child1 = max_parent[:pos] + min_parent[pos:] - child2 = min_parent[:pos] + max_parent[pos:] - return Individual(child1), Individual(child2) - - -def _make_even(n): - """Return largest even integer less than or equal to `n`.""" - return (n >> 1) << 1 - - -def mutate_and_crossover(population, mutation_rate, crossover_rate): - """Take a generational step over a population. - - Transforms population of parents into population of children (of the same - size) via crossover mating and then mutation on the resulting children. - - Args: - population: Parent population. A list of Individual objects. - mutation_rate: Probability of mutation. See `mutate_single`. - crossover_rate: Probability that two parents will mate. - - Returns: - Child population. A list of Individual objects. - """ - children = [None] * len(population) - for i in xrange(0, _make_even(len(population)), 2): - p1 = population[i] - p2 = population[i + 1] - if random.random() < crossover_rate: - p1, p2 = crossover(p1, p2) - c1 = mutate_single(p1, mutation_rate) - c2 = mutate_single(p2, mutation_rate) - children[i] = c1 - children[i + 1] = c2 - if children[-1] is None: - children[-1] = population[-1] - return children - - -def ga_loop(population, cxpb, mutpb, ngen, task_eval_fn, halloffame=None, - checkpoint_writer=None): - """A bare bones genetic algorithm. - - Similar to chapter 7 of Back, Fogel and Michalewicz, "Evolutionary - Computation 1 : Basic Algorithms and Operators", 2000. - - Args: - population: A list of individuals. - cxpb: The probability of mating two individuals. - mutpb: The probability of mutating a gene. - ngen: The number of generation. Unlimited if zero. 
- task_eval_fn: A python function which maps an Individual to a Result - namedtuple. - halloffame: (optional) a utils.MaxUniquePriorityQueue object that will be - used to aggregate the best individuals found during search. - checkpoint_writer: (optional) an object that can save and load populations. - Needs to have `write`, `load`, and `has_checkpoint` methods. Used to - periodically save progress. In event of a restart, the population will - be loaded from disk. - - Returns: - GaResult namedtuple instance. This contains information about the GA run, - including the resulting population, best reward (fitness) obtained, and - the best code string found. - """ - - has_checkpoint = False - if checkpoint_writer and checkpoint_writer.has_checkpoint(): - try: - gen, population, halloffame = checkpoint_writer.load() - except EOFError: # Data was corrupted. Start over. - pass - else: - has_checkpoint = True - logging.info( - 'Loaded population from checkpoint. Starting at generation %d', gen) - - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in population if not ind.fitness.valid] - for ind in invalid_ind: - ind.fitness.values = task_eval_fn(ind).reward, - for _, ind in halloffame.iter_in_order(): - ind.fitness.values = task_eval_fn(ind).reward, - - if not has_checkpoint: - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in population if not ind.fitness.valid] - for ind in invalid_ind: - ind.fitness.values = task_eval_fn(ind).reward, - - if halloffame is not None: - for ind in population: - halloffame.push(ind.fitness.values, tuple(ind), ind) - - logging.info('Initialized new population.') - - gen = 1 - - pop_size = len(population) - program_reward_cache = {} if USE_REWARD_CACHE else None - - # Begin the generational process - while ngen == 0 or gen <= ngen: - # Select the next generation individuals - offspring = roulette_selection(population, pop_size - len(halloffame)) - - # Vary the pool of individuals - 
# offspring = varAnd(offspring, toolbox, cxpb, mutpb) - offspring = mutate_and_crossover( - offspring, mutation_rate=mutpb, crossover_rate=cxpb) - - # Evaluate the individuals with an invalid fitness - invalid_ind = [ind for ind in offspring if not ind.fitness.valid] - for ind in invalid_ind: - str_repr = ''.join(ind) - if program_reward_cache is not None and str_repr in program_reward_cache: - ind.fitness.values = (program_reward_cache[str_repr],) - else: - eval_result = task_eval_fn(ind) - ind.fitness.values = (eval_result.reward,) - if program_reward_cache is not None: - program_reward_cache[str_repr] = eval_result.reward - - # Replace the current population by the offspring - population = list(offspring) - - # Update the hall of fame with the generated individuals - if halloffame is not None: - for ind in population: - halloffame.push(ind.fitness.values, tuple(ind), ind) - - # elitism - population.extend([ind for _, ind in halloffame.iter_in_order()]) - - if gen % 100 == 0: - top_code = '\n'.join([debug_str(ind, task_eval_fn) - for ind in topk(population, k=4)]) - logging.info('gen: %d\nNPE: %d\n%s\n\n', gen, gen * pop_size, top_code) - - best_code = ''.join(halloffame.get_max()[1]) - res = task_eval_fn(best_code) - - # Write population and hall-of-fame to disk. 
- if checkpoint_writer: - checkpoint_writer.write(gen, population, halloffame) - - if res.correct: - logging.info('Solution found:\n%s\nreward = %s\n', - best_code, res.reward) - break - - gen += 1 - - best_code = ''.join(halloffame.get_max()[1]) - res = task_eval_fn(best_code) - - return GaResult( - population=population, best_code=best_code, reward=res.reward, - solution_found=res.correct, generations=gen, - num_programs=gen * len(population), - max_generations=ngen, max_num_programs=ngen * len(population)) - - -GaResult = namedtuple( - 'GaResult', - ['population', 'best_code', 'reward', 'generations', 'num_programs', - 'solution_found', 'max_generations', 'max_num_programs']) - - -def reward_conversion(reward): - """Convert real value into positive value.""" - if reward <= 0: - return 0.05 - return reward + 0.05 - - -def roulette_selection(population, k): - """Select `k` individuals with prob proportional to fitness. - - Each of the `k` selections is independent. - - Warning: - The roulette selection by definition cannot be used for minimization - or when the fitness can be smaller or equal to 0. - - Args: - population: A list of Individual objects to select from. - k: The number of individuals to select. - - Returns: - A list of selected individuals. 
- """ - fitnesses = np.asarray( - [reward_conversion(ind.fitness.values[0]) - for ind in population]) - assert np.all(fitnesses > 0) - - sum_fits = fitnesses.sum() - chosen = [None] * k - for i in xrange(k): - u = random.random() * sum_fits - sum_ = 0 - for ind, fitness in zip(population, fitnesses): - sum_ += fitness - if sum_ > u: - chosen[i] = Individual(ind) - break - if not chosen[i]: - chosen[i] = Individual(population[-1]) - - return chosen - - -def make_population(make_individual_fn, n): - return [make_individual_fn() for _ in xrange(n)] - - -def best(population): - best_ind = None - for ind in population: - if best_ind is None or best_ind.fitness.values < ind.fitness.values: - best_ind = ind - return best_ind - - -def topk(population, k): - q = utils.MaxUniquePriorityQueue(k) - for ind in population: - q.push(ind.fitness.values, tuple(ind), ind) - return [ind for _, ind in q.iter_in_order()] - - -class Fitness(object): - - def __init__(self): - self.values = () - - @property - def valid(self): - """Assess if a fitness is valid or not.""" - return bool(self.values) - - -class Individual(list): - - def __init__(self, *args): - super(Individual, self).__init__(*args) - self.fitness = Fitness() - - -def random_individual(genome_size): - return lambda: Individual(np.random.choice(GENES, genome_size).tolist()) diff --git a/research/brain_coder/single_task/ga_train.py b/research/brain_coder/single_task/ga_train.py deleted file mode 100644 index 630eca427e478dbadad58bd94b56e89a5a747526..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/ga_train.py +++ /dev/null @@ -1,324 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Genetic algorithm for BF tasks. - -Also contains the uniform random search algorithm. - -Inspired by https://github.com/primaryobjects/AI-Programmer. -GA function code borrowed from https://github.com/DEAP/deap. 
-""" - -import cPickle -import os -import sys -from time import sleep - -from absl import flags -from absl import logging -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from common import utils # brain coder -from single_task import data # brain coder -from single_task import defaults # brain coder -from single_task import ga_lib # brain coder -from single_task import results_lib # brain coder - -FLAGS = flags.FLAGS - - -def define_tuner_hparam_space(hparam_space_type): - """Define tunable hparams for grid search.""" - if hparam_space_type != 'ga': - raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type) - return { - 'population_size': [10, 25, 50, 100, 500], - 'crossover_rate': [0.2, 0.5, 0.7, 0.9, 0.95], - 'mutation_rate': [0.01, 0.03, 0.05, 0.1, 0.15]} - - -def write_hparams_to_config(config, hparams, hparam_space_type): - """Write hparams given by the tuner into the Config object.""" - if hparam_space_type != 'ga': - raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type) - config.batch_size = hparams.population_size - config.agent.crossover_rate = hparams.crossover_rate - config.agent.mutation_rate = hparams.mutation_rate - - -class CheckpointWriter(object): - """Manages loading and saving GA populations to disk. - - This object is used by the genetic algorithm to save progress periodically - so that a recent population can be loaded from disk in the event of a restart. - """ - - def __init__(self, checkpoint_dir, population_size): - self.checkpoint_file = os.path.join(checkpoint_dir, 'checkpoint.pickle') - self.population_size = population_size - - def write(self, gen, population, halloffame): - """Write GA state to disk. - - Overwrites previous saved state. - - Args: - gen: Generation number. - population: List of Individual objects. - halloffame: Hall-of-fame buffer. Typically a priority queue. 
- """ - raw = cPickle.dumps((gen, population, halloffame)) - with tf.gfile.FastGFile(self.checkpoint_file, 'w') as f: - f.write(raw) - - def load(self): - """Loads GA state from disk. - - Loads whatever is on disk, which will be whatever the most recent call - to `write` wrote. - - Returns: - gen: Generation number. - population: List of Individual objects. - halloffame: Hall-of-fame buffer. Typically a priority queue. - """ - with tf.gfile.FastGFile(self.checkpoint_file, 'r') as f: - raw = f.read() - objs = cPickle.loads(raw) - # Validate data. - assert isinstance(objs, tuple) and len(objs) == 3, ( - 'Expecting a 3-tuple, but got %s instead.' % (objs,)) - gen, population, halloffame = objs - assert isinstance(gen, int), ( - 'Expecting `gen` to be an integer, got %s' % (gen,)) - assert ( - isinstance(population, list) - and len(population) == self.population_size - ), ( - 'Expecting `population` to be a list with size %d, got %s' - % (self.population_size, population)) - assert halloffame is None or len(halloffame) == 2, ( - 'Expecting hall-of-fame object to have length two, got length %d' - % len(halloffame)) - logging.info('Loaded pop from checkpoint file: "%s".', - self.checkpoint_file) - return gen, population, halloffame - - def has_checkpoint(self): - """Checks if a checkpoint exists on disk, and if so returns True.""" - return tf.gfile.Exists(self.checkpoint_file) - - -def run_training(config=None, tuner=None, logdir=None, trial_name=None, # pylint: disable=unused-argument - is_chief=True): - """Do all training runs. - - This is the top level training function for policy gradient based models. - Run this from the main function. - - Args: - config: config_lib.Config instance containing global config (agent and - environment hparams). If None, config will be parsed from FLAGS.config. - tuner: (unused) A tuner instance. Leave as None if not tuning. - logdir: Parent directory where all data from all runs will be written. If - None, FLAGS.logdir will be used. 
- trial_name: (unused) If tuning, set this to a unique string that identifies - this trial. If `tuner` is not None, this also must be set. - is_chief: True if this worker is the chief. - - Returns: - List of results dicts which were written to disk. Each training run gets a - results dict. Results dict contains metrics, i.e. (name, value) pairs which - give information about the training run. - - Raises: - ValueError: If FLAGS.num_workers does not divide FLAGS.num_repetitions. - ValueError: If results dicts read from disk contain invalid data. - """ - if not config: - # If custom config is not given, get it from flags. - config = defaults.default_config_with_updates(FLAGS.config) - if not logdir: - logdir = FLAGS.logdir - - if FLAGS.num_repetitions % FLAGS.num_workers != 0: - raise ValueError('Number of workers must divide number of repetitions') - num_local_reps = FLAGS.num_repetitions // FLAGS.num_workers - logging.info('Running %d reps globally.', FLAGS.num_repetitions) - logging.info('This worker will run %d local reps.', num_local_reps) - if FLAGS.max_npe: - max_generations = FLAGS.max_npe // config.batch_size - logging.info('Max samples per rep: %d', FLAGS.max_npe) - logging.info('Max generations per rep: %d', max_generations) - else: - max_generations = sys.maxint - logging.info('Running unlimited generations.') - - assert FLAGS.num_workers > 0 - logging.info('Starting experiment. Directory: "%s"', logdir) - results = results_lib.Results(logdir, FLAGS.task_id) - local_results_list = results.read_this_shard() - if local_results_list: - if local_results_list[0]['max_npe'] != FLAGS.max_npe: - raise ValueError( - 'Cannot resume training. Max-NPE changed. Was %s, now %s', - local_results_list[0]['max_npe'], FLAGS.max_npe) - if local_results_list[0]['max_global_repetitions'] != FLAGS.num_repetitions: - raise ValueError( - 'Cannot resume training. Number of repetitions changed. 
Was %s, ' - 'now %s', - local_results_list[0]['max_global_repetitions'], - FLAGS.num_repetitions) - start_rep = len(local_results_list) - - for rep in xrange(start_rep, num_local_reps): - global_rep = num_local_reps * FLAGS.task_id + rep - logging.info( - 'Starting repetition: Rep = %d. (global rep = %d)', - rep, global_rep) - - # Save data for each rep, like checkpoints, goes into separate folders. - run_dir = os.path.join(logdir, 'run_%d' % global_rep) - - if not tf.gfile.IsDirectory(run_dir): - tf.gfile.MakeDirs(run_dir) - checkpoint_writer = CheckpointWriter(run_dir, - population_size=config.batch_size) - - data_manager = data.DataManager(config, run_number=global_rep) - task_eval_fn = ga_lib.make_task_eval_fn(data_manager.rl_task) - - if config.agent.algorithm == 'rand': - logging.info('Running random search.') - assert FLAGS.max_npe - result = run_random_search( - FLAGS.max_npe, run_dir, task_eval_fn, config.timestep_limit) - else: - assert config.agent.algorithm == 'ga' - logging.info('Running genetic algorithm.') - pop = ga_lib.make_population( - ga_lib.random_individual(config.timestep_limit), - n=config.batch_size) - hof = utils.MaxUniquePriorityQueue(2) # Hall of fame. - result = ga_lib.ga_loop( - pop, - cxpb=config.agent.crossover_rate, mutpb=config.agent.mutation_rate, - task_eval_fn=task_eval_fn, - ngen=max_generations, halloffame=hof, - checkpoint_writer=checkpoint_writer) - - logging.info('Finished rep. 
Num gens: %d', result.generations) - - results_dict = { - 'max_npe': FLAGS.max_npe, - 'batch_size': config.batch_size, - 'max_batches': FLAGS.max_npe // config.batch_size, - 'npe': result.num_programs, - 'max_global_repetitions': FLAGS.num_repetitions, - 'max_local_repetitions': num_local_reps, - 'code_solution': result.best_code if result.solution_found else '', - 'best_reward': result.reward, - 'num_batches': result.generations, - 'found_solution': result.solution_found, - 'task': data_manager.task_name, - 'global_rep': global_rep} - logging.info('results_dict: %s', results_dict) - results.append(results_dict) - - if is_chief: - logging.info( - 'Worker is chief. Waiting for all workers to finish so that results ' - 'can be reported to the tuner.') - - global_results_list, shard_stats = results.read_all( - num_shards=FLAGS.num_workers) - while not all(s.finished for s in shard_stats): - logging.info( - 'Still waiting on these workers: %s', - ', '.join( - ['%d (%d reps left)' - % (i, s.max_local_reps - s.num_local_reps_completed) - for i, s in enumerate(shard_stats) - if not s.finished])) - sleep(60) - global_results_list, shard_stats = results.read_all( - num_shards=FLAGS.num_workers) - - logging.info( - '%d results obtained. Chief worker is exiting the experiment.', - len(global_results_list)) - - return global_results_list - - -def run_random_search(max_num_programs, checkpoint_dir, task_eval_fn, - timestep_limit): - """Run uniform random search routine. - - Randomly samples programs from a uniform distribution until either a valid - program is found, or the maximum NPE is reached. Results are written to disk - and returned. - - Args: - max_num_programs: Maximum NPE (number of programs executed). If no solution - is found after this many programs are tried, the run is stopped and - considered a failure. - checkpoint_dir: Where to save state during the run. - task_eval_fn: Function that maps code string to result containing total - reward and info about success. 
- timestep_limit: Maximum length of code strings. - - Returns: - ga_lib.GaResult namedtuple instance. This contains the best code and highest - reward found. - """ - checkpoint_file = os.path.join(checkpoint_dir, 'random_search.txt') - num_programs_seen = 0 - found_solution = False - best_code = '' - best_reward = 0.0 - if tf.gfile.Exists(checkpoint_file): - try: - with tf.gfile.FastGFile(checkpoint_file, 'r') as f: - lines = list(f) - num_programs_seen = int(lines[0]) - found_solution = bool(int(lines[1])) - if found_solution: - best_code = lines[2] - best_reward = float(lines[3]) - except: # pylint: disable=bare-except - pass - - while not found_solution and num_programs_seen < max_num_programs: - if num_programs_seen % 1000 == 0: - logging.info('num_programs_seen = %d', num_programs_seen) - with tf.gfile.FastGFile(checkpoint_file, 'w') as f: - f.write(str(num_programs_seen) + '\n') - f.write(str(int(found_solution)) + '\n') - - code = np.random.choice(ga_lib.GENES, timestep_limit).tolist() - res = task_eval_fn(code) - found_solution = res.correct - num_programs_seen += 1 - - if found_solution: - best_code = ''.join(code) - best_reward = res.reward - - logging.info('num_programs_seen = %d', num_programs_seen) - logging.info('found solution: %s', found_solution) - with tf.gfile.FastGFile(checkpoint_file, 'w') as f: - f.write(str(num_programs_seen) + '\n') - f.write(str(int(found_solution)) + '\n') - if found_solution: - f.write(best_code + '\n') - f.write(str(best_reward) + '\n') - - return ga_lib.GaResult( - population=[], best_code=best_code, reward=best_reward, - solution_found=found_solution, generations=num_programs_seen, - num_programs=num_programs_seen, max_generations=max_num_programs, - max_num_programs=max_num_programs) diff --git a/research/brain_coder/single_task/ga_train_test.py b/research/brain_coder/single_task/ga_train_test.py deleted file mode 100644 index ff69ad84952a3fb90cad28b3cf8e67ff55c96e95..0000000000000000000000000000000000000000 --- 
a/research/brain_coder/single_task/ga_train_test.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for ga_train. - -Tests that ga runs for a few generations without crashing. -""" - -from absl import flags -import tensorflow as tf - -from single_task import defaults # brain coder -from single_task import run # brain coder - -FLAGS = flags.FLAGS - - -class GaTest(tf.test.TestCase): - - def RunTrainingSteps(self, config_string, num_steps=10): - """Run a few training steps with the given config. - - Just check that nothing crashes. - - Args: - config_string: Config encoded in a string. See - $REPO_PATH/common/config_lib.py - num_steps: Number of training steps to run. Defaults to 10. - """ - config = defaults.default_config_with_updates(config_string) - FLAGS.max_npe = num_steps * config.batch_size - FLAGS.logdir = tf.test.get_temp_dir() - FLAGS.config = config_string - run.main(None) - - def testGeneticAlgorithm(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="ga"),' - 'timestep_limit=40,batch_size=64') - - def testUniformRandomSearch(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="rand"),' - 'timestep_limit=40,batch_size=64') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/launch_training.sh b/research/brain_coder/single_task/launch_training.sh deleted file mode 100755 index a4a4688ed2912792185aa8f3134b1680fed6f006..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/launch_training.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Launches training jobs. -# Modify this file to launch workers with your prefered cloud API. -# The following implementation runs each worker as a subprocess on the local -# machine. - -MODELS_DIR="/tmp/models" - -# Get command line options. 
-OPTS=$(getopt -n "$0" -o "" --long "job_name:,config:,num_workers:,num_ps:,max_npe:,num_repetitions:,stop_on_success:" -- "$@") -if [ $? != 0 ] ; then echo "Failed parsing options." >&2 ; exit 1 ; fi - -eval set -- "$OPTS" - -JOB_NAME="" # Name of the process and the logs directory. -CONFIG="" # Model and environment hparams. -# NUM_WORKERS: Number of workers to launch for this training job. If using -# neural networks, each worker will be 1 replica. -NUM_WORKERS=1 -# NUM_PS: Number of parameter servers to launch for this training job. Only set -# this if using neural networks. For 1 worker, no parameter servers are needed. -# For more than 1 worker, at least 1 parameter server is needed to store the -# global model. -NUM_PS=0 -# MAX_NPE: Maximum number of programs executed. Training will quit once this -# threshold is reached. If 0, the threshold is infinite. -MAX_NPE=0 -NUM_REPETITIONS=1 # How many times to run this experiment. -STOP_ON_SUCCESS=true # Whether to halt training when a solution is found. - -# Parse options into variables. -while true; do - case "$1" in - --job_name ) JOB_NAME="$2"; shift; shift ;; - --config ) CONFIG="$2"; shift; shift ;; - --num_workers ) NUM_WORKERS="$2"; shift; shift ;; - --num_ps ) NUM_PS="$2"; shift; shift ;; - --max_npe ) MAX_NPE="$2"; shift; shift ;; - --num_repetitions ) NUM_REPETITIONS="$2"; shift; shift ;; - --stop_on_success ) STOP_ON_SUCCESS="$2"; shift; shift ;; - -- ) shift; break ;; - * ) break ;; - esac -done - -# Launch jobs. -# TODO: multi-worker RL training - -LOGDIR="$MODELS_DIR/$JOB_NAME" -mkdir -p $LOGDIR - -BIN_DIR="bazel-bin/single_task" -for (( i=0; i "$LOGDIR/task_$i.log" & # Run as subprocess - echo "Launched task $i. Logs: $LOGDIR/task_$i.log" -done - - -# Use "pidof run.par" to find jobs. 
-# Kill with "pkill run.par" diff --git a/research/brain_coder/single_task/launch_tuning.sh b/research/brain_coder/single_task/launch_tuning.sh deleted file mode 100755 index 97ce51b543e13d4b1c412656a93197b5b47373bb..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/launch_tuning.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -# Launches tuning jobs. -# Modify this file to launch workers with your prefered cloud API. -# The following implementation runs each worker as a subprocess on the local -# machine. - -MODELS_DIR="/tmp/models" - -# Get command line options. -OPTS=$(getopt -n "$0" -o "" --long "job_name:,config:,num_tuners:,num_workers_per_tuner:,num_ps_per_tuner:,max_npe:,num_repetitions:,stop_on_success:,fixed_hparams:,hparam_space_type:" -- "$@") -if [ $? != 0 ] ; then echo "Failed parsing options." >&2 ; exit 1 ; fi - -eval set -- "$OPTS" - -JOB_NAME="" # Name of the process and the logs directory. -CONFIG="" # Model and environment hparams. -# NUM_TUNERS: Number of tuning jobs to launch. Each tuning job can train a -# hparam combination. So more tuners means more hparams tried in parallel. -NUM_TUNERS=1 -# NUM_WORKERS_PER_TUNER: Number of workers to launch for each tuning job. If -# using neural networks, each worker will be 1 replica. -NUM_WORKERS_PER_TUNER=1 -# NUM_PS_PER_TUNER: Number of parameter servers to launch for this tuning job. -# Only set this if using neural networks. For 1 worker per tuner, no parameter -# servers are needed. For more than 1 worker per tuner, at least 1 parameter -# server per tuner is needed to store the global model for each tuner. -NUM_PS_PER_TUNER=0 -# MAX_NPE: Maximum number of programs executed. Training will quit once this -# threshold is reached. If 0, the threshold is infinite. -MAX_NPE=0 -NUM_REPETITIONS=25 # How many times to run this experiment. -STOP_ON_SUCCESS=true # Whether to halt training when a solution is found. -# FIXED_HPARAMS: Hold hparams fixed in the grid search. 
This reduces the search -# space. -FIXED_HPARAMS="" -# HPARAM_SPACE_TYPE: Specifies the hparam search space. See -# `define_tuner_hparam_space` functions defined in pg_train.py and ga_train.py. -HPARAM_SPACE_TYPE="pg" - -# Parse options into variables. -while true; do - case "$1" in - --job_name ) JOB_NAME="$2"; shift; shift ;; - --config ) CONFIG="$2"; shift; shift ;; - --num_tuners ) NUM_TUNERS="$2"; shift; shift ;; - --num_workers_per_tuner ) NUM_WORKERS_PER_TUNER="$2"; shift; shift ;; - --num_ps_per_tuner ) NUM_PS_PER_TUNER="$2"; shift; shift ;; - --max_npe ) MAX_NPE="$2"; shift; shift ;; - --num_repetitions ) NUM_REPETITIONS="$2"; shift; shift ;; - --stop_on_success ) STOP_ON_SUCCESS="$2"; shift; shift ;; - --fixed_hparams ) FIXED_HPARAMS="$2"; shift; shift ;; - --hparam_space_type ) HPARAM_SPACE_TYPE="$2"; shift; shift ;; - -- ) shift; break ;; - * ) break ;; - esac -done - -# Launch jobs. -# TODO: multi-worker RL training - -LOGDIR="$MODELS_DIR/$JOB_NAME" -mkdir -p $LOGDIR - -BIN_DIR="bazel-bin/single_task" -for ((tuner=0;tuner "$LOGDIR/tuner_$tuner.task_$i.log" & # Run as subprocess - echo "Launched tuner $tuner, task $i. Logs: $LOGDIR/tuner_$tuner.task_$i.log" - done -done - -# Use "pidof tune.par" to find jobs. -# Kill with "pkill tune.par" diff --git a/research/brain_coder/single_task/misc.py b/research/brain_coder/single_task/misc.py deleted file mode 100644 index 07061d81c8aaafd4d97efc11ecca451528c6e9dd..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/misc.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Utilities specific to this project.""" - -from collections import namedtuple -from six import string_types - - -##################### -# BF-lang utilities # -##################### - - -BF_EOS_INT = 0 # Also used as SOS (start of sequence). 
-BF_EOS_CHAR = TEXT_EOS_CHAR = '_' -BF_LANG_INTS = range(1, 9) -BF_INT_TO_CHAR = [BF_EOS_CHAR, '>', '<', '+', '-', '[', ']', '.', ','] -BF_CHAR_TO_INT = dict([(c, i) for i, c in enumerate(BF_INT_TO_CHAR)]) - - -RewardInfo = namedtuple('RewardInfo', ['episode_rewards', 'input_case', - 'correct_output', - 'code_output', 'reason', 'input_type', - 'output_type']) - - -class IOType(object): - string = 'string' - integer = 'integer' - boolean = 'boolean' - - -class IOTuple(tuple): - pass - - -def flatten(lst): - return [item for row in lst for item in row] - - -def bf_num_tokens(): - # BF tokens plus EOS. - return len(BF_INT_TO_CHAR) - - -def bf_char2int(bf_char): - """Convert BF code char to int token.""" - return BF_CHAR_TO_INT[bf_char] - - -def bf_int2char(bf_int): - """Convert BF int token to code char.""" - return BF_INT_TO_CHAR[bf_int] - - -def bf_tokens_to_string(bf_tokens, truncate=True): - """Convert token list to code string. Will truncate at EOS token. - - Args: - bf_tokens: Python list of ints representing the code string. - truncate: If true, the output string will end at the first EOS token. - If false, the entire token list is converted to string. - - Returns: - String representation of the tokens. - - Raises: - ValueError: If bf_tokens is not a python list. - """ - if not isinstance(bf_tokens, list): - raise ValueError('Only python list supported here.') - if truncate: - try: - eos_index = bf_tokens.index(BF_EOS_INT) - except ValueError: - eos_index = len(bf_tokens) - else: - eos_index = len(bf_tokens) - return ''.join([BF_INT_TO_CHAR[t] for t in bf_tokens[:eos_index]]) - - -def bf_string_to_tokens(bf_string): - """Convert string to token list. 
Will strip and append EOS token.""" - tokens = [BF_CHAR_TO_INT[char] for char in bf_string.strip()] - tokens.append(BF_EOS_INT) - return tokens - - -def tokens_to_text(tokens): - """Convert token list to human readable text.""" - return ''.join( - [TEXT_EOS_CHAR if t == 0 else chr(t - 1 + ord('A')) for t in tokens]) - - -################################### -# Number representation utilities # -################################### - - -# https://en.wikipedia.org/wiki/Metric_prefix -si_magnitudes = { - 'k': 1e3, - 'm': 1e6, - 'g': 1e9} - - -def si_to_int(s): - """Convert string ending with SI magnitude to int. - - Examples: 5K ==> 5000, 12M ==> 12000000. - - Args: - s: String in the form 'xx..xP' where x is a digit and P is an SI prefix. - - Returns: - Integer equivalent to the string. - """ - if isinstance(s, string_types) and s[-1].lower() in si_magnitudes.keys(): - return int(int(s[:-1]) * si_magnitudes[s[-1].lower()]) - return int(s) - - -def int_to_si(n): - """Convert integer to string with SI magnitude. - - `n` will be truncated. - - Examples: 5432 ==> 5k, 12345678 ==> 12M - - Args: - n: Integer to represent as a string. - - Returns: - String representation of `n` containing SI magnitude. - """ - m = abs(n) - sign = -1 if n < 0 else 1 - if m < 1e3: - return str(n) - if m < 1e6: - return '{0}K'.format(sign*int(m / 1e3)) - if m < 1e9: - return '{0}M'.format(sign*int(m / 1e6)) - if m < 1e12: - return '{0}G'.format(sign*int(m / 1e9)) - return str(m) - diff --git a/research/brain_coder/single_task/pg_agent.py b/research/brain_coder/single_task/pg_agent.py deleted file mode 100644 index 13fc7da2dc89a1fbcc7fa5efbbce87008580aa92..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/pg_agent.py +++ /dev/null @@ -1,1297 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Language model agent. - -Agent outputs code in a sequence just like a language model. 
Can be trained -as a language model or using RL, or a combination of the two. -""" - -from collections import namedtuple -from math import exp -from math import log -import time - -from absl import logging -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from common import rollout as rollout_lib # brain coder -from common import utils # brain coder -from single_task import misc # brain coder - - -# Experiments in the ICLR 2018 paper used reduce_sum instead of reduce_mean for -# some losses. We make all loses be batch_size independent, and multiply the -# changed losses by 64, which was the fixed batch_size when the experiments -# where run. The loss hyperparameters still match what is reported in the paper. -MAGIC_LOSS_MULTIPLIER = 64 - - -def rshift_time(tensor_2d, fill=misc.BF_EOS_INT): - """Right shifts a 2D tensor along the time dimension (axis-1).""" - dim_0 = tf.shape(tensor_2d)[0] - fill_tensor = tf.fill([dim_0, 1], fill) - return tf.concat([fill_tensor, tensor_2d[:, :-1]], axis=1) - - -def join(a, b): - # Concat a and b along 0-th dim. - if a is None or len(a) == 0: # pylint: disable=g-explicit-length-test - return b - if b is None or len(b) == 0: # pylint: disable=g-explicit-length-test - return a - return np.concatenate((a, b)) - - -def make_optimizer(kind, lr): - if kind == 'sgd': - return tf.train.GradientDescentOptimizer(lr) - elif kind == 'adam': - return tf.train.AdamOptimizer(lr) - elif kind == 'rmsprop': - return tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.99) - else: - raise ValueError('Optimizer type "%s" not recognized.' 
% kind) - - -class LinearWrapper(tf.contrib.rnn.RNNCell): - """RNNCell wrapper that adds a linear layer to the output.""" - - def __init__(self, cell, output_size, dtype=tf.float32, suppress_index=None): - self.cell = cell - self._output_size = output_size - self._dtype = dtype - self._suppress_index = suppress_index - self.smallest_float = -2.4e38 - - def __call__(self, inputs, state, scope=None): - with tf.variable_scope(type(self).__name__): - outputs, state = self.cell(inputs, state, scope=scope) - logits = tf.matmul( - outputs, - tf.get_variable('w_output', - [self.cell.output_size, self.output_size], - dtype=self._dtype)) - if self._suppress_index is not None: - # Replace the target index with -inf, so that it never gets selected. - batch_size = tf.shape(logits)[0] - logits = tf.concat( - [logits[:, :self._suppress_index], - tf.fill([batch_size, 1], self.smallest_float), - logits[:, self._suppress_index + 1:]], - axis=1) - - return logits, state - - @property - def output_size(self): - return self._output_size - - @property - def state_size(self): - return self.cell.state_size - - def zero_state(self, batch_size, dtype): - return self.cell.zero_state(batch_size, dtype) - - -UpdateStepResult = namedtuple( - 'UpdateStepResult', - ['global_step', 'global_npe', 'summaries_list', 'gradients_dict']) - - -class AttrDict(dict): - """Dict with attributes as keys. 
- - https://stackoverflow.com/a/14620633 - """ - - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - - -class LMAgent(object): - """Language model agent.""" - action_space = misc.bf_num_tokens() - observation_space = misc.bf_num_tokens() - - def __init__(self, global_config, task_id=0, - logging_file=None, - experience_replay_file=None, - global_best_reward_fn=None, - found_solution_op=None, - assign_code_solution_fn=None, - program_count=None, - do_iw_summaries=False, - stop_on_success=True, - dtype=tf.float32, - verbose_level=0, - is_local=True): - self.config = config = global_config.agent - self.logging_file = logging_file - self.experience_replay_file = experience_replay_file - self.task_id = task_id - self.verbose_level = verbose_level - self.global_best_reward_fn = global_best_reward_fn - self.found_solution_op = found_solution_op - self.assign_code_solution_fn = assign_code_solution_fn - self.parent_scope_name = tf.get_variable_scope().name - self.dtype = dtype - self.allow_eos_token = config.eos_token - self.stop_on_success = stop_on_success - self.pi_loss_hparam = config.pi_loss_hparam - self.vf_loss_hparam = config.vf_loss_hparam - self.is_local = is_local - - self.top_reward = 0.0 - self.embeddings_trainable = True - - self.no_op = tf.no_op() - - self.learning_rate = tf.constant( - config.lr, dtype=dtype, name='learning_rate') - self.initializer = tf.contrib.layers.variance_scaling_initializer( - factor=config.param_init_factor, - mode='FAN_AVG', - uniform=True, - dtype=dtype) # TF's default initializer. 
- tf.get_variable_scope().set_initializer(self.initializer) - - self.a2c = config.ema_baseline_decay == 0 - if not self.a2c: - logging.info('Using exponential moving average REINFORCE baselines.') - self.ema_baseline_decay = config.ema_baseline_decay - self.ema_by_len = [0.0] * global_config.timestep_limit - else: - logging.info('Using advantage (a2c) with learned value function.') - self.ema_baseline_decay = 0.0 - self.ema_by_len = None - - # Top-k - if config.topk and config.topk_loss_hparam: - self.topk_loss_hparam = config.topk_loss_hparam - self.topk_batch_size = config.topk_batch_size - if self.topk_batch_size <= 0: - raise ValueError('topk_batch_size must be a positive integer. Got %s', - self.topk_batch_size) - self.top_episodes = utils.MaxUniquePriorityQueue(config.topk) - logging.info('Made max-priorty-queue with capacity %d', - self.top_episodes.capacity) - else: - self.top_episodes = None - self.topk_loss_hparam = 0.0 - logging.info('No max-priorty-queue') - - # Experience replay. - self.replay_temperature = config.replay_temperature - self.num_replay_per_batch = int(global_config.batch_size * config.alpha) - self.num_on_policy_per_batch = ( - global_config.batch_size - self.num_replay_per_batch) - self.replay_alpha = ( - self.num_replay_per_batch / float(global_config.batch_size)) - logging.info('num_replay_per_batch: %d', self.num_replay_per_batch) - logging.info('num_on_policy_per_batch: %d', self.num_on_policy_per_batch) - logging.info('replay_alpha: %s', self.replay_alpha) - if self.num_replay_per_batch > 0: - # Train with off-policy episodes from replay buffer. - start_time = time.time() - self.experience_replay = utils.RouletteWheel( - unique_mode=True, save_file=experience_replay_file) - logging.info('Took %s sec to load replay buffer from disk.', - int(time.time() - start_time)) - logging.info('Replay buffer file location: "%s"', - self.experience_replay.save_file) - else: - # Only train on-policy. 
- self.experience_replay = None - - if program_count is not None: - self.program_count = program_count - self.program_count_add_ph = tf.placeholder( - tf.int64, [], 'program_count_add_ph') - self.program_count_add_op = self.program_count.assign_add( - self.program_count_add_ph) - - ################################ - # RL policy and value networks # - ################################ - batch_size = global_config.batch_size - logging.info('batch_size: %d', batch_size) - - self.policy_cell = LinearWrapper( - tf.contrib.rnn.MultiRNNCell( - [tf.contrib.rnn.BasicLSTMCell(cell_size) - for cell_size in config.policy_lstm_sizes]), - self.action_space, - dtype=dtype, - suppress_index=None if self.allow_eos_token else misc.BF_EOS_INT) - self.value_cell = LinearWrapper( - tf.contrib.rnn.MultiRNNCell( - [tf.contrib.rnn.BasicLSTMCell(cell_size) - for cell_size in config.value_lstm_sizes]), - 1, - dtype=dtype) - - obs_embedding_scope = 'obs_embed' - with tf.variable_scope( - obs_embedding_scope, - initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0)): - obs_embeddings = tf.get_variable( - 'embeddings', - [self.observation_space, config.obs_embedding_size], - dtype=dtype, trainable=self.embeddings_trainable) - self.obs_embeddings = obs_embeddings - - ################################ - # RL policy and value networks # - ################################ - - initial_state = tf.fill([batch_size], misc.BF_EOS_INT) - def loop_fn(loop_time, cell_output, cell_state, loop_state): - """Function called by tf.nn.raw_rnn to instantiate body of the while_loop. - - See https://www.tensorflow.org/api_docs/python/tf/nn/raw_rnn for more - information. - - When time is 0, and cell_output, cell_state, loop_state are all None, - `loop_fn` will create the initial input, internal cell state, and loop - state. When time > 0, `loop_fn` will operate on previous cell output, - state, and loop state. - - Args: - loop_time: A scalar tensor holding the current timestep (zero based - counting). 
- cell_output: Output of the raw_rnn cell at the current timestep. - cell_state: Cell internal state at the current timestep. - loop_state: Additional loop state. These tensors were returned by the - previous call to `loop_fn`. - - Returns: - elements_finished: Bool tensor of shape [batch_size] which marks each - sequence in the batch as being finished or not finished. - next_input: A tensor containing input to be fed into the cell at the - next timestep. - next_cell_state: Cell internal state to be fed into the cell at the - next timestep. - emit_output: Tensor to be added to the TensorArray returned by raw_rnn - as output from the while_loop. - next_loop_state: Additional loop state. These tensors will be fed back - into the next call to `loop_fn` as `loop_state`. - """ - if cell_output is None: # 0th time step. - next_cell_state = self.policy_cell.zero_state(batch_size, dtype) - elements_finished = tf.zeros([batch_size], tf.bool) - output_lengths = tf.ones([batch_size], dtype=tf.int32) - next_input = tf.gather(obs_embeddings, initial_state) - emit_output = None - next_loop_state = ( - tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True), - output_lengths, - elements_finished - ) - else: - scaled_logits = cell_output * config.softmax_tr # Scale temperature. - prev_chosen, prev_output_lengths, prev_elements_finished = loop_state - next_cell_state = cell_state - chosen_outputs = tf.to_int32(tf.where( - tf.logical_not(prev_elements_finished), - tf.multinomial(logits=scaled_logits, num_samples=1)[:, 0], - tf.zeros([batch_size], dtype=tf.int64))) - elements_finished = tf.logical_or( - tf.equal(chosen_outputs, misc.BF_EOS_INT), - loop_time >= global_config.timestep_limit) - output_lengths = tf.where( - elements_finished, - prev_output_lengths, - # length includes EOS token. empty seq has len 1. 
- tf.tile(tf.expand_dims(loop_time + 1, 0), [batch_size]) - ) - next_input = tf.gather(obs_embeddings, chosen_outputs) - emit_output = scaled_logits - next_loop_state = (prev_chosen.write(loop_time - 1, chosen_outputs), - output_lengths, - tf.logical_or(prev_elements_finished, - elements_finished)) - return (elements_finished, next_input, next_cell_state, emit_output, - next_loop_state) - - with tf.variable_scope('policy'): - (decoder_outputs_ta, - _, # decoder_state - (sampled_output_ta, output_lengths, _)) = tf.nn.raw_rnn( - cell=self.policy_cell, - loop_fn=loop_fn) - policy_logits = tf.transpose(decoder_outputs_ta.stack(), (1, 0, 2), - name='policy_logits') - sampled_tokens = tf.transpose(sampled_output_ta.stack(), (1, 0), - name='sampled_tokens') - # Add SOS to beginning of the sequence. - rshift_sampled_tokens = rshift_time(sampled_tokens, fill=misc.BF_EOS_INT) - - # Initial state is 0, 2nd state is first token. - # Note: If value of last state is computed, this will be used as bootstrap. - if self.a2c: - with tf.variable_scope('value'): - value_output, _ = tf.nn.dynamic_rnn( - self.value_cell, - tf.gather(obs_embeddings, rshift_sampled_tokens), - sequence_length=output_lengths, - dtype=dtype) - value = tf.squeeze(value_output, axis=[2]) - else: - value = tf.zeros([], dtype=dtype) - - # for sampling actions from the agent, and which told tensors for doing - # gradient updates on the agent. - self.sampled_batch = AttrDict( - logits=policy_logits, - value=value, - tokens=sampled_tokens, - episode_lengths=output_lengths, - probs=tf.nn.softmax(policy_logits), - log_probs=tf.nn.log_softmax(policy_logits)) - - # adjusted_lengths can be less than the full length of each episode. - # Use this to train on only part of an episode (starting from t=0). - self.adjusted_lengths = tf.placeholder( - tf.int32, [None], name='adjusted_lengths') - self.policy_multipliers = tf.placeholder( - dtype, - [None, None], - name='policy_multipliers') - # Empirical value, i.e. 
discounted sum of observed future rewards from each - # time step in the episode. - self.empirical_values = tf.placeholder( - dtype, - [None, None], - name='empirical_values') - - # Off-policy training. Just add supervised loss to the RL loss. - self.off_policy_targets = tf.placeholder( - tf.int32, - [None, None], - name='off_policy_targets') - self.off_policy_target_lengths = tf.placeholder( - tf.int32, [None], name='off_policy_target_lengths') - - self.actions = tf.placeholder(tf.int32, [None, None], name='actions') - # Add SOS to beginning of the sequence. - inputs = rshift_time(self.actions, fill=misc.BF_EOS_INT) - with tf.variable_scope('policy', reuse=True): - logits, _ = tf.nn.dynamic_rnn( - self.policy_cell, tf.gather(obs_embeddings, inputs), - sequence_length=self.adjusted_lengths, - dtype=dtype) - - if self.a2c: - with tf.variable_scope('value', reuse=True): - value_output, _ = tf.nn.dynamic_rnn( - self.value_cell, - tf.gather(obs_embeddings, inputs), - sequence_length=self.adjusted_lengths, - dtype=dtype) - value2 = tf.squeeze(value_output, axis=[2]) - else: - value2 = tf.zeros([], dtype=dtype) - - self.given_batch = AttrDict( - logits=logits, - value=value2, - tokens=sampled_tokens, - episode_lengths=self.adjusted_lengths, - probs=tf.nn.softmax(logits), - log_probs=tf.nn.log_softmax(logits)) - - # Episode masks. - max_episode_length = tf.shape(self.actions)[1] - # range_row shape: [1, max_episode_length] - range_row = tf.expand_dims(tf.range(max_episode_length), 0) - episode_masks = tf.cast( - tf.less(range_row, tf.expand_dims(self.given_batch.episode_lengths, 1)), - dtype=dtype) - episode_masks_3d = tf.expand_dims(episode_masks, 2) - - # Length adjusted episodes. 
- self.a_probs = a_probs = self.given_batch.probs * episode_masks_3d - self.a_log_probs = a_log_probs = ( - self.given_batch.log_probs * episode_masks_3d) - self.a_value = a_value = self.given_batch.value * episode_masks - self.a_policy_multipliers = a_policy_multipliers = ( - self.policy_multipliers * episode_masks) - if self.a2c: - self.a_empirical_values = a_empirical_values = ( - self.empirical_values * episode_masks) - - # pi_loss is scalar - acs_onehot = tf.one_hot(self.actions, self.action_space, dtype=dtype) - self.acs_onehot = acs_onehot - chosen_masked_log_probs = acs_onehot * a_log_probs - pi_target = tf.expand_dims(a_policy_multipliers, -1) - pi_loss_per_step = chosen_masked_log_probs * pi_target # Maximize. - self.pi_loss = pi_loss = ( - -tf.reduce_mean(tf.reduce_sum(pi_loss_per_step, axis=[1, 2]), axis=0) - * MAGIC_LOSS_MULTIPLIER) # Minimize. - assert len(self.pi_loss.shape) == 0 # pylint: disable=g-explicit-length-test - - # shape: [batch_size, time] - self.chosen_log_probs = tf.reduce_sum(chosen_masked_log_probs, axis=2) - self.chosen_probs = tf.reduce_sum(acs_onehot * a_probs, axis=2) - - # loss of value function - if self.a2c: - vf_loss_per_step = tf.square(a_value - a_empirical_values) - self.vf_loss = vf_loss = ( - tf.reduce_mean(tf.reduce_sum(vf_loss_per_step, axis=1), axis=0) - * MAGIC_LOSS_MULTIPLIER) # Minimize. - assert len(self.vf_loss.shape) == 0 # pylint: disable=g-explicit-length-test - else: - self.vf_loss = vf_loss = 0.0 - - # Maximize entropy regularizer - self.entropy = entropy = ( - -tf.reduce_mean( - tf.reduce_sum(a_probs * a_log_probs, axis=[1, 2]), axis=0) - * MAGIC_LOSS_MULTIPLIER) # Maximize - self.negentropy = -entropy # Minimize negentropy. - assert len(self.negentropy.shape) == 0 # pylint: disable=g-explicit-length-test - - # off-policy loss - self.offp_switch = tf.placeholder(dtype, [], name='offp_switch') - if self.top_episodes is not None: - # Add SOS to beginning of the sequence. 
- offp_inputs = tf.gather(obs_embeddings, - rshift_time(self.off_policy_targets, - fill=misc.BF_EOS_INT)) - with tf.variable_scope('policy', reuse=True): - offp_logits, _ = tf.nn.dynamic_rnn( - self.policy_cell, offp_inputs, self.off_policy_target_lengths, - dtype=dtype) # shape: [batch_size, time, action_space] - topk_loss_per_step = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=self.off_policy_targets, - logits=offp_logits, - name='topk_loss_per_logit') - # Take mean over batch dimension so that the loss multiplier strength is - # independent of batch size. Sum over time dimension. - topk_loss = tf.reduce_mean( - tf.reduce_sum(topk_loss_per_step, axis=1), axis=0) - assert len(topk_loss.shape) == 0 # pylint: disable=g-explicit-length-test - self.topk_loss = topk_loss * self.offp_switch - logging.info('Including off policy loss.') - else: - self.topk_loss = topk_loss = 0.0 - - self.entropy_hparam = tf.constant( - config.entropy_beta, dtype=dtype, name='entropy_beta') - - self.pi_loss_term = pi_loss * self.pi_loss_hparam - self.vf_loss_term = vf_loss * self.vf_loss_hparam - self.entropy_loss_term = self.negentropy * self.entropy_hparam - self.topk_loss_term = self.topk_loss_hparam * topk_loss - self.loss = ( - self.pi_loss_term - + self.vf_loss_term - + self.entropy_loss_term - + self.topk_loss_term) - - params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, - tf.get_variable_scope().name) - self.trainable_variables = params - self.sync_variables = self.trainable_variables - non_embedding_params = [p for p in params - if obs_embedding_scope not in p.name] - self.non_embedding_params = non_embedding_params - self.params = params - - if config.regularizer: - logging.info('Adding L2 regularizer with scale %.2f.', - config.regularizer) - self.regularizer = config.regularizer * sum( - tf.nn.l2_loss(w) for w in non_embedding_params) - self.loss += self.regularizer - else: - logging.info('Skipping regularizer.') - self.regularizer = 0.0 - - # Only build 
gradients graph for local model. - if self.is_local: - unclipped_grads = tf.gradients(self.loss, params) - self.dense_unclipped_grads = [ - tf.convert_to_tensor(g) for g in unclipped_grads] - self.grads, self.global_grad_norm = tf.clip_by_global_norm( - unclipped_grads, config.grad_clip_threshold) - self.gradients_dict = dict(zip(params, self.grads)) - self.optimizer = make_optimizer(config.optimizer, self.learning_rate) - self.all_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, - tf.get_variable_scope().name) - - self.do_iw_summaries = do_iw_summaries - if self.do_iw_summaries: - b = None - self.log_iw_replay_ph = tf.placeholder(tf.float32, [b], - 'log_iw_replay_ph') - self.log_iw_policy_ph = tf.placeholder(tf.float32, [b], - 'log_iw_policy_ph') - self.log_prob_replay_ph = tf.placeholder(tf.float32, [b], - 'log_prob_replay_ph') - self.log_prob_policy_ph = tf.placeholder(tf.float32, [b], - 'log_prob_policy_ph') - self.log_norm_replay_weights_ph = tf.placeholder( - tf.float32, [b], 'log_norm_replay_weights_ph') - self.iw_summary_op = tf.summary.merge([ - tf.summary.histogram('is/log_iw_replay', self.log_iw_replay_ph), - tf.summary.histogram('is/log_iw_policy', self.log_iw_policy_ph), - tf.summary.histogram('is/log_prob_replay', self.log_prob_replay_ph), - tf.summary.histogram('is/log_prob_policy', self.log_prob_policy_ph), - tf.summary.histogram( - 'is/log_norm_replay_weights', self.log_norm_replay_weights_ph), - ]) - - def make_summary_ops(self): - """Construct summary ops for the model.""" - # size = number of timesteps across entire batch. Number normalized by size - # will not be affected by the amount of padding at the ends of sequences - # in the batch. 
- size = tf.cast( - tf.reduce_sum(self.given_batch.episode_lengths), dtype=self.dtype) - offp_size = tf.cast(tf.reduce_sum(self.off_policy_target_lengths), - dtype=self.dtype) - scope_prefix = self.parent_scope_name - - def _remove_prefix(prefix, name): - assert name.startswith(prefix) - return name[len(prefix):] - - # RL summaries. - self.rl_summary_op = tf.summary.merge( - [tf.summary.scalar('model/policy_loss', self.pi_loss / size), - tf.summary.scalar('model/value_loss', self.vf_loss / size), - tf.summary.scalar('model/topk_loss', self.topk_loss / offp_size), - tf.summary.scalar('model/entropy', self.entropy / size), - tf.summary.scalar('model/loss', self.loss / size), - tf.summary.scalar('model/grad_norm', - tf.global_norm(self.grads)), - tf.summary.scalar('model/unclipped_grad_norm', self.global_grad_norm), - tf.summary.scalar('model/non_embedding_var_norm', - tf.global_norm(self.non_embedding_params)), - tf.summary.scalar('hparams/entropy_beta', self.entropy_hparam), - tf.summary.scalar('hparams/topk_loss_hparam', self.topk_loss_hparam), - tf.summary.scalar('hparams/learning_rate', self.learning_rate), - tf.summary.scalar('model/trainable_var_norm', - tf.global_norm(self.trainable_variables)), - tf.summary.scalar('loss/loss', self.loss), - tf.summary.scalar('loss/entropy', self.entropy_loss_term), - tf.summary.scalar('loss/vf', self.vf_loss_term), - tf.summary.scalar('loss/policy', self.pi_loss_term), - tf.summary.scalar('loss/offp', self.topk_loss_term)] + - [tf.summary.scalar( - 'param_norms/' + _remove_prefix(scope_prefix + '/', p.name), - tf.norm(p)) - for p in self.params] + - [tf.summary.scalar( - 'grad_norms/' + _remove_prefix(scope_prefix + '/', p.name), - tf.norm(g)) - for p, g in zip(self.params, self.grads)] + - [tf.summary.scalar( - 'unclipped_grad_norms/' + _remove_prefix(scope_prefix + '/', - p.name), - tf.norm(g)) - for p, g in zip(self.params, self.dense_unclipped_grads)]) - - self.text_summary_placeholder = tf.placeholder(tf.string, 
shape=[]) - self.rl_text_summary_op = tf.summary.text('rl', - self.text_summary_placeholder) - - def _rl_text_summary(self, session, step, npe, tot_r, num_steps, - input_case, code_output, code, reason): - """Logs summary about a single episode and creates a text_summary for TB. - - Args: - session: tf.Session instance. - step: Global training step. - npe: Number of programs executed so far. - tot_r: Total reward. - num_steps: Number of timesteps in the episode (i.e. code length). - input_case: Inputs for test cases. - code_output: Outputs produced by running the code on the inputs. - code: String representation of the code. - reason: Reason for the reward assigned by the task. - - Returns: - Serialized text summary data for tensorboard. - """ - if not input_case: - input_case = ' ' - if not code_output: - code_output = ' ' - if not code: - code = ' ' - text = ( - 'Tot R: **%.2f**; Len: **%d**; Reason: **%s**\n\n' - 'Input: **`%s`**; Output: **`%s`**\n\nCode: **`%s`**' - % (tot_r, num_steps, reason, input_case, code_output, code)) - text_summary = session.run(self.rl_text_summary_op, - {self.text_summary_placeholder: text}) - logging.info( - 'Step %d.\t NPE: %d\t Reason: %s.\t Tot R: %.2f.\t Length: %d. ' - '\tInput: %s \tOutput: %s \tProgram: %s', - step, npe, reason, tot_r, num_steps, input_case, - code_output, code) - return text_summary - - def _rl_reward_summary(self, total_rewards): - """Create summary ops that report on episode rewards. - - Creates summaries for average, median, max, and min rewards in the batch. - - Args: - total_rewards: Tensor of shape [batch_size] containing the total reward - from each episode in the batch. - - Returns: - tf.Summary op. 
- """ - tr = np.asarray(total_rewards) - reward_summary = tf.Summary(value=[ - tf.Summary.Value( - tag='reward/avg', - simple_value=np.mean(tr)), - tf.Summary.Value( - tag='reward/med', - simple_value=np.median(tr)), - tf.Summary.Value( - tag='reward/max', - simple_value=np.max(tr)), - tf.Summary.Value( - tag='reward/min', - simple_value=np.min(tr))]) - return reward_summary - - def _iw_summary(self, session, replay_iw, replay_log_probs, - norm_replay_weights, on_policy_iw, - on_policy_log_probs): - """Compute summaries for importance weights at a given batch. - - Args: - session: tf.Session instance. - replay_iw: Importance weights for episodes from replay buffer. - replay_log_probs: Total log probabilities of the replay episodes under the - current policy. - norm_replay_weights: Normalized replay weights, i.e. values in `replay_iw` - divided by the total weight in the entire replay buffer. Note, this is - also the probability of selecting each episode from the replay buffer - (in a roulette wheel replay buffer). - on_policy_iw: Importance weights for episodes sampled from the current - policy. - on_policy_log_probs: Total log probabilities of the on-policy episodes - under the current policy. - - Returns: - Serialized TF summaries. Use a summary writer to write these summaries to - disk. - """ - return session.run( - self.iw_summary_op, - {self.log_iw_replay_ph: np.log(replay_iw), - self.log_iw_policy_ph: np.log(on_policy_iw), - self.log_norm_replay_weights_ph: np.log(norm_replay_weights), - self.log_prob_replay_ph: replay_log_probs, - self.log_prob_policy_ph: on_policy_log_probs}) - - def _compute_iw(self, policy_log_probs, replay_weights): - """Compute importance weights for a batch of episodes. - - Arguments are iterables of length batch_size. - - Args: - policy_log_probs: Log probability of each episode under the current - policy. - replay_weights: Weight of each episode in the replay buffer. 0 for - episodes not sampled from the replay buffer (i.e. 
sampled from the - policy). - - Returns: - Numpy array of shape [batch_size] containing the importance weight for - each episode in the batch. - """ - log_total_replay_weight = log(self.experience_replay.total_weight) - - # importance weight - # = 1 / [(1 - a) + a * exp(log(replay_weight / total_weight / p))] - # = 1 / ((1-a) + a*q/p) - a = float(self.replay_alpha) - a_com = 1.0 - a # compliment of a - importance_weights = np.asarray( - [1.0 / (a_com - + a * exp((log(replay_weight) - log_total_replay_weight) - - log_p)) - if replay_weight > 0 else 1.0 / a_com - for log_p, replay_weight - in zip(policy_log_probs, replay_weights)]) - return importance_weights - - def update_step(self, session, rl_batch, train_op, global_step_op, - return_gradients=False): - """Perform gradient update on the model. - - Args: - session: tf.Session instance. - rl_batch: RLBatch instance from data.py. Use DataManager to create a - RLBatch for each call to update_step. RLBatch contains a batch of - tasks. - train_op: A TF op which will perform the gradient update. LMAgent does not - own its training op, so that trainers can do distributed training - and construct a specialized training op. - global_step_op: A TF op which will return the current global step when - run (should not increment it). - return_gradients: If True, the gradients will be saved and returned from - this method call. This is useful for testing. - - Returns: - Results from the update step in a UpdateStepResult namedtuple, including - global step, global NPE, serialized summaries, and optionally gradients. - """ - assert self.is_local - - # Do update for REINFORCE or REINFORCE + replay buffer. - if self.experience_replay is None: - # Train with on-policy REINFORCE. - - # Sample new programs from the policy. 
- num_programs_from_policy = rl_batch.batch_size - (batch_actions, - batch_values, - episode_lengths) = session.run( - [self.sampled_batch.tokens, self.sampled_batch.value, - self.sampled_batch.episode_lengths]) - if episode_lengths.size == 0: - # This should not happen. - logging.warn( - 'Shapes:\n' - 'batch_actions.shape: %s\n' - 'batch_values.shape: %s\n' - 'episode_lengths.shape: %s\n', - batch_actions.shape, batch_values.shape, episode_lengths.shape) - - # Compute rewards. - code_scores = compute_rewards( - rl_batch, batch_actions, episode_lengths) - code_strings = code_scores.code_strings - batch_tot_r = code_scores.total_rewards - test_cases = code_scores.test_cases - code_outputs = code_scores.code_outputs - reasons = code_scores.reasons - - # Process on-policy samples. - batch_targets, batch_returns = process_episodes( - code_scores.batch_rewards, episode_lengths, a2c=self.a2c, - baselines=self.ema_by_len, - batch_values=batch_values) - batch_policy_multipliers = batch_targets - batch_emp_values = batch_returns if self.a2c else [[]] - adjusted_lengths = episode_lengths - - if self.top_episodes: - assert len(self.top_episodes) > 0 # pylint: disable=g-explicit-length-test - off_policy_targets = [ - item for item, _ - in self.top_episodes.random_sample(self.topk_batch_size)] - off_policy_target_lengths = [len(t) for t in off_policy_targets] - off_policy_targets = utils.stack_pad(off_policy_targets, pad_axes=0, - dtype=np.int32) - offp_switch = 1 - else: - off_policy_targets = [[0]] - off_policy_target_lengths = [1] - offp_switch = 0 - - fetches = { - 'global_step': global_step_op, - 'program_count': self.program_count, - 'summaries': self.rl_summary_op, - 'train_op': train_op, - 'gradients': self.gradients_dict if return_gradients else self.no_op} - fetched = session.run( - fetches, - {self.actions: batch_actions, - self.empirical_values: batch_emp_values, - self.policy_multipliers: batch_policy_multipliers, - self.adjusted_lengths: adjusted_lengths, - 
self.off_policy_targets: off_policy_targets, - self.off_policy_target_lengths: off_policy_target_lengths, - self.offp_switch: offp_switch}) - - combined_adjusted_lengths = adjusted_lengths - combined_returns = batch_returns - else: - # Train with REINFORCE + off-policy replay buffer by using importance - # sampling. - - # Sample new programs from the policy. - # Note: batch size is constant. A full batch will be sampled, but not all - # programs will be executed and added to the replay buffer. Those which - # are not executed will be discarded and not counted. - batch_actions, batch_values, episode_lengths, log_probs = session.run( - [self.sampled_batch.tokens, self.sampled_batch.value, - self.sampled_batch.episode_lengths, self.sampled_batch.log_probs]) - if episode_lengths.size == 0: - # This should not happen. - logging.warn( - 'Shapes:\n' - 'batch_actions.shape: %s\n' - 'batch_values.shape: %s\n' - 'episode_lengths.shape: %s\n', - batch_actions.shape, batch_values.shape, episode_lengths.shape) - - # Sample from experince replay buffer - empty_replay_buffer = ( - self.experience_replay.is_empty() - if self.experience_replay is not None else True) - num_programs_from_replay_buff = ( - self.num_replay_per_batch if not empty_replay_buffer else 0) - num_programs_from_policy = ( - rl_batch.batch_size - num_programs_from_replay_buff) - if (not empty_replay_buffer) and num_programs_from_replay_buff: - result = self.experience_replay.sample_many( - num_programs_from_replay_buff) - experience_samples, replay_weights = zip(*result) - (replay_actions, - replay_rewards, - _, # log probs - replay_adjusted_lengths) = zip(*experience_samples) - - replay_batch_actions = utils.stack_pad(replay_actions, pad_axes=0, - dtype=np.int32) - - # compute log probs for replay samples under current policy - all_replay_log_probs, = session.run( - [self.given_batch.log_probs], - {self.actions: replay_batch_actions, - self.adjusted_lengths: replay_adjusted_lengths}) - replay_log_probs = [ - 
np.choose(replay_actions[i], all_replay_log_probs[i, :l].T).sum() - for i, l in enumerate(replay_adjusted_lengths)] - else: - # Replay buffer is empty. Do not sample from it. - replay_actions = None - replay_policy_multipliers = None - replay_adjusted_lengths = None - replay_log_probs = None - replay_weights = None - replay_returns = None - on_policy_weights = [0] * num_programs_from_replay_buff - - assert not self.a2c # TODO(danabo): Support A2C with importance sampling. - - # Compute rewards. - code_scores = compute_rewards( - rl_batch, batch_actions, episode_lengths, - batch_size=num_programs_from_policy) - code_strings = code_scores.code_strings - batch_tot_r = code_scores.total_rewards - test_cases = code_scores.test_cases - code_outputs = code_scores.code_outputs - reasons = code_scores.reasons - - # Process on-policy samples. - p = num_programs_from_policy - batch_targets, batch_returns = process_episodes( - code_scores.batch_rewards, episode_lengths[:p], a2c=False, - baselines=self.ema_by_len) - batch_policy_multipliers = batch_targets - batch_emp_values = [[]] - on_policy_returns = batch_returns - - # Process off-policy samples. - if (not empty_replay_buffer) and num_programs_from_replay_buff: - offp_batch_rewards = [ - [0.0] * (l - 1) + [r] - for l, r in zip(replay_adjusted_lengths, replay_rewards)] - assert len(offp_batch_rewards) == num_programs_from_replay_buff - assert len(replay_adjusted_lengths) == num_programs_from_replay_buff - replay_batch_targets, replay_returns = process_episodes( - offp_batch_rewards, replay_adjusted_lengths, a2c=False, - baselines=self.ema_by_len) - # Convert 2D array back into ragged 2D list. 
- replay_policy_multipliers = [ - replay_batch_targets[i, :l] - for i, l - in enumerate( - replay_adjusted_lengths[:num_programs_from_replay_buff])] - - adjusted_lengths = episode_lengths[:num_programs_from_policy] - - if self.top_episodes: - assert len(self.top_episodes) > 0 # pylint: disable=g-explicit-length-test - off_policy_targets = [ - item for item, _ - in self.top_episodes.random_sample(self.topk_batch_size)] - off_policy_target_lengths = [len(t) for t in off_policy_targets] - off_policy_targets = utils.stack_pad(off_policy_targets, pad_axes=0, - dtype=np.int32) - offp_switch = 1 - else: - off_policy_targets = [[0]] - off_policy_target_lengths = [1] - offp_switch = 0 - - # On-policy episodes. - if num_programs_from_policy: - separate_actions = [ - batch_actions[i, :l] - for i, l in enumerate(adjusted_lengths)] - chosen_log_probs = [ - np.choose(separate_actions[i], log_probs[i, :l].T) - for i, l in enumerate(adjusted_lengths)] - new_experiences = [ - (separate_actions[i], - batch_tot_r[i], - chosen_log_probs[i].sum(), l) - for i, l in enumerate(adjusted_lengths)] - on_policy_policy_multipliers = [ - batch_policy_multipliers[i, :l] - for i, l in enumerate(adjusted_lengths)] - (on_policy_actions, - _, # rewards - on_policy_log_probs, - on_policy_adjusted_lengths) = zip(*new_experiences) - else: - new_experiences = [] - on_policy_policy_multipliers = [] - on_policy_actions = [] - on_policy_log_probs = [] - on_policy_adjusted_lengths = [] - - if (not empty_replay_buffer) and num_programs_from_replay_buff: - # Look for new experiences in replay buffer. Assign weight if an episode - # is in the buffer. - on_policy_weights = [0] * num_programs_from_policy - for i, cs in enumerate(code_strings): - if self.experience_replay.has_key(cs): - on_policy_weights[i] = self.experience_replay.get_weight(cs) - - # Randomly select on-policy or off policy episodes to train on. 
- combined_actions = join(replay_actions, on_policy_actions) - combined_policy_multipliers = join( - replay_policy_multipliers, on_policy_policy_multipliers) - combined_adjusted_lengths = join( - replay_adjusted_lengths, on_policy_adjusted_lengths) - combined_returns = join(replay_returns, on_policy_returns) - combined_actions = utils.stack_pad(combined_actions, pad_axes=0) - combined_policy_multipliers = utils.stack_pad(combined_policy_multipliers, - pad_axes=0) - # P - combined_on_policy_log_probs = join(replay_log_probs, on_policy_log_probs) - # Q - # Assume weight is zero for all sequences sampled from the policy. - combined_q_weights = join(replay_weights, on_policy_weights) - - # Importance adjustment. Naive formulation: - # E_{x~p}[f(x)] ~= 1/N sum_{x~p}(f(x)) ~= 1/N sum_{x~q}(f(x) * p(x)/q(x)). - # p(x) is the policy, and q(x) is the off-policy distribution, i.e. replay - # buffer distribution. Importance weight w(x) = p(x) / q(x). - - # Instead of sampling from the replay buffer only, we sample from a - # mixture distribution of the policy and replay buffer. - # We are sampling from the mixture a*q(x) + (1-a)*p(x), where 0 <= a <= 1. - # Thus the importance weight w(x) = p(x) / (a*q(x) + (1-a)*p(x)) - # = 1 / ((1-a) + a*q(x)/p(x)) where q(x) is 0 for x sampled from the - # policy. - # Note: a = self.replay_alpha - if empty_replay_buffer: - # The replay buffer is empty. - # Do no gradient update this step. The replay buffer will have stuff in - # it next time. 
- combined_policy_multipliers *= 0 - elif not num_programs_from_replay_buff: - combined_policy_multipliers = np.ones([len(combined_actions), 1], - dtype=np.float32) - else: - # If a < 1 compute importance weights - # importance weight - # = 1 / [(1 - a) + a * exp(log(replay_weight / total_weight / p))] - # = 1 / ((1-a) + a*q/p) - importance_weights = self._compute_iw(combined_on_policy_log_probs, - combined_q_weights) - if self.config.iw_normalize: - importance_weights *= ( - float(rl_batch.batch_size) / importance_weights.sum()) - combined_policy_multipliers *= importance_weights.reshape(-1, 1) - - # Train on replay batch, top-k MLE. - assert self.program_count is not None - fetches = { - 'global_step': global_step_op, - 'program_count': self.program_count, - 'summaries': self.rl_summary_op, - 'train_op': train_op, - 'gradients': self.gradients_dict if return_gradients else self.no_op} - fetched = session.run( - fetches, - {self.actions: combined_actions, - self.empirical_values: [[]], # replay_emp_values, - self.policy_multipliers: combined_policy_multipliers, - self.adjusted_lengths: combined_adjusted_lengths, - self.off_policy_targets: off_policy_targets, - self.off_policy_target_lengths: off_policy_target_lengths, - self.offp_switch: offp_switch}) - - # Add to experience replay buffer. - self.experience_replay.add_many( - objs=new_experiences, - weights=[exp(r / self.replay_temperature) for r in batch_tot_r], - keys=code_strings) - - # Update program count. - session.run( - [self.program_count_add_op], - {self.program_count_add_ph: num_programs_from_policy}) - - # Update EMA baselines on the mini-batch which we just did traning on. - if not self.a2c: - for i in xrange(rl_batch.batch_size): - episode_length = combined_adjusted_lengths[i] - empirical_returns = combined_returns[i, :episode_length] - for j in xrange(episode_length): - # Update ema_baselines in place. 
- self.ema_by_len[j] = ( - self.ema_baseline_decay * self.ema_by_len[j] - + (1 - self.ema_baseline_decay) * empirical_returns[j]) - - global_step = fetched['global_step'] - global_npe = fetched['program_count'] - core_summaries = fetched['summaries'] - summaries_list = [core_summaries] - - if num_programs_from_policy: - s_i = 0 - text_summary = self._rl_text_summary( - session, - global_step, - global_npe, - batch_tot_r[s_i], - episode_lengths[s_i], test_cases[s_i], - code_outputs[s_i], code_strings[s_i], reasons[s_i]) - reward_summary = self._rl_reward_summary(batch_tot_r) - - is_best = False - if self.global_best_reward_fn: - # Save best reward. - best_reward = np.max(batch_tot_r) - is_best = self.global_best_reward_fn(session, best_reward) - - if self.found_solution_op is not None and 'correct' in reasons: - session.run(self.found_solution_op) - - # Save program to disk for record keeping. - if self.stop_on_success: - solutions = [ - {'code': code_strings[i], 'reward': batch_tot_r[i], - 'npe': global_npe} - for i in xrange(len(reasons)) if reasons[i] == 'correct'] - elif is_best: - solutions = [ - {'code': code_strings[np.argmax(batch_tot_r)], - 'reward': np.max(batch_tot_r), - 'npe': global_npe}] - else: - solutions = [] - if solutions: - if self.assign_code_solution_fn: - self.assign_code_solution_fn(session, solutions[0]['code']) - with tf.gfile.FastGFile(self.logging_file, 'a') as writer: - for solution_dict in solutions: - writer.write(str(solution_dict) + '\n') - - max_i = np.argmax(batch_tot_r) - max_tot_r = batch_tot_r[max_i] - if max_tot_r >= self.top_reward: - if max_tot_r >= self.top_reward: - self.top_reward = max_tot_r - logging.info('Top code: r=%.2f, \t%s', max_tot_r, code_strings[max_i]) - if self.top_episodes is not None: - self.top_episodes.push( - max_tot_r, tuple(batch_actions[max_i, :episode_lengths[max_i]])) - - summaries_list += [text_summary, reward_summary] - - if self.do_iw_summaries and not empty_replay_buffer: - # prob of replay 
samples under replay buffer sampling. - norm_replay_weights = [ - w / self.experience_replay.total_weight - for w in replay_weights] - replay_iw = self._compute_iw(replay_log_probs, replay_weights) - on_policy_iw = self._compute_iw(on_policy_log_probs, on_policy_weights) - summaries_list.append( - self._iw_summary( - session, replay_iw, replay_log_probs, norm_replay_weights, - on_policy_iw, on_policy_log_probs)) - - return UpdateStepResult( - global_step=global_step, - global_npe=global_npe, - summaries_list=summaries_list, - gradients_dict=fetched['gradients']) - - -def io_to_text(io_case, io_type): - if isinstance(io_case, misc.IOTuple): - # If there are many strings, join them with ','. - return ','.join([io_to_text(e, io_type) for e in io_case]) - if io_type == misc.IOType.string: - # There is one string. Return it. - return misc.tokens_to_text(io_case) - if (io_type == misc.IOType.integer - or io_type == misc.IOType.boolean): - if len(io_case) == 1: - return str(io_case[0]) - return str(io_case) - - -CodeScoreInfo = namedtuple( - 'CodeScoreInfo', - ['code_strings', 'batch_rewards', 'total_rewards', 'test_cases', - 'code_outputs', 'reasons']) - - -def compute_rewards(rl_batch, batch_actions, episode_lengths, batch_size=None): - """Compute rewards for each episode in the batch. - - Args: - rl_batch: A data.RLBatch instance. This holds information about the task - each episode is solving, and a reward function for each episode. - batch_actions: Contains batch of episodes. Each sequence of actions will be - converted into a BF program and then scored. A numpy array of shape - [batch_size, max_sequence_length]. - episode_lengths: The sequence length of each episode in the batch. Iterable - of length batch_size. - batch_size: (optional) number of programs to score. Use this to limit the - number of programs executed from this batch. For example, when doing - importance sampling some of the on-policy episodes will be discarded - and they should not be executed. 
`batch_size` can be less than or equal - to the size of the input batch. - - Returns: - CodeScoreInfo namedtuple instance. This holds not just the computed rewards, - but additional information computed during code execution which can be used - for debugging and monitoring. this includes: BF code strings, test cases - the code was executed on, code outputs from those test cases, and reasons - for success or failure. - """ - code_strings = [ - ''.join([misc.bf_int2char(a) for a in action_sequence[:l]]) - for action_sequence, l in zip(batch_actions, episode_lengths)] - if batch_size is None: - batch_size = len(code_strings) - else: - assert batch_size <= len(code_strings) - code_strings = code_strings[:batch_size] - - if isinstance(rl_batch.reward_fns, (list, tuple)): - # reward_fns is a list of functions, same length as code_strings. - assert len(rl_batch.reward_fns) >= batch_size - r_fn_results = [ - rl_batch.reward_fns[i](code_strings[i]) for i in xrange(batch_size)] - else: - # reward_fns is allowed to be one function which processes a batch of code - # strings. This is useful for efficiency and batch level computation. - r_fn_results = rl_batch.reward_fns(code_strings) - - # Expecting that r_fn returns a list of rewards. Length of list equals - # length of the code string (including EOS char). - - batch_rewards = [r.episode_rewards for r in r_fn_results] - total_rewards = [sum(b) for b in batch_rewards] - test_cases = [io_to_text(r.input_case, r.input_type) for r in r_fn_results] - code_outputs = [io_to_text(r.code_output, r.output_type) - for r in r_fn_results] - reasons = [r.reason for r in r_fn_results] - return CodeScoreInfo( - code_strings=code_strings, - batch_rewards=batch_rewards, - total_rewards=total_rewards, - test_cases=test_cases, - code_outputs=code_outputs, - reasons=reasons) - - -def process_episodes( - batch_rewards, episode_lengths, a2c=False, baselines=None, - batch_values=None): - """Compute REINFORCE targets. 
- - REINFORCE here takes the form: - grad_t = grad[log(pi(a_t|c_t))*target_t] - where c_t is context: i.e. RNN state or environment state (or both). - - Two types of targets are supported: - 1) Advantage actor critic (a2c). - 2) Vanilla REINFORCE with baseline. - - Args: - batch_rewards: Rewards received in each episode in the batch. A numpy array - of shape [batch_size, max_sequence_length]. Note, these are per-timestep - rewards, not total reward. - episode_lengths: Length of each episode. An iterable of length batch_size. - a2c: A bool. Whether to compute a2c targets (True) or vanilla targets - (False). - baselines: If a2c is False, provide baselines for each timestep. This is a - list (or indexable container) of length max_time. Note: baselines are - shared across all episodes, which is why there is no batch dimension. - It is up to the caller to update baselines accordingly. - batch_values: If a2c is True, provide values computed by a value estimator. - A numpy array of shape [batch_size, max_sequence_length]. - - Returns: - batch_targets: REINFORCE targets for each episode and timestep. A numpy - array of shape [batch_size, max_sequence_length]. - batch_returns: Returns computed for each episode and timestep. This is for - reference, and is not used in the REINFORCE gradient update (but was - used to compute the targets). A numpy array of shape - [batch_size, max_sequence_length]. - """ - num_programs = len(batch_rewards) - assert num_programs <= len(episode_lengths) - batch_returns = [None] * num_programs - batch_targets = [None] * num_programs - for i in xrange(num_programs): - episode_length = episode_lengths[i] - assert len(batch_rewards[i]) == episode_length - # Compute target for each timestep. - # If we are computing A2C: - # target_t = advantage_t = R_t - V(c_t) - # where V(c_t) is a learned value function (provided as `values`). - # Otherwise: - # target_t = R_t - baselines[t] - # where `baselines` are provided. 
- # In practice we use a more generalized formulation of advantage. See docs - # for `discounted_advantage_and_rewards`. - if a2c: - # Compute advantage. - assert batch_values is not None - episode_values = batch_values[i, :episode_length] - episode_rewards = batch_rewards[i] - emp_val, gen_adv = rollout_lib.discounted_advantage_and_rewards( - episode_rewards, episode_values, gamma=1.0, lambda_=1.0) - batch_returns[i] = emp_val - batch_targets[i] = gen_adv - else: - # Compute return for each timestep. See section 3 of - # https://arxiv.org/pdf/1602.01783.pdf - assert baselines is not None - empirical_returns = rollout_lib.discount(batch_rewards[i], gamma=1.0) - targets = [None] * episode_length - for j in xrange(episode_length): - targets[j] = empirical_returns[j] - baselines[j] - batch_returns[i] = empirical_returns - batch_targets[i] = targets - batch_returns = utils.stack_pad(batch_returns, 0) - if num_programs: - batch_targets = utils.stack_pad(batch_targets, 0) - else: - batch_targets = np.array([], dtype=np.float32) - - return (batch_targets, batch_returns) diff --git a/research/brain_coder/single_task/pg_agent_test.py b/research/brain_coder/single_task/pg_agent_test.py deleted file mode 100644 index 503d37ecacbf968b0786b3553e6a97667569bf7d..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/pg_agent_test.py +++ /dev/null @@ -1,395 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for pg_agent.""" - -from collections import Counter - -from absl import logging -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from common import utils # brain coder -from single_task import data # brain coder -from single_task import defaults # brain coder -from single_task import misc # brain coder -from single_task import pg_agent as agent_lib # brain coder -from single_task import pg_train # brain coder - - -# Symmetric mean absolute 
percentage error (SMAPE). -# https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error -def smape(a, b): - return 2.0 * abs(a - b) / float(a + b) - - -def onehot(dim, num_dims): - value = np.zeros(num_dims, dtype=np.float32) - value[dim] = 1 - return value - - -def random_sequence(max_length, num_tokens, eos=0): - length = np.random.randint(1, max_length - 1) - return np.append(np.random.randint(1, num_tokens, length), eos) - - -def repeat_and_pad(v, rep, total_len): - return [v] * rep + [0.0] * (total_len - rep) - - -class AgentTest(tf.test.TestCase): - - def testProcessEpisodes(self): - batch_size = 3 - - def reward_fn(code_string): - return misc.RewardInfo( - episode_rewards=[float(ord(c)) for c in code_string], - input_case=[], - correct_output=[], - code_output=[], - input_type=misc.IOType.integer, - output_type=misc.IOType.integer, - reason='none') - - rl_batch = data.RLBatch( - reward_fns=[reward_fn for _ in range(batch_size)], - batch_size=batch_size, - good_reward=10.0) - batch_actions = np.asarray([ - [4, 5, 3, 6, 8, 1, 0, 0], - [1, 2, 3, 4, 0, 0, 0, 0], - [8, 7, 6, 5, 4, 3, 2, 1]], dtype=np.int32) - batch_values = np.asarray([ - [0, 1, 2, 1, 0, 1, 1, 0], - [0, 2, 1, 2, 1, 0, 0, 0], - [0, 1, 1, 0, 0, 0, 1, 1]], dtype=np.float32) - episode_lengths = np.asarray([7, 5, 8], dtype=np.int32) - - scores = agent_lib.compute_rewards( - rl_batch, batch_actions, episode_lengths) - batch_targets, batch_returns = agent_lib.process_episodes( - scores.batch_rewards, episode_lengths, a2c=True, - batch_values=batch_values) - self.assertEqual( - [[473.0, 428.0, 337.0, 294.0, 201.0, 157.0, 95.0, 0.0], - [305.0, 243.0, 183.0, 140.0, 95.0, 0.0, 0.0, 0.0], - [484.0, 440.0, 394.0, 301.0, 210.0, 165.0, 122.0, 62.0]], - batch_returns.tolist()) - self.assertEqual( - [[473.0, 427.0, 335.0, 293.0, 201.0, 156.0, 94.0, 0.0], - [305.0, 241.0, 182.0, 138.0, 94.0, 0.0, 0.0, 0.0], - [484.0, 439.0, 393.0, 301.0, 210.0, 165.0, 121.0, 61.0]], - batch_targets.tolist()) - - def 
testVarUpdates(self): - """Tests that variables get updated as expected. - - For the RL update, check that gradients are non-zero and that the global - model gets updated. - """ - config = defaults.default_config_with_updates( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",eos_token=True,optimizer="sgd",lr=1.0)') - lr = config.agent.lr - - tf.reset_default_graph() - trainer = pg_train.AsyncTrainer( - config, task_id=0, ps_tasks=0, num_workers=1) - global_init_op = tf.variables_initializer( - tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'global')) - with tf.Session() as sess: - sess.run(global_init_op) # Initialize global copy. - trainer.initialize(sess) - model = trainer.model - global_vars = sess.run(trainer.global_model.trainable_variables) - local_vars = sess.run(model.trainable_variables) - - # Make sure names match. - g_prefix = 'global/' - l_prefix = 'local/' - for g, l in zip(trainer.global_model.trainable_variables, - model.trainable_variables): - self.assertEqual(g.name[len(g_prefix):], l.name[len(l_prefix):]) - - # Assert that shapes and values are the same between global and local - # models. - for g, l in zip(global_vars, local_vars): - self.assertEqual(g.shape, l.shape) - self.assertTrue(np.array_equal(g, l)) - - # Make all gradients dense tensors. - for param, grad in model.gradients_dict.items(): - if isinstance(grad, tf.IndexedSlices): - # Converts to dense tensor. - model.gradients_dict[param] = tf.multiply(grad, 1.0) - - # Perform update. - results = model.update_step( - sess, trainer.data_manager.sample_rl_batch(), trainer.train_op, - trainer.global_step, return_gradients=True) - grads_dict = results.gradients_dict - for grad in grads_dict.values(): - self.assertIsNotNone(grad) - self.assertTrue(np.count_nonzero(grad) > 0) - global_update = sess.run(trainer.global_model.trainable_variables) - for tf_var, var_before, var_after in zip( - model.trainable_variables, local_vars, global_update): - # Check that the params were updated. 
- self.assertTrue(np.allclose( - var_after, - var_before - grads_dict[tf_var] * lr)) - - # Test that global to local sync works. - sess.run(trainer.sync_op) - global_vars = sess.run(trainer.global_model.trainable_variables) - local_vars = sess.run(model.trainable_variables) - for l, g in zip(local_vars, global_vars): - self.assertTrue(np.allclose(l, g)) - - def testMonteCarloGradients(self): - """Test Monte Carlo estimate of REINFORCE gradient. - - Test that the Monte Carlo estimate of the REINFORCE gradient is - approximately equal to the true gradient. We compute the true gradient for a - toy environment with a very small action space. - - Similar to section 5 of https://arxiv.org/pdf/1505.00521.pdf. - """ - # Test may have different outcome on different machines due to different - # rounding behavior of float arithmetic. - tf.reset_default_graph() - tf.set_random_seed(12345678987654321) - np.random.seed(1294024302) - max_length = 2 - num_tokens = misc.bf_num_tokens() - eos = misc.BF_EOS_INT - assert eos == 0 - def sequence_iterator(max_length): - """Iterates through all sequences up to the given length.""" - yield [eos] - for a in xrange(1, num_tokens): - if max_length > 1: - for sub_seq in sequence_iterator(max_length - 1): - yield [a] + sub_seq - else: - yield [a] - actions = list(sequence_iterator(max_length)) - - # This batch contains all possible episodes up to max_length. - actions_batch = utils.stack_pad(actions, 0) - lengths_batch = [len(s) for s in actions] - - reward_map = {tuple(a): np.random.randint(-1, 7) for a in actions_batch} - # reward_map = {tuple(a): np.random.normal(3, 1) - # for a in actions_batch} # normal distribution - # reward_map = {tuple(a): 1.0 - # for a in actions_batch} # expected reward is 1 - - n = 100000 # MC sample size. 
- config = defaults.default_config_with_updates( - 'env=c(task="print"),' - 'agent=c(algorithm="pg",optimizer="sgd",lr=1.0,ema_baseline_decay=0.99,' - 'entropy_beta=0.0,topk_loss_hparam=0.0,regularizer=0.0,' - 'policy_lstm_sizes=[10],eos_token=True),' - 'batch_size='+str(n)+',timestep_limit='+str(max_length)) - - dtype = tf.float64 - trainer = pg_train.AsyncTrainer( - config, task_id=0, ps_tasks=0, num_workers=1, dtype=dtype) - model = trainer.model - actions_ph = model.actions - lengths_ph = model.adjusted_lengths - multipliers_ph = model.policy_multipliers - - global_init_op = tf.variables_initializer( - tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'global')) - with tf.Session() as sess, sess.graph.as_default(): - sess.run(global_init_op) # Initialize global copy. - trainer.initialize(sess) - - # Compute exact gradients. - # exact_grads = sum(P(a) * grad(log P(a)) * R(a) for a in actions_batch) - true_loss_unnormalized = 0.0 - exact_grads = [np.zeros(v.shape) for v in model.trainable_variables] - episode_probs_map = {} - grads_map = {} - for a_idx in xrange(len(actions_batch)): - a = actions_batch[a_idx] - grads_result, probs_result, loss = sess.run( - [model.dense_unclipped_grads, model.chosen_probs, model.loss], - {actions_ph: [a], - lengths_ph: [lengths_batch[a_idx]], - multipliers_ph: [ - repeat_and_pad(reward_map[tuple(a)], - lengths_batch[a_idx], - max_length)]}) - # Take product over time axis. - episode_probs_result = np.prod(probs_result[0, :lengths_batch[a_idx]]) - for i in range(0, len(exact_grads)): - exact_grads[i] += grads_result[i] * episode_probs_result - episode_probs_map[tuple(a)] = episode_probs_result - reward_map[tuple(a)] = reward_map[tuple(a)] - grads_map[tuple(a)] = grads_result - true_loss_unnormalized += loss - # Normalize loss. Since each episode is feed into the model one at a time, - # normalization needs to be done manually. - true_loss = true_loss_unnormalized / float(len(actions_batch)) - - # Compute Monte Carlo gradients. 
- # E_a~P[grad(log P(a)) R(a)] is aprox. eq. to - # sum(grad(log P(a)) R(a) for a in actions_sampled_from_P) / n - # where len(actions_sampled_from_P) == n. - # - # In other words, sample from the policy and compute the gradients of the - # log probs weighted by the returns. This will excersize the code in - # agent.py - sampled_actions, sampled_lengths = sess.run( - [model.sampled_tokens, model.episode_lengths]) - pi_multipliers = [ - repeat_and_pad(reward_map[tuple(a)], l, max_length) - for a, l in zip(sampled_actions, sampled_lengths)] - mc_grads_unnormalized, sampled_probs, mc_loss_unnormalized = sess.run( - [model.dense_unclipped_grads, model.chosen_probs, model.loss], - {actions_ph: sampled_actions, - multipliers_ph: pi_multipliers, - lengths_ph: sampled_lengths}) - # Loss is already normalized across the minibatch, so no normalization - # is needed. - mc_grads = mc_grads_unnormalized - mc_loss = mc_loss_unnormalized - - # Make sure true loss and MC loss are similar. - loss_error = smape(true_loss, mc_loss) - self.assertTrue(loss_error < 0.15, msg='actual: %s' % loss_error) - - # Check that probs computed for episodes sampled from the model are the same - # as the recorded true probs. - for i in range(100): - acs = tuple(sampled_actions[i].tolist()) - sampled_prob = np.prod(sampled_probs[i, :sampled_lengths[i]]) - self.assertTrue(np.isclose(episode_probs_map[acs], sampled_prob)) - - # Make sure MC estimates of true probs are close. - counter = Counter(tuple(e) for e in sampled_actions) - for acs, count in counter.iteritems(): - mc_prob = count / float(len(sampled_actions)) - true_prob = episode_probs_map[acs] - error = smape(mc_prob, true_prob) - self.assertTrue( - error < 0.15, - msg='actual: %s; count: %s; mc_prob: %s; true_prob: %s' - % (error, count, mc_prob, true_prob)) - - # Manually recompute MC gradients and make sure they match MC gradients - # computed in TF. 
- mc_grads_recompute = [np.zeros(v.shape) for v in model.trainable_variables] - for i in range(n): - acs = tuple(sampled_actions[i].tolist()) - for i in range(0, len(mc_grads_recompute)): - mc_grads_recompute[i] += grads_map[acs][i] - for i in range(0, len(mc_grads_recompute)): - self.assertTrue(np.allclose(mc_grads[i], mc_grads_recompute[i] / n)) - - # Check angle between gradients as fraction of pi. - for index in range(len(mc_grads)): - v1 = mc_grads[index].reshape(-1) - v2 = exact_grads[index].reshape(-1) - # angle = arccos(v1 . v2 / (|v1|*|v2|)) - angle_rad = np.arccos( - np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))) - logging.info('angle / pi: %s', angle_rad / np.pi) - angle_frac = angle_rad / np.pi - self.assertTrue(angle_frac < 0.02, msg='actual: %s' % angle_frac) - # Check norms. - for index in range(len(mc_grads)): - v1_norm = np.linalg.norm(mc_grads[index].reshape(-1)) - v2_norm = np.linalg.norm(exact_grads[index].reshape(-1)) - error = smape(v1_norm, v2_norm) - self.assertTrue(error < 0.02, msg='actual: %s' % error) - - # Check expected rewards. - # E_a~P[R(a)] approx eq sum(P(a) * R(a) for a in actions) - mc_expected_reward = np.mean( - [reward_map[tuple(a)] for a in sampled_actions]) - exact_expected_reward = np.sum( - [episode_probs_map[k] * reward_map[k] for k in reward_map]) - error = smape(mc_expected_reward, exact_expected_reward) - self.assertTrue(error < 0.005, msg='actual: %s' % angle_frac) - - def testNumericalGradChecking(self): - # Similar to - # http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization. 
- epsilon = 1e-4 - eos = misc.BF_EOS_INT - self.assertEqual(0, eos) - config = defaults.default_config_with_updates( - 'env=c(task="print"),' - 'agent=c(algorithm="pg",optimizer="sgd",lr=1.0,ema_baseline_decay=0.99,' - 'entropy_beta=0.0,topk_loss_hparam=0.0,policy_lstm_sizes=[10],' - 'eos_token=True),' - 'batch_size=64') - dtype = tf.float64 - tf.reset_default_graph() - tf.set_random_seed(12345678987654321) - np.random.seed(1294024302) - trainer = pg_train.AsyncTrainer( - config, task_id=0, ps_tasks=0, num_workers=1, dtype=dtype) - model = trainer.model - actions_ph = model.actions - lengths_ph = model.adjusted_lengths - multipliers_ph = model.policy_multipliers - loss = model.pi_loss - global_init_op = tf.variables_initializer( - tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'global')) - - assign_add_placeholders = [None] * len(model.trainable_variables) - assign_add_ops = [None] * len(model.trainable_variables) - param_shapes = [None] * len(model.trainable_variables) - for i, param in enumerate(model.trainable_variables): - param_shapes[i] = param.get_shape().as_list() - assign_add_placeholders[i] = tf.placeholder(dtype, - np.prod(param_shapes[i])) - assign_add_ops[i] = param.assign_add( - tf.reshape(assign_add_placeholders[i], param_shapes[i])) - - with tf.Session() as sess: - sess.run(global_init_op) # Initialize global copy. 
- trainer.initialize(sess) - - actions_raw = [random_sequence(10, 9) for _ in xrange(16)] - actions_batch = utils.stack_pad(actions_raw, 0) - lengths_batch = [len(l) for l in actions_raw] - feed = {actions_ph: actions_batch, - multipliers_ph: np.ones_like(actions_batch), - lengths_ph: lengths_batch} - - estimated_grads = [None] * len(model.trainable_variables) - for i, param in enumerate(model.trainable_variables): - param_size = np.prod(param_shapes[i]) - estimated_grads[i] = np.zeros(param_size, dtype=np.float64) - for index in xrange(param_size): - e = onehot(index, param_size) * epsilon - sess.run(assign_add_ops[i], - {assign_add_placeholders[i]: e}) - j_plus = sess.run(loss, feed) - sess.run(assign_add_ops[i], - {assign_add_placeholders[i]: -2 * e}) - j_minus = sess.run(loss, feed) - sess.run(assign_add_ops[i], - {assign_add_placeholders[i]: e}) - estimated_grads[i][index] = (j_plus - j_minus) / (2 * epsilon) - estimated_grads[i] = estimated_grads[i].reshape(param_shapes[i]) - - analytic_grads = sess.run(model.dense_unclipped_grads, feed) - - for g1, g2 in zip(estimated_grads[1:], analytic_grads[1:]): - logging.info('norm (g1-g2): %s', np.abs(g1 - g2).mean()) - self.assertTrue(np.allclose(g1, g2)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/pg_train.py b/research/brain_coder/single_task/pg_train.py deleted file mode 100644 index fde7cc84729a56002e8688d268a2085432ee124e..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/pg_train.py +++ /dev/null @@ -1,782 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -r"""Train RL agent on coding tasks.""" - -import contextlib -import cPickle -import cProfile -import marshal -import os -import time - -from absl import flags -from absl import logging -import tensorflow as tf - -# internal session lib import - -from single_task import data # brain coder -from single_task 
import defaults # brain coder -from single_task import pg_agent as agent_lib # brain coder -from single_task import results_lib # brain coder - - -FLAGS = flags.FLAGS -flags.DEFINE_string( - 'master', '', - 'URL of the TensorFlow master to use.') -flags.DEFINE_integer( - 'ps_tasks', 0, - 'Number of parameter server tasks. Only set to 0 for ' - 'single worker training.') -flags.DEFINE_integer( - 'summary_interval', 10, - 'How often to write summaries.') -flags.DEFINE_integer( - 'summary_tasks', 16, - 'If greater than 0 only tasks 0 through summary_tasks - 1 ' - 'will write summaries. If 0, all tasks will write ' - 'summaries.') -flags.DEFINE_bool( - 'stop_on_success', True, - 'If True, training will stop as soon as a solution is found. ' - 'If False, training will continue indefinitely until another ' - 'stopping condition is reached.') -flags.DEFINE_bool( - 'do_profiling', False, - 'If True, cProfile profiler will run and results will be ' - 'written to logdir. WARNING: Results will not be written if ' - 'the code crashes. Make sure it exists successfully.') -flags.DEFINE_integer('model_v', 0, 'Model verbosity level.') -flags.DEFINE_bool( - 'delayed_graph_cleanup', True, - 'If true, container for n-th run will not be reset until the (n+1)-th run ' - 'is complete. This greatly reduces the chance that a worker is still ' - 'using the n-th container when it is cleared.') - - -def define_tuner_hparam_space(hparam_space_type): - """Define tunable hparams for grid search.""" - if hparam_space_type not in ('pg', 'pg-topk', 'topk', 'is'): - raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type) - - # Discrete hparam space is stored as a dict from hparam name to discrete - # values. - hparam_space = {} - - if hparam_space_type in ('pg', 'pg-topk', 'is'): - # Add a floating point parameter named learning rate. 
- hparam_space['lr'] = [1e-5, 1e-4, 1e-3] - hparam_space['entropy_beta'] = [0.005, 0.01, 0.05, 0.10] - else: # 'topk' - # Add a floating point parameter named learning rate. - hparam_space['lr'] = [1e-5, 1e-4, 1e-3] - hparam_space['entropy_beta'] = [0.0, 0.005, 0.01, 0.05, 0.10] - - if hparam_space_type in ('topk', 'pg-topk'): - # topk tuning will be enabled. - hparam_space['topk'] = [10] - hparam_space['topk_loss_hparam'] = [1.0, 10.0, 50.0, 200.0] - - elif hparam_space_type == 'is': - # importance sampling tuning will be enabled. - hparam_space['replay_temperature'] = [0.25, 0.5, 1.0, 2.0] - hparam_space['alpha'] = [0.5, 0.75, 63/64.] - - return hparam_space - - -def write_hparams_to_config(config, hparams, hparam_space_type): - """Write hparams given by the tuner into the Config object.""" - if hparam_space_type not in ('pg', 'pg-topk', 'topk', 'is'): - raise ValueError('Hparam space is not valid: "%s"' % hparam_space_type) - - config.agent.lr = hparams.lr - config.agent.entropy_beta = hparams.entropy_beta - - if hparam_space_type in ('topk', 'pg-topk'): - # topk tuning will be enabled. - config.agent.topk = hparams.topk - config.agent.topk_loss_hparam = hparams.topk_loss_hparam - elif hparam_space_type == 'is': - # importance sampling tuning will be enabled. - config.agent.replay_temperature = hparams.replay_temperature - config.agent.alpha = hparams.alpha - - -def make_initialized_variable(value, name, shape=None, dtype=tf.float32): - """Create a tf.Variable with a constant initializer. - - Args: - value: Constant value to initialize the variable with. This is the value - that the variable starts with. - name: Name of the variable in the TF graph. - shape: Shape of the variable. If None, variable will be a scalar. - dtype: Data type of the variable. Should be a TF dtype. Defaults to - tf.float32. - - Returns: - tf.Variable instance. 
- """ - if shape is None: - shape = [] - return tf.get_variable( - name=name, shape=shape, initializer=tf.constant_initializer(value), - dtype=dtype, trainable=False) - - -class AsyncTrainer(object): - """Manages graph creation and training. - - This async trainer creates a global model on the parameter server, and a local - model (for this worker). Gradient updates are sent to the global model, and - the updated weights are synced to the local copy. - """ - - def __init__(self, config, task_id, ps_tasks, num_workers, is_chief=True, - summary_writer=None, - dtype=tf.float32, - summary_interval=1, - run_number=0, - logging_dir='/tmp', model_v=0): - self.config = config - self.data_manager = data.DataManager( - config, run_number=run_number, - do_code_simplification=not FLAGS.stop_on_success) - self.task_id = task_id - self.ps_tasks = ps_tasks - self.is_chief = is_chief - if ps_tasks == 0: - assert task_id == 0, 'No parameter servers specified. Expecting 1 task.' - assert num_workers == 1, ( - 'No parameter servers specified. Expecting 1 task.') - worker_device = '/job:localhost/replica:%d/task:0/cpu:0' % task_id - # worker_device = '/cpu:0' - # ps_device = '/cpu:0' - else: - assert num_workers > 0, 'There must be at least 1 training worker.' 
- worker_device = '/job:worker/replica:%d/task:0/cpu:0' % task_id - # ps_device = '/job:ps/replica:0/task:0/cpu:0' - logging.info('worker_device: %s', worker_device) - - logging_file = os.path.join( - logging_dir, 'solutions_%d.txt' % task_id) - experience_replay_file = os.path.join( - logging_dir, 'replay_buffer_%d.pickle' % task_id) - self.topk_file = os.path.join( - logging_dir, 'topk_buffer_%d.pickle' % task_id) - - tf.get_variable_scope().set_use_resource(True) - - # global model - with tf.device(tf.train.replica_device_setter(ps_tasks, - ps_device='/job:ps/replica:0', - worker_device=worker_device)): - with tf.variable_scope('global'): - global_model = agent_lib.LMAgent(config, dtype=dtype, is_local=False) - global_params_dict = {p.name: p - for p in global_model.sync_variables} - self.global_model = global_model - self.global_step = make_initialized_variable( - 0, 'global_step', dtype=tf.int64) - - self.global_best_reward = make_initialized_variable( - -10.0, 'global_best_reward', dtype=tf.float64) - self.is_best_model = make_initialized_variable( - False, 'is_best_model', dtype=tf.bool) - self.reset_is_best_model = self.is_best_model.assign(False) - self.global_best_reward_placeholder = tf.placeholder( - tf.float64, [], name='global_best_reward_placeholder') - self.assign_global_best_reward_op = tf.group( - self.global_best_reward.assign( - self.global_best_reward_placeholder), - self.is_best_model.assign(True)) - def assign_global_best_reward_fn(session, reward): - reward = round(reward, 10) - best_reward = round(session.run(self.global_best_reward), 10) - is_best = reward > best_reward - if is_best: - session.run(self.assign_global_best_reward_op, - {self.global_best_reward_placeholder: reward}) - return is_best - self.assign_global_best_reward_fn = assign_global_best_reward_fn - - # Any worker will set to true when it finds a solution. 
- self.found_solution_flag = make_initialized_variable( - False, 'found_solution_flag', dtype=tf.bool) - self.found_solution_op = self.found_solution_flag.assign(True) - - self.run_number = make_initialized_variable( - run_number, 'run_number', dtype=tf.int32) - - # Store a solution when found. - self.code_solution_variable = tf.get_variable( - 'code_solution', [], tf.string, - initializer=tf.constant_initializer('')) - self.code_solution_ph = tf.placeholder( - tf.string, [], name='code_solution_ph') - self.code_solution_assign_op = self.code_solution_variable.assign( - self.code_solution_ph) - def assign_code_solution_fn(session, code_solution_string): - session.run(self.code_solution_assign_op, - {self.code_solution_ph: code_solution_string}) - self.assign_code_solution_fn = assign_code_solution_fn - - # Count all programs sampled from policy. This does not include - # programs sampled from replay buffer. - # This equals NPE (number of programs executed). Only programs sampled - # from the policy need to be executed. - self.program_count = make_initialized_variable( - 0, 'program_count', dtype=tf.int64) - - # local model - with tf.device(worker_device): - with tf.variable_scope('local'): - self.model = model = agent_lib.LMAgent( - config, - task_id=task_id, - logging_file=logging_file, - experience_replay_file=experience_replay_file, - dtype=dtype, - global_best_reward_fn=self.assign_global_best_reward_fn, - found_solution_op=self.found_solution_op, - assign_code_solution_fn=self.assign_code_solution_fn, - program_count=self.program_count, - stop_on_success=FLAGS.stop_on_success, - verbose_level=model_v) - local_params = model.trainable_variables - local_params_dict = {p.name: p for p in local_params} - - # Pull global params to local model. 
- def _global_to_local_scope(name): - assert name.startswith('global/') - return 'local' + name[6:] - sync_dict = { - local_params_dict[_global_to_local_scope(p_name)]: p - for p_name, p in global_params_dict.items()} - self.sync_op = tf.group(*[v_local.assign(v_global) - for v_local, v_global - in sync_dict.items()]) - - # Pair local gradients with global params. - grad_var_dict = { - gradient: sync_dict[local_var] - for local_var, gradient in model.gradients_dict.items()} - - # local model - model.make_summary_ops() # Don't put summaries under 'local' scope. - with tf.variable_scope('local'): - self.train_op = model.optimizer.apply_gradients( - grad_var_dict.items(), global_step=self.global_step) - self.local_init_op = tf.variables_initializer( - tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, - tf.get_variable_scope().name)) - - self.local_step = 0 - self.last_summary_time = time.time() - self.summary_interval = summary_interval - self.summary_writer = summary_writer - self.cached_global_step = -1 - self.cached_global_npe = -1 - - logging.info('summary_interval: %d', self.summary_interval) - - # Load top-k buffer. - if self.model.top_episodes is not None and tf.gfile.Exists(self.topk_file): - try: - with tf.gfile.FastGFile(self.topk_file, 'r') as f: - self.model.top_episodes = cPickle.loads(f.read()) - logging.info( - 'Loaded top-k buffer from disk with %d items. Location: "%s"', - len(self.model.top_episodes), self.topk_file) - except (cPickle.UnpicklingError, EOFError) as e: - logging.warn( - 'Failed to load existing top-k buffer from disk. Removing bad file.' - '\nLocation: "%s"\nException: %s', self.topk_file, str(e)) - tf.gfile.Remove(self.topk_file) - - def initialize(self, session): - """Run initialization ops.""" - session.run(self.local_init_op) - session.run(self.sync_op) - self.cached_global_step, self.cached_global_npe = session.run( - [self.global_step, self.program_count]) - - def update_global_model(self, session): - """Run an update step. 
- - 1) Asynchronously copy global weights to local model. - 2) Call into local model's update_step method, which does the following: - a) Sample batch of programs from policy. - b) Compute rewards. - c) Compute gradients and update the global model asynchronously. - 3) Write tensorboard summaries to disk. - - Args: - session: tf.Session instance. - """ - session.run(self.sync_op) # Copy weights from global to local. - - with session.as_default(): - result = self.model.update_step( - session, self.data_manager.sample_rl_batch(), self.train_op, - self.global_step) - global_step = result.global_step - global_npe = result.global_npe - summaries = result.summaries_list - self.cached_global_step = global_step - self.cached_global_npe = global_npe - self.local_step += 1 - - if self.summary_writer and self.local_step % self.summary_interval == 0: - if not isinstance(summaries, (tuple, list)): - summaries = [summaries] - summaries.append(self._local_step_summary()) - if self.is_chief: - (global_best_reward, - found_solution_flag, - program_count) = session.run( - [self.global_best_reward, - self.found_solution_flag, - self.program_count]) - summaries.append( - tf.Summary( - value=[tf.Summary.Value( - tag='model/best_reward', - simple_value=global_best_reward)])) - summaries.append( - tf.Summary( - value=[tf.Summary.Value( - tag='model/solution_found', - simple_value=int(found_solution_flag))])) - summaries.append( - tf.Summary( - value=[tf.Summary.Value( - tag='model/program_count', - simple_value=program_count)])) - for s in summaries: - self.summary_writer.add_summary(s, global_step) - self.last_summary_time = time.time() - - def _local_step_summary(self): - """Compute number of local steps per time increment.""" - dt = time.time() - self.last_summary_time - steps_per_time = self.summary_interval / float(dt) - return tf.Summary(value=[ - tf.Summary.Value( - tag='local_step/per_sec', - simple_value=steps_per_time), - tf.Summary.Value( - tag='local_step/step', - 
simple_value=self.local_step)]) - - def maybe_save_best_model(self, session, saver, checkpoint_file): - """Check if this model got the highest reward and save to disk if so.""" - if self.is_chief and session.run(self.is_best_model): - logging.info('Saving best model to "%s"', checkpoint_file) - saver.save(session, checkpoint_file) - session.run(self.reset_is_best_model) - - def save_replay_buffer(self): - """Save replay buffer to disk. - - Call this periodically so that training can recover if jobs go down. - """ - if self.model.experience_replay is not None: - logging.info('Saving experience replay buffer to "%s".', - self.model.experience_replay.save_file) - self.model.experience_replay.incremental_save(True) - - def delete_replay_buffer(self): - """Delete replay buffer from disk. - - Call this at the end of training to clean up. Replay buffer can get very - large. - """ - if self.model.experience_replay is not None: - logging.info('Deleting experience replay buffer at "%s".', - self.model.experience_replay.save_file) - tf.gfile.Remove(self.model.experience_replay.save_file) - - def save_topk_buffer(self): - """Save top-k buffer to disk. - - Call this periodically so that training can recover if jobs go down. - """ - if self.model.top_episodes is not None: - logging.info('Saving top-k buffer to "%s".', self.topk_file) - # Overwrite previous data each time. - with tf.gfile.FastGFile(self.topk_file, 'w') as f: - f.write(cPickle.dumps(self.model.top_episodes)) - - -@contextlib.contextmanager -def managed_session(sv, master='', config=None, - start_standard_services=True, - close_summary_writer=True, - max_wait_secs=7200): - # Same as Supervisor.managed_session, but with configurable timeout. 
- try: - sess = sv.prepare_or_wait_for_session( - master=master, config=config, - start_standard_services=start_standard_services, - max_wait_secs=max_wait_secs) - yield sess - except tf.errors.DeadlineExceededError: - raise - except Exception as e: # pylint: disable=broad-except - sv.request_stop(e) - finally: - try: - # Request all the threads to stop and wait for them to do so. Any - # exception raised by the threads is raised again from stop(). - # Passing stop_grace_period_secs is for blocked enqueue/dequeue - # threads which are not checking for `should_stop()`. They - # will be stopped when we close the session further down. - sv.stop(close_summary_writer=close_summary_writer) - finally: - # Close the session to finish up all pending calls. We do not care - # about exceptions raised when closing. This takes care of - # blocked enqueue/dequeue calls. - try: - sess.close() - except Exception: # pylint: disable=broad-except - # Silently ignore exceptions raised by close(). - pass - - -def train(config, is_chief, tuner=None, run_dir=None, run_number=0, - results_writer=None): - """Run training loop. - - Args: - config: config_lib.Config instance containing global config (agent and env). - is_chief: True if this worker is chief. Chief worker manages writing some - data to disk and initialization of the global model. - tuner: A tuner instance. If not tuning, leave as None. - run_dir: Directory where all data for this run will be written. If None, - run_dir = FLAGS.logdir. Set this argument when doing multiple runs. - run_number: Which run is this. - results_writer: Managest writing training results to disk. Results are a - dict of metric names and values. - - Returns: - The trainer object used to run training updates. 
- """ - logging.info('Will run asynchronous training.') - - if run_dir is None: - run_dir = FLAGS.logdir - train_dir = os.path.join(run_dir, 'train') - best_model_checkpoint = os.path.join(train_dir, 'best.ckpt') - events_dir = '%s/events_%d' % (run_dir, FLAGS.task_id) - logging.info('Events directory: %s', events_dir) - - logging_dir = os.path.join(run_dir, 'logs') - if not tf.gfile.Exists(logging_dir): - tf.gfile.MakeDirs(logging_dir) - status_file = os.path.join(logging_dir, 'status.txt') - - if FLAGS.summary_tasks and FLAGS.task_id < FLAGS.summary_tasks: - summary_writer = tf.summary.FileWriter(events_dir) - else: - summary_writer = None - - # Only profile task 0. - if FLAGS.do_profiling: - logging.info('Profiling enabled') - profiler = cProfile.Profile() - profiler.enable() - else: - profiler = None - - trainer = AsyncTrainer( - config, FLAGS.task_id, FLAGS.ps_tasks, FLAGS.num_workers, - is_chief=is_chief, - summary_interval=FLAGS.summary_interval, - summary_writer=summary_writer, - logging_dir=logging_dir, - run_number=run_number, - model_v=FLAGS.model_v) - - variables_to_save = [v for v in tf.global_variables() - if v.name.startswith('global')] - global_init_op = tf.variables_initializer(variables_to_save) - saver = tf.train.Saver(variables_to_save) - - var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, - tf.get_variable_scope().name) - logging.info('Trainable vars:') - for v in var_list: - logging.info(' %s, %s, %s', v.name, v.device, v.get_shape()) - - logging.info('All vars:') - for v in tf.global_variables(): - logging.info(' %s, %s, %s', v.name, v.device, v.get_shape()) - - def init_fn(unused_sess): - logging.info('No checkpoint found. 
Initialized global params.') - - sv = tf.train.Supervisor(is_chief=is_chief, - logdir=train_dir, - saver=saver, - summary_op=None, - init_op=global_init_op, - init_fn=init_fn, - summary_writer=summary_writer, - ready_op=tf.report_uninitialized_variables( - variables_to_save), - ready_for_local_init_op=None, - global_step=trainer.global_step, - save_model_secs=30, - save_summaries_secs=30) - - # Add a thread that periodically checks if this Trial should stop - # based on an early stopping policy. - if tuner: - sv.Loop(60, tuner.check_for_stop, (sv.coord,)) - - last_replay_save_time = time.time() - - global_step = -1 - logging.info( - 'Starting session. ' - 'If this hangs, we\'re mostly likely waiting to connect ' - 'to the parameter server. One common cause is that the parameter ' - 'server DNS name isn\'t resolving yet, or is misspecified.') - should_retry = True - supervisor_deadline_exceeded = False - while should_retry: - try: - with managed_session( - sv, FLAGS.master, max_wait_secs=60) as session, session.as_default(): - should_retry = False - do_training = True - - try: - trainer.initialize(session) - if session.run(trainer.run_number) != run_number: - # If we loaded existing model from disk, and the saved run number is - # different, throw an exception. - raise RuntimeError( - 'Expecting to be on run %d, but is actually on run %d. ' - 'run_dir: "%s"' - % (run_number, session.run(trainer.run_number), run_dir)) - global_step = trainer.cached_global_step - logging.info('Starting training at step=%d', global_step) - while do_training: - trainer.update_global_model(session) - - if is_chief: - trainer.maybe_save_best_model( - session, saver, best_model_checkpoint) - global_step = trainer.cached_global_step - global_npe = trainer.cached_global_npe - - if time.time() - last_replay_save_time >= 30: - trainer.save_replay_buffer() - trainer.save_topk_buffer() - last_replay_save_time = time.time() - - # Stopping conditions. 
- if tuner and tuner.should_trial_stop(): - logging.info('Tuner requested early stopping. Finishing.') - do_training = False - if is_chief and FLAGS.stop_on_success: - found_solution = session.run(trainer.found_solution_flag) - if found_solution: - do_training = False - logging.info('Solution found. Finishing.') - if FLAGS.max_npe and global_npe >= FLAGS.max_npe: - # Max NPE (number of programs executed) reached. - logging.info('Max NPE reached. Finishing.') - do_training = False - if sv.should_stop(): - logging.info('Supervisor issued stop. Finishing.') - do_training = False - - except tf.errors.NotFoundError: - # Catch "Error while reading resource variable". - # The chief worker likely destroyed the container, so do not retry. - logging.info('Caught NotFoundError. Quitting.') - do_training = False - should_retry = False - break - except tf.errors.InternalError as e: - # Catch "Invalid variable reference." - if str(e).startswith('Invalid variable reference.'): - # The chief worker likely destroyed the container, so do not - # retry. - logging.info( - 'Caught "InternalError: Invalid variable reference.". ' - 'Quitting.') - do_training = False - should_retry = False - break - else: - # Pass exception through. - raise - - # Exited training loop. Write results to disk. 
- if is_chief and results_writer: - assert not should_retry - with tf.gfile.FastGFile(status_file, 'w') as f: - f.write('done') - (program_count, - found_solution, - code_solution, - best_reward, - global_step) = session.run( - [trainer.program_count, - trainer.found_solution_flag, - trainer.code_solution_variable, - trainer.global_best_reward, - trainer.global_step]) - results_dict = { - 'max_npe': FLAGS.max_npe, - 'batch_size': config.batch_size, - 'max_batches': FLAGS.max_npe // config.batch_size, - 'npe': program_count, - 'max_global_repetitions': FLAGS.num_repetitions, - 'max_local_repetitions': FLAGS.num_repetitions, - 'code_solution': code_solution, - 'best_reward': best_reward, - 'num_batches': global_step, - 'found_solution': found_solution, - 'task': trainer.data_manager.task_name, - 'global_rep': run_number} - logging.info('results_dict: %s', results_dict) - results_writer.append(results_dict) - - except tf.errors.AbortedError: - # Catch "Graph handle is not found" error due to preempted jobs. - logging.info('Caught AbortedError. Retying.') - should_retry = True - except tf.errors.DeadlineExceededError: - supervisor_deadline_exceeded = True - should_retry = False - - if is_chief: - logging.info('This is chief worker. Stopping all workers.') - sv.stop() - - if supervisor_deadline_exceeded: - logging.info('Supervisor timed out. Quitting.') - else: - logging.info('Reached %s steps. Worker stopped.', global_step) - - # Dump profiling. - """ - How to use profiling data. - - Download the profiler dump to your local machine, say to PROF_FILE_PATH. - In a separate script, run something like the following: - - import pstats - p = pstats.Stats(PROF_FILE_PATH) - p.strip_dirs().sort_stats('cumtime').print_stats() - - This will sort by 'cumtime', which "is the cumulative time spent in this and - all subfunctions (from invocation till exit)." 
- https://docs.python.org/2/library/profile.html#instant-user-s-manual - """ # pylint: disable=pointless-string-statement - if profiler: - prof_file = os.path.join(run_dir, 'task_%d.prof' % FLAGS.task_id) - logging.info('Done profiling.\nDumping to "%s".', prof_file) - profiler.create_stats() - with tf.gfile.Open(prof_file, 'w') as f: - f.write(marshal.dumps(profiler.stats)) - - return trainer - - -def run_training(config=None, tuner=None, logdir=None, trial_name=None, - is_chief=True): - """Do all training runs. - - This is the top level training function for policy gradient based models. - Run this from the main function. - - Args: - config: config_lib.Config instance containing global config (agent and - environment hparams). If None, config will be parsed from FLAGS.config. - tuner: A tuner instance. Leave as None if not tuning. - logdir: Parent directory where all data from all runs will be written. If - None, FLAGS.logdir will be used. - trial_name: If tuning, set this to a unique string that identifies this - trial. If `tuner` is not None, this also must be set. - is_chief: True if this worker is the chief. - - Returns: - List of results dicts which were written to disk. Each training run gets a - results dict. Results dict contains metrics, i.e. (name, value) pairs which - give information about the training run. - - Raises: - ValueError: If results dicts read from disk contain invalid data. - """ - if not config: - # If custom config is not given, get it from flags. - config = defaults.default_config_with_updates(FLAGS.config) - if not logdir: - logdir = FLAGS.logdir - if not tf.gfile.Exists(logdir): - tf.gfile.MakeDirs(logdir) - assert FLAGS.num_repetitions > 0 - results = results_lib.Results(logdir) - results_list, _ = results.read_all() - - logging.info('Starting experiment. Directory: "%s"', logdir) - - if results_list: - if results_list[0]['max_npe'] != FLAGS.max_npe: - raise ValueError( - 'Cannot resume training. Max-NPE changed. 
Was %s, now %s', - results_list[0]['max_npe'], FLAGS.max_npe) - if results_list[0]['max_global_repetitions'] != FLAGS.num_repetitions: - raise ValueError( - 'Cannot resume training. Number of repetitions changed. Was %s, ' - 'now %s', - results_list[0]['max_global_repetitions'], - FLAGS.num_repetitions) - - while len(results_list) < FLAGS.num_repetitions: - run_number = len(results_list) - rep_container_name = trial_name if trial_name else 'container' - if FLAGS.num_repetitions > 1: - rep_dir = os.path.join(logdir, 'run_%d' % run_number) - rep_container_name = rep_container_name + '_run_' + str(run_number) - else: - rep_dir = logdir - - logging.info( - 'Starting repetition %d (%d out of %d)', run_number, run_number + 1, - FLAGS.num_repetitions) - - # Train will write result to disk. - with tf.container(rep_container_name): - trainer = train(config, is_chief, tuner, rep_dir, run_number, results) - logging.info('Done training.') - - if is_chief: - # Destroy current container immediately (clears current graph). - logging.info('Clearing shared variables.') - tf.Session.reset(FLAGS.master, containers=[rep_container_name]) - logging.info('Shared variables cleared.') - - # Delete replay buffer on disk. - assert trainer - trainer.delete_replay_buffer() - else: - # Give chief worker time to clean up. - sleep_sec = 30.0 - logging.info('Sleeping for %s sec.', sleep_sec) - time.sleep(sleep_sec) - tf.reset_default_graph() - logging.info('Default graph reset.') - - # Expecting that train wrote new result to disk before returning. 
- results_list, _ = results.read_all() - return results_list diff --git a/research/brain_coder/single_task/pg_train_test.py b/research/brain_coder/single_task/pg_train_test.py deleted file mode 100644 index 0a562e5331e638cab82bc8033bfa2c1fc355e960..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/pg_train_test.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for pg_train. - -These tests excersize code paths available through configuration options. -Training will be run for just a few steps with the goal being to check that -nothing crashes. -""" - -from absl import flags -import tensorflow as tf - -from single_task import defaults # brain coder -from single_task import run # brain coder - -FLAGS = flags.FLAGS - - -class TrainTest(tf.test.TestCase): - - def RunTrainingSteps(self, config_string, num_steps=10): - """Run a few training steps with the given config. - - Just check that nothing crashes. - - Args: - config_string: Config encoded in a string. See - $REPO_PATH/common/config_lib.py - num_steps: Number of training steps to run. Defaults to 10. 
- """ - config = defaults.default_config_with_updates(config_string) - FLAGS.master = '' - FLAGS.max_npe = num_steps * config.batch_size - FLAGS.summary_interval = 1 - FLAGS.logdir = tf.test.get_temp_dir() - FLAGS.config = config_string - tf.reset_default_graph() - run.main(None) - - def testVanillaPolicyGradient(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg"),' - 'timestep_limit=90,batch_size=64') - - def testVanillaPolicyGradient_VariableLengthSequences(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",eos_token=False),' - 'timestep_limit=90,batch_size=64') - - def testVanillaActorCritic(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",ema_baseline_decay=0.0),' - 'timestep_limit=90,batch_size=64') - - def testPolicyGradientWithTopK(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",topk_loss_hparam=1.0,topk=10),' - 'timestep_limit=90,batch_size=64') - - def testVanillaActorCriticWithTopK(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",ema_baseline_decay=0.0,topk_loss_hparam=1.0,' - 'topk=10),' - 'timestep_limit=90,batch_size=64') - - def testPolicyGradientWithTopK_VariableLengthSequences(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",topk_loss_hparam=1.0,topk=10,eos_token=False),' - 'timestep_limit=90,batch_size=64') - - def testPolicyGradientWithImportanceSampling(self): - self.RunTrainingSteps( - 'env=c(task="reverse"),' - 'agent=c(algorithm="pg",alpha=0.5),' - 'timestep_limit=90,batch_size=64') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/results_lib.py b/research/brain_coder/single_task/results_lib.py deleted file mode 100644 index fd28fdd49ba3200dc9faa18d1722235ee4bf2ac2..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/results_lib.py +++ /dev/null @@ -1,155 +0,0 
@@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Results object manages distributed reading and writing of results to disk.""" - -import ast -from collections import namedtuple -import os -import re -from six.moves import xrange -import tensorflow as tf - - -ShardStats = namedtuple( - 'ShardStats', - ['num_local_reps_completed', 'max_local_reps', 'finished']) - - -def ge_non_zero(a, b): - return a >= b and b > 0 - - -def get_shard_id(file_name): - assert file_name[-4:].lower() == '.txt' - return int(file_name[file_name.rfind('_') + 1: -4]) - - -class Results(object): - """Manages reading and writing training results to disk asynchronously. - - Each worker writes to its own file, so that there are no race conditions when - writing happens. However any worker may read any file, as is the case for - `read_all`. Writes are expected to be atomic so that workers will never - read incomplete data, and this is likely to be the case on Unix systems. - Reading out of date data is fine, as workers calling `read_all` will wait - until data from every worker has been written before proceeding. - """ - file_template = 'experiment_results_{0}.txt' - search_regex = r'^experiment_results_([0-9])+\.txt$' - - def __init__(self, log_dir, shard_id=0): - """Construct `Results` instance. - - Args: - log_dir: Where to write results files. - shard_id: Unique id for this file (i.e. shard). Each worker that will - be writing results should use a different shard id. If there are - N shards, each shard should be numbered 0 through N-1. - """ - # Use different files for workers so that they can write to disk async. 
- assert 0 <= shard_id - self.file_name = self.file_template.format(shard_id) - self.log_dir = log_dir - self.results_file = os.path.join(self.log_dir, self.file_name) - - def append(self, metrics): - """Append results to results list on disk.""" - with tf.gfile.FastGFile(self.results_file, 'a') as writer: - writer.write(str(metrics) + '\n') - - def read_this_shard(self): - """Read only from this shard.""" - return self._read_shard(self.results_file) - - def _read_shard(self, results_file): - """Read only from the given shard file.""" - try: - with tf.gfile.FastGFile(results_file, 'r') as reader: - results = [ast.literal_eval(entry) for entry in reader] - except tf.errors.NotFoundError: - # No results written to disk yet. Return empty list. - return [] - return results - - def _get_max_local_reps(self, shard_results): - """Get maximum number of repetitions the given shard needs to complete. - - Worker working on each shard needs to complete a certain number of runs - before it finishes. This method will return that number so that we can - determine which shards are still not done. - - We assume that workers are including a 'max_local_repetitions' value in - their results, which should be the total number of repetitions it needs to - run. - - Args: - shard_results: Dict mapping metric names to values. This should be read - from a shard on disk. - - Returns: - Maximum number of repetitions the given shard needs to complete. - """ - mlrs = [r['max_local_repetitions'] for r in shard_results] - if not mlrs: - return 0 - for n in mlrs[1:]: - assert n == mlrs[0], 'Some reps have different max rep.' - return mlrs[0] - - def read_all(self, num_shards=None): - """Read results across all shards, i.e. get global results list. - - Args: - num_shards: (optional) specifies total number of shards. If the caller - wants information about which shards are incomplete, provide this - argument (so that shards which have yet to be created are still - counted as incomplete shards). 
Otherwise, no information about - incomplete shards will be returned. - - Returns: - aggregate: Global list of results (across all shards). - shard_stats: List of ShardStats instances, one for each shard. Or None if - `num_shards` is None. - """ - try: - all_children = tf.gfile.ListDirectory(self.log_dir) - except tf.errors.NotFoundError: - if num_shards is None: - return [], None - return [], [[] for _ in xrange(num_shards)] - shard_ids = { - get_shard_id(fname): fname - for fname in all_children if re.search(self.search_regex, fname)} - - if num_shards is None: - aggregate = [] - shard_stats = None - for results_file in shard_ids.values(): - aggregate.extend(self._read_shard( - os.path.join(self.log_dir, results_file))) - else: - results_per_shard = [None] * num_shards - for shard_id in xrange(num_shards): - if shard_id in shard_ids: - results_file = shard_ids[shard_id] - results_per_shard[shard_id] = self._read_shard( - os.path.join(self.log_dir, results_file)) - else: - results_per_shard[shard_id] = [] - - # Compute shard stats. - shard_stats = [] - for shard_results in results_per_shard: - max_local_reps = self._get_max_local_reps(shard_results) - shard_stats.append(ShardStats( - num_local_reps_completed=len(shard_results), - max_local_reps=max_local_reps, - finished=ge_non_zero(len(shard_results), max_local_reps))) - - # Compute aggregate. 
- aggregate = [ - r for shard_results in results_per_shard for r in shard_results] - - return aggregate, shard_stats diff --git a/research/brain_coder/single_task/results_lib_test.py b/research/brain_coder/single_task/results_lib_test.py deleted file mode 100644 index 6fe838d74d6a3bdea4c3b219a4d3ceea4385a97e..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/results_lib_test.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for results_lib.""" - -import contextlib -import os -import shutil -import tempfile -from six.moves import xrange -import tensorflow as tf - -from single_task import results_lib # brain coder - - -@contextlib.contextmanager -def temporary_directory(suffix='', prefix='tmp', base_path=None): - """A context manager to create a temporary directory and clean up on exit. - - The parameters are the same ones expected by tempfile.mkdtemp. - The directory will be securely and atomically created. - Everything under it will be removed when exiting the context. - - Args: - suffix: optional suffix. - prefix: options prefix. - base_path: the base path under which to create the temporary directory. - Yields: - The absolute path of the new temporary directory. - """ - temp_dir_path = tempfile.mkdtemp(suffix, prefix, base_path) - try: - yield temp_dir_path - finally: - try: - shutil.rmtree(temp_dir_path) - except OSError as e: - if e.message == 'Cannot call rmtree on a symbolic link': - # Interesting synthetic exception made up by shutil.rmtree. - # Means we received a symlink from mkdtemp. - # Also means must clean up the symlink instead. 
- os.unlink(temp_dir_path) - else: - raise - - -def freeze(dictionary): - """Convert dict to hashable frozenset.""" - return frozenset(dictionary.iteritems()) - - -class ResultsLibTest(tf.test.TestCase): - - def testResults(self): - with temporary_directory() as logdir: - results_obj = results_lib.Results(logdir) - self.assertEqual(results_obj.read_this_shard(), []) - results_obj.append( - {'foo': 1.5, 'bar': 2.5, 'baz': 0}) - results_obj.append( - {'foo': 5.5, 'bar': -1, 'baz': 2}) - self.assertEqual( - results_obj.read_this_shard(), - [{'foo': 1.5, 'bar': 2.5, 'baz': 0}, - {'foo': 5.5, 'bar': -1, 'baz': 2}]) - - def testShardedResults(self): - with temporary_directory() as logdir: - n = 4 # Number of shards. - results_objs = [ - results_lib.Results(logdir, shard_id=i) for i in xrange(n)] - for i, robj in enumerate(results_objs): - robj.append({'foo': i, 'bar': 1 + i * 2}) - results_list, _ = results_objs[0].read_all() - - # Check results. Order does not matter here. - self.assertEqual( - set(freeze(r) for r in results_list), - set(freeze({'foo': i, 'bar': 1 + i * 2}) for i in xrange(n))) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/run.py b/research/brain_coder/single_task/run.py deleted file mode 100644 index 9d8f37c973dcca3bbf8e25bce3d181e5405c6167..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/run.py +++ /dev/null @@ -1,142 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -r"""Run training. - -Choose training algorithm and task(s) and follow these examples. 
- -Run synchronous policy gradient training locally: - -CONFIG="agent=c(algorithm='pg'),env=c(task='reverse')" -OUT_DIR="/tmp/bf_pg_local" -rm -rf $OUT_DIR -bazel run -c opt single_task:run -- \ - --alsologtostderr \ - --config="$CONFIG" \ - --max_npe=0 \ - --logdir="$OUT_DIR" \ - --summary_interval=1 \ - --model_v=0 -learning/brain/tensorboard/tensorboard.sh --port 12345 --logdir "$OUT_DIR" - - -Run genetic algorithm locally: - -CONFIG="agent=c(algorithm='ga'),env=c(task='reverse')" -OUT_DIR="/tmp/bf_ga_local" -rm -rf $OUT_DIR -bazel run -c opt single_task:run -- \ - --alsologtostderr \ - --config="$CONFIG" \ - --max_npe=0 \ - --logdir="$OUT_DIR" - - -Run uniform random search locally: - -CONFIG="agent=c(algorithm='rand'),env=c(task='reverse')" -OUT_DIR="/tmp/bf_rand_local" -rm -rf $OUT_DIR -bazel run -c opt single_task:run -- \ - --alsologtostderr \ - --config="$CONFIG" \ - --max_npe=0 \ - --logdir="$OUT_DIR" -""" - -from absl import app -from absl import flags -from absl import logging - -from single_task import defaults # brain coder -from single_task import ga_train # brain coder -from single_task import pg_train # brain coder - -FLAGS = flags.FLAGS -flags.DEFINE_string('config', '', 'Configuration.') -flags.DEFINE_string( - 'logdir', None, 'Absolute path where to write results.') -flags.DEFINE_integer('task_id', 0, 'ID for this worker.') -flags.DEFINE_integer('num_workers', 1, 'How many workers there are.') -flags.DEFINE_integer( - 'max_npe', 0, - 'NPE = number of programs executed. Maximum number of programs to execute ' - 'in each run. Training will complete when this threshold is reached. Set ' - 'to 0 for unlimited training.') -flags.DEFINE_integer( - 'num_repetitions', 1, - 'Number of times the same experiment will be run (globally across all ' - 'workers). Each run is independent.') -flags.DEFINE_string( - 'log_level', 'INFO', - 'The threshold for what messages will be logged. 
One of DEBUG, INFO, WARN, ' - 'ERROR, or FATAL.') - - -# To register an algorithm: -# 1) Add dependency in the BUILD file to this build rule. -# 2) Import the algorithm's module at the top of this file. -# 3) Add a new entry in the following dict. The key is the algorithm name -# (used to select the algorithm in the config). The value is the module -# defining the expected functions for training and tuning. See the docstring -# for `get_namespace` for further details. -ALGORITHM_REGISTRATION = { - 'pg': pg_train, - 'ga': ga_train, - 'rand': ga_train, -} - - -def get_namespace(config_string): - """Get namespace for the selected algorithm. - - Users who want to add additional algorithm types should modify this function. - The algorithm's namespace should contain the following functions: - run_training: Run the main training loop. - define_tuner_hparam_space: Return the hparam tuning space for the algo. - write_hparams_to_config: Helper for tuning. Write hparams chosen for tuning - to the Config object. - Look at pg_train.py and ga_train.py for function signatures and - implementations. - - Args: - config_string: String representation of a Config object. This will get - parsed into a Config in order to determine what algorithm to use. - - Returns: - algorithm_namespace: The module corresponding to the algorithm given in the - config. - config: The Config object resulting from parsing `config_string`. - - Raises: - ValueError: If config.agent.algorithm is not one of the registered - algorithms. - """ - config = defaults.default_config_with_updates(config_string) - if config.agent.algorithm not in ALGORITHM_REGISTRATION: - raise ValueError('Unknown algorithm type "%s"' % (config.agent.algorithm,)) - else: - return ALGORITHM_REGISTRATION[config.agent.algorithm], config - - -def main(argv): - del argv # Unused. 
- - logging.set_verbosity(FLAGS.log_level) - - flags.mark_flag_as_required('logdir') - if FLAGS.num_workers <= 0: - raise ValueError('num_workers flag must be greater than 0.') - if FLAGS.task_id < 0: - raise ValueError('task_id flag must be greater than or equal to 0.') - if FLAGS.task_id >= FLAGS.num_workers: - raise ValueError( - 'task_id flag must be strictly less than num_workers flag.') - - ns, _ = get_namespace(FLAGS.config) - ns.run_training(is_chief=FLAGS.task_id == 0) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/brain_coder/single_task/run_eval_tasks.py b/research/brain_coder/single_task/run_eval_tasks.py deleted file mode 100755 index eb684c344381462cd3626404b5d7fd7cf5d72b22..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/run_eval_tasks.py +++ /dev/null @@ -1,296 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function - -r"""This script can launch any eval experiments from the paper. - -This is a script. Run with python, not bazel. - -Usage: -./single_task/run_eval_tasks.py \ - --exp EXP --desc DESC [--tuning_tasks] [--iclr_tasks] [--task TASK] \ - [--tasks TASK1 TASK2 ...] - -where EXP is one of the keys in `experiments`, -and DESC is a string description of the set of experiments (such as "v0") - -Set only one of these flags: ---tuning_tasks flag only runs tuning tasks. ---iclr_tasks flag only runs the tasks included in the paper. ---regression_tests flag runs tasks which function as regression tests. ---task flag manually selects a single task to run. ---tasks flag takes a custom list of tasks. - -Other flags: ---reps N specifies N repetitions per experiment, Default is 25. ---training_replicas R specifies that R workers will be launched to train one - task (for neural network algorithms). These workers will update a global - model stored on a parameter server. Defaults to 1. If R > 1, a parameter - server will also be launched. 
- - -Run everything: -exps=( pg-20M pg-topk-20M topk-20M ga-20M rand-20M ) -BIN_DIR="single_task" -for exp in "${exps[@]}" -do - ./$BIN_DIR/run_eval_tasks.py \ - --exp "$exp" --iclr_tasks -done -""" - -import argparse -from collections import namedtuple -import subprocess - - -S = namedtuple('S', ['length']) -default_length = 100 - - -iclr_tasks = [ - 'reverse', 'remove-char', 'count-char', 'add', 'bool-logic', 'print-hello', - 'echo-twice', 'echo-thrice', 'copy-reverse', 'zero-cascade', 'cascade', - 'shift-left', 'shift-right', 'riffle', 'unriffle', 'middle-char', - 'remove-last', 'remove-last-two', 'echo-alternating', 'echo-half', 'length', - 'echo-second-seq', 'echo-nth-seq', 'substring', 'divide-2', 'dedup'] - - -regression_test_tasks = ['reverse', 'test-hill-climb'] - - -E = namedtuple( - 'E', - ['name', 'method_type', 'config', 'simplify', 'batch_size', 'max_npe']) - - -def make_experiment_settings(name, **kwargs): - # Unpack experiment info from name. - def split_last(string, char): - i = string.rindex(char) - return string[:i], string[i+1:] - def si_to_int(si_string): - return int( - si_string.upper().replace('K', '0'*3).replace('M', '0'*6) - .replace('G', '0'*9)) - method_type, max_npe = split_last(name, '-') - assert method_type - assert max_npe - return E( - name=name, method_type=method_type, max_npe=si_to_int(max_npe), **kwargs) - - -experiments_set = { - make_experiment_settings( - 'pg-20M', - config='entropy_beta=0.05,lr=0.0001,topk_loss_hparam=0.0,topk=0,' - 'pi_loss_hparam=1.0,alpha=0.0', - simplify=False, - batch_size=64), - make_experiment_settings( - 'pg-topk-20M', - config='entropy_beta=0.01,lr=0.0001,topk_loss_hparam=50.0,topk=10,' - 'pi_loss_hparam=1.0,alpha=0.0', - simplify=False, - batch_size=64), - make_experiment_settings( - 'topk-20M', - config='entropy_beta=0.01,lr=0.0001,topk_loss_hparam=200.0,topk=10,' - 'pi_loss_hparam=0.0,alpha=0.0', - simplify=False, - batch_size=64), - make_experiment_settings( - 'topk-0ent-20M', - 
config='entropy_beta=0.000,lr=0.0001,topk_loss_hparam=200.0,topk=10,' - 'pi_loss_hparam=0.0,alpha=0.0', - simplify=False, - batch_size=64), - make_experiment_settings( - 'ga-20M', - config='crossover_rate=0.95,mutation_rate=0.15', - simplify=False, - batch_size=100), # Population size. - make_experiment_settings( - 'rand-20M', - config='', - simplify=False, - batch_size=1), - make_experiment_settings( - 'simpl-500M', - config='entropy_beta=0.05,lr=0.0001,topk_loss_hparam=0.5,topk=10,' - 'pi_loss_hparam=1.0,alpha=0.0', - simplify=True, - batch_size=64), -} - -experiments = {e.name: e for e in experiments_set} - - -# pylint: disable=redefined-outer-name -def parse_args(extra_args=()): - """Parse arguments and extract task and experiment info.""" - parser = argparse.ArgumentParser(description='Run all eval tasks.') - parser.add_argument('--exp', required=True) - parser.add_argument('--tuning_tasks', action='store_true') - parser.add_argument('--iclr_tasks', action='store_true') - parser.add_argument('--regression_tests', action='store_true') - parser.add_argument('--desc', default='v0') - parser.add_argument('--reps', default=25) - parser.add_argument('--task') - parser.add_argument('--tasks', nargs='+') - for arg_string, default in extra_args: - parser.add_argument(arg_string, default=default) - args = parser.parse_args() - - print('Running experiment: %s' % (args.exp,)) - if args.desc: - print('Extra description: "%s"' % (args.desc,)) - if args.exp not in experiments: - raise ValueError('Experiment name is not valid') - experiment_name = args.exp - experiment_settings = experiments[experiment_name] - assert experiment_settings.name == experiment_name - - if args.tasks: - print('Launching tasks from args: %s' % (args.tasks,)) - tasks = {t: S(length=default_length) for t in args.tasks} - elif args.task: - print('Launching single task "%s"' % args.task) - tasks = {args.task: S(length=default_length)} - elif args.tuning_tasks: - print('Only running tuning tasks') - 
tasks = {name: S(length=default_length) - for name in ['reverse-tune', 'remove-char-tune']} - elif args.iclr_tasks: - print('Running eval tasks from ICLR paper.') - tasks = {name: S(length=default_length) for name in iclr_tasks} - elif args.regression_tests: - tasks = {name: S(length=default_length) for name in regression_test_tasks} - print('Tasks: %s' % tasks.keys()) - - print('reps = %d' % (int(args.reps),)) - - return args, tasks, experiment_settings - - -def run(command_string): - subprocess.call(command_string, shell=True) - - -if __name__ == '__main__': - LAUNCH_TRAINING_COMMAND = 'single_task/launch_training.sh' - COMPILE_COMMAND = 'bazel build -c opt single_task:run.par' - - args, tasks, experiment_settings = parse_args( - extra_args=(('--training_replicas', 1),)) - - if experiment_settings.method_type in ( - 'pg', 'pg-topk', 'topk', 'topk-0ent', 'simpl'): - # Runs PG and TopK. - - def make_run_cmd(job_name, task, max_npe, num_reps, code_length, - batch_size, do_simplify, custom_config_str): - """Constructs terminal command for launching NN based algorithms. - - The arguments to this function will be used to create config for the - experiment. - - Args: - job_name: Name of the job to launch. Should uniquely identify this - experiment run. - task: Name of the coding task to solve. - max_npe: Maximum number of programs executed. An integer. - num_reps: Number of times to run the experiment. An integer. - code_length: Maximum allowed length of synthesized code. - batch_size: Minibatch size for gradient descent. - do_simplify: Whether to run the experiment in code simplification mode. - A bool. - custom_config_str: Additional config for the model config string. - - Returns: - The terminal command that launches the specified experiment. 
- """ - config = """ - env=c(task='{0}',correct_syntax=False), - agent=c( - algorithm='pg', - policy_lstm_sizes=[35,35],value_lstm_sizes=[35,35], - grad_clip_threshold=50.0,param_init_factor=0.5,regularizer=0.0, - softmax_tr=1.0,optimizer='rmsprop',ema_baseline_decay=0.99, - eos_token={3},{4}), - timestep_limit={1},batch_size={2} - """.replace(' ', '').replace('\n', '').format( - task, code_length, batch_size, do_simplify, custom_config_str) - num_ps = 0 if args.training_replicas == 1 else 1 - return ( - r'{0} --job_name={1} --config="{2}" --max_npe={3} ' - '--num_repetitions={4} --num_workers={5} --num_ps={6} ' - '--stop_on_success={7}' - .format(LAUNCH_TRAINING_COMMAND, job_name, config, max_npe, num_reps, - args.training_replicas, num_ps, str(not do_simplify).lower())) - - else: - # Runs GA and Rand. - assert experiment_settings.method_type in ('ga', 'rand') - - def make_run_cmd(job_name, task, max_npe, num_reps, code_length, - batch_size, do_simplify, custom_config_str): - """Constructs terminal command for launching GA or uniform random search. - - The arguments to this function will be used to create config for the - experiment. - - Args: - job_name: Name of the job to launch. Should uniquely identify this - experiment run. - task: Name of the coding task to solve. - max_npe: Maximum number of programs executed. An integer. - num_reps: Number of times to run the experiment. An integer. - code_length: Maximum allowed length of synthesized code. - batch_size: Minibatch size for gradient descent. - do_simplify: Whether to run the experiment in code simplification mode. - A bool. - custom_config_str: Additional config for the model config string. - - Returns: - The terminal command that launches the specified experiment. 
- """ - assert not do_simplify - if custom_config_str: - custom_config_str = ',' + custom_config_str - config = """ - env=c(task='{0}',correct_syntax=False), - agent=c( - algorithm='{4}' - {3}), - timestep_limit={1},batch_size={2} - """.replace(' ', '').replace('\n', '').format( - task, code_length, batch_size, custom_config_str, - experiment_settings.method_type) - num_workers = num_reps # Do each rep in parallel. - return ( - r'{0} --job_name={1} --config="{2}" --max_npe={3} ' - '--num_repetitions={4} --num_workers={5} --num_ps={6} ' - '--stop_on_success={7}' - .format(LAUNCH_TRAINING_COMMAND, job_name, config, max_npe, num_reps, - num_workers, 0, str(not do_simplify).lower())) - - print('Compiling...') - run(COMPILE_COMMAND) - - print('Launching %d coding tasks...' % len(tasks)) - for task, task_settings in tasks.iteritems(): - name = 'bf_rl_iclr' - desc = '{0}.{1}_{2}'.format(args.desc, experiment_settings.name, task) - job_name = '{}.{}'.format(name, desc) - print('Job name: %s' % job_name) - reps = int(args.reps) if not experiment_settings.simplify else 1 - run_cmd = make_run_cmd( - job_name, task, experiment_settings.max_npe, reps, - task_settings.length, experiment_settings.batch_size, - experiment_settings.simplify, - experiment_settings.config) - print('Running command:\n' + run_cmd) - run(run_cmd) - - print('Done.') -# pylint: enable=redefined-outer-name diff --git a/research/brain_coder/single_task/test_tasks.py b/research/brain_coder/single_task/test_tasks.py deleted file mode 100644 index fb07a12653ebad6b38dc3786e749d3e8bf2b2072..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/test_tasks.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tasks that test correctness of algorithms.""" - -from six.moves import xrange -from common import reward as reward_lib # brain coder -from single_task import misc # brain coder - - 
-class BasicTaskManager(object): - """Wraps a generic reward function.""" - - def __init__(self, reward_fn): - self.reward_fn = reward_fn - self.good_reward = 1.0 - - def _score_string(self, string): - actions = misc.bf_string_to_tokens(string) - reward, correct = self.reward_fn(actions) - return misc.RewardInfo( - episode_rewards=[0.0] * (len(string) - 1) + [reward], - input_case=None, - correct_output=None, - code_output=actions, - input_type=None, - output_type=misc.IOType.integer, - reason='correct' if correct else 'wrong') - - def rl_batch(self, batch_size): - reward_fns = [self._score_string] * batch_size - return reward_fns - - -class Trie(object): - """Trie for sequences.""" - EOS = () - - def __init__(self): - self.trie = {} - - def insert(self, sequence): - d = self.trie - for e in sequence: - if e not in d: - d[e] = {} - d = d[e] - d[self.EOS] = True # Terminate sequence. - - def prefix_match(self, sequence): - """Return prefix of `sequence` which exists in the trie.""" - d = self.trie - index = 0 - for i, e in enumerate(sequence + [self.EOS]): - index = i - if e in d: - d = d[e] - if e == self.EOS: - return sequence, True - else: - break - return sequence[:index], False - - def next_choices(self, sequence): - d = self.trie - for e in sequence: - if e in d: - d = d[e] - else: - raise ValueError('Sequence not a prefix: %s' % (sequence,)) - return d.keys() - - -class HillClimbingTask(object): - """Simple task that tests reward hill climbing ability. - - There are a set of paths (sequences of tokens) which are rewarded. The total - reward for a path is proportional to its length, so the longest path is the - target. Shorter paths can be dead ends. - """ - - def __init__(self): - # Paths are sequences of sub-sequences. Here we form unique sub-sequences - # out of 3 arbitrary ints. We use sub-sequences instead of single entities - # to make the task harder by making the episodes last longer, i.e. more - # for the agent to remember. 
- a = (1, 2, 3) - b = (4, 5, 6) - c = (7, 8, 7) - d = (6, 5, 4) - e = (3, 2, 1) - f = (8, 5, 1) - g = (6, 4, 2) - h = (1, 8, 3) - self.paths = Trie() - self.paths.insert([a, b, h]) - self.paths.insert([a, b, c, d, e, f, g, h]) - self.paths.insert([a, b, c, d, e, b, a]) - self.paths.insert([a, b, g, h]) - self.paths.insert([a, e, f, g]) - self.correct_sequence = misc.flatten([a, b, c, d, e, f, g, h]) - - def distance_fn(a, b): - len_diff = abs(len(a) - len(b)) - return sum(reward_lib.mod_abs_diff(ai - 1, bi - 1, 8) - for ai, bi in zip(a, b)) + len_diff * 4 # 8 / 2 = 4 - self.distance_fn = distance_fn - - def __call__(self, actions): - # Compute reward for action sequence. - actions = [a for a in actions if a > 0] - sequence = [tuple(actions[i: i + 3]) for i in xrange(0, len(actions), 3)] - prefix, complete = self.paths.prefix_match(sequence) - if complete: - return float(len(prefix)), actions == self.correct_sequence - if len(prefix) == len(sequence): - return float(len(prefix)), False - next_pred = sequence[len(prefix)] - choices = self.paths.next_choices(prefix) - if choices == [()]: - return (len(prefix) - len(next_pred) / 3.0), False - min_dist = min(self.distance_fn(c, next_pred) for c in choices) - # +1 reward for each element in the sequence correct, plus fraction torwards - # closest next element. 
- # Maximum distance possible is num_actions * base / 2 = 3 * 8 / 2 = 12 - return (len(prefix) + (1 - min_dist / 12.0)), False diff --git a/research/brain_coder/single_task/test_tasks_test.py b/research/brain_coder/single_task/test_tasks_test.py deleted file mode 100644 index bc905c6936de4c686e6cac1203c65c36bd7a0b16..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/test_tasks_test.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -"""Tests for test_tasks.""" - -import numpy as np -import tensorflow as tf - -from single_task import misc # brain coder -from single_task import test_tasks # brain coder - - -def get_reward(reward_fn, candidate): - return sum(reward_fn(misc.bf_tokens_to_string(candidate)).episode_rewards) - - -class TestTasksTest(tf.test.TestCase): - - def testHillClimbingTask(self): - task = test_tasks.BasicTaskManager(test_tasks.HillClimbingTask()) - reward_fns = task.rl_batch(1) - reward_fn = reward_fns[0] - self.assertTrue(np.isclose(get_reward(reward_fn, [1, 2, 0]), 8 / 12.)) - self.assertTrue(np.isclose(get_reward(reward_fn, [1, 2, 2, 0]), 11 / 12.)) - self.assertTrue(np.isclose(get_reward(reward_fn, [1, 2, 3, 0]), 1.0)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 2, 0]), 1. 
+ 8 / 12.)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 0]), 2.0)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 1, 8, 3, 0]), 3.0)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 7, 8, 7, 0]), 3.0)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 1, 8, 3, 1, 0]), - 3.0 - 4 / 12.)) - self.assertTrue( - np.isclose( - get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 1, 8, 3, 1, 1, 1, 1, 0]), - 2.0)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 7, 8, 7, 3, 0]), - 3.0 + 1 / 12.)) - self.assertTrue( - np.isclose( - get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, - 8, 5, 1, 6, 4, 2, 1, 8, 3, 0]), - 8.0)) - self.assertTrue( - np.isclose( - get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1, - 8, 5, 1, 6, 4, 2, 1, 8, 3, 1, 1, 0]), - 8.0 - 8 / 12.)) - self.assertTrue( - np.isclose(get_reward(reward_fn, [1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, - 2, 1, 8, 5, 1, 6, 4, 2, 1, 8, 3, 1, 1, - 1, 1, 1, 1, 1, 0]), - 7.0)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/brain_coder/single_task/tune.py b/research/brain_coder/single_task/tune.py deleted file mode 100644 index 3473b5e94bd3c1f737a18f0187790d5df2d7a2aa..0000000000000000000000000000000000000000 --- a/research/brain_coder/single_task/tune.py +++ /dev/null @@ -1,262 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -r"""Run grid search. - -Look at launch_tuning.sh for details on how to tune at scale. - -Usage example: -Tune with one worker on the local machine. 
- -CONFIG="agent=c(algorithm='pg')," -CONFIG+="env=c(task_cycle=['reverse-tune', 'remove-tune'])" -HPARAM_SPACE_TYPE="pg" -OUT_DIR="/tmp/bf_pg_tune" -MAX_NPE=5000000 -NUM_REPETITIONS=50 -rm -rf $OUT_DIR -mkdir $OUT_DIR -bazel run -c opt single_task:tune -- \ - --alsologtostderr \ - --config="$CONFIG" \ - --max_npe="$MAX_NPE" \ - --num_repetitions="$NUM_REPETITIONS" \ - --logdir="$OUT_DIR" \ - --summary_interval=1 \ - --model_v=0 \ - --hparam_space="$HPARAM_SPACE_TYPE" \ - --tuner_id=0 \ - --num_tuners=1 \ - 2>&1 >"$OUT_DIR/tuner_0.log" -learning/brain/tensorboard/tensorboard.sh --port 12345 --logdir "$OUT_DIR" -""" - -import ast -import os - -from absl import app -from absl import flags -from absl import logging -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from single_task import defaults # brain coder -from single_task import run as run_lib # brain coder - -FLAGS = flags.FLAGS -flags.DEFINE_integer( - 'tuner_id', 0, - 'The unique ID for this tuning worker.') -flags.DEFINE_integer( - 'num_tuners', 1, - 'How many tuners are there.') -flags.DEFINE_string( - 'hparam_space', 'default', - 'String name which denotes the hparam space to tune over. This is ' - 'algorithm dependent.') -flags.DEFINE_string( - 'fixed_hparams', '', - 'HParams string. Used to fix hparams during tuning.') -flags.DEFINE_float( - 'success_rate_objective_weight', 1.0, - 'How much to weight success rate vs num programs seen. 
By default, only ' - 'success rate is optimized (this is the setting used in the paper).') - - -def parse_hparams_string(hparams_str): - hparams = {} - for term in hparams_str.split(','): - if not term: - continue - name, value = term.split('=') - hparams[name.strip()] = ast.literal_eval(value) - return hparams - - -def int_to_multibase(n, bases): - digits = [0] * len(bases) - for i, b in enumerate(bases): - n, d = divmod(n, b) - digits[i] = d - return digits - - -def hparams_for_index(index, tuning_space): - keys = sorted(tuning_space.keys()) - indices = int_to_multibase(index, [len(tuning_space[k]) for k in keys]) - return tf.contrib.training.HParams( - **{k: tuning_space[k][i] for k, i in zip(keys, indices)}) - - -def run_tuner_loop(ns): - """Run tuning loop for this worker.""" - is_chief = FLAGS.task_id == 0 - tuning_space = ns.define_tuner_hparam_space( - hparam_space_type=FLAGS.hparam_space) - fixed_hparams = parse_hparams_string(FLAGS.fixed_hparams) - for name, value in fixed_hparams.iteritems(): - tuning_space[name] = [value] - tuning_space_size = np.prod([len(values) for values in tuning_space.values()]) - - num_local_trials, remainder = divmod(tuning_space_size, FLAGS.num_tuners) - if FLAGS.tuner_id < remainder: - num_local_trials += 1 - starting_trial_id = ( - num_local_trials * FLAGS.tuner_id + min(remainder, FLAGS.tuner_id)) - - logging.info('tuning_space_size: %d', tuning_space_size) - logging.info('num_local_trials: %d', num_local_trials) - logging.info('starting_trial_id: %d', starting_trial_id) - - for local_trial_index in xrange(num_local_trials): - trial_config = defaults.default_config_with_updates(FLAGS.config) - global_trial_index = local_trial_index + starting_trial_id - trial_name = 'trial_' + str(global_trial_index) - trial_dir = os.path.join(FLAGS.logdir, trial_name) - hparams = hparams_for_index(global_trial_index, tuning_space) - ns.write_hparams_to_config( - trial_config, hparams, hparam_space_type=FLAGS.hparam_space) - - results_list = 
ns.run_training( - config=trial_config, tuner=None, logdir=trial_dir, is_chief=is_chief, - trial_name=trial_name) - - if not is_chief: - # Only chief worker needs to write tuning results to disk. - continue - - objective, metrics = compute_tuning_objective( - results_list, hparams, trial_name, num_trials=tuning_space_size) - logging.info('metrics:\n%s', metrics) - logging.info('objective: %s', objective) - logging.info('programs_seen_fraction: %s', - metrics['programs_seen_fraction']) - logging.info('success_rate: %s', metrics['success_rate']) - logging.info('success_rate_objective_weight: %s', - FLAGS.success_rate_objective_weight) - - tuning_results_file = os.path.join(trial_dir, 'tuning_results.txt') - with tf.gfile.FastGFile(tuning_results_file, 'a') as writer: - writer.write(str(metrics) + '\n') - - logging.info('Trial %s complete.', trial_name) - - -def compute_tuning_objective(results_list, hparams, trial_name, num_trials): - """Compute tuning objective and metrics given results and trial information. - - Args: - results_list: List of results dicts read from disk. These are written by - workers. - hparams: tf.contrib.training.HParams instance containing the hparams used - in this trial (only the hparams which are being tuned). - trial_name: Name of this trial. Used to create a trial directory. - num_trials: Total number of trials that need to be run. This is saved in the - metrics dict for future reference. - - Returns: - objective: The objective computed for this trial. Choose the hparams for the - trial with the largest objective value. - metrics: Information about this trial. A dict. - """ - found_solution = [r['found_solution'] for r in results_list] - successful_program_counts = [ - r['npe'] for r in results_list if r['found_solution']] - - success_rate = sum(found_solution) / float(len(results_list)) - - max_programs = FLAGS.max_npe # Per run. 
- all_program_counts = [ - r['npe'] if r['found_solution'] else max_programs - for r in results_list] - programs_seen_fraction = ( - float(sum(all_program_counts)) - / (max_programs * len(all_program_counts))) - - # min/max/avg stats are over successful runs. - metrics = { - 'num_runs': len(results_list), - 'num_succeeded': sum(found_solution), - 'success_rate': success_rate, - 'programs_seen_fraction': programs_seen_fraction, - 'avg_programs': np.mean(successful_program_counts), - 'max_possible_programs_per_run': max_programs, - 'global_step': sum([r['num_batches'] for r in results_list]), - 'hparams': hparams.values(), - 'trial_name': trial_name, - 'num_trials': num_trials} - - # Report stats per tasks. - tasks = [r['task'] for r in results_list] - for task in set(tasks): - task_list = [r for r in results_list if r['task'] == task] - found_solution = [r['found_solution'] for r in task_list] - successful_rewards = [ - r['best_reward'] for r in task_list - if r['found_solution']] - successful_num_batches = [ - r['num_batches'] - for r in task_list if r['found_solution']] - successful_program_counts = [ - r['npe'] for r in task_list if r['found_solution']] - metrics_append = { - task + '__num_runs': len(task_list), - task + '__num_succeeded': sum(found_solution), - task + '__success_rate': ( - sum(found_solution) / float(len(task_list)))} - metrics.update(metrics_append) - if any(found_solution): - metrics_append = { - task + '__min_reward': min(successful_rewards), - task + '__max_reward': max(successful_rewards), - task + '__avg_reward': np.median(successful_rewards), - task + '__min_programs': min(successful_program_counts), - task + '__max_programs': max(successful_program_counts), - task + '__avg_programs': np.mean(successful_program_counts), - task + '__min_batches': min(successful_num_batches), - task + '__max_batches': max(successful_num_batches), - task + '__avg_batches': np.mean(successful_num_batches)} - metrics.update(metrics_append) - - # Objective will 
be maximized. - # Maximize success rate, minimize num programs seen. - # Max objective is always 1. - weight = FLAGS.success_rate_objective_weight - objective = ( - weight * success_rate - + (1 - weight) * (1 - programs_seen_fraction)) - metrics['objective'] = objective - - return objective, metrics - - -def main(argv): - del argv - - logging.set_verbosity(FLAGS.log_level) - - if not FLAGS.logdir: - raise ValueError('logdir flag must be provided.') - if FLAGS.num_workers <= 0: - raise ValueError('num_workers flag must be greater than 0.') - if FLAGS.task_id < 0: - raise ValueError('task_id flag must be greater than or equal to 0.') - if FLAGS.task_id >= FLAGS.num_workers: - raise ValueError( - 'task_id flag must be strictly less than num_workers flag.') - if FLAGS.num_tuners <= 0: - raise ValueError('num_tuners flag must be greater than 0.') - if FLAGS.tuner_id < 0: - raise ValueError('tuner_id flag must be greater than or equal to 0.') - if FLAGS.tuner_id >= FLAGS.num_tuners: - raise ValueError( - 'tuner_id flag must be strictly less than num_tuners flag.') - - ns, _ = run_lib.get_namespace(FLAGS.config) - run_tuner_loop(ns) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/cognitive_mapping_and_planning/.gitignore b/research/cognitive_mapping_and_planning/.gitignore deleted file mode 100644 index cbc6a8f0271075171ffdf3c2bc5fb9c528b08fc6..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -deps -*.pyc -lib*.so -lib*.so* diff --git a/research/cognitive_mapping_and_planning/README.md b/research/cognitive_mapping_and_planning/README.md deleted file mode 100644 index 4457bafbb4d229998a01dadc46efe41f4ba1a3e0..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/README.md +++ /dev/null @@ -1,127 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 
1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Cognitive Mapping and Planning for Visual Navigation -**Saurabh Gupta, James Davidson, Sergey Levine, Rahul Sukthankar, Jitendra Malik** - -**Computer Vision and Pattern Recognition (CVPR) 2017.** - -**[ArXiv](https://arxiv.org/abs/1702.03920), -[Project Website](https://sites.google.com/corp/view/cognitive-mapping-and-planning/)** - -### Citing -If you find this code base and models useful in your research, please consider -citing the following paper: - ``` - @inproceedings{gupta2017cognitive, - title={Cognitive Mapping and Planning for Visual Navigation}, - author={Gupta, Saurabh and Davidson, James and Levine, Sergey and - Sukthankar, Rahul and Malik, Jitendra}, - booktitle={CVPR}, - year={2017} - } - ``` - -### Contents -1. [Requirements: software](#requirements-software) -2. [Requirements: data](#requirements-data) -3. [Test Pre-trained Models](#test-pre-trained-models) -4. [Train your Own Models](#train-your-own-models) - -### Requirements: software -1. Python Virtual Env Setup: All code is implemented in Python but depends on a - small number of python packages and a couple of C libraries. We recommend - using virtual environment for installing these python packages and python - bindings for these C libraries. - ```Shell - VENV_DIR=venv - pip install virtualenv - virtualenv $VENV_DIR - source $VENV_DIR/bin/activate - - # You may need to upgrade pip for installing openv-python. - pip install --upgrade pip - # Install simple dependencies. - pip install -r requirements.txt - - # Patch bugs in dependencies. - sh patches/apply_patches.sh - ``` - -2. Install [Tensorflow](https://www.tensorflow.org/) inside this virtual - environment. You will need to use one of the latest nightly builds - (see instructions [here](https://github.com/tensorflow/tensorflow#installation)). - -3. 
Swiftshader: We use - [Swiftshader](https://github.com/google/swiftshader.git), a CPU based - renderer to render the meshes. It is possible to use other renderers, - replace `SwiftshaderRenderer` in `render/swiftshader_renderer.py` with - bindings to your renderer. - ```Shell - mkdir -p deps - git clone --recursive https://github.com/google/swiftshader.git deps/swiftshader-src - cd deps/swiftshader-src && git checkout 91da6b00584afd7dcaed66da88e2b617429b3950 - git submodule update - mkdir build && cd build && cmake .. && make -j 16 libEGL libGLESv2 - cd ../../../ - cp deps/swiftshader-src/build/libEGL* libEGL.so.1 - cp deps/swiftshader-src/build/libGLESv2* libGLESv2.so.2 - ``` - -4. PyAssimp: We use [PyAssimp](https://github.com/assimp/assimp.git) to load - meshes. It is possible to use other libraries to load meshes, replace - `Shape` `render/swiftshader_renderer.py` with bindings to your library for - loading meshes. - ```Shell - mkdir -p deps - git clone https://github.com/assimp/assimp.git deps/assimp-src - cd deps/assimp-src - git checkout 2afeddd5cb63d14bc77b53740b38a54a97d94ee8 - cmake CMakeLists.txt -G 'Unix Makefiles' && make -j 16 - cd port/PyAssimp && python setup.py install - cd ../../../.. - cp deps/assimp-src/lib/libassimp* . - ``` - -5. graph-tool: We use [graph-tool](https://git.skewed.de/count0/graph-tool) - library for graph processing. - ```Shell - mkdir -p deps - # If the following git clone command fails, you can also download the source - # from https://downloads.skewed.de/graph-tool/graph-tool-2.2.44.tar.bz2 - git clone https://git.skewed.de/count0/graph-tool deps/graph-tool-src - cd deps/graph-tool-src && git checkout 178add3a571feb6666f4f119027705d95d2951ab - bash autogen.sh - ./configure --disable-cairo --disable-sparsehash --prefix=$HOME/.local - make -j 16 - make install - cd ../../ - ``` - -### Requirements: data -1. 
Download the Stanford 3D Indoor Spaces Dataset (S3DIS Dataset) and ImageNet - Pre-trained models for initializing different models. Follow instructions in - `data/README.md` - -### Test Pre-trained Models -1. Download pre-trained models. See `output/README.md`. - -2. Test models using `scripts/script_test_pretrained_models.sh`. - -### Train Your Own Models -All models were trained asynchronously with 16 workers each worker using data -from a single floor. The default hyper-parameters correspond to this setting. -See [distributed training with -Tensorflow](https://www.tensorflow.org/deploy/distributed) for setting up -distributed training. Training with a single worker is possible with the current -code base but will require some minor changes to allow each worker to load all -training environments. - -### Contact -For questions or issues open an issue on the tensorflow/models [issues -tracker](https://github.com/tensorflow/models/issues). Please assign issues to -@s-gupta. - -### Credits -This code was written by Saurabh Gupta (@s-gupta). diff --git a/research/cognitive_mapping_and_planning/__init__.py b/research/cognitive_mapping_and_planning/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/cfgs/__init__.py b/research/cognitive_mapping_and_planning/cfgs/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/cfgs/config_cmp.py b/research/cognitive_mapping_and_planning/cfgs/config_cmp.py deleted file mode 100644 index 715eee2b973cb66f816ecdb65bbcc3abdd8a9483..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/cfgs/config_cmp.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import os, sys -import numpy as np -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -import logging -import src.utils as utils -import cfgs.config_common as cc - - -import tensorflow as tf - - -rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169' -d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002' - -def get_default_args(): - summary_args = utils.Foo(display_interval=1, test_iters=26, - arop_full_summary_iters=14) - - control_args = utils.Foo(train=False, test=False, - force_batchnorm_is_training_at_test=False, - reset_rng_seed=False, only_eval_when_done=False, - test_mode=None) - return summary_args, control_args - -def get_default_cmp_args(): - batch_norm_param = {'center': True, 'scale': True, - 'activation_fn':tf.nn.relu} - - mapper_arch_args = utils.Foo( - dim_reduce_neurons=64, - fc_neurons=[1024, 1024], - fc_out_size=8, - fc_out_neurons=64, - encoder='resnet_v2_50', - deconv_neurons=[64, 32, 16, 8, 4, 2], - deconv_strides=[2, 2, 2, 2, 2, 2], - deconv_layers_per_block=2, - deconv_kernel_size=4, - fc_dropout=0.5, - combine_type='wt_avg_logits', - batch_norm_param=batch_norm_param) - - readout_maps_arch_args = utils.Foo( - num_neurons=[], - strides=[], - kernel_size=None, - layers_per_block=None) - - arch_args = utils.Foo( - 
vin_val_neurons=8, vin_action_neurons=8, vin_ks=3, vin_share_wts=False, - pred_neurons=[64, 64], pred_batch_norm_param=batch_norm_param, - conv_on_value_map=0, fr_neurons=16, fr_ver='v2', fr_inside_neurons=64, - fr_stride=1, crop_remove_each=30, value_crop_size=4, - action_sample_type='sample', action_sample_combine_type='one_or_other', - sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True, - vin_num_iters=36, isd_k=750., use_agent_loc=False, multi_scale=True, - readout_maps=False, rom_arch=readout_maps_arch_args) - - return arch_args, mapper_arch_args - -def get_arch_vars(arch_str): - if arch_str == '': vals = [] - else: vals = arch_str.split('_') - ks = ['var1', 'var2', 'var3'] - ks = ks[:len(vals)] - - # Exp Ver. - if len(vals) == 0: ks.append('var1'); vals.append('v0') - # custom arch. - if len(vals) == 1: ks.append('var2'); vals.append('') - # map scape for projection baseline. - if len(vals) == 2: ks.append('var3'); vals.append('fr2') - - assert(len(vals) == 3) - - vars = utils.Foo() - for k, v in zip(ks, vals): - setattr(vars, k, v) - - logging.error('arch_vars: %s', vars) - return vars - -def process_arch_str(args, arch_str): - # This function modifies args. - args.arch, args.mapper_arch = get_default_cmp_args() - - arch_vars = get_arch_vars(arch_str) - - args.navtask.task_params.outputs.ego_maps = True - args.navtask.task_params.outputs.ego_goal_imgs = True - args.navtask.task_params.outputs.egomotion = True - args.navtask.task_params.toy_problem = False - - if arch_vars.var1 == 'lmap': - args = process_arch_learned_map(args, arch_vars) - - elif arch_vars.var1 == 'pmap': - args = process_arch_projected_map(args, arch_vars) - - else: - logging.fatal('arch_vars.var1 should be lmap or pmap, but is %s', arch_vars.var1) - assert(False) - - return args - -def process_arch_learned_map(args, arch_vars): - # Multiscale vision based system. 
- args.navtask.task_params.input_type = 'vision' - args.navtask.task_params.outputs.images = True - - if args.navtask.camera_param.modalities[0] == 'rgb': - args.solver.pretrained_path = rgb_resnet_v2_50_path - elif args.navtask.camera_param.modalities[0] == 'depth': - args.solver.pretrained_path = d_resnet_v2_50_path - - if arch_vars.var2 == 'Ssc': - sc = 1./args.navtask.task_params.step_size - args.arch.vin_num_iters = 40 - args.navtask.task_params.map_scales = [sc] - max_dist = args.navtask.task_params.max_dist * \ - args.navtask.task_params.num_goals - args.navtask.task_params.map_crop_sizes = [2*max_dist] - - args.arch.fr_stride = 1 - args.arch.vin_action_neurons = 8 - args.arch.vin_val_neurons = 3 - args.arch.fr_inside_neurons = 32 - - args.mapper_arch.pad_map_with_zeros_each = [24] - args.mapper_arch.deconv_neurons = [64, 32, 16] - args.mapper_arch.deconv_strides = [1, 2, 1] - - elif (arch_vars.var2 == 'Msc' or arch_vars.var2 == 'MscROMms' or - arch_vars.var2 == 'MscROMss' or arch_vars.var2 == 'MscNoVin'): - # Code for multi-scale planner. - args.arch.vin_num_iters = 8 - args.arch.crop_remove_each = 4 - args.arch.value_crop_size = 8 - - sc = 1./args.navtask.task_params.step_size - max_dist = args.navtask.task_params.max_dist * \ - args.navtask.task_params.num_goals - n_scales = np.log2(float(max_dist) / float(args.arch.vin_num_iters)) - n_scales = int(np.ceil(n_scales)+1) - - args.navtask.task_params.map_scales = \ - list(sc*(0.5**(np.arange(n_scales))[::-1])) - args.navtask.task_params.map_crop_sizes = [16 for x in range(n_scales)] - - args.arch.fr_stride = 1 - args.arch.vin_action_neurons = 8 - args.arch.vin_val_neurons = 3 - args.arch.fr_inside_neurons = 32 - - args.mapper_arch.pad_map_with_zeros_each = [0 for _ in range(n_scales)] - args.mapper_arch.deconv_neurons = [64*n_scales, 32*n_scales, 16*n_scales] - args.mapper_arch.deconv_strides = [1, 2, 1] - - if arch_vars.var2 == 'MscNoVin': - # No planning version. 
- args.arch.fr_stride = [1, 2, 1, 2] - args.arch.vin_action_neurons = None - args.arch.vin_val_neurons = 16 - args.arch.fr_inside_neurons = 32 - - args.arch.crop_remove_each = 0 - args.arch.value_crop_size = 4 - args.arch.vin_num_iters = 0 - - elif arch_vars.var2 == 'MscROMms' or arch_vars.var2 == 'MscROMss': - # Code with read outs, MscROMms flattens and reads out, - # MscROMss does not flatten and produces output at multiple scales. - args.navtask.task_params.outputs.readout_maps = True - args.navtask.task_params.map_resize_method = 'antialiasing' - args.arch.readout_maps = True - - if arch_vars.var2 == 'MscROMms': - args.arch.rom_arch.num_neurons = [64, 1] - args.arch.rom_arch.kernel_size = 4 - args.arch.rom_arch.strides = [2,2] - args.arch.rom_arch.layers_per_block = 2 - - args.navtask.task_params.readout_maps_crop_sizes = [64] - args.navtask.task_params.readout_maps_scales = [sc] - - elif arch_vars.var2 == 'MscROMss': - args.arch.rom_arch.num_neurons = \ - [64, len(args.navtask.task_params.map_scales)] - args.arch.rom_arch.kernel_size = 4 - args.arch.rom_arch.strides = [1,1] - args.arch.rom_arch.layers_per_block = 1 - - args.navtask.task_params.readout_maps_crop_sizes = \ - args.navtask.task_params.map_crop_sizes - args.navtask.task_params.readout_maps_scales = \ - args.navtask.task_params.map_scales - - else: - logging.fatal('arch_vars.var2 not one of Msc, MscROMms, MscROMss, MscNoVin.') - assert(False) - - map_channels = args.mapper_arch.deconv_neurons[-1] / \ - (2*len(args.navtask.task_params.map_scales)) - args.navtask.task_params.map_channels = map_channels - - return args - -def process_arch_projected_map(args, arch_vars): - # Single scale vision based system which does not use a mapper but instead - # uses an analytically estimated map. 
- ds = int(arch_vars.var3[2]) - args.navtask.task_params.input_type = 'analytical_counts' - args.navtask.task_params.outputs.analytical_counts = True - - assert(args.navtask.task_params.modalities[0] == 'depth') - args.navtask.camera_param.img_channels = None - - analytical_counts = utils.Foo(map_sizes=[512/ds], - xy_resolution=[5.*ds], - z_bins=[[-10, 10, 150, 200]], - non_linearity=[arch_vars.var2]) - args.navtask.task_params.analytical_counts = analytical_counts - - sc = 1./ds - args.arch.vin_num_iters = 36 - args.navtask.task_params.map_scales = [sc] - args.navtask.task_params.map_crop_sizes = [512/ds] - - args.arch.fr_stride = [1,2] - args.arch.vin_action_neurons = 8 - args.arch.vin_val_neurons = 3 - args.arch.fr_inside_neurons = 32 - - map_channels = len(analytical_counts.z_bins[0]) + 1 - args.navtask.task_params.map_channels = map_channels - args.solver.freeze_conv = False - - return args - -def get_args_for_config(config_name): - args = utils.Foo() - - args.summary, args.control = get_default_args() - - exp_name, mode_str = config_name.split('+') - arch_str, solver_str, navtask_str = exp_name.split('.') - logging.error('config_name: %s', config_name) - logging.error('arch_str: %s', arch_str) - logging.error('navtask_str: %s', navtask_str) - logging.error('solver_str: %s', solver_str) - logging.error('mode_str: %s', mode_str) - - args.solver = cc.process_solver_str(solver_str) - args.navtask = cc.process_navtask_str(navtask_str) - - args = process_arch_str(args, arch_str) - args.arch.isd_k = args.solver.isd_k - - # Train, test, etc. 
- mode, imset = mode_str.split('_') - args = cc.adjust_args_for_mode(args, mode) - args.navtask.building_names = args.navtask.dataset.get_split(imset) - args.control.test_name = '{:s}_on_{:s}'.format(mode, imset) - - # Log the arguments - logging.error('%s', args) - return args diff --git a/research/cognitive_mapping_and_planning/cfgs/config_common.py b/research/cognitive_mapping_and_planning/cfgs/config_common.py deleted file mode 100644 index 440bf5b72f87a1eeca38e22f33b22e82de7345c0..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/cfgs/config_common.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import os -import numpy as np -import logging -import src.utils as utils -import datasets.nav_env_config as nec -from datasets import factory - -def adjust_args_for_mode(args, mode): - if mode == 'train': - args.control.train = True - - elif mode == 'val1': - # Same settings as for training, to make sure nothing wonky is happening - # there. - args.control.test = True - args.control.test_mode = 'val' - args.navtask.task_params.batch_size = 32 - - elif mode == 'val2': - # No data augmentation, not sampling but taking the argmax action, not - # sampling from the ground truth at all. 
- args.control.test = True - args.arch.action_sample_type = 'argmax' - args.arch.sample_gt_prob_type = 'zero' - args.navtask.task_params.data_augment = \ - utils.Foo(lr_flip=0, delta_angle=0, delta_xy=0, relight=False, - relight_fast=False, structured=False) - args.control.test_mode = 'val' - args.navtask.task_params.batch_size = 32 - - elif mode == 'bench': - # Actually testing the agent in settings that are kept same between - # different runs. - args.navtask.task_params.batch_size = 16 - args.control.test = True - args.arch.action_sample_type = 'argmax' - args.arch.sample_gt_prob_type = 'zero' - args.navtask.task_params.data_augment = \ - utils.Foo(lr_flip=0, delta_angle=0, delta_xy=0, relight=False, - relight_fast=False, structured=False) - args.summary.test_iters = 250 - args.control.only_eval_when_done = True - args.control.reset_rng_seed = True - args.control.test_mode = 'test' - else: - logging.fatal('Unknown mode: %s.', mode) - assert(False) - return args - -def get_solver_vars(solver_str): - if solver_str == '': vals = []; - else: vals = solver_str.split('_') - ks = ['clip', 'dlw', 'long', 'typ', 'isdk', 'adam_eps', 'init_lr']; - ks = ks[:len(vals)] - - # Gradient clipping or not. - if len(vals) == 0: ks.append('clip'); vals.append('noclip'); - # data loss weight. - if len(vals) == 1: ks.append('dlw'); vals.append('dlw20') - # how long to train for. - if len(vals) == 2: ks.append('long'); vals.append('nolong') - # Adam - if len(vals) == 3: ks.append('typ'); vals.append('adam2') - # reg loss wt - if len(vals) == 4: ks.append('rlw'); vals.append('rlw1') - # isd_k - if len(vals) == 5: ks.append('isdk'); vals.append('isdk415') # 415, inflexion at 2.5k. 
- # adam eps - if len(vals) == 6: ks.append('adam_eps'); vals.append('aeps1en8') - # init lr - if len(vals) == 7: ks.append('init_lr'); vals.append('lr1en3') - - assert(len(vals) == 8) - - vars = utils.Foo() - for k, v in zip(ks, vals): - setattr(vars, k, v) - logging.error('solver_vars: %s', vars) - return vars - -def process_solver_str(solver_str): - solver = utils.Foo( - seed=0, learning_rate_decay=None, clip_gradient_norm=None, max_steps=None, - initial_learning_rate=None, momentum=None, steps_per_decay=None, - logdir=None, sync=False, adjust_lr_sync=True, wt_decay=0.0001, - data_loss_wt=None, reg_loss_wt=None, freeze_conv=True, num_workers=1, - task=0, ps_tasks=0, master='local', typ=None, momentum2=None, - adam_eps=None) - - # Clobber with overrides from solver str. - solver_vars = get_solver_vars(solver_str) - - solver.data_loss_wt = float(solver_vars.dlw[3:].replace('x', '.')) - solver.adam_eps = float(solver_vars.adam_eps[4:].replace('x', '.').replace('n', '-')) - solver.initial_learning_rate = float(solver_vars.init_lr[2:].replace('x', '.').replace('n', '-')) - solver.reg_loss_wt = float(solver_vars.rlw[3:].replace('x', '.')) - solver.isd_k = float(solver_vars.isdk[4:].replace('x', '.')) - - long = solver_vars.long - if long == 'long': - solver.steps_per_decay = 40000 - solver.max_steps = 120000 - elif long == 'long2': - solver.steps_per_decay = 80000 - solver.max_steps = 120000 - elif long == 'nolong' or long == 'nol': - solver.steps_per_decay = 20000 - solver.max_steps = 60000 - else: - logging.fatal('solver_vars.long should be long, long2, nolong or nol.') - assert(False) - - clip = solver_vars.clip - if clip == 'noclip' or clip == 'nocl': - solver.clip_gradient_norm = 0 - elif clip[:4] == 'clip': - solver.clip_gradient_norm = float(clip[4:].replace('x', '.')) - else: - logging.fatal('Unknown solver_vars.clip: %s', clip) - assert(False) - - typ = solver_vars.typ - if typ == 'adam': - solver.typ = 'adam' - solver.momentum = 0.9 - solver.momentum2 = 
0.999 - solver.learning_rate_decay = 1.0 - elif typ == 'adam2': - solver.typ = 'adam' - solver.momentum = 0.9 - solver.momentum2 = 0.999 - solver.learning_rate_decay = 0.1 - elif typ == 'sgd': - solver.typ = 'sgd' - solver.momentum = 0.99 - solver.momentum2 = None - solver.learning_rate_decay = 0.1 - else: - logging.fatal('Unknown solver_vars.typ: %s', typ) - assert(False) - - logging.error('solver: %s', solver) - return solver - -def get_navtask_vars(navtask_str): - if navtask_str == '': vals = [] - else: vals = navtask_str.split('_') - - ks_all = ['dataset_name', 'modality', 'task', 'history', 'max_dist', - 'num_steps', 'step_size', 'n_ori', 'aux_views', 'data_aug'] - ks = ks_all[:len(vals)] - - # All data or not. - if len(vals) == 0: ks.append('dataset_name'); vals.append('sbpd') - # modality - if len(vals) == 1: ks.append('modality'); vals.append('rgb') - # semantic task? - if len(vals) == 2: ks.append('task'); vals.append('r2r') - # number of history frames. - if len(vals) == 3: ks.append('history'); vals.append('h0') - # max steps - if len(vals) == 4: ks.append('max_dist'); vals.append('32') - # num steps - if len(vals) == 5: ks.append('num_steps'); vals.append('40') - # step size - if len(vals) == 6: ks.append('step_size'); vals.append('8') - # n_ori - if len(vals) == 7: ks.append('n_ori'); vals.append('4') - # Auxiliary views. - if len(vals) == 8: ks.append('aux_views'); vals.append('nv0') - # Normal data augmentation as opposed to structured data augmentation (if set - # to straug. - if len(vals) == 9: ks.append('data_aug'); vals.append('straug') - - assert(len(vals) == 10) - for i in range(len(ks)): - assert(ks[i] == ks_all[i]) - - vars = utils.Foo() - for k, v in zip(ks, vals): - setattr(vars, k, v) - logging.error('navtask_vars: %s', vals) - return vars - -def process_navtask_str(navtask_str): - navtask = nec.nav_env_base_config() - - # Clobber with overrides from strings. 
- navtask_vars = get_navtask_vars(navtask_str) - - navtask.task_params.n_ori = int(navtask_vars.n_ori) - navtask.task_params.max_dist = int(navtask_vars.max_dist) - navtask.task_params.num_steps = int(navtask_vars.num_steps) - navtask.task_params.step_size = int(navtask_vars.step_size) - navtask.task_params.data_augment.delta_xy = int(navtask_vars.step_size)/2. - n_aux_views_each = int(navtask_vars.aux_views[2]) - aux_delta_thetas = np.concatenate((np.arange(n_aux_views_each) + 1, - -1 -np.arange(n_aux_views_each))) - aux_delta_thetas = aux_delta_thetas*np.deg2rad(navtask.camera_param.fov) - navtask.task_params.aux_delta_thetas = aux_delta_thetas - - if navtask_vars.data_aug == 'aug': - navtask.task_params.data_augment.structured = False - elif navtask_vars.data_aug == 'straug': - navtask.task_params.data_augment.structured = True - else: - logging.fatal('Unknown navtask_vars.data_aug %s.', navtask_vars.data_aug) - assert(False) - - navtask.task_params.num_history_frames = int(navtask_vars.history[1:]) - navtask.task_params.n_views = 1+navtask.task_params.num_history_frames - - navtask.task_params.goal_channels = int(navtask_vars.n_ori) - - if navtask_vars.task == 'hard': - navtask.task_params.type = 'rng_rejection_sampling_many' - navtask.task_params.rejection_sampling_M = 2000 - navtask.task_params.min_dist = 10 - elif navtask_vars.task == 'r2r': - navtask.task_params.type = 'room_to_room_many' - elif navtask_vars.task == 'ST': - # Semantic task at hand. 
- navtask.task_params.goal_channels = \ - len(navtask.task_params.semantic_task.class_map_names) - navtask.task_params.rel_goal_loc_dim = \ - len(navtask.task_params.semantic_task.class_map_names) - navtask.task_params.type = 'to_nearest_obj_acc' - else: - logging.fatal('navtask_vars.task: should be hard or r2r, ST') - assert(False) - - if navtask_vars.modality == 'rgb': - navtask.camera_param.modalities = ['rgb'] - navtask.camera_param.img_channels = 3 - elif navtask_vars.modality == 'd': - navtask.camera_param.modalities = ['depth'] - navtask.camera_param.img_channels = 2 - - navtask.task_params.img_height = navtask.camera_param.height - navtask.task_params.img_width = navtask.camera_param.width - navtask.task_params.modalities = navtask.camera_param.modalities - navtask.task_params.img_channels = navtask.camera_param.img_channels - navtask.task_params.img_fov = navtask.camera_param.fov - - navtask.dataset = factory.get_dataset(navtask_vars.dataset_name) - return navtask diff --git a/research/cognitive_mapping_and_planning/cfgs/config_distill.py b/research/cognitive_mapping_and_planning/cfgs/config_distill.py deleted file mode 100644 index 53be2f8a5f12ee701a53c1c354079659da6958d4..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/cfgs/config_distill.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import pprint -import copy -import os -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -import logging -import src.utils as utils -import cfgs.config_common as cc - - -import tensorflow as tf - -rgb_resnet_v2_50_path = 'cache/resnet_v2_50_inception_preprocessed/model.ckpt-5136169' - -def get_default_args(): - robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120, - camera_elevation_degree=-15) - - camera_param = utils.Foo(width=225, height=225, z_near=0.05, z_far=20.0, - fov=60., modalities=['rgb', 'depth']) - - env = utils.Foo(padding=10, resolution=5, num_point_threshold=2, - valid_min=-10, valid_max=200, n_samples_per_face=200) - - data_augment = utils.Foo(lr_flip=0, delta_angle=1, delta_xy=4, relight=False, - relight_fast=False, structured=False) - - task_params = utils.Foo(num_actions=4, step_size=4, num_steps=0, - batch_size=32, room_seed=0, base_class='Building', - task='mapping', n_ori=6, data_augment=data_augment, - output_transform_to_global_map=False, - output_canonical_map=False, - output_incremental_transform=False, - output_free_space=False, move_type='shortest_path', - toy_problem=0) - - buildinger_args = utils.Foo(building_names=['area1_gates_wingA_floor1_westpart'], - env_class=None, robot=robot, - task_params=task_params, env=env, - camera_param=camera_param) - - solver_args = utils.Foo(seed=0, learning_rate_decay=0.1, - clip_gradient_norm=0, max_steps=120000, - initial_learning_rate=0.001, momentum=0.99, - steps_per_decay=40000, logdir=None, sync=False, - adjust_lr_sync=True, wt_decay=0.0001, - data_loss_wt=1.0, reg_loss_wt=1.0, - num_workers=1, task=0, ps_tasks=0, master='local') - - summary_args = utils.Foo(display_interval=1, test_iters=100) - - control_args = utils.Foo(train=False, test=False, - force_batchnorm_is_training_at_test=False) - - arch_args = utils.Foo(rgb_encoder='resnet_v2_50', 
d_encoder='resnet_v2_50') - - return utils.Foo(solver=solver_args, - summary=summary_args, control=control_args, arch=arch_args, - buildinger=buildinger_args) - -def get_vars(config_name): - vars = config_name.split('_') - if len(vars) == 1: # All data or not. - vars.append('noall') - if len(vars) == 2: # n_ori - vars.append('4') - logging.error('vars: %s', vars) - return vars - -def get_args_for_config(config_name): - args = get_default_args() - config_name, mode = config_name.split('+') - vars = get_vars(config_name) - - logging.info('config_name: %s, mode: %s', config_name, mode) - - args.buildinger.task_params.n_ori = int(vars[2]) - args.solver.freeze_conv = True - args.solver.pretrained_path = rgb_resnet_v2_50_path - args.buildinger.task_params.img_channels = 5 - args.solver.data_loss_wt = 0.00001 - - if vars[0] == 'v0': - None - else: - logging.error('config_name: %s undefined', config_name) - - args.buildinger.task_params.height = args.buildinger.camera_param.height - args.buildinger.task_params.width = args.buildinger.camera_param.width - args.buildinger.task_params.modalities = args.buildinger.camera_param.modalities - - if vars[1] == 'all': - args = cc.get_args_for_mode_building_all(args, mode) - elif vars[1] == 'noall': - args = cc.get_args_for_mode_building(args, mode) - - # Log the arguments - logging.error('%s', args) - return args diff --git a/research/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py b/research/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py deleted file mode 100644 index 3cc64fe594ab025fbcfb41543302fa42c7fc0074..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/cfgs/config_vision_baseline.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import pprint -import os -import numpy as np -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -import logging -import src.utils as utils -import cfgs.config_common as cc -import datasets.nav_env_config as nec - - -import tensorflow as tf - -FLAGS = flags.FLAGS - -get_solver_vars = cc.get_solver_vars -get_navtask_vars = cc.get_navtask_vars - - -rgb_resnet_v2_50_path = 'data/init_models/resnet_v2_50/model.ckpt-5136169' -d_resnet_v2_50_path = 'data/init_models/distill_rgb_to_d_resnet_v2_50/model.ckpt-120002' - -def get_default_args(): - summary_args = utils.Foo(display_interval=1, test_iters=26, - arop_full_summary_iters=14) - - control_args = utils.Foo(train=False, test=False, - force_batchnorm_is_training_at_test=False, - reset_rng_seed=False, only_eval_when_done=False, - test_mode=None) - return summary_args, control_args - -def get_default_baseline_args(): - batch_norm_param = {'center': True, 'scale': True, - 'activation_fn':tf.nn.relu} - arch_args = utils.Foo( - pred_neurons=[], goal_embed_neurons=[], img_embed_neurons=[], - batch_norm_param=batch_norm_param, dim_reduce_neurons=64, combine_type='', - encoder='resnet_v2_50', action_sample_type='sample', - action_sample_combine_type='one_or_other', - sample_gt_prob_type='inverse_sigmoid_decay', dagger_sample_bn_false=True, - isd_k=750., use_visit_count=False, lstm_output=False, lstm_ego=False, - lstm_img=False, fc_dropout=0.0, embed_goal_for_state=False, - 
lstm_output_init_state_from_goal=False) - return arch_args - -def get_arch_vars(arch_str): - if arch_str == '': vals = [] - else: vals = arch_str.split('_') - - ks = ['ver', 'lstm_dim', 'dropout'] - - # Exp Ver - if len(vals) == 0: vals.append('v0') - # LSTM dimentsions - if len(vals) == 1: vals.append('lstm2048') - # Dropout - if len(vals) == 2: vals.append('noDO') - - assert(len(vals) == 3) - - vars = utils.Foo() - for k, v in zip(ks, vals): - setattr(vars, k, v) - - logging.error('arch_vars: %s', vars) - return vars - -def process_arch_str(args, arch_str): - # This function modifies args. - args.arch = get_default_baseline_args() - arch_vars = get_arch_vars(arch_str) - - args.navtask.task_params.outputs.rel_goal_loc = True - args.navtask.task_params.input_type = 'vision' - args.navtask.task_params.outputs.images = True - - if args.navtask.camera_param.modalities[0] == 'rgb': - args.solver.pretrained_path = rgb_resnet_v2_50_path - elif args.navtask.camera_param.modalities[0] == 'depth': - args.solver.pretrained_path = d_resnet_v2_50_path - else: - logging.fatal('Neither of rgb or d') - - if arch_vars.dropout == 'DO': - args.arch.fc_dropout = 0.5 - - args.tfcode = 'B' - - exp_ver = arch_vars.ver - if exp_ver == 'v0': - # Multiplicative interaction between goal loc and image features. - args.arch.combine_type = 'multiply' - args.arch.pred_neurons = [256, 256] - args.arch.goal_embed_neurons = [64, 8] - args.arch.img_embed_neurons = [1024, 512, 256*8] - - elif exp_ver == 'v1': - # Additive interaction between goal and image features. - args.arch.combine_type = 'add' - args.arch.pred_neurons = [256, 256] - args.arch.goal_embed_neurons = [64, 256] - args.arch.img_embed_neurons = [1024, 512, 256] - - elif exp_ver == 'v2': - # LSTM at the output on top of multiple interactions. 
- args.arch.combine_type = 'multiply' - args.arch.goal_embed_neurons = [64, 8] - args.arch.img_embed_neurons = [1024, 512, 256*8] - args.arch.lstm_output = True - args.arch.lstm_output_dim = int(arch_vars.lstm_dim[4:]) - args.arch.pred_neurons = [256] # The other is inside the LSTM. - - elif exp_ver == 'v0blind': - # LSTM only on the goal location. - args.arch.combine_type = 'goalonly' - args.arch.goal_embed_neurons = [64, 256] - args.arch.img_embed_neurons = [2] # I dont know what it will do otherwise. - args.arch.lstm_output = True - args.arch.lstm_output_dim = 256 - args.arch.pred_neurons = [256] # The other is inside the LSTM. - - else: - logging.fatal('exp_ver: %s undefined', exp_ver) - assert(False) - - # Log the arguments - logging.error('%s', args) - return args - -def get_args_for_config(config_name): - args = utils.Foo() - - args.summary, args.control = get_default_args() - - exp_name, mode_str = config_name.split('+') - arch_str, solver_str, navtask_str = exp_name.split('.') - logging.error('config_name: %s', config_name) - logging.error('arch_str: %s', arch_str) - logging.error('navtask_str: %s', navtask_str) - logging.error('solver_str: %s', solver_str) - logging.error('mode_str: %s', mode_str) - - args.solver = cc.process_solver_str(solver_str) - args.navtask = cc.process_navtask_str(navtask_str) - - args = process_arch_str(args, arch_str) - args.arch.isd_k = args.solver.isd_k - - # Train, test, etc. 
- mode, imset = mode_str.split('_') - args = cc.adjust_args_for_mode(args, mode) - args.navtask.building_names = args.navtask.dataset.get_split(imset) - args.control.test_name = '{:s}_on_{:s}'.format(mode, imset) - - # Log the arguments - logging.error('%s', args) - return args diff --git a/research/cognitive_mapping_and_planning/data/.gitignore b/research/cognitive_mapping_and_planning/data/.gitignore deleted file mode 100644 index 2b6d5e46652d14a9c0a8025dbcccfc2dd4376e4a..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/data/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -stanford_building_parser_dataset_raw -stanford_building_parser_dataset -init_models diff --git a/research/cognitive_mapping_and_planning/data/README.md b/research/cognitive_mapping_and_planning/data/README.md deleted file mode 100644 index a8928345351dac19c0e12fd33f99dd2aa600e23b..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/data/README.md +++ /dev/null @@ -1,33 +0,0 @@ -This directory contains the data needed for training and benchmarking various -navigation models. - -1. Download the data from the [dataset website] - (http://buildingparser.stanford.edu/dataset.html). - 1. [Raw meshes](https://goo.gl/forms/2YSPaO2UKmn5Td5m2). We need the meshes - which are in the noXYZ folder. Download the tar files and place them in - the `stanford_building_parser_dataset_raw` folder. You need to download - `area_1_noXYZ.tar`, `area_3_noXYZ.tar`, `area_5a_noXYZ.tar`, - `area_5b_noXYZ.tar`, `area_6_noXYZ.tar` for training and - `area_4_noXYZ.tar` for evaluation. - 2. [Annotations](https://goo.gl/forms/4SoGp4KtH1jfRqEj2) for setting up - tasks. We will need the file called `Stanford3dDataset_v1.2.zip`. Place - the file in the directory `stanford_building_parser_dataset_raw`. - -2. Preprocess the data. - 1. Extract meshes using `scripts/script_preprocess_meshes_S3DIS.sh`. 
After - this `ls data/stanford_building_parser_dataset/mesh` should have 6 - folders `area1`, `area3`, `area4`, `area5a`, `area5b`, `area6`, with - textures and obj files within each directory. - 2. Extract out room information and semantics from zip file using - `scripts/script_preprocess_annoations_S3DIS.sh`. After this there should - be `room-dimension` and `class-maps` folder in - `data/stanford_building_parser_dataset`. (If you find this script to - crash because of an exception in np.loadtxt while processing - `Area_5/office_19/Annotations/ceiling_1.txt`, there is a special - character on line 323474, that should be removed manually.) - -3. Download ImageNet Pre-trained models. We used ResNet-v2-50 for representing - images. For RGB images this is pre-trained on ImageNet. For Depth images we - [distill](https://arxiv.org/abs/1507.00448) the RGB model to depth images - using paired RGB-D images. Both there models are available through - `scripts/script_download_init_models.sh` diff --git a/research/cognitive_mapping_and_planning/datasets/__init__.py b/research/cognitive_mapping_and_planning/datasets/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/datasets/factory.py b/research/cognitive_mapping_and_planning/datasets/factory.py deleted file mode 100644 index 3f7b5c0a602dbacf9619dc1c2ec98e94200428b6..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/datasets/factory.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Wrapper for selecting the navigation environment that we want to train and -test on. -""" -import numpy as np -import os, glob -import platform - -import logging -from tensorflow.python.platform import app -from tensorflow.python.platform import flags - -import render.swiftshader_renderer as renderer -import src.file_utils as fu -import src.utils as utils - -def get_dataset(dataset_name): - if dataset_name == 'sbpd': - dataset = StanfordBuildingParserDataset(dataset_name) - else: - logging.fatal('Not one of sbpd') - return dataset - -class Loader(): - def get_data_dir(): - pass - - def get_meta_data(self, file_name, data_dir=None): - if data_dir is None: - data_dir = self.get_data_dir() - full_file_name = os.path.join(data_dir, 'meta', file_name) - assert(fu.exists(full_file_name)), \ - '{:s} does not exist'.format(full_file_name) - ext = os.path.splitext(full_file_name)[1] - if ext == '.txt': - ls = [] - with fu.fopen(full_file_name, 'r') as f: - for l in f: - ls.append(l.rstrip()) - elif ext == '.pkl': - ls = utils.load_variables(full_file_name) - return ls - - def load_building(self, name, data_dir=None): - if data_dir is None: - data_dir = self.get_data_dir() - out = {} - out['name'] = name - out['data_dir'] = data_dir - out['room_dimension_file'] = os.path.join(data_dir, 'room-dimension', - name+'.pkl') - out['class_map_folder'] = os.path.join(data_dir, 'class-maps') - return out - - def load_building_meshes(self, building): - dir_name = 
os.path.join(building['data_dir'], 'mesh', building['name']) - mesh_file_name = glob.glob1(dir_name, '*.obj')[0] - mesh_file_name_full = os.path.join(dir_name, mesh_file_name) - logging.error('Loading building from obj file: %s', mesh_file_name_full) - shape = renderer.Shape(mesh_file_name_full, load_materials=True, - name_prefix=building['name']+'_') - return [shape] - -class StanfordBuildingParserDataset(Loader): - def __init__(self, ver): - self.ver = ver - self.data_dir = None - - def get_data_dir(self): - if self.data_dir is None: - self.data_dir = 'data/stanford_building_parser_dataset/' - return self.data_dir - - def get_benchmark_sets(self): - return self._get_benchmark_sets() - - def get_split(self, split_name): - if self.ver == 'sbpd': - return self._get_split(split_name) - else: - logging.fatal('Unknown version.') - - def _get_benchmark_sets(self): - sets = ['train1', 'val', 'test'] - return sets - - def _get_split(self, split_name): - train = ['area1', 'area5a', 'area5b', 'area6'] - train1 = ['area1'] - val = ['area3'] - test = ['area4'] - - sets = {} - sets['train'] = train - sets['train1'] = train1 - sets['val'] = val - sets['test'] = test - sets['all'] = sorted(list(set(train + val + test))) - return sets[split_name] diff --git a/research/cognitive_mapping_and_planning/datasets/nav_env.py b/research/cognitive_mapping_and_planning/datasets/nav_env.py deleted file mode 100644 index 5710e26dcb113121d99400cb060104224dd91749..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/datasets/nav_env.py +++ /dev/null @@ -1,1465 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Navidation Environment. Includes the following classes along with some -helper functions. - Building: Loads buildings, computes traversibility, exposes functionality for - rendering images. - - GridWorld: Base class which implements functionality for moving an agent on a - grid world. - - NavigationEnv: Base class which generates navigation problems on a grid world. - - VisualNavigationEnv: Builds upon NavigationEnv and Building to provide - interface that is used externally to train the agent. - - MeshMapper: Class used for distilling the model, testing the mapper. - - BuildingMultiplexer: Wrapper class that instantiates a VisualNavigationEnv for - each building and multiplexes between them as needed. 
-""" - -import numpy as np -import os -import re -import matplotlib.pyplot as plt - -import graph_tool as gt -import graph_tool.topology - -from tensorflow.python.platform import gfile -import logging -import src.file_utils as fu -import src.utils as utils -import src.graph_utils as gu -import src.map_utils as mu -import src.depth_utils as du -import render.swiftshader_renderer as sru -from render.swiftshader_renderer import SwiftshaderRenderer -import cv2 - -label_nodes_with_class = gu.label_nodes_with_class -label_nodes_with_class_geodesic = gu.label_nodes_with_class_geodesic -get_distance_node_list = gu.get_distance_node_list -convert_to_graph_tool = gu.convert_to_graph_tool -generate_graph = gu.generate_graph -get_hardness_distribution = gu.get_hardness_distribution -rng_next_goal_rejection_sampling = gu.rng_next_goal_rejection_sampling -rng_next_goal = gu.rng_next_goal -rng_room_to_room = gu.rng_room_to_room -rng_target_dist_field = gu.rng_target_dist_field - -compute_traversibility = mu.compute_traversibility -make_map = mu.make_map -resize_maps = mu.resize_maps -pick_largest_cc = mu.pick_largest_cc -get_graph_origin_loc = mu.get_graph_origin_loc -generate_egocentric_maps = mu.generate_egocentric_maps -generate_goal_images = mu.generate_goal_images -get_map_to_predict = mu.get_map_to_predict - -bin_points = du.bin_points -make_geocentric = du.make_geocentric -get_point_cloud_from_z = du.get_point_cloud_from_z -get_camera_matrix = du.get_camera_matrix - -def _get_semantic_maps(folder_name, building_name, map, flip): - # Load file from the cache. 
- file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl' - file_name = file_name.format(building_name, map.size[0], map.size[1], - map.origin[0], map.origin[1], map.resolution, - flip) - file_name = os.path.join(folder_name, file_name) - logging.info('Loading semantic maps from %s.', file_name) - - if fu.exists(file_name): - a = utils.load_variables(file_name) - maps = a['maps'] #HxWx#C - cats = a['cats'] - else: - logging.error('file_name: %s not found.', file_name) - maps = None - cats = None - return maps, cats - -def _select_classes(all_maps, all_cats, cats_to_use): - inds = [] - for c in cats_to_use: - ind = all_cats.index(c) - inds.append(ind) - out_maps = all_maps[:,:,inds] - return out_maps - -def _get_room_dimensions(file_name, resolution, origin, flip=False): - if fu.exists(file_name): - a = utils.load_variables(file_name)['room_dimension'] - names = a.keys() - dims = np.concatenate(a.values(), axis=0).reshape((-1,6)) - ind = np.argsort(names) - dims = dims[ind,:] - names = [names[x] for x in ind] - if flip: - dims_new = dims*1 - dims_new[:,1] = -dims[:,4] - dims_new[:,4] = -dims[:,1] - dims = dims_new*1 - - dims = dims*100. - dims[:,0] = dims[:,0] - origin[0] - dims[:,1] = dims[:,1] - origin[1] - dims[:,3] = dims[:,3] - origin[0] - dims[:,4] = dims[:,4] - origin[1] - dims = dims / resolution - out = {'names': names, 'dims': dims} - else: - out = None - return out - -def _filter_rooms(room_dims, room_regex): - pattern = re.compile(room_regex) - ind = [] - for i, name in enumerate(room_dims['names']): - if pattern.match(name): - ind.append(i) - new_room_dims = {} - new_room_dims['names'] = [room_dims['names'][i] for i in ind] - new_room_dims['dims'] = room_dims['dims'][ind,:]*1 - return new_room_dims - -def _label_nodes_with_room_id(xyt, room_dims): - # Label the room with the ID into things. 
- node_room_id = -1*np.ones((xyt.shape[0], 1)) - dims = room_dims['dims'] - for x, name in enumerate(room_dims['names']): - all_ = np.concatenate((xyt[:,[0]] >= dims[x,0], - xyt[:,[0]] <= dims[x,3], - xyt[:,[1]] >= dims[x,1], - xyt[:,[1]] <= dims[x,4]), axis=1) - node_room_id[np.all(all_, axis=1), 0] = x - return node_room_id - -def get_path_ids(start_node_id, end_node_id, pred_map): - id = start_node_id - path = [id] - while id != end_node_id: - id = pred_map[id] - path.append(id) - return path - -def image_pre(images, modalities): - # Assumes images are ...xHxWxC. - # We always assume images are RGB followed by Depth. - if 'depth' in modalities: - d = images[...,-1][...,np.newaxis]*1. - d[d < 0.01] = np.NaN; isnan = np.isnan(d); - d = 100./d; d[isnan] = 0.; - images = np.concatenate((images[...,:-1], d, isnan), axis=images.ndim-1) - if 'rgb' in modalities: - images[...,:3] = images[...,:3]*1. - 128 - return images - -def _get_relative_goal_loc(goal_loc, loc, theta): - r = np.sqrt(np.sum(np.square(goal_loc - loc), axis=1)) - t = np.arctan2(goal_loc[:,1] - loc[:,1], goal_loc[:,0] - loc[:,0]) - t = t-theta[:,0] + np.pi/2 - return np.expand_dims(r,axis=1), np.expand_dims(t, axis=1) - -def _gen_perturbs(rng, batch_size, num_steps, lr_flip, delta_angle, delta_xy, - structured): - perturbs = [] - for i in range(batch_size): - # Doing things one by one for each episode in this batch. This way this - # remains replicatable even when we change the batch size. - p = np.zeros((num_steps+1, 4)) - if lr_flip: - # Flip the whole trajectory. 
- p[:,3] = rng.rand(1)-0.5 - if delta_angle > 0: - if structured: - p[:,2] = (rng.rand(1)-0.5)* delta_angle - else: - p[:,2] = (rng.rand(p.shape[0])-0.5)* delta_angle - if delta_xy > 0: - if structured: - p[:,:2] = (rng.rand(1, 2)-0.5)*delta_xy - else: - p[:,:2] = (rng.rand(p.shape[0], 2)-0.5)*delta_xy - perturbs.append(p) - return perturbs - -def get_multiplexer_class(args, task_number): - assert(args.task_params.base_class == 'Building') - logging.info('Returning BuildingMultiplexer') - R = BuildingMultiplexer(args, task_number) - return R - -class GridWorld(): - def __init__(self): - """Class members that will be assigned by any class that actually uses this - class.""" - self.restrict_to_largest_cc = None - self.robot = None - self.env = None - self.category_list = None - self.traversible = None - - def get_loc_axis(self, node, delta_theta, perturb=None): - """Based on the node orientation returns X, and Y axis. Used to sample the - map in egocentric coordinate frame. - """ - if type(node) == tuple: - node = np.array([node]) - if perturb is None: - perturb = np.zeros((node.shape[0], 4)) - xyt = self.to_actual_xyt_vec(node) - x = xyt[:,[0]] + perturb[:,[0]] - y = xyt[:,[1]] + perturb[:,[1]] - t = xyt[:,[2]] + perturb[:,[2]] - theta = t*delta_theta - loc = np.concatenate((x,y), axis=1) - x_axis = np.concatenate((np.cos(theta), np.sin(theta)), axis=1) - y_axis = np.concatenate((np.cos(theta+np.pi/2.), np.sin(theta+np.pi/2.)), - axis=1) - # Flip the sampled map where need be. - y_axis[np.where(perturb[:,3] > 0)[0], :] *= -1. - return loc, x_axis, y_axis, theta - - def to_actual_xyt(self, pqr): - """Converts from node to location on the map.""" - (p, q, r) = pqr - if self.task.n_ori == 6: - out = (p - q * 0.5 + self.task.origin_loc[0], - q * np.sqrt(3.) / 2. 
+ self.task.origin_loc[1], r) - elif self.task.n_ori == 4: - out = (p + self.task.origin_loc[0], - q + self.task.origin_loc[1], r) - return out - - def to_actual_xyt_vec(self, pqr): - """Converts from node array to location array on the map.""" - p = pqr[:,0][:, np.newaxis] - q = pqr[:,1][:, np.newaxis] - r = pqr[:,2][:, np.newaxis] - if self.task.n_ori == 6: - out = np.concatenate((p - q * 0.5 + self.task.origin_loc[0], - q * np.sqrt(3.) / 2. + self.task.origin_loc[1], - r), axis=1) - elif self.task.n_ori == 4: - out = np.concatenate((p + self.task.origin_loc[0], - q + self.task.origin_loc[1], - r), axis=1) - return out - - def raw_valid_fn_vec(self, xyt): - """Returns if the given set of nodes is valid or not.""" - height = self.traversible.shape[0] - width = self.traversible.shape[1] - x = np.round(xyt[:,[0]]).astype(np.int32) - y = np.round(xyt[:,[1]]).astype(np.int32) - is_inside = np.all(np.concatenate((x >= 0, y >= 0, - x < width, y < height), axis=1), axis=1) - x = np.minimum(np.maximum(x, 0), width-1) - y = np.minimum(np.maximum(y, 0), height-1) - ind = np.ravel_multi_index((y,x), self.traversible.shape) - is_traversible = self.traversible.ravel()[ind] - - is_valid = np.all(np.concatenate((is_inside[:,np.newaxis], is_traversible), - axis=1), axis=1) - return is_valid - - - def valid_fn_vec(self, pqr): - """Returns if the given set of nodes is valid or not.""" - xyt = self.to_actual_xyt_vec(np.array(pqr)) - height = self.traversible.shape[0] - width = self.traversible.shape[1] - x = np.round(xyt[:,[0]]).astype(np.int32) - y = np.round(xyt[:,[1]]).astype(np.int32) - is_inside = np.all(np.concatenate((x >= 0, y >= 0, - x < width, y < height), axis=1), axis=1) - x = np.minimum(np.maximum(x, 0), width-1) - y = np.minimum(np.maximum(y, 0), height-1) - ind = np.ravel_multi_index((y,x), self.traversible.shape) - is_traversible = self.traversible.ravel()[ind] - - is_valid = np.all(np.concatenate((is_inside[:,np.newaxis], is_traversible), - axis=1), axis=1) - return 
is_valid - - def get_feasible_actions(self, node_ids): - """Returns the feasible set of actions from the current node.""" - a = np.zeros((len(node_ids), self.task_params.num_actions), dtype=np.int32) - gtG = self.task.gtG - next_node = [] - for i, c in enumerate(node_ids): - neigh = gtG.vertex(c).out_neighbours() - neigh_edge = gtG.vertex(c).out_edges() - nn = {} - for n, e in zip(neigh, neigh_edge): - _ = gtG.ep['action'][e] - a[i,_] = 1 - nn[_] = int(n) - next_node.append(nn) - return a, next_node - - def take_action(self, current_node_ids, action): - """Returns the new node after taking the action action. Stays at the current - node if the action is invalid.""" - actions, next_node_ids = self.get_feasible_actions(current_node_ids) - new_node_ids = [] - for i, (c,a) in enumerate(zip(current_node_ids, action)): - if actions[i,a] == 1: - new_node_ids.append(next_node_ids[i][a]) - else: - new_node_ids.append(c) - return new_node_ids - - def set_r_obj(self, r_obj): - """Sets the SwiftshaderRenderer object used for rendering.""" - self.r_obj = r_obj - -class Building(GridWorld): - def __init__(self, building_name, robot, env, - category_list=None, small=False, flip=False, logdir=None, - building_loader=None): - - self.restrict_to_largest_cc = True - self.robot = robot - self.env = env - self.logdir = logdir - - # Load the building meta data. - building = building_loader.load_building(building_name) - if small: - building['mesh_names'] = building['mesh_names'][:5] - - # New code. - shapess = building_loader.load_building_meshes(building) - if flip: - for shapes in shapess: - shapes.flip_shape() - - vs = [] - for shapes in shapess: - vs.append(shapes.get_vertices()[0]) - vs = np.concatenate(vs, axis=0) - map = make_map(env.padding, env.resolution, vertex=vs, sc=100.) 
- map = compute_traversibility( - map, robot.base, robot.height, robot.radius, env.valid_min, - env.valid_max, env.num_point_threshold, shapess=shapess, sc=100., - n_samples_per_face=env.n_samples_per_face) - - room_dims = _get_room_dimensions(building['room_dimension_file'], - env.resolution, map.origin, flip=flip) - class_maps, class_map_names = _get_semantic_maps( - building['class_map_folder'], building_name, map, flip) - - self.class_maps = class_maps - self.class_map_names = class_map_names - self.building = building - self.shapess = shapess - self.map = map - self.traversible = map.traversible*1 - self.building_name = building_name - self.room_dims = room_dims - self.flipped = flip - self.renderer_entitiy_ids = [] - - if self.restrict_to_largest_cc: - self.traversible = pick_largest_cc(self.traversible) - - def load_building_into_scene(self): - # Loads the scene. - self.renderer_entitiy_ids += self.r_obj.load_shapes(self.shapess) - # Free up memory, we dont need the mesh or the materials anymore. - self.shapess = None - - def add_entity_at_nodes(self, nodes, height, shape): - xyt = self.to_actual_xyt_vec(nodes) - nxy = xyt[:,:2]*1. - nxy = nxy * self.map.resolution - nxy = nxy + self.map.origin - Ts = np.concatenate((nxy, nxy[:,:1]), axis=1) - Ts[:,2] = height; Ts = Ts / 100.; - - # Merge all the shapes into a single shape and add that shape. 
- shape.replicate_shape(Ts) - entity_ids = self.r_obj.load_shapes([shape]) - self.renderer_entitiy_ids += entity_ids - return entity_ids - - def add_shapes(self, shapes): - scene = self.r_obj.viz.scene() - for shape in shapes: - scene.AddShape(shape) - - def add_materials(self, materials): - scene = self.r_obj.viz.scene() - for material in materials: - scene.AddOrUpdateMaterial(material) - - def set_building_visibility(self, visibility): - self.r_obj.set_entity_visible(self.renderer_entitiy_ids, visibility) - - def render_nodes(self, nodes, perturb=None, aux_delta_theta=0.): - self.set_building_visibility(True) - if perturb is None: - perturb = np.zeros((len(nodes), 4)) - - imgs = [] - r = 2 - elevation_z = r * np.tan(np.deg2rad(self.robot.camera_elevation_degree)) - - for i in range(len(nodes)): - xyt = self.to_actual_xyt(nodes[i]) - lookat_theta = 3.0 * np.pi / 2.0 - (xyt[2]+perturb[i,2]+aux_delta_theta) * (self.task.delta_theta) - nxy = np.array([xyt[0]+perturb[i,0], xyt[1]+perturb[i,1]]).reshape(1, -1) - nxy = nxy * self.map.resolution - nxy = nxy + self.map.origin - camera_xyz = np.zeros((1, 3)) - camera_xyz[...] = [nxy[0, 0], nxy[0, 1], self.robot.sensor_height] - camera_xyz = camera_xyz / 100. 
- lookat_xyz = np.array([-r * np.sin(lookat_theta), - -r * np.cos(lookat_theta), elevation_z]) - lookat_xyz = lookat_xyz + camera_xyz[0, :] - self.r_obj.position_camera(camera_xyz[0, :].tolist(), - lookat_xyz.tolist(), [0.0, 0.0, 1.0]) - img = self.r_obj.render(take_screenshot=True, output_type=0) - img = [x for x in img if x is not None] - img = np.concatenate(img, axis=2).astype(np.float32) - if perturb[i,3]>0: - img = img[:,::-1,:] - imgs.append(img) - - self.set_building_visibility(False) - return imgs - - -class MeshMapper(Building): - def __init__(self, robot, env, task_params, building_name, category_list, - flip, logdir=None, building_loader=None): - Building.__init__(self, building_name, robot, env, category_list, - small=task_params.toy_problem, flip=flip, logdir=logdir, - building_loader=building_loader) - self.task_params = task_params - self.task = None - self._preprocess_for_task(self.task_params.building_seed) - - def _preprocess_for_task(self, seed): - if self.task is None or self.task.seed != seed: - rng = np.random.RandomState(seed) - origin_loc = get_graph_origin_loc(rng, self.traversible) - self.task = utils.Foo(seed=seed, origin_loc=origin_loc, - n_ori=self.task_params.n_ori) - G = generate_graph(self.valid_fn_vec, - self.task_params.step_size, self.task.n_ori, - (0, 0, 0)) - gtG, nodes, nodes_to_id = convert_to_graph_tool(G) - self.task.gtG = gtG - self.task.nodes = nodes - self.task.delta_theta = 2.0*np.pi/(self.task.n_ori*1.) 
- self.task.nodes_to_id = nodes_to_id - logging.info('Building %s, #V=%d, #E=%d', self.building_name, - self.task.nodes.shape[0], self.task.gtG.num_edges()) - - if self.logdir is not None: - write_traversible = cv2.applyColorMap(self.traversible.astype(np.uint8)*255, cv2.COLORMAP_JET) - img_path = os.path.join(self.logdir, - '{:s}_{:d}_graph.png'.format(self.building_name, - seed)) - node_xyt = self.to_actual_xyt_vec(self.task.nodes) - plt.set_cmap('jet'); - fig, ax = utils.subplot(plt, (1,1), (12,12)) - ax.plot(node_xyt[:,0], node_xyt[:,1], 'm.') - ax.imshow(self.traversible, origin='lower'); - ax.set_axis_off(); ax.axis('equal'); - ax.set_title('{:s}, {:d}, {:d}'.format(self.building_name, - self.task.nodes.shape[0], - self.task.gtG.num_edges())) - if self.room_dims is not None: - for i, r in enumerate(self.room_dims['dims']*1): - min_ = r[:3]*1 - max_ = r[3:]*1 - xmin, ymin, zmin = min_ - xmax, ymax, zmax = max_ - - ax.plot([xmin, xmax, xmax, xmin, xmin], - [ymin, ymin, ymax, ymax, ymin], 'g') - with fu.fopen(img_path, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - plt.close(fig) - - - def _gen_rng(self, rng): - # instances is a list of list of node_ids. 
- if self.task_params.move_type == 'circle': - _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, - self.task.gtG, rng, 0, 1, - compute_path=True) - instances_ = paths - - instances = [] - for instance_ in instances_: - instance = instance_ - for i in range(self.task_params.num_steps): - instance.append(self.take_action([instance[-1]], [1])[0]) - instances.append(instance) - - elif self.task_params.move_type == 'shortest_path': - _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, - self.task.gtG, rng, - self.task_params.num_steps, - self.task_params.num_steps+1, - compute_path=True) - instances = paths - - elif self.task_params.move_type == 'circle+forward': - _, _, _, _, paths = rng_target_dist_field(self.task_params.batch_size, - self.task.gtG, rng, 0, 1, - compute_path=True) - instances_ = paths - instances = [] - for instance_ in instances_: - instance = instance_ - for i in range(self.task_params.n_ori-1): - instance.append(self.take_action([instance[-1]], [1])[0]) - while len(instance) <= self.task_params.num_steps: - while self.take_action([instance[-1]], [3])[0] == instance[-1] and len(instance) <= self.task_params.num_steps: - instance.append(self.take_action([instance[-1]], [2])[0]) - if len(instance) <= self.task_params.num_steps: - instance.append(self.take_action([instance[-1]], [3])[0]) - instances.append(instance) - - # Do random perturbation if needed. - perturbs = _gen_perturbs(rng, self.task_params.batch_size, - self.task_params.num_steps, - self.task_params.data_augment.lr_flip, - self.task_params.data_augment.delta_angle, - self.task_params.data_augment.delta_xy, - self.task_params.data_augment.structured) - return instances, perturbs - - def worker(self, instances, perturbs): - # Output the images and the free space. - - # Make the instances be all the same length. 
- for i in range(len(instances)): - for j in range(self.task_params.num_steps - len(instances[i]) + 1): - instances[i].append(instances[i][-1]) - if perturbs[i].shape[0] < self.task_params.num_steps+1: - p = np.zeros((self.task_params.num_steps+1, 4)) - p[:perturbs[i].shape[0], :] = perturbs[i] - p[perturbs[i].shape[0]:, :] = perturbs[i][-1,:] - perturbs[i] = p - - instances_ = [] - for instance in instances: - instances_ = instances_ + instance - perturbs_ = np.concatenate(perturbs, axis=0) - - instances_nodes = self.task.nodes[instances_,:] - instances_nodes = [tuple(x) for x in instances_nodes] - - imgs_ = self.render_nodes(instances_nodes, perturbs_) - imgs = []; next = 0; - for instance in instances: - img_i = [] - for _ in instance: - img_i.append(imgs_[next]) - next = next+1 - imgs.append(img_i) - imgs = np.array(imgs) - - # Render out the maps in the egocentric view for all nodes and not just the - # last node. - all_nodes = [] - for x in instances: - all_nodes = all_nodes + x - all_perturbs = np.concatenate(perturbs, axis=0) - loc, x_axis, y_axis, theta = self.get_loc_axis( - self.task.nodes[all_nodes, :]*1, delta_theta=self.task.delta_theta, - perturb=all_perturbs) - fss = None - valids = None - loc_on_map = None - theta_on_map = None - cum_fs = None - cum_valid = None - incremental_locs = None - incremental_thetas = None - - if self.task_params.output_free_space: - fss, valids = get_map_to_predict(loc, x_axis, y_axis, - map=self.traversible*1., - map_size=self.task_params.map_size) - fss = np.array(fss) > 0.5 - fss = np.reshape(fss, [self.task_params.batch_size, - self.task_params.num_steps+1, - self.task_params.map_size, - self.task_params.map_size]) - valids = np.reshape(np.array(valids), fss.shape) - - if self.task_params.output_transform_to_global_map: - # Output the transform to the global map. 
- loc_on_map = np.reshape(loc*1, [self.task_params.batch_size, - self.task_params.num_steps+1, -1]) - # Converting to location wrt to first location so that warping happens - # properly. - theta_on_map = np.reshape(theta*1, [self.task_params.batch_size, - self.task_params.num_steps+1, -1]) - - if self.task_params.output_incremental_transform: - # Output the transform to the global map. - incremental_locs_ = np.reshape(loc*1, [self.task_params.batch_size, - self.task_params.num_steps+1, -1]) - incremental_locs_[:,1:,:] -= incremental_locs_[:,:-1,:] - t0 = -np.pi/2+np.reshape(theta*1, [self.task_params.batch_size, - self.task_params.num_steps+1, -1]) - t = t0*1 - incremental_locs = incremental_locs_*1 - incremental_locs[:,:,0] = np.sum(incremental_locs_ * np.concatenate((np.cos(t), np.sin(t)), axis=-1), axis=-1) - incremental_locs[:,:,1] = np.sum(incremental_locs_ * np.concatenate((np.cos(t+np.pi/2), np.sin(t+np.pi/2)), axis=-1), axis=-1) - incremental_locs[:,0,:] = incremental_locs_[:,0,:] - # print incremental_locs_[0,:,:], incremental_locs[0,:,:], t0[0,:,:] - - incremental_thetas = np.reshape(theta*1, [self.task_params.batch_size, - self.task_params.num_steps+1, - -1]) - incremental_thetas[:,1:,:] += -incremental_thetas[:,:-1,:] - - if self.task_params.output_canonical_map: - loc_ = loc[0::(self.task_params.num_steps+1), :] - x_axis = np.zeros_like(loc_); x_axis[:,1] = 1 - y_axis = np.zeros_like(loc_); y_axis[:,0] = -1 - cum_fs, cum_valid = get_map_to_predict(loc_, x_axis, y_axis, - map=self.traversible*1., - map_size=self.task_params.map_size) - cum_fs = np.array(cum_fs) > 0.5 - cum_fs = np.reshape(cum_fs, [self.task_params.batch_size, 1, - self.task_params.map_size, - self.task_params.map_size]) - cum_valid = np.reshape(np.array(cum_valid), cum_fs.shape) - - - inputs = {'fs_maps': fss, - 'valid_maps': valids, - 'imgs': imgs, - 'loc_on_map': loc_on_map, - 'theta_on_map': theta_on_map, - 'cum_fs_maps': cum_fs, - 'cum_valid_maps': cum_valid, - 'incremental_thetas': 
incremental_thetas, - 'incremental_locs': incremental_locs} - return inputs - - def pre(self, inputs): - inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities) - if inputs['loc_on_map'] is not None: - inputs['loc_on_map'] = inputs['loc_on_map'] - inputs['loc_on_map'][:,[0],:] - if inputs['theta_on_map'] is not None: - inputs['theta_on_map'] = np.pi/2. - inputs['theta_on_map'] - return inputs - -def _nav_env_reset_helper(type, rng, nodes, batch_size, gtG, max_dist, - num_steps, num_goals, data_augment, **kwargs): - """Generates and returns a new episode.""" - max_compute = max_dist + 4*num_steps - if type == 'general': - start_node_ids, end_node_ids, dist, pred_map, paths = \ - rng_target_dist_field(batch_size, gtG, rng, max_dist, max_compute, - nodes=nodes, compute_path=False) - target_class = None - - elif type == 'room_to_room_many': - goal_node_ids = []; dists = []; - node_room_ids = kwargs['node_room_ids'] - # Sample the first one - start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room( - batch_size, gtG, rng, max_dist, max_compute, - node_room_ids=node_room_ids, nodes=nodes) - start_node_ids = start_node_ids_ - goal_node_ids.append(end_node_ids_) - dists.append(dist_) - for n in range(num_goals-1): - start_node_ids_, end_node_ids_, dist_, _, _ = rng_next_goal( - goal_node_ids[n], batch_size, gtG, rng, max_dist, - max_compute, node_room_ids=node_room_ids, nodes=nodes, - dists_from_start_node=dists[n]) - goal_node_ids.append(end_node_ids_) - dists.append(dist_) - target_class = None - - elif type == 'rng_rejection_sampling_many': - num_goals = num_goals - goal_node_ids = []; dists = []; - - n_ori = kwargs['n_ori'] - step_size = kwargs['step_size'] - min_dist = kwargs['min_dist'] - sampling_distribution = kwargs['sampling_distribution'] - target_distribution = kwargs['target_distribution'] - rejection_sampling_M = kwargs['rejection_sampling_M'] - distribution_bins = kwargs['distribution_bins'] - - for n in range(num_goals): - if n == 0: 
input_nodes = None - else: input_nodes = goal_node_ids[n-1] - start_node_ids_, end_node_ids_, dist_, _, _, _, _ = rng_next_goal_rejection_sampling( - input_nodes, batch_size, gtG, rng, max_dist, min_dist, - max_compute, sampling_distribution, target_distribution, nodes, - n_ori, step_size, distribution_bins, rejection_sampling_M) - if n == 0: start_node_ids = start_node_ids_ - goal_node_ids.append(end_node_ids_) - dists.append(dist_) - target_class = None - - elif type == 'room_to_room_back': - num_goals = num_goals - assert(num_goals == 2), 'num_goals must be 2.' - goal_node_ids = []; dists = []; - node_room_ids = kwargs['node_room_ids'] - # Sample the first one. - start_node_ids_, end_node_ids_, dist_, _, _ = rng_room_to_room( - batch_size, gtG, rng, max_dist, max_compute, - node_room_ids=node_room_ids, nodes=nodes) - start_node_ids = start_node_ids_ - goal_node_ids.append(end_node_ids_) - dists.append(dist_) - - # Set second goal to be starting position, and compute distance to the start node. - goal_node_ids.append(start_node_ids) - dist = [] - for i in range(batch_size): - dist_ = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=True), - source=gtG.vertex(start_node_ids[i]), target=None) - dist_ = np.array(dist_.get_array()) - dist.append(dist_) - dists.append(dist) - target_class = None - - elif type[:14] == 'to_nearest_obj': - # Generate an episode by sampling one of the target classes (with - # probability proportional to the number of nodes in the world). - # With the sampled class sample a node that is within some distance from - # the sampled class. - class_nodes = kwargs['class_nodes'] - sampling = kwargs['sampling'] - dist_to_class = kwargs['dist_to_class'] - - assert(num_goals == 1), 'Only supports a single goal.' 
- ind = rng.choice(class_nodes.shape[0], size=batch_size) - target_class = class_nodes[ind,1] - start_node_ids = []; dists = []; goal_node_ids = []; - - for t in target_class: - if sampling == 'uniform': - max_dist = max_dist - cnts = np.bincount(dist_to_class[t], minlength=max_dist+1)*1. - cnts[max_dist+1:] = 0 - p_each = 1./ cnts / (max_dist+1.) - p_each[cnts == 0] = 0 - p = p_each[dist_to_class[t]]*1.; p = p/np.sum(p) - start_node_id = rng.choice(p.shape[0], size=1, p=p)[0] - else: - logging.fatal('Sampling not one of uniform.') - start_node_ids.append(start_node_id) - dists.append(dist_to_class[t]) - # Dummy goal node, same as the start node, so that vis is better. - goal_node_ids.append(start_node_id) - dists = [dists] - goal_node_ids = [goal_node_ids] - - return start_node_ids, goal_node_ids, dists, target_class - - -class NavigationEnv(GridWorld, Building): - """Wrapper around GridWorld which sets up navigation tasks. - """ - def _debug_save_hardness(self, seed): - out_path = os.path.join(self.logdir, '{:s}_{:d}_hardness.png'.format(self.building_name, seed)) - batch_size = 4000 - rng = np.random.RandomState(0) - start_node_ids, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists = \ - rng_next_goal_rejection_sampling( - None, batch_size, self.task.gtG, rng, self.task_params.max_dist, - self.task_params.min_dist, self.task_params.max_dist, - self.task.sampling_distribution, self.task.target_distribution, - self.task.nodes, self.task_params.n_ori, self.task_params.step_size, - self.task.distribution_bins, self.task.rejection_sampling_M) - bins = self.task.distribution_bins - n_bins = self.task.n_bins - with plt.style.context('ggplot'): - fig, axes = utils.subplot(plt, (1,2), (10,10)) - ax = axes[0] - _ = ax.hist(hardnesss, bins=bins, weights=np.ones_like(hardnesss)/len(hardnesss)) - ax.plot(bins[:-1]+0.5/n_bins, self.task.target_distribution, 'g') - ax.plot(bins[:-1]+0.5/n_bins, self.task.sampling_distribution, 'b') - ax.grid('on') - - ax = axes[1] - _ 
= ax.hist(gt_dists, bins=np.arange(self.task_params.max_dist+1)) - ax.grid('on') - ax.set_title('Mean: {:0.2f}, Median: {:0.2f}'.format(np.mean(gt_dists), - np.median(gt_dists))) - with fu.fopen(out_path, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - - def _debug_save_map_nodes(self, seed): - """Saves traversible space along with nodes generated on the graph. Takes - the seed as input.""" - img_path = os.path.join(self.logdir, '{:s}_{:d}_graph.png'.format(self.building_name, seed)) - node_xyt = self.to_actual_xyt_vec(self.task.nodes) - plt.set_cmap('jet'); - fig, ax = utils.subplot(plt, (1,1), (12,12)) - ax.plot(node_xyt[:,0], node_xyt[:,1], 'm.') - ax.set_axis_off(); ax.axis('equal'); - - if self.room_dims is not None: - for i, r in enumerate(self.room_dims['dims']*1): - min_ = r[:3]*1 - max_ = r[3:]*1 - xmin, ymin, zmin = min_ - xmax, ymax, zmax = max_ - - ax.plot([xmin, xmax, xmax, xmin, xmin], - [ymin, ymin, ymax, ymax, ymin], 'g') - ax.imshow(self.traversible, origin='lower'); - with fu.fopen(img_path, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - - def _debug_semantic_maps(self, seed): - """Saves traversible space along with nodes generated on the graph. Takes - the seed as input.""" - for i, cls in enumerate(self.task_params.semantic_task.class_map_names): - img_path = os.path.join(self.logdir, '{:s}_flip{:d}_{:s}_graph.png'.format(self.building_name, seed, cls)) - maps = self.traversible*1. 
- maps += 0.5*(self.task.class_maps_dilated[:,:,i]) - write_traversible = (maps*1.+1.)/3.0 - write_traversible = (write_traversible*255.).astype(np.uint8)[:,:,np.newaxis] - write_traversible = write_traversible + np.zeros((1,1,3), dtype=np.uint8) - fu.write_image(img_path, write_traversible[::-1,:,:]) - - def _preprocess_for_task(self, seed): - """Sets up the task field for doing navigation on the grid world.""" - if self.task is None or self.task.seed != seed: - rng = np.random.RandomState(seed) - origin_loc = get_graph_origin_loc(rng, self.traversible) - self.task = utils.Foo(seed=seed, origin_loc=origin_loc, - n_ori=self.task_params.n_ori) - G = generate_graph(self.valid_fn_vec, self.task_params.step_size, - self.task.n_ori, (0, 0, 0)) - gtG, nodes, nodes_to_id = convert_to_graph_tool(G) - self.task.gtG = gtG - self.task.nodes = nodes - self.task.delta_theta = 2.0*np.pi/(self.task.n_ori*1.) - self.task.nodes_to_id = nodes_to_id - - logging.info('Building %s, #V=%d, #E=%d', self.building_name, - self.task.nodes.shape[0], self.task.gtG.num_edges()) - type = self.task_params.type - if type == 'general': - # Do nothing - _ = None - - elif type == 'room_to_room_many' or type == 'room_to_room_back': - if type == 'room_to_room_back': - assert(self.task_params.num_goals == 2), 'num_goals must be 2.' - - self.room_dims = _filter_rooms(self.room_dims, self.task_params.room_regex) - xyt = self.to_actual_xyt_vec(self.task.nodes) - self.task.node_room_ids = _label_nodes_with_room_id(xyt, self.room_dims) - self.task.reset_kwargs = {'node_room_ids': self.task.node_room_ids} - - elif type == 'rng_rejection_sampling_many': - n_bins = 20 - rejection_sampling_M = self.task_params.rejection_sampling_M - min_dist = self.task_params.min_dist - bins = np.arange(n_bins+1)/(n_bins*1.) - target_d = np.zeros(n_bins); target_d[...] 
= 1./n_bins; - - sampling_d = get_hardness_distribution( - self.task.gtG, self.task_params.max_dist, self.task_params.min_dist, - np.random.RandomState(0), 4000, bins, self.task.nodes, - self.task_params.n_ori, self.task_params.step_size) - - self.task.reset_kwargs = {'distribution_bins': bins, - 'target_distribution': target_d, - 'sampling_distribution': sampling_d, - 'rejection_sampling_M': rejection_sampling_M, - 'n_bins': n_bins, - 'n_ori': self.task_params.n_ori, - 'step_size': self.task_params.step_size, - 'min_dist': self.task_params.min_dist} - self.task.n_bins = n_bins - self.task.distribution_bins = bins - self.task.target_distribution = target_d - self.task.sampling_distribution = sampling_d - self.task.rejection_sampling_M = rejection_sampling_M - - if self.logdir is not None: - self._debug_save_hardness(seed) - - elif type[:14] == 'to_nearest_obj': - self.room_dims = _filter_rooms(self.room_dims, self.task_params.room_regex) - xyt = self.to_actual_xyt_vec(self.task.nodes) - - self.class_maps = _select_classes(self.class_maps, - self.class_map_names, - self.task_params.semantic_task.class_map_names)*1 - self.class_map_names = self.task_params.semantic_task.class_map_names - nodes_xyt = self.to_actual_xyt_vec(np.array(self.task.nodes)) - - tt = utils.Timer(); tt.tic(); - if self.task_params.type == 'to_nearest_obj_acc': - self.task.class_maps_dilated, self.task.node_class_label = label_nodes_with_class_geodesic( - nodes_xyt, self.class_maps, - self.task_params.semantic_task.pix_distance+8, self.map.traversible, - ff_cost=1., fo_cost=1., oo_cost=4., connectivity=8.) 
- - dists = [] - for i in range(len(self.class_map_names)): - class_nodes_ = np.where(self.task.node_class_label[:,i])[0] - dists.append(get_distance_node_list(gtG, source_nodes=class_nodes_, direction='to')) - self.task.dist_to_class = dists - a_, b_ = np.where(self.task.node_class_label) - self.task.class_nodes = np.concatenate((a_[:,np.newaxis], b_[:,np.newaxis]), axis=1) - - if self.logdir is not None: - self._debug_semantic_maps(seed) - - self.task.reset_kwargs = {'sampling': self.task_params.semantic_task.sampling, - 'class_nodes': self.task.class_nodes, - 'dist_to_class': self.task.dist_to_class} - - if self.logdir is not None: - self._debug_save_map_nodes(seed) - - def reset(self, rngs): - rng = rngs[0]; rng_perturb = rngs[1]; - nodes = self.task.nodes - tp = self.task_params - - start_node_ids, goal_node_ids, dists, target_class = \ - _nav_env_reset_helper(tp.type, rng, self.task.nodes, tp.batch_size, - self.task.gtG, tp.max_dist, tp.num_steps, - tp.num_goals, tp.data_augment, - **(self.task.reset_kwargs)) - - start_nodes = [tuple(nodes[_,:]) for _ in start_node_ids] - goal_nodes = [[tuple(nodes[_,:]) for _ in __] for __ in goal_node_ids] - data_augment = tp.data_augment - perturbs = _gen_perturbs(rng_perturb, tp.batch_size, - (tp.num_steps+1)*tp.num_goals, - data_augment.lr_flip, data_augment.delta_angle, - data_augment.delta_xy, data_augment.structured) - perturbs = np.array(perturbs) # batch x steps x 4 - end_perturbs = perturbs[:,-(tp.num_goals):,:]*1 # fixed perturb for the goal. 
- perturbs = perturbs[:,:-(tp.num_goals),:]*1 - - history = -np.ones((tp.batch_size, tp.num_steps*tp.num_goals), dtype=np.int32) - self.episode = utils.Foo( - start_nodes=start_nodes, start_node_ids=start_node_ids, - goal_nodes=goal_nodes, goal_node_ids=goal_node_ids, dist_to_goal=dists, - perturbs=perturbs, goal_perturbs=end_perturbs, history=history, - target_class=target_class, history_frames=[]) - return start_node_ids - - def take_action(self, current_node_ids, action, step_number): - """In addition to returning the action, also returns the reward that the - agent receives.""" - goal_number = step_number / self.task_params.num_steps - new_node_ids = GridWorld.take_action(self, current_node_ids, action) - rewards = [] - for i, n in enumerate(new_node_ids): - reward = 0 - if n == self.episode.goal_node_ids[goal_number][i]: - reward = self.task_params.reward_at_goal - reward = reward - self.task_params.reward_time_penalty - rewards.append(reward) - return new_node_ids, rewards - - - def get_optimal_action(self, current_node_ids, step_number): - """Returns the optimal action from the current node.""" - goal_number = step_number / self.task_params.num_steps - gtG = self.task.gtG - a = np.zeros((len(current_node_ids), self.task_params.num_actions), dtype=np.int32) - d_dict = self.episode.dist_to_goal[goal_number] - for i, c in enumerate(current_node_ids): - neigh = gtG.vertex(c).out_neighbours() - neigh_edge = gtG.vertex(c).out_edges() - ds = np.array([d_dict[i][int(x)] for x in neigh]) - ds_min = np.min(ds) - for i_, e in enumerate(neigh_edge): - if ds[i_] == ds_min: - _ = gtG.ep['action'][e] - a[i, _] = 1 - return a - - def get_targets(self, current_node_ids, step_number): - """Returns the target actions from the current node.""" - action = self.get_optimal_action(current_node_ids, step_number) - action = np.expand_dims(action, axis=1) - return vars(utils.Foo(action=action)) - - def get_targets_name(self): - """Returns the list of names of the targets.""" - return 
['action'] - - def cleanup(self): - self.episode = None - -class VisualNavigationEnv(NavigationEnv): - """Class for doing visual navigation in environments. Functions for computing - features on states, etc. - """ - def __init__(self, robot, env, task_params, category_list=None, - building_name=None, flip=False, logdir=None, - building_loader=None, r_obj=None): - tt = utils.Timer() - tt.tic() - Building.__init__(self, building_name, robot, env, category_list, - small=task_params.toy_problem, flip=flip, logdir=logdir, - building_loader=building_loader) - - self.set_r_obj(r_obj) - self.task_params = task_params - self.task = None - self.episode = None - self._preprocess_for_task(self.task_params.building_seed) - if hasattr(self.task_params, 'map_scales'): - self.task.scaled_maps = resize_maps( - self.traversible.astype(np.float32)*1, self.task_params.map_scales, - self.task_params.map_resize_method) - else: - logging.fatal('VisualNavigationEnv does not support scale_f anymore.') - self.task.readout_maps_scaled = resize_maps( - self.traversible.astype(np.float32)*1, - self.task_params.readout_maps_scales, - self.task_params.map_resize_method) - tt.toc(log_at=1, log_str='VisualNavigationEnv __init__: ') - - def get_weight(self): - return self.task.nodes.shape[0] - - def get_common_data(self): - goal_nodes = self.episode.goal_nodes - start_nodes = self.episode.start_nodes - perturbs = self.episode.perturbs - goal_perturbs = self.episode.goal_perturbs - target_class = self.episode.target_class - - goal_locs = []; rel_goal_locs = []; - for i in range(len(goal_nodes)): - end_nodes = goal_nodes[i] - goal_loc, _, _, goal_theta = self.get_loc_axis( - np.array(end_nodes), delta_theta=self.task.delta_theta, - perturb=goal_perturbs[:,i,:]) - - # Compute the relative location to all goals from the starting location. 
- loc, _, _, theta = self.get_loc_axis(np.array(start_nodes), - delta_theta=self.task.delta_theta, - perturb=perturbs[:,0,:]) - r_goal, t_goal = _get_relative_goal_loc(goal_loc*1., loc, theta) - rel_goal_loc = np.concatenate((r_goal*np.cos(t_goal), r_goal*np.sin(t_goal), - np.cos(goal_theta-theta), - np.sin(goal_theta-theta)), axis=1) - rel_goal_locs.append(np.expand_dims(rel_goal_loc, axis=1)) - goal_locs.append(np.expand_dims(goal_loc, axis=1)) - - map = self.traversible*1. - maps = np.repeat(np.expand_dims(np.expand_dims(map, axis=0), axis=0), - self.task_params.batch_size, axis=0)*1 - if self.task_params.type[:14] == 'to_nearest_obj': - for i in range(self.task_params.batch_size): - maps[i,0,:,:] += 0.5*(self.task.class_maps_dilated[:,:,target_class[i]]) - - rel_goal_locs = np.concatenate(rel_goal_locs, axis=1) - goal_locs = np.concatenate(goal_locs, axis=1) - maps = np.expand_dims(maps, axis=-1) - - if self.task_params.type[:14] == 'to_nearest_obj': - rel_goal_locs = np.zeros((self.task_params.batch_size, 1, - len(self.task_params.semantic_task.class_map_names)), - dtype=np.float32) - goal_locs = np.zeros((self.task_params.batch_size, 1, 2), - dtype=np.float32) - for i in range(self.task_params.batch_size): - t = target_class[i] - rel_goal_locs[i,0,t] = 1. - goal_locs[i,0,0] = t - goal_locs[i,0,1] = np.NaN - - return vars(utils.Foo(orig_maps=maps, goal_loc=goal_locs, - rel_goal_loc_at_start=rel_goal_locs)) - - def pre_common_data(self, inputs): - return inputs - - - def get_features(self, current_node_ids, step_number): - task_params = self.task_params - goal_number = step_number / self.task_params.num_steps - end_nodes = self.task.nodes[self.episode.goal_node_ids[goal_number],:]*1 - current_nodes = self.task.nodes[current_node_ids,:]*1 - end_perturbs = self.episode.goal_perturbs[:,goal_number,:][:,np.newaxis,:] - perturbs = self.episode.perturbs - target_class = self.episode.target_class - - # Append to history. 
- self.episode.history[:,step_number] = np.array(current_node_ids) - - # Render out the images from current node. - outs = {} - - if self.task_params.outputs.images: - imgs_all = [] - imgs = self.render_nodes([tuple(x) for x in current_nodes], - perturb=perturbs[:,step_number,:]) - imgs_all.append(imgs) - aux_delta_thetas = self.task_params.aux_delta_thetas - for i in range(len(aux_delta_thetas)): - imgs = self.render_nodes([tuple(x) for x in current_nodes], - perturb=perturbs[:,step_number,:], - aux_delta_theta=aux_delta_thetas[i]) - imgs_all.append(imgs) - imgs_all = np.array(imgs_all) # A x B x H x W x C - imgs_all = np.transpose(imgs_all, axes=[1,0,2,3,4]) - imgs_all = np.expand_dims(imgs_all, axis=1) # B x N x A x H x W x C - if task_params.num_history_frames > 0: - if step_number == 0: - # Append the same frame 4 times - for i in range(task_params.num_history_frames+1): - self.episode.history_frames.insert(0, imgs_all*1.) - self.episode.history_frames.insert(0, imgs_all) - self.episode.history_frames.pop() - imgs_all_with_history = np.concatenate(self.episode.history_frames, axis=2) - else: - imgs_all_with_history = imgs_all - outs['imgs'] = imgs_all_with_history # B x N x A x H x W x C - - if self.task_params.outputs.node_ids: - outs['node_ids'] = np.array(current_node_ids).reshape((-1,1,1)) - outs['perturbs'] = np.expand_dims(perturbs[:,step_number, :]*1., axis=1) - - if self.task_params.outputs.analytical_counts: - assert(self.task_params.modalities == ['depth']) - d = image_pre(outs['imgs']*1., self.task_params.modalities) - cm = get_camera_matrix(self.task_params.img_width, - self.task_params.img_height, - self.task_params.img_fov) - XYZ = get_point_cloud_from_z(100./d[...,0], cm) - XYZ = make_geocentric(XYZ*100., self.robot.sensor_height, - self.robot.camera_elevation_degree) - for i in range(len(self.task_params.analytical_counts.map_sizes)): - non_linearity = self.task_params.analytical_counts.non_linearity[i] - count, isvalid = bin_points(XYZ*1., - 
map_size=self.task_params.analytical_counts.map_sizes[i], - xy_resolution=self.task_params.analytical_counts.xy_resolution[i], - z_bins=self.task_params.analytical_counts.z_bins[i]) - assert(count.shape[2] == 1), 'only works for n_views equal to 1.' - count = count[:,:,0,:,:,:] - isvalid = isvalid[:,:,0,:,:,:] - if non_linearity == 'none': - None - elif non_linearity == 'min10': - count = np.minimum(count, 10.) - elif non_linearity == 'sqrt': - count = np.sqrt(count) - else: - logging.fatal('Undefined non_linearity.') - outs['analytical_counts_{:d}'.format(i)] = count - - # Compute the goal location in the cordinate frame of the robot. - if self.task_params.outputs.rel_goal_loc: - if self.task_params.type[:14] != 'to_nearest_obj': - loc, _, _, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - goal_loc, _, _, goal_theta = self.get_loc_axis(end_nodes, - delta_theta=self.task.delta_theta, - perturb=end_perturbs[:,0,:]) - r_goal, t_goal = _get_relative_goal_loc(goal_loc, loc, theta) - - rel_goal_loc = np.concatenate((r_goal*np.cos(t_goal), r_goal*np.sin(t_goal), - np.cos(goal_theta-theta), - np.sin(goal_theta-theta)), axis=1) - outs['rel_goal_loc'] = np.expand_dims(rel_goal_loc, axis=1) - elif self.task_params.type[:14] == 'to_nearest_obj': - rel_goal_loc = np.zeros((self.task_params.batch_size, 1, - len(self.task_params.semantic_task.class_map_names)), - dtype=np.float32) - for i in range(self.task_params.batch_size): - t = target_class[i] - rel_goal_loc[i,0,t] = 1. - outs['rel_goal_loc'] = rel_goal_loc - - # Location on map to plot the trajectory during validation. 
- if self.task_params.outputs.loc_on_map: - loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - outs['loc_on_map'] = np.expand_dims(loc, axis=1) - - # Compute gt_dist to goal - if self.task_params.outputs.gt_dist_to_goal: - gt_dist_to_goal = np.zeros((len(current_node_ids), 1), dtype=np.float32) - for i, n in enumerate(current_node_ids): - gt_dist_to_goal[i,0] = self.episode.dist_to_goal[goal_number][i][n] - outs['gt_dist_to_goal'] = np.expand_dims(gt_dist_to_goal, axis=1) - - # Free space in front of you, map and goal as images. - if self.task_params.outputs.ego_maps: - loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - maps = generate_egocentric_maps(self.task.scaled_maps, - self.task_params.map_scales, - self.task_params.map_crop_sizes, loc, - x_axis, y_axis, theta) - - for i in range(len(self.task_params.map_scales)): - outs['ego_maps_{:d}'.format(i)] = \ - np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1) - - if self.task_params.outputs.readout_maps: - loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - maps = generate_egocentric_maps(self.task.readout_maps_scaled, - self.task_params.readout_maps_scales, - self.task_params.readout_maps_crop_sizes, - loc, x_axis, y_axis, theta) - for i in range(len(self.task_params.readout_maps_scales)): - outs['readout_maps_{:d}'.format(i)] = \ - np.expand_dims(np.expand_dims(maps[i], axis=1), axis=-1) - - # Images for the goal. 
- if self.task_params.outputs.ego_goal_imgs: - if self.task_params.type[:14] != 'to_nearest_obj': - loc, x_axis, y_axis, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - goal_loc, _, _, _ = self.get_loc_axis(end_nodes, - delta_theta=self.task.delta_theta, - perturb=end_perturbs[:,0,:]) - rel_goal_orientation = np.mod( - np.int32(current_nodes[:,2:] - end_nodes[:,2:]), self.task_params.n_ori) - goal_dist, goal_theta = _get_relative_goal_loc(goal_loc, loc, theta) - goals = generate_goal_images(self.task_params.map_scales, - self.task_params.map_crop_sizes, - self.task_params.n_ori, goal_dist, - goal_theta, rel_goal_orientation) - for i in range(len(self.task_params.map_scales)): - outs['ego_goal_imgs_{:d}'.format(i)] = np.expand_dims(goals[i], axis=1) - - elif self.task_params.type[:14] == 'to_nearest_obj': - for i in range(len(self.task_params.map_scales)): - num_classes = len(self.task_params.semantic_task.class_map_names) - outs['ego_goal_imgs_{:d}'.format(i)] = np.zeros((self.task_params.batch_size, 1, - self.task_params.map_crop_sizes[i], - self.task_params.map_crop_sizes[i], - self.task_params.goal_channels)) - for i in range(self.task_params.batch_size): - t = target_class[i] - for j in range(len(self.task_params.map_scales)): - outs['ego_goal_imgs_{:d}'.format(j)][i,:,:,:,t] = 1. - - # Incremental locs and theta (for map warping), always in the original scale - # of the map, the subequent steps in the tf code scale appropriately. - # Scaling is done by just multiplying incremental_locs appropriately. 
- if self.task_params.outputs.egomotion: - if step_number == 0: - # Zero Ego Motion - incremental_locs = np.zeros((self.task_params.batch_size, 1, 2), dtype=np.float32) - incremental_thetas = np.zeros((self.task_params.batch_size, 1, 1), dtype=np.float32) - else: - previous_nodes = self.task.nodes[self.episode.history[:,step_number-1], :]*1 - loc, _, _, theta = self.get_loc_axis(current_nodes, - delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number,:]) - previous_loc, _, _, previous_theta = self.get_loc_axis( - previous_nodes, delta_theta=self.task.delta_theta, - perturb=perturbs[:,step_number-1,:]) - - incremental_locs_ = np.reshape(loc-previous_loc, [self.task_params.batch_size, 1, -1]) - - t = -np.pi/2+np.reshape(theta*1, [self.task_params.batch_size, 1, -1]) - incremental_locs = incremental_locs_*1 - incremental_locs[:,:,0] = np.sum(incremental_locs_ * - np.concatenate((np.cos(t), np.sin(t)), - axis=-1), axis=-1) - incremental_locs[:,:,1] = np.sum(incremental_locs_ * - np.concatenate((np.cos(t+np.pi/2), - np.sin(t+np.pi/2)), - axis=-1), axis=-1) - incremental_thetas = np.reshape(theta-previous_theta, - [self.task_params.batch_size, 1, -1]) - outs['incremental_locs'] = incremental_locs - outs['incremental_thetas'] = incremental_thetas - - if self.task_params.outputs.visit_count: - # Output the visit count for this state, how many times has the current - # state been visited, and how far in the history was the last visit - # (except this one) - visit_count = np.zeros((self.task_params.batch_size, 1), dtype=np.int32) - last_visit = -np.ones((self.task_params.batch_size, 1), dtype=np.int32) - if step_number >= 1: - h = self.episode.history[:,:(step_number)] - visit_count[:,0] = np.sum(h == np.array(current_node_ids).reshape([-1,1]), - axis=1) - last_visit[:,0] = np.argmax(h[:,::-1] == np.array(current_node_ids).reshape([-1,1]), - axis=1) + 1 - last_visit[visit_count == 0] = -1 # -1 if not visited. 
- outs['visit_count'] = np.expand_dims(visit_count, axis=1) - outs['last_visit'] = np.expand_dims(last_visit, axis=1) - return outs - - def get_features_name(self): - f = [] - if self.task_params.outputs.images: - f.append('imgs') - if self.task_params.outputs.rel_goal_loc: - f.append('rel_goal_loc') - if self.task_params.outputs.loc_on_map: - f.append('loc_on_map') - if self.task_params.outputs.gt_dist_to_goal: - f.append('gt_dist_to_goal') - if self.task_params.outputs.ego_maps: - for i in range(len(self.task_params.map_scales)): - f.append('ego_maps_{:d}'.format(i)) - if self.task_params.outputs.readout_maps: - for i in range(len(self.task_params.readout_maps_scales)): - f.append('readout_maps_{:d}'.format(i)) - if self.task_params.outputs.ego_goal_imgs: - for i in range(len(self.task_params.map_scales)): - f.append('ego_goal_imgs_{:d}'.format(i)) - if self.task_params.outputs.egomotion: - f.append('incremental_locs') - f.append('incremental_thetas') - if self.task_params.outputs.visit_count: - f.append('visit_count') - f.append('last_visit') - if self.task_params.outputs.analytical_counts: - for i in range(len(self.task_params.analytical_counts.map_sizes)): - f.append('analytical_counts_{:d}'.format(i)) - if self.task_params.outputs.node_ids: - f.append('node_ids') - f.append('perturbs') - return f - - def pre_features(self, inputs): - if self.task_params.outputs.images: - inputs['imgs'] = image_pre(inputs['imgs'], self.task_params.modalities) - return inputs - -class BuildingMultiplexer(): - def __init__(self, args, task_number): - params = vars(args) - for k in params.keys(): - setattr(self, k, params[k]) - self.task_number = task_number - self._pick_data(task_number) - logging.info('Env Class: %s.', self.env_class) - if self.task_params.task == 'planning': - self._setup_planner() - elif self.task_params.task == 'mapping': - self._setup_mapper() - elif self.task_params.task == 'map+plan': - self._setup_mapper() - else: - logging.error('Undefined task: 
%s'.format(self.task_params.task)) - - def _pick_data(self, task_number): - logging.error('Input Building Names: %s', self.building_names) - self.flip = [np.mod(task_number / len(self.building_names), 2) == 1] - id = np.mod(task_number, len(self.building_names)) - self.building_names = [self.building_names[id]] - self.task_params.building_seed = task_number - logging.error('BuildingMultiplexer: Picked Building Name: %s', self.building_names) - self.building_names = self.building_names[0].split('+') - self.flip = [self.flip[0] for _ in self.building_names] - logging.error('BuildingMultiplexer: Picked Building Name: %s', self.building_names) - logging.error('BuildingMultiplexer: Flipping Buildings: %s', self.flip) - logging.error('BuildingMultiplexer: Set building_seed: %d', self.task_params.building_seed) - self.num_buildings = len(self.building_names) - logging.error('BuildingMultiplexer: Num buildings: %d', self.num_buildings) - - def _setup_planner(self): - # Load building env class. - self.buildings = [] - for i, building_name in enumerate(self.building_names): - b = self.env_class(robot=self.robot, env=self.env, - task_params=self.task_params, - building_name=building_name, flip=self.flip[i], - logdir=self.logdir, building_loader=self.dataset) - self.buildings.append(b) - - def _setup_mapper(self): - # Set up the renderer. - cp = self.camera_param - rgb_shader, d_shader = sru.get_shaders(cp.modalities) - r_obj = SwiftshaderRenderer() - r_obj.init_display(width=cp.width, height=cp.height, fov=cp.fov, - z_near=cp.z_near, z_far=cp.z_far, rgb_shader=rgb_shader, - d_shader=d_shader) - self.r_obj = r_obj - r_obj.clear_scene() - - # Load building env class. 
- self.buildings = [] - wt = [] - for i, building_name in enumerate(self.building_names): - b = self.env_class(robot=self.robot, env=self.env, - task_params=self.task_params, - building_name=building_name, flip=self.flip[i], - logdir=self.logdir, building_loader=self.dataset, - r_obj=r_obj) - wt.append(b.get_weight()) - b.load_building_into_scene() - b.set_building_visibility(False) - self.buildings.append(b) - wt = np.array(wt).astype(np.float32) - wt = wt / np.sum(wt+0.0001) - self.building_sampling_weights = wt - - def sample_building(self, rng): - if self.num_buildings == 1: - building_id = rng.choice(range(len(self.building_names))) - else: - building_id = rng.choice(self.num_buildings, - p=self.building_sampling_weights) - b = self.buildings[building_id] - instances = b._gen_rng(rng) - self._building_id = building_id - return self.buildings[building_id], instances - - def sample_env(self, rngs): - rng = rngs[0]; - if self.num_buildings == 1: - building_id = rng.choice(range(len(self.building_names))) - else: - building_id = rng.choice(self.num_buildings, - p=self.building_sampling_weights) - return self.buildings[building_id] - - def pre(self, inputs): - return self.buildings[self._building_id].pre(inputs) - - def __del__(self): - self.r_obj.clear_scene() - logging.error('Clearing scene.') diff --git a/research/cognitive_mapping_and_planning/datasets/nav_env_config.py b/research/cognitive_mapping_and_planning/datasets/nav_env_config.py deleted file mode 100644 index 3d71c5767c4dc0ed9f05cce5c1790f11ede3778a..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/datasets/nav_env_config.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Configs for stanford navigation environment. - -Base config for stanford navigation enviornment. -""" -import numpy as np -import src.utils as utils -import datasets.nav_env as nav_env - -def nav_env_base_config(): - """Returns the base config for stanford navigation environment. - - Returns: - Base config for stanford navigation environment. - """ - robot = utils.Foo(radius=15, - base=10, - height=140, - sensor_height=120, - camera_elevation_degree=-15) - - env = utils.Foo(padding=10, - resolution=5, - num_point_threshold=2, - valid_min=-10, - valid_max=200, - n_samples_per_face=200) - - camera_param = utils.Foo(width=225, - height=225, - z_near=0.05, - z_far=20.0, - fov=60., - modalities=['rgb'], - img_channels=3) - - data_augment = utils.Foo(lr_flip=0, - delta_angle=0.5, - delta_xy=4, - relight=True, - relight_fast=False, - structured=False) # if True, uses the same perturb for the whole episode. - - outputs = utils.Foo(images=True, - rel_goal_loc=False, - loc_on_map=True, - gt_dist_to_goal=True, - ego_maps=False, - ego_goal_imgs=False, - egomotion=False, - visit_count=False, - analytical_counts=False, - node_ids=True, - readout_maps=False) - - # class_map_names=['board', 'chair', 'door', 'sofa', 'table'] - class_map_names = ['chair', 'door', 'table'] - semantic_task = utils.Foo(class_map_names=class_map_names, pix_distance=16, - sampling='uniform') - - # time per iteration for cmp is 0.82 seconds per episode with 3.4s overhead per batch. 
- task_params = utils.Foo(max_dist=32, - step_size=8, - num_steps=40, - num_actions=4, - batch_size=4, - building_seed=0, - num_goals=1, - img_height=None, - img_width=None, - img_channels=None, - modalities=None, - outputs=outputs, - map_scales=[1.], - map_crop_sizes=[64], - rel_goal_loc_dim=4, - base_class='Building', - task='map+plan', - n_ori=4, - type='room_to_room_many', - data_augment=data_augment, - room_regex='^((?!hallway).)*$', - toy_problem=False, - map_channels=1, - gt_coverage=False, - input_type='maps', - full_information=False, - aux_delta_thetas=[], - semantic_task=semantic_task, - num_history_frames=0, - node_ids_dim=1, - perturbs_dim=4, - map_resize_method='linear_noantialiasing', - readout_maps_channels=1, - readout_maps_scales=[], - readout_maps_crop_sizes=[], - n_views=1, - reward_time_penalty=0.1, - reward_at_goal=1., - discount_factor=0.99, - rejection_sampling_M=100, - min_dist=None) - - navtask_args = utils.Foo( - building_names=['area1_gates_wingA_floor1_westpart'], - env_class=nav_env.VisualNavigationEnv, - robot=robot, - task_params=task_params, - env=env, - camera_param=camera_param, - cache_rooms=True) - return navtask_args - diff --git a/research/cognitive_mapping_and_planning/matplotlibrc b/research/cognitive_mapping_and_planning/matplotlibrc deleted file mode 100644 index ed5097572ae68680d0c9afdf510968e1c3d175d4..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/matplotlibrc +++ /dev/null @@ -1 +0,0 @@ -backend : agg diff --git a/research/cognitive_mapping_and_planning/output/.gitignore b/research/cognitive_mapping_and_planning/output/.gitignore deleted file mode 100644 index a767cafbbd864d0baf76530294598e4c2be60a24..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/output/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* diff --git a/research/cognitive_mapping_and_planning/output/README.md b/research/cognitive_mapping_and_planning/output/README.md deleted file mode 
100644 index 7518c3874390da7e2aa65a89ccdec035ca7610e8..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/output/README.md +++ /dev/null @@ -1,16 +0,0 @@ -### Pre-Trained Models - -We provide the following pre-trained models: - -Config Name | Checkpoint | Mean Dist. | 50%ile Dist. | 75%ile Dist. | Success %age | -:-: | :-: | :-: | :-: | :-: | :-: | -cmp.lmap_Msc.clip5.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r.tar) | 4.79 | 0 | 1 | 78.9 | -cmp.lmap_Msc.clip5.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_r2r.tar) | 7.74 | 0 | 14 | 62.4 | -cmp.lmap_Msc.clip5.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_ST.tar) | 10.67 | 9 | 19 | 39.7 | -cmp.lmap_Msc.clip5.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_rgb_ST.tar) | 11.27 | 10 | 19 | 35.6 | -cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80 | [ckpt](http:////download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80.tar) | 11.6 | 0 | 19 | 66.9 | -bl.v2.noclip.sbpd_d_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r.tar) | 5.90 | 0 | 6 | 71.2 | -bl.v2.noclip.sbpd_rgb_r2r | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_r2r.tar) | 10.21 | 1 | 21 | 53.4 | -bl.v2.noclip.sbpd_d_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_ST.tar) | 13.29 | 14 | 23 | 28.0 | -bl.v2.noclip.sbpd_rgb_ST | [ckpt](http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_rgb_ST.tar) | 13.37 | 13 | 20 | 24.2 | 
-bl.v2.noclip.sbpd_d_r2r_h0_64_80 | [ckpt](http:////download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/bl.v2.noclip.sbpd_d_r2r_h0_64_80.tar) | 15.30 | 0 | 29 | 57.9 | diff --git a/research/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch b/research/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch deleted file mode 100644 index de1be442d5b9fff44862d37b9329e32face2b663..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/patches/GLES2_2_0.py.patch +++ /dev/null @@ -1,14 +0,0 @@ -10c10 -< from OpenGL import platform, constant, arrays ---- -> from OpenGL import platform, constant, arrays, contextdata -249a250 -> from OpenGL._bytes import _NULL_8_BYTE -399c400 -< array = ArrayDatatype.asArray( pointer, type ) ---- -> array = arrays.ArrayDatatype.asArray( pointer, type ) -405c406 -< ArrayDatatype.voidDataPointer( array ) ---- -> arrays.ArrayDatatype.voidDataPointer( array ) diff --git a/research/cognitive_mapping_and_planning/patches/apply_patches.sh b/research/cognitive_mapping_and_planning/patches/apply_patches.sh deleted file mode 100644 index 4a786058258decdfb381eff25684183d92788ebe..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/patches/apply_patches.sh +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -echo $VIRTUAL_ENV -patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/GLES2/VERSION/GLES2_2_0.py patches/GLES2_2_0.py.patch -patch $VIRTUAL_ENV/local/lib/python2.7/site-packages/OpenGL/platform/ctypesloader.py patches/ctypesloader.py.patch diff --git a/research/cognitive_mapping_and_planning/patches/ctypesloader.py.patch b/research/cognitive_mapping_and_planning/patches/ctypesloader.py.patch deleted file mode 100644 index 27dd43b18010dc5fdcd605b9a5d470abaa19151f..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/patches/ctypesloader.py.patch +++ /dev/null @@ -1,15 +0,0 @@ -45c45,46 -< return dllType( name, mode ) ---- -> print './' + name -> return dllType( './' + name, mode ) -47,48c48,53 -< err.args += (name,fullName) -< raise ---- -> try: -> print name -> return dllType( name, mode ) -> except: -> err.args += (name,fullName) -> raise diff --git a/research/cognitive_mapping_and_planning/render/__init__.py b/research/cognitive_mapping_and_planning/render/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp b/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp deleted file mode 100644 index 23e93d27f585e93896799f177888e9c50fa03eed..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.fp +++ /dev/null @@ -1,30 +0,0 @@ -// This shader computes per-pixel depth (-z coordinate in the camera space, or -// orthogonal distance to the camera plane). The result is multiplied by the -// `kFixedPointFraction` constant and is encoded to RGB channels as an integer -// (R being the least significant byte). 
- -#ifdef GL_ES -#ifdef GL_FRAGMENT_PRECISION_HIGH -precision highp float; -#else -precision mediump float; -#endif -#endif - -const float kFixedPointFraction = 1000.0; - -varying float vDepth; - -void main(void) { - float d = vDepth; - - // Encode the depth to RGB. - d *= (kFixedPointFraction / 255.0); - gl_FragColor.r = mod(d, 1.0); - d = (d - gl_FragColor.r) / 255.0; - gl_FragColor.g = mod(d, 1.0); - d = (d - gl_FragColor.g) / 255.0; - gl_FragColor.b = mod(d, 1.0); - - gl_FragColor.a = 1.0; -} diff --git a/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp b/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp deleted file mode 100644 index 2db74f14aa7f253b8f544ec1ab519129f13426a0..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/render/depth_rgb_encoded.vp +++ /dev/null @@ -1,15 +0,0 @@ -uniform mat4 uViewMatrix; -uniform mat4 uProjectionMatrix; - -attribute vec3 aPosition; - -varying float vDepth; - -void main(void) { - vec4 worldPosition = vec4(aPosition, 1.0); - vec4 viewPosition = uViewMatrix * worldPosition; - gl_Position = uProjectionMatrix * viewPosition; - - // Orthogonal depth is simply -z in the camera space. 
- vDepth = -viewPosition.z; -} diff --git a/research/cognitive_mapping_and_planning/render/rgb_flat_color.fp b/research/cognitive_mapping_and_planning/render/rgb_flat_color.fp deleted file mode 100644 index c8c24d76103793d9cfa9166517177cb332d1a92c..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/render/rgb_flat_color.fp +++ /dev/null @@ -1,11 +0,0 @@ -precision highp float; -varying vec4 vColor; -varying vec2 vTextureCoord; - -uniform sampler2D uTexture; - -void main(void) { - vec4 color = vColor; - color = texture2D(uTexture, vTextureCoord); - gl_FragColor = color; -} diff --git a/research/cognitive_mapping_and_planning/render/rgb_flat_color.vp b/research/cognitive_mapping_and_planning/render/rgb_flat_color.vp deleted file mode 100644 index ebc79173405f7449921fd40f778fe3695aab5ea8..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/render/rgb_flat_color.vp +++ /dev/null @@ -1,18 +0,0 @@ -uniform mat4 uViewMatrix; -uniform mat4 uProjectionMatrix; -uniform vec4 uColor; - -attribute vec4 aColor; -attribute vec3 aPosition; -attribute vec2 aTextureCoord; - -varying vec4 vColor; -varying vec2 vTextureCoord; - -void main(void) { - vec4 worldPosition = vec4(aPosition, 1.0); - gl_Position = uProjectionMatrix * (uViewMatrix * worldPosition); - - vColor = aColor * uColor; - vTextureCoord = aTextureCoord; -} diff --git a/research/cognitive_mapping_and_planning/render/swiftshader_renderer.py b/research/cognitive_mapping_and_planning/render/swiftshader_renderer.py deleted file mode 100644 index 74b1be72c11a2877231a66886d02babfd4793ce8..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/render/swiftshader_renderer.py +++ /dev/null @@ -1,427 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Implements loading and rendering of meshes. Contains 2 classes: - Shape: Class that exposes high level functions for loading and manipulating - shapes. This currently is bound to assimp - (https://github.com/assimp/assimp). If you want to interface to a different - library, reimplement this class with bindings to your mesh loading library. - - SwiftshaderRenderer: Class that renders Shapes. Currently this uses python - bindings to OpenGL (EGL), bindings to an alternate renderer may be implemented - here. 
-""" - -import numpy as np, os -import cv2, ctypes, logging, os, numpy as np -import pyassimp as assimp -from OpenGL.GLES2 import * -from OpenGL.EGL import * -import src.rotation_utils as ru - -__version__ = 'swiftshader_renderer' - -def get_shaders(modalities): - rgb_shader = 'rgb_flat_color' if 'rgb' in modalities else None - d_shader = 'depth_rgb_encoded' if 'depth' in modalities else None - return rgb_shader, d_shader - -def sample_points_on_faces(vs, fs, rng, n_samples_per_face): - idx = np.repeat(np.arange(fs.shape[0]), n_samples_per_face) - - r = rng.rand(idx.size, 2) - r1 = r[:,:1]; r2 = r[:,1:]; sqrt_r1 = np.sqrt(r1); - - v1 = vs[fs[idx, 0], :]; v2 = vs[fs[idx, 1], :]; v3 = vs[fs[idx, 2], :]; - pts = (1-sqrt_r1)*v1 + sqrt_r1*(1-r2)*v2 + sqrt_r1*r2*v3 - - v1 = vs[fs[:,0], :]; v2 = vs[fs[:, 1], :]; v3 = vs[fs[:, 2], :]; - ar = 0.5*np.sqrt(np.sum(np.cross(v1-v3, v2-v3)**2, 1)) - - return pts, ar, idx - -class Shape(): - def get_pyassimp_load_options(self): - load_flags = assimp.postprocess.aiProcess_Triangulate; - load_flags = load_flags | assimp.postprocess.aiProcess_SortByPType; - load_flags = load_flags | assimp.postprocess.aiProcess_OptimizeMeshes; - load_flags = load_flags | assimp.postprocess.aiProcess_RemoveRedundantMaterials; - load_flags = load_flags | assimp.postprocess.aiProcess_FindDegenerates; - load_flags = load_flags | assimp.postprocess.aiProcess_GenSmoothNormals; - load_flags = load_flags | assimp.postprocess.aiProcess_JoinIdenticalVertices; - load_flags = load_flags | assimp.postprocess.aiProcess_ImproveCacheLocality; - load_flags = load_flags | assimp.postprocess.aiProcess_GenUVCoords; - load_flags = load_flags | assimp.postprocess.aiProcess_FindInvalidData; - return load_flags - - def __init__(self, obj_file, material_file=None, load_materials=True, - name_prefix='', name_suffix=''): - if material_file is not None: - logging.error('Ignoring material file input, reading them off obj file.') - load_flags = self.get_pyassimp_load_options() - 
scene = assimp.load(obj_file, processing=load_flags) - filter_ind = self._filter_triangles(scene.meshes) - self.meshes = [scene.meshes[i] for i in filter_ind] - for m in self.meshes: - m.name = name_prefix + m.name + name_suffix - - dir_name = os.path.dirname(obj_file) - # Load materials - materials = None - if load_materials: - materials = [] - for m in self.meshes: - file_name = os.path.join(dir_name, m.material.properties[('file', 1)]) - assert(os.path.exists(file_name)), \ - 'Texture file {:s} foes not exist.'.format(file_name) - img_rgb = cv2.imread(file_name)[::-1,:,::-1] - if img_rgb.shape[0] != img_rgb.shape[1]: - logging.warn('Texture image not square.') - sz = np.maximum(img_rgb.shape[0], img_rgb.shape[1]) - sz = int(np.power(2., np.ceil(np.log2(sz)))) - img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR) - else: - sz = img_rgb.shape[0] - sz_ = int(np.power(2., np.ceil(np.log2(sz)))) - if sz != sz_: - logging.warn('Texture image not square of power of 2 size. 
' + - 'Changing size from %d to %d.', sz, sz_) - sz = sz_ - img_rgb = cv2.resize(img_rgb, (sz,sz), interpolation=cv2.INTER_LINEAR) - materials.append(img_rgb) - self.scene = scene - self.materials = materials - - def _filter_triangles(self, meshes): - select = [] - for i in range(len(meshes)): - if meshes[i].primitivetypes == 4: - select.append(i) - return select - - def flip_shape(self): - for m in self.meshes: - m.vertices[:,1] = -m.vertices[:,1] - bb = m.faces*1 - bb[:,1] = m.faces[:,2] - bb[:,2] = m.faces[:,1] - m.faces = bb - # m.vertices[:,[0,1]] = m.vertices[:,[1,0]] - - def get_vertices(self): - vs = [] - for m in self.meshes: - vs.append(m.vertices) - vss = np.concatenate(vs, axis=0) - return vss, vs - - def get_faces(self): - vs = [] - for m in self.meshes: - v = m.faces - vs.append(v) - return vs - - def get_number_of_meshes(self): - return len(self.meshes) - - def scale(self, sx=1., sy=1., sz=1.): - pass - - def sample_points_on_face_of_shape(self, i, n_samples_per_face, sc): - v = self.meshes[i].vertices*sc - f = self.meshes[i].faces - p, face_areas, face_idx = sample_points_on_faces( - v, f, np.random.RandomState(0), n_samples_per_face) - return p, face_areas, face_idx - - def __del__(self): - scene = self.scene - assimp.release(scene) - -class SwiftshaderRenderer(): - def __init__(self): - self.entities = {} - - def init_display(self, width, height, fov, z_near, z_far, rgb_shader, - d_shader): - self.init_renderer_egl(width, height) - dir_path = os.path.dirname(os.path.realpath(__file__)) - if d_shader is not None and rgb_shader is not None: - logging.fatal('Does not support setting both rgb_shader and d_shader.') - - if d_shader is not None: - assert rgb_shader is None - shader = d_shader - self.modality = 'depth' - - if rgb_shader is not None: - assert d_shader is None - shader = rgb_shader - self.modality = 'rgb' - - self.create_shaders(os.path.join(dir_path, shader+'.vp'), - os.path.join(dir_path, shader + '.fp')) - aspect = width*1./(height*1.) 
- self.set_camera(fov, z_near, z_far, aspect) - - def init_renderer_egl(self, width, height): - major,minor = ctypes.c_long(),ctypes.c_long() - logging.info('init_renderer_egl: EGL_DEFAULT_DISPLAY: %s', EGL_DEFAULT_DISPLAY) - - egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY) - logging.info('init_renderer_egl: egl_display: %s', egl_display) - - eglInitialize(egl_display, major, minor) - logging.info('init_renderer_egl: EGL_OPENGL_API, EGL_OPENGL_ES_API: %s, %s', - EGL_OPENGL_API, EGL_OPENGL_ES_API) - eglBindAPI(EGL_OPENGL_ES_API) - - num_configs = ctypes.c_long() - configs = (EGLConfig*1)() - local_attributes = [EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, - EGL_DEPTH_SIZE, 16, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, - EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, EGL_NONE,] - logging.error('init_renderer_egl: local attributes: %s', local_attributes) - local_attributes = arrays.GLintArray.asArray(local_attributes) - success = eglChooseConfig(egl_display, local_attributes, configs, 1, num_configs) - logging.error('init_renderer_egl: eglChooseConfig success, num_configs: %d, %d', success, num_configs.value) - egl_config = configs[0] - - - context_attributes = [EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE] - context_attributes = arrays.GLintArray.asArray(context_attributes) - egl_context = eglCreateContext(egl_display, egl_config, EGL_NO_CONTEXT, context_attributes) - - buffer_attributes = [EGL_WIDTH, width, EGL_HEIGHT, height, EGL_NONE] - buffer_attributes = arrays.GLintArray.asArray(buffer_attributes) - egl_surface = eglCreatePbufferSurface(egl_display, egl_config, buffer_attributes) - - - eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context) - logging.error("init_renderer_egl: egl_display: %s egl_surface: %s, egl_config: %s", egl_display, egl_surface, egl_context) - - glViewport(0, 0, width, height); - - self.egl_display = egl_display - self.egl_surface = egl_surface - self.egl_config = egl_config - self.egl_mapping = {} - self.render_timer = None - 
self.load_timer = None - self.height = height - self.width = width - - def create_shaders(self, v_shader_file, f_shader_file): - v_shader = glCreateShader(GL_VERTEX_SHADER) - with open(v_shader_file, 'r') as f: - ls = '' - for l in f: - ls = ls + l - glShaderSource(v_shader, ls) - glCompileShader(v_shader); - assert(glGetShaderiv(v_shader, GL_COMPILE_STATUS) == 1) - - f_shader = glCreateShader(GL_FRAGMENT_SHADER) - with open(f_shader_file, 'r') as f: - ls = '' - for l in f: - ls = ls + l - glShaderSource(f_shader, ls) - glCompileShader(f_shader); - assert(glGetShaderiv(f_shader, GL_COMPILE_STATUS) == 1) - - egl_program = glCreateProgram(); - assert(egl_program) - glAttachShader(egl_program, v_shader) - glAttachShader(egl_program, f_shader) - glLinkProgram(egl_program); - assert(glGetProgramiv(egl_program, GL_LINK_STATUS) == 1) - glUseProgram(egl_program) - - glBindAttribLocation(egl_program, 0, "aPosition") - glBindAttribLocation(egl_program, 1, "aColor") - glBindAttribLocation(egl_program, 2, "aTextureCoord") - - self.egl_program = egl_program - self.egl_mapping['vertexs'] = 0 - self.egl_mapping['vertexs_color'] = 1 - self.egl_mapping['vertexs_tc'] = 2 - - glClearColor(0.0, 0.0, 0.0, 1.0); - # glEnable(GL_CULL_FACE); glCullFace(GL_BACK); - glEnable(GL_DEPTH_TEST); - - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) - - def set_camera(self, fov_vertical, z_near, z_far, aspect): - width = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near*aspect; - height = 2*np.tan(np.deg2rad(fov_vertical)/2.0)*z_near; - egl_program = self.egl_program - c = np.eye(4, dtype=np.float32) - c[3,3] = 0 - c[3,2] = -1 - c[2,2] = -(z_near+z_far)/(z_far-z_near) - c[2,3] = -2.0*(z_near*z_far)/(z_far-z_near) - c[0,0] = 2.0*z_near/width - c[1,1] = 2.0*z_near/height - c = c.T - - projection_matrix_o = glGetUniformLocation(egl_program, 'uProjectionMatrix') - projection_matrix = np.eye(4, dtype=np.float32) - projection_matrix[...] 
= c - projection_matrix = np.reshape(projection_matrix, (-1)) - glUniformMatrix4fv(projection_matrix_o, 1, GL_FALSE, projection_matrix) - - - def load_default_object(self): - v = np.array([[0.0, 0.5, 0.0, 1.0, 1.0, 0.0, 1.0], - [-0.5, -0.5, 0.0, 1.0, 0.0, 1.0, 1.0], - [0.5, -0.5, 0.0, 1.0, 1.0, 1.0, 1.0]], dtype=np.float32) - v = np.concatenate((v,v+0.1), axis=0) - v = np.ascontiguousarray(v, dtype=np.float32) - - vbo = glGenBuffers(1) - glBindBuffer (GL_ARRAY_BUFFER, vbo) - glBufferData (GL_ARRAY_BUFFER, v.dtype.itemsize*v.size, v, GL_STATIC_DRAW) - glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(0)) - glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 28, ctypes.c_void_p(12)) - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); - - self.num_to_render = 6; - - def _actual_render(self): - for entity_id, entity in self.entities.iteritems(): - if entity['visible']: - vbo = entity['vbo'] - tbo = entity['tbo'] - num = entity['num'] - - glBindBuffer(GL_ARRAY_BUFFER, vbo) - glVertexAttribPointer(self.egl_mapping['vertexs'], 3, GL_FLOAT, GL_FALSE, - 20, ctypes.c_void_p(0)) - glVertexAttribPointer(self.egl_mapping['vertexs_tc'], 2, GL_FLOAT, - GL_FALSE, 20, ctypes.c_void_p(12)) - glEnableVertexAttribArray(self.egl_mapping['vertexs']); - glEnableVertexAttribArray(self.egl_mapping['vertexs_tc']); - - glBindTexture(GL_TEXTURE_2D, tbo) - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glDrawArrays(GL_TRIANGLES, 0, num) - - def render(self, take_screenshot=False, output_type=0): - # self.render_timer.tic() - self._actual_render() - # self.render_timer.toc(log_at=1000, log_str='render timer', type='time') - - np_rgb_img = None - np_d_img = None - c = 1000. 
- if take_screenshot: - if self.modality == 'rgb': - screenshot_rgba = np.zeros((self.height, self.width, 4), dtype=np.uint8) - glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_rgba) - np_rgb_img = screenshot_rgba[::-1,:,:3]; - - if self.modality == 'depth': - screenshot_d = np.zeros((self.height, self.width, 4), dtype=np.uint8) - glReadPixels(0, 0, self.width, self.height, GL_RGBA, GL_UNSIGNED_BYTE, screenshot_d) - np_d_img = screenshot_d[::-1,:,:3]; - np_d_img = np_d_img[:,:,2]*(255.*255./c) + np_d_img[:,:,1]*(255./c) + np_d_img[:,:,0]*(1./c) - np_d_img = np_d_img.astype(np.float32) - np_d_img[np_d_img == 0] = np.NaN - np_d_img = np_d_img[:,:,np.newaxis] - - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT) - return np_rgb_img, np_d_img - - def _load_mesh_into_gl(self, mesh, material): - vvt = np.concatenate((mesh.vertices, mesh.texturecoords[0,:,:2]), axis=1) - vvt = np.ascontiguousarray(vvt[mesh.faces.reshape((-1)),:], dtype=np.float32) - num = vvt.shape[0] - vvt = np.reshape(vvt, (-1)) - - vbo = glGenBuffers(1) - glBindBuffer(GL_ARRAY_BUFFER, vbo) - glBufferData(GL_ARRAY_BUFFER, vvt.dtype.itemsize*vvt.size, vvt, GL_STATIC_DRAW) - - tbo = glGenTextures(1) - glBindTexture(GL_TEXTURE_2D, tbo) - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, material.shape[1], - material.shape[0], 0, GL_RGB, GL_UNSIGNED_BYTE, - np.reshape(material, (-1))) - return num, vbo, tbo - - def load_shapes(self, shapes): - entities = self.entities - entity_ids = [] - for i, shape in enumerate(shapes): - for j in range(len(shape.meshes)): - name = shape.meshes[j].name - assert name not in entities, '{:s} entity already exists.'.format(name) - num, vbo, tbo = self._load_mesh_into_gl(shape.meshes[j], shape.materials[j]) - entities[name] = {'num': num, 'vbo': vbo, 'tbo': tbo, 'visible': False} - entity_ids.append(name) - return entity_ids - - def set_entity_visible(self, entity_ids, visibility): - for entity_id in entity_ids: - self.entities[entity_id]['visible'] = 
visibility - - def position_camera(self, camera_xyz, lookat_xyz, up): - camera_xyz = np.array(camera_xyz) - lookat_xyz = np.array(lookat_xyz) - up = np.array(up) - lookat_to = lookat_xyz - camera_xyz - lookat_from = np.array([0, 1., 0.]) - up_from = np.array([0, 0., 1.]) - up_to = up * 1. - # np.set_printoptions(precision=2, suppress=True) - # print up_from, lookat_from, up_to, lookat_to - r = ru.rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to) - R = np.eye(4, dtype=np.float32) - R[:3,:3] = r - - t = np.eye(4, dtype=np.float32) - t[:3,3] = -camera_xyz - - view_matrix = np.dot(R.T, t) - flip_yz = np.eye(4, dtype=np.float32) - flip_yz[1,1] = 0; flip_yz[2,2] = 0; flip_yz[1,2] = 1; flip_yz[2,1] = -1; - view_matrix = np.dot(flip_yz, view_matrix) - view_matrix = view_matrix.T - # print np.concatenate((R, t, view_matrix), axis=1) - view_matrix = np.reshape(view_matrix, (-1)) - view_matrix_o = glGetUniformLocation(self.egl_program, 'uViewMatrix') - glUniformMatrix4fv(view_matrix_o, 1, GL_FALSE, view_matrix) - return None, None #camera_xyz, q - - def clear_scene(self): - keys = self.entities.keys() - for entity_id in keys: - entity = self.entities.pop(entity_id, None) - vbo = entity['vbo'] - tbo = entity['tbo'] - num = entity['num'] - glDeleteBuffers(1, [vbo]) - glDeleteTextures(1, [tbo]) - - def __del__(self): - self.clear_scene() - eglMakeCurrent(self.egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT) - eglDestroySurface(self.egl_display, self.egl_surface) - eglTerminate(self.egl_display) diff --git a/research/cognitive_mapping_and_planning/requirements.txt b/research/cognitive_mapping_and_planning/requirements.txt deleted file mode 100644 index 306c807a6c9fd9404afa1c05108e5e835e84edc6..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -numpy -pillow -PyOpenGL -PyOpenGL-accelerate -six -networkx -scikit-image -scipy -opencv-python diff --git 
a/research/cognitive_mapping_and_planning/scripts/__init__.py b/research/cognitive_mapping_and_planning/scripts/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/scripts/script_distill.py b/research/cognitive_mapping_and_planning/scripts/script_distill.py deleted file mode 100644 index 010c690412ed28011146ab44109dc099d02324e7..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_distill.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r""" Script to setup the grid moving agent. 
- -blaze build --define=ION_GFX_OGLES20=1 -c opt --copt=-mavx --config=cuda_clang \ - learning/brain/public/tensorflow_std_server{,_gpu} \ - experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill.par \ - experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill - - -./blaze-bin/experimental/users/saurabhgupta/navigation/cmp/scripts/script_distill \ - --logdir=/cns/iq-d/home/saurabhgupta/output/stanford-distill/local/v0/ \ - --config_name 'v0+train' --gfs_user robot-intelligence-gpu - -""" -import sys, os, numpy as np -import copy -import argparse, pprint -import time -import cProfile - - -import tensorflow as tf -from tensorflow.contrib import slim -from tensorflow.python.framework import ops -from tensorflow.contrib.framework.python.ops import variables - -import logging -from tensorflow.python.platform import gfile -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -from cfgs import config_distill -from tfcode import tf_utils -import src.utils as utils -import src.file_utils as fu -import tfcode.distillation as distill -import datasets.nav_env as nav_env - -FLAGS = flags.FLAGS - -flags.DEFINE_string('master', 'local', - 'The name of the TensorFlow master to use.') -flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the ' - 'value is 0, then the parameters are handled locally by ' - 'the worker.') -flags.DEFINE_integer('task', 0, 'The Task ID. 
This value is used when training ' - 'with multiple workers to identify each worker.') - -flags.DEFINE_integer('num_workers', 1, '') - -flags.DEFINE_string('config_name', '', '') - -flags.DEFINE_string('logdir', '', '') - -def main(_): - args = config_distill.get_args_for_config(FLAGS.config_name) - args.logdir = FLAGS.logdir - args.solver.num_workers = FLAGS.num_workers - args.solver.task = FLAGS.task - args.solver.ps_tasks = FLAGS.ps_tasks - args.solver.master = FLAGS.master - - args.buildinger.env_class = nav_env.MeshMapper - fu.makedirs(args.logdir) - args.buildinger.logdir = args.logdir - R = nav_env.get_multiplexor_class(args.buildinger, args.solver.task) - - if False: - pr = cProfile.Profile() - pr.enable() - rng = np.random.RandomState(0) - for i in range(1): - b, instances_perturbs = R.sample_building(rng) - inputs = b.worker(*(instances_perturbs)) - for j in range(inputs['imgs'].shape[0]): - p = os.path.join('tmp', '{:d}.png'.format(j)) - img = inputs['imgs'][j,0,:,:,:3]*1 - img = (img).astype(np.uint8) - fu.write_image(p, img) - print(inputs['imgs'].shape) - inputs = R.pre(inputs) - pr.disable() - pr.print_stats(2) - - if args.control.train: - if not gfile.Exists(args.logdir): - gfile.MakeDirs(args.logdir) - - m = utils.Foo() - m.tf_graph = tf.Graph() - - config = tf.ConfigProto() - config.device_count['GPU'] = 1 - config.gpu_options.allow_growth = True - config.gpu_options.per_process_gpu_memory_fraction = 0.8 - - with m.tf_graph.as_default(): - with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks)): - m = distill.setup_to_run(m, args, is_training=True, - batch_norm_is_training=True) - - train_step_kwargs = distill.setup_train_step_kwargs_mesh( - m, R, os.path.join(args.logdir, 'train'), - rng_seed=args.solver.task, is_chief=args.solver.task==0, iters=1, - train_display_interval=args.summary.display_interval) - - final_loss = slim.learning.train( - train_op=m.train_op, - logdir=args.logdir, - master=args.solver.master, - 
is_chief=args.solver.task == 0, - number_of_steps=args.solver.max_steps, - train_step_fn=tf_utils.train_step_custom, - train_step_kwargs=train_step_kwargs, - global_step=m.global_step_op, - init_op=m.init_op, - init_fn=m.init_fn, - sync_optimizer=m.sync_optimizer, - saver=m.saver_op, - summary_op=None, session_config=config) - - if args.control.test: - m = utils.Foo() - m.tf_graph = tf.Graph() - checkpoint_dir = os.path.join(format(args.logdir)) - with m.tf_graph.as_default(): - m = distill.setup_to_run(m, args, is_training=False, - batch_norm_is_training=args.control.force_batchnorm_is_training_at_test) - - train_step_kwargs = distill.setup_train_step_kwargs_mesh( - m, R, os.path.join(args.logdir, args.control.test_name), - rng_seed=args.solver.task+1, is_chief=args.solver.task==0, - iters=args.summary.test_iters, train_display_interval=None) - - sv = slim.learning.supervisor.Supervisor( - graph=ops.get_default_graph(), logdir=None, init_op=m.init_op, - summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op) - - last_checkpoint = None - while True: - last_checkpoint = slim.evaluation.wait_for_new_checkpoint(checkpoint_dir, last_checkpoint) - checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) - start = time.time() - logging.info('Starting evaluation at %s using checkpoint %s.', - time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), - last_checkpoint) - - config = tf.ConfigProto() - config.device_count['GPU'] = 1 - config.gpu_options.allow_growth = True - config.gpu_options.per_process_gpu_memory_fraction = 0.8 - - with sv.managed_session(args.solver.master,config=config, - start_standard_services=False) as sess: - sess.run(m.init_op) - sv.saver.restore(sess, last_checkpoint) - sv.start_queue_runners(sess) - vals, _ = tf_utils.train_step_custom( - sess, None, m.global_step_op, train_step_kwargs, mode='val') - if checkpoint_iter >= args.solver.max_steps: - break - -if __name__ == '__main__': - app.run() diff --git 
a/research/cognitive_mapping_and_planning/scripts/script_download_init_models.sh b/research/cognitive_mapping_and_planning/scripts/script_download_init_models.sh deleted file mode 100644 index 1900bd0b03566d29dac8a8de5f4fce623be98a92..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_download_init_models.sh +++ /dev/null @@ -1,18 +0,0 @@ -# Script to download models to initialize the RGB and D models for training.We -# use ResNet-v2-50 for both modalities. - -mkdir -p data/init_models -cd data/init_models - -# RGB Models are initialized by pre-training on ImageNet. -mkdir -p resnet_v2_50 -RGB_URL="http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz" -wget $RGB_URL -tar -xf resnet_v2_50_2017_04_14.tar.gz -C resnet_v2_50 - -# Depth models are initialized by distilling the RGB model to D images using -# Cross-Modal Distillation (https://arxiv.org/abs/1507.00448). -mkdir -p distill_rgb_to_d_resnet_v2_50 -D_URL="http://download.tensorflow.org/models/cognitive_mapping_and_planning/2017_04_16/distill_rgb_to_d_resnet_v2_50.tar" -wget $D_URL -tar -xf distill_rgb_to_d_resnet_v2_50.tar -C distill_rgb_to_d_resnet_v2_50 diff --git a/research/cognitive_mapping_and_planning/scripts/script_env_vis.py b/research/cognitive_mapping_and_planning/scripts/script_env_vis.py deleted file mode 100644 index 3690ff484fea9344db6fbe20ac54731200f0c84e..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_env_vis.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A simple python function to walk in the enviornments that we have created. -PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_env_vis.py \ - --dataset_name sbpd --building_name area3 -""" -import sys -import numpy as np -import matplotlib -matplotlib.use('TkAgg') -from PIL import ImageTk, Image -import Tkinter as tk -import logging -from tensorflow.python.platform import app -from tensorflow.python.platform import flags - -import datasets.nav_env_config as nec -import datasets.nav_env as nav_env -import cv2 -from datasets import factory -import render.swiftshader_renderer as renderer - -SwiftshaderRenderer = renderer.SwiftshaderRenderer -VisualNavigationEnv = nav_env.VisualNavigationEnv - -FLAGS = flags.FLAGS -flags.DEFINE_string('dataset_name', 'sbpd', 'Name of the dataset.') -flags.DEFINE_float('fov', 60., 'Field of view') -flags.DEFINE_integer('image_size', 512, 'Size of the image.') -flags.DEFINE_string('building_name', '', 'Name of the building.') - -def get_args(): - navtask = nec.nav_env_base_config() - navtask.task_params.type = 'rng_rejection_sampling_many' - navtask.task_params.rejection_sampling_M = 2000 - navtask.task_params.min_dist = 10 - sz = FLAGS.image_size - navtask.camera_param.fov = FLAGS.fov - navtask.camera_param.height = sz - navtask.camera_param.width = sz - navtask.task_params.img_height = sz - navtask.task_params.img_width = sz - - # navtask.task_params.semantic_task.class_map_names = ['chair', 'door', 'table'] - # 
navtask.task_params.type = 'to_nearest_obj_acc' - - logging.info('navtask: %s', navtask) - return navtask - -def load_building(dataset_name, building_name): - dataset = factory.get_dataset(dataset_name) - - navtask = get_args() - cp = navtask.camera_param - rgb_shader, d_shader = renderer.get_shaders(cp.modalities) - r_obj = SwiftshaderRenderer() - r_obj.init_display(width=cp.width, height=cp.height, - fov=cp.fov, z_near=cp.z_near, z_far=cp.z_far, - rgb_shader=rgb_shader, d_shader=d_shader) - r_obj.clear_scene() - b = VisualNavigationEnv(robot=navtask.robot, env=navtask.env, - task_params=navtask.task_params, - building_name=building_name, flip=False, - logdir=None, building_loader=dataset, - r_obj=r_obj) - b.load_building_into_scene() - b.set_building_visibility(False) - return b - -def walk_through(b): - # init agent at a random location in the environment. - init_env_state = b.reset([np.random.RandomState(0), np.random.RandomState(0)]) - - global current_node - rng = np.random.RandomState(0) - current_node = rng.choice(b.task.nodes.shape[0]) - - root = tk.Tk() - image = b.render_nodes(b.task.nodes[[current_node],:])[0] - print(image.shape) - image = image.astype(np.uint8) - im = Image.fromarray(image) - im = ImageTk.PhotoImage(im) - panel = tk.Label(root, image=im) - - map_size = b.traversible.shape - sc = np.max(map_size)/256. 
- loc = np.array([[map_size[1]/2., map_size[0]/2.]]) - x_axis = np.zeros_like(loc); x_axis[:,1] = sc - y_axis = np.zeros_like(loc); y_axis[:,0] = -sc - cum_fs, cum_valid = nav_env.get_map_to_predict(loc, x_axis, y_axis, - map=b.traversible*1., - map_size=256) - cum_fs = cum_fs[0] - cum_fs = cv2.applyColorMap((cum_fs*255).astype(np.uint8), cv2.COLORMAP_JET) - im = Image.fromarray(cum_fs) - im = ImageTk.PhotoImage(im) - panel_overhead = tk.Label(root, image=im) - - def refresh(): - global current_node - image = b.render_nodes(b.task.nodes[[current_node],:])[0] - image = image.astype(np.uint8) - im = Image.fromarray(image) - im = ImageTk.PhotoImage(im) - panel.configure(image=im) - panel.image = im - - def left_key(event): - global current_node - current_node = b.take_action([current_node], [2], 1)[0][0] - refresh() - - def up_key(event): - global current_node - current_node = b.take_action([current_node], [3], 1)[0][0] - refresh() - - def right_key(event): - global current_node - current_node = b.take_action([current_node], [1], 1)[0][0] - refresh() - - def quit(event): - root.destroy() - - panel_overhead.grid(row=4, column=5, rowspan=1, columnspan=1, - sticky=tk.W+tk.E+tk.N+tk.S) - panel.bind('', left_key) - panel.bind('', up_key) - panel.bind('', right_key) - panel.bind('q', quit) - panel.focus_set() - panel.grid(row=0, column=0, rowspan=5, columnspan=5, - sticky=tk.W+tk.E+tk.N+tk.S) - root.mainloop() - -def simple_window(): - root = tk.Tk() - - image = np.zeros((128, 128, 3), dtype=np.uint8) - image[32:96, 32:96, 0] = 255 - im = Image.fromarray(image) - im = ImageTk.PhotoImage(im) - - image = np.zeros((128, 128, 3), dtype=np.uint8) - image[32:96, 32:96, 1] = 255 - im2 = Image.fromarray(image) - im2 = ImageTk.PhotoImage(im2) - - panel = tk.Label(root, image=im) - - def left_key(event): - panel.configure(image=im2) - panel.image = im2 - - def quit(event): - sys.exit() - - panel.bind('', left_key) - panel.bind('', left_key) - panel.bind('', left_key) - 
panel.bind('q', quit) - panel.focus_set() - panel.pack(side = "bottom", fill = "both", expand = "yes") - root.mainloop() - -def main(_): - b = load_building(FLAGS.dataset_name, FLAGS.building_name) - walk_through(b) - -if __name__ == '__main__': - app.run() diff --git a/research/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py b/research/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py deleted file mode 100644 index dab2819a6fcf100cb2e385e45b7aa694c4c5f033..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_nav_agent_release.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r""" Script to train and test the grid navigation agent. -Usage: - 1. Testing a model. - CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \ - PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ - --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r - - 2. Training a model (locally). - CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \ - PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ - --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_ - - 3. Training a model (distributed). - # See https://www.tensorflow.org/deploy/distributed on how to setup distributed - # training. - CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 \ - PYTHONPATH='.' PYOPENGL_PLATFORM=egl python scripts/script_nav_agent_release.py \ - --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+train_train \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_ \ - --ps_tasks $num_ps --master $master_name --task $worker_id -""" - -import sys, os, numpy as np -import copy -import argparse, pprint -import time -import cProfile -import platform - - -import tensorflow as tf -from tensorflow.contrib import slim -from tensorflow.python.framework import ops -from tensorflow.contrib.framework.python.ops import variables - -import logging -from tensorflow.python.platform import gfile -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -from cfgs import config_cmp -from cfgs import config_vision_baseline -import datasets.nav_env as nav_env -import src.file_utils as fu -import src.utils as utils -import tfcode.cmp as cmp -from tfcode import tf_utils -from tfcode import vision_baseline_lstm - -FLAGS = flags.FLAGS - -flags.DEFINE_string('master', '', - 'The address of the tensorflow master') -flags.DEFINE_integer('ps_tasks', 0, 'The number of parameter servers. If the ' - 'value is 0, then the parameters are handled locally by ' - 'the worker.') -flags.DEFINE_integer('task', 0, 'The Task ID. 
This value is used when training ' - 'with multiple workers to identify each worker.') - -flags.DEFINE_integer('num_workers', 1, '') - -flags.DEFINE_string('config_name', '', '') - -flags.DEFINE_string('logdir', '', '') - -flags.DEFINE_integer('solver_seed', 0, '') - -flags.DEFINE_integer('delay_start_iters', 20, '') - -logging.basicConfig(level=logging.INFO) - -def main(_): - _launcher(FLAGS.config_name, FLAGS.logdir) - -def _launcher(config_name, logdir): - args = _setup_args(config_name, logdir) - - fu.makedirs(args.logdir) - - if args.control.train: - _train(args) - - if args.control.test: - _test(args) - -def get_args_for_config(config_name): - configs = config_name.split('.') - type = configs[0] - config_name = '.'.join(configs[1:]) - if type == 'cmp': - args = config_cmp.get_args_for_config(config_name) - args.setup_to_run = cmp.setup_to_run - args.setup_train_step_kwargs = cmp.setup_train_step_kwargs - - elif type == 'bl': - args = config_vision_baseline.get_args_for_config(config_name) - args.setup_to_run = vision_baseline_lstm.setup_to_run - args.setup_train_step_kwargs = vision_baseline_lstm.setup_train_step_kwargs - - else: - logging.fatal('Unknown type: {:s}'.format(type)) - return args - -def _setup_args(config_name, logdir): - args = get_args_for_config(config_name) - args.solver.num_workers = FLAGS.num_workers - args.solver.task = FLAGS.task - args.solver.ps_tasks = FLAGS.ps_tasks - args.solver.master = FLAGS.master - args.solver.seed = FLAGS.solver_seed - args.logdir = logdir - args.navtask.logdir = None - return args - -def _train(args): - container_name = "" - - R = lambda: nav_env.get_multiplexer_class(args.navtask, args.solver.task) - m = utils.Foo() - m.tf_graph = tf.Graph() - - config = tf.ConfigProto() - config.device_count['GPU'] = 1 - - with m.tf_graph.as_default(): - with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks, - merge_devices=True)): - with tf.container(container_name): - m = args.setup_to_run(m, args, 
is_training=True, - batch_norm_is_training=True, summary_mode='train') - - train_step_kwargs = args.setup_train_step_kwargs( - m, R(), os.path.join(args.logdir, 'train'), rng_seed=args.solver.task, - is_chief=args.solver.task==0, - num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals, iters=1, - train_display_interval=args.summary.display_interval, - dagger_sample_bn_false=args.arch.dagger_sample_bn_false) - - delay_start = (args.solver.task*(args.solver.task+1))/2 * FLAGS.delay_start_iters - logging.error('delaying start for task %d by %d steps.', - args.solver.task, delay_start) - - additional_args = {} - final_loss = slim.learning.train( - train_op=m.train_op, - logdir=args.logdir, - master=args.solver.master, - is_chief=args.solver.task == 0, - number_of_steps=args.solver.max_steps, - train_step_fn=tf_utils.train_step_custom_online_sampling, - train_step_kwargs=train_step_kwargs, - global_step=m.global_step_op, - init_op=m.init_op, - init_fn=m.init_fn, - sync_optimizer=m.sync_optimizer, - saver=m.saver_op, - startup_delay_steps=delay_start, - summary_op=None, session_config=config, **additional_args) - -def _test(args): - args.solver.master = '' - container_name = "" - checkpoint_dir = os.path.join(format(args.logdir)) - logging.error('Checkpoint_dir: %s', args.logdir) - - config = tf.ConfigProto(); - config.device_count['GPU'] = 1; - - m = utils.Foo() - m.tf_graph = tf.Graph() - - rng_data_seed = 0; rng_action_seed = 0; - R = lambda: nav_env.get_multiplexer_class(args.navtask, rng_data_seed) - with m.tf_graph.as_default(): - with tf.container(container_name): - m = args.setup_to_run( - m, args, is_training=False, - batch_norm_is_training=args.control.force_batchnorm_is_training_at_test, - summary_mode=args.control.test_mode) - train_step_kwargs = args.setup_train_step_kwargs( - m, R(), os.path.join(args.logdir, args.control.test_name), - rng_seed=rng_data_seed, is_chief=True, - 
num_steps=args.navtask.task_params.num_steps*args.navtask.task_params.num_goals, - iters=args.summary.test_iters, train_display_interval=None, - dagger_sample_bn_false=args.arch.dagger_sample_bn_false) - - saver = slim.learning.tf_saver.Saver(variables.get_variables_to_restore()) - - sv = slim.learning.supervisor.Supervisor( - graph=ops.get_default_graph(), logdir=None, init_op=m.init_op, - summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op) - - last_checkpoint = None - reported = False - while True: - last_checkpoint_ = None - while last_checkpoint_ is None: - last_checkpoint_ = slim.evaluation.wait_for_new_checkpoint( - checkpoint_dir, last_checkpoint, seconds_to_sleep=10, timeout=60) - if last_checkpoint_ is None: break - - last_checkpoint = last_checkpoint_ - checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) - - logging.info('Starting evaluation at %s using checkpoint %s.', - time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), - last_checkpoint) - - if (args.control.only_eval_when_done == False or - checkpoint_iter >= args.solver.max_steps): - start = time.time() - logging.info('Starting evaluation at %s using checkpoint %s.', - time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()), - last_checkpoint) - - with sv.managed_session(args.solver.master, config=config, - start_standard_services=False) as sess: - sess.run(m.init_op) - sv.saver.restore(sess, last_checkpoint) - sv.start_queue_runners(sess) - if args.control.reset_rng_seed: - train_step_kwargs['rng_data'] = [np.random.RandomState(rng_data_seed), - np.random.RandomState(rng_data_seed)] - train_step_kwargs['rng_action'] = np.random.RandomState(rng_action_seed) - vals, _ = tf_utils.train_step_custom_online_sampling( - sess, None, m.global_step_op, train_step_kwargs, - mode=args.control.test_mode) - should_stop = False - - if checkpoint_iter >= args.solver.max_steps: - should_stop = True - - if should_stop: - break - -if __name__ == '__main__': - app.run() diff 
--git a/research/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py b/research/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py deleted file mode 100644 index 08273a83b512fa3100f7df6e20d41d666b037aad..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_plot_trajectory.py +++ /dev/null @@ -1,339 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r""" -Code for plotting trajectories in the top view, and also plot first person views -from saved trajectories. Does not run the network but only loads the mesh data -to plot the view points. - CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 - PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl python scripts/script_plot_trajectory.py \ - --first_person --num_steps 40 \ - --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \ - --imset test --alsologtostderr --base_dir output --out_dir vis - -""" -import os, sys, numpy as np, copy -import matplotlib -matplotlib.use("Agg") -import matplotlib.pyplot as plt -import matplotlib.animation as animation -from matplotlib.gridspec import GridSpec - -import tensorflow as tf -from tensorflow.contrib import slim -import cv2 -import logging -from tensorflow.python.platform import gfile -from tensorflow.python.platform import app -from tensorflow.python.platform import flags - -from datasets import nav_env -import scripts.script_nav_agent_release as sna -import src.file_utils as fu -from src import graph_utils -from src import utils -FLAGS = flags.FLAGS - -flags.DEFINE_string('out_dir', 'vis', 'Directory where to store the output') -flags.DEFINE_string('type', '', 'Optional type.') -flags.DEFINE_bool('first_person', False, 'Visualize the first person view.') -flags.DEFINE_bool('top_view', False, 'Visualize the trajectory in the top view.') -flags.DEFINE_integer('num_steps', 40, 'Number of steps to run the model for.') -flags.DEFINE_string('imset', 'test', '') -flags.DEFINE_string('base_dir', 'output', 'Cache directory.') - -def _get_suffix_str(): - return '' - - -def _load_trajectory(): - base_dir = FLAGS.base_dir - config_name = FLAGS.config_name+_get_suffix_str() - - dir_name = os.path.join(base_dir, FLAGS.type, config_name) - logging.info('Waiting for snapshot in directory %s.', dir_name) - last_checkpoint = slim.evaluation.wait_for_new_checkpoint(dir_name, None) - checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1]) - - # Load the distances. - a = utils.load_variables(os.path.join(dir_name, 'bench_on_'+FLAGS.imset, - 'all_locs_at_t_{:d}.pkl'.format(checkpoint_iter))) - return a - -def _compute_hardness(): - # Load the stanford data to compute the hardness. 
- if FLAGS.type == '': - args = sna.get_args_for_config(FLAGS.config_name+'+bench_'+FLAGS.imset) - else: - args = sna.get_args_for_config(FLAGS.type+'.'+FLAGS.config_name+'+bench_'+FLAGS.imset) - - args.navtask.logdir = None - R = lambda: nav_env.get_multiplexer_class(args.navtask, 0) - R = R() - - rng_data = [np.random.RandomState(0), np.random.RandomState(0)] - - # Sample a room. - h_dists = [] - gt_dists = [] - for i in range(250): - e = R.sample_env(rng_data) - nodes = e.task.nodes - - # Initialize the agent. - init_env_state = e.reset(rng_data) - - gt_dist_to_goal = [e.episode.dist_to_goal[0][j][s] - for j, s in enumerate(e.episode.start_node_ids)] - - for j in range(args.navtask.task_params.batch_size): - start_node_id = e.episode.start_node_ids[j] - end_node_id =e.episode.goal_node_ids[0][j] - h_dist = graph_utils.heuristic_fn_vec( - nodes[[start_node_id],:], nodes[[end_node_id], :], - n_ori=args.navtask.task_params.n_ori, - step_size=args.navtask.task_params.step_size)[0][0] - gt_dist = e.episode.dist_to_goal[0][j][start_node_id] - h_dists.append(h_dist) - gt_dists.append(gt_dist) - - h_dists = np.array(h_dists) - gt_dists = np.array(gt_dists) - e = R.sample_env([np.random.RandomState(0), np.random.RandomState(0)]) - input = e.get_common_data() - orig_maps = input['orig_maps'][0,0,:,:,0] - return h_dists, gt_dists, orig_maps - -def plot_trajectory_first_person(dt, orig_maps, out_dir): - out_dir = os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(), - FLAGS.imset) - fu.makedirs(out_dir) - - # Load the model so that we can render. 
- plt.set_cmap('gray') - samples_per_action = 8; wait_at_action = 0; - - Writer = animation.writers['mencoder'] - writer = Writer(fps=3*(samples_per_action+wait_at_action), - metadata=dict(artist='anonymous'), bitrate=1800) - - args = sna.get_args_for_config(FLAGS.config_name + '+bench_'+FLAGS.imset) - args.navtask.logdir = None - navtask_ = copy.deepcopy(args.navtask) - navtask_.camera_param.modalities = ['rgb'] - navtask_.task_params.modalities = ['rgb'] - sz = 512 - navtask_.camera_param.height = sz - navtask_.camera_param.width = sz - navtask_.task_params.img_height = sz - navtask_.task_params.img_width = sz - R = lambda: nav_env.get_multiplexer_class(navtask_, 0) - R = R() - b = R.buildings[0] - - f = [0 for _ in range(wait_at_action)] + \ - [float(_)/samples_per_action for _ in range(samples_per_action)]; - - # Generate things for it to render. - inds_to_do = [] - inds_to_do += [1, 4, 10] #1291, 1268, 1273, 1289, 1302, 1426, 1413, 1449, 1399, 1390] - - for i in inds_to_do: - fig = plt.figure(figsize=(10,8)) - gs = GridSpec(3,4) - gs.update(wspace=0.05, hspace=0.05, left=0.0, top=0.97, right=1.0, bottom=0.) - ax = fig.add_subplot(gs[:,:-1]) - ax1 = fig.add_subplot(gs[0,-1]) - ax2 = fig.add_subplot(gs[1,-1]) - ax3 = fig.add_subplot(gs[2,-1]) - axes = [ax, ax1, ax2, ax3] - # ax = fig.add_subplot(gs[:,:]) - # axes = [ax] - for ax in axes: - ax.set_axis_off() - - node_ids = dt['all_node_ids'][i, :, 0]*1 - # Prune so that last node is not repeated more than 3 times? 
- if np.all(node_ids[-4:] == node_ids[-1]): - while node_ids[-4] == node_ids[-1]: - node_ids = node_ids[:-1] - num_steps = np.minimum(FLAGS.num_steps, len(node_ids)) - - xyt = b.to_actual_xyt_vec(b.task.nodes[node_ids]) - xyt_diff = xyt[1:,:] - xyt[:-1:,:] - xyt_diff[:,2] = np.mod(xyt_diff[:,2], 4) - ind = np.where(xyt_diff[:,2] == 3)[0] - xyt_diff[ind, 2] = -1 - xyt_diff = np.expand_dims(xyt_diff, axis=1) - to_cat = [xyt_diff*_ for _ in f] - perturbs_all = np.concatenate(to_cat, axis=1) - perturbs_all = np.concatenate([perturbs_all, np.zeros_like(perturbs_all[:,:,:1])], axis=2) - node_ids_all = np.expand_dims(node_ids, axis=1)*1 - node_ids_all = np.concatenate([node_ids_all for _ in f], axis=1) - node_ids_all = np.reshape(node_ids_all[:-1,:], -1) - perturbs_all = np.reshape(perturbs_all, [-1, 4]) - imgs = b.render_nodes(b.task.nodes[node_ids_all,:], perturb=perturbs_all) - - # Get action at each node. - actions = [] - _, action_to_nodes = b.get_feasible_actions(node_ids) - for j in range(num_steps-1): - action_to_node = action_to_nodes[j] - node_to_action = dict(zip(action_to_node.values(), action_to_node.keys())) - actions.append(node_to_action[node_ids[j+1]]) - - def init_fn(): - return fig, - gt_dist_to_goal = [] - - # Render trajectories. - def worker(j): - # Plot the image. - step_number = j/(samples_per_action + wait_at_action) - img = imgs[j]; ax = axes[0]; ax.clear(); ax.set_axis_off(); - img = img.astype(np.uint8); ax.imshow(img); - tt = ax.set_title( - "First Person View\n" + - "Top corners show diagnostics (distance, agents' action) not input to agent.", - fontsize=12) - plt.setp(tt, color='white') - - # Distance to goal. - t = 'Dist to Goal:\n{:2d} steps'.format(int(dt['all_d_at_t'][i, step_number])) - t = ax.text(0.01, 0.99, t, - horizontalalignment='left', - verticalalignment='top', - fontsize=20, color='red', - transform=ax.transAxes, alpha=1.0) - t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1)) - - # Action to take. 
- action_latex = ['$\odot$ ', '$\curvearrowright$ ', '$\curvearrowleft$ ', r'$\Uparrow$ '] - t = ax.text(0.99, 0.99, action_latex[actions[step_number]], - horizontalalignment='right', - verticalalignment='top', - fontsize=40, color='green', - transform=ax.transAxes, alpha=1.0) - t.set_bbox(dict(color='white', alpha=0.85, pad=-0.1)) - - - # Plot the map top view. - ax = axes[-1] - if j == 0: - # Plot the map - locs = dt['all_locs'][i,:num_steps,:] - goal_loc = dt['all_goal_locs'][i,:,:] - xymin = np.minimum(np.min(goal_loc, axis=0), np.min(locs, axis=0)) - xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0)) - xy1 = (xymax+xymin)/2. - 0.7*np.maximum(np.max(xymax-xymin), 24) - xy2 = (xymax+xymin)/2. + 0.7*np.maximum(np.max(xymax-xymin), 24) - - ax.set_axis_on() - ax.patch.set_facecolor((0.333, 0.333, 0.333)) - ax.set_xticks([]); ax.set_yticks([]); - ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0) - ax.plot(goal_loc[:,0], goal_loc[:,1], 'g*', markersize=12) - - locs = dt['all_locs'][i,:1,:] - ax.plot(locs[:,0], locs[:,1], 'b.', markersize=12) - - ax.set_xlim([xy1[0], xy2[0]]) - ax.set_ylim([xy1[1], xy2[1]]) - - locs = dt['all_locs'][i,step_number,:] - locs = np.expand_dims(locs, axis=0) - ax.plot(locs[:,0], locs[:,1], 'r.', alpha=1.0, linewidth=0, markersize=4) - tt = ax.set_title('Trajectory in topview', fontsize=14) - plt.setp(tt, color='white') - return fig, - - line_ani = animation.FuncAnimation(fig, worker, - (num_steps-1)*(wait_at_action+samples_per_action), - interval=500, blit=True, init_func=init_fn) - tmp_file_name = 'tmp.mp4' - line_ani.save(tmp_file_name, writer=writer, savefig_kwargs={'facecolor':'black'}) - out_file_name = os.path.join(out_dir, 'vis_{:04d}.mp4'.format(i)) - print(out_file_name) - - if fu.exists(out_file_name): - gfile.Remove(out_file_name) - gfile.Copy(tmp_file_name, out_file_name) - gfile.Remove(tmp_file_name) - plt.close(fig) - -def plot_trajectory(dt, hardness, orig_maps, out_dir): - out_dir = 
os.path.join(out_dir, FLAGS.config_name+_get_suffix_str(), - FLAGS.imset) - fu.makedirs(out_dir) - out_file = os.path.join(out_dir, 'all_locs_at_t.pkl') - dt['hardness'] = hardness - utils.save_variables(out_file, dt.values(), dt.keys(), overwrite=True) - - #Plot trajectories onto the maps - plt.set_cmap('gray') - for i in range(4000): - goal_loc = dt['all_goal_locs'][i, :, :] - locs = np.concatenate((dt['all_locs'][i,:,:], - dt['all_locs'][i,:,:]), axis=0) - xymin = np.minimum(np.min(goal_loc, axis=0), np.min(locs, axis=0)) - xymax = np.maximum(np.max(goal_loc, axis=0), np.max(locs, axis=0)) - xy1 = (xymax+xymin)/2. - 1.*np.maximum(np.max(xymax-xymin), 24) - xy2 = (xymax+xymin)/2. + 1.*np.maximum(np.max(xymax-xymin), 24) - - fig, ax = utils.tight_imshow_figure(plt, figsize=(6,6)) - ax.set_axis_on() - ax.patch.set_facecolor((0.333, 0.333, 0.333)) - ax.set_xticks([]) - ax.set_yticks([]) - - all_locs = dt['all_locs'][i,:,:]*1 - uniq = np.where(np.any(all_locs[1:,:] != all_locs[:-1,:], axis=1))[0]+1 - uniq = np.sort(uniq).tolist() - uniq.insert(0,0) - uniq = np.array(uniq) - all_locs = all_locs[uniq, :] - - ax.plot(dt['all_locs'][i, 0, 0], - dt['all_locs'][i, 0, 1], 'b.', markersize=24) - ax.plot(dt['all_goal_locs'][i, 0, 0], - dt['all_goal_locs'][i, 0, 1], 'g*', markersize=19) - ax.plot(all_locs[:,0], all_locs[:,1], 'r', alpha=0.4, linewidth=2) - ax.scatter(all_locs[:,0], all_locs[:,1], - c=5+np.arange(all_locs.shape[0])*1./all_locs.shape[0], - cmap='Reds', s=30, linewidth=0) - ax.imshow(orig_maps, origin='lower', vmin=-1.0, vmax=2.0, aspect='equal') - ax.set_xlim([xy1[0], xy2[0]]) - ax.set_ylim([xy1[1], xy2[1]]) - - file_name = os.path.join(out_dir, 'trajectory_{:04d}.png'.format(i)) - print(file_name) - with fu.fopen(file_name, 'w') as f: - plt.savefig(f) - plt.close(fig) - - -def main(_): - a = _load_trajectory() - h_dists, gt_dists, orig_maps = _compute_hardness() - hardness = 1.-h_dists*1./ gt_dists - - if FLAGS.top_view: - plot_trajectory(a, hardness, 
orig_maps, out_dir=FLAGS.out_dir) - - if FLAGS.first_person: - plot_trajectory_first_person(a, orig_maps, out_dir=FLAGS.out_dir) - -if __name__ == '__main__': - app.run() diff --git a/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py b/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py deleted file mode 100644 index 58f32d121acf4c638625079907b02161e808af68..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import os -import glob -import numpy as np -import logging -import cPickle -from datasets import nav_env -from datasets import factory -from src import utils -from src import map_utils as mu - -logging.basicConfig(level=logging.INFO) -DATA_DIR = 'data/stanford_building_parser_dataset_raw/' - -mkdir_if_missing = utils.mkdir_if_missing -save_variables = utils.save_variables - -def _get_semantic_maps(building_name, transform, map_, flip, cats): - rooms = get_room_in_building(building_name) - maps = [] - for cat in cats: - maps.append(np.zeros((map_.size[1], map_.size[0]))) - - for r in rooms: - room = load_room(building_name, r, category_list=cats) - classes = room['class_id'] - for i, cat in enumerate(cats): - c_ind = cats.index(cat) - ind = [_ for _, c in enumerate(classes) if c == c_ind] - if len(ind) > 0: - vs = [room['vertexs'][x]*1 for x in ind] - vs = np.concatenate(vs, axis=0) - if transform: - vs = np.array([vs[:,1], vs[:,0], vs[:,2]]).T - vs[:,0] = -vs[:,0] - vs[:,1] += 4.20 - vs[:,0] += 6.20 - vs = vs*100. 
- if flip: - vs[:,1] = -vs[:,1] - maps[i] = maps[i] + \ - mu._project_to_map(map_, vs, ignore_points_outside_map=True) - return maps - -def _map_building_name(building_name): - b = int(building_name.split('_')[0][4]) - out_name = 'Area_{:d}'.format(b) - if b == 5: - if int(building_name.split('_')[0][5]) == 1: - transform = True - else: - transform = False - else: - transform = False - return out_name, transform - -def get_categories(): - cats = ['beam', 'board', 'bookcase', 'ceiling', 'chair', 'clutter', 'column', - 'door', 'floor', 'sofa', 'table', 'wall', 'window'] - return cats - -def _write_map_files(b_in, b_out, transform): - cats = get_categories() - - env = utils.Foo(padding=10, resolution=5, num_point_threshold=2, - valid_min=-10, valid_max=200, n_samples_per_face=200) - robot = utils.Foo(radius=15, base=10, height=140, sensor_height=120, - camera_elevation_degree=-15) - - building_loader = factory.get_dataset('sbpd') - for flip in [False, True]: - b = nav_env.Building(b_out, robot, env, flip=flip, - building_loader=building_loader) - logging.info("building_in: %s, building_out: %s, transform: %d", b_in, - b_out, transform) - maps = _get_semantic_maps(b_in, transform, b.map, flip, cats) - maps = np.transpose(np.array(maps), axes=[1,2,0]) - - # Load file from the cache. 
- file_name = '{:s}_{:d}_{:d}_{:d}_{:d}_{:d}_{:d}.pkl' - file_name = file_name.format(b.building_name, b.map.size[0], b.map.size[1], - b.map.origin[0], b.map.origin[1], - b.map.resolution, flip) - out_file = os.path.join(DATA_DIR, 'processing', 'class-maps', file_name) - logging.info('Writing semantic maps to %s.', out_file) - save_variables(out_file, [maps, cats], ['maps', 'cats'], overwrite=True) - -def _transform_area5b(room_dimension): - for a in room_dimension.keys(): - r = room_dimension[a]*1 - r[[0,1,3,4]] = r[[1,0,4,3]] - r[[0,3]] = -r[[3,0]] - r[[1,4]] += 4.20 - r[[0,3]] += 6.20 - room_dimension[a] = r - return room_dimension - -def collect_room(building_name, room_name): - room_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name, - room_name, 'Annotations') - files = glob.glob1(room_dir, '*.txt') - files = sorted(files, key=lambda s: s.lower()) - vertexs = []; colors = []; - for f in files: - file_name = os.path.join(room_dir, f) - logging.info(' %s', file_name) - a = np.loadtxt(file_name) - vertex = a[:,:3]*1. 
- color = a[:,3:]*1 - color = color.astype(np.uint8) - vertexs.append(vertex) - colors.append(color) - files = [f.split('.')[0] for f in files] - out = {'vertexs': vertexs, 'colors': colors, 'names': files} - return out - -def load_room(building_name, room_name, category_list=None): - room = collect_room(building_name, room_name) - room['building_name'] = building_name - room['room_name'] = room_name - instance_id = range(len(room['names'])) - room['instance_id'] = instance_id - if category_list is not None: - name = [r.split('_')[0] for r in room['names']] - class_id = [] - for n in name: - if n in category_list: - class_id.append(category_list.index(n)) - else: - class_id.append(len(category_list)) - room['class_id'] = class_id - room['category_list'] = category_list - return room - -def get_room_in_building(building_name): - building_dir = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2', building_name) - rn = os.listdir(building_dir) - rn = [x for x in rn if os.path.isdir(os.path.join(building_dir, x))] - rn = sorted(rn, key=lambda s: s.lower()) - return rn - -def write_room_dimensions(b_in, b_out, transform): - rooms = get_room_in_building(b_in) - room_dimension = {} - for r in rooms: - room = load_room(b_in, r, category_list=None) - vertex = np.concatenate(room['vertexs'], axis=0) - room_dimension[r] = np.concatenate((np.min(vertex, axis=0), np.max(vertex, axis=0)), axis=0) - if transform == 1: - room_dimension = _transform_area5b(room_dimension) - - out_file = os.path.join(DATA_DIR, 'processing', 'room-dimension', b_out+'.pkl') - save_variables(out_file, [room_dimension], ['room_dimension'], overwrite=True) - -def write_room_dimensions_all(I): - mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'room-dimension')) - bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6'] - bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6'] - transforms = [0, 0, 0, 0, 0, 1, 0] - - for i in I: - b_in = bs_in[i] - b_out = 
bs_out[i] - t = transforms[i] - write_room_dimensions(b_in, b_out, t) - -def write_class_maps_all(I): - mkdir_if_missing(os.path.join(DATA_DIR, 'processing', 'class-maps')) - bs_in = ['Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_5', 'Area_6'] - bs_out = ['area1', 'area2', 'area3', 'area4', 'area5a', 'area5b', 'area6'] - transforms = [0, 0, 0, 0, 0, 1, 0] - - for i in I: - b_in = bs_in[i] - b_out = bs_out[i] - t = transforms[i] - _write_map_files(b_in, b_out, t) - - -if __name__ == '__main__': - write_room_dimensions_all([0, 2, 3, 4, 5, 6]) - write_class_maps_all([0, 2, 3, 4, 5, 6]) - diff --git a/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh b/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh deleted file mode 100644 index 1384fabe69259ccc514a14d62aee358d1909bffb..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_preprocess_annoations_S3DIS.sh +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -cd data/stanford_building_parser_dataset_raw -unzip Stanford3dDataset_v1.2.zip -cd ../../ -PYOPENGL_PLATFORM=egl PYTHONPATH='.' 
python scripts/script_preprocess_annoations_S3DIS.py - -mv data/stanford_building_parser_dataset_raw/processing/room-dimension data/stanford_building_parser_dataset/. -mv data/stanford_building_parser_dataset_raw/processing/class-maps data/stanford_building_parser_dataset/. - -echo "You may now delete data/stanford_building_parser_dataset_raw if needed." diff --git a/research/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh b/research/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh deleted file mode 100644 index 557a4dde611d42e71d71dd1589abf96f55e6eec6..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_preprocess_meshes_S3DIS.sh +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -mkdir -p data/stanford_building_parser_dataset -mkdir -p data/stanford_building_parser_dataset/mesh -cd data/stanford_building_parser_dataset_raw - -# Untar the files and extract the meshes. -for t in "1" "3" "4" "5a" "5b" "6"; do - tar -xf area_"$t"_noXYZ.tar area_$t/3d/rgb_textures - mv area_$t/3d/rgb_textures ../stanford_building_parser_dataset/mesh/area$t - rmdir area_$t/3d - rmdir area_$t -done - -cd ../../ - -# Preprocess meshes to remove the group and chunk information. 
-cd data/stanford_building_parser_dataset/ -for t in "1" "3" "4" "5a" "5b" "6"; do - obj_name=`ls mesh/area$t/*.obj` - cp $obj_name "$obj_name".bck - cat $obj_name.bck | grep -v '^g' | grep -v '^o' > $obj_name -done -cd ../../ diff --git a/research/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh b/research/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh deleted file mode 100644 index a4299fff5346afb53783a61de5c3e84f102a6304..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/scripts/script_test_pretrained_models.sh +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Test CMP models. -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_r2r+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_r2r - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_ST+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_ST - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_rgb_ST+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_rgb_ST - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80+bench_test \ - --logdir output/cmp.lmap_Msc.clip5.sbpd_d_r2r_h0_64_80 - -# Test LSTM baseline models. -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r+bench_test \ - --logdir output/bl.v2.noclip.sbpd_d_r2r - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_r2r+bench_test \ - --logdir output/bl.v2.noclip.sbpd_rgb_r2r - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_ST+bench_test \ - --logdir output/bl.v2.noclip.sbpd_d_ST - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_rgb_ST+bench_test \ - --logdir output/bl.v2.noclip.sbpd_rgb_ST - -CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' 
PYOPENGL_PLATFORM=egl \ - python scripts/script_nav_agent_release.py --config_name bl.v2.noclip.sbpd_d_r2r_h0_64_80+bench_test \ - --logdir output/bl.v2.noclip.sbpd_d_r2r_h0_64_80 - -# Visualize test trajectories in top view. -# CUDA_VISIBLE_DEVICES=0 LD_LIBRARY_PATH=/opt/cuda-8.0/lib64:/opt/cudnnv51/lib64 PYTHONPATH='.' PYOPENGL_PLATFORM=egl \ -# python scripts/script_plot_trajectory.py \ -# --first_person --num_steps 40 \ -# --config_name cmp.lmap_Msc.clip5.sbpd_d_r2r \ -# --imset test --alsologtostderr diff --git a/research/cognitive_mapping_and_planning/src/__init__.py b/research/cognitive_mapping_and_planning/src/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/src/depth_utils.py b/research/cognitive_mapping_and_planning/src/depth_utils.py deleted file mode 100644 index 35f14fc7c37fffb2a408decede11e378867a2834..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/src/depth_utils.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for processing depth images. 
-""" -import numpy as np -import src.rotation_utils as ru -import src.utils as utils - -def get_camera_matrix(width, height, fov): - """Returns a camera matrix from image size and fov.""" - xc = (width-1.) / 2. - zc = (height-1.) / 2. - f = (width / 2.) / np.tan(np.deg2rad(fov / 2.)) - camera_matrix = utils.Foo(xc=xc, zc=zc, f=f) - return camera_matrix - -def get_point_cloud_from_z(Y, camera_matrix): - """Projects the depth image Y into a 3D point cloud. - Inputs: - Y is ...xHxW - camera_matrix - Outputs: - X is positive going right - Y is positive into the image - Z is positive up in the image - XYZ is ...xHxWx3 - """ - x, z = np.meshgrid(np.arange(Y.shape[-1]), - np.arange(Y.shape[-2]-1, -1, -1)) - for i in range(Y.ndim-2): - x = np.expand_dims(x, axis=0) - z = np.expand_dims(z, axis=0) - X = (x-camera_matrix.xc) * Y / camera_matrix.f - Z = (z-camera_matrix.zc) * Y / camera_matrix.f - XYZ = np.concatenate((X[...,np.newaxis], Y[...,np.newaxis], - Z[...,np.newaxis]), axis=X.ndim) - return XYZ - -def make_geocentric(XYZ, sensor_height, camera_elevation_degree): - """Transforms the point cloud into geocentric coordinate frame. - Input: - XYZ : ...x3 - sensor_height : height of the sensor - camera_elevation_degree : camera elevation to rectify. - Output: - XYZ : ...x3 - """ - R = ru.get_r_matrix([1.,0.,0.], angle=np.deg2rad(camera_elevation_degree)) - XYZ = np.matmul(XYZ.reshape(-1,3), R.T).reshape(XYZ.shape) - XYZ[...,2] = XYZ[...,2] + sensor_height - return XYZ - -def bin_points(XYZ_cms, map_size, z_bins, xy_resolution): - """Bins points into xy-z bins - XYZ_cms is ... x H x W x3 - Outputs is ... x map_size x map_size x (len(z_bins)+1) - """ - sh = XYZ_cms.shape - XYZ_cms = XYZ_cms.reshape([-1, sh[-3], sh[-2], sh[-1]]) - n_z_bins = len(z_bins)+1 - map_center = (map_size-1.)/2. 
- counts = [] - isvalids = [] - for XYZ_cm in XYZ_cms: - isnotnan = np.logical_not(np.isnan(XYZ_cm[:,:,0])) - X_bin = np.round(XYZ_cm[:,:,0] / xy_resolution + map_center).astype(np.int32) - Y_bin = np.round(XYZ_cm[:,:,1] / xy_resolution + map_center).astype(np.int32) - Z_bin = np.digitize(XYZ_cm[:,:,2], bins=z_bins).astype(np.int32) - - isvalid = np.array([X_bin >= 0, X_bin < map_size, Y_bin >= 0, Y_bin < map_size, - Z_bin >= 0, Z_bin < n_z_bins, isnotnan]) - isvalid = np.all(isvalid, axis=0) - - ind = (Y_bin * map_size + X_bin) * n_z_bins + Z_bin - ind[np.logical_not(isvalid)] = 0 - count = np.bincount(ind.ravel(), isvalid.ravel().astype(np.int32), - minlength=map_size*map_size*n_z_bins) - count = np.reshape(count, [map_size, map_size, n_z_bins]) - counts.append(count) - isvalids.append(isvalid) - counts = np.array(counts).reshape(list(sh[:-3]) + [map_size, map_size, n_z_bins]) - isvalids = np.array(isvalids).reshape(list(sh[:-3]) + [sh[-3], sh[-2], 1]) - return counts, isvalids diff --git a/research/cognitive_mapping_and_planning/src/file_utils.py b/research/cognitive_mapping_and_planning/src/file_utils.py deleted file mode 100644 index b386236ca6e04c9fa1e452b6ad3e70c6ab9bb88a..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/src/file_utils.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utilities for manipulating files. -""" -import os -import numpy as np -import PIL -from tensorflow.python.platform import gfile -import cv2 - -exists = lambda path: gfile.Exists(path) -fopen = lambda path, mode: gfile.Open(path, mode) -makedirs = lambda path: gfile.MakeDirs(path) -listdir = lambda path: gfile.ListDir(path) -copyfile = lambda a, b, o: gfile.Copy(a,b,o) - -def write_image(image_path, rgb): - ext = os.path.splitext(image_path)[1] - with gfile.GFile(image_path, 'w') as f: - img_str = cv2.imencode(ext, rgb[:,:,::-1])[1].tostring() - f.write(img_str) - -def read_image(image_path, type='rgb'): - with fopen(image_path, 'r') as f: - I = PIL.Image.open(f) - II = np.array(I) - if type == 'rgb': - II = II[:,:,:3] - return II diff --git a/research/cognitive_mapping_and_planning/src/graph_utils.py b/research/cognitive_mapping_and_planning/src/graph_utils.py deleted file mode 100644 index cd99fd22a2f630438f31eecd7fbfece2c6008ead..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/src/graph_utils.py +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Various function to manipulate graphs for computing distances. 
-""" -import skimage.morphology -import numpy as np -import networkx as nx -import itertools -import logging -from datasets.nav_env import get_path_ids -import graph_tool as gt -import graph_tool.topology -import graph_tool.generation -import src.utils as utils - -# Compute shortest path from all nodes to or from all source nodes -def get_distance_node_list(gtG, source_nodes, direction, weights=None): - gtG_ = gt.Graph(gtG) - v = gtG_.add_vertex() - - if weights is not None: - weights = gtG_.edge_properties[weights] - - for s in source_nodes: - e = gtG_.add_edge(s, int(v)) - if weights is not None: - weights[e] = 0. - - if direction == 'to': - dist = gt.topology.shortest_distance( - gt.GraphView(gtG_, reversed=True), source=gtG_.vertex(int(v)), - target=None, weights=weights) - elif direction == 'from': - dist = gt.topology.shortest_distance( - gt.GraphView(gtG_, reversed=False), source=gtG_.vertex(int(v)), - target=None, weights=weights) - dist = np.array(dist.get_array()) - dist = dist[:-1] - if weights is None: - dist = dist-1 - return dist - -# Functions for semantically labelling nodes in the traversal graph. -def generate_lattice(sz_x, sz_y): - """Generates a lattice with sz_x vertices along x and sz_y vertices along y - direction Each of these vertices is step_size distance apart. Origin is at - (0,0). 
""" - g = gt.generation.lattice([sz_x, sz_y]) - x, y = np.meshgrid(np.arange(sz_x), np.arange(sz_y)) - x = np.reshape(x, [-1,1]); y = np.reshape(y, [-1,1]); - nodes = np.concatenate((x,y), axis=1) - return g, nodes - -def add_diagonal_edges(g, nodes, sz_x, sz_y, edge_len): - offset = [sz_x+1, sz_x-1] - for o in offset: - s = np.arange(nodes.shape[0]-o-1) - t = s + o - ind = np.all(np.abs(nodes[s,:] - nodes[t,:]) == np.array([[1,1]]), axis=1) - s = s[ind][:,np.newaxis] - t = t[ind][:,np.newaxis] - st = np.concatenate((s,t), axis=1) - for i in range(st.shape[0]): - e = g.add_edge(st[i,0], st[i,1], add_missing=False) - g.ep['wts'][e] = edge_len - -def convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=1., - oo_cost=1., connectivity=4): - assert(connectivity == 4 or connectivity == 8) - - sz_x = traversible.shape[1] - sz_y = traversible.shape[0] - g, nodes = generate_lattice(sz_x, sz_y) - - # Assign costs. - edge_wts = g.new_edge_property('float') - g.edge_properties['wts'] = edge_wts - wts = np.ones(g.num_edges(), dtype=np.float32) - edge_wts.get_array()[:] = wts - - if connectivity == 8: - add_diagonal_edges(g, nodes, sz_x, sz_y, np.sqrt(2.)) - - se = np.array([[int(e.source()), int(e.target())] for e in g.edges()]) - s_xy = nodes[se[:,0]] - t_xy = nodes[se[:,1]] - s_t = np.ravel_multi_index((s_xy[:,1], s_xy[:,0]), traversible.shape) - t_t = np.ravel_multi_index((t_xy[:,1], t_xy[:,0]), traversible.shape) - s_t = traversible.ravel()[s_t] - t_t = traversible.ravel()[t_t] - - wts = np.zeros(g.num_edges(), dtype=np.float32) - wts[np.logical_and(s_t == True, t_t == True)] = ff_cost - wts[np.logical_and(s_t == False, t_t == False)] = oo_cost - wts[np.logical_xor(s_t, t_t)] = fo_cost - - edge_wts = g.edge_properties['wts'] - for i, e in enumerate(g.edges()): - edge_wts[e] = edge_wts[e] * wts[i] - # d = edge_wts.get_array()*1. 
- # edge_wts.get_array()[:] = d*wts - return g, nodes - -def label_nodes_with_class(nodes_xyt, class_maps, pix): - """ - Returns: - class_maps__: one-hot class_map for each class. - node_class_label: one-hot class_map for each class, nodes_xyt.shape[0] x n_classes - """ - # Assign each pixel to a node. - selem = skimage.morphology.disk(pix) - class_maps_ = class_maps*1. - for i in range(class_maps.shape[2]): - class_maps_[:,:,i] = skimage.morphology.dilation(class_maps[:,:,i]*1, selem) - class_maps__ = np.argmax(class_maps_, axis=2) - class_maps__[np.max(class_maps_, axis=2) == 0] = -1 - - # For each node pick out the label from this class map. - x = np.round(nodes_xyt[:,[0]]).astype(np.int32) - y = np.round(nodes_xyt[:,[1]]).astype(np.int32) - ind = np.ravel_multi_index((y,x), class_maps__.shape) - node_class_label = class_maps__.ravel()[ind][:,0] - - # Convert to one hot versions. - class_maps_one_hot = np.zeros(class_maps.shape, dtype=np.bool) - node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=np.bool) - for i in range(class_maps.shape[2]): - class_maps_one_hot[:,:,i] = class_maps__ == i - node_class_label_one_hot[:,i] = node_class_label == i - return class_maps_one_hot, node_class_label_one_hot - -def label_nodes_with_class_geodesic(nodes_xyt, class_maps, pix, traversible, - ff_cost=1., fo_cost=1., oo_cost=1., - connectivity=4): - """Labels nodes in nodes_xyt with class labels using geodesic distance as - defined by traversible from class_maps. - Inputs: - nodes_xyt - class_maps: counts for each class. - pix: distance threshold to consider close enough to target. - traversible: binary map of whether traversible or not. - Output: - labels: For each node in nodes_xyt returns a label of the class or -1 is - unlabelled. - """ - g, nodes = convert_traversible_to_graph(traversible, ff_cost=ff_cost, - fo_cost=fo_cost, oo_cost=oo_cost, - connectivity=connectivity) - - class_dist = np.zeros_like(class_maps*1.) 
- n_classes = class_maps.shape[2] - if False: - # Assign each pixel to a class based on number of points. - selem = skimage.morphology.disk(pix) - class_maps_ = class_maps*1. - class_maps__ = np.argmax(class_maps_, axis=2) - class_maps__[np.max(class_maps_, axis=2) == 0] = -1 - - # Label nodes with classes. - for i in range(n_classes): - # class_node_ids = np.where(class_maps__.ravel() == i)[0] - class_node_ids = np.where(class_maps[:,:,i].ravel() > 0)[0] - dist_i = get_distance_node_list(g, class_node_ids, 'to', weights='wts') - class_dist[:,:,i] = np.reshape(dist_i, class_dist[:,:,i].shape) - class_map_geodesic = (class_dist <= pix) - class_map_geodesic = np.reshape(class_map_geodesic, [-1, n_classes]) - - # For each node pick out the label from this class map. - x = np.round(nodes_xyt[:,[0]]).astype(np.int32) - y = np.round(nodes_xyt[:,[1]]).astype(np.int32) - ind = np.ravel_multi_index((y,x), class_dist[:,:,0].shape) - node_class_label = class_map_geodesic[ind[:,0],:] - class_map_geodesic = class_dist <= pix - return class_map_geodesic, node_class_label - -def _get_next_nodes_undirected(n, sc, n_ori): - nodes_to_add = [] - nodes_to_validate = [] - (p, q, r) = n - nodes_to_add.append((n, (p, q, r), 0)) - if n_ori == 4: - for _ in [1, 2, 3, 4]: - if _ == 1: - v = (p - sc, q, r) - elif _ == 2: - v = (p + sc, q, r) - elif _ == 3: - v = (p, q - sc, r) - elif _ == 4: - v = (p, q + sc, r) - nodes_to_validate.append((n, v, _)) - return nodes_to_add, nodes_to_validate - -def _get_next_nodes(n, sc, n_ori): - nodes_to_add = [] - nodes_to_validate = [] - (p, q, r) = n - for r_, a_ in zip([-1, 0, 1], [1, 0, 2]): - nodes_to_add.append((n, (p, q, np.mod(r+r_, n_ori)), a_)) - - if n_ori == 6: - if r == 0: - v = (p + sc, q, r) - elif r == 1: - v = (p + sc, q + sc, r) - elif r == 2: - v = (p, q + sc, r) - elif r == 3: - v = (p - sc, q, r) - elif r == 4: - v = (p - sc, q - sc, r) - elif r == 5: - v = (p, q - sc, r) - elif n_ori == 4: - if r == 0: - v = (p + sc, q, r) - elif r == 
1: - v = (p, q + sc, r) - elif r == 2: - v = (p - sc, q, r) - elif r == 3: - v = (p, q - sc, r) - nodes_to_validate.append((n,v,3)) - - return nodes_to_add, nodes_to_validate - -def generate_graph(valid_fn_vec=None, sc=1., n_ori=6, - starting_location=(0, 0, 0), vis=False, directed=True): - timer = utils.Timer() - timer.tic() - if directed: G = nx.DiGraph(directed=True) - else: G = nx.Graph() - G.add_node(starting_location) - new_nodes = G.nodes() - while len(new_nodes) != 0: - nodes_to_add = [] - nodes_to_validate = [] - for n in new_nodes: - if directed: - na, nv = _get_next_nodes(n, sc, n_ori) - else: - na, nv = _get_next_nodes_undirected(n, sc, n_ori) - nodes_to_add = nodes_to_add + na - if valid_fn_vec is not None: - nodes_to_validate = nodes_to_validate + nv - else: - node_to_add = nodes_to_add + nv - - # Validate nodes. - vs = [_[1] for _ in nodes_to_validate] - valids = valid_fn_vec(vs) - - for nva, valid in zip(nodes_to_validate, valids): - if valid: - nodes_to_add.append(nva) - - new_nodes = [] - for n,v,a in nodes_to_add: - if not G.has_node(v): - new_nodes.append(v) - G.add_edge(n, v, action=a) - - timer.toc(average=True, log_at=1, log_str='src.graph_utils.generate_graph') - return (G) - -def vis_G(G, ax, vertex_color='r', edge_color='b', r=None): - if edge_color is not None: - for e in G.edges(): - XYT = zip(*e) - x = XYT[-3] - y = XYT[-2] - t = XYT[-1] - if r is None or t[0] == r: - ax.plot(x, y, edge_color) - if vertex_color is not None: - XYT = zip(*G.nodes()) - x = XYT[-3] - y = XYT[-2] - t = XYT[-1] - ax.plot(x, y, vertex_color + '.') - -def convert_to_graph_tool(G): - timer = utils.Timer() - timer.tic() - gtG = gt.Graph(directed=G.is_directed()) - gtG.ep['action'] = gtG.new_edge_property('int') - - nodes_list = G.nodes() - nodes_array = np.array(nodes_list) - - nodes_id = np.zeros((nodes_array.shape[0],), dtype=np.int64) - - for i in range(nodes_array.shape[0]): - v = gtG.add_vertex() - nodes_id[i] = int(v) - - # d = {key: value for (key, value) 
in zip(nodes_list, nodes_id)} - d = dict(itertools.izip(nodes_list, nodes_id)) - - for src, dst, data in G.edges_iter(data=True): - e = gtG.add_edge(d[src], d[dst]) - gtG.ep['action'][e] = data['action'] - nodes_to_id = d - timer.toc(average=True, log_at=1, log_str='src.graph_utils.convert_to_graph_tool') - return gtG, nodes_array, nodes_to_id - - -def _rejection_sampling(rng, sampling_d, target_d, bins, hardness, M): - bin_ind = np.digitize(hardness, bins)-1 - i = 0 - ratio = target_d[bin_ind] / (M*sampling_d[bin_ind]) - while i < ratio.size and rng.rand() > ratio[i]: - i = i+1 - return i - -def heuristic_fn_vec(n1, n2, n_ori, step_size): - # n1 is a vector and n2 is a single point. - dx = (n1[:,0] - n2[0,0])/step_size - dy = (n1[:,1] - n2[0,1])/step_size - dt = n1[:,2] - n2[0,2] - dt = np.mod(dt, n_ori) - dt = np.minimum(dt, n_ori-dt) - - if n_ori == 6: - if dx*dy > 0: - d = np.maximum(np.abs(dx), np.abs(dy)) - else: - d = np.abs(dy-dx) - elif n_ori == 4: - d = np.abs(dx) + np.abs(dy) - - return (d + dt).reshape((-1,1)) - -def get_hardness_distribution(gtG, max_dist, min_dist, rng, trials, bins, nodes, - n_ori, step_size): - heuristic_fn = lambda node_ids, node_id: \ - heuristic_fn_vec(nodes[node_ids, :], nodes[[node_id], :], n_ori, step_size) - num_nodes = gtG.num_vertices() - gt_dists = []; h_dists = []; - for i in range(trials): - end_node_id = rng.choice(num_nodes) - gt_dist = gt.topology.shortest_distance(gt.GraphView(gtG, reversed=True), - source=gtG.vertex(end_node_id), - target=None, max_dist=max_dist) - gt_dist = np.array(gt_dist.get_array()) - ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0] - gt_dist = gt_dist[ind] - h_dist = heuristic_fn(ind, end_node_id)[:,0] - gt_dists.append(gt_dist) - h_dists.append(h_dist) - gt_dists = np.concatenate(gt_dists) - h_dists = np.concatenate(h_dists) - hardness = 1. 
- h_dists*1./gt_dists - hist, _ = np.histogram(hardness, bins) - hist = hist.astype(np.float64) - hist = hist / np.sum(hist) - return hist - -def rng_next_goal_rejection_sampling(start_node_ids, batch_size, gtG, rng, - max_dist, min_dist, max_dist_to_compute, - sampling_d, target_d, - nodes, n_ori, step_size, bins, M): - sample_start_nodes = start_node_ids is None - dists = []; pred_maps = []; end_node_ids = []; start_node_ids_ = []; - hardnesss = []; gt_dists = []; - num_nodes = gtG.num_vertices() - for i in range(batch_size): - done = False - while not done: - if sample_start_nodes: - start_node_id = rng.choice(num_nodes) - else: - start_node_id = start_node_ids[i] - - gt_dist = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=False), source=start_node_id, target=None, - max_dist=max_dist) - gt_dist = np.array(gt_dist.get_array()) - ind = np.where(np.logical_and(gt_dist <= max_dist, gt_dist >= min_dist))[0] - ind = rng.permutation(ind) - gt_dist = gt_dist[ind]*1. - h_dist = heuristic_fn_vec(nodes[ind, :], nodes[[start_node_id], :], - n_ori, step_size)[:,0] - hardness = 1. - h_dist / gt_dist - sampled_ind = _rejection_sampling(rng, sampling_d, target_d, bins, - hardness, M) - if sampled_ind < ind.size: - # print sampled_ind - end_node_id = ind[sampled_ind] - hardness = hardness[sampled_ind] - gt_dist = gt_dist[sampled_ind] - done = True - - # Compute distance from end node to all nodes, to return. 
- dist, pred_map = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=True), source=end_node_id, target=None, - max_dist=max_dist_to_compute, pred_map=True) - dist = np.array(dist.get_array()) - pred_map = np.array(pred_map.get_array()) - - hardnesss.append(hardness); dists.append(dist); pred_maps.append(pred_map); - start_node_ids_.append(start_node_id); end_node_ids.append(end_node_id); - gt_dists.append(gt_dist); - paths = None - return start_node_ids_, end_node_ids, dists, pred_maps, paths, hardnesss, gt_dists - - -def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist, - max_dist_to_compute, node_room_ids, nodes=None, - compute_path=False, dists_from_start_node=None): - # Compute the distance field from the starting location, and then pick a - # destination in another room if possible otherwise anywhere outside this - # room. - dists = []; pred_maps = []; paths = []; end_node_ids = []; - for i in range(batch_size): - room_id = node_room_ids[start_node_ids[i]] - # Compute distances. - if dists_from_start_node == None: - dist, pred_map = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=False), source=gtG.vertex(start_node_ids[i]), - target=None, max_dist=max_dist_to_compute, pred_map=True) - dist = np.array(dist.get_array()) - else: - dist = dists_from_start_node[i] - - # Randomly sample nodes which are within max_dist. - near_ids = dist <= max_dist - near_ids = near_ids[:, np.newaxis] - # Check to see if there is a non-negative node which is close enough. 
- non_same_room_ids = node_room_ids != room_id - non_hallway_ids = node_room_ids != -1 - good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids)) - good2_ids = np.logical_and(near_ids, non_hallway_ids) - good3_ids = near_ids - if np.any(good1_ids): - end_node_id = rng.choice(np.where(good1_ids)[0]) - elif np.any(good2_ids): - end_node_id = rng.choice(np.where(good2_ids)[0]) - elif np.any(good3_ids): - end_node_id = rng.choice(np.where(good3_ids)[0]) - else: - logging.error('Did not find any good nodes.') - - # Compute distance to this new goal for doing distance queries. - dist, pred_map = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id), - target=None, max_dist=max_dist_to_compute, pred_map=True) - dist = np.array(dist.get_array()) - pred_map = np.array(pred_map.get_array()) - - dists.append(dist) - pred_maps.append(pred_map) - end_node_ids.append(end_node_id) - - path = None - if compute_path: - path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) - paths.append(path) - - return start_node_ids, end_node_ids, dists, pred_maps, paths - - -def rng_room_to_room(batch_size, gtG, rng, max_dist, max_dist_to_compute, - node_room_ids, nodes=None, compute_path=False): - # Sample one of the rooms, compute the distance field. Pick a destination in - # another room if possible otherwise anywhere outside this room. - dists = []; pred_maps = []; paths = []; start_node_ids = []; end_node_ids = []; - room_ids = np.unique(node_room_ids[node_room_ids[:,0] >= 0, 0]) - for i in range(batch_size): - room_id = rng.choice(room_ids) - end_node_id = rng.choice(np.where(node_room_ids[:,0] == room_id)[0]) - end_node_ids.append(end_node_id) - - # Compute distances. 
- dist, pred_map = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_id), - target=None, max_dist=max_dist_to_compute, pred_map=True) - dist = np.array(dist.get_array()) - pred_map = np.array(pred_map.get_array()) - dists.append(dist) - pred_maps.append(pred_map) - - # Randomly sample nodes which are within max_dist. - near_ids = dist <= max_dist - near_ids = near_ids[:, np.newaxis] - - # Check to see if there is a non-negative node which is close enough. - non_same_room_ids = node_room_ids != room_id - non_hallway_ids = node_room_ids != -1 - good1_ids = np.logical_and(near_ids, np.logical_and(non_same_room_ids, non_hallway_ids)) - good2_ids = np.logical_and(near_ids, non_hallway_ids) - good3_ids = near_ids - if np.any(good1_ids): - start_node_id = rng.choice(np.where(good1_ids)[0]) - elif np.any(good2_ids): - start_node_id = rng.choice(np.where(good2_ids)[0]) - elif np.any(good3_ids): - start_node_id = rng.choice(np.where(good3_ids)[0]) - else: - logging.error('Did not find any good nodes.') - - start_node_ids.append(start_node_id) - - path = None - if compute_path: - path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) - paths.append(path) - - return start_node_ids, end_node_ids, dists, pred_maps, paths - - -def rng_target_dist_field(batch_size, gtG, rng, max_dist, max_dist_to_compute, - nodes=None, compute_path=False): - # Sample a single node, compute distance to all nodes less than max_dist, - # sample nodes which are a particular distance away. 
- dists = []; pred_maps = []; paths = []; start_node_ids = [] - end_node_ids = rng.choice(gtG.num_vertices(), size=(batch_size,), - replace=False).tolist() - - for i in range(batch_size): - dist, pred_map = gt.topology.shortest_distance( - gt.GraphView(gtG, reversed=True), source=gtG.vertex(end_node_ids[i]), - target=None, max_dist=max_dist_to_compute, pred_map=True) - dist = np.array(dist.get_array()) - pred_map = np.array(pred_map.get_array()) - dists.append(dist) - pred_maps.append(pred_map) - - # Randomly sample nodes which are withing max_dist - near_ids = np.where(dist <= max_dist)[0] - start_node_id = rng.choice(near_ids, size=(1,), replace=False)[0] - start_node_ids.append(start_node_id) - - path = None - if compute_path: - path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) - paths.append(path) - - return start_node_ids, end_node_ids, dists, pred_maps, paths diff --git a/research/cognitive_mapping_and_planning/src/map_utils.py b/research/cognitive_mapping_and_planning/src/map_utils.py deleted file mode 100644 index 6756131a9eac161e7633ef089ed573e324f859e1..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/src/map_utils.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Various function to compute the ground truth map for training etc. -""" -import copy -import skimage.morphology -import logging -import numpy as np -import scipy.ndimage -import matplotlib.pyplot as plt -import PIL - -import src.utils as utils -import cv2 - -def _get_xy_bounding_box(vertex, padding): - """Returns the xy bounding box of the environment.""" - min_ = np.floor(np.min(vertex[:, :2], axis=0) - padding).astype(np.int) - max_ = np.ceil(np.max(vertex[:, :2], axis=0) + padding).astype(np.int) - return min_, max_ - -def _project_to_map(map, vertex, wt=None, ignore_points_outside_map=False): - """Projects points to map, returns how many points are present at each - location.""" - num_points = np.zeros((map.size[1], map.size[0])) - vertex_ = vertex[:, :2] - map.origin - vertex_ = np.round(vertex_ / map.resolution).astype(np.int) - if ignore_points_outside_map: - good_ind = np.all(np.array([vertex_[:,1] >= 0, vertex_[:,1] < map.size[1], - vertex_[:,0] >= 0, vertex_[:,0] < map.size[0]]), - axis=0) - vertex_ = vertex_[good_ind, :] - if wt is not None: - wt = wt[good_ind, :] - if wt is None: - np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), 1) - else: - assert(wt.shape[0] == vertex.shape[0]), \ - 'number of weights should be same as vertices.' 
- np.add.at(num_points, (vertex_[:, 1], vertex_[:, 0]), wt) - return num_points - -def make_map(padding, resolution, vertex=None, sc=1.): - """Returns a map structure.""" - min_, max_ = _get_xy_bounding_box(vertex*sc, padding=padding) - sz = np.ceil((max_ - min_ + 1) / resolution).astype(np.int32) - max_ = min_ + sz * resolution - 1 - map = utils.Foo(origin=min_, size=sz, max=max_, resolution=resolution, - padding=padding) - return map - -def _fill_holes(img, thresh): - """Fills holes less than thresh area (assumes 4 connectivity when computing - hole area.""" - l, n = scipy.ndimage.label(np.logical_not(img)) - img_ = img == True - cnts = np.bincount(l.reshape(-1)) - for i, cnt in enumerate(cnts): - if cnt < thresh: - l[l == i] = -1 - img_[l == -1] = True - return img_ - -def compute_traversibility(map, robot_base, robot_height, robot_radius, - valid_min, valid_max, num_point_threshold, shapess, - sc=100., n_samples_per_face=200): - """Returns a bit map with pixels that are traversible or not as long as the - robot center is inside this volume we are good colisions can be detected by - doing a line search on things, or walking from current location to final - location in the bitmap, or doing bwlabel on the traversibility map.""" - - tt = utils.Timer() - tt.tic() - num_obstcale_points = np.zeros((map.size[1], map.size[0])) - num_points = np.zeros((map.size[1], map.size[0])) - - for i, shapes in enumerate(shapess): - for j in range(shapes.get_number_of_meshes()): - p, face_areas, face_idx = shapes.sample_points_on_face_of_shape( - j, n_samples_per_face, sc) - wt = face_areas[face_idx]/n_samples_per_face - - ind = np.all(np.concatenate( - (p[:, [2]] > robot_base, - p[:, [2]] < robot_base + robot_height), axis=1),axis=1) - num_obstcale_points += _project_to_map(map, p[ind, :], wt[ind]) - - ind = np.all(np.concatenate( - (p[:, [2]] > valid_min, - p[:, [2]] < valid_max), axis=1),axis=1) - num_points += _project_to_map(map, p[ind, :], wt[ind]) - - selem = 
skimage.morphology.disk(robot_radius / map.resolution) - obstacle_free = skimage.morphology.binary_dilation( - _fill_holes(num_obstcale_points > num_point_threshold, 20), selem) != True - valid_space = _fill_holes(num_points > num_point_threshold, 20) - traversible = np.all(np.concatenate((obstacle_free[...,np.newaxis], - valid_space[...,np.newaxis]), axis=2), - axis=2) - # plt.imshow(np.concatenate((obstacle_free, valid_space, traversible), axis=1)) - # plt.show() - - map_out = copy.deepcopy(map) - map_out.num_obstcale_points = num_obstcale_points - map_out.num_points = num_points - map_out.traversible = traversible - map_out.obstacle_free = obstacle_free - map_out.valid_space = valid_space - tt.toc(log_at=1, log_str='src.map_utils.compute_traversibility: ') - return map_out - - -def resize_maps(map, map_scales, resize_method): - scaled_maps = [] - for i, sc in enumerate(map_scales): - if resize_method == 'antialiasing': - # Resize using open cv so that we can compute the size. - # Use PIL resize to use anti aliasing feature. - map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR) - w = map_.shape[1]; h = map_.shape[0] - - map_img = PIL.Image.fromarray((map*255).astype(np.uint8)) - map__img = map_img.resize((w,h), PIL.Image.ANTIALIAS) - map_ = np.asarray(map__img).astype(np.float32) - map_ = map_/255. 
- map_ = np.minimum(map_, 1.0) - map_ = np.maximum(map_, 0.0) - elif resize_method == 'linear_noantialiasing': - map_ = cv2.resize(map*1, None, None, fx=sc, fy=sc, interpolation=cv2.INTER_LINEAR) - else: - logging.error('Unknown resizing method') - scaled_maps.append(map_) - return scaled_maps - - -def pick_largest_cc(traversible): - out = scipy.ndimage.label(traversible)[0] - cnt = np.bincount(out.reshape(-1))[1:] - return out == np.argmax(cnt) + 1 - -def get_graph_origin_loc(rng, traversible): - """Erode the traversibility mask so that we get points in the bulk of the - graph, and not end up with a situation where the graph is localized in the - corner of a cramped room. Output Locs is in the coordinate frame of the - map.""" - - aa = pick_largest_cc(skimage.morphology.binary_erosion(traversible == True, - selem=np.ones((15,15)))) - y, x = np.where(aa > 0) - ind = rng.choice(y.size) - locs = np.array([x[ind], y[ind]]) - locs = locs + rng.rand(*(locs.shape)) - 0.5 - return locs - - -def generate_egocentric_maps(scaled_maps, map_scales, map_crop_sizes, loc, - x_axis, y_axis, theta): - maps = [] - for i, (map_, sc, map_crop_size) in enumerate(zip(scaled_maps, map_scales, map_crop_sizes)): - maps_i = np.array(get_map_to_predict(loc*sc, x_axis, y_axis, map_, - map_crop_size, - interpolation=cv2.INTER_LINEAR)[0]) - maps_i[np.isnan(maps_i)] = 0 - maps.append(maps_i) - return maps - -def generate_goal_images(map_scales, map_crop_sizes, n_ori, goal_dist, - goal_theta, rel_goal_orientation): - goal_dist = goal_dist[:,0] - goal_theta = goal_theta[:,0] - rel_goal_orientation = rel_goal_orientation[:,0] - - goals = []; - # Generate the map images. - for i, (sc, map_crop_size) in enumerate(zip(map_scales, map_crop_sizes)): - goal_i = np.zeros((goal_dist.shape[0], map_crop_size, map_crop_size, n_ori), - dtype=np.float32) - x = goal_dist*np.cos(goal_theta)*sc + (map_crop_size-1.)/2. - y = goal_dist*np.sin(goal_theta)*sc + (map_crop_size-1.)/2. 
- - for j in range(goal_dist.shape[0]): - gc = rel_goal_orientation[j] - x0 = np.floor(x[j]).astype(np.int32); x1 = x0 + 1; - y0 = np.floor(y[j]).astype(np.int32); y1 = y0 + 1; - if x0 >= 0 and x0 <= map_crop_size-1: - if y0 >= 0 and y0 <= map_crop_size-1: - goal_i[j, y0, x0, gc] = (x1-x[j])*(y1-y[j]) - if y1 >= 0 and y1 <= map_crop_size-1: - goal_i[j, y1, x0, gc] = (x1-x[j])*(y[j]-y0) - - if x1 >= 0 and x1 <= map_crop_size-1: - if y0 >= 0 and y0 <= map_crop_size-1: - goal_i[j, y0, x1, gc] = (x[j]-x0)*(y1-y[j]) - if y1 >= 0 and y1 <= map_crop_size-1: - goal_i[j, y1, x1, gc] = (x[j]-x0)*(y[j]-y0) - - goals.append(goal_i) - return goals - -def get_map_to_predict(src_locs, src_x_axiss, src_y_axiss, map, map_size, - interpolation=cv2.INTER_LINEAR): - fss = [] - valids = [] - - center = (map_size-1.0)/2.0 - dst_theta = np.pi/2.0 - dst_loc = np.array([center, center]) - dst_x_axis = np.array([np.cos(dst_theta), np.sin(dst_theta)]) - dst_y_axis = np.array([np.cos(dst_theta+np.pi/2), np.sin(dst_theta+np.pi/2)]) - - def compute_points(center, x_axis, y_axis): - points = np.zeros((3,2),dtype=np.float32) - points[0,:] = center - points[1,:] = center + x_axis - points[2,:] = center + y_axis - return points - - dst_points = compute_points(dst_loc, dst_x_axis, dst_y_axis) - for i in range(src_locs.shape[0]): - src_loc = src_locs[i,:] - src_x_axis = src_x_axiss[i,:] - src_y_axis = src_y_axiss[i,:] - src_points = compute_points(src_loc, src_x_axis, src_y_axis) - M = cv2.getAffineTransform(src_points, dst_points) - - fs = cv2.warpAffine(map, M, (map_size, map_size), None, flags=interpolation, - borderValue=np.NaN) - valid = np.invert(np.isnan(fs)) - valids.append(valid) - fss.append(fs) - return fss, valids - diff --git a/research/cognitive_mapping_and_planning/src/rotation_utils.py b/research/cognitive_mapping_and_planning/src/rotation_utils.py deleted file mode 100644 index 8d6d4f3cbdb1f808d210dce8b22fa3ba831d45a9..0000000000000000000000000000000000000000 --- 
a/research/cognitive_mapping_and_planning/src/rotation_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for generating and applying rotation matrices. -""" -import numpy as np - -ANGLE_EPS = 0.001 - - -def normalize(v): - return v / np.linalg.norm(v) - - -def get_r_matrix(ax_, angle): - ax = normalize(ax_) - if np.abs(angle) > ANGLE_EPS: - S_hat = np.array( - [[0.0, -ax[2], ax[1]], [ax[2], 0.0, -ax[0]], [-ax[1], ax[0], 0.0]], - dtype=np.float32) - R = np.eye(3) + np.sin(angle)*S_hat + \ - (1-np.cos(angle))*(np.linalg.matrix_power(S_hat, 2)) - else: - R = np.eye(3) - return R - - -def r_between(v_from_, v_to_): - v_from = normalize(v_from_) - v_to = normalize(v_to_) - ax = normalize(np.cross(v_from, v_to)) - angle = np.arccos(np.dot(v_from, v_to)) - return get_r_matrix(ax, angle) - - -def rotate_camera_to_point_at(up_from, lookat_from, up_to, lookat_to): - inputs = [up_from, lookat_from, up_to, lookat_to] - for i in range(4): - inputs[i] = normalize(np.array(inputs[i]).reshape((-1,))) - up_from, lookat_from, up_to, lookat_to = inputs - r1 = r_between(lookat_from, lookat_to) - - new_x = np.dot(r1, np.array([1, 0, 0]).reshape((-1, 1))).reshape((-1)) - to_x = normalize(np.cross(lookat_to, up_to)) - angle = np.arccos(np.dot(new_x, to_x)) - if angle > ANGLE_EPS: - if angle < 
np.pi - ANGLE_EPS: - ax = normalize(np.cross(new_x, to_x)) - flip = np.dot(lookat_to, ax) - if flip > 0: - r2 = get_r_matrix(lookat_to, angle) - elif flip < 0: - r2 = get_r_matrix(lookat_to, -1. * angle) - else: - # Angle of rotation is too close to 180 degrees, direction of rotation - # does not matter. - r2 = get_r_matrix(lookat_to, angle) - else: - r2 = np.eye(3) - return np.dot(r2, r1) - diff --git a/research/cognitive_mapping_and_planning/src/utils.py b/research/cognitive_mapping_and_planning/src/utils.py deleted file mode 100644 index a1b9e44260b7c7884855761f56ac60d6f508c2fb..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/src/utils.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Generaly Utilities. -""" - -import numpy as np, cPickle, os, time -from six.moves import xrange -import src.file_utils as fu -import logging - -class Timer(): - def __init__(self): - self.calls = 0. - self.start_time = 0. - self.time_per_call = 0. - self.total_time = 0. - self.last_log_time = 0. 
- - def tic(self): - self.start_time = time.time() - - def toc(self, average=True, log_at=-1, log_str='', type='calls'): - if self.start_time == 0: - logging.error('Timer not started by calling tic().') - t = time.time() - diff = time.time() - self.start_time - self.total_time += diff - self.calls += 1. - self.time_per_call = self.total_time/self.calls - - if type == 'calls' and log_at > 0 and np.mod(self.calls, log_at) == 0: - _ = [] - logging.info('%s: %f seconds.', log_str, self.time_per_call) - elif type == 'time' and log_at > 0 and t - self.last_log_time >= log_at: - _ = [] - logging.info('%s: %f seconds.', log_str, self.time_per_call) - self.last_log_time = t - - if average: - return self.time_per_call - else: - return diff - -class Foo(object): - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - def __str__(self): - str_ = '' - for v in vars(self).keys(): - a = getattr(self, v) - if True: #isinstance(v, object): - str__ = str(a) - str__ = str__.replace('\n', '\n ') - else: - str__ = str(a) - str_ += '{:s}: {:s}'.format(v, str__) - str_ += '\n' - return str_ - - -def dict_equal(dict1, dict2): - assert(set(dict1.keys()) == set(dict2.keys())), "Sets of keys between 2 dictionaries are different." 
- for k in dict1.keys(): - assert(type(dict1[k]) == type(dict2[k])), "Type of key '{:s}' if different.".format(k) - if type(dict1[k]) == np.ndarray: - assert(dict1[k].dtype == dict2[k].dtype), "Numpy Type of key '{:s}' if different.".format(k) - assert(np.allclose(dict1[k], dict2[k])), "Value for key '{:s}' do not match.".format(k) - else: - assert(dict1[k] == dict2[k]), "Value for key '{:s}' do not match.".format(k) - return True - -def subplot(plt, Y_X, sz_y_sz_x = (10, 10)): - Y,X = Y_X - sz_y, sz_x = sz_y_sz_x - plt.rcParams['figure.figsize'] = (X*sz_x, Y*sz_y) - fig, axes = plt.subplots(Y, X) - plt.subplots_adjust(wspace=0.1, hspace=0.1) - return fig, axes - -def tic_toc_print(interval, string): - global tic_toc_print_time_old - if 'tic_toc_print_time_old' not in globals(): - tic_toc_print_time_old = time.time() - print(string) - else: - new_time = time.time() - if new_time - tic_toc_print_time_old > interval: - tic_toc_print_time_old = new_time; - print(string) - -def mkdir_if_missing(output_dir): - if not fu.exists(output_dir): - fu.makedirs(output_dir) - -def save_variables(pickle_file_name, var, info, overwrite = False): - if fu.exists(pickle_file_name) and overwrite == False: - raise Exception('{:s} exists and over write is false.'.format(pickle_file_name)) - # Construct the dictionary - assert(type(var) == list); assert(type(info) == list); - d = {} - for i in xrange(len(var)): - d[info[i]] = var[i] - with fu.fopen(pickle_file_name, 'w') as f: - cPickle.dump(d, f, cPickle.HIGHEST_PROTOCOL) - -def load_variables(pickle_file_name): - if fu.exists(pickle_file_name): - with fu.fopen(pickle_file_name, 'r') as f: - d = cPickle.load(f) - return d - else: - raise Exception('{:s} does not exists.'.format(pickle_file_name)) - -def voc_ap(rec, prec): - rec = rec.reshape((-1,1)) - prec = prec.reshape((-1,1)) - z = np.zeros((1,1)) - o = np.ones((1,1)) - mrec = np.vstack((z, rec, o)) - mpre = np.vstack((z, prec, z)) - for i in range(len(mpre)-2, -1, -1): - mpre[i] = 
max(mpre[i], mpre[i+1]) - - I = np.where(mrec[1:] != mrec[0:-1])[0]+1; - ap = 0; - for i in I: - ap = ap + (mrec[i] - mrec[i-1])*mpre[i]; - return ap - -def tight_imshow_figure(plt, figsize=None): - fig = plt.figure(figsize=figsize) - ax = plt.Axes(fig, [0,0,1,1]) - ax.set_axis_off() - fig.add_axes(ax) - return fig, ax - -def calc_pr(gt, out, wt=None): - if wt is None: - wt = np.ones((gt.size,1)) - - gt = gt.astype(np.float64).reshape((-1,1)) - wt = wt.astype(np.float64).reshape((-1,1)) - out = out.astype(np.float64).reshape((-1,1)) - - gt = gt*wt - tog = np.concatenate([gt, wt, out], axis=1)*1. - ind = np.argsort(tog[:,2], axis=0)[::-1] - tog = tog[ind,:] - cumsumsortgt = np.cumsum(tog[:,0]) - cumsumsortwt = np.cumsum(tog[:,1]) - prec = cumsumsortgt / cumsumsortwt - rec = cumsumsortgt / np.sum(tog[:,0]) - - ap = voc_ap(rec, prec) - return ap, rec, prec diff --git a/research/cognitive_mapping_and_planning/tfcode/__init__.py b/research/cognitive_mapping_and_planning/tfcode/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/cognitive_mapping_and_planning/tfcode/cmp.py b/research/cognitive_mapping_and_planning/tfcode/cmp.py deleted file mode 100644 index 228ef90fddcd9ff41b26795544d93a1f18466158..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/cmp.py +++ /dev/null @@ -1,553 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Code for setting up the network for CMP. - -Sets up the mapper and the planner. -""" - -import sys, os, numpy as np -import matplotlib.pyplot as plt -import copy -import argparse, pprint -import time - - -import tensorflow as tf - -from tensorflow.contrib import slim -from tensorflow.contrib.slim import arg_scope - -import logging -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -from src import utils -import src.file_utils as fu -import tfcode.nav_utils as nu -import tfcode.cmp_utils as cu -import tfcode.cmp_summary as cmp_s -from tfcode import tf_utils - -value_iteration_network = cu.value_iteration_network -rotate_preds = cu.rotate_preds -deconv = cu.deconv -get_visual_frustum = cu.get_visual_frustum -fr_v2 = cu.fr_v2 - -setup_train_step_kwargs = nu.default_train_step_kwargs -compute_losses_multi_or = nu.compute_losses_multi_or - -get_repr_from_image = nu.get_repr_from_image - -_save_d_at_t = nu.save_d_at_t -_save_all = nu.save_all -_eval_ap = nu.eval_ap -_eval_dist = nu.eval_dist -_plot_trajectories = nu.plot_trajectories - -_vis_readout_maps = cmp_s._vis_readout_maps -_vis = cmp_s._vis -_summary_vis = cmp_s._summary_vis -_summary_readout_maps = cmp_s._summary_readout_maps -_add_summaries = cmp_s._add_summaries - -def _inputs(problem): - # Set up inputs. - with tf.name_scope('inputs'): - inputs = [] - inputs.append(('orig_maps', tf.float32, - (problem.batch_size, 1, None, None, 1))) - inputs.append(('goal_loc', tf.float32, - (problem.batch_size, problem.num_goals, 2))) - common_input_data, _ = tf_utils.setup_inputs(inputs) - - inputs = [] - if problem.input_type == 'vision': - # Multiple images from an array of cameras. 
- inputs.append(('imgs', tf.float32, - (problem.batch_size, None, len(problem.aux_delta_thetas)+1, - problem.img_height, problem.img_width, - problem.img_channels))) - elif problem.input_type == 'analytical_counts': - for i in range(len(problem.map_crop_sizes)): - inputs.append(('analytical_counts_{:d}'.format(i), tf.float32, - (problem.batch_size, None, problem.map_crop_sizes[i], - problem.map_crop_sizes[i], problem.map_channels))) - - if problem.outputs.readout_maps: - for i in range(len(problem.readout_maps_crop_sizes)): - inputs.append(('readout_maps_{:d}'.format(i), tf.float32, - (problem.batch_size, None, - problem.readout_maps_crop_sizes[i], - problem.readout_maps_crop_sizes[i], - problem.readout_maps_channels))) - - for i in range(len(problem.map_crop_sizes)): - inputs.append(('ego_goal_imgs_{:d}'.format(i), tf.float32, - (problem.batch_size, None, problem.map_crop_sizes[i], - problem.map_crop_sizes[i], problem.goal_channels))) - for s in ['sum_num', 'sum_denom', 'max_denom']: - inputs.append(('running_'+s+'_{:d}'.format(i), tf.float32, - (problem.batch_size, 1, problem.map_crop_sizes[i], - problem.map_crop_sizes[i], problem.map_channels))) - - inputs.append(('incremental_locs', tf.float32, - (problem.batch_size, None, 2))) - inputs.append(('incremental_thetas', tf.float32, - (problem.batch_size, None, 1))) - inputs.append(('step_number', tf.int32, (1, None, 1))) - inputs.append(('node_ids', tf.int32, (problem.batch_size, None, - problem.node_ids_dim))) - inputs.append(('perturbs', tf.float32, (problem.batch_size, None, - problem.perturbs_dim))) - - # For plotting result plots - inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2))) - inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1))) - - step_input_data, _ = tf_utils.setup_inputs(inputs) - - inputs = [] - inputs.append(('action', tf.int32, (problem.batch_size, None, problem.num_actions))) - train_data, _ = tf_utils.setup_inputs(inputs) - 
train_data.update(step_input_data) - train_data.update(common_input_data) - return common_input_data, step_input_data, train_data - -def readout_general(multi_scale_belief, num_neurons, strides, layers_per_block, - kernel_size, batch_norm_is_training_op, wt_decay): - multi_scale_belief = tf.stop_gradient(multi_scale_belief) - with tf.variable_scope('readout_maps_deconv'): - x, outs = deconv(multi_scale_belief, batch_norm_is_training_op, - wt_decay=wt_decay, neurons=num_neurons, strides=strides, - layers_per_block=layers_per_block, kernel_size=kernel_size, - conv_fn=slim.conv2d_transpose, offset=0, - name='readout_maps_deconv') - probs = tf.sigmoid(x) - return x, probs - - -def running_combine(fss_logits, confs_probs, incremental_locs, - incremental_thetas, previous_sum_num, previous_sum_denom, - previous_max_denom, map_size, num_steps): - # fss_logits is B x N x H x W x C - # confs_logits is B x N x H x W x C - # incremental_locs is B x N x 2 - # incremental_thetas is B x N x 1 - # previous_sum_num etc is B x 1 x H x W x C - - with tf.name_scope('combine_{:d}'.format(num_steps)): - running_sum_nums_ = []; running_sum_denoms_ = []; - running_max_denoms_ = []; - - fss_logits_ = tf.unstack(fss_logits, axis=1, num=num_steps) - confs_probs_ = tf.unstack(confs_probs, axis=1, num=num_steps) - incremental_locs_ = tf.unstack(incremental_locs, axis=1, num=num_steps) - incremental_thetas_ = tf.unstack(incremental_thetas, axis=1, num=num_steps) - running_sum_num = tf.unstack(previous_sum_num, axis=1, num=1)[0] - running_sum_denom = tf.unstack(previous_sum_denom, axis=1, num=1)[0] - running_max_denom = tf.unstack(previous_max_denom, axis=1, num=1)[0] - - for i in range(num_steps): - # Rotate the previous running_num and running_denom - running_sum_num, running_sum_denom, running_max_denom = rotate_preds( - incremental_locs_[i], incremental_thetas_[i], map_size, - [running_sum_num, running_sum_denom, running_max_denom], - output_valid_mask=False)[0] - # print i, num_steps, 
running_sum_num.get_shape().as_list() - running_sum_num = running_sum_num + fss_logits_[i] * confs_probs_[i] - running_sum_denom = running_sum_denom + confs_probs_[i] - running_max_denom = tf.maximum(running_max_denom, confs_probs_[i]) - running_sum_nums_.append(running_sum_num) - running_sum_denoms_.append(running_sum_denom) - running_max_denoms_.append(running_max_denom) - - running_sum_nums = tf.stack(running_sum_nums_, axis=1) - running_sum_denoms = tf.stack(running_sum_denoms_, axis=1) - running_max_denoms = tf.stack(running_max_denoms_, axis=1) - return running_sum_nums, running_sum_denoms, running_max_denoms - -def get_map_from_images(imgs, mapper_arch, task_params, freeze_conv, wt_decay, - is_training, batch_norm_is_training_op, num_maps, - split_maps=True): - # Hit image with a resnet. - n_views = len(task_params.aux_delta_thetas) + 1 - out = utils.Foo() - - images_reshaped = tf.reshape(imgs, - shape=[-1, task_params.img_height, - task_params.img_width, - task_params.img_channels], name='re_image') - - x, out.vars_to_restore = get_repr_from_image( - images_reshaped, task_params.modalities, task_params.data_augment, - mapper_arch.encoder, freeze_conv, wt_decay, is_training) - - # Reshape into nice things so that these can be accumulated over time steps - # for faster backprop. - sh_before = x.get_shape().as_list() - out.encoder_output = tf.reshape(x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:]) - x = tf.reshape(out.encoder_output, shape=[-1] + sh_before[1:]) - - # Add a layer to reduce dimensions for a fc layer. 
- if mapper_arch.dim_reduce_neurons > 0: - ks = 1; neurons = mapper_arch.dim_reduce_neurons; - init_var = np.sqrt(2.0/(ks**2)/neurons) - batch_norm_param = mapper_arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - out.conv_feat = slim.conv2d(x, neurons, kernel_size=ks, stride=1, - normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param, - padding='SAME', scope='dim_reduce', - weights_regularizer=slim.l2_regularizer(wt_decay), - weights_initializer=tf.random_normal_initializer(stddev=init_var)) - reshape_conv_feat = slim.flatten(out.conv_feat) - sh = reshape_conv_feat.get_shape().as_list() - out.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views]) - - with tf.variable_scope('fc'): - # Fully connected layers to compute the representation in top-view space. - fc_batch_norm_param = {'center': True, 'scale': True, - 'activation_fn':tf.nn.relu, - 'is_training': batch_norm_is_training_op} - f = out.reshape_conv_feat - out_neurons = (mapper_arch.fc_out_size**2)*mapper_arch.fc_out_neurons - neurons = mapper_arch.fc_neurons + [out_neurons] - f, _ = tf_utils.fc_network(f, neurons=neurons, wt_decay=wt_decay, - name='fc', offset=0, - batch_norm_param=fc_batch_norm_param, - is_training=is_training, - dropout_ratio=mapper_arch.fc_dropout) - f = tf.reshape(f, shape=[-1, mapper_arch.fc_out_size, - mapper_arch.fc_out_size, - mapper_arch.fc_out_neurons], name='re_fc') - - # Use pool5 to predict the free space map via deconv layers. - with tf.variable_scope('deconv'): - x, outs = deconv(f, batch_norm_is_training_op, wt_decay=wt_decay, - neurons=mapper_arch.deconv_neurons, - strides=mapper_arch.deconv_strides, - layers_per_block=mapper_arch.deconv_layers_per_block, - kernel_size=mapper_arch.deconv_kernel_size, - conv_fn=slim.conv2d_transpose, offset=0, name='deconv') - - # Reshape x the right way. 
- sh = x.get_shape().as_list() - x = tf.reshape(x, shape=[task_params.batch_size, -1] + sh[1:]) - out.deconv_output = x - - # Separate out the map and the confidence predictions, pass the confidence - # through a sigmoid. - if split_maps: - with tf.name_scope('split'): - out_all = tf.split(value=x, axis=4, num_or_size_splits=2*num_maps) - out.fss_logits = out_all[:num_maps] - out.confs_logits = out_all[num_maps:] - with tf.name_scope('sigmoid'): - out.confs_probs = [tf.nn.sigmoid(x) for x in out.confs_logits] - return out - -def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode): - assert(args.arch.multi_scale), 'removed support for old single scale code.' - # Set up the model. - tf.set_random_seed(args.solver.seed) - task_params = args.navtask.task_params - - batch_norm_is_training_op = \ - tf.placeholder_with_default(batch_norm_is_training, shape=[], - name='batch_norm_is_training_op') - - # Setup the inputs - m.input_tensors = {} - m.train_ops = {} - m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \ - _inputs(task_params) - - m.init_fn = None - - if task_params.input_type == 'vision': - m.vision_ops = get_map_from_images( - m.input_tensors['step']['imgs'], args.mapper_arch, - task_params, args.solver.freeze_conv, - args.solver.wt_decay, is_training, batch_norm_is_training_op, - num_maps=len(task_params.map_crop_sizes)) - - # Load variables from snapshot if needed. - if args.solver.pretrained_path is not None: - m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path, - m.vision_ops.vars_to_restore) - - # Set up caching of vision features if needed. - if args.solver.freeze_conv: - m.train_ops['step_data_cache'] = [m.vision_ops.encoder_output] - else: - m.train_ops['step_data_cache'] = [] - - # Set up blobs that are needed for the computation in rest of the graph. 
- m.ego_map_ops = m.vision_ops.fss_logits - m.coverage_ops = m.vision_ops.confs_probs - - # Zero pad these to make them same size as what the planner expects. - for i in range(len(m.ego_map_ops)): - if args.mapper_arch.pad_map_with_zeros_each[i] > 0: - paddings = np.zeros((5,2), dtype=np.int32) - paddings[2:4,:] = args.mapper_arch.pad_map_with_zeros_each[i] - paddings_op = tf.constant(paddings, dtype=tf.int32) - m.ego_map_ops[i] = tf.pad(m.ego_map_ops[i], paddings=paddings_op) - m.coverage_ops[i] = tf.pad(m.coverage_ops[i], paddings=paddings_op) - - elif task_params.input_type == 'analytical_counts': - m.ego_map_ops = []; m.coverage_ops = [] - for i in range(len(task_params.map_crop_sizes)): - ego_map_op = m.input_tensors['step']['analytical_counts_{:d}'.format(i)] - coverage_op = tf.cast(tf.greater_equal( - tf.reduce_max(ego_map_op, reduction_indices=[4], - keep_dims=True), 1), tf.float32) - coverage_op = tf.ones_like(ego_map_op) * coverage_op - m.ego_map_ops.append(ego_map_op) - m.coverage_ops.append(coverage_op) - m.train_ops['step_data_cache'] = [] - - num_steps = task_params.num_steps - num_goals = task_params.num_goals - - map_crop_size_ops = [] - for map_crop_size in task_params.map_crop_sizes: - map_crop_size_ops.append(tf.constant(map_crop_size, dtype=tf.int32, shape=(2,))) - - with tf.name_scope('check_size'): - is_single_step = tf.equal(tf.unstack(tf.shape(m.ego_map_ops[0]), num=5)[1], 1) - - fr_ops = []; value_ops = []; - fr_intermediate_ops = []; value_intermediate_ops = []; - crop_value_ops = []; - resize_crop_value_ops = []; - confs = []; occupancys = []; - - previous_value_op = None - updated_state = []; state_names = []; - - for i in range(len(task_params.map_crop_sizes)): - map_crop_size = task_params.map_crop_sizes[i] - with tf.variable_scope('scale_{:d}'.format(i)): - # Accumulate the map. 
- fn = lambda ns: running_combine( - m.ego_map_ops[i], - m.coverage_ops[i], - m.input_tensors['step']['incremental_locs'] * task_params.map_scales[i], - m.input_tensors['step']['incremental_thetas'], - m.input_tensors['step']['running_sum_num_{:d}'.format(i)], - m.input_tensors['step']['running_sum_denom_{:d}'.format(i)], - m.input_tensors['step']['running_max_denom_{:d}'.format(i)], - map_crop_size, ns) - - running_sum_num, running_sum_denom, running_max_denom = \ - tf.cond(is_single_step, lambda: fn(1), lambda: fn(num_steps*num_goals)) - updated_state += [running_sum_num, running_sum_denom, running_max_denom] - state_names += ['running_sum_num_{:d}'.format(i), - 'running_sum_denom_{:d}'.format(i), - 'running_max_denom_{:d}'.format(i)] - - # Concat the accumulated map and goal - occupancy = running_sum_num / tf.maximum(running_sum_denom, 0.001) - conf = running_max_denom - # print occupancy.get_shape().as_list() - - # Concat occupancy, how much occupied and goal. - with tf.name_scope('concat'): - sh = [-1, map_crop_size, map_crop_size, task_params.map_channels] - occupancy = tf.reshape(occupancy, shape=sh) - conf = tf.reshape(conf, shape=sh) - - sh = [-1, map_crop_size, map_crop_size, task_params.goal_channels] - goal = tf.reshape(m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)], shape=sh) - to_concat = [occupancy, conf, goal] - - if previous_value_op is not None: - to_concat.append(previous_value_op) - - x = tf.concat(to_concat, 3) - - # Pass the map, previous rewards and the goal through a few convolutional - # layers to get fR. 
- fr_op, fr_intermediate_op = fr_v2( - x, output_neurons=args.arch.fr_neurons, - inside_neurons=args.arch.fr_inside_neurons, - is_training=batch_norm_is_training_op, name='fr', - wt_decay=args.solver.wt_decay, stride=args.arch.fr_stride) - - # Do Value Iteration on the fR - if args.arch.vin_num_iters > 0: - value_op, value_intermediate_op = value_iteration_network( - fr_op, num_iters=args.arch.vin_num_iters, - val_neurons=args.arch.vin_val_neurons, - action_neurons=args.arch.vin_action_neurons, - kernel_size=args.arch.vin_ks, share_wts=args.arch.vin_share_wts, - name='vin', wt_decay=args.solver.wt_decay) - else: - value_op = fr_op - value_intermediate_op = [] - - # Crop out and upsample the previous value map. - remove = args.arch.crop_remove_each - if remove > 0: - crop_value_op = value_op[:, remove:-remove, remove:-remove,:] - else: - crop_value_op = value_op - crop_value_op = tf.reshape(crop_value_op, shape=[-1, args.arch.value_crop_size, - args.arch.value_crop_size, - args.arch.vin_val_neurons]) - if i < len(task_params.map_crop_sizes)-1: - # Reshape it to shape of the next scale. - previous_value_op = tf.image.resize_bilinear(crop_value_op, - map_crop_size_ops[i+1], - align_corners=True) - resize_crop_value_ops.append(previous_value_op) - - occupancys.append(occupancy) - confs.append(conf) - value_ops.append(value_op) - crop_value_ops.append(crop_value_op) - fr_ops.append(fr_op) - fr_intermediate_ops.append(fr_intermediate_op) - - m.value_ops = value_ops - m.value_intermediate_ops = value_intermediate_ops - m.fr_ops = fr_ops - m.fr_intermediate_ops = fr_intermediate_ops - m.final_value_op = crop_value_op - m.crop_value_ops = crop_value_ops - m.resize_crop_value_ops = resize_crop_value_ops - m.confs = confs - m.occupancys = occupancys - - sh = [-1, args.arch.vin_val_neurons*((args.arch.value_crop_size)**2)] - m.value_features_op = tf.reshape(m.final_value_op, sh, name='reshape_value_op') - - # Determine what action to take. 
- with tf.variable_scope('action_pred'): - batch_norm_param = args.arch.pred_batch_norm_param - if batch_norm_param is not None: - batch_norm_param['is_training'] = batch_norm_is_training_op - m.action_logits_op, _ = tf_utils.fc_network( - m.value_features_op, neurons=args.arch.pred_neurons, - wt_decay=args.solver.wt_decay, name='pred', offset=0, - num_pred=task_params.num_actions, - batch_norm_param=batch_norm_param) - m.action_prob_op = tf.nn.softmax(m.action_logits_op) - - init_state = tf.constant(0., dtype=tf.float32, shape=[ - task_params.batch_size, 1, map_crop_size, map_crop_size, - task_params.map_channels]) - - m.train_ops['state_names'] = state_names - m.train_ops['updated_state'] = updated_state - m.train_ops['init_state'] = [init_state for _ in updated_state] - - m.train_ops['step'] = m.action_prob_op - m.train_ops['common'] = [m.input_tensors['common']['orig_maps'], - m.input_tensors['common']['goal_loc']] - m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op - m.loss_ops = []; m.loss_ops_names = []; - - if args.arch.readout_maps: - with tf.name_scope('readout_maps'): - all_occupancys = tf.concat(m.occupancys + m.confs, 3) - readout_maps, probs = readout_general( - all_occupancys, num_neurons=args.arch.rom_arch.num_neurons, - strides=args.arch.rom_arch.strides, - layers_per_block=args.arch.rom_arch.layers_per_block, - kernel_size=args.arch.rom_arch.kernel_size, - batch_norm_is_training_op=batch_norm_is_training_op, - wt_decay=args.solver.wt_decay) - - gt_ego_maps = [m.input_tensors['step']['readout_maps_{:d}'.format(i)] - for i in range(len(task_params.readout_maps_crop_sizes))] - m.readout_maps_gt = tf.concat(gt_ego_maps, 4) - gt_shape = tf.shape(m.readout_maps_gt) - m.readout_maps_logits = tf.reshape(readout_maps, gt_shape) - m.readout_maps_probs = tf.reshape(probs, gt_shape) - - # Add a loss op - m.readout_maps_loss_op = tf.losses.sigmoid_cross_entropy( - tf.reshape(m.readout_maps_gt, [-1, len(task_params.readout_maps_crop_sizes)]), 
- tf.reshape(readout_maps, [-1, len(task_params.readout_maps_crop_sizes)]), - scope='loss') - m.readout_maps_loss_op = 10.*m.readout_maps_loss_op - - ewma_decay = 0.99 if is_training else 0.0 - weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32, - name='weight') - m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \ - compute_losses_multi_or(m.action_logits_op, - m.input_tensors['train']['action'], weights=weight, - num_actions=task_params.num_actions, - data_loss_wt=args.solver.data_loss_wt, - reg_loss_wt=args.solver.reg_loss_wt, - ewma_decay=ewma_decay) - - if args.arch.readout_maps: - m.total_loss_op = m.total_loss_op + m.readout_maps_loss_op - m.loss_ops += [m.readout_maps_loss_op] - m.loss_ops_names += ['readout_maps_loss'] - - m.loss_ops += [m.reg_loss_op, m.data_loss_op, m.total_loss_op] - m.loss_ops_names += ['reg_loss', 'data_loss', 'total_loss'] - - if args.solver.freeze_conv: - vars_to_optimize = list(set(tf.trainable_variables()) - - set(m.vision_ops.vars_to_restore)) - else: - vars_to_optimize = None - - m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \ - m.sync_optimizer = tf_utils.setup_training( - m.total_loss_op, - args.solver.initial_learning_rate, - args.solver.steps_per_decay, - args.solver.learning_rate_decay, - args.solver.momentum, - args.solver.max_steps, - args.solver.sync, - args.solver.adjust_lr_sync, - args.solver.num_workers, - args.solver.task, - vars_to_optimize=vars_to_optimize, - clip_gradient_norm=args.solver.clip_gradient_norm, - typ=args.solver.typ, momentum2=args.solver.momentum2, - adam_eps=args.solver.adam_eps) - - if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay': - m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k, - m.global_step_op) - elif args.arch.sample_gt_prob_type == 'zero': - m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32) - - elif args.arch.sample_gt_prob_type.split('_')[0] == 'step': - step = 
int(args.arch.sample_gt_prob_type.split('_')[1]) - m.sample_gt_prob_op = tf_utils.step_gt_prob( - step, m.input_tensors['step']['step_number'][0,0,0]) - - m.sample_action_type = args.arch.action_sample_type - m.sample_action_combine_type = args.arch.action_sample_combine_type - - m.summary_ops = { - summary_mode: _add_summaries(m, args, summary_mode, - args.summary.arop_full_summary_iters)} - - m.init_op = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4, - write_version=tf.train.SaverDef.V2) - return m diff --git a/research/cognitive_mapping_and_planning/tfcode/cmp_summary.py b/research/cognitive_mapping_and_planning/tfcode/cmp_summary.py deleted file mode 100644 index 55313bfbd52a9e079e1de5093ae1882a9bf1d858..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/cmp_summary.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Code for setting up summaries for CMP. 
-""" - -import sys, os, numpy as np -import matplotlib.pyplot as plt - - -import tensorflow as tf - -from tensorflow.contrib import slim -from tensorflow.contrib.slim import arg_scope - -import logging -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -from src import utils -import src.file_utils as fu -import tfcode.nav_utils as nu - -def _vis_readout_maps(outputs, global_step, output_dir, metric_summary, N): - # outputs is [gt_map, pred_map]: - if N >= 0: - outputs = outputs[:N] - N = len(outputs) - - plt.set_cmap('jet') - fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*2), (5,5)) - axes = axes.ravel()[::-1].tolist() - for i in range(N): - gt_map, pred_map = outputs[i] - for j in [0]: - for k in range(gt_map.shape[4]): - # Display something like the midpoint of the trajectory. - id = np.int(gt_map.shape[1]/2) - - ax = axes.pop(); - ax.imshow(gt_map[j,id,:,:,k], origin='lower', interpolation='none', - vmin=0., vmax=1.) - ax.set_axis_off(); - if i == 0: ax.set_title('gt_map') - - ax = axes.pop(); - ax.imshow(pred_map[j,id,:,:,k], origin='lower', interpolation='none', - vmin=0., vmax=1.) - ax.set_axis_off(); - if i == 0: ax.set_title('pred_map') - - file_name = os.path.join(output_dir, 'readout_map_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - plt.close(fig) - -def _vis(outputs, global_step, output_dir, metric_summary, N): - # Plot the value map, goal for various maps to see what if the model is - # learning anything useful. - # - # outputs is [values, goals, maps, occupancy, conf]. 
- # - if N >= 0: - outputs = outputs[:N] - N = len(outputs) - - plt.set_cmap('jet') - fig, axes = utils.subplot(plt, (N, outputs[0][0].shape[4]*5), (5,5)) - axes = axes.ravel()[::-1].tolist() - for i in range(N): - values, goals, maps, occupancy, conf = outputs[i] - for j in [0]: - for k in range(values.shape[4]): - # Display something like the midpoint of the trajectory. - id = np.int(values.shape[1]/2) - - ax = axes.pop(); - ax.imshow(goals[j,id,:,:,k], origin='lower', interpolation='none') - ax.set_axis_off(); - if i == 0: ax.set_title('goal') - - ax = axes.pop(); - ax.imshow(occupancy[j,id,:,:,k], origin='lower', interpolation='none') - ax.set_axis_off(); - if i == 0: ax.set_title('occupancy') - - ax = axes.pop(); - ax.imshow(conf[j,id,:,:,k], origin='lower', interpolation='none', - vmin=0., vmax=1.) - ax.set_axis_off(); - if i == 0: ax.set_title('conf') - - ax = axes.pop(); - ax.imshow(values[j,id,:,:,k], origin='lower', interpolation='none') - ax.set_axis_off(); - if i == 0: ax.set_title('value') - - ax = axes.pop(); - ax.imshow(maps[j,id,:,:,k], origin='lower', interpolation='none') - ax.set_axis_off(); - if i == 0: ax.set_title('incr map') - - file_name = os.path.join(output_dir, 'value_vis_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - plt.close(fig) - -def _summary_vis(m, batch_size, num_steps, arop_full_summary_iters): - arop = []; arop_summary_iters = []; arop_eval_fns = []; - vis_value_ops = []; vis_goal_ops = []; vis_map_ops = []; - vis_occupancy_ops = []; vis_conf_ops = []; - for i, val_op in enumerate(m.value_ops): - vis_value_op = tf.reduce_mean(tf.abs(val_op), axis=3, keep_dims=True) - vis_value_ops.append(vis_value_op) - - vis_occupancy_op = tf.reduce_mean(tf.abs(m.occupancys[i]), 3, True) - vis_occupancy_ops.append(vis_occupancy_op) - - vis_conf_op = tf.reduce_max(tf.abs(m.confs[i]), axis=3, keep_dims=True) - vis_conf_ops.append(vis_conf_op) - - 
ego_goal_imgs_i_op = m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)] - vis_goal_op = tf.reduce_max(ego_goal_imgs_i_op, 4, True) - vis_goal_ops.append(vis_goal_op) - - vis_map_op = tf.reduce_mean(tf.abs(m.ego_map_ops[i]), 4, True) - vis_map_ops.append(vis_map_op) - - vis_goal_ops = tf.concat(vis_goal_ops, 4) - vis_map_ops = tf.concat(vis_map_ops, 4) - vis_value_ops = tf.concat(vis_value_ops, 3) - vis_occupancy_ops = tf.concat(vis_occupancy_ops, 3) - vis_conf_ops = tf.concat(vis_conf_ops, 3) - - sh = tf.unstack(tf.shape(vis_value_ops))[1:] - vis_value_ops = tf.reshape(vis_value_ops, shape=[batch_size, -1] + sh) - - sh = tf.unstack(tf.shape(vis_conf_ops))[1:] - vis_conf_ops = tf.reshape(vis_conf_ops, shape=[batch_size, -1] + sh) - - sh = tf.unstack(tf.shape(vis_occupancy_ops))[1:] - vis_occupancy_ops = tf.reshape(vis_occupancy_ops, shape=[batch_size,-1] + sh) - - # Save memory, only return time steps that need to be visualized, factor of - # 32 CPU memory saving. - id = np.int(num_steps/2) - vis_goal_ops = tf.expand_dims(vis_goal_ops[:,id,:,:,:], axis=1) - vis_map_ops = tf.expand_dims(vis_map_ops[:,id,:,:,:], axis=1) - vis_value_ops = tf.expand_dims(vis_value_ops[:,id,:,:,:], axis=1) - vis_conf_ops = tf.expand_dims(vis_conf_ops[:,id,:,:,:], axis=1) - vis_occupancy_ops = tf.expand_dims(vis_occupancy_ops[:,id,:,:,:], axis=1) - - arop += [[vis_value_ops, vis_goal_ops, vis_map_ops, vis_occupancy_ops, - vis_conf_ops]] - arop_summary_iters += [arop_full_summary_iters] - arop_eval_fns += [_vis] - return arop, arop_summary_iters, arop_eval_fns - -def _summary_readout_maps(m, num_steps, arop_full_summary_iters): - arop = []; arop_summary_iters = []; arop_eval_fns = []; - id = np.int(num_steps-1) - vis_readout_maps_gt = m.readout_maps_gt - vis_readout_maps_prob = tf.reshape(m.readout_maps_probs, - shape=tf.shape(vis_readout_maps_gt)) - vis_readout_maps_gt = tf.expand_dims(vis_readout_maps_gt[:,id,:,:,:], 1) - vis_readout_maps_prob = 
tf.expand_dims(vis_readout_maps_prob[:,id,:,:,:], 1) - arop += [[vis_readout_maps_gt, vis_readout_maps_prob]] - arop_summary_iters += [arop_full_summary_iters] - arop_eval_fns += [_vis_readout_maps] - return arop, arop_summary_iters, arop_eval_fns - -def _add_summaries(m, args, summary_mode, arop_full_summary_iters): - task_params = args.navtask.task_params - - summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op] + \ - m.loss_ops + m.acc_ops - summarize_names = ['lr', 'global_step', 'sample_gt_prob_op'] + \ - m.loss_ops_names + ['acc_{:d}'.format(i) for i in range(len(m.acc_ops))] - to_aggregate = [0, 0, 0] + [1]*len(m.loss_ops_names) + [1]*len(m.acc_ops) - - scope_name = 'summary' - with tf.name_scope(scope_name): - s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters, - summarize_ops, summarize_names, - to_aggregate, m.action_prob_op, - m.input_tensors, scope_name=scope_name) - if summary_mode == 'val': - arop, arop_summary_iters, arop_eval_fns = _summary_vis( - m, task_params.batch_size, task_params.num_steps, - arop_full_summary_iters) - s_ops.additional_return_ops += arop - s_ops.arop_summary_iters += arop_summary_iters - s_ops.arop_eval_fns += arop_eval_fns - - if args.arch.readout_maps: - arop, arop_summary_iters, arop_eval_fns = _summary_readout_maps( - m, task_params.num_steps, arop_full_summary_iters) - s_ops.additional_return_ops += arop - s_ops.arop_summary_iters += arop_summary_iters - s_ops.arop_eval_fns += arop_eval_fns - - return s_ops diff --git a/research/cognitive_mapping_and_planning/tfcode/cmp_utils.py b/research/cognitive_mapping_and_planning/tfcode/cmp_utils.py deleted file mode 100644 index 6d87c697b4b29128c8b8a42caac27aeb4d657ec6..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/cmp_utils.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility functions for setting up the CMP graph. -""" - -import os, numpy as np -import matplotlib.pyplot as plt - - -import tensorflow as tf - -from tensorflow.contrib import slim -from tensorflow.contrib.slim import arg_scope -import logging -from src import utils -import src.file_utils as fu -from tfcode import tf_utils - -resnet_v2 = tf_utils.resnet_v2 -custom_residual_block = tf_utils.custom_residual_block - -def value_iteration_network( - fr, num_iters, val_neurons, action_neurons, kernel_size, share_wts=False, - name='vin', wt_decay=0.0001, activation_fn=None, shape_aware=False): - """ - Constructs a Value Iteration Network, convolutions and max pooling across - channels. - Input: - fr: NxWxHxC - val_neurons: Number of channels for maintaining the value. - action_neurons: Computes action_neurons * val_neurons at each iteration to - max pool over. 
- Output: - value image: NxHxWx(val_neurons) - """ - init_var = np.sqrt(2.0/(kernel_size**2)/(val_neurons*action_neurons)) - vals = [] - with tf.variable_scope(name) as varscope: - if shape_aware == False: - fr_shape = tf.unstack(tf.shape(fr)) - val_shape = tf.stack(fr_shape[:-1] + [val_neurons]) - val = tf.zeros(val_shape, name='val_init') - else: - val = tf.expand_dims(tf.zeros_like(fr[:,:,:,0]), dim=-1) * \ - tf.constant(0., dtype=tf.float32, shape=[1,1,1,val_neurons]) - val_shape = tf.shape(val) - vals.append(val) - for i in range(num_iters): - if share_wts: - # The first Value Iteration maybe special, so it can have its own - # paramterss. - scope = 'conv' - if i == 0: scope = 'conv_0' - if i > 1: varscope.reuse_variables() - else: - scope = 'conv_{:d}'.format(i) - val = slim.conv2d(tf.concat([val, fr], 3, name='concat_{:d}'.format(i)), - num_outputs=action_neurons*val_neurons, - kernel_size=kernel_size, stride=1, activation_fn=activation_fn, - scope=scope, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(wt_decay), - weights_initializer=tf.random_normal_initializer(stddev=init_var), - biases_initializer=tf.zeros_initializer()) - val = tf.reshape(val, [-1, action_neurons*val_neurons, 1, 1], - name='re_{:d}'.format(i)) - val = slim.max_pool2d(val, kernel_size=[action_neurons,1], - stride=[action_neurons,1], padding='VALID', - scope='val_{:d}'.format(i)) - val = tf.reshape(val, val_shape, name='unre_{:d}'.format(i)) - vals.append(val) - return val, vals - - -def rotate_preds(loc_on_map, relative_theta, map_size, preds, - output_valid_mask): - with tf.name_scope('rotate'): - flow_op = tf_utils.get_flow(loc_on_map, relative_theta, map_size=map_size) - if type(preds) != list: - rotated_preds, valid_mask_warps = tf_utils.dense_resample(preds, flow_op, - output_valid_mask) - else: - rotated_preds = [] ;valid_mask_warps = [] - for pred in preds: - rotated_pred, valid_mask_warp = tf_utils.dense_resample(pred, flow_op, - output_valid_mask) - 
rotated_preds.append(rotated_pred) - valid_mask_warps.append(valid_mask_warp) - return rotated_preds, valid_mask_warps - -def get_visual_frustum(map_size, shape_like, expand_dims=[0,0]): - with tf.name_scope('visual_frustum'): - l = np.tril(np.ones(map_size)) ;l = l + l[:,::-1] - l = (l == 2).astype(np.float32) - for e in expand_dims: - l = np.expand_dims(l, axis=e) - confs_probs = tf.constant(l, dtype=tf.float32) - confs_probs = tf.ones_like(shape_like, dtype=tf.float32) * confs_probs - return confs_probs - -def deconv(x, is_training, wt_decay, neurons, strides, layers_per_block, - kernel_size, conv_fn, name, offset=0): - """Generates a up sampling network with residual connections. - """ - batch_norm_param = {'center': True, 'scale': True, - 'activation_fn': tf.nn.relu, - 'is_training': is_training} - outs = [] - for i, (neuron, stride) in enumerate(zip(neurons, strides)): - for s in range(layers_per_block): - scope = '{:s}_{:d}_{:d}'.format(name, i+1+offset,s+1) - x = custom_residual_block(x, neuron, kernel_size, stride, scope, - is_training, wt_decay, use_residual=True, - residual_stride_conv=True, conv_fn=conv_fn, - batch_norm_param=batch_norm_param) - stride = 1 - outs.append((x,True)) - return x, outs - -def fr_v2(x, output_neurons, inside_neurons, is_training, name='fr', - wt_decay=0.0001, stride=1, updates_collections=tf.GraphKeys.UPDATE_OPS): - """Performs fusion of information between the map and the reward map. 
- Inputs - x: NxHxWxC1 - - Outputs - fr map: NxHxWx(output_neurons) - """ - if type(stride) != list: - stride = [stride] - with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope( - is_training=is_training, weight_decay=wt_decay)): - with slim.arg_scope([slim.batch_norm], updates_collections=updates_collections) as arg_sc: - # Change the updates_collections for the conv normalizer_params to None - for i in range(len(arg_sc.keys())): - if 'convolution' in arg_sc.keys()[i]: - arg_sc.values()[i]['normalizer_params']['updates_collections'] = updates_collections - with slim.arg_scope(arg_sc): - bottleneck = resnet_v2.bottleneck - blocks = [] - for i, s in enumerate(stride): - b = resnet_v2.resnet_utils.Block( - 'block{:d}'.format(i + 1), bottleneck, [{ - 'depth': output_neurons, - 'depth_bottleneck': inside_neurons, - 'stride': stride[i] - }]) - blocks.append(b) - x, outs = resnet_v2.resnet_v2(x, blocks, num_classes=None, global_pool=False, - output_stride=None, include_root_block=False, - reuse=False, scope=name) - return x, outs diff --git a/research/cognitive_mapping_and_planning/tfcode/nav_utils.py b/research/cognitive_mapping_and_planning/tfcode/nav_utils.py deleted file mode 100644 index 2f764f33df91a80f6539dcbae1e0fa7093becd29..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/nav_utils.py +++ /dev/null @@ -1,435 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Various losses for training navigation agents. - -Defines various loss functions for navigation agents, -compute_losses_multi_or. -""" - -import os, numpy as np -import matplotlib.pyplot as plt - - -import tensorflow as tf - -from tensorflow.contrib import slim -from tensorflow.contrib.slim import arg_scope -from tensorflow.contrib.slim.nets import resnet_v2 -from tensorflow.python.training import moving_averages -import logging -from src import utils -import src.file_utils as fu -from tfcode import tf_utils - - -def compute_losses_multi_or(logits, actions_one_hot, weights=None, - num_actions=-1, data_loss_wt=1., reg_loss_wt=1., - ewma_decay=0.99, reg_loss_op=None): - assert(num_actions > 0), 'num_actions must be specified and must be > 0.' - - with tf.name_scope('loss'): - if weights is None: - weight = tf.ones_like(actions_one_hot, dtype=tf.float32, name='weight') - - actions_one_hot = tf.cast(tf.reshape(actions_one_hot, [-1, num_actions], - 're_actions_one_hot'), tf.float32) - weights = tf.reduce_sum(tf.reshape(weights, [-1, num_actions], 're_weight'), - reduction_indices=1) - total = tf.reduce_sum(weights) - - action_prob = tf.nn.softmax(logits) - action_prob = tf.reduce_sum(tf.multiply(action_prob, actions_one_hot), - reduction_indices=1) - example_loss = -tf.log(tf.maximum(tf.constant(1e-4), action_prob)) - - data_loss_op = tf.reduce_sum(example_loss * weights) / total - if reg_loss_op is None: - if reg_loss_wt > 0: - reg_loss_op = tf.add_n(tf.losses.get_regularization_losses()) - else: - reg_loss_op = tf.constant(0.) 
- - if reg_loss_wt > 0: - total_loss_op = data_loss_wt*data_loss_op + reg_loss_wt*reg_loss_op - else: - total_loss_op = data_loss_wt*data_loss_op - - is_correct = tf.cast(tf.greater(action_prob, 0.5, name='pred_class'), tf.float32) - acc_op = tf.reduce_sum(is_correct*weights) / total - - ewma_acc_op = moving_averages.weighted_moving_average( - acc_op, ewma_decay, weight=total, name='ewma_acc') - - acc_ops = [ewma_acc_op] - - return reg_loss_op, data_loss_op, total_loss_op, acc_ops - - -def get_repr_from_image(images_reshaped, modalities, data_augment, encoder, - freeze_conv, wt_decay, is_training): - # Pass image through lots of convolutional layers, to obtain pool5 - if modalities == ['rgb']: - with tf.name_scope('pre_rgb'): - x = (images_reshaped + 128.) / 255. # Convert to brightness between 0 and 1. - if data_augment.relight and is_training: - x = tf_utils.distort_image(x, fast_mode=data_augment.relight_fast) - x = (x-0.5)*2.0 - scope_name = encoder - elif modalities == ['depth']: - with tf.name_scope('pre_d'): - d_image = images_reshaped - x = 2*(d_image[...,0] - 80.0)/100.0 - y = d_image[...,1] - d_image = tf.concat([tf.expand_dims(x, -1), tf.expand_dims(y, -1)], 3) - x = d_image - scope_name = 'd_'+encoder - - resnet_is_training = is_training and (not freeze_conv) - with slim.arg_scope(resnet_v2.resnet_utils.resnet_arg_scope(resnet_is_training)): - fn = getattr(tf_utils, encoder) - x, end_points = fn(x, num_classes=None, global_pool=False, - output_stride=None, reuse=None, - scope=scope_name) - vars_ = slim.get_variables_to_restore() - - conv_feat = x - return conv_feat, vars_ - -def default_train_step_kwargs(m, obj, logdir, rng_seed, is_chief, num_steps, - iters, train_display_interval, - dagger_sample_bn_false): - train_step_kwargs = {} - train_step_kwargs['obj'] = obj - train_step_kwargs['m'] = m - - # rng_data has 2 independent rngs, one for sampling episodes and one for - # sampling perturbs (so that we can make results reproducible. 
- train_step_kwargs['rng_data'] = [np.random.RandomState(rng_seed), - np.random.RandomState(rng_seed)] - train_step_kwargs['rng_action'] = np.random.RandomState(rng_seed) - if is_chief: - train_step_kwargs['writer'] = tf.summary.FileWriter(logdir) #, m.tf_graph) - else: - train_step_kwargs['writer'] = None - train_step_kwargs['iters'] = iters - train_step_kwargs['train_display_interval'] = train_display_interval - train_step_kwargs['num_steps'] = num_steps - train_step_kwargs['logdir'] = logdir - train_step_kwargs['dagger_sample_bn_false'] = dagger_sample_bn_false - return train_step_kwargs - -# Utilities for visualizing and analysing validation output. -def save_d_at_t(outputs, global_step, output_dir, metric_summary, N): - """Save distance to goal at all time steps. - - Args: - outputs : [gt_dist_to_goal]. - global_step : number of iterations. - output_dir : output directory. - metric_summary : to append scalars to summary. - N : number of outputs to process. - - """ - d_at_t = np.concatenate(map(lambda x: x[0][:,:,0]*1, outputs), axis=0) - fig, axes = utils.subplot(plt, (1,1), (5,5)) - axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.') - axes.set_xlabel('time step') - axes.set_ylabel('dist to next goal') - axes.grid('on') - file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step)) - utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True) - plt.close(fig) - return None - -def save_all(outputs, global_step, output_dir, metric_summary, N): - """Save numerous statistics. - - Args: - outputs : [locs, goal_loc, gt_dist_to_goal, node_ids, perturbs] - global_step : number of iterations. - output_dir : output directory. - metric_summary : to append scalars to summary. - N : number of outputs to process. 
- """ - all_locs = np.concatenate(map(lambda x: x[0], outputs), axis=0) - all_goal_locs = np.concatenate(map(lambda x: x[1], outputs), axis=0) - all_d_at_t = np.concatenate(map(lambda x: x[2][:,:,0]*1, outputs), axis=0) - all_node_ids = np.concatenate(map(lambda x: x[3], outputs), axis=0) - all_perturbs = np.concatenate(map(lambda x: x[4], outputs), axis=0) - - file_name = os.path.join(output_dir, 'all_locs_at_t_{:d}.pkl'.format(global_step)) - vars = [all_locs, all_goal_locs, all_d_at_t, all_node_ids, all_perturbs] - var_names = ['all_locs', 'all_goal_locs', 'all_d_at_t', 'all_node_ids', 'all_perturbs'] - utils.save_variables(file_name, vars, var_names, overwrite=True) - return None - -def eval_ap(outputs, global_step, output_dir, metric_summary, N, num_classes=4): - """Processes the collected outputs to compute AP for action prediction. - - Args: - outputs : [logits, labels] - global_step : global_step. - output_dir : where to store results. - metric_summary : summary object to add summaries to. - N : number of outputs to process. - num_classes : number of classes to compute AP over, and to reshape tensors. - """ - if N >= 0: - outputs = outputs[:N] - logits = np.concatenate(map(lambda x: x[0], outputs), axis=0).reshape((-1, num_classes)) - labels = np.concatenate(map(lambda x: x[1], outputs), axis=0).reshape((-1, num_classes)) - aps = [] - for i in range(logits.shape[1]): - ap, rec, prec = utils.calc_pr(labels[:,i], logits[:,i]) - ap = ap[0] - tf_utils.add_value_to_summary(metric_summary, 'aps/ap_{:d}: '.format(i), ap) - aps.append(ap) - return aps - -def eval_dist(outputs, global_step, output_dir, metric_summary, N): - """Processes the collected outputs during validation to - 1. Plot the distance over time curve. - 2. Compute mean and median distances. - 3. Plots histogram of end distances. - - Args: - outputs : [locs, goal_loc, gt_dist_to_goal]. - global_step : global_step. - output_dir : where to store results. 
- metric_summary : summary object to add summaries to. - N : number of outputs to process. - """ - SUCCESS_THRESH = 3 - if N >= 0: - outputs = outputs[:N] - - # Plot distance at time t. - d_at_t = [] - for i in range(len(outputs)): - locs, goal_loc, gt_dist_to_goal = outputs[i] - d_at_t.append(gt_dist_to_goal[:,:,0]*1) - - # Plot the distance. - fig, axes = utils.subplot(plt, (1,1), (5,5)) - d_at_t = np.concatenate(d_at_t, axis=0) - axes.plot(np.arange(d_at_t.shape[1]), np.mean(d_at_t, axis=0), 'r.') - axes.set_xlabel('time step') - axes.set_ylabel('dist to next goal') - axes.grid('on') - file_name = os.path.join(output_dir, 'dist_at_t_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - file_name = os.path.join(output_dir, 'dist_at_t_{:d}.pkl'.format(global_step)) - utils.save_variables(file_name, [d_at_t], ['d_at_t'], overwrite=True) - plt.close(fig) - - # Plot the trajectories and the init_distance and final distance. - d_inits = [] - d_ends = [] - for i in range(len(outputs)): - locs, goal_loc, gt_dist_to_goal = outputs[i] - d_inits.append(gt_dist_to_goal[:,0,0]*1) - d_ends.append(gt_dist_to_goal[:,-1,0]*1) - - # Plot the distance. 
- fig, axes = utils.subplot(plt, (1,1), (5,5)) - d_inits = np.concatenate(d_inits, axis=0) - d_ends = np.concatenate(d_ends, axis=0) - axes.plot(d_inits+np.random.rand(*(d_inits.shape))-0.5, - d_ends+np.random.rand(*(d_ends.shape))-0.5, '.', mec='red', mew=1.0) - axes.set_xlabel('init dist'); axes.set_ylabel('final dist'); - axes.grid('on'); axes.axis('equal'); - title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}' - title_str = title_str.format( - np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75), - 100*(np.mean(d_ends <= SUCCESS_THRESH))) - axes.set_title(title_str) - file_name = os.path.join(output_dir, 'dist_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - - file_name = os.path.join(output_dir, 'dist_{:d}.pkl'.format(global_step)) - utils.save_variables(file_name, [d_inits, d_ends], ['d_inits', 'd_ends'], - overwrite=True) - plt.close(fig) - - # Plot the histogram of the end_distance. 
- with plt.style.context('seaborn-white'): - d_ends_ = np.sort(d_ends) - d_inits_ = np.sort(d_inits) - leg = []; - fig, ax = utils.subplot(plt, (1,1), (5,5)) - ax.grid('on') - ax.set_xlabel('Distance from goal'); ax.xaxis.label.set_fontsize(16); - ax.set_ylabel('Fraction of data'); ax.yaxis.label.set_fontsize(16); - ax.plot(d_ends_, np.arange(d_ends_.size)*1./d_ends_.size, 'r') - ax.plot(d_inits_, np.arange(d_inits_.size)*1./d_inits_.size, 'k') - leg.append('Final'); leg.append('Init'); - ax.legend(leg, fontsize='x-large'); - ax.set_axis_on() - title_str = 'mean: {:0.1f}, 50: {:0.1f}, 75: {:0.2f}, s: {:0.1f}' - title_str = title_str.format( - np.mean(d_ends), np.median(d_ends), np.percentile(d_ends, q=75), - 100*(np.mean(d_ends <= SUCCESS_THRESH))) - ax.set_title(title_str) - file_name = os.path.join(output_dir, 'dist_hist_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - - # Log distance metrics. 
- tf_utils.add_value_to_summary(metric_summary, 'dists/success_init: ', - 100*(np.mean(d_inits <= SUCCESS_THRESH))) - tf_utils.add_value_to_summary(metric_summary, 'dists/success_end: ', - 100*(np.mean(d_ends <= SUCCESS_THRESH))) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (75): ', - np.percentile(d_inits, q=75)) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (75): ', - np.percentile(d_ends, q=75)) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (median): ', - np.median(d_inits)) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (median): ', - np.median(d_ends)) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_init (mean): ', - np.mean(d_inits)) - tf_utils.add_value_to_summary(metric_summary, 'dists/dist_end (mean): ', - np.mean(d_ends)) - return np.median(d_inits), np.median(d_ends), np.mean(d_inits), np.mean(d_ends), \ - np.percentile(d_inits, q=75), np.percentile(d_ends, q=75), \ - 100*(np.mean(d_inits) <= SUCCESS_THRESH), 100*(np.mean(d_ends) <= SUCCESS_THRESH) - -def plot_trajectories(outputs, global_step, output_dir, metric_summary, N): - """Processes the collected outputs during validation to plot the trajectories - in the top view. - - Args: - outputs : [locs, orig_maps, goal_loc]. - global_step : global_step. - output_dir : where to store results. - metric_summary : summary object to add summaries to. - N : number of outputs to process. - """ - if N >= 0: - outputs = outputs[:N] - N = len(outputs) - - plt.set_cmap('gray') - fig, axes = utils.subplot(plt, (N, outputs[0][1].shape[0]), (5,5)) - axes = axes.ravel()[::-1].tolist() - for i in range(N): - locs, orig_maps, goal_loc = outputs[i] - is_semantic = np.isnan(goal_loc[0,0,1]) - for j in range(orig_maps.shape[0]): - ax = axes.pop(); - ax.plot(locs[j,0,0], locs[j,0,1], 'ys') - # Plot one by one, so that they come in different colors. 
- for k in range(goal_loc.shape[1]): - if not is_semantic: - ax.plot(goal_loc[j,k,0], goal_loc[j,k,1], 's') - if False: - ax.plot(locs[j,:,0], locs[j,:,1], 'r.', ms=3) - ax.imshow(orig_maps[j,0,:,:,0], origin='lower') - ax.set_axis_off(); - else: - ax.scatter(locs[j,:,0], locs[j,:,1], c=np.arange(locs.shape[1]), - cmap='jet', s=10, lw=0) - ax.imshow(orig_maps[j,0,:,:,0], origin='lower', vmin=-1.0, vmax=2.0) - if not is_semantic: - xymin = np.minimum(np.min(goal_loc[j,:,:], axis=0), np.min(locs[j,:,:], axis=0)) - xymax = np.maximum(np.max(goal_loc[j,:,:], axis=0), np.max(locs[j,:,:], axis=0)) - else: - xymin = np.min(locs[j,:,:], axis=0) - xymax = np.max(locs[j,:,:], axis=0) - xy1 = (xymax+xymin)/2. - np.maximum(np.max(xymax-xymin), 12) - xy2 = (xymax+xymin)/2. + np.maximum(np.max(xymax-xymin), 12) - ax.set_xlim([xy1[0], xy2[0]]) - ax.set_ylim([xy1[1], xy2[1]]) - ax.set_axis_off() - file_name = os.path.join(output_dir, 'trajectory_{:d}.png'.format(global_step)) - with fu.fopen(file_name, 'w') as f: - fig.savefig(f, bbox_inches='tight', transparent=True, pad_inches=0) - plt.close(fig) - return None - -def add_default_summaries(mode, arop_full_summary_iters, summarize_ops, - summarize_names, to_aggregate, action_prob_op, - input_tensors, scope_name): - assert(mode == 'train' or mode == 'val' or mode == 'test'), \ - 'add_default_summaries mode is neither train or val or test.' 
- - s_ops = tf_utils.get_default_summary_ops() - - if mode == 'train': - s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ - arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( - summarize_ops, summarize_names, mode, to_aggregate=False, - scope_name=scope_name) - s_ops.additional_return_ops += additional_return_ops - s_ops.arop_summary_iters += arop_summary_iters - s_ops.arop_eval_fns += arop_eval_fns - elif mode == 'val': - s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ - arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( - summarize_ops, summarize_names, mode, to_aggregate=to_aggregate, - scope_name=scope_name) - s_ops.additional_return_ops += additional_return_ops - s_ops.arop_summary_iters += arop_summary_iters - s_ops.arop_eval_fns += arop_eval_fns - - elif mode == 'test': - s_ops.summary_ops, s_ops.print_summary_ops, additional_return_ops, \ - arop_summary_iters, arop_eval_fns = tf_utils.simple_summaries( - [], [], mode, to_aggregate=[], scope_name=scope_name) - s_ops.additional_return_ops += additional_return_ops - s_ops.arop_summary_iters += arop_summary_iters - s_ops.arop_eval_fns += arop_eval_fns - - - if mode == 'val': - arop = s_ops.additional_return_ops - arop += [[action_prob_op, input_tensors['train']['action']]] - arop += [[input_tensors['step']['loc_on_map'], - input_tensors['common']['goal_loc'], - input_tensors['step']['gt_dist_to_goal']]] - arop += [[input_tensors['step']['loc_on_map'], - input_tensors['common']['orig_maps'], - input_tensors['common']['goal_loc']]] - s_ops.arop_summary_iters += [-1, arop_full_summary_iters, - arop_full_summary_iters] - s_ops.arop_eval_fns += [eval_ap, eval_dist, plot_trajectories] - - elif mode == 'test': - arop = s_ops.additional_return_ops - arop += [[input_tensors['step']['loc_on_map'], - input_tensors['common']['goal_loc'], - input_tensors['step']['gt_dist_to_goal']]] - arop += [[input_tensors['step']['gt_dist_to_goal']]] - arop += 
[[input_tensors['step']['loc_on_map'], - input_tensors['common']['goal_loc'], - input_tensors['step']['gt_dist_to_goal'], - input_tensors['step']['node_ids'], - input_tensors['step']['perturbs']]] - arop += [[input_tensors['step']['loc_on_map'], - input_tensors['common']['orig_maps'], - input_tensors['common']['goal_loc']]] - s_ops.arop_summary_iters += [-1, -1, -1, arop_full_summary_iters] - s_ops.arop_eval_fns += [eval_dist, save_d_at_t, save_all, - plot_trajectories] - return s_ops - - diff --git a/research/cognitive_mapping_and_planning/tfcode/tf_utils.py b/research/cognitive_mapping_and_planning/tfcode/tf_utils.py deleted file mode 100644 index 5f96d8ff5ce7473f0ec49096abcbac274e6c4fcc..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/tf_utils.py +++ /dev/null @@ -1,840 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import numpy as np -import sys -import tensorflow as tf -import src.utils as utils -import logging -from tensorflow.contrib import slim -from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops -from tensorflow.contrib.slim import arg_scope -from tensorflow.contrib.slim.nets import resnet_v2 -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variable_scope -sys.path.insert(0, '../slim') -from preprocessing import inception_preprocessing as ip - -resnet_v2_50 = resnet_v2.resnet_v2_50 - - -def custom_residual_block(x, neurons, kernel_size, stride, name, is_training, - wt_decay=0.0001, use_residual=True, - residual_stride_conv=True, conv_fn=slim.conv2d, - batch_norm_param=None): - - # batch norm x and relu - init_var = np.sqrt(2.0/(kernel_size**2)/neurons) - with arg_scope([conv_fn], - weights_regularizer=slim.l2_regularizer(wt_decay), - weights_initializer=tf.random_normal_initializer(stddev=init_var), - biases_initializer=tf.zeros_initializer()): - - if batch_norm_param is None: - batch_norm_param = {'center': True, 'scale': False, - 'activation_fn':tf.nn.relu, - 'is_training': is_training} - - y = slim.batch_norm(x, scope=name+'_bn', **batch_norm_param) - - y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size, stride=stride, - activation_fn=None, scope=name+'_1', - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_param) - - y = conv_fn(y, num_outputs=neurons, kernel_size=kernel_size, - stride=1, activation_fn=None, scope=name+'_2') - - if use_residual: - if stride != 1 or x.get_shape().as_list()[-1] != neurons: - batch_norm_param_ = dict(batch_norm_param) - batch_norm_param_['activation_fn'] = None - x = conv_fn(x, num_outputs=neurons, kernel_size=1, - stride=stride if 
residual_stride_conv else 1, - activation_fn=None, scope=name+'_0_1x1', - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_param_) - if not residual_stride_conv: - x = slim.avg_pool2d(x, 1, stride=stride, scope=name+'_0_avg') - - y = tf.add(x, y, name=name+'_add') - - return y - -def step_gt_prob(step, step_number_op): - # Change samping probability from 1 to -1 at step steps. - with tf.name_scope('step_gt_prob'): - out = tf.cond(tf.less(step_number_op, step), - lambda: tf.constant(1.), lambda: tf.constant(-1.)) - return out - -def inverse_sigmoid_decay(k, global_step_op): - with tf.name_scope('inverse_sigmoid_decay'): - k = tf.constant(k, dtype=tf.float32) - tmp = k*tf.exp(-tf.cast(global_step_op, tf.float32)/k) - tmp = tmp / (1. + tmp) - return tmp - -def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'): - """ Resample reward at particular locations. - Args: - im: ...xHxWxC matrix to sample from. - flow_im: ...xHxWx2 matrix, samples the image using absolute offsets as given - by the flow_im. - """ - with tf.name_scope(name): - valid_mask = None - - x, y = tf.unstack(flow_im, axis=-1) - x = tf.cast(tf.reshape(x, [-1]), tf.float32) - y = tf.cast(tf.reshape(y, [-1]), tf.float32) - - # constants - shape = tf.unstack(tf.shape(im)) - channels = shape[-1] - width = shape[-2] - height = shape[-3] - num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32') - zero = tf.constant(0, dtype=tf.int32) - - # Round up and down. 
- x0 = tf.cast(tf.floor(x), 'int32'); x1 = x0 + 1; - y0 = tf.cast(tf.floor(y), 'int32'); y1 = y0 + 1; - - if output_valid_mask: - valid_mask = tf.logical_and( - tf.logical_and(tf.less_equal(x, tf.cast(width, tf.float32)-1.), tf.greater_equal(x, 0.)), - tf.logical_and(tf.less_equal(y, tf.cast(height, tf.float32)-1.), tf.greater_equal(y, 0.))) - valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1]) - - x0 = tf.clip_by_value(x0, zero, width-1) - x1 = tf.clip_by_value(x1, zero, width-1) - y0 = tf.clip_by_value(y0, zero, height-1) - y1 = tf.clip_by_value(y1, zero, height-1) - - dim2 = width; dim1 = width * height; - - # Create base index - base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1,1]) - base = tf.reshape(tf.tile(base, [1, height*width]), shape=[-1]) - - base_y0 = base + y0 * dim2 - base_y1 = base + y1 * dim2 - idx_a = base_y0 + x0 - idx_b = base_y1 + x0 - idx_c = base_y0 + x1 - idx_d = base_y1 + x1 - - # use indices to lookup pixels in the flat image and restore channels dim - sh = tf.stack([tf.constant(-1,dtype=tf.int32), channels]) - im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32) - pixel_a = tf.gather(im_flat, idx_a) - pixel_b = tf.gather(im_flat, idx_b) - pixel_c = tf.gather(im_flat, idx_c) - pixel_d = tf.gather(im_flat, idx_d) - - # and finally calculate interpolated values - x1_f = tf.to_float(x1) - y1_f = tf.to_float(y1) - - wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) - wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1) - wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1) - wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1) - - output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d]) - output = tf.reshape(output, shape=tf.shape(im)) - return output, valid_mask - -def get_flow(t, theta, map_size, name_scope='gen_flow'): - """ - Rotates the map by theta and translates the rotated map by t. - - Assume that the robot rotates by an angle theta and then moves forward by - translation t. 
This function returns the flow field field. For every pixel in - the new image it tells us which pixel in the original image it came from: - NewI(x, y) = OldI(flow_x(x,y), flow_y(x,y)). - - Assume there is a point p in the original image. Robot rotates by R and moves - forward by t. p1 = Rt*p; p2 = p1 - t; (the world moves in opposite direction. - So, p2 = Rt*p - t, thus p2 came from R*(p2+t), which is what this function - calculates. - - t: ... x 2 (translation for B batches of N motions each). - theta: ... x 1 (rotation for B batches of N motions each). - - Output: ... x map_size x map_size x 2 - """ - - with tf.name_scope(name_scope): - tx, ty = tf.unstack(tf.reshape(t, shape=[-1, 1, 1, 1, 2]), axis=4) - theta = tf.reshape(theta, shape=[-1, 1, 1, 1]) - c = tf.constant((map_size-1.)/2., dtype=tf.float32) - - x, y = np.meshgrid(np.arange(map_size), np.arange(map_size)) - x = tf.constant(x[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='x', - shape=[1, map_size, map_size, 1]) - y = tf.constant(y[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='y', - shape=[1,map_size, map_size, 1]) - - x = x-(-tx+c) - y = y-(-ty+c) - - sin_theta = tf.sin(theta) - cos_theta = tf.cos(theta) - xr = cos_theta*x - sin_theta*y - yr = sin_theta*x + cos_theta*y - - xr = xr + c - yr = yr + c - - flow = tf.stack([xr, yr], axis=-1) - sh = tf.unstack(tf.shape(t), axis=0) - sh = tf.stack(sh[:-1]+[tf.constant(_, dtype=tf.int32) for _ in [map_size, map_size, 2]]) - flow = tf.reshape(flow, shape=sh) - return flow - -def distort_image(im, fast_mode=False): - # All images in the same batch are transformed the same way, but over - # iterations you see different distortions. - # im should be float with values between 0 and 1. 
- im_ = tf.reshape(im, shape=(-1,1,3)) - im_ = ip.apply_with_random_selector( - im_, lambda x, ordering: ip.distort_color(x, ordering, fast_mode), - num_cases=4) - im_ = tf.reshape(im_, tf.shape(im)) - return im_ - -def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0, - batch_norm_param=None, dropout_ratio=0.0, is_training=None): - if dropout_ratio > 0: - assert(is_training is not None), \ - 'is_training needs to be defined when trainnig with dropout.' - - repr = [] - for i, neuron in enumerate(neurons): - init_var = np.sqrt(2.0/neuron) - if batch_norm_param is not None: - x = slim.fully_connected(x, neuron, activation_fn=None, - weights_initializer=tf.random_normal_initializer(stddev=init_var), - weights_regularizer=slim.l2_regularizer(wt_decay), - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_param, - biases_initializer=tf.zeros_initializer(), - scope='{:s}_{:d}'.format(name, offset+i)) - else: - x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu, - weights_initializer=tf.random_normal_initializer(stddev=init_var), - weights_regularizer=slim.l2_regularizer(wt_decay), - biases_initializer=tf.zeros_initializer(), - scope='{:s}_{:d}'.format(name, offset+i)) - if dropout_ratio > 0: - x = slim.dropout(x, keep_prob=1-dropout_ratio, is_training=is_training, - scope='{:s}_{:d}'.format('dropout_'+name, offset+i)) - repr.append(x) - - if num_pred is not None: - init_var = np.sqrt(2.0/num_pred) - x = slim.fully_connected(x, num_pred, - weights_regularizer=slim.l2_regularizer(wt_decay), - weights_initializer=tf.random_normal_initializer(stddev=init_var), - biases_initializer=tf.zeros_initializer(), - activation_fn=None, - scope='{:s}_pred'.format(name)) - return x, repr - -def concat_state_x_list(f, names): - af = {} - for i, k in enumerate(names): - af[k] = np.concatenate([x[i] for x in f], axis=1) - return af - -def concat_state_x(f, names): - af = {} - for k in names: - af[k] = np.concatenate([x[k] for x in f], axis=1) - # af[k] = 
np.swapaxes(af[k], 0, 1) - return af - -def sample_action(rng, action_probs, optimal_action, sample_gt_prob, - type='sample', combine_type='one_or_other'): - optimal_action_ = optimal_action/np.sum(optimal_action+0., 1, keepdims=True) - action_probs_ = action_probs/np.sum(action_probs+0.001, 1, keepdims=True) - batch_size = action_probs_.shape[0] - - action = np.zeros((batch_size), dtype=np.int32) - action_sample_wt = np.zeros((batch_size), dtype=np.float32) - if combine_type == 'add': - sample_gt_prob_ = np.minimum(np.maximum(sample_gt_prob, 0.), 1.) - - for i in range(batch_size): - if combine_type == 'one_or_other': - sample_gt = rng.rand() < sample_gt_prob - if sample_gt: distr_ = optimal_action_[i,:]*1. - else: distr_ = action_probs_[i,:]*1. - elif combine_type == 'add': - distr_ = optimal_action_[i,:]*sample_gt_prob_ + \ - (1.-sample_gt_prob_)*action_probs_[i,:] - distr_ = distr_ / np.sum(distr_) - - if type == 'sample': - action[i] = np.argmax(rng.multinomial(1, distr_, size=1)) - elif type == 'argmax': - action[i] = np.argmax(distr_) - action_sample_wt[i] = action_probs_[i, action[i]] / distr_[action[i]] - return action, action_sample_wt - -def train_step_custom_online_sampling(sess, train_op, global_step, - train_step_kwargs, mode='train'): - m = train_step_kwargs['m'] - obj = train_step_kwargs['obj'] - rng_data = train_step_kwargs['rng_data'] - rng_action = train_step_kwargs['rng_action'] - writer = train_step_kwargs['writer'] - iters = train_step_kwargs['iters'] - num_steps = train_step_kwargs['num_steps'] - logdir = train_step_kwargs['logdir'] - dagger_sample_bn_false = train_step_kwargs['dagger_sample_bn_false'] - train_display_interval = train_step_kwargs['train_display_interval'] - if 'outputs' not in m.train_ops: - m.train_ops['outputs'] = [] - - s_ops = m.summary_ops[mode] - val_additional_ops = [] - - # Print all variables here. 
- if False: - v = tf.get_collection(tf.GraphKeys.VARIABLES) - v_op = [_.value() for _ in v] - v_op_value = sess.run(v_op) - - filter = lambda x, y: 'Adam' in x.name - # filter = lambda x, y: np.is_any_nan(y) - ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)] - v = [v[i] for i in ind] - v_op_value = [v_op_value[i] for i in ind] - - for i in range(len(v)): - logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.', - v[i].name, np.any(np.isnan(v_op_value[i])), - np.linalg.norm(v_op_value[i])) - - tt = utils.Timer() - for i in range(iters): - tt.tic() - # Sample a room. - e = obj.sample_env(rng_data) - - # Initialize the agent. - init_env_state = e.reset(rng_data) - - # Get and process the common data. - input = e.get_common_data() - input = e.pre_common_data(input) - feed_dict = prepare_feed_dict(m.input_tensors['common'], input) - if dagger_sample_bn_false: - feed_dict[m.train_ops['batch_norm_is_training_op']] = False - common_data = sess.run(m.train_ops['common'], feed_dict=feed_dict) - - states = [] - state_features = [] - state_targets = [] - net_state_to_input = [] - step_data_cache = [] - executed_actions = [] - rewards = [] - action_sample_wts = [] - states.append(init_env_state) - - net_state = sess.run(m.train_ops['init_state'], feed_dict=feed_dict) - net_state = dict(zip(m.train_ops['state_names'], net_state)) - net_state_to_input.append(net_state) - for j in range(num_steps): - f = e.get_features(states[j], j) - f = e.pre_features(f) - f.update(net_state) - f['step_number'] = np.ones((1,1,1), dtype=np.int32)*j - state_features.append(f) - - feed_dict = prepare_feed_dict(m.input_tensors['step'], state_features[-1]) - optimal_action = e.get_optimal_action(states[j], j) - for x, v in zip(m.train_ops['common'], common_data): - feed_dict[x] = v - if dagger_sample_bn_false: - feed_dict[m.train_ops['batch_norm_is_training_op']] = False - outs = sess.run([m.train_ops['step'], m.sample_gt_prob_op, - m.train_ops['step_data_cache'], - 
m.train_ops['updated_state'], - m.train_ops['outputs']], feed_dict=feed_dict) - action_probs = outs[0] - sample_gt_prob = outs[1] - step_data_cache.append(dict(zip(m.train_ops['step_data_cache'], outs[2]))) - net_state = outs[3] - if hasattr(e, 'update_state'): - outputs = outs[4] - outputs = dict(zip(m.train_ops['output_names'], outputs)) - e.update_state(outputs, j) - state_targets.append(e.get_targets(states[j], j)) - - if j < num_steps-1: - # Sample from action_probs and optimal action. - action, action_sample_wt = sample_action( - rng_action, action_probs, optimal_action, sample_gt_prob, - m.sample_action_type, m.sample_action_combine_type) - next_state, reward = e.take_action(states[j], action, j) - executed_actions.append(action) - states.append(next_state) - rewards.append(reward) - action_sample_wts.append(action_sample_wt) - net_state = dict(zip(m.train_ops['state_names'], net_state)) - net_state_to_input.append(net_state) - - # Concatenate things together for training. - rewards = np.array(rewards).T - action_sample_wts = np.array(action_sample_wts).T - executed_actions = np.array(executed_actions).T - all_state_targets = concat_state_x(state_targets, e.get_targets_name()) - all_state_features = concat_state_x(state_features, - e.get_features_name()+['step_number']) - # all_state_net = concat_state_x(net_state_to_input, - # m.train_ops['state_names']) - all_step_data_cache = concat_state_x(step_data_cache, - m.train_ops['step_data_cache']) - - dict_train = dict(input) - dict_train.update(all_state_features) - dict_train.update(all_state_targets) - # dict_train.update(all_state_net) - dict_train.update(net_state_to_input[0]) - dict_train.update(all_step_data_cache) - dict_train.update({'rewards': rewards, - 'action_sample_wts': action_sample_wts, - 'executed_actions': executed_actions}) - feed_dict = prepare_feed_dict(m.input_tensors['train'], dict_train) - for x in m.train_ops['step_data_cache']: - feed_dict[x] = all_step_data_cache[x] - if mode == 
'train': - n_step = sess.run(global_step) - - if np.mod(n_step, train_display_interval) == 0: - total_loss, np_global_step, summary, print_summary = sess.run( - [train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops], - feed_dict=feed_dict) - logging.error("") - else: - total_loss, np_global_step, summary = sess.run( - [train_op, global_step, s_ops.summary_ops], feed_dict=feed_dict) - - if writer is not None and summary is not None: - writer.add_summary(summary, np_global_step) - - should_stop = sess.run(m.should_stop_op) - - if mode != 'train': - arop = [[] for j in range(len(s_ops.additional_return_ops))] - for j in range(len(s_ops.additional_return_ops)): - if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]: - arop[j] = s_ops.additional_return_ops[j] - val = sess.run(arop, feed_dict=feed_dict) - val_additional_ops.append(val) - tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters), - type='time') - - if mode != 'train': - # Write the default val summaries. 
- summary, print_summary, np_global_step = sess.run( - [s_ops.summary_ops, s_ops.print_summary_ops, global_step]) - if writer is not None and summary is not None: - writer.add_summary(summary, np_global_step) - - # write custom validation ops - val_summarys = [] - val_additional_ops = zip(*val_additional_ops) - if len(s_ops.arop_eval_fns) > 0: - val_metric_summary = tf.summary.Summary() - for i in range(len(s_ops.arop_eval_fns)): - val_summary = None - if s_ops.arop_eval_fns[i] is not None: - val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i], - np_global_step, logdir, - val_metric_summary, - s_ops.arop_summary_iters[i]) - val_summarys.append(val_summary) - if writer is not None: - writer.add_summary(val_metric_summary, np_global_step) - - # Return the additional val_ops - total_loss = (val_additional_ops, val_summarys) - should_stop = None - - return total_loss, should_stop - -def train_step_custom_v2(sess, train_op, global_step, train_step_kwargs, - mode='train'): - m = train_step_kwargs['m'] - obj = train_step_kwargs['obj'] - rng = train_step_kwargs['rng'] - writer = train_step_kwargs['writer'] - iters = train_step_kwargs['iters'] - logdir = train_step_kwargs['logdir'] - train_display_interval = train_step_kwargs['train_display_interval'] - - s_ops = m.summary_ops[mode] - val_additional_ops = [] - - # Print all variables here. 
- if False: - v = tf.get_collection(tf.GraphKeys.VARIABLES) - v_op = [_.value() for _ in v] - v_op_value = sess.run(v_op) - - filter = lambda x, y: 'Adam' in x.name - # filter = lambda x, y: np.is_any_nan(y) - ind = [i for i, (_, __) in enumerate(zip(v, v_op_value)) if filter(_, __)] - v = [v[i] for i in ind] - v_op_value = [v_op_value[i] for i in ind] - - for i in range(len(v)): - logging.info('XXXX: variable: %30s, is_any_nan: %5s, norm: %f.', - v[i].name, np.any(np.isnan(v_op_value[i])), - np.linalg.norm(v_op_value[i])) - - tt = utils.Timer() - for i in range(iters): - tt.tic() - e = obj.sample_env(rng) - rngs = e.gen_rng(rng) - input_data = e.gen_data(*rngs) - input_data = e.pre_data(input_data) - feed_dict = prepare_feed_dict(m.input_tensors, input_data) - - if mode == 'train': - n_step = sess.run(global_step) - - if np.mod(n_step, train_display_interval) == 0: - total_loss, np_global_step, summary, print_summary = sess.run( - [train_op, global_step, s_ops.summary_ops, s_ops.print_summary_ops], - feed_dict=feed_dict) - else: - total_loss, np_global_step, summary = sess.run( - [train_op, global_step, s_ops.summary_ops], - feed_dict=feed_dict) - - if writer is not None and summary is not None: - writer.add_summary(summary, np_global_step) - - should_stop = sess.run(m.should_stop_op) - - if mode != 'train': - arop = [[] for j in range(len(s_ops.additional_return_ops))] - for j in range(len(s_ops.additional_return_ops)): - if s_ops.arop_summary_iters[j] < 0 or i < s_ops.arop_summary_iters[j]: - arop[j] = s_ops.additional_return_ops[j] - val = sess.run(arop, feed_dict=feed_dict) - val_additional_ops.append(val) - tt.toc(log_at=60, log_str='val timer {:d} / {:d}: '.format(i, iters), - type='time') - - if mode != 'train': - # Write the default val summaries. 
- summary, print_summary, np_global_step = sess.run( - [s_ops.summary_ops, s_ops.print_summary_ops, global_step]) - if writer is not None and summary is not None: - writer.add_summary(summary, np_global_step) - - # write custom validation ops - val_summarys = [] - val_additional_ops = zip(*val_additional_ops) - if len(s_ops.arop_eval_fns) > 0: - val_metric_summary = tf.summary.Summary() - for i in range(len(s_ops.arop_eval_fns)): - val_summary = None - if s_ops.arop_eval_fns[i] is not None: - val_summary = s_ops.arop_eval_fns[i](val_additional_ops[i], - np_global_step, logdir, - val_metric_summary, - s_ops.arop_summary_iters[i]) - val_summarys.append(val_summary) - if writer is not None: - writer.add_summary(val_metric_summary, np_global_step) - - # Return the additional val_ops - total_loss = (val_additional_ops, val_summarys) - should_stop = None - - return total_loss, should_stop - -def train_step_custom(sess, train_op, global_step, train_step_kwargs, - mode='train'): - m = train_step_kwargs['m'] - params = train_step_kwargs['params'] - rng = train_step_kwargs['rng'] - writer = train_step_kwargs['writer'] - iters = train_step_kwargs['iters'] - gen_rng = train_step_kwargs['gen_rng'] - logdir = train_step_kwargs['logdir'] - gen_data = train_step_kwargs['gen_data'] - pre_data = train_step_kwargs['pre_data'] - train_display_interval = train_step_kwargs['train_display_interval'] - - val_additional_ops = [] - # Print all variables here. 
- if False: - v = tf.get_collection(tf.GraphKeys.VARIABLES) - for _ in v: - val = sess.run(_.value()) - logging.info('variable: %30s, is_any_nan: %5s, norm: %f.', _.name, - np.any(np.isnan(val)), np.linalg.norm(val)) - - for i in range(iters): - rngs = gen_rng(params, rng) - input_data = gen_data(params, *rngs) - input_data = pre_data(params, input_data) - feed_dict = prepare_feed_dict(m.input_tensors, input_data) - - if mode == 'train': - n_step = sess.run(global_step) - - if np.mod(n_step, train_display_interval) == 0: - total_loss, np_global_step, summary, print_summary = sess.run( - [train_op, global_step, m.summary_op[mode], m.print_summary_op[mode]], - feed_dict=feed_dict) - else: - total_loss, np_global_step, summary = sess.run( - [train_op, global_step, m.summary_op[mode]], - feed_dict=feed_dict) - - if writer is not None: - writer.add_summary(summary, np_global_step) - - should_stop = sess.run(m.should_stop_op) - - if mode == 'val': - val = sess.run(m.agg_update_op[mode] + m.additional_return_op[mode], - feed_dict=feed_dict) - val_additional_ops.append(val[len(m.agg_update_op[mode]):]) - - if mode == 'val': - summary, print_summary, np_global_step = sess.run( - [m.summary_op[mode], m.print_summary_op[mode], global_step]) - if writer is not None: - writer.add_summary(summary, np_global_step) - sess.run([m.agg_reset_op[mode]]) - - # write custom validation ops - if m.eval_metrics_fn[mode] is not None: - val_metric_summary = m.eval_metrics_fn[mode](val_additional_ops, - np_global_step, logdir) - if writer is not None: - writer.add_summary(val_metric_summary, np_global_step) - - total_loss = val_additional_ops - should_stop = None - - return total_loss, should_stop - -def setup_training(loss_op, initial_learning_rate, steps_per_decay, - learning_rate_decay, momentum, max_steps, - sync=False, adjust_lr_sync=True, - num_workers=1, replica_id=0, vars_to_optimize=None, - clip_gradient_norm=0, typ=None, momentum2=0.999, - adam_eps=1e-8): - if sync and 
adjust_lr_sync: - initial_learning_rate = initial_learning_rate * num_workers - max_steps = np.int(max_steps / num_workers) - steps_per_decay = np.int(steps_per_decay / num_workers) - - global_step_op = slim.get_or_create_global_step() - lr_op = tf.train.exponential_decay(initial_learning_rate, - global_step_op, steps_per_decay, learning_rate_decay, staircase=True) - if typ == 'sgd': - optimizer = tf.train.MomentumOptimizer(lr_op, momentum) - elif typ == 'adam': - optimizer = tf.train.AdamOptimizer(learning_rate=lr_op, beta1=momentum, - beta2=momentum2, epsilon=adam_eps) - - if sync: - - sync_optimizer = tf.train.SyncReplicasOptimizer(optimizer, - replicas_to_aggregate=num_workers, - replica_id=replica_id, - total_num_replicas=num_workers) - train_op = slim.learning.create_train_op(loss_op, sync_optimizer, - variables_to_train=vars_to_optimize, - clip_gradient_norm=clip_gradient_norm) - else: - sync_optimizer = None - train_op = slim.learning.create_train_op(loss_op, optimizer, - variables_to_train=vars_to_optimize, - clip_gradient_norm=clip_gradient_norm) - should_stop_op = tf.greater_equal(global_step_op, max_steps) - return lr_op, global_step_op, train_op, should_stop_op, optimizer, sync_optimizer - -def add_value_to_summary(metric_summary, tag, val, log=True, tag_str=None): - """Adds a scalar summary to the summary object. 
Optionally also logs to - logging.""" - new_value = metric_summary.value.add(); - new_value.tag = tag - new_value.simple_value = val - if log: - if tag_str is None: - tag_str = tag + '%f' - logging.info(tag_str, val) - -def add_scalar_summary_op(tensor, name=None, - summary_key='summaries', print_summary_key='print_summaries', prefix=''): - collections = [] - op = tf.summary.scalar(name, tensor, collections=collections) - if summary_key != print_summary_key: - tf.add_to_collection(summary_key, op) - - op = tf.Print(op, [tensor], ' {:-<25s}: '.format(name) + prefix) - tf.add_to_collection(print_summary_key, op) - return op - -def setup_inputs(inputs): - input_tensors = {} - input_shapes = {} - for (name, typ, sz) in inputs: - _ = tf.placeholder(typ, shape=sz, name=name) - input_tensors[name] = _ - input_shapes[name] = sz - return input_tensors, input_shapes - -def prepare_feed_dict(input_tensors, inputs): - feed_dict = {} - for n in input_tensors.keys(): - feed_dict[input_tensors[n]] = inputs[n].astype(input_tensors[n].dtype.as_numpy_dtype) - return feed_dict - -def simple_add_summaries(summarize_ops, summarize_names, - summary_key='summaries', - print_summary_key='print_summaries', prefix=''): - for op, name, in zip(summarize_ops, summarize_names): - add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix) - - summary_op = tf.summary.merge_all(summary_key) - print_summary_op = tf.summary.merge_all(print_summary_key) - return summary_op, print_summary_op - -def add_summary_ops(m, summarize_ops, summarize_names, to_aggregate=None, - summary_key='summaries', - print_summary_key='print_summaries', prefix=''): - if type(to_aggregate) != list: - to_aggregate = [to_aggregate for _ in summarize_ops] - - # set up aggregating metrics - if np.any(to_aggregate): - agg_ops = [] - for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate): - if to_agg: - # agg_ops.append(slim.metrics.streaming_mean(op, return_reset_op=True)) - 
agg_ops.append(tf.contrib.metrics.streaming_mean(op)) - # agg_ops.append(tf.contrib.metrics.streaming_mean(op, return_reset_op=True)) - else: - agg_ops.append([None, None, None]) - - # agg_values_op, agg_update_op, agg_reset_op = zip(*agg_ops) - # agg_update_op = [x for x in agg_update_op if x is not None] - # agg_reset_op = [x for x in agg_reset_op if x is not None] - agg_values_op, agg_update_op = zip(*agg_ops) - agg_update_op = [x for x in agg_update_op if x is not None] - agg_reset_op = [tf.no_op()] - else: - agg_values_op = [None for _ in to_aggregate] - agg_update_op = [tf.no_op()] - agg_reset_op = [tf.no_op()] - - for op, name, to_agg, agg_op in zip(summarize_ops, summarize_names, to_aggregate, agg_values_op): - if to_agg: - add_scalar_summary_op(agg_op, name, summary_key, print_summary_key, prefix) - else: - add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix) - - summary_op = tf.summary.merge_all(summary_key) - print_summary_op = tf.summary.merge_all(print_summary_key) - return summary_op, print_summary_op, agg_update_op, agg_reset_op - - - -def accum_val_ops(outputs, names, global_step, output_dir, metric_summary, N): - """Processes the collected outputs to compute AP for action prediction. - - Args: - outputs : List of scalar ops to summarize. - names : Name of the scalar ops. - global_step : global_step. - output_dir : where to store results. - metric_summary : summary object to add summaries to. - N : number of outputs to process. 
- """ - outs = [] - if N >= 0: - outputs = outputs[:N] - for i in range(len(outputs[0])): - scalar = np.array(map(lambda x: x[i], outputs)) - assert(scalar.ndim == 1) - add_value_to_summary(metric_summary, names[i], np.mean(scalar), - tag_str='{:>27s}: [{:s}]: %f'.format(names[i], '')) - outs.append(np.mean(scalar)) - return outs - -def get_default_summary_ops(): - return utils.Foo(summary_ops=None, print_summary_ops=None, - additional_return_ops=[], arop_summary_iters=[], - arop_eval_fns=[]) - - -def simple_summaries(summarize_ops, summarize_names, mode, to_aggregate=False, - scope_name='summary'): - - if type(to_aggregate) != list: - to_aggregate = [to_aggregate for _ in summarize_ops] - - summary_key = '{:s}_summaries'.format(mode) - print_summary_key = '{:s}_print_summaries'.format(mode) - prefix=' [{:s}]: '.format(mode) - - # Default ops for things that dont need to be aggregated. - if not np.all(to_aggregate): - for op, name, to_agg in zip(summarize_ops, summarize_names, to_aggregate): - if not to_agg: - add_scalar_summary_op(op, name, summary_key, print_summary_key, prefix) - summary_ops = tf.summary.merge_all(summary_key) - print_summary_ops = tf.summary.merge_all(print_summary_key) - else: - summary_ops = tf.no_op() - print_summary_ops = tf.no_op() - - # Default ops for things that dont need to be aggregated. 
- if np.any(to_aggregate): - additional_return_ops = [[summarize_ops[i] - for i, x in enumerate(to_aggregate )if x]] - arop_summary_iters = [-1] - s_names = ['{:s}/{:s}'.format(scope_name, summarize_names[i]) - for i, x in enumerate(to_aggregate) if x] - fn = lambda outputs, global_step, output_dir, metric_summary, N: \ - accum_val_ops(outputs, s_names, global_step, output_dir, metric_summary, - N) - arop_eval_fns = [fn] - else: - additional_return_ops = [] - arop_summary_iters = [] - arop_eval_fns = [] - return summary_ops, print_summary_ops, additional_return_ops, \ - arop_summary_iters, arop_eval_fns diff --git a/research/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py b/research/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py deleted file mode 100644 index ccf3ab23b06b71ed2a6d300b9a7d2a67a396c52e..0000000000000000000000000000000000000000 --- a/research/cognitive_mapping_and_planning/tfcode/vision_baseline_lstm.py +++ /dev/null @@ -1,533 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import numpy as np - - -import tensorflow as tf - -from tensorflow.contrib import slim - -import logging -from tensorflow.python.platform import app -from tensorflow.python.platform import flags -from src import utils -import src.file_utils as fu -import tfcode.nav_utils as nu -from tfcode import tf_utils - -setup_train_step_kwargs = nu.default_train_step_kwargs -compute_losses_multi_or = nu.compute_losses_multi_or -get_repr_from_image = nu.get_repr_from_image - -_save_d_at_t = nu.save_d_at_t -_save_all = nu.save_all -_eval_ap = nu.eval_ap -_eval_dist = nu.eval_dist -_plot_trajectories = nu.plot_trajectories - -def lstm_online(cell_fn, num_steps, inputs, state, varscope): - # inputs is B x num_steps x C, C channels. - # state is 2 tuple with B x 1 x C1, B x 1 x C2 - # Output state is always B x 1 x C - inputs = tf.unstack(inputs, axis=1, num=num_steps) - state = tf.unstack(state, axis=1, num=1)[0] - outputs = [] - - if num_steps > 1: - varscope.reuse_variables() - - for s in range(num_steps): - output, state = cell_fn(inputs[s], state) - outputs.append(output) - outputs = tf.stack(outputs, axis=1) - state = tf.stack([state], axis=1) - return outputs, state - -def _inputs(problem, lstm_states, lstm_state_dims): - # Set up inputs. - with tf.name_scope('inputs'): - n_views = problem.n_views - - inputs = [] - inputs.append(('orig_maps', tf.float32, - (problem.batch_size, 1, None, None, 1))) - inputs.append(('goal_loc', tf.float32, - (problem.batch_size, problem.num_goals, 2))) - - # For initing LSTM. 
- inputs.append(('rel_goal_loc_at_start', tf.float32, - (problem.batch_size, problem.num_goals, - problem.rel_goal_loc_dim))) - common_input_data, _ = tf_utils.setup_inputs(inputs) - - inputs = [] - inputs.append(('imgs', tf.float32, (problem.batch_size, None, n_views, - problem.img_height, problem.img_width, - problem.img_channels))) - # Goal location as a tuple of delta location and delta theta. - inputs.append(('rel_goal_loc', tf.float32, (problem.batch_size, None, - problem.rel_goal_loc_dim))) - if problem.outputs.visit_count: - inputs.append(('visit_count', tf.int32, (problem.batch_size, None, 1))) - inputs.append(('last_visit', tf.int32, (problem.batch_size, None, 1))) - - for i, (state, dim) in enumerate(zip(lstm_states, lstm_state_dims)): - inputs.append((state, tf.float32, (problem.batch_size, 1, dim))) - - if problem.outputs.egomotion: - inputs.append(('incremental_locs', tf.float32, - (problem.batch_size, None, 2))) - inputs.append(('incremental_thetas', tf.float32, - (problem.batch_size, None, 1))) - - inputs.append(('step_number', tf.int32, (1, None, 1))) - inputs.append(('node_ids', tf.int32, (problem.batch_size, None, - problem.node_ids_dim))) - inputs.append(('perturbs', tf.float32, (problem.batch_size, None, - problem.perturbs_dim))) - - # For plotting result plots - inputs.append(('loc_on_map', tf.float32, (problem.batch_size, None, 2))) - inputs.append(('gt_dist_to_goal', tf.float32, (problem.batch_size, None, 1))) - step_input_data, _ = tf_utils.setup_inputs(inputs) - - inputs = [] - inputs.append(('executed_actions', tf.int32, (problem.batch_size, None))) - inputs.append(('rewards', tf.float32, (problem.batch_size, None))) - inputs.append(('action_sample_wts', tf.float32, (problem.batch_size, None))) - inputs.append(('action', tf.int32, (problem.batch_size, None, - problem.num_actions))) - train_data, _ = tf_utils.setup_inputs(inputs) - train_data.update(step_input_data) - train_data.update(common_input_data) - return common_input_data, 
step_input_data, train_data - - -def _add_summaries(m, summary_mode, arop_full_summary_iters): - summarize_ops = [m.lr_op, m.global_step_op, m.sample_gt_prob_op, - m.total_loss_op, m.data_loss_op, m.reg_loss_op] + m.acc_ops - summarize_names = ['lr', 'global_step', 'sample_gt_prob_op', 'total_loss', - 'data_loss', 'reg_loss'] + \ - ['acc_{:d}'.format(i) for i in range(len(m.acc_ops))] - to_aggregate = [0, 0, 0, 1, 1, 1] + [1]*len(m.acc_ops) - - scope_name = 'summary' - with tf.name_scope(scope_name): - s_ops = nu.add_default_summaries(summary_mode, arop_full_summary_iters, - summarize_ops, summarize_names, - to_aggregate, m.action_prob_op, - m.input_tensors, scope_name=scope_name) - m.summary_ops = {summary_mode: s_ops} - -def visit_count_fc(visit_count, last_visit, embed_neurons, wt_decay, fc_dropout): - with tf.variable_scope('embed_visit_count'): - visit_count = tf.reshape(visit_count, shape=[-1]) - last_visit = tf.reshape(last_visit, shape=[-1]) - - visit_count = tf.clip_by_value(visit_count, clip_value_min=-1, - clip_value_max=15) - last_visit = tf.clip_by_value(last_visit, clip_value_min=-1, - clip_value_max=15) - visit_count = tf.one_hot(visit_count, depth=16, axis=1, dtype=tf.float32, - on_value=10., off_value=0.) - last_visit = tf.one_hot(last_visit, depth=16, axis=1, dtype=tf.float32, - on_value=10., off_value=0.) 
- f = tf.concat([visit_count, last_visit], 1) - x, _ = tf_utils.fc_network( - f, neurons=embed_neurons, wt_decay=wt_decay, name='visit_count_embed', - offset=0, batch_norm_param=None, dropout_ratio=fc_dropout, - is_training=is_training) - return x - -def lstm_setup(name, x, batch_size, is_single_step, lstm_dim, lstm_out, - num_steps, state_input_op): - # returns state_name, state_init_op, updated_state_op, out_op - with tf.name_scope('reshape_'+name): - sh = x.get_shape().as_list() - x = tf.reshape(x, shape=[batch_size, -1, sh[-1]]) - - with tf.variable_scope(name) as varscope: - cell = tf.contrib.rnn.LSTMCell( - num_units=lstm_dim, forget_bias=1.0, state_is_tuple=False, - num_proj=lstm_out, use_peepholes=True, - initializer=tf.random_uniform_initializer(-0.01, 0.01, seed=0), - cell_clip=None, proj_clip=None) - - sh = [batch_size, 1, lstm_dim+lstm_out] - state_init_op = tf.constant(0., dtype=tf.float32, shape=sh) - - fn = lambda ns: lstm_online(cell, ns, x, state_input_op, varscope) - out_op, updated_state_op = tf.cond(is_single_step, lambda: fn(1), lambda: - fn(num_steps)) - - return name, state_init_op, updated_state_op, out_op - -def combine_setup(name, combine_type, embed_img, embed_goal, num_img_neuorons=None, - num_goal_neurons=None): - with tf.name_scope(name + '_' + combine_type): - if combine_type == 'add': - # Simple concat features from goal and image - out = embed_img + embed_goal - - elif combine_type == 'multiply': - # Multiply things together - re_embed_img = tf.reshape( - embed_img, shape=[-1, num_img_neuorons / num_goal_neurons, - num_goal_neurons]) - re_embed_goal = tf.reshape(embed_goal, shape=[-1, num_goal_neurons, 1]) - x = tf.matmul(re_embed_img, re_embed_goal, transpose_a=False, transpose_b=False) - out = slim.flatten(x) - elif combine_type == 'none' or combine_type == 'imgonly': - out = embed_img - elif combine_type == 'goalonly': - out = embed_goal - else: - logging.fatal('Undefined combine_type: %s', combine_type) - return out - - -def 
preprocess_egomotion(locs, thetas): - with tf.name_scope('pre_ego'): - pre_ego = tf.concat([locs, tf.sin(thetas), tf.cos(thetas)], 2) - sh = pre_ego.get_shape().as_list() - pre_ego = tf.reshape(pre_ego, [-1, sh[-1]]) - return pre_ego - -def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode): - # Set up the model. - tf.set_random_seed(args.solver.seed) - task_params = args.navtask.task_params - num_steps = task_params.num_steps - num_goals = task_params.num_goals - num_actions = task_params.num_actions - num_actions_ = num_actions - - n_views = task_params.n_views - - batch_norm_is_training_op = \ - tf.placeholder_with_default(batch_norm_is_training, shape=[], - name='batch_norm_is_training_op') - # Setup the inputs - m.input_tensors = {} - lstm_states = []; lstm_state_dims = []; - state_names = []; updated_state_ops = []; init_state_ops = []; - if args.arch.lstm_output: - lstm_states += ['lstm_output'] - lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions] - if args.arch.lstm_ego: - lstm_states += ['lstm_ego'] - lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out] - lstm_states += ['lstm_img'] - lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out] - elif args.arch.lstm_img: - # An LSTM only on the image - lstm_states += ['lstm_img'] - lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out] - else: - # No LSTMs involved here. 
- None - - m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \ - _inputs(task_params, lstm_states, lstm_state_dims) - - with tf.name_scope('check_size'): - is_single_step = tf.equal(tf.unstack(tf.shape(m.input_tensors['step']['imgs']), - num=6)[1], 1) - - images_reshaped = tf.reshape(m.input_tensors['step']['imgs'], - shape=[-1, task_params.img_height, task_params.img_width, - task_params.img_channels], name='re_image') - - rel_goal_loc_reshaped = tf.reshape(m.input_tensors['step']['rel_goal_loc'], - shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc') - - x, vars_ = get_repr_from_image( - images_reshaped, task_params.modalities, task_params.data_augment, - args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay, - is_training) - - # Reshape into nice things so that these can be accumulated over time steps - # for faster backprop. - sh_before = x.get_shape().as_list() - m.encoder_output = tf.reshape( - x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:]) - x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:]) - - # Add a layer to reduce dimensions for a fc layer. - if args.arch.dim_reduce_neurons > 0: - ks = 1; neurons = args.arch.dim_reduce_neurons; - init_var = np.sqrt(2.0/(ks**2)/neurons) - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - m.conv_feat = slim.conv2d( - x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce', - weights_regularizer=slim.l2_regularizer(args.solver.wt_decay), - weights_initializer=tf.random_normal_initializer(stddev=init_var)) - reshape_conv_feat = slim.flatten(m.conv_feat) - sh = reshape_conv_feat.get_shape().as_list() - m.reshape_conv_feat = tf.reshape(reshape_conv_feat, - shape=[-1, sh[1]*n_views]) - - # Restore these from a checkpoint. 
- if args.solver.pretrained_path is not None: - m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path, - vars_) - else: - m.init_fn = None - - # Hit the goal_location with a bunch of fully connected layers, to embed it - # into some space. - with tf.variable_scope('embed_goal'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - m.embed_goal, _ = tf_utils.fc_network( - rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons, - wt_decay=args.solver.wt_decay, name='goal_embed', offset=0, - batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, - is_training=is_training) - - if args.arch.embed_goal_for_state: - with tf.variable_scope('embed_goal_for_state'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - m.embed_goal_for_state, _ = tf_utils.fc_network( - m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:], - neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay, - name='goal_embed', offset=0, batch_norm_param=batch_norm_param, - dropout_ratio=args.arch.fc_dropout, is_training=is_training) - - # Hit the goal_location with a bunch of fully connected layers, to embed it - # into some space. - with tf.variable_scope('embed_img'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - m.embed_img, _ = tf_utils.fc_network( - m.reshape_conv_feat, neurons=args.arch.img_embed_neurons, - wt_decay=args.solver.wt_decay, name='img_embed', offset=0, - batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, - is_training=is_training) - - # For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an - # LSTM, combine with image features and accumulate those in an LSTM. Finally - # combine what you get from the image LSTM with the goal to output an action. 
- if args.arch.lstm_ego: - ego_reshaped = preprocess_egomotion(m.input_tensors['step']['incremental_locs'], - m.input_tensors['step']['incremental_thetas']) - with tf.variable_scope('embed_ego'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - m.embed_ego, _ = tf_utils.fc_network( - ego_reshaped, neurons=args.arch.ego_embed_neurons, - wt_decay=args.solver.wt_decay, name='ego_embed', offset=0, - batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout, - is_training=is_training) - - state_name, state_init_op, updated_state_op, out_op = lstm_setup( - 'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step, - args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals, - m.input_tensors['step']['lstm_ego']) - state_names += [state_name] - init_state_ops += [state_init_op] - updated_state_ops += [updated_state_op] - - # Combine the output with the vision features. - m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego, - m.embed_img, out_op, - args.arch.img_embed_neurons[-1], - args.arch.lstm_ego_out) - - # LSTM on these vision features. - state_name, state_init_op, updated_state_op, out_op = lstm_setup( - 'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step, - args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals, - m.input_tensors['step']['lstm_img']) - state_names += [state_name] - init_state_ops += [state_init_op] - updated_state_ops += [updated_state_op] - - m.img_for_goal = out_op - num_img_for_goal_neurons = args.arch.lstm_img_out - - elif args.arch.lstm_img: - # LSTM on just the image features. 
- state_name, state_init_op, updated_state_op, out_op = lstm_setup( - 'lstm_img', m.embed_img, task_params.batch_size, is_single_step, - args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals, - m.input_tensors['step']['lstm_img']) - state_names += [state_name] - init_state_ops += [state_init_op] - updated_state_ops += [updated_state_op] - m.img_for_goal = out_op - num_img_for_goal_neurons = args.arch.lstm_img_out - - else: - m.img_for_goal = m.embed_img - num_img_for_goal_neurons = args.arch.img_embed_neurons[-1] - - - if args.arch.use_visit_count: - m.embed_visit_count = visit_count_fc( - m.input_tensors['step']['visit_count'], - m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons, - args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training) - m.embed_goal = m.embed_goal + m.embed_visit_count - - m.combined_f = combine_setup('img_goal', args.arch.combine_type, - m.img_for_goal, m.embed_goal, - num_img_for_goal_neurons, - args.arch.goal_embed_neurons[-1]) - - # LSTM on the combined representation. - if args.arch.lstm_output: - name = 'lstm_output' - # A few fully connected layers here. - with tf.variable_scope('action_pred'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - x, _ = tf_utils.fc_network( - m.combined_f, neurons=args.arch.pred_neurons, - wt_decay=args.solver.wt_decay, name='pred', offset=0, - batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout) - - if args.arch.lstm_output_init_state_from_goal: - # Use the goal embedding to initialize the LSTM state. - # UGLY CLUGGY HACK: if this is doing computation for a single time step - # then this will not involve back prop, so we can use the state input from - # the feed dict, otherwise we compute the state representation from the - # goal and feed that in. Necessary for using goal location to generate the - # state representation. 
- m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, dim=1) - state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name], - lambda: m.embed_goal_for_state) - state_name, state_init_op, updated_state_op, out_op = lstm_setup( - name, x, task_params.batch_size, is_single_step, - args.arch.lstm_output_dim, - num_actions_, - num_steps*num_goals, state_op) - init_state_ops += [m.embed_goal_for_state] - else: - state_op = m.input_tensors['step'][name] - state_name, state_init_op, updated_state_op, out_op = lstm_setup( - name, x, task_params.batch_size, is_single_step, - args.arch.lstm_output_dim, - num_actions_, num_steps*num_goals, state_op) - init_state_ops += [state_init_op] - - state_names += [state_name] - updated_state_ops += [updated_state_op] - - out_op = tf.reshape(out_op, shape=[-1, num_actions_]) - if num_actions_ > num_actions: - m.action_logits_op = out_op[:,:num_actions] - m.baseline_op = out_op[:,num_actions:] - else: - m.action_logits_op = out_op - m.baseline_op = None - m.action_prob_op = tf.nn.softmax(m.action_logits_op) - - else: - # A few fully connected layers here. 
- with tf.variable_scope('action_pred'): - batch_norm_param = args.arch.batch_norm_param - batch_norm_param['is_training'] = batch_norm_is_training_op - out_op, _ = tf_utils.fc_network( - m.combined_f, neurons=args.arch.pred_neurons, - wt_decay=args.solver.wt_decay, name='pred', offset=0, - num_pred=num_actions_, - batch_norm_param=batch_norm_param, - dropout_ratio=args.arch.fc_dropout, is_training=is_training) - if num_actions_ > num_actions: - m.action_logits_op = out_op[:,:num_actions] - m.baseline_op = out_op[:,num_actions:] - else: - m.action_logits_op = out_op - m.baseline_op = None - m.action_prob_op = tf.nn.softmax(m.action_logits_op) - - m.train_ops = {} - m.train_ops['step'] = m.action_prob_op - m.train_ops['common'] = [m.input_tensors['common']['orig_maps'], - m.input_tensors['common']['goal_loc'], - m.input_tensors['common']['rel_goal_loc_at_start']] - m.train_ops['state_names'] = state_names - m.train_ops['init_state'] = init_state_ops - m.train_ops['updated_state'] = updated_state_ops - m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op - - # Flat list of ops which cache the step data. 
- m.train_ops['step_data_cache'] = [tf.no_op()] - - if args.solver.freeze_conv: - m.train_ops['step_data_cache'] = [m.encoder_output] - else: - m.train_ops['step_data_cache'] = [] - - ewma_decay = 0.99 if is_training else 0.0 - weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32, - name='weight') - - m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \ - compute_losses_multi_or( - m.action_logits_op, m.input_tensors['train']['action'], - weights=weight, num_actions=num_actions, - data_loss_wt=args.solver.data_loss_wt, - reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay) - - - if args.solver.freeze_conv: - vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_)) - else: - vars_to_optimize = None - - m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \ - m.sync_optimizer = tf_utils.setup_training( - m.total_loss_op, - args.solver.initial_learning_rate, - args.solver.steps_per_decay, - args.solver.learning_rate_decay, - args.solver.momentum, - args.solver.max_steps, - args.solver.sync, - args.solver.adjust_lr_sync, - args.solver.num_workers, - args.solver.task, - vars_to_optimize=vars_to_optimize, - clip_gradient_norm=args.solver.clip_gradient_norm, - typ=args.solver.typ, momentum2=args.solver.momentum2, - adam_eps=args.solver.adam_eps) - - - if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay': - m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k, - m.global_step_op) - elif args.arch.sample_gt_prob_type == 'zero': - m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32) - elif args.arch.sample_gt_prob_type.split('_')[0] == 'step': - step = int(args.arch.sample_gt_prob_type.split('_')[1]) - m.sample_gt_prob_op = tf_utils.step_gt_prob( - step, m.input_tensors['step']['step_number'][0,0,0]) - - m.sample_action_type = args.arch.action_sample_type - m.sample_action_combine_type = args.arch.action_sample_combine_type - _add_summaries(m, summary_mode, 
args.summary.arop_full_summary_iters) - - m.init_op = tf.group(tf.global_variables_initializer(), - tf.local_variables_initializer()) - m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4, - write_version=tf.train.SaverDef.V2) - - return m diff --git a/research/compression/README.md b/research/compression/README.md deleted file mode 100644 index 7f431b5eac6805fbecc276783cef2bc6c62068e5..0000000000000000000000000000000000000000 --- a/research/compression/README.md +++ /dev/null @@ -1,19 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Compression with Neural Networks - -This is a [TensorFlow](http://www.tensorflow.org/) model repo containing -research on compression with neural networks. This repo currently contains -code for the following papers: - -[Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148) - -## Organization -[Image Encoder](image_encoder/): Encoding and decoding images into their binary representation. - -[Entropy Coder](entropy_coder/): Lossless compression of the binary representation. - -## Contact Info -Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)). 
diff --git a/research/compression/entropy_coder/README.md b/research/compression/entropy_coder/README.md deleted file mode 100644 index 59e889990aab71e12ed13122c9b5a796a048402a..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Neural net based entropy coding - -This is a [TensorFlow](http://www.tensorflow.org/) model for additional -lossless compression of bitstreams generated by neural net based image -encoders as described in -[https://arxiv.org/abs/1703.10114](https://arxiv.org/abs/1703.10114). - -To be more specific, the entropy coder aims at compressing further binary -codes which have a 3D tensor structure with: - -* the first two dimensions of the tensors corresponding to the height and -the width of the binary codes, -* the last dimension being the depth of the codes. The last dimension can be -sliced into N groups of K, where each additional group is used by the image -decoder to add more details to the reconstructed image. - -The code in this directory only contains the underlying code probability model -but does not perform the actual compression using arithmetic coding. -The code probability model is enough to compute the theoretical compression -ratio. - - -## Prerequisites -The only software requirements for running the encoder and decoder is having -Tensorflow installed. - -You will also need to add the top level source directory of the entropy coder -to your `PYTHONPATH`, for example: - -`export PYTHONPATH=${PYTHONPATH}:/tmp/models/compression` - - -## Training the entropy coder - -### Synthetic dataset -If you do not have a training dataset, there is a simple code generative model -that you can use to generate a dataset and play with the entropy coder. -The generative model is located under dataset/gen\_synthetic\_dataset.py. 
Note -that this simple generative model is not going to give good results on real -images as it is not supposed to be close to the statistics of the binary -representation of encoded images. Consider it as a toy dataset, no more, no -less. - -To generate a synthetic dataset with 20000 samples: - -`mkdir -p /tmp/dataset` - -`python ./dataset/gen_synthetic_dataset.py --dataset_dir=/tmp/dataset/ ---count=20000` - -Note that the generator has not been optimized at all, generating the synthetic -dataset is currently pretty slow. - -### Training - -If you just want to play with the entropy coder trainer, here is the command -line that can be used to train the entropy coder on the synthetic dataset: - -`mkdir -p /tmp/entropy_coder_train` - -`python ./core/entropy_coder_train.py --task=0 ---train_dir=/tmp/entropy_coder_train/ ---model=progressive ---model_config=./configs/synthetic/model_config.json ---train_config=./configs/synthetic/train_config.json ---input_config=./configs/synthetic/input_config.json -` - -Training is configured using 3 files formatted using JSON: - -* One file is used to configure the underlying entropy coder model. - Currently, only the *progressive* model is supported. - This model takes 2 mandatory parameters and an optional one: - * `layer_depth`: the number of bits per layer (a.k.a. iteration). - Background: the image decoder takes each layer to add more detail - to the image. - * `layer_count`: the maximum number of layers that should be supported - by the model. This should be equal or greater than the maximum number - of layers in the input binary codes. - * `coded_layer_count`: This can be used to consider only partial codes, - keeping only the first `coded_layer_count` layers and ignoring the - remaining layers. If left empty, the binary codes are left unchanged. -* One file to configure the training, including the learning rate, ... - The meaning of the parameters are pretty straightforward. 
Note that this - file is only used during training and is not needed during inference. -* One file to specify the input dataset to use during training. - The dataset is formatted using tf.RecordIO. - - -## Inference: file size after entropy coding. - -### Using a synthetic sample - -Here is the command line to generate a single synthetic sample formatted -in the same way as what is provided by the image encoder: - -`python ./dataset/gen_synthetic_single.py ---sample_filename=/tmp/dataset/sample_0000.npz` - -To actually compute the additional compression ratio using the entropy coder -trained in the previous step: - -`python ./core/entropy_coder_single.py ---model=progressive ---model_config=./configs/synthetic/model_config.json ---input_codes=/tmp/dataset/sample_0000.npz ---checkpoint=/tmp/entropy_coder_train/model.ckpt-209078` - -where the checkpoint number should be adjusted accordingly. diff --git a/research/compression/entropy_coder/__init__.py b/research/compression/entropy_coder/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/compression/entropy_coder/all_models/__init__.py b/research/compression/entropy_coder/all_models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/compression/entropy_coder/all_models/all_models.py b/research/compression/entropy_coder/all_models/all_models.py deleted file mode 100644 index e376dac737667a348065eec622920b0a81ed1ac9..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/all_models/all_models.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Import and register all the entropy coder models.""" - -# pylint: disable=unused-import -from entropy_coder.progressive import progressive diff --git a/research/compression/entropy_coder/all_models/all_models_test.py b/research/compression/entropy_coder/all_models/all_models_test.py deleted file mode 100644 index b8aff504a0a00d579d1b2768164b78b6c095b235..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/all_models/all_models_test.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Basic test of all registered models.""" - -import tensorflow as tf - -# pylint: disable=unused-import -import all_models -# pylint: enable=unused-import -from entropy_coder.model import model_factory - - -class AllModelsTest(tf.test.TestCase): - - def testBuildModelForTraining(self): - factory = model_factory.GetModelRegistry() - model_names = factory.GetAvailableModels() - - for m in model_names: - tf.reset_default_graph() - - global_step = tf.Variable(tf.zeros([], dtype=tf.int64), - trainable=False, - name='global_step') - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1) - - batch_size = 3 - height = 40 - width = 20 - depth = 5 - binary_codes = tf.placeholder(dtype=tf.float32, - shape=[batch_size, height, width, depth]) - - # Create a model with the default configuration. - print('Creating model: {}'.format(m)) - model = factory.CreateModel(m) - model.Initialize(global_step, - optimizer, - model.GetConfigStringForUnitTest()) - self.assertTrue(model.loss is None, 'model: {}'.format(m)) - self.assertTrue(model.train_op is None, 'model: {}'.format(m)) - self.assertTrue(model.average_code_length is None, 'model: {}'.format(m)) - - # Build the Tensorflow graph corresponding to the model. 
- model.BuildGraph(binary_codes) - self.assertTrue(model.loss is not None, 'model: {}'.format(m)) - self.assertTrue(model.average_code_length is not None, - 'model: {}'.format(m)) - if model.train_op is None: - print('Model {} is not trainable'.format(m)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/configs/gru_prime3/model_config.json b/research/compression/entropy_coder/configs/gru_prime3/model_config.json deleted file mode 100644 index cf63a4c454df5c47c732c5eaeea481b2aa714665..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/configs/gru_prime3/model_config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "layer_count": 16, - "layer_depth": 32 -} diff --git a/research/compression/entropy_coder/configs/synthetic/input_config.json b/research/compression/entropy_coder/configs/synthetic/input_config.json deleted file mode 100644 index 18455e65120cd45cb04106ed8b6b2d6641e1d49a..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/configs/synthetic/input_config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "data": "/tmp/dataset/synthetic_dataset", - "unique_code_size": true -} diff --git a/research/compression/entropy_coder/configs/synthetic/model_config.json b/research/compression/entropy_coder/configs/synthetic/model_config.json deleted file mode 100644 index c6f1f3e11547a75c05019e24c59a7fc6d2a29e3b..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/configs/synthetic/model_config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "layer_depth": 2, - "layer_count": 8 -} diff --git a/research/compression/entropy_coder/configs/synthetic/train_config.json b/research/compression/entropy_coder/configs/synthetic/train_config.json deleted file mode 100644 index 79e4909fd3f93df983d79890e25b7b61ba14aa40..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/configs/synthetic/train_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - 
"batch_size": 4, - "learning_rate": 0.1, - "decay_rate": 0.9, - "samples_per_decay": 20000 -} diff --git a/research/compression/entropy_coder/core/code_loader.py b/research/compression/entropy_coder/core/code_loader.py deleted file mode 100644 index 603ab724afb0e6c4e94db9c121d7799eaf30fa02..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/core/code_loader.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Load binary codes stored as tf.Example in a TFRecord table.""" - -import tensorflow as tf - - -def ReadFirstCode(dataset): - """Read the first example from a binary code RecordIO table.""" - for record in tf.python_io.tf_record_iterator(dataset): - tf_example = tf.train.Example() - tf_example.ParseFromString(record) - break - return tf_example - - -def LoadBinaryCode(input_config, batch_size): - """Load a batch of binary codes from a tf.Example dataset. - - Args: - input_config: An InputConfig proto containing the input configuration. - batch_size: Output batch size of examples. - - Returns: - A batched tensor of binary codes. - """ - data = input_config.data - - # TODO: Possibly use multiple files (instead of just one). 
- file_list = [data] - filename_queue = tf.train.string_input_producer(file_list, - capacity=4) - reader = tf.TFRecordReader() - _, values = reader.read(filename_queue) - - serialized_example = tf.reshape(values, shape=[1]) - serialized_features = { - 'code_shape': tf.FixedLenFeature([3], - dtype=tf.int64), - 'code': tf.VarLenFeature(tf.float32), - } - example = tf.parse_example(serialized_example, serialized_features) - - # 3D shape: height x width x binary_code_depth - z = example['code_shape'] - code_shape = tf.reshape(tf.cast(z, tf.int32), [3]) - # Un-flatten the binary codes. - code = tf.reshape(tf.sparse_tensor_to_dense(example['code']), code_shape) - - queue_size = 10 - queue = tf.PaddingFIFOQueue( - queue_size + 3 * batch_size, - dtypes=[code.dtype], - shapes=[[None, None, None]]) - enqueue_op = queue.enqueue([code]) - dequeue_code = queue.dequeue_many(batch_size) - queue_runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op]) - tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, queue_runner) - - return dequeue_code diff --git a/research/compression/entropy_coder/core/config_helper.py b/research/compression/entropy_coder/core/config_helper.py deleted file mode 100644 index a7d949e329b93f33d330d1ba494f71ae1704fa3f..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/core/config_helper.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Helper functions used in both train and inference.""" - -import json -import os.path - -import tensorflow as tf - - -def GetConfigString(config_file): - config_string = '' - if config_file is not None: - config_string = open(config_file).read() - return config_string - - -class InputConfig(object): - - def __init__(self, config_string): - config = json.loads(config_string) - self.data = config["data"] - self.unique_code_size = config["unique_code_size"] - - -class TrainConfig(object): - - def __init__(self, config_string): - config = json.loads(config_string) - self.batch_size = config["batch_size"] - self.learning_rate = config["learning_rate"] - self.decay_rate = config["decay_rate"] - self.samples_per_decay = config["samples_per_decay"] - - -def SaveConfig(directory, filename, config_string): - path = os.path.join(directory, filename) - with tf.gfile.Open(path, mode='w') as f: - f.write(config_string) diff --git a/research/compression/entropy_coder/core/entropy_coder_single.py b/research/compression/entropy_coder/core/entropy_coder_single.py deleted file mode 100644 index 8a61b488b6bdd11e1cff4a2da672129240eb7240..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/core/entropy_coder_single.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Compute the additional compression ratio after entropy coding.""" - -import io -import os - -import numpy as np -import tensorflow as tf - -import config_helper - -# pylint: disable=unused-import -from entropy_coder.all_models import all_models -# pylint: enable=unused-import -from entropy_coder.model import model_factory - - -# Checkpoint used to restore the model parameters. -tf.app.flags.DEFINE_string('checkpoint', None, - """Model checkpoint.""") - -# Model selection and configuration. -tf.app.flags.DEFINE_string('model', None, """Underlying encoder model.""") -tf.app.flags.DEFINE_string('model_config', None, - """Model config protobuf given as text file.""") - -# File holding the binary codes. -tf.flags.DEFINE_string('input_codes', None, 'Location of binary code file.') - -FLAGS = tf.flags.FLAGS - - -def main(_): - if (FLAGS.input_codes is None or FLAGS.model is None): - print ('\nUsage: python entropy_coder_single.py --model=progressive ' - '--model_config=model_config.json' - '--iteration=15\n\n') - return - - #if FLAGS.iteration < -1 or FLAGS.iteration > 15: - # print ('\n--iteration must be between 0 and 15 inclusive, or -1 to infer ' - # 'from file.\n') - # return - #iteration = FLAGS.iteration - - if not tf.gfile.Exists(FLAGS.input_codes): - print('\nInput codes not found.\n') - return - - with tf.gfile.FastGFile(FLAGS.input_codes, 'rb') as code_file: - contents = code_file.read() - loaded_codes = np.load(io.BytesIO(contents)) - assert ['codes', 'shape'] not in loaded_codes.files - loaded_shape = loaded_codes['shape'] - loaded_array = loaded_codes['codes'] - - # Unpack and recover code shapes. 
- unpacked_codes = np.reshape(np.unpackbits(loaded_array) - [:np.prod(loaded_shape)], - loaded_shape) - - numpy_int_codes = unpacked_codes.transpose([1, 2, 3, 0, 4]) - numpy_int_codes = numpy_int_codes.reshape([numpy_int_codes.shape[0], - numpy_int_codes.shape[1], - numpy_int_codes.shape[2], - -1]) - numpy_codes = numpy_int_codes.astype(np.float32) * 2.0 - 1.0 - - with tf.Graph().as_default() as graph: - # TF tensor to hold the binary codes to losslessly compress. - batch_size = 1 - codes = tf.placeholder(tf.float32, shape=numpy_codes.shape) - - # Create the entropy coder model. - global_step = None - optimizer = None - model = model_factory.GetModelRegistry().CreateModel(FLAGS.model) - model_config_string = config_helper.GetConfigString(FLAGS.model_config) - model.Initialize(global_step, optimizer, model_config_string) - model.BuildGraph(codes) - - saver = tf.train.Saver(sharded=True, keep_checkpoint_every_n_hours=12.0) - - with tf.Session(graph=graph) as sess: - # Initialize local variables. - sess.run(tf.local_variables_initializer()) - - # Restore model variables. - saver.restore(sess, FLAGS.checkpoint) - - tf_tensors = { - 'code_length': model.average_code_length - } - feed_dict = {codes: numpy_codes} - np_tensors = sess.run(tf_tensors, feed_dict=feed_dict) - - print('Additional compression ratio: {}'.format( - np_tensors['code_length'])) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/compression/entropy_coder/core/entropy_coder_train.py b/research/compression/entropy_coder/core/entropy_coder_train.py deleted file mode 100644 index 27c489037d27095b578aed6ad10a5a190ec49b18..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/core/entropy_coder_train.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Train an entropy coder model.""" - -import time - -import tensorflow as tf - -import code_loader -import config_helper - -# pylint: disable=unused-import -from entropy_coder.all_models import all_models -# pylint: enable=unused-import -from entropy_coder.model import model_factory - - -FLAGS = tf.app.flags.FLAGS - -# Hardware resources configuration. -tf.app.flags.DEFINE_string('master', '', - """Name of the TensorFlow master to use.""") -tf.app.flags.DEFINE_string('train_dir', None, - """Directory where to write event logs.""") -tf.app.flags.DEFINE_integer('task', None, - """Task id of the replica running the training.""") -tf.app.flags.DEFINE_integer('ps_tasks', 0, """Number of tasks in the ps job. - If 0 no ps job is used.""") - -# Model selection and configuration. -tf.app.flags.DEFINE_string('model', None, """Underlying encoder model.""") -tf.app.flags.DEFINE_string('model_config', None, - """Model config protobuf given as text file.""") - -# Training data and parameters configuration. 
-tf.app.flags.DEFINE_string('input_config', None, - """Path to the training input config file.""") -tf.app.flags.DEFINE_string('train_config', None, - """Path to the training experiment config file.""") - - -def train(): - if FLAGS.train_dir is None: - raise ValueError('Parameter train_dir must be provided') - if FLAGS.task is None: - raise ValueError('Parameter task must be provided') - if FLAGS.model is None: - raise ValueError('Parameter model must be provided') - - input_config_string = config_helper.GetConfigString(FLAGS.input_config) - input_config = config_helper.InputConfig(input_config_string) - - # Training parameters. - train_config_string = config_helper.GetConfigString(FLAGS.train_config) - train_config = config_helper.TrainConfig(train_config_string) - - batch_size = train_config.batch_size - initial_learning_rate = train_config.learning_rate - decay_rate = train_config.decay_rate - samples_per_decay = train_config.samples_per_decay - - # Parameters for learning-rate decay. - # The formula is decay_rate ** floor(steps / decay_steps). 
- decay_steps = samples_per_decay / batch_size - decay_steps = max(decay_steps, 1) - - first_code = code_loader.ReadFirstCode(input_config.data) - first_code_height = ( - first_code.features.feature['code_shape'].int64_list.value[0]) - first_code_width = ( - first_code.features.feature['code_shape'].int64_list.value[1]) - max_bit_depth = ( - first_code.features.feature['code_shape'].int64_list.value[2]) - print('Maximum code depth: {}'.format(max_bit_depth)) - - with tf.Graph().as_default(): - ps_ops = ["Variable", "VariableV2", "AutoReloadVariable", "VarHandleOp"] - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks, - ps_ops=ps_ops)): - codes = code_loader.LoadBinaryCode( - input_config=input_config, - batch_size=batch_size) - if input_config.unique_code_size: - print('Input code size: {} x {}'.format(first_code_height, - first_code_width)) - codes.set_shape( - [batch_size, first_code_height, first_code_width, max_bit_depth]) - else: - codes.set_shape([batch_size, None, None, max_bit_depth]) - codes_effective_shape = tf.shape(codes) - - global_step = tf.contrib.framework.create_global_step() - - # Apply learning-rate decay. - learning_rate = tf.train.exponential_decay( - learning_rate=initial_learning_rate, - global_step=global_step, - decay_steps=decay_steps, - decay_rate=decay_rate, - staircase=True) - tf.summary.scalar('Learning Rate', learning_rate) - optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, - epsilon=1.0) - - # Create the entropy coder model. - model = model_factory.GetModelRegistry().CreateModel(FLAGS.model) - model_config_string = config_helper.GetConfigString(FLAGS.model_config) - model.Initialize(global_step, optimizer, model_config_string) - model.BuildGraph(codes) - - summary_op = tf.summary.merge_all() - - # Verify that the model can actually be trained. 
- if model.train_op is None: - raise ValueError('Input model {} is not trainable'.format(FLAGS.model)) - - # We disable the summary thread run by Supervisor class by passing - # summary_op=None. We still pass save_summaries_secs because it is used by - # the global step counter thread. - is_chief = (FLAGS.task == 0) - sv = tf.train.Supervisor(logdir=FLAGS.train_dir, - is_chief=is_chief, - global_step=global_step, - # saver=model.saver, - summary_op=None, - save_summaries_secs=120, - save_model_secs=600, - recovery_wait_secs=30) - - sess = sv.PrepareSession(FLAGS.master) - sv.StartQueueRunners(sess) - - step = sess.run(global_step) - print('Trainer initial step: {}.'.format(step)) - - # Once everything has been setup properly, save the configs. - if is_chief: - config_helper.SaveConfig(FLAGS.train_dir, 'input_config.json', - input_config_string) - config_helper.SaveConfig(FLAGS.train_dir, 'model_config.json', - model_config_string) - config_helper.SaveConfig(FLAGS.train_dir, 'train_config.json', - train_config_string) - - # Train the model. - next_summary_time = time.time() - while not sv.ShouldStop(): - feed_dict = None - - # Once in a while, update the summaries on the chief worker. 
- if is_chief and next_summary_time < time.time(): - summary_str = sess.run(summary_op, feed_dict=feed_dict) - sv.SummaryComputed(sess, summary_str) - next_summary_time = time.time() + sv.save_summaries_secs - else: - tf_tensors = { - 'train': model.train_op, - 'code_length': model.average_code_length - } - np_tensors = sess.run(tf_tensors, feed_dict=feed_dict) - print(np_tensors['code_length']) - - sv.Stop() - - -def main(argv=None): # pylint: disable=unused-argument - train() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/compression/entropy_coder/dataset/gen_synthetic_dataset.py b/research/compression/entropy_coder/dataset/gen_synthetic_dataset.py deleted file mode 100644 index de60aee324d4a6209d00a873ee681aa59aae0d8e..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/dataset/gen_synthetic_dataset.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Generate a synthetic dataset.""" - -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import synthetic_model - - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string( - 'dataset_dir', None, - """Directory where to write the dataset and the configs.""") -tf.app.flags.DEFINE_integer( - 'count', 1000, - """Number of samples to generate.""") - - -def int64_feature(values): - """Returns a TF-Feature of int64s. - - Args: - values: A scalar or list of values. - - Returns: - A TF-Feature. - """ - if not isinstance(values, (tuple, list)): - values = [values] - return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - - -def float_feature(values): - """Returns a TF-Feature of floats. - - Args: - values: A scalar of list of values. - - Returns: - A TF-Feature. - """ - if not isinstance(values, (tuple, list)): - values = [values] - return tf.train.Feature(float_list=tf.train.FloatList(value=values)) - - -def AddToTFRecord(code, tfrecord_writer): - example = tf.train.Example(features=tf.train.Features(feature={ - 'code_shape': int64_feature(code.shape), - 'code': float_feature(code.flatten().tolist()), - })) - tfrecord_writer.write(example.SerializeToString()) - - -def GenerateDataset(filename, count, code_shape): - with tf.python_io.TFRecordWriter(filename) as tfrecord_writer: - for _ in xrange(count): - code = synthetic_model.GenerateSingleCode(code_shape) - # Convert {0,1} codes to {-1,+1} codes. 
- code = 2.0 * code - 1.0 - AddToTFRecord(code, tfrecord_writer) - - -def main(argv=None): # pylint: disable=unused-argument - GenerateDataset(os.path.join(FLAGS.dataset_dir + '/synthetic_dataset'), - FLAGS.count, - [35, 48, 8]) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/compression/entropy_coder/dataset/gen_synthetic_single.py b/research/compression/entropy_coder/dataset/gen_synthetic_single.py deleted file mode 100644 index b8c3821c38b6a0b95f01ad7ffb283cca4beb34b3..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/dataset/gen_synthetic_single.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generate a single synthetic sample.""" - -import io -import os - -import numpy as np -import tensorflow as tf - -import synthetic_model - - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string( - 'sample_filename', None, - """Output file to store the generated binary code.""") - - -def GenerateSample(filename, code_shape, layer_depth): - # {0, +1} binary codes. - # No conversion since the output file is expected to store - # codes using {0, +1} codes (and not {-1, +1}). 
- code = synthetic_model.GenerateSingleCode(code_shape) - code = np.round(code) - - # Reformat the code so as to be compatible with what is generated - # by the image encoder. - # The image encoder generates a tensor of size: - # iteration_count x batch_size x height x width x iteration_depth. - # Here: batch_size = 1 - if code_shape[-1] % layer_depth != 0: - raise ValueError('Number of layers is not an integer') - height = code_shape[0] - width = code_shape[1] - code = code.reshape([1, height, width, -1, layer_depth]) - code = np.transpose(code, [3, 0, 1, 2, 4]) - - int_codes = code.astype(np.int8) - exported_codes = np.packbits(int_codes.reshape(-1)) - - output = io.BytesIO() - np.savez_compressed(output, shape=int_codes.shape, codes=exported_codes) - with tf.gfile.FastGFile(filename, 'wb') as code_file: - code_file.write(output.getvalue()) - - -def main(argv=None): # pylint: disable=unused-argument - # Note: the height and the width is different from the training dataset. - # The main purpose is to show that the entropy coder model is fully - # convolutional and can be used on any image size. - layer_depth = 2 - GenerateSample(FLAGS.sample_filename, [31, 36, 8], layer_depth) - - -if __name__ == '__main__': - tf.app.run() - diff --git a/research/compression/entropy_coder/dataset/synthetic_model.py b/research/compression/entropy_coder/dataset/synthetic_model.py deleted file mode 100644 index 9cccb64a136aba5a623c95e7c2dede2191d2cd62..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/dataset/synthetic_model.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Binary code sample generator.""" - -import numpy as np -from six.moves import xrange - - -_CRC_LINE = [ - [0, 1, 0], - [1, 1, 0], - [1, 0, 0] -] - -_CRC_DEPTH = [1, 1, 0, 1] - - -def ComputeLineCrc(code, width, y, x, d): - crc = 0 - for dy in xrange(len(_CRC_LINE)): - i = y - 1 - dy - if i < 0: - continue - for dx in xrange(len(_CRC_LINE[dy])): - j = x - 2 + dx - if j < 0 or j >= width: - continue - crc += 1 if (code[i, j, d] != _CRC_LINE[dy][dx]) else 0 - return crc - - -def ComputeDepthCrc(code, y, x, d): - crc = 0 - for delta in xrange(len(_CRC_DEPTH)): - k = d - 1 - delta - if k < 0: - continue - crc += 1 if (code[y, x, k] != _CRC_DEPTH[delta]) else 0 - return crc - - -def GenerateSingleCode(code_shape): - code = np.zeros(code_shape, dtype=np.int) - - keep_value_proba = 0.8 - - height = code_shape[0] - width = code_shape[1] - depth = code_shape[2] - - for d in xrange(depth): - for y in xrange(height): - for x in xrange(width): - v1 = ComputeLineCrc(code, width, y, x, d) - v2 = ComputeDepthCrc(code, y, x, d) - v = 1 if (v1 + v2 >= 6) else 0 - if np.random.rand() < keep_value_proba: - code[y, x, d] = v - else: - code[y, x, d] = 1 - v - - return code diff --git a/research/compression/entropy_coder/lib/__init__.py b/research/compression/entropy_coder/lib/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/compression/entropy_coder/lib/block_base.py 
b/research/compression/entropy_coder/lib/block_base.py deleted file mode 100644 index 615dff82829dbbcab46c7217cd35f6259de01161..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/block_base.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base class for Tensorflow building blocks.""" - -import collections -import contextlib -import itertools - -import tensorflow as tf - -_block_stacks = collections.defaultdict(lambda: []) - - -class BlockBase(object): - """Base class for transform wrappers of Tensorflow. - - To implement a Tensorflow transform block, inherit this class. - - 1. To create a variable, use NewVar() method. Do not overload this method! - For example, use as follows. - a_variable = self.NewVar(initial_value) - - 2. All Tensorflow-related code must be done inside 'with self._BlockScope().' - Otherwise, name scoping and block hierarchy will not work. An exception - is _Apply() method, which is already called inside the context manager - by __call__() method. - - 3. Override and implement _Apply() method. This method is called by - __call__() method. - - The users would use blocks like the following. - nn1 = NN(128, bias=Bias(0), act=tf.nn.relu) - y = nn1(x) - - Some things to consider. - - - Use lazy-initialization if possible. 
That is, initialize at first Apply() - rather than at __init__(). - - Note: if needed, the variables can be created on a specific parameter - server by creating blocks in a scope like: - with g.device(device): - linear = Linear(...) - """ - - def __init__(self, name): - self._variables = [] - self._subblocks = [] - self._called = False - - # Intentionally distinguishing empty string and None. - # If name is an empty string, then do not use name scope. - self.name = name if name is not None else self.__class__.__name__ - self._graph = tf.get_default_graph() - - if self.name: - # Capture the scope string at the init time. - with self._graph.name_scope(self.name) as scope: - self._scope_str = scope - else: - self._scope_str = '' - - # Maintain hierarchy structure of blocks. - self._stack = _block_stacks[self._graph] - if self.__class__ is BlockBase: - # This code is only executed to create the root, which starts in the - # initialized state. - assert not self._stack - self._parent = None - self._called = True # The root is initialized. - return - - # Create a fake root if a root is not already present. - if not self._stack: - self._stack.append(BlockBase('NoOpRoot')) - - self._parent = self._stack[-1] - self._parent._subblocks.append(self) # pylint: disable=protected-access - - def __repr__(self): - return '"{}" ({})'.format(self._scope_str, self.__class__.__name__) - - @contextlib.contextmanager - def _OptionalNameScope(self, scope_str): - if scope_str: - with self._graph.name_scope(scope_str): - yield - else: - yield - - @contextlib.contextmanager - def _BlockScope(self): - """Context manager that handles graph, namescope, and nested blocks.""" - self._stack.append(self) - - try: - with self._graph.as_default(): - with self._OptionalNameScope(self._scope_str): - yield self - finally: # Pop from the stack no matter exception is raised or not. 
- # The following line is executed when leaving 'with self._BlockScope()' - self._stack.pop() - - def __call__(self, *args, **kwargs): - assert self._stack is _block_stacks[self._graph] - - with self._BlockScope(): - ret = self._Apply(*args, **kwargs) - - self._called = True - return ret - - def _Apply(self, *args, **kwargs): - """Implementation of __call__().""" - raise NotImplementedError() - - # Redirect all variable creation to this single function, so that we can - # switch to better variable creation scheme. - def NewVar(self, value, **kwargs): - """Creates a new variable. - - This function creates a variable, then returns a local copy created by - Identity operation. To get the Variable class object, use LookupRef() - method. - - Note that each time Variable class object is used as an input to an - operation, Tensorflow will create a new Send/Recv pair. This hurts - performance. - - If not for assign operations, use the local copy returned by this method. - - Args: - value: Initialization value of the variable. The shape and the data type - of the variable is determined by this initial value. - **kwargs: Extra named arguments passed to Variable.__init__(). - - Returns: - A local copy of the new variable. - """ - v = tf.Variable(value, **kwargs) - - self._variables.append(v) - return v - - @property - def initialized(self): - """Returns bool if the block is initialized. - - By default, BlockBase assumes that a block is initialized when __call__() - is executed for the first time. If this is an incorrect assumption for some - subclasses, override this property in those subclasses. - - Returns: - True if initialized, False otherwise. 
- """ - return self._called - - def AssertInitialized(self): - """Asserts initialized property.""" - if not self.initialized: - raise RuntimeError('{} has not been initialized.'.format(self)) - - def VariableList(self): - """Returns the list of all tensorflow variables used inside this block.""" - variables = list(itertools.chain( - itertools.chain.from_iterable( - t.VariableList() for t in self._subblocks), - self._VariableList())) - return variables - - def _VariableList(self): - """Returns the list of all tensorflow variables owned by this block.""" - self.AssertInitialized() - return self._variables - - def CreateWeightLoss(self): - """Returns L2 loss list of (almost) all variables used inside this block. - - When this method needs to be overridden, there are two choices. - - 1. Override CreateWeightLoss() to change the weight loss of all variables - that belong to this block, both directly and indirectly. - 2. Override _CreateWeightLoss() to change the weight loss of all - variables that directly belong to this block but not to the sub-blocks. - - Returns: - A Tensor object or None. - """ - losses = list(itertools.chain( - itertools.chain.from_iterable( - t.CreateWeightLoss() for t in self._subblocks), - self._CreateWeightLoss())) - return losses - - def _CreateWeightLoss(self): - """Returns weight loss list of variables that belong to this block.""" - self.AssertInitialized() - with self._BlockScope(): - return [tf.nn.l2_loss(v) for v in self._variables] - - def CreateUpdateOps(self): - """Creates update operations for this block and its sub-blocks.""" - ops = list(itertools.chain( - itertools.chain.from_iterable( - t.CreateUpdateOps() for t in self._subblocks), - self._CreateUpdateOps())) - return ops - - def _CreateUpdateOps(self): - """Creates update operations for this block.""" - self.AssertInitialized() - return [] - - def MarkAsNonTrainable(self): - """Mark all the variables of this block as non-trainable. 
- - All the variables owned directly or indirectly (through subblocks) are - marked as non trainable. - - This function along with CheckpointInitOp can be used to load a pretrained - model that consists in only one part of the whole graph. - """ - assert self._called - - all_variables = self.VariableList() - collection = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES) - for v in all_variables: - if v in collection: - collection.remove(v) - - -def CreateWeightLoss(): - """Returns all weight losses from the blocks in the graph.""" - stack = _block_stacks[tf.get_default_graph()] - if not stack: - return [] - return stack[0].CreateWeightLoss() - - -def CreateBlockUpdates(): - """Combines all updates from the blocks in the graph.""" - stack = _block_stacks[tf.get_default_graph()] - if not stack: - return [] - return stack[0].CreateUpdateOps() diff --git a/research/compression/entropy_coder/lib/block_util.py b/research/compression/entropy_coder/lib/block_util.py deleted file mode 100644 index 80479cc66df95338aa119ba1216cd213ecfbe08d..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/block_util.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions for blocks.""" - -from __future__ import division -from __future__ import unicode_literals - -import math - -import numpy as np -import six -import tensorflow as tf - - -class RsqrtInitializer(object): - """Gaussian initializer with standard deviation 1/sqrt(n). - - Note that tf.truncated_normal is used internally. Therefore any random sample - outside two-sigma will be discarded and re-sampled. - """ - - def __init__(self, dims=(0,), **kwargs): - """Creates an initializer. - - Args: - dims: Dimension(s) index to compute standard deviation: - 1.0 / sqrt(product(shape[dims])) - **kwargs: Extra keyword arguments to pass to tf.truncated_normal. - """ - if isinstance(dims, six.integer_types): - self._dims = [dims] - else: - self._dims = dims - self._kwargs = kwargs - - def __call__(self, shape, dtype): - stddev = 1.0 / np.sqrt(np.prod([shape[x] for x in self._dims])) - return tf.truncated_normal( - shape=shape, dtype=dtype, stddev=stddev, **self._kwargs) - - -class RectifierInitializer(object): - """Gaussian initializer with standard deviation sqrt(2/fan_in). - - Note that tf.random_normal is used internally to ensure the expected weight - distribution. This is intended to be used with ReLU activations, specially - in ResNets. - - For details please refer to: - Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet - Classification - """ - - def __init__(self, dims=(0,), scale=2.0, **kwargs): - """Creates an initializer. - - Args: - dims: Dimension(s) index to compute standard deviation: - sqrt(scale / product(shape[dims])) - scale: A constant scaling for the initialization used as - sqrt(scale / product(shape[dims])). - **kwargs: Extra keyword arguments to pass to tf.truncated_normal. 
- """ - if isinstance(dims, six.integer_types): - self._dims = [dims] - else: - self._dims = dims - self._kwargs = kwargs - self._scale = scale - - def __call__(self, shape, dtype): - stddev = np.sqrt(self._scale / np.prod([shape[x] for x in self._dims])) - return tf.random_normal( - shape=shape, dtype=dtype, stddev=stddev, **self._kwargs) - - -class GaussianInitializer(object): - """Gaussian initializer with a given standard deviation. - - Note that tf.truncated_normal is used internally. Therefore any random sample - outside two-sigma will be discarded and re-sampled. - """ - - def __init__(self, stddev=1.0): - self._stddev = stddev - - def __call__(self, shape, dtype): - return tf.truncated_normal(shape=shape, dtype=dtype, stddev=self._stddev) diff --git a/research/compression/entropy_coder/lib/blocks.py b/research/compression/entropy_coder/lib/blocks.py deleted file mode 100644 index 002384eb07045f1cad963d217a205ade51ba03b6..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from block_base import * -from block_util import * -from blocks_binarizer import * -from blocks_entropy_coding import * -from blocks_lstm import * -from blocks_masked_conv2d import * -from blocks_masked_conv2d_lstm import * -from blocks_operator import * -from blocks_std import * diff --git a/research/compression/entropy_coder/lib/blocks_binarizer.py b/research/compression/entropy_coder/lib/blocks_binarizer.py deleted file mode 100644 index 8206731610613af2cf3ec15210fd5b9977f4a916..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_binarizer.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Activation and weight binarizer implementations.""" - -import math - -import numpy as np -import tensorflow as tf - - -def ConvertSignCodeToZeroOneCode(x): - """Conversion from codes {-1, +1} to codes {0, 1}.""" - return 0.5 * (x + 1.0) - - -def ConvertZeroOneCodeToSignCode(x): - """Convert from codes {0, 1} to codes {-1, +1}.""" - return 2.0 * x - 1.0 - - -def CheckZeroOneCode(x): - return tf.reduce_all(tf.equal(x * (x - 1.0), 0)) diff --git a/research/compression/entropy_coder/lib/blocks_entropy_coding.py b/research/compression/entropy_coder/lib/blocks_entropy_coding.py deleted file mode 100644 index 6ee5d97926c1b50b12cb9853d16caa25ba31e8d7..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_entropy_coding.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Set of blocks related to entropy coding.""" - -import math - -import tensorflow as tf - -import block_base - -# pylint does not recognize block_base.BlockBase.__call__(). -# pylint: disable=not-callable - - -class CodeLength(block_base.BlockBase): - """Theoretical bound for a code length given a probability distribution. 
- """ - - def __init__(self, name=None): - super(CodeLength, self).__init__(name) - - def _Apply(self, c, p): - """Theoretical bound of the coded length given a probability distribution. - - Args: - c: The binary codes. Belong to {0, 1}. - p: The probability of: P(code==+1) - - Returns: - The average code length. - Note: the average code length can be greater than 1 bit (e.g. when - encoding the least likely symbol). - """ - entropy = ((1.0 - c) * tf.log(1.0 - p) + c * tf.log(p)) / (-math.log(2)) - entropy = tf.reduce_mean(entropy) - return entropy diff --git a/research/compression/entropy_coder/lib/blocks_entropy_coding_test.py b/research/compression/entropy_coder/lib/blocks_entropy_coding_test.py deleted file mode 100644 index 5209865f5991598ee873ed24a4be572e3f9fc515..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_entropy_coding_test.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for basic tensorflow blocks_entropy_coding.""" - -from __future__ import division -from __future__ import unicode_literals - -import math - -import numpy as np -import tensorflow as tf - -import blocks_entropy_coding - - -class BlocksEntropyCodingTest(tf.test.TestCase): - - def testCodeLength(self): - shape = [2, 4] - proba_feed = [[0.65, 0.25, 0.70, 0.10], - [0.28, 0.20, 0.44, 0.54]] - symbol_feed = [[1.0, 0.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 1.0]] - mean_code_length = - ( - (math.log(0.65) + math.log(0.75) + math.log(0.70) + math.log(0.90) + - math.log(0.72) + math.log(0.80) + math.log(0.56) + math.log(0.54)) / - math.log(2.0)) / (shape[0] * shape[1]) - - symbol = tf.placeholder(dtype=tf.float32, shape=shape) - proba = tf.placeholder(dtype=tf.float32, shape=shape) - code_length_calculator = blocks_entropy_coding.CodeLength() - code_length = code_length_calculator(symbol, proba) - - with self.test_session(): - tf.global_variables_initializer().run() - code_length_eval = code_length.eval( - feed_dict={symbol: symbol_feed, proba: proba_feed}) - - self.assertAllClose(mean_code_length, code_length_eval) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/lib/blocks_lstm.py b/research/compression/entropy_coder/lib/blocks_lstm.py deleted file mode 100644 index 6e474e3e3fcb6eeb3f18daf320e21a3acc88a2bf..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_lstm.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Blocks of LSTM and its variants.""" - -import numpy as np -import tensorflow as tf - -import block_base -import block_util -import blocks_std - -# pylint does not recognize block_base.BlockBase.__call__(). -# pylint: disable=not-callable - - -def LSTMBiasInit(shape, dtype): - """Returns ones for forget-gate, and zeros for the others.""" - shape = np.array(shape) - - # Check internal consistencies. - assert shape.shape == (1,), shape - assert shape[0] % 4 == 0, shape - - n = shape[0] // 4 - ones = tf.fill([n], tf.constant(1, dtype=dtype)) - zeros = tf.fill([3 * n], tf.constant(0, dtype=dtype)) - return tf.concat([ones, zeros], 0) - - -class LSTMBase(block_base.BlockBase): - """Base class for LSTM implementations. - - These LSTM implementations use the pattern found in [1]. No peephole - connection, i.e., cell content is not used in recurrence computation. - Hidden units are also output units. - - [1] Zaremba, Sutskever, Vinyals. Recurrent Neural Network Regularization, - 2015. arxiv:1409.2329. - """ - - def __init__(self, output_shape, name): - """Initializes LSTMBase class object. - - Args: - output_shape: List representing the LSTM output shape. This argument - does not include batch dimension. For example, if the LSTM output has - shape [batch, depth], then pass [depth]. - name: Name of this block. 
- """ - super(LSTMBase, self).__init__(name) - - with self._BlockScope(): - self._output_shape = [None] + list(output_shape) - self._hidden = None - self._cell = None - - @property - def hidden(self): - """Returns the hidden units of this LSTM.""" - return self._hidden - - @hidden.setter - def hidden(self, value): - """Assigns to the hidden units of this LSTM. - - Args: - value: The new value for the hidden units. If None, the hidden units are - considered to be filled with zeros. - """ - if value is not None: - value.get_shape().assert_is_compatible_with(self._output_shape) - self._hidden = value - - @property - def cell(self): - """Returns the cell units of this LSTM.""" - return self._cell - - @cell.setter - def cell(self, value): - """Assigns to the cell units of this LSTM. - - Args: - value: The new value for the cell units. If None, the cell units are - considered to be filled with zeros. - """ - if value is not None: - value.get_shape().assert_is_compatible_with(self._output_shape) - self._cell = value - - # Consider moving bias terms to the base, and require this method to be - # linear. - def _TransformInputs(self, _): - """Transforms the input units to (4 * depth) units. - - The forget-gate, input-gate, output-gate, and cell update is computed as - f, i, j, o = T(h) + R(x) - where h is hidden units, x is input units, and T, R are transforms of - h, x, respectively. - - This method implements R. Note that T is strictly linear, so if LSTM is - going to use bias, this method must include the bias to the transformation. - - Subclasses must implement this method. See _Apply() for more details. - """ - raise NotImplementedError() - - def _TransformHidden(self, _): - """Transforms the hidden units to (4 * depth) units. - - The forget-gate, input-gate, output-gate, and cell update is computed as - f, i, j, o = T(h) + R(x) - where h is hidden units, x is input units, and T, R are transforms of - h, x, respectively. - - This method implements T in the equation. 
The method must implement a - strictly linear transformation. For example, it may use MatMul or Conv2D, - but must not add bias. This is because when hidden units are zeros, then - the LSTM implementation will skip calling this method, instead of passing - zeros to this function. - - Subclasses must implement this method. See _Apply() for more details. - """ - raise NotImplementedError() - - def _Apply(self, *args): - xtransform = self._TransformInputs(*args) - depth_axis = len(self._output_shape) - 1 - - if self.hidden is not None: - htransform = self._TransformHidden(self.hidden) - f, i, j, o = tf.split( - value=htransform + xtransform, num_or_size_splits=4, axis=depth_axis) - else: - f, i, j, o = tf.split( - value=xtransform, num_or_size_splits=4, axis=depth_axis) - - if self.cell is not None: - self.cell = tf.sigmoid(f) * self.cell + tf.sigmoid(i) * tf.tanh(j) - else: - self.cell = tf.sigmoid(i) * tf.tanh(j) - - self.hidden = tf.sigmoid(o) * tf.tanh(self.cell) - return self.hidden - - -class LSTM(LSTMBase): - """Efficient LSTM implementation used in [1]. - - [1] Zaremba, Sutskever, Vinyals. Recurrent Neural Network Regularization, - 2015. arxiv:1409.2329. - """ - - def __init__(self, - depth, - bias=LSTMBiasInit, - initializer=block_util.RsqrtInitializer(), - name=None): - super(LSTM, self).__init__([depth], name) - - with self._BlockScope(): - self._depth = depth - self._nn = blocks_std.NN( - 4 * depth, bias=bias, act=None, initializer=initializer) - self._hidden_linear = blocks_std.Linear( - 4 * depth, initializer=initializer) - - def _TransformInputs(self, *args): - return self._nn(*args) - - def _TransformHidden(self, h): - return self._hidden_linear(h) - - -class Conv2DLSTM(LSTMBase): - """Convolutional LSTM implementation with optimizations inspired by [1]. - - Note that when using the batch normalization feature, the bias initializer - will not be used, since BN effectively cancels its effect out. - - [1] Zaremba, Sutskever, Vinyals. 
Recurrent Neural Network Regularization, - 2015. arxiv:1409.2329. - """ - - def __init__(self, - depth, - filter_size, - hidden_filter_size, - strides, - padding, - bias=LSTMBiasInit, - initializer=block_util.RsqrtInitializer(dims=(0, 1, 2)), - use_moving_average=False, - name=None): - super(Conv2DLSTM, self).__init__([None, None, depth], name) - self._iter = 0 - - with self._BlockScope(): - self._input_conv = blocks_std.Conv2D( - 4 * depth, - filter_size, - strides, - padding, - bias=None, - act=None, - initializer=initializer, - name='input_conv2d') - - self._hidden_conv = blocks_std.Conv2D( - 4 * depth, - hidden_filter_size, - [1, 1], - 'SAME', - bias=None, - act=None, - initializer=initializer, - name='hidden_conv2d') - - if bias is not None: - self._bias = blocks_std.BiasAdd(bias, name='biases') - else: - self._bias = blocks_std.PassThrough() - - def _TransformInputs(self, x): - return self._bias(self._input_conv(x)) - - def _TransformHidden(self, h): - return self._hidden_conv(h) - - def _Apply(self, *args): - xtransform = self._TransformInputs(*args) - depth_axis = len(self._output_shape) - 1 - - if self.hidden is not None: - htransform = self._TransformHidden(self.hidden) - f, i, j, o = tf.split( - value=htransform + xtransform, num_or_size_splits=4, axis=depth_axis) - else: - f, i, j, o = tf.split( - value=xtransform, num_or_size_splits=4, axis=depth_axis) - - if self.cell is not None: - self.cell = tf.sigmoid(f) * self.cell + tf.sigmoid(i) * tf.tanh(j) - else: - self.cell = tf.sigmoid(i) * tf.tanh(j) - - self.hidden = tf.sigmoid(o) * tf.tanh(self.cell) - - self._iter += 1 - return self.hidden diff --git a/research/compression/entropy_coder/lib/blocks_lstm_test.py b/research/compression/entropy_coder/lib/blocks_lstm_test.py deleted file mode 100644 index 03c32dc136effda11163f2e35c5a48496f0187c0..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_lstm_test.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 2017 The 
TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for LSTM tensorflow blocks.""" -from __future__ import division - -import numpy as np -import tensorflow as tf - -import block_base -import blocks_std -import blocks_lstm - - -class BlocksLSTMTest(tf.test.TestCase): - - def CheckUnary(self, y, op_type): - self.assertEqual(op_type, y.op.type) - self.assertEqual(1, len(y.op.inputs)) - return y.op.inputs[0] - - def CheckBinary(self, y, op_type): - self.assertEqual(op_type, y.op.type) - self.assertEqual(2, len(y.op.inputs)) - return y.op.inputs - - def testLSTM(self): - lstm = blocks_lstm.LSTM(10) - lstm.hidden = tf.zeros(shape=[10, 10], dtype=tf.float32) - lstm.cell = tf.zeros(shape=[10, 10], dtype=tf.float32) - x = tf.placeholder(dtype=tf.float32, shape=[10, 11]) - y = lstm(x) - - o, tanhc = self.CheckBinary(y, 'Mul') - self.assertEqual(self.CheckUnary(o, 'Sigmoid').name, 'LSTM/split:3') - - self.assertIs(lstm.cell, self.CheckUnary(tanhc, 'Tanh')) - fc, ij = self.CheckBinary(lstm.cell, 'Add') - - f, _ = self.CheckBinary(fc, 'Mul') - self.assertEqual(self.CheckUnary(f, 'Sigmoid').name, 'LSTM/split:0') - - i, j = self.CheckBinary(ij, 'Mul') - self.assertEqual(self.CheckUnary(i, 'Sigmoid').name, 'LSTM/split:1') - j = self.CheckUnary(j, 'Tanh') - self.assertEqual(j.name, 'LSTM/split:2') - - def testLSTMBiasInit(self): - lstm = blocks_lstm.LSTM(9) 
- x = tf.placeholder(dtype=tf.float32, shape=[15, 7]) - lstm(x) - b = lstm._nn._bias - - with self.test_session(): - tf.global_variables_initializer().run() - bias_var = b._bias.eval() - - comp = ([1.0] * 9) + ([0.0] * 27) - self.assertAllEqual(bias_var, comp) - - def testConv2DLSTM(self): - lstm = blocks_lstm.Conv2DLSTM(depth=10, - filter_size=[1, 1], - hidden_filter_size=[1, 1], - strides=[1, 1], - padding='SAME') - lstm.hidden = tf.zeros(shape=[10, 11, 11, 10], dtype=tf.float32) - lstm.cell = tf.zeros(shape=[10, 11, 11, 10], dtype=tf.float32) - x = tf.placeholder(dtype=tf.float32, shape=[10, 11, 11, 1]) - y = lstm(x) - - o, tanhc = self.CheckBinary(y, 'Mul') - self.assertEqual(self.CheckUnary(o, 'Sigmoid').name, 'Conv2DLSTM/split:3') - - self.assertIs(lstm.cell, self.CheckUnary(tanhc, 'Tanh')) - fc, ij = self.CheckBinary(lstm.cell, 'Add') - - f, _ = self.CheckBinary(fc, 'Mul') - self.assertEqual(self.CheckUnary(f, 'Sigmoid').name, 'Conv2DLSTM/split:0') - - i, j = self.CheckBinary(ij, 'Mul') - self.assertEqual(self.CheckUnary(i, 'Sigmoid').name, 'Conv2DLSTM/split:1') - j = self.CheckUnary(j, 'Tanh') - self.assertEqual(j.name, 'Conv2DLSTM/split:2') - - def testConv2DLSTMBiasInit(self): - lstm = blocks_lstm.Conv2DLSTM(9, 1, 1, [1, 1], 'SAME') - x = tf.placeholder(dtype=tf.float32, shape=[1, 7, 7, 7]) - lstm(x) - b = lstm._bias - - with self.test_session(): - tf.global_variables_initializer().run() - bias_var = b._bias.eval() - - comp = ([1.0] * 9) + ([0.0] * 27) - self.assertAllEqual(bias_var, comp) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/lib/blocks_masked_conv2d.py b/research/compression/entropy_coder/lib/blocks_masked_conv2d.py deleted file mode 100644 index 3f562384a681964554ead02477da24c13715d4d1..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_masked_conv2d.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Define some typical masked 2D convolutions.""" - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import block_util -import blocks_std - -# pylint does not recognize block_base.BlockBase.__call__(). -# pylint: disable=not-callable - - -class RasterScanConv2D(blocks_std.Conv2DBase): - """Conv2D with no dependency on future pixels (in raster scan order). - - For example, assuming a 5 x 5 kernel, the kernel is applied a spatial mask: - T T T T T - T T T T T - T T x F F - F F F F F - F F F F F - where 'T' are pixels which are available when computing the convolution - for pixel 'x'. All the pixels marked with 'F' are not available. - 'x' itself is not available if strict_order is True, otherwise, it is - available. 
- """ - - def __init__(self, depth, filter_size, strides, padding, - strict_order=True, - bias=None, act=None, initializer=None, name=None): - super(RasterScanConv2D, self).__init__( - depth, filter_size, strides, padding, bias, act, name=name) - - if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1: - raise ValueError('Kernel size should be odd.') - - with self._BlockScope(): - if initializer is None: - initializer = block_util.RsqrtInitializer(dims=(0, 1, 2)) - self._initializer = initializer - self._strict_order = strict_order - - def _CreateKernel(self, shape, dtype): - init = self._initializer(shape, dtype) - kernel = self.NewVar(init) - - mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype) - center = shape[:2] // 2 - mask[center[0] + 1:, :] = 0 - if not self._strict_order: - mask[center[0], center[1] + 1:] = 0 - else: - mask[center[0], center[1]:] = 0 - mask = mask.reshape(mask.shape + (1, 1)) - - return tf.convert_to_tensor(mask, dtype) * kernel - - -class DepthOrderConv2D(blocks_std.Conv2DBase): - """Conv2D with no dependency on higher depth dimensions. - - More precisely, the output depth #n has only dependencies on input depths #k - for k < n (if strict_order is True) or for k <= n (if strict_order is False). 
- """ - - def __init__(self, depth, filter_size, strides, padding, - strict_order=True, - bias=None, act=None, initializer=None, name=None): - super(DepthOrderConv2D, self).__init__( - depth, filter_size, strides, padding, bias, act, name=name) - - with self._BlockScope(): - if initializer is None: - initializer = block_util.RsqrtInitializer(dims=(0, 1, 2)) - self._initializer = initializer - self._strict_order = strict_order - - def _CreateKernel(self, shape, dtype): - init = self._initializer(shape, dtype) - kernel = self.NewVar(init) - - mask = np.ones(shape[2:], dtype=dtype.as_numpy_dtype) - depth_output = shape[3] - for d in xrange(depth_output): - if self._strict_order: - mask[d:, d] = 0 - else: - mask[d + 1:, d] = 0 - mask = mask.reshape((1, 1) + mask.shape) - - return tf.convert_to_tensor(mask, dtype) * kernel - - -class GroupRasterScanConv2D(blocks_std.Conv2DBase): - """Conv2D with no dependency on future pixels (in raster scan order). - - This version only introduces dependencies on previous pixels in raster scan - order. It can also introduce some dependencies on previous depth positions - of the current pixel (current pixel = center pixel of the kernel) in the - following way: - the depth dimension of the input is split into Ki groups of size - |input_group_size|, the output dimension is split into Ko groups of size - |output_group_size| (usually Ki == Ko). Each output group ko of the current - pixel position can only depend on previous input groups ki - (i.e. ki < ko if strict_order is True or ki <= ko if strict_order is False). - - Notes: - - Block RasterScanConv2D is a special case of GroupRasterScanConv2D - where Ki == Ko == 1 (i.e. input_group_size == input_depth and - output_group_size == output_depth). - - For 1x1 convolution, block DepthOrderConv2D is a special case of - GroupRasterScanConv2D where input_group_size == 1 and - output_group_size == 1. 
- """ - - def __init__(self, depth, filter_size, strides, padding, - strict_order=True, - input_group_size=1, - output_group_size=1, - bias=None, act=None, initializer=None, name=None): - super(GroupRasterScanConv2D, self).__init__( - depth, filter_size, strides, padding, bias, act, name=name) - - if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1: - raise ValueError('Kernel size should be odd.') - - with self._BlockScope(): - if initializer is None: - initializer = block_util.RsqrtInitializer(dims=(0, 1, 2)) - self._initializer = initializer - self._input_group_size = input_group_size - self._output_group_size = output_group_size - self._strict_order = strict_order - - if depth % self._output_group_size != 0: - raise ValueError( - 'Invalid depth group size: {} for depth {}'.format( - self._output_group_size, depth)) - self._output_group_count = depth // self._output_group_size - - def _CreateKernel(self, shape, dtype): - init = self._initializer(shape, dtype) - kernel = self.NewVar(init) - - depth_input = shape[2] - if depth_input % self._input_group_size != 0: - raise ValueError( - 'Invalid depth group size: {} for depth {}'.format( - self._input_group_size, depth_input)) - input_group_count = depth_input // self._input_group_size - output_group_count = self._output_group_count - - # Set the mask to 0 for future pixels in raster scan order. - center = shape[:2] // 2 - mask = np.ones([shape[0], shape[1], - input_group_count, self._input_group_size, - output_group_count, self._output_group_size], - dtype=dtype.as_numpy_dtype) - mask[center[0] + 1:, :, :, :, :, :] = 0 - mask[center[0], center[1] + 1:, :, :, :, :] = 0 - - # Adjust the mask for the current position (the center position). 
- depth_output = shape[3] - for d in xrange(output_group_count): - mask[center[0], center[1], d + 1:, :, d:d + 1, :] = 0 - if self._strict_order: - mask[center[0], center[1], d, :, d:d + 1, :] = 0 - - mask = mask.reshape([shape[0], shape[1], depth_input, depth_output]) - return tf.convert_to_tensor(mask, dtype) * kernel - - -class InFillingConv2D(blocks_std.Conv2DBase): - """Conv2D with kernel having no dependency on the current pixel. - - For example, assuming a 5 x 5 kernel, the kernel is applied a spatial mask: - T T T T T - T T T T T - T T x T T - T T T T T - T T T T T - where 'T' marks a pixel which is available when computing the convolution - for pixel 'x'. 'x' itself is not available. - """ - - def __init__(self, depth, filter_size, strides, padding, - bias=None, act=None, initializer=None, name=None): - super(InFillingConv2D, self).__init__( - depth, filter_size, strides, padding, bias, act, name=name) - - if (filter_size[0] % 2) != 1 or (filter_size[1] % 2) != 1: - raise ValueError('Kernel size should be odd.') - if filter_size[0] == 1 and filter_size[1] == 1: - raise ValueError('Kernel size should be larger than 1x1.') - - with self._BlockScope(): - if initializer is None: - initializer = block_util.RsqrtInitializer(dims=(0, 1, 2)) - self._initializer = initializer - - def _CreateKernel(self, shape, dtype): - init = self._initializer(shape, dtype) - kernel = self.NewVar(init) - - mask = np.ones(shape[:2], dtype=dtype.as_numpy_dtype) - center = shape[:2] // 2 - mask[center[0], center[1]] = 0 - mask = mask.reshape(mask.shape + (1, 1)) - - return tf.convert_to_tensor(mask, dtype) * kernel diff --git a/research/compression/entropy_coder/lib/blocks_masked_conv2d_lstm.py b/research/compression/entropy_coder/lib/blocks_masked_conv2d_lstm.py deleted file mode 100644 index 2d6dfeffcaff1289adf3bdec33cb0560db6b0416..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_masked_conv2d_lstm.py +++ /dev/null @@ -1,79 +0,0 @@ -# 
Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Masked conv2d LSTM.""" - -import block_base -import block_util -import blocks_masked_conv2d -import blocks_lstm -import blocks_std - -# pylint: disable=not-callable - - -class RasterScanConv2DLSTM(blocks_lstm.LSTMBase): - """Convolutional LSTM implementation with optimizations inspired by [1]. - - Note that when using the batch normalization feature, the bias initializer - will not be used, since BN effectively cancels its effect out. - - [1] Zaremba, Sutskever, Vinyals. Recurrent Neural Network Regularization, - 2015. arxiv:1409.2329. 
- """ - - def __init__(self, - depth, - filter_size, - hidden_filter_size, - strides, - padding, - bias=blocks_lstm.LSTMBiasInit, - initializer=block_util.RsqrtInitializer(dims=(0, 1, 2)), - name=None): - super(RasterScanConv2DLSTM, self).__init__([None, None, depth], name) - - with self._BlockScope(): - self._input_conv = blocks_masked_conv2d.RasterScanConv2D( - 4 * depth, - filter_size, - strides, - padding, - strict_order=False, - bias=None, - act=None, - initializer=initializer, - name='input_conv2d') - - self._hidden_conv = blocks_std.Conv2D( - 4 * depth, - hidden_filter_size, - [1, 1], - 'SAME', - bias=None, - act=None, - initializer=initializer, - name='hidden_conv2d') - - if bias is not None: - self._bias = blocks_std.BiasAdd(bias, name='biases') - else: - self._bias = blocks_std.PassThrough() - - def _TransformInputs(self, x): - return self._bias(self._input_conv(x)) - - def _TransformHidden(self, h): - return self._hidden_conv(h) diff --git a/research/compression/entropy_coder/lib/blocks_masked_conv2d_test.py b/research/compression/entropy_coder/lib/blocks_masked_conv2d_test.py deleted file mode 100644 index 1d284ebffe5a24b91c96936c17d6c23febdf76d5..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_masked_conv2d_test.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests of the 2D masked convolution blocks.""" - -from __future__ import division -from __future__ import unicode_literals - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import blocks_masked_conv2d - - -class MaskedConv2DTest(tf.test.TestCase): - - def testRasterScanKernel(self): - kernel_size = 5 - input_depth = 1 - output_depth = 1 - kernel_shape = [kernel_size, kernel_size, input_depth, output_depth] - - # pylint: disable=bad-whitespace - kernel_feed = [[ 1.0, 2.0, 3.0, 4.0, 5.0], - [ 6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 13.0, 14.0, 15.0], - [16.0, 17.0, 18.0, 19.0, 20.0], - [21.0, 22.0, 23.0, 24.0, 25.0]] - kernel_feed = np.reshape(kernel_feed, kernel_shape) - kernel_expected = [[ 1.0, 2.0, 3.0, 4.0, 5.0], - [ 6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 0.0, 0.0, 0.0], - [ 0.0, 0.0, 0.0, 0.0, 0.0], - [ 0.0, 0.0, 0.0, 0.0, 0.0]] - kernel_expected = np.reshape(kernel_expected, kernel_shape) - # pylint: enable=bad-whitespace - - init_kernel = lambda s, t: tf.constant(kernel_feed, dtype=t, shape=s) - masked_conv2d = blocks_masked_conv2d.RasterScanConv2D( - output_depth, [kernel_size] * 2, [1] * 2, 'SAME', - initializer=init_kernel) - x = tf.placeholder(dtype=tf.float32, shape=[10] * 3 + [input_depth]) - _ = masked_conv2d(x) - - with self.test_session(): - tf.global_variables_initializer().run() - kernel_value = masked_conv2d._kernel.eval() - - self.assertAllEqual(kernel_expected, kernel_value) - - def testDepthOrderKernel(self): - kernel_size = 1 - input_depth = 7 - output_depth = input_depth - kernel_shape = [kernel_size, kernel_size, input_depth, output_depth] - - kernel_feed = np.ones(kernel_shape) - x_shape = [5] * 3 + [input_depth] - x_feed = np.ones(x_shape) - y_expected = np.zeros(x_shape[0:3] + [output_depth]) - y_expected[:, :, :] = np.arange(output_depth) - - init_kernel = lambda s, t: tf.constant(kernel_feed, dtype=t, shape=s) - 
masked_conv2d = blocks_masked_conv2d.DepthOrderConv2D( - output_depth, [kernel_size] * 2, [1] * 2, 'SAME', - strict_order=True, - initializer=init_kernel) - x = tf.placeholder(dtype=tf.float32, shape=x_shape) - y = masked_conv2d(x) - - with self.test_session(): - tf.global_variables_initializer().run() - y_value = y.eval(feed_dict={x: x_feed}) - - self.assertAllEqual(y_expected, y_value) - - def testGroupRasterScanKernel(self): - kernel_size = 3 - input_depth = 4 - input_group_size = 2 - output_depth = 2 - output_group_size = 1 - kernel_shape = [kernel_size, kernel_size, input_depth, output_depth] - kernel_feed = np.ones(shape=kernel_shape) - - height = 5 - width = 5 - x_shape = [1, height, width, input_depth] - x_feed = np.ones(shape=x_shape) - - # pylint: disable=bad-whitespace - y_expected = [ - [[ 0, 2], [ 4, 6], [ 4, 6], [ 4, 6], [ 4, 6]], - [[ 8, 10], [16, 18], [16, 18], [16, 18], [12, 14]], - [[ 8, 10], [16, 18], [16, 18], [16, 18], [12, 14]], - [[ 8, 10], [16, 18], [16, 18], [16, 18], [12, 14]], - [[ 8, 10], [16, 18], [16, 18], [16, 18], [12, 14]], - ] - y_expected = np.reshape(y_expected, [1, height, width, output_depth]) - # pylint: enable=bad-whitespace - - init_kernel = lambda s, t: tf.constant(kernel_feed, dtype=t, shape=s) - masked_conv2d = blocks_masked_conv2d.GroupRasterScanConv2D( - output_depth, [kernel_size] * 2, [1] * 2, 'SAME', - strict_order=True, - input_group_size=input_group_size, - output_group_size=output_group_size, - initializer=init_kernel) - x = tf.placeholder(dtype=tf.float32, shape=x_shape) - y = masked_conv2d(x) - - with self.test_session(): - tf.global_variables_initializer().run() - y_value = y.eval(feed_dict={x: x_feed}) - - self.assertAllEqual(y_expected, y_value) - - def testInFillingKernel(self): - kernel_size = 5 - input_depth = 1 - output_depth = 1 - kernel_shape = [kernel_size, kernel_size, input_depth, output_depth] - - # pylint: disable=bad-whitespace - kernel_feed = [[ 1.0, 2.0, 3.0, 4.0, 5.0], - [ 6.0, 7.0, 8.0, 9.0, 
10.0], - [11.0, 12.0, 13.0, 14.0, 15.0], - [16.0, 17.0, 18.0, 19.0, 20.0], - [21.0, 22.0, 23.0, 24.0, 25.0]] - kernel_feed = np.reshape(kernel_feed, kernel_shape) - kernel_expected = [[ 1.0, 2.0, 3.0, 4.0, 5.0], - [ 6.0, 7.0, 8.0, 9.0, 10.0], - [11.0, 12.0, 0.0, 14.0, 15.0], - [16.0, 17.0, 18.0, 19.0, 20.0], - [21.0, 22.0, 23.0, 24.0, 25.0]] - kernel_expected = np.reshape(kernel_expected, kernel_shape) - # pylint: enable=bad-whitespace - - init_kernel = lambda s, t: tf.constant(kernel_feed, dtype=t, shape=s) - masked_conv2d = blocks_masked_conv2d.InFillingConv2D( - output_depth, [kernel_size] * 2, [1] * 2, 'SAME', - initializer=init_kernel) - x = tf.placeholder(dtype=tf.float32, shape=[10] * 3 + [input_depth]) - _ = masked_conv2d(x) - - with self.test_session(): - tf.global_variables_initializer().run() - kernel_value = masked_conv2d._kernel.eval() - - self.assertAllEqual(kernel_expected, kernel_value) - - def testConv2DMaskedNumerics(self): - kernel_size = 5 - input_shape = [1, 10, 10, 1] - filter_shape = [kernel_size, kernel_size, 1, 1] - strides = [1, 1, 1, 1] - output_shape = [1, 10, 10, 1] - - conv = blocks_masked_conv2d.RasterScanConv2D( - depth=filter_shape[-1], - filter_size=filter_shape[0:2], - strides=strides[1:3], - padding='SAME', - initializer=tf.constant_initializer(value=1.0)) - x = tf.placeholder(dtype=tf.float32, shape=input_shape) - y = conv(x) - - x_feed = - np.ones(input_shape, dtype=float) - y_expected = np.ones(output_shape, dtype=float) - for i in xrange(input_shape[1]): - for j in xrange(input_shape[2]): - x_feed[0, i, j, 0] = 10 * (j + 1) + i - v = 0 - ki_start = max(i - kernel_size // 2, 0) - kj_start = max(j - kernel_size // 2, 0) - kj_end = min(j + kernel_size // 2, input_shape[2] - 1) - for ki in range(ki_start, i + 1): - for kj in range(kj_start, kj_end + 1): - if ki > i: - continue - if ki == i and kj >= j: - continue - v += 10 * (kj + 1) + ki - y_expected[0, i, j, 0] = v - - with self.test_session(): - 
tf.global_variables_initializer().run() - y_value = y.eval(feed_dict={x: x_feed}) - - self.assertAllEqual(y_expected, y_value) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/lib/blocks_operator.py b/research/compression/entropy_coder/lib/blocks_operator.py deleted file mode 100644 index e35e37b27aa416ed48f91eda866d372601741cba..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_operator.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Common blocks which work as operators on other blocks.""" - -import tensorflow as tf - -import block_base - -# pylint: disable=not-callable - - -class CompositionOperator(block_base.BlockBase): - """Composition of several blocks.""" - - def __init__(self, block_list, name=None): - """Initialization of the composition operator. - - Args: - block_list: List of blocks.BlockBase that are chained to create - a new blocks.BlockBase. - name: Name of this block. 
- """ - super(CompositionOperator, self).__init__(name) - self._blocks = block_list - - def _Apply(self, x): - """Apply successively all the blocks on the given input tensor.""" - h = x - for layer in self._blocks: - h = layer(h) - return h - - -class LineOperator(block_base.BlockBase): - """Repeat the same block over all the lines of an input tensor.""" - - def __init__(self, block, name=None): - super(LineOperator, self).__init__(name) - self._block = block - - def _Apply(self, x): - height = x.get_shape()[1].value - if height is None: - raise ValueError('Unknown tensor height') - all_line_x = tf.split(value=x, num_or_size_splits=height, axis=1) - - y = [] - for line_x in all_line_x: - y.append(self._block(line_x)) - y = tf.concat(values=y, axis=1) - - return y - - -class TowerOperator(block_base.BlockBase): - """Parallel execution with concatenation of several blocks.""" - - def __init__(self, block_list, dim=3, name=None): - """Initialization of the parallel exec + concat (Tower). - - Args: - block_list: List of blocks.BlockBase that are chained to create - a new blocks.BlockBase. - dim: the dimension on which to concat. - name: Name of this block. - """ - super(TowerOperator, self).__init__(name) - self._blocks = block_list - self._concat_dim = dim - - def _Apply(self, x): - """Apply successively all the blocks on the given input tensor.""" - outputs = [layer(x) for layer in self._blocks] - return tf.concat(outputs, self._concat_dim) diff --git a/research/compression/entropy_coder/lib/blocks_operator_test.py b/research/compression/entropy_coder/lib/blocks_operator_test.py deleted file mode 100644 index 8b6d80da1d09102585e4725dd5c59f48d48eafcd..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_operator_test.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests of the block operators.""" - -import numpy as np -import tensorflow as tf - -import block_base -import blocks_operator - - -class AddOneBlock(block_base.BlockBase): - - def __init__(self, name=None): - super(AddOneBlock, self).__init__(name) - - def _Apply(self, x): - return x + 1.0 - - -class SquareBlock(block_base.BlockBase): - - def __init__(self, name=None): - super(SquareBlock, self).__init__(name) - - def _Apply(self, x): - return x * x - - -class BlocksOperatorTest(tf.test.TestCase): - - def testComposition(self): - x_value = np.array([[1.0, 2.0, 3.0], - [-1.0, -2.0, -3.0]]) - y_expected_value = np.array([[4.0, 9.0, 16.0], - [0.0, 1.0, 4.0]]) - - x = tf.placeholder(dtype=tf.float32, shape=[2, 3]) - complex_block = blocks_operator.CompositionOperator( - [AddOneBlock(), - SquareBlock()]) - y = complex_block(x) - - with self.test_session(): - y_value = y.eval(feed_dict={x: x_value}) - - self.assertAllClose(y_expected_value, y_value) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/lib/blocks_std.py b/research/compression/entropy_coder/lib/blocks_std.py deleted file mode 100644 index 2c617485342452f500d4b1b0b18e33b07d51e487..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_std.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright 2017 The 
TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Basic blocks for building tensorflow models.""" - -import numpy as np -import tensorflow as tf - -import block_base -import block_util - -# pylint does not recognize block_base.BlockBase.__call__(). -# pylint: disable=not-callable - - -def HandleConvPaddingModes(x, padding, kernel_shape, strides): - """Returns an updated tensor and padding type for REFLECT and SYMMETRIC. - - Args: - x: A 4D tensor with shape [batch_size, height, width, depth]. - padding: Padding mode (SAME, VALID, REFLECT, or SYMMETRIC). - kernel_shape: Shape of convolution kernel that will be applied. - strides: Convolution stride that will be used. - - Returns: - x and padding after adjustments for REFLECT and SYMMETRIC. - """ - # For 1x1 convolution, all padding modes are the same. - if np.all(kernel_shape[:2] == 1): - return x, 'VALID' - - if padding == 'REFLECT' or padding == 'SYMMETRIC': - # We manually compute the number of paddings as if 'SAME'. - # From Tensorflow kernel, the formulas are as follows. - # output_shape = ceil(input_shape / strides) - # paddings = (output_shape - 1) * strides + filter_size - input_shape - # Let x, y, s be a shorthand notations for input_shape, output_shape, and - # strides, respectively. Let (x - 1) = sn + r where 0 <= r < s. 
Note that - # y - 1 = ceil(x / s) - 1 = floor((x - 1) / s) = n - # provided that x > 0. Therefore - # paddings = n * s + filter_size - (sn + r + 1) - # = filter_size - r - 1. - input_shape = x.get_shape() # shape at graph construction time - img_shape = tf.shape(x)[1:3] # image shape (no batch) at run time - remainder = tf.mod(img_shape - 1, strides[1:3]) - pad_sizes = kernel_shape[:2] - remainder - 1 - - pad_rows = pad_sizes[0] - pad_cols = pad_sizes[1] - pad = tf.stack([[0, 0], tf.stack([pad_rows // 2, (pad_rows + 1) // 2]), - tf.stack([pad_cols // 2, (pad_cols + 1) // 2]), [0, 0]]) - - # Manually pad the input and switch the padding mode to 'VALID'. - x = tf.pad(x, pad, mode=padding) - x.set_shape([input_shape[0], x.get_shape()[1], - x.get_shape()[2], input_shape[3]]) - padding = 'VALID' - - return x, padding - - -class PassThrough(block_base.BlockBase): - """A dummy transform block that does nothing.""" - - def __init__(self): - # Pass an empty string to disable name scoping. - super(PassThrough, self).__init__(name='') - - def _Apply(self, inp): - return inp - - @property - def initialized(self): - """Always returns True.""" - return True - - -class Bias(object): - """An initialization helper class for BiasAdd block below.""" - - def __init__(self, value=0): - self.value = value - - -class BiasAdd(block_base.BlockBase): - """A tf.nn.bias_add wrapper. - - This wrapper may act as a PassThrough block depending on the initializer - provided, to make easier optional bias applications in NN blocks, etc. - See __init__() for the details. - """ - - def __init__(self, initializer=Bias(0), name=None): - """Initializes Bias block. - - |initializer| parameter have two special cases. - - 1. If initializer is None, then this block works as a PassThrough. - 2. If initializer is a Bias class object, then tf.constant_initializer is - used with the stored value. - - Args: - initializer: An initializer for the bias variable. - name: Name of this block. 
- """ - super(BiasAdd, self).__init__(name) - - with self._BlockScope(): - if isinstance(initializer, Bias): - self._initializer = tf.constant_initializer(value=initializer.value) - else: - self._initializer = initializer - - self._bias = None - - def _Apply(self, x): - if not self._bias: - init = self._initializer([int(x.get_shape()[-1])], x.dtype) - self._bias = self.NewVar(init) - - return tf.nn.bias_add(x, self._bias) - - def CreateWeightLoss(self): - return [] - - -class LinearBase(block_base.BlockBase): - """A matmul wrapper. - - Returns input * W, where matrix W can be customized through derivation. - """ - - def __init__(self, depth, name=None): - super(LinearBase, self).__init__(name) - - with self._BlockScope(): - self._depth = depth - self._matrix = None - - def _CreateKernel(self, shape, dtype): - raise NotImplementedError('This method must be sub-classed.') - - def _Apply(self, x): - if not self._matrix: - shape = [int(x.get_shape()[-1]), self._depth] - self._matrix = self._CreateKernel(shape, x.dtype) - - return tf.matmul(x, self._matrix) - - -class Linear(LinearBase): - """A matmul wrapper. - - Returns input * W, where matrix W is learned. - """ - - def __init__(self, - depth, - initializer=block_util.RsqrtInitializer(), - name=None): - super(Linear, self).__init__(depth, name) - - with self._BlockScope(): - self._initializer = initializer - - def _CreateKernel(self, shape, dtype): - init = self._initializer(shape, dtype) - return self.NewVar(init) - - -class NN(block_base.BlockBase): - """A neural network layer wrapper. - - Returns act(input * W + b), where matrix W, bias b are learned, and act is an - optional activation function (i.e., nonlinearity). - - This transform block can handle multiple inputs. If x_1, x_2, ..., x_m are - the inputs, then returns act(x_1 * W_1 + ... + x_m * W_m + b). - - Attributes: - nunits: The dimension of the output. 
- """ - - def __init__(self, - depth, - bias=Bias(0), - act=None, # e.g., tf.nn.relu - initializer=block_util.RsqrtInitializer(), - linear_block_factory=(lambda d, i: Linear(d, initializer=i)), - name=None): - """Initializes NN block. - - Args: - depth: The depth of the output. - bias: An initializer for the bias, or a Bias class object. If None, there - will be no bias term for this NN block. See BiasAdd block. - act: Optional activation function. If None, no activation is applied. - initializer: The initialization method for the matrix weights. - linear_block_factory: A function used to create a linear block. - name: The name of this block. - """ - super(NN, self).__init__(name) - - with self._BlockScope(): - self._linear_block_factory = linear_block_factory - self._depth = depth - self._initializer = initializer - self._matrices = None - - self._bias = BiasAdd(bias) if bias else PassThrough() - self._act = act if act else PassThrough() - - def _Apply(self, *args): - if not self._matrices: - self._matrices = [ - self._linear_block_factory(self._depth, self._initializer) - for _ in args] - - if len(self._matrices) != len(args): - raise ValueError('{} expected {} inputs, but observed {} inputs'.format( - self.name, len(self._matrices), len(args))) - - if len(args) > 1: - y = tf.add_n([m(x) for m, x in zip(self._matrices, args)]) - else: - y = self._matrices[0](args[0]) - - return self._act(self._bias(y)) - - -class Conv2DBase(block_base.BlockBase): - """A tf.nn.conv2d operator.""" - - def __init__(self, depth, filter_size, strides, padding, - bias=None, act=None, atrous_rate=None, conv=tf.nn.conv2d, - name=None): - """Initializes a Conv2DBase block. - - Arguments: - depth: The output depth of the block (i.e. #filters); if negative, the - output depth will be set to be the same as the input depth. - filter_size: The size of the 2D filter. If it's specified as an integer, - it's going to create a square filter. 
Otherwise, this is a tuple - specifying the height x width of the filter. - strides: A tuple specifying the y and x stride. - padding: One of the valid padding modes allowed by tf.nn.conv2d, or - 'REFLECT'/'SYMMETRIC' for mirror padding. - bias: An initializer for the bias, or a Bias class object. If None, there - will be no bias in this block. See BiasAdd block. - act: Optional activation function applied to the output. - atrous_rate: optional input rate for ATrous convolution. If not None, this - will be used and the strides will be ignored. - conv: The convolution function to use (e.g. tf.nn.conv2d). - name: The name for this conv2d op. - """ - super(Conv2DBase, self).__init__(name) - - with self._BlockScope(): - self._act = act if act else PassThrough() - self._bias = BiasAdd(bias) if bias else PassThrough() - - self._kernel_shape = np.zeros((4,), dtype=np.int32) - self._kernel_shape[:2] = filter_size - self._kernel_shape[3] = depth - - self._strides = np.ones((4,), dtype=np.int32) - self._strides[1:3] = strides - self._strides = list(self._strides) - - self._padding = padding - - self._kernel = None - self._conv = conv - - self._atrous_rate = atrous_rate - - def _CreateKernel(self, shape, dtype): - raise NotImplementedError('This method must be sub-classed') - - def _Apply(self, x): - """Apply the self._conv op. - - Arguments: - x: input tensor. It needs to be a 4D tensor of the form - [batch, height, width, channels]. - Returns: - The output of the convolution of x with the current convolutional - kernel. - Raises: - ValueError: if number of channels is not defined at graph construction. - """ - input_shape = x.get_shape().with_rank(4) - input_shape[3:].assert_is_fully_defined() # channels must be defined - if self._kernel is None: - assert self._kernel_shape[2] == 0, self._kernel_shape - self._kernel_shape[2] = input_shape[3].value - if self._kernel_shape[3] < 0: - # Make output depth be the same as input depth. 
- self._kernel_shape[3] = self._kernel_shape[2] - self._kernel = self._CreateKernel(self._kernel_shape, x.dtype) - - x, padding = HandleConvPaddingModes( - x, self._padding, self._kernel_shape, self._strides) - if self._atrous_rate is None: - x = self._conv(x, self._kernel, strides=self._strides, padding=padding) - else: - x = self._conv(x, self._kernel, rate=self._atrous_rate, padding=padding) - - if self._padding != 'VALID': - # Manually update shape. Known shape information can be lost by tf.pad(). - height = (1 + (input_shape[1].value - 1) // self._strides[1] - if input_shape[1].value else None) - width = (1 + (input_shape[2].value - 1) // self._strides[2] - if input_shape[2].value else None) - shape = x.get_shape() - x.set_shape([shape[0], height, width, shape[3]]) - - return self._act(self._bias(x)) - - -class Conv2D(Conv2DBase): - """A tf.nn.conv2d operator.""" - - def __init__(self, depth, filter_size, strides, padding, - bias=None, act=None, initializer=None, name=None): - """Initializes a Conv2D block. - - Arguments: - depth: The output depth of the block (i.e., #filters) - filter_size: The size of the 2D filter. If it's specified as an integer, - it's going to create a square filter. Otherwise, this is a tuple - specifying the height x width of the filter. - strides: A tuple specifying the y and x stride. - padding: One of the valid padding modes allowed by tf.nn.conv2d, or - 'REFLECT'/'SYMMETRIC' for mirror padding. - bias: An initializer for the bias, or a Bias class object. If None, there - will be no bias in this block. See BiasAdd block. - act: Optional activation function applied to the output. - initializer: Optional initializer for weights. - name: The name for this conv2d op. 
- """ - super(Conv2D, self).__init__(depth, filter_size, strides, padding, bias, - act, conv=tf.nn.conv2d, name=name) - - with self._BlockScope(): - if initializer is None: - initializer = block_util.RsqrtInitializer(dims=(0, 1, 2)) - self._initializer = initializer - - def _CreateKernel(self, shape, dtype): - return self.NewVar(self._initializer(shape, dtype)) diff --git a/research/compression/entropy_coder/lib/blocks_std_test.py b/research/compression/entropy_coder/lib/blocks_std_test.py deleted file mode 100644 index 328ebc9d2173436b2108b343b98650128a4613e3..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/lib/blocks_std_test.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for basic tensorflow blocks_std.""" - -from __future__ import division -from __future__ import unicode_literals - -import math -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import blocks_std - - -def _NumpyConv2D(x, f, strides, padding, rate=1): - assert strides[0] == 1 and strides[3] == 1, strides - - if rate > 1: - f_shape = f.shape - expand_f = np.zeros([f_shape[0], ((f_shape[1] - 1) * rate + 1), - f_shape[2], f_shape[3]]) - expand_f[:, [y * rate for y in range(f_shape[1])], :, :] = f - f = np.zeros([((f_shape[0] - 1) * rate + 1), expand_f.shape[1], - f_shape[2], f_shape[3]]) - f[[y * rate for y in range(f_shape[0])], :, :, :] = expand_f - - if padding != 'VALID': - assert x.shape[1] > 0 and x.shape[2] > 0, x.shape - # Compute the number of padded rows and cols. - # See Conv2D block comments for a math explanation. - remainder = ((x.shape[1] - 1) % strides[1], (x.shape[2] - 1) % strides[2]) - pad_rows = f.shape[0] - remainder[0] - 1 - pad_cols = f.shape[1] - remainder[1] - 1 - pad = ((0, 0), - (pad_rows // 2, (pad_rows + 1) // 2), - (pad_cols // 2, (pad_cols + 1) // 2), - (0, 0)) - - # Pad the input using numpy.pad(). - mode = None - if padding == 'SAME': - mode = str('constant') - if padding == 'REFLECT': - mode = str('reflect') - if padding == 'SYMMETRIC': - mode = str('symmetric') - x = np.pad(x, pad, mode=mode) - - # Since x is now properly padded, proceed as if padding mode is VALID. - x_window = np.empty( - (x.shape[0], - int(math.ceil((x.shape[1] - f.shape[0] + 1) / strides[1])), - int(math.ceil((x.shape[2] - f.shape[1] + 1) / strides[2])), - np.prod(f.shape[:3]))) - - # The output at pixel location (i, j) is the result of linear transformation - # applied to the window whose top-left corner is at - # (i * row_stride, j * col_stride). 
- for i in xrange(x_window.shape[1]): - k = i * strides[1] - for j in xrange(x_window.shape[2]): - l = j * strides[2] - x_window[:, i, j, :] = x[:, - k:(k + f.shape[0]), - l:(l + f.shape[1]), - :].reshape((x_window.shape[0], -1)) - - y = np.tensordot(x_window, f.reshape((-1, f.shape[3])), axes=1) - return y - - -class BlocksStdTest(tf.test.TestCase): - - def CheckUnary(self, y, op_type): - self.assertEqual(op_type, y.op.type) - self.assertEqual(1, len(y.op.inputs)) - return y.op.inputs[0] - - def CheckBinary(self, y, op_type): - self.assertEqual(op_type, y.op.type) - self.assertEqual(2, len(y.op.inputs)) - return y.op.inputs - - def testPassThrough(self): - p = blocks_std.PassThrough() - x = tf.placeholder(dtype=tf.float32, shape=[1]) - self.assertIs(p(x), x) - - def CheckBiasAdd(self, y, b): - x, u = self.CheckBinary(y, 'BiasAdd') - self.assertIs(u, b._bias.value()) - self.assertEqual(x.dtype, u.dtype.base_dtype) - return x - - def testBiasAdd(self): - b = blocks_std.BiasAdd() - x = tf.placeholder(dtype=tf.float32, shape=[4, 8]) - y = b(x) - self.assertEqual(b._bias.get_shape(), x.get_shape()[-1:]) - self.assertIs(x, self.CheckBiasAdd(y, b)) - - def testBiasRankTest(self): - b = blocks_std.BiasAdd() - x = tf.placeholder(dtype=tf.float32, shape=[10]) - with self.assertRaises(ValueError): - b(x) - - def CheckLinear(self, y, m): - x, w = self.CheckBinary(y, 'MatMul') - self.assertIs(w, m._matrix.value()) - self.assertEqual(x.dtype, w.dtype.base_dtype) - return x - - def testLinear(self): - m = blocks_std.Linear(10) - x = tf.placeholder(dtype=tf.float32, shape=[8, 9]) - y = m(x) - self.assertEqual(m._matrix.get_shape(), [9, 10]) - self.assertIs(x, self.CheckLinear(y, m)) - - def testLinearShared(self): - # Create a linear map which is applied twice on different inputs - # (i.e. the weights of the map are shared). 
- linear_map = blocks_std.Linear(6) - x1 = tf.random_normal(shape=[1, 5]) - x2 = tf.random_normal(shape=[1, 5]) - xs = x1 + x2 - - # Apply the transform with the same weights. - y1 = linear_map(x1) - y2 = linear_map(x2) - ys = linear_map(xs) - - with self.test_session() as sess: - # Initialize all the variables of the graph. - tf.global_variables_initializer().run() - - y1_res, y2_res, ys_res = sess.run([y1, y2, ys]) - self.assertAllClose(y1_res + y2_res, ys_res) - - def CheckNN(self, y, nn, act=None): - if act: - pre_act = self.CheckUnary(y, act) - else: - pre_act = y - - if not isinstance(nn._bias, blocks_std.PassThrough): - pre_bias = self.CheckBiasAdd(pre_act, nn._bias) - else: - pre_bias = pre_act - - if len(nn._matrices) > 1: - self.assertEqual('AddN', pre_bias.op.type) - pre_bias = pre_bias.op.inputs - else: - pre_bias = [pre_bias] - - self.assertEqual(len(pre_bias), len(nn._matrices)) - return [self.CheckLinear(u, m) for u, m in zip(pre_bias, nn._matrices)] - - def testNNWithoutActWithoutBias(self): - nn = blocks_std.NN(10, act=None, bias=None) - x = tf.placeholder(dtype=tf.float32, shape=[5, 7]) - y = nn(x) - self.assertIs(x, self.CheckNN(y, nn)[0]) - - def testNNWithoutBiasWithAct(self): - nn = blocks_std.NN(10, act=tf.nn.relu, bias=None) - x = tf.placeholder(dtype=tf.float32, shape=[5, 7]) - y = nn(x) - self.assertIs(x, self.CheckNN(y, nn, 'Relu')[0]) - - def testNNWithBiasWithoutAct(self): - nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=None) - x = tf.placeholder(dtype=tf.float32, shape=[5, 7]) - y = nn(x) - self.assertIs(x, self.CheckNN(y, nn)[0]) - - def testNNWithBiasWithAct(self): - nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=tf.square) - x = tf.placeholder(dtype=tf.float32, shape=[5, 7]) - y = nn(x) - self.assertIs(x, self.CheckNN(y, nn, 'Square')[0]) - - def testNNMultipleInputs(self): - nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=tf.tanh) - x = [tf.placeholder(dtype=tf.float32, shape=[5, 7]), - 
tf.placeholder(dtype=tf.float32, shape=[5, 3]), - tf.placeholder(dtype=tf.float32, shape=[5, 5])] - y = nn(*x) - xs = self.CheckNN(y, nn, 'Tanh') - self.assertEqual(len(x), len(xs)) - for u, v in zip(x, xs): - self.assertIs(u, v) - - def testConv2DSAME(self): - np.random.seed(142536) - - x_shape = [4, 16, 11, 5] - f_shape = [4, 3, 5, 6] - strides = [1, 2, 2, 1] - padding = 'SAME' - - conv = blocks_std.Conv2D(depth=f_shape[-1], - filter_size=f_shape[0:2], - strides=strides[1:3], - padding=padding, - act=None, - bias=None) - x_value = np.random.normal(size=x_shape) - x = tf.convert_to_tensor(x_value, dtype=tf.float32) - y = conv(x) - - with self.test_session(): - tf.global_variables_initializer().run() - f_value = conv._kernel.eval() - y_value = y.eval() - - y_expected = _NumpyConv2D(x_value, f_value, - strides=strides, padding=padding) - self.assertAllClose(y_expected, y_value) - - def testConv2DValid(self): - np.random.seed(253647) - - x_shape = [4, 11, 12, 5] - f_shape = [5, 2, 5, 5] - strides = [1, 2, 2, 1] - padding = 'VALID' - - conv = blocks_std.Conv2D(depth=f_shape[-1], - filter_size=f_shape[0:2], - strides=strides[1:3], - padding=padding, - act=None, - bias=None) - x_value = np.random.normal(size=x_shape) - x = tf.convert_to_tensor(x_value, dtype=tf.float32) - y = conv(x) - - with self.test_session(): - tf.global_variables_initializer().run() - f_value = conv._kernel.eval() - y_value = y.eval() - - y_expected = _NumpyConv2D(x_value, f_value, - strides=strides, padding=padding) - self.assertAllClose(y_expected, y_value) - - def testConv2DSymmetric(self): - np.random.seed(364758) - - x_shape = [4, 10, 12, 6] - f_shape = [3, 4, 6, 5] - strides = [1, 1, 1, 1] - padding = 'SYMMETRIC' - - conv = blocks_std.Conv2D(depth=f_shape[-1], - filter_size=f_shape[0:2], - strides=strides[1:3], - padding=padding, - act=None, - bias=None) - x_value = np.random.normal(size=x_shape) - x = tf.convert_to_tensor(x_value, dtype=tf.float32) - y = conv(x) - - with self.test_session(): 
- tf.global_variables_initializer().run() - f_value = conv._kernel.eval() - y_value = y.eval() - - y_expected = _NumpyConv2D(x_value, f_value, - strides=strides, padding=padding) - self.assertAllClose(y_expected, y_value) - - def testConv2DReflect(self): - np.random.seed(768798) - - x_shape = [4, 10, 12, 6] - f_shape = [3, 4, 6, 5] - strides = [1, 2, 2, 1] - padding = 'REFLECT' - - conv = blocks_std.Conv2D(depth=f_shape[-1], - filter_size=f_shape[0:2], - strides=strides[1:3], - padding=padding, - act=None, - bias=None) - x_value = np.random.normal(size=x_shape) - x = tf.convert_to_tensor(x_value, dtype=tf.float32) - y = conv(x) - - with self.test_session(): - tf.global_variables_initializer().run() - f_value = conv._kernel.eval() - y_value = y.eval() - - y_expected = _NumpyConv2D(x_value, f_value, - strides=strides, padding=padding) - self.assertAllClose(y_expected, y_value) - - def testConv2DBias(self): - input_shape = [19, 14, 14, 64] - filter_shape = [3, 7, 64, 128] - strides = [1, 2, 2, 1] - output_shape = [19, 6, 4, 128] - - conv = blocks_std.Conv2D(depth=filter_shape[-1], - filter_size=filter_shape[0:2], - strides=strides[1:3], - padding='VALID', - act=None, - bias=blocks_std.Bias(1)) - x = tf.placeholder(dtype=tf.float32, shape=input_shape) - - y = conv(x) - self.CheckBiasAdd(y, conv._bias) - self.assertEqual(output_shape, y.get_shape().as_list()) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/compression/entropy_coder/model/__init__.py b/research/compression/entropy_coder/model/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/compression/entropy_coder/model/entropy_coder_model.py b/research/compression/entropy_coder/model/entropy_coder_model.py deleted file mode 100644 index 67f7eb5bc05f3df7363529c19fa77d176caaabc1..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/model/entropy_coder_model.py +++ 
/dev/null @@ -1,55 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Entropy coder model.""" - - -class EntropyCoderModel(object): - """Entropy coder model.""" - - def __init__(self): - # Loss used for training the model. - self.loss = None - - # Tensorflow op to run to train the model. - self.train_op = None - - # Tensor corresponding to the average code length of the input bit field - # tensor. The average code length is a number of output bits per input bit. - # To get an effective compression, this number should be between 0.0 - # and 1.0 (1.0 corresponds to no compression). - self.average_code_length = None - - def Initialize(self, global_step, optimizer, config_string): - raise NotImplementedError() - - def BuildGraph(self, input_codes): - """Build the Tensorflow graph corresponding to the entropy coder model. - - Args: - input_codes: Tensor of size: batch_size x height x width x bit_depth - corresponding to the codes to compress. - The input codes are {-1, +1} codes. - """ - # TODO: - # - consider switching to {0, 1} codes. - # - consider passing an extra tensor which gives for each (b, y, x) - # what is the actual depth (which would allow to use more or less bits - # for each (y, x) location. 
- raise NotImplementedError() - - def GetConfigStringForUnitTest(self): - """Returns a default model configuration to be used for unit tests.""" - return None diff --git a/research/compression/entropy_coder/model/model_factory.py b/research/compression/entropy_coder/model/model_factory.py deleted file mode 100644 index e6f9902f3bb720e76f228f2774a9eaf7774ef191..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/model/model_factory.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Entropy coder model registrar.""" - - -class ModelFactory(object): - """Factory of encoder/decoder models.""" - - def __init__(self): - self._model_dictionary = dict() - - def RegisterModel(self, - entropy_coder_model_name, - entropy_coder_model_factory): - self._model_dictionary[entropy_coder_model_name] = ( - entropy_coder_model_factory) - - def CreateModel(self, model_name): - current_model_factory = self._model_dictionary[model_name] - return current_model_factory() - - def GetAvailableModels(self): - return self._model_dictionary.keys() - - -_model_registry = ModelFactory() - - -def GetModelRegistry(): - return _model_registry - - -class RegisterEntropyCoderModel(object): - - def __init__(self, model_name): - self._model_name = model_name - - def __call__(self, f): - _model_registry.RegisterModel(self._model_name, f) - return f diff --git a/research/compression/entropy_coder/progressive/__init__.py b/research/compression/entropy_coder/progressive/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/compression/entropy_coder/progressive/progressive.py b/research/compression/entropy_coder/progressive/progressive.py deleted file mode 100644 index 7b03a07db055b62aa1c0f9cc89ddd2472899db3c..0000000000000000000000000000000000000000 --- a/research/compression/entropy_coder/progressive/progressive.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Code probability model used for entropy coding.""" - -import json - -from six.moves import xrange -import tensorflow as tf - -from entropy_coder.lib import blocks -from entropy_coder.model import entropy_coder_model -from entropy_coder.model import model_factory - -# pylint: disable=not-callable - - -class BrnnPredictor(blocks.BlockBase): - """BRNN prediction applied on one layer.""" - - def __init__(self, code_depth, name=None): - super(BrnnPredictor, self).__init__(name) - - with self._BlockScope(): - hidden_depth = 2 * code_depth - - # What is coming from the previous layer/iteration - # is going through a regular Conv2D layer as opposed to the binary codes - # of the current layer/iteration which are going through a masked - # convolution. 
- self._adaptation0 = blocks.RasterScanConv2D( - hidden_depth, [7, 7], [1, 1], 'SAME', - strict_order=True, - bias=blocks.Bias(0), act=tf.tanh) - self._adaptation1 = blocks.Conv2D( - hidden_depth, [3, 3], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh) - self._predictor = blocks.CompositionOperator([ - blocks.LineOperator( - blocks.RasterScanConv2DLSTM( - depth=hidden_depth, - filter_size=[1, 3], - hidden_filter_size=[1, 3], - strides=[1, 1], - padding='SAME')), - blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh), - blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh) - ]) - - def _Apply(self, x, s): - # Code estimation using both: - # - the state from the previous iteration/layer, - # - the binary codes that are before in raster scan order. - h = tf.concat(values=[self._adaptation0(x), self._adaptation1(s)], axis=3) - - estimated_codes = self._predictor(h) - - return estimated_codes - - -class LayerPrediction(blocks.BlockBase): - """Binary code prediction for one layer.""" - - def __init__(self, layer_count, code_depth, name=None): - super(LayerPrediction, self).__init__(name) - - self._layer_count = layer_count - - # No previous layer. - self._layer_state = None - self._current_layer = 0 - - with self._BlockScope(): - # Layers used to do the conditional code prediction. - self._brnn_predictors = [] - for _ in xrange(layer_count): - self._brnn_predictors.append(BrnnPredictor(code_depth)) - - # Layers used to generate the input of the LSTM operating on the - # iteration/depth domain. 
- hidden_depth = 2 * code_depth - self._state_blocks = [] - for _ in xrange(layer_count): - self._state_blocks.append(blocks.CompositionOperator([ - blocks.Conv2D( - hidden_depth, [3, 3], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh), - blocks.Conv2D( - code_depth, [3, 3], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh) - ])) - - # Memory of the RNN is equivalent to the size of 2 layers of binary - # codes. - hidden_depth = 2 * code_depth - self._layer_rnn = blocks.CompositionOperator([ - blocks.Conv2DLSTM( - depth=hidden_depth, - filter_size=[1, 1], - hidden_filter_size=[1, 1], - strides=[1, 1], - padding='SAME'), - blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh), - blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME', - bias=blocks.Bias(0), act=tf.tanh) - ]) - - def _Apply(self, x): - assert self._current_layer < self._layer_count - - # Layer state is set to 0 when there is no previous iteration. - if self._layer_state is None: - self._layer_state = tf.zeros_like(x, dtype=tf.float32) - - # Code estimation using both: - # - the state from the previous iteration/layer, - # - the binary codes that are before in raster scan order. - estimated_codes = self._brnn_predictors[self._current_layer]( - x, self._layer_state) - - # Compute the updated layer state. 
- h = self._state_blocks[self._current_layer](x) - self._layer_state = self._layer_rnn(h) - self._current_layer += 1 - - return estimated_codes - - -class ProgressiveModel(entropy_coder_model.EntropyCoderModel): - """Progressive BRNN entropy coder model.""" - - def __init__(self): - super(ProgressiveModel, self).__init__() - - def Initialize(self, global_step, optimizer, config_string): - if config_string is None: - raise ValueError('The progressive model requires a configuration.') - config = json.loads(config_string) - if 'coded_layer_count' not in config: - config['coded_layer_count'] = 0 - - self._config = config - self._optimizer = optimizer - self._global_step = global_step - - def BuildGraph(self, input_codes): - """Build the graph corresponding to the progressive BRNN model.""" - layer_depth = self._config['layer_depth'] - layer_count = self._config['layer_count'] - - code_shape = input_codes.get_shape() - code_depth = code_shape[-1].value - if self._config['coded_layer_count'] > 0: - prefix_depth = self._config['coded_layer_count'] * layer_depth - if code_depth < prefix_depth: - raise ValueError('Invalid prefix depth: {} VS {}'.format( - prefix_depth, code_depth)) - input_codes = input_codes[:, :, :, :prefix_depth] - - code_shape = input_codes.get_shape() - code_depth = code_shape[-1].value - if code_depth % layer_depth != 0: - raise ValueError( - 'Code depth must be a multiple of the layer depth: {} vs {}'.format( - code_depth, layer_depth)) - code_layer_count = code_depth // layer_depth - if code_layer_count > layer_count: - raise ValueError('Input codes have too many layers: {}, max={}'.format( - code_layer_count, layer_count)) - - # Block used to estimate binary codes. - layer_prediction = LayerPrediction(layer_count, layer_depth) - - # Block used to compute code lengths. - code_length_block = blocks.CodeLength() - - # Loop over all the layers. 
- code_length = [] - code_layers = tf.split( - value=input_codes, num_or_size_splits=code_layer_count, axis=3) - for k in xrange(code_layer_count): - x = code_layers[k] - predicted_x = layer_prediction(x) - # Saturate the prediction to avoid infinite code length. - epsilon = 0.001 - predicted_x = tf.clip_by_value( - predicted_x, -1 + epsilon, +1 - epsilon) - code_length.append(code_length_block( - blocks.ConvertSignCodeToZeroOneCode(x), - blocks.ConvertSignCodeToZeroOneCode(predicted_x))) - tf.summary.scalar('code_length_layer_{:02d}'.format(k), code_length[-1]) - code_length = tf.stack(code_length) - self.loss = tf.reduce_mean(code_length) - tf.summary.scalar('loss', self.loss) - - # Loop over all the remaining layers just to make sure they are - # instantiated. Otherwise, loading model params could fail. - dummy_x = tf.zeros_like(code_layers[0]) - for _ in xrange(layer_count - code_layer_count): - dummy_predicted_x = layer_prediction(dummy_x) - - # Average bitrate over total_line_count. 
- self.average_code_length = tf.reduce_mean(code_length) - - if self._optimizer: - optim_op = self._optimizer.minimize(self.loss, - global_step=self._global_step) - block_updates = blocks.CreateBlockUpdates() - if block_updates: - with tf.get_default_graph().control_dependencies([optim_op]): - self.train_op = tf.group(*block_updates) - else: - self.train_op = optim_op - else: - self.train_op = None - - def GetConfigStringForUnitTest(self): - s = '{\n' - s += '"layer_depth": 1,\n' - s += '"layer_count": 8\n' - s += '}\n' - return s - - -@model_factory.RegisterEntropyCoderModel('progressive') -def CreateProgressiveModel(): - return ProgressiveModel() diff --git a/research/compression/image_encoder/README.md b/research/compression/image_encoder/README.md deleted file mode 100644 index a47da977aa4db4be26528c5ebfe030024f31291b..0000000000000000000000000000000000000000 --- a/research/compression/image_encoder/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# Image Compression with Neural Networks - -This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and -decompressing images using an already trained Residual GRU model as descibed -in [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148). Please consult the paper for more details -on the architecture and compression results. - -This code will allow you to perform the lossy compression on an model -already trained on compression. This code doesn't not currently contain the -Entropy Coding portions of our paper. - - -## Prerequisites -The only software requirements for running the encoder and decoder is having -Tensorflow installed. You will also need to [download](http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz) -and extract the model residual_gru.pb. - -If you want to generate the perceptual similarity under MS-SSIM, you will also -need to [Install SciPy](https://www.scipy.org/install.html). 
- -## Encoding -The Residual GRU network is fully convolutional, but requires the images -height and width in pixels by a multiple of 32. There is an image in this folder -called example.png that is 768x1024 if one is needed for testing. We also -rely on TensorFlow's built in decoding ops, which support only PNG and JPEG at -time of release. - -To encode an image, simply run the following command: - -`python encoder.py --input_image=/your/image/here.png ---output_codes=output_codes.npz --iteration=15 ---model=/path/to/model/residual_gru.pb -` - -The iteration parameter specifies the lossy-quality to target for compression. -The quality can be [0-15], where 0 corresponds to a target of 1/8 (bits per -pixel) bpp and every increment results in an additional 1/8 bpp. - -| Iteration | BPP | Compression Ratio | -|---: |---: |---: | -|0 | 0.125 | 192:1| -|1 | 0.250 | 96:1| -|2 | 0.375 | 64:1| -|3 | 0.500 | 48:1| -|4 | 0.625 | 38.4:1| -|5 | 0.750 | 32:1| -|6 | 0.875 | 27.4:1| -|7 | 1.000 | 24:1| -|8 | 1.125 | 21.3:1| -|9 | 1.250 | 19.2:1| -|10 | 1.375 | 17.4:1| -|11 | 1.500 | 16:1| -|12 | 1.625 | 14.7:1| -|13 | 1.750 | 13.7:1| -|14 | 1.875 | 12.8:1| -|15 | 2.000 | 12:1| - -The output_codes file contains the numpy shape and a flattened, bit-packed -array of the codes. These can be inspected in python by using numpy.load(). - - -## Decoding -After generating codes for an image, the lossy reconstructions for that image -can be done as follows: - -`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/ ---model=residual_gru.pb` - -The output_directory will contain images decoded at each quality level. - - -## Comparing Similarity -One of our primary metrics for comparing how similar two images are -is MS-SSIM. 
- -To generate these metrics on your images you can run: -`python msssim.py --original_image=/path/to/your/image.png ---compared_image=/tmp/decoded/image_15.png` - - -## Results -CSV results containing the post-entropy bitrates and MS-SSIM over Kodak can -are available for reference. Each row of the CSV represents each of the Kodak -images in their dataset number (1-24). Each column of the CSV represents each -iteration of the model (1-16). - -[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv) - -[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv) - - -## FAQ - -#### How do I train my own compression network? -We currently don't provide the code to build and train a compression -graph from scratch. - -#### I get an InvalidArgumentError: Incompatible shapes. -This is usually due to the fact that our network only supports images that are -both height and width divisible by 32 pixel. Try padding your images to 32 -pixel boundaries. - - -## Contact Info -Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)). diff --git a/research/compression/image_encoder/decoder.py b/research/compression/image_encoder/decoder.py deleted file mode 100644 index 75bc18cad0fdd4055df7b42d5440635365504774..0000000000000000000000000000000000000000 --- a/research/compression/image_encoder/decoder.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Neural Network Image Compression Decoder. - -Decompress an image from the numpy's npz format generated by the encoder. - -Example usage: -python decoder.py --input_codes=output_codes.pkl --iteration=15 \ ---output_directory=/tmp/compression_output/ --model=residual_gru.pb -""" -import io -import os - -import numpy as np -import tensorflow as tf - -tf.flags.DEFINE_string('input_codes', None, 'Location of binary code file.') -tf.flags.DEFINE_integer('iteration', -1, 'The max quality level of ' - 'the images to output. Use -1 to infer from loaded ' - ' codes.') -tf.flags.DEFINE_string('output_directory', None, 'Directory to save decoded ' - 'images.') -tf.flags.DEFINE_string('model', None, 'Location of compression model.') - -FLAGS = tf.flags.FLAGS - - -def get_input_tensor_names(): - name_list = ['GruBinarizer/SignBinarizer/Sign:0'] - for i in range(1, 16): - name_list.append('GruBinarizer/SignBinarizer/Sign_{}:0'.format(i)) - return name_list - - -def get_output_tensor_names(): - return ['loop_{0:02d}/add:0'.format(i) for i in range(0, 16)] - - -def main(_): - if (FLAGS.input_codes is None or FLAGS.output_directory is None or - FLAGS.model is None): - print('\nUsage: python decoder.py --input_codes=output_codes.pkl ' - '--iteration=15 --output_directory=/tmp/compression_output/ ' - '--model=residual_gru.pb\n\n') - return - - if FLAGS.iteration < -1 or FLAGS.iteration > 15: - print('\n--iteration must be between 0 and 15 inclusive, or -1 to infer ' - 'from file.\n') - return - iteration = FLAGS.iteration - - if not tf.gfile.Exists(FLAGS.output_directory): - tf.gfile.MkDir(FLAGS.output_directory) - - if not tf.gfile.Exists(FLAGS.input_codes): - print('\nInput codes not found.\n') - return - - contents = '' - with tf.gfile.FastGFile(FLAGS.input_codes, 'rb') as code_file: - 
contents = code_file.read() - loaded_codes = np.load(io.BytesIO(contents)) - assert ['codes', 'shape'] not in loaded_codes.files - loaded_shape = loaded_codes['shape'] - loaded_array = loaded_codes['codes'] - - # Unpack and recover code shapes. - unpacked_codes = np.reshape(np.unpackbits(loaded_array) - [:np.prod(loaded_shape)], - loaded_shape) - - numpy_int_codes = np.split(unpacked_codes, len(unpacked_codes)) - if iteration == -1: - iteration = len(unpacked_codes) - 1 - # Convert back to float and recover scale. - numpy_codes = [np.squeeze(x.astype(np.float32), 0) * 2 - 1 for x in - numpy_int_codes] - - with tf.Graph().as_default() as graph: - # Load the inference model for decoding. - with tf.gfile.FastGFile(FLAGS.model, 'rb') as model_file: - graph_def = tf.GraphDef() - graph_def.ParseFromString(model_file.read()) - _ = tf.import_graph_def(graph_def, name='') - - # For encoding the tensors into PNGs. - input_image = tf.placeholder(tf.uint8) - encoded_image = tf.image.encode_png(input_image) - - input_tensors = [graph.get_tensor_by_name(name) for name in - get_input_tensor_names()][0:iteration+1] - outputs = [graph.get_tensor_by_name(name) for name in - get_output_tensor_names()][0:iteration+1] - - feed_dict = {key: value for (key, value) in zip(input_tensors, - numpy_codes)} - - with tf.Session(graph=graph) as sess: - results = sess.run(outputs, feed_dict=feed_dict) - - for index, result in enumerate(results): - img = np.uint8(np.clip(result + 0.5, 0, 255)) - img = img.squeeze() - png_img = sess.run(encoded_image, feed_dict={input_image: img}) - - with tf.gfile.FastGFile(os.path.join(FLAGS.output_directory, - 'image_{0:02d}.png'.format(index)), - 'w') as output_image: - output_image.write(png_img) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/compression/image_encoder/encoder.py b/research/compression/image_encoder/encoder.py deleted file mode 100644 index 27754bdaea19779cea653408d17ed2e6a051f0c5..0000000000000000000000000000000000000000 
--- a/research/compression/image_encoder/encoder.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Neural Network Image Compression Encoder. - -Compresses an image to a binarized numpy array. The image must be padded to a -multiple of 32 pixels in height and width. - -Example usage: -python encoder.py --input_image=/your/image/here.png \ ---output_codes=output_codes.pkl --iteration=15 --model=residual_gru.pb -""" -import io -import os - -import numpy as np -import tensorflow as tf - -tf.flags.DEFINE_string('input_image', None, 'Location of input image. We rely ' - 'on tf.image to decode the image, so only PNG and JPEG ' - 'formats are currently supported.') -tf.flags.DEFINE_integer('iteration', 15, 'Quality level for encoding image. 
' - 'Must be between 0 and 15 inclusive.') -tf.flags.DEFINE_string('output_codes', None, 'File to save output encoding.') -tf.flags.DEFINE_string('model', None, 'Location of compression model.') - -FLAGS = tf.flags.FLAGS - - -def get_output_tensor_names(): - name_list = ['GruBinarizer/SignBinarizer/Sign:0'] - for i in range(1, 16): - name_list.append('GruBinarizer/SignBinarizer/Sign_{}:0'.format(i)) - return name_list - - -def main(_): - if (FLAGS.input_image is None or FLAGS.output_codes is None or - FLAGS.model is None): - print('\nUsage: python encoder.py --input_image=/your/image/here.png ' - '--output_codes=output_codes.pkl --iteration=15 ' - '--model=residual_gru.pb\n\n') - return - - if FLAGS.iteration < 0 or FLAGS.iteration > 15: - print('\n--iteration must be between 0 and 15 inclusive.\n') - return - - with tf.gfile.FastGFile(FLAGS.input_image, 'rb') as input_image: - input_image_str = input_image.read() - - with tf.Graph().as_default() as graph: - # Load the inference model for encoding. 
- with tf.gfile.FastGFile(FLAGS.model, 'rb') as model_file: - graph_def = tf.GraphDef() - graph_def.ParseFromString(model_file.read()) - _ = tf.import_graph_def(graph_def, name='') - - input_tensor = graph.get_tensor_by_name('Placeholder:0') - outputs = [graph.get_tensor_by_name(name) for name in - get_output_tensor_names()] - - input_image = tf.placeholder(tf.string) - _, ext = os.path.splitext(FLAGS.input_image) - if ext == '.png': - decoded_image = tf.image.decode_png(input_image, channels=3) - elif ext == '.jpeg' or ext == '.jpg': - decoded_image = tf.image.decode_jpeg(input_image, channels=3) - else: - assert False, 'Unsupported file format {}'.format(ext) - decoded_image = tf.expand_dims(decoded_image, 0) - - with tf.Session(graph=graph) as sess: - img_array = sess.run(decoded_image, feed_dict={input_image: - input_image_str}) - results = sess.run(outputs, feed_dict={input_tensor: img_array}) - - results = results[0:FLAGS.iteration + 1] - int_codes = np.asarray([x.astype(np.int8) for x in results]) - - # Convert int codes to binary. 
- int_codes = (int_codes + 1)//2 - export = np.packbits(int_codes.reshape(-1)) - - output = io.BytesIO() - np.savez_compressed(output, shape=int_codes.shape, codes=export) - with tf.gfile.FastGFile(FLAGS.output_codes, 'w') as code_file: - code_file.write(output.getvalue()) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/compression/image_encoder/example.png b/research/compression/image_encoder/example.png deleted file mode 100644 index d3409b01a557fe8c3058fad21ed969f8af28cb97..0000000000000000000000000000000000000000 Binary files a/research/compression/image_encoder/example.png and /dev/null differ diff --git a/research/compression/image_encoder/msssim.py b/research/compression/image_encoder/msssim.py deleted file mode 100644 index f07a3712785c62feb261feb90016e0f621a3ee1d..0000000000000000000000000000000000000000 --- a/research/compression/image_encoder/msssim.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/python -# -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Python implementation of MS-SSIM. 
- -Usage: - -python msssim.py --original_image=original.png --compared_image=distorted.png -""" -import numpy as np -from scipy import signal -from scipy.ndimage.filters import convolve -import tensorflow as tf - - -tf.flags.DEFINE_string('original_image', None, 'Path to PNG image.') -tf.flags.DEFINE_string('compared_image', None, 'Path to PNG image.') -FLAGS = tf.flags.FLAGS - - -def _FSpecialGauss(size, sigma): - """Function to mimic the 'fspecial' gaussian MATLAB function.""" - radius = size // 2 - offset = 0.0 - start, stop = -radius, radius + 1 - if size % 2 == 0: - offset = 0.5 - stop -= 1 - x, y = np.mgrid[offset + start:stop, offset + start:stop] - assert len(x) == size - g = np.exp(-((x**2 + y**2)/(2.0 * sigma**2))) - return g / g.sum() - - -def _SSIMForMultiScale(img1, img2, max_val=255, filter_size=11, - filter_sigma=1.5, k1=0.01, k2=0.03): - """Return the Structural Similarity Map between `img1` and `img2`. - - This function attempts to match the functionality of ssim_index_new.m by - Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip - - Arguments: - img1: Numpy array holding the first RGB image batch. - img2: Numpy array holding the second RGB image batch. - max_val: the dynamic range of the images (i.e., the difference between the - maximum the and minimum allowed values). - filter_size: Size of blur kernel to use (will be reduced for small images). - filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced - for small images). - k1: Constant used to maintain stability in the SSIM calculation (0.01 in - the original paper). - k2: Constant used to maintain stability in the SSIM calculation (0.03 in - the original paper). - - Returns: - Pair containing the mean SSIM and contrast sensitivity between `img1` and - `img2`. - - Raises: - RuntimeError: If input images don't have the same shape or don't have four - dimensions: [batch_size, height, width, depth]. 
- """ - if img1.shape != img2.shape: - raise RuntimeError('Input images must have the same shape (%s vs. %s).', - img1.shape, img2.shape) - if img1.ndim != 4: - raise RuntimeError('Input images must have four dimensions, not %d', - img1.ndim) - - img1 = img1.astype(np.float64) - img2 = img2.astype(np.float64) - _, height, width, _ = img1.shape - - # Filter size can't be larger than height or width of images. - size = min(filter_size, height, width) - - # Scale down sigma if a smaller filter size is used. - sigma = size * filter_sigma / filter_size if filter_size else 0 - - if filter_size: - window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1)) - mu1 = signal.fftconvolve(img1, window, mode='valid') - mu2 = signal.fftconvolve(img2, window, mode='valid') - sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid') - sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid') - sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid') - else: - # Empty blur kernel so no need to convolve. - mu1, mu2 = img1, img2 - sigma11 = img1 * img1 - sigma22 = img2 * img2 - sigma12 = img1 * img2 - - mu11 = mu1 * mu1 - mu22 = mu2 * mu2 - mu12 = mu1 * mu2 - sigma11 -= mu11 - sigma22 -= mu22 - sigma12 -= mu12 - - # Calculate intermediate values used by both ssim and cs_map. - c1 = (k1 * max_val) ** 2 - c2 = (k2 * max_val) ** 2 - v1 = 2.0 * sigma12 + c2 - v2 = sigma11 + sigma22 + c2 - ssim = np.mean((((2.0 * mu12 + c1) * v1) / ((mu11 + mu22 + c1) * v2))) - cs = np.mean(v1 / v2) - return ssim, cs - - -def MultiScaleSSIM(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, - k1=0.01, k2=0.03, weights=None): - """Return the MS-SSIM score between `img1` and `img2`. - - This function implements Multi-Scale Structural Similarity (MS-SSIM) Image - Quality Assessment according to Zhou Wang's paper, "Multi-scale structural - similarity for image quality assessment" (2003). 
- Link: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf - - Author's MATLAB implementation: - http://www.cns.nyu.edu/~lcv/ssim/msssim.zip - - Arguments: - img1: Numpy array holding the first RGB image batch. - img2: Numpy array holding the second RGB image batch. - max_val: the dynamic range of the images (i.e., the difference between the - maximum the and minimum allowed values). - filter_size: Size of blur kernel to use (will be reduced for small images). - filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced - for small images). - k1: Constant used to maintain stability in the SSIM calculation (0.01 in - the original paper). - k2: Constant used to maintain stability in the SSIM calculation (0.03 in - the original paper). - weights: List of weights for each level; if none, use five levels and the - weights from the original paper. - - Returns: - MS-SSIM score between `img1` and `img2`. - - Raises: - RuntimeError: If input images don't have the same shape or don't have four - dimensions: [batch_size, height, width, depth]. - """ - if img1.shape != img2.shape: - raise RuntimeError('Input images must have the same shape (%s vs. %s).', - img1.shape, img2.shape) - if img1.ndim != 4: - raise RuntimeError('Input images must have four dimensions, not %d', - img1.ndim) - - # Note: default weights don't sum to 1.0 but do match the paper / matlab code. 
- weights = np.array(weights if weights else - [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]) - levels = weights.size - downsample_filter = np.ones((1, 2, 2, 1)) / 4.0 - im1, im2 = [x.astype(np.float64) for x in [img1, img2]] - mssim = np.array([]) - mcs = np.array([]) - for _ in range(levels): - ssim, cs = _SSIMForMultiScale( - im1, im2, max_val=max_val, filter_size=filter_size, - filter_sigma=filter_sigma, k1=k1, k2=k2) - mssim = np.append(mssim, ssim) - mcs = np.append(mcs, cs) - filtered = [convolve(im, downsample_filter, mode='reflect') - for im in [im1, im2]] - im1, im2 = [x[:, ::2, ::2, :] for x in filtered] - return (np.prod(mcs[0:levels-1] ** weights[0:levels-1]) * - (mssim[levels-1] ** weights[levels-1])) - - -def main(_): - if FLAGS.original_image is None or FLAGS.compared_image is None: - print('\nUsage: python msssim.py --original_image=original.png ' - '--compared_image=distorted.png\n\n') - return - - if not tf.gfile.Exists(FLAGS.original_image): - print('\nCannot find --original_image.\n') - return - - if not tf.gfile.Exists(FLAGS.compared_image): - print('\nCannot find --compared_image.\n') - return - - with tf.gfile.FastGFile(FLAGS.original_image) as image_file: - img1_str = image_file.read('rb') - with tf.gfile.FastGFile(FLAGS.compared_image) as image_file: - img2_str = image_file.read('rb') - - input_img = tf.placeholder(tf.string) - decoded_image = tf.expand_dims(tf.image.decode_png(input_img, channels=3), 0) - - with tf.Session() as sess: - img1 = sess.run(decoded_image, feed_dict={input_img: img1_str}) - img2 = sess.run(decoded_image, feed_dict={input_img: img2_str}) - - print((MultiScaleSSIM(img1, img2, max_val=255))) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/deep_contextual_bandits/README.md b/research/deep_contextual_bandits/README.md deleted file mode 100644 index b81309af5b08003eb727e079e70c3dd08eedb6f6..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/README.md +++ /dev/null @@ -1,444 
+0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Deep Bayesian Bandits Library - -This library corresponds to the *[Deep Bayesian Bandits Showdown: An Empirical -Comparison of Bayesian Deep Networks for Thompson -Sampling](https://arxiv.org/abs/1802.09127)* paper, published in -[ICLR](https://iclr.cc/) 2018. We provide a benchmark to test decision-making -algorithms for contextual-bandits. In particular, the current library implements -a variety of algorithms (many of them based on approximate Bayesian Neural -Networks and Thompson sampling), and a number of real and syntethic data -problems exhibiting a diverse set of properties. - -It is a Python library that uses [TensorFlow](https://www.tensorflow.org/). - -We encourage contributors to add new approximate Bayesian Neural Networks or, -more generally, contextual bandits algorithms to the library. Also, we would -like to extend the data sources over time, so we warmly encourage contributions -in this front too! - -Please, use the following when citing the code or the paper: - -``` -@article{riquelme2018deep, title={Deep Bayesian Bandits Showdown: An Empirical -Comparison of Bayesian Deep Networks for Thompson Sampling}, -author={Riquelme, Carlos and Tucker, George and Snoek, Jasper}, -journal={International Conference on Learning Representations, ICLR.}, year={2018}} -``` - -**Contact**. This repository is maintained by [Carlos Riquelme](http://rikel.me) ([rikel](https://github.com/rikel)). Feel free to reach out directly at [rikel@google.com](mailto:rikel@google.com) with any questions or comments. 
- - -We first briefly introduce contextual bandits, Thompson sampling, enumerate the -implemented algorithms, and the available data sources. Then, we provide a -simple complete example illustrating how to use the library. - -## Contextual Bandits - -Contextual bandits are a rich decision-making framework where an algorithm has -to choose among a set of *k* actions at every time step *t*, after observing -a context (or side-information) denoted by *Xt*. The general pseudocode for -the process if we use algorithm **A** is as follows: - -``` -At time t = 1, ..., T: - 1. Observe new context: X_t - 2. Choose action: a_t = A.action(X_t) - 3. Observe reward: r_t - 4. Update internal state of the algorithm: A.update((X_t, a_t, r_t)) -``` - -The goal is to maximize the total sum of rewards: ∑t rt - -For example, each *Xt* could encode the properties of a specific user (and -the time or day), and we may have to choose an ad, discount coupon, treatment, -hyper-parameters, or version of a website to show or provide to the user. -Hopefully, over time, we will learn how to match each type of user to the most -beneficial personalized action under some metric (the reward). - -## Thompson Sampling - -Thompson Sampling is a meta-algorithm that chooses an action for the contextual -bandit in a statistically efficient manner, simultaneously finding the best arm -while attempting to incur low cost. Informally speaking, we assume the expected -reward is given by some function -**E**[rt | Xt, at] = f(Xt, at). -Unfortunately, function **f** is unknown, as otherwise we could just choose the -action with highest expected value: -at* = arg maxi f(Xt, at). - -The idea behind Thompson Sampling is based on keeping a posterior distribution -πt over functions in some family f ∈ F after observing the first -*t-1* datapoints. Then, at time *t*, we sample one potential explanation of -the underlying process: ft ∼ πt, and act optimally (i.e., greedily) -*according to ft*. 
In other words, we choose -at = arg maxi ft(Xt, ai). -Finally, we update our posterior distribution with the new collected -datapoint (Xt, at, rt). - -The main issue is that keeping an updated posterior πt (or, even, -sampling from it) is often intractable for highly parameterized models like deep -neural networks. The algorithms we list in the next section provide tractable -*approximations* that can be used in combination with Thompson Sampling to solve -the contextual bandit problem. - -## Algorithms - -The Deep Bayesian Bandits library includes the following algorithms (see the -[paper](https://arxiv.org/abs/1802.09127) for further details): - -1. **Linear Algorithms**. As a powerful baseline, we provide linear algorithms. - In particular, we focus on the exact Bayesian linear regression - implementation, while it is easy to derive the greedy OLS version (possibly, - with epsilon-greedy exploration). The algorithm is implemented in - *linear_full_posterior_sampling.py*, and it is instantiated as follows: - - ``` - linear_full = LinearFullPosteriorSampling('MyLinearTS', my_hparams) - ``` - -2. **Neural Linear**. We introduce an algorithm we call Neural Linear, which - operates by learning a neural network to map contexts to rewards for each - action, and ---simultaneously--- it updates a Bayesian linear regression in - the last layer (i.e., the one that maps the final representation **z** to - the rewards **r**). Thompson Sampling samples the linear parameters - βi for each action *i*, but keeps the network that computes the - representation. Then, both parts (network and Bayesian linear regression) - are updated, possibly at different frequencies. The algorithm is implemented - in *neural_linear_sampling.py*, and we create an algorithm instance like - this: - - ``` - neural_linear = NeuralLinearPosteriorSampling('MyNLinear', my_hparams) - ``` - -3. **Neural Greedy**. 
Another standard benchmark is to train a neural network - that maps contexts to rewards, and at each time *t* just acts greedily - according to the current model. In particular, this approach does *not* - explicitly use Thompson Sampling. However, due to stochastic gradient - descent, there is still some randomness in its output. It is - straight-forward to add epsilon-greedy exploration to choose random - actions with probability ε ∈ (0, 1). The algorithm is - implemented in *neural_bandit_model.py*, and it is used together with - *PosteriorBNNSampling* (defined in *posterior_bnn_sampling.py*) by calling: - - ``` - neural_greedy = PosteriorBNNSampling('MyNGreedy', my_hparams, 'RMSProp') - ``` - -4. **Stochastic Variational Inference**, Bayes by Backpropagation. We implement - a Bayesian neural network by modeling each individual weight posterior as a - univariate Gaussian distribution: wij ∼ N(μij, σij2). - Thompson sampling then samples a network at each time step - by sampling each weight independently. The variational approach consists in - maximizing a proxy for maximum likelihood of the observed data, the ELBO or - variational lower bound, to fit the values of μij, σij2 - for every *i, j*. - - See [Weight Uncertainty in Neural - Networks](https://arxiv.org/abs/1505.05424). - - The BNN algorithm is implemented in *variational_neural_bandit_model.py*, - and it is used together with *PosteriorBNNSampling* (defined in - *posterior_bnn_sampling.py*) by calling: - - ``` - bbb = PosteriorBNNSampling('myBBB', my_hparams, 'Variational') - ``` - -5. **Expectation-Propagation**, Black-box alpha-divergence minimization. - The family of expectation-propagation algorithms is based on the message - passing framework . They iteratively approximate the posterior by updating a - single approximation factor (or site) at a time, which usually corresponds - to the likelihood of one data point. 
We focus on methods that directly - optimize the global EP objective via stochastic gradient descent, as, for - instance, Power EP. For further details see original paper below. - - See [Black-box alpha-divergence - Minimization](https://arxiv.org/abs/1511.03243). - - We create an instance of the algorithm like this: - - ``` - bb_adiv = PosteriorBNNSampling('MyEP', my_hparams, 'AlphaDiv') - ``` - -6. **Dropout**. Dropout is a training technique where the output of each neuron - is independently zeroed out with probability *p* at each forward pass. - Once the network has been trained, dropout can still be used to obtain a - distribution of predictions for a specific input. Following the best action - with respect to the random dropout prediction can be interpreted as an - implicit form of Thompson sampling. The code for dropout is the same as for - Neural Greedy (see above), but we need to set two hyper-parameters: - *use_dropout=True* and *keep_prob=p* where *p* takes the desired value in - (0, 1). Then: - - ``` - dropout = PosteriorBNNSampling('MyDropout', my_hparams, 'RMSProp') - ``` - -7. **Monte Carlo Methods**. To be added soon. - -8. **Bootstrapped Networks**. This algorithm trains simultaneously and in - parallel **q** neural networks based on different datasets D1, ..., Dq. The way those datasets are collected is by adding each new collected - datapoint (Xt, at, rt) to each dataset *Di* independently and with - probability p ∈ (0, 1]. Therefore, the main hyperparameters of the - algorithm are **(q, p)**. In order to choose an action for a new context, - one of the **q** networks is first selected with uniform probability (i.e., - *1/q*). Then, the best action according to the *selected* network is - played. - - See [Deep Exploration via Bootstrapped - DQN](https://arxiv.org/abs/1602.04621). 
- - The algorithm is implemented in *bootstrapped_bnn_sampling.py*, and we - instantiate it as (where *my_hparams* contains both **q** and **p**): - - ``` - bootstrap = BootstrappedBNNSampling('MyBoot', my_hparams) - ``` - -9. **Parameter-Noise**. Another approach to approximate a distribution over - neural networks (or more generally, models) that map contexts to rewards, - consists in randomly perturbing a point estimate trained by Stochastic - Gradient Descent on the data. The Parameter-Noise algorithm uses a heuristic - to control the amount of noise σt2 it adds independently to the - parameters representing a neural network: θt' = θt + ε where - ε ∼ N(0, σt2 Id). - After using θt' for decision making, the following SGD - training steps start again from θt. The key hyperparameters to set - are those controlling the noise heuristic. - - See [Parameter Space Noise for - Exploration](https://arxiv.org/abs/1706.01905). - - The algorithm is implemented in *parameter_noise_sampling.py*, and we create - an instance by calling: - - ``` - parameter_noise = ParameterNoiseSampling('MyParamNoise', my_hparams) - ``` - -10. **Gaussian Processes**. Another standard benchmark are Gaussian Processes, - see *Gaussian Processes for Machine Learning* by Rasmussen and Williams for - an introduction. To model the expected reward of different actions, we fit a - multitask GP. - - See [Multi-task Gaussian Process - Prediction](http://papers.nips.cc/paper/3189-multi-task-gaussian-process-prediction.pdf). - - Our implementation is provided in *multitask_gp.py*, and it is instantiated - as follows: - - ``` - gp = PosteriorBNNSampling('MyMultitaskGP', my_hparams, 'GP') - ``` - -In the code snippet at the bottom, we show how to instantiate some of these -algorithms, and how to run the contextual bandit simulator, and display the -high-level results. - -## Data - -In the paper we use two types of contextual datasets: synthetic and based on -real-world data. 
- -We provide functions that sample problems from those datasets. In the case of -real-world data, you first need to download the raw datasets, and pass the route -to the functions. Links for the datasets are provided below. - -### Synthetic Datasets - -Synthetic datasets are contained in the *synthetic_data_sampler.py* file. In -particular, it includes: - -1. **Linear data**. Provides a number of linear arms, and Gaussian contexts. - -2. **Sparse linear data**. Provides a number of sparse linear arms, and - Gaussian contexts. - -3. **Wheel bandit data**. Provides sampled data from the wheel bandit data, see - [Section 5.4](https://arxiv.org/abs/1802.09127) in the paper. - -### Real-World Datasets - -Real-world data generating functions are contained in the *data_sampler.py* -file. - -In particular, it includes: - -1. **Mushroom data**. Each incoming context represents a different type of - mushroom, and the actions are eat or no-eat. Eating an edible mushroom - provides positive reward, while eating a poisonous one provides positive - reward with probability *p*, and a large negative reward with probability - *1-p*. All the rewards, and the value of *p* are customizable. The - [dataset](https://archive.ics.uci.edu/ml/datasets/mushroom) is part of the - UCI repository, and the bandit problem was proposed in Blundell et al. - (2015). Data is available [here](https://storage.googleapis.com/bandits_datasets/mushroom.data) - or alternatively [here](https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/), - use the *agaricus-lepiota.data* file. - -2. **Stock data**. We created the Financial Dataset by pulling the stock prices - of *d = 21* publicly traded companies in NYSE and Nasdaq, for the last 14 - years (*n = 3713*). For each day, the context was the price difference - between the beginning and end of the session for each stock. 
We - synthetically created the arms to be a linear combination of the contexts, - representing *k = 8* different potential portfolios. Data is available - [here](https://storage.googleapis.com/bandits_datasets/raw_stock_contexts). - -3. **Jester data**. We create a recommendation system bandit problem as - follows. The Jester Dataset (Goldberg et al., 2001) provides continuous - ratings in *[-10, 10]* for 100 jokes from a total of 73421 users. We find - a *complete* subset of *n = 19181* users rating all 40 jokes. Following - Riquelme et al. (2017), we take *d = 32* of the ratings as the context of - the user, and *k = 8* as the arms. The agent recommends one joke, and - obtains the reward corresponding to the rating of the user for the selected - joke. Data is available [here](https://storage.googleapis.com/bandits_datasets/jester_data_40jokes_19181users.npy). - -4. **Statlog data**. The Shuttle Statlog Dataset (Asuncion & Newman, 2007) - provides the value of *d = 9* indicators during a space shuttle flight, - and the goal is to predict the state of the radiator subsystem of the - shuttle. There are *k = 7* possible states, and if the agent selects the - right state, then reward 1 is generated. Otherwise, the agent obtains no - reward (*r = 0*). The most interesting aspect of the dataset is that one - action is the optimal one in 80% of the cases, and some algorithms may - commit to this action instead of further exploring. In this case, the number - of contexts is *n = 43500*. Data is available [here](https://storage.googleapis.com/bandits_datasets/shuttle.trn) or alternatively - [here](https://archive.ics.uci.edu/ml/datasets/Statlog+\(Shuttle\)), use - *shuttle.trn* file. - -5. **Adult data**. The Adult Dataset (Kohavi, 1996; Asuncion & Newman, 2007) - comprises personal information from the US Census Bureau database, and the - standard prediction task is to determine if a person makes over 50K a year - or not. 
However, we consider the *k = 14* different occupations as - feasible actions, based on *d = 94* covariates (many of them binarized). - As in previous datasets, the agent obtains a reward of 1 for making the - right prediction, and 0 otherwise. The total number of contexts is *n = - 45222*. Data is available [here](https://storage.googleapis.com/bandits_datasets/adult.full) or alternatively - [here](https://archive.ics.uci.edu/ml/datasets/adult), use *adult.data* - file. - -6. **Census data**. The US Census (1990) Dataset (Asuncion & Newman, 2007) - contains a number of personal features (age, native language, education...) - which we summarize in *d = 389* covariates, including binary dummy - variables for categorical features. Our goal again is to predict the - occupation of the individual among *k = 9* classes. The agent obtains - reward 1 for making the right prediction, and 0 otherwise. Data is available - [here](https://storage.googleapis.com/bandits_datasets/USCensus1990.data.txt) or alternatively [here](https://archive.ics.uci.edu/ml/datasets/US+Census+Data+\(1990\)), use - *USCensus1990.data.txt* file. - -7. **Covertype data**. The Covertype Dataset (Asuncion & Newman, 2007) - classifies the cover type of northern Colorado forest areas in *k = 7* - classes, based on *d = 54* features, including elevation, slope, aspect, - and soil type. Again, the agent obtains reward 1 if the correct class is - selected, and 0 otherwise. Data is available [here](https://storage.googleapis.com/bandits_datasets/covtype.data) or alternatively - [here](https://archive.ics.uci.edu/ml/datasets/covertype), use - *covtype.data* file. - -In datasets 4-7, each feature of the dataset is normalized first. - -## Usage: Basic Example - -This library requires Tensorflow, Numpy, and Pandas. - -The file *example_main.py* provides a complete example on how to use the -library. 
We run the code: - -``` - python example_main.py -``` - -**Do not forget to** configure the routes to the data files at the top of *example_main.py*. - -For example, we can run the Mushroom bandit for 2000 contexts on a few -algorithms as follows: - -``` - # Problem parameters - num_contexts = 2000 - - # Choose data source among: - # {linear, sparse_linear, mushroom, financial, jester, - # statlog, adult, covertype, census, wheel} - data_type = 'mushroom' - - # Create dataset - sampled_vals = sample_data(data_type, num_contexts) - dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals - - # Define hyperparameters and algorithms - hparams_linear = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - a0=6, - b0=6, - lambda_prior=0.25, - initial_pulls=2) - - hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - optimizer='RMS', - reset_lr=True, - lr_decay_rate=0.5, - training_freq=50, - training_epochs=100, - keep_prob=0.80, - use_dropout=True) - - ### Create hyper-parameter configurations for other algorithms - [...] 
- - algos = [ - UniformSampling('Uniform Sampling', hparams), - PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'), - PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'), - NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear), - LinearFullPosteriorSampling('LinFullPost', hparams_linear), - BootstrappedBNNSampling('BootRMS', hparams_boot), - ParameterNoiseSampling('ParamNoise', hparams_pnoise), - ] - - # Run contextual bandit problem - t_init = time.time() - results = run_contextual_bandit(context_dim, num_actions, dataset, algos) - _, h_rewards = results - - # Display results - display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type) - -``` - -The previous code leads to final results that look like: - -``` ---------------------------------------------------- ---------------------------------------------------- -mushroom bandit completed after 69.8401839733 seconds. ---------------------------------------------------- - 0) LinFullPost | total reward = 4365.0. - 1) NeuralLinear | total reward = 4110.0. - 2) Dropout | total reward = 3430.0. - 3) ParamNoise | total reward = 3270.0. - 4) BootRMS | total reward = 3050.0. - 5) BBB | total reward = 2505.0. - 6) Uniform Sampling | total reward = -4930.0. ---------------------------------------------------- -Optimal total reward = 5235. -Frequency of optimal actions (action, frequency): -[[0, 953], [1, 1047]] ---------------------------------------------------- ---------------------------------------------------- -``` diff --git a/research/deep_contextual_bandits/bandits/algorithms/bb_alpha_divergence_model.py b/research/deep_contextual_bandits/bandits/algorithms/bb_alpha_divergence_model.py deleted file mode 100644 index 5b9c0ebd0988873eaf97d8d68d25dae5e5b9cd71..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/bb_alpha_divergence_model.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bayesian NN using expectation propagation (Black-Box Alpha-Divergence). - -See https://arxiv.org/abs/1511.03243 for details. -All formulas used in this implementation are derived in: -https://www.overleaf.com/12837696kwzjxkyhdytk#/49028744/. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -import numpy as np -import tensorflow as tf -from absl import flags - -from bandits.core.bayesian_nn import BayesianNN - - -FLAGS = flags.FLAGS -tfd = tf.contrib.distributions # update to: tensorflow_probability.distributions - - -def log_gaussian(x, mu, sigma, reduce_sum=True): - res = tfd.Normal(mu, sigma).log_prob(x) - if reduce_sum: - return tf.reduce_sum(res) - else: - return res - - -class BBAlphaDivergence(BayesianNN): - """Implements an approximate Bayesian NN via Black-Box Alpha-Divergence.""" - - def __init__(self, hparams, name): - - self.name = name - self.hparams = hparams - - self.alpha = getattr(self.hparams, 'alpha', 1.0) - self.num_mc_nn_samples = getattr(self.hparams, 'num_mc_nn_samples', 10) - - self.n_in = self.hparams.context_dim - self.n_out = self.hparams.num_actions - self.layers = self.hparams.layer_sizes - self.batch_size = self.hparams.batch_size - - self.show_training = self.hparams.show_training - self.freq_summary = 
self.hparams.freq_summary - self.verbose = getattr(self.hparams, 'verbose', True) - - self.cleared_times_trained = self.hparams.cleared_times_trained - self.initial_training_steps = self.hparams.initial_training_steps - self.training_schedule = np.linspace(self.initial_training_steps, - self.hparams.training_epochs, - self.cleared_times_trained) - - self.times_trained = 0 - self.initialize_model() - - def initialize_model(self): - """Builds and initialize the model.""" - - self.num_w = 0 - self.num_b = 0 - - self.weights_m = {} - self.weights_std = {} - self.biases_m = {} - self.biases_std = {} - - self.h_max_var = [] - - if self.hparams.use_sigma_exp_transform: - self.sigma_transform = tfd.bijectors.Exp() - else: - self.sigma_transform = tfd.bijectors.Softplus() - - # Build the graph corresponding to the Bayesian NN instance. - self.graph = tf.Graph() - - with self.graph.as_default(): - - self.sess = tf.Session() - self.x = tf.placeholder(shape=[None, self.n_in], - dtype=tf.float32, name='x') - self.y = tf.placeholder(shape=[None, self.n_out], - dtype=tf.float32, name='y') - self.weights = tf.placeholder(shape=[None, self.n_out], - dtype=tf.float32, name='w') - self.data_size = tf.placeholder(tf.float32, shape=(), name='data_size') - - self.prior_variance = self.hparams.prior_variance - if self.prior_variance < 0: - # if not fixed, we learn the prior. 
- self.prior_variance = self.sigma_transform.forward( - self.build_mu_variable([1, 1])) - - self.build_model() - self.sess.run(tf.global_variables_initializer()) - - def build_mu_variable(self, shape): - """Returns a mean variable initialized as N(0, 0.05).""" - return tf.Variable(tf.random_normal(shape, 0.0, 0.05)) - - def build_sigma_variable(self, shape, init=-5.): - """Returns a sigma variable initialized as N(init, 0.05).""" - # Initialize sigma to be very small initially to encourage MAP opt first - return tf.Variable(tf.random_normal(shape, init, 0.05)) - - def build_layer(self, input_x, shape, layer_id, activation_fn=tf.nn.relu): - """Builds a layer with N(mean, std) for each weight, and samples from it.""" - - w_mu = self.build_mu_variable(shape) - w_sigma = self.sigma_transform.forward(self.build_sigma_variable(shape)) - - w_noise = tf.random_normal(shape) - w = w_mu + w_sigma * w_noise - - b_mu = self.build_mu_variable([1, shape[1]]) - b_sigma = self.sigma_transform.forward( - self.build_sigma_variable([1, shape[1]])) - - b_noise = tf.random_normal([1, shape[1]]) - b = b_mu + b_sigma * b_noise - - # Create outputs - output_h = activation_fn(tf.matmul(input_x, w) + b) - - # Store means and stds - self.weights_m[layer_id] = w_mu - self.weights_std[layer_id] = w_sigma - self.biases_m[layer_id] = b_mu - self.biases_std[layer_id] = b_sigma - - return output_h - - def sample_neural_network(self, activation_fn=tf.nn.relu): - """Samples a nn from posterior, computes data log lk and log f factor.""" - - with self.graph.as_default(): - - log_f = 0 - n = self.data_size - input_x = self.x - - for layer_id in range(self.total_layers): - - # load mean and std of each weight - w_mu = self.weights_m[layer_id] - w_sigma = self.weights_std[layer_id] - b_mu = self.biases_m[layer_id] - b_sigma = self.biases_std[layer_id] - - # sample weights from Gaussian distribution - shape = w_mu.shape - w_noise = tf.random_normal(shape) - b_noise = tf.random_normal([1, int(shape[1])]) - 
w = w_mu + w_sigma * w_noise - b = b_mu + b_sigma * b_noise - - # compute contribution to log_f - t1 = w * w_mu / (n * w_sigma ** 2) - t2 = (0.5 * w ** 2 / n) * (1 / self.prior_variance - 1 / w_sigma ** 2) - log_f += tf.reduce_sum(t1 + t2) - - t1 = b * b_mu / (n * b_sigma ** 2) - t2 = (0.5 * b ** 2 / n) * (1 / self.prior_variance - 1 / b_sigma ** 2) - log_f += tf.reduce_sum(t1 + t2) - - if layer_id < self.total_layers - 1: - output_h = activation_fn(tf.matmul(input_x, w) + b) - else: - output_h = tf.matmul(input_x, w) + b - - input_x = output_h - - # compute log likelihood of the observed reward under the sampled nn - log_likelihood = log_gaussian( - self.y, output_h, self.noise_sigma, reduce_sum=False) - weighted_log_likelihood = tf.reduce_sum(log_likelihood * self.weights, -1) - - return log_f, weighted_log_likelihood - - def log_z_q(self): - """Computes log-partition function of current posterior parameters.""" - - with self.graph.as_default(): - - log_z_q = 0 - - for layer_id in range(self.total_layers): - - w_mu = self.weights_m[layer_id] - w_sigma = self.weights_std[layer_id] - b_mu = self.biases_m[layer_id] - b_sigma = self.biases_std[layer_id] - - w_term = 0.5 * tf.reduce_sum(w_mu ** 2 / w_sigma ** 2) - w_term += 0.5 * tf.reduce_sum(tf.log(2 * np.pi) + 2 * tf.log(w_sigma)) - - b_term = 0.5 * tf.reduce_sum(b_mu ** 2 / b_sigma ** 2) - b_term += 0.5 * tf.reduce_sum(tf.log(2 * np.pi) + 2 * tf.log(b_sigma)) - - log_z_q += w_term + b_term - - return log_z_q - - def log_z_prior(self): - """Computes log-partition function of the prior parameters.""" - num_params = self.num_w + self.num_b - return num_params * 0.5 * tf.log(2 * np.pi * self.prior_variance) - - def log_alpha_likelihood_ratio(self, activation_fn=tf.nn.relu): - - # each nn sample returns (log f, log likelihoods) - nn_samples = [ - self.sample_neural_network(activation_fn) - for _ in range(self.num_mc_nn_samples) - ] - nn_log_f_samples = [elt[0] for elt in nn_samples] - nn_log_lk_samples = [elt[1] for 
elt in nn_samples] - - # we stack the (log f, log likelihoods) from the k nn samples - nn_log_f_stack = tf.stack(nn_log_f_samples) # k x 1 - nn_log_lk_stack = tf.stack(nn_log_lk_samples) # k x N - nn_f_tile = tf.tile(nn_log_f_stack, [self.batch_size]) - nn_f_tile = tf.reshape(nn_f_tile, - [self.num_mc_nn_samples, self.batch_size]) - - # now both the log f and log likelihood terms have shape: k x N - # apply formula in https://www.overleaf.com/12837696kwzjxkyhdytk#/49028744/ - nn_log_ratio = nn_log_lk_stack - nn_f_tile - nn_log_ratio = self.alpha * tf.transpose(nn_log_ratio) - logsumexp_value = tf.reduce_logsumexp(nn_log_ratio, -1) - log_k_scalar = tf.log(tf.cast(self.num_mc_nn_samples, tf.float32)) - log_k = log_k_scalar * tf.ones([self.batch_size]) - - return tf.reduce_sum(logsumexp_value - log_k, -1) - - def build_model(self, activation_fn=tf.nn.relu): - """Defines the actual NN model with fully connected layers. - - Args: - activation_fn: Activation function for the neural network. - - The loss is computed for partial feedback settings (bandits), so only - the observed outcome is backpropagated (see weighted loss). - Selects the optimizer and, finally, it also initializes the graph. - """ - - print('Initializing model {}.'.format(self.name)) - - # Build terms for the noise sigma estimation for each action. 
- noise_sigma_mu = (self.build_mu_variable([1, self.n_out]) - + self.sigma_transform.inverse(self.hparams.noise_sigma)) - noise_sigma_sigma = self.sigma_transform.forward( - self.build_sigma_variable([1, self.n_out])) - - pre_noise_sigma = noise_sigma_mu + tf.random_normal( - [1, self.n_out]) * noise_sigma_sigma - self.noise_sigma = self.sigma_transform.forward(pre_noise_sigma) - - # Build network - input_x = self.x - n_in = self.n_in - self.total_layers = len(self.layers) + 1 - if self.layers[0] == 0: - self.total_layers = 1 - - for l_number, n_nodes in enumerate(self.layers): - if n_nodes > 0: - h = self.build_layer(input_x, [n_in, n_nodes], l_number) - input_x = h - n_in = n_nodes - self.num_w += n_in * n_nodes - self.num_b += n_nodes - - self.y_pred = self.build_layer(input_x, [n_in, self.n_out], - self.total_layers - 1, - activation_fn=lambda x: x) - - # Compute energy function based on sampled nn's - log_coeff = self.data_size / (self.batch_size * self.alpha) - log_ratio = log_coeff * self.log_alpha_likelihood_ratio(activation_fn) - logzprior = self.log_z_prior() - logzq = self.log_z_q() - energy = logzprior - logzq - log_ratio - - self.loss = energy - self.global_step = tf.train.get_or_create_global_step() - self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize( - self.loss, global_step=self.global_step) - - # Useful for debugging - sq_loss = tf.squared_difference(self.y_pred, self.y) - weighted_sq_loss = self.weights * sq_loss - self.cost = tf.reduce_sum(weighted_sq_loss) / self.batch_size - - # Create tensorboard metrics - self.create_summaries() - self.summary_writer = tf.summary.FileWriter('{}/graph_{}'.format( - FLAGS.logdir, self.name), self.sess.graph) - - def create_summaries(self): - tf.summary.scalar('loss', self.loss) - tf.summary.scalar('cost', self.cost) - self.summary_op = tf.summary.merge_all() - - def assign_lr(self): - """Resets the learning rate in dynamic schedules for subsequent trainings. 
- - In bandits settings, we do expand our dataset over time. Then, we need to - re-train the network with the new data. Those algorithms that do not keep - the step constant, can reset it at the start of each training process. - """ - - decay_steps = 1 - if self.hparams.activate_decay: - current_gs = self.sess.run(self.global_step) - with self.graph.as_default(): - self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr, - self.global_step - current_gs, - decay_steps, - self.hparams.lr_decay_rate) - - def train(self, data, num_steps): - """Trains the BNN for num_steps, using the data in 'data'. - - Args: - data: ContextualDataset object that provides the data. - num_steps: Number of minibatches to train the network for. - """ - - if self.times_trained < self.cleared_times_trained: - num_steps = int(self.training_schedule[self.times_trained]) - self.times_trained += 1 - - if self.verbose: - print('Training {} for {} steps...'.format(self.name, num_steps)) - - with self.graph.as_default(): - - for step in range(num_steps): - x, y, w = data.get_batch_with_weights(self.hparams.batch_size) - _, summary, global_step, loss = self.sess.run( - [self.train_op, self.summary_op, self.global_step, self.loss], - feed_dict={self.x: x, self.y: y, self.weights: w, - self.data_size: data.num_points()}) - - weights_l = self.sess.run(self.weights_std[0]) - self.h_max_var.append(np.max(weights_l)) - - if step % self.freq_summary == 0: - if self.show_training: - print('step: {}, loss: {}'.format(step, loss)) - sys.stdout.flush() - self.summary_writer.add_summary(summary, global_step) diff --git a/research/deep_contextual_bandits/bandits/algorithms/bf_variational_neural_bandit_model.py b/research/deep_contextual_bandits/bandits/algorithms/bf_variational_neural_bandit_model.py deleted file mode 100644 index cb87c23358f27bd93e30528b20f7a3bb3ba876dd..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/bf_variational_neural_bandit_model.py 
+++ /dev/null @@ -1,352 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bayesian NN using factorized VI (Bayes By Backprop. Blundell et al. 2014). - -See https://arxiv.org/abs/1505.05424 for details. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -# import tensorflow_probability as tfp - -from absl import flags -from bandits.core.bayesian_nn import BayesianNN - - -FLAGS = flags.FLAGS -# tfd = tfp.distributions -tfd = tf.contrib.distributions -tfl = tf.contrib.layers - - -def log_gaussian(x, mu, sigma, reduce_sum=True): - """Returns log Gaussian pdf.""" - res = tfd.Normal(mu, sigma).log_prob(x) - if reduce_sum: - return tf.reduce_sum(res) - else: - return res - - -def analytic_kl(mu_1, sigma_1, mu_2, sigma_2): - """KL for two Gaussian distributions with diagonal covariance matrix.""" - kl = tfd.kl_divergence(tfd.MVNDiag(mu_1, sigma_1), tfd.MVNDiag(mu_2, sigma_2)) - return kl - - -class BfVariationalNeuralBanditModel(BayesianNN): - """Implements an approximate Bayesian NN using Variational Inference.""" - - def __init__(self, hparams, name="BBBNN"): - - self.name = name - self.hparams = hparams - - self.n_in = self.hparams.context_dim - self.n_out = self.hparams.num_actions - self.layers = 
self.hparams.layer_sizes - self.init_scale = self.hparams.init_scale - self.f_num_points = None - if "f_num_points" in hparams: - self.f_num_points = self.hparams.f_num_points - - self.cleared_times_trained = self.hparams.cleared_times_trained - self.initial_training_steps = self.hparams.initial_training_steps - self.training_schedule = np.linspace(self.initial_training_steps, - self.hparams.training_epochs, - self.cleared_times_trained) - self.verbose = getattr(self.hparams, "verbose", True) - - self.weights_m = {} - self.weights_std = {} - self.biases_m = {} - self.biases_std = {} - - self.times_trained = 0 - - if self.hparams.use_sigma_exp_transform: - self.sigma_transform = tf.exp - self.inverse_sigma_transform = np.log - else: - self.sigma_transform = tf.nn.softplus - self.inverse_sigma_transform = lambda y: y + np.log(1. - np.exp(-y)) - - # Whether to use the local reparameterization trick to compute the loss. - # See details in https://arxiv.org/abs/1506.02557 - self.use_local_reparameterization = True - - self.build_graph() - - def build_mu_variable(self, shape): - """Returns a mean variable initialized as N(0, 0.05).""" - return tf.Variable(tf.random_normal(shape, 0.0, 0.05)) - - def build_sigma_variable(self, shape, init=-5.): - """Returns a sigma variable initialized as N(init, 0.05).""" - # Initialize sigma to be very small initially to encourage MAP opt first - return tf.Variable(tf.random_normal(shape, init, 0.05)) - - def build_layer(self, input_x, input_x_local, shape, - layer_id, activation_fn=tf.nn.relu): - """Builds a variational layer, and computes KL term. - - Args: - input_x: Input to the variational layer. - input_x_local: Input when the local reparameterization trick was applied. - shape: [number_inputs, number_outputs] for the layer. - layer_id: Number of layer in the architecture. - activation_fn: Activation function to apply. - - Returns: - output_h: Output of the variational layer. 
- output_h_local: Output when local reparameterization trick was applied. - neg_kl: Negative KL term for the layer. - """ - - w_mu = self.build_mu_variable(shape) - w_sigma = self.sigma_transform(self.build_sigma_variable(shape)) - w_noise = tf.random_normal(shape) - w = w_mu + w_sigma * w_noise - - b_mu = self.build_mu_variable([1, shape[1]]) - b_sigma = self.sigma_transform(self.build_sigma_variable([1, shape[1]])) - b = b_mu - - # Store means and stds - self.weights_m[layer_id] = w_mu - self.weights_std[layer_id] = w_sigma - self.biases_m[layer_id] = b_mu - self.biases_std[layer_id] = b_sigma - - # Create outputs - output_h = activation_fn(tf.matmul(input_x, w) + b) - - if self.use_local_reparameterization: - # Use analytic KL divergence wrt the prior - neg_kl = -analytic_kl(w_mu, w_sigma, - 0., tf.to_float(np.sqrt(2./shape[0]))) - else: - # Create empirical KL loss terms - log_p = log_gaussian(w, 0., tf.to_float(np.sqrt(2./shape[0]))) - log_q = log_gaussian(w, tf.stop_gradient(w_mu), tf.stop_gradient(w_sigma)) - neg_kl = log_p - log_q - - # Apply local reparameterization trick: sample activations pre nonlinearity - m_h = tf.matmul(input_x_local, w_mu) + b - v_h = tf.matmul(tf.square(input_x_local), tf.square(w_sigma)) - output_h_local = m_h + tf.sqrt(v_h + 1e-6) * tf.random_normal(tf.shape(v_h)) - output_h_local = activation_fn(output_h_local) - - return output_h, output_h_local, neg_kl - - def build_action_noise(self): - """Defines a model for additive noise per action, and its KL term.""" - - # Define mean and std variables (log-normal dist) for each action. 
- noise_sigma_mu = (self.build_mu_variable([1, self.n_out]) - + self.inverse_sigma_transform(self.hparams.noise_sigma)) - noise_sigma_sigma = self.sigma_transform( - self.build_sigma_variable([1, self.n_out])) - - pre_noise_sigma = (noise_sigma_mu - + tf.random_normal([1, self.n_out]) * noise_sigma_sigma) - self.noise_sigma = self.sigma_transform(pre_noise_sigma) - - # Compute KL for additive noise sigma terms. - if getattr(self.hparams, "infer_noise_sigma", False): - neg_kl_term = log_gaussian( - pre_noise_sigma, - self.inverse_sigma_transform(self.hparams.noise_sigma), - self.hparams.prior_sigma - ) - neg_kl_term -= log_gaussian(pre_noise_sigma, - noise_sigma_mu, - noise_sigma_sigma) - else: - neg_kl_term = 0. - - return neg_kl_term - - def build_model(self, activation_fn=tf.nn.relu): - """Defines the actual NN model with fully connected layers. - - The loss is computed for partial feedback settings (bandits), so only - the observed outcome is backpropagated (see weighted loss). - Selects the optimizer and, finally, it also initializes the graph. - - Args: - activation_fn: the activation function used in the nn layers. - """ - - def weight_prior(dtype, shape, c, d, e): - del c, d, e - return tfd.Independent( - tfd.Normal(loc=tf.zeros(shape, dtype), - scale=tf.to_float(np.sqrt(2) / shape[0])), - reinterpreted_batch_ndims=tf.size(shape)) - - if self.verbose: - print("Initializing model {}.".format(self.name)) - - # Compute model additive noise for each action with log-normal distribution - neg_kl_term = self.build_action_noise() - - # Build variational network using self.x as input. - input_x = self.x - - # Create Keras model using DenseLocalReparameterization (prior N(0, 1)). 
- model_layers = [ - tfl.DenseLocalReparameterization( - n_nodes, - activation=tf.nn.relu, - kernel_prior_fn=weight_prior - ) - for n_nodes in self.layers if n_nodes > 0 - ] - - output_layer = tfl.DenseLocalReparameterization( - self.n_out, - activation=lambda x: x, - kernel_prior_fn=weight_prior - ) - model_layers.append(output_layer) - - model = tf.keras.Sequential(model_layers) - self.y_pred = model(input_x) - - # Compute KL term - neg_kl_term -= tf.add_n(model.losses) - - # Compute log likelihood (with learned or fixed noise level) - if getattr(self.hparams, "infer_noise_sigma", False): - log_likelihood = log_gaussian( - self.y, self.y_pred, self.noise_sigma, reduce_sum=False) - else: - log_likelihood = log_gaussian( - self.y, self.y_pred, self.hparams.noise_sigma, reduce_sum=False) - - # Only take into account observed outcomes (bandits setting) - batch_size = tf.to_float(tf.shape(self.x)[0]) - weighted_log_likelihood = tf.reduce_sum( - log_likelihood * self.weights) / batch_size - - # The objective is 1/n * (\sum_i log_like_i - KL); neg_kl_term estimates -KL - elbo = weighted_log_likelihood + (neg_kl_term / self.n) - - self.loss = -elbo - self.global_step = tf.train.get_or_create_global_step() - self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize( - self.loss, global_step=self.global_step) - - # Create tensorboard metrics - self.create_summaries() - self.summary_writer = tf.summary.FileWriter( - "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph) - - def build_graph(self): - """Defines graph, session, placeholders, and model. - - Placeholders are: n (size of the dataset), x and y (context and observed - reward for each action), and weights (one-hot encoding of selected action - for each context, i.e., only possibly non-zero element in each y). 
- """ - - self.graph = tf.Graph() - with self.graph.as_default(): - - self.sess = tf.Session() - - self.n = tf.placeholder(shape=[], dtype=tf.float32) - - self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float32) - self.y = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32) - self.weights = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32) - - self.build_model() - self.sess.run(tf.global_variables_initializer()) - - def create_summaries(self): - """Defines summaries including mean loss, and global step.""" - - with self.graph.as_default(): - with tf.name_scope(self.name + "_summaries"): - tf.summary.scalar("loss", self.loss) - tf.summary.scalar("global_step", self.global_step) - self.summary_op = tf.summary.merge_all() - - def assign_lr(self): - """Resets the learning rate in dynamic schedules for subsequent trainings. - - In bandits settings, we do expand our dataset over time. Then, we need to - re-train the network with the new data. The algorithms that do not keep - the step constant, can reset it at the start of each *training* process. - """ - - decay_steps = 1 - if self.hparams.activate_decay: - current_gs = self.sess.run(self.global_step) - with self.graph.as_default(): - self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr, - self.global_step - current_gs, - decay_steps, - self.hparams.lr_decay_rate) - - def train(self, data, num_steps): - """Trains the BNN for num_steps, using the data in 'data'. - - Args: - data: ContextualDataset object that provides the data. - num_steps: Number of minibatches to train the network for. - - Returns: - losses: Loss history during training. 
- """ - - if self.times_trained < self.cleared_times_trained: - num_steps = int(self.training_schedule[self.times_trained]) - self.times_trained += 1 - - losses = [] - - with self.graph.as_default(): - - if self.verbose: - print("Training {} for {} steps...".format(self.name, num_steps)) - - for step in range(num_steps): - x, y, weights = data.get_batch_with_weights(self.hparams.batch_size) - _, summary, global_step, loss = self.sess.run( - [self.train_op, self.summary_op, self.global_step, self.loss], - feed_dict={ - self.x: x, - self.y: y, - self.weights: weights, - self.n: data.num_points(self.f_num_points), - }) - - losses.append(loss) - - if step % self.hparams.freq_summary == 0: - if self.hparams.show_training: - print("{} | step: {}, loss: {}".format( - self.name, global_step, loss)) - self.summary_writer.add_summary(summary, global_step) - - return losses diff --git a/research/deep_contextual_bandits/bandits/algorithms/bootstrapped_bnn_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/bootstrapped_bnn_sampling.py deleted file mode 100644 index 7c44b681c7bd1da113ec29c1bb6d370c88d7053f..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/bootstrapped_bnn_sampling.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Contextual algorithm based on boostrapping neural networks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from bandits.core.bandit_algorithm import BanditAlgorithm -from bandits.core.contextual_dataset import ContextualDataset -from bandits.algorithms.neural_bandit_model import NeuralBanditModel - - -class BootstrappedBNNSampling(BanditAlgorithm): - """Thompson Sampling algorithm based on training several neural networks.""" - - def __init__(self, name, hparams, optimizer='RMS'): - """Creates a BootstrappedSGDSampling object based on a specific optimizer. - - hparams.q: Number of models that are independently trained. - hparams.p: Prob of independently including each datapoint in each model. - - Args: - name: Name given to the instance. - hparams: Hyperparameters for each individual model. - optimizer: Neural network optimization algorithm. 
- """ - - self.name = name - self.hparams = hparams - self.optimizer_n = optimizer - - self.training_freq = hparams.training_freq - self.training_epochs = hparams.training_epochs - self.t = 0 - - self.q = hparams.q - self.p = hparams.p - - self.datasets = [ - ContextualDataset(hparams.context_dim, - hparams.num_actions, - hparams.buffer_s) - for _ in range(self.q) - ] - - self.bnn_boot = [ - NeuralBanditModel(optimizer, hparams, '{}-{}-bnn'.format(name, i)) - for i in range(self.q) - ] - - def action(self, context): - """Selects action for context based on Thompson Sampling using one BNN.""" - - if self.t < self.hparams.num_actions * self.hparams.initial_pulls: - # round robin until each action has been taken "initial_pulls" times - return self.t % self.hparams.num_actions - - # choose model uniformly at random - model_index = np.random.randint(self.q) - - with self.bnn_boot[model_index].graph.as_default(): - c = context.reshape((1, self.hparams.context_dim)) - output = self.bnn_boot[model_index].sess.run( - self.bnn_boot[model_index].y_pred, - feed_dict={self.bnn_boot[model_index].x: c}) - return np.argmax(output) - - def update(self, context, action, reward): - """Updates the data buffer, and re-trains the BNN every self.freq_update.""" - - self.t += 1 - for i in range(self.q): - # include the data point with probability p independently in each dataset - if np.random.random() < self.p or self.t < 2: - self.datasets[i].add(context, action, reward) - - if self.t % self.training_freq == 0: - # update all the models: - for i in range(self.q): - if self.hparams.reset_lr: - self.bnn_boot[i].assign_lr() - self.bnn_boot[i].train(self.datasets[i], self.training_epochs) diff --git a/research/deep_contextual_bandits/bandits/algorithms/fixed_policy_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/fixed_policy_sampling.py deleted file mode 100644 index d5ad6e3ed9ed9d1478e6ac132b41cfb5ae1bb47a..0000000000000000000000000000000000000000 --- 
a/research/deep_contextual_bandits/bandits/algorithms/fixed_policy_sampling.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contextual bandit algorithm that selects an action at random.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from bandits.core.bandit_algorithm import BanditAlgorithm - - -class FixedPolicySampling(BanditAlgorithm): - """Defines a baseline; returns an action at random with probs given by p.""" - - def __init__(self, name, p, hparams): - """Creates a FixedPolicySampling object. - - Args: - name: Name of the algorithm. - p: Vector of normalized probabilities corresponding to sampling each arm. - hparams: Hyper-parameters, including the number of arms (num_actions). - - Raises: - ValueError: when p dimension does not match the number of actions. 
- """ - - self.name = name - self.p = p - self.hparams = hparams - - if len(p) != self.hparams.num_actions: - raise ValueError('Policy needs k probabilities.') - - def action(self, context): - """Selects an action at random according to distribution p.""" - return np.random.choice(range(self.hparams.num_actions), p=self.p) diff --git a/research/deep_contextual_bandits/bandits/algorithms/linear_full_posterior_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/linear_full_posterior_sampling.py deleted file mode 100644 index 15ef8fa9b562101111042dc2ce7b17174018ab6e..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/linear_full_posterior_sampling.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Contextual algorithm that keeps a full linear posterior for each arm.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from scipy.stats import invgamma - -from bandits.core.bandit_algorithm import BanditAlgorithm -from bandits.core.contextual_dataset import ContextualDataset - - -class LinearFullPosteriorSampling(BanditAlgorithm): - """Thompson Sampling with independent linear models and unknown noise var.""" - - def __init__(self, name, hparams): - """Initialize posterior distributions and hyperparameters. - - Assume a linear model for each action i: reward = context^T beta_i + noise - Each beta_i has a Gaussian prior (lambda parameter), each sigma2_i (noise - level) has an inverse Gamma prior (a0, b0 parameters). Mean, covariance, - and precision matrices are initialized, and the ContextualDataset created. - - Args: - name: Name of the algorithm. - hparams: Hyper-parameters of the algorithm. 
- """ - - self.name = name - self.hparams = hparams - - # Gaussian prior for each beta_i - self._lambda_prior = self.hparams.lambda_prior - - self.mu = [ - np.zeros(self.hparams.context_dim + 1) - for _ in range(self.hparams.num_actions) - ] - - self.cov = [(1.0 / self.lambda_prior) * np.eye(self.hparams.context_dim + 1) - for _ in range(self.hparams.num_actions)] - - self.precision = [ - self.lambda_prior * np.eye(self.hparams.context_dim + 1) - for _ in range(self.hparams.num_actions) - ] - - # Inverse Gamma prior for each sigma2_i - self._a0 = self.hparams.a0 - self._b0 = self.hparams.b0 - - self.a = [self._a0 for _ in range(self.hparams.num_actions)] - self.b = [self._b0 for _ in range(self.hparams.num_actions)] - - self.t = 0 - self.data_h = ContextualDataset(hparams.context_dim, - hparams.num_actions, - intercept=True) - - def action(self, context): - """Samples beta's from posterior, and chooses best action accordingly. - - Args: - context: Context for which the action need to be chosen. - - Returns: - action: Selected action for the context. 
- """ - - # Round robin until each action has been selected "initial_pulls" times - if self.t < self.hparams.num_actions * self.hparams.initial_pulls: - return self.t % self.hparams.num_actions - - # Sample sigma2, and beta conditional on sigma2 - sigma2_s = [ - self.b[i] * invgamma.rvs(self.a[i]) - for i in range(self.hparams.num_actions) - ] - - try: - beta_s = [ - np.random.multivariate_normal(self.mu[i], sigma2_s[i] * self.cov[i]) - for i in range(self.hparams.num_actions) - ] - except np.linalg.LinAlgError as e: - # Sampling could fail if covariance is not positive definite - print('Exception when sampling from {}.'.format(self.name)) - print('Details: {} | {}.'.format(e.message, e.args)) - d = self.hparams.context_dim + 1 - beta_s = [ - np.random.multivariate_normal(np.zeros((d)), np.eye(d)) - for i in range(self.hparams.num_actions) - ] - - # Compute sampled expected values, intercept is last component of beta - vals = [ - np.dot(beta_s[i][:-1], context.T) + beta_s[i][-1] - for i in range(self.hparams.num_actions) - ] - - return np.argmax(vals) - - def update(self, context, action, reward): - """Updates action posterior using the linear Bayesian regression formula. - - Args: - context: Last observed context. - action: Last observed action. - reward: Last observed reward. - """ - - self.t += 1 - self.data_h.add(context, action, reward) - - # Update posterior of action with formulas: \beta | x,y ~ N(mu_q, cov_q) - x, y = self.data_h.get_data(action) - - # The algorithm could be improved with sequential update formulas (cheaper) - s = np.dot(x.T, x) - - # Some terms are removed as we assume prior mu_0 = 0. 
- precision_a = s + self.lambda_prior * np.eye(self.hparams.context_dim + 1) - cov_a = np.linalg.inv(precision_a) - mu_a = np.dot(cov_a, np.dot(x.T, y)) - - # Inverse Gamma posterior update - a_post = self.a0 + x.shape[0] / 2.0 - b_upd = 0.5 * (np.dot(y.T, y) - np.dot(mu_a.T, np.dot(precision_a, mu_a))) - b_post = self.b0 + b_upd - - # Store new posterior distributions - self.mu[action] = mu_a - self.cov[action] = cov_a - self.precision[action] = precision_a - self.a[action] = a_post - self.b[action] = b_post - - @property - def a0(self): - return self._a0 - - @property - def b0(self): - return self._b0 - - @property - def lambda_prior(self): - return self._lambda_prior diff --git a/research/deep_contextual_bandits/bandits/algorithms/multitask_gp.py b/research/deep_contextual_bandits/bandits/algorithms/multitask_gp.py deleted file mode 100644 index 0c35dfaeaf9e30993d49d807f16dd64e15d3fc66..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/multitask_gp.py +++ /dev/null @@ -1,374 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A Multitask Gaussian process.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -from absl import logging - -import numpy as np -import tensorflow as tf -from bandits.core.bayesian_nn import BayesianNN - -FLAGS = flags.FLAGS -tfd = tf.contrib.distributions - -class MultitaskGP(BayesianNN): - """Implements a Gaussian process with multi-task outputs. - - Optimizes the hyperparameters over the log marginal likelihood. - Uses a Matern 3/2 + linear covariance and returns - sampled predictions for test inputs. The outputs are optionally - correlated where the correlation structure is learned through latent - embeddings of the tasks. - """ - - def __init__(self, hparams): - self.name = "MultiTaskGP" - self.hparams = hparams - - self.n_in = self.hparams.context_dim - self.n_out = self.hparams.num_outputs - self.keep_fixed_after_max_obs = self.hparams.keep_fixed_after_max_obs - - self._show_training = self.hparams.show_training - self._freq_summary = self.hparams.freq_summary - - # Dimensionality of the latent task vectors - self.task_latent_dim = self.hparams.task_latent_dim - - # Maximum number of observations to include - self.max_num_points = self.hparams.max_num_points - - if self.hparams.learn_embeddings: - self.learn_embeddings = self.hparams.learn_embeddings - else: - self.learn_embeddings = False - - # create the graph corresponding to the BNN instance - self.graph = tf.Graph() - with self.graph.as_default(): - # store a new session for the graph - self.sess = tf.Session() - - with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE): - self.n = tf.placeholder(shape=[], dtype=tf.float64) - self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float64) - self.x_in = tf.placeholder(shape=[None, self.n_in], dtype=tf.float64) - self.y = tf.placeholder(shape=[None, self.n_out], 
dtype=tf.float64) - self.weights = tf.placeholder(shape=[None, self.n_out], - dtype=tf.float64) - - self.build_model() - self.sess.run(tf.global_variables_initializer()) - - def atleast_2d(self, x, dims): - return tf.reshape(tf.expand_dims(x, axis=0), (-1, dims)) - - def sq_dist(self, x, x2): - a2 = tf.reduce_sum(tf.square(x), 1) - b2 = tf.reduce_sum(tf.square(x2), 1) - sqdists = tf.expand_dims(a2, 1) + b2 - 2.0 * tf.matmul(x, tf.transpose(x2)) - return sqdists - - # Covariance between outputs - def task_cov(self, x, x2): - """Squared Exponential Covariance Kernel over latent task embeddings.""" - # Index into latent task vectors - x_vecs = tf.gather(self.task_vectors, tf.argmax(x, axis=1), axis=0) - x2_vecs = tf.gather(self.task_vectors, tf.argmax(x2, axis=1), axis=0) - r = self.sq_dist(self.atleast_2d(x_vecs, self.task_latent_dim), - self.atleast_2d(x2_vecs, self.task_latent_dim)) - return tf.exp(-r) - - def cov(self, x, x2): - """Matern 3/2 + Linear Gaussian Process Covariance Function.""" - ls = tf.clip_by_value(self.length_scales, -5.0, 5.0) - ls_lin = tf.clip_by_value(self.length_scales_lin, -5.0, 5.0) - r = self.sq_dist(self.atleast_2d(x, self.n_in)/tf.nn.softplus(ls), - self.atleast_2d(x2, self.n_in)/tf.nn.softplus(ls)) - r = tf.clip_by_value(r, 0, 1e8) - - # Matern 3/2 Covariance - matern = (1.0 + tf.sqrt(3.0*r + 1e-16)) * tf.exp(-tf.sqrt(3.0*r + 1e-16)) - # Linear Covariance - lin = tf.matmul(x / tf.nn.softplus(ls_lin), - x2 / tf.nn.softplus(ls_lin), transpose_b=True) - return (tf.nn.softplus(self.amplitude) * matern + - tf.nn.softplus(self.amplitude_linear) * lin) - - def build_model(self): - """Defines the GP model. - - The loss is computed for partial feedback settings (bandits), so only - the observed outcome is backpropagated (see weighted loss). - Selects the optimizer and, finally, it also initializes the graph. 
- """ - - logging.info("Initializing model %s.", self.name) - self.global_step = tf.train.get_or_create_global_step() - - # Define state for the model (inputs, etc.) - self.x_train = tf.get_variable( - "training_data", - initializer=tf.ones( - [self.hparams.batch_size, self.n_in], dtype=tf.float64), - validate_shape=False, - trainable=False) - self.y_train = tf.get_variable( - "training_labels", - initializer=tf.zeros([self.hparams.batch_size, 1], dtype=tf.float64), - validate_shape=False, - trainable=False) - self.weights_train = tf.get_variable( - "weights_train", - initializer=tf.ones( - [self.hparams.batch_size, self.n_out], dtype=tf.float64), - validate_shape=False, - trainable=False) - self.input_op = tf.assign(self.x_train, self.x_in, validate_shape=False) - self.input_w_op = tf.assign( - self.weights_train, self.weights, validate_shape=False) - - self.input_std = tf.get_variable( - "data_standard_deviation", - initializer=tf.ones([1, self.n_out], dtype=tf.float64), - dtype=tf.float64, - trainable=False) - self.input_mean = tf.get_variable( - "data_mean", - initializer=tf.zeros([1, self.n_out], dtype=tf.float64), - dtype=tf.float64, - trainable=True) - - # GP Hyperparameters - self.noise = tf.get_variable( - "noise", initializer=tf.cast(0.0, dtype=tf.float64)) - self.amplitude = tf.get_variable( - "amplitude", initializer=tf.cast(1.0, dtype=tf.float64)) - self.amplitude_linear = tf.get_variable( - "linear_amplitude", initializer=tf.cast(1.0, dtype=tf.float64)) - self.length_scales = tf.get_variable( - "length_scales", initializer=tf.zeros([1, self.n_in], dtype=tf.float64)) - self.length_scales_lin = tf.get_variable( - "length_scales_linear", - initializer=tf.zeros([1, self.n_in], dtype=tf.float64)) - - # Latent embeddings of the different outputs for task covariance - self.task_vectors = tf.get_variable( - "latent_task_vectors", - initializer=tf.random_normal( - [self.n_out, self.task_latent_dim], dtype=tf.float64)) - - # Normalize outputs across each 
dimension - # Since we have different numbers of observations across each task, we - # normalize by their respective counts. - index_counts = self.atleast_2d(tf.reduce_sum(self.weights, axis=0), - self.n_out) - index_counts = tf.where(index_counts > 0, index_counts, - tf.ones(tf.shape(index_counts), dtype=tf.float64)) - self.mean_op = tf.assign(self.input_mean, - tf.reduce_sum(self.y, axis=0) / index_counts) - self.var_op = tf.assign( - self.input_std, tf.sqrt(1e-4 + tf.reduce_sum(tf.square( - self.y - tf.reduce_sum(self.y, axis=0) / index_counts), axis=0) - / index_counts)) - - with tf.control_dependencies([self.var_op]): - y_normed = self.atleast_2d( - (self.y - self.input_mean) / self.input_std, self.n_out) - y_normed = self.atleast_2d(tf.boolean_mask(y_normed, self.weights > 0), 1) - self.out_op = tf.assign(self.y_train, y_normed, validate_shape=False) - - # Observation noise - alpha = tf.nn.softplus(self.noise) + 1e-6 - - # Covariance - with tf.control_dependencies([self.input_op, self.input_w_op, self.out_op]): - self.self_cov = (self.cov(self.x_in, self.x_in) * - self.task_cov(self.weights, self.weights) + - tf.eye(tf.shape(self.x_in)[0], dtype=tf.float64) * alpha) - - self.chol = tf.cholesky(self.self_cov) - self.kinv = tf.cholesky_solve(self.chol, tf.eye(tf.shape(self.x_in)[0], - dtype=tf.float64)) - - self.input_inv = tf.Variable( - tf.eye(self.hparams.batch_size, dtype=tf.float64), - validate_shape=False, - trainable=False) - self.input_cov_op = tf.assign(self.input_inv, self.kinv, - validate_shape=False) - - # Log determinant by taking the singular values along the diagonal - # of self.chol - with tf.control_dependencies([self.input_cov_op]): - logdet = 2.0 * tf.reduce_sum(tf.log(tf.diag_part(self.chol) + 1e-16)) - - # Log Marginal likelihood - self.marginal_ll = -tf.reduce_sum(-0.5 * tf.matmul( - tf.transpose(y_normed), tf.matmul(self.kinv, y_normed)) - 0.5 * logdet - - 0.5 * self.n * np.log(2 * np.pi)) - - zero = tf.cast(0., dtype=tf.float64) - one = 
tf.cast(1., dtype=tf.float64) - standard_normal = tfd.Normal(loc=zero, scale=one) - - # Loss is marginal likelihood and priors - self.loss = tf.reduce_sum( - self.marginal_ll - - (standard_normal.log_prob(self.amplitude) + - standard_normal.log_prob(tf.exp(self.noise)) + - standard_normal.log_prob(self.amplitude_linear) + - tfd.Normal(loc=zero, scale=one * 10.).log_prob( - self.task_vectors)) - ) - - # Optimizer for hyperparameters - optimizer = tf.train.AdamOptimizer(learning_rate=self.hparams.lr) - vars_to_optimize = [ - self.amplitude, self.length_scales, self.length_scales_lin, - self.amplitude_linear, self.noise, self.input_mean - ] - - if self.learn_embeddings: - vars_to_optimize.append(self.task_vectors) - grads = optimizer.compute_gradients(self.loss, vars_to_optimize) - self.train_op = optimizer.apply_gradients(grads, - global_step=self.global_step) - - # Predictions for test data - self.y_mean, self.y_pred = self.posterior_mean_and_sample(self.x) - - # create tensorboard metrics - self.create_summaries() - self.summary_writer = tf.summary.FileWriter("{}/graph_{}".format( - FLAGS.logdir, self.name), self.sess.graph) - self.check = tf.add_check_numerics_ops() - - def posterior_mean_and_sample(self, candidates): - """Draw samples for test predictions. - - Given a Tensor of 'candidates' inputs, returns samples from the posterior - and the posterior mean prediction for those inputs. - - Args: - candidates: A (num-examples x num-dims) Tensor containing the inputs for - which to return predictions. 
- Returns: - y_mean: The posterior mean prediction given these inputs - y_sample: A sample from the posterior of the outputs given these inputs - """ - # Cross-covariance for test predictions - w = tf.identity(self.weights_train) - inds = tf.squeeze( - tf.reshape( - tf.tile( - tf.reshape(tf.range(self.n_out), (self.n_out, 1)), - (1, tf.shape(candidates)[0])), (-1, 1))) - - cross_cov = self.cov(tf.tile(candidates, [self.n_out, 1]), self.x_train) - cross_task_cov = self.task_cov(tf.one_hot(inds, self.n_out), w) - cross_cov *= cross_task_cov - - # Test mean prediction - y_mean = tf.matmul(cross_cov, tf.matmul(self.input_inv, self.y_train)) - - # Test sample predictions - # Note this can be done much more efficiently using Kronecker products - # if all tasks are fully observed (which we won't assume) - test_cov = ( - self.cov(tf.tile(candidates, [self.n_out, 1]), - tf.tile(candidates, [self.n_out, 1])) * - self.task_cov(tf.one_hot(inds, self.n_out), - tf.one_hot(inds, self.n_out)) - - tf.matmul(cross_cov, - tf.matmul(self.input_inv, - tf.transpose(cross_cov)))) - - # Get the matrix square root through an SVD for drawing samples - # This seems more numerically stable than the Cholesky - s, _, v = tf.svd(test_cov, full_matrices=True) - test_sqrt = tf.matmul(v, tf.matmul(tf.diag(s), tf.transpose(v))) - - y_sample = ( - tf.matmul( - test_sqrt, - tf.random_normal([tf.shape(test_sqrt)[0], 1], dtype=tf.float64)) + - y_mean) - - y_sample = ( - tf.transpose(tf.reshape(y_sample, - (self.n_out, -1))) * self.input_std + - self.input_mean) - - return y_mean, y_sample - - def create_summaries(self): - with self.graph.as_default(): - tf.summary.scalar("loss", self.loss) - tf.summary.scalar("log_noise", self.noise) - tf.summary.scalar("log_amp", self.amplitude) - tf.summary.scalar("log_amp_lin", self.amplitude_linear) - tf.summary.histogram("length_scales", self.length_scales) - tf.summary.histogram("length_scales_lin", self.length_scales_lin) - self.summary_op = 
tf.summary.merge_all() - - def train(self, data, num_steps): - """Trains the GP for num_steps, using the data in 'data'. - - Args: - data: ContextualDataset object that provides the data. - num_steps: Number of minibatches to train the network for. - """ - - logging.info("Training %s for %d steps...", self.name, num_steps) - for step in range(num_steps): - numpts = min(data.num_points(None), self.max_num_points) - if numpts >= self.max_num_points and self.keep_fixed_after_max_obs: - x = data.contexts[:numpts, :] - y = data.rewards[:numpts, :] - weights = np.zeros((x.shape[0], self.n_out)) - for i, val in enumerate(data.actions[:numpts]): - weights[i, val] = 1.0 - else: - x, y, weights = data.get_batch_with_weights(numpts) - - ops = [ - self.global_step, self.summary_op, self.loss, self.noise, - self.amplitude, self.amplitude_linear, self.length_scales, - self.length_scales_lin, self.input_cov_op, self.input_op, self.var_op, - self.input_w_op, self.out_op, self.train_op - ] - - res = self.sess.run(ops, - feed_dict={self.x: x, - self.x_in: x, - self.y: y, - self.weights: weights, - self.n: numpts, - }) - - if step % self._freq_summary == 0: - if self._show_training: - logging.info("step: %d, loss: %g noise: %f amp: %f amp_lin: %f", - step, res[2], res[3], res[4], res[5]) - summary = res[1] - global_step = res[0] - self.summary_writer.add_summary(summary, global_step=global_step) diff --git a/research/deep_contextual_bandits/bandits/algorithms/neural_bandit_model.py b/research/deep_contextual_bandits/bandits/algorithms/neural_bandit_model.py deleted file mode 100644 index 99d7cd4dc8e2c35571f82bbb79ea1564a148ff5d..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/neural_bandit_model.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Define a family of neural network architectures for bandits. - -The network accepts different type of optimizers that could lead to different -approximations of the posterior distribution or simply to point estimates. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from absl import flags -from bandits.core.bayesian_nn import BayesianNN - -FLAGS = flags.FLAGS - - -class NeuralBanditModel(BayesianNN): - """Implements a neural network for bandit problems.""" - - def __init__(self, optimizer, hparams, name): - """Saves hyper-params and builds the Tensorflow graph.""" - - self.opt_name = optimizer - self.name = name - self.hparams = hparams - self.verbose = getattr(self.hparams, "verbose", True) - self.times_trained = 0 - self.build_model() - - def build_layer(self, x, num_units): - """Builds a layer with input x; dropout and layer norm if specified.""" - - init_s = self.hparams.init_scale - - layer_n = getattr(self.hparams, "layer_norm", False) - dropout = getattr(self.hparams, "use_dropout", False) - - nn = tf.contrib.layers.fully_connected( - x, - num_units, - activation_fn=self.hparams.activation, - normalizer_fn=None if not layer_n else tf.contrib.layers.layer_norm, - normalizer_params={}, - weights_initializer=tf.random_uniform_initializer(-init_s, init_s) - ) - - if dropout: - nn = tf.nn.dropout(nn, self.hparams.keep_prob) - - return nn - - def forward_pass(self): - - 
init_s = self.hparams.init_scale - - scope_name = "prediction_{}".format(self.name) - with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE): - nn = self.x - for num_units in self.hparams.layer_sizes: - if num_units > 0: - nn = self.build_layer(nn, num_units) - - y_pred = tf.layers.dense( - nn, - self.hparams.num_actions, - kernel_initializer=tf.random_uniform_initializer(-init_s, init_s)) - - return nn, y_pred - - def build_model(self): - """Defines the actual NN model with fully connected layers. - - The loss is computed for partial feedback settings (bandits), so only - the observed outcome is backpropagated (see weighted loss). - Selects the optimizer and, finally, it also initializes the graph. - """ - - # create and store the graph corresponding to the BNN instance - self.graph = tf.Graph() - - with self.graph.as_default(): - - # create and store a new session for the graph - self.sess = tf.Session() - - with tf.name_scope(self.name): - - self.global_step = tf.train.get_or_create_global_step() - - # context - self.x = tf.placeholder( - shape=[None, self.hparams.context_dim], - dtype=tf.float32, - name="{}_x".format(self.name)) - - # reward vector - self.y = tf.placeholder( - shape=[None, self.hparams.num_actions], - dtype=tf.float32, - name="{}_y".format(self.name)) - - # weights (1 for selected action, 0 otherwise) - self.weights = tf.placeholder( - shape=[None, self.hparams.num_actions], - dtype=tf.float32, - name="{}_w".format(self.name)) - - # with tf.variable_scope("prediction_{}".format(self.name)): - self.nn, self.y_pred = self.forward_pass() - self.loss = tf.squared_difference(self.y_pred, self.y) - self.weighted_loss = tf.multiply(self.weights, self.loss) - self.cost = tf.reduce_sum(self.weighted_loss) / self.hparams.batch_size - - if self.hparams.activate_decay: - self.lr = tf.train.inverse_time_decay( - self.hparams.initial_lr, self.global_step, - 1, self.hparams.lr_decay_rate) - else: - self.lr = tf.Variable(self.hparams.initial_lr, 
trainable=False) - - # create tensorboard metrics - self.create_summaries() - self.summary_writer = tf.summary.FileWriter( - "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph) - - tvars = tf.trainable_variables() - grads, _ = tf.clip_by_global_norm( - tf.gradients(self.cost, tvars), self.hparams.max_grad_norm) - - self.optimizer = self.select_optimizer() - - self.train_op = self.optimizer.apply_gradients( - zip(grads, tvars), global_step=self.global_step) - - self.init = tf.global_variables_initializer() - - self.initialize_graph() - - def initialize_graph(self): - """Initializes all variables.""" - - with self.graph.as_default(): - if self.verbose: - print("Initializing model {}.".format(self.name)) - self.sess.run(self.init) - - def assign_lr(self): - """Resets the learning rate in dynamic schedules for subsequent trainings. - - In bandits settings, we do expand our dataset over time. Then, we need to - re-train the network with the new data. The algorithms that do not keep - the step constant, can reset it at the start of each *training* process. - """ - - decay_steps = 1 - if self.hparams.activate_decay: - current_gs = self.sess.run(self.global_step) - with self.graph.as_default(): - self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr, - self.global_step - current_gs, - decay_steps, - self.hparams.lr_decay_rate) - - def select_optimizer(self): - """Selects optimizer. To be extended (SGLD, KFAC, etc).""" - return tf.train.RMSPropOptimizer(self.lr) - - def create_summaries(self): - """Defines summaries including mean loss, learning rate, and global step.""" - - with self.graph.as_default(): - with tf.name_scope(self.name + "_summaries"): - tf.summary.scalar("cost", self.cost) - tf.summary.scalar("lr", self.lr) - tf.summary.scalar("global_step", self.global_step) - self.summary_op = tf.summary.merge_all() - - def train(self, data, num_steps): - """Trains the network for num_steps, using the provided data. 
- - Args: - data: ContextualDataset object that provides the data. - num_steps: Number of minibatches to train the network for. - """ - - if self.verbose: - print("Training {} for {} steps...".format(self.name, num_steps)) - - with self.graph.as_default(): - - for step in range(num_steps): - x, y, w = data.get_batch_with_weights(self.hparams.batch_size) - _, cost, summary, lr = self.sess.run( - [self.train_op, self.cost, self.summary_op, self.lr], - feed_dict={self.x: x, self.y: y, self.weights: w}) - - if step % self.hparams.freq_summary == 0: - if self.hparams.show_training: - print("{} | step: {}, lr: {}, loss: {}".format( - self.name, step, lr, cost)) - self.summary_writer.add_summary(summary, step) - - self.times_trained += 1 diff --git a/research/deep_contextual_bandits/bandits/algorithms/neural_linear_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/neural_linear_sampling.py deleted file mode 100644 index 43fc551614b49ad34538aa64090bcda5f823a60f..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/neural_linear_sampling.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Thompson Sampling with linear posterior over a learnt deep representation.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from scipy.stats import invgamma - -from bandits.core.bandit_algorithm import BanditAlgorithm -from bandits.core.contextual_dataset import ContextualDataset -from bandits.algorithms.neural_bandit_model import NeuralBanditModel - - -class NeuralLinearPosteriorSampling(BanditAlgorithm): - """Full Bayesian linear regression on the last layer of a deep neural net.""" - - def __init__(self, name, hparams, optimizer='RMS'): - - self.name = name - self.hparams = hparams - self.latent_dim = self.hparams.layer_sizes[-1] - - # Gaussian prior for each beta_i - self._lambda_prior = self.hparams.lambda_prior - - self.mu = [ - np.zeros(self.latent_dim) - for _ in range(self.hparams.num_actions) - ] - - self.cov = [(1.0 / self.lambda_prior) * np.eye(self.latent_dim) - for _ in range(self.hparams.num_actions)] - - self.precision = [ - self.lambda_prior * np.eye(self.latent_dim) - for _ in range(self.hparams.num_actions) - ] - - # Inverse Gamma prior for each sigma2_i - self._a0 = self.hparams.a0 - self._b0 = self.hparams.b0 - - self.a = [self._a0 for _ in range(self.hparams.num_actions)] - self.b = [self._b0 for _ in range(self.hparams.num_actions)] - - # Regression and NN Update Frequency - self.update_freq_lr = hparams.training_freq - self.update_freq_nn = hparams.training_freq_network - - self.t = 0 - self.optimizer_n = optimizer - - self.num_epochs = hparams.training_epochs - self.data_h = ContextualDataset(hparams.context_dim, - hparams.num_actions, - intercept=False) - self.latent_h = ContextualDataset(self.latent_dim, - hparams.num_actions, - intercept=False) - self.bnn = NeuralBanditModel(optimizer, hparams, '{}-bnn'.format(name)) - - def action(self, context): - """Samples 
beta's from posterior, and chooses best action accordingly.""" - - # Round robin until each action has been selected "initial_pulls" times - if self.t < self.hparams.num_actions * self.hparams.initial_pulls: - return self.t % self.hparams.num_actions - - # Sample sigma2, and beta conditional on sigma2 - sigma2_s = [ - self.b[i] * invgamma.rvs(self.a[i]) - for i in range(self.hparams.num_actions) - ] - - try: - beta_s = [ - np.random.multivariate_normal(self.mu[i], sigma2_s[i] * self.cov[i]) - for i in range(self.hparams.num_actions) - ] - except np.linalg.LinAlgError as e: - # Sampling could fail if covariance is not positive definite - print('Exception when sampling for {}.'.format(self.name)) - print('Details: {} | {}.'.format(e.message, e.args)) - d = self.latent_dim - beta_s = [ - np.random.multivariate_normal(np.zeros((d)), np.eye(d)) - for i in range(self.hparams.num_actions) - ] - - # Compute last-layer representation for the current context - with self.bnn.graph.as_default(): - c = context.reshape((1, self.hparams.context_dim)) - z_context = self.bnn.sess.run(self.bnn.nn, feed_dict={self.bnn.x: c}) - - # Apply Thompson Sampling to last-layer representation - vals = [ - np.dot(beta_s[i], z_context.T) for i in range(self.hparams.num_actions) - ] - return np.argmax(vals) - - def update(self, context, action, reward): - """Updates the posterior using linear bayesian regression formula.""" - - self.t += 1 - self.data_h.add(context, action, reward) - c = context.reshape((1, self.hparams.context_dim)) - z_context = self.bnn.sess.run(self.bnn.nn, feed_dict={self.bnn.x: c}) - self.latent_h.add(z_context, action, reward) - - # Retrain the network on the original data (data_h) - if self.t % self.update_freq_nn == 0: - - if self.hparams.reset_lr: - self.bnn.assign_lr() - self.bnn.train(self.data_h, self.num_epochs) - - # Update the latent representation of every datapoint collected so far - new_z = self.bnn.sess.run(self.bnn.nn, - feed_dict={self.bnn.x: 
self.data_h.contexts}) - self.latent_h.replace_data(contexts=new_z) - - # Update the Bayesian Linear Regression - if self.t % self.update_freq_lr == 0: - - # Find all the actions to update - actions_to_update = self.latent_h.actions[:-self.update_freq_lr] - - for action_v in np.unique(actions_to_update): - - # Update action posterior with formulas: \beta | z,y ~ N(mu_q, cov_q) - z, y = self.latent_h.get_data(action_v) - - # The algorithm could be improved with sequential formulas (cheaper) - s = np.dot(z.T, z) - - # Some terms are removed as we assume prior mu_0 = 0. - precision_a = s + self.lambda_prior * np.eye(self.latent_dim) - cov_a = np.linalg.inv(precision_a) - mu_a = np.dot(cov_a, np.dot(z.T, y)) - - # Inverse Gamma posterior update - a_post = self.a0 + z.shape[0] / 2.0 - b_upd = 0.5 * np.dot(y.T, y) - b_upd -= 0.5 * np.dot(mu_a.T, np.dot(precision_a, mu_a)) - b_post = self.b0 + b_upd - - # Store new posterior distributions - self.mu[action_v] = mu_a - self.cov[action_v] = cov_a - self.precision[action_v] = precision_a - self.a[action_v] = a_post - self.b[action_v] = b_post - - @property - def a0(self): - return self._a0 - - @property - def b0(self): - return self._b0 - - @property - def lambda_prior(self): - return self._lambda_prior diff --git a/research/deep_contextual_bandits/bandits/algorithms/parameter_noise_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/parameter_noise_sampling.py deleted file mode 100644 index 19944ad577372b6971f03f1117fc33d5a2a276b1..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/parameter_noise_sampling.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contextual algorithm based on Thompson Sampling + direct noise injection.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from scipy.special import logsumexp -import tensorflow as tf - -from absl import flags - -from bandits.core.bandit_algorithm import BanditAlgorithm -from bandits.core.contextual_dataset import ContextualDataset -from bandits.algorithms.neural_bandit_model import NeuralBanditModel - -FLAGS = flags.FLAGS - - -class ParameterNoiseSampling(BanditAlgorithm): - """Parameter Noise Sampling algorithm based on adding noise to net params. 
- - Described in https://arxiv.org/abs/1706.01905 - """ - - def __init__(self, name, hparams): - """Creates the algorithm, and sets up the adaptive Gaussian noise.""" - - self.name = name - self.hparams = hparams - self.verbose = getattr(self.hparams, 'verbose', True) - self.noise_std = getattr(self.hparams, 'noise_std', 0.005) - self.eps = getattr(self.hparams, 'eps', 0.05) - self.d_samples = getattr(self.hparams, 'd_samples', 300) - self.optimizer = getattr(self.hparams, 'optimizer', 'RMS') - - # keep track of noise heuristic statistics - self.std_h = [self.noise_std] - self.eps_h = [self.eps] - self.kl_h = [] - self.t = 0 - - self.freq_update = hparams.training_freq - self.num_epochs = hparams.training_epochs - - self.data_h = ContextualDataset(hparams.context_dim, hparams.num_actions, - hparams.buffer_s) - self.bnn = NeuralBanditModel(self.optimizer, hparams, '{}-bnn'.format(name)) - - with self.bnn.graph.as_default(): - - # noise-injection std placeholder - self.bnn.noise_std_ph = tf.placeholder(tf.float32, shape=()) - - # create noise corruption op; adds noise to all weights - tvars = tf.trainable_variables() - self.bnn.noisy_grads = [ - tf.random_normal(v.get_shape(), 0, self.bnn.noise_std_ph) - for v in tvars - ] - - # add noise to all params, then compute prediction, then subtract. 
- with tf.control_dependencies(self.bnn.noisy_grads): - self.bnn.noise_add_ops = [ - tvars[i].assign_add(n) for i, n in enumerate(self.bnn.noisy_grads) - ] - with tf.control_dependencies(self.bnn.noise_add_ops): - # we force the prediction for 'y' to be recomputed after adding noise - self.bnn.noisy_nn, self.bnn.noisy_pred_val = self.bnn.forward_pass() - - self.bnn.noisy_pred = tf.identity(self.bnn.noisy_pred_val) - with tf.control_dependencies([tf.identity(self.bnn.noisy_pred)]): - self.bnn.noise_sub_ops = [ - tvars[i].assign_add(-n) - for i, n in enumerate(self.bnn.noisy_grads) - ] - - def action(self, context): - """Selects action based on Thompson Sampling *after* adding noise.""" - - if self.t < self.hparams.num_actions * self.hparams.initial_pulls: - # round robin until each action has been taken "initial_pulls" times - return self.t % self.hparams.num_actions - - with self.bnn.graph.as_default(): - # run noise prediction op to choose action, and subtract noise op after. - c = context.reshape((1, self.hparams.context_dim)) - output, _ = self.bnn.sess.run( - [self.bnn.noisy_pred, self.bnn.noise_sub_ops], - feed_dict={self.bnn.x: c, - self.bnn.noise_std_ph: self.noise_std}) - return np.argmax(output) - - def update(self, context, action, reward): - """Updates the data buffer, and re-trains the BNN and noise level.""" - - self.t += 1 - self.data_h.add(context, action, reward) - - if self.t % self.freq_update == 0: - self.bnn.train(self.data_h, self.num_epochs) - self.update_noise() - - def update_noise(self): - """Increase noise if distance btw original and corrupted distrib small.""" - - kl = self.compute_distance() - delta = -np.log1p(- self.eps + self.eps / self.hparams.num_actions) - - if kl < delta: - self.noise_std *= 1.01 - else: - self.noise_std /= 1.01 - - self.eps *= 0.99 - - if self.verbose: - print('Update eps={} | kl={} | std={} | delta={} | increase={}.'.format( - self.eps, kl, self.noise_std, delta, kl < delta)) - - # store noise-injection 
statistics for inspection: std, KL, eps. - self.std_h.append(self.noise_std) - self.kl_h.append(kl) - self.eps_h.append(self.eps) - - def compute_distance(self): - """Computes empirical KL for original and corrupted output distributions.""" - - random_inputs, _ = self.data_h.get_batch(self.d_samples) - y_model = self.bnn.sess.run( - self.bnn.y_pred, - feed_dict={ - self.bnn.x: random_inputs, - self.bnn.noise_std_ph: self.noise_std - }) - y_noisy, _ = self.bnn.sess.run( - [self.bnn.noisy_pred, self.bnn.noise_sub_ops], - feed_dict={ - self.bnn.x: random_inputs, - self.bnn.noise_std_ph: self.noise_std - }) - - if self.verbose: - # display how often original & perturbed models propose different actions - s = np.sum([np.argmax(y_model[i, :]) == np.argmax(y_noisy[i, :]) - for i in range(y_model.shape[0])]) - print('{} | % of agreement btw original / corrupted actions: {}.'.format( - self.name, s / self.d_samples)) - - kl = self.compute_kl_with_logits(y_model, y_noisy) - return kl - - def compute_kl_with_logits(self, logits1, logits2): - """Computes KL from logits samples from two distributions.""" - - def exp_times_diff(a, b): - return np.multiply(np.exp(a), a - b) - - logsumexp1 = logsumexp(logits1, axis=1) - logsumexp2 = logsumexp(logits2, axis=1) - logsumexp_diff = logsumexp2 - logsumexp1 - - exp_diff = exp_times_diff(logits1, logits2) - exp_diff = np.sum(exp_diff, axis=1) - - inv_exp_sum = np.sum(np.exp(logits1), axis=1) - term1 = np.divide(exp_diff, inv_exp_sum) - - kl = term1 + logsumexp_diff - kl = np.maximum(kl, 0.0) - kl = np.nan_to_num(kl) - return np.mean(kl) diff --git a/research/deep_contextual_bandits/bandits/algorithms/posterior_bnn_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/posterior_bnn_sampling.py deleted file mode 100644 index 0f0c5d365a3a3e48006fe6b4e7e47ab73ea756cf..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/posterior_bnn_sampling.py +++ /dev/null @@ -1,92 +0,0 @@ -# 
Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contextual bandit algorithm based on Thompson Sampling and a Bayesian NN.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from bandits.core.bandit_algorithm import BanditAlgorithm -from bandits.algorithms.bb_alpha_divergence_model import BBAlphaDivergence -from bandits.algorithms.bf_variational_neural_bandit_model import BfVariationalNeuralBanditModel -from bandits.core.contextual_dataset import ContextualDataset -from bandits.algorithms.multitask_gp import MultitaskGP -from bandits.algorithms.neural_bandit_model import NeuralBanditModel -from bandits.algorithms.variational_neural_bandit_model import VariationalNeuralBanditModel - - -class PosteriorBNNSampling(BanditAlgorithm): - """Posterior Sampling algorithm based on a Bayesian neural network.""" - - def __init__(self, name, hparams, bnn_model='RMSProp'): - """Creates a PosteriorBNNSampling object based on a specific optimizer. - - The algorithm has two basic tools: an Approx BNN and a Contextual Dataset. - The Bayesian Network keeps the posterior based on the optimizer iterations. - - Args: - name: Name of the algorithm. - hparams: Hyper-parameters of the algorithm. - bnn_model: Type of BNN. By default RMSProp (point estimate). 
- """ - - self.name = name - self.hparams = hparams - self.optimizer_n = hparams.optimizer - - self.training_freq = hparams.training_freq - self.training_epochs = hparams.training_epochs - self.t = 0 - self.data_h = ContextualDataset(hparams.context_dim, hparams.num_actions, - hparams.buffer_s) - - # to be extended with more BNNs (BB alpha-div, GPs, SGFS, constSGD...) - bnn_name = '{}-bnn'.format(name) - if bnn_model == 'Variational': - self.bnn = VariationalNeuralBanditModel(hparams, bnn_name) - elif bnn_model == 'AlphaDiv': - self.bnn = BBAlphaDivergence(hparams, bnn_name) - elif bnn_model == 'Variational_BF': - self.bnn = BfVariationalNeuralBanditModel(hparams, bnn_name) - elif bnn_model == 'GP': - self.bnn = MultitaskGP(hparams) - else: - self.bnn = NeuralBanditModel(self.optimizer_n, hparams, bnn_name) - - def action(self, context): - """Selects action for context based on Thompson Sampling using the BNN.""" - - if self.t < self.hparams.num_actions * self.hparams.initial_pulls: - # round robin until each action has been taken "initial_pulls" times - return self.t % self.hparams.num_actions - - with self.bnn.graph.as_default(): - c = context.reshape((1, self.hparams.context_dim)) - output = self.bnn.sess.run(self.bnn.y_pred, feed_dict={self.bnn.x: c}) - return np.argmax(output) - - def update(self, context, action, reward): - """Updates data buffer, and re-trains the BNN every training_freq steps.""" - - self.t += 1 - self.data_h.add(context, action, reward) - - if self.t % self.training_freq == 0: - if self.hparams.reset_lr: - self.bnn.assign_lr() - self.bnn.train(self.data_h, self.training_epochs) diff --git a/research/deep_contextual_bandits/bandits/algorithms/uniform_sampling.py b/research/deep_contextual_bandits/bandits/algorithms/uniform_sampling.py deleted file mode 100644 index 15c073fbe89da4e9aef595c8772ceaa3667e1952..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/uniform_sampling.py +++ /dev/null @@ 
-1,43 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contextual bandit algorithm that selects an action uniformly at random.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from bandits.core.bandit_algorithm import BanditAlgorithm - - -class UniformSampling(BanditAlgorithm): - """Defines a baseline; returns one action uniformly at random.""" - - def __init__(self, name, hparams): - """Creates a UniformSampling object. - - Args: - name: Name of the algorithm. - hparams: Hyper-parameters, including the number of arms (num_actions). - """ - - self.name = name - self.hparams = hparams - - def action(self, context): - """Selects an action uniformly at random.""" - return np.random.choice(range(self.hparams.num_actions)) diff --git a/research/deep_contextual_bandits/bandits/algorithms/variational_neural_bandit_model.py b/research/deep_contextual_bandits/bandits/algorithms/variational_neural_bandit_model.py deleted file mode 100644 index 7700c08ba9f7861aac522ba6da9f7371b5e203af..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/algorithms/variational_neural_bandit_model.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Bayesian NN using factorized VI (Bayes By Backprop. Blundell et al. 2014). - -See https://arxiv.org/abs/1505.05424 for details. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from absl import flags -from bandits.core.bayesian_nn import BayesianNN - -FLAGS = flags.FLAGS - - -def log_gaussian(x, mu, sigma, reduce_sum=True): - """Returns log Gaussian pdf.""" - res = (-0.5 * np.log(2 * np.pi) - tf.log(sigma) - tf.square(x - mu) / - (2 * tf.square(sigma))) - if reduce_sum: - return tf.reduce_sum(res) - else: - return res - - -def analytic_kl(mu_1, sigma_1, mu_2, sigma_2): - """KL for two Gaussian distributions with diagonal covariance matrix.""" - sigma_1_sq = tf.square(sigma_1) - sigma_2_sq = tf.square(sigma_2) - - t1 = tf.square(mu_1 - mu_2) / (2. * sigma_2_sq) - t2 = (sigma_1_sq/sigma_2_sq - 1. - tf.log(sigma_1_sq) + tf.log(sigma_2_sq))/2. 
- return tf.reduce_sum(t1 + t2) - - -class VariationalNeuralBanditModel(BayesianNN): - """Implements an approximate Bayesian NN using Variational Inference.""" - - def __init__(self, hparams, name="BBBNN"): - - self.name = name - self.hparams = hparams - - self.n_in = self.hparams.context_dim - self.n_out = self.hparams.num_actions - self.layers = self.hparams.layer_sizes - self.init_scale = self.hparams.init_scale - self.f_num_points = None - if "f_num_points" in hparams: - self.f_num_points = self.hparams.f_num_points - - self.cleared_times_trained = self.hparams.cleared_times_trained - self.initial_training_steps = self.hparams.initial_training_steps - self.training_schedule = np.linspace(self.initial_training_steps, - self.hparams.training_epochs, - self.cleared_times_trained) - self.verbose = getattr(self.hparams, "verbose", True) - - self.weights_m = {} - self.weights_std = {} - self.biases_m = {} - self.biases_std = {} - - self.times_trained = 0 - - if self.hparams.use_sigma_exp_transform: - self.sigma_transform = tf.exp - self.inverse_sigma_transform = np.log - else: - self.sigma_transform = tf.nn.softplus - self.inverse_sigma_transform = lambda y: y + np.log(1. - np.exp(-y)) - - # Whether to use the local reparameterization trick to compute the loss. - # See details in https://arxiv.org/abs/1506.02557 - self.use_local_reparameterization = True - - self.build_graph() - - def build_mu_variable(self, shape): - """Returns a mean variable initialized as N(0, 0.05).""" - return tf.Variable(tf.random_normal(shape, 0.0, 0.05)) - - def build_sigma_variable(self, shape, init=-5.): - """Returns a sigma variable initialized as N(init, 0.05).""" - # Initialize sigma to be very small initially to encourage MAP opt first - return tf.Variable(tf.random_normal(shape, init, 0.05)) - - def build_layer(self, input_x, input_x_local, shape, - layer_id, activation_fn=tf.nn.relu): - """Builds a variational layer, and computes KL term. 
- - Args: - input_x: Input to the variational layer. - input_x_local: Input when the local reparameterization trick was applied. - shape: [number_inputs, number_outputs] for the layer. - layer_id: Number of layer in the architecture. - activation_fn: Activation function to apply. - - Returns: - output_h: Output of the variational layer. - output_h_local: Output when local reparameterization trick was applied. - neg_kl: Negative KL term for the layer. - """ - - w_mu = self.build_mu_variable(shape) - w_sigma = self.sigma_transform(self.build_sigma_variable(shape)) - w_noise = tf.random_normal(shape) - w = w_mu + w_sigma * w_noise - - b_mu = self.build_mu_variable([1, shape[1]]) - b_sigma = self.sigma_transform(self.build_sigma_variable([1, shape[1]])) - b = b_mu - - # Store means and stds - self.weights_m[layer_id] = w_mu - self.weights_std[layer_id] = w_sigma - self.biases_m[layer_id] = b_mu - self.biases_std[layer_id] = b_sigma - - # Create outputs - output_h = activation_fn(tf.matmul(input_x, w) + b) - - if self.use_local_reparameterization: - # Use analytic KL divergence wrt the prior - neg_kl = -analytic_kl(w_mu, w_sigma, - 0., tf.to_float(np.sqrt(2./shape[0]))) - else: - # Create empirical KL loss terms - log_p = log_gaussian(w, 0., tf.to_float(np.sqrt(2./shape[0]))) - log_q = log_gaussian(w, tf.stop_gradient(w_mu), tf.stop_gradient(w_sigma)) - neg_kl = log_p - log_q - - # Apply local reparameterization trick: sample activations pre nonlinearity - m_h = tf.matmul(input_x_local, w_mu) + b - v_h = tf.matmul(tf.square(input_x_local), tf.square(w_sigma)) - output_h_local = m_h + tf.sqrt(v_h + 1e-6) * tf.random_normal(tf.shape(v_h)) - output_h_local = activation_fn(output_h_local) - - return output_h, output_h_local, neg_kl - - def build_action_noise(self): - """Defines a model for additive noise per action, and its KL term.""" - - # Define mean and std variables (log-normal dist) for each action. 
- noise_sigma_mu = (self.build_mu_variable([1, self.n_out]) - + self.inverse_sigma_transform(self.hparams.noise_sigma)) - noise_sigma_sigma = self.sigma_transform( - self.build_sigma_variable([1, self.n_out])) - - pre_noise_sigma = (noise_sigma_mu - + tf.random_normal([1, self.n_out]) * noise_sigma_sigma) - self.noise_sigma = self.sigma_transform(pre_noise_sigma) - - # Compute KL for additive noise sigma terms. - if getattr(self.hparams, "infer_noise_sigma", False): - neg_kl_term = log_gaussian( - pre_noise_sigma, - self.inverse_sigma_transform(self.hparams.noise_sigma), - self.hparams.prior_sigma - ) - neg_kl_term -= log_gaussian(pre_noise_sigma, - noise_sigma_mu, - noise_sigma_sigma) - else: - neg_kl_term = 0. - - return neg_kl_term - - def build_model(self, activation_fn=tf.nn.relu): - """Defines the actual NN model with fully connected layers. - - The loss is computed for partial feedback settings (bandits), so only - the observed outcome is backpropagated (see weighted loss). - Selects the optimizer and, finally, it also initializes the graph. - - Args: - activation_fn: the activation function used in the nn layers. - """ - - if self.verbose: - print("Initializing model {}.".format(self.name)) - neg_kl_term, l_number = 0, 0 - use_local_reparameterization = self.use_local_reparameterization - - # Compute model additive noise for each action with log-normal distribution - neg_kl_term += self.build_action_noise() - - # Build network. 
- input_x = self.x - input_local = self.x - n_in = self.n_in - - for l_number, n_nodes in enumerate(self.layers): - if n_nodes > 0: - h, h_local, neg_kl = self.build_layer(input_x, input_local, - [n_in, n_nodes], l_number) - - neg_kl_term += neg_kl - input_x, input_local = h, h_local - n_in = n_nodes - - # Create last linear layer - h, h_local, neg_kl = self.build_layer(input_x, input_local, - [n_in, self.n_out], - l_number + 1, - activation_fn=lambda x: x) - neg_kl_term += neg_kl - - self.y_pred = h - self.y_pred_local = h_local - - # Compute log likelihood (with learned or fixed noise level) - if getattr(self.hparams, "infer_noise_sigma", False): - log_likelihood = log_gaussian( - self.y, self.y_pred_local, self.noise_sigma, reduce_sum=False) - else: - y_hat = self.y_pred_local if use_local_reparameterization else self.y_pred - log_likelihood = log_gaussian( - self.y, y_hat, self.hparams.noise_sigma, reduce_sum=False) - - # Only take into account observed outcomes (bandits setting) - batch_size = tf.to_float(tf.shape(self.x)[0]) - weighted_log_likelihood = tf.reduce_sum( - log_likelihood * self.weights) / batch_size - - # The objective is 1/n * (\sum_i log_like_i - KL); neg_kl_term estimates -KL - elbo = weighted_log_likelihood + (neg_kl_term / self.n) - - self.loss = -elbo - self.global_step = tf.train.get_or_create_global_step() - self.train_op = tf.train.AdamOptimizer(self.hparams.initial_lr).minimize( - self.loss, global_step=self.global_step) - - # Create tensorboard metrics - self.create_summaries() - self.summary_writer = tf.summary.FileWriter( - "{}/graph_{}".format(FLAGS.logdir, self.name), self.sess.graph) - - def build_graph(self): - """Defines graph, session, placeholders, and model. - - Placeholders are: n (size of the dataset), x and y (context and observed - reward for each action), and weights (one-hot encoding of selected action - for each context, i.e., only possibly non-zero element in each y). 
- """ - - self.graph = tf.Graph() - with self.graph.as_default(): - - self.sess = tf.Session() - - self.n = tf.placeholder(shape=[], dtype=tf.float32) - - self.x = tf.placeholder(shape=[None, self.n_in], dtype=tf.float32) - self.y = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32) - self.weights = tf.placeholder(shape=[None, self.n_out], dtype=tf.float32) - - self.build_model() - self.sess.run(tf.global_variables_initializer()) - - def create_summaries(self): - """Defines summaries including mean loss, and global step.""" - - with self.graph.as_default(): - with tf.name_scope(self.name + "_summaries"): - tf.summary.scalar("loss", self.loss) - tf.summary.scalar("global_step", self.global_step) - self.summary_op = tf.summary.merge_all() - - def assign_lr(self): - """Resets the learning rate in dynamic schedules for subsequent trainings. - - In bandits settings, we do expand our dataset over time. Then, we need to - re-train the network with the new data. The algorithms that do not keep - the step constant, can reset it at the start of each *training* process. - """ - - decay_steps = 1 - if self.hparams.activate_decay: - current_gs = self.sess.run(self.global_step) - with self.graph.as_default(): - self.lr = tf.train.inverse_time_decay(self.hparams.initial_lr, - self.global_step - current_gs, - decay_steps, - self.hparams.lr_decay_rate) - - def train(self, data, num_steps): - """Trains the BNN for num_steps, using the data in 'data'. - - Args: - data: ContextualDataset object that provides the data. - num_steps: Number of minibatches to train the network for. - - Returns: - losses: Loss history during training. 
- """ - - if self.times_trained < self.cleared_times_trained: - num_steps = int(self.training_schedule[self.times_trained]) - self.times_trained += 1 - - losses = [] - - with self.graph.as_default(): - - if self.verbose: - print("Training {} for {} steps...".format(self.name, num_steps)) - - for step in range(num_steps): - x, y, weights = data.get_batch_with_weights(self.hparams.batch_size) - _, summary, global_step, loss = self.sess.run( - [self.train_op, self.summary_op, self.global_step, self.loss], - feed_dict={ - self.x: x, - self.y: y, - self.weights: weights, - self.n: data.num_points(self.f_num_points), - }) - - losses.append(loss) - - if step % self.hparams.freq_summary == 0: - if self.hparams.show_training: - print("{} | step: {}, loss: {}".format( - self.name, global_step, loss)) - self.summary_writer.add_summary(summary, global_step) - - return losses diff --git a/research/deep_contextual_bandits/bandits/core/bandit_algorithm.py b/research/deep_contextual_bandits/bandits/core/bandit_algorithm.py deleted file mode 100644 index cae4e1676a865d538fa41936feb9118283b92a2c..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/core/bandit_algorithm.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Define the abstract class for contextual bandit algorithms.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -class BanditAlgorithm(object): - """A bandit algorithm must be able to do two basic operations. - - 1. Choose an action given a context. - 2. Update its internal model given a triple (context, played action, reward). - """ - - def action(self, context): - pass - - def update(self, context, action, reward): - pass diff --git a/research/deep_contextual_bandits/bandits/core/bayesian_nn.py b/research/deep_contextual_bandits/bandits/core/bayesian_nn.py deleted file mode 100644 index 310961591317f8c9ff958a5178e81e0422385baf..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/core/bayesian_nn.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Define the abstract class for Bayesian Neural Networks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -class BayesianNN(object): - """A Bayesian neural network keeps a distribution over neural nets.""" - - def __init__(self, optimizer): - pass - - def build_model(self): - pass - - def train(self, data): - pass - - def sample(self, steps): - pass diff --git a/research/deep_contextual_bandits/bandits/core/contextual_bandit.py b/research/deep_contextual_bandits/bandits/core/contextual_bandit.py deleted file mode 100644 index 98467378953b9f3e38057be8a0068fdbc7b59a84..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/core/contextual_bandit.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Define a contextual bandit from which we can sample and compute rewards. - -We can feed the data, sample a context, its reward for a specific action, and -also the optimal action for a given context. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -def run_contextual_bandit(context_dim, num_actions, dataset, algos): - """Run a contextual bandit problem on a set of algorithms. - - Args: - context_dim: Dimension of the context. - num_actions: Number of available actions. - dataset: Matrix where every row is a context + num_actions rewards. - algos: List of algorithms to use in the contextual bandit instance. - - Returns: - h_actions: Matrix with actions: size (num_context, num_algorithms). - h_rewards: Matrix with rewards: size (num_context, num_algorithms). - """ - - num_contexts = dataset.shape[0] - - # Create contextual bandit - cmab = ContextualBandit(context_dim, num_actions) - cmab.feed_data(dataset) - - h_actions = np.empty((0, len(algos)), float) - h_rewards = np.empty((0, len(algos)), float) - - # Run the contextual bandit process - for i in range(num_contexts): - context = cmab.context(i) - actions = [a.action(context) for a in algos] - rewards = [cmab.reward(i, action) for action in actions] - - for j, a in enumerate(algos): - a.update(context, actions[j], rewards[j]) - - h_actions = np.vstack((h_actions, np.array(actions))) - h_rewards = np.vstack((h_rewards, np.array(rewards))) - - return h_actions, h_rewards - - -class ContextualBandit(object): - """Implements a Contextual Bandit with d-dimensional contexts and k arms.""" - - def __init__(self, context_dim, num_actions): - """Creates a contextual bandit object. - - Args: - context_dim: Dimension of the contexts. - num_actions: Number of arms for the multi-armed bandit. - """ - - self._context_dim = context_dim - self._num_actions = num_actions - - def feed_data(self, data): - """Feeds the data (contexts + rewards) to the bandit object. - - Args: - data: Numpy array with shape [n, d+k], where n is the number of contexts, - d is the dimension of each context, and k the number of arms (rewards). 
- - Raises: - ValueError: when data dimensions do not correspond to the object values. - """ - - if data.shape[1] != self.context_dim + self.num_actions: - raise ValueError('Data dimensions do not match.') - - self._number_contexts = data.shape[0] - self.data = data - self.order = range(self.number_contexts) - - def reset(self): - """Randomly shuffle the order of the contexts to deliver.""" - self.order = np.random.permutation(self.number_contexts) - - def context(self, number): - """Returns the number-th context.""" - return self.data[self.order[number]][:self.context_dim] - - def reward(self, number, action): - """Returns the reward for the number-th context and action.""" - return self.data[self.order[number]][self.context_dim + action] - - def optimal(self, number): - """Returns the optimal action (in hindsight) for the number-th context.""" - return np.argmax(self.data[self.order[number]][self.context_dim:]) - - @property - def context_dim(self): - return self._context_dim - - @property - def num_actions(self): - return self._num_actions - - @property - def number_contexts(self): - return self._number_contexts diff --git a/research/deep_contextual_bandits/bandits/core/contextual_dataset.py b/research/deep_contextual_bandits/bandits/core/contextual_dataset.py deleted file mode 100644 index 9fae7629c7c2ee39ab6b98ddac73876b5fca421a..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/core/contextual_dataset.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Define a data buffer for contextual bandit algorithms.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -class ContextualDataset(object): - """The buffer is able to append new data, and sample random minibatches.""" - - def __init__(self, context_dim, num_actions, buffer_s=-1, intercept=False): - """Creates a ContextualDataset object. - - The data is stored in attributes: contexts and rewards. - The sequence of taken actions are stored in attribute actions. - - Args: - context_dim: Dimension of the contexts. - num_actions: Number of arms for the multi-armed bandit. - buffer_s: Size of buffer for training. Only last buffer_s will be - returned as minibatch. If buffer_s = -1, all data will be used. - intercept: If True, it adds a constant (1.0) dimension to each context X, - at the end. - """ - - self._context_dim = context_dim - self._num_actions = num_actions - self._contexts = None - self._rewards = None - self.actions = [] - self.buffer_s = buffer_s - self.intercept = intercept - - def add(self, context, action, reward): - """Adds a new triplet (context, action, reward) to the dataset. - - The reward for the actions that weren't played is assumed to be zero. - - Args: - context: A d-dimensional vector with the context. - action: Integer between 0 and k-1 representing the chosen arm. - reward: Real number representing the reward for the (context, action). 
- """ - - if self.intercept: - c = np.array(context[:]) - c = np.append(c, 1.0).reshape((1, self.context_dim + 1)) - else: - c = np.array(context[:]).reshape((1, self.context_dim)) - - if self.contexts is None: - self.contexts = c - else: - self.contexts = np.vstack((self.contexts, c)) - - r = np.zeros((1, self.num_actions)) - r[0, action] = reward - if self.rewards is None: - self.rewards = r - else: - self.rewards = np.vstack((self.rewards, r)) - - self.actions.append(action) - - def replace_data(self, contexts=None, actions=None, rewards=None): - if contexts is not None: - self.contexts = contexts - if actions is not None: - self.actions = actions - if rewards is not None: - self.rewards = rewards - - def get_batch(self, batch_size): - """Returns a random minibatch of (contexts, rewards) with batch_size.""" - n, _ = self.contexts.shape - if self.buffer_s == -1: - # use all the data - ind = np.random.choice(range(n), batch_size) - else: - # use only buffer (last buffer_s observations) - ind = np.random.choice(range(max(0, n - self.buffer_s), n), batch_size) - return self.contexts[ind, :], self.rewards[ind, :] - - def get_data(self, action): - """Returns all (context, reward) where the action was played.""" - n, _ = self.contexts.shape - ind = np.array([i for i in range(n) if self.actions[i] == action]) - return self.contexts[ind, :], self.rewards[ind, action] - - def get_data_with_weights(self): - """Returns all observations with one-hot weights for actions.""" - weights = np.zeros((self.contexts.shape[0], self.num_actions)) - a_ind = np.array([(i, val) for i, val in enumerate(self.actions)]) - weights[a_ind[:, 0], a_ind[:, 1]] = 1.0 - return self.contexts, self.rewards, weights - - def get_batch_with_weights(self, batch_size): - """Returns a random mini-batch with one-hot weights for actions.""" - n, _ = self.contexts.shape - if self.buffer_s == -1: - # use all the data - ind = np.random.choice(range(n), batch_size) - else: - # use only buffer (last buffer_s 
obs) - ind = np.random.choice(range(max(0, n - self.buffer_s), n), batch_size) - - weights = np.zeros((batch_size, self.num_actions)) - sampled_actions = np.array(self.actions)[ind] - a_ind = np.array([(i, val) for i, val in enumerate(sampled_actions)]) - weights[a_ind[:, 0], a_ind[:, 1]] = 1.0 - return self.contexts[ind, :], self.rewards[ind, :], weights - - def num_points(self, f=None): - """Returns number of points in the buffer (after applying function f).""" - if f is not None: - return f(self.contexts.shape[0]) - return self.contexts.shape[0] - - @property - def context_dim(self): - return self._context_dim - - @property - def num_actions(self): - return self._num_actions - - @property - def contexts(self): - return self._contexts - - @contexts.setter - def contexts(self, value): - self._contexts = value - - @property - def actions(self): - return self._actions - - @actions.setter - def actions(self, value): - self._actions = value - - @property - def rewards(self): - return self._rewards - - @rewards.setter - def rewards(self, value): - self._rewards = value diff --git a/research/deep_contextual_bandits/bandits/data/data_sampler.py b/research/deep_contextual_bandits/bandits/data/data_sampler.py deleted file mode 100644 index 55d1bae383637485182a9524ba8a3cb37b76bd0d..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/data/data_sampler.py +++ /dev/null @@ -1,374 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to create bandit problems from datasets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import pandas as pd -import tensorflow as tf - - -def one_hot(df, cols): - """Returns one-hot encoding of DataFrame df including columns in cols.""" - for col in cols: - dummies = pd.get_dummies(df[col], prefix=col, drop_first=False) - df = pd.concat([df, dummies], axis=1) - df = df.drop(col, axis=1) - return df - - -def sample_mushroom_data(file_name, - num_contexts, - r_noeat=0, - r_eat_safe=5, - r_eat_poison_bad=-35, - r_eat_poison_good=5, - prob_poison_bad=0.5): - """Samples bandit game from Mushroom UCI Dataset. - - Args: - file_name: Route of file containing the original Mushroom UCI dataset. - num_contexts: Number of points to sample, i.e. (context, action rewards). - r_noeat: Reward for not eating a mushroom. - r_eat_safe: Reward for eating a non-poisonous mushroom. - r_eat_poison_bad: Reward for eating a poisonous mushroom if harmed. - r_eat_poison_good: Reward for eating a poisonous mushroom if not harmed. - prob_poison_bad: Probability of being harmed by eating a poisonous mushroom. - - Returns: - dataset: Sampled matrix with n rows: (context, eat_reward, no_eat_reward). - opt_vals: Vector of expected optimal (reward, action) for each context. - - We assume r_eat_safe > r_noeat, and r_eat_poison_good > r_eat_poison_bad. 
- """ - - # first two cols of df encode whether mushroom is edible or poisonous - df = pd.read_csv(file_name, header=None) - df = one_hot(df, df.columns) - ind = np.random.choice(range(df.shape[0]), num_contexts, replace=True) - - contexts = df.iloc[ind, 2:] - no_eat_reward = r_noeat * np.ones((num_contexts, 1)) - random_poison = np.random.choice( - [r_eat_poison_bad, r_eat_poison_good], - p=[prob_poison_bad, 1 - prob_poison_bad], - size=num_contexts) - eat_reward = r_eat_safe * df.iloc[ind, 0] - eat_reward += np.multiply(random_poison, df.iloc[ind, 1]) - eat_reward = eat_reward.values.reshape((num_contexts, 1)) - - # compute optimal expected reward and optimal actions - exp_eat_poison_reward = r_eat_poison_bad * prob_poison_bad - exp_eat_poison_reward += r_eat_poison_good * (1 - prob_poison_bad) - opt_exp_reward = r_eat_safe * df.iloc[ind, 0] + max( - r_noeat, exp_eat_poison_reward) * df.iloc[ind, 1] - - if r_noeat > exp_eat_poison_reward: - # actions: no eat = 0 ; eat = 1 - opt_actions = df.iloc[ind, 0] # indicator of edible - else: - # should always eat (higher expected reward) - opt_actions = np.ones((num_contexts, 1)) - - opt_vals = (opt_exp_reward.values, opt_actions.values) - - return np.hstack((contexts, no_eat_reward, eat_reward)), opt_vals - - -def sample_stock_data(file_name, context_dim, num_actions, num_contexts, - sigma, shuffle_rows=True): - """Samples linear bandit game from stock prices dataset. - - Args: - file_name: Route of file containing the stock prices dataset. - context_dim: Context dimension (i.e. vector with the price of each stock). - num_actions: Number of actions (different linear portfolio strategies). - num_contexts: Number of contexts to sample. - sigma: Vector with additive noise levels for each action. - shuffle_rows: If True, rows from original dataset are shuffled. - - Returns: - dataset: Sampled matrix with rows: (context, reward_1, ..., reward_k). - opt_vals: Vector of expected optimal (reward, action) for each context. 
- """ - - with tf.gfile.Open(file_name, 'r') as f: - contexts = np.loadtxt(f, skiprows=1) - - if shuffle_rows: - np.random.shuffle(contexts) - contexts = contexts[:num_contexts, :] - - betas = np.random.uniform(-1, 1, (context_dim, num_actions)) - betas /= np.linalg.norm(betas, axis=0) - - mean_rewards = np.dot(contexts, betas) - noise = np.random.normal(scale=sigma, size=mean_rewards.shape) - rewards = mean_rewards + noise - - opt_actions = np.argmax(mean_rewards, axis=1) - opt_rewards = [mean_rewards[i, a] for i, a in enumerate(opt_actions)] - return np.hstack((contexts, rewards)), (np.array(opt_rewards), opt_actions) - - -def sample_jester_data(file_name, context_dim, num_actions, num_contexts, - shuffle_rows=True, shuffle_cols=False): - """Samples bandit game from (user, joke) dense subset of Jester dataset. - - Args: - file_name: Route of file containing the modified Jester dataset. - context_dim: Context dimension (i.e. vector with some ratings from a user). - num_actions: Number of actions (number of joke ratings to predict). - num_contexts: Number of contexts to sample. - shuffle_rows: If True, rows from original dataset are shuffled. - shuffle_cols: Whether or not context/action jokes are randomly shuffled. - - Returns: - dataset: Sampled matrix with rows: (context, rating_1, ..., rating_k). - opt_vals: Vector of deterministic optimal (reward, action) for each context. - """ - - with tf.gfile.Open(file_name, 'rb') as f: - dataset = np.load(f) - - if shuffle_cols: - dataset = dataset[:, np.random.permutation(dataset.shape[1])] - if shuffle_rows: - np.random.shuffle(dataset) - dataset = dataset[:num_contexts, :] - - assert context_dim + num_actions == dataset.shape[1], 'Wrong data dimensions.' 
- - opt_actions = np.argmax(dataset[:, context_dim:], axis=1) - opt_rewards = np.array([dataset[i, context_dim + a] - for i, a in enumerate(opt_actions)]) - - return dataset, (opt_rewards, opt_actions) - - -def sample_statlog_data(file_name, num_contexts, shuffle_rows=True, - remove_underrepresented=False): - """Returns bandit problem dataset based on the UCI statlog data. - - Args: - file_name: Route of file containing the Statlog dataset. - num_contexts: Number of contexts to sample. - shuffle_rows: If True, rows from original dataset are shuffled. - remove_underrepresented: If True, removes arms with very few rewards. - - Returns: - dataset: Sampled matrix with rows: (context, action rewards). - opt_vals: Vector of deterministic optimal (reward, action) for each context. - - https://archive.ics.uci.edu/ml/datasets/Statlog+(Shuttle) - """ - - with tf.gfile.Open(file_name, 'r') as f: - data = np.loadtxt(f) - - num_actions = 7 # some of the actions are very rarely optimal. - - # Shuffle data - if shuffle_rows: - np.random.shuffle(data) - data = data[:num_contexts, :] - - # Last column is label, rest are features - contexts = data[:, :-1] - labels = data[:, -1].astype(int) - 1 # convert to 0 based index - - if remove_underrepresented: - contexts, labels = remove_underrepresented_classes(contexts, labels) - - return classification_to_bandit_problem(contexts, labels, num_actions) - - -def sample_adult_data(file_name, num_contexts, shuffle_rows=True, - remove_underrepresented=False): - """Returns bandit problem dataset based on the UCI adult data. - - Args: - file_name: Route of file containing the Adult dataset. - num_contexts: Number of contexts to sample. - shuffle_rows: If True, rows from original dataset are shuffled. - remove_underrepresented: If True, removes arms with very few rewards. - - Returns: - dataset: Sampled matrix with rows: (context, action rewards). - opt_vals: Vector of deterministic optimal (reward, action) for each context. 
- - Preprocessing: - * drop rows with missing values - * convert categorical variables to 1 hot encoding - - https://archive.ics.uci.edu/ml/datasets/census+income - """ - with tf.gfile.Open(file_name, 'r') as f: - df = pd.read_csv(f, header=None, - na_values=[' ?']).dropna() - - num_actions = 14 - - if shuffle_rows: - df = df.sample(frac=1) - df = df.iloc[:num_contexts, :] - - labels = df[6].astype('category').cat.codes.as_matrix() - df = df.drop([6], axis=1) - - # Convert categorical variables to 1 hot encoding - cols_to_transform = [1, 3, 5, 7, 8, 9, 13, 14] - df = pd.get_dummies(df, columns=cols_to_transform) - - if remove_underrepresented: - df, labels = remove_underrepresented_classes(df, labels) - contexts = df.as_matrix() - - return classification_to_bandit_problem(contexts, labels, num_actions) - - -def sample_census_data(file_name, num_contexts, shuffle_rows=True, - remove_underrepresented=False): - """Returns bandit problem dataset based on the UCI census data. - - Args: - file_name: Route of file containing the Census dataset. - num_contexts: Number of contexts to sample. - shuffle_rows: If True, rows from original dataset are shuffled. - remove_underrepresented: If True, removes arms with very few rewards. - - Returns: - dataset: Sampled matrix with rows: (context, action rewards). - opt_vals: Vector of deterministic optimal (reward, action) for each context. - - Preprocessing: - * drop rows with missing labels - * convert categorical variables to 1 hot encoding - - Note: this is the processed (not the 'raw') dataset. It contains a subset - of the raw features and they've all been discretized. - - https://archive.ics.uci.edu/ml/datasets/US+Census+Data+%281990%29 - """ - # Note: this dataset is quite large. It will be slow to load and preprocess. 
- with tf.gfile.Open(file_name, 'r') as f: - df = (pd.read_csv(f, header=0, na_values=['?']) - .dropna()) - - num_actions = 9 - - if shuffle_rows: - df = df.sample(frac=1) - df = df.iloc[:num_contexts, :] - - # Assuming what the paper calls response variable is the label? - labels = df['dOccup'].astype('category').cat.codes.as_matrix() - # In addition to label, also drop the (unique?) key. - df = df.drop(['dOccup', 'caseid'], axis=1) - - # All columns are categorical. Convert to 1 hot encoding. - df = pd.get_dummies(df, columns=df.columns) - - if remove_underrepresented: - df, labels = remove_underrepresented_classes(df, labels) - contexts = df.as_matrix() - - return classification_to_bandit_problem(contexts, labels, num_actions) - - -def sample_covertype_data(file_name, num_contexts, shuffle_rows=True, - remove_underrepresented=False): - """Returns bandit problem dataset based on the UCI Cover_Type data. - - Args: - file_name: Route of file containing the Covertype dataset. - num_contexts: Number of contexts to sample. - shuffle_rows: If True, rows from original dataset are shuffled. - remove_underrepresented: If True, removes arms with very few rewards. - - Returns: - dataset: Sampled matrix with rows: (context, action rewards). - opt_vals: Vector of deterministic optimal (reward, action) for each context. - - Preprocessing: - * drop rows with missing labels - * convert categorical variables to 1 hot encoding - - https://archive.ics.uci.edu/ml/datasets/Covertype - """ - with tf.gfile.Open(file_name, 'r') as f: - df = (pd.read_csv(f, header=0, na_values=['?']) - .dropna()) - - num_actions = 7 - - if shuffle_rows: - df = df.sample(frac=1) - df = df.iloc[:num_contexts, :] - - # Assuming what the paper calls response variable is the label? - # Last column is label. - labels = df[df.columns[-1]].astype('category').cat.codes.as_matrix() - df = df.drop([df.columns[-1]], axis=1) - - # All columns are either quantitative or already converted to 1 hot. 
- if remove_underrepresented: - df, labels = remove_underrepresented_classes(df, labels) - contexts = df.as_matrix() - - return classification_to_bandit_problem(contexts, labels, num_actions) - - -def classification_to_bandit_problem(contexts, labels, num_actions=None): - """Normalize contexts and encode deterministic rewards.""" - - if num_actions is None: - num_actions = np.max(labels) + 1 - num_contexts = contexts.shape[0] - - # Due to random subsampling in small problems, some features may be constant - sstd = safe_std(np.std(contexts, axis=0, keepdims=True)[0, :]) - - # Normalize features - contexts = ((contexts - np.mean(contexts, axis=0, keepdims=True)) / sstd) - - # One hot encode labels as rewards - rewards = np.zeros((num_contexts, num_actions)) - rewards[np.arange(num_contexts), labels] = 1.0 - - return contexts, rewards, (np.ones(num_contexts), labels) - - -def safe_std(values): - """Remove zero std values for ones.""" - return np.array([val if val != 0.0 else 1.0 for val in values]) - - -def remove_underrepresented_classes(features, labels, thresh=0.0005): - """Removes classes when number of datapoints fraction is below a threshold.""" - - # Threshold doesn't seem to agree with https://arxiv.org/pdf/1706.04687.pdf - # Example: for Covertype, they report 4 classes after filtering, we get 7? 
- total_count = labels.shape[0] - unique, counts = np.unique(labels, return_counts=True) - ratios = counts.astype('float') / total_count - vals_and_ratios = dict(zip(unique, ratios)) - print('Unique classes and their ratio of total: %s' % vals_and_ratios) - keep = [vals_and_ratios[v] >= thresh for v in labels] - return features[keep], labels[np.array(keep)] diff --git a/research/deep_contextual_bandits/bandits/data/synthetic_data_sampler.py b/research/deep_contextual_bandits/bandits/data/synthetic_data_sampler.py deleted file mode 100644 index c7de48aba4de109392aa8efad06071886cf67964..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/bandits/data/synthetic_data_sampler.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Several functions to sample contextual data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -def sample_contextual_data(num_contexts, dim_context, num_actions, sigma): - """Samples independent Gaussian data. - - There is nothing to learn here as the rewards do not depend on the context. - - Args: - num_contexts: Number of contexts to sample. - dim_context: Dimension of the contexts. - num_actions: Number of arms for the multi-armed bandit. 
- sigma: Standard deviation of the independent Gaussian samples. - - Returns: - data: A [num_contexts, dim_context + num_actions] numpy array with the data. - """ - size_data = [num_contexts, dim_context + num_actions] - return np.random.normal(scale=sigma, size=size_data) - - -def sample_linear_data(num_contexts, dim_context, num_actions, sigma=0.0): - """Samples data from linearly parameterized arms. - - The reward for context X and arm j is given by X^T beta_j, for some latent - set of parameters {beta_j : j = 1, ..., k}. The beta's are sampled uniformly - at random, the contexts are Gaussian, and sigma-noise is added to the rewards. - - Args: - num_contexts: Number of contexts to sample. - dim_context: Dimension of the contexts. - num_actions: Number of arms for the multi-armed bandit. - sigma: Standard deviation of the additive noise. Set to zero for no noise. - - Returns: - data: A [n, d+k] numpy array with the data. - betas: Latent parameters that determine expected reward for each arm. - opt: (optimal_rewards, optimal_actions) for all contexts. - """ - - betas = np.random.uniform(-1, 1, (dim_context, num_actions)) - betas /= np.linalg.norm(betas, axis=0) - contexts = np.random.normal(size=[num_contexts, dim_context]) - rewards = np.dot(contexts, betas) - opt_actions = np.argmax(rewards, axis=1) - rewards += np.random.normal(scale=sigma, size=rewards.shape) - opt_rewards = np.array([rewards[i, act] for i, act in enumerate(opt_actions)]) - return np.hstack((contexts, rewards)), betas, (opt_rewards, opt_actions) - - -def sample_sparse_linear_data(num_contexts, dim_context, num_actions, - sparse_dim, sigma=0.0): - """Samples data from sparse linearly parameterized arms. - - The reward for context X and arm j is given by X^T beta_j, for some latent - set of parameters {beta_j : j = 1, ..., k}. The beta's are sampled uniformly - at random, the contexts are Gaussian, and sigma-noise is added to the rewards. 
- Only s components out of d are non-zero for each arm's beta. - - Args: - num_contexts: Number of contexts to sample. - dim_context: Dimension of the contexts. - num_actions: Number of arms for the multi-armed bandit. - sparse_dim: Dimension of the latent subspace (sparsity pattern dimension). - sigma: Standard deviation of the additive noise. Set to zero for no noise. - - Returns: - data: A [num_contexts, dim_context+num_actions] numpy array with the data. - betas: Latent parameters that determine expected reward for each arm. - opt: (optimal_rewards, optimal_actions) for all contexts. - """ - - flatten = lambda l: [item for sublist in l for item in sublist] - sparse_pattern = flatten( - [[(j, i) for j in np.random.choice(range(dim_context), - sparse_dim, - replace=False)] - for i in range(num_actions)]) - betas = np.random.uniform(-1, 1, (dim_context, num_actions)) - mask = np.zeros((dim_context, num_actions)) - for elt in sparse_pattern: - mask[elt] = 1 - betas = np.multiply(betas, mask) - betas /= np.linalg.norm(betas, axis=0) - contexts = np.random.normal(size=[num_contexts, dim_context]) - rewards = np.dot(contexts, betas) - opt_actions = np.argmax(rewards, axis=1) - rewards += np.random.normal(scale=sigma, size=rewards.shape) - opt_rewards = np.array([rewards[i, act] for i, act in enumerate(opt_actions)]) - return np.hstack((contexts, rewards)), betas, (opt_rewards, opt_actions) - - -def sample_wheel_bandit_data(num_contexts, delta, mean_v, std_v, - mu_large, std_large): - """Samples from Wheel bandit game (see https://arxiv.org/abs/1802.09127). - - Args: - num_contexts: Number of points to sample, i.e. (context, action rewards). - delta: Exploration parameter: high reward in one region if norm above delta. - mean_v: Mean reward for each action if context norm is below delta. - std_v: Gaussian reward std for each action if context norm is below delta. - mu_large: Mean reward for optimal action if context norm is above delta. 
- std_large: Reward std for optimal action if context norm is above delta. - - Returns: - dataset: Sampled matrix with n rows: (context, action rewards). - opt_vals: Vector of expected optimal (reward, action) for each context. - """ - - context_dim = 2 - num_actions = 5 - - data = [] - rewards = [] - opt_actions = [] - opt_rewards = [] - - # sample uniform contexts in unit ball - while len(data) < num_contexts: - raw_data = np.random.uniform(-1, 1, (int(num_contexts / 3), context_dim)) - - for i in range(raw_data.shape[0]): - if np.linalg.norm(raw_data[i, :]) <= 1: - data.append(raw_data[i, :]) - - contexts = np.stack(data)[:num_contexts, :] - - # sample rewards - for i in range(num_contexts): - r = [np.random.normal(mean_v[j], std_v[j]) for j in range(num_actions)] - if np.linalg.norm(contexts[i, :]) >= delta: - # large reward in the right region for the context - r_big = np.random.normal(mu_large, std_large) - if contexts[i, 0] > 0: - if contexts[i, 1] > 0: - r[0] = r_big - opt_actions.append(0) - else: - r[1] = r_big - opt_actions.append(1) - else: - if contexts[i, 1] > 0: - r[2] = r_big - opt_actions.append(2) - else: - r[3] = r_big - opt_actions.append(3) - else: - opt_actions.append(np.argmax(mean_v)) - - opt_rewards.append(r[opt_actions[-1]]) - rewards.append(r) - - rewards = np.stack(rewards) - opt_rewards = np.array(opt_rewards) - opt_actions = np.array(opt_actions) - - return np.hstack((contexts, rewards)), (opt_rewards, opt_actions) diff --git a/research/deep_contextual_bandits/example_main.py b/research/deep_contextual_bandits/example_main.py deleted file mode 100644 index c71a5aa26f94adbf5989d002fd5c768582c14e14..0000000000000000000000000000000000000000 --- a/research/deep_contextual_bandits/example_main.py +++ /dev/null @@ -1,454 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple example of contextual bandits simulation. - -Code corresponding to: -Deep Bayesian Bandits Showdown: An Empirical Comparison of Bayesian Deep Networks -for Thompson Sampling, by Carlos Riquelme, George Tucker, and Jasper Snoek. -https://arxiv.org/abs/1802.09127 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time -from absl import app -from absl import flags -import numpy as np -import os -import tensorflow as tf - -from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling -from bandits.core.contextual_bandit import run_contextual_bandit -from bandits.data.data_sampler import sample_adult_data -from bandits.data.data_sampler import sample_census_data -from bandits.data.data_sampler import sample_covertype_data -from bandits.data.data_sampler import sample_jester_data -from bandits.data.data_sampler import sample_mushroom_data -from bandits.data.data_sampler import sample_statlog_data -from bandits.data.data_sampler import sample_stock_data -from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling -from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling -from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling -from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling -from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling -from 
bandits.data.synthetic_data_sampler import sample_linear_data -from bandits.data.synthetic_data_sampler import sample_sparse_linear_data -from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data -from bandits.algorithms.uniform_sampling import UniformSampling - -# Set up your file routes to the data files. -base_route = os.getcwd() -data_route = 'contextual_bandits/datasets' - -FLAGS = flags.FLAGS -FLAGS.set_default('alsologtostderr', True) -flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory to save output') -flags.DEFINE_string( - 'mushroom_data', - os.path.join(base_route, data_route, 'mushroom.data'), - 'Directory where Mushroom data is stored.') -flags.DEFINE_string( - 'financial_data', - os.path.join(base_route, data_route, 'raw_stock_contexts'), - 'Directory where Financial data is stored.') -flags.DEFINE_string( - 'jester_data', - os.path.join(base_route, data_route, 'jester_data_40jokes_19181users.npy'), - 'Directory where Jester data is stored.') -flags.DEFINE_string( - 'statlog_data', - os.path.join(base_route, data_route, 'shuttle.trn'), - 'Directory where Statlog data is stored.') -flags.DEFINE_string( - 'adult_data', - os.path.join(base_route, data_route, 'adult.full'), - 'Directory where Adult data is stored.') -flags.DEFINE_string( - 'covertype_data', - os.path.join(base_route, data_route, 'covtype.data'), - 'Directory where Covertype data is stored.') -flags.DEFINE_string( - 'census_data', - os.path.join(base_route, data_route, 'USCensus1990.data.txt'), - 'Directory where Census data is stored.') - - -def sample_data(data_type, num_contexts=None): - """Sample data from given 'data_type'. - - Args: - data_type: Dataset from which to sample. - num_contexts: Number of contexts to sample. - - Returns: - dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act). - opt_rewards: Vector of expected optimal reward for each context. - opt_actions: Vector of optimal action for each context. 
- num_actions: Number of available actions. - context_dim: Dimension of each context. - """ - - if data_type == 'linear': - # Create linear dataset - num_actions = 8 - context_dim = 10 - noise_stds = [0.01 * (i + 1) for i in range(num_actions)] - dataset, _, opt_linear = sample_linear_data(num_contexts, context_dim, - num_actions, sigma=noise_stds) - opt_rewards, opt_actions = opt_linear - elif data_type == 'sparse_linear': - # Create sparse linear dataset - num_actions = 7 - context_dim = 10 - noise_stds = [0.01 * (i + 1) for i in range(num_actions)] - num_nnz_dims = int(context_dim / 3.0) - dataset, _, opt_sparse_linear = sample_sparse_linear_data( - num_contexts, context_dim, num_actions, num_nnz_dims, sigma=noise_stds) - opt_rewards, opt_actions = opt_sparse_linear - elif data_type == 'mushroom': - # Create mushroom dataset - num_actions = 2 - context_dim = 117 - file_name = FLAGS.mushroom_data - dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts) - opt_rewards, opt_actions = opt_mushroom - elif data_type == 'financial': - num_actions = 8 - context_dim = 21 - num_contexts = min(3713, num_contexts) - noise_stds = [0.01 * (i + 1) for i in range(num_actions)] - file_name = FLAGS.financial_data - dataset, opt_financial = sample_stock_data(file_name, context_dim, - num_actions, num_contexts, - noise_stds, shuffle_rows=True) - opt_rewards, opt_actions = opt_financial - elif data_type == 'jester': - num_actions = 8 - context_dim = 32 - num_contexts = min(19181, num_contexts) - file_name = FLAGS.jester_data - dataset, opt_jester = sample_jester_data(file_name, context_dim, - num_actions, num_contexts, - shuffle_rows=True, - shuffle_cols=True) - opt_rewards, opt_actions = opt_jester - elif data_type == 'statlog': - file_name = FLAGS.statlog_data - num_actions = 7 - num_contexts = min(43500, num_contexts) - sampled_vals = sample_statlog_data(file_name, num_contexts, - shuffle_rows=True) - contexts, rewards, (opt_rewards, opt_actions) = sampled_vals - 
dataset = np.hstack((contexts, rewards)) - context_dim = contexts.shape[1] - elif data_type == 'adult': - file_name = FLAGS.adult_data - num_actions = 14 - num_contexts = min(45222, num_contexts) - sampled_vals = sample_adult_data(file_name, num_contexts, - shuffle_rows=True) - contexts, rewards, (opt_rewards, opt_actions) = sampled_vals - dataset = np.hstack((contexts, rewards)) - context_dim = contexts.shape[1] - elif data_type == 'covertype': - file_name = FLAGS.covertype_data - num_actions = 7 - num_contexts = min(150000, num_contexts) - sampled_vals = sample_covertype_data(file_name, num_contexts, - shuffle_rows=True) - contexts, rewards, (opt_rewards, opt_actions) = sampled_vals - dataset = np.hstack((contexts, rewards)) - context_dim = contexts.shape[1] - elif data_type == 'census': - file_name = FLAGS.census_data - num_actions = 9 - num_contexts = min(150000, num_contexts) - sampled_vals = sample_census_data(file_name, num_contexts, - shuffle_rows=True) - contexts, rewards, (opt_rewards, opt_actions) = sampled_vals - dataset = np.hstack((contexts, rewards)) - context_dim = contexts.shape[1] - elif data_type == 'wheel': - delta = 0.95 - num_actions = 5 - context_dim = 2 - mean_v = [1.0, 1.0, 1.0, 1.0, 1.2] - std_v = [0.05, 0.05, 0.05, 0.05, 0.05] - mu_large = 50 - std_large = 0.01 - dataset, opt_wheel = sample_wheel_bandit_data(num_contexts, delta, - mean_v, std_v, - mu_large, std_large) - opt_rewards, opt_actions = opt_wheel - - return dataset, opt_rewards, opt_actions, num_actions, context_dim - - -def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, name): - """Displays summary statistics of the performance of each algorithm.""" - - print('---------------------------------------------------') - print('---------------------------------------------------') - print('{} bandit completed after {} seconds.'.format( - name, time.time() - t_init)) - print('---------------------------------------------------') - - performance_pairs = [] - for j, 
a in enumerate(algos): - performance_pairs.append((a.name, np.sum(h_rewards[:, j]))) - performance_pairs = sorted(performance_pairs, - key=lambda elt: elt[1], - reverse=True) - for i, (name, reward) in enumerate(performance_pairs): - print('{:3}) {:20}| \t \t total reward = {:10}.'.format(i, name, reward)) - - print('---------------------------------------------------') - print('Optimal total reward = {}.'.format(np.sum(opt_rewards))) - print('Frequency of optimal actions (action, frequency):') - print([[elt, list(opt_actions).count(elt)] for elt in set(opt_actions)]) - print('---------------------------------------------------') - print('---------------------------------------------------') - - -def main(_): - - # Problem parameters - num_contexts = 2000 - - # Data type in {linear, sparse_linear, mushroom, financial, jester, - # statlog, adult, covertype, census, wheel} - data_type = 'mushroom' - - # Create dataset - sampled_vals = sample_data(data_type, num_contexts) - dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals - - # Define hyperparameters and algorithms - hparams = tf.contrib.training.HParams(num_actions=num_actions) - - hparams_linear = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - a0=6, - b0=6, - lambda_prior=0.25, - initial_pulls=2) - - hparams_rms = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - optimizer='RMS', - reset_lr=True, - lr_decay_rate=0.5, - training_freq=50, - training_epochs=100, - p=0.95, - q=3) - - hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - 
max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - optimizer='RMS', - reset_lr=True, - lr_decay_rate=0.5, - training_freq=50, - training_epochs=100, - use_dropout=True, - keep_prob=0.80) - - hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - optimizer='RMS', - use_sigma_exp_transform=True, - cleared_times_trained=10, - initial_training_steps=100, - noise_sigma=0.1, - reset_lr=False, - training_freq=50, - training_epochs=100) - - hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - reset_lr=True, - lr_decay_rate=0.5, - training_freq=1, - training_freq_network=50, - training_epochs=100, - a0=6, - b0=6, - lambda_prior=0.25) - - hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - reset_lr=True, - lr_decay_rate=0.5, - training_freq=10, - training_freq_network=50, - training_epochs=100, - a0=6, - b0=6, - lambda_prior=0.25) - - hparams_pnoise = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - 
initial_pulls=2, - optimizer='RMS', - reset_lr=True, - lr_decay_rate=0.5, - training_freq=50, - training_epochs=100, - noise_std=0.05, - eps=0.1, - d_samples=300, - ) - - hparams_alpha_div = tf.contrib.training.HParams(num_actions=num_actions, - context_dim=context_dim, - init_scale=0.3, - activation=tf.nn.relu, - layer_sizes=[50], - batch_size=512, - activate_decay=True, - initial_lr=0.1, - max_grad_norm=5.0, - show_training=False, - freq_summary=1000, - buffer_s=-1, - initial_pulls=2, - optimizer='RMS', - use_sigma_exp_transform=True, - cleared_times_trained=10, - initial_training_steps=100, - noise_sigma=0.1, - reset_lr=False, - training_freq=50, - training_epochs=100, - alpha=1.0, - k=20, - prior_variance=0.1) - - hparams_gp = tf.contrib.training.HParams(num_actions=num_actions, - num_outputs=num_actions, - context_dim=context_dim, - reset_lr=False, - learn_embeddings=True, - max_num_points=1000, - show_training=False, - freq_summary=1000, - batch_size=512, - keep_fixed_after_max_obs=True, - training_freq=50, - initial_pulls=2, - training_epochs=100, - lr=0.01, - buffer_s=-1, - initial_lr=0.001, - lr_decay_rate=0.0, - optimizer='RMS', - task_latent_dim=5, - activate_decay=False) - - algos = [ - UniformSampling('Uniform Sampling', hparams), - UniformSampling('Uniform Sampling 2', hparams), - FixedPolicySampling('fixed1', [0.75, 0.25], hparams), - FixedPolicySampling('fixed2', [0.25, 0.75], hparams), - PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'), - PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'), - PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'), - NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear), - NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2), - LinearFullPosteriorSampling('LinFullPost', hparams_linear), - BootstrappedBNNSampling('BootRMS', hparams_rms), - ParameterNoiseSampling('ParamNoise', hparams_pnoise), - PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'), - 
PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'), - ] - - # Run contextual bandit problem - t_init = time.time() - results = run_contextual_bandit(context_dim, num_actions, dataset, algos) - _, h_rewards = results - - # Display results - display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type) - -if __name__ == '__main__': - app.run(main) diff --git a/research/deep_speech/README.md b/research/deep_speech/README.md index 59a9dea7f81e3963372b23d8c4436e3e04d763ac..06ded0c912773f477e9b8176f6655b609adeb56a 100644 --- a/research/deep_speech/README.md +++ b/research/deep_speech/README.md @@ -1,6 +1,6 @@ ![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) +[![TensorFlow 1.15.3](https://img.shields.io/badge/TensorFlow-1.15.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v1.15.3) +[![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0) # DeepSpeech2 Model diff --git a/research/deep_speech/data/dataset.py b/research/deep_speech/data/dataset.py index 4a8cb59955c4608e20dcec6d9c5e38d705f24311..32391773dfe978b4bda2fd7e9552300c11126007 100644 --- a/research/deep_speech/data/dataset.py +++ b/research/deep_speech/data/dataset.py @@ -24,6 +24,7 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin import soundfile import tensorflow as tf +from absl import logging # pylint: enable=g-bad-import-order import data.featurizer as featurizer # pylint: disable=g-bad-import-order @@ -71,8 +72,8 @@ class DatasetConfig(object): """ self.audio_config = audio_config - assert tf.gfile.Exists(data_path) - assert tf.gfile.Exists(vocab_file_path) + assert 
tf.io.gfile.exists(data_path) + assert tf.io.gfile.exists(vocab_file_path) self.data_path = data_path self.vocab_file_path = vocab_file_path self.sortagrad = sortagrad @@ -125,8 +126,8 @@ def _preprocess_data(file_path): A list of tuples (wav_filename, wav_filesize, transcript) sorted by file_size. """ - tf.logging.info("Loading data set {}".format(file_path)) - with tf.gfile.Open(file_path, "r") as f: + logging.info("Loading data set {}".format(file_path)) + with tf.io.gfile.GFile(file_path, "r") as f: lines = f.read().splitlines() # Skip the csv header in lines[0]. lines = lines[1:] diff --git a/research/deep_speech/data/download.py b/research/deep_speech/data/download.py index 5ded03762138a0006c36585e2e8da450baaccac7..3ea6e2f3e54a98c95198638d28a4dfc0d96fff97 100644 --- a/research/deep_speech/data/download.py +++ b/research/deep_speech/data/download.py @@ -32,6 +32,7 @@ import pandas from six.moves import urllib from sox import Transformer import tensorflow as tf +from absl import logging LIBRI_SPEECH_URLS = { "train-clean-100": @@ -59,13 +60,13 @@ def download_and_extract(directory, url): url: the url to download the data file. 
""" - if not tf.gfile.Exists(directory): - tf.gfile.MakeDirs(directory) + if not tf.io.gfile.exists(directory): + tf.io.gfile.makedirs(directory) _, tar_filepath = tempfile.mkstemp(suffix=".tar.gz") try: - tf.logging.info("Downloading %s to %s" % (url, tar_filepath)) + logging.info("Downloading %s to %s" % (url, tar_filepath)) def _progress(count, block_size, total_size): sys.stdout.write("\r>> Downloading {} {:.1f}%".format( @@ -75,12 +76,12 @@ def download_and_extract(directory, url): urllib.request.urlretrieve(url, tar_filepath, _progress) print() statinfo = os.stat(tar_filepath) - tf.logging.info( + logging.info( "Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size)) with tarfile.open(tar_filepath, "r") as tar: tar.extractall(directory) finally: - tf.gfile.Remove(tar_filepath) + tf.io.gfile.remove(tar_filepath) def convert_audio_and_split_transcript(input_dir, source_name, target_name, @@ -112,18 +113,18 @@ def convert_audio_and_split_transcript(input_dir, source_name, target_name, output_file: the name of the newly generated csv file. e.g. test-clean.csv """ - tf.logging.info("Preprocessing audio and transcript for %s" % source_name) + logging.info("Preprocessing audio and transcript for %s" % source_name) source_dir = os.path.join(input_dir, source_name) target_dir = os.path.join(input_dir, target_name) - if not tf.gfile.Exists(target_dir): - tf.gfile.MakeDirs(target_dir) + if not tf.io.gfile.exists(target_dir): + tf.io.gfile.makedirs(target_dir) files = [] tfm = Transformer() # Convert all FLAC file into WAV format. At the same time, generate the csv # file. - for root, _, filenames in tf.gfile.Walk(source_dir): + for root, _, filenames in tf.io.gfile.walk(source_dir): for filename in fnmatch.filter(filenames, "*.trans.txt"): trans_file = os.path.join(root, filename) with codecs.open(trans_file, "r", "utf-8") as fin: @@ -137,7 +138,7 @@ def convert_audio_and_split_transcript(input_dir, source_name, target_name, # Convert FLAC to WAV. 
flac_file = os.path.join(root, seqid + ".flac") wav_file = os.path.join(target_dir, seqid + ".wav") - if not tf.gfile.Exists(wav_file): + if not tf.io.gfile.exists(wav_file): tfm.build(flac_file, wav_file) wav_filesize = os.path.getsize(wav_file) @@ -149,7 +150,7 @@ def convert_audio_and_split_transcript(input_dir, source_name, target_name, df = pandas.DataFrame( data=files, columns=["wav_filename", "wav_filesize", "transcript"]) df.to_csv(csv_file_path, index=False, sep="\t") - tf.logging.info("Successfully generated csv file {}".format(csv_file_path)) + logging.info("Successfully generated csv file {}".format(csv_file_path)) def download_and_process_datasets(directory, datasets): @@ -160,10 +161,10 @@ def download_and_process_datasets(directory, datasets): datasets: list of dataset names that will be downloaded and processed. """ - tf.logging.info("Preparing LibriSpeech dataset: {}".format( + logging.info("Preparing LibriSpeech dataset: {}".format( ",".join(datasets))) for dataset in datasets: - tf.logging.info("Preparing dataset %s", dataset) + logging.info("Preparing dataset %s", dataset) dataset_dir = os.path.join(directory, dataset) download_and_extract(dataset_dir, LIBRI_SPEECH_URLS[dataset]) convert_audio_and_split_transcript( @@ -185,8 +186,8 @@ def define_data_download_flags(): def main(_): - if not tf.gfile.Exists(FLAGS.data_dir): - tf.gfile.MakeDirs(FLAGS.data_dir) + if not tf.io.gfile.exists(FLAGS.data_dir): + tf.io.gfile.makedirs(FLAGS.data_dir) if FLAGS.train_only: download_and_process_datasets( @@ -202,7 +203,7 @@ def main(_): if __name__ == "__main__": - tf.logging.set_verbosity(tf.logging.INFO) + logging.set_verbosity(logging.INFO) define_data_download_flags() FLAGS = absl_flags.FLAGS absl_app.run(main) diff --git a/research/deep_speech/decoder.py b/research/deep_speech/decoder.py index f46983170f5885385942be1b24e262f30b4be8e0..bf618bcb63c82f3d041d1b93e74b0ef4b3a1b6c8 100644 --- a/research/deep_speech/decoder.py +++ 
b/research/deep_speech/decoder.py @@ -30,7 +30,7 @@ class DeepSpeechDecoder(object): def __init__(self, labels, blank_index=28): """Decoder initialization. - Arguments: + Args: labels: a string specifying the speech labels for the decoder to use. blank_index: an integer specifying index for the blank character. Defaults to 28. diff --git a/research/deep_speech/deep_speech.py b/research/deep_speech/deep_speech.py index 3d809c3cbc245e20b752bf2d2e45823f33521d64..468c7133115528d7114745c25d46274b34cd7207 100644 --- a/research/deep_speech/deep_speech.py +++ b/research/deep_speech/deep_speech.py @@ -21,6 +21,7 @@ import os # pylint: disable=g-bad-import-order from absl import app as absl_app from absl import flags +from absl import logging import tensorflow as tf # pylint: enable=g-bad-import-order @@ -61,25 +62,10 @@ def compute_length_after_conv(max_time_steps, ctc_time_steps, input_length): Returns: the ctc_input_length after convolution layer. """ - ctc_input_length = tf.to_float(tf.multiply( - input_length, ctc_time_steps)) - return tf.to_int32(tf.floordiv( - ctc_input_length, tf.to_float(max_time_steps))) - - -def ctc_loss(label_length, ctc_input_length, labels, logits): - """Computes the ctc loss for the current batch of predictions.""" - label_length = tf.to_int32(tf.squeeze(label_length)) - ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length)) - sparse_labels = tf.to_int32( - tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length)) - y_pred = tf.log(tf.transpose( - logits, perm=[1, 0, 2]) + tf.keras.backend.epsilon()) - - return tf.expand_dims( - tf.nn.ctc_loss(labels=sparse_labels, inputs=y_pred, - sequence_length=ctc_input_length), - axis=1) + ctc_input_length = tf.cast(tf.multiply( + input_length, ctc_time_steps), dtype=tf.float32) + return tf.cast(tf.math.floordiv( + ctc_input_length, tf.cast(max_time_steps, dtype=tf.float32)), dtype=tf.int32) def evaluate_model(estimator, speech_labels, entries, input_fn_eval): @@ -123,11 +109,11 @@ def 
evaluate_model(estimator, speech_labels, entries, input_fn_eval): total_cer /= num_of_examples total_wer /= num_of_examples - global_step = estimator.get_variable_value(tf.GraphKeys.GLOBAL_STEP) + global_step = estimator.get_variable_value(tf.compat.v1.GraphKeys.GLOBAL_STEP) eval_results = { _WER_KEY: total_wer, _CER_KEY: total_cer, - tf.GraphKeys.GLOBAL_STEP: global_step, + tf.compat.v1.GraphKeys.GLOBAL_STEP: global_step, } return eval_results @@ -163,7 +149,7 @@ def model_fn(features, labels, mode, params): logits = model(features, training=False) predictions = { "classes": tf.argmax(logits, axis=2), - "probabilities": tf.nn.softmax(logits), + "probabilities": logits, "logits": logits } return tf.estimator.EstimatorSpec( @@ -172,17 +158,16 @@ def model_fn(features, labels, mode, params): # In training mode. logits = model(features, training=True) - probs = tf.nn.softmax(logits) ctc_input_length = compute_length_after_conv( - tf.shape(features)[1], tf.shape(probs)[1], input_length) + tf.shape(features)[1], tf.shape(logits)[1], input_length) # Compute CTC loss - loss = tf.reduce_mean(ctc_loss( - label_length, ctc_input_length, labels, probs)) + loss = tf.reduce_mean(tf.keras.backend.ctc_batch_cost( + labels, logits, ctc_input_length, label_length)) - optimizer = tf.train.AdamOptimizer(learning_rate=flags_obj.learning_rate) - global_step = tf.train.get_or_create_global_step() + optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=flags_obj.learning_rate) + global_step = tf.compat.v1.train.get_or_create_global_step() minimize_op = optimizer.minimize(loss, global_step=global_step) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS) # Create the train_op that groups both minimize_ops and update_ops train_op = tf.group(minimize_op, update_ops) @@ -239,9 +224,9 @@ def per_device_batch_size(batch_size, num_gpus): def run_deep_speech(_): """Run deep speech training and eval loop.""" - 
tf.set_random_seed(flags_obj.seed) + tf.compat.v1.set_random_seed(flags_obj.seed) # Data preprocessing - tf.logging.info("Data preprocessing...") + logging.info("Data preprocessing...") train_speech_dataset = generate_dataset(flags_obj.train_data_dir) eval_speech_dataset = generate_dataset(flags_obj.eval_data_dir) @@ -287,7 +272,7 @@ def run_deep_speech(_): total_training_cycle = (flags_obj.train_epochs // flags_obj.epochs_between_evals) for cycle_index in range(total_training_cycle): - tf.logging.info("Starting a training cycle: %d/%d", + logging.info("Starting a training cycle: %d/%d", cycle_index + 1, total_training_cycle) # Perform batch_wise dataset shuffling @@ -298,7 +283,7 @@ def run_deep_speech(_): estimator.train(input_fn=input_fn_train) # Evaluation - tf.logging.info("Starting to evaluate...") + logging.info("Starting to evaluate...") eval_results = evaluate_model( estimator, eval_speech_dataset.speech_labels, @@ -306,7 +291,7 @@ def run_deep_speech(_): # Log the WER and CER results. benchmark_logger.log_evaluation_result(eval_results) - tf.logging.info( + logging.info( "Iteration {}: WER = {:.2f}, CER = {:.2f}".format( cycle_index + 1, eval_results[_WER_KEY], eval_results[_CER_KEY])) @@ -425,7 +410,7 @@ def main(_): if __name__ == "__main__": - tf.logging.set_verbosity(tf.logging.INFO) + logging.set_verbosity(logging.INFO) define_deep_speech_flags() flags_obj = flags.FLAGS absl_app.run(main) diff --git a/research/deep_speech/deep_speech_model.py b/research/deep_speech/deep_speech_model.py index dd768f825c792eb9f8f8ca7dbef6a25f5ce81091..7860f379f0d7431b29fcdb7bc65aa1e831cc8073 100644 --- a/research/deep_speech/deep_speech_model.py +++ b/research/deep_speech/deep_speech_model.py @@ -22,9 +22,9 @@ import tensorflow as tf # Supported rnn cells. 
SUPPORTED_RNNS = { - "lstm": tf.contrib.rnn.BasicLSTMCell, - "rnn": tf.contrib.rnn.RNNCell, - "gru": tf.contrib.rnn.GRUCell, + "lstm": tf.keras.layers.LSTMCell, + "rnn": tf.keras.layers.SimpleRNNCell, + "gru": tf.keras.layers.GRUCell, } # Parameters for batch normalization. @@ -53,9 +53,8 @@ def batch_norm(inputs, training): Returns: tensor output from batch norm layer. """ - return tf.layers.batch_normalization( - inputs=inputs, momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, - fused=True, training=training) + return tf.keras.layers.BatchNormalization( + momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON)(inputs, training=training) def _conv_bn_layer(inputs, padding, filters, kernel_size, strides, layer_id, @@ -81,10 +80,10 @@ def _conv_bn_layer(inputs, padding, filters, kernel_size, strides, layer_id, inputs = tf.pad( inputs, [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]]) - inputs = tf.layers.conv2d( - inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, + inputs = tf.keras.layers.Conv2D( + filters=filters, kernel_size=kernel_size, strides=strides, padding="valid", use_bias=False, activation=tf.nn.relu6, - name="cnn_{}".format(layer_id)) + name="cnn_{}".format(layer_id))(inputs) return batch_norm(inputs, training) @@ -109,24 +108,16 @@ def _rnn_layer(inputs, rnn_cell, rnn_hidden_size, layer_id, is_batch_norm, if is_batch_norm: inputs = batch_norm(inputs, training) - # Construct forward/backward RNN cells. 
- fw_cell = rnn_cell(num_units=rnn_hidden_size, - name="rnn_fw_{}".format(layer_id)) - bw_cell = rnn_cell(num_units=rnn_hidden_size, - name="rnn_bw_{}".format(layer_id)) - if is_bidirectional: - outputs, _ = tf.nn.bidirectional_dynamic_rnn( - cell_fw=fw_cell, cell_bw=bw_cell, inputs=inputs, dtype=tf.float32, - swap_memory=True) - rnn_outputs = tf.concat(outputs, -1) + rnn_outputs = tf.keras.layers.Bidirectional( + tf.keras.layers.RNN(rnn_cell(rnn_hidden_size), + return_sequences=True))(inputs) else: - rnn_outputs = tf.nn.dynamic_rnn( - fw_cell, inputs, dtype=tf.float32, swap_memory=True) + rnn_outputs = tf.keras.layers.RNN( + rnn_cell(rnn_hidden_size), return_sequences=True)(inputs) return rnn_outputs - class DeepSpeech2(object): """Define DeepSpeech2 model.""" @@ -179,7 +170,8 @@ class DeepSpeech2(object): # FC layer with batch norm. inputs = batch_norm(inputs, training) - logits = tf.layers.dense(inputs, self.num_classes, use_bias=self.use_bias) + logits = tf.keras.layers.Dense( + self.num_classes, use_bias=self.use_bias, activation="softmax")(inputs) return logits diff --git a/research/deeplab/README.md b/research/deeplab/README.md index 8609432b5bd78b56bf18a557dd5b813ed6a3fc77..f29002ea59a29ca6906e8728cc81f605dcdc7651 100644 --- a/research/deeplab/README.md +++ b/research/deeplab/README.md @@ -246,7 +246,7 @@ PASCAL VOC 2012 and Cityscapes. ### March 5, 2018 * First release of DeepLab in TensorFlow including deeper Xception network -backbone. Included chekcpoints that have been pretrained on PASCAL VOC 2012 +backbone. Included checkpoints that have been pretrained on PASCAL VOC 2012 and Cityscapes. ## References diff --git a/research/deeplab/convert_to_tflite.py b/research/deeplab/convert_to_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..d23ce9e2337829d7a71a8cde487fb73ab068b664 --- /dev/null +++ b/research/deeplab/convert_to_tflite.py @@ -0,0 +1,112 @@ +# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tools to convert a quantized deeplab model to tflite.""" + +from absl import app +from absl import flags +import numpy as np +from PIL import Image +import tensorflow as tf + + +flags.DEFINE_string('quantized_graph_def_path', None, + 'Path to quantized graphdef.') +flags.DEFINE_string('output_tflite_path', None, 'Output TFlite model path.') +flags.DEFINE_string( + 'input_tensor_name', None, + 'Input tensor to TFlite model. This usually should be the input tensor to ' + 'model backbone.' +) +flags.DEFINE_string( + 'output_tensor_name', 'ArgMax:0', + 'Output tensor name of TFlite model. By default we output the raw semantic ' + 'label predictions.' +) +flags.DEFINE_string( + 'test_image_path', None, + 'Path to an image to test the consistency between input graphdef / ' + 'converted tflite model.' 
+) + +FLAGS = flags.FLAGS + + +def convert_to_tflite(quantized_graphdef, + backbone_input_tensor, + output_tensor): + """Helper method to convert quantized deeplab model to TFlite.""" + with tf.Graph().as_default() as graph: + tf.graph_util.import_graph_def(quantized_graphdef, name='') + sess = tf.compat.v1.Session() + + tflite_input = graph.get_tensor_by_name(backbone_input_tensor) + tflite_output = graph.get_tensor_by_name(output_tensor) + converter = tf.compat.v1.lite.TFLiteConverter.from_session( + sess, [tflite_input], [tflite_output]) + converter.inference_type = tf.compat.v1.lite.constants.QUANTIZED_UINT8 + input_arrays = converter.get_input_arrays() + converter.quantized_input_stats = {input_arrays[0]: (127.5, 127.5)} + return converter.convert() + + +def check_tflite_consistency(graph_def, tflite_model, image_path): + """Runs tflite and frozen graph on same input, check their outputs match.""" + # Load tflite model and check input size. + interpreter = tf.lite.Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details() + output_details = interpreter.get_output_details() + height, width = input_details[0]['shape'][1:3] + + # Prepare input image data. + with tf.io.gfile.GFile(image_path, 'rb') as f: + image = Image.open(f) + image = np.asarray(image.convert('RGB').resize((width, height))) + image = np.expand_dims(image, 0) + + # Output from tflite model. + interpreter.set_tensor(input_details[0]['index'], image) + interpreter.invoke() + output_tflite = interpreter.get_tensor(output_details[0]['index']) + + with tf.Graph().as_default(): + tf.graph_util.import_graph_def(graph_def, name='') + with tf.compat.v1.Session() as sess: + # Note here the graph will include preprocessing part of the graph + # (e.g. resize, pad, normalize). Given the input image size is at the + # crop size (backbone input size), resize / pad should be an identity op. 
+ output_graph = sess.run( + FLAGS.output_tensor_name, feed_dict={'ImageTensor:0': image}) + + print('%.2f%% pixels have matched semantic labels.' % ( + 100 * np.mean(output_graph == output_tflite))) + + +def main(unused_argv): + with tf.io.gfile.GFile(FLAGS.quantized_graph_def_path, 'rb') as f: + graph_def = tf.compat.v1.GraphDef.FromString(f.read()) + tflite_model = convert_to_tflite( + graph_def, FLAGS.input_tensor_name, FLAGS.output_tensor_name) + + if FLAGS.output_tflite_path: + with tf.io.gfile.GFile(FLAGS.output_tflite_path, 'wb') as f: + f.write(tflite_model) + + if FLAGS.test_image_path: + check_tflite_consistency(graph_def, tflite_model, FLAGS.test_image_path) + + +if __name__ == '__main__': + app.run(main) diff --git a/research/deeplab/datasets/build_cityscapes_data.py b/research/deeplab/datasets/build_cityscapes_data.py index ce81baef20a460abaa634d3f1dcb6760a0858dec..53c11e30310f38a8abadeeeae78a0d71f5f7f8cb 100644 --- a/research/deeplab/datasets/build_cityscapes_data.py +++ b/research/deeplab/datasets/build_cityscapes_data.py @@ -113,17 +113,23 @@ def _get_files(data, dataset_split): Args: data: String, desired data ('image' or 'label'). - dataset_split: String, dataset split ('train', 'val', 'test') + dataset_split: String, dataset split ('train_fine', 'val_fine', 'test_fine') Returns: A list of sorted file names or None when getting label for test set. 
""" - if data == 'label' and dataset_split == 'test': - return None + if dataset_split == 'train_fine': + split_dir = 'train' + elif dataset_split == 'val_fine': + split_dir = 'val' + elif dataset_split == 'test_fine': + split_dir = 'test' + else: + raise RuntimeError("Split {} is not supported".format(dataset_split)) pattern = '*%s.%s' % (_POSTFIX_MAP[data], _DATA_FORMAT_MAP[data]) search_files = os.path.join( - FLAGS.cityscapes_root, _FOLDERS_MAP[data], dataset_split, '*', pattern) + FLAGS.cityscapes_root, _FOLDERS_MAP[data], split_dir, '*', pattern) filenames = glob.glob(search_files) return sorted(filenames) @@ -132,7 +138,7 @@ def _convert_dataset(dataset_split): """Converts the specified dataset split to TFRecord format. Args: - dataset_split: The dataset split (e.g., train, val). + dataset_split: The dataset split (e.g., train_fine, val_fine). Raises: RuntimeError: If loaded image and label have different shape, or if the @@ -142,8 +148,12 @@ def _convert_dataset(dataset_split): label_files = _get_files('label', dataset_split) num_images = len(image_files) + num_labels = len(label_files) num_per_shard = int(math.ceil(num_images / _NUM_SHARDS)) + if num_images != num_labels: + raise RuntimeError("The number of images and labels doesn't match: {} {}".format(num_images, num_labels)) + image_reader = build_data.ImageReader('png', channels=3) label_reader = build_data.ImageReader('png', channels=1) @@ -179,8 +189,8 @@ def _convert_dataset(dataset_split): def main(unused_argv): - # Only support converting 'train' and 'val' sets for now. - for dataset_split in ['train', 'val']: + # Only support converting 'train_fine', 'val_fine' and 'test_fine' sets for now. 
+ for dataset_split in ['train_fine', 'val_fine', 'test_fine']: _convert_dataset(dataset_split) diff --git a/research/deeplab/datasets/convert_cityscapes.sh b/research/deeplab/datasets/convert_cityscapes.sh index a95b5d66aad79ae7cbd6ad2d3ee60550ab7f6239..ddc39fb11ddfed38e0b9daf5974b96286a4aa43b 100644 --- a/research/deeplab/datasets/convert_cityscapes.sh +++ b/research/deeplab/datasets/convert_cityscapes.sh @@ -42,6 +42,8 @@ WORK_DIR="." # Root path for Cityscapes dataset. CITYSCAPES_ROOT="${WORK_DIR}/cityscapes" +export PYTHONPATH="${CITYSCAPES_ROOT}:${PYTHONPATH}" + # Create training labels. python "${CITYSCAPES_ROOT}/cityscapesscripts/preparation/createTrainIdLabelImgs.py" diff --git a/research/deeplab/datasets/download_and_convert_voc2012.sh b/research/deeplab/datasets/download_and_convert_voc2012.sh index c02235182d427dfb1d63154a8266ad37b0a1d53f..3126f729decac4b684b237d6402d6c29aabc7541 100644 --- a/research/deeplab/datasets/download_and_convert_voc2012.sh +++ b/research/deeplab/datasets/download_and_convert_voc2012.sh @@ -51,19 +51,20 @@ download_and_uncompress() { wget -nd -c "${BASE_URL}/${FILENAME}" fi echo "Uncompressing ${FILENAME}" - tar -xf "${FILENAME}" + sudo apt install unzip + unzip "${FILENAME}" } # Download the images. -BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/" -FILENAME="VOCtrainval_11-May-2012.tar" +BASE_URL="https://data.deepai.org/" +FILENAME="PascalVOC2012.zip" download_and_uncompress "${BASE_URL}" "${FILENAME}" cd "${CURRENT_DIR}" # Root path for PASCAL VOC 2012 dataset. -PASCAL_ROOT="${WORK_DIR}/VOCdevkit/VOC2012" +PASCAL_ROOT="${WORK_DIR}/VOC2012" # Remove the colormap in the ground truth annotations. 
SEG_FOLDER="${PASCAL_ROOT}/SegmentationClass" diff --git a/research/deeplab/deprecated/segmentation_dataset.py b/research/deeplab/deprecated/segmentation_dataset.py index 2a5980b1d940878cd1aead4a5d301cca7b4a642b..8a6a8c766e40f06e6bee4f7550018b7933a51b92 100644 --- a/research/deeplab/deprecated/segmentation_dataset.py +++ b/research/deeplab/deprecated/segmentation_dataset.py @@ -81,8 +81,8 @@ DatasetDescriptor = collections.namedtuple( _CITYSCAPES_INFORMATION = DatasetDescriptor( splits_to_sizes={ - 'train': 2975, - 'val': 500, + 'train_fine': 2975, + 'val_fine': 500, }, num_classes=19, ignore_label=255, diff --git a/research/deeplab/g3doc/cityscapes.md b/research/deeplab/g3doc/cityscapes.md index af703088e61b49aa81bf62b536469b410f0fb352..5a660aaca342d16461b2355cae058900a212db1a 100644 --- a/research/deeplab/g3doc/cityscapes.md +++ b/research/deeplab/g3doc/cityscapes.md @@ -43,7 +43,7 @@ A local training job using `xception_65` can be run with the following command: python deeplab/train.py \ --logtostderr \ --training_number_of_steps=90000 \ - --train_split="train" \ + --train_split="train_fine" \ --model_variant="xception_65" \ --atrous_rates=6 \ --atrous_rates=12 \ @@ -95,7 +95,7 @@ command: # From tensorflow/models/research/ python deeplab/eval.py \ --logtostderr \ - --eval_split="val" \ + --eval_split="val_fine" \ --model_variant="xception_65" \ --atrous_rates=6 \ --atrous_rates=12 \ @@ -121,7 +121,7 @@ command: # From tensorflow/models/research/ python deeplab/vis.py \ --logtostderr \ - --vis_split="val" \ + --vis_split="val_fine" \ --model_variant="xception_65" \ --atrous_rates=6 \ --atrous_rates=12 \ diff --git a/research/deeplab/g3doc/installation.md b/research/deeplab/g3doc/installation.md index 8629aba42207fc6e35c907024485c0e7f29f5e10..591a1f8da50d139f48cb3cd3c299535fcdab16a9 100644 --- a/research/deeplab/g3doc/installation.md +++ b/research/deeplab/g3doc/installation.md @@ -68,6 +68,6 @@ Quick running the whole code on the PASCAL VOC 2012 dataset: ```bash 
# From tensorflow/models/research/deeplab -sh local_test.sh +bash local_test.sh ``` diff --git a/research/deeplab/g3doc/quantize.md b/research/deeplab/g3doc/quantize.md index d88a2e9a8acbac4a0de6e3ea2bed65cb44535665..65dbdd70b4dd67838326b434dd5fe9753a5afc1c 100644 --- a/research/deeplab/g3doc/quantize.md +++ b/research/deeplab/g3doc/quantize.md @@ -42,7 +42,6 @@ python deeplab/train.py \ --train_batch_size=8 \ --base_learning_rate=3e-5 \ --dataset="pascal_voc_seg" \ - --initialize_last_layer \ --quantize_delay_step=0 \ --tf_initial_checkpoint=${PATH_TO_TRAINED_FLOAT_MODEL} \ --train_logdir=${PATH_TO_TRAIN_DIR} \ @@ -65,18 +64,12 @@ python deeplab/export_model.py \ Commandline below shows how to convert exported graphdef to TFlite model. ``` -tflite_convert \ - --graph_def_file=${OUTPUT_DIR}/frozen_inference_graph.pb \ - --output_file=${OUTPUT_DIR}/frozen_inference_graph.tflite \ - --output_format=TFLITE \ - --input_shape=1,513,513,3 \ - --input_arrays="MobilenetV2/MobilenetV2/input" \ - --inference_type=QUANTIZED_UINT8 \ - --inference_input_type=QUANTIZED_UINT8 \ - --std_dev_values=128 \ - --mean_values=128 \ - --change_concat_input_ranges=true \ - --output_arrays="ArgMax" +# From tensorflow/models/research/ +python deeplab/convert_to_tflite.py \ + --quantized_graph_def_path=${OUTPUT_DIR}/frozen_inference_graph.pb \ + --input_tensor_name=MobilenetV2/MobilenetV2/input:0 \ + --output_tflite_path=${OUTPUT_DIR}/frozen_inference_graph.tflite \ + --test_image_path=${PATH_TO_TEST_IMAGE} ``` **[Important]** Note that converted model expects 513x513 RGB input and doesn't diff --git a/research/deeplab/local_test.sh b/research/deeplab/local_test.sh index d5e4a5f42bb4241d4b6dd1b9d8a2619c4ca9dc8b..c9ad75f69280a9179a891b52f310b05a5b744def 100644 --- a/research/deeplab/local_test.sh +++ b/research/deeplab/local_test.sh @@ -19,7 +19,7 @@ # # Usage: # # From the tensorflow/models/research/deeplab directory. 
-# sh ./local_test.sh +# bash ./local_test.sh # # @@ -42,7 +42,7 @@ python "${WORK_DIR}"/model_test.py # Go to datasets folder and download PASCAL VOC 2012 segmentation dataset. DATASET_DIR="datasets" cd "${WORK_DIR}/${DATASET_DIR}" -sh download_and_convert_voc2012.sh +bash download_and_convert_voc2012.sh # Go back to original directory. cd "${CURRENT_DIR}" diff --git a/research/delf/README.md b/research/delf/README.md index a8bea62000a0f8a03c529284172107607d83192b..f89d71d555f214c4492e2dcedf9370cb9daf9bef 100644 --- a/research/delf/README.md +++ b/research/delf/README.md @@ -112,11 +112,17 @@ in the [Detect-to-Retrieve paper](https://arxiv.org/abs/1812.01584). Boosts performance by ~4% mAP compared to ICCV'17 DELF model. **DELG pre-trained on the Google-Landmarks dataset v1** -([link](http://storage.googleapis.com/delf/delg_gld_20200520.tar.gz)). Presented -in the [DELG paper](https://arxiv.org/abs/2001.05027). +([R101-DELG](http://storage.googleapis.com/delf/r101delg_gld_20200814.tar.gz), +[R50-DELG](http://storage.googleapis.com/delf/r50delg_gld_20200814.tar.gz)). +Presented in the [DELG paper](https://arxiv.org/abs/2001.05027). + +**DELG pre-trained on the Google-Landmarks dataset v2 (clean)** +([R101-DELG](https://storage.googleapis.com/delf/r101delg_gldv2clean_20200914.tar.gz), +[R50-DELG](https://storage.googleapis.com/delf/r50delg_gldv2clean_20200914.tar.gz)). +Presented in the [DELG paper](https://arxiv.org/abs/2001.05027). **RN101-ArcFace pre-trained on the Google-Landmarks dataset v2 (train-clean)** -([link](https://storage.googleapis.com/delf/rn101_af_gldv2clean_20200521.tar.gz)). +([link](https://storage.googleapis.com/delf/rn101_af_gldv2clean_20200814.tar.gz)). Presented in the [GLDv2 paper](https://arxiv.org/abs/2004.01804). 
**DELF pre-trained on Landmarks-Clean/Landmarks-Full dataset** diff --git a/research/delf/delf/__init__.py b/research/delf/delf/__init__.py index a52df3c4546414e61f479357d06b65d4c132c753..a3c5d37bc44e10b74ce383f15c5979115d875fb1 100644 --- a/research/delf/delf/__init__.py +++ b/research/delf/delf/__init__.py @@ -30,10 +30,13 @@ from delf.python import feature_aggregation_similarity from delf.python import feature_extractor from delf.python import feature_io from delf.python import utils +from delf.python import whiten from delf.python.examples import detector from delf.python.examples import extractor from delf.python import detect_to_retrieve from delf.python import training from delf.python.training import model -from delf.python.training import datasets +from delf.python import datasets +from delf.python.datasets import google_landmarks_dataset +from delf.python.datasets import revisited_op # pylint: enable=unused-import diff --git a/research/delf/delf/protos/delf_config.proto b/research/delf/delf/protos/delf_config.proto index d13d911b48b7a9fe46e7c8fcb3c1c20d5a927e04..c7cd5b1ce27b227a0ff4c3d4536087d1151a6396 100644 --- a/research/delf/delf/protos/delf_config.proto +++ b/research/delf/delf/protos/delf_config.proto @@ -49,6 +49,12 @@ message DelfLocalFeatureConfig { // PCA parameters for DELF local feature. This is used only if use_pca is // true. optional DelfPcaParameters pca_parameters = 6; + + // If true, the returned keypoint locations are grounded to coordinates of the + // resized image used for extraction. If false (default), the returned + // keypoint locations are grounded to coordinates of the original image that + // is fed into feature extraction. 
+ optional bool use_resized_coordinates = 7 [default = false]; } message DelfGlobalFeatureConfig { diff --git a/research/delf/delf/python/datasets/google_landmarks_dataset/README.md b/research/delf/delf/python/datasets/google_landmarks_dataset/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4f34b59aaa4bf36040e49e67fd27adc80f62cb2a --- /dev/null +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/README.md @@ -0,0 +1,123 @@ +## GLDv2 code/models + +[![Paper](http://img.shields.io/badge/paper-arXiv.2004.01804-B3181B.svg)](https://arxiv.org/abs/2004.01804) + +These instructions can be used to reproduce results from the +[GLDv2 paper](https://arxiv.org/abs/2004.01804). We present here results on the +Revisited Oxford/Paris datasets since they are smaller and quicker to +reproduce -- but note that a very similar procedure can be used to obtain +results on the GLDv2 retrieval or recognition datasets. + +Note that this directory also contains code to compute GLDv2 metrics: see +`compute_retrieval_metrics.py`, `compute_recognition_metrics.py` and associated +file reading / metric computation modules. + +For more details on the dataset, please refer to its +[website](https://github.com/cvdfoundation/google-landmark). + +### Install DELF library + +To be able to use this code, please follow +[these instructions](../../../../INSTALL_INSTRUCTIONS.md) to properly install the +DELF library. + +### Download Revisited Oxford/Paris datasets + +```bash +mkdir -p ~/revisitop/data && cd ~/revisitop/data + +# Oxford dataset. +wget http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/oxbuild_images.tgz +mkdir oxford5k_images +tar -xvzf oxbuild_images.tgz -C oxford5k_images/ + +# Paris dataset. Download and move all images to same directory. 
+wget http://www.robots.ox.ac.uk/~vgg/data/parisbuildings/paris_1.tgz
+wget http://www.robots.ox.ac.uk/~vgg/data/parisbuildings/paris_2.tgz
+mkdir paris6k_images_tmp
+tar -xvzf paris_1.tgz -C paris6k_images_tmp/
+tar -xvzf paris_2.tgz -C paris6k_images_tmp/
+mkdir paris6k_images
+mv paris6k_images_tmp/paris/*/*.jpg paris6k_images/
+
+# Revisited annotations.
+wget http://cmp.felk.cvut.cz/revisitop/data/datasets/roxford5k/gnd_roxford5k.mat
+wget http://cmp.felk.cvut.cz/revisitop/data/datasets/rparis6k/gnd_rparis6k.mat
+```
+
+### Download model
+
+```bash
+# From models/research/delf/delf/python/datasets/google_landmarks_dataset
+mkdir parameters && cd parameters
+
+# RN101-ArcFace model trained on GLDv2-clean.
+wget https://storage.googleapis.com/delf/rn101_af_gldv2clean_20200814.tar.gz
+tar -xvzf rn101_af_gldv2clean_20200814.tar.gz
+```
+
+### Feature extraction
+
+We present here commands for extraction on `roxford5k`. To extract on `rparis6k`
+instead, please edit the arguments accordingly (especially the
+`dataset_file_path` argument).
+
+#### Query feature extraction
+
+In the Revisited Oxford/Paris experimental protocol, query images must be
+cropped before feature extraction (this is done in the `extract_features`
+script, when setting `image_set=query`). Note that this is specific to these
+datasets, and not required for the GLDv2 retrieval/recognition datasets.
+
+Run query feature extraction as follows:
+
+```bash
+# From models/research/delf/delf/python/datasets/google_landmarks_dataset
+python3 ../../delg/extract_features.py \
+  --delf_config_path rn101_af_gldv2clean_config.pbtxt \
+  --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \
+  --images_dir ~/revisitop/data/oxford5k_images \
+  --image_set query \
+  --output_features_dir ~/revisitop/data/oxford5k_features/query
+```
+
+#### Index feature extraction
+
+Run index feature extraction as follows:
+
+```bash
+# From models/research/delf/delf/python/datasets/google_landmarks_dataset
+python3 ../../delg/extract_features.py \
+  --delf_config_path rn101_af_gldv2clean_config.pbtxt \
+  --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \
+  --images_dir ~/revisitop/data/oxford5k_images \
+  --image_set index \
+  --output_features_dir ~/revisitop/data/oxford5k_features/index
+```
+
+### Perform retrieval
+
+To run retrieval on `roxford5k`, the following command can be used:
+
+```bash
+# From models/research/delf/delf/python/datasets/google_landmarks_dataset
+python3 ../../delg/perform_retrieval.py \
+  --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \
+  --query_features_dir ~/revisitop/data/oxford5k_features/query \
+  --index_features_dir ~/revisitop/data/oxford5k_features/index \
+  --output_dir ~/revisitop/results/oxford5k
+```
+
+A file named `metrics.txt` will be written to the path given in
+`output_dir`.
The contents should look approximately like: + +``` +hard + mAP=55.54 + mP@k[ 1 5 10] [88.57 80.86 70.14] + mR@k[ 1 5 10] [19.46 33.65 42.44] +medium + mAP=76.23 + mP@k[ 1 5 10] [95.71 92.86 90.43] + mR@k[ 1 5 10] [10.17 25.96 35.29] +``` diff --git a/research/delf/delf/python/datasets/google_landmarks_dataset/__init__.py b/research/delf/delf/python/datasets/google_landmarks_dataset/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4e24e0fb7c56dc1ece37ff53f8a1a7dd5ba4ddfd --- /dev/null +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Module exposing Google Landmarks dataset for training.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from delf.python.datasets.google_landmarks_dataset import googlelandmarks +# pylint: enable=unused-import diff --git a/research/delf/delf/python/google_landmarks_dataset/compute_recognition_metrics.py b/research/delf/delf/python/datasets/google_landmarks_dataset/compute_recognition_metrics.py similarity index 95% rename from research/delf/delf/python/google_landmarks_dataset/compute_recognition_metrics.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/compute_recognition_metrics.py index f80cf47de7487d4cd584c969d994f7d3f1135cae..4c241ed5380cacd635016f8a197156d389a62a63 100644 --- a/research/delf/delf/python/google_landmarks_dataset/compute_recognition_metrics.py +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/compute_recognition_metrics.py @@ -24,9 +24,9 @@ from __future__ import print_function import argparse import sys -from tensorflow.python.platform import app -from delf.python.google_landmarks_dataset import dataset_file_io -from delf.python.google_landmarks_dataset import metrics +from absl import app +from delf.python.datasets.google_landmarks_dataset import dataset_file_io +from delf.python.datasets.google_landmarks_dataset import metrics cmd_args = None diff --git a/research/delf/delf/python/google_landmarks_dataset/compute_retrieval_metrics.py b/research/delf/delf/python/datasets/google_landmarks_dataset/compute_retrieval_metrics.py similarity index 96% rename from research/delf/delf/python/google_landmarks_dataset/compute_retrieval_metrics.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/compute_retrieval_metrics.py index adcee356e5d64d094236cda9656c86164c24faf8..231c320168cb61d175b6a486ec7a91de08d5fe6f 
100644 --- a/research/delf/delf/python/google_landmarks_dataset/compute_retrieval_metrics.py +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/compute_retrieval_metrics.py @@ -24,9 +24,9 @@ from __future__ import print_function import argparse import sys -from tensorflow.python.platform import app -from delf.python.google_landmarks_dataset import dataset_file_io -from delf.python.google_landmarks_dataset import metrics +from absl import app +from delf.python.datasets.google_landmarks_dataset import dataset_file_io +from delf.python.datasets.google_landmarks_dataset import metrics cmd_args = None diff --git a/research/delf/delf/python/google_landmarks_dataset/dataset_file_io.py b/research/delf/delf/python/datasets/google_landmarks_dataset/dataset_file_io.py similarity index 100% rename from research/delf/delf/python/google_landmarks_dataset/dataset_file_io.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/dataset_file_io.py diff --git a/research/delf/delf/python/google_landmarks_dataset/dataset_file_io_test.py b/research/delf/delf/python/datasets/google_landmarks_dataset/dataset_file_io_test.py similarity index 92% rename from research/delf/delf/python/google_landmarks_dataset/dataset_file_io_test.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/dataset_file_io_test.py index 0101d989fba85e487842e65b3b1aec4e728c101c..8bd2ac5e0f363cd15a386b50c0ecae6c9825b61e 100644 --- a/research/delf/delf/python/google_landmarks_dataset/dataset_file_io_test.py +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/dataset_file_io_test.py @@ -23,7 +23,7 @@ import os from absl import flags import tensorflow as tf -from delf.python.google_landmarks_dataset import dataset_file_io +from delf.python.datasets.google_landmarks_dataset import dataset_file_io FLAGS = flags.FLAGS @@ -32,8 +32,7 @@ class DatasetFileIoTest(tf.test.TestCase): def testReadRecognitionSolutionWorks(self): # Define inputs. 
- file_path = os.path.join(FLAGS.test_tmpdir, - 'recognition_solution.csv') + file_path = os.path.join(FLAGS.test_tmpdir, 'recognition_solution.csv') with tf.io.gfile.GFile(file_path, 'w') as f: f.write('id,landmarks,Usage\n') f.write('0123456789abcdef,0 12,Public\n') @@ -64,8 +63,7 @@ class DatasetFileIoTest(tf.test.TestCase): def testReadRetrievalSolutionWorks(self): # Define inputs. - file_path = os.path.join(FLAGS.test_tmpdir, - 'retrieval_solution.csv') + file_path = os.path.join(FLAGS.test_tmpdir, 'retrieval_solution.csv') with tf.io.gfile.GFile(file_path, 'w') as f: f.write('id,images,Usage\n') f.write('0123456789abcdef,None,Ignored\n') @@ -96,8 +94,7 @@ class DatasetFileIoTest(tf.test.TestCase): def testReadRecognitionPredictionsWorks(self): # Define inputs. - file_path = os.path.join(FLAGS.test_tmpdir, - 'recognition_predictions.csv') + file_path = os.path.join(FLAGS.test_tmpdir, 'recognition_predictions.csv') with tf.io.gfile.GFile(file_path, 'w') as f: f.write('id,landmarks\n') f.write('0123456789abcdef,12 0.1 \n') @@ -134,8 +131,7 @@ class DatasetFileIoTest(tf.test.TestCase): def testReadRetrievalPredictionsWorks(self): # Define inputs. 
- file_path = os.path.join(FLAGS.test_tmpdir, - 'retrieval_predictions.csv') + file_path = os.path.join(FLAGS.test_tmpdir, 'retrieval_predictions.csv') with tf.io.gfile.GFile(file_path, 'w') as f: f.write('id,images\n') f.write('0123456789abcdef,fedcba9876543250 \n') diff --git a/research/delf/delf/python/training/datasets/googlelandmarks.py b/research/delf/delf/python/datasets/google_landmarks_dataset/googlelandmarks.py similarity index 96% rename from research/delf/delf/python/training/datasets/googlelandmarks.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/googlelandmarks.py index 9d184aceca875bdba4109384141701049d6b389e..b6122f5c79c28a1eb159f2f8b838af10d7ea6276 100644 --- a/research/delf/delf/python/training/datasets/googlelandmarks.py +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/googlelandmarks.py @@ -46,8 +46,6 @@ class _DataAugmentationParams(object): central_fraction = 0.875 random_reflection = False - input_rows = 321 - input_cols = 321 def NormalizeImages(images, pixel_value_scale=0.5, pixel_value_offset=0.5): @@ -70,11 +68,12 @@ def NormalizeImages(images, pixel_value_scale=0.5, pixel_value_offset=0.5): return normalized_images -def _ImageNetCrop(image): +def _ImageNetCrop(image, image_size): """Imagenet-style crop with random bbox and aspect ratio. Args: image: a `Tensor`, image to crop. + image_size: an `int`. The image size for the decoded image, on each side. Returns: cropped_image: `Tensor`, cropped image. 
@@ -95,7 +94,7 @@ def _ImageNetCrop(image): cropped_image.set_shape([None, None, 3]) cropped_image = tf.image.resize( - cropped_image, [params.input_rows, params.input_cols], method='area') + cropped_image, [image_size, image_size], method='area') if params.random_reflection: cropped_image = tf.image.random_flip_left_right(cropped_image) @@ -122,7 +121,7 @@ def _ParseFunction(example, name_to_features, image_size, augmentation): image = NormalizeImages( image, pixel_value_scale=128.0, pixel_value_offset=128.0) if augmentation: - image = _ImageNetCrop(image) + image = _ImageNetCrop(image, image_size) else: image = tf.image.resize(image, [image_size, image_size]) image.set_shape([image_size, image_size, 3]) diff --git a/research/delf/delf/python/google_landmarks_dataset/metrics.py b/research/delf/delf/python/datasets/google_landmarks_dataset/metrics.py similarity index 100% rename from research/delf/delf/python/google_landmarks_dataset/metrics.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/metrics.py diff --git a/research/delf/delf/python/google_landmarks_dataset/metrics_test.py b/research/delf/delf/python/datasets/google_landmarks_dataset/metrics_test.py similarity index 98% rename from research/delf/delf/python/google_landmarks_dataset/metrics_test.py rename to research/delf/delf/python/datasets/google_landmarks_dataset/metrics_test.py index 50838cae2b5bfaa8f6f0c5cbfab2a07aa20b7c52..ee8a443de1644f83031d2a32300bbe7f7b447732 100644 --- a/research/delf/delf/python/google_landmarks_dataset/metrics_test.py +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/metrics_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import tensorflow as tf -from delf.python.google_landmarks_dataset import metrics +from delf.python.datasets.google_landmarks_dataset import metrics def _CreateRecognitionSolution(): diff --git a/research/delf/delf/python/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt 
b/research/delf/delf/python/datasets/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt similarity index 76% rename from research/delf/delf/python/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt rename to research/delf/delf/python/datasets/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt index 992cb0fd142ba8c6d89c763d5e323a0d33e7a3a5..6a065d51280d0705f62969f9b28f711d4982819e 100644 --- a/research/delf/delf/python/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt +++ b/research/delf/delf/python/datasets/google_landmarks_dataset/rn101_af_gldv2clean_config.pbtxt @@ -1,6 +1,6 @@ use_local_features: false use_global_features: true -model_path: "parameters/rn101_af_gldv2clean_20200521" +model_path: "parameters/rn101_af_gldv2clean_20200814" image_scales: 0.70710677 image_scales: 1.0 image_scales: 1.4142135 diff --git a/research/delf/delf/python/datasets/revisited_op/__init__.py b/research/delf/delf/python/datasets/revisited_op/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1a8b35fb1b46399bb735624a354fc7947d6ec242 --- /dev/null +++ b/research/delf/delf/python/datasets/revisited_op/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Module for revisited Oxford and Paris datasets.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from delf.python.datasets.revisited_op import dataset +# pylint: enable=unused-import diff --git a/research/delf/delf/python/datasets/revisited_op/dataset.py b/research/delf/delf/python/datasets/revisited_op/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ae3020cd3451955147dbcd41754fba15e0d9f3c7 --- /dev/null +++ b/research/delf/delf/python/datasets/revisited_op/dataset.py @@ -0,0 +1,535 @@ +# Copyright 2019 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python library to parse ground-truth/evaluate on Revisited datasets.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pickle + +import numpy as np +from scipy.io import matlab +import tensorflow as tf + +_GROUND_TRUTH_KEYS = ['easy', 'hard', 'junk'] + +DATASET_NAMES = ['roxford5k', 'rparis6k'] + + +def ReadDatasetFile(dataset_file_path): + """Reads dataset file in Revisited Oxford/Paris ".mat" format. + + Args: + dataset_file_path: Path to dataset file, in .mat format. 
+ + Returns: + query_list: List of query image names. + index_list: List of index image names. + ground_truth: List containing ground-truth information for dataset. Each + entry is a dict corresponding to the ground-truth information for a query. + The dict may have keys 'easy', 'hard', or 'junk', mapping to a NumPy + array of integers; additionally, it has a key 'bbx' mapping to a NumPy + array of floats with bounding box coordinates. + """ + with tf.io.gfile.GFile(dataset_file_path, 'rb') as f: + cfg = matlab.loadmat(f) + + # Parse outputs according to the specificities of the dataset file. + query_list = [str(im_array[0]) for im_array in np.squeeze(cfg['qimlist'])] + index_list = [str(im_array[0]) for im_array in np.squeeze(cfg['imlist'])] + ground_truth_raw = np.squeeze(cfg['gnd']) + ground_truth = [] + for query_ground_truth_raw in ground_truth_raw: + query_ground_truth = {} + for ground_truth_key in _GROUND_TRUTH_KEYS: + if ground_truth_key in query_ground_truth_raw.dtype.names: + adjusted_labels = query_ground_truth_raw[ground_truth_key] - 1 + query_ground_truth[ground_truth_key] = adjusted_labels.flatten() + + query_ground_truth['bbx'] = np.squeeze(query_ground_truth_raw['bbx']) + ground_truth.append(query_ground_truth) + + return query_list, index_list, ground_truth + + +def _ParseGroundTruth(ok_list, junk_list): + """Constructs dictionary of ok/junk indices for a data subset and query. + + Args: + ok_list: List of NumPy arrays containing true positive indices for query. + junk_list: List of NumPy arrays containing ignored indices for query. + + Returns: + ok_junk_dict: Dict mapping 'ok' and 'junk' strings to NumPy array of + indices. + """ + ok_junk_dict = {} + ok_junk_dict['ok'] = np.concatenate(ok_list) + ok_junk_dict['junk'] = np.concatenate(junk_list) + return ok_junk_dict + + +def ParseEasyMediumHardGroundTruth(ground_truth): + """Parses easy/medium/hard ground-truth from Revisited datasets. 
+ + Args: + ground_truth: Usually the output from ReadDatasetFile(). List containing + ground-truth information for dataset. Each entry is a dict corresponding + to the ground-truth information for a query. The dict must have keys + 'easy', 'hard', and 'junk', mapping to a NumPy array of integers. + + Returns: + easy_ground_truth: List containing ground-truth information for easy subset + of dataset. Each entry is a dict corresponding to the ground-truth + information for a query. The dict has keys 'ok' and 'junk', mapping to a + NumPy array of integers. + medium_ground_truth: Same as `easy_ground_truth`, but for the medium subset. + hard_ground_truth: Same as `easy_ground_truth`, but for the hard subset. + """ + num_queries = len(ground_truth) + + easy_ground_truth = [] + medium_ground_truth = [] + hard_ground_truth = [] + for i in range(num_queries): + easy_ground_truth.append( + _ParseGroundTruth([ground_truth[i]['easy']], + [ground_truth[i]['junk'], ground_truth[i]['hard']])) + medium_ground_truth.append( + _ParseGroundTruth([ground_truth[i]['easy'], ground_truth[i]['hard']], + [ground_truth[i]['junk']])) + hard_ground_truth.append( + _ParseGroundTruth([ground_truth[i]['hard']], + [ground_truth[i]['junk'], ground_truth[i]['easy']])) + + return easy_ground_truth, medium_ground_truth, hard_ground_truth + + +def AdjustPositiveRanks(positive_ranks, junk_ranks): + """Adjusts positive ranks based on junk ranks. + + Args: + positive_ranks: Sorted 1D NumPy integer array. + junk_ranks: Sorted 1D NumPy integer array. + + Returns: + adjusted_positive_ranks: Sorted 1D NumPy array. 
+ """ + if not junk_ranks.size: + return positive_ranks + + adjusted_positive_ranks = positive_ranks + j = 0 + for i, positive_index in enumerate(positive_ranks): + while (j < len(junk_ranks) and positive_index > junk_ranks[j]): + j += 1 + + adjusted_positive_ranks[i] -= j + + return adjusted_positive_ranks + + +def ComputeAveragePrecision(positive_ranks): + """Computes average precision according to dataset convention. + + It assumes that `positive_ranks` contains the ranks for all expected positive + index images to be retrieved. If `positive_ranks` is empty, returns + `average_precision` = 0. + + Note that average precision computation here does NOT use the finite sum + method (see + https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision) + which is common in information retrieval literature. Instead, the method + implemented here integrates over the precision-recall curve by averaging two + adjacent precision points, then multiplying by the recall step. This is the + convention for the Revisited Oxford/Paris datasets. + + Args: + positive_ranks: Sorted 1D NumPy integer array, zero-indexed. + + Returns: + average_precision: Float. + """ + average_precision = 0.0 + + num_expected_positives = len(positive_ranks) + if not num_expected_positives: + return average_precision + + recall_step = 1.0 / num_expected_positives + for i, rank in enumerate(positive_ranks): + if not rank: + left_precision = 1.0 + else: + left_precision = i / rank + + right_precision = (i + 1) / (rank + 1) + average_precision += (left_precision + right_precision) * recall_step / 2 + + return average_precision + + +def ComputePRAtRanks(positive_ranks, desired_pr_ranks): + """Computes precision/recall at desired ranks. + + It assumes that `positive_ranks` contains the ranks for all expected positive + index images to be retrieved. If `positive_ranks` is empty, return all-zeros + `precisions`/`recalls`. 
+ + If a desired rank is larger than the last positive rank, its precision is + computed based on the last positive rank. For example, if `desired_pr_ranks` + is [10] and `positive_ranks` = [0, 7] --> `precisions` = [0.25], `recalls` = + [1.0]. + + Args: + positive_ranks: 1D NumPy integer array, zero-indexed. + desired_pr_ranks: List of integers containing the desired precision/recall + ranks to be reported. Eg, if precision@1/recall@1 and + precision@10/recall@10 are desired, this should be set to [1, 10]. + + Returns: + precisions: Precision @ `desired_pr_ranks` (NumPy array of + floats, with shape [len(desired_pr_ranks)]). + recalls: Recall @ `desired_pr_ranks` (NumPy array of floats, with + shape [len(desired_pr_ranks)]). + """ + num_desired_pr_ranks = len(desired_pr_ranks) + precisions = np.zeros([num_desired_pr_ranks]) + recalls = np.zeros([num_desired_pr_ranks]) + + num_expected_positives = len(positive_ranks) + if not num_expected_positives: + return precisions, recalls + + positive_ranks_one_indexed = positive_ranks + 1 + for i, desired_pr_rank in enumerate(desired_pr_ranks): + recalls[i] = np.sum( + positive_ranks_one_indexed <= desired_pr_rank) / num_expected_positives + + # If `desired_pr_rank` is larger than last positive's rank, only compute + # precision with respect to last positive's position. + precision_rank = min(max(positive_ranks_one_indexed), desired_pr_rank) + precisions[i] = np.sum( + positive_ranks_one_indexed <= precision_rank) / precision_rank + + return precisions, recalls + + +def ComputeMetrics(sorted_index_ids, ground_truth, desired_pr_ranks): + """Computes metrics for retrieval results on the Revisited datasets. 
+ + If there are no valid ground-truth index images for a given query, the metric + results for the given query (`average_precisions`, `precisions` and `recalls`) + are set to NaN, and they are not taken into account when computing the + aggregated metrics (`mean_average_precision`, `mean_precisions` and + `mean_recalls`) over all queries. + + Args: + sorted_index_ids: Integer NumPy array of shape [#queries, #index_images]. + For each query, contains an array denoting the most relevant index images, + sorted from most to least relevant. + ground_truth: List containing ground-truth information for dataset. Each + entry is a dict corresponding to the ground-truth information for a query. + The dict has keys 'ok' and 'junk', mapping to a NumPy array of integers. + desired_pr_ranks: List of integers containing the desired precision/recall + ranks to be reported. Eg, if precision@1/recall@1 and + precision@10/recall@10 are desired, this should be set to [1, 10]. The + largest item should be <= #index_images. + + Returns: + mean_average_precision: Mean average precision (float). + mean_precisions: Mean precision @ `desired_pr_ranks` (NumPy array of + floats, with shape [len(desired_pr_ranks)]). + mean_recalls: Mean recall @ `desired_pr_ranks` (NumPy array of floats, with + shape [len(desired_pr_ranks)]). + average_precisions: Average precision for each query (NumPy array of floats, + with shape [#queries]). + precisions: Precision @ `desired_pr_ranks`, for each query (NumPy array of + floats, with shape [#queries, len(desired_pr_ranks)]). + recalls: Recall @ `desired_pr_ranks`, for each query (NumPy array of + floats, with shape [#queries, len(desired_pr_ranks)]). + + Raises: + ValueError: If largest desired PR rank in `desired_pr_ranks` > + #index_images. 
+ """ + num_queries, num_index_images = sorted_index_ids.shape + num_desired_pr_ranks = len(desired_pr_ranks) + + sorted_desired_pr_ranks = sorted(desired_pr_ranks) + + if sorted_desired_pr_ranks[-1] > num_index_images: + raise ValueError( + 'Requested PR ranks up to %d, however there are only %d images' % + (sorted_desired_pr_ranks[-1], num_index_images)) + + # Instantiate all outputs, then loop over each query and gather metrics. + mean_average_precision = 0.0 + mean_precisions = np.zeros([num_desired_pr_ranks]) + mean_recalls = np.zeros([num_desired_pr_ranks]) + average_precisions = np.zeros([num_queries]) + precisions = np.zeros([num_queries, num_desired_pr_ranks]) + recalls = np.zeros([num_queries, num_desired_pr_ranks]) + num_empty_gt_queries = 0 + for i in range(num_queries): + ok_index_images = ground_truth[i]['ok'] + junk_index_images = ground_truth[i]['junk'] + + if not ok_index_images.size: + average_precisions[i] = float('nan') + precisions[i, :] = float('nan') + recalls[i, :] = float('nan') + num_empty_gt_queries += 1 + continue + + positive_ranks = np.arange(num_index_images)[np.in1d( + sorted_index_ids[i], ok_index_images)] + junk_ranks = np.arange(num_index_images)[np.in1d(sorted_index_ids[i], + junk_index_images)] + + adjusted_positive_ranks = AdjustPositiveRanks(positive_ranks, junk_ranks) + + average_precisions[i] = ComputeAveragePrecision(adjusted_positive_ranks) + precisions[i, :], recalls[i, :] = ComputePRAtRanks(adjusted_positive_ranks, + desired_pr_ranks) + + mean_average_precision += average_precisions[i] + mean_precisions += precisions[i, :] + mean_recalls += recalls[i, :] + + # Normalize aggregated metrics by number of queries. 
+ num_valid_queries = num_queries - num_empty_gt_queries + mean_average_precision /= num_valid_queries + mean_precisions /= num_valid_queries + mean_recalls /= num_valid_queries + + return (mean_average_precision, mean_precisions, mean_recalls, + average_precisions, precisions, recalls) + + +def SaveMetricsFile(mean_average_precision, mean_precisions, mean_recalls, + pr_ranks, output_path): + """Saves aggregated retrieval metrics to text file. + + Args: + mean_average_precision: Dict mapping each dataset protocol to a float. + mean_precisions: Dict mapping each dataset protocol to a NumPy array of + floats with shape [len(pr_ranks)]. + mean_recalls: Dict mapping each dataset protocol to a NumPy array of floats + with shape [len(pr_ranks)]. + pr_ranks: List of integers. + output_path: Full file path. + """ + with tf.io.gfile.GFile(output_path, 'w') as f: + for k in sorted(mean_average_precision.keys()): + f.write('{}\n mAP={}\n mP@k{} {}\n mR@k{} {}\n'.format( + k, np.around(mean_average_precision[k] * 100, decimals=2), + np.array(pr_ranks), np.around(mean_precisions[k] * 100, decimals=2), + np.array(pr_ranks), np.around(mean_recalls[k] * 100, decimals=2))) + + +def _ParseSpaceSeparatedStringsInBrackets(line, prefixes, ind): + """Parses line containing space-separated strings in brackets. + + Args: + line: String, containing line in metrics file with mP@k or mR@k figures. + prefixes: Tuple/list of strings, containing valid prefixes. + ind: Integer indicating which field within brackets is parsed. + + Yields: + entry: String format entry. + + Raises: + ValueError: If input line does not contain a valid prefix. + """ + for prefix in prefixes: + if line.startswith(prefix): + line = line[len(prefix):] + break + else: + raise ValueError('Line %s is malformed, cannot find valid prefixes' % line) + + for entry in line.split('[')[ind].split(']')[0].split(): + yield entry + + +def _ParsePrRanks(line): + """Parses PR ranks from mP@k line in metrics file. 
+ + Args: + line: String, containing line in metrics file with mP@k figures. + + Returns: + pr_ranks: List of integers, containing used ranks. + + Raises: + ValueError: If input line is malformed. + """ + return [ + int(pr_rank) for pr_rank in _ParseSpaceSeparatedStringsInBrackets( + line, [' mP@k['], 0) if pr_rank + ] + + +def _ParsePrScores(line, num_pr_ranks): + """Parses PR scores from line in metrics file. + + Args: + line: String, containing line in metrics file with mP@k or mR@k figures. + num_pr_ranks: Integer, number of scores that should be in output list. + + Returns: + pr_scores: List of floats, containing scores. + + Raises: + ValueError: If input line is malformed. + """ + pr_scores = [ + float(pr_score) for pr_score in _ParseSpaceSeparatedStringsInBrackets( + line, (' mP@k[', ' mR@k['), 1) if pr_score + ] + + if len(pr_scores) != num_pr_ranks: + raise ValueError('Line %s is malformed, expected %d scores but found %d' % + (line, num_pr_ranks, len(pr_scores))) + + return pr_scores + + +def ReadMetricsFile(metrics_path): + """Reads aggregated retrieval metrics from text file. + + Args: + metrics_path: Full file path, containing aggregated retrieval metrics. + + Returns: + mean_average_precision: Dict mapping each dataset protocol to a float. + pr_ranks: List of integer ranks used in aggregated recall/precision metrics. + mean_precisions: Dict mapping each dataset protocol to a NumPy array of + floats with shape [len(`pr_ranks`)]. + mean_recalls: Dict mapping each dataset protocol to a NumPy array of floats + with shape [len(`pr_ranks`)]. + + Raises: + ValueError: If input file is malformed. 
+ """ + with tf.io.gfile.GFile(metrics_path, 'r') as f: + file_contents_stripped = [l.rstrip() for l in f] + + if len(file_contents_stripped) % 4: + raise ValueError( + 'Malformed input %s: number of lines must be a multiple of 4, ' + 'but it is %d' % (metrics_path, len(file_contents_stripped))) + + mean_average_precision = {} + pr_ranks = [] + mean_precisions = {} + mean_recalls = {} + protocols = set() + for i in range(0, len(file_contents_stripped), 4): + protocol = file_contents_stripped[i] + if protocol in protocols: + raise ValueError( + 'Malformed input %s: protocol %s is found a second time' % + (metrics_path, protocol)) + protocols.add(protocol) + + # Parse mAP. + mean_average_precision[protocol] = float( + file_contents_stripped[i + 1].split('=')[1]) / 100.0 + + # Parse (or check consistency of) pr_ranks. + parsed_pr_ranks = _ParsePrRanks(file_contents_stripped[i + 2]) + if not pr_ranks: + pr_ranks = parsed_pr_ranks + else: + if parsed_pr_ranks != pr_ranks: + raise ValueError('Malformed input %s: inconsistent PR ranks' % + metrics_path) + + # Parse mean precisions. + mean_precisions[protocol] = np.array( + _ParsePrScores(file_contents_stripped[i + 2], len(pr_ranks)), + dtype=float) / 100.0 + + # Parse mean recalls. + mean_recalls[protocol] = np.array( + _ParsePrScores(file_contents_stripped[i + 3], len(pr_ranks)), + dtype=float) / 100.0 + + return mean_average_precision, pr_ranks, mean_precisions, mean_recalls + + +def CreateConfigForTestDataset(dataset, dir_main): + """Creates the configuration dictionary for the test dataset. + + Args: + dataset: String, dataset name: either 'roxford5k' or 'rparis6k'. + dir_main: String, path to the folder containing ground truth files. + + Returns: + cfg: Dataset configuration in a form of dictionary. 
The configuration + includes: + `gnd_fname` - path to the ground truth file for the dataset, + `ext` and `qext` - image extensions for the images in the test dataset + and the query images, + `dir_data` - path to the folder containing ground truth files, + `dir_images` - path to the folder containing images, + `n` and `nq` - number of images and query images in the dataset + respectively, + `im_fname` and `qim_fname` - functions providing paths for the dataset + and query images respectively, + `dataset` - test dataset name. + + Raises: + ValueError: If an unknown dataset name is provided as an argument. + """ + dataset = dataset.lower() + + def _ConfigImname(cfg, i): + return os.path.join(cfg['dir_images'], cfg['imlist'][i] + cfg['ext']) + + def _ConfigQimname(cfg, i): + return os.path.join(cfg['dir_images'], cfg['qimlist'][i] + cfg['qext']) + + if dataset not in DATASET_NAMES: + raise ValueError('Unknown dataset: {}!'.format(dataset)) + + # Loading imlist, qimlist, and gnd in configuration as a dictionary. 
+ gnd_fname = os.path.join(dir_main, 'gnd_{}.pkl'.format(dataset)) + with tf.io.gfile.GFile(gnd_fname, 'rb') as f: + cfg = pickle.load(f) + cfg['gnd_fname'] = gnd_fname + if dataset == 'rparis6k': + dir_images = 'paris6k_images' + elif dataset == 'roxford5k': + dir_images = 'oxford5k_images' + + cfg['ext'] = '.jpg' + cfg['qext'] = '.jpg' + cfg['dir_data'] = os.path.join(dir_main) + cfg['dir_images'] = os.path.join(cfg['dir_data'], dir_images) + + cfg['n'] = len(cfg['imlist']) + cfg['nq'] = len(cfg['qimlist']) + + cfg['im_fname'] = _ConfigImname + cfg['qim_fname'] = _ConfigQimname + + cfg['dataset'] = dataset + + return cfg diff --git a/research/delf/delf/python/detect_to_retrieve/dataset_test.py b/research/delf/delf/python/datasets/revisited_op/dataset_test.py similarity index 99% rename from research/delf/delf/python/detect_to_retrieve/dataset_test.py rename to research/delf/delf/python/datasets/revisited_op/dataset_test.py index 8e742703b04210787ede0bfc945a9f305d59efc7..04caa64f098040a2d01f18b24d218a1b3478e257 100644 --- a/research/delf/delf/python/detect_to_retrieve/dataset_test.py +++ b/research/delf/delf/python/datasets/revisited_op/dataset_test.py @@ -24,7 +24,7 @@ from absl import flags import numpy as np import tensorflow as tf -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset FLAGS = flags.FLAGS diff --git a/research/delf/delf/python/datasets/utils.py b/research/delf/delf/python/datasets/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..596fca99a5d13492dd107b51db628fd975253b2d --- /dev/null +++ b/research/delf/delf/python/datasets/utils.py @@ -0,0 +1,74 @@ +# Lint as: python3 +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Supporting functions for data loading.""" + +import numpy as np +from PIL import Image + +import tensorflow as tf +from delf import utils as image_loading_utils + + +def pil_imagenet_loader(path, imsize, bounding_box=None, preprocess=True): + """Pillow loader for the images. + + Args: + path: Path to image to be loaded. + imsize: Integer, defines the maximum size of longer image side. + bounding_box: (x1,y1,x2,y2) tuple to crop the query image. + preprocess: Bool, whether to preprocess the images in respect to the + ImageNet dataset. + + Returns: + image: `Tensor`, image in ImageNet suitable format. + """ + img = image_loading_utils.RgbLoader(path) + + if bounding_box is not None: + imfullsize = max(img.size) + img = img.crop(bounding_box) + imsize = imsize * max(img.size) / imfullsize + + # Unlike `resize`, `thumbnail` resizes to the largest size that preserves + # the aspect ratio, making sure that the output image does not exceed the + # original image size and the size specified in the arguments of thumbnail. + img.thumbnail((imsize, imsize), Image.ANTIALIAS) + img = np.array(img) + + if preprocess: + # Preprocessing for ImageNet data. Converts the images from RGB to BGR, + # then zero-centers each color channel with respect to the ImageNet + # dataset, without scaling. 
+ tf.keras.applications.imagenet_utils.preprocess_input(img, mode='caffe') + + return img + + +def default_loader(path, imsize, bounding_box=None, preprocess=True): + """Default loader for the images is using Pillow. + + Args: + path: Path to image to be loaded. + imsize: Integer, defines the maximum size of longer image side. + bounding_box: (x1,y1,x2,y2) tuple to crop the query image. + preprocess: Bool, whether to preprocess the images in respect to the + ImageNet dataset. + + Returns: + image: `Tensor`, image in ImageNet suitable format. + """ + img = pil_imagenet_loader(path, imsize, bounding_box, preprocess) + return img diff --git a/research/delf/delf/python/datasets/utils_test.py b/research/delf/delf/python/datasets/utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..e38671bc1b73e6fc98be1ff05d87b49bd4235813 --- /dev/null +++ b/research/delf/delf/python/datasets/utils_test.py @@ -0,0 +1,76 @@ +# Lint as: python3 +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for dataset utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from absl import flags +import numpy as np +from PIL import Image +import tensorflow as tf + +from delf.python.datasets import utils as image_loading_utils + +FLAGS = flags.FLAGS + + +class UtilsTest(tf.test.TestCase): + + def testDefaultLoader(self): + # Create a dummy image. + dummy_image = np.random.rand(1024, 750, 3) * 255 + img_out = Image.fromarray(dummy_image.astype('uint8')).convert('RGB') + filename = os.path.join(FLAGS.test_tmpdir, 'test_image.png') + # Save the dummy image. + img_out.save(filename) + + max_img_size = 1024 + # Load the saved dummy image. + img = image_loading_utils.default_loader( + filename, imsize=max_img_size, preprocess=False) + + # Make sure the values are the same before and after loading. + self.assertAllEqual(np.array(img_out), img) + + self.assertAllLessEqual(tf.shape(img), max_img_size) + + def testDefaultLoaderWithBoundingBox(self): + # Create a dummy image. + dummy_image = np.random.rand(1024, 750, 3) * 255 + img_out = Image.fromarray(dummy_image.astype('uint8')).convert('RGB') + filename = os.path.join(FLAGS.test_tmpdir, 'test_image.png') + # Save the dummy image. + img_out.save(filename) + + max_img_size = 1024 + # Load the saved dummy image. + expected_size = 400 + img = image_loading_utils.default_loader( + filename, + imsize=max_img_size, + bounding_box=[120, 120, 120 + expected_size, 120 + expected_size], + preprocess=False) + + # Check that the final shape is as expected. 
+ self.assertAllEqual(tf.shape(img), [expected_size, expected_size, 3]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/delf/delf/python/delg/DELG_INSTRUCTIONS.md b/research/delf/delf/python/delg/DELG_INSTRUCTIONS.md index 2b62ac29003e3d4b13a3ccc8fad5b43e236f96da..dc72422e87cb143a4e7029609f9354181c7540ef 100644 --- a/research/delf/delf/python/delg/DELG_INSTRUCTIONS.md +++ b/research/delf/delf/python/delg/DELG_INSTRUCTIONS.md @@ -34,26 +34,42 @@ mv paris6k_images_tmp/paris/*/*.jpg paris6k_images/ # Revisited annotations. wget http://cmp.felk.cvut.cz/revisitop/data/datasets/roxford5k/gnd_roxford5k.mat wget http://cmp.felk.cvut.cz/revisitop/data/datasets/rparis6k/gnd_rparis6k.mat +wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/test/roxford5k/gnd_roxford5k.pkl +wget http://cmp.felk.cvut.cz/cnnimageretrieval/data/test/rparis6k/gnd_rparis6k.pkl ``` ### Download model -This is necessary to reproduce the main paper results: +This is necessary to reproduce the main paper results. This example shows the +R50-DELG model, pretrained on GLD; see the available pre-trained models +[here](../../../README.md#pre-trained-models), for other variants (eg, R101, +trained on GLDv2-clean). ```bash # From models/research/delf/delf/python/delg mkdir parameters && cd parameters -# DELG-GLD model. -wget http://storage.googleapis.com/delf/delg_gld_20200520.tar.gz -tar -xvzf delg_gld_20200520.tar.gz +# R50-DELG-GLD model. +wget http://storage.googleapis.com/delf/r50delg_gld_20200814.tar.gz +tar -xvzf r50delg_gld_20200814.tar.gz ``` ### Feature extraction -We present here commands for extraction on `roxford5k`. To extract on `rparis6k` -instead, please edit the arguments accordingly (especially the -`dataset_file_path` argument). +We present here commands for R50-DELG (pretrained on GLD) extraction on +`roxford5k`. 
+ +- To use the R101-DELG model pretrained on GLD, first download it as mentioned + above; then, replace the below argument `delf_config_path` by + `r101delg_gld_config.pbtxt` +- To use the R50-DELG model pretrained on GLDv2-clean, first download it as + mentioned above; then, replace the below argument `delf_config_path` by + `r50delg_gldv2clean_config.pbtxt` +- To use the R101-DELG model pretrained on GLDv2-clean, first download it as + mentioned above; then, replace the below argument `delf_config_path` by + `r101delg_gldv2clean_config.pbtxt` +- To extract on `rparis6k` instead, please edit the arguments accordingly + (especially the `dataset_file_path` argument). #### Query feature extraction @@ -67,7 +83,7 @@ Query feature extraction can be run as follows: ```bash # From models/research/delf/delf/python/delg python3 extract_features.py \ - --delf_config_path delg_gld_config.pbtxt \ + --delf_config_path r50delg_gld_config.pbtxt \ --dataset_file_path ~/delg/data/gnd_roxford5k.mat \ --images_dir ~/delg/data/oxford5k_images \ --image_set query \ @@ -81,7 +97,7 @@ Run index feature extraction as follows: ```bash # From models/research/delf/delf/python/delg python3 extract_features.py \ - --delf_config_path delg_gld_config.pbtxt \ + --delf_config_path r50delg_gld_config.pbtxt \ --dataset_file_path ~/delg/data/gnd_roxford5k.mat \ --images_dir ~/delg/data/oxford5k_images \ --image_set index \ diff --git a/research/delf/delf/python/delg/delg_gld_config.pbtxt b/research/delf/delf/python/delg/delg_gld_config.pbtxt deleted file mode 100644 index a659a0a3ee502c31f7d4b71fd634803f94d425b7..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/delg/delg_gld_config.pbtxt +++ /dev/null @@ -1,22 +0,0 @@ -use_local_features: true -use_global_features: true -model_path: "parameters/delg_gld_20200520" -image_scales: 0.25 -image_scales: 0.35355338 -image_scales: 0.5 -image_scales: 0.70710677 -image_scales: 1.0 -image_scales: 1.4142135 -image_scales: 2.0 
-delf_local_config { - use_pca: false - max_feature_num: 1000 - score_threshold: 175.0 -} -delf_global_config { - use_pca: false - image_scales_ind: 3 - image_scales_ind: 4 - image_scales_ind: 5 -} -max_image_size: 1024 diff --git a/research/delf/delf/python/delg/extract_features.py b/research/delf/delf/python/delg/extract_features.py index ad65d66e69ddaa032d1201b34a2f10a04fe61eb5..4ef10dc9415b32015f359b3139fc847edb931d0b 100644 --- a/research/delf/delf/python/delg/extract_features.py +++ b/research/delf/delf/python/delg/extract_features.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2020 The TensorFlow Authors All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +41,7 @@ from delf import delf_config_pb2 from delf import datum_io from delf import feature_io from delf import utils -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset from delf import extractor FLAGS = flags.FLAGS diff --git a/research/delf/delf/python/delg/measure_latency.py b/research/delf/delf/python/delg/measure_latency.py index 21ffbda4179a191139ae35244c8ae34693594fd9..966964d1072006ab78fef034a87d0ff0b9cc4aa6 100644 --- a/research/delf/delf/python/delg/measure_latency.py +++ b/research/delf/delf/python/delg/measure_latency.py @@ -42,6 +42,11 @@ flags.DEFINE_string('list_images_path', '/tmp/list_images.txt', 'Path to list of images whose features will be extracted.') flags.DEFINE_integer('repeat_per_image', 10, 'Number of times to repeat extraction per image.') +flags.DEFINE_boolean( + 'binary_local_features', False, + 'Whether to binarize local features after extraction, and take this extra ' + 'latency into account. This should only be used if use_local_features is ' + 'set in the input DelfConfig from `delf_config_path`.') # Pace to report extraction log. _STATUS_CHECK_ITERATIONS = 100 @@ -103,6 +108,12 @@ def main(argv): # Extract and save features. 
extracted_features = extractor_fn(im) + # Binarize local features, if desired (and if there are local features). + if (config.use_local_features and FLAGS.binary_local_features and + extracted_features['local_features']['attention'].size): + packed_descriptors = np.packbits( + extracted_features['local_features']['descriptors'] > 0, axis=1) + if __name__ == '__main__': app.run(main) diff --git a/research/delf/delf/python/delg/perform_retrieval.py b/research/delf/delf/python/delg/perform_retrieval.py index fb53abb1a9e15a5d5a040be42213f325ab345163..dc380077c56eac410679bec1df06d365a58767e6 100644 --- a/research/delf/delf/python/delg/perform_retrieval.py +++ b/research/delf/delf/python/delg/perform_retrieval.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2020 The TensorFlow Authors All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,7 +28,7 @@ import numpy as np import tensorflow as tf from delf import datum_io -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset from delf.python.detect_to_retrieve import image_reranking FLAGS = flags.FLAGS @@ -44,15 +45,19 @@ flags.DEFINE_boolean( 'If True, performs re-ranking using local feature-based geometric ' 'verification.') flags.DEFINE_float( - 'local_feature_distance_threshold', 1.0, + 'local_descriptor_matching_threshold', 1.0, 'Optional, only used if `use_geometric_verification` is True. ' - 'Distance threshold below which a pair of local descriptors is considered ' + 'Threshold below which a pair of local descriptors is considered ' 'a potential match, and will be fed into RANSAC.') flags.DEFINE_float( 'ransac_residual_threshold', 20.0, 'Optional, only used if `use_geometric_verification` is True. ' 'Residual error threshold for considering matches as inliers, used in ' 'RANSAC algorithm.') +flags.DEFINE_boolean( + 'use_ratio_test', False, + 'Optional, only used if `use_geometric_verification` is True. 
' + 'Whether to use ratio test for local feature matching.') flags.DEFINE_string( 'output_dir', '/tmp/retrieval', 'Directory where retrieval output will be written to. A file containing ' @@ -152,8 +157,10 @@ def main(argv): junk_ids=set(medium_ground_truth[i]['junk']), local_feature_extension=_DELG_LOCAL_EXTENSION, ransac_seed=0, - feature_distance_threshold=FLAGS.local_feature_distance_threshold, - ransac_residual_threshold=FLAGS.ransac_residual_threshold) + descriptor_matching_threshold=FLAGS + .local_descriptor_matching_threshold, + ransac_residual_threshold=FLAGS.ransac_residual_threshold, + use_ratio_test=FLAGS.use_ratio_test) hard_ranks_after_gv[i] = image_reranking.RerankByGeometricVerification( input_ranks=ranks_before_gv[i], initial_scores=similarities, @@ -164,8 +171,10 @@ def main(argv): junk_ids=set(hard_ground_truth[i]['junk']), local_feature_extension=_DELG_LOCAL_EXTENSION, ransac_seed=0, - feature_distance_threshold=FLAGS.local_feature_distance_threshold, - ransac_residual_threshold=FLAGS.ransac_residual_threshold) + descriptor_matching_threshold=FLAGS + .local_descriptor_matching_threshold, + ransac_residual_threshold=FLAGS.ransac_residual_threshold, + use_ratio_test=FLAGS.use_ratio_test) elapsed = (time.time() - start) print('done! 
Retrieval for query %d took %f seconds' % (i, elapsed)) diff --git a/research/delf/delf/python/delg/r101delg_gld_config.pbtxt b/research/delf/delf/python/delg/r101delg_gld_config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..ea8a70b53df6f94758428d108097e5a1d5a132fa --- /dev/null +++ b/research/delf/delf/python/delg/r101delg_gld_config.pbtxt @@ -0,0 +1,22 @@ +use_local_features: true +use_global_features: true +model_path: "parameters/r101delg_gld_20200814" +image_scales: 0.25 +image_scales: 0.35355338 +image_scales: 0.5 +image_scales: 0.70710677 +image_scales: 1.0 +image_scales: 1.4142135 +image_scales: 2.0 +delf_local_config { + use_pca: false + max_feature_num: 1000 + score_threshold: 166.1 +} +delf_global_config { + use_pca: false + image_scales_ind: 3 + image_scales_ind: 4 + image_scales_ind: 5 +} +max_image_size: 1024 diff --git a/research/delf/delf/python/delg/r101delg_gldv2clean_config.pbtxt b/research/delf/delf/python/delg/r101delg_gldv2clean_config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..d34a039a4eada86b8820c6bb7106f1c0f8fcd25e --- /dev/null +++ b/research/delf/delf/python/delg/r101delg_gldv2clean_config.pbtxt @@ -0,0 +1,22 @@ +use_local_features: true +use_global_features: true +model_path: "parameters/r101delg_gldv2clean_20200914" +image_scales: 0.25 +image_scales: 0.35355338 +image_scales: 0.5 +image_scales: 0.70710677 +image_scales: 1.0 +image_scales: 1.4142135 +image_scales: 2.0 +delf_local_config { + use_pca: false + max_feature_num: 1000 + score_threshold: 357.48 +} +delf_global_config { + use_pca: false + image_scales_ind: 3 + image_scales_ind: 4 + image_scales_ind: 5 +} +max_image_size: 1024 diff --git a/research/delf/delf/python/delg/r50delg_gld_config.pbtxt b/research/delf/delf/python/delg/r50delg_gld_config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..4457810b57514b6a3ba7d7289d1af327ce23282f --- /dev/null +++ 
b/research/delf/delf/python/delg/r50delg_gld_config.pbtxt @@ -0,0 +1,22 @@ +use_local_features: true +use_global_features: true +model_path: "parameters/r50delg_gld_20200814" +image_scales: 0.25 +image_scales: 0.35355338 +image_scales: 0.5 +image_scales: 0.70710677 +image_scales: 1.0 +image_scales: 1.4142135 +image_scales: 2.0 +delf_local_config { + use_pca: false + max_feature_num: 1000 + score_threshold: 175.0 +} +delf_global_config { + use_pca: false + image_scales_ind: 3 + image_scales_ind: 4 + image_scales_ind: 5 +} +max_image_size: 1024 diff --git a/research/delf/delf/python/delg/r50delg_gldv2clean_config.pbtxt b/research/delf/delf/python/delg/r50delg_gldv2clean_config.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..358d7cbe56c70b3433bd48f8f81b75694306482f --- /dev/null +++ b/research/delf/delf/python/delg/r50delg_gldv2clean_config.pbtxt @@ -0,0 +1,22 @@ +use_local_features: true +use_global_features: true +model_path: "parameters/r50delg_gldv2clean_20200914" +image_scales: 0.25 +image_scales: 0.35355338 +image_scales: 0.5 +image_scales: 0.70710677 +image_scales: 1.0 +image_scales: 1.4142135 +image_scales: 2.0 +delf_local_config { + use_pca: false + max_feature_num: 1000 + score_threshold: 454.6 +} +delf_global_config { + use_pca: false + image_scales_ind: 3 + image_scales_ind: 4 + image_scales_ind: 5 +} +max_image_size: 1024 diff --git a/research/delf/delf/python/detect_to_retrieve/__init__.py b/research/delf/delf/python/detect_to_retrieve/__init__.py index 06972a7d06738da1dc50e832c4e8443b0e6fb5b6..82a78321eb8d1c78eb1a2b8cd3969623d5071c5c 100644 --- a/research/delf/delf/python/detect_to_retrieve/__init__.py +++ b/research/delf/delf/python/detect_to_retrieve/__init__.py @@ -20,5 +20,4 @@ from __future__ import print_function # pylint: disable=unused-import from delf.python.detect_to_retrieve import aggregation_extraction from delf.python.detect_to_retrieve import boxes_and_features_extraction -from delf.python.detect_to_retrieve 
import dataset # pylint: enable=unused-import diff --git a/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py b/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py index ec18f306b20d2a702c8b488726d164de7817c262..f77d47b1db9eadc0124e69775c5420c318e9adae 100644 --- a/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py +++ b/research/delf/delf/python/detect_to_retrieve/cluster_delf_features.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2019 The TensorFlow Authors All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -33,12 +34,12 @@ import os import sys import time +from absl import app import numpy as np import tensorflow as tf -from tensorflow.python.platform import app from delf import feature_io -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset cmd_args = None diff --git a/research/delf/delf/python/detect_to_retrieve/dataset.py b/research/delf/delf/python/detect_to_retrieve/dataset.py deleted file mode 100644 index 9a1e6b247895aa7bd8022d3a2fb87b878bbb3b38..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/detect_to_retrieve/dataset.py +++ /dev/null @@ -1,469 +0,0 @@ -# Copyright 2019 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Python library to parse ground-truth/evaluate on Revisited datasets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from scipy.io import matlab -import tensorflow as tf - -_GROUND_TRUTH_KEYS = ['easy', 'hard', 'junk'] - - -def ReadDatasetFile(dataset_file_path): - """Reads dataset file in Revisited Oxford/Paris ".mat" format. - - Args: - dataset_file_path: Path to dataset file, in .mat format. - - Returns: - query_list: List of query image names. - index_list: List of index image names. - ground_truth: List containing ground-truth information for dataset. Each - entry is a dict corresponding to the ground-truth information for a query. - The dict may have keys 'easy', 'hard', or 'junk', mapping to a NumPy - array of integers; additionally, it has a key 'bbx' mapping to a NumPy - array of floats with bounding box coordinates. - """ - with tf.io.gfile.GFile(dataset_file_path, 'rb') as f: - cfg = matlab.loadmat(f) - - # Parse outputs according to the specificities of the dataset file. - query_list = [str(im_array[0]) for im_array in np.squeeze(cfg['qimlist'])] - index_list = [str(im_array[0]) for im_array in np.squeeze(cfg['imlist'])] - ground_truth_raw = np.squeeze(cfg['gnd']) - ground_truth = [] - for query_ground_truth_raw in ground_truth_raw: - query_ground_truth = {} - for ground_truth_key in _GROUND_TRUTH_KEYS: - if ground_truth_key in query_ground_truth_raw.dtype.names: - adjusted_labels = query_ground_truth_raw[ground_truth_key] - 1 - query_ground_truth[ground_truth_key] = adjusted_labels.flatten() - - query_ground_truth['bbx'] = np.squeeze(query_ground_truth_raw['bbx']) - ground_truth.append(query_ground_truth) - - return query_list, index_list, ground_truth - - -def _ParseGroundTruth(ok_list, junk_list): - """Constructs dictionary of ok/junk indices for a data subset and query. 
- - Args: - ok_list: List of NumPy arrays containing true positive indices for query. - junk_list: List of NumPy arrays containing ignored indices for query. - - Returns: - ok_junk_dict: Dict mapping 'ok' and 'junk' strings to NumPy array of - indices. - """ - ok_junk_dict = {} - ok_junk_dict['ok'] = np.concatenate(ok_list) - ok_junk_dict['junk'] = np.concatenate(junk_list) - return ok_junk_dict - - -def ParseEasyMediumHardGroundTruth(ground_truth): - """Parses easy/medium/hard ground-truth from Revisited datasets. - - Args: - ground_truth: Usually the output from ReadDatasetFile(). List containing - ground-truth information for dataset. Each entry is a dict corresponding - to the ground-truth information for a query. The dict must have keys - 'easy', 'hard', and 'junk', mapping to a NumPy array of integers. - - Returns: - easy_ground_truth: List containing ground-truth information for easy subset - of dataset. Each entry is a dict corresponding to the ground-truth - information for a query. The dict has keys 'ok' and 'junk', mapping to a - NumPy array of integers. - medium_ground_truth: Same as `easy_ground_truth`, but for the medium subset. - hard_ground_truth: Same as `easy_ground_truth`, but for the hard subset. - """ - num_queries = len(ground_truth) - - easy_ground_truth = [] - medium_ground_truth = [] - hard_ground_truth = [] - for i in range(num_queries): - easy_ground_truth.append( - _ParseGroundTruth([ground_truth[i]['easy']], - [ground_truth[i]['junk'], ground_truth[i]['hard']])) - medium_ground_truth.append( - _ParseGroundTruth([ground_truth[i]['easy'], ground_truth[i]['hard']], - [ground_truth[i]['junk']])) - hard_ground_truth.append( - _ParseGroundTruth([ground_truth[i]['hard']], - [ground_truth[i]['junk'], ground_truth[i]['easy']])) - - return easy_ground_truth, medium_ground_truth, hard_ground_truth - - -def AdjustPositiveRanks(positive_ranks, junk_ranks): - """Adjusts positive ranks based on junk ranks. 
- - Args: - positive_ranks: Sorted 1D NumPy integer array. - junk_ranks: Sorted 1D NumPy integer array. - - Returns: - adjusted_positive_ranks: Sorted 1D NumPy array. - """ - if not junk_ranks.size: - return positive_ranks - - adjusted_positive_ranks = positive_ranks - j = 0 - for i, positive_index in enumerate(positive_ranks): - while (j < len(junk_ranks) and positive_index > junk_ranks[j]): - j += 1 - - adjusted_positive_ranks[i] -= j - - return adjusted_positive_ranks - - -def ComputeAveragePrecision(positive_ranks): - """Computes average precision according to dataset convention. - - It assumes that `positive_ranks` contains the ranks for all expected positive - index images to be retrieved. If `positive_ranks` is empty, returns - `average_precision` = 0. - - Note that average precision computation here does NOT use the finite sum - method (see - https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision) - which is common in information retrieval literature. Instead, the method - implemented here integrates over the precision-recall curve by averaging two - adjacent precision points, then multiplying by the recall step. This is the - convention for the Revisited Oxford/Paris datasets. - - Args: - positive_ranks: Sorted 1D NumPy integer array, zero-indexed. - - Returns: - average_precision: Float. - """ - average_precision = 0.0 - - num_expected_positives = len(positive_ranks) - if not num_expected_positives: - return average_precision - - recall_step = 1.0 / num_expected_positives - for i, rank in enumerate(positive_ranks): - if not rank: - left_precision = 1.0 - else: - left_precision = i / rank - - right_precision = (i + 1) / (rank + 1) - average_precision += (left_precision + right_precision) * recall_step / 2 - - return average_precision - - -def ComputePRAtRanks(positive_ranks, desired_pr_ranks): - """Computes precision/recall at desired ranks. 
- - It assumes that `positive_ranks` contains the ranks for all expected positive - index images to be retrieved. If `positive_ranks` is empty, return all-zeros - `precisions`/`recalls`. - - If a desired rank is larger than the last positive rank, its precision is - computed based on the last positive rank. For example, if `desired_pr_ranks` - is [10] and `positive_ranks` = [0, 7] --> `precisions` = [0.25], `recalls` = - [1.0]. - - Args: - positive_ranks: 1D NumPy integer array, zero-indexed. - desired_pr_ranks: List of integers containing the desired precision/recall - ranks to be reported. Eg, if precision@1/recall@1 and - precision@10/recall@10 are desired, this should be set to [1, 10]. - - Returns: - precisions: Precision @ `desired_pr_ranks` (NumPy array of - floats, with shape [len(desired_pr_ranks)]). - recalls: Recall @ `desired_pr_ranks` (NumPy array of floats, with - shape [len(desired_pr_ranks)]). - """ - num_desired_pr_ranks = len(desired_pr_ranks) - precisions = np.zeros([num_desired_pr_ranks]) - recalls = np.zeros([num_desired_pr_ranks]) - - num_expected_positives = len(positive_ranks) - if not num_expected_positives: - return precisions, recalls - - positive_ranks_one_indexed = positive_ranks + 1 - for i, desired_pr_rank in enumerate(desired_pr_ranks): - recalls[i] = np.sum( - positive_ranks_one_indexed <= desired_pr_rank) / num_expected_positives - - # If `desired_pr_rank` is larger than last positive's rank, only compute - # precision with respect to last positive's position. - precision_rank = min(max(positive_ranks_one_indexed), desired_pr_rank) - precisions[i] = np.sum( - positive_ranks_one_indexed <= precision_rank) / precision_rank - - return precisions, recalls - - -def ComputeMetrics(sorted_index_ids, ground_truth, desired_pr_ranks): - """Computes metrics for retrieval results on the Revisited datasets. 
- - If there are no valid ground-truth index images for a given query, the metric - results for the given query (`average_precisions`, `precisions` and `recalls`) - are set to NaN, and they are not taken into account when computing the - aggregated metrics (`mean_average_precision`, `mean_precisions` and - `mean_recalls`) over all queries. - - Args: - sorted_index_ids: Integer NumPy array of shape [#queries, #index_images]. - For each query, contains an array denoting the most relevant index images, - sorted from most to least relevant. - ground_truth: List containing ground-truth information for dataset. Each - entry is a dict corresponding to the ground-truth information for a query. - The dict has keys 'ok' and 'junk', mapping to a NumPy array of integers. - desired_pr_ranks: List of integers containing the desired precision/recall - ranks to be reported. Eg, if precision@1/recall@1 and - precision@10/recall@10 are desired, this should be set to [1, 10]. The - largest item should be <= #index_images. - - Returns: - mean_average_precision: Mean average precision (float). - mean_precisions: Mean precision @ `desired_pr_ranks` (NumPy array of - floats, with shape [len(desired_pr_ranks)]). - mean_recalls: Mean recall @ `desired_pr_ranks` (NumPy array of floats, with - shape [len(desired_pr_ranks)]). - average_precisions: Average precision for each query (NumPy array of floats, - with shape [#queries]). - precisions: Precision @ `desired_pr_ranks`, for each query (NumPy array of - floats, with shape [#queries, len(desired_pr_ranks)]). - recalls: Recall @ `desired_pr_ranks`, for each query (NumPy array of - floats, with shape [#queries, len(desired_pr_ranks)]). - - Raises: - ValueError: If largest desired PR rank in `desired_pr_ranks` > - #index_images. 
- """ - num_queries, num_index_images = sorted_index_ids.shape - num_desired_pr_ranks = len(desired_pr_ranks) - - sorted_desired_pr_ranks = sorted(desired_pr_ranks) - - if sorted_desired_pr_ranks[-1] > num_index_images: - raise ValueError( - 'Requested PR ranks up to %d, however there are only %d images' % - (sorted_desired_pr_ranks[-1], num_index_images)) - - # Instantiate all outputs, then loop over each query and gather metrics. - mean_average_precision = 0.0 - mean_precisions = np.zeros([num_desired_pr_ranks]) - mean_recalls = np.zeros([num_desired_pr_ranks]) - average_precisions = np.zeros([num_queries]) - precisions = np.zeros([num_queries, num_desired_pr_ranks]) - recalls = np.zeros([num_queries, num_desired_pr_ranks]) - num_empty_gt_queries = 0 - for i in range(num_queries): - ok_index_images = ground_truth[i]['ok'] - junk_index_images = ground_truth[i]['junk'] - - if not ok_index_images.size: - average_precisions[i] = float('nan') - precisions[i, :] = float('nan') - recalls[i, :] = float('nan') - num_empty_gt_queries += 1 - continue - - positive_ranks = np.arange(num_index_images)[np.in1d( - sorted_index_ids[i], ok_index_images)] - junk_ranks = np.arange(num_index_images)[np.in1d(sorted_index_ids[i], - junk_index_images)] - - adjusted_positive_ranks = AdjustPositiveRanks(positive_ranks, junk_ranks) - - average_precisions[i] = ComputeAveragePrecision(adjusted_positive_ranks) - precisions[i, :], recalls[i, :] = ComputePRAtRanks(adjusted_positive_ranks, - desired_pr_ranks) - - mean_average_precision += average_precisions[i] - mean_precisions += precisions[i, :] - mean_recalls += recalls[i, :] - - # Normalize aggregated metrics by number of queries. 
- num_valid_queries = num_queries - num_empty_gt_queries - mean_average_precision /= num_valid_queries - mean_precisions /= num_valid_queries - mean_recalls /= num_valid_queries - - return (mean_average_precision, mean_precisions, mean_recalls, - average_precisions, precisions, recalls) - - -def SaveMetricsFile(mean_average_precision, mean_precisions, mean_recalls, - pr_ranks, output_path): - """Saves aggregated retrieval metrics to text file. - - Args: - mean_average_precision: Dict mapping each dataset protocol to a float. - mean_precisions: Dict mapping each dataset protocol to a NumPy array of - floats with shape [len(pr_ranks)]. - mean_recalls: Dict mapping each dataset protocol to a NumPy array of floats - with shape [len(pr_ranks)]. - pr_ranks: List of integers. - output_path: Full file path. - """ - with tf.io.gfile.GFile(output_path, 'w') as f: - for k in sorted(mean_average_precision.keys()): - f.write('{}\n mAP={}\n mP@k{} {}\n mR@k{} {}\n'.format( - k, np.around(mean_average_precision[k] * 100, decimals=2), - np.array(pr_ranks), np.around(mean_precisions[k] * 100, decimals=2), - np.array(pr_ranks), np.around(mean_recalls[k] * 100, decimals=2))) - - -def _ParseSpaceSeparatedStringsInBrackets(line, prefixes, ind): - """Parses line containing space-separated strings in brackets. - - Args: - line: String, containing line in metrics file with mP@k or mR@k figures. - prefixes: Tuple/list of strings, containing valid prefixes. - ind: Integer indicating which field within brackets is parsed. - - Yields: - entry: String format entry. - - Raises: - ValueError: If input line does not contain a valid prefix. - """ - for prefix in prefixes: - if line.startswith(prefix): - line = line[len(prefix):] - break - else: - raise ValueError('Line %s is malformed, cannot find valid prefixes' % line) - - for entry in line.split('[')[ind].split(']')[0].split(): - yield entry - - -def _ParsePrRanks(line): - """Parses PR ranks from mP@k line in metrics file. 
- - Args: - line: String, containing line in metrics file with mP@k figures. - - Returns: - pr_ranks: List of integers, containing used ranks. - - Raises: - ValueError: If input line is malformed. - """ - return [ - int(pr_rank) for pr_rank in _ParseSpaceSeparatedStringsInBrackets( - line, [' mP@k['], 0) if pr_rank - ] - - -def _ParsePrScores(line, num_pr_ranks): - """Parses PR scores from line in metrics file. - - Args: - line: String, containing line in metrics file with mP@k or mR@k figures. - num_pr_ranks: Integer, number of scores that should be in output list. - - Returns: - pr_scores: List of floats, containing scores. - - Raises: - ValueError: If input line is malformed. - """ - pr_scores = [ - float(pr_score) for pr_score in _ParseSpaceSeparatedStringsInBrackets( - line, (' mP@k[', ' mR@k['), 1) if pr_score - ] - - if len(pr_scores) != num_pr_ranks: - raise ValueError('Line %s is malformed, expected %d scores but found %d' % - (line, num_pr_ranks, len(pr_scores))) - - return pr_scores - - -def ReadMetricsFile(metrics_path): - """Reads aggregated retrieval metrics from text file. - - Args: - metrics_path: Full file path, containing aggregated retrieval metrics. - - Returns: - mean_average_precision: Dict mapping each dataset protocol to a float. - pr_ranks: List of integer ranks used in aggregated recall/precision metrics. - mean_precisions: Dict mapping each dataset protocol to a NumPy array of - floats with shape [len(`pr_ranks`)]. - mean_recalls: Dict mapping each dataset protocol to a NumPy array of floats - with shape [len(`pr_ranks`)]. - - Raises: - ValueError: If input file is malformed. 
- """ - with tf.io.gfile.GFile(metrics_path, 'r') as f: - file_contents_stripped = [l.rstrip() for l in f] - - if len(file_contents_stripped) % 4: - raise ValueError( - 'Malformed input %s: number of lines must be a multiple of 4, ' - 'but it is %d' % (metrics_path, len(file_contents_stripped))) - - mean_average_precision = {} - pr_ranks = [] - mean_precisions = {} - mean_recalls = {} - protocols = set() - for i in range(0, len(file_contents_stripped), 4): - protocol = file_contents_stripped[i] - if protocol in protocols: - raise ValueError( - 'Malformed input %s: protocol %s is found a second time' % - (metrics_path, protocol)) - protocols.add(protocol) - - # Parse mAP. - mean_average_precision[protocol] = float( - file_contents_stripped[i + 1].split('=')[1]) / 100.0 - - # Parse (or check consistency of) pr_ranks. - parsed_pr_ranks = _ParsePrRanks(file_contents_stripped[i + 2]) - if not pr_ranks: - pr_ranks = parsed_pr_ranks - else: - if parsed_pr_ranks != pr_ranks: - raise ValueError('Malformed input %s: inconsistent PR ranks' % - metrics_path) - - # Parse mean precisions. - mean_precisions[protocol] = np.array( - _ParsePrScores(file_contents_stripped[i + 2], len(pr_ranks)), - dtype=float) / 100.0 - - # Parse mean recalls. 
- mean_recalls[protocol] = np.array( - _ParsePrScores(file_contents_stripped[i + 3], len(pr_ranks)), - dtype=float) / 100.0 - - return mean_average_precision, pr_ranks, mean_precisions, mean_recalls diff --git a/research/delf/delf/python/detect_to_retrieve/extract_aggregation.py b/research/delf/delf/python/detect_to_retrieve/extract_aggregation.py index f9a0fb3e6c62c0adc583ad3b30b809f36742d586..451c4137d93b9b37cedd487487e98b76854db87d 100644 --- a/research/delf/delf/python/detect_to_retrieve/extract_aggregation.py +++ b/research/delf/delf/python/detect_to_retrieve/extract_aggregation.py @@ -25,9 +25,9 @@ from __future__ import print_function import argparse import sys -from tensorflow.python.platform import app +from absl import app +from delf.python.datasets.revisited_op import dataset from delf.python.detect_to_retrieve import aggregation_extraction -from delf.python.detect_to_retrieve import dataset cmd_args = None diff --git a/research/delf/delf/python/detect_to_retrieve/extract_index_boxes_and_features.py b/research/delf/delf/python/detect_to_retrieve/extract_index_boxes_and_features.py index 2b891de4b0b093aa723c0dce547c2722ee475d7e..80bd721c87423174ce9559f9d9bc161a6adda5da 100644 --- a/research/delf/delf/python/detect_to_retrieve/extract_index_boxes_and_features.py +++ b/research/delf/delf/python/detect_to_retrieve/extract_index_boxes_and_features.py @@ -31,9 +31,9 @@ import argparse import os import sys -from tensorflow.python.platform import app +from absl import app +from delf.python.datasets.revisited_op import dataset from delf.python.detect_to_retrieve import boxes_and_features_extraction -from delf.python.detect_to_retrieve import dataset cmd_args = None diff --git a/research/delf/delf/python/detect_to_retrieve/extract_query_features.py b/research/delf/delf/python/detect_to_retrieve/extract_query_features.py index a0812b191265ec6e5350acf989432747d196a519..2ff4a5a23f50cc6753cff066ca510ad5e639ba1e 100644 --- 
a/research/delf/delf/python/detect_to_retrieve/extract_query_features.py +++ b/research/delf/delf/python/detect_to_retrieve/extract_query_features.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2017 The TensorFlow Authors All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,15 +31,15 @@ import os import sys import time +from absl import app import numpy as np import tensorflow as tf from google.protobuf import text_format -from tensorflow.python.platform import app from delf import delf_config_pb2 from delf import feature_io from delf import utils -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset from delf import extractor cmd_args = None @@ -75,8 +76,8 @@ def main(argv): query_image_name = query_list[i] input_image_filename = os.path.join(cmd_args.images_dir, query_image_name + _IMAGE_EXTENSION) - output_feature_filename = os.path.join( - cmd_args.output_features_dir, query_image_name + _DELF_EXTENSION) + output_feature_filename = os.path.join(cmd_args.output_features_dir, + query_image_name + _DELF_EXTENSION) if tf.io.gfile.exists(output_feature_filename): print(f'Skipping {query_image_name}') continue @@ -93,8 +94,7 @@ def main(argv): attention_out = extracted_features['local_features']['attention'] feature_io.WriteToFile(output_feature_filename, locations_out, - feature_scales_out, descriptors_out, - attention_out) + feature_scales_out, descriptors_out, attention_out) elapsed = (time.time() - start) print('Processed %d query images in %f seconds' % (num_images, elapsed)) diff --git a/research/delf/delf/python/detect_to_retrieve/image_reranking.py b/research/delf/delf/python/detect_to_retrieve/image_reranking.py index 60c29cc18a4436815c721855da0ca4577b06e6c4..8c115835d638e4ec2337233bdfeeebe6e5800f93 100644 --- a/research/delf/delf/python/detect_to_retrieve/image_reranking.py +++ b/research/delf/delf/python/detect_to_retrieve/image_reranking.py @@ -47,12 +47,13 
@@ def MatchFeatures(query_locations, index_image_locations, index_image_descriptors, ransac_seed=None, - feature_distance_threshold=0.9, + descriptor_matching_threshold=0.9, ransac_residual_threshold=10.0, query_im_array=None, index_im_array=None, query_im_scale_factors=None, - index_im_scale_factors=None): + index_im_scale_factors=None, + use_ratio_test=False): """Matches local features using geometric verification. First, finds putative local feature matches by matching `query_descriptors` @@ -70,8 +71,10 @@ def MatchFeatures(query_locations, index_image_descriptors: Descriptors of local features for index image. NumPy array of shape [#index_image_features, depth]. ransac_seed: Seed used by RANSAC. If None (default), no seed is provided. - feature_distance_threshold: Distance threshold below which a pair of - features is considered a potential match, and will be fed into RANSAC. + descriptor_matching_threshold: Threshold below which a pair of local + descriptors is considered a potential match, and will be fed into RANSAC. + If use_ratio_test==False, this is a simple distance threshold. If + use_ratio_test==True, this is Lowe's ratio test threshold. ransac_residual_threshold: Residual error threshold for considering matches as inliers, used in RANSAC algorithm. query_im_array: Optional. If not None, contains a NumPy array with the query @@ -83,6 +86,8 @@ def MatchFeatures(query_locations, (ie, feature locations are not scaled). index_im_scale_factors: Optional. Same as `query_im_scale_factors`, but for index image. + use_ratio_test: If True, descriptor matching is performed via ratio test, + instead of distance-based threshold. Returns: score: Number of inliers of match. If no match is found, returns 0. @@ -105,22 +110,38 @@ def MatchFeatures(query_locations, 'Local feature dimensionality is not consistent for query and index ' 'images.') - # Find nearest-neighbor matches using a KD tree. + # Construct KD-tree used to find nearest neighbors. 
index_image_tree = spatial.cKDTree(index_image_descriptors) - _, indices = index_image_tree.query( - query_descriptors, distance_upper_bound=feature_distance_threshold) - - # Select feature locations for putative matches. - query_locations_to_use = np.array([ - query_locations[i,] - for i in range(num_features_query) - if indices[i] != num_features_index_image - ]) - index_image_locations_to_use = np.array([ - index_image_locations[indices[i],] - for i in range(num_features_query) - if indices[i] != num_features_index_image - ]) + if use_ratio_test: + distances, indices = index_image_tree.query( + query_descriptors, k=2, n_jobs=-1) + query_locations_to_use = np.array([ + query_locations[i,] + for i in range(num_features_query) + if distances[i][0] < descriptor_matching_threshold * distances[i][1] + ]) + index_image_locations_to_use = np.array([ + index_image_locations[indices[i][0],] + for i in range(num_features_query) + if distances[i][0] < descriptor_matching_threshold * distances[i][1] + ]) + else: + _, indices = index_image_tree.query( + query_descriptors, + distance_upper_bound=descriptor_matching_threshold, + n_jobs=-1) + + # Select feature locations for putative matches. + query_locations_to_use = np.array([ + query_locations[i,] + for i in range(num_features_query) + if indices[i] != num_features_index_image + ]) + index_image_locations_to_use = np.array([ + index_image_locations[indices[i],] + for i in range(num_features_query) + if indices[i] != num_features_index_image + ]) # If there are not enough putative matches, early return 0. if query_locations_to_use.shape[0] <= _MIN_RANSAC_SAMPLES: @@ -175,8 +196,9 @@ def RerankByGeometricVerification(input_ranks, junk_ids, local_feature_extension=_DELF_EXTENSION, ransac_seed=None, - feature_distance_threshold=0.9, - ransac_residual_threshold=10.0): + descriptor_matching_threshold=0.9, + ransac_residual_threshold=10.0, + use_ratio_test=False): """Re-ranks retrieval results using geometric verification. 
Args: @@ -195,10 +217,11 @@ def RerankByGeometricVerification(input_ranks, local_feature_extension: String, extension to use for loading local feature files. ransac_seed: Seed used by RANSAC. If None (default), no seed is provided. - feature_distance_threshold: Distance threshold below which a pair of local - features is considered a potential match, and will be fed into RANSAC. + descriptor_matching_threshold: Threshold used for local descriptor matching. ransac_residual_threshold: Residual error threshold for considering matches as inliers, used in RANSAC algorithm. + use_ratio_test: If True, descriptor matching is performed via ratio test, + instead of distance-based threshold. Returns: output_ranks: 1D NumPy array with index image indices, sorted from the most @@ -258,8 +281,9 @@ def RerankByGeometricVerification(input_ranks, index_image_locations, index_image_descriptors, ransac_seed=ransac_seed, - feature_distance_threshold=feature_distance_threshold, - ransac_residual_threshold=ransac_residual_threshold) + descriptor_matching_threshold=descriptor_matching_threshold, + ransac_residual_threshold=ransac_residual_threshold, + use_ratio_test=use_ratio_test) # Sort based on (inliers_score, initial_score). def _InliersInitialScoresSorting(k): diff --git a/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py b/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py index c2034dfb285118f4ed8928f996e031365a3ffbbf..2b7a22789259a6ba35ffbe99c0c7725fbc2f7ae5 100644 --- a/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py +++ b/research/delf/delf/python/detect_to_retrieve/perform_retrieval.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2019 The TensorFlow Authors All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -23,15 +24,15 @@ import os import sys import time +from absl import app import numpy as np import tensorflow as tf from google.protobuf import text_format -from tensorflow.python.platform import app from delf import aggregation_config_pb2 from delf import datum_io from delf import feature_aggregation_similarity -from delf.python.detect_to_retrieve import dataset +from delf.python.datasets.revisited_op import dataset from delf.python.detect_to_retrieve import image_reranking cmd_args = None diff --git a/research/delf/delf/python/examples/extract_boxes.py b/research/delf/delf/python/examples/extract_boxes.py index 8851c44fb9a051104adde50a4c869a28cfd513da..1a3b4886a39680ed4d293d3b1116d5a19aac9db6 100644 --- a/research/delf/delf/python/examples/extract_boxes.py +++ b/research/delf/delf/python/examples/extract_boxes.py @@ -27,12 +27,12 @@ import os import sys import time +from absl import app import matplotlib.patches as patches import matplotlib.pyplot as plt import numpy as np import tensorflow as tf -from tensorflow.python.platform import app from delf import box_io from delf import utils from delf import detector @@ -153,17 +153,14 @@ def main(argv): print('Starting to detect objects in images...') elif i % _STATUS_CHECK_ITERATIONS == 0: elapsed = (time.time() - start) - print( - f'Processing image {i} out of {num_images}, last ' - f'{_STATUS_CHECK_ITERATIONS} images took {elapsed} seconds' - ) + print(f'Processing image {i} out of {num_images}, last ' + f'{_STATUS_CHECK_ITERATIONS} images took {elapsed} seconds') start = time.time() # If descriptor already exists, skip its computation. 
base_boxes_filename, _ = os.path.splitext(os.path.basename(image_path)) out_boxes_filename = base_boxes_filename + _BOX_EXT - out_boxes_fullpath = os.path.join(cmd_args.output_dir, - out_boxes_filename) + out_boxes_fullpath = os.path.join(cmd_args.output_dir, out_boxes_filename) if tf.io.gfile.exists(out_boxes_fullpath): print(f'Skipping {image_path}') continue @@ -173,8 +170,7 @@ def main(argv): # Extract and save boxes. (boxes_out, scores_out, class_indices_out) = detector_fn(im) (selected_boxes, selected_scores, - selected_class_indices) = _FilterBoxesByScore(boxes_out[0], - scores_out[0], + selected_class_indices) = _FilterBoxesByScore(boxes_out[0], scores_out[0], class_indices_out[0], cmd_args.detector_thresh) @@ -182,8 +178,7 @@ def main(argv): selected_class_indices) if cmd_args.output_viz_dir: out_viz_filename = base_boxes_filename + _VIZ_SUFFIX - out_viz_fullpath = os.path.join(cmd_args.output_viz_dir, - out_viz_filename) + out_viz_fullpath = os.path.join(cmd_args.output_viz_dir, out_viz_filename) _PlotBoxesAndSaveImage(im[0], selected_boxes, out_viz_fullpath) diff --git a/research/delf/delf/python/examples/extract_features.py b/research/delf/delf/python/examples/extract_features.py index 05fd77316070d39722e133dbd544f5b53791f6d0..1b55cba9fb6f0773a0db5cbe63d5234c02c474cb 100644 --- a/research/delf/delf/python/examples/extract_features.py +++ b/research/delf/delf/python/examples/extract_features.py @@ -27,12 +27,12 @@ import os import sys import time +from absl import app import numpy as np from six.moves import range import tensorflow as tf from google.protobuf import text_format -from tensorflow.python.platform import app from delf import delf_config_pb2 from delf import feature_io from delf import utils @@ -87,10 +87,8 @@ def main(unused_argv): print('Starting to extract DELF features from images...') elif i % _STATUS_CHECK_ITERATIONS == 0: elapsed = (time.time() - start) - print( - f'Processing image {i} out of {num_images}, last ' - 
f'{_STATUS_CHECK_ITERATIONS} images took {elapsed} seconds' - ) + print(f'Processing image {i} out of {num_images}, last ' + f'{_STATUS_CHECK_ITERATIONS} images took {elapsed} seconds') start = time.time() # If descriptor already exists, skip its computation. diff --git a/research/delf/delf/python/examples/extractor.py b/research/delf/delf/python/examples/extractor.py index db7b80c9916df6e19006bc00841dabd320c64704..a6932b1de58592bacfef7fd79c03581161560f26 100644 --- a/research/delf/delf/python/examples/extractor.py +++ b/research/delf/delf/python/examples/extractor.py @@ -19,81 +19,18 @@ from __future__ import division from __future__ import print_function import numpy as np -from PIL import Image import tensorflow as tf from delf import datum_io from delf import feature_extractor +from delf import utils -# Minimum dimensions below which DELF features are not extracted (empty +# Minimum dimensions below which features are not extracted (empty # features are returned). This applies after any resizing is performed. _MIN_HEIGHT = 10 _MIN_WIDTH = 10 -def ResizeImage(image, config, resize_factor=1.0): - """Resizes image according to config. - - Args: - image: Uint8 array with shape (height, width, 3). - config: DelfConfig proto containing the model configuration. - resize_factor: Optional float resize factor for the input image. If given, - the maximum and minimum allowed image sizes in `config` are scaled by this - factor. Must be non-negative. - - Returns: - resized_image: Uint8 array with resized image. - scale_factors: 2D float array, with factors used for resizing along height - and width (If upscaling, larger than 1; if downscaling, smaller than 1). - - Raises: - ValueError: If `image` has incorrect number of dimensions/channels. 
- """ - if resize_factor < 0.0: - raise ValueError('negative resize_factor is not allowed: %f' % - resize_factor) - if image.ndim != 3: - raise ValueError('image has incorrect number of dimensions: %d' % - image.ndims) - height, width, channels = image.shape - - # Take into account resize factor. - max_image_size = resize_factor * config.max_image_size - min_image_size = resize_factor * config.min_image_size - - if channels != 3: - raise ValueError('image has incorrect number of channels: %d' % channels) - - largest_side = max(width, height) - - if max_image_size >= 0 and largest_side > max_image_size: - scale_factor = max_image_size / largest_side - elif min_image_size >= 0 and largest_side < min_image_size: - scale_factor = min_image_size / largest_side - elif config.use_square_images and (height != width): - scale_factor = 1.0 - else: - # No resizing needed, early return. - return image, np.ones(2, dtype=float) - - # Note that new_shape is in (width, height) format (PIL convention), while - # scale_factors are in (height, width) convention (NumPy convention). - if config.use_square_images: - new_shape = (int(round(largest_side * scale_factor)), - int(round(largest_side * scale_factor))) - else: - new_shape = (int(round(width * scale_factor)), - int(round(height * scale_factor))) - - scale_factors = np.array([new_shape[1] / height, new_shape[0] / width], - dtype=float) - - pil_image = Image.fromarray(image) - resized_image = np.array(pil_image.resize(new_shape, resample=Image.BILINEAR)) - - return resized_image, scale_factors - - def MakeExtractor(config): """Creates a function to extract global and/or local features from an image. @@ -106,18 +43,21 @@ def MakeExtractor(config): Raises: ValueError: if config is invalid. """ - # Assert the configuration - if config.use_global_features and hasattr( - config, 'is_tf2_exported') and config.is_tf2_exported: - raise ValueError('use_global_features is incompatible with is_tf2_exported') + # Assert the configuration. 
+ if not config.use_local_features and not config.use_global_features: + raise ValueError('Invalid config: at least one of ' + '{use_local_features, use_global_features} must be True') # Load model. model = tf.saved_model.load(config.model_path) - # Input/output end-points/tensors. + # Input image scales to use for extraction. + image_scales_tensor = tf.convert_to_tensor(list(config.image_scales)) + + # Input (feeds) and output (fetches) end-points. These are only needed when + # using a model that was exported using TF1. feeds = ['input_image:0', 'input_scales:0'] fetches = [] - image_scales_tensor = tf.convert_to_tensor(list(config.image_scales)) # Custom configuration needed when local features are used. if config.use_local_features: @@ -159,8 +99,14 @@ def MakeExtractor(config): # Custom configuration needed when global features are used. if config.use_global_features: - # Extra output end-point. + # Extra input/output end-points/tensors. + feeds.append('input_global_scales_ind:0') fetches.append('global_descriptors:0') + if config.delf_global_config.image_scales_ind: + global_scales_ind_tensor = tf.constant( + list(config.delf_global_config.image_scales_ind)) + else: + global_scales_ind_tensor = tf.range(len(config.image_scales)) # If using PCA, pre-load required parameters. global_pca_parameters = {} @@ -206,7 +152,7 @@ def MakeExtractor(config): features (key 'local_features' mapping to a dict with keys 'locations', 'descriptors', 'scales', 'attention'). """ - resized_image, scale_factors = ResizeImage( + resized_image, scale_factors = utils.ResizeImage( image, config, resize_factor=resize_factor) # If the image is too small, returns empty features. 
@@ -231,9 +177,21 @@ def MakeExtractor(config): extracted_features = {} output = None - if config.use_local_features: - if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported: - predict = model.signatures['serving_default'] + if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported: + predict = model.signatures['serving_default'] + if config.use_local_features and config.use_global_features: + output_dict = predict( + input_image=image_tensor, + input_scales=image_scales_tensor, + input_max_feature_num=max_feature_num_tensor, + input_abs_thres=score_threshold_tensor, + input_global_scales_ind=global_scales_ind_tensor) + output = [ + output_dict['boxes'], output_dict['features'], + output_dict['scales'], output_dict['scores'], + output_dict['global_descriptors'] + ] + elif config.use_local_features: output_dict = predict( input_image=image_tensor, input_scales=image_scales_tensor, @@ -244,23 +202,29 @@ def MakeExtractor(config): output_dict['scales'], output_dict['scores'] ] else: + output_dict = predict( + input_image=image_tensor, + input_scales=image_scales_tensor, + input_global_scales_ind=global_scales_ind_tensor) + output = [output_dict['global_descriptors']] + else: + if config.use_local_features and config.use_global_features: + output = model(image_tensor, image_scales_tensor, + score_threshold_tensor, max_feature_num_tensor, + global_scales_ind_tensor) + elif config.use_local_features: output = model(image_tensor, image_scales_tensor, score_threshold_tensor, max_feature_num_tensor) - else: - output = model(image_tensor, image_scales_tensor) + else: + output = model(image_tensor, image_scales_tensor, + global_scales_ind_tensor) # Post-process extracted features: normalize, PCA (optional), pooling. 
if config.use_global_features: raw_global_descriptors = output[-1] - if config.delf_global_config.image_scales_ind: - raw_global_descriptors_selected_scales = tf.gather( - raw_global_descriptors, - list(config.delf_global_config.image_scales_ind)) - else: - raw_global_descriptors_selected_scales = raw_global_descriptors global_descriptors_per_scale = feature_extractor.PostProcessDescriptors( - raw_global_descriptors_selected_scales, - config.delf_global_config.use_pca, global_pca_parameters) + raw_global_descriptors, config.delf_global_config.use_pca, + global_pca_parameters) unnormalized_global_descriptor = tf.reduce_sum( global_descriptors_per_scale, axis=0, name='sum_pooling') global_descriptor = tf.nn.l2_normalize( @@ -281,7 +245,8 @@ def MakeExtractor(config): feature_extractor.DelfFeaturePostProcessing( boxes, raw_local_descriptors, config.delf_local_config.use_pca, local_pca_parameters)) - locations /= scale_factors + if not config.delf_local_config.use_resized_coordinates: + locations /= scale_factors extracted_features.update({ 'local_features': { diff --git a/research/delf/delf/python/examples/extractor_test.py b/research/delf/delf/python/examples/extractor_test.py deleted file mode 100644 index aa560c75a5ca7f8a48247eb7636643e2369c0e5e..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/examples/extractor_test.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2019 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for DELF feature extractor.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from delf import delf_config_pb2 -from delf import extractor - - -class ExtractorTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - ('Max-1Min-1', -1, -1, 1.0, False, [4, 2, 3], [1.0, 1.0]), - ('Max-1Min-1Square', -1, -1, 1.0, True, [4, 4, 3], [1.0, 2.0]), - ('Max2Min-1', 2, -1, 1.0, False, [2, 1, 3], [0.5, 0.5]), - ('Max2Min-1Square', 2, -1, 1.0, True, [2, 2, 3], [0.5, 1.0]), - ('Max8Min-1', 8, -1, 1.0, False, [4, 2, 3], [1.0, 1.0]), - ('Max8Min-1Square', 8, -1, 1.0, True, [4, 4, 3], [1.0, 2.0]), - ('Max-1Min1', -1, 1, 1.0, False, [4, 2, 3], [1.0, 1.0]), - ('Max-1Min1Square', -1, 1, 1.0, True, [4, 4, 3], [1.0, 2.0]), - ('Max-1Min8', -1, 8, 1.0, False, [8, 4, 3], [2.0, 2.0]), - ('Max-1Min8Square', -1, 8, 1.0, True, [8, 8, 3], [2.0, 4.0]), - ('Max16Min8', 16, 8, 1.0, False, [8, 4, 3], [2.0, 2.0]), - ('Max16Min8Square', 16, 8, 1.0, True, [8, 8, 3], [2.0, 4.0]), - ('Max2Min2', 2, 2, 1.0, False, [2, 1, 3], [0.5, 0.5]), - ('Max2Min2Square', 2, 2, 1.0, True, [2, 2, 3], [0.5, 1.0]), - ('Max-1Min-1Factor0.5', -1, -1, 0.5, False, [4, 2, 3], [1.0, 1.0]), - ('Max-1Min-1Factor0.5Square', -1, -1, 0.5, True, [4, 4, 3], [1.0, 2.0]), - ('Max2Min-1Factor2.0', 2, -1, 2.0, False, [4, 2, 3], [1.0, 1.0]), - ('Max2Min-1Factor2.0Square', 2, -1, 2.0, True, [4, 4, 3], [1.0, 2.0]), - ('Max-1Min8Factor0.5', -1, 8, 0.5, False, [4, 2, 3], [1.0, 1.0]), - ('Max-1Min8Factor0.5Square', -1, 8, 0.5, True, [4, 4, 3], [1.0, 2.0]), - ('Max-1Min8Factor0.25', -1, 8, 0.25, False, [4, 2, 3], [1.0, 1.0]), - ('Max-1Min8Factor0.25Square', -1, 8, 
0.25, True, [4, 4, 3], [1.0, 2.0]), - ('Max2Min2Factor2.0', 2, 2, 2.0, False, [4, 2, 3], [1.0, 1.0]), - ('Max2Min2Factor2.0Square', 2, 2, 2.0, True, [4, 4, 3], [1.0, 2.0]), - ('Max16Min8Factor0.5', 16, 8, 0.5, False, [4, 2, 3], [1.0, 1.0]), - ('Max16Min8Factor0.5Square', 16, 8, 0.5, True, [4, 4, 3], [1.0, 2.0]), - ) - def testResizeImageWorks(self, max_image_size, min_image_size, resize_factor, - square_output, expected_shape, - expected_scale_factors): - # Construct image of size 4x2x3. - image = np.array([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [3, 3, 3]], - [[4, 4, 4], [5, 5, 5]], [[6, 6, 6], [7, 7, 7]]], - dtype='uint8') - - # Set up config. - config = delf_config_pb2.DelfConfig( - max_image_size=max_image_size, - min_image_size=min_image_size, - use_square_images=square_output) - - resized_image, scale_factors = extractor.ResizeImage( - image, config, resize_factor) - self.assertAllEqual(resized_image.shape, expected_shape) - self.assertAllClose(scale_factors, expected_scale_factors) - - @parameterized.named_parameters( - ('Max2Min2', 2, 2, 1.0, False, [2, 1, 3], [0.666666, 0.5]), - ('Max2Min2Square', 2, 2, 1.0, True, [2, 2, 3], [0.666666, 1.0]), - ) - def testResizeImageRoundingWorks(self, max_image_size, min_image_size, - resize_factor, square_output, expected_shape, - expected_scale_factors): - # Construct image of size 3x2x3. - image = np.array([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [3, 3, 3]], - [[4, 4, 4], [5, 5, 5]]], - dtype='uint8') - - # Set up config. 
- config = delf_config_pb2.DelfConfig( - max_image_size=max_image_size, - min_image_size=min_image_size, - use_square_images=square_output) - - resized_image, scale_factors = extractor.ResizeImage( - image, config, resize_factor) - self.assertAllEqual(resized_image.shape, expected_shape) - self.assertAllClose(scale_factors, expected_scale_factors) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/delf/delf/python/examples/match_images.py b/research/delf/delf/python/examples/match_images.py index bb030739cb9067bf3be50f999368af622f083b54..f14f93f9eb568b06678e7cfc1162f8e653aa6d91 100644 --- a/research/delf/delf/python/examples/match_images.py +++ b/research/delf/delf/python/examples/match_images.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2017 The TensorFlow Authors All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -27,6 +28,7 @@ from __future__ import print_function import argparse import sys +from absl import app import matplotlib # Needed before pyplot import for matplotlib to work properly. matplotlib.use('Agg') @@ -38,7 +40,6 @@ from skimage import feature from skimage import measure from skimage import transform -from tensorflow.python.platform import app from delf import feature_io cmd_args = None diff --git a/research/delf/delf/python/google_landmarks_dataset/README.md b/research/delf/delf/python/google_landmarks_dataset/README.md deleted file mode 100644 index 485c1a946b5b21ddb369cc1bc8645534abbfad1e..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/google_landmarks_dataset/README.md +++ /dev/null @@ -1,123 +0,0 @@ -## GLDv2 code/models - -[![Paper](http://img.shields.io/badge/paper-arXiv.2004.01804-B3181B.svg)](https://arxiv.org/abs/2004.01804) - -These instructions can be used to reproduce results from the -[GLDv2 paper](https://arxiv.org/abs/2004.01804). 
We present here results on the -Revisited Oxford/Paris datasets since they are smaller and quicker to -reproduce -- but note that a very similar procedure can be used to obtain -results on the GLDv2 retrieval or recognition datasets. - -Note that this directory also contains code to compute GLDv2 metrics: see -`compute_retrieval_metrics.py`, `compute_recognition_metrics.py` and associated -file reading / metric computation modules. - -For more details on the dataset, please refer to its -[website](https://github.com/cvdfoundation/google-landmark). - -### Install DELF library - -To be able to use this code, please follow -[these instructions](../../../INSTALL_INSTRUCTIONS.md) to properly install the -DELF library. - -### Download Revisited Oxford/Paris datasets - -```bash -mkdir -p ~/revisitop/data && cd ~/revisitop/data - -# Oxford dataset. -wget http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/oxbuild_images.tgz -mkdir oxford5k_images -tar -xvzf oxbuild_images.tgz -C oxford5k_images/ - -# Paris dataset. Download and move all images to same directory. -wget http://www.robots.ox.ac.uk/~vgg/data/parisbuildings/paris_1.tgz -wget http://www.robots.ox.ac.uk/~vgg/data/parisbuildings/paris_2.tgz -mkdir paris6k_images_tmp -tar -xvzf paris_1.tgz -C paris6k_images_tmp/ -tar -xvzf paris_2.tgz -C paris6k_images_tmp/ -mkdir paris6k_images -mv paris6k_images_tmp/paris/*/*.jpg paris6k_images/ - -# Revisited annotations. -wget http://cmp.felk.cvut.cz/revisitop/data/datasets/roxford5k/gnd_roxford5k.mat -wget http://cmp.felk.cvut.cz/revisitop/data/datasets/rparis6k/gnd_rparis6k.mat -``` - -### Download model - -```bash -# From models/research/delf/delf/python/google_landmarks_dataset -mkdir parameters && cd parameters - -# RN101-ArcFace model trained on GLDv2-clean. -wget https://storage.googleapis.com/delf/rn101_af_gldv2clean_20200521.tar.gz -tar -xvzf rn101_af_gldv2clean_20200521.tar.gz -``` - -### Feature extraction - -We present here commands for extraction on `roxford5k`. 
To extract on `rparis6k` -instead, please edit the arguments accordingly (especially the -`dataset_file_path` argument). - -#### Query feature extraction - -In the Revisited Oxford/Paris experimental protocol, query images must be the -cropped before feature extraction (this is done in the `extract_features` -script, when setting `image_set=query`). Note that this is specific to these -datasets, and not required for the GLDv2 retrieval/recognition datasets. - -Run query feature extraction as follows: - -```bash -# From models/research/delf/delf/python/google_landmarks_dataset -python3 ../delg/extract_features.py \ - --delf_config_path rn101_af_gldv2clean_config.pbtxt \ - --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \ - --images_dir ~/revisitop/data/oxford5k_images \ - --image_set query \ - --output_features_dir ~/revisitop/data/oxford5k_features/query -``` - -#### Index feature extraction - -Run index feature extraction as follows: - -```bash -# From models/research/delf/delf/python/google_landmarks_dataset -python3 ../delg/extract_features.py \ - --delf_config_path rn101_af_gldv2clean_config.pbtxt \ - --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \ - --images_dir ~/revisitop/data/oxford5k_images \ - --image_set index \ - --output_features_dir ~/revisitop/data/oxford5k_features/index -``` - -### Perform retrieval - -To run retrieval on `roxford5k`, the following command can be used: - -```bash -# From models/research/delf/delf/python/google_landmarks_dataset -python3 ../delg/perform_retrieval.py \ - --dataset_file_path ~/revisitop/data/gnd_roxford5k.mat \ - --query_features_dir ~/revisitop/data/oxford5k_features/query \ - --index_features_dir ~/revisitop/data/oxford5k_features/index \ - --output_dir ~/revisitop/results/oxford5k -``` - -A file with named `metrics.txt` will be written to the path given in -`output_dir`. 
The contents should look approximately like: - -``` -hard - mAP=55.54 - mP@k[ 1 5 10] [88.57 80.86 70.14] - mR@k[ 1 5 10] [19.46 33.65 42.44] -medium - mAP=76.23 - mP@k[ 1 5 10] [95.71 92.86 90.43] - mR@k[ 1 5 10] [10.17 25.96 35.29] -``` diff --git a/research/delf/delf/python/normalization_layers/__init__.py b/research/delf/delf/python/normalization_layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9064f503de15c1aa115c5e1ff2f4a5345aadf2af --- /dev/null +++ b/research/delf/delf/python/normalization_layers/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== \ No newline at end of file diff --git a/research/delf/delf/python/normalization_layers/normalization.py b/research/delf/delf/python/normalization_layers/normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..cfb75da753581bd453fa5168c72dc72bc59eda2b --- /dev/null +++ b/research/delf/delf/python/normalization_layers/normalization.py @@ -0,0 +1,40 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Normalization layer definitions.""" + +import tensorflow as tf + + +class L2Normalization(tf.keras.layers.Layer): + """Normalization layer using L2 norm.""" + + def __init__(self): + """Initialization of the L2Normalization layer.""" + super(L2Normalization, self).__init__() + # A lower bound value for the norm. + self.eps = 1e-6 + + def call(self, x, axis=1): + """Invokes the L2Normalization instance. + + Args: + x: A Tensor. + axis: Dimension along which to normalize. A scalar or a vector of + integers. + + Returns: + norm: A Tensor with the same shape as `x`. + """ + return tf.nn.l2_normalize(x, axis, epsilon=self.eps) diff --git a/research/delf/delf/python/normalization_layers/normalization_test.py b/research/delf/delf/python/normalization_layers/normalization_test.py new file mode 100644 index 0000000000000000000000000000000000000000..ea302566c6969538b5dd6fdea441fd94c2585df9 --- /dev/null +++ b/research/delf/delf/python/normalization_layers/normalization_test.py @@ -0,0 +1,36 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for normalization layers.""" + +import tensorflow as tf + +from delf.python.normalization_layers import normalization + + +class NormalizationsTest(tf.test.TestCase): + + def testL2Normalization(self): + x = tf.constant([-4.0, 0.0, 4.0]) + layer = normalization.L2Normalization() + # Run tested function. + result = layer(x, axis=0) + # Define expected result. + exp_output = [-0.70710677, 0.0, 0.70710677] + # Compare actual and expected. + self.assertAllClose(exp_output, result) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/delf/delf/python/pooling_layers/__init__.py b/research/delf/delf/python/pooling_layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9064f503de15c1aa115c5e1ff2f4a5345aadf2af --- /dev/null +++ b/research/delf/delf/python/pooling_layers/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class MAC(tf.keras.layers.Layer):
  """Global max pooling (MAC) layer.

  Maximum Activations of Convolutions (MAC) max-pools each feature map over
  the spatial dimensions. See https://arxiv.org/abs/1511.05879 for a
  reference.
  """

  def call(self, x, axis=None):
    """Invokes the MAC pooling instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return mac(x, axis=axis)


class SPoC(tf.keras.layers.Layer):
  """Average pooling (SPoC) layer.

  Sum-pooled convolutional features (SPoC) is based on the sum pooling of the
  deep features. See https://arxiv.org/pdf/1510.07493.pdf for a reference.
  """

  def call(self, x, axis=None):
    """Invokes the SPoC instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return spoc(x, axis)


class GeM(tf.keras.layers.Layer):
  """Generalized mean pooling (GeM) layer.

  Generalized Mean Pooling (GeM) computes the generalized mean of each
  channel in a tensor. See https://arxiv.org/abs/1711.02512 for a reference.
  """

  def __init__(self, power=3.):
    """Initialization of the generalized mean pooling (GeM) layer.

    Args:
      power: Float power > 0 is an inverse exponent parameter, used during the
        generalized mean pooling computation. Setting this exponent as
        power > 1 increases the contrast of the pooled feature map and focuses
        on the salient features of the image. GeM is a generalization of the
        average pooling commonly used in classification networks (power = 1)
        and of spatial max-pooling layer (power = inf).
    """
    super(GeM, self).__init__()
    self.power = power
    # Numerical-stability floor applied before raising to `power`.
    self.eps = 1e-6

  def call(self, x, axis=None):
    """Invokes the GeM instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return gem(x, power=self.power, eps=self.eps, axis=axis)


class GeMPooling2D(tf.keras.layers.Layer):
  """Generalized mean pooling (GeM) pooling operation for spatial data."""

  def __init__(self,
               power=20.,
               pool_size=(2, 2),
               strides=None,
               padding='valid',
               data_format='channels_last'):
    """Initialization of GeMPooling2D.

    Args:
      power: Float, power > 0. is an inverse exponent parameter (GeM power).
      pool_size: Integer or tuple of 2 integers, factors by which to downscale
        (vertical, horizontal)
      strides: Integer, tuple of 2 integers, or None. Strides values. If None,
        it will default to `pool_size`.
      padding: One of `valid` or `same`. `valid` means no padding. `same`
        results in padding evenly to the left/right or up/down of the input
        such that output has the same height/width dimension as the input.
      data_format: A string, one of `channels_last` (default) or
        `channels_first`. The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape `(batch, height,
        width, channels)` while `channels_first` corresponds to inputs with
        shape `(batch, channels, height, width)`.
    """
    super(GeMPooling2D, self).__init__()
    self.power = power
    self.eps = 1e-6
    self.pool_size = pool_size
    # Bug fix: the contract above promises that `strides=None` defaults to
    # `pool_size`, but the original stored None unchanged and passed it to
    # tf.nn.avg_pool, which requires concrete strides. Apply the documented
    # default here.
    self.strides = pool_size if strides is None else strides
    self.padding = padding.upper()
    # Map Keras-style data_format strings to the tf.nn convention.
    data_format_conv = {
        'channels_last': 'NHWC',
        'channels_first': 'NCHW',
    }
    self.data_format = data_format_conv[data_format]

  def call(self, x):
    """Computes GeM pooling as avg-pool in the power-transformed domain."""
    tmp = tf.pow(x, self.power)
    tmp = tf.nn.avg_pool(tmp, self.pool_size, self.strides, self.padding,
                         self.data_format)
    out = tf.pow(tmp, 1. / self.power)
    return out


def mac(x, axis=None):
  """Performs global max pooling (MAC).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  return tf.reduce_max(x, axis=axis, keepdims=False)


def spoc(x, axis=None):
  """Performs average pooling (SPoC).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  return tf.reduce_mean(x, axis=axis, keepdims=False)


def gem(x, axis=None, power=3., eps=1e-6):
  """Performs generalized mean pooling (GeM).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
    power: Float, power > 0 is an inverse exponent parameter (GeM power).
    eps: Float, parameter for numerical stability.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  # Clamp to `eps` so the fractional root below is well-defined.
  tmp = tf.pow(tf.maximum(x, eps), power)
  out = tf.pow(tf.reduce_mean(tmp, axis=axis, keepdims=False), 1. / power)
  return out
+ exp_output = [[3., 4.]] + # Compare actual and expected. + self.assertAllClose(exp_output, result) + + def testGem(self): + x = tf.constant([[[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]]) + # Run tested function. + result = pooling.gem(x, power=3., eps=1e-6) + # Define expected result. + exp_output = [[4.1601677, 4.9866314]] + # Compare actual and expected. + self.assertAllClose(exp_output, result) + + def testGeMPooling2D(self): + # Create a testing tensor. + x = tf.constant([[[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]]) + x = tf.reshape(x, [1, 3, 3, 1]) + + # Checking GeMPooling2D relation to MaxPooling2D for the large values of + # `p`. + max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), + strides=(1, 1), padding='valid') + out_max = max_pool_2d(x) + gem_pool_2d = pooling.GeMPooling2D(power=30., pool_size=(2, 2), + strides=(1, 1), padding='valid') + out_gem_max = gem_pool_2d(x) + + # Check that for large `p` GeMPooling2D is close to MaxPooling2D. + self.assertAllEqual(out_max, tf.round(out_gem_max)) + + # Checking GeMPooling2D relation to AveragePooling2D for the value + # of `p` = 1. + avg_pool_2d = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), + strides=(1, 1), + padding='valid') + out_avg = avg_pool_2d(x) + gem_pool_2d = pooling.GeMPooling2D(power=1., pool_size=(2, 2), + strides=(1, 1), padding='valid') + out_gem_avg = gem_pool_2d(x) + # Check that for `p` equals 1., GeMPooling2D becomes AveragePooling2D. 
+ self.assertAllEqual(out_avg, out_gem_avg) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/delf/delf/python/training/README.md b/research/delf/delf/python/training/README.md index 6712ee2e41116c320bbf6c2c231138c552fa443f..41ea2a0b47f6cbee4be31e6689ba3b77e6aefdd7 100644 --- a/research/delf/delf/python/training/README.md +++ b/research/delf/delf/python/training/README.md @@ -1,7 +1,7 @@ -# DELF Training Instructions +# DELF/DELG Training Instructions -This README documents the end-to-end process for training a landmark detection -and retrieval model using the DELF library on the +This README documents the end-to-end process for training a local and/or global +image feature model on the [Google Landmarks Dataset v2](https://github.com/cvdfoundation/google-landmark) (GLDv2). This can be achieved following these steps: @@ -143,6 +143,8 @@ curl -Os http://storage.googleapis.com/delf/resnet50_imagenet_weights.tar.gz tar -xzvf resnet50_imagenet_weights.tar.gz ``` +### Training with Local Features + Assuming the TFRecord files were generated in the `gldv2_dataset/tfrecord/` directory, running the following command should start training a model and output the results in the `gldv2_training` directory: @@ -156,26 +158,76 @@ python3 train.py \ --logdir=gldv2_training/ ``` -On a multi-GPU machine the batch size can be increased to speed up the training -using the `--batch_size` parameter. On a 8 Tesla P100 GPUs machine you can set -the batch size to `256`: +*NOTE: The `--use_autoencoder` parameter is set by default to `True`, therefore +the model will be by default trained with the autoencoder.* + +### Training with Local and Global Features + +It is also possible to train the model with an improved global features head as +introduced in the [DELG paper](https://arxiv.org/abs/2001.05027). 
To do this, +specify the additional parameter `--delg_global_features` when launching the +training, like in the following example: ``` ---batch_size=256 +python3 train.py \ + --train_file_pattern=gldv2_dataset/tfrecord/train* \ + --validation_file_pattern=gldv2_dataset/tfrecord/validation* \ + --imagenet_checkpoint=resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 \ + --dataset_version=gld_v2_clean \ + --logdir=gldv2_training/ \ + --delg_global_features ``` +*NOTE: The `--use_autoencoder` parameter is set by default to `True`, therefore +the model will be by default trained with the autoencoder.* + +### Hyperparameter Guidelines + +In order to improve the convergence of the training, the following +hyperparameter values have been tested and validated on the following +infrastructures, the remaining `train.py` flags keeping their **default +values**: +* 8 Tesla P100 GPUs: `--batch_size=256`, `--initial_lr=0.01` +* 4 Tesla P100 GPUs: `--batch_size=128`, `--initial_lr=0.005` + ## Exporting the Trained Model Assuming the training output, the TensorFlow checkpoint, is in the `gldv2_training` directory, running the following commands exports the model. -### DELF local feature model +### DELF local feature-only model + +This should be used when you are only interested in having a local feature +model. + +``` +python3 model/export_local_model.py \ + --ckpt_path=gldv2_training/delf_weights \ + --export_path=gldv2_model_local +``` + +### DELG global feature-only model + +This should be used when you are only interested in having a global feature +model. ``` -python3 model/export_model.py \ +python3 model/export_global_model.py \ --ckpt_path=gldv2_training/delf_weights \ - --export_path=gldv2_model_local \ - --block3_strides + --export_path=gldv2_model_global \ + --delg_global_features +``` + +### DELG local+global feature model + +This should be used when you are interested in jointly extracting local and +global features. 
+ +``` +python3 model/export_local_and_global_model.py \ + --ckpt_path=gldv2_training/delf_weights \ + --export_path=gldv2_model_local_and_global \ + --delg_global_features ``` ### Kaggle-compatible global feature model @@ -184,6 +236,13 @@ To export a global feature model in the format required by the [2020 Landmark Retrieval challenge](https://www.kaggle.com/c/landmark-retrieval-2020), you can use the following command: +*NOTE*: this command is helpful to use the model directly in the above-mentioned +Kaggle competition; however, this is a different format than the one required in +this DELF/DELG codebase (ie, if you export the model this way, the commands +found in the [DELG instructions](../delg/DELG_INSTRUCTIONS.md) would not work). +To export the model in a manner compatible to this codebase, use a similar +command as the "DELG global feature-only model" above. + ``` python3 model/export_global_model.py \ --ckpt_path=gldv2_training/delf_weights \ @@ -193,7 +252,9 @@ python3 model/export_global_model.py \ --normalize_global_descriptor ``` -## Testing the Trained Model +## Testing the trained model + +### Testing the trained local feature model After the trained model has been exported, it can be used to extract DELF features from 2 images of the same landmark and to perform a matching test @@ -266,3 +327,13 @@ python3 ../examples/match_images.py \ The generated image `matched_images.png` should look similar to this one: ![MatchedImagesDemo](./matched_images_demo.png) + +### Testing the trained global (or global+local) feature model + +Please follow the [DELG instructions](../delg/DELG_INSTRUCTIONS.md). The only +modification should be to pass a different `delf_config_path` when doing feature +extraction, which should point to the newly-trained model. As described in the +[DelfConfig](../../protos/delf_config.proto), you should set the +`use_local_features` and `use_global_features` in the right way, depending on +which feature modalities you are using. 
Note also that you should set +`is_tf2_exported` to `true`. diff --git a/research/delf/delf/python/training/datasets/__init__.py b/research/delf/delf/python/training/datasets/__init__.py deleted file mode 100644 index 7e0a672716945394cce4b2c69ee3d086192da87c..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/training/datasets/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2020 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Module exposing datasets for training.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import -from delf.python.training.datasets import googlelandmarks -# pylint: enable=unused-import diff --git a/research/delf/delf/python/training/global_features_utils.py b/research/delf/delf/python/training/global_features_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8b35178b1530ac7572bc8f50c3a6fabfff6baca6 --- /dev/null +++ b/research/delf/delf/python/training/global_features_utils.py @@ -0,0 +1,229 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class AverageMeter():
  """Computes and stores the average and current value of loss."""

  def __init__(self):
    """Initialization of the AverageMeter."""
    self.reset()

  def reset(self):
    """Resets all the values."""
    self.val = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def update(self, val, n=1):
    """Updates values in the AverageMeter.

    Args:
      val: Float, loss value.
      n: Integer, number of instances.
    """
    self.val = val
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count


def compute_metrics_and_print(dataset_name,
                              sorted_index_ids,
                              ground_truth,
                              desired_pr_ranks=None,
                              log=True):
  """Computes and logs ground-truth metrics for Revisited datasets.

  Args:
    dataset_name: String, name of the dataset.
    sorted_index_ids: Integer NumPy array of shape [#queries, #index_images].
      For each query, contains an array denoting the most relevant index
      images, sorted from most to least relevant.
    ground_truth: List containing ground-truth information for dataset. Each
      entry is a dict corresponding to the ground-truth information for a
      query. The dict has keys 'ok' and 'junk', mapping to a NumPy array of
      integers.
    desired_pr_ranks: List of integers containing the desired precision/recall
      ranks to be reported. E.g., if precision@1/recall@1 and
      precision@10/recall@10 are desired, this should be set to [1, 10]. The
      largest item should be <= #sorted_index_ids. Default: [1, 5, 10].
    log: Whether to log results using logging.info().

  Returns:
    mAP: (metricsE, metricsM, metricsH) Tuple of the metrics for different
      levels of complexity. Each metrics is a list containing:
      mean_average_precision (float), mean_precisions (NumPy array of
      floats, with shape [len(desired_pr_ranks)]), mean_recalls (NumPy array
      of floats, with shape [len(desired_pr_ranks)]), average_precisions
      (NumPy array of floats, with shape [#queries]), precisions (NumPy array
      of floats, with shape [#queries, len(desired_pr_ranks)]), recalls (NumPy
      array of floats, with shape [#queries, len(desired_pr_ranks)]).

  Raises:
    ValueError: If an unknown dataset name is provided as an argument.
  """
  # Bug fix: the original tested `dataset` (the imported module) instead of
  # the `dataset_name` argument, so the validation could never behave as
  # intended, and the error message formatted the module object.
  if dataset_name not in dataset.DATASET_NAMES:
    raise ValueError('Unknown dataset: {}!'.format(dataset_name))

  if desired_pr_ranks is None:
    desired_pr_ranks = [1, 5, 10]

  (easy_ground_truth, medium_ground_truth,
   hard_ground_truth) = dataset.ParseEasyMediumHardGroundTruth(ground_truth)

  metrics_easy = dataset.ComputeMetrics(sorted_index_ids, easy_ground_truth,
                                        desired_pr_ranks)
  metrics_medium = dataset.ComputeMetrics(sorted_index_ids,
                                          medium_ground_truth,
                                          desired_pr_ranks)
  metrics_hard = dataset.ComputeMetrics(sorted_index_ids, hard_ground_truth,
                                        desired_pr_ranks)

  debug_and_log(
      '>> {}: mAP E: {}, M: {}, H: {}'.format(
          dataset_name, np.around(metrics_easy[0] * 100, decimals=2),
          np.around(metrics_medium[0] * 100, decimals=2),
          np.around(metrics_hard[0] * 100, decimals=2)),
      log=log)

  debug_and_log(
      '>> {}: mP@k{} E: {}, M: {}, H: {}'.format(
          dataset_name, desired_pr_ranks,
          np.around(metrics_easy[1] * 100, decimals=2),
          np.around(metrics_medium[1] * 100, decimals=2),
          np.around(metrics_hard[1] * 100, decimals=2)),
      log=log)

  return metrics_easy, metrics_medium, metrics_hard


def htime(time_difference):
  """Time formatting function.

  Depending on the value of `time_difference` outputs time in an appropriate
  time format.

  Args:
    time_difference: Float, time difference between the two events.

  Returns:
    time: String representing time in an appropriate time format.
  """
  time_difference = round(time_difference)

  days = time_difference // 86400
  hours = time_difference // 3600 % 24
  minutes = time_difference // 60 % 60
  seconds = time_difference % 60

  if days > 0:
    return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
  if hours > 0:
    return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
  if minutes > 0:
    return '{:d}m {:d}s'.format(minutes, seconds)
  return '{:d}s'.format(seconds)


def debug_and_log(msg, debug=True, log=True, debug_on_the_same_line=False):
  """Outputs `msg` to both stdout (if in the debug mode) and the log file.

  Args:
    msg: String, message to be logged.
    debug: Bool, if True, will print `msg` to stdout.
    log: Bool, if True, will redirect `msg` to the logfile.
    debug_on_the_same_line: Bool, if True, will print `msg` to stdout without
      a new line. When using this mode, logging to a logfile is disabled.
  """
  if debug_on_the_same_line:
    print(msg, end='')
    return
  if debug:
    print(msg)
  if log:
    logging.info(msg)


def get_standard_keras_models():
  """Gets the standard keras model names.

  Returns:
    model_names: List, names of the standard keras models.
  """
  # Collect every public callable exposed by tf.keras.applications.
  model_names = sorted(
      name for name in tf.keras.applications.__dict__
      if not name.startswith('__') and
      callable(tf.keras.applications.__dict__[name]))
  return model_names


def create_model_directory(training_dataset, arch, pool, whitening, pretrained,
                           loss, loss_margin, optimizer, lr, weight_decay,
                           neg_num, query_size, pool_size, batch_size,
                           update_every, image_size, directory):
  """Based on the model parameters, creates the model directory.

  If the model directory does not exist, the directory is created.

  Args:
    training_dataset: String, training dataset name.
    arch: String, model architecture.
    pool: String, pooling option.
    whitening: Bool, whether the model is trained with global whitening.
    pretrained: Bool, whether the model is initialized with the precomputed
      weights.
    loss: String, training loss type.
    loss_margin: Float, loss margin.
    optimizer: Sting, used optimizer.
    lr: Float, initial learning rate.
    weight_decay: Float, weight decay.
    neg_num: Integer, Number of negative images per train/val tuple.
    query_size: Integer, number of queries per one training epoch.
    pool_size: Integer, size of the pool for hard negative mining.
    batch_size: Integer, batch size.
    update_every: Integer, frequency of the model weights update.
    image_size: Integer, maximum size of longer image side used for training.
    directory: String, destination where trained network should be saved.

  Returns:
    folder: String, path to the model folder.
  """
  # Encode every hyperparameter into the folder name so that runs with
  # different settings never collide.
  folder = '{}_{}_{}'.format(training_dataset, arch, pool)
  if whitening:
    folder += '_whiten'
  if not pretrained:
    folder += '_notpretrained'
  folder += ('_{}_m{:.2f}_{}_lr{:.1e}_wd{:.1e}_nnum{}_qsize{}_psize{}_bsize{}'
             '_uevery{}_imsize{}').format(loss, loss_margin, optimizer, lr,
                                          weight_decay, neg_num, query_size,
                                          pool_size, batch_size, update_every,
                                          image_size)

  folder = os.path.join(directory, folder)
  debug_and_log(
      '>> Creating directory if does not exist:\n>> \'{}\''.format(folder))
  if not os.path.exists(folder):
    os.makedirs(folder)
  return folder
class ContrastiveLoss(tf.keras.losses.Loss):
  """Contrastive Loss layer.

  Computes the contrastive loss over a batch of query/positive/negative
  descriptors. Implementation based on: https://arxiv.org/abs/1604.02426.
  """

  def __init__(self, margin=0.7, reduction=tf.keras.losses.Reduction.NONE):
    """Initialization of Contrastive Loss layer.

    Args:
      margin: Float contrastive loss margin.
      reduction: Type of loss reduction.
    """
    super(ContrastiveLoss, self).__init__(reduction)
    self.margin = margin
    # Small constant for numerical stability.
    self.eps = 1e-6

  def __call__(self, queries, positives, negatives):
    """Invokes the Contrastive Loss instance.

    Args:
      queries: [batch_size, dim] Anchor input tensor.
      positives: [batch_size, dim] Positive sample input tensor.
      negatives: [batch_size, num_neg, dim] Negative sample input tensor.

    Returns:
      loss: Scalar tensor.
    """
    return contrastive_loss(
        queries, positives, negatives, margin=self.margin, eps=self.eps)


class TripletLoss(tf.keras.losses.Loss):
  """Triplet Loss layer.

  Computes triplet loss for a batch of images, encouraging every query to be
  closer to its positive than to any of its negatives. The margin specifies
  when a triplet has become too "easy" and should no longer contribute to the
  gradient. Unlike the Contrastive Loss, Triplet Loss operates on squared
  distances. Implementation based on: https://arxiv.org/abs/1511.07247.
  """

  def __init__(self, margin=0.1, reduction=tf.keras.losses.Reduction.NONE):
    """Initialization of Triplet Loss layer.

    Args:
      margin: Triplet loss margin.
      reduction: Type of loss reduction.
    """
    super(TripletLoss, self).__init__(reduction)
    self.margin = margin

  def __call__(self, queries, positives, negatives):
    """Invokes the Triplet Loss instance.

    Args:
      queries: [batch_size, dim] Anchor input tensor.
      positives: [batch_size, dim] Positive sample input tensor.
      negatives: [batch_size, num_neg, dim] Negative sample input tensor.

    Returns:
      loss: Scalar tensor.
    """
    return triplet_loss(queries, positives, negatives, margin=self.margin)


def contrastive_loss(queries, positives, negatives, margin=0.7, eps=1e-6):
  """Calculates Contrastive Loss.

  We expect the `queries`, `positives` and `negatives` to be normalized with
  unit length for training stability. The contrastive loss directly optimizes
  the pairwise distance by pushing all positive distances towards 0 while
  keeping negative distances above the margin.

  Args:
    queries: [batch_size, dim] Anchor input tensor.
    positives: [batch_size, dim] Positive sample input tensor.
    negatives: [batch_size, num_neg, dim] Negative sample input tensor.
    margin: Float contrastive loss loss margin.
    eps: Float parameter for numerical stability.

  Returns:
    loss: Scalar tensor.
  """
  descriptor_dim = tf.shape(queries)[1]
  num_queries = tf.shape(queries)[0]
  num_positives = tf.shape(positives)[0]
  num_neg_per_query = tf.shape(negatives)[1]

  # Flatten the negatives to a single [num_neg * batch, dim] matrix.
  flat_negatives = tf.reshape(negatives,
                              [num_neg_per_query * num_queries,
                               descriptor_dim])

  # Tile each query once per positive and per negative so a single distance
  # computation covers all pairs.
  tiled_queries = tf.repeat(queries, num_neg_per_query + 1, axis=0)
  paired_samples = tf.concat([positives, flat_negatives], axis=0)

  # Euclidean distance per pair; small for positive pairs, large for negative
  # pairs. NOTE(review): `eps` is added to the difference vector (not to the
  # norm) — presumably for gradient stability as in reference
  # implementations; confirm before changing.
  pair_distances = tf.norm(tiled_queries - paired_samples + eps, axis=1)

  positive_term = 0.5 * tf.pow(pair_distances[:num_positives], 2.0)
  negative_term = 0.5 * tf.pow(
      tf.math.maximum(margin - pair_distances[num_positives:], 0), 2.0)

  # Sum both contributions into the final scalar loss.
  return tf.reduce_sum(tf.concat([positive_term, negative_term], 0))


def triplet_loss(queries, positives, negatives, margin=0.1):
  """Calculates Triplet Loss.

  Triplet loss tries to keep all queries closer to positives than to any
  negatives. Differently from the Contrastive Loss, Triplet Loss uses squared
  distances when computing the loss.

  Args:
    queries: [batch_size, dim] Anchor input tensor.
    positives: [batch_size, dim] Positive sample input tensor.
    negatives: [batch_size, num_neg, dim] Negative sample input tensor.
    margin: Float triplet loss loss margin.

  Returns:
    loss: Scalar tensor.
  """
  descriptor_dim = tf.shape(queries)[1]
  num_queries = tf.shape(queries)[0]
  num_neg_per_query = tf.shape(negatives)[1]

  # Flatten negatives, then tile queries and positives so every triplet is a
  # row-aligned pair of rows.
  flat_negatives = tf.reshape(negatives,
                              [num_neg_per_query * num_queries,
                               descriptor_dim])
  tiled_queries = tf.repeat(queries, num_neg_per_query, axis=0)
  tiled_positives = tf.repeat(positives, num_neg_per_query, axis=0)

  # Squared Euclidean distances (no square root, per the triplet-loss
  # formulation used here).
  pos_sq_distances = tf.reduce_sum(
      tf.square(tiled_queries - tiled_positives), axis=1)
  neg_sq_distances = tf.reduce_sum(
      tf.square(tiled_queries - flat_negatives), axis=1)

  # Hinge on the margin: only triplets violating it contribute.
  return tf.reduce_sum(
      tf.maximum(pos_sq_distances - neg_sq_distances + margin, 0.0))
class RankingLossesTest(tf.test.TestCase):
  """Numeric regression tests for the contrastive and triplet ranking losses."""

  def testContrastiveLoss(self):
    # Testing the correct numeric value with a single unit-normalized
    # (anchor, positive, negative) triple.
    anchors = tf.math.l2_normalize(tf.constant([[1.0, 2.0, -2.0]]))
    positives = tf.math.l2_normalize(tf.constant([[-1.0, 2.0, 0.0]]))
    negatives = tf.math.l2_normalize(tf.constant([[[-5.0, 0.0, 3.0]]]))

    loss = ranking_losses.contrastive_loss(anchors, positives, negatives,
                                           margin=0.7, eps=1e-6)
    self.assertAllClose(0.55278635, loss)

  def testTripletLossZeroLoss(self):
    # When the query-positive distance is smaller than the query-negative
    # distance, the hinge is inactive and the loss must be exactly zero.
    anchors = tf.math.l2_normalize(tf.constant([[1.0, 2.0, -2.0]]))
    positives = tf.math.l2_normalize(tf.constant([[-1.0, 2.0, 0.0]]))
    negatives = tf.math.l2_normalize(tf.constant([[[-5.0, 0.0, 3.0]]]))

    loss = ranking_losses.triplet_loss(anchors, positives, negatives,
                                       margin=0.1)
    self.assertAllClose(0.0, loss)

  def testTripletLossNonZeroLoss(self):
    # With positive and negative swapped, the query-positive distance exceeds
    # the query-negative distance and the loss is strictly positive.
    anchors = tf.math.l2_normalize(tf.constant([[1.0, 2.0, -2.0]]))
    positives = tf.math.l2_normalize(tf.constant([[-5.0, 0.0, 3.0]]))
    negatives = tf.math.l2_normalize(tf.constant([[[-1.0, 2.0, 0.0]]]))

    loss = ranking_losses.triplet_loss(anchors, positives, negatives,
                                       margin=0.1)
    self.assertAllClose(2.2520838, loss)


if __name__ == '__main__':
  tf.test.main()
+ """ + super(AutoencoderModel, self).__init__(name=name) + self.conv1 = layers.Conv2D( + reduced_dimension, + kernel_size, + padding='same', + name='autoenc_conv1') + self.conv2 = layers.Conv2D( + expand_dimension, + kernel_size, + activation=tf.keras.activations.relu, + padding='same', + name='autoenc_conv2') + + def call(self, inputs): + dim_reduced_features = self.conv1(inputs) + dim_expanded_features = self.conv2(dim_reduced_features) + return dim_expanded_features, dim_reduced_features + + class Delf(tf.keras.Model): """Instantiates Keras DELF model using ResNet50 as backbone. @@ -89,12 +121,36 @@ class Delf(tf.keras.Model): from conv_4 are used to compute an attention map of the same resolution. """ - def __init__(self, block3_strides=True, name='DELF'): + def __init__(self, + block3_strides=True, + name='DELF', + pooling='avg', + gem_power=3.0, + embedding_layer=False, + embedding_layer_dim=2048, + use_dim_reduction=False, + reduced_dimension=128, + dim_expand_channels=1024): """Initialization of DELF model. Args: block3_strides: bool, whether to add strides to the output of block3. name: str, name to identify model. + pooling: str, pooling mode for global feature extraction; possible values + are 'None', 'avg', 'max', 'gem.' + gem_power: float, GeM power for GeM pooling. Only used if pooling == + 'gem'. + embedding_layer: bool, whether to create an embedding layer (FC whitening + layer). + embedding_layer_dim: int, size of the embedding layer. + use_dim_reduction: Whether to integrate dimensionality reduction layers. + If True, extra layers are added to reduce the dimensionality of the + extracted features. + reduced_dimension: int, only used if use_dim_reduction is True. The output + dimension of the autoencoder layer. + dim_expand_channels: int, only used if use_dim_reduction is True. The + number of channels of the backbone block used. Default value 1024 is the + number of channels of backbone block 'block3'. 
""" super(Delf, self).__init__(name=name) @@ -103,41 +159,74 @@ class Delf(tf.keras.Model): 'channels_last', name='backbone', include_top=False, - pooling='avg', + pooling=pooling, block3_strides=block3_strides, - average_pooling=False) + average_pooling=False, + gem_power=gem_power, + embedding_layer=embedding_layer, + embedding_layer_dim=embedding_layer_dim) # Attention model. self.attention = AttentionModel(name='attention') - # Define classifiers for training backbone and attention models. - def init_classifiers(self, num_classes): - self.num_classes = num_classes - self.desc_classification = layers.Dense( - num_classes, activation=None, kernel_regularizer=None, name='desc_fc') + # Autoencoder model. + self._use_dim_reduction = use_dim_reduction + if self._use_dim_reduction: + self.autoencoder = AutoencoderModel(reduced_dimension, + dim_expand_channels, + name='autoencoder') + def init_classifiers(self, num_classes, desc_classification=None): + """Define classifiers for training backbone and attention models.""" + self.num_classes = num_classes + if desc_classification is None: + self.desc_classification = layers.Dense( + num_classes, activation=None, kernel_regularizer=None, name='desc_fc') + else: + self.desc_classification = desc_classification self.attn_classification = layers.Dense( num_classes, activation=None, kernel_regularizer=None, name='att_fc') - # Weights to optimize for descriptor fine tuning. - @property - def desc_trainable_weights(self): - return (self.backbone.trainable_weights + - self.desc_classification.trainable_weights) - - # Weights to optimize for attention model training. - @property - def attn_trainable_weights(self): - return (self.attention.trainable_weights + - self.attn_classification.trainable_weights) + def global_and_local_forward_pass(self, images, training=True): + """Run a forward to calculate global descriptor and attention prelogits. 
- def call(self, input_image, training=True): - blocks = {} - - self.backbone.build_call( - input_image, intermediates_dict=blocks, training=training) + Args: + images: Tensor containing the dataset on which to run the forward pass. + training: Indicator of wether the forward pass is running in training mode + or not. - features = blocks['block3'] # pytype: disable=key-error - _, probs, _ = self.attention(features, training=training) + Returns: + Global descriptor prelogits, attention prelogits, attention scores, + backbone weights. + """ + backbone_blocks = {} + desc_prelogits = self.backbone.build_call( + images, intermediates_dict=backbone_blocks, training=training) + # Prevent gradients from propagating into the backbone. See DELG paper: + # https://arxiv.org/abs/2001.05027. + block3 = backbone_blocks['block3'] # pytype: disable=key-error + block3 = tf.stop_gradient(block3) + if self._use_dim_reduction: + (dim_expanded_features, dim_reduced_features) = self.autoencoder(block3) + attn_prelogits, attn_scores, _ = self.attention(dim_expanded_features, + training=training) + else: + attn_prelogits, attn_scores, _ = self.attention(block3, training=training) + dim_expanded_features = None + dim_reduced_features = None + return (desc_prelogits, attn_prelogits, attn_scores, backbone_blocks, + dim_expanded_features, dim_reduced_features) + + def build_call(self, input_image, training=True): + (global_feature, _, attn_scores, backbone_blocks, _, + dim_reduced_features) = self.global_and_local_forward_pass(input_image, + training) + if self._use_dim_reduction: + features = dim_reduced_features + else: + features = backbone_blocks['block3'] # pytype: disable=key-error + return global_feature, attn_scores, features + def call(self, input_image, training=True): + _, probs, features = self.build_call(input_image, training=training) return probs, features diff --git a/research/delf/delf/python/training/model/delf_model_test.py 
b/research/delf/delf/python/training/model/delf_model_test.py index c4cbcef555db3cd6e6395aee69f1479863916bd4..7d5ca44e0c1802e2442fc58f7b7c38b10f99a7f1 100644 --- a/research/delf/delf/python/training/model/delf_model_test.py +++ b/research/delf/delf/python/training/model/delf_model_test.py @@ -87,28 +87,21 @@ class DelfTest(tf.test.TestCase, parameterized.TestCase): return tf.nn.compute_average_loss( per_example_loss, global_batch_size=batch_size) - with tf.GradientTape() as desc_tape: - blocks = {} - desc_prelogits = model.backbone( - images, intermediates_dict=blocks, training=False) - desc_logits = model.desc_classification(desc_prelogits) + with tf.GradientTape() as gradient_tape: + (desc_prelogits, attn_prelogits, _, _, _, + _) = model.global_and_local_forward_pass(images) + # Calculate global loss by applying the descriptor classifier. desc_logits = model.desc_classification(desc_prelogits) desc_loss = compute_loss(labels, desc_logits) - - gradients = desc_tape.gradient(desc_loss, model.desc_trainable_weights) - clipped, _ = tf.clip_by_global_norm(gradients, clip_norm=clip_val) - optimizer.apply_gradients(zip(clipped, model.desc_trainable_weights)) - - with tf.GradientTape() as attn_tape: - block3 = blocks['block3'] - block3 = tf.stop_gradient(block3) - attn_prelogits, _, _ = model.attention(block3, training=True) + # Calculate attention loss by applying the attention block classifier. attn_logits = model.attn_classification(attn_prelogits) attn_loss = compute_loss(labels, attn_logits) - - gradients = attn_tape.gradient(attn_loss, model.attn_trainable_weights) + # Cumulate global loss and attention loss and backpropagate through the + # descriptor layer and attention layer together. 
class Delg(delf_model.Delf):
  """Instantiates Keras DELG model using ResNet50 as backbone.

  This class implements the [DELG](https://arxiv.org/abs/2001.05027) model for
  extracting local and global features from images. The same attention layer
  is trained as in the DELF model. In addition, the extraction of global
  features is trained using GeMPooling, a FC whitening layer also called
  "embedding layer" and ArcFace loss.
  """

  def __init__(self,
               block3_strides=True,
               name='DELG',
               gem_power=3.0,
               embedding_layer_dim=2048,
               scale_factor_init=45.25,  # sqrt(2048)
               arcface_margin=0.1,
               use_dim_reduction=False,
               reduced_dimension=128,
               dim_expand_channels=1024):
    """Initialization of DELG model.

    Args:
      block3_strides: bool, whether to add strides to the output of block3.
      name: str, name to identify model.
      gem_power: float, GeM power parameter.
      embedding_layer_dim: int, dimension of the embedding layer.
      scale_factor_init: float, initial value of the (non-trainable) logit
        scale factor used by the cosine classifier.
      arcface_margin: float, ArcFace margin.
      use_dim_reduction: Whether to integrate dimensionality reduction layers.
        If True, extra layers are added to reduce the dimensionality of the
        extracted features.
      reduced_dimension: Only used if use_dim_reduction is True, the output
        dimension of the dim_reduction layer.
      dim_expand_channels: Only used if use_dim_reduction is True, the
        number of channels of the backbone block used. Default value 1024 is
        the number of channels of backbone block 'block3'.
    """
    # Use '%f' for gem_power: it is a float (default 3.0) and '%d' would log
    # a truncated value.
    logging.info('Creating Delg model, gem_power %f, embedding_layer_dim %d',
                 gem_power, embedding_layer_dim)
    super(Delg, self).__init__(block3_strides=block3_strides,
                               name=name,
                               pooling='gem',
                               gem_power=gem_power,
                               embedding_layer=True,
                               embedding_layer_dim=embedding_layer_dim,
                               use_dim_reduction=use_dim_reduction,
                               reduced_dimension=reduced_dimension,
                               dim_expand_channels=dim_expand_channels)
    self._embedding_layer_dim = embedding_layer_dim
    self._scale_factor_init = scale_factor_init
    self._arcface_margin = arcface_margin

  def init_classifiers(self, num_classes):
    """Define classifiers for training backbone and attention models."""
    logging.info('Initializing Delg backbone and attention models classifiers')
    backbone_classifier_func = self._create_backbone_classifier(num_classes)
    super(Delg, self).init_classifiers(
        num_classes,
        desc_classification=backbone_classifier_func)

  def _create_backbone_classifier(self, num_classes):
    """Define the classifier for training the backbone model.

    Builds a cosine (normalized-weight) classifier head; logits are produced
    by `cosine_classifier_logits`, optionally with ArcFace margin applied at
    training time.

    Args:
      num_classes: int, number of classes.

    Returns:
      Callable producing classification logits from (prelogits, labels).
    """
    logging.info('Creating cosine classifier')
    self.cosine_weights = tf.Variable(
        initial_value=tf.initializers.GlorotUniform()(
            shape=[self._embedding_layer_dim, num_classes]),
        name='cosine_weights',
        trainable=True)
    # The scale factor is kept fixed during training.
    self.scale_factor = tf.Variable(self._scale_factor_init,
                                    name='scale_factor',
                                    trainable=False)
    classifier_func = functools.partial(cosine_classifier_logits,
                                        num_classes=num_classes,
                                        cosine_weights=self.cosine_weights,
                                        scale_factor=self.scale_factor,
                                        arcface_margin=self._arcface_margin)
    # Expose trainable weights on the partial so callers can optimize the
    # classifier alongside the model.
    classifier_func.trainable_weights = [self.cosine_weights]
    return classifier_func
def apply_arcface_margin(cosine_sim, one_hot_labels, arcface_margin):
  """Applies ArcFace margin to cosine similarity inputs.

  For a reference, see https://arxiv.org/pdf/1801.07698.pdf. The ArcFace
  margin is added to the angles of the correct classes (as per the ArcFace
  paper), and only where those angles are <= (pi - margin). Otherwise,
  applying the margin may actually improve their cosine similarity.

  Args:
    cosine_sim: float tensor with shape [batch_size, num_classes].
    one_hot_labels: int tensor with shape [batch_size, num_classes].
    arcface_margin: float.

  Returns:
    cosine_sim_with_margin: Float tensor with shape [batch_size, num_classes].
  """
  angles = tf.acos(cosine_sim, name='acos')
  # Mask out label entries whose angle is already within `margin` of pi.
  applicable_labels = tf.where(tf.greater(angles, math.pi - arcface_margin),
                               tf.zeros_like(one_hot_labels),
                               one_hot_labels,
                               name='selected_labels')
  # Shift only the remaining ground-truth angles by the margin.
  shifted_angles = tf.where(tf.cast(applicable_labels, dtype=tf.bool),
                            angles + arcface_margin,
                            angles,
                            name='final_theta')
  return tf.cos(shifted_angles, name='cosine_sim_with_margin')
Used only if' + '--delg_global_features is present.') class _ExtractModule(tf.Module): @@ -58,7 +69,10 @@ class _ExtractModule(tf.Module): def __init__(self, multi_scale_pool_type='None', normalize_global_descriptor=False, - input_scales_tensor=None): + input_scales_tensor=None, + delg_global_features=False, + delg_gem_power=3.0, + delg_embedding_layer_dim=2048): """Initialization of global feature model. Args: @@ -69,6 +83,12 @@ class _ExtractModule(tf.Module): the exported model. If not None, the specified 1D tensor of floats will be hard-coded as the desired input scales, in conjunction with ExtractFeaturesFixedScales. + delg_global_features: Whether the model uses a DELG-like global feature + head. + delg_gem_power: Power for Generalized Mean pooling in the DELG model. Used + only if 'delg_global_features' is True. + delg_embedding_layer_dim: Size of the FC whitening layer (embedding + layer). Used only if 'delg_global_features' is True. """ self._multi_scale_pool_type = multi_scale_pool_type self._normalize_global_descriptor = normalize_global_descriptor @@ -78,7 +98,14 @@ class _ExtractModule(tf.Module): self._input_scales_tensor = input_scales_tensor # Setup the DELF model for extraction. - self._model = delf_model.Delf(block3_strides=False, name='DELF') + if delg_global_features: + self._model = delg_model.Delg( + block3_strides=False, + name='DELG', + gem_power=delg_gem_power, + embedding_layer_dim=delg_embedding_layer_dim) + else: + self._model = delf_model.Delf(block3_strides=False, name='DELF') def LoadWeights(self, checkpoint_path): self._model.load_weights(checkpoint_path) @@ -134,7 +161,8 @@ def main(argv): name='input_scales') module = _ExtractModule(FLAGS.multi_scale_pool_type, FLAGS.normalize_global_descriptor, - input_scales_tensor) + input_scales_tensor, FLAGS.delg_global_features, + FLAGS.delg_gem_power, FLAGS.delg_embedding_layer_dim) # Load the weights. 
checkpoint_path = FLAGS.ckpt_path diff --git a/research/delf/delf/python/training/model/export_local_and_global_model.py b/research/delf/delf/python/training/model/export_local_and_global_model.py new file mode 100644 index 0000000000000000000000000000000000000000..a6cee584f87ad0b117b2edbaa095f3cbe9318bb5 --- /dev/null +++ b/research/delf/delf/python/training/model/export_local_and_global_model.py @@ -0,0 +1,170 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Export DELG tensorflow inference model. + +The exported model can be used to jointly extract local and global features. It +may use an image pyramid for multi-scale processing, and will include receptive +field calculation and keypoint selection for the local feature head. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from absl import app +from absl import flags +import tensorflow as tf + +from delf.python.training.model import delf_model +from delf.python.training.model import delg_model +from delf.python.training.model import export_model_utils + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'ckpt_path', '/tmp/delf-logdir/delf-weights', 'Path to saved checkpoint.') +flags.DEFINE_string('export_path', None, 'Path where model will be exported.') +flags.DEFINE_boolean( + 'delg_global_features', True, + 'Whether the model uses a DELG-like global feature head.') +flags.DEFINE_float( + 'delg_gem_power', 3.0, + 'Power for Generalized Mean pooling. Used only if --delg_global_features' + 'is present.') +flags.DEFINE_integer( + 'delg_embedding_layer_dim', 2048, + 'Size of the FC whitening layer (embedding layer). Used only if' + '--delg_global_features is present.') +flags.DEFINE_boolean( + 'block3_strides', True, + 'Whether to apply strides after block3, used for local feature head.') +flags.DEFINE_float( + 'iou', 1.0, 'IOU for non-max suppression used in local feature head.') +flags.DEFINE_boolean( + 'use_autoencoder', True, + 'Whether the exported model should use an autoencoder.') +flags.DEFINE_float( + 'autoencoder_dimensions', 128, + 'Number of dimensions of the autoencoder. Used only if' + 'use_autoencoder=True.') +flags.DEFINE_float( + 'local_feature_map_channels', 1024, + 'Number of channels at backbone layer used for local feature extraction. ' + 'Default value 1024 is the number of channels of block3. Used only if' + 'use_autoencoder=True.') + + +class _ExtractModule(tf.Module): + """Helper module to build and save DELG model.""" + + def __init__(self, + delg_global_features=True, + delg_gem_power=3.0, + delg_embedding_layer_dim=2048, + block3_strides=True, + iou=1.0): + """Initialization of DELG model. 
+ + Args: + delg_global_features: Whether the model uses a DELG-like global feature + head. + delg_gem_power: Power for Generalized Mean pooling in the DELG model. Used + only if 'delg_global_features' is True. + delg_embedding_layer_dim: Size of the FC whitening layer (embedding + layer). Used only if 'delg_global_features' is True. + block3_strides: bool, whether to add strides to the output of block3. + iou: IOU for non-max suppression. + """ + self._stride_factor = 2.0 if block3_strides else 1.0 + self._iou = iou + + # Setup the DELG model for extraction. + if delg_global_features: + self._model = delg_model.Delg( + block3_strides=block3_strides, + name='DELG', + gem_power=delg_gem_power, + embedding_layer_dim=delg_embedding_layer_dim, + use_dim_reduction=FLAGS.use_autoencoder, + reduced_dimension=FLAGS.autoencoder_dimensions, + dim_expand_channels=FLAGS.local_feature_map_channels) + else: + self._model = delf_model.Delf( + block3_strides=block3_strides, + name='DELF', + use_dim_reduction=FLAGS.use_autoencoder, + reduced_dimension=FLAGS.autoencoder_dimensions, + dim_expand_channels=FLAGS.local_feature_map_channels) + + def LoadWeights(self, checkpoint_path): + self._model.load_weights(checkpoint_path) + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'), + tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'), + tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'), + tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres'), + tf.TensorSpec( + shape=[None], dtype=tf.int32, name='input_global_scales_ind') + ]) + def ExtractFeatures(self, input_image, input_scales, input_max_feature_num, + input_abs_thres, input_global_scales_ind): + extracted_features = export_model_utils.ExtractLocalAndGlobalFeatures( + input_image, input_scales, input_max_feature_num, input_abs_thres, + input_global_scales_ind, self._iou, + lambda x: self._model.build_call(x, training=False), + 
self._stride_factor) + + named_output_tensors = {} + named_output_tensors['boxes'] = tf.identity( + extracted_features[0], name='boxes') + named_output_tensors['features'] = tf.identity( + extracted_features[1], name='features') + named_output_tensors['scales'] = tf.identity( + extracted_features[2], name='scales') + named_output_tensors['scores'] = tf.identity( + extracted_features[3], name='scores') + named_output_tensors['global_descriptors'] = tf.identity( + extracted_features[4], name='global_descriptors') + return named_output_tensors + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + export_path = FLAGS.export_path + if os.path.exists(export_path): + raise ValueError(f'Export_path {export_path} already exists. Please ' + 'specify a different path or delete the existing one.') + + module = _ExtractModule(FLAGS.delg_global_features, FLAGS.delg_gem_power, + FLAGS.delg_embedding_layer_dim, FLAGS.block3_strides, + FLAGS.iou) + + # Load the weights. + checkpoint_path = FLAGS.ckpt_path + module.LoadWeights(checkpoint_path) + print('Checkpoint loaded from ', checkpoint_path) + + # Save the module + tf.saved_model.save(module, export_path) + + +if __name__ == '__main__': + app.run(main) diff --git a/research/delf/delf/python/training/model/export_local_model.py b/research/delf/delf/python/training/model/export_local_model.py new file mode 100644 index 0000000000000000000000000000000000000000..767d363ef7e7868b2caad9b61431dfa99e3beacd --- /dev/null +++ b/research/delf/delf/python/training/model/export_local_model.py @@ -0,0 +1,128 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Export DELF tensorflow inference model. + +The exported model may use an image pyramid for multi-scale processing, with +local feature extraction including receptive field calculation and keypoint +selection. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from absl import app +from absl import flags +import tensorflow as tf + +from delf.python.training.model import delf_model +from delf.python.training.model import export_model_utils + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'ckpt_path', '/tmp/delf-logdir/delf-weights', 'Path to saved checkpoint.') +flags.DEFINE_string('export_path', None, 'Path where model will be exported.') +flags.DEFINE_boolean( + 'block3_strides', True, 'Whether to apply strides after block3.') +flags.DEFINE_float('iou', 1.0, 'IOU for non-max suppression.') +flags.DEFINE_boolean( + 'use_autoencoder', True, + 'Whether the exported model should use an autoencoder.') +flags.DEFINE_float( + 'autoencoder_dimensions', 128, + 'Number of dimensions of the autoencoder. Used only if' + 'use_autoencoder=True.') +flags.DEFINE_float( + 'local_feature_map_channels', 1024, + 'Number of channels at backbone layer used for local feature extraction. ' + 'Default value 1024 is the number of channels of block3. 
Used only if' + 'use_autoencoder=True.') + + +class _ExtractModule(tf.Module): + """Helper module to build and save DELF model.""" + + def __init__(self, block3_strides, iou): + """Initialization of DELF model. + + Args: + block3_strides: bool, whether to add strides to the output of block3. + iou: IOU for non-max suppression. + """ + self._stride_factor = 2.0 if block3_strides else 1.0 + self._iou = iou + # Setup the DELF model for extraction. + self._model = delf_model.Delf( + block3_strides=block3_strides, + name='DELF', + use_dim_reduction=FLAGS.use_autoencoder, + reduced_dimension=FLAGS.autoencoder_dimensions, + dim_expand_channels=FLAGS.local_feature_map_channels) + + def LoadWeights(self, checkpoint_path): + self._model.load_weights(checkpoint_path) + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'), + tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'), + tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'), + tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres') + ]) + def ExtractFeatures(self, input_image, input_scales, input_max_feature_num, + input_abs_thres): + + extracted_features = export_model_utils.ExtractLocalFeatures( + input_image, input_scales, input_max_feature_num, input_abs_thres, + self._iou, lambda x: self._model(x, training=False), + self._stride_factor) + + named_output_tensors = {} + named_output_tensors['boxes'] = tf.identity( + extracted_features[0], name='boxes') + named_output_tensors['features'] = tf.identity( + extracted_features[1], name='features') + named_output_tensors['scales'] = tf.identity( + extracted_features[2], name='scales') + named_output_tensors['scores'] = tf.identity( + extracted_features[3], name='scores') + return named_output_tensors + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + export_path = FLAGS.export_path + if os.path.exists(export_path): + raise 
ValueError(f'Export_path {export_path} already exists. Please ' + 'specify a different path or delete the existing one.') + + module = _ExtractModule(FLAGS.block3_strides, FLAGS.iou) + + # Load the weights. + checkpoint_path = FLAGS.ckpt_path + module.LoadWeights(checkpoint_path) + print('Checkpoint loaded from ', checkpoint_path) + + # Save the module + tf.saved_model.save(module, export_path) + + +if __name__ == '__main__': + app.run(main) diff --git a/research/delf/delf/python/training/model/export_model.py b/research/delf/delf/python/training/model/export_model.py deleted file mode 100644 index 10fb8905e1e7d6d575c9b0f6480276ca9719662c..0000000000000000000000000000000000000000 --- a/research/delf/delf/python/training/model/export_model.py +++ /dev/null @@ -1,111 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Export DELF tensorflow inference model. - -This model includes feature extraction, receptive field calculation and -key-point selection and outputs the selected feature descriptors. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from absl import app -from absl import flags -import tensorflow as tf - -from delf.python.training.model import delf_model -from delf.python.training.model import export_model_utils - -FLAGS = flags.FLAGS - -flags.DEFINE_string('ckpt_path', '/tmp/delf-logdir/delf-weights', - 'Path to saved checkpoint.') -flags.DEFINE_string('export_path', None, 'Path where model will be exported.') -flags.DEFINE_boolean('block3_strides', False, - 'Whether to apply strides after block3.') -flags.DEFINE_float('iou', 1.0, 'IOU for non-max suppression.') - - -class _ExtractModule(tf.Module): - """Helper module to build and save DELF model.""" - - def __init__(self, block3_strides, iou): - """Initialization of DELF model. - - Args: - block3_strides: bool, whether to add strides to the output of block3. - iou: IOU for non-max suppression. - """ - self._stride_factor = 2.0 if block3_strides else 1.0 - self._iou = iou - # Setup the DELF model for extraction. 
- self._model = delf_model.Delf( - block3_strides=block3_strides, name='DELF') - - def LoadWeights(self, checkpoint_path): - self._model.load_weights(checkpoint_path) - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image'), - tf.TensorSpec(shape=[None], dtype=tf.float32, name='input_scales'), - tf.TensorSpec(shape=(), dtype=tf.int32, name='input_max_feature_num'), - tf.TensorSpec(shape=(), dtype=tf.float32, name='input_abs_thres') - ]) - def ExtractFeatures(self, input_image, input_scales, input_max_feature_num, - input_abs_thres): - - extracted_features = export_model_utils.ExtractLocalFeatures( - input_image, input_scales, input_max_feature_num, input_abs_thres, - self._iou, lambda x: self._model(x, training=False), - self._stride_factor) - - named_output_tensors = {} - named_output_tensors['boxes'] = tf.identity( - extracted_features[0], name='boxes') - named_output_tensors['features'] = tf.identity( - extracted_features[1], name='features') - named_output_tensors['scales'] = tf.identity( - extracted_features[2], name='scales') - named_output_tensors['scores'] = tf.identity( - extracted_features[3], name='scores') - return named_output_tensors - - -def main(argv): - if len(argv) > 1: - raise app.UsageError('Too many command-line arguments.') - - export_path = FLAGS.export_path - if os.path.exists(export_path): - raise ValueError(f'Export_path {export_path} already exists. Please ' - 'specify a different path or delete the existing one.') - - module = _ExtractModule(FLAGS.block3_strides, FLAGS.iou) - - # Load the weights. 
- checkpoint_path = FLAGS.ckpt_path - module.LoadWeights(checkpoint_path) - print('Checkpoint loaded from ', checkpoint_path) - - # Save the module - tf.saved_model.save(module, export_path) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/delf/delf/python/training/model/export_model_utils.py b/research/delf/delf/python/training/model/export_model_utils.py index 64d6672569e7d4973c921de8cc8c8c63051589b3..92a616e871544dbb8ec3ac0e5a1d6882b3ab2a7b 100644 --- a/research/delf/delf/python/training/model/export_model_utils.py +++ b/research/delf/delf/python/training/model/export_model_utils.py @@ -22,11 +22,14 @@ from __future__ import print_function import tensorflow as tf from delf import feature_extractor -from delf.python.training.datasets import googlelandmarks as gld +from delf.python.datasets.google_landmarks_dataset import googlelandmarks as gld from object_detection.core import box_list from object_detection.core import box_list_ops +# TODO(andrearaujo): Rewrite this function to be more similar to +# "ExtractLocalAndGlobalFeatures" below, leveraging autograph to avoid the need +# for tf.while loop. def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou, attention_model_fn, stride_factor): """Extract local features for input image. @@ -35,9 +38,9 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou, image: image tensor of type tf.uint8 with shape [h, w, channels]. image_scales: 1D float tensor which contains float scales used for image pyramid construction. - max_feature_num: int tensor denotes the maximum selected feature points. - abs_thres: float tensor denotes the score threshold for feature selection. - iou: float scalar denotes the iou threshold for NMS. + max_feature_num: int tensor denoting the maximum selected feature points. + abs_thres: float tensor denoting the score threshold for feature selection. + iou: float scalar denoting the iou threshold for NMS. 
attention_model_fn: model function. Follows the signature: * Args: * `images`: Image tensor which is re-scaled. @@ -55,7 +58,7 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou, scales such that larger image scales correspond to larger image regions, which is compatible with keypoints detected with other techniques, for example Congas. - scores: [N, 1] float tensor denotes the attention score. + scores: [N, 1] float tensor denoting the attention score. """ original_image_shape_float = tf.gather( @@ -66,6 +69,8 @@ def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou, image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims') # Hard code the feature depth and receptive field parameters for now. + # We need to revisit this once we change the architecture and selected + # convolutional blocks to use as local features. rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0] feature_depth = 1024 @@ -189,7 +194,7 @@ def ExtractGlobalFeatures(image, `image_scales`, those with corresponding indices from this tensor. model_fn: model function. Follows the signature: * Args: - * `images`: Image tensor which is re-scaled. + * `images`: Batched image tensor. * Returns: * `global_descriptors`: Global descriptors for input images. multi_scale_pool_type: If set, the global descriptor of each scale is pooled @@ -266,3 +271,138 @@ def ExtractGlobalFeatures(image, output_global, axis=normalization_axis, name='l2_normalization') return output_global + + +@tf.function +def ExtractLocalAndGlobalFeatures(image, image_scales, max_feature_num, + abs_thres, global_scales_ind, iou, model_fn, + stride_factor): + """Extract local+global features for input image. + + Args: + image: image tensor of type tf.uint8 with shape [h, w, channels]. + image_scales: 1D float tensor which contains float scales used for image + pyramid construction. + max_feature_num: int tensor denoting the maximum selected feature points. 
+ abs_thres: float tensor denoting the score threshold for feature selection. + global_scales_ind: Global feature extraction happens only for a subset of + `image_scales`, those with corresponding indices from this tensor. + iou: float scalar denoting the iou threshold for NMS. + model_fn: model function. Follows the signature: + * Args: + * `images`: Batched image tensor. + * Returns: + * `global_descriptors`: Global descriptors for input images. + * `attention_prob`: Attention map after the non-linearity. + * `feature_map`: Feature map after ResNet convolution. + stride_factor: integer accounting for striding after block3. + + Returns: + boxes: [N, 4] float tensor which denotes the selected receptive boxes. N is + the number of final feature points which pass through keypoint selection + and NMS steps. + local_descriptors: [N, depth] float tensor. + feature_scales: [N] float tensor. It is the inverse of the input image + scales such that larger image scales correspond to larger image regions, + which is compatible with keypoints detected with other techniques, for + example Congas. + scores: [N, 1] float tensor denoting the attention score. + global_descriptors: [S, D] float tensor, with the global descriptors for + each scale; S is the number of scales, and D the global descriptor + dimensionality. + """ + original_image_shape_float = tf.gather( + tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1]) + image_tensor = gld.NormalizeImages( + image, pixel_value_offset=128.0, pixel_value_scale=128.0) + image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims') + + # Hard code the receptive field parameters for now. + # We need to revisit this once we change the architecture and selected + # convolutional blocks to use as local features. + rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0] + + def _ResizeAndExtract(scale_index): + """Helper function to resize image then extract features. + + Args: + scale_index: A valid index in image_scales. 
+ + Returns: + global_descriptor: [1,D] tensor denoting the extracted global descriptor. + boxes: Box tensor with the shape of [K, 4]. + local_descriptors: Local descriptor tensor with the shape of [K, depth]. + scales: Scale tensor with the shape of [K]. + scores: Score tensor with the shape of [K]. + """ + scale = tf.gather(image_scales, scale_index) + new_image_size = tf.dtypes.cast( + tf.round(original_image_shape_float * scale), tf.int32) + resized_image = tf.image.resize(image_tensor, new_image_size) + global_descriptor, attention_prob, feature_map = model_fn(resized_image) + + attention_prob = tf.squeeze(attention_prob, axis=[0]) + feature_map = tf.squeeze(feature_map, axis=[0]) + + # Compute RF boxes and re-project them to the original image space. + rf_boxes = feature_extractor.CalculateReceptiveBoxes( + tf.shape(feature_map)[0], + tf.shape(feature_map)[1], rf, stride, padding) + rf_boxes = tf.divide(rf_boxes, scale) + + attention_prob = tf.reshape(attention_prob, [-1]) + feature_map = tf.reshape(feature_map, [-1, tf.shape(feature_map)[2]]) + + # Use attention score to select local features. + indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1]) + boxes = tf.gather(rf_boxes, indices) + local_descriptors = tf.gather(feature_map, indices) + scores = tf.gather(attention_prob, indices) + scales = tf.ones_like(scores, tf.float32) / scale + + return global_descriptor, boxes, local_descriptors, scales, scores + + # TODO(andrearaujo): Currently, a global feature is extracted even for scales + # which are not using it. The obtained result is correct, however feature + # extraction is slower than expected. We should try to fix this in the future. + + # Run first scale. + (output_global_descriptors, output_boxes, output_local_descriptors, + output_scales, output_scores) = _ResizeAndExtract(0) + if not tf.reduce_any(tf.equal(global_scales_ind, 0)): + # If global descriptor is not using the first scale, clear it out. 
+ output_global_descriptors = tf.zeros( + [0, tf.shape(output_global_descriptors)[1]]) + + # Loop over subsequent scales. + num_scales = tf.shape(image_scales)[0] + for scale_index in tf.range(1, num_scales): + # Allow an undefined number of global feature scales to be extracted. + tf.autograph.experimental.set_loop_options( + shape_invariants=[(output_global_descriptors, + tf.TensorShape([None, None]))]) + + (global_descriptor, boxes, local_descriptors, scales, + scores) = _ResizeAndExtract(scale_index) + output_boxes = tf.concat([output_boxes, boxes], 0) + output_local_descriptors = tf.concat( + [output_local_descriptors, local_descriptors], 0) + output_scales = tf.concat([output_scales, scales], 0) + output_scores = tf.concat([output_scores, scores], 0) + if tf.reduce_any(tf.equal(global_scales_ind, scale_index)): + output_global_descriptors = tf.concat( + [output_global_descriptors, global_descriptor], 0) + + feature_boxes = box_list.BoxList(output_boxes) + feature_boxes.add_field('local_descriptors', output_local_descriptors) + feature_boxes.add_field('scales', output_scales) + feature_boxes.add_field('scores', output_scores) + + nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes()) + final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou, + nms_max_boxes) + + return (final_boxes.get(), final_boxes.get_field('local_descriptors'), + final_boxes.get_field('scales'), + tf.expand_dims(final_boxes.get_field('scores'), + 1), output_global_descriptors) diff --git a/research/delf/delf/python/training/model/resnet50.py b/research/delf/delf/python/training/model/resnet50.py index 6daaab67419d99ebcefd7b25f89c284bf00832af..3718ac5b05f85172e18086470855081fa4792751 100644 --- a/research/delf/delf/python/training/model/resnet50.py +++ b/research/delf/delf/python/training/model/resnet50.py @@ -29,6 +29,7 @@ from absl import logging import h5py import tensorflow as tf +from delf.python.pooling_layers import pooling as pooling_layers layers = 
tf.keras.layers @@ -183,13 +184,16 @@ class ResNet50(tf.keras.Model): output of the last convolutional layer. 'avg' means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. 'max' means that global - max pooling will be applied. + max pooling will be applied. 'gem' means GeM pooling will be applied. block3_strides: whether to add a stride of 2 to block3 to make it compatible with tf.slim ResNet implementation. average_pooling: whether to do average pooling of block4 features before global pooling. classes: optional number of classes to classify images into, only to be specified if `include_top` is True. + gem_power: GeM power for GeM pooling. Only used if pooling == 'gem'. + embedding_layer: whether to create an embedding layer (FC whitening layer). + embedding_layer_dim: size of the embedding layer. Raises: ValueError: in case of invalid argument for data_format. @@ -202,7 +206,10 @@ class ResNet50(tf.keras.Model): pooling=None, block3_strides=False, average_pooling=True, - classes=1000): + classes=1000, + gem_power=3.0, + embedding_layer=False, + embedding_layer_dim=2048): super(ResNet50, self).__init__(name=name) valid_channel_values = ('channels_first', 'channels_last') @@ -286,8 +293,19 @@ class ResNet50(tf.keras.Model): elif pooling == 'max': self.global_pooling = functools.partial( tf.reduce_max, axis=reduction_indices, keepdims=False) + elif pooling == 'gem': + logging.info('Adding GeMPooling layer with power %f', gem_power) + self.global_pooling = functools.partial( + pooling_layers.gem, axis=reduction_indices, power=gem_power) else: self.global_pooling = None + if embedding_layer: + logging.info('Adding embedding layer with dimension %d', + embedding_layer_dim) + self.embedding_layer = layers.Dense( + embedding_layer_dim, name='embedding_layer') + else: + self.embedding_layer = None def build_call(self, inputs, training=True, intermediates_dict=None): """Building 
the ResNet50 model. @@ -358,7 +376,10 @@ class ResNet50(tf.keras.Model): if self.include_top: return self.fc1000(self.flatten(x)) elif self.global_pooling: - return self.global_pooling(x) + x = self.global_pooling(x) + if self.embedding_layer: + x = self.embedding_layer(x) + return x else: return x @@ -384,6 +405,7 @@ class ResNet50(tf.keras.Model): Args: filepath: String, path to the .h5 file + Raises: ValueError: if the file referenced by `filepath` does not exist. """ @@ -417,7 +439,7 @@ class ResNet50(tf.keras.Model): g = f[inlayer.name] weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] weight_values = [g[weight_name] for weight_name in weight_names] - print('Setting the weights for layer %s' % (inlayer.name)) + logging.info('Setting the weights for layer %s', inlayer.name) inlayer.set_weights(weight_values) finally: # Clean up the temporary file. @@ -435,5 +457,4 @@ class ResNet50(tf.keras.Model): weights = inlayer.get_weights() logging.info(weights) else: - logging.info('Layer %s does not have inner layers.', - layer.name) + logging.info('Layer %s does not have inner layers.', layer.name) diff --git a/research/delf/delf/python/training/train.py b/research/delf/delf/python/training/train.py index 12b7a5f9cc3282e59c738f74c7fbd4798021c429..d21decdd49bed9861d10dd6e5f6a1a6432022a18 100644 --- a/research/delf/delf/python/training/train.py +++ b/research/delf/delf/python/training/train.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Training script for DELF on Google Landmarks Dataset. +"""Training script for DELF/G on Google Landmarks Dataset. -Script to train DELF using classification loss on Google Landmarks Dataset -using MirroredStrategy to so it can run on multiple GPUs. +Uses classification loss, with MirroredStrategy, to support running on multiple +GPUs. 
""" from __future__ import absolute_import @@ -24,6 +24,7 @@ from __future__ import division from __future__ import print_function import os +import time from absl import app from absl import flags @@ -32,8 +33,9 @@ import tensorflow as tf import tensorflow_probability as tfp # Placeholder for internal import. Do not remove this line. -from delf.python.training.datasets import googlelandmarks as gld +from delf.python.datasets.google_landmarks_dataset import googlelandmarks as gld from delf.python.training.model import delf_model +from delf.python.training.model import delg_model FLAGS = flags.FLAGS @@ -45,8 +47,8 @@ flags.DEFINE_string('validation_file_pattern', '/tmp/data/validation*', 'File pattern of validation dataset files.') flags.DEFINE_enum( 'dataset_version', 'gld_v1', ['gld_v1', 'gld_v2', 'gld_v2_clean'], - 'Google Landmarks dataset version, used to determine the' - 'number of classes.') + 'Google Landmarks dataset version, used to determine the number of ' + 'classes.') flags.DEFINE_integer('seed', 0, 'Seed to training dataset.') flags.DEFINE_float('initial_lr', 0.01, 'Initial learning rate.') flags.DEFINE_integer('batch_size', 32, 'Global batch size.') @@ -57,6 +59,41 @@ flags.DEFINE_boolean('use_augmentation', True, flags.DEFINE_string( 'imagenet_checkpoint', None, 'ImageNet checkpoint for ResNet backbone. If None, no checkpoint is used.') +flags.DEFINE_float( + 'attention_loss_weight', 1.0, + 'Weight to apply to the attention loss when calculating the ' + 'total loss of the model.') +flags.DEFINE_boolean('delg_global_features', False, + 'Whether to train a DELG model.') +flags.DEFINE_float( + 'delg_gem_power', 3.0, 'Power for Generalized Mean pooling. Used only if ' + 'delg_global_features=True.') +flags.DEFINE_integer( + 'delg_embedding_layer_dim', 2048, + 'Size of the FC whitening layer (embedding layer). 
Used only if ' + 'delg_global_features=True.') +flags.DEFINE_float( + 'delg_scale_factor_init', 45.25, + 'Initial value of the scaling factor of the cosine logits. The default ' + 'value is sqrt(2048). Used only if delg_global_features=True.') +flags.DEFINE_float('delg_arcface_margin', 0.1, + 'ArcFace margin. Used only if delg_global_features=True.') +flags.DEFINE_integer('image_size', 321, 'Size of each image side to use.') +flags.DEFINE_boolean('use_autoencoder', True, + 'Whether to train an autoencoder.') +flags.DEFINE_float( + 'reconstruction_loss_weight', 10.0, + 'Weight to apply to the reconstruction loss from the autoencoder when ' + 'calculating total loss of the model. Used only if use_autoencoder=True.') +flags.DEFINE_float( + 'autoencoder_dimensions', 128, + 'Number of dimensions of the autoencoder. Used only if ' + 'use_autoencoder=True.') +flags.DEFINE_float( + 'local_feature_map_channels', 1024, + 'Number of channels at backbone layer used for local feature extraction. ' + 'Default value 1024 is the number of channels of block3. 
Used only if' + 'use_autoencoder=True.') def _record_accuracy(metric, logits, labels): @@ -90,7 +127,24 @@ def _attention_summaries(scores, global_step): def create_model(num_classes): """Define DELF model, and initialize classifiers.""" - model = delf_model.Delf(block3_strides=FLAGS.block3_strides, name='DELF') + if FLAGS.delg_global_features: + model = delg_model.Delg( + block3_strides=FLAGS.block3_strides, + name='DELG', + gem_power=FLAGS.delg_gem_power, + embedding_layer_dim=FLAGS.delg_embedding_layer_dim, + scale_factor_init=FLAGS.delg_scale_factor_init, + arcface_margin=FLAGS.delg_arcface_margin, + use_dim_reduction=FLAGS.use_autoencoder, + reduced_dimension=FLAGS.autoencoder_dimensions, + dim_expand_channels=FLAGS.local_feature_map_channels) + else: + model = delf_model.Delf( + block3_strides=FLAGS.block3_strides, + name='DELF', + use_dim_reduction=FLAGS.use_autoencoder, + reduced_dimension=FLAGS.autoencoder_dimensions, + dim_expand_channels=FLAGS.local_feature_map_channels) model.init_classifiers(num_classes) return model @@ -130,11 +184,11 @@ def main(argv): max_iters = FLAGS.max_iters global_batch_size = FLAGS.batch_size - image_size = 321 + image_size = FLAGS.image_size num_eval_batches = int(50000 / global_batch_size) report_interval = 100 eval_interval = 1000 - save_interval = 20000 + save_interval = 1000 initial_lr = FLAGS.initial_lr @@ -146,7 +200,7 @@ def main(argv): max_iters = 100 num_eval_batches = 1 save_interval = 1 - report_interval = 1 + report_interval = 10 # Determine the number of classes based on the version of the dataset. gld_info = gld.GoogleLandmarksInfo() @@ -217,7 +271,12 @@ def main(argv): # Setup checkpoint directory. checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model) manager = tf.train.CheckpointManager( - checkpoint, checkpoint_prefix, max_to_keep=3) + checkpoint, + checkpoint_prefix, + max_to_keep=10, + keep_checkpoint_every_n_hours=3) + # Restores the checkpoint, if existing. 
+ checkpoint.restore(manager.latest_checkpoint) # ------------------------------------------------------------ # Train step to run on one GPU. @@ -227,15 +286,6 @@ def main(argv): # Temporary workaround to avoid some corrupted labels. labels = tf.clip_by_value(labels, 0, model.num_classes) - global_step = optimizer.iterations - tf.summary.image('batch_images', (images + 1.0) / 2.0, step=global_step) - tf.summary.scalar( - 'image_range/max', tf.reduce_max(images), step=global_step) - tf.summary.scalar( - 'image_range/min', tf.reduce_min(images), step=global_step) - - # TODO(andrearaujo): we should try to unify the backprop into a single - # function, instead of applying once to descriptor then to attention. def _backprop_loss(tape, loss, weights): """Backpropogate losses using clipped gradients. @@ -249,54 +299,70 @@ def main(argv): optimizer.apply_gradients(zip(clipped, weights)) # Record gradients and loss through backbone. - with tf.GradientTape() as desc_tape: - - blocks = {} - prelogits = model.backbone( - images, intermediates_dict=blocks, training=True) - - # Report sparsity. - activations_zero_fractions = { - 'sparsity/%s' % k: tf.nn.zero_fraction(v) - for k, v in blocks.items() - } - for k, v in activations_zero_fractions.items(): - tf.summary.scalar(k, v, step=global_step) - - # Apply descriptor classifier. - logits = model.desc_classification(prelogits) - - desc_loss = compute_loss(labels, logits) - - # Backprop only through backbone weights. - _backprop_loss(desc_tape, desc_loss, model.desc_trainable_weights) - - # Record descriptor train accuracy. - _record_accuracy(desc_train_accuracy, logits, labels) + with tf.GradientTape() as gradient_tape: + # Make a forward pass to calculate prelogits. + (desc_prelogits, attn_prelogits, attn_scores, backbone_blocks, + dim_expanded_features, _) = model.global_and_local_forward_pass(images) + + # Calculate global loss by applying the descriptor classifier. 
+ if FLAGS.delg_global_features: + desc_logits = model.desc_classification(desc_prelogits, labels) + else: + desc_logits = model.desc_classification(desc_prelogits) + desc_loss = compute_loss(labels, desc_logits) + + # Calculate attention loss by applying the attention block classifier. + attn_logits = model.attn_classification(attn_prelogits) + attn_loss = compute_loss(labels, attn_logits) + + # Calculate reconstruction loss between the attention prelogits and the + # backbone. + if FLAGS.use_autoencoder: + block3 = tf.stop_gradient(backbone_blocks['block3']) + reconstruction_loss = tf.math.reduce_mean( + tf.keras.losses.MSE(block3, dim_expanded_features)) + else: + reconstruction_loss = 0 - # Record gradients and loss through attention block. - with tf.GradientTape() as attn_tape: - block3 = blocks['block3'] # pytype: disable=key-error + # Cumulate global loss, attention loss and reconstruction loss. + total_loss = ( + desc_loss + FLAGS.attention_loss_weight * attn_loss + + FLAGS.reconstruction_loss_weight * reconstruction_loss) - # Stopping gradients according to DELG paper: - # (https://arxiv.org/abs/2001.05027). - block3 = tf.stop_gradient(block3) + # Perform backpropagation through the descriptor and attention layers + # together. Note that this will increment the number of iterations of + # "optimizer". + _backprop_loss(gradient_tape, total_loss, model.trainable_weights) - prelogits, scores, _ = model.attention(block3, training=True) - _attention_summaries(scores, global_step) + # Step number, for summary purposes. + global_step = optimizer.iterations - # Apply attention block classifier. - logits = model.attn_classification(prelogits) + # Input image-related summaries. 
+ tf.summary.image('batch_images', (images + 1.0) / 2.0, step=global_step) + tf.summary.scalar( + 'image_range/max', tf.reduce_max(images), step=global_step) + tf.summary.scalar( + 'image_range/min', tf.reduce_min(images), step=global_step) - attn_loss = compute_loss(labels, logits) + # Attention and sparsity summaries. + _attention_summaries(attn_scores, global_step) + activations_zero_fractions = { + 'sparsity/%s' % k: tf.nn.zero_fraction(v) + for k, v in backbone_blocks.items() + } + for k, v in activations_zero_fractions.items(): + tf.summary.scalar(k, v, step=global_step) - # Backprop only through attention weights. - _backprop_loss(attn_tape, attn_loss, model.attn_trainable_weights) + # Scaling factor summary for cosine logits for a DELG model. + if FLAGS.delg_global_features: + tf.summary.scalar( + 'desc/scale_factor', model.scale_factor, step=global_step) - # Record attention train accuracy. - _record_accuracy(attn_train_accuracy, logits, labels) + # Record train accuracies. + _record_accuracy(desc_train_accuracy, desc_logits, labels) + _record_accuracy(attn_train_accuracy, attn_logits, labels) - return desc_loss, attn_loss + return desc_loss, attn_loss, reconstruction_loss # ------------------------------------------------------------ def validation_step(inputs): @@ -308,7 +374,10 @@ def main(argv): blocks = {} prelogits = model.backbone( images, intermediates_dict=blocks, training=False) - logits = model.desc_classification(prelogits, training=False) + if FLAGS.delg_global_features: + logits = model.desc_classification(prelogits, labels, training=False) + else: + logits = model.desc_classification(prelogits, training=False) softmax_probabilities = tf.keras.layers.Softmax()(logits) validation_loss = loss_object(labels, logits) @@ -335,7 +404,7 @@ def main(argv): def distributed_train_step(dataset_inputs): """Get the actual losses.""" # Each (desc, attn) is a list of 3 losses - crossentropy, reg, total. 
- desc_per_replica_loss, attn_per_replica_loss = ( + desc_per_replica_loss, attn_per_replica_loss, recon_per_replica_loss = ( strategy.run(train_step, args=(dataset_inputs,))) # Reduce over the replicas. @@ -343,8 +412,10 @@ def main(argv): tf.distribute.ReduceOp.SUM, desc_per_replica_loss, axis=None) attn_global_loss = strategy.reduce( tf.distribute.ReduceOp.SUM, attn_per_replica_loss, axis=None) + recon_global_loss = strategy.reduce( + tf.distribute.ReduceOp.SUM, recon_per_replica_loss, axis=None) - return desc_global_loss, attn_global_loss + return desc_global_loss, attn_global_loss, recon_global_loss @tf.function def distributed_validation_step(dataset_inputs): @@ -353,15 +424,16 @@ def main(argv): # ------------------------------------------------------------ # *** TRAIN LOOP *** with summary_writer.as_default(): - with tf.summary.record_if( - tf.math.equal(0, optimizer.iterations % report_interval)): + record_cond = lambda: tf.equal(optimizer.iterations % report_interval, 0) + with tf.summary.record_if(record_cond): + global_step_value = optimizer.iterations.numpy() # TODO(dananghel): try to load pretrained weights at backbone creation. # Load pretrained weights for ResNet50 trained on ImageNet. - if FLAGS.imagenet_checkpoint is not None: + if (FLAGS.imagenet_checkpoint is not None) and (not global_step_value): logging.info('Attempting to load ImageNet pretrained weights.') input_batch = next(train_iter) - _, _ = distributed_train_step(input_batch) + _, _, _ = distributed_train_step(input_batch) model.backbone.restore_weights(FLAGS.imagenet_checkpoint) logging.info('Done.') else: @@ -369,9 +441,9 @@ def main(argv): if FLAGS.debug: model.backbone.log_weights() - global_step_value = optimizer.iterations.numpy() + last_summary_step_value = None + last_summary_time = None while global_step_value < max_iters: - # input_batch : images(b, h, w, c), labels(b,). 
try: input_batch = next(train_iter) @@ -381,24 +453,27 @@ def main(argv): global_step_value) break - # Set learning rate for optimizer to use. + # Set learning rate and run the training step over num_gpu gpus. + optimizer.learning_rate = _learning_rate_schedule( + optimizer.iterations.numpy(), max_iters, initial_lr) + desc_dist_loss, attn_dist_loss, recon_dist_loss = ( + distributed_train_step(input_batch)) + + # Step number, to be used for summary/logging. global_step = optimizer.iterations global_step_value = global_step.numpy() - learning_rate = _learning_rate_schedule(global_step_value, max_iters, - initial_lr) - optimizer.learning_rate = learning_rate + # LR, losses and accuracies summaries. tf.summary.scalar( 'learning_rate', optimizer.learning_rate, step=global_step) - - # Run the training step over num_gpu gpus. - desc_dist_loss, attn_dist_loss = distributed_train_step(input_batch) - - # Log losses and accuracies to tensorboard. tf.summary.scalar( 'loss/desc/crossentropy', desc_dist_loss, step=global_step) tf.summary.scalar( 'loss/attn/crossentropy', attn_dist_loss, step=global_step) + if FLAGS.use_autoencoder: + tf.summary.scalar( + 'loss/recon/mse', recon_dist_loss, step=global_step) + tf.summary.scalar( 'train_accuracy/desc', desc_train_accuracy.result(), @@ -408,6 +483,19 @@ def main(argv): attn_train_accuracy.result(), step=global_step) + # Summary for number of global steps taken per second. + current_time = time.time() + if (last_summary_step_value is not None and + last_summary_time is not None): + tf.summary.scalar( + 'global_steps_per_sec', + (global_step_value - last_summary_step_value) / + (current_time - last_summary_time), + step=global_step) + if tf.summary.should_record_summaries().numpy(): + last_summary_step_value = global_step_value + last_summary_time = current_time + # Print to console if running locally. 
if FLAGS.debug: if global_step_value % report_interval == 0: @@ -440,12 +528,14 @@ def main(argv): print('Validation: desc:', desc_validation_result.numpy()) print(' : attn:', attn_validation_result.numpy()) - # Save checkpoint once (each save_interval*n, n \in N) steps. + # Save checkpoint once (each save_interval*n, n \in N) steps, or if + # this is the last iteration. # TODO(andrearaujo): save only in one of the two ways. They are # identical, the only difference is that the manager adds some extra # prefixes and variables (eg, optimizer variables). - if global_step_value % save_interval == 0: - save_path = manager.save() + if (global_step_value % save_interval + == 0) or (global_step_value >= max_iters): + save_path = manager.save(checkpoint_number=global_step_value) logging.info('Saved (%d) at %s', global_step_value, save_path) file_path = '%s/delf_weights' % FLAGS.logdir @@ -461,9 +551,6 @@ def main(argv): desc_validation_accuracy.reset_states() attn_validation_accuracy.reset_states() - if global_step.numpy() > max_iters: - break - logging.info('Finished training for %d steps.', max_iters) diff --git a/research/delf/delf/python/utils.py b/research/delf/delf/python/utils.py index dbab2d8c7f1f423991c98851ad509e4684b738b7..46b62cbdf31a3e59e0416fb7caca0e82c78438d5 100644 --- a/research/delf/delf/python/utils.py +++ b/research/delf/delf/python/utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np from PIL import Image from PIL import ImageFile import tensorflow as tf @@ -39,3 +40,65 @@ def RgbLoader(path): img = Image.open(f) return img.convert('RGB') + +def ResizeImage(image, config, resize_factor=1.0): + """Resizes image according to config. + + Args: + image: Uint8 array with shape (height, width, 3). + config: DelfConfig proto containing the model configuration. + resize_factor: Optional float resize factor for the input image. 
If given,
+    the maximum and minimum allowed image sizes in `config` are scaled by this
+    factor. Must be non-negative.
+
+  Returns:
+    resized_image: Uint8 array with resized image.
+    scale_factors: 2D float array, with factors used for resizing along height
+      and width (If upscaling, larger than 1; if downscaling, smaller than 1).
+
+  Raises:
+    ValueError: If `image` has incorrect number of dimensions/channels.
+  """
+  if resize_factor < 0.0:
+    raise ValueError('negative resize_factor is not allowed: %f' %
+                     resize_factor)
+  if image.ndim != 3:
+    raise ValueError('image has incorrect number of dimensions: %d' %
+                     image.ndim)
+  height, width, channels = image.shape
+
+  # Take into account resize factor.
+  max_image_size = resize_factor * config.max_image_size
+  min_image_size = resize_factor * config.min_image_size
+
+  if channels != 3:
+    raise ValueError('image has incorrect number of channels: %d' % channels)
+
+  largest_side = max(width, height)
+
+  if max_image_size >= 0 and largest_side > max_image_size:
+    scale_factor = max_image_size / largest_side
+  elif min_image_size >= 0 and largest_side < min_image_size:
+    scale_factor = min_image_size / largest_side
+  elif config.use_square_images and (height != width):
+    scale_factor = 1.0
+  else:
+    # No resizing needed, early return.
+    return image, np.ones(2, dtype=float)
+
+  # Note that new_shape is in (width, height) format (PIL convention), while
+  # scale_factors are in (height, width) convention (NumPy convention).
+ if config.use_square_images: + new_shape = (int(round(largest_side * scale_factor)), + int(round(largest_side * scale_factor))) + else: + new_shape = (int(round(width * scale_factor)), + int(round(height * scale_factor))) + + scale_factors = np.array([new_shape[1] / height, new_shape[0] / width], + dtype=float) + + pil_image = Image.fromarray(image) + resized_image = np.array(pil_image.resize(new_shape, resample=Image.BILINEAR)) + + return resized_image, scale_factors diff --git a/research/delf/delf/python/utils_test.py b/research/delf/delf/python/utils_test.py new file mode 100644 index 0000000000000000000000000000000000000000..a07d86d75d8ab5e972d8a8d4c96e2729e92757cb --- /dev/null +++ b/research/delf/delf/python/utils_test.py @@ -0,0 +1,103 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for helper utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from delf import delf_config_pb2 +from delf import utils + + +class UtilsTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ('Max-1Min-1', -1, -1, 1.0, False, [4, 2, 3], [1.0, 1.0]), + ('Max-1Min-1Square', -1, -1, 1.0, True, [4, 4, 3], [1.0, 2.0]), + ('Max2Min-1', 2, -1, 1.0, False, [2, 1, 3], [0.5, 0.5]), + ('Max2Min-1Square', 2, -1, 1.0, True, [2, 2, 3], [0.5, 1.0]), + ('Max8Min-1', 8, -1, 1.0, False, [4, 2, 3], [1.0, 1.0]), + ('Max8Min-1Square', 8, -1, 1.0, True, [4, 4, 3], [1.0, 2.0]), + ('Max-1Min1', -1, 1, 1.0, False, [4, 2, 3], [1.0, 1.0]), + ('Max-1Min1Square', -1, 1, 1.0, True, [4, 4, 3], [1.0, 2.0]), + ('Max-1Min8', -1, 8, 1.0, False, [8, 4, 3], [2.0, 2.0]), + ('Max-1Min8Square', -1, 8, 1.0, True, [8, 8, 3], [2.0, 4.0]), + ('Max16Min8', 16, 8, 1.0, False, [8, 4, 3], [2.0, 2.0]), + ('Max16Min8Square', 16, 8, 1.0, True, [8, 8, 3], [2.0, 4.0]), + ('Max2Min2', 2, 2, 1.0, False, [2, 1, 3], [0.5, 0.5]), + ('Max2Min2Square', 2, 2, 1.0, True, [2, 2, 3], [0.5, 1.0]), + ('Max-1Min-1Factor0.5', -1, -1, 0.5, False, [4, 2, 3], [1.0, 1.0]), + ('Max-1Min-1Factor0.5Square', -1, -1, 0.5, True, [4, 4, 3], [1.0, 2.0]), + ('Max2Min-1Factor2.0', 2, -1, 2.0, False, [4, 2, 3], [1.0, 1.0]), + ('Max2Min-1Factor2.0Square', 2, -1, 2.0, True, [4, 4, 3], [1.0, 2.0]), + ('Max-1Min8Factor0.5', -1, 8, 0.5, False, [4, 2, 3], [1.0, 1.0]), + ('Max-1Min8Factor0.5Square', -1, 8, 0.5, True, [4, 4, 3], [1.0, 2.0]), + ('Max-1Min8Factor0.25', -1, 8, 0.25, False, [4, 2, 3], [1.0, 1.0]), + ('Max-1Min8Factor0.25Square', -1, 8, 0.25, True, [4, 4, 3], [1.0, 2.0]), + ('Max2Min2Factor2.0', 2, 2, 2.0, False, [4, 2, 3], [1.0, 1.0]), + 
('Max2Min2Factor2.0Square', 2, 2, 2.0, True, [4, 4, 3], [1.0, 2.0]), + ('Max16Min8Factor0.5', 16, 8, 0.5, False, [4, 2, 3], [1.0, 1.0]), + ('Max16Min8Factor0.5Square', 16, 8, 0.5, True, [4, 4, 3], [1.0, 2.0]), + ) + def testResizeImageWorks(self, max_image_size, min_image_size, resize_factor, + square_output, expected_shape, + expected_scale_factors): + # Construct image of size 4x2x3. + image = np.array([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [3, 3, 3]], + [[4, 4, 4], [5, 5, 5]], [[6, 6, 6], [7, 7, 7]]], + dtype='uint8') + + # Set up config. + config = delf_config_pb2.DelfConfig( + max_image_size=max_image_size, + min_image_size=min_image_size, + use_square_images=square_output) + + resized_image, scale_factors = utils.ResizeImage(image, config, + resize_factor) + self.assertAllEqual(resized_image.shape, expected_shape) + self.assertAllClose(scale_factors, expected_scale_factors) + + @parameterized.named_parameters( + ('Max2Min2', 2, 2, 1.0, False, [2, 1, 3], [0.666666, 0.5]), + ('Max2Min2Square', 2, 2, 1.0, True, [2, 2, 3], [0.666666, 1.0]), + ) + def testResizeImageRoundingWorks(self, max_image_size, min_image_size, + resize_factor, square_output, expected_shape, + expected_scale_factors): + # Construct image of size 3x2x3. + image = np.array([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [3, 3, 3]], + [[4, 4, 4], [5, 5, 5]]], + dtype='uint8') + + # Set up config. 
+ config = delf_config_pb2.DelfConfig( + max_image_size=max_image_size, + min_image_size=min_image_size, + use_square_images=square_output) + + resized_image, scale_factors = utils.ResizeImage(image, config, + resize_factor) + self.assertAllEqual(resized_image.shape, expected_shape) + self.assertAllClose(scale_factors, expected_scale_factors) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/delf/delf/python/whiten.py b/research/delf/delf/python/whiten.py new file mode 100644 index 0000000000000000000000000000000000000000..d2c72d9f17edc72c85f1dbc07b1710290a0bbb43 --- /dev/null +++ b/research/delf/delf/python/whiten.py @@ -0,0 +1,125 @@ +# Copyright 2021 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Whitening learning functions.""" + +import os + +import numpy as np + + +def apply_whitening(descriptors, + mean_descriptor_vector, + projection, + output_dim=None): + """Applies the whitening to the descriptors as a post-processing step. + + Args: + descriptors: [N, D] NumPy array of L2-normalized descriptors to be + post-processed. + mean_descriptor_vector: Mean descriptor vector. + projection: Whitening projection matrix. + output_dim: Integer, parameter for the dimensionality reduction. If + `output_dim` is None, the dimensionality reduction is not performed. 
+
+  Returns:
+    descriptors_whitened: [N, output_dim] NumPy array of L2-normalized
+      descriptors `descriptors` after whitening application.
+  """
+  eps = 1e-6
+  if output_dim is None:
+    output_dim = projection.shape[0]
+
+  descriptors = np.dot(projection[:output_dim, :],
+                       descriptors - mean_descriptor_vector)
+  descriptors_whitened = descriptors / (
+      np.linalg.norm(descriptors, ord=2, axis=0, keepdims=True) + eps)
+  return descriptors_whitened
+
+
+def learn_whitening(descriptors, qidxs, pidxs):
+  """Learning the post-processing of fine-tuned descriptor vectors.
+
+  This method of whitening learning leverages the provided labeled data and
+  uses linear discriminant projections. The projection is decomposed into two
+  parts: whitening and rotation. The whitening part is the inverse of the
+  square-root of the intraclass (matching pairs) covariance matrix. The
+  rotation part is the PCA of the interclass (non-matching pairs) covariance
+  matrix in the whitened space. The described approach acts as a
+  post-processing step, equivalently, once the fine-tuning of the CNN is
+  finished. For more information about the method refer to the section 3.4
+  of https://arxiv.org/pdf/1711.02512.pdf.
+
+  Args:
+    descriptors: [N, D] NumPy array of L2-normalized descriptors.
+    qidxs: List of query indexes.
+    pidxs: List of positive pairs indexes.
+
+  Returns:
+    mean_descriptor_vector: [N, 1] NumPy array, mean descriptor vector.
+    projection: [N, N] NumPy array, whitening projection matrix.
+  """
+  # Calculating the mean descriptor vector, which is used to perform centering.
+  mean_descriptor_vector = descriptors[:, qidxs].mean(axis=1, keepdims=True)
+  # Intraclass (matching pairs) difference.
+  interclass_difference = descriptors[:, qidxs] - descriptors[:, pidxs]
+  covariance_matrix = (
+      np.dot(interclass_difference, interclass_difference.T) /
+      interclass_difference.shape[1])
+
+  # Whitening part.
+  projection = np.linalg.inv(cholesky(covariance_matrix))
+
+  projected_descriptors = np.dot(projection,
+                                 descriptors - mean_descriptor_vector)
+  non_matching_covariance_matrix = np.dot(projected_descriptors,
+                                          projected_descriptors.T)
+  eigval, eigvec = np.linalg.eig(non_matching_covariance_matrix)
+  order = eigval.argsort()[::-1]
+  eigvec = eigvec[:, order]
+
+  # Rotational part.
+  projection = np.dot(eigvec.T, projection)
+  return mean_descriptor_vector, projection
+
+
+def cholesky(matrix):
+  """Cholesky decomposition.
+
+  Cholesky decomposition suitable for non-positive definite matrices: involves
+  adding a small value `alpha` on the matrix diagonal until the matrix
+  becomes positive definite.
+
+  Args:
+    matrix: [K, K] Square matrix to be decomposed.
+
+  Returns:
+    decomposition: [K, K] Lower-triangular Cholesky factor of `matrix`,
+      a matrix with real and positive diagonal entries.
+  """
+  alpha = 0
+  while True:
+    try:
+      # If the input parameter matrix is not positive-definite,
+      # the decomposition fails and we iteratively add a small value `alpha` on
+      # the matrix diagonal.
+      decomposition = np.linalg.cholesky(matrix + alpha * np.eye(*matrix.shape))
+      return decomposition
+    except np.linalg.LinAlgError:
+      if alpha == 0:
+        alpha = 1e-10
+      else:
+        alpha *= 10
+      print(">>>> {}::cholesky: Matrix is not positive definite, adding {:.0e} "
+            "on the diagonal".format(os.path.basename(__file__), alpha))
diff --git a/research/delf/delf/python/whiten_test.py b/research/delf/delf/python/whiten_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..52cc51e65d1087976bee04a1fdbf6fcbaed04217
--- /dev/null
+++ b/research/delf/delf/python/whiten_test.py
@@ -0,0 +1,73 @@
+# Lint as: python3
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for whitening module.""" + +import numpy as np +import tensorflow as tf + +from delf import whiten + + +class WhitenTest(tf.test.TestCase): + + def testApplyWhitening(self): + # Testing the application of the learned whitening. + vectors = np.array([[0.14022471, 0.96360618], [0.37601032, 0.25528411]]) + # Learn whitening for the `vectors`. First element in the `vectors` is + # viewed is the example query and the second element is the corresponding + # positive. + mean_vector, projection = whiten.learn_whitening(vectors, [0], [1]) + # Apply the computed whitening. + whitened_vectors = whiten.apply_whitening(vectors, mean_vector, projection) + expected_whitened_vectors = np.array([[0., 9.99999000e-01], + [0., -2.81240452e-13]]) + # Compare the obtained whitened vectors with the expected result. + self.assertAllClose(whitened_vectors, expected_whitened_vectors) + + def testLearnWhitening(self): + # Testing whitening learning function. + descriptors = np.array([[0.14022471, 0.96360618], [0.37601032, 0.25528411]]) + # Obtain the mean descriptor vector and the projection matrix. + mean_vector, projection = whiten.learn_whitening(descriptors, [0], [1]) + expected_mean_vector = np.array([[0.14022471], [0.37601032]]) + expected_projection = np.array([[1.18894378e+00, -1.74326044e-01], + [1.45071361e+04, 9.89421193e+04]]) + # Check that the both calculated values are close to the expected values. 
+ self.assertAllClose(mean_vector, expected_mean_vector) + self.assertAllClose(projection, expected_projection) + + def testCholeskyPositiveDefinite(self): + # Testing the Cholesky decomposition for the positive definite matrix. + descriptors = np.array([[1, -2j], [2j, 5]]) + output = whiten.cholesky(descriptors) + expected_output = np.array([[1. + 0.j, 0. + 0.j], [0. + 2.j, 1. + 0.j]]) + # Check that the expected output is obtained. + self.assertAllClose(output, expected_output) + # Check that the properties of the Cholesky decomposition are satisfied. + self.assertAllClose(np.matmul(output, output.T.conj()), descriptors) + + def testCholeskyNonPositiveDefinite(self): + # Testing the Cholesky decomposition for a non-positive definite matrix. + input_matrix = np.array([[1., 2.], [-2., 1.]]) + decomposition = whiten.cholesky(input_matrix) + expected_output = np.array([[2., -2.], [-2., 2.]]) + # Check that the properties of the Cholesky decomposition are satisfied. + self.assertAllClose( + np.matmul(decomposition, decomposition.T), expected_output) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/domain_adaptation/README.md b/research/domain_adaptation/README.md deleted file mode 100644 index e8a2b83794f11ed3711e6bc26254a90cb5469440..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/README.md +++ /dev/null @@ -1,124 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -## Introduction -This is the code used for two domain adaptation papers. - -The `domain_separation` directory contains code for the "Domain Separation -Networks" paper by Bousmalis K., Trigeorgis G., et al. which was presented at -NIPS 2016. 
The paper can be found here: https://arxiv.org/abs/1608.06019. - -The `pixel_domain_adaptation` directory contains the code used for the -"Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial -Networks" paper by Bousmalis K., et al. (presented at CVPR 2017). The paper can -be found here: https://arxiv.org/abs/1612.05424. PixelDA aims to perform domain -adaptation by transfering the visual style of the target domain (which has few -or no labels) to a source domain (which has many labels). This is accomplished -using a Generative Adversarial Network (GAN). - -### Other implementations -* [Simplified-DSN](https://github.com/AmirHussein96/Simplified-DSN): - An unofficial implementation of the [Domain Separation Networks paper](https://arxiv.org/abs/1608.06019). - -## Contact -The domain separation code was open-sourced -by [Konstantinos Bousmalis](https://github.com/bousmalis) -(konstantinos@google.com), while the pixel level domain adaptation code was -open-sourced by [David Dohan](https://github.com/dmrd) (ddohan@google.com). - -## Installation -You will need to have the following installed on your machine before trying out the DSN code. - -* TensorFlow 1.x: https://www.tensorflow.org/install/ -* Bazel: https://bazel.build/ - -## Initial setup -In order to run the MNIST to MNIST-M experiments, you will need to set the -data directory: - -``` -$ export DSN_DATA_DIR=/your/dir -``` - -Add models and models/slim to your `$PYTHONPATH` (assumes $PWD is /models): - -``` -$ export PYTHONPATH=$PYTHONPATH:$PWD:$PWD/slim -``` - -## Getting the datasets - -You can fetch the MNIST data by running - -``` - $ bazel run slim:download_and_convert_data -- --dataset_dir $DSN_DATA_DIR --dataset_name=mnist -``` - -The MNIST-M dataset is available online [here](http://bit.ly/2nrlUAJ). 
Once it is downloaded and extracted into your data directory, create TFRecord files by running: -``` -$ bazel run domain_adaptation/datasets:download_and_convert_mnist_m -- --dataset_dir $DSN_DATA_DIR -``` - -# Running PixelDA from MNIST to MNIST-M -You can run PixelDA as follows (using Tensorboard to examine the results): - -``` -$ bazel run domain_adaptation/pixel_domain_adaptation:pixelda_train -- --dataset_dir $DSN_DATA_DIR --source_dataset mnist --target_dataset mnist_m -``` - -And evaluation as: -``` -$ bazel run domain_adaptation/pixel_domain_adaptation:pixelda_eval -- --dataset_dir $DSN_DATA_DIR --source_dataset mnist --target_dataset mnist_m --target_split_name test -``` - -The MNIST-M results in the paper were run with the following hparams flag: -``` ---hparams arch=resnet,domain_loss_weight=0.135603587834,num_training_examples=16000000,style_transfer_loss_weight=0.0113173311334,task_loss_in_g_weight=0.0100959947002,task_tower=mnist,task_tower_in_g_step=true -``` - -### A note on terminology/language of the code: - -The components of the network can be grouped into two parts -which correspond to elements which are jointly optimized: The generator -component and the discriminator component. - -The generator component takes either an image or noise vector and produces an -output image. - -The discriminator component takes the generated images and the target images -and attempts to discriminate between them. - -## Running DSN code for adapting MNIST to MNIST-M - -Then you need to build the binaries with Bazel: - -``` -$ bazel build -c opt domain_adaptation/domain_separation/... 
-``` - -You can then train with the following command: - -``` -$ ./bazel-bin/domain_adaptation/domain_separation/dsn_train \ - --similarity_loss=dann_loss \ - --basic_tower=dann_mnist \ - --source_dataset=mnist \ - --target_dataset=mnist_m \ - --learning_rate=0.0117249 \ - --gamma_weight=0.251175 \ - --weight_decay=1e-6 \ - --layers_to_regularize=fc3 \ - --nouse_separation \ - --master="" \ - --dataset_dir=${DSN_DATA_DIR} \ - -v --use_logging -``` - -Evaluation can be invoked with the following command: - -``` -$ ./bazel-bin/domain_adaptation/domain_separation/dsn_eval \ - -v --dataset mnist_m --split test --num_examples=9001 \ - --dataset_dir=${DSN_DATA_DIR} -``` diff --git a/research/domain_adaptation/WORKSPACE b/research/domain_adaptation/WORKSPACE deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/domain_adaptation/__init__.py b/research/domain_adaptation/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/domain_adaptation/datasets/BUILD b/research/domain_adaptation/datasets/BUILD deleted file mode 100644 index 067a79374fbcedaa6fcd90293e5365aaad4c18c6..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/datasets/BUILD +++ /dev/null @@ -1,45 +0,0 @@ -# Domain Adaptation Scenarios Datasets - -package( - default_visibility = [ - ":internal", - ], -) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//domain_adaptation/...", - ], -) - -py_library( - name = "dataset_factory", - srcs = ["dataset_factory.py"], - deps = [ - ":mnist_m", - "//slim:mnist", - ], -) - -py_binary( - name = "download_and_convert_mnist_m", - srcs = ["download_and_convert_mnist_m.py"], - deps = [ - - "//slim:dataset_utils", - ], -) - -py_binary( - name = "mnist_m", - srcs = ["mnist_m.py"], - deps = [ - - 
"//slim:dataset_utils", - ], -) diff --git a/research/domain_adaptation/datasets/__init__.py b/research/domain_adaptation/datasets/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/domain_adaptation/datasets/dataset_factory.py b/research/domain_adaptation/datasets/dataset_factory.py deleted file mode 100644 index 4ca1b41c412a78d25053fc786c8f81072fe90adb..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/datasets/dataset_factory.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""A factory-pattern class which returns image/label pairs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports -import tensorflow as tf - -from slim.datasets import mnist -from domain_adaptation.datasets import mnist_m - -slim = tf.contrib.slim - - -def get_dataset(dataset_name, - split_name, - dataset_dir, - file_pattern=None, - reader=None): - """Given a dataset name and a split_name returns a Dataset. - - Args: - dataset_name: String, the name of the dataset. - split_name: A train/test split name. - dataset_dir: The directory where the dataset files are stored. - file_pattern: The file pattern to use for matching the dataset source files. - reader: The subclass of tf.ReaderBase. 
If left as `None`, then the default - reader defined by each dataset is used. - - Returns: - A tf-slim `Dataset` class. - - Raises: - ValueError: if `dataset_name` isn't recognized. - """ - dataset_name_to_module = {'mnist': mnist, 'mnist_m': mnist_m} - if dataset_name not in dataset_name_to_module: - raise ValueError('Name of dataset unknown %s.' % dataset_name) - - return dataset_name_to_module[dataset_name].get_split(split_name, dataset_dir, - file_pattern, reader) - - -def provide_batch(dataset_name, split_name, dataset_dir, num_readers, - batch_size, num_preprocessing_threads): - """Provides a batch of images and corresponding labels. - - Args: - dataset_name: String, the name of the dataset. - split_name: A train/test split name. - dataset_dir: The directory where the dataset files are stored. - num_readers: The number of readers used by DatasetDataProvider. - batch_size: The size of the batch requested. - num_preprocessing_threads: The number of preprocessing threads for - tf.train.batch. - file_pattern: The file pattern to use for matching the dataset source files. - reader: The subclass of tf.ReaderBase. If left as `None`, then the default - reader defined by each dataset is used. - - Returns: - A batch of - images: tensor of [batch_size, height, width, channels]. - labels: dictionary of labels. - """ - dataset = get_dataset(dataset_name, split_name, dataset_dir) - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - num_readers=num_readers, - common_queue_capacity=20 * batch_size, - common_queue_min=10 * batch_size) - [image, label] = provider.get(['image', 'label']) - - # Convert images to float32 - image = tf.image.convert_image_dtype(image, tf.float32) - image -= 0.5 - image *= 2 - - # Load the data. 
- labels = {} - images, labels['classes'] = tf.train.batch( - [image, label], - batch_size=batch_size, - num_threads=num_preprocessing_threads, - capacity=5 * batch_size) - labels['classes'] = slim.one_hot_encoding(labels['classes'], - dataset.num_classes) - - # Convert mnist to RGB and 32x32 so that it can match mnist_m. - if dataset_name == 'mnist': - images = tf.image.grayscale_to_rgb(images) - images = tf.image.resize_images(images, [32, 32]) - return images, labels diff --git a/research/domain_adaptation/datasets/download_and_convert_mnist_m.py b/research/domain_adaptation/datasets/download_and_convert_mnist_m.py deleted file mode 100644 index 3b5004d3d8aaf54656389e517c50f38299714bc7..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/datasets/download_and_convert_mnist_m.py +++ /dev/null @@ -1,237 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Downloads and converts MNIST-M data to TFRecords of TF-Example protos. - -This module downloads the MNIST-M data, uncompresses it, reads the files -that make up the MNIST-M data and creates two TFRecord datasets: one for train -and one for test. Each TFRecord dataset is comprised of a set of TF-Example -protocol buffers, each of which contain a single image and label. - -The script should take about a minute to run. 
- -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import random -import sys - -# Dependency imports -import numpy as np -from six.moves import urllib -import tensorflow as tf - -from slim.datasets import dataset_utils - -tf.app.flags.DEFINE_string( - 'dataset_dir', None, - 'The directory where the output TFRecords and temporary files are saved.') - -FLAGS = tf.app.flags.FLAGS - -_IMAGE_SIZE = 32 -_NUM_CHANNELS = 3 - -# The number of images in the training set. -_NUM_TRAIN_SAMPLES = 59001 - -# The number of images to be kept from the training set for the validation set. -_NUM_VALIDATION = 1000 - -# The number of images in the test set. -_NUM_TEST_SAMPLES = 9001 - -# Seed for repeatability. -_RANDOM_SEED = 0 - -# The names of the classes. -_CLASS_NAMES = [ - 'zero', - 'one', - 'two', - 'three', - 'four', - 'five', - 'size', - 'seven', - 'eight', - 'nine', -] - - -class ImageReader(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Initializes function that decodes RGB PNG data. - self._decode_png_data = tf.placeholder(dtype=tf.string) - self._decode_png = tf.image.decode_png(self._decode_png_data, channels=3) - - def read_image_dims(self, sess, image_data): - image = self.decode_png(sess, image_data) - return image.shape[0], image.shape[1] - - def decode_png(self, sess, image_data): - image = sess.run( - self._decode_png, feed_dict={self._decode_png_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _convert_dataset(split_name, filenames, filename_to_class_id, dataset_dir): - """Converts the given filenames to a TFRecord dataset. - - Args: - split_name: The name of the dataset, either 'train' or 'valid'. - filenames: A list of absolute paths to png images. - filename_to_class_id: A dictionary from filenames (strings) to class ids - (integers). 
- dataset_dir: The directory where the converted datasets are stored. - """ - print('Converting the {} split.'.format(split_name)) - # Train and validation splits are both in the train directory. - if split_name in ['train', 'valid']: - png_directory = os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train') - elif split_name == 'test': - png_directory = os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test') - - with tf.Graph().as_default(): - image_reader = ImageReader() - - with tf.Session('') as sess: - output_filename = _get_output_filename(dataset_dir, split_name) - - with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: - for filename in filenames: - # Read the filename: - image_data = tf.gfile.FastGFile( - os.path.join(png_directory, filename), 'r').read() - height, width = image_reader.read_image_dims(sess, image_data) - - class_id = filename_to_class_id[filename] - example = dataset_utils.image_to_tfexample(image_data, 'png', height, - width, class_id) - tfrecord_writer.write(example.SerializeToString()) - - sys.stdout.write('\n') - sys.stdout.flush() - - -def _extract_labels(label_filename): - """Extract the labels into a dict of filenames to int labels. - - Args: - labels_filename: The filename of the MNIST-M labels. - - Returns: - A dictionary of filenames to int labels. - """ - print('Extracting labels from: ', label_filename) - label_file = tf.gfile.FastGFile(label_filename, 'r').readlines() - label_lines = [line.rstrip('\n').split() for line in label_file] - labels = {} - for line in label_lines: - assert len(line) == 2 - labels[line[0]] = int(line[1]) - return labels - - -def _get_output_filename(dataset_dir, split_name): - """Creates the output filename. - - Args: - dataset_dir: The directory where the temporary files are stored. - split_name: The name of the train/test split. - - Returns: - An absolute file path. 
- """ - return '%s/mnist_m_%s.tfrecord' % (dataset_dir, split_name) - - -def _get_filenames(dataset_dir): - """Returns a list of filenames and inferred class names. - - Args: - dataset_dir: A directory containing a set PNG encoded MNIST-M images. - - Returns: - A list of image file paths, relative to `dataset_dir`. - """ - photo_filenames = [] - for filename in os.listdir(dataset_dir): - photo_filenames.append(filename) - return photo_filenames - - -def run(dataset_dir): - """Runs the download and conversion operation. - - Args: - dataset_dir: The dataset directory where the dataset is stored. - """ - if not tf.gfile.Exists(dataset_dir): - tf.gfile.MakeDirs(dataset_dir) - - train_filename = _get_output_filename(dataset_dir, 'train') - testing_filename = _get_output_filename(dataset_dir, 'test') - - if tf.gfile.Exists(train_filename) and tf.gfile.Exists(testing_filename): - print('Dataset files already exist. Exiting without re-creating them.') - return - - # TODO(konstantinos): Add download and cleanup functionality - - train_validation_filenames = _get_filenames( - os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train')) - test_filenames = _get_filenames( - os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test')) - - # Divide into train and validation: - random.seed(_RANDOM_SEED) - random.shuffle(train_validation_filenames) - train_filenames = train_validation_filenames[_NUM_VALIDATION:] - validation_filenames = train_validation_filenames[:_NUM_VALIDATION] - - train_validation_filenames_to_class_ids = _extract_labels( - os.path.join(dataset_dir, 'mnist_m', 'mnist_m_train_labels.txt')) - test_filenames_to_class_ids = _extract_labels( - os.path.join(dataset_dir, 'mnist_m', 'mnist_m_test_labels.txt')) - - # Convert the train, validation, and test sets. 
- _convert_dataset('train', train_filenames, - train_validation_filenames_to_class_ids, dataset_dir) - _convert_dataset('valid', validation_filenames, - train_validation_filenames_to_class_ids, dataset_dir) - _convert_dataset('test', test_filenames, test_filenames_to_class_ids, - dataset_dir) - - # Finally, write the labels file: - labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES)) - dataset_utils.write_label_file(labels_to_class_names, dataset_dir) - - print('\nFinished converting the MNIST-M dataset!') - - -def main(_): - assert FLAGS.dataset_dir - run(FLAGS.dataset_dir) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/datasets/mnist_m.py b/research/domain_adaptation/datasets/mnist_m.py deleted file mode 100644 index fab6c443cf3d2e9783d19bf52c81b7aa62d56a38..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/datasets/mnist_m.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Provides data for the MNIST-M dataset. 
- -The dataset scripts used to create the dataset can be found at: -tensorflow_models/domain_adaptation_/datasets/download_and_convert_mnist_m_dataset.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# Dependency imports -import tensorflow as tf - -from slim.datasets import dataset_utils - -slim = tf.contrib.slim - -_FILE_PATTERN = 'mnist_m_%s.tfrecord' - -_SPLITS_TO_SIZES = {'train': 58001, 'valid': 1000, 'test': 9001} - -_NUM_CLASSES = 10 - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A [32 x 32 x 1] RGB image.', - 'label': 'A single integer between 0 and 9', -} - - -def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading MNIST. - - Args: - split_name: A train/test split name. - dataset_dir: The base directory of the dataset sources. - - Returns: - A `Dataset` namedtuple. - - Raises: - ValueError: if `split_name` is not a valid train/test split. - """ - if split_name not in _SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. 
- if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': - tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': - tf.FixedLenFeature((), tf.string, default_value='png'), - 'image/class/label': - tf.FixedLenFeature( - [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image(shape=[32, 32, 3], channels=3), - 'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=_SPLITS_TO_SIZES[split_name], - num_classes=_NUM_CLASSES, - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - labels_to_names=labels_to_names) diff --git a/research/domain_adaptation/domain_separation/BUILD b/research/domain_adaptation/domain_separation/BUILD deleted file mode 100644 index 14dceda27e49d74eaaaeae21676183b78c72b9c2..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/BUILD +++ /dev/null @@ -1,157 +0,0 @@ -# Domain Separation Networks - -package( - default_visibility = [ - ":internal", - ], -) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//domain_adaptation/...", - ], -) - -py_library( - name = "models", - srcs = [ - "models.py", - ], - deps = [ - ":utils", - ], -) - -py_library( - name = "losses", - srcs = [ - "losses.py", - ], - deps = [ - ":grl_op_grads_py", - ":grl_op_shapes_py", - ":grl_ops", - ":utils", - ], -) - -py_test( - name = "losses_test", - srcs = [ - "losses_test.py", - ], - deps = [ - ":losses", - ":utils", - ], -) - -py_library( - name = "dsn", - srcs = [ - "dsn.py", - ], - deps = [ - 
":grl_op_grads_py", - ":grl_op_shapes_py", - ":grl_ops", - ":losses", - ":models", - ":utils", - ], -) - -py_test( - name = "dsn_test", - srcs = [ - "dsn_test.py", - ], - deps = [ - ":dsn", - ], -) - -py_binary( - name = "dsn_train", - srcs = [ - "dsn_train.py", - ], - deps = [ - ":dsn", - ":models", - "//domain_adaptation/datasets:dataset_factory", - ], -) - -py_binary( - name = "dsn_eval", - srcs = [ - "dsn_eval.py", - ], - deps = [ - ":dsn", - ":models", - "//domain_adaptation/datasets:dataset_factory", - ], -) - -py_test( - name = "models_test", - srcs = [ - "models_test.py", - ], - deps = [ - ":models", - "//domain_adaptation/datasets:dataset_factory", - ], -) - -py_library( - name = "utils", - srcs = [ - "utils.py", - ], - deps = [ - ], -) - -py_library( - name = "grl_op_grads_py", - srcs = [ - "grl_op_grads.py", - ], - deps = [ - ":grl_ops", - ], -) - -py_library( - name = "grl_op_shapes_py", - srcs = [ - "grl_op_shapes.py", - ], - deps = [ - ], -) - -py_library( - name = "grl_ops", - srcs = ["grl_ops.py"], - data = ["_grl_ops.so"], -) - -py_test( - name = "grl_ops_test", - size = "small", - srcs = ["grl_ops_test.py"], - deps = [ - ":grl_op_grads_py", - ":grl_op_shapes_py", - ":grl_ops", - ], -) diff --git a/research/domain_adaptation/domain_separation/__init__.py b/research/domain_adaptation/domain_separation/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/domain_adaptation/domain_separation/_grl_ops.so b/research/domain_adaptation/domain_separation/_grl_ops.so deleted file mode 100755 index 4c35473760a76dcb743d58f45eddccecb5f5161e..0000000000000000000000000000000000000000 Binary files a/research/domain_adaptation/domain_separation/_grl_ops.so and /dev/null differ diff --git a/research/domain_adaptation/domain_separation/dsn.py b/research/domain_adaptation/domain_separation/dsn.py deleted file mode 100644 index 
3018e8a791840ae465bad493913235cc04c31cff..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/dsn.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functions to create a DSN model and add the different losses to it. - -Specifically, in this file we define the: - - Shared Encoding Similarity Loss Module, with: - - The MMD Similarity method - - The Correlation Similarity method - - The Gradient Reversal (Domain-Adversarial) method - - Difference Loss Module - - Reconstruction Loss Module - - Task Loss Module -""" -from functools import partial - -import tensorflow as tf - -import losses -import models -import utils - -slim = tf.contrib.slim - - -################################################################################ -# HELPER FUNCTIONS -################################################################################ -def dsn_loss_coefficient(params): - """The global_step-dependent weight that specifies when to kick in DSN losses. - - Args: - params: A dictionary of parameters. Expecting 'domain_separation_startpoint' - - Returns: - A weight to that effectively enables or disables the DSN-related losses, - i.e. similarity, difference, and reconstruction losses. 
- """ - return tf.where( - tf.less(slim.get_or_create_global_step(), - params['domain_separation_startpoint']), 1e-10, 1.0) - - -################################################################################ -# MODEL CREATION -################################################################################ -def create_model(source_images, source_labels, domain_selection_mask, - target_images, target_labels, similarity_loss, params, - basic_tower_name): - """Creates a DSN model. - - Args: - source_images: images from the source domain, a tensor of size - [batch_size, height, width, channels] - source_labels: a dictionary with the name, tensor pairs. 'classes' is one- - hot for the number of classes. - domain_selection_mask: a boolean tensor of size [batch_size, ] which denotes - the labeled images that belong to the source domain. - target_images: images from the target domain, a tensor of size - [batch_size, height width, channels]. - target_labels: a dictionary with the name, tensor pairs. - similarity_loss: The type of method to use for encouraging - the codes from the shared encoder to be similar. - params: A dictionary of parameters. Expecting 'weight_decay', - 'layers_to_regularize', 'use_separation', 'domain_separation_startpoint', - 'alpha_weight', 'beta_weight', 'gamma_weight', 'recon_loss_name', - 'decoder_name', 'encoder_name' - basic_tower_name: the name of the tower to use for the shared encoder. - - Raises: - ValueError: if the arch is not one of the available architectures. - """ - network = getattr(models, basic_tower_name) - num_classes = source_labels['classes'].get_shape().as_list()[1] - - # Make sure we are using the appropriate number of classes. - network = partial(network, num_classes=num_classes) - - # Add the classification/pose estimation loss to the source domain. - source_endpoints = add_task_loss(source_images, source_labels, network, - params) - - if similarity_loss == 'none': - # No domain adaptation, we can stop here. 
- return - - with tf.variable_scope('towers', reuse=True): - target_logits, target_endpoints = network( - target_images, weight_decay=params['weight_decay'], prefix='target') - - # Plot target accuracy of the train set. - target_accuracy = utils.accuracy( - tf.argmax(target_logits, 1), tf.argmax(target_labels['classes'], 1)) - - if 'quaternions' in target_labels: - target_quaternion_loss = losses.log_quaternion_loss( - target_labels['quaternions'], target_endpoints['quaternion_pred'], - params) - tf.summary.scalar('eval/Target quaternions', target_quaternion_loss) - - tf.summary.scalar('eval/Target accuracy', target_accuracy) - - source_shared = source_endpoints[params['layers_to_regularize']] - target_shared = target_endpoints[params['layers_to_regularize']] - - # When using the semisupervised model we include labeled target data in the - # source classifier. We do not want to include these target domain when - # we use the similarity loss. - indices = tf.range(0, source_shared.get_shape().as_list()[0]) - indices = tf.boolean_mask(indices, domain_selection_mask) - add_similarity_loss(similarity_loss, - tf.gather(source_shared, indices), - tf.gather(target_shared, indices), params) - - if params['use_separation']: - add_autoencoders( - source_images, - source_shared, - target_images, - target_shared, - params=params,) - - -def add_similarity_loss(method_name, - source_samples, - target_samples, - params, - scope=None): - """Adds a loss encouraging the shared encoding from each domain to be similar. - - Args: - method_name: the name of the encoding similarity method to use. Valid - options include `dann_loss', `mmd_loss' or `correlation_loss'. - source_samples: a tensor of shape [num_samples, num_features]. - target_samples: a tensor of shape [num_samples, num_features]. - params: a dictionary of parameters. Expecting 'gamma_weight'. - scope: optional name scope for summary tags. - Raises: - ValueError: if `method_name` is not recognized. 
- """ - weight = dsn_loss_coefficient(params) * params['gamma_weight'] - method = getattr(losses, method_name) - method(source_samples, target_samples, weight, scope) - - -def add_reconstruction_loss(recon_loss_name, images, recons, weight, domain): - """Adds a reconstruction loss. - - Args: - recon_loss_name: The name of the reconstruction loss. - images: A `Tensor` of size [batch_size, height, width, 3]. - recons: A `Tensor` whose size matches `images`. - weight: A scalar coefficient for the loss. - domain: The name of the domain being reconstructed. - - Raises: - ValueError: If `recon_loss_name` is not recognized. - """ - if recon_loss_name == 'sum_of_pairwise_squares': - loss_fn = tf.contrib.losses.mean_pairwise_squared_error - elif recon_loss_name == 'sum_of_squares': - loss_fn = tf.contrib.losses.mean_squared_error - else: - raise ValueError('recon_loss_name value [%s] not recognized.' % - recon_loss_name) - - loss = loss_fn(recons, images, weight) - assert_op = tf.Assert(tf.is_finite(loss), [loss]) - with tf.control_dependencies([assert_op]): - tf.summary.scalar('losses/%s Recon Loss' % domain, loss) - - -def add_autoencoders(source_data, source_shared, target_data, target_shared, - params): - """Adds the encoders/decoders for our domain separation model w/ incoherence. - - Args: - source_data: images from the source domain, a tensor of size - [batch_size, height, width, channels] - source_shared: a tensor with first dimension batch_size - target_data: images from the target domain, a tensor of size - [batch_size, height, width, channels] - target_shared: a tensor with first dimension batch_size - params: A dictionary of parameters. 
Expecting 'layers_to_regularize', - 'beta_weight', 'alpha_weight', 'recon_loss_name', 'decoder_name', - 'encoder_name', 'weight_decay' - """ - - def normalize_images(images): - images -= tf.reduce_min(images) - return images / tf.reduce_max(images) - - def concat_operation(shared_repr, private_repr): - return shared_repr + private_repr - - mu = dsn_loss_coefficient(params) - - # The layer to concatenate the networks at. - concat_layer = params['layers_to_regularize'] - - # The coefficient for modulating the private/shared difference loss. - difference_loss_weight = params['beta_weight'] * mu - - # The reconstruction weight. - recon_loss_weight = params['alpha_weight'] * mu - - # The reconstruction loss to use. - recon_loss_name = params['recon_loss_name'] - - # The decoder/encoder to use. - decoder_name = params['decoder_name'] - encoder_name = params['encoder_name'] - - _, height, width, _ = source_data.get_shape().as_list() - code_size = source_shared.get_shape().as_list()[-1] - weight_decay = params['weight_decay'] - - encoder_fn = getattr(models, encoder_name) - # Target Auto-encoding. - with tf.variable_scope('source_encoder'): - source_endpoints = encoder_fn( - source_data, code_size, weight_decay=weight_decay) - - with tf.variable_scope('target_encoder'): - target_endpoints = encoder_fn( - target_data, code_size, weight_decay=weight_decay) - - decoder_fn = getattr(models, decoder_name) - - decoder = partial( - decoder_fn, - height=height, - width=width, - channels=source_data.get_shape().as_list()[-1], - weight_decay=weight_decay) - - # Source Auto-encoding. 
- source_private = source_endpoints[concat_layer] - target_private = target_endpoints[concat_layer] - with tf.variable_scope('decoder'): - source_recons = decoder(concat_operation(source_shared, source_private)) - - with tf.variable_scope('decoder', reuse=True): - source_private_recons = decoder( - concat_operation(tf.zeros_like(source_private), source_private)) - source_shared_recons = decoder( - concat_operation(source_shared, tf.zeros_like(source_shared))) - - with tf.variable_scope('decoder', reuse=True): - target_recons = decoder(concat_operation(target_shared, target_private)) - target_shared_recons = decoder( - concat_operation(target_shared, tf.zeros_like(target_shared))) - target_private_recons = decoder( - concat_operation(tf.zeros_like(target_private), target_private)) - - losses.difference_loss( - source_private, - source_shared, - weight=difference_loss_weight, - name='Source') - losses.difference_loss( - target_private, - target_shared, - weight=difference_loss_weight, - name='Target') - - add_reconstruction_loss(recon_loss_name, source_data, source_recons, - recon_loss_weight, 'source') - add_reconstruction_loss(recon_loss_name, target_data, target_recons, - recon_loss_weight, 'target') - - # Add summaries - source_reconstructions = tf.concat( - axis=2, - values=map(normalize_images, [ - source_data, source_recons, source_shared_recons, - source_private_recons - ])) - target_reconstructions = tf.concat( - axis=2, - values=map(normalize_images, [ - target_data, target_recons, target_shared_recons, - target_private_recons - ])) - tf.summary.image( - 'Source Images:Recons:RGB', - source_reconstructions[:, :, :, :3], - max_outputs=10) - tf.summary.image( - 'Target Images:Recons:RGB', - target_reconstructions[:, :, :, :3], - max_outputs=10) - - if source_reconstructions.get_shape().as_list()[3] == 4: - tf.summary.image( - 'Source Images:Recons:Depth', - source_reconstructions[:, :, :, 3:4], - max_outputs=10) - tf.summary.image( - 'Target 
Images:Recons:Depth', - target_reconstructions[:, :, :, 3:4], - max_outputs=10) - - -def add_task_loss(source_images, source_labels, basic_tower, params): - """Adds a classification and/or pose estimation loss to the model. - - Args: - source_images: images from the source domain, a tensor of size - [batch_size, height, width, channels] - source_labels: labels from the source domain, a tensor of size [batch_size]. - or a tuple of (quaternions, class_labels) - basic_tower: a function that creates the single tower of the model. - params: A dictionary of parameters. Expecting 'weight_decay', 'pose_weight'. - Returns: - The source endpoints. - - Raises: - RuntimeError: if basic tower does not support pose estimation. - """ - with tf.variable_scope('towers'): - source_logits, source_endpoints = basic_tower( - source_images, weight_decay=params['weight_decay'], prefix='Source') - - if 'quaternions' in source_labels: # We have pose estimation as well - if 'quaternion_pred' not in source_endpoints: - raise RuntimeError('Please use a model for estimation e.g. 
pose_mini') - - loss = losses.log_quaternion_loss(source_labels['quaternions'], - source_endpoints['quaternion_pred'], - params) - - assert_op = tf.Assert(tf.is_finite(loss), [loss]) - with tf.control_dependencies([assert_op]): - quaternion_loss = loss - tf.summary.histogram('log_quaternion_loss_hist', quaternion_loss) - slim.losses.add_loss(quaternion_loss * params['pose_weight']) - tf.summary.scalar('losses/quaternion_loss', quaternion_loss) - - classification_loss = tf.losses.softmax_cross_entropy( - source_labels['classes'], source_logits) - - tf.summary.scalar('losses/classification_loss', classification_loss) - return source_endpoints diff --git a/research/domain_adaptation/domain_separation/dsn_eval.py b/research/domain_adaptation/domain_separation/dsn_eval.py deleted file mode 100644 index b6cccdfcc17e8f18e8381530b5c8f41501bda29b..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/dsn_eval.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -# pylint: disable=line-too-long -"""Evaluation for Domain Separation Networks (DSNs).""" -# pylint: enable=line-too-long -import math - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -from domain_adaptation.domain_separation import losses -from domain_adaptation.domain_separation import models - -slim = tf.contrib.slim - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_integer('batch_size', 32, - 'The number of images in each batch.') - -tf.app.flags.DEFINE_string('master', '', - 'BNS name of the TensorFlow master to use.') - -tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/da/', - 'Directory where the model was written to.') - -tf.app.flags.DEFINE_string( - 'eval_dir', '/tmp/da/', - 'Directory where we should write the tf summaries to.') - -tf.app.flags.DEFINE_string('dataset_dir', None, - 'The directory where the dataset files are stored.') - -tf.app.flags.DEFINE_string('dataset', 'mnist_m', - 'Which dataset to test on: "mnist", "mnist_m".') - -tf.app.flags.DEFINE_string('split', 'valid', - 'Which portion to test on: "valid", "test".') - -tf.app.flags.DEFINE_integer('num_examples', 1000, 'Number of test examples.') - -tf.app.flags.DEFINE_string('basic_tower', 'dann_mnist', - 'The basic tower building block.') - -tf.app.flags.DEFINE_bool('enable_precision_recall', False, - 'If True, precision and recall for each class will ' - 'be added to the metrics.') - -tf.app.flags.DEFINE_bool('use_logging', False, 'Debugging messages.') - - -def quaternion_metric(predictions, labels): - params = {'batch_size': FLAGS.batch_size, 'use_logging': False} - logcost = losses.log_quaternion_loss_batch(predictions, labels, params) - return slim.metrics.streaming_mean(logcost) - - -def angle_diff(true_q, pred_q): - angles = 2 * ( - 180.0 / - np.pi) * np.arccos(np.abs(np.sum(np.multiply(pred_q, true_q), axis=1))) 
- return angles - - -def provide_batch_fn(): - """ The provide_batch function to use. """ - return dataset_factory.provide_batch - - -def main(_): - g = tf.Graph() - with g.as_default(): - # Load the data. - images, labels = provide_batch_fn()( - FLAGS.dataset, FLAGS.split, FLAGS.dataset_dir, 4, FLAGS.batch_size, 4) - - num_classes = labels['classes'].get_shape().as_list()[1] - - tf.summary.image('eval_images', images, max_outputs=3) - - # Define the model: - with tf.variable_scope('towers'): - basic_tower = getattr(models, FLAGS.basic_tower) - predictions, endpoints = basic_tower( - images, - num_classes=num_classes, - is_training=False, - batch_norm_params=None) - metric_names_to_values = {} - - # Define the metrics: - if 'quaternions' in labels: # Also have to evaluate pose estimation! - quaternion_loss = quaternion_metric(labels['quaternions'], - endpoints['quaternion_pred']) - - angle_errors, = tf.py_func( - angle_diff, [labels['quaternions'], endpoints['quaternion_pred']], - [tf.float32]) - - metric_names_to_values[ - 'Angular mean error'] = slim.metrics.streaming_mean(angle_errors) - metric_names_to_values['Quaternion Loss'] = quaternion_loss - - accuracy = tf.contrib.metrics.streaming_accuracy( - tf.argmax(predictions, 1), tf.argmax(labels['classes'], 1)) - - predictions = tf.argmax(predictions, 1) - labels = tf.argmax(labels['classes'], 1) - metric_names_to_values['Accuracy'] = accuracy - - if FLAGS.enable_precision_recall: - for i in xrange(num_classes): - index_map = tf.one_hot(i, depth=num_classes) - name = 'PR/Precision_{}'.format(i) - metric_names_to_values[name] = slim.metrics.streaming_precision( - tf.gather(index_map, predictions), tf.gather(index_map, labels)) - name = 'PR/Recall_{}'.format(i) - metric_names_to_values[name] = slim.metrics.streaming_recall( - tf.gather(index_map, predictions), tf.gather(index_map, labels)) - - names_to_values, names_to_updates = slim.metrics.aggregate_metric_map( - metric_names_to_values) - - # Create the summary 
ops such that they also print out to std output: - summary_ops = [] - for metric_name, metric_value in names_to_values.iteritems(): - op = tf.summary.scalar(metric_name, metric_value) - op = tf.Print(op, [metric_value], metric_name) - summary_ops.append(op) - - # This ensures that we make a single pass over all of the data. - num_batches = math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)) - - # Setup the global step. - slim.get_or_create_global_step() - slim.evaluation.evaluation_loop( - FLAGS.master, - checkpoint_dir=FLAGS.checkpoint_dir, - logdir=FLAGS.eval_dir, - num_evals=num_batches, - eval_op=names_to_updates.values(), - summary_op=tf.summary.merge(summary_ops)) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/domain_separation/dsn_test.py b/research/domain_adaptation/domain_separation/dsn_test.py deleted file mode 100644 index 3d687398a9b9356455f739417bc96ddb2ca5ad40..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/dsn_test.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for DSN model assembly functions.""" - -import numpy as np -import tensorflow as tf - -import dsn - - -class HelperFunctionsTest(tf.test.TestCase): - - def testBasicDomainSeparationStartPoint(self): - with self.test_session() as sess: - # Test for when global_step < domain_separation_startpoint - step = tf.contrib.slim.get_or_create_global_step() - sess.run(tf.global_variables_initializer()) # global_step = 0 - params = {'domain_separation_startpoint': 2} - weight = dsn.dsn_loss_coefficient(params) - weight_np = sess.run(weight) - self.assertAlmostEqual(weight_np, 1e-10) - - step_op = tf.assign_add(step, 1) - step_np = sess.run(step_op) # global_step = 1 - weight = dsn.dsn_loss_coefficient(params) - weight_np = sess.run(weight) - self.assertAlmostEqual(weight_np, 1e-10) - - # Test for when global_step >= domain_separation_startpoint - step_np = sess.run(step_op) # global_step = 2 - tf.logging.info(step_np) - weight = dsn.dsn_loss_coefficient(params) - weight_np = sess.run(weight) - self.assertAlmostEqual(weight_np, 1.0) - - -class DsnModelAssemblyTest(tf.test.TestCase): - - def _testBuildDefaultModel(self): - images = tf.to_float(np.random.rand(32, 28, 28, 1)) - labels = {} - labels['classes'] = tf.one_hot( - tf.to_int32(np.random.randint(0, 9, (32))), 10) - - params = { - 'use_separation': True, - 'layers_to_regularize': 'fc3', - 'weight_decay': 0.0, - 'ps_tasks': 1, - 'domain_separation_startpoint': 1, - 'alpha_weight': 1, - 'beta_weight': 1, - 'gamma_weight': 1, - 'recon_loss_name': 'sum_of_squares', - 'decoder_name': 'small_decoder', - 'encoder_name': 'default_encoder', - } - return images, labels, params - - def testBuildModelDann(self): - images, labels, params = self._testBuildDefaultModel() - - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'dann_loss', params, 'dann_mnist') - loss_tensors = 
tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 6) - - def testBuildModelDannSumOfPairwiseSquares(self): - images, labels, params = self._testBuildDefaultModel() - - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'dann_loss', params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 6) - - def testBuildModelDannMultiPSTasks(self): - images, labels, params = self._testBuildDefaultModel() - params['ps_tasks'] = 10 - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'dann_loss', params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 6) - - def testBuildModelMmd(self): - images, labels, params = self._testBuildDefaultModel() - - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'mmd_loss', params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 6) - - def testBuildModelCorr(self): - images, labels, params = self._testBuildDefaultModel() - - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'correlation_loss', params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 6) - - def testBuildModelNoDomainAdaptation(self): - images, labels, params = self._testBuildDefaultModel() - params['use_separation'] = False - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, 'none', - params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 1) - self.assertEqual(len(tf.contrib.losses.get_regularization_losses()), 0) - - def testBuildModelNoAdaptationWeightDecay(self): - images, labels, params = 
self._testBuildDefaultModel() - params['use_separation'] = False - params['weight_decay'] = 1e-5 - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, 'none', - params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 1) - self.assertTrue(len(tf.contrib.losses.get_regularization_losses()) >= 1) - - def testBuildModelNoSeparation(self): - images, labels, params = self._testBuildDefaultModel() - params['use_separation'] = False - with self.test_session(): - dsn.create_model(images, labels, - tf.cast(tf.ones([32,]), tf.bool), images, labels, - 'dann_loss', params, 'dann_mnist') - loss_tensors = tf.contrib.losses.get_losses() - self.assertEqual(len(loss_tensors), 2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/domain_adaptation/domain_separation/dsn_train.py b/research/domain_adaptation/domain_separation/dsn_train.py deleted file mode 100644 index 5e364ad3037b041125a3523370b3b040478f0d8e..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/dsn_train.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Training for Domain Separation Networks (DSNs).""" -from __future__ import division - -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -import dsn - -slim = tf.contrib.slim -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_integer('batch_size', 32, - 'The number of images in each batch.') - -tf.app.flags.DEFINE_string('source_dataset', 'pose_synthetic', - 'Source dataset to train on.') - -tf.app.flags.DEFINE_string('target_dataset', 'pose_real', - 'Target dataset to train on.') - -tf.app.flags.DEFINE_string('target_labeled_dataset', 'none', - 'Target dataset to train on.') - -tf.app.flags.DEFINE_string('dataset_dir', None, - 'The directory where the dataset files are stored.') - -tf.app.flags.DEFINE_string('master', '', - 'BNS name of the TensorFlow master to use.') - -tf.app.flags.DEFINE_string('train_log_dir', '/tmp/da/', - 'Directory where to write event logs.') - -tf.app.flags.DEFINE_string( - 'layers_to_regularize', 'fc3', - 'Comma-separated list of layer names to use MMD regularization on.') - -tf.app.flags.DEFINE_float('learning_rate', .01, 'The learning rate') - -tf.app.flags.DEFINE_float('alpha_weight', 1e-6, - 'The coefficient for scaling the reconstruction ' - 'loss.') - -tf.app.flags.DEFINE_float( - 'beta_weight', 1e-6, - 'The coefficient for scaling the private/shared difference loss.') - -tf.app.flags.DEFINE_float( - 'gamma_weight', 1e-6, - 'The coefficient for scaling the shared encoding similarity loss.') - -tf.app.flags.DEFINE_float('pose_weight', 0.125, - 'The coefficient for scaling the pose loss.') - -tf.app.flags.DEFINE_float( - 'weight_decay', 1e-6, - 'The coefficient for the L2 regularization applied for all weights.') - -tf.app.flags.DEFINE_integer( - 'save_summaries_secs', 60, - 'The frequency with which summaries are saved, in seconds.') - -tf.app.flags.DEFINE_integer( - 'save_interval_secs', 60, - 'The frequency with 
which the model is saved, in seconds.') - -tf.app.flags.DEFINE_integer( - 'max_number_of_steps', None, - 'The maximum number of gradient steps. Use None to train indefinitely.') - -tf.app.flags.DEFINE_integer( - 'domain_separation_startpoint', 1, - 'The global step to add the domain separation losses.') - -tf.app.flags.DEFINE_integer( - 'bipartite_assignment_top_k', 3, - 'The number of top-k matches to use in bipartite matching adaptation.') - -tf.app.flags.DEFINE_float('decay_rate', 0.95, 'Learning rate decay factor.') - -tf.app.flags.DEFINE_integer('decay_steps', 20000, 'Learning rate decay steps.') - -tf.app.flags.DEFINE_float('momentum', 0.9, 'The momentum value.') - -tf.app.flags.DEFINE_bool('use_separation', False, - 'Use our domain separation model.') - -tf.app.flags.DEFINE_bool('use_logging', False, 'Debugging messages.') - -tf.app.flags.DEFINE_integer( - 'ps_tasks', 0, - 'The number of parameter servers. If the value is 0, then the parameters ' - 'are handled locally by the worker.') - -tf.app.flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - -tf.app.flags.DEFINE_integer('num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - -tf.app.flags.DEFINE_integer( - 'task', 0, - 'The Task ID. 
This value is used when training with multiple workers to ' - 'identify each worker.') - -tf.app.flags.DEFINE_string('decoder_name', 'small_decoder', - 'The decoder to use.') -tf.app.flags.DEFINE_string('encoder_name', 'default_encoder', - 'The encoder to use.') - -################################################################################ -# Flags that control the architecture and losses -################################################################################ -tf.app.flags.DEFINE_string( - 'similarity_loss', 'grl', - 'The method to use for encouraging the common encoder codes to be ' - 'similar, one of "grl", "mmd", "corr".') - -tf.app.flags.DEFINE_string('recon_loss_name', 'sum_of_pairwise_squares', - 'The name of the reconstruction loss.') - -tf.app.flags.DEFINE_string('basic_tower', 'pose_mini', - 'The basic tower building block.') - -def provide_batch_fn(): - """ The provide_batch function to use. """ - return dataset_factory.provide_batch - -def main(_): - model_params = { - 'use_separation': FLAGS.use_separation, - 'domain_separation_startpoint': FLAGS.domain_separation_startpoint, - 'layers_to_regularize': FLAGS.layers_to_regularize, - 'alpha_weight': FLAGS.alpha_weight, - 'beta_weight': FLAGS.beta_weight, - 'gamma_weight': FLAGS.gamma_weight, - 'pose_weight': FLAGS.pose_weight, - 'recon_loss_name': FLAGS.recon_loss_name, - 'decoder_name': FLAGS.decoder_name, - 'encoder_name': FLAGS.encoder_name, - 'weight_decay': FLAGS.weight_decay, - 'batch_size': FLAGS.batch_size, - 'use_logging': FLAGS.use_logging, - 'ps_tasks': FLAGS.ps_tasks, - 'task': FLAGS.task, - } - g = tf.Graph() - with g.as_default(): - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - # Load the data. 
- source_images, source_labels = provide_batch_fn()( - FLAGS.source_dataset, 'train', FLAGS.dataset_dir, FLAGS.num_readers, - FLAGS.batch_size, FLAGS.num_preprocessing_threads) - target_images, target_labels = provide_batch_fn()( - FLAGS.target_dataset, 'train', FLAGS.dataset_dir, FLAGS.num_readers, - FLAGS.batch_size, FLAGS.num_preprocessing_threads) - - # In the unsupervised case all the samples in the labeled - # domain are from the source domain. - domain_selection_mask = tf.fill((source_images.get_shape().as_list()[0],), - True) - - # When using the semisupervised model we include labeled target data in - # the source labelled data. - if FLAGS.target_labeled_dataset != 'none': - # 1000 is the maximum number of labelled target samples that exists in - # the datasets. - target_semi_images, target_semi_labels = provide_batch_fn()( - FLAGS.target_labeled_dataset, 'train', FLAGS.batch_size) - - # Calculate the proportion of source domain samples in the semi- - # supervised setting, so that the proportion is set accordingly in the - # batches. 
- proportion = float(source_labels['num_train_samples']) / ( - source_labels['num_train_samples'] + - target_semi_labels['num_train_samples']) - - rnd_tensor = tf.random_uniform( - (target_semi_images.get_shape().as_list()[0],)) - - domain_selection_mask = rnd_tensor < proportion - source_images = tf.where(domain_selection_mask, source_images, - target_semi_images) - source_class_labels = tf.where(domain_selection_mask, - source_labels['classes'], - target_semi_labels['classes']) - - if 'quaternions' in source_labels: - source_pose_labels = tf.where(domain_selection_mask, - source_labels['quaternions'], - target_semi_labels['quaternions']) - (source_images, source_class_labels, source_pose_labels, - domain_selection_mask) = tf.train.shuffle_batch( - [ - source_images, source_class_labels, source_pose_labels, - domain_selection_mask - ], - FLAGS.batch_size, - 50000, - 5000, - num_threads=1, - enqueue_many=True) - - else: - (source_images, source_class_labels, - domain_selection_mask) = tf.train.shuffle_batch( - [source_images, source_class_labels, domain_selection_mask], - FLAGS.batch_size, - 50000, - 5000, - num_threads=1, - enqueue_many=True) - source_labels = {} - source_labels['classes'] = source_class_labels - if 'quaternions' in source_labels: - source_labels['quaternions'] = source_pose_labels - - slim.get_or_create_global_step() - tf.summary.image('source_images', source_images, max_outputs=3) - tf.summary.image('target_images', target_images, max_outputs=3) - - dsn.create_model( - source_images, - source_labels, - domain_selection_mask, - target_images, - target_labels, - FLAGS.similarity_loss, - model_params, - basic_tower_name=FLAGS.basic_tower) - - # Configure the optimization scheme: - learning_rate = tf.train.exponential_decay( - FLAGS.learning_rate, - slim.get_or_create_global_step(), - FLAGS.decay_steps, - FLAGS.decay_rate, - staircase=True, - name='learning_rate') - - tf.summary.scalar('learning_rate', learning_rate) - 
tf.summary.scalar('total_loss', tf.losses.get_total_loss()) - - opt = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum) - tf.logging.set_verbosity(tf.logging.INFO) - # Run training. - loss_tensor = slim.learning.create_train_op( - slim.losses.get_total_loss(), - opt, - summarize_gradients=True, - colocate_gradients_with_ops=True) - slim.learning.train( - train_op=loss_tensor, - logdir=FLAGS.train_log_dir, - master=FLAGS.master, - is_chief=FLAGS.task == 0, - number_of_steps=FLAGS.max_number_of_steps, - save_summaries_secs=FLAGS.save_summaries_secs, - save_interval_secs=FLAGS.save_interval_secs) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/domain_separation/grl_op_grads.py b/research/domain_adaptation/domain_separation/grl_op_grads.py deleted file mode 100644 index fcd85ba2b5e7912bffe646a73558af8184812ea6..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_op_grads.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Gradients for operators defined in grl_ops.py.""" -import tensorflow as tf - - -@tf.RegisterGradient("GradientReversal") -def _GradientReversalGrad(_, grad): - """The gradients for `gradient_reversal`. 
- - Args: - _: The `gradient_reversal` `Operation` that we are differentiating, - which we can use to find the inputs and outputs of the original op. - grad: Gradient with respect to the output of the `gradient_reversal` op. - - Returns: - Gradient with respect to the input of `gradient_reversal`, which is simply - the negative of the input gradient. - - """ - return tf.negative(grad) diff --git a/research/domain_adaptation/domain_separation/grl_op_kernels.cc b/research/domain_adaptation/domain_separation/grl_op_kernels.cc deleted file mode 100644 index ba30128f11e9e88c702d3a80593d930519f346fe..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_op_kernels.cc +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file contains the implementations of the ops registered in -// grl_ops.cc. - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/types.pb.h" - -namespace tensorflow { - -// The gradient reversal op is used in domain adversarial training. It behaves -// as the identity op during forward propagation, and multiplies its input by -1 -// during backward propagation. 
-class GradientReversalOp : public OpKernel { - public: - explicit GradientReversalOp(OpKernelConstruction* context) - : OpKernel(context) {} - - // Gradient reversal op behaves as the identity op during forward - // propagation. Compute() function copied from the IdentityOp::Compute() - // function here: third_party/tensorflow/core/kernels/identity_op.h. - void Compute(OpKernelContext* context) override { - if (IsRefType(context->input_dtype(0))) { - context->forward_ref_input_to_ref_output(0, 0); - } else { - context->set_output(0, context->input(0)); - } - } -}; - -REGISTER_KERNEL_BUILDER(Name("GradientReversal").Device(DEVICE_CPU), - GradientReversalOp); - -} // namespace tensorflow diff --git a/research/domain_adaptation/domain_separation/grl_op_shapes.py b/research/domain_adaptation/domain_separation/grl_op_shapes.py deleted file mode 100644 index 52773c680af265beca9125e48bf68152b8a34e56..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_op_shapes.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Shape inference for operators defined in grl_ops.cc.""" diff --git a/research/domain_adaptation/domain_separation/grl_ops.cc b/research/domain_adaptation/domain_separation/grl_ops.cc deleted file mode 100644 index d441c2b484215605db65a043be6cfa0ab90da2c3..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_ops.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Contains custom ops. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" - -namespace tensorflow { - -// This custom op is used by adversarial training. -REGISTER_OP("GradientReversal") - .Input("input: float") - .Output("output: float") - .SetShapeFn(shape_inference::UnchangedShape) - .Doc(R"doc( -This op copies the input to the output during forward propagation, and -negates the input during backward propagation. - -input: Tensor. -output: Tensor, copied from input. 
-)doc"); - -} // namespace tensorflow diff --git a/research/domain_adaptation/domain_separation/grl_ops.py b/research/domain_adaptation/domain_separation/grl_ops.py deleted file mode 100644 index 50447247b10caf3e41f3c0fb1c6f943dd3d9de6e..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_ops.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""GradientReversal op Python library.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path - -import tensorflow as tf - -tf.logging.info(tf.resource_loader.get_data_files_path()) -_grl_ops_module = tf.load_op_library( - os.path.join(tf.resource_loader.get_data_files_path(), - '_grl_ops.so')) -gradient_reversal = _grl_ops_module.gradient_reversal diff --git a/research/domain_adaptation/domain_separation/grl_ops_test.py b/research/domain_adaptation/domain_separation/grl_ops_test.py deleted file mode 100644 index b431a6c02b60ade92a653d2ee8108c0586c70fbb..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/grl_ops_test.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for grl_ops.""" - -#from models.domain_adaptation.domain_separation import grl_op_grads # pylint: disable=unused-import -#from models.domain_adaptation.domain_separation import grl_op_shapes # pylint: disable=unused-import -import tensorflow as tf - -import grl_op_grads -import grl_ops - -FLAGS = tf.app.flags.FLAGS - - -class GRLOpsTest(tf.test.TestCase): - - def testGradientReversalOp(self): - with tf.Graph().as_default(): - with self.test_session(): - # Test that in forward prop, gradient reversal op acts as the - # identity operation. - examples = tf.constant([5.0, 4.0, 3.0, 2.0, 1.0]) - output = grl_ops.gradient_reversal(examples) - expected_output = examples - self.assertAllEqual(output.eval(), expected_output.eval()) - - # Test that shape inference works as expected. - self.assertAllEqual(output.get_shape(), expected_output.get_shape()) - - # Test that in backward prop, gradient reversal op multiplies - # gradients by -1. - examples = tf.constant([[1.0]]) - w = tf.get_variable(name='w', shape=[1, 1]) - b = tf.get_variable(name='b', shape=[1]) - init_op = tf.global_variables_initializer() - init_op.run() - features = tf.nn.xw_plus_b(examples, w, b) - # Construct two outputs: features layer passes directly to output1, but - # features layer passes through a gradient reversal layer before - # reaching output2. 
- output1 = features - output2 = grl_ops.gradient_reversal(features) - gold = tf.constant([1.0]) - loss1 = gold - output1 - loss2 = gold - output2 - opt = tf.train.GradientDescentOptimizer(learning_rate=0.01) - grads_and_vars_1 = opt.compute_gradients(loss1, - tf.trainable_variables()) - grads_and_vars_2 = opt.compute_gradients(loss2, - tf.trainable_variables()) - self.assertAllEqual(len(grads_and_vars_1), len(grads_and_vars_2)) - for i in range(len(grads_and_vars_1)): - g1 = grads_and_vars_1[i][0] - g2 = grads_and_vars_2[i][0] - # Verify that gradients of loss1 are the negative of gradients of - # loss2. - self.assertAllEqual(tf.negative(g1).eval(), g2.eval()) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/domain_adaptation/domain_separation/losses.py b/research/domain_adaptation/domain_separation/losses.py deleted file mode 100644 index 0d882340de10e4dd64d44f9357e8bfc5b1dd4712..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/losses.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Domain Adaptation Loss Functions. - -The following domain adaptation loss functions are defined: - -- Maximum Mean Discrepancy (MMD). - Relevant paper: - Gretton, Arthur, et al., - "A kernel two-sample test." 
- The Journal of Machine Learning Research, 2012 - -- Correlation Loss on a batch. -""" -from functools import partial -import tensorflow as tf - -import grl_op_grads # pylint: disable=unused-import -import grl_op_shapes # pylint: disable=unused-import -import grl_ops -import utils -slim = tf.contrib.slim - - -################################################################################ -# SIMILARITY LOSS -################################################################################ -def maximum_mean_discrepancy(x, y, kernel=utils.gaussian_kernel_matrix): - r"""Computes the Maximum Mean Discrepancy (MMD) of two samples: x and y. - - Maximum Mean Discrepancy (MMD) is a distance-measure between the samples of - the distributions of x and y. Here we use the kernel two sample estimate - using the empirical mean of the two distributions. - - MMD^2(P, Q) = || \E{\phi(x)} - \E{\phi(y)} ||^2 - = \E{ K(x, x) } + \E{ K(y, y) } - 2 \E{ K(x, y) }, - - where K = <\phi(x), \phi(y)>, - is the desired kernel function, in this case a radial basis kernel. - - Args: - x: a tensor of shape [num_samples, num_features] - y: a tensor of shape [num_samples, num_features] - kernel: a function which computes the kernel in MMD. Defaults to the - GaussianKernelMatrix. - - Returns: - a scalar denoting the squared maximum mean discrepancy loss. - """ - with tf.name_scope('MaximumMeanDiscrepancy'): - # \E{ K(x, x) } + \E{ K(y, y) } - 2 \E{ K(x, y) } - cost = tf.reduce_mean(kernel(x, x)) - cost += tf.reduce_mean(kernel(y, y)) - cost -= 2 * tf.reduce_mean(kernel(x, y)) - - # We do not allow the loss to become negative. - cost = tf.where(cost > 0, cost, 0, name='value') - return cost - - -def mmd_loss(source_samples, target_samples, weight, scope=None): - """Adds a similarity loss term, the MMD between two representations. - - This Maximum Mean Discrepancy (MMD) loss is calculated with a number of - different Gaussian kernels. 
- - Args: - source_samples: a tensor of shape [num_samples, num_features]. - target_samples: a tensor of shape [num_samples, num_features]. - weight: the weight of the MMD loss. - scope: optional name scope for summary tags. - - Returns: - a scalar tensor representing the MMD loss value. - """ - sigmas = [ - 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 20, 25, 30, 35, 100, - 1e3, 1e4, 1e5, 1e6 - ] - gaussian_kernel = partial( - utils.gaussian_kernel_matrix, sigmas=tf.constant(sigmas)) - - loss_value = maximum_mean_discrepancy( - source_samples, target_samples, kernel=gaussian_kernel) - loss_value = tf.maximum(1e-4, loss_value) * weight - assert_op = tf.Assert(tf.is_finite(loss_value), [loss_value]) - with tf.control_dependencies([assert_op]): - tag = 'MMD Loss' - if scope: - tag = scope + tag - tf.summary.scalar(tag, loss_value) - tf.losses.add_loss(loss_value) - - return loss_value - - -def correlation_loss(source_samples, target_samples, weight, scope=None): - """Adds a similarity loss term, the correlation between two representations. - - Args: - source_samples: a tensor of shape [num_samples, num_features] - target_samples: a tensor of shape [num_samples, num_features] - weight: a scalar weight for the loss. - scope: optional name scope for summary tags. - - Returns: - a scalar tensor representing the correlation loss value. 
- """ - with tf.name_scope('corr_loss'): - source_samples -= tf.reduce_mean(source_samples, 0) - target_samples -= tf.reduce_mean(target_samples, 0) - - source_samples = tf.nn.l2_normalize(source_samples, 1) - target_samples = tf.nn.l2_normalize(target_samples, 1) - - source_cov = tf.matmul(tf.transpose(source_samples), source_samples) - target_cov = tf.matmul(tf.transpose(target_samples), target_samples) - - corr_loss = tf.reduce_mean(tf.square(source_cov - target_cov)) * weight - - assert_op = tf.Assert(tf.is_finite(corr_loss), [corr_loss]) - with tf.control_dependencies([assert_op]): - tag = 'Correlation Loss' - if scope: - tag = scope + tag - tf.summary.scalar(tag, corr_loss) - tf.losses.add_loss(corr_loss) - - return corr_loss - - -def dann_loss(source_samples, target_samples, weight, scope=None): - """Adds the domain adversarial (DANN) loss. - - Args: - source_samples: a tensor of shape [num_samples, num_features]. - target_samples: a tensor of shape [num_samples, num_features]. - weight: the weight of the loss. - scope: optional name scope for summary tags. - - Returns: - a scalar tensor representing the correlation loss value. - """ - with tf.variable_scope('dann'): - batch_size = tf.shape(source_samples)[0] - samples = tf.concat(axis=0, values=[source_samples, target_samples]) - samples = slim.flatten(samples) - - domain_selection_mask = tf.concat( - axis=0, values=[tf.zeros((batch_size, 1)), tf.ones((batch_size, 1))]) - - # Perform the gradient reversal and be careful with the shape. 
- grl = grl_ops.gradient_reversal(samples) - grl = tf.reshape(grl, (-1, samples.get_shape().as_list()[1])) - - grl = slim.fully_connected(grl, 100, scope='fc1') - logits = slim.fully_connected(grl, 1, activation_fn=None, scope='fc2') - - domain_predictions = tf.sigmoid(logits) - - domain_loss = tf.losses.log_loss( - domain_selection_mask, domain_predictions, weights=weight) - - domain_accuracy = utils.accuracy( - tf.round(domain_predictions), domain_selection_mask) - - assert_op = tf.Assert(tf.is_finite(domain_loss), [domain_loss]) - with tf.control_dependencies([assert_op]): - tag_loss = 'losses/domain_loss' - tag_accuracy = 'losses/domain_accuracy' - if scope: - tag_loss = scope + tag_loss - tag_accuracy = scope + tag_accuracy - - tf.summary.scalar(tag_loss, domain_loss) - tf.summary.scalar(tag_accuracy, domain_accuracy) - - return domain_loss - - -################################################################################ -# DIFFERENCE LOSS -################################################################################ -def difference_loss(private_samples, shared_samples, weight=1.0, name=''): - """Adds the difference loss between the private and shared representations. - - Args: - private_samples: a tensor of shape [num_samples, num_features]. - shared_samples: a tensor of shape [num_samples, num_features]. - weight: the weight of the incoherence loss. - name: the name of the tf summary. 
- """ - private_samples -= tf.reduce_mean(private_samples, 0) - shared_samples -= tf.reduce_mean(shared_samples, 0) - - private_samples = tf.nn.l2_normalize(private_samples, 1) - shared_samples = tf.nn.l2_normalize(shared_samples, 1) - - correlation_matrix = tf.matmul( - private_samples, shared_samples, transpose_a=True) - - cost = tf.reduce_mean(tf.square(correlation_matrix)) * weight - cost = tf.where(cost > 0, cost, 0, name='value') - - tf.summary.scalar('losses/Difference Loss {}'.format(name), - cost) - assert_op = tf.Assert(tf.is_finite(cost), [cost]) - with tf.control_dependencies([assert_op]): - tf.losses.add_loss(cost) - - -################################################################################ -# TASK LOSS -################################################################################ -def log_quaternion_loss_batch(predictions, labels, params): - """A helper function to compute the error between quaternions. - - Args: - predictions: A Tensor of size [batch_size, 4]. - labels: A Tensor of size [batch_size, 4]. - params: A dictionary of parameters. Expecting 'use_logging', 'batch_size'. - - Returns: - A Tensor of size [batch_size], denoting the error between the quaternions. 
- """ - use_logging = params['use_logging'] - assertions = [] - if use_logging: - assertions.append( - tf.Assert( - tf.reduce_all( - tf.less( - tf.abs(tf.reduce_sum(tf.square(predictions), [1]) - 1), - 1e-4)), - ['The l2 norm of each prediction quaternion vector should be 1.'])) - assertions.append( - tf.Assert( - tf.reduce_all( - tf.less( - tf.abs(tf.reduce_sum(tf.square(labels), [1]) - 1), 1e-4)), - ['The l2 norm of each label quaternion vector should be 1.'])) - - with tf.control_dependencies(assertions): - product = tf.multiply(predictions, labels) - internal_dot_products = tf.reduce_sum(product, [1]) - - if use_logging: - internal_dot_products = tf.Print( - internal_dot_products, - [internal_dot_products, tf.shape(internal_dot_products)], - 'internal_dot_products:') - - logcost = tf.log(1e-4 + 1 - tf.abs(internal_dot_products)) - return logcost - - -def log_quaternion_loss(predictions, labels, params): - """A helper function to compute the mean error between batches of quaternions. - - The caller is expected to add the loss to the graph. - - Args: - predictions: A Tensor of size [batch_size, 4]. - labels: A Tensor of size [batch_size, 4]. - params: A dictionary of parameters. Expecting 'use_logging', 'batch_size'. - - Returns: - A Tensor of size 1, denoting the mean error between batches of quaternions. 
- """ - use_logging = params['use_logging'] - logcost = log_quaternion_loss_batch(predictions, labels, params) - logcost = tf.reduce_sum(logcost, [0]) - batch_size = params['batch_size'] - logcost = tf.multiply(logcost, 1.0 / batch_size, name='log_quaternion_loss') - if use_logging: - logcost = tf.Print( - logcost, [logcost], '[logcost]', name='log_quaternion_loss_print') - return logcost diff --git a/research/domain_adaptation/domain_separation/losses_test.py b/research/domain_adaptation/domain_separation/losses_test.py deleted file mode 100644 index 46e50301be56f5977adcb3fb00587f076934b785..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/losses_test.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for DSN losses.""" -from functools import partial - -import numpy as np -import tensorflow as tf - -import losses -import utils - - -def MaximumMeanDiscrepancySlow(x, y, sigmas): - num_samples = x.get_shape().as_list()[0] - - def AverageGaussianKernel(x, y, sigmas): - result = 0 - for sigma in sigmas: - dist = tf.reduce_sum(tf.square(x - y)) - result += tf.exp((-1.0 / (2.0 * sigma)) * dist) - return result / num_samples**2 - - total = 0 - - for i in range(num_samples): - for j in range(num_samples): - total += AverageGaussianKernel(x[i, :], x[j, :], sigmas) - total += AverageGaussianKernel(y[i, :], y[j, :], sigmas) - total += -2 * AverageGaussianKernel(x[i, :], y[j, :], sigmas) - - return total - - -class LogQuaternionLossTest(tf.test.TestCase): - - def test_log_quaternion_loss_batch(self): - with self.test_session(): - predictions = tf.random_uniform((10, 4), seed=1) - predictions = tf.nn.l2_normalize(predictions, 1) - labels = tf.random_uniform((10, 4), seed=1) - labels = tf.nn.l2_normalize(labels, 1) - params = {'batch_size': 10, 'use_logging': False} - x = losses.log_quaternion_loss_batch(predictions, labels, params) - self.assertTrue(((10,) == tf.shape(x).eval()).all()) - - -class MaximumMeanDiscrepancyTest(tf.test.TestCase): - - def test_mmd_name(self): - with self.test_session(): - x = tf.random_uniform((2, 3), seed=1) - kernel = partial(utils.gaussian_kernel_matrix, sigmas=tf.constant([1.])) - loss = losses.maximum_mean_discrepancy(x, x, kernel) - - self.assertEquals(loss.op.name, 'MaximumMeanDiscrepancy/value') - - def test_mmd_is_zero_when_inputs_are_same(self): - with self.test_session(): - x = tf.random_uniform((2, 3), seed=1) - kernel = partial(utils.gaussian_kernel_matrix, sigmas=tf.constant([1.])) - self.assertEquals(0, losses.maximum_mean_discrepancy(x, x, kernel).eval()) - - def test_fast_mmd_is_similar_to_slow_mmd(self): - with self.test_session(): - x = 
tf.constant(np.random.normal(size=(2, 3)), tf.float32) - y = tf.constant(np.random.rand(2, 3), tf.float32) - - cost_old = MaximumMeanDiscrepancySlow(x, y, [1.]).eval() - kernel = partial(utils.gaussian_kernel_matrix, sigmas=tf.constant([1.])) - cost_new = losses.maximum_mean_discrepancy(x, y, kernel).eval() - - self.assertAlmostEqual(cost_old, cost_new, delta=1e-5) - - def test_multiple_sigmas(self): - with self.test_session(): - x = tf.constant(np.random.normal(size=(2, 3)), tf.float32) - y = tf.constant(np.random.rand(2, 3), tf.float32) - - sigmas = tf.constant([2., 5., 10, 20, 30]) - kernel = partial(utils.gaussian_kernel_matrix, sigmas=sigmas) - cost_old = MaximumMeanDiscrepancySlow(x, y, [2., 5., 10, 20, 30]).eval() - cost_new = losses.maximum_mean_discrepancy(x, y, kernel=kernel).eval() - - self.assertAlmostEqual(cost_old, cost_new, delta=1e-5) - - def test_mmd_is_zero_when_distributions_are_same(self): - - with self.test_session(): - x = tf.random_uniform((1000, 10), seed=1) - y = tf.random_uniform((1000, 10), seed=3) - - kernel = partial(utils.gaussian_kernel_matrix, sigmas=tf.constant([100.])) - loss = losses.maximum_mean_discrepancy(x, y, kernel=kernel).eval() - - self.assertAlmostEqual(0, loss, delta=1e-4) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/domain_adaptation/domain_separation/models.py b/research/domain_adaptation/domain_separation/models.py deleted file mode 100644 index 04ccaf82eb9b31a6ea78871204c7df70eca3fbfd..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/models.py +++ /dev/null @@ -1,443 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains different architectures for the different DSN parts. - -We define here the modules that can be used in the different parts of the DSN -model. -- shared encoder (dsn_cropped_linemod, dann_xxxx) -- private encoder (default_encoder) -- decoder (large_decoder, gtsrb_decoder, small_decoder) -""" -import tensorflow as tf - -#from models.domain_adaptation.domain_separation -import utils - -slim = tf.contrib.slim - - -def default_batch_norm_params(is_training=False): - """Returns default batch normalization parameters for DSNs. - - Args: - is_training: whether or not the model is training. - - Returns: - a dictionary that maps batch norm parameter names (strings) to values. - """ - return { - # Decay for the moving averages. - 'decay': 0.5, - # epsilon to prevent 0s in variance. - 'epsilon': 0.001, - 'is_training': is_training - } - - -################################################################################ -# PRIVATE ENCODERS -################################################################################ -def default_encoder(images, code_size, batch_norm_params=None, - weight_decay=0.0): - """Encodes the given images to codes of the given size. - - Args: - images: a tensor of size [batch_size, height, width, 1]. - code_size: the number of hidden units in the code layer of the classifier. - batch_norm_params: a dictionary that maps batch norm parameter names to - values. - weight_decay: the value for the weight decay coefficient. 
- - Returns: - end_points: the code of the input. - """ - end_points = {} - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - with slim.arg_scope([slim.conv2d], kernel_size=[5, 5], padding='SAME'): - net = slim.conv2d(images, 32, scope='conv1') - net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') - net = slim.conv2d(net, 64, scope='conv2') - net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') - - net = slim.flatten(net) - end_points['flatten'] = net - net = slim.fully_connected(net, code_size, scope='fc1') - end_points['fc3'] = net - return end_points - - -################################################################################ -# DECODERS -################################################################################ -def large_decoder(codes, - height, - width, - channels, - batch_norm_params=None, - weight_decay=0.0): - """Decodes the codes to a fixed output size. - - Args: - codes: a tensor of size [batch_size, code_size]. - height: the height of the output images. - width: the width of the output images. - channels: the number of the output channels. - batch_norm_params: a dictionary that maps batch norm parameter names to - values. - weight_decay: the value for the weight decay coefficient. - - Returns: - recons: the reconstruction tensor of shape [batch_size, height, width, 3]. 
- """ - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - net = slim.fully_connected(codes, 600, scope='fc1') - batch_size = net.get_shape().as_list()[0] - net = tf.reshape(net, [batch_size, 10, 10, 6]) - - net = slim.conv2d(net, 32, [5, 5], scope='conv1_1') - - net = tf.image.resize_nearest_neighbor(net, (16, 16)) - - net = slim.conv2d(net, 32, [5, 5], scope='conv2_1') - - net = tf.image.resize_nearest_neighbor(net, (32, 32)) - - net = slim.conv2d(net, 32, [5, 5], scope='conv3_2') - - output_size = [height, width] - net = tf.image.resize_nearest_neighbor(net, output_size) - - with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]): - net = slim.conv2d(net, channels, activation_fn=None, scope='conv4_1') - - return net - - -def gtsrb_decoder(codes, - height, - width, - channels, - batch_norm_params=None, - weight_decay=0.0): - """Decodes the codes to a fixed output size. This decoder is specific to GTSRB - - Args: - codes: a tensor of size [batch_size, 100]. - height: the height of the output images. - width: the width of the output images. - channels: the number of the output channels. - batch_norm_params: a dictionary that maps batch norm parameter names to - values. - weight_decay: the value for the weight decay coefficient. - - Returns: - recons: the reconstruction tensor of shape [batch_size, height, width, 3]. - - Raises: - ValueError: When the input code size is not 100. 
- """ - batch_size, code_size = codes.get_shape().as_list() - if code_size != 100: - raise ValueError('The code size used as an input to the GTSRB decoder is ' - 'expected to be 100.') - - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - net = codes - net = tf.reshape(net, [batch_size, 10, 10, 1]) - net = slim.conv2d(net, 32, [3, 3], scope='conv1_1') - - # First upsampling 20x20 - net = tf.image.resize_nearest_neighbor(net, [20, 20]) - - net = slim.conv2d(net, 32, [3, 3], scope='conv2_1') - - output_size = [height, width] - # Final upsampling 40 x 40 - net = tf.image.resize_nearest_neighbor(net, output_size) - - with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]): - net = slim.conv2d(net, 16, scope='conv3_1') - net = slim.conv2d(net, channels, activation_fn=None, scope='conv3_2') - - return net - - -def small_decoder(codes, - height, - width, - channels, - batch_norm_params=None, - weight_decay=0.0): - """Decodes the codes to a fixed output size. - - Args: - codes: a tensor of size [batch_size, code_size]. - height: the height of the output images. - width: the width of the output images. - channels: the number of the output channels. - batch_norm_params: a dictionary that maps batch norm parameter names to - values. - weight_decay: the value for the weight decay coefficient. - - Returns: - recons: the reconstruction tensor of shape [batch_size, height, width, 3]. 
- """ - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - net = slim.fully_connected(codes, 300, scope='fc1') - batch_size = net.get_shape().as_list()[0] - net = tf.reshape(net, [batch_size, 10, 10, 3]) - - net = slim.conv2d(net, 16, [3, 3], scope='conv1_1') - net = slim.conv2d(net, 16, [3, 3], scope='conv1_2') - - output_size = [height, width] - net = tf.image.resize_nearest_neighbor(net, output_size) - - with slim.arg_scope([slim.conv2d], kernel_size=[3, 3]): - net = slim.conv2d(net, 16, scope='conv2_1') - net = slim.conv2d(net, channels, activation_fn=None, scope='conv2_2') - - return net - - -################################################################################ -# SHARED ENCODERS -################################################################################ -def dann_mnist(images, - weight_decay=0.0, - prefix='model', - num_classes=10, - **kwargs): - """Creates a convolution MNIST model. - - Note that this model implements the architecture for MNIST proposed in: - Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN), - JMLR 2015 - - Args: - images: the MNIST digits, a tensor of size [batch_size, 28, 28, 1]. - weight_decay: the value for the weight decay coefficient. - prefix: name of the model to use when prefixing tags. - num_classes: the number of output classes to use. - **kwargs: Placeholder for keyword arguments used by other shared encoders. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. 
- """ - end_points = {} - - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu,): - with slim.arg_scope([slim.conv2d], padding='SAME'): - end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') - end_points['pool1'] = slim.max_pool2d( - end_points['conv1'], [2, 2], 2, scope='pool1') - end_points['conv2'] = slim.conv2d( - end_points['pool1'], 48, [5, 5], scope='conv2') - end_points['pool2'] = slim.max_pool2d( - end_points['conv2'], [2, 2], 2, scope='pool2') - end_points['fc3'] = slim.fully_connected( - slim.flatten(end_points['pool2']), 100, scope='fc3') - end_points['fc4'] = slim.fully_connected( - slim.flatten(end_points['fc3']), 100, scope='fc4') - - logits = slim.fully_connected( - end_points['fc4'], num_classes, activation_fn=None, scope='fc5') - - return logits, end_points - - -def dann_svhn(images, - weight_decay=0.0, - prefix='model', - num_classes=10, - **kwargs): - """Creates the convolutional SVHN model. - - Note that this model implements the architecture for MNIST proposed in: - Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN), - JMLR 2015 - - Args: - images: the SVHN digits, a tensor of size [batch_size, 32, 32, 3]. - weight_decay: the value for the weight decay coefficient. - prefix: name of the model to use when prefixing tags. - num_classes: the number of output classes to use. - **kwargs: Placeholder for keyword arguments used by other shared encoders. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. 
- """ - - end_points = {} - - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu,): - with slim.arg_scope([slim.conv2d], padding='SAME'): - - end_points['conv1'] = slim.conv2d(images, 64, [5, 5], scope='conv1') - end_points['pool1'] = slim.max_pool2d( - end_points['conv1'], [3, 3], 2, scope='pool1') - end_points['conv2'] = slim.conv2d( - end_points['pool1'], 64, [5, 5], scope='conv2') - end_points['pool2'] = slim.max_pool2d( - end_points['conv2'], [3, 3], 2, scope='pool2') - end_points['conv3'] = slim.conv2d( - end_points['pool2'], 128, [5, 5], scope='conv3') - - end_points['fc3'] = slim.fully_connected( - slim.flatten(end_points['conv3']), 3072, scope='fc3') - end_points['fc4'] = slim.fully_connected( - slim.flatten(end_points['fc3']), 2048, scope='fc4') - - logits = slim.fully_connected( - end_points['fc4'], num_classes, activation_fn=None, scope='fc5') - - return logits, end_points - - -def dann_gtsrb(images, - weight_decay=0.0, - prefix='model', - num_classes=43, - **kwargs): - """Creates the convolutional GTSRB model. - - Note that this model implements the architecture for MNIST proposed in: - Y. Ganin et al., Domain-Adversarial Training of Neural Networks (DANN), - JMLR 2015 - - Args: - images: the GTSRB images, a tensor of size [batch_size, 40, 40, 3]. - weight_decay: the value for the weight decay coefficient. - prefix: name of the model to use when prefixing tags. - num_classes: the number of output classes to use. - **kwargs: Placeholder for keyword arguments used by other shared encoders. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. 
- """ - - end_points = {} - - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu,): - with slim.arg_scope([slim.conv2d], padding='SAME'): - - end_points['conv1'] = slim.conv2d(images, 96, [5, 5], scope='conv1') - end_points['pool1'] = slim.max_pool2d( - end_points['conv1'], [2, 2], 2, scope='pool1') - end_points['conv2'] = slim.conv2d( - end_points['pool1'], 144, [3, 3], scope='conv2') - end_points['pool2'] = slim.max_pool2d( - end_points['conv2'], [2, 2], 2, scope='pool2') - end_points['conv3'] = slim.conv2d( - end_points['pool2'], 256, [5, 5], scope='conv3') - end_points['pool3'] = slim.max_pool2d( - end_points['conv3'], [2, 2], 2, scope='pool3') - - end_points['fc3'] = slim.fully_connected( - slim.flatten(end_points['pool3']), 512, scope='fc3') - - logits = slim.fully_connected( - end_points['fc3'], num_classes, activation_fn=None, scope='fc4') - - return logits, end_points - - -def dsn_cropped_linemod(images, - weight_decay=0.0, - prefix='model', - num_classes=11, - batch_norm_params=None, - is_training=False): - """Creates the convolutional pose estimation model for Cropped Linemod. - - Args: - images: the Cropped Linemod samples, a tensor of size - [batch_size, 64, 64, 4]. - weight_decay: the value for the weight decay coefficient. - prefix: name of the model to use when prefixing tags. - num_classes: the number of output classes to use. - batch_norm_params: a dictionary that maps batch norm parameter names to - values. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. 
- """ - - end_points = {} - - tf.summary.image('{}/input_images'.format(prefix), images) - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm if batch_norm_params else None, - normalizer_params=batch_norm_params): - with slim.arg_scope([slim.conv2d], padding='SAME'): - end_points['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') - end_points['pool1'] = slim.max_pool2d( - end_points['conv1'], [2, 2], 2, scope='pool1') - end_points['conv2'] = slim.conv2d( - end_points['pool1'], 64, [5, 5], scope='conv2') - end_points['pool2'] = slim.max_pool2d( - end_points['conv2'], [2, 2], 2, scope='pool2') - net = slim.flatten(end_points['pool2']) - end_points['fc3'] = slim.fully_connected(net, 128, scope='fc3') - net = slim.dropout( - end_points['fc3'], 0.5, is_training=is_training, scope='dropout') - - with tf.variable_scope('quaternion_prediction'): - predicted_quaternion = slim.fully_connected( - net, 4, activation_fn=tf.nn.tanh) - predicted_quaternion = tf.nn.l2_normalize(predicted_quaternion, 1) - logits = slim.fully_connected( - net, num_classes, activation_fn=None, scope='fc4') - end_points['quaternion_pred'] = predicted_quaternion - - return logits, end_points diff --git a/research/domain_adaptation/domain_separation/models_test.py b/research/domain_adaptation/domain_separation/models_test.py deleted file mode 100644 index 69d1a27259022569cc5865e49dd6bba5675d834f..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/models_test.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for DSN components.""" - -import numpy as np -import tensorflow as tf - -#from models.domain_adaptation.domain_separation -import models - - -class SharedEncodersTest(tf.test.TestCase): - - def _testSharedEncoder(self, - input_shape=[5, 28, 28, 1], - model=models.dann_mnist, - is_training=True): - images = tf.to_float(np.random.rand(*input_shape)) - - with self.test_session() as sess: - logits, _ = model(images) - sess.run(tf.global_variables_initializer()) - logits_np = sess.run(logits) - return logits_np - - def testBuildGRLMnistModel(self): - logits = self._testSharedEncoder(model=getattr(models, - 'dann_mnist')) - self.assertEqual(logits.shape, (5, 10)) - self.assertTrue(np.any(logits)) - - def testBuildGRLSvhnModel(self): - logits = self._testSharedEncoder(model=getattr(models, - 'dann_svhn')) - self.assertEqual(logits.shape, (5, 10)) - self.assertTrue(np.any(logits)) - - def testBuildGRLGtsrbModel(self): - logits = self._testSharedEncoder([5, 40, 40, 3], - getattr(models, 'dann_gtsrb')) - self.assertEqual(logits.shape, (5, 43)) - self.assertTrue(np.any(logits)) - - def testBuildPoseModel(self): - logits = self._testSharedEncoder([5, 64, 64, 4], - getattr(models, 'dsn_cropped_linemod')) - self.assertEqual(logits.shape, (5, 11)) - self.assertTrue(np.any(logits)) - - def testBuildPoseModelWithBatchNorm(self): - images = tf.to_float(np.random.rand(10, 64, 64, 4)) - - with self.test_session() as sess: - logits, _ = getattr(models, 'dsn_cropped_linemod')( - images, 
batch_norm_params=models.default_batch_norm_params(True)) - sess.run(tf.global_variables_initializer()) - logits_np = sess.run(logits) - self.assertEqual(logits_np.shape, (10, 11)) - self.assertTrue(np.any(logits_np)) - - -class EncoderTest(tf.test.TestCase): - - def _testEncoder(self, batch_norm_params=None, channels=1): - images = tf.to_float(np.random.rand(10, 28, 28, channels)) - - with self.test_session() as sess: - end_points = models.default_encoder( - images, 128, batch_norm_params=batch_norm_params) - sess.run(tf.global_variables_initializer()) - private_code = sess.run(end_points['fc3']) - self.assertEqual(private_code.shape, (10, 128)) - self.assertTrue(np.any(private_code)) - self.assertTrue(np.all(np.isfinite(private_code))) - - def testEncoder(self): - self._testEncoder() - - def testEncoderMultiChannel(self): - self._testEncoder(None, 4) - - def testEncoderIsTrainingBatchNorm(self): - self._testEncoder(models.default_batch_norm_params(True)) - - def testEncoderBatchNorm(self): - self._testEncoder(models.default_batch_norm_params(False)) - - -class DecoderTest(tf.test.TestCase): - - def _testDecoder(self, - height=64, - width=64, - channels=4, - batch_norm_params=None, - decoder=models.small_decoder): - codes = tf.to_float(np.random.rand(32, 100)) - - with self.test_session() as sess: - output = decoder( - codes, - height=height, - width=width, - channels=channels, - batch_norm_params=batch_norm_params) - sess.run(tf.global_variables_initializer()) - output_np = sess.run(output) - self.assertEqual(output_np.shape, (32, height, width, channels)) - self.assertTrue(np.any(output_np)) - self.assertTrue(np.all(np.isfinite(output_np))) - - def testSmallDecoder(self): - self._testDecoder(28, 28, 4, None, getattr(models, 'small_decoder')) - - def testSmallDecoderThreeChannels(self): - self._testDecoder(28, 28, 3) - - def testSmallDecoderBatchNorm(self): - self._testDecoder(28, 28, 4, models.default_batch_norm_params(False)) - - def 
testSmallDecoderIsTrainingBatchNorm(self): - self._testDecoder(28, 28, 4, models.default_batch_norm_params(True)) - - def testLargeDecoder(self): - self._testDecoder(32, 32, 4, None, getattr(models, 'large_decoder')) - - def testLargeDecoderThreeChannels(self): - self._testDecoder(32, 32, 3, None, getattr(models, 'large_decoder')) - - def testLargeDecoderBatchNorm(self): - self._testDecoder(32, 32, 4, - models.default_batch_norm_params(False), - getattr(models, 'large_decoder')) - - def testLargeDecoderIsTrainingBatchNorm(self): - self._testDecoder(32, 32, 4, - models.default_batch_norm_params(True), - getattr(models, 'large_decoder')) - - def testGtsrbDecoder(self): - self._testDecoder(40, 40, 3, None, getattr(models, 'large_decoder')) - - def testGtsrbDecoderBatchNorm(self): - self._testDecoder(40, 40, 4, - models.default_batch_norm_params(False), - getattr(models, 'gtsrb_decoder')) - - def testGtsrbDecoderIsTrainingBatchNorm(self): - self._testDecoder(40, 40, 4, - models.default_batch_norm_params(True), - getattr(models, 'gtsrb_decoder')) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/domain_adaptation/domain_separation/utils.py b/research/domain_adaptation/domain_separation/utils.py deleted file mode 100644 index e144ee86120bd58eb06b710fb35f3f58b5a05343..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/domain_separation/utils.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Auxiliary functions for domain adaptation related losses. -""" -import math -import tensorflow as tf - - -def create_summaries(end_points, prefix='', max_images=3, use_op_name=False): - """Creates a tf summary per endpoint. - - If the endpoint is a 4 dimensional tensor it displays it as an image - otherwise if it is a two dimensional one it creates a histogram summary. - - Args: - end_points: a dictionary of name, tf tensor pairs. - prefix: an optional string to prefix the summary with. - max_images: the maximum number of images to display per summary. - use_op_name: Use the op name as opposed to the shorter end_points key. - """ - for layer_name in end_points: - if use_op_name: - name = end_points[layer_name].op.name - else: - name = layer_name - if len(end_points[layer_name].get_shape().as_list()) == 4: - # if it's an actual image do not attempt to reshape it - if end_points[layer_name].get_shape().as_list()[-1] == 1 or end_points[ - layer_name].get_shape().as_list()[-1] == 3: - visualization_image = end_points[layer_name] - else: - visualization_image = reshape_feature_maps(end_points[layer_name]) - tf.summary.image( - '{}/{}'.format(prefix, name), - visualization_image, - max_outputs=max_images) - elif len(end_points[layer_name].get_shape().as_list()) == 3: - images = tf.expand_dims(end_points[layer_name], 3) - tf.summary.image( - '{}/{}'.format(prefix, name), - images, - max_outputs=max_images) - elif len(end_points[layer_name].get_shape().as_list()) == 2: - tf.summary.histogram('{}/{}'.format(prefix, name), end_points[layer_name]) - - -def reshape_feature_maps(features_tensor): - """Reshape activations for tf.summary.image visualization. - - Arguments: - features_tensor: a tensor of activations with a square number of feature - maps, eg 4, 9, 16, etc. 
- Returns: - A composite image with all the feature maps that can be passed as an - argument to tf.summary.image. - """ - assert len(features_tensor.get_shape().as_list()) == 4 - num_filters = features_tensor.get_shape().as_list()[-1] - assert num_filters > 0 - num_filters_sqrt = math.sqrt(num_filters) - assert num_filters_sqrt.is_integer( - ), 'Number of filters should be a square number but got {}'.format( - num_filters) - num_filters_sqrt = int(num_filters_sqrt) - conv_summary = tf.unstack(features_tensor, axis=3) - conv_one_row = tf.concat(axis=2, values=conv_summary[0:num_filters_sqrt]) - ind = 1 - conv_final = conv_one_row - for ind in range(1, num_filters_sqrt): - conv_one_row = tf.concat(axis=2, - values=conv_summary[ - ind * num_filters_sqrt + 0:ind * num_filters_sqrt + num_filters_sqrt]) - conv_final = tf.concat( - axis=1, values=[tf.squeeze(conv_final), tf.squeeze(conv_one_row)]) - conv_final = tf.expand_dims(conv_final, -1) - return conv_final - - -def accuracy(predictions, labels): - """Calculates the classificaton accuracy. - - Args: - predictions: the predicted values, a tensor whose size matches 'labels'. - labels: the ground truth values, a tensor of any size. - - Returns: - a tensor whose value on evaluation returns the total accuracy. - """ - return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32)) - - -def compute_upsample_values(input_tensor, upsample_height, upsample_width): - """Compute values for an upsampling op (ops.BatchCropAndResize). - - Args: - input_tensor: image tensor with shape [batch, height, width, in_channels] - upsample_height: integer - upsample_width: integer - - Returns: - grid_centers: tensor with shape [batch, 1] - crop_sizes: tensor with shape [batch, 1] - output_height: integer - output_width: integer - """ - batch, input_height, input_width, _ = input_tensor.shape - - height_half = input_height / 2. - width_half = input_width / 2. 
- grid_centers = tf.constant(batch * [[height_half, width_half]]) - crop_sizes = tf.constant(batch * [[input_height, input_width]]) - output_height = input_height * upsample_height - output_width = input_width * upsample_width - - return grid_centers, tf.to_float(crop_sizes), output_height, output_width - - -def compute_pairwise_distances(x, y): - """Computes the squared pairwise Euclidean distances between x and y. - - Args: - x: a tensor of shape [num_x_samples, num_features] - y: a tensor of shape [num_y_samples, num_features] - - Returns: - a distance matrix of dimensions [num_x_samples, num_y_samples]. - - Raises: - ValueError: if the inputs do no matched the specified dimensions. - """ - - if not len(x.get_shape()) == len(y.get_shape()) == 2: - raise ValueError('Both inputs should be matrices.') - - if x.get_shape().as_list()[1] != y.get_shape().as_list()[1]: - raise ValueError('The number of features should be the same.') - - norm = lambda x: tf.reduce_sum(tf.square(x), 1) - - # By making the `inner' dimensions of the two matrices equal to 1 using - # broadcasting then we are essentially substracting every pair of rows - # of x and y. - # x will be num_samples x num_features x 1, - # and y will be 1 x num_features x num_samples (after broadcasting). - # After the substraction we will get a - # num_x_samples x num_features x num_y_samples matrix. - # The resulting dist will be of shape num_y_samples x num_x_samples. - # and thus we need to transpose it again. - return tf.transpose(norm(tf.expand_dims(x, 2) - tf.transpose(y))) - - -def gaussian_kernel_matrix(x, y, sigmas): - r"""Computes a Guassian Radial Basis Kernel between the samples of x and y. - - We create a sum of multiple gaussian kernels each having a width sigma_i. - - Args: - x: a tensor of shape [num_samples, num_features] - y: a tensor of shape [num_samples, num_features] - sigmas: a tensor of floats which denote the widths of each of the - gaussians in the kernel. 
- Returns: - A tensor of shape [num_samples{x}, num_samples{y}] with the RBF kernel. - """ - beta = 1. / (2. * (tf.expand_dims(sigmas, 1))) - - dist = compute_pairwise_distances(x, y) - - s = tf.matmul(beta, tf.reshape(dist, (1, -1))) - - return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist)) diff --git a/research/domain_adaptation/pixel_domain_adaptation/BUILD b/research/domain_adaptation/pixel_domain_adaptation/BUILD deleted file mode 100644 index 2bc8d4a49a828f97b8f45166aa2bbc552d4a3b92..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/BUILD +++ /dev/null @@ -1,90 +0,0 @@ -# Description: -# Contains code for domain-adaptation style transfer. - -package( - default_visibility = [ - ":internal", - ], -) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//domain_adaptation/...", - ], -) - -py_library( - name = "pixelda_preprocess", - srcs = ["pixelda_preprocess.py"], - deps = [ - - ], -) - -py_test( - name = "pixelda_preprocess_test", - srcs = ["pixelda_preprocess_test.py"], - deps = [ - ":pixelda_preprocess", - - ], -) - -py_library( - name = "pixelda_model", - srcs = [ - "pixelda_model.py", - "pixelda_task_towers.py", - "hparams.py", - ], - deps = [ - - ], -) - -py_library( - name = "pixelda_utils", - srcs = ["pixelda_utils.py"], - deps = [ - - ], -) - -py_library( - name = "pixelda_losses", - srcs = ["pixelda_losses.py"], - deps = [ - - ], -) - -py_binary( - name = "pixelda_train", - srcs = ["pixelda_train.py"], - deps = [ - ":pixelda_losses", - ":pixelda_model", - ":pixelda_preprocess", - ":pixelda_utils", - - "//domain_adaptation/datasets:dataset_factory", - ], -) - -py_binary( - name = "pixelda_eval", - srcs = ["pixelda_eval.py"], - deps = [ - ":pixelda_losses", - ":pixelda_model", - ":pixelda_preprocess", - ":pixelda_utils", - - "//domain_adaptation/datasets:dataset_factory", - ], -) diff --git 
a/research/domain_adaptation/pixel_domain_adaptation/README.md b/research/domain_adaptation/pixel_domain_adaptation/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/domain_adaptation/pixel_domain_adaptation/baselines/BUILD b/research/domain_adaptation/pixel_domain_adaptation/baselines/BUILD deleted file mode 100644 index c41a4ffeee80114145c4c3fc32a2191879b1b08a..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/baselines/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -py_binary( - name = "baseline_train", - srcs = ["baseline_train.py"], - deps = [ - - "//domain_adaptation/datasets:dataset_factory", - "//domain_adaptation/pixel_domain_adaptation:pixelda_model", - "//domain_adaptation/pixel_domain_adaptation:pixelda_preprocess", - ], -) - -py_binary( - name = "baseline_eval", - srcs = ["baseline_eval.py"], - deps = [ - - "//domain_adaptation/datasets:dataset_factory", - "//domain_adaptation/pixel_domain_adaptation:pixelda_model", - "//domain_adaptation/pixel_domain_adaptation:pixelda_preprocess", - ], -) diff --git a/research/domain_adaptation/pixel_domain_adaptation/baselines/README.md b/research/domain_adaptation/pixel_domain_adaptation/baselines/README.md deleted file mode 100644 index d61195ad2de6867801143aeda906cb5efe30a5e3..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/baselines/README.md +++ /dev/null @@ -1,60 +0,0 @@ -The best baselines are obtainable via the following configuration: - - -## MNIST => MNIST_M - -Accuracy: -MNIST-Train: 99.9 -MNIST_M-Train: 63.9 -MNIST_M-Valid: 63.9 -MNIST_M-Test: 63.6 - -Learning Rate = 0.0001 -Weight Decay = 0.0 -Number of Steps: 105,000 - -## MNIST => USPS - -Accuracy: -MNIST-Train: 100.0 -USPS-Train: 82.8 -USPS-Valid: 82.8 -USPS-Test: 78.9 - -Learning Rate = 0.0001 -Weight Decay = 0.0 -Number of Steps: 
22,000 - -## MNIST_M => MNIST - -Accuracy: -MNIST_M-Train: 100 -MNIST-Train: 98.5 -MNIST-Valid: 98.5 -MNIST-Test: 98.1 - -Learning Rate = 0.001 -Weight Decay = 0.0 -Number of Steps: 604,400 - -## MNIST_M => MNIST_M - -Accuracy: -MNIST_M-Train: 100.0 -MNIST_M-Valid: 96.6 -MNIST_M-Test: 96.4 - -Learning Rate = 0.001 -Weight Decay = 0.0 -Number of Steps: 139,400 - -## USPS => USPS - -Accuracy: -USPS-Train: 100.0 -USPS-Valid: 100.0 -USPS-Test: 96.5 - -Learning Rate = 0.001 -Weight Decay = 0.0 -Number of Steps: 67,000 diff --git a/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_eval.py b/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_eval.py deleted file mode 100644 index 6b7ef6452b4897b00dc8c977bf40526ad5052ede..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_eval.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -r"""Evals the classification/pose baselines.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import math - -# Dependency imports - -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -from domain_adaptation.pixel_domain_adaptation import pixelda_preprocess -from domain_adaptation.pixel_domain_adaptation import pixelda_task_towers - -flags = tf.app.flags -FLAGS = flags.FLAGS - -slim = tf.contrib.slim - -flags.DEFINE_string('master', '', 'BNS name of the tensorflow server') - -flags.DEFINE_string( - 'checkpoint_dir', None, 'The location of the checkpoint files.') - -flags.DEFINE_string( - 'eval_dir', None, 'The directory where evaluation logs are written.') - -flags.DEFINE_integer('batch_size', 32, 'The number of samples per batch.') - -flags.DEFINE_string('dataset_name', None, 'The name of the dataset.') - -flags.DEFINE_string('dataset_dir', None, - 'The directory where the data is stored.') - -flags.DEFINE_string('split_name', None, 'The name of the train/test split.') - -flags.DEFINE_integer('eval_interval_secs', 60 * 5, - 'How often (in seconds) to run evaluation.') - -flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - -def main(unused_argv): - tf.logging.set_verbosity(tf.logging.INFO) - hparams = tf.contrib.training.HParams() - hparams.weight_decay_task_classifier = 0.0 - - if FLAGS.dataset_name in ['mnist', 'mnist_m', 'usps']: - hparams.task_tower = 'mnist' - else: - raise ValueError('Unknown dataset %s' % FLAGS.dataset_name) - - if not tf.gfile.Exists(FLAGS.eval_dir): - tf.gfile.MakeDirs(FLAGS.eval_dir) - - with tf.Graph().as_default(): - dataset = dataset_factory.get_dataset(FLAGS.dataset_name, FLAGS.split_name, - FLAGS.dataset_dir) - num_classes = dataset.num_classes - num_samples = dataset.num_samples - - preprocess_fn = 
partial(pixelda_preprocess.preprocess_classification, - is_training=False) - - images, labels = dataset_factory.provide_batch( - FLAGS.dataset_name, - FLAGS.split_name, - dataset_dir=FLAGS.dataset_dir, - num_readers=FLAGS.num_readers, - batch_size=FLAGS.batch_size, - num_preprocessing_threads=FLAGS.num_readers) - - # Define the model - logits, _ = pixelda_task_towers.add_task_specific_model( - images, hparams, num_classes=num_classes, is_training=True) - - ##################### - # Define the losses # - ##################### - if 'classes' in labels: - one_hot_labels = labels['classes'] - loss = tf.losses.softmax_cross_entropy( - onehot_labels=one_hot_labels, logits=logits) - tf.summary.scalar('losses/Classification_Loss', loss) - else: - raise ValueError('Only support classification for now.') - - total_loss = tf.losses.get_total_loss() - - predictions = tf.reshape(tf.argmax(logits, 1), shape=[-1]) - class_labels = tf.argmax(labels['classes'], 1) - - metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({ - 'Mean_Loss': - tf.contrib.metrics.streaming_mean(total_loss), - 'Accuracy': - tf.contrib.metrics.streaming_accuracy(predictions, - tf.reshape( - class_labels, - shape=[-1])), - 'Recall_at_5': - tf.contrib.metrics.streaming_recall_at_k(logits, class_labels, 5), - }) - - tf.summary.histogram('outputs/Predictions', predictions) - tf.summary.histogram('outputs/Ground_Truth', class_labels) - - for name, value in metrics_to_values.iteritems(): - tf.summary.scalar(name, value) - - num_batches = int(math.ceil(num_samples / float(FLAGS.batch_size))) - - slim.evaluation.evaluation_loop( - master=FLAGS.master, - checkpoint_dir=FLAGS.checkpoint_dir, - logdir=FLAGS.eval_dir, - num_evals=num_batches, - eval_op=metrics_to_updates.values(), - eval_interval_secs=FLAGS.eval_interval_secs) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_train.py 
b/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_train.py deleted file mode 100644 index 8c92bd81a7b68879000dd793ba2fd013f395f408..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/baselines/baseline_train.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Trains the classification/pose baselines.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -# Dependency imports - -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -from domain_adaptation.pixel_domain_adaptation import pixelda_preprocess -from domain_adaptation.pixel_domain_adaptation import pixelda_task_towers - -flags = tf.app.flags -FLAGS = flags.FLAGS - -slim = tf.contrib.slim - -flags.DEFINE_string('master', '', 'BNS name of the tensorflow server') - -flags.DEFINE_integer('task', 0, 'The task ID.') - -flags.DEFINE_integer('num_ps_tasks', 0, - 'The number of parameter servers. 
If the value is 0, then ' - 'the parameters are handled locally by the worker.') - -flags.DEFINE_integer('batch_size', 32, 'The number of samples per batch.') - -flags.DEFINE_string('dataset_name', None, 'The name of the dataset.') - -flags.DEFINE_string('dataset_dir', None, - 'The directory where the data is stored.') - -flags.DEFINE_string('split_name', None, 'The name of the train/test split.') - -flags.DEFINE_float('learning_rate', 0.001, 'The initial learning rate.') - -flags.DEFINE_integer( - 'learning_rate_decay_steps', 20000, - 'The frequency, in steps, at which the learning rate is decayed.') - -flags.DEFINE_float('learning_rate_decay_factor', - 0.95, - 'The factor with which the learning rate is decayed.') - -flags.DEFINE_float('adam_beta1', 0.5, 'The beta1 value for the AdamOptimizer') - -flags.DEFINE_float('weight_decay', 1e-5, - 'The L2 coefficient on the model weights.') - -flags.DEFINE_string( - 'logdir', None, 'The location of the logs and checkpoints.') - -flags.DEFINE_integer('save_interval_secs', 600, - 'How often, in seconds, we save the model to disk.') - -flags.DEFINE_integer('save_summaries_secs', 600, - 'How often, in seconds, we compute the summaries.') - -flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - -flags.DEFINE_float( - 'moving_average_decay', 0.9999, - 'The amount of decay to use for moving averages.') - - -def main(unused_argv): - tf.logging.set_verbosity(tf.logging.INFO) - hparams = tf.contrib.training.HParams() - hparams.weight_decay_task_classifier = FLAGS.weight_decay - - if FLAGS.dataset_name in ['mnist', 'mnist_m', 'usps']: - hparams.task_tower = 'mnist' - else: - raise ValueError('Unknown dataset %s' % FLAGS.dataset_name) - - with tf.Graph().as_default(): - with tf.device( - tf.train.replica_device_setter(FLAGS.num_ps_tasks, merge_devices=True)): - dataset = dataset_factory.get_dataset(FLAGS.dataset_name, - FLAGS.split_name, FLAGS.dataset_dir) - num_classes = 
dataset.num_classes - - preprocess_fn = partial(pixelda_preprocess.preprocess_classification, - is_training=True) - - images, labels = dataset_factory.provide_batch( - FLAGS.dataset_name, - FLAGS.split_name, - dataset_dir=FLAGS.dataset_dir, - num_readers=FLAGS.num_readers, - batch_size=FLAGS.batch_size, - num_preprocessing_threads=FLAGS.num_readers) - # preprocess_fn=preprocess_fn) - - # Define the model - logits, _ = pixelda_task_towers.add_task_specific_model( - images, hparams, num_classes=num_classes, is_training=True) - - # Define the losses - if 'classes' in labels: - one_hot_labels = labels['classes'] - loss = tf.losses.softmax_cross_entropy( - onehot_labels=one_hot_labels, logits=logits) - tf.summary.scalar('losses/Classification_Loss', loss) - else: - raise ValueError('Only support classification for now.') - - total_loss = tf.losses.get_total_loss() - tf.summary.scalar('losses/Total_Loss', total_loss) - - # Setup the moving averages - moving_average_variables = slim.get_model_variables() - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, slim.get_or_create_global_step()) - tf.add_to_collection( - tf.GraphKeys.UPDATE_OPS, - variable_averages.apply(moving_average_variables)) - - # Specify the optimization scheme: - learning_rate = tf.train.exponential_decay( - FLAGS.learning_rate, - slim.get_or_create_global_step(), - FLAGS.learning_rate_decay_steps, - FLAGS.learning_rate_decay_factor, - staircase=True) - - optimizer = tf.train.AdamOptimizer(learning_rate, beta1=FLAGS.adam_beta1) - - train_op = slim.learning.create_train_op(total_loss, optimizer) - - slim.learning.train( - train_op, - FLAGS.logdir, - master=FLAGS.master, - is_chief=(FLAGS.task == 0), - save_summaries_secs=FLAGS.save_summaries_secs, - save_interval_secs=FLAGS.save_interval_secs) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/pixel_domain_adaptation/hparams.py 
b/research/domain_adaptation/pixel_domain_adaptation/hparams.py deleted file mode 100644 index ba9539f7d435c86f9fc92ed3406835bdaf2b50f3..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/hparams.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Define model HParams.""" -import tensorflow as tf - - -def create_hparams(hparam_string=None): - """Create model hyperparameters. Parse nondefault from given string.""" - hparams = tf.contrib.training.HParams( - # The name of the architecture to use. 
- arch='resnet', - lrelu_leakiness=0.2, - batch_norm_decay=0.9, - weight_decay=1e-5, - normal_init_std=0.02, - generator_kernel_size=3, - discriminator_kernel_size=3, - - # Stop training after this many examples are processed - # If none, train indefinitely - num_training_examples=0, - - # Apply data augmentation to datasets - # Applies only in training job - augment_source_images=False, - augment_target_images=False, - - # Discriminator - # Number of filters in first layer of discriminator - num_discriminator_filters=64, - discriminator_conv_block_size=1, # How many convs to have at each size - discriminator_filter_factor=2.0, # Multiply # filters by this each layer - # Add gaussian noise with this stddev to every hidden layer of D - discriminator_noise_stddev=0.2, # lmetz: Start seeing results at >= 0.1 - # If true, add this gaussian noise to input images to D as well - discriminator_image_noise=False, - discriminator_first_stride=1, # Stride in first conv of discriminator - discriminator_do_pooling=False, # If true, replace stride 2 with avg pool - discriminator_dropout_keep_prob=0.9, # keep probability for dropout - - # DCGAN Generator - # Number of filters in generator decoder last layer (repeatedly halved - # from 1st layer) - num_decoder_filters=64, - # Number of filters in generator encoder 1st layer (repeatedly doubled - # after 1st layer) - num_encoder_filters=64, - - # This is the shape to which the noise vector is projected (if we're - # transferring from noise). - # Write this way instead of [4, 4, 64] for hparam search flexibility - projection_shape_size=4, - projection_shape_channels=64, - - # Indicates the method by which we enlarge the spatial representation - # of an image. Possible values include: - # - resize_conv: Performs a nearest neighbor resize followed by a conv. - # - conv2d_transpose: Performs a conv2d_transpose. 
- upsample_method='resize_conv', - - # Visualization - summary_steps=500, # Output image summary every N steps - - ################################### - # Task Classifier Hyperparameters # - ################################### - - # Which task-specific prediction tower to use. Possible choices are: - # none: No task tower. - # doubling_pose_estimator: classifier + quaternion regressor. - # [conv + pool]* + FC - # Classifiers used in DSN paper: - # gtsrb: Classifier used for GTSRB - # svhn: Classifier used for SVHN - # mnist: Classifier used for MNIST - # pose_mini: Classifier + regressor used for pose_mini - task_tower='doubling_pose_estimator', - weight_decay_task_classifier=1e-5, - source_task_loss_weight=1.0, - transferred_task_loss_weight=1.0, - - # Number of private layers in doubling_pose_estimator task tower - num_private_layers=2, - - # The weight for the log quaternion loss we use for source and transferred - # samples of the cropped_linemod dataset. - # In the DSN work, 1/8 of the classifier weight worked well for our log - # quaternion loss - source_pose_weight=0.125 * 2.0, - transferred_pose_weight=0.125 * 1.0, - - # If set to True, the style transfer network also attempts to change its - # weights to maximize the performance of the task tower. If set to False, - # then the style transfer network only attempts to change its weights to - # make the transferred images more likely according to the domain - # classifier. 
- task_tower_in_g_step=True, - task_loss_in_g_weight=1.0, # Weight of task loss in G - - ######################################### - # 'simple` generator arch model hparams # - ######################################### - simple_num_conv_layers=1, - simple_conv_filters=8, - - ######################### - # Resnet Hyperparameters# - ######################### - resnet_blocks=6, # Number of resnet blocks - resnet_filters=64, # Number of filters per conv in resnet blocks - # If true, add original input back to result of convolutions inside the - # resnet arch. If false, it turns into a simple stack of conv/relu/BN - # layers. - resnet_residuals=True, - - ####################################### - # The residual / interpretable model. # - ####################################### - res_int_blocks=2, # The number of residual blocks. - res_int_convs=2, # The number of conv calls inside each block. - res_int_filters=64, # The number of filters used by each convolution. - - #################### - # Latent variables # - #################### - # if true, then generate random noise and project to input for generator - noise_channel=True, - # The number of dimensions in the input noise vector. - noise_dims=10, - - # If true, then one hot encode source image class and project as an - # additional channel for the input to generator. This gives the generator - # access to the class, which may help generation performance. - condition_on_source_class=False, - - ######################## - # Loss Hyperparameters # - ######################## - domain_loss_weight=1.0, - style_transfer_loss_weight=1.0, - - ######################################################################## - # Encourages the transferred images to be similar to the source images # - # using a configurable metric. # - ######################################################################## - - # The weight of the loss function encouraging the source and transferred - # images to be similar. 
If set to 0, then the loss function is not used. - transferred_similarity_loss_weight=0.0, - - # The type of loss used to encourage transferred and source image - # similarity. Valid values include: - # mpse: Mean Pairwise Squared Error - # mse: Mean Squared Error - # hinged_mse: Computes the mean squared error using squared differences - # greater than hparams.transferred_similarity_max_diff - # hinged_mae: Computes the mean absolute error using absolute - # differences greater than hparams.transferred_similarity_max_diff. - transferred_similarity_loss='mpse', - - # The maximum allowable difference between the source and target images. - # This value is used, in effect, to produce a hinge loss. Note that the - # range of values should be between 0 and 1. - transferred_similarity_max_diff=0.4, - - ################################ - # Optimization Hyperparameters # - ################################ - learning_rate=0.001, - batch_size=32, - lr_decay_steps=20000, - lr_decay_rate=0.95, - - # Recomendation from the DCGAN paper: - adam_beta1=0.5, - clip_gradient_norm=5.0, - - # The number of times we run the discriminator train_op in a row. - discriminator_steps=1, - - # The number of times we run the generator train_op in a row. - generator_steps=1) - - if hparam_string: - tf.logging.info('Parsing command line hparams: %s', hparam_string) - hparams.parse(hparam_string) - - tf.logging.info('Final parsed hparams: %s', hparams.values()) - return hparams diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_eval.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_eval.py deleted file mode 100644 index 23824249a9e95586ed85e40cd89c5f6814977969..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_eval.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright 2017 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Evaluates the PIXELDA model. - --- Compiles the model for CPU. -$ bazel build -c opt third_party/tensorflow_models/domain_adaptation/pixel_domain_adaptation:pixelda_eval - --- Compile the model for GPU. -$ bazel build -c opt --copt=-mavx --config=cuda \ - third_party/tensorflow_models/domain_adaptation/pixel_domain_adaptation:pixelda_eval - --- Runs the training. -$ ./bazel-bin/third_party/tensorflow_models/domain_adaptation/pixel_domain_adaptation/pixelda_eval \ - --source_dataset=mnist \ - --target_dataset=mnist_m \ - --dataset_dir=/tmp/datasets/ \ - --alsologtostderr - --- Visualize the results. 
-$ bash learning/brain/tensorboard/tensorboard.sh \ - --port 2222 --logdir=/tmp/pixelda/ -""" -from functools import partial -import math - -# Dependency imports - -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -from domain_adaptation.pixel_domain_adaptation import pixelda_model -from domain_adaptation.pixel_domain_adaptation import pixelda_preprocess -from domain_adaptation.pixel_domain_adaptation import pixelda_utils -from domain_adaptation.pixel_domain_adaptation import pixelda_losses -from domain_adaptation.pixel_domain_adaptation.hparams import create_hparams - -slim = tf.contrib.slim - -flags = tf.app.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string('master', '', 'BNS name of the TensorFlow master to use.') - -flags.DEFINE_string('checkpoint_dir', '/tmp/pixelda/', - 'Directory where the model was written to.') - -flags.DEFINE_string('eval_dir', '/tmp/pixelda/', - 'Directory where the results are saved to.') - -flags.DEFINE_integer('eval_interval_secs', 60, - 'The frequency, in seconds, with which evaluation is run.') - -flags.DEFINE_string('target_split_name', 'test', - 'The name of the train/test split.') -flags.DEFINE_string('source_split_name', 'train', 'Split for source dataset.' - ' Defaults to train.') - -flags.DEFINE_string('source_dataset', 'mnist', - 'The name of the source dataset.') - -flags.DEFINE_string('target_dataset', 'mnist_m', - 'The name of the target dataset.') - -flags.DEFINE_string( - 'dataset_dir', - '', # None, - 'The directory where the datasets can be found.') - -flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - -flags.DEFINE_integer('num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - -# HParams - -flags.DEFINE_string('hparams', '', 'Comma separated hyperparameter values') - - -def run_eval(run_dir, checkpoint_dir, hparams): - """Runs the eval loop. 
- - Args: - run_dir: The directory where eval specific logs are placed - checkpoint_dir: The directory where the checkpoints are stored - hparams: The hyperparameters struct. - - Raises: - ValueError: if hparams.arch is not recognized. - """ - for checkpoint_path in slim.evaluation.checkpoints_iterator( - checkpoint_dir, FLAGS.eval_interval_secs): - with tf.Graph().as_default(): - ######################### - # Preprocess the inputs # - ######################### - target_dataset = dataset_factory.get_dataset( - FLAGS.target_dataset, - split_name=FLAGS.target_split_name, - dataset_dir=FLAGS.dataset_dir) - target_images, target_labels = dataset_factory.provide_batch( - FLAGS.target_dataset, FLAGS.target_split_name, FLAGS.dataset_dir, - FLAGS.num_readers, hparams.batch_size, - FLAGS.num_preprocessing_threads) - num_target_classes = target_dataset.num_classes - target_labels['class'] = tf.argmax(target_labels['classes'], 1) - del target_labels['classes'] - - if hparams.arch not in ['dcgan']: - source_dataset = dataset_factory.get_dataset( - FLAGS.source_dataset, - split_name=FLAGS.source_split_name, - dataset_dir=FLAGS.dataset_dir) - num_source_classes = source_dataset.num_classes - source_images, source_labels = dataset_factory.provide_batch( - FLAGS.source_dataset, FLAGS.source_split_name, FLAGS.dataset_dir, - FLAGS.num_readers, hparams.batch_size, - FLAGS.num_preprocessing_threads) - source_labels['class'] = tf.argmax(source_labels['classes'], 1) - del source_labels['classes'] - if num_source_classes != num_target_classes: - raise ValueError( - 'Input and output datasets must have same number of classes') - else: - source_images = None - source_labels = None - - #################### - # Define the model # - #################### - end_points = pixelda_model.create_model( - hparams, - target_images, - source_images=source_images, - source_labels=source_labels, - is_training=False, - num_classes=num_target_classes) - - ####################### - # Metrics & Summaries # - 
####################### - names_to_values, names_to_updates = create_metrics(end_points, - source_labels, - target_labels, hparams) - pixelda_utils.summarize_model(end_points) - pixelda_utils.summarize_transferred_grid( - end_points['transferred_images'], source_images, name='Transferred') - if 'source_images_recon' in end_points: - pixelda_utils.summarize_transferred_grid( - end_points['source_images_recon'], - source_images, - name='Source Reconstruction') - pixelda_utils.summarize_images(target_images, 'Target') - - for name, value in names_to_values.iteritems(): - tf.summary.scalar(name, value) - - # Use the entire split by default - num_examples = target_dataset.num_samples - - num_batches = math.ceil(num_examples / float(hparams.batch_size)) - global_step = slim.get_or_create_global_step() - - result = slim.evaluation.evaluate_once( - master=FLAGS.master, - checkpoint_path=checkpoint_path, - logdir=run_dir, - num_evals=num_batches, - eval_op=names_to_updates.values(), - final_op=names_to_values) - - -def to_degrees(log_quaternion_loss): - """Converts a log quaternion distance to an angle. - - Args: - log_quaternion_loss: The log quaternion distance between two - unit quaternions (or a batch of pairs of quaternions). - - Returns: - The angle in degrees of the implied angle-axis representation. - """ - return tf.acos(-(tf.exp(log_quaternion_loss) - 1)) * 2 * 180 / math.pi - - -def create_metrics(end_points, source_labels, target_labels, hparams): - """Create metrics for the model. - - Args: - end_points: A dictionary of end point name to tensor - source_labels: Labels for source images. batch_size x 1 - target_labels: Labels for target images. batch_size x 1 - hparams: The hyperparameters struct. 
- - Returns: - Tuple of (names_to_values, names_to_updates), dictionaries that map a metric - name to its value and update op, respectively - - """ - ########################################### - # Evaluate the Domain Prediction Accuracy # - ########################################### - batch_size = hparams.batch_size - names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ - ('eval/Domain_Accuracy-Transferred'): - tf.contrib.metrics.streaming_accuracy( - tf.to_int32( - tf.round(tf.sigmoid(end_points[ - 'transferred_domain_logits']))), - tf.zeros(batch_size, dtype=tf.int32)), - ('eval/Domain_Accuracy-Target'): - tf.contrib.metrics.streaming_accuracy( - tf.to_int32( - tf.round(tf.sigmoid(end_points['target_domain_logits']))), - tf.ones(batch_size, dtype=tf.int32)) - }) - - ################################ - # Evaluate the task classifier # - ################################ - if 'source_task_logits' in end_points: - metric_name = 'eval/Task_Accuracy-Source' - names_to_values[metric_name], names_to_updates[ - metric_name] = tf.contrib.metrics.streaming_accuracy( - tf.argmax(end_points['source_task_logits'], 1), - source_labels['class']) - - if 'transferred_task_logits' in end_points: - metric_name = 'eval/Task_Accuracy-Transferred' - names_to_values[metric_name], names_to_updates[ - metric_name] = tf.contrib.metrics.streaming_accuracy( - tf.argmax(end_points['transferred_task_logits'], 1), - source_labels['class']) - - if 'target_task_logits' in end_points: - metric_name = 'eval/Task_Accuracy-Target' - names_to_values[metric_name], names_to_updates[ - metric_name] = tf.contrib.metrics.streaming_accuracy( - tf.argmax(end_points['target_task_logits'], 1), - target_labels['class']) - - ########################################################################## - # Pose data-specific losses. 
- ########################################################################## - if 'quaternion' in source_labels.keys(): - params = {} - params['use_logging'] = False - params['batch_size'] = batch_size - - angle_loss_source = to_degrees( - pixelda_losses.log_quaternion_loss_batch(end_points[ - 'source_quaternion'], source_labels['quaternion'], params)) - angle_loss_transferred = to_degrees( - pixelda_losses.log_quaternion_loss_batch(end_points[ - 'transferred_quaternion'], source_labels['quaternion'], params)) - angle_loss_target = to_degrees( - pixelda_losses.log_quaternion_loss_batch(end_points[ - 'target_quaternion'], target_labels['quaternion'], params)) - - metric_name = 'eval/Angle_Loss-Source' - names_to_values[metric_name], names_to_updates[ - metric_name] = slim.metrics.mean(angle_loss_source) - - metric_name = 'eval/Angle_Loss-Transferred' - names_to_values[metric_name], names_to_updates[ - metric_name] = slim.metrics.mean(angle_loss_transferred) - - metric_name = 'eval/Angle_Loss-Target' - names_to_values[metric_name], names_to_updates[ - metric_name] = slim.metrics.mean(angle_loss_target) - - return names_to_values, names_to_updates - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - hparams = create_hparams(FLAGS.hparams) - run_eval( - run_dir=FLAGS.eval_dir, - checkpoint_dir=FLAGS.checkpoint_dir, - hparams=hparams) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_losses.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_losses.py deleted file mode 100644 index cf39765d4d28c5a04cb8868cdc465cdd0129b0df..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_losses.py +++ /dev/null @@ -1,385 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Defines the various loss functions in use by the PIXELDA model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf - -slim = tf.contrib.slim - - -def add_domain_classifier_losses(end_points, hparams): - """Adds losses related to the domain-classifier. - - Args: - end_points: A map of network end point names to `Tensors`. - hparams: The hyperparameters struct. - - Returns: - loss: A `Tensor` representing the total task-classifier loss. - """ - if hparams.domain_loss_weight == 0: - tf.logging.info( - 'Domain classifier loss weight is 0, so not creating losses.') - return 0 - - # The domain prediction loss is minimized with respect to the domain - # classifier features only. Its aim is to predict the domain of the images. 
- # Note: 1 = 'real image' label, 0 = 'fake image' label - transferred_domain_loss = tf.losses.sigmoid_cross_entropy( - multi_class_labels=tf.zeros_like(end_points['transferred_domain_logits']), - logits=end_points['transferred_domain_logits']) - tf.summary.scalar('Domain_loss_transferred', transferred_domain_loss) - - target_domain_loss = tf.losses.sigmoid_cross_entropy( - multi_class_labels=tf.ones_like(end_points['target_domain_logits']), - logits=end_points['target_domain_logits']) - tf.summary.scalar('Domain_loss_target', target_domain_loss) - - # Compute the total domain loss: - total_domain_loss = transferred_domain_loss + target_domain_loss - total_domain_loss *= hparams.domain_loss_weight - tf.summary.scalar('Domain_loss_total', total_domain_loss) - - return total_domain_loss - -def log_quaternion_loss_batch(predictions, labels, params): - """A helper function to compute the error between quaternions. - - Args: - predictions: A Tensor of size [batch_size, 4]. - labels: A Tensor of size [batch_size, 4]. - params: A dictionary of parameters. Expecting 'use_logging', 'batch_size'. - - Returns: - A Tensor of size [batch_size], denoting the error between the quaternions. 
- """ - use_logging = params['use_logging'] - assertions = [] - if use_logging: - assertions.append( - tf.Assert( - tf.reduce_all( - tf.less( - tf.abs(tf.reduce_sum(tf.square(predictions), [1]) - 1), - 1e-4)), - ['The l2 norm of each prediction quaternion vector should be 1.'])) - assertions.append( - tf.Assert( - tf.reduce_all( - tf.less( - tf.abs(tf.reduce_sum(tf.square(labels), [1]) - 1), 1e-4)), - ['The l2 norm of each label quaternion vector should be 1.'])) - - with tf.control_dependencies(assertions): - product = tf.multiply(predictions, labels) - internal_dot_products = tf.reduce_sum(product, [1]) - - if use_logging: - internal_dot_products = tf.Print(internal_dot_products, [ - internal_dot_products, - tf.shape(internal_dot_products) - ], 'internal_dot_products:') - - logcost = tf.log(1e-4 + 1 - tf.abs(internal_dot_products)) - return logcost - - -def log_quaternion_loss(predictions, labels, params): - """A helper function to compute the mean error between batches of quaternions. - - The caller is expected to add the loss to the graph. - - Args: - predictions: A Tensor of size [batch_size, 4]. - labels: A Tensor of size [batch_size, 4]. - params: A dictionary of parameters. Expecting 'use_logging', 'batch_size'. - - Returns: - A Tensor of size 1, denoting the mean error between batches of quaternions. - """ - use_logging = params['use_logging'] - logcost = log_quaternion_loss_batch(predictions, labels, params) - logcost = tf.reduce_sum(logcost, [0]) - batch_size = params['batch_size'] - logcost = tf.multiply(logcost, 1.0 / batch_size, name='log_quaternion_loss') - if use_logging: - logcost = tf.Print( - logcost, [logcost], '[logcost]', name='log_quaternion_loss_print') - return logcost - -def _quaternion_loss(labels, predictions, weight, batch_size, domain, - add_summaries): - """Creates a Quaternion Loss. - - Args: - labels: The true quaternions. - predictions: The predicted quaternions. - weight: A scalar weight. - batch_size: The size of the batches. 
- domain: The name of the domain from which the labels were taken. - add_summaries: Whether or not to add summaries for the losses. - - Returns: - A `Tensor` representing the loss. - """ - assert domain in ['Source', 'Transferred'] - - params = {'use_logging': False, 'batch_size': batch_size} - loss = weight * log_quaternion_loss(labels, predictions, params) - - if add_summaries: - assert_op = tf.Assert(tf.is_finite(loss), [loss]) - with tf.control_dependencies([assert_op]): - tf.summary.histogram( - 'Log_Quaternion_Loss_%s' % domain, loss, collections='losses') - tf.summary.scalar( - 'Task_Quaternion_Loss_%s' % domain, loss, collections='losses') - - return loss - - -def _add_task_specific_losses(end_points, source_labels, num_classes, hparams, - add_summaries=False): - """Adds losses related to the task-classifier. - - Args: - end_points: A map of network end point names to `Tensors`. - source_labels: A dictionary of output labels to `Tensors`. - num_classes: The number of classes used by the classifier. - hparams: The hyperparameters struct. - add_summaries: Whether or not to add the summaries. - - Returns: - loss: A `Tensor` representing the total task-classifier loss. 
- """ - # TODO(ddohan): Make sure the l2 regularization is added to the loss - - one_hot_labels = slim.one_hot_encoding(source_labels['class'], num_classes) - total_loss = 0 - - if 'source_task_logits' in end_points: - loss = tf.losses.softmax_cross_entropy( - onehot_labels=one_hot_labels, - logits=end_points['source_task_logits'], - weights=hparams.source_task_loss_weight) - if add_summaries: - tf.summary.scalar('Task_Classifier_Loss_Source', loss) - total_loss += loss - - if 'transferred_task_logits' in end_points: - loss = tf.losses.softmax_cross_entropy( - onehot_labels=one_hot_labels, - logits=end_points['transferred_task_logits'], - weights=hparams.transferred_task_loss_weight) - if add_summaries: - tf.summary.scalar('Task_Classifier_Loss_Transferred', loss) - total_loss += loss - - ######################### - # Pose specific losses. # - ######################### - if 'quaternion' in source_labels: - total_loss += _quaternion_loss( - source_labels['quaternion'], - end_points['source_quaternion'], - hparams.source_pose_weight, - hparams.batch_size, - 'Source', - add_summaries) - - total_loss += _quaternion_loss( - source_labels['quaternion'], - end_points['transferred_quaternion'], - hparams.transferred_pose_weight, - hparams.batch_size, - 'Transferred', - add_summaries) - - if add_summaries: - tf.summary.scalar('Task_Loss_Total', total_loss) - - return total_loss - - -def _transferred_similarity_loss(reconstructions, - source_images, - weight=1.0, - method='mse', - max_diff=0.4, - name='similarity'): - """Computes a loss encouraging similarity between source and transferred. - - Args: - reconstructions: A `Tensor` of shape [batch_size, height, width, channels] - source_images: A `Tensor` of shape [batch_size, height, width, channels]. 
- weight: Multiple similarity loss by this weight before returning - method: One of: - mpse = Mean Pairwise Squared Error - mse = Mean Squared Error - hinged_mse = Computes the mean squared error using squared differences - greater than hparams.transferred_similarity_max_diff - hinged_mae = Computes the mean absolute error using absolute - differences greater than hparams.transferred_similarity_max_diff. - max_diff: Maximum unpenalized difference for hinged losses - name: Identifying name to use for creating summaries - - - Returns: - A `Tensor` representing the transferred similarity loss. - - Raises: - ValueError: if `method` is not recognized. - """ - if weight == 0: - return 0 - - source_channels = source_images.shape.as_list()[-1] - reconstruction_channels = reconstructions.shape.as_list()[-1] - - # Convert grayscale source to RGB if target is RGB - if source_channels == 1 and reconstruction_channels != 1: - source_images = tf.tile(source_images, [1, 1, 1, reconstruction_channels]) - if reconstruction_channels == 1 and source_channels != 1: - reconstructions = tf.tile(reconstructions, [1, 1, 1, source_channels]) - - if method == 'mpse': - reconstruction_similarity_loss_fn = ( - tf.contrib.losses.mean_pairwise_squared_error) - elif method == 'masked_mpse': - - def masked_mpse(predictions, labels, weight): - """Masked mpse assuming we have a depth to create a mask from.""" - assert labels.shape.as_list()[-1] == 4 - mask = tf.to_float(tf.less(labels[:, :, :, 3:4], 0.99)) - mask = tf.tile(mask, [1, 1, 1, 4]) - predictions *= mask - labels *= mask - tf.image_summary('masked_pred', predictions) - tf.image_summary('masked_label', labels) - return tf.contrib.losses.mean_pairwise_squared_error( - predictions, labels, weight) - - reconstruction_similarity_loss_fn = masked_mpse - elif method == 'mse': - reconstruction_similarity_loss_fn = tf.contrib.losses.mean_squared_error - elif method == 'hinged_mse': - - def hinged_mse(predictions, labels, weight): - diffs = 
tf.square(predictions - labels) - diffs = tf.maximum(0.0, diffs - max_diff) - return tf.reduce_mean(diffs) * weight - - reconstruction_similarity_loss_fn = hinged_mse - elif method == 'hinged_mae': - - def hinged_mae(predictions, labels, weight): - diffs = tf.abs(predictions - labels) - diffs = tf.maximum(0.0, diffs - max_diff) - return tf.reduce_mean(diffs) * weight - - reconstruction_similarity_loss_fn = hinged_mae - else: - raise ValueError('Unknown reconstruction loss %s' % method) - - reconstruction_similarity_loss = reconstruction_similarity_loss_fn( - reconstructions, source_images, weight) - - name = '%s_Similarity_(%s)' % (name, method) - tf.summary.scalar(name, reconstruction_similarity_loss) - return reconstruction_similarity_loss - - -def g_step_loss(source_images, source_labels, end_points, hparams, num_classes): - """Configures the loss function which runs during the g-step. - - Args: - source_images: A `Tensor` of shape [batch_size, height, width, channels]. - source_labels: A dictionary of `Tensors` of shape [batch_size]. Valid keys - are 'class' and 'quaternion'. - end_points: A map of the network end points. - hparams: The hyperparameters struct. - num_classes: Number of classes for classifier loss - - Returns: - A `Tensor` representing a loss function. - - Raises: - ValueError: if hparams.transferred_similarity_loss_weight is non-zero but - hparams.transferred_similarity_loss is invalid. - """ - generator_loss = 0 - - ################################################################ - # Adds a loss which encourages the discriminator probabilities # - # to be high (near one). - ################################################################ - - # As per the GAN paper, maximize the log probs, instead of minimizing - # log(1-probs). Since we're minimizing, we'll minimize -log(probs) which is - # the same thing. 
- style_transfer_loss = tf.losses.sigmoid_cross_entropy( - logits=end_points['transferred_domain_logits'], - multi_class_labels=tf.ones_like(end_points['transferred_domain_logits']), - weights=hparams.style_transfer_loss_weight) - tf.summary.scalar('Style_transfer_loss', style_transfer_loss) - generator_loss += style_transfer_loss - - # Optimizes the style transfer network to produce transferred images similar - # to the source images. - generator_loss += _transferred_similarity_loss( - end_points['transferred_images'], - source_images, - weight=hparams.transferred_similarity_loss_weight, - method=hparams.transferred_similarity_loss, - name='transferred_similarity') - - # Optimizes the style transfer network to maximize classification accuracy. - if source_labels is not None and hparams.task_tower_in_g_step: - generator_loss += _add_task_specific_losses( - end_points, source_labels, num_classes, - hparams) * hparams.task_loss_in_g_weight - - return generator_loss - - -def d_step_loss(end_points, source_labels, num_classes, hparams): - """Configures the losses during the D-Step. - - Note that during the D-step, the model optimizes both the domain (binary) - classifier and the task classifier. - - Args: - end_points: A map of the network end points. - source_labels: A dictionary of output labels to `Tensors`. - num_classes: The number of classes used by the classifier. - hparams: The hyperparameters struct. - - Returns: - A `Tensor` representing the value of the D-step loss. 
- """ - domain_classifier_loss = add_domain_classifier_losses(end_points, hparams) - - task_classifier_loss = 0 - if source_labels is not None: - task_classifier_loss = _add_task_specific_losses( - end_points, source_labels, num_classes, hparams, add_summaries=True) - - return domain_classifier_loss + task_classifier_loss diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_model.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_model.py deleted file mode 100644 index 16b550a62d88ec2724c91f9dab9e3b34c736ec4f..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_model.py +++ /dev/null @@ -1,713 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Contains the Domain Adaptation via Style Transfer (PixelDA) model components. - -A number of details in the implementation make reference to one of the following -works: - -- "Unsupervised Representation Learning with Deep Convolutional - Generative Adversarial Networks"" - https://arxiv.org/abs/1511.06434 - -This paper makes several architecture recommendations: -1. Use strided convs in discriminator, fractional-strided convs in generator -2. batchnorm everywhere -3. remove fully connected layers for deep models -4. ReLu for all layers in generator, except tanh on output -5. 
LeakyReLu for everything in discriminator -""" -import functools -import math - -# Dependency imports -import numpy as np - -import tensorflow as tf - -slim = tf.contrib.slim - -from domain_adaptation.pixel_domain_adaptation import pixelda_task_towers - - -def create_model(hparams, - target_images, - source_images=None, - source_labels=None, - is_training=False, - noise=None, - num_classes=None): - """Create a GAN model. - - Arguments: - hparams: HParam object specifying model params - target_images: A `Tensor` of size [batch_size, height, width, channels]. It - is assumed that the images are [-1, 1] normalized. - source_images: A `Tensor` of size [batch_size, height, width, channels]. It - is assumed that the images are [-1, 1] normalized. - source_labels: A `Tensor` of size [batch_size] of categorical labels between - [0, num_classes] - is_training: whether model is currently training - noise: If None, model generates its own noise. Otherwise use provided. - num_classes: Number of classes for classification - - Returns: - end_points dict with model outputs - - Raises: - ValueError: unknown hparams.arch setting - """ - if num_classes is None and hparams.arch in ['resnet', 'simple']: - raise ValueError('Num classes must be provided to create task classifier') - - if target_images.dtype != tf.float32: - raise ValueError('target_images must be tf.float32 and [-1, 1] normalized.') - if source_images is not None and source_images.dtype != tf.float32: - raise ValueError('source_images must be tf.float32 and [-1, 1] normalized.') - - ########################### - # Create latent variables # - ########################### - latent_vars = dict() - - if hparams.noise_channel: - noise_shape = [hparams.batch_size, hparams.noise_dims] - if noise is not None: - assert noise.shape.as_list() == noise_shape - tf.logging.info('Using provided noise') - else: - tf.logging.info('Using random noise') - noise = tf.random_uniform( - shape=noise_shape, - minval=-1, - maxval=1, - 
dtype=tf.float32, - name='random_noise') - latent_vars['noise'] = noise - - #################### - # Create generator # - #################### - - with slim.arg_scope( - [slim.conv2d, slim.conv2d_transpose, slim.fully_connected], - normalizer_params=batch_norm_params(is_training, - hparams.batch_norm_decay), - weights_initializer=tf.random_normal_initializer( - stddev=hparams.normal_init_std), - weights_regularizer=tf.contrib.layers.l2_regularizer( - hparams.weight_decay)): - with slim.arg_scope([slim.conv2d], padding='SAME'): - if hparams.arch == 'dcgan': - end_points = dcgan( - target_images, latent_vars, hparams, scope='generator') - elif hparams.arch == 'resnet': - end_points = resnet_generator( - source_images, - target_images.shape.as_list()[1:4], - hparams=hparams, - latent_vars=latent_vars) - elif hparams.arch == 'residual_interpretation': - end_points = residual_interpretation_generator( - source_images, is_training=is_training, hparams=hparams) - elif hparams.arch == 'simple': - end_points = simple_generator( - source_images, - target_images, - is_training=is_training, - hparams=hparams, - latent_vars=latent_vars) - elif hparams.arch == 'identity': - # Pass through unmodified, besides changing # channels - # Used to calculate baseline numbers - # Also set `generator_steps=0` for baseline - if hparams.generator_steps: - raise ValueError('Must set generator_steps=0 for identity arch. 
Is %s' - % hparams.generator_steps) - transferred_images = source_images - source_channels = source_images.shape.as_list()[-1] - target_channels = target_images.shape.as_list()[-1] - if source_channels == 1 and target_channels == 3: - transferred_images = tf.tile(source_images, [1, 1, 1, 3]) - if source_channels == 3 and target_channels == 1: - transferred_images = tf.image.rgb_to_grayscale(source_images) - end_points = {'transferred_images': transferred_images} - else: - raise ValueError('Unknown architecture: %s' % hparams.arch) - - ##################### - # Domain Classifier # - ##################### - if hparams.arch in [ - 'dcgan', 'resnet', 'residual_interpretation', 'simple', 'identity', - ]: - - # Add a discriminator for these architectures - end_points['transferred_domain_logits'] = predict_domain( - end_points['transferred_images'], - hparams, - is_training=is_training, - reuse=False) - end_points['target_domain_logits'] = predict_domain( - target_images, - hparams, - is_training=is_training, - reuse=True) - - ################### - # Task Classifier # - ################### - if hparams.task_tower != 'none' and hparams.arch in [ - 'resnet', 'residual_interpretation', 'simple', 'identity', - ]: - with tf.variable_scope('discriminator'): - with tf.variable_scope('task_tower'): - end_points['source_task_logits'], end_points[ - 'source_quaternion'] = pixelda_task_towers.add_task_specific_model( - source_images, - hparams, - num_classes=num_classes, - is_training=is_training, - reuse_private=False, - private_scope='source_task_classifier', - reuse_shared=False) - end_points['transferred_task_logits'], end_points[ - 'transferred_quaternion'] = ( - pixelda_task_towers.add_task_specific_model( - end_points['transferred_images'], - hparams, - num_classes=num_classes, - is_training=is_training, - reuse_private=False, - private_scope='transferred_task_classifier', - reuse_shared=True)) - end_points['target_task_logits'], end_points[ - 'target_quaternion'] = 
pixelda_task_towers.add_task_specific_model( - target_images, - hparams, - num_classes=num_classes, - is_training=is_training, - reuse_private=True, - private_scope='transferred_task_classifier', - reuse_shared=True) - # Remove any endpoints with None values - return dict((k, v) for k, v in end_points.iteritems() if v is not None) - - -def batch_norm_params(is_training, batch_norm_decay): - return { - 'is_training': is_training, - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': 0.001, - } - - -def lrelu(x, leakiness=0.2): - """Relu, with optional leaky support.""" - return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu') - - -def upsample(net, num_filters, scale=2, method='resize_conv', scope=None): - """Performs spatial upsampling of the given features. - - Args: - net: A `Tensor` of shape [batch_size, height, width, filters]. - num_filters: The number of output filters. - scale: The scale of the upsampling. Must be a positive integer greater or - equal to two. - method: The method by which the features are upsampled. Valid options - include 'resize_conv' and 'conv2d_transpose'. - scope: An optional variable scope. - - Returns: - A new set of features of shape - [batch_size, height*scale, width*scale, num_filters]. - - Raises: - ValueError: if `method` is not valid or - """ - if scale < 2: - raise ValueError('scale must be greater or equal to two.') - - with tf.variable_scope(scope, 'upsample', [net]): - if method == 'resize_conv': - net = tf.image.resize_nearest_neighbor( - net, [net.shape.as_list()[1] * scale, - net.shape.as_list()[2] * scale], - align_corners=True, - name='resize') - return slim.conv2d(net, num_filters, stride=1, scope='conv') - elif method == 'conv2d_transpose': - return slim.conv2d_transpose(net, num_filters, scope='deconv') - else: - raise ValueError('Upsample method [%s] was not recognized.' 
% method) - - -def project_latent_vars(hparams, proj_shape, latent_vars, combine_method='sum'): - """Generate noise and project to input volume size. - - Args: - hparams: The hyperparameter HParams struct. - proj_shape: Shape to project noise (not including batch size). - latent_vars: dictionary of `'key': Tensor of shape [batch_size, N]` - combine_method: How to combine the projected values. - sum = project to volume then sum - concat = concatenate along last dimension (i.e. channel) - - Returns: - If combine_method=sum, a `Tensor` of size `hparams.projection_shape` - If combine_method=concat and there are N latent vars, a `Tensor` of size - `hparams.projection_shape`, with the last channel multiplied by N - - - Raises: - ValueError: combine_method is not one of sum/concat - """ - values = [] - for var in latent_vars: - with tf.variable_scope(var): - # Project & reshape noise to a HxWxC input - projected = slim.fully_connected( - latent_vars[var], - np.prod(proj_shape), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm) - values.append(tf.reshape(projected, [hparams.batch_size] + proj_shape)) - - if combine_method == 'sum': - result = values[0] - for value in values[1:]: - result += value - elif combine_method == 'concat': - # Concatenate along last axis - result = tf.concat(values, len(proj_shape)) - else: - raise ValueError('Unknown combine_method %s' % combine_method) - - tf.logging.info('Latent variables projected to size %s volume', result.shape) - - return result - - -def resnet_block(net, hparams): - """Create a resnet block.""" - net_in = net - net = slim.conv2d( - net, - hparams.resnet_filters, - stride=1, - normalizer_fn=slim.batch_norm, - activation_fn=tf.nn.relu) - net = slim.conv2d( - net, - hparams.resnet_filters, - stride=1, - normalizer_fn=slim.batch_norm, - activation_fn=None) - if hparams.resnet_residuals: - net += net_in - return net - - -def resnet_stack(images, output_shape, hparams, scope=None): - """Create a resnet style transfer 
block. - - Args: - images: [batch-size, height, width, channels] image tensor to feed as input - output_shape: output image shape in form [height, width, channels] - hparams: hparams objects - scope: Variable scope - - Returns: - Images after processing with resnet blocks. - """ - end_points = {} - if hparams.noise_channel: - # separate the noise for visualization - end_points['noise'] = images[:, :, :, -1] - assert images.shape.as_list()[1:3] == output_shape[0:2] - - with tf.variable_scope(scope, 'resnet_style_transfer', [images]): - with slim.arg_scope( - [slim.conv2d], - normalizer_fn=slim.batch_norm, - kernel_size=[hparams.generator_kernel_size] * 2, - stride=1): - net = slim.conv2d( - images, - hparams.resnet_filters, - normalizer_fn=None, - activation_fn=tf.nn.relu) - for block in range(hparams.resnet_blocks): - net = resnet_block(net, hparams) - end_points['resnet_block_{}'.format(block)] = net - - net = slim.conv2d( - net, - output_shape[-1], - kernel_size=[1, 1], - normalizer_fn=None, - activation_fn=tf.nn.tanh, - scope='conv_out') - end_points['transferred_images'] = net - return net, end_points - - -def predict_domain(images, - hparams, - is_training=False, - reuse=False, - scope='discriminator'): - """Creates a discriminator for a GAN. - - Args: - images: A `Tensor` of size [batch_size, height, width, channels]. It is - assumed that the images are centered between -1 and 1. - hparams: hparam object with params for discriminator - is_training: Specifies whether or not we're training or testing. - reuse: Whether to reuse variable scope - scope: An optional variable_scope. - - Returns: - [batch size, 1] - logit output of discriminator. 
- """ - with tf.variable_scope(scope, 'discriminator', [images], reuse=reuse): - lrelu_partial = functools.partial(lrelu, leakiness=hparams.lrelu_leakiness) - with slim.arg_scope( - [slim.conv2d], - kernel_size=[hparams.discriminator_kernel_size] * 2, - activation_fn=lrelu_partial, - stride=2, - normalizer_fn=slim.batch_norm): - - def add_noise(hidden, scope_num=None): - if scope_num: - hidden = slim.dropout( - hidden, - hparams.discriminator_dropout_keep_prob, - is_training=is_training, - scope='dropout_%s' % scope_num) - if hparams.discriminator_noise_stddev == 0: - return hidden - return hidden + tf.random_normal( - hidden.shape.as_list(), - mean=0.0, - stddev=hparams.discriminator_noise_stddev) - - # As per the recommendation of the DCGAN paper, we don't use batch norm - # on the discriminator input (https://arxiv.org/pdf/1511.06434v2.pdf). - if hparams.discriminator_image_noise: - images = add_noise(images) - net = slim.conv2d( - images, - hparams.num_discriminator_filters, - normalizer_fn=None, - stride=hparams.discriminator_first_stride, - scope='conv1_stride%s' % hparams.discriminator_first_stride) - net = add_noise(net, 1) - - block_id = 2 - # Repeatedly stack - # discriminator_conv_block_size-1 conv layers with stride 1 - # followed by a stride 2 layer - # Add (optional) noise at every point - while net.shape.as_list()[1] > hparams.projection_shape_size: - num_filters = int(hparams.num_discriminator_filters * - (hparams.discriminator_filter_factor**(block_id - 1))) - for conv_id in range(1, hparams.discriminator_conv_block_size): - net = slim.conv2d( - net, - num_filters, - stride=1, - scope='conv_%s_%s' % (block_id, conv_id)) - if hparams.discriminator_do_pooling: - net = slim.conv2d( - net, num_filters, scope='conv_%s_prepool' % block_id) - net = slim.avg_pool2d( - net, kernel_size=[2, 2], stride=2, scope='pool_%s' % block_id) - else: - net = slim.conv2d( - net, num_filters, scope='conv_%s_stride2' % block_id) - net = add_noise(net, block_id) - block_id 
+= 1 - net = slim.flatten(net) - net = slim.fully_connected( - net, - 1, - # Models with BN here generally produce noise - normalizer_fn=None, - activation_fn=None, - scope='fc_logit_out') # Returns logits! - return net - - -def dcgan_generator(images, output_shape, hparams, scope=None): - """Transforms the visual style of the input images. - - Args: - images: A `Tensor` of shape [batch_size, height, width, channels]. - output_shape: A list or tuple of 3 elements: the output height, width and - number of channels. - hparams: hparams object with generator parameters - scope: Scope to place generator inside - - Returns: - A `Tensor` of shape [batch_size, height, width, output_channels] which - represents the result of style transfer. - - Raises: - ValueError: If `output_shape` is not a list or tuple or if it doesn't have - three elements or if `output_shape` or `images` arent square. - """ - if not isinstance(output_shape, (tuple, list)): - raise ValueError('output_shape must be a tuple or list.') - elif len(output_shape) != 3: - raise ValueError('output_shape must have three elements.') - - if output_shape[0] != output_shape[1]: - raise ValueError('output_shape must be square') - if images.shape.as_list()[1] != images.shape.as_list()[2]: - raise ValueError('images height and width must match.') - - outdim = output_shape[0] - indim = images.shape.as_list()[1] - num_iterations = int(math.ceil(math.log(float(outdim) / float(indim), 2.0))) - - with slim.arg_scope( - [slim.conv2d, slim.conv2d_transpose], - kernel_size=[hparams.generator_kernel_size] * 2, - stride=2): - with tf.variable_scope(scope or 'generator'): - - net = images - - # Repeatedly halve # filters until = hparams.decode_filters in last layer - for i in range(num_iterations): - num_filters = hparams.num_decoder_filters * 2**(num_iterations - i - 1) - net = slim.conv2d_transpose(net, num_filters, scope='deconv_%s' % i) - - # Crop down to desired size (e.g. 
32x32 -> 28x28) - dif = net.shape.as_list()[1] - outdim - low = dif / 2 - high = net.shape.as_list()[1] - low - net = net[:, low:high, low:high, :] - - # No batch norm on generator output - net = slim.conv2d( - net, - output_shape[2], - kernel_size=[1, 1], - stride=1, - normalizer_fn=None, - activation_fn=tf.tanh, - scope='conv_out') - return net - - -def dcgan(target_images, latent_vars, hparams, scope='dcgan'): - """Creates the PixelDA model. - - Args: - target_images: A `Tensor` of shape [batch_size, height, width, 3] - sampled from the image domain to which we want to transfer. - latent_vars: dictionary of 'key': Tensor of shape [batch_size, N] - hparams: The hyperparameter map. - scope: Surround generator component with this scope - - Returns: - A dictionary of model outputs. - """ - proj_shape = [ - hparams.projection_shape_size, hparams.projection_shape_size, - hparams.projection_shape_channels - ] - source_volume = project_latent_vars( - hparams, proj_shape, latent_vars, combine_method='concat') - - ################################################### - # Transfer the source images to the target style. # - ################################################### - with tf.variable_scope(scope, 'generator', [target_images]): - transferred_images = dcgan_generator( - source_volume, - output_shape=target_images.shape.as_list()[1:4], - hparams=hparams) - assert transferred_images.shape.as_list() == target_images.shape.as_list() - - return {'transferred_images': transferred_images} - - -def resnet_generator(images, output_shape, hparams, latent_vars=None): - """Creates a ResNet-based generator. - - Args: - images: A `Tensor` of shape [batch_size, height, width, num_channels] - sampled from the image domain from which we want to transfer - output_shape: A length-3 array indicating the height, width and channels of - the output. - hparams: The hyperparameter map. 
- latent_vars: dictionary of 'key': Tensor of shape [batch_size, N] - - Returns: - A dictionary of model outputs. - """ - with tf.variable_scope('generator'): - if latent_vars: - noise_channel = project_latent_vars( - hparams, - proj_shape=images.shape.as_list()[1:3] + [1], - latent_vars=latent_vars, - combine_method='concat') - images = tf.concat([images, noise_channel], 3) - - transferred_images, end_points = resnet_stack( - images, - output_shape=output_shape, - hparams=hparams, - scope='resnet_stack') - end_points['transferred_images'] = transferred_images - - return end_points - - -def residual_interpretation_block(images, hparams, scope): - """Learns a residual image which is added to the incoming image. - - Args: - images: A `Tensor` of size [batch_size, height, width, 3] - hparams: The hyperparameters struct. - scope: The name of the variable op scope. - - Returns: - The updated images. - """ - with tf.variable_scope(scope): - with slim.arg_scope( - [slim.conv2d], - normalizer_fn=None, - kernel_size=[hparams.generator_kernel_size] * 2): - - net = images - for _ in range(hparams.res_int_convs): - net = slim.conv2d( - net, hparams.res_int_filters, activation_fn=tf.nn.relu) - net = slim.conv2d(net, 3, activation_fn=tf.nn.tanh) - - # Add the residual - images += net - - # Clip the output - images = tf.maximum(images, -1.0) - images = tf.minimum(images, 1.0) - return images - - -def residual_interpretation_generator(images, - is_training, - hparams, - latent_vars=None): - """Creates a generator producing purely residual transformations. - - A residual generator differs from the resnet generator in that each 'block' of - the residual generator produces a residual image. Consequently, the 'progress' - of the model generation process can be directly observed at inference time, - making it easier to diagnose and understand. 
- - Args: - images: A `Tensor` of shape [batch_size, height, width, num_channels] - sampled from the image domain from which we want to transfer. It is - assumed that the images are centered between -1 and 1. - is_training: whether or not the model is training. - hparams: The hyperparameter map. - latent_vars: dictionary of 'key': Tensor of shape [batch_size, N] - - Returns: - A dictionary of model outputs. - """ - end_points = {} - - with tf.variable_scope('generator'): - if latent_vars: - projected_latent = project_latent_vars( - hparams, - proj_shape=images.shape.as_list()[1:3] + [images.shape.as_list()[-1]], - latent_vars=latent_vars, - combine_method='sum') - images += projected_latent - with tf.variable_scope(None, 'residual_style_transfer', [images]): - for i in range(hparams.res_int_blocks): - images = residual_interpretation_block(images, hparams, - 'residual_%d' % i) - end_points['transferred_images_%d' % i] = images - - end_points['transferred_images'] = images - - return end_points - - -def simple_generator(source_images, target_images, is_training, hparams, - latent_vars): - """Simple generator architecture (stack of convs) for trying small models.""" - end_points = {} - with tf.variable_scope('generator'): - feed_source_images = source_images - - if latent_vars: - projected_latent = project_latent_vars( - hparams, - proj_shape=source_images.shape.as_list()[1:3] + [1], - latent_vars=latent_vars, - combine_method='concat') - feed_source_images = tf.concat([source_images, projected_latent], 3) - - end_points = {} - - ################################################### - # Transfer the source images to the target style. 
# - ################################################### - with slim.arg_scope( - [slim.conv2d], - normalizer_fn=slim.batch_norm, - stride=1, - kernel_size=[hparams.generator_kernel_size] * 2): - net = feed_source_images - - # N convolutions - for i in range(1, hparams.simple_num_conv_layers): - normalizer_fn = None - if i != 0: - normalizer_fn = slim.batch_norm - net = slim.conv2d( - net, - hparams.simple_conv_filters, - normalizer_fn=normalizer_fn, - activation_fn=tf.nn.relu) - - # Project back to right # image channels - net = slim.conv2d( - net, - target_images.shape.as_list()[-1], - kernel_size=[1, 1], - stride=1, - normalizer_fn=None, - activation_fn=tf.tanh, - scope='conv_out') - - transferred_images = net - assert transferred_images.shape.as_list() == target_images.shape.as_list() - end_points['transferred_images'] = transferred_images - - return end_points diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess.py deleted file mode 100644 index 747c17b18bf007d85e606015da6687a343bf74d2..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Contains functions for preprocessing the inputs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf - - -def preprocess_classification(image, labels, is_training=False): - """Preprocesses the image and labels for classification purposes. - - Preprocessing includes shifting the images to be 0-centered between -1 and 1. - This is not only a popular method of preprocessing (inception) but is also - the mechanism used by DSNs. - - Args: - image: A `Tensor` of size [height, width, 3]. - labels: A dictionary of labels. - is_training: Whether or not we're training the model. - - Returns: - The preprocessed image and labels. - """ - # If the image is uint8, this will scale it to 0-1. - image = tf.image.convert_image_dtype(image, tf.float32) - image -= 0.5 - image *= 2 - - return image, labels - - -def preprocess_style_transfer(image, - labels, - augment=False, - size=None, - is_training=False): - """Preprocesses the image and labels for style transfer purposes. - - Args: - image: A `Tensor` of size [height, width, 3]. - labels: A dictionary of labels. - augment: Whether to apply data augmentation to inputs - size: The height and width to which images should be resized. If left as - `None`, then no resizing is performed - is_training: Whether or not we're training the model - - Returns: - The preprocessed image and labels. Scaled to [-1, 1] - """ - # If the image is uint8, this will scale it to 0-1. - image = tf.image.convert_image_dtype(image, tf.float32) - if augment and is_training: - image = image_augmentation(image) - - if size: - image = resize_image(image, size) - - image -= 0.5 - image *= 2 - - return image, labels - - -def image_augmentation(image): - """Performs data augmentation by randomly permuting the inputs. - - Args: - image: A float `Tensor` of size [height, width, channels] with values - in range[0,1]. 
- - Returns: - The mutated batch of images - """ - # Apply photometric data augmentation (contrast etc.) - num_channels = image.shape_as_list()[-1] - if num_channels == 4: - # Only augment image part - image, depth = image[:, :, 0:3], image[:, :, 3:4] - elif num_channels == 1: - image = tf.image.grayscale_to_rgb(image) - image = tf.image.random_brightness(image, max_delta=0.1) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.032) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.clip_by_value(image, 0, 1.0) - if num_channels == 4: - image = tf.concat(2, [image, depth]) - elif num_channels == 1: - image = tf.image.rgb_to_grayscale(image) - return image - - -def resize_image(image, size=None): - """Resize image to target size. - - Args: - image: A `Tensor` of size [height, width, 3]. - size: (height, width) to resize image to. - - Returns: - resized image - """ - if size is None: - raise ValueError('Must specify size') - - if image.shape_as_list()[:2] == size: - # Don't resize if not necessary - return image - image = tf.expand_dims(image, 0) - image = tf.image.resize_images(image, size) - image = tf.squeeze(image, 0) - return image diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess_test.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess_test.py deleted file mode 100644 index 73f8c7ff05fc7d2614c419759a02f78ffbcdfec0..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_preprocess_test.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for domain_adaptation.pixel_domain_adaptation.pixelda_preprocess.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf - -from domain_adaptation.pixel_domain_adaptation import pixelda_preprocess - - -class PixelDAPreprocessTest(tf.test.TestCase): - - def assert_preprocess_classification_is_centered(self, dtype, is_training): - tf.set_random_seed(0) - - if dtype == tf.uint8: - image = tf.random_uniform((100, 200, 3), maxval=255, dtype=tf.int64) - image = tf.cast(image, tf.uint8) - else: - image = tf.random_uniform((100, 200, 3), maxval=1.0, dtype=dtype) - - labels = {} - image, labels = pixelda_preprocess.preprocess_classification( - image, labels, is_training=is_training) - - with self.test_session() as sess: - np_image = sess.run(image) - - self.assertTrue(np_image.min() <= -0.95) - self.assertTrue(np_image.min() >= -1.0) - self.assertTrue(np_image.max() >= 0.95) - self.assertTrue(np_image.max() <= 1.0) - - def testPreprocessClassificationZeroCentersUint8DuringTrain(self): - self.assert_preprocess_classification_is_centered( - tf.uint8, is_training=True) - - def testPreprocessClassificationZeroCentersUint8DuringTest(self): - self.assert_preprocess_classification_is_centered( - tf.uint8, is_training=False) - - def testPreprocessClassificationZeroCentersFloatDuringTrain(self): - self.assert_preprocess_classification_is_centered( - tf.float32, is_training=True) - - def testPreprocessClassificationZeroCentersFloatDuringTest(self): - 
self.assert_preprocess_classification_is_centered( - tf.float32, is_training=False) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_task_towers.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_task_towers.py deleted file mode 100644 index 1cb42e2d890a7759318cf0981640c0dd1645461e..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_task_towers.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Task towers for PixelDA model.""" -import tensorflow as tf - -slim = tf.contrib.slim - - -def add_task_specific_model(images, - hparams, - num_classes=10, - is_training=False, - reuse_private=False, - private_scope=None, - reuse_shared=False, - shared_scope=None): - """Create a classifier for the given images. - - The classifier is composed of a few 'private' layers followed by a few - 'shared' layers. This lets us account for different image 'style', while - sharing the last few layers as 'content' layers. - - Args: - images: A `Tensor` of size [batch_size, height, width, 3]. - hparams: model hparams - num_classes: The number of output classes. 
- is_training: whether model is training - reuse_private: Whether or not to reuse the private weights, which are the - first few layers in the classifier - private_scope: The name of the variable_scope for the private (unshared) - components of the classifier. - reuse_shared: Whether or not to reuse the shared weights, which are the last - few layers in the classifier - shared_scope: The name of the variable_scope for the shared components of - the classifier. - - Returns: - The logits, a `Tensor` of shape [batch_size, num_classes]. - - Raises: - ValueError: If hparams.task_classifier is an unknown value - """ - - model = hparams.task_tower - # Make sure the classifier name shows up in graph - shared_scope = shared_scope or (model + '_shared') - kwargs = { - 'num_classes': num_classes, - 'is_training': is_training, - 'reuse_private': reuse_private, - 'reuse_shared': reuse_shared, - } - - if private_scope: - kwargs['private_scope'] = private_scope - if shared_scope: - kwargs['shared_scope'] = shared_scope - - quaternion_pred = None - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - activation_fn=tf.nn.relu, - weights_regularizer=tf.contrib.layers.l2_regularizer( - hparams.weight_decay_task_classifier)): - with slim.arg_scope([slim.conv2d], padding='SAME'): - if model == 'doubling_pose_estimator': - logits, quaternion_pred = doubling_cnn_class_and_quaternion( - images, num_private_layers=hparams.num_private_layers, **kwargs) - elif model == 'mnist': - logits, _ = mnist_classifier(images, **kwargs) - elif model == 'svhn': - logits, _ = svhn_classifier(images, **kwargs) - elif model == 'gtsrb': - logits, _ = gtsrb_classifier(images, **kwargs) - elif model == 'pose_mini': - logits, quaternion_pred = pose_mini_tower(images, **kwargs) - else: - raise ValueError('Unknown task classifier %s' % model) - - return logits, quaternion_pred - - -##################################### -# Classifiers used in the DSN paper # -##################################### - - 
-def mnist_classifier(images, - is_training=False, - num_classes=10, - reuse_private=False, - private_scope='mnist', - reuse_shared=False, - shared_scope='task_model'): - """Creates the convolutional MNIST model from the gradient reversal paper. - - Note that since the output is a set of 'logits', the values fall in the - interval of (-infinity, infinity). Consequently, to convert the outputs to a - probability distribution over the characters, one will need to convert them - using the softmax function: - logits, endpoints = conv_mnist(images, is_training=False) - predictions = tf.nn.softmax(logits) - - Args: - images: the MNIST digits, a tensor of size [batch_size, 28, 28, 1]. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - num_classes: the number of output classes to use. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. - """ - - net = {} - - with tf.variable_scope(private_scope, reuse=reuse_private): - net['conv1'] = slim.conv2d(images, 32, [5, 5], scope='conv1') - net['pool1'] = slim.max_pool2d(net['conv1'], [2, 2], 2, scope='pool1') - - with tf.variable_scope(shared_scope, reuse=reuse_shared): - net['conv2'] = slim.conv2d(net['pool1'], 48, [5, 5], scope='conv2') - net['pool2'] = slim.max_pool2d(net['conv2'], [2, 2], 2, scope='pool2') - net['fc3'] = slim.fully_connected( - slim.flatten(net['pool2']), 100, scope='fc3') - net['fc4'] = slim.fully_connected( - slim.flatten(net['fc3']), 100, scope='fc4') - logits = slim.fully_connected( - net['fc4'], num_classes, activation_fn=None, scope='fc5') - return logits, net - - -def svhn_classifier(images, - is_training=False, - num_classes=10, - reuse_private=False, - private_scope=None, - reuse_shared=False, - shared_scope='task_model'): - """Creates the convolutional SVHN model from the gradient reversal paper. 
- - Note that since the output is a set of 'logits', the values fall in the - interval of (-infinity, infinity). Consequently, to convert the outputs to a - probability distribution over the characters, one will need to convert them - using the softmax function: - logits = mnist.Mnist(images, is_training=False) - predictions = tf.nn.softmax(logits) - - Args: - images: the SVHN digits, a tensor of size [batch_size, 40, 40, 3]. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - num_classes: the number of output classes to use. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. - """ - - net = {} - - with tf.variable_scope(private_scope, reuse=reuse_private): - net['conv1'] = slim.conv2d(images, 64, [5, 5], scope='conv1') - net['pool1'] = slim.max_pool2d(net['conv1'], [3, 3], 2, scope='pool1') - - with tf.variable_scope(shared_scope, reuse=reuse_shared): - net['conv2'] = slim.conv2d(net['pool1'], 64, [5, 5], scope='conv2') - net['pool2'] = slim.max_pool2d(net['conv2'], [3, 3], 2, scope='pool2') - net['conv3'] = slim.conv2d(net['pool2'], 128, [5, 5], scope='conv3') - - net['fc3'] = slim.fully_connected( - slim.flatten(net['conv3']), 3072, scope='fc3') - net['fc4'] = slim.fully_connected( - slim.flatten(net['fc3']), 2048, scope='fc4') - - logits = slim.fully_connected( - net['fc4'], num_classes, activation_fn=None, scope='fc5') - - return logits, net - - -def gtsrb_classifier(images, - is_training=False, - num_classes=43, - reuse_private=False, - private_scope='gtsrb', - reuse_shared=False, - shared_scope='task_model'): - """Creates the convolutional GTSRB model from the gradient reversal paper. - - Note that since the output is a set of 'logits', the values fall in the - interval of (-infinity, infinity). 
Consequently, to convert the outputs to a - probability distribution over the characters, one will need to convert them - using the softmax function: - logits = mnist.Mnist(images, is_training=False) - predictions = tf.nn.softmax(logits) - - Args: - images: the SVHN digits, a tensor of size [batch_size, 40, 40, 3]. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - num_classes: the number of output classes to use. - reuse_private: Whether or not to reuse the private components of the model. - private_scope: The name of the private scope. - reuse_shared: Whether or not to reuse the shared components of the model. - shared_scope: The name of the shared scope. - - Returns: - the output logits, a tensor of size [batch_size, num_classes]. - a dictionary with key/values the layer names and tensors. - """ - - net = {} - - with tf.variable_scope(private_scope, reuse=reuse_private): - net['conv1'] = slim.conv2d(images, 96, [5, 5], scope='conv1') - net['pool1'] = slim.max_pool2d(net['conv1'], [2, 2], 2, scope='pool1') - with tf.variable_scope(shared_scope, reuse=reuse_shared): - net['conv2'] = slim.conv2d(net['pool1'], 144, [3, 3], scope='conv2') - net['pool2'] = slim.max_pool2d(net['conv2'], [2, 2], 2, scope='pool2') - net['conv3'] = slim.conv2d(net['pool2'], 256, [5, 5], scope='conv3') - net['pool3'] = slim.max_pool2d(net['conv3'], [2, 2], 2, scope='pool3') - - net['fc3'] = slim.fully_connected( - slim.flatten(net['pool3']), 512, scope='fc3') - logits = slim.fully_connected( - net['fc3'], num_classes, activation_fn=None, scope='fc4') - - return logits, net - - -######################### -# pose_mini task towers # -######################### - - -def pose_mini_tower(images, - num_classes=11, - is_training=False, - reuse_private=False, - private_scope='pose_mini', - reuse_shared=False, - shared_scope='task_model'): - """Task tower for the pose_mini dataset.""" - - with 
tf.variable_scope(private_scope, reuse=reuse_private): - net = slim.conv2d(images, 32, [5, 5], scope='conv1') - net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool1') - with tf.variable_scope(shared_scope, reuse=reuse_shared): - net = slim.conv2d(net, 64, [5, 5], scope='conv2') - net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool2') - net = slim.flatten(net) - - net = slim.fully_connected(net, 128, scope='fc3') - net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout') - with tf.variable_scope('quaternion_prediction'): - quaternion_pred = slim.fully_connected( - net, 4, activation_fn=tf.tanh, scope='fc_q') - quaternion_pred = tf.nn.l2_normalize(quaternion_pred, 1) - - logits = slim.fully_connected( - net, num_classes, activation_fn=None, scope='fc4') - - return logits, quaternion_pred - - -def doubling_cnn_class_and_quaternion(images, - num_private_layers=1, - num_classes=10, - is_training=False, - reuse_private=False, - private_scope='doubling_cnn', - reuse_shared=False, - shared_scope='task_model'): - """Alternate conv, pool while doubling filter count.""" - net = images - depth = 32 - layer_id = 1 - - with tf.variable_scope(private_scope, reuse=reuse_private): - while num_private_layers > 0 and net.shape.as_list()[1] > 5: - net = slim.conv2d(net, depth, [3, 3], scope='conv%s' % layer_id) - net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool%s' % layer_id) - depth *= 2 - layer_id += 1 - num_private_layers -= 1 - - with tf.variable_scope(shared_scope, reuse=reuse_shared): - while net.shape.as_list()[1] > 5: - net = slim.conv2d(net, depth, [3, 3], scope='conv%s' % layer_id) - net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool%s' % layer_id) - depth *= 2 - layer_id += 1 - - net = slim.flatten(net) - net = slim.fully_connected(net, 100, scope='fc1') - net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout') - quaternion_pred = slim.fully_connected( - net, 4, activation_fn=tf.tanh, scope='fc_q') - quaternion_pred = 
tf.nn.l2_normalize(quaternion_pred, 1) - - logits = slim.fully_connected( - net, num_classes, activation_fn=None, scope='fc_logits') - - return logits, quaternion_pred diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_train.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_train.py deleted file mode 100644 index 4ca072cceafa48769623381b8e564fe650f2a514..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_train.py +++ /dev/null @@ -1,409 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Trains the PixelDA model.""" - -from functools import partial -import os - -# Dependency imports - -import tensorflow as tf - -from domain_adaptation.datasets import dataset_factory -from domain_adaptation.pixel_domain_adaptation import pixelda_losses -from domain_adaptation.pixel_domain_adaptation import pixelda_model -from domain_adaptation.pixel_domain_adaptation import pixelda_preprocess -from domain_adaptation.pixel_domain_adaptation import pixelda_utils -from domain_adaptation.pixel_domain_adaptation.hparams import create_hparams - -slim = tf.contrib.slim - -flags = tf.app.flags -FLAGS = flags.FLAGS - -flags.DEFINE_string('master', '', 'BNS name of the TensorFlow master to use.') - -flags.DEFINE_integer( - 'ps_tasks', 0, - 'The number of parameter servers. 
If the value is 0, then the parameters ' - 'are handled locally by the worker.') - -flags.DEFINE_integer( - 'task', 0, - 'The Task ID. This value is used when training with multiple workers to ' - 'identify each worker.') - -flags.DEFINE_string('train_log_dir', '/tmp/pixelda/', - 'Directory where to write event logs.') - -flags.DEFINE_integer( - 'save_summaries_steps', 500, - 'The frequency with which summaries are saved, in seconds.') - -flags.DEFINE_integer('save_interval_secs', 300, - 'The frequency with which the model is saved, in seconds.') - -flags.DEFINE_boolean('summarize_gradients', False, - 'Whether to summarize model gradients') - -flags.DEFINE_integer( - 'print_loss_steps', 100, - 'The frequency with which the losses are printed, in steps.') - -flags.DEFINE_string('source_dataset', 'mnist', 'The name of the source dataset.' - ' If hparams="arch=dcgan", this flag is ignored.') - -flags.DEFINE_string('target_dataset', 'mnist_m', - 'The name of the target dataset.') - -flags.DEFINE_string('source_split_name', 'train', - 'Name of the train split for the source.') - -flags.DEFINE_string('target_split_name', 'train', - 'Name of the train split for the target.') - -flags.DEFINE_string('dataset_dir', '', - 'The directory where the datasets can be found.') - -flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - -flags.DEFINE_integer('num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - -# HParams - -flags.DEFINE_string('hparams', '', 'Comma separated hyperparameter values') - - -def _get_vars_and_update_ops(hparams, scope): - """Returns the variables and update ops for a particular variable scope. - - Args: - hparams: The hyperparameters struct. - scope: The variable scope. - - Returns: - A tuple consisting of trainable variables and update ops. 
- """ - is_trainable = lambda x: x in tf.trainable_variables() - var_list = filter(is_trainable, slim.get_model_variables(scope)) - global_step = slim.get_or_create_global_step() - - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) - - tf.logging.info('All variables for scope: %s', - slim.get_model_variables(scope)) - tf.logging.info('Trainable variables for scope: %s', var_list) - - return var_list, update_ops - - -def _train(discriminator_train_op, - generator_train_op, - logdir, - master='', - is_chief=True, - scaffold=None, - hooks=None, - chief_only_hooks=None, - save_checkpoint_secs=600, - save_summaries_steps=100, - hparams=None): - """Runs the training loop. - - Args: - discriminator_train_op: A `Tensor` that, when executed, will apply the - gradients and return the loss value for the discriminator. - generator_train_op: A `Tensor` that, when executed, will apply the - gradients and return the loss value for the generator. - logdir: The directory where the graph and checkpoints are saved. - master: The URL of the master. - is_chief: Specifies whether or not the training is being run by the primary - replica during replica training. - scaffold: An tf.train.Scaffold instance. - hooks: List of `tf.train.SessionRunHook` callbacks which are run inside the - training loop. - chief_only_hooks: List of `tf.train.SessionRunHook` instances which are run - inside the training loop for the chief trainer only. - save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. - save_summaries_steps: The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If - `save_summaries_steps` is set to `None`, then the default summary saver - isn't used. - hparams: The hparams struct. - - Returns: - the value of the loss function after training. 
- - Raises: - ValueError: if `logdir` is `None` and either `save_checkpoint_secs` or - `save_summaries_steps` are `None. - """ - global_step = slim.get_or_create_global_step() - - scaffold = scaffold or tf.train.Scaffold() - - hooks = hooks or [] - - if is_chief: - session_creator = tf.train.ChiefSessionCreator( - scaffold=scaffold, checkpoint_dir=logdir, master=master) - - if chief_only_hooks: - hooks.extend(chief_only_hooks) - hooks.append(tf.train.StepCounterHook(output_dir=logdir)) - - if save_summaries_steps: - if logdir is None: - raise ValueError( - 'logdir cannot be None when save_summaries_steps is None') - hooks.append( - tf.train.SummarySaverHook( - scaffold=scaffold, - save_steps=save_summaries_steps, - output_dir=logdir)) - - if save_checkpoint_secs: - if logdir is None: - raise ValueError( - 'logdir cannot be None when save_checkpoint_secs is None') - hooks.append( - tf.train.CheckpointSaverHook( - logdir, save_secs=save_checkpoint_secs, scaffold=scaffold)) - else: - session_creator = tf.train.WorkerSessionCreator( - scaffold=scaffold, master=master) - - with tf.train.MonitoredSession( - session_creator=session_creator, hooks=hooks) as session: - loss = None - while not session.should_stop(): - # Run the domain classifier op X times. - for _ in range(hparams.discriminator_steps): - if session.should_stop(): - return loss - loss, np_global_step = session.run( - [discriminator_train_op, global_step]) - if np_global_step % FLAGS.print_loss_steps == 0: - tf.logging.info('Step %d: Discriminator Loss = %.2f', np_global_step, - loss) - - # Run the generator op X times. - for _ in range(hparams.generator_steps): - if session.should_stop(): - return loss - loss, np_global_step = session.run([generator_train_op, global_step]) - if np_global_step % FLAGS.print_loss_steps == 0: - tf.logging.info('Step %d: Generator Loss = %.2f', np_global_step, - loss) - return loss - - -def run_training(run_dir, checkpoint_dir, hparams): - """Runs the training loop. 
- - Args: - run_dir: The directory where training specific logs are placed - checkpoint_dir: The directory where the checkpoints and log files are - stored. - hparams: The hyperparameters struct. - - Raises: - ValueError: if hparams.arch is not recognized. - """ - for path in [run_dir, checkpoint_dir]: - if not tf.gfile.Exists(path): - tf.gfile.MakeDirs(path) - - # Serialize hparams to log dir - hparams_filename = os.path.join(checkpoint_dir, 'hparams.json') - with tf.gfile.FastGFile(hparams_filename, 'w') as f: - f.write(hparams.to_json()) - - with tf.Graph().as_default(): - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - global_step = slim.get_or_create_global_step() - - ######################### - # Preprocess the inputs # - ######################### - target_dataset = dataset_factory.get_dataset( - FLAGS.target_dataset, - split_name='train', - dataset_dir=FLAGS.dataset_dir) - target_images, _ = dataset_factory.provide_batch( - FLAGS.target_dataset, 'train', FLAGS.dataset_dir, FLAGS.num_readers, - hparams.batch_size, FLAGS.num_preprocessing_threads) - num_target_classes = target_dataset.num_classes - - if hparams.arch not in ['dcgan']: - source_dataset = dataset_factory.get_dataset( - FLAGS.source_dataset, - split_name='train', - dataset_dir=FLAGS.dataset_dir) - num_source_classes = source_dataset.num_classes - source_images, source_labels = dataset_factory.provide_batch( - FLAGS.source_dataset, 'train', FLAGS.dataset_dir, FLAGS.num_readers, - hparams.batch_size, FLAGS.num_preprocessing_threads) - # Data provider provides 1 hot labels, but we expect categorical. - source_labels['class'] = tf.argmax(source_labels['classes'], 1) - del source_labels['classes'] - if num_source_classes != num_target_classes: - raise ValueError( - 'Source and Target datasets must have same number of classes. 
' - 'Are %d and %d' % (num_source_classes, num_target_classes)) - else: - source_images = None - source_labels = None - - #################### - # Define the model # - #################### - end_points = pixelda_model.create_model( - hparams, - target_images, - source_images=source_images, - source_labels=source_labels, - is_training=True, - num_classes=num_target_classes) - - ################################# - # Get the variables to optimize # - ################################# - generator_vars, generator_update_ops = _get_vars_and_update_ops( - hparams, 'generator') - discriminator_vars, discriminator_update_ops = _get_vars_and_update_ops( - hparams, 'discriminator') - - ######################## - # Configure the losses # - ######################## - generator_loss = pixelda_losses.g_step_loss( - source_images, - source_labels, - end_points, - hparams, - num_classes=num_target_classes) - discriminator_loss = pixelda_losses.d_step_loss( - end_points, source_labels, num_target_classes, hparams) - - ########################### - # Create the training ops # - ########################### - learning_rate = hparams.learning_rate - if hparams.lr_decay_steps: - learning_rate = tf.train.exponential_decay( - learning_rate, - slim.get_or_create_global_step(), - decay_steps=hparams.lr_decay_steps, - decay_rate=hparams.lr_decay_rate, - staircase=True) - tf.summary.scalar('Learning_rate', learning_rate) - - - if hparams.discriminator_steps == 0: - discriminator_train_op = tf.no_op() - else: - discriminator_optimizer = tf.train.AdamOptimizer( - learning_rate, beta1=hparams.adam_beta1) - - discriminator_train_op = slim.learning.create_train_op( - discriminator_loss, - discriminator_optimizer, - update_ops=discriminator_update_ops, - variables_to_train=discriminator_vars, - clip_gradient_norm=hparams.clip_gradient_norm, - summarize_gradients=FLAGS.summarize_gradients) - - if hparams.generator_steps == 0: - generator_train_op = tf.no_op() - else: - generator_optimizer = 
tf.train.AdamOptimizer( - learning_rate, beta1=hparams.adam_beta1) - generator_train_op = slim.learning.create_train_op( - generator_loss, - generator_optimizer, - update_ops=generator_update_ops, - variables_to_train=generator_vars, - clip_gradient_norm=hparams.clip_gradient_norm, - summarize_gradients=FLAGS.summarize_gradients) - - ############# - # Summaries # - ############# - pixelda_utils.summarize_model(end_points) - pixelda_utils.summarize_transferred_grid( - end_points['transferred_images'], source_images, name='Transferred') - if 'source_images_recon' in end_points: - pixelda_utils.summarize_transferred_grid( - end_points['source_images_recon'], - source_images, - name='Source Reconstruction') - pixelda_utils.summaries_color_distributions(end_points['transferred_images'], - 'Transferred') - pixelda_utils.summaries_color_distributions(target_images, 'Target') - - if source_images is not None: - pixelda_utils.summarize_transferred(source_images, - end_points['transferred_images']) - pixelda_utils.summaries_color_distributions(source_images, 'Source') - pixelda_utils.summaries_color_distributions( - tf.abs(source_images - end_points['transferred_images']), - 'Abs(Source_minus_Transferred)') - - number_of_steps = None - if hparams.num_training_examples: - # Want to control by amount of data seen, not # steps - number_of_steps = hparams.num_training_examples / hparams.batch_size - - hooks = [tf.train.StepCounterHook(),] - - chief_only_hooks = [ - tf.train.CheckpointSaverHook( - saver=tf.train.Saver(), - checkpoint_dir=run_dir, - save_secs=FLAGS.save_interval_secs) - ] - - if number_of_steps: - hooks.append(tf.train.StopAtStepHook(last_step=number_of_steps)) - - _train( - discriminator_train_op, - generator_train_op, - logdir=run_dir, - master=FLAGS.master, - is_chief=FLAGS.task == 0, - hooks=hooks, - chief_only_hooks=chief_only_hooks, - save_checkpoint_secs=None, - save_summaries_steps=FLAGS.save_summaries_steps, - hparams=hparams) - -def main(_): - 
tf.logging.set_verbosity(tf.logging.INFO) - hparams = create_hparams(FLAGS.hparams) - run_training( - run_dir=FLAGS.train_log_dir, - checkpoint_dir=FLAGS.train_log_dir, - hparams=hparams) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/domain_adaptation/pixel_domain_adaptation/pixelda_utils.py b/research/domain_adaptation/pixel_domain_adaptation/pixelda_utils.py deleted file mode 100644 index 28e8006f267f9bf7f13c3dff78625cc4cbd00185..0000000000000000000000000000000000000000 --- a/research/domain_adaptation/pixel_domain_adaptation/pixelda_utils.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Utilities for PixelDA model.""" -import math - -# Dependency imports - -import tensorflow as tf - -slim = tf.contrib.slim - -flags = tf.app.flags -FLAGS = flags.FLAGS - - -def remove_depth(images): - """Takes a batch of images and remove depth channel if present.""" - if images.shape.as_list()[-1] == 4: - return images[:, :, :, 0:3] - return images - - -def image_grid(images, max_grid_size=4): - """Given images and N, return first N^2 images as an NxN image grid. 
- - Args: - images: a `Tensor` of size [batch_size, height, width, channels] - max_grid_size: Maximum image grid height/width - - Returns: - Single image batch, of dim [1, h*n, w*n, c] - """ - images = remove_depth(images) - batch_size = images.shape.as_list()[0] - grid_size = min(int(math.sqrt(batch_size)), max_grid_size) - assert images.shape.as_list()[0] >= grid_size * grid_size - - # If we have a depth channel - if images.shape.as_list()[-1] == 4: - images = images[:grid_size * grid_size, :, :, 0:3] - depth = tf.image.grayscale_to_rgb(images[:grid_size * grid_size, :, :, 3:4]) - - images = tf.reshape(images, [-1, images.shape.as_list()[2], 3]) - split = tf.split(0, grid_size, images) - depth = tf.reshape(depth, [-1, images.shape.as_list()[2], 3]) - depth_split = tf.split(0, grid_size, depth) - grid = tf.concat(split + depth_split, 1) - return tf.expand_dims(grid, 0) - else: - images = images[:grid_size * grid_size, :, :, :] - images = tf.reshape( - images, [-1, images.shape.as_list()[2], - images.shape.as_list()[3]]) - split = tf.split(images, grid_size, 0) - grid = tf.concat(split, 1) - return tf.expand_dims(grid, 0) - - -def source_and_output_image_grid(output_images, - source_images=None, - max_grid_size=4): - """Create NxN image grid for output, concatenate source grid if given. - - Makes grid out of output_images and, if provided, source_images, and - concatenates them. 
- - Args: - output_images: [batch_size, h, w, c] tensor of images - source_images: optional[batch_size, h, w, c] tensor of images - max_grid_size: Image grid height/width - - Returns: - Single image batch, of dim [1, h*n, w*n, c] - - - """ - output_grid = image_grid(output_images, max_grid_size=max_grid_size) - if source_images is not None: - source_grid = image_grid(source_images, max_grid_size=max_grid_size) - # Make sure they have the same # of channels before concat - # Assumes either 1 or 3 channels - if output_grid.shape.as_list()[-1] != source_grid.shape.as_list()[-1]: - if output_grid.shape.as_list()[-1] == 1: - output_grid = tf.tile(output_grid, [1, 1, 1, 3]) - if source_grid.shape.as_list()[-1] == 1: - source_grid = tf.tile(source_grid, [1, 1, 1, 3]) - output_grid = tf.concat([output_grid, source_grid], 1) - return output_grid - - -def summarize_model(end_points): - """Summarizes the given model via its end_points. - - Args: - end_points: A dictionary of end_point names to `Tensor`. - """ - tf.summary.histogram('domain_logits_transferred', - tf.sigmoid(end_points['transferred_domain_logits'])) - - tf.summary.histogram('domain_logits_target', - tf.sigmoid(end_points['target_domain_logits'])) - - -def summarize_transferred_grid(transferred_images, - source_images=None, - name='Transferred'): - """Produces a visual grid summarization of the image transferrence. - - Args: - transferred_images: A `Tensor` of size [batch_size, height, width, c]. - source_images: A `Tensor` of size [batch_size, height, width, c]. - name: Name to use in summary name - """ - if source_images is not None: - grid = source_and_output_image_grid(transferred_images, source_images) - else: - grid = image_grid(transferred_images) - tf.summary.image('%s_Images_Grid' % name, grid, max_outputs=1) - - -def summarize_transferred(source_images, - transferred_images, - max_images=20, - name='Transferred'): - """Produces a visual summary of the image transferrence. 
- - This summary displays the source image, transferred image, and a grayscale - difference image which highlights the differences between input and output. - - Args: - source_images: A `Tensor` of size [batch_size, height, width, channels]. - transferred_images: A `Tensor` of size [batch_size, height, width, channels] - max_images: The number of images to show. - name: Name to use in summary name - - Raises: - ValueError: If number of channels in source and target are incompatible - """ - source_channels = source_images.shape.as_list()[-1] - transferred_channels = transferred_images.shape.as_list()[-1] - if source_channels < transferred_channels: - if source_channels != 1: - raise ValueError( - 'Source must be 1 channel or same # of channels as target') - source_images = tf.tile(source_images, [1, 1, 1, transferred_channels]) - if transferred_channels < source_channels: - if transferred_channels != 1: - raise ValueError( - 'Target must be 1 channel or same # of channels as source') - transferred_images = tf.tile(transferred_images, [1, 1, 1, source_channels]) - diffs = tf.abs(source_images - transferred_images) - diffs = tf.reduce_max(diffs, reduction_indices=[3], keep_dims=True) - diffs = tf.tile(diffs, [1, 1, 1, max(source_channels, transferred_channels)]) - - transition_images = tf.concat([ - source_images, - transferred_images, - diffs, - ], 2) - - tf.summary.image( - '%s_difference' % name, transition_images, max_outputs=max_images) - - -def summaries_color_distributions(images, name): - """Produces a histogram of the color distributions of the images. - - Args: - images: A `Tensor` of size [batch_size, height, width, 3]. - name: The name of the images being summarized. - """ - tf.summary.histogram('color_values/%s' % name, images) - - -def summarize_images(images, name): - """Produces a visual summary of the given images. - - Args: - images: A `Tensor` of size [batch_size, height, width, 3]. - name: The name of the images being summarized. 
- """ - grid = image_grid(images) - tf.summary.image('%s_Images' % name, grid, max_outputs=1) diff --git a/research/feelvos/CONTRIBUTING.md b/research/feelvos/CONTRIBUTING.md deleted file mode 100644 index 939e5341e74dc2371c8b47f0e27b50581bed5f63..0000000000000000000000000000000000000000 --- a/research/feelvos/CONTRIBUTING.md +++ /dev/null @@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google.com/conduct/). diff --git a/research/feelvos/LICENSE b/research/feelvos/LICENSE deleted file mode 100644 index d645695673349e3947e8e5ae42332d0ac3164cd7..0000000000000000000000000000000000000000 --- a/research/feelvos/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/research/feelvos/README.md b/research/feelvos/README.md deleted file mode 100644 index 69017c8b19fc1427c47cbdfbdce408ffa92ec32c..0000000000000000000000000000000000000000 --- a/research/feelvos/README.md +++ /dev/null @@ -1,102 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# FEELVOS: Fast End-to-End Embedding Learning for Video Object Segmentation - -FEELVOS is a fast model for video object segmentation which does not rely on fine-tuning on the -first frame. - -For details, please refer to our paper. 
If you find the code useful, please -also consider citing it. - -* FEELVOS: - -``` -@inproceedings{feelvos2019, - title={FEELVOS: Fast End-to-End Embedding Learning for Video Object Segmentation}, - author={Paul Voigtlaender and Yuning Chai and Florian Schroff and Hartwig Adam and Bastian Leibe and Liang-Chieh Chen}, - booktitle={CVPR}, - year={2019} -} -``` - -## Dependencies - -FEELVOS requires a good GPU with around 12 GB of memory and depends on the following libraries - -* TensorFlow -* Pillow -* Numpy -* Scipy -* Scikit Learn Image -* tf Slim (which is included in the "tensorflow/models/research/" checkout) -* DeepLab (which is included in the "tensorflow/models/research/" checkout) -* correlation_cost (optional, see below) - -For detailed steps to install Tensorflow, follow the [Tensorflow installation -instructions](https://www.tensorflow.org/install/). A typical user can install -Tensorflow using the following command: - -```bash -pip install tensorflow-gpu -``` - -The remaining libraries can also be installed with pip using: - -```bash -pip install pillow scipy scikit-image -``` - -## Dependency on correlation_cost - -For fast cross-correlation, we use correlation cost as an external dependency. By default FEELVOS -will use a slow and memory hungry fallback implementation without correlation_cost. If you care for -performance, you should set up correlation_cost by following the instructions in -correlation_cost/README and afterwards setting ```USE_CORRELATION_COST = True``` in -utils/embedding_utils.py. 
- -## Pre-trained Models - -We provide 2 pre-trained FEELVOS models, both are based on Xception-65: - -* [Trained on DAVIS 2017](http://download.tensorflow.org/models/feelvos_davis17_trained.tar.gz) -* [Trained on DAVIS 2017 and YouTube-VOS](http://download.tensorflow.org/models/feelvos_davis17_and_youtubevos_trained.tar.gz) - -Additionally, we provide a [DeepLab checkpoint for Xception-65 pre-trained on ImageNet and COCO](http://download.tensorflow.org/models/xception_65_coco_pretrained_2018_10_02.tar.gz), -which can be used as an initialization for training FEELVOS. - -## Pre-computed Segmentation Masks - -We provide [pre-computed segmentation masks](http://download.tensorflow.org/models/feelvos_precomputed_masks.zip) -for FEELVOS both for training with and without YouTube-VOS data for the following datasets: - -* DAVIS 2017 validation set -* DAVIS 2017 test-dev set -* YouTube-Objects dataset - -## Local Inference -For a demo of local inference on DAVIS 2017 run - -```bash -# From tensorflow/models/research/feelvos -sh eval.sh -``` - -## Local Training -For a demo of local training on DAVIS 2017 run - -```bash -# From tensorflow/models/research/feelvos -sh train.sh -``` - -## Contacts (Maintainers) -* Paul Voigtlaender, github: [pvoigtlaender](https://github.com/pvoigtlaender) -* Yuning Chai, github: [yuningchai](https://github.com/yuningchai) -* Liang-Chieh Chen, github: [aquariusjay](https://github.com/aquariusjay) - -## License - -All the codes in feelvos folder is covered by the [LICENSE](https://github.com/tensorflow/models/blob/master/LICENSE) -under tensorflow/models. Please refer to the LICENSE for details. diff --git a/research/feelvos/__init__.py b/research/feelvos/__init__.py deleted file mode 100644 index 6f1373443d0ff84fd90714e41dade400ab41a22c..0000000000000000000000000000000000000000 --- a/research/feelvos/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/research/feelvos/common.py b/research/feelvos/common.py deleted file mode 100644 index 98f5a9ce348aea36efa4b3cc57048d3659f18895..0000000000000000000000000000000000000000 --- a/research/feelvos/common.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides flags that are common to scripts. - -Common flags from train/vis_video.py are collected in this script. 
-""" -import tensorflow as tf - -from deeplab import common - -flags = tf.app.flags - -flags.DEFINE_enum( - 'classification_loss', 'softmax_with_attention', - ['softmax', 'triplet', 'softmax_with_attention'], - 'Type of loss function used for classifying pixels, can be either softmax, ' - 'softmax_with_attention, or triplet.') - -flags.DEFINE_integer('k_nearest_neighbors', 1, - 'The number of nearest neighbors to use.') - -flags.DEFINE_integer('embedding_dimension', 100, 'The dimension used for the ' - 'learned embedding') - -flags.DEFINE_boolean('use_softmax_feedback', True, - 'Whether to give the softmax predictions of the last ' - 'frame as additional input to the segmentation head.') - -flags.DEFINE_boolean('sample_adjacent_and_consistent_query_frames', True, - 'If true, the query frames (all but the first frame ' - 'which is the reference frame) will be sampled such ' - 'that they are adjacent video frames and have the same ' - 'crop coordinates and flip augmentation. Note that if ' - 'use_softmax_feedback is True, this option will ' - 'automatically be activated.') - -flags.DEFINE_integer('embedding_seg_feature_dimension', 256, - 'The dimensionality used in the segmentation head layers.') - -flags.DEFINE_integer('embedding_seg_n_layers', 4, 'The number of layers in the ' - 'segmentation head.') - -flags.DEFINE_integer('embedding_seg_kernel_size', 7, 'The kernel size used in ' - 'the segmentation head.') - -flags.DEFINE_multi_integer('embedding_seg_atrous_rates', [], - 'The atrous rates to use for the segmentation head.') - -flags.DEFINE_boolean('normalize_nearest_neighbor_distances', True, - 'Whether to normalize the nearest neighbor distances ' - 'to [0,1] using sigmoid, scale and shift.') - -flags.DEFINE_boolean('also_attend_to_previous_frame', True, 'Whether to also ' - 'use nearest neighbor attention with respect to the ' - 'previous frame.') - -flags.DEFINE_bool('use_local_previous_frame_attention', True, - 'Whether to restrict the previous frame 
attention to a local ' - 'search window. Only has an effect, if ' - 'also_attend_to_previous_frame is True.') - -flags.DEFINE_integer('previous_frame_attention_window_size', 15, - 'The window size used for local previous frame attention,' - ' if use_local_previous_frame_attention is True.') - -flags.DEFINE_boolean('use_first_frame_matching', True, 'Whether to extract ' - 'features by matching to the reference frame. This should ' - 'always be true except for ablation experiments.') - -FLAGS = flags.FLAGS - -# Constants - -# Perform semantic segmentation predictions. -OUTPUT_TYPE = common.OUTPUT_TYPE - -# Semantic segmentation item names. -LABELS_CLASS = common.LABELS_CLASS -IMAGE = common.IMAGE -HEIGHT = common.HEIGHT -WIDTH = common.WIDTH -IMAGE_NAME = common.IMAGE_NAME -SOURCE_ID = 'source_id' -VIDEO_ID = 'video_id' -LABEL = common.LABEL -ORIGINAL_IMAGE = common.ORIGINAL_IMAGE -PRECEDING_FRAME_LABEL = 'preceding_frame_label' - -# Test set name. -TEST_SET = common.TEST_SET - -# Internal constants. -OBJECT_LABEL = 'object_label' - - -class VideoModelOptions(common.ModelOptions): - """Internal version of immutable class to hold model options.""" - - def __new__(cls, - outputs_to_num_classes, - crop_size=None, - atrous_rates=None, - output_stride=8): - """Constructor to set default values. - - Args: - outputs_to_num_classes: A dictionary from output type to the number of - classes. For example, for the task of semantic segmentation with 21 - semantic classes, we would have outputs_to_num_classes['semantic'] = 21. - crop_size: A tuple [crop_height, crop_width]. - atrous_rates: A list of atrous convolution rates for ASPP. - output_stride: The ratio of input to output spatial resolution. - - Returns: - A new VideoModelOptions instance. - """ - self = super(VideoModelOptions, cls).__new__( - cls, - outputs_to_num_classes, - crop_size, - atrous_rates, - output_stride) - # Add internal flags. 
- self.classification_loss = FLAGS.classification_loss - - return self - - -def parse_decoder_output_stride(): - """Parses decoder output stride. - - FEELVOS assumes decoder_output_stride = 4. Thus, this function is created for - this particular purpose. - - Returns: - An integer specifying the decoder_output_stride. - - Raises: - ValueError: If decoder_output_stride is None or contains more than one - element. - """ - if FLAGS.decoder_output_stride: - decoder_output_stride = [ - int(x) for x in FLAGS.decoder_output_stride] - if len(decoder_output_stride) != 1: - raise ValueError('Expect decoder output stride has only one element.') - decoder_output_stride = decoder_output_stride[0] - else: - raise ValueError('Expect flag decoder output stride not to be None.') - return decoder_output_stride diff --git a/research/feelvos/correlation_cost/README.md b/research/feelvos/correlation_cost/README.md deleted file mode 100644 index 6cdbe550c7fcf63191f6967dd99c72cf341302bc..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# correlation_cost - -FEELVOS uses correlation_cost as an optional dependency to improve the speed and memory consumption -of cross-correlation. - -## Installation - -Unfortunately we cannot provide the code for correlation_cost directly, so you -will have to copy some files from this pull request -https://github.com/tensorflow/tensorflow/pull/21392/. For your convenience we -prepared scripts to download and adjust the code automatically. - -In the best case, all you need to do is run compile.sh with the path to your -CUDA installation (tested only with CUDA 9). -Note that the path should be to a folder containing the cuda folder, not to the -cuda folder itself, e.g. 
if your cuda is in /usr/local/cuda-9.0, you can create -a symlink /usr/local/cuda pointing to /usr/local/cuda-9.0 and then run - -```bash -sh build.sh /usr/local/ -``` - -This will - -* Download the code via ```sh get_code.sh ``` -* Apply minor adjustments to the code via ```sh fix_code.sh``` -* Clone the dependencies cub and thrust from github via ```sh clone_dependencies.sh``` -* Compile a shared library correlation_cost.so for correlation_cost via -```sh compile.sh "${CUDA_DIR}"``` - -Please review the licenses of correlation_cost, cub, and thrust. - -## Enabling correlation_cost -If you managed to create the correlation_cost.so file, then set -```USE_CORRELATION_COST = True``` in feelvos/utils/embedding_utils.py and try to run -```sh eval.sh```. diff --git a/research/feelvos/correlation_cost/build.sh b/research/feelvos/correlation_cost/build.sh deleted file mode 100755 index 37d9adb3147df07646a462fd170772393abf5642..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/build.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to download and build the code for correlation_cost. -# -# Usage: -# sh ./build.sh cuda_dir -# Where cuda_dir points to a directory containing the cuda folder (not the cuda folder itself). 
-# -# - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters, usage: ./build.sh cuda_dir" - echo "Where cuda_dir points to a directory containing the cuda folder (not the cuda folder itself)" - exit 1 -fi - -set -e -set -x - -sh ./get_code.sh -sh ./fix_code.sh -sh ./clone_dependencies.sh -sh ./compile.sh $1 diff --git a/research/feelvos/correlation_cost/clone_dependencies.sh b/research/feelvos/correlation_cost/clone_dependencies.sh deleted file mode 100755 index 9174313f58a833a5ab547e21c63cdc87681cbc5d..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/clone_dependencies.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to clone the dependencies, i.e. cub and thrust, of correlation_cost from github. -# -# Usage: -# sh ./clone_dependencies.sh -# -# - -# Clone cub. -if [ ! -d cub ] ; then - git clone https://github.com/dmlc/cub.git -fi -# Clone thrust. -if [ ! 
-d thrust ] ; then - git clone https://github.com/thrust/thrust.git -fi diff --git a/research/feelvos/correlation_cost/compile.sh b/research/feelvos/correlation_cost/compile.sh deleted file mode 100755 index 6025292dfa78b44dd6fcf2f1b349af936a43fcc7..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/compile.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to compile the code for correlation_cost and create correlation_cost.so. -# -# Usage: -# sh ./compile.sh cuda_dir -# Where cuda_dir points to a directory containing the cuda folder (not the cuda folder itself). -# -# - -if [ "$#" -ne 1 ]; then - echo "Illegal number of parameters, usage: ./compile.sh cuda_dir" - exit 1 -fi -CUDA_DIR=$1 - -if [ ! -d "${CUDA_DIR}/cuda" ]; then - echo "cuda_dir must point to a directory containing the cuda folder, not to the cuda folder itself" - exit 1 -fi - -TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) -TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) -CUB_DIR=cub -THRUST_DIR=thrust - -# Depending on the versions of your nvcc and gcc, the flag --expt-relaxed-constexpr might be required or should be removed. 
-# If nvcc complains about a too new gcc version, you can point it to another gcc -# version by using something like nvcc -ccbin /path/to/your/gcc6 -nvcc -std=c++11 --expt-relaxed-constexpr -I ./ -I ${CUB_DIR}/../ -I ${THRUST_DIR} -I ${CUDA_DIR}/ -c -o correlation_cost_op_gpu.o kernels/correlation_cost_op_gpu.cu.cc ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC - -g++ -std=c++11 -I ./ -L ${CUDA_DIR}/cuda/lib64 -shared -o correlation_cost.so ops/correlation_cost_op.cc kernels/correlation_cost_op.cc correlation_cost_op_gpu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} -D GOOGLE_CUDA=1 diff --git a/research/feelvos/correlation_cost/fix_code.sh b/research/feelvos/correlation_cost/fix_code.sh deleted file mode 100755 index d4f285db3d745fc55a20bac57f97c6ca2fd8a5c4..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/fix_code.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to modify the downloaded code. 
-# -# Usage: -# sh ./fix_code.sh -# -# - -sed -i "s/tensorflow\/contrib\/correlation_cost\///g" kernels/correlation_cost_op_gpu.cu.cc -sed -i "s/tensorflow\/contrib\/correlation_cost\///g" kernels/correlation_cost_op.cc -sed -i "s/external\/cub_archive\//cub\//g" kernels/correlation_cost_op_gpu.cu.cc - -sed -i "s/from tensorflow.contrib.util import loader/import tensorflow as tf/g" python/ops/correlation_cost_op.py -grep -v "from tensorflow" python/ops/correlation_cost_op.py | grep -v resource_loader.get_path_to_datafile > correlation_cost_op.py.tmp && mv correlation_cost_op.py.tmp python/ops/correlation_cost_op.py -sed -i "s/array_ops/tf/g" python/ops/correlation_cost_op.py -sed -i "s/ops/tf/g" python/ops/correlation_cost_op.py -sed -i "s/loader.load_op_library(/tf.load_op_library('feelvos\/correlation_cost\/correlation_cost.so')/g" python/ops/correlation_cost_op.py -sed -i "s/gen_correlation_cost_op/_correlation_cost_op_so/g" python/ops/correlation_cost_op.py diff --git a/research/feelvos/correlation_cost/get_code.sh b/research/feelvos/correlation_cost/get_code.sh deleted file mode 100755 index 337142166ac4b61835417e807ef0a495532d749c..0000000000000000000000000000000000000000 --- a/research/feelvos/correlation_cost/get_code.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -# -# This script is used to download the code for correlation_cost. -# -# Usage: -# sh ./get_code.sh -# -# - -mkdir -p kernels ops python/ops -touch __init__.py -touch python/__init__.py -touch python/ops/__init__.py -wget https://raw.githubusercontent.com/tensorflow/tensorflow/91b163b9bd8dd0f8c2631b4245a67dfd387536a6/tensorflow/contrib/correlation_cost/ops/correlation_cost_op.cc -O ops/correlation_cost_op.cc -wget https://raw.githubusercontent.com/tensorflow/tensorflow/91b163b9bd8dd0f8c2631b4245a67dfd387536a6/tensorflow/contrib/correlation_cost/python/ops/correlation_cost_op.py -O python/ops/correlation_cost_op.py -wget https://raw.githubusercontent.com/tensorflow/tensorflow/91b163b9bd8dd0f8c2631b4245a67dfd387536a6/tensorflow/contrib/correlation_cost/kernels/correlation_cost_op.cc -O kernels/correlation_cost_op.cc -wget https://raw.githubusercontent.com/tensorflow/tensorflow/91b163b9bd8dd0f8c2631b4245a67dfd387536a6/tensorflow/contrib/correlation_cost/kernels/correlation_cost_op.h -O kernels/correlation_cost_op.h -wget https://raw.githubusercontent.com/tensorflow/tensorflow/91b163b9bd8dd0f8c2631b4245a67dfd387536a6/tensorflow/contrib/correlation_cost/kernels/correlation_cost_op_gpu.cu.cc -O kernels/correlation_cost_op_gpu.cu.cc diff --git a/research/feelvos/datasets/__init__.py b/research/feelvos/datasets/__init__.py deleted file mode 100644 index 6f1373443d0ff84fd90714e41dade400ab41a22c..0000000000000000000000000000000000000000 --- a/research/feelvos/datasets/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/research/feelvos/datasets/build_davis2017_data.py b/research/feelvos/datasets/build_davis2017_data.py deleted file mode 100644 index 5e093fc3b4531f5439957ea3608770441bd5ce4a..0000000000000000000000000000000000000000 --- a/research/feelvos/datasets/build_davis2017_data.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Converts DAVIS 2017 data to TFRecord file format with SequenceExample protos. 
-""" - -import io -import math -import os -from StringIO import StringIO -import numpy as np -import PIL -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string('data_folder', 'DAVIS2017/', - 'Folder containing the DAVIS 2017 data') - -tf.app.flags.DEFINE_string('imageset', 'val', - 'Which subset to use, either train or val') - -tf.app.flags.DEFINE_string( - 'output_dir', './tfrecord', - 'Path to save converted TFRecords of TensorFlow examples.') - -_NUM_SHARDS_TRAIN = 10 -_NUM_SHARDS_VAL = 1 - - -def read_image(path): - with open(path) as fid: - image_str = fid.read() - image = PIL.Image.open(io.BytesIO(image_str)) - w, h = image.size - return image_str, (h, w) - - -def read_annotation(path): - """Reads a single image annotation from a png image. - - Args: - path: Path to the png image. - - Returns: - png_string: The png encoded as string. - size: Tuple of (height, width). - """ - with open(path) as fid: - x = np.array(PIL.Image.open(fid)) - h, w = x.shape - im = PIL.Image.fromarray(x) - - output = StringIO() - im.save(output, format='png') - png_string = output.getvalue() - output.close() - - return png_string, (h, w) - - -def process_video(key, input_dir, anno_dir): - """Creates a SequenceExample for the video. - - Args: - key: Name of the video. - input_dir: Directory which contains the image files. - anno_dir: Directory which contains the annotation files. - - Returns: - The created SequenceExample. 
- """ - frame_names = sorted(tf.gfile.ListDirectory(input_dir)) - anno_files = sorted(tf.gfile.ListDirectory(anno_dir)) - assert len(frame_names) == len(anno_files) - - sequence = tf.train.SequenceExample() - context = sequence.context.feature - features = sequence.feature_lists.feature_list - - for i, name in enumerate(frame_names): - image_str, image_shape = read_image( - os.path.join(input_dir, name)) - anno_str, anno_shape = read_annotation( - os.path.join(anno_dir, name[:-4] + '.png')) - image_encoded = features['image/encoded'].feature.add() - image_encoded.bytes_list.value.append(image_str) - segmentation_encoded = features['segmentation/object/encoded'].feature.add() - segmentation_encoded.bytes_list.value.append(anno_str) - - np.testing.assert_array_equal(np.array(image_shape), np.array(anno_shape)) - - if i == 0: - first_shape = np.array(image_shape) - else: - np.testing.assert_array_equal(np.array(image_shape), first_shape) - - context['video_id'].bytes_list.value.append(key.encode('ascii')) - context['clip/frames'].int64_list.value.append(len(frame_names)) - context['image/format'].bytes_list.value.append('JPEG') - context['image/channels'].int64_list.value.append(3) - context['image/height'].int64_list.value.append(first_shape[0]) - context['image/width'].int64_list.value.append(first_shape[1]) - context['segmentation/object/format'].bytes_list.value.append('PNG') - context['segmentation/object/height'].int64_list.value.append(first_shape[0]) - context['segmentation/object/width'].int64_list.value.append(first_shape[1]) - - return sequence - - -def convert(data_folder, imageset, output_dir, num_shards): - """Converts the specified subset of DAVIS 2017 to TFRecord format. - - Args: - data_folder: The path to the DAVIS 2017 data. - imageset: The subset to use, either train or val. - output_dir: Where to store the TFRecords. - num_shards: The number of shards used for storing the data. 
- """ - sets_file = os.path.join(data_folder, 'ImageSets', '2017', imageset + '.txt') - vids = [x.strip() for x in open(sets_file).readlines()] - num_vids = len(vids) - num_vids_per_shard = int(math.ceil(num_vids) / float(num_shards)) - for shard_id in range(num_shards): - output_filename = os.path.join( - output_dir, - '%s-%05d-of-%05d.tfrecord' % (imageset, shard_id, num_shards)) - with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: - start_idx = shard_id * num_vids_per_shard - end_idx = min((shard_id + 1) * num_vids_per_shard, num_vids) - for i in range(start_idx, end_idx): - print('Converting video %d/%d shard %d video %s' % ( - i + 1, num_vids, shard_id, vids[i])) - img_dir = os.path.join(data_folder, 'JPEGImages', '480p', vids[i]) - anno_dir = os.path.join(data_folder, 'Annotations', '480p', vids[i]) - example = process_video(vids[i], img_dir, anno_dir) - tfrecord_writer.write(example.SerializeToString()) - - -def main(unused_argv): - imageset = FLAGS.imageset - assert imageset in ('train', 'val') - if imageset == 'train': - num_shards = _NUM_SHARDS_TRAIN - else: - num_shards = _NUM_SHARDS_VAL - convert(FLAGS.data_folder, FLAGS.imageset, FLAGS.output_dir, num_shards) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/feelvos/datasets/download_and_convert_davis17.sh b/research/feelvos/datasets/download_and_convert_davis17.sh deleted file mode 100644 index 011be61ba7586c8f3d141ccc00194d1c7ae56c3a..0000000000000000000000000000000000000000 --- a/research/feelvos/datasets/download_and_convert_davis17.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Script to download and preprocess the DAVIS 2017 dataset. -# -# Usage: -# bash ./download_and_convert_davis17.sh - -# Exit immediately if a command exits with a non-zero status. -set -e - -CURRENT_DIR=$(pwd) -WORK_DIR="./davis17" -mkdir -p "${WORK_DIR}" -cd "${WORK_DIR}" - -# Helper function to download and unpack the DAVIS 2017 dataset. -download_and_uncompress() { - local BASE_URL=${1} - local FILENAME=${2} - - if [ ! -f "${FILENAME}" ]; then - echo "Downloading ${FILENAME} to ${WORK_DIR}" - wget -nd -c "${BASE_URL}/${FILENAME}" - echo "Uncompressing ${FILENAME}" - unzip "${FILENAME}" - fi -} - -BASE_URL="https://data.vision.ee.ethz.ch/csergi/share/davis/" -FILENAME="DAVIS-2017-trainval-480p.zip" - -download_and_uncompress "${BASE_URL}" "${FILENAME}" - -cd "${CURRENT_DIR}" - -# Root path for DAVIS 2017 dataset. -DAVIS_ROOT="${WORK_DIR}/DAVIS" - -# Build TFRecords of the dataset. -# First, create output directory for storing TFRecords. -OUTPUT_DIR="${WORK_DIR}/tfrecord" -mkdir -p "${OUTPUT_DIR}" - -IMAGE_FOLDER="${DAVIS_ROOT}/JPEGImages" -LIST_FOLDER="${DAVIS_ROOT}/ImageSets/Segmentation" - -# Convert validation set. -if [ ! -f "${OUTPUT_DIR}/val-00000-of-00001.tfrecord" ]; then - echo "Converting DAVIS 2017 dataset (val)..." - python ./build_davis2017_data.py \ - --data_folder="${DAVIS_ROOT}" \ - --imageset=val \ - --output_dir="${OUTPUT_DIR}" -fi - -# Convert training set. -if [ ! 
-f "${OUTPUT_DIR}/train-00009-of-00010.tfrecord" ]; then - echo "Converting DAVIS 2017 dataset (train)..." - python ./build_davis2017_data.py \ - --data_folder="${DAVIS_ROOT}" \ - --imageset=train \ - --output_dir="${OUTPUT_DIR}" -fi diff --git a/research/feelvos/datasets/tfsequence_example_decoder.py b/research/feelvos/datasets/tfsequence_example_decoder.py deleted file mode 100644 index 2fa3e95d5b98eb00aa485371037b4ad6b0e7ece3..0000000000000000000000000000000000000000 --- a/research/feelvos/datasets/tfsequence_example_decoder.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains the TFExampleDecoder. - -The TFExampleDecode is a DataDecoder used to decode TensorFlow Example protos. -In order to do so each requested item must be paired with one or more Example -features that are parsed to produce the Tensor-based manifestation of the item. -""" - -import tensorflow as tf -slim = tf.contrib.slim -data_decoder = slim.data_decoder - - -class TFSequenceExampleDecoder(data_decoder.DataDecoder): - """A decoder for TensorFlow SequenceExamples. - - Decoding SequenceExample proto buffers is comprised of two stages: - (1) Example parsing and (2) tensor manipulation. 
- - In the first stage, the tf.parse_single_sequence_example function is called - with a list of FixedLenFeatures and SparseLenFeatures. These instances tell TF - how to parse the example. The output of this stage is a set of tensors. - - In the second stage, the resulting tensors are manipulated to provide the - requested 'item' tensors. - - To perform this decoding operation, a SequenceExampleDecoder is given a list - of ItemHandlers. Each ItemHandler indicates the set of features for stage 1 - and contains the instructions for post_processing its tensors for stage 2. - """ - - def __init__(self, keys_to_context_features, keys_to_sequence_features, - items_to_handlers): - """Constructs the decoder. - - Args: - keys_to_context_features: a dictionary from TF-SequenceExample context - keys to either tf.VarLenFeature or tf.FixedLenFeature instances. - See tensorflow's parsing_ops.py. - keys_to_sequence_features: a dictionary from TF-SequenceExample sequence - keys to either tf.VarLenFeature or tf.FixedLenSequenceFeature instances. - See tensorflow's parsing_ops.py. - items_to_handlers: a dictionary from items (strings) to ItemHandler - instances. Note that the ItemHandler's are provided the keys that they - use to return the final item Tensors. - - Raises: - ValueError: if the same key is present for context features and sequence - features. - """ - unique_keys = set() - unique_keys.update(keys_to_context_features) - unique_keys.update(keys_to_sequence_features) - if len(unique_keys) != ( - len(keys_to_context_features) + len(keys_to_sequence_features)): - # This situation is ambiguous in the decoder's keys_to_tensors variable. - raise ValueError('Context and sequence keys are not unique. 
\n' - ' Context keys: %s \n Sequence keys: %s' % - (list(keys_to_context_features.keys()), - list(keys_to_sequence_features.keys()))) - - self._keys_to_context_features = keys_to_context_features - self._keys_to_sequence_features = keys_to_sequence_features - self._items_to_handlers = items_to_handlers - - def list_items(self): - """See base class.""" - return self._items_to_handlers.keys() - - def decode(self, serialized_example, items=None): - """Decodes the given serialized TF-SequenceExample. - - Args: - serialized_example: a serialized TF-SequenceExample tensor. - items: the list of items to decode. These must be a subset of the item - keys in self._items_to_handlers. If `items` is left as None, then all - of the items in self._items_to_handlers are decoded. - - Returns: - the decoded items, a list of tensor. - """ - - context, feature_list = tf.parse_single_sequence_example( - serialized_example, self._keys_to_context_features, - self._keys_to_sequence_features) - - # Reshape non-sparse elements just once: - for k in self._keys_to_context_features: - v = self._keys_to_context_features[k] - if isinstance(v, tf.FixedLenFeature): - context[k] = tf.reshape(context[k], v.shape) - - if not items: - items = self._items_to_handlers.keys() - - outputs = [] - for item in items: - handler = self._items_to_handlers[item] - keys_to_tensors = { - key: context[key] if key in context else feature_list[key] - for key in handler.keys - } - outputs.append(handler.tensors_to_item(keys_to_tensors)) - return outputs diff --git a/research/feelvos/datasets/video_dataset.py b/research/feelvos/datasets/video_dataset.py deleted file mode 100644 index 17b62e989af866df0232a0e6d921faee84fe1fa7..0000000000000000000000000000000000000000 --- a/research/feelvos/datasets/video_dataset.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides data from video object segmentation datasets. - -This file provides both images and annotations (instance segmentations) for -TensorFlow. Currently, we support the following datasets: - -1. DAVIS 2017 (https://davischallenge.org/davis2017/code.html). - -2. DAVIS 2016 (https://davischallenge.org/davis2016/code.html). - -3. YouTube-VOS (https://youtube-vos.org/dataset/download). -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os.path -import tensorflow as tf -from feelvos.datasets import tfsequence_example_decoder - -slim = tf.contrib.slim -dataset = slim.dataset -tfexample_decoder = slim.tfexample_decoder - - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A color image of varying height and width.', - 'labels_class': ('A semantic segmentation label whose size matches image.' - 'Its values range from 0 (background) to num_classes.'), -} - -# Named tuple to describe the dataset properties. -DatasetDescriptor = collections.namedtuple( - 'DatasetDescriptor', - ['splits_to_sizes', # Splits of the dataset into training, val, and test. - 'num_classes', # Number of semantic classes. - 'ignore_label', # Ignore label value. 
- ] -) - -_DAVIS_2016_INFORMATION = DatasetDescriptor( - splits_to_sizes={'train': [30, 1830], - 'val': [20, 1376]}, - num_classes=2, - ignore_label=255, -) - -_DAVIS_2017_INFORMATION = DatasetDescriptor( - splits_to_sizes={'train': [60, 4219], - 'val': [30, 2023], - 'test-dev': [30, 2037]}, - num_classes=None, # Number of instances per videos differ. - ignore_label=255, -) - -_YOUTUBE_VOS_2018_INFORMATION = DatasetDescriptor( - # Leave these sizes as None to allow for different splits into - # training and validation sets. - splits_to_sizes={'train': [None, None], - 'val': [None, None]}, - num_classes=None, # Number of instances per video differs. - ignore_label=255, -) - -_DATASETS_INFORMATION = { - 'davis_2016': _DAVIS_2016_INFORMATION, - 'davis_2017': _DAVIS_2017_INFORMATION, - 'youtube_vos_2018': _YOUTUBE_VOS_2018_INFORMATION, -} - -# Default file pattern of SSTable. Note we include '-' to avoid the confusion -# between `train-` and `trainval-` sets. -_FILE_PATTERN = '%s-*' - - -def get_dataset(dataset_name, - split_name, - dataset_dir, - file_pattern=None, - data_type='tf_sequence_example', - decode_video_frames=False): - """Gets an instance of slim Dataset. - - Args: - dataset_name: String, dataset name. - split_name: String, the train/val Split name. - dataset_dir: String, the directory of the dataset sources. - file_pattern: String, file pattern of SSTable. - data_type: String, data type. Currently supports 'tf_example' and - 'annotated_image'. - decode_video_frames: Boolean, decode the images or not. Not decoding it here - is useful if we subsample later - - Returns: - An instance of slim Dataset. - - Raises: - ValueError: If the dataset_name or split_name is not recognized, or if - the dataset_type is not supported. 
- """ - if dataset_name not in _DATASETS_INFORMATION: - raise ValueError('The specified dataset is not supported yet.') - - splits_to_sizes = _DATASETS_INFORMATION[dataset_name].splits_to_sizes - - if split_name not in splits_to_sizes: - raise ValueError('data split name %s not recognized' % split_name) - - # Prepare the variables for different datasets. - num_classes = _DATASETS_INFORMATION[dataset_name].num_classes - ignore_label = _DATASETS_INFORMATION[dataset_name].ignore_label - - if file_pattern is None: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - if data_type == 'tf_sequence_example': - keys_to_context_features = { - 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), - 'image/height': tf.FixedLenFeature((), tf.int64, default_value=0), - 'image/width': tf.FixedLenFeature((), tf.int64, default_value=0), - 'segmentation/object/format': tf.FixedLenFeature( - (), tf.string, default_value='png'), - 'video_id': tf.FixedLenFeature((), tf.string, default_value='unknown') - } - label_name = 'class' if dataset_name == 'davis_2016' else 'object' - keys_to_sequence_features = { - 'image/encoded': tf.FixedLenSequenceFeature((), dtype=tf.string), - 'segmentation/{}/encoded'.format(label_name): - tf.FixedLenSequenceFeature((), tf.string), - 'segmentation/{}/encoded'.format(label_name): - tf.FixedLenSequenceFeature((), tf.string), - } - items_to_handlers = { - 'height': tfexample_decoder.Tensor('image/height'), - 'width': tfexample_decoder.Tensor('image/width'), - 'video_id': tfexample_decoder.Tensor('video_id') - } - if decode_video_frames: - decode_image_handler = tfexample_decoder.Image( - image_key='image/encoded', - format_key='image/format', - channels=3, - repeated=True) - items_to_handlers['image'] = decode_image_handler - decode_label_handler = tfexample_decoder.Image( - image_key='segmentation/{}/encoded'.format(label_name), - format_key='segmentation/{}/format'.format(label_name), - 
channels=1, - repeated=True) - items_to_handlers['labels_class'] = decode_label_handler - else: - items_to_handlers['image/encoded'] = tfexample_decoder.Tensor( - 'image/encoded') - items_to_handlers[ - 'segmentation/object/encoded'] = tfexample_decoder.Tensor( - 'segmentation/{}/encoded'.format(label_name)) - decoder = tfsequence_example_decoder.TFSequenceExampleDecoder( - keys_to_context_features, keys_to_sequence_features, items_to_handlers) - else: - raise ValueError('Unknown data type.') - - size = splits_to_sizes[split_name] - if isinstance(size, collections.Sequence): - num_videos = size[0] - num_samples = size[1] - else: - num_videos = 0 - num_samples = size - - return dataset.Dataset( - data_sources=file_pattern, - reader=tf.TFRecordReader, - decoder=decoder, - num_samples=num_samples, - num_videos=num_videos, - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - ignore_label=ignore_label, - num_classes=num_classes, - name=dataset_name, - multi_label=True) diff --git a/research/feelvos/eval.sh b/research/feelvos/eval.sh deleted file mode 100755 index 96cb7f409a1e652ba8263f35c3786cb0cb77f5d1..0000000000000000000000000000000000000000 --- a/research/feelvos/eval.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to locally run inference on DAVIS 2017. 
Users could also -# modify from this script for their use case. See train.sh for an example of -# local training. -# -# Usage: -# # From the tensorflow/models/research/feelvos directory. -# sh ./eval.sh -# -# - -# Exit immediately if a command exits with a non-zero status. -set -e - -# Move one-level up to tensorflow/models/research directory. -cd .. - -# Update PYTHONPATH. -export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim:`pwd`/feelvos - -# Set up the working environment. -CURRENT_DIR=$(pwd) -WORK_DIR="${CURRENT_DIR}/feelvos" - -# Run embedding_utils_test first to make sure the PYTHONPATH is correctly set. -python "${WORK_DIR}"/utils/embedding_utils_test.py -v - -# Go to datasets folder and download and convert the DAVIS 2017 dataset. -DATASET_DIR="datasets" -cd "${WORK_DIR}/${DATASET_DIR}" -sh download_and_convert_davis17.sh - -# Go to models folder and download and unpack the DAVIS 2017 trained model. -MODELS_DIR="models" -mkdir -p "${WORK_DIR}/${MODELS_DIR}" -cd "${WORK_DIR}/${MODELS_DIR}" -if [ ! -d "feelvos_davis17_trained" ]; then - wget http://download.tensorflow.org/models/feelvos_davis17_trained.tar.gz - tar -xvf feelvos_davis17_trained.tar.gz - echo "model_checkpoint_path: \"model.ckpt-200004\"" > feelvos_davis17_trained/checkpoint - rm feelvos_davis17_trained.tar.gz -fi -CHECKPOINT_DIR="${WORK_DIR}/${MODELS_DIR}/feelvos_davis17_trained/" - -# Go back to orignal directory. -cd "${CURRENT_DIR}" - -# Set up the working directories. 
-DAVIS_FOLDER="davis17" -EXP_FOLDER="exp/eval_on_val_set" -VIS_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${DAVIS_FOLDER}/${EXP_FOLDER}/eval" -mkdir -p ${VIS_LOGDIR} - -DAVIS_DATASET="${WORK_DIR}/${DATASET_DIR}/${DAVIS_FOLDER}/tfrecord" - -python "${WORK_DIR}"/vis_video.py \ - --dataset=davis_2017 \ - --dataset_dir="${DAVIS_DATASET}" \ - --vis_logdir="${VIS_LOGDIR}" \ - --checkpoint_dir="${CHECKPOINT_DIR}" \ - --logtostderr \ - --atrous_rates=12 \ - --atrous_rates=24 \ - --atrous_rates=36 \ - --decoder_output_stride=4 \ - --model_variant=xception_65 \ - --multi_grid=1 \ - --multi_grid=1 \ - --multi_grid=1 \ - --output_stride=8 \ - --save_segmentations diff --git a/research/feelvos/input_preprocess.py b/research/feelvos/input_preprocess.py deleted file mode 100644 index 954c0b42ef2650b1c25ec8071933beee57e9bd69..0000000000000000000000000000000000000000 --- a/research/feelvos/input_preprocess.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Prepare the data used for FEELVOS training/evaluation.""" -import tensorflow as tf - -from deeplab.core import feature_extractor -from deeplab.core import preprocess_utils - -# The probability of flipping the images and labels -# left-right during training -_PROB_OF_FLIP = 0.5 - -get_random_scale = preprocess_utils.get_random_scale -randomly_scale_image_and_label = ( - preprocess_utils.randomly_scale_image_and_label) - - -def preprocess_image_and_label(image, - label, - crop_height, - crop_width, - min_resize_value=None, - max_resize_value=None, - resize_factor=None, - min_scale_factor=1., - max_scale_factor=1., - scale_factor_step_size=0, - ignore_label=255, - is_training=True, - model_variant=None): - """Preprocesses the image and label. - - Args: - image: Input image. - label: Ground truth annotation label. - crop_height: The height value used to crop the image and label. - crop_width: The width value used to crop the image and label. - min_resize_value: Desired size of the smaller image side. - max_resize_value: Maximum allowed size of the larger image side. - resize_factor: Resized dimensions are multiple of factor plus one. - min_scale_factor: Minimum scale factor value. - max_scale_factor: Maximum scale factor value. - scale_factor_step_size: The step size from min scale factor to max scale - factor. The input is randomly scaled based on the value of - (min_scale_factor, max_scale_factor, scale_factor_step_size). - ignore_label: The label value which will be ignored for training and - evaluation. - is_training: If the preprocessing is used for training or not. - model_variant: Model variant (string) for choosing how to mean-subtract the - images. See feature_extractor.network_map for supported model variants. - - Returns: - original_image: Original image (could be resized). - processed_image: Preprocessed image. - label: Preprocessed ground truth segmentation label. 
- - Raises: - ValueError: Ground truth label not provided during training. - """ - if is_training and label is None: - raise ValueError('During training, label must be provided.') - if model_variant is None: - tf.logging.warning('Default mean-subtraction is performed. Please specify ' - 'a model_variant. See feature_extractor.network_map for ' - 'supported model variants.') - - # Keep reference to original image. - original_image = image - - processed_image = tf.cast(image, tf.float32) - - if label is not None: - label = tf.cast(label, tf.int32) - - # Resize image and label to the desired range. - if min_resize_value is not None or max_resize_value is not None: - [processed_image, label] = ( - preprocess_utils.resize_to_range( - image=processed_image, - label=label, - min_size=min_resize_value, - max_size=max_resize_value, - factor=resize_factor, - align_corners=True)) - # The `original_image` becomes the resized image. - original_image = tf.identity(processed_image) - - # Data augmentation by randomly scaling the inputs. - scale = get_random_scale( - min_scale_factor, max_scale_factor, scale_factor_step_size) - processed_image, label = randomly_scale_image_and_label( - processed_image, label, scale) - - processed_image.set_shape([None, None, 3]) - - if crop_height is not None and crop_width is not None: - # Pad image and label to have dimensions >= [crop_height, crop_width]. - image_shape = tf.shape(processed_image) - image_height = image_shape[0] - image_width = image_shape[1] - - target_height = image_height + tf.maximum(crop_height - image_height, 0) - target_width = image_width + tf.maximum(crop_width - image_width, 0) - - # Pad image with mean pixel value. 
- mean_pixel = tf.reshape( - feature_extractor.mean_pixel(model_variant), [1, 1, 3]) - processed_image = preprocess_utils.pad_to_bounding_box( - processed_image, 0, 0, target_height, target_width, mean_pixel) - - if label is not None: - label = preprocess_utils.pad_to_bounding_box( - label, 0, 0, target_height, target_width, ignore_label) - - # Randomly crop the image and label. - if is_training and label is not None: - processed_image, label = preprocess_utils.random_crop( - [processed_image, label], crop_height, crop_width) - - processed_image.set_shape([crop_height, crop_width, 3]) - - if label is not None: - label.set_shape([crop_height, crop_width, 1]) - - if is_training: - # Randomly left-right flip the image and label. - processed_image, label, _ = preprocess_utils.flip_dim( - [processed_image, label], _PROB_OF_FLIP, dim=1) - - return original_image, processed_image, label - - -def preprocess_images_and_labels_consistently(images, - labels, - crop_height, - crop_width, - min_resize_value=None, - max_resize_value=None, - resize_factor=None, - min_scale_factor=1., - max_scale_factor=1., - scale_factor_step_size=0, - ignore_label=255, - is_training=True, - model_variant=None): - """Preprocesses images and labels in a consistent way. - - Similar to preprocess_image_and_label, but works on a list of images - and a list of labels and uses the same crop coordinates and either flips - all images and labels or none of them. - - Args: - images: List of input images. - labels: List of ground truth annotation labels. - crop_height: The height value used to crop the image and label. - crop_width: The width value used to crop the image and label. - min_resize_value: Desired size of the smaller image side. - max_resize_value: Maximum allowed size of the larger image side. - resize_factor: Resized dimensions are multiple of factor plus one. - min_scale_factor: Minimum scale factor value. - max_scale_factor: Maximum scale factor value. 
- scale_factor_step_size: The step size from min scale factor to max scale - factor. The input is randomly scaled based on the value of - (min_scale_factor, max_scale_factor, scale_factor_step_size). - ignore_label: The label value which will be ignored for training and - evaluation. - is_training: If the preprocessing is used for training or not. - model_variant: Model variant (string) for choosing how to mean-subtract the - images. See feature_extractor.network_map for supported model variants. - - Returns: - original_images: Original images (could be resized). - processed_images: Preprocessed images. - labels: Preprocessed ground truth segmentation labels. - - Raises: - ValueError: Ground truth label not provided during training. - """ - if is_training and labels is None: - raise ValueError('During training, labels must be provided.') - if model_variant is None: - tf.logging.warning('Default mean-subtraction is performed. Please specify ' - 'a model_variant. See feature_extractor.network_map for ' - 'supported model variants.') - if labels is not None: - assert len(images) == len(labels) - num_imgs = len(images) - - # Keep reference to original images. - original_images = images - - processed_images = [tf.cast(image, tf.float32) for image in images] - - if labels is not None: - labels = [tf.cast(label, tf.int32) for label in labels] - - # Resize images and labels to the desired range. - if min_resize_value is not None or max_resize_value is not None: - processed_images, labels = zip(*[ - preprocess_utils.resize_to_range( - image=processed_image, - label=label, - min_size=min_resize_value, - max_size=max_resize_value, - factor=resize_factor, - align_corners=True) for processed_image, label - in zip(processed_images, labels)]) - # The `original_images` becomes the resized images. - original_images = [tf.identity(processed_image) - for processed_image in processed_images] - - # Data augmentation by randomly scaling the inputs. 
- scale = get_random_scale( - min_scale_factor, max_scale_factor, scale_factor_step_size) - processed_images, labels = zip( - *[randomly_scale_image_and_label(processed_image, label, scale) - for processed_image, label in zip(processed_images, labels)]) - - for processed_image in processed_images: - processed_image.set_shape([None, None, 3]) - - if crop_height is not None and crop_width is not None: - # Pad image and label to have dimensions >= [crop_height, crop_width]. - image_shape = tf.shape(processed_images[0]) - image_height = image_shape[0] - image_width = image_shape[1] - - target_height = image_height + tf.maximum(crop_height - image_height, 0) - target_width = image_width + tf.maximum(crop_width - image_width, 0) - - # Pad image with mean pixel value. - mean_pixel = tf.reshape( - feature_extractor.mean_pixel(model_variant), [1, 1, 3]) - processed_images = [preprocess_utils.pad_to_bounding_box( - processed_image, 0, 0, target_height, target_width, mean_pixel) - for processed_image in processed_images] - - if labels is not None: - labels = [preprocess_utils.pad_to_bounding_box( - label, 0, 0, target_height, target_width, ignore_label) - for label in labels] - - # Randomly crop the images and labels. - if is_training and labels is not None: - cropped = preprocess_utils.random_crop( - processed_images + labels, crop_height, crop_width) - assert len(cropped) == 2 * num_imgs - processed_images = cropped[:num_imgs] - labels = cropped[num_imgs:] - - for processed_image in processed_images: - processed_image.set_shape([crop_height, crop_width, 3]) - - if labels is not None: - for label in labels: - label.set_shape([crop_height, crop_width, 1]) - - if is_training: - # Randomly left-right flip the image and label. 
- res = preprocess_utils.flip_dim( - list(processed_images + labels), _PROB_OF_FLIP, dim=1) - maybe_flipped = res[:-1] - assert len(maybe_flipped) == 2 * num_imgs - processed_images = maybe_flipped[:num_imgs] - labels = maybe_flipped[num_imgs:] - - return original_images, processed_images, labels diff --git a/research/feelvos/model.py b/research/feelvos/model.py deleted file mode 100644 index f145f91616958b7327d99bb55efb1b7b5016a223..0000000000000000000000000000000000000000 --- a/research/feelvos/model.py +++ /dev/null @@ -1,480 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Provides DeepLab model definition and helper functions. - -DeepLab is a deep learning system for semantic image segmentation with -the following features: - -(1) Atrous convolution to explicitly control the resolution at which -feature responses are computed within Deep Convolutional Neural Networks. - -(2) Atrous spatial pyramid pooling (ASPP) to robustly segment objects at -multiple scales with filters at multiple sampling rates and effective -fields-of-views. - -(3) ASPP module augmented with image-level feature and batch normalization. - -(4) A simple yet effective decoder module to recover the object boundaries. 
- -See the following papers for more details: - -"Encoder-Decoder with Atrous Separable Convolution for Semantic Image -Segmentation" -Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, Hartwig Adam. -(https://arxiv.org/abs1802.02611) - -"Rethinking Atrous Convolution for Semantic Image Segmentation," -Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam -(https://arxiv.org/abs/1706.05587) - -"DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, -Atrous Convolution, and Fully Connected CRFs", -Liang-Chieh Chen*, George Papandreou*, Iasonas Kokkinos, Kevin Murphy, -Alan L Yuille (* equal contribution) -(https://arxiv.org/abs/1606.00915) - -"Semantic Image Segmentation with Deep Convolutional Nets and Fully Connected -CRFs" -Liang-Chieh Chen*, George Papandreou*, Iasonas Kokkinos, Kevin Murphy, -Alan L. Yuille (* equal contribution) -(https://arxiv.org/abs/1412.7062) -""" -import collections -import tensorflow as tf - -from deeplab import model -from feelvos import common -from feelvos.utils import embedding_utils -from feelvos.utils import train_utils - -slim = tf.contrib.slim - - -get_branch_logits = model.get_branch_logits -get_extra_layer_scopes = model.get_extra_layer_scopes -multi_scale_logits_v2 = model.multi_scale_logits -refine_by_decoder = model.refine_by_decoder -scale_dimension = model.scale_dimension -split_separable_conv2d = model.split_separable_conv2d - -MERGED_LOGITS_SCOPE = model.MERGED_LOGITS_SCOPE -IMAGE_POOLING_SCOPE = model.IMAGE_POOLING_SCOPE -ASPP_SCOPE = model.ASPP_SCOPE -CONCAT_PROJECTION_SCOPE = model.CONCAT_PROJECTION_SCOPE - - -def predict_labels(images, - model_options, - image_pyramid=None, - reference_labels=None, - k_nearest_neighbors=1, - embedding_dimension=None, - use_softmax_feedback=False, - initial_softmax_feedback=None, - embedding_seg_feature_dimension=256, - embedding_seg_n_layers=4, - embedding_seg_kernel_size=7, - embedding_seg_atrous_rates=None, - 
also_return_softmax_probabilities=False, - num_frames_per_video=None, - normalize_nearest_neighbor_distances=False, - also_attend_to_previous_frame=False, - use_local_previous_frame_attention=False, - previous_frame_attention_window_size=9, - use_first_frame_matching=True, - also_return_embeddings=False, - ref_embeddings=None): - """Predicts segmentation labels. - - Args: - images: A tensor of size [batch, height, width, channels]. - model_options: An InternalModelOptions instance to configure models. - image_pyramid: Input image scales for multi-scale feature extraction. - reference_labels: A tensor of size [batch, height, width, 1]. - ground truth labels used to perform a nearest neighbor query - k_nearest_neighbors: Integer, the number of neighbors to use for nearest - neighbor queries. - embedding_dimension: Integer, the dimension used for the learned embedding. - use_softmax_feedback: Boolean, whether to give the softmax predictions of - the last frame as additional input to the segmentation head. - initial_softmax_feedback: Float32 tensor, or None. Can be used to - initialize the softmax predictions used for the feedback loop. - Typically only useful for inference. Only has an effect if - use_softmax_feedback is True. - embedding_seg_feature_dimension: Integer, the dimensionality used in the - segmentation head layers. - embedding_seg_n_layers: Integer, the number of layers in the segmentation - head. - embedding_seg_kernel_size: Integer, the kernel size used in the - segmentation head. - embedding_seg_atrous_rates: List of integers of length - embedding_seg_n_layers, the atrous rates to use for the segmentation head. - also_return_softmax_probabilities: Boolean, if true, additionally return - the softmax probabilities as second return value. - num_frames_per_video: Integer, the number of frames per video. - normalize_nearest_neighbor_distances: Boolean, whether to normalize the - nearest neighbor distances to [0,1] using sigmoid, scale and shift. 
- also_attend_to_previous_frame: Boolean, whether to also use nearest - neighbor attention with respect to the previous frame. - use_local_previous_frame_attention: Boolean, whether to restrict the - previous frame attention to a local search window. - Only has an effect, if also_attend_to_previous_frame is True. - previous_frame_attention_window_size: Integer, the window size used for - local previous frame attention, if use_local_previous_frame_attention - is True. - use_first_frame_matching: Boolean, whether to extract features by matching - to the reference frame. This should always be true except for ablation - experiments. - also_return_embeddings: Boolean, whether to return the embeddings as well. - ref_embeddings: Tuple of - (first_frame_embeddings, previous_frame_embeddings), - each of shape [batch, height, width, embedding_dimension], or None. - - Returns: - A dictionary with keys specifying the output_type (e.g., semantic - prediction) and values storing Tensors representing predictions (argmax - over channels). Each prediction has size [batch, height, width]. - If also_return_softmax_probabilities is True, the second return value are - the softmax probabilities. - If also_return_embeddings is True, it will also return an embeddings - tensor of shape [batch, height, width, embedding_dimension]. - - Raises: - ValueError: If classification_loss is not softmax, softmax_with_attention, - nor triplet. 
- """ - if (model_options.classification_loss == 'triplet' and - reference_labels is None): - raise ValueError('Need reference_labels for triplet loss') - - if model_options.classification_loss == 'softmax_with_attention': - if embedding_dimension is None: - raise ValueError('Need embedding_dimension for softmax_with_attention ' - 'loss') - if reference_labels is None: - raise ValueError('Need reference_labels for softmax_with_attention loss') - res = ( - multi_scale_logits_with_nearest_neighbor_matching( - images, - model_options=model_options, - image_pyramid=image_pyramid, - is_training=False, - reference_labels=reference_labels, - clone_batch_size=1, - num_frames_per_video=num_frames_per_video, - embedding_dimension=embedding_dimension, - max_neighbors_per_object=0, - k_nearest_neighbors=k_nearest_neighbors, - use_softmax_feedback=use_softmax_feedback, - initial_softmax_feedback=initial_softmax_feedback, - embedding_seg_feature_dimension=embedding_seg_feature_dimension, - embedding_seg_n_layers=embedding_seg_n_layers, - embedding_seg_kernel_size=embedding_seg_kernel_size, - embedding_seg_atrous_rates=embedding_seg_atrous_rates, - normalize_nearest_neighbor_distances= - normalize_nearest_neighbor_distances, - also_attend_to_previous_frame=also_attend_to_previous_frame, - use_local_previous_frame_attention= - use_local_previous_frame_attention, - previous_frame_attention_window_size= - previous_frame_attention_window_size, - use_first_frame_matching=use_first_frame_matching, - also_return_embeddings=also_return_embeddings, - ref_embeddings=ref_embeddings - )) - if also_return_embeddings: - outputs_to_scales_to_logits, embeddings = res - else: - outputs_to_scales_to_logits = res - embeddings = None - else: - outputs_to_scales_to_logits = multi_scale_logits_v2( - images, - model_options=model_options, - image_pyramid=image_pyramid, - is_training=False, - fine_tune_batch_norm=False) - - predictions = {} - for output in sorted(outputs_to_scales_to_logits): - 
scales_to_logits = outputs_to_scales_to_logits[output] - original_logits = scales_to_logits[MERGED_LOGITS_SCOPE] - if isinstance(original_logits, list): - assert len(original_logits) == 1 - original_logits = original_logits[0] - logits = tf.image.resize_bilinear(original_logits, tf.shape(images)[1:3], - align_corners=True) - if model_options.classification_loss in ('softmax', - 'softmax_with_attention'): - predictions[output] = tf.argmax(logits, 3) - elif model_options.classification_loss == 'triplet': - # to keep this fast, we do the nearest neighbor assignment on the - # resolution at which the embedding is extracted and scale the result up - # afterwards - embeddings = original_logits - reference_labels_logits_size = tf.squeeze( - tf.image.resize_nearest_neighbor( - reference_labels[tf.newaxis], - train_utils.resolve_shape(embeddings)[1:3], - align_corners=True), axis=0) - nn_labels = embedding_utils.assign_labels_by_nearest_neighbors( - embeddings[0], embeddings[1:], reference_labels_logits_size, - k_nearest_neighbors) - predictions[common.OUTPUT_TYPE] = tf.image.resize_nearest_neighbor( - nn_labels, tf.shape(images)[1:3], align_corners=True) - else: - raise ValueError( - 'Only support softmax, triplet, or softmax_with_attention for ' - 'classification_loss.') - - if also_return_embeddings: - assert also_return_softmax_probabilities - return predictions, tf.nn.softmax(original_logits, axis=-1), embeddings - elif also_return_softmax_probabilities: - return predictions, tf.nn.softmax(original_logits, axis=-1) - else: - return predictions - - -def multi_scale_logits_with_nearest_neighbor_matching( - images, - model_options, - image_pyramid, - clone_batch_size, - reference_labels, - num_frames_per_video, - embedding_dimension, - max_neighbors_per_object, - weight_decay=0.0001, - is_training=False, - fine_tune_batch_norm=False, - k_nearest_neighbors=1, - use_softmax_feedback=False, - initial_softmax_feedback=None, - embedding_seg_feature_dimension=256, - 
embedding_seg_n_layers=4, - embedding_seg_kernel_size=7, - embedding_seg_atrous_rates=None, - normalize_nearest_neighbor_distances=False, - also_attend_to_previous_frame=False, - damage_initial_previous_frame_mask=False, - use_local_previous_frame_attention=False, - previous_frame_attention_window_size=9, - use_first_frame_matching=True, - also_return_embeddings=False, - ref_embeddings=None): - """Gets the logits for multi-scale inputs using nearest neighbor attention. - - Adjusted version of multi_scale_logits_v2 to support nearest neighbor - attention and a variable number of classes for each element of the batch. - The returned logits are all downsampled (due to max-pooling layers) - for both training and evaluation. - - Args: - images: A tensor of size [batch, height, width, channels]. - model_options: A ModelOptions instance to configure models. - image_pyramid: Input image scales for multi-scale feature extraction. - clone_batch_size: Integer, the number of videos on a batch. - reference_labels: The segmentation labels of the reference frame on which - attention is applied. - num_frames_per_video: Integer, the number of frames per video. - embedding_dimension: Integer, the dimension of the embedding. - max_neighbors_per_object: Integer, the maximum number of candidates - for the nearest neighbor query per object after subsampling. - Can be 0 for no subsampling. - weight_decay: The weight decay for model variables. - is_training: Is training or not. - fine_tune_batch_norm: Fine-tune the batch norm parameters or not. - k_nearest_neighbors: Integer, the number of nearest neighbors to use. - use_softmax_feedback: Boolean, whether to give the softmax predictions of - the last frame as additional input to the segmentation head. - initial_softmax_feedback: List of Float32 tensors, or None. - Can be used to initialize the softmax predictions used for the feedback - loop. Only has an effect if use_softmax_feedback is True. 
- embedding_seg_feature_dimension: Integer, the dimensionality used in the - segmentation head layers. - embedding_seg_n_layers: Integer, the number of layers in the segmentation - head. - embedding_seg_kernel_size: Integer, the kernel size used in the - segmentation head. - embedding_seg_atrous_rates: List of integers of length - embedding_seg_n_layers, the atrous rates to use for the segmentation head. - normalize_nearest_neighbor_distances: Boolean, whether to normalize the - nearest neighbor distances to [0,1] using sigmoid, scale and shift. - also_attend_to_previous_frame: Boolean, whether to also use nearest - neighbor attention with respect to the previous frame. - damage_initial_previous_frame_mask: Boolean, whether to artificially damage - the initial previous frame mask. Only has an effect if - also_attend_to_previous_frame is True. - use_local_previous_frame_attention: Boolean, whether to restrict the - previous frame attention to a local search window. - Only has an effect, if also_attend_to_previous_frame is True. - previous_frame_attention_window_size: Integer, the window size used for - local previous frame attention, if use_local_previous_frame_attention - is True. - use_first_frame_matching: Boolean, whether to extract features by matching - to the reference frame. This should always be true except for ablation - experiments. - also_return_embeddings: Boolean, whether to return the embeddings as well. - ref_embeddings: Tuple of - (first_frame_embeddings, previous_frame_embeddings), - each of shape [batch, height, width, embedding_dimension], or None. - - Returns: - outputs_to_scales_to_logits: A map of maps from output_type (e.g., - semantic prediction) to a dictionary of multi-scale logits names to - logits. For each output_type, the dictionary has keys which - correspond to the scales and values which correspond to the logits. 
- For example, if `scales` equals [1.0, 1.5], then the keys would - include 'merged_logits', 'logits_1.00' and 'logits_1.50'. - If also_return_embeddings is True, it will also return an embeddings - tensor of shape [batch, height, width, embedding_dimension]. - - Raises: - ValueError: If model_options doesn't specify crop_size and its - add_image_level_feature = True, since add_image_level_feature requires - crop_size information. - """ - # Setup default values. - if not image_pyramid: - image_pyramid = [1.0] - crop_height = ( - model_options.crop_size[0] - if model_options.crop_size else tf.shape(images)[1]) - crop_width = ( - model_options.crop_size[1] - if model_options.crop_size else tf.shape(images)[2]) - - # Compute the height, width for the output logits. - if model_options.decoder_output_stride: - logits_output_stride = min(model_options.decoder_output_stride) - else: - logits_output_stride = model_options.output_stride - logits_height = scale_dimension( - crop_height, - max(1.0, max(image_pyramid)) / logits_output_stride) - logits_width = scale_dimension( - crop_width, - max(1.0, max(image_pyramid)) / logits_output_stride) - - # Compute the logits for each scale in the image pyramid. 
- outputs_to_scales_to_logits = { - k: {} - for k in model_options.outputs_to_num_classes - } - - for image_scale in image_pyramid: - if image_scale != 1.0: - scaled_height = scale_dimension(crop_height, image_scale) - scaled_width = scale_dimension(crop_width, image_scale) - scaled_crop_size = [scaled_height, scaled_width] - scaled_images = tf.image.resize_bilinear( - images, scaled_crop_size, align_corners=True) - scaled_reference_labels = tf.image.resize_nearest_neighbor( - reference_labels, scaled_crop_size, align_corners=True - ) - if model_options.crop_size is None: - scaled_crop_size = None - if model_options.crop_size: - scaled_images.set_shape([None, scaled_height, scaled_width, 3]) - else: - scaled_crop_size = model_options.crop_size - scaled_images = images - scaled_reference_labels = reference_labels - - updated_options = model_options._replace(crop_size=scaled_crop_size) - res = embedding_utils.get_logits_with_matching( - scaled_images, - updated_options, - weight_decay=weight_decay, - reuse=tf.AUTO_REUSE, - is_training=is_training, - fine_tune_batch_norm=fine_tune_batch_norm, - reference_labels=scaled_reference_labels, - batch_size=clone_batch_size, - num_frames_per_video=num_frames_per_video, - embedding_dimension=embedding_dimension, - max_neighbors_per_object=max_neighbors_per_object, - k_nearest_neighbors=k_nearest_neighbors, - use_softmax_feedback=use_softmax_feedback, - initial_softmax_feedback=initial_softmax_feedback, - embedding_seg_feature_dimension=embedding_seg_feature_dimension, - embedding_seg_n_layers=embedding_seg_n_layers, - embedding_seg_kernel_size=embedding_seg_kernel_size, - embedding_seg_atrous_rates=embedding_seg_atrous_rates, - normalize_nearest_neighbor_distances= - normalize_nearest_neighbor_distances, - also_attend_to_previous_frame=also_attend_to_previous_frame, - damage_initial_previous_frame_mask=damage_initial_previous_frame_mask, - use_local_previous_frame_attention=use_local_previous_frame_attention, - 
previous_frame_attention_window_size= - previous_frame_attention_window_size, - use_first_frame_matching=use_first_frame_matching, - also_return_embeddings=also_return_embeddings, - ref_embeddings=ref_embeddings - ) - if also_return_embeddings: - outputs_to_logits, embeddings = res - else: - outputs_to_logits = res - embeddings = None - - # Resize the logits to have the same dimension before merging. - for output in sorted(outputs_to_logits): - if isinstance(outputs_to_logits[output], collections.Sequence): - outputs_to_logits[output] = [tf.image.resize_bilinear( - x, [logits_height, logits_width], align_corners=True) - for x in outputs_to_logits[output]] - else: - outputs_to_logits[output] = tf.image.resize_bilinear( - outputs_to_logits[output], [logits_height, logits_width], - align_corners=True) - - # Return when only one input scale. - if len(image_pyramid) == 1: - for output in sorted(model_options.outputs_to_num_classes): - outputs_to_scales_to_logits[output][ - MERGED_LOGITS_SCOPE] = outputs_to_logits[output] - if also_return_embeddings: - return outputs_to_scales_to_logits, embeddings - else: - return outputs_to_scales_to_logits - - # Save logits to the output map. - for output in sorted(model_options.outputs_to_num_classes): - outputs_to_scales_to_logits[output][ - 'logits_%.2f' % image_scale] = outputs_to_logits[output] - - # Merge the logits from all the multi-scale inputs. - for output in sorted(model_options.outputs_to_num_classes): - # Concatenate the multi-scale logits for each output type. 
- all_logits = [ - [tf.expand_dims(l, axis=4)] - for logits in outputs_to_scales_to_logits[output].values() - for l in logits - ] - transposed = map(list, zip(*all_logits)) - all_logits = [tf.concat(t, 4) for t in transposed] - merge_fn = ( - tf.reduce_max - if model_options.merge_method == 'max' else tf.reduce_mean) - outputs_to_scales_to_logits[output][MERGED_LOGITS_SCOPE] = [merge_fn( - l, axis=4) for l in all_logits] - - if also_return_embeddings: - return outputs_to_scales_to_logits, embeddings - else: - return outputs_to_scales_to_logits diff --git a/research/feelvos/train.py b/research/feelvos/train.py deleted file mode 100644 index 16c085722749bcfde5aeff15cdbec336e5efe451..0000000000000000000000000000000000000000 --- a/research/feelvos/train.py +++ /dev/null @@ -1,630 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Training script for the FEELVOS model. - -See model.py for more details and usage. 
-""" -import six -import tensorflow as tf - -from feelvos import common -from feelvos import model -from feelvos.datasets import video_dataset -from feelvos.utils import embedding_utils -from feelvos.utils import train_utils -from feelvos.utils import video_input_generator -from deployment import model_deploy - -slim = tf.contrib.slim -prefetch_queue = slim.prefetch_queue -flags = tf.app.flags -FLAGS = flags.FLAGS - -# Settings for multi-GPUs/multi-replicas training. - -flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy.') - -flags.DEFINE_boolean('clone_on_cpu', False, 'Use CPUs to deploy clones.') - -flags.DEFINE_integer('num_replicas', 1, 'Number of worker replicas.') - -flags.DEFINE_integer('startup_delay_steps', 15, - 'Number of training steps between replicas startup.') - -flags.DEFINE_integer('num_ps_tasks', 0, - 'The number of parameter servers. If the value is 0, then ' - 'the parameters are handled locally by the worker.') - -flags.DEFINE_string('master', '', 'BNS name of the tensorflow server') - -flags.DEFINE_integer('task', 0, 'The task ID.') - -# Settings for logging. - -flags.DEFINE_string('train_logdir', None, - 'Where the checkpoint and logs are stored.') - -flags.DEFINE_integer('log_steps', 10, - 'Display logging information at every log_steps.') - -flags.DEFINE_integer('save_interval_secs', 1200, - 'How often, in seconds, we save the model to disk.') - -flags.DEFINE_integer('save_summaries_secs', 600, - 'How often, in seconds, we compute the summaries.') - -# Settings for training strategy. 
- -flags.DEFINE_enum('learning_policy', 'poly', ['poly', 'step'], - 'Learning rate policy for training.') - -flags.DEFINE_float('base_learning_rate', 0.0007, - 'The base learning rate for model training.') - -flags.DEFINE_float('learning_rate_decay_factor', 0.1, - 'The rate to decay the base learning rate.') - -flags.DEFINE_integer('learning_rate_decay_step', 2000, - 'Decay the base learning rate at a fixed step.') - -flags.DEFINE_float('learning_power', 0.9, - 'The power value used in the poly learning policy.') - -flags.DEFINE_integer('training_number_of_steps', 200000, - 'The number of steps used for training') - -flags.DEFINE_float('momentum', 0.9, 'The momentum value to use') - -flags.DEFINE_integer('train_batch_size', 6, - 'The number of images in each batch during training.') - -flags.DEFINE_integer('train_num_frames_per_video', 3, - 'The number of frames used per video during training') - -flags.DEFINE_float('weight_decay', 0.00004, - 'The value of the weight decay for training.') - -flags.DEFINE_multi_integer('train_crop_size', [465, 465], - 'Image crop size [height, width] during training.') - -flags.DEFINE_float('last_layer_gradient_multiplier', 1.0, - 'The gradient multiplier for last layers, which is used to ' - 'boost the gradient of last layers if the value > 1.') - -flags.DEFINE_boolean('upsample_logits', True, - 'Upsample logits during training.') - -flags.DEFINE_integer('batch_capacity_factor', 16, 'Batch capacity factor.') - -flags.DEFINE_integer('num_readers', 1, 'Number of readers for data provider.') - -flags.DEFINE_integer('batch_num_threads', 1, 'Batch number of threads.') - -flags.DEFINE_integer('prefetch_queue_capacity_factor', 32, - 'Prefetch queue capacity factor.') - -flags.DEFINE_integer('prefetch_queue_num_threads', 1, - 'Prefetch queue number of threads.') - -flags.DEFINE_integer('train_max_neighbors_per_object', 1024, - 'The maximum number of candidates for the nearest ' - 'neighbor query per object after subsampling') - -# Settings 
for fine-tuning the network. - -flags.DEFINE_string('tf_initial_checkpoint', None, - 'The initial checkpoint in tensorflow format.') - -flags.DEFINE_boolean('initialize_last_layer', False, - 'Initialize the last layer.') - -flags.DEFINE_boolean('last_layers_contain_logits_only', False, - 'Only consider logits as last layers or not.') - -flags.DEFINE_integer('slow_start_step', 0, - 'Training model with small learning rate for few steps.') - -flags.DEFINE_float('slow_start_learning_rate', 1e-4, - 'Learning rate employed during slow start.') - -flags.DEFINE_boolean('fine_tune_batch_norm', False, - 'Fine tune the batch norm parameters or not.') - -flags.DEFINE_float('min_scale_factor', 1., - 'Mininum scale factor for data augmentation.') - -flags.DEFINE_float('max_scale_factor', 1.3, - 'Maximum scale factor for data augmentation.') - -flags.DEFINE_float('scale_factor_step_size', 0, - 'Scale factor step size for data augmentation.') - -flags.DEFINE_multi_integer('atrous_rates', None, - 'Atrous rates for atrous spatial pyramid pooling.') - -flags.DEFINE_integer('output_stride', 8, - 'The ratio of input to output spatial resolution.') - -flags.DEFINE_boolean('sample_only_first_frame_for_finetuning', False, - 'Whether to only sample the first frame during ' - 'fine-tuning. This should be False when using lucid data, ' - 'but True when fine-tuning on the first frame only. Only ' - 'has an effect if first_frame_finetuning is True.') - -flags.DEFINE_multi_integer('first_frame_finetuning', [0], - 'Whether to only sample the first frame for ' - 'fine-tuning.') - -# Dataset settings. 
- -flags.DEFINE_multi_string('dataset', [], 'Name of the segmentation datasets.') - -flags.DEFINE_multi_float('dataset_sampling_probabilities', [], - 'A list of probabilities to sample each of the ' - 'datasets.') - -flags.DEFINE_string('train_split', 'train', - 'Which split of the dataset to be used for training') - -flags.DEFINE_multi_string('dataset_dir', [], 'Where the datasets reside.') - -flags.DEFINE_multi_integer('three_frame_dataset', [0], - 'Whether the dataset has exactly three frames per ' - 'video of which the first is to be used as reference' - ' and the two others are consecutive frames to be ' - 'used as query frames.' - 'Set true for pascal lucid data.') - -flags.DEFINE_boolean('damage_initial_previous_frame_mask', False, - 'Whether to artificially damage the initial previous ' - 'frame mask. Only has an effect if ' - 'also_attend_to_previous_frame is True.') - -flags.DEFINE_float('top_k_percent_pixels', 0.15, 'Float in [0.0, 1.0].' - 'When its value < 1.0, only compute the loss for the top k' - 'percent pixels (e.g., the top 20% pixels). This is useful' - 'for hard pixel mining.') - -flags.DEFINE_integer('hard_example_mining_step', 100000, - 'The training step in which the hard exampling mining ' - 'kicks off. Note that we gradually reduce the mining ' - 'percent to the top_k_percent_pixels. For example, if ' - 'hard_example_mining_step=100K and ' - 'top_k_percent_pixels=0.25, then mining percent will ' - 'gradually reduce from 100% to 25% until 100K steps ' - 'after which we only mine top 25% pixels. Only has an ' - 'effect if top_k_percent_pixels < 1.0') - - -def _build_deeplab(inputs_queue_or_samples, outputs_to_num_classes, - ignore_label): - """Builds a clone of DeepLab. - - Args: - inputs_queue_or_samples: A prefetch queue for images and labels, or - directly a dict of the samples. - outputs_to_num_classes: A map from output type to the number of classes. 
- For example, for the task of semantic segmentation with 21 semantic - classes, we would have outputs_to_num_classes['semantic'] = 21. - ignore_label: Ignore label. - - Returns: - A map of maps from output_type (e.g., semantic prediction) to a - dictionary of multi-scale logits names to logits. For each output_type, - the dictionary has keys which correspond to the scales and values which - correspond to the logits. For example, if `scales` equals [1.0, 1.5], - then the keys would include 'merged_logits', 'logits_1.00' and - 'logits_1.50'. - - Raises: - ValueError: If classification_loss is not softmax, softmax_with_attention, - or triplet. - """ - if hasattr(inputs_queue_or_samples, 'dequeue'): - samples = inputs_queue_or_samples.dequeue() - else: - samples = inputs_queue_or_samples - train_crop_size = (None if 0 in FLAGS.train_crop_size else - FLAGS.train_crop_size) - - model_options = common.VideoModelOptions( - outputs_to_num_classes=outputs_to_num_classes, - crop_size=train_crop_size, - atrous_rates=FLAGS.atrous_rates, - output_stride=FLAGS.output_stride) - - if model_options.classification_loss == 'softmax_with_attention': - clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones - - # Create summaries of ground truth labels. 
- for n in range(clone_batch_size): - tf.summary.image( - 'gt_label_%d' % n, - tf.cast(samples[common.LABEL][ - n * FLAGS.train_num_frames_per_video: - (n + 1) * FLAGS.train_num_frames_per_video], - tf.uint8) * 32, max_outputs=FLAGS.train_num_frames_per_video) - - if common.PRECEDING_FRAME_LABEL in samples: - preceding_frame_label = samples[common.PRECEDING_FRAME_LABEL] - init_softmax = [] - for n in range(clone_batch_size): - init_softmax_n = embedding_utils.create_initial_softmax_from_labels( - preceding_frame_label[n, tf.newaxis], - samples[common.LABEL][n * FLAGS.train_num_frames_per_video, - tf.newaxis], - common.parse_decoder_output_stride(), - reduce_labels=True) - init_softmax_n = tf.squeeze(init_softmax_n, axis=0) - init_softmax.append(init_softmax_n) - tf.summary.image('preceding_frame_label', - tf.cast(preceding_frame_label[n, tf.newaxis], - tf.uint8) * 32) - else: - init_softmax = None - - outputs_to_scales_to_logits = ( - model.multi_scale_logits_with_nearest_neighbor_matching( - samples[common.IMAGE], - model_options=model_options, - image_pyramid=FLAGS.image_pyramid, - weight_decay=FLAGS.weight_decay, - is_training=True, - fine_tune_batch_norm=FLAGS.fine_tune_batch_norm, - reference_labels=samples[common.LABEL], - clone_batch_size=FLAGS.train_batch_size // FLAGS.num_clones, - num_frames_per_video=FLAGS.train_num_frames_per_video, - embedding_dimension=FLAGS.embedding_dimension, - max_neighbors_per_object=FLAGS.train_max_neighbors_per_object, - k_nearest_neighbors=FLAGS.k_nearest_neighbors, - use_softmax_feedback=FLAGS.use_softmax_feedback, - initial_softmax_feedback=init_softmax, - embedding_seg_feature_dimension= - FLAGS.embedding_seg_feature_dimension, - embedding_seg_n_layers=FLAGS.embedding_seg_n_layers, - embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size, - embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates, - normalize_nearest_neighbor_distances= - FLAGS.normalize_nearest_neighbor_distances, - 
also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame, - damage_initial_previous_frame_mask= - FLAGS.damage_initial_previous_frame_mask, - use_local_previous_frame_attention= - FLAGS.use_local_previous_frame_attention, - previous_frame_attention_window_size= - FLAGS.previous_frame_attention_window_size, - use_first_frame_matching=FLAGS.use_first_frame_matching - )) - else: - outputs_to_scales_to_logits = model.multi_scale_logits_v2( - samples[common.IMAGE], - model_options=model_options, - image_pyramid=FLAGS.image_pyramid, - weight_decay=FLAGS.weight_decay, - is_training=True, - fine_tune_batch_norm=FLAGS.fine_tune_batch_norm) - - if model_options.classification_loss == 'softmax': - for output, num_classes in six.iteritems(outputs_to_num_classes): - train_utils.add_softmax_cross_entropy_loss_for_each_scale( - outputs_to_scales_to_logits[output], - samples[common.LABEL], - num_classes, - ignore_label, - loss_weight=1.0, - upsample_logits=FLAGS.upsample_logits, - scope=output) - elif model_options.classification_loss == 'triplet': - for output, _ in six.iteritems(outputs_to_num_classes): - train_utils.add_triplet_loss_for_each_scale( - FLAGS.train_batch_size // FLAGS.num_clones, - FLAGS.train_num_frames_per_video, - FLAGS.embedding_dimension, outputs_to_scales_to_logits[output], - samples[common.LABEL], scope=output) - elif model_options.classification_loss == 'softmax_with_attention': - labels = samples[common.LABEL] - batch_size = FLAGS.train_batch_size // FLAGS.num_clones - num_frames_per_video = FLAGS.train_num_frames_per_video - h, w = train_utils.resolve_shape(labels)[1:3] - labels = tf.reshape(labels, tf.stack( - [batch_size, num_frames_per_video, h, w, 1])) - # Strip the reference labels off. - if FLAGS.also_attend_to_previous_frame or FLAGS.use_softmax_feedback: - n_ref_frames = 2 - else: - n_ref_frames = 1 - labels = labels[:, n_ref_frames:] - # Merge batch and time dimensions. 
- labels = tf.reshape(labels, tf.stack( - [batch_size * (num_frames_per_video - n_ref_frames), h, w, 1])) - - for output, num_classes in six.iteritems(outputs_to_num_classes): - train_utils.add_dynamic_softmax_cross_entropy_loss_for_each_scale( - outputs_to_scales_to_logits[output], - labels, - ignore_label, - loss_weight=1.0, - upsample_logits=FLAGS.upsample_logits, - scope=output, - top_k_percent_pixels=FLAGS.top_k_percent_pixels, - hard_example_mining_step=FLAGS.hard_example_mining_step) - else: - raise ValueError('Only support softmax, softmax_with_attention' - ' or triplet for classification_loss.') - - return outputs_to_scales_to_logits - - -def main(unused_argv): - # Set up deployment (i.e., multi-GPUs and/or multi-replicas). - config = model_deploy.DeploymentConfig( - num_clones=FLAGS.num_clones, - clone_on_cpu=FLAGS.clone_on_cpu, - replica_id=FLAGS.task, - num_replicas=FLAGS.num_replicas, - num_ps_tasks=FLAGS.num_ps_tasks) - - with tf.Graph().as_default(): - with tf.device(config.inputs_device()): - train_crop_size = (None if 0 in FLAGS.train_crop_size else - FLAGS.train_crop_size) - assert FLAGS.dataset - assert len(FLAGS.dataset) == len(FLAGS.dataset_dir) - if len(FLAGS.first_frame_finetuning) == 1: - first_frame_finetuning = (list(FLAGS.first_frame_finetuning) - * len(FLAGS.dataset)) - else: - first_frame_finetuning = FLAGS.first_frame_finetuning - if len(FLAGS.three_frame_dataset) == 1: - three_frame_dataset = (list(FLAGS.three_frame_dataset) - * len(FLAGS.dataset)) - else: - three_frame_dataset = FLAGS.three_frame_dataset - assert len(FLAGS.dataset) == len(first_frame_finetuning) - assert len(FLAGS.dataset) == len(three_frame_dataset) - datasets, samples_list = zip( - *[_get_dataset_and_samples(config, train_crop_size, dataset, - dataset_dir, bool(first_frame_finetuning_), - bool(three_frame_dataset_)) - for dataset, dataset_dir, first_frame_finetuning_, - three_frame_dataset_ in zip(FLAGS.dataset, FLAGS.dataset_dir, - first_frame_finetuning, - 
three_frame_dataset)]) - # Note that this way of doing things is wasteful since it will evaluate - # all branches but just use one of them. But let's do it anyway for now, - # since it's easy and will probably be fast enough. - dataset = datasets[0] - if len(samples_list) == 1: - samples = samples_list[0] - else: - probabilities = FLAGS.dataset_sampling_probabilities - if probabilities: - assert len(probabilities) == len(samples_list) - else: - # Default to uniform probabilities. - probabilities = [1.0 / len(samples_list) for _ in samples_list] - probabilities = tf.constant(probabilities) - logits = tf.log(probabilities[tf.newaxis]) - rand_idx = tf.squeeze(tf.multinomial(logits, 1, output_dtype=tf.int32), - axis=[0, 1]) - - def wrap(x): - def f(): - return x - return f - - samples = tf.case({tf.equal(rand_idx, idx): wrap(s) - for idx, s in enumerate(samples_list)}, - exclusive=True) - - # Prefetch_queue requires the shape to be known at graph creation time. - # So we only use it if we crop to a fixed size. - if train_crop_size is None: - inputs_queue = samples - else: - inputs_queue = prefetch_queue.prefetch_queue( - samples, - capacity=FLAGS.prefetch_queue_capacity_factor*config.num_clones, - num_threads=FLAGS.prefetch_queue_num_threads) - - # Create the global step on the device storing the variables. - with tf.device(config.variables_device()): - global_step = tf.train.get_or_create_global_step() - - # Define the model and create clones. - model_fn = _build_deeplab - if FLAGS.classification_loss == 'triplet': - embedding_dim = FLAGS.embedding_dimension - output_type_to_dim = {'embedding': embedding_dim} - else: - output_type_to_dim = {common.OUTPUT_TYPE: dataset.num_classes} - model_args = (inputs_queue, output_type_to_dim, dataset.ignore_label) - clones = model_deploy.create_clones(config, model_fn, args=model_args) - - # Gather update_ops from the first clone. These contain, for example, - # the updates for the batch_norm variables created by model_fn. 
- first_clone_scope = config.clone_scope(0) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) - - # Gather initial summaries. - summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) - - # Add summaries for model variables. - for model_var in tf.contrib.framework.get_model_variables(): - summaries.add(tf.summary.histogram(model_var.op.name, model_var)) - - # Add summaries for losses. - for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): - summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) - - # Build the optimizer based on the device specification. - with tf.device(config.optimizer_device()): - learning_rate = train_utils.get_model_learning_rate( - FLAGS.learning_policy, - FLAGS.base_learning_rate, - FLAGS.learning_rate_decay_step, - FLAGS.learning_rate_decay_factor, - FLAGS.training_number_of_steps, - FLAGS.learning_power, - FLAGS.slow_start_step, - FLAGS.slow_start_learning_rate) - optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum) - summaries.add(tf.summary.scalar('learning_rate', learning_rate)) - - startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps - - with tf.device(config.variables_device()): - total_loss, grads_and_vars = model_deploy.optimize_clones( - clones, optimizer) - total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.') - summaries.add(tf.summary.scalar('total_loss', total_loss)) - - # Modify the gradients for biases and last layer variables. - last_layers = model.get_extra_layer_scopes( - FLAGS.last_layers_contain_logits_only) - grad_mult = train_utils.get_model_gradient_multipliers( - last_layers, FLAGS.last_layer_gradient_multiplier) - if grad_mult: - grads_and_vars = slim.learning.multiply_gradients(grads_and_vars, - grad_mult) - - with tf.name_scope('grad_clipping'): - grads_and_vars = slim.learning.clip_gradient_norms(grads_and_vars, 5.0) - - # Create histogram summaries for the gradients. 
- # We have too many summaries for mldash, so disable this one for now. - # for grad, var in grads_and_vars: - # summaries.add(tf.summary.histogram( - # var.name.replace(':0', '_0') + '/gradient', grad)) - - # Create gradient update op. - grad_updates = optimizer.apply_gradients(grads_and_vars, - global_step=global_step) - update_ops.append(grad_updates) - update_op = tf.group(*update_ops) - with tf.control_dependencies([update_op]): - train_tensor = tf.identity(total_loss, name='train_op') - - # Add the summaries from the first clone. These contain the summaries - # created by model_fn and either optimize_clones() or _gather_clone_loss(). - summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, - first_clone_scope)) - - # Merge all summaries together. - summary_op = tf.summary.merge(list(summaries)) - - # Soft placement allows placing on CPU ops without GPU implementation. - session_config = tf.ConfigProto(allow_soft_placement=True, - log_device_placement=False) - - # Start the training. - slim.learning.train( - train_tensor, - logdir=FLAGS.train_logdir, - log_every_n_steps=FLAGS.log_steps, - master=FLAGS.master, - number_of_steps=FLAGS.training_number_of_steps, - is_chief=(FLAGS.task == 0), - session_config=session_config, - startup_delay_steps=startup_delay_steps, - init_fn=train_utils.get_model_init_fn(FLAGS.train_logdir, - FLAGS.tf_initial_checkpoint, - FLAGS.initialize_last_layer, - last_layers, - ignore_missing_vars=True), - summary_op=summary_op, - save_summaries_secs=FLAGS.save_summaries_secs, - save_interval_secs=FLAGS.save_interval_secs) - - -def _get_dataset_and_samples(config, train_crop_size, dataset_name, - dataset_dir, first_frame_finetuning, - three_frame_dataset): - """Creates dataset object and samples dict of tensor. - - Args: - config: A DeploymentConfig. - train_crop_size: Integer, the crop size used for training. - dataset_name: String, the name of the dataset. - dataset_dir: String, the directory of the dataset. 
- first_frame_finetuning: Boolean, whether the used dataset is a dataset - for first frame fine-tuning. - three_frame_dataset: Boolean, whether the dataset has exactly three frames - per video of which the first is to be used as reference and the two - others are consecutive frames to be used as query frames. - - Returns: - dataset: An instance of slim Dataset. - samples: A dictionary of tensors for semantic segmentation. - """ - - # Split the batch across GPUs. - assert FLAGS.train_batch_size % config.num_clones == 0, ( - 'Training batch size not divisble by number of clones (GPUs).') - - clone_batch_size = FLAGS.train_batch_size / config.num_clones - - if first_frame_finetuning: - train_split = 'val' - else: - train_split = FLAGS.train_split - - data_type = 'tf_sequence_example' - # Get dataset-dependent information. - dataset = video_dataset.get_dataset( - dataset_name, - train_split, - dataset_dir=dataset_dir, - data_type=data_type) - - tf.gfile.MakeDirs(FLAGS.train_logdir) - tf.logging.info('Training on %s set', train_split) - - samples = video_input_generator.get( - dataset, - FLAGS.train_num_frames_per_video, - train_crop_size, - clone_batch_size, - num_readers=FLAGS.num_readers, - num_threads=FLAGS.batch_num_threads, - min_resize_value=FLAGS.min_resize_value, - max_resize_value=FLAGS.max_resize_value, - resize_factor=FLAGS.resize_factor, - min_scale_factor=FLAGS.min_scale_factor, - max_scale_factor=FLAGS.max_scale_factor, - scale_factor_step_size=FLAGS.scale_factor_step_size, - dataset_split=FLAGS.train_split, - is_training=True, - model_variant=FLAGS.model_variant, - batch_capacity_factor=FLAGS.batch_capacity_factor, - decoder_output_stride=common.parse_decoder_output_stride(), - first_frame_finetuning=first_frame_finetuning, - sample_only_first_frame_for_finetuning= - FLAGS.sample_only_first_frame_for_finetuning, - sample_adjacent_and_consistent_query_frames= - FLAGS.sample_adjacent_and_consistent_query_frames or - FLAGS.use_softmax_feedback, - 
remap_labels_to_reference_frame=True, - three_frame_dataset=three_frame_dataset, - add_prev_frame_label=not FLAGS.also_attend_to_previous_frame - ) - return dataset, samples - - -if __name__ == '__main__': - flags.mark_flag_as_required('train_logdir') - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/research/feelvos/train.sh b/research/feelvos/train.sh deleted file mode 100755 index 63b7ea19d4c53dea932322c3885abb9a95237e0c..0000000000000000000000000000000000000000 --- a/research/feelvos/train.sh +++ /dev/null @@ -1,92 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# This script is used to run local training on DAVIS 2017. Users could also -# modify from this script for their use case. See eval.sh for an example of -# local inference with a pre-trained model. -# -# Note that this script runs local training with a single GPU and a smaller crop -# and batch size, while in the paper, we trained our models with 16 GPUS with -# --num_clones=2, --train_batch_size=6, --num_replicas=8, -# --training_number_of_steps=200000, --train_crop_size=465, -# --train_crop_size=465. -# -# Usage: -# # From the tensorflow/models/research/feelvos directory. -# sh ./train.sh -# -# - -# Exit immediately if a command exits with a non-zero status. 
-set -e - -# Move one-level up to tensorflow/models/research directory. -cd .. - -# Update PYTHONPATH. -export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim:`pwd`/feelvos - -# Set up the working environment. -CURRENT_DIR=$(pwd) -WORK_DIR="${CURRENT_DIR}/feelvos" - -# Set up the working directories. -DATASET_DIR="datasets" -DAVIS_FOLDER="davis17" -DAVIS_DATASET="${WORK_DIR}/${DATASET_DIR}/${DAVIS_FOLDER}/tfrecord" -EXP_FOLDER="exp/train" -TRAIN_LOGDIR="${WORK_DIR}/${DATASET_DIR}/${DAVIS_FOLDER}/${EXP_FOLDER}/train" -mkdir -p ${TRAIN_LOGDIR} - -# Go to datasets folder and download and convert the DAVIS 2017 dataset. -DATASET_DIR="datasets" -cd "${WORK_DIR}/${DATASET_DIR}" -sh download_and_convert_davis17.sh - -# Go to models folder and download and unpack the COCO pre-trained model. -MODELS_DIR="models" -mkdir -p "${WORK_DIR}/${MODELS_DIR}" -cd "${WORK_DIR}/${MODELS_DIR}" -if [ ! -d "xception_65_coco_pretrained" ]; then - wget http://download.tensorflow.org/models/xception_65_coco_pretrained_2018_10_02.tar.gz - tar -xvf xception_65_coco_pretrained_2018_10_02.tar.gz - rm xception_65_coco_pretrained_2018_10_02.tar.gz -fi -INIT_CKPT="${WORK_DIR}/${MODELS_DIR}/xception_65_coco_pretrained/x65-b2u1s2p-d48-2-3x256-sc-cr300k_init.ckpt" - -# Go back to orignal directory. 
-cd "${CURRENT_DIR}" - -python "${WORK_DIR}"/train.py \ - --dataset=davis_2017 \ - --dataset_dir="${DAVIS_DATASET}" \ - --train_logdir="${TRAIN_LOGDIR}" \ - --tf_initial_checkpoint="${INIT_CKPT}" \ - --logtostderr \ - --atrous_rates=6 \ - --atrous_rates=12 \ - --atrous_rates=18 \ - --decoder_output_stride=4 \ - --model_variant=xception_65 \ - --multi_grid=1 \ - --multi_grid=1 \ - --multi_grid=1 \ - --output_stride=16 \ - --weight_decay=0.00004 \ - --num_clones=1 \ - --train_batch_size=1 \ - --train_crop_size=300 \ - --train_crop_size=300 diff --git a/research/feelvos/utils/__init__.py b/research/feelvos/utils/__init__.py deleted file mode 100644 index 6f1373443d0ff84fd90714e41dade400ab41a22c..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== diff --git a/research/feelvos/utils/embedding_utils.py b/research/feelvos/utils/embedding_utils.py deleted file mode 100644 index 233c70d9327d08251537c58821dd8405b42f0fe7..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/embedding_utils.py +++ /dev/null @@ -1,1082 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for the instance embedding for segmentation.""" - -import numpy as np -import tensorflow as tf -from deeplab import model -from deeplab.core import preprocess_utils -from feelvos.utils import mask_damaging - -slim = tf.contrib.slim -resolve_shape = preprocess_utils.resolve_shape -WRONG_LABEL_PADDING_DISTANCE = 1e20 - -# With correlation_cost local matching will be much faster. But we provide a -# slow fallback for convenience. -USE_CORRELATION_COST = False -if USE_CORRELATION_COST: - # pylint: disable=g-import-not-at-top - from correlation_cost.python.ops import correlation_cost_op - - -def pairwise_distances(x, y): - """Computes pairwise squared l2 distances between tensors x and y. - - Args: - x: Tensor of shape [n, feature_dim]. - y: Tensor of shape [m, feature_dim]. - - Returns: - Float32 distances tensor of shape [n, m]. - """ - # d[i,j] = (x[i] - y[j]) * (x[i] - y[j])' - # = sum(x[i]^2, 1) + sum(y[j]^2, 1) - 2 * x[i] * y[j]' - xs = tf.reduce_sum(x * x, axis=1)[:, tf.newaxis] - ys = tf.reduce_sum(y * y, axis=1)[tf.newaxis, :] - d = xs + ys - 2 * tf.matmul(x, y, transpose_b=True) - return d - - -def pairwise_distances2(x, y): - """Computes pairwise squared l2 distances between tensors x and y. - - Naive implementation, high memory use. Could be useful to test the more - efficient implementation. 
- - Args: - x: Tensor of shape [n, feature_dim]. - y: Tensor of shape [m, feature_dim]. - - Returns: - distances of shape [n, m]. - """ - return tf.reduce_sum(tf.squared_difference( - x[:, tf.newaxis], y[tf.newaxis, :]), axis=-1) - - -def cross_correlate(x, y, max_distance=9): - """Efficiently computes the cross correlation of x and y. - - Optimized implementation using correlation_cost. - Note that we do not normalize by the feature dimension. - - Args: - x: Float32 tensor of shape [height, width, feature_dim]. - y: Float32 tensor of shape [height, width, feature_dim]. - max_distance: Integer, the maximum distance in pixel coordinates - per dimension which is considered to be in the search window. - - Returns: - Float32 tensor of shape [height, width, (2 * max_distance + 1) ** 2]. - """ - with tf.name_scope('cross_correlation'): - corr = correlation_cost_op.correlation_cost( - x[tf.newaxis], y[tf.newaxis], kernel_size=1, - max_displacement=max_distance, stride_1=1, stride_2=1, - pad=max_distance) - corr = tf.squeeze(corr, axis=0) - # This correlation implementation takes the mean over the feature_dim, - # but we want sum here, so multiply by feature_dim. - feature_dim = resolve_shape(x)[-1] - corr *= feature_dim - return corr - - -def local_pairwise_distances(x, y, max_distance=9): - """Computes pairwise squared l2 distances using a local search window. - - Optimized implementation using correlation_cost. - - Args: - x: Float32 tensor of shape [height, width, feature_dim]. - y: Float32 tensor of shape [height, width, feature_dim]. - max_distance: Integer, the maximum distance in pixel coordinates - per dimension which is considered to be in the search window. - - Returns: - Float32 distances tensor of shape - [height, width, (2 * max_distance + 1) ** 2]. 
- """ - with tf.name_scope('local_pairwise_distances'): - # d[i,j] = (x[i] - y[j]) * (x[i] - y[j])' - # = sum(x[i]^2, -1) + sum(y[j]^2, -1) - 2 * x[i] * y[j]' - corr = cross_correlate(x, y, max_distance=max_distance) - xs = tf.reduce_sum(x * x, axis=2)[..., tf.newaxis] - ys = tf.reduce_sum(y * y, axis=2)[..., tf.newaxis] - ones_ys = tf.ones_like(ys) - ys = cross_correlate(ones_ys, ys, max_distance=max_distance) - d = xs + ys - 2 * corr - # Boundary should be set to Inf. - boundary = tf.equal( - cross_correlate(ones_ys, ones_ys, max_distance=max_distance), 0) - d = tf.where(boundary, tf.fill(tf.shape(d), tf.constant(np.float('inf'))), - d) - return d - - -def local_pairwise_distances2(x, y, max_distance=9): - """Computes pairwise squared l2 distances using a local search window. - - Naive implementation using map_fn. - Used as a slow fallback for when correlation_cost is not available. - - Args: - x: Float32 tensor of shape [height, width, feature_dim]. - y: Float32 tensor of shape [height, width, feature_dim]. - max_distance: Integer, the maximum distance in pixel coordinates - per dimension which is considered to be in the search window. - - Returns: - Float32 distances tensor of shape - [height, width, (2 * max_distance + 1) ** 2]. - """ - with tf.name_scope('local_pairwise_distances2'): - padding_val = 1e20 - padded_y = tf.pad(y, [[max_distance, max_distance], - [max_distance, max_distance], [0, 0]], - constant_values=padding_val) - height, width, _ = resolve_shape(x) - dists = [] - for y_start in range(2 * max_distance + 1): - y_end = y_start + height - y_slice = padded_y[y_start:y_end] - for x_start in range(2 * max_distance + 1): - x_end = x_start + width - offset_y = y_slice[:, x_start:x_end] - dist = tf.reduce_sum(tf.squared_difference(x, offset_y), axis=2) - dists.append(dist) - dists = tf.stack(dists, axis=2) - return dists - - -def majority_vote(labels): - """Performs a label majority vote along axis 1. - - Second try, hopefully this time more efficient. 
- We assume that the labels are contiguous starting from 0. - It will also work for non-contiguous labels, but be inefficient. - - Args: - labels: Int tensor of shape [n, k] - - Returns: - The majority of labels along axis 1 - """ - max_label = tf.reduce_max(labels) - one_hot = tf.one_hot(labels, depth=max_label + 1) - summed = tf.reduce_sum(one_hot, axis=1) - majority = tf.argmax(summed, axis=1) - return majority - - -def assign_labels_by_nearest_neighbors(reference_embeddings, query_embeddings, - reference_labels, k=1): - """Segments by nearest neighbor query wrt the reference frame. - - Args: - reference_embeddings: Tensor of shape [height, width, embedding_dim], - the embedding vectors for the reference frame - query_embeddings: Tensor of shape [n_query_images, height, width, - embedding_dim], the embedding vectors for the query frames - reference_labels: Tensor of shape [height, width, 1], the class labels of - the reference frame - k: Integer, the number of nearest neighbors to use - - Returns: - The labels of the nearest neighbors as [n_query_frames, height, width, 1] - tensor - - Raises: - ValueError: If k < 1. 
- """ - if k < 1: - raise ValueError('k must be at least 1') - dists = flattened_pairwise_distances(reference_embeddings, query_embeddings) - if k == 1: - nn_indices = tf.argmin(dists, axis=1)[..., tf.newaxis] - else: - _, nn_indices = tf.nn.top_k(-dists, k, sorted=False) - reference_labels = tf.reshape(reference_labels, [-1]) - nn_labels = tf.gather(reference_labels, nn_indices) - if k == 1: - nn_labels = tf.squeeze(nn_labels, axis=1) - else: - nn_labels = majority_vote(nn_labels) - height = tf.shape(reference_embeddings)[0] - width = tf.shape(reference_embeddings)[1] - n_query_frames = query_embeddings.shape[0] - nn_labels = tf.reshape(nn_labels, [n_query_frames, height, width, 1]) - return nn_labels - - -def flattened_pairwise_distances(reference_embeddings, query_embeddings): - """Calculates flattened tensor of pairwise distances between ref and query. - - Args: - reference_embeddings: Tensor of shape [..., embedding_dim], - the embedding vectors for the reference frame - query_embeddings: Tensor of shape [n_query_images, height, width, - embedding_dim], the embedding vectors for the query frames. - - Returns: - A distance tensor of shape [reference_embeddings.size / embedding_dim, - query_embeddings.size / embedding_dim] - """ - embedding_dim = resolve_shape(query_embeddings)[-1] - reference_embeddings = tf.reshape(reference_embeddings, [-1, embedding_dim]) - first_dim = -1 - query_embeddings = tf.reshape(query_embeddings, [first_dim, embedding_dim]) - dists = pairwise_distances(query_embeddings, reference_embeddings) - return dists - - -def nearest_neighbor_features_per_object( - reference_embeddings, query_embeddings, reference_labels, - max_neighbors_per_object, k_nearest_neighbors, gt_ids=None, n_chunks=100): - """Calculates the distance to the nearest neighbor per object. - - For every pixel of query_embeddings calculate the distance to the - nearest neighbor in the (possibly subsampled) reference_embeddings per object. 
- - Args: - reference_embeddings: Tensor of shape [height, width, embedding_dim], - the embedding vectors for the reference frame. - query_embeddings: Tensor of shape [n_query_images, height, width, - embedding_dim], the embedding vectors for the query frames. - reference_labels: Tensor of shape [height, width, 1], the class labels of - the reference frame. - max_neighbors_per_object: Integer, the maximum number of candidates - for the nearest neighbor query per object after subsampling, - or 0 for no subsampling. - k_nearest_neighbors: Integer, the number of nearest neighbors to use. - gt_ids: Int tensor of shape [n_objs] of the sorted unique ground truth - ids in the first frame. If None, it will be derived from - reference_labels. - n_chunks: Integer, the number of chunks to use to save memory - (set to 1 for no chunking). - - Returns: - nn_features: A float32 tensor of nearest neighbor features of shape - [n_query_images, height, width, n_objects, feature_dim]. - gt_ids: An int32 tensor of the unique sorted object ids present - in the reference labels. 
- """ - with tf.name_scope('nn_features_per_object'): - reference_labels_flat = tf.reshape(reference_labels, [-1]) - if gt_ids is None: - ref_obj_ids, _ = tf.unique(reference_labels_flat) - ref_obj_ids = tf.contrib.framework.sort(ref_obj_ids) - gt_ids = ref_obj_ids - embedding_dim = resolve_shape(reference_embeddings)[-1] - reference_embeddings_flat = tf.reshape(reference_embeddings, - [-1, embedding_dim]) - - reference_embeddings_flat, reference_labels_flat = ( - subsample_reference_embeddings_and_labels(reference_embeddings_flat, - reference_labels_flat, - gt_ids, - max_neighbors_per_object)) - shape = resolve_shape(query_embeddings) - query_embeddings_flat = tf.reshape(query_embeddings, [-1, embedding_dim]) - nn_features = _nearest_neighbor_features_per_object_in_chunks( - reference_embeddings_flat, query_embeddings_flat, reference_labels_flat, - gt_ids, k_nearest_neighbors, n_chunks) - nn_features_dim = resolve_shape(nn_features)[-1] - nn_features_reshaped = tf.reshape(nn_features, - tf.stack(shape[:3] + [tf.size(gt_ids), - nn_features_dim])) - return nn_features_reshaped, gt_ids - - -def _nearest_neighbor_features_per_object_in_chunks( - reference_embeddings_flat, query_embeddings_flat, reference_labels_flat, - ref_obj_ids, k_nearest_neighbors, n_chunks): - """Calculates the nearest neighbor features per object in chunks to save mem. - - Uses chunking to bound the memory use. - - Args: - reference_embeddings_flat: Tensor of shape [n, embedding_dim], - the embedding vectors for the reference frame. - query_embeddings_flat: Tensor of shape [m, embedding_dim], the embedding - vectors for the query frames. - reference_labels_flat: Tensor of shape [n], the class labels of the - reference frame. - ref_obj_ids: int tensor of unique object ids in the reference labels. - k_nearest_neighbors: Integer, the number of nearest neighbors to use. - n_chunks: Integer, the number of chunks to use to save memory - (set to 1 for no chunking). 
- - Returns: - nn_features: A float32 tensor of nearest neighbor features of shape - [m, n_objects, feature_dim]. - """ - chunk_size = tf.cast(tf.ceil(tf.cast(tf.shape(query_embeddings_flat)[0], - tf.float32) / n_chunks), tf.int32) - wrong_label_mask = tf.not_equal(reference_labels_flat, - ref_obj_ids[:, tf.newaxis]) - all_features = [] - for n in range(n_chunks): - if n_chunks == 1: - query_embeddings_flat_chunk = query_embeddings_flat - else: - chunk_start = n * chunk_size - chunk_end = (n + 1) * chunk_size - query_embeddings_flat_chunk = query_embeddings_flat[chunk_start:chunk_end] - # Use control dependencies to make sure that the chunks are not processed - # in parallel which would prevent any peak memory savings. - with tf.control_dependencies(all_features): - features = _nn_features_per_object_for_chunk( - reference_embeddings_flat, query_embeddings_flat_chunk, - wrong_label_mask, k_nearest_neighbors - ) - all_features.append(features) - if n_chunks == 1: - nn_features = all_features[0] - else: - nn_features = tf.concat(all_features, axis=0) - return nn_features - - -def _nn_features_per_object_for_chunk( - reference_embeddings, query_embeddings, wrong_label_mask, - k_nearest_neighbors): - """Extracts features for each object using nearest neighbor attention. - - Args: - reference_embeddings: Tensor of shape [n_chunk, embedding_dim], - the embedding vectors for the reference frame. - query_embeddings: Tensor of shape [m_chunk, embedding_dim], the embedding - vectors for the query frames. - wrong_label_mask: - k_nearest_neighbors: Integer, the number of nearest neighbors to use. - - Returns: - nn_features: A float32 tensor of nearest neighbor features of shape - [m_chunk, n_objects, feature_dim]. 
- """ - reference_embeddings_key = reference_embeddings - query_embeddings_key = query_embeddings - dists = flattened_pairwise_distances(reference_embeddings_key, - query_embeddings_key) - dists = (dists[:, tf.newaxis, :] + - tf.cast(wrong_label_mask[tf.newaxis, :, :], tf.float32) * - WRONG_LABEL_PADDING_DISTANCE) - if k_nearest_neighbors == 1: - features = tf.reduce_min(dists, axis=2, keepdims=True) - else: - # Find the closest k and combine them according to attention_feature_type - dists, _ = tf.nn.top_k(-dists, k=k_nearest_neighbors) - dists = -dists - # If not enough real neighbors were found, pad with the farthest real - # neighbor. - valid_mask = tf.less(dists, WRONG_LABEL_PADDING_DISTANCE) - masked_dists = dists * tf.cast(valid_mask, tf.float32) - pad_dist = tf.tile(tf.reduce_max(masked_dists, axis=2)[..., tf.newaxis], - multiples=[1, 1, k_nearest_neighbors]) - dists = tf.where(valid_mask, dists, pad_dist) - # take mean of distances - features = tf.reduce_mean(dists, axis=2, keepdims=True) - return features - - -def create_embedding_segmentation_features(features, feature_dimension, - n_layers, kernel_size, reuse, - atrous_rates=None): - """Extracts features which can be used to estimate the final segmentation. - - Args: - features: input features of shape [batch, height, width, features] - feature_dimension: Integer, the dimensionality used in the segmentation - head layers. - n_layers: Integer, the number of layers in the segmentation head. - kernel_size: Integer, the kernel size used in the segmentation head. - reuse: reuse mode for the variable_scope. - atrous_rates: List of integers of length n_layers, the atrous rates to use. - - Returns: - Features to be used to estimate the segmentation labels of shape - [batch, height, width, embedding_seg_feat_dim]. 
- """ - if atrous_rates is None or not atrous_rates: - atrous_rates = [1 for _ in range(n_layers)] - assert len(atrous_rates) == n_layers - with tf.variable_scope('embedding_seg', reuse=reuse): - for n in range(n_layers): - features = model.split_separable_conv2d( - features, feature_dimension, kernel_size=kernel_size, - rate=atrous_rates[n], scope='split_separable_conv2d_{}'.format(n)) - return features - - -def add_image_summaries(images, nn_features, logits, batch_size, - prev_frame_nn_features=None): - """Adds image summaries of input images, attention features and logits. - - Args: - images: Image tensor of shape [batch, height, width, channels]. - nn_features: Nearest neighbor attention features of shape - [batch_size, height, width, n_objects, 1]. - logits: Float32 tensor of logits. - batch_size: Integer, the number of videos per clone per mini-batch. - prev_frame_nn_features: Nearest neighbor attention features wrt. the - last frame of shape [batch_size, height, width, n_objects, 1]. - Can be None. - """ - # Separate reference and query images. - reshaped_images = tf.reshape(images, tf.stack( - [batch_size, -1] + resolve_shape(images)[1:])) - reference_images = reshaped_images[:, 0] - query_images = reshaped_images[:, 1:] - query_images_reshaped = tf.reshape(query_images, tf.stack( - [-1] + resolve_shape(images)[1:])) - tf.summary.image('ref_images', reference_images, max_outputs=batch_size) - tf.summary.image('query_images', query_images_reshaped, max_outputs=10) - predictions = tf.cast( - tf.argmax(logits, axis=-1), tf.uint8)[..., tf.newaxis] - # Scale up so that we can actually see something. - tf.summary.image('predictions', predictions * 32, max_outputs=10) - # We currently only show the first dimension of the features for background - # and the first foreground object. 
- tf.summary.image('nn_fg_features', nn_features[..., 0:1, 0], - max_outputs=batch_size) - if prev_frame_nn_features is not None: - tf.summary.image('nn_fg_features_prev', prev_frame_nn_features[..., 0:1, 0], - max_outputs=batch_size) - tf.summary.image('nn_bg_features', nn_features[..., 1:2, 0], - max_outputs=batch_size) - if prev_frame_nn_features is not None: - tf.summary.image('nn_bg_features_prev', - prev_frame_nn_features[..., 1:2, 0], - max_outputs=batch_size) - - -def get_embeddings(images, model_options, embedding_dimension): - """Extracts embedding vectors for images. Should only be used for inference. - - Args: - images: A tensor of shape [batch, height, width, channels]. - model_options: A ModelOptions instance to configure models. - embedding_dimension: Integer, the dimension of the embedding. - - Returns: - embeddings: A tensor of shape [batch, height, width, embedding_dimension]. - """ - features, end_points = model.extract_features( - images, - model_options, - is_training=False) - - if model_options.decoder_output_stride is not None: - decoder_output_stride = min(model_options.decoder_output_stride) - if model_options.crop_size is None: - height = tf.shape(images)[1] - width = tf.shape(images)[2] - else: - height, width = model_options.crop_size - features = model.refine_by_decoder( - features, - end_points, - crop_size=[height, width], - decoder_output_stride=[decoder_output_stride], - decoder_use_separable_conv=model_options.decoder_use_separable_conv, - model_variant=model_options.model_variant, - is_training=False) - - with tf.variable_scope('embedding'): - embeddings = split_separable_conv2d_with_identity_initializer( - features, embedding_dimension, scope='split_separable_conv2d') - return embeddings - - -def get_logits_with_matching(images, - model_options, - weight_decay=0.0001, - reuse=None, - is_training=False, - fine_tune_batch_norm=False, - reference_labels=None, - batch_size=None, - num_frames_per_video=None, - 
embedding_dimension=None, - max_neighbors_per_object=0, - k_nearest_neighbors=1, - use_softmax_feedback=True, - initial_softmax_feedback=None, - embedding_seg_feature_dimension=256, - embedding_seg_n_layers=4, - embedding_seg_kernel_size=7, - embedding_seg_atrous_rates=None, - normalize_nearest_neighbor_distances=True, - also_attend_to_previous_frame=True, - damage_initial_previous_frame_mask=False, - use_local_previous_frame_attention=True, - previous_frame_attention_window_size=15, - use_first_frame_matching=True, - also_return_embeddings=False, - ref_embeddings=None): - """Gets the logits by atrous/image spatial pyramid pooling using attention. - - Args: - images: A tensor of size [batch, height, width, channels]. - model_options: A ModelOptions instance to configure models. - weight_decay: The weight decay for model variables. - reuse: Reuse the model variables or not. - is_training: Is training or not. - fine_tune_batch_norm: Fine-tune the batch norm parameters or not. - reference_labels: The segmentation labels of the reference frame on which - attention is applied. - batch_size: Integer, the number of videos on a batch - num_frames_per_video: Integer, the number of frames per video - embedding_dimension: Integer, the dimension of the embedding - max_neighbors_per_object: Integer, the maximum number of candidates - for the nearest neighbor query per object after subsampling. - Can be 0 for no subsampling. - k_nearest_neighbors: Integer, the number of nearest neighbors to use. - use_softmax_feedback: Boolean, whether to give the softmax predictions of - the last frame as additional input to the segmentation head. - initial_softmax_feedback: List of Float32 tensors, or None. Can be used to - initialize the softmax predictions used for the feedback loop. - Only has an effect if use_softmax_feedback is True. - embedding_seg_feature_dimension: Integer, the dimensionality used in the - segmentation head layers. 
- embedding_seg_n_layers: Integer, the number of layers in the segmentation - head. - embedding_seg_kernel_size: Integer, the kernel size used in the - segmentation head. - embedding_seg_atrous_rates: List of integers of length - embedding_seg_n_layers, the atrous rates to use for the segmentation head. - normalize_nearest_neighbor_distances: Boolean, whether to normalize the - nearest neighbor distances to [0,1] using sigmoid, scale and shift. - also_attend_to_previous_frame: Boolean, whether to also use nearest - neighbor attention with respect to the previous frame. - damage_initial_previous_frame_mask: Boolean, whether to artificially damage - the initial previous frame mask. Only has an effect if - also_attend_to_previous_frame is True. - use_local_previous_frame_attention: Boolean, whether to restrict the - previous frame attention to a local search window. - Only has an effect, if also_attend_to_previous_frame is True. - previous_frame_attention_window_size: Integer, the window size used for - local previous frame attention, if use_local_previous_frame_attention - is True. - use_first_frame_matching: Boolean, whether to extract features by matching - to the reference frame. This should always be true except for ablation - experiments. - also_return_embeddings: Boolean, whether to return the embeddings as well. - ref_embeddings: Tuple of - (first_frame_embeddings, previous_frame_embeddings), - each of shape [batch, height, width, embedding_dimension], or None. - Returns: - outputs_to_logits: A map from output_type to logits. - If also_return_embeddings is True, it will also return an embeddings - tensor of shape [batch, height, width, embedding_dimension]. 
- """ - features, end_points = model.extract_features( - images, - model_options, - weight_decay=weight_decay, - reuse=reuse, - is_training=is_training, - fine_tune_batch_norm=fine_tune_batch_norm) - - if model_options.decoder_output_stride: - decoder_output_stride = min(model_options.decoder_output_stride) - if model_options.crop_size is None: - height = tf.shape(images)[1] - width = tf.shape(images)[2] - else: - height, width = model_options.crop_size - decoder_height = model.scale_dimension(height, 1.0 / decoder_output_stride) - decoder_width = model.scale_dimension(width, 1.0 / decoder_output_stride) - features = model.refine_by_decoder( - features, - end_points, - crop_size=[height, width], - decoder_output_stride=[decoder_output_stride], - decoder_use_separable_conv=model_options.decoder_use_separable_conv, - model_variant=model_options.model_variant, - weight_decay=weight_decay, - reuse=reuse, - is_training=is_training, - fine_tune_batch_norm=fine_tune_batch_norm) - - with tf.variable_scope('embedding', reuse=reuse): - embeddings = split_separable_conv2d_with_identity_initializer( - features, embedding_dimension, scope='split_separable_conv2d') - embeddings = tf.identity(embeddings, name='embeddings') - scaled_reference_labels = tf.image.resize_nearest_neighbor( - reference_labels, - resolve_shape(embeddings, 4)[1:3], - align_corners=True) - h, w = decoder_height, decoder_width - if num_frames_per_video is None: - num_frames_per_video = tf.size(embeddings) // ( - batch_size * h * w * embedding_dimension) - new_labels_shape = tf.stack([batch_size, -1, h, w, 1]) - reshaped_reference_labels = tf.reshape(scaled_reference_labels, - new_labels_shape) - new_embeddings_shape = tf.stack([batch_size, - num_frames_per_video, h, w, - embedding_dimension]) - reshaped_embeddings = tf.reshape(embeddings, new_embeddings_shape) - all_nn_features = [] - all_ref_obj_ids = [] - # To keep things simple, we do all this separate for each sequence for now. 
- for n in range(batch_size): - embedding = reshaped_embeddings[n] - if ref_embeddings is None: - n_chunks = 100 - reference_embedding = embedding[0] - if also_attend_to_previous_frame or use_softmax_feedback: - queries_embedding = embedding[2:] - else: - queries_embedding = embedding[1:] - else: - if USE_CORRELATION_COST: - n_chunks = 20 - else: - n_chunks = 500 - reference_embedding = ref_embeddings[0][n] - queries_embedding = embedding - reference_labels = reshaped_reference_labels[n][0] - nn_features_n, ref_obj_ids = nearest_neighbor_features_per_object( - reference_embedding, queries_embedding, reference_labels, - max_neighbors_per_object, k_nearest_neighbors, n_chunks=n_chunks) - if normalize_nearest_neighbor_distances: - nn_features_n = (tf.nn.sigmoid(nn_features_n) - 0.5) * 2 - all_nn_features.append(nn_features_n) - all_ref_obj_ids.append(ref_obj_ids) - - feat_dim = resolve_shape(features)[-1] - features = tf.reshape(features, tf.stack( - [batch_size, num_frames_per_video, h, w, feat_dim])) - if ref_embeddings is None: - # Strip the features for the reference frame. - if also_attend_to_previous_frame or use_softmax_feedback: - features = features[:, 2:] - else: - features = features[:, 1:] - - # To keep things simple, we do all this separate for each sequence for now. - outputs_to_logits = {output: [] for - output in model_options.outputs_to_num_classes} - for n in range(batch_size): - features_n = features[n] - nn_features_n = all_nn_features[n] - nn_features_n_tr = tf.transpose(nn_features_n, [3, 0, 1, 2, 4]) - n_objs = tf.shape(nn_features_n_tr)[0] - # Repeat features for every object. - features_n_tiled = tf.tile(features_n[tf.newaxis], - multiples=[n_objs, 1, 1, 1, 1]) - prev_frame_labels = None - if also_attend_to_previous_frame: - prev_frame_labels = reshaped_reference_labels[n, 1] - if is_training and damage_initial_previous_frame_mask: - # Damage the previous frame masks. 
- prev_frame_labels = mask_damaging.damage_masks(prev_frame_labels, - dilate=False) - tf.summary.image('prev_frame_labels', - tf.cast(prev_frame_labels[tf.newaxis], - tf.uint8) * 32) - initial_softmax_feedback_n = create_initial_softmax_from_labels( - prev_frame_labels, reshaped_reference_labels[n][0], - decoder_output_stride=None, reduce_labels=True) - elif initial_softmax_feedback is not None: - initial_softmax_feedback_n = initial_softmax_feedback[n] - else: - initial_softmax_feedback_n = None - if initial_softmax_feedback_n is None: - last_softmax = tf.zeros((n_objs, h, w, 1), dtype=tf.float32) - else: - last_softmax = tf.transpose(initial_softmax_feedback_n, [2, 0, 1])[ - ..., tf.newaxis] - assert len(model_options.outputs_to_num_classes) == 1 - output = model_options.outputs_to_num_classes.keys()[0] - logits = [] - n_ref_frames = 1 - prev_frame_nn_features_n = None - if also_attend_to_previous_frame or use_softmax_feedback: - n_ref_frames += 1 - if ref_embeddings is not None: - n_ref_frames = 0 - for t in range(num_frames_per_video - n_ref_frames): - to_concat = [features_n_tiled[:, t]] - if use_first_frame_matching: - to_concat.append(nn_features_n_tr[:, t]) - if use_softmax_feedback: - to_concat.append(last_softmax) - if also_attend_to_previous_frame: - assert normalize_nearest_neighbor_distances, ( - 'previous frame attention currently only works when normalized ' - 'distances are used') - embedding = reshaped_embeddings[n] - if ref_embeddings is None: - last_frame_embedding = embedding[t + 1] - query_embeddings = embedding[t + 2, tf.newaxis] - else: - last_frame_embedding = ref_embeddings[1][0] - query_embeddings = embedding - if use_local_previous_frame_attention: - assert query_embeddings.shape[0] == 1 - prev_frame_nn_features_n = ( - local_previous_frame_nearest_neighbor_features_per_object( - last_frame_embedding, - query_embeddings[0], - prev_frame_labels, - all_ref_obj_ids[n], - max_distance=previous_frame_attention_window_size) - ) - else: - 
prev_frame_nn_features_n, _ = ( - nearest_neighbor_features_per_object( - last_frame_embedding, query_embeddings, prev_frame_labels, - max_neighbors_per_object, k_nearest_neighbors, - gt_ids=all_ref_obj_ids[n])) - prev_frame_nn_features_n = (tf.nn.sigmoid( - prev_frame_nn_features_n) - 0.5) * 2 - prev_frame_nn_features_n_sq = tf.squeeze(prev_frame_nn_features_n, - axis=0) - prev_frame_nn_features_n_tr = tf.transpose( - prev_frame_nn_features_n_sq, [2, 0, 1, 3]) - to_concat.append(prev_frame_nn_features_n_tr) - features_n_concat_t = tf.concat(to_concat, axis=-1) - embedding_seg_features_n_t = ( - create_embedding_segmentation_features( - features_n_concat_t, embedding_seg_feature_dimension, - embedding_seg_n_layers, embedding_seg_kernel_size, - reuse or n > 0, atrous_rates=embedding_seg_atrous_rates)) - logits_t = model.get_branch_logits( - embedding_seg_features_n_t, - 1, - model_options.atrous_rates, - aspp_with_batch_norm=model_options.aspp_with_batch_norm, - kernel_size=model_options.logits_kernel_size, - weight_decay=weight_decay, - reuse=reuse or n > 0 or t > 0, - scope_suffix=output - ) - logits.append(logits_t) - prev_frame_labels = tf.transpose(tf.argmax(logits_t, axis=0), - [2, 0, 1]) - last_softmax = tf.nn.softmax(logits_t, axis=0) - logits = tf.stack(logits, axis=1) - logits_shape = tf.stack( - [n_objs, num_frames_per_video - n_ref_frames] + - resolve_shape(logits)[2:-1]) - logits_reshaped = tf.reshape(logits, logits_shape) - logits_transposed = tf.transpose(logits_reshaped, [1, 2, 3, 0]) - outputs_to_logits[output].append(logits_transposed) - - add_image_summaries( - images[n * num_frames_per_video: (n+1) * num_frames_per_video], - nn_features_n, - logits_transposed, - batch_size=1, - prev_frame_nn_features=prev_frame_nn_features_n) - if also_return_embeddings: - return outputs_to_logits, embeddings - else: - return outputs_to_logits - - -def subsample_reference_embeddings_and_labels( - reference_embeddings_flat, reference_labels_flat, ref_obj_ids, - 
max_neighbors_per_object): - """Subsamples the reference embedding vectors and labels. - - After subsampling, at most max_neighbors_per_object items will remain per - class. - - Args: - reference_embeddings_flat: Tensor of shape [n, embedding_dim], - the embedding vectors for the reference frame. - reference_labels_flat: Tensor of shape [n, 1], - the class labels of the reference frame. - ref_obj_ids: An int32 tensor of the unique object ids present - in the reference labels. - max_neighbors_per_object: Integer, the maximum number of candidates - for the nearest neighbor query per object after subsampling, - or 0 for no subsampling. - - Returns: - reference_embeddings_flat: Tensor of shape [n_sub, embedding_dim], - the subsampled embedding vectors for the reference frame. - reference_labels_flat: Tensor of shape [n_sub, 1], - the class labels of the reference frame. - """ - if max_neighbors_per_object == 0: - return reference_embeddings_flat, reference_labels_flat - same_label_mask = tf.equal(reference_labels_flat[tf.newaxis, :], - ref_obj_ids[:, tf.newaxis]) - max_neighbors_per_object_repeated = tf.tile( - tf.constant(max_neighbors_per_object)[tf.newaxis], - multiples=[tf.size(ref_obj_ids)]) - # Somehow map_fn on GPU caused trouble sometimes, so let's put it on CPU - # for now. - with tf.device('cpu:0'): - subsampled_indices = tf.map_fn(_create_subsampling_mask, - (same_label_mask, - max_neighbors_per_object_repeated), - dtype=tf.int64, - name='subsample_labels_map_fn', - parallel_iterations=1) - mask = tf.not_equal(subsampled_indices, tf.constant(-1, dtype=tf.int64)) - masked_indices = tf.boolean_mask(subsampled_indices, mask) - reference_embeddings_flat = tf.gather(reference_embeddings_flat, - masked_indices) - reference_labels_flat = tf.gather(reference_labels_flat, masked_indices) - return reference_embeddings_flat, reference_labels_flat - - -def _create_subsampling_mask(args): - """Creates boolean mask which can be used to subsample the labels. 
- - Args: - args: tuple of (label_mask, max_neighbors_per_object), where label_mask - is the mask to be subsampled and max_neighbors_per_object is a int scalar, - the maximum number of neighbors to be retained after subsampling. - - Returns: - The boolean mask for subsampling the labels. - """ - label_mask, max_neighbors_per_object = args - indices = tf.squeeze(tf.where(label_mask), axis=1) - shuffled_indices = tf.random_shuffle(indices) - subsampled_indices = shuffled_indices[:max_neighbors_per_object] - n_pad = max_neighbors_per_object - tf.size(subsampled_indices) - padded_label = -1 - padding = tf.fill((n_pad,), tf.constant(padded_label, dtype=tf.int64)) - padded = tf.concat([subsampled_indices, padding], axis=0) - return padded - - -def conv2d_identity_initializer(scale=1.0, mean=0, stddev=3e-2): - """Creates an identity initializer for TensorFlow conv2d. - - We add a small amount of normal noise to the initialization matrix. - Code copied from lcchen@. - - Args: - scale: The scale coefficient for the identity weight matrix. - mean: A 0-D Tensor or Python value of type `dtype`. The mean of the - truncated normal distribution. - stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation - of the truncated normal distribution. - - Returns: - An identity initializer function for TensorFlow conv2d. - """ - def _initializer(shape, dtype=tf.float32, partition_info=None): - """Returns the identity matrix scaled by `scale`. - - Args: - shape: A tuple of int32 numbers indicating the shape of the initializing - matrix. - dtype: The data type of the initializing matrix. - partition_info: (Optional) variable_scope._PartitionInfo object holding - additional information about how the variable is partitioned. This input - is not used in our case, but is required by TensorFlow. - - Returns: - A identity matrix. - - Raises: - ValueError: If len(shape) != 4, or shape[0] != shape[1], or shape[0] is - not odd, or shape[1] is not odd.. 
- """ - del partition_info - if len(shape) != 4: - raise ValueError('Expect shape length to be 4.') - if shape[0] != shape[1]: - raise ValueError('Expect shape[0] = shape[1].') - if shape[0] % 2 != 1: - raise ValueError('Expect shape[0] to be odd value.') - if shape[1] % 2 != 1: - raise ValueError('Expect shape[1] to be odd value.') - weights = np.zeros(shape, dtype=np.float32) - center_y = shape[0] / 2 - center_x = shape[1] / 2 - min_channel = min(shape[2], shape[3]) - for i in range(min_channel): - weights[center_y, center_x, i, i] = scale - return tf.constant(weights, dtype=dtype) + tf.truncated_normal( - shape, mean=mean, stddev=stddev, dtype=dtype) - - return _initializer - - -def split_separable_conv2d_with_identity_initializer( - inputs, - filters, - kernel_size=3, - rate=1, - weight_decay=0.00004, - scope=None): - """Splits a separable conv2d into depthwise and pointwise conv2d. - - This operation differs from `tf.layers.separable_conv2d` as this operation - applies activation function between depthwise and pointwise conv2d. - - Args: - inputs: Input tensor with shape [batch, height, width, channels]. - filters: Number of filters in the 1x1 pointwise convolution. - kernel_size: A list of length 2: [kernel_height, kernel_width] of - of the filters. Can be an int if both values are the same. - rate: Atrous convolution rate for the depthwise convolution. - weight_decay: The weight decay to use for regularizing the model. - scope: Optional scope for the operation. - - Returns: - Computed features after split separable conv2d. 
- """ - initializer = conv2d_identity_initializer() - outputs = slim.separable_conv2d( - inputs, - None, - kernel_size=kernel_size, - depth_multiplier=1, - rate=rate, - weights_initializer=initializer, - weights_regularizer=None, - scope=scope + '_depthwise') - return slim.conv2d( - outputs, - filters, - 1, - weights_initializer=initializer, - weights_regularizer=slim.l2_regularizer(weight_decay), - scope=scope + '_pointwise') - - -def create_initial_softmax_from_labels(last_frame_labels, reference_labels, - decoder_output_stride, reduce_labels): - """Creates initial softmax predictions from last frame labels. - - Args: - last_frame_labels: last frame labels of shape [1, height, width, 1]. - reference_labels: reference frame labels of shape [1, height, width, 1]. - decoder_output_stride: Integer, the stride of the decoder. Can be None, in - this case it's assumed that the last_frame_labels and reference_labels - are already scaled to the decoder output resolution. - reduce_labels: Boolean, whether to reduce the depth of the softmax one_hot - encoding to the actual number of labels present in the reference frame - (otherwise the depth will be the highest label index + 1). - - Returns: - init_softmax: the initial softmax predictions. 
- """ - if decoder_output_stride is None: - labels_output_size = last_frame_labels - reference_labels_output_size = reference_labels - else: - h = tf.shape(last_frame_labels)[1] - w = tf.shape(last_frame_labels)[2] - h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride) - w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride) - labels_output_size = tf.image.resize_nearest_neighbor( - last_frame_labels, [h_sub, w_sub], align_corners=True) - reference_labels_output_size = tf.image.resize_nearest_neighbor( - reference_labels, [h_sub, w_sub], align_corners=True) - if reduce_labels: - unique_labels, _ = tf.unique(tf.reshape(reference_labels_output_size, [-1])) - depth = tf.size(unique_labels) - else: - depth = tf.reduce_max(reference_labels_output_size) + 1 - one_hot_assertion = tf.assert_less(tf.reduce_max(labels_output_size), depth) - with tf.control_dependencies([one_hot_assertion]): - init_softmax = tf.one_hot(tf.squeeze(labels_output_size, - axis=-1), - depth=depth, - dtype=tf.float32) - return init_softmax - - -def local_previous_frame_nearest_neighbor_features_per_object( - prev_frame_embedding, query_embedding, prev_frame_labels, - gt_ids, max_distance=9): - """Computes nearest neighbor features while only allowing local matches. - - Args: - prev_frame_embedding: Tensor of shape [height, width, embedding_dim], - the embedding vectors for the last frame. - query_embedding: Tensor of shape [height, width, embedding_dim], - the embedding vectors for the query frames. - prev_frame_labels: Tensor of shape [height, width, 1], the class labels of - the previous frame. - gt_ids: Int Tensor of shape [n_objs] of the sorted unique ground truth - ids in the first frame. - max_distance: Integer, the maximum distance allowed for local matching. - - Returns: - nn_features: A float32 np.array of nearest neighbor features of shape - [1, height, width, n_objects, 1]. 
- """ - with tf.name_scope( - 'local_previous_frame_nearest_neighbor_features_per_object'): - if USE_CORRELATION_COST: - tf.logging.info('Using correlation_cost.') - d = local_pairwise_distances(query_embedding, prev_frame_embedding, - max_distance=max_distance) - else: - # Slow fallback in case correlation_cost is not available. - tf.logging.warn('correlation cost is not available, using slow fallback ' - 'implementation.') - d = local_pairwise_distances2(query_embedding, prev_frame_embedding, - max_distance=max_distance) - d = (tf.nn.sigmoid(d) - 0.5) * 2 - height = tf.shape(prev_frame_embedding)[0] - width = tf.shape(prev_frame_embedding)[1] - - # Create offset versions of the mask. - if USE_CORRELATION_COST: - # New, faster code with cross-correlation via correlation_cost. - # Due to padding we have to add 1 to the labels. - offset_labels = correlation_cost_op.correlation_cost( - tf.ones((1, height, width, 1)), - tf.cast(prev_frame_labels + 1, tf.float32)[tf.newaxis], - kernel_size=1, - max_displacement=max_distance, stride_1=1, stride_2=1, - pad=max_distance) - offset_labels = tf.squeeze(offset_labels, axis=0)[..., tf.newaxis] - # Subtract the 1 again and round. 
- offset_labels = tf.round(offset_labels - 1) - offset_masks = tf.equal( - offset_labels, - tf.cast(gt_ids, tf.float32)[tf.newaxis, tf.newaxis, tf.newaxis, :]) - else: - # Slower code, without dependency to correlation_cost - masks = tf.equal(prev_frame_labels, gt_ids[tf.newaxis, tf.newaxis]) - padded_masks = tf.pad(masks, - [[max_distance, max_distance], - [max_distance, max_distance], - [0, 0]]) - offset_masks = [] - for y_start in range(2 * max_distance + 1): - y_end = y_start + height - masks_slice = padded_masks[y_start:y_end] - for x_start in range(2 * max_distance + 1): - x_end = x_start + width - offset_mask = masks_slice[:, x_start:x_end] - offset_masks.append(offset_mask) - offset_masks = tf.stack(offset_masks, axis=2) - - pad = tf.ones((height, width, (2 * max_distance + 1) ** 2, tf.size(gt_ids))) - d_tiled = tf.tile(d[..., tf.newaxis], multiples=(1, 1, 1, tf.size(gt_ids))) - d_masked = tf.where(offset_masks, d_tiled, pad) - dists = tf.reduce_min(d_masked, axis=2) - dists = tf.reshape(dists, (1, height, width, tf.size(gt_ids), 1)) - return dists diff --git a/research/feelvos/utils/embedding_utils_test.py b/research/feelvos/utils/embedding_utils_test.py deleted file mode 100644 index ddebd7b4e7fcc9402887ebf59d247fea815d6cda..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/embedding_utils_test.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for embedding utils.""" - -import unittest -import numpy as np -import tensorflow as tf -from feelvos.utils import embedding_utils - -if embedding_utils.USE_CORRELATION_COST: - # pylint: disable=g-import-not-at-top - from correlation_cost.python.ops import correlation_cost_op - - -class EmbeddingUtilsTest(tf.test.TestCase): - - def test_pairwise_distances(self): - x = np.arange(100, dtype=np.float32).reshape(20, 5) - y = np.arange(100, 200, dtype=np.float32).reshape(20, 5) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - x = tf.constant(x) - y = tf.constant(y) - d1 = embedding_utils.pairwise_distances(x, y) - d2 = embedding_utils.pairwise_distances2(x, y) - d1_val, d2_val = sess.run([d1, d2]) - self.assertAllClose(d1_val, d2_val) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_correlation_cost_one_dimensional(self): - a = np.array([[[[1.0], [2.0]], [[3.0], [4.0]]]]) - b = np.array([[[[2.0], [1.0]], [[4.0], [3.0]]]]) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - c = correlation_cost_op.correlation_cost( - a, b, kernel_size=1, max_displacement=1, stride_1=1, stride_2=1, - pad=1) - c = tf.squeeze(c, axis=0) - c_val = sess.run(c) - self.assertAllEqual(c_val.shape, (2, 2, 9)) - for y in range(2): - for x in range(2): - for dy in range(-1, 2): - for dx in range(-1, 2): - a_slice = a[0, y, x, 0] - if y + dy < 0 or y + dy > 1 or x + dx < 0 or x + dx > 1: - b_slice = 0 - else: - b_slice = b[0, y + dy, x + dx, 0] - expected = a_slice * b_slice - dy0 = dy + 1 - dx0 = dx + 1 - self.assertAlmostEqual(c_val[y, x, 3 * dy0 + dx0], expected) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_correlation_cost_two_dimensional(self): - a = np.array([[[[1.0, -5.0], [7.0, 2.0]], [[1.0, 3.0], [3.0, 4.0]]]]) - 
b = np.array([[[[2.0, 1.0], [0.0, -9.0]], [[4.0, 3.0], [3.0, 1.0]]]]) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - c = correlation_cost_op.correlation_cost( - a, b, kernel_size=1, max_displacement=1, stride_1=1, stride_2=1, - pad=1) - c = tf.squeeze(c, axis=0) - c_val = sess.run(c) - self.assertAllEqual(c_val.shape, (2, 2, 9)) - for y in range(2): - for x in range(2): - for dy in range(-1, 2): - for dx in range(-1, 2): - a_slice = a[0, y, x, :] - if y + dy < 0 or y + dy > 1 or x + dx < 0 or x + dx > 1: - b_slice = 0 - else: - b_slice = b[0, y + dy, x + dx, :] - expected = (a_slice * b_slice).mean() - dy0 = dy + 1 - dx0 = dx + 1 - self.assertAlmostEqual(c_val[y, x, 3 * dy0 + dx0], expected) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_local_pairwise_distances_one_dimensional(self): - a = np.array([[[1.0], [2.0]], [[3.0], [4.0]]]) - b = np.array([[[2.0], [1.0]], [[4.0], [3.0]]]) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - a_tf = tf.constant(a, dtype=tf.float32) - b_tf = tf.constant(b, dtype=tf.float32) - d = embedding_utils.local_pairwise_distances(a_tf, b_tf, - max_distance=1) - d_val = sess.run(d) - for y in range(2): - for x in range(2): - for dy in range(-1, 2): - for dx in range(-1, 2): - a_slice = a[y, x, 0] - if y + dy < 0 or y + dy > 1 or x + dx < 0 or x + dx > 1: - expected = np.float('inf') - else: - b_slice = b[y + dy, x + dx, 0] - expected = (a_slice - b_slice) ** 2 - dy0 = dy + 1 - dx0 = dx + 1 - self.assertAlmostEqual(d_val[y, x, 3 * dy0 + dx0], expected) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_local_pairwise_distances_shape(self): - a = np.zeros((4, 5, 2)) - b = np.zeros((4, 5, 2)) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - a_tf = tf.constant(a, dtype=tf.float32) - b_tf = tf.constant(b, dtype=tf.float32) 
- d = embedding_utils.local_pairwise_distances(a_tf, b_tf, max_distance=4) - d_val = sess.run(d) - self.assertAllEqual(d_val.shape, (4, 5, 81)) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_local_pairwise_distances_two_dimensional(self): - a = np.array([[[1.0, -5.0], [7.0, 2.0]], [[1.0, 3.0], [3.0, 4.0]]]) - b = np.array([[[2.0, 1.0], [0.0, -9.0]], [[4.0, 3.0], [3.0, 1.0]]]) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - a_tf = tf.constant(a, dtype=tf.float32) - b_tf = tf.constant(b, dtype=tf.float32) - d = embedding_utils.local_pairwise_distances(a_tf, b_tf, - max_distance=1) - d_val = sess.run(d) - for y in range(2): - for x in range(2): - for dy in range(-1, 2): - for dx in range(-1, 2): - a_slice = a[y, x, :] - if y + dy < 0 or y + dy > 1 or x + dx < 0 or x + dx > 1: - expected = np.float('inf') - else: - b_slice = b[y + dy, x + dx, :] - expected = ((a_slice - b_slice) ** 2).sum() - dy0 = dy + 1 - dx0 = dx + 1 - self.assertAlmostEqual(d_val[y, x, 3 * dy0 + dx0], expected) - - @unittest.skipIf(not embedding_utils.USE_CORRELATION_COST, - 'depends on correlation_cost') - def test_local_previous_frame_nearest_neighbor_features_per_object(self): - prev_frame_embedding = np.array([[[1.0, -5.0], [7.0, 2.0]], - [[1.0, 3.0], [3.0, 4.0]]]) / 10 - query_embedding = np.array([[[2.0, 1.0], [0.0, -9.0]], - [[4.0, 3.0], [3.0, 1.0]]]) / 10 - prev_frame_labels = np.array([[[0], [1]], [[1], [0]]]) - gt_ids = np.array([0, 1]) - g = tf.Graph() - with g.as_default(): - with self.test_session(graph=g) as sess: - prev_frame_embedding_tf = tf.constant(prev_frame_embedding, - dtype=tf.float32) - query_embedding_tf = tf.constant(query_embedding, dtype=tf.float32) - embu = embedding_utils - dists = ( - embu.local_previous_frame_nearest_neighbor_features_per_object( - prev_frame_embedding_tf, query_embedding_tf, - prev_frame_labels, gt_ids, max_distance=1)) - dists = tf.squeeze(dists, 
axis=4) - dists = tf.squeeze(dists, axis=0) - dists_val = sess.run(dists) - for obj_id in gt_ids: - for y in range(2): - for x in range(2): - curr_min = 1.0 - for dy in range(-1, 2): - for dx in range(-1, 2): - # Attention: here we shift the prev frame embedding, - # not the query. - if y + dy < 0 or y + dy > 1 or x + dx < 0 or x + dx > 1: - continue - if prev_frame_labels[y + dy, x + dx, 0] != obj_id: - continue - prev_frame_slice = prev_frame_embedding[y + dy, x + dx, :] - query_frame_slice = query_embedding[y, x, :] - v_unnorm = ((prev_frame_slice - query_frame_slice) ** 2).sum() - v = ((1.0 / (1.0 + np.exp(-v_unnorm))) - 0.5) * 2 - curr_min = min(curr_min, v) - expected = curr_min - self.assertAlmostEqual(dists_val[y, x, obj_id], expected, - places=5) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/feelvos/utils/eval_utils.py b/research/feelvos/utils/eval_utils.py deleted file mode 100644 index 517ec0d788eb3a6ec48246e10920dd4b55332bf5..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/eval_utils.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions for evaluations.""" - -import numpy as np -import PIL -import tensorflow as tf - -pascal_colormap = [ - 0, 0, 0, - 0.5020, 0, 0, - 0, 0.5020, 0, - 0.5020, 0.5020, 0, - 0, 0, 0.5020, - 0.5020, 0, 0.5020, - 0, 0.5020, 0.5020, - 0.5020, 0.5020, 0.5020, - 0.2510, 0, 0, - 0.7529, 0, 0, - 0.2510, 0.5020, 0, - 0.7529, 0.5020, 0, - 0.2510, 0, 0.5020, - 0.7529, 0, 0.5020, - 0.2510, 0.5020, 0.5020, - 0.7529, 0.5020, 0.5020, - 0, 0.2510, 0, - 0.5020, 0.2510, 0, - 0, 0.7529, 0, - 0.5020, 0.7529, 0, - 0, 0.2510, 0.5020, - 0.5020, 0.2510, 0.5020, - 0, 0.7529, 0.5020, - 0.5020, 0.7529, 0.5020, - 0.2510, 0.2510, 0] - - -def save_segmentation_with_colormap(filename, img): - """Saves a segmentation with the pascal colormap as expected for DAVIS eval. - - Args: - filename: Where to store the segmentation. - img: A numpy array of the segmentation to be saved. - """ - if img.shape[-1] == 1: - img = img[..., 0] - - # Save with colormap. - colormap = (np.array(pascal_colormap) * 255).round().astype('uint8') - colormap_image = PIL.Image.new('P', (16, 16)) - colormap_image.putpalette(colormap) - pil_image = PIL.Image.fromarray(img.astype('uint8')) - pil_image_with_colormap = pil_image.quantize(palette=colormap_image) - with tf.gfile.GFile(filename, 'w') as f: - pil_image_with_colormap.save(f) - - -def save_embeddings(filename, embeddings): - with tf.gfile.GFile(filename, 'w') as f: - np.save(f, embeddings) - - -def calculate_iou(pred_labels, ref_labels): - """Calculates the intersection over union for binary segmentation. - - Args: - pred_labels: predicted segmentation labels. - ref_labels: reference segmentation labels. 
- - Returns: - The IoU between pred_labels and ref_labels - """ - if ref_labels.any(): - i = np.logical_and(pred_labels, ref_labels).sum() - u = np.logical_or(pred_labels, ref_labels).sum() - return i.astype('float') / u - else: - if pred_labels.any(): - return 0.0 - else: - return 1.0 - - -def calculate_multi_object_miou_tf(pred_labels, ref_labels): - """Calculates the mIoU for a batch of predicted and reference labels. - - Args: - pred_labels: Int32 tensor of shape [batch, height, width, 1]. - ref_labels: Int32 tensor of shape [batch, height, width, 1]. - - Returns: - The mIoU between pred_labels and ref_labels as float32 scalar tensor. - """ - - def calculate_multi_object_miou(pred_labels_, ref_labels_): - """Calculates the mIoU for predicted and reference labels in numpy. - - Args: - pred_labels_: int32 np.array of shape [batch, height, width, 1]. - ref_labels_: int32 np.array of shape [batch, height, width, 1]. - - Returns: - The mIoU between pred_labels_ and ref_labels_. - """ - assert len(pred_labels_.shape) == 4 - assert pred_labels_.shape[3] == 1 - assert pred_labels_.shape == ref_labels_.shape - ious = [] - for pred_label, ref_label in zip(pred_labels_, ref_labels_): - ids = np.setdiff1d(np.unique(ref_label), [0]) - if ids.size == 0: - continue - for id_ in ids: - iou = calculate_iou(pred_label == id_, ref_label == id_) - ious.append(iou) - if ious: - return np.cast['float32'](np.mean(ious)) - else: - return np.cast['float32'](1.0) - - miou = tf.py_func(calculate_multi_object_miou, [pred_labels, ref_labels], - tf.float32, name='calculate_multi_object_miou') - miou.set_shape(()) - return miou - - -def calculate_multi_object_ious(pred_labels, ref_labels, label_set): - """Calculates the intersection over union for binary segmentation. - - Args: - pred_labels: predicted segmentation labels. - ref_labels: reference segmentation labels. - label_set: int np.array of object ids. 
- - Returns: - float np.array of IoUs between pred_labels and ref_labels - for each object in label_set. - """ - # Background should not be included as object label. - return np.array([calculate_iou(pred_labels == label, ref_labels == label) - for label in label_set if label != 0]) diff --git a/research/feelvos/utils/mask_damaging.py b/research/feelvos/utils/mask_damaging.py deleted file mode 100644 index 74f3cdab5a0e4374f0cd238544a9a582fd0eef92..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/mask_damaging.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for artificially damaging segmentation masks.""" - -import numpy as np -from scipy.ndimage import interpolation -from skimage import morphology -from skimage import transform -import tensorflow as tf - - -def damage_masks(labels, shift=True, scale=True, rotate=True, dilate=True): - """Damages segmentation masks by random transformations. - - Args: - labels: Int32 labels tensor of shape (height, width, 1). - shift: Boolean, whether to damage the masks by shifting. - scale: Boolean, whether to damage the masks by scaling. - rotate: Boolean, whether to damage the masks by rotation. - dilate: Boolean, whether to damage the masks by dilation. - - Returns: - The damaged version of labels. 
- """ - def _damage_masks_np(labels_): - return damage_masks_np(labels_, shift, scale, rotate, dilate) - damaged_masks = tf.py_func(_damage_masks_np, [labels], tf.int32, - name='damage_masks') - damaged_masks.set_shape(labels.get_shape()) - return damaged_masks - - -def damage_masks_np(labels, shift=True, scale=True, rotate=True, dilate=True): - """Performs the actual mask damaging in numpy. - - Args: - labels: Int32 numpy array of shape (height, width, 1). - shift: Boolean, whether to damage the masks by shifting. - scale: Boolean, whether to damage the masks by scaling. - rotate: Boolean, whether to damage the masks by rotation. - dilate: Boolean, whether to damage the masks by dilation. - - Returns: - The damaged version of labels. - """ - unique_labels = np.unique(labels) - unique_labels = np.setdiff1d(unique_labels, [0]) - # Shuffle to get random depth ordering when combining together. - np.random.shuffle(unique_labels) - damaged_labels = np.zeros_like(labels) - for l in unique_labels: - obj_mask = (labels == l) - damaged_obj_mask = _damage_single_object_mask(obj_mask, shift, scale, - rotate, dilate) - damaged_labels[damaged_obj_mask] = l - return damaged_labels - - -def _damage_single_object_mask(mask, shift, scale, rotate, dilate): - """Performs mask damaging in numpy for a single object. - - Args: - mask: Boolean numpy array of shape(height, width, 1). - shift: Boolean, whether to damage the masks by shifting. - scale: Boolean, whether to damage the masks by scaling. - rotate: Boolean, whether to damage the masks by rotation. - dilate: Boolean, whether to damage the masks by dilation. - - Returns: - The damaged version of mask. - """ - # For now we just do shifting and scaling. Better would be Affine or thin - # spline plate transformations. 
- if shift: - mask = _shift_mask(mask) - if scale: - mask = _scale_mask(mask) - if rotate: - mask = _rotate_mask(mask) - if dilate: - mask = _dilate_mask(mask) - return mask - - -def _shift_mask(mask, max_shift_factor=0.05): - """Damages a mask for a single object by randomly shifting it in numpy. - - Args: - mask: Boolean numpy array of shape(height, width, 1). - max_shift_factor: Float scalar, the maximum factor for random shifting. - - Returns: - The shifted version of mask. - """ - nzy, nzx, _ = mask.nonzero() - h = nzy.max() - nzy.min() - w = nzx.max() - nzx.min() - size = np.sqrt(h * w) - offset = np.random.uniform(-size * max_shift_factor, size * max_shift_factor, - 2) - shifted_mask = interpolation.shift(np.squeeze(mask, axis=2), - offset, order=0).astype('bool')[..., - np.newaxis] - return shifted_mask - - -def _scale_mask(mask, scale_amount=0.025): - """Damages a mask for a single object by randomly scaling it in numpy. - - Args: - mask: Boolean numpy array of shape(height, width, 1). - scale_amount: Float scalar, the maximum factor for random scaling. - - Returns: - The scaled version of mask. - """ - nzy, nzx, _ = mask.nonzero() - cy = 0.5 * (nzy.max() - nzy.min()) - cx = 0.5 * (nzx.max() - nzx.min()) - scale_factor = np.random.uniform(1.0 - scale_amount, 1.0 + scale_amount) - shift = transform.SimilarityTransform(translation=[-cx, -cy]) - inv_shift = transform.SimilarityTransform(translation=[cx, cy]) - s = transform.SimilarityTransform(scale=[scale_factor, scale_factor]) - m = (shift + (s + inv_shift)).inverse - scaled_mask = transform.warp(mask, m) > 0.5 - return scaled_mask - - -def _rotate_mask(mask, max_rot_degrees=3.0): - """Damages a mask for a single object by randomly rotating it in numpy. - - Args: - mask: Boolean numpy array of shape(height, width, 1). - max_rot_degrees: Float scalar, the maximum number of degrees to rotate. - - Returns: - The scaled version of mask. 
- """ - cy = 0.5 * mask.shape[0] - cx = 0.5 * mask.shape[1] - rot_degrees = np.random.uniform(-max_rot_degrees, max_rot_degrees) - shift = transform.SimilarityTransform(translation=[-cx, -cy]) - inv_shift = transform.SimilarityTransform(translation=[cx, cy]) - r = transform.SimilarityTransform(rotation=np.deg2rad(rot_degrees)) - m = (shift + (r + inv_shift)).inverse - scaled_mask = transform.warp(mask, m) > 0.5 - return scaled_mask - - -def _dilate_mask(mask, dilation_radius=5): - """Damages a mask for a single object by dilating it in numpy. - - Args: - mask: Boolean numpy array of shape(height, width, 1). - dilation_radius: Integer, the radius of the used disk structure element. - - Returns: - The dilated version of mask. - """ - disk = morphology.disk(dilation_radius, dtype=np.bool) - dilated_mask = morphology.binary_dilation( - np.squeeze(mask, axis=2), selem=disk)[..., np.newaxis] - return dilated_mask diff --git a/research/feelvos/utils/train_utils.py b/research/feelvos/utils/train_utils.py deleted file mode 100644 index 02a04cd33645931c5c795bef8559c0d3f5c4c23c..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/train_utils.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utility functions for training.""" -import collections -import six -import tensorflow as tf - -from deeplab.core import preprocess_utils -from deeplab.utils import train_utils -from feelvos.utils import embedding_utils -from feelvos.utils import eval_utils - -slim = tf.contrib.slim -add_softmax_cross_entropy_loss_for_each_scale = ( - train_utils.add_softmax_cross_entropy_loss_for_each_scale) -get_model_gradient_multipliers = train_utils.get_model_gradient_multipliers -get_model_learning_rate = train_utils.get_model_learning_rate -resolve_shape = preprocess_utils.resolve_shape - - -def add_triplet_loss_for_each_scale(batch_size, num_frames_per_video, - embedding_dim, scales_to_embeddings, - labels, scope): - """Adds triplet loss for logits of each scale. - - Args: - batch_size: Int, the number of video chunks sampled per batch - num_frames_per_video: Int, the number of frames per video. - embedding_dim: Int, the dimension of the learned embedding - scales_to_embeddings: A map from embedding names for different scales to - embeddings. The embeddings have shape [batch, embeddings_height, - embeddings_width, embedding_dim]. - labels: Groundtruth labels with shape [batch, image_height, image_width, 1]. - scope: String, the scope for the loss. - - Raises: - ValueError: labels is None. - """ - if labels is None: - raise ValueError('No label for triplet loss.') - for scale, embeddings in scales_to_embeddings.iteritems(): - loss_scope = None - if scope: - loss_scope = '%s_%s' % (scope, scale) - # Label is downsampled to the same size as logits. - scaled_labels = tf.image.resize_nearest_neighbor( - labels, - resolve_shape(embeddings, 4)[1:3], - align_corners=True) - # Reshape from [batch * num_frames, ...] to [batch, num_frames, ...]. 
- h = tf.shape(embeddings)[1] - w = tf.shape(embeddings)[2] - new_labels_shape = tf.stack([batch_size, num_frames_per_video, h, w, 1]) - reshaped_labels = tf.reshape(scaled_labels, new_labels_shape) - new_embeddings_shape = tf.stack([batch_size, num_frames_per_video, h, w, - -1]) - reshaped_embeddings = tf.reshape(embeddings, new_embeddings_shape) - - with tf.name_scope(loss_scope): - total_loss = tf.constant(0, dtype=tf.float32) - for n in range(batch_size): - embedding = reshaped_embeddings[n] - label = reshaped_labels[n] - n_pixels = h * w - n_anchors_used = 256 - sampled_anchor_indices = tf.random_shuffle(tf.range(n_pixels))[ - :n_anchors_used] - anchors_pool = tf.reshape(embedding[0], [-1, embedding_dim]) - anchors_pool_classes = tf.reshape(label[0], [-1]) - anchors = tf.gather(anchors_pool, sampled_anchor_indices) - anchor_classes = tf.gather(anchors_pool_classes, sampled_anchor_indices) - - pos_neg_pool = tf.reshape(embedding[1:], [-1, embedding_dim]) - pos_neg_pool_classes = tf.reshape(label[1:], [-1]) - dists = embedding_utils.pairwise_distances(anchors, pos_neg_pool) - pos_mask = tf.equal(anchor_classes[:, tf.newaxis], - pos_neg_pool_classes[tf.newaxis, :]) - neg_mask = tf.logical_not(pos_mask) - pos_mask_f = tf.cast(pos_mask, tf.float32) - neg_mask_f = tf.cast(neg_mask, tf.float32) - pos_dists = pos_mask_f * dists + 1e20 * neg_mask_f - neg_dists = neg_mask_f * dists + 1e20 * pos_mask_f - pos_dists_min = tf.reduce_min(pos_dists, axis=1) - neg_dists_min = tf.reduce_min(neg_dists, axis=1) - margin = 1.0 - loss = tf.nn.relu(pos_dists_min - neg_dists_min + margin) - # Handle case that no positive is present (per anchor). - any_pos = tf.reduce_any(pos_mask, axis=1) - loss *= tf.cast(any_pos, tf.float32) - # Average over anchors - loss = tf.reduce_mean(loss, axis=0) - total_loss += loss - total_loss /= batch_size - # Scale the loss up a bit. 
- total_loss *= 3.0 - tf.add_to_collection(tf.GraphKeys.LOSSES, total_loss) - - -def add_dynamic_softmax_cross_entropy_loss_for_each_scale( - scales_to_logits, labels, ignore_label, loss_weight=1.0, - upsample_logits=True, scope=None, top_k_percent_pixels=1.0, - hard_example_mining_step=100000): - """Adds softmax cross entropy loss per scale for logits with varying classes. - - Also adds summaries for mIoU. - - Args: - scales_to_logits: A map from logits names for different scales to logits. - The logits are a list of length batch_size of tensors of shape - [time, logits_height, logits_width, num_classes]. - labels: Groundtruth labels with shape [batch_size * time, image_height, - image_width, 1]. - ignore_label: Integer, label to ignore. - loss_weight: Float, loss weight. - upsample_logits: Boolean, upsample logits or not. - scope: String, the scope for the loss. - top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its - value < 1.0, only compute the loss for the top k percent pixels (e.g., - the top 20% pixels). This is useful for hard pixel mining. - hard_example_mining_step: An integer, the training step in which the - hard exampling mining kicks off. Note that we gradually reduce the - mining percent to the top_k_percent_pixels. For example, if - hard_example_mining_step=100K and top_k_percent_pixels=0.25, then - mining percent will gradually reduce from 100% to 25% until 100K steps - after which we only mine top 25% pixels. - - Raises: - ValueError: Label or logits is None. - """ - if labels is None: - raise ValueError('No label for softmax cross entropy loss.') - - if top_k_percent_pixels < 0 or top_k_percent_pixels > 1: - raise ValueError('Unexpected value of top_k_percent_pixels.') - - for scale, logits in six.iteritems(scales_to_logits): - loss_scope = None - if scope: - loss_scope = '%s_%s' % (scope, scale) - - if upsample_logits: - # Label is not downsampled, and instead we upsample logits. 
- assert isinstance(logits, collections.Sequence) - logits = [tf.image.resize_bilinear( - x, - preprocess_utils.resolve_shape(labels, 4)[1:3], - align_corners=True) for x in logits] - scaled_labels = labels - else: - # Label is downsampled to the same size as logits. - assert isinstance(logits, collections.Sequence) - scaled_labels = tf.image.resize_nearest_neighbor( - labels, - preprocess_utils.resolve_shape(logits[0], 4)[1:3], - align_corners=True) - - batch_size = len(logits) - num_time = preprocess_utils.resolve_shape(logits[0])[0] - reshaped_labels = tf.reshape( - scaled_labels, ([batch_size, num_time] + - preprocess_utils.resolve_shape(scaled_labels)[1:])) - for n, logits_n in enumerate(logits): - labels_n = reshaped_labels[n] - labels_n = tf.reshape(labels_n, shape=[-1]) - not_ignore_mask = tf.to_float(tf.not_equal(labels_n, - ignore_label)) * loss_weight - num_classes_n = tf.shape(logits_n)[-1] - one_hot_labels = slim.one_hot_encoding( - labels_n, num_classes_n, on_value=1.0, off_value=0.0) - logits_n_flat = tf.reshape(logits_n, shape=[-1, num_classes_n]) - if top_k_percent_pixels == 1.0: - tf.losses.softmax_cross_entropy( - one_hot_labels, - logits_n_flat, - weights=not_ignore_mask, - scope=loss_scope) - else: - # Only compute the loss for top k percent pixels. - # First, compute the loss for all pixels. Note we do not put the loss - # to loss_collection and set reduction = None to keep the shape. - num_pixels = tf.to_float(tf.shape(logits_n_flat)[0]) - pixel_losses = tf.losses.softmax_cross_entropy( - one_hot_labels, - logits_n_flat, - weights=not_ignore_mask, - scope='pixel_losses', - loss_collection=None, - reduction=tf.losses.Reduction.NONE) - # Compute the top_k_percent pixels based on current training step. - if hard_example_mining_step == 0: - # Directly focus on the top_k pixels. - top_k_pixels = tf.to_int32(top_k_percent_pixels * num_pixels) - else: - # Gradually reduce the mining percent to top_k_percent_pixels. 
- global_step = tf.to_float(tf.train.get_or_create_global_step()) - ratio = tf.minimum(1.0, global_step / hard_example_mining_step) - top_k_pixels = tf.to_int32( - (ratio * top_k_percent_pixels + (1.0 - ratio)) * num_pixels) - _, top_k_indices = tf.nn.top_k(pixel_losses, - k=top_k_pixels, - sorted=True, - name='top_k_percent_pixels') - # Compute the loss for the top k percent pixels. - tf.losses.softmax_cross_entropy( - tf.gather(one_hot_labels, top_k_indices), - tf.gather(logits_n_flat, top_k_indices), - weights=tf.gather(not_ignore_mask, top_k_indices), - scope=loss_scope) - - pred_n = tf.argmax(logits_n, axis=-1, output_type=tf.int32)[ - ..., tf.newaxis] - labels_n = labels[n * num_time: (n + 1) * num_time] - miou = eval_utils.calculate_multi_object_miou_tf(pred_n, labels_n) - tf.summary.scalar('miou', miou) - - -def get_model_init_fn(train_logdir, - tf_initial_checkpoint, - initialize_last_layer, - last_layers, - ignore_missing_vars=False): - """Gets the function initializing model variables from a checkpoint. - - Args: - train_logdir: Log directory for training. - tf_initial_checkpoint: TensorFlow checkpoint for initialization. - initialize_last_layer: Initialize last layer or not. - last_layers: Last layers of the model. - ignore_missing_vars: Ignore missing variables in the checkpoint. - - Returns: - Initialization function. - """ - if tf_initial_checkpoint is None: - tf.logging.info('Not initializing the model from a checkpoint.') - return None - - if tf.train.latest_checkpoint(train_logdir): - tf.logging.info('Ignoring initialization; other checkpoint exists') - return None - - tf.logging.info('Initializing model from path: %s', tf_initial_checkpoint) - - # Variables that will not be restored. 
- exclude_list = ['global_step'] - if not initialize_last_layer: - exclude_list.extend(last_layers) - - variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list) - - if variables_to_restore: - return slim.assign_from_checkpoint_fn( - tf_initial_checkpoint, - variables_to_restore, - ignore_missing_vars=ignore_missing_vars) - return None diff --git a/research/feelvos/utils/video_input_generator.py b/research/feelvos/utils/video_input_generator.py deleted file mode 100644 index c0135e50110c677865217c8a3f13d1d1d891f0b2..0000000000000000000000000000000000000000 --- a/research/feelvos/utils/video_input_generator.py +++ /dev/null @@ -1,558 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Wrapper for providing semantic segmentation video data.""" - -import tensorflow as tf -from feelvos import input_preprocess -from feelvos import model -from feelvos.utils import mask_damaging -from feelvos.utils import train_utils - -slim = tf.contrib.slim -dataset_data_provider = slim.dataset_data_provider - - -MIN_LABEL_COUNT = 10 - - -def decode_image_sequence(tensor, image_format='jpeg', shape=None, - channels=3, raw_dtype=tf.uint8): - """Decodes a sequence of images. - - Args: - tensor: the tensor of strings to decode, shape: [num_images] - image_format: a string (possibly tensor) with the format of the image. 
- Options include 'jpeg', 'png', and 'raw'. - shape: a list or tensor of the decoded image shape for a single image. - channels: if 'shape' is None, the third dimension of the image is set to - this value. - raw_dtype: if the image is encoded as raw bytes, this is the method of - decoding the bytes into values. - Returns: - The decoded images with shape [time, height, width, channels]. - """ - handler = slim.tfexample_decoder.Image( - shape=shape, channels=channels, dtype=raw_dtype, repeated=True) - return handler.tensors_to_item({'image/encoded': tensor, - 'image/format': image_format}) - - -def _get_data(data_provider, dataset_split, video_frames_are_decoded): - """Gets data from data provider. - - Args: - data_provider: An object of slim.data_provider. - dataset_split: Dataset split. - video_frames_are_decoded: Boolean, whether the video frames are already - decoded - - Returns: - image: Image Tensor. - label: Label Tensor storing segmentation annotations. - object_label: An integer refers to object_label according to labelmap. If - the example has more than one object_label, take the first one. - image_name: Image name. - height: Image height. - width: Image width. - video_id: String tensor representing the name of the video. - - Raises: - ValueError: Failed to find label. - """ - - if video_frames_are_decoded: - image, = data_provider.get(['image']) - else: - image, = data_provider.get(['image/encoded']) - - # Some datasets do not contain image_name. 
- if 'image_name' in data_provider.list_items(): - image_name, = data_provider.get(['image_name']) - else: - image_name = tf.constant('') - - height, width = data_provider.get(['height', 'width']) - - label = None - if dataset_split != 'test': - if video_frames_are_decoded: - if 'labels_class' not in data_provider.list_items(): - raise ValueError('Failed to find labels.') - label, = data_provider.get(['labels_class']) - else: - key = 'segmentation/object/encoded' - if key not in data_provider.list_items(): - raise ValueError('Failed to find labels.') - label, = data_provider.get([key]) - - object_label = None - video_id, = data_provider.get(['video_id']) - - return image, label, object_label, image_name, height, width, video_id - - -def _has_foreground_and_background_in_first_frame(label, subsampling_factor): - """Checks if the labels have foreground and background in the first frame. - - Args: - label: Label tensor of shape [num_frames, height, width, 1]. - subsampling_factor: Integer, the subsampling factor. - - Returns: - Boolean, whether the labels have foreground and background in the first - frame. - """ - h, w = train_utils.resolve_shape(label)[1:3] - label_downscaled = tf.squeeze( - tf.image.resize_nearest_neighbor(label[0, tf.newaxis], - [h // subsampling_factor, - w // subsampling_factor], - align_corners=True), - axis=0) - is_bg = tf.equal(label_downscaled, 0) - is_fg = tf.logical_not(is_bg) - # Just using reduce_any was not robust enough, so lets make sure the count - # is above MIN_LABEL_COUNT. - fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32)) - bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32)) - has_bg = tf.greater_equal(fg_count, MIN_LABEL_COUNT) - has_fg = tf.greater_equal(bg_count, MIN_LABEL_COUNT) - return tf.logical_and(has_bg, has_fg) - - -def _has_foreground_and_background_in_first_frame_2(label, - decoder_output_stride): - """Checks if the labels have foreground and background in the first frame. 
- - Second attempt, this time we use the actual output dimension for resizing. - - Args: - label: Label tensor of shape [num_frames, height, width, 1]. - decoder_output_stride: Integer, the stride of the decoder output. - - Returns: - Boolean, whether the labels have foreground and background in the first - frame. - """ - h, w = train_utils.resolve_shape(label)[1:3] - h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride) - w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride) - label_downscaled = tf.squeeze( - tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub], - align_corners=True), axis=0) - is_bg = tf.equal(label_downscaled, 0) - is_fg = tf.logical_not(is_bg) - # Just using reduce_any was not robust enough, so lets make sure the count - # is above MIN_LABEL_COUNT. - fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32)) - bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32)) - has_bg = tf.greater_equal(fg_count, MIN_LABEL_COUNT) - has_fg = tf.greater_equal(bg_count, MIN_LABEL_COUNT) - return tf.logical_and(has_bg, has_fg) - - -def _has_enough_pixels_of_each_object_in_first_frame( - label, decoder_output_stride): - """Checks if for each object (incl. background) enough pixels are visible. - - During test time, we will usually not see a reference frame in which only - very few pixels of one object are visible. These cases can be problematic - during training, especially if more than the 1-nearest neighbor is used. - That's why this function can be used to detect and filter these cases. - - Args: - label: Label tensor of shape [num_frames, height, width, 1]. - decoder_output_stride: Integer, the stride of the decoder output. - - Returns: - Boolean, whether the labels have enough pixels of each object in the first - frame. 
- """ - h, w = train_utils.resolve_shape(label)[1:3] - h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride) - w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride) - label_downscaled = tf.squeeze( - tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub], - align_corners=True), axis=0) - _, _, counts = tf.unique_with_counts( - tf.reshape(label_downscaled, [-1])) - has_enough_pixels_per_object = tf.reduce_all( - tf.greater_equal(counts, MIN_LABEL_COUNT)) - return has_enough_pixels_per_object - - -def get(dataset, - num_frames_per_video, - crop_size, - batch_size, - min_resize_value=None, - max_resize_value=None, - resize_factor=None, - min_scale_factor=1., - max_scale_factor=1., - scale_factor_step_size=0, - preprocess_image_and_label=True, - num_readers=1, - num_threads=1, - dataset_split=None, - is_training=True, - model_variant=None, - batch_capacity_factor=32, - video_frames_are_decoded=False, - decoder_output_stride=None, - first_frame_finetuning=False, - sample_only_first_frame_for_finetuning=False, - sample_adjacent_and_consistent_query_frames=False, - remap_labels_to_reference_frame=True, - generate_prev_frame_mask_by_mask_damaging=False, - three_frame_dataset=False, - add_prev_frame_label=True): - """Gets the dataset split for semantic segmentation. - - This functions gets the dataset split for semantic segmentation. In - particular, it is a wrapper of (1) dataset_data_provider which returns the raw - dataset split, (2) input_preprcess which preprocess the raw data, and (3) the - Tensorflow operation of batching the preprocessed data. Then, the output could - be directly used by training, evaluation or visualization. - - Args: - dataset: An instance of slim Dataset. - num_frames_per_video: The number of frames used per video - crop_size: Image crop size [height, width]. - batch_size: Batch size. - min_resize_value: Desired size of the smaller image side. - max_resize_value: Maximum allowed size of the larger image side. 
- resize_factor: Resized dimensions are multiple of factor plus one. - min_scale_factor: Minimum scale factor value. - max_scale_factor: Maximum scale factor value. - scale_factor_step_size: The step size from min scale factor to max scale - factor. The input is randomly scaled based on the value of - (min_scale_factor, max_scale_factor, scale_factor_step_size). - preprocess_image_and_label: Boolean variable specifies if preprocessing of - image and label will be performed or not. - num_readers: Number of readers for data provider. - num_threads: Number of threads for batching data. - dataset_split: Dataset split. - is_training: Is training or not. - model_variant: Model variant (string) for choosing how to mean-subtract the - images. See feature_extractor.network_map for supported model variants. - batch_capacity_factor: Batch capacity factor affecting the training queue - batch capacity. - video_frames_are_decoded: Boolean, whether the video frames are already - decoded - decoder_output_stride: Integer, the stride of the decoder output. - first_frame_finetuning: Boolean, whether to only sample the first frame - for fine-tuning. - sample_only_first_frame_for_finetuning: Boolean, whether to only sample the - first frame during fine-tuning. This should be False when using lucid or - wonderland data, but true when fine-tuning on the first frame only. - Only has an effect if first_frame_finetuning is True. - sample_adjacent_and_consistent_query_frames: Boolean, if true, the query - frames (all but the first frame which is the reference frame) will be - sampled such that they are adjacent video frames and have the same - crop coordinates and flip augmentation. - remap_labels_to_reference_frame: Boolean, whether to remap the labels of - the query frames to match the labels of the (downscaled) reference frame. - If a query frame contains a label which is not present in the reference, - it will be mapped to background. 
- generate_prev_frame_mask_by_mask_damaging: Boolean, whether to generate - the masks used as guidance from the previous frame by damaging the - ground truth mask. - three_frame_dataset: Boolean, whether the dataset has exactly three frames - per video of which the first is to be used as reference and the two - others are consecutive frames to be used as query frames. - add_prev_frame_label: Boolean, whether to sample one more frame before the - first query frame to obtain a previous frame label. Only has an effect, - if sample_adjacent_and_consistent_query_frames is True and - generate_prev_frame_mask_by_mask_damaging is False. - - Returns: - A dictionary of batched Tensors for semantic segmentation. - - Raises: - ValueError: dataset_split is None, or Failed to find labels. - """ - if dataset_split is None: - raise ValueError('Unknown dataset split.') - if model_variant is None: - tf.logging.warning('Please specify a model_variant. See ' - 'feature_extractor.network_map for supported model ' - 'variants.') - - data_provider = dataset_data_provider.DatasetDataProvider( - dataset, - num_readers=num_readers, - num_epochs=None if is_training else 1, - shuffle=is_training) - image, label, object_label, image_name, height, width, video_id = _get_data( - data_provider, dataset_split, video_frames_are_decoded) - - sampling_is_valid = tf.constant(True) - if num_frames_per_video is not None: - total_num_frames = tf.shape(image)[0] - if first_frame_finetuning or three_frame_dataset: - if sample_only_first_frame_for_finetuning: - assert not sample_adjacent_and_consistent_query_frames, ( - 'this option does not make sense for sampling only first frame.') - # Sample the first frame num_frames_per_video times. 
- sel_indices = tf.tile(tf.constant(0, dtype=tf.int32)[tf.newaxis], - multiples=[num_frames_per_video]) - else: - if sample_adjacent_and_consistent_query_frames: - if add_prev_frame_label: - num_frames_per_video += 1 - # Since this is first frame fine-tuning, we'll for now assume that - # each sequence has exactly 3 images: the ref frame and 2 adjacent - # query frames. - assert num_frames_per_video == 3 - with tf.control_dependencies([tf.assert_equal(total_num_frames, 3)]): - sel_indices = tf.constant([1, 2], dtype=tf.int32) - else: - # Sample num_frames_per_video - 1 query frames which are not the - # first frame. - sel_indices = tf.random_shuffle( - tf.range(1, total_num_frames))[:(num_frames_per_video - 1)] - # Concat first frame as reference frame to the front. - sel_indices = tf.concat([tf.constant(0, dtype=tf.int32)[tf.newaxis], - sel_indices], axis=0) - else: - if sample_adjacent_and_consistent_query_frames: - if add_prev_frame_label: - # Sample one more frame which we can use to provide initial softmax - # feedback. - num_frames_per_video += 1 - ref_idx = tf.random_shuffle(tf.range(total_num_frames))[0] - sampling_is_valid = tf.greater_equal(total_num_frames, - num_frames_per_video) - def sample_query_start_idx(): - return tf.random_shuffle( - tf.range(total_num_frames - num_frames_per_video + 1))[0] - query_start_idx = tf.cond(sampling_is_valid, sample_query_start_idx, - lambda: tf.constant(0, dtype=tf.int32)) - def sample_sel_indices(): - return tf.concat( - [ref_idx[tf.newaxis], - tf.range( - query_start_idx, - query_start_idx + (num_frames_per_video - 1))], axis=0) - sel_indices = tf.cond( - sampling_is_valid, sample_sel_indices, - lambda: tf.zeros((num_frames_per_video,), dtype=tf.int32)) - else: - # Randomly sample some frames from the video. 
- sel_indices = tf.random_shuffle( - tf.range(total_num_frames))[:num_frames_per_video] - image = tf.gather(image, sel_indices, axis=0) - if not video_frames_are_decoded: - image = decode_image_sequence(image) - - if label is not None: - if num_frames_per_video is not None: - label = tf.gather(label, sel_indices, axis=0) - if not video_frames_are_decoded: - label = decode_image_sequence(label, image_format='png', channels=1) - - # Sometimes, label is saved as [num_frames_per_video, height, width] or - # [num_frames_per_video, height, width, 1]. We change it to be - # [num_frames_per_video, height, width, 1]. - if label.shape.ndims == 3: - label = tf.expand_dims(label, 3) - elif label.shape.ndims == 4 and label.shape.dims[3] == 1: - pass - else: - raise ValueError('Input label shape must be ' - '[num_frames_per_video, height, width],' - ' or [num_frames, height, width, 1]. ' - 'Got {}'.format(label.shape.ndims)) - label.set_shape([None, None, None, 1]) - - # Add size of first dimension since tf can't figure it out automatically. 
- image.set_shape((num_frames_per_video, None, None, None)) - if label is not None: - label.set_shape((num_frames_per_video, None, None, None)) - - preceding_frame_label = None - if preprocess_image_and_label: - if num_frames_per_video is None: - raise ValueError('num_frame_per_video must be specified for preproc.') - original_images = [] - images = [] - labels = [] - if sample_adjacent_and_consistent_query_frames: - num_frames_individual_preproc = 1 - else: - num_frames_individual_preproc = num_frames_per_video - for frame_idx in range(num_frames_individual_preproc): - original_image_t, image_t, label_t = ( - input_preprocess.preprocess_image_and_label( - image[frame_idx], - label[frame_idx], - crop_height=crop_size[0] if crop_size is not None else None, - crop_width=crop_size[1] if crop_size is not None else None, - min_resize_value=min_resize_value, - max_resize_value=max_resize_value, - resize_factor=resize_factor, - min_scale_factor=min_scale_factor, - max_scale_factor=max_scale_factor, - scale_factor_step_size=scale_factor_step_size, - ignore_label=dataset.ignore_label, - is_training=is_training, - model_variant=model_variant)) - original_images.append(original_image_t) - images.append(image_t) - labels.append(label_t) - if sample_adjacent_and_consistent_query_frames: - imgs_for_preproc = [image[frame_idx] for frame_idx in - range(1, num_frames_per_video)] - labels_for_preproc = [label[frame_idx] for frame_idx in - range(1, num_frames_per_video)] - original_image_rest, image_rest, label_rest = ( - input_preprocess.preprocess_images_and_labels_consistently( - imgs_for_preproc, - labels_for_preproc, - crop_height=crop_size[0] if crop_size is not None else None, - crop_width=crop_size[1] if crop_size is not None else None, - min_resize_value=min_resize_value, - max_resize_value=max_resize_value, - resize_factor=resize_factor, - min_scale_factor=min_scale_factor, - max_scale_factor=max_scale_factor, - scale_factor_step_size=scale_factor_step_size, - 
ignore_label=dataset.ignore_label, - is_training=is_training, - model_variant=model_variant)) - original_images.extend(original_image_rest) - images.extend(image_rest) - labels.extend(label_rest) - assert len(original_images) == num_frames_per_video - assert len(images) == num_frames_per_video - assert len(labels) == num_frames_per_video - - if remap_labels_to_reference_frame: - # Remap labels to indices into the labels of the (downscaled) reference - # frame, or 0, i.e. background, for labels which are not present - # in the reference. - reference_labels = labels[0][tf.newaxis] - h, w = train_utils.resolve_shape(reference_labels)[1:3] - embedding_height = model.scale_dimension( - h, 1.0 / decoder_output_stride) - embedding_width = model.scale_dimension( - w, 1.0 / decoder_output_stride) - reference_labels_embedding_size = tf.squeeze( - tf.image.resize_nearest_neighbor( - reference_labels, tf.stack([embedding_height, embedding_width]), - align_corners=True), - axis=0) - # Get sorted unique labels in the reference frame. - labels_in_ref_frame, _ = tf.unique( - tf.reshape(reference_labels_embedding_size, [-1])) - labels_in_ref_frame = tf.contrib.framework.sort(labels_in_ref_frame) - for idx in range(1, len(labels)): - ref_label_mask = tf.equal( - labels[idx], - labels_in_ref_frame[tf.newaxis, tf.newaxis, :]) - remapped = tf.argmax(tf.cast(ref_label_mask, tf.uint8), axis=-1, - output_type=tf.int32) - # Set to 0 if label is not present - is_in_ref = tf.reduce_any(ref_label_mask, axis=-1) - remapped *= tf.cast(is_in_ref, tf.int32) - labels[idx] = remapped[..., tf.newaxis] - - if sample_adjacent_and_consistent_query_frames: - if first_frame_finetuning and generate_prev_frame_mask_by_mask_damaging: - preceding_frame_label = mask_damaging.damage_masks(labels[1]) - elif add_prev_frame_label: - # Discard the image of the additional frame and take the label as - # initialization for softmax feedback. 
- original_images = [original_images[0]] + original_images[2:] - preceding_frame_label = labels[1] - images = [images[0]] + images[2:] - labels = [labels[0]] + labels[2:] - num_frames_per_video -= 1 - - original_image = tf.stack(original_images, axis=0) - image = tf.stack(images, axis=0) - label = tf.stack(labels, axis=0) - else: - if label is not None: - # Need to set label shape due to batching. - label.set_shape([num_frames_per_video, - None if crop_size is None else crop_size[0], - None if crop_size is None else crop_size[1], - 1]) - original_image = tf.to_float(tf.zeros_like(label)) - if crop_size is None: - height = tf.shape(image)[1] - width = tf.shape(image)[2] - else: - height = crop_size[0] - width = crop_size[1] - - sample = {'image': image, - 'image_name': image_name, - 'height': height, - 'width': width, - 'video_id': video_id} - if label is not None: - sample['label'] = label - - if object_label is not None: - sample['object_label'] = object_label - - if preceding_frame_label is not None: - sample['preceding_frame_label'] = preceding_frame_label - - if not is_training: - # Original image is only used during visualization. - sample['original_image'] = original_image - - if is_training: - if first_frame_finetuning: - keep_input = tf.constant(True) - else: - keep_input = tf.logical_and(sampling_is_valid, tf.logical_and( - _has_enough_pixels_of_each_object_in_first_frame( - label, decoder_output_stride), - _has_foreground_and_background_in_first_frame_2( - label, decoder_output_stride))) - - batched = tf.train.maybe_batch(sample, - keep_input=keep_input, - batch_size=batch_size, - num_threads=num_threads, - capacity=batch_capacity_factor * batch_size, - dynamic_pad=True) - else: - batched = tf.train.batch(sample, - batch_size=batch_size, - num_threads=num_threads, - capacity=batch_capacity_factor * batch_size, - dynamic_pad=True) - - # Flatten from [batch, num_frames_per_video, ...] to - # batch * num_frames_per_video, ...]. 
- cropped_height = train_utils.resolve_shape(batched['image'])[2] - cropped_width = train_utils.resolve_shape(batched['image'])[3] - if num_frames_per_video is None: - first_dim = -1 - else: - first_dim = batch_size * num_frames_per_video - batched['image'] = tf.reshape(batched['image'], - [first_dim, cropped_height, cropped_width, 3]) - if label is not None: - batched['label'] = tf.reshape(batched['label'], - [first_dim, cropped_height, cropped_width, 1]) - return batched diff --git a/research/feelvos/vis_video.py b/research/feelvos/vis_video.py deleted file mode 100644 index 211bccf52acdef83aca298285fc473748126de02..0000000000000000000000000000000000000000 --- a/research/feelvos/vis_video.py +++ /dev/null @@ -1,500 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Segmentation results evaluation and visualization for videos using attention. 
-""" - -import math -import os -import time -import numpy as np - -import tensorflow as tf - -from feelvos import common -from feelvos import model -from feelvos.datasets import video_dataset -from feelvos.utils import embedding_utils -from feelvos.utils import eval_utils -from feelvos.utils import video_input_generator - - -slim = tf.contrib.slim -flags = tf.app.flags -FLAGS = flags.FLAGS - -flags.DEFINE_integer('eval_interval_secs', 60 * 5, - 'How often (in seconds) to run evaluation.') - -flags.DEFINE_string('master', '', 'BNS name of the tensorflow server') - -flags.DEFINE_integer('vis_batch_size', 1, - 'The number of images in each batch during evaluation.') - -flags.DEFINE_string('vis_logdir', None, 'Where to write the event logs.') - -flags.DEFINE_string('checkpoint_dir', None, 'Directory of model checkpoints.') - -flags.DEFINE_integer('output_stride', 8, - 'The ratio of input to output spatial resolution.') - -flags.DEFINE_string('dataset', 'davis_2016', - 'Name of the segmentation dataset.') - -flags.DEFINE_string('vis_split', 'val', - 'Which split of the dataset used for visualizing results') - -flags.DEFINE_string( - 'dataset_dir', - '/cns/is-d/home/lcchen/data/pascal_voc_seg/example_sstables', - 'Where the dataset resides.') - -flags.DEFINE_integer('num_vis_examples', -1, - 'Number of examples for visualization. If -1, use all ' - 'samples in the vis data.') - -flags.DEFINE_multi_integer('atrous_rates', None, - 'Atrous rates for atrous spatial pyramid pooling.') - -flags.DEFINE_bool('save_segmentations', False, 'Whether to save the ' - 'segmentation masks as ' - 'png images. Might be slow ' - 'on cns.') - -flags.DEFINE_bool('save_embeddings', False, 'Whether to save the embeddings as' - 'pickle. Might be slow on cns.') - -flags.DEFINE_bool('eval_once_and_quit', False, - 'Whether to just run the eval a single time and quit ' - 'afterwards. 
Otherwise, the eval is run in a loop with ' - 'new checkpoints.') - -flags.DEFINE_boolean('first_frame_finetuning', False, - 'Whether to only sample the first frame for fine-tuning.') - -# the folder where segmentations are saved. -_SEGMENTATION_SAVE_FOLDER = 'segmentation' -_EMBEDDINGS_SAVE_FOLDER = 'embeddings' - - -def _process_seq_data(segmentation_dir, embeddings_dir, seq_name, - predicted_labels, gt_labels, embeddings): - """Calculates the sequence IoU and optionally save the segmentation masks. - - Args: - segmentation_dir: Directory in which the segmentation results are stored. - embeddings_dir: Directory in which the embeddings are stored. - seq_name: String, the name of the sequence. - predicted_labels: Int64 np.array of shape [n_frames, height, width]. - gt_labels: Ground truth labels, Int64 np.array of shape - [n_frames, height, width]. - embeddings: Float32 np.array of embeddings of shape - [n_frames, decoder_height, decoder_width, embedding_dim], or None. - - Returns: - The IoU for the sequence (float). 
- """ - sequence_dir = os.path.join(segmentation_dir, seq_name) - tf.gfile.MakeDirs(sequence_dir) - embeddings_seq_dir = os.path.join(embeddings_dir, seq_name) - tf.gfile.MakeDirs(embeddings_seq_dir) - label_set = np.unique(gt_labels[0]) - ious = [] - assert len(predicted_labels) == len(gt_labels) - if embeddings is not None: - assert len(predicted_labels) == len(embeddings) - for t, (predicted_label, gt_label) in enumerate( - zip(predicted_labels, gt_labels)): - if FLAGS.save_segmentations: - seg_filename = os.path.join(segmentation_dir, seq_name, '%05d.png' % t) - eval_utils.save_segmentation_with_colormap(seg_filename, predicted_label) - if FLAGS.save_embeddings: - embedding_filename = os.path.join(embeddings_dir, seq_name, - '%05d.npy' % t) - assert embeddings is not None - eval_utils.save_embeddings(embedding_filename, embeddings[t]) - object_ious_t = eval_utils.calculate_multi_object_ious( - predicted_label, gt_label, label_set) - ious.append(object_ious_t) - # First and last frame are excluded in DAVIS eval. - seq_ious = np.mean(ious[1:-1], axis=0) - tf.logging.info('seq ious: %s %s', seq_name, seq_ious) - return seq_ious - - -def create_predictions(samples, reference_labels, first_frame_img, - model_options): - """Predicts segmentation labels for each frame of the video. - - Slower version than create_predictions_fast, but does support more options. - - Args: - samples: Dictionary of input samples. - reference_labels: Int tensor of shape [1, height, width, 1]. - first_frame_img: Float32 tensor of shape [height, width, 3]. - model_options: An InternalModelOptions instance to configure models. - - Returns: - predicted_labels: Int tensor of shape [time, height, width] of - predicted labels for each frame. - all_embeddings: Float32 tensor of shape - [time, height, width, embedding_dim], or None. - """ - - def predict(args, imgs): - """Predicts segmentation labels and softmax probabilities for each image. 
- - Args: - args: A tuple of (predictions, softmax_probabilities), where predictions - is an int tensor of shape [1, h, w] and softmax_probabilities is a - float32 tensor of shape [1, h_decoder, w_decoder, n_objects]. - imgs: Either a one-tuple of the image to predict labels for of shape - [h, w, 3], or pair of previous frame and current frame image. - - Returns: - predictions: The predicted labels as int tensor of shape [1, h, w]. - softmax_probabilities: The softmax probabilities of shape - [1, h_decoder, w_decoder, n_objects]. - """ - if FLAGS.save_embeddings: - last_frame_predictions, last_softmax_probabilities, _ = args - else: - last_frame_predictions, last_softmax_probabilities = args - - if FLAGS.also_attend_to_previous_frame or FLAGS.use_softmax_feedback: - ref_labels_to_use = tf.concat( - [reference_labels, last_frame_predictions[..., tf.newaxis]], - axis=0) - else: - ref_labels_to_use = reference_labels - - predictions, softmax_probabilities = model.predict_labels( - tf.stack((first_frame_img,) + imgs), - model_options=model_options, - image_pyramid=FLAGS.image_pyramid, - embedding_dimension=FLAGS.embedding_dimension, - reference_labels=ref_labels_to_use, - k_nearest_neighbors=FLAGS.k_nearest_neighbors, - use_softmax_feedback=FLAGS.use_softmax_feedback, - initial_softmax_feedback=last_softmax_probabilities, - embedding_seg_feature_dimension= - FLAGS.embedding_seg_feature_dimension, - embedding_seg_n_layers=FLAGS.embedding_seg_n_layers, - embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size, - embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates, - also_return_softmax_probabilities=True, - num_frames_per_video= - (3 if FLAGS.also_attend_to_previous_frame or - FLAGS.use_softmax_feedback else 2), - normalize_nearest_neighbor_distances= - FLAGS.normalize_nearest_neighbor_distances, - also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame, - use_local_previous_frame_attention= - FLAGS.use_local_previous_frame_attention, - 
previous_frame_attention_window_size= - FLAGS.previous_frame_attention_window_size, - use_first_frame_matching=FLAGS.use_first_frame_matching - ) - predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32) - - if FLAGS.save_embeddings: - names = [n.name for n in tf.get_default_graph().as_graph_def().node] - embedding_names = [x for x in names if 'embeddings' in x] - # This will crash when multi-scale inference is used. - assert len(embedding_names) == 1, len(embedding_names) - embedding_name = embedding_names[0] + ':0' - embeddings = tf.get_default_graph().get_tensor_by_name(embedding_name) - return predictions, softmax_probabilities, embeddings - else: - return predictions, softmax_probabilities - - init_labels = tf.squeeze(reference_labels, axis=-1) - init_softmax = embedding_utils.create_initial_softmax_from_labels( - reference_labels, reference_labels, common.parse_decoder_output_stride(), - reduce_labels=False) - if FLAGS.save_embeddings: - decoder_height = tf.shape(init_softmax)[1] - decoder_width = tf.shape(init_softmax)[2] - n_frames = (3 if FLAGS.also_attend_to_previous_frame - or FLAGS.use_softmax_feedback else 2) - embeddings_init = tf.zeros((n_frames, decoder_height, decoder_width, - FLAGS.embedding_dimension)) - init = (init_labels, init_softmax, embeddings_init) - else: - init = (init_labels, init_softmax) - # Do not eval the first frame again but concat the first frame ground - # truth instead. 
- if FLAGS.also_attend_to_previous_frame or FLAGS.use_softmax_feedback: - elems = (samples[common.IMAGE][:-1], samples[common.IMAGE][1:]) - else: - elems = (samples[common.IMAGE][1:],) - res = tf.scan(predict, elems, - initializer=init, - parallel_iterations=1, - swap_memory=True) - if FLAGS.save_embeddings: - predicted_labels, _, all_embeddings = res - first_frame_embeddings = all_embeddings[0, 0, tf.newaxis] - other_frame_embeddings = all_embeddings[:, -1] - all_embeddings = tf.concat( - [first_frame_embeddings, other_frame_embeddings], axis=0) - else: - predicted_labels, _ = res - all_embeddings = None - predicted_labels = tf.concat([reference_labels[..., 0], - tf.squeeze(predicted_labels, axis=1)], - axis=0) - return predicted_labels, all_embeddings - - -def create_predictions_fast(samples, reference_labels, first_frame_img, - model_options): - """Predicts segmentation labels for each frame of the video. - - Faster version than create_predictions, but does not support all options. - - Args: - samples: Dictionary of input samples. - reference_labels: Int tensor of shape [1, height, width, 1]. - first_frame_img: Float32 tensor of shape [height, width, 3]. - model_options: An InternalModelOptions instance to configure models. - - Returns: - predicted_labels: Int tensor of shape [time, height, width] of - predicted labels for each frame. - all_embeddings: Float32 tensor of shape - [time, height, width, embedding_dim], or None. - - Raises: - ValueError: If FLAGS.save_embeddings is True, FLAGS.use_softmax_feedback is - False, or FLAGS.also_attend_to_previous_frame is False. - """ - if FLAGS.save_embeddings: - raise ValueError('save_embeddings does not work with ' - 'create_predictions_fast. Use the slower ' - 'create_predictions instead.') - if not FLAGS.use_softmax_feedback: - raise ValueError('use_softmax_feedback must be True for ' - 'create_predictions_fast. 
Use the slower ' - 'create_predictions instead.') - if not FLAGS.also_attend_to_previous_frame: - raise ValueError('also_attend_to_previous_frame must be True for ' - 'create_predictions_fast. Use the slower ' - 'create_predictions instead.') - # Extract embeddings for first frame and prepare initial predictions. - first_frame_embeddings = embedding_utils.get_embeddings( - first_frame_img[tf.newaxis], model_options, FLAGS.embedding_dimension) - init_labels = tf.squeeze(reference_labels, axis=-1) - init_softmax = embedding_utils.create_initial_softmax_from_labels( - reference_labels, reference_labels, common.parse_decoder_output_stride(), - reduce_labels=False) - init = (init_labels, init_softmax, first_frame_embeddings) - - def predict(args, img): - """Predicts segmentation labels and softmax probabilities for each image. - - Args: - args: tuple of - (predictions, softmax_probabilities, last_frame_embeddings), where - predictions is an int tensor of shape [1, h, w], - softmax_probabilities is a float32 tensor of shape - [1, h_decoder, w_decoder, n_objects], - and last_frame_embeddings is a float32 tensor of shape - [h_decoder, w_decoder, embedding_dimension]. - img: Image to predict labels for of shape [h, w, 3]. - - Returns: - predictions: The predicted labels as int tensor of shape [1, h, w]. - softmax_probabilities: The softmax probabilities of shape - [1, h_decoder, w_decoder, n_objects]. 
- """ - (last_frame_predictions, last_softmax_probabilities, - prev_frame_embeddings) = args - ref_labels_to_use = tf.concat( - [reference_labels, last_frame_predictions[..., tf.newaxis]], - axis=0) - - predictions, softmax_probabilities, embeddings = model.predict_labels( - img[tf.newaxis], - model_options=model_options, - image_pyramid=FLAGS.image_pyramid, - embedding_dimension=FLAGS.embedding_dimension, - reference_labels=ref_labels_to_use, - k_nearest_neighbors=FLAGS.k_nearest_neighbors, - use_softmax_feedback=FLAGS.use_softmax_feedback, - initial_softmax_feedback=last_softmax_probabilities, - embedding_seg_feature_dimension= - FLAGS.embedding_seg_feature_dimension, - embedding_seg_n_layers=FLAGS.embedding_seg_n_layers, - embedding_seg_kernel_size=FLAGS.embedding_seg_kernel_size, - embedding_seg_atrous_rates=FLAGS.embedding_seg_atrous_rates, - also_return_softmax_probabilities=True, - num_frames_per_video=1, - normalize_nearest_neighbor_distances= - FLAGS.normalize_nearest_neighbor_distances, - also_attend_to_previous_frame=FLAGS.also_attend_to_previous_frame, - use_local_previous_frame_attention= - FLAGS.use_local_previous_frame_attention, - previous_frame_attention_window_size= - FLAGS.previous_frame_attention_window_size, - use_first_frame_matching=FLAGS.use_first_frame_matching, - also_return_embeddings=True, - ref_embeddings=(first_frame_embeddings, prev_frame_embeddings) - ) - predictions = tf.cast(predictions[common.OUTPUT_TYPE], tf.int32) - return predictions, softmax_probabilities, embeddings - - # Do not eval the first frame again but concat the first frame ground - # truth instead. - # If you have a lot of GPU memory, you can try to set swap_memory=False, - # and/or parallel_iterations=2. 
- elems = samples[common.IMAGE][1:] - res = tf.scan(predict, elems, - initializer=init, - parallel_iterations=1, - swap_memory=True) - predicted_labels, _, _ = res - predicted_labels = tf.concat([reference_labels[..., 0], - tf.squeeze(predicted_labels, axis=1)], - axis=0) - return predicted_labels - - -def main(unused_argv): - if FLAGS.vis_batch_size != 1: - raise ValueError('Only batch size 1 is supported for now') - - data_type = 'tf_sequence_example' - # Get dataset-dependent information. - dataset = video_dataset.get_dataset( - FLAGS.dataset, - FLAGS.vis_split, - dataset_dir=FLAGS.dataset_dir, - data_type=data_type) - - # Prepare for visualization. - tf.gfile.MakeDirs(FLAGS.vis_logdir) - segmentation_dir = os.path.join(FLAGS.vis_logdir, _SEGMENTATION_SAVE_FOLDER) - tf.gfile.MakeDirs(segmentation_dir) - embeddings_dir = os.path.join(FLAGS.vis_logdir, _EMBEDDINGS_SAVE_FOLDER) - tf.gfile.MakeDirs(embeddings_dir) - num_vis_examples = (dataset.num_videos if (FLAGS.num_vis_examples < 0) - else FLAGS.num_vis_examples) - if FLAGS.first_frame_finetuning: - num_vis_examples = 1 - - tf.logging.info('Visualizing on %s set', FLAGS.vis_split) - g = tf.Graph() - with g.as_default(): - # Without setting device to CPU we run out of memory. 
- with tf.device('cpu:0'): - samples = video_input_generator.get( - dataset, - None, - None, - FLAGS.vis_batch_size, - min_resize_value=FLAGS.min_resize_value, - max_resize_value=FLAGS.max_resize_value, - resize_factor=FLAGS.resize_factor, - dataset_split=FLAGS.vis_split, - is_training=False, - model_variant=FLAGS.model_variant, - preprocess_image_and_label=False, - remap_labels_to_reference_frame=False) - samples[common.IMAGE] = tf.cast(samples[common.IMAGE], tf.float32) - samples[common.LABEL] = tf.cast(samples[common.LABEL], tf.int32) - first_frame_img = samples[common.IMAGE][0] - reference_labels = samples[common.LABEL][0, tf.newaxis] - gt_labels = tf.squeeze(samples[common.LABEL], axis=-1) - seq_name = samples[common.VIDEO_ID][0] - - model_options = common.VideoModelOptions( - outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes}, - crop_size=None, - atrous_rates=FLAGS.atrous_rates, - output_stride=FLAGS.output_stride) - - all_embeddings = None - predicted_labels = create_predictions_fast( - samples, reference_labels, first_frame_img, model_options) - # If you need more options like saving embeddings, replace the call to - # create_predictions_fast with create_predictions. - - tf.train.get_or_create_global_step() - saver = tf.train.Saver(slim.get_variables_to_restore()) - sv = tf.train.Supervisor(graph=g, - logdir=FLAGS.vis_logdir, - init_op=tf.global_variables_initializer(), - summary_op=None, - summary_writer=None, - global_step=None, - saver=saver) - num_batches = int( - math.ceil(num_vis_examples / float(FLAGS.vis_batch_size))) - last_checkpoint = None - - # Infinite loop to visualize the results when new checkpoint is created. 
- while True: - last_checkpoint = slim.evaluation.wait_for_new_checkpoint( - FLAGS.checkpoint_dir, last_checkpoint) - start = time.time() - tf.logging.info( - 'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S', - time.gmtime())) - tf.logging.info('Visualizing with model %s', last_checkpoint) - - all_ious = [] - with sv.managed_session(FLAGS.master, - start_standard_services=False) as sess: - sv.start_queue_runners(sess) - sv.saver.restore(sess, last_checkpoint) - - for batch in range(num_batches): - ops = [predicted_labels, gt_labels, seq_name] - if FLAGS.save_embeddings: - ops.append(all_embeddings) - tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches) - res = sess.run(ops) - tf.logging.info('Forwarding done') - pred_labels_val, gt_labels_val, seq_name_val = res[:3] - if FLAGS.save_embeddings: - all_embeddings_val = res[3] - else: - all_embeddings_val = None - seq_ious = _process_seq_data(segmentation_dir, embeddings_dir, - seq_name_val, pred_labels_val, - gt_labels_val, all_embeddings_val) - all_ious.append(seq_ious) - all_ious = np.concatenate(all_ious, axis=0) - tf.logging.info('n_seqs %s, mIoU %f', all_ious.shape, all_ious.mean()) - tf.logging.info( - 'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S', - time.gmtime())) - result_dir = FLAGS.vis_logdir + '/results/' - tf.gfile.MakeDirs(result_dir) - with tf.gfile.GFile(result_dir + seq_name_val + '.txt', 'w') as f: - f.write(str(all_ious)) - if FLAGS.first_frame_finetuning or FLAGS.eval_once_and_quit: - break - time_to_next_eval = start + FLAGS.eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - -if __name__ == '__main__': - flags.mark_flag_as_required('checkpoint_dir') - flags.mark_flag_as_required('vis_logdir') - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/research/fivo/.gitattributes b/research/fivo/.gitattributes deleted file mode 100644 index 
f706c0421d718f8af8e62d96d69101fe383d2b4f..0000000000000000000000000000000000000000 --- a/research/fivo/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -*.pkl binary -*.tfrecord binary diff --git a/research/fivo/.gitignore b/research/fivo/.gitignore deleted file mode 100644 index af2f537516daf33fdaf579436dfa33fdd9044f49..0000000000000000000000000000000000000000 --- a/research/fivo/.gitignore +++ /dev/null @@ -1,104 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -.static_storage/ -.media/ -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ diff --git a/research/fivo/README.md b/research/fivo/README.md deleted file mode 100644 index 36d355b1b2961f2c8c8b721b5ce13c0c3eab1e8b..0000000000000000000000000000000000000000 --- a/research/fivo/README.md +++ /dev/null @@ -1,215 +0,0 @@ -![No Maintenance 
Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Filtering Variational Objectives - -This folder contains a TensorFlow implementation of the algorithms from - -Chris J. Maddison\*, Dieterich Lawson\*, George Tucker\*, Nicolas Heess, Mohammad Norouzi, Andriy Mnih, Arnaud Doucet, and Yee Whye Teh. "Filtering Variational Objectives." NIPS 2017. - -[https://arxiv.org/abs/1705.09279](https://arxiv.org/abs/1705.09279) - -This code implements 3 different bounds for training sequential latent variable models: the evidence lower bound (ELBO), the importance weighted auto-encoder bound (IWAE), and our bound, the filtering variational objective (FIVO). - -Additionally it contains several sequential latent variable model implementations: - -* Variational recurrent neural network (VRNN) -* Stochastic recurrent neural network (SRNN) -* Gaussian hidden Markov model with linear conditionals (GHMM) - -The VRNN and SRNN can be trained for sequence modeling of pianoroll and speech data. The GHMM is trainable on a synthetic dataset, useful as a simple example of an analytically tractable model. - -#### Directory Structure -The important parts of the code are organized as follows. 
- -``` -run_fivo.py # main script, contains flag definitions -fivo -├─smc.py # a sequential Monte Carlo implementation -├─bounds.py # code for computing each bound, uses smc.py -├─runners.py # code for VRNN and SRNN training and evaluation -├─ghmm_runners.py # code for GHMM training and evaluation -├─data -| ├─datasets.py # readers for pianoroll and speech datasets -| ├─calculate_pianoroll_mean.py # preprocesses the pianoroll datasets -| └─create_timit_dataset.py # preprocesses the TIMIT dataset -└─models - ├─base.py # base classes used in other models - ├─vrnn.py # VRNN implementation - ├─srnn.py # SRNN implementation - └─ghmm.py # Gaussian hidden Markov model (GHMM) implementation -bin -├─run_train.sh # an example script that runs training -├─run_eval.sh # an example script that runs evaluation -├─run_sample.sh # an example script that runs sampling -├─run_tests.sh # a script that runs all tests -└─download_pianorolls.sh # a script that downloads pianoroll files -``` - -### Pianorolls - -Requirements before we start: - -* TensorFlow (see [tensorflow.org](http://tensorflow.org) for how to install) -* [scipy](https://www.scipy.org/) -* [sonnet](https://github.com/deepmind/sonnet) - - -#### Download the Data - -The pianoroll datasets are encoded as pickled sparse arrays and are available at [http://www-etud.iro.umontreal.ca/~boulanni/icml2012](http://www-etud.iro.umontreal.ca/~boulanni/icml2012). You can use the script `bin/download_pianorolls.sh` to download the files into a directory of your choosing. -``` -export PIANOROLL_DIR=~/pianorolls -mkdir $PIANOROLL_DIR -sh bin/download_pianorolls.sh $PIANOROLL_DIR -``` - -#### Preprocess the Data - -The script `calculate_pianoroll_mean.py` loads a pianoroll pickle file, calculates the mean, updates the pickle file to include the mean under the key `train_mean`, and writes the file back to disk in-place. You should do this for all pianoroll datasets you wish to train on. 
- -``` -python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/piano-midi.de.pkl -python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/nottingham.de.pkl -python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/musedata.pkl -python data/calculate_pianoroll_mean.py --in_file=$PIANOROLL_DIR/jsb.pkl -``` - -#### Training - -Now we can train a model. Here is the command for a standard training run, taken from `bin/run_train.sh`: -``` -python run_fivo.py \ - --mode=train \ - --logdir=/tmp/fivo \ - --model=vrnn \ - --bound=fivo \ - --summarize_every=100 \ - --batch_size=4 \ - --num_samples=4 \ - --learning_rate=0.0001 \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" -``` - -You should see output that looks something like this (with extra logging cruft): - -``` -Saving checkpoints for 0 into /tmp/fivo/model.ckpt. -Step 1, fivo bound per timestep: -11.322491 -global_step/sec: 7.49971 -Step 101, fivo bound per timestep: -11.399275 -global_step/sec: 8.04498 -Step 201, fivo bound per timestep: -11.174991 -global_step/sec: 8.03989 -Step 301, fivo bound per timestep: -11.073008 -``` -#### Evaluation - -You can also evaluate saved checkpoints. The `eval` mode loads a model checkpoint, tests its performance on all items in a dataset, and reports the log-likelihood averaged over the dataset. For example here is a command, taken from `bin/run_eval.sh`, that will evaluate a JSB model on the test set: - -``` -python run_fivo.py \ - --mode=eval \ - --split=test \ - --alsologtostderr \ - --logdir=/tmp/fivo \ - --model=vrnn \ - --batch_size=4 \ - --num_samples=4 \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" -``` - -You should see output like this: -``` -Restoring parameters from /tmp/fivo/model.ckpt-0 -Model restored from step 0, evaluating. 
-test elbo ll/t: -12.198834, iwae ll/t: -11.981187 fivo ll/t: -11.579776 -test elbo ll/seq: -748.564789, iwae ll/seq: -735.209206 fivo ll/seq: -710.577141 -``` -The evaluation script prints log-likelihood in both nats per timestep (ll/t) and nats per sequence (ll/seq) for all three bounds. - -#### Sampling - -You can also sample from trained models. The `sample` mode loads a model checkpoint, conditions the model on a prefix of a randomly chosen datapoint, samples a sequence of outputs from the conditioned model, and writes out the samples and prefix to a `.npz` file in `logdir`. For example here is a command that samples from a model trained on JSB, taken from `bin/run_sample.sh`: -``` -python run_fivo.py \ - --mode=sample \ - --alsologtostderr \ - --logdir="/tmp/fivo" \ - --model=vrnn \ - --bound=fivo \ - --batch_size=4 \ - --num_samples=4 \ - --split=test \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" \ - --prefix_length=25 \ - --sample_length=50 -``` - -Here `num_samples` denotes the number of samples used when conditioning the model as well as the number of trajectories to sample for each prefix. - -You should see very little output. -``` -Restoring parameters from /tmp/fivo/model.ckpt-0 -Running local_init_op. -Done running local_init_op. -``` - -Loading the samples with `np.load` confirms that we conditioned the model on 4 -prefixes of length 25 and sampled 4 sequences of length 50 for each prefix. -``` ->>> import numpy as np ->>> x = np.load("/tmp/fivo/samples.npz") ->>> x[()]['prefixes'].shape -(25, 4, 88) ->>> x[()]['samples'].shape -(50, 4, 4, 88) -``` - -### Training on TIMIT - -The TIMIT speech dataset is available at the [Linguistic Data Consortium website](https://catalog.ldc.upenn.edu/LDC93S1), but is unfortunately not free. These instructions will proceed assuming you have downloaded the TIMIT archive and extracted it into the directory `$RAW_TIMIT_DIR`. 
- -#### Preprocess TIMIT - -We preprocess TIMIT (as described in our paper) and write it out to a series of TFRecord files. To prepare the TIMIT dataset use the script `create_timit_dataset.py` -``` -export $TIMIT_DIR=~/timit_dataset -mkdir $TIMIT_DIR -python data/create_timit_dataset.py \ - --raw_timit_dir=$RAW_TIMIT_DIR \ - --out_dir=$TIMIT_DIR -``` -You should see this exact output: -``` -4389 train / 231 valid / 1680 test -train mean: 0.006060 train std: 548.136169 -``` - -#### Training on TIMIT -This is very similar to training on pianoroll datasets, with just a few flags switched. -``` -python run_fivo.py \ - --mode=train \ - --logdir=/tmp/fivo \ - --model=vrnn \ - --bound=fivo \ - --summarize_every=100 \ - --batch_size=4 \ - --num_samples=4 \ - --learning_rate=0.0001 \ - --dataset_path="$TIMIT_DIR/train" \ - --dataset_type="speech" -``` -Evaluation and sampling are similar. - -### Tests -This codebase comes with a number of tests to verify correctness, runnable via `bin/run_tests.sh`. The tests are also useful to look at for examples of how to use the code. - -### Contact - -This codebase is maintained by Dieterich Lawson. For questions and issues please open an issue on the tensorflow/models issues tracker and assign it to @dieterichlawson. diff --git a/research/fivo/bin/download_pianorolls.sh b/research/fivo/bin/download_pianorolls.sh deleted file mode 100644 index ef7050b4df5fb9815be04d133e659fa31d8d055e..0000000000000000000000000000000000000000 --- a/research/fivo/bin/download_pianorolls.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# A script to download the pianoroll datasets. -# Accepts one argument, the directory to put the files in - -if [ -z "$1" ] - then - echo "Error, must provide a directory to download the files to." - exit -fi - -echo "Downloading datasets into $1" -curl -s "http://www-etud.iro.umontreal.ca/~boulanni/Piano-midi.de.pickle" > $1/piano-midi.de.pkl -curl -s "http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.pickle" > $1/nottingham.pkl -curl -s "http://www-etud.iro.umontreal.ca/~boulanni/MuseData.pickle" > $1/musedata.pkl -curl -s "http://www-etud.iro.umontreal.ca/~boulanni/JSB%20Chorales.pickle" > $1/jsb.pkl diff --git a/research/fivo/bin/run_eval.sh b/research/fivo/bin/run_eval.sh deleted file mode 100644 index b30bcedc2d16e5bdd681386100ecca23612a139a..0000000000000000000000000000000000000000 --- a/research/fivo/bin/run_eval.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# An example of running evaluation. - -PIANOROLL_DIR=$HOME/pianorolls - -python run_fivo.py \ - --mode=eval \ - --logdir=/tmp/fivo \ - --model=vrnn \ - --batch_size=4 \ - --num_samples=4 \ - --split=test \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" diff --git a/research/fivo/bin/run_sample.sh b/research/fivo/bin/run_sample.sh deleted file mode 100644 index e0c82a0cb137822e85035a23081ecf6408b7cca1..0000000000000000000000000000000000000000 --- a/research/fivo/bin/run_sample.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# An example of sampling from the model. 
- -PIANOROLL_DIR=$HOME/pianorolls - -python run_fivo.py \ - --mode=sample \ - --alsologtostderr \ - --logdir="/tmp/fivo" \ - --model=vrnn \ - --bound=fivo \ - --batch_size=4 \ - --num_samples=4 \ - --split=test \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" \ - --prefix_length=25 \ - --sample_length=50 diff --git a/research/fivo/bin/run_tests.sh b/research/fivo/bin/run_tests.sh deleted file mode 100644 index 2ea58f016620db98e258494919c6d339b5fd996e..0000000000000000000000000000000000000000 --- a/research/fivo/bin/run_tests.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -python -m fivo.smc_test && \ -python -m fivo.bounds_test && \ -python -m fivo.nested_utils_test && \ -python -m fivo.data.datasets_test && \ -python -m fivo.models.ghmm_test && \ -python -m fivo.models.vrnn_test && \ -python -m fivo.models.srnn_test && \ -python -m fivo.ghmm_runners_test && \ -python -m fivo.runners_test diff --git a/research/fivo/bin/run_train.sh b/research/fivo/bin/run_train.sh deleted file mode 100644 index a845959770c77cd99528005e1ee69e4593fcae0c..0000000000000000000000000000000000000000 --- a/research/fivo/bin/run_train.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# An example of running training. - -PIANOROLL_DIR=$HOME/pianorolls - -python run_fivo.py \ - --mode=train \ - --logdir=/tmp/fivo \ - --model=vrnn \ - --bound=fivo \ - --summarize_every=100 \ - --batch_size=4 \ - --num_samples=4 \ - --learning_rate=0.0001 \ - --dataset_path="$PIANOROLL_DIR/jsb.pkl" \ - --dataset_type="pianoroll" diff --git a/research/fivo/experimental/README.md b/research/fivo/experimental/README.md deleted file mode 100644 index 649de0ba95cdee2fa1b101a588dc48903b2ca13b..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/README.md +++ /dev/null @@ -1 +0,0 @@ -An experimental codebase for running simple examples. diff --git a/research/fivo/experimental/bounds.py b/research/fivo/experimental/bounds.py deleted file mode 100644 index afc970c59a1a86dbe8438b4e8bba791d3c95aa63..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/bounds.py +++ /dev/null @@ -1,673 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple - -import tensorflow as tf -import summary_utils as summ - -Loss = namedtuple("Loss", "name loss vars") -Loss.__new__.__defaults__ = (tf.GraphKeys.TRAINABLE_VARIABLES,) - - -def iwae(model, observation, num_timesteps, num_samples=1, - summarize=False): - """Compute the IWAE evidence lower bound. - - Args: - model: A callable that computes one timestep of the model. - observation: A shape [batch_size*num_samples, state_size] Tensor - containing z_n, the observation for each sequence in the batch. - num_timesteps: The number of timesteps in each sequence, an integer. - num_samples: The number of samples to use to compute the IWAE bound. - Returns: - log_p_hat: The IWAE estimator of the lower bound on the log marginal. - loss: A tensor that you can perform gradient descent on to optimize the - bound. - maintain_ema_op: A no-op included for compatibility with FIVO. - states: The sequence of states sampled. 
- """ - # Initialization - num_instances = tf.shape(observation)[0] - batch_size = tf.cast(num_instances / num_samples, tf.int32) - states = [model.zero_state(num_instances)] - log_weights = [] - log_weight_acc = tf.zeros([num_samples, batch_size], dtype=observation.dtype) - - for t in xrange(num_timesteps): - # run the model for one timestep - (zt, log_q_zt, log_p_zt, log_p_x_given_z, _) = model( - states[-1], observation, t) - # update accumulators - states.append(zt) - log_weight = log_p_zt + log_p_x_given_z - log_q_zt - log_weight_acc += tf.reshape(log_weight, [num_samples, batch_size]) - if summarize: - weight_dist = tf.contrib.distributions.Categorical( - logits=tf.transpose(log_weight_acc, perm=[1, 0]), - allow_nan_stats=False) - weight_entropy = weight_dist.entropy() - weight_entropy = tf.reduce_mean(weight_entropy) - tf.summary.scalar("weight_entropy/%d" % t, weight_entropy) - log_weights.append(log_weight_acc) - # Compute the lower bound on the log evidence. - log_p_hat = (tf.reduce_logsumexp(log_weight_acc, axis=0) - - tf.log(tf.cast(num_samples, observation.dtype))) / num_timesteps - loss = -tf.reduce_mean(log_p_hat) - losses = [Loss("log_p_hat", loss)] - - # we clip off the initial state before returning. - # there are no emas for iwae, so we return a noop for that - return log_p_hat, losses, tf.no_op(), states[1:], log_weights - - -def multinomial_resampling(log_weights, states, n, b): - """Resample states with multinomial resampling. - - Args: - log_weights: A (n x b) Tensor representing a batch of b logits for n-ary - Categorical distribution. - states: A list of (b*n x d) Tensors that will be resample in from the groups - of every n-th row. - - Returns: - resampled_states: A list of (b*n x d) Tensors resampled via stratified sampling. - log_probs: A (n x b) Tensor of the log probabilities of the ancestry decisions. - resampling_parameters: The Tensor of parameters of the resampling distribution. 
- ancestors: An (n x b) Tensor of integral indices representing the ancestry decisions. - resampling_dist: The distribution object for resampling. - """ - log_weights = tf.convert_to_tensor(log_weights) - states = [tf.convert_to_tensor(state) for state in states] - - resampling_parameters = tf.transpose(log_weights, perm=[1,0]) - resampling_dist = tf.contrib.distributions.Categorical(logits=resampling_parameters) - ancestors = tf.stop_gradient( - resampling_dist.sample(sample_shape=n)) - log_probs = resampling_dist.log_prob(ancestors) - - offset = tf.expand_dims(tf.range(b), 0) - ancestor_inds = tf.reshape(ancestors * b + offset, [-1]) - - resampled_states = [] - for state in states: - resampled_states.append(tf.gather(state, ancestor_inds)) - return resampled_states, log_probs, resampling_parameters, ancestors, resampling_dist - -def stratified_resampling(log_weights, states, n, b): - """Resample states with straitified resampling. - - Args: - log_weights: A (n x b) Tensor representing a batch of b logits for n-ary - Categorical distribution. - states: A list of (b*n x d) Tensors that will be resample in from the groups - of every n-th row. - - Returns: - resampled_states: A list of (b*n x d) Tensors resampled via stratified sampling. - log_probs: A (n x b) Tensor of the log probabilities of the ancestry decisions. - resampling_parameters: The Tensor of parameters of the resampling distribution. - ancestors: An (n x b) Tensor of integral indices representing the ancestry decisions. - resampling_dist: The distribution object for resampling. 
- """ - log_weights = tf.convert_to_tensor(log_weights) - states = [tf.convert_to_tensor(state) for state in states] - - log_weights = tf.transpose(log_weights, perm=[1,0]) - - probs = tf.nn.softmax( - tf.tile(tf.expand_dims(log_weights, axis=1), - [1, n, 1]) - ) - - cdfs = tf.concat([tf.zeros((b,n,1), dtype=probs.dtype), tf.cumsum(probs, axis=2)], 2) - - bins = tf.range(n, dtype=probs.dtype) / n - bins = tf.tile(tf.reshape(bins, [1,-1,1]), [b,1,n+1]) - - strat_cdfs = tf.minimum(tf.maximum((cdfs - bins) * n, 0.0), 1.0) - resampling_parameters = strat_cdfs[:,:,1:] - strat_cdfs[:,:,:-1] - - resampling_dist = tf.contrib.distributions.Categorical( - probs = resampling_parameters, - allow_nan_stats=False) - - ancestors = tf.stop_gradient( - resampling_dist.sample()) - log_probs = resampling_dist.log_prob(ancestors) - - ancestors = tf.transpose(ancestors, perm=[1,0]) - log_probs = tf.transpose(log_probs, perm=[1,0]) - - offset = tf.expand_dims(tf.range(b), 0) - ancestor_inds = tf.reshape(ancestors * b + offset, [-1]) - - resampled_states = [] - for state in states: - resampled_states.append(tf.gather(state, ancestor_inds)) - - return resampled_states, log_probs, resampling_parameters, ancestors, resampling_dist - -def systematic_resampling(log_weights, states, n, b): - """Resample states with systematic resampling. - - Args: - log_weights: A (n x b) Tensor representing a batch of b logits for n-ary - Categorical distribution. - states: A list of (b*n x d) Tensors that will be resample in from the groups - of every n-th row. - - Returns: - resampled_states: A list of (b*n x d) Tensors resampled via stratified sampling. - log_probs: A (n x b) Tensor of the log probabilities of the ancestry decisions. - resampling_parameters: The Tensor of parameters of the resampling distribution. - ancestors: An (n x b) Tensor of integral indices representing the ancestry decisions. - resampling_dist: The distribution object for resampling. 
- """ - - log_weights = tf.convert_to_tensor(log_weights) - states = [tf.convert_to_tensor(state) for state in states] - - log_weights = tf.transpose(log_weights, perm=[1,0]) - - probs = tf.nn.softmax( - tf.tile(tf.expand_dims(log_weights, axis=1), - [1, n, 1]) - ) - - cdfs = tf.concat([tf.zeros((b,n,1), dtype=probs.dtype), tf.cumsum(probs, axis=2)], 2) - - bins = tf.range(n, dtype=probs.dtype) / n - bins = tf.tile(tf.reshape(bins, [1,-1,1]), [b,1,n+1]) - - strat_cdfs = tf.minimum(tf.maximum((cdfs - bins) * n, 0.0), 1.0) - resampling_parameters = strat_cdfs[:,:,1:] - strat_cdfs[:,:,:-1] - - resampling_dist = tf.contrib.distributions.Categorical( - probs=resampling_parameters, - allow_nan_stats=True) - - U = tf.random_uniform((b, 1, 1), dtype=probs.dtype) - - ancestors = tf.stop_gradient(tf.reduce_sum(tf.to_float(U > strat_cdfs[:,:,1:]), axis=-1)) - log_probs = resampling_dist.log_prob(ancestors) - - ancestors = tf.transpose(ancestors, perm=[1,0]) - log_probs = tf.transpose(log_probs, perm=[1,0]) - - offset = tf.expand_dims(tf.range(b, dtype=probs.dtype), 0) - ancestor_inds = tf.reshape(ancestors * b + offset, [-1]) - - resampled_states = [] - for state in states: - resampled_states.append(tf.gather(state, ancestor_inds)) - - return resampled_states, log_probs, resampling_parameters, ancestors, resampling_dist - - -def log_blend(inputs, weights): - """Blends state in the log space. - - Args: - inputs: A set of scalar states, one for each particle in each particle filter. - Should be [num_samples, batch_size]. - weights: A set of weights used to blend the state. Each set of weights - should be of dimension [num_samples] (one weight for each previous particle). - There should be one set of weights for each new particle in each particle filter. - Thus the shape should be [num_samples, batch_size, num_samples] where - the first axis indexes new particle and the last axis indexes old particles. 
- Returns: - blended: The blended states, a tensor of shape [num_samples, batch_size]. - """ - raw_max = tf.reduce_max(inputs, axis=0, keepdims=True) - my_max = tf.stop_gradient( - tf.where(tf.is_finite(raw_max), raw_max, tf.zeros_like(raw_max)) - ) - # Don't ask. - blended = tf.log(tf.einsum("ijk,kj->ij", weights, tf.exp(inputs - raw_max))) + my_max - return blended - - -def relaxed_resampling(log_weights, states, num_samples, batch_size, - log_r_x=None, blend_type="log", temperature=0.5, - straight_through=False): - """Resample states with relaxed resampling. - - Args: - log_weights: A (n x b) Tensor representing a batch of b logits for n-ary - Categorical distribution. - states: A list of (b*n x d) Tensors that will be resample in from the groups - of every n-th row. - - Returns: - resampled_states: A list of (b*n x d) Tensors resampled via stratified sampling. - log_probs: A (n x b) Tensor of the log probabilities of the ancestry decisions. - resampling_parameters: The Tensor of parameters of the resampling distribution. - ancestors: An (n x b x n) Tensor of relaxed one hot representations of the ancestry decisions. - resampling_dist: The distribution object for resampling. - """ - assert blend_type in ["log", "linear"], "Blend type must be 'log' or 'linear'." - log_weights = tf.convert_to_tensor(log_weights) - states = [tf.convert_to_tensor(state) for state in states] - state_dim = states[0].get_shape().as_list()[-1] - # weights are num_samples by batch_size, so we transpose to get a - # set of batch_size distributions over [0,num_samples). - resampling_parameters = tf.transpose(log_weights, perm=[1, 0]) - resampling_dist = tf.contrib.distributions.RelaxedOneHotCategorical( - temperature, - logits=resampling_parameters) - - # sample num_samples samples from the distribution, resulting in a - # [num_samples, batch_size, num_samples] Tensor that represents a set of - # [num_samples, batch_size] blending weights. 
The dimensions represent - # [sample index, batch index, blending weight index] - ancestors = resampling_dist.sample(sample_shape=num_samples) - if straight_through: - # Forward pass discrete choices, backwards pass soft choices - hard_ancestor_indices = tf.argmax(ancestors, axis=-1) - hard_ancestors = tf.one_hot(hard_ancestor_indices, num_samples, - dtype=ancestors.dtype) - ancestors = tf.stop_gradient(hard_ancestors - ancestors) + ancestors - log_probs = resampling_dist.log_prob(ancestors) - if log_r_x is not None and blend_type == "log": - log_r_x = tf.reshape(log_r_x, [num_samples, batch_size]) - log_r_x = log_blend(log_r_x, ancestors) - log_r_x = tf.reshape(log_r_x, [num_samples*batch_size]) - elif log_r_x is not None and blend_type == "linear": - # If blend type is linear just add log_r to the states that will be blended - # linearly. - states.append(log_r_x) - - # transpose the 'indices' to be [batch_index, blending weight index, sample index] - ancestor_inds = tf.transpose(ancestors, perm=[1, 2, 0]) - resampled_states = [] - for state in states: - # state is currently [num_samples * batch_size, state_dim] so we reshape - # to [num_samples, batch_size, state_dim] and then transpose to - # [batch_size, state_size, num_samples] - state = tf.transpose(tf.reshape(state, [num_samples, batch_size, -1]), perm=[1, 2, 0]) - # state is now (batch_size, state_size, num_samples) - # and ancestor is (batch index, blending weight index, sample index) - # multiplying these gives a matrix of size [batch_size, state_size, num_samples] - next_state = tf.matmul(state, ancestor_inds) - # transpose the state to be [num_samples, batch_size, state_size] - # and then reshape it to match the state format. 
- next_state = tf.reshape(tf.transpose(next_state, perm=[2,0,1]), [num_samples*batch_size, state_dim]) - resampled_states.append(next_state) - - new_dist = tf.contrib.distributions.Categorical( - logits=resampling_parameters) - - if log_r_x is not None and blend_type == "linear": - # If blend type is linear pop off log_r that we added to the states. - log_r_x = tf.squeeze(resampled_states[-1]) - resampled_states = resampled_states[:-1] - return resampled_states, log_probs, log_r_x, resampling_parameters, ancestors, new_dist - - -def fivo(model, - observation, - num_timesteps, - resampling_schedule, - num_samples=1, - use_resampling_grads=True, - resampling_type="multinomial", - resampling_temperature=0.5, - aux=True, - summarize=False): - """Compute the FIVO evidence lower bound. - - Args: - model: A callable that computes one timestep of the model. - observation: A shape [batch_size*num_samples, state_size] Tensor - containing z_n, the observation for each sequence in the batch. - num_timesteps: The number of timesteps in each sequence, an integer. - resampling_schedule: A list of booleans of length num_timesteps, contains - True if a resampling should occur on a specific timestep. - num_samples: The number of samples to use to compute the IWAE bound. - use_resampling_grads: Whether or not to include the resampling gradients - in loss. - resampling type: The type of resampling, one of "multinomial", "stratified", - "relaxed-logblend", "relaxed-linearblend", "relaxed-stateblend", or - "systematic". - resampling_temperature: A positive temperature only used for relaxed - resampling. - aux: If true, compute the FIVO-AUX bound. - Returns: - log_p_hat: The IWAE estimator of the lower bound on the log marginal. - loss: A tensor that you can perform gradient descent on to optimize the - bound. - maintain_ema_op: An op to update the baseline ema used for the resampling - gradients. - states: The sequence of states sampled. 
- """ - # Initialization - num_instances = tf.cast(tf.shape(observation)[0], tf.int32) - batch_size = tf.cast(num_instances / num_samples, tf.int32) - states = [model.zero_state(num_instances)] - prev_state = states[0] - log_weight_acc = tf.zeros(shape=[num_samples, batch_size], dtype=observation.dtype) - prev_log_r_zt = tf.zeros([num_instances], dtype=observation.dtype) - log_weights = [] - log_weights_all = [] - log_p_hats = [] - resampling_log_probs = [] - for t in xrange(num_timesteps): - # run the model for one timestep - (zt, log_q_zt, log_p_zt, log_p_x_given_z, log_r_zt) = model( - prev_state, observation, t) - # update accumulators - states.append(zt) - log_weight = log_p_zt + log_p_x_given_z - log_q_zt - if aux: - if t == num_timesteps - 1: - log_weight -= prev_log_r_zt - else: - log_weight += log_r_zt - prev_log_r_zt - prev_log_r_zt = log_r_zt - log_weight_acc += tf.reshape(log_weight, [num_samples, batch_size]) - log_weights_all.append(log_weight_acc) - if resampling_schedule[t]: - - # These objects will be resampled - to_resample = [states[-1]] - if aux and "relaxed" not in resampling_type: - to_resample.append(prev_log_r_zt) - - # do the resampling - if resampling_type == "multinomial": - (resampled, - resampling_log_prob, - _, _, _) = multinomial_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size) - elif resampling_type == "stratified": - (resampled, - resampling_log_prob, - _, _, _) = stratified_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size) - elif resampling_type == "systematic": - (resampled, - resampling_log_prob, - _, _, _) = systematic_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size) - elif "relaxed" in resampling_type: - if aux: - if resampling_type == "relaxed-logblend": - (resampled, - resampling_log_prob, - prev_log_r_zt, - _, _, _) = relaxed_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size, - temperature=resampling_temperature, - log_r_x=prev_log_r_zt, - 
blend_type="log") - elif resampling_type == "relaxed-linearblend": - (resampled, - resampling_log_prob, - prev_log_r_zt, - _, _, _) = relaxed_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size, - temperature=resampling_temperature, - log_r_x=prev_log_r_zt, - blend_type="linear") - elif resampling_type == "relaxed-stateblend": - (resampled, - resampling_log_prob, - _, _, _, _) = relaxed_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size, - temperature=resampling_temperature) - # Calculate prev_log_r_zt from the post-resampling state - prev_r_zt = model.r.r_xn(resampled[0], t) - prev_log_r_zt = tf.reduce_sum( - prev_r_zt.log_prob(observation), axis=[1]) - elif resampling_type == "relaxed-stateblend-st": - (resampled, - resampling_log_prob, - _, _, _, _) = relaxed_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size, - temperature=resampling_temperature, - straight_through=True) - # Calculate prev_log_r_zt from the post-resampling state - prev_r_zt = model.r.r_xn(resampled[0], t) - prev_log_r_zt = tf.reduce_sum( - prev_r_zt.log_prob(observation), axis=[1]) - else: - (resampled, - resampling_log_prob, - _, _, _, _) = relaxed_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size, - temperature=resampling_temperature) - #if summarize: - # resampling_entropy = resampling_dist.entropy() - # resampling_entropy = tf.reduce_mean(resampling_entropy) - # tf.summary.scalar("weight_entropy/%d" % t, resampling_entropy) - - resampling_log_probs.append(tf.reduce_sum(resampling_log_prob, axis=0)) - prev_state = resampled[0] - if aux and "relaxed" not in resampling_type: - # Squeeze out the extra dim potentially added by resampling. - # prev_log_r_zt should always be [num_instances] - prev_log_r_zt = tf.squeeze(resampled[1]) - # Update the log p hat estimate, taking a log sum exp over the sample - # dimension. The appended tensor is [batch_size]. 
- log_p_hats.append( - tf.reduce_logsumexp(log_weight_acc, axis=0) - tf.log( - tf.cast(num_samples, dtype=observation.dtype))) - # reset the weights - log_weights.append(log_weight_acc) - log_weight_acc = tf.zeros_like(log_weight_acc) - else: - prev_state = states[-1] - # Compute the final weight update. If we just resampled this will be zero. - final_update = (tf.reduce_logsumexp(log_weight_acc, axis=0) - - tf.log(tf.cast(num_samples, dtype=observation.dtype))) - # If we ever resampled, then sum up the previous log p hat terms - if len(log_p_hats) > 0: - log_p_hat = tf.reduce_sum(log_p_hats, axis=0) + final_update - else: # otherwise, log_p_hat only comes from the final update - log_p_hat = final_update - - if use_resampling_grads and any(resampling_schedule): - # compute the rewards - # cumsum([a, b, c]) => [a, a+b, a+b+c] - # learning signal at timestep t is - # [sum from i=t+1 to T of log_p_hat_i for t=1:T] - # so we will compute (sum from i=1 to T of log_p_hat_i) - # and at timestep t will subtract off (sum from i=1 to t of log_p_hat_i) - # rewards is a [num_resampling_events, batch_size] Tensor - rewards = tf.stop_gradient( - tf.expand_dims(log_p_hat, 0) - tf.cumsum(log_p_hats, axis=0)) - batch_avg_rewards = tf.reduce_mean(rewards, axis=1) - # compute ema baseline. - # centered_rewards is [num_resampling_events, batch_size] - baseline_ema = tf.train.ExponentialMovingAverage(decay=0.94) - maintain_baseline_op = baseline_ema.apply([batch_avg_rewards]) - baseline = tf.expand_dims(baseline_ema.average(batch_avg_rewards), 1) - centered_rewards = rewards - baseline - if summarize: - summ.summarize_learning_signal(rewards, "rewards") - summ.summarize_learning_signal(centered_rewards, "centered_rewards") - # compute the loss tensor. 
- resampling_grads = tf.reduce_sum( - tf.stop_gradient(centered_rewards) * resampling_log_probs, axis=0) - losses = [Loss("log_p_hat", -tf.reduce_mean(log_p_hat)/num_timesteps), - Loss("resampling_grads", -tf.reduce_mean(resampling_grads)/num_timesteps)] - else: - losses = [Loss("log_p_hat", -tf.reduce_mean(log_p_hat)/num_timesteps)] - maintain_baseline_op = tf.no_op() - - log_p_hat /= num_timesteps - # we clip off the initial state before returning. - return log_p_hat, losses, maintain_baseline_op, states[1:], log_weights_all - - -def fivo_aux_td( - model, - observation, - num_timesteps, - resampling_schedule, - num_samples=1, - summarize=False): - """Compute the FIVO_AUX evidence lower bound.""" - # Initialization - num_instances = tf.cast(tf.shape(observation)[0], tf.int32) - batch_size = tf.cast(num_instances / num_samples, tf.int32) - states = [model.zero_state(num_instances)] - prev_state = states[0] - log_weight_acc = tf.zeros(shape=[num_samples, batch_size], dtype=observation.dtype) - prev_log_r = tf.zeros([num_instances], dtype=observation.dtype) - # must be pre-resampling - log_rs = [] - # must be post-resampling - r_tilde_params = [model.r_tilde.r_zt(states[0], observation, 0)] - log_r_tildes = [] - log_p_xs = [] - # contains the weight at each timestep before resampling only on resampling timesteps - log_weights = [] - # contains weight at each timestep before resampling - log_weights_all = [] - log_p_hats = [] - for t in xrange(num_timesteps): - # run the model for one timestep - # zt is state, [num_instances, state_dim] - # log_q_zt, log_p_x_given_z is [num_instances] - # r_tilde_mu, r_tilde_sigma is [num_instances, state_dim] - # p_ztplus1 is a normal distribution on [num_instances, state_dim] - (zt, log_q_zt, log_p_zt, log_p_x_given_z, - r_tilde_mu, r_tilde_sigma_sq, p_ztplus1) = model(prev_state, observation, t) - - # Compute the log weight without log r. - log_weight = log_p_zt + log_p_x_given_z - log_q_zt - - # Compute log r. 
- if t == num_timesteps - 1: - log_r = tf.zeros_like(prev_log_r) - else: - p_mu = p_ztplus1.mean() - p_sigma_sq = p_ztplus1.variance() - log_r = (tf.log(r_tilde_sigma_sq) - - tf.log(r_tilde_sigma_sq + p_sigma_sq) - - tf.square(r_tilde_mu - p_mu)/(r_tilde_sigma_sq + p_sigma_sq)) - log_r = 0.5*tf.reduce_sum(log_r, axis=-1) - - #log_weight += tf.stop_gradient(log_r - prev_log_r) - log_weight += log_r - prev_log_r - log_weight_acc += tf.reshape(log_weight, [num_samples, batch_size]) - - # Update accumulators - states.append(zt) - log_weights_all.append(log_weight_acc) - log_p_xs.append(log_p_x_given_z) - log_rs.append(log_r) - - # Compute log_r_tilde as [num_instances] Tensor. - prev_r_tilde_mu, prev_r_tilde_sigma_sq = r_tilde_params[-1] - prev_log_r_tilde = -0.5*tf.reduce_sum( - tf.square(zt - prev_r_tilde_mu)/prev_r_tilde_sigma_sq, axis=-1) - #tf.square(tf.stop_gradient(zt) - r_tilde_mu)/r_tilde_sigma_sq, axis=-1) - #tf.square(zt - r_tilde_mu)/r_tilde_sigma_sq, axis=-1) - log_r_tildes.append(prev_log_r_tilde) - - # optionally resample - if resampling_schedule[t]: - # These objects will be resampled - if t < num_timesteps - 1: - to_resample = [zt, log_r, r_tilde_mu, r_tilde_sigma_sq] - else: - to_resample = [zt, log_r] - (resampled, - _, _, _, _) = multinomial_resampling(log_weight_acc, - to_resample, - num_samples, - batch_size) - prev_state = resampled[0] - # Squeeze out the extra dim potentially added by resampling. - # prev_log_r_zt and log_r_tilde should always be [num_instances] - prev_log_r = tf.squeeze(resampled[1]) - if t < num_timesteps -1: - r_tilde_params.append((resampled[2], resampled[3])) - # Update the log p hat estimate, taking a log sum exp over the sample - # dimension. The appended tensor is [batch_size]. 
- log_p_hats.append( - tf.reduce_logsumexp(log_weight_acc, axis=0) - tf.log( - tf.cast(num_samples, dtype=observation.dtype))) - # reset the weights - log_weights.append(log_weight_acc) - log_weight_acc = tf.zeros_like(log_weight_acc) - else: - prev_state = zt - prev_log_r = log_r - if t < num_timesteps - 1: - r_tilde_params.append((r_tilde_mu, r_tilde_sigma_sq)) - - # Compute the final weight update. If we just resampled this will be zero. - final_update = (tf.reduce_logsumexp(log_weight_acc, axis=0) - - tf.log(tf.cast(num_samples, dtype=observation.dtype))) - # If we ever resampled, then sum up the previous log p hat terms - if len(log_p_hats) > 0: - log_p_hat = tf.reduce_sum(log_p_hats, axis=0) + final_update - else: # otherwise, log_p_hat only comes from the final update - log_p_hat = final_update - - # Compute the bellman loss. - # Will remove the first timestep as it is not used. - # log p(x_t|z_t) is in row t-1. - log_p_x = tf.reshape(tf.stack(log_p_xs), - [num_timesteps, num_samples, batch_size]) - # log r_t is contained in row t-1. - # last column is zeros (because at timestep T (num_timesteps) r is 1. - log_r = tf.reshape(tf.stack(log_rs), - [num_timesteps, num_samples, batch_size]) - # [num_timesteps, num_instances]. log r_tilde_t is in row t-1. 
- log_r_tilde = tf.reshape(tf.stack(log_r_tildes), - [num_timesteps, num_samples, batch_size]) - log_lambda = tf.reduce_mean(log_r_tilde - log_p_x - log_r, axis=1, - keepdims=True) - bellman_sos = tf.reduce_mean(tf.square( - log_r_tilde - tf.stop_gradient(log_lambda + log_p_x + log_r)), axis=[0, 1]) - bellman_loss = tf.reduce_mean(bellman_sos)/num_timesteps - tf.summary.scalar("bellman_loss", bellman_loss) - - if len(tf.get_collection("LOG_P_HAT_VARS")) == 0: - log_p_hat_collection = list(set(tf.trainable_variables()) - - set(tf.get_collection("R_TILDE_VARS"))) - for v in log_p_hat_collection: - tf.add_to_collection("LOG_P_HAT_VARS", v) - - log_p_hat /= num_timesteps - losses = [Loss("log_p_hat", -tf.reduce_mean(log_p_hat), "LOG_P_HAT_VARS"), - Loss("bellman_loss", bellman_loss, "R_TILDE_VARS")] - - return log_p_hat, losses, tf.no_op(), states[1:], log_weights_all diff --git a/research/fivo/experimental/data.py b/research/fivo/experimental/data.py deleted file mode 100644 index 0842f212991e1651a12cca239c5b8380fea9d0f8..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/data.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Datasets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -import models - - -def make_long_chain_dataset( - state_size=1, - num_obs=5, - steps_per_obs=3, - variance=1., - observation_variance=1., - batch_size=4, - num_samples=1, - observation_type=models.STANDARD_OBSERVATION, - transition_type=models.STANDARD_TRANSITION, - fixed_observation=None, - dtype="float32"): - """Creates a long chain data generating process. - - Creates a tf.data.Dataset that provides batches of data from a long - chain. - - Args: - state_size: The dimension of the state space of the process. - num_obs: The number of observations in the chain. - steps_per_obs: The number of steps between each observation. - variance: The variance of the normal distributions used at each timestep. - batch_size: The number of trajectories to include in each batch. - num_samples: The number of replicas of each trajectory to include in each - batch. - dtype: The datatype of the states and observations. - Returns: - dataset: A tf.data.Dataset that can be iterated over. - """ - num_timesteps = num_obs * steps_per_obs - def data_generator(): - """An infinite generator of latents and observations from the model.""" - while True: - states = [] - observations = [] - # z0 ~ Normal(0, sqrt(variance)). 
- states.append( - np.random.normal(size=[state_size], - scale=np.sqrt(variance)).astype(dtype)) - # start at 1 because we've already generated z0 - # go to num_timesteps+1 because we want to include the num_timesteps-th step - for t in xrange(1, num_timesteps+1): - if transition_type == models.ROUND_TRANSITION: - loc = np.round(states[-1]) - elif transition_type == models.STANDARD_TRANSITION: - loc = states[-1] - new_state = np.random.normal(size=[state_size], - loc=loc, - scale=np.sqrt(variance)) - states.append(new_state.astype(dtype)) - if t % steps_per_obs == 0: - if fixed_observation is None: - if observation_type == models.SQUARED_OBSERVATION: - loc = np.square(states[-1]) - elif observation_type == models.ABS_OBSERVATION: - loc = np.abs(states[-1]) - elif observation_type == models.STANDARD_OBSERVATION: - loc = states[-1] - new_obs = np.random.normal(size=[state_size], - loc=loc, - scale=np.sqrt(observation_variance)).astype(dtype) - else: - new_obs = np.ones([state_size])* fixed_observation - - observations.append(new_obs) - yield states, observations - - dataset = tf.data.Dataset.from_generator( - data_generator, - output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)), - output_shapes=([num_timesteps+1, state_size], [num_obs, state_size])) - dataset = dataset.repeat().batch(batch_size) - - def tile_batch(state, observation): - state = tf.tile(state, [num_samples, 1, 1]) - observation = tf.tile(observation, [num_samples, 1, 1]) - return state, observation - - dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024) - return dataset - - -def make_dataset(bs=None, - state_size=1, - num_timesteps=10, - variance=1., - prior_type="unimodal", - bimodal_prior_weight=0.5, - bimodal_prior_mean=1, - transition_type=models.STANDARD_TRANSITION, - fixed_observation=None, - batch_size=4, - num_samples=1, - dtype='float32'): - """Creates a data generating process. - - Creates a tf.data.Dataset that provides batches of data. 
- - Args: - bs: The parameters of the data generating process. If None, new bs are - randomly generated. - state_size: The dimension of the state space of the process. - num_timesteps: The length of the state sequences in the process. - variance: The variance of the normal distributions used at each timestep. - batch_size: The number of trajectories to include in each batch. - num_samples: The number of replicas of each trajectory to include in each - batch. - Returns: - bs: The true bs used to generate the data - dataset: A tf.data.Dataset that can be iterated over. - """ - - if bs is None: - bs = [np.random.uniform(size=[state_size]).astype(dtype) for _ in xrange(num_timesteps)] - tf.logging.info("data generating processs bs: %s", - np.array(bs).reshape(num_timesteps)) - - - def data_generator(): - """An infinite generator of latents and observations from the model.""" - while True: - states = [] - if prior_type == "unimodal" or prior_type == "nonlinear": - # Prior is Normal(0, sqrt(variance)). 
- states.append(np.random.normal(size=[state_size], scale=np.sqrt(variance)).astype(dtype)) - elif prior_type == "bimodal": - if np.random.uniform() > bimodal_prior_weight: - loc = bimodal_prior_mean - else: - loc = - bimodal_prior_mean - states.append(np.random.normal(size=[state_size], - loc=loc, - scale=np.sqrt(variance) - ).astype(dtype)) - - for t in xrange(num_timesteps): - if transition_type == models.ROUND_TRANSITION: - loc = np.round(states[-1]) - elif transition_type == models.STANDARD_TRANSITION: - loc = states[-1] - loc += bs[t] - new_state = np.random.normal(size=[state_size], - loc=loc, - scale=np.sqrt(variance)).astype(dtype) - states.append(new_state) - - if fixed_observation is None: - observation = states[-1] - else: - observation = np.ones_like(states[-1]) * fixed_observation - yield np.array(states[:-1]), observation - - dataset = tf.data.Dataset.from_generator( - data_generator, - output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)), - output_shapes=([num_timesteps, state_size], [state_size])) - dataset = dataset.repeat().batch(batch_size) - - def tile_batch(state, observation): - state = tf.tile(state, [num_samples, 1, 1]) - observation = tf.tile(observation, [num_samples, 1]) - return state, observation - - dataset = dataset.map(tile_batch, num_parallel_calls=12).prefetch(1024) - return np.array(bs), dataset diff --git a/research/fivo/experimental/models.py b/research/fivo/experimental/models.py deleted file mode 100644 index 62801ca1ee145e64c80b66e0c83dd7d834ac0847..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/models.py +++ /dev/null @@ -1,1227 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import sonnet as snt -import tensorflow as tf -import numpy as np -import math - -SQUARED_OBSERVATION = "squared" -ABS_OBSERVATION = "abs" -STANDARD_OBSERVATION = "standard" -OBSERVATION_TYPES = [SQUARED_OBSERVATION, ABS_OBSERVATION, STANDARD_OBSERVATION] - -ROUND_TRANSITION = "round" -STANDARD_TRANSITION = "standard" -TRANSITION_TYPES = [ROUND_TRANSITION, STANDARD_TRANSITION] - - -class Q(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - dtype=tf.float32, - random_seed=None, - init_mu0_to_zero=False, - graph_collection_name="Q_VARS"): - self.sigma_min = sigma_min - self.dtype = dtype - self.graph_collection_name = graph_collection_name - initializers = [] - for t in xrange(num_timesteps): - if t == 0 and init_mu0_to_zero: - initializers.append( - {"w": tf.zeros_initializer, "b": tf.zeros_initializer}) - else: - initializers.append( - {"w": tf.random_uniform_initializer(seed=random_seed), - "b": tf.zeros_initializer}) - - def custom_getter(getter, *args, **kwargs): - out = getter(*args, **kwargs) - ref = tf.get_collection_ref(self.graph_collection_name) - if out not in ref: - ref.append(out) - return out - - self.mus = [ - snt.Linear(output_size=state_size, - initializers=initializers[t], - name="q_mu_%d" % t, - custom_getter=custom_getter - ) - for t in xrange(num_timesteps) - ] - self.sigmas = 
[ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="q_sigma_%d" % (t + 1), - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - initializer=tf.random_uniform_initializer(seed=random_seed)) - for t in xrange(num_timesteps) - ] - - def q_zt(self, observation, prev_state, t): - batch_size = tf.shape(prev_state)[0] - q_mu = self.mus[t](tf.concat([observation, prev_state], axis=1)) - q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - return q_zt - - def summarize_weights(self): - for t, sigma in enumerate(self.sigmas): - tf.summary.scalar("q_sigma/%d" % t, sigma[0]) - for t, f in enumerate(self.mus): - tf.summary.scalar("q_mu/b_%d" % t, f.b[0]) - tf.summary.scalar("q_mu/w_obs_%d" % t, f.w[0,0]) - if t != 0: - tf.summary.scalar("q_mu/w_prev_state_%d" % t, f.w[1,0]) - - -class PreviousStateQ(Q): - - def q_zt(self, unused_observation, prev_state, t): - batch_size = tf.shape(prev_state)[0] - q_mu = self.mus[t](prev_state) - q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - return q_zt - - def summarize_weights(self): - for t, sigma in enumerate(self.sigmas): - tf.summary.scalar("q_sigma/%d" % t, sigma[0]) - for t, f in enumerate(self.mus): - tf.summary.scalar("q_mu/b_%d" % t, f.b[0]) - tf.summary.scalar("q_mu/w_prev_state_%d" % t, f.w[0,0]) - - -class ObservationQ(Q): - - def q_zt(self, observation, prev_state, t): - batch_size = tf.shape(prev_state)[0] - q_mu = self.mus[t](observation) - q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - return q_zt - - def 
summarize_weights(self): - for t, sigma in enumerate(self.sigmas): - tf.summary.scalar("q_sigma/%d" % t, sigma[0]) - for t, f in enumerate(self.mus): - tf.summary.scalar("q_mu/b_%d" % t, f.b[0]) - tf.summary.scalar("q_mu/w_obs_%d" % t, f.w[0,0]) - - -class SimpleMeanQ(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - dtype=tf.float32, - random_seed=None, - init_mu0_to_zero=False, - graph_collection_name="Q_VARS"): - self.sigma_min = sigma_min - self.dtype = dtype - self.graph_collection_name = graph_collection_name - initializers = [] - for t in xrange(num_timesteps): - if t == 0 and init_mu0_to_zero: - initializers.append(tf.zeros_initializer) - else: - initializers.append(tf.random_uniform_initializer(seed=random_seed)) - - self.mus = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="q_mu_%d" % (t + 1), - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - initializer=initializers[t]) - for t in xrange(num_timesteps) - ] - self.sigmas = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="q_sigma_%d" % (t + 1), - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - initializer=tf.random_uniform_initializer(seed=random_seed)) - for t in xrange(num_timesteps) - ] - - def q_zt(self, unused_observation, prev_state, t): - batch_size = tf.shape(prev_state)[0] - q_mu = tf.tile(self.mus[t][tf.newaxis, :], [batch_size, 1]) - q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - return q_zt - - def summarize_weights(self): - for t, sigma in enumerate(self.sigmas): - tf.summary.scalar("q_sigma/%d" % t, sigma[0]) - for t, f in enumerate(self.mus): - tf.summary.scalar("q_mu/%d" % t, f[0]) - - -class R(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - dtype=tf.float32, - sigma_init=1., - 
random_seed=None, - graph_collection_name="R_VARS"): - self.dtype = dtype - self.sigma_min = sigma_min - initializers = {"w": tf.truncated_normal_initializer(seed=random_seed), - "b": tf.zeros_initializer} - self.graph_collection_name=graph_collection_name - - def custom_getter(getter, *args, **kwargs): - out = getter(*args, **kwargs) - ref = tf.get_collection_ref(self.graph_collection_name) - if out not in ref: - ref.append(out) - return out - - self.mus= [ - snt.Linear(output_size=state_size, - initializers=initializers, - name="r_mu_%d" % t, - custom_getter=custom_getter) - for t in xrange(num_timesteps) - ] - - self.sigmas = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="r_sigma_%d" % (t + 1), - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - #initializer=tf.random_uniform_initializer(seed=random_seed, maxval=100)) - initializer=tf.constant_initializer(sigma_init)) - for t in xrange(num_timesteps) - ] - - def r_xn(self, z_t, t): - batch_size = tf.shape(z_t)[0] - r_mu = self.mus[t](z_t) - r_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - r_sigma = tf.tile(r_sigma[tf.newaxis, :], [batch_size, 1]) - return tf.contrib.distributions.Normal( - loc=r_mu, scale=tf.sqrt(r_sigma)) - - def summarize_weights(self): - for t in range(len(self.mus) - 1): - tf.summary.scalar("r_mu/%d" % t, self.mus[t][0]) - tf.summary.scalar("r_sigma/%d" % t, self.sigmas[t][0]) - - -class P(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - variance=1.0, - dtype=tf.float32, - random_seed=None, - trainable=True, - init_bs_to_zero=False, - graph_collection_name="P_VARS"): - self.state_size = state_size - self.num_timesteps = num_timesteps - self.sigma_min = sigma_min - self.dtype = dtype - self.variance = variance - self.graph_collection_name = graph_collection_name - if init_bs_to_zero: - initializers = [tf.zeros_initializer for _ in xrange(num_timesteps)] - else: - initializers = 
[tf.random_uniform_initializer(seed=random_seed) for _ in xrange(num_timesteps)] - - self.bs = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="p_b_%d" % (t + 1), - initializer=initializers[t], - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - trainable=trainable) for t in xrange(num_timesteps) - ] - self.Bs = tf.cumsum(self.bs, reverse=True, axis=0) - - def posterior(self, observation, prev_state, t): - """Computes the true posterior p(z_t|z_{t-1}, z_n).""" - # bs[0] is really b_1 - # Bs[i] is sum from k=i+1^n b_k - mu = observation - self.Bs[t] - if t > 0: - mu += (prev_state + self.bs[t - 1]) * float(self.num_timesteps - t) - mu /= float(self.num_timesteps - t + 1) - sigma = tf.ones_like(mu) * self.variance * ( - float(self.num_timesteps - t) / float(self.num_timesteps - t + 1)) - return tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - - def lookahead(self, state, t): - """Computes the true lookahead distribution p(z_n|z_t).""" - mu = state + self.Bs[t] - sigma = tf.ones_like(state) * self.variance * float(self.num_timesteps - t) - return tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - - def likelihood(self, observation): - batch_size = tf.shape(observation)[0] - mu = tf.tile(tf.reduce_sum(self.bs, axis=0)[tf.newaxis, :], [batch_size, 1]) - sigma = tf.ones_like(mu) * self.variance * (self.num_timesteps + 1) - dist = tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - # Average over the batch and take the sum over the state size - return tf.reduce_mean(tf.reduce_sum(dist.log_prob(observation), axis=1)) - - def p_zt(self, prev_state, t): - """Computes the model p(z_t| z_{t-1}).""" - batch_size = tf.shape(prev_state)[0] - if t > 0: - z_mu_p = prev_state + self.bs[t - 1] - else: # p(z_0) is Normal(0,1) - z_mu_p = tf.zeros([batch_size, self.state_size], dtype=self.dtype) - p_zt = tf.contrib.distributions.Normal( - loc=z_mu_p, scale=tf.sqrt(tf.ones_like(z_mu_p) * self.variance)) - 
return p_zt - - def generative(self, unused_observation, z_nm1): - """Computes the model's generative distribution p(z_n| z_{n-1}).""" - generative_p_mu = z_nm1 + self.bs[-1] - return tf.contrib.distributions.Normal( - loc=generative_p_mu, scale=tf.sqrt(tf.ones_like(generative_p_mu) * self.variance)) - - -class ShortChainNonlinearP(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - variance=1.0, - observation_variance=1.0, - transition_type=STANDARD_TRANSITION, - transition_dist=tf.contrib.distributions.Normal, - dtype=tf.float32, - random_seed=None): - self.state_size = state_size - self.num_timesteps = num_timesteps - self.sigma_min = sigma_min - self.dtype = dtype - self.variance = variance - self.observation_variance = observation_variance - self.transition_type = transition_type - self.transition_dist = transition_dist - - def p_zt(self, prev_state, t): - """Computes the model p(z_t| z_{t-1}).""" - batch_size = tf.shape(prev_state)[0] - if t > 0: - if self.transition_type == ROUND_TRANSITION: - loc = tf.round(prev_state) - tf.logging.info("p(z_%d | z_%d) ~ N(round(z_%d), %0.1f)" % (t, t-1, t-1, self.variance)) - elif self.transition_type == STANDARD_TRANSITION: - loc = prev_state - tf.logging.info("p(z_%d | z_%d) ~ N(z_%d, %0.1f)" % (t, t-1, t-1, self.variance)) - else: # p(z_0) is Normal(0,1) - loc = tf.zeros([batch_size, self.state_size], dtype=self.dtype) - tf.logging.info("p(z_0) ~ N(0,%0.1f)" % self.variance) - - p_zt = self.transition_dist( - loc=loc, - scale=tf.sqrt(tf.ones_like(loc) * self.variance)) - return p_zt - - def generative(self, unused_obs, z_ni): - """Computes the model's generative distribution p(x_i| z_{ni}).""" - if self.transition_type == ROUND_TRANSITION: - loc = tf.round(z_ni) - elif self.transition_type == STANDARD_TRANSITION: - loc = z_ni - generative_sigma_sq = tf.ones_like(loc) * self.observation_variance - return self.transition_dist( - loc=loc, scale=tf.sqrt(generative_sigma_sq)) - - -class 
BimodalPriorP(object): - - def __init__(self, - state_size, - num_timesteps, - mixing_coeff=0.5, - prior_mode_mean=1, - sigma_min=1e-5, - variance=1.0, - dtype=tf.float32, - random_seed=None, - trainable=True, - init_bs_to_zero=False, - graph_collection_name="P_VARS"): - self.state_size = state_size - self.num_timesteps = num_timesteps - self.sigma_min = sigma_min - self.dtype = dtype - self.variance = variance - self.mixing_coeff = mixing_coeff - self.prior_mode_mean = prior_mode_mean - - if init_bs_to_zero: - initializers = [tf.zeros_initializer for _ in xrange(num_timesteps)] - else: - initializers = [tf.random_uniform_initializer(seed=random_seed) for _ in xrange(num_timesteps)] - - self.bs = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="b_%d" % (t + 1), - initializer=initializers[t], - collections=[tf.GraphKeys.GLOBAL_VARIABLES, graph_collection_name], - trainable=trainable) for t in xrange(num_timesteps) - ] - self.Bs = tf.cumsum(self.bs, reverse=True, axis=0) - - def posterior(self, observation, prev_state, t): - # NOTE: This is currently wrong, but would require a refactoring of - # summarize_q to fix as kl is not defined for a mixture - """Computes the true posterior p(z_t|z_{t-1}, z_n).""" - # bs[0] is really b_1 - # Bs[i] is sum from k=i+1^n b_k - mu = observation - self.Bs[t] - if t > 0: - mu += (prev_state + self.bs[t - 1]) * float(self.num_timesteps - t) - mu /= float(self.num_timesteps - t + 1) - sigma = tf.ones_like(mu) * self.variance * ( - float(self.num_timesteps - t) / float(self.num_timesteps - t + 1)) - return tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - - def lookahead(self, state, t): - """Computes the true lookahead distribution p(z_n|z_t).""" - mu = state + self.Bs[t] - sigma = tf.ones_like(state) * self.variance * float(self.num_timesteps - t) - return tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - - def likelihood(self, observation): - batch_size = tf.shape(observation)[0] - 
sum_of_bs = tf.tile(tf.reduce_sum(self.bs, axis=0)[tf.newaxis, :], [batch_size, 1]) - sigma = tf.ones_like(sum_of_bs) * self.variance * (self.num_timesteps + 1) - mu_pos = (tf.ones([batch_size, self.state_size], dtype=self.dtype) * self.prior_mode_mean) + sum_of_bs - mu_neg = (tf.ones([batch_size, self.state_size], dtype=self.dtype) * -self.prior_mode_mean) + sum_of_bs - zn_pos = tf.contrib.distributions.Normal( - loc=mu_pos, - scale=tf.sqrt(sigma)) - zn_neg = tf.contrib.distributions.Normal( - loc=mu_neg, - scale=tf.sqrt(sigma)) - mode_probs = tf.convert_to_tensor([self.mixing_coeff, 1-self.mixing_coeff], dtype=tf.float64) - mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :], [batch_size, 1, 1]) - mode_selection_dist = tf.contrib.distributions.Categorical(probs=mode_probs) - zn_dist = tf.contrib.distributions.Mixture( - cat=mode_selection_dist, - components=[zn_pos, zn_neg], - validate_args=True) - # Average over the batch and take the sum over the state size - return tf.reduce_mean(tf.reduce_sum(zn_dist.log_prob(observation), axis=1)) - - def p_zt(self, prev_state, t): - """Computes the model p(z_t| z_{t-1}).""" - batch_size = tf.shape(prev_state)[0] - if t > 0: - z_mu_p = prev_state + self.bs[t - 1] - p_zt = tf.contrib.distributions.Normal( - loc=z_mu_p, scale=tf.sqrt(tf.ones_like(z_mu_p) * self.variance)) - return p_zt - else: # p(z_0) is mixture of two Normals - mu_pos = tf.ones([batch_size, self.state_size], dtype=self.dtype) * self.prior_mode_mean - mu_neg = tf.ones([batch_size, self.state_size], dtype=self.dtype) * -self.prior_mode_mean - z0_pos = tf.contrib.distributions.Normal( - loc=mu_pos, - scale=tf.sqrt(tf.ones_like(mu_pos) * self.variance)) - z0_neg = tf.contrib.distributions.Normal( - loc=mu_neg, - scale=tf.sqrt(tf.ones_like(mu_neg) * self.variance)) - mode_probs = tf.convert_to_tensor([self.mixing_coeff, 1-self.mixing_coeff], dtype=tf.float64) - mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :], [batch_size, 1, 1]) - 
mode_selection_dist = tf.contrib.distributions.Categorical(probs=mode_probs) - z0_dist = tf.contrib.distributions.Mixture( - cat=mode_selection_dist, - components=[z0_pos, z0_neg], - validate_args=False) - return z0_dist - - def generative(self, unused_observation, z_nm1): - """Computes the model's generative distribution p(z_n| z_{n-1}).""" - generative_p_mu = z_nm1 + self.bs[-1] - return tf.contrib.distributions.Normal( - loc=generative_p_mu, scale=tf.sqrt(tf.ones_like(generative_p_mu) * self.variance)) - -class Model(object): - - def __init__(self, - p, - q, - r, - state_size, - num_timesteps, - dtype=tf.float32): - self.p = p - self.q = q - self.r = r - self.state_size = state_size - self.num_timesteps = num_timesteps - self.dtype = dtype - - def zero_state(self, batch_size): - return tf.zeros([batch_size, self.state_size], dtype=self.dtype) - - def __call__(self, prev_state, observation, t): - # Compute the q distribution over z, q(z_t|z_n, z_{t-1}). - q_zt = self.q.q_zt(observation, prev_state, t) - # Compute the p distribution over z, p(z_t|z_{t-1}). - p_zt = self.p.p_zt(prev_state, t) - # sample from q - zt = q_zt.sample() - r_xn = self.r.r_xn(zt, t) - # Calculate the logprobs and sum over the state size. - log_q_zt = tf.reduce_sum(q_zt.log_prob(zt), axis=1) - log_p_zt = tf.reduce_sum(p_zt.log_prob(zt), axis=1) - log_r_xn = tf.reduce_sum(r_xn.log_prob(observation), axis=1) - # If we're at the last timestep, also calc the logprob of the observation. 
- if t == self.num_timesteps - 1: - generative_dist = self.p.generative(observation, zt) - log_p_x_given_z = tf.reduce_sum(generative_dist.log_prob(observation), axis=1) - else: - log_p_x_given_z = tf.zeros_like(log_q_zt) - return (zt, log_q_zt, log_p_zt, log_p_x_given_z, log_r_xn) - - @staticmethod - def create(state_size, - num_timesteps, - sigma_min=1e-5, - r_sigma_init=1, - variance=1.0, - mixing_coeff=0.5, - prior_mode_mean=1.0, - dtype=tf.float32, - random_seed=None, - train_p=True, - p_type="unimodal", - q_type="normal", - observation_variance=1.0, - transition_type=STANDARD_TRANSITION, - use_bs=True): - if p_type == "unimodal": - p = P(state_size, - num_timesteps, - sigma_min=sigma_min, - variance=variance, - dtype=dtype, - random_seed=random_seed, - trainable=train_p, - init_bs_to_zero=not use_bs) - elif p_type == "bimodal": - p = BimodalPriorP( - state_size, - num_timesteps, - mixing_coeff=mixing_coeff, - prior_mode_mean=prior_mode_mean, - sigma_min=sigma_min, - variance=variance, - dtype=dtype, - random_seed=random_seed, - trainable=train_p, - init_bs_to_zero=not use_bs) - elif "nonlinear" in p_type: - if "cauchy" in p_type: - trans_dist = tf.contrib.distributions.Cauchy - else: - trans_dist = tf.contrib.distributions.Normal - p = ShortChainNonlinearP( - state_size, - num_timesteps, - sigma_min=sigma_min, - variance=variance, - observation_variance=observation_variance, - transition_type=transition_type, - transition_dist=trans_dist, - dtype=dtype, - random_seed=random_seed - ) - - if q_type == "normal": - q_class = Q - elif q_type == "simple_mean": - q_class = SimpleMeanQ - elif q_type == "prev_state": - q_class = PreviousStateQ - elif q_type == "observation": - q_class = ObservationQ - - q = q_class(state_size, - num_timesteps, - sigma_min=sigma_min, - dtype=dtype, - random_seed=random_seed, - init_mu0_to_zero=not use_bs) - r = R(state_size, - num_timesteps, - sigma_min=sigma_min, - sigma_init=r_sigma_init, - dtype=dtype, - random_seed=random_seed) - 
model = Model(p, q, r, state_size, num_timesteps, dtype=dtype) - return model - - -class BackwardsModel(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - dtype=tf.float32): - self.state_size = state_size - self.num_timesteps = num_timesteps - self.sigma_min = sigma_min - self.dtype = dtype - self.bs = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="b_%d" % (t + 1), - initializer=tf.zeros_initializer) for t in xrange(num_timesteps) - ] - self.Bs = tf.cumsum(self.bs, reverse=True, axis=0) - self.q_mus = [ - snt.Linear(output_size=state_size) for _ in xrange(num_timesteps) - ] - self.q_sigmas = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="q_sigma_%d" % (t + 1), - initializer=tf.zeros_initializer) for t in xrange(num_timesteps) - ] - self.r_mus = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="r_mu_%d" % (t + 1), - initializer=tf.zeros_initializer) for t in xrange(num_timesteps) - ] - self.r_sigmas = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="r_sigma_%d" % (t + 1), - initializer=tf.zeros_initializer) for t in xrange(num_timesteps) - ] - - def zero_state(self, batch_size): - return tf.zeros([batch_size, self.state_size], dtype=self.dtype) - - def posterior(self, unused_observation, prev_state, unused_t): - # TODO(dieterichl): Correct this. - return tf.contrib.distributions.Normal( - loc=tf.zeros_like(prev_state), scale=tf.zeros_like(prev_state)) - - def lookahead(self, state, unused_t): - # TODO(dieterichl): Correct this. 
- return tf.contrib.distributions.Normal( - loc=tf.zeros_like(state), scale=tf.zeros_like(state)) - - def q_zt(self, observation, next_state, t): - """Computes the variational posterior q(z_{t}|z_{t+1}, z_n).""" - t_backwards = self.num_timesteps - t - 1 - batch_size = tf.shape(next_state)[0] - q_mu = self.q_mus[t_backwards](tf.concat([observation, next_state], axis=1)) - q_sigma = tf.maximum( - tf.nn.softplus(self.q_sigmas[t_backwards]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - return q_zt - - def p_zt(self, prev_state, t): - """Computes the model p(z_{t+1}| z_{t}).""" - t_backwards = self.num_timesteps - t - 1 - z_mu_p = prev_state + self.bs[t_backwards] - p_zt = tf.contrib.distributions.Normal( - loc=z_mu_p, scale=tf.ones_like(z_mu_p)) - return p_zt - - def generative(self, unused_observation, z_nm1): - """Computes the model's generative distribution p(z_n| z_{n-1}).""" - generative_p_mu = z_nm1 + self.bs[-1] - return tf.contrib.distributions.Normal( - loc=generative_p_mu, scale=tf.ones_like(generative_p_mu)) - - def r(self, z_t, t): - t_backwards = self.num_timesteps - t - 1 - batch_size = tf.shape(z_t)[0] - r_mu = tf.tile(self.r_mus[t_backwards][tf.newaxis, :], [batch_size, 1]) - r_sigma = tf.maximum( - tf.nn.softplus(self.r_sigmas[t_backwards]), self.sigma_min) - r_sigma = tf.tile(r_sigma[tf.newaxis, :], [batch_size, 1]) - return tf.contrib.distributions.Normal(loc=r_mu, scale=tf.sqrt(r_sigma)) - - def likelihood(self, observation): - batch_size = tf.shape(observation)[0] - mu = tf.tile(tf.reduce_sum(self.bs, axis=0)[tf.newaxis, :], [batch_size, 1]) - sigma = tf.ones_like(mu) * (self.num_timesteps + 1) - dist = tf.contrib.distributions.Normal(loc=mu, scale=tf.sqrt(sigma)) - # Average over the batch and take the sum over the state size - return tf.reduce_mean(tf.reduce_sum(dist.log_prob(observation), axis=1)) - - def __call__(self, next_state, 
observation, t): - # next state = z_{t+1} - # Compute the q distribution over z, q(z_{t}|z_n, z_{t+1}). - q_zt = self.q_zt(observation, next_state, t) - # sample from q - zt = q_zt.sample() - # Compute the p distribution over z, p(z_{t+1}|z_{t}). - p_zt = self.p_zt(zt, t) - # Compute log p(z_{t+1} | z_t) - if t == 0: - log_p_zt = p_zt.log_prob(observation) - else: - log_p_zt = p_zt.log_prob(next_state) - - # Compute r prior over zt - r_zt = self.r(zt, t) - log_r_zt = r_zt.log_prob(zt) - # Compute proposal density at zt - log_q_zt = q_zt.log_prob(zt) - # If we're at the last timestep, also calc the logprob of the observation. - - if t == self.num_timesteps - 1: - p_z0_dist = tf.contrib.distributions.Normal( - loc=tf.zeros_like(zt), scale=tf.ones_like(zt)) - z0_log_prob = p_z0_dist.log_prob(zt) - else: - z0_log_prob = tf.zeros_like(log_q_zt) - return (zt, log_q_zt, log_p_zt, z0_log_prob, log_r_zt) - - -class LongChainP(object): - - def __init__(self, - state_size, - num_obs, - steps_per_obs, - sigma_min=1e-5, - variance=1.0, - observation_variance=1.0, - observation_type=STANDARD_OBSERVATION, - transition_type=STANDARD_TRANSITION, - dtype=tf.float32, - random_seed=None): - self.state_size = state_size - self.steps_per_obs = steps_per_obs - self.num_obs = num_obs - self.num_timesteps = steps_per_obs*num_obs + 1 - self.sigma_min = sigma_min - self.dtype = dtype - self.variance = variance - self.observation_variance = observation_variance - self.observation_type = observation_type - self.transition_type = transition_type - - def likelihood(self, observations): - """Computes the model's true likelihood of the observations. - - Args: - observations: A [batch_size, m, state_size] Tensor representing each of - the m observations. - Returns: - logprob: The true likelihood of the observations given the model. 
- """ - raise ValueError("Likelihood is not defined for long-chain models") - # batch_size = tf.shape(observations)[0] - # mu = tf.zeros([batch_size, self.state_size, self.num_obs], dtype=self.dtype) - # sigma = np.fromfunction( - # lambda i, j: 1 + self.steps_per_obs*np.minimum(i+1, j+1), - # [self.num_obs, self.num_obs]) - # sigma += np.eye(self.num_obs) - # sigma = tf.convert_to_tensor(sigma * self.variance, dtype=self.dtype) - # sigma = tf.tile(sigma[tf.newaxis, tf.newaxis, ...], - # [batch_size, self.state_size, 1, 1]) - # dist = tf.contrib.distributions.MultivariateNormalFullCovariance( - # loc=mu, - # covariance_matrix=sigma) - # Average over the batch and take the sum over the state size - #return tf.reduce_mean(tf.reduce_sum(dist.log_prob(observations), axis=1)) - - def p_zt(self, prev_state, t): - """Computes the model p(z_t| z_{t-1}).""" - batch_size = tf.shape(prev_state)[0] - if t > 0: - if self.transition_type == ROUND_TRANSITION: - loc = tf.round(prev_state) - tf.logging.info("p(z_%d | z_%d) ~ N(round(z_%d), %0.1f)" % (t, t-1, t-1, self.variance)) - elif self.transition_type == STANDARD_TRANSITION: - loc = prev_state - tf.logging.info("p(z_%d | z_%d) ~ N(z_%d, %0.1f)" % (t, t-1, t-1, self.variance)) - else: # p(z_0) is Normal(0,1) - loc = tf.zeros([batch_size, self.state_size], dtype=self.dtype) - tf.logging.info("p(z_0) ~ N(0,%0.1f)" % self.variance) - - p_zt = tf.contrib.distributions.Normal( - loc=loc, - scale=tf.sqrt(tf.ones_like(loc) * self.variance)) - return p_zt - - def generative(self, z_ni, t): - """Computes the model's generative distribution p(x_i| z_{ni}).""" - if self.observation_type == SQUARED_OBSERVATION: - generative_mu = tf.square(z_ni) - tf.logging.info("p(x_%d | z_%d) ~ N(z_%d^2, %0.1f)" % (t, t, t, self.variance)) - elif self.observation_type == ABS_OBSERVATION: - generative_mu = tf.abs(z_ni) - tf.logging.info("p(x_%d | z_%d) ~ N(|z_%d|, %0.1f)" % (t, t, t, self.variance)) - elif self.observation_type == STANDARD_OBSERVATION: - 
generative_mu = z_ni - tf.logging.info("p(x_%d | z_%d) ~ N(z_%d, %0.1f)" % (t, t, t, self.variance)) - generative_sigma_sq = tf.ones_like(generative_mu) * self.observation_variance - return tf.contrib.distributions.Normal( - loc=generative_mu, scale=tf.sqrt(generative_sigma_sq)) - - -class LongChainQ(object): - - def __init__(self, - state_size, - num_obs, - steps_per_obs, - sigma_min=1e-5, - dtype=tf.float32, - random_seed=None): - self.state_size = state_size - self.sigma_min = sigma_min - self.dtype = dtype - self.steps_per_obs = steps_per_obs - self.num_obs = num_obs - self.num_timesteps = num_obs*steps_per_obs +1 - - initializers = { - "w": tf.random_uniform_initializer(seed=random_seed), - "b": tf.zeros_initializer - } - self.mus = [ - snt.Linear(output_size=state_size, initializers=initializers) - for t in xrange(self.num_timesteps) - ] - self.sigmas = [ - tf.get_variable( - shape=[state_size], - dtype=self.dtype, - name="q_sigma_%d" % (t + 1), - initializer=tf.random_uniform_initializer(seed=random_seed)) - for t in xrange(self.num_timesteps) - ] - - def first_relevant_obs_index(self, t): - return int(max((t-1)/self.steps_per_obs, 0)) - - def q_zt(self, observations, prev_state, t): - """Computes a distribution over z_t. - - Args: - observations: a [batch_size, num_observations, state_size] Tensor. - prev_state: a [batch_size, state_size] Tensor. - t: The current timestep, an int Tensor. - """ - # filter out unneeded past obs - first_relevant_obs_index = int(math.floor(max(t-1, 0) / self.steps_per_obs)) - num_relevant_observations = self.num_obs - first_relevant_obs_index - observations = observations[:,first_relevant_obs_index:,:] - batch_size = tf.shape(prev_state)[0] - # concatenate the prev state and observations along the second axis (that is - # not the batch or state size axis, and then flatten it to - # [batch_size, (num_relevant_observations + 1) * state_size] to feed it into - # the linear layer. 
- q_input = tf.concat([observations, prev_state[:,tf.newaxis, :]], axis=1) - q_input = tf.reshape(q_input, - [batch_size, (num_relevant_observations + 1) * self.state_size]) - q_mu = self.mus[t](q_input) - q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1]) - q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma)) - tf.logging.info( - "q(z_{t} | z_{tm1}, x_{obsf}:{obst}) ~ N(Linear([z_{tm1},x_{obsf}:{obst}]), sigma_{t})".format( - **{"t": t, - "tm1": t-1, - "obsf": (first_relevant_obs_index+1)*self.steps_per_obs, - "obst":self.steps_per_obs*self.num_obs})) - return q_zt - - def summarize_weights(self): - pass - -class LongChainR(object): - - def __init__(self, - state_size, - num_obs, - steps_per_obs, - sigma_min=1e-5, - dtype=tf.float32, - random_seed=None): - self.state_size = state_size - self.dtype = dtype - self.sigma_min = sigma_min - self.steps_per_obs = steps_per_obs - self.num_obs = num_obs - self.num_timesteps = num_obs*steps_per_obs + 1 - self.sigmas = [ - tf.get_variable( - shape=[self.num_future_obs(t)], - dtype=self.dtype, - name="r_sigma_%d" % (t + 1), - #initializer=tf.random_uniform_initializer(seed=random_seed, maxval=100)) - initializer=tf.constant_initializer(1.0)) - for t in range(self.num_timesteps) - ] - - def first_future_obs_index(self, t): - return int(math.floor(t / self.steps_per_obs)) - - def num_future_obs(self, t): - return int(self.num_obs - self.first_future_obs_index(t)) - - def r_xn(self, z_t, t): - """Computes a distribution over the future observations given current latent - state. - - The indexing in these messages is 1 indexed and inclusive. This is - consistent with the latex documents. 
- - Args: - z_t: [batch_size, state_size] Tensor - t: Current timestep - """ - tf.logging.info( - "r(x_{start}:{end} | z_{t}) ~ N(z_{t}, sigma_{t})".format( - **{"t": t, - "start": (self.first_future_obs_index(t)+1)*self.steps_per_obs, - "end": self.num_timesteps-1})) - batch_size = tf.shape(z_t)[0] - # the mean for all future observations is the same. - # this tiling results in a [batch_size, num_future_obs, state_size] Tensor - r_mu = tf.tile(z_t[:,tf.newaxis,:], [1, self.num_future_obs(t), 1]) - # compute the variance - r_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min) - # the variance is the same across all state dimensions, so we only have to - # time sigma to be [batch_size, num_future_obs]. - r_sigma = tf.tile(r_sigma[tf.newaxis,:, tf.newaxis], [batch_size, 1, self.state_size]) - return tf.contrib.distributions.Normal( - loc=r_mu, scale=tf.sqrt(r_sigma)) - - def summarize_weights(self): - pass - - -class LongChainModel(object): - - def __init__(self, - p, - q, - r, - state_size, - num_obs, - steps_per_obs, - dtype=tf.float32, - disable_r=False): - self.p = p - self.q = q - self.r = r - self.disable_r = disable_r - self.state_size = state_size - self.num_obs = num_obs - self.steps_per_obs = steps_per_obs - self.num_timesteps = steps_per_obs*num_obs + 1 - self.dtype = dtype - - def zero_state(self, batch_size): - return tf.zeros([batch_size, self.state_size], dtype=self.dtype) - - def next_obs_ind(self, t): - return int(math.floor(max(t-1,0)/self.steps_per_obs)) - - def __call__(self, prev_state, observations, t): - """Computes the importance weight for the model system. - - Args: - prev_state: [batch_size, state_size] Tensor - observations: [batch_size, num_observations, state_size] Tensor - """ - # Compute the q distribution over z, q(z_t|z_n, z_{t-1}). - q_zt = self.q.q_zt(observations, prev_state, t) - # Compute the p distribution over z, p(z_t|z_{t-1}). 
- p_zt = self.p.p_zt(prev_state, t) - # sample from q and evaluate the logprobs, summing over the state size - zt = q_zt.sample() - log_q_zt = tf.reduce_sum(q_zt.log_prob(zt), axis=1) - log_p_zt = tf.reduce_sum(p_zt.log_prob(zt), axis=1) - if not self.disable_r and t < self.num_timesteps-1: - # score the remaining observations using r - r_xn = self.r.r_xn(zt, t) - log_r_xn = r_xn.log_prob(observations[:, self.next_obs_ind(t+1):, :]) - # sum over state size and observation, leaving the batch index - log_r_xn = tf.reduce_sum(log_r_xn, axis=[1,2]) - else: - log_r_xn = tf.zeros_like(log_p_zt) - if t != 0 and t % self.steps_per_obs == 0: - generative_dist = self.p.generative(zt, t) - log_p_x_given_z = generative_dist.log_prob(observations[:,self.next_obs_ind(t),:]) - log_p_x_given_z = tf.reduce_sum(log_p_x_given_z, axis=1) - else: - log_p_x_given_z = tf.zeros_like(log_q_zt) - return (zt, log_q_zt, log_p_zt, log_p_x_given_z, log_r_xn) - - @staticmethod - def create(state_size, - num_obs, - steps_per_obs, - sigma_min=1e-5, - variance=1.0, - observation_variance=1.0, - observation_type=STANDARD_OBSERVATION, - transition_type=STANDARD_TRANSITION, - dtype=tf.float32, - random_seed=None, - disable_r=False): - p = LongChainP( - state_size, - num_obs, - steps_per_obs, - sigma_min=sigma_min, - variance=variance, - observation_variance=observation_variance, - observation_type=observation_type, - transition_type=transition_type, - dtype=dtype, - random_seed=random_seed) - q = LongChainQ( - state_size, - num_obs, - steps_per_obs, - sigma_min=sigma_min, - dtype=dtype, - random_seed=random_seed) - r = LongChainR( - state_size, - num_obs, - steps_per_obs, - sigma_min=sigma_min, - dtype=dtype, - random_seed=random_seed) - model = LongChainModel( - p, q, r, state_size, num_obs, steps_per_obs, - dtype=dtype, - disable_r=disable_r) - return model - - -class RTilde(object): - - def __init__(self, - state_size, - num_timesteps, - sigma_min=1e-5, - dtype=tf.float32, - random_seed=None, - 
graph_collection_name="R_TILDE_VARS"): - self.dtype = dtype - self.sigma_min = sigma_min - initializers = {"w": tf.truncated_normal_initializer(seed=random_seed), - "b": tf.zeros_initializer} - self.graph_collection_name=graph_collection_name - - def custom_getter(getter, *args, **kwargs): - out = getter(*args, **kwargs) - ref = tf.get_collection_ref(self.graph_collection_name) - if out not in ref: - ref.append(out) - return out - - self.fns = [ - snt.Linear(output_size=2*state_size, - initializers=initializers, - name="r_tilde_%d" % t, - custom_getter=custom_getter) - for t in xrange(num_timesteps) - ] - - def r_zt(self, z_t, observation, t): - #out = self.fns[t](tf.stop_gradient(tf.concat([z_t, observation], axis=1))) - out = self.fns[t](tf.concat([z_t, observation], axis=1)) - mu, raw_sigma_sq = tf.split(out, 2, axis=1) - sigma_sq = tf.maximum(tf.nn.softplus(raw_sigma_sq), self.sigma_min) - return mu, sigma_sq - -class TDModel(object): - - def __init__(self, - p, - q, - r_tilde, - state_size, - num_timesteps, - dtype=tf.float32, - disable_r=False): - self.p = p - self.q = q - self.r_tilde = r_tilde - self.disable_r = disable_r - self.state_size = state_size - self.num_timesteps = num_timesteps - self.dtype = dtype - - def zero_state(self, batch_size): - return tf.zeros([batch_size, self.state_size], dtype=self.dtype) - - def __call__(self, prev_state, observation, t): - """Computes the importance weight for the model system. - - Args: - prev_state: [batch_size, state_size] Tensor - observations: [batch_size, num_observations, state_size] Tensor - """ - # Compute the q distribution over z, q(z_t|z_n, z_{t-1}). - q_zt = self.q.q_zt(observation, prev_state, t) - # Compute the p distribution over z, p(z_t|z_{t-1}). - p_zt = self.p.p_zt(prev_state, t) - # sample from q and evaluate the logprobs, summing over the state size - zt = q_zt.sample() - # If it isn't the last timestep, compute the distribution over the next z. 
- if t < self.num_timesteps - 1: - p_ztplus1 = self.p.p_zt(zt, t+1) - else: - p_ztplus1 = None - log_q_zt = tf.reduce_sum(q_zt.log_prob(zt), axis=1) - log_p_zt = tf.reduce_sum(p_zt.log_prob(zt), axis=1) - - if not self.disable_r and t < self.num_timesteps-1: - # score the remaining observations using r - r_tilde_mu, r_tilde_sigma_sq = self.r_tilde.r_zt(zt, observation, t+1) - else: - r_tilde_mu = None - r_tilde_sigma_sq = None - if t == self.num_timesteps - 1: - generative_dist = self.p.generative(observation, zt) - log_p_x_given_z = tf.reduce_sum(generative_dist.log_prob(observation), axis=1) - else: - log_p_x_given_z = tf.zeros_like(log_q_zt) - return (zt, log_q_zt, log_p_zt, log_p_x_given_z, - r_tilde_mu, r_tilde_sigma_sq, p_ztplus1) - - @staticmethod - def create(state_size, - num_timesteps, - sigma_min=1e-5, - variance=1.0, - dtype=tf.float32, - random_seed=None, - train_p=True, - p_type="unimodal", - q_type="normal", - mixing_coeff=0.5, - prior_mode_mean=1.0, - observation_variance=1.0, - transition_type=STANDARD_TRANSITION, - use_bs=True): - if p_type == "unimodal": - p = P(state_size, - num_timesteps, - sigma_min=sigma_min, - variance=variance, - dtype=dtype, - random_seed=random_seed, - trainable=train_p, - init_bs_to_zero=not use_bs) - elif p_type == "bimodal": - p = BimodalPriorP( - state_size, - num_timesteps, - mixing_coeff=mixing_coeff, - prior_mode_mean=prior_mode_mean, - sigma_min=sigma_min, - variance=variance, - dtype=dtype, - random_seed=random_seed, - trainable=train_p, - init_bs_to_zero=not use_bs) - elif "nonlinear" in p_type: - if "cauchy" in p_type: - trans_dist = tf.contrib.distributions.Cauchy - else: - trans_dist = tf.contrib.distributions.Normal - - p = ShortChainNonlinearP( - state_size, - num_timesteps, - sigma_min=sigma_min, - variance=variance, - observation_variance=observation_variance, - transition_type=transition_type, - transition_dist=trans_dist, - dtype=dtype, - random_seed=random_seed - ) - - if q_type == "normal": - q_class 
= Q - elif q_type == "simple_mean": - q_class = SimpleMeanQ - elif q_type == "prev_state": - q_class = PreviousStateQ - elif q_type == "observation": - q_class = ObservationQ - - q = q_class(state_size, - num_timesteps, - sigma_min=sigma_min, - dtype=dtype, - random_seed=random_seed, - init_mu0_to_zero=not use_bs) - r_tilde = RTilde( - state_size, - num_timesteps, - sigma_min=sigma_min, - dtype=dtype, - random_seed=random_seed) - model = TDModel(p, q, r_tilde, state_size, num_timesteps, dtype=dtype) - return model diff --git a/research/fivo/experimental/run.sh b/research/fivo/experimental/run.sh deleted file mode 100644 index c650f636d5313a196960a92b509202b47e7da518..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/run.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -model="forward" -T=5 -num_obs=1 -var=0.1 -n=4 -lr=0.0001 -bound="fivo-aux" -q_type="normal" -resampling_method="multinomial" -rgrad="true" -p_type="unimodal" -use_bs=false - -LOGDIR=/tmp/fivo/model-$model-$bound-$resampling_method-resampling-rgrad-$rgrad-T-$T-var-$var-n-$n-lr-$lr-q-$q_type-p-$p_type - -python train.py \ - --logdir=$LOGDIR \ - --model=$model \ - --bound=$bound \ - --q_type=$q_type \ - --p_type=$p_type \ - --variance=$var \ - --use_resampling_grads=$rgrad \ - --resampling=always \ - --resampling_method=$resampling_method \ - --batch_size=4 \ - --num_samples=$n \ - --num_timesteps=$T \ - --num_eval_samples=256 \ - --summarize_every=100 \ - --learning_rate=$lr \ - --decay_steps=1000000 \ - --max_steps=1000000000 \ - --random_seed=1234 \ - --train_p=false \ - --use_bs=$use_bs \ - --alsologtostderr diff --git a/research/fivo/experimental/summary_utils.py b/research/fivo/experimental/summary_utils.py deleted file mode 100644 index 04e4aeea257577e60d3651656d0c62355d501ea8..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/summary_utils.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utils for plotting and summarizing. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np -import scipy - -import tensorflow as tf - -import models - - -def summarize_ess(weights, only_last_timestep=False): - """Plots the effective sample size. - - Args: - weights: List of length num_timesteps Tensors of shape - [num_samples, batch_size] - """ - num_timesteps = len(weights) - batch_size = tf.cast(tf.shape(weights[0])[1], dtype=tf.float64) - for i in range(num_timesteps): - if only_last_timestep and i < num_timesteps-1: continue - - w = tf.nn.softmax(weights[i], dim=0) - centered_weights = w - tf.reduce_mean(w, axis=0, keepdims=True) - variance = tf.reduce_sum(tf.square(centered_weights))/(batch_size-1) - ess = 1./tf.reduce_mean(tf.reduce_sum(tf.square(w), axis=0)) - tf.summary.scalar("ess/%d" % i, ess) - tf.summary.scalar("ese/%d" % i, ess / batch_size) - tf.summary.scalar("weight_variance/%d" % i, variance) - - -def summarize_particles(states, weights, observation, model): - """Plots particle locations and weights. - - Args: - states: List of length num_timesteps Tensors of shape - [batch_size*num_particles, state_size]. 
- weights: List of length num_timesteps Tensors of shape [num_samples, - batch_size] - observation: Tensor of shape [batch_size*num_samples, state_size] - """ - num_timesteps = len(weights) - num_samples, batch_size = weights[0].get_shape().as_list() - # get q0 information for plotting - q0_dist = model.q.q_zt(observation, tf.zeros_like(states[0]), 0) - q0_loc = q0_dist.loc[0:batch_size, 0] - q0_scale = q0_dist.scale[0:batch_size, 0] - # get posterior information for plotting - post = (model.p.mixing_coeff, model.p.prior_mode_mean, model.p.variance, - tf.reduce_sum(model.p.bs), model.p.num_timesteps) - - # Reshape states and weights to be [time, num_samples, batch_size] - states = tf.stack(states) - weights = tf.stack(weights) - # normalize the weights over the sample dimension - weights = tf.nn.softmax(weights, dim=1) - states = tf.reshape(states, tf.shape(weights)) - - ess = 1./tf.reduce_sum(tf.square(weights), axis=1) - - def _plot_states(states_batch, weights_batch, observation_batch, ess_batch, q0, post): - """ - states: [time, num_samples, batch_size] - weights [time, num_samples, batch_size] - observation: [batch_size, 1] - q0: ([batch_size], [batch_size]) - post: ... - """ - num_timesteps, _, batch_size = states_batch.shape - plots = [] - for i in range(batch_size): - states = states_batch[:,:,i] - weights = weights_batch[:,:,i] - observation = observation_batch[i] - ess = ess_batch[:,i] - q0_loc = q0[0][i] - q0_scale = q0[1][i] - - fig = plt.figure(figsize=(7, (num_timesteps + 1) * 2)) - # Each timestep gets two plots -- a bar plot and a histogram of state locs. - # The bar plot will be bar_rows rows tall. - # The histogram will be 1 row tall. - # There is also 1 extra plot at the top showing the posterior and q. 
- bar_rows = 8 - num_rows = (num_timesteps + 1) * (bar_rows + 1) - gs = gridspec.GridSpec(num_rows, 1) - - # Figure out how wide to make the plot - prior_lims = (post[1] * -2, post[1] * 2) - q_lims = (scipy.stats.norm.ppf(0.01, loc=q0_loc, scale=q0_scale), - scipy.stats.norm.ppf(0.99, loc=q0_loc, scale=q0_scale)) - state_width = states.max() - states.min() - state_lims = (states.min() - state_width * 0.15, - states.max() + state_width * 0.15) - - lims = (min(prior_lims[0], q_lims[0], state_lims[0]), - max(prior_lims[1], q_lims[1], state_lims[1])) - # plot the posterior - z0 = np.arange(lims[0], lims[1], 0.1) - alpha, pos_mu, sigma_sq, B, T = post - neg_mu = -pos_mu - scale = np.sqrt((T + 1) * sigma_sq) - p_zn = ( - alpha * scipy.stats.norm.pdf( - observation, loc=pos_mu + B, scale=scale) + (1 - alpha) * - scipy.stats.norm.pdf(observation, loc=neg_mu + B, scale=scale)) - p_z0 = ( - alpha * scipy.stats.norm.pdf(z0, loc=pos_mu, scale=np.sqrt(sigma_sq)) - + (1 - alpha) * scipy.stats.norm.pdf( - z0, loc=neg_mu, scale=np.sqrt(sigma_sq))) - p_zn_given_z0 = scipy.stats.norm.pdf( - observation, loc=z0 + B, scale=np.sqrt(T * sigma_sq)) - post_z0 = (p_z0 * p_zn_given_z0) / p_zn - # plot q - q_z0 = scipy.stats.norm.pdf(z0, loc=q0_loc, scale=q0_scale) - ax = plt.subplot(gs[0:bar_rows, :]) - ax.plot(z0, q_z0, color="blue") - ax.plot(z0, post_z0, color="green") - ax.plot(z0, p_z0, color="red") - ax.legend(("q", "posterior", "prior"), loc="best", prop={"size": 10}) - - ax.set_xticks([]) - ax.set_xlim(*lims) - - # plot the states - for t in range(num_timesteps): - start = (t + 1) * (bar_rows + 1) - ax1 = plt.subplot(gs[start:start + bar_rows, :]) - ax2 = plt.subplot(gs[start + bar_rows:start + bar_rows + 1, :]) - # plot the states barplot - # ax1.hist( - # states[t, :], - # weights=weights[t, :], - # bins=50, - # edgecolor="none", - # alpha=0.2) - ax1.bar(states[t,:], weights[t,:], width=0.02, alpha=0.2, edgecolor = "none") - ax1.set_ylabel("t=%d" % t) - ax1.set_xticks([]) - 
ax1.grid(True, which="both") - ax1.set_xlim(*lims) - # plot the observation - ax1.axvline(x=observation, color="red", linestyle="dashed") - # add the ESS - ax1.text(0.1, 0.9, "ESS: %0.2f" % ess[t], - ha='center', va='center', transform=ax1.transAxes) - - # plot the state location histogram - ax2.hist2d( - states[t, :], np.zeros_like(states[t, :]), bins=[50, 1], cmap="Greys") - ax2.grid(False) - ax2.set_yticks([]) - ax2.set_xlim(*lims) - if t != num_timesteps - 1: - ax2.set_xticks([]) - - fig.canvas.draw() - p = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="") - plots.append(p.reshape(fig.canvas.get_width_height()[::-1] + (3,))) - plt.close(fig) - return np.stack(plots) - - plots = tf.py_func(_plot_states, - [states, weights, observation, ess, (q0_loc, q0_scale), post], - [tf.uint8])[0] - tf.summary.image("states", plots, 5, collections=["infrequent_summaries"]) - - -def plot_weights(weights, resampled=None): - """Plots the weights and effective sample size from an SMC rollout. 
- - Args: - weights: [num_timesteps, num_samples, batch_size] importance weights - resampled: [num_timesteps] 0/1 indicating if resampling ocurred - """ - weights = tf.convert_to_tensor(weights) - - def _make_plots(weights, resampled): - num_timesteps, num_samples, batch_size = weights.shape - plots = [] - for i in range(batch_size): - fig, axes = plt.subplots(nrows=1, sharex=True, figsize=(8, 4)) - axes.stackplot(np.arange(num_timesteps), np.transpose(weights[:, :, i])) - axes.set_title("Weights") - axes.set_xlabel("Steps") - axes.set_ylim([0, 1]) - axes.set_xlim([0, num_timesteps - 1]) - for j in np.where(resampled > 0)[0]: - axes.axvline(x=j, color="red", linestyle="dashed", ymin=0.0, ymax=1.0) - fig.canvas.draw() - data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="") - data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) - plots.append(data) - plt.close(fig) - return np.stack(plots, axis=0) - - if resampled is None: - num_timesteps, _, batch_size = weights.get_shape().as_list() - resampled = tf.zeros([num_timesteps], dtype=tf.float32) - plots = tf.py_func(_make_plots, - [tf.nn.softmax(weights, dim=1), - tf.to_float(resampled)], [tf.uint8])[0] - batch_size = weights.get_shape().as_list()[-1] - tf.summary.image( - "weights", plots, batch_size, collections=["infrequent_summaries"]) - - -def summarize_weights(weights, num_timesteps, num_samples): - # weights is [num_timesteps, num_samples, batch_size] - weights = tf.convert_to_tensor(weights) - mean = tf.reduce_mean(weights, axis=1, keepdims=True) - squared_diff = tf.square(weights - mean) - variances = tf.reduce_sum(squared_diff, axis=1) / (num_samples - 1) - # average the variance over the batch - variances = tf.reduce_mean(variances, axis=1) - avg_magnitude = tf.reduce_mean(tf.abs(weights), axis=[1, 2]) - for t in xrange(num_timesteps): - tf.summary.scalar("weights/variance_%d" % t, variances[t]) - tf.summary.scalar("weights/magnitude_%d" % t, avg_magnitude[t]) - 
tf.summary.histogram("weights/step_%d" % t, weights[t]) - - -def summarize_learning_signal(rewards, tag): - num_resampling_events, _ = rewards.get_shape().as_list() - mean = tf.reduce_mean(rewards, axis=1) - avg_magnitude = tf.reduce_mean(tf.abs(rewards), axis=1) - reward_square = tf.reduce_mean(tf.square(rewards), axis=1) - for t in xrange(num_resampling_events): - tf.summary.scalar("%s/mean_%d" % (tag, t), mean[t]) - tf.summary.scalar("%s/magnitude_%d" % (tag, t), avg_magnitude[t]) - tf.summary.scalar("%s/squared_%d" % (tag, t), reward_square[t]) - tf.summary.histogram("%s/step_%d" % (tag, t), rewards[t]) - - -def summarize_qs(model, observation, states): - model.q.summarize_weights() - if hasattr(model.p, "posterior") and callable(getattr(model.p, "posterior")): - states = [tf.zeros_like(states[0])] + states[:-1] - for t, prev_state in enumerate(states): - p = model.p.posterior(observation, prev_state, t) - q = model.q.q_zt(observation, prev_state, t) - kl = tf.reduce_mean(tf.contrib.distributions.kl_divergence(p, q)) - tf.summary.scalar("kl_q/%d" % t, tf.reduce_mean(kl)) - mean_diff = q.loc - p.loc - mean_abs_err = tf.abs(mean_diff) - mean_rel_err = tf.abs(mean_diff / p.loc) - tf.summary.scalar("q_mean_convergence/absolute_error_%d" % t, - tf.reduce_mean(mean_abs_err)) - tf.summary.scalar("q_mean_convergence/relative_error_%d" % t, - tf.reduce_mean(mean_rel_err)) - sigma_diff = tf.square(q.scale) - tf.square(p.scale) - sigma_abs_err = tf.abs(sigma_diff) - sigma_rel_err = tf.abs(sigma_diff / tf.square(p.scale)) - tf.summary.scalar("q_variance_convergence/absolute_error_%d" % t, - tf.reduce_mean(sigma_abs_err)) - tf.summary.scalar("q_variance_convergence/relative_error_%d" % t, - tf.reduce_mean(sigma_rel_err)) - - -def summarize_rs(model, states): - model.r.summarize_weights() - for t, state in enumerate(states): - true_r = model.p.lookahead(state, t) - r = model.r.r_xn(state, t) - kl = tf.reduce_mean(tf.contrib.distributions.kl_divergence(true_r, r)) - 
tf.summary.scalar("kl_r/%d" % t, tf.reduce_mean(kl)) - mean_diff = true_r.loc - r.loc - mean_abs_err = tf.abs(mean_diff) - mean_rel_err = tf.abs(mean_diff / true_r.loc) - tf.summary.scalar("r_mean_convergence/absolute_error_%d" % t, - tf.reduce_mean(mean_abs_err)) - tf.summary.scalar("r_mean_convergence/relative_error_%d" % t, - tf.reduce_mean(mean_rel_err)) - sigma_diff = tf.square(r.scale) - tf.square(true_r.scale) - sigma_abs_err = tf.abs(sigma_diff) - sigma_rel_err = tf.abs(sigma_diff / tf.square(true_r.scale)) - tf.summary.scalar("r_variance_convergence/absolute_error_%d" % t, - tf.reduce_mean(sigma_abs_err)) - tf.summary.scalar("r_variance_convergence/relative_error_%d" % t, - tf.reduce_mean(sigma_rel_err)) - - -def summarize_model(model, true_bs, observation, states, bound, summarize_r=True): - if hasattr(model.p, "bs"): - model_b = tf.reduce_sum(model.p.bs, axis=0) - true_b = tf.reduce_sum(true_bs, axis=0) - abs_err = tf.abs(model_b - true_b) - rel_err = abs_err / true_b - tf.summary.scalar("sum_of_bs/data_generating_process", tf.reduce_mean(true_b)) - tf.summary.scalar("sum_of_bs/model", tf.reduce_mean(model_b)) - tf.summary.scalar("sum_of_bs/absolute_error", tf.reduce_mean(abs_err)) - tf.summary.scalar("sum_of_bs/relative_error", tf.reduce_mean(rel_err)) - #summarize_qs(model, observation, states) - #if bound == "fivo-aux" and summarize_r: - # summarize_rs(model, states) - - -def summarize_grads(grads, loss_name): - grad_ema = tf.train.ExponentialMovingAverage(decay=0.99) - vectorized_grads = tf.concat( - [tf.reshape(g, [-1]) for g, _ in grads if g is not None], axis=0) - new_second_moments = tf.square(vectorized_grads) - new_first_moments = vectorized_grads - maintain_grad_ema_op = grad_ema.apply([new_first_moments, new_second_moments]) - first_moments = grad_ema.average(new_first_moments) - second_moments = grad_ema.average(new_second_moments) - variances = second_moments - tf.square(first_moments) - tf.summary.scalar("grad_variance/%s" % loss_name, 
tf.reduce_mean(variances)) - tf.summary.histogram("grad_variance/%s" % loss_name, variances) - tf.summary.histogram("grad_mean/%s" % loss_name, first_moments) - return maintain_grad_ema_op diff --git a/research/fivo/experimental/train.py b/research/fivo/experimental/train.py deleted file mode 100644 index 8abc9909b115298a30151a332d340f7b25e3cf90..0000000000000000000000000000000000000000 --- a/research/fivo/experimental/train.py +++ /dev/null @@ -1,637 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Main script for running fivo""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict - -import numpy as np -import tensorflow as tf - -import bounds -import data -import models -import summary_utils as summ - -tf.logging.set_verbosity(tf.logging.INFO) - -tf.app.flags.DEFINE_integer("random_seed", None, - "A random seed for the data generating process. 
Same seed " - "-> same data generating process and initialization.") -tf.app.flags.DEFINE_enum("bound", "fivo", ["iwae", "fivo", "fivo-aux", "fivo-aux-td"], - "The bound to optimize.") -tf.app.flags.DEFINE_enum("model", "forward", ["forward", "long_chain"], - "The model to use.") -tf.app.flags.DEFINE_enum("q_type", "normal", - ["normal", "simple_mean", "prev_state", "observation"], - "The parameterization to use for q") -tf.app.flags.DEFINE_enum("p_type", "unimodal", ["unimodal", "bimodal", "nonlinear"], - "The type of prior.") -tf.app.flags.DEFINE_boolean("train_p", True, - "If false, do not train the model p.") - -tf.app.flags.DEFINE_integer("state_size", 1, - "The dimensionality of the state space.") -tf.app.flags.DEFINE_float("variance", 1.0, - "The variance of the data generating process.") - -tf.app.flags.DEFINE_boolean("use_bs", True, - "If False, initialize all bs to 0.") -tf.app.flags.DEFINE_float("bimodal_prior_weight", 0.5, - "The weight assigned to the positive mode of the prior in " - "both the data generating process and p.") -tf.app.flags.DEFINE_float("bimodal_prior_mean", None, - "If supplied, sets the mean of the 2 modes of the prior to " - "be 1 and -1 times the supplied value. This is for both the " - "data generating process and p.") -tf.app.flags.DEFINE_float("fixed_observation", None, - "If supplied, fix the observation to a constant value in the" - " data generating process only.") -tf.app.flags.DEFINE_float("r_sigma_init", 1., - "Value to initialize variance of r to.") -tf.app.flags.DEFINE_enum("observation_type", - models.STANDARD_OBSERVATION, models.OBSERVATION_TYPES, - "The type of observation for the long chain model.") -tf.app.flags.DEFINE_enum("transition_type", - models.STANDARD_TRANSITION, models.TRANSITION_TYPES, - "The type of transition for the long chain model.") -tf.app.flags.DEFINE_float("observation_variance", None, - "The variance of the observation. 
Defaults to 'variance'") - -tf.app.flags.DEFINE_integer("num_timesteps", 5, - "Number of timesteps in the sequence.") -tf.app.flags.DEFINE_integer("num_observations", 1, - "The number of observations.") -tf.app.flags.DEFINE_integer("steps_per_observation", 5, - "The number of timesteps between each observation.") - -tf.app.flags.DEFINE_integer("batch_size", 4, - "The number of examples per batch.") -tf.app.flags.DEFINE_integer("num_samples", 4, - "The number particles to use.") -tf.app.flags.DEFINE_integer("num_eval_samples", 512, - "The batch size and # of particles to use for eval.") - -tf.app.flags.DEFINE_string("resampling", "always", - "How to resample. Accepts 'always','never', or a " - "comma-separated list of booleans like 'true,true,false'.") -tf.app.flags.DEFINE_enum("resampling_method", "multinomial", ["multinomial", - "stratified", - "systematic", - "relaxed-logblend", - "relaxed-stateblend", - "relaxed-linearblend", - "relaxed-stateblend-st",], - "Type of resampling method to use.") -tf.app.flags.DEFINE_boolean("use_resampling_grads", True, - "Whether or not to use resampling grads to optimize FIVO." - "Disabled automatically if resampling_method=relaxed.") -tf.app.flags.DEFINE_boolean("disable_r", False, - "If false, r is not used for fivo-aux and is set to zeros.") - -tf.app.flags.DEFINE_float("learning_rate", 1e-4, - "The learning rate to use for ADAM or SGD.") -tf.app.flags.DEFINE_integer("decay_steps", 25000, - "The number of steps before the learning rate is halved.") -tf.app.flags.DEFINE_integer("max_steps", int(1e6), - "The number of steps to run training for.") - -tf.app.flags.DEFINE_string("logdir", "/tmp/fivo-aux", - "Directory for summaries and checkpoints.") - -tf.app.flags.DEFINE_integer("summarize_every", int(1e3), - "The number of steps between each evaluation.") -FLAGS = tf.app.flags.FLAGS - - -def combine_grad_lists(grad_lists): - # grads is num_losses by num_variables. - # each list could have different variables. 
- # for each variable, sum the grads across all losses. - grads_dict = defaultdict(list) - var_dict = {} - for grad_list in grad_lists: - for grad, var in grad_list: - if grad is not None: - grads_dict[var.name].append(grad) - var_dict[var.name] = var - - final_grads = [] - for var_name, var in var_dict.iteritems(): - grads = grads_dict[var_name] - if len(grads) > 0: - tf.logging.info("Var %s has combined grads from %s." % - (var_name, [g.name for g in grads])) - grad = tf.reduce_sum(grads, axis=0) - else: - tf.logging.info("Var %s has no grads" % var_name) - grad = None - final_grads.append((grad, var)) - return final_grads - - -def make_apply_grads_op(losses, global_step, learning_rate, lr_decay_steps): - for l in losses: - assert isinstance(l, bounds.Loss) - - lr = tf.train.exponential_decay( - learning_rate, global_step, lr_decay_steps, 0.5, staircase=False) - tf.summary.scalar("learning_rate", lr) - opt = tf.train.AdamOptimizer(lr) - - ema_ops = [] - grads = [] - for loss_name, loss, loss_var_collection in losses: - tf.logging.info("Computing grads of %s w.r.t. vars in collection %s" % - (loss_name, loss_var_collection)) - g = opt.compute_gradients(loss, - var_list=tf.get_collection(loss_var_collection)) - ema_ops.append(summ.summarize_grads(g, loss_name)) - grads.append(g) - - all_grads = combine_grad_lists(grads) - apply_grads_op = opt.apply_gradients(all_grads, global_step=global_step) - - # Update the emas after applying the grads. 
- with tf.control_dependencies([apply_grads_op]): - train_op = tf.group(*ema_ops) - return train_op - - -def add_check_numerics_ops(): - check_op = [] - for op in tf.get_default_graph().get_operations(): - bad = ["logits/Log", "sample/Reshape", "log_prob/mul", - "log_prob/SparseSoftmaxCrossEntropyWithLogits/Reshape", - "entropy/Reshape", "entropy/LogSoftmax", "Categorical", "Mean"] - if all([x not in op.name for x in bad]): - for output in op.outputs: - if output.dtype in [tf.float16, tf.float32, tf.float64]: - if op._get_control_flow_context() is not None: # pylint: disable=protected-access - raise ValueError("`tf.add_check_numerics_ops() is not compatible " - "with TensorFlow control flow operations such as " - "`tf.cond()` or `tf.while_loop()`.") - - message = op.name + ":" + str(output.value_index) - with tf.control_dependencies(check_op): - check_op = [tf.check_numerics(output, message=message)] - return tf.group(*check_op) - - -def create_long_chain_graph(bound, state_size, num_obs, steps_per_obs, - batch_size, num_samples, num_eval_samples, - resampling_schedule, use_resampling_grads, - learning_rate, lr_decay_steps, dtype="float64"): - num_timesteps = num_obs * steps_per_obs + 1 - # Make the dataset. 
- dataset = data.make_long_chain_dataset( - state_size=state_size, - num_obs=num_obs, - steps_per_obs=steps_per_obs, - batch_size=batch_size, - num_samples=num_samples, - variance=FLAGS.variance, - observation_variance=FLAGS.observation_variance, - dtype=dtype, - observation_type=FLAGS.observation_type, - transition_type=FLAGS.transition_type, - fixed_observation=FLAGS.fixed_observation) - itr = dataset.make_one_shot_iterator() - _, observations = itr.get_next() - # Make the dataset for eval - eval_dataset = data.make_long_chain_dataset( - state_size=state_size, - num_obs=num_obs, - steps_per_obs=steps_per_obs, - batch_size=batch_size, - num_samples=num_eval_samples, - variance=FLAGS.variance, - observation_variance=FLAGS.observation_variance, - dtype=dtype, - observation_type=FLAGS.observation_type, - transition_type=FLAGS.transition_type, - fixed_observation=FLAGS.fixed_observation) - eval_itr = eval_dataset.make_one_shot_iterator() - _, eval_observations = eval_itr.get_next() - - # Make the model. 
- model = models.LongChainModel.create( - state_size, - num_obs, - steps_per_obs, - observation_type=FLAGS.observation_type, - transition_type=FLAGS.transition_type, - variance=FLAGS.variance, - observation_variance=FLAGS.observation_variance, - dtype=tf.as_dtype(dtype), - disable_r=FLAGS.disable_r) - - # Compute the bound and loss - if bound == "iwae": - (_, losses, ema_op, _, _) = bounds.iwae( - model, - observations, - num_timesteps, - num_samples=num_samples) - (eval_log_p_hat, _, _, _, eval_log_weights) = bounds.iwae( - model, - eval_observations, - num_timesteps, - num_samples=num_eval_samples, - summarize=False) - eval_log_p_hat = tf.reduce_mean(eval_log_p_hat) - elif bound == "fivo" or "fivo-aux": - (_, losses, ema_op, _, _) = bounds.fivo( - model, - observations, - num_timesteps, - resampling_schedule=resampling_schedule, - use_resampling_grads=use_resampling_grads, - resampling_type=FLAGS.resampling_method, - aux=("aux" in bound), - num_samples=num_samples) - (eval_log_p_hat, _, _, _, eval_log_weights) = bounds.fivo( - model, - eval_observations, - num_timesteps, - resampling_schedule=resampling_schedule, - use_resampling_grads=False, - resampling_type="multinomial", - aux=("aux" in bound), - num_samples=num_eval_samples, - summarize=False) - eval_log_p_hat = tf.reduce_mean(eval_log_p_hat) - - summ.summarize_ess(eval_log_weights, only_last_timestep=True) - - tf.summary.scalar("log_p_hat", eval_log_p_hat) - - # Compute and apply grads. - global_step = tf.train.get_or_create_global_step() - - apply_grads = make_apply_grads_op(losses, - global_step, - learning_rate, - lr_decay_steps) - - # Update the emas after applying the grads. - with tf.control_dependencies([apply_grads]): - train_op = tf.group(ema_op) - - # We can't calculate the likelihood for most of these models - # so we just return zeros. 
- eval_likelihood = tf.zeros([], dtype=dtype) - return global_step, train_op, eval_log_p_hat, eval_likelihood - - -def create_graph(bound, state_size, num_timesteps, batch_size, - num_samples, num_eval_samples, resampling_schedule, - use_resampling_grads, learning_rate, lr_decay_steps, - train_p, dtype='float64'): - if FLAGS.use_bs: - true_bs = None - else: - true_bs = [np.zeros([state_size]).astype(dtype) for _ in xrange(num_timesteps)] - - # Make the dataset. - true_bs, dataset = data.make_dataset( - bs=true_bs, - state_size=state_size, - num_timesteps=num_timesteps, - batch_size=batch_size, - num_samples=num_samples, - variance=FLAGS.variance, - prior_type=FLAGS.p_type, - bimodal_prior_weight=FLAGS.bimodal_prior_weight, - bimodal_prior_mean=FLAGS.bimodal_prior_mean, - transition_type=FLAGS.transition_type, - fixed_observation=FLAGS.fixed_observation, - dtype=dtype) - itr = dataset.make_one_shot_iterator() - _, observations = itr.get_next() - # Make the dataset for eval - _, eval_dataset = data.make_dataset( - bs=true_bs, - state_size=state_size, - num_timesteps=num_timesteps, - batch_size=num_eval_samples, - num_samples=num_eval_samples, - variance=FLAGS.variance, - prior_type=FLAGS.p_type, - bimodal_prior_weight=FLAGS.bimodal_prior_weight, - bimodal_prior_mean=FLAGS.bimodal_prior_mean, - transition_type=FLAGS.transition_type, - fixed_observation=FLAGS.fixed_observation, - dtype=dtype) - eval_itr = eval_dataset.make_one_shot_iterator() - _, eval_observations = eval_itr.get_next() - - # Make the model. 
- if bound == "fivo-aux-td": - model = models.TDModel.create( - state_size, - num_timesteps, - variance=FLAGS.variance, - train_p=train_p, - p_type=FLAGS.p_type, - q_type=FLAGS.q_type, - mixing_coeff=FLAGS.bimodal_prior_weight, - prior_mode_mean=FLAGS.bimodal_prior_mean, - observation_variance=FLAGS.observation_variance, - transition_type=FLAGS.transition_type, - use_bs=FLAGS.use_bs, - dtype=tf.as_dtype(dtype), - random_seed=FLAGS.random_seed) - else: - model = models.Model.create( - state_size, - num_timesteps, - variance=FLAGS.variance, - train_p=train_p, - p_type=FLAGS.p_type, - q_type=FLAGS.q_type, - mixing_coeff=FLAGS.bimodal_prior_weight, - prior_mode_mean=FLAGS.bimodal_prior_mean, - observation_variance=FLAGS.observation_variance, - transition_type=FLAGS.transition_type, - use_bs=FLAGS.use_bs, - r_sigma_init=FLAGS.r_sigma_init, - dtype=tf.as_dtype(dtype), - random_seed=FLAGS.random_seed) - - # Compute the bound and loss - if bound == "iwae": - (_, losses, ema_op, _, _) = bounds.iwae( - model, - observations, - num_timesteps, - num_samples=num_samples) - (eval_log_p_hat, _, _, eval_states, eval_log_weights) = bounds.iwae( - model, - eval_observations, - num_timesteps, - num_samples=num_eval_samples, - summarize=True) - - eval_log_p_hat = tf.reduce_mean(eval_log_p_hat) - - elif "fivo" in bound: - if bound == "fivo-aux-td": - (_, losses, ema_op, _, _) = bounds.fivo_aux_td( - model, - observations, - num_timesteps, - resampling_schedule=resampling_schedule, - num_samples=num_samples) - (eval_log_p_hat, _, _, eval_states, eval_log_weights) = bounds.fivo_aux_td( - model, - eval_observations, - num_timesteps, - resampling_schedule=resampling_schedule, - num_samples=num_eval_samples, - summarize=True) - else: - (_, losses, ema_op, _, _) = bounds.fivo( - model, - observations, - num_timesteps, - resampling_schedule=resampling_schedule, - use_resampling_grads=use_resampling_grads, - resampling_type=FLAGS.resampling_method, - aux=("aux" in bound), - 
num_samples=num_samples) - (eval_log_p_hat, _, _, eval_states, eval_log_weights) = bounds.fivo( - model, - eval_observations, - num_timesteps, - resampling_schedule=resampling_schedule, - use_resampling_grads=False, - resampling_type="multinomial", - aux=("aux" in bound), - num_samples=num_eval_samples, - summarize=True) - eval_log_p_hat = tf.reduce_mean(eval_log_p_hat) - - summ.summarize_ess(eval_log_weights, only_last_timestep=True) - - # if FLAGS.p_type == "bimodal": - # # create the observations that showcase the model. - # mode_odds_ratio = tf.convert_to_tensor([1., 3., 1./3., 512., 1./512.], - # dtype=tf.float64) - # mode_odds_ratio = tf.expand_dims(mode_odds_ratio, 1) - # k = ((num_timesteps+1) * FLAGS.variance) / (2*FLAGS.bimodal_prior_mean) - # explain_obs = tf.reduce_sum(model.p.bs) + tf.log(mode_odds_ratio) * k - # explain_obs = tf.tile(explain_obs, [num_eval_samples, 1]) - # # run the model on the explainable observations - # if bound == "iwae": - # (_, _, _, explain_states, explain_log_weights) = bounds.iwae( - # model, - # explain_obs, - # num_timesteps, - # num_samples=num_eval_samples) - # elif bound == "fivo" or "fivo-aux": - # (_, _, _, explain_states, explain_log_weights) = bounds.fivo( - # model, - # explain_obs, - # num_timesteps, - # resampling_schedule=resampling_schedule, - # use_resampling_grads=False, - # resampling_type="multinomial", - # aux=("aux" in bound), - # num_samples=num_eval_samples) - # summ.summarize_particles(explain_states, - # explain_log_weights, - # explain_obs, - # model) - - # Calculate the true likelihood. 
- if hasattr(model.p, 'likelihood') and callable(getattr(model.p, 'likelihood')): - eval_likelihood = model.p.likelihood(eval_observations)/ FLAGS.num_timesteps - else: - eval_likelihood = tf.zeros_like(eval_log_p_hat) - - tf.summary.scalar("log_p_hat", eval_log_p_hat) - tf.summary.scalar("likelihood", eval_likelihood) - tf.summary.scalar("bound_gap", eval_likelihood - eval_log_p_hat) - summ.summarize_model(model, true_bs, eval_observations, eval_states, bound, - summarize_r=not bound == "fivo-aux-td") - - # Compute and apply grads. - global_step = tf.train.get_or_create_global_step() - - apply_grads = make_apply_grads_op(losses, - global_step, - learning_rate, - lr_decay_steps) - - # Update the emas after applying the grads. - with tf.control_dependencies([apply_grads]): - train_op = tf.group(ema_op) - #train_op = tf.group(ema_op, add_check_numerics_ops()) - - return global_step, train_op, eval_log_p_hat, eval_likelihood - - -def parse_resampling_schedule(schedule, num_timesteps): - schedule = schedule.strip().lower() - if schedule == "always": - return [True] * (num_timesteps - 1) + [False] - elif schedule == "never": - return [False] * num_timesteps - elif "every" in schedule: - n = int(schedule.split("_")[1]) - return [(i+1) % n == 0 for i in xrange(num_timesteps)] - else: - sched = [x.strip() == "true" for x in schedule.split(",")] - assert len( - sched - ) == num_timesteps, "Wrong number of timesteps in resampling schedule." 
- return sched - - -def create_log_hook(step, eval_log_p_hat, eval_likelihood): - def summ_formatter(d): - return ("Step {step}, log p_hat: {log_p_hat:.5f} likelihood: {likelihood:.5f}".format(**d)) - hook = tf.train.LoggingTensorHook( - { - "step": step, - "log_p_hat": eval_log_p_hat, - "likelihood": eval_likelihood, - }, - every_n_iter=FLAGS.summarize_every, - formatter=summ_formatter) - return hook - - -def create_infrequent_summary_hook(): - infrequent_summary_hook = tf.train.SummarySaverHook( - save_steps=10000, - output_dir=FLAGS.logdir, - summary_op=tf.summary.merge_all(key="infrequent_summaries") - ) - return infrequent_summary_hook - - -def main(unused_argv): - if FLAGS.model == "long_chain": - resampling_schedule = parse_resampling_schedule(FLAGS.resampling, - FLAGS.num_timesteps + 1) - else: - resampling_schedule = parse_resampling_schedule(FLAGS.resampling, - FLAGS.num_timesteps) - if FLAGS.random_seed is None: - seed = np.random.randint(0, high=10000) - else: - seed = FLAGS.random_seed - tf.logging.info("Using random seed %d", seed) - - if FLAGS.model == "long_chain": - assert FLAGS.q_type == "normal", "Q type %s not supported for long chain models" % FLAGS.q_type - assert FLAGS.p_type == "unimodal", "Bimodal priors are not supported for long chain models" - assert not FLAGS.use_bs, "Bs are not supported with long chain models" - assert FLAGS.num_timesteps == FLAGS.num_observations * FLAGS.steps_per_observation, "Num timesteps does not match." - assert FLAGS.bound != "fivo-aux-td", "TD Training is not compatible with long chain models." - - if FLAGS.model == "forward": - if "nonlinear" not in FLAGS.p_type: - assert FLAGS.transition_type == models.STANDARD_TRANSITION, "Non-standard transitions not supported by the forward model." - assert FLAGS.observation_type == models.STANDARD_OBSERVATION, "Non-standard observations not supported by the forward model." - assert FLAGS.observation_variance is None, "Forward model does not support observation variance." 
- assert FLAGS.num_observations == 1, "Forward model only supports 1 observation." - - if "relaxed" in FLAGS.resampling_method: - FLAGS.use_resampling_grads = False - assert FLAGS.bound != "fivo-aux-td", "TD Training is not compatible with relaxed resampling." - - if FLAGS.observation_variance is None: - FLAGS.observation_variance = FLAGS.variance - - if FLAGS.p_type == "bimodal": - assert FLAGS.bimodal_prior_mean is not None, "Must specify prior mean if using bimodal p." - - if FLAGS.p_type == "nonlinear" or FLAGS.p_type == "nonlinear-cauchy": - assert not FLAGS.use_bs, "Using bs is not compatible with the nonlinear model." - - g = tf.Graph() - with g.as_default(): - # Set the seeds. - tf.set_random_seed(seed) - np.random.seed(seed) - if FLAGS.model == "long_chain": - (global_step, train_op, eval_log_p_hat, - eval_likelihood) = create_long_chain_graph( - FLAGS.bound, - FLAGS.state_size, - FLAGS.num_observations, - FLAGS.steps_per_observation, - FLAGS.batch_size, - FLAGS.num_samples, - FLAGS.num_eval_samples, - resampling_schedule, - FLAGS.use_resampling_grads, - FLAGS.learning_rate, - FLAGS.decay_steps) - else: - (global_step, train_op, - eval_log_p_hat, eval_likelihood) = create_graph( - FLAGS.bound, - FLAGS.state_size, - FLAGS.num_timesteps, - FLAGS.batch_size, - FLAGS.num_samples, - FLAGS.num_eval_samples, - resampling_schedule, - FLAGS.use_resampling_grads, - FLAGS.learning_rate, - FLAGS.decay_steps, - FLAGS.train_p) - - log_hooks = [create_log_hook(global_step, eval_log_p_hat, eval_likelihood)] - if len(tf.get_collection("infrequent_summaries")) > 0: - log_hooks.append(create_infrequent_summary_hook()) - - tf.logging.info("trainable variables:") - tf.logging.info([v.name for v in tf.trainable_variables()]) - tf.logging.info("p vars:") - tf.logging.info([v.name for v in tf.get_collection("P_VARS")]) - tf.logging.info("q vars:") - tf.logging.info([v.name for v in tf.get_collection("Q_VARS")]) - tf.logging.info("r vars:") - tf.logging.info([v.name for v in 
tf.get_collection("R_VARS")]) - tf.logging.info("r tilde vars:") - tf.logging.info([v.name for v in tf.get_collection("R_TILDE_VARS")]) - - with tf.train.MonitoredTrainingSession( - master="", - is_chief=True, - hooks=log_hooks, - checkpoint_dir=FLAGS.logdir, - save_checkpoint_secs=120, - save_summaries_steps=FLAGS.summarize_every, - log_step_count_steps=FLAGS.summarize_every) as sess: - cur_step = -1 - while True: - if sess.should_stop() or cur_step > FLAGS.max_steps: - break - # run a step - _, cur_step = sess.run([train_op, global_step]) - - -if __name__ == "__main__": - tf.app.run(main) diff --git a/research/fivo/fivo/__init__.py b/research/fivo/fivo/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/fivo/fivo/bounds.py b/research/fivo/fivo/bounds.py deleted file mode 100644 index 088519033dd80669e99015b8e465888bd94a4cb1..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/bounds.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Implementation of objectives for training stochastic latent variable models. - -Contains implementations of the Importance Weighted Autoencoder objective (IWAE) -and the Filtering Variational objective (FIVO). 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import tensorflow as tf - -from fivo import nested_utils as nested -from fivo import smc - - -def iwae(model, - observations, - seq_lengths, - num_samples=1, - parallel_iterations=30, - swap_memory=True): - """Computes the IWAE lower bound on the log marginal probability. - - This method accepts a stochastic latent variable model and some observations - and computes a stochastic lower bound on the log marginal probability of the - observations. The IWAE estimator is defined by averaging multiple importance - weights. For more details see "Importance Weighted Autoencoders" by Burda - et al. https://arxiv.org/abs/1509.00519. - - When num_samples = 1, this bound becomes the evidence lower bound (ELBO). - - Args: - model: A subclass of ELBOTrainableSequenceModel that implements one - timestep of the model. See models/vrnn.py for an example. - observations: The inputs to the model. A potentially nested list or tuple of - Tensors each of shape [max_seq_len, batch_size, ...]. The Tensors must - have a rank at least two and have matching shapes in the first two - dimensions, which represent time and the batch respectively. The model - will be provided with the observations before computing the bound. - seq_lengths: A [batch_size] Tensor of ints encoding the length of each - sequence in the batch (sequences can be padded to a common length). - num_samples: The number of samples to use. - parallel_iterations: The number of parallel iterations to use for the - internal while loop. - swap_memory: Whether GPU-CPU memory swapping should be enabled for the - internal while loop. - - Returns: - log_p_hat: A Tensor of shape [batch_size] containing IWAE's estimate of the - log marginal probability of the observations. - log_weights: A Tensor of shape [max_seq_len, batch_size, num_samples] - containing the log weights at each timestep. 
Will not be valid for - timesteps past the end of a sequence. - """ - log_p_hat, log_weights, _, final_state = fivo( - model, - observations, - seq_lengths, - num_samples=num_samples, - resampling_criterion=smc.never_resample_criterion, - parallel_iterations=parallel_iterations, - swap_memory=swap_memory) - return log_p_hat, log_weights, final_state - - -def fivo(model, - observations, - seq_lengths, - num_samples=1, - resampling_criterion=smc.ess_criterion, - resampling_type='multinomial', - relaxed_resampling_temperature=0.5, - parallel_iterations=30, - swap_memory=True, - random_seed=None): - """Computes the FIVO lower bound on the log marginal probability. - - This method accepts a stochastic latent variable model and some observations - and computes a stochastic lower bound on the log marginal probability of the - observations. The lower bound is defined by a particle filter's unbiased - estimate of the marginal probability of the observations. For more details see - "Filtering Variational Objectives" by Maddison et al. - https://arxiv.org/abs/1705.09279. - - When the resampling criterion is "never resample", this bound becomes IWAE. - - Args: - model: A subclass of ELBOTrainableSequenceModel that implements one - timestep of the model. See models/vrnn.py for an example. - observations: The inputs to the model. A potentially nested list or tuple of - Tensors each of shape [max_seq_len, batch_size, ...]. The Tensors must - have a rank at least two and have matching shapes in the first two - dimensions, which represent time and the batch respectively. The model - will be provided with the observations before computing the bound. - seq_lengths: A [batch_size] Tensor of ints encoding the length of each - sequence in the batch (sequences can be padded to a common length). - num_samples: The number of particles to use in each particle filter. - resampling_criterion: The resampling criterion to use for this particle - filter. 
Must accept the number of samples, the current log weights, - and the current timestep and return a boolean Tensor of shape [batch_size] - indicating whether each particle filter should resample. See - ess_criterion and related functions for examples. When - resampling_criterion is never_resample_criterion, resampling_fn is ignored - and never called. - resampling_type: The type of resampling, one of "multinomial" or "relaxed". - relaxed_resampling_temperature: A positive temperature only used for relaxed - resampling. - parallel_iterations: The number of parallel iterations to use for the - internal while loop. Note that values greater than 1 can introduce - non-determinism even when random_seed is provided. - swap_memory: Whether GPU-CPU memory swapping should be enabled for the - internal while loop. - random_seed: The random seed to pass to the resampling operations in - the particle filter. Mainly useful for testing. - - Returns: - log_p_hat: A Tensor of shape [batch_size] containing FIVO's estimate of the - log marginal probability of the observations. - log_weights: A Tensor of shape [max_seq_len, batch_size, num_samples] - containing the log weights at each timestep of the particle filter. Note - that on timesteps when a resampling operation is performed the log weights - are reset to 0. Will not be valid for timesteps past the end of a - sequence. - resampled: A Tensor of shape [max_seq_len, batch_size] indicating when the - particle filters resampled. Will be 1.0 on timesteps when resampling - occurred and 0.0 on timesteps when it did not. - """ - # batch_size is the number of particle filters running in parallel. - batch_size = tf.shape(seq_lengths)[0] - - # Each sequence in the batch will be the input data for a different - # particle filter. The batch will be laid out as: - # particle 1 of particle filter 1 - # particle 1 of particle filter 2 - # ... - # particle 1 of particle filter batch_size - # particle 2 of particle filter 1 - # ... 
- # particle num_samples of particle filter batch_size - observations = nested.tile_tensors(observations, [1, num_samples]) - tiled_seq_lengths = tf.tile(seq_lengths, [num_samples]) - model.set_observations(observations, tiled_seq_lengths) - - if resampling_type == 'multinomial': - resampling_fn = smc.multinomial_resampling - elif resampling_type == 'relaxed': - resampling_fn = functools.partial( - smc.relaxed_resampling, temperature=relaxed_resampling_temperature) - resampling_fn = functools.partial(resampling_fn, random_seed=random_seed) - - def transition_fn(prev_state, t): - if prev_state is None: - return model.zero_state(batch_size * num_samples, tf.float32) - return model.propose_and_weight(prev_state, t) - - log_p_hat, log_weights, resampled, final_state, _ = smc.smc( - transition_fn, - seq_lengths, - num_particles=num_samples, - resampling_criterion=resampling_criterion, - resampling_fn=resampling_fn, - parallel_iterations=parallel_iterations, - swap_memory=swap_memory) - - return log_p_hat, log_weights, resampled, final_state - -def fivo_aux_td( - model, - observations, - seq_lengths, - num_samples=1, - resampling_criterion=smc.ess_criterion, - resampling_type='multinomial', - relaxed_resampling_temperature=0.5, - parallel_iterations=30, - swap_memory=True, - random_seed=None): - """Experimental.""" - # batch_size is the number of particle filters running in parallel. - batch_size = tf.shape(seq_lengths)[0] - max_seq_len = tf.reduce_max(seq_lengths) - - # Each sequence in the batch will be the input data for a different - # particle filter. The batch will be laid out as: - # particle 1 of particle filter 1 - # particle 1 of particle filter 2 - # ... - # particle 1 of particle filter batch_size - # particle 2 of particle filter 1 - # ... 
- # particle num_samples of particle filter batch_size - observations = nested.tile_tensors(observations, [1, num_samples]) - tiled_seq_lengths = tf.tile(seq_lengths, [num_samples]) - model.set_observations(observations, tiled_seq_lengths) - - if resampling_type == 'multinomial': - resampling_fn = smc.multinomial_resampling - elif resampling_type == 'relaxed': - resampling_fn = functools.partial( - smc.relaxed_resampling, temperature=relaxed_resampling_temperature) - resampling_fn = functools.partial(resampling_fn, random_seed=random_seed) - - def transition_fn(prev_state, t): - if prev_state is None: - model_init_state = model.zero_state(batch_size * num_samples, tf.float32) - return (tf.zeros([num_samples*batch_size], dtype=tf.float32), - (tf.zeros([num_samples*batch_size, model.latent_size], dtype=tf.float32), - tf.zeros([num_samples*batch_size, model.latent_size], dtype=tf.float32)), - model_init_state) - - prev_log_r, prev_log_r_tilde, prev_model_state = prev_state - (new_model_state, zt, log_q_zt, log_p_zt, - log_p_x_given_z, log_r_tilde, p_ztplus1) = model(prev_model_state, t) - r_tilde_mu, r_tilde_sigma_sq = log_r_tilde - # Compute the weight without r. - log_weight = log_p_zt + log_p_x_given_z - log_q_zt - # Compute log_r and log_r_tilde. - p_mu = tf.stop_gradient(p_ztplus1.mean()) - p_sigma_sq = tf.stop_gradient(p_ztplus1.variance()) - log_r = (tf.log(r_tilde_sigma_sq) - - tf.log(r_tilde_sigma_sq + p_sigma_sq) - - tf.square(r_tilde_mu - p_mu)/(r_tilde_sigma_sq + p_sigma_sq)) - # log_r is [num_samples*batch_size, latent_size]. We sum it along the last - # dimension to compute log r. - log_r = 0.5*tf.reduce_sum(log_r, axis=-1) - # Compute prev log r tilde - prev_r_tilde_mu, prev_r_tilde_sigma_sq = prev_log_r_tilde - prev_log_r_tilde = -0.5*tf.reduce_sum( - tf.square(tf.stop_gradient(zt) - prev_r_tilde_mu)/prev_r_tilde_sigma_sq, axis=-1) - # If the sequence is on the last timestep, log_r and log_r_tilde are just zeros. 
- last_timestep = t >= (tiled_seq_lengths - 1) - log_r = tf.where(last_timestep, - tf.zeros_like(log_r), - log_r) - prev_log_r_tilde = tf.where(last_timestep, - tf.zeros_like(prev_log_r_tilde), - prev_log_r_tilde) - log_weight += tf.stop_gradient(log_r - prev_log_r) - new_state = (log_r, log_r_tilde, new_model_state) - loop_fn_args = (log_r, prev_log_r_tilde, log_p_x_given_z, log_r - prev_log_r) - return log_weight, new_state, loop_fn_args - - def loop_fn(loop_state, loop_args, unused_model_state, log_weights, resampled, mask, t): - if loop_state is None: - return (tf.zeros([batch_size], dtype=tf.float32), - tf.zeros([batch_size], dtype=tf.float32), - tf.zeros([num_samples, batch_size], dtype=tf.float32)) - log_p_hat_acc, bellman_loss_acc, log_r_diff_acc = loop_state - log_r, prev_log_r_tilde, log_p_x_given_z, log_r_diff = loop_args - # Compute the log_p_hat update - log_p_hat_update = tf.reduce_logsumexp( - log_weights, axis=0) - tf.log(tf.to_float(num_samples)) - # If it is the last timestep, we always add the update. - log_p_hat_acc += tf.cond(t >= max_seq_len-1, - lambda: log_p_hat_update, - lambda: log_p_hat_update * resampled) - # Compute the Bellman update. - log_r = tf.reshape(log_r, [num_samples, batch_size]) - prev_log_r_tilde = tf.reshape(prev_log_r_tilde, [num_samples, batch_size]) - log_p_x_given_z = tf.reshape(log_p_x_given_z, [num_samples, batch_size]) - mask = tf.reshape(mask, [num_samples, batch_size]) - # On the first timestep there is no bellman error because there is no - # prev_log_r_tilde. - mask = tf.cond(tf.equal(t, 0), - lambda: tf.zeros_like(mask), - lambda: mask) - # On the first timestep also fix up prev_log_r_tilde, which will be -inf. 
- prev_log_r_tilde = tf.where( - tf.is_inf(prev_log_r_tilde), - tf.zeros_like(prev_log_r_tilde), - prev_log_r_tilde) - # log_lambda is [num_samples, batch_size] - log_lambda = tf.reduce_mean(prev_log_r_tilde - log_p_x_given_z - log_r, - axis=0, keepdims=True) - bellman_error = mask * tf.square( - prev_log_r_tilde - - tf.stop_gradient(log_lambda + log_p_x_given_z + log_r) - ) - bellman_loss_acc += tf.reduce_mean(bellman_error, axis=0) - # Compute the log_r_diff update - log_r_diff_acc += mask * tf.reshape(log_r_diff, [num_samples, batch_size]) - return (log_p_hat_acc, bellman_loss_acc, log_r_diff_acc) - - log_weights, resampled, accs = smc.smc( - transition_fn, - seq_lengths, - num_particles=num_samples, - resampling_criterion=resampling_criterion, - resampling_fn=resampling_fn, - loop_fn=loop_fn, - parallel_iterations=parallel_iterations, - swap_memory=swap_memory) - - log_p_hat, bellman_loss, log_r_diff = accs - loss_per_seq = [- log_p_hat, bellman_loss] - tf.summary.scalar("bellman_loss", - tf.reduce_mean(bellman_loss / tf.to_float(seq_lengths))) - tf.summary.scalar("log_r_diff", - tf.reduce_mean(tf.reduce_mean(log_r_diff, axis=0) / tf.to_float(seq_lengths))) - return loss_per_seq, log_p_hat, log_weights, resampled diff --git a/research/fivo/fivo/bounds_test.py b/research/fivo/fivo/bounds_test.py deleted file mode 100644 index c970f74f4cec36a855c54bbe6cdf8d76c3f86599..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/bounds_test.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.bounds""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from fivo.test_utils import create_vrnn -from fivo import bounds - - -class BoundsTest(tf.test.TestCase): - - def test_elbo(self): - """A golden-value test for the ELBO (the IWAE bound with num_samples=1).""" - tf.set_random_seed(1234) - with self.test_session() as sess: - model, inputs, targets, lengths = create_vrnn(random_seed=1234) - outs = bounds.iwae(model, (inputs, targets), lengths, num_samples=1, - parallel_iterations=1) - sess.run(tf.global_variables_initializer()) - log_p_hat, _, _ = sess.run(outs) - self.assertAllClose([-21.615765, -13.614225], log_p_hat) - - def test_iwae(self): - """A golden-value test for the IWAE bound.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - model, inputs, targets, lengths = create_vrnn(random_seed=1234) - outs = bounds.iwae(model, (inputs, targets), lengths, num_samples=4, - parallel_iterations=1) - sess.run(tf.global_variables_initializer()) - log_p_hat, weights, _ = sess.run(outs) - self.assertAllClose([-23.301426, -13.64028], log_p_hat) - weights_gt = np.array( - [[[-3.66708851, -2.07074022, -4.91751671, -5.03293562], - [-2.99690723, -3.17782736, -4.50084877, -3.48536515]], - [[-6.2539978, -4.37615728, -7.43738699, -7.85044909], - [-8.27518654, -6.71545124, -8.96198845, -7.05567837]], - [[-9.19093227, -8.01637268, 
-11.64603615, -10.51128292], - [-12.34527206, -11.54284477, -11.8667469, -9.69417381]], - [[-12.20609856, -10.47217369, -13.66270638, -13.46115875], - [-17.17656708, -16.25190353, -15.28658581, -12.33067703]], - [[-16.14766312, -15.57472229, -17.47755432, -17.98189926], - [-17.17656708, -16.25190353, -15.28658581, -12.33067703]], - [[-20.07182884, -18.43191147, -20.1606636, -21.45263863], - [-17.17656708, -16.25190353, -15.28658581, -12.33067703]], - [[-24.10270691, -22.20865822, -24.14675522, -25.27248383], - [-17.17656708, -16.25190353, -15.28658581, -12.33067703]]]) - self.assertAllClose(weights_gt, weights) - - def test_fivo(self): - """A golden-value test for the FIVO bound.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - model, inputs, targets, lengths = create_vrnn(random_seed=1234) - outs = bounds.fivo(model, (inputs, targets), lengths, num_samples=4, - random_seed=1234, parallel_iterations=1) - sess.run(tf.global_variables_initializer()) - log_p_hat, weights, resampled, _ = sess.run(outs) - self.assertAllClose([-22.98902512, -14.21689224], log_p_hat) - weights_gt = np.array( - [[[-3.66708851, -2.07074022, -4.91751671, -5.03293562], - [-2.99690723, -3.17782736, -4.50084877, -3.48536515]], - [[-2.67100811, -2.30541706, -2.34178066, -2.81751347], - [-8.27518654, -6.71545124, -8.96198845, -7.05567837]], - [[-5.65190411, -5.94563246, -6.55041981, -5.4783473], - [-12.34527206, -11.54284477, -11.8667469, -9.69417381]], - [[-8.71947861, -8.40143299, -8.54593086, -8.42822266], - [-4.28782988, -4.50591278, -3.40847206, -2.63650274]], - [[-12.7003831, -13.5039815, -12.3569726, -12.9489622], - [-4.28782988, -4.50591278, -3.40847206, -2.63650274]], - [[-16.4520301, -16.3611698, -15.0314846, -16.4197006], - [-4.28782988, -4.50591278, -3.40847206, -2.63650274]], - [[-20.7010765, -20.1379165, -19.0020351, -20.2395458], - [-4.28782988, -4.50591278, -3.40847206, -2.63650274]]]) - self.assertAllClose(weights_gt, weights) - resampled_gt = np.array( - [[1., 
0.], - [0., 0.], - [0., 1.], - [0., 0.], - [0., 0.], - [0., 0.], - [0., 0.]]) - self.assertAllClose(resampled_gt, resampled) - - def test_fivo_relaxed(self): - """A golden-value test for the FIVO bound with relaxed sampling.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - model, inputs, targets, lengths = create_vrnn(random_seed=1234) - outs = bounds.fivo(model, (inputs, targets), lengths, num_samples=4, - random_seed=1234, parallel_iterations=1, - resampling_type="relaxed") - sess.run(tf.global_variables_initializer()) - log_p_hat, weights, resampled, _ = sess.run(outs) - self.assertAllClose([-22.942394, -14.273882], log_p_hat) - weights_gt = np.array( - [[[-3.66708851, -2.07074118, -4.91751575, -5.03293514], - [-2.99690628, -3.17782831, -4.50084877, -3.48536515]], - [[-2.84939098, -2.30087185, -2.35649204, -2.48417377], - [-8.27518654, -6.71545172, -8.96199131, -7.05567837]], - [[-5.92327023, -5.9433074, -6.5826683, -5.04259014], - [-12.34527206, -11.54284668, -11.86675072, -9.69417477]], - [[-8.95323944, -8.40061855, -8.52760506, -7.99130583], - [-4.58102798, -4.56017351, -3.46283388, -2.65550804]], - [[-12.87836456, -13.49628639, -12.31680107, -12.74228859], - [-4.58102798, -4.56017351, -3.46283388, -2.65550804]], - [[-16.78347397, -16.35150909, -14.98797417, -16.35162735], - [-4.58102798, -4.56017351, -3.46283388, -2.65550804]], - [[-20.81165886, -20.1307621, -18.92229652, -20.17458153], - [-4.58102798, -4.56017351, -3.46283388, -2.65550804]]]) - self.assertAllClose(weights_gt, weights) - resampled_gt = np.array( - [[1., 0.], - [0., 0.], - [0., 1.], - [0., 0.], - [0., 0.], - [0., 0.], - [0., 0.]]) - self.assertAllClose(resampled_gt, resampled) - - def test_fivo_aux_relaxed(self): - """A golden-value test for the FIVO-AUX bound with relaxed sampling.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - model, inputs, targets, lengths = create_vrnn(random_seed=1234, - use_tilt=True) - outs = bounds.fivo(model, (inputs, 
targets), lengths, num_samples=4, - random_seed=1234, parallel_iterations=1, - resampling_type="relaxed") - sess.run(tf.global_variables_initializer()) - log_p_hat, weights, resampled, _ = sess.run(outs) - self.assertAllClose([-23.1395, -14.271059], log_p_hat) - weights_gt = np.array( - [[[-5.19826221, -3.55476403, -5.98663855, -6.08058834], - [-6.31685925, -5.70243931, -7.07638931, -6.18138981]], - [[-3.97986865, -3.58831525, -3.85753584, -3.5010016], - [-11.38203049, -8.66213989, -11.23646641, -10.02024746]], - [[-6.62269831, -6.36680222, -6.78096485, -5.80072498], - [-3.55419445, -8.11326408, -3.48766923, -3.08593249]], - [[-10.56472301, -10.16084099, -9.96741676, -8.5270071], - [-6.04880285, -7.80853653, -4.72652149, -3.49711013]], - [[-13.36585426, -16.08720398, -13.33416367, -13.1017189], - [-0., -0., -0., -0.]], - [[-17.54233551, -17.35167503, -16.79163361, -16.51471138], - [0., -0., -0., -0.]], - [[-19.74024963, -18.69452858, -17.76246452, -18.76182365], - [0., -0., -0., -0.]]]) - self.assertAllClose(weights_gt, weights) - resampled_gt = np.array([[1., 0.], - [0., 1.], - [0., 0.], - [0., 1.], - [0., 0.], - [0., 0.], - [0., 0.]]) - self.assertAllClose(resampled_gt, resampled) - - -if __name__ == "__main__": - np.set_printoptions(threshold=np.nan) # Used to easily see the gold values. - # Use print(repr(numpy_array)) to print the values. - tf.test.main() diff --git a/research/fivo/fivo/data/__init__.py b/research/fivo/fivo/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/fivo/fivo/data/calculate_pianoroll_mean.py b/research/fivo/fivo/data/calculate_pianoroll_mean.py deleted file mode 100644 index 93f712bd328f61a83faffc55ad2cf6ca33b47fb7..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/data/calculate_pianoroll_mean.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Script to calculate the mean of a pianoroll dataset. - -Given a pianoroll pickle file, this script loads the dataset and -calculates the mean of the training set. Then it updates the pickle file -so that the key "train_mean" points to the mean vector. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import pickle -import numpy as np - -import tensorflow as tf - - -from datasets import sparse_pianoroll_to_dense - -tf.app.flags.DEFINE_string('in_file', None, - 'Filename of the pickled pianoroll dataset to load.') -tf.app.flags.DEFINE_string('out_file', None, - 'Name of the output pickle file. Defaults to in_file, ' - 'updating the input pickle file.') -tf.app.flags.mark_flag_as_required('in_file') - -FLAGS = tf.app.flags.FLAGS - -MIN_NOTE = 21 -MAX_NOTE = 108 -NUM_NOTES = MAX_NOTE - MIN_NOTE + 1 - - -def main(unused_argv): - if FLAGS.out_file is None: - FLAGS.out_file = FLAGS.in_file - with tf.gfile.Open(FLAGS.in_file, 'r') as f: - pianorolls = pickle.load(f) - dense_pianorolls = [sparse_pianoroll_to_dense(p, MIN_NOTE, NUM_NOTES)[0] - for p in pianorolls['train']] - # Concatenate all elements along the time axis. 
- concatenated = np.concatenate(dense_pianorolls, axis=0) - mean = np.mean(concatenated, axis=0) - pianorolls['train_mean'] = mean - # Write out the whole pickle file, including the train mean. - pickle.dump(pianorolls, open(FLAGS.out_file, 'wb')) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/fivo/fivo/data/create_timit_dataset.py b/research/fivo/fivo/data/create_timit_dataset.py deleted file mode 100644 index ea1cd3b10cb0812c2d6aad51491924ecfe8eec37..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/data/create_timit_dataset.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Preprocesses TIMIT from raw wavfiles to create a set of TFRecords. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import glob -import os -import random -import re - -import numpy as np -import tensorflow as tf - -tf.app.flags.DEFINE_string("raw_timit_dir", None, - "Directory containing TIMIT files.") -tf.app.flags.DEFINE_string("out_dir", None, - "Output directory for TFRecord files.") -tf.app.flags.DEFINE_float("valid_frac", 0.05, - "Fraction of train set to use as valid set. 
" - "Must be between 0.0 and 1.0.") - -tf.app.flags.mark_flag_as_required("raw_timit_dir") -tf.app.flags.mark_flag_as_required("out_dir") - -FLAGS = tf.app.flags.FLAGS - -NUM_TRAIN_FILES = 4620 -NUM_TEST_FILES = 1680 -SAMPLES_PER_TIMESTEP = 200 - -# Regexes for reading SPHERE header files. -SAMPLE_COUNT_REGEX = re.compile(r"sample_count -i (\d+)") -SAMPLE_MIN_REGEX = re.compile(r"sample_min -i (-?\d+)") -SAMPLE_MAX_REGEX = re.compile(r"sample_max -i (-?\d+)") - - -def get_filenames(split): - """Get all wav filenames from the TIMIT archive.""" - path = os.path.join(FLAGS.raw_timit_dir, "TIMIT", split, "*", "*", "*.WAV") - # Sort the output by name so the order is deterministic. - files = sorted(glob.glob(path)) - return files - - -def load_timit_wav(filename): - """Loads a TIMIT wavfile into a numpy array. - - TIMIT wavfiles include a SPHERE header, detailed in the TIMIT docs. The first - line is the header type and the second is the length of the header in bytes. - After the header, the remaining bytes are actual WAV data. - - The header includes information about the WAV data such as the number of - samples and minimum and maximum amplitude. This function asserts that the - loaded wav data matches the header. - - Args: - filename: The name of the TIMIT wavfile to load. - Returns: - wav: A numpy array containing the loaded wav data. - """ - wav_file = open(filename, "rb") - header_type = wav_file.readline() - header_length_str = wav_file.readline() - # The header length includes the length of the first two lines. - header_remaining_bytes = (int(header_length_str) - len(header_type) - - len(header_length_str)) - header = wav_file.read(header_remaining_bytes) - # Read the relevant header fields. 
- sample_count = int(SAMPLE_COUNT_REGEX.search(header).group(1)) - sample_min = int(SAMPLE_MIN_REGEX.search(header).group(1)) - sample_max = int(SAMPLE_MAX_REGEX.search(header).group(1)) - wav = np.fromstring(wav_file.read(), dtype="int16").astype("float32") - # Check that the loaded data conforms to the header description. - assert len(wav) == sample_count - assert wav.min() == sample_min - assert wav.max() == sample_max - return wav - - -def preprocess(wavs, block_size, mean, std): - """Normalize the wav data and reshape it into chunks.""" - processed_wavs = [] - for wav in wavs: - wav = (wav - mean) / std - wav_length = wav.shape[0] - if wav_length % block_size != 0: - pad_width = block_size - (wav_length % block_size) - wav = np.pad(wav, (0, pad_width), "constant") - assert wav.shape[0] % block_size == 0 - wav = wav.reshape((-1, block_size)) - processed_wavs.append(wav) - return processed_wavs - - -def create_tfrecord_from_wavs(wavs, output_file): - """Writes processed wav files to disk as sharded TFRecord files.""" - with tf.python_io.TFRecordWriter(output_file) as builder: - for wav in wavs: - builder.write(wav.astype(np.float32).tobytes()) - - -def main(unused_argv): - train_filenames = get_filenames("TRAIN") - test_filenames = get_filenames("TEST") - - num_train_files = len(train_filenames) - num_test_files = len(test_filenames) - num_valid_files = int(num_train_files * FLAGS.valid_frac) - num_train_files -= num_valid_files - - print("%d train / %d valid / %d test" % ( - num_train_files, num_valid_files, num_test_files)) - - random.seed(1234) - random.shuffle(train_filenames) - - valid_filenames = train_filenames[:num_valid_files] - train_filenames = train_filenames[num_valid_files:] - - # Make sure there is no overlap in the train, test, and valid sets. - train_s = set(train_filenames) - test_s = set(test_filenames) - valid_s = set(valid_filenames) - # Disable explicit length testing to make the assertions more readable. 
- # pylint: disable=g-explicit-length-test - assert len(train_s & test_s) == 0 - assert len(train_s & valid_s) == 0 - assert len(valid_s & test_s) == 0 - # pylint: enable=g-explicit-length-test - - train_wavs = [load_timit_wav(f) for f in train_filenames] - valid_wavs = [load_timit_wav(f) for f in valid_filenames] - test_wavs = [load_timit_wav(f) for f in test_filenames] - assert len(train_wavs) + len(valid_wavs) == NUM_TRAIN_FILES - assert len(test_wavs) == NUM_TEST_FILES - - # Calculate the mean and standard deviation of the train set. - train_stacked = np.hstack(train_wavs) - train_mean = np.mean(train_stacked) - train_std = np.std(train_stacked) - print("train mean: %f train std: %f" % (train_mean, train_std)) - - # Process all data, normalizing with the train set statistics. - processed_train_wavs = preprocess(train_wavs, SAMPLES_PER_TIMESTEP, - train_mean, train_std) - processed_valid_wavs = preprocess(valid_wavs, SAMPLES_PER_TIMESTEP, - train_mean, train_std) - processed_test_wavs = preprocess(test_wavs, SAMPLES_PER_TIMESTEP, train_mean, - train_std) - - # Write the datasets to disk. - create_tfrecord_from_wavs( - processed_train_wavs, - os.path.join(FLAGS.out_dir, "train")) - create_tfrecord_from_wavs( - processed_valid_wavs, - os.path.join(FLAGS.out_dir, "valid")) - create_tfrecord_from_wavs( - processed_test_wavs, - os.path.join(FLAGS.out_dir, "test")) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/fivo/fivo/data/datasets.py b/research/fivo/fivo/data/datasets.py deleted file mode 100644 index 6d5324623250e31d65b23c97e7e684de59da1ba6..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/data/datasets.py +++ /dev/null @@ -1,453 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Code for creating sequence datasets. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import pickle - -import numpy as np -from scipy.sparse import coo_matrix -import tensorflow as tf - -# The default number of threads used to process data in parallel. -DEFAULT_PARALLELISM = 12 - - -def sparse_pianoroll_to_dense(pianoroll, min_note, num_notes): - """Converts a sparse pianoroll to a dense numpy array. - - Given a sparse pianoroll, converts it to a dense numpy array of shape - [num_timesteps, num_notes] where entry i,j is 1.0 if note j is active on - timestep i and 0.0 otherwise. - - Args: - pianoroll: A sparse pianoroll object, a list of tuples where the i'th tuple - contains the indices of the notes active at timestep i. - min_note: The minimum note in the pianoroll, subtracted from all notes so - that the minimum note becomes 0. - num_notes: The number of possible different note indices, determines the - second dimension of the resulting dense array. - Returns: - dense_pianoroll: A [num_timesteps, num_notes] numpy array of floats. - num_timesteps: A python int, the number of timesteps in the pianoroll. - """ - num_timesteps = len(pianoroll) - inds = [] - for time, chord in enumerate(pianoroll): - # Re-index the notes to start from min_note. - inds.extend((time, note-min_note) for note in chord) - shape = [num_timesteps, num_notes] - values = [1.] 
* len(inds) - sparse_pianoroll = coo_matrix( - (values, ([x[0] for x in inds], [x[1] for x in inds])), - shape=shape) - return sparse_pianoroll.toarray(), num_timesteps - - -def create_pianoroll_dataset(path, - split, - batch_size, - num_parallel_calls=DEFAULT_PARALLELISM, - shuffle=False, - repeat=False, - min_note=21, - max_note=108): - """Creates a pianoroll dataset. - - Args: - path: The path of a pickle file containing the dataset to load. - split: The split to use, can be train, test, or valid. - batch_size: The batch size. If repeat is False then it is not guaranteed - that the true batch size will match for all batches since batch_size - may not necessarily evenly divide the number of elements. - num_parallel_calls: The number of threads to use for parallel processing of - the data. - shuffle: If true, shuffles the order of the dataset. - repeat: If true, repeats the dataset endlessly. - min_note: The minimum note number of the dataset. For all pianoroll datasets - the minimum note is number 21, and changing this affects the dimension of - the data. This is useful mostly for testing. - max_note: The maximum note number of the dataset. For all pianoroll datasets - the maximum note is number 108, and changing this affects the dimension of - the data. This is useful mostly for testing. - Returns: - inputs: A batch of input sequences represented as a dense Tensor of shape - [time, batch_size, data_dimension]. The sequences in inputs are the - sequences in targets shifted one timestep into the future, padded with - zeros. This tensor is mean-centered, with the mean taken from the pickle - file key 'train_mean'. - targets: A batch of target sequences represented as a dense Tensor of - shape [time, batch_size, data_dimension]. - lens: An int Tensor of shape [batch_size] representing the lengths of each - sequence in the batch. - mean: A float Tensor of shape [data_dimension] containing the mean loaded - from the pickle file. - """ - # Load the data from disk. 
- num_notes = max_note - min_note + 1 - with tf.gfile.Open(path, "r") as f: - raw_data = pickle.load(f) - pianorolls = raw_data[split] - mean = raw_data["train_mean"] - num_examples = len(pianorolls) - - def pianoroll_generator(): - for sparse_pianoroll in pianorolls: - yield sparse_pianoroll_to_dense(sparse_pianoroll, min_note, num_notes) - - dataset = tf.data.Dataset.from_generator( - pianoroll_generator, - output_types=(tf.float64, tf.int64), - output_shapes=([None, num_notes], [])) - - if repeat: dataset = dataset.repeat() - if shuffle: dataset = dataset.shuffle(num_examples) - - # Batch sequences togther, padding them to a common length in time. - dataset = dataset.padded_batch(batch_size, - padded_shapes=([None, num_notes], [])) - - def process_pianoroll_batch(data, lengths): - """Create mean-centered and time-major next-step prediction Tensors.""" - data = tf.to_float(tf.transpose(data, perm=[1, 0, 2])) - lengths = tf.to_int32(lengths) - targets = data - # Mean center the inputs. - inputs = data - tf.constant(mean, dtype=tf.float32, - shape=[1, 1, mean.shape[0]]) - # Shift the inputs one step forward in time. Also remove the last timestep - # so that targets and inputs are the same length. - inputs = tf.pad(inputs, [[1, 0], [0, 0], [0, 0]], mode="CONSTANT")[:-1] - # Mask out unused timesteps. - inputs *= tf.expand_dims(tf.transpose( - tf.sequence_mask(lengths, dtype=inputs.dtype)), 2) - return inputs, targets, lengths - - dataset = dataset.map(process_pianoroll_batch, - num_parallel_calls=num_parallel_calls) - dataset = dataset.prefetch(num_examples) - - itr = dataset.make_one_shot_iterator() - inputs, targets, lengths = itr.get_next() - return inputs, targets, lengths, tf.constant(mean, dtype=tf.float32) - - -def create_human_pose_dataset( - path, - split, - batch_size, - num_parallel_calls=DEFAULT_PARALLELISM, - shuffle=False, - repeat=False,): - """Creates a human pose dataset. - - Args: - path: The path of a pickle file containing the dataset to load. 
- split: The split to use, can be train, test, or valid. - batch_size: The batch size. If repeat is False then it is not guaranteed - that the true batch size will match for all batches since batch_size - may not necessarily evenly divide the number of elements. - num_parallel_calls: The number of threads to use for parallel processing of - the data. - shuffle: If true, shuffles the order of the dataset. - repeat: If true, repeats the dataset endlessly. - Returns: - inputs: A batch of input sequences represented as a dense Tensor of shape - [time, batch_size, data_dimension]. The sequences in inputs are the - sequences in targets shifted one timestep into the future, padded with - zeros. This tensor is mean-centered, with the mean taken from the pickle - file key 'train_mean'. - targets: A batch of target sequences represented as a dense Tensor of - shape [time, batch_size, data_dimension]. - lens: An int Tensor of shape [batch_size] representing the lengths of each - sequence in the batch. - mean: A float Tensor of shape [data_dimension] containing the mean loaded - from the pickle file. - """ - # Load the data from disk. - with tf.gfile.Open(path, "r") as f: - raw_data = pickle.load(f) - - mean = raw_data["train_mean"] - pose_sequences = raw_data[split] - num_examples = len(pose_sequences) - num_features = pose_sequences[0].shape[1] - - def pose_generator(): - """A generator that yields pose data sequences.""" - # Each timestep has 32 x values followed by 32 y values so is 64 - # dimensional. - for pose_sequence in pose_sequences: - yield pose_sequence, pose_sequence.shape[0] - - dataset = tf.data.Dataset.from_generator( - pose_generator, - output_types=(tf.float64, tf.int64), - output_shapes=([None, num_features], [])) - - if repeat: - dataset = dataset.repeat() - if shuffle: - dataset = dataset.shuffle(num_examples) - - # Batch sequences togther, padding them to a common length in time. 
- dataset = dataset.padded_batch( - batch_size, padded_shapes=([None, num_features], [])) - - # Post-process each batch, ensuring that it is mean-centered and time-major. - def process_pose_data(data, lengths): - """Creates Tensors for next step prediction and mean-centers the input.""" - data = tf.to_float(tf.transpose(data, perm=[1, 0, 2])) - lengths = tf.to_int32(lengths) - targets = data - # Mean center the inputs. - inputs = data - tf.constant( - mean, dtype=tf.float32, shape=[1, 1, mean.shape[0]]) - # Shift the inputs one step forward in time. Also remove the last timestep - # so that targets and inputs are the same length. - inputs = tf.pad(inputs, [[1, 0], [0, 0], [0, 0]], mode="CONSTANT")[:-1] - # Mask out unused timesteps. - inputs *= tf.expand_dims( - tf.transpose(tf.sequence_mask(lengths, dtype=inputs.dtype)), 2) - return inputs, targets, lengths - - dataset = dataset.map( - process_pose_data, - num_parallel_calls=num_parallel_calls) - dataset = dataset.prefetch(num_examples) - - itr = dataset.make_one_shot_iterator() - inputs, targets, lengths = itr.get_next() - return inputs, targets, lengths, tf.constant(mean, dtype=tf.float32) - - -def create_speech_dataset(path, - batch_size, - samples_per_timestep=200, - num_parallel_calls=DEFAULT_PARALLELISM, - prefetch_buffer_size=2048, - shuffle=False, - repeat=False): - """Creates a speech dataset. - - Args: - path: The path of a possibly sharded TFRecord file containing the data. - batch_size: The batch size. If repeat is False then it is not guaranteed - that the true batch size will match for all batches since batch_size - may not necessarily evenly divide the number of elements. - samples_per_timestep: The number of audio samples per timestep. Used to - reshape the data into sequences of shape [time, samples_per_timestep]. - Should not change except for testing -- in all speech datasets 200 is the - number of samples per timestep. 
- num_parallel_calls: The number of threads to use for parallel processing of - the data. - prefetch_buffer_size: The size of the prefetch queues to use after reading - and processing the raw data. - shuffle: If true, shuffles the order of the dataset. - repeat: If true, repeats the dataset endlessly. - Returns: - inputs: A batch of input sequences represented as a dense Tensor of shape - [time, batch_size, samples_per_timestep]. The sequences in inputs are the - sequences in targets shifted one timestep into the future, padded with - zeros. - targets: A batch of target sequences represented as a dense Tensor of - shape [time, batch_size, samples_per_timestep]. - lens: An int Tensor of shape [batch_size] representing the lengths of each - sequence in the batch. - """ - filenames = [path] - - def read_speech_example(value): - """Parses a single tf.Example from the TFRecord file.""" - decoded = tf.decode_raw(value, out_type=tf.float32) - example = tf.reshape(decoded, [-1, samples_per_timestep]) - length = tf.shape(example)[0] - return example, length - - # Create the dataset from the TFRecord files - dataset = tf.data.TFRecordDataset(filenames).map( - read_speech_example, num_parallel_calls=num_parallel_calls) - dataset = dataset.prefetch(prefetch_buffer_size) - - if repeat: dataset = dataset.repeat() - if shuffle: dataset = dataset.shuffle(prefetch_buffer_size) - - dataset = dataset.padded_batch( - batch_size, padded_shapes=([None, samples_per_timestep], [])) - - def process_speech_batch(data, lengths): - """Creates Tensors for next step prediction.""" - data = tf.transpose(data, perm=[1, 0, 2]) - lengths = tf.to_int32(lengths) - targets = data - # Shift the inputs one step forward in time. Also remove the last timestep - # so that targets and inputs are the same length. - inputs = tf.pad(data, [[1, 0], [0, 0], [0, 0]], mode="CONSTANT")[:-1] - # Mask out unused timesteps. 
- inputs *= tf.expand_dims( - tf.transpose(tf.sequence_mask(lengths, dtype=inputs.dtype)), 2) - return inputs, targets, lengths - - dataset = dataset.map(process_speech_batch, - num_parallel_calls=num_parallel_calls) - dataset = dataset.prefetch(prefetch_buffer_size) - - itr = dataset.make_one_shot_iterator() - inputs, targets, lengths = itr.get_next() - return inputs, targets, lengths - - -SQUARED_OBSERVATION = "squared" -ABS_OBSERVATION = "abs" -STANDARD_OBSERVATION = "standard" -OBSERVATION_TYPES = [SQUARED_OBSERVATION, ABS_OBSERVATION, STANDARD_OBSERVATION] - -ROUND_TRANSITION = "round" -STANDARD_TRANSITION = "standard" -TRANSITION_TYPES = [ROUND_TRANSITION, STANDARD_TRANSITION] - - -def create_chain_graph_dataset( - batch_size, - num_timesteps, - steps_per_observation=None, - state_size=1, - transition_variance=1., - observation_variance=1., - transition_type=STANDARD_TRANSITION, - observation_type=STANDARD_OBSERVATION, - fixed_observation=None, - prefetch_buffer_size=2048, - dtype="float32"): - """Creates a toy chain graph dataset. - - Creates a dataset where the data are sampled from a diffusion process. The - 'latent' states of the process are sampled as a chain of Normals: - - z0 ~ N(0, transition_variance) - z1 ~ N(transition_fn(z0), transition_variance) - ... - - where transition_fn could be round z0 or pass it through unchanged. - - The observations are produced every steps_per_observation timesteps as a - function of the latent zs. For example if steps_per_observation is 3 then the - first observation will be produced as a function of z3: - - x1 ~ N(observation_fn(z3), observation_variance) - - where observation_fn could square z3, take the absolute value, or pass - it through unchanged. - - Only the observations are returned. - - Args: - batch_size: The batch size. The number of trajectories to run in parallel. - num_timesteps: The length of the chain of latent states (i.e. the - number of z's excluding z0. 
- steps_per_observation: The number of latent states between each observation, - must evenly divide num_timesteps. - state_size: The size of the latent state and observation, must be a - python int. - transition_variance: The variance of the transition density. - observation_variance: The variance of the observation density. - transition_type: Must be one of "round" or "standard". "round" means that - the transition density is centered at the rounded previous latent state. - "standard" centers the transition density at the previous latent state, - unchanged. - observation_type: Must be one of "squared", "abs" or "standard". "squared" - centers the observation density at the squared latent state. "abs" - centers the observaiton density at the absolute value of the current - latent state. "standard" centers the observation density at the current - latent state. - fixed_observation: If not None, fixes all observations to be a constant. - Must be a scalar. - prefetch_buffer_size: The size of the prefetch queues to use after reading - and processing the raw data. - dtype: A string convertible to a tensorflow datatype. The datatype used - to represent the states and observations. - Returns: - observations: A batch of observations represented as a dense Tensor of - shape [num_observations, batch_size, state_size]. num_observations is - num_timesteps/steps_per_observation. - lens: An int Tensor of shape [batch_size] representing the lengths of each - sequence in the batch. Will contain num_observations as each entry. - Raises: - ValueError: Raised if steps_per_observation does not evenly divide - num_timesteps. 
- """ - if steps_per_observation is None: - steps_per_observation = num_timesteps - if num_timesteps % steps_per_observation != 0: - raise ValueError("steps_per_observation must evenly divide num_timesteps.") - num_observations = int(num_timesteps / steps_per_observation) - def data_generator(): - """An infinite generator of latents and observations from the model.""" - transition_std = np.sqrt(transition_variance) - observation_std = np.sqrt(observation_variance) - while True: - states = [] - observations = [] - # Sample z0 ~ Normal(0, sqrt(variance)). - states.append( - np.random.normal(size=[state_size], - scale=observation_std).astype(dtype)) - # Start the range at 1 because we've already generated z0. - # The range ends at num_timesteps+1 because we want to include the - # num_timesteps-th step. - for t in xrange(1, num_timesteps+1): - if transition_type == ROUND_TRANSITION: - loc = np.round(states[-1]) - elif transition_type == STANDARD_TRANSITION: - loc = states[-1] - z_t = np.random.normal(size=[state_size], loc=loc, scale=transition_std) - states.append(z_t.astype(dtype)) - if t % steps_per_observation == 0: - if fixed_observation is None: - if observation_type == SQUARED_OBSERVATION: - loc = np.square(states[-1]) - elif observation_type == ABS_OBSERVATION: - loc = np.abs(states[-1]) - elif observation_type == STANDARD_OBSERVATION: - loc = states[-1] - x_t = np.random.normal(size=[state_size], - loc=loc, - scale=observation_std).astype(dtype) - else: - x_t = np.ones([state_size]) * fixed_observation - - observations.append(x_t) - yield states, observations - - dataset = tf.data.Dataset.from_generator( - data_generator, - output_types=(tf.as_dtype(dtype), tf.as_dtype(dtype)), - output_shapes=([num_timesteps+1, state_size], - [num_observations, state_size]) - ) - dataset = dataset.repeat().batch(batch_size) - dataset = dataset.prefetch(prefetch_buffer_size) - itr = dataset.make_one_shot_iterator() - _, observations = itr.get_next() - # Transpose observations 
from [batch, time, state_size] to - # [time, batch, state_size]. - observations = tf.transpose(observations, perm=[1, 0, 2]) - lengths = tf.ones([batch_size], dtype=tf.int32) * num_observations - return observations, lengths diff --git a/research/fivo/fivo/data/datasets_test.py b/research/fivo/fivo/data/datasets_test.py deleted file mode 100644 index e6bbfda67aa44efc0bc4b1a34eb0cb9f09d53de5..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/data/datasets_test.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for fivo.data.datasets.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import pickle -import os - -import numpy as np -import tensorflow as tf - -from fivo.data import datasets - -FLAGS = tf.app.flags.FLAGS - - -class DatasetsTest(tf.test.TestCase): - - def test_sparse_pianoroll_to_dense_empty_at_end(self): - sparse_pianoroll = [(0, 1), (1, 0), (), (1,), (), ()] - dense_pianoroll, num_timesteps = datasets.sparse_pianoroll_to_dense( - sparse_pianoroll, min_note=0, num_notes=2) - self.assertEqual(num_timesteps, 6) - self.assertAllEqual([[1, 1], - [1, 1], - [0, 0], - [0, 1], - [0, 0], - [0, 0]], dense_pianoroll) - - def test_sparse_pianoroll_to_dense_with_chord(self): - sparse_pianoroll = [(0, 1), (1, 0), (), (1,)] - dense_pianoroll, num_timesteps = datasets.sparse_pianoroll_to_dense( - sparse_pianoroll, min_note=0, num_notes=2) - self.assertEqual(num_timesteps, 4) - self.assertAllEqual([[1, 1], - [1, 1], - [0, 0], - [0, 1]], dense_pianoroll) - - def test_sparse_pianoroll_to_dense_simple(self): - sparse_pianoroll = [(0,), (), (1,)] - dense_pianoroll, num_timesteps = datasets.sparse_pianoroll_to_dense( - sparse_pianoroll, min_note=0, num_notes=2) - self.assertEqual(num_timesteps, 3) - self.assertAllEqual([[1, 0], - [0, 0], - [0, 1]], dense_pianoroll) - - def test_sparse_pianoroll_to_dense_subtracts_min_note(self): - sparse_pianoroll = [(4, 5), (5, 4), (), (5,), (), ()] - dense_pianoroll, num_timesteps = datasets.sparse_pianoroll_to_dense( - sparse_pianoroll, min_note=4, num_notes=2) - self.assertEqual(num_timesteps, 6) - self.assertAllEqual([[1, 1], - [1, 1], - [0, 0], - [0, 1], - [0, 0], - [0, 0]], dense_pianoroll) - - def test_sparse_pianoroll_to_dense_uses_num_notes(self): - sparse_pianoroll = [(4, 5), (5, 4), (), (5,), (), ()] - dense_pianoroll, num_timesteps = datasets.sparse_pianoroll_to_dense( - 
sparse_pianoroll, min_note=4, num_notes=3) - self.assertEqual(num_timesteps, 6) - self.assertAllEqual([[1, 1, 0], - [1, 1, 0], - [0, 0, 0], - [0, 1, 0], - [0, 0, 0], - [0, 0, 0]], dense_pianoroll) - - def test_pianoroll_dataset(self): - pianoroll_data = [[(0,), (), (1,)], - [(0, 1), (1,)], - [(1,), (0,), (), (0, 1), (), ()]] - pianoroll_mean = np.zeros([3]) - pianoroll_mean[-1] = 1 - data = {"train": pianoroll_data, "train_mean": pianoroll_mean} - path = os.path.join(tf.test.get_temp_dir(), "test.pkl") - pickle.dump(data, open(path, "wb")) - with self.test_session() as sess: - inputs, targets, lens, mean = datasets.create_pianoroll_dataset( - path, "train", 2, num_parallel_calls=1, - shuffle=False, repeat=False, - min_note=0, max_note=2) - i1, t1, l1 = sess.run([inputs, targets, lens]) - i2, t2, l2 = sess.run([inputs, targets, lens]) - m = sess.run(mean) - # Check the lengths. - self.assertAllEqual([3, 2], l1) - self.assertAllEqual([6], l2) - # Check the mean. - self.assertAllEqual(pianoroll_mean, m) - # Check the targets. The targets should not be mean-centered and should - # be padded with zeros to a common length within a batch. - self.assertAllEqual([[1, 0, 0], - [0, 0, 0], - [0, 1, 0]], t1[:, 0, :]) - self.assertAllEqual([[1, 1, 0], - [0, 1, 0], - [0, 0, 0]], t1[:, 1, :]) - self.assertAllEqual([[0, 1, 0], - [1, 0, 0], - [0, 0, 0], - [1, 1, 0], - [0, 0, 0], - [0, 0, 0]], t2[:, 0, :]) - # Check the inputs. Each sequence should start with zeros on the first - # timestep. Each sequence should be padded with zeros to a common length - # within a batch. The mean should be subtracted from all timesteps except - # the first and the padding. 
- self.assertAllEqual([[0, 0, 0], - [1, 0, -1], - [0, 0, -1]], i1[:, 0, :]) - self.assertAllEqual([[0, 0, 0], - [1, 1, -1], - [0, 0, 0]], i1[:, 1, :]) - self.assertAllEqual([[0, 0, 0], - [0, 1, -1], - [1, 0, -1], - [0, 0, -1], - [1, 1, -1], - [0, 0, -1]], i2[:, 0, :]) - - def test_human_pose_dataset(self): - pose_data = [ - [[0, 0], [2, 2]], - [[2, 2]], - [[0, 0], [0, 0], [2, 2], [2, 2], [0, 0]], - ] - pose_data = [np.array(x, dtype=np.float64) for x in pose_data] - pose_data_mean = np.array([1, 1], dtype=np.float64) - data = { - "train": pose_data, - "train_mean": pose_data_mean, - } - path = os.path.join(tf.test.get_temp_dir(), "test_human_pose_dataset.pkl") - with open(path, "wb") as out: - pickle.dump(data, out) - with self.test_session() as sess: - inputs, targets, lens, mean = datasets.create_human_pose_dataset( - path, "train", 2, num_parallel_calls=1, shuffle=False, repeat=False) - i1, t1, l1 = sess.run([inputs, targets, lens]) - i2, t2, l2 = sess.run([inputs, targets, lens]) - m = sess.run(mean) - # Check the lengths. - self.assertAllEqual([2, 1], l1) - self.assertAllEqual([5], l2) - # Check the mean. - self.assertAllEqual(pose_data_mean, m) - # Check the targets. The targets should not be mean-centered and should - # be padded with zeros to a common length within a batch. - self.assertAllEqual([[0, 0], [2, 2]], t1[:, 0, :]) - self.assertAllEqual([[2, 2], [0, 0]], t1[:, 1, :]) - self.assertAllEqual([[0, 0], [0, 0], [2, 2], [2, 2], [0, 0]], t2[:, 0, :]) - # Check the inputs. Each sequence should start with zeros on the first - # timestep. Each sequence should be padded with zeros to a common length - # within a batch. The mean should be subtracted from all timesteps except - # the first and the padding. 
- self.assertAllEqual([[0, 0], [-1, -1]], i1[:, 0, :]) - self.assertAllEqual([[0, 0], [0, 0]], i1[:, 1, :]) - self.assertAllEqual([[0, 0], [-1, -1], [-1, -1], [1, 1], [1, 1]], - i2[:, 0, :]) - - def test_speech_dataset(self): - with self.test_session() as sess: - path = os.path.join( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))), - "test_data", - "tiny_speech_dataset.tfrecord") - inputs, targets, lens = datasets.create_speech_dataset( - path, 3, samples_per_timestep=2, num_parallel_calls=1, - prefetch_buffer_size=3, shuffle=False, repeat=False) - inputs1, targets1, lengths1 = sess.run([inputs, targets, lens]) - inputs2, targets2, lengths2 = sess.run([inputs, targets, lens]) - # Check the lengths. - self.assertAllEqual([1, 2, 3], lengths1) - self.assertAllEqual([4], lengths2) - # Check the targets. The targets should be padded with zeros to a common - # length within a batch. - self.assertAllEqual([[[0., 1.], [0., 1.], [0., 1.]], - [[0., 0.], [2., 3.], [2., 3.]], - [[0., 0.], [0., 0.], [4., 5.]]], - targets1) - self.assertAllEqual([[[0., 1.]], - [[2., 3.]], - [[4., 5.]], - [[6., 7.]]], - targets2) - # Check the inputs. Each sequence should start with zeros on the first - # timestep. Each sequence should be padded with zeros to a common length - # within a batch. 
- self.assertAllEqual([[[0., 0.], [0., 0.], [0., 0.]], - [[0., 0.], [0., 1.], [0., 1.]], - [[0., 0.], [0., 0.], [2., 3.]]], - inputs1) - self.assertAllEqual([[[0., 0.]], - [[0., 1.]], - [[2., 3.]], - [[4., 5.]]], - inputs2) - - def test_chain_graph_raises_error_on_wrong_steps_per_observation(self): - with self.assertRaises(ValueError): - datasets.create_chain_graph_dataset( - batch_size=4, - num_timesteps=10, - steps_per_observation=9) - - def test_chain_graph_single_obs(self): - with self.test_session() as sess: - np.random.seed(1234) - num_observations = 1 - num_timesteps = 5 - batch_size = 2 - state_size = 1 - observations, lengths = datasets.create_chain_graph_dataset( - batch_size=batch_size, - num_timesteps=num_timesteps, - state_size=state_size) - out_observations, out_lengths = sess.run([observations, lengths]) - self.assertAllEqual([num_observations, num_observations], out_lengths) - self.assertAllClose( - [[[1.426677], [-1.789461]]], - out_observations) - - def test_chain_graph_multiple_obs(self): - with self.test_session() as sess: - np.random.seed(1234) - num_observations = 3 - num_timesteps = 6 - batch_size = 2 - state_size = 1 - observations, lengths = datasets.create_chain_graph_dataset( - batch_size=batch_size, - num_timesteps=num_timesteps, - steps_per_observation=num_timesteps/num_observations, - state_size=state_size) - out_observations, out_lengths = sess.run([observations, lengths]) - self.assertAllEqual([num_observations, num_observations], out_lengths) - self.assertAllClose( - [[[0.40051451], [1.07405114]], - [[1.73932898], [3.16880035]], - [[-1.98377144], [2.82669163]]], - out_observations) - - def test_chain_graph_state_dims(self): - with self.test_session() as sess: - np.random.seed(1234) - num_observations = 1 - num_timesteps = 5 - batch_size = 2 - state_size = 3 - observations, lengths = datasets.create_chain_graph_dataset( - batch_size=batch_size, - num_timesteps=num_timesteps, - state_size=state_size) - out_observations, out_lengths = 
sess.run([observations, lengths]) - self.assertAllEqual([num_observations, num_observations], out_lengths) - self.assertAllClose( - [[[1.052287, -4.560759, 3.07988], - [2.008926, 0.495567, 3.488678]]], - out_observations) - - def test_chain_graph_fixed_obs(self): - with self.test_session() as sess: - np.random.seed(1234) - num_observations = 3 - num_timesteps = 6 - batch_size = 2 - state_size = 1 - observations, lengths = datasets.create_chain_graph_dataset( - batch_size=batch_size, - num_timesteps=num_timesteps, - steps_per_observation=num_timesteps/num_observations, - state_size=state_size, - fixed_observation=4.) - out_observations, out_lengths = sess.run([observations, lengths]) - self.assertAllEqual([num_observations, num_observations], out_lengths) - self.assertAllClose( - np.ones([num_observations, batch_size, state_size]) * 4., - out_observations) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/ghmm_runners.py b/research/fivo/fivo/ghmm_runners.py deleted file mode 100644 index 1f1ba6d4f9ea9ed9dee7d95449ba73285c77f24d..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/ghmm_runners.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Creates and runs Gaussian HMM-related graphs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -from fivo import smc -from fivo import bounds -from fivo.data import datasets -from fivo.models import ghmm - - -def run_train(config): - """Runs training for a Gaussian HMM setup.""" - - def create_logging_hook(step, bound_value, likelihood, bound_gap): - """Creates a logging hook that prints the bound value periodically.""" - bound_label = config.bound + "/t" - def summary_formatter(log_dict): - string = ("Step {step}, %s: {value:.3f}, " - "likelihood: {ll:.3f}, gap: {gap:.3e}") % bound_label - return string.format(**log_dict) - logging_hook = tf.train.LoggingTensorHook( - {"step": step, "value": bound_value, - "ll": likelihood, "gap": bound_gap}, - every_n_iter=config.summarize_every, - formatter=summary_formatter) - return logging_hook - - def create_losses(model, observations, lengths): - """Creates the loss to be optimized. - - Args: - model: A Trainable GHMM model. - observations: A set of observations. - lengths: The lengths of each sequence in the observations. - Returns: - loss: A float Tensor that when differentiated yields the gradients - to apply to the model. Should be optimized via gradient descent. - bound: A float Tensor containing the value of the bound that is - being optimized. - true_ll: The true log-likelihood of the data under the model. - bound_gap: The gap between the bound and the true log-likelihood. - """ - # Compute lower bounds on the log likelihood. 
- if config.bound == "elbo": - ll_per_seq, _, _ = bounds.iwae( - model, observations, lengths, num_samples=1, - parallel_iterations=config.parallel_iterations - ) - elif config.bound == "iwae": - ll_per_seq, _, _ = bounds.iwae( - model, observations, lengths, num_samples=config.num_samples, - parallel_iterations=config.parallel_iterations - ) - elif config.bound == "fivo": - if config.resampling_type == "relaxed": - ll_per_seq, _, _, _ = bounds.fivo( - model, - observations, - lengths, - num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - resampling_type=config.resampling_type, - relaxed_resampling_temperature=config. - relaxed_resampling_temperature, - random_seed=config.random_seed, - parallel_iterations=config.parallel_iterations) - else: - ll_per_seq, _, _, _ = bounds.fivo( - model, observations, lengths, - num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - resampling_type=config.resampling_type, - random_seed=config.random_seed, - parallel_iterations=config.parallel_iterations - ) - ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths)) - # Compute the data's true likelihood under the model and the bound gap. 
- true_ll_per_seq = model.likelihood(tf.squeeze(observations)) - true_ll_per_t = tf.reduce_mean(true_ll_per_seq / tf.to_float(lengths)) - bound_gap = true_ll_per_seq - ll_per_seq - bound_gap = tf.reduce_mean(bound_gap/ tf.to_float(lengths)) - tf.summary.scalar("train_ll_bound", ll_per_t) - tf.summary.scalar("train_true_ll", true_ll_per_t) - tf.summary.scalar("bound_gap", bound_gap) - return -ll_per_t, ll_per_t, true_ll_per_t, bound_gap - - def create_graph(): - """Creates the training graph.""" - global_step = tf.train.get_or_create_global_step() - xs, lengths = datasets.create_chain_graph_dataset( - config.batch_size, - config.num_timesteps, - steps_per_observation=1, - state_size=1, - transition_variance=config.variance, - observation_variance=config.variance) - model = ghmm.TrainableGaussianHMM( - config.num_timesteps, - config.proposal_type, - transition_variances=config.variance, - emission_variances=config.variance, - random_seed=config.random_seed) - loss, bound, true_ll, gap = create_losses(model, xs, lengths) - opt = tf.train.AdamOptimizer(config.learning_rate) - grads = opt.compute_gradients(loss, var_list=tf.trainable_variables()) - train_op = opt.apply_gradients(grads, global_step=global_step) - return bound, true_ll, gap, train_op, global_step - - with tf.Graph().as_default(): - if config.random_seed: - tf.set_random_seed(config.random_seed) - np.random.seed(config.random_seed) - bound, true_ll, gap, train_op, global_step = create_graph() - log_hook = create_logging_hook(global_step, bound, true_ll, gap) - with tf.train.MonitoredTrainingSession( - master="", - hooks=[log_hook], - checkpoint_dir=config.logdir, - save_checkpoint_secs=120, - save_summaries_steps=config.summarize_every, - log_step_count_steps=config.summarize_every*20) as sess: - cur_step = -1 - while cur_step <= config.max_steps and not sess.should_stop(): - cur_step = sess.run(global_step) - _, cur_step = sess.run([train_op, global_step]) - - -def run_eval(config): - """Evaluates a 
Gaussian HMM using the given config.""" - - def create_bound(model, xs, lengths): - """Creates the bound to be evaluated.""" - if config.bound == "elbo": - ll_per_seq, log_weights, _ = bounds.iwae( - model, xs, lengths, num_samples=1, - parallel_iterations=config.parallel_iterations - ) - elif config.bound == "iwae": - ll_per_seq, log_weights, _ = bounds.iwae( - model, xs, lengths, num_samples=config.num_samples, - parallel_iterations=config.parallel_iterations - ) - elif config.bound == "fivo": - ll_per_seq, log_weights, resampled, _ = bounds.fivo( - model, xs, lengths, - num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - resampling_type=config.resampling_type, - random_seed=config.random_seed, - parallel_iterations=config.parallel_iterations - ) - # Compute bound scaled by number of timesteps. - bound_per_t = ll_per_seq / tf.to_float(lengths) - if config.bound == "fivo": - return bound_per_t, log_weights, resampled - else: - return bound_per_t, log_weights - - def create_graph(): - """Creates the dataset, model, and bound.""" - xs, lengths = datasets.create_chain_graph_dataset( - config.batch_size, - config.num_timesteps, - steps_per_observation=1, - state_size=1, - transition_variance=config.variance, - observation_variance=config.variance) - model = ghmm.TrainableGaussianHMM( - config.num_timesteps, - config.proposal_type, - transition_variances=config.variance, - emission_variances=config.variance, - random_seed=config.random_seed) - true_likelihood = tf.reduce_mean( - model.likelihood(tf.squeeze(xs)) / tf.to_float(lengths)) - outs = [true_likelihood] - outs.extend(list(create_bound(model, xs, lengths))) - return outs - - with tf.Graph().as_default(): - if config.random_seed: - tf.set_random_seed(config.random_seed) - np.random.seed(config.random_seed) - graph_outs = create_graph() - with tf.train.SingularMonitoredSession( - checkpoint_dir=config.logdir) as sess: - outs = sess.run(graph_outs) - likelihood = outs[0] - avg_bound = 
np.mean(outs[1]) - std = np.std(outs[1]) - log_weights = outs[2] - log_weight_variances = np.var(log_weights, axis=2) - avg_log_weight_variance = np.var(log_weight_variances, axis=1) - avg_log_weight = np.mean(log_weights, axis=(1, 2)) - data = {"mean": avg_bound, "std": std, "log_weights": log_weights, - "log_weight_means": avg_log_weight, - "log_weight_variances": avg_log_weight_variance} - if len(outs) == 4: - data["resampled"] = outs[3] - data["avg_resampled"] = np.mean(outs[3], axis=1) - # Log some useful statistics. - tf.logging.info("Evaled bound %s with batch_size: %d, num_samples: %d." - % (config.bound, config.batch_size, config.num_samples)) - tf.logging.info("mean: %f, std: %f" % (avg_bound, std)) - tf.logging.info("true likelihood: %s" % likelihood) - tf.logging.info("avg log weight: %s" % avg_log_weight) - tf.logging.info("log weight variance: %s" % avg_log_weight_variance) - if len(outs) == 4: - tf.logging.info("avg resamples per t: %s" % data["avg_resampled"]) - if not tf.gfile.Exists(config.logdir): - tf.gfile.MakeDirs(config.logdir) - with tf.gfile.Open(os.path.join(config.logdir, "out.npz"), "w") as fout: - np.save(fout, data) diff --git a/research/fivo/fivo/ghmm_runners_test.py b/research/fivo/fivo/ghmm_runners_test.py deleted file mode 100644 index 50044ad475b3458858b580a6ff7664267485757b..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/ghmm_runners_test.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.ghmm_runners.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import tensorflow as tf - -from fivo import ghmm_runners - - -class GHMMRunnersTest(tf.test.TestCase): - - def default_config(self): - class Config(object): - pass - config = Config() - config.model = "ghmm" - config.bound = "fivo" - config.proposal_type = "prior" - config.batch_size = 4 - config.num_samples = 4 - config.num_timesteps = 10 - config.variance = 0.1 - config.resampling_type = "multinomial" - config.random_seed = 1234 - config.parallel_iterations = 1 - config.learning_rate = 1e-4 - config.summarize_every = 1 - config.max_steps = 1 - return config - - def test_eval_ghmm_notraining_fivo_prior(self): - self.eval_ghmm_notraining("fivo", "prior", -3.063864) - - def test_eval_ghmm_notraining_fivo_true_filtering(self): - self.eval_ghmm_notraining("fivo", "true-filtering", -1.1409812) - - def test_eval_ghmm_notraining_fivo_true_smoothing(self): - self.eval_ghmm_notraining("fivo", "true-smoothing", -0.85592091) - - def test_eval_ghmm_notraining_iwae_prior(self): - self.eval_ghmm_notraining("iwae", "prior", -5.9730167) - - def test_eval_ghmm_notraining_iwae_true_filtering(self): - self.eval_ghmm_notraining("iwae", "true-filtering", -1.1485999) - - def test_eval_ghmm_notraining_iwae_true_smoothing(self): - self.eval_ghmm_notraining("iwae", "true-smoothing", -0.85592091) - - def eval_ghmm_notraining(self, bound, proposal_type, expected_bound_avg): - config = self.default_config() - config.proposal_type = proposal_type - config.bound = bound - config.logdir = os.path.join( - tf.test.get_temp_dir(), "test-ghmm-%s-%s" % (proposal_type, bound)) - - ghmm_runners.run_eval(config) - - data = 
np.load(os.path.join(config.logdir, "out.npz")).item() - self.assertAlmostEqual(expected_bound_avg, data["mean"], places=3) - - def test_train_ghmm_for_one_step_and_eval_fivo_filtering(self): - self.train_ghmm_for_one_step_and_eval("fivo", "filtering", -16.727108) - - def test_train_ghmm_for_one_step_and_eval_fivo_smoothing(self): - self.train_ghmm_for_one_step_and_eval("fivo", "smoothing", -19.381277) - - def test_train_ghmm_for_one_step_and_eval_iwae_filtering(self): - self.train_ghmm_for_one_step_and_eval("iwae", "filtering", -33.31966) - - def test_train_ghmm_for_one_step_and_eval_iwae_smoothing(self): - self.train_ghmm_for_one_step_and_eval("iwae", "smoothing", -46.388447) - - def train_ghmm_for_one_step_and_eval(self, bound, proposal_type, expected_bound_avg): - config = self.default_config() - config.proposal_type = proposal_type - config.bound = bound - config.max_steps = 1 - config.logdir = os.path.join( - tf.test.get_temp_dir(), "test-ghmm-training-%s-%s" % (proposal_type, bound)) - ghmm_runners.run_train(config) - ghmm_runners.run_eval(config) - data = np.load(os.path.join(config.logdir, "out.npz")).item() - self.assertAlmostEqual(expected_bound_avg, data["mean"], places=2) - - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/models/__init__.py b/research/fivo/fivo/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/fivo/fivo/models/base.py b/research/fivo/fivo/models/base.py deleted file mode 100644 index 5ffcb7af216f5659e71d7425eeb4e2c3158b3d47..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/base.py +++ /dev/null @@ -1,342 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Reusable model classes for FIVO.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sonnet as snt -import tensorflow as tf - -from fivo import nested_utils as nested - -tfd = tf.contrib.distributions - - -class ELBOTrainableSequenceModel(object): - """An abstract class for ELBO-trainable sequence models to extend. - - Because the ELBO, IWAE, and FIVO bounds all accept the same arguments, - any model that is ELBO-trainable is also IWAE- and FIVO-trainable. - """ - - def zero_state(self, batch_size, dtype): - """Returns the initial state of the model as a Tensor or tuple of Tensors. - - Args: - batch_size: The batch size. - dtype: The datatype to use for the state. - """ - raise NotImplementedError("zero_state not yet implemented.") - - def set_observations(self, observations, seq_lengths): - """Sets the observations for the model. - - This method provides the model with all observed variables including both - inputs and targets. It will be called before running any computations with - the model that require the observations, e.g. training the model or - computing bounds, and should be used to run any necessary preprocessing - steps. - - Args: - observations: A potentially nested set of Tensors containing - all observations for the model, both inputs and targets. Typically - a set of Tensors with shape [max_seq_len, batch_size, data_size]. 
- seq_lengths: A [batch_size] Tensor of ints encoding the length of each - sequence in the batch (sequences can be padded to a common length). - """ - self.observations = observations - self.max_seq_len = tf.reduce_max(seq_lengths) - self.observations_ta = nested.tas_for_tensors( - observations, self.max_seq_len, clear_after_read=False) - self.seq_lengths = seq_lengths - - def propose_and_weight(self, state, t): - """Propogates model state one timestep and computes log weights. - - This method accepts the current state of the model and computes the state - for the next timestep as well as the incremental log weight of each - element in the batch. - - Args: - state: The current state of the model. - t: A scalar integer Tensor representing the current timestep. - Returns: - next_state: The state of the model after one timestep. - log_weights: A [batch_size] Tensor containing the incremental log weights. - """ - raise NotImplementedError("propose_and_weight not yet implemented.") - -DEFAULT_INITIALIZERS = {"w": tf.contrib.layers.xavier_initializer(), - "b": tf.zeros_initializer()} - - -class ConditionalNormalDistribution(object): - """A Normal distribution conditioned on Tensor inputs via a fc network.""" - - def __init__(self, size, hidden_layer_sizes, sigma_min=0.0, - raw_sigma_bias=0.25, hidden_activation_fn=tf.nn.relu, - initializers=None, name="conditional_normal_distribution"): - """Creates a conditional Normal distribution. - - Args: - size: The dimension of the random variable. - hidden_layer_sizes: The sizes of the hidden layers of the fully connected - network used to condition the distribution on the inputs. - sigma_min: The minimum standard deviation allowed, a scalar. - raw_sigma_bias: A scalar that is added to the raw standard deviation - output from the fully connected network. Set to 0.25 by default to - prevent standard deviations close to 0. - hidden_activation_fn: The activation function to use on the hidden layers - of the fully connected network. 
- initializers: The variable intitializers to use for the fully connected - network. The network is implemented using snt.nets.MLP so it must - be a dictionary mapping the keys 'w' and 'b' to the initializers for - the weights and biases. Defaults to xavier for the weights and zeros - for the biases when initializers is None. - name: The name of this distribution, used for sonnet scoping. - """ - self.sigma_min = sigma_min - self.raw_sigma_bias = raw_sigma_bias - self.name = name - self.size = size - if initializers is None: - initializers = DEFAULT_INITIALIZERS - self.fcnet = snt.nets.MLP( - output_sizes=hidden_layer_sizes + [2*size], - activation=hidden_activation_fn, - initializers=initializers, - activate_final=False, - use_bias=True, - name=name + "_fcnet") - - def condition(self, tensor_list, **unused_kwargs): - """Computes the parameters of a normal distribution based on the inputs.""" - inputs = tf.concat(tensor_list, axis=1) - outs = self.fcnet(inputs) - mu, sigma = tf.split(outs, 2, axis=1) - sigma = tf.maximum(tf.nn.softplus(sigma + self.raw_sigma_bias), - self.sigma_min) - return mu, sigma - - def __call__(self, *args, **kwargs): - """Creates a normal distribution conditioned on the inputs.""" - mu, sigma = self.condition(args, **kwargs) - return tf.contrib.distributions.Normal(loc=mu, scale=sigma) - - -class ConditionalBernoulliDistribution(object): - """A Bernoulli distribution conditioned on Tensor inputs via a fc net.""" - - def __init__(self, size, hidden_layer_sizes, hidden_activation_fn=tf.nn.relu, - initializers=None, bias_init=0.0, - name="conditional_bernoulli_distribution"): - """Creates a conditional Bernoulli distribution. - - Args: - size: The dimension of the random variable. - hidden_layer_sizes: The sizes of the hidden layers of the fully connected - network used to condition the distribution on the inputs. - hidden_activation_fn: The activation function to use on the hidden layers - of the fully connected network. 
- initializers: The variable intiializers to use for the fully connected - network. The network is implemented using snt.nets.MLP so it must - be a dictionary mapping the keys 'w' and 'b' to the initializers for - the weights and biases. Defaults to xavier for the weights and zeros - for the biases when initializers is None. - bias_init: A scalar or vector Tensor that is added to the output of the - fully-connected network that parameterizes the mean of this - distribution. - name: The name of this distribution, used for sonnet scoping. - """ - self.bias_init = bias_init - self.size = size - if initializers is None: - initializers = DEFAULT_INITIALIZERS - self.fcnet = snt.nets.MLP( - output_sizes=hidden_layer_sizes + [size], - activation=hidden_activation_fn, - initializers=initializers, - activate_final=False, - use_bias=True, - name=name + "_fcnet") - - def condition(self, tensor_list): - """Computes the p parameter of the Bernoulli distribution.""" - inputs = tf.concat(tensor_list, axis=1) - return self.fcnet(inputs) + self.bias_init - - def __call__(self, *args): - p = self.condition(args) - return tf.contrib.distributions.Bernoulli(logits=p) - - -class NormalApproximatePosterior(ConditionalNormalDistribution): - """A Normally-distributed approx. posterior with res_q parameterization.""" - - def __init__(self, size, hidden_layer_sizes, sigma_min=0.0, - raw_sigma_bias=0.25, hidden_activation_fn=tf.nn.relu, - initializers=None, smoothing=False, - name="conditional_normal_distribution"): - super(NormalApproximatePosterior, self).__init__( - size, hidden_layer_sizes, sigma_min=sigma_min, - raw_sigma_bias=raw_sigma_bias, - hidden_activation_fn=hidden_activation_fn, initializers=initializers, - name=name) - self.smoothing = smoothing - - def condition(self, tensor_list, prior_mu, smoothing_tensors=None): - """Generates the mean and variance of the normal distribution. - - Args: - tensor_list: The list of Tensors to condition on. 
Will be concatenated and - fed through a fully connected network. - prior_mu: The mean of the prior distribution associated with this - approximate posterior. Will be added to the mean produced by - this approximate posterior, in res_q fashion. - smoothing_tensors: A list of Tensors. If smoothing is True, these Tensors - will be concatenated with the tensors in tensor_list. - Returns: - mu: The mean of the approximate posterior. - sigma: The standard deviation of the approximate posterior. - """ - if self.smoothing: - tensor_list.extend(smoothing_tensors) - mu, sigma = super(NormalApproximatePosterior, self).condition(tensor_list) - return mu + prior_mu, sigma - - -class NonstationaryLinearDistribution(object): - """A set of loc-scale distributions that are linear functions of inputs. - - This class defines a series of location-scale distributions such that - the means are learnable linear functions of the inputs and the log variances - are learnable constants. The functions and log variances are different across - timesteps, allowing the distributions to be nonstationary. - """ - - def __init__(self, - num_timesteps, - inputs_per_timestep=None, - outputs_per_timestep=None, - initializers=None, - variance_min=0.0, - output_distribution=tfd.Normal, - dtype=tf.float32): - """Creates a NonstationaryLinearDistribution. - - Args: - num_timesteps: The number of timesteps, i.e. the number of distributions. - inputs_per_timestep: A list of python ints, the dimension of inputs to the - linear function at each timestep. If not provided, the dimension at each - timestep is assumed to be 1. - outputs_per_timestep: A list of python ints, the dimension of the output - distribution at each timestep. If not provided, the dimension at each - timestep is assumed to be 1. - initializers: A dictionary containing intializers for the variables. 
The - initializer under the key 'w' is used for the weights in the linear - function and the initializer under the key 'b' is used for the biases. - Defaults to xavier initialization for the weights and zeros for the - biases. - variance_min: Python float, the minimum variance of each distribution. - output_distribution: A locatin-scale subclass of tfd.Distribution that - defines the output distribution, e.g. Normal. - dtype: The dtype of the weights and biases. - """ - if not initializers: - initializers = DEFAULT_INITIALIZERS - if not inputs_per_timestep: - inputs_per_timestep = [1] * num_timesteps - if not outputs_per_timestep: - outputs_per_timestep = [1] * num_timesteps - self.num_timesteps = num_timesteps - self.variance_min = variance_min - self.initializers = initializers - self.dtype = dtype - self.output_distribution = output_distribution - - def _get_variables_ta(shapes, name, initializer, trainable=True): - """Creates a sequence of variables and stores them in a TensorArray.""" - # Infer shape if all shapes are equal. 
- first_shape = shapes[0] - infer_shape = all(shape == first_shape for shape in shapes) - ta = tf.TensorArray( - dtype=dtype, size=len(shapes), dynamic_size=False, - clear_after_read=False, infer_shape=infer_shape) - for t, shape in enumerate(shapes): - var = tf.get_variable( - name % t, shape=shape, initializer=initializer, trainable=trainable) - ta = ta.write(t, var) - return ta - - bias_shapes = [[num_outputs] for num_outputs in outputs_per_timestep] - self.log_variances = _get_variables_ta( - bias_shapes, "proposal_log_variance_%d", initializers["b"]) - self.mean_biases = _get_variables_ta( - bias_shapes, "proposal_b_%d", initializers["b"]) - weight_shapes = zip(inputs_per_timestep, outputs_per_timestep) - self.mean_weights = _get_variables_ta( - weight_shapes, "proposal_w_%d", initializers["w"]) - self.shapes = tf.TensorArray( - dtype=tf.int32, size=num_timesteps, - dynamic_size=False, clear_after_read=False).unstack(weight_shapes) - - def __call__(self, t, inputs): - """Computes the distribution at timestep t. - - Args: - t: Scalar integer Tensor, the current timestep. Must be in - [0, num_timesteps). - inputs: The inputs to the linear function parameterizing the mean of - the current distribution. A Tensor of shape [batch_size, num_inputs_t]. - Returns: - A tfd.Distribution subclass representing the distribution at timestep t. - """ - b = self.mean_biases.read(t) - w = self.mean_weights.read(t) - shape = self.shapes.read(t) - w = tf.reshape(w, shape) - b = tf.reshape(b, [shape[1], 1]) - log_variance = self.log_variances.read(t) - scale = tf.sqrt(tf.maximum(tf.exp(log_variance), self.variance_min)) - loc = tf.matmul(w, inputs, transpose_a=True) + b - return self.output_distribution(loc=loc, scale=scale) - - -def encode_all(inputs, encoder): - """Encodes a timeseries of inputs with a time independent encoder. - - Args: - inputs: A [time, batch, feature_dimensions] tensor. 
- encoder: A network that takes a [batch, features_dimensions] input and - encodes the input. - Returns: - A [time, batch, encoded_feature_dimensions] output tensor. - """ - input_shape = tf.shape(inputs) - num_timesteps, batch_size = input_shape[0], input_shape[1] - reshaped_inputs = tf.reshape(inputs, [-1, inputs.shape[-1]]) - inputs_encoded = encoder(reshaped_inputs) - inputs_encoded = tf.reshape(inputs_encoded, - [num_timesteps, batch_size, encoder.output_size]) - return inputs_encoded - - -def ta_for_tensor(x, **kwargs): - """Creates a TensorArray for the input tensor.""" - return tf.TensorArray( - x.dtype, tf.shape(x)[0], dynamic_size=False, **kwargs).unstack(x) diff --git a/research/fivo/fivo/models/ghmm.py b/research/fivo/fivo/models/ghmm.py deleted file mode 100644 index 07cf6c50e803383ef5690e8d24010e4706286eb7..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/ghmm.py +++ /dev/null @@ -1,483 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A Gaussian hidden markov model. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from fivo.models import base - -tfd = tf.contrib.distributions - - -class GaussianHMM(object): - """A hidden markov model with 1-D Gaussian latent space and observations. - - This is a hidden markov model where the state and observations are - one-dimensional Gaussians. The mean of each latent state is a linear - function of the previous latent state, and the mean of each observation - is a linear function of the current latent state. - - The description that follows is 0-indexed instead of 1-indexed to make - it easier to reason about the parameters passed to the model. - - The parameters of the model are: - T: The number timesteps, latent states, and observations. - vz_t, t=0 to T-1: The variance of the latent state at timestep t. - vx_t, t=0 to T-1: The variance of the observation at timestep t. - wz_t, t=1 to T-1: The weight that defines the latent transition at t. - wx_t, t=0 to T-1: The weight that defines the observation function at t. - - There are T vz_t, vx_t, and wx_t but only T-1 wz_t because there are only - T-1 transitions in the model. - - Given these parameters, sampling from the model is defined as - - z_0 ~ N(0, vz_0) - x_0 | z_0 ~ N(wx_0 * z_0, vx_0) - z_1 | z_0 ~ N(wz_1 * z_0, vz_1) - x_1 | z_1 ~ N(wx_1 * z_1, vx_1) - ... - z_{T-1} | z_{T-2} ~ N(wz_{T-1} * z_{T-2}, vz_{T-1}) - x_{T-1} | z_{T-1} ~ N(wx_{T-1} * z_{T-1}, vx_{T-1}). - """ - - def __init__(self, - num_timesteps, - transition_variances=1., - emission_variances=1., - transition_weights=1., - emission_weights=1., - dtype=tf.float32): - """Creates a gaussian hidden markov model. - - Args: - num_timesteps: A python int, the number of timesteps in the model. - transition_variances: The variance of p(z_t | z_t-1). Can be a scalar, - setting all variances to be the same, or a Tensor of shape - [num_timesteps]. 
- emission_variances: The variance of p(x_t | z_t). Can be a scalar, - setting all variances to be the same, or a Tensor of shape - [num_timesteps]. - transition_weights: The weight that defines the linear function that - produces the mean of z_t given z_{t-1}. Can be a scalar, setting - all weights to be the same, or a Tensor of shape [num_timesteps-1]. - emission_weights: The weight that defines the linear function that - produces the mean of x_t given z_t. Can be a scalar, setting - all weights to be the same, or a Tensor of shape [num_timesteps]. - dtype: The datatype of the state. - """ - self.num_timesteps = num_timesteps - self.dtype = dtype - - def _expand_param(param, size): - param = tf.convert_to_tensor(param, dtype=self.dtype) - if not param.get_shape().as_list(): - param = tf.tile(param[tf.newaxis], [size]) - - return param - - def _ta_for_param(param): - size = tf.shape(param)[0] - ta = tf.TensorArray(dtype=param.dtype, - size=size, - dynamic_size=False, - clear_after_read=False).unstack(param) - return ta - - self.transition_variances = _ta_for_param( - _expand_param(transition_variances, num_timesteps)) - self.transition_weights = _ta_for_param( - _expand_param(transition_weights, num_timesteps-1)) - em_var = _expand_param(emission_variances, num_timesteps) - self.emission_variances = _ta_for_param(em_var) - em_w = _expand_param(emission_weights, num_timesteps) - self.emission_weights = _ta_for_param(em_w) - self._compute_covariances(em_w, em_var) - - def _compute_covariances(self, emission_weights, emission_variances): - """Compute all covariance matrices. - - Computes the covaraince matrix for the latent variables, the observations, - and the covariance between the latents and observations. - - Args: - emission_weights: A Tensor of shape [num_timesteps] containing - the emission distribution weights at each timestep. - emission_variances: A Tensor of shape [num_timesteps] containing - the emiision distribution variances at each timestep. 
- """ - # Compute the marginal variance of each latent. - z_variances = [self.transition_variances.read(0)] - for i in range(1, self.num_timesteps): - z_variances.append( - z_variances[i-1] * tf.square(self.transition_weights.read(i-1)) + - self.transition_variances.read(i)) - # Compute the latent covariance matrix. - sigma_z = [] - for i in range(self.num_timesteps): - sigma_z_row = [] - for j in range(self.num_timesteps): - if i == j: - sigma_z_row.append(z_variances[i]) - continue - min_ind = min(i, j) - max_ind = max(i, j) - weight = tf.reduce_prod( - self.transition_weights.gather(tf.range(min_ind, max_ind))) - sigma_z_row.append(z_variances[min_ind] * weight) - sigma_z.append(tf.stack(sigma_z_row)) - self.sigma_z = tf.stack(sigma_z) - # Compute the observation covariance matrix. - x_weights_outer = tf.einsum("i,j->ij", emission_weights, emission_weights) - self.sigma_x = x_weights_outer * self.sigma_z + tf.diag(emission_variances) - # Compute the latent - observation covariance matrix. - # The first axis will index latents, the second axis will index observtions. - self.sigma_zx = emission_weights[tf.newaxis, :] * self.sigma_z - self.obs_dist = tfd.MultivariateNormalFullCovariance( - loc=tf.zeros([self.num_timesteps], dtype=tf.float32), - covariance_matrix=self.sigma_x) - - def transition(self, t, z_prev): - """Compute the transition distribution p(z_t | z_t-1). - - Args: - t: The current timestep, a scalar integer Tensor. When t=0 z_prev is - mostly ignored and the distribution p(z_0) is returned. z_prev is - 'mostly' ignored because it is still used to derive batch_size. - z_prev: A [batch_size] set of states. - Returns: - p(z_t | z_t-1) as a univariate normal distribution. 
- """ - batch_size = tf.shape(z_prev)[0] - scale = tf.sqrt(self.transition_variances.read(t)) - scale = tf.tile(scale[tf.newaxis], [batch_size]) - loc = tf.cond(tf.greater(t, 0), - lambda: self.transition_weights.read(t-1)*z_prev, - lambda: tf.zeros_like(scale)) - return tfd.Normal(loc=loc, scale=scale) - - def emission(self, t, z): - """Compute the emission distribution p(x_t | z_t). - - Args: - t: The current timestep, a scalar integer Tensor. - z: A [batch_size] set of the current states. - Returns: - p(x_t | z_t) as a univariate normal distribution. - """ - batch_size = tf.shape(z)[0] - scale = tf.sqrt(self.emission_variances.read(t)) - scale = tf.tile(scale[tf.newaxis], [batch_size]) - loc = self.emission_weights.read(t)*z - return tfd.Normal(loc=loc, scale=scale) - - def filtering(self, t, z_prev, x_cur): - """Computes the filtering distribution p(z_t | z_{t-1}, x_t). - - Args: - t: A python int, the index for z_t. When t is 0, z_prev is ignored, - giving p(z_0 | x_0). - z_prev: z_{t-1}, the previous z to condition on. A Tensor of shape - [batch_size]. - x_cur: x_t, the current x to condition on. A Tensor of shape [batch_size]. - Returns: - p(z_t | z_{t-1}, x_t) as a univariate normal distribution. 
- """ - z_prev = tf.convert_to_tensor(z_prev) - x_cur = tf.convert_to_tensor(x_cur) - batch_size = tf.shape(z_prev)[0] - z_var = self.transition_variances.read(t) - x_var = self.emission_variances.read(t) - x_weight = self.emission_weights.read(t) - prev_state_weight = x_var/(tf.square(x_weight)*z_var + x_var) - prev_state_weight *= tf.cond(tf.greater(t, 0), - lambda: self.transition_weights.read(t-1), - lambda: tf.zeros_like(prev_state_weight)) - cur_obs_weight = (x_weight*z_var)/(tf.square(x_weight)*z_var + x_var) - loc = prev_state_weight*z_prev + cur_obs_weight*x_cur - scale = tf.sqrt((z_var*x_var)/(tf.square(x_weight)*z_var + x_var)) - scale = tf.tile(scale[tf.newaxis], [batch_size]) - return tfd.Normal(loc=loc, scale=scale) - - def smoothing(self, t, z_prev, xs): - """Computes the smoothing distribution p(z_t | z_{t-1}, x_{t:num_timesteps). - - Args: - t: A python int, the index for z_t. When t is 0, z_prev is ignored, - giving p(z_0 | x_{0:num_timesteps-1}). - z_prev: z_{t-1}, the previous z to condition on. A Tensor of shape - [batch_size]. - xs: x_{t:num_timesteps}, the future xs to condition on. A Tensor of shape - [num_timesteps - t, batch_size]. - Returns: - p(z_t | z_{t-1}, x_{t:num_timesteps}) as a univariate normal distribution. - """ - xs = tf.convert_to_tensor(xs) - z_prev = tf.convert_to_tensor(z_prev) - batch_size = tf.shape(xs)[1] - mess_mean, mess_prec = tf.cond( - tf.less(t, self.num_timesteps-1), - lambda: tf.unstack(self._compute_backwards_messages(xs[1:]).read(0)), - lambda: [tf.zeros([batch_size]), tf.zeros([batch_size])]) - return self._smoothing_from_message(t, z_prev, xs[0], mess_mean, mess_prec) - - def _smoothing_from_message(self, t, z_prev, x_t, mess_mean, mess_prec): - """Computes the smoothing distribution given message incoming to z_t. - - Computes p(z_t | z_{t-1}, x_{t:num_timesteps}) given the message incoming - to the node for z_t. - - Args: - t: A python int, the index for z_t. When t is 0, z_prev is ignored. 
- z_prev: z_{t-1}, the previous z to condition on. A Tensor of shape - [batch_size]. - x_t: The observation x at timestep t. - mess_mean: The mean of the message incoming to z_t, in information form. - mess_prec: The precision of the message incoming to z_t. - Returns: - p(z_t | z_{t-1}, x_{t:num_timesteps}) as a univariate normal distribution. - """ - - batch_size = tf.shape(x_t)[0] - z_var = self.transition_variances.read(t) - x_var = self.emission_variances.read(t) - w_x = self.emission_weights.read(t) - - def transition_term(): - return (tf.square(self.transition_weights.read(t))/ - self.transition_variances.read(t+1)) - - prec = 1./z_var + tf.square(w_x)/x_var + mess_prec - prec += tf.cond(tf.less(t, self.num_timesteps-1), - transition_term, lambda: 0.) - mean = x_t*(w_x/x_var) + mess_mean - mean += tf.cond(tf.greater(t, 0), - lambda: z_prev*(self.transition_weights.read(t-1)/z_var), - lambda: 0.) - mean = tf.reshape(mean / prec, [batch_size]) - scale = tf.reshape(tf.sqrt(1./prec), [batch_size]) - return tfd.Normal(loc=mean, scale=scale) - - def _compute_backwards_messages(self, xs): - """Computes the backwards messages used in smoothing.""" - batch_size = tf.shape(xs)[1] - num_xs = tf.shape(xs)[0] - until_t = self.num_timesteps - num_xs - xs = tf.TensorArray(dtype=xs.dtype, - size=num_xs, - dynamic_size=False, - clear_after_read=True).unstack(xs) - messages_ta = tf.TensorArray(dtype=xs.dtype, - size=num_xs, - dynamic_size=False, - clear_after_read=False) - - def compute_message(t, prev_mean, prev_prec, messages_ta): - """Computes one step of the backwards messages.""" - z_var = self.transition_variances.read(t) - w_z = self.transition_weights.read(t-1) - x_var = self.emission_variances.read(t) - w_x = self.emission_weights.read(t) - cur_x = xs.read(t - until_t) - - # If it isn't the first message, add the terms from the transition. 
- def transition_term(): - return (tf.square(self.transition_weights.read(t))/ - self.transition_variances.read(t+1)) - - unary_prec = 1/z_var + tf.square(w_x)/x_var - unary_prec += tf.cond(tf.less(t, self.num_timesteps-1), - transition_term, lambda: 0.) - - unary_mean = (w_x / x_var) * cur_x - pairwise_prec = w_z / z_var - - next_prec = -tf.square(pairwise_prec)/(unary_prec + prev_prec) - next_mean = (pairwise_prec * (unary_mean + prev_mean) / - (unary_prec + prev_prec)) - next_prec = tf.reshape(next_prec, [batch_size]) - next_mean = tf.reshape(next_mean, [batch_size]) - messages_ta = messages_ta.write(t - until_t, - tf.stack([next_mean, next_prec])) - return t-1, next_mean, next_prec, messages_ta - - def pred(t, *unused_args): - return tf.greater_equal(t, until_t) - - init_prec = tf.zeros([batch_size], dtype=xs.dtype) - init_mean = tf.zeros([batch_size], dtype=xs.dtype) - t0 = tf.constant(self.num_timesteps - 1, dtype=tf.int32) - - outs = tf.while_loop(pred, compute_message, - (t0, init_mean, init_prec, messages_ta)) - messages = outs[-1] - return messages - - def lookahead(self, t, z_prev): - """Compute the 'lookahead' distribution, p(x_{t:T} | z_{t-1}). - - Args: - t: A scalar Tensor int, the current timestep. Must be at least 1. - z_prev: The latent state at time t-1. A Tensor of shape [batch_size]. - Returns: - p(x_{t:T} | z_{t-1}) as a multivariate normal distribution. - """ - z_prev = tf.convert_to_tensor(z_prev) - sigma_zx = self.sigma_zx[t-1, t:] - z_var = self.sigma_z[t-1, t-1] - mean = tf.einsum("i,j->ij", z_prev, sigma_zx) / z_var - variance = (self.sigma_x[t:, t:] - - tf.einsum("i,j->ij", sigma_zx, sigma_zx) / z_var) - return tfd.MultivariateNormalFullCovariance( - loc=mean, covariance_matrix=variance) - - def likelihood(self, xs): - """Compute the true marginal likelihood of the data. - - Args: - xs: The observations, a [num_timesteps, batch_size] float Tensor. 
- Returns: - likelihoods: A [batch_size] float Tensor representing the likelihood of - each sequence of observations in the batch. - """ - return self.obs_dist.log_prob(tf.transpose(xs)) - - -class TrainableGaussianHMM(GaussianHMM, base.ELBOTrainableSequenceModel): - """An interface between importance-sampling training methods and the GHMM.""" - - def __init__(self, - num_timesteps, - proposal_type, - transition_variances=1., - emission_variances=1., - transition_weights=1., - emission_weights=1., - random_seed=None, - dtype=tf.float32): - """Constructs a trainable Gaussian HMM. - - Args: - num_timesteps: A python int, the number of timesteps in the model. - proposal_type: The type of proposal to use in the importance sampling - setup. Could be "filtering", "smoothing", "prior", "true-filtering", - or "true-smoothing". If "true-filtering" or "true-smoothing" are - selected, then the true filtering or smoothing distributions are used to - propose new states. If "learned-filtering" is selected then a - distribution with learnable parameters is used. Specifically at each - timestep the proposal is Gaussian with mean that is a learnable linear - function of the previous state and current observation. The log variance - is a per-timestep learnable constant. "learned-smoothing" is similar, - but the mean is a learnable linear function of the previous state and - all future observations. Note that this proposal class includes the true - posterior. If "prior" is selected then states are proposed from the - model's prior. - transition_variances: The variance of p(z_t | z_t-1). Can be a scalar, - setting all variances to be the same, or a Tensor of shape - [num_timesteps]. - emission_variances: The variance of p(x_t | z_t). Can be a scalar, - setting all variances to be the same, or a Tensor of shape - [num_timesteps]. - transition_weights: The weight that defines the linear function that - produces the mean of z_t given z_{t-1}. 
Can be a scalar, setting - all weights to be the same, or a Tensor of shape [num_timesteps-1]. - emission_weights: The weight that defines the linear function that - produces the mean of x_t given z_t. Can be a scalar, setting - all weights to be the same, or a Tensor of shape [num_timesteps]. - random_seed: A seed for the proposal sampling, mainly useful for testing. - dtype: The datatype of the state. - """ - super(TrainableGaussianHMM, self).__init__( - num_timesteps, transition_variances, emission_variances, - transition_weights, emission_weights, dtype=dtype) - self.random_seed = random_seed - assert proposal_type in ["filtering", "smoothing", "prior", - "true-filtering", "true-smoothing"] - if proposal_type == "true-filtering": - self.proposal = self._filtering_proposal - elif proposal_type == "true-smoothing": - self.proposal = self._smoothing_proposal - elif proposal_type == "prior": - self.proposal = self.transition - elif proposal_type == "filtering": - self._learned_proposal_fn = base.NonstationaryLinearDistribution( - num_timesteps, inputs_per_timestep=[1] + [2] * (num_timesteps-1)) - self.proposal = self._learned_filtering_proposal - elif proposal_type == "smoothing": - inputs_per_timestep = [num_timesteps] + [num_timesteps - t - for t in range(num_timesteps-1)] - self._learned_proposal_fn = base.NonstationaryLinearDistribution( - num_timesteps, inputs_per_timestep=inputs_per_timestep) - self.proposal = self._learned_smoothing_proposal - - def set_observations(self, xs, seq_lengths): - """Sets the observations and stores the backwards messages.""" - # Squeeze out data dimension since everything is 1-d. 
- xs = tf.squeeze(xs) - self.batch_size = tf.shape(xs)[1] - super(TrainableGaussianHMM, self).set_observations(xs, seq_lengths) - self.messages = self._compute_backwards_messages(xs[1:]) - - def zero_state(self, batch_size, dtype): - return tf.zeros([batch_size], dtype=dtype) - - def propose_and_weight(self, state, t): - """Computes the next state and log weights for the GHMM.""" - state_shape = tf.shape(state) - xt = self.observations[t] - p_zt = self.transition(t, state) - q_zt = self.proposal(t, state) - zt = q_zt.sample(seed=self.random_seed) - zt = tf.reshape(zt, state_shape) - p_xt_given_zt = self.emission(t, zt) - log_p_zt = p_zt.log_prob(zt) - log_q_zt = q_zt.log_prob(zt) - log_p_xt_given_zt = p_xt_given_zt.log_prob(xt) - weight = log_p_zt + log_p_xt_given_zt - log_q_zt - return weight, zt - - def _filtering_proposal(self, t, state): - """Uses the stored observations to compute the filtering distribution.""" - cur_x = self.observations[t] - return self.filtering(t, state, cur_x) - - def _smoothing_proposal(self, t, state): - """Uses the stored messages to compute the smoothing distribution.""" - mess_mean, mess_prec = tf.cond( - tf.less(t, self.num_timesteps-1), - lambda: tf.unstack(self.messages.read(t)), - lambda: [tf.zeros([self.batch_size]), tf.zeros([self.batch_size])]) - return self._smoothing_from_message(t, state, self.observations[t], - mess_mean, mess_prec) - - def _learned_filtering_proposal(self, t, state): - cur_x = self.observations[t] - inputs = tf.cond(tf.greater(t, 0), - lambda: tf.stack([state, cur_x], axis=0), - lambda: cur_x[tf.newaxis, :]) - return self._learned_proposal_fn(t, inputs) - - def _learned_smoothing_proposal(self, t, state): - xs = self.observations_ta.gather(tf.range(t, self.num_timesteps)) - inputs = tf.cond(tf.greater(t, 0), - lambda: tf.concat([state[tf.newaxis, :], xs], axis=0), - lambda: xs) - return self._learned_proposal_fn(t, inputs) diff --git a/research/fivo/fivo/models/ghmm_test.py 
b/research/fivo/fivo/models/ghmm_test.py deleted file mode 100644 index 15a03c0c7abeae09bd1cfc87f917ef53ecac205f..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/ghmm_test.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.models.ghmm""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from fivo.models.ghmm import GaussianHMM -from fivo.models.ghmm import TrainableGaussianHMM - - -class GHMMTest(tf.test.TestCase): - - def test_transition_no_weights(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, - transition_variances=[1., 2., 3.]) - prev_z = tf.constant([1., 2.], dtype=tf.float32) - z0 = ghmm.transition(0, prev_z) - z1 = ghmm.transition(1, prev_z) - z2 = ghmm.transition(2, prev_z) - outs = sess.run([z0.mean(), z0.variance(), - z1.mean(), z1.variance(), - z2.mean(), z2.variance()]) - self.assertAllClose(outs, [[0., 0.], [1., 1.], - [1., 2.], [2., 2.], - [1., 2.], [3., 3.]]) - - def test_transition_with_weights(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, - transition_variances=[1., 2., 3.], - transition_weights=[2., 3.]) - prev_z = tf.constant([1., 2.], dtype=tf.float32) - z0 = ghmm.transition(0, 
prev_z) - z1 = ghmm.transition(1, prev_z) - z2 = ghmm.transition(2, prev_z) - outs = sess.run([z0.mean(), z0.variance(), - z1.mean(), z1.variance(), - z2.mean(), z2.variance()]) - self.assertAllClose(outs, [[0., 0.], [1., 1.], - [2., 4.], [2., 2.], - [3., 6.], [3., 3.]]) - - def test_emission_no_weights(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, emission_variances=[1., 2., 3.]) - z = tf.constant([1., 2.], dtype=tf.float32) - x0 = ghmm.emission(0, z) - x1 = ghmm.emission(1, z) - x2 = ghmm.emission(2, z) - outs = sess.run([x0.mean(), x0.variance(), - x1.mean(), x1.variance(), - x2.mean(), x2.variance()]) - self.assertAllClose(outs, [[1., 2.], [1., 1.], - [1., 2.], [2., 2.], - [1., 2.], [3., 3.]]) - - def test_emission_with_weights(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, - emission_variances=[1., 2., 3.], - emission_weights=[1., 2., 3.]) - z = tf.constant([1., 2.], dtype=tf.float32) - x0 = ghmm.emission(0, z) - x1 = ghmm.emission(1, z) - x2 = ghmm.emission(2, z) - outs = sess.run([x0.mean(), x0.variance(), - x1.mean(), x1.variance(), - x2.mean(), x2.variance()]) - self.assertAllClose(outs, [[1., 2.], [1., 1.], - [2., 4.], [2., 2.], - [3., 6.], [3., 3.]]) - - def test_filtering_no_weights(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, - transition_variances=[1., 2., 3.], - emission_variances=[4., 5., 6.]) - z_prev = tf.constant([1., 2.], dtype=tf.float32) - x_cur = tf.constant([3., 4.], dtype=tf.float32) - expected_outs = [[[3./5., 4./5.], [4./5., 4./5.]], - [[11./7., 18./7.], [10./7., 10./7.]], - [[5./3., 8./3.], [2., 2.]]] - f_post_0 = ghmm.filtering(0, z_prev, x_cur) - f_post_1 = ghmm.filtering(1, z_prev, x_cur) - f_post_2 = ghmm.filtering(2, z_prev, x_cur) - outs = sess.run([[f_post_0.mean(), f_post_0.variance()], - [f_post_1.mean(), f_post_1.variance()], - [f_post_2.mean(), f_post_2.variance()]]) - self.assertAllClose(expected_outs, outs) - - def test_filtering_with_weights(self): - with 
self.test_session() as sess: - ghmm = GaussianHMM(3, - transition_variances=[1., 2., 3.], - emission_variances=[4., 5., 6.], - transition_weights=[7., 8.], - emission_weights=[9., 10., 11]) - z_prev = tf.constant([1., 2.], dtype=tf.float32) - x_cur = tf.constant([3., 4.], dtype=tf.float32) - expected_outs = [[[27./85., 36./85.], [4./85., 4./85.]], - [[95./205., 150./205.], [10./205., 10./205.]], - [[147./369., 228./369.], [18./369., 18./369.]]] - f_post_0 = ghmm.filtering(0, z_prev, x_cur) - f_post_1 = ghmm.filtering(1, z_prev, x_cur) - f_post_2 = ghmm.filtering(2, z_prev, x_cur) - outs = sess.run([[f_post_0.mean(), f_post_0.variance()], - [f_post_1.mean(), f_post_1.variance()], - [f_post_2.mean(), f_post_2.variance()]]) - self.assertAllClose(expected_outs, outs) - - def test_smoothing(self): - with self.test_session() as sess: - ghmm = GaussianHMM(3, - transition_variances=[1., 2., 3.], - emission_variances=[4., 5., 6.]) - z_prev = tf.constant([1., 2.], dtype=tf.float32) - xs = tf.constant([[1., 2.], - [3., 4.], - [5., 6.]], dtype=tf.float32) - s_post1 = ghmm.smoothing(0, z_prev, xs) - outs = sess.run([s_post1.mean(), s_post1.variance()]) - expected_outs = [[281./421., 410./421.], [292./421., 292./421.]] - self.assertAllClose(expected_outs, outs) - - expected_outs = [[149./73., 222./73.], [90./73., 90./73.]] - s_post2 = ghmm.smoothing(1, z_prev, xs[1:]) - outs = sess.run([s_post2.mean(), s_post2.variance()]) - self.assertAllClose(expected_outs, outs) - - s_post3 = ghmm.smoothing(2, z_prev, xs[2:]) - outs = sess.run([s_post3.mean(), s_post3.variance()]) - expected_outs = [[7./3., 10./3.], [2., 2.]] - self.assertAllClose(expected_outs, outs) - - def test_smoothing_with_weights(self): - with self.test_session() as sess: - x_weight = np.array([4, 5, 6, 7], dtype=np.float32) - sigma_x = np.array([5, 6, 7, 8], dtype=np.float32) - z_weight = np.array([1, 2, 3], dtype=np.float32) - sigma_z = np.array([1, 2, 3, 4], dtype=np.float32) - z_prev = np.array([1, 2], 
dtype=np.float32) - batch_size = 2 - xs = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32) - - z_cov, x_cov, z_x_cov = self._compute_covariance_matrices( - x_weight, z_weight, sigma_x, sigma_z) - - expected_outs = [] - # Compute mean and variance for z_0 when we don't condition - # on previous zs. - sigma_12 = z_x_cov[0, :] - sigma_12_22 = np.dot(sigma_12, np.linalg.inv(x_cov)) - mean = np.dot(sigma_12_22, xs) - variance = np.squeeze(z_cov[0, 0] - np.dot(sigma_12_22, sigma_12)) - expected_outs.append([mean, np.tile(variance, [batch_size])]) - - # Compute mean and variance for remaining z_ts. - for t in xrange(1, 4): - sigma_12 = np.concatenate([[z_cov[t, t - 1]], z_x_cov[t, t:]]) - sigma_22 = np.vstack(( - np.hstack((z_cov[t-1, t-1], z_x_cov[t-1, t:])), - np.hstack((np.transpose([z_x_cov[t-1, t:]]), x_cov[t:, t:])) - )) - sigma_12_22 = np.dot(sigma_12, np.linalg.inv(sigma_22)) - mean = np.dot(sigma_12_22, np.vstack((z_prev, xs[t:]))) - variance = np.squeeze(z_cov[t, t] - np.dot(sigma_12_22, sigma_12)) - expected_outs.append([mean, np.tile(variance, [batch_size])]) - - ghmm = GaussianHMM(4, - transition_variances=sigma_z, - emission_variances=sigma_x, - transition_weights=z_weight, - emission_weights=x_weight) - out_dists = [ghmm.smoothing(t, z_prev, xs[t:]) for t in range(0, 4)] - outs = [[d.mean(), d.variance()] for d in out_dists] - run_outs = sess.run(outs) - self.assertAllClose(expected_outs, run_outs) - - def test_covariance_matrices(self): - with self.test_session() as sess: - x_weight = np.array([4, 5, 6, 7], dtype=np.float32) - sigma_x = np.array([5, 6, 7, 8], dtype=np.float32) - z_weight = np.array([1, 2, 3], dtype=np.float32) - sigma_z = np.array([1, 2, 3, 4], dtype=np.float32) - - z_cov, x_cov, z_x_cov = self._compute_covariance_matrices( - x_weight, z_weight, sigma_x, sigma_z) - - ghmm = GaussianHMM(4, - transition_variances=sigma_z, - emission_variances=sigma_x, - transition_weights=z_weight, - emission_weights=x_weight) - 
self.assertAllClose(z_cov, sess.run(ghmm.sigma_z)) - self.assertAllClose(x_cov, sess.run(ghmm.sigma_x)) - self.assertAllClose(z_x_cov, sess.run(ghmm.sigma_zx)) - - def _compute_covariance_matrices(self, x_weight, z_weight, sigma_x, sigma_z): - # Create z covariance matrix from the definitions. - z_cov = np.zeros([4, 4]) - z_cov[0, 0] = sigma_z[0] - for i in range(1, 4): - z_cov[i, i] = (z_cov[i - 1, i - 1] * np.square(z_weight[i - 1]) + - sigma_z[i]) - for i in range(4): - for j in range(4): - if i == j: continue - min_ind = min(i, j) - max_ind = max(i, j) - weights = np.prod(z_weight[min_ind:max_ind]) - z_cov[i, j] = z_cov[min_ind, min_ind] * weights - # Compute the x covariance matrix and the z-x covariance matrix. - x_weights_outer = np.outer(x_weight, x_weight) - x_cov = x_weights_outer * z_cov + np.diag(sigma_x) - z_x_cov = x_weight * z_cov - return z_cov, x_cov, z_x_cov - - def test_lookahead(self): - x_weight = np.array([4, 5, 6, 7], dtype=np.float32) - sigma_x = np.array([5, 6, 7, 8], dtype=np.float32) - z_weight = np.array([1, 2, 3], dtype=np.float32) - sigma_z = np.array([1, 2, 3, 4], dtype=np.float32) - z_prev = np.array([1, 2], dtype=np.float32) - - with self.test_session() as sess: - z_cov, x_cov, z_x_cov = self._compute_covariance_matrices( - x_weight, z_weight, sigma_x, sigma_z) - - expected_outs = [] - for t in range(1, 4): - sigma_12 = z_x_cov[t-1, t:] - z_var = z_cov[t-1, t-1] - mean = np.outer(z_prev, sigma_12/z_var) - variance = x_cov[t:, t:] - np.outer(sigma_12, sigma_12)/ z_var - expected_outs.append([mean, variance]) - - ghmm = GaussianHMM(4, - transition_variances=sigma_z, - emission_variances=sigma_x, - transition_weights=z_weight, - emission_weights=x_weight) - out_dists = [ghmm.lookahead(t, z_prev) for t in range(1, 4)] - outs = [[d.mean(), d.covariance()] for d in out_dists] - run_outs = sess.run(outs) - self.assertAllClose(expected_outs, run_outs) - - -class TrainableGHMMTest(tf.test.TestCase): - - def test_filtering_proposal(self): - 
"""Check that stashing the xs doesn't change the filtering distributions.""" - with self.test_session() as sess: - ghmm = TrainableGaussianHMM( - 3, "filtering", - transition_variances=[1., 2., 3.], - emission_variances=[4., 5., 6.], - transition_weights=[7., 8.], - emission_weights=[9., 10., 11]) - observations = tf.constant([[3., 4.], - [3., 4.], - [3., 4.]], dtype=tf.float32) - ghmm.set_observations(observations, [3, 3]) - z_prev = tf.constant([1., 2.], dtype=tf.float32) - - proposals = [ghmm._filtering_proposal(t, z_prev) for t in range(3)] - dist_params = [[p.mean(), p.variance()] for p in proposals] - - expected_outs = [[[27./85., 36./85.], [4./85., 4./85.]], - [[95./205., 150./205.], [10./205., 10./205.]], - [[147./369., 228./369.], [18./369., 18./369.]]] - self.assertAllClose(expected_outs, sess.run(dist_params)) - - def test_smoothing_proposal(self): - with self.test_session() as sess: - ghmm = TrainableGaussianHMM( - 3, "smoothing", - transition_variances=[1., 2., 3.], - emission_variances=[4., 5., 6.]) - xs = tf.constant([[1., 2.], - [3., 4.], - [5., 6.]], dtype=tf.float32) - ghmm.set_observations(xs, [3, 3]) - z_prev = tf.constant([1., 2.], dtype=tf.float32) - - proposals = [ghmm._smoothing_proposal(t, z_prev) for t in range(3)] - dist_params = [[p.mean(), p.variance()] for p in proposals] - - expected_outs = [[[281./421., 410./421.], [292./421., 292./421.]], - [[149./73., 222./73.], [90./73., 90./73.]], - [[7./3., 10./3.], [2., 2.]]] - self.assertAllClose(expected_outs, sess.run(dist_params)) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/models/srnn.py b/research/fivo/fivo/models/srnn.py deleted file mode 100644 index cdfb560eedffccf8edf41dbab4e85bbd8bbfab46..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/srnn.py +++ /dev/null @@ -1,587 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SRNN classes.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple -import functools - -import sonnet as snt -import tensorflow as tf - -from fivo.models import base - - -SRNNState = namedtuple("SRNNState", "rnn_state latent_encoded") - - -class SRNN(object): - """Implementation of a Stochastic Recurrent Neural Network (SRNN). - - Introduced in "Sequential Neural Models with Stochastic Layers" - by Fraccaro et al. https://arxiv.org/pdf/1605.07571.pdf. - - The SRNN is a sequence model similar to an RNN that uses stochastic latent - variables to improve its representational power. It can be thought of as a - sequential analogue to the variational auto-encoder (VAE). - - The SRNN has a deterministic RNN as its backbone, represented by the - sequence of RNN hidden states h_t. The latent state is conditioned on - the deterministic RNN states and previous latent state. Unlike the VRNN, the - the RNN state is not conditioned on the previous latent state. The latent - states have a Markov structure and it is assumed that - p(z_t | z_{1:t-1}) = p(z_t | z_{t-1}). - - In this implementation of the SRNN the latent state z_t is Gaussian. 
The - model's prior over z_t (also called the transition distribution) is - distributed as Normal(mu_t, diag(sigma_t^2)) where mu_t and sigma_t are the - mean and standard deviation output from a fully connected network that accepts - the rnn hidden state h_t and previous latent state z_{t-1} as input. - - The emission distribution p(x_t|z_t, h_t) is conditioned on the latent state - z_t as well as the current RNN hidden state h_t via a fully connected network. - - To increase the modeling power of the SRNN, two additional networks are - used to extract features from the data and the latent state. Those networks - are called data_encoder and latent_encoder respectively. - - For an example of how to call the SRNN's methods see sample_step. - - There are a few differences between this exposition and the paper. The main - goal was to be consistent with the VRNN code. A few components are renamed. - The backward RNN for approximating the posterior, g_phi_a in the paper, is the - rev_rnn_cell. The forward RNN that conditions the latent distribution, d in - the paper, is the rnn_cell. The paper doesn't name the NN's that serve as - feature extractors, and we name them here as the data_encoder and - latent_encoder. - """ - - def __init__(self, - rnn_cell, - data_encoder, - latent_encoder, - transition, - emission, - random_seed=None): - """Create a SRNN. - - Args: - rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will form the - deterministic backbone of the SRNN. The inputs to the RNN will be the - the encoded input of the current timestep, a Tensor of shape - [batch_size, encoded_data_size]. - data_encoder: A callable that accepts a batch of data x_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument the inputs x_t, a Tensor of the shape - [batch_size, data_size] and return a Tensor of shape - [batch_size, encoded_data_size]. 
This callable will be called multiple - times in the SRNN cell so if scoping is not handled correctly then - multiple copies of the variables in this network could be made. It is - recommended to use a snt.nets.MLP module, which takes care of this for - you. - latent_encoder: A callable that accepts a latent state z_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument a Tensor of shape [batch_size, latent_size] and - return a Tensor of shape [batch_size, encoded_latent_size]. - This callable must also have the property 'output_size' defined, - returning encoded_latent_size. - transition: A callable that implements the transition distribution - p(z_t|h_t, z_t-1). Must accept as argument the previous RNN hidden state - and previous encoded latent state then return a tf.distributions.Normal - distribution conditioned on the input. - emission: A callable that implements the emission distribution - p(x_t|z_t, h_t). Must accept as arguments the encoded latent state - and the RNN hidden state and return a subclass of - tf.distributions.Distribution that can be used to evaluate the logprob - of the targets. - random_seed: The seed for the random ops. Sets the seed for sample_step. - """ - self.random_seed = random_seed - self.rnn_cell = rnn_cell - self.data_encoder = data_encoder - self.latent_encoder = latent_encoder - self.encoded_z_size = latent_encoder.output_size - self.state_size = (self.rnn_cell.state_size) - self._transition = transition - self._emission = emission - - def zero_state(self, batch_size, dtype): - """The initial state of the SRNN. - - Contains the initial state of the RNN and the inital encoded latent. - - Args: - batch_size: The batch size. - dtype: The data type of the SRNN. - Returns: - zero_state: The initial state of the SRNN. 
- """ - return SRNNState( - rnn_state=self.rnn_cell.zero_state(batch_size, dtype), - latent_encoded=tf.zeros( - [batch_size, self.latent_encoder.output_size], dtype=dtype)) - - def run_rnn(self, prev_rnn_state, inputs): - """Runs the deterministic RNN for one step. - - Args: - prev_rnn_state: The state of the RNN from the previous timestep. - inputs: A Tensor of shape [batch_size, data_size], the current inputs to - the model. Most often this is x_{t-1}, the previous token in the - observation sequence. - Returns: - rnn_out: The output of the RNN. - rnn_state: The new state of the RNN. - """ - rnn_inputs = self.data_encoder(tf.to_float(inputs)) - rnn_out, rnn_state = self.rnn_cell(rnn_inputs, prev_rnn_state) - return rnn_out, rnn_state - - def transition(self, rnn_out, prev_latent_encoded): - """Computes the transition distribution p(z_t|h_t, z_{t-1}). - - Note that p(z_t | h_t, z_{t-1}) = p(z_t| z_{t-1}, x_{1:t-1}) - - Args: - rnn_out: The output of the rnn for the current timestep. - prev_latent_encoded: Float Tensor of shape - [batch_size, encoded_latent_size], the previous latent state z_{t-1} - run through latent_encoder. - Returns: - p(z_t | h_t): A normal distribution with event shape - [batch_size, latent_size]. - """ - return self._transition(rnn_out, prev_latent_encoded) - - def emission(self, latent, rnn_out): - """Computes the emission distribution p(x_t | z_t, h_t). - - Note that p(x_t | z_t, h_t) = p(x_t | z_t, x_{1:t-1}) - - Args: - latent: The stochastic latent state z_t. - rnn_out: The output of the rnn for the current timestep. - Returns: - p(x_t | z_t, h_t): A distribution with event shape - [batch_size, data_size]. - latent_encoded: The latent state encoded with latent_encoder. Should be - passed to transition() on the next timestep. - """ - latent_encoded = self.latent_encoder(latent) - return self._emission(latent_encoded, rnn_out), latent_encoded - - def sample_step(self, prev_state, inputs, unused_t): - """Samples one output from the model. 
- - Args: - prev_state: The previous state of the model, a SRNNState containing the - previous rnn state and the previous encoded latent. - inputs: A Tensor of shape [batch_size, data_size], the current inputs to - the model. Most often this is x_{t-1}, the previous token in the - observation sequence. - unused_t: The current timestep. Not used currently. - Returns: - new_state: The next state of the model, a SRNNState. - xt: A float Tensor of shape [batch_size, data_size], an output sampled - from the emission distribution. - """ - rnn_out, rnn_state = self.run_rnn(prev_state.rnn_state, - inputs) - p_zt = self.transition(rnn_out, prev_state.latent_encoded) - zt = p_zt.sample(seed=self.random_seed) - p_xt_given_zt, latent_encoded = self.emission(zt, rnn_out) - xt = p_xt_given_zt.sample(seed=self.random_seed) - new_state = SRNNState(rnn_state=rnn_state, latent_encoded=latent_encoded) - return new_state, tf.to_float(xt) - -# pylint: disable=invalid-name -# pylint thinks this is a top-level constant. -TrainableSRNNState = namedtuple("TrainableSRNNState", - SRNNState._fields + ("rnn_out",)) -# pylint: enable=g-invalid-name - - -class TrainableSRNN(SRNN, base.ELBOTrainableSequenceModel): - """A SRNN subclass with proposals and methods for training and evaluation. - - This class adds proposals used for training with importance-sampling based - methods such as the ELBO. The model can be configured to propose from one - of three proposals: a learned filtering proposal, a learned smoothing - proposal, or the prior (i.e. the transition distribution). - - As described in the SRNN paper, the learned filtering proposal is - parameterized by a fully connected neural network that accepts as input the - current target x_t and the current rnn output h_t. The learned smoothing - proposal is also given the hidden state of an RNN run in reverse over the - inputs, so as to incorporate information about future observations. 
- - All learned proposals use the 'res_q' parameterization, meaning that instead - of directly producing the mean of z_t, the proposal network predicts the - 'residual' from the prior's mean. This is explored more in section 3.3 of - https://arxiv.org/pdf/1605.07571.pdf. - - During training, the latent state z_t is sampled from the proposal and the - reparameterization trick is used to provide low-variance gradients. - - Note that the SRNN paper refers to the proposals as the approximate posterior, - but we match the VRNN convention of referring to it as the encoder. - """ - - def __init__(self, - rnn_cell, - data_encoder, - latent_encoder, - transition, - emission, - proposal_type, - proposal=None, - rev_rnn_cell=None, - tilt=None, - random_seed=None): - """Create a trainable RNN. - - Args: - rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will form the - deterministic backbone of the SRNN. The inputs to the RNN will be the - the encoded input of the current timestep, a Tensor of shape - [batch_size, encoded_data_size]. - data_encoder: A callable that accepts a batch of data x_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument the inputs x_t, a Tensor of the shape - [batch_size, data_size] and return a Tensor of shape - [batch_size, encoded_data_size]. This callable will be called multiple - times in the SRNN cell so if scoping is not handled correctly then - multiple copies of the variables in this network could be made. It is - recommended to use a snt.nets.MLP module, which takes care of this for - you. - latent_encoder: A callable that accepts a latent state z_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument a Tensor of shape [batch_size, latent_size] and - return a Tensor of shape [batch_size, encoded_latent_size]. - This callable must also have the property 'output_size' defined, - returning encoded_latent_size. 
- transition: A callable that implements the transition distribution - p(z_t|h_t, z_t-1). Must accept as argument the previous RNN hidden state - and previous encoded latent state then return a tf.distributions.Normal - distribution conditioned on the input. - emission: A callable that implements the emission distribution - p(x_t|z_t, h_t). Must accept as arguments the encoded latent state - and the RNN hidden state and return a subclass of - tf.distributions.Distribution that can be used to evaluate the logprob - of the targets. - proposal_type: A string indicating the type of proposal to use. Can - be either "filtering", "smoothing", or "prior". When proposal_type is - "filtering" or "smoothing", proposal must be provided. When - proposal_type is "smoothing", rev_rnn_cell must also be provided. - proposal: A callable that implements the proposal q(z_t| h_t, x_{1:T}). - If proposal_type is "filtering" then proposal must accept as arguments - the current rnn output, the encoded target of the current timestep, - and the mean of the prior. If proposal_type is "smoothing" then - in addition to the current rnn output and the mean of the prior - proposal must accept as arguments the output of the reverse rnn. - proposal should return a tf.distributions.Normal distribution - conditioned on its inputs. If proposal_type is "prior" this argument is - ignored. - rev_rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will aggregate - forward rnn outputs in the reverse direction. The inputs to the RNN - will be the encoded reverse input of the current timestep, a Tensor of - shape [batch_size, encoded_data_size]. - tilt: A callable that implements the log of a positive tilting function - (ideally approximating log p(x_{t+1}|z_t, h_t). Must accept as arguments - the encoded latent state and the RNN hidden state and return a subclass - of tf.distributions.Distribution that can be used to evaluate the - logprob of x_{t+1}. Optionally, None and then no tilt is used. 
- random_seed: The seed for the random ops. Sets the seed for sample_step - and __call__. - """ - super(TrainableSRNN, self).__init__( - rnn_cell, data_encoder, latent_encoder, - transition, emission, random_seed=random_seed) - self.rev_rnn_cell = rev_rnn_cell - self._tilt = tilt - assert proposal_type in ["filtering", "smoothing", "prior"] - self._proposal = proposal - self.proposal_type = proposal_type - if proposal_type != "prior": - assert proposal, "If not proposing from the prior, must provide proposal." - if proposal_type == "smoothing": - assert rev_rnn_cell, "Must provide rev_rnn_cell for smoothing proposal." - - def zero_state(self, batch_size, dtype): - super_state = super(TrainableSRNN, self).zero_state(batch_size, dtype) - return TrainableSRNNState( - rnn_out=tf.zeros([batch_size, self.rnn_cell.output_size], dtype=dtype), - **super_state._asdict()) - - def set_observations(self, observations, seq_lengths): - """Stores the model's observations. - - Stores the observations (inputs and targets) in TensorArrays and precomputes - things for later like the reverse RNN output and encoded targets. - - Args: - observations: The observations of the model, a tuple containing two - Tensors of shape [max_seq_len, batch_size, data_size]. The Tensors - should be the inputs and targets, respectively. - seq_lengths: An int Tensor of shape [batch_size] containing the length - of each sequence in observations. 
- """ - inputs, targets = observations - self.seq_lengths = seq_lengths - self.max_seq_len = tf.reduce_max(seq_lengths) - self.targets_ta = base.ta_for_tensor(targets, clear_after_read=False) - targets_encoded = base.encode_all(targets, self.data_encoder) - self.targets_encoded_ta = base.ta_for_tensor(targets_encoded, - clear_after_read=False) - inputs_encoded = base.encode_all(inputs, self.data_encoder) - rnn_out, _ = tf.nn.dynamic_rnn(self.rnn_cell, - inputs_encoded, - time_major=True, - dtype=tf.float32, - scope="forward_rnn") - self.rnn_ta = base.ta_for_tensor(rnn_out, - clear_after_read=False) - if self.rev_rnn_cell: - targets_and_rnn_out = tf.concat([rnn_out, targets_encoded], 2) - reversed_targets_and_rnn_out = tf.reverse_sequence( - targets_and_rnn_out, seq_lengths, seq_axis=0, batch_axis=1) - # Compute the reverse rnn over the targets. - reverse_rnn_out, _ = tf.nn.dynamic_rnn(self.rev_rnn_cell, - reversed_targets_and_rnn_out, - time_major=True, - dtype=tf.float32, - scope="reverse_rnn") - reverse_rnn_out = tf.reverse_sequence(reverse_rnn_out, seq_lengths, - seq_axis=0, batch_axis=1) - self.reverse_rnn_ta = base.ta_for_tensor(reverse_rnn_out, - clear_after_read=False) - - def _filtering_proposal(self, rnn_out, prev_latent_encoded, prior, t): - """Computes the filtering proposal distribution.""" - return self._proposal(rnn_out, - prev_latent_encoded, - self.targets_encoded_ta.read(t), - prior_mu=prior.mean()) - - def _smoothing_proposal(self, rnn_out, prev_latent_encoded, prior, t): - """Computes the smoothing proposal distribution.""" - return self._proposal(rnn_out, - prev_latent_encoded, - smoothing_tensors=[self.reverse_rnn_ta.read(t)], - prior_mu=prior.mean()) - - def proposal(self, rnn_out, prev_latent_encoded, prior, t): - """Computes the proposal distribution specified by proposal_type. - - Args: - rnn_out: The output of the rnn for the current timestep. 
- prev_latent_encoded: Float Tensor of shape - [batch_size, encoded_latent_size], the previous latent state z_{t-1} - run through latent_encoder. - prior: A tf.distributions.Normal distribution representing the prior - over z_t, p(z_t | z_{1:t-1}, x_{1:t-1}). Used for 'res_q'. - t: A scalar int Tensor, the current timestep. - """ - if self.proposal_type == "filtering": - return self._filtering_proposal(rnn_out, prev_latent_encoded, prior, t) - elif self.proposal_type == "smoothing": - return self._smoothing_proposal(rnn_out, prev_latent_encoded, prior, t) - elif self.proposal_type == "prior": - return self.transition(rnn_out, prev_latent_encoded) - - def tilt(self, rnn_out, latent_encoded, targets): - r_func = self._tilt(rnn_out, latent_encoded) - return tf.reduce_sum(r_func.log_prob(targets), axis=-1) - - def propose_and_weight(self, state, t): - """Runs the model and computes importance weights for one timestep. - - Runs the model and computes importance weights, sampling from the proposal - instead of the transition/prior. - - Args: - state: The previous state of the model, a TrainableSRNNState containing - the previous rnn state, the previous rnn outs, and the previous encoded - latent. - t: A scalar integer Tensor, the current timestep. - Returns: - weights: A float Tensor of shape [batch_size]. - new_state: The new state of the model. 
- """ - targets = self.targets_ta.read(t) - rnn_out = self.rnn_ta.read(t) - p_zt = self.transition(rnn_out, state.latent_encoded) - q_zt = self.proposal(rnn_out, state.latent_encoded, p_zt, t) - zt = q_zt.sample(seed=self.random_seed) - p_xt_given_zt, latent_encoded = self.emission(zt, rnn_out) - log_p_xt_given_zt = tf.reduce_sum(p_xt_given_zt.log_prob(targets), axis=-1) - log_p_zt = tf.reduce_sum(p_zt.log_prob(zt), axis=-1) - log_q_zt = tf.reduce_sum(q_zt.log_prob(zt), axis=-1) - weights = log_p_zt + log_p_xt_given_zt - log_q_zt - if self._tilt: - prev_log_r = tf.cond( - tf.greater(t, 0), - lambda: self.tilt(state.rnn_out, state.latent_encoded, targets), - lambda: 0.) # On the first step, prev_log_r = 0. - log_r = tf.cond( - tf.less(t + 1, self.max_seq_len), - lambda: self.tilt(rnn_out, latent_encoded, self.targets_ta.read(t+1)), - lambda: 0.) - # On the last step, log_r = 0. - log_r *= tf.to_float(t < self.seq_lengths - 1) - weights += log_r - prev_log_r - - # This reshape is required because the TensorArray reports different shapes - # than the initial state provides (where the first dimension is unknown). - # The difference breaks the while_loop. Reshape prevents the error. - rnn_out = tf.reshape(rnn_out, tf.shape(state.rnn_out)) - - new_state = TrainableSRNNState(rnn_out=rnn_out, - rnn_state=state.rnn_state, # unmodified - latent_encoded=latent_encoded) - return weights, new_state - - -_DEFAULT_INITIALIZERS = {"w": tf.contrib.layers.xavier_initializer(), - "b": tf.zeros_initializer()} - - -def create_srnn( - data_size, - latent_size, - emission_class, - rnn_hidden_size=None, - fcnet_hidden_sizes=None, - encoded_data_size=None, - encoded_latent_size=None, - sigma_min=0.0, - raw_sigma_bias=0.25, - emission_bias_init=0.0, - use_tilt=False, - proposal_type="filtering", - initializers=None, - random_seed=None): - """A factory method for creating SRNN cells. - - Args: - data_size: The dimension of the vectors that make up the data sequences. 
- latent_size: The size of the stochastic latent state of the SRNN. - emission_class: The class of the emission distribution. Can be either - ConditionalNormalDistribution or ConditionalBernoulliDistribution. - rnn_hidden_size: The hidden state dimension of the RNN that forms the - deterministic part of this SRNN. If None, then it defaults - to latent_size. - fcnet_hidden_sizes: A list of python integers, the size of the hidden - layers of the fully connected networks that parameterize the conditional - distributions of the SRNN. If None, then it defaults to one hidden - layer of size latent_size. - encoded_data_size: The size of the output of the data encoding network. If - None, defaults to latent_size. - encoded_latent_size: The size of the output of the latent state encoding - network. If None, defaults to latent_size. - sigma_min: The minimum value that the standard deviation of the - distribution over the latent state can take. - raw_sigma_bias: A scalar that is added to the raw standard deviation - output from the neural networks that parameterize the prior and - approximate posterior. Useful for preventing standard deviations close - to zero. - emission_bias_init: A bias to added to the raw output of the fully - connected network that parameterizes the emission distribution. Useful - for initalizing the mean of the distribution to a sensible starting point - such as the mean of the training data. Only used with Bernoulli generative - distributions. - use_tilt: If true, create a SRNN with a tilting function. - proposal_type: The type of proposal to use. Can be "filtering", "smoothing", - or "prior". - initializers: The variable intitializers to use for the fully connected - networks and RNN cell. Must be a dictionary mapping the keys 'w' and 'b' - to the initializers for the weights and biases. Defaults to xavier for - the weights and zeros for the biases when initializers is None. - random_seed: A random seed for the SRNN resampling operations. 
- Returns: - model: A TrainableSRNN object. - """ - if rnn_hidden_size is None: - rnn_hidden_size = latent_size - if fcnet_hidden_sizes is None: - fcnet_hidden_sizes = [latent_size] - if encoded_data_size is None: - encoded_data_size = latent_size - if encoded_latent_size is None: - encoded_latent_size = latent_size - if initializers is None: - initializers = _DEFAULT_INITIALIZERS - data_encoder = snt.nets.MLP( - output_sizes=fcnet_hidden_sizes + [encoded_data_size], - initializers=initializers, - name="data_encoder") - latent_encoder = snt.nets.MLP( - output_sizes=fcnet_hidden_sizes + [encoded_latent_size], - initializers=initializers, - name="latent_encoder") - transition = base.ConditionalNormalDistribution( - size=latent_size, - hidden_layer_sizes=fcnet_hidden_sizes, - sigma_min=sigma_min, - raw_sigma_bias=raw_sigma_bias, - initializers=initializers, - name="prior") - # Construct the emission distribution. - if emission_class == base.ConditionalBernoulliDistribution: - # For Bernoulli distributed outputs, we initialize the bias so that the - # network generates on average the mean from the training set. - emission_dist = functools.partial(base.ConditionalBernoulliDistribution, - bias_init=emission_bias_init) - else: - emission_dist = base.ConditionalNormalDistribution - emission = emission_dist( - size=data_size, - hidden_layer_sizes=fcnet_hidden_sizes, - initializers=initializers, - name="generative") - # Construct the proposal distribution. 
- if proposal_type in ["filtering", "smoothing"]: - proposal = base.NormalApproximatePosterior( - size=latent_size, - hidden_layer_sizes=fcnet_hidden_sizes, - sigma_min=sigma_min, - raw_sigma_bias=raw_sigma_bias, - initializers=initializers, - smoothing=(proposal_type == "smoothing"), - name="approximate_posterior") - else: - proposal = None - - if use_tilt: - tilt = emission_dist( - size=data_size, - hidden_layer_sizes=fcnet_hidden_sizes, - initializers=initializers, - name="tilt") - else: - tilt = None - - rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_hidden_size, - initializer=initializers["w"]) - rev_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_hidden_size, - initializer=initializers["w"]) - return TrainableSRNN( - rnn_cell, data_encoder, latent_encoder, transition, - emission, proposal_type, proposal=proposal, rev_rnn_cell=rev_rnn_cell, - tilt=tilt, random_seed=random_seed) diff --git a/research/fivo/fivo/models/srnn_test.py b/research/fivo/fivo/models/srnn_test.py deleted file mode 100644 index 39e10da134d3834babcf2eef1bb3e97fce12a07a..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/srnn_test.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for fivo.models.srnn.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from fivo.models import base -from fivo.test_utils import create_srnn - - -class SrnnTest(tf.test.TestCase): - - def test_srnn_normal_emission(self): - self.run_srnn(base.ConditionalNormalDistribution, [-5.947752, -1.182961]) - - def test_srnn_bernoulli_emission(self): - self.run_srnn(base.ConditionalBernoulliDistribution, [-2.566631, -2.479234]) - - def run_srnn(self, generative_class, gt_log_alpha): - """Tests the SRNN. - - All test values are 'golden values' derived by running the code and copying - the output. - - Args: - generative_class: The class of the generative distribution to use. - gt_log_alpha: The ground-truth value of log alpha. - """ - tf.set_random_seed(1234) - with self.test_session() as sess: - batch_size = 2 - model, inputs, targets, _ = create_srnn(generative_class=generative_class, - batch_size=batch_size, - data_lengths=(1, 1), - random_seed=1234) - zero_state = model.zero_state(batch_size=batch_size, dtype=tf.float32) - model.set_observations([inputs, targets], tf.convert_to_tensor([1, 1])) - model_out = model.propose_and_weight(zero_state, 0) - sess.run(tf.global_variables_initializer()) - log_alpha, state = sess.run(model_out) - self.assertAllClose( - state.latent_encoded, - [[0.591787, 1.310583], [-1.523136, 0.953918]]) - self.assertAllClose(state.rnn_out, - [[0.041675, -0.056038, -0.001823, 0.005224], - [0.042925, -0.044619, 0.021401, 0.016998]]) - self.assertAllClose(log_alpha, gt_log_alpha) - - def test_srnn_with_tilt_normal_emission(self): - self.run_srnn_with_tilt(base.ConditionalNormalDistribution, [-9.13577, -4.56725]) - - - def test_srnn_with_tilt_bernoulli_emission(self): - self.run_srnn_with_tilt(base.ConditionalBernoulliDistribution, [-4.617461, -5.079248]) - - def 
run_srnn_with_tilt(self, generative_class, gt_log_alpha): - """Tests the SRNN with a tilting function. - - All test values are 'golden values' derived by running the code and copying - the output. - - Args: - generative_class: The class of the generative distribution to use. - gt_log_alpha: The ground-truth value of log alpha. - """ - tf.set_random_seed(1234) - with self.test_session() as sess: - batch_size = 2 - model, inputs, targets, _ = create_srnn(generative_class=generative_class, - batch_size=batch_size, - data_lengths=(3, 2), - random_seed=1234, - use_tilt=True) - zero_state = model.zero_state(batch_size=batch_size, dtype=tf.float32) - model.set_observations([inputs, targets], tf.convert_to_tensor([3, 2])) - model_out = model.propose_and_weight(zero_state, 0) - sess.run(tf.global_variables_initializer()) - log_alpha, state = sess.run(model_out) - self.assertAllClose( - state.latent_encoded, - [[0.591787, 1.310583], [-1.523136, 0.953918]]) - self.assertAllClose(state.rnn_out, - [[0.041675, -0.056038, -0.001823, 0.005224], - [0.042925, -0.044619, 0.021401, 0.016998]]) - self.assertAllClose(log_alpha, gt_log_alpha) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/models/vrnn.py b/research/fivo/fivo/models/vrnn.py deleted file mode 100644 index 4e2552088c19f141a75d791d2be0d0a5238ed87c..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/vrnn.py +++ /dev/null @@ -1,572 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""VRNN classes.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple -import functools - -import sonnet as snt -import tensorflow as tf - -from fivo.models import base - - -VRNNState = namedtuple("VRNNState", "rnn_state latent_encoded") - - -class VRNN(object): - """Implementation of a Variational Recurrent Neural Network (VRNN). - - Introduced in "A Recurrent Latent Variable Model for Sequential data" - by Chung et al. https://arxiv.org/pdf/1506.02216.pdf. - - The VRNN is a sequence model similar to an RNN that uses stochastic latent - variables to improve its representational power. It can be thought of as a - sequential analogue to the variational auto-encoder (VAE). - - The VRNN has a deterministic RNN as its backbone, represented by the - sequence of RNN hidden states h_t. At each timestep, the RNN hidden state h_t - is conditioned on the previous sequence element, x_{t-1}, as well as the - latent state from the previous timestep, z_{t-1}. - - In this implementation of the VRNN the latent state z_t is Gaussian. The - model's prior over z_t (also called the transition distribution) is - distributed as Normal(mu_t, diag(sigma_t^2)) where mu_t and sigma_t are the - mean and standard deviation output from a fully connected network that accepts - the rnn hidden state h_t as input. - - The emission distribution p(x_t|z_t, h_t) is conditioned on the latent state - z_t as well as the current RNN hidden state h_t via a fully connected network. - - To increase the modeling power of the VRNN, two additional networks are - used to extract features from the data and the latent state. Those networks - are called data_encoder and latent_encoder respectively. 
- - For an example of how to call the VRNN's methods see sample_step. - - There are a few differences between this exposition and the paper. - First, the indexing scheme for h_t is different than the paper's -- what the - paper calls h_t we call h_{t+1}. This is the same notation used by Fraccaro - et al. to describe the VRNN in the paper linked above. Also, the VRNN paper - uses VAE terminology to refer to the different internal networks, so it - refers to the emission distribution as the decoder. This implementation also - renames the functions phi_x and phi_z in the paper to data_encoder and - latent_encoder. - """ - - def __init__(self, - rnn_cell, - data_encoder, - latent_encoder, - transition, - emission, - random_seed=None): - """Create a VRNN. - - Args: - rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will form the - deterministic backbone of the VRNN. The inputs to the RNN will be the - encoded latent state of the previous timestep with shape - [batch_size, encoded_latent_size] as well as the encoded input of the - current timestep, a Tensor of shape [batch_size, encoded_data_size]. - data_encoder: A callable that accepts a batch of data x_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument the inputs x_t, a Tensor of the shape - [batch_size, data_size] and return a Tensor of shape - [batch_size, encoded_data_size]. This callable will be called multiple - times in the VRNN cell so if scoping is not handled correctly then - multiple copies of the variables in this network could be made. It is - recommended to use a snt.nets.MLP module, which takes care of this for - you. - latent_encoder: A callable that accepts a latent state z_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument a Tensor of shape [batch_size, latent_size] and - return a Tensor of shape [batch_size, encoded_latent_size]. 
- This callable must also have the property 'output_size' defined, - returning encoded_latent_size. - transition: A callable that implements the transition distribution - p(z_t|h_t). Must accept as argument the previous RNN hidden state and - return a tf.distributions.Normal distribution conditioned on the input. - emission: A callable that implements the emission distribution - p(x_t|z_t, h_t). Must accept as arguments the encoded latent state - and the RNN hidden state and return a subclass of - tf.distributions.Distribution that can be used to evaluate the logprob - of the targets. - random_seed: The seed for the random ops. Sets the seed for sample_step. - """ - self.random_seed = random_seed - self.rnn_cell = rnn_cell - self.data_encoder = data_encoder - self.latent_encoder = latent_encoder - self.encoded_z_size = latent_encoder.output_size - self.state_size = (self.rnn_cell.state_size) - self._transition = transition - self._emission = emission - - def zero_state(self, batch_size, dtype): - """The initial state of the VRNN. - - Contains the initial state of the RNN and the inital encoded latent. - - Args: - batch_size: The batch size. - dtype: The data type of the VRNN. - Returns: - zero_state: The initial state of the VRNN. - """ - return VRNNState( - rnn_state=self.rnn_cell.zero_state(batch_size, dtype), - latent_encoded=tf.zeros( - [batch_size, self.latent_encoder.output_size], dtype=dtype)) - - def run_rnn(self, prev_rnn_state, prev_latent_encoded, inputs): - """Runs the deterministic RNN for one step. - - Args: - prev_rnn_state: The state of the RNN from the previous timestep. - prev_latent_encoded: Float Tensor of shape - [batch_size, encoded_latent_size], the previous latent state z_{t-1} - run through latent_encoder. - inputs: A Tensor of shape [batch_size, data_size], the current inputs to - the model. Most often this is x_{t-1}, the previous token in the - observation sequence. - Returns: - rnn_out: The output of the RNN. 
- rnn_state: The new state of the RNN. - """ - inputs_encoded = self.data_encoder(tf.to_float(inputs)) - rnn_inputs = tf.concat([inputs_encoded, prev_latent_encoded], axis=1) - rnn_out, rnn_state = self.rnn_cell(rnn_inputs, prev_rnn_state) - return rnn_out, rnn_state - - def transition(self, rnn_out): - """Computes the transition distribution p(z_t|h_t). - - Note that p(z_t | h_t) = p(z_t| z_{1:t-1}, x_{1:t-1}) - - Args: - rnn_out: The output of the rnn for the current timestep. - Returns: - p(z_t | h_t): A normal distribution with event shape - [batch_size, latent_size]. - """ - return self._transition(rnn_out) - - def emission(self, latent, rnn_out): - """Computes the emission distribution p(x_t | z_t, h_t). - - Note that p(x_t | z_t, h_t) = p(x_t | z_{1:t}, x_{1:t-1}). - - Args: - latent: The stochastic latent state z_t. - rnn_out: The output of the rnn for the current timestep. - Returns: - p(x_t | z_t, h_t): A distribution with event shape - [batch_size, data_size]. - latent_encoded: The latent state encoded with latent_encoder. Should be - passed to run_rnn on the next timestep. - """ - latent_encoded = self.latent_encoder(latent) - return self._emission(latent_encoded, rnn_out), latent_encoded - - def sample_step(self, prev_state, inputs, unused_t): - """Samples one output from the model. - - Args: - prev_state: The previous state of the model, a VRNNState containing the - previous rnn state and the previous encoded latent. - inputs: A Tensor of shape [batch_size, data_size], the current inputs to - the model. Most often this is x_{t-1}, the previous token in the - observation sequence. - unused_t: The current timestep. Not used currently. - Returns: - new_state: The next state of the model, a VRNNState. - xt: A float Tensor of shape [batch_size, data_size], an output sampled - from the emission distribution. 
- """ - rnn_out, rnn_state = self.run_rnn(prev_state.rnn_state, - prev_state.latent_encoded, - inputs) - p_zt = self.transition(rnn_out) - zt = p_zt.sample(seed=self.random_seed) - p_xt_given_zt, latent_encoded = self.emission(zt, rnn_out) - xt = p_xt_given_zt.sample(seed=self.random_seed) - new_state = VRNNState(rnn_state=rnn_state, latent_encoded=latent_encoded) - return new_state, tf.to_float(xt) - -# pylint: disable=invalid-name -# pylint thinks this is a top-level constant. -TrainableVRNNState = namedtuple("TrainableVRNNState", - VRNNState._fields + ("rnn_out",)) -# pylint: enable=g-invalid-name - - -class TrainableVRNN(VRNN, base.ELBOTrainableSequenceModel): - """A VRNN subclass with proposals and methods for training and evaluation. - - This class adds proposals used for training with importance-sampling based - methods such as the ELBO. The model can be configured to propose from one - of three proposals: a learned filtering proposal, a learned smoothing - proposal, or the prior (i.e. the transition distribution). - - As described in the VRNN paper, the learned filtering proposal is - parameterized by a fully connected neural network that accepts as input the - current target x_t and the current rnn output h_t. The learned smoothing - proposal is also given the hidden state of an RNN run in reverse over the - inputs, so as to incorporate information about future observations. This - smoothing proposal is not described in the VRNN paper. - - All learned proposals use the 'res_q' parameterization, meaning that instead - of directly producing the mean of z_t, the proposal network predicts the - 'residual' from the prior's mean. This is explored more in section 3.3 of - https://arxiv.org/pdf/1605.07571.pdf. - - During training, the latent state z_t is sampled from the proposal and the - reparameterization trick is used to provide low-variance gradients. 
- - Note that the VRNN paper uses VAE terminology to refer to the different - internal networks, so the proposal is referred to as the encoder. - """ - - def __init__(self, - rnn_cell, - data_encoder, - latent_encoder, - transition, - emission, - proposal_type, - proposal=None, - rev_rnn_cell=None, - tilt=None, - random_seed=None): - """Create a trainable RNN. - - Args: - rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will form the - deterministic backbone of the VRNN. The inputs to the RNN will be the - encoded latent state of the previous timestep with shape - [batch_size, encoded_latent_size] as well as the encoded input of the - current timestep, a Tensor of shape [batch_size, encoded_data_size]. - data_encoder: A callable that accepts a batch of data x_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument the inputs x_t, a Tensor of the shape - [batch_size, data_size] and return a Tensor of shape - [batch_size, encoded_data_size]. This callable will be called multiple - times in the VRNN cell so if scoping is not handled correctly then - multiple copies of the variables in this network could be made. It is - recommended to use a snt.nets.MLP module, which takes care of this for - you. - latent_encoder: A callable that accepts a latent state z_t and - 'encodes' it, e.g. runs it through a fully connected network. Must - accept as argument a Tensor of shape [batch_size, latent_size] and - return a Tensor of shape [batch_size, encoded_latent_size]. - This callable must also have the property 'output_size' defined, - returning encoded_latent_size. - transition: A callable that implements the transition distribution - p(z_t|h_t). Must accept as argument the previous RNN hidden state and - return a tf.distributions.Normal distribution conditioned on the input. - emission: A callable that implements the emission distribution - p(x_t|z_t, h_t). 
Must accept as arguments the encoded latent state - and the RNN hidden state and return a subclass of - tf.distributions.Distribution that can be used to evaluate the logprob - of the targets. - proposal_type: A string indicating the type of proposal to use. Can - be either "filtering", "smoothing", or "prior". When proposal_type is - "filtering" or "smoothing", proposal must be provided. When - proposal_type is "smoothing", rev_rnn_cell must also be provided. - proposal: A callable that implements the proposal q(z_t| h_t, x_{1:T}). - If proposal_type is "filtering" then proposal must accept as arguments - the current rnn output, the encoded target of the current timestep, - and the mean of the prior. If proposal_type is "smoothing" then - in addition to the current rnn output and the mean of the prior - proposal must accept as arguments the output of the reverse rnn. - proposal should return a tf.distributions.Normal distribution - conditioned on its inputs. If proposal_type is "prior" this argument is - ignored. - rev_rnn_cell: A subclass of tf.nn.rnn_cell.RNNCell that will aggregate - observation statistics in the reverse direction. The inputs to the RNN - will be the encoded reverse input of the current timestep, a Tensor of - shape [batch_size, encoded_data_size]. - tilt: A callable that implements the log of a positive tilting function - (ideally approximating log p(x_{t+1}|z_t, h_t). Must accept as arguments - the encoded latent state and the RNN hidden state and return a subclass - of tf.distributions.Distribution that can be used to evaluate the - logprob of x_{t+1}. Optionally, None and then no tilt is used. - random_seed: The seed for the random ops. Sets the seed for sample_step - and __call__. 
- """ - super(TrainableVRNN, self).__init__( - rnn_cell, data_encoder, latent_encoder, - transition, emission, random_seed=random_seed) - self.rev_rnn_cell = rev_rnn_cell - self._tilt = tilt - assert proposal_type in ["filtering", "smoothing", "prior"] - self._proposal = proposal - self.proposal_type = proposal_type - if proposal_type != "prior": - assert proposal, "If not proposing from the prior, must provide proposal." - if proposal_type == "smoothing": - assert rev_rnn_cell, "Must provide rev_rnn_cell for smoothing proposal." - - def zero_state(self, batch_size, dtype): - super_state = super(TrainableVRNN, self).zero_state(batch_size, dtype) - return TrainableVRNNState( - rnn_out=tf.zeros([batch_size, self.rnn_cell.output_size], dtype=dtype), - **super_state._asdict()) - - def set_observations(self, observations, seq_lengths): - """Stores the model's observations. - - Stores the observations (inputs and targets) in TensorArrays and precomputes - things for later like the reverse RNN output and encoded targets. - - Args: - observations: The observations of the model, a tuple containing two - Tensors of shape [max_seq_len, batch_size, data_size]. The Tensors - should be the inputs and targets, respectively. - seq_lengths: An int Tensor of shape [batch_size] containing the length - of each sequence in observations. - """ - inputs, targets = observations - self.seq_lengths = seq_lengths - self.max_seq_len = tf.reduce_max(seq_lengths) - self.inputs_ta = base.ta_for_tensor(inputs, clear_after_read=False) - self.targets_ta = base.ta_for_tensor(targets, clear_after_read=False) - targets_encoded = base.encode_all(targets, self.data_encoder) - self.targets_encoded_ta = base.ta_for_tensor(targets_encoded, - clear_after_read=False) - if self.rev_rnn_cell: - reverse_targets_encoded = tf.reverse_sequence( - targets_encoded, seq_lengths, seq_axis=0, batch_axis=1) - # Compute the reverse rnn over the targets. 
- reverse_rnn_out, _ = tf.nn.dynamic_rnn(self.rev_rnn_cell, - reverse_targets_encoded, - time_major=True, - dtype=tf.float32) - reverse_rnn_out = tf.reverse_sequence(reverse_rnn_out, seq_lengths, - seq_axis=0, batch_axis=1) - self.reverse_rnn_ta = base.ta_for_tensor(reverse_rnn_out, - clear_after_read=False) - - def _filtering_proposal(self, rnn_out, prior, t): - """Computes the filtering proposal distribution.""" - return self._proposal(rnn_out, - self.targets_encoded_ta.read(t), - prior_mu=prior.mean()) - - def _smoothing_proposal(self, rnn_out, prior, t): - """Computes the smoothing proposal distribution.""" - return self._proposal(rnn_out, - smoothing_tensors=[self.reverse_rnn_ta.read(t)], - prior_mu=prior.mean()) - - def proposal(self, rnn_out, prior, t): - """Computes the proposal distribution specified by proposal_type. - - Args: - rnn_out: The output of the rnn for the current timestep. - prior: A tf.distributions.Normal distribution representing the prior - over z_t, p(z_t | z_{1:t-1}, x_{1:t-1}). Used for 'res_q'. - t: A scalar int Tensor, the current timestep. - """ - if self.proposal_type == "filtering": - return self._filtering_proposal(rnn_out, prior, t) - elif self.proposal_type == "smoothing": - return self._smoothing_proposal(rnn_out, prior, t) - elif self.proposal_type == "prior": - return self.transition(rnn_out) - - def tilt(self, rnn_out, latent_encoded, targets): - r_func = self._tilt(rnn_out, latent_encoded) - return tf.reduce_sum(r_func.log_prob(targets), axis=-1) - - def propose_and_weight(self, state, t): - """Runs the model and computes importance weights for one timestep. - - Runs the model and computes importance weights, sampling from the proposal - instead of the transition/prior. - - Args: - state: The previous state of the model, a TrainableVRNNState containing - the previous rnn state, the previous rnn outs, and the previous encoded - latent. - t: A scalar integer Tensor, the current timestep. 
- Returns: - weights: A float Tensor of shape [batch_size]. - new_state: The new state of the model. - """ - inputs = self.inputs_ta.read(t) - targets = self.targets_ta.read(t) - rnn_out, next_rnn_state = self.run_rnn(state.rnn_state, - state.latent_encoded, - inputs) - p_zt = self.transition(rnn_out) - q_zt = self.proposal(rnn_out, p_zt, t) - zt = q_zt.sample(seed=self.random_seed) - p_xt_given_zt, latent_encoded = self.emission(zt, rnn_out) - log_p_xt_given_zt = tf.reduce_sum(p_xt_given_zt.log_prob(targets), axis=-1) - log_p_zt = tf.reduce_sum(p_zt.log_prob(zt), axis=-1) - log_q_zt = tf.reduce_sum(q_zt.log_prob(zt), axis=-1) - weights = log_p_zt + log_p_xt_given_zt - log_q_zt - if self._tilt: - prev_log_r = tf.cond( - tf.greater(t, 0), - lambda: self.tilt(state.rnn_out, state.latent_encoded, targets), - lambda: 0.) # On the first step, prev_log_r = 0. - log_r = tf.cond( - tf.less(t + 1, self.max_seq_len), - lambda: self.tilt(rnn_out, latent_encoded, self.targets_ta.read(t+1)), - lambda: 0.) - # On the last step, log_r = 0. - log_r *= tf.to_float(t < self.seq_lengths - 1) - weights += log_r - prev_log_r - new_state = TrainableVRNNState(rnn_state=next_rnn_state, - rnn_out=rnn_out, - latent_encoded=latent_encoded) - return weights, new_state - - -_DEFAULT_INITIALIZERS = {"w": tf.contrib.layers.xavier_initializer(), - "b": tf.zeros_initializer()} - - -def create_vrnn( - data_size, - latent_size, - emission_class, - rnn_hidden_size=None, - fcnet_hidden_sizes=None, - encoded_data_size=None, - encoded_latent_size=None, - sigma_min=0.0, - raw_sigma_bias=0.25, - emission_bias_init=0.0, - use_tilt=False, - proposal_type="filtering", - initializers=None, - random_seed=None): - """A factory method for creating VRNN cells. - - Args: - data_size: The dimension of the vectors that make up the data sequences. - latent_size: The size of the stochastic latent state of the VRNN. - emission_class: The class of the emission distribution. 
Can be either - ConditionalNormalDistribution or ConditionalBernoulliDistribution. - rnn_hidden_size: The hidden state dimension of the RNN that forms the - deterministic part of this VRNN. If None, then it defaults - to latent_size. - fcnet_hidden_sizes: A list of python integers, the size of the hidden - layers of the fully connected networks that parameterize the conditional - distributions of the VRNN. If None, then it defaults to one hidden - layer of size latent_size. - encoded_data_size: The size of the output of the data encoding network. If - None, defaults to latent_size. - encoded_latent_size: The size of the output of the latent state encoding - network. If None, defaults to latent_size. - sigma_min: The minimum value that the standard deviation of the - distribution over the latent state can take. - raw_sigma_bias: A scalar that is added to the raw standard deviation - output from the neural networks that parameterize the prior and - approximate posterior. Useful for preventing standard deviations close - to zero. - emission_bias_init: A bias to added to the raw output of the fully - connected network that parameterizes the emission distribution. Useful - for initalizing the mean of the distribution to a sensible starting point - such as the mean of the training data. Only used with Bernoulli generative - distributions. - use_tilt: If true, create a VRNN with a tilting function. - proposal_type: The type of proposal to use. Can be "filtering", "smoothing", - or "prior". - initializers: The variable intitializers to use for the fully connected - networks and RNN cell. Must be a dictionary mapping the keys 'w' and 'b' - to the initializers for the weights and biases. Defaults to xavier for - the weights and zeros for the biases when initializers is None. - random_seed: A random seed for the VRNN resampling operations. - Returns: - model: A TrainableVRNN object. 
- """ - if rnn_hidden_size is None: - rnn_hidden_size = latent_size - if fcnet_hidden_sizes is None: - fcnet_hidden_sizes = [latent_size] - if encoded_data_size is None: - encoded_data_size = latent_size - if encoded_latent_size is None: - encoded_latent_size = latent_size - if initializers is None: - initializers = _DEFAULT_INITIALIZERS - data_encoder = snt.nets.MLP( - output_sizes=fcnet_hidden_sizes + [encoded_data_size], - initializers=initializers, - name="data_encoder") - latent_encoder = snt.nets.MLP( - output_sizes=fcnet_hidden_sizes + [encoded_latent_size], - initializers=initializers, - name="latent_encoder") - transition = base.ConditionalNormalDistribution( - size=latent_size, - hidden_layer_sizes=fcnet_hidden_sizes, - sigma_min=sigma_min, - raw_sigma_bias=raw_sigma_bias, - initializers=initializers, - name="prior") - # Construct the emission distribution. - if emission_class == base.ConditionalBernoulliDistribution: - # For Bernoulli distributed outputs, we initialize the bias so that the - # network generates on average the mean from the training set. - emission_dist = functools.partial(base.ConditionalBernoulliDistribution, - bias_init=emission_bias_init) - else: - emission_dist = base.ConditionalNormalDistribution - emission = emission_dist( - size=data_size, - hidden_layer_sizes=fcnet_hidden_sizes, - initializers=initializers, - name="generative") - # Construct the proposal distribution. 
- if proposal_type in ["filtering", "smoothing"]: - proposal = base.NormalApproximatePosterior( - size=latent_size, - hidden_layer_sizes=fcnet_hidden_sizes, - sigma_min=sigma_min, - raw_sigma_bias=raw_sigma_bias, - initializers=initializers, - smoothing=(proposal_type == "smoothing"), - name="approximate_posterior") - else: - proposal = None - - if use_tilt: - tilt = emission_dist( - size=data_size, - hidden_layer_sizes=fcnet_hidden_sizes, - initializers=initializers, - name="tilt") - else: - tilt = None - - rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_hidden_size, - initializer=initializers["w"]) - rev_rnn_cell = tf.nn.rnn_cell.LSTMCell(rnn_hidden_size, - initializer=initializers["w"]) - return TrainableVRNN( - rnn_cell, data_encoder, latent_encoder, transition, - emission, proposal_type, proposal=proposal, rev_rnn_cell=rev_rnn_cell, - tilt=tilt, random_seed=random_seed) diff --git a/research/fivo/fivo/models/vrnn_test.py b/research/fivo/fivo/models/vrnn_test.py deleted file mode 100644 index 2d9bde3d5b6c6f66a82bd331cf50a87737864239..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/models/vrnn_test.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for fivo.models.vrnn.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import tensorflow as tf - -from fivo.models import base -from fivo.test_utils import create_vrnn - - -class VrnnTest(tf.test.TestCase): - - def test_vrnn_normal_emission(self): - self.run_vrnn(base.ConditionalNormalDistribution, [-4.509767, -3.242221]) - - def test_vrnn_bernoulli_emission(self): - self.run_vrnn(base.ConditionalBernoulliDistribution, [-2.63812733, -2.02216434]), - - def run_vrnn(self, generative_class, gt_log_p_x_given_z): - """Tests the VRNN. - - All test values are 'golden values' derived by running the code and copying - the output. - - Args: - generative_class: The class of the generative distribution to use. - gt_log_p_x_given_z: The ground-truth value of log p(x|z). - """ - tf.set_random_seed(1234) - with self.test_session() as sess: - batch_size = 2 - model, inputs, targets, _ = create_vrnn(generative_class=generative_class, - batch_size=batch_size, - data_lengths=(1, 1), - random_seed=1234) - zero_state = model.zero_state(batch_size=batch_size, dtype=tf.float32) - model.set_observations([inputs, targets], tf.convert_to_tensor([1, 1])) - model_out = model.propose_and_weight(zero_state, 0) - sess.run(tf.global_variables_initializer()) - log_alpha, state = sess.run(model_out) - rnn_state, latent_state, rnn_out = state - self.assertAllClose( - rnn_state.c, - [[-0.15014534, 0.0143046, 0.00160489, -0.12899463], - [-0.25015137, 0.09377634, -0.05000039, -0.17123522]]) - self.assertAllClose( - rnn_state.h, - [[-0.06842659, 0.00760155, 0.00096106, -0.05434214], - [-0.1109542, 0.0441804, -0.03121299, -0.07882939]] - ) - self.assertAllClose( - latent_state, - [[0.025241, 0.122011, 1.066661, 0.316209, -0.25369, 0.108215, - -1.501128, -0.440111, -0.40447, -0.156649, 1.206028], - [0.066824, 0.519937, 
0.610973, 0.977739, -0.121889, -0.223429, - -0.32687, -0.578763, -0.56965, 0.751886, 0.681606]] - ) - self.assertAllClose(rnn_out, [[-0.068427, 0.007602, 0.000961, -0.054342], - [-0.110954, 0.04418, -0.031213, -0.078829]]) - gt_log_q_z = [-8.0895052, -6.75819111] - gt_log_p_z = [-7.246827, -6.512877] - gt_log_alpha = (np.array(gt_log_p_z) + - np.array(gt_log_p_x_given_z) - - np.array(gt_log_q_z)) - self.assertAllClose(log_alpha, gt_log_alpha) - - def test_vrnn_with_tilt_normal_emission(self): - self.run_vrnn_with_tilt(base.ConditionalNormalDistribution, [-5.198263, -6.31686]) - - def test_vrnn_with_tilt_bernoulli_emission(self): - self.run_vrnn_with_tilt(base.ConditionalBernoulliDistribution, [-4.66985, -3.802245]) - - def run_vrnn_with_tilt(self, generative_class, gt_log_alpha): - """Tests the VRNN with a tilting function. - - All test values are 'golden values' derived by running the code and copying - the output. - - Args: - generative_class: The class of the generative distribution to use. - gt_log_alpha: The ground-truth value of log alpha. 
- """ - tf.set_random_seed(1234) - with self.test_session() as sess: - batch_size = 2 - model, inputs, targets, _ = create_vrnn(generative_class=generative_class, - batch_size=batch_size, - data_lengths=(3, 2), - random_seed=1234, - use_tilt=True) - zero_state = model.zero_state(batch_size=batch_size, dtype=tf.float32) - model.set_observations([inputs, targets], tf.convert_to_tensor([3, 2])) - model_out = model.propose_and_weight(zero_state, 0) - sess.run(tf.global_variables_initializer()) - log_alpha, state = sess.run(model_out) - rnn_state, latent_state, rnn_out = state - self.assertAllClose( - rnn_state.c, - [[-0.15014534, 0.0143046, 0.00160489, -0.12899463], - [-0.25015137, 0.09377634, -0.05000039, -0.17123522]]) - self.assertAllClose( - rnn_state.h, - [[-0.06842659, 0.00760155, 0.00096106, -0.05434214], - [-0.1109542, 0.0441804, -0.03121299, -0.07882939]] - ) - self.assertAllClose( - latent_state, - [[0.025241, 0.122011, 1.066661, 0.316209, -0.25369, 0.108215, - -1.501128, -0.440111, -0.40447, -0.156649, 1.206028], - [0.066824, 0.519937, 0.610973, 0.977739, -0.121889, -0.223429, - -0.32687, -0.578763, -0.56965, 0.751886, 0.681606]] - ) - self.assertAllClose(rnn_out, [[-0.068427, 0.007602, 0.000961, -0.054342], - [-0.110954, 0.04418, -0.031213, -0.078829]]) - self.assertAllClose(log_alpha, gt_log_alpha) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/nested_utils.py b/research/fivo/fivo/nested_utils.py deleted file mode 100644 index ef956a80c40d55331a3acbfe78111e099559ddea..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/nested_utils.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A set of utils for dealing with nested lists and tuples of Tensors.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools -import tensorflow as tf - -from tensorflow.python.util import nest - - -def map_nested(map_fn, nested): - """Executes map_fn on every element in a (potentially) nested structure. - - Args: - map_fn: A callable to execute on each element in 'nested'. - nested: A potentially nested combination of sequence objects. Sequence - objects include tuples, lists, namedtuples, and all subclasses of - collections.Sequence except strings. See nest.is_sequence for details. - For example [1, ('hello', 4.3)] is a nested structure containing elements - 1, 'hello', and 4.3. - Returns: - out_structure: A potentially nested combination of sequence objects with the - same structure as the 'nested' input argument. out_structure - contains the result of applying map_fn to each element in 'nested'. For - example map_nested(lambda x: x+1, [1, (3, 4.3)]) returns [2, (4, 5.3)]. - """ - out = map(map_fn, nest.flatten(nested)) - return nest.pack_sequence_as(nested, out) - - -def tile_tensors(tensors, multiples): - """Tiles a set of Tensors. - - Args: - tensors: A potentially nested tuple or list of Tensors with rank - greater than or equal to the length of 'multiples'. The Tensors do not - need to have the same rank, but their rank must not be dynamic. 
- multiples: A python list of ints indicating how to tile each Tensor - in 'tensors'. Similar to the 'multiples' argument to tf.tile. - Returns: - tiled_tensors: A potentially nested tuple or list of Tensors with the same - structure as the 'tensors' input argument. Contains the result of - applying tf.tile to each Tensor in 'tensors'. When the rank of a Tensor - in 'tensors' is greater than the length of multiples, multiples is padded - at the end with 1s. For example when tiling a 4-dimensional Tensor with - multiples [3, 4], multiples would be padded to [3, 4, 1, 1] before tiling. - """ - def tile_fn(x): - return tf.tile(x, multiples + [1] * (x.shape.ndims - len(multiples))) - - return map_nested(tile_fn, tensors) - - -def where_tensors(condition, x_tensors, y_tensors): - """Performs a tf.where operation on a two sets of Tensors. - - Args: - condition: The condition tensor to use for the where operation. - x_tensors: A potentially nested tuple or list of Tensors. - y_tensors: A potentially nested tuple or list of Tensors. Must have the - same structure as x_tensors. - Returns: - whered_tensors: A potentially nested tuple or list of Tensors with the - same structure as the 'tensors' input argument. Contains the result of - applying tf.where(condition, x, y) on each pair of elements in x_tensors - and y_tensors. - """ - flat_x = nest.flatten(x_tensors) - flat_y = nest.flatten(y_tensors) - result = [tf.where(condition, x, y) for x, y in - itertools.izip(flat_x, flat_y)] - - return nest.pack_sequence_as(x_tensors, result) - - -def gather_tensors(tensors, indices): - """Performs a tf.gather operation on a set of Tensors. - - Args: - tensors: A potentially nested tuple or list of Tensors. - indices: The indices to use for the gather operation. - Returns: - gathered_tensors: A potentially nested tuple or list of Tensors with the - same structure as the 'tensors' input argument. Contains the result of - applying tf.gather(x, indices) on each element x in 'tensors'. 
- """ - return map_nested(lambda x: tf.gather(x, indices), tensors) - - -def tas_for_tensors(tensors, length, **kwargs): - """Unstacks a set of Tensors into TensorArrays. - - Args: - tensors: A potentially nested tuple or list of Tensors with length in the - first dimension greater than or equal to the 'length' input argument. - length: The desired length of the TensorArrays. - **kwargs: Keyword args for TensorArray constructor. - Returns: - tensorarrays: A potentially nested tuple or list of TensorArrays with the - same structure as 'tensors'. Contains the result of unstacking each Tensor - in 'tensors'. - """ - def map_fn(x): - ta = tf.TensorArray(x.dtype, length, - name=x.name.split(':')[0] + '_ta', **kwargs) - return ta.unstack(x[:length, :]) - return map_nested(map_fn, tensors) - - -def read_tas(tas, index): - """Performs a read operation on a set of TensorArrays. - - Args: - tas: A potentially nested tuple or list of TensorArrays with length greater - than 'index'. - index: The location to read from. - Returns: - read_tensors: A potentially nested tuple or list of Tensors with the same - structure as the 'tas' input argument. Contains the result of - performing a read operation at 'index' on each TensorArray in 'tas'. - """ - return map_nested(lambda ta: ta.read(index), tas) diff --git a/research/fivo/fivo/nested_utils_test.py b/research/fivo/fivo/nested_utils_test.py deleted file mode 100644 index 87991dd79cdb29d12944f9afa3fd0c5178dc4eb5..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/nested_utils_test.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.nested_utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import tensorflow as tf -nest = tf.contrib.framework.nest - -from fivo import nested_utils - -# An example namedtuple for use in the following tests. -ExampleTuple = collections.namedtuple('ExampleTuple', ['a', 'b']) - - -class NestedUtilsTest(tf.test.TestCase): - - def test_map_nested_works_on_nested_structures(self): - """Check that map_nested works with nested structures.""" - original = [1, (2, 3.2, (4., ExampleTuple(5, 6)))] - expected = [2, (3, 4.2, (5., ExampleTuple(6, 7)))] - out = nested_utils.map_nested(lambda x: x+1, original) - self.assertEqual(expected, out) - - def test_map_nested_works_on_single_objects(self): - """Check that map_nested works with raw objects.""" - original = 1 - expected = 2 - out = nested_utils.map_nested(lambda x: x+1, original) - self.assertEqual(expected, out) - - def test_map_nested_works_on_flat_lists(self): - """Check that map_nested works with a flat list.""" - original = [1, 2, 3] - expected = [2, 3, 4] - out = nested_utils.map_nested(lambda x: x+1, original) - self.assertEqual(expected, out) - - def test_tile_tensors(self): - """Checks that tile_tensors correctly tiles tensors of different ranks.""" - a = tf.range(20) - b = tf.reshape(a, [2, 10]) - c = tf.reshape(a, [2, 2, 5]) - a_tiled = tf.tile(a, [3]) - b_tiled = tf.tile(b, [3, 1]) - c_tiled = tf.tile(c, [3, 
1, 1]) - tensors = [a, (b, ExampleTuple(c, c))] - expected_tensors = [a_tiled, (b_tiled, ExampleTuple(c_tiled, c_tiled))] - tiled = nested_utils.tile_tensors(tensors, [3]) - nest.assert_same_structure(expected_tensors, tiled) - with self.test_session() as sess: - expected, out = sess.run([expected_tensors, tiled]) - expected = nest.flatten(expected) - out = nest.flatten(out) - # Check that the tiling is correct. - for x, y in zip(expected, out): - self.assertAllClose(x, y) - - def test_gather_tensors(self): - a = tf.reshape(tf.range(20), [5, 4]) - inds = [0, 0, 1, 4] - a_gathered = tf.gather(a, inds) - tensors = [a, (a, ExampleTuple(a, a))] - gt_gathered = [a_gathered, (a_gathered, - ExampleTuple(a_gathered, a_gathered))] - gathered = nested_utils.gather_tensors(tensors, inds) - nest.assert_same_structure(gt_gathered, gathered) - with self.test_session() as sess: - gt, out = sess.run([gt_gathered, gathered]) - gt = nest.flatten(gt) - out = nest.flatten(out) - # Check that the gathering is correct. - for x, y in zip(gt, out): - self.assertAllClose(x, y) - - def test_tas_for_tensors(self): - a = tf.reshape(tf.range(20), [5, 4]) - tensors = [a, (a, ExampleTuple(a, a))] - tas = nested_utils.tas_for_tensors(tensors, 5) - nest.assert_same_structure(tensors, tas) - # We can't pass TensorArrays to sess.run so instead we turn then back into - # tensors to check that they were created correctly. - stacked = nested_utils.map_nested(lambda x: x.stack(), tas) - with self.test_session() as sess: - gt, out = sess.run([tensors, stacked]) - gt = nest.flatten(gt) - out = nest.flatten(out) - # Check that the tas were created correctly. 
- for x, y in zip(gt, out): - self.assertAllClose(x, y) - - def test_read_tas(self): - a = tf.reshape(tf.range(20), [5, 4]) - a_read = a[3, :] - tensors = [a, (a, ExampleTuple(a, a))] - gt_read = [a_read, (a_read, ExampleTuple(a_read, a_read))] - tas = nested_utils.tas_for_tensors(tensors, 5) - tas_read = nested_utils.read_tas(tas, 3) - nest.assert_same_structure(tas, tas_read) - with self.test_session() as sess: - gt, out = sess.run([gt_read, tas_read]) - gt = nest.flatten(gt) - out = nest.flatten(out) - # Check that the tas were read correctly. - for x, y in zip(gt, out): - self.assertAllClose(x, y) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/fivo/fivo/runners.py b/research/fivo/fivo/runners.py deleted file mode 100644 index ec6fb91bf51fa2c7c44d7402e635d257f80c3f7a..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/runners.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""High-level code for creating and running FIVO-related Tensorflow graphs. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import os -import time - -import numpy as np -import tensorflow as tf - -from fivo import bounds -from fivo import smc - -from fivo.data import datasets -from fivo.models import base -from fivo.models import srnn -from fivo.models import vrnn - - -def create_dataset_and_model(config, split, shuffle, repeat): - """Creates the dataset and model for a given config. - - Args: - config: A configuration object with config values accessible as properties. - Most likely a FLAGS object. This function expects the properties - batch_size, dataset_path, dataset_type, and latent_size to be defined. - split: The dataset split to load. - shuffle: If true, shuffle the dataset randomly. - repeat: If true, repeat the dataset endlessly. - Returns: - inputs: A batch of input sequences represented as a dense Tensor of shape - [time, batch_size, data_dimension]. - targets: A batch of target sequences represented as a dense Tensor of - shape [time, batch_size, data_dimension]. - lens: An int Tensor of shape [batch_size] representing the lengths of each - sequence in the batch. - model: A vrnn.VRNNCell model object. - Raises: - ValueError: if the config is invalid. - """ - sigma_min = 0.0 - if config.dataset_type == "pianoroll": - inputs, targets, lengths, mean = datasets.create_pianoroll_dataset( - config.dataset_path, split, config.batch_size, shuffle=shuffle, - repeat=repeat) - # Convert the mean of the training set to logit space so it can be used to - # initialize the bias of the generative distribution. - emission_bias_init = -tf.log( - 1. 
/ tf.clip_by_value(mean, 0.0001, 0.9999) - 1) - emission_distribution_class = base.ConditionalBernoulliDistribution - elif config.dataset_type == "speech": - inputs, targets, lengths = datasets.create_speech_dataset( - config.dataset_path, config.batch_size, - samples_per_timestep=config.data_dimension, prefetch_buffer_size=1, - shuffle=False, repeat=False) - # There is no bias for the generative distribution because the test set - # is assumed to be already standardized with the training set statistics. - mean = None - emission_bias_init = None - emission_distribution_class = base.ConditionalNormalDistribution - if config.model == "vrnn": - model = vrnn.create_vrnn(inputs.get_shape().as_list()[2], - config.latent_size, - emission_distribution_class, - emission_bias_init=emission_bias_init, - proposal_type=config.proposal_type, - sigma_min=sigma_min, - raw_sigma_bias=0.5, - use_tilt=(config.bound == "fivo-aux")) - elif config.model == "srnn": - model = srnn.create_srnn(inputs.get_shape().as_list()[2], - config.latent_size, - emission_distribution_class, - emission_bias_init=emission_bias_init, - proposal_type=config.proposal_type, - sigma_min=sigma_min, - raw_sigma_bias=0.5, - use_tilt=(config.bound == "fivo-aux")) - else: - raise ValueError("model flag: %s is unrecognized" % config.model) - return inputs, targets, lengths, model, mean - - -def restore_checkpoint_if_exists(saver, sess, logdir): - """Looks for a checkpoint and restores the session from it if found. - - Args: - saver: A tf.train.Saver for restoring the session. - sess: A TensorFlow session. - logdir: The directory to look for checkpoints in. - Returns: - True if a checkpoint was found and restored, False otherwise. 
- """ - checkpoint = tf.train.get_checkpoint_state(logdir) - if checkpoint: - checkpoint_name = os.path.basename(checkpoint.model_checkpoint_path) - full_checkpoint_path = os.path.join(logdir, checkpoint_name) - saver.restore(sess, full_checkpoint_path) - return True - return False - - -def wait_for_checkpoint(saver, sess, logdir): - """Loops until the session is restored from a checkpoint in logdir. - - Args: - saver: A tf.train.Saver for restoring the session. - sess: A TensorFlow session. - logdir: The directory to look for checkpoints in. - """ - while not restore_checkpoint_if_exists(saver, sess, logdir): - tf.logging.info("Checkpoint not found in %s, sleeping for 60 seconds." - % logdir) - time.sleep(60) - - -def run_train(config, create_dataset_and_model_fn=create_dataset_and_model): - """Runs training for a sequential latent variable model. - - Args: - config: A configuration object with config values accessible as properties. - Most likely a FLAGS object. For a list of expected properties and their - meaning see the flags defined in fivo.py. - create_dataset_and_model_fn: If present, calls this function to create a - dataset and model instead of create_dataset_and_model() above. The - signature must be the same. - """ - - def create_logging_hook(step, bound_value): - """Creates a logging hook that prints the bound value periodically.""" - bound_label = config.bound + " bound" - if config.normalize_by_seq_len: - bound_label += " per timestep" - else: - bound_label += " per sequence" - def summary_formatter(log_dict): - return "Step %d, %s: %f" % ( - log_dict["step"], bound_label, log_dict["bound_value"]) - logging_hook = tf.train.LoggingTensorHook( - {"step": step, "bound_value": bound_value}, - every_n_iter=config.summarize_every, - formatter=summary_formatter) - return logging_hook - - def create_loss(): - """Creates the loss to be optimized. - - Returns: - bound: A float Tensor containing the value of the bound that is - being optimized. 
- loss: A float Tensor that when differentiated yields the gradients - to apply to the model. Should be optimized via gradient descent. - """ - inputs, targets, lengths, model, _ = create_dataset_and_model_fn( - config, split="train", shuffle=True, repeat=True) - # Compute lower bounds on the log likelihood. - if config.bound == "elbo": - ll_per_seq, _, _ = bounds.iwae( - model, (inputs, targets), lengths, num_samples=1, - parallel_iterations=config.parallel_iterations - ) - elif config.bound == "iwae": - ll_per_seq, _, _ = bounds.iwae( - model, (inputs, targets), lengths, num_samples=config.num_samples, - parallel_iterations=config.parallel_iterations - ) - elif config.bound in ("fivo", "fivo-aux"): - if config.resampling_type == "relaxed": - ll_per_seq, _, _, _ = bounds.fivo( - model, (inputs, targets), - lengths, - num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - resampling_type=config.resampling_type, - random_seed=config.random_seed, - relaxed_resampling_temperature=config. - relaxed_resampling_temperature, - parallel_iterations=config.parallel_iterations - ) - else: - ll_per_seq, _, _, _ = bounds.fivo( - model, (inputs, targets), lengths, num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - resampling_type=config.resampling_type, - random_seed=config.random_seed, - parallel_iterations=config.parallel_iterations - ) - # Compute loss scaled by number of timesteps. 
- ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths)) - ll_per_seq = tf.reduce_mean(ll_per_seq) - - tf.summary.scalar("train_ll_per_seq", ll_per_seq) - tf.summary.scalar("train_ll_per_t", ll_per_t) - - if config.normalize_by_seq_len: - return ll_per_t, -ll_per_t - else: - return ll_per_seq, -ll_per_seq - - def create_graph(): - """Creates the training graph.""" - global_step = tf.train.get_or_create_global_step() - bound, loss = create_loss() - opt = tf.train.AdamOptimizer(config.learning_rate) - grads = opt.compute_gradients(loss, var_list=tf.trainable_variables()) - train_op = opt.apply_gradients(grads, global_step=global_step) - return bound, train_op, global_step - - device = tf.train.replica_device_setter(ps_tasks=config.ps_tasks) - with tf.Graph().as_default(): - if config.random_seed: tf.set_random_seed(config.random_seed) - with tf.device(device): - bound, train_op, global_step = create_graph() - log_hook = create_logging_hook(global_step, bound) - start_training = not config.stagger_workers - with tf.train.MonitoredTrainingSession( - master=config.master, - is_chief=config.task == 0, - hooks=[log_hook], - checkpoint_dir=config.logdir, - save_checkpoint_secs=120, - save_summaries_steps=config.summarize_every, - log_step_count_steps=config.summarize_every) as sess: - cur_step = -1 - while not sess.should_stop() and cur_step <= config.max_steps: - if config.task > 0 and not start_training: - cur_step = sess.run(global_step) - tf.logging.info("task %d not active yet, sleeping at step %d" % - (config.task, cur_step)) - time.sleep(30) - if cur_step >= config.task * 1000: - start_training = True - else: - _, cur_step = sess.run([train_op, global_step]) - - -def run_eval(config, create_dataset_and_model_fn=create_dataset_and_model): - """Runs evaluation for a sequential latent variable model. - - This method runs only one evaluation over the dataset, writes summaries to - disk, and then terminates. It does not loop indefinitely. 
- - Args: - config: A configuration object with config values accessible as properties. - Most likely a FLAGS object. For a list of expected properties and their - meaning see the flags defined in fivo.py. - create_dataset_and_model_fn: If present, calls this function to create a - dataset and model instead of create_dataset_and_model() above. The - signature must be the same. - """ - - def create_graph(): - """Creates the evaluation graph. - - Returns: - lower_bounds: A tuple of float Tensors containing the values of the 3 - evidence lower bounds, summed across the batch. - total_batch_length: The total number of timesteps in the batch, summed - across batch examples. - batch_size: The batch size. - global_step: The global step the checkpoint was loaded from. - """ - global_step = tf.train.get_or_create_global_step() - inputs, targets, lengths, model, _ = create_dataset_and_model_fn( - config, split=config.split, shuffle=False, repeat=False) - # Compute lower bounds on the log likelihood. - elbo_ll_per_seq, _, _ = bounds.iwae( - model, (inputs, targets), lengths, num_samples=1, - parallel_iterations=config.parallel_iterations - ) - iwae_ll_per_seq, _, _ = bounds.iwae( - model, (inputs, targets), lengths, num_samples=config.num_samples, - parallel_iterations=config.parallel_iterations - ) - # The resampling type should only be used for training, so we ignore it. 
- fivo_ll_per_seq, _, _, _ = bounds.fivo( - model, (inputs, targets), lengths, num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, random_seed=config.random_seed, - parallel_iterations=config.parallel_iterations - ) - elbo_ll = tf.reduce_sum(elbo_ll_per_seq) - iwae_ll = tf.reduce_sum(iwae_ll_per_seq) - fivo_ll = tf.reduce_sum(fivo_ll_per_seq) - batch_size = tf.shape(lengths)[0] - total_batch_length = tf.reduce_sum(lengths) - return ((elbo_ll, iwae_ll, fivo_ll), total_batch_length, batch_size, - global_step) - - def average_bounds_over_dataset(lower_bounds, total_batch_length, batch_size, - sess): - """Computes the values of the bounds, averaged over the datset. - - Args: - lower_bounds: Tuple of float Tensors containing the values of the bounds - evaluated on a single batch. - total_batch_length: Integer Tensor that represents the total number of - timesteps in the current batch. - batch_size: Integer Tensor containing the batch size. This can vary if the - requested batch_size does not evenly divide the size of the dataset. - sess: A TensorFlow Session object. - Returns: - ll_per_t: A length 3 numpy array of floats containing each bound's average - value, normalized by the total number of timesteps in the datset. Can - be interpreted as a lower bound on the average log likelihood per - timestep in the dataset. - ll_per_seq: A length 3 numpy array of floats containing each bound's - average value, normalized by the number of sequences in the dataset. - Can be interpreted as a lower bound on the average log likelihood per - sequence in the datset. 
- """ - total_ll = np.zeros(3, dtype=np.float64) - total_n_elems = 0.0 - total_length = 0.0 - while True: - try: - outs = sess.run([lower_bounds, batch_size, total_batch_length]) - except tf.errors.OutOfRangeError: - break - total_ll += outs[0] - total_n_elems += outs[1] - total_length += outs[2] - ll_per_t = total_ll / total_length - ll_per_seq = total_ll / total_n_elems - return ll_per_t, ll_per_seq - - def summarize_lls(lls_per_t, lls_per_seq, summary_writer, step): - """Creates log-likelihood lower bound summaries and writes them to disk. - - Args: - lls_per_t: An array of 3 python floats, contains the values of the - evaluated bounds normalized by the number of timesteps. - lls_per_seq: An array of 3 python floats, contains the values of the - evaluated bounds normalized by the number of sequences. - summary_writer: A tf.SummaryWriter. - step: The current global step. - """ - def scalar_summary(name, value): - value = tf.Summary.Value(tag=name, simple_value=value) - return tf.Summary(value=[value]) - - for i, bound in enumerate(["elbo", "iwae", "fivo"]): - per_t_summary = scalar_summary("%s/%s_ll_per_t" % (config.split, bound), - lls_per_t[i]) - per_seq_summary = scalar_summary("%s/%s_ll_per_seq" % - (config.split, bound), - lls_per_seq[i]) - summary_writer.add_summary(per_t_summary, global_step=step) - summary_writer.add_summary(per_seq_summary, global_step=step) - summary_writer.flush() - - with tf.Graph().as_default(): - if config.random_seed: tf.set_random_seed(config.random_seed) - lower_bounds, total_batch_length, batch_size, global_step = create_graph() - summary_dir = config.logdir + "/" + config.split - summary_writer = tf.summary.FileWriter( - summary_dir, flush_secs=15, max_queue=100) - saver = tf.train.Saver() - with tf.train.SingularMonitoredSession() as sess: - wait_for_checkpoint(saver, sess, config.logdir) - step = sess.run(global_step) - tf.logging.info("Model restored from step %d, evaluating." 
% step) - ll_per_t, ll_per_seq = average_bounds_over_dataset( - lower_bounds, total_batch_length, batch_size, sess) - summarize_lls(ll_per_t, ll_per_seq, summary_writer, step) - tf.logging.info("%s elbo ll/t: %f, iwae ll/t: %f fivo ll/t: %f", - config.split, ll_per_t[0], ll_per_t[1], ll_per_t[2]) - tf.logging.info("%s elbo ll/seq: %f, iwae ll/seq: %f fivo ll/seq: %f", - config.split, ll_per_seq[0], ll_per_seq[1], ll_per_seq[2]) - - -def run_sample(config, create_dataset_and_model_fn=create_dataset_and_model): - """Sample from the model. Only pianorolls and pose datasets are supported.""" - - def sample_from_model(model, initial_state, initial_inputs, mean): - """Samples a sequence of outputs from the model. - - The mean must be supplied -- if it isn't the results will be incorrect. - - Args: - model: A model with sample_step implemented. See models/vrnn.py for an - example. - initial_state: The initial state of the model. - initial_inputs: The initial inputs to feed into the model. - mean: The mean of the training set, a Tensor of shape [data_dimension]. - Returns: - samples: A Tensor of shape [sample_length, batch_size, num_timesteps, - data_dimension] containing the samples from the model. 
- """ - initial_state, initial_output = model.sample_step(initial_state, - initial_inputs, 0) - output_ta = tf.TensorArray(size=config.sample_length, - dtype=tf.float32, - dynamic_size=False, - clear_after_read=True) - output_ta = output_ta.write(0, initial_output) - t0 = tf.constant(1, dtype=tf.int32) - - def sample_step(t, state, prev_outputs, output_ta): - state, output = model.sample_step(state, prev_outputs, t) - output_ta = output_ta.write(t, output) - centered_output = output - mean[tf.newaxis, :] - return t+1, state, centered_output, output_ta - - def sample_predicate(t, *unused_args): - return t < config.sample_length - - _, _, _, output_ta = tf.while_loop( - sample_predicate, - sample_step, - loop_vars=(t0, initial_state, initial_output, output_ta), - parallel_iterations=config.parallel_iterations - ) - samples = output_ta.stack() - samples = tf.reshape(samples, [config.sample_length, config.batch_size, - config.num_samples, config.data_dimension]) - return samples - - def create_graph(): - """Creates the graph to sample from the model. - - First, the model is conditioned on a prefix by sampling a batch of data - and trimming it to prefix_length. The configured bound is used to do the - conditioning. Then the final state from the conditioning is used to sample - from the model. - - Returns: - samples: A Tensor of shape [sample_length, batch_size, - num_samples, data_dimension] representing samples from the model. - prefixes: A Tensor of shape [prefix_length, batch_size, data_dimension] - representing the prefixes the model was conditioned on. 
- """ - inputs, targets, lengths, model, mean = create_dataset_and_model_fn( - config, split=config.split, shuffle=True, repeat=True) - input_prefixes = inputs[:config.prefix_length] - target_prefixes = targets[:config.prefix_length] - prefix_lengths = tf.ones_like(lengths) * config.prefix_length - if config.bound == "elbo": - _, _, state = bounds.iwae( - model, (input_prefixes, target_prefixes), - prefix_lengths, num_samples=1) - elif config.bound == "iwae": - _, _, state = bounds.iwae( - model, (input_prefixes, target_prefixes), - prefix_lengths, num_samples=config.num_samples) - elif config.bound == "fivo": - _, _, _, state = bounds.fivo( - model, (input_prefixes, target_prefixes), prefix_lengths, - num_samples=config.num_samples, - resampling_criterion=smc.ess_criterion, - random_seed=config.random_seed) - sample_inputs = tf.tile(inputs[config.prefix_length], - [config.num_samples, 1]) - samples = sample_from_model(model, state, sample_inputs, mean) - return samples, target_prefixes - - with tf.Graph().as_default(): - if config.random_seed: - tf.set_random_seed(config.random_seed) - samples, prefixes = create_graph() - if config.sample_out_dir: - out_dir = config.sample_our_dir - else: - out_dir = config.logdir - if not tf.gfile.Exists(out_dir): - tf.gfile.MakeDirs(out_dir) - with tf.train.SingularMonitoredSession( - checkpoint_dir=config.logdir) as sess: - samples_out, prefixes_out = sess.run([samples, prefixes]) - with tf.gfile.Open(os.path.join(out_dir, "samples.npz"), "w") as fout: - np.save(fout, {"prefixes": prefixes_out, "samples": samples_out}) diff --git a/research/fivo/fivo/runners_test.py b/research/fivo/fivo/runners_test.py deleted file mode 100644 index eb050c0a0b38b2511f3d2fb9ec846e63ead3b5ac..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/runners_test.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.runners""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import tensorflow as tf - -from fivo import runners -from fivo.models import base -from fivo.models import vrnn - -FLAGS = tf.app.flags.FLAGS - - -class RunnersTest(tf.test.TestCase): - - def default_config(self): - class Config(object): - pass - config = Config() - config.model = "vrnn" - config.latent_size = 64 - config.batch_size = 4 - config.num_samples = 4 - config.resampling_type = "multinomial" - config.normalize_by_seq_len = True - config.learning_rate = 0.0001 - config.max_steps = int(1e6) - config.summarize_every = 50 - # Master must be "" to prevent state from persisting between sessions. 
- config.master = "" - config.task = 0 - config.ps_tasks = 0 - config.stagger_workers = True - config.random_seed = 1234 - config.parallel_iterations = 1 - config.dataset_type = "pianoroll" - config.data_dimension = None - config.dataset_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "test_data", "tiny_pianoroll.pkl") - config.proposal_type = "filtering" - return config - - def run_training_one_step(self, bound, dataset_type, data_dimension, - dataset_filename, dir_prefix, resampling_type, - model, batch_size=2, num_samples=3, - create_dataset_and_model_fn=(runners.create_dataset_and_model)): - config = self.default_config() - config.model = model - config.resampling_type = resampling_type - config.relaxed_resampling_temperature = 0.5 - config.bound = bound - config.split = "train" - config.dataset_type = dataset_type - config.dataset_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "test_data", - dataset_filename) - config.max_steps = 1 - config.batch_size = batch_size - config.num_samples = num_samples - config.latent_size = 4 - config.data_dimension = data_dimension - config.logdir = os.path.join(tf.test.get_temp_dir(), "%s-%s-%s-%s" % - (dir_prefix, bound, dataset_type, model)) - runners.run_train(config, - create_dataset_and_model_fn=create_dataset_and_model_fn) - return config - - def dummmy_dataset_and_model_fn(self, *unused_args, **unused_kwargs): - # We ignore the arguments in the dummy but need to preserve prototype. 
- batch_elements = 5 - sequence_length = 4 - data_dimensions = 3 - dataset = tf.data.Dataset.from_tensors( - tf.zeros((sequence_length, batch_elements, data_dimensions), - dtype=tf.float32)) - inputs = dataset.make_one_shot_iterator().get_next() - targets = tf.zeros_like(inputs) - lengths = tf.constant([sequence_length] * batch_elements) - mean = tf.constant((0.0, 0.0, 0.0)) - model = vrnn.create_vrnn(data_dimensions, 1, - base.ConditionalNormalDistribution) - return inputs, targets, lengths, model, mean - - def test_training_one_step_fivo_pianoroll_vrnn(self): - self.run_training_one_step("fivo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_iwae_pianoroll_vrnn(self): - self.run_training_one_step("iwae", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_elbo_pianoroll_vrnn(self): - self.run_training_one_step("elbo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_fivo_speech_vrnn(self): - self.run_training_one_step("fivo", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_iwae_speech_vrnn(self): - self.run_training_one_step("iwae", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_elbo_speech_vrnn(self): - self.run_training_one_step("elbo", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "vrnn") - - def test_training_one_step_fivo_pianoroll_srnn(self): - self.run_training_one_step("fivo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "srnn") - - def test_training_one_step_iwae_pianoroll_srnn(self): - self.run_training_one_step("iwae", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "srnn") - - def test_training_one_step_elbo_pianoroll_srnn(self): - 
self.run_training_one_step("elbo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "multinomial", "srnn") - - def test_training_one_step_fivo_speech_srnn(self): - self.run_training_one_step("fivo", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "srnn") - - def test_training_one_step_iwae_speech_srnn(self): - self.run_training_one_step("iwae", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "srnn") - - def test_training_one_step_elbo_speech_srnn(self): - self.run_training_one_step("elbo", "speech", 2, "tiny_speech_dataset.tfrecord", - "test-training", "multinomial", "srnn") - - def test_training_one_step_fivo_pianoroll_vrnn_relaxed(self): - self.run_training_one_step("fivo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "vrnn") - - def test_training_one_step_iwae_pianoroll_vrnn_relaxed(self): - self.run_training_one_step("iwae", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "vrnn") - - def test_training_one_step_elbo_pianoroll_vrnn_relaxed(self): - self.run_training_one_step("elbo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "vrnn") - - def test_training_one_step_fivo_pianoroll_srnn_relaxed(self): - self.run_training_one_step("fivo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "srnn") - - def test_training_one_step_iwae_pianoroll_srnn_relaxed(self): - self.run_training_one_step("iwae", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "srnn") - - def test_training_one_step_elbo_pianoroll_srnn_relaxed(self): - self.run_training_one_step("elbo", "pianoroll", 88, "tiny_pianoroll.pkl", - "test-training", "relaxed", "srnn") - - def test_eval_vrnn(self): - self.run_eval("vrnn") - - def test_eval_srnn(self): - self.run_eval("srnn") - - def run_eval(self, model): - config = self.run_training_one_step( - "fivo", "pianoroll", 88, "tiny_pianoroll.pkl", "test-eval-" + model, - "multinomial", 
model) - config.split = "train" - runners.run_eval(config) - - def test_sampling_vrnn(self): - self.run_sampling("vrnn") - - def test_sampling_srnn(self): - self.run_sampling("srnn") - - def run_sampling(self, model): - """Test sampling from the model.""" - config = self.run_training_one_step( - "fivo", "pianoroll", 88, "tiny_pianoroll.pkl", "test-sampling", "multinomial", - model) - config.prefix_length = 3 - config.sample_length = 6 - config.split = "train" - config.sample_out_dir = None - - runners.run_sample(config) - unused_samples = np.load(os.path.join(config.logdir, "samples.npz")) - - def test_training_with_custom_fn(self): - self.run_training_one_step( - "fivo", "pianoroll", 3, "tiny_pianoroll.pkl", - "test-training-custom-fn", "multinomial", "vrnn", batch_size=5, - create_dataset_and_model_fn=self.dummmy_dataset_and_model_fn) - - def test_eval_with_custom_fn(self): - config = self.run_training_one_step( - "fivo", "pianoroll", 1, "tiny_pianoroll.pkl", - "test-eval-custom-fn", "multinomial", "vrnn", batch_size=1, - create_dataset_and_model_fn=self.dummmy_dataset_and_model_fn) - config.split = "train" - runners.run_eval( - config, - create_dataset_and_model_fn=self.dummmy_dataset_and_model_fn) - - def test_sampling_with_custom_fn(self): - config = self.run_training_one_step( - "fivo", "pianoroll", 3, "tiny_pianoroll.pkl", - "test-sample-custom-fn", "multinomial", "vrnn", batch_size=5, - create_dataset_and_model_fn=self.dummmy_dataset_and_model_fn) - config.prefix_length = 2 - config.sample_length = 3 - config.split = "train" - config.sample_out_dir = None - - runners.run_sample( - config, - create_dataset_and_model_fn=self.dummmy_dataset_and_model_fn) - unused_samples = np.load(os.path.join(config.logdir, "samples.npz")) - - -if __name__ == "__main__": - tf.test.main() diff --git a/research/fivo/fivo/smc.py b/research/fivo/fivo/smc.py deleted file mode 100644 index 25d4969043e2cb8bc2c2c7a3770d3d2dfcca0bef..0000000000000000000000000000000000000000 --- 
a/research/fivo/fivo/smc.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Implementation of sequential Monte Carlo algorithms. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -import fivo.nested_utils as nested - - -def ess_criterion(log_weights, unused_t): - """A criterion that resamples based on effective sample size.""" - num_particles = tf.shape(log_weights)[0] - # Calculate the effective sample size. - ess_num = 2 * tf.reduce_logsumexp(log_weights, axis=0) - ess_denom = tf.reduce_logsumexp(2 * log_weights, axis=0) - log_ess = ess_num - ess_denom - return log_ess <= tf.log(tf.to_float(num_particles) / 2.0) - - -def never_resample_criterion(log_weights, unused_t): - """A criterion that never resamples.""" - batch_size = tf.shape(log_weights)[1] - return tf.cast(tf.zeros([batch_size]), tf.bool) - - -def always_resample_criterion(log_weights, unused_t): - """A criterion resamples at every timestep.""" - batch_size = tf.shape(log_weights)[1] - return tf.cast(tf.ones([batch_size]), tf.bool) - - -def multinomial_resampling(log_weights, states, num_particles, batch_size, - random_seed=None): - """Resample states with multinomial resampling. 
- - Args: - log_weights: A [num_particles, batch_size] Tensor representing a batch - of batch_size logits for num_particles-ary Categorical distribution. - states: A nested list of [batch_size*num_particles, data_size] Tensors that - will be resampled from the groups of every num_particles-th row. - num_particles: The number of particles/samples. - batch_size: The batch size. - random_seed: The random seed to pass to the resampling operations in - the particle filter. Mainly useful for testing. - - Returns: - resampled_states: A nested list of [batch_size*num_particles, data_size] - Tensors resampled via multinomial sampling. - """ - # Calculate the ancestor indices via resampling. Because we maintain the - # log unnormalized weights, we pass the weights in as logits, allowing - # the distribution object to apply a softmax and normalize them. - resampling_parameters = tf.transpose(log_weights, perm=[1, 0]) - resampling_dist = tf.contrib.distributions.Categorical( - logits=resampling_parameters) - ancestors = tf.stop_gradient( - resampling_dist.sample(sample_shape=num_particles, seed=random_seed)) - - # Because the batch is flattened, we must modify ancestor_inds to index the - # proper samples. The particles in the ith filter are distributed every - # batch_size rows in the batch, and offset i rows from the top. So, to - # correct the indices we multiply by the batch_size and add the proper offset. - # Crucially, when ancestor_inds is flattened the layout of the batch is - # maintained. - offset = tf.expand_dims(tf.range(batch_size), 0) - ancestor_inds = tf.reshape(ancestors * batch_size + offset, [-1]) - - resampled_states = nested.gather_tensors(states, ancestor_inds) - return resampled_states - - -def _blend_tensor(blending_weights, tensor, num_particles, batch_size): - """Blend tensor according to the weights. - - The first dimension of tensor is actually a 2d index compacted to a 1d - index and similarly for blended_tensor. 
So if we index these Tensors - by [(i, j), k], then - - blended_tensor[(i, j), k] = - sum_l tensor[(l, j), :] * blending_weights[i, j, l]. - - Args: - blending_weights: [num_particles, batch_size, num_particles] weights where - the indices represent [sample index, batch index, blending weight index]. - tensor: [num_particles * batch_size, state_dim] Tensor to be blended. - num_particles: The number of particles/samples. - batch_size: The batch size. - - Returns: - blended_tensor: [num_particles*batch_size, state_dim] blended Tensor. - """ - # tensor is currently [num_particles * batch_size, state_dim], so we reshape - # it to [num_particles, batch_size, state_dim]. Then, transpose it to - # [batch_size, state_size, num_particles]. - tensor = tf.transpose( - tf.reshape(tensor, [num_particles, batch_size, -1]), perm=[1, 2, 0]) - blending_weights = tf.transpose(blending_weights, perm=[1, 2, 0]) - # blendeding_weights is [batch index, blending weight index, sample index]. - # Multiplying these gives a matrix of size [batch_size, state_size, - # num_particles]. - tensor = tf.matmul(tensor, blending_weights) - # transpose the tensor to be [num_particles, batch_size, state_size] - # and then reshape it to match the original format. - tensor = tf.reshape(tf.transpose(tensor, perm=[2, 0, 1]), - [num_particles*batch_size, -1]) - return tensor - - -def relaxed_resampling(log_weights, states, num_particles, batch_size, - temperature=0.5, random_seed=None): - """Resample states with relaxed resampling. - - Draw soft "ancestors" using the Gumbel-Softmax distribution. - - Args: - log_weights: A [num_particles, batch_size] Tensor representing a batch - of batch_size logits for num_particles-ary Categorical distribution. - states: A nested list of [batch_size * num_particles, d] Tensors that will - be resampled from the groups of every num_particles-th row. - num_particles: The number of particles/samples. - batch_size: The batch size. 
- temperature: The temperature used for the relaxed one hot distribution. - random_seed: The random seed to pass to the resampling operations in - the particle filter. Mainly useful for testing. - - Returns: - resampled_states: A nested list of [batch_size * num_particles, d] - Tensors resampled via multinomial sampling. - """ - # log_weights are [num_particles, batch_size], so we transpose to get a - # set of batch_size distributions over [0, num_particles). - resampling_parameters = tf.transpose(log_weights, perm=[1, 0]) - resampling_dist = tf.contrib.distributions.RelaxedOneHotCategorical( - temperature, - logits=resampling_parameters) - - # Sample num_particles samples from the distribution, resulting in a - # [num_particles, batch_size, num_particles] Tensor that represents a set of - # [num_particles, batch_size] blending weights. The dimensions represent - # [particle index, batch index, blending weight index]. - ancestors = resampling_dist.sample(sample_shape=num_particles, - seed=random_seed) - def map_fn(tensor): - return _blend_tensor(ancestors, tensor, num_particles, batch_size) - - resampled_states = nested.map_nested(map_fn, states) - return resampled_states - - -def smc( - transition_fn, - num_steps, - num_particles=1, - resampling_criterion=ess_criterion, - resampling_fn=multinomial_resampling, - loop_fn=None, - parallel_iterations=30, - swap_memory=True): - """Run a sequential Monte Carlo (SMC) algorithm. - - This method runs an SMC algorithm that evolves systems of particles - using the supplied transition function for the specified number of steps. The - particles are optionally resampled using resampling_fn when indicated by - resampling_criterion. - - Args: - transition_fn: A callable that propogates a batch of particles one step. - Must accept as arguments a batch of particle states and the current - timestep. 
Must return the particle states one timestep in the future, the - incremental weights of each particle as a [num_samples*batch_size] float - Tensor, and optionally a set of arguments to pass to the loop_fn. If - the loop args are not provided, they will be set to None. Before the - first timestep transition_fn will be called with the arguments None, -1 - and should return the initial particle states. - num_steps: A [batch_size] Tensor of ints representing the number of steps - to run each filter for. - num_particles: A scalar int, the number of particles to use in each filter. - resampling_criterion: The resampling criterion to use for this particle - filter. Must accept the current log weights and timestep and - return a boolean Tensor of shape [batch_size] indicating whether each - particle filter should resample. See ess_criterion and related functions - for examples. When resampling_criterion is never_resample_criterion, - resampling_fn is ignored and never called. - resampling_fn: A callable that performs the resampling operation. Must - accept as arguments the log weights, particle states, num_particles, - and batch_size and return the resampled particle states. See - multinomial_resampling and relaxed_resampling for examples. - loop_fn: A callable that performs operations on the weights and - particle states, useful for accumulating and processing state that - shouldn't be resampled. At each timestep after (possibly) resampling - loop_fn will be called with the previous loop_state, a set of arguments - produced by transition_fn called loop_args, the resampled particle states, - the current log weights as [num_particles, batch_size] float Tensor, a - [batch_size] float Tensor representing whether or not each filter - resampled, the current mask indicating which filters are active, and the - current timestep. It must return the next loop state. 
Before the first - timestep loop_fn will be called with the arguments None, None, None, None, - -1 and must return the initial loop state. The loop state can be a - possibly nested structure of Tensors and TensorArrays. - parallel_iterations: The number of parallel iterations to use for the - internal while loop. Note that values greater than 1 can introduce - non-determinism even when resampling is deterministic. - swap_memory: Whether GPU-CPU memory swapping should be enabled for the - internal while loop. - - Returns: - log_z_hat: A Tensor of shape [batch_size] containing an estimate of the log - normalizing constant that converts between the unormalized target - distribution (as defined by the weights) and the true target distribution. - log_weights: A Tensor of shape [max_num_steps, batch_size, num_particles] - containing the log weights at each timestep of the particle filter. - Will not be valid for timesteps past the supplied num_steps. - resampled: A float Tensor of shape [max_num_steps, batch_size] indicating - when the particle filters resampled. Will be 1.0 on timesteps when - resampling occurred and 0.0 on timesteps when it did not. - final_loop_state: The final state returned by loop_fn. If loop_fn is None - then 0 will be returned. - """ - # batch_size represents the number of particle filters running in parallel. - batch_size = tf.shape(num_steps)[0] - # Create a TensorArray where element t is the [num_particles*batch_size] - # sequence mask for timestep t. - max_num_steps = tf.reduce_max(num_steps) - seq_mask = tf.transpose( - tf.sequence_mask(num_steps, maxlen=max_num_steps, dtype=tf.float32), - perm=[1, 0]) - seq_mask = tf.tile(seq_mask, [1, num_particles]) - mask_ta = tf.TensorArray(seq_mask.dtype, - max_num_steps, - name='mask_ta') - mask_ta = mask_ta.unstack(seq_mask) - # Initialize the state. 
- t0 = tf.constant(0, tf.int32) - init_particle_state = transition_fn(None, -1) - - def transition(*args): - transition_outs = transition_fn(*args) - if len(transition_outs) == 2: - return transition_outs + (None,) - else: - return transition_outs - - if loop_fn is None: - loop_fn = lambda *args: 0 - - init_loop_state = loop_fn(None, None, None, None, None, None, -1) - init_states = (init_particle_state, init_loop_state) - ta_names = ['log_weights', 'resampled'] - tas = [tf.TensorArray(tf.float32, max_num_steps, name='%s_ta' % n) - for n in ta_names] - log_weights_acc = tf.zeros([num_particles, batch_size], dtype=tf.float32) - log_z_hat_acc = tf.zeros([batch_size], dtype=tf.float32) - - def while_predicate(t, *unused_args): - return t < max_num_steps - - def while_step(t, state, tas, log_weights_acc, log_z_hat_acc): - """Implements one timestep of the particle filter.""" - particle_state, loop_state = state - cur_mask = nested.read_tas(mask_ta, t) - # Propagate the particles one step. - log_alpha, new_particle_state, loop_args = transition(particle_state, t) - # Update the current weights with the incremental weights. - log_alpha *= cur_mask - log_alpha = tf.reshape(log_alpha, [num_particles, batch_size]) - log_weights_acc += log_alpha - - should_resample = resampling_criterion(log_weights_acc, t) - - if resampling_criterion == never_resample_criterion: - resampled = tf.to_float(should_resample) - else: - # Compute the states as if we did resample. - resampled_states = resampling_fn( - log_weights_acc, - new_particle_state, - num_particles, - batch_size) - # Decide whether or not we should resample; don't resample if we are past - # the end of a sequence. - should_resample = tf.logical_and(should_resample, - cur_mask[:batch_size] > 0.) 
- float_should_resample = tf.to_float(should_resample) - new_particle_state = nested.where_tensors( - tf.tile(should_resample, [num_particles]), - resampled_states, - new_particle_state) - resampled = float_should_resample - - new_loop_state = loop_fn(loop_state, loop_args, new_particle_state, - log_weights_acc, resampled, cur_mask, t) - # Update log Z hat. - log_z_hat_update = tf.reduce_logsumexp( - log_weights_acc, axis=0) - tf.log(tf.to_float(num_particles)) - # If it is the last timestep, always add the update. - log_z_hat_acc += tf.cond(t < max_num_steps - 1, - lambda: log_z_hat_update * resampled, - lambda: log_z_hat_update) - # Update the TensorArrays before we reset the weights so that we capture - # the incremental weights and not zeros. - ta_updates = [log_weights_acc, resampled] - new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)] - # For the particle filters that resampled, reset weights to zero. - log_weights_acc *= (1. - tf.tile(resampled[tf.newaxis, :], - [num_particles, 1])) - new_state = (new_particle_state, new_loop_state) - return t + 1, new_state, new_tas, log_weights_acc, log_z_hat_acc - - _, final_state, tas, _, log_z_hat = tf.while_loop( - while_predicate, - while_step, - loop_vars=(t0, init_states, tas, log_weights_acc, log_z_hat_acc), - parallel_iterations=parallel_iterations, - swap_memory=swap_memory) - - log_weights, resampled = [x.stack() for x in tas] - log_weights = tf.transpose(log_weights, perm=[0, 2, 1]) - final_particle_state, final_loop_state = final_state - return (log_z_hat, log_weights, resampled, - final_particle_state, final_loop_state) diff --git a/research/fivo/fivo/smc_test.py b/research/fivo/fivo/smc_test.py deleted file mode 100644 index ae32a62f21e037252bda44e3e1f47e007c9b7b9b..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/smc_test.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for fivo.smc.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import scipy -import tensorflow as tf - -from fivo import smc - -lse = scipy.special.logsumexp - - -def _simple_transition_fn(state, unused_t): - if state is None: - return tf.zeros([4], dtype=tf.float32) - return tf.constant([5., 4., 1., 0.5]), tf.zeros([4], dtype=tf.float32) - - -def _resample_at_step_criterion(step): - """A criterion that resamples once at a specific timestep.""" - def criterion(log_weights, t): - batch_size = tf.shape(log_weights)[1] - return tf.fill([batch_size], tf.equal(t, step)) - return criterion - - -class SMCTest(tf.test.TestCase): - - def test_never_resampling(self): - """Test that never_resample_criterion makes smc not resample. - - Also test that the weights and log_z_hat are computed correctly when never - resampling. 
- """ - tf.set_random_seed(1234) - with self.test_session() as sess: - outs = smc.smc( - _simple_transition_fn, - num_steps=tf.convert_to_tensor([5, 3]), - num_particles=2, - resampling_criterion=smc.never_resample_criterion) - log_z_hat, weights, resampled = sess.run(outs[0:3]) - gt_weights = np.array( - [[[5, 1], [4, .5]], - [[10, 2], [8, 1]], - [[15, 3], [12, 1.5]], - [[20, 4], [12, 1.5]], - [[25, 5], [12, 1.5]]], - dtype=np.float32) - gt_log_z_hat = np.array( - [lse([25, 5]) - np.log(2), - lse([12, 1.5]) - np.log(2)], - dtype=np.float32) - self.assertAllClose(gt_log_z_hat, log_z_hat) - self.assertAllClose(gt_weights, weights) - self.assertAllEqual(np.zeros_like(resampled), resampled) - - def test_always_resampling(self): - """Test always_resample_criterion makes smc always resample. - - Past a sequence end the filter should not resample, however. - Also check that weights and log_z_hat estimate are correct. - """ - tf.set_random_seed(1234) - with self.test_session() as sess: - outs = smc.smc( - _simple_transition_fn, - num_steps=tf.convert_to_tensor([5, 3]), - num_particles=2, - resampling_criterion=smc.always_resample_criterion) - log_z_hat, weights, resampled = sess.run(outs[0:3]) - gt_weights = np.array( - [[[5, 1], [4, .5]], - [[5, 1], [4, .5]], - [[5, 1], [4, .5]], - [[5, 1], [0., 0.]], - [[5, 1], [0., 0.]]], - dtype=np.float32) - gt_log_z_hat = np.array( - [5*lse([5, 1]) - 5*np.log(2), - 3*lse([4, .5]) - 3*np.log(2)], - dtype=np.float32) - gt_resampled = np.array( - [[1, 1], [1, 1], [1, 1], [1, 0], [1, 0]], - dtype=np.float32) - self.assertAllClose(gt_log_z_hat, log_z_hat) - self.assertAllClose(gt_weights, weights) - self.assertAllEqual(gt_resampled, resampled) - - def test_weights_reset_when_resampling_at_sequence_end(self): - """Test that the weights are reset when resampling at the sequence end. - - When resampling happens on the last timestep of a sequence the weights - should be set to zero on the next timestep and remain zero afterwards. 
- """ - tf.set_random_seed(1234) - with self.test_session() as sess: - outs = smc.smc( - _simple_transition_fn, - num_steps=tf.convert_to_tensor([5, 3]), - num_particles=2, - resampling_criterion=_resample_at_step_criterion(2)) - log_z_hat, weights, resampled = sess.run(outs[0:3]) - gt_log_z = np.array( - [lse([15, 3]) + lse([10, 2]) - 2*np.log(2), - lse([12, 1.5]) - np.log(2)], - dtype=np.float32) - gt_resampled = np.array( - [[0, 0], [0, 0], [1, 1], [0, 0], [0, 0]], - dtype=np.float32) - gt_weights = np.array( - [[[5, 1], [4, .5]], - [[10, 2], [8, 1]], - [[15, 3], [12, 1.5]], - [[5, 1], [0, 0]], - [[10, 2], [0, 0]]], - dtype=np.float32) - self.assertAllClose(gt_log_z, log_z_hat) - self.assertAllEqual(gt_resampled, resampled) - self.assertAllEqual(gt_weights, weights) - - def test_weights_not_updated_past_sequence_end(self): - """Test that non-zero weights are not updated past the end of a sequence.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - outs = smc.smc( - _simple_transition_fn, - num_steps=tf.convert_to_tensor([6, 4]), - num_particles=2, - resampling_criterion=_resample_at_step_criterion(1)) - log_z_hat, weights, resampled = sess.run(outs[0:3]) - gt_log_z_hat = np.array( - [lse([10, 2]) + lse([20, 4]) - 2*np.log(2), - lse([8, 1]) + lse([8, 1]) - 2*np.log(2)], - dtype=np.float32) - # Ensure that we only resample on the 2nd timestep. - gt_resampled = np.array( - [[0, 0], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0]], - dtype=np.float32) - # Ensure that the weights after the end of the sequence don't change. - # Ensure that the weights after resampling before the end of the sequence - # do change. 
- gt_weights = np.array( - [[[5, 1], [4, .5]], - [[10, 2], [8, 1]], - [[5, 1], [4, .5]], - [[10, 2], [8, 1]], - [[15, 3], [8, 1]], - [[20, 4], [8, 1]]], - dtype=np.float32) - self.assertAllClose(gt_log_z_hat, log_z_hat) - self.assertAllEqual(gt_resampled, resampled) - self.assertAllEqual(gt_weights, weights) - - def test_resampling_on_max_num_steps(self): - """Test that everything is correct when resampling on step max_num_steps. - - When resampling on step max_num_steps (i.e. the last step of the longest - sequence), ensure that there are no off-by-one errors preventing resampling - and also that the weights are not updated. - """ - tf.set_random_seed(1234) - with self.test_session() as sess: - outs = smc.smc( - _simple_transition_fn, - num_steps=tf.convert_to_tensor([4, 2]), - num_particles=2, - resampling_criterion=_resample_at_step_criterion(3)) - log_z_hat, weights, resampled = sess.run(outs[0:3]) - gt_log_z_hat = np.array( - [lse([20, 4]) - np.log(2), - lse([8, 1]) - np.log(2)], - dtype=np.float32) - # Ensure that we only resample on the 3rd timestep and that the second - # filter doesn't resample at all because it is only run for 2 steps. - gt_resampled = np.array( - [[0, 0], [0, 0], [0, 0], [1, 0]], - dtype=np.float32) - gt_weights = np.array( - [[[5, 1], [4, .5]], - [[10, 2], [8, 1]], - [[15, 3], [8, 1]], - [[20, 4], [8, 1]]], - dtype=np.float32) - self.assertAllClose(gt_log_z_hat, log_z_hat) - self.assertAllEqual(gt_resampled, resampled) - self.assertAllEqual(gt_weights, weights) - - def test_multinomial_resampling(self): - """Test that mulitnomial resampling selects the correct states.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - # Setup input. - inf = 1000.0 # Very large value in log space. - num_samples = 2 - batch_size = 2 - log_weights = tf.convert_to_tensor([[inf, 0], [0, inf]]) - states = tf.convert_to_tensor([1, 2, 3, 4]) - # Run test. 
- resampled_states = smc.multinomial_resampling( - log_weights, states, num_samples, batch_size, random_seed=0) - resampled_states_values = sess.run(resampled_states) - self.assertAllEqual(resampled_states_values, [1, 4, 1, 4]) - - def test_blend_tensor(self): - """Test that relaxed resampling blends the correct states.""" - tf.set_random_seed(1234) - with self.test_session() as sess: - # Setup input. - num_samples = 2 - batch_size = 2 - blending_weights = tf.convert_to_tensor( - [[[0.5, 0.5], [0.25, 0.75]], [[0.75, 0.25], [0.5, 0.5]]]) - states = tf.convert_to_tensor([4., 8., 12., 16.]) - # Run test. - blended_states = smc._blend_tensor(blending_weights, states, - num_samples, batch_size) - blended_states_values = sess.run(blended_states) - self.assertAllClose(blended_states_values[:, 0], [8., 14., 6., 12.]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/fivo/fivo/test_data/tiny_pianoroll.pkl b/research/fivo/fivo/test_data/tiny_pianoroll.pkl deleted file mode 100644 index c5501c6ceac1a6601b5f1be4c422be4f2c1baa86..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/test_data/tiny_pianoroll.pkl +++ /dev/null @@ -1,10979 +0,0 @@ -(dp1 -S'train_mean' -p2 -cnumpy.core.multiarray -_reconstruct -p3 -(cnumpy -ndarray -p4 -(I0 -tS'b' -tRp5 -(I1 -(I88 -tcnumpy -dtype -p6 -(S'f8' -I0 -I1 -tRp7 -(I3 -S'<' -NNNI-1 -I-1 -I0 -tbI00 
-S'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9e0^X\xbez,?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9e0^X\xbez\x00\x00\x00\x00\x00\x00\x00' -tRp101 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp102 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp103 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp104 -tp105 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp106 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp107 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp108 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp109 -tp110 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp111 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp112 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp113 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp114 -tp115 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp116 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp117 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp118 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp119 -tp120 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp121 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp122 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp123 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp124 -tp125 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp126 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp127 -g11 
-(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp128 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp129 -tp130 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp131 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp132 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp133 -tp134 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp135 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp136 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp137 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp138 -tp139 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp140 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp141 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp142 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp143 -tp144 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp145 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp146 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp147 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp148 -tp149 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp150 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp151 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp152 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp153 -tp154 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp155 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp156 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp157 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp158 -tp159 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp160 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp161 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp162 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp163 -tp164 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp165 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp166 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp167 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp168 -tp169 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp170 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp171 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp172 -g11 -(g12 
-S'X\x00\x00\x00\x00\x00\x00\x00' -tRp173 -tp174 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp175 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp176 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp177 -tp178 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp179 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp180 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp181 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp182 -tp183 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp184 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp185 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp186 -tp187 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp188 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp189 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp190 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp191 -tp192 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp193 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp194 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp195 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp196 -tp197 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp198 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp199 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp200 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp201 -tp202 -a(g11 -(g12 -S':\x00\x00\x00\x00\x00\x00\x00' -tRp203 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp204 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp205 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp206 -tp207 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp208 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp209 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp210 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp211 -tp212 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp213 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp214 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp215 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp216 -tp217 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp218 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp219 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp220 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp221 -tp222 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp223 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp224 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp225 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp226 -tp227 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp228 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp229 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp230 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp231 -tp232 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp233 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp234 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp235 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp236 -tp237 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp238 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp239 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp240 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp241 -tp242 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp243 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp244 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp245 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp246 -tp247 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp248 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp249 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp250 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp251 -tp252 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp253 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp254 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp255 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp256 -tp257 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp258 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp259 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp260 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp261 -tp262 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp263 -g11 -(g12 
-S'G\x00\x00\x00\x00\x00\x00\x00' -tRp264 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp265 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp266 -tp267 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp268 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp269 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp270 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp271 -tp272 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp273 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp274 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp275 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp276 -tp277 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp278 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp279 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp280 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp281 -tp282 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp283 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp284 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp285 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp286 -tp287 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp288 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp289 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp290 -tp291 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp292 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp293 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp294 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp295 -tp296 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp297 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp298 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp299 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp300 -tp301 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp302 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp303 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp304 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp305 -tp306 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp307 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp308 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp309 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp310 -tp311 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp312 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp313 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp314 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp315 -tp316 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp317 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp318 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp319 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp320 -tp321 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp322 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp323 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp324 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp325 -tp326 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp327 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp328 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp329 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp330 -tp331 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp332 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp333 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp334 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp335 -tp336 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp337 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp338 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp339 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp340 -tp341 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp342 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp343 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp344 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp345 -tp346 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp347 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp348 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp349 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp350 -tp351 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp352 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp353 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp354 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp355 -tp356 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp357 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp358 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp359 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp360 -tp361 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp362 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp363 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp364 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp365 -tp366 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp367 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp368 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp369 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp370 -tp371 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp372 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp373 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp374 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp375 -tp376 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp377 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp378 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp379 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp380 -tp381 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp382 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp383 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp384 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp385 -tp386 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp387 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp388 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp389 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp390 -tp391 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp392 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp393 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp394 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp395 -tp396 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp397 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp398 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp399 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp400 -tp401 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp402 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp403 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp404 -tp405 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp406 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp407 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp408 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp409 -tp410 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp411 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp412 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp413 -tp414 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp415 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp416 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp417 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp418 -tp419 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp420 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp421 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp422 -tp423 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp424 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp425 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp426 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp427 -tp428 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp429 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp430 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp431 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp432 -tp433 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp434 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp435 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp436 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp437 -tp438 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp439 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp440 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp441 -tp442 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp443 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp444 -g11 -(g12 
-S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp445 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp446 -tp447 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp448 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp449 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp450 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp451 -tp452 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp453 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp454 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp455 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp456 -tp457 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp458 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp459 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp460 -tp461 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp462 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp463 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp464 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp465 -tp466 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp467 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp468 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp469 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp470 -tp471 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp472 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp473 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp474 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp475 -tp476 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp477 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp478 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp479 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp480 -tp481 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp482 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp483 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp484 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp485 -tp486 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp487 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp488 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp489 -tp490 -a(g11 -(g12 
-S'C\x00\x00\x00\x00\x00\x00\x00' -tRp491 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp492 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp493 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp494 -tp495 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp496 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp497 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp498 -tp499 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp500 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp501 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp502 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp503 -tp504 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp505 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp506 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp507 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp508 -tp509 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp510 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp511 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp512 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp513 -tp514 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp515 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp516 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp517 -tp518 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp519 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp520 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp521 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp522 -tp523 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp524 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp525 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp526 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp527 -tp528 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp529 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp530 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp531 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp532 -tp533 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp534 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp535 -g11 -(g12 
-S'U\x00\x00\x00\x00\x00\x00\x00' -tRp536 -tp537 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp538 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp539 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp540 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp541 -tp542 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp543 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp544 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp545 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp546 -tp547 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp548 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp549 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp550 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp551 -tp552 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp553 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp554 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp555 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp556 -tp557 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp558 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp559 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp560 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp561 -tp562 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp563 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp564 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp565 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp566 -tp567 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp568 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp569 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp570 -tp571 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp572 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp573 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp574 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp575 -tp576 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp577 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp578 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp579 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp580 -tp581 -a(g11 -(g12 
-S'>\x00\x00\x00\x00\x00\x00\x00' -tRp582 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp583 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp584 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp585 -tp586 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp587 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp588 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp589 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp590 -tp591 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp592 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp593 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp594 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp595 -tp596 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp597 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp598 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp599 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp600 -tp601 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp602 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp603 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp604 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp605 -tp606 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp607 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp608 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp609 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp610 -tp611 -a(g11 -(g12 -S'6\x00\x00\x00\x00\x00\x00\x00' -tRp612 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp613 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp614 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp615 -tp616 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp617 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp618 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp619 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp620 -tp621 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp622 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp623 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp624 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp625 -tp626 -a(g11 -(g12 
-S'0\x00\x00\x00\x00\x00\x00\x00' -tRp627 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp628 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp629 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp630 -tp631 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp632 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp633 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp634 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp635 -tp636 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp637 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp638 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp639 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp640 -tp641 -aa(lp642 -(g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp643 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp644 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp645 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp646 -tp647 -a(g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp648 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp649 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp650 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp651 -tp652 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp653 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp654 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp655 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp656 -tp657 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp658 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp659 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp660 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp661 -tp662 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp663 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp664 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp665 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp666 -tp667 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp668 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp669 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp670 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp671 -tp672 -a(g11 -(g12 
-S'?\x00\x00\x00\x00\x00\x00\x00' -tRp673 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp674 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp675 -tp676 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp677 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp678 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp679 -tp680 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp681 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp682 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp683 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp684 -tp685 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp686 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp687 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp688 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp689 -tp690 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp691 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp692 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp693 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp694 -tp695 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp696 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp697 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp698 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp699 -tp700 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp701 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp702 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp703 -tp704 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp705 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp706 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp707 -tp708 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp709 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp710 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp711 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp712 -tp713 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp714 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp715 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp716 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp717 -tp718 -a(g11 -(g12 
-S'A\x00\x00\x00\x00\x00\x00\x00' -tRp719 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp720 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp721 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp722 -tp723 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp724 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp725 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp726 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp727 -tp728 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp729 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp730 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp731 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp732 -tp733 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp734 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp735 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp736 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp737 -tp738 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp739 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp740 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp741 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp742 -tp743 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp744 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp745 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp746 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp747 -tp748 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp749 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp750 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp751 -tp752 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp753 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp754 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp755 -tp756 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp757 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp758 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp759 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp760 -tp761 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp762 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp763 -g11 -(g12 
-S'P\x00\x00\x00\x00\x00\x00\x00' -tRp764 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp765 -tp766 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp767 -g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp768 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp769 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp770 -tp771 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp772 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp773 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp774 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp775 -tp776 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp777 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp778 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp779 -tp780 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp781 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp782 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp783 -tp784 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp785 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp786 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp787 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp788 -tp789 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp790 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp791 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp792 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp793 -tp794 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp795 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp796 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp797 -g11 -(g12 -S'\\\x00\x00\x00\x00\x00\x00\x00' -tRp798 -tp799 -a(g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp800 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp801 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp802 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp803 -tp804 -a(g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp805 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp806 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp807 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp808 -tp809 -a(g11 -(g12 
-S'B\x00\x00\x00\x00\x00\x00\x00' -tRp810 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp811 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp812 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp813 -tp814 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp815 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp816 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp817 -tp818 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp819 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp820 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp821 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp822 -tp823 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp824 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp825 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp826 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp827 -tp828 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp829 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp830 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp831 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp832 -tp833 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp834 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp835 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp836 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp837 -tp838 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp839 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp840 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp841 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp842 -tp843 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp844 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp845 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp846 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp847 -tp848 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp849 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp850 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp851 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp852 -tp853 -a(g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp854 -g11 -(g12 
-S'M\x00\x00\x00\x00\x00\x00\x00' -tRp855 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp856 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp857 -tp858 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp859 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp860 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp861 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp862 -tp863 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp864 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp865 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp866 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp867 -tp868 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp869 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp870 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp871 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp872 -tp873 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp874 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp875 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp876 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp877 -tp878 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp879 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp880 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp881 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp882 -tp883 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp884 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp885 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp886 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp887 -tp888 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp889 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp890 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp891 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp892 -tp893 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp894 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp895 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp896 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp897 -tp898 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp899 -g11 -(g12 
-S'N\x00\x00\x00\x00\x00\x00\x00' -tRp900 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp901 -g11 -(g12 -S'\\\x00\x00\x00\x00\x00\x00\x00' -tRp902 -tp903 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp904 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp905 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp906 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp907 -tp908 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp909 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp910 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp911 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp912 -tp913 -a(g11 -(g12 -S'F\x00\x00\x00\x00\x00\x00\x00' -tRp914 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp915 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp916 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp917 -tp918 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp919 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp920 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp921 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp922 -tp923 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp924 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp925 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp926 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp927 -tp928 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp929 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp930 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp931 -g11 -(g12 -S'\\\x00\x00\x00\x00\x00\x00\x00' -tRp932 -tp933 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp934 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp935 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp936 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp937 -tp938 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp939 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp940 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp941 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp942 -tp943 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp944 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp945 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp946 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp947 -tp948 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp949 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp950 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp951 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp952 -tp953 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp954 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp955 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp956 -g11 -(g12 -S'U\x00\x00\x00\x00\x00\x00\x00' -tRp957 -tp958 -aa(lp959 -(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp960 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp961 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp962 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp963 -tp964 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp965 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp966 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp967 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp968 -tp969 -a(g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp970 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp971 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp972 -tp973 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp974 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp975 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp976 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp977 -tp978 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp979 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp980 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp981 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp982 -tp983 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp984 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp985 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp986 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp987 -tp988 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp989 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp990 -g11 -(g12 
-S'T\x00\x00\x00\x00\x00\x00\x00' -tRp991 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp992 -tp993 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp994 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp995 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp996 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp997 -tp998 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp999 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1000 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1001 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1002 -tp1003 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1004 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1005 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1006 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp1007 -tp1008 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1009 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1010 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1011 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1012 -tp1013 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1014 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1015 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1016 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1017 -tp1018 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1019 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1020 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1021 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1022 -tp1023 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1024 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1025 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1026 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1027 -tp1028 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1029 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1030 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1031 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1032 -tp1033 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1034 -g11 -(g12 
-S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1035 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1036 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1037 -tp1038 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1039 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1040 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1041 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1042 -tp1043 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1044 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1045 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1046 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1047 -tp1048 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1049 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1050 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1051 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1052 -tp1053 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1054 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1055 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1056 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1057 -tp1058 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1059 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1060 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1061 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1062 -tp1063 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1064 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1065 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1066 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1067 -tp1068 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1069 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1070 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1071 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1072 -tp1073 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1074 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1075 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1076 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1077 -tp1078 -a(g11 -(g12 
-S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1079 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1080 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1081 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1082 -tp1083 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1084 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1085 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1086 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1087 -tp1088 -a(g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1089 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1090 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1091 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1092 -tp1093 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1094 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1095 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1096 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1097 -tp1098 -a(g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1099 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1100 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp1101 -tp1102 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1103 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1104 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1105 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1106 -tp1107 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1108 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1109 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1110 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1111 -tp1112 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1113 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1114 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1115 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1116 -tp1117 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1118 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1119 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1120 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1121 -tp1122 -a(g11 -(g12 
-S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1123 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1124 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1125 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp1126 -tp1127 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1128 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1129 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1130 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp1131 -tp1132 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1133 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1134 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1135 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp1136 -tp1137 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1138 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp1139 -g11 -(g12 -S'W\x00\x00\x00\x00\x00\x00\x00' -tRp1140 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1141 -tp1142 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1143 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1144 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1145 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp1146 -tp1147 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1148 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1149 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1150 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp1151 -tp1152 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1153 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1154 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1155 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp1156 -tp1157 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1158 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1159 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1160 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1161 -tp1162 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1163 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1164 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1165 -g11 -(g12 
-S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1166 -tp1167 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1168 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1169 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1170 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp1171 -tp1172 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1173 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1174 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1175 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1176 -tp1177 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1178 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1179 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1180 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1181 -tp1182 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1183 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1184 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1185 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1186 -tp1187 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1188 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1189 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1190 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1191 -tp1192 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1193 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1194 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1195 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1196 -tp1197 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1198 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1199 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1200 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1201 -tp1202 -aa(lp1203 -(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1204 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1205 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1206 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1207 -tp1208 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1209 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1210 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1211 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1212 -tp1213 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1214 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1215 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1216 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1217 -tp1218 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1219 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1220 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1221 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1222 -tp1223 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1224 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1225 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1226 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1227 -tp1228 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1229 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1230 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1231 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1232 -tp1233 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1234 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1235 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1236 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1237 -tp1238 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1239 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1240 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1241 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1242 -tp1243 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp1244 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1245 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1246 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1247 -tp1248 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1249 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1250 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1251 -tp1252 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1253 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1254 -g11 -(g12 
-S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1255 -tp1256 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1257 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1258 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1259 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1260 -tp1261 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1262 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1263 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1264 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1265 -tp1266 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1267 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1268 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1269 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1270 -tp1271 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1272 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1273 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1274 -tp1275 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1276 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1277 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1278 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1279 -tp1280 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1281 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1282 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1283 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1284 -tp1285 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1286 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1287 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1288 -tp1289 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1290 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1291 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1292 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1293 -tp1294 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1295 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1296 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1297 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1298 -tp1299 -a(g11 -(g12 
-S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1300 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1301 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1302 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1303 -tp1304 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1305 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1306 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1307 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1308 -tp1309 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1310 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1311 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1312 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1313 -tp1314 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1315 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1316 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1317 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1318 -tp1319 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1320 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1321 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1322 -tp1323 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1324 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1325 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1326 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1327 -tp1328 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1329 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1330 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1331 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp1332 -tp1333 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1334 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1335 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1336 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1337 -tp1338 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1339 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1340 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1341 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1342 -tp1343 -a(g11 -(g12 
-S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1344 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1345 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1346 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1347 -tp1348 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1349 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp1350 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1351 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1352 -tp1353 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1354 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1355 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1356 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1357 -tp1358 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1359 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1360 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1361 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1362 -tp1363 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1364 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1365 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1366 -tp1367 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1368 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1369 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1370 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1371 -tp1372 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1373 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1374 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1375 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1376 -tp1377 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1378 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1379 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1380 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp1381 -tp1382 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1383 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1384 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1385 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1386 -tp1387 -a(g11 -(g12 
-S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1388 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1389 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1390 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1391 -tp1392 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1393 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1394 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1395 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1396 -tp1397 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp1398 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1399 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1400 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1401 -tp1402 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1403 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1404 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1405 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1406 -tp1407 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1408 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1409 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp1410 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1411 -tp1412 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1413 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1414 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp1415 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp1416 -tp1417 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1418 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1419 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1420 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1421 -tp1422 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1423 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1424 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1425 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1426 -tp1427 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1428 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1429 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1430 -g11 -(g12 
-S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1431 -tp1432 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1433 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1434 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1435 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1436 -tp1437 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1438 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1439 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1440 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1441 -tp1442 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1443 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1444 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1445 -tp1446 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1447 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1448 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1449 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1450 -tp1451 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1452 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1453 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1454 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1455 -tp1456 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp1457 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1458 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1459 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1460 -tp1461 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1462 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1463 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1464 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1465 -tp1466 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1467 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1468 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1469 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1470 -tp1471 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1472 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1473 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1474 -g11 -(g12 
-S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1475 -tp1476 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1477 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1478 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1479 -tp1480 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1481 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1482 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1483 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1484 -tp1485 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1486 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1487 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1488 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1489 -tp1490 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1491 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1492 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1493 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1494 -tp1495 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1496 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1497 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1498 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1499 -tp1500 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1501 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1502 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1503 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1504 -tp1505 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1506 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1507 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1508 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1509 -tp1510 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1511 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1512 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1513 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1514 -tp1515 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1516 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1517 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1518 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1519 -tp1520 -aa(lp1521 -(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp1522 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1523 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1524 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1525 -tp1526 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1527 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1528 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1529 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1530 -tp1531 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1532 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1533 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1534 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp1535 -tp1536 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1537 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1538 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1539 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1540 -tp1541 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1542 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1543 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1544 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1545 -tp1546 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1547 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1548 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1549 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1550 -tp1551 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1552 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1553 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1554 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1555 -tp1556 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1557 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1558 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1559 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1560 -tp1561 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1562 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1563 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1564 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1565 -tp1566 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1567 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1568 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1569 -tp1570 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1571 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1572 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1573 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1574 -tp1575 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1576 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1577 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1578 -tp1579 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1580 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1581 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1582 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1583 -tp1584 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1585 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1586 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1587 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1588 -tp1589 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1590 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1591 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1592 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1593 -tp1594 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1595 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1596 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1597 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1598 -tp1599 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1600 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1601 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1602 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1603 -tp1604 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp1605 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1606 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1607 -g11 -(g12 
-S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1608 -tp1609 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1610 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1611 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1612 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1613 -tp1614 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1615 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1616 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1617 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1618 -tp1619 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1620 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1621 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1622 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1623 -tp1624 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1625 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1626 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1627 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1628 -tp1629 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1630 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1631 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1632 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1633 -tp1634 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp1635 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1636 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1637 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1638 -tp1639 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1640 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1641 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1642 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1643 -tp1644 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1645 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1646 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1647 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1648 -tp1649 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1650 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1651 -g11 -(g12 
-S'I\x00\x00\x00\x00\x00\x00\x00' -tRp1652 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1653 -tp1654 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1655 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1656 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1657 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1658 -tp1659 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1660 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1661 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1662 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1663 -tp1664 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1665 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1666 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1667 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1668 -tp1669 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1670 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1671 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1672 -tp1673 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1674 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1675 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1676 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1677 -tp1678 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1679 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1680 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1681 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1682 -tp1683 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp1684 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1685 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1686 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1687 -tp1688 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1689 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1690 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1691 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1692 -tp1693 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1694 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1695 -g11 -(g12 
-S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1696 -tp1697 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1698 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1699 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1700 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1701 -tp1702 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1703 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1704 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1705 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1706 -tp1707 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1708 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1709 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1710 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp1711 -tp1712 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1713 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1714 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1715 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1716 -tp1717 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1718 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1719 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1720 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1721 -tp1722 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1723 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1724 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1725 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1726 -tp1727 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1728 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1729 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1730 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1731 -tp1732 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1733 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1734 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1735 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1736 -tp1737 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1738 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1739 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1740 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1741 -tp1742 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1743 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1744 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1745 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1746 -tp1747 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1748 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1749 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1750 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1751 -tp1752 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1753 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1754 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1755 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1756 -tp1757 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1758 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1759 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1760 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1761 -tp1762 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1763 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1764 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1765 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1766 -tp1767 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1768 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1769 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1770 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1771 -tp1772 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1773 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1774 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1775 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp1776 -tp1777 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1778 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1779 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1780 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp1781 -tp1782 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1783 -g11 -(g12 
-S'D\x00\x00\x00\x00\x00\x00\x00' -tRp1784 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1785 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1786 -tp1787 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1788 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1789 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1790 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1791 -tp1792 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1793 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1794 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1795 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1796 -tp1797 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1798 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1799 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1800 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1801 -tp1802 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1803 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1804 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1805 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1806 -tp1807 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1808 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1809 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1810 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1811 -tp1812 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1813 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1814 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1815 -tp1816 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1817 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1818 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1819 -tp1820 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1821 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1822 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1823 -tp1824 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1825 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp1826 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1827 -tp1828 -a(g11 -(g12 
-S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1829 -g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1830 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1831 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1832 -tp1833 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1834 -g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1835 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1836 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1837 -tp1838 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1839 -g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1840 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1841 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1842 -tp1843 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1844 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1845 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1846 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp1847 -tp1848 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1849 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1850 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp1851 -tp1852 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1853 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1854 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1855 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp1856 -tp1857 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1858 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1859 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1860 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1861 -tp1862 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1863 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1864 -g11 -(g12 -S'I\x00\x00\x00\x00\x00\x00\x00' -tRp1865 -tp1866 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1867 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1868 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1869 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1870 -tp1871 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp1872 -g11 -(g12 
-S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1873 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1874 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1875 -tp1876 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1877 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1878 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1879 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1880 -tp1881 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1882 -g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp1883 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1884 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1885 -tp1886 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1887 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1888 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1889 -tp1890 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1891 -g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp1892 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1893 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1894 -tp1895 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1896 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1897 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1898 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1899 -tp1900 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1901 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1902 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1903 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1904 -tp1905 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1906 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1907 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1908 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1909 -tp1910 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp1911 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1912 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1913 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1914 -tp1915 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1916 -g11 -(g12 
-S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1917 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1918 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1919 -tp1920 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1921 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp1922 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1923 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1924 -tp1925 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1926 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1927 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1928 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1929 -tp1930 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp1931 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1932 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1933 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1934 -tp1935 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1936 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1937 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1938 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1939 -tp1940 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp1941 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp1942 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1943 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1944 -tp1945 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1946 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1947 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1948 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1949 -tp1950 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1951 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1952 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1953 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp1954 -tp1955 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp1956 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1957 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp1958 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1959 -tp1960 -a(g11 -(g12 
-S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1961 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1962 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1963 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp1964 -tp1965 -a(g11 -(g12 -S'4\x00\x00\x00\x00\x00\x00\x00' -tRp1966 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1967 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1968 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1969 -tp1970 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp1971 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1972 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1973 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp1974 -tp1975 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp1976 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1977 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1978 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1979 -tp1980 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp1981 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1982 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp1983 -tp1984 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp1985 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1986 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp1987 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1988 -tp1989 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp1990 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp1991 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1992 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp1993 -tp1994 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp1995 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp1996 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp1997 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp1998 -tp1999 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2000 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2001 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2002 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2003 -tp2004 -a(g11 -(g12 
-S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2005 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2006 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2007 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2008 -tp2009 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2010 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2011 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2012 -tp2013 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2014 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2015 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2016 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2017 -tp2018 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2019 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2020 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2021 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2022 -tp2023 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2024 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2025 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2026 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2027 -tp2028 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2029 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2030 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2031 -tp2032 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2033 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2034 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2035 -tp2036 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2037 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2038 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2039 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2040 -tp2041 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2042 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2043 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2044 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2045 -tp2046 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2047 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2048 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2049 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2050 -tp2051 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp2052 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2053 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2054 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2055 -tp2056 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2057 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2058 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2059 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2060 -tp2061 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp2062 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2063 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2064 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2065 -tp2066 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp2067 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2068 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2069 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2070 -tp2071 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp2072 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2073 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2074 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2075 -tp2076 -aa(lp2077 -(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2078 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2079 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2080 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2081 -tp2082 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2083 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2084 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2085 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2086 -tp2087 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2088 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2089 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2090 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2091 -tp2092 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2093 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2094 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2095 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2096 -tp2097 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2098 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2099 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2100 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2101 -tp2102 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2103 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2104 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2105 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2106 -tp2107 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2108 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2109 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2110 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2111 -tp2112 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2113 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2114 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2115 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2116 -tp2117 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2118 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2119 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2120 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2121 -tp2122 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2123 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2124 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2125 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2126 -tp2127 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2128 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2129 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2130 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2131 -tp2132 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp2133 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2134 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2135 -tp2136 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2137 -g11 -(g12 
-S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2138 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2139 -tp2140 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2141 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2142 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2143 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2144 -tp2145 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2146 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2147 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2148 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2149 -tp2150 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2151 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2152 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2153 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2154 -tp2155 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2156 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2157 -g11 -(g12 -S'P\x00\x00\x00\x00\x00\x00\x00' -tRp2158 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2159 -tp2160 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2161 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2162 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2163 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2164 -tp2165 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2166 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2167 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2168 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2169 -tp2170 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2171 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2172 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2173 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2174 -tp2175 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2176 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2177 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2178 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2179 -tp2180 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2181 -g11 -(g12 
-S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2182 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2183 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2184 -tp2185 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2186 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2187 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2188 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2189 -tp2190 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2191 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2192 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2193 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2194 -tp2195 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp2196 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2197 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2198 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2199 -tp2200 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2201 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2202 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2203 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2204 -tp2205 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2206 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2207 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2208 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2209 -tp2210 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2211 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2212 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2213 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2214 -tp2215 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2216 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2217 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2218 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2219 -tp2220 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2221 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2222 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2223 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2224 -tp2225 -a(g11 -(g12 
-S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2226 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2227 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2228 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2229 -tp2230 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2231 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2232 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2233 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2234 -tp2235 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2236 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2237 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2238 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2239 -tp2240 -aa(lp2241 -(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2242 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2243 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2244 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2245 -tp2246 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2247 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2248 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2249 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2250 -tp2251 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2252 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2253 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2254 -tp2255 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2256 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2257 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2258 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2259 -tp2260 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2261 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2262 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2263 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2264 -tp2265 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2266 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2267 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2268 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2269 -tp2270 -a(g11 -(g12 
-S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2271 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2272 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2273 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2274 -tp2275 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2276 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2277 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2278 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2279 -tp2280 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2281 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2282 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2283 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2284 -tp2285 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2286 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2287 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2288 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2289 -tp2290 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2291 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2292 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2293 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2294 -tp2295 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2296 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2297 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2298 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2299 -tp2300 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2301 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2302 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2303 -tp2304 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2305 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2306 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2307 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2308 -tp2309 -a(g11 -(g12 -S'2\x00\x00\x00\x00\x00\x00\x00' -tRp2310 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2311 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2312 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2313 -tp2314 -a(g11 -(g12 
-S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2315 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2316 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2317 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2318 -tp2319 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2320 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2321 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2322 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2323 -tp2324 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp2325 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2326 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2327 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2328 -tp2329 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2330 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2331 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2332 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2333 -tp2334 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2335 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2336 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2337 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2338 -tp2339 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2340 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2341 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2342 -tp2343 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2344 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2345 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2346 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2347 -tp2348 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2349 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2350 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2351 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2352 -tp2353 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2354 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2355 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2356 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2357 -tp2358 -a(g11 -(g12 
-S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2359 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2360 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2361 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2362 -tp2363 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2364 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2365 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2366 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2367 -tp2368 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2369 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2370 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2371 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2372 -tp2373 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2374 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2375 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2376 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2377 -tp2378 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2379 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2380 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2381 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2382 -tp2383 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2384 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2385 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2386 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2387 -tp2388 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2389 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2390 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2391 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2392 -tp2393 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2394 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2395 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2396 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2397 -tp2398 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2399 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2400 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2401 -g11 -(g12 
-S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2402 -tp2403 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2404 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2405 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2406 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2407 -tp2408 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2409 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2410 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2411 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2412 -tp2413 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2414 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2415 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2416 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2417 -tp2418 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2419 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2420 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2421 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2422 -tp2423 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2424 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2425 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2426 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2427 -tp2428 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2429 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2430 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2431 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2432 -tp2433 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2434 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2435 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2436 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2437 -tp2438 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2439 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2440 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2441 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2442 -tp2443 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2444 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2445 -g11 -(g12 
-S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2446 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2447 -tp2448 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2449 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2450 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2451 -tp2452 -a(g11 -(g12 -S'5\x00\x00\x00\x00\x00\x00\x00' -tRp2453 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2454 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2455 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2456 -tp2457 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2458 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2459 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2460 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2461 -tp2462 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2463 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2464 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2465 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2466 -tp2467 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2468 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2469 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2470 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2471 -tp2472 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2473 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2474 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2475 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2476 -tp2477 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2478 -g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2479 -g11 -(g12 -S'K\x00\x00\x00\x00\x00\x00\x00' -tRp2480 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2481 -tp2482 -a(g11 -(g12 -S'=\x00\x00\x00\x00\x00\x00\x00' -tRp2483 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2484 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2485 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2486 -tp2487 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2488 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2489 -g11 -(g12 
-S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2490 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2491 -tp2492 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2493 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2494 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2495 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2496 -tp2497 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2498 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2499 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2500 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2501 -tp2502 -a(g11 -(g12 -S'6\x00\x00\x00\x00\x00\x00\x00' -tRp2503 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2504 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2505 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2506 -tp2507 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2508 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2509 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2510 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2511 -tp2512 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2513 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2514 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2515 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2516 -tp2517 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2518 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2519 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2520 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2521 -tp2522 -aa(lp2523 -(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2524 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2525 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2526 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2527 -tp2528 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2529 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2530 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2531 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2532 -tp2533 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2534 -g11 -(g12 
-S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2535 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2536 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2537 -tp2538 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2539 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2540 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2541 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp2542 -tp2543 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2544 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2545 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2546 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp2547 -tp2548 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2549 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2550 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2551 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2552 -tp2553 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2554 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2555 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2556 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2557 -tp2558 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2559 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2560 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2561 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2562 -tp2563 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2564 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2565 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2566 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2567 -tp2568 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2569 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2570 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2571 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2572 -tp2573 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2574 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2575 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2576 -tp2577 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2578 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2579 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2580 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2581 -tp2582 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2583 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2584 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2585 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2586 -tp2587 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2588 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2589 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2590 -tp2591 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2592 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2593 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2594 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2595 -tp2596 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2597 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2598 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2599 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2600 -tp2601 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2602 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2603 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2604 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2605 -tp2606 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2607 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2608 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2609 -tp2610 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2611 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2612 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2613 -tp2614 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2615 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2616 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2617 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2618 -tp2619 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2620 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2621 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2622 -g11 -(g12 
-S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2623 -tp2624 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2625 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2626 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2627 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2628 -tp2629 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2630 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2631 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2632 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2633 -tp2634 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2635 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2636 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2637 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2638 -tp2639 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2640 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2641 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2642 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2643 -tp2644 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2645 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2646 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2647 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2648 -tp2649 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2650 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2651 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2652 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2653 -tp2654 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2655 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2656 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2657 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2658 -tp2659 -a(g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2660 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2661 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2662 -g11 -(g12 -S'Z\x00\x00\x00\x00\x00\x00\x00' -tRp2663 -tp2664 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2665 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2666 -g11 -(g12 
-S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2667 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2668 -tp2669 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2670 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2671 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2672 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2673 -tp2674 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2675 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2676 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2677 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2678 -tp2679 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2680 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2681 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2682 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2683 -tp2684 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2685 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2686 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2687 -g11 -(g12 -S']\x00\x00\x00\x00\x00\x00\x00' -tRp2688 -tp2689 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2690 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2691 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2692 -g11 -(g12 -S'[\x00\x00\x00\x00\x00\x00\x00' -tRp2693 -tp2694 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2695 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2696 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2697 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2698 -tp2699 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2700 -g11 -(g12 -S'R\x00\x00\x00\x00\x00\x00\x00' -tRp2701 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2702 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2703 -tp2704 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2705 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2706 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2707 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2708 -tp2709 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2710 -g11 -(g12 
-S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2711 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2712 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2713 -tp2714 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2715 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2716 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2717 -g11 -(g12 -S'Y\x00\x00\x00\x00\x00\x00\x00' -tRp2718 -tp2719 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2720 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2721 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2722 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2723 -tp2724 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2725 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2726 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2727 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2728 -tp2729 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2730 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2731 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2732 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp2733 -tp2734 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2735 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2736 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2737 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2738 -tp2739 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2740 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2741 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2742 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2743 -tp2744 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2745 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2746 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2747 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2748 -tp2749 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2750 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2751 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2752 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2753 -tp2754 -a(g11 -(g12 
-S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2755 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2756 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2757 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2758 -tp2759 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2760 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2761 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2762 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2763 -tp2764 -aa(lp2765 -(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2766 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2767 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2768 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2769 -tp2770 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2771 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2772 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2773 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2774 -tp2775 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2776 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2777 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2778 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2779 -tp2780 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2781 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2782 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2783 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2784 -tp2785 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2786 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2787 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2788 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2789 -tp2790 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2791 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2792 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2793 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2794 -tp2795 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2796 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2797 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2798 -g11 -(g12 
-S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2799 -tp2800 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2801 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2802 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2803 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2804 -tp2805 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2806 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2807 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2808 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2809 -tp2810 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2811 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2812 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2813 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2814 -tp2815 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2816 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2817 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2818 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2819 -tp2820 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2821 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2822 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2823 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2824 -tp2825 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2826 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2827 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2828 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2829 -tp2830 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2831 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2832 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2833 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2834 -tp2835 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2836 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2837 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2838 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2839 -tp2840 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp2841 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2842 -g11 -(g12 
-S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2843 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2844 -tp2845 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2846 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2847 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2848 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2849 -tp2850 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2851 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2852 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2853 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2854 -tp2855 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2856 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2857 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2858 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2859 -tp2860 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2861 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2862 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2863 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2864 -tp2865 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2866 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp2867 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2868 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2869 -tp2870 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2871 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp2872 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2873 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2874 -tp2875 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2876 -g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp2877 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2878 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2879 -tp2880 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2881 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2882 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2883 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2884 -tp2885 -a(g11 -(g12 -S'?\x00\x00\x00\x00\x00\x00\x00' -tRp2886 -g11 -(g12 
-S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2887 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2888 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2889 -tp2890 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2891 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2892 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2893 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2894 -tp2895 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2896 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2897 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2898 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2899 -tp2900 -a(g11 -(g12 -S'D\x00\x00\x00\x00\x00\x00\x00' -tRp2901 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2902 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2903 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2904 -tp2905 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2906 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2907 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2908 -tp2909 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp2910 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2911 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2912 -tp2913 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2914 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2915 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2916 -tp2917 -a(g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2918 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2919 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp2920 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2921 -tp2922 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp2923 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2924 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2925 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2926 -tp2927 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2928 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2929 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2930 -g11 -(g12 
-S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2931 -tp2932 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp2933 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp2934 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2935 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2936 -tp2937 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp2938 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2939 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2940 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2941 -tp2942 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2943 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2944 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2945 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2946 -tp2947 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2948 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2949 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2950 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2951 -tp2952 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2953 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2954 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2955 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2956 -tp2957 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2958 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp2959 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2960 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2961 -tp2962 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp2963 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2964 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2965 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2966 -tp2967 -a(g11 -(g12 -S'8\x00\x00\x00\x00\x00\x00\x00' -tRp2968 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2969 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2970 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2971 -tp2972 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp2973 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp2974 -g11 -(g12 
-S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2975 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp2976 -tp2977 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp2978 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2979 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2980 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2981 -tp2982 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2983 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2984 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2985 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2986 -tp2987 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp2988 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2989 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp2990 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp2991 -tp2992 -a(g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2993 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp2994 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp2995 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp2996 -tp2997 -a(g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp2998 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp2999 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3000 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp3001 -tp3002 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3003 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3004 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3005 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp3006 -tp3007 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3008 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3009 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3010 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp3011 -tp3012 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3013 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3014 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3015 -g11 -(g12 -S'X\x00\x00\x00\x00\x00\x00\x00' -tRp3016 -tp3017 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3018 -g11 -(g12 
-S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3019 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp3020 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp3021 -tp3022 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3023 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3024 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3025 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3026 -tp3027 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3028 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3029 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3030 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3031 -tp3032 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3033 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3034 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3035 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3036 -tp3037 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3038 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3039 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3040 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3041 -tp3042 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp3043 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3044 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3045 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3046 -tp3047 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3048 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3049 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3050 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3051 -tp3052 -a(g11 -(g12 -S'6\x00\x00\x00\x00\x00\x00\x00' -tRp3053 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3054 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3055 -g11 -(g12 -S'V\x00\x00\x00\x00\x00\x00\x00' -tRp3056 -tp3057 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3058 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3059 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3060 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3061 -tp3062 -a(g11 -(g12 
-S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3063 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3064 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3065 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3066 -tp3067 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3068 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3069 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3070 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3071 -tp3072 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3073 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3074 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3075 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp3076 -tp3077 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3078 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3079 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3080 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp3081 -tp3082 -aa(lp3083 -(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3084 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3085 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3086 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3087 -tp3088 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3089 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3090 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3091 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3092 -tp3093 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp3094 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp3095 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3096 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3097 -tp3098 -a(g11 -(g12 -S';\x00\x00\x00\x00\x00\x00\x00' -tRp3099 -g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3100 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3101 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3102 -tp3103 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3104 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3105 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3106 -g11 -(g12 
-S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3107 -tp3108 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3109 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp3110 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3111 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3112 -tp3113 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3114 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3115 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3116 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3117 -tp3118 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3119 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3120 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3121 -tp3122 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3123 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3124 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3125 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3126 -tp3127 -a(g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3128 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3129 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3130 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3131 -tp3132 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3133 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3134 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3135 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3136 -tp3137 -a(g11 -(g12 -S'B\x00\x00\x00\x00\x00\x00\x00' -tRp3138 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3139 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3140 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp3141 -tp3142 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3143 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3144 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3145 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3146 -tp3147 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3148 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3149 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3150 -g11 -(g12 
-S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3151 -tp3152 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3153 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3154 -g11 -(g12 -S'N\x00\x00\x00\x00\x00\x00\x00' -tRp3155 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3156 -tp3157 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3158 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3159 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3160 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3161 -tp3162 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3163 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3164 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3165 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3166 -tp3167 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3168 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3169 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3170 -g11 -(g12 -S'T\x00\x00\x00\x00\x00\x00\x00' -tRp3171 -tp3172 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3173 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp3174 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3175 -g11 -(g12 -S'S\x00\x00\x00\x00\x00\x00\x00' -tRp3176 -tp3177 -a(g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp3178 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3179 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3180 -g11 -(g12 -S'Q\x00\x00\x00\x00\x00\x00\x00' -tRp3181 -tp3182 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3183 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3184 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3185 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3186 -tp3187 -a(g11 -(g12 -S'>\x00\x00\x00\x00\x00\x00\x00' -tRp3188 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3189 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3190 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3191 -tp3192 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3193 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3194 -g11 -(g12 
-S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3195 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3196 -tp3197 -a(g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3198 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3199 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3200 -tp3201 -a(g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3202 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3203 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3204 -g11 -(g12 -S'O\x00\x00\x00\x00\x00\x00\x00' -tRp3205 -tp3206 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp3207 -g11 -(g12 -S'E\x00\x00\x00\x00\x00\x00\x00' -tRp3208 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3209 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3210 -tp3211 -a(g11 -(g12 -S'<\x00\x00\x00\x00\x00\x00\x00' -tRp3212 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3213 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3214 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3215 -tp3216 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3217 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3218 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3219 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3220 -tp3221 -a(g11 -(g12 -S'9\x00\x00\x00\x00\x00\x00\x00' -tRp3222 -g11 -(g12 -S'A\x00\x00\x00\x00\x00\x00\x00' -tRp3223 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3224 -g11 -(g12 -S'M\x00\x00\x00\x00\x00\x00\x00' -tRp3225 -tp3226 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3227 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3228 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3229 -g11 -(g12 -S'L\x00\x00\x00\x00\x00\x00\x00' -tRp3230 -tp3231 -a(g11 -(g12 -S'7\x00\x00\x00\x00\x00\x00\x00' -tRp3232 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3233 -g11 -(g12 -S'G\x00\x00\x00\x00\x00\x00\x00' -tRp3234 -g11 -(g12 -S'J\x00\x00\x00\x00\x00\x00\x00' -tRp3235 -tp3236 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp3237 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3238 -g11 -(g12 
-S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3239 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3240 -tp3241 -a(g11 -(g12 -S'0\x00\x00\x00\x00\x00\x00\x00' -tRp3242 -g11 -(g12 -S'@\x00\x00\x00\x00\x00\x00\x00' -tRp3243 -g11 -(g12 -S'C\x00\x00\x00\x00\x00\x00\x00' -tRp3244 -g11 -(g12 -S'H\x00\x00\x00\x00\x00\x00\x00' -tRp3245 -tp3246 -aas. \ No newline at end of file diff --git a/research/fivo/fivo/test_data/tiny_speech_dataset.tfrecord b/research/fivo/fivo/test_data/tiny_speech_dataset.tfrecord deleted file mode 100644 index 93fe8791b631da35b9d03d37e6494cc7c50cb55d..0000000000000000000000000000000000000000 Binary files a/research/fivo/fivo/test_data/tiny_speech_dataset.tfrecord and /dev/null differ diff --git a/research/fivo/fivo/test_utils.py b/research/fivo/fivo/test_utils.py deleted file mode 100644 index 48bbd3d483c45457b82b12ac1587d4c314b79f49..0000000000000000000000000000000000000000 --- a/research/fivo/fivo/test_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for testing FIVO. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from fivo.models import base -from fivo.models import srnn -from fivo.models import vrnn - - -def create_vrnn(generative_class=base.ConditionalNormalDistribution, - batch_size=2, data_size=3, rnn_hidden_size=4, - latent_size=5, fcnet_hidden_size=7, encoded_data_size=9, - encoded_latent_size=11, num_timesteps=7, data_lengths=(7, 4), - use_tilt=False, random_seed=None): - """Creates a VRNN and some dummy data to feed it for testing purposes. - - Args: - generative_class: The class of the generative distribution. - batch_size: The number of elements per batch. - data_size: The dimension of the vectors that make up the data sequences. - rnn_hidden_size: The hidden state dimension of the RNN that forms the - deterministic part of this VRNN. - latent_size: The size of the stochastic latent state of the VRNN. - fcnet_hidden_size: The size of the hidden layer of the fully connected - networks that parameterize the conditional probability distributions - of the VRNN. - encoded_data_size: The size of the output of the data encoding network. - encoded_latent_size: The size of the output of the latent state encoding - network. - num_timesteps: The maximum number of timesteps in the data. - data_lengths: A tuple of size batch_size that contains the desired lengths - of each sequence in the dummy data. - use_tilt: Use a tilting function. - random_seed: A random seed to feed the VRNN, mainly useful for testing - purposes. - - Returns: - model: A VRNN object. - inputs: A Tensor of shape [num_timesteps, batch_size, data_size], the inputs - to the model, also known as the observations. - targets: A Tensor of shape [num_timesteps, batch_size, data_size], the - desired outputs of the model. - lengths: A Tensor of shape [batch_size], the lengths of the sequences in the - batch. 
- """ - - fcnet_hidden_sizes = [fcnet_hidden_size] - initializers = {"w": tf.contrib.layers.xavier_initializer(seed=random_seed), - "b": tf.zeros_initializer()} - model = vrnn.create_vrnn( - data_size, - latent_size, - generative_class, - rnn_hidden_size=rnn_hidden_size, - fcnet_hidden_sizes=fcnet_hidden_sizes, - encoded_data_size=encoded_data_size, - encoded_latent_size=encoded_latent_size, - use_tilt=use_tilt, - initializers=initializers, - random_seed=random_seed) - inputs = tf.random_uniform([num_timesteps, batch_size, data_size], - seed=random_seed, dtype=tf.float32) - targets = tf.random_uniform([num_timesteps, batch_size, data_size], - seed=random_seed, dtype=tf.float32) - lengths = tf.constant(data_lengths, dtype=tf.int32) - return model, inputs, targets, lengths - - -def create_srnn(generative_class=base.ConditionalNormalDistribution, - batch_size=2, data_size=3, rnn_hidden_size=4, - latent_size=5, fcnet_hidden_size=7, encoded_data_size=3, - encoded_latent_size=2, num_timesteps=7, data_lengths=(7, 4), - use_tilt=False, random_seed=None): - """Creates a SRNN and some dummy data to feed it for testing purposes. - - Args: - generative_class: The class of the generative distribution. - batch_size: The number of elements per batch. - data_size: The dimension of the vectors that make up the data sequences. - rnn_hidden_size: The hidden state dimension of the RNN that forms the - deterministic part of this SRNN. - latent_size: The size of the stochastic latent state of the SRNN. - fcnet_hidden_size: The size of the hidden layer of the fully connected - networks that parameterize the conditional probability distributions - of the SRNN. - encoded_data_size: The size of the output of the data encoding network. - encoded_latent_size: The size of the output of the latent state encoding - network. - num_timesteps: The maximum number of timesteps in the data. 
- data_lengths: A tuple of size batch_size that contains the desired lengths - of each sequence in the dummy data. - use_tilt: Use a tilting function. - random_seed: A random seed to feed the SRNN, mainly useful for testing - purposes. - - Returns: - model: A SRNN object. - inputs: A Tensor of shape [num_timesteps, batch_size, data_size], the inputs - to the model, also known as the observations. - targets: A Tensor of shape [num_timesteps, batch_size, data_size], the - desired outputs of the model. - lengths: A Tensor of shape [batch_size], the lengths of the sequences in the - batch. - """ - - fcnet_hidden_sizes = [fcnet_hidden_size] - initializers = {"w": tf.contrib.layers.xavier_initializer(seed=random_seed), - "b": tf.zeros_initializer()} - model = srnn.create_srnn( - data_size, - latent_size, - generative_class, - rnn_hidden_size=rnn_hidden_size, - fcnet_hidden_sizes=fcnet_hidden_sizes, - encoded_data_size=encoded_data_size, - encoded_latent_size=encoded_latent_size, - use_tilt=use_tilt, - initializers=initializers, - random_seed=random_seed) - inputs = tf.random_uniform([num_timesteps, batch_size, data_size], - seed=random_seed, dtype=tf.float32) - targets = tf.random_uniform([num_timesteps, batch_size, data_size], - seed=random_seed, dtype=tf.float32) - lengths = tf.constant(data_lengths, dtype=tf.int32) - return model, inputs, targets, lengths diff --git a/research/fivo/run_fivo.py b/research/fivo/run_fivo.py deleted file mode 100644 index 1ca079421f09fb65439dae210b1c3760240b51ad..0000000000000000000000000000000000000000 --- a/research/fivo/run_fivo.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A script to run training for sequential latent variable models. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from fivo import ghmm_runners -from fivo import runners - -# Shared flags. -tf.app.flags.DEFINE_enum("mode", "train", - ["train", "eval", "sample"], - "The mode of the binary.") -tf.app.flags.DEFINE_enum("model", "vrnn", - ["vrnn", "ghmm", "srnn"], - "Model choice.") -tf.app.flags.DEFINE_integer("latent_size", 64, - "The size of the latent state of the model.") -tf.app.flags.DEFINE_enum("dataset_type", "pianoroll", - ["pianoroll", "speech", "pose"], - "The type of dataset.") -tf.app.flags.DEFINE_string("dataset_path", "", - "Path to load the dataset from.") -tf.app.flags.DEFINE_integer("data_dimension", None, - "The dimension of each vector in the data sequence. " - "Defaults to 88 for pianoroll datasets and 200 for speech " - "datasets. 
Should not need to be changed except for " - "testing.") -tf.app.flags.DEFINE_integer("batch_size", 4, - "Batch size.") -tf.app.flags.DEFINE_integer("num_samples", 4, - "The number of samples (or particles) for multisample " - "algorithms.") -tf.app.flags.DEFINE_string("logdir", "/tmp/smc_vi", - "The directory to keep checkpoints and summaries in.") -tf.app.flags.DEFINE_integer("random_seed", None, - "A random seed for seeding the TensorFlow graph.") -tf.app.flags.DEFINE_integer("parallel_iterations", 30, - "The number of parallel iterations to use for the while " - "loop that computes the bounds.") - -# Training flags. -tf.app.flags.DEFINE_enum("bound", "fivo", - ["elbo", "iwae", "fivo", "fivo-aux"], - "The bound to optimize.") -tf.app.flags.DEFINE_boolean("normalize_by_seq_len", True, - "If true, normalize the loss by the number of timesteps " - "per sequence.") -tf.app.flags.DEFINE_float("learning_rate", 0.0002, - "The learning rate for ADAM.") -tf.app.flags.DEFINE_integer("max_steps", int(1e9), - "The number of gradient update steps to train for.") -tf.app.flags.DEFINE_integer("summarize_every", 50, - "The number of steps between summaries.") -tf.app.flags.DEFINE_enum("resampling_type", "multinomial", - ["multinomial", "relaxed"], - "The resampling strategy to use for training.") -tf.app.flags.DEFINE_float("relaxed_resampling_temperature", 0.5, - "The relaxation temperature for relaxed resampling.") -tf.app.flags.DEFINE_enum("proposal_type", "filtering", - ["prior", "filtering", "smoothing", - "true-filtering", "true-smoothing"], - "The type of proposal to use. true-filtering and true-smoothing " - "are only available for the GHMM. The specific implementation " - "of each proposal type is left to model-writers.") - -# Distributed training flags. 
-tf.app.flags.DEFINE_string("master", "", - "The BNS name of the TensorFlow master to use.") -tf.app.flags.DEFINE_integer("task", 0, - "Task id of the replica running the training.") -tf.app.flags.DEFINE_integer("ps_tasks", 0, - "Number of tasks in the ps job. If 0 no ps job is used.") -tf.app.flags.DEFINE_boolean("stagger_workers", True, - "If true, bring one worker online every 1000 steps.") - -# Evaluation flags. -tf.app.flags.DEFINE_enum("split", "train", - ["train", "test", "valid"], - "Split to evaluate the model on.") - -# Sampling flags. -tf.app.flags.DEFINE_integer("sample_length", 50, - "The number of timesteps to sample for.") -tf.app.flags.DEFINE_integer("prefix_length", 25, - "The number of timesteps to condition the model on " - "before sampling.") -tf.app.flags.DEFINE_string("sample_out_dir", None, - "The directory to write the samples to. " - "Defaults to logdir.") - -# GHMM flags. -tf.app.flags.DEFINE_float("variance", 0.1, - "The variance of the ghmm.") -tf.app.flags.DEFINE_integer("num_timesteps", 5, - "The number of timesteps to run the gmp for.") -FLAGS = tf.app.flags.FLAGS - -PIANOROLL_DEFAULT_DATA_DIMENSION = 88 -SPEECH_DEFAULT_DATA_DIMENSION = 200 - - -def main(unused_argv): - tf.logging.set_verbosity(tf.logging.INFO) - if FLAGS.model in ["vrnn", "srnn"]: - if FLAGS.data_dimension is None: - if FLAGS.dataset_type == "pianoroll": - FLAGS.data_dimension = PIANOROLL_DEFAULT_DATA_DIMENSION - elif FLAGS.dataset_type == "speech": - FLAGS.data_dimension = SPEECH_DEFAULT_DATA_DIMENSION - if FLAGS.mode == "train": - runners.run_train(FLAGS) - elif FLAGS.mode == "eval": - runners.run_eval(FLAGS) - elif FLAGS.mode == "sample": - runners.run_sample(FLAGS) - elif FLAGS.model == "ghmm": - if FLAGS.mode == "train": - ghmm_runners.run_train(FLAGS) - elif FLAGS.mode == "eval": - ghmm_runners.run_eval(FLAGS) - -if __name__ == "__main__": - tf.app.run(main) diff --git a/research/global_objectives/README.md b/research/global_objectives/README.md deleted file 
mode 100644 index f9a778c59d420f9bf5deccf4b2b45147636de582..0000000000000000000000000000000000000000 --- a/research/global_objectives/README.md +++ /dev/null @@ -1,152 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Global Objectives -The Global Objectives library provides TensorFlow loss functions that optimize -directly for a variety of objectives including AUC, recall at precision, and -more. The global objectives losses can be used as drop-in replacements for -TensorFlow's standard multilabel loss functions: -`tf.nn.sigmoid_cross_entropy_with_logits` and `tf.losses.sigmoid_cross_entropy`. - -Many machine learning classification models are optimized for classification -accuracy, when the real objective the user cares about is different and can be -precision at a fixed recall, precision-recall AUC, ROC AUC or similar metrics. -These are referred to as "global objectives" because they depend on how the -model classifies the dataset as a whole and do not decouple across data points -as accuracy does. - -Because these objectives are combinatorial, discontinuous, and essentially -intractable to optimize directly, the functions in this library approximate -their corresponding objectives. This approximation approach follows the same -pattern as optimizing for accuracy, where a surrogate objective such as -cross-entropy or the hinge loss is used as an upper bound on the error rate. - -## Getting Started -For a full example of how to use the loss functions in practice, see -loss_layers_example.py. - -Briefly, global objective losses can be used to replace -`tf.nn.sigmoid_cross_entropy_with_logits` by providing the relevant -additional arguments. 
For example, - -``` python -tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) -``` - -could be replaced with - -``` python -global_objectives.recall_at_precision_loss( - labels=labels, - logits=logits, - target_precision=0.95)[0] -``` - -Just as minimizing the cross-entropy loss will maximize accuracy, the loss -functions in loss_layers.py were written so that minimizing the loss will -maximize the corresponding objective. - -The global objective losses have two return values -- the loss tensor and -additional quantities for debugging and customization -- which is why the first -value is used above. For more information, see -[Visualization & Debugging](#visualization-debugging). - -## Binary Label Format -Binary classification problems can be represented as a multi-class problem with -two classes, or as a multi-label problem with one label. (Recall that multiclass -problems have mutually exclusive classes, e.g. 'cat xor dog', and multilabel -have classes which are not mutually exclusive, e.g. an image can contain a cat, -a dog, both, or neither.) The softmax loss -(`tf.nn.softmax_cross_entropy_with_logits`) is used for multi-class problems, -while the sigmoid loss (`tf.nn.sigmoid_cross_entropy_with_logits`) is used for -multi-label problems. - -A multiclass label format for binary classification might represent positives -with the label [1, 0] and negatives with the label [0, 1], while the multilbel -format for the same problem would use [1] and [0], respectively. - -All global objectives loss functions assume that the multilabel format is used. -Accordingly, if your current loss function is softmax, the labels will have to -be reformatted for the loss to work properly. - -## Dual Variables -Global objectives losses (except for `roc_auc_loss`) use internal variables -called dual variables or Lagrange multipliers to enforce the desired constraint -(e.g. if optimzing for recall at precision, the constraint is on precision). 
- -These dual variables are created and initialized internally by the loss -functions, and are updated during training by the same optimizer used for the -model's other variables. To initialize the dual variables to a particular value, -use the `lambdas_initializer` argument. The dual variables can be found under -the key `lambdas` in the `other_outputs` dictionary returned by the losses. - -## Loss Function Arguments -The following arguments are common to all loss functions in the library, and are -either required or very important. - -* `labels`: Corresponds directly to the `labels` argument of - `tf.nn.sigmoid_cross_entropy_with_logits`. -* `logits`: Corresponds directly to the `logits` argument of - `tf.nn.sigmoid_cross_entropy_with_logits`. -* `dual_rate_factor`: A floating point value which controls the step size for - the Lagrange multipliers. Setting this value less than 1.0 will cause the - constraint to be enforced more gradually and will result in more stable - training. - -In addition, the objectives with a single constraint (e.g. -`recall_at_precision_loss`) have an argument (e.g. `target_precision`) used to -specify the value of the constraint. The optional `precision_range` argument to -`precision_recall_auc_loss` is used to specify the range of precision values -over which to optimize the AUC, and defaults to the interval [0, 1]. - -Optional arguments: - -* `weights`: A tensor which acts as coefficients for the loss. If a weight of x - is provided for a datapoint and that datapoint is a true (false) positive - (negative), it will be counted as x true (false) positives (negatives). - Defaults to 1.0. -* `label_priors`: A tensor specifying the fraction of positive datapoints for - each label. If not provided, it will be computed inside the loss function. -* `surrogate_type`: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. 
-* `lambdas_initializer`: An initializer for the dual variables (Lagrange - multipliers). See also the Dual Variables section. -* `num_anchors` (precision_recall_auc_loss only): The number of grid points used - when approximating the AUC as a Riemann sum. - -## Hyperparameters -While the functional form of the global objectives losses allow them to be -easily substituted in place of `sigmoid_cross_entropy_with_logits`, model -hyperparameters such as learning rate, weight decay, etc. may need to be -fine-tuned to the new loss. Fortunately, the amount of hyperparameter re-tuning -is usually minor. - -The most important hyperparameters to modify are the learning rate and -dual_rate_factor (see the section on Loss Function Arguments, above). - -## Visualization & Debugging -The global objectives losses return two values. The first is a tensor -representing the numerical value of the loss, which can be passed to an -optimizer. The second is a dictionary of tensors created by the loss function -which are not necessary for optimization but useful in debugging. These vary -depending on the loss function, but usually include `lambdas` (the Lagrange -multipliers) as well as the lower bound on true positives and upper bound on -false positives. - -When visualizing the loss during training, note that the global objectives -losses differ from standard losses in some important ways: - -* The global losses may be negative. This is because the value returned by the - loss includes terms involving the Lagrange multipliers, which may be negative. -* The global losses may not decrease over the course of training. To enforce the - constraints in the objective, the loss changes over time and may increase. - -## More Info -For more details, see the [Global Objectives paper](https://arxiv.org/abs/1608.04802). 
- -## Maintainers - -* Mariano Schain -* Elad Eban -* [Alan Mackey](https://github.com/mackeya-google) diff --git a/research/global_objectives/loss_layers.py b/research/global_objectives/loss_layers.py deleted file mode 100644 index eaea05398ef3771247060afda63be184ea76cdf0..0000000000000000000000000000000000000000 --- a/research/global_objectives/loss_layers.py +++ /dev/null @@ -1,930 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Loss functions for learning global objectives. - -These functions have two return values: a Tensor with the value of -the loss, and a dictionary of internal quantities for customizability. -""" - -# Dependency imports -import numpy -import tensorflow as tf - -from global_objectives import util - - -def precision_recall_auc_loss( - labels, - logits, - precision_range=(0.0, 1.0), - num_anchors=20, - weights=1.0, - dual_rate_factor=0.1, - label_priors=None, - surrogate_type='xent', - lambdas_initializer=tf.constant_initializer(1.0), - reuse=None, - variables_collections=None, - trainable=True, - scope=None): - """Computes precision-recall AUC loss. - - The loss is based on a sum of losses for recall at a range of - precision values (anchor points). This sum is a Riemann sum that - approximates the area under the precision-recall curve. 
- - The per-example `weights` argument changes not only the coefficients of - individual training examples, but how the examples are counted toward the - constraint. If `label_priors` is given, it MUST take `weights` into account. - That is, - label_priors = P / (P + N) - where - P = sum_i (wt_i on positives) - N = sum_i (wt_i on negatives). - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. - precision_range: A length-two tuple, the range of precision values over - which to compute AUC. The entries must be nonnegative, increasing, and - less than or equal to 1.0. - num_anchors: The number of grid points used to approximate the Riemann sum. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - dual_rate_factor: A floating point value which controls the step size for - the Lagrange multipliers. - label_priors: None, or a floating point `Tensor` of shape [num_labels] - containing the prior probability of each label (i.e. the fraction of the - training data consisting of positive examples). If None, the label - priors are computed from `labels` with a moving average. See the notes - above regarding the interaction with `weights` and do not set this unless - you have a good reason to do so. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. - lambdas_initializer: An initializer for the Lagrange multipliers. - reuse: Whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - variables_collections: Optional list of collections for the variables. - trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - scope: Optional scope for `variable_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise - loss. 
- other_outputs: A dictionary of useful internal quantities for debugging. For - more details, see http://arxiv.org/pdf/1608.04802.pdf. - lambdas: A Tensor of shape [1, num_labels, num_anchors] consisting of the - Lagrange multipliers. - biases: A Tensor of shape [1, num_labels, num_anchors] consisting of the - learned bias term for each. - label_priors: A Tensor of shape [1, num_labels, 1] consisting of the prior - probability of each label learned by the loss, if not provided. - true_positives_lower_bound: Lower bound on the number of true positives - given `labels` and `logits`. This is the same lower bound which is used - in the loss expression to be optimized. - false_positives_upper_bound: Upper bound on the number of false positives - given `labels` and `logits`. This is the same upper bound which is used - in the loss expression to be optimized. - - Raises: - ValueError: If `surrogate_type` is not `xent` or `hinge`. - """ - with tf.variable_scope(scope, - 'precision_recall_auc', - [labels, logits, label_priors], - reuse=reuse): - labels, logits, weights, original_shape = _prepare_labels_logits_weights( - labels, logits, weights) - num_labels = util.get_num_labels(logits) - - # Convert other inputs to tensors and standardize dtypes. - dual_rate_factor = util.convert_and_cast( - dual_rate_factor, 'dual_rate_factor', logits.dtype) - - # Create Tensor of anchor points and distance between anchors. - precision_values, delta = _range_to_anchors_and_delta( - precision_range, num_anchors, logits.dtype) - # Create lambdas with shape [1, num_labels, num_anchors]. - lambdas, lambdas_variable = _create_dual_variable( - 'lambdas', - shape=[1, num_labels, num_anchors], - dtype=logits.dtype, - initializer=lambdas_initializer, - collections=variables_collections, - trainable=trainable, - dual_rate_factor=dual_rate_factor) - # Create biases with shape [1, num_labels, num_anchors]. 
- biases = tf.contrib.framework.model_variable( - name='biases', - shape=[1, num_labels, num_anchors], - dtype=logits.dtype, - initializer=tf.zeros_initializer(), - collections=variables_collections, - trainable=trainable) - # Maybe create label_priors. - label_priors = maybe_create_label_priors( - label_priors, labels, weights, variables_collections) - label_priors = tf.reshape(label_priors, [1, num_labels, 1]) - - # Expand logits, labels, and weights to shape [batch_size, num_labels, 1]. - logits = tf.expand_dims(logits, 2) - labels = tf.expand_dims(labels, 2) - weights = tf.expand_dims(weights, 2) - - # Calculate weighted loss and other outputs. The log(2.0) term corrects for - # logloss not being an upper bound on the indicator function. - loss = weights * util.weighted_surrogate_loss( - labels, - logits + biases, - surrogate_type=surrogate_type, - positive_weights=1.0 + lambdas * (1.0 - precision_values), - negative_weights=lambdas * precision_values) - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - lambda_term = lambdas * (1.0 - precision_values) * label_priors * maybe_log2 - per_anchor_loss = loss - lambda_term - per_label_loss = delta * tf.reduce_sum(per_anchor_loss, 2) - # Normalize the AUC such that a perfect score function will have AUC 1.0. - # Because precision_range is discretized into num_anchors + 1 intervals - # but only num_anchors terms are included in the Riemann sum, the - # effective length of the integration interval is `delta` less than the - # length of precision_range. 
- scaled_loss = tf.div(per_label_loss, - precision_range[1] - precision_range[0] - delta, - name='AUC_Normalize') - scaled_loss = tf.reshape(scaled_loss, original_shape) - - other_outputs = { - 'lambdas': lambdas_variable, - 'biases': biases, - 'label_priors': label_priors, - 'true_positives_lower_bound': true_positives_lower_bound( - labels, logits, weights, surrogate_type), - 'false_positives_upper_bound': false_positives_upper_bound( - labels, logits, weights, surrogate_type)} - - return scaled_loss, other_outputs - - -def roc_auc_loss( - labels, - logits, - weights=1.0, - surrogate_type='xent', - scope=None): - """Computes ROC AUC loss. - - The area under the ROC curve is the probability p that a randomly chosen - positive example will be scored higher than a randomly chosen negative - example. This loss approximates 1-p by using a surrogate (either hinge loss or - cross entropy) for the indicator function. Specifically, the loss is: - - sum_i sum_j w_i*w_j*loss(logit_i - logit_j) - - where i ranges over the positive datapoints, j ranges over the negative - datapoints, logit_k denotes the logit (or score) of the k-th datapoint, and - loss is either the hinge or log loss given a positive label. - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape and dtype as `labels`. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for the indicator function. - scope: Optional scope for `name_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise loss. - other_outputs: An empty dictionary, for consistency. - - Raises: - ValueError: If `surrogate_type` is not `xent` or `hinge`. - """ - with tf.name_scope(scope, 'roc_auc', [labels, logits, weights]): - # Convert inputs to tensors and standardize dtypes. 
- labels, logits, weights, original_shape = _prepare_labels_logits_weights( - labels, logits, weights) - - # Create tensors of pairwise differences for logits and labels, and - # pairwise products of weights. These have shape - # [batch_size, batch_size, num_labels]. - logits_difference = tf.expand_dims(logits, 0) - tf.expand_dims(logits, 1) - labels_difference = tf.expand_dims(labels, 0) - tf.expand_dims(labels, 1) - weights_product = tf.expand_dims(weights, 0) * tf.expand_dims(weights, 1) - - signed_logits_difference = labels_difference * logits_difference - raw_loss = util.weighted_surrogate_loss( - labels=tf.ones_like(signed_logits_difference), - logits=signed_logits_difference, - surrogate_type=surrogate_type) - weighted_loss = weights_product * raw_loss - - # Zero out entries of the loss where labels_difference zero (so loss is only - # computed on pairs with different labels). - loss = tf.reduce_mean(tf.abs(labels_difference) * weighted_loss, 0) * 0.5 - loss = tf.reshape(loss, original_shape) - return loss, {} - - -def recall_at_precision_loss( - labels, - logits, - target_precision, - weights=1.0, - dual_rate_factor=0.1, - label_priors=None, - surrogate_type='xent', - lambdas_initializer=tf.constant_initializer(1.0), - reuse=None, - variables_collections=None, - trainable=True, - scope=None): - """Computes recall at precision loss. - - The loss is based on a surrogate of the form - wt * w(+) * loss(+) + wt * w(-) * loss(-) - c * pi, - where: - - w(+) = 1 + lambdas * (1 - target_precision) - - loss(+) is the cross-entropy loss on the positive examples - - w(-) = lambdas * target_precision - - loss(-) is the cross-entropy loss on the negative examples - - wt is a scalar or tensor of per-example weights - - c = lambdas * (1 - target_precision) - - pi is the label_priors. - - The per-example weights change not only the coefficients of individual - training examples, but how the examples are counted toward the constraint. 
- If `label_priors` is given, it MUST take `weights` into account. That is, - label_priors = P / (P + N) - where - P = sum_i (wt_i on positives) - N = sum_i (wt_i on negatives). - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. - target_precision: The precision at which to compute the loss. Can be a - floating point value between 0 and 1 for a single precision value, or a - `Tensor` of shape [num_labels], holding each label's target precision - value. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - dual_rate_factor: A floating point value which controls the step size for - the Lagrange multipliers. - label_priors: None, or a floating point `Tensor` of shape [num_labels] - containing the prior probability of each label (i.e. the fraction of the - training data consisting of positive examples). If None, the label - priors are computed from `labels` with a moving average. See the notes - above regarding the interaction with `weights` and do not set this unless - you have a good reason to do so. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. - lambdas_initializer: An initializer for the Lagrange multipliers. - reuse: Whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - variables_collections: Optional list of collections for the variables. - trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - scope: Optional scope for `variable_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise - loss. - other_outputs: A dictionary of useful internal quantities for debugging. For - more details, see http://arxiv.org/pdf/1608.04802.pdf. 
- lambdas: A Tensor of shape [num_labels] consisting of the Lagrange - multipliers. - label_priors: A Tensor of shape [num_labels] consisting of the prior - probability of each label learned by the loss, if not provided. - true_positives_lower_bound: Lower bound on the number of true positives - given `labels` and `logits`. This is the same lower bound which is used - in the loss expression to be optimized. - false_positives_upper_bound: Upper bound on the number of false positives - given `labels` and `logits`. This is the same upper bound which is used - in the loss expression to be optimized. - - Raises: - ValueError: If `logits` and `labels` do not have the same shape. - """ - with tf.variable_scope(scope, - 'recall_at_precision', - [logits, labels, label_priors], - reuse=reuse): - labels, logits, weights, original_shape = _prepare_labels_logits_weights( - labels, logits, weights) - num_labels = util.get_num_labels(logits) - - # Convert other inputs to tensors and standardize dtypes. - target_precision = util.convert_and_cast( - target_precision, 'target_precision', logits.dtype) - dual_rate_factor = util.convert_and_cast( - dual_rate_factor, 'dual_rate_factor', logits.dtype) - - # Create lambdas. - lambdas, lambdas_variable = _create_dual_variable( - 'lambdas', - shape=[num_labels], - dtype=logits.dtype, - initializer=lambdas_initializer, - collections=variables_collections, - trainable=trainable, - dual_rate_factor=dual_rate_factor) - # Maybe create label_priors. - label_priors = maybe_create_label_priors( - label_priors, labels, weights, variables_collections) - - # Calculate weighted loss and other outputs. The log(2.0) term corrects for - # logloss not being an upper bound on the indicator function. 
- weighted_loss = weights * util.weighted_surrogate_loss( - labels, - logits, - surrogate_type=surrogate_type, - positive_weights=1.0 + lambdas * (1.0 - target_precision), - negative_weights=lambdas * target_precision) - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - lambda_term = lambdas * (1.0 - target_precision) * label_priors * maybe_log2 - loss = tf.reshape(weighted_loss - lambda_term, original_shape) - other_outputs = { - 'lambdas': lambdas_variable, - 'label_priors': label_priors, - 'true_positives_lower_bound': true_positives_lower_bound( - labels, logits, weights, surrogate_type), - 'false_positives_upper_bound': false_positives_upper_bound( - labels, logits, weights, surrogate_type)} - - return loss, other_outputs - - -def precision_at_recall_loss( - labels, - logits, - target_recall, - weights=1.0, - dual_rate_factor=0.1, - label_priors=None, - surrogate_type='xent', - lambdas_initializer=tf.constant_initializer(1.0), - reuse=None, - variables_collections=None, - trainable=True, - scope=None): - """Computes precision at recall loss. - - The loss is based on a surrogate of the form - wt * loss(-) + lambdas * (pi * (b - 1) + wt * loss(+)) - where: - - loss(-) is the cross-entropy loss on the negative examples - - loss(+) is the cross-entropy loss on the positive examples - - wt is a scalar or tensor of per-example weights - - b is the target recall - - pi is the label_priors. - - The per-example weights change not only the coefficients of individual - training examples, but how the examples are counted toward the constraint. - If `label_priors` is given, it MUST take `weights` into account. That is, - label_priors = P / (P + N) - where - P = sum_i (wt_i on positives) - N = sum_i (wt_i on negatives). - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. 
- target_recall: The recall at which to compute the loss. Can be a floating - point value between 0 and 1 for a single target recall value, or a - `Tensor` of shape [num_labels] holding each label's target recall value. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - dual_rate_factor: A floating point value which controls the step size for - the Lagrange multipliers. - label_priors: None, or a floating point `Tensor` of shape [num_labels] - containing the prior probability of each label (i.e. the fraction of the - training data consisting of positive examples). If None, the label - priors are computed from `labels` with a moving average. See the notes - above regarding the interaction with `weights` and do not set this unless - you have a good reason to do so. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. - lambdas_initializer: An initializer for the Lagrange multipliers. - reuse: Whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - variables_collections: Optional list of collections for the variables. - trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - scope: Optional scope for `variable_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise - loss. - other_outputs: A dictionary of useful internal quantities for debugging. For - more details, see http://arxiv.org/pdf/1608.04802.pdf. - lambdas: A Tensor of shape [num_labels] consisting of the Lagrange - multipliers. - label_priors: A Tensor of shape [num_labels] consisting of the prior - probability of each label learned by the loss, if not provided. - true_positives_lower_bound: Lower bound on the number of true positives - given `labels` and `logits`. 
This is the same lower bound which is used - in the loss expression to be optimized. - false_positives_upper_bound: Upper bound on the number of false positives - given `labels` and `logits`. This is the same upper bound which is used - in the loss expression to be optimized. - """ - with tf.variable_scope(scope, - 'precision_at_recall', - [logits, labels, label_priors], - reuse=reuse): - labels, logits, weights, original_shape = _prepare_labels_logits_weights( - labels, logits, weights) - num_labels = util.get_num_labels(logits) - - # Convert other inputs to tensors and standardize dtypes. - target_recall = util.convert_and_cast( - target_recall, 'target_recall', logits.dtype) - dual_rate_factor = util.convert_and_cast( - dual_rate_factor, 'dual_rate_factor', logits.dtype) - - # Create lambdas. - lambdas, lambdas_variable = _create_dual_variable( - 'lambdas', - shape=[num_labels], - dtype=logits.dtype, - initializer=lambdas_initializer, - collections=variables_collections, - trainable=trainable, - dual_rate_factor=dual_rate_factor) - # Maybe create label_priors. - label_priors = maybe_create_label_priors( - label_priors, labels, weights, variables_collections) - - # Calculate weighted loss and other outputs. The log(2.0) term corrects for - # logloss not being an upper bound on the indicator function. 
- weighted_loss = weights * util.weighted_surrogate_loss( - labels, - logits, - surrogate_type, - positive_weights=lambdas, - negative_weights=1.0) - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - lambda_term = lambdas * label_priors * (target_recall - 1.0) * maybe_log2 - loss = tf.reshape(weighted_loss + lambda_term, original_shape) - other_outputs = { - 'lambdas': lambdas_variable, - 'label_priors': label_priors, - 'true_positives_lower_bound': true_positives_lower_bound( - labels, logits, weights, surrogate_type), - 'false_positives_upper_bound': false_positives_upper_bound( - labels, logits, weights, surrogate_type)} - - return loss, other_outputs - - -def false_positive_rate_at_true_positive_rate_loss( - labels, - logits, - target_rate, - weights=1.0, - dual_rate_factor=0.1, - label_priors=None, - surrogate_type='xent', - lambdas_initializer=tf.constant_initializer(1.0), - reuse=None, - variables_collections=None, - trainable=True, - scope=None): - """Computes false positive rate at true positive rate loss. - - Note that `true positive rate` is a synonym for Recall, and that minimizing - the false positive rate and maximizing precision are equivalent for a fixed - Recall. Therefore, this function is identical to precision_at_recall_loss. - - The per-example weights change not only the coefficients of individual - training examples, but how the examples are counted toward the constraint. - If `label_priors` is given, it MUST take `weights` into account. That is, - label_priors = P / (P + N) - where - P = sum_i (wt_i on positives) - N = sum_i (wt_i on negatives). - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. - target_rate: The true positive rate at which to compute the loss. 
Can be a - floating point value between 0 and 1 for a single true positive rate, or - a `Tensor` of shape [num_labels] holding each label's true positive rate. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - dual_rate_factor: A floating point value which controls the step size for - the Lagrange multipliers. - label_priors: None, or a floating point `Tensor` of shape [num_labels] - containing the prior probability of each label (i.e. the fraction of the - training data consisting of positive examples). If None, the label - priors are computed from `labels` with a moving average. See the notes - above regarding the interaction with `weights` and do not set this unless - you have a good reason to do so. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. 'xent' will use the cross-entropy - loss surrogate, and 'hinge' will use the hinge loss. - lambdas_initializer: An initializer op for the Lagrange multipliers. - reuse: Whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - variables_collections: Optional list of collections for the variables. - trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - scope: Optional scope for `variable_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise - loss. - other_outputs: A dictionary of useful internal quantities for debugging. For - more details, see http://arxiv.org/pdf/1608.04802.pdf. - lambdas: A Tensor of shape [num_labels] consisting of the Lagrange - multipliers. - label_priors: A Tensor of shape [num_labels] consisting of the prior - probability of each label learned by the loss, if not provided. - true_positives_lower_bound: Lower bound on the number of true positives - given `labels` and `logits`. 
This is the same lower bound which is used - in the loss expression to be optimized. - false_positives_upper_bound: Upper bound on the number of false positives - given `labels` and `logits`. This is the same upper bound which is used - in the loss expression to be optimized. - - Raises: - ValueError: If `surrogate_type` is not `xent` or `hinge`. - """ - return precision_at_recall_loss(labels=labels, - logits=logits, - target_recall=target_rate, - weights=weights, - dual_rate_factor=dual_rate_factor, - label_priors=label_priors, - surrogate_type=surrogate_type, - lambdas_initializer=lambdas_initializer, - reuse=reuse, - variables_collections=variables_collections, - trainable=trainable, - scope=scope) - - -def true_positive_rate_at_false_positive_rate_loss( - labels, - logits, - target_rate, - weights=1.0, - dual_rate_factor=0.1, - label_priors=None, - surrogate_type='xent', - lambdas_initializer=tf.constant_initializer(1.0), - reuse=None, - variables_collections=None, - trainable=True, - scope=None): - """Computes true positive rate at false positive rate loss. - - The loss is based on a surrogate of the form - wt * loss(+) + lambdas * (wt * loss(-) - r * (1 - pi)) - where: - - loss(-) is the loss on the negative examples - - loss(+) is the loss on the positive examples - - wt is a scalar or tensor of per-example weights - - r is the target rate - - pi is the label_priors. - - The per-example weights change not only the coefficients of individual - training examples, but how the examples are counted toward the constraint. - If `label_priors` is given, it MUST take `weights` into account. That is, - label_priors = P / (P + N) - where - P = sum_i (wt_i on positives) - N = sum_i (wt_i on negatives). - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. - target_rate: The false positive rate at which to compute the loss. 
Can be a - floating point value between 0 and 1 for a single false positive rate, or - a `Tensor` of shape [num_labels] holding each label's false positive rate. - weights: Coefficients for the loss. Must be a scalar or `Tensor` of shape - [batch_size] or [batch_size, num_labels]. - dual_rate_factor: A floating point value which controls the step size for - the Lagrange multipliers. - label_priors: None, or a floating point `Tensor` of shape [num_labels] - containing the prior probability of each label (i.e. the fraction of the - training data consisting of positive examples). If None, the label - priors are computed from `labels` with a moving average. See the notes - above regarding the interaction with `weights` and do not set this unless - you have a good reason to do so. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. 'xent' will use the cross-entropy - loss surrogate, and 'hinge' will use the hinge loss. - lambdas_initializer: An initializer op for the Lagrange multipliers. - reuse: Whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - variables_collections: Optional list of collections for the variables. - trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - scope: Optional scope for `variable_scope`. - - Returns: - loss: A `Tensor` of the same shape as `logits` with the component-wise - loss. - other_outputs: A dictionary of useful internal quantities for debugging. For - more details, see http://arxiv.org/pdf/1608.04802.pdf. - lambdas: A Tensor of shape [num_labels] consisting of the Lagrange - multipliers. - label_priors: A Tensor of shape [num_labels] consisting of the prior - probability of each label learned by the loss, if not provided. - true_positives_lower_bound: Lower bound on the number of true positives - given `labels` and `logits`. 
This is the same lower bound which is used - in the loss expression to be optimized. - false_positives_upper_bound: Upper bound on the number of false positives - given `labels` and `logits`. This is the same upper bound which is used - in the loss expression to be optimized. - - Raises: - ValueError: If `surrogate_type` is not `xent` or `hinge`. - """ - with tf.variable_scope(scope, - 'tpr_at_fpr', - [labels, logits, label_priors], - reuse=reuse): - labels, logits, weights, original_shape = _prepare_labels_logits_weights( - labels, logits, weights) - num_labels = util.get_num_labels(logits) - - # Convert other inputs to tensors and standardize dtypes. - target_rate = util.convert_and_cast( - target_rate, 'target_rate', logits.dtype) - dual_rate_factor = util.convert_and_cast( - dual_rate_factor, 'dual_rate_factor', logits.dtype) - - # Create lambdas. - lambdas, lambdas_variable = _create_dual_variable( - 'lambdas', - shape=[num_labels], - dtype=logits.dtype, - initializer=lambdas_initializer, - collections=variables_collections, - trainable=trainable, - dual_rate_factor=dual_rate_factor) - # Maybe create label_priors. - label_priors = maybe_create_label_priors( - label_priors, labels, weights, variables_collections) - - # Loss op and other outputs. The log(2.0) term corrects for - # logloss not being an upper bound on the indicator function. 
- weighted_loss = weights * util.weighted_surrogate_loss( - labels, - logits, - surrogate_type=surrogate_type, - positive_weights=1.0, - negative_weights=lambdas) - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - lambda_term = lambdas * target_rate * (1.0 - label_priors) * maybe_log2 - loss = tf.reshape(weighted_loss - lambda_term, original_shape) - other_outputs = { - 'lambdas': lambdas_variable, - 'label_priors': label_priors, - 'true_positives_lower_bound': true_positives_lower_bound( - labels, logits, weights, surrogate_type), - 'false_positives_upper_bound': false_positives_upper_bound( - labels, logits, weights, surrogate_type)} - - return loss, other_outputs - - -def _prepare_labels_logits_weights(labels, logits, weights): - """Validates labels, logits, and weights. - - Converts inputs to tensors, checks shape compatibility, and casts dtype if - necessary. - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - logits: A `Tensor` with the same shape as `labels`. - weights: Either `None` or a `Tensor` with shape broadcastable to `logits`. - - Returns: - labels: Same as `labels` arg after possible conversion to tensor, cast, and - reshape. - logits: Same as `logits` arg after possible conversion to tensor and - reshape. - weights: Same as `weights` arg after possible conversion, cast, and reshape. - original_shape: Shape of `labels` and `logits` before reshape. - - Raises: - ValueError: If `labels` and `logits` do not have the same shape. - """ - # Convert `labels` and `logits` to Tensors and standardize dtypes. 
- logits = tf.convert_to_tensor(logits, name='logits') - labels = util.convert_and_cast(labels, 'labels', logits.dtype.base_dtype) - weights = util.convert_and_cast(weights, 'weights', logits.dtype.base_dtype) - - try: - labels.get_shape().merge_with(logits.get_shape()) - except ValueError: - raise ValueError('logits and labels must have the same shape (%s vs %s)' % - (logits.get_shape(), labels.get_shape())) - - original_shape = labels.get_shape().as_list() - if labels.get_shape().ndims > 0: - original_shape[0] = -1 - if labels.get_shape().ndims <= 1: - labels = tf.reshape(labels, [-1, 1]) - logits = tf.reshape(logits, [-1, 1]) - - if weights.get_shape().ndims == 1: - # Weights has shape [batch_size]. Reshape to [batch_size, 1]. - weights = tf.reshape(weights, [-1, 1]) - if weights.get_shape().ndims == 0: - # Weights is a scalar. Change shape of weights to match logits. - weights *= tf.ones_like(logits) - - return labels, logits, weights, original_shape - - -def _range_to_anchors_and_delta(precision_range, num_anchors, dtype): - """Calculates anchor points from precision range. - - Args: - precision_range: As required in precision_recall_auc_loss. - num_anchors: int, number of equally spaced anchor points. - dtype: Data type of returned tensors. - - Returns: - precision_values: A `Tensor` of data type dtype with equally spaced values - in the interval precision_range. - delta: The spacing between the values in precision_values. - - Raises: - ValueError: If precision_range is invalid. - """ - # Validate precision_range. - if not 0 <= precision_range[0] <= precision_range[-1] <= 1: - raise ValueError('precision values must obey 0 <= %f <= %f <= 1' % - (precision_range[0], precision_range[-1])) - if not 0 < len(precision_range) < 3: - raise ValueError('length of precision_range (%d) must be 1 or 2' % - len(precision_range)) - - # Sets precision_values uniformly between min_precision and max_precision. 
- values = numpy.linspace(start=precision_range[0], - stop=precision_range[1], - num=num_anchors+2)[1:-1] - precision_values = util.convert_and_cast( - values, 'precision_values', dtype) - delta = util.convert_and_cast( - values[0] - precision_range[0], 'delta', dtype) - # Makes precision_values [1, 1, num_anchors]. - precision_values = util.expand_outer(precision_values, 3) - return precision_values, delta - - -def _create_dual_variable(name, shape, dtype, initializer, collections, - trainable, dual_rate_factor): - """Creates a new dual variable. - - Dual variables are required to be nonnegative. If trainable, their gradient - is reversed so that they are maximized (rather than minimized) by the - optimizer. - - Args: - name: A string, the name for the new variable. - shape: Shape of the new variable. - dtype: Data type for the new variable. - initializer: Initializer for the new variable. - collections: List of graph collections keys. The new variable is added to - these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. - trainable: If `True`, the default, also adds the variable to the graph - collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as - the default list of variables to use by the `Optimizer` classes. - dual_rate_factor: A floating point value or `Tensor`. The learning rate for - the dual variable is scaled by this factor. - - Returns: - dual_value: An op that computes the absolute value of the dual variable - and reverses its gradient. - dual_variable: The underlying variable itself. - """ - # We disable partitioning while constructing dual variables because they will - # be updated with assign, which is not available for partitioned variables. 
- partitioner = tf.get_variable_scope().partitioner - try: - tf.get_variable_scope().set_partitioner(None) - dual_variable = tf.contrib.framework.model_variable( - name=name, - shape=shape, - dtype=dtype, - initializer=initializer, - collections=collections, - trainable=trainable) - finally: - tf.get_variable_scope().set_partitioner(partitioner) - # Using the absolute value enforces nonnegativity. - dual_value = tf.abs(dual_variable) - - if trainable: - # To reverse the gradient on the dual variable, multiply the gradient by - # -dual_rate_factor - dual_value = (tf.stop_gradient((1.0 + dual_rate_factor) * dual_value) - - dual_rate_factor * dual_value) - return dual_value, dual_variable - - -def maybe_create_label_priors(label_priors, - labels, - weights, - variables_collections): - """Creates moving average ops to track label priors, if necessary. - - Args: - label_priors: As required in e.g. precision_recall_auc_loss. - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. - weights: As required in e.g. precision_recall_auc_loss. - variables_collections: Optional list of collections for the variables, if - any must be created. - - Returns: - label_priors: A Tensor of shape [num_labels] consisting of the - weighted label priors, after updating with moving average ops if created. - """ - if label_priors is not None: - label_priors = util.convert_and_cast( - label_priors, name='label_priors', dtype=labels.dtype.base_dtype) - return tf.squeeze(label_priors) - - label_priors = util.build_label_priors( - labels, - weights, - variables_collections=variables_collections) - return label_priors - - -def true_positives_lower_bound(labels, logits, weights, surrogate_type): - """Calculate a lower bound on the number of true positives. - - This lower bound on the number of true positives given `logits` and `labels` - is the same one used in the global objectives loss functions. - - Args: - labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels]. 
- logits: A `Tensor` of shape [batch_size, num_labels] or - [batch_size, num_labels, num_anchors]. If the third dimension is present, - the lower bound is computed on each slice [:, :, k] independently. - weights: Per-example loss coefficients, with shape broadcast-compatible with - that of `labels`. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. - - Returns: - A `Tensor` of shape [num_labels] or [num_labels, num_anchors]. - """ - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - if logits.get_shape().ndims == 3 and labels.get_shape().ndims < 3: - labels = tf.expand_dims(labels, 2) - loss_on_positives = util.weighted_surrogate_loss( - labels, logits, surrogate_type, negative_weights=0.0) / maybe_log2 - return tf.reduce_sum(weights * (labels - loss_on_positives), 0) - - -def false_positives_upper_bound(labels, logits, weights, surrogate_type): - """Calculate an upper bound on the number of false positives. - - This upper bound on the number of false positives given `logits` and `labels` - is the same one used in the global objectives loss functions. - - Args: - labels: A `Tensor` of shape [batch_size, num_labels] - logits: A `Tensor` of shape [batch_size, num_labels] or - [batch_size, num_labels, num_anchors]. If the third dimension is present, - the lower bound is computed on each slice [:, :, k] independently. - weights: Per-example loss coefficients, with shape broadcast-compatible with - that of `labels`. - surrogate_type: Either 'xent' or 'hinge', specifying which upper bound - should be used for indicator functions. - - Returns: - A `Tensor` of shape [num_labels] or [num_labels, num_anchors]. 
- """ - maybe_log2 = tf.log(2.0) if surrogate_type == 'xent' else 1.0 - maybe_log2 = tf.cast(maybe_log2, logits.dtype.base_dtype) - loss_on_negatives = util.weighted_surrogate_loss( - labels, logits, surrogate_type, positive_weights=0.0) / maybe_log2 - return tf.reduce_sum(weights * loss_on_negatives, 0) diff --git a/research/global_objectives/loss_layers_example.py b/research/global_objectives/loss_layers_example.py deleted file mode 100644 index 2323cb0762e7f4eade8f283162be61cc45513d49..0000000000000000000000000000000000000000 --- a/research/global_objectives/loss_layers_example.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Example for using global objectives. - -Illustrate, using synthetic data, how using the precision_at_recall loss -significanly improves the performace of a linear classifier. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports -import numpy as np -from sklearn.metrics import precision_score -import tensorflow as tf -from global_objectives import loss_layers - -# When optimizing using global_objectives, if set to True then the saddle point -# optimization steps are performed internally by the Tensorflow optimizer, -# otherwise by dedicated saddle-point steps as part of the optimization loop. -USE_GO_SADDLE_POINT_OPT = False - -TARGET_RECALL = 0.98 -TRAIN_ITERATIONS = 150 -LEARNING_RATE = 1.0 -GO_DUAL_RATE_FACTOR = 15.0 -NUM_CHECKPOINTS = 6 - -EXPERIMENT_DATA_CONFIG = { - 'positives_centers': [[0, 1.0], [1, -0.5]], - 'negatives_centers': [[0, -0.5], [1, 1.0]], - 'positives_variances': [0.15, 0.1], - 'negatives_variances': [0.15, 0.1], - 'positives_counts': [500, 50], - 'negatives_counts': [3000, 100] -} - - -def create_training_and_eval_data_for_experiment(**data_config): - """Creates train and eval data sets. - - Note: The synthesized binary-labeled data is a mixture of four Gaussians - two - positives and two negatives. The centers, variances, and sizes for each of - the two positives and negatives mixtures are passed in the respective keys - of data_config: - - Args: - **data_config: Dictionary with Array entries as follows: - positives_centers - float [2,2] two centers of positives data sets. - negatives_centers - float [2,2] two centers of negatives data sets. - positives_variances - float [2] Variances for the positives sets. - negatives_variances - float [2] Variances for the negatives sets. - positives_counts - int [2] Counts for each of the two positives sets. - negatives_counts - int [2] Counts for each of the two negatives sets. - - Returns: - A dictionary with two shuffled data sets created - one for training and one - for eval. The dictionary keys are 'train_data', 'train_labels', 'eval_data', - and 'eval_labels'. 
The data points are two-dimentional floats, and the - labels are in {0,1}. - """ - def data_points(is_positives, index): - variance = data_config['positives_variances' - if is_positives else 'negatives_variances'][index] - center = data_config['positives_centers' - if is_positives else 'negatives_centers'][index] - count = data_config['positives_counts' - if is_positives else 'negatives_counts'][index] - return variance*np.random.randn(count, 2) + np.array([center]) - - def create_data(): - return np.concatenate([data_points(False, 0), - data_points(True, 0), - data_points(True, 1), - data_points(False, 1)], axis=0) - - def create_labels(): - """Creates an array of 0.0 or 1.0 labels for the data_config batches.""" - return np.array([0.0]*data_config['negatives_counts'][0] + - [1.0]*data_config['positives_counts'][0] + - [1.0]*data_config['positives_counts'][1] + - [0.0]*data_config['negatives_counts'][1]) - - permutation = np.random.permutation( - sum(data_config['positives_counts'] + data_config['negatives_counts'])) - - train_data = create_data()[permutation, :] - eval_data = create_data()[permutation, :] - train_labels = create_labels()[permutation] - eval_labels = create_labels()[permutation] - - return { - 'train_data': train_data, - 'train_labels': train_labels, - 'eval_data': eval_data, - 'eval_labels': eval_labels - } - - -def train_model(data, use_global_objectives): - """Trains a linear model for maximal accuracy or precision at given recall.""" - - def precision_at_recall(scores, labels, target_recall): - """Computes precision - at target recall - over data.""" - positive_scores = scores[labels == 1.0] - threshold = np.percentile(positive_scores, 100 - target_recall*100) - predicted = scores >= threshold - return precision_score(labels, predicted) - - w = tf.Variable(tf.constant([-1.0, -1.0], shape=[2, 1]), trainable=True, - name='weights', dtype=tf.float32) - b = tf.Variable(tf.zeros([1]), trainable=True, name='biases', - dtype=tf.float32) - - logits = 
tf.matmul(tf.cast(data['train_data'], tf.float32), w) + b - - labels = tf.constant( - data['train_labels'], - shape=[len(data['train_labels']), 1], - dtype=tf.float32) - - if use_global_objectives: - loss, other_outputs = loss_layers.precision_at_recall_loss( - labels, logits, - TARGET_RECALL, - dual_rate_factor=GO_DUAL_RATE_FACTOR) - loss = tf.reduce_mean(loss) - else: - loss = tf.reduce_mean( - tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)) - - global_step = tf.Variable(0, trainable=False) - - learning_rate = tf.train.polynomial_decay( - LEARNING_RATE, - global_step, - TRAIN_ITERATIONS, (LEARNING_RATE / TRAIN_ITERATIONS), - power=1.0, - cycle=False, - name='learning_rate') - - optimizer = tf.train.GradientDescentOptimizer(learning_rate) - - if (not use_global_objectives) or USE_GO_SADDLE_POINT_OPT: - training_op = optimizer.minimize(loss, global_step=global_step) - else: - lambdas = other_outputs['lambdas'] - primal_update_op = optimizer.minimize(loss, var_list=[w, b]) - dual_update_op = optimizer.minimize( - loss, global_step=global_step, var_list=[lambdas]) - - # Training loop: - with tf.Session() as sess: - checkpoint_step = TRAIN_ITERATIONS // NUM_CHECKPOINTS - sess.run(tf.global_variables_initializer()) - step = sess.run(global_step) - - while step <= TRAIN_ITERATIONS: - if (not use_global_objectives) or USE_GO_SADDLE_POINT_OPT: - _, step, loss_value, w_value, b_value = sess.run( - [training_op, global_step, loss, w, b]) - else: - _, w_value, b_value = sess.run([primal_update_op, w, b]) - _, loss_value, step = sess.run([dual_update_op, loss, global_step]) - - if use_global_objectives: - go_outputs = sess.run(other_outputs.values()) - - if step % checkpoint_step == 0: - precision = precision_at_recall( - np.dot(data['train_data'], w_value) + b_value, - data['train_labels'], TARGET_RECALL) - - tf.logging.info('Loss = %f Precision = %f', loss_value, precision) - if use_global_objectives: - for i, output_name in 
enumerate(other_outputs.keys()): - tf.logging.info('\t%s = %f', output_name, go_outputs[i]) - - w_value, b_value = sess.run([w, b]) - return precision_at_recall(np.dot(data['eval_data'], w_value) + b_value, - data['eval_labels'], - TARGET_RECALL) - - -def main(unused_argv): - del unused_argv - experiment_data = create_training_and_eval_data_for_experiment( - **EXPERIMENT_DATA_CONFIG) - global_objectives_loss_precision = train_model(experiment_data, True) - tf.logging.info('global_objectives precision at requested recall is %f', - global_objectives_loss_precision) - cross_entropy_loss_precision = train_model(experiment_data, False) - tf.logging.info('cross_entropy precision at requested recall is %f', - cross_entropy_loss_precision) - - -if __name__ == '__main__': - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/research/global_objectives/loss_layers_test.py b/research/global_objectives/loss_layers_test.py deleted file mode 100644 index 3f91c80deec16a34f5271cdfadbd0d364c3a8cea..0000000000000000000000000000000000000000 --- a/research/global_objectives/loss_layers_test.py +++ /dev/null @@ -1,1379 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for global objectives loss layers.""" - -# Dependency imports -from absl.testing import parameterized -import numpy -import tensorflow as tf - -from global_objectives import loss_layers -from global_objectives import util - - -# TODO: Include weights in the lagrange multiplier update tests. -class PrecisionRecallAUCLossTest(parameterized.TestCase, tf.test.TestCase): - - @parameterized.named_parameters( - ('_xent', 'xent', 0.7), - ('_hinge', 'hinge', 0.7), - ('_hinge_2', 'hinge', 0.5) - ) - def testSinglePointAUC(self, surrogate_type, target_precision): - # Tests a case with only one anchor point, where the loss should equal - # recall_at_precision_loss - batch_shape = [10, 2] - logits = tf.Variable(tf.random_normal(batch_shape)) - labels = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - - auc_loss, _ = loss_layers.precision_recall_auc_loss( - labels, - logits, - precision_range=(target_precision - 0.01, target_precision + 0.01), - num_anchors=1, - surrogate_type=surrogate_type) - point_loss, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=target_precision, - surrogate_type=surrogate_type) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(auc_loss.eval(), point_loss.eval()) - - def testThreePointAUC(self): - # Tests a case with three anchor points against a weighted sum of recall - # at precision losses. - batch_shape = [11, 3] - logits = tf.Variable(tf.random_normal(batch_shape)) - labels = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - - # TODO: Place the hing/xent loss in a for loop. 
- auc_loss, _ = loss_layers.precision_recall_auc_loss( - labels, logits, num_anchors=1) - first_point_loss, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.25) - second_point_loss, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.5) - third_point_loss, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.75) - expected_loss = (first_point_loss + second_point_loss + - third_point_loss) / 3 - - auc_loss_hinge, _ = loss_layers.precision_recall_auc_loss( - labels, logits, num_anchors=1, surrogate_type='hinge') - first_point_hinge, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.25, surrogate_type='hinge') - second_point_hinge, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.5, surrogate_type='hinge') - third_point_hinge, _ = loss_layers.recall_at_precision_loss( - labels, logits, target_precision=0.75, surrogate_type='hinge') - expected_hinge = (first_point_hinge + second_point_hinge + - third_point_hinge) / 3 - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(auc_loss.eval(), expected_loss.eval()) - self.assertAllClose(auc_loss_hinge.eval(), expected_hinge.eval()) - - def testLagrangeMultiplierUpdateDirection(self): - for target_precision in [0.35, 0.65]: - precision_range = (target_precision - 0.01, target_precision + 0.01) - - for surrogate_type in ['xent', 'hinge']: - kwargs = {'precision_range': precision_range, - 'num_anchors': 1, - 'surrogate_type': surrogate_type, - 'scope': 'pr-auc_{}_{}'.format(target_precision, - surrogate_type)} - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_recall_auc_loss, - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_recall_auc_loss, - objective_kwargs=kwargs, - 
data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - -class ROCAUCLossTest(parameterized.TestCase, tf.test.TestCase): - - def testSimpleScores(self): - # Tests the loss on data with only one negative example with score zero. - # In this case, the loss should equal the surrogate loss on the scores with - # positive labels. - num_positives = 10 - scores_positives = tf.constant(3.0 * numpy.random.randn(num_positives), - shape=[num_positives, 1]) - labels = tf.constant([0.0] + [1.0] * num_positives, - shape=[num_positives + 1, 1]) - scores = tf.concat([[[0.0]], scores_positives], 0) - - loss = tf.reduce_sum( - loss_layers.roc_auc_loss(labels, scores, surrogate_type='hinge')[0]) - expected_loss = tf.reduce_sum( - tf.maximum(1.0 - scores_positives, 0)) / (num_positives + 1) - with self.test_session(): - self.assertAllClose(expected_loss.eval(), loss.eval()) - - def testRandomROCLoss(self): - # Checks that random Bernoulli scores and labels has ~25% swaps. - shape = [1000, 30] - scores = tf.constant( - numpy.random.randint(0, 2, size=shape), shape=shape, dtype=tf.float32) - labels = tf.constant( - numpy.random.randint(0, 2, size=shape), shape=shape, dtype=tf.float32) - loss = tf.reduce_mean(loss_layers.roc_auc_loss( - labels, scores, surrogate_type='hinge')[0]) - with self.test_session(): - self.assertAllClose(0.25, loss.eval(), 1e-2) - - @parameterized.named_parameters( - ('_zero_hinge', 'xent', - [0.0, 0.0, 0.0, 1.0, 1.0, 1.0], - [-5.0, -7.0, -9.0, 8.0, 10.0, 14.0], - 0.0), - ('_zero_xent', 'hinge', - [0.0, 0.0, 0.0, 1.0, 1.0, 1.0], - [-0.2, 0, -0.1, 1.0, 1.1, 1.0], - 0.0), - ('_xent', 'xent', - [0.0, 0.0, 0.0, 1.0, 1.0, 1.0], - [0.0, -17.0, -19.0, 1.0, 14.0, 14.0], - numpy.log(1.0 + numpy.exp(-1.0)) / 6), - ('_hinge', 'hinge', - [0.0, 0.0, 0.0, 1.0, 1.0, 1.0], - [-0.2, -0.05, 0.0, 0.95, 0.8, 1.0], - 0.4 / 6) - ) - def testManualROCLoss(self, surrogate_type, labels, logits, expected_value): - labels = tf.constant(labels) - logits = 
tf.constant(logits) - loss, _ = loss_layers.roc_auc_loss( - labels=labels, logits=logits, surrogate_type=surrogate_type) - - with self.test_session(): - self.assertAllClose(expected_value, tf.reduce_sum(loss).eval()) - - def testMultiLabelROCLoss(self): - # Tests the loss on multi-label data against manually computed loss. - targets = numpy.array([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) - scores = numpy.array([[0.1, 1.0, 1.1, 1.0], [1.0, 0.0, 1.3, 1.1]]) - class_1_auc = tf.reduce_sum( - loss_layers.roc_auc_loss(targets[0], scores[0])[0]) - class_2_auc = tf.reduce_sum( - loss_layers.roc_auc_loss(targets[1], scores[1])[0]) - total_auc = tf.reduce_sum(loss_layers.roc_auc_loss( - targets.transpose(), scores.transpose())[0]) - - with self.test_session(): - self.assertAllClose(total_auc.eval(), - class_1_auc.eval() + class_2_auc.eval()) - - def testWeights(self): - # Test the loss with per-example weights. - # The logits_negatives below are repeated, so that setting half their - # weights to 2 and the other half to 0 should leave the loss unchanged. - logits_positives = tf.constant([2.54321, -0.26, 3.334334], shape=[3, 1]) - logits_negatives = tf.constant([-0.6, 1, -1.3, -1.3, -0.6, 1], shape=[6, 1]) - logits = tf.concat([logits_positives, logits_negatives], 0) - targets = tf.constant([1, 1, 1, 0, 0, 0, 0, 0, 0], - shape=[9, 1], dtype=tf.float32) - weights = tf.constant([1, 1, 1, 0, 0, 0, 2, 2, 2], - shape=[9, 1], dtype=tf.float32) - - loss = tf.reduce_sum(loss_layers.roc_auc_loss(targets, logits)[0]) - weighted_loss = tf.reduce_sum( - loss_layers.roc_auc_loss(targets, logits, weights)[0]) - - with self.test_session(): - self.assertAllClose(loss.eval(), weighted_loss.eval()) - - -class RecallAtPrecisionTest(tf.test.TestCase): - - def testEqualWeightLoss(self): - # Tests a special case where the loss should equal cross entropy loss. 
- target_precision = 1.0 - num_labels = 5 - batch_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.7))) - label_priors = tf.constant(0.34, shape=[num_labels]) - - loss, _ = loss_layers.recall_at_precision_loss( - targets, logits, target_precision, label_priors=label_priors) - expected_loss = ( - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - logits, targets)) - - with self.test_session() as session: - tf.global_variables_initializer().run() - loss_val, expected_val = session.run([loss, expected_loss]) - self.assertAllClose(loss_val, expected_val) - - def testEqualWeightLossWithMultiplePrecisions(self): - """Tests a case where the loss equals xent loss with multiple precisions.""" - target_precision = [1.0, 1.0] - num_labels = 2 - batch_size = 20 - target_shape = [batch_size, num_labels] - logits = tf.Variable(tf.random_normal(target_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(target_shape), 0.7))) - label_priors = tf.constant([0.34], shape=[num_labels]) - - loss, _ = loss_layers.recall_at_precision_loss( - targets, - logits, - target_precision, - label_priors=label_priors, - surrogate_type='xent', - ) - - expected_loss = ( - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - logits, targets)) - - with self.test_session() as session: - tf.global_variables_initializer().run() - loss_val, expected_val = session.run([loss, expected_loss]) - self.assertAllClose(loss_val, expected_val) - - def testPositivesOnlyLoss(self): - # Tests a special case where the loss should equal cross entropy loss - # on the negatives only. 
- target_precision = 1.0 - num_labels = 3 - batch_shape = [30, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant(0.45, shape=[num_labels]) - - loss, _ = loss_layers.recall_at_precision_loss( - targets, logits, target_precision, label_priors=label_priors, - lambdas_initializer=tf.zeros_initializer()) - expected_loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits, - positive_weights=1.0, - negative_weights=0.0) - - with self.test_session() as session: - tf.global_variables_initializer().run() - loss_val, expected_val = session.run([loss, expected_loss]) - self.assertAllClose(loss_val, expected_val) - - def testEquivalenceBetweenSingleAndMultiplePrecisions(self): - """Checks recall at precision with different precision values. - - Runs recall at precision with multiple precision values, and runs each label - seperately with its own precision value as a scalar. Validates that the - returned loss values are the same. 
- """ - target_precision = [0.2, 0.9, 0.4] - num_labels = 3 - batch_shape = [30, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant([0.45, 0.8, 0.3], shape=[num_labels]) - - multi_label_loss, _ = loss_layers.recall_at_precision_loss( - targets, logits, target_precision, label_priors=label_priors, - ) - - single_label_losses = [ - loss_layers.recall_at_precision_loss( - tf.expand_dims(targets[:, i], -1), - tf.expand_dims(logits[:, i], -1), - target_precision[i], - label_priors=label_priors[i])[0] - for i in range(num_labels) - ] - - single_label_losses = tf.concat(single_label_losses, 1) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_losses]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - def testEquivalenceBetweenSingleAndEqualMultiplePrecisions(self): - """Compares single and multiple target precisions with the same value. - - Checks that using a single target precision and multiple target precisions - with the same value would result in the same loss value. 
- """ - num_labels = 2 - target_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(target_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(target_shape), 0.7))) - label_priors = tf.constant([0.34], shape=[num_labels]) - - multi_precision_loss, _ = loss_layers.recall_at_precision_loss( - targets, - logits, - [0.75, 0.75], - label_priors=label_priors, - surrogate_type='xent', - ) - - single_precision_loss, _ = loss_layers.recall_at_precision_loss( - targets, - logits, - 0.75, - label_priors=label_priors, - surrogate_type='xent', - ) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_precision_loss_val, single_precision_loss_val = session.run( - [multi_precision_loss, single_precision_loss]) - self.assertAllClose(multi_precision_loss_val, single_precision_loss_val) - - def testLagrangeMultiplierUpdateDirection(self): - for target_precision in [0.35, 0.65]: - for surrogate_type in ['xent', 'hinge']: - kwargs = {'target_precision': target_precision, - 'surrogate_type': surrogate_type, - 'scope': 'r-at-p_{}_{}'.format(target_precision, - surrogate_type)} - run_lagrange_multiplier_test( - global_objective=loss_layers.recall_at_precision_loss, - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=loss_layers.recall_at_precision_loss, - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testLagrangeMultiplierUpdateDirectionWithMultiplePrecisions(self): - """Runs Lagrange multiplier test with multiple precision values.""" - target_precision = [0.65, 0.35] - - for surrogate_type in ['xent', 'hinge']: - scope_str = 'r-at-p_{}_{}'.format( - '_'.join([str(precision) for precision in target_precision]), - surrogate_type) - kwargs = { - 'target_precision': target_precision, - 'surrogate_type': surrogate_type, - 'scope': 
scope_str, - } - run_lagrange_multiplier_test( - global_objective=loss_layers.recall_at_precision_loss, - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=loss_layers.recall_at_precision_loss, - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - -class PrecisionAtRecallTest(tf.test.TestCase): - - def testCrossEntropyEquivalence(self): - # Checks a special case where the loss should equal cross-entropy loss. - target_recall = 1.0 - num_labels = 3 - batch_shape = [10, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - - loss, _ = loss_layers.precision_at_recall_loss( - targets, logits, target_recall, - lambdas_initializer=tf.constant_initializer(1.0)) - expected_loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, logits) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(loss.eval(), expected_loss.eval()) - - def testNegativesOnlyLoss(self): - # Checks a special case where the loss should equal the loss on - # the negative examples only. 
- target_recall = 0.61828 - num_labels = 4 - batch_shape = [8, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.6))) - - loss, _ = loss_layers.precision_at_recall_loss( - targets, - logits, - target_recall, - surrogate_type='hinge', - lambdas_initializer=tf.constant_initializer(0.0), - scope='negatives_only_test') - expected_loss = util.weighted_hinge_loss( - targets, logits, positive_weights=0.0, negative_weights=1.0) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(expected_loss.eval(), loss.eval()) - - def testLagrangeMultiplierUpdateDirection(self): - for target_recall in [0.34, 0.66]: - for surrogate_type in ['xent', 'hinge']: - kwargs = {'target_recall': target_recall, - 'dual_rate_factor': 1.0, - 'surrogate_type': surrogate_type, - 'scope': 'p-at-r_{}_{}'.format(target_recall, surrogate_type)} - - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_at_recall_loss, - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_at_recall_loss, - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testCrossEntropyEquivalenceWithMultipleRecalls(self): - """Checks a case where the loss equals xent loss with multiple recalls.""" - num_labels = 3 - target_recall = [1.0] * num_labels - batch_shape = [10, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - - loss, _ = loss_layers.precision_at_recall_loss( - targets, logits, target_recall, - lambdas_initializer=tf.constant_initializer(1.0)) - expected_loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, logits) - - with self.test_session(): - 
tf.global_variables_initializer().run() - self.assertAllClose(loss.eval(), expected_loss.eval()) - - def testNegativesOnlyLossWithMultipleRecalls(self): - """Tests a case where the loss equals the loss on the negative examples. - - Checks this special case using multiple target recall values. - """ - num_labels = 4 - target_recall = [0.61828] * num_labels - batch_shape = [8, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.6))) - - loss, _ = loss_layers.precision_at_recall_loss( - targets, - logits, - target_recall, - surrogate_type='hinge', - lambdas_initializer=tf.constant_initializer(0.0), - scope='negatives_only_test') - expected_loss = util.weighted_hinge_loss( - targets, logits, positive_weights=0.0, negative_weights=1.0) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(expected_loss.eval(), loss.eval()) - - def testLagrangeMultiplierUpdateDirectionWithMultipleRecalls(self): - """Runs Lagrange multiplier test with multiple recall values.""" - target_recall = [0.34, 0.66] - for surrogate_type in ['xent', 'hinge']: - scope_str = 'p-at-r_{}_{}'.format( - '_'.join([str(recall) for recall in target_recall]), - surrogate_type) - kwargs = {'target_recall': target_recall, - 'dual_rate_factor': 1.0, - 'surrogate_type': surrogate_type, - 'scope': scope_str} - - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_at_recall_loss, - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=loss_layers.precision_at_recall_loss, - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testEquivalenceBetweenSingleAndMultipleRecalls(self): - """Checks precision at recall with multiple different recall values. 
- - Runs precision at recall with multiple recall values, and runs each label - seperately with its own recall value as a scalar. Validates that the - returned loss values are the same. - """ - target_precision = [0.7, 0.9, 0.4] - num_labels = 3 - batch_shape = [30, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant(0.45, shape=[num_labels]) - - multi_label_loss, _ = loss_layers.precision_at_recall_loss( - targets, logits, target_precision, label_priors=label_priors - ) - - single_label_losses = [ - loss_layers.precision_at_recall_loss( - tf.expand_dims(targets[:, i], -1), - tf.expand_dims(logits[:, i], -1), - target_precision[i], - label_priors=label_priors[i])[0] - for i in range(num_labels) - ] - - single_label_losses = tf.concat(single_label_losses, 1) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_losses]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - def testEquivalenceBetweenSingleAndEqualMultipleRecalls(self): - """Compares single and multiple target recalls of the same value. - - Checks that using a single target recall and multiple recalls with the - same value would result in the same loss value. 
- """ - num_labels = 2 - target_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(target_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(target_shape), 0.7))) - label_priors = tf.constant([0.34], shape=[num_labels]) - - multi_precision_loss, _ = loss_layers.precision_at_recall_loss( - targets, - logits, - [0.75, 0.75], - label_priors=label_priors, - surrogate_type='xent', - ) - - single_precision_loss, _ = loss_layers.precision_at_recall_loss( - targets, - logits, - 0.75, - label_priors=label_priors, - surrogate_type='xent', - ) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_precision_loss_val, single_precision_loss_val = session.run( - [multi_precision_loss, single_precision_loss]) - self.assertAllClose(multi_precision_loss_val, single_precision_loss_val) - - -class FalsePositiveRateAtTruePositiveRateTest(tf.test.TestCase): - - def testNegativesOnlyLoss(self): - # Checks a special case where the loss returned should be the loss on the - # negative examples. 
- target_recall = 0.6 - num_labels = 3 - batch_shape = [3, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant(numpy.random.uniform(size=[num_labels]), - dtype=tf.float32) - - xent_loss, _ = loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_recall, label_priors=label_priors, - lambdas_initializer=tf.constant_initializer(0.0)) - xent_expected = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits, - positive_weights=0.0, - negative_weights=1.0) - hinge_loss, _ = loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_recall, label_priors=label_priors, - lambdas_initializer=tf.constant_initializer(0.0), - surrogate_type='hinge') - hinge_expected = util.weighted_hinge_loss( - targets, - logits, - positive_weights=0.0, - negative_weights=1.0) - - with self.test_session() as session: - tf.global_variables_initializer().run() - xent_val, xent_expected = session.run([xent_loss, xent_expected]) - self.assertAllClose(xent_val, xent_expected) - hinge_val, hinge_expected = session.run([hinge_loss, hinge_expected]) - self.assertAllClose(hinge_val, hinge_expected) - - def testPositivesOnlyLoss(self): - # Checks a special case where the loss returned should be the loss on the - # positive examples only. 
- target_recall = 1.0 - num_labels = 5 - batch_shape = [5, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.ones_like(logits) - label_priors = tf.constant(numpy.random.uniform(size=[num_labels]), - dtype=tf.float32) - - loss, _ = loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_recall, label_priors=label_priors) - expected_loss = tf.nn.sigmoid_cross_entropy_with_logits( - labels=targets, logits=logits) - hinge_loss, _ = loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_recall, label_priors=label_priors, - surrogate_type='hinge') - expected_hinge = util.weighted_hinge_loss( - targets, logits) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(loss.eval(), expected_loss.eval()) - self.assertAllClose(hinge_loss.eval(), expected_hinge.eval()) - - def testEqualWeightLoss(self): - # Checks a special case where the loss returned should be proportional to - # the ordinary loss. 
- target_recall = 1.0 - num_labels = 4 - batch_shape = [40, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.6))) - label_priors = tf.constant(0.5, shape=[num_labels]) - - loss, _ = loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_recall, label_priors=label_priors) - expected_loss = tf.nn.sigmoid_cross_entropy_with_logits( - labels=targets, logits=logits) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(loss.eval(), expected_loss.eval()) - - def testLagrangeMultiplierUpdateDirection(self): - for target_rate in [0.35, 0.65]: - for surrogate_type in ['xent', 'hinge']: - kwargs = {'target_rate': target_rate, - 'surrogate_type': surrogate_type, - 'scope': 'fpr-at-tpr_{}_{}'.format(target_rate, - surrogate_type)} - # True positive rate is a synonym for recall, so we use the - # recall constraint data. - run_lagrange_multiplier_test( - global_objective=( - loss_layers.false_positive_rate_at_true_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=( - loss_layers.false_positive_rate_at_true_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testLagrangeMultiplierUpdateDirectionWithMultipleRates(self): - """Runs Lagrange multiplier test with multiple target rates.""" - target_rate = [0.35, 0.65] - for surrogate_type in ['xent', 'hinge']: - kwargs = {'target_rate': target_rate, - 'surrogate_type': surrogate_type, - 'scope': 'fpr-at-tpr_{}_{}'.format( - '_'.join([str(target) for target in target_rate]), - surrogate_type)} - # True positive rate is a synonym for recall, so we use the - # recall constraint data. 
- run_lagrange_multiplier_test( - global_objective=( - loss_layers.false_positive_rate_at_true_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=( - loss_layers.false_positive_rate_at_true_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testEquivalenceBetweenSingleAndEqualMultipleRates(self): - """Compares single and multiple target rates of the same value. - - Checks that using a single target rate and multiple rates with the - same value would result in the same loss value. - """ - num_labels = 2 - target_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(target_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(target_shape), 0.7))) - label_priors = tf.constant([0.34], shape=[num_labels]) - - multi_label_loss, _ = ( - loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, [0.75, 0.75], label_priors=label_priors)) - - single_label_loss, _ = ( - loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, 0.75, label_priors=label_priors)) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_loss]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - def testEquivalenceBetweenSingleAndMultipleRates(self): - """Compares single and multiple target rates of different values. - - Runs false_positive_rate_at_true_positive_rate_loss with multiple target - rates, and runs each label seperately with its own target rate as a - scalar. Validates that the returned loss values are the same. 
- """ - target_precision = [0.7, 0.9, 0.4] - num_labels = 3 - batch_shape = [30, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant(0.45, shape=[num_labels]) - - multi_label_loss, _ = ( - loss_layers.false_positive_rate_at_true_positive_rate_loss( - targets, logits, target_precision, label_priors=label_priors)) - - single_label_losses = [ - loss_layers.false_positive_rate_at_true_positive_rate_loss( - tf.expand_dims(targets[:, i], -1), - tf.expand_dims(logits[:, i], -1), - target_precision[i], - label_priors=label_priors[i])[0] - for i in range(num_labels) - ] - - single_label_losses = tf.concat(single_label_losses, 1) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_losses]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - -class TruePositiveRateAtFalsePositiveRateTest(tf.test.TestCase): - - def testPositivesOnlyLoss(self): - # A special case where the loss should equal the loss on the positive - # examples. 
- target_rate = numpy.random.uniform() - num_labels = 3 - batch_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.6))) - label_priors = tf.constant(numpy.random.uniform(size=[num_labels]), - dtype=tf.float32) - - xent_loss, _ = loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, target_rate, label_priors=label_priors, - lambdas_initializer=tf.constant_initializer(0.0)) - xent_expected = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits, - positive_weights=1.0, - negative_weights=0.0) - hinge_loss, _ = loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, target_rate, label_priors=label_priors, - lambdas_initializer=tf.constant_initializer(0.0), - surrogate_type='hinge') - hinge_expected = util.weighted_hinge_loss( - targets, - logits, - positive_weights=1.0, - negative_weights=0.0) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(xent_expected.eval(), xent_loss.eval()) - self.assertAllClose(hinge_expected.eval(), hinge_loss.eval()) - - def testNegativesOnlyLoss(self): - # A special case where the loss should equal the loss on the negative - # examples, minus target_rate * (1 - label_priors) * maybe_log2. 
- target_rate = numpy.random.uniform() - num_labels = 3 - batch_shape = [25, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.zeros_like(logits) - label_priors = tf.constant(numpy.random.uniform(size=[num_labels]), - dtype=tf.float32) - - xent_loss, _ = loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, target_rate, label_priors=label_priors) - xent_expected = tf.subtract( - util.weighted_sigmoid_cross_entropy_with_logits(targets, - logits, - positive_weights=0.0, - negative_weights=1.0), - target_rate * (1.0 - label_priors) * numpy.log(2)) - hinge_loss, _ = loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, target_rate, label_priors=label_priors, - surrogate_type='hinge') - hinge_expected = util.weighted_hinge_loss( - targets, logits) - target_rate * (1.0 - label_priors) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(xent_expected.eval(), xent_loss.eval()) - self.assertAllClose(hinge_expected.eval(), hinge_loss.eval()) - - def testLagrangeMultiplierUpdateDirection(self): - for target_rate in [0.35, 0.65]: - for surrogate_type in ['xent', 'hinge']: - kwargs = {'target_rate': target_rate, - 'surrogate_type': surrogate_type, - 'scope': 'tpr-at-fpr_{}_{}'.format(target_rate, - surrogate_type)} - run_lagrange_multiplier_test( - global_objective=( - loss_layers.true_positive_rate_at_false_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=( - loss_layers.true_positive_rate_at_false_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testLagrangeMultiplierUpdateDirectionWithMultipleRates(self): - """Runs Lagrange multiplier test with multiple target rates.""" - target_rate = [0.35, 0.65] - for surrogate_type in 
['xent', 'hinge']: - kwargs = {'target_rate': target_rate, - 'surrogate_type': surrogate_type, - 'scope': 'tpr-at-fpr_{}_{}'.format( - '_'.join([str(target) for target in target_rate]), - surrogate_type)} - run_lagrange_multiplier_test( - global_objective=( - loss_layers.true_positive_rate_at_false_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_multilabel_data, - test_object=self) - kwargs['scope'] = 'other-' + kwargs['scope'] - run_lagrange_multiplier_test( - global_objective=( - loss_layers.true_positive_rate_at_false_positive_rate_loss), - objective_kwargs=kwargs, - data_builder=_other_multilabel_data(surrogate_type), - test_object=self) - - def testEquivalenceBetweenSingleAndEqualMultipleRates(self): - """Compares single and multiple target rates of the same value. - - Checks that using a single target rate and multiple rates with the - same value would result in the same loss value. - """ - num_labels = 2 - target_shape = [20, num_labels] - logits = tf.Variable(tf.random_normal(target_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(target_shape), 0.7))) - label_priors = tf.constant([0.34], shape=[num_labels]) - - multi_label_loss, _ = ( - loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, [0.75, 0.75], label_priors=label_priors)) - - single_label_loss, _ = ( - loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, 0.75, label_priors=label_priors)) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_loss]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - def testEquivalenceBetweenSingleAndMultipleRates(self): - """Compares single and multiple target rates of different values. 
- - Runs true_positive_rate_at_false_positive_rate_loss with multiple target - rates, and runs each label seperately with its own target rate as a - scalar. Validates that the returned loss values are the same. - """ - target_precision = [0.7, 0.9, 0.4] - num_labels = 3 - batch_shape = [30, num_labels] - logits = tf.Variable(tf.random_normal(batch_shape)) - targets = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors = tf.constant(0.45, shape=[num_labels]) - - multi_label_loss, _ = ( - loss_layers.true_positive_rate_at_false_positive_rate_loss( - targets, logits, target_precision, label_priors=label_priors)) - - single_label_losses = [ - loss_layers.true_positive_rate_at_false_positive_rate_loss( - tf.expand_dims(targets[:, i], -1), - tf.expand_dims(logits[:, i], -1), - target_precision[i], - label_priors=label_priors[i])[0] - for i in range(num_labels) - ] - - single_label_losses = tf.concat(single_label_losses, 1) - - with self.test_session() as session: - tf.global_variables_initializer().run() - multi_label_loss_val, single_label_loss_val = session.run( - [multi_label_loss, single_label_losses]) - self.assertAllClose(multi_label_loss_val, single_label_loss_val) - - -class UtilityFunctionsTest(tf.test.TestCase): - - def testTrainableDualVariable(self): - # Confirm correct behavior of a trainable dual variable. - x = tf.get_variable('primal', dtype=tf.float32, initializer=2.0) - y_value, y = loss_layers._create_dual_variable( - 'dual', shape=None, dtype=tf.float32, initializer=1.0, collections=None, - trainable=True, dual_rate_factor=0.3) - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - update = optimizer.minimize(0.5 * tf.square(x - y_value)) - - with self.test_session(): - tf.global_variables_initializer().run() - update.run() - self.assertAllClose(0.7, y.eval()) - - def testUntrainableDualVariable(self): - # Confirm correct behavior of dual variable which is not trainable. 
- x = tf.get_variable('primal', dtype=tf.float32, initializer=-2.0) - y_value, y = loss_layers._create_dual_variable( - 'dual', shape=None, dtype=tf.float32, initializer=1.0, collections=None, - trainable=False, dual_rate_factor=0.8) - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - update = optimizer.minimize(tf.square(x) * y_value + tf.exp(y_value)) - - with self.test_session(): - tf.global_variables_initializer().run() - update.run() - self.assertAllClose(1.0, y.eval()) - - -class BoundTest(parameterized.TestCase, tf.test.TestCase): - - @parameterized.named_parameters( - ('_xent', 'xent', 1.0, [2.0, 1.0]), - ('_xent_weighted', 'xent', - numpy.array([0, 2, 0.5, 1, 2, 3]).reshape(6, 1), [2.5, 0]), - ('_hinge', 'hinge', 1.0, [2.0, 1.0]), - ('_hinge_weighted', 'hinge', - numpy.array([1.0, 2, 3, 4, 5, 6]).reshape(6, 1), [5.0, 1])) - def testLowerBoundMultilabel(self, surrogate_type, weights, expected): - labels, logits, _ = _multilabel_data() - lower_bound = loss_layers.true_positives_lower_bound( - labels, logits, weights, surrogate_type) - - with self.test_session(): - self.assertAllClose(lower_bound.eval(), expected) - - @parameterized.named_parameters( - ('_xent', 'xent'), ('_hinge', 'hinge')) - def testLowerBoundOtherMultilabel(self, surrogate_type): - labels, logits, _ = _other_multilabel_data(surrogate_type)() - lower_bound = loss_layers.true_positives_lower_bound( - labels, logits, 1.0, surrogate_type) - - with self.test_session(): - self.assertAllClose(lower_bound.eval(), [4.0, 2.0], atol=1e-5) - - @parameterized.named_parameters( - ('_xent', 'xent', 1.0, [1.0, 2.0]), - ('_xent_weighted', 'xent', - numpy.array([3.0, 2, 1, 0, 1, 2]).reshape(6, 1), [2.0, 1.0]), - ('_hinge', 'hinge', 1.0, [1.0, 2.0]), - ('_hinge_weighted', 'hinge', - numpy.array([13, 12, 11, 0.5, 0, 0.5]).reshape(6, 1), [0.5, 0.5])) - def testUpperBoundMultilabel(self, surrogate_type, weights, expected): - labels, logits, _ = _multilabel_data() - upper_bound = 
loss_layers.false_positives_upper_bound( - labels, logits, weights, surrogate_type) - - with self.test_session(): - self.assertAllClose(upper_bound.eval(), expected) - - @parameterized.named_parameters( - ('_xent', 'xent'), ('_hinge', 'hinge')) - def testUpperBoundOtherMultilabel(self, surrogate_type): - labels, logits, _ = _other_multilabel_data(surrogate_type)() - upper_bound = loss_layers.false_positives_upper_bound( - labels, logits, 1.0, surrogate_type) - - with self.test_session(): - self.assertAllClose(upper_bound.eval(), [2.0, 4.0], atol=1e-5) - - @parameterized.named_parameters( - ('_lower', 'lower'), ('_upper', 'upper')) - def testThreeDimensionalLogits(self, bound): - bound_function = loss_layers.false_positives_upper_bound - if bound == 'lower': - bound_function = loss_layers.true_positives_lower_bound - random_labels = numpy.float32(numpy.random.uniform(size=[2, 3]) > 0.5) - random_logits = numpy.float32(numpy.random.randn(2, 3, 2)) - first_slice_logits = random_logits[:, :, 0].reshape(2, 3) - second_slice_logits = random_logits[:, :, 1].reshape(2, 3) - - full_bound = bound_function( - tf.constant(random_labels), tf.constant(random_logits), 1.0, 'xent') - first_slice_bound = bound_function(tf.constant(random_labels), - tf.constant(first_slice_logits), - 1.0, - 'xent') - second_slice_bound = bound_function(tf.constant(random_labels), - tf.constant(second_slice_logits), - 1.0, - 'xent') - stacked_bound = tf.stack([first_slice_bound, second_slice_bound], axis=1) - - with self.test_session(): - self.assertAllClose(full_bound.eval(), stacked_bound.eval()) - - -def run_lagrange_multiplier_test(global_objective, - objective_kwargs, - data_builder, - test_object): - """Runs a test for the Lagrange multiplier update of `global_objective`. - - The test checks that the constraint for `global_objective` is satisfied on - the first label of the data produced by `data_builder` but not the second. - - Args: - global_objective: One of the global objectives. 
- objective_kwargs: A dictionary of keyword arguments to pass to - `global_objective`. Must contain an entry for the constraint argument - of `global_objective`, e.g. 'target_rate' or 'target_precision'. - data_builder: A function which returns tensors corresponding to labels, - logits, and label priors. - test_object: An instance of tf.test.TestCase. - """ - # Construct global objective kwargs from a copy of `objective_kwargs`. - kwargs = dict(objective_kwargs) - targets, logits, priors = data_builder() - kwargs['labels'] = targets - kwargs['logits'] = logits - kwargs['label_priors'] = priors - - loss, output_dict = global_objective(**kwargs) - lambdas = tf.squeeze(output_dict['lambdas']) - opt = tf.train.GradientDescentOptimizer(learning_rate=0.1) - update_op = opt.minimize(loss, var_list=[output_dict['lambdas']]) - - with test_object.test_session() as session: - tf.global_variables_initializer().run() - lambdas_before = session.run(lambdas) - session.run(update_op) - lambdas_after = session.run(lambdas) - test_object.assertLess(lambdas_after[0], lambdas_before[0]) - test_object.assertGreater(lambdas_after[1], lambdas_before[1]) - - -class CrossFunctionTest(parameterized.TestCase, tf.test.TestCase): - - @parameterized.named_parameters( - ('_auc01xent', loss_layers.precision_recall_auc_loss, { - 'precision_range': (0.0, 1.0), 'surrogate_type': 'xent' - }), - ('_auc051xent', loss_layers.precision_recall_auc_loss, { - 'precision_range': (0.5, 1.0), 'surrogate_type': 'xent' - }), - ('_auc01)hinge', loss_layers.precision_recall_auc_loss, { - 'precision_range': (0.0, 1.0), 'surrogate_type': 'hinge' - }), - ('_ratp04', loss_layers.recall_at_precision_loss, { - 'target_precision': 0.4, 'surrogate_type': 'xent' - }), - ('_ratp066', loss_layers.recall_at_precision_loss, { - 'target_precision': 0.66, 'surrogate_type': 'xent' - }), - ('_ratp07_hinge', loss_layers.recall_at_precision_loss, { - 'target_precision': 0.7, 'surrogate_type': 'hinge' - }), - ('_fpattp066', 
loss_layers.false_positive_rate_at_true_positive_rate_loss, - {'target_rate': 0.66, 'surrogate_type': 'xent'}), - ('_fpattp046', loss_layers.false_positive_rate_at_true_positive_rate_loss, - { - 'target_rate': 0.46, 'surrogate_type': 'xent' - }), - ('_fpattp076_hinge', - loss_layers.false_positive_rate_at_true_positive_rate_loss, { - 'target_rate': 0.76, 'surrogate_type': 'hinge' - }), - ('_fpattp036_hinge', - loss_layers.false_positive_rate_at_true_positive_rate_loss, { - 'target_rate': 0.36, 'surrogate_type': 'hinge' - }), - ) - def testWeigtedGlobalObjective(self, - global_objective, - objective_kwargs): - """Runs a test of `global_objective` with per-example weights. - - Args: - global_objective: One of the global objectives. - objective_kwargs: A dictionary of keyword arguments to pass to - `global_objective`. Must contain keys 'surrogate_type', and the keyword - for the constraint argument of `global_objective`, e.g. 'target_rate' or - 'target_precision'. - """ - logits_positives = tf.constant([1, -0.5, 3], shape=[3, 1]) - logits_negatives = tf.constant([-0.5, 1, -1, -1, -0.5, 1], shape=[6, 1]) - - # Dummy tensor is used to compute the gradients. - dummy = tf.constant(1.0) - logits = tf.concat([logits_positives, logits_negatives], 0) - logits = tf.multiply(logits, dummy) - targets = tf.constant([1, 1, 1, 0, 0, 0, 0, 0, 0], - shape=[9, 1], dtype=tf.float32) - priors = tf.constant(1.0/3.0, shape=[1]) - weights = tf.constant([1, 1, 1, 0, 0, 0, 2, 2, 2], - shape=[9, 1], dtype=tf.float32) - - # Construct global objective kwargs. - objective_kwargs['labels'] = targets - objective_kwargs['logits'] = logits - objective_kwargs['label_priors'] = priors - - scope = 'weighted_test' - # Unweighted loss. - objective_kwargs['scope'] = scope + '_plain' - raw_loss, update = global_objective(**objective_kwargs) - loss = tf.reduce_sum(raw_loss) - - # Weighted loss. 
- objective_kwargs['weights'] = weights - objective_kwargs['scope'] = scope + '_weighted' - raw_weighted_loss, weighted_update = global_objective(**objective_kwargs) - weighted_loss = tf.reduce_sum(raw_weighted_loss) - - lambdas = tf.contrib.framework.get_unique_variable(scope + '_plain/lambdas') - weighted_lambdas = tf.contrib.framework.get_unique_variable( - scope + '_weighted/lambdas') - logits_gradient = tf.gradients(loss, dummy) - weighted_logits_gradient = tf.gradients(weighted_loss, dummy) - - with self.test_session() as session: - tf.global_variables_initializer().run() - self.assertAllClose(loss.eval(), weighted_loss.eval()) - - logits_grad, weighted_logits_grad = session.run( - [logits_gradient, weighted_logits_gradient]) - self.assertAllClose(logits_grad, weighted_logits_grad) - - session.run([update, weighted_update]) - lambdas_value, weighted_lambdas_value = session.run( - [lambdas, weighted_lambdas]) - self.assertAllClose(lambdas_value, weighted_lambdas_value) - - @parameterized.named_parameters( - ('_prauc051xent', loss_layers.precision_recall_auc_loss, { - 'precision_range': (0.5, 1.0), 'surrogate_type': 'xent' - }), - ('_prauc01hinge', loss_layers.precision_recall_auc_loss, { - 'precision_range': (0.0, 1.0), 'surrogate_type': 'hinge' - }), - ('_rocxent', loss_layers.roc_auc_loss, {'surrogate_type': 'xent'}), - ('_rochinge', loss_layers.roc_auc_loss, {'surrogate_type': 'xent'}), - ('_ratp04', loss_layers.recall_at_precision_loss, { - 'target_precision': 0.4, 'surrogate_type': 'xent' - }), - ('_ratp07_hinge', loss_layers.recall_at_precision_loss, { - 'target_precision': 0.7, 'surrogate_type': 'hinge' - }), - ('_patr05', loss_layers.precision_at_recall_loss, { - 'target_recall': 0.4, 'surrogate_type': 'xent' - }), - ('_patr08_hinge', loss_layers.precision_at_recall_loss, { - 'target_recall': 0.7, 'surrogate_type': 'hinge' - }), - ('_fpattp046', loss_layers.false_positive_rate_at_true_positive_rate_loss, - { - 'target_rate': 0.46, 'surrogate_type': 
'xent' - }), - ('_fpattp036_hinge', - loss_layers.false_positive_rate_at_true_positive_rate_loss, { - 'target_rate': 0.36, 'surrogate_type': 'hinge' - }), - ('_tpatfp076', loss_layers.true_positive_rate_at_false_positive_rate_loss, - { - 'target_rate': 0.76, 'surrogate_type': 'xent' - }), - ('_tpatfp036_hinge', - loss_layers.true_positive_rate_at_false_positive_rate_loss, { - 'target_rate': 0.36, 'surrogate_type': 'hinge' - }), - ) - def testVectorAndMatrixLabelEquivalence(self, - global_objective, - objective_kwargs): - """Tests equivalence between label shape [batch_size] or [batch_size, 1].""" - vector_labels = tf.constant([1.0, 1.0, 0.0, 0.0], shape=[4]) - vector_logits = tf.constant([1.0, 0.1, 0.1, -1.0], shape=[4]) - - # Construct vector global objective kwargs and loss. - vector_kwargs = objective_kwargs.copy() - vector_kwargs['labels'] = vector_labels - vector_kwargs['logits'] = vector_logits - vector_loss, _ = global_objective(**vector_kwargs) - vector_loss_sum = tf.reduce_sum(vector_loss) - - # Construct matrix global objective kwargs and loss. 
- matrix_kwargs = objective_kwargs.copy() - matrix_kwargs['labels'] = tf.expand_dims(vector_labels, 1) - matrix_kwargs['logits'] = tf.expand_dims(vector_logits, 1) - matrix_loss, _ = global_objective(**matrix_kwargs) - matrix_loss_sum = tf.reduce_sum(matrix_loss) - - self.assertEqual(1, vector_loss.get_shape().ndims) - self.assertEqual(2, matrix_loss.get_shape().ndims) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(vector_loss_sum.eval(), matrix_loss_sum.eval()) - - @parameterized.named_parameters( - ('_prauc', loss_layers.precision_recall_auc_loss, None), - ('_roc', loss_layers.roc_auc_loss, None), - ('_rap', loss_layers.recall_at_precision_loss, {'target_precision': 0.8}), - ('_patr', loss_layers.precision_at_recall_loss, {'target_recall': 0.7}), - ('_fpattp', loss_layers.false_positive_rate_at_true_positive_rate_loss, - {'target_rate': 0.9}), - ('_tpatfp', loss_layers.true_positive_rate_at_false_positive_rate_loss, - {'target_rate': 0.1}) - ) - def testUnknownBatchSize(self, global_objective, objective_kwargs): - # Tests that there are no errors when the batch size is not known. 
- batch_shape = [5, 2] - logits = tf.placeholder(tf.float32) - logits_feed = numpy.random.randn(*batch_shape) - labels = tf.placeholder(tf.float32) - labels_feed = logits_feed > 0.1 - logits.set_shape([None, 2]) - labels.set_shape([None, 2]) - - if objective_kwargs is None: - objective_kwargs = {} - - placeholder_kwargs = objective_kwargs.copy() - placeholder_kwargs['labels'] = labels - placeholder_kwargs['logits'] = logits - placeholder_loss, _ = global_objective(**placeholder_kwargs) - - kwargs = objective_kwargs.copy() - kwargs['labels'] = labels_feed - kwargs['logits'] = logits_feed - loss, _ = global_objective(**kwargs) - - with self.test_session() as session: - tf.global_variables_initializer().run() - feed_loss_val = session.run(placeholder_loss, - feed_dict={logits: logits_feed, - labels: labels_feed}) - loss_val = session.run(loss) - self.assertAllClose(feed_loss_val, loss_val) - - -# Both sets of logits below are designed so that the surrogate precision and -# recall (true positive rate) of class 1 is ~ 2/3, and the same surrogates for -# class 2 are ~ 1/3. The false positive rate surrogates are ~ 1/3 and 2/3. -def _multilabel_data(): - targets = tf.constant([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], shape=[6, 1]) - targets = tf.concat([targets, targets], 1) - logits_positives = tf.constant([[0.0, 15], - [16, 0.0], - [14, 0.0]], shape=[3, 2]) - logits_negatives = tf.constant([[-17, 0.0], - [-15, 0.0], - [0.0, -101]], shape=[3, 2]) - logits = tf.concat([logits_positives, logits_negatives], 0) - priors = tf.constant(0.5, shape=[2]) - - return targets, logits, priors - - -def _other_multilabel_data(surrogate_type): - targets = tf.constant( - [1.0] * 6 + [0.0] * 6, shape=[12, 1]) - targets = tf.concat([targets, targets], 1) - logits_positives = tf.constant([[0.0, 13], - [12, 0.0], - [15, 0.0], - [0.0, 30], - [13, 0.0], - [18, 0.0]], shape=[6, 2]) - # A score of cost_2 incurs a loss of ~2.0. 
- cost_2 = 1.0 if surrogate_type == 'hinge' else 1.09861229 - logits_negatives = tf.constant([[-16, cost_2], - [-15, cost_2], - [cost_2, -111], - [-133, -14,], - [-14.0100101, -16,], - [-19.888828882, -101]], shape=[6, 2]) - logits = tf.concat([logits_positives, logits_negatives], 0) - priors = tf.constant(0.5, shape=[2]) - - def builder(): - return targets, logits, priors - - return builder - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/global_objectives/test_all.py b/research/global_objectives/test_all.py deleted file mode 100644 index d7e439e219840a9ec5c65382c6bc392b1d68b447..0000000000000000000000000000000000000000 --- a/research/global_objectives/test_all.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Runs all unit tests in the Global Objectives package. - -Requires that TensorFlow and abseil (https://github.com/abseil/abseil-py) be -installed on your machine. 
Command to run the tests: -python test_all.py - -""" - -import os -import sys -import unittest - -this_file = os.path.realpath(__file__) -start_dir = os.path.dirname(this_file) -parent_dir = os.path.dirname(start_dir) - -sys.path.append(parent_dir) -loader = unittest.TestLoader() -suite = loader.discover(start_dir, pattern='*_test.py') - -runner = unittest.TextTestRunner(verbosity=2) -runner.run(suite) diff --git a/research/global_objectives/util.py b/research/global_objectives/util.py deleted file mode 100644 index e2b287a90bd743e5466b875c933c3872868f4a5f..0000000000000000000000000000000000000000 --- a/research/global_objectives/util.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains utility functions for the global objectives library.""" - -# Dependency imports -import tensorflow as tf - - -def weighted_sigmoid_cross_entropy_with_logits(labels, - logits, - positive_weights=1.0, - negative_weights=1.0, - name=None): - """Computes a weighting of sigmoid cross entropy given `logits`. - - Measures the weighted probability error in discrete classification tasks in - which classes are independent and not mutually exclusive. For instance, one - could perform multilabel classification where a picture can contain both an - elephant and a dog at the same time. 
The class weight multiplies the - different types of errors. - For brevity, let `x = logits`, `z = labels`, `c = positive_weights`, - `d = negative_weights` The - weighed logistic loss is - - ``` - c * z * -log(sigmoid(x)) + d * (1 - z) * -log(1 - sigmoid(x)) - = c * z * -log(1 / (1 + exp(-x))) - d * (1 - z) * log(exp(-x) / (1 + exp(-x))) - = c * z * log(1 + exp(-x)) + d * (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) - = c * z * log(1 + exp(-x)) + d * (1 - z) * (x + log(1 + exp(-x))) - = (1 - z) * x * d + (1 - z + c * z ) * log(1 + exp(-x)) - = - d * x * z + d * x + (d - d * z + c * z ) * log(1 + exp(-x)) - ``` - - To ensure stability and avoid overflow, the implementation uses the identity - log(1 + exp(-x)) = max(0,-x) + log(1 + exp(-abs(x))) - and the result is computed as - - ``` - = -d * x * z + d * x - + (d - d * z + c * z ) * (max(0,-x) + log(1 + exp(-abs(x)))) - ``` - - Note that the loss is NOT an upper bound on the 0-1 loss, unless it is divided - by log(2). - - Args: - labels: A `Tensor` of type `float32` or `float64`. `labels` can be a 2D - tensor with shape [batch_size, num_labels] or a 3D tensor with shape - [batch_size, num_labels, K]. - logits: A `Tensor` of the same type and shape as `labels`. If `logits` has - shape [batch_size, num_labels, K], the loss is computed separately on each - slice [:, :, k] of `logits`. - positive_weights: A `Tensor` that holds positive weights and has the - following semantics according to its shape: - scalar - A global positive weight. - 1D tensor - must be of size K, a weight for each 'attempt' - 2D tensor - of size [num_labels, K'] where K' is either K or 1. - The `positive_weights` will be expanded to the left to match the - dimensions of logits and labels. - negative_weights: A `Tensor` that holds positive weight and has the - semantics identical to positive_weights. - name: A name for the operation (optional). 
- - Returns: - A `Tensor` of the same shape as `logits` with the componentwise - weighted logistic losses. - """ - with tf.name_scope( - name, - 'weighted_logistic_loss', - [logits, labels, positive_weights, negative_weights]) as name: - labels, logits, positive_weights, negative_weights = prepare_loss_args( - labels, logits, positive_weights, negative_weights) - - softplus_term = tf.add(tf.maximum(-logits, 0.0), - tf.log(1.0 + tf.exp(-tf.abs(logits)))) - weight_dependent_factor = ( - negative_weights + (positive_weights - negative_weights) * labels) - return (negative_weights * (logits - labels * logits) + - weight_dependent_factor * softplus_term) - - -def weighted_hinge_loss(labels, - logits, - positive_weights=1.0, - negative_weights=1.0, - name=None): - """Computes weighted hinge loss given logits `logits`. - - The loss applies to multi-label classification tasks where labels are - independent and not mutually exclusive. See also - `weighted_sigmoid_cross_entropy_with_logits`. - - Args: - labels: A `Tensor` of type `float32` or `float64`. Each entry must be - either 0 or 1. `labels` can be a 2D tensor with shape - [batch_size, num_labels] or a 3D tensor with shape - [batch_size, num_labels, K]. - logits: A `Tensor` of the same type and shape as `labels`. If `logits` has - shape [batch_size, num_labels, K], the loss is computed separately on each - slice [:, :, k] of `logits`. - positive_weights: A `Tensor` that holds positive weights and has the - following semantics according to its shape: - scalar - A global positive weight. - 1D tensor - must be of size K, a weight for each 'attempt' - 2D tensor - of size [num_labels, K'] where K' is either K or 1. - The `positive_weights` will be expanded to the left to match the - dimensions of logits and labels. - negative_weights: A `Tensor` that holds positive weight and has the - semantics identical to positive_weights. - name: A name for the operation (optional). 
- - Returns: - A `Tensor` of the same shape as `logits` with the componentwise - weighted hinge loss. - """ - with tf.name_scope( - name, 'weighted_hinge_loss', - [logits, labels, positive_weights, negative_weights]) as name: - labels, logits, positive_weights, negative_weights = prepare_loss_args( - labels, logits, positive_weights, negative_weights) - - positives_term = positive_weights * labels * tf.maximum(1.0 - logits, 0) - negatives_term = (negative_weights * (1.0 - labels) - * tf.maximum(1.0 + logits, 0)) - return positives_term + negatives_term - - -def weighted_surrogate_loss(labels, - logits, - surrogate_type='xent', - positive_weights=1.0, - negative_weights=1.0, - name=None): - """Returns either weighted cross-entropy or hinge loss. - - For example `surrogate_type` is 'xent' returns the weighted cross - entropy loss. - - Args: - labels: A `Tensor` of type `float32` or `float64`. Each entry must be - between 0 and 1. `labels` can be a 2D tensor with shape - [batch_size, num_labels] or a 3D tensor with shape - [batch_size, num_labels, K]. - logits: A `Tensor` of the same type and shape as `labels`. If `logits` has - shape [batch_size, num_labels, K], each slice [:, :, k] represents an - 'attempt' to predict `labels` and the loss is computed per slice. - surrogate_type: A string that determines which loss to return, supports - 'xent' for cross-entropy and 'hinge' for hinge loss. - positive_weights: A `Tensor` that holds positive weights and has the - following semantics according to its shape: - scalar - A global positive weight. - 1D tensor - must be of size K, a weight for each 'attempt' - 2D tensor - of size [num_labels, K'] where K' is either K or 1. - The `positive_weights` will be expanded to the left to match the - dimensions of logits and labels. - negative_weights: A `Tensor` that holds positive weight and has the - semantics identical to positive_weights. - name: A name for the operation (optional). - - Returns: - The weigthed loss. 
- - Raises: - ValueError: If value of `surrogate_type` is not supported. - """ - with tf.name_scope( - name, 'weighted_loss', - [logits, labels, surrogate_type, positive_weights, - negative_weights]) as name: - if surrogate_type == 'xent': - return weighted_sigmoid_cross_entropy_with_logits( - logits=logits, - labels=labels, - positive_weights=positive_weights, - negative_weights=negative_weights, - name=name) - elif surrogate_type == 'hinge': - return weighted_hinge_loss( - logits=logits, - labels=labels, - positive_weights=positive_weights, - negative_weights=negative_weights, - name=name) - raise ValueError('surrogate_type %s not supported.' % surrogate_type) - - -def expand_outer(tensor, rank): - """Expands the given `Tensor` outwards to a target rank. - - For example if rank = 3 and tensor.shape is [3, 4], this function will expand - to such that the resulting shape will be [1, 3, 4]. - - Args: - tensor: The tensor to expand. - rank: The target dimension. - - Returns: - The expanded tensor. - - Raises: - ValueError: If rank of `tensor` is unknown, or if `rank` is smaller than - the rank of `tensor`. - """ - if tensor.get_shape().ndims is None: - raise ValueError('tensor dimension must be known.') - if len(tensor.get_shape()) > rank: - raise ValueError( - '`rank` must be at least the current tensor dimension: (%s vs %s).' % - (rank, len(tensor.get_shape()))) - while len(tensor.get_shape()) < rank: - tensor = tf.expand_dims(tensor, 0) - return tensor - - -def build_label_priors(labels, - weights=None, - positive_pseudocount=1.0, - negative_pseudocount=1.0, - variables_collections=None): - """Creates an op to maintain and update label prior probabilities. - - For each label, the label priors are estimated as - (P + sum_i w_i y_i) / (P + N + sum_i w_i), - where y_i is the ith label, w_i is the ith weight, P is a pseudo-count of - positive labels, and N is a pseudo-count of negative labels. 
The index i - ranges over all labels observed during all evaluations of the returned op. - - Args: - labels: A `Tensor` with shape [batch_size, num_labels]. Entries should be - in [0, 1]. - weights: Coefficients representing the weight of each label. Must be either - a Tensor of shape [batch_size, num_labels] or `None`, in which case each - weight is treated as 1.0. - positive_pseudocount: Number of positive labels used to initialize the label - priors. - negative_pseudocount: Number of negative labels used to initialize the label - priors. - variables_collections: Optional list of collections for created variables. - - Returns: - label_priors: An op to update the weighted label_priors. Gives the - current value of the label priors when evaluated. - """ - dtype = labels.dtype.base_dtype - num_labels = get_num_labels(labels) - - if weights is None: - weights = tf.ones_like(labels) - - # We disable partitioning while constructing dual variables because they will - # be updated with assign, which is not available for partitioned variables. - partitioner = tf.get_variable_scope().partitioner - try: - tf.get_variable_scope().set_partitioner(None) - # Create variable and update op for weighted label counts. - weighted_label_counts = tf.contrib.framework.model_variable( - name='weighted_label_counts', - shape=[num_labels], - dtype=dtype, - initializer=tf.constant_initializer( - [positive_pseudocount] * num_labels, dtype=dtype), - collections=variables_collections, - trainable=False) - weighted_label_counts_update = weighted_label_counts.assign_add( - tf.reduce_sum(weights * labels, 0)) - - # Create variable and update op for the sum of the weights. 
- weight_sum = tf.contrib.framework.model_variable( - name='weight_sum', - shape=[num_labels], - dtype=dtype, - initializer=tf.constant_initializer( - [positive_pseudocount + negative_pseudocount] * num_labels, - dtype=dtype), - collections=variables_collections, - trainable=False) - weight_sum_update = weight_sum.assign_add(tf.reduce_sum(weights, 0)) - - finally: - tf.get_variable_scope().set_partitioner(partitioner) - - label_priors = tf.div( - weighted_label_counts_update, - weight_sum_update) - return label_priors - - -def convert_and_cast(value, name, dtype): - """Convert input to tensor and cast to dtype. - - Args: - value: An object whose type has a registered Tensor conversion function, - e.g. python numerical type or numpy array. - name: Name to use for the new Tensor, if one is created. - dtype: Optional element type for the returned tensor. - - Returns: - A tensor. - """ - return tf.cast(tf.convert_to_tensor(value, name=name), dtype=dtype) - - -def prepare_loss_args(labels, logits, positive_weights, negative_weights): - """Prepare arguments for weighted loss functions. - - If needed, will convert given arguments to appropriate type and shape. - - Args: - labels: labels or labels of the loss function. - logits: Logits of the loss function. - positive_weights: Weight on the positive examples. - negative_weights: Weight on the negative examples. - - Returns: - Converted labels, logits, positive_weights, negative_weights. 
- """ - logits = tf.convert_to_tensor(logits, name='logits') - labels = convert_and_cast(labels, 'labels', logits.dtype) - if len(labels.get_shape()) == 2 and len(logits.get_shape()) == 3: - labels = tf.expand_dims(labels, [2]) - - positive_weights = convert_and_cast(positive_weights, 'positive_weights', - logits.dtype) - positive_weights = expand_outer(positive_weights, logits.get_shape().ndims) - negative_weights = convert_and_cast(negative_weights, 'negative_weights', - logits.dtype) - negative_weights = expand_outer(negative_weights, logits.get_shape().ndims) - return labels, logits, positive_weights, negative_weights - - -def get_num_labels(labels_or_logits): - """Returns the number of labels inferred from labels_or_logits.""" - if labels_or_logits.get_shape().ndims <= 1: - return 1 - return labels_or_logits.get_shape()[1].value diff --git a/research/global_objectives/util_test.py b/research/global_objectives/util_test.py deleted file mode 100644 index 195252a53eb1d0a50735d2f987b0882681b0544a..0000000000000000000000000000000000000000 --- a/research/global_objectives/util_test.py +++ /dev/null @@ -1,333 +0,0 @@ -# Copyright 2018 The TensorFlow Global Objectives Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for global objectives util functions.""" - -# Dependency imports -from absl.testing import parameterized -import numpy as np -import tensorflow as tf - -from global_objectives import util - - -def weighted_sigmoid_cross_entropy(targets, logits, weight): - return (weight * targets * np.log(1.0 + np.exp(-logits)) + ( - (1.0 - targets) * np.log(1.0 + 1.0 / np.exp(-logits)))) - - -def hinge_loss(labels, logits): - # Mostly copied from tensorflow.python.ops.losses but with loss per datapoint. - labels = tf.to_float(labels) - all_ones = tf.ones_like(labels) - labels = tf.subtract(2 * labels, all_ones) - return tf.nn.relu(tf.subtract(all_ones, tf.multiply(labels, logits))) - - -class WeightedSigmoidCrossEntropyTest(parameterized.TestCase, tf.test.TestCase): - - def testTrivialCompatibilityWithSigmoidCrossEntropy(self): - """Tests compatibility with unweighted function with weight 1.0.""" - x_shape = [300, 10] - targets = np.random.random_sample(x_shape).astype(np.float32) - logits = np.random.randn(*x_shape).astype(np.float32) - weighted_loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits) - expected_loss = ( - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - logits, targets)) - with self.test_session(): - self.assertAllClose(expected_loss.eval(), - weighted_loss.eval(), - atol=0.000001) - - def testNonTrivialCompatibilityWithSigmoidCrossEntropy(self): - """Tests use of an arbitrary weight (4.12).""" - x_shape = [300, 10] - targets = np.random.random_sample(x_shape).astype(np.float32) - logits = np.random.randn(*x_shape).astype(np.float32) - weight = 4.12 - weighted_loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits, - weight, - weight) - expected_loss = ( - weight * - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - logits, targets)) - with self.test_session(): - 
self.assertAllClose(expected_loss.eval(), - weighted_loss.eval(), - atol=0.000001) - - def testDifferentSizeWeightedSigmoidCrossEntropy(self): - """Tests correctness on 3D tensors. - - Tests that the function works as expected when logits is a 3D tensor and - targets is a 2D tensor. - """ - targets_shape = [30, 4] - logits_shape = [targets_shape[0], targets_shape[1], 3] - targets = np.random.random_sample(targets_shape).astype(np.float32) - logits = np.random.randn(*logits_shape).astype(np.float32) - - weight_vector = [2.0, 3.0, 13.0] - loss = util.weighted_sigmoid_cross_entropy_with_logits(targets, - logits, - weight_vector) - - with self.test_session(): - loss = loss.eval() - for i in range(0, len(weight_vector)): - expected = weighted_sigmoid_cross_entropy(targets, logits[:, :, i], - weight_vector[i]) - self.assertAllClose(loss[:, :, i], expected, atol=0.000001) - - @parameterized.parameters((300, 10, 0.3), (20, 4, 2.0), (30, 4, 3.9)) - def testWeightedSigmoidCrossEntropy(self, batch_size, num_labels, weight): - """Tests thats the tf and numpy functions agree on many instances.""" - x_shape = [batch_size, num_labels] - targets = np.random.random_sample(x_shape).astype(np.float32) - logits = np.random.randn(*x_shape).astype(np.float32) - - with self.test_session(): - loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, - logits, - weight, - 1.0, - name='weighted-loss') - expected = weighted_sigmoid_cross_entropy(targets, logits, weight) - self.assertAllClose(expected, loss.eval(), atol=0.000001) - - def testGradients(self): - """Tests that weighted loss gradients behave as expected.""" - dummy_tensor = tf.constant(1.0) - - positives_shape = [10, 1] - positives_logits = dummy_tensor * tf.Variable( - tf.random_normal(positives_shape) + 1.0) - positives_targets = tf.ones(positives_shape) - positives_weight = 4.6 - positives_loss = ( - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - positives_logits, positives_targets) * 
positives_weight) - - negatives_shape = [190, 1] - negatives_logits = dummy_tensor * tf.Variable( - tf.random_normal(negatives_shape)) - negatives_targets = tf.zeros(negatives_shape) - negatives_weight = 0.9 - negatives_loss = ( - tf.contrib.nn.deprecated_flipped_sigmoid_cross_entropy_with_logits( - negatives_logits, negatives_targets) * negatives_weight) - - all_logits = tf.concat([positives_logits, negatives_logits], 0) - all_targets = tf.concat([positives_targets, negatives_targets], 0) - weighted_loss = tf.reduce_sum( - util.weighted_sigmoid_cross_entropy_with_logits( - all_targets, all_logits, positives_weight, negatives_weight)) - weighted_gradients = tf.gradients(weighted_loss, dummy_tensor) - - expected_loss = tf.add( - tf.reduce_sum(positives_loss), - tf.reduce_sum(negatives_loss)) - expected_gradients = tf.gradients(expected_loss, dummy_tensor) - - with tf.Session() as session: - tf.global_variables_initializer().run() - grad, expected_grad = session.run( - [weighted_gradients, expected_gradients]) - self.assertAllClose(grad, expected_grad) - - def testDtypeFlexibility(self): - """Tests the loss on inputs of varying data types.""" - shape = [20, 3] - logits = np.random.randn(*shape) - targets = tf.truncated_normal(shape) - positive_weights = tf.constant(3, dtype=tf.int64) - negative_weights = 1 - - loss = util.weighted_sigmoid_cross_entropy_with_logits( - targets, logits, positive_weights, negative_weights) - - with self.test_session(): - self.assertEqual(loss.eval().dtype, np.float) - - -class WeightedHingeLossTest(tf.test.TestCase): - - def testTrivialCompatibilityWithHinge(self): - # Tests compatibility with unweighted hinge loss. 
- x_shape = [55, 10] - logits = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - targets = tf.to_float(tf.constant(np.random.random_sample(x_shape) > 0.3)) - weighted_loss = util.weighted_hinge_loss(targets, logits) - expected_loss = hinge_loss(targets, logits) - with self.test_session(): - self.assertAllClose(expected_loss.eval(), weighted_loss.eval()) - - def testLessTrivialCompatibilityWithHinge(self): - # Tests compatibility with a constant weight for positives and negatives. - x_shape = [56, 11] - logits = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - targets = tf.to_float(tf.constant(np.random.random_sample(x_shape) > 0.7)) - weight = 1.0 + 1.0/2 + 1.0/3 + 1.0/4 + 1.0/5 + 1.0/6 + 1.0/7 - weighted_loss = util.weighted_hinge_loss(targets, logits, weight, weight) - expected_loss = hinge_loss(targets, logits) * weight - with self.test_session(): - self.assertAllClose(expected_loss.eval(), weighted_loss.eval()) - - def testNontrivialCompatibilityWithHinge(self): - # Tests compatibility with different positive and negative weights. 
- x_shape = [23, 8] - logits_positives = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - logits_negatives = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - targets_positives = tf.ones(x_shape) - targets_negatives = tf.zeros(x_shape) - logits = tf.concat([logits_positives, logits_negatives], 0) - targets = tf.concat([targets_positives, targets_negatives], 0) - - raw_loss = util.weighted_hinge_loss(targets, - logits, - positive_weights=3.4, - negative_weights=1.2) - loss = tf.reduce_sum(raw_loss, 0) - positives_hinge = hinge_loss(targets_positives, logits_positives) - negatives_hinge = hinge_loss(targets_negatives, logits_negatives) - expected = tf.add(tf.reduce_sum(3.4 * positives_hinge, 0), - tf.reduce_sum(1.2 * negatives_hinge, 0)) - - with self.test_session(): - self.assertAllClose(loss.eval(), expected.eval()) - - def test3DLogitsAndTargets(self): - # Tests correctness when logits is 3D and targets is 2D. - targets_shape = [30, 4] - logits_shape = [targets_shape[0], targets_shape[1], 3] - targets = tf.to_float( - tf.constant(np.random.random_sample(targets_shape) > 0.7)) - logits = tf.constant(np.random.randn(*logits_shape).astype(np.float32)) - weight_vector = [1.0, 1.0, 1.0] - loss = util.weighted_hinge_loss(targets, logits, weight_vector) - - with self.test_session(): - loss_value = loss.eval() - for i in range(len(weight_vector)): - expected = hinge_loss(targets, logits[:, :, i]).eval() - self.assertAllClose(loss_value[:, :, i], expected) - - -class BuildLabelPriorsTest(tf.test.TestCase): - - def testLabelPriorConsistency(self): - # Checks that, with zero pseudocounts, the returned label priors reproduce - # label frequencies in the batch. 
- batch_shape = [4, 10] - labels = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.678))) - - label_priors_update = util.build_label_priors( - labels=labels, positive_pseudocount=0, negative_pseudocount=0) - expected_priors = tf.reduce_mean(labels, 0) - - with self.test_session(): - tf.global_variables_initializer().run() - self.assertAllClose(label_priors_update.eval(), expected_priors.eval()) - - def testLabelPriorsUpdate(self): - # Checks that the update of label priors behaves as expected. - batch_shape = [1, 5] - labels = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.4))) - label_priors_update = util.build_label_priors(labels) - - label_sum = np.ones(shape=batch_shape) - weight_sum = 2.0 * np.ones(shape=batch_shape) - - with self.test_session() as session: - tf.global_variables_initializer().run() - - for _ in range(3): - label_sum += labels.eval() - weight_sum += np.ones(shape=batch_shape) - expected_posteriors = label_sum / weight_sum - label_priors = label_priors_update.eval().reshape(batch_shape) - self.assertAllClose(label_priors, expected_posteriors) - - # Re-initialize labels to get a new random sample. - session.run(labels.initializer) - - def testLabelPriorsUpdateWithWeights(self): - # Checks the update of label priors with per-example weights. 
- batch_size = 6 - num_labels = 5 - batch_shape = [batch_size, num_labels] - labels = tf.Variable( - tf.to_float(tf.greater(tf.random_uniform(batch_shape), 0.6))) - weights = tf.Variable(tf.random_uniform(batch_shape) * 6.2) - - update_op = util.build_label_priors(labels, weights=weights) - - expected_weighted_label_counts = 1.0 + tf.reduce_sum(weights * labels, 0) - expected_weight_sum = 2.0 + tf.reduce_sum(weights, 0) - expected_label_posteriors = tf.divide(expected_weighted_label_counts, - expected_weight_sum) - - with self.test_session() as session: - tf.global_variables_initializer().run() - - updated_priors, expected_posteriors = session.run( - [update_op, expected_label_posteriors]) - self.assertAllClose(updated_priors, expected_posteriors) - - -class WeightedSurrogateLossTest(parameterized.TestCase, tf.test.TestCase): - - @parameterized.parameters( - ('hinge', util.weighted_hinge_loss), - ('xent', util.weighted_sigmoid_cross_entropy_with_logits)) - def testCompatibilityLoss(self, loss_name, loss_fn): - x_shape = [28, 4] - logits = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - targets = tf.to_float(tf.constant(np.random.random_sample(x_shape) > 0.5)) - positive_weights = 0.66 - negative_weights = 11.1 - expected_loss = loss_fn( - targets, - logits, - positive_weights=positive_weights, - negative_weights=negative_weights) - computed_loss = util.weighted_surrogate_loss( - targets, - logits, - loss_name, - positive_weights=positive_weights, - negative_weights=negative_weights) - with self.test_session(): - self.assertAllClose(expected_loss.eval(), computed_loss.eval()) - - def testSurrogatgeError(self): - x_shape = [7, 3] - logits = tf.constant(np.random.randn(*x_shape).astype(np.float32)) - targets = tf.to_float(tf.constant(np.random.random_sample(x_shape) > 0.5)) - - with self.assertRaises(ValueError): - util.weighted_surrogate_loss(logits, targets, 'bug') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/im2txt/.gitignore 
b/research/im2txt/.gitignore deleted file mode 100644 index fb46913cc7a5994c4324de50829c95d7858c30f4..0000000000000000000000000000000000000000 --- a/research/im2txt/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -/bazel-bin -/bazel-ci_build-cache -/bazel-genfiles -/bazel-out -/bazel-im2txt -/bazel-testlogs -/bazel-tf diff --git a/research/im2txt/README.md b/research/im2txt/README.md deleted file mode 100644 index 2eb72822a39e3959a5a9370f26a9cc5c12be0fda..0000000000000000000000000000000000000000 --- a/research/im2txt/README.md +++ /dev/null @@ -1,342 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Show and Tell: A Neural Image Caption Generator - -A TensorFlow implementation of the image-to-text model described in the paper: - -"Show and Tell: Lessons learned from the 2015 MSCOCO Image Captioning -Challenge." - -Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan. 
- -*IEEE transactions on pattern analysis and machine intelligence (2016).* - -Full text available at: http://arxiv.org/abs/1609.06647 - -## Contact -***Author:*** Chris Shallue - -***Pull requests and issues:*** @cshallue - -## Contents -* [Model Overview](#model-overview) - * [Introduction](#introduction) - * [Architecture](#architecture) -* [Getting Started](#getting-started) - * [A Note on Hardware and Training Time](#a-note-on-hardware-and-training-time) - * [Install Required Packages](#install-required-packages) - * [Prepare the Training Data](#prepare-the-training-data) - * [Download the Inception v3 Checkpoint](#download-the-inception-v3-checkpoint) -* [Training a Model](#training-a-model) - * [Initial Training](#initial-training) - * [Fine Tune the Inception v3 Model](#fine-tune-the-inception-v3-model) -* [Generating Captions](#generating-captions) - -## Model Overview - -### Introduction - -The *Show and Tell* model is a deep neural network that learns how to describe -the content of images. For example: - -![Example captions](g3doc/example_captions.jpg) - -### Architecture - -The *Show and Tell* model is an example of an *encoder-decoder* neural network. -It works by first "encoding" an image into a fixed-length vector representation, -and then "decoding" the representation into a natural language description. - -The image encoder is a deep convolutional neural network. This type of -network is widely used for image tasks and is currently state-of-the-art for -object recognition and detection. Our particular choice of network is the -[*Inception v3*](http://arxiv.org/abs/1512.00567) image recognition model -pretrained on the -[ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) image -classification dataset. - -The decoder is a long short-term memory (LSTM) network. This type of network is -commonly used for sequence modeling tasks such as language modeling and machine -translation. 
In the *Show and Tell* model, the LSTM network is trained as a -language model conditioned on the image encoding. - -Words in the captions are represented with an embedding model. Each word in the -vocabulary is associated with a fixed-length vector representation that is -learned during training. - -The following diagram illustrates the model architecture. - -![Show and Tell Architecture](g3doc/show_and_tell_architecture.png) - -In this diagram, \{*s*0, *s*1, ..., *s**N*-1\} -are the words of the caption and \{*w**e**s*0, -*w**e**s*1, ..., *w**e**s**N*-1\} -are their corresponding word embedding vectors. The outputs \{*p*1, -*p*2, ..., *p**N*\} of the LSTM are probability -distributions generated by the model for the next word in the sentence. The -terms \{log *p*1(*s*1), -log *p*2(*s*2), ..., -log *p**N*(*s**N*)\} are the log-likelihoods of the -correct word at each step; the negated sum of these terms is the minimization -objective of the model. - -During the first phase of training the parameters of the *Inception v3* model -are kept fixed: it is simply a static image encoder function. A single trainable -layer is added on top of the *Inception v3* model to transform the image -embedding into the word embedding vector space. The model is trained with -respect to the parameters of the word embeddings, the parameters of the layer on -top of *Inception v3* and the parameters of the LSTM. In the second phase of -training, all parameters - including the parameters of *Inception v3* - are -trained to jointly fine-tune the image encoder and the LSTM. - -Given a trained model and an image we use *beam search* to generate captions for -that image. Captions are generated word-by-word, where at each step *t* we use -the set of sentences already generated with length *t* - 1 to generate a new set -of sentences with length *t*. We keep only the top *k* candidates at each step, -where the hyperparameter *k* is called the *beam size*. 
We have found the best -performance with *k* = 3. - -## Getting Started - -### A Note on Hardware and Training Time - -The time required to train the *Show and Tell* model depends on your specific -hardware and computational capacity. In this guide we assume you will be running -training on a single machine with a GPU. In our experience on an NVIDIA Tesla -K20m GPU the initial training phase takes 1-2 weeks. The second training phase -may take several additional weeks to achieve peak performance (but you can stop -this phase early and still get reasonable results). - -It is possible to achieve a speed-up by implementing distributed training across -a cluster of machines with GPUs, but that is not covered in this guide. - -Whilst it is possible to run this code on a CPU, beware that this may be -approximately 10 times slower. - -### Install Required Packages -First ensure that you have installed the following required packages: - -* **Bazel** ([instructions](http://bazel.io/docs/install.html)) -* **Python 2.7** -* **TensorFlow** 1.0 or greater ([instructions](https://www.tensorflow.org/install/)) -* **NumPy** ([instructions](http://www.scipy.org/install.html)) -* **Natural Language Toolkit (NLTK)**: - * First install NLTK ([instructions](http://www.nltk.org/install.html)) - * Then install the NLTK data package "punkt" ([instructions](http://www.nltk.org/data.html)) -* **Unzip** -### Prepare the Training Data - -To train the model you will need to provide training data in native TFRecord -format. The TFRecord format consists of a set of sharded files containing -serialized `tf.SequenceExample` protocol buffers. Each `tf.SequenceExample` -proto contains an image (JPEG format), a caption and metadata such as the image -id. - -Each caption is a list of words. During preprocessing, a dictionary is created -that assigns each word in the vocabulary to an integer-valued id. Each caption -is encoded as a list of integer word ids in the `tf.SequenceExample` protos. 
- -We have provided a script to download and preprocess the [MSCOCO](http://mscoco.org/) image captioning data set into this format. Downloading -and preprocessing the data may take several hours depending on your network and -computer speed. Please be patient. - -Before running the script, ensure that your hard disk has at least 150GB of -available space for storing the downloaded and processed data. - -```shell -# Location to save the MSCOCO data. -MSCOCO_DIR="${HOME}/im2txt/data/mscoco" - -# Build the preprocessing script. -cd research/im2txt -bazel build //im2txt:download_and_preprocess_mscoco - -# Run the preprocessing script. -bazel-bin/im2txt/download_and_preprocess_mscoco "${MSCOCO_DIR}" -``` - -The final line of the output should read: - -``` -2016-09-01 16:47:47.296630: Finished processing all 20267 image-caption pairs in data set 'test'. -``` - -When the script finishes you will find 256 training, 4 validation and 8 testing -files in `DATA_DIR`. The files will match the patterns `train-?????-of-00256`, -`val-?????-of-00004` and `test-?????-of-00008`, respectively. - -### Download the Inception v3 Checkpoint - -The *Show and Tell* model requires a pretrained *Inception v3* checkpoint file -to initialize the parameters of its image encoder submodel. - -This checkpoint file is provided by the -[TensorFlow-Slim image classification library](https://github.com/tensorflow/models/tree/master/research/slim#tensorflow-slim-image-classification-library) -which provides a suite of pre-trained image classification models. You can read -more about the models provided by the library -[here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). - - -Run the following commands to download the *Inception v3* checkpoint. - -```shell -# Location to save the Inception v3 checkpoint. 
-INCEPTION_DIR="${HOME}/im2txt/data" -mkdir -p ${INCEPTION_DIR} - -wget "http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz" -tar -xvf "inception_v3_2016_08_28.tar.gz" -C ${INCEPTION_DIR} -rm "inception_v3_2016_08_28.tar.gz" -``` - -Note that the *Inception v3* checkpoint will only be used for initializing the -parameters of the *Show and Tell* model. Once the *Show and Tell* model starts -training it will save its own checkpoint files containing the values of all its -parameters (including copies of the *Inception v3* parameters). If training is -stopped and restarted, the parameter values will be restored from the latest -*Show and Tell* checkpoint and the *Inception v3* checkpoint will be ignored. In -other words, the *Inception v3* checkpoint is only used in the 0-th global step -(initialization) of training the *Show and Tell* model. - -## Training a Model - -### Initial Training - -Run the training script. - -```shell -# Directory containing preprocessed MSCOCO data. -MSCOCO_DIR="${HOME}/im2txt/data/mscoco" - -# Inception v3 checkpoint file. -INCEPTION_CHECKPOINT="${HOME}/im2txt/data/inception_v3.ckpt" - -# Directory to save the model. -MODEL_DIR="${HOME}/im2txt/model" - -# Build the model. -cd research/im2txt -bazel build -c opt //im2txt/... - -# Run the training script. -bazel-bin/im2txt/train \ - --input_file_pattern="${MSCOCO_DIR}/train-?????-of-00256" \ - --inception_checkpoint_file="${INCEPTION_CHECKPOINT}" \ - --train_dir="${MODEL_DIR}/train" \ - --train_inception=false \ - --number_of_steps=1000000 -``` - -Run the evaluation script in a separate process. This will log evaluation -metrics to TensorBoard which allows training progress to be monitored in -real-time. - -Note that you may run out of memory if you run the evaluation script on the same -GPU as the training script. You can run the command -`export CUDA_VISIBLE_DEVICES=""` to force the evaluation script to run on CPU. 
-If evaluation runs too slowly on CPU, you can decrease the value of -`--num_eval_examples`. - -```shell -MSCOCO_DIR="${HOME}/im2txt/data/mscoco" -MODEL_DIR="${HOME}/im2txt/model" - -# Ignore GPU devices (only necessary if your GPU is currently memory -# constrained, for example, by running the training script). -export CUDA_VISIBLE_DEVICES="" - -# Run the evaluation script. This will run in a loop, periodically loading the -# latest model checkpoint file and computing evaluation metrics. -bazel-bin/im2txt/evaluate \ - --input_file_pattern="${MSCOCO_DIR}/val-?????-of-00004" \ - --checkpoint_dir="${MODEL_DIR}/train" \ - --eval_dir="${MODEL_DIR}/eval" -``` - -Run a TensorBoard server in a separate process for real-time monitoring of -training progress and evaluation metrics. - -```shell -MODEL_DIR="${HOME}/im2txt/model" - -# Run a TensorBoard server. -tensorboard --logdir="${MODEL_DIR}" -``` - -### Fine Tune the Inception v3 Model - -Your model will already be able to generate reasonable captions after the first -phase of training. Try it out! (See [Generating Captions](#generating-captions)). - -You can further improve the performance of the model by running a -second training phase to jointly fine-tune the parameters of the *Inception v3* -image submodel and the LSTM. - -```shell -# Restart the training script with --train_inception=true. -bazel-bin/im2txt/train \ - --input_file_pattern="${MSCOCO_DIR}/train-?????-of-00256" \ - --train_dir="${MODEL_DIR}/train" \ - --train_inception=true \ - --number_of_steps=3000000 # Additional 2M steps (assuming 1M in initial training). -``` - -Note that training will proceed much slower now, and the model will continue to -improve by a small amount for a long time. We have found that it will improve -slowly for an additional 2-2.5 million steps before it begins to overfit. This -may take several weeks on a single GPU. 
If you don't care about absolutely -optimal performance then feel free to halt training sooner by stopping the -training script or passing a smaller value to the flag `--number_of_steps`. Your -model will still work reasonably well. - -## Generating Captions - -Your trained *Show and Tell* model can generate captions for any JPEG image! The -following command line will generate captions for an image from the test set. - -```shell -# Path to checkpoint file or a directory containing checkpoint files. Passing -# a directory will only work if there is also a file named 'checkpoint' which -# lists the available checkpoints in the directory. It will not work if you -# point to a directory with just a copy of a model checkpoint: in that case, -# you will need to pass the checkpoint path explicitly. -CHECKPOINT_PATH="${HOME}/im2txt/model/train" - -# Vocabulary file generated by the preprocessing script. -VOCAB_FILE="${HOME}/im2txt/data/mscoco/word_counts.txt" - -# JPEG image file to caption. -IMAGE_FILE="${HOME}/im2txt/data/mscoco/raw-data/val2014/COCO_val2014_000000224477.jpg" - -# Build the inference binary. -cd research/im2txt -bazel build -c opt //im2txt:run_inference - -# Ignore GPU devices (only necessary if your GPU is currently memory -# constrained, for example, by running the training script). -export CUDA_VISIBLE_DEVICES="" - -# Run inference to generate captions. -bazel-bin/im2txt/run_inference \ - --checkpoint_path=${CHECKPOINT_PATH} \ - --vocab_file=${VOCAB_FILE} \ - --input_files=${IMAGE_FILE} -``` - -Example output: - -``` -Captions for image COCO_val2014_000000224477.jpg: - 0) a man riding a wave on top of a surfboard . (p=0.040413) - 1) a person riding a surf board on a wave (p=0.017452) - 2) a man riding a wave on a surfboard in the ocean . (p=0.005743) -``` - -Note: you may get different results. Some variation between different models is -expected. 
- -Here is the image: - -![Surfer](g3doc/COCO_val2014_000000224477.jpg) diff --git a/research/im2txt/WORKSPACE b/research/im2txt/WORKSPACE deleted file mode 100644 index 22da718b06f9c61be4ffdf45e48919ed4a5f17ae..0000000000000000000000000000000000000000 --- a/research/im2txt/WORKSPACE +++ /dev/null @@ -1 +0,0 @@ -workspace(name = "im2txt") diff --git a/research/im2txt/conda-env/ubuntu-18-04-environment.yaml b/research/im2txt/conda-env/ubuntu-18-04-environment.yaml deleted file mode 100644 index 332ff2a47f8f49fcdde7b769c29ff84cf5a5ff9d..0000000000000000000000000000000000000000 --- a/research/im2txt/conda-env/ubuntu-18-04-environment.yaml +++ /dev/null @@ -1,142 +0,0 @@ -name: im2txt -channels: - - defaults -dependencies: - - _tflow_select=2.3.0=mkl - - absl-py=0.5.0=py27_0 - - astor=0.7.1=py27_0 - - backports=1.0=py27_1 - - backports.functools_lru_cache=1.5=py27_1 - - backports.shutil_get_terminal_size=1.0.0=py27_2 - - backports.weakref=1.0.post1=py27_0 - - backports_abc=0.5=py27_0 - - blas=1.0=mkl - - bleach=3.0.2=py27_0 - - ca-certificates=2018.03.07=0 - - certifi=2018.10.15=py27_0 - - configparser=3.5.0=py27_0 - - cycler=0.10.0=py27_0 - - dbus=1.13.2=h714fa37_1 - - decorator=4.3.0=py27_0 - - entrypoints=0.2.3=py27_2 - - enum34=1.1.6=py27_1 - - expat=2.2.6=he6710b0_0 - - fastcache=1.0.2=py27h14c3975_2 - - fontconfig=2.13.0=h9420a91_0 - - freetype=2.9.1=h8a8886c_1 - - funcsigs=1.0.2=py27_0 - - functools32=3.2.3.2=py27_1 - - futures=3.2.0=py27_0 - - gast=0.2.0=py27_0 - - glib=2.56.2=hd408876_0 - - gmp=6.1.2=h6c8ec71_1 - - gmpy2=2.0.8=py27h10f8cd9_2 - - grpcio=1.12.1=py27hdbcaa40_0 - - gst-plugins-base=1.14.0=hbbd80ab_1 - - gstreamer=1.14.0=hb453b48_1 - - h5py=2.8.0=py27h989c5e5_3 - - hdf5=1.10.2=hba1933b_1 - - icu=58.2=h9c2bf20_1 - - intel-openmp=2019.0=118 - - ipaddress=1.0.22=py27_0 - - ipykernel=4.10.0=py27_0 - - ipython=5.8.0=py27_0 - - ipython_genutils=0.2.0=py27_0 - - ipywidgets=7.4.2=py27_0 - - jinja2=2.10=py27_0 - - jpeg=9b=h024ee3a_2 - - 
jsonschema=2.6.0=py27_0 - - jupyter=1.0.0=py27_7 - - jupyter_client=5.2.3=py27_0 - - jupyter_console=5.2.0=py27_1 - - jupyter_core=4.4.0=py27_0 - - keras-applications=1.0.6=py27_0 - - keras-preprocessing=1.0.5=py27_0 - - kiwisolver=1.0.1=py27hf484d3e_0 - - libedit=3.1.20170329=h6b74fdf_2 - - libffi=3.2.1=hd88cf55_4 - - libgcc-ng=8.2.0=hdf63c60_1 - - libgfortran-ng=7.3.0=hdf63c60_0 - - libpng=1.6.35=hbc83047_0 - - libprotobuf=3.6.0=hdbcaa40_0 - - libsodium=1.0.16=h1bed415_0 - - libstdcxx-ng=8.2.0=hdf63c60_1 - - libuuid=1.0.3=h1bed415_2 - - libxcb=1.13=h1bed415_1 - - libxml2=2.9.8=h26e45fe_1 - - linecache2=1.0.0=py27_0 - - markdown=3.0.1=py27_0 - - markupsafe=1.0=py27h14c3975_1 - - matplotlib=2.2.3=py27hb69df0a_0 - - mistune=0.8.4=py27h7b6447c_0 - - mkl=2019.0=118 - - mkl_fft=1.0.6=py27h7dd41cf_0 - - mkl_random=1.0.1=py27h4414c95_1 - - mock=2.0.0=py27_0 - - mpc=1.1.0=h10f8cd9_1 - - mpfr=4.0.1=hdf1c602_3 - - mpmath=1.0.0=py27_2 - - nbconvert=5.3.1=py27_0 - - nbformat=4.4.0=py27_0 - - ncurses=6.1=hf484d3e_0 - - nltk=3.3.0=py27_0 - - nose=1.3.7=py27_2 - - notebook=5.7.0=py27_0 - - numpy=1.15.3=py27h1d66e8a_0 - - numpy-base=1.15.3=py27h81de0dd_0 - - openssl=1.0.2p=h14c3975_0 - - pandas=0.23.4=py27h04863e7_0 - - pandoc=2.2.3.2=0 - - pandocfilters=1.4.2=py27_1 - - pathlib2=2.3.2=py27_0 - - pbr=4.3.0=py27_0 - - pcre=8.42=h439df22_0 - - pexpect=4.6.0=py27_0 - - pickleshare=0.7.5=py27_0 - - pip=10.0.1=py27_0 - - prometheus_client=0.4.2=py27_0 - - prompt_toolkit=1.0.15=py27_0 - - protobuf=3.6.0=py27hf484d3e_0 - - ptyprocess=0.6.0=py27_0 - - pygments=2.2.0=py27_0 - - pyparsing=2.2.2=py27_0 - - pyqt=5.9.2=py27h05f1152_2 - - python=2.7.15=h77bded6_2 - - python-dateutil=2.7.3=py27_0 - - pytz=2018.5=py27_0 - - pyzmq=17.1.2=py27h14c3975_0 - - qt=5.9.6=h8703b6f_2 - - qtconsole=4.4.2=py27_0 - - readline=7.0=h7b6447c_5 - - scandir=1.9.0=py27h14c3975_0 - - scipy=1.1.0=py27hfa4b5c9_1 - - send2trash=1.5.0=py27_0 - - setuptools=40.4.3=py27_0 - - simplegeneric=0.8.1=py27_2 - - 
singledispatch=3.4.0.3=py27_0 - - sip=4.19.8=py27hf484d3e_0 - - six=1.11.0=py27_1 - - sqlite=3.25.2=h7b6447c_0 - - subprocess32=3.5.3=py27h7b6447c_0 - - sympy=1.3=py27_0 - - tensorboard=1.11.0=py27hf484d3e_0 - - tensorflow=1.11.0=mkl_py27h25e0b76_0 - - tensorflow-base=1.11.0=mkl_py27h3c3e929_0 - - termcolor=1.1.0=py27_1 - - terminado=0.8.1=py27_1 - - testpath=0.4.2=py27_0 - - tk=8.6.8=hbc83047_0 - - tornado=5.1.1=py27h7b6447c_0 - - traceback2=1.4.0=py27_0 - - traitlets=4.3.2=py27_0 - - unittest2=1.1.0=py27_0 - - wcwidth=0.1.7=py27_0 - - webencodings=0.5.1=py27_1 - - werkzeug=0.14.1=py27_0 - - wheel=0.32.2=py27_0 - - widgetsnbextension=3.4.2=py27_0 - - xz=5.2.4=h14c3975_4 - - zeromq=4.2.5=hf484d3e_1 - - zlib=1.2.11=ha838bed_2 -prefix: /home/arinto_murdopo/anaconda3/envs/im2txt - diff --git a/research/im2txt/g3doc/COCO_val2014_000000224477.jpg b/research/im2txt/g3doc/COCO_val2014_000000224477.jpg deleted file mode 100644 index 8976fa84b40b04c5bf1205a49c8d236b747f8f9b..0000000000000000000000000000000000000000 Binary files a/research/im2txt/g3doc/COCO_val2014_000000224477.jpg and /dev/null differ diff --git a/research/im2txt/g3doc/example_captions.jpg b/research/im2txt/g3doc/example_captions.jpg deleted file mode 100644 index b3a8f43247e5c9c39a3f93daaf1ad34837959ec5..0000000000000000000000000000000000000000 Binary files a/research/im2txt/g3doc/example_captions.jpg and /dev/null differ diff --git a/research/im2txt/g3doc/show_and_tell_architecture.png b/research/im2txt/g3doc/show_and_tell_architecture.png deleted file mode 100644 index 984590d54ba4aa089b5740fd69f6dc6216b9047f..0000000000000000000000000000000000000000 Binary files a/research/im2txt/g3doc/show_and_tell_architecture.png and /dev/null differ diff --git a/research/im2txt/im2txt/BUILD b/research/im2txt/im2txt/BUILD deleted file mode 100644 index 8c403171153c36ee43cde2788dbfcaf9c7bf4293..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/BUILD +++ /dev/null @@ -1,96 +0,0 @@ 
-package(default_visibility = [":internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//im2txt/...", - ], -) - -py_binary( - name = "build_mscoco_data", - srcs = [ - "data/build_mscoco_data.py", - ], -) - -sh_binary( - name = "download_and_preprocess_mscoco", - srcs = ["data/download_and_preprocess_mscoco.sh"], - data = [ - ":build_mscoco_data", - ], -) - -py_library( - name = "configuration", - srcs = ["configuration.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "show_and_tell_model", - srcs = ["show_and_tell_model.py"], - srcs_version = "PY2AND3", - deps = [ - "//im2txt/ops:image_embedding", - "//im2txt/ops:image_processing", - "//im2txt/ops:inputs", - ], -) - -py_test( - name = "show_and_tell_model_test", - size = "large", - srcs = ["show_and_tell_model_test.py"], - deps = [ - ":configuration", - ":show_and_tell_model", - ], -) - -py_library( - name = "inference_wrapper", - srcs = ["inference_wrapper.py"], - srcs_version = "PY2AND3", - deps = [ - ":show_and_tell_model", - "//im2txt/inference_utils:inference_wrapper_base", - ], -) - -py_binary( - name = "train", - srcs = ["train.py"], - srcs_version = "PY2AND3", - deps = [ - ":configuration", - ":show_and_tell_model", - ], -) - -py_binary( - name = "evaluate", - srcs = ["evaluate.py"], - srcs_version = "PY2AND3", - deps = [ - ":configuration", - ":show_and_tell_model", - ], -) - -py_binary( - name = "run_inference", - srcs = ["run_inference.py"], - srcs_version = "PY2AND3", - deps = [ - ":configuration", - ":inference_wrapper", - "//im2txt/inference_utils:caption_generator", - "//im2txt/inference_utils:vocabulary", - ], -) diff --git a/research/im2txt/im2txt/configuration.py b/research/im2txt/im2txt/configuration.py deleted file mode 100644 index 3b664eb9f0cd963fb26929d019ec9cdb3282d0a8..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/configuration.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 
2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Image-to-text model and training configurations.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -class ModelConfig(object): - """Wrapper class for model hyperparameters.""" - - def __init__(self): - """Sets the default model hyperparameters.""" - # File pattern of sharded TFRecord file containing SequenceExample protos. - # Must be provided in training and evaluation modes. - self.input_file_pattern = None - - # Image format ("jpeg" or "png"). - self.image_format = "jpeg" - - # Approximate number of values per input shard. Used to ensure sufficient - # mixing between shards in training. - self.values_per_input_shard = 2300 - # Minimum number of shards to keep in the input queue. - self.input_queue_capacity_factor = 2 - # Number of threads for prefetching SequenceExample protos. - self.num_input_reader_threads = 1 - - # Name of the SequenceExample context feature containing image data. - self.image_feature_name = "image/data" - # Name of the SequenceExample feature list containing integer captions. - self.caption_feature_name = "image/caption_ids" - - # Number of unique words in the vocab (plus 1, for ). 
- # The default value is larger than the expected actual vocab size to allow - # for differences between tokenizer versions used in preprocessing. There is - # no harm in using a value greater than the actual vocab size, but using a - # value less than the actual vocab size will result in an error. - self.vocab_size = 12000 - - # Number of threads for image preprocessing. Should be a multiple of 2. - self.num_preprocess_threads = 4 - - # Batch size. - self.batch_size = 32 - - # File containing an Inception v3 checkpoint to initialize the variables - # of the Inception model. Must be provided when starting training for the - # first time. - self.inception_checkpoint_file = None - - # Dimensions of Inception v3 input images. - self.image_height = 299 - self.image_width = 299 - - # Scale used to initialize model variables. - self.initializer_scale = 0.08 - - # LSTM input and output dimensionality, respectively. - self.embedding_size = 512 - self.num_lstm_units = 512 - - # If < 1.0, the dropout keep probability applied to LSTM variables. - self.lstm_dropout_keep_prob = 0.7 - - -class TrainingConfig(object): - """Wrapper class for training hyperparameters.""" - - def __init__(self): - """Sets the default training hyperparameters.""" - # Number of examples per epoch of training data. - self.num_examples_per_epoch = 586363 - - # Optimizer for training the model. - self.optimizer = "SGD" - - # Learning rate for the initial phase of training. - self.initial_learning_rate = 2.0 - self.learning_rate_decay_factor = 0.5 - self.num_epochs_per_decay = 8.0 - - # Learning rate when fine tuning the Inception v3 parameters. - self.train_inception_learning_rate = 0.0005 - - # If not None, clip gradients to this value. - self.clip_gradients = 5.0 - - # How many model checkpoints to keep. 
- self.max_checkpoints_to_keep = 5 diff --git a/research/im2txt/im2txt/data/build_mscoco_data.py b/research/im2txt/im2txt/data/build_mscoco_data.py deleted file mode 100644 index 2c3e9d977669bf63d8e39128336319b48c0432dd..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/data/build_mscoco_data.py +++ /dev/null @@ -1,483 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts MSCOCO data to TFRecord file format with SequenceExample protos. - -The MSCOCO images are expected to reside in JPEG files located in the following -directory structure: - - train_image_dir/COCO_train2014_000000000151.jpg - train_image_dir/COCO_train2014_000000000260.jpg - ... - -and - - val_image_dir/COCO_val2014_000000000042.jpg - val_image_dir/COCO_val2014_000000000073.jpg - ... - -The MSCOCO annotations JSON files are expected to reside in train_captions_file -and val_captions_file respectively. - -This script converts the combined MSCOCO data into sharded data files consisting -of 256, 4 and 8 TFRecord files, respectively: - - output_dir/train-00000-of-00256 - output_dir/train-00001-of-00256 - ... - output_dir/train-00255-of-00256 - -and - - output_dir/val-00000-of-00004 - ... - output_dir/val-00003-of-00004 - -and - - output_dir/test-00000-of-00008 - ... 
- output_dir/test-00007-of-00008 - -Each TFRecord file contains ~2300 records. Each record within the TFRecord file -is a serialized SequenceExample proto consisting of precisely one image-caption -pair. Note that each image has multiple captions (usually 5) and therefore each -image is replicated multiple times in the TFRecord files. - -The SequenceExample proto contains the following fields: - - context: - image/image_id: integer MSCOCO image identifier - image/data: string containing JPEG encoded image in RGB colorspace - - feature_lists: - image/caption: list of strings containing the (tokenized) caption words - image/caption_ids: list of integer ids corresponding to the caption words - -The captions are tokenized using the NLTK (http://www.nltk.org/) word tokenizer. -The vocabulary of word identifiers is constructed from the sorted list (by -descending frequency) of word tokens in the training set. Only tokens appearing -at least 4 times are considered; all other words get the "unknown" word id. - -NOTE: This script will consume around 100GB of disk space because each image -in the MSCOCO dataset is replicated ~5 times (once per caption) in the output. -This is done for two reasons: - 1. In order to better shuffle the training data. - 2. It makes it easier to perform asynchronous preprocessing of each image in - TensorFlow. - -Running this script using 16 threads may take around 1 hour on a HP Z420. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import Counter -from collections import namedtuple -from datetime import datetime -import json -import os.path -import random -import sys -import threading - - - -import nltk.tokenize -import numpy as np -from six.moves import xrange -import tensorflow as tf - -tf.flags.DEFINE_string("train_image_dir", "/tmp/train2014/", - "Training image directory.") -tf.flags.DEFINE_string("val_image_dir", "/tmp/val2014", - "Validation image directory.") - -tf.flags.DEFINE_string("train_captions_file", "/tmp/captions_train2014.json", - "Training captions JSON file.") -tf.flags.DEFINE_string("val_captions_file", "/tmp/captions_val2014.json", - "Validation captions JSON file.") - -tf.flags.DEFINE_string("output_dir", "/tmp/", "Output data directory.") - -tf.flags.DEFINE_integer("train_shards", 256, - "Number of shards in training TFRecord files.") -tf.flags.DEFINE_integer("val_shards", 4, - "Number of shards in validation TFRecord files.") -tf.flags.DEFINE_integer("test_shards", 8, - "Number of shards in testing TFRecord files.") - -tf.flags.DEFINE_string("start_word", "", - "Special word added to the beginning of each sentence.") -tf.flags.DEFINE_string("end_word", "", - "Special word added to the end of each sentence.") -tf.flags.DEFINE_string("unknown_word", "", - "Special word meaning 'unknown'.") -tf.flags.DEFINE_integer("min_word_count", 4, - "The minimum number of occurrences of each word in the " - "training set for inclusion in the vocabulary.") -tf.flags.DEFINE_string("word_counts_output_file", "/tmp/word_counts.txt", - "Output vocabulary file of word counts.") - -tf.flags.DEFINE_integer("num_threads", 8, - "Number of threads to preprocess the images.") - -FLAGS = tf.flags.FLAGS - -ImageMetadata = namedtuple("ImageMetadata", - ["image_id", "filename", "captions"]) - - -class Vocabulary(object): - """Simple vocabulary wrapper.""" - - 
def __init__(self, vocab, unk_id): - """Initializes the vocabulary. - - Args: - vocab: A dictionary of word to word_id. - unk_id: Id of the special 'unknown' word. - """ - self._vocab = vocab - self._unk_id = unk_id - - def word_to_id(self, word): - """Returns the integer id of a word string.""" - if word in self._vocab: - return self._vocab[word] - else: - return self._unk_id - - -class ImageDecoder(object): - """Helper class for decoding images in TensorFlow.""" - - def __init__(self): - # Create a single TensorFlow Session for all image decoding calls. - self._sess = tf.Session() - - # TensorFlow ops for JPEG decoding. - self._encoded_jpeg = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._encoded_jpeg, channels=3) - - def decode_jpeg(self, encoded_jpeg): - image = self._sess.run(self._decode_jpeg, - feed_dict={self._encoded_jpeg: encoded_jpeg}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _int64_feature(value): - """Wrapper for inserting an int64 Feature into a SequenceExample proto.""" - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def _bytes_feature(value): - """Wrapper for inserting a bytes Feature into a SequenceExample proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)])) - - -def _int64_feature_list(values): - """Wrapper for inserting an int64 FeatureList into a SequenceExample proto.""" - return tf.train.FeatureList(feature=[_int64_feature(v) for v in values]) - - -def _bytes_feature_list(values): - """Wrapper for inserting a bytes FeatureList into a SequenceExample proto.""" - return tf.train.FeatureList(feature=[_bytes_feature(v) for v in values]) - - -def _to_sequence_example(image, decoder, vocab): - """Builds a SequenceExample proto for an image-caption pair. - - Args: - image: An ImageMetadata object. - decoder: An ImageDecoder object. - vocab: A Vocabulary object. - - Returns: - A SequenceExample proto. 
- """ - with tf.gfile.FastGFile(image.filename, "r") as f: - encoded_image = f.read() - - try: - decoder.decode_jpeg(encoded_image) - except (tf.errors.InvalidArgumentError, AssertionError): - print("Skipping file with invalid JPEG data: %s" % image.filename) - return - - context = tf.train.Features(feature={ - "image/image_id": _int64_feature(image.image_id), - "image/data": _bytes_feature(encoded_image), - }) - - assert len(image.captions) == 1 - caption = image.captions[0] - caption_ids = [vocab.word_to_id(word) for word in caption] - feature_lists = tf.train.FeatureLists(feature_list={ - "image/caption": _bytes_feature_list(caption), - "image/caption_ids": _int64_feature_list(caption_ids) - }) - sequence_example = tf.train.SequenceExample( - context=context, feature_lists=feature_lists) - - return sequence_example - - -def _process_image_files(thread_index, ranges, name, images, decoder, vocab, - num_shards): - """Processes and saves a subset of images as TFRecord files in one thread. - - Args: - thread_index: Integer thread identifier within [0, len(ranges)]. - ranges: A list of pairs of integers specifying the ranges of the dataset to - process in parallel. - name: Unique identifier specifying the dataset. - images: List of ImageMetadata. - decoder: An ImageDecoder object. - vocab: A Vocabulary object. - num_shards: Integer number of shards for the output files. - """ - # Each thread produces N shards where N = num_shards / num_threads. For - # instance, if num_shards = 128, and num_threads = 2, then the first thread - # would produce shards [0, 64). 
- num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_images_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in xrange(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = "%s-%.5d-of-%.5d" % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_dir, output_filename) - writer = tf.python_io.TFRecordWriter(output_file) - - shard_counter = 0 - images_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) - for i in images_in_shard: - image = images[i] - - sequence_example = _to_sequence_example(image, decoder, vocab) - if sequence_example is not None: - writer.write(sequence_example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print("%s [thread %d]: Processed %d of %d items in thread batch." % - (datetime.now(), thread_index, counter, num_images_in_thread)) - sys.stdout.flush() - - writer.close() - print("%s [thread %d]: Wrote %d image-caption pairs to %s" % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print("%s [thread %d]: Wrote %d image-caption pairs to %d shards." % - (datetime.now(), thread_index, counter, num_shards_per_batch)) - sys.stdout.flush() - - -def _process_dataset(name, images, vocab, num_shards): - """Processes a complete data set and saves it as a TFRecord. - - Args: - name: Unique identifier specifying the dataset. - images: List of ImageMetadata. - vocab: A Vocabulary object. - num_shards: Integer number of shards for the output files. - """ - # Break up each image into a separate entity for each caption. 
- images = [ImageMetadata(image.image_id, image.filename, [caption]) - for image in images for caption in image.captions] - - # Shuffle the ordering of images. Make the randomization repeatable. - random.seed(12345) - random.shuffle(images) - - # Break the images into num_threads batches. Batch i is defined as - # images[ranges[i][0]:ranges[i][1]]. - num_threads = min(num_shards, FLAGS.num_threads) - spacing = np.linspace(0, len(images), num_threads + 1).astype(np.int) - ranges = [] - threads = [] - for i in xrange(len(spacing) - 1): - ranges.append([spacing[i], spacing[i + 1]]) - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - # Create a utility for decoding JPEG images to run sanity checks. - decoder = ImageDecoder() - - # Launch a thread for each batch. - print("Launching %d threads for spacings: %s" % (num_threads, ranges)) - for thread_index in xrange(len(ranges)): - args = (thread_index, ranges, name, images, decoder, vocab, num_shards) - t = threading.Thread(target=_process_image_files, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print("%s: Finished processing all %d image-caption pairs in data set '%s'." % - (datetime.now(), len(images), name)) - - -def _create_vocab(captions): - """Creates the vocabulary of word to word_id. - - The vocabulary is saved to disk in a text file of word counts. The id of each - word in the file is its corresponding 0-based line number. - - Args: - captions: A list of lists of strings. - - Returns: - A Vocabulary object. - """ - print("Creating vocabulary.") - counter = Counter() - for c in captions: - counter.update(c) - print("Total words:", len(counter)) - - # Filter uncommon words and sort by descending count. 
- word_counts = [x for x in counter.items() if x[1] >= FLAGS.min_word_count] - word_counts.sort(key=lambda x: x[1], reverse=True) - print("Words in vocabulary:", len(word_counts)) - - # Write out the word counts file. - with tf.gfile.FastGFile(FLAGS.word_counts_output_file, "w") as f: - f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts])) - print("Wrote vocabulary file:", FLAGS.word_counts_output_file) - - # Create the vocabulary dictionary. - reverse_vocab = [x[0] for x in word_counts] - unk_id = len(reverse_vocab) - vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - vocab = Vocabulary(vocab_dict, unk_id) - - return vocab - - -def _process_caption(caption): - """Processes a caption string into a list of tonenized words. - - Args: - caption: A string caption. - - Returns: - A list of strings; the tokenized caption. - """ - tokenized_caption = [FLAGS.start_word] - tokenized_caption.extend(nltk.tokenize.word_tokenize(caption.lower())) - tokenized_caption.append(FLAGS.end_word) - return tokenized_caption - - -def _load_and_process_metadata(captions_file, image_dir): - """Loads image metadata from a JSON file and processes the captions. - - Args: - captions_file: JSON file containing caption annotations. - image_dir: Directory containing the image files. - - Returns: - A list of ImageMetadata. - """ - with tf.gfile.FastGFile(captions_file, "r") as f: - caption_data = json.load(f) - - # Extract the filenames. - id_to_filename = [(x["id"], x["file_name"]) for x in caption_data["images"]] - - # Extract the captions. Each image_id is associated with multiple captions. 
- id_to_captions = {} - for annotation in caption_data["annotations"]: - image_id = annotation["image_id"] - caption = annotation["caption"] - id_to_captions.setdefault(image_id, []) - id_to_captions[image_id].append(caption) - - assert len(id_to_filename) == len(id_to_captions) - assert set([x[0] for x in id_to_filename]) == set(id_to_captions.keys()) - print("Loaded caption metadata for %d images from %s" % - (len(id_to_filename), captions_file)) - - # Process the captions and combine the data into a list of ImageMetadata. - print("Processing captions.") - image_metadata = [] - num_captions = 0 - for image_id, base_filename in id_to_filename: - filename = os.path.join(image_dir, base_filename) - captions = [_process_caption(c) for c in id_to_captions[image_id]] - image_metadata.append(ImageMetadata(image_id, filename, captions)) - num_captions += len(captions) - print("Finished processing %d captions for %d images in %s" % - (num_captions, len(id_to_filename), captions_file)) - - return image_metadata - - -def main(unused_argv): - def _is_valid_num_shards(num_shards): - """Returns True if num_shards is compatible with FLAGS.num_threads.""" - return num_shards < FLAGS.num_threads or not num_shards % FLAGS.num_threads - - assert _is_valid_num_shards(FLAGS.train_shards), ( - "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards") - assert _is_valid_num_shards(FLAGS.val_shards), ( - "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards") - assert _is_valid_num_shards(FLAGS.test_shards), ( - "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards") - - if not tf.gfile.IsDirectory(FLAGS.output_dir): - tf.gfile.MakeDirs(FLAGS.output_dir) - - # Load image metadata from caption files. 
- mscoco_train_dataset = _load_and_process_metadata(FLAGS.train_captions_file, - FLAGS.train_image_dir) - mscoco_val_dataset = _load_and_process_metadata(FLAGS.val_captions_file, - FLAGS.val_image_dir) - - # Redistribute the MSCOCO data as follows: - # train_dataset = 100% of mscoco_train_dataset + 85% of mscoco_val_dataset. - # val_dataset = 5% of mscoco_val_dataset (for validation during training). - # test_dataset = 10% of mscoco_val_dataset (for final evaluation). - train_cutoff = int(0.85 * len(mscoco_val_dataset)) - val_cutoff = int(0.90 * len(mscoco_val_dataset)) - train_dataset = mscoco_train_dataset + mscoco_val_dataset[0:train_cutoff] - val_dataset = mscoco_val_dataset[train_cutoff:val_cutoff] - test_dataset = mscoco_val_dataset[val_cutoff:] - - # Create vocabulary from the training captions. - train_captions = [c for image in train_dataset for c in image.captions] - vocab = _create_vocab(train_captions) - - _process_dataset("train", train_dataset, vocab, FLAGS.train_shards) - _process_dataset("val", val_dataset, vocab, FLAGS.val_shards) - _process_dataset("test", test_dataset, vocab, FLAGS.test_shards) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/im2txt/im2txt/data/download_and_preprocess_mscoco.sh b/research/im2txt/im2txt/data/download_and_preprocess_mscoco.sh deleted file mode 100755 index ab3ff28d576adcbf1992de4c00dfa350dd93b1c3..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/data/download_and_preprocess_mscoco.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess the MSCOCO data set. -# -# The outputs of this script are sharded TFRecord files containing serialized -# SequenceExample protocol buffers. See build_mscoco_data.py for details of how -# the SequenceExample protocol buffers are constructed. -# -# usage: -# ./download_and_preprocess_mscoco.sh -set -e - -if [ -z "$1" ]; then - echo "usage download_and_preproces_mscoco.sh [data dir]" - exit -fi - -if [ "$(uname)" == "Darwin" ]; then - UNZIP="tar -xf" -else - UNZIP="unzip -nq" -fi - -# Create the output directories. -OUTPUT_DIR="${1%/}" -SCRATCH_DIR="${OUTPUT_DIR}/raw-data" -mkdir -p "${OUTPUT_DIR}" -mkdir -p "${SCRATCH_DIR}" -CURRENT_DIR=$(pwd) -WORK_DIR="$0.runfiles/im2txt/im2txt" - -# Helper function to download and unpack a .zip file. -function download_and_unzip() { - local BASE_URL=${1} - local FILENAME=${2} - - if [ ! -f ${FILENAME} ]; then - echo "Downloading ${FILENAME} to $(pwd)" - wget -nd -c "${BASE_URL}/${FILENAME}" - else - echo "Skipping download of ${FILENAME}" - fi - echo "Unzipping ${FILENAME}" - ${UNZIP} ${FILENAME} -} - -cd ${SCRATCH_DIR} - -# Download the images. -BASE_IMAGE_URL="http://msvocds.blob.core.windows.net/coco2014" - -TRAIN_IMAGE_FILE="train2014.zip" -download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE} -TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2014" - -VAL_IMAGE_FILE="val2014.zip" -download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE} -VAL_IMAGE_DIR="${SCRATCH_DIR}/val2014" - -# Download the captions. 
-BASE_CAPTIONS_URL="http://msvocds.blob.core.windows.net/annotations-1-0-3" -CAPTIONS_FILE="captions_train-val2014.zip" -download_and_unzip ${BASE_CAPTIONS_URL} ${CAPTIONS_FILE} -TRAIN_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2014.json" -VAL_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2014.json" - -# Build TFRecords of the image data. -cd "${CURRENT_DIR}" -BUILD_SCRIPT="${WORK_DIR}/build_mscoco_data" -"${BUILD_SCRIPT}" \ - --train_image_dir="${TRAIN_IMAGE_DIR}" \ - --val_image_dir="${VAL_IMAGE_DIR}" \ - --train_captions_file="${TRAIN_CAPTIONS_FILE}" \ - --val_captions_file="${VAL_CAPTIONS_FILE}" \ - --output_dir="${OUTPUT_DIR}" \ - --word_counts_output_file="${OUTPUT_DIR}/word_counts.txt" \ diff --git a/research/im2txt/im2txt/evaluate.py b/research/im2txt/im2txt/evaluate.py deleted file mode 100644 index 0c81a59dab56626cb2c6a19433544f4d239cbd9d..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/evaluate.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Evaluate the model. - -This script should be run concurrently with training so that summaries show up -in TensorBoard. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os.path -import time - - -import numpy as np -import tensorflow as tf - -from im2txt import configuration -from im2txt import show_and_tell_model - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("input_file_pattern", "", - "File pattern of sharded TFRecord input files.") -tf.flags.DEFINE_string("checkpoint_dir", "", - "Directory containing model checkpoints.") -tf.flags.DEFINE_string("eval_dir", "", "Directory to write event logs.") - -tf.flags.DEFINE_integer("eval_interval_secs", 600, - "Interval between evaluation runs.") -tf.flags.DEFINE_integer("num_eval_examples", 10132, - "Number of examples for evaluation.") - -tf.flags.DEFINE_integer("min_global_step", 5000, - "Minimum global step to run evaluation.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def evaluate_model(sess, model, global_step, summary_writer, summary_op): - """Computes perplexity-per-word over the evaluation dataset. - - Summaries and perplexity-per-word are written out to the eval directory. - - Args: - sess: Session object. - model: Instance of ShowAndTellModel; the model to evaluate. - global_step: Integer; global step of the model checkpoint. - summary_writer: Instance of FileWriter. - summary_op: Op for generating model summaries. - """ - # Log model summaries on a single batch. - summary_str = sess.run(summary_op) - summary_writer.add_summary(summary_str, global_step) - - # Compute perplexity over the entire dataset. - num_eval_batches = int( - math.ceil(FLAGS.num_eval_examples / model.config.batch_size)) - - start_time = time.time() - sum_losses = 0. - sum_weights = 0. 
- for i in range(num_eval_batches): - cross_entropy_losses, weights = sess.run([ - model.target_cross_entropy_losses, - model.target_cross_entropy_loss_weights - ]) - sum_losses += np.sum(cross_entropy_losses * weights) - sum_weights += np.sum(weights) - if not i % 100: - tf.logging.info("Computed losses for %d of %d batches.", i + 1, - num_eval_batches) - eval_time = time.time() - start_time - - perplexity = math.exp(sum_losses / sum_weights) - tf.logging.info("Perplexity = %f (%.2g sec)", perplexity, eval_time) - - # Log perplexity to the FileWriter. - summary = tf.Summary() - value = summary.value.add() - value.simple_value = perplexity - value.tag = "Perplexity" - summary_writer.add_summary(summary, global_step) - - # Write the Events file to the eval directory. - summary_writer.flush() - tf.logging.info("Finished processing evaluation at global step %d.", - global_step) - - -def run_once(model, saver, summary_writer, summary_op): - """Evaluates the latest model checkpoint. - - Args: - model: Instance of ShowAndTellModel; the model to evaluate. - saver: Instance of tf.train.Saver for restoring model Variables. - summary_writer: Instance of FileWriter. - summary_op: Op for generating model summaries. - """ - model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) - if not model_path: - tf.logging.info("Skipping evaluation. No checkpoint found in: %s", - FLAGS.checkpoint_dir) - return - - with tf.Session() as sess: - # Load model from checkpoint. - tf.logging.info("Loading model from checkpoint: %s", model_path) - saver.restore(sess, model_path) - global_step = tf.train.global_step(sess, model.global_step.name) - tf.logging.info("Successfully loaded %s at global step = %d.", - os.path.basename(model_path), global_step) - if global_step < FLAGS.min_global_step: - tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step, - FLAGS.min_global_step) - return - - # Start the queue runners. 
- coord = tf.train.Coordinator() - threads = tf.train.start_queue_runners(coord=coord) - - # Run evaluation on the latest checkpoint. - try: - evaluate_model( - sess=sess, - model=model, - global_step=global_step, - summary_writer=summary_writer, - summary_op=summary_op) - except Exception as e: # pylint: disable=broad-except - tf.logging.error("Evaluation failed.") - coord.request_stop(e) - - coord.request_stop() - coord.join(threads, stop_grace_period_secs=10) - - -def run(): - """Runs evaluation in a loop, and logs summaries to TensorBoard.""" - # Create the evaluation directory if it doesn't exist. - eval_dir = FLAGS.eval_dir - if not tf.gfile.IsDirectory(eval_dir): - tf.logging.info("Creating eval directory: %s", eval_dir) - tf.gfile.MakeDirs(eval_dir) - - g = tf.Graph() - with g.as_default(): - # Build the model for evaluation. - model_config = configuration.ModelConfig() - model_config.input_file_pattern = FLAGS.input_file_pattern - model = show_and_tell_model.ShowAndTellModel(model_config, mode="eval") - model.build() - - # Create the Saver to restore model Variables. - saver = tf.train.Saver() - - # Create the summary operation and the summary writer. - summary_op = tf.summary.merge_all() - summary_writer = tf.summary.FileWriter(eval_dir) - - g.finalize() - - # Run a new evaluation run every eval_interval_secs. 
- while True: - start = time.time() - tf.logging.info("Starting evaluation at " + time.strftime( - "%Y-%m-%d-%H:%M:%S", time.localtime())) - run_once(model, saver, summary_writer, summary_op) - time_to_next_eval = start + FLAGS.eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - -def main(unused_argv): - assert FLAGS.input_file_pattern, "--input_file_pattern is required" - assert FLAGS.checkpoint_dir, "--checkpoint_dir is required" - assert FLAGS.eval_dir, "--eval_dir is required" - run() - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/im2txt/im2txt/inference_utils/BUILD b/research/im2txt/im2txt/inference_utils/BUILD deleted file mode 100644 index 82a15fd3ca487e542c41ab337404f8caa63b8c63..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_utils/BUILD +++ /dev/null @@ -1,31 +0,0 @@ -package(default_visibility = ["//im2txt:internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "inference_wrapper_base", - srcs = ["inference_wrapper_base.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "vocabulary", - srcs = ["vocabulary.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "caption_generator", - srcs = ["caption_generator.py"], - srcs_version = "PY2AND3", -) - -py_test( - name = "caption_generator_test", - srcs = ["caption_generator_test.py"], - deps = [ - ":caption_generator", - ], -) diff --git a/research/im2txt/im2txt/inference_utils/caption_generator.py b/research/im2txt/im2txt/inference_utils/caption_generator.py deleted file mode 100644 index f158d3d2330e8f839efdad4cbc4d38811b58d826..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_utils/caption_generator.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Class for generating captions from an image-to-text model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import heapq -import math - - -import numpy as np - - -class Caption(object): - """Represents a complete or partial caption.""" - - def __init__(self, sentence, state, logprob, score, metadata=None): - """Initializes the Caption. - - Args: - sentence: List of word ids in the caption. - state: Model state after generating the previous word. - logprob: Log-probability of the caption. - score: Score of the caption. - metadata: Optional metadata associated with the partial sentence. If not - None, a list of strings with the same length as 'sentence'. - """ - self.sentence = sentence - self.state = state - self.logprob = logprob - self.score = score - self.metadata = metadata - - def __cmp__(self, other): - """Compares Captions by score.""" - assert isinstance(other, Caption) - if self.score == other.score: - return 0 - elif self.score < other.score: - return -1 - else: - return 1 - - # For Python 3 compatibility (__cmp__ is deprecated). - def __lt__(self, other): - assert isinstance(other, Caption) - return self.score < other.score - - # Also for Python 3 compatibility. 
- def __eq__(self, other): - assert isinstance(other, Caption) - return self.score == other.score - - -class TopN(object): - """Maintains the top n elements of an incrementally provided set.""" - - def __init__(self, n): - self._n = n - self._data = [] - - def size(self): - assert self._data is not None - return len(self._data) - - def push(self, x): - """Pushes a new element.""" - assert self._data is not None - if len(self._data) < self._n: - heapq.heappush(self._data, x) - else: - heapq.heappushpop(self._data, x) - - def extract(self, sort=False): - """Extracts all elements from the TopN. This is a destructive operation. - - The only method that can be called immediately after extract() is reset(). - - Args: - sort: Whether to return the elements in descending sorted order. - - Returns: - A list of data; the top n elements provided to the set. - """ - assert self._data is not None - data = self._data - self._data = None - if sort: - data.sort(reverse=True) - return data - - def reset(self): - """Returns the TopN to an empty state.""" - self._data = [] - - -class CaptionGenerator(object): - """Class to generate captions from an image-to-text model.""" - - def __init__(self, - model, - vocab, - beam_size=3, - max_caption_length=20, - length_normalization_factor=0.0): - """Initializes the generator. - - Args: - model: Object encapsulating a trained image-to-text model. Must have - methods feed_image() and inference_step(). For example, an instance of - InferenceWrapperBase. - vocab: A Vocabulary object. - beam_size: Beam size to use when generating captions. - max_caption_length: The maximum caption length before stopping the search. - length_normalization_factor: If != 0, a number x such that captions are - scored by logprob/length^x, rather than logprob. This changes the - relative scores of captions depending on their lengths. For example, if - x > 0 then longer captions will be favored. 
- """ - self.vocab = vocab - self.model = model - - self.beam_size = beam_size - self.max_caption_length = max_caption_length - self.length_normalization_factor = length_normalization_factor - - def beam_search(self, sess, encoded_image): - """Runs beam search caption generation on a single image. - - Args: - sess: TensorFlow Session object. - encoded_image: An encoded image string. - - Returns: - A list of Caption sorted by descending score. - """ - # Feed in the image to get the initial state. - initial_state = self.model.feed_image(sess, encoded_image) - - initial_beam = Caption( - sentence=[self.vocab.start_id], - state=initial_state[0], - logprob=0.0, - score=0.0, - metadata=[""]) - partial_captions = TopN(self.beam_size) - partial_captions.push(initial_beam) - complete_captions = TopN(self.beam_size) - - # Run beam search. - for _ in range(self.max_caption_length - 1): - partial_captions_list = partial_captions.extract() - partial_captions.reset() - input_feed = np.array([c.sentence[-1] for c in partial_captions_list]) - state_feed = np.array([c.state for c in partial_captions_list]) - - softmax, new_states, metadata = self.model.inference_step(sess, - input_feed, - state_feed) - - for i, partial_caption in enumerate(partial_captions_list): - word_probabilities = softmax[i] - state = new_states[i] - # For this partial caption, get the beam_size most probable next words. - # Sort the indexes with numpy, select the last self.beam_size - # (3 by default) (ie, the most likely) and then reverse the sorted - # indexes with [::-1] to sort them from higher to lower. - most_likely_words = np.argsort(word_probabilities)[:-self.beam_size][::-1] - - for w in most_likely_words: - p = word_probabilities[w] - if p < 1e-12: - continue # Avoid log(0). 
- sentence = partial_caption.sentence + [w] - logprob = partial_caption.logprob + math.log(p) - score = logprob - if metadata: - metadata_list = partial_caption.metadata + [metadata[i]] - else: - metadata_list = None - if w == self.vocab.end_id: - if self.length_normalization_factor > 0: - score /= len(sentence)**self.length_normalization_factor - beam = Caption(sentence, state, logprob, score, metadata_list) - complete_captions.push(beam) - else: - beam = Caption(sentence, state, logprob, score, metadata_list) - partial_captions.push(beam) - if partial_captions.size() == 0: - # We have run out of partial candidates; happens when beam_size = 1. - break - - # If we have no complete captions then fall back to the partial captions. - # But never output a mixture of complete and partial captions because a - # partial caption could have a higher score than all the complete captions. - if not complete_captions.size(): - complete_captions = partial_captions - - return complete_captions.extract(sort=True) diff --git a/research/im2txt/im2txt/inference_utils/caption_generator_test.py b/research/im2txt/im2txt/inference_utils/caption_generator_test.py deleted file mode 100644 index bbd069313ac4ddb10a8463d166ab282b68b2e24d..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_utils/caption_generator_test.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Unit tests for CaptionGenerator.""" - -import math - - - -import numpy as np -import tensorflow as tf - -from im2txt.inference_utils import caption_generator - - -class FakeVocab(object): - """Fake Vocabulary for testing purposes.""" - - def __init__(self): - self.start_id = 0 # Word id denoting sentence start. - self.end_id = 1 # Word id denoting sentence end. - - -class FakeModel(object): - """Fake model for testing purposes.""" - - def __init__(self): - # Number of words in the vocab. - self._vocab_size = 12 - - # Dimensionality of the nominal model state. - self._state_size = 1 - - # Map of previous word to the probability distribution of the next word. - self._probabilities = { - 0: {1: 0.1, - 2: 0.2, - 3: 0.3, - 4: 0.4}, - 2: {5: 0.1, - 6: 0.9}, - 3: {1: 0.1, - 7: 0.4, - 8: 0.5}, - 4: {1: 0.3, - 9: 0.3, - 10: 0.4}, - 5: {1: 1.0}, - 6: {1: 1.0}, - 7: {1: 1.0}, - 8: {1: 1.0}, - 9: {1: 0.5, - 11: 0.5}, - 10: {1: 1.0}, - 11: {1: 1.0}, - } - - # pylint: disable=unused-argument - - def feed_image(self, sess, encoded_image): - # Return a nominal model state. - return np.zeros([1, self._state_size]) - - def inference_step(self, sess, input_feed, state_feed): - # Compute the matrix of softmax distributions for the next batch of words. - batch_size = input_feed.shape[0] - softmax_output = np.zeros([batch_size, self._vocab_size]) - for batch_index, word_id in enumerate(input_feed): - for next_word, probability in self._probabilities[word_id].items(): - softmax_output[batch_index, next_word] = probability - - # Nominal state and metadata. 
- new_state = np.zeros([batch_size, self._state_size]) - metadata = None - - return softmax_output, new_state, metadata - - # pylint: enable=unused-argument - - -class CaptionGeneratorTest(tf.test.TestCase): - - def _assertExpectedCaptions(self, - expected_captions, - beam_size=3, - max_caption_length=20, - length_normalization_factor=0): - """Tests that beam search generates the expected captions. - - Args: - expected_captions: A sequence of pairs (sentence, probability), where - sentence is a list of integer ids and probability is a float in [0, 1]. - beam_size: Parameter passed to beam_search(). - max_caption_length: Parameter passed to beam_search(). - length_normalization_factor: Parameter passed to beam_search(). - """ - expected_sentences = [c[0] for c in expected_captions] - expected_probabilities = [c[1] for c in expected_captions] - - # Generate captions. - generator = caption_generator.CaptionGenerator( - model=FakeModel(), - vocab=FakeVocab(), - beam_size=beam_size, - max_caption_length=max_caption_length, - length_normalization_factor=length_normalization_factor) - actual_captions = generator.beam_search(sess=None, encoded_image=None) - - actual_sentences = [c.sentence for c in actual_captions] - actual_probabilities = [math.exp(c.logprob) for c in actual_captions] - - self.assertEqual(expected_sentences, actual_sentences) - self.assertAllClose(expected_probabilities, actual_probabilities) - - def testBeamSize(self): - # Beam size = 1. - expected = [([0, 4, 10, 1], 0.16)] - self._assertExpectedCaptions(expected, beam_size=1) - - # Beam size = 2. - expected = [([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15)] - self._assertExpectedCaptions(expected, beam_size=2) - - # Beam size = 3. - expected = [ - ([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15) - ] - self._assertExpectedCaptions(expected, beam_size=3) - - def testMaxLength(self): - # Max length = 1. 
- expected = [([0], 1.0)] - self._assertExpectedCaptions(expected, max_caption_length=1) - - # Max length = 2. - # There are no complete sentences, so partial sentences are returned. - expected = [([0, 4], 0.4), ([0, 3], 0.3), ([0, 2], 0.2)] - self._assertExpectedCaptions(expected, max_caption_length=2) - - # Max length = 3. - # There is at least one complete sentence, so only complete sentences are - # returned. - expected = [([0, 4, 1], 0.12), ([0, 3, 1], 0.03)] - self._assertExpectedCaptions(expected, max_caption_length=3) - - # Max length = 4. - expected = [ - ([0, 2, 6, 1], 0.18), ([0, 4, 10, 1], 0.16), ([0, 3, 8, 1], 0.15) - ] - self._assertExpectedCaptions(expected, max_caption_length=4) - - def testLengthNormalization(self): - # Length normalization factor = 3. - # The longest caption is returned first, despite having low probability, - # because it has the highest log(probability)/length**3. - expected = [ - ([0, 4, 9, 11, 1], 0.06), - ([0, 2, 6, 1], 0.18), - ([0, 4, 10, 1], 0.16), - ([0, 3, 8, 1], 0.15), - ] - self._assertExpectedCaptions( - expected, beam_size=4, length_normalization_factor=3) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/im2txt/im2txt/inference_utils/inference_wrapper_base.py b/research/im2txt/im2txt/inference_utils/inference_wrapper_base.py deleted file mode 100644 index e94cd6af474488e4b8175fc959e1dbe33cca18c9..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_utils/inference_wrapper_base.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Base wrapper class for performing inference with an image-to-text model. - -Subclasses must implement the following methods: - - build_model(): - Builds the model for inference and returns the model object. - - feed_image(): - Takes an encoded image and returns the initial model state, where "state" - is a numpy array whose specifics are defined by the subclass, e.g. - concatenated LSTM state. It's assumed that feed_image() will be called - precisely once at the start of inference for each image. Subclasses may - compute and/or save per-image internal context in this method. - - inference_step(): - Takes a batch of inputs and states at a single time-step. Returns the - softmax output corresponding to the inputs, and the new states of the batch. - Optionally also returns metadata about the current inference step, e.g. a - serialized numpy array containing activations from a particular model layer. - -Client usage: - 1. Build the model inference graph via build_graph_from_config() or - build_graph_from_proto(). - 2. Call the resulting restore_fn to load the model checkpoint. - 3. For each image in a batch of images: - a) Call feed_image() once to get the initial state. - b) For each step of caption generation, call inference_step(). 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path - - -import tensorflow as tf - -# pylint: disable=unused-argument - - -class InferenceWrapperBase(object): - """Base wrapper class for performing inference with an image-to-text model.""" - - def __init__(self): - pass - - def build_model(self, model_config): - """Builds the model for inference. - - Args: - model_config: Object containing configuration for building the model. - - Returns: - model: The model object. - """ - tf.logging.fatal("Please implement build_model in subclass") - - def _create_restore_fn(self, checkpoint_path, saver): - """Creates a function that restores a model from checkpoint. - - Args: - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. - saver: Saver for restoring variables from the checkpoint file. - - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - - Raises: - ValueError: If checkpoint_path does not refer to a checkpoint file or a - directory containing a checkpoint file. - """ - if tf.gfile.IsDirectory(checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(checkpoint_path) - if not checkpoint_path: - raise ValueError("No checkpoint file found in: %s" % checkpoint_path) - - def _restore_fn(sess): - tf.logging.info("Loading model from checkpoint: %s", checkpoint_path) - saver.restore(sess, checkpoint_path) - tf.logging.info("Successfully loaded checkpoint: %s", - os.path.basename(checkpoint_path)) - - return _restore_fn - - def build_graph_from_config(self, model_config, checkpoint_path): - """Builds the inference graph from a configuration object. - - Args: - model_config: Object containing configuration for building the model. - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. 
- - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - """ - tf.logging.info("Building model.") - self.build_model(model_config) - saver = tf.train.Saver() - - return self._create_restore_fn(checkpoint_path, saver) - - def build_graph_from_proto(self, graph_def_file, saver_def_file, - checkpoint_path): - """Builds the inference graph from serialized GraphDef and SaverDef protos. - - Args: - graph_def_file: File containing a serialized GraphDef proto. - saver_def_file: File containing a serialized SaverDef proto. - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. - - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - """ - # Load the Graph. - tf.logging.info("Loading GraphDef from file: %s", graph_def_file) - graph_def = tf.GraphDef() - with tf.gfile.FastGFile(graph_def_file, "rb") as f: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name="") - - # Load the Saver. - tf.logging.info("Loading SaverDef from file: %s", saver_def_file) - saver_def = tf.train.SaverDef() - with tf.gfile.FastGFile(saver_def_file, "rb") as f: - saver_def.ParseFromString(f.read()) - saver = tf.train.Saver(saver_def=saver_def) - - return self._create_restore_fn(checkpoint_path, saver) - - def feed_image(self, sess, encoded_image): - """Feeds an image and returns the initial model state. - - See comments at the top of file. - - Args: - sess: TensorFlow Session object. - encoded_image: An encoded image string. - - Returns: - state: A numpy array of shape [1, state_size]. - """ - tf.logging.fatal("Please implement feed_image in subclass") - - def inference_step(self, sess, input_feed, state_feed): - """Runs one step of inference. - - Args: - sess: TensorFlow Session object. - input_feed: A numpy array of shape [batch_size]. - state_feed: A numpy array of shape [batch_size, state_size]. 
- - Returns: - softmax_output: A numpy array of shape [batch_size, vocab_size]. - new_state: A numpy array of shape [batch_size, state_size]. - metadata: Optional. If not None, a string containing metadata about the - current inference step (e.g. serialized numpy array containing - activations from a particular model layer.). - """ - tf.logging.fatal("Please implement inference_step in subclass") - -# pylint: enable=unused-argument diff --git a/research/im2txt/im2txt/inference_utils/vocabulary.py b/research/im2txt/im2txt/inference_utils/vocabulary.py deleted file mode 100644 index ecf0ada9c2242cb32c2ea9a300d16411f5e83fab..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_utils/vocabulary.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Vocabulary class for an image-to-text model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - - -class Vocabulary(object): - """Vocabulary class for an image-to-text model.""" - - def __init__(self, - vocab_file, - start_word="", - end_word="", - unk_word=""): - """Initializes the vocabulary. 
- - Args: - vocab_file: File containing the vocabulary, where the words are the first - whitespace-separated token on each line (other tokens are ignored) and - the word ids are the corresponding line numbers. - start_word: Special word denoting sentence start. - end_word: Special word denoting sentence end. - unk_word: Special word denoting unknown words. - """ - if not tf.gfile.Exists(vocab_file): - tf.logging.fatal("Vocab file %s not found.", vocab_file) - tf.logging.info("Initializing vocabulary from file: %s", vocab_file) - - with tf.gfile.GFile(vocab_file, mode="r") as f: - reverse_vocab = list(f.readlines()) - reverse_vocab = [line.split()[0] for line in reverse_vocab] - assert start_word in reverse_vocab - assert end_word in reverse_vocab - if unk_word not in reverse_vocab: - reverse_vocab.append(unk_word) - vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) - - tf.logging.info("Created vocabulary with %d words" % len(vocab)) - - self.vocab = vocab # vocab[word] = id - self.reverse_vocab = reverse_vocab # reverse_vocab[id] = word - - # Save special word ids. - self.start_id = vocab[start_word] - self.end_id = vocab[end_word] - self.unk_id = vocab[unk_word] - - def word_to_id(self, word): - """Returns the integer word id of a word string.""" - if word in self.vocab: - return self.vocab[word] - else: - return self.unk_id - - def id_to_word(self, word_id): - """Returns the word string of an integer word id.""" - if word_id >= len(self.reverse_vocab): - return self.reverse_vocab[self.unk_id] - else: - return self.reverse_vocab[word_id] diff --git a/research/im2txt/im2txt/inference_wrapper.py b/research/im2txt/im2txt/inference_wrapper.py deleted file mode 100644 index a047a9c8d084fd9e69c937915cea8553c2d51817..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/inference_wrapper.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Model wrapper class for performing inference with a ShowAndTellModel.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - - -from im2txt import show_and_tell_model -from im2txt.inference_utils import inference_wrapper_base - - -class InferenceWrapper(inference_wrapper_base.InferenceWrapperBase): - """Model wrapper class for performing inference with a ShowAndTellModel.""" - - def __init__(self): - super(InferenceWrapper, self).__init__() - - def build_model(self, model_config): - model = show_and_tell_model.ShowAndTellModel(model_config, mode="inference") - model.build() - return model - - def feed_image(self, sess, encoded_image): - initial_state = sess.run(fetches="lstm/initial_state:0", - feed_dict={"image_feed:0": encoded_image}) - return initial_state - - def inference_step(self, sess, input_feed, state_feed): - softmax_output, state_output = sess.run( - fetches=["softmax:0", "lstm/state:0"], - feed_dict={ - "input_feed:0": input_feed, - "lstm/state_feed:0": state_feed, - }) - return softmax_output, state_output, None diff --git a/research/im2txt/im2txt/ops/BUILD b/research/im2txt/im2txt/ops/BUILD deleted file mode 100644 index 7d48bf3938c7ecfc94ac6498386e7ce214b8be92..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/ops/BUILD +++ /dev/null 
@@ -1,32 +0,0 @@ -package(default_visibility = ["//im2txt:internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "image_processing", - srcs = ["image_processing.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "image_embedding", - srcs = ["image_embedding.py"], - srcs_version = "PY2AND3", -) - -py_test( - name = "image_embedding_test", - size = "small", - srcs = ["image_embedding_test.py"], - deps = [ - ":image_embedding", - ], -) - -py_library( - name = "inputs", - srcs = ["inputs.py"], - srcs_version = "PY2AND3", -) diff --git a/research/im2txt/im2txt/ops/image_embedding.py b/research/im2txt/im2txt/ops/image_embedding.py deleted file mode 100644 index 58e3ddaa95fa799f245fe2a46f2e948be7d9ebf2..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/ops/image_embedding.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Image embedding ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from tensorflow.contrib.slim.python.slim.nets.inception_v3 import inception_v3_base - -slim = tf.contrib.slim - - -def inception_v3(images, - trainable=True, - is_training=True, - weight_decay=0.00004, - stddev=0.1, - dropout_keep_prob=0.8, - use_batch_norm=True, - batch_norm_params=None, - add_summaries=True, - scope="InceptionV3"): - """Builds an Inception V3 subgraph for image embeddings. - - Args: - images: A float32 Tensor of shape [batch, height, width, channels]. - trainable: Whether the inception submodel should be trainable or not. - is_training: Boolean indicating training mode or not. - weight_decay: Coefficient for weight regularization. - stddev: The standard deviation of the trunctated normal weight initializer. - dropout_keep_prob: Dropout keep probability. - use_batch_norm: Whether to use batch normalization. - batch_norm_params: Parameters for batch normalization. See - tf.contrib.layers.batch_norm for details. - add_summaries: Whether to add activation summaries. - scope: Optional Variable scope. - - Returns: - end_points: A dictionary of activations from inception_v3 layers. - """ - # Only consider the inception model to be in training mode if it's trainable. - is_inception_model_training = trainable and is_training - - if use_batch_norm: - # Default parameters for batch normalization. - if not batch_norm_params: - batch_norm_params = { - "is_training": is_inception_model_training, - "trainable": trainable, - # Decay for the moving averages. - "decay": 0.9997, - # Epsilon to prevent 0s in variance. - "epsilon": 0.001, - # Collection containing the moving mean and moving variance. 
- "variables_collections": { - "beta": None, - "gamma": None, - "moving_mean": ["moving_vars"], - "moving_variance": ["moving_vars"], - } - } - else: - batch_norm_params = None - - if trainable: - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - else: - weights_regularizer = None - - with tf.variable_scope(scope, "InceptionV3", [images]) as scope: - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_regularizer=weights_regularizer, - trainable=trainable): - with slim.arg_scope( - [slim.conv2d], - weights_initializer=tf.truncated_normal_initializer(stddev=stddev), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - net, end_points = inception_v3_base(images, scope=scope) - with tf.variable_scope("logits"): - shape = net.get_shape() - net = slim.avg_pool2d(net, shape[1:3], padding="VALID", scope="pool") - net = slim.dropout( - net, - keep_prob=dropout_keep_prob, - is_training=is_inception_model_training, - scope="dropout") - net = slim.flatten(net, scope="flatten") - - # Add summaries. - if add_summaries: - for v in end_points.values(): - tf.contrib.layers.summaries.summarize_activation(v) - - return net diff --git a/research/im2txt/im2txt/ops/image_embedding_test.py b/research/im2txt/im2txt/ops/image_embedding_test.py deleted file mode 100644 index 66324d68eee0ec9c450375c25229d80283fc909f..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/ops/image_embedding_test.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for tensorflow_models.im2txt.ops.image_embedding.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from im2txt.ops import image_embedding - - -class InceptionV3Test(tf.test.TestCase): - - def setUp(self): - super(InceptionV3Test, self).setUp() - - batch_size = 4 - height = 299 - width = 299 - num_channels = 3 - self._images = tf.placeholder(tf.float32, - [batch_size, height, width, num_channels]) - self._batch_size = batch_size - - def _countInceptionParameters(self): - """Counts the number of parameters in the inception model at top scope.""" - counter = {} - for v in tf.global_variables(): - name_tokens = v.op.name.split("/") - if name_tokens[0] == "InceptionV3": - name = "InceptionV3/" + name_tokens[1] - num_params = v.get_shape().num_elements() - assert num_params - counter[name] = counter.get(name, 0) + num_params - return counter - - def _verifyParameterCounts(self): - """Verifies the number of parameters in the inception model.""" - param_counts = self._countInceptionParameters() - expected_param_counts = { - "InceptionV3/Conv2d_1a_3x3": 960, - "InceptionV3/Conv2d_2a_3x3": 9312, - "InceptionV3/Conv2d_2b_3x3": 18624, - "InceptionV3/Conv2d_3b_1x1": 5360, - "InceptionV3/Conv2d_4a_3x3": 138816, - "InceptionV3/Mixed_5b": 256368, - "InceptionV3/Mixed_5c": 277968, - "InceptionV3/Mixed_5d": 285648, - "InceptionV3/Mixed_6a": 1153920, - "InceptionV3/Mixed_6b": 1298944, 
- "InceptionV3/Mixed_6c": 1692736, - "InceptionV3/Mixed_6d": 1692736, - "InceptionV3/Mixed_6e": 2143872, - "InceptionV3/Mixed_7a": 1699584, - "InceptionV3/Mixed_7b": 5047872, - "InceptionV3/Mixed_7c": 6080064, - } - self.assertDictEqual(expected_param_counts, param_counts) - - def _assertCollectionSize(self, expected_size, collection): - actual_size = len(tf.get_collection(collection)) - if expected_size != actual_size: - self.fail("Found %d items in collection %s (expected %d)." % - (actual_size, collection, expected_size)) - - def testTrainableTrueIsTrainingTrue(self): - embeddings = image_embedding.inception_v3( - self._images, trainable=True, is_training=True) - self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) - - self._verifyParameterCounts() - self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) - self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) - self._assertCollectionSize(188, tf.GraphKeys.UPDATE_OPS) - self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) - self._assertCollectionSize(0, tf.GraphKeys.LOSSES) - self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) - - def testTrainableTrueIsTrainingFalse(self): - embeddings = image_embedding.inception_v3( - self._images, trainable=True, is_training=False) - self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) - - self._verifyParameterCounts() - self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) - self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES) - self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) - self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES) - self._assertCollectionSize(0, tf.GraphKeys.LOSSES) - self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) - - def testTrainableFalseIsTrainingTrue(self): - embeddings = image_embedding.inception_v3( - self._images, trainable=False, is_training=True) - self.assertEqual([self._batch_size, 2048], 
embeddings.get_shape().as_list()) - - self._verifyParameterCounts() - self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) - self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) - self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) - self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) - self._assertCollectionSize(0, tf.GraphKeys.LOSSES) - self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) - - def testTrainableFalseIsTrainingFalse(self): - embeddings = image_embedding.inception_v3( - self._images, trainable=False, is_training=False) - self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list()) - - self._verifyParameterCounts() - self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES) - self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES) - self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS) - self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES) - self._assertCollectionSize(0, tf.GraphKeys.LOSSES) - self._assertCollectionSize(23, tf.GraphKeys.SUMMARIES) - - -if __name__ == "__main__": - tf.test.main() diff --git a/research/im2txt/im2txt/ops/image_processing.py b/research/im2txt/im2txt/ops/image_processing.py deleted file mode 100644 index 6a7545547d5507febaabebf642ee81b6f94319f6..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/ops/image_processing.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Helper functions for image preprocessing.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - - -def distort_image(image, thread_id): - """Perform random distortions on an image. - - Args: - image: A float32 Tensor of shape [height, width, 3] with values in [0, 1). - thread_id: Preprocessing thread id used to select the ordering of color - distortions. There should be a multiple of 2 preprocessing threads. - - Returns: - distorted_image: A float32 Tensor of shape [height, width, 3] with values in - [0, 1]. - """ - # Randomly flip horizontally. - with tf.name_scope("flip_horizontal", values=[image]): - image = tf.image.random_flip_left_right(image) - - # Randomly distort the colors based on thread id. - color_ordering = thread_id % 2 - with tf.name_scope("distort_color", values=[image]): - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.032) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.032) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - - return image - - -def process_image(encoded_image, - is_training, - height, - width, - resize_height=346, - resize_width=346, - thread_id=0, - image_format="jpeg"): - """Decode an image, resize and apply random distortions. 
- - In training, images are distorted slightly differently depending on thread_id. - - Args: - encoded_image: String Tensor containing the image. - is_training: Boolean; whether preprocessing for training or eval. - height: Height of the output image. - width: Width of the output image. - resize_height: If > 0, resize height before crop to final dimensions. - resize_width: If > 0, resize width before crop to final dimensions. - thread_id: Preprocessing thread id used to select the ordering of color - distortions. There should be a multiple of 2 preprocessing threads. - image_format: "jpeg" or "png". - - Returns: - A float32 Tensor of shape [height, width, 3] with values in [-1, 1]. - - Raises: - ValueError: If image_format is invalid. - """ - # Helper function to log an image summary to the visualizer. Summaries are - # only logged in thread 0. - def image_summary(name, image): - if not thread_id: - tf.summary.image(name, tf.expand_dims(image, 0)) - - # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1). - with tf.name_scope("decode", values=[encoded_image]): - if image_format == "jpeg": - image = tf.image.decode_jpeg(encoded_image, channels=3) - elif image_format == "png": - image = tf.image.decode_png(encoded_image, channels=3) - else: - raise ValueError("Invalid image format: %s" % image_format) - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - image_summary("original_image", image) - - # Resize image. - assert (resize_height > 0) == (resize_width > 0) - if resize_height: - image = tf.image.resize_images(image, - size=[resize_height, resize_width], - method=tf.image.ResizeMethod.BILINEAR) - - # Crop to final dimensions. - if is_training: - image = tf.random_crop(image, [height, width, 3]) - else: - # Central crop, assuming resize_height > height, resize_width > width. - image = tf.image.resize_image_with_crop_or_pad(image, height, width) - - image_summary("resized_image", image) - - # Randomly distort the image. 
- if is_training: - image = distort_image(image, thread_id) - - image_summary("final_image", image) - - # Rescale to [-1,1] instead of [0, 1] - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image diff --git a/research/im2txt/im2txt/ops/inputs.py b/research/im2txt/im2txt/ops/inputs.py deleted file mode 100644 index 5dc90c0ce5dfd5c30fe0e0e543999bb15cc13a8c..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/ops/inputs.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Input ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - - -def parse_sequence_example(serialized, image_feature, caption_feature): - """Parses a tensorflow.SequenceExample into an image and caption. - - Args: - serialized: A scalar string Tensor; a single serialized SequenceExample. - image_feature: Name of SequenceExample context feature containing image - data. - caption_feature: Name of SequenceExample feature list containing integer - captions. - - Returns: - encoded_image: A scalar string Tensor containing a JPEG encoded image. - caption: A 1-D uint64 Tensor with dynamically specified length. 
- """ - context, sequence = tf.parse_single_sequence_example( - serialized, - context_features={ - image_feature: tf.FixedLenFeature([], dtype=tf.string) - }, - sequence_features={ - caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64), - }) - - encoded_image = context[image_feature] - caption = sequence[caption_feature] - return encoded_image, caption - - -def prefetch_input_data(reader, - file_pattern, - is_training, - batch_size, - values_per_shard, - input_queue_capacity_factor=16, - num_reader_threads=1, - shard_queue_name="filename_queue", - value_queue_name="input_queue"): - """Prefetches string values from disk into an input queue. - - In training the capacity of the queue is important because a larger queue - means better mixing of training examples between shards. The minimum number of - values kept in the queue is values_per_shard * input_queue_capacity_factor, - where input_queue_memory factor should be chosen to trade-off better mixing - with memory usage. - - Args: - reader: Instance of tf.ReaderBase. - file_pattern: Comma-separated list of file patterns (e.g. - /tmp/train_data-?????-of-00100). - is_training: Boolean; whether prefetching for training or eval. - batch_size: Model batch size used to determine queue capacity. - values_per_shard: Approximate number of values per shard. - input_queue_capacity_factor: Minimum number of values to keep in the queue - in multiples of values_per_shard. See comments above. - num_reader_threads: Number of reader threads to fill the queue. - shard_queue_name: Name for the shards filename queue. - value_queue_name: Name for the values input queue. - - Returns: - A Queue containing prefetched string values. 
- """ - data_files = [] - for pattern in file_pattern.split(","): - data_files.extend(tf.gfile.Glob(pattern)) - if not data_files: - tf.logging.fatal("Found no input files matching %s", file_pattern) - else: - tf.logging.info("Prefetching values from %d files matching %s", - len(data_files), file_pattern) - - if is_training: - filename_queue = tf.train.string_input_producer( - data_files, shuffle=True, capacity=16, name=shard_queue_name) - min_queue_examples = values_per_shard * input_queue_capacity_factor - capacity = min_queue_examples + 100 * batch_size - values_queue = tf.RandomShuffleQueue( - capacity=capacity, - min_after_dequeue=min_queue_examples, - dtypes=[tf.string], - name="random_" + value_queue_name) - else: - filename_queue = tf.train.string_input_producer( - data_files, shuffle=False, capacity=1, name=shard_queue_name) - capacity = values_per_shard + 3 * batch_size - values_queue = tf.FIFOQueue( - capacity=capacity, dtypes=[tf.string], name="fifo_" + value_queue_name) - - enqueue_ops = [] - for _ in range(num_reader_threads): - _, value = reader.read(filename_queue) - enqueue_ops.append(values_queue.enqueue([value])) - tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner( - values_queue, enqueue_ops)) - tf.summary.scalar( - "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity), - tf.cast(values_queue.size(), tf.float32) * (1. / capacity)) - - return values_queue - - -def batch_with_dynamic_pad(images_and_captions, - batch_size, - queue_capacity, - add_summaries=True): - """Batches input images and captions. - - This function splits the caption into an input sequence and a target sequence, - where the target sequence is the input sequence right-shifted by 1. Input and - target sequences are batched and padded up to the maximum length of sequences - in the batch. A mask is created to distinguish real words from padding words. 
- - Example: - Actual captions in the batch ('-' denotes padded character): - [ - [ 1 2 3 4 5 ], - [ 1 2 3 4 - ], - [ 1 2 3 - - ], - ] - - input_seqs: - [ - [ 1 2 3 4 ], - [ 1 2 3 - ], - [ 1 2 - - ], - ] - - target_seqs: - [ - [ 2 3 4 5 ], - [ 2 3 4 - ], - [ 2 3 - - ], - ] - - mask: - [ - [ 1 1 1 1 ], - [ 1 1 1 0 ], - [ 1 1 0 0 ], - ] - - Args: - images_and_captions: A list of pairs [image, caption], where image is a - Tensor of shape [height, width, channels] and caption is a 1-D Tensor of - any length. Each pair will be processed and added to the queue in a - separate thread. - batch_size: Batch size. - queue_capacity: Queue capacity. - add_summaries: If true, add caption length summaries. - - Returns: - images: A Tensor of shape [batch_size, height, width, channels]. - input_seqs: An int32 Tensor of shape [batch_size, padded_length]. - target_seqs: An int32 Tensor of shape [batch_size, padded_length]. - mask: An int32 0/1 Tensor of shape [batch_size, padded_length]. - """ - enqueue_list = [] - for image, caption in images_and_captions: - caption_length = tf.shape(caption)[0] - input_length = tf.expand_dims(tf.subtract(caption_length, 1), 0) - - input_seq = tf.slice(caption, [0], input_length) - target_seq = tf.slice(caption, [1], input_length) - indicator = tf.ones(input_length, dtype=tf.int32) - enqueue_list.append([image, input_seq, target_seq, indicator]) - - images, input_seqs, target_seqs, mask = tf.train.batch_join( - enqueue_list, - batch_size=batch_size, - capacity=queue_capacity, - dynamic_pad=True, - name="batch_and_pad") - - if add_summaries: - lengths = tf.add(tf.reduce_sum(mask, 1), 1) - tf.summary.scalar("caption_length/batch_min", tf.reduce_min(lengths)) - tf.summary.scalar("caption_length/batch_max", tf.reduce_max(lengths)) - tf.summary.scalar("caption_length/batch_mean", tf.reduce_mean(lengths)) - - return images, input_seqs, target_seqs, mask diff --git a/research/im2txt/im2txt/run_inference.py b/research/im2txt/im2txt/run_inference.py deleted 
file mode 100644 index 9848522df162e52394ee8349dab1f5220aeb88f6..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/run_inference.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Generate captions for images using default beam search parameters.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os - - -import tensorflow as tf - -from im2txt import configuration -from im2txt import inference_wrapper -from im2txt.inference_utils import caption_generator -from im2txt.inference_utils import vocabulary - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("checkpoint_path", "", - "Model checkpoint file or directory containing a " - "model checkpoint file.") -tf.flags.DEFINE_string("vocab_file", "", "Text file containing the vocabulary.") -tf.flags.DEFINE_string("input_files", "", - "File pattern or comma-separated list of file patterns " - "of image files.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def main(_): - # Build the inference graph. - g = tf.Graph() - with g.as_default(): - model = inference_wrapper.InferenceWrapper() - restore_fn = model.build_graph_from_config(configuration.ModelConfig(), - FLAGS.checkpoint_path) - g.finalize() - - # Create the vocabulary. 
- vocab = vocabulary.Vocabulary(FLAGS.vocab_file) - - filenames = [] - for file_pattern in FLAGS.input_files.split(","): - filenames.extend(tf.gfile.Glob(file_pattern)) - tf.logging.info("Running caption generation on %d files matching %s", - len(filenames), FLAGS.input_files) - - with tf.Session(graph=g) as sess: - # Load the model from checkpoint. - restore_fn(sess) - - # Prepare the caption generator. Here we are implicitly using the default - # beam search parameters. See caption_generator.py for a description of the - # available beam search parameters. - generator = caption_generator.CaptionGenerator(model, vocab) - - for filename in filenames: - with tf.gfile.GFile(filename, "rb") as f: - image = f.read() - captions = generator.beam_search(sess, image) - print("Captions for image %s:" % os.path.basename(filename)) - for i, caption in enumerate(captions): - # Ignore begin and end words. - sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]] - sentence = " ".join(sentence) - print(" %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob))) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/im2txt/im2txt/show_and_tell_model.py b/research/im2txt/im2txt/show_and_tell_model.py deleted file mode 100644 index 0ac29e7fdb80fbefe3594eabc972648a3fb32312..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/show_and_tell_model.py +++ /dev/null @@ -1,358 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Image-to-text implementation based on http://arxiv.org/abs/1411.4555. - -"Show and Tell: A Neural Image Caption Generator" -Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from im2txt.ops import image_embedding -from im2txt.ops import image_processing -from im2txt.ops import inputs as input_ops - - -class ShowAndTellModel(object): - """Image-to-text implementation based on http://arxiv.org/abs/1411.4555. - - "Show and Tell: A Neural Image Caption Generator" - Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan - """ - - def __init__(self, config, mode, train_inception=False): - """Basic setup. - - Args: - config: Object containing configuration parameters. - mode: "train", "eval" or "inference". - train_inception: Whether the inception submodel variables are trainable. - """ - assert mode in ["train", "eval", "inference"] - self.config = config - self.mode = mode - self.train_inception = train_inception - - # Reader for the input data. - self.reader = tf.TFRecordReader() - - # To match the "Show and Tell" paper we initialize all variables with a - # random uniform initializer. - self.initializer = tf.random_uniform_initializer( - minval=-self.config.initializer_scale, - maxval=self.config.initializer_scale) - - # A float32 Tensor with shape [batch_size, height, width, channels]. - self.images = None - - # An int32 Tensor with shape [batch_size, padded_length]. - self.input_seqs = None - - # An int32 Tensor with shape [batch_size, padded_length]. - self.target_seqs = None - - # An int32 0/1 Tensor with shape [batch_size, padded_length]. 
- self.input_mask = None - - # A float32 Tensor with shape [batch_size, embedding_size]. - self.image_embeddings = None - - # A float32 Tensor with shape [batch_size, padded_length, embedding_size]. - self.seq_embeddings = None - - # A float32 scalar Tensor; the total loss for the trainer to optimize. - self.total_loss = None - - # A float32 Tensor with shape [batch_size * padded_length]. - self.target_cross_entropy_losses = None - - # A float32 Tensor with shape [batch_size * padded_length]. - self.target_cross_entropy_loss_weights = None - - # Collection of variables from the inception submodel. - self.inception_variables = [] - - # Function to restore the inception submodel from checkpoint. - self.init_fn = None - - # Global step Tensor. - self.global_step = None - - def is_training(self): - """Returns true if the model is built for training mode.""" - return self.mode == "train" - - def process_image(self, encoded_image, thread_id=0): - """Decodes and processes an image string. - - Args: - encoded_image: A scalar string Tensor; the encoded image. - thread_id: Preprocessing thread id used to select the ordering of color - distortions. - - Returns: - A float32 Tensor of shape [height, width, 3]; the processed image. - """ - return image_processing.process_image(encoded_image, - is_training=self.is_training(), - height=self.config.image_height, - width=self.config.image_width, - thread_id=thread_id, - image_format=self.config.image_format) - - def build_inputs(self): - """Input prefetching, preprocessing and batching. - - Outputs: - self.images - self.input_seqs - self.target_seqs (training and eval only) - self.input_mask (training and eval only) - """ - if self.mode == "inference": - # In inference mode, images and inputs are fed via placeholders. 
- image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed") - input_feed = tf.placeholder(dtype=tf.int64, - shape=[None], # batch_size - name="input_feed") - - # Process image and insert batch dimensions. - images = tf.expand_dims(self.process_image(image_feed), 0) - input_seqs = tf.expand_dims(input_feed, 1) - - # No target sequences or input mask in inference mode. - target_seqs = None - input_mask = None - else: - # Prefetch serialized SequenceExample protos. - input_queue = input_ops.prefetch_input_data( - self.reader, - self.config.input_file_pattern, - is_training=self.is_training(), - batch_size=self.config.batch_size, - values_per_shard=self.config.values_per_input_shard, - input_queue_capacity_factor=self.config.input_queue_capacity_factor, - num_reader_threads=self.config.num_input_reader_threads) - - # Image processing and random distortion. Split across multiple threads - # with each thread applying a slightly different distortion. - assert self.config.num_preprocess_threads % 2 == 0 - images_and_captions = [] - for thread_id in range(self.config.num_preprocess_threads): - serialized_sequence_example = input_queue.dequeue() - encoded_image, caption = input_ops.parse_sequence_example( - serialized_sequence_example, - image_feature=self.config.image_feature_name, - caption_feature=self.config.caption_feature_name) - image = self.process_image(encoded_image, thread_id=thread_id) - images_and_captions.append([image, caption]) - - # Batch inputs. - queue_capacity = (2 * self.config.num_preprocess_threads * - self.config.batch_size) - images, input_seqs, target_seqs, input_mask = ( - input_ops.batch_with_dynamic_pad(images_and_captions, - batch_size=self.config.batch_size, - queue_capacity=queue_capacity)) - - self.images = images - self.input_seqs = input_seqs - self.target_seqs = target_seqs - self.input_mask = input_mask - - def build_image_embeddings(self): - """Builds the image model subgraph and generates image embeddings. 
- - Inputs: - self.images - - Outputs: - self.image_embeddings - """ - inception_output = image_embedding.inception_v3( - self.images, - trainable=self.train_inception, - is_training=self.is_training()) - self.inception_variables = tf.get_collection( - tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3") - - # Map inception output into embedding space. - with tf.variable_scope("image_embedding") as scope: - image_embeddings = tf.contrib.layers.fully_connected( - inputs=inception_output, - num_outputs=self.config.embedding_size, - activation_fn=None, - weights_initializer=self.initializer, - biases_initializer=None, - scope=scope) - - # Save the embedding size in the graph. - tf.constant(self.config.embedding_size, name="embedding_size") - - self.image_embeddings = image_embeddings - - def build_seq_embeddings(self): - """Builds the input sequence embeddings. - - Inputs: - self.input_seqs - - Outputs: - self.seq_embeddings - """ - with tf.variable_scope("seq_embedding"), tf.device("/cpu:0"): - embedding_map = tf.get_variable( - name="map", - shape=[self.config.vocab_size, self.config.embedding_size], - initializer=self.initializer) - seq_embeddings = tf.nn.embedding_lookup(embedding_map, self.input_seqs) - - self.seq_embeddings = seq_embeddings - - def build_model(self): - """Builds the model. - - Inputs: - self.image_embeddings - self.seq_embeddings - self.target_seqs (training and eval only) - self.input_mask (training and eval only) - - Outputs: - self.total_loss (training and eval only) - self.target_cross_entropy_losses (training and eval only) - self.target_cross_entropy_loss_weights (training and eval only) - """ - # This LSTM cell has biases and outputs tanh(new_c) * sigmoid(o), but the - # modified LSTM in the "Show and Tell" paper has no biases and outputs - # new_c * sigmoid(o). 
- lstm_cell = tf.contrib.rnn.BasicLSTMCell( - num_units=self.config.num_lstm_units, state_is_tuple=True) - if self.mode == "train": - lstm_cell = tf.contrib.rnn.DropoutWrapper( - lstm_cell, - input_keep_prob=self.config.lstm_dropout_keep_prob, - output_keep_prob=self.config.lstm_dropout_keep_prob) - - with tf.variable_scope("lstm", initializer=self.initializer) as lstm_scope: - # Feed the image embeddings to set the initial LSTM state. - zero_state = lstm_cell.zero_state( - batch_size=self.image_embeddings.get_shape()[0], dtype=tf.float32) - _, initial_state = lstm_cell(self.image_embeddings, zero_state) - - # Allow the LSTM variables to be reused. - lstm_scope.reuse_variables() - - if self.mode == "inference": - # In inference mode, use concatenated states for convenient feeding and - # fetching. - tf.concat(axis=1, values=initial_state, name="initial_state") - - # Placeholder for feeding a batch of concatenated states. - state_feed = tf.placeholder(dtype=tf.float32, - shape=[None, sum(lstm_cell.state_size)], - name="state_feed") - state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1) - - # Run a single LSTM step. - lstm_outputs, state_tuple = lstm_cell( - inputs=tf.squeeze(self.seq_embeddings, axis=[1]), - state=state_tuple) - - # Concatentate the resulting state. - tf.concat(axis=1, values=state_tuple, name="state") - else: - # Run the batch of sequence embeddings through the LSTM. - sequence_length = tf.reduce_sum(self.input_mask, 1) - lstm_outputs, _ = tf.nn.dynamic_rnn(cell=lstm_cell, - inputs=self.seq_embeddings, - sequence_length=sequence_length, - initial_state=initial_state, - dtype=tf.float32, - scope=lstm_scope) - - # Stack batches vertically. 
- lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size]) - - with tf.variable_scope("logits") as logits_scope: - logits = tf.contrib.layers.fully_connected( - inputs=lstm_outputs, - num_outputs=self.config.vocab_size, - activation_fn=None, - weights_initializer=self.initializer, - scope=logits_scope) - - if self.mode == "inference": - tf.nn.softmax(logits, name="softmax") - else: - targets = tf.reshape(self.target_seqs, [-1]) - weights = tf.to_float(tf.reshape(self.input_mask, [-1])) - - # Compute losses. - losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, - logits=logits) - batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)), - tf.reduce_sum(weights), - name="batch_loss") - tf.losses.add_loss(batch_loss) - total_loss = tf.losses.get_total_loss() - - # Add summaries. - tf.summary.scalar("losses/batch_loss", batch_loss) - tf.summary.scalar("losses/total_loss", total_loss) - for var in tf.trainable_variables(): - tf.summary.histogram("parameters/" + var.op.name, var) - - self.total_loss = total_loss - self.target_cross_entropy_losses = losses # Used in evaluation. - self.target_cross_entropy_loss_weights = weights # Used in evaluation. - - def setup_inception_initializer(self): - """Sets up the function to restore inception variables from checkpoint.""" - if self.mode != "inference": - # Restore inception variables only. 
- saver = tf.train.Saver(self.inception_variables) - - def restore_fn(sess): - tf.logging.info("Restoring Inception variables from checkpoint file %s", - self.config.inception_checkpoint_file) - saver.restore(sess, self.config.inception_checkpoint_file) - - self.init_fn = restore_fn - - def setup_global_step(self): - """Sets up the global step Tensor.""" - global_step = tf.Variable( - initial_value=0, - name="global_step", - trainable=False, - collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES]) - - self.global_step = global_step - - def build(self): - """Creates all ops for training and evaluation.""" - self.build_inputs() - self.build_image_embeddings() - self.build_seq_embeddings() - self.build_model() - self.setup_inception_initializer() - self.setup_global_step() diff --git a/research/im2txt/im2txt/show_and_tell_model_test.py b/research/im2txt/im2txt/show_and_tell_model_test.py deleted file mode 100644 index 0bdfb6e1a3ae3c15bd1c8daf005fe2542436ca8e..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/show_and_tell_model_test.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for tensorflow_models.im2txt.show_and_tell_model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import numpy as np -import tensorflow as tf - -from im2txt import configuration -from im2txt import show_and_tell_model - - -class ShowAndTellModel(show_and_tell_model.ShowAndTellModel): - """Subclass of ShowAndTellModel without the disk I/O.""" - - def build_inputs(self): - if self.mode == "inference": - # Inference mode doesn't read from disk, so defer to parent. - return super(ShowAndTellModel, self).build_inputs() - else: - # Replace disk I/O with random Tensors. - self.images = tf.random_uniform( - shape=[self.config.batch_size, self.config.image_height, - self.config.image_width, 3], - minval=-1, - maxval=1) - self.input_seqs = tf.random_uniform( - [self.config.batch_size, 15], - minval=0, - maxval=self.config.vocab_size, - dtype=tf.int64) - self.target_seqs = tf.random_uniform( - [self.config.batch_size, 15], - minval=0, - maxval=self.config.vocab_size, - dtype=tf.int64) - self.input_mask = tf.ones_like(self.input_seqs) - - -class ShowAndTellModelTest(tf.test.TestCase): - - def setUp(self): - super(ShowAndTellModelTest, self).setUp() - self._model_config = configuration.ModelConfig() - - def _countModelParameters(self): - """Counts the number of parameters in the model at top level scope.""" - counter = {} - for v in tf.global_variables(): - name = v.op.name.split("/")[0] - num_params = v.get_shape().num_elements() - assert num_params - counter[name] = counter.get(name, 0) + num_params - return counter - - def _checkModelParameters(self): - """Verifies the number of parameters in the model.""" - param_counts = self._countModelParameters() - expected_param_counts = { - "InceptionV3": 21802784, - # inception_output_size * embedding_size - "image_embedding": 1048576, - # vocab_size * embedding_size - 
"seq_embedding": 6144000, - # (embedding_size + num_lstm_units + 1) * 4 * num_lstm_units - "lstm": 2099200, - # (num_lstm_units + 1) * vocab_size - "logits": 6156000, - "global_step": 1, - } - self.assertDictEqual(expected_param_counts, param_counts) - - def _checkOutputs(self, expected_shapes, feed_dict=None): - """Verifies that the model produces expected outputs. - - Args: - expected_shapes: A dict mapping Tensor or Tensor name to expected output - shape. - feed_dict: Values of Tensors to feed into Session.run(). - """ - fetches = expected_shapes.keys() - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - outputs = sess.run(fetches, feed_dict) - - for index, output in enumerate(outputs): - tensor = fetches[index] - expected = expected_shapes[tensor] - actual = output.shape - if expected != actual: - self.fail("Tensor %s has shape %s (expected %s)." % - (tensor, actual, expected)) - - def testBuildForTraining(self): - model = ShowAndTellModel(self._model_config, mode="train") - model.build() - - self._checkModelParameters() - - expected_shapes = { - # [batch_size, image_height, image_width, 3] - model.images: (32, 299, 299, 3), - # [batch_size, sequence_length] - model.input_seqs: (32, 15), - # [batch_size, sequence_length] - model.target_seqs: (32, 15), - # [batch_size, sequence_length] - model.input_mask: (32, 15), - # [batch_size, embedding_size] - model.image_embeddings: (32, 512), - # [batch_size, sequence_length, embedding_size] - model.seq_embeddings: (32, 15, 512), - # Scalar - model.total_loss: (), - # [batch_size * sequence_length] - model.target_cross_entropy_losses: (480,), - # [batch_size * sequence_length] - model.target_cross_entropy_loss_weights: (480,), - } - self._checkOutputs(expected_shapes) - - def testBuildForEval(self): - model = ShowAndTellModel(self._model_config, mode="eval") - model.build() - - self._checkModelParameters() - - expected_shapes = { - # [batch_size, image_height, image_width, 3] - 
model.images: (32, 299, 299, 3), - # [batch_size, sequence_length] - model.input_seqs: (32, 15), - # [batch_size, sequence_length] - model.target_seqs: (32, 15), - # [batch_size, sequence_length] - model.input_mask: (32, 15), - # [batch_size, embedding_size] - model.image_embeddings: (32, 512), - # [batch_size, sequence_length, embedding_size] - model.seq_embeddings: (32, 15, 512), - # Scalar - model.total_loss: (), - # [batch_size * sequence_length] - model.target_cross_entropy_losses: (480,), - # [batch_size * sequence_length] - model.target_cross_entropy_loss_weights: (480,), - } - self._checkOutputs(expected_shapes) - - def testBuildForInference(self): - model = ShowAndTellModel(self._model_config, mode="inference") - model.build() - - self._checkModelParameters() - - # Test feeding an image to get the initial LSTM state. - images_feed = np.random.rand(1, 299, 299, 3) - feed_dict = {model.images: images_feed} - expected_shapes = { - # [batch_size, embedding_size] - model.image_embeddings: (1, 512), - # [batch_size, 2 * num_lstm_units] - "lstm/initial_state:0": (1, 1024), - } - self._checkOutputs(expected_shapes, feed_dict) - - # Test feeding a batch of inputs and LSTM states to get softmax output and - # LSTM states. - input_feed = np.random.randint(0, 10, size=3) - state_feed = np.random.rand(3, 1024) - feed_dict = {"input_feed:0": input_feed, "lstm/state_feed:0": state_feed} - expected_shapes = { - # [batch_size, 2 * num_lstm_units] - "lstm/state:0": (3, 1024), - # [batch_size, vocab_size] - "softmax:0": (3, 12000), - } - self._checkOutputs(expected_shapes, feed_dict) - - -if __name__ == "__main__": - tf.test.main() diff --git a/research/im2txt/im2txt/train.py b/research/im2txt/im2txt/train.py deleted file mode 100644 index db602735ba11e7f540a4e985333d8a457512c977..0000000000000000000000000000000000000000 --- a/research/im2txt/im2txt/train.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Train the model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from im2txt import configuration -from im2txt import show_and_tell_model - -FLAGS = tf.app.flags.FLAGS - -tf.flags.DEFINE_string("input_file_pattern", "", - "File pattern of sharded TFRecord input files.") -tf.flags.DEFINE_string("inception_checkpoint_file", "", - "Path to a pretrained inception_v3 model.") -tf.flags.DEFINE_string("train_dir", "", - "Directory for saving and loading model checkpoints.") -tf.flags.DEFINE_boolean("train_inception", False, - "Whether to train inception submodel variables.") -tf.flags.DEFINE_integer("number_of_steps", 1000000, "Number of training steps.") -tf.flags.DEFINE_integer("log_every_n_steps", 1, - "Frequency at which loss and global step are logged.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def main(unused_argv): - assert FLAGS.input_file_pattern, "--input_file_pattern is required" - assert FLAGS.train_dir, "--train_dir is required" - - model_config = configuration.ModelConfig() - model_config.input_file_pattern = FLAGS.input_file_pattern - model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file - training_config = configuration.TrainingConfig() - - # Create training directory. 
- train_dir = FLAGS.train_dir - if not tf.gfile.IsDirectory(train_dir): - tf.logging.info("Creating training directory: %s", train_dir) - tf.gfile.MakeDirs(train_dir) - - # Build the TensorFlow graph. - g = tf.Graph() - with g.as_default(): - # Build the model. - model = show_and_tell_model.ShowAndTellModel( - model_config, mode="train", train_inception=FLAGS.train_inception) - model.build() - - # Set up the learning rate. - learning_rate_decay_fn = None - if FLAGS.train_inception: - learning_rate = tf.constant(training_config.train_inception_learning_rate) - else: - learning_rate = tf.constant(training_config.initial_learning_rate) - if training_config.learning_rate_decay_factor > 0: - num_batches_per_epoch = (training_config.num_examples_per_epoch / - model_config.batch_size) - decay_steps = int(num_batches_per_epoch * - training_config.num_epochs_per_decay) - - def _learning_rate_decay_fn(learning_rate, global_step): - return tf.train.exponential_decay( - learning_rate, - global_step, - decay_steps=decay_steps, - decay_rate=training_config.learning_rate_decay_factor, - staircase=True) - - learning_rate_decay_fn = _learning_rate_decay_fn - - # Set up the training ops. - train_op = tf.contrib.layers.optimize_loss( - loss=model.total_loss, - global_step=model.global_step, - learning_rate=learning_rate, - optimizer=training_config.optimizer, - clip_gradients=training_config.clip_gradients, - learning_rate_decay_fn=learning_rate_decay_fn) - - # Set up the Saver for saving and restoring model checkpoints. - saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep) - - # Run training. 
- tf.contrib.slim.learning.train( - train_op, - train_dir, - log_every_n_steps=FLAGS.log_every_n_steps, - graph=g, - global_step=model.global_step, - number_of_steps=FLAGS.number_of_steps, - init_fn=model.init_fn, - saver=saver) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/inception/.gitignore b/research/inception/.gitignore deleted file mode 100644 index 58cbf2f4e0d5d39a0e3910d6993508546dad429f..0000000000000000000000000000000000000000 --- a/research/inception/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -/bazel-bin -/bazel-ci_build-cache -/bazel-genfiles -/bazel-out -/bazel-inception -/bazel-testlogs -/bazel-tf diff --git a/research/inception/README.md b/research/inception/README.md deleted file mode 100644 index beed66cf5cd83a6843ec39b28b5dbd88f1c0d3d0..0000000000000000000000000000000000000000 --- a/research/inception/README.md +++ /dev/null @@ -1,858 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -**NOTE: For the most part, you will find a newer version of this code at [models/research/slim](https://github.com/tensorflow/models/tree/master/research/slim).** In particular: - -* `inception_train.py` and `imagenet_train.py` should no longer be used. The slim editions for running on multiple GPUs are the current best examples. -* `inception_distributed_train.py` and `imagenet_distributed_train.py` are still valid examples of distributed training. - -For performance benchmarking, please see https://www.tensorflow.org/performance/benchmarks. - ---- - -# Inception in TensorFlow - -[ImageNet](http://www.image-net.org/) is a common academic data set in machine -learning for training an image recognition system. 
Code in this directory -demonstrates how to use TensorFlow to train and evaluate a type of convolutional -neural network (CNN) on this academic data set. In particular, we demonstrate -how to train the Inception v3 architecture as specified in: - -_Rethinking the Inception Architecture for Computer Vision_ - -Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew -Wojna - -http://arxiv.org/abs/1512.00567 - -This network achieves 21.2% top-1 and 5.6% top-5 error for single frame -evaluation with a computational cost of 5 billion multiply-adds per inference -and with using less than 25 million parameters. Below is a visualization of the -model architecture. - -![Inception-v3 Architecture](g3doc/inception_v3_architecture.png) - -## Description of Code - -The code base provides three core binaries for: - -* Training an Inception v3 network from scratch across multiple GPUs and/or - multiple machines using the ImageNet 2012 Challenge training data set. -* Evaluating an Inception v3 network using the ImageNet 2012 Challenge - validation data set. -* Retraining an Inception v3 network on a novel task and back-propagating the - errors to fine tune the network weights. - -The training procedure employs synchronous stochastic gradient descent across -multiple GPUs. The user may specify the number of GPUs they wish to harness. The -synchronous training performs *batch-splitting* by dividing a given batch across -multiple GPUs. - -The training set up is nearly identical to the section [Training a Model Using -Multiple GPU Cards](https://www.tensorflow.org/tutorials/deep_cnn/index.html#launching_and_training_the_model_on_multiple_gpu_cards) -where we have substituted the CIFAR-10 model architecture with Inception v3. The -primary differences with that setup are: - -* Calculate and update the batch-norm statistics during training so that they - may be substituted in during evaluation. 
-* Specify the model architecture using a (still experimental) higher level - language called TensorFlow-Slim. - -For more details about TensorFlow-Slim, please see the [Slim README](inception/slim/README.md). Please note that this higher-level language is still -*experimental* and the API may change over time depending on usage and -subsequent research. - -## Getting Started - -Before you run the training script for the first time, you will need to download -and convert the ImageNet data to native TFRecord format. The TFRecord format -consists of a set of sharded files where each entry is a serialized `tf.Example` -proto. Each `tf.Example` proto contains the ImageNet image (JPEG encoded) as -well as metadata such as label and bounding box information. See -[`parse_example_proto`](inception/image_processing.py) for details. - -We provide a single [script](inception/data/download_and_preprocess_imagenet.sh) for -downloading and converting ImageNet data to TFRecord format. Downloading and -preprocessing the data may take several hours (up to half a day) depending on -your network and computer speed. Please be patient. - -To begin, you will need to sign up for an account with [ImageNet](http://image-net.org) to gain access to the data. Look for the sign up page, -create an account and request an access key to download the data. - -After you have `USERNAME` and `PASSWORD`, you are ready to run our script. Make -sure that your hard disk has at least 500 GB of free space for downloading and -storing the data. Here we select `DATA_DIR=$HOME/imagenet-data` as such a -location but feel free to edit accordingly. - -When you run the below script, please enter *USERNAME* and *PASSWORD* when -prompted. This will occur at the very beginning. Once these values are entered, -you will not need to interact with the script again. - -```shell -# location of where to place the ImageNet data -DATA_DIR=$HOME/imagenet-data - -# build the preprocessing script. 
-cd tensorflow-models/inception -bazel build //inception:download_and_preprocess_imagenet - -# run it -bazel-bin/inception/download_and_preprocess_imagenet "${DATA_DIR}" -``` - -The final line of the output script should read: - -```shell -2016-02-17 14:30:17.287989: Finished writing all 1281167 images in data set. -``` - -When the script finishes, you will find 1024 training files and 128 validation -files in the `DATA_DIR`. The files will match the patterns -`train-?????-of-01024` and `validation-?????-of-00128`, respectively. - -[Congratulations!](https://www.youtube.com/watch?v=9bZkp7q19f0) You are now -ready to train or evaluate with the ImageNet data set. - -## How to Train from Scratch - -**WARNING** Training an Inception v3 network from scratch is a computationally -intensive task and depending on your compute setup may take several days or even -weeks. - -*Before proceeding* please read the [Convolutional Neural Networks](https://www.tensorflow.org/tutorials/deep_cnn/index.html) tutorial; in -particular, focus on [Training a Model Using Multiple GPU Cards](https://www.tensorflow.org/tutorials/deep_cnn/index.html#launching_and_training_the_model_on_multiple_gpu_cards). The model training method is nearly identical to that described in the -CIFAR-10 multi-GPU model training. Briefly, the model training - -* Places an individual model replica on each GPU. -* Splits the batch across the GPUs. -* Updates model parameters synchronously by waiting for all GPUs to finish - processing a batch of data. - -The training procedure is encapsulated by this diagram of how operations and -variables are placed on CPU and GPUs respectively. - -
- -
- -Each tower computes the gradients for a portion of the batch and the gradients -are combined and averaged across the multiple towers in order to provide a -single update of the Variables stored on the CPU. - -A crucial aspect of training a network of this size is *training speed* in terms -of wall-clock time. The training speed is dictated by many factors -- most -importantly the batch size and the learning rate schedule. Both of these -parameters are heavily coupled to the hardware set up. - -Generally speaking, a batch size is a difficult parameter to tune as it requires -balancing memory demands of the model, memory available on the GPU and speed of -computation. Generally speaking, employing larger batch sizes leads to more -efficient computation and potentially more efficient training steps. - -We have tested several hardware setups for training this model from scratch but -we emphasize that depending your hardware set up, you may need to adapt the -batch size and learning rate schedule. - -Please see the comments in `inception_train.py` for a few selected learning rate -plans based on some selected hardware setups. - -To train this model, you simply need to specify the following: - -```shell -# Build the model. Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:imagenet_train - -# run it -bazel-bin/inception/imagenet_train --num_gpus=1 --batch_size=32 --train_dir=/tmp/imagenet_train --data_dir=/tmp/imagenet_data -``` - -The model reads in the ImageNet training data from `--data_dir`. If you followed -the instructions in [Getting Started](#getting-started), then set -`--data_dir="${DATA_DIR}"`. The script assumes that there exists a set of -sharded TFRecord files containing the ImageNet data. 
If you have not created -TFRecord files, please refer to [Getting Started](#getting-started) - -Here is the output of the above command line when running on a Tesla K40c: - -```shell -2016-03-07 12:24:59.922898: step 0, loss = 13.11 (5.3 examples/sec; 6.064 sec/batch) -2016-03-07 12:25:55.206783: step 10, loss = 13.71 (9.4 examples/sec; 3.394 sec/batch) -2016-03-07 12:26:28.905231: step 20, loss = 14.81 (9.5 examples/sec; 3.380 sec/batch) -2016-03-07 12:27:02.699719: step 30, loss = 14.45 (9.5 examples/sec; 3.378 sec/batch) -2016-03-07 12:27:36.515699: step 40, loss = 13.98 (9.5 examples/sec; 3.376 sec/batch) -2016-03-07 12:28:10.220956: step 50, loss = 13.92 (9.6 examples/sec; 3.327 sec/batch) -2016-03-07 12:28:43.658223: step 60, loss = 13.28 (9.6 examples/sec; 3.350 sec/batch) -... -``` - -In this example, a log entry is printed every 10 step and the line includes the -total loss (starts around 13.0-14.0) and the speed of processing in terms of -throughput (examples / sec) and batch speed (sec/batch). - -The number of GPU devices is specified by `--num_gpus` (which defaults to 1). -Specifying `--num_gpus` greater then 1 splits the batch evenly split across the -GPU cards. - -```shell -# Build the model. Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:imagenet_train - -# run it -bazel-bin/inception/imagenet_train --num_gpus=2 --batch_size=64 --train_dir=/tmp/imagenet_train -``` - -This model splits the batch of 64 images across 2 GPUs and calculates the -average gradient by waiting for both GPUs to finish calculating the gradients -from their respective data (See diagram above). Generally speaking, using larger -numbers of GPUs leads to higher throughput as well as the opportunity to use -larger batch sizes. In turn, larger batch sizes imply better estimates of the -gradient enabling the usage of higher learning rates. 
In summary, using more -GPUs results in simply faster training speed. - -Note that selecting a batch size is a difficult parameter to tune as it requires -balancing memory demands of the model, memory available on the GPU and speed of -computation. Generally speaking, employing larger batch sizes leads to more -efficient computation and potentially more efficient training steps. - -Note that there is considerable noise in the loss function on individual steps -in the previous log. Because of this noise, it is difficult to discern how well -a model is learning. The solution to the last problem is to launch TensorBoard -pointing to the directory containing the events log. - -```shell -tensorboard --logdir=/tmp/imagenet_train -``` - -TensorBoard has access to the many Summaries produced by the model that describe -multitudes of statistics tracking the model behavior and the quality of the -learned model. In particular, TensorBoard tracks a exponentially smoothed -version of the loss. In practice, it is far easier to judge how well a model -learns by monitoring the smoothed version of the loss. - -## How to Train from Scratch in a Distributed Setting - -**NOTE** Distributed TensorFlow requires version 0.8 or later. - -Distributed TensorFlow lets us use multiple machines to train a model faster. -This is quite different from the training with multiple GPU towers on a single -machine where all parameters and gradients computation are in the same place. We -coordinate the computation across multiple machines by employing a centralized -repository for parameters that maintains a unified, single copy of model -parameters. Each individual machine sends gradient updates to the centralized -parameter repository which coordinates these updates and sends back updated -parameters to the individual machines running the model training. - -We term each machine that runs a copy of the training a `worker` or `replica`. 
-We term each machine that maintains model parameters a `ps`, short for -`parameter server`. Note that we might have more than one machine acting as a -`ps` as the model parameters may be sharded across multiple machines. - -Variables may be updated with synchronous or asynchronous gradient updates. One -may construct a an [`Optimizer`](https://www.tensorflow.org/api_docs/python/train.html#optimizers) in TensorFlow -that constructs the necessary graph for either case diagrammed below from the -TensorFlow [Whitepaper](http://download.tensorflow.org/paper/whitepaper2015.pdf): - -
- -
- -In [a recent paper](https://arxiv.org/abs/1604.00981), synchronous gradient -updates have demonstrated to reach higher accuracy in a shorter amount of time. -In this distributed Inception example we employ synchronous gradient updates. - -Note that in this example each replica has a single tower that uses one GPU. - -The command-line flags `worker_hosts` and `ps_hosts` specify available servers. -The same binary will be used for both the `worker` jobs and the `ps` jobs. -Command line flag `job_name` will be used to specify what role a task will be -playing and `task_id` will be used to identify which one of the jobs it is -running. Several things to note here: - -* The numbers of `ps` and `worker` tasks are inferred from the lists of hosts - specified in the flags. The `task_id` should be within the range `[0, - num_ps_tasks)` for `ps` tasks and `[0, num_worker_tasks)` for `worker` - tasks. -* `ps` and `worker` tasks can run on the same machine, as long as that machine - has sufficient resources to handle both tasks. Note that the `ps` task does - not benefit from a GPU, so it should not attempt to use one (see below). -* Multiple `worker` tasks can run on the same machine with multiple GPUs so - machine_A with 2 GPUs may have 2 workers while machine_B with 1 GPU just has - 1 worker. -* The default learning rate schedule works well for a wide range of number of - replicas [25, 50, 100] but feel free to tune it for even better results. -* The command line of both `ps` and `worker` tasks should include the complete - list of `ps_hosts` and `worker_hosts`. -* There is a chief `worker` among all workers which defaults to `worker` 0. - The chief will be in charge of initializing all the parameters, writing out - the summaries and the checkpoint. The checkpoint and summary will be in the - `train_dir` of the host for `worker` 0. -* Each worker processes a batch_size number of examples but each gradient - update is computed from all replicas. 
Hence, the effective batch size of - this model is batch_size * num_workers. - -```shell -# Build the model. Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:imagenet_distributed_train - -# To start worker 0, go to the worker0 host and run the following (Note that -# task_id should be in the range [0, num_worker_tasks): -bazel-bin/inception/imagenet_distributed_train \ ---batch_size=32 \ ---data_dir=$HOME/imagenet-data \ ---job_name='worker' \ ---task_id=0 \ ---ps_hosts='ps0.example.com:2222' \ ---worker_hosts='worker0.example.com:2222,worker1.example.com:2222' - -# To start worker 1, go to the worker1 host and run the following (Note that -# task_id should be in the range [0, num_worker_tasks): -bazel-bin/inception/imagenet_distributed_train \ ---batch_size=32 \ ---data_dir=$HOME/imagenet-data \ ---job_name='worker' \ ---task_id=1 \ ---ps_hosts='ps0.example.com:2222' \ ---worker_hosts='worker0.example.com:2222,worker1.example.com:2222' - -# To start the parameter server (ps), go to the ps host and run the following (Note -# that task_id should be in the range [0, num_ps_tasks): -bazel-bin/inception/imagenet_distributed_train \ ---job_name='ps' \ ---task_id=0 \ ---ps_hosts='ps0.example.com:2222' \ ---worker_hosts='worker0.example.com:2222,worker1.example.com:2222' -``` - -If you have installed a GPU-compatible version of TensorFlow, the `ps` will also -try to allocate GPU memory although it is not helpful. This could potentially -crash the worker on the same machine as it has little to no GPU memory to -allocate. 
To avoid this, you can prepend the previous command to start `ps` -with: `CUDA_VISIBLE_DEVICES=''` - -```shell -CUDA_VISIBLE_DEVICES='' bazel-bin/inception/imagenet_distributed_train \ ---job_name='ps' \ ---task_id=0 \ ---ps_hosts='ps0.example.com:2222' \ ---worker_hosts='worker0.example.com:2222,worker1.example.com:2222' -``` - -If you have run everything correctly, you should see a log in each `worker` job -that looks like the following. Note the training speed varies depending on your -hardware and the first several steps could take much longer. - -```shell -INFO:tensorflow:PS hosts are: ['ps0.example.com:2222', 'ps1.example.com:2222'] -INFO:tensorflow:Worker hosts are: ['worker0.example.com:2222', 'worker1.example.com:2222'] -I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:206] Initialize HostPortsGrpcChannelCache for job ps -> {ps0.example.com:2222, ps1.example.com:2222} -I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:206] Initialize HostPortsGrpcChannelCache for job worker -> {localhost:2222, worker1.example.com:2222} -I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:202] Started server with target: grpc://localhost:2222 -INFO:tensorflow:Created variable global_step:0 with shape () and init - -... - -INFO:tensorflow:Created variable logits/logits/biases:0 with shape (1001,) and init -INFO:tensorflow:SyncReplicas enabled: replicas_to_aggregate=2; total_num_replicas=2 -INFO:tensorflow:2016-04-13 01:56:26.405639 Supervisor -INFO:tensorflow:Started 2 queues for processing input data. -INFO:tensorflow:global_step/sec: 0 -INFO:tensorflow:Worker 0: 2016-04-13 01:58:40.342404: step 0, loss = 12.97(0.0 examples/sec; 65.428  sec/batch) -INFO:tensorflow:global_step/sec: 0.0172907 -... 
-``` - -and a log in each `ps` job that looks like the following: - -```shell -INFO:tensorflow:PS hosts are: ['ps0.example.com:2222', 'ps1.example.com:2222'] -INFO:tensorflow:Worker hosts are: ['worker0.example.com:2222', 'worker1.example.com:2222'] -I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:206] Initialize HostPortsGrpcChannelCache for job ps -> {localhost:2222, ps1.example.com:2222} -I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:206] Initialize HostPortsGrpcChannelCache for job worker -> {worker0.example.com:2222, worker1.example.com:2222} -I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:202] Started server with target: grpc://localhost:2222 -``` - -If you compiled TensorFlow (from v1.1-rc3) with VERBS support and you have the -required device and IB verbs SW stack, you can specify --protocol='grpc+verbs' -In order to use Verbs RDMA for Tensor passing between workers and ps. -Need to add the the --protocol flag in all tasks (ps and workers). -The default protocol is the TensorFlow default protocol of grpc. - - -[Congratulations!](https://www.youtube.com/watch?v=9bZkp7q19f0) You are now -training Inception in a distributed manner. - -## How to Evaluate - -Evaluating an Inception v3 model on the ImageNet 2012 validation data set -requires running a separate binary. - -The evaluation procedure is nearly identical to [Evaluating a Model](https://www.tensorflow.org/tutorials/deep_cnn/index.html#evaluating_a_model) -described in the [Convolutional Neural Network](https://www.tensorflow.org/tutorials/deep_cnn/index.html) tutorial. - -**WARNING** Be careful not to run the evaluation and training binary on the same -GPU or else you might run out of memory. Consider running the evaluation on a -separate GPU if available or suspending the training binary while running the -evaluation on the same GPU. - -Briefly, one can evaluate the model by running: - -```shell -# Build the model. 
Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:imagenet_eval - -# run it -bazel-bin/inception/imagenet_eval --checkpoint_dir=/tmp/imagenet_train --eval_dir=/tmp/imagenet_eval -``` - -Note that we point `--checkpoint_dir` to the location of the checkpoints saved -by `inception_train.py` above. Running the above command results in the -following output: - -```shell -2016-02-17 22:32:50.391206: precision @ 1 = 0.735 -... -``` - -The script calculates the precision @ 1 over the entire validation data -periodically. The precision @ 1 measures the how often the highest scoring -prediction from the model matched the ImageNet label -- in this case, 73.5%. If -you wish to run the eval just once and not periodically, append the `--run_once` -option. - -Much like the training script, `imagenet_eval.py` also exports summaries that -may be visualized in TensorBoard. These summaries calculate additional -statistics on the predictions (e.g. recall @ 5) as well as monitor the -statistics of the model activations and weights during evaluation. - -## How to Fine-Tune a Pre-Trained Model on a New Task - -### Getting Started - -Much like training the ImageNet model we must first convert a new data set to -the sharded TFRecord format which each entry is a serialized `tf.Example` proto. - -We have provided a script demonstrating how to do this for small data set of of -a few thousand flower images spread across 5 labels: - -```shell -daisy, dandelion, roses, sunflowers, tulips -``` - -There is a single automated script that downloads the data set and converts it -to the TFRecord format. Much like the ImageNet data set, each record in the -TFRecord format is a serialized `tf.Example` proto whose entries include a -JPEG-encoded string and an integer label. Please see [`parse_example_proto`](inception/image_processing.py) for details. 
- -The script just takes a few minutes to run depending your network connection -speed for downloading and processing the images. Your hard disk requires 200MB -of free storage. Here we select `DATA_DIR=/tmp/flowers-data/` as such a location -but feel free to edit accordingly. - -```shell -# location of where to place the flowers data -FLOWERS_DATA_DIR=/tmp/flowers-data/ - -# build the preprocessing script. -cd tensorflow-models/inception -bazel build //inception:download_and_preprocess_flowers - -# run it -bazel-bin/inception/download_and_preprocess_flowers "${FLOWERS_DATA_DIR}" -``` - -If the script runs successfully, the final line of the terminal output should -look like: - -```shell -2016-02-24 20:42:25.067551: Finished writing all 3170 images in data set. -``` - -When the script finishes you will find 2 shards for the training and validation -files in the `DATA_DIR`. The files will match the patterns `train-?????-of-00002` -and `validation-?????-of-00002`, respectively. - -**NOTE** If you wish to prepare a custom image data set for transfer learning, -you will need to invoke [`build_image_data.py`](inception/data/build_image_data.py) on -your custom data set. Please see the associated options and assumptions behind -this script by reading the comments section of [`build_image_data.py`](inception/data/build_image_data.py). Also, if your custom data has a different -number of examples or classes, you need to change the appropriate values in -[`imagenet_data.py`](inception/imagenet_data.py). - -The second piece you will need is a trained Inception v3 image model. 
You have -the option of either training one yourself (See [How to Train from Scratch](#how-to-train-from-scratch) for details) or you can download a pre-trained -model like so: - -```shell -# location of where to place the Inception v3 model -INCEPTION_MODEL_DIR=$HOME/inception-v3-model -mkdir -p ${INCEPTION_MODEL_DIR} -cd ${INCEPTION_MODEL_DIR} - -# download the Inception v3 model -curl -O http://download.tensorflow.org/models/image/imagenet/inception-v3-2016-03-01.tar.gz -tar xzf inception-v3-2016-03-01.tar.gz - -# this will create a directory called inception-v3 which contains the following files. -> ls inception-v3 -README.txt -checkpoint -model.ckpt-157585 -``` - -[Congratulations!](https://www.youtube.com/watch?v=9bZkp7q19f0) You are now -ready to fine-tune your pre-trained Inception v3 model with the flower data set. - -### How to Retrain a Trained Model on the Flowers Data - -We are now ready to fine-tune a pre-trained Inception-v3 model on the flowers -data set. This requires two distinct changes to our training procedure: - -1. Build the exact same model as previously except we change the number of - labels in the final classification layer. - -2. Restore all weights from the pre-trained Inception-v3 except for the final - classification layer; this will get randomly initialized instead. - -We can perform these two operations by specifying two flags: -`--pretrained_model_checkpoint_path` and `--fine_tune`. The first flag is a -string that points to the path of a pre-trained Inception-v3 model. If this flag -is specified, it will load the entire model from the checkpoint before the -script begins training. - -The second flag `--fine_tune` is a boolean that indicates whether the last -classification layer should be randomly initialized or restored. You may set -this flag to false if you wish to continue training a pre-trained model from a -checkpoint. If you set this flag to true, you can train a new classification -layer from scratch. 
- -In order to understand how `--fine_tune` works, please see the discussion on -`Variables` in the TensorFlow-Slim [`README.md`](inception/slim/README.md). - -Putting this all together you can retrain a pre-trained Inception-v3 model on -the flowers data set with the following command. - -```shell -# Build the model. Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:flowers_train - -# Path to the downloaded Inception-v3 model. -MODEL_PATH="${INCEPTION_MODEL_DIR}/inception-v3/model.ckpt-157585" - -# Directory where the flowers data resides. -FLOWERS_DATA_DIR=/tmp/flowers-data/ - -# Directory where to save the checkpoint and events files. -TRAIN_DIR=/tmp/flowers_train/ - -# Run the fine-tuning on the flowers data set starting from the pre-trained -# Imagenet-v3 model. -bazel-bin/inception/flowers_train \ - --train_dir="${TRAIN_DIR}" \ - --data_dir="${FLOWERS_DATA_DIR}" \ - --pretrained_model_checkpoint_path="${MODEL_PATH}" \ - --fine_tune=True \ - --initial_learning_rate=0.001 \ - --input_queue_memory_factor=1 -``` - -We have added a few extra options to the training procedure. - -* Fine-tuning a model a separate data set requires significantly lowering the - initial learning rate. We set the initial learning rate to 0.001. -* The flowers data set is quite small so we shrink the size of the shuffling - queue of examples. See [Adjusting Memory Demands](#adjusting-memory-demands) - for more details. - -The training script will only reports the loss. To evaluate the quality of the -fine-tuned model, you will need to run `flowers_eval`: - -```shell -# Build the model. Note that we need to make sure the TensorFlow is ready to -# use before this as this command will not build TensorFlow. -cd tensorflow-models/inception -bazel build //inception:flowers_eval - -# Directory where we saved the fine-tuned checkpoint and events files. 
-TRAIN_DIR=/tmp/flowers_train/ - -# Directory where the flowers data resides. -FLOWERS_DATA_DIR=/tmp/flowers-data/ - -# Directory where to save the evaluation events files. -EVAL_DIR=/tmp/flowers_eval/ - -# Evaluate the fine-tuned model on a hold-out of the flower data set. -bazel-bin/inception/flowers_eval \ - --eval_dir="${EVAL_DIR}" \ - --data_dir="${FLOWERS_DATA_DIR}" \ - --subset=validation \ - --num_examples=500 \ - --checkpoint_dir="${TRAIN_DIR}" \ - --input_queue_memory_factor=1 \ - --run_once -``` - -We find that the evaluation arrives at roughly 93.4% precision@1 after the model -has been running for 2000 steps. - -```shell -Successfully loaded model from /tmp/flowers/model.ckpt-1999 at step=1999. -2016-03-01 16:52:51.761219: starting evaluation on (validation). -2016-03-01 16:53:05.450419: [20 batches out of 20] (36.5 examples/sec; 0.684sec/batch) -2016-03-01 16:53:05.450471: precision @ 1 = 0.9340 recall @ 5 = 0.9960 [500 examples] -``` - -## How to Construct a New Dataset for Retraining - -One can use the existing scripts supplied with this model to build a new dataset -for training or fine-tuning. The main script to employ is -[`build_image_data.py`](inception/data/build_image_data.py). Briefly, this script takes a -structured directory of images and converts it to a sharded `TFRecord` that can -be read by the Inception model. - -In particular, you will need to create a directory of training images that -reside within `$TRAIN_DIR` and `$VALIDATION_DIR` arranged as such: - -```shell - $TRAIN_DIR/dog/image0.jpeg - $TRAIN_DIR/dog/image1.jpg - $TRAIN_DIR/dog/image2.png - ... - $TRAIN_DIR/cat/weird-image.jpeg - $TRAIN_DIR/cat/my-image.jpeg - $TRAIN_DIR/cat/my-image.JPG - ... - $VALIDATION_DIR/dog/imageA.jpeg - $VALIDATION_DIR/dog/imageB.jpg - $VALIDATION_DIR/dog/imageC.png - ... - $VALIDATION_DIR/cat/weird-image.PNG - $VALIDATION_DIR/cat/that-image.jpg - $VALIDATION_DIR/cat/cat.JPG - ... 
-``` -**NOTE**: This script will append an extra background class indexed at 0, so -your class labels will range from 0 to num_labels. Using the example above, the -corresponding class labels generated from `build_image_data.py` will be as -follows: -```shell -0 -1 dog -2 cat -``` - -Each sub-directory in `$TRAIN_DIR` and `$VALIDATION_DIR` corresponds to a unique -label for the images that reside within that sub-directory. The images may be -JPEG or PNG images. We do not support other images types currently. - -Once the data is arranged in this directory structure, we can run -`build_image_data.py` on the data to generate the sharded `TFRecord` dataset. -Each entry of the `TFRecord` is a serialized `tf.Example` protocol buffer. A -complete list of information contained in the `tf.Example` is described in the -comments of `build_image_data.py`. - -To run `build_image_data.py`, you can run the following command line: - -```shell -# location to where to save the TFRecord data. -OUTPUT_DIRECTORY=$HOME/my-custom-data/ - -# build the preprocessing script. -cd tensorflow-models/inception -bazel build //inception:build_image_data - -# convert the data. -bazel-bin/inception/build_image_data \ - --train_directory="${TRAIN_DIR}" \ - --validation_directory="${VALIDATION_DIR}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --labels_file="${LABELS_FILE}" \ - --train_shards=128 \ - --validation_shards=24 \ - --num_threads=8 -``` - -where the `$OUTPUT_DIRECTORY` is the location of the sharded `TFRecords`. The -`$LABELS_FILE` will be a text file that is read by the script that provides -a list of all of the labels. For instance, in the case flowers data set, the -`$LABELS_FILE` contained the following data: - -```shell -daisy -dandelion -roses -sunflowers -tulips -``` - -Note that each row of each label corresponds with the entry in the final -classifier in the model. That is, the `daisy` corresponds to the classifier for -entry `1`; `dandelion` is entry `2`, etc. 
We skip label `0` as a background -class. - -After running this script produces files that look like the following: - -```shell - $TRAIN_DIR/train-00000-of-00128 - $TRAIN_DIR/train-00001-of-00128 - ... - $TRAIN_DIR/train-00127-of-00128 - -and - - $VALIDATION_DIR/validation-00000-of-00024 - $VALIDATION_DIR/validation-00001-of-00024 - ... - $VALIDATION_DIR/validation-00023-of-00024 -``` - -where 128 and 24 are the number of shards specified for each dataset, -respectively. Generally speaking, we aim for selecting the number of shards such -that roughly 1024 images reside in each shard. Once this data set is built, you -are ready to train or fine-tune an Inception model on this data set. - -Note, if you are piggy backing on the flowers retraining scripts, be sure to -update `num_classes()` and `num_examples_per_epoch()` in `flowers_data.py` -to correspond with your data. - -## Practical Considerations for Training a Model - -The model architecture and training procedure is heavily dependent on the -hardware used to train the model. If you wish to train or fine-tune this model -on your machine **you will need to adjust and empirically determine a good set -of training hyper-parameters for your setup**. What follows are some general -considerations for novices. - -### Finding Good Hyperparameters - -Roughly 5-10 hyper-parameters govern the speed at which a network is trained. In -addition to `--batch_size` and `--num_gpus`, there are several constants defined -in [inception_train.py](inception/inception_train.py) which dictate the learning -schedule. - -```shell -RMSPROP_DECAY = 0.9 # Decay term for RMSProp. -MOMENTUM = 0.9 # Momentum in RMSProp. -RMSPROP_EPSILON = 1.0 # Epsilon term for RMSProp. -INITIAL_LEARNING_RATE = 0.1 # Initial learning rate. -NUM_EPOCHS_PER_DECAY = 30.0 # Epochs after which learning rate decays. -LEARNING_RATE_DECAY_FACTOR = 0.16 # Learning rate decay factor. 
-``` - -There are many papers that discuss the various tricks and trade-offs associated -with training a model with stochastic gradient descent. For those new to the -field, some great references are: - -* Y Bengio, [Practical recommendations for gradient-based training of deep - architectures](http://arxiv.org/abs/1206.5533) -* I Goodfellow, Y Bengio and A Courville, [Deep Learning] - (http://www.deeplearningbook.org/) - -What follows is a summary of some general advice for identifying appropriate -model hyper-parameters in the context of this particular model training setup. -Namely, this library provides *synchronous* updates to model parameters based on -batch-splitting the model across multiple GPUs. - -* Higher learning rates leads to faster training. Too high of learning rate - leads to instability and will cause model parameters to diverge to infinity - or NaN. - -* Larger batch sizes lead to higher quality estimates of the gradient and - permit training the model with higher learning rates. - -* Often the GPU memory is a bottleneck that prevents employing larger batch - sizes. Employing more GPUs allows one to use larger batch sizes because - this model splits the batch across the GPUs. - -**NOTE** If one wishes to train this model with *asynchronous* gradient updates, -one will need to substantially alter this model and new considerations need to -be factored into hyperparameter tuning. See [Large Scale Distributed Deep -Networks](http://research.google.com/archive/large_deep_networks_nips2012.html) -for a discussion in this domain. - -### Adjusting Memory Demands - -Training this model has large memory demands in terms of the CPU and GPU. Let's -discuss each item in turn. - -GPU memory is relatively small compared to CPU memory. Two items dictate the -amount of GPU memory employed -- model architecture and batch size. Assuming -that you keep the model architecture fixed, the sole parameter governing the GPU -demand is the batch size. 
A good rule of thumb is to try employ as large of -batch size as will fit on the GPU. - -If you run out of GPU memory, either lower the `--batch_size` or employ more -GPUs on your desktop. The model performs batch-splitting across GPUs, thus N -GPUs can handle N times the batch size of 1 GPU. - -The model requires a large amount of CPU memory as well. We have tuned the model -to employ about ~20GB of CPU memory. Thus, having access to about 40 GB of CPU -memory would be ideal. - -If that is not possible, you can tune down the memory demands of the model via -lowering `--input_queue_memory_factor`. Images are preprocessed asynchronously -with respect to the main training across `--num_preprocess_threads` threads. The -preprocessed images are stored in shuffling queue in which each GPU performs a -dequeue operation in order to receive a `batch_size` worth of images. - -In order to guarantee good shuffling across the data, we maintain a large -shuffling queue of 1024 x `input_queue_memory_factor` images. For the current -model architecture, this corresponds to about 4GB of CPU memory. You may lower -`input_queue_memory_factor` in order to decrease the memory footprint. Keep in -mind though that lowering this value drastically may result in a model with -slightly lower predictive accuracy when training from scratch. Please see -comments in [`image_processing.py`](inception/image_processing.py) for more details. - -## Troubleshooting - -#### The model runs out of CPU memory. - -In lieu of buying more CPU memory, an easy fix is to decrease -`--input_queue_memory_factor`. See [Adjusting Memory Demands](#adjusting-memory-demands). - -#### The model runs out of GPU memory. - -The data is not able to fit on the GPU card. The simplest solution is to -decrease the batch size of the model. 
Otherwise, you will need to think about a -more sophisticated method for specifying the training which cuts up the model -across multiple `session.run()` calls or partitions the model across multiple -GPUs. See [Using GPUs](https://www.tensorflow.org/how_tos/using_gpu/index.html) -and [Adjusting Memory Demands](#adjusting-memory-demands) for more information. - -#### The model training results in NaN's. - -The learning rate of the model is too high. Turn down your learning rate. - -#### I wish to train a model with a different image size. - -The simplest solution is to artificially resize your images to `299x299` pixels. -See [Images](https://www.tensorflow.org/api_docs/python/image.html) section for -many resizing, cropping and padding methods. Note that the entire model -architecture is predicated on a `299x299` image, thus if you wish to change the -input image size, then you may need to redesign the entire model architecture. - -#### What hardware specification are these hyper-parameters targeted for? - -We targeted a desktop with 128GB of CPU ram connected to 8 NVIDIA Tesla K40 GPU -cards but we have run this on desktops with 32GB of CPU ram and 1 NVIDIA Tesla -K40. You can get a sense of the various training configurations we tested by -reading the comments in [`inception_train.py`](inception/inception_train.py). - -#### How do I continue training from a checkpoint in distributed setting? - -You only need to make sure that the checkpoint is in a location that can be -reached by all of the `ps` tasks. By specifying the checkpoint location with -`--train_dir` , the `ps` servers will load the checkpoint before commencing -training. 
diff --git a/research/inception/WORKSPACE b/research/inception/WORKSPACE deleted file mode 100644 index 2d7b4fb254a0fcebe695cb3fd3685af29a02e0b0..0000000000000000000000000000000000000000 --- a/research/inception/WORKSPACE +++ /dev/null @@ -1 +0,0 @@ -workspace(name = "inception") diff --git a/research/inception/g3doc/inception_v3_architecture.png b/research/inception/g3doc/inception_v3_architecture.png deleted file mode 100644 index 91fb734a104b2f63114ade7c8f9b2f95ce6334a6..0000000000000000000000000000000000000000 Binary files a/research/inception/g3doc/inception_v3_architecture.png and /dev/null differ diff --git a/research/inception/inception/BUILD b/research/inception/inception/BUILD deleted file mode 100644 index 21fc27aa57c14f6a72359cf15d446787c8ea6c2e..0000000000000000000000000000000000000000 --- a/research/inception/inception/BUILD +++ /dev/null @@ -1,198 +0,0 @@ -# Description: -# Example TensorFlow models for ImageNet. - -package(default_visibility = [":internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = ["//inception/..."], -) - -py_library( - name = "dataset", - srcs = [ - "dataset.py", - ], -) - -py_library( - name = "imagenet_data", - srcs = [ - "imagenet_data.py", - ], - deps = [ - ":dataset", - ], -) - -py_library( - name = "flowers_data", - srcs = [ - "flowers_data.py", - ], - deps = [ - ":dataset", - ], -) - -py_library( - name = "image_processing", - srcs = [ - "image_processing.py", - ], -) - -py_library( - name = "inception", - srcs = [ - "inception_model.py", - ], - visibility = ["//visibility:public"], - deps = [ - ":dataset", - "//inception/slim", - ], -) - -py_binary( - name = "imagenet_eval", - srcs = [ - "imagenet_eval.py", - ], - deps = [ - ":imagenet_data", - ":inception_eval", - ], -) - -py_binary( - name = "flowers_eval", - srcs = [ - "flowers_eval.py", - ], - deps = [ - ":flowers_data", - ":inception_eval", - ], -) - -py_library( - name = "inception_eval", 
- srcs = [ - "inception_eval.py", - ], - deps = [ - ":image_processing", - ":inception", - ], -) - -py_binary( - name = "imagenet_train", - srcs = [ - "imagenet_train.py", - ], - deps = [ - ":imagenet_data", - ":inception_train", - ], -) - -py_binary( - name = "imagenet_distributed_train", - srcs = [ - "imagenet_distributed_train.py", - ], - deps = [ - ":imagenet_data", - ":inception_distributed_train", - ], -) - -py_binary( - name = "flowers_train", - srcs = [ - "flowers_train.py", - ], - deps = [ - ":flowers_data", - ":inception_train", - ], -) - -py_library( - name = "inception_train", - srcs = [ - "inception_train.py", - ], - deps = [ - ":image_processing", - ":inception", - ], -) - -py_library( - name = "inception_distributed_train", - srcs = [ - "inception_distributed_train.py", - ], - deps = [ - ":image_processing", - ":inception", - ], -) - -py_binary( - name = "build_image_data", - srcs = ["data/build_image_data.py"], -) - -sh_binary( - name = "download_and_preprocess_flowers", - srcs = ["data/download_and_preprocess_flowers.sh"], - data = [ - ":build_image_data", - ], -) - -sh_binary( - name = "download_and_preprocess_imagenet", - srcs = ["data/download_and_preprocess_imagenet.sh"], - data = [ - "data/download_imagenet.sh", - "data/imagenet_2012_validation_synset_labels.txt", - "data/imagenet_lsvrc_2015_synsets.txt", - "data/imagenet_metadata.txt", - "data/preprocess_imagenet_validation_data.py", - "data/process_bounding_boxes.py", - ":build_imagenet_data", - ], -) - -py_binary( - name = "build_imagenet_data", - srcs = ["data/build_imagenet_data.py"], -) - -filegroup( - name = "srcs", - srcs = glob( - [ - "**/*.py", - "BUILD", - ], - ), -) - -filegroup( - name = "imagenet_metadata", - srcs = [ - "data/imagenet_lsvrc_2015_synsets.txt", - "data/imagenet_metadata.txt", - ], - visibility = ["//visibility:public"], -) diff --git a/research/inception/inception/data/build_image_data.py b/research/inception/inception/data/build_image_data.py deleted file mode 
100755 index 894388b7f758a46746870f2f0d55d1df7d3fe29b..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/build_image_data.py +++ /dev/null @@ -1,436 +0,0 @@ -#!/usr/bin/python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts image data to TFRecords file format with Example protos. - -The image data set is expected to reside in JPEG files located in the -following directory structure. - - data_dir/label_0/image0.jpeg - data_dir/label_0/image1.jpg - ... - data_dir/label_1/weird-image.jpeg - data_dir/label_1/my-image.jpeg - ... - -where the sub-directory is the unique label associated with these images. - -This TensorFlow script converts the training and evaluation data into -a sharded data set consisting of TFRecord files - - train_directory/train-00000-of-01024 - train_directory/train-00001-of-01024 - ... - train_directory/train-01023-of-01024 - -and - - validation_directory/validation-00000-of-00128 - validation_directory/validation-00001-of-00128 - ... - validation_directory/validation-00127-of-00128 - -where we have selected 1024 and 128 shards for each data set. Each record -within the TFRecord file is a serialized Example proto. 
The Example proto -contains the following fields: - - image/encoded: string containing JPEG encoded image in RGB colorspace - image/height: integer, image height in pixels - image/width: integer, image width in pixels - image/colorspace: string, specifying the colorspace, always 'RGB' - image/channels: integer, specifying the number of channels, always 3 - image/format: string, specifying the format, always 'JPEG' - - image/filename: string containing the basename of the image file - e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' - image/class/label: integer specifying the index in a classification layer. - The label ranges from [0, num_labels] where 0 is unused and left as - the background class. - image/class/text: string specifying the human-readable version of the label - e.g. 'dog' - -If your data set involves bounding boxes, please look at build_imagenet_data.py. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os -import random -import sys -import threading - -import numpy as np -import tensorflow as tf - -tf.app.flags.DEFINE_string('train_directory', '/tmp/', - 'Training data directory') -tf.app.flags.DEFINE_string('validation_directory', '/tmp/', - 'Validation data directory') -tf.app.flags.DEFINE_string('output_directory', '/tmp/', - 'Output data directory') - -tf.app.flags.DEFINE_integer('train_shards', 2, - 'Number of shards in training TFRecord files.') -tf.app.flags.DEFINE_integer('validation_shards', 2, - 'Number of shards in validation TFRecord files.') - -tf.app.flags.DEFINE_integer('num_threads', 2, - 'Number of threads to preprocess the images.') - -# The labels file contains a list of valid labels are held in this file. -# Assumes that the file contains entries as such: -# dog -# cat -# flower -# where each line corresponds to a label. 
We map each label contained in -# the file to an integer corresponding to the line number starting from 0. -tf.app.flags.DEFINE_string('labels_file', '', 'Labels file') - - -FLAGS = tf.app.flags.FLAGS - - -def _int64_feature(value): - """Wrapper for inserting int64 features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def _bytes_feature(value): - """Wrapper for inserting bytes features into Example proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def _convert_to_example(filename, image_buffer, label, text, height, width): - """Build an Example proto for an example. - - Args: - filename: string, path to an image file, e.g., '/path/to/example.JPG' - image_buffer: string, JPEG encoding of RGB image - label: integer, identifier for the ground truth for the network - text: string, unique human-readable, e.g. 'dog' - height: integer, image height in pixels - width: integer, image width in pixels - Returns: - Example proto - """ - - colorspace = 'RGB' - channels = 3 - image_format = 'JPEG' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': _int64_feature(height), - 'image/width': _int64_feature(width), - 'image/colorspace': _bytes_feature(tf.compat.as_bytes(colorspace)), - 'image/channels': _int64_feature(channels), - 'image/class/label': _int64_feature(label), - 'image/class/text': _bytes_feature(tf.compat.as_bytes(text)), - 'image/format': _bytes_feature(tf.compat.as_bytes(image_format)), - 'image/filename': _bytes_feature(tf.compat.as_bytes(os.path.basename(filename))), - 'image/encoded': _bytes_feature(tf.compat.as_bytes(image_buffer))})) - return example - - -class ImageCoder(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Create a single Session to run all image coding calls. 
- self._sess = tf.Session() - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def decode_jpeg(self, image_data): - image = self._sess.run(self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a PNG. - """ - return filename.endswith('.png') - - -def _process_image(filename, coder): - """Process a single image file. - - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - height: integer, image height in pixels. - width: integer, image width in pixels. - """ - # Read the image file. - with tf.gfile.FastGFile(filename, 'rb') as f: - image_data = f.read() - - # Convert any PNG to JPEG's for consistency. - if _is_png(filename): - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - - # Decode the RGB JPEG. 
- image = coder.decode_jpeg(image_data) - - # Check that image converted to RGB - assert len(image.shape) == 3 - height = image.shape[0] - width = image.shape[1] - assert image.shape[2] == 3 - - return image_data, height, width - - -def _process_image_files_batch(coder, thread_index, ranges, name, filenames, - texts, labels, num_shards): - """Processes and saves list of images as TFRecord in 1 thread. - - Args: - coder: instance of ImageCoder to provide TensorFlow image coding utils. - thread_index: integer, unique batch to run index is within [0, len(ranges)). - ranges: list of pairs of integers specifying ranges of each batches to - analyze in parallel. - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - texts: list of strings; each string is human readable, e.g. 'dog' - labels: list of integer; each integer identifies the ground truth - num_shards: integer number of shards for this data set. - """ - # Each thread produces N shards where N = int(num_shards / num_threads). - # For instance, if num_shards = 128, and the num_threads = 2, then the first - # thread would produce shards [0, 64). - num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], - ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in range(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_directory, output_filename) - writer = tf.python_io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) - for i in files_in_shard: - filename = filenames[i] - label = labels[i] - text = texts[i] - - try: - image_buffer, height, width = _process_image(filename, coder) - except Exception as e: - print(e) - print('SKIPPED: Unexpected error while decoding %s.' % filename) - continue - - example = _convert_to_example(filename, image_buffer, label, - text, height, width) - writer.write(example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('%s [thread %d]: Processed %d of %d images in thread batch.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - writer.close() - print('%s [thread %d]: Wrote %d images to %s' % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print('%s [thread %d]: Wrote %d images to %d shards.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - -def _process_image_files(name, filenames, texts, labels, num_shards): - """Process and save list of images as TFRecord of Example protos. - - Args: - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - texts: list of strings; each string is human readable, e.g. 'dog' - labels: list of integer; each integer identifies the ground truth - num_shards: integer number of shards for this data set. - """ - assert len(filenames) == len(texts) - assert len(filenames) == len(labels) - - # Break all images into batches with a [ranges[i][0], ranges[i][1]]. 
- spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) - ranges = [] - for i in range(len(spacing) - 1): - ranges.append([spacing[i], spacing[i + 1]]) - - # Launch a thread for each batch. - print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) - sys.stdout.flush() - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - # Create a generic TensorFlow-based utility for converting all image codings. - coder = ImageCoder() - - threads = [] - for thread_index in range(len(ranges)): - args = (coder, thread_index, ranges, name, filenames, - texts, labels, num_shards) - t = threading.Thread(target=_process_image_files_batch, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print('%s: Finished writing all %d images in data set.' % - (datetime.now(), len(filenames))) - sys.stdout.flush() - - -def _find_image_files(data_dir, labels_file): - """Build a list of all images files and labels in the data set. - - Args: - data_dir: string, path to the root directory of images. - - Assumes that the image data set resides in JPEG files located in - the following directory structure. - - data_dir/dog/another-image.JPEG - data_dir/dog/my-image.jpg - - where 'dog' is the label associated with these images. - - labels_file: string, path to the labels file. - - The list of valid labels are held in this file. Assumes that the file - contains entries as such: - dog - cat - flower - where each line corresponds to a label. We map each label contained in - the file to an integer starting with the integer 0 corresponding to the - label contained in the first line. - - Returns: - filenames: list of strings; each string is a path to an image file. - texts: list of strings; each string is the class, e.g. 'dog' - labels: list of integer; each integer identifies the ground truth. 
- """ - print('Determining list of input files and labels from %s.' % data_dir) - unique_labels = [l.strip() for l in tf.gfile.FastGFile( - labels_file, 'r').readlines()] - - labels = [] - filenames = [] - texts = [] - - # Leave label index 0 empty as a background class. - label_index = 1 - - # Construct the list of JPEG files and labels. - for text in unique_labels: - jpeg_file_path = '%s/%s/*' % (data_dir, text) - matching_files = tf.gfile.Glob(jpeg_file_path) - - labels.extend([label_index] * len(matching_files)) - texts.extend([text] * len(matching_files)) - filenames.extend(matching_files) - - if not label_index % 100: - print('Finished finding files in %d of %d classes.' % ( - label_index, len(labels))) - label_index += 1 - - # Shuffle the ordering of all image files in order to guarantee - # random ordering of the images with respect to label in the - # saved TFRecord files. Make the randomization repeatable. - shuffled_index = list(range(len(filenames))) - random.seed(12345) - random.shuffle(shuffled_index) - - filenames = [filenames[i] for i in shuffled_index] - texts = [texts[i] for i in shuffled_index] - labels = [labels[i] for i in shuffled_index] - - print('Found %d JPEG files across %d labels inside %s.' % - (len(filenames), len(unique_labels), data_dir)) - return filenames, texts, labels - - -def _process_dataset(name, directory, num_shards, labels_file): - """Process a complete data set and save it as a TFRecord. - - Args: - name: string, unique identifier specifying the data set. - directory: string, root path to the data set. - num_shards: integer number of shards for this data set. - labels_file: string, path to the labels file. 
- """ - filenames, texts, labels = _find_image_files(directory, labels_file) - _process_image_files(name, filenames, texts, labels, num_shards) - - -def main(unused_argv): - assert not FLAGS.train_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards') - assert not FLAGS.validation_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with ' - 'FLAGS.validation_shards') - print('Saving results to %s' % FLAGS.output_directory) - - # Run it! - _process_dataset('validation', FLAGS.validation_directory, - FLAGS.validation_shards, FLAGS.labels_file) - _process_dataset('train', FLAGS.train_directory, - FLAGS.train_shards, FLAGS.labels_file) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/data/build_imagenet_data.py b/research/inception/inception/data/build_imagenet_data.py deleted file mode 100644 index c054735e782297f990451e29ff4383af24bbe802..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/build_imagenet_data.py +++ /dev/null @@ -1,707 +0,0 @@ -#!/usr/bin/python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts ImageNet data to TFRecords file format with Example protos. - -The raw ImageNet data set is expected to reside in JPEG files located in the -following directory structure. 
- - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - ... - -where 'n01440764' is the unique synset label associated with -these images. - -The training data set consists of 1000 sub-directories (i.e. labels) -each containing 1200 JPEG images for a total of 1.2M JPEG images. - -The evaluation data set consists of 1000 sub-directories (i.e. labels) -each containing 50 JPEG images for a total of 50K JPEG images. - -This TensorFlow script converts the training and evaluation data into -a sharded data set consisting of 1024 and 128 TFRecord files, respectively. - - train_directory/train-00000-of-01024 - train_directory/train-00001-of-01024 - ... - train_directory/train-01023-of-01024 - -and - - validation_directory/validation-00000-of-00128 - validation_directory/validation-00001-of-00128 - ... - validation_directory/validation-00127-of-00128 - -Each validation TFRecord file contains ~390 records. Each training TFREcord -file contains ~1250 records. Each record within the TFRecord file is a -serialized Example proto. The Example proto contains the following fields: - - image/encoded: string containing JPEG encoded image in RGB colorspace - image/height: integer, image height in pixels - image/width: integer, image width in pixels - image/colorspace: string, specifying the colorspace, always 'RGB' - image/channels: integer, specifying the number of channels, always 3 - image/format: string, specifying the format, always 'JPEG' - - image/filename: string containing the basename of the image file - e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' - image/class/label: integer specifying the index in a classification layer. - The label ranges from [1, 1000] where 0 is not used. - image/class/synset: string specifying the unique ID of the label, - e.g. 'n01440764' - image/class/text: string specifying the human-readable version of the label - e.g. 
'red fox, Vulpes vulpes' - - image/object/bbox/xmin: list of integers specifying the 0+ human annotated - bounding boxes - image/object/bbox/xmax: list of integers specifying the 0+ human annotated - bounding boxes - image/object/bbox/ymin: list of integers specifying the 0+ human annotated - bounding boxes - image/object/bbox/ymax: list of integers specifying the 0+ human annotated - bounding boxes - image/object/bbox/label: integer specifying the index in a classification - layer. The label ranges from [1, 1000] where 0 is not used. Note this is - always identical to the image label. - -Note that the length of xmin is identical to the length of xmax, ymin and ymax -for each example. - -Running this script using 16 threads may take around ~2.5 hours on an HP Z420. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os -import random -import sys -import threading - -import numpy as np -import six -import tensorflow as tf - -tf.app.flags.DEFINE_string('train_directory', '/tmp/', - 'Training data directory') -tf.app.flags.DEFINE_string('validation_directory', '/tmp/', - 'Validation data directory') -tf.app.flags.DEFINE_string('output_directory', '/tmp/', - 'Output data directory') - -tf.app.flags.DEFINE_integer('train_shards', 1024, - 'Number of shards in training TFRecord files.') -tf.app.flags.DEFINE_integer('validation_shards', 128, - 'Number of shards in validation TFRecord files.') - -tf.app.flags.DEFINE_integer('num_threads', 8, - 'Number of threads to preprocess the images.') - -# The labels file contains a list of valid labels are held in this file. -# Assumes that the file contains entries as such: -# n01440764 -# n01443537 -# n01484850 -# where each line corresponds to a label expressed as a synset. We map -# each synset contained in the file to an integer (based on the alphabetical -# ordering). See below for details. 
-tf.app.flags.DEFINE_string('labels_file', - 'imagenet_lsvrc_2015_synsets.txt', - 'Labels file') - -# This file containing mapping from synset to human-readable label. -# Assumes each line of the file looks like: -# -# n02119247 black fox -# n02119359 silver fox -# n02119477 red fox, Vulpes fulva -# -# where each line corresponds to a unique mapping. Note that each line is -# formatted as \t. -tf.app.flags.DEFINE_string('imagenet_metadata_file', - 'imagenet_metadata.txt', - 'ImageNet metadata file') - -# This file is the output of process_bounding_box.py -# Assumes each line of the file looks like: -# -# n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940 -# -# where each line corresponds to one bounding box annotation associated -# with an image. Each line can be parsed as: -# -# , , , , -# -# Note that there might exist mulitple bounding box annotations associated -# with an image file. -tf.app.flags.DEFINE_string('bounding_box_file', - './imagenet_2012_bounding_boxes.csv', - 'Bounding box file') - -FLAGS = tf.app.flags.FLAGS - - -def _int64_feature(value): - """Wrapper for inserting int64 features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def _float_feature(value): - """Wrapper for inserting float features into Example proto.""" - if not isinstance(value, list): - value = [value] - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -def _bytes_feature(value): - """Wrapper for inserting bytes features into Example proto.""" - if six.PY3 and isinstance(value, six.text_type): - value = six.binary_type(value, encoding='utf-8') - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def _convert_to_example(filename, image_buffer, label, synset, human, bbox, - height, width): - """Build an Example proto for an example. 
- - Args: - filename: string, path to an image file, e.g., '/path/to/example.JPG' - image_buffer: string, JPEG encoding of RGB image - label: integer, identifier for the ground truth for the network - synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' - human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' - bbox: list of bounding boxes; each box is a list of integers - specifying [xmin, ymin, xmax, ymax]. All boxes are assumed to belong to - the same label as the image label. - height: integer, image height in pixels - width: integer, image width in pixels - Returns: - Example proto - """ - xmin = [] - ymin = [] - xmax = [] - ymax = [] - for b in bbox: - assert len(b) == 4 - # pylint: disable=expression-not-assigned - [l.append(point) for l, point in zip([xmin, ymin, xmax, ymax], b)] - # pylint: enable=expression-not-assigned - - colorspace = 'RGB' - channels = 3 - image_format = 'JPEG' - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/height': _int64_feature(height), - 'image/width': _int64_feature(width), - 'image/colorspace': _bytes_feature(colorspace), - 'image/channels': _int64_feature(channels), - 'image/class/label': _int64_feature(label), - 'image/class/synset': _bytes_feature(synset), - 'image/class/text': _bytes_feature(human), - 'image/object/bbox/xmin': _float_feature(xmin), - 'image/object/bbox/xmax': _float_feature(xmax), - 'image/object/bbox/ymin': _float_feature(ymin), - 'image/object/bbox/ymax': _float_feature(ymax), - 'image/object/bbox/label': _int64_feature([label] * len(xmin)), - 'image/format': _bytes_feature(image_format), - 'image/filename': _bytes_feature(os.path.basename(filename)), - 'image/encoded': _bytes_feature(image_buffer)})) - return example - - -class ImageCoder(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Create a single Session to run all image coding calls. 
- self._sess = tf.Session() - - # Initializes function that converts PNG to JPEG data. - self._png_data = tf.placeholder(dtype=tf.string) - image = tf.image.decode_png(self._png_data, channels=3) - self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that converts CMYK JPEG data to RGB JPEG data. - self._cmyk_data = tf.placeholder(dtype=tf.string) - image = tf.image.decode_jpeg(self._cmyk_data, channels=0) - self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) - - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - - def png_to_jpeg(self, image_data): - return self._sess.run(self._png_to_jpeg, - feed_dict={self._png_data: image_data}) - - def cmyk_to_rgb(self, image_data): - return self._sess.run(self._cmyk_to_rgb, - feed_dict={self._cmyk_data: image_data}) - - def decode_jpeg(self, image_data): - image = self._sess.run(self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _is_png(filename): - """Determine if a file contains a PNG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a PNG. - """ - # File list from: - # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU - return 'n02105855_2933.JPEG' in filename - - -def _is_cmyk(filename): - """Determine if file contains a CMYK JPEG format image. - - Args: - filename: string, path of the image file. - - Returns: - boolean indicating if the image is a JPEG encoded with CMYK color space. 
- """ - # File list from: - # https://github.com/cytsai/ilsvrc-cmyk-image-list - blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', - 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', - 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', - 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', - 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', - 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', - 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', - 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', - 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', - 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', - 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] - return filename.split('/')[-1] in blacklist - - -def _process_image(filename, coder): - """Process a single image file. - - Args: - filename: string, path to an image file e.g., '/path/to/example.JPG'. - coder: instance of ImageCoder to provide TensorFlow image coding utils. - Returns: - image_buffer: string, JPEG encoding of RGB image. - height: integer, image height in pixels. - width: integer, image width in pixels. - """ - # Read the image file. - with tf.gfile.FastGFile(filename, 'rb') as f: - image_data = f.read() - - # Clean the dirty data. - if _is_png(filename): - # 1 image is a PNG. - print('Converting PNG to JPEG for %s' % filename) - image_data = coder.png_to_jpeg(image_data) - elif _is_cmyk(filename): - # 22 JPEG images are in CMYK colorspace. - print('Converting CMYK to RGB for %s' % filename) - image_data = coder.cmyk_to_rgb(image_data) - - # Decode the RGB JPEG. - image = coder.decode_jpeg(image_data) - - # Check that image converted to RGB - assert len(image.shape) == 3 - height = image.shape[0] - width = image.shape[1] - assert image.shape[2] == 3 - - return image_data, height, width - - -def _process_image_files_batch(coder, thread_index, ranges, name, filenames, - synsets, labels, humans, bboxes, num_shards): - """Processes and saves list of images as TFRecord in 1 thread. 
- - Args: - coder: instance of ImageCoder to provide TensorFlow image coding utils. - thread_index: integer, unique batch to run index is within [0, len(ranges)). - ranges: list of pairs of integers specifying ranges of each batches to - analyze in parallel. - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - bboxes: list of bounding boxes for each image. Note that each entry in this - list might contain from 0+ entries corresponding to the number of bounding - box annotations for the image. - num_shards: integer number of shards for this data set. - """ - # Each thread produces N shards where N = int(num_shards / num_threads). - # For instance, if num_shards = 128, and the num_threads = 2, then the first - # thread would produce shards [0, 64). - num_threads = len(ranges) - assert not num_shards % num_threads - num_shards_per_batch = int(num_shards / num_threads) - - shard_ranges = np.linspace(ranges[thread_index][0], - ranges[thread_index][1], - num_shards_per_batch + 1).astype(int) - num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] - - counter = 0 - for s in range(num_shards_per_batch): - # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' - shard = thread_index * num_shards_per_batch + s - output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) - output_file = os.path.join(FLAGS.output_directory, output_filename) - writer = tf.python_io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) - for i in files_in_shard: - filename = filenames[i] - label = labels[i] - synset = synsets[i] - human = humans[i] - bbox = bboxes[i] - - image_buffer, height, width = _process_image(filename, coder) - - example = _convert_to_example(filename, image_buffer, label, - synset, human, bbox, - height, width) - writer.write(example.SerializeToString()) - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('%s [thread %d]: Processed %d of %d images in thread batch.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - writer.close() - print('%s [thread %d]: Wrote %d images to %s' % - (datetime.now(), thread_index, shard_counter, output_file)) - sys.stdout.flush() - shard_counter = 0 - print('%s [thread %d]: Wrote %d images to %d shards.' % - (datetime.now(), thread_index, counter, num_files_in_thread)) - sys.stdout.flush() - - -def _process_image_files(name, filenames, synsets, labels, humans, - bboxes, num_shards): - """Process and save list of images as TFRecord of Example protos. - - Args: - name: string, unique identifier specifying the data set - filenames: list of strings; each string is a path to an image file - synsets: list of strings; each string is a unique WordNet ID - labels: list of integer; each integer identifies the ground truth - humans: list of strings; each string is a human-readable label - bboxes: list of bounding boxes for each image. Note that each entry in this - list might contain from 0+ entries corresponding to the number of bounding - box annotations for the image. - num_shards: integer number of shards for this data set. 
- """ - assert len(filenames) == len(synsets) - assert len(filenames) == len(labels) - assert len(filenames) == len(humans) - assert len(filenames) == len(bboxes) - - # Break all images into batches with a [ranges[i][0], ranges[i][1]]. - spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) - ranges = [] - threads = [] - for i in range(len(spacing) - 1): - ranges.append([spacing[i], spacing[i + 1]]) - - # Launch a thread for each batch. - print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) - sys.stdout.flush() - - # Create a mechanism for monitoring when all threads are finished. - coord = tf.train.Coordinator() - - # Create a generic TensorFlow-based utility for converting all image codings. - coder = ImageCoder() - - threads = [] - for thread_index in range(len(ranges)): - args = (coder, thread_index, ranges, name, filenames, - synsets, labels, humans, bboxes, num_shards) - t = threading.Thread(target=_process_image_files_batch, args=args) - t.start() - threads.append(t) - - # Wait for all the threads to terminate. - coord.join(threads) - print('%s: Finished writing all %d images in data set.' % - (datetime.now(), len(filenames))) - sys.stdout.flush() - - -def _find_image_files(data_dir, labels_file): - """Build a list of all images files and labels in the data set. - - Args: - data_dir: string, path to the root directory of images. - - Assumes that the ImageNet data set resides in JPEG files located in - the following directory structure. - - data_dir/n01440764/ILSVRC2012_val_00000293.JPEG - data_dir/n01440764/ILSVRC2012_val_00000543.JPEG - - where 'n01440764' is the unique synset label associated with these images. - - labels_file: string, path to the labels file. - - The list of valid labels are held in this file. Assumes that the file - contains entries as such: - n01440764 - n01443537 - n01484850 - where each line corresponds to a label expressed as a synset. 
We map - each synset contained in the file to an integer (based on the alphabetical - ordering) starting with the integer 1 corresponding to the synset - contained in the first line. - - The reason we start the integer labels at 1 is to reserve label 0 as an - unused background class. - - Returns: - filenames: list of strings; each string is a path to an image file. - synsets: list of strings; each string is a unique WordNet ID. - labels: list of integer; each integer identifies the ground truth. - """ - print('Determining list of input files and labels from %s.' % data_dir) - challenge_synsets = [l.strip() for l in - tf.gfile.FastGFile(labels_file, 'r').readlines()] - - labels = [] - filenames = [] - synsets = [] - - # Leave label index 0 empty as a background class. - label_index = 1 - - # Construct the list of JPEG files and labels. - for synset in challenge_synsets: - jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) - matching_files = tf.gfile.Glob(jpeg_file_path) - - labels.extend([label_index] * len(matching_files)) - synsets.extend([synset] * len(matching_files)) - filenames.extend(matching_files) - - if not label_index % 100: - print('Finished finding files in %d of %d classes.' % ( - label_index, len(challenge_synsets))) - label_index += 1 - - # Shuffle the ordering of all image files in order to guarantee - # random ordering of the images with respect to label in the - # saved TFRecord files. Make the randomization repeatable. - shuffled_index = list(range(len(filenames))) - random.seed(12345) - random.shuffle(shuffled_index) - - filenames = [filenames[i] for i in shuffled_index] - synsets = [synsets[i] for i in shuffled_index] - labels = [labels[i] for i in shuffled_index] - - print('Found %d JPEG files across %d labels inside %s.' % - (len(filenames), len(challenge_synsets), data_dir)) - return filenames, synsets, labels - - -def _find_human_readable_labels(synsets, synset_to_human): - """Build a list of human-readable labels. 
- - Args: - synsets: list of strings; each string is a unique WordNet ID. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - - Returns: - List of human-readable strings corresponding to each synset. - """ - humans = [] - for s in synsets: - assert s in synset_to_human, ('Failed to find: %s' % s) - humans.append(synset_to_human[s]) - return humans - - -def _find_image_bounding_boxes(filenames, image_to_bboxes): - """Find the bounding boxes for a given image file. - - Args: - filenames: list of strings; each string is a path to an image file. - image_to_bboxes: dictionary mapping image file names to a list of - bounding boxes. This list contains 0+ bounding boxes. - Returns: - List of bounding boxes for each image. Note that each entry in this - list might contain from 0+ entries corresponding to the number of bounding - box annotations for the image. - """ - num_image_bbox = 0 - bboxes = [] - for f in filenames: - basename = os.path.basename(f) - if basename in image_to_bboxes: - bboxes.append(image_to_bboxes[basename]) - num_image_bbox += 1 - else: - bboxes.append([]) - print('Found %d images with bboxes out of %d images' % ( - num_image_bbox, len(filenames))) - return bboxes - - -def _process_dataset(name, directory, num_shards, synset_to_human, - image_to_bboxes): - """Process a complete data set and save it as a TFRecord. - - Args: - name: string, unique identifier specifying the data set. - directory: string, root path to the data set. - num_shards: integer number of shards for this data set. - synset_to_human: dict of synset to human labels, e.g., - 'n02119022' --> 'red fox, Vulpes vulpes' - image_to_bboxes: dictionary mapping image file names to a list of - bounding boxes. This list contains 0+ bounding boxes. 
- """ - filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) - humans = _find_human_readable_labels(synsets, synset_to_human) - bboxes = _find_image_bounding_boxes(filenames, image_to_bboxes) - _process_image_files(name, filenames, synsets, labels, - humans, bboxes, num_shards) - - -def _build_synset_lookup(imagenet_metadata_file): - """Build lookup for synset to human-readable label. - - Args: - imagenet_metadata_file: string, path to file containing mapping from - synset to human-readable label. - - Assumes each line of the file looks like: - - n02119247 black fox - n02119359 silver fox - n02119477 red fox, Vulpes fulva - - where each line corresponds to a unique mapping. Note that each line is - formatted as \t. - - Returns: - Dictionary of synset to human labels, such as: - 'n02119022' --> 'red fox, Vulpes vulpes' - """ - lines = tf.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() - synset_to_human = {} - for l in lines: - if l: - parts = l.strip().split('\t') - assert len(parts) == 2 - synset = parts[0] - human = parts[1] - synset_to_human[synset] = human - return synset_to_human - - -def _build_bounding_box_lookup(bounding_box_file): - """Build a lookup from image file to bounding boxes. - - Args: - bounding_box_file: string, path to file with bounding boxes annotations. - - Assumes each line of the file looks like: - - n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940 - - where each line corresponds to one bounding box annotation associated - with an image. Each line can be parsed as: - - , , , , - - Note that there might exist mulitple bounding box annotations associated - with an image file. This file is the output of process_bounding_boxes.py. - - Returns: - Dictionary mapping image file names to a list of bounding boxes. This list - contains 0+ bounding boxes. 
- """ - lines = tf.gfile.FastGFile(bounding_box_file, 'r').readlines() - images_to_bboxes = {} - num_bbox = 0 - num_image = 0 - for l in lines: - if l: - parts = l.split(',') - assert len(parts) == 5, ('Failed to parse: %s' % l) - filename = parts[0] - xmin = float(parts[1]) - ymin = float(parts[2]) - xmax = float(parts[3]) - ymax = float(parts[4]) - box = [xmin, ymin, xmax, ymax] - - if filename not in images_to_bboxes: - images_to_bboxes[filename] = [] - num_image += 1 - images_to_bboxes[filename].append(box) - num_bbox += 1 - - print('Successfully read %d bounding boxes ' - 'across %d images.' % (num_bbox, num_image)) - return images_to_bboxes - - -def main(unused_argv): - assert not FLAGS.train_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards') - assert not FLAGS.validation_shards % FLAGS.num_threads, ( - 'Please make the FLAGS.num_threads commensurate with ' - 'FLAGS.validation_shards') - print('Saving results to %s' % FLAGS.output_directory) - - # Build a map from synset to human-readable label. - synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) - image_to_bboxes = _build_bounding_box_lookup(FLAGS.bounding_box_file) - - # Run it! - _process_dataset('validation', FLAGS.validation_directory, - FLAGS.validation_shards, synset_to_human, image_to_bboxes) - _process_dataset('train', FLAGS.train_directory, FLAGS.train_shards, - synset_to_human, image_to_bboxes) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/data/download_and_preprocess_flowers.sh b/research/inception/inception/data/download_and_preprocess_flowers.sh deleted file mode 100755 index ee045c164e803ab38be69fb1933134e7f37f1793..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/download_and_preprocess_flowers.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess the flowers data set. This data set -# provides a demonstration for how to perform fine-tuning (i.e. tranfer -# learning) from one model to a new data set. -# -# This script provides a demonstration for how to prepare an arbitrary -# data set for training an Inception v3 model. -# -# We demonstrate this with the flowers data set which consists of images -# of labeled flower images from 5 classes: -# -# daisy, dandelion, roses, sunflowers, tulips -# -# The final output of this script are sharded TFRecord files containing -# serialized Example protocol buffers. See build_image_data.py for -# details of how the Example protocol buffer contains image data. -# -# usage: -# ./download_and_preprocess_flowers.sh [data-dir] -set -e - -if [ -z "$1" ]; then - echo "Usage: download_and_preprocess_flowers.sh [data dir]" - exit -fi - -# Create the output and temporary directories. -DATA_DIR="${1%/}" -SCRATCH_DIR="${DATA_DIR}/raw-data" -WORK_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -mkdir -p "${DATA_DIR}" -mkdir -p "${SCRATCH_DIR}" - -# Download the flowers data. -DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" -CURRENT_DIR=$(pwd) -cd "${DATA_DIR}" -TARBALL="flower_photos.tgz" -if [ ! -f ${TARBALL} ]; then - echo "Downloading flower data set." 
- curl -o ${TARBALL} "${DATA_URL}" -else - echo "Skipping download of flower data." -fi - -# Note the locations of the train and validation data. -TRAIN_DIRECTORY="${SCRATCH_DIR}/train" -VALIDATION_DIRECTORY="${SCRATCH_DIR}/validation" - -# Expands the data into the flower_photos/ directory and rename it as the -# train directory. -tar xf flower_photos.tgz -rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" -mv flower_photos "${TRAIN_DIRECTORY}" - -# Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips -LABELS_FILE="${SCRATCH_DIR}/labels.txt" -ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" - -# Generate the validation data set. -while read LABEL; do - VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}/${LABEL}" - TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}/${LABEL}" - - # Move the first randomly selected 100 images to the validation set. - mkdir -p "${VALIDATION_DIR_FOR_LABEL}" - VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | shuf | head -100) - for IMAGE in ${VALIDATION_IMAGES}; do - mv -f "${TRAIN_DIRECTORY}/${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" - done -done < "${LABELS_FILE}" - -# Build the TFRecords version of the image data. -cd "${CURRENT_DIR}" -BUILD_SCRIPT="${WORK_DIR}/build_image_data.py" -OUTPUT_DIRECTORY="${DATA_DIR}" -"${BUILD_SCRIPT}" \ - --train_directory="${TRAIN_DIRECTORY}" \ - --validation_directory="${VALIDATION_DIRECTORY}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --labels_file="${LABELS_FILE}" diff --git a/research/inception/inception/data/download_and_preprocess_flowers_mac.sh b/research/inception/inception/data/download_and_preprocess_flowers_mac.sh deleted file mode 100644 index 154905635b19aeaaea087a8e76afda9b8c624d59..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/download_and_preprocess_flowers_mac.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess the flowers data set. This data set -# provides a demonstration for how to perform fine-tuning (i.e. tranfer -# learning) from one model to a new data set. -# -# This script provides a demonstration for how to prepare an arbitrary -# data set for training an Inception v3 model. -# -# We demonstrate this with the flowers data set which consists of images -# of labeled flower images from 5 classes: -# -# daisy, dandelion, roses, sunflowers, tulips -# -# The final output of this script are sharded TFRecord files containing -# serialized Example protocol buffers. See build_image_data.py for -# details of how the Example protocol buffer contains image data. -# -# usage: -# ./download_and_preprocess_flowers.sh [data-dir] -set -e - -if [ -z "$1" ]; then - echo "Usage: download_and_preprocess_flowers.sh [data dir]" - exit -fi - -# Create the output and temporary directories. -DATA_DIR="${1%/}" -SCRATCH_DIR="${DATA_DIR}/raw-data/" -mkdir -p "${DATA_DIR}" -mkdir -p "${SCRATCH_DIR}" -WORK_DIR="$0.runfiles/inception/inception" - -# Download the flowers data. -DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" -CURRENT_DIR=$(pwd) -cd "${DATA_DIR}" -TARBALL="flower_photos.tgz" -if [ ! -f ${TARBALL} ]; then - echo "Downloading flower data set." 
- curl -o ${TARBALL} "${DATA_URL}" -else - echo "Skipping download of flower data." -fi - -# Note the locations of the train and validation data. -TRAIN_DIRECTORY="${SCRATCH_DIR}train/" -VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" - -# Expands the data into the flower_photos/ directory and rename it as the -# train directory. -tar xf flower_photos.tgz -rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" -mv flower_photos "${TRAIN_DIRECTORY}" - -# Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips -LABELS_FILE="${SCRATCH_DIR}/labels.txt" -ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" - -# Generate the validation data set. -while read LABEL; do - VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}" - TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}" - - # Move the first randomly selected 100 images to the validation set. - mkdir -p "${VALIDATION_DIR_FOR_LABEL}" - VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | gshuf | head -100) - for IMAGE in ${VALIDATION_IMAGES}; do - mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" - done -done < "${LABELS_FILE}" - -# Build the TFRecords version of the image data. -cd "${CURRENT_DIR}" -BUILD_SCRIPT="${WORK_DIR}/build_image_data" -OUTPUT_DIRECTORY="${DATA_DIR}" -"${BUILD_SCRIPT}" \ - --train_directory="${TRAIN_DIRECTORY}" \ - --validation_directory="${VALIDATION_DIRECTORY}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --labels_file="${LABELS_FILE}" diff --git a/research/inception/inception/data/download_and_preprocess_imagenet.sh b/research/inception/inception/data/download_and_preprocess_imagenet.sh deleted file mode 100755 index 6faae831075d4f6bfdc8bf8797219f7a0e4c1797..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/download_and_preprocess_imagenet.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download and preprocess ImageNet Challenge 2012 -# training and validation data set. -# -# The final output of this script are sharded TFRecord files containing -# serialized Example protocol buffers. See build_imagenet_data.py for -# details of how the Example protocol buffers contain the ImageNet data. -# -# The final output of this script appears as such: -# -# data_dir/train-00000-of-01024 -# data_dir/train-00001-of-01024 -# ... -# data_dir/train-01023-of-01024 -# -# and -# -# data_dir/validation-00000-of-00128 -# data_dir/validation-00001-of-00128 -# ... -# data_dir/validation-00127-of-00128 -# -# Note that this script may take several hours to run to completion. The -# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending -# on the speed of your machine. Please be patient. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. -# -# usage: -# ./download_and_preprocess_imagenet.sh [data-dir] -set -e - -if [ -z "$1" ]; then - echo "Usage: download_and_preprocess_imagenet.sh [data dir]" - exit -fi - -# Create the output and temporary directories. 
-DATA_DIR="${1%/}" -SCRATCH_DIR="${DATA_DIR}/raw-data/" -mkdir -p "${DATA_DIR}" -mkdir -p "${SCRATCH_DIR}" -WORK_DIR="$0.runfiles/inception/inception" - -# Download the ImageNet data. -LABELS_FILE="${WORK_DIR}/data/imagenet_lsvrc_2015_synsets.txt" -DOWNLOAD_SCRIPT="${WORK_DIR}/data/download_imagenet.sh" -"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" - -# Note the locations of the train and validation data. -TRAIN_DIRECTORY="${SCRATCH_DIR}train/" -VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" - -# Preprocess the validation data by moving the images into the appropriate -# sub-directory based on the label (synset) of the image. -echo "Organizing the validation data into sub-directories." -PREPROCESS_VAL_SCRIPT="${WORK_DIR}/data/preprocess_imagenet_validation_data.py" -VAL_LABELS_FILE="${WORK_DIR}/data/imagenet_2012_validation_synset_labels.txt" - -"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" - -# Convert the XML files for bounding box annotations into a single CSV. -echo "Extracting bounding box information from XML." -BOUNDING_BOX_SCRIPT="${WORK_DIR}/data/process_bounding_boxes.py" -BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" -BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" - -"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ - | sort > "${BOUNDING_BOX_FILE}" -echo "Finished downloading and preprocessing the ImageNet data." - -# Build the TFRecords version of the ImageNet data. 
-BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" -OUTPUT_DIRECTORY="${DATA_DIR}" -IMAGENET_METADATA_FILE="${WORK_DIR}/data/imagenet_metadata.txt" - -"${BUILD_SCRIPT}" \ - --train_directory="${TRAIN_DIRECTORY}" \ - --validation_directory="${VALIDATION_DIRECTORY}" \ - --output_directory="${OUTPUT_DIRECTORY}" \ - --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ - --labels_file="${LABELS_FILE}" \ - --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/research/inception/inception/data/download_imagenet.sh b/research/inception/inception/data/download_imagenet.sh deleted file mode 100755 index f6c77781c0bcaad642ec7a38a7ba00693ef8ef83..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/download_imagenet.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Script to download ImageNet Challenge 2012 training and validation data set. -# -# Downloads and decompresses raw images and bounding boxes. -# -# **IMPORTANT** -# To download the raw images, the user must create an account with image-net.org -# and generate a username and access_key. The latter two are required for -# downloading the raw images. 
-# -# usage: -# ./download_imagenet.sh [dir name] [synsets file] -set -e - -if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then - cat < ') - sys.exit(-1) - data_dir = sys.argv[1] - validation_labels_file = sys.argv[2] - - # Read in the 50000 synsets associated with the validation data set. - labels = [l.strip() for l in open(validation_labels_file).readlines()] - unique_labels = set(labels) - - # Make all sub-directories in the validation data dir. - for label in unique_labels: - labeled_data_dir = os.path.join(data_dir, label) - # Catch error if sub-directory exists - try: - os.makedirs(labeled_data_dir) - except OSError as e: - # Raise all errors but 'EEXIST' - if e.errno != errno.EEXIST: - raise - - # Move all of the image to the appropriate sub-directory. - for i in range(len(labels)): - basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1) - original_filename = os.path.join(data_dir, basename) - if not os.path.exists(original_filename): - print('Failed to find: %s' % original_filename) - sys.exit(-1) - new_filename = os.path.join(data_dir, labels[i], basename) - os.rename(original_filename, new_filename) diff --git a/research/inception/inception/data/process_bounding_boxes.py b/research/inception/inception/data/process_bounding_boxes.py deleted file mode 100755 index 5e9fd786e40b6d95b89fcc9f9774aa7f132c1a6f..0000000000000000000000000000000000000000 --- a/research/inception/inception/data/process_bounding_boxes.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Process the ImageNet Challenge bounding boxes for TensorFlow model training. - -This script is called as - -process_bounding_boxes.py
[synsets-file] - -Where is a directory containing the downloaded and unpacked bounding box -data. If [synsets-file] is supplied, then only the bounding boxes whose -synstes are contained within this file are returned. Note that the -[synsets-file] file contains synset ids, one per line. - -The script dumps out a CSV text file in which each line contains an entry. - n00007846_64193.JPEG,0.0060,0.2620,0.7545,0.9940 - -The entry can be read as: - , , , , - -The bounding box for contains two points (xmin, ymin) and -(xmax, ymax) specifying the lower-left corner and upper-right corner of a -bounding box in *relative* coordinates. - -The user supplies a directory where the XML files reside. The directory -structure in the directory is assumed to look like this: - -/nXXXXXXXX/nXXXXXXXX_YYYY.xml - -Each XML file contains a bounding box annotation. The script: - - (1) Parses the XML file and extracts the filename, label and bounding box info. - - (2) The bounding box is specified in the XML files as integer (xmin, ymin) and - (xmax, ymax) *relative* to image size displayed to the human annotator. The - size of the image displayed to the human annotator is stored in the XML file - as integer (height, width). - - Note that the displayed size will differ from the actual size of the image - downloaded from image-net.org. To make the bounding box annotation useable, - we convert bounding box to floating point numbers relative to displayed - height and width of the image. - - Note that each XML file might contain N bounding box annotations. - - Note that the points are all clamped at a range of [0.0, 1.0] because some - human annotations extend outside the range of the supplied image. - - See details here: http://image-net.org/download-bboxes - -(3) By default, the script outputs all valid bounding boxes. If a - [synsets-file] is supplied, only the subset of bounding boxes associated - with those synsets are outputted. 
Importantly, one can supply a list of - synsets in the ImageNet Challenge and output the list of bounding boxes - associated with the training images of the ILSVRC. - - We use these bounding boxes to inform the random distortion of images - supplied to the network. - -If you run this script successfully, you will see the following output -to stderr: -> Finished processing 544546 XML files. -> Skipped 0 XML files not in ImageNet Challenge. -> Skipped 0 bounding boxes not in ImageNet Challenge. -> Wrote 615299 bounding boxes from 544546 annotated images. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import glob -import os.path -import sys -import xml.etree.ElementTree as ET - - -class BoundingBox(object): - pass - - -def GetItem(name, root, index=0): - count = 0 - for item in root.iter(name): - if count == index: - return item.text - count += 1 - # Failed to find "index" occurrence of item. - return -1 - - -def GetInt(name, root, index=0): - # In some XML annotation files, the point values are not integers, but floats. - # So we add a float function to avoid ValueError. - return int(float(GetItem(name, root, index))) - - -def FindNumberBoundingBoxes(root): - index = 0 - while True: - if GetInt('xmin', root, index) == -1: - break - index += 1 - return index - - -def ProcessXMLAnnotation(xml_file): - """Process a single XML file containing a bounding box.""" - # pylint: disable=broad-except - try: - tree = ET.parse(xml_file) - except Exception: - print('Failed to parse: ' + xml_file, file=sys.stderr) - return None - # pylint: enable=broad-except - root = tree.getroot() - - num_boxes = FindNumberBoundingBoxes(root) - boxes = [] - - for index in range(num_boxes): - box = BoundingBox() - # Grab the 'index' annotation. 
- box.xmin = GetInt('xmin', root, index) - box.ymin = GetInt('ymin', root, index) - box.xmax = GetInt('xmax', root, index) - box.ymax = GetInt('ymax', root, index) - - box.width = GetInt('width', root) - box.height = GetInt('height', root) - box.filename = GetItem('filename', root) + '.JPEG' - box.label = GetItem('name', root) - - xmin = float(box.xmin) / float(box.width) - xmax = float(box.xmax) / float(box.width) - ymin = float(box.ymin) / float(box.height) - ymax = float(box.ymax) / float(box.height) - - # Some images contain bounding box annotations that - # extend outside of the supplied image. See, e.g. - # n03127925/n03127925_147.xml - # Additionally, for some bounding boxes, the min > max - # or the box is entirely outside of the image. - min_x = min(xmin, xmax) - max_x = max(xmin, xmax) - box.xmin_scaled = min(max(min_x, 0.0), 1.0) - box.xmax_scaled = min(max(max_x, 0.0), 1.0) - - min_y = min(ymin, ymax) - max_y = max(ymin, ymax) - box.ymin_scaled = min(max(min_y, 0.0), 1.0) - box.ymax_scaled = min(max(max_y, 0.0), 1.0) - - boxes.append(box) - - return boxes - -if __name__ == '__main__': - if len(sys.argv) < 2 or len(sys.argv) > 3: - print('Invalid usage\n' - 'usage: process_bounding_boxes.py [synsets-file]', - file=sys.stderr) - sys.exit(-1) - - xml_files = glob.glob(sys.argv[1] + '/*/*.xml') - print('Identified %d XML files in %s' % (len(xml_files), sys.argv[1]), - file=sys.stderr) - - if len(sys.argv) == 3: - labels = set([l.strip() for l in open(sys.argv[2]).readlines()]) - print('Identified %d synset IDs in %s' % (len(labels), sys.argv[2]), - file=sys.stderr) - else: - labels = None - - skipped_boxes = 0 - skipped_files = 0 - saved_boxes = 0 - saved_files = 0 - for file_index, one_file in enumerate(xml_files): - # Example: <...>/n06470073/n00141669_6790.xml - label = os.path.basename(os.path.dirname(one_file)) - - # Determine if the annotation is from an ImageNet Challenge label. 
- if labels is not None and label not in labels: - skipped_files += 1 - continue - - bboxes = ProcessXMLAnnotation(one_file) - assert bboxes is not None, 'No bounding boxes found in ' + one_file - - found_box = False - for bbox in bboxes: - if labels is not None: - if bbox.label != label: - # Note: There is a slight bug in the bounding box annotation data. - # Many of the dog labels have the human label 'Scottish_deerhound' - # instead of the synset ID 'n02092002' in the bbox.label field. As a - # simple hack to overcome this issue, we only exclude bbox labels - # *which are synset ID's* that do not match original synset label for - # the XML file. - if bbox.label in labels: - skipped_boxes += 1 - continue - - # Guard against improperly specified boxes. - if (bbox.xmin_scaled >= bbox.xmax_scaled or - bbox.ymin_scaled >= bbox.ymax_scaled): - skipped_boxes += 1 - continue - - # Note bbox.filename occasionally contains '%s' in the name. This is - # data set noise that is fixed by just using the basename of the XML file. - image_filename = os.path.splitext(os.path.basename(one_file))[0] - print('%s.JPEG,%.4f,%.4f,%.4f,%.4f' % - (image_filename, - bbox.xmin_scaled, bbox.ymin_scaled, - bbox.xmax_scaled, bbox.ymax_scaled)) - - saved_boxes += 1 - found_box = True - if found_box: - saved_files += 1 - else: - skipped_files += 1 - - if not file_index % 5000: - print('--> processed %d of %d XML files.' % - (file_index + 1, len(xml_files)), - file=sys.stderr) - print('--> skipped %d boxes and %d XML files.' % - (skipped_boxes, skipped_files), file=sys.stderr) - - print('Finished processing %d XML files.' % len(xml_files), file=sys.stderr) - print('Skipped %d XML files not in ImageNet Challenge.' % skipped_files, - file=sys.stderr) - print('Skipped %d bounding boxes not in ImageNet Challenge.' % skipped_boxes, - file=sys.stderr) - print('Wrote %d bounding boxes from %d annotated images.' 
% - (saved_boxes, saved_files), - file=sys.stderr) - print('Finished.', file=sys.stderr) diff --git a/research/inception/inception/dataset.py b/research/inception/inception/dataset.py deleted file mode 100644 index 752c97e03b0361975d64b72892cc94333e353dfb..0000000000000000000000000000000000000000 --- a/research/inception/inception/dataset.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Small library that points to a data set. - -Methods of Data class: - data_files: Returns a python list of all (sharded) data set files. - num_examples_per_epoch: Returns the number of examples in the data set. - num_classes: Returns the number of classes in the data set. - reader: Return a reader for a single entry from the data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from abc import ABCMeta -from abc import abstractmethod -import os - - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - -# Basic model parameters. -tf.app.flags.DEFINE_string('data_dir', '/tmp/mydata', - """Path to the processed data, i.e. 
""" - """TFRecord of Example protos.""") - - -class Dataset(object): - """A simple class for handling data sets.""" - __metaclass__ = ABCMeta - - def __init__(self, name, subset): - """Initialize dataset using a subset and the path to the data.""" - assert subset in self.available_subsets(), self.available_subsets() - self.name = name - self.subset = subset - - @abstractmethod - def num_classes(self): - """Returns the number of classes in the data set.""" - pass - # return 10 - - @abstractmethod - def num_examples_per_epoch(self): - """Returns the number of examples in the data subset.""" - pass - # if self.subset == 'train': - # return 10000 - # if self.subset == 'validation': - # return 1000 - - @abstractmethod - def download_message(self): - """Prints a download message for the Dataset.""" - pass - - def available_subsets(self): - """Returns the list of available subsets.""" - return ['train', 'validation'] - - def data_files(self): - """Returns a python list of all (sharded) data subset files. - - Returns: - python list of all (sharded) data set files. - Raises: - ValueError: if there are not data_files matching the subset. - """ - tf_record_pattern = os.path.join(FLAGS.data_dir, '%s-*' % self.subset) - data_files = tf.gfile.Glob(tf_record_pattern) - if not data_files: - print('No files found for dataset %s/%s at %s' % (self.name, - self.subset, - FLAGS.data_dir)) - - self.download_message() - exit(-1) - return data_files - - def reader(self): - """Return a reader for a single entry from the data set. - - See io_ops.py for details of Reader class. - - Returns: - Reader object that reads the data set. - """ - return tf.TFRecordReader() diff --git a/research/inception/inception/flowers_data.py b/research/inception/inception/flowers_data.py deleted file mode 100644 index 022b5234deef035a6150a54ed74445b510f1b148..0000000000000000000000000000000000000000 --- a/research/inception/inception/flowers_data.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2016 Google Inc. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Small library that points to the flowers data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - - -from inception.dataset import Dataset - - -class FlowersData(Dataset): - """Flowers data set.""" - - def __init__(self, subset): - super(FlowersData, self).__init__('Flowers', subset) - - def num_classes(self): - """Returns the number of classes in the data set.""" - return 5 - - def num_examples_per_epoch(self): - """Returns the number of examples in the data subset.""" - if self.subset == 'train': - return 3170 - if self.subset == 'validation': - return 500 - - def download_message(self): - """Instruction to download and extract the tarball from Flowers website.""" - - print('Failed to find any Flowers %s files'% self.subset) - print('') - print('If you have already downloaded and processed the data, then make ' - 'sure to set --data_dir to point to the directory containing the ' - 'location of the sharded TFRecords.\n') - print('Please see README.md for instructions on how to build ' - 'the flowers dataset using download_and_preprocess_flowers.\n') diff --git a/research/inception/inception/flowers_eval.py b/research/inception/inception/flowers_eval.py deleted file mode 100644 index 
ae3e9dc14c8dc83368aa83f523ade92e12113554..0000000000000000000000000000000000000000 --- a/research/inception/inception/flowers_eval.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A binary to evaluate Inception on the flowers data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from inception import inception_eval -from inception.flowers_data import FlowersData - -FLAGS = tf.app.flags.FLAGS - - -def main(unused_argv=None): - dataset = FlowersData(subset=FLAGS.subset) - assert dataset.data_files() - if tf.gfile.Exists(FLAGS.eval_dir): - tf.gfile.DeleteRecursively(FLAGS.eval_dir) - tf.gfile.MakeDirs(FLAGS.eval_dir) - inception_eval.evaluate(dataset) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/flowers_train.py b/research/inception/inception/flowers_train.py deleted file mode 100644 index 1f044a539d48ef6ce011831210b4bc31eba278f3..0000000000000000000000000000000000000000 --- a/research/inception/inception/flowers_train.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A binary to train Inception on the flowers data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - - -import tensorflow as tf - -from inception import inception_train -from inception.flowers_data import FlowersData - -FLAGS = tf.app.flags.FLAGS - - -def main(_): - dataset = FlowersData(subset=FLAGS.subset) - assert dataset.data_files() - if tf.gfile.Exists(FLAGS.train_dir): - tf.gfile.DeleteRecursively(FLAGS.train_dir) - tf.gfile.MakeDirs(FLAGS.train_dir) - inception_train.train(dataset) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/image_processing.py b/research/inception/inception/image_processing.py deleted file mode 100644 index fe74f1b3c9958060b15f52df80b11606c7ccf343..0000000000000000000000000000000000000000 --- a/research/inception/inception/image_processing.py +++ /dev/null @@ -1,513 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Read and preprocess image data. - - Image processing occurs on a single image at a time. Image are read and - preprocessed in parallel across multiple threads. The resulting images - are concatenated together to form a single batch for training or evaluation. - - -- Provide processed image data for a network: - inputs: Construct batches of evaluation examples of images. - distorted_inputs: Construct batches of training examples of images. - batch_inputs: Construct batches of training or evaluation examples of images. - - -- Data processing: - parse_example_proto: Parses an Example proto containing a training example - of an image. - - -- Image decoding: - decode_jpeg: Decode a JPEG encoded string into a 3-D float32 Tensor. - - -- Image preprocessing: - image_preprocessing: Decode and preprocess one image for evaluation or training - distort_image: Distort one image for training a network. - eval_image: Prepare one image for evaluation. - distort_color: Distort the color in one image for training. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_integer('batch_size', 32, - """Number of images to process in a batch.""") -tf.app.flags.DEFINE_integer('image_size', 299, - """Provide square images of this size.""") -tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, - """Number of preprocessing threads per tower. 
""" - """Please make this a multiple of 4.""") -tf.app.flags.DEFINE_integer('num_readers', 4, - """Number of parallel readers during train.""") - -# Images are preprocessed asynchronously using multiple threads specified by -# --num_preprocss_threads and the resulting processed images are stored in a -# random shuffling queue. The shuffling queue dequeues --batch_size images -# for processing on a given Inception tower. A larger shuffling queue guarantees -# better mixing across examples within a batch and results in slightly higher -# predictive performance in a trained model. Empirically, -# --input_queue_memory_factor=16 works well. A value of 16 implies a queue size -# of 1024*16 images. Assuming RGB 299x299 images, this implies a queue size of -# 16GB. If the machine is memory limited, then decrease this factor to -# decrease the CPU memory footprint, accordingly. -tf.app.flags.DEFINE_integer('input_queue_memory_factor', 16, - """Size of the queue of preprocessed images. """ - """Default is ideal but try smaller values, e.g. """ - """4, 2 or 1, if host memory is constrained. See """ - """comments in code for more details.""") - - -def inputs(dataset, batch_size=None, num_preprocess_threads=None): - """Generate batches of ImageNet images for evaluation. - - Use this function as the inputs for evaluating a network. - - Note that some (minimal) image preprocessing occurs during evaluation - including central cropping and resizing of the image to fit the network. - - Args: - dataset: instance of Dataset class specifying the dataset. - batch_size: integer, number of examples in batch - num_preprocess_threads: integer, total number of preprocessing threads but - None defaults to FLAGS.num_preprocess_threads. - - Returns: - images: Images. 4D tensor of size [batch_size, FLAGS.image_size, - image_size, 3]. - labels: 1-D integer Tensor of [FLAGS.batch_size]. 
- """ - if not batch_size: - batch_size = FLAGS.batch_size - - # Force all input processing onto CPU in order to reserve the GPU for - # the forward inference and back-propagation. - with tf.device('/cpu:0'): - images, labels = batch_inputs( - dataset, batch_size, train=False, - num_preprocess_threads=num_preprocess_threads, - num_readers=1) - - return images, labels - - -def distorted_inputs(dataset, batch_size=None, num_preprocess_threads=None): - """Generate batches of distorted versions of ImageNet images. - - Use this function as the inputs for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - dataset: instance of Dataset class specifying the dataset. - batch_size: integer, number of examples in batch - num_preprocess_threads: integer, total number of preprocessing threads but - None defaults to FLAGS.num_preprocess_threads. - - Returns: - images: Images. 4D tensor of size [batch_size, FLAGS.image_size, - FLAGS.image_size, 3]. - labels: 1-D integer Tensor of [batch_size]. - """ - if not batch_size: - batch_size = FLAGS.batch_size - - # Force all input processing onto CPU in order to reserve the GPU for - # the forward inference and back-propagation. - with tf.device('/cpu:0'): - images, labels = batch_inputs( - dataset, batch_size, train=True, - num_preprocess_threads=num_preprocess_threads, - num_readers=FLAGS.num_readers) - return images, labels - - -def decode_jpeg(image_buffer, scope=None): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - with tf.name_scope(values=[image_buffer], name=scope, - default_name='decode_jpeg'): - # Decode the string as an RGB JPEG. 
- # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3) - - # After this point, all image pixels reside in [0,1) - # until the very end, when they're rescaled to (-1, 1). The various - # adjust_* ops all require this range for dtype float. - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - return image - - -def distort_color(image, thread_id=0, scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather than adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: Tensor containing single image. - thread_id: preprocessing thread ID. - scope: Optional scope for name_scope. - Returns: - color-distorted image - """ - with tf.name_scope(values=[image], name=scope, default_name='distort_color'): - color_ordering = thread_id % 2 - - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image - - -def distort_image(image, height, width, bbox, thread_id=0, scope=None): - """Distort one image for training a network. 
- - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image: 3-D float Tensor of image - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - thread_id: integer indicating the preprocessing thread. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of distorted image used for training. - """ - with tf.name_scope(values=[image, height, width, bbox], name=scope, - default_name='distort_image'): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # Display the bounding box in the first thread only. - if not thread_id: - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image('image_with_bounding_boxes', image_with_box) - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an allowed - # range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. 
- sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=0.1, - aspect_ratio_range=[0.75, 1.33], - area_range=[0.05, 1.0], - max_attempts=100, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if not thread_id: - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - distorted_image = tf.slice(image, bbox_begin, bbox_size) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - resize_method = thread_id % 4 - distorted_image = tf.image.resize_images(distorted_image, [height, width], - method=resize_method) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([height, width, 3]) - if not thread_id: - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. - distorted_image = distort_color(distorted_image, thread_id) - - if not thread_id: - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image - - -def eval_image(image, height, width, scope=None): - """Prepare one image for evaluation. - - Args: - image: 3-D float Tensor - height: integer - width: integer - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. 
- """ - with tf.name_scope(values=[image, height, width], name=scope, - default_name='eval_image'): - # Crop the central region of the image with an area containing 87.5% of - # the original image. - image = tf.image.central_crop(image, central_fraction=0.875) - - # Resize the image to the original height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - return image - - -def image_preprocessing(image_buffer, bbox, train, thread_id=0): - """Decode and preprocess one image for evaluation or training. - - Args: - image_buffer: JPEG encoded string Tensor - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - train: boolean - thread_id: integer indicating preprocessing thread - - Returns: - 3-D float Tensor containing an appropriately scaled image - - Raises: - ValueError: if user does not provide bounding box - """ - if bbox is None: - raise ValueError('Please supply a bounding box.') - - image = decode_jpeg(image_buffer) - height = FLAGS.image_size - width = FLAGS.image_size - - if train: - image = distort_image(image, height, width, bbox, thread_id) - else: - image = eval_image(image, height, width) - - # Finally, rescale to [-1,1] instead of [0, 1) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - - -def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. 
Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat(axis=0, values=[ymin, xmin, ymax, xmax]) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] - - -def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None, - num_readers=1): - """Contruct batches of training or evaluation examples from the image dataset. - - Args: - dataset: instance of Dataset class specifying the dataset. - See dataset.py for details. - batch_size: integer - train: boolean - num_preprocess_threads: integer, total number of preprocessing threads - num_readers: integer, number of parallel readers - - Returns: - images: 4-D float Tensor of a batch of images - labels: 1-D integer Tensor of [batch_size]. 
- - Raises: - ValueError: if data is not found - """ - with tf.name_scope('batch_processing'): - data_files = dataset.data_files() - if data_files is None: - raise ValueError('No data files found for this dataset') - - # Create filename_queue - if train: - filename_queue = tf.train.string_input_producer(data_files, - shuffle=True, - capacity=16) - else: - filename_queue = tf.train.string_input_producer(data_files, - shuffle=False, - capacity=1) - if num_preprocess_threads is None: - num_preprocess_threads = FLAGS.num_preprocess_threads - - if num_preprocess_threads % 4: - raise ValueError('Please make num_preprocess_threads a multiple ' - 'of 4 (%d % 4 != 0).', num_preprocess_threads) - - if num_readers is None: - num_readers = FLAGS.num_readers - - if num_readers < 1: - raise ValueError('Please make num_readers at least 1') - - # Approximate number of examples per shard. - examples_per_shard = 1024 - # Size the random shuffle queue to balance between good global - # mixing (more examples) and memory use (fewer examples). - # 1 image uses 299*299*3*4 bytes = 1MB - # The default input_queue_memory_factor is 16 implying a shuffling queue - # size: examples_per_shard * 16 * 1MB = 17.6GB - min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor - if train: - examples_queue = tf.RandomShuffleQueue( - capacity=min_queue_examples + 3 * batch_size, - min_after_dequeue=min_queue_examples, - dtypes=[tf.string]) - else: - examples_queue = tf.FIFOQueue( - capacity=examples_per_shard + 3 * batch_size, - dtypes=[tf.string]) - - # Create multiple readers to populate the queue of examples. 
- if num_readers > 1: - enqueue_ops = [] - for _ in range(num_readers): - reader = dataset.reader() - _, value = reader.read(filename_queue) - enqueue_ops.append(examples_queue.enqueue([value])) - - tf.train.queue_runner.add_queue_runner( - tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) - example_serialized = examples_queue.dequeue() - else: - reader = dataset.reader() - _, example_serialized = reader.read(filename_queue) - - images_and_labels = [] - for thread_id in range(num_preprocess_threads): - # Parse a serialized Example proto to extract the image and metadata. - image_buffer, label_index, bbox, _ = parse_example_proto( - example_serialized) - image = image_preprocessing(image_buffer, bbox, train, thread_id) - images_and_labels.append([image, label_index]) - - images, label_index_batch = tf.train.batch_join( - images_and_labels, - batch_size=batch_size, - capacity=2 * num_preprocess_threads * batch_size) - - # Reshape images into these desired dimensions. - height = FLAGS.image_size - width = FLAGS.image_size - depth = 3 - - images = tf.cast(images, tf.float32) - images = tf.reshape(images, shape=[batch_size, height, width, depth]) - - # Display the training images in the visualizer. - tf.summary.image('images', images) - - return images, tf.reshape(label_index_batch, [batch_size]) diff --git a/research/inception/inception/imagenet_data.py b/research/inception/inception/imagenet_data.py deleted file mode 100644 index 0a6d22e1292632f0899355d5aa7183c3f5f33b2c..0000000000000000000000000000000000000000 --- a/research/inception/inception/imagenet_data.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Small library that points to the ImageNet data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - - -from inception.dataset import Dataset - - -class ImagenetData(Dataset): - """ImageNet data set.""" - - def __init__(self, subset): - super(ImagenetData, self).__init__('ImageNet', subset) - - def num_classes(self): - """Returns the number of classes in the data set.""" - return 1000 - - def num_examples_per_epoch(self): - """Returns the number of examples in the data set.""" - # Bounding box data consists of 615299 bounding boxes for 544546 images. - if self.subset == 'train': - return 1281167 - if self.subset == 'validation': - return 50000 - - def download_message(self): - """Instruction to download and extract the tarball from Flowers website.""" - - print('Failed to find any ImageNet %s files'% self.subset) - print('') - print('If you have already downloaded and processed the data, then make ' - 'sure to set --data_dir to point to the directory containing the ' - 'location of the sharded TFRecords.\n') - print('If you have not downloaded and prepared the ImageNet data in the ' - 'TFRecord format, you will need to do this at least once. 
This ' - 'process could take several hours depending on the speed of your ' - 'computer and network connection\n') - print('Please see README.md for instructions on how to build ' - 'the ImageNet dataset using download_and_preprocess_imagenet.\n') - print('Note that the raw data size is 300 GB and the processed data size ' - 'is 150 GB. Please ensure you have at least 500GB disk space.') diff --git a/research/inception/inception/imagenet_distributed_train.py b/research/inception/inception/imagenet_distributed_train.py deleted file mode 100644 index f3615e012f042649b52e37aeaeeb2c3efc07f92c..0000000000000000000000000000000000000000 --- a/research/inception/inception/imagenet_distributed_train.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=line-too-long -"""A binary to train Inception in a distributed manner using multiple systems. - -Please see accompanying README.md for details and instructions. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception import inception_distributed_train -from inception.imagenet_data import ImagenetData - -FLAGS = tf.app.flags.FLAGS - - -def main(unused_args): - assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker' - - # Extract all the hostnames for the ps and worker jobs to construct the - # cluster spec. - ps_hosts = FLAGS.ps_hosts.split(',') - worker_hosts = FLAGS.worker_hosts.split(',') - tf.logging.info('PS hosts are: %s' % ps_hosts) - tf.logging.info('Worker hosts are: %s' % worker_hosts) - - cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, - 'worker': worker_hosts}) - server = tf.train.Server( - {'ps': ps_hosts, - 'worker': worker_hosts}, - job_name=FLAGS.job_name, - task_index=FLAGS.task_id, - protocol=FLAGS.protocol) - - if FLAGS.job_name == 'ps': - # `ps` jobs wait for incoming connections from the workers. - server.join() - else: - # `worker` jobs will actually do the work. - dataset = ImagenetData(subset=FLAGS.subset) - assert dataset.data_files() - # Only the chief checks for or creates train_dir. - if FLAGS.task_id == 0: - if not tf.gfile.Exists(FLAGS.train_dir): - tf.gfile.MakeDirs(FLAGS.train_dir) - inception_distributed_train.train(server.target, dataset, cluster_spec) - -if __name__ == '__main__': - tf.logging.set_verbosity(tf.logging.INFO) - tf.app.run() diff --git a/research/inception/inception/imagenet_eval.py b/research/inception/inception/imagenet_eval.py deleted file mode 100644 index e6f8bac2ee71021914715172296d63dd56b5a6f9..0000000000000000000000000000000000000000 --- a/research/inception/inception/imagenet_eval.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A binary to evaluate Inception on the ImageNet data set. - -Note that using the supplied pre-trained inception checkpoint, the eval should -achieve: - precision @ 1 = 0.7874 recall @ 5 = 0.9436 [50000 examples] - -See the README.md for more details. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from inception import inception_eval -from inception.imagenet_data import ImagenetData - -FLAGS = tf.app.flags.FLAGS - - -def main(unused_argv=None): - dataset = ImagenetData(subset=FLAGS.subset) - assert dataset.data_files() - if tf.gfile.Exists(FLAGS.eval_dir): - tf.gfile.DeleteRecursively(FLAGS.eval_dir) - tf.gfile.MakeDirs(FLAGS.eval_dir) - inception_eval.evaluate(dataset) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/imagenet_train.py b/research/inception/inception/imagenet_train.py deleted file mode 100644 index 3ffb55ee963e5b9f8e31915a78eef518324642aa..0000000000000000000000000000000000000000 --- a/research/inception/inception/imagenet_train.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A binary to train Inception on the ImageNet data set. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - - -import tensorflow as tf - -from inception import inception_train -from inception.imagenet_data import ImagenetData - -FLAGS = tf.app.flags.FLAGS - - -def main(_): - dataset = ImagenetData(subset=FLAGS.subset) - assert dataset.data_files() - if tf.gfile.Exists(FLAGS.train_dir): - tf.gfile.DeleteRecursively(FLAGS.train_dir) - tf.gfile.MakeDirs(FLAGS.train_dir) - inception_train.train(dataset) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/inception/inception/inception_distributed_train.py b/research/inception/inception/inception_distributed_train.py deleted file mode 100644 index c1a589acb5fe386fd648ae3fae926ee927c0ca79..0000000000000000000000000000000000000000 --- a/research/inception/inception/inception_distributed_train.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A library to train Inception using multiple replicas with synchronous update. - -Please see accompanying README.md for details and instructions. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import os.path -import time - -import numpy as np -import tensorflow as tf - -from inception import image_processing -from inception import inception_model as inception -from inception.slim import slim - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string('job_name', '', 'One of "ps", "worker"') -tf.app.flags.DEFINE_string('ps_hosts', '', - """Comma-separated list of hostname:port for the """ - """parameter server jobs. e.g. """ - """'machine1:2222,machine2:1111,machine2:2222'""") -tf.app.flags.DEFINE_string('worker_hosts', '', - """Comma-separated list of hostname:port for the """ - """worker jobs. e.g. """ - """'machine1:2222,machine2:1111,machine2:2222'""") -tf.app.flags.DEFINE_string('protocol', 'grpc', - """Communication protocol to use in distributed """ - """execution (default grpc) """) - -tf.app.flags.DEFINE_string('train_dir', '/tmp/imagenet_train', - """Directory where to write event logs """ - """and checkpoint.""") -tf.app.flags.DEFINE_integer('max_steps', 1000000, 'Number of batches to run.') -tf.app.flags.DEFINE_string('subset', 'train', 'Either "train" or "validation".') -tf.app.flags.DEFINE_boolean('log_device_placement', False, - 'Whether to log device placement.') - -# Task ID is used to select the chief and also to access the local_step for -# each replica to check staleness of the gradients in SyncReplicasOptimizer. 
-tf.app.flags.DEFINE_integer( - 'task_id', 0, 'Task ID of the worker/replica running the training.') - -# More details can be found in the SyncReplicasOptimizer class: -# tensorflow/python/training/sync_replicas_optimizer.py -tf.app.flags.DEFINE_integer('num_replicas_to_aggregate', -1, - """Number of gradients to collect before """ - """updating the parameters.""") -tf.app.flags.DEFINE_integer('save_interval_secs', 10 * 60, - 'Save interval seconds.') -tf.app.flags.DEFINE_integer('save_summaries_secs', 180, - 'Save summaries interval seconds.') - -# **IMPORTANT** -# Please note that this learning rate schedule is heavily dependent on the -# hardware architecture, batch size and any changes to the model architecture -# specification. Selecting a finely tuned learning rate schedule is an -# empirical process that requires some experimentation. Please see README.md -# more guidance and discussion. -# -# Learning rate decay factor selected from https://arxiv.org/abs/1604.00981 -tf.app.flags.DEFINE_float('initial_learning_rate', 0.045, - 'Initial learning rate.') -tf.app.flags.DEFINE_float('num_epochs_per_decay', 2.0, - 'Epochs after which learning rate decays.') -tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.94, - 'Learning rate decay factor.') - -# Constants dictating the learning rate schedule. -RMSPROP_DECAY = 0.9 # Decay term for RMSProp. -RMSPROP_MOMENTUM = 0.9 # Momentum in RMSProp. -RMSPROP_EPSILON = 1.0 # Epsilon term for RMSProp. - - -def train(target, dataset, cluster_spec): - """Train Inception on a dataset for a number of steps.""" - # Number of workers and parameter servers are inferred from the workers and ps - # hosts string. - num_workers = len(cluster_spec.as_dict()['worker']) - num_parameter_servers = len(cluster_spec.as_dict()['ps']) - # If no value is given, num_replicas_to_aggregate defaults to be the number of - # workers. 
- if FLAGS.num_replicas_to_aggregate == -1: - num_replicas_to_aggregate = num_workers - else: - num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate - - # Both should be greater than 0 in a distributed training. - assert num_workers > 0 and num_parameter_servers > 0, (' num_workers and ' - 'num_parameter_servers' - ' must be > 0.') - - # Choose worker 0 as the chief. Note that any worker could be the chief - # but there should be only one chief. - is_chief = (FLAGS.task_id == 0) - - # Ops are assigned to worker by default. - with tf.device('/job:worker/task:%d' % FLAGS.task_id): - # Variables and its related init/assign ops are assigned to ps. - with slim.scopes.arg_scope( - [slim.variables.variable, slim.variables.global_step], - device=slim.variables.VariableDeviceChooser(num_parameter_servers)): - # Create a variable to count the number of train() calls. This equals the - # number of updates applied to the variables. - global_step = slim.variables.global_step() - - # Calculate the learning rate schedule. - num_batches_per_epoch = (dataset.num_examples_per_epoch() / - FLAGS.batch_size) - # Decay steps need to be divided by the number of replicas to aggregate. - decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay / - num_replicas_to_aggregate) - - # Decay the learning rate exponentially based on the number of steps. - lr = tf.train.exponential_decay(FLAGS.initial_learning_rate, - global_step, - decay_steps, - FLAGS.learning_rate_decay_factor, - staircase=True) - # Add a summary to track the learning rate. - tf.summary.scalar('learning_rate', lr) - - # Create an optimizer that performs gradient descent. - opt = tf.train.RMSPropOptimizer(lr, - RMSPROP_DECAY, - momentum=RMSPROP_MOMENTUM, - epsilon=RMSPROP_EPSILON) - - images, labels = image_processing.distorted_inputs( - dataset, - batch_size=FLAGS.batch_size, - num_preprocess_threads=FLAGS.num_preprocess_threads) - - # Number of classes in the Dataset label set plus 1. 
- # Label 0 is reserved for an (unused) background class. - num_classes = dataset.num_classes() + 1 - logits = inception.inference(images, num_classes, for_training=True) - # Add classification loss. - inception.loss(logits, labels) - - # Gather all of the losses including regularization losses. - losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) - losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - - total_loss = tf.add_n(losses, name='total_loss') - - if is_chief: - # Compute the moving average of all individual losses and the - # total loss. - loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') - loss_averages_op = loss_averages.apply(losses + [total_loss]) - - # Attach a scalar summmary to all individual losses and the total loss; - # do the same for the averaged version of the losses. - for l in losses + [total_loss]: - loss_name = l.op.name - # Name each loss as '(raw)' and name the moving average version of the - # loss as the original loss name. - tf.summary.scalar(loss_name + ' (raw)', l) - tf.summary.scalar(loss_name, loss_averages.average(l)) - - # Add dependency to compute loss_averages. - with tf.control_dependencies([loss_averages_op]): - total_loss = tf.identity(total_loss) - - # Track the moving averages of all trainable variables. - # Note that we maintain a 'double-average' of the BatchNormalization - # global statistics. - # This is not needed when the number of replicas are small but important - # for synchronous distributed training with tens of workers/replicas. - exp_moving_averager = tf.train.ExponentialMovingAverage( - inception.MOVING_AVERAGE_DECAY, global_step) - - variables_to_average = ( - tf.trainable_variables() + tf.moving_average_variables()) - - # Add histograms for model variables. - for var in variables_to_average: - tf.summary.histogram(var.op.name, var) - - # Create synchronous replica optimizer. 
- opt = tf.train.SyncReplicasOptimizer( - opt, - replicas_to_aggregate=num_replicas_to_aggregate, - total_num_replicas=num_workers, - variable_averages=exp_moving_averager, - variables_to_average=variables_to_average) - - batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION) - assert batchnorm_updates, 'Batchnorm updates are missing' - batchnorm_updates_op = tf.group(*batchnorm_updates) - # Add dependency to compute batchnorm_updates. - with tf.control_dependencies([batchnorm_updates_op]): - total_loss = tf.identity(total_loss) - - # Compute gradients with respect to the loss. - grads = opt.compute_gradients(total_loss) - - # Add histograms for gradients. - for grad, var in grads: - if grad is not None: - tf.summary.histogram(var.op.name + '/gradients', grad) - - apply_gradients_op = opt.apply_gradients(grads, global_step=global_step) - - with tf.control_dependencies([apply_gradients_op]): - train_op = tf.identity(total_loss, name='train_op') - - # Get chief queue_runners and init_tokens, which is used to synchronize - # replicas. More details can be found in SyncReplicasOptimizer. - chief_queue_runners = [opt.get_chief_queue_runner()] - init_tokens_op = opt.get_init_tokens_op() - - # Create a saver. - saver = tf.train.Saver() - - # Build the summary operation based on the TF collection of Summaries. - summary_op = tf.summary.merge_all() - - # Build an initialization operation to run below. - init_op = tf.global_variables_initializer() - - # We run the summaries in the same thread as the training operations by - # passing in None for summary_op to avoid a summary_thread being started. - # Running summaries and training operations in parallel could run out of - # GPU memory. 
- sv = tf.train.Supervisor(is_chief=is_chief, - logdir=FLAGS.train_dir, - init_op=init_op, - summary_op=None, - global_step=global_step, - saver=saver, - save_model_secs=FLAGS.save_interval_secs) - - tf.logging.info('%s Supervisor' % datetime.now()) - - sess_config = tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=FLAGS.log_device_placement) - - # Get a session. - sess = sv.prepare_or_wait_for_session(target, config=sess_config) - - # Start the queue runners. - queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS) - sv.start_queue_runners(sess, queue_runners) - tf.logging.info('Started %d queues for processing input data.', - len(queue_runners)) - - if is_chief: - sv.start_queue_runners(sess, chief_queue_runners) - sess.run(init_tokens_op) - - # Train, checking for Nans. Concurrently run the summary operation at a - # specified interval. Note that the summary_op and train_op never run - # simultaneously in order to prevent running out of GPU memory. - next_summary_time = time.time() + FLAGS.save_summaries_secs - while not sv.should_stop(): - try: - start_time = time.time() - loss_value, step = sess.run([train_op, global_step]) - assert not np.isnan(loss_value), 'Model diverged with loss = NaN' - if step > FLAGS.max_steps: - break - duration = time.time() - start_time - - if step % 30 == 0: - examples_per_sec = FLAGS.batch_size / float(duration) - format_str = ('Worker %d: %s: step %d, loss = %.2f' - '(%.1f examples/sec; %.3f sec/batch)') - tf.logging.info(format_str % - (FLAGS.task_id, datetime.now(), step, loss_value, - examples_per_sec, duration)) - - # Determine if the summary_op should be run on the chief worker. - if is_chief and next_summary_time < time.time(): - tf.logging.info('Running Summary operation on the chief.') - summary_str = sess.run(summary_op) - sv.summary_computed(sess, summary_str) - tf.logging.info('Finished running Summary operation.') - - # Determine the next time for running the summary. 
- next_summary_time += FLAGS.save_summaries_secs - except: - if is_chief: - tf.logging.info('Chief got exception while running!') - raise - - # Stop the supervisor. This also waits for service threads to finish. - sv.stop() - - # Save after the training ends. - if is_chief: - saver.save(sess, - os.path.join(FLAGS.train_dir, 'model.ckpt'), - global_step=global_step) diff --git a/research/inception/inception/inception_eval.py b/research/inception/inception/inception_eval.py deleted file mode 100644 index e7cfc3c399dd82a915b3a49c7ddd4a8565292f69..0000000000000000000000000000000000000000 --- a/research/inception/inception/inception_eval.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A library to evaluate Inception on a single GPU. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import math -import os.path -import time - - -import numpy as np -import tensorflow as tf - -from inception import image_processing -from inception import inception_model as inception - - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string('eval_dir', '/tmp/imagenet_eval', - """Directory where to write event logs.""") -tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/imagenet_train', - """Directory where to read model checkpoints.""") - -# Flags governing the frequency of the eval. -tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5, - """How often to run the eval.""") -tf.app.flags.DEFINE_boolean('run_once', False, - """Whether to run eval only once.""") - -# Flags governing the data used for the eval. -tf.app.flags.DEFINE_integer('num_examples', 50000, - """Number of examples to run. Note that the eval """ - """ImageNet dataset contains 50000 examples.""") -tf.app.flags.DEFINE_string('subset', 'validation', - """Either 'validation' or 'train'.""") - - -def _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op): - """Runs Eval once. - - Args: - saver: Saver. - summary_writer: Summary writer. - top_1_op: Top 1 op. - top_5_op: Top 5 op. - summary_op: Summary op. - """ - with tf.Session() as sess: - ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) - if ckpt and ckpt.model_checkpoint_path: - if os.path.isabs(ckpt.model_checkpoint_path): - # Restores from checkpoint with absolute path. - saver.restore(sess, ckpt.model_checkpoint_path) - else: - # Restores from checkpoint with relative path. - saver.restore(sess, os.path.join(FLAGS.checkpoint_dir, - ckpt.model_checkpoint_path)) - - # Assuming model_checkpoint_path looks something like: - # /my-favorite-path/imagenet_train/model.ckpt-0, - # extract global_step from it. 
- global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] - print('Successfully loaded model from %s at step=%s.' % - (ckpt.model_checkpoint_path, global_step)) - else: - print('No checkpoint file found') - return - - # Start the queue runners. - coord = tf.train.Coordinator() - try: - threads = [] - for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): - threads.extend(qr.create_threads(sess, coord=coord, daemon=True, - start=True)) - - num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size)) - # Counts the number of correct predictions. - count_top_1 = 0.0 - count_top_5 = 0.0 - total_sample_count = num_iter * FLAGS.batch_size - step = 0 - - print('%s: starting evaluation on (%s).' % (datetime.now(), FLAGS.subset)) - start_time = time.time() - while step < num_iter and not coord.should_stop(): - top_1, top_5 = sess.run([top_1_op, top_5_op]) - count_top_1 += np.sum(top_1) - count_top_5 += np.sum(top_5) - step += 1 - if step % 20 == 0: - duration = time.time() - start_time - sec_per_batch = duration / 20.0 - examples_per_sec = FLAGS.batch_size / sec_per_batch - print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f' - 'sec/batch)' % (datetime.now(), step, num_iter, - examples_per_sec, sec_per_batch)) - start_time = time.time() - - # Compute precision @ 1. 
- precision_at_1 = count_top_1 / total_sample_count - recall_at_5 = count_top_5 / total_sample_count - print('%s: precision @ 1 = %.4f recall @ 5 = %.4f [%d examples]' % - (datetime.now(), precision_at_1, recall_at_5, total_sample_count)) - - summary = tf.Summary() - summary.ParseFromString(sess.run(summary_op)) - summary.value.add(tag='Precision @ 1', simple_value=precision_at_1) - summary.value.add(tag='Recall @ 5', simple_value=recall_at_5) - summary_writer.add_summary(summary, global_step) - - except Exception as e: # pylint: disable=broad-except - coord.request_stop(e) - - coord.request_stop() - coord.join(threads, stop_grace_period_secs=10) - - -def evaluate(dataset): - """Evaluate model on Dataset for a number of steps.""" - with tf.Graph().as_default(): - # Get images and labels from the dataset. - images, labels = image_processing.inputs(dataset) - - # Number of classes in the Dataset label set plus 1. - # Label 0 is reserved for an (unused) background class. - num_classes = dataset.num_classes() + 1 - - # Build a Graph that computes the logits predictions from the - # inference model. - logits, _ = inception.inference(images, num_classes) - - # Calculate predictions. - top_1_op = tf.nn.in_top_k(logits, labels, 1) - top_5_op = tf.nn.in_top_k(logits, labels, 5) - - # Restore the moving average version of the learned variables for eval. - variable_averages = tf.train.ExponentialMovingAverage( - inception.MOVING_AVERAGE_DECAY) - variables_to_restore = variable_averages.variables_to_restore() - saver = tf.train.Saver(variables_to_restore) - - # Build the summary operation based on the TF collection of Summaries. 
- summary_op = tf.summary.merge_all() - - graph_def = tf.get_default_graph().as_graph_def() - summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, - graph_def=graph_def) - - while True: - _eval_once(saver, summary_writer, top_1_op, top_5_op, summary_op) - if FLAGS.run_once: - break - time.sleep(FLAGS.eval_interval_secs) diff --git a/research/inception/inception/inception_model.py b/research/inception/inception/inception_model.py deleted file mode 100644 index fedae13ae712f09d23ff020b161d86e87ee46e95..0000000000000000000000000000000000000000 --- a/research/inception/inception/inception_model.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Build the Inception v3 network on ImageNet data set. - -The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567 - -Summary of available functions: - inference: Compute inference on the model inputs to make a prediction - loss: Compute the loss of the prediction with respect to the labels -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re - -import tensorflow as tf - -from inception.slim import slim - -FLAGS = tf.app.flags.FLAGS - -# If a model is trained using multiple GPUs, prefix all Op names with tower_name -# to differentiate the operations. 
Note that this prefix is removed from the -# names of the summaries when visualizing a model. -TOWER_NAME = 'tower' - -# Batch normalization. Constant governing the exponential moving average of -# the 'global' mean and variance for all activations. -BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997 - -# The decay to use for the moving average. -MOVING_AVERAGE_DECAY = 0.9999 - - -def inference(images, num_classes, for_training=False, restore_logits=True, - scope=None): - """Build Inception v3 model architecture. - - See here for reference: http://arxiv.org/abs/1512.00567 - - Args: - images: Images returned from inputs() or distorted_inputs(). - num_classes: number of classes - for_training: If set to `True`, build the inference model for training. - Kernels that operate differently for inference during training - e.g. dropout, are appropriately configured. - restore_logits: whether or not the logits layers should be restored. - Useful for fine-tuning a model with different num_classes. - scope: optional prefix string identifying the ImageNet tower. - - Returns: - Logits. 2-D float Tensor. - Auxiliary Logits. 2-D float Tensor of side-head. Used for training only. - """ - # Parameters for BatchNorm. - batch_norm_params = { - # Decay for the moving averages. - 'decay': BATCHNORM_MOVING_AVERAGE_DECAY, - # epsilon to prevent 0s in variance. - 'epsilon': 0.001, - } - # Set weight_decay for weights in Conv and FC layers. - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): - with slim.arg_scope([slim.ops.conv2d], - stddev=0.1, - activation=tf.nn.relu, - batch_norm_params=batch_norm_params): - logits, endpoints = slim.inception.inception_v3( - images, - dropout_keep_prob=0.8, - num_classes=num_classes, - is_training=for_training, - restore_logits=restore_logits, - scope=scope) - - # Add summaries for viewing model statistics on TensorBoard. - _activation_summaries(endpoints) - - # Grab the logits associated with the side head. Employed during training. 
- auxiliary_logits = endpoints['aux_logits'] - - return logits, auxiliary_logits - - -def loss(logits, labels, batch_size=None): - """Adds all losses for the model. - - Note the final loss is not returned. Instead, the list of losses are collected - by slim.losses. The losses are accumulated in tower_loss() and summed to - calculate the total loss. - - Args: - logits: List of logits from inference(). Each entry is a 2-D float Tensor. - labels: Labels from distorted_inputs or inputs(). 1-D tensor - of shape [batch_size] - batch_size: integer - """ - if not batch_size: - batch_size = FLAGS.batch_size - - # Reshape the labels into a dense Tensor of - # shape [FLAGS.batch_size, num_classes]. - sparse_labels = tf.reshape(labels, [batch_size, 1]) - indices = tf.reshape(tf.range(batch_size), [batch_size, 1]) - concated = tf.concat(axis=1, values=[indices, sparse_labels]) - num_classes = logits[0].get_shape()[-1].value - dense_labels = tf.sparse_to_dense(concated, - [batch_size, num_classes], - 1.0, 0.0) - - # Cross entropy loss for the main softmax prediction. - slim.losses.cross_entropy_loss(logits[0], - dense_labels, - label_smoothing=0.1, - weight=1.0) - - # Cross entropy loss for the auxiliary softmax head. - slim.losses.cross_entropy_loss(logits[1], - dense_labels, - label_smoothing=0.1, - weight=0.4, - scope='aux_loss') - - -def _activation_summary(x): - """Helper to create summaries for activations. - - Creates a summary that provides a histogram of activations. - Creates a summary that measure the sparsity of activations. - - Args: - x: Tensor - """ - # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training - # session. This helps the clarity of presentation on tensorboard. 
- tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name) - tf.summary.histogram(tensor_name + '/activations', x) - tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) - - -def _activation_summaries(endpoints): - with tf.name_scope('summaries'): - for act in endpoints.values(): - _activation_summary(act) diff --git a/research/inception/inception/inception_train.py b/research/inception/inception/inception_train.py deleted file mode 100644 index e1c32713b2012aec8a18637ec5dd79a1cc84d90f..0000000000000000000000000000000000000000 --- a/research/inception/inception/inception_train.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A library to train Inception using multiple GPUs with synchronous updates. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from datetime import datetime -import os.path -import re -import time - -import numpy as np -import tensorflow as tf - -from inception import image_processing -from inception import inception_model as inception -from inception.slim import slim - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string('train_dir', '/tmp/imagenet_train', - """Directory where to write event logs """ - """and checkpoint.""") -tf.app.flags.DEFINE_integer('max_steps', 10000000, - """Number of batches to run.""") -tf.app.flags.DEFINE_string('subset', 'train', - """Either 'train' or 'validation'.""") - -# Flags governing the hardware employed for running TensorFlow. -tf.app.flags.DEFINE_integer('num_gpus', 1, - """How many GPUs to use.""") -tf.app.flags.DEFINE_boolean('log_device_placement', False, - """Whether to log device placement.""") - -# Flags governing the type of training. -tf.app.flags.DEFINE_boolean('fine_tune', False, - """If set, randomly initialize the final layer """ - """of weights in order to train the network on a """ - """new task.""") -tf.app.flags.DEFINE_string('pretrained_model_checkpoint_path', '', - """If specified, restore this pretrained model """ - """before beginning any training.""") - -# **IMPORTANT** -# Please note that this learning rate schedule is heavily dependent on the -# hardware architecture, batch size and any changes to the model architecture -# specification. Selecting a finely tuned learning rate schedule is an -# empirical process that requires some experimentation. Please see README.md -# more guidance and discussion. -# -# With 8 Tesla K40's and a batch size = 256, the following setup achieves -# precision@1 = 73.5% after 100 hours and 100K steps (20 epochs). -# Learning rate decay factor selected from http://arxiv.org/abs/1404.5997. 
-tf.app.flags.DEFINE_float('initial_learning_rate', 0.1, - """Initial learning rate.""") -tf.app.flags.DEFINE_float('num_epochs_per_decay', 30.0, - """Epochs after which learning rate decays.""") -tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.16, - """Learning rate decay factor.""") - -# Constants dictating the learning rate schedule. -RMSPROP_DECAY = 0.9 # Decay term for RMSProp. -RMSPROP_MOMENTUM = 0.9 # Momentum in RMSProp. -RMSPROP_EPSILON = 1.0 # Epsilon term for RMSProp. - - -def _tower_loss(images, labels, num_classes, scope, reuse_variables=None): - """Calculate the total loss on a single tower running the ImageNet model. - - We perform 'batch splitting'. This means that we cut up a batch across - multiple GPUs. For instance, if the batch size = 32 and num_gpus = 2, - then each tower will operate on an batch of 16 images. - - Args: - images: Images. 4D tensor of size [batch_size, FLAGS.image_size, - FLAGS.image_size, 3]. - labels: 1-D integer Tensor of [batch_size]. - num_classes: number of classes - scope: unique prefix string identifying the ImageNet tower, e.g. - 'tower_0'. - - Returns: - Tensor of shape [] containing the total loss for a batch of data - """ - # When fine-tuning a model, we do not restore the logits but instead we - # randomly initialize the logits. The number of classes in the output of the - # logit is the number of classes in specified Dataset. - restore_logits = not FLAGS.fine_tune - - # Build inference Graph. - with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables): - logits = inception.inference(images, num_classes, for_training=True, - restore_logits=restore_logits, - scope=scope) - - # Build the portion of the Graph calculating the losses. Note that we will - # assemble the total_loss using a custom function below. - split_batch_size = images.get_shape().as_list()[0] - inception.loss(logits, labels, batch_size=split_batch_size) - - # Assemble all of the losses for the current tower only. 
- losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope) - - # Calculate the total loss for the current tower. - regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - total_loss = tf.add_n(losses + regularization_losses, name='total_loss') - - # Compute the moving average of all individual losses and the total loss. - loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') - loss_averages_op = loss_averages.apply(losses + [total_loss]) - - # Attach a scalar summmary to all individual losses and the total loss; do the - # same for the averaged version of the losses. - for l in losses + [total_loss]: - # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training - # session. This helps the clarity of presentation on TensorBoard. - loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name) - # Name each loss as '(raw)' and name the moving average version of the loss - # as the original loss name. - tf.summary.scalar(loss_name +' (raw)', l) - tf.summary.scalar(loss_name, loss_averages.average(l)) - - with tf.control_dependencies([loss_averages_op]): - total_loss = tf.identity(total_loss) - return total_loss - - -def _average_gradients(tower_grads): - """Calculate the average gradient for each shared variable across all towers. - - Note that this function provides a synchronization point across all towers. - - Args: - tower_grads: List of lists of (gradient, variable) tuples. The outer list - is over individual gradients. The inner list is over the gradient - calculation for each tower. - Returns: - List of pairs of (gradient, variable) where the gradient has been averaged - across all towers. - """ - average_grads = [] - for grad_and_vars in zip(*tower_grads): - # Note that each grad_and_vars looks like the following: - # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) - grads = [] - for g, _ in grad_and_vars: - # Add 0 dimension to the gradients to represent the tower. 
- expanded_g = tf.expand_dims(g, 0) - - # Append on a 'tower' dimension which we will average over below. - grads.append(expanded_g) - - # Average over the 'tower' dimension. - grad = tf.concat(axis=0, values=grads) - grad = tf.reduce_mean(grad, 0) - - # Keep in mind that the Variables are redundant because they are shared - # across towers. So .. we will just return the first tower's pointer to - # the Variable. - v = grad_and_vars[0][1] - grad_and_var = (grad, v) - average_grads.append(grad_and_var) - return average_grads - - -def train(dataset): - """Train on dataset for a number of steps.""" - with tf.Graph().as_default(), tf.device('/cpu:0'): - # Create a variable to count the number of train() calls. This equals the - # number of batches processed * FLAGS.num_gpus. - global_step = tf.get_variable( - 'global_step', [], - initializer=tf.constant_initializer(0), trainable=False) - - # Calculate the learning rate schedule. - num_batches_per_epoch = (dataset.num_examples_per_epoch() / - FLAGS.batch_size) - decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay) - - # Decay the learning rate exponentially based on the number of steps. - lr = tf.train.exponential_decay(FLAGS.initial_learning_rate, - global_step, - decay_steps, - FLAGS.learning_rate_decay_factor, - staircase=True) - - # Create an optimizer that performs gradient descent. - opt = tf.train.RMSPropOptimizer(lr, RMSPROP_DECAY, - momentum=RMSPROP_MOMENTUM, - epsilon=RMSPROP_EPSILON) - - # Get images and labels for ImageNet and split the batch across GPUs. - assert FLAGS.batch_size % FLAGS.num_gpus == 0, ( - 'Batch size must be divisible by number of GPUs') - split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus) - - # Override the number of preprocessing threads to account for the increased - # number of GPU towers. 
- num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus - images, labels = image_processing.distorted_inputs( - dataset, - num_preprocess_threads=num_preprocess_threads) - - input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES)) - - # Number of classes in the Dataset label set plus 1. - # Label 0 is reserved for an (unused) background class. - num_classes = dataset.num_classes() + 1 - - # Split the batch of images and labels for towers. - images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images) - labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels) - - # Calculate the gradients for each model tower. - tower_grads = [] - reuse_variables = None - for i in range(FLAGS.num_gpus): - with tf.device('/gpu:%d' % i): - with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope: - # Force all Variables to reside on the CPU. - with slim.arg_scope([slim.variables.variable], device='/cpu:0'): - # Calculate the loss for one tower of the ImageNet model. This - # function constructs the entire ImageNet model but shares the - # variables across all towers. - loss = _tower_loss(images_splits[i], labels_splits[i], num_classes, - scope, reuse_variables) - - # Reuse variables for the next tower. - reuse_variables = True - - # Retain the summaries from the final tower. - summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) - - # Retain the Batch Normalization updates operations only from the - # final tower. Ideally, we should grab the updates from all towers - # but these stats accumulate extremely fast so we can ignore the - # other stats from the other towers without significant detriment. - batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION, - scope) - - # Calculate the gradients for the batch of data on this ImageNet - # tower. - grads = opt.compute_gradients(loss) - - # Keep track of the gradients across all towers. 
- tower_grads.append(grads) - - # We must calculate the mean of each gradient. Note that this is the - # synchronization point across all towers. - grads = _average_gradients(tower_grads) - - # Add a summaries for the input processing and global_step. - summaries.extend(input_summaries) - - # Add a summary to track the learning rate. - summaries.append(tf.summary.scalar('learning_rate', lr)) - - # Add histograms for gradients. - for grad, var in grads: - if grad is not None: - summaries.append( - tf.summary.histogram(var.op.name + '/gradients', grad)) - - # Apply the gradients to adjust the shared variables. - apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) - - # Add histograms for trainable variables. - for var in tf.trainable_variables(): - summaries.append(tf.summary.histogram(var.op.name, var)) - - # Track the moving averages of all trainable variables. - # Note that we maintain a "double-average" of the BatchNormalization - # global statistics. This is more complicated then need be but we employ - # this for backward-compatibility with our previous models. - variable_averages = tf.train.ExponentialMovingAverage( - inception.MOVING_AVERAGE_DECAY, global_step) - - # Another possibility is to use tf.slim.get_variables(). - variables_to_average = (tf.trainable_variables() + - tf.moving_average_variables()) - variables_averages_op = variable_averages.apply(variables_to_average) - - # Group all updates to into a single train op. - batchnorm_updates_op = tf.group(*batchnorm_updates) - train_op = tf.group(apply_gradient_op, variables_averages_op, - batchnorm_updates_op) - - # Create a saver. - saver = tf.train.Saver(tf.global_variables()) - - # Build the summary operation from the last tower summaries. - summary_op = tf.summary.merge(summaries) - - # Build an initialization operation to run below. - init = tf.global_variables_initializer() - - # Start running operations on the Graph. 
allow_soft_placement must be set to - # True to build towers on GPU, as some of the ops do not have GPU - # implementations. - sess = tf.Session(config=tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=FLAGS.log_device_placement)) - sess.run(init) - - if FLAGS.pretrained_model_checkpoint_path: - assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path) - variables_to_restore = tf.get_collection( - slim.variables.VARIABLES_TO_RESTORE) - restorer = tf.train.Saver(variables_to_restore) - restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path) - print('%s: Pre-trained model restored from %s' % - (datetime.now(), FLAGS.pretrained_model_checkpoint_path)) - - # Start the queue runners. - tf.train.start_queue_runners(sess=sess) - - summary_writer = tf.summary.FileWriter( - FLAGS.train_dir, - graph=sess.graph) - - for step in range(FLAGS.max_steps): - start_time = time.time() - _, loss_value = sess.run([train_op, loss]) - duration = time.time() - start_time - - assert not np.isnan(loss_value), 'Model diverged with loss = NaN' - - if step % 10 == 0: - examples_per_sec = FLAGS.batch_size / float(duration) - format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' - 'sec/batch)') - print(format_str % (datetime.now(), step, loss_value, - examples_per_sec, duration)) - - if step % 100 == 0: - summary_str = sess.run(summary_op) - summary_writer.add_summary(summary_str, step) - - # Save the model checkpoint periodically. 
- if step % 5000 == 0 or (step + 1) == FLAGS.max_steps: - checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=step) diff --git a/research/inception/inception/slim/BUILD b/research/inception/inception/slim/BUILD deleted file mode 100644 index 174e77d5c2654380232174a2bb8b29c6b9affc5d..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/BUILD +++ /dev/null @@ -1,112 +0,0 @@ -# Description: -# Contains the operations and nets for building TensorFlow-Slim models. - -package(default_visibility = ["//inception:internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "scopes", - srcs = ["scopes.py"], -) - -py_test( - name = "scopes_test", - size = "small", - srcs = ["scopes_test.py"], - deps = [ - ":scopes", - ], -) - -py_library( - name = "variables", - srcs = ["variables.py"], - deps = [ - ":scopes", - ], -) - -py_test( - name = "variables_test", - size = "small", - srcs = ["variables_test.py"], - deps = [ - ":variables", - ], -) - -py_library( - name = "losses", - srcs = ["losses.py"], -) - -py_test( - name = "losses_test", - size = "small", - srcs = ["losses_test.py"], - deps = [ - ":losses", - ], -) - -py_library( - name = "ops", - srcs = ["ops.py"], - deps = [ - ":losses", - ":scopes", - ":variables", - ], -) - -py_test( - name = "ops_test", - size = "small", - srcs = ["ops_test.py"], - deps = [ - ":ops", - ":variables", - ], -) - -py_library( - name = "inception", - srcs = ["inception_model.py"], - deps = [ - ":ops", - ":scopes", - ], -) - -py_test( - name = "inception_test", - size = "medium", - srcs = ["inception_test.py"], - deps = [ - ":inception", - ], -) - -py_library( - name = "slim", - srcs = ["slim.py"], - deps = [ - ":inception", - ":losses", - ":ops", - ":scopes", - ":variables", - ], -) - -py_test( - name = "collections_test", - size = "small", - srcs = ["collections_test.py"], - deps = [ - ":slim", - ], -) diff --git 
a/research/inception/inception/slim/README.md b/research/inception/inception/slim/README.md deleted file mode 100644 index 36d8b7eb19ae47d8810ed97abe203aa34be50a75..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/README.md +++ /dev/null @@ -1,621 +0,0 @@ -# TensorFlow-Slim - -TF-Slim is a lightweight library for defining, training and evaluating models in -TensorFlow. It enables defining complex networks quickly and concisely while -keeping a model's architecture transparent and its hyperparameters explicit. - -[TOC] - -## Teaser - -As a demonstration of the simplicity of using TF-Slim, compare the simplicity of -the code necessary for defining the entire [VGG](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) network using TF-Slim to -the lengthy and verbose nature of defining just the first three layers (out of -16) using native tensorflow: - -```python{.good} -# VGG16 in TF-Slim. -def vgg16(inputs): - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005): - net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1') - net = slim.ops.max_pool(net, [2, 2], scope='pool1') - net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2') - net = slim.ops.max_pool(net, [2, 2], scope='pool2') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3') - net = slim.ops.max_pool(net, [2, 2], scope='pool3') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4') - net = slim.ops.max_pool(net, [2, 2], scope='pool4') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5') - net = slim.ops.max_pool(net, [2, 2], scope='pool5') - net = slim.ops.flatten(net, scope='flatten5') - net = slim.ops.fc(net, 4096, scope='fc6') - net = slim.ops.dropout(net, 0.5, scope='dropout6') - net = slim.ops.fc(net, 4096, scope='fc7') - net = slim.ops.dropout(net, 0.5, scope='dropout7') - net = slim.ops.fc(net, 1000, 
activation=None, scope='fc8') - return net -``` - -```python{.bad} -# Layers 1-3 (out of 16) of VGG16 in native tensorflow. -def vgg16(inputs): - with tf.name_scope('conv1_1') as scope: - kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32, stddev=1e-1), name='weights') - conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME') - biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases') - bias = tf.nn.bias_add(conv, biases) - conv1 = tf.nn.relu(bias, name=scope) - with tf.name_scope('conv1_2') as scope: - kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32, stddev=1e-1), name='weights') - conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME') - biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32), trainable=True, name='biases') - bias = tf.nn.bias_add(conv, biases) - conv1 = tf.nn.relu(bias, name=scope) - with tf.name_scope('pool1') - pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1') -``` - -## Why TF-Slim? - -TF-Slim offers several advantages over just the built-in tensorflow libraries: - -* Allows one to define models much more compactly by eliminating boilerplate - code. This is accomplished through the use of [argument scoping](./scopes.py) - and numerous high level [operations](./ops.py). These tools increase - readability and maintainability, reduce the likelihood of an error from - copy-and-pasting hyperparameter values and simplifies hyperparameter tuning. -* Makes developing models simple by providing commonly used [loss functions](./losses.py) -* Provides a concise [definition](./inception_model.py) of [Inception v3](http://arxiv.org/abs/1512.00567) network architecture ready to be used - out-of-the-box or subsumed into new models. 
- -Additionally TF-Slim was designed with several principles in mind: - -* The various modules of TF-Slim (scopes, variables, ops, losses) are - independent. This flexibility allows users to pick and choose components of - TF-Slim completely à la carte. -* TF-Slim is written using a Functional Programming style. That means it's - super-lightweight and can be used right alongside any of TensorFlow's native - operations. -* Makes re-using network architectures easy. This allows users to build new - networks on top of existing ones as well as fine-tuning pre-trained models - on new tasks. - -## What are the various components of TF-Slim? - -TF-Slim is composed of several parts which were designed to exist independently. -These include: - -* [scopes.py](./scopes.py): provides a new scope named `arg_scope` that allows - a user to define default arguments for specific operations within that - scope. -* [variables.py](./variables.py): provides convenience wrappers for variable - creation and manipulation. -* [ops.py](./ops.py): provides high level operations for building models using - tensorflow. -* [losses.py](./losses.py): contains commonly used loss functions. - -## Defining Models - -Models can be succinctly defined using TF-Slim by combining its variables, -operations and scopes. Each of these elements are defined below. - -### Variables - -Creating [`Variables`](https://www.tensorflow.org/how_tos/variables/index.html) -in native tensorflow requires either a predefined value or an initialization -mechanism (random, normally distributed). Furthermore, if a variable needs to be -created on a specific device, such as a GPU, the specification must be [made -explicit](https://www.tensorflow.org/how_tos/using_gpu/index.html). To alleviate -the code required for variable creation, TF-Slim provides a set of thin wrapper -functions in [variables.py](./variables.py) which allow callers to easily define -variables. 
- -For example, to create a `weight` variable, initialize it using a truncated -normal distribution, regularize it with an `l2_loss` and place it on the `CPU`, -one need only declare the following: - -```python -weights = variables.variable('weights', - shape=[10, 10, 3 , 3], - initializer=tf.truncated_normal_initializer(stddev=0.1), - regularizer=lambda t: losses.l2_loss(t, weight=0.05), - device='/cpu:0') -``` - -In addition to the functionality provided by `tf.Variable`, `slim.variables` -keeps track of the variables created by `slim.ops` to define a model, which -allows one to distinguish variables that belong to the model versus other -variables. - -```python -# Get all the variables defined by the model. -model_variables = slim.variables.get_variables() - -# Get all the variables with the same given name, i.e. 'weights', 'biases'. -weights = slim.variables.get_variables_by_name('weights') -biases = slim.variables.get_variables_by_name('biases') - -# Get all the variables in VARIABLES_TO_RESTORE collection. -variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE) - - -weights = variables.variable('weights', - shape=[10, 10, 3 , 3], - initializer=tf.truncated_normal_initializer(stddev=0.1), - regularizer=lambda t: losses.l2_loss(t, weight=0.05), - device='/cpu:0') -``` - -### Operations (Layers) - -While the set of TensorFlow operations is quite extensive, builders of neural -networks typically think of models in terms of "layers". A layer, such as a -Convolutional Layer, a Fully Connected Layer or a BatchNorm Layer are more -abstract than a single TensorFlow operation and typically involve many such -operations. For example, a Convolutional Layer in a neural network is built -using several steps: - -1. Creating the weight variables -2. Creating the bias variables -3. Convolving the weights with the input from the previous layer -4. Adding the biases to the result of the convolution. 
- -In python code this can be rather laborious: - -```python -input = ... -with tf.name_scope('conv1_1') as scope: - kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32, - stddev=1e-1), name='weights') - conv = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME') - biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32), - trainable=True, name='biases') - bias = tf.nn.bias_add(conv, biases) - conv1 = tf.nn.relu(bias, name=scope) -``` - -To alleviate the need to duplicate this code repeatedly, TF-Slim provides a -number of convenient operations defined at the (more abstract) level of neural -network layers. For example, compare the code above to an invocation of the -TF-Slim code: - -```python -input = ... -net = slim.ops.conv2d(input, [3, 3], 128, scope='conv1_1') -``` - -TF-Slim provides numerous operations used in building neural networks which -roughly correspond to such layers. These include: - -Layer | TF-Slim Op ---------------------- | ------------------------ -Convolutional Layer | [ops.conv2d](./ops.py) -Fully Connected Layer | [ops.fc](./ops.py) -BatchNorm layer | [ops.batch_norm](./ops.py) -Max Pooling Layer | [ops.max_pool](./ops.py) -Avg Pooling Layer | [ops.avg_pool](./ops.py) -Dropout Layer | [ops.dropout](./ops.py) - -[ops.py](./ops.py) also includes operations that are not really "layers" per se, -but are often used to manipulate hidden unit representations during inference: - -Operation | TF-Slim Op ---------- | --------------------- -Flatten | [ops.flatten](./ops.py) - -TF-Slim also provides a meta-operation called `repeat_op` that allows one to -repeatedly perform the same operation. Consider the following snippet from the -[VGG](https://www.robots.ox.ac.uk/~vgg/research/very_deep/) network whose layers -perform several convolutions in a row between pooling layers: - -```python -net = ... 
-net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_1') -net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_2') -net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_3') -net = slim.ops.max_pool(net, [2, 2], scope='pool3') -``` - -This clear duplication of code can be removed via a standard loop: - -```python -net = ... -for i in range(3): - net = slim.ops.conv2d(net, 256, [3, 3], scope='conv3_' % (i+1)) -net = slim.ops.max_pool(net, [2, 2], scope='pool3') -``` - -While this does reduce the amount of duplication, it can be made even cleaner by -using the `RepeatOp`: - -```python -net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3') -net = slim.ops.max_pool(net, [2, 2], scope='pool2') -``` - -Notice that the RepeatOp not only applies the same argument in-line, it also is -smart enough to unroll the scopes such that the scopes assigned to each -subsequent call of `ops.conv2d` is appended with an underscore and iteration -number. More concretely, the scopes in the example above would be 'conv3_1', -'conv3_2' and 'conv3_3'. - -### Scopes - -In addition to the types of scope mechanisms in TensorFlow ([name_scope](https://www.tensorflow.org/api_docs/python/framework.html#name_scope), -[variable_scope](https://www.tensorflow.org/api_docs/python/state_ops.html#variable_scope), -TF-Slim adds a new scoping mechanism called "argument scope" or [arg_scope](./scopes.py). This new scope allows a user to specify one or more operations and -a set of arguments which will be passed to each of the operations defined in the -`arg_scope`. This functionality is best illustrated by example. 
Consider the -following code snippet: - -```python -net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding='SAME', stddev=0.01, weight_decay=0.0005, scope='conv1') -net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', stddev=0.01, weight_decay=0.0005, scope='conv2') -net = slim.ops.conv2d(net, 256, [11, 11], padding='SAME', stddev=0.01, weight_decay=0.0005, scope='conv3') -``` - -It should be clear that these three Convolution layers share many of the same -hyperparameters. Two have the same padding, all three have the same weight_decay -and standard deviation of its weights. Not only do the duplicated values make -the code more difficult to read, it also adds the addition burder to the writer -of needing to doublecheck that all of the values are identical in each step. One -solution would be to specify default values using variables: - -```python -padding='SAME' -stddev=0.01 -weight_decay=0.0005 -net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding=padding, stddev=stddev, weight_decay=weight_decay, scope='conv1') -net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', stddev=stddev, weight_decay=weight_decay, scope='conv2') -net = slim.ops.conv2d(net, 256, [11, 11], padding=padding, stddev=stddev, weight_decay=weight_decay, scope='conv3') - -``` - -This solution ensures that all three convolutions share the exact same variable -values but doesn't reduce the code clutter. By using an `arg_scope`, we can both -ensure that each layer uses the same values and simplify the code: - -```python - with slim.arg_scope([slim.ops.conv2d], padding='SAME', stddev=0.01, weight_decay=0.0005): - net = slim.ops.conv2d(inputs, 64, [11, 11], scope='conv1') - net = slim.ops.conv2d(net, 128, [11, 11], padding='VALID', scope='conv2') - net = slim.ops.conv2d(net, 256, [11, 11], scope='conv3') -``` - -As the example illustrates, the use of arg_scope makes the code cleaner, simpler -and easier to maintain. 
Notice that while argument values are specifed in the -arg_scope, they can be overwritten locally. In particular, while the padding -argument has been set to 'SAME', the second convolution overrides it with the -value of 'VALID'. - -One can also nest `arg_scope`s and use multiple operations in the same scope. -For example: - -```python -with arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005): - with arg_scope([slim.ops.conv2d], padding='SAME'), slim.arg_scope([slim.ops.fc], bias=1.0): - net = slim.ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') - net = slim.ops.conv2d(net, 256, [5, 5], stddev=0.03, scope='conv2') - net = slim.ops.flatten(net) - net = slim.ops.fc(net, 1000, activation=None, scope='fc') -``` - -In this example, the first `arg_scope` applies the same `stddev` and -`weight_decay` arguments to the `conv2d` and `fc` ops in its scope. In the -second `arg_scope`, additional default arguments to `conv2d` only are specified. - -In addition to `arg_scope`, TF-Slim provides several decorators that wrap the -use of tensorflow arg scopes. These include `@AddArgScope`, `@AddNameScope`, -`@AddVariableScope`, `@AddOpScope` and `@AddVariableOpScope`. To illustrate -their use, consider the following example. - -```python -def MyNewOp(inputs): - varA = ... - varB = ... - outputs = tf.multiply(varA, inputs) + varB - return outputs - -``` - -In this example, the user has created a new op which creates two variables. To -ensure that these variables exist within a certain variable scope (to avoid -collisions with variables with the same name), in standard TF, the op must be -called within a variable scope: - -```python -inputs = ... -with tf.variable_scope('layer1'): - outputs = MyNewOp(inputs) -``` - -As an alternative, one can use TF-Slim's decorators to decorate the function and -simplify the call: - -```python -@AddVariableScope -def MyNewOp(inputs): - ... - return outputs - - -inputs = ... 
-outputs = MyNewOp('layer1') -``` - -The `@AddVariableScope` decorater simply applies the `tf.variable_scope` scoping -to the called function taking "layer1" as its argument. This allows the code to -be written more concisely. - -### Losses - -The loss function defines a quantity that we want to minimize. For -classification problems, this is typically the cross entropy between the true -(one-hot) distribution and the predicted probability distribution across -classes. For regression problems, this is often the sum-of-squares differences -between the predicted and true values. - -Certain models, such as multi-task learning models, require the use of multiple -loss functions simultaneously. In other words, the loss function ultimatey being -minimized is the sum of various other loss functions. For example, consider a -model that predicts both the type of scene in an image as well as the depth from -the camera of each pixel. This model's loss function would be the sum of the -classification loss and depth prediction loss. - -TF-Slim provides an easy-to-use mechanism for defining and keeping track of loss -functions via the [losses.py](./losses.py) module. Consider the simple case -where we want to train the VGG network: - -```python -# Load the images and labels. -images, labels = ... - -# Create the model. -predictions = ... - -# Define the loss functions and get the total loss. -loss = losses.cross_entropy_loss(predictions, labels) -``` - -In this example, we start by creating the model (using TF-Slim's VGG -implementation), and add the standard classification loss. Now, lets turn to the -case where we have a multi-task model that produces multiple outputs: - -```python -# Load the images and labels. -images, scene_labels, depth_labels = ... - -# Create the model. -scene_predictions, depth_predictions = CreateMultiTaskModel(images) - -# Define the loss functions and get the total loss. 
-classification_loss = slim.losses.cross_entropy_loss(scene_predictions, scene_labels) -sum_of_squares_loss = slim.losses.l2loss(depth_predictions - depth_labels) - -# The following two lines have the same effect: -total_loss1 = classification_loss + sum_of_squares_loss -total_loss2 = tf.get_collection(slim.losses.LOSSES_COLLECTION) -``` - -In this example, we have two losses which we add by calling -`losses.cross_entropy_loss` and `losses.l2loss`. We can obtain the -total loss by adding them together (`total_loss1`) or by calling -`losses.GetTotalLoss()`. How did this work? When you create a loss function via -TF-Slim, TF-Slim adds the loss to a special TensorFlow collection of loss -functions. This enables you to either manage the total loss manually, or allow -TF-Slim to manage them for you. - -What if you want to let TF-Slim manage the losses for you but have a custom loss -function? [losses.py](./losses.py) also has a function that adds this loss to -TF-Slims collection. For example: - -```python -# Load the images and labels. -images, scene_labels, depth_labels, pose_labels = ... - -# Create the model. -scene_predictions, depth_predictions, pose_predictions = CreateMultiTaskModel(images) - -# Define the loss functions and get the total loss. -classification_loss = slim.losses.cross_entropy_loss(scene_predictions, scene_labels) -sum_of_squares_loss = slim.losses.l2loss(depth_predictions - depth_labels) -pose_loss = MyCustomLossFunction(pose_predictions, pose_labels) -tf.add_to_collection(slim.losses.LOSSES_COLLECTION, pose_loss) # Letting TF-Slim know about the additional loss. - -# The following two lines have the same effect: -total_loss1 = classification_loss + sum_of_squares_loss + pose_loss -total_loss2 = losses.GetTotalLoss() -``` - -In this example, we can again either produce the total loss function manually or -let TF-Slim know about the additional loss and let TF-Slim handle the losses. 
- -## Putting the Pieces Together - -By combining TF-Slim Variables, Operations and scopes, we can write a normally -very complex network with very few lines of code. For example, the entire [VGG](https://www.robots.ox.ac.uk/~vgg/research/very_deep/) architecture can be -defined with just the following snippet: - -```python -with arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005): - net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1') - net = slim.ops.max_pool(net, [2, 2], scope='pool1') - net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2') - net = slim.ops.max_pool(net, [2, 2], scope='pool2') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3') - net = slim.ops.max_pool(net, [2, 2], scope='pool3') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4') - net = slim.ops.max_pool(net, [2, 2], scope='pool4') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5') - net = slim.ops.max_pool(net, [2, 2], scope='pool5') - net = slim.ops.flatten(net, scope='flatten5') - net = slim.ops.fc(net, 4096, scope='fc6') - net = slim.ops.dropout(net, 0.5, scope='dropout6') - net = slim.ops.fc(net, 4096, scope='fc7') - net = slim.ops.dropout(net, 0.5, scope='dropout7') - net = slim.ops.fc(net, 1000, activation=None, scope='fc8') -return net -``` - -## Re-using previously defined network architectures and pre-trained models. - -### Brief Recap on Restoring Variables from a Checkpoint - -After a model has been trained, it can be restored using `tf.train.Saver()` -which restores `Variables` from a given checkpoint. For many cases, -`tf.train.Saver()` provides a simple mechanism to restore all or just a few -variables. - -```python -# Create some variables. -v1 = tf.Variable(..., name="v1") -v2 = tf.Variable(..., name="v2") -... -# Add ops to restore all the variables. -restorer = tf.train.Saver() - -# Add ops to restore some variables. 
-restorer = tf.train.Saver([v1, v2]) - -# Later, launch the model, use the saver to restore variables from disk, and -# do some work with the model. -with tf.Session() as sess: - # Restore variables from disk. - restorer.restore(sess, "/tmp/model.ckpt") - print("Model restored.") - # Do some work with the model - ... -``` - -See [Restoring Variables](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html#restoring-variables) -and [Choosing which Variables to Save and Restore](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html#choosing-which-variables-to-save-and-restore) -sections of the [Variables](https://www.tensorflow.org/versions/r0.7/how_tos/variables/index.html) page for -more details. - -### Using slim.variables to Track which Variables need to be Restored - -It is often desirable to fine-tune a pre-trained model on an entirely new -dataset or even a new task. In these situations, one must specify which layers -of the model should be reused (and consequently loaded from a checkpoint) and -which layers are new. Indicating which variables or layers should be restored is -a process that quickly becomes cumbersome when done manually. - -To help keep track of which variables to restore, `slim.variables` provides a -`restore` argument when creating each Variable. By default, all variables are -marked as `restore=True`, which results in all variables defined by the model -being restored. - -```python -# Create some variables. -v1 = slim.variables.variable(name="v1", ..., restore=False) -v2 = slim.variables.variable(name="v2", ...) # By default restore=True -... -# Get list of variables to restore (which contains only 'v2') -variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE) -restorer = tf.train.Saver(variables_to_restore) -with tf.Session() as sess: - # Restore variables from disk. - restorer.restore(sess, "/tmp/model.ckpt") - print("Model restored.") - # Do some work with the model - ... 
-``` - -Additionally, every layer in `slim.ops` that creates slim.variables (such as -`slim.ops.conv2d`, `slim.ops.fc`, `slim.ops.batch_norm`) also has a `restore` -argument which controls whether the variables created by that layer should be -restored or not. - -```python -# Create a small network. -net = slim.ops.conv2d(images, 32, [7, 7], stride=2, scope='conv1') -net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') -net = slim.ops.conv2d(net, 128, [3, 3], scope='conv3') -net = slim.ops.max_pool(net, [3, 3], stride=2, scope='pool3') -net = slim.ops.flatten(net) -net = slim.ops.fc(net, 10, scope='logits', restore=False) -... - -# VARIABLES_TO_RESTORE would contain the 'weights' and 'bias' defined by 'conv1' -# 'conv2' and 'conv3' but not the ones defined by 'logits' -variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE) - -# Create a restorer that would restore only the needed variables. -restorer = tf.train.Saver(variables_to_restore) - -# Create a saver that would save all the variables (including 'logits'). -saver = tf.train.Saver() -with tf.Session() as sess: - # Restore variables from disk. - restorer.restore(sess, "/tmp/model.ckpt") - print("Model restored.") - - # Do some work with the model - ... - saver.save(sess, "/tmp/new_model.ckpt") -``` - -Note: When restoring variables from a checkpoint, the `Saver` locates the -variable names in a checkpoint file and maps them to variables in the current -graph. Above, we created a saver by passing to it a list of variables. In this -case, the names of the variables to locate in the checkpoint file were -implicitly obtained from each provided variable's `var.op.name`. - -This works well when the variable names in the checkpoint file match those in -the graph. However, sometimes, we want to restore a model from a checkpoint -whose variables have different names those in the current graph. 
In this case, -we must provide the `Saver` a dictionary that maps from each checkpoint variable -name to each graph variable. Consider the following example where the checkpoint -variables names are obtained via a simple function: - -```python -# Assuming that 'conv1/weights' should be restored from 'vgg16/conv1/weights' -def name_in_checkpoint(var): - return 'vgg16/' + var.op.name - -# Assuming that 'conv1/weights' and 'conv1/bias' should be restored from 'conv1/params1' and 'conv1/params2' -def name_in_checkpoint(var): - if "weights" in var.op.name: - return var.op.name.replace("weights", "params1") - if "bias" in var.op.name: - return var.op.name.replace("bias", "params2") - -variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE) -variables_to_restore = {name_in_checkpoint(var):var for var in variables_to_restore} -restorer = tf.train.Saver(variables_to_restore) -with tf.Session() as sess: - # Restore variables from disk. - restorer.restore(sess, "/tmp/model.ckpt") -``` - -### Reusing the VGG16 network defined in TF-Slim on a different task, i.e. PASCAL-VOC. - -Assuming one have already a pre-trained VGG16 model, one just need to replace -the last layer `fc8` with a new layer `fc8_pascal` and use `restore=False`. 
- -```python -def vgg16_pascal(inputs): - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], stddev=0.01, weight_decay=0.0005): - net = slim.ops.repeat_op(2, inputs, slim.ops.conv2d, 64, [3, 3], scope='conv1') - net = slim.ops.max_pool(net, [2, 2], scope='pool1') - net = slim.ops.repeat_op(2, net, slim.ops.conv2d, 128, [3, 3], scope='conv2') - net = slim.ops.max_pool(net, [2, 2], scope='pool2') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 256, [3, 3], scope='conv3') - net = slim.ops.max_pool(net, [2, 2], scope='pool3') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv4') - net = slim.ops.max_pool(net, [2, 2], scope='pool4') - net = slim.ops.repeat_op(3, net, slim.ops.conv2d, 512, [3, 3], scope='conv5') - net = slim.ops.max_pool(net, [2, 2], scope='pool5') - net = slim.ops.flatten(net, scope='flatten5') - net = slim.ops.fc(net, 4096, scope='fc6') - net = slim.ops.dropout(net, 0.5, scope='dropout6') - net = slim.ops.fc(net, 4096, scope='fc7') - net = slim.ops.dropout(net, 0.5, scope='dropout7') - # To reuse vgg16 on PASCAL-VOC, just change the last layer. - net = slim.ops.fc(net, 21, activation=None, scope='fc8_pascal', restore=False) - return net -``` - -## Authors - -Sergio Guadarrama and Nathan Silberman diff --git a/research/inception/inception/slim/collections_test.py b/research/inception/inception/slim/collections_test.py deleted file mode 100644 index 2a1f170edaaedae337df8e0b552a03dd82b263d4..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/collections_test.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for inception.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception.slim import slim - - -def get_variables(scope=None): - return slim.variables.get_variables(scope) - - -def get_variables_by_name(name): - return slim.variables.get_variables_by_name(name) - - -class CollectionsTest(tf.test.TestCase): - - def testVariables(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d], - batch_norm_params={'decay': 0.9997}): - slim.inception.inception_v3(inputs) - self.assertEqual(len(get_variables()), 388) - self.assertEqual(len(get_variables_by_name('weights')), 98) - self.assertEqual(len(get_variables_by_name('biases')), 2) - self.assertEqual(len(get_variables_by_name('beta')), 96) - self.assertEqual(len(get_variables_by_name('gamma')), 0) - self.assertEqual(len(get_variables_by_name('moving_mean')), 96) - self.assertEqual(len(get_variables_by_name('moving_variance')), 96) - - def testVariablesWithoutBatchNorm(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d], - batch_norm_params=None): - slim.inception.inception_v3(inputs) - self.assertEqual(len(get_variables()), 196) - 
self.assertEqual(len(get_variables_by_name('weights')), 98) - self.assertEqual(len(get_variables_by_name('biases')), 98) - self.assertEqual(len(get_variables_by_name('beta')), 0) - self.assertEqual(len(get_variables_by_name('gamma')), 0) - self.assertEqual(len(get_variables_by_name('moving_mean')), 0) - self.assertEqual(len(get_variables_by_name('moving_variance')), 0) - - def testVariablesByLayer(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d], - batch_norm_params={'decay': 0.9997}): - slim.inception.inception_v3(inputs) - self.assertEqual(len(get_variables()), 388) - self.assertEqual(len(get_variables('conv0')), 4) - self.assertEqual(len(get_variables('conv1')), 4) - self.assertEqual(len(get_variables('conv2')), 4) - self.assertEqual(len(get_variables('conv3')), 4) - self.assertEqual(len(get_variables('conv4')), 4) - self.assertEqual(len(get_variables('mixed_35x35x256a')), 28) - self.assertEqual(len(get_variables('mixed_35x35x288a')), 28) - self.assertEqual(len(get_variables('mixed_35x35x288b')), 28) - self.assertEqual(len(get_variables('mixed_17x17x768a')), 16) - self.assertEqual(len(get_variables('mixed_17x17x768b')), 40) - self.assertEqual(len(get_variables('mixed_17x17x768c')), 40) - self.assertEqual(len(get_variables('mixed_17x17x768d')), 40) - self.assertEqual(len(get_variables('mixed_17x17x768e')), 40) - self.assertEqual(len(get_variables('mixed_8x8x2048a')), 36) - self.assertEqual(len(get_variables('mixed_8x8x2048b')), 36) - self.assertEqual(len(get_variables('logits')), 2) - self.assertEqual(len(get_variables('aux_logits')), 10) - - def testVariablesToRestore(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d], - batch_norm_params={'decay': 0.9997}): - slim.inception.inception_v3(inputs) - 
variables_to_restore = tf.get_collection( - slim.variables.VARIABLES_TO_RESTORE) - self.assertEqual(len(variables_to_restore), 388) - self.assertListEqual(variables_to_restore, get_variables()) - - def testVariablesToRestoreWithoutLogits(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d], - batch_norm_params={'decay': 0.9997}): - slim.inception.inception_v3(inputs, restore_logits=False) - variables_to_restore = tf.get_collection( - slim.variables.VARIABLES_TO_RESTORE) - self.assertEqual(len(variables_to_restore), 384) - - def testRegularizationLosses(self): - batch_size = 5 - height, width = 299, 299 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): - slim.inception.inception_v3(inputs) - losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), len(get_variables_by_name('weights'))) - - def testTotalLossWithoutRegularization(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1001 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - dense_labels = tf.random_uniform((batch_size, num_classes)) - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0): - logits, end_points = slim.inception.inception_v3( - inputs, - num_classes=num_classes) - # Cross entropy loss for the main softmax prediction. - slim.losses.cross_entropy_loss(logits, - dense_labels, - label_smoothing=0.1, - weight=1.0) - # Cross entropy loss for the auxiliary softmax head. 
- slim.losses.cross_entropy_loss(end_points['aux_logits'], - dense_labels, - label_smoothing=0.1, - weight=0.4, - scope='aux_loss') - losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) - self.assertEqual(len(losses), 2) - - def testTotalLossWithRegularization(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - dense_labels = tf.random_uniform((batch_size, num_classes)) - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): - logits, end_points = slim.inception.inception_v3(inputs, num_classes) - # Cross entropy loss for the main softmax prediction. - slim.losses.cross_entropy_loss(logits, - dense_labels, - label_smoothing=0.1, - weight=1.0) - # Cross entropy loss for the auxiliary softmax head. - slim.losses.cross_entropy_loss(end_points['aux_logits'], - dense_labels, - label_smoothing=0.1, - weight=0.4, - scope='aux_loss') - losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) - self.assertEqual(len(losses), 2) - reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(reg_losses), 98) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/inception/inception/slim/inception_model.py b/research/inception/inception/slim/inception_model.py deleted file mode 100644 index 6136ab1ba68716f4f135110a4d5c518b732b23df..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/inception_model.py +++ /dev/null @@ -1,356 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Inception-v3 expressed in TensorFlow-Slim. - - Usage: - - # Parameters for BatchNorm. - batch_norm_params = { - # Decay for the batch_norm moving averages. - 'decay': BATCHNORM_MOVING_AVERAGE_DECAY, - # epsilon to prevent 0s in variance. - 'epsilon': 0.001, - } - # Set weight_decay for weights in Conv and FC layers. - with slim.arg_scope([slim.ops.conv2d, slim.ops.fc], weight_decay=0.00004): - with slim.arg_scope([slim.ops.conv2d], - stddev=0.1, - activation=tf.nn.relu, - batch_norm_params=batch_norm_params): - # Force all Variables to reside on the CPU. - with slim.arg_scope([slim.variables.variable], device='/cpu:0'): - logits, endpoints = slim.inception.inception_v3( - images, - dropout_keep_prob=0.8, - num_classes=num_classes, - is_training=for_training, - restore_logits=restore_logits, - scope=scope) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception.slim import ops -from inception.slim import scopes - - -def inception_v3(inputs, - dropout_keep_prob=0.8, - num_classes=1000, - is_training=True, - restore_logits=True, - scope=''): - """Latest Inception from http://arxiv.org/abs/1512.00567. - - "Rethinking the Inception Architecture for Computer Vision" - - Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, - Zbigniew Wojna - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - dropout_keep_prob: dropout keep_prob. 
- num_classes: number of predicted classes. - is_training: whether is training or not. - restore_logits: whether or not the logits layers should be restored. - Useful for fine-tuning a model with different num_classes. - scope: Optional scope for name_scope. - - Returns: - a list containing 'logits', 'aux_logits' Tensors. - """ - # end_points will collect relevant activations for external use, for example - # summaries or losses. - end_points = {} - with tf.name_scope(scope, 'inception_v3', [inputs]): - with scopes.arg_scope([ops.conv2d, ops.fc, ops.batch_norm, ops.dropout], - is_training=is_training): - with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], - stride=1, padding='VALID'): - # 299 x 299 x 3 - end_points['conv0'] = ops.conv2d(inputs, 32, [3, 3], stride=2, - scope='conv0') - # 149 x 149 x 32 - end_points['conv1'] = ops.conv2d(end_points['conv0'], 32, [3, 3], - scope='conv1') - # 147 x 147 x 32 - end_points['conv2'] = ops.conv2d(end_points['conv1'], 64, [3, 3], - padding='SAME', scope='conv2') - # 147 x 147 x 64 - end_points['pool1'] = ops.max_pool(end_points['conv2'], [3, 3], - stride=2, scope='pool1') - # 73 x 73 x 64 - end_points['conv3'] = ops.conv2d(end_points['pool1'], 80, [1, 1], - scope='conv3') - # 73 x 73 x 80. - end_points['conv4'] = ops.conv2d(end_points['conv3'], 192, [3, 3], - scope='conv4') - # 71 x 71 x 192. - end_points['pool2'] = ops.max_pool(end_points['conv4'], [3, 3], - stride=2, scope='pool2') - # 35 x 35 x 192. - net = end_points['pool2'] - # Inception blocks - with scopes.arg_scope([ops.conv2d, ops.max_pool, ops.avg_pool], - stride=1, padding='SAME'): - # mixed: 35 x 35 x 256. 
- with tf.variable_scope('mixed_35x35x256a'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 64, [1, 1]) - with tf.variable_scope('branch5x5'): - branch5x5 = ops.conv2d(net, 48, [1, 1]) - branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 64, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 32, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool]) - end_points['mixed_35x35x256a'] = net - # mixed_1: 35 x 35 x 288. - with tf.variable_scope('mixed_35x35x288a'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 64, [1, 1]) - with tf.variable_scope('branch5x5'): - branch5x5 = ops.conv2d(net, 48, [1, 1]) - branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 64, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool]) - end_points['mixed_35x35x288a'] = net - # mixed_2: 35 x 35 x 288. 
- with tf.variable_scope('mixed_35x35x288b'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 64, [1, 1]) - with tf.variable_scope('branch5x5'): - branch5x5 = ops.conv2d(net, 48, [1, 1]) - branch5x5 = ops.conv2d(branch5x5, 64, [5, 5]) - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 64, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 64, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool]) - end_points['mixed_35x35x288b'] = net - # mixed_3: 17 x 17 x 768. - with tf.variable_scope('mixed_17x17x768a'): - with tf.variable_scope('branch3x3'): - branch3x3 = ops.conv2d(net, 384, [3, 3], stride=2, padding='VALID') - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 64, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 96, [3, 3], - stride=2, padding='VALID') - with tf.variable_scope('branch_pool'): - branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') - net = tf.concat(axis=3, values=[branch3x3, branch3x3dbl, branch_pool]) - end_points['mixed_17x17x768a'] = net - # mixed4: 17 x 17 x 768. 
- with tf.variable_scope('mixed_17x17x768b'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 192, [1, 1]) - with tf.variable_scope('branch7x7'): - branch7x7 = ops.conv2d(net, 128, [1, 1]) - branch7x7 = ops.conv2d(branch7x7, 128, [1, 7]) - branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) - with tf.variable_scope('branch7x7dbl'): - branch7x7dbl = ops.conv2d(net, 128, [1, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [1, 7]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 128, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool]) - end_points['mixed_17x17x768b'] = net - # mixed_5: 17 x 17 x 768. - with tf.variable_scope('mixed_17x17x768c'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 192, [1, 1]) - with tf.variable_scope('branch7x7'): - branch7x7 = ops.conv2d(net, 160, [1, 1]) - branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) - branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) - with tf.variable_scope('branch7x7dbl'): - branch7x7dbl = ops.conv2d(net, 160, [1, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool]) - end_points['mixed_17x17x768c'] = net - # mixed_6: 17 x 17 x 768. 
- with tf.variable_scope('mixed_17x17x768d'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 192, [1, 1]) - with tf.variable_scope('branch7x7'): - branch7x7 = ops.conv2d(net, 160, [1, 1]) - branch7x7 = ops.conv2d(branch7x7, 160, [1, 7]) - branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) - with tf.variable_scope('branch7x7dbl'): - branch7x7dbl = ops.conv2d(net, 160, [1, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [1, 7]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 160, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool]) - end_points['mixed_17x17x768d'] = net - # mixed_7: 17 x 17 x 768. - with tf.variable_scope('mixed_17x17x768e'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 192, [1, 1]) - with tf.variable_scope('branch7x7'): - branch7x7 = ops.conv2d(net, 192, [1, 1]) - branch7x7 = ops.conv2d(branch7x7, 192, [1, 7]) - branch7x7 = ops.conv2d(branch7x7, 192, [7, 1]) - with tf.variable_scope('branch7x7dbl'): - branch7x7dbl = ops.conv2d(net, 192, [1, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [7, 1]) - branch7x7dbl = ops.conv2d(branch7x7dbl, 192, [1, 7]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool]) - end_points['mixed_17x17x768e'] = net - # Auxiliary Head logits - aux_logits = tf.identity(end_points['mixed_17x17x768e']) - with tf.variable_scope('aux_logits'): - aux_logits = ops.avg_pool(aux_logits, [5, 5], stride=3, - padding='VALID') - aux_logits = 
ops.conv2d(aux_logits, 128, [1, 1], scope='proj') - # Shape of feature map before the final layer. - shape = aux_logits.get_shape() - aux_logits = ops.conv2d(aux_logits, 768, shape[1:3], stddev=0.01, - padding='VALID') - aux_logits = ops.flatten(aux_logits) - aux_logits = ops.fc(aux_logits, num_classes, activation=None, - stddev=0.001, restore=restore_logits) - end_points['aux_logits'] = aux_logits - # mixed_8: 8 x 8 x 1280. - # Note that the scope below is not changed to not void previous - # checkpoints. - # (TODO) Fix the scope when appropriate. - with tf.variable_scope('mixed_17x17x1280a'): - with tf.variable_scope('branch3x3'): - branch3x3 = ops.conv2d(net, 192, [1, 1]) - branch3x3 = ops.conv2d(branch3x3, 320, [3, 3], stride=2, - padding='VALID') - with tf.variable_scope('branch7x7x3'): - branch7x7x3 = ops.conv2d(net, 192, [1, 1]) - branch7x7x3 = ops.conv2d(branch7x7x3, 192, [1, 7]) - branch7x7x3 = ops.conv2d(branch7x7x3, 192, [7, 1]) - branch7x7x3 = ops.conv2d(branch7x7x3, 192, [3, 3], - stride=2, padding='VALID') - with tf.variable_scope('branch_pool'): - branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID') - net = tf.concat(axis=3, values=[branch3x3, branch7x7x3, branch_pool]) - end_points['mixed_17x17x1280a'] = net - # mixed_9: 8 x 8 x 2048. 
- with tf.variable_scope('mixed_8x8x2048a'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 320, [1, 1]) - with tf.variable_scope('branch3x3'): - branch3x3 = ops.conv2d(net, 384, [1, 1]) - branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]), - ops.conv2d(branch3x3, 384, [3, 1])]) - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 448, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) - branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]), - ops.conv2d(branch3x3dbl, 384, [3, 1])]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool]) - end_points['mixed_8x8x2048a'] = net - # mixed_10: 8 x 8 x 2048. - with tf.variable_scope('mixed_8x8x2048b'): - with tf.variable_scope('branch1x1'): - branch1x1 = ops.conv2d(net, 320, [1, 1]) - with tf.variable_scope('branch3x3'): - branch3x3 = ops.conv2d(net, 384, [1, 1]) - branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]), - ops.conv2d(branch3x3, 384, [3, 1])]) - with tf.variable_scope('branch3x3dbl'): - branch3x3dbl = ops.conv2d(net, 448, [1, 1]) - branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3]) - branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]), - ops.conv2d(branch3x3dbl, 384, [3, 1])]) - with tf.variable_scope('branch_pool'): - branch_pool = ops.avg_pool(net, [3, 3]) - branch_pool = ops.conv2d(branch_pool, 192, [1, 1]) - net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool]) - end_points['mixed_8x8x2048b'] = net - # Final pooling and prediction - with tf.variable_scope('logits'): - shape = net.get_shape() - net = ops.avg_pool(net, shape[1:3], padding='VALID', scope='pool') - # 1 x 1 x 2048 - net = ops.dropout(net, dropout_keep_prob, scope='dropout') - net = ops.flatten(net, 
scope='flatten') - # 2048 - logits = ops.fc(net, num_classes, activation=None, scope='logits', - restore=restore_logits) - # 1000 - end_points['logits'] = logits - end_points['predictions'] = tf.nn.softmax(logits, name='predictions') - return logits, end_points - - -def inception_v3_parameters(weight_decay=0.00004, stddev=0.1, - batch_norm_decay=0.9997, batch_norm_epsilon=0.001): - """Yields the scope with the default parameters for inception_v3. - - Args: - weight_decay: the weight decay for weights variables. - stddev: standard deviation of the truncated guassian weight distribution. - batch_norm_decay: decay for the moving average of batch_norm momentums. - batch_norm_epsilon: small float added to variance to avoid dividing by zero. - - Yields: - a arg_scope with the parameters needed for inception_v3. - """ - # Set weight_decay for weights in Conv and FC layers. - with scopes.arg_scope([ops.conv2d, ops.fc], - weight_decay=weight_decay): - # Set stddev, activation and parameters for batch_norm. - with scopes.arg_scope([ops.conv2d], - stddev=stddev, - activation=tf.nn.relu, - batch_norm_params={ - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon}) as arg_scope: - yield arg_scope diff --git a/research/inception/inception/slim/inception_test.py b/research/inception/inception/slim/inception_test.py deleted file mode 100644 index 231dea298f4b761aa90224df1c263873bc890ac5..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/inception_test.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.inception.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception.slim import inception_model as inception - - -class InceptionTest(tf.test.TestCase): - - def testBuildLogits(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildEndPoints(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v3(inputs, num_classes) - self.assertTrue('logits' in end_points) - logits = end_points['logits'] - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('aux_logits' in end_points) - aux_logits = end_points['aux_logits'] - self.assertListEqual(aux_logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['mixed_8x8x2048b'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 8, 8, 2048]) - - def testVariablesSetDevice(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = 
tf.random_uniform((batch_size, height, width, 3)) - # Force all Variables to reside on the device. - with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): - inception.inception_v3(inputs, num_classes) - with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): - inception.inception_v3(inputs, num_classes) - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): - self.assertDeviceEqual(v.device, '/cpu:0') - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): - self.assertDeviceEqual(v.device, '/gpu:0') - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 150, 150 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['mixed_8x8x2048b'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 3, 3, 2048]) - - def testUnknowBatchSize(self): - batch_size = 1 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_v3(eval_inputs, num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - 
sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 150, 150 - num_classes = 1000 - with self.test_session() as sess: - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_v3(train_inputs, num_classes) - tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_v3(eval_inputs, num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/inception/inception/slim/losses.py b/research/inception/inception/slim/losses.py deleted file mode 100644 index 78298d092fab3afc264e427fb060602c27ea97b0..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/losses.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains convenience wrappers for various Neural Network TensorFlow losses. - - All the losses defined here add themselves to the LOSSES_COLLECTION - collection. 
- - l1_loss: Define a L1 Loss, useful for regularization, i.e. lasso. - l2_loss: Define a L2 Loss, useful for regularization, i.e. weight decay. - cross_entropy_loss: Define a cross entropy loss using - softmax_cross_entropy_with_logits. Useful for classification. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -# In order to gather all losses in a network, the user should use this -# key for get_collection, i.e: -# losses = tf.get_collection(slim.losses.LOSSES_COLLECTION) -LOSSES_COLLECTION = '_losses' - - -def l1_regularizer(weight=1.0, scope=None): - """Define a L1 regularizer. - - Args: - weight: scale the loss by this factor. - scope: Optional scope for name_scope. - - Returns: - a regularizer function. - """ - def regularizer(tensor): - with tf.name_scope(scope, 'L1Regularizer', [tensor]): - l1_weight = tf.convert_to_tensor(weight, - dtype=tensor.dtype.base_dtype, - name='weight') - return tf.multiply(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value') - return regularizer - - -def l2_regularizer(weight=1.0, scope=None): - """Define a L2 regularizer. - - Args: - weight: scale the loss by this factor. - scope: Optional scope for name_scope. - - Returns: - a regularizer function. - """ - def regularizer(tensor): - with tf.name_scope(scope, 'L2Regularizer', [tensor]): - l2_weight = tf.convert_to_tensor(weight, - dtype=tensor.dtype.base_dtype, - name='weight') - return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') - return regularizer - - -def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None): - """Define a L1L2 regularizer. - - Args: - weight_l1: scale the L1 loss by this factor. - weight_l2: scale the L2 loss by this factor. - scope: Optional scope for name_scope. - - Returns: - a regularizer function. 
- """ - def regularizer(tensor): - with tf.name_scope(scope, 'L1L2Regularizer', [tensor]): - weight_l1_t = tf.convert_to_tensor(weight_l1, - dtype=tensor.dtype.base_dtype, - name='weight_l1') - weight_l2_t = tf.convert_to_tensor(weight_l2, - dtype=tensor.dtype.base_dtype, - name='weight_l2') - reg_l1 = tf.multiply(weight_l1_t, tf.reduce_sum(tf.abs(tensor)), - name='value_l1') - reg_l2 = tf.multiply(weight_l2_t, tf.nn.l2_loss(tensor), - name='value_l2') - return tf.add(reg_l1, reg_l2, name='value') - return regularizer - - -def l1_loss(tensor, weight=1.0, scope=None): - """Define a L1Loss, useful for regularize, i.e. lasso. - - Args: - tensor: tensor to regularize. - weight: scale the loss by this factor. - scope: Optional scope for name_scope. - - Returns: - the L1 loss op. - """ - with tf.name_scope(scope, 'L1Loss', [tensor]): - weight = tf.convert_to_tensor(weight, - dtype=tensor.dtype.base_dtype, - name='loss_weight') - loss = tf.multiply(weight, tf.reduce_sum(tf.abs(tensor)), name='value') - tf.add_to_collection(LOSSES_COLLECTION, loss) - return loss - - -def l2_loss(tensor, weight=1.0, scope=None): - """Define a L2Loss, useful for regularize, i.e. weight decay. - - Args: - tensor: tensor to regularize. - weight: an optional weight to modulate the loss. - scope: Optional scope for name_scope. - - Returns: - the L2 loss op. - """ - with tf.name_scope(scope, 'L2Loss', [tensor]): - weight = tf.convert_to_tensor(weight, - dtype=tensor.dtype.base_dtype, - name='loss_weight') - loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value') - tf.add_to_collection(LOSSES_COLLECTION, loss) - return loss - - -def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0, - weight=1.0, scope=None): - """Define a Cross Entropy loss using softmax_cross_entropy_with_logits. - - It can scale the loss by weight factor, and smooth the labels. - - Args: - logits: [batch_size, num_classes] logits outputs of the network . 
- one_hot_labels: [batch_size, num_classes] target one_hot_encoded labels. - label_smoothing: if greater than 0 then smooth the labels. - weight: scale the loss by this factor. - scope: Optional scope for name_scope. - - Returns: - A tensor with the softmax_cross_entropy loss. - """ - logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape()) - with tf.name_scope(scope, 'CrossEntropyLoss', [logits, one_hot_labels]): - num_classes = one_hot_labels.get_shape()[-1].value - one_hot_labels = tf.cast(one_hot_labels, logits.dtype) - if label_smoothing > 0: - smooth_positives = 1.0 - label_smoothing - smooth_negatives = label_smoothing / num_classes - one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives - cross_entropy = tf.contrib.nn.deprecated_flipped_softmax_cross_entropy_with_logits( - logits, one_hot_labels, name='xentropy') - - weight = tf.convert_to_tensor(weight, - dtype=logits.dtype.base_dtype, - name='loss_weight') - loss = tf.multiply(weight, tf.reduce_mean(cross_entropy), name='value') - tf.add_to_collection(LOSSES_COLLECTION, loss) - return loss diff --git a/research/inception/inception/slim/losses_test.py b/research/inception/inception/slim/losses_test.py deleted file mode 100644 index e267f6520779f63be0becf41ceccc7de494e14f7..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/losses_test.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.losses.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from inception.slim import losses - - -class LossesTest(tf.test.TestCase): - - def testL1Loss(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - weights = tf.constant(1.0, shape=shape) - wd = 0.01 - loss = losses.l1_loss(weights, wd) - self.assertEquals(loss.op.name, 'L1Loss/value') - self.assertAlmostEqual(loss.eval(), num_elem * wd, 5) - - def testL2Loss(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - weights = tf.constant(1.0, shape=shape) - wd = 0.01 - loss = losses.l2_loss(weights, wd) - self.assertEquals(loss.op.name, 'L2Loss/value') - self.assertAlmostEqual(loss.eval(), num_elem * wd / 2, 5) - - -class RegularizersTest(tf.test.TestCase): - - def testL1Regularizer(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l1_regularizer()(tensor) - self.assertEquals(loss.op.name, 'L1Regularizer/value') - self.assertAlmostEqual(loss.eval(), num_elem, 5) - - def testL1RegularizerWithScope(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l1_regularizer(scope='L1')(tensor) - self.assertEquals(loss.op.name, 'L1/value') - self.assertAlmostEqual(loss.eval(), num_elem, 5) - - def testL1RegularizerWithWeight(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - weight = 0.01 - loss = losses.l1_regularizer(weight)(tensor) - self.assertEquals(loss.op.name, 'L1Regularizer/value') - self.assertAlmostEqual(loss.eval(), num_elem * 
weight, 5) - - def testL2Regularizer(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l2_regularizer()(tensor) - self.assertEquals(loss.op.name, 'L2Regularizer/value') - self.assertAlmostEqual(loss.eval(), num_elem / 2, 5) - - def testL2RegularizerWithScope(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l2_regularizer(scope='L2')(tensor) - self.assertEquals(loss.op.name, 'L2/value') - self.assertAlmostEqual(loss.eval(), num_elem / 2, 5) - - def testL2RegularizerWithWeight(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - weight = 0.01 - loss = losses.l2_regularizer(weight)(tensor) - self.assertEquals(loss.op.name, 'L2Regularizer/value') - self.assertAlmostEqual(loss.eval(), num_elem * weight / 2, 5) - - def testL1L2Regularizer(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l1_l2_regularizer()(tensor) - self.assertEquals(loss.op.name, 'L1L2Regularizer/value') - self.assertAlmostEqual(loss.eval(), num_elem + num_elem / 2, 5) - - def testL1L2RegularizerWithScope(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - loss = losses.l1_l2_regularizer(scope='L1L2')(tensor) - self.assertEquals(loss.op.name, 'L1L2/value') - self.assertAlmostEqual(loss.eval(), num_elem + num_elem / 2, 5) - - def testL1L2RegularizerWithWeights(self): - with self.test_session(): - shape = [5, 5, 5] - num_elem = 5 * 5 * 5 - tensor = tf.constant(1.0, shape=shape) - weight_l1 = 0.01 - weight_l2 = 0.05 - loss = losses.l1_l2_regularizer(weight_l1, weight_l2)(tensor) - self.assertEquals(loss.op.name, 'L1L2Regularizer/value') - self.assertAlmostEqual(loss.eval(), - num_elem * weight_l1 + num_elem * weight_l2 
/ 2, 5) - - -class CrossEntropyLossTest(tf.test.TestCase): - - def testCrossEntropyLossAllCorrect(self): - with self.test_session(): - logits = tf.constant([[10.0, 0.0, 0.0], - [0.0, 10.0, 0.0], - [0.0, 0.0, 10.0]]) - labels = tf.constant([[1, 0, 0], - [0, 1, 0], - [0, 0, 1]]) - loss = losses.cross_entropy_loss(logits, labels) - self.assertEquals(loss.op.name, 'CrossEntropyLoss/value') - self.assertAlmostEqual(loss.eval(), 0.0, 3) - - def testCrossEntropyLossAllWrong(self): - with self.test_session(): - logits = tf.constant([[10.0, 0.0, 0.0], - [0.0, 10.0, 0.0], - [0.0, 0.0, 10.0]]) - labels = tf.constant([[0, 0, 1], - [1, 0, 0], - [0, 1, 0]]) - loss = losses.cross_entropy_loss(logits, labels) - self.assertEquals(loss.op.name, 'CrossEntropyLoss/value') - self.assertAlmostEqual(loss.eval(), 10.0, 3) - - def testCrossEntropyLossAllWrongWithWeight(self): - with self.test_session(): - logits = tf.constant([[10.0, 0.0, 0.0], - [0.0, 10.0, 0.0], - [0.0, 0.0, 10.0]]) - labels = tf.constant([[0, 0, 1], - [1, 0, 0], - [0, 1, 0]]) - loss = losses.cross_entropy_loss(logits, labels, weight=0.5) - self.assertEquals(loss.op.name, 'CrossEntropyLoss/value') - self.assertAlmostEqual(loss.eval(), 5.0, 3) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/inception/inception/slim/ops.py b/research/inception/inception/slim/ops.py deleted file mode 100644 index 54fda4eb81f3a138d9bb2748c21164b88570ede9..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/ops.py +++ /dev/null @@ -1,473 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains convenience wrappers for typical Neural Network TensorFlow layers. - - Additionally it maintains a collection with update_ops that need to be - updated after the ops have been computed, for example to update moving means - and moving variances of batch_norm. - - Ops that have different behavior during training or eval have an is_training - parameter. Additionally Ops that contain variables.variable have a trainable - parameter, which control if the ops variables are trainable or not. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from tensorflow.python.training import moving_averages - -from inception.slim import losses -from inception.slim import scopes -from inception.slim import variables - -# Used to keep the update ops done by batch_norm. -UPDATE_OPS_COLLECTION = '_update_ops_' - - -@scopes.add_arg_scope -def batch_norm(inputs, - decay=0.999, - center=True, - scale=False, - epsilon=0.001, - moving_vars='moving_vars', - activation=None, - is_training=True, - trainable=True, - restore=True, - scope=None, - reuse=None): - """Adds a Batch Normalization layer. - - Args: - inputs: a tensor of size [batch_size, height, width, channels] - or [batch_size, channels]. - decay: decay for the moving average. - center: If True, subtract beta. If False, beta is not created and ignored. - scale: If True, multiply by gamma. If False, gamma is - not used. 
When the next layer is linear (also e.g. ReLU), this can be - disabled since the scaling can be done by the next layer. - epsilon: small float added to variance to avoid dividing by zero. - moving_vars: collection to store the moving_mean and moving_variance. - activation: activation function. - is_training: whether or not the model is in training mode. - trainable: whether or not the variables should be trainable or not. - restore: whether or not the variables should be marked for restore. - scope: Optional scope for variable_scope. - reuse: whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - - Returns: - a tensor representing the output of the operation. - - """ - inputs_shape = inputs.get_shape() - with tf.variable_scope(scope, 'BatchNorm', [inputs], reuse=reuse): - axis = list(range(len(inputs_shape) - 1)) - params_shape = inputs_shape[-1:] - # Allocate parameters for the beta and gamma of the normalization. - beta, gamma = None, None - if center: - beta = variables.variable('beta', - params_shape, - initializer=tf.zeros_initializer(), - trainable=trainable, - restore=restore) - if scale: - gamma = variables.variable('gamma', - params_shape, - initializer=tf.ones_initializer(), - trainable=trainable, - restore=restore) - # Create moving_mean and moving_variance add them to - # GraphKeys.MOVING_AVERAGE_VARIABLES collections. - moving_collections = [moving_vars, tf.GraphKeys.MOVING_AVERAGE_VARIABLES] - moving_mean = variables.variable('moving_mean', - params_shape, - initializer=tf.zeros_initializer(), - trainable=False, - restore=restore, - collections=moving_collections) - moving_variance = variables.variable('moving_variance', - params_shape, - initializer=tf.ones_initializer(), - trainable=False, - restore=restore, - collections=moving_collections) - if is_training: - # Calculate the moments based on the individual batch. 
- mean, variance = tf.nn.moments(inputs, axis) - - update_moving_mean = moving_averages.assign_moving_average( - moving_mean, mean, decay) - tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean) - update_moving_variance = moving_averages.assign_moving_average( - moving_variance, variance, decay) - tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance) - else: - # Just use the moving_mean and moving_variance. - mean = moving_mean - variance = moving_variance - # Normalize the activations. - outputs = tf.nn.batch_normalization( - inputs, mean, variance, beta, gamma, epsilon) - outputs.set_shape(inputs.get_shape()) - if activation: - outputs = activation(outputs) - return outputs - - -def _two_element_tuple(int_or_tuple): - """Converts `int_or_tuple` to height, width. - - Several of the functions that follow accept arguments as either - a tuple of 2 integers or a single integer. A single integer - indicates that the 2 values of the tuple are the same. - - This functions normalizes the input value by always returning a tuple. - - Args: - int_or_tuple: A list of 2 ints, a single int or a tf.TensorShape. - - Returns: - A tuple with 2 values. - - Raises: - ValueError: If `int_or_tuple` it not well formed. 
- """ - if isinstance(int_or_tuple, (list, tuple)): - if len(int_or_tuple) != 2: - raise ValueError('Must be a list with 2 elements: %s' % int_or_tuple) - return int(int_or_tuple[0]), int(int_or_tuple[1]) - if isinstance(int_or_tuple, int): - return int(int_or_tuple), int(int_or_tuple) - if isinstance(int_or_tuple, tf.TensorShape): - if len(int_or_tuple) == 2: - return int_or_tuple[0], int_or_tuple[1] - raise ValueError('Must be an int, a list with 2 elements or a TensorShape of ' - 'length 2') - - -@scopes.add_arg_scope -def conv2d(inputs, - num_filters_out, - kernel_size, - stride=1, - padding='SAME', - activation=tf.nn.relu, - stddev=0.01, - bias=0.0, - weight_decay=0, - batch_norm_params=None, - is_training=True, - trainable=True, - restore=True, - scope=None, - reuse=None): - """Adds a 2D convolution followed by an optional batch_norm layer. - - conv2d creates a variable called 'weights', representing the convolutional - kernel, that is convolved with the input. If `batch_norm_params` is None, a - second variable called 'biases' is added to the result of the convolution - operation. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_filters_out: the number of output filters. - kernel_size: a list of length 2: [kernel_height, kernel_width] of - of the filters. Can be an int if both values are the same. - stride: a list of length 2: [stride_height, stride_width]. - Can be an int if both strides are the same. Note that presently - both strides must have the same value. - padding: one of 'VALID' or 'SAME'. - activation: activation function. - stddev: standard deviation of the truncated guassian weight distribution. - bias: the initial value of the biases. - weight_decay: the weight decay. - batch_norm_params: parameters for the batch_norm. If is None don't use it. - is_training: whether or not the model is in training mode. - trainable: whether or not the variables should be trainable or not. 
- restore: whether or not the variables should be marked for restore. - scope: Optional scope for variable_scope. - reuse: whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - Returns: - a tensor representing the output of the operation. - - """ - with tf.variable_scope(scope, 'Conv', [inputs], reuse=reuse): - kernel_h, kernel_w = _two_element_tuple(kernel_size) - stride_h, stride_w = _two_element_tuple(stride) - num_filters_in = inputs.get_shape()[-1] - weights_shape = [kernel_h, kernel_w, - num_filters_in, num_filters_out] - weights_initializer = tf.truncated_normal_initializer(stddev=stddev) - l2_regularizer = None - if weight_decay and weight_decay > 0: - l2_regularizer = losses.l2_regularizer(weight_decay) - weights = variables.variable('weights', - shape=weights_shape, - initializer=weights_initializer, - regularizer=l2_regularizer, - trainable=trainable, - restore=restore) - conv = tf.nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1], - padding=padding) - if batch_norm_params is not None: - with scopes.arg_scope([batch_norm], is_training=is_training, - trainable=trainable, restore=restore): - outputs = batch_norm(conv, **batch_norm_params) - else: - bias_shape = [num_filters_out,] - bias_initializer = tf.constant_initializer(bias) - biases = variables.variable('biases', - shape=bias_shape, - initializer=bias_initializer, - trainable=trainable, - restore=restore) - outputs = tf.nn.bias_add(conv, biases) - if activation: - outputs = activation(outputs) - return outputs - - -@scopes.add_arg_scope -def fc(inputs, - num_units_out, - activation=tf.nn.relu, - stddev=0.01, - bias=0.0, - weight_decay=0, - batch_norm_params=None, - is_training=True, - trainable=True, - restore=True, - scope=None, - reuse=None): - """Adds a fully connected layer followed by an optional batch_norm layer. 
- - FC creates a variable called 'weights', representing the fully connected - weight matrix, that is multiplied by the input. If `batch_norm` is None, a - second variable called 'biases' is added to the result of the initial - vector-matrix multiplication. - - Args: - inputs: a [B x N] tensor where B is the batch size and N is the number of - input units in the layer. - num_units_out: the number of output units in the layer. - activation: activation function. - stddev: the standard deviation for the weights. - bias: the initial value of the biases. - weight_decay: the weight decay. - batch_norm_params: parameters for the batch_norm. If is None don't use it. - is_training: whether or not the model is in training mode. - trainable: whether or not the variables should be trainable or not. - restore: whether or not the variables should be marked for restore. - scope: Optional scope for variable_scope. - reuse: whether or not the layer and its variables should be reused. To be - able to reuse the layer scope must be given. - - Returns: - the tensor variable representing the result of the series of operations. 
- """ - with tf.variable_scope(scope, 'FC', [inputs], reuse=reuse): - num_units_in = inputs.get_shape()[1] - weights_shape = [num_units_in, num_units_out] - weights_initializer = tf.truncated_normal_initializer(stddev=stddev) - l2_regularizer = None - if weight_decay and weight_decay > 0: - l2_regularizer = losses.l2_regularizer(weight_decay) - weights = variables.variable('weights', - shape=weights_shape, - initializer=weights_initializer, - regularizer=l2_regularizer, - trainable=trainable, - restore=restore) - if batch_norm_params is not None: - outputs = tf.matmul(inputs, weights) - with scopes.arg_scope([batch_norm], is_training=is_training, - trainable=trainable, restore=restore): - outputs = batch_norm(outputs, **batch_norm_params) - else: - bias_shape = [num_units_out,] - bias_initializer = tf.constant_initializer(bias) - biases = variables.variable('biases', - shape=bias_shape, - initializer=bias_initializer, - trainable=trainable, - restore=restore) - outputs = tf.nn.xw_plus_b(inputs, weights, biases) - if activation: - outputs = activation(outputs) - return outputs - - -def one_hot_encoding(labels, num_classes, scope=None): - """Transform numeric labels into onehot_labels. - - Args: - labels: [batch_size] target labels. - num_classes: total number of classes. - scope: Optional scope for name_scope. - Returns: - one hot encoding of the labels. - """ - with tf.name_scope(scope, 'OneHotEncoding', [labels]): - batch_size = labels.get_shape()[0] - indices = tf.expand_dims(tf.range(0, batch_size), 1) - labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype) - concated = tf.concat(axis=1, values=[indices, labels]) - onehot_labels = tf.sparse_to_dense( - concated, tf.stack([batch_size, num_classes]), 1.0, 0.0) - onehot_labels.set_shape([batch_size, num_classes]) - return onehot_labels - - -@scopes.add_arg_scope -def max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): - """Adds a Max Pooling layer. 
- - It is assumed by the wrapper that the pooling is only done per image and not - in depth or batch. - - Args: - inputs: a tensor of size [batch_size, height, width, depth]. - kernel_size: a list of length 2: [kernel_height, kernel_width] of the - pooling kernel over which the op is computed. Can be an int if both - values are the same. - stride: a list of length 2: [stride_height, stride_width]. - Can be an int if both strides are the same. Note that presently - both strides must have the same value. - padding: the padding method, either 'VALID' or 'SAME'. - scope: Optional scope for name_scope. - - Returns: - a tensor representing the results of the pooling operation. - Raises: - ValueError: if 'kernel_size' is not a 2-D list - """ - with tf.name_scope(scope, 'MaxPool', [inputs]): - kernel_h, kernel_w = _two_element_tuple(kernel_size) - stride_h, stride_w = _two_element_tuple(stride) - return tf.nn.max_pool(inputs, - ksize=[1, kernel_h, kernel_w, 1], - strides=[1, stride_h, stride_w, 1], - padding=padding) - - -@scopes.add_arg_scope -def avg_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None): - """Adds a Avg Pooling layer. - - It is assumed by the wrapper that the pooling is only done per image and not - in depth or batch. - - Args: - inputs: a tensor of size [batch_size, height, width, depth]. - kernel_size: a list of length 2: [kernel_height, kernel_width] of the - pooling kernel over which the op is computed. Can be an int if both - values are the same. - stride: a list of length 2: [stride_height, stride_width]. - Can be an int if both strides are the same. Note that presently - both strides must have the same value. - padding: the padding method, either 'VALID' or 'SAME'. - scope: Optional scope for name_scope. - - Returns: - a tensor representing the results of the pooling operation. 
- """ - with tf.name_scope(scope, 'AvgPool', [inputs]): - kernel_h, kernel_w = _two_element_tuple(kernel_size) - stride_h, stride_w = _two_element_tuple(stride) - return tf.nn.avg_pool(inputs, - ksize=[1, kernel_h, kernel_w, 1], - strides=[1, stride_h, stride_w, 1], - padding=padding) - - -@scopes.add_arg_scope -def dropout(inputs, keep_prob=0.5, is_training=True, scope=None): - """Returns a dropout layer applied to the input. - - Args: - inputs: the tensor to pass to the Dropout layer. - keep_prob: the probability of keeping each input unit. - is_training: whether or not the model is in training mode. If so, dropout is - applied and values scaled. Otherwise, inputs is returned. - scope: Optional scope for name_scope. - - Returns: - a tensor representing the output of the operation. - """ - if is_training and keep_prob > 0: - with tf.name_scope(scope, 'Dropout', [inputs]): - return tf.nn.dropout(inputs, keep_prob) - else: - return inputs - - -def flatten(inputs, scope=None): - """Flattens the input while maintaining the batch_size. - - Assumes that the first dimension represents the batch. - - Args: - inputs: a tensor of size [batch_size, ...]. - scope: Optional scope for name_scope. - - Returns: - a flattened tensor with shape [batch_size, k]. - Raises: - ValueError: if inputs.shape is wrong. - """ - if len(inputs.get_shape()) < 2: - raise ValueError('Inputs must be have a least 2 dimensions') - dims = inputs.get_shape()[1:] - k = dims.num_elements() - with tf.name_scope(scope, 'Flatten', [inputs]): - return tf.reshape(inputs, [-1, k]) - - -def repeat_op(repetitions, inputs, op, *args, **kwargs): - """Build a sequential Tower starting from inputs by using an op repeatedly. - - It creates new scopes for each operation by increasing the counter. 
- Example: given repeat_op(3, _, ops.conv2d, 64, [3, 3], scope='conv1') - it will repeat the given op under the following variable_scopes: - conv1/Conv - conv1/Conv_1 - conv1/Conv_2 - - Args: - repetitions: number or repetitions. - inputs: a tensor of size [batch_size, height, width, channels]. - op: an operation. - *args: args for the op. - **kwargs: kwargs for the op. - - Returns: - a tensor result of applying the operation op, num times. - Raises: - ValueError: if the op is unknown or wrong. - """ - scope = kwargs.pop('scope', None) - with tf.variable_scope(scope, 'RepeatOp', [inputs]): - tower = inputs - for _ in range(repetitions): - tower = op(tower, *args, **kwargs) - return tower diff --git a/research/inception/inception/slim/ops_test.py b/research/inception/inception/slim/ops_test.py deleted file mode 100644 index 13dc5d9aacf6e283540a406d419a67d2d7215161..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/ops_test.py +++ /dev/null @@ -1,687 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for slim.ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import numpy as np -import tensorflow as tf - -from inception.slim import ops -from inception.slim import scopes -from inception.slim import variables - - -class ConvTest(tf.test.TestCase): - - def testCreateConv(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 3]) - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 32]) - - def testCreateSquareConv(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, 3) - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 32]) - - def testCreateConvWithTensorShape(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, images.get_shape()[1:3]) - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 32]) - - def testCreateFullyConv(self): - height, width = 6, 6 - with self.test_session(): - images = tf.random_uniform((5, height, width, 32), seed=1) - output = ops.conv2d(images, 64, images.get_shape()[1:3], padding='VALID') - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 64]) - - def testCreateVerticalConv(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 1]) - self.assertEquals(output.op.name, 'Conv/Relu') - 
self.assertListEqual(output.get_shape().as_list(), - [5, height, width, 32]) - - def testCreateHorizontalConv(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [1, 3]) - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height, width, 32]) - - def testCreateConvWithStride(self): - height, width = 6, 6 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 3], stride=2) - self.assertEquals(output.op.name, 'Conv/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height/2, width/2, 32]) - - def testCreateConvCreatesWeightsAndBiasesVars(self): - height, width = 3, 3 - images = tf.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): - self.assertFalse(variables.get_variables('conv1/weights')) - self.assertFalse(variables.get_variables('conv1/biases')) - ops.conv2d(images, 32, [3, 3], scope='conv1') - self.assertTrue(variables.get_variables('conv1/weights')) - self.assertTrue(variables.get_variables('conv1/biases')) - - def testCreateConvWithScope(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 3], scope='conv1') - self.assertEquals(output.op.name, 'conv1/Relu') - - def testCreateConvWithoutActivation(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 3], activation=None) - self.assertEquals(output.op.name, 'Conv/BiasAdd') - - def testCreateConvValid(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.conv2d(images, 32, [3, 3], padding='VALID') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 32]) - - def 
testCreateConvWithWD(self): - height, width = 3, 3 - with self.test_session() as sess: - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.conv2d(images, 32, [3, 3], weight_decay=0.01) - wd = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)[0] - self.assertEquals(wd.op.name, - 'Conv/weights/Regularizer/L2Regularizer/value') - sess.run(tf.global_variables_initializer()) - self.assertTrue(sess.run(wd) <= 0.01) - - def testCreateConvWithoutWD(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.conv2d(images, 32, [3, 3], weight_decay=0) - self.assertEquals( - tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), []) - - def testReuseVars(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.conv2d(images, 32, [3, 3], scope='conv1') - self.assertEquals(len(variables.get_variables()), 2) - ops.conv2d(images, 32, [3, 3], scope='conv1', reuse=True) - self.assertEquals(len(variables.get_variables()), 2) - - def testNonReuseVars(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.conv2d(images, 32, [3, 3]) - self.assertEquals(len(variables.get_variables()), 2) - ops.conv2d(images, 32, [3, 3]) - self.assertEquals(len(variables.get_variables()), 4) - - def testReuseConvWithWD(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.conv2d(images, 32, [3, 3], weight_decay=0.01, scope='conv1') - self.assertEquals(len(variables.get_variables()), 2) - self.assertEquals( - len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)), 1) - ops.conv2d(images, 32, [3, 3], weight_decay=0.01, scope='conv1', - reuse=True) - self.assertEquals(len(variables.get_variables()), 2) - self.assertEquals( - len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)), 1) - - def 
testConvWithBatchNorm(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 32), seed=1) - with scopes.arg_scope([ops.conv2d], batch_norm_params={'decay': 0.9}): - net = ops.conv2d(images, 32, [3, 3]) - net = ops.conv2d(net, 32, [3, 3]) - self.assertEquals(len(variables.get_variables()), 8) - self.assertEquals(len(variables.get_variables('Conv/BatchNorm')), 3) - self.assertEquals(len(variables.get_variables('Conv_1/BatchNorm')), 3) - - def testReuseConvWithBatchNorm(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 32), seed=1) - with scopes.arg_scope([ops.conv2d], batch_norm_params={'decay': 0.9}): - net = ops.conv2d(images, 32, [3, 3], scope='Conv') - net = ops.conv2d(net, 32, [3, 3], scope='Conv', reuse=True) - self.assertEquals(len(variables.get_variables()), 4) - self.assertEquals(len(variables.get_variables('Conv/BatchNorm')), 3) - self.assertEquals(len(variables.get_variables('Conv_1/BatchNorm')), 0) - - -class FCTest(tf.test.TestCase): - - def testCreateFC(self): - height, width = 3, 3 - with self.test_session(): - inputs = tf.random_uniform((5, height * width * 3), seed=1) - output = ops.fc(inputs, 32) - self.assertEquals(output.op.name, 'FC/Relu') - self.assertListEqual(output.get_shape().as_list(), [5, 32]) - - def testCreateFCWithScope(self): - height, width = 3, 3 - with self.test_session(): - inputs = tf.random_uniform((5, height * width * 3), seed=1) - output = ops.fc(inputs, 32, scope='fc1') - self.assertEquals(output.op.name, 'fc1/Relu') - - def testCreateFcCreatesWeightsAndBiasesVars(self): - height, width = 3, 3 - inputs = tf.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): - self.assertFalse(variables.get_variables('fc1/weights')) - self.assertFalse(variables.get_variables('fc1/biases')) - ops.fc(inputs, 32, scope='fc1') - self.assertTrue(variables.get_variables('fc1/weights')) - 
self.assertTrue(variables.get_variables('fc1/biases')) - - def testReuseVars(self): - height, width = 3, 3 - inputs = tf.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): - ops.fc(inputs, 32, scope='fc1') - self.assertEquals(len(variables.get_variables('fc1')), 2) - ops.fc(inputs, 32, scope='fc1', reuse=True) - self.assertEquals(len(variables.get_variables('fc1')), 2) - - def testNonReuseVars(self): - height, width = 3, 3 - inputs = tf.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): - ops.fc(inputs, 32) - self.assertEquals(len(variables.get_variables('FC')), 2) - ops.fc(inputs, 32) - self.assertEquals(len(variables.get_variables('FC')), 4) - - def testCreateFCWithoutActivation(self): - height, width = 3, 3 - with self.test_session(): - inputs = tf.random_uniform((5, height * width * 3), seed=1) - output = ops.fc(inputs, 32, activation=None) - self.assertEquals(output.op.name, 'FC/xw_plus_b') - - def testCreateFCWithWD(self): - height, width = 3, 3 - with self.test_session() as sess: - inputs = tf.random_uniform((5, height * width * 3), seed=1) - ops.fc(inputs, 32, weight_decay=0.01) - wd = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)[0] - self.assertEquals(wd.op.name, - 'FC/weights/Regularizer/L2Regularizer/value') - sess.run(tf.global_variables_initializer()) - self.assertTrue(sess.run(wd) <= 0.01) - - def testCreateFCWithoutWD(self): - height, width = 3, 3 - with self.test_session(): - inputs = tf.random_uniform((5, height * width * 3), seed=1) - ops.fc(inputs, 32, weight_decay=0) - self.assertEquals( - tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), []) - - def testReuseFCWithWD(self): - height, width = 3, 3 - with self.test_session(): - inputs = tf.random_uniform((5, height * width * 3), seed=1) - ops.fc(inputs, 32, weight_decay=0.01, scope='fc') - self.assertEquals(len(variables.get_variables()), 2) - self.assertEquals( - len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)), 1) - 
ops.fc(inputs, 32, weight_decay=0.01, scope='fc', reuse=True) - self.assertEquals(len(variables.get_variables()), 2) - self.assertEquals( - len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)), 1) - - def testFCWithBatchNorm(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height * width * 3), seed=1) - with scopes.arg_scope([ops.fc], batch_norm_params={}): - net = ops.fc(images, 27) - net = ops.fc(net, 27) - self.assertEquals(len(variables.get_variables()), 8) - self.assertEquals(len(variables.get_variables('FC/BatchNorm')), 3) - self.assertEquals(len(variables.get_variables('FC_1/BatchNorm')), 3) - - def testReuseFCWithBatchNorm(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height * width * 3), seed=1) - with scopes.arg_scope([ops.fc], batch_norm_params={'decay': 0.9}): - net = ops.fc(images, 27, scope='fc1') - net = ops.fc(net, 27, scope='fc1', reuse=True) - self.assertEquals(len(variables.get_variables()), 4) - self.assertEquals(len(variables.get_variables('fc1/BatchNorm')), 3) - - -class MaxPoolTest(tf.test.TestCase): - - def testCreateMaxPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, [3, 3]) - self.assertEquals(output.op.name, 'MaxPool/MaxPool') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - def testCreateSquareMaxPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, 3) - self.assertEquals(output.op.name, 'MaxPool/MaxPool') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - def testCreateMaxPoolWithScope(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, [3, 3], scope='pool1') - self.assertEquals(output.op.name, 
'pool1/MaxPool') - - def testCreateMaxPoolSAME(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, [3, 3], padding='SAME') - self.assertListEqual(output.get_shape().as_list(), [5, 2, 2, 3]) - - def testCreateMaxPoolStrideSAME(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, [3, 3], stride=1, padding='SAME') - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3]) - - def testGlobalMaxPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.max_pool(images, images.get_shape()[1:3], stride=1) - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - -class AvgPoolTest(tf.test.TestCase): - - def testCreateAvgPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, [3, 3]) - self.assertEquals(output.op.name, 'AvgPool/AvgPool') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - def testCreateSquareAvgPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, 3) - self.assertEquals(output.op.name, 'AvgPool/AvgPool') - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - def testCreateAvgPoolWithScope(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, [3, 3], scope='pool1') - self.assertEquals(output.op.name, 'pool1/AvgPool') - - def testCreateAvgPoolSAME(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, [3, 3], padding='SAME') - 
self.assertListEqual(output.get_shape().as_list(), [5, 2, 2, 3]) - - def testCreateAvgPoolStrideSAME(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, [3, 3], stride=1, padding='SAME') - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3]) - - def testGlobalAvgPool(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.avg_pool(images, images.get_shape()[1:3], stride=1) - self.assertListEqual(output.get_shape().as_list(), [5, 1, 1, 3]) - - -class OneHotEncodingTest(tf.test.TestCase): - - def testOneHotEncodingCreate(self): - with self.test_session(): - labels = tf.constant([0, 1, 2]) - output = ops.one_hot_encoding(labels, num_classes=3) - self.assertEquals(output.op.name, 'OneHotEncoding/SparseToDense') - self.assertListEqual(output.get_shape().as_list(), [3, 3]) - - def testOneHotEncoding(self): - with self.test_session(): - labels = tf.constant([0, 1, 2]) - one_hot_labels = tf.constant([[1, 0, 0], - [0, 1, 0], - [0, 0, 1]]) - output = ops.one_hot_encoding(labels, num_classes=3) - self.assertAllClose(output.eval(), one_hot_labels.eval()) - - -class DropoutTest(tf.test.TestCase): - - def testCreateDropout(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.dropout(images) - self.assertEquals(output.op.name, 'Dropout/dropout/mul') - output.get_shape().assert_is_compatible_with(images.get_shape()) - - def testCreateDropoutNoTraining(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1, name='images') - output = ops.dropout(images, is_training=False) - self.assertEquals(output, images) - - -class FlattenTest(tf.test.TestCase): - - def testFlatten4D(self): - height, width = 3, 3 - with self.test_session(): - images = 
tf.random_uniform((5, height, width, 3), seed=1, name='images') - output = ops.flatten(images) - self.assertEquals(output.get_shape().num_elements(), - images.get_shape().num_elements()) - self.assertEqual(output.get_shape()[0], images.get_shape()[0]) - - def testFlatten3D(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width), seed=1, name='images') - output = ops.flatten(images) - self.assertEquals(output.get_shape().num_elements(), - images.get_shape().num_elements()) - self.assertEqual(output.get_shape()[0], images.get_shape()[0]) - - def testFlattenBatchSize(self): - height, width = 3, 3 - with self.test_session() as sess: - images = tf.random_uniform((5, height, width, 3), seed=1, name='images') - inputs = tf.placeholder(tf.int32, (None, height, width, 3)) - output = ops.flatten(inputs) - self.assertEquals(output.get_shape().as_list(), - [None, height * width * 3]) - output = sess.run(output, {inputs: images.eval()}) - self.assertEquals(output.size, - images.get_shape().num_elements()) - self.assertEqual(output.shape[0], images.get_shape()[0]) - - -class BatchNormTest(tf.test.TestCase): - - def testCreateOp(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - output = ops.batch_norm(images) - self.assertTrue(output.op.name.startswith('BatchNorm/batchnorm')) - self.assertListEqual(output.get_shape().as_list(), [5, height, width, 3]) - - def testCreateVariables(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images) - beta = variables.get_variables_by_name('beta')[0] - self.assertEquals(beta.op.name, 'BatchNorm/beta') - gamma = variables.get_variables_by_name('gamma') - self.assertEquals(gamma, []) - moving_mean = tf.moving_average_variables()[0] - moving_variance = tf.moving_average_variables()[1] - self.assertEquals(moving_mean.op.name, 
'BatchNorm/moving_mean') - self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance') - - def testCreateVariablesWithScale(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, scale=True) - beta = variables.get_variables_by_name('beta')[0] - gamma = variables.get_variables_by_name('gamma')[0] - self.assertEquals(beta.op.name, 'BatchNorm/beta') - self.assertEquals(gamma.op.name, 'BatchNorm/gamma') - moving_mean = tf.moving_average_variables()[0] - moving_variance = tf.moving_average_variables()[1] - self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean') - self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance') - - def testCreateVariablesWithoutCenterWithScale(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, center=False, scale=True) - beta = variables.get_variables_by_name('beta') - self.assertEquals(beta, []) - gamma = variables.get_variables_by_name('gamma')[0] - self.assertEquals(gamma.op.name, 'BatchNorm/gamma') - moving_mean = tf.moving_average_variables()[0] - moving_variance = tf.moving_average_variables()[1] - self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean') - self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance') - - def testCreateVariablesWithoutCenterWithoutScale(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, center=False, scale=False) - beta = variables.get_variables_by_name('beta') - self.assertEquals(beta, []) - gamma = variables.get_variables_by_name('gamma') - self.assertEquals(gamma, []) - moving_mean = tf.moving_average_variables()[0] - moving_variance = tf.moving_average_variables()[1] - self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean') - self.assertEquals(moving_variance.op.name, 
'BatchNorm/moving_variance') - - def testMovingAverageVariables(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, scale=True) - moving_mean = tf.moving_average_variables()[0] - moving_variance = tf.moving_average_variables()[1] - self.assertEquals(moving_mean.op.name, 'BatchNorm/moving_mean') - self.assertEquals(moving_variance.op.name, 'BatchNorm/moving_variance') - - def testUpdateOps(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images) - update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION) - update_moving_mean = update_ops[0] - update_moving_variance = update_ops[1] - self.assertEquals(update_moving_mean.op.name, - 'BatchNorm/AssignMovingAvg') - self.assertEquals(update_moving_variance.op.name, - 'BatchNorm/AssignMovingAvg_1') - - def testReuseVariables(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, scale=True, scope='bn') - ops.batch_norm(images, scale=True, scope='bn', reuse=True) - beta = variables.get_variables_by_name('beta') - gamma = variables.get_variables_by_name('gamma') - self.assertEquals(len(beta), 1) - self.assertEquals(len(gamma), 1) - moving_vars = tf.get_collection('moving_vars') - self.assertEquals(len(moving_vars), 2) - - def testReuseUpdateOps(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - ops.batch_norm(images, scope='bn') - self.assertEquals(len(tf.get_collection(ops.UPDATE_OPS_COLLECTION)), 2) - ops.batch_norm(images, scope='bn', reuse=True) - self.assertEquals(len(tf.get_collection(ops.UPDATE_OPS_COLLECTION)), 4) - - def testCreateMovingVars(self): - height, width = 3, 3 - with self.test_session(): - images = tf.random_uniform((5, height, width, 3), seed=1) - _ = ops.batch_norm(images, 
moving_vars='moving_vars') - moving_mean = tf.get_collection('moving_vars', - 'BatchNorm/moving_mean') - self.assertEquals(len(moving_mean), 1) - self.assertEquals(moving_mean[0].op.name, 'BatchNorm/moving_mean') - moving_variance = tf.get_collection('moving_vars', - 'BatchNorm/moving_variance') - self.assertEquals(len(moving_variance), 1) - self.assertEquals(moving_variance[0].op.name, 'BatchNorm/moving_variance') - - def testComputeMovingVars(self): - height, width = 3, 3 - with self.test_session() as sess: - image_shape = (10, height, width, 3) - image_values = np.random.rand(*image_shape) - expected_mean = np.mean(image_values, axis=(0, 1, 2)) - expected_var = np.var(image_values, axis=(0, 1, 2)) - images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) - output = ops.batch_norm(images, decay=0.1) - update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION) - with tf.control_dependencies(update_ops): - output = tf.identity(output) - # Initialize all variables - sess.run(tf.global_variables_initializer()) - moving_mean = variables.get_variables('BatchNorm/moving_mean')[0] - moving_variance = variables.get_variables('BatchNorm/moving_variance')[0] - mean, variance = sess.run([moving_mean, moving_variance]) - # After initialization moving_mean == 0 and moving_variance == 1. - self.assertAllClose(mean, [0] * 3) - self.assertAllClose(variance, [1] * 3) - for _ in range(10): - sess.run([output]) - mean = moving_mean.eval() - variance = moving_variance.eval() - # After 10 updates with decay 0.1 moving_mean == expected_mean and - # moving_variance == expected_var. 
- self.assertAllClose(mean, expected_mean) - self.assertAllClose(variance, expected_var) - - def testEvalMovingVars(self): - height, width = 3, 3 - with self.test_session() as sess: - image_shape = (10, height, width, 3) - image_values = np.random.rand(*image_shape) - expected_mean = np.mean(image_values, axis=(0, 1, 2)) - expected_var = np.var(image_values, axis=(0, 1, 2)) - images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) - output = ops.batch_norm(images, decay=0.1, is_training=False) - update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION) - with tf.control_dependencies(update_ops): - output = tf.identity(output) - # Initialize all variables - sess.run(tf.global_variables_initializer()) - moving_mean = variables.get_variables('BatchNorm/moving_mean')[0] - moving_variance = variables.get_variables('BatchNorm/moving_variance')[0] - mean, variance = sess.run([moving_mean, moving_variance]) - # After initialization moving_mean == 0 and moving_variance == 1. - self.assertAllClose(mean, [0] * 3) - self.assertAllClose(variance, [1] * 3) - # Simulate assigment from saver restore. - init_assigns = [tf.assign(moving_mean, expected_mean), - tf.assign(moving_variance, expected_var)] - sess.run(init_assigns) - for _ in range(10): - sess.run([output], {images: np.random.rand(*image_shape)}) - mean = moving_mean.eval() - variance = moving_variance.eval() - # Although we feed different images, the moving_mean and moving_variance - # shouldn't change. 
- self.assertAllClose(mean, expected_mean) - self.assertAllClose(variance, expected_var) - - def testReuseVars(self): - height, width = 3, 3 - with self.test_session() as sess: - image_shape = (10, height, width, 3) - image_values = np.random.rand(*image_shape) - expected_mean = np.mean(image_values, axis=(0, 1, 2)) - expected_var = np.var(image_values, axis=(0, 1, 2)) - images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) - output = ops.batch_norm(images, decay=0.1, is_training=False) - update_ops = tf.get_collection(ops.UPDATE_OPS_COLLECTION) - with tf.control_dependencies(update_ops): - output = tf.identity(output) - # Initialize all variables - sess.run(tf.global_variables_initializer()) - moving_mean = variables.get_variables('BatchNorm/moving_mean')[0] - moving_variance = variables.get_variables('BatchNorm/moving_variance')[0] - mean, variance = sess.run([moving_mean, moving_variance]) - # After initialization moving_mean == 0 and moving_variance == 1. - self.assertAllClose(mean, [0] * 3) - self.assertAllClose(variance, [1] * 3) - # Simulate assigment from saver restore. - init_assigns = [tf.assign(moving_mean, expected_mean), - tf.assign(moving_variance, expected_var)] - sess.run(init_assigns) - for _ in range(10): - sess.run([output], {images: np.random.rand(*image_shape)}) - mean = moving_mean.eval() - variance = moving_variance.eval() - # Although we feed different images, the moving_mean and moving_variance - # shouldn't change. - self.assertAllClose(mean, expected_mean) - self.assertAllClose(variance, expected_var) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/inception/inception/slim/scopes.py b/research/inception/inception/slim/scopes.py deleted file mode 100644 index 2c2fb0a2efa7d30eaddb36fc30265f30cbaeb9ef..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/scopes.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the new arg_scope used for TF-Slim ops. - - Allows one to define models much more compactly by eliminating boilerplate - code. This is accomplished through the use of argument scoping (arg_scope). - - Example of how to use scopes.arg_scope: - - with scopes.arg_scope(ops.conv2d, padding='SAME', - stddev=0.01, weight_decay=0.0005): - net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') - net = ops.conv2d(net, 256, [5, 5], scope='conv2') - - The first call to conv2d will overwrite padding: - ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', - stddev=0.01, weight_decay=0.0005, scope='conv1') - - The second call to Conv will use predefined args: - ops.conv2d(inputs, 256, [5, 5], padding='SAME', - stddev=0.01, weight_decay=0.0005, scope='conv2') - - Example of how to reuse an arg_scope: - with scopes.arg_scope(ops.conv2d, padding='SAME', - stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope: - net = ops.conv2d(net, 256, [5, 5], scope='conv1') - .... 
- - with scopes.arg_scope(conv2d_arg_scope): - net = ops.conv2d(net, 256, [5, 5], scope='conv2') - - Example of how to use scopes.add_arg_scope: - - @scopes.add_arg_scope - def conv2d(*args, **kwargs) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import functools - -from tensorflow.python.framework import ops - -_ARGSTACK_KEY = ("__arg_stack",) - -_DECORATED_OPS = set() - - -def _get_arg_stack(): - stack = ops.get_collection(_ARGSTACK_KEY) - if stack: - return stack[0] - else: - stack = [{}] - ops.add_to_collection(_ARGSTACK_KEY, stack) - return stack - - -def _current_arg_scope(): - stack = _get_arg_stack() - return stack[-1] - - -def _add_op(op): - key_op = (op.__module__, op.__name__) - if key_op not in _DECORATED_OPS: - _DECORATED_OPS.add(key_op) - - -@contextlib.contextmanager -def arg_scope(list_ops_or_scope, **kwargs): - """Stores the default arguments for the given set of list_ops. - - For usage, please see examples at top of the file. - - Args: - list_ops_or_scope: List or tuple of operations to set argument scope for or - a dictionary containg the current scope. When list_ops_or_scope is a dict, - kwargs must be empty. When list_ops_or_scope is a list or tuple, then - every op in it need to be decorated with @add_arg_scope to work. - **kwargs: keyword=value that will define the defaults for each op in - list_ops. All the ops need to accept the given set of arguments. - - Yields: - the current_scope, which is a dictionary of {op: {arg: value}} - Raises: - TypeError: if list_ops is not a list or a tuple. - ValueError: if any op in list_ops has not be decorated with @add_arg_scope. - """ - if isinstance(list_ops_or_scope, dict): - # Assumes that list_ops_or_scope is a scope that is being reused. 
- if kwargs: - raise ValueError("When attempting to re-use a scope by suppling a" - "dictionary, kwargs must be empty.") - current_scope = list_ops_or_scope.copy() - try: - _get_arg_stack().append(current_scope) - yield current_scope - finally: - _get_arg_stack().pop() - else: - # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs. - if not isinstance(list_ops_or_scope, (list, tuple)): - raise TypeError("list_ops_or_scope must either be a list/tuple or reused" - "scope (i.e. dict)") - try: - current_scope = _current_arg_scope().copy() - for op in list_ops_or_scope: - key_op = (op.__module__, op.__name__) - if not has_arg_scope(op): - raise ValueError("%s is not decorated with @add_arg_scope", key_op) - if key_op in current_scope: - current_kwargs = current_scope[key_op].copy() - current_kwargs.update(kwargs) - current_scope[key_op] = current_kwargs - else: - current_scope[key_op] = kwargs.copy() - _get_arg_stack().append(current_scope) - yield current_scope - finally: - _get_arg_stack().pop() - - -def add_arg_scope(func): - """Decorates a function with args so it can be used within an arg_scope. - - Args: - func: function to decorate. - - Returns: - A tuple with the decorated function func_with_args(). - """ - @functools.wraps(func) - def func_with_args(*args, **kwargs): - current_scope = _current_arg_scope() - current_args = kwargs - key_func = (func.__module__, func.__name__) - if key_func in current_scope: - current_args = current_scope[key_func].copy() - current_args.update(kwargs) - return func(*args, **current_args) - _add_op(func) - return func_with_args - - -def has_arg_scope(func): - """Checks whether a func has been decorated with @add_arg_scope or not. - - Args: - func: function to check. - - Returns: - a boolean. 
- """ - key_op = (func.__module__, func.__name__) - return key_op in _DECORATED_OPS diff --git a/research/inception/inception/slim/scopes_test.py b/research/inception/inception/slim/scopes_test.py deleted file mode 100644 index cd349399ed7300dde38ac9bcb9818abc9d0680b4..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/scopes_test.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests slim.scopes.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf -from inception.slim import scopes - - -@scopes.add_arg_scope -def func1(*args, **kwargs): - return (args, kwargs) - - -@scopes.add_arg_scope -def func2(*args, **kwargs): - return (args, kwargs) - - -class ArgScopeTest(tf.test.TestCase): - - def testEmptyArgScope(self): - with self.test_session(): - self.assertEqual(scopes._current_arg_scope(), {}) - - def testCurrentArgScope(self): - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - key_op = (func1.__module__, func1.__name__) - current_scope = {key_op: func1_kwargs.copy()} - with self.test_session(): - with scopes.arg_scope([func1], a=1, b=None, c=[1]) as scope: - self.assertDictEqual(scope, current_scope) - - def testCurrentArgScopeNested(self): - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - func2_kwargs = {'b': 2, 'd': [2]} - key = lambda f: (f.__module__, f.__name__) - current_scope = {key(func1): func1_kwargs.copy(), - key(func2): func2_kwargs.copy()} - with self.test_session(): - with scopes.arg_scope([func1], a=1, b=None, c=[1]): - with scopes.arg_scope([func2], b=2, d=[2]) as scope: - self.assertDictEqual(scope, current_scope) - - def testReuseArgScope(self): - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - key_op = (func1.__module__, func1.__name__) - current_scope = {key_op: func1_kwargs.copy()} - with self.test_session(): - with scopes.arg_scope([func1], a=1, b=None, c=[1]) as scope1: - pass - with scopes.arg_scope(scope1) as scope: - self.assertDictEqual(scope, current_scope) - - def testReuseArgScopeNested(self): - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - func2_kwargs = {'b': 2, 'd': [2]} - key = lambda f: (f.__module__, f.__name__) - current_scope1 = {key(func1): func1_kwargs.copy()} - current_scope2 = {key(func1): func1_kwargs.copy(), - key(func2): 
func2_kwargs.copy()} - with self.test_session(): - with scopes.arg_scope([func1], a=1, b=None, c=[1]) as scope1: - with scopes.arg_scope([func2], b=2, d=[2]) as scope2: - pass - with scopes.arg_scope(scope1): - self.assertDictEqual(scopes._current_arg_scope(), current_scope1) - with scopes.arg_scope(scope2): - self.assertDictEqual(scopes._current_arg_scope(), current_scope2) - - def testSimpleArgScope(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - with self.test_session(): - with scopes.arg_scope([func1], a=1, b=None, c=[1]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testSimpleArgScopeWithTuple(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - with self.test_session(): - with scopes.arg_scope((func1,), a=1, b=None, c=[1]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testOverwriteArgScope(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': 2, 'c': [1]} - with scopes.arg_scope([func1], a=1, b=None, c=[1]): - args, kwargs = func1(0, b=2) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testNestedArgScope(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - with scopes.arg_scope([func1], a=1, b=None, c=[1]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - func1_kwargs['b'] = 2 - with scopes.arg_scope([func1], b=2): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testSharedArgScope(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - with scopes.arg_scope([func1, func2], a=1, b=None, c=[1]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - args, kwargs = func2(0) - 
self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testSharedArgScopeTuple(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - with scopes.arg_scope((func1, func2), a=1, b=None, c=[1]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - args, kwargs = func2(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - - def testPartiallySharedArgScope(self): - func1_args = (0,) - func1_kwargs = {'a': 1, 'b': None, 'c': [1]} - func2_args = (1,) - func2_kwargs = {'a': 1, 'b': None, 'd': [2]} - with scopes.arg_scope([func1, func2], a=1, b=None): - with scopes.arg_scope([func1], c=[1]), scopes.arg_scope([func2], d=[2]): - args, kwargs = func1(0) - self.assertTupleEqual(args, func1_args) - self.assertDictEqual(kwargs, func1_kwargs) - args, kwargs = func2(1) - self.assertTupleEqual(args, func2_args) - self.assertDictEqual(kwargs, func2_kwargs) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/inception/inception/slim/slim.py b/research/inception/inception/slim/slim.py deleted file mode 100644 index b7a5c0f8c52b66db899835480c331ffafdc386e2..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/slim.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""TF-Slim grouped API. Please see README.md for details and usage.""" -# pylint: disable=unused-import - -# Collapse tf-slim into a single namespace. -from inception.slim import inception_model as inception -from inception.slim import losses -from inception.slim import ops -from inception.slim import scopes -from inception.slim import variables -from inception.slim.scopes import arg_scope diff --git a/research/inception/inception/slim/variables.py b/research/inception/inception/slim/variables.py deleted file mode 100644 index 1d967b79e9563724b1114995a732cfd4dd486afd..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/variables.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains convenience wrappers for creating variables in TF-Slim. - -The variables module is typically used for defining model variables from the -ops routines (see slim.ops). Such variables are used for training, evaluation -and inference of models. - -All the variables created through this module would be added to the -MODEL_VARIABLES collection, if you create a model variable outside slim, it can -be added with slim.variables.add_variable(external_variable, reuse). 
- -Usage: - weights_initializer = tf.truncated_normal_initializer(stddev=0.01) - l2_regularizer = lambda t: losses.l2_loss(t, weight=0.0005) - weights = variables.variable('weights', - shape=[100, 100], - initializer=weights_initializer, - regularizer=l2_regularizer, - device='/cpu:0') - - biases = variables.variable('biases', - shape=[100], - initializer=tf.zeros_initializer(), - device='/cpu:0') - - # More complex example. - - net = slim.ops.conv2d(input, 32, [3, 3], scope='conv1') - net = slim.ops.conv2d(net, 64, [3, 3], scope='conv2') - with slim.arg_scope([variables.variable], restore=False): - net = slim.ops.conv2d(net, 64, [3, 3], scope='conv3') - - # Get all model variables from all the layers. - model_variables = slim.variables.get_variables() - - # Get all model variables from a specific the layer, i.e 'conv1'. - conv1_variables = slim.variables.get_variables('conv1') - - # Get all weights from all the layers. - weights = slim.variables.get_variables_by_name('weights') - - # Get all bias from all the layers. - biases = slim.variables.get_variables_by_name('biases') - - # Get all variables to restore. - # (i.e. only those created by 'conv1' and 'conv2') - variables_to_restore = slim.variables.get_variables_to_restore() - -************************************************ -* Initializing model variables from a checkpoint -************************************************ - -# Create some variables. -v1 = slim.variables.variable(name="v1", ..., restore=False) -v2 = slim.variables.variable(name="v2", ...) # By default restore=True -... -# The list of variables to restore should only contain 'v2'. -variables_to_restore = slim.variables.get_variables_to_restore() -restorer = tf.train.Saver(variables_to_restore) -with tf.Session() as sess: - # Restore variables from disk. - restorer.restore(sess, "/tmp/model.ckpt") - print("Model restored.") - # Do some work with the model - ... 
- -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception.slim import scopes - -# Collection containing all the variables created using slim.variables -MODEL_VARIABLES = '_model_variables_' - -# Collection containing the slim.variables that are created with restore=True. -VARIABLES_TO_RESTORE = '_variables_to_restore_' - - -def add_variable(var, restore=True): - """Adds a variable to the MODEL_VARIABLES collection. - - Optionally it will add the variable to the VARIABLES_TO_RESTORE collection. - Args: - var: a variable. - restore: whether the variable should be added to the - VARIABLES_TO_RESTORE collection. - - """ - collections = [MODEL_VARIABLES] - if restore: - collections.append(VARIABLES_TO_RESTORE) - for collection in collections: - if var not in tf.get_collection(collection): - tf.add_to_collection(collection, var) - - -def get_variables(scope=None, suffix=None): - """Gets the list of variables, filtered by scope and/or suffix. - - Args: - scope: an optional scope for filtering the variables to return. - suffix: an optional suffix for filtering the variables to return. - - Returns: - a copied list of variables with scope and suffix. - """ - candidates = tf.get_collection(MODEL_VARIABLES, scope)[:] - if suffix is not None: - candidates = [var for var in candidates if var.op.name.endswith(suffix)] - return candidates - - -def get_variables_to_restore(): - """Gets the list of variables to restore. - - Returns: - a copied list of variables. - """ - return tf.get_collection(VARIABLES_TO_RESTORE)[:] - - -def get_variables_by_name(given_name, scope=None): - """Gets the list of variables that were given that name. - - Args: - given_name: name given to the variable without scope. - scope: an optional scope for filtering the variables to return. - - Returns: - a copied list of variables with the given name and prefix. 
- """ - return get_variables(scope=scope, suffix=given_name) - - -def get_unique_variable(name): - """Gets the variable uniquely identified by that name. - - Args: - name: a name that uniquely identifies the variable. - - Returns: - a tensorflow variable. - - Raises: - ValueError: if no variable uniquely identified by the name exists. - """ - candidates = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name) - if not candidates: - raise ValueError('Couldnt find variable %s' % name) - - for candidate in candidates: - if candidate.op.name == name: - return candidate - raise ValueError('Variable %s does not uniquely identify a variable', name) - - -class VariableDeviceChooser(object): - """Slim device chooser for variables. - - When using a parameter server it will assign them in a round-robin fashion. - When not using a parameter server it allows GPU:0 placement otherwise CPU:0. - """ - - def __init__(self, - num_parameter_servers=0, - ps_device='/job:ps', - placement='CPU:0'): - """Initialize VariableDeviceChooser. - - Args: - num_parameter_servers: number of parameter servers. - ps_device: string representing the parameter server device. - placement: string representing the placement of the variable either CPU:0 - or GPU:0. When using parameter servers forced to CPU:0. - """ - self._num_ps = num_parameter_servers - self._ps_device = ps_device - self._placement = placement if num_parameter_servers == 0 else 'CPU:0' - self._next_task_id = 0 - - def __call__(self, op): - device_string = '' - if self._num_ps > 0: - task_id = self._next_task_id - self._next_task_id = (self._next_task_id + 1) % self._num_ps - device_string = '%s/task:%d' % (self._ps_device, task_id) - device_string += '/%s' % self._placement - return device_string - - -# TODO(sguada) Remove once get_variable is able to colocate op.devices. 
-def variable_device(device, name): - """Fix the variable device to colocate its ops.""" - if callable(device): - var_name = tf.get_variable_scope().name + '/' + name - var_def = tf.NodeDef(name=var_name, op='Variable') - device = device(var_def) - if device is None: - device = '' - return device - - -@scopes.add_arg_scope -def global_step(device=''): - """Returns the global step variable. - - Args: - device: Optional device to place the variable. It can be an string or a - function that is called to get the device for the variable. - - Returns: - the tensor representing the global step variable. - """ - global_step_ref = tf.get_collection(tf.GraphKeys.GLOBAL_STEP) - if global_step_ref: - return global_step_ref[0] - else: - collections = [ - VARIABLES_TO_RESTORE, - tf.GraphKeys.GLOBAL_VARIABLES, - tf.GraphKeys.GLOBAL_STEP, - ] - # Get the device for the variable. - with tf.device(variable_device(device, 'global_step')): - return tf.get_variable('global_step', shape=[], dtype=tf.int64, - initializer=tf.zeros_initializer(), - trainable=False, collections=collections) - - -@scopes.add_arg_scope -def variable(name, shape=None, dtype=tf.float32, initializer=None, - regularizer=None, trainable=True, collections=None, device='', - restore=True): - """Gets an existing variable with these parameters or creates a new one. - - It also add itself to a group with its name. - - Args: - name: the name of the new or existing variable. - shape: shape of the new or existing variable. - dtype: type of the new or existing variable (defaults to `DT_FLOAT`). - initializer: initializer for the variable if one is created. - regularizer: a (Tensor -> Tensor or None) function; the result of - applying it on a newly created variable will be added to the collection - GraphKeys.REGULARIZATION_LOSSES and can be used for regularization. - trainable: If `True` also add the variable to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 
- collections: A list of collection names to which the Variable will be added. - Note that the variable is always also added to the tf.GraphKeys.GLOBAL_VARIABLES - and MODEL_VARIABLES collections. - device: Optional device to place the variable. It can be an string or a - function that is called to get the device for the variable. - restore: whether the variable should be added to the - VARIABLES_TO_RESTORE collection. - - Returns: - The created or existing variable. - """ - collections = list(collections or []) - - # Make sure variables are added to tf.GraphKeys.GLOBAL_VARIABLES and MODEL_VARIABLES - collections += [tf.GraphKeys.GLOBAL_VARIABLES, MODEL_VARIABLES] - # Add to VARIABLES_TO_RESTORE if necessary - if restore: - collections.append(VARIABLES_TO_RESTORE) - # Remove duplicates - collections = set(collections) - # Get the device for the variable. - with tf.device(variable_device(device, name)): - return tf.get_variable(name, shape=shape, dtype=dtype, - initializer=initializer, regularizer=regularizer, - trainable=trainable, collections=collections) diff --git a/research/inception/inception/slim/variables_test.py b/research/inception/inception/slim/variables_test.py deleted file mode 100644 index b8c1944dfeb0fba7ad99f104b0c366c41d737c63..0000000000000000000000000000000000000000 --- a/research/inception/inception/slim/variables_test.py +++ /dev/null @@ -1,392 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.variables.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from inception.slim import scopes -from inception.slim import variables - - -class VariablesTest(tf.test.TestCase): - - def testCreateVariable(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - self.assertEquals(a.op.name, 'A/a') - self.assertListEqual(a.get_shape().as_list(), [5]) - - def testGetVariables(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - with tf.variable_scope('B'): - b = variables.variable('a', [5]) - self.assertEquals([a, b], variables.get_variables()) - self.assertEquals([a], variables.get_variables('A')) - self.assertEquals([b], variables.get_variables('B')) - - def testGetVariablesSuffix(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - with tf.variable_scope('A'): - b = variables.variable('b', [5]) - self.assertEquals([a], variables.get_variables(suffix='a')) - self.assertEquals([b], variables.get_variables(suffix='b')) - - def testGetVariableWithSingleVar(self): - with self.test_session(): - with tf.variable_scope('parent'): - a = variables.variable('child', [5]) - self.assertEquals(a, variables.get_unique_variable('parent/child')) - - def testGetVariableWithDistractors(self): - with self.test_session(): - with tf.variable_scope('parent'): - a = variables.variable('child', [5]) - with tf.variable_scope('child'): - variables.variable('grandchild1', [7]) - variables.variable('grandchild2', [9]) - self.assertEquals(a, variables.get_unique_variable('parent/child')) - - def testGetVariableThrowsExceptionWithNoMatch(self): - var_name = 
'cant_find_me' - with self.test_session(): - with self.assertRaises(ValueError): - variables.get_unique_variable(var_name) - - def testGetThrowsExceptionWithChildrenButNoMatch(self): - var_name = 'parent/child' - with self.test_session(): - with tf.variable_scope(var_name): - variables.variable('grandchild1', [7]) - variables.variable('grandchild2', [9]) - with self.assertRaises(ValueError): - variables.get_unique_variable(var_name) - - def testGetVariablesToRestore(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - with tf.variable_scope('B'): - b = variables.variable('a', [5]) - self.assertEquals([a, b], variables.get_variables_to_restore()) - - def testNoneGetVariablesToRestore(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5], restore=False) - with tf.variable_scope('B'): - b = variables.variable('a', [5], restore=False) - self.assertEquals([], variables.get_variables_to_restore()) - self.assertEquals([a, b], variables.get_variables()) - - def testGetMixedVariablesToRestore(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - b = variables.variable('b', [5], restore=False) - with tf.variable_scope('B'): - c = variables.variable('c', [5]) - d = variables.variable('d', [5], restore=False) - self.assertEquals([a, b, c, d], variables.get_variables()) - self.assertEquals([a, c], variables.get_variables_to_restore()) - - def testReuseVariable(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', []) - with tf.variable_scope('A', reuse=True): - b = variables.variable('a', []) - self.assertEquals(a, b) - self.assertListEqual([a], variables.get_variables()) - - def testVariableWithDevice(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [], device='cpu:0') - b = variables.variable('b', [], device='cpu:1') - 
self.assertDeviceEqual(a.device, 'cpu:0') - self.assertDeviceEqual(b.device, 'cpu:1') - - def testVariableWithDeviceFromScope(self): - with self.test_session(): - with tf.device('/cpu:0'): - a = variables.variable('a', []) - b = variables.variable('b', [], device='cpu:1') - self.assertDeviceEqual(a.device, 'cpu:0') - self.assertDeviceEqual(b.device, 'cpu:1') - - def testVariableWithDeviceFunction(self): - class DevFn(object): - - def __init__(self): - self.counter = -1 - - def __call__(self, op): - self.counter += 1 - return 'cpu:%d' % self.counter - - with self.test_session(): - with scopes.arg_scope([variables.variable], device=DevFn()): - a = variables.variable('a', []) - b = variables.variable('b', []) - c = variables.variable('c', [], device='cpu:12') - d = variables.variable('d', []) - with tf.device('cpu:99'): - e_init = tf.constant(12) - e = variables.variable('e', initializer=e_init) - self.assertDeviceEqual(a.device, 'cpu:0') - self.assertDeviceEqual(a.initial_value.device, 'cpu:0') - self.assertDeviceEqual(b.device, 'cpu:1') - self.assertDeviceEqual(b.initial_value.device, 'cpu:1') - self.assertDeviceEqual(c.device, 'cpu:12') - self.assertDeviceEqual(c.initial_value.device, 'cpu:12') - self.assertDeviceEqual(d.device, 'cpu:2') - self.assertDeviceEqual(d.initial_value.device, 'cpu:2') - self.assertDeviceEqual(e.device, 'cpu:3') - self.assertDeviceEqual(e.initial_value.device, 'cpu:99') - - def testVariableWithReplicaDeviceSetter(self): - with self.test_session(): - with tf.device(tf.train.replica_device_setter(ps_tasks=2)): - a = variables.variable('a', []) - b = variables.variable('b', []) - c = variables.variable('c', [], device='cpu:12') - d = variables.variable('d', []) - with tf.device('cpu:99'): - e_init = tf.constant(12) - e = variables.variable('e', initializer=e_init) - # The values below highlight how the replica_device_setter puts initial - # values on the worker job, and how it merges explicit devices. 
- self.assertDeviceEqual(a.device, '/job:ps/task:0/cpu:0') - self.assertDeviceEqual(a.initial_value.device, '/job:worker/cpu:0') - self.assertDeviceEqual(b.device, '/job:ps/task:1/cpu:0') - self.assertDeviceEqual(b.initial_value.device, '/job:worker/cpu:0') - self.assertDeviceEqual(c.device, '/job:ps/task:0/cpu:12') - self.assertDeviceEqual(c.initial_value.device, '/job:worker/cpu:12') - self.assertDeviceEqual(d.device, '/job:ps/task:1/cpu:0') - self.assertDeviceEqual(d.initial_value.device, '/job:worker/cpu:0') - self.assertDeviceEqual(e.device, '/job:ps/task:0/cpu:0') - self.assertDeviceEqual(e.initial_value.device, '/job:worker/cpu:99') - - def testVariableWithVariableDeviceChooser(self): - - with tf.Graph().as_default(): - device_fn = variables.VariableDeviceChooser(num_parameter_servers=2) - with scopes.arg_scope([variables.variable], device=device_fn): - a = variables.variable('a', []) - b = variables.variable('b', []) - c = variables.variable('c', [], device='cpu:12') - d = variables.variable('d', []) - with tf.device('cpu:99'): - e_init = tf.constant(12) - e = variables.variable('e', initializer=e_init) - # The values below highlight how the VariableDeviceChooser puts initial - # values on the same device as the variable job. 
- self.assertDeviceEqual(a.device, '/job:ps/task:0/cpu:0') - self.assertDeviceEqual(a.initial_value.device, a.device) - self.assertDeviceEqual(b.device, '/job:ps/task:1/cpu:0') - self.assertDeviceEqual(b.initial_value.device, b.device) - self.assertDeviceEqual(c.device, '/cpu:12') - self.assertDeviceEqual(c.initial_value.device, c.device) - self.assertDeviceEqual(d.device, '/job:ps/task:0/cpu:0') - self.assertDeviceEqual(d.initial_value.device, d.device) - self.assertDeviceEqual(e.device, '/job:ps/task:1/cpu:0') - self.assertDeviceEqual(e.initial_value.device, '/cpu:99') - - def testVariableGPUPlacement(self): - - with tf.Graph().as_default(): - device_fn = variables.VariableDeviceChooser(placement='gpu:0') - with scopes.arg_scope([variables.variable], device=device_fn): - a = variables.variable('a', []) - b = variables.variable('b', []) - c = variables.variable('c', [], device='cpu:12') - d = variables.variable('d', []) - with tf.device('cpu:99'): - e_init = tf.constant(12) - e = variables.variable('e', initializer=e_init) - # The values below highlight how the VariableDeviceChooser puts initial - # values on the same device as the variable job. 
- self.assertDeviceEqual(a.device, '/gpu:0') - self.assertDeviceEqual(a.initial_value.device, a.device) - self.assertDeviceEqual(b.device, '/gpu:0') - self.assertDeviceEqual(b.initial_value.device, b.device) - self.assertDeviceEqual(c.device, '/cpu:12') - self.assertDeviceEqual(c.initial_value.device, c.device) - self.assertDeviceEqual(d.device, '/gpu:0') - self.assertDeviceEqual(d.initial_value.device, d.device) - self.assertDeviceEqual(e.device, '/gpu:0') - self.assertDeviceEqual(e.initial_value.device, '/cpu:99') - - def testVariableCollection(self): - with self.test_session(): - a = variables.variable('a', [], collections='A') - b = variables.variable('b', [], collections='B') - self.assertEquals(a, tf.get_collection('A')[0]) - self.assertEquals(b, tf.get_collection('B')[0]) - - def testVariableCollections(self): - with self.test_session(): - a = variables.variable('a', [], collections=['A', 'C']) - b = variables.variable('b', [], collections=['B', 'C']) - self.assertEquals(a, tf.get_collection('A')[0]) - self.assertEquals(b, tf.get_collection('B')[0]) - - def testVariableCollectionsWithArgScope(self): - with self.test_session(): - with scopes.arg_scope([variables.variable], collections='A'): - a = variables.variable('a', []) - b = variables.variable('b', []) - self.assertListEqual([a, b], tf.get_collection('A')) - - def testVariableCollectionsWithArgScopeNested(self): - with self.test_session(): - with scopes.arg_scope([variables.variable], collections='A'): - a = variables.variable('a', []) - with scopes.arg_scope([variables.variable], collections='B'): - b = variables.variable('b', []) - self.assertEquals(a, tf.get_collection('A')[0]) - self.assertEquals(b, tf.get_collection('B')[0]) - - def testVariableCollectionsWithArgScopeNonNested(self): - with self.test_session(): - with scopes.arg_scope([variables.variable], collections='A'): - a = variables.variable('a', []) - with scopes.arg_scope([variables.variable], collections='B'): - b = variables.variable('b', 
[]) - variables.variable('c', []) - self.assertListEqual([a], tf.get_collection('A')) - self.assertListEqual([b], tf.get_collection('B')) - - def testVariableRestoreWithArgScopeNested(self): - with self.test_session(): - with scopes.arg_scope([variables.variable], restore=True): - a = variables.variable('a', []) - with scopes.arg_scope([variables.variable], - trainable=False, - collections=['A', 'B']): - b = variables.variable('b', []) - c = variables.variable('c', []) - self.assertListEqual([a, b, c], variables.get_variables_to_restore()) - self.assertListEqual([a, c], tf.trainable_variables()) - self.assertListEqual([b], tf.get_collection('A')) - self.assertListEqual([b], tf.get_collection('B')) - - -class GetVariablesByNameTest(tf.test.TestCase): - - def testGetVariableGivenNameScoped(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - b = variables.variable('b', [5]) - self.assertEquals([a], variables.get_variables_by_name('a')) - self.assertEquals([b], variables.get_variables_by_name('b')) - - def testGetVariablesByNameReturnsByValueWithScope(self): - with self.test_session(): - with tf.variable_scope('A'): - a = variables.variable('a', [5]) - matched_variables = variables.get_variables_by_name('a') - - # If variables.get_variables_by_name returns the list by reference, the - # following append should persist, and be returned, in subsequent calls - # to variables.get_variables_by_name('a'). - matched_variables.append(4) - - matched_variables = variables.get_variables_by_name('a') - self.assertEquals([a], matched_variables) - - def testGetVariablesByNameReturnsByValueWithoutScope(self): - with self.test_session(): - a = variables.variable('a', [5]) - matched_variables = variables.get_variables_by_name('a') - - # If variables.get_variables_by_name returns the list by reference, the - # following append should persist, and be returned, in subsequent calls - # to variables.get_variables_by_name('a'). 
- matched_variables.append(4) - - matched_variables = variables.get_variables_by_name('a') - self.assertEquals([a], matched_variables) - - -class GlobalStepTest(tf.test.TestCase): - - def testStable(self): - with tf.Graph().as_default(): - gs = variables.global_step() - gs2 = variables.global_step() - self.assertTrue(gs is gs2) - - def testDevice(self): - with tf.Graph().as_default(): - with scopes.arg_scope([variables.global_step], device='/gpu:0'): - gs = variables.global_step() - self.assertDeviceEqual(gs.device, '/gpu:0') - - def testDeviceFn(self): - class DevFn(object): - - def __init__(self): - self.counter = -1 - - def __call__(self, op): - self.counter += 1 - return '/cpu:%d' % self.counter - - with tf.Graph().as_default(): - with scopes.arg_scope([variables.global_step], device=DevFn()): - gs = variables.global_step() - gs2 = variables.global_step() - self.assertDeviceEqual(gs.device, '/cpu:0') - self.assertEquals(gs, gs2) - self.assertDeviceEqual(gs2.device, '/cpu:0') - - def testReplicaDeviceSetter(self): - device_fn = tf.train.replica_device_setter(2) - with tf.Graph().as_default(): - with scopes.arg_scope([variables.global_step], device=device_fn): - gs = variables.global_step() - gs2 = variables.global_step() - self.assertEquals(gs, gs2) - self.assertDeviceEqual(gs.device, '/job:ps/task:0') - self.assertDeviceEqual(gs.initial_value.device, '/job:ps/task:0') - self.assertDeviceEqual(gs2.device, '/job:ps/task:0') - self.assertDeviceEqual(gs2.initial_value.device, '/job:ps/task:0') - - def testVariableWithVariableDeviceChooser(self): - - with tf.Graph().as_default(): - device_fn = variables.VariableDeviceChooser() - with scopes.arg_scope([variables.global_step], device=device_fn): - gs = variables.global_step() - gs2 = variables.global_step() - self.assertEquals(gs, gs2) - self.assertDeviceEqual(gs.device, 'cpu:0') - self.assertDeviceEqual(gs.initial_value.device, gs.device) - self.assertDeviceEqual(gs2.device, 'cpu:0') - 
self.assertDeviceEqual(gs2.initial_value.device, gs2.device) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/keypointnet/CONTRIBUTING.md b/research/keypointnet/CONTRIBUTING.md deleted file mode 100644 index 939e5341e74dc2371c8b47f0e27b50581bed5f63..0000000000000000000000000000000000000000 --- a/research/keypointnet/CONTRIBUTING.md +++ /dev/null @@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google.com/conduct/). diff --git a/research/keypointnet/LICENSE b/research/keypointnet/LICENSE deleted file mode 100644 index d645695673349e3947e8e5ae42332d0ac3164cd7..0000000000000000000000000000000000000000 --- a/research/keypointnet/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/research/keypointnet/README.md b/research/keypointnet/README.md deleted file mode 100644 index 8de88ca5a18816984302a9c20639364a7c8cde53..0000000000000000000000000000000000000000 --- a/research/keypointnet/README.md +++ /dev/null @@ -1,46 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# KeypointNet -This is an implementation of the keypoint network proposed in "Discovery of -Latent 3D Keypoints via End-to-end Geometric Reasoning -[[pdf](https://arxiv.org/pdf/1807.03146.pdf)]". 
Given a single 2D image of a -known class, this network can predict a set of 3D keypoints that are consistent -across viewing angles of the same object and across object instances. These -keypoints and their detectors are discovered and learned automatically without -keypoint location supervision [[demo](https://keypointnet.github.io)]. - -## Datasets: - ShapeNet's rendering for - [Cars](https://storage.googleapis.com/discovery-3dkeypoints-data/cars_with_keypoints.zip), - [Planes](https://storage.googleapis.com/discovery-3dkeypoints-data/planes_with_keypoints.zip), - [Chairs](https://storage.googleapis.com/discovery-3dkeypoints-data/chairs_with_keypoints.zip). - - Each set contains: -1. tfrecords -2. train.txt, a list of tfrecords used for training. -2. dev.txt, a list of tfrecords used for validation. -3. test.txt, a list of tfrecords used for testing. -4. projection.txt, storing the global 4x4 camera projection matrix. -5. job.txt, storing ShapeNet's object IDs in each tfrecord. - -## Training: - Run `main.py --model_dir=MODEL_DIR --dset=DSET` - - where MODEL_DIR is a folder for storing model checkpoints: (see [tf.estimator](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator)), and DSET should point to the folder containing tfrecords (download above). - -## Inference: - Run `main.py --model_dir=MODEL_DIR --input=INPUT --predict` - - where MODEL_DIR is the model checkpoint folder, and INPUT is a folder containing png or jpeg test images. - We trained the network using the total batch size of 256 (8 x 32 replicas). You may have to tune the learning rate if your batch size is different. 
- -## Code credit: - Supasorn Suwajanakorn - -## Contact: - supasorn@gmail.com, [snavely,tompson,mnorouzi]@google.com - - -(This is not an officially supported Google product) diff --git a/research/keypointnet/main.py b/research/keypointnet/main.py deleted file mode 100644 index 04b30159404e01529c898ee75fb1ed78f705f539..0000000000000000000000000000000000000000 --- a/research/keypointnet/main.py +++ /dev/null @@ -1,697 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""KeypointNet!! - -A reimplementation of 'Discovery of Latent 3D Keypoints via End-to-end -Geometric Reasoning' keypoint network. Given a single 2D image of a known class, -this network can predict a set of 3D keypoints that are consistent across -viewing angles of the same object and across object instances. These keypoints -and their detectors are discovered and learned automatically without -keypoint location supervision. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import matplotlib.pyplot as plt -import numpy as np -import os -from scipy import misc -import sys -import tensorflow as tf -import tensorflow.contrib.slim as slim -import utils - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_boolean("predict", False, "Running inference if true") -tf.app.flags.DEFINE_string( - "input", - "", - "Input folder containing images") -tf.app.flags.DEFINE_string("model_dir", None, "Estimator model_dir") -tf.app.flags.DEFINE_string( - "dset", - "", - "Path to the directory containing the dataset.") -tf.app.flags.DEFINE_integer("steps", 200000, "Training steps") -tf.app.flags.DEFINE_integer("batch_size", 8, "Size of mini-batch.") -tf.app.flags.DEFINE_string( - "hparams", "", - "A comma-separated list of `name=value` hyperparameter values. This flag " - "is used to override hyperparameter settings either when manually " - "selecting hyperparameters or when using Vizier.") -tf.app.flags.DEFINE_integer( - "sync_replicas", -1, - "If > 0, use SyncReplicasOptimizer and use this many replicas per sync.") - -# Fixed input size 128 x 128. -vw = vh = 128 - - -def create_input_fn(split, batch_size): - """Returns input_fn for tf.estimator.Estimator. - - Reads tfrecords and construts input_fn for either training or eval. All - tfrecords not in test.txt or dev.txt will be assigned to training set. - - Args: - split: A string indicating the split. Can be either 'train' or 'validation'. - batch_size: The batch size! - - Returns: - input_fn for tf.estimator.Estimator. - - Raises: - IOError: If test.txt or dev.txt are not found. 
- """ - - if (not os.path.exists(os.path.join(FLAGS.dset, "test.txt")) or - not os.path.exists(os.path.join(FLAGS.dset, "dev.txt"))): - raise IOError("test.txt or dev.txt not found") - - with open(os.path.join(FLAGS.dset, "test.txt"), "r") as f: - testset = [x.strip() for x in f.readlines()] - - with open(os.path.join(FLAGS.dset, "dev.txt"), "r") as f: - validset = [x.strip() for x in f.readlines()] - - files = os.listdir(FLAGS.dset) - filenames = [] - for f in files: - sp = os.path.splitext(f) - if sp[1] != ".tfrecord" or sp[0] in testset: - continue - - if ((split == "validation" and sp[0] in validset) or - (split == "train" and sp[0] not in validset)): - filenames.append(os.path.join(FLAGS.dset, f)) - - def input_fn(): - """input_fn for tf.estimator.Estimator.""" - - def parser(serialized_example): - """Parses a single tf.Example into image and label tensors.""" - fs = tf.parse_single_example( - serialized_example, - features={ - "img0": tf.FixedLenFeature([], tf.string), - "img1": tf.FixedLenFeature([], tf.string), - "mv0": tf.FixedLenFeature([16], tf.float32), - "mvi0": tf.FixedLenFeature([16], tf.float32), - "mv1": tf.FixedLenFeature([16], tf.float32), - "mvi1": tf.FixedLenFeature([16], tf.float32), - }) - - fs["img0"] = tf.div(tf.to_float(tf.image.decode_png(fs["img0"], 4)), 255) - fs["img1"] = tf.div(tf.to_float(tf.image.decode_png(fs["img1"], 4)), 255) - - fs["img0"].set_shape([vh, vw, 4]) - fs["img1"].set_shape([vh, vw, 4]) - - # fs["lr0"] = [fs["mv0"][0]] - # fs["lr1"] = [fs["mv1"][0]] - - fs["lr0"] = tf.convert_to_tensor([fs["mv0"][0]]) - fs["lr1"] = tf.convert_to_tensor([fs["mv1"][0]]) - - return fs - - np.random.shuffle(filenames) - dataset = tf.data.TFRecordDataset(filenames) - dataset = dataset.map(parser, num_parallel_calls=4) - dataset = dataset.shuffle(400).repeat().batch(batch_size) - dataset = dataset.prefetch(buffer_size=256) - - return dataset.make_one_shot_iterator().get_next(), None - - return input_fn - - -class Transformer(object): - """A 
utility for projecting 3D points to 2D coordinates and vice versa. - - 3D points are represented in 4D-homogeneous world coordinates. The pixel - coordinates are represented in normalized device coordinates [-1, 1]. - See https://learnopengl.com/Getting-started/Coordinate-Systems. - """ - - def __get_matrix(self, lines): - return np.array([[float(y) for y in x.strip().split(" ")] for x in lines]) - - def __read_projection_matrix(self, filename): - if not os.path.exists(filename): - filename = "/cns/vz-d/home/supasorn/datasets/cars/projection.txt" - with open(filename, "r") as f: - lines = f.readlines() - return self.__get_matrix(lines) - - def __init__(self, w, h, dataset_dir): - self.w = w - self.h = h - p = self.__read_projection_matrix(dataset_dir + "projection.txt") - - # transposed of inversed projection matrix. - self.pinv_t = tf.constant([[1.0 / p[0, 0], 0, 0, - 0], [0, 1.0 / p[1, 1], 0, 0], [0, 0, 1, 0], - [0, 0, 0, 1]]) - self.f = p[0, 0] - - def project(self, xyzw): - """Projects homogeneous 3D coordinates to normalized device coordinates.""" - - z = xyzw[:, :, 2:3] + 1e-8 - return tf.concat([-self.f * xyzw[:, :, :2] / z, z], axis=2) - - def unproject(self, xyz): - """Unprojects normalized device coordinates with depth to 3D coordinates.""" - - z = xyz[:, :, 2:] - xy = -xyz * z - - def batch_matmul(a, b): - return tf.reshape( - tf.matmul(tf.reshape(a, [-1, a.shape[2].value]), b), - [-1, a.shape[1].value, a.shape[2].value]) - - return batch_matmul( - tf.concat([xy[:, :, :2], z, tf.ones_like(z)], axis=2), self.pinv_t) - - -def meshgrid(h): - """Returns a meshgrid ranging from [-1, 1] in x, y axes.""" - - r = np.arange(0.5, h, 1) / (h / 2) - 1 - ranx, rany = tf.meshgrid(r, -r) - return tf.to_float(ranx), tf.to_float(rany) - - -def estimate_rotation(xyz0, xyz1, pconf, noise): - """Estimates the rotation between two sets of keypoints. 
- - The rotation is estimated by first subtracting mean from each set of keypoints - and computing SVD of the covariance matrix. - - Args: - xyz0: [batch, num_kp, 3] The first set of keypoints. - xyz1: [batch, num_kp, 3] The second set of keypoints. - pconf: [batch, num_kp] The weights used to compute the rotation estimate. - noise: A number indicating the noise added to the keypoints. - - Returns: - [batch, 3, 3] A batch of transposed 3 x 3 rotation matrices. - """ - - xyz0 += tf.random_normal(tf.shape(xyz0), mean=0, stddev=noise) - xyz1 += tf.random_normal(tf.shape(xyz1), mean=0, stddev=noise) - - pconf2 = tf.expand_dims(pconf, 2) - cen0 = tf.reduce_sum(xyz0 * pconf2, 1, keepdims=True) - cen1 = tf.reduce_sum(xyz1 * pconf2, 1, keepdims=True) - - x = xyz0 - cen0 - y = xyz1 - cen1 - - cov = tf.matmul(tf.matmul(x, tf.matrix_diag(pconf), transpose_a=True), y) - _, u, v = tf.svd(cov, full_matrices=True) - - d = tf.matrix_determinant(tf.matmul(v, u, transpose_b=True)) - ud = tf.concat( - [u[:, :, :-1], u[:, :, -1:] * tf.expand_dims(tf.expand_dims(d, 1), 1)], - axis=2) - return tf.matmul(ud, v, transpose_b=True) - - -def relative_pose_loss(xyz0, xyz1, rot, pconf, noise): - """Computes the relative pose loss (chordal, angular). - - Args: - xyz0: [batch, num_kp, 3] The first set of keypoints. - xyz1: [batch, num_kp, 3] The second set of keypoints. - rot: [batch, 4, 4] The ground-truth rotation matrices. - pconf: [batch, num_kp] The weights used to compute the rotation estimate. - noise: A number indicating the noise added to the keypoints. - - Returns: - A tuple (chordal loss, angular loss). - """ - - r_transposed = estimate_rotation(xyz0, xyz1, pconf, noise) - rotation = rot[:, :3, :3] - frob_sqr = tf.reduce_sum(tf.square(r_transposed - rotation), axis=[1, 2]) - frob = tf.sqrt(frob_sqr) - - return tf.reduce_mean(frob_sqr), \ - 2.0 * tf.reduce_mean(tf.asin(tf.minimum(1.0, frob / (2 * math.sqrt(2))))) - - -def separation_loss(xyz, delta): - """Computes the separation loss. 
- - Args: - xyz: [batch, num_kp, 3] Input keypoints. - delta: A separation threshold. Incur 0 cost if the distance >= delta. - - Returns: - The seperation loss. - """ - - num_kp = tf.shape(xyz)[1] - t1 = tf.tile(xyz, [1, num_kp, 1]) - - t2 = tf.reshape(tf.tile(xyz, [1, 1, num_kp]), tf.shape(t1)) - diffsq = tf.square(t1 - t2) - - # -> [batch, num_kp ^ 2] - lensqr = tf.reduce_sum(diffsq, axis=2) - - return (tf.reduce_sum(tf.maximum(-lensqr + delta, 0.0)) / tf.to_float( - num_kp * FLAGS.batch_size * 2)) - - -def consistency_loss(uv0, uv1, pconf): - """Computes multi-view consistency loss between two sets of keypoints. - - Args: - uv0: [batch, num_kp, 2] The first set of keypoint 2D coordinates. - uv1: [batch, num_kp, 2] The second set of keypoint 2D coordinates. - pconf: [batch, num_kp] The weights used to compute the rotation estimate. - - Returns: - The consistency loss. - """ - - # [batch, num_kp, 2] - wd = tf.square(uv0 - uv1) * tf.expand_dims(pconf, 2) - wd = tf.reduce_sum(wd, axis=[1, 2]) - return tf.reduce_mean(wd) - - -def variance_loss(probmap, ranx, rany, uv): - """Computes the variance loss as part of Sillhouette consistency. - - Args: - probmap: [batch, num_kp, h, w] The distribution map of keypoint locations. - ranx: X-axis meshgrid. - rany: Y-axis meshgrid. - uv: [batch, num_kp, 2] Keypoint locations (in NDC). - - Returns: - The variance loss. - """ - - ran = tf.stack([ranx, rany], axis=2) - - sh = tf.shape(ran) - # [batch, num_kp, vh, vw, 2] - ran = tf.reshape(ran, [1, 1, sh[0], sh[1], 2]) - - sh = tf.shape(uv) - uv = tf.reshape(uv, [sh[0], sh[1], 1, 1, 2]) - - diff = tf.reduce_sum(tf.square(uv - ran), axis=4) - diff *= probmap - - return tf.reduce_mean(tf.reduce_sum(diff, axis=[2, 3])) - - -def dilated_cnn(images, num_filters, is_training): - """Constructs a base dilated convolutional network. - - Args: - images: [batch, h, w, 3] Input RGB images. - num_filters: The number of filters for all layers. 
- is_training: True if this function is called during training. - - Returns: - Output of this dilated CNN. - """ - - net = images - - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - normalizer_fn=slim.batch_norm, - activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1), - normalizer_params={"is_training": is_training}): - for i, r in enumerate([1, 1, 2, 4, 8, 16, 1, 2, 4, 8, 16, 1]): - net = slim.conv2d(net, num_filters, [3, 3], rate=r, scope="dconv%d" % i) - - return net - - -def orientation_network(images, num_filters, is_training): - """Constructs a network that infers the orientation of an object. - - Args: - images: [batch, h, w, 3] Input RGB images. - num_filters: The number of filters for all layers. - is_training: True if this function is called during training. - - Returns: - Output of the orientation network. - """ - - with tf.variable_scope("OrientationNetwork"): - net = dilated_cnn(images, num_filters, is_training) - - modules = 2 - prob = slim.conv2d(net, 2, [3, 3], rate=1, activation_fn=None) - prob = tf.transpose(prob, [0, 3, 1, 2]) - - prob = tf.reshape(prob, [-1, modules, vh * vw]) - prob = tf.nn.softmax(prob) - ranx, rany = meshgrid(vh) - - prob = tf.reshape(prob, [-1, 2, vh, vw]) - - sx = tf.reduce_sum(prob * ranx, axis=[2, 3]) - sy = tf.reduce_sum(prob * rany, axis=[2, 3]) # -> batch x modules - - out_xy = tf.reshape(tf.stack([sx, sy], -1), [-1, modules, 2]) - - return out_xy - - -def keypoint_network(rgba, - num_filters, - num_kp, - is_training, - lr_gt=None, - anneal=1): - """Constructs our main keypoint network that predicts 3D keypoints. - - Args: - rgba: [batch, h, w, 4] Input RGB images with alpha channel. - num_filters: The number of filters for all layers. - num_kp: The number of keypoints. - is_training: True if this function is called during training. - lr_gt: The groundtruth orientation flag used at the beginning of training. - Then we linearly anneal in the prediction. 
- anneal: A number between [0, 1] where 1 means using the ground-truth - orientation and 0 means using our estimate. - - Returns: - uv: [batch, num_kp, 2] 2D locations of keypoints. - z: [batch, num_kp] The depth of keypoints. - orient: [batch, 2, 2] Two 2D coordinates that correspond to [1, 0, 0] and - [-1, 0, 0] in object space. - sill: The Sillhouette loss. - variance: The variance loss. - prob_viz: A visualization of all predicted keypoints. - prob_vizs: A list of visualizations of each keypoint. - - """ - - images = rgba[:, :, :, :3] - - # [batch, 1] - orient = orientation_network(images, num_filters * 0.5, is_training) - - # [batch, 1] - lr_estimated = tf.maximum(0.0, tf.sign(orient[:, 0, :1] - orient[:, 1, :1])) - - if lr_gt is None: - lr = lr_estimated - else: - lr_gt = tf.maximum(0.0, tf.sign(lr_gt[:, :1])) - lr = tf.round(lr_gt * anneal + lr_estimated * (1 - anneal)) - - lrtiled = tf.tile( - tf.expand_dims(tf.expand_dims(lr, 1), 1), - [1, images.shape[1], images.shape[2], 1]) - - images = tf.concat([images, lrtiled], axis=3) - - mask = rgba[:, :, :, 3] - mask = tf.cast(tf.greater(mask, tf.zeros_like(mask)), dtype=tf.float32) - - net = dilated_cnn(images, num_filters, is_training) - - # The probability distribution map. - prob = slim.conv2d( - net, num_kp, [3, 3], rate=1, scope="conv_xy", activation_fn=None) - - # We added the fixed camera distance as a bias. - z = -30 + slim.conv2d( - net, num_kp, [3, 3], rate=1, scope="conv_z", activation_fn=None) - - prob = tf.transpose(prob, [0, 3, 1, 2]) - z = tf.transpose(z, [0, 3, 1, 2]) - - prob = tf.reshape(prob, [-1, num_kp, vh * vw]) - prob = tf.nn.softmax(prob, name="softmax") - - ranx, rany = meshgrid(vh) - prob = tf.reshape(prob, [-1, num_kp, vh, vw]) - - # These are for visualizing the distribution maps. 
- prob_viz = tf.expand_dims(tf.reduce_sum(prob, 1), 3) - prob_vizs = [tf.expand_dims(prob[:, i, :, :], 3) for i in range(num_kp)] - - sx = tf.reduce_sum(prob * ranx, axis=[2, 3]) - sy = tf.reduce_sum(prob * rany, axis=[2, 3]) # -> batch x num_kp - - # [batch, num_kp] - sill = tf.reduce_sum(prob * tf.expand_dims(mask, 1), axis=[2, 3]) - sill = tf.reduce_mean(-tf.log(sill + 1e-12)) - - z = tf.reduce_sum(prob * z, axis=[2, 3]) - uv = tf.reshape(tf.stack([sx, sy], -1), [-1, num_kp, 2]) - - variance = variance_loss(prob, ranx, rany, uv) - - return uv, z, orient, sill, variance, prob_viz, prob_vizs - - -def model_fn(features, labels, mode, hparams): - """Returns model_fn for tf.estimator.Estimator.""" - - del labels - - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - t = Transformer(vw, vh, FLAGS.dset) - - def func1(x): - return tf.transpose(tf.reshape(features[x], [-1, 4, 4]), [0, 2, 1]) - - mv = [func1("mv%d" % i) for i in range(2)] - mvi = [func1("mvi%d" % i) for i in range(2)] - - uvz = [None] * 2 - uvz_proj = [None] * 2 # uvz coordinates projected on to the other view. - viz = [None] * 2 - vizs = [None] * 2 - - loss_sill = 0 - loss_variance = 0 - loss_con = 0 - loss_sep = 0 - loss_lr = 0 - - for i in range(2): - with tf.variable_scope("KeypointNetwork", reuse=i > 0): - # anneal: 1 = using ground-truth, 0 = using our estimate orientation. - anneal = tf.to_float(hparams.lr_anneal_end - tf.train.get_global_step()) - anneal = tf.clip_by_value( - anneal / (hparams.lr_anneal_end - hparams.lr_anneal_start), 0.0, 1.0) - - uv, z, orient, sill, variance, viz[i], vizs[i] = keypoint_network( - features["img%d" % i], - hparams.num_filters, - hparams.num_kp, - is_training, - lr_gt=features["lr%d" % i], - anneal=anneal) - - # x-positive/negative axes (dominant direction). 
- xp_axis = tf.tile( - tf.constant([[[1.0, 0, 0, 1], [-1.0, 0, 0, 1]]]), - [tf.shape(orient)[0], 1, 1]) - - # [batch, 2, 4] = [batch, 2, 4] x [batch, 4, 4] - xp = tf.matmul(xp_axis, mv[i]) - - # [batch, 2, 3] - xp = t.project(xp) - - loss_lr += tf.losses.mean_squared_error(orient[:, :, :2], xp[:, :, :2]) - loss_variance += variance - loss_sill += sill - - uv = tf.reshape(uv, [-1, hparams.num_kp, 2]) - z = tf.reshape(z, [-1, hparams.num_kp, 1]) - - # [batch, num_kp, 3] - uvz[i] = tf.concat([uv, z], axis=2) - - world_coords = tf.matmul(t.unproject(uvz[i]), mvi[i]) - - # [batch, num_kp, 3] - uvz_proj[i] = t.project(tf.matmul(world_coords, mv[1 - i])) - - pconf = tf.ones( - [tf.shape(uv)[0], tf.shape(uv)[1]], dtype=tf.float32) / hparams.num_kp - - for i in range(2): - loss_con += consistency_loss(uvz_proj[i][:, :, :2], uvz[1 - i][:, :, :2], - pconf) - loss_sep += separation_loss( - t.unproject(uvz[i])[:, :, :3], hparams.sep_delta) - - chordal, angular = relative_pose_loss( - t.unproject(uvz[0])[:, :, :3], - t.unproject(uvz[1])[:, :, :3], tf.matmul(mvi[0], mv[1]), pconf, - hparams.noise) - - loss = ( - hparams.loss_pose * angular + - hparams.loss_con * loss_con + - hparams.loss_sep * loss_sep + - hparams.loss_sill * loss_sill + - hparams.loss_lr * loss_lr + - hparams.loss_variance * loss_variance - ) - - def touint8(img): - return tf.cast(img * 255.0, tf.uint8) - - with tf.variable_scope("output"): - tf.summary.image("0_img0", touint8(features["img0"][:, :, :, :3])) - tf.summary.image("1_combined", viz[0]) - for i in range(hparams.num_kp): - tf.summary.image("2_f%02d" % i, vizs[0][i]) - - with tf.variable_scope("stats"): - tf.summary.scalar("anneal", anneal) - tf.summary.scalar("closs", loss_con) - tf.summary.scalar("seploss", loss_sep) - tf.summary.scalar("angular", angular) - tf.summary.scalar("chordal", chordal) - tf.summary.scalar("lrloss", loss_lr) - tf.summary.scalar("sill", loss_sill) - tf.summary.scalar("vloss", loss_variance) - - return { - "loss": loss, - 
"predictions": { - "img0": features["img0"], - "img1": features["img1"], - "uvz0": uvz[0], - "uvz1": uvz[1] - }, - "eval_metric_ops": { - "closs": tf.metrics.mean(loss_con), - "angular_loss": tf.metrics.mean(angular), - "chordal_loss": tf.metrics.mean(chordal), - } - } - - -def predict(input_folder, hparams): - """Predicts keypoints on all images in input_folder.""" - - cols = plt.cm.get_cmap("rainbow")( - np.linspace(0, 1.0, hparams.num_kp))[:, :4] - - img = tf.placeholder(tf.float32, shape=(1, 128, 128, 4)) - - with tf.variable_scope("KeypointNetwork"): - ret = keypoint_network( - img, hparams.num_filters, hparams.num_kp, False) - - uv = tf.reshape(ret[0], [-1, hparams.num_kp, 2]) - z = tf.reshape(ret[1], [-1, hparams.num_kp, 1]) - uvz = tf.concat([uv, z], axis=2) - - sess = tf.Session() - saver = tf.train.Saver() - ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir) - - print("loading model: ", ckpt.model_checkpoint_path) - saver.restore(sess, ckpt.model_checkpoint_path) - - files = [x for x in os.listdir(input_folder) - if x[-3:] in ["jpg", "png"]] - - output_folder = os.path.join(input_folder, "output") - if not os.path.exists(output_folder): - os.mkdir(output_folder) - - for f in files: - orig = misc.imread(os.path.join(input_folder, f)).astype(float) / 255 - if orig.shape[2] == 3: - orig = np.concatenate((orig, np.ones_like(orig[:, :, :1])), axis=2) - - uv_ret = sess.run(uvz, feed_dict={img: np.expand_dims(orig, 0)}) - - utils.draw_ndc_points(orig, uv_ret.reshape(hparams.num_kp, 3), cols) - misc.imsave(os.path.join(output_folder, f), orig) - - -def _default_hparams(): - """Returns default or overridden user-specified hyperparameters.""" - - hparams = tf.contrib.training.HParams( - num_filters=64, # Number of filters. - num_kp=10, # Numer of keypoints. - - loss_pose=0.2, # Pose Loss. - loss_con=1.0, # Multiview consistency Loss. - loss_sep=1.0, # Seperation Loss. - loss_sill=1.0, # Sillhouette Loss. - loss_lr=1.0, # Orientation Loss. 
- loss_variance=0.5, # Variance Loss (part of Sillhouette loss). - - sep_delta=0.05, # Seperation threshold. - noise=0.1, # Noise added during estimating rotation. - - learning_rate=1.0e-3, - lr_anneal_start=30000, # When to anneal in the orientation prediction. - lr_anneal_end=60000, # When to use the prediction completely. - ) - if FLAGS.hparams: - hparams = hparams.parse(FLAGS.hparams) - return hparams - - -def main(argv): - del argv - - hparams = _default_hparams() - - if FLAGS.predict: - predict(FLAGS.input, hparams) - else: - utils.train_and_eval( - model_dir=FLAGS.model_dir, - model_fn=model_fn, - input_fn=create_input_fn, - hparams=hparams, - steps=FLAGS.steps, - batch_size=FLAGS.batch_size, - save_checkpoints_secs=600, - eval_throttle_secs=1800, - eval_steps=5, - sync_replicas=FLAGS.sync_replicas, - ) - - -if __name__ == "__main__": - sys.excepthook = utils.colored_hook( - os.path.dirname(os.path.realpath(__file__))) - tf.app.run() diff --git a/research/keypointnet/tools/gen_tfrecords.py b/research/keypointnet/tools/gen_tfrecords.py deleted file mode 100644 index 2f973b7fe5f16951dbfa01edd2a759b96b4f79db..0000000000000000000000000000000000000000 --- a/research/keypointnet/tools/gen_tfrecords.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -"""An example script to generate a tfrecord file from a folder containing the -renderings. - -Example usage: - python gen_tfrecords.py --input=FOLDER --output=output.tfrecord - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import os -from scipy import misc -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_string("input", "", "Input folder containing images") -tf.app.flags.DEFINE_string("output", "", "Output tfrecord.") - - -def get_matrix(lines): - return np.array([[float(y) for y in x.strip().split(" ")] for x in lines]) - - -def read_model_view_matrices(filename): - with open(filename, "r") as f: - lines = f.readlines() - return get_matrix(lines[:4]), get_matrix(lines[4:]) - - -def bytes_feature(values): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) - - -def generate(): - with tf.python_io.TFRecordWriter(FLAGS.output) as tfrecord_writer: - with tf.Graph().as_default(): - im0 = tf.placeholder(dtype=tf.uint8) - im1 = tf.placeholder(dtype=tf.uint8) - encoded0 = tf.image.encode_png(im0) - encoded1 = tf.image.encode_png(im1) - - with tf.Session() as sess: - count = 0 - indir = FLAGS.input + "/" - while tf.gfile.Exists(indir + "%06d.txt" % count): - print("saving %06d" % count) - image0 = misc.imread(indir + "%06d.png" % (count * 2)) - image1 = misc.imread(indir + "%06d.png" % (count * 2 + 1)) - - mat0, mat1 = read_model_view_matrices(indir + "%06d.txt" % count) - - mati0 = np.linalg.inv(mat0).flatten() - mati1 = np.linalg.inv(mat1).flatten() - mat0 = mat0.flatten() - mat1 = mat1.flatten() - - st0, st1 = sess.run([encoded0, encoded1], - feed_dict={im0: image0, im1: image1}) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'img0': bytes_feature(st0), - 'img1': bytes_feature(st1), - 'mv0': tf.train.Feature( - 
float_list=tf.train.FloatList(value=mat0)), - 'mvi0': tf.train.Feature( - float_list=tf.train.FloatList(value=mati0)), - 'mv1': tf.train.Feature( - float_list=tf.train.FloatList(value=mat1)), - 'mvi1': tf.train.Feature( - float_list=tf.train.FloatList(value=mati1)), - })) - - tfrecord_writer.write(example.SerializeToString()) - count += 1 - - -def main(argv): - del argv - generate() - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/keypointnet/tools/render.py b/research/keypointnet/tools/render.py deleted file mode 100644 index 3a8872675d83cc414d6348dbc7a56e924541b8d7..0000000000000000000000000000000000000000 --- a/research/keypointnet/tools/render.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Script to render object views from ShapeNet obj models. 
- -Example usage: - blender -b --python render.py -- -m model.obj -o output/ -s 128 -n 120 -fov 5 - -""" -from __future__ import print_function - -import argparse -import itertools -import json -from math import pi -import os -import random -import sys -from mathutils import Vector -import math -import mathutils -import time -import copy - -import bpy - -sys.path.append(os.path.dirname(__file__)) - -BG_LUMINANCE = 0 - - -def look_at(obj_camera, point): - loc_camera = obj_camera.location - direction = point - loc_camera - # point the cameras '-Z' and use its 'Y' as up - rot_quat = direction.to_track_quat('-Z', 'Y') - - obj_camera.rotation_euler = rot_quat.to_euler() - - -def roll_camera(obj_camera): - roll_rotate = mathutils.Euler( - (0, 0, random.random() * math.pi - math.pi * 0.5), 'XYZ') - obj_camera.rotation_euler = (obj_camera.rotation_euler.to_matrix() * - roll_rotate.to_matrix()).to_euler() - - -def norm(x): - return math.sqrt(x[0] * x[0] + x[1] * x[1] + x[2] * x[2]) - - -def normalize(x): - n = norm(x) - x[0] /= n - x[1] /= n - x[2] /= n - - -def random_top_sphere(): - xyz = [random.normalvariate(0, 1) for x in range(3)] - normalize(xyz) - - if xyz[2] < 0: - xyz[2] *= -1 - return xyz - - -def perturb_sphere(loc, size): - while True: - xyz = [random.normalvariate(0, 1) for x in range(3)] - normalize(xyz) - - nloc = [loc[i] + xyz[i] * random.random() * size for i in range(3)] - normalize(nloc) - - if nloc[2] >= 0: - return nloc - - -def perturb(loc, size): - while True: - nloc = [loc[i] + random.random() * size * 2 - size for i in range(3)] - if nloc[2] >= 0: - return nloc - - bpy.ops.object.mode_set() - - -def delete_all_objects(): - bpy.ops.object.select_by_type(type="MESH") - bpy.ops.object.delete(use_global=False) - - -def set_scene(render_size, fov, alpha=False): - """Set up default scene properties.""" - delete_all_objects() - - cam = bpy.data.cameras["Camera"] - cam.angle = fov * pi / 180 - - light = bpy.data.objects["Lamp"] - light.location = (0, 0, 1) 
- look_at(light, Vector((0.0, 0, 0))) - bpy.data.lamps['Lamp'].type = "HEMI" - bpy.data.lamps['Lamp'].energy = 1 - bpy.data.lamps['Lamp'].use_specular = False - bpy.data.lamps['Lamp'].use_diffuse = True - - bpy.context.scene.world.horizon_color = ( - BG_LUMINANCE, BG_LUMINANCE, BG_LUMINANCE) - - bpy.context.scene.render.resolution_x = render_size - bpy.context.scene.render.resolution_y = render_size - bpy.context.scene.render.resolution_percentage = 100 - - bpy.context.scene.render.use_antialiasing = True - bpy.context.scene.render.antialiasing_samples = '5' - - -def get_modelview_matrix(): - cam = bpy.data.objects["Camera"] - bpy.context.scene.update() - - # when apply to object with CV coordinate i.e. to_blender * obj - # this gives object in blender coordinate - to_blender = mathutils.Matrix( - ((1., 0., 0., 0.), - (0., 0., -1., 0.), - (0., 1., 0., 0.), - (0., 0., 0., 1.))) - return cam.matrix_world.inverted() * to_blender - - -def print_matrix(f, mat): - for i in range(4): - for j in range(4): - f.write("%lf " % mat[i][j]) - f.write("\n") - - -def mul(loc, v): - return [loc[i] * v for i in range(3)] - - -def merge_all(): - bpy.ops.object.select_by_type(type="MESH") - bpy.context.scene.objects.active = bpy.context.selected_objects[0] - bpy.ops.object.join() - obj = bpy.context.scene.objects.active - bpy.ops.object.origin_set(type="ORIGIN_CENTER_OF_MASS") - return obj - - -def insert_frame(obj, frame_number): - obj.keyframe_insert(data_path="location", frame=frame_number) - obj.keyframe_insert(data_path="rotation_euler", frame=frame_number) - obj.keyframe_insert(data_path="scale", frame=frame_number) - - -def render(output_prefix): - bpy.context.scene.render.filepath = output_prefix - bpy.context.scene.render.image_settings.file_format = "PNG" - bpy.context.scene.render.alpha_mode = "TRANSPARENT" - bpy.context.scene.render.image_settings.color_mode = "RGBA" - bpy.ops.render.render(write_still=True, animation=True) - - -def render_obj( - obj_fn, save_dir, n, 
perturb_size, rotate=False, roll=False, scale=1.0): - - # Load object. - bpy.ops.import_scene.obj(filepath=obj_fn) - cur_obj = merge_all() - - scale = 2.0 / max(cur_obj.dimensions) * scale - cur_obj.scale = (scale, scale, scale) - # Using the center of mass as the origin doesn't really work, because Blender - # assumes the object is a solid shell. This seems to generate better-looking - # rotations. - - bpy.ops.object.origin_set(type='ORIGIN_GEOMETRY', center='BOUNDS') - - # bpy.ops.mesh.primitive_cube_add(location=(0, 0, 1)) - # cube = bpy.data.objects["Cube"] - # cube.scale = (0.2, 0.2, 0.2) - - for polygon in cur_obj.data.polygons: - polygon.use_smooth = True - - bpy.ops.object.select_all(action="DESELECT") - - camera = bpy.data.objects["Camera"] - - # os.system("mkdir " + save_dir) - for i in range(n): - fo = open(save_dir + "/%06d.txt" % i, "w") - d = 30 - shift = 0.2 - if rotate: - t = 1.0 * i / (n-1) * 2 * math.pi - loc = [math.sin(t), math.cos(t), 1] - - normalize(loc) - camera.location = mul(loc, d) - look_at(camera, Vector((0.0, 0, 0))) - - print_matrix(fo, get_modelview_matrix()) - print_matrix(fo, get_modelview_matrix()) - - insert_frame(camera, 2 * i) - insert_frame(camera, 2 * i + 1) - - else: - loc = random_top_sphere() - - camera.location = mul(loc, d) - look_at(camera, Vector((0.0, 0, 0))) - - if roll: - roll_camera(camera) - camera.location = perturb(mul(loc, d), shift) - - print_matrix(fo, get_modelview_matrix()) - insert_frame(camera, 2 * i) - - if perturb_size > 0: - loc = perturb_sphere(loc, perturb_size) - else: - loc = random_top_sphere() - - camera.location = mul(loc, d) - look_at(camera, Vector((0.0, 0, 0))) - if roll: - roll_camera(camera) - camera.location = perturb(mul(loc, d), shift) - - print_matrix(fo, get_modelview_matrix()) - insert_frame(camera, 2 * i + 1) - - fo.close() - - # Create a bunch of views of the object - bpy.context.scene.frame_start = 0 - bpy.context.scene.frame_end = 2 * n - 1 - - stem = os.path.join(save_dir, 
'######') - render(stem) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-m', '--model', dest='model', - required=True, - help='Path to model obj file.') - parser.add_argument('-o', '--output_dir', dest='output_dir', - required=True, - help='Where to output files.') - parser.add_argument('-s', '--output_size', dest='output_size', - required=True, - help='Width and height of output in pixels, e.g. 32x32.') - parser.add_argument('-n', '--num_frames', dest='n', type=int, - required=True, - help='Number of frames to generate per clip.') - - parser.add_argument('-scale', '--scale', dest='scale', type=float, - help='object scaling', default=1) - - parser.add_argument('-perturb', '--perturb', dest='perturb', type=float, - help='sphere perturbation', default=0) - - parser.add_argument('-rotate', '--rotate', dest='rotate', action='store_true', - help='render rotating test set') - - parser.add_argument('-roll', '--roll', dest='roll', action='store_true', - help='add roll') - - parser.add_argument( - '-fov', '--fov', dest='fov', type=float, required=True, - help='field of view') - - if '--' not in sys.argv: - parser.print_help() - exit(1) - - argv = sys.argv[sys.argv.index('--') + 1:] - args, _ = parser.parse_known_args(argv) - - random.seed(args.model + str(time.time()) + str(os.getpid())) - # random.seed(0) - - set_scene(int(args.output_size), args.fov) - render_obj( - args.model, args.output_dir, args.n, args.perturb, args.rotate, - args.roll, args.scale) - exit() - - -if __name__ == '__main__': - main() diff --git a/research/keypointnet/utils.py b/research/keypointnet/utils.py deleted file mode 100644 index 148b7a3ed843638cff597be0c462b7e335df9857..0000000000000000000000000000000000000000 --- a/research/keypointnet/utils.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -"""Utility functions for KeypointNet. - -These are helper / tensorflow related functions. The actual implementation and -algorithm is in main.py. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import numpy as np -import os -import re -import tensorflow as tf -import tensorflow.contrib.slim as slim -import time -import traceback - - -class TrainingHook(tf.train.SessionRunHook): - """A utility for displaying training information such as the loss, percent - completed, estimated finish date and time.""" - - def __init__(self, steps): - self.steps = steps - - self.last_time = time.time() - self.last_est = self.last_time - - self.eta_interval = int(math.ceil(0.1 * self.steps)) - self.current_interval = 0 - - def before_run(self, run_context): - graph = tf.get_default_graph() - return tf.train.SessionRunArgs( - {"loss": graph.get_collection("total_loss")[0]}) - - def after_run(self, run_context, run_values): - step = run_context.session.run(tf.train.get_global_step()) - now = time.time() - - if self.current_interval < self.eta_interval: - self.duration = now - self.last_est - self.current_interval += 1 - if step % self.eta_interval == 0: - self.duration = now - self.last_est - self.last_est = now - - eta_time = float(self.steps - step) / self.current_interval * \ - self.duration - m, s = divmod(eta_time, 60) - h, m = divmod(m, 60) - eta = "%d:%02d:%02d" % (h, m, s) - - print("%.2f%% (%d/%d): %.3e 
t %.3f @ %s (%s)" % ( - step * 100.0 / self.steps, - step, - self.steps, - run_values.results["loss"], - now - self.last_time, - time.strftime("%a %d %H:%M:%S", time.localtime(time.time() + eta_time)), - eta)) - - self.last_time = now - - -def standard_model_fn( - func, steps, run_config=None, sync_replicas=0, optimizer_fn=None): - """Creates model_fn for tf.Estimator. - - Args: - func: A model_fn with prototype model_fn(features, labels, mode, hparams). - steps: Training steps. - run_config: tf.estimatorRunConfig (usually passed in from TF_CONFIG). - sync_replicas: The number of replicas used to compute gradient for - synchronous training. - optimizer_fn: The type of the optimizer. Default to Adam. - - Returns: - model_fn for tf.estimator.Estimator. - """ - - def fn(features, labels, mode, params): - """Returns model_fn for tf.estimator.Estimator.""" - - is_training = (mode == tf.estimator.ModeKeys.TRAIN) - ret = func(features, labels, mode, params) - - tf.add_to_collection("total_loss", ret["loss"]) - train_op = None - - training_hooks = [] - if is_training: - training_hooks.append(TrainingHook(steps)) - - if optimizer_fn is None: - optimizer = tf.train.AdamOptimizer(params.learning_rate) - else: - optimizer = optimizer_fn - - if run_config is not None and run_config.num_worker_replicas > 1: - sr = sync_replicas - if sr <= 0: - sr = run_config.num_worker_replicas - - optimizer = tf.train.SyncReplicasOptimizer( - optimizer, - replicas_to_aggregate=sr, - total_num_replicas=run_config.num_worker_replicas) - - training_hooks.append( - optimizer.make_session_run_hook( - run_config.is_chief, num_tokens=run_config.num_worker_replicas)) - - optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, 5) - train_op = slim.learning.create_train_op(ret["loss"], optimizer) - - if "eval_metric_ops" not in ret: - ret["eval_metric_ops"] = {} - - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=ret["predictions"], - loss=ret["loss"], - train_op=train_op, - 
eval_metric_ops=ret["eval_metric_ops"], - training_hooks=training_hooks) - return fn - - -def train_and_eval( - model_dir, - steps, - batch_size, - model_fn, - input_fn, - hparams, - keep_checkpoint_every_n_hours=0.5, - save_checkpoints_secs=180, - save_summary_steps=50, - eval_steps=20, - eval_start_delay_secs=10, - eval_throttle_secs=300, - sync_replicas=0): - """Trains and evaluates our model. Supports local and distributed training. - - Args: - model_dir: The output directory for trained parameters, checkpoints, etc. - steps: Training steps. - batch_size: Batch size. - model_fn: A func with prototype model_fn(features, labels, mode, hparams). - input_fn: A input function for the tf.estimator.Estimator. - hparams: tf.HParams containing a set of hyperparameters. - keep_checkpoint_every_n_hours: Number of hours between each checkpoint - to be saved. - save_checkpoints_secs: Save checkpoints every this many seconds. - save_summary_steps: Save summaries every this many steps. - eval_steps: Number of steps to evaluate model. - eval_start_delay_secs: Start evaluating after waiting for this many seconds. - eval_throttle_secs: Do not re-evaluate unless the last evaluation was - started at least this many seconds ago - sync_replicas: Number of synchronous replicas for distributed training. 
- - Returns: - None - """ - - run_config = tf.estimator.RunConfig( - keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, - save_checkpoints_secs=save_checkpoints_secs, - save_summary_steps=save_summary_steps) - - estimator = tf.estimator.Estimator( - model_dir=model_dir, - model_fn=standard_model_fn( - model_fn, - steps, - run_config, - sync_replicas=sync_replicas), - params=hparams, config=run_config) - - train_spec = tf.estimator.TrainSpec( - input_fn=input_fn(split="train", batch_size=batch_size), - max_steps=steps) - - eval_spec = tf.estimator.EvalSpec( - input_fn=input_fn(split="validation", batch_size=batch_size), - steps=eval_steps, - start_delay_secs=eval_start_delay_secs, - throttle_secs=eval_throttle_secs) - - tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) - - -def draw_circle(rgb, u, v, col, r): - """Draws a simple anti-aliasing circle in-place. - - Args: - rgb: Input image to be modified. - u: Horizontal coordinate. - v: Vertical coordinate. - col: Color. - r: Radius. - """ - - ir = int(math.ceil(r)) - for i in range(-ir-1, ir+2): - for j in range(-ir-1, ir+2): - nu = int(round(u + i)) - nv = int(round(v + j)) - if nu < 0 or nu >= rgb.shape[1] or nv < 0 or nv >= rgb.shape[0]: - continue - - du = abs(nu - u) - dv = abs(nv - v) - - # need sqrt to keep scale - t = math.sqrt(du * du + dv * dv) - math.sqrt(r * r) - if t < 0: - rgb[nv, nu, :] = col - else: - t = 1 - t - if t > 0: - # t = t ** 0.3 - rgb[nv, nu, :] = col * t + rgb[nv, nu, :] * (1-t) - - -def draw_ndc_points(rgb, xy, cols): - """Draws keypoints onto an input image. - - Args: - rgb: Input image to be modified. - xy: [n x 2] matrix of 2D locations. - cols: A list of colors for the keypoints. 
- """ - - vh, vw = rgb.shape[0], rgb.shape[1] - - for j in range(len(cols)): - x, y = xy[j, :2] - x = (min(max(x, -1), 1) * vw / 2 + vw / 2) - 0.5 - y = vh - 0.5 - (min(max(y, -1), 1) * vh / 2 + vh / 2) - - x = int(round(x)) - y = int(round(y)) - if x < 0 or y < 0 or x >= vw or y >= vh: - continue - - rad = 1.5 - rad *= rgb.shape[0] / 128.0 - draw_circle(rgb, x, y, np.array([0.0, 0.0, 0.0, 1.0]), rad * 1.5) - draw_circle(rgb, x, y, cols[j], rad) - - -def colored_hook(home_dir): - """Colorizes python's error message. - - Args: - home_dir: directory where code resides (to highlight your own files). - Returns: - The traceback hook. - """ - - def hook(type_, value, tb): - def colorize(text, color, own=0): - """Returns colorized text.""" - endcolor = "\x1b[0m" - codes = { - "green": "\x1b[0;32m", - "green_own": "\x1b[1;32;40m", - "red": "\x1b[0;31m", - "red_own": "\x1b[1;31m", - "yellow": "\x1b[0;33m", - "yellow_own": "\x1b[1;33m", - "black": "\x1b[0;90m", - "black_own": "\x1b[1;90m", - "cyan": "\033[1;36m", - } - return codes[color + ("_own" if own else "")] + text + endcolor - - for filename, line_num, func, text in traceback.extract_tb(tb): - basename = os.path.basename(filename) - own = (home_dir in filename) or ("/" not in filename) - - print(colorize("\"" + basename + '"', "green", own) + " in " + func) - print("%s: %s" % ( - colorize("%5d" % line_num, "red", own), - colorize(text, "yellow", own))) - print(" %s" % colorize(filename, "black", own)) - - print(colorize("%s: %s" % (type_.__name__, value), "cyan")) - return hook diff --git a/research/learned_optimizer/.gitignore b/research/learned_optimizer/.gitignore deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/learned_optimizer/BUILD b/research/learned_optimizer/BUILD deleted file mode 100644 index 629c9a06b51d10eb7cab69ed0d9dd0bfa52fd2f0..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/BUILD +++ 
/dev/null @@ -1,33 +0,0 @@ -# Learning to Optimize Learning (LOL) - -package(default_visibility = ["//visibility:public"]) - -# Libraries -# ========= - -py_library( - name = "metaopt", - srcs = ["metaopt.py"], - deps = [ - "//learned_optimizer/problems:datasets", - "//learned_optimizer/problems:problem_generator", - ], -) - -# Binaries -# ======== -py_binary( - name = "metarun", - srcs = ["metarun.py"], - deps = [ - ":metaopt", - "//learned_optimizer/optimizer:coordinatewise_rnn", - "//learned_optimizer/optimizer:global_learning_rate", - "//learned_optimizer/optimizer:hierarchical_rnn", - "//learned_optimizer/optimizer:learning_rate_schedule", - "//learned_optimizer/optimizer:trainable_adam", - "//learned_optimizer/problems:problem_sets", - "//learned_optimizer/problems:problem_spec", - ], -) - diff --git a/research/learned_optimizer/README.md b/research/learned_optimizer/README.md deleted file mode 100644 index 6a32514f053f97bc64dc87c4ec972c8223a83fe2..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/README.md +++ /dev/null @@ -1,47 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Learned Optimizer - -Code for [Learned Optimizers that Scale and Generalize](https://arxiv.org/abs/1703.04813). - -## Requirements - -* Bazel ([install](https://bazel.build/versions/master/docs/install.html)) -* TensorFlow >= v1.3 -* Python 2.7.x - -## Training a Learned Optimizer - -## Code Overview -In the top-level directory, ```metaopt.py``` contains the code to train and test a learned optimizer. 
```metarun.py``` packages the actual training procedure into a -single file, defining and exposing many flags to tune the procedure, from selecting the optimizer type and problem set to more fine-grained hyperparameter settings. -There is no testing binary; testing can be done ad-hoc via ```metaopt.test_optimizer``` by passing an optimizer object and a directory with a checkpoint. - -The ```optimizer``` directory contains a base ```trainable_optimizer.py``` class and a number of extensions, including the ```hierarchical_rnn``` optimizer used in -the paper, a ```coordinatewise_rnn``` optimizer that more closely matches previous work, and a number of simpler optimizers to demonstrate the basic mechanics of -a learnable optimizer. - -The ```problems``` directory contains the code to build the problems that were used in the meta-training set. - -### Binaries -```metarun.py```: meta-training of a learned optimizer - -### Command-Line Flags -The flags most relevant to meta-training are defined in ```metarun.py```. The default values will meta-train a HierarchicalRNN optimizer with the hyperparameter -settings used in the paper. - -### Using a Learned Optimizer as a Black Box -The ```trainable_optimizer``` inherits from ```tf.train.Optimizer```, so a properly instantiated version can be used to train any model in any APIs that accept -this class. There are just 2 caveats: - -1. If using the Hierarchical RNN optimizer, the apply_gradients return type must be changed (see comments inline for what exactly must be removed) - -2. Care must be taken to restore the variables from the optimizer without overriding them. Optimizer variables should be loaded manually using a pretrained checkpoint -and a ```tf.train.Saver``` with only the optimizer variables. Then, when constructing the session, ensure that any automatic variable initialization does not -re-initialize the loaded optimizer variables. 
- -## Contact for Issues - -* Olga Wichrowska (@olganw), Niru Maheswaranathan (@nirum) diff --git a/research/learned_optimizer/metaopt.py b/research/learned_optimizer/metaopt.py deleted file mode 100644 index 62c06272d3096ed63296744792c8742826380536..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/metaopt.py +++ /dev/null @@ -1,639 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Helper utilities for training and testing optimizers.""" - -from collections import defaultdict -import random -import sys -import time - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from learned_optimizer.optimizer import trainable_optimizer -from learned_optimizer.optimizer import utils -from learned_optimizer.problems import datasets -from learned_optimizer.problems import problem_generator - -tf.app.flags.DEFINE_integer("ps_tasks", 0, - """Number of tasks in the ps job. 
- If 0 no ps job is used.""") -tf.app.flags.DEFINE_float("nan_l2_reg", 1e-2, - """Strength of l2-reg when NaNs are encountered.""") -tf.app.flags.DEFINE_float("l2_reg", 0., - """Lambda value for parameter regularization.""") -# Default is 0.9 -tf.app.flags.DEFINE_float("rms_decay", 0.9, - """Decay value for the RMSProp metaoptimizer.""") -# Default is 1e-10 -tf.app.flags.DEFINE_float("rms_epsilon", 1e-20, - """Epsilon value for the RMSProp metaoptimizer.""") -tf.app.flags.DEFINE_boolean("set_profiling", False, - """Enable memory usage and computation time """ - """tracing for tensorflow nodes (available in """ - """TensorBoard).""") -tf.app.flags.DEFINE_boolean("reset_rnn_params", True, - """Reset the parameters of the optimizer - from one meta-iteration to the next.""") - -FLAGS = tf.app.flags.FLAGS -OPTIMIZER_SCOPE = "LOL" -OPT_SUM_COLLECTION = "LOL_summaries" - - -def sigmoid_weights(n, slope=0.1, offset=5): - """Generates a sigmoid, scaled to sum to 1. - - This function is used to generate weights that serve to mask out - the early objective values of an optimization problem such that - initial variation in the objective is phased out (hence the sigmoid - starts at zero and ramps up to the maximum value, and the total - weight is normalized to sum to one) - - Args: - n: the number of samples - slope: slope of the sigmoid (Default: 0.1) - offset: threshold of the sigmoid (Default: 5) - - Returns: - No - """ - x = np.arange(n) - y = 1. / (1. + np.exp(-slope * (x-offset))) - y_normalized = y / np.sum(y) - return y_normalized - - -def sample_numiter(scale, min_steps=50): - """Samples a number of iterations from an exponential distribution. - - Args: - scale: parameter for the exponential distribution - min_steps: minimum number of steps to run (additive) - - Returns: - num_steps: An integer equal to a rounded sample from the exponential - distribution + the value of min_steps. 
- """ - return int(np.round(np.random.exponential(scale=scale)) + min_steps) - - -def train_optimizer(logdir, - optimizer_spec, - problems_and_data, - num_problems, - num_meta_iterations, - num_unroll_func, - num_partial_unroll_itrs_func, - learning_rate=1e-4, - gradient_clip=5., - is_chief=False, - select_random_problems=True, - callbacks=None, - obj_train_max_multiplier=-1, - out=sys.stdout): - """Trains the meta-parameters of this optimizer. - - Args: - logdir: a directory filepath for storing model checkpoints (must exist) - optimizer_spec: specification for an Optimizer (see utils.Spec) - problems_and_data: a list of tuples containing three elements: a problem - specification (see utils.Spec), a dataset (see datasets.Dataset), and - a batch_size (int) for generating a problem and corresponding dataset. If - the problem doesn't have data, set dataset to None. - num_problems: the number of problems to sample during meta-training - num_meta_iterations: the number of iterations (steps) to run the - meta-optimizer for on each subproblem. - num_unroll_func: called once per meta iteration and returns the number of - unrolls to do for that meta iteration. - num_partial_unroll_itrs_func: called once per unroll and returns the number - of iterations to do for that unroll. - learning_rate: learning rate of the RMSProp meta-optimizer (Default: 1e-4) - gradient_clip: value to clip gradients at (Default: 5.0) - is_chief: whether this is the chief task (Default: False) - select_random_problems: whether to select training problems randomly - (Default: True) - callbacks: a list of callback functions that is run after every random - problem draw - obj_train_max_multiplier: the maximum increase in the objective value over - a single training run. Ignored if < 0. - out: where to write output to, e.g. a file handle (Default: sys.stdout) - - Raises: - ValueError: If one of the subproblems has a negative objective value. 
- """ - - if select_random_problems: - # iterate over random draws of problem / dataset pairs - sampler = (random.choice(problems_and_data) for _ in range(num_problems)) - else: - # iterate over a random shuffle of problems, looping if necessary - num_repeats = (num_problems / len(problems_and_data)) + 1 - random.shuffle(problems_and_data) - sampler = (problems_and_data * num_repeats)[:num_problems] - - for problem_itr, (problem_spec, dataset, batch_size) in enumerate(sampler): - - # timer used to time how long it takes to initialize a problem - problem_start_time = time.time() - - # if dataset is None, use the EMPTY_DATASET - if dataset is None: - dataset = datasets.EMPTY_DATASET - batch_size = dataset.size - - # build a new graph for this problem - graph = tf.Graph() - real_device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks) - - def custom_device_setter(op): - # Places the local variables onto the workers. - if trainable_optimizer.is_local_state_variable(op): - return "/job:worker" - else: - return real_device_setter(op) - - if real_device_setter: - device_setter = custom_device_setter - else: - device_setter = None - - with graph.as_default(), graph.device(device_setter): - - # initialize a problem - problem = problem_spec.build() - - # build the optimizer - opt = optimizer_spec.build() - - # get the meta-objective for training the optimizer - train_output = opt.train(problem, dataset) - - state_keys = opt.state_keys - for key, val in zip(state_keys, train_output.output_state[0]): - finite_val = utils.make_finite(val, replacement=tf.zeros_like(val)) - tf.summary.histogram("State/{}".format(key), finite_val, - collections=[OPT_SUM_COLLECTION]) - - tf.summary.scalar("MetaObjective", train_output.metaobj, - collections=[OPT_SUM_COLLECTION]) - - # Per-problem meta-objective - tf.summary.scalar(problem_spec.callable.__name__ + "_MetaObjective", - train_output.metaobj, - collections=[OPT_SUM_COLLECTION]) - - # create the meta-train_op - global_step = 
tf.Variable(0, name="global_step", trainable=False) - meta_parameters = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, - scope=OPTIMIZER_SCOPE) - # parameter regularization - reg_l2 = FLAGS.l2_reg * sum([tf.reduce_sum(param ** 2) - for param in meta_parameters]) - - # compute the meta-gradients - meta_opt = tf.train.RMSPropOptimizer(learning_rate, decay=FLAGS.rms_decay, - use_locking=True, - epsilon=FLAGS.rms_epsilon) - grads_and_vars = meta_opt.compute_gradients(train_output.metaobj + reg_l2, - meta_parameters) - - # clip the gradients - clipped_grads_and_vars = [] - for grad, var in grads_and_vars: - clipped_grad = tf.clip_by_value( - utils.make_finite(grad, replacement=tf.zeros_like(var)), - -gradient_clip, gradient_clip) - clipped_grads_and_vars.append((clipped_grad, var)) - - # histogram summary of grads and vars - for grad, var in grads_and_vars: - tf.summary.histogram( - var.name + "_rawgrad", - utils.make_finite( - grad, replacement=tf.zeros_like(grad)), - collections=[OPT_SUM_COLLECTION]) - for grad, var in clipped_grads_and_vars: - tf.summary.histogram(var.name + "_var", var, - collections=[OPT_SUM_COLLECTION]) - tf.summary.histogram(var.name + "_grad", grad, - collections=[OPT_SUM_COLLECTION]) - - # builds the train and summary operations - train_op = meta_opt.apply_gradients(clipped_grads_and_vars, - global_step=global_step) - - # only grab summaries defined for LOL, not inside the problem - summary_op = tf.summary.merge_all(key=OPT_SUM_COLLECTION) - - # make sure the state gets propagated after the gradients and summaries - # were computed. 
- with tf.control_dependencies([train_op, summary_op]): - propagate_loop_state_ops = [] - for dest, src in zip( - train_output.init_loop_vars, train_output.output_loop_vars): - propagate_loop_state_ops.append(dest.assign(src)) - propagate_loop_state_op = tf.group(*propagate_loop_state_ops) - - # create the supervisor - sv = tf.train.Supervisor( - graph=graph, - is_chief=is_chief, - logdir=logdir, - summary_op=None, - save_model_secs=0, # we save checkpoints manually - global_step=global_step, - ) - - with sv.managed_session() as sess: - - init_time = time.time() - problem_start_time - out.write("--------- Problem #{} ---------\n".format(problem_itr)) - out.write("{callable.__name__}{args}{kwargs}\n".format( - **problem_spec.__dict__)) - out.write("Took {} seconds to initialize.\n".format(init_time)) - out.flush() - - # For profiling summaries - if FLAGS.set_profiling: - summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph) - - # used to store information during training - metadata = defaultdict(list) - - for k in range(num_meta_iterations): - - if sv.should_stop(): - break - - problem.init_fn(sess) - - # set run options (for profiling) - full_trace_opt = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - run_options = full_trace_opt if FLAGS.set_profiling else None - run_metadata = tf.RunMetadata() if FLAGS.set_profiling else None - - num_unrolls = num_unroll_func() - partial_unroll_iters = [ - num_partial_unroll_itrs_func() for _ in xrange(num_unrolls) - ] - total_num_iter = sum(partial_unroll_iters) - - objective_weights = [np.ones(num) / float(num) - for num in partial_unroll_iters] - db = dataset.batch_indices(total_num_iter, batch_size) - dataset_batches = [] - last_index = 0 - for num in partial_unroll_iters: - dataset_batches.append(db[last_index:last_index + num]) - last_index += num - - train_start_time = time.time() - - unroll_itr = 0 - additional_log_info = "" - - for unroll_itr in range(num_unrolls): - first_unroll = unroll_itr == 0 - if 
FLAGS.reset_rnn_params: - reset_state = first_unroll and k == 0 - else: - reset_state = first_unroll - - feed = { - train_output.obj_weights: objective_weights[unroll_itr], - train_output.batches: dataset_batches[unroll_itr], - train_output.first_unroll: first_unroll, - train_output.reset_state: reset_state, - } - - # run the train and summary ops - # when a "save_diagnostics" flag is turned on - fetches_list = [ - train_output.metaobj, - train_output.problem_objectives, - train_output.initial_obj, - summary_op, - clipped_grads_and_vars, - train_op - ] - if unroll_itr + 1 < num_unrolls: - fetches_list += [propagate_loop_state_op] - - fetched = sess.run(fetches_list, feed_dict=feed, - options=run_options, run_metadata=run_metadata) - meta_obj = fetched[0] - sub_obj = fetched[1] - init_obj = fetched[2] - summ = fetched[3] - meta_grads_and_params = fetched[4] - - # assert that the subproblem objectives are non-negative - # (this is so that we can rescale the objective by the initial value - # and not worry about rescaling by a negative value) - if np.any(sub_obj < 0): - raise ValueError( - "Training problem objectives must be nonnegative.") - # If the objective has increased more than we want, exit this - # training run and start over on another meta iteration. - if obj_train_max_multiplier > 0 and ( - sub_obj[-1] > (init_obj + - abs(init_obj) * (obj_train_max_multiplier - 1))): - msg = " Broke early at {} out of {} unrolls. 
".format( - unroll_itr + 1, num_unrolls) - additional_log_info += msg - break - - # only the chief task is allowed to write the summary - if is_chief: - sv.summary_computed(sess, summ) - - metadata["subproblem_objs"].append(sub_obj) - # store training metadata to pass to the callback - metadata["meta_objs"].append(meta_obj) - metadata["meta_grads_and_params"].append(meta_grads_and_params) - - optimization_time = time.time() - train_start_time - - if FLAGS.set_profiling: - summary_name = "%02d_iter%04d_%02d" % (FLAGS.task, problem_itr, k) - summary_writer.add_run_metadata(run_metadata, summary_name) - - metadata["global_step"].append(sess.run(global_step)) - metadata["runtimes"].append(optimization_time) - - # write a diagnostic message to the output - args = (k, meta_obj, optimization_time, - sum(partial_unroll_iters[:unroll_itr+1])) - out.write(" [{:02}] {}, {} seconds, {} iters ".format(*args)) - out.write("(unrolled {} steps)".format( - ", ".join([str(s) for s in partial_unroll_iters[:unroll_itr+1]]))) - out.write("{}\n".format(additional_log_info)) - out.flush() - - if FLAGS.set_profiling: - summary_writer.close() - - # force a checkpoint save before we load a new problem - # only the chief task has the save_path and can write the checkpoint - if is_chief: - sv.saver.save(sess, sv.save_path, global_step=global_step) - - # run the callbacks on the chief - if is_chief and callbacks is not None: - for callback in callbacks: - if hasattr(callback, "__call__"): - problem_name = problem_spec.callable.__name__ - callback(problem_name, problem_itr, logdir, metadata) - - -def test_optimizer(optimizer, - problem, - num_iter, - dataset=datasets.EMPTY_DATASET, - batch_size=None, - seed=None, - graph=None, - logdir=None, - record_every=None): - """Tests an optimization algorithm on a given problem. 
- - Args: - optimizer: Either a tf.train.Optimizer instance, or an Optimizer instance - inheriting from trainable_optimizer.py - problem: A Problem instance that defines an optimization problem to solve - num_iter: The number of iterations of the optimizer to run - dataset: The dataset to train the problem against - batch_size: The number of samples per batch. If None (default), the - batch size is set to the full batch (dataset.size) - seed: A random seed used for drawing the initial parameters, or a list of - numpy arrays used to explicitly initialize the parameters. - graph: The tensorflow graph to execute (if None, uses the default graph) - logdir: A directory containing model checkpoints. If given, then the - parameters of the optimizer are loaded from the latest checkpoint - in this folder. - record_every: if an integer, stores the parameters, objective, and gradient - every recored_every iterations. If None, nothing is stored - - Returns: - objective_values: A list of the objective values during optimization - parameters: The parameters obtained after training - records: A dictionary containing lists of the parameters and gradients - during optimization saved every record_every iterations (empty if - record_every is set to None) - """ - - if dataset is None: - dataset = datasets.EMPTY_DATASET - batch_size = dataset.size - else: - # default batch size is the entire dataset - batch_size = dataset.size if batch_size is None else batch_size - - graph = tf.get_default_graph() if graph is None else graph - with graph.as_default(): - - # define the parameters of the optimization problem - if isinstance(seed, (list, tuple)): - # seed is a list of arrays - params = problem_generator.init_fixed_variables(seed) - else: - # seed is an int or None - params = problem.init_variables(seed) - - data_placeholder = tf.placeholder(tf.float32) - labels_placeholder = tf.placeholder(tf.int32) - - # get the problem objective and gradient(s) - obj = problem.objective(params, 
data_placeholder, labels_placeholder) - gradients = problem.gradients(obj, params) - - vars_to_preinitialize = params - - with tf.Session(graph=graph) as sess: - # initialize the parameter scope variables; necessary for apply_gradients - sess.run(tf.variables_initializer(vars_to_preinitialize)) - coord = tf.train.Coordinator() - threads = tf.train.start_queue_runners(sess=sess, coord=coord) - - # create the train operation and training variables - try: - train_op, real_params = optimizer.apply_gradients(zip(gradients, params)) - obj = problem.objective(real_params, data_placeholder, labels_placeholder) - except TypeError: - # If all goes well, this exception should only be thrown when we are using - # a non-hrnn optimizer. - train_op = optimizer.apply_gradients(zip(gradients, params)) - - vars_to_restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, - scope=OPTIMIZER_SCOPE) - vars_to_initialize = list( - set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) - - set(vars_to_restore) - set(vars_to_preinitialize)) - # load or initialize optimizer variables - if logdir is not None: - restorer = tf.Saver(var_list=vars_to_restore) - ckpt = tf.train.latest_checkpoint(logdir) - restorer.restore(sess, ckpt) - else: - sess.run(tf.variables_initializer(vars_to_restore)) - # initialize all the other variables - sess.run(tf.variables_initializer(vars_to_initialize)) - - problem.init_fn(sess) - - # generate the minibatch indices - batch_inds = dataset.batch_indices(num_iter, batch_size) - - # run the train operation for n iterations and save the objectives - records = defaultdict(list) - objective_values = [] - for itr, batch in enumerate(batch_inds): - - # data to feed in - feed = {data_placeholder: dataset.data[batch], - labels_placeholder: dataset.labels[batch]} - full_feed = {data_placeholder: dataset.data, - labels_placeholder: dataset.labels} - - # record stuff - if record_every is not None and (itr % record_every) == 0: - def grad_value(g): - if isinstance(g, 
tf.IndexedSlices): - return g.values - else: - return g - - records_fetch = {} - for p in params: - for key in optimizer.get_slot_names(): - v = optimizer.get_slot(p, key) - records_fetch[p.name + "_" + key] = v - gav_fetch = [(grad_value(g), v) for g, v in zip(gradients, params)] - - _, gav_eval, records_eval = sess.run( - (obj, gav_fetch, records_fetch), feed_dict=feed) - full_obj_eval = sess.run([obj], feed_dict=full_feed) - - records["objective"].append(full_obj_eval) - records["grad_norm"].append([np.linalg.norm(g.ravel()) - for g, _ in gav_eval]) - records["param_norm"].append([np.linalg.norm(v.ravel()) - for _, v in gav_eval]) - records["grad"].append([g for g, _ in gav_eval]) - records["param"].append([v for _, v in gav_eval]) - records["iter"].append(itr) - - for k, v in records_eval.iteritems(): - records[k].append(v) - - # run the optimization train operation - objective_values.append(sess.run([train_op, obj], feed_dict=feed)[1]) - - # final parameters - parameters = [sess.run(p) for p in params] - coord.request_stop() - coord.join(threads) - - return objective_values, parameters, records - - -def run_wall_clock_test(optimizer, - problem, - num_steps, - dataset=datasets.EMPTY_DATASET, - seed=None, - logdir=None, - batch_size=None): - """Runs optimization with the given parameters and return average iter time. - - Args: - optimizer: The tf.train.Optimizer instance - problem: The problem to optimize (a problem_generator.Problem) - num_steps: The number of steps to run optimization for - dataset: The dataset to train the problem against - seed: The seed used for drawing the initial parameters, or a list of - numpy arrays used to explicitly initialize the parameters - logdir: A directory containing model checkpoints. If given, then the - parameters of the optimizer are loaded from the latest checkpoint - in this folder. - batch_size: The number of samples per batch. - - Returns: - The average time in seconds for a single optimization iteration. 
- """ - if dataset is None: - dataset = datasets.EMPTY_DATASET - batch_size = dataset.size - else: - # default batch size is the entire dataset - batch_size = dataset.size if batch_size is None else batch_size - - # define the parameters of the optimization problem - if isinstance(seed, (list, tuple)): - # seed is a list of arrays - params = problem_generator.init_fixed_variables(seed) - else: - # seed is an int or None - params = problem.init_variables(seed) - - data_placeholder = tf.placeholder(tf.float32) - labels_placeholder = tf.placeholder(tf.int32) - - obj = problem.objective(params, data_placeholder, labels_placeholder) - gradients = problem.gradients(obj, params) - vars_to_preinitialize = params - - with tf.Session(graph=tf.get_default_graph()) as sess: - # initialize the parameter scope variables; necessary for apply_gradients - sess.run(tf.variables_initializer(vars_to_preinitialize)) - train_op = optimizer.apply_gradients(zip(gradients, params)) - if isinstance(train_op, tuple) or isinstance(train_op, list): - # LOL apply_gradients returns a tuple. Regular optimizers do not. 
- train_op = train_op[0] - vars_to_restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, - scope=OPTIMIZER_SCOPE) - vars_to_initialize = list( - set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) - - set(vars_to_restore) - set(vars_to_preinitialize)) - # load or initialize optimizer variables - if logdir is not None: - restorer = tf.Saver(var_list=vars_to_restore) - ckpt = tf.train.latest_checkpoint(logdir) - restorer.restore(sess, ckpt) - else: - sess.run(tf.variables_initializer(vars_to_restore)) - # initialize all the other variables - sess.run(tf.variables_initializer(vars_to_initialize)) - - problem.init_fn(sess) - - # generate the minibatch indices - batch_inds = dataset.batch_indices(num_steps, batch_size) - - avg_iter_time = [] - for batch in batch_inds: - # data to feed in - feed = {data_placeholder: dataset.data[batch], - labels_placeholder: dataset.labels[batch]} - - # run the optimization train operation - start = time.time() - sess.run([train_op], feed_dict=feed) - avg_iter_time.append(time.time() - start) - - return np.median(np.array(avg_iter_time)) diff --git a/research/learned_optimizer/metarun.py b/research/learned_optimizer/metarun.py deleted file mode 100644 index 45a29623c7fd1381cef590c4e8440d8749585b72..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/metarun.py +++ /dev/null @@ -1,394 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Scripts for meta-optimization.""" - -from __future__ import print_function - -import os - -import tensorflow as tf - -import metaopt -from learned_optimizer.optimizer import coordinatewise_rnn -from learned_optimizer.optimizer import global_learning_rate -from learned_optimizer.optimizer import hierarchical_rnn -from learned_optimizer.optimizer import learning_rate_schedule -from learned_optimizer.optimizer import trainable_adam -from learned_optimizer.problems import problem_sets as ps -from learned_optimizer.problems import problem_spec - -tf.app.flags.DEFINE_string("train_dir", "/tmp/lol/", - """Directory to store parameters and results.""") - -tf.app.flags.DEFINE_integer("task", 0, - """Task id of the replica running the training.""") -tf.app.flags.DEFINE_integer("worker_tasks", 1, - """Number of tasks in the worker job.""") - -tf.app.flags.DEFINE_integer("num_problems", 1000, - """Number of sub-problems to run.""") -tf.app.flags.DEFINE_integer("num_meta_iterations", 5, - """Number of meta-iterations to optimize.""") -tf.app.flags.DEFINE_integer("num_unroll_scale", 40, - """The scale parameter of the exponential - distribution from which the number of partial - unrolls is drawn""") -tf.app.flags.DEFINE_integer("min_num_unrolls", 1, - """The minimum number of unrolls per problem.""") -tf.app.flags.DEFINE_integer("num_partial_unroll_itr_scale", 200, - """The scale parameter of the exponential - distribution from which the number of iterations - per unroll is drawn.""") -tf.app.flags.DEFINE_integer("min_num_itr_partial_unroll", 50, - """The minimum number of iterations for one - unroll.""") - -tf.app.flags.DEFINE_string("optimizer", "HierarchicalRNN", - """Which meta-optimizer to train.""") - -# CoordinatewiseRNN-specific flags -tf.app.flags.DEFINE_integer("cell_size", 20, - """Size of the RNN hidden state in each layer.""") -tf.app.flags.DEFINE_integer("num_cells", 2, - 
"""Number of RNN layers.""") -tf.app.flags.DEFINE_string("cell_cls", "GRUCell", - """Type of RNN cell to use.""") - -# Metaoptimization parameters -tf.app.flags.DEFINE_float("meta_learning_rate", 1e-6, - """The learning rate for the meta-optimizer.""") -tf.app.flags.DEFINE_float("gradient_clip_level", 1e4, - """The level to clip gradients to.""") - -# Training set selection -tf.app.flags.DEFINE_boolean("include_quadratic_problems", False, - """Include non-noisy quadratic problems.""") -tf.app.flags.DEFINE_boolean("include_noisy_quadratic_problems", True, - """Include noisy quadratic problems.""") -tf.app.flags.DEFINE_boolean("include_large_quadratic_problems", True, - """Include very large quadratic problems.""") -tf.app.flags.DEFINE_boolean("include_bowl_problems", True, - """Include 2D bowl problems.""") -tf.app.flags.DEFINE_boolean("include_softmax_2_class_problems", True, - """Include 2-class logistic regression problems.""") -tf.app.flags.DEFINE_boolean("include_noisy_softmax_2_class_problems", True, - """Include noisy 2-class logistic regression - problems.""") -tf.app.flags.DEFINE_boolean("include_optimization_test_problems", True, - """Include non-noisy versions of classic - optimization test problems, e.g. Rosenbrock.""") -tf.app.flags.DEFINE_boolean("include_noisy_optimization_test_problems", True, - """Include gradient-noise versions of classic - optimization test problems, e.g. Rosenbrock""") -tf.app.flags.DEFINE_boolean("include_fully_connected_random_2_class_problems", - True, """Include MLP problems for 2 classes.""") -tf.app.flags.DEFINE_boolean("include_matmul_problems", True, - """Include matrix multiplication problems.""") -tf.app.flags.DEFINE_boolean("include_log_objective_problems", True, - """Include problems where the objective is the log - objective of another problem, e.g. 
Bowl.""") -tf.app.flags.DEFINE_boolean("include_rescale_problems", True, - """Include problems where the parameters are scaled - version of the original parameters.""") -tf.app.flags.DEFINE_boolean("include_norm_problems", True, - """Include problems where the objective is the - N-norm of another problem, e.g. Quadratic.""") -tf.app.flags.DEFINE_boolean("include_sum_problems", True, - """Include problems where the objective is the sum - of the objectives of the subproblems that make - up the problem parameters. Per-problem tensors - are still independent of each other.""") -tf.app.flags.DEFINE_boolean("include_sparse_gradient_problems", True, - """Include problems where the gradient is set to 0 - with some high probability.""") -tf.app.flags.DEFINE_boolean("include_sparse_softmax_problems", False, - """Include sparse softmax problems.""") -tf.app.flags.DEFINE_boolean("include_one_hot_sparse_softmax_problems", False, - """Include one-hot sparse softmax problems.""") -tf.app.flags.DEFINE_boolean("include_noisy_bowl_problems", True, - """Include noisy bowl problems.""") -tf.app.flags.DEFINE_boolean("include_noisy_norm_problems", True, - """Include noisy norm problems.""") -tf.app.flags.DEFINE_boolean("include_noisy_sum_problems", True, - """Include noisy sum problems.""") -tf.app.flags.DEFINE_boolean("include_sum_of_quadratics_problems", False, - """Include sum of quadratics problems.""") -tf.app.flags.DEFINE_boolean("include_projection_quadratic_problems", False, - """Include projection quadratic problems.""") -tf.app.flags.DEFINE_boolean("include_outward_snake_problems", False, - """Include outward snake problems.""") -tf.app.flags.DEFINE_boolean("include_dependency_chain_problems", False, - """Include dependency chain problems.""") -tf.app.flags.DEFINE_boolean("include_min_max_well_problems", False, - """Include min-max well problems.""") - -# Optimizer parameters: initialization and scale values -tf.app.flags.DEFINE_float("min_lr", 1e-6, - """The minimum initial 
learning rate.""") -tf.app.flags.DEFINE_float("max_lr", 1e-2, - """The maximum initial learning rate.""") - -# Optimizer parameters: small features. -tf.app.flags.DEFINE_boolean("zero_init_lr_weights", True, - """Whether to initialize the learning rate weights - to 0 rather than the scaled random initialization - used for other RNN variables.""") -tf.app.flags.DEFINE_boolean("use_relative_lr", True, - """Whether to use the relative learning rate as an - input during training. Can only be used if - learnable_decay is also True.""") -tf.app.flags.DEFINE_boolean("use_extreme_indicator", False, - """Whether to use the extreme indicator for learning - rates as an input during training. Can only be - used if learnable_decay is also True.""") -tf.app.flags.DEFINE_boolean("use_log_means_squared", True, - """Whether to track the log of the mean squared - grads instead of the means squared grads.""") -tf.app.flags.DEFINE_boolean("use_problem_lr_mean", True, - """Whether to use the mean over all learning rates - in the problem when calculating the relative - learning rate.""") - -# Optimizer parameters: major features -tf.app.flags.DEFINE_boolean("learnable_decay", True, - """Whether to learn weights that dynamically - modulate the input scale via RMS decay.""") -tf.app.flags.DEFINE_boolean("dynamic_output_scale", True, - """Whether to learn weights that dynamically - modulate the output scale.""") -tf.app.flags.DEFINE_boolean("use_log_objective", True, - """Whether to use the log of the scaled objective - rather than just the scaled obj for training.""") -tf.app.flags.DEFINE_boolean("use_attention", False, - """Whether to learn where to attend.""") -tf.app.flags.DEFINE_boolean("use_second_derivatives", True, - """Whether to use second derivatives.""") -tf.app.flags.DEFINE_integer("num_gradient_scales", 4, - """How many different timescales to keep for - gradient history. 
If > 1, also learns a scale - factor for gradient history.""") -tf.app.flags.DEFINE_float("max_log_lr", 33, - """The maximum log learning rate allowed.""") -tf.app.flags.DEFINE_float("objective_training_max_multiplier", -1, - """How much the objective can grow before training on - this problem / param pair is terminated. Sets a max - on the objective value when multiplied by the - initial objective. If <= 0, not used.""") -tf.app.flags.DEFINE_boolean("use_gradient_shortcut", True, - """Whether to add a learned affine projection of the - gradient to the update delta in addition to the - gradient function computed by the RNN.""") -tf.app.flags.DEFINE_boolean("use_lr_shortcut", False, - """Whether to add the difference between the current - learning rate and the desired learning rate to - the RNN input.""") -tf.app.flags.DEFINE_boolean("use_grad_products", True, - """Whether to use gradient products in the input to - the RNN. Only applicable when num_gradient_scales - > 1.""") -tf.app.flags.DEFINE_boolean("use_multiple_scale_decays", False, - """Whether to use many-timescale scale decays.""") -tf.app.flags.DEFINE_boolean("use_numerator_epsilon", False, - """Whether to use epsilon in the numerator of the - log objective.""") -tf.app.flags.DEFINE_boolean("learnable_inp_decay", True, - """Whether to learn input decay weight and bias.""") -tf.app.flags.DEFINE_boolean("learnable_rnn_init", True, - """Whether to learn RNN state initialization.""") - -FLAGS = tf.app.flags.FLAGS - -# The Size of the RNN hidden state in each layer: -# [PerParam, PerTensor, Global]. The length of this list must be 1, 2, or 3. -# If less than 3, the Global and/or PerTensor RNNs will not be created. 
- -HRNN_CELL_SIZES = [10, 20, 20] - - - -def register_optimizers(): - opts = {} - opts["CoordinatewiseRNN"] = coordinatewise_rnn.CoordinatewiseRNN - opts["GlobalLearningRate"] = global_learning_rate.GlobalLearningRate - opts["HierarchicalRNN"] = hierarchical_rnn.HierarchicalRNN - opts["LearningRateSchedule"] = learning_rate_schedule.LearningRateSchedule - opts["TrainableAdam"] = trainable_adam.TrainableAdam - return opts - - -def main(unused_argv): - """Runs the main script.""" - - opts = register_optimizers() - - # Choose a set of problems to optimize. By default this includes quadratics, - # 2-dimensional bowls, 2-class softmax problems, and non-noisy optimization - # test problems (e.g. Rosenbrock, Beale) - problems_and_data = [] - - if FLAGS.include_sparse_softmax_problems: - problems_and_data.extend(ps.sparse_softmax_2_class_sparse_problems()) - - if FLAGS.include_one_hot_sparse_softmax_problems: - problems_and_data.extend( - ps.one_hot_sparse_softmax_2_class_sparse_problems()) - - if FLAGS.include_quadratic_problems: - problems_and_data.extend(ps.quadratic_problems()) - - if FLAGS.include_noisy_quadratic_problems: - problems_and_data.extend(ps.quadratic_problems_noisy()) - - if FLAGS.include_large_quadratic_problems: - problems_and_data.extend(ps.quadratic_problems_large()) - - if FLAGS.include_bowl_problems: - problems_and_data.extend(ps.bowl_problems()) - - if FLAGS.include_noisy_bowl_problems: - problems_and_data.extend(ps.bowl_problems_noisy()) - - if FLAGS.include_softmax_2_class_problems: - problems_and_data.extend(ps.softmax_2_class_problems()) - - if FLAGS.include_noisy_softmax_2_class_problems: - problems_and_data.extend(ps.softmax_2_class_problems_noisy()) - - if FLAGS.include_optimization_test_problems: - problems_and_data.extend(ps.optimization_test_problems()) - - if FLAGS.include_noisy_optimization_test_problems: - problems_and_data.extend(ps.optimization_test_problems_noisy()) - - if FLAGS.include_fully_connected_random_2_class_problems: - 
problems_and_data.extend(ps.fully_connected_random_2_class_problems()) - - if FLAGS.include_matmul_problems: - problems_and_data.extend(ps.matmul_problems()) - - if FLAGS.include_log_objective_problems: - problems_and_data.extend(ps.log_objective_problems()) - - if FLAGS.include_rescale_problems: - problems_and_data.extend(ps.rescale_problems()) - - if FLAGS.include_norm_problems: - problems_and_data.extend(ps.norm_problems()) - - if FLAGS.include_noisy_norm_problems: - problems_and_data.extend(ps.norm_problems_noisy()) - - if FLAGS.include_sum_problems: - problems_and_data.extend(ps.sum_problems()) - - if FLAGS.include_noisy_sum_problems: - problems_and_data.extend(ps.sum_problems_noisy()) - - if FLAGS.include_sparse_gradient_problems: - problems_and_data.extend(ps.sparse_gradient_problems()) - if FLAGS.include_fully_connected_random_2_class_problems: - problems_and_data.extend(ps.sparse_gradient_problems_mlp()) - - if FLAGS.include_min_max_well_problems: - problems_and_data.extend(ps.min_max_well_problems()) - - if FLAGS.include_sum_of_quadratics_problems: - problems_and_data.extend(ps.sum_of_quadratics_problems()) - - if FLAGS.include_projection_quadratic_problems: - problems_and_data.extend(ps.projection_quadratic_problems()) - - if FLAGS.include_outward_snake_problems: - problems_and_data.extend(ps.outward_snake_problems()) - - if FLAGS.include_dependency_chain_problems: - problems_and_data.extend(ps.dependency_chain_problems()) - - # log directory - logdir = os.path.join(FLAGS.train_dir, - "{}_{}_{}_{}".format(FLAGS.optimizer, - FLAGS.cell_cls, - FLAGS.cell_size, - FLAGS.num_cells)) - - # get the optimizer class and arguments - optimizer_cls = opts[FLAGS.optimizer] - - assert len(HRNN_CELL_SIZES) in [1, 2, 3] - optimizer_args = (HRNN_CELL_SIZES,) - - optimizer_kwargs = { - "init_lr_range": (FLAGS.min_lr, FLAGS.max_lr), - "learnable_decay": FLAGS.learnable_decay, - "dynamic_output_scale": FLAGS.dynamic_output_scale, - "cell_cls": getattr(tf.contrib.rnn, 
FLAGS.cell_cls), - "use_attention": FLAGS.use_attention, - "use_log_objective": FLAGS.use_log_objective, - "num_gradient_scales": FLAGS.num_gradient_scales, - "zero_init_lr_weights": FLAGS.zero_init_lr_weights, - "use_log_means_squared": FLAGS.use_log_means_squared, - "use_relative_lr": FLAGS.use_relative_lr, - "use_extreme_indicator": FLAGS.use_extreme_indicator, - "max_log_lr": FLAGS.max_log_lr, - "obj_train_max_multiplier": FLAGS.objective_training_max_multiplier, - "use_problem_lr_mean": FLAGS.use_problem_lr_mean, - "use_gradient_shortcut": FLAGS.use_gradient_shortcut, - "use_second_derivatives": FLAGS.use_second_derivatives, - "use_lr_shortcut": FLAGS.use_lr_shortcut, - "use_grad_products": FLAGS.use_grad_products, - "use_multiple_scale_decays": FLAGS.use_multiple_scale_decays, - "use_numerator_epsilon": FLAGS.use_numerator_epsilon, - "learnable_inp_decay": FLAGS.learnable_inp_decay, - "learnable_rnn_init": FLAGS.learnable_rnn_init, - } - optimizer_spec = problem_spec.Spec( - optimizer_cls, optimizer_args, optimizer_kwargs) - - # make log directory - tf.gfile.MakeDirs(logdir) - - is_chief = FLAGS.task == 0 - # if this is a distributed run, make the chief run through problems in order - select_random_problems = FLAGS.worker_tasks == 1 or not is_chief - - def num_unrolls(): - return metaopt.sample_numiter(FLAGS.num_unroll_scale, FLAGS.min_num_unrolls) - - def num_partial_unroll_itrs(): - return metaopt.sample_numiter(FLAGS.num_partial_unroll_itr_scale, - FLAGS.min_num_itr_partial_unroll) - - # run it - metaopt.train_optimizer( - logdir, - optimizer_spec, - problems_and_data, - FLAGS.num_problems, - FLAGS.num_meta_iterations, - num_unrolls, - num_partial_unroll_itrs, - learning_rate=FLAGS.meta_learning_rate, - gradient_clip=FLAGS.gradient_clip_level, - is_chief=is_chief, - select_random_problems=select_random_problems, - obj_train_max_multiplier=FLAGS.objective_training_max_multiplier, - callbacks=[]) - - return 0 - - -if __name__ == "__main__": - tf.app.run() 
diff --git a/research/learned_optimizer/optimizer/BUILD b/research/learned_optimizer/optimizer/BUILD deleted file mode 100644 index 8953e7592ace416b786be2a6fa59f4c537c82644..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/BUILD +++ /dev/null @@ -1,69 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -# Libraries -# ========= -py_library( - name = "coordinatewise_rnn", - srcs = ["coordinatewise_rnn.py"], - deps = [ - ":trainable_optimizer", - ":utils", - ], -) - -py_library( - name = "global_learning_rate", - srcs = ["global_learning_rate.py"], - deps = [ - ":trainable_optimizer", - ], -) - -py_library( - name = "hierarchical_rnn", - srcs = ["hierarchical_rnn.py"], - deps = [ - ":rnn_cells", - ":trainable_optimizer", - ":utils", - ], -) - -py_library( - name = "learning_rate_schedule", - srcs = ["learning_rate_schedule.py"], - deps = [ - ":trainable_optimizer", - ], -) - -py_library( - name = "rnn_cells", - srcs = ["rnn_cells.py"], - deps = [ - ":utils", - ], -) - -py_library( - name = "trainable_adam", - srcs = ["trainable_adam.py"], - deps = [ - ":trainable_optimizer", - ":utils", - ], -) - -py_library( - name = "trainable_optimizer", - srcs = ["trainable_optimizer.py"], - deps = [ - ], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - deps = [ - ], -) diff --git a/research/learned_optimizer/optimizer/coordinatewise_rnn.py b/research/learned_optimizer/optimizer/coordinatewise_rnn.py deleted file mode 100644 index 3d699504b7a3d86643bea6b295d20b2434131a99..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/coordinatewise_rnn.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Collection of trainable optimizers for meta-optimization.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import numpy as np -import tensorflow as tf - -from learned_optimizer.optimizer import utils -from learned_optimizer.optimizer import trainable_optimizer as opt - - -# Default was 1e-3 -tf.app.flags.DEFINE_float("crnn_rnn_readout_scale", 0.5, - """The initialization scale for the RNN readouts.""") -tf.app.flags.DEFINE_float("crnn_default_decay_var_init", 2.2, - """The default initializer value for any decay/ - momentum style variables and constants. - sigmoid(2.2) ~ 0.9, sigmoid(-2.2) ~ 0.01.""") - -FLAGS = tf.flags.FLAGS - - -class CoordinatewiseRNN(opt.TrainableOptimizer): - """RNN that operates on each coordinate of the problem independently.""" - - def __init__(self, - cell_sizes, - cell_cls, - init_lr_range=(1., 1.), - dynamic_output_scale=True, - learnable_decay=True, - zero_init_lr_weights=False, - **kwargs): - """Initializes the RNN per-parameter optimizer. - - Args: - cell_sizes: List of hidden state sizes for each RNN cell in the network - cell_cls: tf.contrib.rnn class for specifying the RNN cell type - init_lr_range: the range in which to initialize the learning rates. 
- dynamic_output_scale: whether to learn weights that dynamically modulate - the output scale (default: True) - learnable_decay: whether to learn weights that dynamically modulate the - input scale via RMS style decay (default: True) - zero_init_lr_weights: whether to initialize the lr weights to zero - **kwargs: args passed to TrainableOptimizer's constructor - - Raises: - ValueError: If the init lr range is not of length 2. - ValueError: If the init lr range is not a valid range (min > max). - """ - if len(init_lr_range) != 2: - raise ValueError( - "Initial LR range must be len 2, was {}".format(len(init_lr_range))) - if init_lr_range[0] > init_lr_range[1]: - raise ValueError("Initial LR range min is greater than max.") - self.init_lr_range = init_lr_range - - self.zero_init_lr_weights = zero_init_lr_weights - self.reuse_vars = False - - # create the RNN cell - with tf.variable_scope(opt.OPTIMIZER_SCOPE): - self.component_cells = [cell_cls(sz) for sz in cell_sizes] - self.cell = tf.contrib.rnn.MultiRNNCell(self.component_cells) - - # random normal initialization scaled by the output size - scale_factor = FLAGS.crnn_rnn_readout_scale / math.sqrt(cell_sizes[-1]) - scaled_init = tf.random_normal_initializer(0., scale_factor) - - # weights for projecting the hidden state to a parameter update - self.update_weights = tf.get_variable("update_weights", - shape=(cell_sizes[-1], 1), - initializer=scaled_init) - - self._initialize_decay(learnable_decay, (cell_sizes[-1], 1), scaled_init) - - self._initialize_lr(dynamic_output_scale, (cell_sizes[-1], 1), - scaled_init) - - state_size = sum([sum(state_size) for state_size in self.cell.state_size]) - self._init_vector = tf.get_variable( - "init_vector", shape=[1, state_size], - initializer=tf.random_uniform_initializer(-1., 1.)) - - state_keys = ["rms", "rnn", "learning_rate", "decay"] - super(CoordinatewiseRNN, self).__init__("cRNN", state_keys, **kwargs) - - def _initialize_decay( - self, learnable_decay, 
weights_tensor_shape, scaled_init): - """Initializes the decay weights and bias variables or tensors. - - Args: - learnable_decay: Whether to use learnable decay. - weights_tensor_shape: The shape the weight tensor should take. - scaled_init: The scaled initialization for the weights tensor. - """ - if learnable_decay: - - # weights for projecting the hidden state to the RMS decay term - self.decay_weights = tf.get_variable("decay_weights", - shape=weights_tensor_shape, - initializer=scaled_init) - self.decay_bias = tf.get_variable( - "decay_bias", shape=(1,), - initializer=tf.constant_initializer( - FLAGS.crnn_default_decay_var_init)) - else: - self.decay_weights = tf.zeros_like(self.update_weights) - self.decay_bias = tf.constant(FLAGS.crnn_default_decay_var_init) - - def _initialize_lr( - self, dynamic_output_scale, weights_tensor_shape, scaled_init): - """Initializes the learning rate weights and bias variables or tensors. - - Args: - dynamic_output_scale: Whether to use a dynamic output scale. - weights_tensor_shape: The shape the weight tensor should take. - scaled_init: The scaled initialization for the weights tensor. - """ - if dynamic_output_scale: - zero_init = tf.constant_initializer(0.) 
- wt_init = zero_init if self.zero_init_lr_weights else scaled_init - self.lr_weights = tf.get_variable("learning_rate_weights", - shape=weights_tensor_shape, - initializer=wt_init) - self.lr_bias = tf.get_variable("learning_rate_bias", shape=(1,), - initializer=zero_init) - else: - self.lr_weights = tf.zeros_like(self.update_weights) - self.lr_bias = tf.zeros([1, 1]) - - def _initialize_state(self, var): - """Return a dictionary mapping names of state variables to their values.""" - vectorized_shape = [var.get_shape().num_elements(), 1] - - min_lr = self.init_lr_range[0] - max_lr = self.init_lr_range[1] - if min_lr == max_lr: - init_lr = tf.constant(min_lr, shape=vectorized_shape) - else: - actual_vals = tf.random_uniform(vectorized_shape, - np.log(min_lr), - np.log(max_lr)) - init_lr = tf.exp(actual_vals) - - ones = tf.ones(vectorized_shape) - rnn_init = ones * self._init_vector - - return { - "rms": tf.ones(vectorized_shape), - "learning_rate": init_lr, - "rnn": rnn_init, - "decay": tf.ones(vectorized_shape), - } - - def _compute_update(self, param, grad, state): - """Update parameters given the gradient and state. - - Args: - param: tensor of parameters - grad: tensor of gradients with the same shape as param - state: a dictionary containing any state for the optimizer - - Returns: - updated_param: updated parameters - updated_state: updated state variables in a dictionary - """ - - with tf.variable_scope(opt.OPTIMIZER_SCOPE) as scope: - - if self.reuse_vars: - scope.reuse_variables() - else: - self.reuse_vars = True - - param_shape = tf.shape(param) - - (grad_values, decay_state, rms_state, rnn_state, learning_rate_state, - grad_indices) = self._extract_gradients_and_internal_state( - grad, state, param_shape) - - # Vectorize and scale the gradients. - grad_scaled, rms = utils.rms_scaling(grad_values, decay_state, rms_state) - - # Apply the RNN update. 
- rnn_state_tuples = self._unpack_rnn_state_into_tuples(rnn_state) - rnn_output, rnn_state_tuples = self.cell(grad_scaled, rnn_state_tuples) - rnn_state = self._pack_tuples_into_rnn_state(rnn_state_tuples) - - # Compute the update direction (a linear projection of the RNN output). - delta = utils.project(rnn_output, self.update_weights) - - # The updated decay is an affine projection of the hidden state - decay = utils.project(rnn_output, self.decay_weights, - bias=self.decay_bias, activation=tf.nn.sigmoid) - - # Compute the change in learning rate (an affine projection of the RNN - # state, passed through a 2x sigmoid, so the change is bounded). - learning_rate_change = 2. * utils.project(rnn_output, self.lr_weights, - bias=self.lr_bias, - activation=tf.nn.sigmoid) - - # Update the learning rate. - new_learning_rate = learning_rate_change * learning_rate_state - - # Apply the update to the parameters. - update = tf.reshape(new_learning_rate * delta, tf.shape(grad_values)) - - if isinstance(grad, tf.IndexedSlices): - update = utils.stack_tensor(update, grad_indices, param, - param_shape[:1]) - rms = utils.update_slices(rms, grad_indices, state["rms"], param_shape) - new_learning_rate = utils.update_slices(new_learning_rate, grad_indices, - state["learning_rate"], - param_shape) - rnn_state = utils.update_slices(rnn_state, grad_indices, state["rnn"], - param_shape) - decay = utils.update_slices(decay, grad_indices, state["decay"], - param_shape) - - new_param = param - update - - # Collect the update and new state. - new_state = { - "rms": rms, - "learning_rate": new_learning_rate, - "rnn": rnn_state, - "decay": decay, - } - - return new_param, new_state - - def _extract_gradients_and_internal_state(self, grad, state, param_shape): - """Extracts the gradients and relevant internal state. - - If the gradient is sparse, extracts the appropriate slices from the state. - - Args: - grad: The current gradient. - state: The current state. 
- param_shape: The shape of the parameter (used if gradient is sparse). - - Returns: - grad_values: The gradient value tensor. - decay_state: The current decay state. - rms_state: The current rms state. - rnn_state: The current state of the internal rnns. - learning_rate_state: The current learning rate state. - grad_indices: The indices for the gradient tensor, if sparse. - None otherwise. - """ - if isinstance(grad, tf.IndexedSlices): - grad_indices, grad_values = utils.accumulate_sparse_gradients(grad) - decay_state = utils.slice_tensor(state["decay"], grad_indices, - param_shape) - rms_state = utils.slice_tensor(state["rms"], grad_indices, param_shape) - rnn_state = utils.slice_tensor(state["rnn"], grad_indices, param_shape) - learning_rate_state = utils.slice_tensor(state["learning_rate"], - grad_indices, param_shape) - decay_state.set_shape([None, 1]) - rms_state.set_shape([None, 1]) - else: - grad_values = grad - grad_indices = None - - decay_state = state["decay"] - rms_state = state["rms"] - rnn_state = state["rnn"] - learning_rate_state = state["learning_rate"] - return (grad_values, decay_state, rms_state, rnn_state, learning_rate_state, - grad_indices) - - def _unpack_rnn_state_into_tuples(self, rnn_state): - """Creates state tuples from the rnn state vector.""" - rnn_state_tuples = [] - cur_state_pos = 0 - for cell in self.component_cells: - total_state_size = sum(cell.state_size) - cur_state = tf.slice(rnn_state, [0, cur_state_pos], - [-1, total_state_size]) - cur_state_tuple = tf.split(value=cur_state, num_or_size_splits=2, - axis=1) - rnn_state_tuples.append(cur_state_tuple) - cur_state_pos += total_state_size - return rnn_state_tuples - - def _pack_tuples_into_rnn_state(self, rnn_state_tuples): - """Creates a single state vector concatenated along column axis.""" - rnn_state = None - for new_state_tuple in rnn_state_tuples: - new_c, new_h = new_state_tuple - if rnn_state is None: - rnn_state = tf.concat([new_c, new_h], axis=1) - else: - rnn_state = 
tf.concat([rnn_state, tf.concat([new_c, new_h], 1)], axis=1) - return rnn_state - diff --git a/research/learned_optimizer/optimizer/global_learning_rate.py b/research/learned_optimizer/optimizer/global_learning_rate.py deleted file mode 100644 index bcf102fff054e9fe9e92d4379538f6394314fe1c..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/global_learning_rate.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A trainable optimizer that learns a single global learning rate.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from learned_optimizer.optimizer import trainable_optimizer - - -class GlobalLearningRate(trainable_optimizer.TrainableOptimizer): - """Optimizes for a single global learning rate.""" - - def __init__(self, initial_rate=1e-3, **kwargs): - """Initializes the global learning rate.""" - with tf.variable_scope(trainable_optimizer.OPTIMIZER_SCOPE): - initializer = tf.constant_initializer(initial_rate) - self.learning_rate = tf.get_variable("global_learning_rate", shape=(), - initializer=initializer) - super(GlobalLearningRate, self).__init__("GLR", [], **kwargs) - - def _compute_update(self, param, grad, state): - return param - tf.scalar_mul(self.learning_rate, grad), state - diff --git a/research/learned_optimizer/optimizer/hierarchical_rnn.py b/research/learned_optimizer/optimizer/hierarchical_rnn.py deleted file mode 100644 index 953b72b5d04724a11a0e95385bbe0c6a0d91289d..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/hierarchical_rnn.py +++ /dev/null @@ -1,792 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Collection of trainable optimizers for meta-optimization.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import numpy as np -import tensorflow as tf - -from tensorflow.python.ops import state_ops -from learned_optimizer.optimizer import rnn_cells -from learned_optimizer.optimizer import trainable_optimizer as opt -from learned_optimizer.optimizer import utils - -# Default was 0.1 -tf.app.flags.DEFINE_float("biasgrucell_scale", 0.5, - """The scale for the internal BiasGRUCell vars.""") -# Default was 0 -tf.app.flags.DEFINE_float("biasgrucell_gate_bias_init", 2.2, - """The bias for the internal BiasGRUCell reset and - update gate variables.""") -# Default was 1e-3 -tf.app.flags.DEFINE_float("hrnn_rnn_readout_scale", 0.5, - """The initialization scale for the RNN readouts.""") -tf.app.flags.DEFINE_float("hrnn_default_decay_var_init", 2.2, - """The default initializer value for any decay/ - momentum style variables and constants. - sigmoid(2.2) ~ 0.9, sigmoid(-2.2) ~ 0.01.""") -# Default was 2.2 -tf.app.flags.DEFINE_float("scale_decay_bias_init", 3.2, - """The initialization for the scale decay bias. This - is the initial bias for the timescale for the - exponential avg of the mean square gradients.""") -tf.app.flags.DEFINE_float("learning_rate_momentum_logit_init", 3.2, - """Initialization for the learning rate momentum.""") -# Default was 0.1 -tf.app.flags.DEFINE_float("hrnn_affine_scale", 0.5, - """The initialization scale for the weight matrix of - the bias variables in layer0 and 1 of the hrnn.""") - -FLAGS = tf.flags.FLAGS - - -class HierarchicalRNN(opt.TrainableOptimizer): - """3 level hierarchical RNN. - - Optionally uses second order gradient information and has decoupled evaluation - and update locations. 
- """ - - def __init__(self, level_sizes, init_lr_range=(1e-6, 1e-2), - learnable_decay=True, dynamic_output_scale=True, - use_attention=False, use_log_objective=True, - num_gradient_scales=4, zero_init_lr_weights=True, - use_log_means_squared=True, use_relative_lr=True, - use_extreme_indicator=False, max_log_lr=33, - obj_train_max_multiplier=-1, use_problem_lr_mean=False, - use_gradient_shortcut=False, use_lr_shortcut=False, - use_grad_products=False, use_multiple_scale_decays=False, - learnable_inp_decay=True, learnable_rnn_init=True, - random_seed=None, **kwargs): - """Initializes the RNN per-parameter optimizer. - - The hierarchy consists of up to three levels: - Level 0: per parameter RNN - Level 1: per tensor RNN - Level 2: global RNN - - Args: - level_sizes: list or tuple with 1, 2, or 3 integers, the number of units - in each RNN in the hierarchy (level0, level1, level2). - length 1: only coordinatewise rnn's will be used - length 2: coordinatewise and tensor-level rnn's will be used - length 3: a single global-level rnn will be used in addition to - coordinatewise and tensor-level - init_lr_range: the range in which to initialize the learning rates - learnable_decay: whether to learn weights that dynamically modulate the - input scale via RMS style decay - dynamic_output_scale: whether to learn weights that dynamically modulate - the output scale - use_attention: whether to use attention to train the optimizer - use_log_objective: whether to train on the log of the objective - num_gradient_scales: the number of scales to use for gradient history - zero_init_lr_weights: whether to initialize the lr weights to zero - use_log_means_squared: whether to track the log of the means_squared, - used as a measure of signal vs. noise in gradient. 
- use_relative_lr: whether to use the relative learning rate as an - input during training (requires learnable_decay=True) - use_extreme_indicator: whether to use the extreme indicator for learning - rates as an input during training (requires learnable_decay=True) - max_log_lr: the maximum log learning rate allowed during train or test - obj_train_max_multiplier: max objective increase during a training run - use_problem_lr_mean: whether to use the mean over all learning rates in - the problem when calculating the relative learning rate as opposed to - the per-tensor mean - use_gradient_shortcut: Whether to add a learned affine projection of the - gradient to the update delta in addition to the gradient function - computed by the RNN - use_lr_shortcut: Whether to add as input the difference between the log lr - and the desired log lr (1e-3) - use_grad_products: Whether to use gradient products in the rnn input. - Only applicable if num_gradient_scales > 1 - use_multiple_scale_decays: Whether to use multiple scales for the scale - decay, as with input decay - learnable_inp_decay: Whether to learn the input decay weights and bias. - learnable_rnn_init: Whether to learn the RNN state initialization. - random_seed: Random seed for random variable initializers. (Default: None) - **kwargs: args passed to TrainableOptimizer's constructor - - Raises: - ValueError: If level_sizes is not a length 1, 2, or 3 list. - ValueError: If there are any non-integer sizes in level_sizes. - ValueError: If the init lr range is not of length 2. - ValueError: If the init lr range is not a valid range (min > max). 
- """ - if len(level_sizes) not in [1, 2, 3]: - raise ValueError("HierarchicalRNN only supports 1, 2, or 3 levels in the " - "hierarchy, but {} were requested.".format( - len(level_sizes))) - if any(not isinstance(level, int) for level in level_sizes): - raise ValueError("Level sizes must be integer values, were {}".format( - level_sizes)) - if len(init_lr_range) != 2: - raise ValueError( - "Initial LR range must be len 2, was {}".format(len(init_lr_range))) - if init_lr_range[0] > init_lr_range[1]: - raise ValueError("Initial LR range min is greater than max.") - - self.learnable_decay = learnable_decay - self.dynamic_output_scale = dynamic_output_scale - self.use_attention = use_attention - self.use_log_objective = use_log_objective - self.num_gradient_scales = num_gradient_scales - self.zero_init_lr_weights = zero_init_lr_weights - self.use_log_means_squared = use_log_means_squared - self.use_relative_lr = use_relative_lr - self.use_extreme_indicator = use_extreme_indicator - self.max_log_lr = max_log_lr - self.use_problem_lr_mean = use_problem_lr_mean - self.use_gradient_shortcut = use_gradient_shortcut - self.use_lr_shortcut = use_lr_shortcut - self.use_grad_products = use_grad_products - self.use_multiple_scale_decays = use_multiple_scale_decays - self.learnable_inp_decay = learnable_inp_decay - self.learnable_rnn_init = learnable_rnn_init - - self.random_seed = random_seed - - self.num_layers = len(level_sizes) - self.init_lr_range = init_lr_range - - self.reuse_vars = None - self.reuse_global_state = None - self.cells = [] - self.init_vectors = [] - - with tf.variable_scope(opt.OPTIMIZER_SCOPE): - - self._initialize_rnn_cells(level_sizes) - - # get the cell size for the per-parameter RNN (Level 0) - cell_size = level_sizes[0] - - # Random normal initialization scaled by the output size. This is the - # scale for the RNN *readouts*. RNN internal weight scale is set in the - # BiasGRUCell call. 
- scale_factor = FLAGS.hrnn_rnn_readout_scale / math.sqrt(cell_size) - scaled_init = tf.random_normal_initializer(0., scale_factor, - seed=self.random_seed) - - # weights for projecting the hidden state to a parameter update - self.update_weights = tf.get_variable("update_weights", - shape=(cell_size, 1), - initializer=scaled_init) - - if self.use_attention: - # weights for projecting the hidden state to the location at which the - # gradient is attended - self.attention_weights = tf.get_variable( - "attention_weights", - initializer=self.update_weights.initialized_value()) - - # weights for projecting the hidden state to the RMS decay term - self._initialize_scale_decay((cell_size, 1), scaled_init) - self._initialize_input_decay((cell_size, 1), scaled_init) - - self._initialize_lr((cell_size, 1), scaled_init) - - state_keys = ["parameter", "layer", "scl_decay", "inp_decay", "true_param"] - - if self.dynamic_output_scale: - state_keys.append("log_learning_rate") - - for i in range(self.num_gradient_scales): - state_keys.append("grad_accum{}".format(i + 1)) - state_keys.append("ms{}".format(i + 1)) - - super(HierarchicalRNN, self).__init__( - "hRNN", state_keys, use_attention=use_attention, - use_log_objective=use_log_objective, - obj_train_max_multiplier=obj_train_max_multiplier, **kwargs) - - def _initialize_rnn_cells(self, level_sizes): - """Initializes the RNN cells to use in the hierarchical RNN.""" - - # RNN Cell layers (0 -> lowest, 1 -> middle, 2 -> global) - for level in range(self.num_layers): - scope = "Level{}_RNN".format(level) - with tf.variable_scope(scope): - hcell = rnn_cells.BiasGRUCell( - level_sizes[level], - scale=FLAGS.biasgrucell_scale, - gate_bias_init=FLAGS.biasgrucell_gate_bias_init, - random_seed=self.random_seed) - self.cells.append(hcell) - if self.learnable_rnn_init: - self.init_vectors.append(tf.Variable( - tf.random_uniform([1, hcell.state_size], -1., 1., - seed=self.random_seed), - name="init_vector")) - else: - 
self.init_vectors.append( - tf.random_uniform([1, hcell.state_size], -1., 1., - seed=self.random_seed)) - - def _initialize_scale_decay(self, weights_tensor_shape, scaled_init): - """Initializes the scale decay weights and bias variables or tensors. - - Args: - weights_tensor_shape: The shape the weight tensor should take. - scaled_init: The scaled initialization for the weights tensor. - """ - if self.learnable_decay: - self.scl_decay_weights = tf.get_variable("scl_decay_weights", - shape=weights_tensor_shape, - initializer=scaled_init) - scl_decay_bias_init = tf.constant_initializer( - FLAGS.scale_decay_bias_init) - self.scl_decay_bias = tf.get_variable("scl_decay_bias", - shape=(1,), - initializer=scl_decay_bias_init) - else: - self.scl_decay_weights = tf.zeros_like(self.update_weights) - self.scl_decay_bias = tf.log(0.93 / (1. - 0.93)) - - def _initialize_input_decay(self, weights_tensor_shape, scaled_init): - """Initializes the input scale decay weights and bias variables or tensors. - - Args: - weights_tensor_shape: The shape the weight tensor should take. - scaled_init: The scaled initialization for the weights tensor. - """ - if (self.learnable_decay and self.num_gradient_scales > 1 and - self.learnable_inp_decay): - self.inp_decay_weights = tf.get_variable("inp_decay_weights", - shape=weights_tensor_shape, - initializer=scaled_init) - inp_decay_bias_init = tf.constant_initializer( - FLAGS.hrnn_default_decay_var_init) - self.inp_decay_bias = tf.get_variable("inp_decay_bias", - shape=(1,), - initializer=inp_decay_bias_init) - else: - self.inp_decay_weights = tf.zeros_like(self.update_weights) - self.inp_decay_bias = tf.log(0.89 / (1. - 0.89)) - - def _initialize_lr(self, weights_tensor_shape, scaled_init): - """Initializes the learning rate weights and bias variables or tensors. - - Args: - weights_tensor_shape: The shape the weight tensor should take. - scaled_init: The scaled initialization for the weights tensor. 
- """ - if self.dynamic_output_scale: - zero_init = tf.constant_initializer(0.) - wt_init = zero_init if self.zero_init_lr_weights else scaled_init - self.lr_weights = tf.get_variable("learning_rate_weights", - shape=weights_tensor_shape, - initializer=wt_init) - self.lr_bias = tf.get_variable("learning_rate_bias", shape=(1,), - initializer=zero_init) - else: - self.lr_weights = tf.zeros_like(self.update_weights) - self.lr_bias = tf.zeros([1, 1]) - - def _initialize_state(self, var): - """Return a dictionary mapping names of state variables to their values.""" - var_vectorized = tf.reshape(var, [-1, 1]) - ndim = var_vectorized.get_shape().as_list()[0] - - state = { - # parameter init tensor is [var_ndim x layer0_cell_size] - "parameter": tf.ones([ndim, 1]) * self.init_vectors[0], - "scl_decay": tf.zeros_like(var_vectorized), - "inp_decay": tf.zeros_like(var_vectorized), - "true_param": var, - } - - if self.num_layers > 1: - # layer init tensor is [1 x layer1_cell_size] - state["layer"] = tf.ones([1, 1]) * self.init_vectors[1] - - if self.dynamic_output_scale: - min_lr = self.init_lr_range[0] - max_lr = self.init_lr_range[1] - if min_lr == max_lr: - log_init_lr = tf.log(min_lr * tf.ones_like(var_vectorized)) - else: - # Use a random offset to increase the likelihood that the average of the - # LRs for this variable is different from the LRs for other variables. 
- actual_vals = tf.random_uniform(var_vectorized.get_shape().as_list(), - np.log(min_lr) / 2., - np.log(max_lr) / 2., - seed=self.random_seed) - offset = tf.random_uniform((), np.log(min_lr) / 2., np.log(max_lr) / 2., - seed=self.random_seed) - log_init_lr = actual_vals + offset - # Clip the log learning rate to the flag at the top end, and to - # (log(min int32) - 1) at the bottom - clipped = tf.clip_by_value(log_init_lr, -33, self.max_log_lr) - state["log_learning_rate"] = clipped - - for i in range(self.num_gradient_scales): - state["grad_accum{}".format(i + 1)] = tf.zeros_like(var_vectorized) - state["ms{}".format(i + 1)] = tf.zeros_like(var_vectorized) - - return state - - def _initialize_global_state(self): - if self.num_layers < 3: - return [] - rnn_global_init = tf.ones([1, 1]) * self.init_vectors[2] - return [rnn_global_init] - - def _compute_updates(self, params, grads, states, global_state): - # Store the updated parameters and states. - updated_params = [] - updated_attention = [] - updated_states = [] - - with tf.variable_scope(opt.OPTIMIZER_SCOPE): - - mean_log_lr = self._compute_mean_log_lr(states) - - # Iterate over the layers. - for param, grad_unflat, state in zip(params, grads, states): - - with tf.variable_scope("PerTensor", reuse=self.reuse_vars): - self.reuse_vars = True - grad = tf.reshape(grad_unflat, [-1, 1]) - - # Create the RNN input. We will optionally extend it with additional - # features such as curvature and gradient signal vs. noise. - (grads_scaled, mean_squared_gradients, - grads_accum) = self._compute_scaled_and_ms_grads(grad, state) - rnn_input = [g for g in grads_scaled] - - self._extend_rnn_input(rnn_input, state, grads_scaled, - mean_squared_gradients, mean_log_lr) - - # Concatenate any features we've collected. 
- rnn_input_tensor = tf.concat(rnn_input, 1) - - layer_state, new_param_state = self._update_rnn_cells( - state, global_state, rnn_input_tensor, - len(rnn_input) != len(grads_scaled)) - - (scl_decay, inp_decay, new_log_lr, update_step, lr_attend, - attention_delta) = self._compute_rnn_state_projections( - state, new_param_state, grads_scaled) - - # Apply updates and store state variables. - if self.use_attention: - truth = state["true_param"] - updated_param = truth - update_step - attention_step = tf.reshape(lr_attend * attention_delta, - truth.get_shape()) - updated_attention.append(truth - attention_step) - else: - updated_param = param - update_step - updated_attention.append(updated_param) - updated_params.append(updated_param) - - # Collect the new state. - new_state = { - "parameter": new_param_state, - "scl_decay": scl_decay, - "inp_decay": inp_decay, - "true_param": updated_param, - } - if layer_state is not None: - new_state["layer"] = layer_state - - if self.dynamic_output_scale: - new_state["log_learning_rate"] = new_log_lr - - for i in range(self.num_gradient_scales): - new_state["grad_accum{}".format(i + 1)] = grads_accum[i] - new_state["ms{}".format(i + 1)] = mean_squared_gradients[i] - updated_states.append(new_state) - - updated_global_state = self._compute_updated_global_state([layer_state], - global_state) - - return (updated_params, updated_states, [updated_global_state], - updated_attention) - - def _compute_mean_log_lr(self, states): - """Computes the mean log learning rate across all variables.""" - if self.use_problem_lr_mean and self.use_relative_lr: - - sum_log_lr = 0. - count_log_lr = 0. - for state in states: - sum_log_lr += tf.reduce_sum(state["log_learning_rate"]) - # Note: get_shape().num_elements()=num elements in the original tensor. 
- count_log_lr += state["log_learning_rate"].get_shape().num_elements() - return sum_log_lr / count_log_lr - - def _compute_scaled_and_ms_grads(self, grad, state): - """Computes the scaled gradient and the mean squared gradients. - - Gradients are also accumulated across different timescales if appropriate. - - Args: - grad: The gradient tensor for this layer. - state: The optimizer state for this layer. - - Returns: - The scaled gradients, mean squared gradients, and accumulated gradients. - """ - input_decays = [state["inp_decay"]] - scale_decays = [state["scl_decay"]] - if self.use_multiple_scale_decays and self.num_gradient_scales > 1: - for i in range(self.num_gradient_scales - 1): - scale_decays.append(tf.sqrt(scale_decays[i])) - - for i in range(self.num_gradient_scales - 1): - # Each accumulator on twice the timescale of the one before. - input_decays.append(tf.sqrt(input_decays[i])) - grads_accum = [] - grads_scaled = [] - mean_squared_gradients = [] - - # populate the scaled gradients and associated mean_squared values - if self.num_gradient_scales > 0: - for i, decay in enumerate(input_decays): - if self.num_gradient_scales == 1: - # We don't accumulate if no scales, just take the current gradient. - grad_accum = grad - else: - # The state vars are 1-indexed. - old_accum = state["grad_accum{}".format(i + 1)] - grad_accum = grad * (1. 
- decay) + old_accum * decay - - grads_accum.append(grad_accum) - - sd = scale_decays[i if self.use_multiple_scale_decays else 0] - grad_scaled, ms = utils.rms_scaling(grad_accum, sd, - state["ms{}".format(i + 1)], - update_ms=True) - grads_scaled.append(grad_scaled) - mean_squared_gradients.append(ms) - - return grads_scaled, mean_squared_gradients, grads_accum - - def _extend_rnn_input(self, rnn_input, state, grads_scaled, - mean_squared_gradients, mean_log_lr): - """Computes additional rnn inputs and adds them to the rnn_input list.""" - if self.num_gradient_scales > 1 and self.use_grad_products: - # This gives a measure of curvature relative to input averaging - # lengthscale and to the learning rate - grad_products = [a * b for a, b in - zip(grads_scaled[:-1], grads_scaled[1:])] - rnn_input.extend([g for g in grad_products]) - - if self.use_log_means_squared: - log_means_squared = [tf.log(ms + 1e-16) - for ms in mean_squared_gradients] - - avg = tf.reduce_mean(log_means_squared, axis=0) - # This gives a measure of the signal vs. noise contribution to the - # gradient, at the current averaging lengthscale. If all the noise - # is averaged out, and if updates are small, these will be 0. - mean_log_means_squared = [m - avg for m in log_means_squared] - - rnn_input.extend([m for m in mean_log_means_squared]) - - if self.use_relative_lr or self.use_extreme_indicator: - if not self.dynamic_output_scale: - raise Exception("Relative LR and Extreme Indicator features " - "require dynamic_output_scale to be set to True.") - log_lr_vec = tf.reshape(state["log_learning_rate"], [-1, 1]) - if self.use_relative_lr: - if self.use_problem_lr_mean: - # Learning rate of this dimension vs. rest of target problem. - relative_lr = log_lr_vec - mean_log_lr - else: - # Learning rate of this dimension vs. rest of tensor. 
- relative_lr = log_lr_vec - tf.reduce_mean(log_lr_vec) - rnn_input.append(relative_lr) - if self.use_extreme_indicator: - # Indicator of extremely large or extremely small learning rate. - extreme_indicator = (tf.nn.relu(log_lr_vec - tf.log(1.)) - - tf.nn.relu(tf.log(1e-6) - log_lr_vec)) - rnn_input.append(extreme_indicator) - - if self.use_lr_shortcut: - log_lr_vec = tf.reshape(state["log_learning_rate"], [-1, 1]) - rnn_input.append(log_lr_vec - tf.log(1e-3)) - - def _update_rnn_cells(self, state, global_state, rnn_input_tensor, - use_additional_features): - """Updates the component RNN cells with the given state and tensor. - - Args: - state: The current state of the optimizer. - global_state: The current global RNN state. - rnn_input_tensor: The input tensor to the RNN. - use_additional_features: Whether the rnn input tensor contains additional - features beyond the scaled gradients (affects whether the rnn input - tensor is used as input to the RNN.) - - Returns: - layer_state: The new state of the per-tensor RNN. - new_param_state: The new state of the per-parameter RNN. 
- """ - # lowest level (per parameter) - # input -> gradient for this parameter - # bias -> output from the layer RNN - with tf.variable_scope("Layer0_RNN"): - total_bias = None - if self.num_layers > 1: - sz = 3 * self.cells[0].state_size # size of the concatenated bias - param_bias = utils.affine([state["layer"]], sz, - scope="Param/Affine", - scale=FLAGS.hrnn_affine_scale, - random_seed=self.random_seed) - total_bias = param_bias - if self.num_layers == 3: - global_bias = utils.affine(global_state, sz, - scope="Global/Affine", - scale=FLAGS.hrnn_affine_scale, - random_seed=self.random_seed) - total_bias += global_bias - - new_param_state, _ = self.cells[0]( - rnn_input_tensor, state["parameter"], bias=total_bias) - - if self.num_layers > 1: - # middle level (per layer) - # input -> average hidden state from each parameter in this layer - # bias -> output from the RNN at the global level - with tf.variable_scope("Layer1_RNN"): - if not use_additional_features: - # Restore old behavior and only add the mean of the new params. - layer_input = tf.reduce_mean(new_param_state, 0, keep_dims=True) - else: - layer_input = tf.reduce_mean( - tf.concat((new_param_state, rnn_input_tensor), 1), 0, - keep_dims=True) - if self.num_layers == 3: - sz = 3 * self.cells[1].state_size - layer_bias = utils.affine(global_state, sz, - scale=FLAGS.hrnn_affine_scale, - random_seed=self.random_seed) - layer_state, _ = self.cells[1]( - layer_input, state["layer"], bias=layer_bias) - else: - layer_state, _ = self.cells[1](layer_input, state["layer"]) - else: - layer_state = None - - return layer_state, new_param_state - - def _compute_rnn_state_projections(self, state, new_param_state, - grads_scaled): - """Computes the RNN state-based updates to parameters and update steps.""" - # Compute the update direction (a linear projection of the RNN output). 
- update_weights = self.update_weights - - update_delta = utils.project(new_param_state, update_weights) - if self.use_gradient_shortcut: - # Include an affine projection of just the direction of the gradient - # so that RNN hidden states are freed up to store more complex - # functions of the gradient and other parameters. - grads_scaled_tensor = tf.concat([g for g in grads_scaled], 1) - update_delta += utils.affine(grads_scaled_tensor, 1, - scope="GradsToDelta", - include_bias=False, - vec_mean=1. / len(grads_scaled), - random_seed=self.random_seed) - if self.dynamic_output_scale: - denom = tf.sqrt(tf.reduce_mean(update_delta ** 2) + 1e-16) - - update_delta /= denom - - if self.use_attention: - attention_weights = self.attention_weights - attention_delta = utils.project(new_param_state, - attention_weights) - if self.use_gradient_shortcut: - attention_delta += utils.affine(grads_scaled_tensor, 1, - scope="GradsToAttnDelta", - include_bias=False, - vec_mean=1. / len(grads_scaled), - random_seed=self.random_seed) - if self.dynamic_output_scale: - attention_delta /= tf.sqrt( - tf.reduce_mean(attention_delta ** 2) + 1e-16) - else: - attention_delta = None - - # The updated decay is an affine projection of the hidden state. - scl_decay = utils.project(new_param_state, self.scl_decay_weights, - bias=self.scl_decay_bias, - activation=tf.nn.sigmoid) - # This is only used if learnable_decay and num_gradient_scales > 1 - inp_decay = utils.project(new_param_state, self.inp_decay_weights, - bias=self.inp_decay_bias, - activation=tf.nn.sigmoid) - - # Also update the learning rate. 
- lr_param, lr_attend, new_log_lr = self._compute_new_learning_rate( - state, new_param_state) - - update_step = tf.reshape(lr_param * update_delta, - state["true_param"].get_shape()) - - return (scl_decay, inp_decay, new_log_lr, update_step, lr_attend, - attention_delta) - - def _compute_new_learning_rate(self, state, new_param_state): - if self.dynamic_output_scale: - # Compute the change in learning rate (an affine projection of the - # RNN state, passed through a sigmoid or log depending on flags). - # Update the learning rate, w/ momentum. - lr_change = utils.project(new_param_state, self.lr_weights, - bias=self.lr_bias) - step_log_lr = state["log_learning_rate"] + lr_change - - # Clip the log learning rate to the flag at the top end, and to - # (log(min int32) - 1) at the bottom - - # Check out this hack: we want to be able to compute the gradient - # of the downstream result w.r.t lr weights and bias, even if the - # value of step_log_lr is outside the clip range. So we clip, - # subtract off step_log_lr, and wrap all that in a stop_gradient so - # TF never tries to take the gradient of the clip... or the - # subtraction. Then we add BACK step_log_lr so that downstream still - # receives the clipped value. But the GRADIENT of step_log_lr will - # be the gradient of the unclipped value, which we added back in - # after stop_gradients. - step_log_lr += tf.stop_gradient( - tf.clip_by_value(step_log_lr, -33, self.max_log_lr) - - step_log_lr) - - lr_momentum_logit = tf.get_variable( - "learning_rate_momentum_logit", - initializer=FLAGS.learning_rate_momentum_logit_init) - lrm = tf.nn.sigmoid(lr_momentum_logit) - new_log_lr = (lrm * state["log_learning_rate"] + - (1. - lrm) * step_log_lr) - param_stepsize_offset = tf.get_variable("param_stepsize_offset", - initializer=-1.) - lr_param = tf.exp(step_log_lr + param_stepsize_offset) - lr_attend = tf.exp(step_log_lr) if self.use_attention else lr_param - else: - # Dynamic output scale is off, LR param is always 1. 
- lr_param = 2. * utils.project(new_param_state, self.lr_weights, - bias=self.lr_bias, - activation=tf.nn.sigmoid) - new_log_lr = None - lr_attend = lr_param - - return lr_param, lr_attend, new_log_lr - - def _compute_updated_global_state(self, layer_states, global_state): - """Computes the new global state gives the layers states and old state. - - Args: - layer_states: The current layer states. - global_state: The old global state. - - Returns: - The updated global state. - """ - updated_global_state = [] - if self.num_layers == 3: - # highest (global) layer - # input -> average hidden state from each layer-specific RNN - # bias -> None - with tf.variable_scope("Layer2_RNN", reuse=self.reuse_global_state): - self.reuse_global_state = True - global_input = tf.reduce_mean(tf.concat(layer_states, 0), 0, - keep_dims=True) - updated_global_state, _ = self.cells[2](global_input, global_state[0]) - return updated_global_state - - def apply_gradients(self, grads_and_vars, global_step=None, name=None): - """Overwrites the tf.train.Optimizer interface for applying gradients.""" - - # Pull out the variables. - grads_and_vars = tuple(grads_and_vars) # Make sure repeat iteration works. - for g, v in grads_and_vars: - if not isinstance(g, (tf.Tensor, tf.IndexedSlices, type(None))): - raise TypeError( - "Gradient must be a Tensor, IndexedSlices, or None: %s" % g) - if not isinstance(v, tf.Variable): - raise TypeError( - "Variable must be a tf.Variable: %s" % v) - if g is not None: - self._assert_valid_dtypes([g, v]) - var_list = [v for g, v in grads_and_vars if g is not None] - if not var_list: - raise ValueError("No gradients provided for any variable: %s" % - (grads_and_vars,)) - - # Create slots for the variables. - with tf.control_dependencies(None): - self._create_slots(var_list) - - # Store update ops in this list. - with tf.op_scope([], name, self._name) as name: - - # Prepare the global state. 
- with tf.variable_scope(self._name, reuse=self.reuse_global_state): - gs = self._initialize_global_state() - if gs: - global_state = [tf.get_variable("global_state", initializer=gs[0])] - else: - global_state = [] - - # Get the states for each variable in the list. - states = [{key: self.get_slot(var, key) for key in self.get_slot_names()} - for var in var_list] - - # Compute updated values. - grads, params = zip(*grads_and_vars) - args = (params, grads, states, global_state) - updates = self._compute_updates(*args) - new_params, new_states, new_global_state, new_attention = updates - # Assign op for new global state. - update_ops = [tf.assign(gs, ngs) - for gs, ngs in zip(global_state, new_global_state)] - - # Create the assign ops for the params and state variables. - args = (params, states, new_params, new_attention, new_states) - for var, state, new_var, new_var_attend, new_state in zip(*args): - # Assign updates to the state variables. - state_assign_ops = [tf.assign(state_var, new_state[key]) - for key, state_var in state.items()] - - # Update the parameter. - with tf.control_dependencies(state_assign_ops): - if self.use_attention: - # Assign to the attended location, rather than the actual location - # so that the gradients are computed where attention is. - param_update_op = var.assign(new_var_attend) - else: - param_update_op = var.assign(new_var) - - with tf.name_scope("update_" + var.op.name): #, tf.colocate_with(var): - update_ops.append(param_update_op) - - real_params = [self.get_slot(var, "true_param") for var in var_list] - - if global_step is None: - # NOTE: if using the optimizer in a non-test-optimizer setting (e.g. - # on Inception), remove the real_params return value. Otherwise - # the code will throw an error. 
- return self._finish(update_ops, name), real_params - else: - with tf.control_dependencies([self._finish(update_ops, "update")]): - return state_ops.assign_add(global_step, 1, name=name).op, real_params diff --git a/research/learned_optimizer/optimizer/learning_rate_schedule.py b/research/learned_optimizer/optimizer/learning_rate_schedule.py deleted file mode 100644 index 53db8addd3d152bfa02630ec6e37f0cc1776abc8..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/learning_rate_schedule.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A trainable optimizer that learns a learning rate schedule.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from learned_optimizer.optimizer import trainable_optimizer - - -class LearningRateSchedule(trainable_optimizer.TrainableOptimizer): - """Learns a learning rate schedule over a fixed number of iterations.""" - - def __init__(self, initial_rate=0.0, n_steps=1000, **kwargs): - """Initializes the learning rates.""" - self.max_index = tf.constant(n_steps-1, dtype=tf.int32) - - with tf.variable_scope(trainable_optimizer.OPTIMIZER_SCOPE): - initializer = tf.constant_initializer(initial_rate) - self.learning_rates = tf.get_variable("learning_rates", - shape=([n_steps,]), - initializer=initializer) - - super(LearningRateSchedule, self).__init__("LRS", ["itr"], **kwargs) - - def _initialize_state(self, var): - """Return a dictionary mapping names of state variables to their values.""" - return { - "itr": tf.constant(0, dtype=tf.int32), - } - - def _compute_update(self, param, grad, state): - """Compute updates of parameters.""" - - # get the learning rate at the current index, if the index - # is greater than the number of available learning rates, - # use the last one - index = tf.minimum(state["itr"], self.max_index) - learning_rate = tf.gather(self.learning_rates, index) - - # update the parameters: parameter - learning_rate * gradient - updated_param = param - tf.scalar_mul(learning_rate, grad) - - return updated_param, {"itr": state["itr"] + 1} diff --git a/research/learned_optimizer/optimizer/rnn_cells.py b/research/learned_optimizer/optimizer/rnn_cells.py deleted file mode 100644 index 3d68de04ca5318bb0f264d4f4647ddbc6fbe08e0..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/rnn_cells.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2017 Google, 
Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Custom RNN cells for hierarchical RNNs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from learned_optimizer.optimizer import utils - - -class BiasGRUCell(tf.contrib.rnn.RNNCell): - """GRU cell (cf. http://arxiv.org/abs/1406.1078) with an additional bias.""" - - def __init__(self, num_units, activation=tf.tanh, scale=0.1, - gate_bias_init=0., random_seed=None): - self._num_units = num_units - self._activation = activation - self._scale = scale - self._gate_bias_init = gate_bias_init - self._random_seed = random_seed - - @property - def state_size(self): - return self._num_units - - @property - def output_size(self): - return self._num_units - - def __call__(self, inputs, state, bias=None): - # Split the injected bias vector into a bias for the r, u, and c updates. - if bias is None: - bias = tf.zeros((1, 3)) - - r_bias, u_bias, c_bias = tf.split(bias, 3, 1) - - with tf.variable_scope(type(self).__name__): # "BiasGRUCell" - with tf.variable_scope("gates"): # Reset gate and update gate. 
- proj = utils.affine([inputs, state], 2 * self._num_units, - scale=self._scale, bias_init=self._gate_bias_init, - random_seed=self._random_seed) - r_lin, u_lin = tf.split(proj, 2, 1) - r, u = tf.nn.sigmoid(r_lin + r_bias), tf.nn.sigmoid(u_lin + u_bias) - - with tf.variable_scope("candidate"): - proj = utils.affine([inputs, r * state], self._num_units, - scale=self._scale, random_seed=self._random_seed) - c = self._activation(proj + c_bias) - - new_h = u * state + (1 - u) * c - - return new_h, new_h diff --git a/research/learned_optimizer/optimizer/trainable_adam.py b/research/learned_optimizer/optimizer/trainable_adam.py deleted file mode 100644 index 638217f1b723da8633dc7a82623392eaaf190829..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/trainable_adam.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A trainable ADAM optimizer that learns its internal variables.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from learned_optimizer.optimizer import trainable_optimizer as opt -from learned_optimizer.optimizer import utils - - -class TrainableAdam(opt.TrainableOptimizer): - """Adam optimizer with learnable scalar parameters. - - See Kingma et. 
al., 2014 for algorithm (http://arxiv.org/abs/1412.6980). - """ - - def __init__(self, - learning_rate=1e-3, - beta1=0.9, - beta2=0.999, - epsilon=1e-8, - **kwargs): - """Initializes the TrainableAdam optimizer with the given initial values. - - Args: - learning_rate: The learning rate (default: 1e-3). - beta1: The exponential decay rate for the 1st moment estimates. - beta2: The exponential decay rate for the 2nd moment estimates. - epsilon: A small constant for numerical stability. - **kwargs: Any additional keyword arguments for TrainableOptimizer. - - Raises: - ValueError: if the learning rate or epsilon is not positive - ValueError: if beta1 or beta2 is not in (0, 1). - """ - if learning_rate <= 0: - raise ValueError("Learning rate must be positive.") - if epsilon <= 0: - raise ValueError("Epsilon must be positive.") - if not 0 < beta1 < 1 or not 0 < beta2 < 1: - raise ValueError("Beta values must be between 0 and 1, exclusive.") - - self._reuse_vars = False - - with tf.variable_scope(opt.OPTIMIZER_SCOPE): - def inv_sigmoid(x): - return np.log(x / (1.0 - x)) - - self.log_learning_rate = tf.get_variable( - "log_learning_rate", - shape=[], - initializer=tf.constant_initializer(np.log(learning_rate))) - self.beta1_logit = tf.get_variable( - "beta1_logit", - shape=[], - initializer=tf.constant_initializer(inv_sigmoid(beta1))) - self.beta2_logit = tf.get_variable( - "beta2_logit", - shape=[], - initializer=tf.constant_initializer(inv_sigmoid(beta2))) - self.log_epsilon = tf.get_variable( - "log_epsilon", - shape=[], - initializer=tf.constant_initializer(np.log(epsilon))) - - # Key names are derived from Algorithm 1 described in - # https://arxiv.org/pdf/1412.6980.pdf - state_keys = ["m", "v", "t"] - super(TrainableAdam, self).__init__("Adam", state_keys, **kwargs) - - def _initialize_state(self, var): - """Returns a dictionary mapping names of state variables to their values.""" - vectorized_shape = var.get_shape().num_elements(), 1 - - return {key: 
tf.zeros(vectorized_shape) for key in self.state_keys} - - def _compute_update(self, param, grad, state): - """Calculates the new internal state and parameters. - - If the gradient is sparse, updates the appropriate slices in the internal - state and stacks the update tensor. - - Args: - param: A tensor of parameters. - grad: A tensor of gradients with the same shape as param. - state: A dictionary containing any state for the optimizer. - - Returns: - updated_param: The updated parameters. - updated_state: The updated state variables in a dictionary. - """ - - with tf.variable_scope(opt.OPTIMIZER_SCOPE) as scope: - - if self._reuse_vars: - scope.reuse_variables() - else: - self._reuse_vars = True - - (grad_values, first_moment, second_moment, timestep, grad_indices - ) = self._extract_gradients_and_internal_state( - grad, state, tf.shape(param)) - - beta1 = tf.nn.sigmoid(self.beta1_logit) - beta2 = tf.nn.sigmoid(self.beta2_logit) - epsilon = tf.exp(self.log_epsilon) + 1e-10 - learning_rate = tf.exp(self.log_learning_rate) - - old_grad_shape = tf.shape(grad_values) - grad_values = tf.reshape(grad_values, [-1, 1]) - - new_timestep = timestep + 1 - new_first_moment = self._update_adam_estimate( - first_moment, grad_values, beta1) - new_second_moment = self._debias_adam_estimate( - second_moment, tf.square(grad_values), beta2) - - debiased_first_moment = self._debias_adam_estimate( - new_first_moment, beta1, new_timestep) - debiased_second_moment = self._debias_adam_estimate( - new_second_moment, beta2, new_timestep) - - # Propagating through the square root of 0 is very bad for stability. 
- update = (learning_rate * debiased_first_moment / - (tf.sqrt(debiased_second_moment + 1e-10) + epsilon)) - - update = tf.reshape(update, old_grad_shape) - - if grad_indices is not None: - param_shape = tf.shape(param) - update = utils.stack_tensor( - update, grad_indices, param, param_shape[:1]) - new_first_moment = utils.update_slices( - new_first_moment, grad_indices, state["m"], param_shape) - new_second_moment = utils.update_slices( - new_second_moment, grad_indices, state["v"], param_shape) - new_timestep = utils.update_slices( - new_timestep, grad_indices, state["t"], param_shape) - - new_param = param - update - - # collect the update and new state - new_state = { - "m": new_first_moment, - "v": new_second_moment, - "t": new_timestep - } - - return new_param, new_state - - def _update_adam_estimate(self, estimate, value, beta): - """Returns a beta-weighted average of estimate and value.""" - return (beta * estimate) + ((1 - beta) * value) - - def _debias_adam_estimate(self, estimate, beta, t_step): - """Returns a debiased estimate based on beta and the timestep.""" - return estimate / (1 - tf.pow(beta, t_step)) - - def _extract_gradients_and_internal_state(self, grad, state, param_shape): - """Extracts the gradients and relevant internal state. - - If the gradient is sparse, extracts the appropriate slices from the state. - - Args: - grad: The current gradient. - state: The current state. - param_shape: The shape of the parameter (used if gradient is sparse). - - Returns: - grad_values: The gradient value tensor. - first_moment: The first moment tensor (internal state). - second_moment: The second moment tensor (internal state). - timestep: The current timestep (internal state). - grad_indices: The indices for the gradient tensor, if sparse. - None otherwise. 
- """ - grad_values = grad - grad_indices = None - first_moment = state["m"] - second_moment = state["v"] - timestep = state["t"] - - if isinstance(grad, tf.IndexedSlices): - grad_indices, grad_values = utils.accumulate_sparse_gradients(grad) - first_moment = utils.slice_tensor( - first_moment, grad_indices, param_shape) - second_moment = utils.slice_tensor( - second_moment, grad_indices, param_shape) - timestep = utils.slice_tensor(timestep, grad_indices, param_shape) - - return grad_values, first_moment, second_moment, timestep, grad_indices - diff --git a/research/learned_optimizer/optimizer/trainable_optimizer.py b/research/learned_optimizer/optimizer/trainable_optimizer.py deleted file mode 100644 index 955112a9dd1d3b0af5ae2f5f0fe8eff65d2dbfc7..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/trainable_optimizer.py +++ /dev/null @@ -1,574 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A base class definition for trainable optimizers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import itertools - -import tensorflow as tf - -from tensorflow.python.framework import tensor_shape - -OPTIMIZER_SCOPE = "LOL" -_LOCAL_VARIABLE_PREFIX = "local_state_" -_LOCAL_STATE_VARIABLE_COLLECTION = "local_state_collection" -EPSILON = 1e-6 - - -class TrainableOptimizer(tf.train.Optimizer): - """Base class for trainable optimizers. - - A trainable optimizer is an optimizer that has parameters that can themselves - be learned (meta-optimized). - - Subclasses must implement: - _compute_update(self, param, grad, state) - """ - - def __init__(self, name, state_keys, use_attention=False, - use_log_objective=False, obj_train_max_multiplier=-1, - use_second_derivatives=True, use_numerator_epsilon=False, - **kwargs): - """Initializes the optimizer with the given name and settings. - - Args: - name: The name string for this optimizer. - state_keys: The names of any required state variables (list) - use_attention: Whether this optimizer uses attention (Default: True) - use_log_objective: Whether this optimizer uses the logarithm of the - objective when computing the loss (Default: False) - obj_train_max_multiplier: The maximum multiplier for the increase in the - objective before meta-training is stopped. If <= 0, meta-training is - not stopped early. (Default: -1) - use_second_derivatives: Whether this optimizer uses second derivatives in - meta-training. This should be set to False if some second derivatives - in the meta-training problem set are not defined in Tensorflow. - (Default: True) - use_numerator_epsilon: Whether to use epsilon in the numerator when - scaling the problem objective during meta-training. (Default: False) - **kwargs: Any additional keyword arguments. 
- """ - self.use_second_derivatives = use_second_derivatives - self.state_keys = sorted(state_keys) - self.use_attention = use_attention - self.use_log_objective = use_log_objective - self.obj_train_max_multiplier = obj_train_max_multiplier - self.use_numerator_epsilon = use_numerator_epsilon - - use_locking = False - super(TrainableOptimizer, self).__init__(use_locking, name) - - def _create_slots(self, var_list): - """Creates all slots needed by the variables. - - Args: - var_list: A list of `Variable` objects. - """ - for var in var_list: - init_states = self._initialize_state(var) - for slot_name in sorted(init_states): - slot_var_name = "{}_{}".format(self.get_name(), slot_name) - value = init_states[slot_name] - self._get_or_make_slot(var, value, slot_name, slot_var_name) - - def _initialize_state(self, var): - """Initializes any state required for this variable. - - Args: - var: a tensor containing parameters to be optimized - - Returns: - state: a dictionary mapping state keys to initial state values (tensors) - """ - return {} - - def _initialize_global_state(self): - """Initializes any global state values.""" - return [] - - def _apply_common(self, grad, var): - """Applies the optimizer updates to the variables. - - Note: this should only get called via _apply_dense or _apply_sparse when - using the optimizer via optimizer.minimize or optimizer.apply_gradients. - During meta-training, the optimizer.train function should be used to - construct an optimization path that is differentiable. - - Args: - grad: A tensor representing the gradient. - var: A tf.Variable with the same shape as grad. - - Returns: - update_op: A tensorflow op that assigns new values to the variable, and - also defines dependencies that update the state variables for the - optimizer. 
- """ - state = {key: self.get_slot(var, key) for key in self.get_slot_names()} - new_var, new_state = self._compute_update(var, grad, state) - state_assign_ops = [tf.assign(state_var, new_state[key]) - for key, state_var in state.items()] - with tf.control_dependencies(state_assign_ops): - update_op = var.assign(new_var) - - return update_op - - def _apply_dense(self, grad, var): - """Adds ops to apply dense gradients to 'var'.""" - return self._apply_common(grad, var) - - def _apply_sparse(self, grad, var): - """Adds ops to apply sparse gradients to 'var'.""" - return self._apply_common(grad, var) - - def _compute_update(self, param, grad, state): - """Computes the update step for optimization. - - Args: - param: A tensor of parameters to optimize. - grad: The gradient tensor of the objective with respect to the parameters. - (It has the same shape as param.) - state: A dictionary containing any extra state required by the optimizer. - - Returns: - updated_params: The updated parameters. - updated_state: The dictionary of updated state variable(s). - """ - raise NotImplementedError - - def _compute_updates(self, params, grads, states, global_state): - """Maps the compute update functions for each parameter. - - This function can be overriden by a subclass if the subclass wants to - combine information across the different parameters in the list. - - Args: - params: A list of parameter tensors. - grads: A list of gradients corresponding to each parameter. - states: A list of state variables corresponding to each parameter. - global_state: A list of global state variables for the problem. - - Returns: - new_params: The updated parameters. - new_states: The updated states. - new_global_state: The updated global state. - attention_params: A list of attention parameters. This is the same as - new_params if the optimizer does not use attention. - """ - # Zip up the arguments to _compute_update. 
- args = zip(params, grads, states) - - # Call compute_update on each set of parameter/gradient/state args. - new_params, new_states = zip(*list( - itertools.starmap(self._compute_update, args))) - - # Global state is unused in the basic case, just pass it through. - return list(new_params), list(new_states), global_state, list(new_params) - - def train(self, problem, dataset): - """Creates graph operations to train the optimizer. - - Args: - problem: A problem_generator.Problem instance to train on. - dataset: A datasets.Dataset tuple to use when training. - - Returns: - meta_objective: A tensorflow operation for computing the meta-objective - obj_weights: A tensor placeholder for feeding in the objective weights - obj_values: The subproblem objective values during optimization - batches: The batch indexes tensor for overriding with feed_dict - first_unroll: A placeholder signifying if this is a first unroll - (this will propagate the gradients slightly differently). - reset_state: A placeholder signifying that the rnn state should be reset. - output_state: The final state of the optimizer - init_loop_vars_to_override: Local variables that can be assigned to - propagate the optimizer and problem state for unrolling - final_loop_vals: Final values of the loop variables that can be - assigned to init_loop_vars_to_override. 
- """ - - # Placeholder for the objective weights - obj_weights = tf.placeholder(tf.float32) - num_iter = tf.shape(obj_weights)[0] - - # Unpack the dataset and generate the minibatches for training - data, labels = dataset - # Convert the ndarrays to tensors so we can pass them back in via feed_dict - data = tf.constant(data) - labels = tf.constant(labels) - batches = tf.placeholder(tf.int32) - first_unroll = tf.placeholder_with_default(False, []) - reset_state = tf.placeholder_with_default(False, []) - - training_output = collections.namedtuple("TrainingOutput", - ["metaobj", - "obj_weights", - "problem_objectives", - "initial_obj", - "batches", - "first_unroll", - "reset_state", - "output_state", - "init_loop_vars", - "output_loop_vars"]) - - def loop_body(itr, obj_accum, params, attend_params, flattened_states, - global_state, all_obj, unused_init_obj, data, - labels, batches): - """Body of the meta-training while loop for optimizing a sub-problem. - - Args: - itr: The current meta-training iteration. - obj_accum: The accumulated objective over all training steps so far. - params: The parameters of the sub-problem. - attend_params: The parameters of the sub-problems at the attended - location. - flattened_states: The states of the trainable optimizer, sorted and - flattened into a list (since a while loop can't handle nested lists - or dictionaries). - global_state: The global state of the optimizer. - all_obj: The list of all objective values in the training process. - unused_init_obj: The initial objective (unused here, but needed in the - variable list because it's used in a stopping condition in the - loop_cond.) - data: The data for this problem. - labels: The labels corresponding to the data. - batches: The batch indexes needed for shuffled minibatch creation. - - Returns: - itr: The updated meta-training iteration. - obj_accum: The updated accumulated objective. - params: The new parameters of the sub-problem. 
- attend_params: The new parameters of the sub-problems at the attended - location. - flattened_states: The new states of the trainable optimizer. - global_state: The updated global state. - all_obj: The updates list of all objective values. - unused_init_obj: The initial objective. - data: The data for this problem. - labels: The labels corresponding to the data. - batches: The batch indexes needed for shuffled minibatch creation. - """ - batch_indices = tf.gather(batches, itr) - batch_data = tf.gather(data, batch_indices) - batch_labels = tf.gather(labels, batch_indices) - - # Compute the objective over the entire dataset (full batch). - obj = problem.objective(params, data, labels) - - # Compute the gradients on just the current batch - if self.use_attention: - current_obj = problem.objective(attend_params, batch_data, batch_labels) - grads = problem.gradients(current_obj, attend_params) - else: - current_obj = problem.objective(params, batch_data, batch_labels) - grads = problem.gradients(current_obj, params) - - if not self.use_second_derivatives: - new_grads = [] - for grad in grads: - if isinstance(grad, tf.IndexedSlices): - new_grads.append( - tf.IndexedSlices(tf.stop_gradient(grad.values), grad.indices)) - else: - new_grads.append(tf.stop_gradient(grad)) - grads = new_grads - - # store the objective value for the entire problem at each iteration - all_obj = tf.concat([all_obj, tf.reshape(obj, (1,))], 0) - - # accumulate the weighted objective for the entire dataset - acc = tf.gather(obj_weights, itr) * obj - - obj_accum = tf.add(obj_accum, acc) - # Set the shape to keep the shape invariant for obj_accum. Without this, - # the graph builder thinks the tensor shape is unknown on the 2nd iter. 
- obj_accum.set_shape([]) - - # convert flattened_states to dictionaries - dict_states = [dict(zip(self.state_keys, flat_state)) - for flat_state in flattened_states] - - # compute the new parameters and states - args = (params, grads, dict_states, global_state) - updates = self._compute_updates(*args) - new_params, new_states, new_global_state, new_attend_params = updates - - # flatten the states - new_flattened_states = map(flatten_and_sort, new_states) - - return [itr + 1, obj_accum, new_params, new_attend_params, - new_flattened_states, new_global_state, all_obj, unused_init_obj, - data, labels, batches] - - def loop_cond(itr, obj_accum, unused_params, unused_attend_params, - unused_flattened_states, unused_global_state, all_obj, - init_obj, *args): - """Termination conditions of the sub-problem optimization loop.""" - del args # unused - - cond1 = tf.less(itr, num_iter) # We've run < num_iter times - cond2 = tf.is_finite(obj_accum) # The objective is still finite - - if self.obj_train_max_multiplier > 0: - current_obj = tf.gather(all_obj, itr) - # Account for negative init_obj too - max_diff = (self.obj_train_max_multiplier - 1) * tf.abs(init_obj) - max_obj = init_obj + max_diff - # The objective is a reasonable multiplier of the original objective - cond3 = tf.less(current_obj, max_obj) - - return tf.logical_and(tf.logical_and(cond1, cond2), cond3, - name="training_loop_cond") - else: - return tf.logical_and(cond1, cond2, name="training_loop_cond") - - init = self._initialize_training_loop_parameters( - problem, data, labels, batches, first_unroll, reset_state) - loop_vars, invariants, initial_obj, init_loop_vars_to_override = init - - loop_output = tf.while_loop(loop_cond, loop_body, loop_vars, - swap_memory=True, shape_invariants=invariants) - meta_obj, problem_objectives = loop_output[1], loop_output[6] - - # The meta objective is normalized by the initial objective at the start of - # the series of partial unrolls. 
- scaled_meta_objective = self.scale_objective( - meta_obj, problem_objectives, initial_obj) - - final_loop_vals = ( - [initial_obj] + loop_output[2] + loop_output[3] + loop_output[5]) - final_loop_vals.extend(itertools.chain(*loop_output[4])) - - return training_output(scaled_meta_objective, - obj_weights, - problem_objectives, - initial_obj, - batches, - first_unroll, - reset_state, - loop_output[4], - init_loop_vars_to_override, - final_loop_vals) - - def _initialize_training_loop_parameters( - self, problem, data, labels, batches, first_unroll, reset_state): - """Initializes the vars and params needed for the training process. - - Args: - problem: The problem being optimized. - data: The data for the problem. - labels: The corresponding labels for the data. - batches: The indexes needed to create shuffled batches of the data. - first_unroll: Whether this is the first unroll in a partial unrolling. - reset_state: Whether RNN state variables should be reset. - - Returns: - loop_vars: The while loop variables for training. - invariants: The corresponding variable shapes (required by while loop). - initial_obj: The initial objective (used later for scaling). - init_loop_vars_to_override: The loop vars that can be overridden when - performing training via partial unrolls. - """ - # Extract these separately so we don't have to make inter-variable - # dependencies. - initial_tensors = problem.init_tensors() - - return_initial_tensor_values = first_unroll - initial_params_vars, initial_params = local_state_variables( - initial_tensors, return_initial_tensor_values) - initial_attend_params_vars, initial_attend_params = local_state_variables( - initial_tensors, return_initial_tensor_values) - # Recalculate the initial objective for the list on each partial unroll with - # the new initial_params. initial_obj holds the value from the very first - # unroll. 
- initial_obj_init = problem.objective(initial_params, data, labels) - return_initial_obj_init = first_unroll - [initial_obj_var], [initial_obj] = local_state_variables( - [initial_obj_init], return_initial_obj_init) - - # Initialize the loop variables. - initial_itr = tf.constant(0, dtype=tf.int32) - initial_meta_obj = tf.constant(0, dtype=tf.float32) - # N.B. the use of initial_obj_init here rather than initial_obj - initial_problem_objectives = tf.reshape(initial_obj_init, (1,)) - - # Initialize the extra state. - initial_state_vars = [] - initial_state = [] - state_shapes = [] - return_initial_state_values = reset_state - for param in initial_tensors: - param_state_vars, param_state = local_state_variables( - flatten_and_sort(self._initialize_state(param)), - return_initial_state_values) - - initial_state_vars.append(param_state_vars) - initial_state.append(param_state) - state_shapes.append([f.get_shape() for f in param_state]) - - # Initialize any global (problem-level) state. - initial_global_state_vars, initial_global_state = local_state_variables( - self._initialize_global_state(), return_initial_state_values) - - global_shapes = [] - for item in initial_global_state: - global_shapes.append(item.get_shape()) - - # build the list of loop variables: - loop_vars = [ - initial_itr, - initial_meta_obj, - initial_params, # Local variables. - initial_attend_params, # Local variables. - initial_state, # Local variables. - initial_global_state, # Local variables. - initial_problem_objectives, - initial_obj, # Local variable. 
- data, - labels, - batches, - ] - - invariants = [ - initial_itr.get_shape(), - initial_meta_obj.get_shape(), - [t.get_shape() for t in initial_params], - [t.get_shape() for t in initial_attend_params], - state_shapes, - global_shapes, - tensor_shape.TensorShape([None]), # The problem objectives list grows - initial_obj.get_shape(), - tensor_shape.unknown_shape(), # Placeholder shapes are unknown - tensor_shape.unknown_shape(), - tensor_shape.unknown_shape(), - ] - - # Initialize local variables that we will override with final tensors at the - # next iter. - init_loop_vars_to_override = ( - [initial_obj_var] + initial_params_vars + initial_attend_params_vars + - initial_global_state_vars) - init_loop_vars_to_override.extend(itertools.chain(*initial_state_vars)) - - return loop_vars, invariants, initial_obj, init_loop_vars_to_override - - def scale_objective(self, total_obj, all_objs, initial_obj, - obj_scale_eps=1e-6): - """Normalizes the objective based on the initial objective value. - - Args: - total_obj: The total accumulated objective over the training run. - all_objs: A list of all the individual objectives over the training run. - initial_obj: The initial objective value. - obj_scale_eps: The epsilon value to use in computations for stability. - - Returns: - The scaled objective as a single value. - """ - if self.use_log_objective: - if self.use_numerator_epsilon: - scaled_problem_obj = ((all_objs + obj_scale_eps) / - (initial_obj + obj_scale_eps)) - log_scaled_problem_obj = tf.log(scaled_problem_obj) - else: - scaled_problem_obj = all_objs / (initial_obj + obj_scale_eps) - log_scaled_problem_obj = tf.log(scaled_problem_obj + obj_scale_eps) - return tf.reduce_mean(log_scaled_problem_obj) - else: - return total_obj / (initial_obj + obj_scale_eps) - - -def local_state_variables(init_values, return_init_values): - """Create local variables initialized from init_values. - - This will create local variables from a list of init_values. 
Each variable - will be named based on the value's shape and dtype. - - As a convenience, a boolean tensor allows you to return value from - the created local variable or from the original init value. - - Args: - init_values: iterable of tensors - return_init_values: boolean tensor - - Returns: - local_vars: list of the created local variables. - vals: if return_init_values is true, then this returns the values of - init_values. Otherwise it returns the values of the local_vars. - """ - if not init_values: - return [], [] - - # This generates a harmless warning when saving the metagraph. - variable_use_count = tf.get_collection_ref(_LOCAL_STATE_VARIABLE_COLLECTION) - if not variable_use_count: - variable_use_count.append(collections.defaultdict(int)) - variable_use_count = variable_use_count[0] - - local_vars = [] - with tf.variable_scope(OPTIMIZER_SCOPE): - # We can't use the init_value as an initializer as init_value may - # itself depend on some problem variables. This would produce - # inter-variable initialization order dependence which TensorFlow - # sucks at making easy. - for init_value in init_values: - name = create_local_state_variable_name(init_value) - unique_name = name + "_" + str(variable_use_count[name]) - variable_use_count[name] += 1 - # The overarching idea here is to be able to reuse variables between - # different sessions on the same TensorFlow master without errors. By - # uniquifying based on the type and name we mirror the checks made inside - # TensorFlow, while still allowing some memory reuse. Ultimately this is a - # hack due to the broken Session.reset(). - local_vars.append( - tf.get_local_variable( - unique_name, - initializer=tf.zeros( - init_value.get_shape(), dtype=init_value.dtype))) - - # It makes things a lot simpler if we use the init_value the first - # iteration, instead of the variable itself. It allows us to propagate - # gradients through it as well as simplifying initialization. 
The variable - # ends up assigned to after the first iteration. - vals = tf.cond(return_init_values, lambda: init_values, lambda: local_vars) - if len(init_values) == 1: - # tf.cond extracts elements from singleton lists. - vals = [vals] - return local_vars, vals - - -def create_local_state_variable_name(tensor): - """Create a name of the variable based on its type and shape.""" - if not tensor.get_shape().is_fully_defined(): - raise ValueError("Need a fully specified shape to create a local variable.") - - return (_LOCAL_VARIABLE_PREFIX + "_".join( - map(str, tensor.get_shape().as_list())) + "_" + tensor.dtype.name) - - -def is_local_state_variable(op): - """Returns if this op is a local state variable created for training.""" - return op.node_def.op in ["Variable", "VariableV2"] and op.name.startswith( - OPTIMIZER_SCOPE + "/" + _LOCAL_VARIABLE_PREFIX) - - -def flatten_and_sort(dictionary): - """Flattens a dictionary into a list of values sorted by the keys.""" - return [dictionary[k] for k in sorted(dictionary.keys())] diff --git a/research/learned_optimizer/optimizer/utils.py b/research/learned_optimizer/optimizer/utils.py deleted file mode 100644 index 58744f4cb7919a84ecc8702ff1236e4c0a03f218..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/optimizer/utils.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utilities and helper functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - - -def make_finite(t, replacement): - """Replaces non-finite tensor values with the replacement value.""" - return tf.where(tf.is_finite(t), t, replacement) - - -def asinh(x): - """Computes the inverse hyperbolic sine function (in tensorflow).""" - return tf.log(x + tf.sqrt(1. + x ** 2)) - - -def affine(inputs, output_size, scope="Affine", scale=0.1, vec_mean=0., - include_bias=True, bias_init=0., random_seed=None): - """Computes an affine function of the inputs. - - Creates or recalls tensorflow variables "Matrix" and "Bias" - to generate an affine operation on the input. - - If the inputs are a list of tensors, they are concatenated together. - - Initial weights for the matrix are drawn from a Gaussian with zero - mean and standard deviation that is the given scale divided by the - square root of the input dimension. Initial weights for the bias are - set to zero. - - Args: - inputs: List of tensors with shape (batch_size, input_size) - output_size: Size (dimension) of the output - scope: Variable scope for these parameters (default: "Affine") - scale: Initial weight scale for the matrix parameters (default: 0.1), - this constant is divided by the sqrt of the input size to get the - std. deviation of the initial weights - vec_mean: The mean for the random initializer - include_bias: Whether to include the bias term - bias_init: The initializer bias (default 0.) - random_seed: Random seed for random initializers. (Default: None) - - Returns: - output: Tensor with shape (batch_size, output_size) - """ - - # Concatenate the input arguments. 
- x = tf.concat(inputs, 1) - - with tf.variable_scope(scope): - input_size = x.get_shape().as_list()[1] - - sigma = scale / np.sqrt(input_size) - rand_init = tf.random_normal_initializer(mean=vec_mean, stddev=sigma, - seed=random_seed) - - matrix = tf.get_variable("Matrix", [input_size, output_size], - dtype=tf.float32, initializer=rand_init) - - if include_bias: - bias = tf.get_variable("Bias", [output_size], dtype=tf.float32, - initializer=tf.constant_initializer(bias_init, - tf.float32)) - else: - bias = 0. - output = tf.matmul(x, matrix) + bias - - return output - - -def project(inputs, weights, bias=0., activation=tf.identity): - """Computes an affine or linear projection of the inputs. - - Projects the inputs onto the given weight vector and (optionally) - adds a bias and passes the result through an activation function. - - Args: - inputs: matrix of inputs with shape [batch_size, dim] - weights: weight matrix with shape [dim, output_dim] - bias: bias vector with shape [output_dim] (default: 0) - activation: nonlinear activation function (default: tf.identity) - - Returns: - outputs: an op which computes activation(inputs @ weights + bias) - """ - return activation(tf.matmul(inputs, weights) + bias) - - -def new_mean_squared(grad_vec, decay, ms): - """Calculates the new accumulated mean squared of the gradient. - - Args: - grad_vec: the vector for the current gradient - decay: the decay term - ms: the previous mean_squared value - - Returns: - the new mean_squared value - """ - decay_size = decay.get_shape().num_elements() - decay_check_ops = [ - tf.assert_less_equal(decay, 1., summarize=decay_size), - tf.assert_greater_equal(decay, 0., summarize=decay_size)] - - with tf.control_dependencies(decay_check_ops): - grad_squared = tf.square(grad_vec) - - # If the previous mean_squared is the 0 vector, don't use the decay and just - # return the full grad_squared. This should only happen on the first timestep. 
- decay = tf.cond(tf.reduce_all(tf.equal(ms, 0.)), - lambda: tf.zeros_like(decay, dtype=tf.float32), lambda: decay) - - # Update the running average of squared gradients. - epsilon = 1e-12 - return (1. - decay) * (grad_squared + epsilon) + decay * ms - - -def rms_scaling(gradient, decay, ms, update_ms=True): - """Vectorizes and scales a tensor of gradients. - - Args: - gradient: the current gradient - decay: the current decay value. - ms: the previous mean squared value - update_ms: Whether to update the mean squared value (default: True) - - Returns: - The scaled gradient and the new ms value if update_ms is True, - the old ms value otherwise. - """ - - # Vectorize the gradients and compute the squared gradients. - grad_vec = tf.reshape(gradient, [-1, 1]) - - if update_ms: - ms = new_mean_squared(grad_vec, decay, ms) - - # Scale the current gradients by the RMS, squashed by the asinh function. - scaled_gradient = asinh(grad_vec / tf.sqrt(ms + 1e-16)) - - return scaled_gradient, ms - - -def accumulate_sparse_gradients(grad): - """Accumulates repeated indices of a sparse gradient update. - - Args: - grad: a tf.IndexedSlices gradient - - Returns: - grad_indices: unique indices - grad_values: gradient values corresponding to the indices - """ - - grad_indices, grad_segments = tf.unique(grad.indices) - grad_values = tf.unsorted_segment_sum(grad.values, grad_segments, - tf.shape(grad_indices)[0]) - return grad_indices, grad_values - - -def slice_tensor(dense_tensor, indices, head_dims): - """Extracts slices from a partially flattened dense tensor. - - indices is assumed to index into the first dimension of head_dims. - dense_tensor is assumed to have a shape [D_0, D_1, ...] such that - prod(head_dims) == D_0. This function will extract slices along the - first_dimension of head_dims. - - Example: - - Consider a tensor with shape head_dims = [100, 2] and a dense_tensor with - shape [200, 3]. Note that the first dimension of dense_tensor equals the - product of head_dims. 
This function will reshape dense_tensor such that - its shape is now [100, 2, 3] (i.e. the first dimension became head-dims) - and then slice it along the first dimension. After slicing, the slices will - have their initial dimensions flattened just as they were in dense_tensor - (e.g. if there are 4 indices, the return value will have a shape of [4, 3]). - - Args: - dense_tensor: a N-D dense tensor. Shape: [D_0, D_1, ...] - indices: a 1-D integer tensor. Shape: [K] - head_dims: True dimensions of the dense_tensor's first dimension. - - Returns: - Extracted slices. Shape [K, D_1, ...] - """ - - tail_dims = tf.shape(dense_tensor)[1:] - dense_tensor = tf.reshape(dense_tensor, - tf.concat([head_dims, tail_dims], 0)) - - slices = tf.gather(dense_tensor, indices) - # NOTE(siege): This kills the shape annotation. - return tf.reshape(slices, tf.concat([[-1], tail_dims], 0)) - - -def stack_tensor(slices, indices, dense_tensor, head_dims): - """Reconsititutes a tensor from slices and corresponding indices. - - This is an inverse operation to slice_tensor. Missing slices are set to 0. - - Args: - slices: a tensor. Shape [K, D_1, ...] - indices: a 1-D integer tensor. Shape: [K] - dense_tensor: the original tensor the slices were taken - from. Shape: [D_0, D_1, ...] - head_dims: True dimensions of the dense_tensor's first dimension. - - Returns: - Reconsituted tensor. Shape: [D_0, D_1, ...] - """ - # NOTE(siege): This cast shouldn't be necessary. - indices = tf.cast(indices, tf.int32) - - tail_dims = tf.shape(dense_tensor)[1:] - dense_shape = tf.concat([head_dims, tail_dims], 0) - - slices = tf.reshape(slices, tf.concat([[-1], dense_shape[1:]], 0)) - indices = tf.expand_dims(indices, -1) - - return tf.reshape(tf.scatter_nd(indices, slices, dense_shape), - tf.shape(dense_tensor)) - - -def update_slices(slices, indices, dense_tensor, head_dims): - """Reconstitutes a tensor from slices and corresponding indices. 
- - Like _stack_tensor, but instead of setting missing slices to 0, sets them to - what they were in the original tensor. The return value is reshaped to be - the same as dense_tensor. - - Args: - slices: a tensor. Shape [K, D_1, ...] - indices: a 1-D integer tensor. Shape: [K] - dense_tensor: the original tensor the slices were taken - from. Shape: [D_0, D_1, ...] - head_dims: True dimensions of the dense_tensor's first dimension. - - Returns: - Reconsituted tensor. Shape: [D_0, D_1, ...] - """ - # NOTE(siege): This cast shouldn't be necessary. - indices = tf.cast(indices, tf.int32) - - tail_dims = tf.shape(dense_tensor)[1:] - dense_shape = tf.concat([head_dims, tail_dims], 0) - - update_mask_vals = tf.fill(tf.shape(indices), 1) - reshaped_indices = tf.expand_dims(indices, -1) - update_mask = tf.equal( - tf.scatter_nd(reshaped_indices, update_mask_vals, head_dims[:1]), 1) - - reshaped_dense_slices = tf.reshape( - stack_tensor(slices, indices, dense_tensor, head_dims), dense_shape) - reshaped_dense_tensor = tf.reshape(dense_tensor, dense_shape) - - return tf.reshape( - tf.where(update_mask, reshaped_dense_slices, reshaped_dense_tensor), - tf.shape(dense_tensor)) diff --git a/research/learned_optimizer/problems/BUILD b/research/learned_optimizer/problems/BUILD deleted file mode 100644 index c704618821b36ca23f221f724888cde4e5d5a5ad..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/BUILD +++ /dev/null @@ -1,43 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -# Libraries -# ===== - -py_library( - name = "datasets", - srcs = ["datasets.py"], - deps = [ - ], -) - -py_library( - name = "model_adapter", - srcs = ["model_adapter.py"], - deps = [ - ":problem_generator", - ], -) - -py_library( - name = "problem_generator", - srcs = ["problem_generator.py"], - deps = [ - ":problem_spec", - ], -) - -py_library( - name = "problem_sets", - srcs = ["problem_sets.py"], - deps = [ - ":datasets", - ":model_adapter", - 
":problem_generator", - ], -) - -py_library( - name = "problem_spec", - srcs = ["problem_spec.py"], - deps = [], -) diff --git a/research/learned_optimizer/problems/datasets.py b/research/learned_optimizer/problems/datasets.py deleted file mode 100644 index edf3df6532178b0e60ab93c78611d2313798e639..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/datasets.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Functions to generate or load datasets for supervised learning.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple - -import numpy as np -from sklearn.datasets import make_classification - -MAX_SEED = 4294967295 - - -class Dataset(namedtuple("Dataset", "data labels")): - """Helper class for managing a supervised learning dataset. - - Args: - data: an array of type float32 with N samples, each of which is the set - of features for that sample. (Shape (N, D_i), where N is the number of - samples and D_i is the number of features for that sample.) - labels: an array of type int32 or int64 with N elements, indicating the - class label for the corresponding set of features in data. - """ - # Since this is an immutable object, we don't need to reserve slots. 
- __slots__ = () - - @property - def size(self): - """Dataset size (number of samples).""" - return len(self.data) - - def batch_indices(self, num_batches, batch_size): - """Creates indices of shuffled minibatches. - - Args: - num_batches: the number of batches to generate - batch_size: the size of each batch - - Returns: - batch_indices: a list of minibatch indices, arranged so that the dataset - is randomly shuffled. - - Raises: - ValueError: if the data and labels have different lengths - """ - if len(self.data) != len(self.labels): - raise ValueError("Labels and data must have the same number of samples.") - - batch_indices = [] - - # Follows logic in mnist.py to ensure we cover the entire dataset. - index_in_epoch = 0 - dataset_size = len(self.data) - dataset_indices = np.arange(dataset_size) - np.random.shuffle(dataset_indices) - - for _ in range(num_batches): - start = index_in_epoch - index_in_epoch += batch_size - if index_in_epoch > dataset_size: - - # Finished epoch, reshuffle. - np.random.shuffle(dataset_indices) - - # Start next epoch. - start = 0 - index_in_epoch = batch_size - - end = index_in_epoch - batch_indices.append(dataset_indices[start:end].tolist()) - - return batch_indices - - -def noisy_parity_class(n_samples, - n_classes=2, - n_context_ids=5, - noise_prob=0.25, - random_seed=None): - """Returns a randomly generated sparse-to-sparse dataset. - - The label is a parity class of a set of context classes. - - Args: - n_samples: number of samples (data points) - n_classes: number of class labels (default: 2) - n_context_ids: how many classes to take the parity of (default: 5). 
- noise_prob: how often to corrupt the label (default: 0.25) - random_seed: seed used for drawing the random data (default: None) - Returns: - dataset: A Dataset namedtuple containing the generated data and labels - """ - np.random.seed(random_seed) - x = np.random.randint(0, n_classes, [n_samples, n_context_ids]) - noise = np.random.binomial(1, noise_prob, [n_samples]) - y = (np.sum(x, 1) + noise) % n_classes - return Dataset(x.astype("float32"), y.astype("int32")) - - -def random(n_features, n_samples, n_classes=2, sep=1.0, random_seed=None): - """Returns a randomly generated classification dataset. - - Args: - n_features: number of features (dependent variables) - n_samples: number of samples (data points) - n_classes: number of class labels (default: 2) - sep: separation of the two classes, a higher value corresponds to - an easier classification problem (default: 1.0) - random_seed: seed used for drawing the random data (default: None) - - Returns: - dataset: A Dataset namedtuple containing the generated data and labels - """ - # Generate the problem data. - x, y = make_classification(n_samples=n_samples, - n_features=n_features, - n_informative=n_features, - n_redundant=0, - n_classes=n_classes, - class_sep=sep, - random_state=random_seed) - - return Dataset(x.astype("float32"), y.astype("int32")) - - -def random_binary(n_features, n_samples, random_seed=None): - """Returns a randomly generated dataset of binary values. 
- - Args: - n_features: number of features (dependent variables) - n_samples: number of samples (data points) - random_seed: seed used for drawing the random data (default: None) - - Returns: - dataset: A Dataset namedtuple containing the generated data and labels - """ - random_seed = (np.random.randint(MAX_SEED) if random_seed is None - else random_seed) - np.random.seed(random_seed) - - x = np.random.randint(2, size=(n_samples, n_features)) - y = np.zeros((n_samples, 1)) - - return Dataset(x.astype("float32"), y.astype("int32")) - - -def random_symmetric(n_features, n_samples, random_seed=None): - """Returns a randomly generated dataset of values and their negatives. - - Args: - n_features: number of features (dependent variables) - n_samples: number of samples (data points) - random_seed: seed used for drawing the random data (default: None) - - Returns: - dataset: A Dataset namedtuple containing the generated data and labels - """ - random_seed = (np.random.randint(MAX_SEED) if random_seed is None - else random_seed) - np.random.seed(random_seed) - - x1 = np.random.normal(size=(int(n_samples/2), n_features)) - x = np.concatenate((x1, -x1), axis=0) - y = np.zeros((n_samples, 1)) - - return Dataset(x.astype("float32"), y.astype("int32")) - - -def random_mlp(n_features, n_samples, random_seed=None, n_layers=6, width=20): - """Returns a generated output of an MLP with random weights. - - Args: - n_features: number of features (dependent variables) - n_samples: number of samples (data points) - random_seed: seed used for drawing the random data (default: None) - n_layers: number of layers in random MLP - width: width of the layers in random MLP - - Returns: - dataset: A Dataset namedtuple containing the generated data and labels - """ - random_seed = (np.random.randint(MAX_SEED) if random_seed is None - else random_seed) - np.random.seed(random_seed) - - x = np.random.normal(size=(n_samples, n_features)) - y = x - n_in = n_features - scale_factor = np.sqrt(2.) 
/ np.sqrt(n_features) - for _ in range(n_layers): - weights = np.random.normal(size=(n_in, width)) * scale_factor - y = np.dot(y, weights).clip(min=0) - n_in = width - - y = y[:, 0] - y[y > 0] = 1 - - return Dataset(x.astype("float32"), y.astype("int32")) - - -EMPTY_DATASET = Dataset(np.array([], dtype="float32"), - np.array([], dtype="int32")) diff --git a/research/learned_optimizer/problems/model_adapter.py b/research/learned_optimizer/problems/model_adapter.py deleted file mode 100644 index 8455992366dd46172e2a78471004779b1a4f091b..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/model_adapter.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Implementation of the ModelAdapter class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import mock -import tensorflow as tf - -from learned_optimizer.problems import problem_generator as pg - - -class ModelAdapter(pg.Problem): - """Adapts Tensorflow models/graphs into a form suitable for meta-training. - - This class adapts an existing TensorFlow graph into a form suitable for - meta-training a learned optimizer. - """ - - def __init__(self, make_loss_and_init_fn): - """Wraps a model in the Problem interface. 
- - make_loss_and_init argument is a callable that returns a tuple of - two other callables as follows. - - The first will construct most of the graph and return the problem loss. It - is essential that this graph contains the totality of the model's variables, - but none of its queues. - - The second will return construct the model initialization graph given a list - of parameters and return a callable that is passed an instance of - tf.Session, and should initialize the models' parameters. - - An argument value function would look like this: - - ```python - def make_loss_and_init_fn(): - inputs = queued_reader() - - def make_loss(): - return create_model_with_variables(inputs) - - def make_init_fn(parameters): - saver = tf.Saver(parameters) - def init_fn(sess): - sess.restore(sess, ...) - return init_fn - - return make_loss, make_init_fn - ``` - - Args: - make_loss_and_init_fn: a callable, as described aboce - """ - make_loss_fn, make_init_fn = make_loss_and_init_fn() - - self.make_loss_fn = make_loss_fn - self.parameters, self.constants = _get_variables(make_loss_fn) - - if make_init_fn is not None: - init_fn = make_init_fn(self.parameters + self.constants) - else: - init_op = tf.initialize_variables(self.parameters + self.constants) - init_fn = lambda sess: sess.run(init_op) - - tf.logging.info("ModelAdapter parameters: %s", - [op.name for op in self.parameters]) - tf.logging.info("ModelAdapter constants: %s", - [op.name for op in self.constants]) - - super(ModelAdapter, self).__init__( - [], random_seed=None, noise_stdev=0.0, init_fn=init_fn) - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return self.parameters - - def init_variables(self, seed=None): - """Returns a list of variables with the given shape.""" - # NOTE(siege): This is awkward, as these are not set as trainable. 
- return self.parameters - - def objective(self, parameters, data=None, labels=None): - """Computes the objective given a list of parameters. - - Args: - parameters: The parameters to optimize (as a list of tensors) - data: An optional batch of data for calculating objectives - labels: An optional batch of corresponding labels - - Returns: - A scalar tensor representing the objective value - """ - # We need to set up a mapping based on the original parameter names, because - # the parameters passed can be arbitrary tensors. - parameter_mapping = { - old_p.name: p - for old_p, p in zip(self.parameters, parameters) - } - - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - return _make_with_custom_variables(self.make_loss_fn, parameter_mapping) - - -def _get_variables(func): - """Calls func, returning any variables created. - - The created variables are modified to not be trainable, and are placed into - the LOCAL_VARIABLES collection. - - Args: - func: Function to be called. - - Returns: - A tuple (variables, constants) where the first element is a list of - trainable variables and the second is the non-trainable variables. - """ - variables = [] - constants = [] - - # We need to create these variables like normal, so grab the original - # constructor before we mock it. - original_init = tf.Variable.__init__ - - def custom_init(self, *args, **kwargs): - trainable = kwargs["trainable"] - kwargs["trainable"] = False - # Making these variables local keeps them out of the optimizer's checkpoints - # somehow. - kwargs["collections"] = [tf.GraphKeys.LOCAL_VARIABLES] - original_init(self, *args, **kwargs) - if trainable: - variables.append(self) - else: - constants.append(self) - - # This name-scope is just a nicety for TensorBoard. 
- with tf.name_scope("unused_graph"): - with mock.patch.object(tf.Variable, "__init__", custom_init): - func() - - return variables, constants - - -def _make_with_custom_variables(func, variable_mapping): - """Calls func and replaces the value of some variables created in it. - - Args: - func: Function to be called. - variable_mapping: A mapping of variable name to the replacement tensor or - tf.Variable. - - Returns: - The return value of func is returned. - """ - original_value = tf.Variable.value - - def custom_value(self): - if self.name in variable_mapping: - replacement = variable_mapping[self.name] - tf.logging.info("Replaced %s with %s" % (self.name, replacement)) - - # value() method needs to return a tensor, we need to call value on it. - # This has to be done manually like this otherwise we'll get an infinite - # loop. - if isinstance(replacement, tf.Variable): - replacement = original_value(replacement) - - return replacement - else: - return original_value(self) - - with mock.patch.object(tf.Variable, "value", custom_value): - with mock.patch.object(tf.Variable, "_AsTensor", custom_value): - return func() diff --git a/research/learned_optimizer/problems/problem_generator.py b/research/learned_optimizer/problems/problem_generator.py deleted file mode 100644 index abe1008faadbb04163bc27e0b991e3ec4ba9e6bc..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/problem_generator.py +++ /dev/null @@ -1,1016 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generates toy optimization problems. - -This module contains a base class, Problem, that defines a minimal interface -for optimization problems, and a few specific problem types that subclass it. - -Test functions for optimization: http://www.sfu.ca/~ssurjano/optimization.html -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from learned_optimizer.problems import problem_spec as prob_spec - -tf.app.flags.DEFINE_float("l2_reg_scale", 1e-3, - """Scaling factor for parameter value regularization - in softmax classifier problems.""") -FLAGS = tf.app.flags.FLAGS - -EPSILON = 1e-6 -MAX_SEED = 4294967295 -PARAMETER_SCOPE = "parameters" - -_Spec = prob_spec.Spec - - -class Problem(object): - """Base class for optimization problems. - - This defines an interface for optimization problems, including objective and - gradients functions and a feed_generator function that yields data to pass to - feed_dict in tensorflow. - - Subclasses of Problem must (at the minimum) override the objective method, - which computes the objective/loss/cost to minimize, and specify the desired - shape of the parameters in a list in the param_shapes attribute. - """ - - def __init__(self, param_shapes, random_seed, noise_stdev, init_fn=None): - """Initializes a global random seed for the problem. - - Args: - param_shapes: A list of tuples defining the expected shapes of the - parameters for this problem - random_seed: Either an integer (or None, in which case the seed is - randomly drawn) - noise_stdev: Strength (standard deviation) of added gradient noise - init_fn: A function taking a tf.Session object that is used to - initialize the problem's variables. 
- - Raises: - ValueError: If the random_seed is not an integer and not None - """ - if random_seed is not None and not isinstance(random_seed, int): - raise ValueError("random_seed must be an integer or None") - - # Pick a random seed. - self.random_seed = (np.random.randint(MAX_SEED) if random_seed is None - else random_seed) - - # Store the noise level. - self.noise_stdev = noise_stdev - - # Set the random seed to ensure any random data in the problem is the same. - np.random.seed(self.random_seed) - - # Store the parameter shapes. - self.param_shapes = param_shapes - - if init_fn is not None: - self.init_fn = init_fn - else: - self.init_fn = lambda _: None - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_normal(shape, seed=seed) for shape in self.param_shapes] - - def init_variables(self, seed=None): - """Returns a list of variables with the given shape.""" - with tf.variable_scope(PARAMETER_SCOPE): - params = [tf.Variable(param) for param in self.init_tensors(seed)] - return params - - def objective(self, parameters, data=None, labels=None): - """Computes the objective given a list of parameters. - - Args: - parameters: The parameters to optimize (as a list of tensors) - data: An optional batch of data for calculating objectives - labels: An optional batch of corresponding labels - - Returns: - A scalar tensor representing the objective value - """ - raise NotImplementedError - - def gradients(self, objective, parameters): - """Compute gradients of the objective with respect to the parameters. - - Args: - objective: The objective op (e.g. 
output of self.objective()) - parameters: A list of tensors (the parameters to optimize) - - Returns: - A list of tensors representing the gradient for each parameter, - returned in the same order as the given list - """ - grads = tf.gradients(objective, list(parameters)) - noisy_grads = [] - - for grad in grads: - if isinstance(grad, tf.IndexedSlices): - noise = self.noise_stdev * tf.random_normal(tf.shape(grad.values)) - new_grad = tf.IndexedSlices(grad.values + noise, grad.indices) - else: - new_grad = grad + self.noise_stdev * tf.random_normal(grad.get_shape()) - noisy_grads.append(new_grad) - - return noisy_grads - - -class Quadratic(Problem): - """Optimizes a random quadratic function. - - The objective is: f(x) = (1/2) ||Wx - y||_2^2 - where W is a random Gaussian matrix and y is a random Gaussian vector. - """ - - def __init__(self, ndim, random_seed=None, noise_stdev=0.0): - """Initializes a random quadratic problem.""" - param_shapes = [(ndim, 1)] - super(Quadratic, self).__init__(param_shapes, random_seed, noise_stdev) - - # Generate a random problem instance. - self.w = np.random.randn(ndim, ndim).astype("float32") - self.y = np.random.randn(ndim, 1).astype("float32") - - def objective(self, params, data=None, labels=None): - """Quadratic objective (see base class for details).""" - return tf.nn.l2_loss(tf.matmul(self.w, params[0]) - self.y) - - -class SoftmaxClassifier(Problem): - """Helper functions for supervised softmax classification problems.""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_normal(shape, seed=seed) * 1.2 / np.sqrt(shape[0]) - for shape in self.param_shapes] - - def inference(self, params, data): - """Computes logits given parameters and data. 
- - Args: - params: List of parameter tensors or variables - data: Batch of features with samples along the first dimension - - Returns: - logits: Un-normalized logits with shape (num_samples, num_classes) - """ - raise NotImplementedError - - def objective(self, params, data, labels): - """Computes the softmax cross entropy. - - Args: - params: List of parameter tensors or variables - data: Batch of features with samples along the first dimension - labels: Vector of labels with the same number of samples as the data - - Returns: - loss: Softmax cross entropy loss averaged over the samples in the batch - - Raises: - ValueError: If the objective is to be computed over >2 classes, because - this operation is broken in tensorflow at the moment. - """ - # Forward pass. - logits = self.inference(params, data) - - # Compute the loss. - l2reg = [tf.reduce_sum(param ** 2) for param in params] - if int(logits.get_shape()[1]) == 2: - labels = tf.cast(labels, tf.float32) - losses = tf.nn.sigmoid_cross_entropy_with_logits( - labels=labels, logits=logits[:, 0]) - else: - raise ValueError("Unable to compute softmax cross entropy for more than" - " 2 classes.") - - return tf.reduce_mean(losses) + tf.reduce_mean(l2reg) * FLAGS.l2_reg_scale - - def argmax(self, logits): - """Samples the most likely class label given the logits. - - Args: - logits: Un-normalized logits with shape (num_samples, num_classes) - - Returns: - predictions: Predicted class labels, has shape (num_samples,) - """ - return tf.cast(tf.argmax(tf.nn.softmax(logits), 1), tf.int32) - - def accuracy(self, params, data, labels): - """Computes the accuracy (fraction of correct classifications). 
- - Args: - params: List of parameter tensors or variables - data: Batch of features with samples along the first dimension - labels: Vector of labels with the same number of samples as the data - - Returns: - accuracy: Fraction of correct classifications across the batch - """ - predictions = self.argmax(self.inference(params, data)) - return tf.contrib.metrics.accuracy(predictions, tf.cast(labels, tf.int32)) - - -class SoftmaxRegression(SoftmaxClassifier): - """Builds a softmax regression problem.""" - - def __init__(self, n_features, n_classes, activation=tf.identity, - random_seed=None, noise_stdev=0.0): - self.activation = activation - self.n_features = n_features - param_shapes = [(n_features, n_classes), (n_classes,)] - super(SoftmaxRegression, self).__init__(param_shapes, - random_seed, - noise_stdev) - - def inference(self, params, data): - features = tf.reshape(data, (-1, self.n_features)) - return tf.matmul(features, params[0]) + params[1] - - -class SparseSoftmaxRegression(SoftmaxClassifier): - """Builds a sparse input softmax regression problem.""" - - def __init__(self, - n_features, - n_classes, - activation=tf.identity, - random_seed=None, - noise_stdev=0.0): - self.activation = activation - self.n_features = n_features - param_shapes = [(n_classes, n_features), (n_features, n_classes), ( - n_classes,)] - super(SparseSoftmaxRegression, self).__init__(param_shapes, random_seed, - noise_stdev) - - def inference(self, params, data): - all_embeddings, softmax_weights, softmax_bias = params - embeddings = tf.nn.embedding_lookup(all_embeddings, tf.cast(data, tf.int32)) - embeddings = tf.reduce_sum(embeddings, 1) - return tf.matmul(embeddings, softmax_weights) + softmax_bias - - -class OneHotSparseSoftmaxRegression(SoftmaxClassifier): - """Builds a sparse input softmax regression problem. - - This is identical to SparseSoftmaxRegression, but without using embedding - ops. 
- """ - - def __init__(self, - n_features, - n_classes, - activation=tf.identity, - random_seed=None, - noise_stdev=0.0): - self.activation = activation - self.n_features = n_features - self.n_classes = n_classes - param_shapes = [(n_classes, n_features), (n_features, n_classes), ( - n_classes,)] - super(OneHotSparseSoftmaxRegression, self).__init__(param_shapes, - random_seed, - noise_stdev) - - def inference(self, params, data): - all_embeddings, softmax_weights, softmax_bias = params - num_ids = tf.shape(data)[1] - one_hot_embeddings = tf.one_hot(tf.cast(data, tf.int32), self.n_classes) - one_hot_embeddings = tf.reshape(one_hot_embeddings, [-1, self.n_classes]) - embeddings = tf.matmul(one_hot_embeddings, all_embeddings) - embeddings = tf.reshape(embeddings, [-1, num_ids, self.n_features]) - embeddings = tf.reduce_sum(embeddings, 1) - return tf.matmul(embeddings, softmax_weights) + softmax_bias - - -class FullyConnected(SoftmaxClassifier): - """Builds a multi-layer perceptron classifier.""" - - def __init__(self, n_features, n_classes, hidden_sizes=(32, 64), - activation=tf.nn.sigmoid, random_seed=None, noise_stdev=0.0): - """Initializes an multi-layer perceptron classification problem.""" - # Store the number of features and activation function. - self.n_features = n_features - self.activation = activation - - # Define the network as a list of weight + bias shapes for each layer. - param_shapes = [] - for ix, sz in enumerate(hidden_sizes + (n_classes,)): - - # The previous layer"s size (n_features if input). - prev_size = n_features if ix == 0 else hidden_sizes[ix - 1] - - # Weight shape for this layer. - param_shapes.append((prev_size, sz)) - - # Bias shape for this layer. - param_shapes.append((sz,)) - - super(FullyConnected, self).__init__(param_shapes, random_seed, noise_stdev) - - def inference(self, params, data): - # Flatten the features into a vector. - features = tf.reshape(data, (-1, self.n_features)) - - # Pass the data through the network. 
- preactivations = tf.matmul(features, params[0]) + params[1] - - for layer in range(2, len(self.param_shapes), 2): - net = self.activation(preactivations) - preactivations = tf.matmul(net, params[layer]) + params[layer + 1] - - return preactivations - - def accuracy(self, params, data, labels): - """Computes the accuracy (fraction of correct classifications). - - Args: - params: List of parameter tensors or variables - data: Batch of features with samples along the first dimension - labels: Vector of labels with the same number of samples as the data - - Returns: - accuracy: Fraction of correct classifications across the batch - """ - predictions = self.argmax(self.activation(self.inference(params, data))) - return tf.contrib.metrics.accuracy(predictions, tf.cast(labels, tf.int32)) - - -class ConvNet(SoftmaxClassifier): - """Builds an N-layer convnet for image classification.""" - - def __init__(self, - image_shape, - n_classes, - filter_list, - activation=tf.nn.relu, - random_seed=None, - noise_stdev=0.0): - # Number of channels, number of pixels in x- and y- dimensions. - n_channels, px, py = image_shape - - # Store the activation. - self.activation = activation - - param_shapes = [] - input_size = n_channels - for fltr in filter_list: - # Add conv2d filters. - param_shapes.append((fltr[0], fltr[1], input_size, fltr[2])) - input_size = fltr[2] - - # Number of units in the final (dense) layer. - self.affine_size = input_size * px * py - - param_shapes.append((self.affine_size, n_classes)) # affine weights - param_shapes.append((n_classes,)) # affine bias - - super(ConvNet, self).__init__(param_shapes, random_seed, noise_stdev) - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_normal(shape, mean=0., stddev=0.01, seed=seed) - for shape in self.param_shapes] - - def inference(self, params, data): - - # Unpack. 
- w_conv_list = params[:-2] - output_w, output_b = params[-2:] - - conv_input = data - for w_conv in w_conv_list: - layer = tf.nn.conv2d(conv_input, w_conv, strides=[1] * 4, padding="SAME") - output = self.activation(layer) - conv_input = output - - # Flatten. - flattened = tf.reshape(conv_input, (-1, self.affine_size)) - - # Fully connected layer. - return tf.matmul(flattened, output_w) + output_b - - -class Bowl(Problem): - """A 2D quadratic bowl.""" - - def __init__(self, condition_number, angle=0.0, - random_seed=None, noise_stdev=0.0): - assert condition_number > 0, "Condition number must be positive." - - # Define parameter shapes. - param_shapes = [(2, 1)] - super(Bowl, self).__init__(param_shapes, random_seed, noise_stdev) - - self.condition_number = condition_number - self.angle = angle - self._build_matrix(condition_number, angle) - - def _build_matrix(self, condition_number, angle): - """Builds the Hessian matrix.""" - hessian = np.array([[condition_number, 0.], [0., 1.]], dtype="float32") - - # Build the rotation matrix. - rotation_matrix = np.array([ - [np.cos(angle), -np.sin(angle)], - [np.sin(angle), np.cos(angle)] - ]) - - # The objective is 0.5 * || Ax ||_2^2 - # where the data matrix (A) is: sqrt(Hessian).dot(rotation_matrix). 
- self.matrix = np.sqrt(hessian).dot(rotation_matrix) - - def objective(self, params, data=None, labels=None): - mtx = tf.constant(self.matrix, dtype=tf.float32) - return tf.nn.l2_loss(tf.matmul(mtx, params[0])) - - def surface(self, xlim=5, ylim=5, n=50): - xm, ym = _mesh(xlim, ylim, n) - pts = np.vstack([xm.ravel(), ym.ravel()]) - zm = 0.5 * np.linalg.norm(self.matrix.dot(pts), axis=0) ** 2 - return xm, ym, zm.reshape(n, n) - - -class Problem2D(Problem): - - def __init__(self, random_seed=None, noise_stdev=0.0): - param_shapes = [(2,)] - super(Problem2D, self).__init__(param_shapes, random_seed, noise_stdev) - - def surface(self, n=50, xlim=5, ylim=5): - """Computes the objective surface over a 2d mesh.""" - - # Create a mesh over the given coordinate ranges. - xm, ym = _mesh(xlim, ylim, n) - - with tf.Graph().as_default(), tf.Session() as sess: - - # Ops to compute the objective at every (x, y) point. - x = tf.placeholder(tf.float32, shape=xm.shape) - y = tf.placeholder(tf.float32, shape=ym.shape) - obj = self.objective([[x, y]]) - - # Run the computation. - zm = sess.run(obj, feed_dict={x: xm, y: ym}) - - return xm, ym, zm - - -class Rosenbrock(Problem2D): - """See https://en.wikipedia.org/wiki/Rosenbrock_function. - - This function has a single global minima at [1, 1] - The objective value at this point is zero. - """ - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-5., maxval=10., seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = (1 - x)**2 + 100 * (y - x**2)**2 - return tf.squeeze(obj) - - -def make_rosenbrock_loss_and_init(device=None): - """A variable-backed version of Rosenbrock problem. - - See the Rosenbrock class for details. - - Args: - device: Where to place the ops of this problem. 
- - Returns: - A tuple of two callables, first of which creates the loss and the second - creates the parameter initializer function. - """ - def make_rosenbrock_loss(): - with tf.name_scope("optimizee"): - with tf.device(device): - x = tf.get_variable("x", [1]) - y = tf.get_variable("y", [1]) - c = tf.get_variable( - "c", [1], - initializer=tf.constant_initializer(100.0), - trainable=False) - obj = (1 - x)**2 + c * (y - x**2)**2 - return tf.squeeze(obj) - - def make_init_fn(parameters): - with tf.device(device): - init_op = tf.variables_initializer(parameters) - def init_fn(sess): - tf.logging.info("Initializing model parameters.") - sess.run(init_op) - return init_fn - - return make_rosenbrock_loss, make_init_fn - - -class Saddle(Problem2D): - """Loss surface around a saddle point.""" - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = x ** 2 - y ** 2 - return tf.squeeze(obj) - - -class LogSumExp(Problem2D): - """2D function defined by the log of the sum of exponentials.""" - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = tf.log(tf.exp(x + 3. * y - 0.1) + - tf.exp(x - 3. * y - 0.1) + - tf.exp(-x - 0.1) + 1.0) - return tf.squeeze(obj) - - -class Ackley(Problem2D): - """Ackley's function (contains many local minima).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-32.768, maxval=32.768, seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = (-20 * tf.exp(-0.2 * tf.sqrt(0.5 * (x ** 2 + y ** 2))) - - tf.exp(0.5 * (tf.cos(2 * np.pi * x) + tf.cos(2 * np.pi * y))) + - tf.exp(1.0) + 20.) 
- return tf.squeeze(obj) - - -class Beale(Problem2D): - """Beale function (a multimodal function with sharp peaks).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-4.5, maxval=4.5, seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = ((1.5 - x + x * y) ** 2 + - (2.25 - x + x * y ** 2) ** 2 + - (2.625 - x + x * y ** 3) ** 2) - return tf.squeeze(obj) - - -class Booth(Problem2D): - """Booth's function (has a long valley along one dimension).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-10., maxval=10., seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = (x + 2 * y - 7) ** 2 + (2 * x + y - 5) ** 2 - return tf.squeeze(obj) - - -class StyblinskiTang(Problem2D): - """Styblinski-Tang function (a bumpy function in two dimensions).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-5., maxval=5., seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - params = tf.split(params[0], 2, axis=0) - obj = 0.5 * tf.reduce_sum([x ** 4 - 16 * x ** 2 + 5 * x - for x in params], 0) + 80. 
- return tf.squeeze(obj) - - -class Matyas(Problem2D): - """Matyas function (a function with a single global minimum in a valley).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=-10, maxval=10, seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - obj = 0.26 * (x ** 2 + y ** 2) - 0.48 * x * y - return tf.squeeze(obj) - - -class Branin(Problem2D): - """Branin function (a function with three global minima).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - x1 = tf.random_uniform((1,), minval=-5., maxval=10., - seed=seed) - x2 = tf.random_uniform((1,), minval=0., maxval=15., - seed=seed) - return [tf.concat([x1, x2], 0)] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - - # Define some constants. - a = 1. - b = 5.1 / (4. * np.pi ** 2) - c = 5 / np.pi - r = 6. - s = 10. - t = 1 / (8. * np.pi) - - # Evaluate the function. - obj = a * (y - b * x ** 2 + c * x - r) ** 2 + s * (1 - t) * tf.cos(x) + s - return tf.squeeze(obj) - - -class Michalewicz(Problem2D): - """Michalewicz function (has steep ridges and valleys).""" - - def init_tensors(self, seed=None): - """Returns a list of tensors with the given shape.""" - return [tf.random_uniform(shape, minval=0., maxval=np.pi, seed=seed) - for shape in self.param_shapes] - - def objective(self, params, data=None, labels=None): - x, y = tf.split(params[0], 2, axis=0) - m = 5 # Defines how steep the ridges are (larger m => steeper ridges). - obj = 2. 
- (tf.sin(x) * tf.sin(x ** 2 / np.pi) ** (2 * m) + - tf.sin(y) * tf.sin(2 * y ** 2 / np.pi) ** (2 * m)) - return tf.squeeze(obj) - - -class Rescale(Problem): - """Takes an existing problem, and rescales all the parameters.""" - - def __init__(self, problem_spec, scale=10., noise_stdev=0.0): - self.problem = problem_spec.build() - self.param_shapes = self.problem.param_shapes - self.scale = scale - - super(Rescale, self).__init__(self.param_shapes, random_seed=None, - noise_stdev=noise_stdev) - - def init_tensors(self, seed=None): - params_raw = self.problem.init_tensors(seed=seed) - params = [t * self.scale for t in params_raw] - return params - - def objective(self, params, data=None, labels=None): - params_raw = [t/self.scale for t in params] - - problem_obj = self.problem.objective(params_raw, data, labels) - return problem_obj - - -class SumTask(Problem): - """Takes a list of problems and modifies the objective to be their sum.""" - - def __init__(self, problem_specs, noise_stdev=0.0): - self.problems = [ps.build() for ps in problem_specs] - self.param_shapes = [] - for prob in self.problems: - self.param_shapes += prob.param_shapes - - super(SumTask, self).__init__(self.param_shapes, random_seed=None, - noise_stdev=noise_stdev) - - def init_tensors(self, seed=None): - tensors = [] - for prob in self.problems: - tensors += prob.init_tensors(seed=seed) - return tensors - - def objective(self, params, data=None, labels=None): - obj = 0. 
- index = 0 - for prob in self.problems: - num_params = len(prob.param_shapes) - obj += prob.objective(params[index:index + num_params]) - index += num_params - return obj - - -class IsotropicQuadratic(Problem): - """An isotropic quadratic problem.""" - - def objective(self, params, data=None, labels=None): - return sum([tf.reduce_sum(param ** 2) for param in params]) - - -class Norm(Problem): - """Takes an existing problem and modifies the objective to be its N-norm.""" - - def __init__(self, ndim, random_seed=None, noise_stdev=0.0, norm_power=2.): - param_shapes = [(ndim, 1)] - super(Norm, self).__init__(param_shapes, random_seed, noise_stdev) - - # Generate a random problem instance. - self.w = np.random.randn(ndim, ndim).astype("float32") - self.y = np.random.randn(ndim, 1).astype("float32") - self.norm_power = norm_power - - def objective(self, params, data=None, labels=None): - diff = tf.matmul(self.w, params[0]) - self.y - exp = 1. / self.norm_power - loss = tf.reduce_sum((tf.abs(diff) + EPSILON) ** self.norm_power) ** exp - return loss - - -class LogObjective(Problem): - """Takes an existing problem and modifies the objective to be its log.""" - - def __init__(self, problem_spec): - self.problem = problem_spec.build() - self.param_shapes = self.problem.param_shapes - - super(LogObjective, self).__init__(self.param_shapes, - random_seed=None, - noise_stdev=0.0) - - def objective(self, params, data=None, labels=None): - problem_obj = self.problem.objective(params, data, labels) - return tf.log(problem_obj + EPSILON) - tf.log(EPSILON) - - -class SparseProblem(Problem): - """Takes a problem and sets gradients to 0 with the given probability.""" - - def __init__(self, - problem_spec, - zero_probability=0.99, - random_seed=None, - noise_stdev=0.0): - self.problem = problem_spec.build() - self.param_shapes = self.problem.param_shapes - self.zero_prob = zero_probability - - super(SparseProblem, self).__init__(self.param_shapes, - random_seed=random_seed, - 
noise_stdev=noise_stdev) - - def objective(self, parameters, data=None, labels=None): - return self.problem.objective(parameters, data, labels) - - def gradients(self, objective, parameters): - grads = tf.gradients(objective, list(parameters)) - - new_grads = [] - for grad in grads: - mask = tf.greater(self.zero_prob, tf.random_uniform(grad.get_shape())) - zero_grad = tf.zeros_like(grad, dtype=tf.float32) - noisy_grad = grad + self.noise_stdev * tf.random_normal(grad.get_shape()) - new_grads.append(tf.where(mask, zero_grad, noisy_grad)) - return new_grads - - -class DependencyChain(Problem): - """A problem in which parameters must be optimized in order. - - A sequence of parameters which all need to be brought to 0, but where each - parameter in the sequence can't be brought to 0 until the preceding one - has been. This should take a long time to optimize, with steady - (or accelerating) progress throughout the entire process. - """ - - def __init__(self, ndim, random_seed=None, noise_stdev=0.): - param_shapes = [(ndim + 1,)] - self.ndim = ndim - super(DependencyChain, self).__init__( - param_shapes, random_seed, noise_stdev) - - def objective(self, params, data=None, labels=None): - terms = params[0][0]**2 + params[0][1:]**2 / (params[0][:-1]**2 + EPSILON) - return tf.reduce_sum(terms) - - -class MinMaxWell(Problem): - """Problem with global min when both the min and max (absolute) params are 1. - - The gradient for all but two parameters (the min and max) is zero. This - should therefore encourage the optimizer to behave sensible even when - parameters have zero gradients, as is common eg for some deep neural nets. 
- """ - - def __init__(self, ndim, random_seed=None, noise_stdev=0.): - param_shapes = [(ndim,)] - self.ndim = ndim - super(MinMaxWell, self).__init__(param_shapes, random_seed, noise_stdev) - - def objective(self, params, data=None, labels=None): - params_sqr = params[0]**2 - min_sqr = tf.reduce_min(params_sqr) - max_sqr = tf.reduce_max(params_sqr) - epsilon = 1e-12 - - return max_sqr + 1./min_sqr - 2. + epsilon - - -class OutwardSnake(Problem): - """A winding path out to infinity. - - Ideal step length stays constant along the entire path. - """ - - def __init__(self, ndim, random_seed=None, noise_stdev=0.): - param_shapes = [(ndim,)] - self.ndim = ndim - super(OutwardSnake, self).__init__(param_shapes, random_seed, noise_stdev) - - def objective(self, params, data, labels=None): - radius = tf.sqrt(tf.reduce_sum(params[0]**2)) - rad_loss = tf.reduce_sum(1. / (radius + 1e-6) * data[:, 0]) - - sin_dist = params[0][1:] - tf.cos(params[0][:-1]) * np.pi - sin_loss = tf.reduce_sum((sin_dist * data[:, 1:])**2) - - return rad_loss + sin_loss - - -class ProjectionQuadratic(Problem): - """Dataset consists of different directions to probe. Global min is at 0.""" - - def __init__(self, ndim, random_seed=None, noise_stdev=0.): - param_shapes = [(1, ndim)] - super(ProjectionQuadratic, self).__init__( - param_shapes, random_seed, noise_stdev) - - def objective(self, params, data, labels=None): - return tf.reduce_sum((params[0] * data)**2) - - -class SumOfQuadratics(Problem): - - def __init__(self, ndim, random_seed=None, noise_stdev=0.): - param_shapes = [(1, ndim)] - super(SumOfQuadratics, self).__init__( - param_shapes, random_seed, noise_stdev) - - def objective(self, params, data, labels=None): - epsilon = 1e-12 - # Assume dataset is designed so that the global minimum is at params=0. - # Subtract loss at params=0, so that global minimum has objective value - # epsilon (added to avoid floating point issues). 
- return (tf.reduce_sum((params[0] - data)**2) - tf.reduce_sum(data**2) + - epsilon) - - -class MatMulAlgorithm(Problem): - """A 6-th order polynomial optimization problem. - - This problem is parametrized by n and k. A solution to this problem with - objective value exactly zero defines a matrix multiplication algorithm of - n x n matrices using k multiplications between matrices. When applied - recursively, such an algorithm has complexity O(n^(log_n(k))). - - Given n, it is not known in general which values of k in [n^2, n^3] have a - solution. There is always a solution with k = n^3 (this is the naive - algorithm). - - In the special case n = 2, it is known that there are solutions for k = {7, 8} - but not for k <= 6. For n = 3, it is known that there are exact solutions for - 23 <= k <= 27, and there are asymptotic solutions for k = {21, 22}, but the - other cases are unknown. - - For a given n and k, if one solution exists then infinitely many solutions - exist due to permutation and scaling symmetries in the parameters. - - This is a very hard problem for some values of n and k (e.g. n = 3, k = 21), - but very easy for other values (e.g. n = 2, k = 7). - - For a given n and k, the specific formulation of this problem is as follows. - Let theta_a, theta_b, theta_c be parameter matrices with respective dimensions - [n**2, k], [n**2, k], [k, n**2]. Then for any matrices a, b with shape [n, n], - we can form the matrix c with shape [n, n] via the operation: - ((vec(a) * theta_a) .* (vec(b) * theta_b)) * theta_c = vec(c), (#) - where vec(x) is the operator that flattens a matrix with shape [n, n] into a - row vector with shape [1, n**2], * denotes matrix multiplication and .* - denotes elementwise multiplication. - - This operation, parameterized by theta_a, theta_b, theta_c, is a matrix - multiplication algorithm iff c = a*b for all [n, n] matrices a and b. 
But - actually it suffices to verify all combinations of one-hot matrices a and b, - of which there are n**4 such combinations. This gives a batch of n**4 matrix - triplets (a, b, c) such that equation (#) must hold for each triplet. We solve - for theta_a, theta_b, theta_c by minimizing the sum of squares of errors - across this batch. - - Finally, theta_c can be computed from theta_a and theta_b. Therefore it - suffices to learn theta_a and theta_b, from which theta_c and therefore the - objective value can be computed. - """ - - def __init__(self, n, k): - assert isinstance(n, int), "n must be an integer" - assert isinstance(k, int), "k must be an integer" - assert n >= 2, "Must have n >= 2" - assert k >= n**2 and k <= n**3, "Must have n**2 <= k <= n**3" - - param_shapes = [(n**2, k), (n**2, k)] # theta_a, theta_b - super(MatMulAlgorithm, self).__init__( - param_shapes, random_seed=None, noise_stdev=0.0) - - self.n = n - self.k = k - - # Build a batch of all combinations of one-hot matrices a, b, and their - # respective products c. Correctness on this batch is a necessary and - # sufficient condition for the algorithm to be valid. The number of matrices - # in {a, b, c}_3d is n**4 and each matrix is n x n. - onehots = np.identity(n**2).reshape(n**2, n, n) - a_3d = np.repeat(onehots, n**2, axis=0) - b_3d = np.tile(onehots, [n**2, 1, 1]) - c_3d = np.matmul(a_3d, b_3d) - - # Convert the batch to 2D Tensors. - self.a = tf.constant(a_3d.reshape(n**4, n**2), tf.float32, name="a") - self.b = tf.constant(b_3d.reshape(n**4, n**2), tf.float32, name="b") - self.c = tf.constant(c_3d.reshape(n**4, n**2), tf.float32, name="c") - - def init_tensors(self, seed=None): - # Initialize params such that the columns of theta_a and theta_b have L2 - # norm 1. 
- def _param_initializer(shape, seed=None): - x = tf.random_normal(shape, dtype=tf.float32, seed=seed) - return tf.transpose(tf.nn.l2_normalize(tf.transpose(x), 1)) - - return [_param_initializer(shape, seed) for shape in self.param_shapes] - - def objective(self, parameters, data=None, labels=None): - theta_a = parameters[0] - theta_b = parameters[1] - - # Compute theta_c from theta_a and theta_b. - p = tf.matmul(self.a, theta_a) * tf.matmul(self.b, theta_b) - p_trans = tf.transpose(p, name="p_trans") - p_inv = tf.matmul( - tf.matrix_inverse(tf.matmul(p_trans, p)), p_trans, name="p_inv") - theta_c = tf.matmul(p_inv, self.c, name="theta_c") - - # Compute the "predicted" value of c. - c_hat = tf.matmul(p, theta_c, name="c_hat") - - # Compute the loss (sum of squared errors). - loss = tf.reduce_sum((c_hat - self.c)**2, name="loss") - - return loss - - -def matmul_problem_sequence(n, k_min, k_max): - """Helper to generate a sequence of matrix multiplication problems.""" - return [(_Spec(MatMulAlgorithm, (n, k), {}), None, None) - for k in range(k_min, k_max + 1)] - - -def init_fixed_variables(arrays): - with tf.variable_scope(PARAMETER_SCOPE): - params = [tf.Variable(arr.astype("float32")) for arr in arrays] - return params - - -def _mesh(xlim, ylim, n): - """Creates a 2D meshgrid covering the given ranges. 
- - Args: - xlim: int that defines the desired x-range (-xlim, xlim) - ylim: int that defines the desired y-range (-ylim, ylim) - n: number of points in each dimension of the mesh - - Returns: - xm: 2D array of x-values in the mesh - ym: 2D array of y-values in the mesh - """ - return np.meshgrid(np.linspace(-xlim, xlim, n), - np.linspace(-ylim, ylim, n)) diff --git a/research/learned_optimizer/problems/problem_sets.py b/research/learned_optimizer/problems/problem_sets.py deleted file mode 100644 index eaf9273b87ef69c6b3087330bdf46c8de7107a15..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/problem_sets.py +++ /dev/null @@ -1,561 +0,0 @@ -# Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Groups of problems of different types for optimizer training.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from learned_optimizer.problems import datasets -from learned_optimizer.problems import model_adapter -from learned_optimizer.problems import problem_generator as pg -from learned_optimizer.problems import problem_spec - -_Spec = problem_spec.Spec - - -def quadratic_problems(): - return [ - (_Spec(pg.Quadratic, (20,), {}), None, None), - (_Spec(pg.Quadratic, (25,), {}), None, None), - (_Spec(pg.Quadratic, (50,), {}), None, None), - (_Spec(pg.Quadratic, (100,), {}), None, None), - ] - - -# Note: this group contains one non-noisy problem for historical reasons. The -# original training set before the refactor included this set of quadratics. -def quadratic_problems_noisy(): - return [ - (_Spec(pg.Quadratic, (20,), {"noise_stdev": 0.5}), None, None), - (_Spec(pg.Quadratic, (25,), {"noise_stdev": 0.0}), None, None), - (_Spec(pg.Quadratic, (50,), {"noise_stdev": 1.0}), None, None), - (_Spec(pg.Quadratic, (100,), {"noise_stdev": 2.0}), None, None), - ] - - -def quadratic_problems_large(): - return [ - (_Spec(pg.Quadratic, (784,), {}), None, None), - (_Spec(pg.Quadratic, (1024,), {}), None, None), - (_Spec(pg.Quadratic, (2048,), {}), None, None), - ] - - -def bowl_problems(): - return [ - (_Spec(pg.Bowl, (0.1,), {"noise_stdev": 0.0}), None, None), - (_Spec(pg.Bowl, (1.0,), {"noise_stdev": 0.0}), None, None), - (_Spec(pg.Bowl, (5.0,), {"noise_stdev": 0.0}), None, None), - (_Spec(pg.Bowl, (5.0,), {"noise_stdev": 0.0, "angle": np.pi / 4.}), - None, None), - ] - - -def bowl_problems_noisy(): - return [ - (_Spec(pg.Bowl, (0.1,), {"noise_stdev": 0.1}), None, None), - (_Spec(pg.Bowl, (1.0,), {"noise_stdev": 0.1}), None, None), - (_Spec(pg.Bowl, (5.0,), {"noise_stdev": 
0.1}), None, None), - (_Spec(pg.Bowl, (5.0,), {"noise_stdev": 0.1, "angle": np.pi / 4.}), - None, None), - ] - - -def sparse_softmax_2_class_sparse_problems(): - return [(_Spec(pg.SparseSoftmaxRegression, (5, 2), {"noise_stdev": 0.0}), - datasets.noisy_parity_class(5, random_seed=123), 23),] - - -def one_hot_sparse_softmax_2_class_sparse_problems(): - return [ - (_Spec(pg.OneHotSparseSoftmaxRegression, (5, 2), {"noise_stdev": 0.0}), - datasets.noisy_parity_class(5, random_seed=123), 23), - ] - - -def softmax_2_class_problems(): - return [ - (_Spec(pg.SoftmaxRegression, (10, 2), {}), datasets.random( - 10, 1000, random_seed=123, sep=2.0), 100), - (_Spec(pg.SoftmaxRegression, (100, 2), {}), datasets.random( - 100, 1000, random_seed=123), 50), - (_Spec(pg.SoftmaxRegression, (200, 2), {}), datasets.random( - 200, 1000, random_seed=123, sep=1.5), 20), - (_Spec(pg.SoftmaxRegression, (256, 2), {}), datasets.random( - 256, 1000, random_seed=123, sep=1.5), 100), - ] - - -def softmax_2_class_problems_noisy(): - return [ - (_Spec(pg.SoftmaxRegression, (10, 2), {"noise_stdev": 0.5}), - datasets.random(10, 1000, random_seed=123, sep=2.0), 100), - (_Spec(pg.SoftmaxRegression, (100, 2), {"noise_stdev": 0.1}), - datasets.random(100, 1000, random_seed=123), 50), - (_Spec(pg.SoftmaxRegression, (200, 2), {"noise_stdev": 0.1}), - datasets.random(200, 1000, random_seed=123, sep=1.5), 20), - (_Spec(pg.SoftmaxRegression, (256, 2), {"noise_stdev": 0.5}), - datasets.random(256, 1000, random_seed=123, sep=1.5), 100), - ] - - -def optimization_test_problems(): - return [ - (_Spec(pg.Ackley, (), {}), None, None), - (_Spec(pg.Beale, (), {}), None, None), - (_Spec(pg.Booth, (), {}), None, None), - (_Spec(pg.Branin, (), {}), None, None), - (_Spec(pg.LogSumExp, (), {}), None, None), - (_Spec(pg.Matyas, (), {}), None, None), - (_Spec(pg.Michalewicz, (), {}), None, None), - (_Spec(pg.Rosenbrock, (), {}), None, None), - (_Spec(pg.StyblinskiTang, (), {}), None, None), - ] - - -def 
optimization_test_problems_noisy(): - return [ - (_Spec(pg.Ackley, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Beale, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Booth, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Branin, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.LogSumExp, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Matyas, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Michalewicz, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.Rosenbrock, (), {"noise_stdev": 1.}), None, None), - (_Spec(pg.StyblinskiTang, (), {"noise_stdev": 1.}), None, None), - ] - - -def fully_connected_random_2_class_problems(): - return [ - (_Spec(pg.FullyConnected, (8, 2), - {"hidden_sizes": (8, 5,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(8, 1000), 10), - (_Spec(pg.FullyConnected, (12, 2), - {"hidden_sizes": (8, 5, 3), "activation": tf.nn.sigmoid}), - datasets.random_mlp(12, 1000), 200), - (_Spec(pg.FullyConnected, (5, 2), - {"hidden_sizes": (4, 4, 4, 4,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(5, 1000), 100), - (_Spec(pg.FullyConnected, (11, 2), - {"hidden_sizes": (4, 5, 6,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(11, 1000), 64), - (_Spec(pg.FullyConnected, (9, 2), - {"hidden_sizes": (8,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(9, 1000), 128), - (_Spec(pg.FullyConnected, (7, 2), - {"hidden_sizes": (8, 5,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(7, 1000), 16), - (_Spec(pg.FullyConnected, (8, 2), - {"hidden_sizes": (32, 64,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(8, 1000), 10), - (_Spec(pg.FullyConnected, (12, 2), - {"hidden_sizes": (16, 8, 3), "activation": tf.nn.sigmoid}), - datasets.random_mlp(12, 1000), 200), - (_Spec(pg.FullyConnected, (5, 2), - {"hidden_sizes": (8, 8, 8, 8,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(5, 1000), 100), - (_Spec(pg.FullyConnected, (11, 2), - {"hidden_sizes": (10, 12, 12,), "activation": tf.nn.sigmoid}), - 
datasets.random_mlp(11, 1000), 64), - (_Spec(pg.FullyConnected, (9, 2), - {"hidden_sizes": (32,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(9, 1000), 128), - (_Spec(pg.FullyConnected, (7, 2), - {"hidden_sizes": (32, 64,), "activation": tf.nn.sigmoid}), - datasets.random_mlp(7, 1000), 16), - ] - - -def matmul_problems(): - return sum([ - pg.matmul_problem_sequence(2, 5, 8), - pg.matmul_problem_sequence(3, 19, 24)], []) - - -def log_objective_problems(): - return [ - (_Spec(pg.LogObjective, [_Spec(pg.Quadratic, (20,), {})], {}), - None, None), - (_Spec(pg.LogObjective, [_Spec(pg.Quadratic, (50,), {})], {}), - None, None), - (_Spec(pg.LogObjective, [_Spec(pg.Quadratic, (100,), {})], {}), - None, None), - (_Spec(pg.LogObjective, [_Spec(pg.Bowl, (0.1,), {})], {}), None, None), - (_Spec(pg.LogObjective, [_Spec(pg.Bowl, (1.0,), {})], {}), None, None), - (_Spec(pg.LogObjective, [_Spec(pg.Bowl, (5.0,), {})], {}), None, None), - ] - - -def sparse_gradient_problems(): - return [ - (_Spec(pg.SparseProblem, [_Spec(pg.Quadratic, (20,), {})], {}), - None, None), - (_Spec(pg.SparseProblem, [_Spec(pg.Quadratic, (50,), {})], {}), - None, None), - (_Spec(pg.SparseProblem, [_Spec(pg.Quadratic, (100,), {})], {}), - None, None), - (_Spec(pg.SparseProblem, [_Spec(pg.Bowl, (0.1,), {})], {}), None, None), - (_Spec(pg.SparseProblem, [_Spec(pg.Bowl, (1.0,), {})], {}), None, None), - (_Spec(pg.SparseProblem, [_Spec(pg.Bowl, (5.0,), {})], {}), None, None), - ] - - -def sparse_gradient_problems_mlp(): - return [ - (_Spec(pg.SparseProblem, [ - _Spec(pg.FullyConnected, (8, 2), { - "hidden_sizes": (8, 5,), - "activation": tf.nn.sigmoid - }) - ], {}), datasets.random_mlp(8, 1000), 10), - (_Spec(pg.SparseProblem, [ - _Spec(pg.FullyConnected, (12, 2), { - "hidden_sizes": (8, 5, 3), - "activation": tf.nn.sigmoid - }) - ], {}), datasets.random_mlp(12, 1000), 200), - (_Spec(pg.SparseProblem, [ - _Spec(pg.FullyConnected, (5, 2), { - "hidden_sizes": (4, 4, 4, 4,), - "activation": tf.nn.sigmoid 
- }) - ], {}), datasets.random_mlp(5, 1000), 100), - ] - - -def rescale_problems(): - return [ - (_Spec(pg.Rescale, [_Spec(pg.Norm, (18,), {"norm_power": 2.5})], - {"scale": 0.123}), None, None), - (_Spec(pg.Rescale, [_Spec(pg.Norm, (18,), {"norm_power": 1.5})], - {"scale": 8}), None, None), - (_Spec(pg.Rescale, [_Spec(pg.Norm, (18,), {"norm_power": 2.})], - {"scale": 50}), None, None), - (_Spec(pg.Rescale, [_Spec(pg.Norm, (18,), {"norm_power": 3.})], - {"scale": 200}), None, None), - (_Spec(pg.Rescale, [_Spec(pg.Norm, (18,), {"norm_power": 1.})], - {"scale": 1000}), None, None), - (_Spec(pg.Rescale, [_Spec(pg.Quadratic, (20,), {})], {"scale": 0.1}), - None, None), - (_Spec(pg.Rescale, [_Spec(pg.Quadratic, (25,), {})], {"scale": 10.}), - None, None), - (_Spec(pg.Rescale, [_Spec(pg.Quadratic, (50,), {})], {"scale": 350.}), - None, None), - (_Spec(pg.Rescale, [_Spec(pg.Quadratic, (100,), {})], {"scale": 132}), - None, None), - ] - - -def norm_problems(): - return [ - # < 1 Norm causes NaN gradients early in training. - (_Spec(pg.Norm, (27,), {"norm_power": 1.}), None, None), - (_Spec(pg.Norm, (25,), {"norm_power": 2.}), None, None), - (_Spec(pg.Norm, (22,), {"norm_power": 3.}), None, None), - ] - - -def norm_problems_noisy(): - return [ - # < 1 Norm causes NaN gradients early in training. 
- (_Spec(pg.Norm, (19,), {"noise_stdev": .1, "norm_power": 1.}), - None, None), - (_Spec(pg.Norm, (26,), {"noise_stdev": .1, "norm_power": 2.}), - None, None), - (_Spec(pg.Norm, (23,), {"noise_stdev": .1, "norm_power": 3.}), - None, None), - ] - - -def sum_problems(): - return [ - (_Spec(pg.SumTask, [[ - _Spec(pg.Quadratic, (11,), {}), - _Spec(pg.Quadratic, (3,), {}), - _Spec(pg.Quadratic, (9,), {}), - _Spec(pg.Quadratic, (7,), {}), - _Spec(pg.Quadratic, (5,), {}), - _Spec(pg.Quadratic, (13,), {}), - _Spec(pg.Quadratic, (12,), {}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Norm, (18,), {"norm_power": 3}), - _Spec(pg.Quadratic, (25,), {}), - _Spec(pg.Rosenbrock, (), {}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Rosenbrock, (), {}), - _Spec(pg.LogSumExp, (), {}), - _Spec(pg.Ackley, (), {}), - _Spec(pg.Beale, (), {}), - _Spec(pg.Booth, (), {}), - _Spec(pg.StyblinskiTang, (), {}), - _Spec(pg.Matyas, (), {}), - _Spec(pg.Branin, (), {}), - _Spec(pg.Michalewicz, (), {}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Rosenbrock, (), {}), - _Spec(pg.LogSumExp, (), {}), - _Spec(pg.Ackley, (), {}), - _Spec(pg.Beale, (), {}), - _Spec(pg.Booth, (), {}), - _Spec(pg.StyblinskiTang, (), {}), - _Spec(pg.Matyas, (), {}), - _Spec(pg.Branin, (), {}), - _Spec(pg.Michalewicz, (), {}), - _Spec(pg.Quadratic, (5,), {}), - _Spec(pg.Quadratic, (13,), {}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Quadratic, (11,), {}), - _Spec(pg.Quadratic, (3,), {}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Rosenbrock, (), {}), - _Spec(pg.LogSumExp, (), {}), - _Spec(pg.Ackley, (), {}) - ]], {}), None, None), - ] - - -def sum_problems_noisy(): - return [ - (_Spec(pg.SumTask, [[ - _Spec(pg.Quadratic, (11,), {"noise_stdev": 0.1}), - _Spec(pg.Quadratic, (3,), {"noise_stdev": 0.1}), - _Spec(pg.Quadratic, (9,), {"noise_stdev": 0.1}), - _Spec(pg.Quadratic, (7,), {"noise_stdev": 0.1}), - _Spec(pg.Quadratic, (5,), {"noise_stdev": 0.1}), - 
_Spec(pg.Quadratic, (13,), {"noise_stdev": 0.1}), - _Spec(pg.Quadratic, (12,), {"noise_stdev": 0.1}) - ]], {}), None, None), - (_Spec(pg.SumTask, [[ - _Spec(pg.Rosenbrock, (), {}), - _Spec(pg.LogSumExp, (), {}), - _Spec(pg.Ackley, (), {}), - _Spec(pg.Beale, (), {}), - _Spec(pg.Booth, (), {}), - _Spec(pg.StyblinskiTang, (), {}), - _Spec(pg.Matyas, (), {}), - _Spec(pg.Branin, (), {}), - _Spec(pg.Michalewicz, (), {}), - _Spec(pg.Quadratic, (5,), {}), - _Spec(pg.Quadratic, (13,), {"noise_stdev": 0.5}) - ]], {}), None, None), - ] - - -def dependency_chain_problems(): - return [ - (_Spec(pg.DependencyChain, (20,), {}), datasets.random_binary( - 20, 1000), 100), - (_Spec(pg.DependencyChain, (12,), {}), datasets.random_binary( - 12, 200), 10), - (_Spec(pg.DependencyChain, (56,), {}), datasets.random_binary( - 56, 5000), 100), - (_Spec(pg.DependencyChain, (64,), {}), datasets.random_binary( - 64, 1000), 50), - (_Spec(pg.DependencyChain, (13,), {}), datasets.random_binary( - 13, 10000), 50), - (_Spec(pg.DependencyChain, (20,), {}), datasets.random_binary( - 20, 1000), 128), - (_Spec(pg.DependencyChain, (12,), {}), datasets.random_binary( - 12, 300), 16), - (_Spec(pg.DependencyChain, (56,), {}), datasets.random_binary( - 56, 5000), 128), - (_Spec(pg.DependencyChain, (64,), {}), datasets.random_binary( - 64, 1000), 64), - (_Spec(pg.DependencyChain, (13,), {}), datasets.random_binary( - 13, 10000), 32), - ] - - -def outward_snake_problems(): - return [ - (_Spec(pg.OutwardSnake, (20,), {}), datasets.random_binary( - 20, 1000), 100), - (_Spec(pg.OutwardSnake, (12,), {}), datasets.random_binary( - 12, 200), 10), - (_Spec(pg.OutwardSnake, (56,), {}), datasets.random_binary( - 56, 5000), 100), - (_Spec(pg.OutwardSnake, (64,), {}), datasets.random_binary( - 64, 1000), 50), - (_Spec(pg.OutwardSnake, (13,), {}), datasets.random_binary( - 13, 10000), 50), - (_Spec(pg.OutwardSnake, (20,), {}), datasets.random_binary( - 20, 1000), 128), - (_Spec(pg.OutwardSnake, (12,), {}), 
datasets.random_binary( - 12, 300), 16), - (_Spec(pg.OutwardSnake, (56,), {}), datasets.random_binary( - 56, 5000), 128), - (_Spec(pg.OutwardSnake, (64,), {}), datasets.random_binary( - 64, 1000), 64), - (_Spec(pg.OutwardSnake, (13,), {}), datasets.random_binary( - 13, 10000), 32), - ] - - -def min_max_well_problems(): - return [ - (_Spec(pg.MinMaxWell, (20,), {}), None, None), - (_Spec(pg.MinMaxWell, (12,), {}), None, None), - (_Spec(pg.MinMaxWell, (56,), {}), None, None), - (_Spec(pg.MinMaxWell, (64,), {}), None, None), - (_Spec(pg.MinMaxWell, (13,), {}), None, None), - ] - - -def sum_of_quadratics_problems(): - return [ - (_Spec(pg.SumOfQuadratics, (20,), {}), - datasets.random_symmetric(20, 1000), 100), - (_Spec(pg.SumOfQuadratics, (12,), {}), - datasets.random_symmetric(12, 100), 10), - (_Spec(pg.SumOfQuadratics, (56,), {}), - datasets.random_symmetric(56, 5000), 100), - (_Spec(pg.SumOfQuadratics, (64,), {}), - datasets.random_symmetric(64, 1000), 50), - (_Spec(pg.SumOfQuadratics, (13,), {}), - datasets.random_symmetric(13, 10000), 50), - (_Spec(pg.SumOfQuadratics, (20,), {}), - datasets.random_symmetric(20, 1000), 128), - (_Spec(pg.SumOfQuadratics, (12,), {}), - datasets.random_symmetric(12, 100), 16), - (_Spec(pg.SumOfQuadratics, (56,), {}), - datasets.random_symmetric(56, 5000), 128), - (_Spec(pg.SumOfQuadratics, (64,), {}), - datasets.random_symmetric(64, 1000), 64), - (_Spec(pg.SumOfQuadratics, (13,), {}), - datasets.random_symmetric(13, 10000), 32), - ] - - -def projection_quadratic_problems(): - return [ - (_Spec(pg.ProjectionQuadratic, (20,), {}), - datasets.random_symmetric(20, 1000), 100), - (_Spec(pg.ProjectionQuadratic, (12,), {}), - datasets.random_symmetric(12, 100), 10), - (_Spec(pg.ProjectionQuadratic, (56,), {}), - datasets.random_symmetric(56, 5000), 100), - (_Spec(pg.ProjectionQuadratic, (64,), {}), - datasets.random_symmetric(64, 1000), 50), - (_Spec(pg.ProjectionQuadratic, (13,), {}), - datasets.random_symmetric(13, 10000), 50), - 
(_Spec(pg.ProjectionQuadratic, (20,), {}), - datasets.random_symmetric(20, 1000), 128), - (_Spec(pg.ProjectionQuadratic, (12,), {}), - datasets.random_symmetric(12, 100), 16), - (_Spec(pg.ProjectionQuadratic, (56,), {}), - datasets.random_symmetric(56, 5000), 128), - (_Spec(pg.ProjectionQuadratic, (64,), {}), - datasets.random_symmetric(64, 1000), 64), - (_Spec(pg.ProjectionQuadratic, (13,), {}), - datasets.random_symmetric(13, 10000), 32), - ] - - -def adapter_rosenbrock_local(): - return [(_Spec(model_adapter.ModelAdapter, - (pg.make_rosenbrock_loss_and_init,), {}), None, None),] - - -def adapter_rosenbrock_worker(): - return [(_Spec(model_adapter.ModelAdapter, - (pg.make_rosenbrock_loss_and_init,), - {"device": "/job:worker"}), None, None),] - - -def _test_problem_mlp_scaled_init_small(): - return [ - np.random.randn(10, 32) * np.sqrt(2./10), - np.random.randn(32,) * 0.1, - np.random.randn(32, 64) * np.sqrt(2./32.), - np.random.randn(64,) * 0.1, - np.random.randn(64, 2) * np.sqrt(2./64.), - np.random.randn(2,) * 0.1 - ] - - -def _test_problem_mlp_scaled_init_large(): - return [ - np.random.randn(20, 32) * np.sqrt(2./20), - np.random.randn(32,) * 0.1, - np.random.randn(32, 64) * np.sqrt(2./32.), - np.random.randn(64,) * 0.1, - np.random.randn(64, 10) * np.sqrt(2./64.), - np.random.randn(10,) * 0.1 - ] - - -def _test_problem_mlp_scaled_init_mnist(): - return [ - np.random.randn(784, 64) * np.sqrt(2./784.), - np.random.randn(64,) * 0.1, - np.random.randn(64, 10) * np.sqrt(2./ 64.), - np.random.randn(10,) * 0.1, - ] - - -# Wrap this construction in a function to avoid UnparsedFlagAccessError -def test_problems(): - """Test problems for visualizations.""" - # Unlike the training problem sets, these test problems are made up of - # length-5 tuples. The final items in the tuple are the name of the problem - # and the initialization random_seed for testing consistency. 
- tp = [ - (_Spec(pg.Quadratic, (20,), {"random_seed": 1234}), None, None, - "quad_problem", 5678), - (_Spec(pg.Quadratic, (20,), {"noise_stdev": 1.0, "random_seed": 1234}), - None, None, "quad_problem_noise", 5678), - (_Spec(pg.Rosenbrock, (), {"random_seed": 1234}), None, None, - "rosenbrock", 5678), - (_Spec(pg.Rosenbrock, (), {"random_seed": 1234, "noise_stdev": 1.0}), - None, None, "rosenbrock_noise", 5678), - (_Spec(pg.SoftmaxRegression, (10, 2), {}), datasets.random( - 10, 10000, random_seed=1234), 100, "softmax", 5678), - (_Spec(pg.SoftmaxRegression, (10, 2), {"noise_stdev": 1.0}), - datasets.random(10, 10000, random_seed=1234), 100, "softmax_noise", - 5678), - (_Spec(pg.FullyConnected, (10, 2), {}), datasets.random( - 10, 10000, random_seed=1234), 100, "mlp_small", - _test_problem_mlp_scaled_init_small()), - (_Spec(pg.FullyConnected, (20, 10), {}), datasets.random( - 20, 10000, n_classes=10, random_seed=1234), 100, "mlp_large", - _test_problem_mlp_scaled_init_large()), - (_Spec(pg.FullyConnected, (784, 10), - {"hidden_sizes": (64,), "activation": tf.nn.sigmoid}), - datasets.mnist(), 64, "mlp_mnist_sigmoid", - _test_problem_mlp_scaled_init_mnist()), - (_Spec(pg.FullyConnected, (784, 10), - {"hidden_sizes": (64,), "activation": tf.nn.relu}), - datasets.mnist(), 64, "mlp_mnist_relu", - _test_problem_mlp_scaled_init_mnist()), - (_Spec(pg.ConvNet, ((1, 28, 28), 10, [(3, 3, 8), (5, 5, 8)]), - {"activation": tf.nn.sigmoid}), datasets.mnist(), 64, - "convnet_mnist_sigmoid", None), - (_Spec(pg.ConvNet, ((1, 28, 28), 10, [(3, 3, 8), (5, 5, 8)]), - {"activation": tf.nn.relu}), datasets.mnist(), 64, - "convnet_mnist_relu", None), - ] - return tp diff --git a/research/learned_optimizer/problems/problem_spec.py b/research/learned_optimizer/problems/problem_spec.py deleted file mode 100644 index e30c47b277e5c8b3b8aba3b8d691a2af3a595ef6..0000000000000000000000000000000000000000 --- a/research/learned_optimizer/problems/problem_spec.py +++ /dev/null @@ -1,33 +0,0 @@ -# 
Copyright 2017 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Wrapper around a training problem.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple - - -class Spec(namedtuple("Spec", "callable args kwargs")): - """Syntactic sugar for keeping track of a function/class + args.""" - - # Since this is an immutable object, we don't need to reserve slots. 
- __slots__ = () - - def build(self): - """Returns the output of the callable.""" - return self.callable(*self.args, **self.kwargs) diff --git a/research/learning_to_remember_rare_events/README.md b/research/learning_to_remember_rare_events/README.md deleted file mode 100644 index 2eeadea784d4d22efc88c56e482c5d5374c90e24..0000000000000000000000000000000000000000 --- a/research/learning_to_remember_rare_events/README.md +++ /dev/null @@ -1,61 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - ---- - -Code for the Memory Module as described -in "Learning to Remember Rare Events" by -Lukasz Kaiser, Ofir Nachum, Aurko Roy, and Samy Bengio -published as a conference paper at ICLR 2017. - -Requirements: -* TensorFlow (see tensorflow.org for how to install) -* Some basic command-line utilities (git, unzip). - -Description: - -The general memory module is located in memory.py. -Some code is provided to see the memory module in -action on the standard Omniglot dataset. -Download and setup the dataset using data_utils.py -and then run the training script train.py -(see example commands below). - -Note that the structure and parameters of the model -are optimized for the data preparation as provided. 
- -Quick Start: - -First download and set-up Omniglot data by running - -``` -python data_utils.py -``` - -Then run the training script: - -``` -python train.py --memory_size=8192 \ - --batch_size=16 --validation_length=50 \ - --episode_width=5 --episode_length=30 -``` - -The first validation batch may look like this (although it is noisy): -``` -0-shot: 0.040, 1-shot: 0.404, 2-shot: 0.516, 3-shot: 0.604, - 4-shot: 0.656, 5-shot: 0.684 -``` -At step 500 you may see something like this: -``` -0-shot: 0.036, 1-shot: 0.836, 2-shot: 0.900, 3-shot: 0.940, - 4-shot: 0.944, 5-shot: 0.916 -``` -At step 4000 you may see something like this: -``` -0-shot: 0.044, 1-shot: 0.960, 2-shot: 1.000, 3-shot: 0.988, - 4-shot: 0.972, 5-shot: 0.992 -``` - -Maintained by Ofir Nachum (ofirnachum) and -Lukasz Kaiser (lukaszkaiser). diff --git a/research/learning_to_remember_rare_events/data_utils.py b/research/learning_to_remember_rare_events/data_utils.py deleted file mode 100644 index 03d5dafb251d4e058a6780b447aabdcd1a84a1d4..0000000000000000000000000000000000000000 --- a/research/learning_to_remember_rare_events/data_utils.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Data loading and other utilities. - -Use this file to first copy over and pre-process the Omniglot dataset. 
-Simply call - python data_utils.py -""" - -import logging -import os -import subprocess -from six.moves import cPickle as pickle - -import numpy as np -from scipy.misc import imresize -from scipy.misc import imrotate -from scipy.ndimage import imread -from six.moves import xrange -import tensorflow as tf - - -MAIN_DIR = '' -REPO_LOCATION = 'https://github.com/brendenlake/omniglot.git' -REPO_DIR = os.path.join(MAIN_DIR, 'omniglot') -DATA_DIR = os.path.join(REPO_DIR, 'python') -TRAIN_DIR = os.path.join(DATA_DIR, 'images_background') -TEST_DIR = os.path.join(DATA_DIR, 'images_evaluation') -DATA_FILE_FORMAT = os.path.join(MAIN_DIR, '%s_omni.pkl') - -TRAIN_ROTATIONS = True # augment training data with rotations -TEST_ROTATIONS = False # augment testing data with rotations -IMAGE_ORIGINAL_SIZE = 105 -IMAGE_NEW_SIZE = 28 - - -def get_data(): - """Get data in form suitable for episodic training. - - Returns: - Train and test data as dictionaries mapping - label to list of examples. - """ - with tf.gfile.GFile(DATA_FILE_FORMAT % 'train', 'rb') as f: - processed_train_data = pickle.load(f) - with tf.gfile.GFile(DATA_FILE_FORMAT % 'test', 'rb') as f: - processed_test_data = pickle.load(f) - - train_data = {} - test_data = {} - - for data, processed_data in zip([train_data, test_data], - [processed_train_data, processed_test_data]): - for image, label in zip(processed_data['images'], - processed_data['labels']): - if label not in data: - data[label] = [] - data[label].append(image.reshape([-1]).astype('float32')) - - intersection = set(train_data.keys()) & set(test_data.keys()) - assert not intersection, 'Train and test data intersect.' - ok_num_examples = [len(ll) == 20 for _, ll in train_data.items()] - assert all(ok_num_examples), 'Bad number of examples in train data.' - ok_num_examples = [len(ll) == 20 for _, ll in test_data.items()] - assert all(ok_num_examples), 'Bad number of examples in test data.' 
- - logging.info('Number of labels in train data: %d.', len(train_data)) - logging.info('Number of labels in test data: %d.', len(test_data)) - - return train_data, test_data - - -def crawl_directory(directory, augment_with_rotations=False, - first_label=0): - """Crawls data directory and returns stuff.""" - label_idx = first_label - images = [] - labels = [] - info = [] - - # traverse root directory - for root, _, files in os.walk(directory): - logging.info('Reading files from %s', root) - fileflag = 0 - for file_name in files: - full_file_name = os.path.join(root, file_name) - img = imread(full_file_name, flatten=True) - for i, angle in enumerate([0, 90, 180, 270]): - if not augment_with_rotations and i > 0: - break - - images.append(imrotate(img, angle)) - labels.append(label_idx + i) - info.append(full_file_name) - - fileflag = 1 - - if fileflag: - label_idx += 4 if augment_with_rotations else 1 - - return images, labels, info - - -def resize_images(images, new_width, new_height): - """Resize images to new dimensions.""" - resized_images = np.zeros([images.shape[0], new_width, new_height], - dtype=np.float32) - - for i in range(images.shape[0]): - resized_images[i, :, :] = imresize(images[i, :, :], - [new_width, new_height], - interp='bilinear', - mode=None) - return resized_images - - -def write_datafiles(directory, write_file, - resize=True, rotate=False, - new_width=IMAGE_NEW_SIZE, new_height=IMAGE_NEW_SIZE, - first_label=0): - """Load and preprocess images from a directory and write them to a file. - - Args: - directory: Directory of alphabet sub-directories. - write_file: Filename to write to. - resize: Whether to resize the images. - rotate: Whether to augment the dataset with rotations. - new_width: New resize width. - new_height: New resize height. - first_label: Label to start with. - - Returns: - Number of new labels created. 
- """ - - # these are the default sizes for Omniglot: - imgwidth = IMAGE_ORIGINAL_SIZE - imgheight = IMAGE_ORIGINAL_SIZE - - logging.info('Reading the data.') - images, labels, info = crawl_directory(directory, - augment_with_rotations=rotate, - first_label=first_label) - - images_np = np.zeros([len(images), imgwidth, imgheight], dtype=np.bool) - labels_np = np.zeros([len(labels)], dtype=np.uint32) - for i in xrange(len(images)): - images_np[i, :, :] = images[i] - labels_np[i] = labels[i] - - if resize: - logging.info('Resizing images.') - resized_images = resize_images(images_np, new_width, new_height) - - logging.info('Writing resized data in float32 format.') - data = {'images': resized_images, - 'labels': labels_np, - 'info': info} - with tf.gfile.GFile(write_file, 'w') as f: - pickle.dump(data, f) - else: - logging.info('Writing original sized data in boolean format.') - data = {'images': images_np, - 'labels': labels_np, - 'info': info} - with tf.gfile.GFile(write_file, 'w') as f: - pickle.dump(data, f) - - return len(np.unique(labels_np)) - - -def maybe_download_data(): - """Download Omniglot repo if it does not exist.""" - if os.path.exists(REPO_DIR): - logging.info('It appears that Git repo already exists.') - else: - logging.info('It appears that Git repo does not exist.') - logging.info('Cloning now.') - - subprocess.check_output('git clone %s' % REPO_LOCATION, shell=True) - - if os.path.exists(TRAIN_DIR): - logging.info('It appears that train data has already been unzipped.') - else: - logging.info('It appears that train data has not been unzipped.') - logging.info('Unzipping now.') - - subprocess.check_output('unzip %s.zip -d %s' % (TRAIN_DIR, DATA_DIR), - shell=True) - - if os.path.exists(TEST_DIR): - logging.info('It appears that test data has already been unzipped.') - else: - logging.info('It appears that test data has not been unzipped.') - logging.info('Unzipping now.') - - subprocess.check_output('unzip %s.zip -d %s' % (TEST_DIR, DATA_DIR), - 
shell=True) - - -def preprocess_omniglot(): - """Download and prepare raw Omniglot data. - - Downloads the data from GitHub if it does not exist. - Then load the images, augment with rotations if desired. - Resize the images and write them to a pickle file. - """ - - maybe_download_data() - - directory = TRAIN_DIR - write_file = DATA_FILE_FORMAT % 'train' - num_labels = write_datafiles( - directory, write_file, resize=True, rotate=TRAIN_ROTATIONS, - new_width=IMAGE_NEW_SIZE, new_height=IMAGE_NEW_SIZE) - - directory = TEST_DIR - write_file = DATA_FILE_FORMAT % 'test' - write_datafiles(directory, write_file, resize=True, rotate=TEST_ROTATIONS, - new_width=IMAGE_NEW_SIZE, new_height=IMAGE_NEW_SIZE, - first_label=num_labels) - - -def main(unused_argv): - logging.basicConfig(level=logging.INFO) - preprocess_omniglot() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/learning_to_remember_rare_events/memory.py b/research/learning_to_remember_rare_events/memory.py deleted file mode 100644 index 2f40ff57f9434994f08b1ad97dc23142bb23daaa..0000000000000000000000000000000000000000 --- a/research/learning_to_remember_rare_events/memory.py +++ /dev/null @@ -1,392 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Memory module for storing "nearest neighbors". 
- -Implements a key-value memory for generalized one-shot learning -as described in the paper -"Learning to Remember Rare Events" -by Lukasz Kaiser, Ofir Nachum, Aurko Roy, Samy Bengio, -published as a conference paper at ICLR 2017. -""" - -import numpy as np -from six.moves import xrange -import tensorflow as tf - - -class Memory(object): - """Memory module.""" - - def __init__(self, key_dim, memory_size, vocab_size, - choose_k=256, alpha=0.1, correct_in_top=1, age_noise=8.0, - var_cache_device='', nn_device=''): - self.key_dim = key_dim - self.memory_size = memory_size - self.vocab_size = vocab_size - self.choose_k = min(choose_k, memory_size) - self.alpha = alpha - self.correct_in_top = correct_in_top - self.age_noise = age_noise - self.var_cache_device = var_cache_device # Variables are cached here. - self.nn_device = nn_device # Device to perform nearest neighbour matmul. - - caching_device = var_cache_device if var_cache_device else None - self.update_memory = tf.constant(True) # Can be fed "false" if needed. 
- self.mem_keys = tf.get_variable( - 'memkeys', [self.memory_size, self.key_dim], trainable=False, - initializer=tf.random_uniform_initializer(-0.0, 0.0), - caching_device=caching_device) - self.mem_vals = tf.get_variable( - 'memvals', [self.memory_size], dtype=tf.int32, trainable=False, - initializer=tf.constant_initializer(0, tf.int32), - caching_device=caching_device) - self.mem_age = tf.get_variable( - 'memage', [self.memory_size], dtype=tf.float32, trainable=False, - initializer=tf.constant_initializer(0.0), caching_device=caching_device) - self.recent_idx = tf.get_variable( - 'recent_idx', [self.vocab_size], dtype=tf.int32, trainable=False, - initializer=tf.constant_initializer(0, tf.int32)) - - # variable for projecting query vector into memory key - self.query_proj = tf.get_variable( - 'memory_query_proj', [self.key_dim, self.key_dim], dtype=tf.float32, - initializer=tf.truncated_normal_initializer(0, 0.01), - caching_device=caching_device) - - def get(self): - return self.mem_keys, self.mem_vals, self.mem_age, self.recent_idx - - def set(self, k, v, a, r=None): - return tf.group( - self.mem_keys.assign(k), - self.mem_vals.assign(v), - self.mem_age.assign(a), - (self.recent_idx.assign(r) if r is not None else tf.group())) - - def clear(self): - return tf.variables_initializer([self.mem_keys, self.mem_vals, self.mem_age, - self.recent_idx]) - - def get_hint_pool_idxs(self, normalized_query): - """Get small set of idxs to compute nearest neighbor queries on. - - This is an expensive look-up on the whole memory that is used to - avoid more expensive operations later on. - - Args: - normalized_query: A Tensor of shape [None, key_dim]. - - Returns: - A Tensor of shape [None, choose_k] of indices in memory - that are closest to the queries. 
- - """ - # look up in large memory, no gradients - with tf.device(self.nn_device): - similarities = tf.matmul(tf.stop_gradient(normalized_query), - self.mem_keys, transpose_b=True, name='nn_mmul') - _, hint_pool_idxs = tf.nn.top_k( - tf.stop_gradient(similarities), k=self.choose_k, name='nn_topk') - return hint_pool_idxs - - def make_update_op(self, upd_idxs, upd_keys, upd_vals, - batch_size, use_recent_idx, intended_output): - """Function that creates all the update ops.""" - mem_age_incr = self.mem_age.assign_add(tf.ones([self.memory_size], - dtype=tf.float32)) - with tf.control_dependencies([mem_age_incr]): - mem_age_upd = tf.scatter_update( - self.mem_age, upd_idxs, tf.zeros([batch_size], dtype=tf.float32)) - - mem_key_upd = tf.scatter_update( - self.mem_keys, upd_idxs, upd_keys) - mem_val_upd = tf.scatter_update( - self.mem_vals, upd_idxs, upd_vals) - - if use_recent_idx: - recent_idx_upd = tf.scatter_update( - self.recent_idx, intended_output, upd_idxs) - else: - recent_idx_upd = tf.group() - - return tf.group(mem_age_upd, mem_key_upd, mem_val_upd, recent_idx_upd) - - def query(self, query_vec, intended_output, use_recent_idx=True): - """Queries memory for nearest neighbor. - - Args: - query_vec: A batch of vectors to query (embedding of input to model). - intended_output: The values that would be the correct output of the - memory. - use_recent_idx: Whether to always insert at least one instance of a - correct memory fetch. - - Returns: - A tuple (result, mask, teacher_loss). - result: The result of the memory look up. - mask: The affinity of the query to the result. - teacher_loss: The loss for training the memory module. 
- """ - - batch_size = tf.shape(query_vec)[0] - output_given = intended_output is not None - - # prepare query for memory lookup - query_vec = tf.matmul(query_vec, self.query_proj) - normalized_query = tf.nn.l2_normalize(query_vec, dim=1) - - hint_pool_idxs = self.get_hint_pool_idxs(normalized_query) - - if output_given and use_recent_idx: # add at least one correct memory - most_recent_hint_idx = tf.gather(self.recent_idx, intended_output) - hint_pool_idxs = tf.concat( - axis=1, - values=[hint_pool_idxs, tf.expand_dims(most_recent_hint_idx, 1)]) - choose_k = tf.shape(hint_pool_idxs)[1] - - with tf.device(self.var_cache_device): - # create small memory and look up with gradients - my_mem_keys = tf.stop_gradient(tf.gather(self.mem_keys, hint_pool_idxs, - name='my_mem_keys_gather')) - similarities = tf.matmul(tf.expand_dims(normalized_query, 1), - my_mem_keys, adjoint_b=True, name='batch_mmul') - hint_pool_sims = tf.squeeze(similarities, [1], name='hint_pool_sims') - hint_pool_mem_vals = tf.gather(self.mem_vals, hint_pool_idxs, - name='hint_pool_mem_vals') - # Calculate softmax mask on the top-k if requested. - # Softmax temperature. Say we have K elements at dist x and one at (x+a). - # Softmax of the last is e^tm(x+a)/Ke^tm*x + e^tm(x+a) = e^tm*a/K+e^tm*a. - # To make that 20% we'd need to have e^tm*a ~= 0.2K, so tm = log(0.2K)/a. 
- softmax_temp = max(1.0, np.log(0.2 * self.choose_k) / self.alpha) - mask = tf.nn.softmax(hint_pool_sims[:, :choose_k - 1] * softmax_temp) - - # prepare returned values - nearest_neighbor = tf.to_int32( - tf.argmax(hint_pool_sims[:, :choose_k - 1], 1)) - - no_teacher_idxs = tf.gather( - tf.reshape(hint_pool_idxs, [-1]), - nearest_neighbor + choose_k * tf.range(batch_size)) - - with tf.device(self.var_cache_device): - result = tf.gather(self.mem_vals, tf.reshape(no_teacher_idxs, [-1])) - - if not output_given: - teacher_loss = None - return result, mask, teacher_loss - - # prepare hints from the teacher on hint pool - teacher_hints = tf.to_float( - tf.abs(tf.expand_dims(intended_output, 1) - hint_pool_mem_vals)) - teacher_hints = 1.0 - tf.minimum(1.0, teacher_hints) - - teacher_vals, teacher_hint_idxs = tf.nn.top_k( - hint_pool_sims * teacher_hints, k=1) - neg_teacher_vals, _ = tf.nn.top_k( - hint_pool_sims * (1 - teacher_hints), k=1) - - # bring back idxs to full memory - teacher_idxs = tf.gather( - tf.reshape(hint_pool_idxs, [-1]), - teacher_hint_idxs[:, 0] + choose_k * tf.range(batch_size)) - - # zero-out teacher_vals if there are no hints - teacher_vals *= ( - 1 - tf.to_float(tf.equal(0.0, tf.reduce_sum(teacher_hints, 1)))) - - # we'll determine whether to do an update to memory based on whether - # memory was queried correctly - sliced_hints = tf.slice(teacher_hints, [0, 0], [-1, self.correct_in_top]) - incorrect_memory_lookup = tf.equal(0.0, tf.reduce_sum(sliced_hints, 1)) - - # loss based on triplet loss - teacher_loss = (tf.nn.relu(neg_teacher_vals - teacher_vals + self.alpha) - - self.alpha) - - # prepare memory updates - update_keys = normalized_query - update_vals = intended_output - - fetched_idxs = teacher_idxs # correctly fetched from memory - with tf.device(self.var_cache_device): - fetched_keys = tf.gather(self.mem_keys, fetched_idxs, name='fetched_keys') - fetched_vals = tf.gather(self.mem_vals, fetched_idxs, name='fetched_vals') - - # do memory 
updates here - fetched_keys_upd = update_keys + fetched_keys # Momentum-like update - fetched_keys_upd = tf.nn.l2_normalize(fetched_keys_upd, dim=1) - # Randomize age a bit, e.g., to select different ones in parallel workers. - mem_age_with_noise = self.mem_age + tf.random_uniform( - [self.memory_size], - self.age_noise, self.age_noise) - - _, oldest_idxs = tf.nn.top_k(mem_age_with_noise, k=batch_size, sorted=False) - - with tf.control_dependencies([result]): - upd_idxs = tf.where(incorrect_memory_lookup, - oldest_idxs, - fetched_idxs) - # upd_idxs = tf.Print(upd_idxs, [upd_idxs], "UPD IDX", summarize=8) - upd_keys = tf.where(incorrect_memory_lookup, - update_keys, - fetched_keys_upd) - upd_vals = tf.where(incorrect_memory_lookup, - update_vals, - fetched_vals) - - def make_update_op(): - return self.make_update_op(upd_idxs, upd_keys, upd_vals, - batch_size, use_recent_idx, intended_output) - - update_op = tf.cond(self.update_memory, make_update_op, tf.no_op) - - with tf.control_dependencies([update_op]): - result = tf.identity(result) - mask = tf.identity(mask) - teacher_loss = tf.identity(teacher_loss) - - return result, mask, tf.reduce_mean(teacher_loss) - - -class LSHMemory(Memory): - """Memory employing locality sensitive hashing. - - Note: Not fully tested. 
- """ - - def __init__(self, key_dim, memory_size, vocab_size, - choose_k=256, alpha=0.1, correct_in_top=1, age_noise=8.0, - var_cache_device='', nn_device='', - num_hashes=None, num_libraries=None): - super(LSHMemory, self).__init__( - key_dim, memory_size, vocab_size, - choose_k=choose_k, alpha=alpha, correct_in_top=1, age_noise=age_noise, - var_cache_device=var_cache_device, nn_device=nn_device) - - self.num_libraries = num_libraries or int(self.choose_k ** 0.5) - self.num_per_hash_slot = max(1, self.choose_k // self.num_libraries) - self.num_hashes = (num_hashes or - int(np.log2(self.memory_size / self.num_per_hash_slot))) - self.num_hashes = min(max(self.num_hashes, 1), 20) - self.num_hash_slots = 2 ** self.num_hashes - - # hashing vectors - self.hash_vecs = [ - tf.get_variable( - 'hash_vecs%d' % i, [self.num_hashes, self.key_dim], - dtype=tf.float32, trainable=False, - initializer=tf.truncated_normal_initializer(0, 1)) - for i in xrange(self.num_libraries)] - - # map representing which hash slots map to which mem keys - self.hash_slots = [ - tf.get_variable( - 'hash_slots%d' % i, [self.num_hash_slots, self.num_per_hash_slot], - dtype=tf.int32, trainable=False, - initializer=tf.random_uniform_initializer(maxval=self.memory_size, - dtype=tf.int32)) - for i in xrange(self.num_libraries)] - - def get(self): # not implemented - return self.mem_keys, self.mem_vals, self.mem_age, self.recent_idx - - def set(self, k, v, a, r=None): # not implemented - return tf.group( - self.mem_keys.assign(k), - self.mem_vals.assign(v), - self.mem_age.assign(a), - (self.recent_idx.assign(r) if r is not None else tf.group())) - - def clear(self): - return tf.variables_initializer([self.mem_keys, self.mem_vals, self.mem_age, - self.recent_idx] + self.hash_slots) - - def get_hash_slots(self, query): - """Gets hashed-to buckets for batch of queries. - - Args: - query: 2-d Tensor of query vectors. - - Returns: - A list of hashed-to buckets for each hash function. 
- """ - - binary_hash = [ - tf.less(tf.matmul(query, self.hash_vecs[i], transpose_b=True), 0) - for i in xrange(self.num_libraries)] - hash_slot_idxs = [ - tf.reduce_sum( - tf.to_int32(binary_hash[i]) * - tf.constant([[2 ** i for i in xrange(self.num_hashes)]], - dtype=tf.int32), 1) - for i in xrange(self.num_libraries)] - return hash_slot_idxs - - def get_hint_pool_idxs(self, normalized_query): - """Get small set of idxs to compute nearest neighbor queries on. - - This is an expensive look-up on the whole memory that is used to - avoid more expensive operations later on. - - Args: - normalized_query: A Tensor of shape [None, key_dim]. - - Returns: - A Tensor of shape [None, choose_k] of indices in memory - that are closest to the queries. - - """ - # get hash of query vecs - hash_slot_idxs = self.get_hash_slots(normalized_query) - - # grab mem idxs in the hash slots - hint_pool_idxs = [ - tf.maximum(tf.minimum( - tf.gather(self.hash_slots[i], idxs), - self.memory_size - 1), 0) - for i, idxs in enumerate(hash_slot_idxs)] - - return tf.concat(axis=1, values=hint_pool_idxs) - - def make_update_op(self, upd_idxs, upd_keys, upd_vals, - batch_size, use_recent_idx, intended_output): - """Function that creates all the update ops.""" - base_update_op = super(LSHMemory, self).make_update_op( - upd_idxs, upd_keys, upd_vals, - batch_size, use_recent_idx, intended_output) - - # compute hash slots to be updated - hash_slot_idxs = self.get_hash_slots(upd_keys) - - # make updates - update_ops = [] - with tf.control_dependencies([base_update_op]): - for i, slot_idxs in enumerate(hash_slot_idxs): - # for each slot, choose which entry to replace - entry_idx = tf.random_uniform([batch_size], - maxval=self.num_per_hash_slot, - dtype=tf.int32) - entry_mul = 1 - tf.one_hot(entry_idx, self.num_per_hash_slot, - dtype=tf.int32) - entry_add = (tf.expand_dims(upd_idxs, 1) * - tf.one_hot(entry_idx, self.num_per_hash_slot, - dtype=tf.int32)) - - mul_op = tf.scatter_mul(self.hash_slots[i], 
slot_idxs, entry_mul) - with tf.control_dependencies([mul_op]): - add_op = tf.scatter_add(self.hash_slots[i], slot_idxs, entry_add) - update_ops.append(add_op) - - return tf.group(*update_ops) diff --git a/research/learning_to_remember_rare_events/model.py b/research/learning_to_remember_rare_events/model.py deleted file mode 100644 index 7a6b460047fda3349c04d0e024c035f69a300461..0000000000000000000000000000000000000000 --- a/research/learning_to_remember_rare_events/model.py +++ /dev/null @@ -1,302 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Model using memory component. - -The model embeds images using a standard CNN architecture. -These embeddings are used as keys to the memory component, -which returns nearest neighbors. 
-""" - -import tensorflow as tf - -import memory - -FLAGS = tf.flags.FLAGS - - -class BasicClassifier(object): - - def __init__(self, output_dim): - self.output_dim = output_dim - - def core_builder(self, memory_val, x, y): - del x, y - y_pred = memory_val - loss = 0.0 - - return loss, y_pred - - -class LeNet(object): - """Standard CNN architecture.""" - - def __init__(self, image_size, num_channels, hidden_dim): - self.image_size = image_size - self.num_channels = num_channels - self.hidden_dim = hidden_dim - self.matrix_init = tf.truncated_normal_initializer(stddev=0.1) - self.vector_init = tf.constant_initializer(0.0) - - def core_builder(self, x): - """Embeds x using standard CNN architecture. - - Args: - x: Batch of images as a 2-d Tensor [batch_size, -1]. - - Returns: - A 2-d Tensor [batch_size, hidden_dim] of embedded images. - """ - - ch1 = 32 * 2 # number of channels in 1st layer - ch2 = 64 * 2 # number of channels in 2nd layer - conv1_weights = tf.get_variable('conv1_w', - [3, 3, self.num_channels, ch1], - initializer=self.matrix_init) - conv1_biases = tf.get_variable('conv1_b', [ch1], - initializer=self.vector_init) - conv1a_weights = tf.get_variable('conv1a_w', - [3, 3, ch1, ch1], - initializer=self.matrix_init) - conv1a_biases = tf.get_variable('conv1a_b', [ch1], - initializer=self.vector_init) - - conv2_weights = tf.get_variable('conv2_w', [3, 3, ch1, ch2], - initializer=self.matrix_init) - conv2_biases = tf.get_variable('conv2_b', [ch2], - initializer=self.vector_init) - conv2a_weights = tf.get_variable('conv2a_w', [3, 3, ch2, ch2], - initializer=self.matrix_init) - conv2a_biases = tf.get_variable('conv2a_b', [ch2], - initializer=self.vector_init) - - # fully connected - fc1_weights = tf.get_variable( - 'fc1_w', [self.image_size // 4 * self.image_size // 4 * ch2, - self.hidden_dim], initializer=self.matrix_init) - fc1_biases = tf.get_variable('fc1_b', [self.hidden_dim], - initializer=self.vector_init) - - # define model - x = tf.reshape(x, - [-1, 
self.image_size, self.image_size, self.num_channels]) - batch_size = tf.shape(x)[0] - - conv1 = tf.nn.conv2d(x, conv1_weights, - strides=[1, 1, 1, 1], padding='SAME') - relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases)) - conv1 = tf.nn.conv2d(relu1, conv1a_weights, - strides=[1, 1, 1, 1], padding='SAME') - relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1a_biases)) - - pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') - - conv2 = tf.nn.conv2d(pool1, conv2_weights, - strides=[1, 1, 1, 1], padding='SAME') - relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases)) - conv2 = tf.nn.conv2d(relu2, conv2a_weights, - strides=[1, 1, 1, 1], padding='SAME') - relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2a_biases)) - - pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') - - reshape = tf.reshape(pool2, [batch_size, -1]) - hidden = tf.matmul(reshape, fc1_weights) + fc1_biases - - return hidden - - -class Model(object): - """Model for coordinating between CNN embedder and Memory module.""" - - def __init__(self, input_dim, output_dim, rep_dim, memory_size, vocab_size, - learning_rate=0.0001, use_lsh=False): - self.input_dim = input_dim - self.output_dim = output_dim - self.rep_dim = rep_dim - self.memory_size = memory_size - self.vocab_size = vocab_size - self.learning_rate = learning_rate - self.use_lsh = use_lsh - - self.embedder = self.get_embedder() - self.memory = self.get_memory() - self.classifier = self.get_classifier() - - self.global_step = tf.train.get_or_create_global_step() - - def get_embedder(self): - return LeNet(int(self.input_dim ** 0.5), 1, self.rep_dim) - - def get_memory(self): - cls = memory.LSHMemory if self.use_lsh else memory.Memory - return cls(self.rep_dim, self.memory_size, self.vocab_size) - - def get_classifier(self): - return BasicClassifier(self.output_dim) - - def core_builder(self, x, y, keep_prob, use_recent_idx=True): - embeddings = self.embedder.core_builder(x) - if 
keep_prob < 1.0: - embeddings = tf.nn.dropout(embeddings, keep_prob) - memory_val, _, teacher_loss = self.memory.query( - embeddings, y, use_recent_idx=use_recent_idx) - loss, y_pred = self.classifier.core_builder(memory_val, x, y) - - return loss + teacher_loss, y_pred - - def train(self, x, y): - loss, _ = self.core_builder(x, y, keep_prob=0.3) - gradient_ops = self.training_ops(loss) - return loss, gradient_ops - - def eval(self, x, y): - _, y_preds = self.core_builder(x, y, keep_prob=1.0, - use_recent_idx=False) - return y_preds - - def get_xy_placeholders(self): - return (tf.placeholder(tf.float32, [None, self.input_dim]), - tf.placeholder(tf.int32, [None])) - - def setup(self): - """Sets up all components of the computation graph.""" - - self.x, self.y = self.get_xy_placeholders() - - # This context creates variables - with tf.variable_scope('core', reuse=None): - self.loss, self.gradient_ops = self.train(self.x, self.y) - # And this one re-uses them (thus the `reuse=True`) - with tf.variable_scope('core', reuse=True): - self.y_preds = self.eval(self.x, self.y) - - def training_ops(self, loss): - opt = self.get_optimizer() - params = tf.trainable_variables() - gradients = tf.gradients(loss, params) - clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0) - return opt.apply_gradients(zip(clipped_gradients, params), - global_step=self.global_step) - - def get_optimizer(self): - return tf.train.AdamOptimizer(learning_rate=self.learning_rate, - epsilon=1e-4) - - def one_step(self, sess, x, y): - outputs = [self.loss, self.gradient_ops] - return sess.run(outputs, feed_dict={self.x: x, self.y: y}) - - def episode_step(self, sess, x, y, clear_memory=False): - """Performs training steps on episodic input. - - Args: - sess: A Tensorflow Session. - x: A list of batches of images defining the episode. - y: A list of batches of labels corresponding to x. - clear_memory: Whether to clear the memory before the episode. 
- - Returns: - List of losses the same length as the episode. - """ - - outputs = [self.loss, self.gradient_ops] - - if clear_memory: - self.clear_memory(sess) - - losses = [] - for xx, yy in zip(x, y): - out = sess.run(outputs, feed_dict={self.x: xx, self.y: yy}) - loss = out[0] - losses.append(loss) - - return losses - - def predict(self, sess, x, y=None): - """Predict the labels on a single batch of examples. - - Args: - sess: A Tensorflow Session. - x: A batch of images. - y: The labels for the images in x. - This allows for updating the memory. - - Returns: - Predicted y. - """ - - # Storing current memory state to restore it after prediction - mem_keys, mem_vals, mem_age, _ = self.memory.get() - cur_memory = ( - tf.identity(mem_keys), - tf.identity(mem_vals), - tf.identity(mem_age), - None, - ) - - outputs = [self.y_preds] - if y is None: - ret = sess.run(outputs, feed_dict={self.x: x}) - else: - ret = sess.run(outputs, feed_dict={self.x: x, self.y: y}) - - # Restoring memory state - self.memory.set(*cur_memory) - - return ret - - def episode_predict(self, sess, x, y, clear_memory=False): - """Predict the labels on an episode of examples. - - Args: - sess: A Tensorflow Session. - x: A list of batches of images. - y: A list of labels for the images in x. - This allows for updating the memory. - clear_memory: Whether to clear the memory before the episode. - - Returns: - List of predicted y. 
- """ - - # Storing current memory state to restore it after prediction - mem_keys, mem_vals, mem_age, _ = self.memory.get() - cur_memory = ( - tf.identity(mem_keys), - tf.identity(mem_vals), - tf.identity(mem_age), - None, - ) - - if clear_memory: - self.clear_memory(sess) - - outputs = [self.y_preds] - y_preds = [] - for xx, yy in zip(x, y): - out = sess.run(outputs, feed_dict={self.x: xx, self.y: yy}) - y_pred = out[0] - y_preds.append(y_pred) - - # Restoring memory state - self.memory.set(*cur_memory) - - return y_preds - - def clear_memory(self, sess): - sess.run([self.memory.clear()]) diff --git a/research/learning_to_remember_rare_events/train.py b/research/learning_to_remember_rare_events/train.py deleted file mode 100644 index c5c6d06b5ee02e73128ee2b23f3b399d29b1e212..0000000000000000000000000000000000000000 --- a/research/learning_to_remember_rare_events/train.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -r"""Script for training model. 
- -Simple command to get up and running: - python train.py --memory_size=8192 \ - --batch_size=16 --validation_length=50 \ - --episode_width=5 --episode_length=30 -""" - -import logging -import os -import random - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import data_utils -import model - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_integer('rep_dim', 128, - 'dimension of keys to use in memory') -tf.flags.DEFINE_integer('episode_length', 100, 'length of episode') -tf.flags.DEFINE_integer('episode_width', 5, - 'number of distinct labels in a single episode') -tf.flags.DEFINE_integer('memory_size', None, 'number of slots in memory. ' - 'Leave as None to default to episode length') -tf.flags.DEFINE_integer('batch_size', 16, 'batch size') -tf.flags.DEFINE_integer('num_episodes', 100000, 'number of training episodes') -tf.flags.DEFINE_integer('validation_frequency', 20, - 'every so many training episodes, ' - 'assess validation accuracy') -tf.flags.DEFINE_integer('validation_length', 10, - 'number of episodes to use to compute ' - 'validation accuracy') -tf.flags.DEFINE_integer('seed', 888, 'random seed for training sampling') -tf.flags.DEFINE_string('save_dir', '', 'directory to save model to') -tf.flags.DEFINE_bool('use_lsh', False, - 'use locality-sensitive hashing ' - '(NOTE: not fully tested)') - - -class Trainer(object): - """Class that takes care of training, validating, and checkpointing model.""" - - def __init__(self, train_data, valid_data, input_dim, output_dim=None): - self.train_data = train_data - self.valid_data = valid_data - self.input_dim = input_dim - - self.rep_dim = FLAGS.rep_dim - self.episode_length = FLAGS.episode_length - self.episode_width = FLAGS.episode_width - self.batch_size = FLAGS.batch_size - self.memory_size = (self.episode_length * self.batch_size - if FLAGS.memory_size is None else FLAGS.memory_size) - self.use_lsh = FLAGS.use_lsh - - self.output_dim = (output_dim if output_dim is not None - else 
self.episode_width) - - def get_model(self): - # vocab size is the number of distinct values that - # could go into the memory key-value storage - vocab_size = self.episode_width * self.batch_size - return model.Model( - self.input_dim, self.output_dim, self.rep_dim, self.memory_size, - vocab_size, use_lsh=self.use_lsh) - - def sample_episode_batch(self, data, - episode_length, episode_width, batch_size): - """Generates a random batch for training or validation. - - Structures each element of the batch as an 'episode'. - Each episode contains episode_length examples and - episode_width distinct labels. - - Args: - data: A dictionary mapping label to list of examples. - episode_length: Number of examples in each episode. - episode_width: Distinct number of labels in each episode. - batch_size: Batch size (number of episodes). - - Returns: - A tuple (x, y) where x is a list of batches of examples - with size episode_length and y is a list of batches of labels. - """ - - episodes_x = [[] for _ in xrange(episode_length)] - episodes_y = [[] for _ in xrange(episode_length)] - assert len(data) >= episode_width - keys = data.keys() - for b in xrange(batch_size): - episode_labels = random.sample(keys, episode_width) - remainder = episode_length % episode_width - remainders = [0] * (episode_width - remainder) + [1] * remainder - episode_x = [ - random.sample(data[lab], - r + (episode_length - remainder) // episode_width) - for lab, r in zip(episode_labels, remainders)] - episode = sum([[(x, i, ii) for ii, x in enumerate(xx)] - for i, xx in enumerate(episode_x)], []) - random.shuffle(episode) - # Arrange episode so that each distinct label is seen before moving to - # 2nd showing - episode.sort(key=lambda elem: elem[2]) - assert len(episode) == episode_length - for i in xrange(episode_length): - episodes_x[i].append(episode[i][0]) - episodes_y[i].append(episode[i][1] + b * episode_width) - - return ([np.array(xx).astype('float32') for xx in episodes_x], - 
[np.array(yy).astype('int32') for yy in episodes_y]) - - def compute_correct(self, ys, y_preds): - return np.mean(np.equal(y_preds, np.array(ys))) - - def individual_compute_correct(self, y, y_pred): - return y_pred == y - - def run(self): - """Performs training. - - Trains a model using episodic training. - Every so often, runs some evaluations on validation data. - """ - - train_data, valid_data = self.train_data, self.valid_data - input_dim, output_dim = self.input_dim, self.output_dim - rep_dim, episode_length = self.rep_dim, self.episode_length - episode_width, memory_size = self.episode_width, self.memory_size - batch_size = self.batch_size - - train_size = len(train_data) - valid_size = len(valid_data) - logging.info('train_size (number of labels) %d', train_size) - logging.info('valid_size (number of labels) %d', valid_size) - logging.info('input_dim %d', input_dim) - logging.info('output_dim %d', output_dim) - logging.info('rep_dim %d', rep_dim) - logging.info('episode_length %d', episode_length) - logging.info('episode_width %d', episode_width) - logging.info('memory_size %d', memory_size) - logging.info('batch_size %d', batch_size) - - assert all(len(v) >= float(episode_length) / episode_width - for v in train_data.values()) - assert all(len(v) >= float(episode_length) / episode_width - for v in valid_data.values()) - - output_dim = episode_width - self.model = self.get_model() - self.model.setup() - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - - saver = tf.train.Saver(max_to_keep=10) - ckpt = None - if FLAGS.save_dir: - ckpt = tf.train.get_checkpoint_state(FLAGS.save_dir) - if ckpt and ckpt.model_checkpoint_path: - logging.info('restoring from %s', ckpt.model_checkpoint_path) - saver.restore(sess, ckpt.model_checkpoint_path) - - logging.info('starting now') - losses = [] - random.seed(FLAGS.seed) - np.random.seed(FLAGS.seed) - for i in xrange(FLAGS.num_episodes): - x, y = self.sample_episode_batch( - train_data, episode_length, 
episode_width, batch_size) - outputs = self.model.episode_step(sess, x, y, clear_memory=True) - loss = outputs - losses.append(loss) - - if i % FLAGS.validation_frequency == 0: - logging.info('episode batch %d, avg train loss %f', - i, np.mean(losses)) - losses = [] - - # validation - correct = [] - num_shots = episode_length // episode_width - correct_by_shot = dict((k, []) for k in xrange(num_shots)) - for _ in xrange(FLAGS.validation_length): - x, y = self.sample_episode_batch( - valid_data, episode_length, episode_width, 1) - outputs = self.model.episode_predict( - sess, x, y, clear_memory=True) - y_preds = outputs - correct.append(self.compute_correct(np.array(y), y_preds)) - - # compute per-shot accuracies - seen_counts = [0] * episode_width - # loop over episode steps - for yy, yy_preds in zip(y, y_preds): - # loop over batch examples - yyy, yyy_preds = int(yy[0]), int(yy_preds[0]) - count = seen_counts[yyy % episode_width] - if count in correct_by_shot: - correct_by_shot[count].append( - self.individual_compute_correct(yyy, yyy_preds)) - seen_counts[yyy % episode_width] = count + 1 - - logging.info('validation overall accuracy %f', np.mean(correct)) - logging.info('%d-shot: %.3f, ' * num_shots, - *sum([[k, np.mean(correct_by_shot[k])] - for k in xrange(num_shots)], [])) - - if saver and FLAGS.save_dir: - saved_file = saver.save(sess, - os.path.join(FLAGS.save_dir, 'model.ckpt'), - global_step=self.model.global_step) - logging.info('saved model to %s', saved_file) - - -def main(unused_argv): - train_data, valid_data = data_utils.get_data() - trainer = Trainer(train_data, valid_data, data_utils.IMAGE_NEW_SIZE ** 2) - trainer.run() - - -if __name__ == '__main__': - logging.basicConfig(level=logging.INFO) - tf.app.run() diff --git a/research/learning_unsupervised_learning/.gitignore b/research/learning_unsupervised_learning/.gitignore deleted file mode 100644 index 0d20b6487c61e7d1bde93acf4a14b7a89083a16d..0000000000000000000000000000000000000000 --- 
a/research/learning_unsupervised_learning/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/research/learning_unsupervised_learning/README.md b/research/learning_unsupervised_learning/README.md deleted file mode 100644 index 0e38717f5de29df28959062889abeb1ce578feea..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/README.md +++ /dev/null @@ -1,40 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Learning Unsupervised Learning Rules -This repository contains code and weights for the learned update rule -presented in "Learning Unsupervised Learning Rules." At this time, this -code can not meta-train the update rule. - -### Structure -`run_eval.py` contains the main training loop. This constructs an op -that runs one iteration of the learned update rule and assigns the -results to variables. Additionally, it loads the weights from our -pre-trained model. - -The base model and the update rule architecture definition can be found in -`architectures/more_local_weight_update.py`. For a complete description -of the model, see our [paper](https://arxiv.org/abs/1804.00222). - -### Dependencies -[absl]([https://github.com/abseil/abseil-py), [tensorflow](https://tensorflow.org), [sonnet](https://github.com/deepmind/sonnet) - -### Usage - -First, download the [pre-trained optimizer model weights](https://storage.googleapis.com/learning_unsupervised_learning/200_tf_graph.zip) and extract it. 
- -```bash -# move to the folder above this folder -cd path_to/research/learning_unsupervised_learning/../ - -# launch the eval script -python -m learning_unsupervised_learning.run_eval \ ---train_log_dir="/tmp/learning_unsupervised_learning" \ ---checkpoint_dir="/path/to/downloaded/model/tf_graph_data.ckpt" -``` - -### Contact -Luke Metz, Niru Maheswaranathan, Github: @lukemetz, @nirum. Email: {lmetz, nirum}@google.com - - diff --git a/research/learning_unsupervised_learning/__init__.py b/research/learning_unsupervised_learning/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/learning_unsupervised_learning/architectures/__init__.py b/research/learning_unsupervised_learning/architectures/__init__.py deleted file mode 100644 index af9545f26da538aa986b19a96b6cfa2bc7459227..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/architectures/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -import more_local_weight_update diff --git a/research/learning_unsupervised_learning/architectures/common.py b/research/learning_unsupervised_learning/architectures/common.py deleted file mode 100644 index 43a2d4f8965ecd337abd3a072a7ecb789df21910..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/architectures/common.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sonnet as snt -import tensorflow as tf -import numpy as np -import collections -from learning_unsupervised_learning import utils - -from tensorflow.python.util import nest - -from learning_unsupervised_learning import variable_replace - - -class LinearBatchNorm(snt.AbstractModule): - """Module that does a Linear layer then a BatchNorm followed by an activation fn""" - def __init__(self, size, activation_fn=tf.nn.relu, name="LinearBatchNorm"): - self.size = size - self.activation_fn = activation_fn - super(LinearBatchNorm, self).__init__(name=name) - - def _build(self, x): - x = tf.to_float(x) - initializers={"w": tf.truncated_normal_initializer(stddev=0.01)} - lin = snt.Linear(self.size, use_bias=False, initializers=initializers) - z = lin(x) - - scale = tf.constant(1., dtype=tf.float32) - offset = tf.get_variable( - "b", - shape=[1, z.shape.as_list()[1]], - initializer=tf.truncated_normal_initializer(stddev=0.1), - dtype=tf.float32 - ) - - mean, var = tf.nn.moments(z, [0], keep_dims=True) - z = ((z - mean) * tf.rsqrt(var + 1e-6)) * scale + offset - - x_p = self.activation_fn(z) - - return z, x_p - - # This needs to work by string name sadly due to how the variable replace - # works and would also work even if the custom getter approuch was used. - # This is verbose, but it should atleast be clear as to what is going on. 
- # TODO(lmetz) a better way to do this (the next 3 functions: - # _raw_name, w(), b() ) - def _raw_name(self, var_name): - """Return just the name of the variable, not the scopes.""" - return var_name.split("/")[-1].split(":")[0] - - - @property - def w(self): - var_list = snt.get_variables_in_module(self) - w = [x for x in var_list if self._raw_name(x.name) == "w"] - assert len(w) == 1 - return w[0] - - @property - def b(self): - var_list = snt.get_variables_in_module(self) - b = [x for x in var_list if self._raw_name(x.name) == "b"] - assert len(b) == 1 - return b[0] - - - -class Linear(snt.AbstractModule): - def __init__(self, size, use_bias=True, init_const_mag=True): - self.size = size - self.use_bias = use_bias - self.init_const_mag = init_const_mag - super(Linear, self).__init__(name="commonLinear") - - def _build(self, x): - if self.init_const_mag: - initializers={"w": tf.truncated_normal_initializer(stddev=0.01)} - else: - initializers={} - lin = snt.Linear(self.size, use_bias=self.use_bias, initializers=initializers) - z = lin(x) - return z - - # This needs to work by string name sadly due to how the variable replace - # works and would also work even if the custom getter approuch was used. - # This is verbose, but it should atleast be clear as to what is going on. 
- # TODO(lmetz) a better way to do this (the next 3 functions: - # _raw_name, w(), b() ) - def _raw_name(self, var_name): - """Return just the name of the variable, not the scopes.""" - return var_name.split("/")[-1].split(":")[0] - - @property - def w(self): - var_list = snt.get_variables_in_module(self) - if self.use_bias: - assert len(var_list) == 2, "Found not 2 but %d" % len(var_list) - else: - assert len(var_list) == 1, "Found not 1 but %d" % len(var_list) - w = [x for x in var_list if self._raw_name(x.name) == "w"] - assert len(w) == 1 - return w[0] - - @property - def b(self): - var_list = snt.get_variables_in_module(self) - assert len(var_list) == 2, "Found not 2 but %d" % len(var_list) - b = [x for x in var_list if self._raw_name(x.name) == "b"] - assert len(b) == 1 - return b[0] - - -def transformer_at_state(base_model, new_variables): - """Get the base_model that has been transformed to use the variables - in final_state. - Args: - base_model: snt.Module - Goes from batch to features - new_variables: list - New list of variables to use - Returns: - func: callable of same api as base_model. 
- """ - assert not variable_replace.in_variable_replace_scope() - - def _feature_transformer(input_data): - """Feature transformer at the end of training.""" - initial_variables = base_model.get_variables() - replacement = collections.OrderedDict( - utils.eqzip(initial_variables, new_variables)) - with variable_replace.variable_replace(replacement): - features = base_model(input_data) - return features - - return _feature_transformer diff --git a/research/learning_unsupervised_learning/architectures/more_local_weight_update.py b/research/learning_unsupervised_learning/architectures/more_local_weight_update.py deleted file mode 100644 index 117549af0f21f9e5148435b73f664a08013f8786..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/architectures/more_local_weight_update.py +++ /dev/null @@ -1,861 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np -import sonnet as snt -import tensorflow as tf - -from learning_unsupervised_learning.architectures import common -from learning_unsupervised_learning import optimizers -from learning_unsupervised_learning import utils -from learning_unsupervised_learning import summary_utils - -OptState = collections.namedtuple('OptState', - ['variables', 'opt_state', 'index']) - -BaseModelOutputs = collections.namedtuple( - 'BaseModelOutputs', ['xs', 'zs', 'mods', 'batch', 'backward_mods']) - - -class GradChannelReadout(snt.AbstractModule): - """Perform a linear readout and reshape from input 3 tensor.""" - - def __init__(self, - num_grad_channels, - device, - perm=(2, 0, 1), - name='GradChannelReadout'): - """Args: - - num_grad_channels: int - number of channels to readout to. - device: str or callable - devicwe to place weights. - perm: list or tuple - transpose applied. - """ - - self.num_grad_channels = num_grad_channels - self.device = device - self.perm = perm - super(GradChannelReadout, self).__init__(name=name) - - def _build(self, h): - with tf.device(self.device): - mod = snt.Linear(self.num_grad_channels) - ret = snt.BatchApply(mod)(h) - # return as [num_grad_channels] x [bs] x [num units] - return tf.transpose(ret, perm=self.perm) - - -def get_weight_stats(x, axis): - """ Compute weight statistics over the given axis. - - Args: - x: tf.Tensor - a batch of activations. - axis: int - axis to perform statistics over. - Returns: - tf.Tensor - a 3-D tensor with statistics. 
- """ - if x is None: - return [] - - stats = [] - l1 = tf.reduce_mean(tf.abs(x), axis=axis) - l2 = tf.sqrt(tf.reduce_mean(x**2, axis=axis) + 1e-6) - - mean, var = tf.nn.moments(x, [axis]) - stats.extend([l1, l2, mean, tf.sqrt(var + 1e-8)]) - - stats = [tf.reshape(s, [-1, 1, 1]) for s in stats] - - return stats - - -class AddUnitBatchStatistics(snt.AbstractModule): - """Compute some number of statistics over units and concat them on.""" - - def __init__(self, name='AddUnitBatchStatistics'): - super(AddUnitBatchStatistics, self).__init__(name=name) - - def _build(self, x): - # [channel, bs, 1] - output = x - for d in [0, 1]: - stats = [] - l1 = tf.reduce_mean(tf.abs(x), axis=d, keepdims=True) - l2 = tf.sqrt(tf.reduce_mean(x**2, axis=d, keepdims=True) + 1e-6) - - mean, var = tf.nn.moments(x, [d], keepdims=True) - stats.extend([l1, l2, mean, tf.sqrt(var + 1e-8)]) - - to_add = tf.concat(stats, axis=2) # [channels/1, units/1, stats] - output += snt.BatchApply(snt.Linear(x.shape.as_list()[2]))(to_add) - return output - - -class ConcatUnitConv(snt.AbstractModule): - """Do a small number of convolutions over units and concat / add them on.""" - - def __init__(self, add=True): - self.add = add - super(ConcatUnitConv, self).__init__(name='ConcatUnitConv') - - def _build(self, x): - # x is [units, bs, 1] - net = tf.transpose(x, [1, 0, 2]) # now [bs x units x 1] - channels = x.shape.as_list()[2] - mod = snt.Conv1D(output_channels=channels, kernel_shape=[3]) - net = mod(net) - net = snt.BatchNorm(axis=[0, 1])(net, is_training=False) - net = tf.nn.relu(net) - mod = snt.Conv1D(output_channels=channels, kernel_shape=[3]) - net = mod(net) - net = snt.BatchNorm(axis=[0, 1])(net, is_training=False) - net = tf.nn.relu(net) - to_concat = tf.transpose(net, [1, 0, 2]) - if self.add: - return x + to_concat - else: - return tf.concat([x, to_concat], 2) - - -class MoreLocalWeightUpdateProcess(snt.AbstractModule): - - def __init__( - self, - remote_device, - local_device, - 
top_delta_size=64, - top_delta_layers=2, - compute_h_size=64, - compute_h_layers=1, - delta_dim=32, - num_grad_channels=4, - normalize_epsilon=1., - ): - self.local_device = local_device - self.remote_device = remote_device - self.top_delta_size = top_delta_size - self.top_delta_layers = top_delta_layers - self.compute_h_size = compute_h_size - self.compute_h_layers = compute_h_layers - self.delta_dim = delta_dim - self.num_grad_channels = num_grad_channels - self.normalize_epsilon = normalize_epsilon, - - with tf.device(local_device): - self.opt = optimizers.UnrollableGradientDescentRollingOptimizer( - learning_rate=1e-4) - - # lazily initialized for readouts - self.readout_mods = {} - - super(MoreLocalWeightUpdateProcess, - self).__init__(name='MoreLocalWeightUpdateProcess') - - with tf.device(remote_device): - self() - - def normalize(self, change_w, normalize_epsilon=None): - if normalize_epsilon is None: - normalize_epsilon = self.normalize_epsilon - - # normalize the weights per receptive-field, rather than per-matrix - var = tf.reduce_mean(tf.square(change_w), axis=0, keepdims=True) - change_w = (change_w) / tf.sqrt(normalize_epsilon + var) - return change_w - - def _build(self): - pass - - @snt.reuse_variables - def compute_top_delta(self, z): - """ parameterization of topD. This converts the top level activation - to an error signal. - Args: - z: tf.Tensor - batch of final layer post activations - Returns - delta: tf.Tensor - the error signal - """ - s_idx = 0 - with tf.variable_scope('compute_top_delta'), tf.device(self.remote_device): - # typically this takes [BS, length, input_channels], - # We are applying this such that we convolve over the batch dimension. 
- act = tf.expand_dims(tf.transpose(z, [1, 0]), 2) # [channels, BS, 1] - - mod = snt.Conv1D(output_channels=self.top_delta_size, kernel_shape=[5]) - act = mod(act) - - act = snt.BatchNorm(axis=[0, 1])(act, is_training=False) - act = tf.nn.relu(act) - - bs = act.shape.as_list()[0] - act = tf.transpose(act, [2, 1, 0]) - act = snt.Conv1D(output_channels=bs, kernel_shape=[3])(act) - act = snt.BatchNorm(axis=[0, 1])(act, is_training=False) - act = tf.nn.relu(act) - act = snt.Conv1D(output_channels=bs, kernel_shape=[3])(act) - act = snt.BatchNorm(axis=[0, 1])(act, is_training=False) - act = tf.nn.relu(act) - act = tf.transpose(act, [2, 1, 0]) - - prev_act = act - for i in range(self.top_delta_layers): - mod = snt.Conv1D(output_channels=self.top_delta_size, kernel_shape=[3]) - act = mod(act) - - act = snt.BatchNorm(axis=[0, 1])(act, is_training=False) - act = tf.nn.relu(act) - - prev_act = act - - mod = snt.Conv1D(output_channels=self.delta_dim, kernel_shape=[3]) - act = mod(act) - - # [bs, feature_channels, delta_channels] - act = tf.transpose(act, [1, 0, 2]) - return act - - @snt.reuse_variables - def compute_h(self, - x, - z, - d, - bias, - W_bot, - W_top, - compute_perc=1.0, - compute_units=None): - """z = [BS, n_units] a = [BS, n_units] b = [BS, n_units] d = [BS, n_units, delta_channels] - - """ - - s_idx = 0 - if compute_perc != 1.0: - assert compute_units is None - - with tf.device(self.remote_device): - inp_feat = [x, z] - inp_feat = [tf.transpose(f, [1, 0]) for f in inp_feat] - - units = x.shape.as_list()[1] - bs = x.shape.as_list()[0] - - # add unit ID, to help the network differentiate units - id_theta = tf.linspace(0., (4) * np.pi, units) - assert bs is not None - id_theta_bs = tf.reshape(id_theta, [-1, 1]) * tf.ones([1, bs]) - inp_feat += [tf.sin(id_theta_bs), tf.cos(id_theta_bs)] - - # list of [units, BS, 1] - inp_feat = [tf.expand_dims(f, 2) for f in inp_feat] - - d_trans = tf.transpose(d, [1, 0, 2]) - - if compute_perc != 1.0: - compute_units = 
int(compute_perc * inp_feat.shape.as_list()[0]) - - # add weight matrix statistics, both from above and below - w_stats_bot = get_weight_stats(W_bot, 0) - w_stats_top = get_weight_stats(W_top, 1) - w_stats = w_stats_bot + w_stats_top - if W_bot is None or W_top is None: - # if it's an edge layer (top or bottom), just duplicate the stats for - # the weight matrix that does exist - w_stats = w_stats + w_stats - w_stats = [tf.ones([1, x.shape[0], 1]) * ww for ww in w_stats] - # w_stats is a list, with entries with shape UNITS x 1 x channels - - if compute_units is None: - inp_feat_in = inp_feat - d_trans_in = d_trans - w_stats_in = w_stats - bias_in = tf.transpose(bias) - else: - # only run on a subset of the activations. - mask = tf.random_uniform( - minval=0, - maxval=1, - dtype=tf.float32, - shape=inp_feat[0].shape.as_list()[0:1]) - _, ind = tf.nn.top_k(mask, k=compute_units) - ind = tf.reshape(ind, [-1, 1]) - - inp_feat_in = [tf.gather_nd(xx, ind) for xx in inp_feat] - w_stats_in = [tf.gather_nd(xx, ind) for xx in w_stats] - d_trans_in = tf.gather_nd(d_trans, ind) - bias_in = tf.gather_nd(tf.transpose(bias), ind) - - w_stats_in = tf.concat(w_stats_in, 2) - w_stats_in_norm = w_stats_in * tf.rsqrt( - tf.reduce_mean(w_stats_in**2) + 1e-6) - - act = tf.concat(inp_feat_in + [d_trans_in], 2) - act = snt.BatchNorm(axis=[0, 1])(act, is_training=True) - - bias_dense = tf.reshape(bias_in, [-1, 1, 1]) * tf.ones([1, bs, 1]) - act = tf.concat([w_stats_in_norm, bias_dense, act], 2) - - mod = snt.Conv1D(output_channels=self.compute_h_size, kernel_shape=[3]) - act = mod(act) - - act = snt.BatchNorm(axis=[0, 1])(act, is_training=True) - act = tf.nn.relu(act) - - act2 = ConcatUnitConv()(act) - act = act2 - - prev_act = act - for i in range(self.compute_h_layers): - mod = snt.Conv1D(output_channels=self.compute_h_size, kernel_shape=[3]) - act = mod(act) - - act = snt.BatchNorm(axis=[0, 1])(act, is_training=True) - act = tf.nn.relu(act) - - act = ConcatUnitConv()(act) - - prev_act = 
act - - h = act - if compute_units is not None: - shape = inp_feat[0].shape.as_list()[:1] + h.shape.as_list()[1:] - h = tf.scatter_nd(ind, h, shape=shape) - - h = tf.transpose(h, [1, 0, 2]) # [bs, units, channels] - - return h - - ## wrappers to allow forward and backward to have different variables - @snt.reuse_variables - def merge_change_w_forward(self, change_w_terms, global_prefix='', prefix=''): - return self.merge_change_w( - change_w_terms, global_prefix=global_prefix, prefix=prefix) - - @snt.reuse_variables - def merge_change_w_backward(self, change_w_terms, global_prefix='', - prefix=''): - return self.merge_change_w( - change_w_terms, global_prefix=global_prefix, prefix=prefix) - - def merge_change_w(self, change_w_terms, global_prefix='', prefix=''): - with tf.device( - self.remote_device), tf.name_scope(global_prefix + '_merge_change_w'): - w_base = change_w_terms['w_base'] - - for kk in sorted(change_w_terms.keys()): - name = global_prefix + 'change_w_plane_%s' % kk - delta_w = change_w_terms[kk] - mean, var = tf.nn.moments(delta_w, [0, 1]) - root_mean_square = tf.sqrt(tf.reduce_mean(delta_w**2) + 1e-6) - - for kk in sorted(change_w_terms.keys()): - change_w_terms[kk] = self.normalize(change_w_terms[kk]) - - initializers = { - 'w': tf.constant_initializer(0.1), - 'b': tf.zeros_initializer() - } - mod = snt.Linear( - 1, - name=global_prefix + '_weight_readout_coeffs', - initializers=initializers) - - change_w_terms_list = [ - change_w_terms[kk] for kk in sorted(change_w_terms.keys()) - ] - stack_terms = tf.stack(change_w_terms_list, axis=-1) - change_w = tf.squeeze( - snt.BatchApply(mod)(stack_terms), axis=-1) / len(change_w_terms) - - # only allow perpendicular updates, or updates which grow length. don't - # allow length to decay towards zero. - ip = tf.reduce_mean(change_w * w_base) - # zero out any updates that shrink length - ip = tf.nn.relu(ip) - change_w -= w_base * ip - change_w /= tf.sqrt(len(change_w_terms) * 1.) 
- - change_w = self.normalize(change_w) - - # encourage the receptive field to not collapse to 0 - change_w -= w_base / 7. # This is an arbitrary scale choice - - return tf.identity(change_w) - - @snt.reuse_variables - def bias_readout(self, h): - with tf.device(self.remote_device): - mod = snt.Linear(1, name='bias_readout') - ret = snt.BatchApply(mod)(h) - return tf.squeeze(ret, 2) - - @snt.reuse_variables - def next_delta(self, z, h, d): - with tf.device(self.remote_device): - return d * tf.expand_dims(tf.nn.sigmoid(z), 2) + self.to_delta_size(h) - - @utils.create_variables_in_class_scope - def get_readout_mod(self, name): - if name not in self.readout_mods: - self.readout_mods[name] = GradChannelReadout( - self.num_grad_channels, device=self.remote_device, name=name) - - return self.readout_mods[name] - - @utils.create_variables_in_class_scope - def low_rank_readout(self, name, h1, h2, psd=False): - BS = h1.shape.as_list()[0] - r_t = self.get_readout_mod(name + '_top')(h1) - if psd: - r_b = r_t - else: - r_b = self.get_readout_mod(name + '_bottom')(h2) - return tf.reduce_mean(tf.matmul(r_b, r_t, transpose_a=True), axis=0) / BS - - @snt.reuse_variables - def to_delta_size(self, h): - with tf.device(self.remote_device): - mod = snt.Linear(self.delta_dim) - return snt.BatchApply(mod)(h) - - @snt.reuse_variables - def initial_state(self, variables): - """The inner optimization state. - - Args: - variables: list of tf.Variable - list of variables to get the initial state of. 
- Returns: - opt_state: OptState - """ - - with tf.device(self.local_device): - initial_opt_state = self.opt.get_state(variables) - - return OptState( - variables=variables, opt_state=initial_opt_state, index=tf.constant(0)) - - @snt.reuse_variables - def compute_next_state(self, grads, learning_rate, cur_state, - cur_transformer): - - summaries = [] - with tf.device(self.local_device): - with tf.control_dependencies(summaries): - new_vars, new_state = self.opt.compute_updates( - cur_state.variables, grads, learning_rate, cur_state.opt_state) - pass - - return OptState( - variables=tuple(new_vars), - opt_state=new_state, - index=cur_state.index + 1) - - def assign_state(self, base_model, next_state): - var_ups = [ - v.assign(nv) for v, nv in utils.eqzip(base_model.get_variables(), - next_state.variables) - ] - - opt_ups = self.opt.assign_state(next_state.opt_state) - - return tf.group(opt_ups, *var_ups) - - def local_variables(self): - return list(self.opt.get_variables()) - - def remote_variables(self): - train = list( - snt.get_variables_in_module(self, tf.GraphKeys.TRAINABLE_VARIABLES)) - train += list( - snt.get_variables_in_module(self, - tf.GraphKeys.MOVING_AVERAGE_VARIABLES)) - return train - - -class MoreLocalWeightUpdateWLearner(snt.AbstractModule): - """The BaseModel that the UnsupervisedUpdateRule acts on. 
- """ - - def __init__(self, - remote_device, - local_device, - inner_size=128, - output_size=32, - n_layers=4, - shuffle_input=True, - activation_fn=tf.nn.relu, - identical_updates=True, - **kwargs): - self.local_device = local_device - self.remote_device = remote_device - self.inner_size = inner_size - self.n_layers = n_layers - self.shuffle_input = shuffle_input - self.activation_fn = activation_fn - self.identical_updates = identical_updates - - self.output_size = output_size - if output_size == None: - self.output_size = inner_size - - self.shuffle_ind = None - - super(MoreLocalWeightUpdateWLearner, self).__init__( - name='LocalWeightUpdateWLearner', **kwargs) - - @snt.reuse_variables - def get_shuffle_ind(self, size): - if self.shuffle_ind is None: - # put the shuffle in tf memory to make the eval jobs - # re-entrant. - shuffle_ind_val = np.random.permutation(size) - shuffle_ind = tf.get_variable( - name='shuffle_ind', dtype=tf.int64, initializer=shuffle_ind_val) - unshuffle_ind = tf.scatter_nd( - tf.reshape(shuffle_ind, [-1, 1]), tf.range(size), [size]) - - return shuffle_ind, unshuffle_ind - - def _build(self, batch): - image = batch.image - x0 = snt.BatchFlatten()(image) - if self.shuffle_input: - size = x0.shape.as_list()[1] - shuffle_ind, unshuffle_ind = self.get_shuffle_ind(size) - x0 = tf.gather(x0, shuffle_ind, axis=1) - - xs = [x0] - mods = [] - zs = [] - init = {} - - for i in range(self.n_layers): - mod = common.LinearBatchNorm( - self.inner_size, activation_fn=self.activation_fn) - z, x = mod(xs[i]) - xs.append(x) - zs.append(z) - mods.append(mod) - - mod = common.LinearBatchNorm( - self.output_size, activation_fn=self.activation_fn) - z, x = mod(xs[-1]) - mods.append(mod) - - xs.append(x) - zs.append(z) - - embedding_x = xs[-1] - - # make a random set of backward mods - backward_mods = [] - for i, (x, x_p1) in enumerate(zip(xs[0:-1], xs[1:])): - m = common.LinearBatchNorm( - x_p1.shape.as_list()[1], activation_fn=tf.identity) - _ = m(x) - 
backward_mods.append(m) - - shape = image.shape.as_list()[1:4] - - for mods_p, prefix in [(mods, 'forward'), (backward_mods, 'backward')]: - if self.shuffle_input: - unshuf_w = tf.gather(mods_p[0].w, unshuffle_ind, axis=0) - else: - unshuf_w = mods_p[0].w - img = summary_utils.first_layer_weight_image(unshuf_w, shape) - tf.summary.image(prefix + '_w0_receptive_field', img) - - for i, m in enumerate(mods_p[0:]): - img = summary_utils.inner_layer_weight_image(m.w) - tf.summary.image(prefix + '_w%d' % (i + 1), img) - - img = summary_utils.sorted_images(image, batch.label_onehot) - tf.summary.image('inputs', img) - - # log out pre-activations and activations - for all_vis, base_name in [(xs, 'x'), (zs, 'z')]: - for i, x_vis in enumerate(all_vis): - img = summary_utils.activation_image(x_vis, batch.label_onehot) - tf.summary.image('%s%d' % (base_name, i), img) - - embedding_x = tf.identity(embedding_x) - - outputs = BaseModelOutputs( - xs=xs, zs=zs, mods=mods, batch=batch, backward_mods=backward_mods) - - return embedding_x, outputs - - def compute_next_h_d(self, meta_opt, w_bot, w_top, bias, x, z, d, backward_w): - """ Propogate error back down the network while computing hidden state. - """ - if z is None: - z = x - - h = meta_opt.compute_h(x, z, d, bias, w_bot, - w_top) # [bs x 60 x h_channels] - - # compute the next d - delta = meta_opt.next_delta(z, h, d) - - if backward_w is not None: - - def delta_matmul(w, delta): - d = tf.transpose(delta, [0, 2, 1]) # [bs x delta_channels x n_units) - d = snt.BatchApply(lambda x: tf.matmul(x, w, transpose_b=True))(d) - d = tf.transpose(d, [0, 2, 1]) - return d - - # replace the "backward pass" with a random matrix. 
- d = delta_matmul(backward_w, delta) # [bs x 60 x delta_channels] - var = tf.reduce_mean(tf.square(d), [2], keepdims=True) - d = d * tf.rsqrt(1e-6 + var) - - return h, d - - def weight_change_for_layer(self, meta_opt, l_idx, w_base, b_base, upper_h, - lower_h, upper_x, lower_x, prefix, include_bias): - """Compute the change in weights for each layer. - This computes something roughly analagous to a gradient. - """ - reduce_upper_h = upper_h - reduce_lower_h = lower_h - - BS = lower_x.shape.as_list()[0] - - change_w_terms = dict() - - # initial weight value normalized - # normalize the weights per receptive-field, rather than per-matrix - weight_scale = tf.rsqrt( - tf.reduce_mean(w_base**2, axis=0, keepdims=True) + 1e-6) - w_base *= weight_scale - - change_w_terms['w_base'] = w_base - - # this will act to decay larger weights towards zero - change_w_terms['large_decay'] = w_base**2 * tf.sign(w_base) - - # term based on activations - ux0 = upper_x - tf.reduce_mean(upper_x, axis=0, keepdims=True) - uxs0 = ux0 * tf.rsqrt(tf.reduce_mean(ux0**2, axis=0, keepdims=True) + 1e-6) - change_U = tf.matmul(uxs0, uxs0, transpose_a=True) / BS - change_U /= tf.sqrt(float(change_U.shape.as_list()[0])) - - cw = tf.matmul(w_base, change_U) - cw_scale = tf.rsqrt(tf.reduce_mean(cw**2 + 1e-8)) - cw *= cw_scale - change_w_terms['decorr_x'] = cw - - # hebbian term - lx0 = lower_x - tf.reduce_mean(lower_x, axis=0, keepdims=True) - lxs0 = lx0 * tf.rsqrt(tf.reduce_mean(lx0**2, axis=0, keepdims=True) + 1e-6) - cw = tf.matmul(lxs0, uxs0, transpose_a=True) / BS - change_w_terms['hebb'] = -cw - - # 0th order term - w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_0', upper_h, - lower_h) - change_w_terms['0_order'] = w_term - - # # rbf term (weight update scaled by distance from 0) - w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_rbf', - reduce_upper_h, reduce_lower_h) - change_w_terms['rbf'] = tf.exp(-w_base**2) * w_term - - # 1st order term (weight dependent update to 
weights) - w_term = meta_opt.low_rank_readout(prefix + 'weight_readout_1', - reduce_upper_h, reduce_lower_h) - change_w_terms['1_order'] = w_base * w_term - - # more terms based on single layer readouts. - for update_type in ['lin', 'sqr']: - for h_source, h_source_name in [(reduce_upper_h, 'upper'), - (reduce_lower_h, 'lower')]: - structures = ['symm'] - if update_type == 'lin' and h_source_name == 'upper': - structures += ['psd'] - for structure in structures: - name = update_type + '_' + h_source_name + '_' + structure - if structure == 'symm': - change_U = meta_opt.low_rank_readout(prefix + name, h_source, - h_source) - change_U = (change_U + tf.transpose(change_U)) / tf.sqrt(2.) - change_U = tf.matrix_set_diag(change_U, - tf.zeros( - [change_U.shape.as_list()[0]])) - elif structure == 'psd': - change_U = meta_opt.low_rank_readout( - prefix + name, h_source, None, psd=True) - else: - assert False - change_U /= tf.sqrt(float(change_U.shape.as_list()[0])) - - if update_type == 'lin': - sign_multiplier = tf.ones_like(w_base) - w_base_l = w_base - elif update_type == 'sqr': - sign_multiplier = tf.sign(w_base) - w_base_l = tf.sqrt(1. + w_base**2) - 1. 
- - if h_source_name == 'upper': - cw = tf.matmul(w_base_l, change_U) # [N^l-1 x N^l] - elif h_source_name == 'lower': - cw = tf.matmul(change_U, w_base_l) - change_w_terms[name] = cw * sign_multiplier - - - if prefix == 'forward': - change_w = meta_opt.merge_change_w_forward( - change_w_terms, global_prefix=prefix, prefix='l%d' % l_idx) - elif prefix == 'backward': - change_w = meta_opt.merge_change_w_backward( - change_w_terms, global_prefix=prefix, prefix='l%d' % l_idx) - else: - assert (False) - - if not include_bias: - return change_w - - change_b = tf.reduce_mean(meta_opt.bias_readout(upper_h), [0]) - - # force nonlinearities to be exercised -- biases can't all be increased without bound - change_b_mean = tf.reduce_mean(change_b) - offset = -tf.nn.relu(-change_b_mean) - change_b -= offset - - var = tf.reduce_mean(tf.square(change_b), [0], keepdims=True) - change_b = (change_b) / tf.sqrt(0.5 + var) - return change_w, change_b - - def compute_next_state(self, outputs, meta_opt, previous_state): - zs = outputs.zs - xs = outputs.xs - batch = outputs.batch - mods = outputs.mods - backward_mods = outputs.backward_mods - variables = self.get_variables() - - rev_mods = mods[::-1] - rev_backward_mods = backward_mods[::-1] - rev_xs = xs[::-1] - rev_zs = zs[::-1] + [None] - - to_top = xs[-1] - - # variables that change in the loop - hs = [] - d = meta_opt.compute_top_delta(to_top) # [bs x 32 x delta_channels] - - iterator = utils.eqzip(rev_backward_mods + [None], rev_mods + [None], - [None] + rev_mods, rev_xs, rev_zs) - for (backward_mod, lower_mod, upper_mod, x, z) in iterator: - w_bot = None - if not lower_mod is None: - w_bot = previous_state.variables[variables.index(lower_mod.w)] - w_top = None - if not upper_mod is None: - w_top = previous_state.variables[variables.index(upper_mod.w)] - backward_w = None - if backward_mod is not None: - backward_w = previous_state.variables[variables.index(backward_mod.w)] - if lower_mod is not None: - bias = 
previous_state.variables[variables.index(lower_mod.b)] - else: - bias = tf.zeros([x.shape[1]]) - - h, d = self.compute_next_h_d( - meta_opt=meta_opt, - w_bot=w_bot, - w_top=w_top, - bias=bias, - backward_w=backward_w, - x=x, - z=z, - d=d) - hs.append(h) - - w_forward_var_idx = [variables.index(mod.w) for mod in rev_mods] - w_backward_var_idx = [variables.index(mod.w) for mod in rev_backward_mods] - b_var_idx = [variables.index(mod.b) for mod in rev_mods] - - # storage location for outputs of below loop - grads = [None for _ in previous_state.variables] - - # over-ride learning rate for perturbation variables - learning_rate = [None for _ in previous_state.variables] - - # This is a map -- no state is shared cross loop - for l_idx, w_forward_idx, w_backward_idx, b_idx, upper_h, lower_h, lower_x, upper_x in utils.eqzip( - range(len(w_forward_var_idx)), w_forward_var_idx, w_backward_var_idx, - b_var_idx, hs[:-1], hs[1:], xs[::-1][1:], xs[::-1][:-1]): - - b_base = previous_state.variables[b_idx] - change_w_forward, change_b = self.weight_change_for_layer( - meta_opt=meta_opt, - l_idx=l_idx, - w_base=previous_state.variables[w_forward_idx], - b_base=b_base, - upper_h=upper_h, - lower_h=lower_h, - upper_x=upper_x, - lower_x=lower_x, - prefix='forward', - include_bias=True) - - if self.identical_updates: - change_w_backward = change_w_forward - else: - change_w_backward = self.weight_change_for_layer( - meta_opt=meta_opt, - l_idx=l_idx, - w_base=previous_state.variables[w_backward_idx], - b_base=b_base, - upper_h=upper_h, - lower_h=lower_h, - upper_x=upper_x, - lower_x=lower_x, - prefix='backward', - include_bias=False) - - grads[w_forward_idx] = change_w_forward - - grads[w_backward_idx] = change_w_backward - - grads[b_idx] = change_b - - cur_transformer = common.transformer_at_state(self, - previous_state.variables) - next_state = meta_opt.compute_next_state( - grads, - learning_rate=learning_rate, - cur_state=previous_state, - cur_transformer=lambda x: 
cur_transformer(x)[0]) - return next_state - - def initial_state(self, meta_opt): - return meta_opt.initial_state(self.get_variables()) diff --git a/research/learning_unsupervised_learning/datasets/__init__.py b/research/learning_unsupervised_learning/datasets/__init__.py deleted file mode 100644 index 9949cd96ca8f2fe1c39705a5ca8570de9cad5a66..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/datasets/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import mnist diff --git a/research/learning_unsupervised_learning/datasets/common.py b/research/learning_unsupervised_learning/datasets/common.py deleted file mode 100644 index 11f65ceab57a4114ca3876b3cb6eed86e2263745..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/datasets/common.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -import tensorflow as tf -import numpy as np - -ImageLabelOnehot = collections.namedtuple('ImageLabelOnehot', - ['image', 'label', 'label_onehot']) -ImageLabelOnehotRegression = collections.namedtuple( - "ImageLabelOnehotRegression", - ["image", "label", "label_onehot", "regression_target"]) diff --git a/research/learning_unsupervised_learning/datasets/mnist.py b/research/learning_unsupervised_learning/datasets/mnist.py deleted file mode 100644 index 6ee595d99ad2523042454f038b4665095f501caf..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/datasets/mnist.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -import sonnet as snt -import tensorflow as tf -from tensorflow.python.keras.datasets import mnist -from learning_unsupervised_learning.datasets import common - -class Mnist(snt.AbstractModule): - def __init__(self, device, batch_size=128, name="Mnist"): - self.device = device - self.batch_size = batch_size - - self._make_dataset() - self.iterator = None - - super(Mnist, self).__init__(name=name) - - def _make_dataset(self): - (x_train, y_train), (x_test, y_test) = mnist.load_data() - - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - - dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) - dataset = dataset.repeat() - dataset = dataset.shuffle(self.batch_size * 3) - dataset = dataset.batch(self.batch_size) - def _map_fn(image, label): - image = tf.to_float(image) / 255. - label.set_shape([self.batch_size]) - label = tf.cast(label, dtype=tf.int32) - label_onehot = tf.one_hot(label, 10) - image = tf.reshape(image, [self.batch_size, 28, 28, 1]) - return common.ImageLabelOnehot( - image=image, label=label, label_onehot=label_onehot) - - self.dataset = dataset.map(_map_fn) - - def _build(self): - if self.iterator is None: - self.iterator = self.dataset.make_one_shot_iterator() - batch = self.iterator.get_next() - [b.set_shape([self.batch_size] + b.shape.as_list()[1:]) for b in batch] - return batch - - -class TinyMnist(Mnist): - def __init__(self, *args, **kwargs): - kwargs.setdefault("name", "TinyMnist") - super(TinyMnist, self).__init__(*args, **kwargs) - - def _make_dataset(self): - super(TinyMnist, self)._make_dataset() - - def _map_fn(batch): - new_img = tf.image.resize_images(batch.image, [14, 14]) - return common.ImageLabelOnehot( - image=new_img, label=batch.label, label_onehot=batch.label_onehot) - - self.dataset = self.dataset.map(_map_fn) diff --git a/research/learning_unsupervised_learning/evaluation.py 
b/research/learning_unsupervised_learning/evaluation.py deleted file mode 100644 index 2ec40e99a672f9420200653b92818374e0e84d78..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/evaluation.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -"""Evaluation job. - -This sits on the side and performs evaluation on a saved model. -This is a separate process for ease of use and stability of numbers. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from learning_unsupervised_learning import utils - - -def construct_evaluation_graph(theta_process_fn=None, - w_learner_fn=None, - dataset_fn=None, - meta_objectives=None, - ): - """Construct the evaluation graph. 
- """ - if meta_objectives is None: - meta_objectives = [] - - tf.train.create_global_step() - - local_device = "" - remote_device = "" - - meta_opt = theta_process_fn( - remote_device=remote_device, local_device=local_device) - - base_model = w_learner_fn( - remote_device=remote_device, local_device=local_device) - - train_dataset = dataset_fn(device=local_device) - - # construct variables - x, outputs = base_model(train_dataset()) - initial_state = base_model.initial_state(meta_opt, max_steps=10) - next_state = base_model.compute_next_state(outputs, meta_opt, initial_state) - with utils.state_barrier_context(next_state): - train_one_step_op = meta_opt.assign_state(base_model, next_state) - - meta_objs = [] - for meta_obj_fn in meta_objectives: - meta_obj = meta_obj_fn(local_device="", remote_device="") - meta_objs.append(meta_obj) - J = meta_obj(train_dataset, lambda x: base_model(x)[0]) - tf.summary.scalar(str(meta_obj.__class__.__name__)+"_J", tf.reduce_mean(J)) - - # TODO(lmetz) this is kinda error prone. - # We should share the construction of the global variables across train and - # make sure both sets of savable variables are the same - checkpoint_vars = meta_opt.remote_variables() + [tf.train.get_global_step()] - for meta_obj in meta_objs: - checkpoint_vars.extend(meta_obj.remote_variables()) - - return checkpoint_vars, train_one_step_op, (base_model, train_dataset) diff --git a/research/learning_unsupervised_learning/meta_objective/__init__.py b/research/learning_unsupervised_learning/meta_objective/__init__.py deleted file mode 100644 index 54c46145e3c3a9f19110f92197f1d3cb2afe31fb..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/meta_objective/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -import sklearn -import linear_regression diff --git a/research/learning_unsupervised_learning/meta_objective/linear_regression.py b/research/learning_unsupervised_learning/meta_objective/linear_regression.py deleted file mode 100644 index b49fc2529ccba08a6b47019cd7546f8fb409b28b..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/meta_objective/linear_regression.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - - -"""Closed form linear regression. - -Can be differentiated through. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np -import sonnet as snt -import tensorflow as tf - -from learning_unsupervised_learning import utils -from learning_unsupervised_learning import variable_replace - - -def solve_ridge(x, y, ridge_factor): - with tf.name_scope("solve_ridge"): - # Added a column of ones to the end of the feature matrix for bias - A = tf.concat([x, tf.ones((x.shape.as_list()[0], 1))], axis=1) - - # Analytic solution for the ridge regression loss - inv_target = tf.matmul(A, A, transpose_a=True) - np_diag_penalty = ridge_factor * np.ones( - A.shape.as_list()[1], dtype="float32") - # Remove penalty on bias component of weights - np_diag_penalty[-1] = 0. - diag_penalty = tf.constant(np_diag_penalty) - inv_target += tf.diag(diag_penalty) - - inv = tf.matrix_inverse(inv_target) - w = tf.matmul(inv, tf.matmul(A, y, transpose_a=True)) - return w - - -class LinearRegressionMetaObjective(snt.AbstractModule): - """A meta objective based on training Ridge Regression with analytic solution. - - This is used to evaluate the performance of a given feature set trained in - some other manner. 
- """ - - def __init__(self, - local_device=None, - remote_device=None, - zero_one_labels=True, - normalize_y_hat=True, - normalize_act=False, - averages=1, - ridge_factor=0.1, - center_y=True, - hinge_loss=False, - samples_per_class=10, - test_train_scalar=1.0, - ): - self._local_device = local_device - self._remote_device = remote_device - self.zero_one_labels = zero_one_labels - self.normalize_y_hat = normalize_y_hat - self.normalize_act = normalize_act - self.ridge_factor = ridge_factor - self.averages = averages - self.samples_per_class = samples_per_class - self.center_y=center_y - self.test_train_scalar=test_train_scalar - self.hinge_loss = hinge_loss - - self.dataset_map = {} - - super(LinearRegressionMetaObjective, - self).__init__(name="LinearRegressionMetaObjective") - - def _build(self, dataset, feature_transformer): - if self.samples_per_class is not None: - if dataset not in self.dataset_map: - # datasets are outside of frames from while loops - with tf.control_dependencies(None): - self.dataset_map[dataset] = utils.sample_n_per_class( - dataset, self.samples_per_class) - - dataset = self.dataset_map[dataset] - - stats = collections.defaultdict(list) - losses = [] - # TODO(lmetz) move this to ingraph control flow? 
- for _ in xrange(self.averages): - loss, stat = self._build_once(dataset, feature_transformer) - losses.append(loss) - for k, v in stat.items(): - stats[k].append(v) - stats = {k: tf.add_n(v) / float(len(v)) for k, v in stats.items()} - - summary_updates = [] - for k, v in stats.items(): - tf.summary.scalar(k, v) - - with tf.control_dependencies(summary_updates): - return tf.add_n(losses) / float(len(losses)) - - def _build_once(self, dataset, feature_transformer): - with tf.device(self._local_device): - batch = dataset() - num_classes = batch.label_onehot.shape.as_list()[1] - - regression_mod = snt.Linear(num_classes) - - if self.normalize_act: - - def normalize_transformer(x): - unnorm_x = feature_transformer(x) - return tf.nn.l2_normalize(unnorm_x, 0) - - feature_transformer_wrap = normalize_transformer - else: - feature_transformer_wrap = feature_transformer - - # construct the variables of the right shape in the sonnet module by - # calling a forward pass through the regressor. - with utils.assert_no_new_variables(): - dummy_features = feature_transformer_wrap(batch) - regression_mod(dummy_features) - reg_w = regression_mod.w - reg_b = regression_mod.b - - batch_test = dataset() - all_batch = utils.structure_map_multi(lambda x: tf.concat(x, 0), [batch, batch_test]) - #all_batch = tf.concat([batch, batch_test], 0) - # Grab a new batch of data from the dataset. - features = feature_transformer_wrap(all_batch) - features, features_test = utils.structure_map_split(lambda x: tf.split(x, 2, axis=0), features) - - def center_y(y): - y -= tf.reduce_mean(y) - y *= tf.rsqrt(tf.reduce_mean(tf.reduce_sum(y**2, axis=[1], keep_dims=True))) - return y - def get_y_vec(batch): - y_pieces = [] - if hasattr(batch, "label_onehot"): - if self.zero_one_labels: - y_pieces += [batch.label_onehot] - else: - y_pieces += [2. * batch.label_onehot - 1.] 
- if hasattr(batch, "regression_target"): - y_pieces += [batch.regression_target] - y = tf.concat(y_pieces, 1) - if self.center_y: - y = center_y(y) - return y - - y_train = get_y_vec(batch) - - w = solve_ridge(features, y_train, self.ridge_factor) - - # Generate features from another batch to evaluate loss on the validation - # set. This provide a less overfit signal to the learned optimizer. - y_test = get_y_vec(batch_test) - - def compute_logit(features): - # We have updated the classifier mod in previous steps, we need to - # substitute out those variables to get new values. - replacement = collections.OrderedDict([(reg_w, w[:-1]), (reg_b, w[-1])]) - with variable_replace.variable_replace(replacement): - logits = regression_mod(features) - - return logits - - batch_size = y_train.shape.as_list()[0] - - logit_train = compute_logit(features) - logit_test_unnorm = compute_logit(features_test) - if self.normalize_y_hat: - logit_test = logit_test_unnorm / tf.sqrt( - tf.reduce_sum(logit_test_unnorm**2, axis=[1], keep_dims=True)) - else: - logit_test = logit_test_unnorm - - stats = {} - - if self.hinge_loss: - # slightly closer to the true classification loss - # any distance smaller than 1 is guaranteed to map to the correct class - mse_test = tf.reduce_sum(tf.nn.relu(tf.reduce_sum(tf.square(logit_test - y_test), axis=1)-1.)) / batch_size - else: - mse_test = tf.reduce_sum(tf.square(logit_test - y_test)) / batch_size - - stats["mse_test"] = mse_test - - mse_train = tf.reduce_sum(tf.square(logit_train - y_train)) / batch_size - stats["mse_train"] = mse_train - - is_correct_test = tf.equal(tf.argmax(logit_test, 1), tf.argmax(y_test, 1)) - accuracy_test = tf.reduce_mean(tf.cast(is_correct_test, tf.float32)) - stats["accuracy_test"] = accuracy_test - - def test_confusion_fn(): - test_confusion = tf.confusion_matrix(tf.argmax(y_test, 1), tf.argmax(logit_test, 1)) - test_confusion = tf.to_float(test_confusion) / tf.constant((logit_test.shape.as_list()[0] / 
float(logit_test.shape.as_list()[1])), dtype=tf.float32) - test_confusion = tf.expand_dims(tf.expand_dims(test_confusion, 0), 3) - return test_confusion - tf.summary.image("test_confusion", test_confusion_fn()) - - def train_confusion_fn(): - train_confusion = tf.confusion_matrix(tf.argmax(y_train, 1), tf.argmax(logit_train, 1)) - train_confusion = tf.to_float(train_confusion) / tf.constant((logit_train.shape.as_list()[0] / float(logit_train.shape.as_list()[1])), dtype=tf.float32) - train_confusion = tf.expand_dims(tf.expand_dims(train_confusion, 0), 3) - return train_confusion - tf.summary.image("train_confusion", train_confusion_fn()) - - is_correct = tf.equal(tf.argmax(logit_train, 1), tf.argmax(y_train, 1)) - accuracy_train = tf.reduce_mean(tf.cast(is_correct, tf.float32)) - stats["accuracy_train"] = accuracy_train - - reg = self.ridge_factor * tf.reduce_sum(tf.square(w[:-1])) / batch_size - stats["ridge_component"] = reg - - stats["total_loss"] = mse_test + reg - - loss_to_train_at = (reg+ mse_test) * self.test_train_scalar + (mse_train + reg)*(1 - self.test_train_scalar) - - loss_to_train_at = tf.identity(loss_to_train_at) - - # Minimizing the test loss should not require regurization because the - # metaobjective is solved for the training loss - return loss_to_train_at, stats - - def local_variables(self): - """List of variables that need to be updated for each evaluation. - - These variables should not be stored on a parameter server and - should be reset every computation of a meta_objective loss. 
- - Returns: - vars: list of tf.Variable - """ - return list( - snt.get_variables_in_module(self, tf.GraphKeys.TRAINABLE_VARIABLES)) - - def remote_variables(self): - return [] diff --git a/research/learning_unsupervised_learning/meta_objective/sklearn.py b/research/learning_unsupervised_learning/meta_objective/sklearn.py deleted file mode 100644 index 4f1f2d59102c511fd42ad323c32ab1709bd60c90..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/meta_objective/sklearn.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -""" - -Can NOT be differentiated through. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np -import sonnet as snt -import tensorflow as tf -from tensorflow.python.framework import function - -from learning_unsupervised_learning import utils - -from learning_unsupervised_learning.meta_objective import utils as meta_obj_utils - -from sklearn import svm -from sklearn import linear_model - - -def build_fit(device, model_fn, num_classes, probs=True): - - def _py_fit_predict(trX, trY, teX): - assert len(np.unique(trY)) == num_classes - model = model_fn() - model.fit(trX, trY) - trP = model.predict(trX) - teP = model.predict(teX) - if probs: - teP_probs = model.predict_log_proba(teX) - return trP.astype(np.int64), teP.astype(np.int64), teP_probs.astype( - np.float32) - else: - teP = model.predict(teX) - return trP.astype(np.int64), teP.astype(np.int64) - - def return_fn(trX, trY, teX): - with tf.device(device): - with tf.device("/cpu:0"): - if probs: - return tf.py_func( - _py_fit_predict, - [tf.identity(trX), - tf.identity(trY), - tf.identity(teX)], [tf.int64, tf.int64, tf.float32]) - else: - return tf.py_func( - _py_fit_predict, - [tf.identity(trX), - tf.identity(trY), - tf.identity(teX)], [tf.int64, tf.int64]) - - return return_fn - - -class SKLearn(meta_obj_utils.MultiTrialMetaObjective): - - def __init__( - self, - local_device=None, - remote_device=None, - averages=1, - samples_per_class=10, - probs=False, - stddev=0.01, - n_samples=10, - name="SKLearn", - ): - self._local_device = local_device - self._remote_device = remote_device - self.name = name - self.probs = probs - self.n_samples = n_samples - self.stddev = stddev - - super(SKLearn, self).__init__( - name=name, samples_per_class=samples_per_class, averages=averages) - - def _get_model(self): - raise NotImplemented() - - def _build_once(self, dataset, feature_transformer): - with tf.device(self._local_device): - tr_batch = dataset() - 
te_batch = dataset() - num_classes = tr_batch.label_onehot.shape.as_list()[1] - all_batch = utils.structure_map_multi(lambda x: tf.concat(x, 0), - [tr_batch, te_batch]) - features = feature_transformer(all_batch) - trX, teX = utils.structure_map_split(lambda x: tf.split(x, 2, axis=0), - features) - trY = tf.to_int64(tr_batch.label) - trY_onehot = tf.to_int32(tr_batch.label_onehot) - teY = tf.to_int64(te_batch.label) - teY_shape = teY.shape.as_list() - - def blackbox((trX, trY, teX, teY)): - trY = tf.to_int32(tf.rint(trY)) - teY = tf.to_int32(tf.rint(teY)) - tf_fn = build_fit( - self._local_device, - self._get_model, - num_classes=num_classes, - probs=self.probs) - if self.probs: - trP, teP, teP_probs = tf_fn(trX, trY, teX) - else: - trP, teP = tf_fn(trX, trY, teX) - - teY.set_shape(teY_shape) - if self.probs: - onehot = tf.one_hot(teY, num_classes) - crossent = -tf.reduce_sum(onehot * teP_probs, [1]) - return tf.reduce_mean(crossent) - else: - # use error rate as the loss if no surrogate is avalible. 
- return 1 - tf.reduce_mean( - tf.to_float(tf.equal(teY, tf.to_int32(teP)))) - - test_loss = blackbox((trX, tf.to_float(trY), teX, tf.to_float(teY))) - - stats = {} - - tf_fn = build_fit( - self._local_device, - self._get_model, - num_classes=num_classes, - probs=self.probs) - if self.probs: - trP, teP, teP_probs = tf_fn(trX, trY, teX) - else: - trP, teP = tf_fn(trX, trY, teX) - stats["%s/accuracy_train" % self.name] = tf.reduce_mean( - tf.to_float(tf.equal(tf.to_int32(trY), tf.to_int32(trP)))) - stats["%s/accuracy_test" % self.name] = tf.reduce_mean( - tf.to_float(tf.equal(tf.to_int32(teY), tf.to_int32(teP)))) - stats["%s/test_loss" % self.name] = test_loss - return test_loss, stats - - -class LogisticRegression(SKLearn): - - def __init__(self, C=1.0, name="LogisticRegression", probs=True, **kwargs): - self.C = C - super(LogisticRegression, self).__init__(name=name, probs=probs, **kwargs) - - def _get_model(self): - return linear_model.LogisticRegression(C=self.C) diff --git a/research/learning_unsupervised_learning/meta_objective/utils.py b/research/learning_unsupervised_learning/meta_objective/utils.py deleted file mode 100644 index a29197d1d0cb7f0fdcebac3980027640651f185b..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/meta_objective/utils.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np -import sonnet as snt -import tensorflow as tf - -from learning_unsupervised_learning import optimizers -from learning_unsupervised_learning import utils -from learning_unsupervised_learning import summary_utils -from learning_unsupervised_learning import variable_replace - -class MultiTrialMetaObjective(snt.AbstractModule): - def __init__(self, samples_per_class, averages, **kwargs): - self.samples_per_class = samples_per_class - self.averages = averages - self.dataset_map = {} - - super(MultiTrialMetaObjective, - self).__init__(**kwargs) - - def _build(self, dataset, feature_transformer): - if self.samples_per_class is not None: - if dataset not in self.dataset_map: - # datasets are outside of frames from while loops - with tf.control_dependencies(None): - self.dataset_map[dataset] = utils.sample_n_per_class( - dataset, self.samples_per_class) - - dataset = self.dataset_map[dataset] - - stats = collections.defaultdict(list) - losses = [] - # TODO(lmetz) move this to ingraph control flow? - for _ in xrange(self.averages): - loss, stat = self._build_once(dataset, feature_transformer) - losses.append(loss) - for k, v in stat.items(): - stats[k].append(v) - stats = {k: tf.add_n(v) / float(len(v)) for k, v in stats.items()} - - for k, v in stats.items(): - tf.summary.scalar(k, v) - - return tf.add_n(losses) / float(len(losses)) - - def local_variables(self): - """List of variables that need to be updated for each evaluation. - - These variables should not be stored on a parameter server and - should be reset every computation of a meta_objective loss. 
- - Returns: - vars: list of tf.Variable - """ - return list( - snt.get_variables_in_module(self, tf.GraphKeys.TRAINABLE_VARIABLES)) - - def remote_variables(self): - return [] diff --git a/research/learning_unsupervised_learning/optimizers.py b/research/learning_unsupervised_learning/optimizers.py deleted file mode 100644 index 02c6106b19d1255907beb0ade07c46c5b065f701..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/optimizers.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - - -"""Optimizers for use in unrolled optimization. - -These optimizers contain a compute_updates function and its own ability to keep -track of internal state. -These functions can be used with a tf.while_loop to perform multiple training -steps per sess.run. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import collections -import tensorflow as tf -import sonnet as snt - -from learning_unsupervised_learning import utils - -from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.training import optimizer -from tensorflow.python.training import training_ops - - -class UnrollableOptimizer(snt.AbstractModule): - """Interface for optimizers that can be used in unrolled computation. - apply_gradients is derrived from compute_update and assign_state. - """ - - def __init__(self, *args, **kwargs): - super(UnrollableOptimizer, self).__init__(*args, **kwargs) - self() - - @abc.abstractmethod - def compute_updates(self, xs, gs, state=None): - """Compute next step updates for a given variable list and state. - - Args: - xs: list of tensors - The "variables" to perform an update on. - Note these must match the same order for which get_state was originally - called. - gs: list of tensors - Gradients of `xs` with respect to some loss. - state: Any - Optimizer specific state to keep track of accumulators such as momentum - terms - """ - raise NotImplementedError() - - def _build(self): - pass - - @abc.abstractmethod - def get_state(self, var_list): - """Get the state value associated with a list of tf.Variables. - - This state is commonly going to be a NamedTuple that contains some - mapping between variables and the state associated with those variables. - This state could be a moving momentum variable tracked by the optimizer. - - Args: - var_list: list of tf.Variable - Returns: - state: Any - Optimizer specific state - """ - raise NotImplementedError() - - def assign_state(self, state): - """Assigns the state to the optimizers internal variables. 
- - Args: - state: Any - Returns: - op: tf.Operation - The operation that performs the assignment. - """ - raise NotImplementedError() - - def apply_gradients(self, grad_vars): - gradients, variables = zip(*grad_vars) - state = self.get_state(variables) - new_vars, new_state = self.compute_updates(variables, gradients, state) - assign_op = self.assign_state(new_state) - op = utils.assign_variables(variables, new_vars) - return tf.group(assign_op, op, name="apply_gradients") - - -class UnrollableGradientDescentRollingOptimizer(UnrollableOptimizer): - - def __init__(self, - learning_rate, - name="UnrollableGradientDescentRollingOptimizer"): - self.learning_rate = learning_rate - super(UnrollableGradientDescentRollingOptimizer, self).__init__(name=name) - - - def compute_updates(self, xs, gs, learning_rates, state): - new_vars = [] - for x, g, lr in utils.eqzip(xs, gs, learning_rates): - if lr is None: - lr = self.learning_rate - if g is not None: - new_vars.append((x * (1 - lr) - g * lr)) - else: - new_vars.append(x) - return new_vars, state - - def get_state(self, var_list): - return tf.constant(0.0) - - def assign_state(self, state, var_list=None): - return tf.no_op() diff --git a/research/learning_unsupervised_learning/run_eval.py b/research/learning_unsupervised_learning/run_eval.py deleted file mode 100644 index dcb2529dd4cc5354012befd5790c8d402f4caafd..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/run_eval.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -""" Script that iteratively applies the unsupervised update rule and evaluates the - -meta-objective performance. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -from absl import app - -from learning_unsupervised_learning import evaluation -from learning_unsupervised_learning import datasets -from learning_unsupervised_learning import architectures -from learning_unsupervised_learning import summary_utils -from learning_unsupervised_learning import meta_objective - -import tensorflow as tf -import sonnet as snt - -from tensorflow.contrib.framework.python.framework import checkpoint_utils - -flags.DEFINE_string("checkpoint_dir", None, "Dir to load pretrained update rule from") -flags.DEFINE_string("train_log_dir", None, "Training log directory") - -FLAGS = flags.FLAGS - - -def train(train_log_dir, checkpoint_dir, eval_every_n_steps=10, num_steps=3000): - dataset_fn = datasets.mnist.TinyMnist - w_learner_fn = architectures.more_local_weight_update.MoreLocalWeightUpdateWLearner - theta_process_fn = architectures.more_local_weight_update.MoreLocalWeightUpdateProcess - - meta_objectives = [] - meta_objectives.append( - meta_objective.linear_regression.LinearRegressionMetaObjective) - meta_objectives.append(meta_objective.sklearn.LogisticRegression) - - checkpoint_vars, train_one_step_op, ( - base_model, dataset) = evaluation.construct_evaluation_graph( - 
theta_process_fn=theta_process_fn, - w_learner_fn=w_learner_fn, - dataset_fn=dataset_fn, - meta_objectives=meta_objectives) - batch = dataset() - pre_logit, outputs = base_model(batch) - - global_step = tf.train.get_or_create_global_step() - var_list = list( - snt.get_variables_in_module(base_model, tf.GraphKeys.TRAINABLE_VARIABLES)) - - tf.logging.info("all vars") - for v in tf.all_variables(): - tf.logging.info(" %s" % str(v)) - global_step = tf.train.get_global_step() - accumulate_global_step = global_step.assign_add(1) - reset_global_step = global_step.assign(0) - - train_op = tf.group( - train_one_step_op, accumulate_global_step, name="train_op") - - summary_op = tf.summary.merge_all() - - file_writer = summary_utils.LoggingFileWriter(train_log_dir, regexes=[".*"]) - if checkpoint_dir: - str_var_list = checkpoint_utils.list_variables(checkpoint_dir) - name_to_v_map = {v.op.name: v for v in tf.all_variables()} - var_list = [ - name_to_v_map[vn] for vn, _ in str_var_list if vn in name_to_v_map - ] - saver = tf.train.Saver(var_list) - missed_variables = [ - v.op.name for v in set( - snt.get_variables_in_scope("LocalWeightUpdateProcess", - tf.GraphKeys.GLOBAL_VARIABLES)) - - set(var_list) - ] - assert len(missed_variables) == 0, "Missed a theta variable." - - hooks = [] - - with tf.train.SingularMonitoredSession(master="", hooks=hooks) as sess: - - # global step should be restored from the evals job checkpoint or zero for fresh. 
- step = sess.run(global_step) - - if step == 0 and checkpoint_dir: - tf.logging.info("force restore") - saver.restore(sess, checkpoint_dir) - tf.logging.info("force restore done") - sess.run(reset_global_step) - step = sess.run(global_step) - - while step < num_steps: - if step % eval_every_n_steps == 0: - s, _, step = sess.run([summary_op, train_op, global_step]) - file_writer.add_summary(s, step) - else: - _, step = sess.run([train_op, global_step]) - - -def main(argv): - train(FLAGS.train_log_dir, FLAGS.checkpoint_dir) - - -if __name__ == "__main__": - app.run(main) diff --git a/research/learning_unsupervised_learning/summary_utils.py b/research/learning_unsupervised_learning/summary_utils.py deleted file mode 100644 index d5c0fdd9186bdef0b4e25ca10978e22ab910d276..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/summary_utils.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - - -import collections -import functools -import threading -import tensorflow as tf -import matplotlib -import numpy as np -import time -import re -import math -matplotlib.use("Agg") - -import matplotlib.pyplot as plt -import scipy.signal - -from tensorflow.python.util import tf_should_use -from tensorflow.contrib.summary import summary_ops -from tensorflow.python.ops import summary_op_util -from tensorflow.contrib.summary import gen_summary_ops - -_DEBUG_DISABLE_SUMMARIES=False - -class LoggingFileWriter(tf.summary.FileWriter): - """A FileWriter that also logs things out. - - This is entirely for ease of debugging / not having to open up Tensorboard - a lot. - """ - - def __init__(self, logdir, regexes=[], **kwargs): - self.regexes = regexes - super(LoggingFileWriter, self).__init__(logdir, **kwargs) - - def add_summary(self, summary, global_step): - if type(summary) != tf.Summary: - summary_p = tf.Summary() - summary_p.ParseFromString(summary) - summary = summary_p - for s in summary.value: - for exists in [re.match(p, s.tag) for p in self.regexes]: - if exists is not None: - tf.logging.info("%d ] %s : %f", global_step, s.tag, s.simple_value) - break - super(LoggingFileWriter, self).add_summary(summary, global_step) - - -def image_grid(images, max_grid_size=4, border=1): - """Given images and N, return first N^2 images as an NxN image grid. 
- - Args: - images: a `Tensor` of size [batch_size, height, width, channels] - max_grid_size: Maximum image grid height/width - - Returns: - Single image batch, of dim [1, h*n, w*n, c] - """ - batch_size = images.shape.as_list()[0] - to_pad = int((np.ceil(np.sqrt(batch_size)))**2 - batch_size) - images = tf.pad(images, [[0, to_pad], [0, border], [0, border], [0, 0]]) - - batch_size = images.shape.as_list()[0] - grid_size = min(int(np.sqrt(batch_size)), max_grid_size) - assert images.shape.as_list()[0] >= grid_size * grid_size - - # If we have a depth channel - if images.shape.as_list()[-1] == 4: - images = images[:grid_size * grid_size, :, :, 0:3] - depth = tf.image.grayscale_to_rgb(images[:grid_size * grid_size, :, :, 3:4]) - - images = tf.reshape(images, [-1, images.shape.as_list()[2], 3]) - split = tf.split(images, grid_size, axis=0) - depth = tf.reshape(depth, [-1, images.shape.as_list()[2], 3]) - depth_split = tf.split(depth, grid_size, axis=0) - grid = tf.concat(split + depth_split, 1) - return tf.expand_dims(grid, 0) - else: - images = images[:grid_size * grid_size, :, :, :] - images = tf.reshape( - images, [-1, images.shape.as_list()[2], - images.shape.as_list()[3]]) - split = tf.split(value=images, num_or_size_splits=grid_size, axis=0) - grid = tf.concat(split, 1) - return tf.expand_dims(grid, 0) - - -def first_layer_weight_image(weight, shape): - weight_image = tf.reshape(weight, - shape + [tf.identity(weight).shape.as_list()[1]]) - # [winx, winy, wout] - mean, var = tf.nn.moments(weight_image, [0,1,2], keep_dims=True) - #mean, var = tf.nn.moments(weight_image, [0,1], keep_dims=True) - weight_image = (weight_image - mean) / tf.sqrt(var + 1e-5) - weight_image = (weight_image + 1.0) / 2.0 - weight_image = tf.clip_by_value(weight_image, 0, 1) - weight_image = tf.transpose(weight_image, (3, 0, 1, 2)) - grid = image_grid(weight_image, max_grid_size=10) - return grid - -def inner_layer_weight_image(weight): - """Visualize a weight matrix of an inner layer. 
- Add padding to make it square, then visualize as a gray scale image - """ - weight = tf.identity(weight) # turn into a tensor - weight = weight / (tf.reduce_max(tf.abs(weight), [0], keep_dims=True)) - weight = tf.reshape(weight, [1]+weight.shape.as_list() + [1]) - return weight - - -def activation_image(activations, label_onehot): - """Make a row sorted by class for each activation. Put a black line around the activations.""" - labels = tf.argmax(label_onehot, axis=1) - _, n_classes = label_onehot.shape.as_list() - mean, var = tf.nn.moments(activations, [0, 1]) - activations = (activations - mean)/tf.sqrt(var+1e-5) - - activations = tf.clip_by_value(activations, -1, 1) - activations = (activations + 1.0) / 2.0 # shift to [0, 1] - - canvas = [] - for i in xrange(n_classes): - inds = tf.where(tf.equal(labels, i)) - - def _gather(): - return tf.squeeze(tf.gather(activations, inds), 1) - - def _empty(): - return tf.zeros([0, activations.shape.as_list()[1]], dtype=tf.float32) - - assert inds.shape.as_list()[0] is None - x = tf.cond(tf.equal(tf.shape(inds)[0], 0), _empty, _gather) - canvas.append(x) - canvas.append(tf.zeros([1, activations.shape.as_list()[1]])) - canvas = tf.concat(canvas, 0) - canvas = tf.reshape(canvas, [1, activations.shape.as_list()[0]+n_classes, canvas.shape.as_list()[1], 1]) - return canvas - - -def sorted_images(images, label_onehot): - # images is [bs, x, y, c] - labels = tf.argmax(label_onehot, axis=1) - _, n_classes = label_onehot.shape.as_list() - to_stack = [] - for i in xrange(n_classes): - inds = tf.where(tf.equal(labels, i)) - - def _gather(): - return tf.squeeze(tf.gather(images, inds), 1) - - def _empty(): - return tf.zeros([0] + images.shape.as_list()[1:], dtype=tf.float32) - - assert inds.shape.as_list()[0] is None - x = tf.cond(tf.equal(tf.shape(inds)[0], 0), _empty, _gather) - to_stack.append(x) - # pad / trim all up to 10. 
- padded = [] - for t in to_stack: - n_found = tf.shape(t)[0] - pad = tf.pad(t[0:10], tf.stack([tf.stack([0,tf.maximum(0, 10-n_found)]), [0,0], [0,0], [0,0]])) - padded.append(pad) - - xs = [tf.concat(tf.split(p, 10), axis=1) for p in padded] - ys = tf.concat(xs, axis=2) - ys = tf.cast(tf.clip_by_value(ys, 0., 1.) * 255., tf.uint8) - return ys diff --git a/research/learning_unsupervised_learning/utils.py b/research/learning_unsupervised_learning/utils.py deleted file mode 100644 index ca56ca93181df1ed9c403fef79e8154c3c9515b4..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/utils.py +++ /dev/null @@ -1,287 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import tensorflow as tf -import sonnet as snt -import itertools -import functools - -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.framework import device as pydev -from tensorflow.python.framework import errors -from tensorflow.python.ops import variable_scope as variable_scope_ops -from sonnet.python.modules import util as snt_util - -from tensorflow.python.util import nest - - -def eqzip(*args): - """Zip but raises error if lengths don't match. 
- - Args: - *args: list of lists or tuples - Returns: - list: the result of zip - Raises: - ValueError: when the lengths don't match - """ - - sizes = [len(x) for x in args] - if not all([sizes[0] == x for x in sizes]): - raise ValueError("Lists are of different sizes. \n %s"%str(sizes)) - return zip(*args) - - -@contextlib.contextmanager -def assert_no_new_variables(): - """Ensure that no tf.Variables are constructed inside the context. - - Yields: - None - Raises: - ValueError: if there is a variable created. - """ - num_vars = len(tf.global_variables()) - old_variables = tf.global_variables() - yield - if len(tf.global_variables()) != num_vars: - new_vars = set(tf.global_variables()) - set(old_variables) - tf.logging.error("NEW VARIABLES CREATED") - tf.logging.error(10*"=") - for v in new_vars: - tf.logging.error(v) - - raise ValueError("Variables created inside an " - "assert_no_new_variables context") - if old_variables != tf.global_variables(): - raise ValueError("Variables somehow changed inside an " - "assert_no_new_variables context." - "This means something modified the tf.global_variables()") - - -def get_variables_in_modules(module_list): - var_list = [] - for m in module_list: - var_list.extend(snt.get_variables_in_module(m)) - return var_list - - -def state_barrier_context(state): - """Return a context manager that prevents interior ops from running - unless the whole state has been computed. - - This is to prevent assign race conditions. 
- """ - tensors = [x for x in nest.flatten(state) if type(x) == tf.Tensor] - tarray = [x.flow for x in nest.flatten(state) if hasattr(x, "flow")] - return tf.control_dependencies(tensors + tarray) - - -def _identity_fn(tf_entity): - if hasattr(tf_entity, "identity"): - return tf_entity.identity() - else: - return tf.identity(tf_entity) - - -def state_barrier_result(state): - """Return the same state, but with a control dependency to prevent it from - being partially computed - """ - with state_barrier_context(state): - return nest.map_structure(_identity_fn, state) - - -def train_iterator(num_iterations): - """Iterator that returns an index of the current step. - This iterator runs forever if num_iterations is None - otherwise it runs for some fixed amount of steps. - """ - if num_iterations is None: - return itertools.count() - else: - return xrange(num_iterations) - - -def print_op(op, msg): - """Print a string and return an op wrapped in a control dependency to make - sure it ran.""" - print_op = tf.Print(tf.constant(0), [tf.constant(0)], msg) - return tf.group(op, print_op) - - -class MultiQueueRunner(tf.train.QueueRunner): - """A QueueRunner with multiple queues """ - def __init__(self, queues, enqueue_ops): - close_op = tf.group(* [q.close() for q in queues]) - cancel_op = tf.group( - * [q.close(cancel_pending_enqueues=True) for q in queues]) - queue_closed_exception_types = (errors.OutOfRangeError,) - - enqueue_op = tf.group(*enqueue_ops, name="multi_enqueue") - - super(MultiQueueRunner, self).__init__( - queues[0], - enqueue_ops=[enqueue_op], - close_op=close_op, - cancel_op=cancel_op, - queue_closed_exception_types=queue_closed_exception_types) - - -# This function is not elegant, but I tried so many other ways to get this to -# work and this is the only one that ended up not incuring significant overhead -# or obscure tensorflow bugs. 
-def sample_n_per_class(dataset, samples_per_class): - """Create a new callable / dataset object that returns batches of each with - samples_per_class per label. - - Args: - dataset: fn - samples_per_class: int - Returns: - function, [] -> batch where batch is the same type as the return of - dataset(). - """ - - with tf.control_dependencies(None), tf.name_scope(None): - with tf.name_scope("queue_runner/sample_n_per_class"): - batch = dataset() - num_classes = batch.label_onehot.shape.as_list()[1] - batch_size = num_classes * samples_per_class - - flatten = nest.flatten(batch) - queues = [] - enqueue_ops = [] - capacity = samples_per_class * 20 - for i in xrange(num_classes): - queue = tf.FIFOQueue( - capacity=capacity, - shapes=[f.shape.as_list()[1:] for f in flatten], - dtypes=[f.dtype for f in flatten]) - queues.append(queue) - - idx = tf.where(tf.equal(batch.label, i)) - sub_batch = [] - to_enqueue = [] - for elem in batch: - new_e = tf.gather(elem, idx) - new_e = tf.squeeze(new_e, 1) - to_enqueue.append(new_e) - - remaining = (capacity - queue.size()) - to_add = tf.minimum(tf.shape(idx)[0], remaining) - - def _enqueue(): - return queue.enqueue_many([t[:to_add] for t in to_enqueue]) - - enqueue_op = tf.cond( - tf.equal(to_add, 0), tf.no_op, _enqueue) - enqueue_ops.append(enqueue_op) - - # This has caused many deadlocks / issues. This is some logging to at least - # shed light to what is going on. - print_lam = lambda: tf.Print(tf.constant(0.0), [q.size() for q in queues], "MultiQueueRunner queues status. 
Has capacity %d"%capacity) - some_percent_of_time = tf.less(tf.random_uniform([]), 0.0005) - maybe_print = tf.cond(some_percent_of_time, print_lam, lambda: tf.constant(0.0)) - with tf.control_dependencies([maybe_print]): - enqueue_ops = [tf.group(e) for e in enqueue_ops] - qr = MultiQueueRunner(queues=queues, enqueue_ops=enqueue_ops) - tf.train.add_queue_runner(qr) - - def dequeue_batch(): - with tf.name_scope("sample_n_per_batch/dequeue/"): - entries = [] - for q in queues: - entries.append(q.dequeue_many(samples_per_class)) - - flat_batch = [tf.concat(x, 0) for x in zip(*entries)] - idx = tf.random_shuffle(tf.range(batch_size)) - flat_batch = [tf.gather(f, idx, axis=0) for f in flat_batch] - return nest.pack_sequence_as(batch, flat_batch) - - return dequeue_batch - -def structure_map_multi(func, values): - all_values = [nest.flatten(v) for v in values] - rets = [] - for pair in zip(*all_values): - rets.append(func(pair)) - return nest.pack_sequence_as(values[0], rets) - -def structure_map_split(func, value): - vv = nest.flatten(value) - rets = [] - for v in vv: - rets.append(func(v)) - return [nest.pack_sequence_as(value, r) for r in zip(*rets)] - -def assign_variables(targets, values): - return tf.group(*[t.assign(v) for t,v in eqzip(targets, values)], - name="assign_variables") - - -def create_variables_in_class_scope(method): - """Force the variables constructed in this class to live in the sonnet module. - Wraps a method on a sonnet module. - - For example the following will create two different variables. 
- ``` - class Mod(snt.AbstractModule): - @create_variables_in_class_scope - def dynamic_thing(self, input, name): - return snt.Linear(name)(input) - mod.dynamic_thing(x, name="module_nameA") - mod.dynamic_thing(x, name="module_nameB") - # reuse - mod.dynamic_thing(y, name="module_nameA") - ``` - """ - @functools.wraps(method) - def wrapper(obj, *args, **kwargs): - def default_context_manager(reuse=None): - variable_scope = obj.variable_scope - return tf.variable_scope(variable_scope, reuse=reuse) - - variable_scope_context_manager = getattr(obj, "_enter_variable_scope", - default_context_manager) - graph = tf.get_default_graph() - - # Temporarily enter the variable scope to capture it - with variable_scope_context_manager() as tmp_variable_scope: - variable_scope = tmp_variable_scope - - with variable_scope_ops._pure_variable_scope( - variable_scope, reuse=tf.AUTO_REUSE) as pure_variable_scope: - - name_scope = variable_scope.original_name_scope - if name_scope[-1] != "/": - name_scope += "/" - - with tf.name_scope(name_scope): - sub_scope = snt_util.to_snake_case(method.__name__) - with tf.name_scope(sub_scope) as scope: - out_ops = method(obj, *args, **kwargs) - return out_ops - - return wrapper - diff --git a/research/learning_unsupervised_learning/variable_replace.py b/research/learning_unsupervised_learning/variable_replace.py deleted file mode 100644 index ebfbeadc8aba7f8a09e1392f1de8d7b33f10d43c..0000000000000000000000000000000000000000 --- a/research/learning_unsupervised_learning/variable_replace.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from __future__ import absolute_import -from __future__ import division - -import tensorflow as tf -from contextlib import contextmanager - -from tensorflow.python.ops import variable_scope - -# sanity global state to ensure non recursive. -_is_variable_replacing = [False] - -def in_variable_replace_scope(): - return _is_variable_replacing[0] - -@contextmanager -def variable_replace(replacements, no_new=True): - """ A context manager that replaces variables. - - This is a context manager that replaces all calls to - get_variable with the variable in replacements. - This function does not support recursive application. - - Args: - replacements: dict - dictionary mapping a variable to replace (the key), with - the variable one wants to replace this variable with (the value). - no_new: bool - raise an error if variables were created. - This is for sanity checking. - Raises: - ValueError: if a new variable or not all the replacements are used. - """ - # TODO(lmetz) This function is a bit scary, as it relies on monkey patching - # the call to get_variable. Ideally this can be done with variable_scope's - # custom_getter attribute, but when initially writing this that was not - # avalible. 
- - replacements = {k: v for k, v in replacements.items() if not k == v} - - init_vars = tf.trainable_variables() - old_get_variable = variable_scope.get_variable - old_tf_get_variable = tf.get_variable - - names_replace = {} - has_replaced_names = [] - tf.logging.vlog(2, "Trying to replace") - for k, v in replacements.items(): - tf.logging.vlog(2, k.name + " >> " + v.name) - tf.logging.vlog(2, "===") - - for k, v in replacements.items(): - strip_name = k.name.replace("/read:0", "") - strip_name = strip_name.replace(":0", "") - names_replace[strip_name] = v - # TODO(lmetz) is there a cleaner way to do this? - def new_get_variable(name, *args, **kwargs): - #print "Monkeypatch get variable run with name:", name - n = tf.get_variable_scope().name + "/" + name - #print "Monkeypatch get variable run with name:", n - if n in names_replace: - has_replaced_names.append(n) - return names_replace[n] - else: - return old_get_variable(name, *args, **kwargs) - - # perform the monkey patch - if _is_variable_replacing[0] == True: - raise ValueError("No recursive calling to variable replace allowed.") - - variable_scope.get_variable = new_get_variable - tf.get_variable = new_get_variable - - _is_variable_replacing[0] = True - - yield - - if set(has_replaced_names) != set(names_replace.keys()): - print "Didn't use all replacements" - print "replaced variables that are not requested??" 
- print "===" - for n in list(set(has_replaced_names) - set(names_replace.keys())): - print n - print "Missed replacing variables" - print "===" - for n in list(set(names_replace.keys()) - set(has_replaced_names)): - print n, "==>", names_replace[n].name - raise ValueError("Fix this -- see stderr") - - # undo the monkey patch - tf.get_variable = old_tf_get_variable - variable_scope.get_variable = old_get_variable - - _is_variable_replacing[0] = False - - final_vars = tf.trainable_variables() - assert set(init_vars) == set(final_vars), "trainable variables changed" diff --git a/research/lexnet_nc/README.md b/research/lexnet_nc/README.md deleted file mode 100644 index 4ecb5d39867c2ebf7280b9d19bbabb41957b9465..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/README.md +++ /dev/null @@ -1,215 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# LexNET for Noun Compound Relation Classification - -This is a [Tensorflow](http://www.tensorflow.org/) implementation of the LexNET -algorithm for classifying relationships, specifically applied to classifying the -relationships that hold between noun compounds: - -* *olive oil* is oil that is *made from* olives -* *cooking oil* which is oil that is *used for* cooking -* *motor oil* is oil that is *contained in* a motor - -The model is a supervised classifier that predicts the relationship that holds -between the constituents of a two-word noun compound using: - -1. A neural "paraphrase" of each syntactic dependency path that connects the - constituents in a large corpus. For example, given a sentence like *This fine - oil is made from first-press olives*, the dependency path is something like - `oil from POBJ> olive`. -2. 
The distributional information provided by the individual words; i.e., the - word embeddings of the two consituents. -3. The distributional signal provided by the compound itself; i.e., the - embedding of the noun compound in context. - -The model includes several variants: *path-based model* uses (1) alone, the -*distributional model* uses (2) alone, and the *integrated model* uses (1) and -(2). The *distributional-nc model* and the *integrated-nc* model each add (3). - -Training a model requires the following: - -1. A collection of noun compounds that have been labeled using a *relation - inventory*. The inventory describes the specific relationships that you'd - like the model to differentiate (e.g. *part of* versus *composed of* versus - *purpose*), and generally may consist of tens of classes. You can download - the dataset used in the paper from - [here](https://vered1986.github.io/papers/Tratz2011_Dataset.tar.gz). -2. A collection of word embeddings: the path-based model uses the word - embeddings as part of the path representation, and the distributional models - use the word embeddings directly as prediction features. -3. The path-based model requires a collection of syntactic dependency parses - that connect the constituents for each noun compound. To generate these, - you'll need a corpus from which to train this data; we used Wikipedia and the - [LDC GigaWord5](https://catalog.ldc.upenn.edu/LDC2011T07) corpora. - -# Contents - -The following source code is included here: - -* `learn_path_embeddings.py` is a script that trains and evaluates a path-based - model to predict a noun-compound relationship given labeled noun-compounds and - dependency parse paths. -* `learn_classifier.py` is a script that trains and evaluates a classifier based - on any combination of paths, word embeddings, and noun-compound embeddings. -* `get_indicative_paths.py` is a script that generates the most indicative - syntactic dependency paths for a particular relationship. 
- -Also included are utilities for preparing data for training: - -* `text_embeddings_to_binary.py` converts a text file containing word embeddings - into a binary file that is quicker to load. -* `extract_paths.py` finds all the dependency paths that connect words in a - corpus. -* `sorted_paths_to_examples.py` processes the output of `extract_paths.py` to - produce summarized training data. - -This code (in particular, the utilities used to prepare the data) differs from -the code that was used to prepare data for the paper. Notably, we used a -proprietary dependency parser instead of spaCy, which is used here. - -# Dependencies - -* [TensorFlow](http://www.tensorflow.org/): see detailed installation - instructions at that site. -* [SciKit Learn](http://scikit-learn.org/): you can probably just install this - with `pip install sklearn`. -* [SpaCy](https://spacy.io/): `pip install spacy` ought to do the trick, along - with the English model. - -# Creating the Model - -This sections described the steps necessary to create and evaluate the model -described in the paper. - -## Generate Path Data - -To begin, you need three text files: - -1. **Corpus**. This file should contain natural language sentences, written with - one sentence per line. For purposes of exposition, we'll assume that you - have English Wikipedia serialized this way in `${HOME}/data/wiki.txt`. -2. **Labeled Noun Compound Pairs**. This file contain (modfier, head, label) - tuples, tab-separated, with one per line. The *label* represented the - relationship between the head and the modifier; e.g., if `purpose` is one - your labels, you could possibly include `toothpastepurpose`. -3. **Word Embeddings**. We used the - [GloVe](https://nlp.stanford.edu/projects/glove/) word embeddings; in - particular the 6B token, 300d variant. We'll assume you have this file as - `${HOME}/data/glove.6B.300d.txt`. 
- -We first processed the embeddings from their text format into something that we -can load a little bit more quickly: - - ./text_embeddings_to_binary.py \ - --input ${HOME}/data/glove.6B.300d.txt \ - --output_vocab ${HOME}/data/vocab.txt \ - --output_npy ${HOME}/data/glove.6B.300d.npy - -Next, we'll extract all the dependency parse paths connecting our labeled pairs -from the corpus. This process takes a *looooong* time, but is trivially -parallelized using map-reduce if you have access to that technology. - - ./extract_paths.py \ - --corpus ${HOME}/data/wiki.txt \ - --labeled_pairs ${HOME}/data/labeled-pairs.tsv \ - --output ${HOME}/data/paths.tsv - -The file it produces (`paths.tsv`) is a tab-separated file that contains the -modifier, the head, the label, the encoded path, and the sentence from which the -path was drawn. (This last is mostly for sanity checking.) A sample row might -look something like this (where newlines would actually be tab characters): - - navy - captain - owner_emp_use - /PROPN/dobj/>::enter/VERB/ROOT/^::follow/VERB/advcl/<::in/ADP/prep/<::footstep/NOUN/pobj/<::of/ADP/prep/<::father/NOUN/pobj/<::bover/PROPN/appos/<::/PROPN/compound/< - He entered the Royal Navy following in the footsteps of his father Captain John Bover and two of his elder brothers as volunteer aboard HMS Perseus - -This file must be sorted as follows: - - sort -k1,3 -t$'\t' paths.tsv > sorted.paths.tsv - -In particular, rows with the same modifier, head, and label must appear -contiguously. 
- -We next create a file that contains all the relation labels from our original -labeled pairs: - - awk 'BEGIN {FS="\t"} {print $3}' < ${HOME}/data/labeled-pairs.tsv \ - | sort -u > ${HOME}/data/relations.txt - -With these in hand, we're ready to produce the train, validation, and test data: - - ./sorted_paths_to_examples.py \ - --input ${HOME}/data/sorted.paths.tsv \ - --vocab ${HOME}/data/vocab.txt \ - --relations ${HOME}/data/relations.txt \ - --splits ${HOME}/data/splits.txt \ - --output_dir ${HOME}/data - -Here, `splits.txt` is a file that indicates which "split" (train, test, or -validation) you want the pair to appear in. It should be a tab-separate file -which conatins the modifier, head, and the dataset ( `train`, `test`, or `val`) -into which the pair should be placed; e.g.,: - - tooth paste train - banana seat test - -The program will produce a separate file for each dataset split in the directory -specified by `--output_dir`. Each file is contains `tf.train.Example` protocol -buffers encoded using the `TFRecord` file format. - -## Create Path Embeddings - -Now we're ready to train the path embeddings using `learn_path_embeddings.py`: - - ./learn_path_embeddings.py \ - --train ${HOME}/data/train.tfrecs.gz \ - --val ${HOME}/data/val.tfrecs.gz \ - --text ${HOME}/data/test.tfrecs.gz \ - --embeddings ${HOME}/data/glove.6B.300d.npy - --relations ${HOME}/data/relations.txt - --output ${HOME}/data/path-embeddings \ - --logdir /tmp/learn_path_embeddings - -The path embeddings will be placed at the location specified by `--output`. - -## Train classifiers - -Train classifiers and evaluate on the validation and test data using -`train_classifiers.py` script. This shell script fragment will iterate through -each dataset, split, corpus, and model type to train and evaluate classifiers. 
- - LOGDIR=/tmp/learn_classifier - for DATASET in tratz/fine_grained tratz/coarse_grained ; do - for SPLIT in random lexical_head lexical_mod lexical_full ; do - for CORPUS in wiki_gigiawords ; do - for MODEL in dist dist-nc path integrated integrated-nc ; do - # Filename for the log that will contain the classifier results. - LOGFILE=$(echo "${DATASET}.${SPLIT}.${CORPUS}.${MODEL}.log" | sed -e "s,/,.,g") - python learn_classifier.py \ - --dataset_dir ~/lexnet/datasets \ - --dataset "${DATASET}" \ - --corpus "${SPLIT}/${CORPUS}" \ - --embeddings_base_path ~/lexnet/embeddings \ - --logdir ${LOGDIR} \ - --input "${MODEL}" > "${LOGDIR}/${LOGFILE}" - done - done - done - done - -The log file will contain the final performance (precision, recall, F1) on the -train, dev, and test sets, and will include a confusion matrix for each. - -# Contact - -If you have any questions, issues, or suggestions, feel free to contact either -@vered1986 or @waterson. - -If you use this code for any published research, please include the following citation: - -Olive Oil Is Made of Olives, Baby Oil Is Made for Babies: Interpreting Noun Compounds Using Paraphrases in a Neural Model. -Vered Shwartz and Chris Waterson. NAACL 2018. [link](https://arxiv.org/pdf/1803.08073.pdf). diff --git a/research/lexnet_nc/extract_paths.py b/research/lexnet_nc/extract_paths.py deleted file mode 100755 index 833eec2c1b8a176b487d4e663a737b9502b49eda..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/extract_paths.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools -import sys - -import spacy -import tensorflow as tf - -tf.flags.DEFINE_string('corpus', '', 'Filename of corpus') -tf.flags.DEFINE_string('labeled_pairs', '', 'Filename of labeled pairs') -tf.flags.DEFINE_string('output', '', 'Filename of output file') -FLAGS = tf.flags.FLAGS - - -def get_path(mod_token, head_token): - """Returns the path between a modifier token and a head token.""" - # Compute the path from the root to each token. - mod_ancestors = list(reversed(list(mod_token.ancestors))) - head_ancestors = list(reversed(list(head_token.ancestors))) - - # If the paths don't start at the same place (odd!) then there is no path at - # all. - if (not mod_ancestors or not head_ancestors - or mod_ancestors[0] != head_ancestors[0]): - return None - - # Eject elements from the common path until we reach the first differing - # ancestor. - ix = 1 - while (ix < len(mod_ancestors) and ix < len(head_ancestors) - and mod_ancestors[ix] == head_ancestors[ix]): - ix += 1 - - # Construct the path. TODO: add "satellites", possibly honor sentence - # ordering between modifier and head rather than just always traversing from - # the modifier to the head? 
- path = ['/'.join(('', mod_token.pos_, mod_token.dep_, '>'))] - - path += ['/'.join((tok.lemma_, tok.pos_, tok.dep_, '>')) - for tok in reversed(mod_ancestors[ix:])] - - root_token = mod_ancestors[ix - 1] - path += ['/'.join((root_token.lemma_, root_token.pos_, root_token.dep_, '^'))] - - path += ['/'.join((tok.lemma_, tok.pos_, tok.dep_, '<')) - for tok in head_ancestors[ix:]] - - path += ['/'.join(('', head_token.pos_, head_token.dep_, '<'))] - - return '::'.join(path) - - -def main(_): - nlp = spacy.load('en_core_web_sm') - - # Grab the set of labeled pairs for which we wish to collect paths. - with tf.gfile.GFile(FLAGS.labeled_pairs) as fh: - parts = (l.decode('utf-8').split('\t') for l in fh.read().splitlines()) - labeled_pairs = {(mod, head): rel for mod, head, rel in parts} - - # Create a mapping from each head to the modifiers that are used with it. - mods_for_head = { - head: set(hm[1] for hm in head_mods) - for head, head_mods in itertools.groupby( - sorted((head, mod) for (mod, head) in labeled_pairs.iterkeys()), - lambda (head, mod): head)} - - # Collect all the heads that we know about. - heads = set(mods_for_head.keys()) - - # For each sentence that contains a (head, modifier) pair that's in our set, - # emit the dependency path that connects the pair. 
- out_fh = sys.stdout if not FLAGS.output else tf.gfile.GFile(FLAGS.output, 'w') - in_fh = sys.stdin if not FLAGS.corpus else tf.gfile.GFile(FLAGS.corpus) - - num_paths = 0 - for line, sen in enumerate(in_fh, start=1): - if line % 100 == 0: - print('\rProcessing line %d: %d paths' % (line, num_paths), - end='', file=sys.stderr) - - sen = sen.decode('utf-8').strip() - doc = nlp(sen) - - for head_token in doc: - head_text = head_token.text.lower() - if head_text in heads: - mods = mods_for_head[head_text] - for mod_token in doc: - mod_text = mod_token.text.lower() - if mod_text in mods: - path = get_path(mod_token, head_token) - if path: - label = labeled_pairs[(mod_text, head_text)] - line = '\t'.join((mod_text, head_text, label, path, sen)) - print(line.encode('utf-8'), file=out_fh) - num_paths += 1 - - out_fh.close() - -if __name__ == '__main__': - tf.app.run() diff --git a/research/lexnet_nc/get_indicative_paths.py b/research/lexnet_nc/get_indicative_paths.py deleted file mode 100755 index f8b34cca221a07c0b633024b71f082b8f61b3a45..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/get_indicative_paths.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Extracts paths that are indicative of each relation.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf - -from . import path_model -from . import lexnet_common - -tf.flags.DEFINE_string( - 'dataset_dir', 'datasets', - 'Dataset base directory') - -tf.flags.DEFINE_string( - 'dataset', - 'tratz/fine_grained', - 'Subdirectory containing the corpus directories: ' - 'subdirectory of dataset_dir') - -tf.flags.DEFINE_string( - 'corpus', 'random/wiki', - 'Subdirectory containing the corpus and split: ' - 'subdirectory of dataset_dir/dataset') - -tf.flags.DEFINE_string( - 'embeddings_base_path', 'embeddings', - 'Embeddings base directory') - -tf.flags.DEFINE_string( - 'logdir', 'logdir', - 'Directory of model output files') - -tf.flags.DEFINE_integer( - 'top_k', 20, 'Number of top paths to extract') - -tf.flags.DEFINE_float( - 'threshold', 0.8, 'Threshold above which to consider paths as indicative') - -FLAGS = tf.flags.FLAGS - - -def main(_): - hparams = path_model.PathBasedModel.default_hparams() - - # First things first. Load the path data. - path_embeddings_file = 'path_embeddings/{dataset}/{corpus}'.format( - dataset=FLAGS.dataset, - corpus=FLAGS.corpus) - - path_dim = (hparams.lemma_dim + hparams.pos_dim + - hparams.dep_dim + hparams.dir_dim) - - path_embeddings, path_to_index = path_model.load_path_embeddings( - os.path.join(FLAGS.embeddings_base_path, path_embeddings_file), - path_dim) - - # Load and count the classes so we can correctly instantiate the model. - classes_filename = os.path.join( - FLAGS.dataset_dir, FLAGS.dataset, 'classes.txt') - - with open(classes_filename) as f_in: - classes = f_in.read().splitlines() - - hparams.num_classes = len(classes) - - # We need the word embeddings to instantiate the model, too. 
- print('Loading word embeddings...') - lemma_embeddings = lexnet_common.load_word_embeddings( - FLAGS.embeddings_base_path, hparams.lemma_embeddings_file) - - # Instantiate the model. - with tf.Graph().as_default(): - with tf.variable_scope('lexnet'): - instance = tf.placeholder(dtype=tf.string) - model = path_model.PathBasedModel( - hparams, lemma_embeddings, instance) - - with tf.Session() as session: - model_dir = '{logdir}/results/{dataset}/path/{corpus}'.format( - logdir=FLAGS.logdir, - dataset=FLAGS.dataset, - corpus=FLAGS.corpus) - - saver = tf.train.Saver() - saver.restore(session, os.path.join(model_dir, 'best.ckpt')) - - path_model.get_indicative_paths( - model, session, path_to_index, path_embeddings, classes, - model_dir, FLAGS.top_k, FLAGS.threshold) - -if __name__ == '__main__': - tf.app.run() diff --git a/research/lexnet_nc/learn_classifier.py b/research/lexnet_nc/learn_classifier.py deleted file mode 100755 index ec284029535609ffd2cc0f2f5cddb9b87954aa81..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/learn_classifier.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Trains the integrated LexNET classifier.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import lexnet_common -import lexnet_model -import path_model -from sklearn import metrics -import tensorflow as tf - -tf.flags.DEFINE_string( - 'dataset_dir', 'datasets', - 'Dataset base directory') - -tf.flags.DEFINE_string( - 'dataset', 'tratz/fine_grained', - 'Subdirectory containing the corpus directories: ' - 'subdirectory of dataset_dir') - -tf.flags.DEFINE_string( - 'corpus', 'wiki/random', - 'Subdirectory containing the corpus and split: ' - 'subdirectory of dataset_dir/dataset') - -tf.flags.DEFINE_string( - 'embeddings_base_path', 'embeddings', - 'Embeddings base directory') - -tf.flags.DEFINE_string( - 'logdir', 'logdir', - 'Directory of model output files') - -tf.flags.DEFINE_string('hparams', '', 'Hyper-parameters') - -tf.flags.DEFINE_string( - 'input', 'integrated', - 'The model(dist/dist-nc/path/integrated/integrated-nc') - -FLAGS = tf.flags.FLAGS - - -def main(_): - # Pick up any one-off hyper-parameters. 
- hparams = lexnet_model.LexNETModel.default_hparams() - hparams.corpus = FLAGS.corpus - hparams.input = FLAGS.input - hparams.path_embeddings_file = 'path_embeddings/%s/%s' % ( - FLAGS.dataset, FLAGS.corpus) - - input_dir = hparams.input if hparams.input != 'path' else 'path_classifier' - - # Set the number of classes - classes_filename = os.path.join( - FLAGS.dataset_dir, FLAGS.dataset, 'classes.txt') - with open(classes_filename) as f_in: - classes = f_in.read().splitlines() - - hparams.num_classes = len(classes) - print('Model will predict into %d classes' % hparams.num_classes) - - # Get the datasets - train_set, val_set, test_set = ( - os.path.join( - FLAGS.dataset_dir, FLAGS.dataset, FLAGS.corpus, - filename + '.tfrecs.gz') - for filename in ['train', 'val', 'test']) - - print('Running with hyper-parameters: {}'.format(hparams)) - - # Load the instances - print('Loading instances...') - opts = tf.python_io.TFRecordOptions( - compression_type=tf.python_io.TFRecordCompressionType.GZIP) - train_instances = list(tf.python_io.tf_record_iterator(train_set, opts)) - val_instances = list(tf.python_io.tf_record_iterator(val_set, opts)) - test_instances = list(tf.python_io.tf_record_iterator(test_set, opts)) - - # Load the word embeddings - print('Loading word embeddings...') - relata_embeddings, path_embeddings, nc_embeddings, path_to_index = ( - None, None, None, None) - if hparams.input in ['dist', 'dist-nc', 'integrated', 'integrated-nc']: - relata_embeddings = lexnet_common.load_word_embeddings( - FLAGS.embeddings_base_path, hparams.relata_embeddings_file) - - if hparams.input in ['path', 'integrated', 'integrated-nc']: - path_embeddings, path_to_index = path_model.load_path_embeddings( - os.path.join(FLAGS.embeddings_base_path, hparams.path_embeddings_file), - hparams.path_dim) - - if hparams.input in ['dist-nc', 'integrated-nc']: - nc_embeddings = lexnet_common.load_word_embeddings( - FLAGS.embeddings_base_path, hparams.nc_embeddings_file) - - # Define the 
graph and the model - with tf.Graph().as_default(): - model = lexnet_model.LexNETModel( - hparams, relata_embeddings, path_embeddings, - nc_embeddings, path_to_index) - - # Initialize a session and start training - session = tf.Session() - session.run(tf.global_variables_initializer()) - - # Initalize the path mapping - if hparams.input in ['path', 'integrated', 'integrated-nc']: - session.run(tf.tables_initializer()) - session.run(model.initialize_path_op, { - model.path_initial_value_t: path_embeddings - }) - - # Initialize the NC embeddings - if hparams.input in ['dist-nc', 'integrated-nc']: - session.run(model.initialize_nc_op, { - model.nc_initial_value_t: nc_embeddings - }) - - # Load the labels - print('Loading labels...') - train_labels = model.load_labels(session, train_instances) - val_labels = model.load_labels(session, val_instances) - test_labels = model.load_labels(session, test_instances) - - save_path = '{logdir}/results/{dataset}/{input}/{corpus}'.format( - logdir=FLAGS.logdir, dataset=FLAGS.dataset, - corpus=model.hparams.corpus, input=input_dir) - - if not os.path.exists(save_path): - os.makedirs(save_path) - - # Train the model - print('Training the model...') - model.fit(session, train_instances, epoch_completed, - val_instances, val_labels, save_path) - - # Print the best performance on the validation set - print('Best performance on the validation set: F1=%.3f' % - epoch_completed.best_f1) - - # Evaluate on the train and validation sets - lexnet_common.full_evaluation(model, session, train_instances, train_labels, - 'Train', classes) - lexnet_common.full_evaluation(model, session, val_instances, val_labels, - 'Validation', classes) - test_predictions = lexnet_common.full_evaluation( - model, session, test_instances, test_labels, 'Test', classes) - - # Write the test predictions to a file - predictions_file = os.path.join(save_path, 'test_predictions.tsv') - print('Saving test predictions to %s' % save_path) - test_pairs = 
model.load_pairs(session, test_instances) - lexnet_common.write_predictions(test_pairs, test_labels, test_predictions, - classes, predictions_file) - - -def epoch_completed(model, session, epoch, epoch_loss, - val_instances, val_labels, save_path): - """Runs every time an epoch completes. - - Print the performance on the validation set, and update the saved model if - its performance is better on the previous ones. If the performance dropped, - tell the training to stop. - - Args: - model: The currently trained path-based model. - session: The current TensorFlow session. - epoch: The epoch number. - epoch_loss: The current epoch loss. - val_instances: The validation set instances (evaluation between epochs). - val_labels: The validation set labels (for evaluation between epochs). - save_path: Where to save the model. - - Returns: - whether the training should stop. - """ - stop_training = False - - # Evaluate on the validation set - val_pred = model.predict(session, val_instances) - precision, recall, f1, _ = metrics.precision_recall_fscore_support( - val_labels, val_pred, average='weighted') - print( - 'Epoch: %d/%d, Loss: %f, validation set: P: %.3f, R: %.3f, F1: %.3f\n' % ( - epoch + 1, model.hparams.num_epochs, epoch_loss, - precision, recall, f1)) - - # If the F1 is much smaller than the previous one, stop training. Else, if - # it's bigger, save the model. 
- if f1 < epoch_completed.best_f1 - 0.08: - stop_training = True - - if f1 > epoch_completed.best_f1: - saver = tf.train.Saver() - checkpoint_filename = os.path.join(save_path, 'best.ckpt') - print('Saving model in: %s' % checkpoint_filename) - saver.save(session, checkpoint_filename) - print('Model saved in file: %s' % checkpoint_filename) - epoch_completed.best_f1 = f1 - - return stop_training - -epoch_completed.best_f1 = 0 - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/lexnet_nc/learn_path_embeddings.py b/research/lexnet_nc/learn_path_embeddings.py deleted file mode 100755 index 480378f4aa010ee27f0387685bac488cedbb2ab9..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/learn_path_embeddings.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Trains the LexNET path-based model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import lexnet_common -import path_model -from sklearn import metrics -import tensorflow as tf - -tf.flags.DEFINE_string('train', '', 'training dataset, tfrecs') -tf.flags.DEFINE_string('val', '', 'validation dataset, tfrecs') -tf.flags.DEFINE_string('test', '', 'test dataset, tfrecs') -tf.flags.DEFINE_string('embeddings', '', 'embeddings, npy') -tf.flags.DEFINE_string('relations', '', 'file containing relation labels') -tf.flags.DEFINE_string('output_dir', '', 'output directory for path embeddings') -tf.flags.DEFINE_string('logdir', '', 'directory for model training') -FLAGS = tf.flags.FLAGS - - -def main(_): - # Pick up any one-off hyper-parameters. - hparams = path_model.PathBasedModel.default_hparams() - - with open(FLAGS.relations) as fh: - relations = fh.read().splitlines() - - hparams.num_classes = len(relations) - print('Model will predict into %d classes' % hparams.num_classes) - - print('Running with hyper-parameters: {}'.format(hparams)) - - # Load the instances - print('Loading instances...') - opts = tf.python_io.TFRecordOptions( - compression_type=tf.python_io.TFRecordCompressionType.GZIP) - - train_instances = list(tf.python_io.tf_record_iterator(FLAGS.train, opts)) - val_instances = list(tf.python_io.tf_record_iterator(FLAGS.val, opts)) - test_instances = list(tf.python_io.tf_record_iterator(FLAGS.test, opts)) - - # Load the word embeddings - print('Loading word embeddings...') - lemma_embeddings = lexnet_common.load_word_embeddings(FLAGS.embeddings) - - # Define the graph and the model - with tf.Graph().as_default(): - with tf.variable_scope('lexnet'): - options = tf.python_io.TFRecordOptions( - compression_type=tf.python_io.TFRecordCompressionType.GZIP) - reader = tf.TFRecordReader(options=options) 
- _, train_instance = reader.read( - tf.train.string_input_producer([FLAGS.train])) - shuffled_train_instance = tf.train.shuffle_batch( - [train_instance], - batch_size=1, - num_threads=1, - capacity=len(train_instances), - min_after_dequeue=100, - )[0] - - train_model = path_model.PathBasedModel( - hparams, lemma_embeddings, shuffled_train_instance) - - with tf.variable_scope('lexnet', reuse=True): - val_instance = tf.placeholder(dtype=tf.string) - val_model = path_model.PathBasedModel( - hparams, lemma_embeddings, val_instance) - - # Initialize a session and start training - best_model_saver = tf.train.Saver() - f1_t = tf.placeholder(tf.float32) - best_f1_t = tf.Variable(0.0, trainable=False, name='best_f1') - assign_best_f1_op = tf.assign(best_f1_t, f1_t) - - supervisor = tf.train.Supervisor( - logdir=FLAGS.logdir, - global_step=train_model.global_step) - - with supervisor.managed_session() as session: - # Load the labels - print('Loading labels...') - val_labels = train_model.load_labels(session, val_instances) - - # Train the model - print('Training the model...') - - while True: - step = session.run(train_model.global_step) - epoch = (step + len(train_instances) - 1) // len(train_instances) - if epoch > hparams.num_epochs: - break - - print('Starting epoch %d (step %d)...' 
% (1 + epoch, step)) - - epoch_loss = train_model.run_one_epoch(session, len(train_instances)) - - best_f1 = session.run(best_f1_t) - f1 = epoch_completed(val_model, session, epoch, epoch_loss, - val_instances, val_labels, best_model_saver, - FLAGS.logdir, best_f1) - - if f1 > best_f1: - session.run(assign_best_f1_op, {f1_t: f1}) - - if f1 < best_f1 - 0.08: - tf.logging.info('Stopping training after %d epochs.\n' % epoch) - break - - # Print the best performance on the validation set - best_f1 = session.run(best_f1_t) - print('Best performance on the validation set: F1=%.3f' % best_f1) - - # Save the path embeddings - print('Computing the path embeddings...') - instances = train_instances + val_instances + test_instances - path_index, path_vectors = path_model.compute_path_embeddings( - val_model, session, instances) - - if not os.path.exists(path_emb_dir): - os.makedirs(path_emb_dir) - - path_model.save_path_embeddings( - val_model, path_vectors, path_index, FLAGS.output_dir) - - -def epoch_completed(model, session, epoch, epoch_loss, - val_instances, val_labels, saver, save_path, best_f1): - """Runs every time an epoch completes. - - Print the performance on the validation set, and update the saved model if - its performance is better on the previous ones. If the performance dropped, - tell the training to stop. - - Args: - model: The currently trained path-based model. - session: The current TensorFlow session. - epoch: The epoch number. - epoch_loss: The current epoch loss. - val_instances: The validation set instances (evaluation between epochs). - val_labels: The validation set labels (for evaluation between epochs). - saver: tf.Saver object - save_path: Where to save the model. - best_f1: the best F1 achieved so far. - - Returns: - The F1 achieved on the training set. 
- """ - # Evaluate on the validation set - val_pred = model.predict(session, val_instances) - precision, recall, f1, _ = metrics.precision_recall_fscore_support( - val_labels, val_pred, average='weighted') - print( - 'Epoch: %d/%d, Loss: %f, validation set: P: %.3f, R: %.3f, F1: %.3f\n' % ( - epoch + 1, model.hparams.num_epochs, epoch_loss, - precision, recall, f1)) - - if f1 > best_f1: - save_filename = os.path.join(save_path, 'best.ckpt') - print('Saving model in: %s' % save_filename) - saver.save(session, save_filename) - print('Model saved in file: %s' % save_filename) - - return f1 - - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/lexnet_nc/lexnet_common.py b/research/lexnet_nc/lexnet_common.py deleted file mode 100644 index a2e8a104d00c1c2f90731f4045c3c8e69e370dbf..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/lexnet_common.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Common stuff used with LexNET.""" -# pylint: disable=bad-whitespace - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -from sklearn import metrics -import tensorflow as tf - -# Part of speech tags used in the paths. 
-POSTAGS = [ - 'PAD', 'VERB', 'CONJ', 'NOUN', 'PUNCT', - 'ADP', 'ADJ', 'DET', 'ADV', 'PART', - 'NUM', 'X', 'INTJ', 'SYM', -] - -POSTAG_TO_ID = {tag: tid for tid, tag in enumerate(POSTAGS)} - -# Dependency labels used in the paths. -DEPLABELS = [ - 'PAD', 'UNK', 'ROOT', 'abbrev', 'acomp', 'advcl', - 'advmod', 'agent', 'amod', 'appos', 'attr', 'aux', - 'auxpass', 'cc', 'ccomp', 'complm', 'conj', 'cop', - 'csubj', 'csubjpass', 'dep', 'det', 'dobj', 'expl', - 'infmod', 'iobj', 'mark', 'mwe', 'nc', 'neg', - 'nn', 'npadvmod', 'nsubj', 'nsubjpass', 'num', 'number', - 'p', 'parataxis', 'partmod', 'pcomp', 'pobj', 'poss', - 'preconj', 'predet', 'prep', 'prepc', 'prt', 'ps', - 'purpcl', 'quantmod', 'rcmod', 'ref', 'rel', 'suffix', - 'title', 'tmod', 'xcomp', 'xsubj', -] - -DEPLABEL_TO_ID = {label: lid for lid, label in enumerate(DEPLABELS)} - -# Direction codes used in the paths. -DIRS = '_^V<>' -DIR_TO_ID = {dir: did for did, dir in enumerate(DIRS)} - - -def load_word_embeddings(embedding_filename): - """Loads pretrained word embeddings from a binary file and returns the matrix. - - Adds the , , , and tokens to the beginning of the vocab. - - Args: - embedding_filename: filename of the binary NPY data - - Returns: - The word embeddings matrix - """ - embeddings = np.load(embedding_filename) - dim = embeddings.shape[1] - - # Four initially random vectors for the special tokens: , , , - special_embeddings = np.random.normal(0, 0.1, (4, dim)) - embeddings = np.vstack((special_embeddings, embeddings)) - embeddings = embeddings.astype(np.float32) - - return embeddings - - -def full_evaluation(model, session, instances, labels, set_name, classes): - """Prints a full evaluation on the current set. - - Performance (recall, precision and F1), classification report (per - class performance), and confusion matrix). - - Args: - model: The currently trained path-based model. - session: The current TensorFlow session. - instances: The current set instances. 
- labels: The current set labels. - set_name: The current set name (train/validation/test). - classes: The class label names. - - Returns: - The model's prediction for the given instances. - """ - - # Predict the labels - pred = model.predict(session, instances) - - # Print the performance - precision, recall, f1, _ = metrics.precision_recall_fscore_support( - labels, pred, average='weighted') - - print('%s set: Precision: %.3f, Recall: %.3f, F1: %.3f' % ( - set_name, precision, recall, f1)) - - # Print a classification report - print('%s classification report:' % set_name) - print(metrics.classification_report(labels, pred, target_names=classes)) - - # Print the confusion matrix - print('%s confusion matrix:' % set_name) - cm = metrics.confusion_matrix(labels, pred, labels=range(len(classes))) - cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100 - print_cm(cm, labels=classes) - return pred - - -def print_cm(cm, labels): - """Pretty print for confusion matrices. - - From: https://gist.github.com/zachguo/10296432. - - Args: - cm: The confusion matrix. - labels: The class names. - """ - columnwidth = 10 - empty_cell = ' ' * columnwidth - short_labels = [label[:12].rjust(10, ' ') for label in labels] - - # Print header - header = empty_cell + ' ' - header += ''.join([' %{0}s '.format(columnwidth) % label - for label in short_labels]) - - print(header) - - # Print rows - for i, label1 in enumerate(short_labels): - row = '%{0}s '.format(columnwidth) % label1[:10] - for j in range(len(short_labels)): - value = int(cm[i, j]) if not np.isnan(cm[i, j]) else 0 - cell = ' %{0}d '.format(10) % value - row += cell + ' ' - print(row) - - -def load_all_labels(records): - """Reads TensorFlow examples from a RecordReader and returns only the labels. - - Args: - records: a record list with TensorFlow examples. 
- - Returns: - The labels - """ - curr_features = tf.parse_example(records, { - 'rel_id': tf.FixedLenFeature([1], dtype=tf.int64), - }) - - labels = tf.squeeze(curr_features['rel_id'], [-1]) - return labels - - -def load_all_pairs(records): - """Reads TensorFlow examples from a RecordReader and returns the word pairs. - - Args: - records: a record list with TensorFlow examples. - - Returns: - The word pairs - """ - curr_features = tf.parse_example(records, { - 'pair': tf.FixedLenFeature([1], dtype=tf.string) - }) - - word_pairs = curr_features['pair'] - return word_pairs - - -def write_predictions(pairs, labels, predictions, classes, predictions_file): - """Write the predictions to a file. - - Args: - pairs: the word pairs (list of tuple of two strings). - labels: the gold-standard labels for these pairs (array of rel ID). - predictions: the predicted labels for these pairs (array of rel ID). - classes: a list of relation names. - predictions_file: where to save the predictions. - """ - with open(predictions_file, 'w') as f_out: - for pair, label, pred in zip(pairs, labels, predictions): - w1, w2 = pair - f_out.write('\t'.join([w1, w2, classes[label], classes[pred]]) + '\n') diff --git a/research/lexnet_nc/lexnet_model.py b/research/lexnet_nc/lexnet_model.py deleted file mode 100644 index b0f16b030b3bb3fee68b91122bcd03226ffcfa4a..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/lexnet_model.py +++ /dev/null @@ -1,438 +0,0 @@ -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""The integrated LexNET model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import lexnet_common -import numpy as np -import tensorflow as tf -from six.moves import xrange - - -class LexNETModel(object): - """The LexNET model for classifying relationships between noun compounds.""" - - @classmethod - def default_hparams(cls): - """Returns the default hyper-parameters.""" - return tf.contrib.training.HParams( - batch_size=10, - num_classes=37, - num_epochs=30, - input_keep_prob=0.9, - input='integrated', # dist/ dist-nc/ path/ integrated/ integrated-nc - learn_relata=False, - corpus='wiki_gigawords', - random_seed=133, # zero means no random seed - relata_embeddings_file='glove/glove.6B.300d.bin', - nc_embeddings_file='nc_glove/vecs.6B.300d.bin', - path_embeddings_file='path_embeddings/tratz/fine_grained/wiki', - hidden_layers=1, - path_dim=60) - - def __init__(self, hparams, relata_embeddings, path_embeddings, nc_embeddings, - path_to_index): - """Initialize the LexNET classifier. - - Args: - hparams: the hyper-parameters. - relata_embeddings: word embeddings for the distributional component. - path_embeddings: embeddings for the paths. - nc_embeddings: noun compound embeddings. - path_to_index: a mapping from string path to an index in the path - embeddings matrix. 
- """ - self.hparams = hparams - - self.path_embeddings = path_embeddings - self.relata_embeddings = relata_embeddings - self.nc_embeddings = nc_embeddings - - self.vocab_size, self.relata_dim = 0, 0 - self.path_to_index = None - self.path_dim = 0 - - # Set the random seed - if hparams.random_seed > 0: - tf.set_random_seed(hparams.random_seed) - - # Get the vocabulary size and relata dim - if self.hparams.input in ['dist', 'dist-nc', 'integrated', 'integrated-nc']: - self.vocab_size, self.relata_dim = self.relata_embeddings.shape - - # Create the mapping from string path to an index in the embeddings matrix - if self.hparams.input in ['path', 'integrated', 'integrated-nc']: - self.path_to_index = tf.contrib.lookup.HashTable( - tf.contrib.lookup.KeyValueTensorInitializer( - tf.constant(path_to_index.keys()), - tf.constant(path_to_index.values()), - key_dtype=tf.string, value_dtype=tf.int32), 0) - - self.path_dim = self.path_embeddings.shape[1] - - # Create the network - self.__create_computation_graph__() - - def __create_computation_graph__(self): - """Initialize the model and define the graph.""" - network_input = 0 - - # Define the network inputs - # Distributional x and y - if self.hparams.input in ['dist', 'dist-nc', 'integrated', 'integrated-nc']: - network_input += 2 * self.relata_dim - self.relata_lookup = tf.get_variable( - 'relata_lookup', - initializer=self.relata_embeddings, - dtype=tf.float32, - trainable=self.hparams.learn_relata) - - # Path-based - if self.hparams.input in ['path', 'integrated', 'integrated-nc']: - network_input += self.path_dim - - self.path_initial_value_t = tf.placeholder(tf.float32, None) - - self.path_lookup = tf.get_variable( - name='path_lookup', - dtype=tf.float32, - trainable=False, - shape=self.path_embeddings.shape) - - self.initialize_path_op = tf.assign( - self.path_lookup, self.path_initial_value_t, validate_shape=False) - - # Distributional noun compound - if self.hparams.input in ['dist-nc', 'integrated-nc']: - 
network_input += self.relata_dim - - self.nc_initial_value_t = tf.placeholder(tf.float32, None) - - self.nc_lookup = tf.get_variable( - name='nc_lookup', - dtype=tf.float32, - trainable=False, - shape=self.nc_embeddings.shape) - - self.initialize_nc_op = tf.assign( - self.nc_lookup, self.nc_initial_value_t, validate_shape=False) - - hidden_dim = network_input // 2 - - # Define the MLP - if self.hparams.hidden_layers == 0: - self.weights1 = tf.get_variable( - 'W1', - shape=[network_input, self.hparams.num_classes], - dtype=tf.float32) - self.bias1 = tf.get_variable( - 'b1', - shape=[self.hparams.num_classes], - dtype=tf.float32) - - elif self.hparams.hidden_layers == 1: - - self.weights1 = tf.get_variable( - 'W1', - shape=[network_input, hidden_dim], - dtype=tf.float32) - self.bias1 = tf.get_variable( - 'b1', - shape=[hidden_dim], - dtype=tf.float32) - - self.weights2 = tf.get_variable( - 'W2', - shape=[hidden_dim, self.hparams.num_classes], - dtype=tf.float32) - self.bias2 = tf.get_variable( - 'b2', - shape=[self.hparams.num_classes], - dtype=tf.float32) - - else: - raise ValueError('Only 0 or 1 hidden layers are supported') - - # Define the variables - self.instances = tf.placeholder(dtype=tf.string, - shape=[self.hparams.batch_size]) - - (self.x_embedding_id, - self.y_embedding_id, - self.nc_embedding_id, - self.path_embedding_id, - self.path_counts, - self.labels) = parse_tensorflow_examples( - self.instances, self.hparams.batch_size, self.path_to_index) - - # Create the MLP - self.__mlp__() - - self.instances_to_load = tf.placeholder(dtype=tf.string, shape=[None]) - self.labels_to_load = lexnet_common.load_all_labels(self.instances_to_load) - self.pairs_to_load = lexnet_common.load_all_pairs(self.instances_to_load) - - def load_labels(self, session, instances): - """Loads the labels for these instances. - - Args: - session: The current TensorFlow session, - instances: The instances for which to load the labels. - - Returns: - the labels of these instances. 
- """ - return session.run(self.labels_to_load, - feed_dict={self.instances_to_load: instances}) - - def load_pairs(self, session, instances): - """Loads the word pairs for these instances. - - Args: - session: The current TensorFlow session, - instances: The instances for which to load the labels. - - Returns: - the word pairs of these instances. - """ - word_pairs = session.run(self.pairs_to_load, - feed_dict={self.instances_to_load: instances}) - return [pair[0].split('::') for pair in word_pairs] - - def __train_single_batch__(self, session, batch_instances): - """Train a single batch. - - Args: - session: The current TensorFlow session. - batch_instances: TensorFlow examples containing the training intances - - Returns: - The cost for the current batch. - """ - cost, _ = session.run([self.cost, self.train_op], - feed_dict={self.instances: batch_instances}) - - return cost - - def fit(self, session, inputs, on_epoch_completed, val_instances, val_labels, - save_path): - """Train the model. - - Args: - session: The current TensorFlow session. - inputs: - on_epoch_completed: A method to call after each epoch. - val_instances: The validation set instances (evaluation between epochs). - val_labels: The validation set labels (for evaluation between epochs). - save_path: Where to save the model. 
- """ - for epoch in range(self.hparams.num_epochs): - - losses = [] - epoch_indices = list(np.random.permutation(len(inputs))) - - # If the number of instances doesn't divide by batch_size, enlarge it - # by duplicating training examples - mod = len(epoch_indices) % self.hparams.batch_size - if mod > 0: - epoch_indices.extend([np.random.randint(0, high=len(inputs))] * mod) - - # Define the batches - n_batches = len(epoch_indices) // self.hparams.batch_size - - for minibatch in range(n_batches): - - batch_indices = epoch_indices[minibatch * self.hparams.batch_size:( - minibatch + 1) * self.hparams.batch_size] - batch_instances = [inputs[i] for i in batch_indices] - - loss = self.__train_single_batch__(session, batch_instances) - losses.append(loss) - - epoch_loss = np.nanmean(losses) - - if on_epoch_completed: - should_stop = on_epoch_completed(self, session, epoch, epoch_loss, - val_instances, val_labels, save_path) - if should_stop: - print('Stopping training after %d epochs.' % epoch) - return - - def predict(self, session, inputs): - """Predict the classification of the test set. - - Args: - session: The current TensorFlow session. - inputs: the train paths, x, y and/or nc vectors - - Returns: - The test predictions. - """ - predictions, _ = zip(*self.predict_with_score(session, inputs)) - return np.array(predictions) - - def predict_with_score(self, session, inputs): - """Predict the classification of the test set. - - Args: - session: The current TensorFlow session. - inputs: the test paths, x, y and/or nc vectors - - Returns: - The test predictions along with their scores. 
- """ - test_pred = [0] * len(inputs) - - for chunk in xrange(0, len(test_pred), self.hparams.batch_size): - - # Initialize the variables with the current batch data - batch_indices = list( - range(chunk, min(chunk + self.hparams.batch_size, len(test_pred)))) - - # If the batch is too small, add a few other examples - if len(batch_indices) < self.hparams.batch_size: - batch_indices += [0] * (self.hparams.batch_size-len(batch_indices)) - - batch_instances = [inputs[i] for i in batch_indices] - - predictions, scores = session.run( - [self.predictions, self.scores], - feed_dict={self.instances: batch_instances}) - - for index_in_batch, index_in_dataset in enumerate(batch_indices): - prediction = predictions[index_in_batch] - score = scores[index_in_batch][prediction] - test_pred[index_in_dataset] = (prediction, score) - - return test_pred - - def __mlp__(self): - """Performs the MLP operations. - - Returns: the prediction object to be computed in a Session - """ - # Define the operations - - # Network input - vec_inputs = [] - - # Distributional component - if self.hparams.input in ['dist', 'dist-nc', 'integrated', 'integrated-nc']: - for emb_id in [self.x_embedding_id, self.y_embedding_id]: - vec_inputs.append(tf.nn.embedding_lookup(self.relata_lookup, emb_id)) - - # Noun compound component - if self.hparams.input in ['dist-nc', 'integrated-nc']: - vec = tf.nn.embedding_lookup(self.nc_lookup, self.nc_embedding_id) - vec_inputs.append(vec) - - # Path-based component - if self.hparams.input in ['path', 'integrated', 'integrated-nc']: - - # Get the current paths for each batch instance - self.path_embeddings = tf.nn.embedding_lookup(self.path_lookup, - self.path_embedding_id) - - # self.path_embeddings is of shape - # [batch_size, max_path_per_instance, output_dim] - # We need to multiply it by path counts - # ([batch_size, max_path_per_instance]). - # Start by duplicating path_counts along the output_dim axis. 
- self.path_freq = tf.tile(tf.expand_dims(self.path_counts, -1), - [1, 1, self.path_dim]) - - # Compute the averaged path vector for each instance. - # First, multiply the path embeddings and frequencies element-wise. - self.weighted = tf.multiply(self.path_freq, self.path_embeddings) - - # Second, take the sum to get a tensor of shape [batch_size, output_dim]. - self.pair_path_embeddings = tf.reduce_sum(self.weighted, 1) - - # Finally, divide by the total number of paths. - # The number of paths for each pair has a shape [batch_size, 1], - # We duplicate it output_dim times along the second axis. - self.num_paths = tf.clip_by_value( - tf.reduce_sum(self.path_counts, 1), 1, np.inf) - self.num_paths = tf.tile(tf.expand_dims(self.num_paths, -1), - [1, self.path_dim]) - - # And finally, divide pair_path_embeddings by num_paths element-wise. - self.pair_path_embeddings = tf.div( - self.pair_path_embeddings, self.num_paths) - vec_inputs.append(self.pair_path_embeddings) - - # Concatenate the inputs and feed to the MLP - self.input_vec = tf.nn.dropout( - tf.concat(vec_inputs, 1), - keep_prob=self.hparams.input_keep_prob) - - h = tf.matmul(self.input_vec, self.weights1) - self.output = h - - if self.hparams.hidden_layers == 1: - self.output = tf.matmul(tf.nn.tanh(h), self.weights2) - - self.scores = self.output - self.predictions = tf.argmax(self.scores, axis=1) - - # Define the loss function and the optimization algorithm - self.cross_entropies = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=self.scores, labels=self.labels) - self.cost = tf.reduce_sum(self.cross_entropies, name='cost') - self.global_step = tf.Variable(0, name='global_step', trainable=False) - self.optimizer = tf.train.AdamOptimizer() - self.train_op = self.optimizer.minimize( - self.cost, global_step=self.global_step) - - -def parse_tensorflow_examples(record, batch_size, path_to_index): - """Reads TensorFlow examples from a RecordReader. - - Args: - record: a record with TensorFlow examples. 
- batch_size: the number of instances in a minibatch - path_to_index: mapping from string path to index in the embeddings matrix. - - Returns: - The word embeddings IDs, paths and counts - """ - features = tf.parse_example( - record, { - 'x_embedding_id': tf.FixedLenFeature([1], dtype=tf.int64), - 'y_embedding_id': tf.FixedLenFeature([1], dtype=tf.int64), - 'nc_embedding_id': tf.FixedLenFeature([1], dtype=tf.int64), - 'reprs': tf.FixedLenSequenceFeature( - shape=(), dtype=tf.string, allow_missing=True), - 'counts': tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'rel_id': tf.FixedLenFeature([1], dtype=tf.int64) - }) - - x_embedding_id = tf.squeeze(features['x_embedding_id'], [-1]) - y_embedding_id = tf.squeeze(features['y_embedding_id'], [-1]) - nc_embedding_id = tf.squeeze(features['nc_embedding_id'], [-1]) - labels = tf.squeeze(features['rel_id'], [-1]) - path_counts = tf.to_float(tf.reshape(features['counts'], [batch_size, -1])) - - path_embedding_id = None - if path_to_index: - path_embedding_id = path_to_index.lookup(features['reprs']) - - return ( - x_embedding_id, y_embedding_id, nc_embedding_id, - path_embedding_id, path_counts, labels) diff --git a/research/lexnet_nc/path_model.py b/research/lexnet_nc/path_model.py deleted file mode 100644 index c283841775d673baa8a4bc8c438d65f288a2c555..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/path_model.py +++ /dev/null @@ -1,547 +0,0 @@ -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""LexNET Path-based Model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import itertools -import os - -import lexnet_common -import numpy as np -import tensorflow as tf - - -class PathBasedModel(object): - """The LexNET path-based model for classifying semantic relations.""" - - @classmethod - def default_hparams(cls): - """Returns the default hyper-parameters.""" - return tf.contrib.training.HParams( - max_path_len=8, - num_classes=37, - num_epochs=30, - input_keep_prob=0.9, - learning_rate=0.001, - learn_lemmas=False, - random_seed=133, # zero means no random seed - lemma_embeddings_file='glove/glove.6B.50d.bin', - num_pos=len(lexnet_common.POSTAGS), - num_dep=len(lexnet_common.DEPLABELS), - num_directions=len(lexnet_common.DIRS), - lemma_dim=50, - pos_dim=4, - dep_dim=5, - dir_dim=1) - - def __init__(self, hparams, lemma_embeddings, instance): - """Initialize the LexNET classifier. - - Args: - hparams: the hyper-parameters. - lemma_embeddings: word embeddings for the path-based component. 
- instance: string tensor containing the input instance - """ - self.hparams = hparams - self.lemma_embeddings = lemma_embeddings - self.instance = instance - self.vocab_size, self.lemma_dim = self.lemma_embeddings.shape - - # Set the random seed - if hparams.random_seed > 0: - tf.set_random_seed(hparams.random_seed) - - # Create the network - self.__create_computation_graph__() - - def __create_computation_graph__(self): - """Initialize the model and define the graph.""" - self.lstm_input_dim = sum([self.hparams.lemma_dim, self.hparams.pos_dim, - self.hparams.dep_dim, self.hparams.dir_dim]) - self.lstm_output_dim = self.lstm_input_dim - - network_input = self.lstm_output_dim - self.lemma_lookup = tf.get_variable( - 'lemma_lookup', - initializer=self.lemma_embeddings, - dtype=tf.float32, - trainable=self.hparams.learn_lemmas) - self.pos_lookup = tf.get_variable( - 'pos_lookup', - shape=[self.hparams.num_pos, self.hparams.pos_dim], - dtype=tf.float32) - self.dep_lookup = tf.get_variable( - 'dep_lookup', - shape=[self.hparams.num_dep, self.hparams.dep_dim], - dtype=tf.float32) - self.dir_lookup = tf.get_variable( - 'dir_lookup', - shape=[self.hparams.num_directions, self.hparams.dir_dim], - dtype=tf.float32) - - self.weights1 = tf.get_variable( - 'W1', - shape=[network_input, self.hparams.num_classes], - dtype=tf.float32) - self.bias1 = tf.get_variable( - 'b1', - shape=[self.hparams.num_classes], - dtype=tf.float32) - - # Define the variables - (self.batch_paths, - self.path_counts, - self.seq_lengths, - self.path_strings, - self.batch_labels) = _parse_tensorflow_example( - self.instance, self.hparams.max_path_len, self.hparams.input_keep_prob) - - # Create the LSTM - self.__lstm__() - - # Create the MLP - self.__mlp__() - - self.instances_to_load = tf.placeholder(dtype=tf.string, shape=[None]) - self.labels_to_load = lexnet_common.load_all_labels(self.instances_to_load) - - def load_labels(self, session, batch_instances): - """Loads the labels of the current 
instances. - - Args: - session: the current TensorFlow session. - batch_instances: the dataset instances. - - Returns: - the labels. - """ - return session.run(self.labels_to_load, - feed_dict={self.instances_to_load: batch_instances}) - - def run_one_epoch(self, session, num_steps): - """Train the model. - - Args: - session: The current TensorFlow session. - num_steps: The number of steps in each epoch. - - Returns: - The mean loss for the epoch. - - Raises: - ArithmeticError: if the loss becomes non-finite. - """ - losses = [] - - for step in range(num_steps): - curr_loss, _ = session.run([self.cost, self.train_op]) - if not np.isfinite(curr_loss): - raise ArithmeticError('nan loss at step %d' % step) - - losses.append(curr_loss) - - return np.mean(losses) - - def predict(self, session, inputs): - """Predict the classification of the test set. - - Args: - session: The current TensorFlow session. - inputs: the train paths, x, y and/or nc vectors - - Returns: - The test predictions. - """ - predictions, _ = zip(*self.predict_with_score(session, inputs)) - return np.array(predictions) - - def predict_with_score(self, session, inputs): - """Predict the classification of the test set. - - Args: - session: The current TensorFlow session. - inputs: the test paths, x, y and/or nc vectors - - Returns: - The test predictions along with their scores. - """ - test_pred = [0] * len(inputs) - - for index, instance in enumerate(inputs): - - prediction, scores = session.run( - [self.predictions, self.scores], - feed_dict={self.instance: instance}) - - test_pred[index] = (prediction, scores[prediction]) - - return test_pred - - def __mlp__(self): - """Performs the MLP operations. - - Returns: the prediction object to be computed in a Session - """ - # Feed the paths to the MLP: path_embeddings is - # [num_batch_paths, output_dim], and when we multiply it by W - # ([output_dim, num_classes]), we get a matrix of class distributions: - # [num_batch_paths, num_classes]. 
- self.distributions = tf.matmul(self.path_embeddings, self.weights1) - - # Now, compute weighted average on the class distributions, using the path - # frequency as weights. - - # First, reshape path_freq to the same shape of distributions - self.path_freq = tf.tile(tf.expand_dims(self.path_counts, -1), - [1, self.hparams.num_classes]) - - # Second, multiply the distributions and frequencies element-wise. - self.weighted = tf.multiply(self.path_freq, self.distributions) - - # Finally, take the average to get a tensor of shape [1, num_classes]. - self.weighted_sum = tf.reduce_sum(self.weighted, 0) - self.num_paths = tf.clip_by_value(tf.reduce_sum(self.path_counts), - 1, np.inf) - self.num_paths = tf.tile(tf.expand_dims(self.num_paths, -1), - [self.hparams.num_classes]) - self.scores = tf.div(self.weighted_sum, self.num_paths) - self.predictions = tf.argmax(self.scores) - - # Define the loss function and the optimization algorithm - self.cross_entropies = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=self.scores, labels=tf.reduce_mean(self.batch_labels)) - self.cost = tf.reduce_sum(self.cross_entropies, name='cost') - self.global_step = tf.Variable(0, name='global_step', trainable=False) - self.optimizer = tf.train.AdamOptimizer() - self.train_op = self.optimizer.minimize(self.cost, - global_step=self.global_step) - - def __lstm__(self): - """Defines the LSTM operations. - - Returns: - A matrix of path embeddings. 
- """ - lookup_tables = [self.lemma_lookup, self.pos_lookup, - self.dep_lookup, self.dir_lookup] - - # Split the edges to components: list of 4 tensors - # [num_batch_paths, max_path_len, 1] - self.edge_components = tf.split(self.batch_paths, 4, axis=2) - - # Look up the components embeddings and concatenate them back together - self.path_matrix = tf.concat([ - tf.squeeze(tf.nn.embedding_lookup(lookup_table, component), 2) - for lookup_table, component in - zip(lookup_tables, self.edge_components) - ], axis=2) - - self.sequence_lengths = tf.reshape(self.seq_lengths, [-1]) - - # Define the LSTM. - # The input is [num_batch_paths, max_path_len, input_dim]. - lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.lstm_output_dim) - - # The output is [num_batch_paths, max_path_len, output_dim]. - self.lstm_outputs, _ = tf.nn.dynamic_rnn( - lstm_cell, self.path_matrix, dtype=tf.float32, - sequence_length=self.sequence_lengths) - - # Slice the last *relevant* output for each instance -> - # [num_batch_paths, output_dim] - self.path_embeddings = _extract_last_relevant(self.lstm_outputs, - self.sequence_lengths) - - -def _parse_tensorflow_example(record, max_path_len, input_keep_prob): - """Reads TensorFlow examples from a RecordReader. - - Args: - record: a record with TensorFlow example. - max_path_len: the maximum path length. 
- input_keep_prob: 1 - the word dropout probability - - Returns: - The paths and counts - """ - features = tf.parse_single_example(record, { - 'lemmas': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'postags': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'deplabels': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'dirs': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'counts': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'pathlens': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.int64, allow_missing=True), - 'reprs': - tf.FixedLenSequenceFeature( - shape=(), dtype=tf.string, allow_missing=True), - 'rel_id': - tf.FixedLenFeature([], dtype=tf.int64) - }) - - path_counts = tf.to_float(features['counts']) - seq_lengths = features['pathlens'] - - # Concatenate the edge components to create a path tensor: - # [max_paths_per_ins, max_path_length, 4] - lemmas = _word_dropout( - tf.reshape(features['lemmas'], [-1, max_path_len]), input_keep_prob) - - paths = tf.stack( - [lemmas] + [ - tf.reshape(features[f], [-1, max_path_len]) - for f in ('postags', 'deplabels', 'dirs') - ], - axis=-1) - - path_strings = features['reprs'] - - # Add an empty path to pairs with no paths - paths = tf.cond( - tf.shape(paths)[0] > 0, - lambda: paths, - lambda: tf.zeros([1, max_path_len, 4], dtype=tf.int64)) - - # Paths are left-padded. We reverse them to make them right-padded. 
- #paths = tf.reverse(paths, axis=[1]) - - path_counts = tf.cond( - tf.shape(path_counts)[0] > 0, - lambda: path_counts, - lambda: tf.constant([1.0], dtype=tf.float32)) - - seq_lengths = tf.cond( - tf.shape(seq_lengths)[0] > 0, - lambda: seq_lengths, - lambda: tf.constant([1], dtype=tf.int64)) - - # Duplicate the label for each path - labels = tf.ones_like(path_counts, dtype=tf.int64) * features['rel_id'] - - return paths, path_counts, seq_lengths, path_strings, labels - - -def _extract_last_relevant(output, seq_lengths): - """Get the last relevant LSTM output cell for each batch instance. - - Args: - output: the LSTM outputs - a tensor with shape - [num_paths, output_dim, max_path_len] - seq_lengths: the sequences length per instance - - Returns: - The last relevant LSTM output cell for each batch instance. - """ - max_length = int(output.get_shape()[1]) - path_lengths = tf.clip_by_value(seq_lengths - 1, 0, max_length) - relevant = tf.reduce_sum(tf.multiply(output, tf.expand_dims( - tf.one_hot(path_lengths, max_length), -1)), 1) - return relevant - - -def _word_dropout(words, input_keep_prob): - """Drops words with probability 1 - input_keep_prob. - - Args: - words: a list of lemmas from the paths. - input_keep_prob: the probability to keep the word. - - Returns: - The revised list where some of the words are ed. - """ - # Create the mask: (-1) to drop, 1 to keep - prob = tf.random_uniform(tf.shape(words), 0, 1) - condition = tf.less(prob, (1 - input_keep_prob)) - mask = tf.where(condition, - tf.negative(tf.ones_like(words)), tf.ones_like(words)) - - # We need to keep zeros (), and change other numbers to 1 () - # if their mask is -1. First, we multiply the mask and the words. - # Zeros will stay zeros, and words to drop will become negative. - # Then, we change negative values to 1. 
- masked_words = tf.multiply(mask, words) - condition = tf.less(masked_words, 0) - dropped_words = tf.where(condition, tf.ones_like(words), words) - return dropped_words - - -def compute_path_embeddings(model, session, instances): - """Compute the path embeddings for all the distinct paths. - - Args: - model: The trained path-based model. - session: The current TensorFlow session. - instances: All the train, test and validation instances. - - Returns: - The path to ID index and the path embeddings. - """ - # Get an index for each distinct path - path_index = collections.defaultdict(itertools.count(0).next) - path_vectors = {} - - for instance in instances: - curr_path_embeddings, curr_path_strings = session.run( - [model.path_embeddings, model.path_strings], - feed_dict={model.instance: instance}) - - for i, path in enumerate(curr_path_strings): - if not path: - continue - - # Set a new/existing index for the path - index = path_index[path] - - # Save its vector - path_vectors[index] = curr_path_embeddings[i, :] - - print('Number of distinct paths: %d' % len(path_index)) - return path_index, path_vectors - - -def save_path_embeddings(model, path_vectors, path_index, embeddings_base_path): - """Saves the path embeddings. - - Args: - model: The trained path-based model. - path_vectors: The path embeddings. - path_index: A map from path to ID. - embeddings_base_path: The base directory where the embeddings are. 
- """ - index_range = range(max(path_index.values()) + 1) - path_matrix = [path_vectors[i] for i in index_range] - path_matrix = np.vstack(path_matrix) - - # Save the path embeddings - path_vector_filename = os.path.join( - embeddings_base_path, '%d_path_vectors' % model.lstm_output_dim) - with open(path_vector_filename, 'w') as f_out: - np.save(f_out, path_matrix) - - index_to_path = {i: p for p, i in path_index.iteritems()} - path_vocab = [index_to_path[i] for i in index_range] - - # Save the path vocabulary - path_vocab_filename = os.path.join( - embeddings_base_path, '%d_path_vocab' % model.lstm_output_dim) - with open(path_vocab_filename, 'w') as f_out: - f_out.write('\n'.join(path_vocab)) - f_out.write('\n') - - print('Saved path embeddings.') - - -def load_path_embeddings(path_embeddings_dir, path_dim): - """Loads pretrained path embeddings from a binary file and returns the matrix. - - Args: - path_embeddings_dir: The directory for the path embeddings. - path_dim: The dimension of the path embeddings, used as prefix to the - path_vocab and path_vectors files. - - Returns: - The path embeddings matrix and the path_to_index dictionary. - """ - prefix = path_embeddings_dir + '/%d' % path_dim + '_' - with open(prefix + 'path_vocab') as f_in: - vocab = f_in.read().splitlines() - - vocab_size = len(vocab) - embedding_file = prefix + 'path_vectors' - - print('Embedding file "%s" has %d paths' % (embedding_file, vocab_size)) - - with open(embedding_file) as f_in: - embeddings = np.load(f_in) - - path_to_index = {p: i for i, p in enumerate(vocab)} - return embeddings, path_to_index - - -def get_indicative_paths(model, session, path_index, path_vectors, classes, - save_dir, k=20, threshold=0.8): - """Gets the most indicative paths for each class. - - Args: - model: The trained path-based model. - session: The current TensorFlow session. - path_index: A map from path to ID. - path_vectors: The path embeddings. - classes: The class label names. 
- save_dir: Where to save the paths. - k: The k for top-k paths. - threshold: The threshold above which to consider paths as indicative. - """ - # Define graph variables for this operation - p_path_embedding = tf.placeholder(dtype=tf.float32, - shape=[1, model.lstm_output_dim]) - p_distributions = tf.nn.softmax(tf.matmul(p_path_embedding, model.weights1)) - - # Treat each path as a pair instance with a single path, and get the - # relation distribution for it. Then, take the top paths for each relation. - - # This dictionary contains a relation as a key, and the value is a list of - # tuples of path index and score. A relation r will contain (p, s) if the - # path p is classified to r with a confidence of s. - prediction_per_relation = collections.defaultdict(list) - - index_to_path = {i: p for p, i in path_index.iteritems()} - - # Predict all the paths - for index in range(len(path_index)): - curr_path_vector = path_vectors[index] - - distribution = session.run(p_distributions, - feed_dict={ - p_path_embedding: np.reshape( - curr_path_vector, - [1, model.lstm_output_dim])}) - - distribution = distribution[0, :] - prediction = np.argmax(distribution) - prediction_per_relation[prediction].append( - (index, distribution[prediction])) - - if index % 10000 == 0: - print('Classified %d/%d (%3.2f%%) of the paths' % ( - index, len(path_index), 100 * index / len(path_index))) - - # Retrieve k-best scoring paths for each relation - for relation_index, relation in enumerate(classes): - curr_paths = sorted(prediction_per_relation[relation_index], - key=lambda item: item[1], reverse=True) - above_t = [(p, s) for (p, s) in curr_paths if s >= threshold] - top_k = curr_paths[k+1] - relation_paths = above_t if len(above_t) > len(top_k) else top_k - - paths_filename = os.path.join(save_dir, '%s.paths' % relation) - with open(paths_filename, 'w') as f_out: - for index, score in relation_paths: - print('\t'.join([index_to_path[index], str(score)]), file=f_out) diff --git 
a/research/lexnet_nc/sorted_paths_to_examples.py b/research/lexnet_nc/sorted_paths_to_examples.py deleted file mode 100755 index c21d25d710ae793f6eefd889b98414c923e4fbe6..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/sorted_paths_to_examples.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Takes as input a sorted, tab-separated of paths to produce tf.Examples.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import itertools -import os -import sys -import tensorflow as tf - -import lexnet_common - -tf.flags.DEFINE_string('input', '', 'tab-separated input data') -tf.flags.DEFINE_string('vocab', '', 'a text file containing lemma vocabulary') -tf.flags.DEFINE_string('relations', '', 'a text file containing the relations') -tf.flags.DEFINE_string('output_dir', '', 'output directory') -tf.flags.DEFINE_string('splits', '', 'text file enumerating splits') -tf.flags.DEFINE_string('default_split', '', 'default split for unlabeled pairs') -tf.flags.DEFINE_string('compression', 'GZIP', 'compression for output records') -tf.flags.DEFINE_integer('max_paths', 100, 'maximum number of paths per record') -tf.flags.DEFINE_integer('max_pathlen', 8, 'maximum path length') -FLAGS = 
tf.flags.FLAGS - - -def _int64_features(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def _bytes_features(value): - value = [v.encode('utf-8') if isinstance(v, unicode) else v for v in value] - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - - -class CreateExampleFn(object): - - def __init__(self): - # Read the vocabulary. N.B. that 0 = PAD, 1 = UNK, 2 = , 3 = , hence - # the enumeration starting at 4. - with tf.gfile.GFile(FLAGS.vocab) as fh: - self.vocab = {w: ix for ix, w in enumerate(fh.read().splitlines(), start=4)} - - self.vocab.update({'': 0, '': 1, '': 2, '': 3}) - - # Read the relations. - with tf.gfile.GFile(FLAGS.relations) as fh: - self.relations = {r: ix for ix, r in enumerate(fh.read().splitlines())} - - # Some hackery to map from SpaCy postags to Google's. - lexnet_common.POSTAG_TO_ID['PROPN'] = lexnet_common.POSTAG_TO_ID['NOUN'] - lexnet_common.POSTAG_TO_ID['PRON'] = lexnet_common.POSTAG_TO_ID['NOUN'] - lexnet_common.POSTAG_TO_ID['CCONJ'] = lexnet_common.POSTAG_TO_ID['CONJ'] - #lexnet_common.DEPLABEL_TO_ID['relcl'] = lexnet_common.DEPLABEL_TO_ID['rel'] - #lexnet_common.DEPLABEL_TO_ID['compound'] = lexnet_common.DEPLABEL_TO_ID['xcomp'] - #lexnet_common.DEPLABEL_TO_ID['oprd'] = lexnet_common.DEPLABEL_TO_ID['UNK'] - - def __call__(self, mod, head, rel, raw_paths): - # Drop any really long paths. - paths = [] - counts = [] - for raw, count in raw_paths.most_common(FLAGS.max_paths): - path = raw.split('::') - if len(path) <= FLAGS.max_pathlen: - paths.append(path) - counts.append(count) - - if not paths: - return None - - # Compute the true length. - pathlens = [len(path) for path in paths] - - # Pad each path out to max_pathlen so the LSTM can eat it. - paths = ( - itertools.islice( - itertools.chain(path, itertools.repeat('/PAD/PAD/_')), - FLAGS.max_pathlen) - for path in paths) - - # Split the lemma, POS, dependency label, and direction each into a - # separate feature. 
- lemmas, postags, deplabels, dirs = zip( - *(part.split('/') for part in itertools.chain(*paths))) - - lemmas = [self.vocab.get(lemma, 1) for lemma in lemmas] - postags = [lexnet_common.POSTAG_TO_ID[pos] for pos in postags] - deplabels = [lexnet_common.DEPLABEL_TO_ID.get(dep, 1) for dep in deplabels] - dirs = [lexnet_common.DIR_TO_ID.get(d, 0) for d in dirs] - - return tf.train.Example(features=tf.train.Features(feature={ - 'pair': _bytes_features(['::'.join((mod, head))]), - 'rel': _bytes_features([rel]), - 'rel_id': _int64_features([self.relations[rel]]), - 'reprs': _bytes_features(raw_paths), - 'pathlens': _int64_features(pathlens), - 'counts': _int64_features(counts), - 'lemmas': _int64_features(lemmas), - 'dirs': _int64_features(dirs), - 'deplabels': _int64_features(deplabels), - 'postags': _int64_features(postags), - 'x_embedding_id': _int64_features([self.vocab[mod]]), - 'y_embedding_id': _int64_features([self.vocab[head]]), - })) - - -def main(_): - # Read the splits file, if there is one. - assignments = {} - if FLAGS.splits: - with tf.gfile.GFile(FLAGS.splits) as fh: - parts = (line.split('\t') for line in fh.read().splitlines()) - assignments = {(mod, head): split for mod, head, split in parts} - - splits = set(assignments.itervalues()) - if FLAGS.default_split: - default_split = FLAGS.default_split - splits.add(FLAGS.default_split) - elif splits: - default_split = iter(splits).next() - else: - print('Please specify --splits, --default_split, or both', file=sys.stderr) - return 1 - - last_mod, last_head, last_label = None, None, None - raw_paths = collections.Counter() - - # Keep track of pairs we've seen to ensure that we don't get unsorted data. 
- seen_labeled_pairs = set() - - # Set up output compression - compression_type = getattr( - tf.python_io.TFRecordCompressionType, FLAGS.compression) - options = tf.python_io.TFRecordOptions(compression_type=compression_type) - - writers = { - split: tf.python_io.TFRecordWriter( - os.path.join(FLAGS.output_dir, '%s.tfrecs.gz' % split), - options=options) - for split in splits} - - create_example = CreateExampleFn() - - in_fh = sys.stdin if not FLAGS.input else tf.gfile.GFile(FLAGS.input) - for lineno, line in enumerate(in_fh, start=1): - if lineno % 100 == 0: - print('\rProcessed %d lines...' % lineno, end='', file=sys.stderr) - - parts = line.decode('utf-8').strip().split('\t') - if len(parts) != 5: - print('Skipping line %d: %d columns (expected 5)' % ( - lineno, len(parts)), file=sys.stderr) - - continue - - mod, head, label, raw_path, source = parts - if mod == last_mod and head == last_head and label == last_label: - raw_paths.update([raw_path]) - continue - - if last_mod and last_head and last_label and raw_paths: - if (last_mod, last_head, last_label) in seen_labeled_pairs: - print('It looks like the input data is not sorted; ignoring extra ' - 'record for (%s::%s, %s) at line %d' % ( - last_mod, last_head, last_label, lineno)) - else: - ex = create_example(last_mod, last_head, last_label, raw_paths) - if ex: - split = assignments.get((last_mod, last_head), default_split) - writers[split].write(ex.SerializeToString()) - - seen_labeled_pairs.add((last_mod, last_head, last_label)) - - last_mod, last_head, last_label = mod, head, label - raw_paths = collections.Counter() - - if last_mod and last_head and last_label and raw_paths: - ex = create_example(last_mod, last_head, last_label, raw_paths) - if ex: - split = assignments.get((last_mod, last_head), default_split) - writers[split].write(ex.SerializeToString()) - - for writer in writers.itervalues(): - writer.close() - - -if __name__ == '__main__': - tf.app.run() diff --git 
a/research/lexnet_nc/text_embeddings_to_binary.py b/research/lexnet_nc/text_embeddings_to_binary.py deleted file mode 100755 index 8226a7654e6da733ba1e8c46810a8ec8afd7a2c0..0000000000000000000000000000000000000000 --- a/research/lexnet_nc/text_embeddings_to_binary.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017, 2018 Google, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts a text embedding file into a binary format for quicker loading.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -tf.flags.DEFINE_string('input', '', 'text file containing embeddings') -tf.flags.DEFINE_string('output_vocab', '', 'output file for vocabulary') -tf.flags.DEFINE_string('output_npy', '', 'output file for binary') -FLAGS = tf.flags.FLAGS - -def main(_): - vecs = [] - vocab = [] - with tf.gfile.GFile(FLAGS.input) as fh: - for line in fh: - parts = line.strip().split() - vocab.append(parts[0]) - vecs.append([float(x) for x in parts[1:]]) - - with tf.gfile.GFile(FLAGS.output_vocab, 'w') as fh: - fh.write('\n'.join(vocab)) - fh.write('\n') - - vecs = np.array(vecs, dtype=np.float32) - np.save(FLAGS.output_npy, vecs, allow_pickle=False) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/lm_1b/BUILD 
b/research/lm_1b/BUILD deleted file mode 100644 index ca5bc1f6ce4347a3b5f18d1bb59284aa9d07a567..0000000000000000000000000000000000000000 --- a/research/lm_1b/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -package(default_visibility = [":internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//lm_1b/...", - ], -) - -py_library( - name = "data_utils", - srcs = ["data_utils.py"], -) - -py_binary( - name = "lm_1b_eval", - srcs = [ - "lm_1b_eval.py", - ], - deps = [ - ":data_utils", - ], -) diff --git a/research/lm_1b/README.md b/research/lm_1b/README.md deleted file mode 100644 index f48afbfe23aff6681e641296e73b2c6b0e5a9b48..0000000000000000000000000000000000000000 --- a/research/lm_1b/README.md +++ /dev/null @@ -1,198 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -Language Model on One Billion Word Benchmark - -Authors: - -Oriol Vinyals (vinyals@google.com, github: OriolVinyals), -Xin Pan - -Paper Authors: - -Rafal Jozefowicz, Oriol Vinyals, Mike Schuster, Noam Shazeer, Yonghui Wu - -TL;DR - -This is a pretrained model on One Billion Word Benchmark. -If you use this model in your publication, please cite the original paper: - -@article{jozefowicz2016exploring, - title={Exploring the Limits of Language Modeling}, - author={Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike - and Shazeer, Noam and Wu, Yonghui}, - journal={arXiv preprint arXiv:1602.02410}, - year={2016} -} - -Introduction - -In this release, we open source a model trained on the One Billion Word -Benchmark (http://arxiv.org/abs/1312.3005), a large language corpus in English -which was released in 2013. 
This dataset contains about one billion words, and -has a vocabulary size of about 800K words. It contains mostly news data. Since -sentences in the training set are shuffled, models can ignore the context and -focus on sentence level language modeling. - -In the original release and subsequent work, people have used the same test set -to train models on this dataset as a standard benchmark for language modeling. -Recently, we wrote an article (http://arxiv.org/abs/1602.02410) describing a -model hybrid between character CNN, a large and deep LSTM, and a specific -Softmax architecture which allowed us to train the best model on this dataset -thus far, almost halving the best perplexity previously obtained by others. - -Code Release - -The open-sourced components include: - -* TensorFlow GraphDef proto buffer text file. -* TensorFlow pre-trained checkpoint shards. -* Code used to evaluate the pre-trained model. -* Vocabulary file. -* Test set from LM-1B evaluation. - -The code supports 4 evaluation modes: - -* Given provided dataset, calculate the model's perplexity. -* Given a prefix sentence, predict the next words. -* Dump the softmax embedding, character-level CNN word embeddings. -* Give a sentence, dump the embedding from the LSTM state. - -Results - -Model | Test Perplexity | Number of Params [billions] -------|-----------------|---------------------------- -Sigmoid-RNN-2048 [Blackout] | 68.3 | 4.1 -Interpolated KN 5-gram, 1.1B n-grams [chelba2013one] | 67.6 | 1.76 -Sparse Non-Negative Matrix LM [shazeer2015sparse] | 52.9 | 33 -RNN-1024 + MaxEnt 9-gram features [chelba2013one] | 51.3 | 20 -LSTM-512-512 | 54.1 | 0.82 -LSTM-1024-512 | 48.2 | 0.82 -LSTM-2048-512 | 43.7 | 0.83 -LSTM-8192-2048 (No Dropout) | 37.9 | 3.3 -LSTM-8192-2048 (50\% Dropout) | 32.2 | 3.3 -2-Layer LSTM-8192-1024 (BIG LSTM) | 30.6 | 1.8 -(THIS RELEASE) BIG LSTM+CNN Inputs | 30.0 | 1.04 - -How To Run - -Prerequisites: - -* Install TensorFlow. -* Install Bazel. 
-* Download the data files: - * Model GraphDef file: - [link](http://download.tensorflow.org/models/LM_LSTM_CNN/graph-2016-09-10.pbtxt) - * Model Checkpoint sharded file: - [1](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-base) - [2](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-char-embedding) - [3](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-lstm) - [4](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax0) - [5](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax1) - [6](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax2) - [7](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax3) - [8](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax4) - [9](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax5) - [10](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax6) - [11](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax7) - [12](http://download.tensorflow.org/models/LM_LSTM_CNN/all_shards-2016-09-10/ckpt-softmax8) - * Vocabulary file: - [link](http://download.tensorflow.org/models/LM_LSTM_CNN/vocab-2016-09-10.txt) - * test dataset: link - [link](http://download.tensorflow.org/models/LM_LSTM_CNN/test/news.en.heldout-00000-of-00050) -* It is recommended to run on a modern desktop instead of a laptop. - -```shell -# 1. Clone the code to your workspace. -# 2. Download the data to your workspace. -# 3. Create an empty WORKSPACE file in your workspace. -# 4. Create an empty output directory in your workspace. 
-# Example directory structure below: -$ ls -R -.: -data lm_1b output WORKSPACE - -./data: -ckpt-base ckpt-lstm ckpt-softmax1 ckpt-softmax3 ckpt-softmax5 -ckpt-softmax7 graph-2016-09-10.pbtxt vocab-2016-09-10.txt -ckpt-char-embedding ckpt-softmax0 ckpt-softmax2 ckpt-softmax4 ckpt-softmax6 -ckpt-softmax8 news.en.heldout-00000-of-00050 - -./lm_1b: -BUILD data_utils.py lm_1b_eval.py README.md - -./output: - -# Build the codes. -$ bazel build -c opt lm_1b/... -# Run sample mode: -$ bazel-bin/lm_1b/lm_1b_eval --mode sample \ - --prefix "I love that I" \ - --pbtxt data/graph-2016-09-10.pbtxt \ - --vocab_file data/vocab-2016-09-10.txt \ - --ckpt 'data/ckpt-*' -...(omitted some TensorFlow output) -I love -I love that -I love that I -I love that I find -I love that I find that -I love that I find that amazing -...(omitted) - -# Run eval mode: -$ bazel-bin/lm_1b/lm_1b_eval --mode eval \ - --pbtxt data/graph-2016-09-10.pbtxt \ - --vocab_file data/vocab-2016-09-10.txt \ - --input_data data/news.en.heldout-00000-of-00050 \ - --ckpt 'data/ckpt-*' -...(omitted some TensorFlow output) -Loaded step 14108582. -# perplexity is high initially because words without context are harder to -# predict. -Eval Step: 0, Average Perplexity: 2045.512297. -Eval Step: 1, Average Perplexity: 229.478699. -Eval Step: 2, Average Perplexity: 208.116787. -Eval Step: 3, Average Perplexity: 338.870601. -Eval Step: 4, Average Perplexity: 228.950107. -Eval Step: 5, Average Perplexity: 197.685857. -Eval Step: 6, Average Perplexity: 156.287063. -Eval Step: 7, Average Perplexity: 124.866189. -Eval Step: 8, Average Perplexity: 147.204975. -Eval Step: 9, Average Perplexity: 90.124864. -Eval Step: 10, Average Perplexity: 59.897914. -Eval Step: 11, Average Perplexity: 42.591137. -...(omitted) -Eval Step: 4529, Average Perplexity: 29.243668. -Eval Step: 4530, Average Perplexity: 29.302362. -Eval Step: 4531, Average Perplexity: 29.285674. -...(omitted. At convergence, it should be around 30.) 
- -# Run dump_emb mode: -$ bazel-bin/lm_1b/lm_1b_eval --mode dump_emb \ - --pbtxt data/graph-2016-09-10.pbtxt \ - --vocab_file data/vocab-2016-09-10.txt \ - --ckpt 'data/ckpt-*' \ - --save_dir output -...(omitted some TensorFlow output) -Finished softmax weights -Finished word embedding 0/793471 -Finished word embedding 1/793471 -Finished word embedding 2/793471 -...(omitted) -$ ls output/ -embeddings_softmax.npy ... - -# Run dump_lstm_emb mode: -$ bazel-bin/lm_1b/lm_1b_eval --mode dump_lstm_emb \ - --pbtxt data/graph-2016-09-10.pbtxt \ - --vocab_file data/vocab-2016-09-10.txt \ - --ckpt 'data/ckpt-*' \ - --sentence "I love who I am ." \ - --save_dir output -$ ls output/ -lstm_emb_step_0.npy lstm_emb_step_2.npy lstm_emb_step_4.npy -lstm_emb_step_6.npy lstm_emb_step_1.npy lstm_emb_step_3.npy -lstm_emb_step_5.npy -``` diff --git a/research/lm_1b/data_utils.py b/research/lm_1b/data_utils.py deleted file mode 100644 index ad8d3391ef6db07c1d6c234450a6d23a8e19a178..0000000000000000000000000000000000000000 --- a/research/lm_1b/data_utils.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""A library for loading 1B word benchmark dataset.""" - -import random - -import numpy as np -import tensorflow as tf - - -class Vocabulary(object): - """Class that holds a vocabulary for the dataset.""" - - def __init__(self, filename): - """Initialize vocabulary. - - Args: - filename: Vocabulary file name. - """ - - self._id_to_word = [] - self._word_to_id = {} - self._unk = -1 - self._bos = -1 - self._eos = -1 - - with tf.gfile.Open(filename) as f: - idx = 0 - for line in f: - word_name = line.strip() - if word_name == '': - self._bos = idx - elif word_name == '': - self._eos = idx - elif word_name == '': - self._unk = idx - if word_name == '!!!MAXTERMID': - continue - - self._id_to_word.append(word_name) - self._word_to_id[word_name] = idx - idx += 1 - - @property - def bos(self): - return self._bos - - @property - def eos(self): - return self._eos - - @property - def unk(self): - return self._unk - - @property - def size(self): - return len(self._id_to_word) - - def word_to_id(self, word): - if word in self._word_to_id: - return self._word_to_id[word] - return self.unk - - def id_to_word(self, cur_id): - if cur_id < self.size: - return self._id_to_word[cur_id] - return 'ERROR' - - def decode(self, cur_ids): - """Convert a list of ids to a sentence, with space inserted.""" - return ' '.join([self.id_to_word(cur_id) for cur_id in cur_ids]) - - def encode(self, sentence): - """Convert a sentence to a list of ids, with special tokens added.""" - word_ids = [self.word_to_id(cur_word) for cur_word in sentence.split()] - return np.array([self.bos] + word_ids + [self.eos], dtype=np.int32) - - -class CharsVocabulary(Vocabulary): - """Vocabulary containing character-level information.""" - - def __init__(self, filename, max_word_length): - super(CharsVocabulary, self).__init__(filename) - self._max_word_length = max_word_length - chars_set = set() - - for word in self._id_to_word: - 
chars_set |= set(word) - - free_ids = [] - for i in range(256): - if chr(i) in chars_set: - continue - free_ids.append(chr(i)) - - if len(free_ids) < 5: - raise ValueError('Not enough free char ids: %d' % len(free_ids)) - - self.bos_char = free_ids[0] # - self.eos_char = free_ids[1] # - self.bow_char = free_ids[2] # - self.eow_char = free_ids[3] # - self.pad_char = free_ids[4] # - - chars_set |= {self.bos_char, self.eos_char, self.bow_char, self.eow_char, - self.pad_char} - - self._char_set = chars_set - num_words = len(self._id_to_word) - - self._word_char_ids = np.zeros([num_words, max_word_length], dtype=np.int32) - - self.bos_chars = self._convert_word_to_char_ids(self.bos_char) - self.eos_chars = self._convert_word_to_char_ids(self.eos_char) - - for i, word in enumerate(self._id_to_word): - self._word_char_ids[i] = self._convert_word_to_char_ids(word) - - @property - def word_char_ids(self): - return self._word_char_ids - - @property - def max_word_length(self): - return self._max_word_length - - def _convert_word_to_char_ids(self, word): - code = np.zeros([self.max_word_length], dtype=np.int32) - code[:] = ord(self.pad_char) - - if len(word) > self.max_word_length - 2: - word = word[:self.max_word_length-2] - cur_word = self.bow_char + word + self.eow_char - for j in range(len(cur_word)): - code[j] = ord(cur_word[j]) - return code - - def word_to_char_ids(self, word): - if word in self._word_to_id: - return self._word_char_ids[self._word_to_id[word]] - else: - return self._convert_word_to_char_ids(word) - - def encode_chars(self, sentence): - chars_ids = [self.word_to_char_ids(cur_word) - for cur_word in sentence.split()] - return np.vstack([self.bos_chars] + chars_ids + [self.eos_chars]) - - -def get_batch(generator, batch_size, num_steps, max_word_length, pad=False): - """Read batches of input.""" - cur_stream = [None] * batch_size - - inputs = np.zeros([batch_size, num_steps], np.int32) - char_inputs = np.zeros([batch_size, num_steps, max_word_length], 
np.int32) - global_word_ids = np.zeros([batch_size, num_steps], np.int32) - targets = np.zeros([batch_size, num_steps], np.int32) - weights = np.ones([batch_size, num_steps], np.float32) - - no_more_data = False - while True: - inputs[:] = 0 - char_inputs[:] = 0 - global_word_ids[:] = 0 - targets[:] = 0 - weights[:] = 0.0 - - for i in range(batch_size): - cur_pos = 0 - - while cur_pos < num_steps: - if cur_stream[i] is None or len(cur_stream[i][0]) <= 1: - try: - cur_stream[i] = list(generator.next()) - except StopIteration: - # No more data, exhaust current streams and quit - no_more_data = True - break - - how_many = min(len(cur_stream[i][0]) - 1, num_steps - cur_pos) - next_pos = cur_pos + how_many - - inputs[i, cur_pos:next_pos] = cur_stream[i][0][:how_many] - char_inputs[i, cur_pos:next_pos] = cur_stream[i][1][:how_many] - global_word_ids[i, cur_pos:next_pos] = cur_stream[i][2][:how_many] - targets[i, cur_pos:next_pos] = cur_stream[i][0][1:how_many+1] - weights[i, cur_pos:next_pos] = 1.0 - - cur_pos = next_pos - cur_stream[i][0] = cur_stream[i][0][how_many:] - cur_stream[i][1] = cur_stream[i][1][how_many:] - cur_stream[i][2] = cur_stream[i][2][how_many:] - - if pad: - break - - if no_more_data and np.sum(weights) == 0: - # There is no more data and this is an empty batch. Done! - break - yield inputs, char_inputs, global_word_ids, targets, weights - - -class LM1BDataset(object): - """Utility class for 1B word benchmark dataset. - - The current implementation reads the data from the tokenized text files. - """ - - def __init__(self, filepattern, vocab): - """Initialize LM1BDataset reader. - - Args: - filepattern: Dataset file pattern. - vocab: Vocabulary. 
- """ - self._vocab = vocab - self._all_shards = tf.gfile.Glob(filepattern) - tf.logging.info('Found %d shards at %s', len(self._all_shards), filepattern) - - def _load_random_shard(self): - """Randomly select a file and read it.""" - return self._load_shard(random.choice(self._all_shards)) - - def _load_shard(self, shard_name): - """Read one file and convert to ids. - - Args: - shard_name: file path. - - Returns: - list of (id, char_id, global_word_id) tuples. - """ - tf.logging.info('Loading data from: %s', shard_name) - with tf.gfile.Open(shard_name) as f: - sentences = f.readlines() - chars_ids = [self.vocab.encode_chars(sentence) for sentence in sentences] - ids = [self.vocab.encode(sentence) for sentence in sentences] - - global_word_ids = [] - current_idx = 0 - for word_ids in ids: - current_size = len(word_ids) - 1 # without symbol - cur_ids = np.arange(current_idx, current_idx + current_size) - global_word_ids.append(cur_ids) - current_idx += current_size - - tf.logging.info('Loaded %d words.', current_idx) - tf.logging.info('Finished loading') - return zip(ids, chars_ids, global_word_ids) - - def _get_sentence(self, forever=True): - while True: - ids = self._load_random_shard() - for current_ids in ids: - yield current_ids - if not forever: - break - - def get_batch(self, batch_size, num_steps, pad=False, forever=True): - return get_batch(self._get_sentence(forever), batch_size, num_steps, - self.vocab.max_word_length, pad=pad) - - @property - def vocab(self): - return self._vocab diff --git a/research/lm_1b/lm_1b_eval.py b/research/lm_1b/lm_1b_eval.py deleted file mode 100644 index ce8634757558c135ba137a9b9e09a733977adc3a..0000000000000000000000000000000000000000 --- a/research/lm_1b/lm_1b_eval.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Eval pre-trained 1 billion word language model. -""" -import os -import sys - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from google.protobuf import text_format -import data_utils - -FLAGS = tf.flags.FLAGS -# General flags. -tf.flags.DEFINE_string('mode', 'eval', - 'One of [sample, eval, dump_emb, dump_lstm_emb]. ' - '"sample" mode samples future word predictions, using ' - 'FLAGS.prefix as prefix (prefix could be left empty). ' - '"eval" mode calculates perplexity of the ' - 'FLAGS.input_data. ' - '"dump_emb" mode dumps word and softmax embeddings to ' - 'FLAGS.save_dir. embeddings are dumped in the same ' - 'order as words in vocabulary. All words in vocabulary ' - 'are dumped.' - 'dump_lstm_emb dumps lstm embeddings of FLAGS.sentence ' - 'to FLAGS.save_dir.') -tf.flags.DEFINE_string('pbtxt', '', - 'GraphDef proto text file used to construct model ' - 'structure.') -tf.flags.DEFINE_string('ckpt', '', - 'Checkpoint directory used to fill model values.') -tf.flags.DEFINE_string('vocab_file', '', 'Vocabulary file.') -tf.flags.DEFINE_string('save_dir', '', - 'Used for "dump_emb" mode to save word embeddings.') -# sample mode flags. 
-tf.flags.DEFINE_string('prefix', '', - 'Used for "sample" mode to predict next words.') -tf.flags.DEFINE_integer('max_sample_words', 100, - 'Sampling stops either when is met or this number ' - 'of steps has passed.') -tf.flags.DEFINE_integer('num_samples', 3, - 'Number of samples to generate for the prefix.') -# dump_lstm_emb mode flags. -tf.flags.DEFINE_string('sentence', '', - 'Used as input for "dump_lstm_emb" mode.') -# eval mode flags. -tf.flags.DEFINE_string('input_data', '', - 'Input data files for eval model.') -tf.flags.DEFINE_integer('max_eval_steps', 1000000, - 'Maximum mumber of steps to run "eval" mode.') - - -# For saving demo resources, use batch size 1 and step 1. -BATCH_SIZE = 1 -NUM_TIMESTEPS = 1 -MAX_WORD_LEN = 50 - - -def _LoadModel(gd_file, ckpt_file): - """Load the model from GraphDef and Checkpoint. - - Args: - gd_file: GraphDef proto text file. - ckpt_file: TensorFlow Checkpoint file. - - Returns: - TensorFlow session and tensors dict. - """ - with tf.Graph().as_default(): - sys.stderr.write('Recovering graph.\n') - with tf.gfile.FastGFile(gd_file, 'r') as f: - s = f.read().decode() - gd = tf.GraphDef() - text_format.Merge(s, gd) - - tf.logging.info('Recovering Graph %s', gd_file) - t = {} - [t['states_init'], t['lstm/lstm_0/control_dependency'], - t['lstm/lstm_1/control_dependency'], t['softmax_out'], t['class_ids_out'], - t['class_weights_out'], t['log_perplexity_out'], t['inputs_in'], - t['targets_in'], t['target_weights_in'], t['char_inputs_in'], - t['all_embs'], t['softmax_weights'], t['global_step'] - ] = tf.import_graph_def(gd, {}, ['states_init', - 'lstm/lstm_0/control_dependency:0', - 'lstm/lstm_1/control_dependency:0', - 'softmax_out:0', - 'class_ids_out:0', - 'class_weights_out:0', - 'log_perplexity_out:0', - 'inputs_in:0', - 'targets_in:0', - 'target_weights_in:0', - 'char_inputs_in:0', - 'all_embs_out:0', - 'Reshape_3:0', - 'global_step:0'], name='') - - sys.stderr.write('Recovering checkpoint %s\n' % ckpt_file) - sess = 
tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - sess.run('save/restore_all', {'save/Const:0': ckpt_file}) - sess.run(t['states_init']) - - return sess, t - - -def _EvalModel(dataset): - """Evaluate model perplexity using provided dataset. - - Args: - dataset: LM1BDataset object. - """ - sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt) - - current_step = t['global_step'].eval(session=sess) - sys.stderr.write('Loaded step %d.\n' % current_step) - - data_gen = dataset.get_batch(BATCH_SIZE, NUM_TIMESTEPS, forever=False) - sum_num = 0.0 - sum_den = 0.0 - perplexity = 0.0 - for i, (inputs, char_inputs, _, targets, weights) in enumerate(data_gen): - input_dict = {t['inputs_in']: inputs, - t['targets_in']: targets, - t['target_weights_in']: weights} - if 'char_inputs_in' in t: - input_dict[t['char_inputs_in']] = char_inputs - log_perp = sess.run(t['log_perplexity_out'], feed_dict=input_dict) - - if np.isnan(log_perp): - sys.stderr.error('log_perplexity is Nan.\n') - else: - sum_num += log_perp * weights.mean() - sum_den += weights.mean() - if sum_den > 0: - perplexity = np.exp(sum_num / sum_den) - - sys.stderr.write('Eval Step: %d, Average Perplexity: %f.\n' % - (i, perplexity)) - - if i > FLAGS.max_eval_steps: - break - - -def _SampleSoftmax(softmax): - return min(np.sum(np.cumsum(softmax) < np.random.rand()), len(softmax) - 1) - - -def _SampleModel(prefix_words, vocab): - """Predict next words using the given prefix words. - - Args: - prefix_words: Prefix words. - vocab: Vocabulary. Contains max word chard id length and converts between - words and ids. 
- """ - targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32) - - sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt) - - if prefix_words.find('') != 0: - prefix_words = ' ' + prefix_words - - prefix = [vocab.word_to_id(w) for w in prefix_words.split()] - prefix_char_ids = [vocab.word_to_char_ids(w) for w in prefix_words.split()] - for _ in xrange(FLAGS.num_samples): - inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - char_ids_inputs = np.zeros( - [BATCH_SIZE, NUM_TIMESTEPS, vocab.max_word_length], np.int32) - samples = prefix[:] - char_ids_samples = prefix_char_ids[:] - sent = '' - while True: - inputs[0, 0] = samples[0] - char_ids_inputs[0, 0, :] = char_ids_samples[0] - samples = samples[1:] - char_ids_samples = char_ids_samples[1:] - - softmax = sess.run(t['softmax_out'], - feed_dict={t['char_inputs_in']: char_ids_inputs, - t['inputs_in']: inputs, - t['targets_in']: targets, - t['target_weights_in']: weights}) - - sample = _SampleSoftmax(softmax[0]) - sample_char_ids = vocab.word_to_char_ids(vocab.id_to_word(sample)) - - if not samples: - samples = [sample] - char_ids_samples = [sample_char_ids] - sent += vocab.id_to_word(samples[0]) + ' ' - sys.stderr.write('%s\n' % sent) - - if (vocab.id_to_word(samples[0]) == '' or - len(sent) > FLAGS.max_sample_words): - break - - -def _DumpEmb(vocab): - """Dump the softmax weights and word embeddings to files. - - Args: - vocab: Vocabulary. Contains vocabulary size and converts word to ids. - """ - assert FLAGS.save_dir, 'Must specify FLAGS.save_dir for dump_emb.' 
- inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32) - - sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt) - - softmax_weights = sess.run(t['softmax_weights']) - fname = FLAGS.save_dir + '/embeddings_softmax.npy' - with tf.gfile.Open(fname, mode='w') as f: - np.save(f, softmax_weights) - sys.stderr.write('Finished softmax weights\n') - - all_embs = np.zeros([vocab.size, 1024]) - for i in xrange(vocab.size): - input_dict = {t['inputs_in']: inputs, - t['targets_in']: targets, - t['target_weights_in']: weights} - if 'char_inputs_in' in t: - input_dict[t['char_inputs_in']] = ( - vocab.word_char_ids[i].reshape([-1, 1, MAX_WORD_LEN])) - embs = sess.run(t['all_embs'], input_dict) - all_embs[i, :] = embs - sys.stderr.write('Finished word embedding %d/%d\n' % (i, vocab.size)) - - fname = FLAGS.save_dir + '/embeddings_char_cnn.npy' - with tf.gfile.Open(fname, mode='w') as f: - np.save(f, all_embs) - sys.stderr.write('Embedding file saved\n') - - -def _DumpSentenceEmbedding(sentence, vocab): - """Predict next words using the given prefix words. - - Args: - sentence: Sentence words. - vocab: Vocabulary. Contains max word chard id length and converts between - words and ids. 
- """ - targets = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - weights = np.ones([BATCH_SIZE, NUM_TIMESTEPS], np.float32) - - sess, t = _LoadModel(FLAGS.pbtxt, FLAGS.ckpt) - - if sentence.find('') != 0: - sentence = ' ' + sentence - - word_ids = [vocab.word_to_id(w) for w in sentence.split()] - char_ids = [vocab.word_to_char_ids(w) for w in sentence.split()] - - inputs = np.zeros([BATCH_SIZE, NUM_TIMESTEPS], np.int32) - char_ids_inputs = np.zeros( - [BATCH_SIZE, NUM_TIMESTEPS, vocab.max_word_length], np.int32) - for i in xrange(len(word_ids)): - inputs[0, 0] = word_ids[i] - char_ids_inputs[0, 0, :] = char_ids[i] - - # Add 'lstm/lstm_0/control_dependency' if you want to dump previous layer - # LSTM. - lstm_emb = sess.run(t['lstm/lstm_1/control_dependency'], - feed_dict={t['char_inputs_in']: char_ids_inputs, - t['inputs_in']: inputs, - t['targets_in']: targets, - t['target_weights_in']: weights}) - - fname = os.path.join(FLAGS.save_dir, 'lstm_emb_step_%d.npy' % i) - with tf.gfile.Open(fname, mode='w') as f: - np.save(f, lstm_emb) - sys.stderr.write('LSTM embedding step %d file saved\n' % i) - - -def main(unused_argv): - vocab = data_utils.CharsVocabulary(FLAGS.vocab_file, MAX_WORD_LEN) - - if FLAGS.mode == 'eval': - dataset = data_utils.LM1BDataset(FLAGS.input_data, vocab) - _EvalModel(dataset) - elif FLAGS.mode == 'sample': - _SampleModel(FLAGS.prefix, vocab) - elif FLAGS.mode == 'dump_emb': - _DumpEmb(vocab) - elif FLAGS.mode == 'dump_lstm_emb': - _DumpSentenceEmbedding(FLAGS.sentence, vocab) - else: - raise Exception('Mode not supported.') - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/lm_commonsense/README.md b/research/lm_commonsense/README.md deleted file mode 100644 index 78c8f53ca226f09c4b185490d6966f98bf584889..0000000000000000000000000000000000000000 --- a/research/lm_commonsense/README.md +++ /dev/null @@ -1,170 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) 
-![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# A Simple Method for Commonsense Reasoning - -This repository contains code to reproduce results from [*A Simple Method for Commonsense Reasoning*](https://arxiv.org/abs/1806.02847). - -Authors and contact: - -* Trieu H. Trinh (thtrieu@google.com, github: thtrieu) -* Quoc V. Le (qvl@google.com) - -## TL;DR - -Commonsense reasoning is a long-standing challenge for deep learning. For example, -it is difficult to use neural networks to tackle the Winograd Schema dataset - a difficult subset of Pronoun Disambiguation problems. In this work, we use language models to score substitued sentences to decide the correct reference of the ambiguous pronoun (see Figure below for an example). - -![Figure 1. Overview of our method.](method.jpg) - -This simple unsupervised method achieves new state-of-the-art (*as of June 1st, 2018*) results on both benchmark PDP-60 and WSC-273 (See Table below), without using rule-based reasoning nor expensive annotated knowledge bases. - -| Commonsense-reasoning test | Previous best result | Ours | -| ----------------------------|:----------------------:|:-----:| -| Pronoun Disambiguation | 66.7% | 70% | -| Winograd Schema Challenge | 52.8% | 63.7% | - - - -## Citation - -If you use our released models below in your publication, please cite the original paper: - -@article{TBD} - - -## Requirements -* Python >=2.6 -* Tensorflow >= v1.4 -* Numpy >= 1.12.1 - -## Details of this release - -The open-sourced components include: - -* Test sets from Pronoun Disambiguation Problem (PDP-60) and Winograd Schema Challenges (WSC-273). -* Tensorflow metagraph and checkpoints of 14 language models (See Appendix A in the paper). -* A vocabulary file. -* Code to reproduce results from the original paper. - -## How to run - -### 1. 
Download data files - -Download all files from the [Google Cloud Storage of this project](https://console.cloud.google.com/storage/browser/commonsense-reasoning/). The easiest way is to install and use `gsutil cp` command-line tool (See [install gsutil](https://cloud.google.com/storage/docs/gsutil_install)). - - -```shell -# Download everything from the project gs://commonsense-reasoning -$ gsutil cp -R gs://commonsense-reasoning/* . -Copying gs://commonsense-reasoning/reproduce/vocab.txt... -Copying gs://commonsense-reasoning/reproduce/commonsense_test/pdp60.json... -Copying gs://commonsense-reasoning/reproduce/commonsense_test/wsc273.json... - -...(omitted) -``` - -All downloaded content should be in `./reproduce/`. This includes two tests `pdp60.json` and `wsc273.json`, a vocabulary file `vocab.txt` and checkpoints for all 14 language models, each includes three files (`.data`, `.index` and `.meta`). All checkpoint names start with `ckpt-best` since they are saved at the best perplexity on a hold-out text corpus. 
- -```shell -# Check for the content -$ ls reproduce/* -reproduce/vocab.txt - -reproduce/commonsense_test: -pdp60.json wsc273.json - -reproduce/lm01: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm02: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm03: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm04: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm05: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm06: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm07: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm08: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm09: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm10: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm11: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm12: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm13: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta - -reproduce/lm14: -ckpt-best.data-00000-of-00001 ckpt-best.index ckpt-best.meta -``` - -### 2. Run evaluation code - -To reproduce results from the paper, simply run `eval.py` script. - -```shell -$ python eval.py --data_dir=reproduce - -Restored from ./reproduce/lm01 -Reset RNN states. 
-Processing patch (1, 1) / (2, 4) -Probs for -[['Then' 'Dad' 'figured' ..., 'man' "'s" 'board-bill'] - ['Then' 'Dad' 'figured' ..., 'man' "'s" 'board-bill'] - ['Always' 'before' ',' ..., 'now' ',' 'for'] - ..., - ['Mark' 'was' 'close' ..., 'promising' 'him' ','] - ['Mark' 'was' 'close' ..., 'promising' 'him' ','] - ['Mark' 'was' 'close' ..., 'promising' 'him' ',']] -= -[[ 1.64250596e-05 1.77780055e-06 4.14267970e-06 ..., 1.87315454e-03 - 1.57723188e-01 6.31845817e-02] - [ 1.64250596e-05 1.77780055e-06 4.14267970e-06 ..., 1.87315454e-03 - 1.57723188e-01 6.31845817e-02] - [ 1.28243030e-07 3.80435935e-03 1.12383246e-01 ..., 9.67682712e-03 - 2.17407525e-01 1.08243264e-01] - ..., - [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05 - 3.37066874e-02 7.89367408e-02] - [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05 - 3.37066874e-02 7.89367408e-02] - [ 1.15557734e-04 2.92792241e-03 3.46455898e-04 ..., 2.72328052e-05 - 3.37066874e-02 7.89367408e-02]] -Processing patch (1, 2) / (2, 4) - -...(omitted) - -Accuracy of 1 LM(s) on pdp60 = 0.6 - -...(omitted) - -Accuracy of 5 LM(s) on pdp60 = 0.7 - -...(omitted) - -Accuracy of 10 LM(s) on wsc273 = 0.615 - -...(omitted) - -Accuracy of 14 LM(s) on wsc273 = 0.637 -``` diff --git a/research/lm_commonsense/eval.py b/research/lm_commonsense/eval.py deleted file mode 100644 index e5b7ff98b50a5af4e066d3d9f82c1acae81c3e93..0000000000000000000000000000000000000000 --- a/research/lm_commonsense/eval.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import pickle as pkl -import numpy as np -import tensorflow as tf -import utils - -tf.app.flags.DEFINE_string( - 'data_dir', 'reproduce', - 'Path to directory containing data and model checkpoints.') - - -FLAGS = tf.app.flags.FLAGS - - -class EnsembleLM(object): - """Ensemble of language models.""" - - def __init__(self, test_data_name='wsc273'): - vocab_file = os.path.join(FLAGS.data_dir, 'vocab.txt') - self.vocab = utils.CharsVocabulary(vocab_file, 50) - assert test_data_name in ['pdp60', 'wsc273'], ( - 'Test data must be pdp60 or wsc273, got {}'.format(test_data_name)) - self.test_data_name = test_data_name - - test_data = utils.parse_commonsense_reasoning_test(test_data_name) - self.question_ids, self.sentences, self.labels = test_data - self.all_probs = [] # aggregate single-model prediction here. - - def add_single_model(self, model_name='lm1'): - """Add a single model into the current ensemble.""" - # Create single LM - single_lm = SingleRecurrentLanguageModel(self.vocab, model_name) - - # Add the single LM prediction. 
- probs = single_lm.assign_probs(self.sentences, self.test_data_name) - self.all_probs.append(probs) - print('Done adding {}'.format(model_name)) - - def evaluate(self): - """Evaluate the current ensemble.""" - # Attach word probabilities and correctness label to each substitution - ensembled_probs = sum(self.all_probs) / len(self.all_probs) - scorings = [] - for i, sentence in enumerate(self.sentences): - correctness = self.labels[i] - word_probs = ensembled_probs[i, :len(sentence)] - joint_prob = np.prod(word_probs, dtype=np.float64) - - scorings.append(dict( - correctness=correctness, - sentence=sentence, - joint_prob=joint_prob, - word_probs=word_probs)) - scoring_mode = 'full' if self.test_data_name == 'pdp60' else 'partial' - return utils.compare_substitutions( - self.question_ids, scorings, scoring_mode) - - -class SingleRecurrentLanguageModel(object): - """Single Recurrent Language Model.""" - - def __init__(self, vocab, model_name='lm01'): - self.vocab = vocab - self.log_dir = os.path.join(FLAGS.data_dir, model_name) - - def reset(self): - self.sess.run(self.tensors['states_init']) - - def _score(self, word_patch): - """Score a matrix of shape (batch_size, num_timesteps+1) str tokens.""" - word_ids = np.array( - [[self.vocab.word_to_id(word) for word in row] - for row in word_patch]) - char_ids = np.array( - [[self.vocab.word_to_char_ids(word) for word in row] - for row in word_patch]) - print('Probs for \n{}\n='.format(np.array(word_patch)[:, 1:])) - - input_ids, target_ids = word_ids[:, :-1], word_ids[:, 1:] - input_char_ids = char_ids[:, :-1, :] - - softmax = self.sess.run(self.tensors['softmax_out'], feed_dict={ - self.tensors['inputs_in']: input_ids, - self.tensors['char_inputs_in']: input_char_ids - }) - - batch_size, num_timesteps = self.shape - softmax = softmax.reshape((num_timesteps, batch_size, -1)) - softmax = np.transpose(softmax, [1, 0, 2]) - probs = np.array([[softmax[row, col, target_ids[row, col]] - for col in range(num_timesteps)] - for 
row in range(batch_size)]) - print(probs) - return probs - - def _score_patches(self, word_patches): - """Score a 2D matrix of word_patches and stitch results together.""" - batch_size, num_timesteps = self.shape - nrow, ncol = len(word_patches), len(word_patches[0]) - max_len = num_timesteps * ncol - probs = np.zeros([0, max_len]) # accumulate results into this. - - # Loop through the 2D matrix of word_patches and score each. - for i, row in enumerate(word_patches): - print('Reset RNN states.') - self.reset() # reset states before processing each row. - row_probs = np.zeros([batch_size, 0]) - for j, word_patch in enumerate(row): - print('Processing patch ' - '({}, {}) / ({}, {})'.format(i+1, j+1, nrow, ncol)) - patch_probs = (self._score(word_patch) if word_patch else - np.zeros([batch_size, num_timesteps])) - row_probs = np.concatenate([row_probs, patch_probs], 1) - probs = np.concatenate([probs, row_probs], 0) - return probs - - def assign_probs(self, sentences, test_data_name='wsc273'): - """Return prediction accuracy using this LM for a test.""" - - probs_cache = os.path.join(self.log_dir, '{}.probs'.format(test_data_name)) - if os.path.exists(probs_cache): - print('Reading cached result from {}'.format(probs_cache)) - with tf.gfile.Open(probs_cache, 'r') as f: - probs = pkl.load(f) - else: - tf.reset_default_graph() - self.sess = tf.Session() - # Build the graph. 
- saver = tf.train.import_meta_graph( - os.path.join(self.log_dir, 'ckpt-best.meta')) - saver.restore(self.sess, os.path.join(self.log_dir, 'ckpt-best')) - print('Restored from {}'.format(self.log_dir)) - graph = tf.get_default_graph() - self.tensors = dict( - inputs_in=graph.get_tensor_by_name('test_inputs_in:0'), - char_inputs_in=graph.get_tensor_by_name('test_char_inputs_in:0'), - softmax_out=graph.get_tensor_by_name('SotaRNN_1/softmax_out:0'), - states_init=graph.get_operation_by_name('SotaRNN_1/states_init')) - self.shape = self.tensors['inputs_in'].shape.as_list() - - # Cut sentences into patches of shape processable by the LM. - batch_size, num_timesteps = self.shape - word_patches = utils.cut_to_patches(sentences, batch_size, num_timesteps) - probs = self._score_patches(word_patches) - - # Cache the probs since they are expensive to evaluate - with tf.gfile.Open(probs_cache, 'w') as f: - pkl.dump(probs, f) - return probs - - -def evaluate_ensemble(test_data_name, number_of_lms): - ensemble = EnsembleLM(test_data_name) - model_list = ['lm{:02d}'.format(i+1) for i in range(number_of_lms)] - for model_name in model_list: - ensemble.add_single_model(model_name) - accuracy = ensemble.evaluate() - print('Accuracy of {} LM(s) on {} = {}'.format( - number_of_lms, test_data_name, accuracy)) - - -def main(_): - evaluate_ensemble('pdp60', 1) # 60% - evaluate_ensemble('pdp60', 5) # 70% - evaluate_ensemble('wsc273', 10) # 61.5% - evaluate_ensemble('wsc273', 14) # 63.7% - - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/lm_commonsense/method.jpg b/research/lm_commonsense/method.jpg deleted file mode 100644 index ee8a5506fccca3cbb67f7bda0ccef78303cb228b..0000000000000000000000000000000000000000 Binary files a/research/lm_commonsense/method.jpg and /dev/null differ diff --git a/research/lm_commonsense/utils.py b/research/lm_commonsense/utils.py deleted file mode 100644 index 
d75f2b0fb72716860ea6d438e6b8ca2732d13c84..0000000000000000000000000000000000000000 --- a/research/lm_commonsense/utils.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import os -import numpy as np -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - - -class Vocabulary(object): - """Class that holds a vocabulary for the dataset.""" - - def __init__(self, filename): - - self._id_to_word = [] - self._word_to_id = {} - self._unk = -1 - self._bos = -1 - self._eos = -1 - - with tf.gfile.Open(filename) as f: - idx = 0 - for line in f: - word_name = line.strip() - if word_name == '': - self._bos = idx - elif word_name == '': - self._eos = idx - elif word_name == '': - self._unk = idx - if word_name == '!!!MAXTERMID': - continue - - self._id_to_word.append(word_name) - self._word_to_id[word_name] = idx - idx += 1 - - @property - def bos(self): - return self._bos - - @property - def eos(self): - return self._eos - - @property - def unk(self): - return self._unk - - @property - def size(self): - return len(self._id_to_word) - - def word_to_id(self, word): - if word in self._word_to_id: - return self._word_to_id[word] - else: - if word.lower() in self._word_to_id: - return 
self._word_to_id[word.lower()] - return self.unk - - def id_to_word(self, cur_id): - if cur_id < self.size: - return self._id_to_word[int(cur_id)] - return '' - - def decode(self, cur_ids): - return ' '.join([self.id_to_word(cur_id) for cur_id in cur_ids]) - - def encode(self, sentence): - word_ids = [self.word_to_id(cur_word) for cur_word in sentence.split()] - return np.array([self.bos] + word_ids + [self.eos], dtype=np.int32) - - -class CharsVocabulary(Vocabulary): - """Vocabulary containing character-level information.""" - - def __init__(self, filename, max_word_length): - super(CharsVocabulary, self).__init__(filename) - - self._max_word_length = max_word_length - chars_set = set() - - for word in self._id_to_word: - chars_set |= set(word) - - free_ids = [] - for i in range(256): - if chr(i) in chars_set: - continue - free_ids.append(chr(i)) - - if len(free_ids) < 5: - raise ValueError('Not enough free char ids: %d' % len(free_ids)) - - self.bos_char = free_ids[0] # - self.eos_char = free_ids[1] # - self.bow_char = free_ids[2] # - self.eow_char = free_ids[3] # - self.pad_char = free_ids[4] # - - chars_set |= {self.bos_char, self.eos_char, self.bow_char, self.eow_char, - self.pad_char} - - self._char_set = chars_set - num_words = len(self._id_to_word) - - self._word_char_ids = np.zeros([num_words, max_word_length], dtype=np.int32) - - self.bos_chars = self._convert_word_to_char_ids(self.bos_char) - self.eos_chars = self._convert_word_to_char_ids(self.eos_char) - - for i, word in enumerate(self._id_to_word): - if i == self.bos: - self._word_char_ids[i] = self.bos_chars - elif i == self.eos: - self._word_char_ids[i] = self.eos_chars - else: - self._word_char_ids[i] = self._convert_word_to_char_ids(word) - - @property - def max_word_length(self): - return self._max_word_length - - def _convert_word_to_char_ids(self, word): - code = np.zeros([self.max_word_length], dtype=np.int32) - code[:] = ord(self.pad_char) - - if len(word) > self.max_word_length - 2: - word = 
word[:self.max_word_length-2] - cur_word = self.bow_char + word + self.eow_char - for j in range(len(cur_word)): - code[j] = ord(cur_word[j]) - return code - - def word_to_char_ids(self, word): - if word in self._word_to_id: - return self._word_char_ids[self._word_to_id[word]] - else: - return self._convert_word_to_char_ids(word) - - def encode_chars(self, sentence): - chars_ids = [self.word_to_char_ids(cur_word) - for cur_word in sentence.split()] - return np.vstack([self.bos_chars] + chars_ids + [self.eos_chars]) - - -_SPECIAL_CHAR_MAP = { - '\xe2\x80\x98': '\'', - '\xe2\x80\x99': '\'', - '\xe2\x80\x9c': '"', - '\xe2\x80\x9d': '"', - '\xe2\x80\x93': '-', - '\xe2\x80\x94': '-', - '\xe2\x88\x92': '-', - '\xce\x84': '\'', - '\xc2\xb4': '\'', - '`': '\'' -} - -_START_SPECIAL_CHARS = ['.', ',', '?', '!', ';', ':', '[', ']', '\'', '+', '/', - '\xc2\xa3', '$', '~', '*', '%', '{', '}', '#', '&', '-', - '"', '(', ')', '='] + list(_SPECIAL_CHAR_MAP.keys()) -_SPECIAL_CHARS = _START_SPECIAL_CHARS + [ - '\'s', '\'m', '\'t', '\'re', '\'d', '\'ve', '\'ll'] - - -def tokenize(sentence): - """Tokenize a sentence.""" - sentence = str(sentence) - words = sentence.strip().split() - tokenized = [] # return this - - for word in words: - if word.lower() in ['mr.', 'ms.']: - tokenized.append(word) - continue - - # Split special chars at the start of word - will_split = True - while will_split: - will_split = False - for char in _START_SPECIAL_CHARS: - if word.startswith(char): - tokenized.append(char) - word = word[len(char):] - will_split = True - - # Split special chars at the end of word - special_end_tokens = [] - will_split = True - while will_split: - will_split = False - for char in _SPECIAL_CHARS: - if word.endswith(char): - special_end_tokens = [char] + special_end_tokens - word = word[:-len(char)] - will_split = True - - if word: - tokenized.append(word) - tokenized += special_end_tokens - - # Add necessary end of sentence token. 
- if tokenized[-1] not in ['.', '!', '?']: - tokenized += ['.'] - return tokenized - - -def parse_commonsense_reasoning_test(test_data_name): - """Read JSON test data.""" - with tf.gfile.Open(os.path.join( - FLAGS.data_dir, 'commonsense_test', - '{}.json'.format(test_data_name)), 'r') as f: - data = json.load(f) - - question_ids = [d['question_id'] for d in data] - sentences = [tokenize(d['substitution']) for d in data] - labels = [d['correctness'] for d in data] - - return question_ids, sentences, labels - - -PAD = '' - - -def cut_to_patches(sentences, batch_size, num_timesteps): - """Cut sentences into patches of shape (batch_size, num_timesteps). - - Args: - sentences: a list of sentences, each sentence is a list of str token. - batch_size: batch size - num_timesteps: number of backprop step - - Returns: - patches: A 2D matrix, - each entry is a matrix of shape (batch_size, num_timesteps). - """ - preprocessed = [['']+sentence+[''] for sentence in sentences] - max_len = max([len(sent) for sent in preprocessed]) - - # Pad to shape [height, width] - # where height is a multiple of batch_size - # and width is a multiple of num_timesteps - nrow = int(np.ceil(len(preprocessed) * 1.0 / batch_size)) - ncol = int(np.ceil(max_len * 1.0 / num_timesteps)) - height, width = nrow * batch_size, ncol * num_timesteps + 1 - preprocessed = [sent + [PAD] * (width - len(sent)) for sent in preprocessed] - preprocessed += [[PAD] * width] * (height - len(preprocessed)) - - # Cut preprocessed into patches of shape [batch_size, num_timesteps] - patches = [] - for row in range(nrow): - patches.append([]) - for col in range(ncol): - patch = [sent[col * num_timesteps: - (col+1) * num_timesteps + 1] - for sent in preprocessed[row * batch_size: - (row+1) * batch_size]] - if np.all(np.array(patch)[:, 1:] == PAD): - patch = None # no need to process this patch. 
- patches[-1].append(patch) - return patches - - -def _substitution_mask(sent1, sent2): - """Binary mask identifying substituted part in two sentences. - - Example sentence and their mask: - First sentence = "I like the cat 's color" - 0 0 0 1 0 0 - Second sentence = "I like the yellow dog 's color" - 0 0 0 1 1 0 0 - - Args: - sent1: first sentence - sent2: second sentence - - Returns: - mask1: mask for first sentence - mask2: mask for second sentence - """ - mask1_start, mask2_start = [], [] - while sent1[0] == sent2[0]: - sent1 = sent1[1:] - sent2 = sent2[1:] - mask1_start.append(0.) - mask2_start.append(0.) - - mask1_end, mask2_end = [], [] - while sent1[-1] == sent2[-1]: - if (len(sent1) == 1) or (len(sent2) == 1): - break - sent1 = sent1[:-1] - sent2 = sent2[:-1] - mask1_end = [0.] + mask1_end - mask2_end = [0.] + mask2_end - - assert sent1 or sent2, 'Two sentences are identical.' - return (mask1_start + [1.] * len(sent1) + mask1_end, - mask2_start + [1.] * len(sent2) + mask2_end) - - -def _convert_to_partial(scoring1, scoring2): - """Convert full scoring into partial scoring.""" - mask1, mask2 = _substitution_mask( - scoring1['sentence'], scoring2['sentence']) - - def _partial_score(scoring, mask): - word_probs = [max(_) for _ in zip(scoring['word_probs'], mask)] - scoring.update(word_probs=word_probs, - joint_prob=np.prod(word_probs)) - - _partial_score(scoring1, mask1) - _partial_score(scoring2, mask2) - - -def compare_substitutions(question_ids, scorings, mode='full'): - """Return accuracy by comparing two consecutive scorings.""" - prediction_correctness = [] - # Compare two consecutive substitutions - for i in range(len(scorings) // 2): - scoring1, scoring2 = scorings[2*i: 2*i+2] - if mode == 'partial': # fix joint prob into partial prob - _convert_to_partial(scoring1, scoring2) - - prediction_correctness.append( - (scoring2['joint_prob'] > scoring1['joint_prob']) == - scoring2['correctness']) - - # Two consecutive substitutions always belong to the same 
question - question_ids = [qid for i, qid in enumerate(question_ids) if i % 2 == 0] - assert len(question_ids) == len(prediction_correctness) - num_questions = len(set(question_ids)) - - # Question is correctly answered only if - # all predictions of the same question_id is correct - num_correct_answer = 0 - previous_qid = None - correctly_answered = False - for predict, qid in zip(prediction_correctness, question_ids): - if qid != previous_qid: - previous_qid = qid - num_correct_answer += int(correctly_answered) - correctly_answered = True - correctly_answered = correctly_answered and predict - num_correct_answer += int(correctly_answered) - - return num_correct_answer / num_questions diff --git a/research/lstm_object_detection/tflite/protos/mobile_ssd_client_options.proto b/research/lstm_object_detection/tflite/protos/mobile_ssd_client_options.proto index d501c213c11476675250f232b430e3ab1b62dac4..0fbd07574f74507cb128e4685e640c9497da5cb9 100644 --- a/research/lstm_object_detection/tflite/protos/mobile_ssd_client_options.proto +++ b/research/lstm_object_detection/tflite/protos/mobile_ssd_client_options.proto @@ -37,10 +37,10 @@ message ClientOptions { // The threshold on intersection-over-union used by non-maxima suppression. optional float iou_threshold = 5 [default = 0.3]; - // Optional whitelist of class names. If non-empty, detections whose class + // Optional allowlist of class names. If non-empty, detections whose class // name is not in this set will be filtered out. Duplicate or unknown class // names are ignored. - repeated string class_name_whitelist = 6; + repeated string class_name_allowlist = 6; // SSD in single class agnostic model. 
optional bool agnostic_mode = 7 [default = false]; diff --git a/research/lstm_object_detection/tflite/utils/ssd_utils.h b/research/lstm_object_detection/tflite/utils/ssd_utils.h index a9199e7fd5ca2c5826caf9ee6db9c1afc82a4534..a8efc00d3eafa00ee060348b798426f0ab0dad5e 100644 --- a/research/lstm_object_detection/tflite/utils/ssd_utils.h +++ b/research/lstm_object_detection/tflite/utils/ssd_utils.h @@ -63,7 +63,7 @@ void NonMaxSuppressionMultiClassFast( // Similar to NonMaxSuppressionMultiClassFast, but restricts the results to // the provided list of class indices. This effectively filters out any class -// whose index is not in this whitelist. +// whose index is not in this allowlist. void NonMaxSuppressionMultiClassRestrict( std::vector restricted_class_indices, const protos::BoxCornerEncoding& boxes, const std::vector& scores, diff --git a/research/maskgan/README.md b/research/maskgan/README.md deleted file mode 100644 index 10ee8a4c4dd546983469b07e2fb8207fc200534d..0000000000000000000000000000000000000000 --- a/research/maskgan/README.md +++ /dev/null @@ -1,111 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# MaskGAN: Better Text Generation via Filling in the ______ - -Code for [*MaskGAN: Better Text Generation via Filling in the -______*](https://arxiv.org/abs/1801.07736) published at ICLR 2018. - -## Requirements - -* TensorFlow >= v1.5 - -## Instructions - -Warning: The open-source version of this code is still in the process of being -tested. Pretraining may not work correctly. - -For training on PTB: - -1. Follow instructions here ([Tensorflow RNN Language Model Tutorial](https://www.tensorflow.org/tutorials/sequences/recurrent)) to train a language model on PTB dataset. 
-Copy PTB data downloaded from the above tensorflow RNN tutorial to folder "/tmp/ptb". It should contain following three files: ptb.train.txt, ptb.test.txt, ptb.valid.txt -Make folder /tmp/pretrain-lm and copy checkpoints from above Tensorflow RNN tutorial under this folder. - - -2. Run MaskGAN in MLE pretraining mode. If step 1 was not run*, set -`language_model_ckpt_dir` to empty. - -```bash -python train_mask_gan.py \ - --data_dir='/tmp/ptb' \ - --batch_size=20 \ - --sequence_length=20 \ - --base_directory='/tmp/maskGAN' \ - --hparams="gen_rnn_size=650,dis_rnn_size=650,gen_num_layers=2,dis_num_layers=2,gen_learning_rate=0.00074876,dis_learning_rate=5e-4,baseline_decay=0.99,dis_train_iterations=1,gen_learning_rate_decay=0.95" \ - --mode='TRAIN' \ - --max_steps=100000 \ - --language_model_ckpt_dir=/tmp/pretrain-lm/ \ - --generator_model='seq2seq_vd' \ - --discriminator_model='rnn_zaremba' \ - --is_present_rate=0.5 \ - --summaries_every=10 \ - --print_every=250 \ - --max_num_to_print=3 \ - --gen_training_strategy=cross_entropy \ - --seq2seq_share_embedding -``` - -3. Run MaskGAN in GAN mode. If step 2 was not run, set `maskgan_ckpt` to empty. 
-```bash -python train_mask_gan.py \ - --data_dir='/tmp/ptb' \ - --batch_size=128 \ - --sequence_length=20 \ - --base_directory='/tmp/maskGAN' \ - --mask_strategy=contiguous \ - --maskgan_ckpt='/tmp/maskGAN' \ - --hparams="gen_rnn_size=650,dis_rnn_size=650,gen_num_layers=2,dis_num_layers=2,gen_learning_rate=0.000038877,gen_learning_rate_decay=1.0,gen_full_learning_rate_steps=2000000,gen_vd_keep_prob=0.33971,rl_discount_rate=0.89072,dis_learning_rate=5e-4,baseline_decay=0.99,dis_train_iterations=2,dis_pretrain_learning_rate=0.005,critic_learning_rate=5.1761e-7,dis_vd_keep_prob=0.71940" \ - --mode='TRAIN' \ - --max_steps=100000 \ - --generator_model='seq2seq_vd' \ - --discriminator_model='seq2seq_vd' \ - --is_present_rate=0.5 \ - --summaries_every=250 \ - --print_every=250 \ - --max_num_to_print=3 \ - --gen_training_strategy='reinforce' \ - --seq2seq_share_embedding=true \ - --baseline_method=critic \ - --attention_option=luong -``` - -4. Generate samples: -```bash -python generate_samples.py \ - --data_dir /tmp/ptb/ \ - --data_set=ptb \ - --batch_size=256 \ - --sequence_length=20 \ - --base_directory /tmp/imdbsample/ \ - --hparams="gen_rnn_size=650,dis_rnn_size=650,gen_num_layers=2,gen_vd_keep_prob=0.33971" \ - --generator_model=seq2seq_vd \ - --discriminator_model=seq2seq_vd \ - --is_present_rate=0.0 \ - --maskgan_ckpt=/tmp/maskGAN \ - --seq2seq_share_embedding=True \ - --dis_share_embedding=True \ - --attention_option=luong \ - --mask_strategy=contiguous \ - --baseline_method=critic \ - --number_epochs=4 -``` - - -* While trying to run Step 2, the following error appears: - NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. 
Original error: - - Key critic/rnn/biases not found in checkpoint - [[node save/RestoreV2 (defined at train_mask_gan.py:431) ]] - - This is an issue with seq2seq model because it uses the attention mechanism. - The issue arises if you saved the model with an earlier version (seq2seq is old) and restore with a recent one (saver.restore got updated). - The naming convention for LSTM parameters changed, e.g. cell_0/basic_lstm_cell/weights became cell_0/basic_lstm_cell/kernel. - Which is why you cannot restore them if you try to restore old checkpoints with recent TF. - The below script will help rename the variables and everything will work as expected. - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/tools/checkpoint_convert.py - -## Contact for Issues - -* Liam Fedus, @liamb315 -* Andrew M. Dai, @a-dai diff --git a/research/maskgan/data/__init__.py b/research/maskgan/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/data/imdb_loader.py b/research/maskgan/data/imdb_loader.py deleted file mode 100644 index 8169b3336b4ac0e1a36e35dbaed4c01f38f1ec02..0000000000000000000000000000000000000000 --- a/research/maskgan/data/imdb_loader.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""IMDB data loader and helpers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# Dependency imports -import numpy as np - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_boolean('prefix_label', True, 'Vocabulary file.') - -np.set_printoptions(precision=3) -np.set_printoptions(suppress=True) - -EOS_INDEX = 88892 - - -def _read_words(filename, use_prefix=True): - all_words = [] - sequence_example = tf.train.SequenceExample() - for r in tf.python_io.tf_record_iterator(filename): - sequence_example.ParseFromString(r) - - if FLAGS.prefix_label and use_prefix: - label = sequence_example.context.feature['class'].int64_list.value[0] - review_words = [EOS_INDEX + 1 + label] - else: - review_words = [] - review_words.extend([ - f.int64_list.value[0] - for f in sequence_example.feature_lists.feature_list['token_id'].feature - ]) - all_words.append(review_words) - return all_words - - -def build_vocab(vocab_file): - word_to_id = {} - - with tf.gfile.GFile(vocab_file, 'r') as f: - index = 0 - for word in f: - word_to_id[word.strip()] = index - index += 1 - word_to_id[''] = EOS_INDEX - - return word_to_id - - -def imdb_raw_data(data_path=None): - """Load IMDB raw data from data directory "data_path". - Reads IMDB tf record files containing integer ids, - and performs mini-batching of the inputs. - Args: - data_path: string path to the directory where simple-examples.tgz has - been extracted. - Returns: - tuple (train_data, valid_data) - where each of the data objects can be passed to IMDBIterator. 
- """ - - train_path = os.path.join(data_path, 'train_lm.tfrecords') - valid_path = os.path.join(data_path, 'test_lm.tfrecords') - - train_data = _read_words(train_path) - valid_data = _read_words(valid_path) - return train_data, valid_data - - -def imdb_iterator(raw_data, batch_size, num_steps, epoch_size_override=None): - """Iterate on the raw IMDB data. - - This generates batch_size pointers into the raw IMDB data, and allows - minibatch iteration along these pointers. - - Args: - raw_data: one of the raw data outputs from imdb_raw_data. - batch_size: int, the batch size. - num_steps: int, the number of unrolls. - - Yields: - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. - The second element of the tuple is the same data time-shifted to the - right by one. The third is a set of weights with 1 indicating a word was - present and 0 not. - - Raises: - ValueError: if batch_size or num_steps are too high. - """ - del epoch_size_override - data_len = len(raw_data) - num_batches = data_len // batch_size - 1 - - for batch in range(num_batches): - x = np.zeros([batch_size, num_steps], dtype=np.int32) - y = np.zeros([batch_size, num_steps], dtype=np.int32) - w = np.zeros([batch_size, num_steps], dtype=np.float) - - for i in range(batch_size): - data_index = batch * batch_size + i - example = raw_data[data_index] - - if len(example) > num_steps: - final_x = example[:num_steps] - final_y = example[1:(num_steps + 1)] - w[i] = 1 - - else: - to_fill_in = num_steps - len(example) - final_x = example + [EOS_INDEX] * to_fill_in - final_y = final_x[1:] + [EOS_INDEX] - w[i] = [1] * len(example) + [0] * to_fill_in - - x[i] = final_x - y[i] = final_y - - yield (x, y, w) diff --git a/research/maskgan/data/ptb_loader.py b/research/maskgan/data/ptb_loader.py deleted file mode 100644 index 43105952a667f968faf12a4561f85964f0a123ae..0000000000000000000000000000000000000000 --- a/research/maskgan/data/ptb_loader.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 
2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""PTB data loader and helpers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os -# Dependency imports -import numpy as np - -import tensorflow as tf - -EOS_INDEX = 0 - - -def _read_words(filename): - with tf.gfile.GFile(filename, "r") as f: - return f.read().decode("utf-8").replace("\n", "").split() - - -def build_vocab(filename): - data = _read_words(filename) - - counter = collections.Counter(data) - count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) - - words, _ = list(zip(*count_pairs)) - word_to_id = dict(zip(words, range(len(words)))) - print(":", word_to_id[""]) - global EOS_INDEX - EOS_INDEX = word_to_id[""] - - return word_to_id - - -def _file_to_word_ids(filename, word_to_id): - data = _read_words(filename) - return [word_to_id[word] for word in data if word in word_to_id] - - -def ptb_raw_data(data_path=None): - """Load PTB raw data from data directory "data_path". - Reads PTB text files, converts strings to integer ids, - and performs mini-batching of the inputs. 
- The PTB dataset comes from Tomas Mikolov's webpage: - http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz - Args: - data_path: string path to the directory where simple-examples.tgz has - been extracted. - Returns: - tuple (train_data, valid_data, test_data, vocabulary) - where each of the data objects can be passed to PTBIterator. - """ - - train_path = os.path.join(data_path, "ptb.train.txt") - valid_path = os.path.join(data_path, "ptb.valid.txt") - test_path = os.path.join(data_path, "ptb.test.txt") - - word_to_id = build_vocab(train_path) - train_data = _file_to_word_ids(train_path, word_to_id) - valid_data = _file_to_word_ids(valid_path, word_to_id) - test_data = _file_to_word_ids(test_path, word_to_id) - vocabulary = len(word_to_id) - return train_data, valid_data, test_data, vocabulary - - -def ptb_iterator(raw_data, batch_size, num_steps, epoch_size_override=None): - """Iterate on the raw PTB data. - - This generates batch_size pointers into the raw PTB data, and allows - minibatch iteration along these pointers. - - Args: - raw_data: one of the raw data outputs from ptb_raw_data. - batch_size: int, the batch size. - num_steps: int, the number of unrolls. - - Yields: - Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. - The second element of the tuple is the same data time-shifted to the - right by one. - - Raises: - ValueError: if batch_size or num_steps are too high. 
- """ - raw_data = np.array(raw_data, dtype=np.int32) - - data_len = len(raw_data) - batch_len = data_len // batch_size - data = np.full([batch_size, batch_len], EOS_INDEX, dtype=np.int32) - for i in range(batch_size): - data[i] = raw_data[batch_len * i:batch_len * (i + 1)] - - if epoch_size_override: - epoch_size = epoch_size_override - else: - epoch_size = (batch_len - 1) // num_steps - - if epoch_size == 0: - raise ValueError("epoch_size == 0, decrease batch_size or num_steps") - - # print("Number of batches per epoch: %d" % epoch_size) - for i in range(epoch_size): - x = data[:, i * num_steps:(i + 1) * num_steps] - y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] - w = np.ones_like(x) - yield (x, y, w) diff --git a/research/maskgan/generate_samples.py b/research/maskgan/generate_samples.py deleted file mode 100644 index d4215ebc75a074b316010eb60189bf7428dfcfc5..0000000000000000000000000000000000000000 --- a/research/maskgan/generate_samples.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generate samples from the MaskGAN. 
- -Launch command: - python generate_samples.py - --data_dir=/tmp/data/imdb --data_set=imdb - --batch_size=256 --sequence_length=20 --base_directory=/tmp/imdb - --hparams="gen_rnn_size=650,dis_rnn_size=650,gen_num_layers=2, - gen_vd_keep_prob=1.0" --generator_model=seq2seq_vd - --discriminator_model=seq2seq_vd --is_present_rate=0.5 - --maskgan_ckpt=/tmp/model.ckpt-45494 - --seq2seq_share_embedding=True --dis_share_embedding=True - --attention_option=luong --mask_strategy=contiguous --baseline_method=critic - --number_epochs=4 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial -import os -# Dependency imports - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import train_mask_gan -from data import imdb_loader -from data import ptb_loader - -# Data. -from model_utils import helper -from model_utils import model_utils - -SAMPLE_TRAIN = 'TRAIN' -SAMPLE_VALIDATION = 'VALIDATION' - -## Sample Generation. -## Binary and setup FLAGS. -tf.app.flags.DEFINE_enum('sample_mode', 'TRAIN', - [SAMPLE_TRAIN, SAMPLE_VALIDATION], - 'Dataset to sample from.') -tf.app.flags.DEFINE_string('output_path', '/tmp', 'Model output directory.') -tf.app.flags.DEFINE_boolean( - 'output_masked_logs', False, - 'Whether to display for human evaluation (show masking).') -tf.app.flags.DEFINE_integer('number_epochs', 1, - 'The number of epochs to produce.') - -FLAGS = tf.app.flags.FLAGS - - -def get_iterator(data): - """Return the data iterator.""" - if FLAGS.data_set == 'ptb': - iterator = ptb_loader.ptb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length, - FLAGS.epoch_size_override) - elif FLAGS.data_set == 'imdb': - iterator = imdb_loader.imdb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length) - return iterator - - -def convert_to_human_readable(id_to_word, arr, p, max_num_to_print): - """Convert a np.array of indices into words using id_to_word dictionary. 
- Return max_num_to_print results. - """ - - assert arr.ndim == 2 - - samples = [] - for sequence_id in xrange(min(len(arr), max_num_to_print)): - sample = [] - for i, index in enumerate(arr[sequence_id, :]): - if p[sequence_id, i] == 1: - sample.append(str(id_to_word[index])) - else: - sample.append('*' + str(id_to_word[index])) - buffer_str = ' '.join(sample) - samples.append(buffer_str) - return samples - - -def write_unmasked_log(log, id_to_word, sequence_eval): - """Helper function for logging evaluated sequences without mask.""" - indices_arr = np.asarray(sequence_eval) - samples = helper.convert_to_human_readable(id_to_word, indices_arr, - FLAGS.batch_size) - for sample in samples: - log.write(sample + '\n') - log.flush() - return samples - - -def write_masked_log(log, id_to_word, sequence_eval, present_eval): - indices_arr = np.asarray(sequence_eval) - samples = convert_to_human_readable(id_to_word, indices_arr, present_eval, - FLAGS.batch_size) - for sample in samples: - log.write(sample + '\n') - log.flush() - return samples - - -def generate_logs(sess, model, log, id_to_word, feed): - """Impute Sequences using the model for a particular feed and send it to - logs. - """ - # Impute Sequences. - [p, inputs_eval, sequence_eval] = sess.run( - [model.present, model.inputs, model.fake_sequence], feed_dict=feed) - - # Add the 0th time-step for coherence. - first_token = np.expand_dims(inputs_eval[:, 0], axis=1) - sequence_eval = np.concatenate((first_token, sequence_eval), axis=1) - - # 0th token always present. - p = np.concatenate((np.ones((FLAGS.batch_size, 1)), p), axis=1) - - if FLAGS.output_masked_logs: - samples = write_masked_log(log, id_to_word, sequence_eval, p) - else: - samples = write_unmasked_log(log, id_to_word, sequence_eval) - return samples - - -def generate_samples(hparams, data, id_to_word, log_dir, output_file): - """"Generate samples. - - Args: - hparams: Hyperparameters for the MaskGAN. - data: Data to evaluate. 
- id_to_word: Dictionary of indices to words. - log_dir: Log directory. - output_file: Output file for the samples. - """ - # Boolean indicating operational mode. - is_training = False - - # Set a random seed to keep fixed mask. - np.random.seed(0) - - with tf.Graph().as_default(): - # Construct the model. - model = train_mask_gan.create_MaskGAN(hparams, is_training) - - ## Retrieve the initial savers. - init_savers = model_utils.retrieve_init_savers(hparams) - - ## Initial saver function to supervisor. - init_fn = partial(model_utils.init_fn, init_savers) - - is_chief = FLAGS.task == 0 - - # Create the supervisor. It will take care of initialization, summaries, - # checkpoints, and recovery. - sv = tf.Supervisor( - logdir=log_dir, - is_chief=is_chief, - saver=model.saver, - global_step=model.global_step, - recovery_wait_secs=30, - summary_op=None, - init_fn=init_fn) - - # Get an initialized, and possibly recovered session. Launch the - # services: Checkpointing, Summaries, step counting. - # - # When multiple replicas of this program are running the services are - # only launched by the 'chief' replica. - with sv.managed_session( - FLAGS.master, start_standard_services=False) as sess: - - # Generator statefulness over the epoch. - [gen_initial_state_eval, fake_gen_initial_state_eval] = sess.run( - [model.eval_initial_state, model.fake_gen_initial_state]) - - for n in xrange(FLAGS.number_epochs): - print('Epoch number: %d' % n) - # print('Percent done: %.2f' % float(n) / float(FLAGS.number_epochs)) - iterator = get_iterator(data) - for x, y, _ in iterator: - if FLAGS.eval_language_model: - is_present_rate = 0. - else: - is_present_rate = FLAGS.is_present_rate - tf.logging.info( - 'Evaluating on is_present_rate=%.3f.' % is_present_rate) - - model_utils.assign_percent_real(sess, model.percent_real_update, - model.new_rate, is_present_rate) - - # Randomly mask out tokens. 
- p = model_utils.generate_mask() - - eval_feed = {model.inputs: x, model.targets: y, model.present: p} - - if FLAGS.data_set == 'ptb': - # Statefulness for *evaluation* Generator. - for i, (c, h) in enumerate(model.eval_initial_state): - eval_feed[c] = gen_initial_state_eval[i].c - eval_feed[h] = gen_initial_state_eval[i].h - - # Statefulness for the Generator. - for i, (c, h) in enumerate(model.fake_gen_initial_state): - eval_feed[c] = fake_gen_initial_state_eval[i].c - eval_feed[h] = fake_gen_initial_state_eval[i].h - - [gen_initial_state_eval, fake_gen_initial_state_eval, _] = sess.run( - [ - model.eval_final_state, model.fake_gen_final_state, - model.global_step - ], - feed_dict=eval_feed) - - generate_logs(sess, model, output_file, id_to_word, eval_feed) - output_file.close() - print('Closing output_file.') - return - - -def main(_): - hparams = train_mask_gan.create_hparams() - log_dir = FLAGS.base_directory - - tf.gfile.MakeDirs(FLAGS.output_path) - output_file = tf.gfile.GFile( - os.path.join(FLAGS.output_path, 'reviews.txt'), mode='w') - - # Load data set. - if FLAGS.data_set == 'ptb': - raw_data = ptb_loader.ptb_raw_data(FLAGS.data_dir) - train_data, valid_data, _, _ = raw_data - elif FLAGS.data_set == 'imdb': - raw_data = imdb_loader.imdb_raw_data(FLAGS.data_dir) - train_data, valid_data = raw_data - else: - raise NotImplementedError - - # Generating more data on train set. - if FLAGS.sample_mode == SAMPLE_TRAIN: - data_set = train_data - elif FLAGS.sample_mode == SAMPLE_VALIDATION: - data_set = valid_data - else: - raise NotImplementedError - - # Dictionary and reverse dictionry. 
- if FLAGS.data_set == 'ptb': - word_to_id = ptb_loader.build_vocab( - os.path.join(FLAGS.data_dir, 'ptb.train.txt')) - elif FLAGS.data_set == 'imdb': - word_to_id = imdb_loader.build_vocab( - os.path.join(FLAGS.data_dir, 'vocab.txt')) - id_to_word = {v: k for k, v in word_to_id.iteritems()} - - FLAGS.vocab_size = len(id_to_word) - print('Vocab size: %d' % FLAGS.vocab_size) - - generate_samples(hparams, data_set, id_to_word, log_dir, output_file) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/maskgan/losses/__init__.py b/research/maskgan/losses/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/losses/losses.py b/research/maskgan/losses/losses.py deleted file mode 100644 index 38d0e7b4d13cfae9652d8c70f08bfba5c478e150..0000000000000000000000000000000000000000 --- a/research/maskgan/losses/losses.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Losses for Generator and Discriminator.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def discriminator_loss(predictions, labels, missing_tokens): - """Discriminator loss based on predictions and labels. 
- - Args: - predictions: Discriminator linear predictions Tensor of shape [batch_size, - sequence_length] - labels: Labels for predictions, Tensor of shape [batch_size, - sequence_length] - missing_tokens: Indicator for the missing tokens. Evaluate the loss only - on the tokens that were missing. - - Returns: - loss: Scalar tf.float32 loss. - - """ - loss = tf.losses.sigmoid_cross_entropy(labels, - predictions, - weights=missing_tokens) - loss = tf.Print( - loss, [loss, labels, missing_tokens], - message='loss, labels, missing_tokens', - summarize=25, - first_n=25) - return loss - - -def cross_entropy_loss_matrix(gen_labels, gen_logits): - """Computes the cross entropy loss for G. - - Args: - gen_labels: Labels for the correct token. - gen_logits: Generator logits. - - Returns: - loss_matrix: Loss matrix of shape [batch_size, sequence_length]. - """ - cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=gen_labels, logits=gen_logits) - return cross_entropy_loss - - -def GAN_loss_matrix(dis_predictions): - """Computes the cross entropy loss for G. - - Args: - dis_predictions: Discriminator predictions. - - Returns: - loss_matrix: Loss matrix of shape [batch_size, sequence_length]. - """ - eps = tf.constant(1e-7, tf.float32) - gan_loss_matrix = -tf.log(dis_predictions + eps) - return gan_loss_matrix - - -def generator_GAN_loss(predictions): - """Generator GAN loss based on Discriminator predictions.""" - return -tf.log(tf.reduce_mean(predictions)) - - -def generator_blended_forward_loss(gen_logits, gen_labels, dis_predictions, - is_real_input): - """Computes the masked-loss for G. This will be a blend of cross-entropy - loss where the true label is known and GAN loss where the true label has been - masked. - - Args: - gen_logits: Generator logits. - gen_labels: Labels for the correct token. - dis_predictions: Discriminator predictions. - is_real_input: Tensor indicating whether the label is present. 
- - Returns: - loss: Scalar tf.float32 total loss. - """ - cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=gen_labels, logits=gen_logits) - gan_loss = -tf.log(dis_predictions) - loss_matrix = tf.where(is_real_input, cross_entropy_loss, gan_loss) - return tf.reduce_mean(loss_matrix) - - -def wasserstein_generator_loss(gen_logits, gen_labels, dis_values, - is_real_input): - """Computes the masked-loss for G. This will be a blend of cross-entropy - loss where the true label is known and GAN loss where the true label is - missing. - - Args: - gen_logits: Generator logits. - gen_labels: Labels for the correct token. - dis_values: Discriminator values Tensor of shape [batch_size, - sequence_length]. - is_real_input: Tensor indicating whether the label is present. - - Returns: - loss: Scalar tf.float32 total loss. - """ - cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=gen_labels, logits=gen_logits) - # Maximize the dis_values (minimize the negative) - gan_loss = -dis_values - loss_matrix = tf.where(is_real_input, cross_entropy_loss, gan_loss) - loss = tf.reduce_mean(loss_matrix) - return loss - - -def wasserstein_discriminator_loss(real_values, fake_values): - """Wasserstein discriminator loss. - - Args: - real_values: Value given by the Wasserstein Discriminator to real data. - fake_values: Value given by the Wasserstein Discriminator to fake data. - - Returns: - loss: Scalar tf.float32 loss. - - """ - real_avg = tf.reduce_mean(real_values) - fake_avg = tf.reduce_mean(fake_values) - - wasserstein_loss = real_avg - fake_avg - return wasserstein_loss - - -def wasserstein_discriminator_loss_intrabatch(values, is_real_input): - """Wasserstein discriminator loss. This is an odd variant where the value - difference is between the real tokens and the fake tokens within a single - batch. 
- - Args: - values: Value given by the Wasserstein Discriminator of shape [batch_size, - sequence_length] to an imputed batch (real and fake). - is_real_input: tf.bool Tensor of shape [batch_size, sequence_length]. If - true, it indicates that the label is known. - - Returns: - wasserstein_loss: Scalar tf.float32 loss. - - """ - zero_tensor = tf.constant(0., dtype=tf.float32, shape=[]) - - present = tf.cast(is_real_input, tf.float32) - missing = tf.cast(1 - present, tf.float32) - - # Counts for real and fake tokens. - real_count = tf.reduce_sum(present) - fake_count = tf.reduce_sum(missing) - - # Averages for real and fake token values. - real = tf.mul(values, present) - fake = tf.mul(values, missing) - real_avg = tf.reduce_sum(real) / real_count - fake_avg = tf.reduce_sum(fake) / fake_count - - # If there are no real or fake entries in the batch, we assign an average - # value of zero. - real_avg = tf.where(tf.equal(real_count, 0), zero_tensor, real_avg) - fake_avg = tf.where(tf.equal(fake_count, 0), zero_tensor, fake_avg) - - wasserstein_loss = real_avg - fake_avg - return wasserstein_loss diff --git a/research/maskgan/model_utils/__init__.py b/research/maskgan/model_utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/model_utils/helper.py b/research/maskgan/model_utils/helper.py deleted file mode 100644 index 36115b484a007cda715b038e5cf52cbdd0b072ba..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/helper.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Random helper functions for converting between indices and one-hot encodings -as well as printing/logging helpers. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from six.moves import xrange -import tensorflow as tf - - -def variable_summaries(var, name): - """Attach a lot of summaries to a Tensor.""" - mean = tf.reduce_mean(var) - tf.summary.scalar('mean/' + name, mean) - with tf.name_scope('stddev'): - stddev = tf.sqrt(tf.reduce_sum(tf.square(var - mean))) - tf.summary.scalar('sttdev/' + name, stddev) - tf.summary.scalar('max/' + name, tf.reduce_max(var)) - tf.summary.scalar('min/' + name, tf.reduce_min(var)) - tf.summary.histogram(name, var) - - -def zip_seq_pred_crossent(id_to_word, sequences, predictions, cross_entropy): - """Zip together the sequences, predictions, cross entropy.""" - indices = convert_to_indices(sequences) - - batch_of_metrics = [] - - for ind_batch, pred_batch, crossent_batch in zip(indices, predictions, - cross_entropy): - metrics = [] - - for index, pred, crossent in zip(ind_batch, pred_batch, crossent_batch): - metrics.append([str(id_to_word[index]), pred, crossent]) - - batch_of_metrics.append(metrics) - return batch_of_metrics - - -def print_and_log(log, id_to_word, sequence_eval, max_num_to_print=5): - """Helper function for printing and logging evaluated sequences.""" - indices_eval = convert_to_indices(sequence_eval) - indices_arr = 
np.asarray(indices_eval) - samples = convert_to_human_readable(id_to_word, indices_arr, max_num_to_print) - - for i, sample in enumerate(samples): - print('Sample', i, '. ', sample) - log.write('\nSample ' + str(i) + '. ' + sample) - log.write('\n') - print('\n') - log.flush() - - -def convert_to_human_readable(id_to_word, arr, max_num_to_print): - """Convert a np.array of indices into words using id_to_word dictionary. - Return max_num_to_print results. - """ - assert arr.ndim == 2 - - samples = [] - for sequence_id in xrange(min(len(arr), max_num_to_print)): - buffer_str = ' '.join( - [str(id_to_word[index]) for index in arr[sequence_id, :]]) - samples.append(buffer_str) - return samples - - -def index_to_vocab_array(indices, vocab_size, sequence_length): - """Convert the indices into an array with vocab_size one-hot encoding.""" - - # Extract properties of the indices. - num_batches = len(indices) - shape = list(indices.shape) - shape.append(vocab_size) - - # Construct the vocab_size array. - new_arr = np.zeros(shape) - - for n in xrange(num_batches): - indices_batch = indices[n] - new_arr_batch = new_arr[n] - - # We map all indices greater than the vocabulary size to an unknown - # character. - indices_batch = np.where(indices_batch < vocab_size, indices_batch, - vocab_size - 1) - - # Convert indices to vocab_size dimensions. - new_arr_batch[np.arange(sequence_length), indices_batch] = 1 - return new_arr - - -def convert_to_indices(sequences): - """Convert a list of size [batch_size, sequence_length, vocab_size] to - a list of size [batch_size, sequence_length] where the vocab element is - denoted by the index. - """ - batch_of_indices = [] - - for sequence in sequences: - indices = [] - for embedding in sequence: - indices.append(np.argmax(embedding)) - batch_of_indices.append(indices) - return batch_of_indices - - -def convert_and_zip(id_to_word, sequences, predictions): - """Helper function for printing or logging. 
Retrieves list of sequences - and predictions and zips them together. - """ - indices = convert_to_indices(sequences) - - batch_of_indices_predictions = [] - - for index_batch, pred_batch in zip(indices, predictions): - indices_predictions = [] - - for index, pred in zip(index_batch, pred_batch): - indices_predictions.append([str(id_to_word[index]), pred]) - batch_of_indices_predictions.append(indices_predictions) - return batch_of_indices_predictions - - -def recursive_length(item): - """Recursively determine the total number of elements in nested list.""" - if type(item) == list: - return sum(recursive_length(subitem) for subitem in item) - else: - return 1. - - -def percent_correct(real_sequence, fake_sequences): - """Determine the percent of tokens correctly generated within a batch.""" - identical = 0. - for fake_sequence in fake_sequences: - for real, fake in zip(real_sequence, fake_sequence): - if real == fake: - identical += 1. - return identical / recursive_length(fake_sequences) diff --git a/research/maskgan/model_utils/model_construction.py b/research/maskgan/model_utils/model_construction.py deleted file mode 100644 index 8dfa1df343984d903ace5984a90c36cc0b67dbe3..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/model_construction.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Model construction.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf -from models import bidirectional -from models import bidirectional_vd - -from models import bidirectional_zaremba -from models import cnn -from models import critic_vd -from models import feedforward -from models import rnn -from models import rnn_nas -from models import rnn_vd -from models import rnn_zaremba -from models import seq2seq -from models import seq2seq_nas -from models import seq2seq_vd -from models import seq2seq_zaremba - -FLAGS = tf.app.flags.FLAGS - - -# TODO(adai): IMDB labels placeholder to model. -def create_generator(hparams, - inputs, - targets, - present, - is_training, - is_validating, - reuse=None): - """Create the Generator model specified by the FLAGS and hparams. - - Args; - hparams: Hyperparameters for the MaskGAN. - inputs: tf.int32 Tensor of the sequence input of shape [batch_size, - sequence_length]. - present: tf.bool Tensor indicating the presence or absence of the token - of shape [batch_size, sequence_length]. - is_training: Whether the model is training. - is_validating: Whether the model is being run in validation mode for - calculating the perplexity. - reuse (Optional): Whether to reuse the model. - - Returns: - Tuple of the (sequence, logits, log_probs) of the Generator. Sequence - and logits have shape [batch_size, sequence_length, vocab_size]. The - log_probs will have shape [batch_size, sequence_length]. Log_probs - corresponds to the log probability of selecting the words. 
- """ - if FLAGS.generator_model == 'rnn': - (sequence, logits, log_probs, initial_state, final_state) = rnn.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'rnn_zaremba': - (sequence, logits, log_probs, initial_state, - final_state) = rnn_zaremba.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'seq2seq': - (sequence, logits, log_probs, initial_state, - final_state) = seq2seq.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'seq2seq_zaremba': - (sequence, logits, log_probs, initial_state, - final_state) = seq2seq_zaremba.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'rnn_nas': - (sequence, logits, log_probs, initial_state, - final_state) = rnn_nas.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'seq2seq_nas': - (sequence, logits, log_probs, initial_state, - final_state) = seq2seq_nas.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - elif FLAGS.generator_model == 'seq2seq_vd': - (sequence, logits, log_probs, initial_state, final_state, - encoder_states) = seq2seq_vd.generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - else: - raise NotImplementedError - return (sequence, logits, log_probs, initial_state, final_state, - encoder_states) - - -def create_discriminator(hparams, - sequence, - is_training, - reuse=None, - initial_state=None, - inputs=None, - present=None): - 
"""Create the Discriminator model specified by the FLAGS and hparams. - - Args: - hparams: Hyperparameters for the MaskGAN. - sequence: tf.int32 Tensor sequence of shape [batch_size, sequence_length] - is_training: Whether the model is training. - reuse (Optional): Whether to reuse the model. - - Returns: - predictions: tf.float32 Tensor of predictions of shape [batch_size, - sequence_length] - """ - if FLAGS.discriminator_model == 'cnn': - predictions = cnn.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'fnn': - predictions = feedforward.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'rnn': - predictions = rnn.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'bidirectional': - predictions = bidirectional.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'bidirectional_zaremba': - predictions = bidirectional_zaremba.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'seq2seq_vd': - predictions = seq2seq_vd.discriminator( - hparams, - inputs, - present, - sequence, - is_training=is_training, - reuse=reuse) - elif FLAGS.discriminator_model == 'rnn_zaremba': - predictions = rnn_zaremba.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'rnn_nas': - predictions = rnn_nas.discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - elif FLAGS.discriminator_model == 'rnn_vd': - predictions = rnn_vd.discriminator( - hparams, - sequence, - is_training=is_training, - reuse=reuse, - initial_state=initial_state) - elif FLAGS.discriminator_model == 'bidirectional_vd': - predictions = bidirectional_vd.discriminator( - hparams, - sequence, - is_training=is_training, - reuse=reuse, - 
initial_state=initial_state) - else: - raise NotImplementedError - return predictions - - -def create_critic(hparams, sequence, is_training, reuse=None): - """Create the Critic model specified by the FLAGS and hparams. - - Args: - hparams: Hyperparameters for the MaskGAN. - sequence: tf.int32 Tensor sequence of shape [batch_size, sequence_length] - is_training: Whether the model is training. - reuse (Optional): Whether to reuse the model. - - Returns: - values: tf.float32 Tensor of predictions of shape [batch_size, - sequence_length] - """ - if FLAGS.baseline_method == 'critic': - if FLAGS.discriminator_model == 'seq2seq_vd': - values = critic_vd.critic_seq2seq_vd_derivative( - hparams, sequence, is_training, reuse=reuse) - else: - raise NotImplementedError - else: - raise NotImplementedError - return values diff --git a/research/maskgan/model_utils/model_losses.py b/research/maskgan/model_utils/model_losses.py deleted file mode 100644 index c8f337dc48b4f1efb1cf8604327376ddaa9994ea..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/model_losses.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Model loss construction.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports -import numpy as np -from six.moves import xrange -import tensorflow as tf - -# Useful for REINFORCE baseline. -from losses import losses - -FLAGS = tf.app.flags.FLAGS - - -def create_dis_loss(fake_predictions, real_predictions, targets_present): - """Compute Discriminator loss across real/fake.""" - - missing = tf.cast(targets_present, tf.int32) - missing = 1 - missing - missing = tf.cast(missing, tf.bool) - - real_labels = tf.ones([FLAGS.batch_size, FLAGS.sequence_length]) - dis_loss_real = tf.losses.sigmoid_cross_entropy( - real_labels, real_predictions, weights=missing) - dis_loss_fake = tf.losses.sigmoid_cross_entropy( - targets_present, fake_predictions, weights=missing) - - dis_loss = (dis_loss_fake + dis_loss_real) / 2. - return dis_loss, dis_loss_fake, dis_loss_real - - -def create_critic_loss(cumulative_rewards, estimated_values, present): - """Compute Critic loss in estimating the value function. This should be an - estimate only for the missing elements.""" - missing = tf.cast(present, tf.int32) - missing = 1 - missing - missing = tf.cast(missing, tf.bool) - - loss = tf.losses.mean_squared_error( - labels=cumulative_rewards, predictions=estimated_values, weights=missing) - return loss - - -def create_masked_cross_entropy_loss(targets, present, logits): - """Calculate the cross entropy loss matrices for the masked tokens.""" - cross_entropy_losses = losses.cross_entropy_loss_matrix(targets, logits) - - # Zeros matrix. 
- zeros_losses = tf.zeros( - shape=[FLAGS.batch_size, FLAGS.sequence_length], dtype=tf.float32) - - missing_ce_loss = tf.where(present, zeros_losses, cross_entropy_losses) - - return missing_ce_loss - - -def calculate_reinforce_objective(hparams, - log_probs, - dis_predictions, - present, - estimated_values=None): - """Calculate the REINFORCE objectives. The REINFORCE objective should - only be on the tokens that were missing. Specifically, the final Generator - reward should be based on the Discriminator predictions on missing tokens. - The log probaibilities should be only for missing tokens and the baseline - should be calculated only on the missing tokens. - - For this model, we optimize the reward is the log of the *conditional* - probability the Discriminator assigns to the distribution. Specifically, for - a Discriminator D which outputs probability of real, given the past context, - - r_t = log D(x_t|x_0,x_1,...x_{t-1}) - - And the policy for Generator G is the log-probability of taking action x2 - given the past context. - - - Args: - hparams: MaskGAN hyperparameters. - log_probs: tf.float32 Tensor of log probailities of the tokens selected by - the Generator. Shape [batch_size, sequence_length]. - dis_predictions: tf.float32 Tensor of the predictions from the - Discriminator. Shape [batch_size, sequence_length]. - present: tf.bool Tensor indicating which tokens are present. Shape - [batch_size, sequence_length]. - estimated_values: tf.float32 Tensor of estimated state values of tokens. - Shape [batch_size, sequence_length] - - Returns: - final_gen_objective: Final REINFORCE objective for the sequence. 
- rewards: tf.float32 Tensor of rewards for sequence of shape [batch_size, - sequence_length] - advantages: tf.float32 Tensor of advantages for sequence of shape - [batch_size, sequence_length] - baselines: tf.float32 Tensor of baselines for sequence of shape - [batch_size, sequence_length] - maintain_averages_op: ExponentialMovingAverage apply average op to - maintain the baseline. - """ - # Final Generator objective. - final_gen_objective = 0. - gamma = hparams.rl_discount_rate - eps = 1e-7 - - # Generator rewards are log-probabilities. - eps = tf.constant(1e-7, tf.float32) - dis_predictions = tf.nn.sigmoid(dis_predictions) - rewards = tf.log(dis_predictions + eps) - - # Apply only for missing elements. - zeros = tf.zeros_like(present, dtype=tf.float32) - log_probs = tf.where(present, zeros, log_probs) - rewards = tf.where(present, zeros, rewards) - - # Unstack Tensors into lists. - rewards_list = tf.unstack(rewards, axis=1) - log_probs_list = tf.unstack(log_probs, axis=1) - missing = 1. - tf.cast(present, tf.float32) - missing_list = tf.unstack(missing, axis=1) - - # Cumulative Discounted Returns. The true value function V*(s). - cumulative_rewards = [] - for t in xrange(FLAGS.sequence_length): - cum_value = tf.zeros(shape=[FLAGS.batch_size]) - for s in xrange(t, FLAGS.sequence_length): - cum_value += missing_list[s] * np.power(gamma, (s - t)) * rewards_list[s] - cumulative_rewards.append(cum_value) - cumulative_rewards = tf.stack(cumulative_rewards, axis=1) - - ## REINFORCE with different baselines. - # We create a separate critic functionality for the Discriminator. This - # will need to operate unidirectionally and it may take in the past context. - if FLAGS.baseline_method == 'critic': - - # Critic loss calculated from the estimated value function \hat{V}(s) - # versus the true value function V*(s). - critic_loss = create_critic_loss(cumulative_rewards, estimated_values, - present) - - # Baselines are coming from the critic's estimated state values. 
- baselines = tf.unstack(estimated_values, axis=1) - - ## Calculate the Advantages, A(s,a) = Q(s,a) - \hat{V}(s). - advantages = [] - for t in xrange(FLAGS.sequence_length): - log_probability = log_probs_list[t] - cum_advantage = tf.zeros(shape=[FLAGS.batch_size]) - - for s in xrange(t, FLAGS.sequence_length): - cum_advantage += missing_list[s] * np.power(gamma, - (s - t)) * rewards_list[s] - cum_advantage -= baselines[t] - # Clip advantages. - cum_advantage = tf.clip_by_value(cum_advantage, -FLAGS.advantage_clipping, - FLAGS.advantage_clipping) - advantages.append(missing_list[t] * cum_advantage) - final_gen_objective += tf.multiply( - log_probability, missing_list[t] * tf.stop_gradient(cum_advantage)) - - maintain_averages_op = None - baselines = tf.stack(baselines, axis=1) - advantages = tf.stack(advantages, axis=1) - - # Split the batch into half. Use half for MC estimates for REINFORCE. - # Use the other half to establish a baseline. - elif FLAGS.baseline_method == 'dis_batch': - # TODO(liamfedus): Recheck. - [rewards_half, baseline_half] = tf.split( - rewards, num_or_size_splits=2, axis=0) - [log_probs_half, _] = tf.split(log_probs, num_or_size_splits=2, axis=0) - [reward_present_half, baseline_present_half] = tf.split( - present, num_or_size_splits=2, axis=0) - - # Unstack to lists. - baseline_list = tf.unstack(baseline_half, axis=1) - baseline_missing = 1. - tf.cast(baseline_present_half, tf.float32) - baseline_missing_list = tf.unstack(baseline_missing, axis=1) - - baselines = [] - for t in xrange(FLAGS.sequence_length): - # Calculate baseline only for missing tokens. - num_missing = tf.reduce_sum(baseline_missing_list[t]) - - avg_baseline = tf.reduce_sum( - baseline_missing_list[t] * baseline_list[t], keep_dims=True) / ( - num_missing + eps) - baseline = tf.tile(avg_baseline, multiples=[FLAGS.batch_size / 2]) - baselines.append(baseline) - - # Unstack to lists. 
- rewards_list = tf.unstack(rewards_half, axis=1) - log_probs_list = tf.unstack(log_probs_half, axis=1) - reward_missing = 1. - tf.cast(reward_present_half, tf.float32) - reward_missing_list = tf.unstack(reward_missing, axis=1) - - ## Calculate the Advantages, A(s,a) = Q(s,a) - \hat{V}(s). - advantages = [] - for t in xrange(FLAGS.sequence_length): - log_probability = log_probs_list[t] - cum_advantage = tf.zeros(shape=[FLAGS.batch_size / 2]) - - for s in xrange(t, FLAGS.sequence_length): - cum_advantage += reward_missing_list[s] * np.power(gamma, (s - t)) * ( - rewards_list[s] - baselines[s]) - # Clip advantages. - cum_advantage = tf.clip_by_value(cum_advantage, -FLAGS.advantage_clipping, - FLAGS.advantage_clipping) - advantages.append(reward_missing_list[t] * cum_advantage) - final_gen_objective += tf.multiply( - log_probability, - reward_missing_list[t] * tf.stop_gradient(cum_advantage)) - - # Cumulative Discounted Returns. The true value function V*(s). - cumulative_rewards = [] - for t in xrange(FLAGS.sequence_length): - cum_value = tf.zeros(shape=[FLAGS.batch_size / 2]) - for s in xrange(t, FLAGS.sequence_length): - cum_value += reward_missing_list[s] * np.power(gamma, ( - s - t)) * rewards_list[s] - cumulative_rewards.append(cum_value) - cumulative_rewards = tf.stack(cumulative_rewards, axis=1) - - rewards = rewards_half - critic_loss = None - maintain_averages_op = None - baselines = tf.stack(baselines, axis=1) - advantages = tf.stack(advantages, axis=1) - - # Exponential Moving Average baseline. - elif FLAGS.baseline_method == 'ema': - # TODO(liamfedus): Recheck. - # Lists of rewards and Log probabilities of the actions taken only for - # missing tokens. - ema = tf.train.ExponentialMovingAverage(decay=hparams.baseline_decay) - maintain_averages_op = ema.apply(rewards_list) - - baselines = [] - for r in rewards_list: - baselines.append(ema.average(r)) - - ## Calculate the Advantages, A(s,a) = Q(s,a) - \hat{V}(s). 
- advantages = [] - for t in xrange(FLAGS.sequence_length): - log_probability = log_probs_list[t] - - # Calculate the forward advantage only on the missing tokens. - cum_advantage = tf.zeros(shape=[FLAGS.batch_size]) - for s in xrange(t, FLAGS.sequence_length): - cum_advantage += missing_list[s] * np.power(gamma, (s - t)) * ( - rewards_list[s] - baselines[s]) - # Clip advantages. - cum_advantage = tf.clip_by_value(cum_advantage, -FLAGS.advantage_clipping, - FLAGS.advantage_clipping) - advantages.append(missing_list[t] * cum_advantage) - final_gen_objective += tf.multiply( - log_probability, missing_list[t] * tf.stop_gradient(cum_advantage)) - - critic_loss = None - baselines = tf.stack(baselines, axis=1) - advantages = tf.stack(advantages, axis=1) - - elif FLAGS.baseline_method is None: - num_missing = tf.reduce_sum(missing) - final_gen_objective += tf.reduce_sum(rewards) / (num_missing + eps) - baselines = tf.zeros_like(rewards) - critic_loss = None - maintain_averages_op = None - advantages = cumulative_rewards - - else: - raise NotImplementedError - - return [ - final_gen_objective, log_probs, rewards, advantages, baselines, - maintain_averages_op, critic_loss, cumulative_rewards - ] - - -def calculate_log_perplexity(logits, targets, present): - """Calculate the average log perplexity per *missing* token. - - Args: - logits: tf.float32 Tensor of the logits of shape [batch_size, - sequence_length, vocab_size]. - targets: tf.int32 Tensor of the sequence target of shape [batch_size, - sequence_length]. - present: tf.bool Tensor indicating the presence or absence of the token - of shape [batch_size, sequence_length]. - - Returns: - avg_log_perplexity: Scalar indicating the average log perplexity per - missing token in the batch. 
- """ - # logits = tf.Print(logits, [logits], message='logits:', summarize=50) - # targets = tf.Print(targets, [targets], message='targets:', summarize=50) - eps = 1e-12 - logits = tf.reshape(logits, [-1, FLAGS.vocab_size]) - - # Only calculate log-perplexity on missing tokens. - weights = tf.cast(present, tf.float32) - weights = 1. - weights - weights = tf.reshape(weights, [-1]) - num_missing = tf.reduce_sum(weights) - - log_perplexity = tf.contrib.legacy_seq2seq.sequence_loss_by_example( - [logits], [tf.reshape(targets, [-1])], [weights]) - - avg_log_perplexity = tf.reduce_sum(log_perplexity) / (num_missing + eps) - return avg_log_perplexity diff --git a/research/maskgan/model_utils/model_optimization.py b/research/maskgan/model_utils/model_optimization.py deleted file mode 100644 index caae271fe8bed390f032763972a43312f7a8ce9b..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/model_optimization.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Model optimization.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def create_dis_pretrain_op(hparams, dis_loss, global_step): - """Create a train op for pretraining.""" - with tf.name_scope('pretrain_generator'): - optimizer = tf.train.AdamOptimizer(hparams.dis_pretrain_learning_rate) - dis_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('dis') - ] - if FLAGS.dis_update_share_embedding and FLAGS.dis_share_embedding: - shared_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/embedding' - ][0] - dis_vars.append(shared_embedding) - dis_grads = tf.gradients(dis_loss, dis_vars) - dis_grads_clipped, _ = tf.clip_by_global_norm(dis_grads, - FLAGS.grad_clipping) - dis_pretrain_op = optimizer.apply_gradients( - zip(dis_grads_clipped, dis_vars), global_step=global_step) - return dis_pretrain_op - - -def create_gen_pretrain_op(hparams, cross_entropy_loss, global_step): - """Create a train op for pretraining.""" - with tf.name_scope('pretrain_generator'): - optimizer = tf.train.AdamOptimizer(hparams.gen_pretrain_learning_rate) - gen_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('gen') - ] - gen_grads = tf.gradients(cross_entropy_loss, gen_vars) - gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, - FLAGS.grad_clipping) - gen_pretrain_op = optimizer.apply_gradients( - zip(gen_grads_clipped, gen_vars), global_step=global_step) - return gen_pretrain_op - - -def create_gen_train_op(hparams, learning_rate, gen_loss, global_step, mode): - """Create Generator train op.""" - del hparams - with tf.name_scope('train_generator'): - if FLAGS.generator_optimizer == 'sgd': - gen_optimizer = tf.train.GradientDescentOptimizer(learning_rate) - elif 
FLAGS.generator_optimizer == 'adam': - gen_optimizer = tf.train.AdamOptimizer(learning_rate) - else: - raise NotImplementedError - gen_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('gen') - ] - print('Optimizing Generator vars.') - for v in gen_vars: - print(v) - if mode == 'MINIMIZE': - gen_grads = tf.gradients(gen_loss, gen_vars) - elif mode == 'MAXIMIZE': - gen_grads = tf.gradients(-gen_loss, gen_vars) - else: - raise ValueError("Must be one of 'MINIMIZE' or 'MAXIMIZE'") - gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, - FLAGS.grad_clipping) - gen_train_op = gen_optimizer.apply_gradients( - zip(gen_grads_clipped, gen_vars), global_step=global_step) - return gen_train_op, gen_grads_clipped, gen_vars - - -def create_reinforce_gen_train_op(hparams, learning_rate, final_gen_reward, - averages_op, global_step): - """Create the Generator train_op when using REINFORCE. - - Args: - hparams: MaskGAN hyperparameters. - learning_rate: tf.Variable scalar learning rate. - final_gen_objective: Scalar final REINFORCE objective for the sequence. - averages_op: ExponentialMovingAverage apply average op to - maintain the baseline. - global_step: global_step tf.Variable. - - Returns: - gen_train_op: Generator training op. - """ - del hparams - with tf.name_scope('train_generator'): - if FLAGS.generator_optimizer == 'sgd': - gen_optimizer = tf.train.GradientDescentOptimizer(learning_rate) - elif FLAGS.generator_optimizer == 'adam': - gen_optimizer = tf.train.AdamOptimizer(learning_rate) - else: - raise NotImplementedError - gen_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('gen') - ] - print('\nOptimizing Generator vars:') - for v in gen_vars: - print(v) - - # Maximize reward. 
- gen_grads = tf.gradients(-final_gen_reward, gen_vars) - gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, - FLAGS.grad_clipping) - maximize_op = gen_optimizer.apply_gradients( - zip(gen_grads_clipped, gen_vars), global_step=global_step) - - # Group maintain averages op. - if averages_op: - gen_train_op = tf.group(maximize_op, averages_op) - else: - gen_train_op = maximize_op - - return [gen_train_op, gen_grads, gen_vars] - - -def create_dis_train_op(hparams, dis_loss, global_step): - """Create Discriminator train op.""" - with tf.name_scope('train_discriminator'): - dis_optimizer = tf.train.AdamOptimizer(hparams.dis_learning_rate) - dis_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('dis') - ] - if FLAGS.dis_update_share_embedding and FLAGS.dis_share_embedding: - shared_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/embedding' - ][0] - dis_vars.append(shared_embedding) - print('\nOptimizing Discriminator vars:') - for v in dis_vars: - print(v) - dis_grads = tf.gradients(dis_loss, dis_vars) - dis_grads_clipped, _ = tf.clip_by_global_norm(dis_grads, - FLAGS.grad_clipping) - dis_train_op = dis_optimizer.apply_gradients( - zip(dis_grads_clipped, dis_vars), global_step=global_step) - return dis_train_op, dis_grads_clipped, dis_vars - - -def create_critic_train_op(hparams, critic_loss, global_step): - """Create Discriminator train op.""" - with tf.name_scope('train_critic'): - critic_optimizer = tf.train.AdamOptimizer(hparams.critic_learning_rate) - output_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('critic') - ] - - if FLAGS.critic_update_dis_vars: - if FLAGS.discriminator_model == 'bidirectional_vd': - critic_vars = [ - v for v in tf.trainable_variables() - if v.op.name.startswith('dis/rnn') - ] - elif FLAGS.discriminator_model == 'seq2seq_vd': - critic_vars = [ - v for v in tf.trainable_variables() - if v.op.name.startswith('dis/decoder/rnn/multi_rnn_cell') - ] - 
critic_vars.extend(output_vars) - else: - critic_vars = output_vars - print('\nOptimizing Critic vars:') - for v in critic_vars: - print(v) - critic_grads = tf.gradients(critic_loss, critic_vars) - critic_grads_clipped, _ = tf.clip_by_global_norm(critic_grads, - FLAGS.grad_clipping) - critic_train_op = critic_optimizer.apply_gradients( - zip(critic_grads_clipped, critic_vars), global_step=global_step) - return critic_train_op, critic_grads_clipped, critic_vars diff --git a/research/maskgan/model_utils/model_utils.py b/research/maskgan/model_utils/model_utils.py deleted file mode 100644 index 0e3183582e0f17b7d4ca54450231ea9bad039e40..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/model_utils.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Model utilities.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports -import numpy as np - -import tensorflow as tf -from model_utils import variable_mapping - -FLAGS = tf.app.flags.FLAGS - - -def generate_mask(): - """Generate the mask to be fed into the model.""" - if FLAGS.mask_strategy == 'random': - p = np.random.choice( - [True, False], - size=[FLAGS.batch_size, FLAGS.sequence_length], - p=[FLAGS.is_present_rate, 1. 
- FLAGS.is_present_rate]) - - elif FLAGS.mask_strategy == 'contiguous': - masked_length = int((1 - FLAGS.is_present_rate) * FLAGS.sequence_length) - 1 - # Determine location to start masking. - start_mask = np.random.randint( - 1, FLAGS.sequence_length - masked_length + 1, size=FLAGS.batch_size) - p = np.full([FLAGS.batch_size, FLAGS.sequence_length], True, dtype=bool) - - # Create contiguous masked section to be False. - for i, index in enumerate(start_mask): - p[i, index:index + masked_length] = False - - else: - raise NotImplementedError - - return p - - -def assign_percent_real(session, percent_real_update, new_rate, current_rate): - """Run assign operation where the we load the current_rate of percent - real into a Tensorflow variable. - - Args: - session: Current tf.Session. - percent_real_update: tf.assign operation. - new_rate: tf.placeholder for the new rate. - current_rate: Percent of tokens that are currently real. Fake tokens - are the ones being imputed by the Generator. - """ - session.run(percent_real_update, feed_dict={new_rate: current_rate}) - - -def assign_learning_rate(session, lr_update, lr_placeholder, new_lr): - """Run assign operation where the we load the current_rate of percent - real into a Tensorflow variable. - - Args: - session: Current tf.Session. - lr_update: tf.assign operation. - lr_placeholder: tf.placeholder for the new learning rate. - new_lr: New learning rate to use. - """ - session.run(lr_update, feed_dict={lr_placeholder: new_lr}) - - -def clip_weights(variables, c_lower, c_upper): - """Clip a list of weights to be within a certain range. - - Args: - variables: List of tf.Variable weights. - c_lower: Lower bound for weights. - c_upper: Upper bound for weights. 
- """ - clip_ops = [] - - for var in variables: - clipped_var = tf.clip_by_value(var, c_lower, c_upper) - - clip_ops.append(tf.assign(var, clipped_var)) - return tf.group(*clip_ops) - - -def retrieve_init_savers(hparams): - """Retrieve a dictionary of all the initial savers for the models. - - Args: - hparams: MaskGAN hyperparameters. - """ - ## Dictionary of init savers. - init_savers = {} - - ## Load Generator weights from MaskGAN checkpoint. - if FLAGS.maskgan_ckpt: - gen_vars = [ - v for v in tf.trainable_variables() if v.op.name.startswith('gen') - ] - init_saver = tf.train.Saver(var_list=gen_vars) - init_savers['init_saver'] = init_saver - - ## Load the Discriminator weights from the MaskGAN checkpoint if - # the weights are compatible. - if FLAGS.discriminator_model == 'seq2seq_vd': - dis_variable_maps = variable_mapping.dis_seq2seq_vd(hparams) - dis_init_saver = tf.train.Saver(var_list=dis_variable_maps) - init_savers['dis_init_saver'] = dis_init_saver - - ## Load weights from language model checkpoint. - if FLAGS.language_model_ckpt_dir: - if FLAGS.maskgan_ckpt is None: - ## Generator Variables/Savers. - if FLAGS.generator_model == 'rnn_nas': - gen_variable_maps = variable_mapping.rnn_nas(hparams, model='gen') - gen_init_saver = tf.train.Saver(var_list=gen_variable_maps) - init_savers['gen_init_saver'] = gen_init_saver - - elif FLAGS.generator_model == 'seq2seq_nas': - # Encoder. - gen_encoder_variable_maps = variable_mapping.gen_encoder_seq2seq_nas( - hparams) - gen_encoder_init_saver = tf.train.Saver( - var_list=gen_encoder_variable_maps) - # Decoder. - gen_decoder_variable_maps = variable_mapping.gen_decoder_seq2seq_nas( - hparams) - gen_decoder_init_saver = tf.train.Saver( - var_list=gen_decoder_variable_maps) - init_savers['gen_encoder_init_saver'] = gen_encoder_init_saver - init_savers['gen_decoder_init_saver'] = gen_decoder_init_saver - - # seq2seq_vd derived from the same code base as seq2seq_zaremba. 
- elif (FLAGS.generator_model == 'seq2seq_zaremba' or - FLAGS.generator_model == 'seq2seq_vd'): - # Encoder. - gen_encoder_variable_maps = variable_mapping.gen_encoder_seq2seq( - hparams) - gen_encoder_init_saver = tf.train.Saver( - var_list=gen_encoder_variable_maps) - # Decoder. - gen_decoder_variable_maps = variable_mapping.gen_decoder_seq2seq( - hparams) - gen_decoder_init_saver = tf.train.Saver( - var_list=gen_decoder_variable_maps) - init_savers['gen_encoder_init_saver'] = gen_encoder_init_saver - init_savers['gen_decoder_init_saver'] = gen_decoder_init_saver - - else: - raise NotImplementedError - - ## Discriminator Variables/Savers. - if FLAGS.discriminator_model == 'rnn_nas': - dis_variable_maps = variable_mapping.rnn_nas(hparams, model='dis') - dis_init_saver = tf.train.Saver(var_list=dis_variable_maps) - init_savers['dis_init_saver'] = dis_init_saver - - # rnn_vd derived from the same code base as rnn_zaremba. - elif (FLAGS.discriminator_model == 'rnn_zaremba' or - FLAGS.discriminator_model == 'rnn_vd'): - dis_variable_maps = variable_mapping.rnn_zaremba(hparams, model='dis') - dis_init_saver = tf.train.Saver(var_list=dis_variable_maps) - init_savers['dis_init_saver'] = dis_init_saver - - elif (FLAGS.discriminator_model == 'bidirectional_zaremba' or - FLAGS.discriminator_model == 'bidirectional_vd'): - dis_fwd_variable_maps = variable_mapping.dis_fwd_bidirectional(hparams) - dis_bwd_variable_maps = variable_mapping.dis_bwd_bidirectional(hparams) - # Savers for the forward/backward Discriminator components. 
- dis_fwd_init_saver = tf.train.Saver(var_list=dis_fwd_variable_maps) - dis_bwd_init_saver = tf.train.Saver(var_list=dis_bwd_variable_maps) - init_savers['dis_fwd_init_saver'] = dis_fwd_init_saver - init_savers['dis_bwd_init_saver'] = dis_bwd_init_saver - - elif FLAGS.discriminator_model == 'cnn': - dis_variable_maps = variable_mapping.cnn() - dis_init_saver = tf.train.Saver(var_list=dis_variable_maps) - init_savers['dis_init_saver'] = dis_init_saver - - elif FLAGS.discriminator_model == 'seq2seq_vd': - # Encoder. - dis_encoder_variable_maps = variable_mapping.dis_encoder_seq2seq(hparams) - dis_encoder_init_saver = tf.train.Saver( - var_list=dis_encoder_variable_maps) - # Decoder. - dis_decoder_variable_maps = variable_mapping.dis_decoder_seq2seq(hparams) - dis_decoder_init_saver = tf.train.Saver( - var_list=dis_decoder_variable_maps) - init_savers['dis_encoder_init_saver'] = dis_encoder_init_saver - init_savers['dis_decoder_init_saver'] = dis_decoder_init_saver - - return init_savers - - -def init_fn(init_savers, sess): - """The init_fn to be passed to the Supervisor. - - Args: - init_savers: Dictionary of init_savers. 'init_saver_name': init_saver. - sess: tf.Session. - """ - ## Load Generator weights from MaskGAN checkpoint. - if FLAGS.maskgan_ckpt: - print('Restoring Generator from %s.' % FLAGS.maskgan_ckpt) - tf.logging.info('Restoring Generator from %s.' % FLAGS.maskgan_ckpt) - print('Asserting Generator is a seq2seq-variant.') - tf.logging.info('Asserting Generator is a seq2seq-variant.') - assert FLAGS.generator_model.startswith('seq2seq') - init_saver = init_savers['init_saver'] - init_saver.restore(sess, FLAGS.maskgan_ckpt) - - ## Load the Discriminator weights from the MaskGAN checkpoint if - # the weights are compatible. - if FLAGS.discriminator_model == 'seq2seq_vd': - print('Restoring Discriminator from %s.' % FLAGS.maskgan_ckpt) - tf.logging.info('Restoring Discriminator from %s.' 
% FLAGS.maskgan_ckpt) - dis_init_saver = init_savers['dis_init_saver'] - dis_init_saver.restore(sess, FLAGS.maskgan_ckpt) - - ## Load weights from language model checkpoint. - if FLAGS.language_model_ckpt_dir: - if FLAGS.maskgan_ckpt is None: - ## Generator Models. - if FLAGS.generator_model == 'rnn_nas': - load_ckpt = tf.train.latest_checkpoint(FLAGS.language_model_ckpt_dir) - print('Restoring Generator from %s.' % load_ckpt) - tf.logging.info('Restoring Generator from %s.' % load_ckpt) - gen_init_saver = init_savers['gen_init_saver'] - gen_init_saver.restore(sess, load_ckpt) - - elif FLAGS.generator_model.startswith('seq2seq'): - load_ckpt = tf.train.latest_checkpoint(FLAGS.language_model_ckpt_dir) - print('Restoring Generator from %s.' % load_ckpt) - tf.logging.info('Restoring Generator from %s.' % load_ckpt) - gen_encoder_init_saver = init_savers['gen_encoder_init_saver'] - gen_decoder_init_saver = init_savers['gen_decoder_init_saver'] - gen_encoder_init_saver.restore(sess, load_ckpt) - gen_decoder_init_saver.restore(sess, load_ckpt) - - ## Discriminator Models. - if (FLAGS.discriminator_model == 'rnn_nas' or - FLAGS.discriminator_model == 'rnn_zaremba' or - FLAGS.discriminator_model == 'rnn_vd' or - FLAGS.discriminator_model == 'cnn'): - load_ckpt = tf.train.latest_checkpoint(FLAGS.language_model_ckpt_dir) - print('Restoring Discriminator from %s.' % load_ckpt) - tf.logging.info('Restoring Discriminator from %s.' 
% load_ckpt) - dis_init_saver = init_savers['dis_init_saver'] - dis_init_saver.restore(sess, load_ckpt) - - elif (FLAGS.discriminator_model == 'bidirectional_zaremba' or - FLAGS.discriminator_model == 'bidirectional_vd'): - assert FLAGS.language_model_ckpt_dir_reversed is not None, ( - 'Need a reversed directory to fill in the backward components.') - load_fwd_ckpt = tf.train.latest_checkpoint(FLAGS.language_model_ckpt_dir) - load_bwd_ckpt = tf.train.latest_checkpoint( - FLAGS.language_model_ckpt_dir_reversed) - print('Restoring Discriminator from %s and %s.' % (load_fwd_ckpt, - load_bwd_ckpt)) - tf.logging.info('Restoring Discriminator from %s and %s.' % - (load_fwd_ckpt, load_bwd_ckpt)) - dis_fwd_init_saver = init_savers['dis_fwd_init_saver'] - dis_bwd_init_saver = init_savers['dis_bwd_init_saver'] - dis_fwd_init_saver.restore(sess, load_fwd_ckpt) - dis_bwd_init_saver.restore(sess, load_bwd_ckpt) - - elif FLAGS.discriminator_model == 'seq2seq_vd': - load_ckpt = tf.train.latest_checkpoint(FLAGS.language_model_ckpt_dir) - print('Restoring Discriminator from %s.' % load_ckpt) - tf.logging.info('Restoring Discriminator from %s.' % load_ckpt) - dis_encoder_init_saver = init_savers['dis_encoder_init_saver'] - dis_decoder_init_saver = init_savers['dis_decoder_init_saver'] - dis_encoder_init_saver.restore(sess, load_ckpt) - dis_decoder_init_saver.restore(sess, load_ckpt) - - else: - return diff --git a/research/maskgan/model_utils/n_gram.py b/research/maskgan/model_utils/n_gram.py deleted file mode 100644 index b889dde849a60d95aa38c57cd8c864249233514f..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/n_gram.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""We calculate n-Grams from the training text. We will use this as an -evaluation metric.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange - - -def hash_function(input_tuple): - """Hash function for a tuple.""" - return hash(input_tuple) - - -def find_all_ngrams(dataset, n): - """Generate a list of all ngrams.""" - return zip(*[dataset[i:] for i in xrange(n)]) - - -def construct_ngrams_dict(ngrams_list): - """Construct a ngram dictionary which maps an ngram tuple to the number - of times it appears in the text.""" - counts = {} - - for t in ngrams_list: - key = hash_function(t) - if key in counts: - counts[key] += 1 - else: - counts[key] = 1 - return counts - - -def percent_unique_ngrams_in_train(train_ngrams_dict, gen_ngrams_dict): - """Compute the percent of ngrams generated by the model that are - present in the training text and are unique.""" - - # *Total* number of n-grams produced by the generator. - total_ngrams_produced = 0 - - for _, value in gen_ngrams_dict.iteritems(): - total_ngrams_produced += value - - # The unique ngrams in the training set. - unique_ngrams_in_train = 0. 
- - for key, _ in gen_ngrams_dict.iteritems(): - if key in train_ngrams_dict: - unique_ngrams_in_train += 1 - return float(unique_ngrams_in_train) / float(total_ngrams_produced) diff --git a/research/maskgan/model_utils/variable_mapping.py b/research/maskgan/model_utils/variable_mapping.py deleted file mode 100644 index 0301b969716fe473ac98c2e3bba5c04662461954..0000000000000000000000000000000000000000 --- a/research/maskgan/model_utils/variable_mapping.py +++ /dev/null @@ -1,745 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def rnn_nas(hparams, model): - assert model == 'gen' or model == 'dis' - - # This logic is only valid for rnn_zaremba - if model == 'gen': - assert FLAGS.generator_model == 'rnn_nas' - assert hparams.gen_num_layers == 2 - - if model == 'dis': - assert FLAGS.discriminator_model == 'rnn_nas' - assert hparams.dis_num_layers == 2 - - # Output variables only for the Generator. Discriminator output biases - # will begin randomly initialized. 
- if model == 'gen': - softmax_b = [ - v for v in tf.trainable_variables() if v.op.name == 'gen/rnn/softmax_b' - ][0] - - # Common elements to Generator and Discriminator. - embedding = [ - v for v in tf.trainable_variables() - if v.op.name == str(model) + '/rnn/embedding' - ][0] - lstm_w_0 = [ - v for v in tf.trainable_variables() - if v.op.name == - str(model) + '/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat' - ][0] - lstm_b_0 = [ - v for v in tf.trainable_variables() - if v.op.name == str(model) + - '/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat' - ][0] - lstm_w_1 = [ - v for v in tf.trainable_variables() - if v.op.name == - str(model) + '/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat' - ][0] - lstm_b_1 = [ - v for v in tf.trainable_variables() - if v.op.name == str(model) + - '/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat' - ][0] - - # Dictionary mapping. - if model == 'gen': - variable_mapping = { - 'Model/embeddings/input_embedding': - embedding, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat': - lstm_w_0, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat': - lstm_b_0, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat': - lstm_w_1, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat': - lstm_b_1, - 'Model/softmax_b': - softmax_b - } - else: - variable_mapping = { - 'Model/embeddings/input_embedding': - embedding, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat': - lstm_w_0, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat': - lstm_b_0, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat': - lstm_w_1, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat': - lstm_b_1 - } - - return variable_mapping - - -def cnn(): - """Variable mapping for the CNN embedding. - - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_var. 
- """ - # This logic is only valid for cnn - assert FLAGS.discriminator_model == 'cnn' - - # Retrieve CNN embedding. - embedding = [ - v for v in tf.trainable_variables() if v.op.name == 'dis/embedding' - ][0] - - # Variable mapping. - variable_mapping = {'Model/embedding': embedding} - - return variable_mapping - - -def rnn_zaremba(hparams, model): - """Returns the PTB Variable name to MaskGAN Variable dictionary mapping. This - is a highly restrictive function just for testing. This will need to be - generalized. - - Args: - hparams: Hyperparameters for the MaskGAN. - model: Model type, one of ['gen', 'dis']. - - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_var. - """ - assert model == 'gen' or model == 'dis' - - # This logic is only valid for rnn_zaremba - if model == 'gen': - assert FLAGS.generator_model == 'rnn_zaremba' - assert hparams.gen_num_layers == 2 - - if model == 'dis': - assert (FLAGS.discriminator_model == 'rnn_zaremba' or - FLAGS.discriminator_model == 'rnn_vd') - assert hparams.dis_num_layers == 2 - - # Output variables only for the Generator. Discriminator output weights - # and biases will begin randomly initialized. - if model == 'gen': - softmax_w = [ - v for v in tf.trainable_variables() if v.op.name == 'gen/rnn/softmax_w' - ][0] - softmax_b = [ - v for v in tf.trainable_variables() if v.op.name == 'gen/rnn/softmax_b' - ][0] - - # Common elements to Generator and Discriminator. 
- if not FLAGS.dis_share_embedding or model != 'dis': - embedding = [ - v for v in tf.trainable_variables() - if v.op.name == str(model) + '/rnn/embedding' - ][0] - lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == str(model) + - '/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == str(model) + - '/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == str(model) + - '/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == str(model) + - '/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - # Dictionary mapping. - if model == 'gen': - variable_mapping = { - 'Model/embedding': embedding, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': lstm_b_1, - 'Model/softmax_w': softmax_w, - 'Model/softmax_b': softmax_b - } - else: - if FLAGS.dis_share_embedding: - variable_mapping = { - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': lstm_b_1 - } - else: - variable_mapping = { - 'Model/embedding': embedding, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': lstm_b_1 - } - - return variable_mapping - - -def gen_encoder_seq2seq_nas(hparams): - """Returns the NAS Variable name to MaskGAN Variable - dictionary 
mapping. This is a highly restrictive function just for testing. - This is for the *unidirecitional* seq2seq_nas encoder. - - Args: - hparams: Hyperparameters for the MaskGAN. - - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_varself. - """ - assert FLAGS.generator_model == 'seq2seq_nas' - assert hparams.gen_num_layers == 2 - ## Encoder forward variables. - - if not FLAGS.seq2seq_share_embedding: - encoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/encoder/rnn/embedding' - ][0] - encoder_lstm_w_0 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/encoder/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat' - ][0] - encoder_lstm_b_0 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/encoder/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat' - ][0] - encoder_lstm_w_1 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/encoder/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat' - ][0] - encoder_lstm_b_1 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/encoder/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat' - ][0] - - if not FLAGS.seq2seq_share_embedding: - variable_mapping = { - 'Model/embeddings/input_embedding': - encoder_embedding, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat': - encoder_lstm_w_0, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat': - encoder_lstm_b_0, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat': - encoder_lstm_w_1, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat': - encoder_lstm_b_1 - } - else: - variable_mapping = { - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat': - encoder_lstm_w_0, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat': - encoder_lstm_b_0, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat': - encoder_lstm_w_1, - 
'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat': - encoder_lstm_b_1 - } - return variable_mapping - - -def gen_decoder_seq2seq_nas(hparams): - assert FLAGS.generator_model == 'seq2seq_nas' - assert hparams.gen_num_layers == 2 - - decoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/embedding' - ][0] - decoder_lstm_w_0 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/decoder/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat' - ][0] - decoder_lstm_b_0 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/decoder/rnn/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat' - ][0] - decoder_lstm_w_1 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/decoder/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat' - ][0] - decoder_lstm_b_1 = [ - v for v in tf.trainable_variables() - if v.op.name == - 'gen/decoder/rnn/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat' - ][0] - - decoder_softmax_b = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/softmax_b' - ][0] - - variable_mapping = { - 'Model/embeddings/input_embedding': - decoder_embedding, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_h_mat': - decoder_lstm_w_0, - 'Model/RNN/GenericMultiRNNCell/Cell0/Alien/rnn_builder/big_inputs_mat': - decoder_lstm_b_0, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_h_mat': - decoder_lstm_w_1, - 'Model/RNN/GenericMultiRNNCell/Cell1/Alien/rnn_builder/big_inputs_mat': - decoder_lstm_b_1, - 'Model/softmax_b': - decoder_softmax_b - } - - return variable_mapping - - -def gen_encoder_seq2seq(hparams): - """Returns the PTB Variable name to MaskGAN Variable - dictionary mapping. This is a highly restrictive function just for testing. - This is foe the *unidirecitional* seq2seq_zaremba encoder. - - Args: - hparams: Hyperparameters for the MaskGAN. 
- - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_varself. - """ - assert (FLAGS.generator_model == 'seq2seq_zaremba' or - FLAGS.generator_model == 'seq2seq_vd') - assert hparams.gen_num_layers == 2 - - ## Encoder forward variables. - if not FLAGS.seq2seq_share_embedding: - encoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/encoder/rnn/embedding' - ][0] - encoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - encoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - if FLAGS.data_set == 'ptb': - model_str = 'Model' - else: - model_str = 'model' - - if not FLAGS.seq2seq_share_embedding: - variable_mapping = { - str(model_str) + '/embedding': - encoder_embedding, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - encoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - encoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - encoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - encoder_lstm_b_1 - } - else: - variable_mapping = { - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - encoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - encoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - encoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - encoder_lstm_b_1 - } - return 
variable_mapping - - -def gen_decoder_seq2seq(hparams): - assert (FLAGS.generator_model == 'seq2seq_zaremba' or - FLAGS.generator_model == 'seq2seq_vd') - assert hparams.gen_num_layers == 2 - - decoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/embedding' - ][0] - decoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - decoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'gen/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - decoder_softmax_b = [ - v for v in tf.trainable_variables() - if v.op.name == 'gen/decoder/rnn/softmax_b' - ][0] - - if FLAGS.data_set == 'ptb': - model_str = 'Model' - else: - model_str = 'model' - - variable_mapping = { - str(model_str) + '/embedding': - decoder_embedding, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - decoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - decoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - decoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - decoder_lstm_b_1, - str(model_str) + '/softmax_b': - decoder_softmax_b - } - return variable_mapping - - -def dis_fwd_bidirectional(hparams): - """Returns the *forward* PTB Variable name to MaskGAN Variable dictionary - mapping. This is a highly restrictive function just for testing. This is for - the bidirectional_zaremba discriminator. - - Args: - FLAGS: Flags for the model. - hparams: Hyperparameters for the MaskGAN. 
- - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_varself. - """ - assert (FLAGS.discriminator_model == 'bidirectional_zaremba' or - FLAGS.discriminator_model == 'bidirectional_vd') - assert hparams.dis_num_layers == 2 - - # Forward Discriminator Elements. - if not FLAGS.dis_share_embedding: - embedding = [ - v for v in tf.trainable_variables() if v.op.name == 'dis/embedding' - ][0] - fw_lstm_w_0 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - fw_lstm_b_0 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - fw_lstm_w_1 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/fw/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - fw_lstm_b_1 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/fw/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - if FLAGS.dis_share_embedding: - variable_mapping = { - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': fw_lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': fw_lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': fw_lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': fw_lstm_b_1 - } - else: - variable_mapping = { - 'Model/embedding': embedding, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': fw_lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': fw_lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': fw_lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': fw_lstm_b_1 - } - return variable_mapping - - -def dis_bwd_bidirectional(hparams): - """Returns the *backward* PTB Variable name to MaskGAN Variable dictionary - mapping. This is a highly restrictive function just for testing. This is for - the bidirectional_zaremba discriminator. - - Args: - hparams: Hyperparameters for the MaskGAN. 
- - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_varself. - """ - assert (FLAGS.discriminator_model == 'bidirectional_zaremba' or - FLAGS.discriminator_model == 'bidirectional_vd') - assert hparams.dis_num_layers == 2 - - # Backward Discriminator Elements. - bw_lstm_w_0 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/bw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - bw_lstm_b_0 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/bw/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - bw_lstm_w_1 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/bw/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - bw_lstm_b_1 = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/rnn/bw/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - variable_mapping = { - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': bw_lstm_w_0, - 'Model/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': bw_lstm_b_0, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': bw_lstm_w_1, - 'Model/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': bw_lstm_b_1 - } - return variable_mapping - - -def dis_encoder_seq2seq(hparams): - """Returns the PTB Variable name to MaskGAN Variable - dictionary mapping. - - Args: - hparams: Hyperparameters for the MaskGAN. - - Returns: - variable_mapping: Dictionary with Key: ckpt_name, Value: model_varself. - """ - assert FLAGS.discriminator_model == 'seq2seq_vd' - assert hparams.dis_num_layers == 2 - - ## Encoder forward variables. 
- encoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - encoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - if FLAGS.data_set == 'ptb': - model_str = 'Model' - else: - model_str = 'model' - - variable_mapping = { - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - encoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - encoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - encoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - encoder_lstm_b_1 - } - return variable_mapping - - -def dis_decoder_seq2seq(hparams): - assert FLAGS.discriminator_model == 'seq2seq_vd' - assert hparams.dis_num_layers == 2 - - if not FLAGS.dis_share_embedding: - decoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/decoder/rnn/embedding' - ][0] - decoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - decoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - if FLAGS.data_set == 'ptb': - 
model_str = 'Model' - else: - model_str = 'model' - - if not FLAGS.dis_share_embedding: - variable_mapping = { - str(model_str) + '/embedding': - decoder_embedding, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - decoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - decoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - decoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - decoder_lstm_b_1 - } - else: - variable_mapping = { - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - decoder_lstm_w_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - decoder_lstm_b_0, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - decoder_lstm_w_1, - str(model_str) + '/RNN/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - decoder_lstm_b_1, - } - return variable_mapping - - -def dis_seq2seq_vd(hparams): - assert FLAGS.discriminator_model == 'seq2seq_vd' - assert hparams.dis_num_layers == 2 - - if not FLAGS.dis_share_embedding: - decoder_embedding = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/decoder/rnn/embedding' - ][0] - - ## Encoder variables. - encoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - encoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - encoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - ## Attention. 
- if FLAGS.attention_option is not None: - decoder_attention_keys = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/decoder/attention_keys/weights' - ][0] - decoder_attention_construct_weights = [ - v for v in tf.trainable_variables() - if v.op.name == 'dis/decoder/rnn/attention_construct/weights' - ][0] - - ## Decoder. - decoder_lstm_w_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_0 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias' - ][0] - decoder_lstm_w_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel' - ][0] - decoder_lstm_b_1 = [ - v for v in tf.trainable_variables() if v.op.name == - 'dis/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias' - ][0] - - # Standard variable mappings. - variable_mapping = { - 'gen/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - encoder_lstm_w_0, - 'gen/encoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - encoder_lstm_b_0, - 'gen/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - encoder_lstm_w_1, - 'gen/encoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - encoder_lstm_b_1, - 'gen/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel': - decoder_lstm_w_0, - 'gen/decoder/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias': - decoder_lstm_b_0, - 'gen/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel': - decoder_lstm_w_1, - 'gen/decoder/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias': - decoder_lstm_b_1 - } - - # Optional variable mappings. 
- if not FLAGS.dis_share_embedding: - variable_mapping['gen/decoder/rnn/embedding'] = decoder_embedding - if FLAGS.attention_option is not None: - variable_mapping[ - 'gen/decoder/attention_keys/weights'] = decoder_attention_keys - variable_mapping[ - 'gen/decoder/rnn/attention_construct/weights'] = decoder_attention_construct_weights - - return variable_mapping diff --git a/research/maskgan/models/__init__.py b/research/maskgan/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/models/attention_utils.py b/research/maskgan/models/attention_utils.py deleted file mode 100644 index 4bd9e41dd3178d6210e8f81d628b7d92004a6601..0000000000000000000000000000000000000000 --- a/research/maskgan/models/attention_utils.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Attention-based decoder functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.framework import function - -__all__ = [ - "prepare_attention", "attention_decoder_fn_train", - "attention_decoder_fn_inference" -] - - -def attention_decoder_fn_train(encoder_state, - attention_keys, - attention_values, - attention_score_fn, - attention_construct_fn, - name=None): - """Attentional decoder function for `dynamic_rnn_decoder` during training. - - The `attention_decoder_fn_train` is a training function for an - attention-based sequence-to-sequence model. It should be used when - `dynamic_rnn_decoder` is in the training mode. - - The `attention_decoder_fn_train` is called with a set of the user arguments - and returns the `decoder_fn`, which can be passed to the - `dynamic_rnn_decoder`, such that - - ``` - dynamic_fn_train = attention_decoder_fn_train(encoder_state) - outputs_train, state_train = dynamic_rnn_decoder( - decoder_fn=dynamic_fn_train, ...) - ``` - - Further usage can be found in the `kernel_tests/seq2seq_test.py`. - - Args: - encoder_state: The encoded state to initialize the `dynamic_rnn_decoder`. - attention_keys: to be compared with target states. - attention_values: to be used to construct context vectors. - attention_score_fn: to compute similarity between key and target states. - attention_construct_fn: to build attention states. - name: (default: `None`) NameScope for the decoder function; - defaults to "simple_decoder_fn_train" - - Returns: - A decoder function with the required interface of `dynamic_rnn_decoder` - intended for training. 
- """ - with tf.name_scope(name, "attention_decoder_fn_train", [ - encoder_state, attention_keys, attention_values, attention_score_fn, - attention_construct_fn - ]): - pass - - def decoder_fn(time, cell_state, cell_input, cell_output, context_state): - """Decoder function used in the `dynamic_rnn_decoder` for training. - - Args: - time: positive integer constant reflecting the current timestep. - cell_state: state of RNNCell. - cell_input: input provided by `dynamic_rnn_decoder`. - cell_output: output of RNNCell. - context_state: context state provided by `dynamic_rnn_decoder`. - - Returns: - A tuple (done, next state, next input, emit output, next context state) - where: - - done: `None`, which is used by the `dynamic_rnn_decoder` to indicate - that `sequence_lengths` in `dynamic_rnn_decoder` should be used. - - next state: `cell_state`, this decoder function does not modify the - given state. - - next input: `cell_input`, this decoder function does not modify the - given input. The input could be modified when applying e.g. attention. - - emit output: `cell_output`, this decoder function does not modify the - given output. - - next context state: `context_state`, this decoder function does not - modify the given context state. The context state could be modified when - applying e.g. beam search. 
- """ - with tf.name_scope( - name, "attention_decoder_fn_train", - [time, cell_state, cell_input, cell_output, context_state]): - if cell_state is None: # first call, return encoder_state - cell_state = encoder_state - - # init attention - attention = _init_attention(encoder_state) - else: - # construct attention - attention = attention_construct_fn(cell_output, attention_keys, - attention_values) - cell_output = attention - - # combine cell_input and attention - next_input = tf.concat([cell_input, attention], 1) - - return (None, cell_state, next_input, cell_output, context_state) - - return decoder_fn - - -def attention_decoder_fn_inference(output_fn, - encoder_state, - attention_keys, - attention_values, - attention_score_fn, - attention_construct_fn, - embeddings, - start_of_sequence_id, - end_of_sequence_id, - maximum_length, - num_decoder_symbols, - dtype=tf.int32, - name=None): - """Attentional decoder function for `dynamic_rnn_decoder` during inference. - - The `attention_decoder_fn_inference` is a simple inference function for a - sequence-to-sequence model. It should be used when `dynamic_rnn_decoder` is - in the inference mode. - - The `attention_decoder_fn_inference` is called with user arguments - and returns the `decoder_fn`, which can be passed to the - `dynamic_rnn_decoder`, such that - - ``` - dynamic_fn_inference = attention_decoder_fn_inference(...) - outputs_inference, state_inference = dynamic_rnn_decoder( - decoder_fn=dynamic_fn_inference, ...) - ``` - - Further usage can be found in the `kernel_tests/seq2seq_test.py`. - - Args: - output_fn: An output function to project your `cell_output` onto class - logits. - - An example of an output function; - - ``` - tf.variable_scope("decoder") as varscope - output_fn = lambda x: tf.contrib.layers.linear(x, num_decoder_symbols, - scope=varscope) - - outputs_train, state_train = seq2seq.dynamic_rnn_decoder(...) 
- logits_train = output_fn(outputs_train) - - varscope.reuse_variables() - logits_inference, state_inference = seq2seq.dynamic_rnn_decoder( - output_fn=output_fn, ...) - ``` - - If `None` is supplied it will act as an identity function, which - might be wanted when using the RNNCell `OutputProjectionWrapper`. - - encoder_state: The encoded state to initialize the `dynamic_rnn_decoder`. - attention_keys: to be compared with target states. - attention_values: to be used to construct context vectors. - attention_score_fn: to compute similarity between key and target states. - attention_construct_fn: to build attention states. - embeddings: The embeddings matrix used for the decoder sized - `[num_decoder_symbols, embedding_size]`. - start_of_sequence_id: The start of sequence ID in the decoder embeddings. - end_of_sequence_id: The end of sequence ID in the decoder embeddings. - maximum_length: The maximum allowed of time steps to decode. - num_decoder_symbols: The number of classes to decode at each time step. - dtype: (default: `tf.int32`) The default data type to use when - handling integer objects. - name: (default: `None`) NameScope for the decoder function; - defaults to "attention_decoder_fn_inference" - - Returns: - A decoder function with the required interface of `dynamic_rnn_decoder` - intended for inference. 
- """ - with tf.name_scope(name, "attention_decoder_fn_inference", [ - output_fn, encoder_state, attention_keys, attention_values, - attention_score_fn, attention_construct_fn, embeddings, - start_of_sequence_id, end_of_sequence_id, maximum_length, - num_decoder_symbols, dtype - ]): - start_of_sequence_id = tf.convert_to_tensor(start_of_sequence_id, dtype) - end_of_sequence_id = tf.convert_to_tensor(end_of_sequence_id, dtype) - maximum_length = tf.convert_to_tensor(maximum_length, dtype) - num_decoder_symbols = tf.convert_to_tensor(num_decoder_symbols, dtype) - encoder_info = tf.contrib.framework.nest.flatten(encoder_state)[0] - batch_size = encoder_info.get_shape()[0].value - if output_fn is None: - output_fn = lambda x: x - if batch_size is None: - batch_size = tf.shape(encoder_info)[0] - - def decoder_fn(time, cell_state, cell_input, cell_output, context_state): - """Decoder function used in the `dynamic_rnn_decoder` for inference. - - The main difference between this decoder function and the `decoder_fn` in - `attention_decoder_fn_train` is how `next_cell_input` is calculated. In - decoder function we calculate the next input by applying an argmax across - the feature dimension of the output from the decoder. This is a - greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014) - use beam-search instead. - - Args: - time: positive integer constant reflecting the current timestep. - cell_state: state of RNNCell. - cell_input: input provided by `dynamic_rnn_decoder`. - cell_output: output of RNNCell. - context_state: context state provided by `dynamic_rnn_decoder`. - - Returns: - A tuple (done, next state, next input, emit output, next context state) - where: - - done: A boolean vector to indicate which sentences has reached a - `end_of_sequence_id`. This is used for early stopping by the - `dynamic_rnn_decoder`. When `time>=maximum_length` a boolean vector with - all elements as `true` is returned. 
- - next state: `cell_state`, this decoder function does not modify the - given state. - - next input: The embedding from argmax of the `cell_output` is used as - `next_input`. - - emit output: If `output_fn is None` the supplied `cell_output` is - returned, else the `output_fn` is used to update the `cell_output` - before calculating `next_input` and returning `cell_output`. - - next context state: `context_state`, this decoder function does not - modify the given context state. The context state could be modified when - applying e.g. beam search. - - Raises: - ValueError: if cell_input is not None. - - """ - with tf.name_scope( - name, "attention_decoder_fn_inference", - [time, cell_state, cell_input, cell_output, context_state]): - if cell_input is not None: - raise ValueError( - "Expected cell_input to be None, but saw: %s" % cell_input) - if cell_output is None: - # invariant that this is time == 0 - next_input_id = tf.ones( - [ - batch_size, - ], dtype=dtype) * ( - start_of_sequence_id) - done = tf.zeros( - [ - batch_size, - ], dtype=tf.bool) - cell_state = encoder_state - cell_output = tf.zeros([num_decoder_symbols], dtype=tf.float32) - cell_input = tf.gather(embeddings, next_input_id) - - # init attention - attention = _init_attention(encoder_state) - else: - # construct attention - attention = attention_construct_fn(cell_output, attention_keys, - attention_values) - cell_output = attention - - # argmax decoder - cell_output = output_fn(cell_output) # logits - next_input_id = tf.cast(tf.argmax(cell_output, 1), dtype=dtype) - done = tf.equal(next_input_id, end_of_sequence_id) - cell_input = tf.gather(embeddings, next_input_id) - - # combine cell_input and attention - next_input = tf.concat([cell_input, attention], 1) - - # if time > maxlen, return all true vector - done = tf.cond( - tf.greater(time, maximum_length), - lambda: tf.ones([ - batch_size,], dtype=tf.bool), lambda: done) - return (done, cell_state, next_input, cell_output, context_state) - - return 
decoder_fn - - -## Helper functions ## -def prepare_attention(attention_states, attention_option, num_units, - reuse=None): - """Prepare keys/values/functions for attention. - - Args: - attention_states: hidden states to attend over. - attention_option: how to compute attention, either "luong" or "bahdanau". - num_units: hidden state dimension. - reuse: whether to reuse variable scope. - - Returns: - attention_keys: to be compared with target states. - attention_values: to be used to construct context vectors. - attention_score_fn: to compute similarity between key and target states. - attention_construct_fn: to build attention states. - """ - # Prepare attention keys / values from attention_states - with tf.variable_scope("attention_keys", reuse=reuse) as scope: - attention_keys = tf.contrib.layers.linear( - attention_states, num_units, biases_initializer=None, scope=scope) - attention_values = attention_states - - # Attention score function - attention_score_fn = _create_attention_score_fn("attention_score", num_units, - attention_option, reuse) - # Attention construction function - attention_construct_fn = _create_attention_construct_fn( - "attention_construct", num_units, attention_score_fn, reuse) - - return (attention_keys, attention_values, attention_score_fn, - attention_construct_fn) - - -def _init_attention(encoder_state): - """Initialize attention. Handling both LSTM and GRU. - - Args: - encoder_state: The encoded state to initialize the `dynamic_rnn_decoder`. - - Returns: - attn: initial zero attention vector. - """ - - # Multi- vs single-layer - # TODO(thangluong): is this the best way to check? 
- if isinstance(encoder_state, tuple): - top_state = encoder_state[-1] - else: - top_state = encoder_state - - # LSTM vs GRU - if isinstance(top_state, tf.contrib.rnn.LSTMStateTuple): - attn = tf.zeros_like(top_state.h) - else: - attn = tf.zeros_like(top_state) - - return attn - - -def _create_attention_construct_fn(name, num_units, attention_score_fn, reuse): - """Function to compute attention vectors. - - Args: - name: to label variables. - num_units: hidden state dimension. - attention_score_fn: to compute similarity between key and target states. - reuse: whether to reuse variable scope. - - Returns: - attention_construct_fn: to build attention states. - """ - - def construct_fn(attention_query, attention_keys, attention_values): - with tf.variable_scope(name, reuse=reuse) as scope: - context = attention_score_fn(attention_query, attention_keys, - attention_values) - concat_input = tf.concat([attention_query, context], 1) - attention = tf.contrib.layers.linear( - concat_input, num_units, biases_initializer=None, scope=scope) - return attention - - return construct_fn - - -# keys: [batch_size, attention_length, attn_size] -# query: [batch_size, 1, attn_size] -# return weights [batch_size, attention_length] -@function.Defun(func_name="attn_add_fun", noinline=True) -def _attn_add_fun(v, keys, query): - return tf.reduce_sum(v * tf.tanh(keys + query), [2]) - - -@function.Defun(func_name="attn_mul_fun", noinline=True) -def _attn_mul_fun(keys, query): - return tf.reduce_sum(keys * query, [2]) - - -def _create_attention_score_fn(name, - num_units, - attention_option, - reuse, - dtype=tf.float32): - """Different ways to compute attention scores. - - Args: - name: to label variables. - num_units: hidden state dimension. - attention_option: how to compute attention, either "luong" or "bahdanau". - "bahdanau": additive (Bahdanau et al., ICLR'2015) - "luong": multiplicative (Luong et al., EMNLP'2015) - reuse: whether to reuse variable scope. 
- dtype: (default: `tf.float32`) data type to use. - - Returns: - attention_score_fn: to compute similarity between key and target states. - """ - with tf.variable_scope(name, reuse=reuse): - if attention_option == "bahdanau": - query_w = tf.get_variable("attnW", [num_units, num_units], dtype=dtype) - score_v = tf.get_variable("attnV", [num_units], dtype=dtype) - - def attention_score_fn(query, keys, values): - """Put attention masks on attention_values using attention_keys and query. - - Args: - query: A Tensor of shape [batch_size, num_units]. - keys: A Tensor of shape [batch_size, attention_length, num_units]. - values: A Tensor of shape [batch_size, attention_length, num_units]. - - Returns: - context_vector: A Tensor of shape [batch_size, num_units]. - - Raises: - ValueError: if attention_option is neither "luong" or "bahdanau". - - - """ - if attention_option == "bahdanau": - # transform query - query = tf.matmul(query, query_w) - - # reshape query: [batch_size, 1, num_units] - query = tf.reshape(query, [-1, 1, num_units]) - - # attn_fun - scores = _attn_add_fun(score_v, keys, query) - elif attention_option == "luong": - # reshape query: [batch_size, 1, num_units] - query = tf.reshape(query, [-1, 1, num_units]) - - # attn_fun - scores = _attn_mul_fun(keys, query) - else: - raise ValueError("Unknown attention option %s!" % attention_option) - - # Compute alignment weights - # scores: [batch_size, length] - # alignments: [batch_size, length] - # TODO(thangluong): not normalize over padding positions. - alignments = tf.nn.softmax(scores) - - # Now calculate the attention-weighted vector. 
- alignments = tf.expand_dims(alignments, 2) - context_vector = tf.reduce_sum(alignments * values, [1]) - context_vector.set_shape([None, num_units]) - - return context_vector - - return attention_score_fn diff --git a/research/maskgan/models/bidirectional.py b/research/maskgan/models/bidirectional.py deleted file mode 100644 index 1e6b3fe45f9ffe7dffdeb5c0d571de7e68227498..0000000000000000000000000000000000000000 --- a/research/maskgan/models/bidirectional.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple bidirectional model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -# ZoneoutWrapper. 
-from regularization import zoneout - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the bidirectional Discriminator graph.""" - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('dis', reuse=reuse): - cell_fwd = tf.contrib.rnn.LayerNormBasicLSTMCell( - hparams.dis_rnn_size, forget_bias=1.0, reuse=reuse) - cell_bwd = tf.contrib.rnn.LayerNormBasicLSTMCell( - hparams.dis_rnn_size, forget_bias=1.0, reuse=reuse) - if FLAGS.zoneout_drop_prob > 0.0: - cell_fwd = zoneout.ZoneoutWrapper( - cell_fwd, - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - cell_bwd = zoneout.ZoneoutWrapper( - cell_bwd, - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - - state_fwd = cell_fwd.zero_state(FLAGS.batch_size, tf.float32) - state_bwd = cell_bwd.zero_state(FLAGS.batch_size, tf.float32) - - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - rnn_inputs = tf.unstack(rnn_inputs, axis=1) - - with tf.variable_scope('rnn') as vs: - outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( - cell_fwd, cell_bwd, rnn_inputs, state_fwd, state_bwd, scope=vs) - - # Prediction is linear output for Discriminator. 
- predictions = tf.contrib.layers.linear(outputs, 1, scope=vs) - - predictions = tf.transpose(predictions, [1, 0, 2]) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/bidirectional_vd.py b/research/maskgan/models/bidirectional_vd.py deleted file mode 100644 index 469af9da57a8a0dbf280327308a17fa6e0277a86..0000000000000000000000000000000000000000 --- a/research/maskgan/models/bidirectional_vd.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Simple bidirectional model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from regularization import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, - sequence, - is_training, - reuse=None, - initial_state=None): - """Define the Discriminator graph.""" - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('dis', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.dis_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.dis_rnn_size, - hparams.dis_vd_keep_prob, hparams.dis_vd_keep_prob) - - cell_fwd = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - cell_bwd = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - # print initial_state - # print cell_fwd.zero_state(FLAGS.batch_size, tf.float32) - if initial_state: - state_fwd = [[tf.identity(x) for x in inner_initial_state] - for inner_initial_state in initial_state] - state_bwd = cell_bwd.zero_state(FLAGS.batch_size, tf.float32) - else: - state_fwd = cell_fwd.zero_state(FLAGS.batch_size, tf.float32) - state_bwd = cell_bwd.zero_state(FLAGS.batch_size, tf.float32) - - def make_mask(keep_prob, 
units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.dis_vd_keep_prob, - 2 * hparams.dis_rnn_size) - - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - rnn_inputs = tf.unstack(rnn_inputs, axis=1) - - with tf.variable_scope('rnn') as vs: - outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( - cell_fwd, cell_bwd, rnn_inputs, state_fwd, state_bwd, scope=vs) - - if is_training: - outputs *= output_mask - - # Prediction is linear output for Discriminator. - predictions = tf.contrib.layers.linear(outputs, 1, scope=vs) - predictions = tf.transpose(predictions, [1, 0, 2]) - - if FLAGS.baseline_method == 'critic': - with tf.variable_scope('critic', reuse=reuse) as critic_scope: - values = tf.contrib.layers.linear(outputs, 1, scope=critic_scope) - values = tf.transpose(values, [1, 0, 2]) - - return tf.squeeze(predictions, axis=2), tf.squeeze(values, axis=2) - - else: - return tf.squeeze(predictions, axis=2), None diff --git a/research/maskgan/models/bidirectional_zaremba.py b/research/maskgan/models/bidirectional_zaremba.py deleted file mode 100644 index b0683d7cc1493a8aa0298b7dc91020a152a9da36..0000000000000000000000000000000000000000 --- a/research/maskgan/models/bidirectional_zaremba.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple bidirectional model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the bidirectional Discriminator graph.""" - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('dis', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and FLAGS.keep_prob < 1: - - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=FLAGS.keep_prob) - - cell_fwd = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - cell_bwd = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - state_fwd = cell_fwd.zero_state(FLAGS.batch_size, tf.float32) - state_bwd = cell_bwd.zero_state(FLAGS.batch_size, tf.float32) - - if not FLAGS.dis_share_embedding: - embedding = 
tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - rnn_inputs = tf.unstack(rnn_inputs, axis=1) - - with tf.variable_scope('rnn') as vs: - outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn( - cell_fwd, cell_bwd, rnn_inputs, state_fwd, state_bwd, scope=vs) - - # Prediction is linear output for Discriminator. - predictions = tf.contrib.layers.linear(outputs, 1, scope=vs) - - predictions = tf.transpose(predictions, [1, 0, 2]) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/cnn.py b/research/maskgan/models/cnn.py deleted file mode 100644 index ca682debf1630f5773cef48b874334d28d1fc6fc..0000000000000000000000000000000000000000 --- a/research/maskgan/models/cnn.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Simple CNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the Discriminator graph.""" - del is_training - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - "If you wish to share Discriminator/Generator embeddings, they must be" - " same dimension.") - with tf.variable_scope("gen/rnn", reuse=True): - embedding = tf.get_variable("embedding", - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - dis_filter_sizes = [3, 4, 5, 6, 7, 8, 9, 10, 15, 20] - - with tf.variable_scope("dis", reuse=reuse): - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable("embedding", - [FLAGS.vocab_size, hparams.dis_rnn_size]) - cnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - # Create a convolution layer for each filter size - conv_outputs = [] - for filter_size in dis_filter_sizes: - with tf.variable_scope("conv-%s" % filter_size): - # Convolution Layer - filter_shape = [ - filter_size, hparams.dis_rnn_size, hparams.dis_num_filters - ] - W = tf.get_variable( - name="W", initializer=tf.truncated_normal(filter_shape, stddev=0.1)) - b = tf.get_variable( - name="b", - initializer=tf.constant(0.1, shape=[hparams.dis_num_filters])) - conv = tf.nn.conv1d( - cnn_inputs, W, stride=1, padding="SAME", name="conv") - - # Apply nonlinearity - h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") - - conv_outputs.append(h) - - # Combine all the pooled features - dis_num_filters_total = hparams.dis_num_filters * len(dis_filter_sizes) - - h_conv = tf.concat(conv_outputs, axis=2) - h_conv_flat = tf.reshape(h_conv, [-1, dis_num_filters_total]) - - # Add dropout - with tf.variable_scope("dropout"): - 
h_drop = tf.nn.dropout(h_conv_flat, FLAGS.keep_prob) - - with tf.variable_scope("fully_connected"): - fc = tf.contrib.layers.fully_connected( - h_drop, num_outputs=dis_num_filters_total / 2) - - # Final (unnormalized) scores and predictions - with tf.variable_scope("output"): - W = tf.get_variable( - "W", - shape=[dis_num_filters_total / 2, 1], - initializer=tf.contrib.layers.xavier_initializer()) - b = tf.get_variable(name="b", initializer=tf.constant(0.1, shape=[1])) - predictions = tf.nn.xw_plus_b(fc, W, b, name="predictions") - predictions = tf.reshape( - predictions, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - return predictions diff --git a/research/maskgan/models/critic_vd.py b/research/maskgan/models/critic_vd.py deleted file mode 100644 index ede8b7bb77af28f562c2e3942728899fe9b16422..0000000000000000000000000000000000000000 --- a/research/maskgan/models/critic_vd.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Critic model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf -from regularization import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def critic_seq2seq_vd_derivative(hparams, sequence, is_training, reuse=None): - """Define the Critic graph which is derived from the seq2seq_vd - Discriminator. This will be initialized with the same parameters as the - language model and will share the forward RNN components with the - Discriminator. This estimates the V(s_t), where the state - s_t = x_0,...,x_t-1. - """ - assert FLAGS.discriminator_model == 'seq2seq_vd' - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - else: - with tf.variable_scope('dis/decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - with tf.variable_scope( - 'dis/decoder/rnn/multi_rnn_cell', reuse=True) as dis_scope: - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=True) - - attn_cell = lstm_cell - if is_training and hparams.dis_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.dis_rnn_size, - hparams.dis_vd_keep_prob, hparams.dis_vd_keep_prob) - - cell_critic = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - with tf.variable_scope('critic', reuse=reuse): - 
state_dis = cell_critic.zero_state(FLAGS.batch_size, tf.float32) - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.dis_vd_keep_prob, hparams.dis_rnn_size) - - with tf.variable_scope('rnn') as vs: - values = [] - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - if t == 0: - rnn_in = tf.zeros_like(rnn_inputs[:, 0]) - else: - rnn_in = rnn_inputs[:, t - 1] - rnn_out, state_dis = cell_critic(rnn_in, state_dis, scope=dis_scope) - - if is_training: - rnn_out *= output_mask - - # Prediction is linear output for Discriminator. - value = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - - values.append(value) - values = tf.stack(values, axis=1) - return tf.squeeze(values, axis=2) diff --git a/research/maskgan/models/evaluation_utils.py b/research/maskgan/models/evaluation_utils.py deleted file mode 100644 index fc2a3a16f0b2c03736bfaa881c5c14546240d283..0000000000000000000000000000000000000000 --- a/research/maskgan/models/evaluation_utils.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Evaluation utilities.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import Counter -# Dependency imports -import numpy as np -from scipy.special import expit - -import tensorflow as tf - -from model_utils import helper -from model_utils import n_gram - -FLAGS = tf.app.flags.FLAGS - - -def print_and_log_losses(log, step, is_present_rate, avg_dis_loss, - avg_gen_loss): - """Prints and logs losses to the log file. - - Args: - log: GFile for logs. - step: Global step. - is_present_rate: Current masking rate. - avg_dis_loss: List of Discriminator losses. - avg_gen_loss: List of Generator losses. - """ - print('global_step: %d' % step) - print(' is_present_rate: %.3f' % is_present_rate) - print(' D train loss: %.5f' % np.mean(avg_dis_loss)) - print(' G train loss: %.5f' % np.mean(avg_gen_loss)) - log.write('\nglobal_step: %d\n' % step) - log.write((' is_present_rate: %.3f\n' % is_present_rate)) - log.write(' D train loss: %.5f\n' % np.mean(avg_dis_loss)) - log.write(' G train loss: %.5f\n' % np.mean(avg_gen_loss)) - - -def print_and_log(log, id_to_word, sequence_eval, max_num_to_print=5): - """Helper function for printing and logging evaluated sequences.""" - indices_arr = np.asarray(sequence_eval) - samples = helper.convert_to_human_readable(id_to_word, indices_arr, - max_num_to_print) - - for i, sample in enumerate(samples): - print('Sample', i, '. ', sample) - log.write('\nSample ' + str(i) + '. 
' + sample) - log.write('\n') - print('\n') - log.flush() - return samples - - -def zip_seq_pred_crossent(id_to_word, sequences, predictions, cross_entropy): - """Zip together the sequences, predictions, cross entropy.""" - indices = np.asarray(sequences) - - batch_of_metrics = [] - - for ind_batch, pred_batch, crossent_batch in zip(indices, predictions, - cross_entropy): - metrics = [] - - for index, pred, crossent in zip(ind_batch, pred_batch, crossent_batch): - metrics.append([str(id_to_word[index]), pred, crossent]) - - batch_of_metrics.append(metrics) - return batch_of_metrics - - -def zip_metrics(indices, *args): - """Zip together the indices matrices with the provided metrics matrices.""" - batch_of_metrics = [] - for metrics_batch in zip(indices, *args): - - metrics = [] - for m in zip(*metrics_batch): - metrics.append(m) - batch_of_metrics.append(metrics) - return batch_of_metrics - - -def print_formatted(present, id_to_word, log, batch_of_tuples): - """Print and log metrics.""" - num_cols = len(batch_of_tuples[0][0]) - repeat_float_format = '{:<12.3f} ' - repeat_str_format = '{:<13}' - - format_str = ''.join( - ['[{:<1}] {:<20}', - str(repeat_float_format * (num_cols - 1))]) - - # TODO(liamfedus): Generalize the logging. This is sloppy. 
- header_format_str = ''.join( - ['[{:<1}] {:<20}', - str(repeat_str_format * (num_cols - 1))]) - header_str = header_format_str.format('p', 'Word', 'p(real)', 'log-perp', - 'log(p(a))', 'r', 'R=V*(s)', 'b=V(s)', - 'A(a,s)') - - for i, batch in enumerate(batch_of_tuples): - print(' Sample: %d' % i) - log.write(' Sample %d.\n' % i) - print(' ', header_str) - log.write(' ' + str(header_str) + '\n') - - for j, t in enumerate(batch): - t = list(t) - t[0] = id_to_word[t[0]] - buffer_str = format_str.format(int(present[i][j]), *t) - print(' ', buffer_str) - log.write(' ' + str(buffer_str) + '\n') - log.flush() - - -def generate_RL_logs(sess, model, log, id_to_word, feed): - """Generate complete logs while running with REINFORCE.""" - # Impute Sequences. - [ - p, - fake_sequence_eval, - fake_predictions_eval, - _, - fake_cross_entropy_losses_eval, - _, - fake_log_probs_eval, - fake_rewards_eval, - fake_baselines_eval, - cumulative_rewards_eval, - fake_advantages_eval, - ] = sess.run( - [ - model.present, - model.fake_sequence, - model.fake_predictions, - model.real_predictions, - model.fake_cross_entropy_losses, - model.fake_logits, - model.fake_log_probs, - model.fake_rewards, - model.fake_baselines, - model.cumulative_rewards, - model.fake_advantages, - ], - feed_dict=feed) - - indices = np.asarray(fake_sequence_eval) - - # Convert Discriminator linear layer to probability. - fake_prob_eval = expit(fake_predictions_eval) - - # Add metrics. - fake_tuples = zip_metrics(indices, fake_prob_eval, - fake_cross_entropy_losses_eval, fake_log_probs_eval, - fake_rewards_eval, cumulative_rewards_eval, - fake_baselines_eval, fake_advantages_eval) - - # real_tuples = zip_metrics(indices, ) - - # Print forward sequences. 
- tuples_to_print = fake_tuples[:FLAGS.max_num_to_print] - print_formatted(p, id_to_word, log, tuples_to_print) - - print('Samples') - log.write('Samples\n') - samples = print_and_log(log, id_to_word, fake_sequence_eval, - FLAGS.max_num_to_print) - return samples - - -def generate_logs(sess, model, log, id_to_word, feed): - """Impute Sequences using the model for a particular feed and send it to - logs.""" - # Impute Sequences. - [ - p, sequence_eval, fake_predictions_eval, fake_cross_entropy_losses_eval, - fake_logits_eval - ] = sess.run( - [ - model.present, model.fake_sequence, model.fake_predictions, - model.fake_cross_entropy_losses, model.fake_logits - ], - feed_dict=feed) - - # Convert Discriminator linear layer to probability. - fake_prob_eval = expit(fake_predictions_eval) - - # Forward Masked Tuples. - fake_tuples = zip_seq_pred_crossent(id_to_word, sequence_eval, fake_prob_eval, - fake_cross_entropy_losses_eval) - - tuples_to_print = fake_tuples[:FLAGS.max_num_to_print] - - if FLAGS.print_verbose: - print('fake_logits_eval') - print(fake_logits_eval) - - for i, batch in enumerate(tuples_to_print): - print(' Sample %d.' % i) - log.write(' Sample %d.\n' % i) - for j, pred in enumerate(batch): - buffer_str = ('[{:<1}] {:<20} {:<7.3f} {:<7.3f}').format( - int(p[i][j]), pred[0], pred[1], pred[2]) - print(' ', buffer_str) - log.write(' ' + str(buffer_str) + '\n') - log.flush() - - print('Samples') - log.write('Samples\n') - samples = print_and_log(log, id_to_word, sequence_eval, - FLAGS.max_num_to_print) - return samples - - -def create_merged_ngram_dictionaries(indices, n): - """Generate a single dictionary for the full batch. - - Args: - indices: List of lists of indices. - n: Degree of n-grams. - - Returns: - Dictionary of hashed(n-gram tuples) to counts in the batch of indices. 
- """ - ngram_dicts = [] - - for ind in indices: - ngrams = n_gram.find_all_ngrams(ind, n=n) - ngram_counts = n_gram.construct_ngrams_dict(ngrams) - ngram_dicts.append(ngram_counts) - - merged_gen_dict = Counter() - for ngram_dict in ngram_dicts: - merged_gen_dict += Counter(ngram_dict) - return merged_gen_dict - - -def sequence_ngram_evaluation(sess, sequence, log, feed, data_ngram_count, n): - """Calculates the percent of ngrams produced in the sequence is present in - data_ngram_count. - - Args: - sess: tf.Session. - sequence: Sequence Tensor from the MaskGAN model. - log: gFile log. - feed: Feed to evaluate. - data_ngram_count: Dictionary of hashed(n-gram tuples) to counts in the - data_set. - - Returns: - avg_percent_captured: Percent of produced ngrams that appear in the - data_ngram_count. - """ - del log - # Impute sequence. - [sequence_eval] = sess.run([sequence], feed_dict=feed) - indices = sequence_eval - - # Retrieve the counts across the batch of indices. - gen_ngram_counts = create_merged_ngram_dictionaries( - indices, n=n) - return n_gram.percent_unique_ngrams_in_train(data_ngram_count, - gen_ngram_counts) diff --git a/research/maskgan/models/feedforward.py b/research/maskgan/models/feedforward.py deleted file mode 100644 index d48a517d6bea65477b8a940ed770f92203da6dfd..0000000000000000000000000000000000000000 --- a/research/maskgan/models/feedforward.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple FNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the Discriminator graph.""" - del is_training - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - "If you wish to share Discriminator/Generator embeddings, they must be" - " same dimension.") - with tf.variable_scope("gen/rnn", reuse=True): - embedding = tf.get_variable("embedding", - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope("dis", reuse=reuse): - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable("embedding", - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - embeddings = tf.nn.embedding_lookup(embedding, sequence) - - # Input matrices. - W = tf.get_variable( - "W", - initializer=tf.truncated_normal( - shape=[3 * hparams.dis_embedding_dim, hparams.dis_hidden_dim], - stddev=0.1)) - b = tf.get_variable( - "b", initializer=tf.constant(0.1, shape=[hparams.dis_hidden_dim])) - - # Output matrices. 
- W_out = tf.get_variable( - "W_out", - initializer=tf.truncated_normal( - shape=[hparams.dis_hidden_dim, 1], stddev=0.1)) - b_out = tf.get_variable("b_out", initializer=tf.constant(0.1, shape=[1])) - - predictions = [] - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - inp = embeddings[:, t] - - if t > 0: - past_inp = tf.unstack(embeddings[:, 0:t], axis=1) - avg_past_inp = tf.add_n(past_inp) / len(past_inp) - else: - avg_past_inp = tf.zeros_like(inp) - - if t < FLAGS.sequence_length: - future_inp = tf.unstack(embeddings[:, t:], axis=1) - avg_future_inp = tf.add_n(future_inp) / len(future_inp) - else: - avg_future_inp = tf.zeros_like(inp) - - # Cumulative input. - concat_inp = tf.concat([avg_past_inp, inp, avg_future_inp], axis=1) - - # Hidden activations. - hidden = tf.nn.relu(tf.nn.xw_plus_b(concat_inp, W, b, name="scores")) - - # Add dropout - with tf.variable_scope("dropout"): - hidden = tf.nn.dropout(hidden, FLAGS.keep_prob) - - # Output. - output = tf.nn.xw_plus_b(hidden, W_out, b_out, name="output") - - predictions.append(output) - predictions = tf.stack(predictions, axis=1) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/rnn.py b/research/maskgan/models/rnn.py deleted file mode 100644 index 40b3a7aa3b85ddfd3002d845416b5004088620fc..0000000000000000000000000000000000000000 --- a/research/maskgan/models/rnn.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple RNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -# ZoneoutWrapper. -from regularization import zoneout - -FLAGS = tf.app.flags.FLAGS - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph. - - G will now impute tokens that have been masked from the input seqeunce. - """ - tf.logging.warning( - 'Undirectional generative model is not a useful model for this MaskGAN ' - 'because future context is needed. Use only for debugging purposes.') - init_scale = 0.05 - initializer = tf.random_uniform_initializer(-init_scale, init_scale) - - with tf.variable_scope('gen', reuse=reuse, initializer=initializer): - - def lstm_cell(): - return tf.contrib.rnn.LayerNormBasicLSTMCell( - hparams.gen_rnn_size, reuse=reuse) - - attn_cell = lstm_cell - if FLAGS.zoneout_drop_prob > 0.0: - - def attn_cell(): - return zoneout.ZoneoutWrapper( - lstm_cell(), - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - - cell_gen = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - initial_state = cell_gen.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - softmax_w = tf.get_variable('softmax_w', - [hparams.gen_rnn_size, FLAGS.vocab_size]) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - 
tf.get_variable_scope().reuse_variables() - - # Input to the model is the first token to provide context. The - # model will then predict token t > 0. - if t == 0: - # Always provide the real input at t = 0. - state_gen = initial_state - rnn_inp = rnn_inputs[:, t] - - # If the target at the last time-step was present, read in the real. - # If the target at the last time-step was not present, read in the fake. - else: - real_rnn_inp = rnn_inputs[:, t] - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - - # Use teacher forcing. - if (is_training and - FLAGS.gen_training_strategy == 'cross_entropy') or is_validating: - rnn_inp = real_rnn_inp - else: - # Note that targets_t-1 == inputs_(t) - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - logit = tf.matmul(rnn_out, softmax_w) + softmax_b - - # Real sample. - real = targets[:, t] - - # Fake sample. - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - # Output for Generator will either be generated or the target. - # If present: Return real. - # If not present: Return fake. - output = tf.where(targets_present[:, t], real, fake) - - # Append to lists. - sequence.append(output) - logits.append(logit) - log_probs.append(log_prob) - - # Produce the RNN state had the model operated only - # over real data. - real_state_gen = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - rnn_inp = rnn_inputs[:, t] - - # RNN. - rnn_out, real_state_gen = cell_gen(rnn_inp, real_state_gen) - - final_state = real_state_gen - - return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack( - log_probs, axis=1), initial_state, final_state) - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the Discriminator graph. - - Args: - hparams: Hyperparameters for the MaskGAN. 
- FLAGS: Current flags. - sequence: [FLAGS.batch_size, FLAGS.sequence_length] - is_training: - reuse - - Returns: - predictions: - """ - tf.logging.warning( - 'Undirectional Discriminative model is not a useful model for this ' - 'MaskGAN because future context is needed. Use only for debugging ' - 'purposes.') - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('dis', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.LayerNormBasicLSTMCell( - hparams.dis_rnn_size, reuse=reuse) - - attn_cell = lstm_cell - if FLAGS.zoneout_drop_prob > 0.0: - - def attn_cell(): - return zoneout.ZoneoutWrapper( - lstm_cell(), - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - - cell_dis = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - state_dis = cell_dis.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn') as vs: - predictions = [] - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = rnn_inputs[:, t] - rnn_out, state_dis = cell_dis(rnn_in, state_dis) - - # Prediction is linear output for Discriminator. 
- pred = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - - predictions.append(pred) - predictions = tf.stack(predictions, axis=1) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/rnn_nas.py b/research/maskgan/models/rnn_nas.py deleted file mode 100644 index 618ace2f8196fb4718ae01bc406f114523fd44cc..0000000000000000000000000000000000000000 --- a/research/maskgan/models/rnn_nas.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple RNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -from six.moves import xrange -import tensorflow as tf - -# NAS Code.. -from nas_utils import configs -from nas_utils import custom_cell -from nas_utils import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def get_config(): - return configs.AlienConfig2() - - -LSTMTuple = collections.namedtuple('LSTMTuple', ['c', 'h']) - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph. - - G will now impute tokens that have been masked from the input seqeunce. 
- """ - tf.logging.info( - 'Undirectional generative model is not a useful model for this MaskGAN ' - 'because future context is needed. Use only for debugging purposes.') - config = get_config() - config.keep_prob = [hparams.gen_nas_keep_prob_0, hparams.gen_nas_keep_prob_1] - configs.print_config(config) - - init_scale = config.init_scale - initializer = tf.random_uniform_initializer(-init_scale, init_scale) - - with tf.variable_scope('gen', reuse=reuse, initializer=initializer): - # Neural architecture search cell. - cell = custom_cell.Alien(config.hidden_size) - - if is_training: - [h2h_masks, _, _, - output_mask] = variational_dropout.generate_variational_dropout_masks( - hparams, config.keep_prob) - else: - output_mask = None - - cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers) - initial_state = cell_gen.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - softmax_w = tf.matrix_transpose(embedding) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the model is the first token to provide context. The - # model will then predict token t > 0. - if t == 0: - # Always provide the real input at t = 0. - state_gen = initial_state - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. - else: - real_rnn_inp = rnn_inputs[:, t] - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - - # While validating, the decoder should be operating in teacher - # forcing regime. 
Also, if we're just training with cross_entropy - # use teacher forcing. - if is_validating or (is_training and - FLAGS.gen_training_strategy == 'cross_entropy'): - rnn_inp = real_rnn_inp - else: - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - if is_training: - state_gen = list(state_gen) - for layer_num, per_layer_state in enumerate(state_gen): - per_layer_state = LSTMTuple( - per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num]) - state_gen[layer_num] = per_layer_state - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - - if is_training: - rnn_out = output_mask * rnn_out - - logit = tf.matmul(rnn_out, softmax_w) + softmax_b - - # Real sample. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - # Output for Generator will either be generated or the input. - # - # If present: Return real. - # If not present: Return fake. - output = tf.where(targets_present[:, t], real, fake) - - # Add to lists. - sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - - # Produce the RNN state had the model operated only - # over real data. - real_state_gen = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - rnn_inp = rnn_inputs[:, t] - - # RNN. - rnn_out, real_state_gen = cell_gen(rnn_inp, real_state_gen) - - final_state = real_state_gen - - return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack( - log_probs, axis=1), initial_state, final_state) - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the Discriminator graph.""" - tf.logging.info( - 'Undirectional Discriminative model is not a useful model for this ' - 'MaskGAN because future context is needed. 
Use only for debugging ' - 'purposes.') - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - config = get_config() - config.keep_prob = [hparams.dis_nas_keep_prob_0, hparams.dis_nas_keep_prob_1] - configs.print_config(config) - - with tf.variable_scope('dis', reuse=reuse): - # Neural architecture search cell. - cell = custom_cell.Alien(config.hidden_size) - - if is_training: - [h2h_masks, _, _, - output_mask] = variational_dropout.generate_variational_dropout_masks( - hparams, config.keep_prob) - else: - output_mask = None - - cell_dis = custom_cell.GenericMultiRNNCell([cell] * config.num_layers) - state_dis = cell_dis.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn') as vs: - predictions = [] - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = rnn_inputs[:, t] - - if is_training: - state_dis = list(state_dis) - for layer_num, per_layer_state in enumerate(state_dis): - per_layer_state = LSTMTuple( - per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num]) - state_dis[layer_num] = per_layer_state - - # RNN. - rnn_out, state_dis = cell_dis(rnn_in, state_dis) - - if is_training: - rnn_out = output_mask * rnn_out - - # Prediction is linear output for Discriminator. 
- pred = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - - predictions.append(pred) - predictions = tf.stack(predictions, axis=1) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/rnn_vd.py b/research/maskgan/models/rnn_vd.py deleted file mode 100644 index 428f1a54bda7d6e5f9dd55061149664b1b3e751d..0000000000000000000000000000000000000000 --- a/research/maskgan/models/rnn_vd.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple RNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf -from regularization import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def discriminator(hparams, - sequence, - is_training, - reuse=None, - initial_state=None): - """Define the Discriminator graph.""" - tf.logging.info( - 'Undirectional Discriminative model is not a useful model for this ' - 'MaskGAN because future context is needed. 
Use only for debugging ' - 'purposes.') - sequence = tf.cast(sequence, tf.int32) - - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('dis', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.dis_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.dis_rnn_size, - hparams.dis_vd_keep_prob, hparams.dis_vd_keep_prob) - - cell_dis = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - if initial_state: - state_dis = [[tf.identity(x) for x in inner_initial_state] - for inner_initial_state in initial_state] - else: - state_dis = cell_dis.zero_state(FLAGS.batch_size, tf.float32) - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. 
if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.dis_vd_keep_prob, hparams.dis_rnn_size) - - with tf.variable_scope('rnn') as vs: - predictions, rnn_outs = [], [] - - if not FLAGS.dis_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = rnn_inputs[:, t] - rnn_out, state_dis = cell_dis(rnn_in, state_dis) - - if is_training: - rnn_out *= output_mask - - # Prediction is linear output for Discriminator. - pred = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - predictions.append(pred) - rnn_outs.append(rnn_out) - - predictions = tf.stack(predictions, axis=1) - - if FLAGS.baseline_method == 'critic': - with tf.variable_scope('critic', reuse=reuse) as critic_scope: - rnn_outs = tf.stack(rnn_outs, axis=1) - values = tf.contrib.layers.linear(rnn_outs, 1, scope=critic_scope) - return tf.squeeze(predictions, axis=2), tf.squeeze(values, axis=2) - - else: - return tf.squeeze(predictions, axis=2), None diff --git a/research/maskgan/models/rnn_zaremba.py b/research/maskgan/models/rnn_zaremba.py deleted file mode 100644 index 9369c77fbb849551721b46321e6868a7aeaceea6..0000000000000000000000000000000000000000 --- a/research/maskgan/models/rnn_zaremba.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple RNN model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph. - - G will now impute tokens that have been masked from the input seqeunce. - """ - tf.logging.warning( - 'Undirectional generative model is not a useful model for this MaskGAN ' - 'because future context is needed. 
Use only for debugging purposes.') - init_scale = 0.05 - initializer = tf.random_uniform_initializer(-init_scale, init_scale) - with tf.variable_scope('gen', reuse=reuse, initializer=initializer): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell(hparams.gen_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and FLAGS.keep_prob < 1: - - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=FLAGS.keep_prob) - - cell_gen = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - initial_state = cell_gen.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - softmax_w = tf.get_variable('softmax_w', - [hparams.gen_rnn_size, FLAGS.vocab_size]) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - fake = None - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the model is the first token to provide context. The - # model will then predict token t > 0. - if t == 0: - # Always provide the real input at t = 0. - state_gen = initial_state - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. - else: - real_rnn_inp = rnn_inputs[:, t] - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - - # While validating, the decoder should be operating in teacher - # forcing regime. Also, if we're just training with cross_entropy - # use teacher forcing. 
- if is_validating or (is_training and - FLAGS.gen_training_strategy == 'cross_entropy'): - rnn_inp = real_rnn_inp - else: - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - logit = tf.matmul(rnn_out, softmax_w) + softmax_b - - # Real sample. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - # Output for Generator will either be generated or the input. - # - # If present: Return real. - # If not present: Return fake. - output = tf.where(targets_present[:, t], real, fake) - - # Add to lists. - sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - - # Produce the RNN state had the model operated only - # over real data. - real_state_gen = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - rnn_inp = rnn_inputs[:, t] - - # RNN. - rnn_out, real_state_gen = cell_gen(rnn_inp, real_state_gen) - - final_state = real_state_gen - - return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack( - log_probs, axis=1), initial_state, final_state) - - -def discriminator(hparams, sequence, is_training, reuse=None): - """Define the Discriminator graph.""" - tf.logging.warning( - 'Undirectional Discriminative model is not a useful model for this ' - 'MaskGAN because future context is needed. 
Use only for debugging ' - 'purposes.') - sequence = tf.cast(sequence, tf.int32) - - with tf.variable_scope('dis', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell(hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and FLAGS.keep_prob < 1: - - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=FLAGS.keep_prob) - - cell_dis = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - state_dis = cell_dis.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn') as vs: - predictions = [] - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = rnn_inputs[:, t] - rnn_out, state_dis = cell_dis(rnn_in, state_dis) - - # Prediction is linear output for Discriminator. - pred = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - - predictions.append(pred) - predictions = tf.stack(predictions, axis=1) - return tf.squeeze(predictions, axis=2) diff --git a/research/maskgan/models/rollout.py b/research/maskgan/models/rollout.py deleted file mode 100644 index 6919af2e31fa362f702e96e135d4a2bc06e063a2..0000000000000000000000000000000000000000 --- a/research/maskgan/models/rollout.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Rollout RNN model definitions which call rnn_zaremba code.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -from six.moves import xrange -import tensorflow as tf - -from losses import losses -from model_utils import helper -from model_utils import model_construction -from model_utils import model_losses -from model_utils import model_optimization - -FLAGS = tf.app.flags.FLAGS - - -def create_rollout_MaskGAN(hparams, is_training): - """Create the MaskGAN model. - - Args: - hparams: Hyperparameters for the MaskGAN. - is_training: Boolean indicating operational mode (train/inference). - evaluated with a teacher forcing regime. - - Return: - model: Namedtuple for specifying the MaskGAN.""" - global_step = tf.Variable(0, name='global_step', trainable=False) - - new_learning_rate = tf.placeholder(tf.float32, [], name='new_learning_rate') - learning_rate = tf.Variable(0.0, name='learning_rate', trainable=False) - learning_rate_update = tf.assign(learning_rate, new_learning_rate) - - new_rate = tf.placeholder(tf.float32, [], name='new_rate') - percent_real_var = tf.Variable(0.0, trainable=False) - percent_real_update = tf.assign(percent_real_var, new_rate) - - ## Placeholders. 
- inputs = tf.placeholder( - tf.int32, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - present = tf.placeholder( - tf.bool, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - inv_present = tf.placeholder( - tf.bool, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - ## Rollout Generator. - fwd_gen_rollouts = rollout_generator( - hparams, inputs, present, is_training=is_training, is_validating=False) - inv_gen_rollouts = rollout_generator( - hparams, - inputs, - inv_present, - is_training=is_training, - is_validating=False, - reuse=True) - - ## Rollout Discriminator. - fwd_dis_rollouts = rollout_discriminator( - hparams, fwd_gen_rollouts, is_training=is_training) - inv_dis_rollouts = rollout_discriminator( - hparams, inv_gen_rollouts, is_training=is_training, reuse=True) - - ## Discriminator Loss. - [dis_loss, dis_loss_pred, dis_loss_inv_pred] = rollout_discriminator_loss( - fwd_dis_rollouts, present, inv_dis_rollouts, inv_present) - - ## Average log-perplexity for only missing words. However, to do this, - # the logits are still computed using teacher forcing, that is, the ground - # truth tokens are fed in at each time point to be valid. - # TODO(liamfedus): Fix the naming convention. - with tf.variable_scope('gen_rollout'): - _, fwd_eval_logits, _ = model_construction.create_generator( - hparams, - inputs, - present, - is_training=False, - is_validating=True, - reuse=True) - - avg_log_perplexity = model_losses.calculate_log_perplexity( - fwd_eval_logits, inputs, present) - - ## Generator Loss. - # 1. Cross Entropy losses on missing tokens. - [fwd_cross_entropy_losses, - inv_cross_entropy_losses] = rollout_masked_cross_entropy_loss( - inputs, present, inv_present, fwd_gen_rollouts, inv_gen_rollouts) - - # 2. GAN losses on missing tokens. 
- [fwd_RL_loss, - fwd_RL_statistics, fwd_averages_op] = rollout_reinforce_objective( - hparams, fwd_gen_rollouts, fwd_dis_rollouts, present) - [inv_RL_loss, - inv_RL_statistics, inv_averages_op] = rollout_reinforce_objective( - hparams, inv_gen_rollouts, inv_dis_rollouts, inv_present) - - # TODO(liamfedus): Generalize this to use all logs. - [fwd_sequence, fwd_logits, fwd_log_probs] = fwd_gen_rollouts[-1] - [inv_sequence, inv_logits, inv_log_probs] = inv_gen_rollouts[-1] - - # TODO(liamfedus): Generalize this to use all logs. - fwd_predictions = fwd_dis_rollouts[-1] - inv_predictions = inv_dis_rollouts[-1] - - # TODO(liamfedus): Generalize this to use all logs. - [fwd_log_probs, fwd_rewards, fwd_advantages, - fwd_baselines] = fwd_RL_statistics[-1] - [inv_log_probs, inv_rewards, inv_advantages, - inv_baselines] = inv_RL_statistics[-1] - - ## Pre-training. - if FLAGS.gen_pretrain_steps: - # TODO(liamfedus): Rewrite this. - fwd_cross_entropy_loss = tf.reduce_mean(fwd_cross_entropy_losses) - gen_pretrain_op = model_optimization.create_gen_pretrain_op( - hparams, fwd_cross_entropy_loss, global_step) - else: - gen_pretrain_op = tf.no_op('gen_pretrain_no_op') - if FLAGS.dis_pretrain_steps: - dis_pretrain_op = model_optimization.create_dis_pretrain_op( - hparams, dis_loss, global_step) - else: - dis_pretrain_op = tf.no_op('dis_pretrain_no_op') - - ## Generator Train Op. - # 1. Cross-Entropy. - if FLAGS.gen_training_strategy == 'cross_entropy': - gen_loss = tf.reduce_mean( - fwd_cross_entropy_losses + inv_cross_entropy_losses) / 2. - [gen_train_op, gen_grads, - gen_vars] = model_optimization.create_gen_train_op( - hparams, learning_rate, gen_loss, global_step, mode='MINIMIZE') - - # 2. GAN (REINFORCE) - elif FLAGS.gen_training_strategy == 'reinforce': - gen_loss = (fwd_RL_loss + inv_RL_loss) / 2. 
- [gen_train_op, gen_grads, - gen_vars] = model_optimization.create_reinforce_gen_train_op( - hparams, learning_rate, gen_loss, fwd_averages_op, inv_averages_op, - global_step) - - else: - raise NotImplementedError - - ## Discriminator Train Op. - dis_train_op, dis_grads, dis_vars = model_optimization.create_dis_train_op( - hparams, dis_loss, global_step) - - ## Summaries. - with tf.name_scope('general'): - tf.summary.scalar('percent_real', percent_real_var) - tf.summary.scalar('learning_rate', learning_rate) - - with tf.name_scope('generator_losses'): - tf.summary.scalar('gen_loss', tf.reduce_mean(gen_loss)) - tf.summary.scalar('gen_loss_fwd_cross_entropy', - tf.reduce_mean(fwd_cross_entropy_losses)) - tf.summary.scalar('gen_loss_inv_cross_entropy', - tf.reduce_mean(inv_cross_entropy_losses)) - - with tf.name_scope('REINFORCE'): - with tf.name_scope('objective'): - tf.summary.scalar('fwd_RL_loss', tf.reduce_mean(fwd_RL_loss)) - tf.summary.scalar('inv_RL_loss', tf.reduce_mean(inv_RL_loss)) - - with tf.name_scope('rewards'): - helper.variable_summaries(fwd_rewards, 'fwd_rewards') - helper.variable_summaries(inv_rewards, 'inv_rewards') - - with tf.name_scope('advantages'): - helper.variable_summaries(fwd_advantages, 'fwd_advantages') - helper.variable_summaries(inv_advantages, 'inv_advantages') - - with tf.name_scope('baselines'): - helper.variable_summaries(fwd_baselines, 'fwd_baselines') - helper.variable_summaries(inv_baselines, 'inv_baselines') - - with tf.name_scope('log_probs'): - helper.variable_summaries(fwd_log_probs, 'fwd_log_probs') - helper.variable_summaries(inv_log_probs, 'inv_log_probs') - - with tf.name_scope('discriminator_losses'): - tf.summary.scalar('dis_loss', dis_loss) - tf.summary.scalar('dis_loss_fwd_sequence', dis_loss_pred) - tf.summary.scalar('dis_loss_inv_sequence', dis_loss_inv_pred) - - with tf.name_scope('logits'): - helper.variable_summaries(fwd_logits, 'fwd_logits') - helper.variable_summaries(inv_logits, 'inv_logits') - - for v, g in 
zip(gen_vars, gen_grads): - helper.variable_summaries(v, v.op.name) - helper.variable_summaries(g, 'grad/' + v.op.name) - - for v, g in zip(dis_vars, dis_grads): - helper.variable_summaries(v, v.op.name) - helper.variable_summaries(g, 'grad/' + v.op.name) - - merge_summaries_op = tf.summary.merge_all() - - # Model saver. - saver = tf.train.Saver(keep_checkpoint_every_n_hours=1, max_to_keep=5) - - # Named tuple that captures elements of the MaskGAN model. - Model = collections.namedtuple('Model', [ - 'inputs', 'present', 'inv_present', 'percent_real_update', 'new_rate', - 'fwd_sequence', 'fwd_logits', 'fwd_rewards', 'fwd_advantages', - 'fwd_log_probs', 'fwd_predictions', 'fwd_cross_entropy_losses', - 'inv_sequence', 'inv_logits', 'inv_rewards', 'inv_advantages', - 'inv_log_probs', 'inv_predictions', 'inv_cross_entropy_losses', - 'avg_log_perplexity', 'dis_loss', 'gen_loss', 'dis_train_op', - 'gen_train_op', 'gen_pretrain_op', 'dis_pretrain_op', - 'merge_summaries_op', 'global_step', 'new_learning_rate', - 'learning_rate_update', 'saver' - ]) - - model = Model( - inputs, present, inv_present, percent_real_update, new_rate, fwd_sequence, - fwd_logits, fwd_rewards, fwd_advantages, fwd_log_probs, fwd_predictions, - fwd_cross_entropy_losses, inv_sequence, inv_logits, inv_rewards, - inv_advantages, inv_log_probs, inv_predictions, inv_cross_entropy_losses, - avg_log_perplexity, dis_loss, gen_loss, dis_train_op, gen_train_op, - gen_pretrain_op, dis_pretrain_op, merge_summaries_op, global_step, - new_learning_rate, learning_rate_update, saver) - return model - - -def rollout_generator(hparams, - inputs, - input_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph which does rollouts. - - G will now impute tokens that have been masked from the input seqeunce. - """ - rollouts = [] - - with tf.variable_scope('gen_rollout'): - for n in xrange(FLAGS.num_rollouts): - if n > 0: - # TODO(liamfedus): Why is it necessary here to manually set reuse? 
- reuse = True - tf.get_variable_scope().reuse_variables() - - [sequence, logits, log_probs] = model_construction.create_generator( - hparams, - inputs, - input_present, - is_training, - is_validating, - reuse=reuse) - - rollouts.append([sequence, logits, log_probs]) - - # Length assertion. - assert len(rollouts) == FLAGS.num_rollouts - - return rollouts - - -def rollout_discriminator(hparams, gen_rollouts, is_training, reuse=None): - """Define the Discriminator graph which does rollouts. - - G will now impute tokens that have been masked from the input seqeunce. - """ - rollout_predictions = [] - - with tf.variable_scope('dis_rollout'): - for n, rollout in enumerate(gen_rollouts): - if n > 0: - # TODO(liamfedus): Why is it necessary here to manually set reuse? - reuse = True - tf.get_variable_scope().reuse_variables() - - [sequence, _, _] = rollout - - predictions = model_construction.create_discriminator( - hparams, sequence, is_training=is_training, reuse=reuse) - - # Predictions for each rollout. - rollout_predictions.append(predictions) - - # Length assertion. - assert len(rollout_predictions) == FLAGS.num_rollouts - - return rollout_predictions - - -def rollout_reinforce_objective(hparams, gen_rollouts, dis_rollouts, present): - cumulative_gen_objective = 0. - cumulative_averages_op = [] - cumulative_statistics = [] - - assert len(gen_rollouts) == len(dis_rollouts) - - for gen_rollout, dis_rollout in zip(gen_rollouts, dis_rollouts): - [_, _, log_probs] = gen_rollout - dis_predictions = dis_rollout - - [ - final_gen_objective, log_probs, rewards, advantages, baselines, - maintain_averages_op - ] = model_losses.calculate_reinforce_objective(hparams, log_probs, - dis_predictions, present) - - # Accumulate results. - cumulative_gen_objective += final_gen_objective - cumulative_averages_op.append(maintain_averages_op) - cumulative_statistics.append([log_probs, rewards, advantages, baselines]) - - # Group all the averaging operations. 
- cumulative_averages_op = tf.group(*cumulative_averages_op) - cumulative_gen_objective /= FLAGS.num_rollouts - [log_probs, rewards, advantages, baselines] = cumulative_statistics[-1] - - # Length assertion. - assert len(cumulative_statistics) == FLAGS.num_rollouts - - return [ - cumulative_gen_objective, cumulative_statistics, cumulative_averages_op - ] - - -def rollout_masked_cross_entropy_loss(inputs, present, inv_present, - fwd_rollouts, inv_rollouts): - cumulative_fwd_cross_entropy_losses = tf.zeros( - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - cumulative_inv_cross_entropy_losses = tf.zeros( - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - for fwd_rollout, inv_rollout in zip(fwd_rollouts, inv_rollouts): - [_, fwd_logits, _] = fwd_rollout - [_, inv_logits, _] = inv_rollout - - [fwd_cross_entropy_losses, - inv_cross_entropy_losses] = model_losses.create_masked_cross_entropy_loss( - inputs, present, inv_present, fwd_logits, inv_logits) - - cumulative_fwd_cross_entropy_losses = tf.add( - cumulative_fwd_cross_entropy_losses, fwd_cross_entropy_losses) - cumulative_inv_cross_entropy_losses = tf.add( - cumulative_inv_cross_entropy_losses, inv_cross_entropy_losses) - - return [ - cumulative_fwd_cross_entropy_losses, cumulative_inv_cross_entropy_losses - ] - - -def rollout_discriminator_loss(fwd_rollouts, present, inv_rollouts, - inv_present): - - dis_loss = 0 - dis_loss_pred = 0 - dis_loss_inv_pred = 0 - - for fwd_predictions, inv_predictions in zip(fwd_rollouts, inv_rollouts): - dis_loss_pred += losses.discriminator_loss(fwd_predictions, present) - dis_loss_inv_pred += losses.discriminator_loss(inv_predictions, inv_present) - - dis_loss_pred /= FLAGS.num_rollouts - dis_loss_inv_pred /= FLAGS.num_rollouts - - dis_loss = (dis_loss_pred + dis_loss_inv_pred) / 2. 
- return [dis_loss, dis_loss_pred, dis_loss_inv_pred] diff --git a/research/maskgan/models/seq2seq.py b/research/maskgan/models/seq2seq.py deleted file mode 100644 index fac397c98381309f6c7c6d428fcec3c665bcff98..0000000000000000000000000000000000000000 --- a/research/maskgan/models/seq2seq.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple seq2seq model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from six.moves import xrange -from models import attention_utils - -# ZoneoutWrapper. -from regularization import zoneout - -FLAGS = tf.app.flags.FLAGS - - -def transform_input_with_is_missing_token(inputs, targets_present): - """Transforms the inputs to have missing tokens when it's masked out. The - mask is for the targets, so therefore, to determine if an input at time t is - masked, we have to check if the target at time t - 1 is masked out. - - e.g. - inputs = [a, b, c, d] - targets = [b, c, d, e] - targets_present = [1, 0, 1, 0] - - then, - transformed_input = [a, b, , d] - - Args: - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. 
- targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the word. - - Returns: - transformed_input: tf.int32 Tensor of shape [batch_size, sequence_length] - which takes on value of inputs when the input is present and takes on - value=vocab_size to indicate a missing token. - """ - # To fill in if the input is missing. - input_missing = tf.constant( - FLAGS.vocab_size, - dtype=tf.int32, - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - # The 0th input will always be present to MaskGAN. - zeroth_input_present = tf.constant(True, tf.bool, shape=[FLAGS.batch_size, 1]) - - # Input present mask. - inputs_present = tf.concat( - [zeroth_input_present, targets_present[:, :-1]], axis=1) - - transformed_input = tf.where(inputs_present, inputs, input_missing) - return transformed_input - - -def gen_encoder(hparams, inputs, targets_present, is_training, reuse=None): - """Define the Encoder graph.""" - # We will use the same variable from the decoder. - if FLAGS.seq2seq_share_embedding: - with tf.variable_scope('decoder/rnn'): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('encoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.LayerNormBasicLSTMCell( - hparams.gen_rnn_size, reuse=reuse) - - attn_cell = lstm_cell - if FLAGS.zoneout_drop_prob > 0.0: - - def attn_cell(): - return zoneout.ZoneoutWrapper( - lstm_cell(), - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - - cell = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - initial_state = cell.zero_state(FLAGS.batch_size, tf.float32) - - # Add a missing token for inputs not present. 
- real_inputs = inputs - masked_inputs = transform_input_with_is_missing_token( - inputs, targets_present) - - with tf.variable_scope('rnn'): - hidden_states = [] - - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size + 1, hparams.gen_rnn_size]) - - real_rnn_inputs = tf.nn.embedding_lookup(embedding, real_inputs) - masked_rnn_inputs = tf.nn.embedding_lookup(embedding, masked_inputs) - - state = initial_state - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_inp = masked_rnn_inputs[:, t] - rnn_out, state = cell(rnn_inp, state) - hidden_states.append(rnn_out) - final_masked_state = state - hidden_states = tf.stack(hidden_states, axis=1) - - # Produce the RNN state had the model operated only - # over real data. - real_state = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - # RNN. - rnn_inp = real_rnn_inputs[:, t] - rnn_out, real_state = cell(rnn_inp, real_state) - final_state = real_state - - return (hidden_states, final_masked_state), initial_state, final_state - - -def gen_decoder(hparams, - inputs, - targets, - targets_present, - encoding_state, - is_training, - is_validating, - reuse=None): - """Define the Decoder graph. The Decoder will now impute tokens that - have been masked from the input seqeunce. - """ - gen_decoder_rnn_size = hparams.gen_rnn_size - - with tf.variable_scope('decoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.LayerNormBasicLSTMCell( - gen_decoder_rnn_size, reuse=reuse) - - attn_cell = lstm_cell - if FLAGS.zoneout_drop_prob > 0.0: - - def attn_cell(): - return zoneout.ZoneoutWrapper( - lstm_cell(), - zoneout_drop_prob=FLAGS.zoneout_drop_prob, - is_training=is_training) - - cell_gen = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - # Hidden encoder states. 
- hidden_vector_encodings = encoding_state[0] - - # Carry forward the final state tuple from the encoder. - # State tuples. - state_gen = encoding_state[1] - - if FLAGS.attention_option is not None: - (attention_keys, attention_values, _, - attention_construct_fn) = attention_utils.prepare_attention( - hidden_vector_encodings, - FLAGS.attention_option, - num_units=gen_decoder_rnn_size, - reuse=reuse) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, gen_decoder_rnn_size]) - softmax_w = tf.get_variable('softmax_w', - [gen_decoder_rnn_size, FLAGS.vocab_size]) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the Decoder. - if t == 0: - # Always provide the real input at t = 0. - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. - else: - real_rnn_inp = rnn_inputs[:, t] - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - - # While validating, the decoder should be operating in teacher - # forcing regime. Also, if we're just training with cross_entropy - # use teacher forcing. - if is_validating or (is_training and - FLAGS.gen_training_strategy == 'cross_entropy'): - rnn_inp = real_rnn_inp - else: - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - - if FLAGS.attention_option is not None: - rnn_out = attention_construct_fn(rnn_out, attention_keys, - attention_values) - # # TODO(liamfedus): Assert not "monotonic" attention_type. - # # TODO(liamfedus): FLAGS.attention_type. 
- # context_state = revised_attention_utils._empty_state() - # rnn_out, context_state = attention_construct_fn( - # rnn_out, attention_keys, attention_values, context_state, t) - logit = tf.matmul(rnn_out, softmax_w) + softmax_b - - # Output for Decoder. - # If input is present: Return real at t+1. - # If input is not present: Return fake for t+1. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - output = tf.where(targets_present[:, t], real, fake) - - # Add to lists. - sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - - return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack( - log_probs, axis=1)) - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph.""" - with tf.variable_scope('gen', reuse=reuse): - encoder_states, initial_state, final_state = gen_encoder( - hparams, inputs, targets_present, is_training=is_training, reuse=reuse) - stacked_sequence, stacked_logits, stacked_log_probs = gen_decoder( - hparams, - inputs, - targets, - targets_present, - encoder_states, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - return (stacked_sequence, stacked_logits, stacked_log_probs, initial_state, - final_state) diff --git a/research/maskgan/models/seq2seq_nas.py b/research/maskgan/models/seq2seq_nas.py deleted file mode 100644 index cede90f5625c6e46740ad7601681712e73f07450..0000000000000000000000000000000000000000 --- a/research/maskgan/models/seq2seq_nas.py +++ /dev/null @@ -1,333 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple seq2seq model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -from six.moves import xrange -import tensorflow as tf - -from models import attention_utils - -# NAS Code.. -from nas_utils import configs -from nas_utils import custom_cell -from nas_utils import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def get_config(): - return configs.AlienConfig2() - - -LSTMTuple = collections.namedtuple('LSTMTuple', ['c', 'h']) - - -def transform_input_with_is_missing_token(inputs, targets_present): - """Transforms the inputs to have missing tokens when it's masked out. The - mask is for the targets, so therefore, to determine if an input at time t is - masked, we have to check if the target at time t - 1 is masked out. - - e.g. - inputs = [a, b, c, d] - targets = [b, c, d, e] - targets_present = [1, 0, 1, 0] - - then, - transformed_input = [a, b, , d] - - Args: - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the word. - - Returns: - transformed_input: tf.int32 Tensor of shape [batch_size, sequence_length] - which takes on value of inputs when the input is present and takes on - value=vocab_size to indicate a missing token. - """ - # To fill in if the input is missing. 
- input_missing = tf.constant( - FLAGS.vocab_size, - dtype=tf.int32, - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - # The 0th input will always be present to MaskGAN. - zeroth_input_present = tf.constant(True, tf.bool, shape=[FLAGS.batch_size, 1]) - - # Input present mask. - inputs_present = tf.concat( - [zeroth_input_present, targets_present[:, :-1]], axis=1) - - transformed_input = tf.where(inputs_present, inputs, input_missing) - return transformed_input - - -def gen_encoder(hparams, inputs, targets_present, is_training, reuse=None): - """Define the Encoder graph. - - - Args: - hparams: Hyperparameters for the MaskGAN. - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the target. - is_training: Boolean indicating operational mode (train/inference). - reuse (Optional): Whether to reuse the variables. - - Returns: - Tuple of (hidden_states, final_state). - """ - config = get_config() - configs.print_config(config) - # We will use the same variable from the decoder. - if FLAGS.seq2seq_share_embedding: - with tf.variable_scope('decoder/rnn'): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('encoder', reuse=reuse): - # Neural architecture search cell. - cell = custom_cell.Alien(config.hidden_size) - - if is_training: - [h2h_masks, h2i_masks, _, - output_mask] = variational_dropout.generate_variational_dropout_masks( - hparams, config.keep_prob) - else: - h2i_masks, output_mask = None, None - - cell = custom_cell.GenericMultiRNNCell([cell] * config.num_layers) - - initial_state = cell.zero_state(FLAGS.batch_size, tf.float32) - - # Add a missing token for inputs not present. 
- real_inputs = inputs - masked_inputs = transform_input_with_is_missing_token( - inputs, targets_present) - - with tf.variable_scope('rnn'): - hidden_states = [] - - # Split the embedding into two parts so that we can load the PTB - # weights into one part of the Variable. - if not FLAGS.seq2seq_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - missing_embedding = tf.get_variable('missing_embedding', - [1, hparams.gen_rnn_size]) - embedding = tf.concat([embedding, missing_embedding], axis=0) - - real_rnn_inputs = tf.nn.embedding_lookup(embedding, real_inputs) - masked_rnn_inputs = tf.nn.embedding_lookup(embedding, masked_inputs) - - if is_training and FLAGS.keep_prob < 1: - masked_rnn_inputs = tf.nn.dropout(masked_rnn_inputs, FLAGS.keep_prob) - - state = initial_state - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_inp = masked_rnn_inputs[:, t] - - if is_training: - state = list(state) - for layer_num, per_layer_state in enumerate(state): - per_layer_state = LSTMTuple( - per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num]) - state[layer_num] = per_layer_state - - rnn_out, state = cell(rnn_inp, state, h2i_masks) - - if is_training: - rnn_out = output_mask * rnn_out - - hidden_states.append(rnn_out) - final_masked_state = state - hidden_states = tf.stack(hidden_states, axis=1) - - # Produce the RNN state had the model operated only - # over real data. - real_state = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - # RNN. - rnn_inp = real_rnn_inputs[:, t] - rnn_out, real_state = cell(rnn_inp, real_state) - final_state = real_state - - return (hidden_states, final_masked_state), initial_state, final_state - - -def gen_decoder(hparams, - inputs, - targets, - targets_present, - encoding_state, - is_training, - is_validating, - reuse=None): - """Define the Decoder graph. 
The Decoder will now impute tokens that - have been masked from the input seqeunce. - """ - config = get_config() - gen_decoder_rnn_size = hparams.gen_rnn_size - - if FLAGS.seq2seq_share_embedding: - with tf.variable_scope('decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, gen_decoder_rnn_size]) - - with tf.variable_scope('decoder', reuse=reuse): - # Neural architecture search cell. - cell = custom_cell.Alien(config.hidden_size) - - if is_training: - [h2h_masks, _, _, - output_mask] = variational_dropout.generate_variational_dropout_masks( - hparams, config.keep_prob) - else: - output_mask = None - - cell_gen = custom_cell.GenericMultiRNNCell([cell] * config.num_layers) - - # Hidden encoder states. - hidden_vector_encodings = encoding_state[0] - - # Carry forward the final state tuple from the encoder. - # State tuples. - state_gen = encoding_state[1] - - if FLAGS.attention_option is not None: - (attention_keys, attention_values, _, - attention_construct_fn) = attention_utils.prepare_attention( - hidden_vector_encodings, - FLAGS.attention_option, - num_units=gen_decoder_rnn_size, - reuse=reuse) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - - if not FLAGS.seq2seq_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, gen_decoder_rnn_size]) - softmax_w = tf.matrix_transpose(embedding) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the Decoder. - if t == 0: - # Always provide the real input at t = 0. - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. 
- else: - real_rnn_inp = rnn_inputs[:, t] - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - - # While validating, the decoder should be operating in teacher - # forcing regime. Also, if we're just training with cross_entropy - # use teacher forcing. - if is_validating or (is_training and - FLAGS.gen_training_strategy == 'cross_entropy'): - rnn_inp = real_rnn_inp - else: - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - if is_training: - state_gen = list(state_gen) - for layer_num, per_layer_state in enumerate(state_gen): - per_layer_state = LSTMTuple( - per_layer_state[0], per_layer_state[1] * h2h_masks[layer_num]) - state_gen[layer_num] = per_layer_state - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - - if is_training: - rnn_out = output_mask * rnn_out - - if FLAGS.attention_option is not None: - rnn_out = attention_construct_fn(rnn_out, attention_keys, - attention_values) - # # TODO(liamfedus): Assert not "monotonic" attention_type. - # # TODO(liamfedus): FLAGS.attention_type. - # context_state = revised_attention_utils._empty_state() - # rnn_out, context_state = attention_construct_fn( - # rnn_out, attention_keys, attention_values, context_state, t) - logit = tf.matmul(rnn_out, softmax_w) + softmax_b - - # Output for Decoder. - # If input is present: Return real at t+1. - # If input is not present: Return fake for t+1. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - output = tf.where(targets_present[:, t], real, fake) - - # Add to lists. 
- sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - - return (tf.stack(sequence, axis=1), tf.stack(logits, axis=1), tf.stack( - log_probs, axis=1)) - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph.""" - with tf.variable_scope('gen', reuse=reuse): - encoder_states, initial_state, final_state = gen_encoder( - hparams, inputs, targets_present, is_training=is_training, reuse=reuse) - stacked_sequence, stacked_logits, stacked_log_probs = gen_decoder( - hparams, - inputs, - targets, - targets_present, - encoder_states, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - return (stacked_sequence, stacked_logits, stacked_log_probs, initial_state, - final_state) diff --git a/research/maskgan/models/seq2seq_vd.py b/research/maskgan/models/seq2seq_vd.py deleted file mode 100644 index 850eda435c48c73d574a06b1b65a12f71a18f276..0000000000000000000000000000000000000000 --- a/research/maskgan/models/seq2seq_vd.py +++ /dev/null @@ -1,609 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Simple seq2seq model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -from models import attention_utils -from regularization import variational_dropout - -FLAGS = tf.app.flags.FLAGS - - -def transform_input_with_is_missing_token(inputs, targets_present): - """Transforms the inputs to have missing tokens when it's masked out. The - mask is for the targets, so therefore, to determine if an input at time t is - masked, we have to check if the target at time t - 1 is masked out. - - e.g. - inputs = [a, b, c, d] - targets = [b, c, d, e] - targets_present = [1, 0, 1, 0] - - which computes, - inputs_present = [1, 1, 0, 1] - - and outputs, - transformed_input = [a, b, , d] - - Args: - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the word. - - Returns: - transformed_input: tf.int32 Tensor of shape [batch_size, sequence_length] - which takes on value of inputs when the input is present and takes on - value=vocab_size to indicate a missing token. - """ - # To fill in if the input is missing. - input_missing = tf.constant( - FLAGS.vocab_size, - dtype=tf.int32, - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - # The 0th input will always be present to MaskGAN. - zeroth_input_present = tf.constant(True, tf.bool, shape=[FLAGS.batch_size, 1]) - - # Input present mask. - inputs_present = tf.concat( - [zeroth_input_present, targets_present[:, :-1]], axis=1) - - transformed_input = tf.where(inputs_present, inputs, input_missing) - return transformed_input - - -# TODO(adai): IMDB labels placeholder to encoder. 
-def gen_encoder(hparams, inputs, targets_present, is_training, reuse=None): - """Define the Encoder graph. - - Args: - hparams: Hyperparameters for the MaskGAN. - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the target. - is_training: Boolean indicating operational mode (train/inference). - reuse (Optional): Whether to reuse the variables. - - Returns: - Tuple of (hidden_states, final_state). - """ - # We will use the same variable from the decoder. - if FLAGS.seq2seq_share_embedding: - with tf.variable_scope('decoder/rnn'): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('encoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.gen_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.gen_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.gen_rnn_size, - hparams.gen_vd_keep_prob, hparams.gen_vd_keep_prob) - - cell = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - initial_state = cell.zero_state(FLAGS.batch_size, tf.float32) - - # Add a missing token for inputs not present. - real_inputs = inputs - masked_inputs = transform_input_with_is_missing_token( - inputs, targets_present) - - with tf.variable_scope('rnn') as scope: - hidden_states = [] - - # Split the embedding into two parts so that we can load the PTB - # weights into one part of the Variable. 
- if not FLAGS.seq2seq_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - missing_embedding = tf.get_variable('missing_embedding', - [1, hparams.gen_rnn_size]) - embedding = tf.concat([embedding, missing_embedding], axis=0) - - # TODO(adai): Perhaps append IMDB labels placeholder to input at - # each time point. - real_rnn_inputs = tf.nn.embedding_lookup(embedding, real_inputs) - masked_rnn_inputs = tf.nn.embedding_lookup(embedding, masked_inputs) - - state = initial_state - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform( - tf.stack([FLAGS.batch_size, 1, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.gen_vd_keep_prob, hparams.gen_rnn_size) - - hidden_states, state = tf.nn.dynamic_rnn( - cell, masked_rnn_inputs, initial_state=state, scope=scope) - if is_training: - hidden_states *= output_mask - - final_masked_state = state - - # Produce the RNN state had the model operated only - # over real data. - real_state = initial_state - _, real_state = tf.nn.dynamic_rnn( - cell, real_rnn_inputs, initial_state=real_state, scope=scope) - final_state = real_state - - return (hidden_states, final_masked_state), initial_state, final_state - - -# TODO(adai): IMDB labels placeholder to encoder. -def gen_encoder_cnn(hparams, inputs, targets_present, is_training, reuse=None): - """Define the CNN Encoder graph.""" - del reuse - sequence = transform_input_with_is_missing_token(inputs, targets_present) - - # TODO(liamfedus): Make this a hyperparameter. 
- dis_filter_sizes = [3, 4, 5, 6, 7, 8, 9, 10, 15, 20] - - # Keeping track of l2 regularization loss (optional) - # l2_loss = tf.constant(0.0) - - with tf.variable_scope('encoder', reuse=True): - with tf.variable_scope('rnn'): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - cnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - # Create a convolution layer for each filter size - conv_outputs = [] - for filter_size in dis_filter_sizes: - with tf.variable_scope('conv-%s' % filter_size): - # Convolution Layer - filter_shape = [ - filter_size, hparams.gen_rnn_size, hparams.dis_num_filters - ] - W = tf.get_variable( - name='W', initializer=tf.truncated_normal(filter_shape, stddev=0.1)) - b = tf.get_variable( - name='b', - initializer=tf.constant(0.1, shape=[hparams.dis_num_filters])) - conv = tf.nn.conv1d(cnn_inputs, W, stride=1, padding='SAME', name='conv') - - # Apply nonlinearity - h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu') - - conv_outputs.append(h) - - # Combine all the pooled features - dis_num_filters_total = hparams.dis_num_filters * len(dis_filter_sizes) - - h_conv = tf.concat(conv_outputs, axis=2) - h_conv_flat = tf.reshape(h_conv, [-1, dis_num_filters_total]) - - # Add dropout - if is_training: - with tf.variable_scope('dropout'): - h_conv_flat = tf.nn.dropout(h_conv_flat, hparams.gen_vd_keep_prob) - - # Final (unnormalized) scores and predictions - with tf.variable_scope('output'): - W = tf.get_variable( - 'W', - shape=[dis_num_filters_total, hparams.gen_rnn_size], - initializer=tf.contrib.layers.xavier_initializer()) - b = tf.get_variable( - name='b', initializer=tf.constant(0.1, shape=[hparams.gen_rnn_size])) - # l2_loss += tf.nn.l2_loss(W) - # l2_loss += tf.nn.l2_loss(b) - predictions = tf.nn.xw_plus_b(h_conv_flat, W, b, name='predictions') - predictions = tf.reshape( - predictions, - shape=[FLAGS.batch_size, FLAGS.sequence_length, hparams.gen_rnn_size]) - final_state = tf.reduce_mean(predictions, 
1) - return predictions, (final_state, final_state) - - -# TODO(adai): IMDB labels placeholder to decoder. -def gen_decoder(hparams, - inputs, - targets, - targets_present, - encoding_state, - is_training, - is_validating, - reuse=None): - """Define the Decoder graph. The Decoder will now impute tokens that - have been masked from the input seqeunce. - """ - gen_decoder_rnn_size = hparams.gen_rnn_size - - targets = tf.Print(targets, [targets], message='targets', summarize=50) - if FLAGS.seq2seq_share_embedding: - with tf.variable_scope('decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - - with tf.variable_scope('decoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - gen_decoder_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.gen_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.gen_rnn_size, - hparams.gen_vd_keep_prob, hparams.gen_vd_keep_prob) - - cell_gen = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - # Hidden encoder states. - hidden_vector_encodings = encoding_state[0] - - # Carry forward the final state tuple from the encoder. - # State tuples. - state_gen = encoding_state[1] - - if FLAGS.attention_option is not None: - (attention_keys, attention_values, _, - attention_construct_fn) = attention_utils.prepare_attention( - hidden_vector_encodings, - FLAGS.attention_option, - num_units=gen_decoder_rnn_size, - reuse=reuse) - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. 
if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.gen_vd_keep_prob, hparams.gen_rnn_size) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - - if not FLAGS.seq2seq_share_embedding: - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - softmax_w = tf.matrix_transpose(embedding) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - # TODO(adai): Perhaps append IMDB labels placeholder to input at - # each time point. - - rnn_outs = [] - - fake = None - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the Decoder. - if t == 0: - # Always provide the real input at t = 0. - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. - else: - real_rnn_inp = rnn_inputs[:, t] - - # While validating, the decoder should be operating in teacher - # forcing regime. Also, if we're just training with cross_entropy - # use teacher forcing. - if is_validating or FLAGS.gen_training_strategy == 'cross_entropy': - rnn_inp = real_rnn_inp - else: - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - - if FLAGS.attention_option is not None: - rnn_out = attention_construct_fn(rnn_out, attention_keys, - attention_values) - if is_training: - rnn_out *= output_mask - - rnn_outs.append(rnn_out) - if FLAGS.gen_training_strategy != 'cross_entropy': - logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b) - - # Output for Decoder. - # If input is present: Return real at t+1. 
- # If input is not present: Return fake for t+1. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - if FLAGS.use_gen_mode: - fake = categorical.mode() - else: - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - output = tf.where(targets_present[:, t], real, fake) - - else: - real = targets[:, t] - logit = tf.zeros(tf.stack([FLAGS.batch_size, FLAGS.vocab_size])) - log_prob = tf.zeros(tf.stack([FLAGS.batch_size])) - output = real - - # Add to lists. - sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - - if FLAGS.gen_training_strategy == 'cross_entropy': - logits = tf.nn.bias_add( - tf.matmul( - tf.reshape(tf.stack(rnn_outs, 1), [-1, gen_decoder_rnn_size]), - softmax_w), softmax_b) - logits = tf.reshape(logits, - [-1, FLAGS.sequence_length, FLAGS.vocab_size]) - else: - logits = tf.stack(logits, axis=1) - - return (tf.stack(sequence, axis=1), logits, tf.stack(log_probs, axis=1)) - - -def dis_encoder(hparams, masked_inputs, is_training, reuse=None, - embedding=None): - """Define the Discriminator encoder. 
Reads in the masked inputs for context - and produces the hidden states of the encoder.""" - with tf.variable_scope('encoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.dis_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.dis_rnn_size, - hparams.dis_vd_keep_prob, hparams.dis_vd_keep_prob) - - cell_dis = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - state_dis = cell_dis.zero_state(FLAGS.batch_size, tf.float32) - - with tf.variable_scope('rnn'): - hidden_states = [] - - missing_embedding = tf.get_variable('missing_embedding', - [1, hparams.dis_rnn_size]) - embedding = tf.concat([embedding, missing_embedding], axis=0) - masked_rnn_inputs = tf.nn.embedding_lookup(embedding, masked_inputs) - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.dis_vd_keep_prob, hparams.dis_rnn_size) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = masked_rnn_inputs[:, t] - rnn_out, state_dis = cell_dis(rnn_in, state_dis) - if is_training: - rnn_out *= output_mask - hidden_states.append(rnn_out) - final_state = state_dis - - return (tf.stack(hidden_states, axis=1), final_state) - - -def dis_decoder(hparams, - sequence, - encoding_state, - is_training, - reuse=None, - embedding=None): - """Define the Discriminator decoder. 
Read in the sequence and predict - at each time point.""" - sequence = tf.cast(sequence, tf.int32) - - with tf.variable_scope('decoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell( - hparams.dis_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and hparams.dis_vd_keep_prob < 1: - - def attn_cell(): - return variational_dropout.VariationalDropoutWrapper( - lstm_cell(), FLAGS.batch_size, hparams.dis_rnn_size, - hparams.dis_vd_keep_prob, hparams.dis_vd_keep_prob) - - cell_dis = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.dis_num_layers)], - state_is_tuple=True) - - # Hidden encoder states. - hidden_vector_encodings = encoding_state[0] - - # Carry forward the final state tuple from the encoder. - # State tuples. - state = encoding_state[1] - - if FLAGS.attention_option is not None: - (attention_keys, attention_values, _, - attention_construct_fn) = attention_utils.prepare_attention( - hidden_vector_encodings, - FLAGS.attention_option, - num_units=hparams.dis_rnn_size, - reuse=reuse) - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([FLAGS.batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - if is_training: - output_mask = make_mask(hparams.dis_vd_keep_prob, hparams.dis_rnn_size) - - with tf.variable_scope('rnn') as vs: - predictions = [] - - rnn_inputs = tf.nn.embedding_lookup(embedding, sequence) - - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_in = rnn_inputs[:, t] - rnn_out, state = cell_dis(rnn_in, state) - - if FLAGS.attention_option is not None: - rnn_out = attention_construct_fn(rnn_out, attention_keys, - attention_values) - if is_training: - rnn_out *= output_mask - - # Prediction is linear output for Discriminator. 
- pred = tf.contrib.layers.linear(rnn_out, 1, scope=vs) - predictions.append(pred) - - predictions = tf.stack(predictions, axis=1) - return tf.squeeze(predictions, axis=2) - - -def discriminator(hparams, - inputs, - targets_present, - sequence, - is_training, - reuse=None): - """Define the Discriminator graph.""" - if FLAGS.dis_share_embedding: - assert hparams.dis_rnn_size == hparams.gen_rnn_size, ( - 'If you wish to share Discriminator/Generator embeddings, they must be' - ' same dimension.') - with tf.variable_scope('gen/decoder/rnn', reuse=True): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - else: - # Explicitly share the embedding. - with tf.variable_scope('dis/decoder/rnn', reuse=reuse): - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.dis_rnn_size]) - - # Mask the input sequence. - masked_inputs = transform_input_with_is_missing_token(inputs, targets_present) - - # Confirm masking. - masked_inputs = tf.Print( - masked_inputs, [inputs, targets_present, masked_inputs, sequence], - message='inputs, targets_present, masked_inputs, sequence', - summarize=10) - - with tf.variable_scope('dis', reuse=reuse): - encoder_states = dis_encoder( - hparams, - masked_inputs, - is_training=is_training, - reuse=reuse, - embedding=embedding) - predictions = dis_decoder( - hparams, - sequence, - encoder_states, - is_training=is_training, - reuse=reuse, - embedding=embedding) - - # if FLAGS.baseline_method == 'critic': - # with tf.variable_scope('critic', reuse=reuse) as critic_scope: - # values = tf.contrib.layers.linear(rnn_outs, 1, scope=critic_scope) - # values = tf.squeeze(values, axis=2) - # else: - # values = None - - return predictions - - -# TODO(adai): IMDB labels placeholder to encoder/decoder. 
-def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph.""" - with tf.variable_scope('gen', reuse=reuse): - encoder_states, initial_state, final_state = gen_encoder( - hparams, inputs, targets_present, is_training=is_training, reuse=reuse) - stacked_sequence, stacked_logits, stacked_log_probs = gen_decoder( - hparams, - inputs, - targets, - targets_present, - encoder_states, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - return (stacked_sequence, stacked_logits, stacked_log_probs, initial_state, - final_state, encoder_states) diff --git a/research/maskgan/models/seq2seq_zaremba.py b/research/maskgan/models/seq2seq_zaremba.py deleted file mode 100644 index 25f6ce44f0cb2fe650e23b332ace014ab7cdf469..0000000000000000000000000000000000000000 --- a/research/maskgan/models/seq2seq_zaremba.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Simple seq2seq model definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from six.moves import xrange -from models import attention_utils - -FLAGS = tf.app.flags.FLAGS - - -def transform_input_with_is_missing_token(inputs, targets_present): - """Transforms the inputs to have missing tokens when it's masked out. The - mask is for the targets, so therefore, to determine if an input at time t is - masked, we have to check if the target at time t - 1 is masked out. - - e.g. - inputs = [a, b, c, d] - targets = [b, c, d, e] - targets_present = [1, 0, 1, 0] - - then, - transformed_input = [a, b, , d] - - Args: - inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the word. - - Returns: - transformed_input: tf.int32 Tensor of shape [batch_size, sequence_length] - which takes on value of inputs when the input is present and takes on - value=vocab_size to indicate a missing token. - """ - # To fill in if the input is missing. - input_missing = tf.constant(FLAGS.vocab_size, - dtype=tf.int32, - shape=[FLAGS.batch_size, FLAGS.sequence_length]) - - # The 0th input will always be present to MaskGAN. - zeroth_input_present = tf.constant(True, tf.bool, shape=[FLAGS.batch_size, 1]) - - # Input present mask. - inputs_present = tf.concat( - [zeroth_input_present, targets_present[:, :-1]], axis=1) - - transformed_input = tf.where(inputs_present, inputs, input_missing) - return transformed_input - - -def gen_encoder(hparams, inputs, targets_present, is_training, reuse=None): - """Define the Encoder graph. - - - Args: - hparams: Hyperparameters for the MaskGAN. 
- inputs: tf.int32 Tensor of shape [batch_size, sequence_length] with tokens - up to, but not including, vocab_size. - targets_present: tf.bool Tensor of shape [batch_size, sequence_length] with - True representing the presence of the target. - is_training: Boolean indicating operational mode (train/inference). - reuse (Optional): Whether to reuse the variables. - - Returns: - Tuple of (hidden_states, final_state). - """ - with tf.variable_scope('encoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell(hparams.gen_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and FLAGS.keep_prob < 1: - - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=FLAGS.keep_prob) - - cell = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - initial_state = cell.zero_state(FLAGS.batch_size, tf.float32) - - # Add a missing token for inputs not present. - real_inputs = inputs - masked_inputs = transform_input_with_is_missing_token(inputs, - targets_present) - - with tf.variable_scope('rnn'): - hidden_states = [] - - # Split the embedding into two parts so that we can load the PTB - # weights into one part of the Variable. 
- embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - missing_embedding = tf.get_variable('missing_embedding', - [1, hparams.gen_rnn_size]) - embedding = tf.concat([embedding, missing_embedding], axis=0) - - real_rnn_inputs = tf.nn.embedding_lookup(embedding, real_inputs) - masked_rnn_inputs = tf.nn.embedding_lookup(embedding, masked_inputs) - - if is_training and FLAGS.keep_prob < 1: - masked_rnn_inputs = tf.nn.dropout(masked_rnn_inputs, FLAGS.keep_prob) - - state = initial_state - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - rnn_inp = masked_rnn_inputs[:, t] - rnn_out, state = cell(rnn_inp, state) - hidden_states.append(rnn_out) - final_masked_state = state - hidden_states = tf.stack(hidden_states, axis=1) - - # Produce the RNN state had the model operated only - # over real data. - real_state = initial_state - for t in xrange(FLAGS.sequence_length): - tf.get_variable_scope().reuse_variables() - - # RNN. - rnn_inp = real_rnn_inputs[:, t] - rnn_out, real_state = cell(rnn_inp, real_state) - final_state = real_state - - return (hidden_states, final_masked_state), initial_state, final_state - - -def gen_decoder(hparams, - inputs, - targets, - targets_present, - encoding_state, - is_training, - is_validating, - reuse=None): - """Define the Decoder graph. The Decoder will now impute tokens that - have been masked from the input seqeunce. 
- """ - gen_decoder_rnn_size = hparams.gen_rnn_size - - with tf.variable_scope('decoder', reuse=reuse): - - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell(gen_decoder_rnn_size, - forget_bias=0.0, - state_is_tuple=True, - reuse=reuse) - - attn_cell = lstm_cell - if is_training and FLAGS.keep_prob < 1: - - def attn_cell(): - return tf.contrib.rnn.DropoutWrapper( - lstm_cell(), output_keep_prob=FLAGS.keep_prob) - - cell_gen = tf.contrib.rnn.MultiRNNCell( - [attn_cell() for _ in range(hparams.gen_num_layers)], - state_is_tuple=True) - - # Hidden encoder states. - hidden_vector_encodings = encoding_state[0] - - # Carry forward the final state tuple from the encoder. - # State tuples. - state_gen = encoding_state[1] - - if FLAGS.attention_option is not None: - (attention_keys, attention_values, _, - attention_construct_fn) = attention_utils.prepare_attention( - hidden_vector_encodings, - FLAGS.attention_option, - num_units=gen_decoder_rnn_size, - reuse=reuse) - - with tf.variable_scope('rnn'): - sequence, logits, log_probs = [], [], [] - - embedding = tf.get_variable('embedding', - [FLAGS.vocab_size, hparams.gen_rnn_size]) - softmax_w = tf.matrix_transpose(embedding) - softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) - - rnn_inputs = tf.nn.embedding_lookup(embedding, inputs) - - if is_training and FLAGS.keep_prob < 1: - rnn_inputs = tf.nn.dropout(rnn_inputs, FLAGS.keep_prob) - - rnn_outs = [] - - fake = None - for t in xrange(FLAGS.sequence_length): - if t > 0: - tf.get_variable_scope().reuse_variables() - - # Input to the Decoder. - if t == 0: - # Always provide the real input at t = 0. - rnn_inp = rnn_inputs[:, t] - - # If the input is present, read in the input at t. - # If the input is not present, read in the previously generated. - else: - real_rnn_inp = rnn_inputs[:, t] - - # While validating, the decoder should be operating in teacher - # forcing regime. Also, if we're just training with cross_entropy - # use teacher forcing. 
- if is_validating or FLAGS.gen_training_strategy == 'cross_entropy': - rnn_inp = real_rnn_inp - else: - fake_rnn_inp = tf.nn.embedding_lookup(embedding, fake) - rnn_inp = tf.where(targets_present[:, t - 1], real_rnn_inp, - fake_rnn_inp) - - # RNN. - rnn_out, state_gen = cell_gen(rnn_inp, state_gen) - - if FLAGS.attention_option is not None: - rnn_out = attention_construct_fn(rnn_out, attention_keys, - attention_values) - rnn_outs.append(rnn_out) - if FLAGS.gen_training_strategy != 'cross_entropy': - logit = tf.nn.bias_add(tf.matmul(rnn_out, softmax_w), softmax_b) - - # Output for Decoder. - # If input is present: Return real at t+1. - # If input is not present: Return fake for t+1. - real = targets[:, t] - - categorical = tf.contrib.distributions.Categorical(logits=logit) - fake = categorical.sample() - log_prob = categorical.log_prob(fake) - - output = tf.where(targets_present[:, t], real, fake) - - else: - batch_size = tf.shape(rnn_out)[0] - logit = tf.zeros(tf.stack([batch_size, FLAGS.vocab_size])) - log_prob = tf.zeros(tf.stack([batch_size])) - output = targets[:, t] - - # Add to lists. 
- sequence.append(output) - log_probs.append(log_prob) - logits.append(logit) - if FLAGS.gen_training_strategy == 'cross_entropy': - logits = tf.nn.bias_add( - tf.matmul( - tf.reshape(tf.stack(rnn_outs, 1), [-1, gen_decoder_rnn_size]), - softmax_w), softmax_b) - logits = tf.reshape(logits, - [-1, FLAGS.sequence_length, FLAGS.vocab_size]) - else: - logits = tf.stack(logits, axis=1) - - return (tf.stack(sequence, axis=1), logits, tf.stack(log_probs, axis=1)) - - -def generator(hparams, - inputs, - targets, - targets_present, - is_training, - is_validating, - reuse=None): - """Define the Generator graph.""" - with tf.variable_scope('gen', reuse=reuse): - encoder_states, initial_state, final_state = gen_encoder( - hparams, inputs, targets_present, is_training=is_training, reuse=reuse) - stacked_sequence, stacked_logits, stacked_log_probs = gen_decoder( - hparams, - inputs, - targets, - targets_present, - encoder_states, - is_training=is_training, - is_validating=is_validating, - reuse=reuse) - return (stacked_sequence, stacked_logits, stacked_log_probs, initial_state, - final_state) diff --git a/research/maskgan/nas_utils/__init__.py b/research/maskgan/nas_utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/nas_utils/configs.py b/research/maskgan/nas_utils/configs.py deleted file mode 100644 index 80d867c36d1de07663d59d6c161aaf9cbe241d95..0000000000000000000000000000000000000000 --- a/research/maskgan/nas_utils/configs.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -def print_config(config): - print("-" * 10, "Configuration Specs", "-" * 10) - for item in dir(config): - if list(item)[0] != "_": - print(item, getattr(config, item)) - print("-" * 29) - - -class AlienConfig2(object): - """Base 8 740 shared embeddings, gets 64.0 (mean: std: min: max: ).""" - init_scale = 0.05 - learning_rate = 1.0 - max_grad_norm = 10 - num_layers = 2 - num_steps = 25 - hidden_size = 740 - max_epoch = 70 - max_max_epoch = 250 - keep_prob = [1 - 0.15, 1 - 0.45] - lr_decay = 0.95 - batch_size = 20 - vocab_size = 10000 - weight_decay = 1e-4 - share_embeddings = True - cell = "alien" - dropout_type = "variational" diff --git a/research/maskgan/nas_utils/custom_cell.py b/research/maskgan/nas_utils/custom_cell.py deleted file mode 100644 index 6add7ffa4e0d69da56d2bba7d9da3875b5c4dd3b..0000000000000000000000000000000000000000 --- a/research/maskgan/nas_utils/custom_cell.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import numpy as np -import tensorflow as tf - -flags = tf.flags -FLAGS = tf.app.flags.FLAGS -LSTMTuple = collections.namedtuple('LSTMTuple', ['c', 'h']) - - -def cell_depth(num): - num /= 2 - val = np.log2(1 + num) - assert abs(val - int(val)) == 0 - return int(val) - - -class GenericMultiRNNCell(tf.contrib.rnn.RNNCell): - """More generic version of MultiRNNCell that allows you to pass in a dropout mask""" - - def __init__(self, cells): - """Create a RNN cell composed sequentially of a number of RNNCells. - - Args: - cells: list of RNNCells that will be composed in this order. - state_is_tuple: If True, accepted and returned states are n-tuples, where - `n = len(cells)`. If False, the states are all - concatenated along the column axis. This latter behavior will soon be - deprecated. - - Raises: - ValueError: if cells is empty (not allowed), or at least one of the cells - returns a state tuple but the flag `state_is_tuple` is `False`. 
- """ - self._cells = cells - - @property - def state_size(self): - return tuple(cell.state_size for cell in self._cells) - - @property - def output_size(self): - return self._cells[-1].output_size - - def __call__(self, inputs, state, input_masks=None, scope=None): - """Run this multi-layer cell on inputs, starting from state.""" - with tf.variable_scope(scope or type(self).__name__): - cur_inp = inputs - new_states = [] - for i, cell in enumerate(self._cells): - with tf.variable_scope('Cell%d' % i): - cur_state = state[i] - if input_masks is not None: - cur_inp *= input_masks[i] - cur_inp, new_state = cell(cur_inp, cur_state) - new_states.append(new_state) - new_states = tuple(new_states) - return cur_inp, new_states - - -class AlienRNNBuilder(tf.contrib.rnn.RNNCell): - - def __init__(self, num_units, params, additional_params, base_size): - self.num_units = num_units - self.cell_create_index = additional_params[0] - self.cell_inject_index = additional_params[1] - self.base_size = base_size - self.cell_params = params[ - -2:] # Cell injection parameters are always the last two - params = params[:-2] - self.depth = cell_depth(len(params)) - self.params = params - self.units_per_layer = [2**i for i in range(self.depth) - ][::-1] # start with the biggest layer - - def __call__(self, inputs, state, scope=None): - with tf.variable_scope(scope or type(self).__name__): - definition1 = ['add', 'elem_mult', 'max'] - definition2 = [tf.identity, tf.tanh, tf.sigmoid, tf.nn.relu, tf.sin] - layer_outputs = [[] for _ in range(self.depth)] - with tf.variable_scope('rnn_builder'): - curr_index = 0 - c, h = state - - # Run all dense matrix multiplications at once - big_h_mat = tf.get_variable( - 'big_h_mat', [self.num_units, - self.base_size * self.num_units], tf.float32) - big_inputs_mat = tf.get_variable( - 'big_inputs_mat', [self.num_units, - self.base_size * self.num_units], tf.float32) - big_h_output = tf.matmul(h, big_h_mat) - big_inputs_output = tf.matmul(inputs, 
big_inputs_mat) - h_splits = tf.split(big_h_output, self.base_size, axis=1) - inputs_splits = tf.split(big_inputs_output, self.base_size, axis=1) - - for layer_num, units in enumerate(self.units_per_layer): - for unit_num in range(units): - with tf.variable_scope( - 'layer_{}_unit_{}'.format(layer_num, unit_num)): - if layer_num == 0: - prev1_mat = h_splits[unit_num] - prev2_mat = inputs_splits[unit_num] - else: - prev1_mat = layer_outputs[layer_num - 1][2 * unit_num] - prev2_mat = layer_outputs[layer_num - 1][2 * unit_num + 1] - if definition1[self.params[curr_index]] == 'add': - output = prev1_mat + prev2_mat - elif definition1[self.params[curr_index]] == 'elem_mult': - output = prev1_mat * prev2_mat - elif definition1[self.params[curr_index]] == 'max': - output = tf.maximum(prev1_mat, prev2_mat) - if curr_index / 2 == self.cell_create_index: # Take the new cell before the activation - new_c = tf.identity(output) - output = definition2[self.params[curr_index + 1]](output) - if curr_index / 2 == self.cell_inject_index: - if definition1[self.cell_params[0]] == 'add': - output += c - elif definition1[self.cell_params[0]] == 'elem_mult': - output *= c - elif definition1[self.cell_params[0]] == 'max': - output = tf.maximum(output, c) - output = definition2[self.cell_params[1]](output) - layer_outputs[layer_num].append(output) - curr_index += 2 - new_h = layer_outputs[-1][-1] - return new_h, LSTMTuple(new_c, new_h) - - @property - def state_size(self): - return LSTMTuple(self.num_units, self.num_units) - - @property - def output_size(self): - return self.num_units - - -class Alien(AlienRNNBuilder): - """Base 8 Cell.""" - - def __init__(self, num_units): - params = [ - 0, 2, 0, 3, 0, 2, 1, 3, 0, 1, 0, 2, 0, 1, 0, 2, 1, 1, 0, 1, 1, 1, 0, 2, - 1, 0, 0, 1, 1, 1, 0, 1 - ] - additional_params = [12, 8] - base_size = 8 - super(Alien, self).__init__(num_units, params, additional_params, base_size) diff --git a/research/maskgan/nas_utils/variational_dropout.py 
b/research/maskgan/nas_utils/variational_dropout.py deleted file mode 100644 index 49cc29f0cd77f7bef9e3c47e7d7dae73fa877ecd..0000000000000000000000000000000000000000 --- a/research/maskgan/nas_utils/variational_dropout.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Variational Dropout.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS - - -def generate_dropout_masks(keep_prob, shape, amount): - masks = [] - for _ in range(amount): - dropout_mask = tf.random_uniform(shape) + (keep_prob) - dropout_mask = tf.floor(dropout_mask) / (keep_prob) - masks.append(dropout_mask) - return masks - - -def generate_variational_dropout_masks(hparams, keep_prob): - [batch_size, num_steps, size, num_layers] = [ - FLAGS.batch_size, FLAGS.sequence_length, hparams.gen_rnn_size, - hparams.gen_num_layers - ] - if len(keep_prob) == 2: - emb_keep_prob = keep_prob[0] # keep prob for embedding matrix - h2h_keep_prob = emb_keep_prob # keep prob for hidden to hidden connections - h2i_keep_prob = keep_prob[1] # keep prob for hidden to input connections - out_keep_prob = h2i_keep_prob # keep probability for output state - else: - emb_keep_prob = keep_prob[0] # keep prob for embedding matrix - 
h2h_keep_prob = keep_prob[1] # keep prob for hidden to hidden connections - h2i_keep_prob = keep_prob[2] # keep prob for hidden to input connections - out_keep_prob = keep_prob[3] # keep probability for output state - h2i_masks = [] # Masks for input to recurrent connections - h2h_masks = [] # Masks for recurrent to recurrent connections - - # Input word dropout mask - emb_masks = generate_dropout_masks(emb_keep_prob, [num_steps, 1], batch_size) - output_mask = generate_dropout_masks(out_keep_prob, [batch_size, size], 1)[0] - h2i_masks = generate_dropout_masks(h2i_keep_prob, [batch_size, size], - num_layers) - h2h_masks = generate_dropout_masks(h2h_keep_prob, [batch_size, size], - num_layers) - return h2h_masks, h2i_masks, emb_masks, output_mask diff --git a/research/maskgan/pretrain_mask_gan.py b/research/maskgan/pretrain_mask_gan.py deleted file mode 100644 index 1a9d8ee947deaa3e31cc4c332969ed529e60305e..0000000000000000000000000000000000000000 --- a/research/maskgan/pretrain_mask_gan.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Pretraining functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports - -import numpy as np - -import tensorflow as tf - -from data import imdb_loader -from data import ptb_loader - -# Data. -from model_utils import model_utils -from models import evaluation_utils - -tf.app.flags.DEFINE_integer( - 'gen_pretrain_steps', None, - 'The number of steps to pretrain the generator with cross entropy loss.') -tf.app.flags.DEFINE_integer( - 'dis_pretrain_steps', None, - 'The number of steps to pretrain the discriminator.') - -FLAGS = tf.app.flags.FLAGS - - -def pretrain_generator(sv, sess, model, data, log, id_to_word, - data_ngram_counts, is_chief): - """Pretrain the generator with classic language modeling training.""" - print('\nPretraining generator for %d steps.' % FLAGS.gen_pretrain_steps) - log.write( - '\nPretraining generator for %d steps.\n' % FLAGS.gen_pretrain_steps) - - is_pretraining = True - - while is_pretraining: - - costs = 0. - iters = 0 - if FLAGS.data_set == 'ptb': - iterator = ptb_loader.ptb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length, - FLAGS.epoch_size_override) - elif FLAGS.data_set == 'imdb': - iterator = imdb_loader.imdb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length) - - for x, y, _ in iterator: - - # For pretraining with cross entropy loss, we have all tokens in the - # forward sequence present (all True). 
- model_utils.assign_percent_real(sess, model.percent_real_update, - model.new_rate, 1.0) - p = np.ones(shape=[FLAGS.batch_size, FLAGS.sequence_length], dtype=bool) - - pretrain_feed = {model.inputs: x, model.targets: y, model.present: p} - - [losses, cost_eval, _, step] = sess.run( - [ - model.fake_cross_entropy_losses, model.avg_log_perplexity, - model.gen_pretrain_op, model.global_step - ], - feed_dict=pretrain_feed) - - costs += cost_eval - iters += FLAGS.sequence_length - - # Calulate rolling perplexity. - perplexity = np.exp(costs / iters) - - # Summaries. - if is_chief and step % FLAGS.summaries_every == 0: - # Graph summaries. - summary_str = sess.run( - model.merge_summaries_op, feed_dict=pretrain_feed) - sv.SummaryComputed(sess, summary_str) - - # Additional summary. - for n, data_ngram_count in data_ngram_counts.iteritems(): - avg_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, pretrain_feed, data_ngram_count, - int(n)) - summary_percent_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/%s-grams_percent_correct' % n, - simple_value=avg_percent_captured) - ]) - sv.SummaryComputed(sess, summary_percent_str, global_step=step) - - summary_perplexity_str = tf.Summary(value=[ - tf.Summary.Value(tag='general/perplexity', simple_value=perplexity) - ]) - sv.SummaryComputed(sess, summary_perplexity_str, global_step=step) - - # Printing and logging - if is_chief and step % FLAGS.print_every == 0: - print('global_step: %d' % step) - print(' generator loss: %.3f' % np.mean(losses)) - print(' perplexity: %.3f' % perplexity) - log.write('global_step: %d\n' % step) - log.write(' generator loss: %.3f\n' % np.mean(losses)) - log.write(' perplexity: %.3f\n' % perplexity) - - for n, data_ngram_count in data_ngram_counts.iteritems(): - avg_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, pretrain_feed, data_ngram_count, - int(n)) - print(' percent of %s-grams captured: 
%.3f.\n' % - (n, avg_percent_captured)) - log.write(' percent of %s-grams captured: %.3f.\n\n' % - (n, avg_percent_captured)) - - evaluation_utils.generate_logs(sess, model, log, id_to_word, - pretrain_feed) - - if step >= FLAGS.gen_pretrain_steps: - is_pretraining = False - break - return - - -def pretrain_discriminator(sv, sess, model, data, log, id_to_word, - data_ngram_counts, is_chief): - print('\nPretraining discriminator for %d steps.' % FLAGS.dis_pretrain_steps) - log.write( - '\nPretraining discriminator for %d steps.\n' % FLAGS.dis_pretrain_steps) - - is_pretraining = True - - while is_pretraining: - - cumulative_costs = 0. - iters = 0 - if FLAGS.data_set == 'ptb': - iterator = ptb_loader.ptb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length, - FLAGS.epoch_size_override) - elif FLAGS.data_set == 'imdb': - iterator = imdb_loader.imdb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length) - - for x, y, _ in iterator: - is_present_rate = FLAGS.is_present_rate - # is_present_rate = np.random.uniform(low=0.0, high=1.0) - model_utils.assign_percent_real(sess, model.percent_real_update, - model.new_rate, is_present_rate) - # Randomly mask out tokens. - p = model_utils.generate_mask() - - pretrain_feed = {model.inputs: x, model.targets: y, model.present: p} - - [_, dis_loss_eval, gen_log_perplexity_eval, step] = sess.run( - [ - model.dis_pretrain_op, model.dis_loss, model.avg_log_perplexity, - model.global_step - ], - feed_dict=pretrain_feed) - - cumulative_costs += gen_log_perplexity_eval - iters += 1 - - # Calulate rolling perplexity. - perplexity = np.exp(cumulative_costs / iters) - - # Summaries. - if is_chief and step % FLAGS.summaries_every == 0: - # Graph summaries. - summary_str = sess.run( - model.merge_summaries_op, feed_dict=pretrain_feed) - sv.SummaryComputed(sess, summary_str) - - # Additional summary. 
- for n, data_ngram_count in data_ngram_counts.iteritems(): - avg_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, pretrain_feed, data_ngram_count, - int(n)) - summary_percent_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/%s-grams_percent_correct' % n, - simple_value=avg_percent_captured) - ]) - sv.SummaryComputed(sess, summary_percent_str, global_step=step) - - summary_perplexity_str = tf.Summary(value=[ - tf.Summary.Value(tag='general/perplexity', simple_value=perplexity) - ]) - sv.SummaryComputed(sess, summary_perplexity_str, global_step=step) - - # Printing and logging - if is_chief and step % FLAGS.print_every == 0: - print('global_step: %d' % step) - print(' discriminator loss: %.3f' % dis_loss_eval) - print(' perplexity: %.3f' % perplexity) - log.write('global_step: %d\n' % step) - log.write(' discriminator loss: %.3f\n' % dis_loss_eval) - log.write(' perplexity: %.3f\n' % perplexity) - - for n, data_ngram_count in data_ngram_counts.iteritems(): - avg_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, pretrain_feed, data_ngram_count, - int(n)) - print(' percent of %s-grams captured: %.3f.\n' % - (n, avg_percent_captured)) - log.write(' percent of %s-grams captured: %.3f.\n\n' % - (n, avg_percent_captured)) - - evaluation_utils.generate_logs(sess, model, log, id_to_word, - pretrain_feed) - - if step >= FLAGS.dis_pretrain_steps + int(FLAGS.gen_pretrain_steps or 0): - is_pretraining = False - break - return diff --git a/research/maskgan/regularization/__init__.py b/research/maskgan/regularization/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/maskgan/regularization/variational_dropout.py b/research/maskgan/regularization/variational_dropout.py deleted file mode 100644 index d67fe52eee45c31012fe50e5de662d27565befae..0000000000000000000000000000000000000000 
--- a/research/maskgan/regularization/variational_dropout.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Variational Dropout Wrapper.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -class VariationalDropoutWrapper(tf.contrib.rnn.RNNCell): - """Add variational dropout to a RNN cell.""" - - def __init__(self, cell, batch_size, input_size, recurrent_keep_prob, - input_keep_prob): - self._cell = cell - self._recurrent_keep_prob = recurrent_keep_prob - self._input_keep_prob = input_keep_prob - - def make_mask(keep_prob, units): - random_tensor = keep_prob - # 0. if [keep_prob, 1.0) and 1. 
if [1.0, 1.0 + keep_prob) - random_tensor += tf.random_uniform(tf.stack([batch_size, units])) - return tf.floor(random_tensor) / keep_prob - - self._recurrent_mask = make_mask(recurrent_keep_prob, - self._cell.state_size[0]) - self._input_mask = self._recurrent_mask - - @property - def state_size(self): - return self._cell.state_size - - @property - def output_size(self): - return self._cell.output_size - - def __call__(self, inputs, state, scope=None): - dropped_inputs = inputs * self._input_mask - dropped_state = (state[0], state[1] * self._recurrent_mask) - new_h, new_state = self._cell(dropped_inputs, dropped_state, scope) - return new_h, new_state diff --git a/research/maskgan/regularization/zoneout.py b/research/maskgan/regularization/zoneout.py deleted file mode 100644 index 5f9ef3e3014ae6f2e7eea1a2937c5f1e2c356411..0000000000000000000000000000000000000000 --- a/research/maskgan/regularization/zoneout.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Zoneout Wrapper""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -class ZoneoutWrapper(tf.contrib.rnn.RNNCell): - """Add Zoneout to a RNN cell.""" - - def __init__(self, cell, zoneout_drop_prob, is_training=True): - self._cell = cell - self._zoneout_prob = zoneout_drop_prob - self._is_training = is_training - - @property - def state_size(self): - return self._cell.state_size - - @property - def output_size(self): - return self._cell.output_size - - def __call__(self, inputs, state, scope=None): - output, new_state = self._cell(inputs, state, scope) - if not isinstance(self._cell.state_size, tuple): - new_state = tf.split(value=new_state, num_or_size_splits=2, axis=1) - state = tf.split(value=state, num_or_size_splits=2, axis=1) - final_new_state = [new_state[0], new_state[1]] - if self._is_training: - for i, state_element in enumerate(state): - random_tensor = 1 - self._zoneout_prob # keep probability - random_tensor += tf.random_uniform(tf.shape(state_element)) - # 0. if [zoneout_prob, 1.0) and 1. 
if [1.0, 1.0 + zoneout_prob) - binary_tensor = tf.floor(random_tensor) - final_new_state[ - i] = (new_state[i] - state_element) * binary_tensor + state_element - else: - for i, state_element in enumerate(state): - final_new_state[ - i] = state_element * self._zoneout_prob + new_state[i] * ( - 1 - self._zoneout_prob) - if isinstance(self._cell.state_size, tuple): - return output, tf.contrib.rnn.LSTMStateTuple( - final_new_state[0], final_new_state[1]) - - return output, tf.concat([final_new_state[0], final_new_state[1]], 1) diff --git a/research/maskgan/sample_shuffler.py b/research/maskgan/sample_shuffler.py deleted file mode 100644 index 58c31fb573a864b33f3d6e2f17b42e42f1d0ea4d..0000000000000000000000000000000000000000 --- a/research/maskgan/sample_shuffler.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Shuffle samples for human evaluation. 
- -Local launch command: - python sample_shuffler.py - --input_ml_path=/tmp/ptb/seq2seq_vd_shareemb_forreal_55_3 - --input_gan_path=/tmp/ptb/MaskGAN_PTB_ari_avg_56.29_v2.0.0 - --output_file_name=/tmp/ptb/shuffled_output.txt - - python sample_shuffler.py - --input_ml_path=/tmp/generate_samples/MaskGAN_IMDB_Benchmark_87.1_v0.3.0 - --input_gan_path=/tmp/generate_samples/MaskGAN_IMDB_v1.0.1 - --output_file_name=/tmp/imdb/shuffled_output.txt -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# Dependency imports -import numpy as np - -import tensorflow as tf - -tf.app.flags.DEFINE_string('input_ml_path', '/tmp', 'Model output directory.') -tf.app.flags.DEFINE_string('input_gan_path', '/tmp', 'Model output directory.') -tf.app.flags.DEFINE_string('output_file_name', '/tmp/ptb/shuffled_output.txt', - 'Model output file.') -tf.app.flags.DEFINE_boolean( - 'output_masked_logs', False, - 'Whether to display for human evaluation (show masking).') -tf.app.flags.DEFINE_integer('number_epochs', 1, - 'The number of epochs to produce.') - -FLAGS = tf.app.flags.FLAGS - - -def shuffle_samples(input_file_1, input_file_2): - """Shuffle the examples.""" - shuffled = [] - - # Set a random seed to keep fixed mask. 
- np.random.seed(0) - - for line_1, line_2 in zip(input_file_1, input_file_2): - rand = np.random.randint(1, 3) - if rand == 1: - shuffled.append((rand, line_1, line_2)) - else: - shuffled.append((rand, line_2, line_1)) - input_file_1.close() - input_file_2.close() - return shuffled - - -def generate_output(shuffled_tuples, output_file_name): - output_file = tf.gfile.GFile(output_file_name, mode='w') - - for tup in shuffled_tuples: - formatted_tuple = ('\n{:<1}, {:<1}, {:<1}').format(tup[0], tup[1].rstrip(), - tup[2].rstrip()) - output_file.write(formatted_tuple) - output_file.close() - - -def main(_): - ml_samples_file = tf.gfile.GFile( - os.path.join(FLAGS.input_ml_path, 'reviews.txt'), mode='r') - gan_samples_file = tf.gfile.GFile( - os.path.join(FLAGS.input_gan_path, 'reviews.txt'), mode='r') - - # Generate shuffled tuples. - shuffled_tuples = shuffle_samples(ml_samples_file, gan_samples_file) - - # Output to file. - generate_output(shuffled_tuples, FLAGS.output_file_name) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/maskgan/train_mask_gan.py b/research/maskgan/train_mask_gan.py deleted file mode 100644 index 1e70c2284a8704b1c92dcdec850ac29fc9625667..0000000000000000000000000000000000000000 --- a/research/maskgan/train_mask_gan.py +++ /dev/null @@ -1,1167 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Launch example: - -[IMDB] -python train_mask_gan.py --data_dir -/tmp/imdb --data_set imdb --batch_size 128 ---sequence_length 20 --base_directory /tmp/maskGAN_v0.01 ---hparams="gen_rnn_size=650,gen_num_layers=2,dis_rnn_size=650,dis_num_layers=2 -,critic_learning_rate=0.0009756,dis_learning_rate=0.0000585, -dis_train_iterations=8,gen_learning_rate=0.0016624, -gen_full_learning_rate_steps=1e9,gen_learning_rate_decay=0.999999, -rl_discount_rate=0.8835659" --mode TRAIN --max_steps 1000000 ---generator_model seq2seq_vd --discriminator_model seq2seq_vd ---is_present_rate 0.5 --summaries_every 25 --print_every 25 - --max_num_to_print=3 --generator_optimizer=adam - --seq2seq_share_embedding=True --baseline_method=critic - --attention_option=luong --n_gram_eval=4 --mask_strategy=contiguous - --gen_training_strategy=reinforce --dis_pretrain_steps=100 - --perplexity_threshold=1000000 - --dis_share_embedding=True --maskgan_ckpt - /tmp/model.ckpt-171091 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -from functools import partial -import os -import time -# Dependency imports - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import pretrain_mask_gan -from data import imdb_loader -from data import ptb_loader -from model_utils import helper -from model_utils import model_construction -from model_utils import model_losses -from model_utils import model_optimization - -# Data. -from model_utils import model_utils - -from model_utils import n_gram -from models import evaluation_utils - -from models import rollout - -np.set_printoptions(precision=3) -np.set_printoptions(suppress=True) - -MODE_TRAIN = 'TRAIN' -MODE_TRAIN_EVAL = 'TRAIN_EVAL' -MODE_VALIDATION = 'VALIDATION' -MODE_TEST = 'TEST' - -## Binary and setup FLAGS. 
-tf.app.flags.DEFINE_enum( - 'mode', 'TRAIN', [MODE_TRAIN, MODE_VALIDATION, MODE_TEST, MODE_TRAIN_EVAL], - 'What this binary will do.') -tf.app.flags.DEFINE_string('master', '', - """Name of the TensorFlow master to use.""") -tf.app.flags.DEFINE_string('eval_master', '', - """Name prefix of the Tensorflow eval master.""") -tf.app.flags.DEFINE_integer('task', 0, - """Task id of the replica running the training.""") -tf.app.flags.DEFINE_integer('ps_tasks', 0, """Number of tasks in the ps job. - If 0 no ps job is used.""") - -## General FLAGS. -tf.app.flags.DEFINE_string( - 'hparams', '', 'Comma separated list of name=value hyperparameter pairs.') -tf.app.flags.DEFINE_integer('batch_size', 20, 'The batch size.') -tf.app.flags.DEFINE_integer('vocab_size', 10000, 'The vocabulary size.') -tf.app.flags.DEFINE_integer('sequence_length', 20, 'The sequence length.') -tf.app.flags.DEFINE_integer('max_steps', 1000000, - 'Maximum number of steps to run.') -tf.app.flags.DEFINE_string( - 'mask_strategy', 'random', 'Strategy for masking the words. Determine the ' - 'characterisitics of how the words are dropped out. One of ' - "['contiguous', 'random'].") -tf.app.flags.DEFINE_float('is_present_rate', 0.5, - 'Percent of tokens present in the forward sequence.') -tf.app.flags.DEFINE_float('is_present_rate_decay', None, 'Decay rate for the ' - 'percent of words that are real (are present).') -tf.app.flags.DEFINE_string( - 'generator_model', 'seq2seq', - "Type of Generator model. One of ['rnn', 'seq2seq', 'seq2seq_zaremba'," - "'rnn_zaremba', 'rnn_nas', 'seq2seq_nas']") -tf.app.flags.DEFINE_string( - 'attention_option', None, - "Attention mechanism. One of [None, 'luong', 'bahdanau']") -tf.app.flags.DEFINE_string( - 'discriminator_model', 'bidirectional', - "Type of Discriminator model. 
One of ['cnn', 'rnn', 'bidirectional', " - "'rnn_zaremba', 'bidirectional_zaremba', 'rnn_nas', 'rnn_vd', 'seq2seq_vd']" -) -tf.app.flags.DEFINE_boolean('seq2seq_share_embedding', False, - 'Whether to share the ' - 'embeddings between the encoder and decoder.') -tf.app.flags.DEFINE_boolean( - 'dis_share_embedding', False, 'Whether to share the ' - 'embeddings between the generator and discriminator.') -tf.app.flags.DEFINE_boolean('dis_update_share_embedding', False, 'Whether the ' - 'discriminator should update the shared embedding.') -tf.app.flags.DEFINE_boolean('use_gen_mode', False, - 'Use the mode of the generator ' - 'to produce samples.') -tf.app.flags.DEFINE_boolean('critic_update_dis_vars', False, - 'Whether the critic ' - 'updates the discriminator variables.') - -## Training FLAGS. -tf.app.flags.DEFINE_string( - 'gen_training_strategy', 'reinforce', - "Method for training the Generator. One of ['cross_entropy', 'reinforce']") -tf.app.flags.DEFINE_string( - 'generator_optimizer', 'adam', - "Type of Generator optimizer. One of ['sgd', 'adam']") -tf.app.flags.DEFINE_float('grad_clipping', 10., 'Norm for gradient clipping.') -tf.app.flags.DEFINE_float('advantage_clipping', 5., 'Clipping for advantages.') -tf.app.flags.DEFINE_string( - 'baseline_method', None, - "Approach for baseline. One of ['critic', 'dis_batch', 'ema', None]") -tf.app.flags.DEFINE_float('perplexity_threshold', 15000, - 'Limit for perplexity before terminating job.') -tf.app.flags.DEFINE_float('zoneout_drop_prob', 0.1, - 'Probability for dropping parameter for zoneout.') -tf.app.flags.DEFINE_float('keep_prob', 0.5, - 'Probability for keeping parameter for dropout.') - -## Logging and evaluation FLAGS. 
-tf.app.flags.DEFINE_integer('print_every', 250, - 'Frequency to print and log the ' - 'outputs of the model.') -tf.app.flags.DEFINE_integer('max_num_to_print', 5, - 'Number of samples to log/print.') -tf.app.flags.DEFINE_boolean('print_verbose', False, 'Whether to print in full.') -tf.app.flags.DEFINE_integer('summaries_every', 100, - 'Frequency to compute summaries.') -tf.app.flags.DEFINE_boolean('eval_language_model', False, - 'Whether to evaluate on ' - 'all words as in language modeling.') -tf.app.flags.DEFINE_float('eval_interval_secs', 60, - 'Delay for evaluating model.') -tf.app.flags.DEFINE_integer( - 'n_gram_eval', 4, """The degree of the n-grams to use for evaluation.""") -tf.app.flags.DEFINE_integer( - 'epoch_size_override', None, - 'If an integer, this dictates the size of the epochs and will potentially ' - 'not iterate over all the data.') -tf.app.flags.DEFINE_integer('eval_epoch_size_override', None, - 'Number of evaluation steps.') - -## Directories and checkpoints. -tf.app.flags.DEFINE_string('base_directory', '/tmp/maskGAN_v0.00', - 'Base directory for the logging, events and graph.') -tf.app.flags.DEFINE_string('data_set', 'ptb', 'Data set to operate on. One of' - "['ptb', 'imdb']") -tf.app.flags.DEFINE_string('data_dir', '/tmp/data/ptb', - 'Directory for the training data.') -tf.app.flags.DEFINE_string( - 'language_model_ckpt_dir', None, - 'Directory storing checkpoints to initialize the model. Pretrained models' - 'are stored at /tmp/maskGAN/pretrained/') -tf.app.flags.DEFINE_string( - 'language_model_ckpt_dir_reversed', None, - 'Directory storing checkpoints of reversed models to initialize the model.' - 'Pretrained models stored at' - 'are stored at /tmp/PTB/pretrained_reversed') -tf.app.flags.DEFINE_string( - 'maskgan_ckpt', None, - 'Override which checkpoint file to use to restore the ' - 'model. 
A pretrained seq2seq_zaremba model is stored at ' - '/tmp/maskGAN/pretrain/seq2seq_zaremba/train/model.ckpt-64912') - -tf.app.flags.DEFINE_boolean('wasserstein_objective', False, - '(DEPRECATED) Whether to use the WGAN training.') -tf.app.flags.DEFINE_integer('num_rollouts', 1, - 'The number of rolled out predictions to make.') -tf.app.flags.DEFINE_float('c_lower', -0.01, 'Lower bound for weights.') -tf.app.flags.DEFINE_float('c_upper', 0.01, 'Upper bound for weights.') - -FLAGS = tf.app.flags.FLAGS - - -def create_hparams(): - """Create the hparams object for generic training hyperparameters.""" - hparams = tf.contrib.training.HParams( - gen_num_layers=2, - dis_num_layers=2, - gen_rnn_size=740, - dis_rnn_size=740, - gen_learning_rate=5e-4, - dis_learning_rate=5e-3, - critic_learning_rate=5e-3, - dis_train_iterations=1, - gen_learning_rate_decay=1.0, - gen_full_learning_rate_steps=1e7, - baseline_decay=0.999999, - rl_discount_rate=0.9, - gen_vd_keep_prob=0.5, - dis_vd_keep_prob=0.5, - dis_pretrain_learning_rate=5e-3, - dis_num_filters=128, - dis_hidden_dim=128, - gen_nas_keep_prob_0=0.85, - gen_nas_keep_prob_1=0.55, - dis_nas_keep_prob_0=0.85, - dis_nas_keep_prob_1=0.55) - # Command line flags override any of the preceding hyperparameter values. - if FLAGS.hparams: - hparams = hparams.parse(FLAGS.hparams) - return hparams - - -def create_MaskGAN(hparams, is_training): - """Create the MaskGAN model. - - Args: - hparams: Hyperparameters for the MaskGAN. - is_training: Boolean indicating operational mode (train/inference). - evaluated with a teacher forcing regime. - - Return: - model: Namedtuple for specifying the MaskGAN. 
- """ - global_step = tf.Variable(0, name='global_step', trainable=False) - - new_learning_rate = tf.placeholder(tf.float32, [], name='new_learning_rate') - learning_rate = tf.Variable(0.0, name='learning_rate', trainable=False) - learning_rate_update = tf.assign(learning_rate, new_learning_rate) - - new_rate = tf.placeholder(tf.float32, [], name='new_rate') - percent_real_var = tf.Variable(0.0, trainable=False) - percent_real_update = tf.assign(percent_real_var, new_rate) - - ## Placeholders. - inputs = tf.placeholder( - tf.int32, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - targets = tf.placeholder( - tf.int32, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - present = tf.placeholder( - tf.bool, shape=[FLAGS.batch_size, FLAGS.sequence_length]) - # TODO(adai): Placeholder for IMDB label. - - ## Real Sequence is the targets. - real_sequence = targets - - ## Fakse Sequence from the Generator. - # TODO(adai): Generator must have IMDB labels placeholder. - (fake_sequence, fake_logits, fake_log_probs, fake_gen_initial_state, - fake_gen_final_state, _) = model_construction.create_generator( - hparams, - inputs, - targets, - present, - is_training=is_training, - is_validating=False) - (_, eval_logits, _, eval_initial_state, eval_final_state, - _) = model_construction.create_generator( - hparams, - inputs, - targets, - present, - is_training=False, - is_validating=True, - reuse=True) - - ## Discriminator. - fake_predictions = model_construction.create_discriminator( - hparams, - fake_sequence, - is_training=is_training, - inputs=inputs, - present=present) - real_predictions = model_construction.create_discriminator( - hparams, - real_sequence, - is_training=is_training, - reuse=True, - inputs=inputs, - present=present) - - ## Critic. - # The critic will be used to estimate the forward rewards to the Generator. 
- if FLAGS.baseline_method == 'critic': - est_state_values = model_construction.create_critic( - hparams, fake_sequence, is_training=is_training) - else: - est_state_values = None - - ## Discriminator Loss. - [dis_loss, dis_loss_fake, dis_loss_real] = model_losses.create_dis_loss( - fake_predictions, real_predictions, present) - - ## Average log-perplexity for only missing words. However, to do this, - # the logits are still computed using teacher forcing, that is, the ground - # truth tokens are fed in at each time point to be valid. - avg_log_perplexity = model_losses.calculate_log_perplexity( - eval_logits, targets, present) - - ## Generator Objective. - # 1. Cross Entropy losses on missing tokens. - fake_cross_entropy_losses = model_losses.create_masked_cross_entropy_loss( - targets, present, fake_logits) - - # 2. GAN REINFORCE losses. - [ - fake_RL_loss, fake_log_probs, fake_rewards, fake_advantages, - fake_baselines, fake_averages_op, critic_loss, cumulative_rewards - ] = model_losses.calculate_reinforce_objective( - hparams, fake_log_probs, fake_predictions, present, est_state_values) - - ## Pre-training. - if FLAGS.gen_pretrain_steps: - raise NotImplementedError - # # TODO(liamfedus): Rewrite this. - # fwd_cross_entropy_loss = tf.reduce_mean(fwd_cross_entropy_losses) - # gen_pretrain_op = model_optimization.create_gen_pretrain_op( - # hparams, fwd_cross_entropy_loss, global_step) - else: - gen_pretrain_op = None - if FLAGS.dis_pretrain_steps: - dis_pretrain_op = model_optimization.create_dis_pretrain_op( - hparams, dis_loss, global_step) - else: - dis_pretrain_op = None - - ## Generator Train Op. - # 1. Cross-Entropy. - if FLAGS.gen_training_strategy == 'cross_entropy': - gen_loss = tf.reduce_mean(fake_cross_entropy_losses) - [gen_train_op, gen_grads, - gen_vars] = model_optimization.create_gen_train_op( - hparams, learning_rate, gen_loss, global_step, mode='MINIMIZE') - - # 2. 
GAN (REINFORCE) - elif FLAGS.gen_training_strategy == 'reinforce': - gen_loss = fake_RL_loss - [gen_train_op, gen_grads, - gen_vars] = model_optimization.create_reinforce_gen_train_op( - hparams, learning_rate, gen_loss, fake_averages_op, global_step) - - else: - raise NotImplementedError - - ## Discriminator Train Op. - dis_train_op, dis_grads, dis_vars = model_optimization.create_dis_train_op( - hparams, dis_loss, global_step) - - ## Critic Train Op. - if critic_loss is not None: - [critic_train_op, _, _] = model_optimization.create_critic_train_op( - hparams, critic_loss, global_step) - dis_train_op = tf.group(dis_train_op, critic_train_op) - - ## Summaries. - with tf.name_scope('general'): - tf.summary.scalar('percent_real', percent_real_var) - tf.summary.scalar('learning_rate', learning_rate) - - with tf.name_scope('generator_objectives'): - tf.summary.scalar('gen_objective', tf.reduce_mean(gen_loss)) - tf.summary.scalar('gen_loss_cross_entropy', - tf.reduce_mean(fake_cross_entropy_losses)) - - with tf.name_scope('REINFORCE'): - with tf.name_scope('objective'): - tf.summary.scalar('fake_RL_loss', tf.reduce_mean(fake_RL_loss)) - - with tf.name_scope('rewards'): - helper.variable_summaries(cumulative_rewards, 'rewards') - - with tf.name_scope('advantages'): - helper.variable_summaries(fake_advantages, 'advantages') - - with tf.name_scope('baselines'): - helper.variable_summaries(fake_baselines, 'baselines') - - with tf.name_scope('log_probs'): - helper.variable_summaries(fake_log_probs, 'log_probs') - - with tf.name_scope('discriminator_losses'): - tf.summary.scalar('dis_loss', dis_loss) - tf.summary.scalar('dis_loss_fake_sequence', dis_loss_fake) - tf.summary.scalar('dis_loss_prob_fake_sequence', tf.exp(-dis_loss_fake)) - tf.summary.scalar('dis_loss_real_sequence', dis_loss_real) - tf.summary.scalar('dis_loss_prob_real_sequence', tf.exp(-dis_loss_real)) - - if critic_loss is not None: - with tf.name_scope('critic_losses'): - tf.summary.scalar('critic_loss', 
critic_loss) - - with tf.name_scope('logits'): - helper.variable_summaries(fake_logits, 'fake_logits') - - for v, g in zip(gen_vars, gen_grads): - helper.variable_summaries(v, v.op.name) - helper.variable_summaries(g, 'grad/' + v.op.name) - - for v, g in zip(dis_vars, dis_grads): - helper.variable_summaries(v, v.op.name) - helper.variable_summaries(g, 'grad/' + v.op.name) - - merge_summaries_op = tf.summary.merge_all() - text_summary_placeholder = tf.placeholder(tf.string) - text_summary_op = tf.summary.text('Samples', text_summary_placeholder) - - # Model saver. - saver = tf.train.Saver(keep_checkpoint_every_n_hours=1, max_to_keep=5) - - # Named tuple that captures elements of the MaskGAN model. - Model = collections.namedtuple('Model', [ - 'inputs', 'targets', 'present', 'percent_real_update', 'new_rate', - 'fake_sequence', 'fake_logits', 'fake_rewards', 'fake_baselines', - 'fake_advantages', 'fake_log_probs', 'fake_predictions', - 'real_predictions', 'fake_cross_entropy_losses', 'fake_gen_initial_state', - 'fake_gen_final_state', 'eval_initial_state', 'eval_final_state', - 'avg_log_perplexity', 'dis_loss', 'gen_loss', 'critic_loss', - 'cumulative_rewards', 'dis_train_op', 'gen_train_op', 'gen_pretrain_op', - 'dis_pretrain_op', 'merge_summaries_op', 'global_step', - 'new_learning_rate', 'learning_rate_update', 'saver', 'text_summary_op', - 'text_summary_placeholder' - ]) - - model = Model( - inputs, targets, present, percent_real_update, new_rate, fake_sequence, - fake_logits, fake_rewards, fake_baselines, fake_advantages, - fake_log_probs, fake_predictions, real_predictions, - fake_cross_entropy_losses, fake_gen_initial_state, fake_gen_final_state, - eval_initial_state, eval_final_state, avg_log_perplexity, dis_loss, - gen_loss, critic_loss, cumulative_rewards, dis_train_op, gen_train_op, - gen_pretrain_op, dis_pretrain_op, merge_summaries_op, global_step, - new_learning_rate, learning_rate_update, saver, text_summary_op, - text_summary_placeholder) - return 
model - - -def compute_geometric_average(percent_captured): - """Compute the geometric average of the n-gram metrics.""" - - res = 1. - for _, n_gram_percent in percent_captured.iteritems(): - res *= n_gram_percent - - return np.power(res, 1. / float(len(percent_captured))) - - -def compute_arithmetic_average(percent_captured): - """Compute the arithmetic average of the n-gram metrics.""" - N = len(percent_captured) - - res = 0. - for _, n_gram_percent in percent_captured.iteritems(): - res += n_gram_percent - - return res / float(N) - - -def get_iterator(data): - """Return the data iterator.""" - if FLAGS.data_set == 'ptb': - iterator = ptb_loader.ptb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length, - FLAGS.epoch_size_override) - elif FLAGS.data_set == 'imdb': - iterator = imdb_loader.imdb_iterator(data, FLAGS.batch_size, - FLAGS.sequence_length) - return iterator - - -def train_model(hparams, data, log_dir, log, id_to_word, data_ngram_counts): - """Train model. - - Args: - hparams: Hyperparameters for the MaskGAN. - data: Data to evaluate. - log_dir: Directory to save checkpoints. - log: Readable log for the experiment. - id_to_word: Dictionary of indices to words. - data_ngram_counts: Dictionary of hashed(n-gram tuples) to counts in the - data_set. - """ - print('Training model.') - tf.logging.info('Training model.') - - # Boolean indicating operational mode. - is_training = True - - # Write all the information to the logs. - log.write('hparams\n') - log.write(str(hparams)) - log.flush() - - is_chief = FLAGS.task == 0 - - with tf.Graph().as_default(): - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - container_name = '' - with tf.container(container_name): - # Construct the model. 
- if FLAGS.num_rollouts == 1: - model = create_MaskGAN(hparams, is_training) - elif FLAGS.num_rollouts > 1: - model = rollout.create_rollout_MaskGAN(hparams, is_training) - else: - raise ValueError - - print('\nTrainable Variables in Graph:') - for v in tf.trainable_variables(): - print(v) - - ## Retrieve the initial savers. - init_savers = model_utils.retrieve_init_savers(hparams) - - ## Initial saver function to supervisor. - init_fn = partial(model_utils.init_fn, init_savers) - - # Create the supervisor. It will take care of initialization, - # summaries, checkpoints, and recovery. - sv = tf.train.Supervisor( - logdir=log_dir, - is_chief=is_chief, - saver=model.saver, - global_step=model.global_step, - save_model_secs=60, - recovery_wait_secs=30, - summary_op=None, - init_fn=init_fn) - - # Get an initialized, and possibly recovered session. Launch the - # services: Checkpointing, Summaries, step counting. - # - # When multiple replicas of this program are running the services are - # only launched by the 'chief' replica. - with sv.managed_session(FLAGS.master) as sess: - - ## Pretrain the generator. - if FLAGS.gen_pretrain_steps: - pretrain_mask_gan.pretrain_generator(sv, sess, model, data, log, - id_to_word, data_ngram_counts, - is_chief) - - ## Pretrain the discriminator. - if FLAGS.dis_pretrain_steps: - pretrain_mask_gan.pretrain_discriminator( - sv, sess, model, data, log, id_to_word, data_ngram_counts, - is_chief) - - # Initial indicators for printing and summarizing. - print_step_division = -1 - summary_step_division = -1 - - # Run iterative computation in a loop. - while not sv.ShouldStop(): - is_present_rate = FLAGS.is_present_rate - - if FLAGS.is_present_rate_decay is not None: - is_present_rate *= (1. - FLAGS.is_present_rate_decay) - - model_utils.assign_percent_real(sess, model.percent_real_update, - model.new_rate, is_present_rate) - - # GAN training. - avg_epoch_gen_loss, avg_epoch_dis_loss = [], [] - cumulative_costs = 0. 
- gen_iters = 0 - - # Generator and Discriminator statefulness initial evaluation. - # TODO(liamfedus): Throughout the code I am implicitly assuming - # that the Generator and Discriminator are equal sized. - [gen_initial_state_eval, fake_gen_initial_state_eval] = sess.run( - [model.eval_initial_state, model.fake_gen_initial_state]) - dis_initial_state_eval = fake_gen_initial_state_eval - - # Save zeros state to reset later. - zeros_state = fake_gen_initial_state_eval - - ## Offset Discriminator. - if FLAGS.ps_tasks == 0: - dis_offset = 1 - else: - dis_offset = FLAGS.task * 1000 + 1 - dis_iterator = get_iterator(data) - - for i in range(dis_offset): - try: - dis_x, dis_y, _ = next(dis_iterator) - except StopIteration: - dis_iterator = get_iterator(data) - dis_initial_state_eval = zeros_state - dis_x, dis_y, _ = next(dis_iterator) - - p = model_utils.generate_mask() - - # Construct the train feed. - train_feed = { - model.inputs: dis_x, - model.targets: dis_y, - model.present: p - } - - if FLAGS.data_set == 'ptb': - # Statefulness of the Generator being used for Discriminator. - for i, (c, h) in enumerate(model.fake_gen_initial_state): - train_feed[c] = dis_initial_state_eval[i].c - train_feed[h] = dis_initial_state_eval[i].h - - # Determine the state had the Generator run over real data. We - # use this state for the Discriminator. - [dis_initial_state_eval] = sess.run( - [model.fake_gen_final_state], train_feed) - - ## Training loop. 
- iterator = get_iterator(data) - gen_initial_state_eval = zeros_state - - if FLAGS.ps_tasks > 0: - gen_offset = FLAGS.task * 1000 + 1 - for i in range(gen_offset): - try: - next(iterator) - except StopIteration: - dis_iterator = get_iterator(data) - dis_initial_state_eval = zeros_state - next(dis_iterator) - - for x, y, _ in iterator: - for _ in xrange(hparams.dis_train_iterations): - try: - dis_x, dis_y, _ = next(dis_iterator) - except StopIteration: - dis_iterator = get_iterator(data) - dis_initial_state_eval = zeros_state - dis_x, dis_y, _ = next(dis_iterator) - - if FLAGS.data_set == 'ptb': - [dis_initial_state_eval] = sess.run( - [model.fake_gen_initial_state]) - - p = model_utils.generate_mask() - - # Construct the train feed. - train_feed = { - model.inputs: dis_x, - model.targets: dis_y, - model.present: p - } - - # Statefulness for the Discriminator. - if FLAGS.data_set == 'ptb': - for i, (c, h) in enumerate(model.fake_gen_initial_state): - train_feed[c] = dis_initial_state_eval[i].c - train_feed[h] = dis_initial_state_eval[i].h - - _, dis_loss_eval, step = sess.run( - [model.dis_train_op, model.dis_loss, model.global_step], - feed_dict=train_feed) - - # Determine the state had the Generator run over real data. - # Use this state for the Discriminator. - [dis_initial_state_eval] = sess.run( - [model.fake_gen_final_state], train_feed) - - # Randomly mask out tokens. - p = model_utils.generate_mask() - - # Construct the train feed. - train_feed = {model.inputs: x, model.targets: y, model.present: p} - - # Statefulness for Generator. - if FLAGS.data_set == 'ptb': - tf.logging.info('Generator is stateful.') - print('Generator is stateful.') - # Statefulness for *evaluation* Generator. - for i, (c, h) in enumerate(model.eval_initial_state): - train_feed[c] = gen_initial_state_eval[i].c - train_feed[h] = gen_initial_state_eval[i].h - - # Statefulness for Generator. 
- for i, (c, h) in enumerate(model.fake_gen_initial_state): - train_feed[c] = fake_gen_initial_state_eval[i].c - train_feed[h] = fake_gen_initial_state_eval[i].h - - # Determine whether to decay learning rate. - lr_decay = hparams.gen_learning_rate_decay**max( - step + 1 - hparams.gen_full_learning_rate_steps, 0.0) - - # Assign learning rate. - gen_learning_rate = hparams.gen_learning_rate * lr_decay - model_utils.assign_learning_rate(sess, model.learning_rate_update, - model.new_learning_rate, - gen_learning_rate) - - [_, gen_loss_eval, gen_log_perplexity_eval, step] = sess.run( - [ - model.gen_train_op, model.gen_loss, - model.avg_log_perplexity, model.global_step - ], - feed_dict=train_feed) - - cumulative_costs += gen_log_perplexity_eval - gen_iters += 1 - - # Determine the state had the Generator run over real data. - [gen_initial_state_eval, fake_gen_initial_state_eval] = sess.run( - [model.eval_final_state, - model.fake_gen_final_state], train_feed) - - avg_epoch_dis_loss.append(dis_loss_eval) - avg_epoch_gen_loss.append(gen_loss_eval) - - ## Summaries. - # Calulate rolling perplexity. - perplexity = np.exp(cumulative_costs / gen_iters) - - if is_chief and (step / FLAGS.summaries_every > - summary_step_division): - summary_step_division = step / FLAGS.summaries_every - - # Confirm perplexity is not infinite. - if (not np.isfinite(perplexity) or - perplexity >= FLAGS.perplexity_threshold): - print('Training raising FloatingPoinError.') - raise FloatingPointError( - 'Training infinite perplexity: %.3f' % perplexity) - - # Graph summaries. 
- summary_str = sess.run( - model.merge_summaries_op, feed_dict=train_feed) - sv.SummaryComputed(sess, summary_str) - - # Summary: n-gram - avg_percent_captured = {'2': 0., '3': 0., '4': 0.} - for n, data_ngram_count in data_ngram_counts.iteritems(): - batch_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, train_feed, - data_ngram_count, int(n)) - summary_percent_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/%s-grams_percent_correct' % n, - simple_value=batch_percent_captured) - ]) - sv.SummaryComputed( - sess, summary_percent_str, global_step=step) - - # Summary: geometric_avg - geometric_avg = compute_geometric_average(avg_percent_captured) - summary_geometric_avg_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/geometric_avg', simple_value=geometric_avg) - ]) - sv.SummaryComputed( - sess, summary_geometric_avg_str, global_step=step) - - # Summary: arithmetic_avg - arithmetic_avg = compute_arithmetic_average( - avg_percent_captured) - summary_arithmetic_avg_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/arithmetic_avg', - simple_value=arithmetic_avg) - ]) - sv.SummaryComputed( - sess, summary_arithmetic_avg_str, global_step=step) - - # Summary: perplexity - summary_perplexity_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/perplexity', simple_value=perplexity) - ]) - sv.SummaryComputed( - sess, summary_perplexity_str, global_step=step) - - ## Printing and logging - if is_chief and (step / FLAGS.print_every > print_step_division): - print_step_division = (step / FLAGS.print_every) - print('global_step: %d' % step) - print(' perplexity: %.3f' % perplexity) - print(' gen_learning_rate: %.6f' % gen_learning_rate) - log.write('global_step: %d\n' % step) - log.write(' perplexity: %.3f\n' % perplexity) - log.write(' gen_learning_rate: %.6f' % gen_learning_rate) - - # Average percent captured for each of the n-grams. 
- avg_percent_captured = {'2': 0., '3': 0., '4': 0.} - for n, data_ngram_count in data_ngram_counts.iteritems(): - batch_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, train_feed, - data_ngram_count, int(n)) - avg_percent_captured[n] = batch_percent_captured - print(' percent of %s-grams captured: %.3f.' % - (n, batch_percent_captured)) - log.write(' percent of %s-grams captured: %.3f.\n' % - (n, batch_percent_captured)) - geometric_avg = compute_geometric_average(avg_percent_captured) - print(' geometric_avg: %.3f.' % geometric_avg) - log.write(' geometric_avg: %.3f.' % geometric_avg) - arithmetic_avg = compute_arithmetic_average( - avg_percent_captured) - print(' arithmetic_avg: %.3f.' % arithmetic_avg) - log.write(' arithmetic_avg: %.3f.' % arithmetic_avg) - - evaluation_utils.print_and_log_losses( - log, step, is_present_rate, avg_epoch_dis_loss, - avg_epoch_gen_loss) - - if FLAGS.gen_training_strategy == 'reinforce': - evaluation_utils.generate_RL_logs(sess, model, log, - id_to_word, train_feed) - else: - evaluation_utils.generate_logs(sess, model, log, id_to_word, - train_feed) - log.flush() - - log.close() - - -def evaluate_once(data, sv, model, sess, train_dir, log, id_to_word, - data_ngram_counts, eval_saver): - """Evaluate model for a number of steps. - - Args: - data: Dataset. - sv: Supervisor. - model: The GAN model we have just built. - sess: A session to use. - train_dir: Path to a directory containing checkpoints. - log: Evaluation log for evaluation. - id_to_word: Dictionary of indices to words. - data_ngram_counts: Dictionary of hashed(n-gram tuples) to counts in the - data_set. - eval_saver: Evaluation saver.r. - """ - tf.logging.info('Evaluate Once.') - # Load the last model checkpoint, or initialize the graph. 
- model_save_path = tf.latest_checkpoint(train_dir) - if not model_save_path: - tf.logging.warning('No checkpoint yet in: %s', train_dir) - return - - tf.logging.info('Starting eval of: %s' % model_save_path) - tf.logging.info('Only restoring trainable variables.') - eval_saver.restore(sess, model_save_path) - - # Run the requested number of evaluation steps - avg_epoch_gen_loss, avg_epoch_dis_loss = [], [] - cumulative_costs = 0. - - # Average percent captured for each of the n-grams. - avg_percent_captured = {'2': 0., '3': 0., '4': 0.} - - # Set a random seed to keep fixed mask. - np.random.seed(0) - gen_iters = 0 - - # Generator statefulness over the epoch. - # TODO(liamfedus): Check this. - [gen_initial_state_eval, fake_gen_initial_state_eval] = sess.run( - [model.eval_initial_state, model.fake_gen_initial_state]) - - if FLAGS.eval_language_model: - is_present_rate = 0. - tf.logging.info('Overriding is_present_rate=0. for evaluation.') - print('Overriding is_present_rate=0. for evaluation.') - - iterator = get_iterator(data) - - for x, y, _ in iterator: - if FLAGS.eval_language_model: - is_present_rate = 0. - else: - is_present_rate = FLAGS.is_present_rate - tf.logging.info('Evaluating on is_present_rate=%.3f.' % is_present_rate) - - model_utils.assign_percent_real(sess, model.percent_real_update, - model.new_rate, is_present_rate) - - # Randomly mask out tokens. - p = model_utils.generate_mask() - - eval_feed = {model.inputs: x, model.targets: y, model.present: p} - - if FLAGS.data_set == 'ptb': - # Statefulness for *evaluation* Generator. - for i, (c, h) in enumerate(model.eval_initial_state): - eval_feed[c] = gen_initial_state_eval[i].c - eval_feed[h] = gen_initial_state_eval[i].h - - # Statefulness for the Generator. 
- for i, (c, h) in enumerate(model.fake_gen_initial_state): - eval_feed[c] = fake_gen_initial_state_eval[i].c - eval_feed[h] = fake_gen_initial_state_eval[i].h - - [ - gen_log_perplexity_eval, dis_loss_eval, gen_loss_eval, - gen_initial_state_eval, fake_gen_initial_state_eval, step - ] = sess.run( - [ - model.avg_log_perplexity, model.dis_loss, model.gen_loss, - model.eval_final_state, model.fake_gen_final_state, - model.global_step - ], - feed_dict=eval_feed) - - for n, data_ngram_count in data_ngram_counts.iteritems(): - batch_percent_captured = evaluation_utils.sequence_ngram_evaluation( - sess, model.fake_sequence, log, eval_feed, data_ngram_count, int(n)) - avg_percent_captured[n] += batch_percent_captured - - cumulative_costs += gen_log_perplexity_eval - - avg_epoch_dis_loss.append(dis_loss_eval) - avg_epoch_gen_loss.append(gen_loss_eval) - - gen_iters += 1 - - # Calulate rolling metrics. - perplexity = np.exp(cumulative_costs / gen_iters) - for n, _ in avg_percent_captured.iteritems(): - avg_percent_captured[n] /= gen_iters - - # Confirm perplexity is not infinite. - if not np.isfinite(perplexity) or perplexity >= FLAGS.perplexity_threshold: - print('Evaluation raising FloatingPointError.') - raise FloatingPointError( - 'Evaluation infinite perplexity: %.3f' % perplexity) - - ## Printing and logging. - evaluation_utils.print_and_log_losses(log, step, is_present_rate, - avg_epoch_dis_loss, avg_epoch_gen_loss) - print(' perplexity: %.3f' % perplexity) - log.write(' perplexity: %.3f\n' % perplexity) - - for n, n_gram_percent in avg_percent_captured.iteritems(): - n = int(n) - print(' percent of %d-grams captured: %.3f.' % (n, n_gram_percent)) - log.write(' percent of %d-grams captured: %.3f.\n' % (n, n_gram_percent)) - - samples = evaluation_utils.generate_logs(sess, model, log, id_to_word, - eval_feed) - - ## Summaries. 
- summary_str = sess.run(model.merge_summaries_op, feed_dict=eval_feed) - sv.SummaryComputed(sess, summary_str) - - # Summary: text - summary_str = sess.run(model.text_summary_op, - {model.text_summary_placeholder: '\n\n'.join(samples)}) - sv.SummaryComputed(sess, summary_str, global_step=step) - - # Summary: n-gram - for n, n_gram_percent in avg_percent_captured.iteritems(): - n = int(n) - summary_percent_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/%d-grams_percent_correct' % n, - simple_value=n_gram_percent) - ]) - sv.SummaryComputed(sess, summary_percent_str, global_step=step) - - # Summary: geometric_avg - geometric_avg = compute_geometric_average(avg_percent_captured) - summary_geometric_avg_str = tf.Summary(value=[ - tf.Summary.Value(tag='general/geometric_avg', simple_value=geometric_avg) - ]) - sv.SummaryComputed(sess, summary_geometric_avg_str, global_step=step) - - # Summary: arithmetic_avg - arithmetic_avg = compute_arithmetic_average(avg_percent_captured) - summary_arithmetic_avg_str = tf.Summary(value=[ - tf.Summary.Value( - tag='general/arithmetic_avg', simple_value=arithmetic_avg) - ]) - sv.SummaryComputed(sess, summary_arithmetic_avg_str, global_step=step) - - # Summary: perplexity - summary_perplexity_str = tf.Summary(value=[ - tf.Summary.Value(tag='general/perplexity', simple_value=perplexity) - ]) - sv.SummaryComputed(sess, summary_perplexity_str, global_step=step) - - -def evaluate_model(hparams, data, train_dir, log, id_to_word, - data_ngram_counts): - """Evaluate MaskGAN model. - - Args: - hparams: Hyperparameters for the MaskGAN. - data: Data to evaluate. - train_dir: Path to a directory containing checkpoints. - id_to_word: Dictionary of indices to words. - data_ngram_counts: Dictionary of hashed(n-gram tuples) to counts in the - data_set. - """ - tf.logging.error('Evaluate model.') - - # Boolean indicating operational mode. 
- is_training = False - - if FLAGS.mode == MODE_VALIDATION: - logdir = FLAGS.base_directory + '/validation' - elif FLAGS.mode == MODE_TRAIN_EVAL: - logdir = FLAGS.base_directory + '/train_eval' - elif FLAGS.mode == MODE_TEST: - logdir = FLAGS.base_directory + '/test' - else: - raise NotImplementedError - - # Wait for a checkpoint to exist. - print(train_dir) - print(tf.train.latest_checkpoint(train_dir)) - while not tf.train.latest_checkpoint(train_dir): - tf.logging.error('Waiting for checkpoint...') - print('Waiting for checkpoint...') - time.sleep(10) - - with tf.Graph().as_default(): - # Use a separate container for each trial - container_name = '' - with tf.container(container_name): - - # Construct the model. - if FLAGS.num_rollouts == 1: - model = create_MaskGAN(hparams, is_training) - elif FLAGS.num_rollouts > 1: - model = rollout.create_rollout_MaskGAN(hparams, is_training) - else: - raise ValueError - - # Create the supervisor. It will take care of initialization, summaries, - # checkpoints, and recovery. We only pass the trainable variables - # to load since things like baselines keep batch_size which may not - # match between training and evaluation. - evaluation_variables = tf.trainable_variables() - evaluation_variables.append(model.global_step) - eval_saver = tf.train.Saver(var_list=evaluation_variables) - sv = tf.Supervisor(logdir=logdir) - sess = sv.PrepareSession(FLAGS.eval_master, start_standard_services=False) - - tf.logging.info('Before sv.Loop.') - sv.Loop(FLAGS.eval_interval_secs, evaluate_once, - (data, sv, model, sess, train_dir, log, id_to_word, - data_ngram_counts, eval_saver)) - - sv.WaitForStop() - tf.logging.info('sv.Stop().') - sv.Stop() - - -def main(_): - hparams = create_hparams() - train_dir = FLAGS.base_directory + '/train' - - # Load data set. 
- if FLAGS.data_set == 'ptb': - raw_data = ptb_loader.ptb_raw_data(FLAGS.data_dir) - train_data, valid_data, test_data, _ = raw_data - valid_data_flat = valid_data - elif FLAGS.data_set == 'imdb': - raw_data = imdb_loader.imdb_raw_data(FLAGS.data_dir) - # TODO(liamfedus): Get an IMDB test partition. - train_data, valid_data = raw_data - valid_data_flat = [word for review in valid_data for word in review] - else: - raise NotImplementedError - - if FLAGS.mode == MODE_TRAIN or FLAGS.mode == MODE_TRAIN_EVAL: - data_set = train_data - elif FLAGS.mode == MODE_VALIDATION: - data_set = valid_data - elif FLAGS.mode == MODE_TEST: - data_set = test_data - else: - raise NotImplementedError - - # Dictionary and reverse dictionry. - if FLAGS.data_set == 'ptb': - word_to_id = ptb_loader.build_vocab( - os.path.join(FLAGS.data_dir, 'ptb.train.txt')) - elif FLAGS.data_set == 'imdb': - word_to_id = imdb_loader.build_vocab( - os.path.join(FLAGS.data_dir, 'vocab.txt')) - id_to_word = {v: k for k, v in word_to_id.iteritems()} - - # Dictionary of Training Set n-gram counts. - bigram_tuples = n_gram.find_all_ngrams(valid_data_flat, n=2) - trigram_tuples = n_gram.find_all_ngrams(valid_data_flat, n=3) - fourgram_tuples = n_gram.find_all_ngrams(valid_data_flat, n=4) - - bigram_counts = n_gram.construct_ngrams_dict(bigram_tuples) - trigram_counts = n_gram.construct_ngrams_dict(trigram_tuples) - fourgram_counts = n_gram.construct_ngrams_dict(fourgram_tuples) - print('Unique %d-grams: %d' % (2, len(bigram_counts))) - print('Unique %d-grams: %d' % (3, len(trigram_counts))) - print('Unique %d-grams: %d' % (4, len(fourgram_counts))) - - data_ngram_counts = { - '2': bigram_counts, - '3': trigram_counts, - '4': fourgram_counts - } - - # TODO(liamfedus): This was necessary because there was a problem with our - # originally trained IMDB models. The EOS_INDEX was off by one, which means, - # two words were mapping to index 86933. The presence of '' is going - # to throw and out of vocabulary error. 
- FLAGS.vocab_size = len(id_to_word) - print('Vocab size: %d' % FLAGS.vocab_size) - - tf.gfile.MakeDirs(FLAGS.base_directory) - - if FLAGS.mode == MODE_TRAIN: - log = tf.gfile.GFile( - os.path.join(FLAGS.base_directory, 'train-log.txt'), mode='w') - elif FLAGS.mode == MODE_VALIDATION: - log = tf.gfile.GFile( - os.path.join(FLAGS.base_directory, 'validation-log.txt'), mode='w') - elif FLAGS.mode == MODE_TRAIN_EVAL: - log = tf.gfile.GFile( - os.path.join(FLAGS.base_directory, 'train_eval-log.txt'), mode='w') - else: - log = tf.gfile.GFile( - os.path.join(FLAGS.base_directory, 'test-log.txt'), mode='w') - - if FLAGS.mode == MODE_TRAIN: - train_model(hparams, data_set, train_dir, log, id_to_word, - data_ngram_counts) - - elif FLAGS.mode == MODE_VALIDATION: - evaluate_model(hparams, data_set, train_dir, log, id_to_word, - data_ngram_counts) - elif FLAGS.mode == MODE_TRAIN_EVAL: - evaluate_model(hparams, data_set, train_dir, log, id_to_word, - data_ngram_counts) - - elif FLAGS.mode == MODE_TEST: - evaluate_model(hparams, data_set, train_dir, log, id_to_word, - data_ngram_counts) - - else: - raise NotImplementedError - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/namignizer/.gitignore b/research/namignizer/.gitignore deleted file mode 100644 index 2dae8043534bc7a079f36caa6c673f74c39e5dfa..0000000000000000000000000000000000000000 --- a/research/namignizer/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -# Remove the pyc files -*.pyc - -# Ignore the model and the data -model/ -data/ diff --git a/research/namignizer/README.md b/research/namignizer/README.md deleted file mode 100644 index 475a087541913aaa3fca9d2094b4c23de52dbb41..0000000000000000000000000000000000000000 --- a/research/namignizer/README.md +++ /dev/null @@ -1,86 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not 
Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Namignizer - -Use a variation of the [PTB](https://www.tensorflow.org/versions/r0.8/tutorials/recurrent/index.html#recurrent-neural-networks) model to recognize and generate names using the [Kaggle Baby Name Database](https://www.kaggle.com/kaggle/us-baby-names). - -### API -Namignizer is implemented in Tensorflow 0.8r and uses the python package `pandas` for some data processing. - -#### How to use -Download the data from Kaggle and place it in your data directory (or use the small training data provided). The example data looks like so: - -``` -Id,Name,Year,Gender,Count -1,Mary,1880,F,7065 -2,Anna,1880,F,2604 -3,Emma,1880,F,2003 -4,Elizabeth,1880,F,1939 -5,Minnie,1880,F,1746 -6,Margaret,1880,F,1578 -7,Ida,1880,F,1472 -8,Alice,1880,F,1414 -9,Bertha,1880,F,1320 -``` - -But any data with the two columns: `Name` and `Count` will work. - -With the data, we can then train the model: - -```python -train("data/SmallNames.txt", "model/namignizer", SmallConfig) -``` - -And you will get the output: - -``` -Reading Name data in data/SmallNames.txt -Epoch: 1 Learning rate: 1.000 -0.090 perplexity: 18.539 speed: 282 lps -... -0.890 perplexity: 1.478 speed: 285 lps -0.990 perplexity: 1.477 speed: 284 lps -Epoch: 13 Train Perplexity: 1.477 -``` - -This will as a side effect write model checkpoints to the `model` directory. With this you will be able to determine the perplexity your model will give you for any arbitrary set of names like so: - -```python -namignize(["mary", "ida", "gazorpazorp", "houyhnhnms", "bob"], - tf.train.latest_checkpoint("model"), SmallConfig) -``` -You will provide the same config and the same checkpoint directory. This will allow you to use a the model you just trained. 
You will then get a perplexity output for each name like so: - -``` -Name mary gives us a perplexity of 1.03105580807 -Name ida gives us a perplexity of 1.07770049572 -Name gazorpazorp gives us a perplexity of 175.940353394 -Name houyhnhnms gives us a perplexity of 9.53870773315 -Name bob gives us a perplexity of 6.03938627243 -``` - -Finally, you will also be able generate names using the model like so: - -```python -namignator(tf.train.latest_checkpoint("model"), SmallConfig) -``` - -Again, you will need to provide the same config and the same checkpoint directory. This will allow you to use a the model you just trained. You will then get a single generated name. Examples of output that I got when using the provided data are: - -``` -['b', 'e', 'r', 't', 'h', 'a', '`'] -['m', 'a', 'r', 'y', '`'] -['a', 'n', 'n', 'a', '`'] -['m', 'a', 'r', 'y', '`'] -['b', 'e', 'r', 't', 'h', 'a', '`'] -['a', 'n', 'n', 'a', '`'] -['e', 'l', 'i', 'z', 'a', 'b', 'e', 't', 'h', '`'] -``` - -Notice that each name ends with a backtick. This marks the end of the name. - -### Contact Info - -Feel free to reach out to me at knt(at google) or k.nathaniel.tucker(at gmail) diff --git a/research/namignizer/data_utils.py b/research/namignizer/data_utils.py deleted file mode 100644 index 4320215026ccf7a2b31ffd476c25a153ecd92b86..0000000000000000000000000000000000000000 --- a/research/namignizer/data_utils.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Utilities for parsing Kaggle baby names files.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os - -import numpy as np -import tensorflow as tf -import pandas as pd - -# the default end of name rep will be zero -_EON = 0 - - -def read_names(names_path): - """read data from downloaded file. See SmallNames.txt for example format - or go to https://www.kaggle.com/kaggle/us-baby-names for full lists - - Args: - names_path: path to the csv file similar to the example type - Returns: - Dataset: a namedtuple of two elements: deduped names and their associated - counts. The names contain only 26 chars and are all lower case - """ - names_data = pd.read_csv(names_path) - names_data.Name = names_data.Name.str.lower() - - name_data = names_data.groupby(by=["Name"])["Count"].sum() - name_counts = np.array(name_data.tolist()) - names_deduped = np.array(name_data.index.tolist()) - - Dataset = collections.namedtuple('Dataset', ['Name', 'Count']) - return Dataset(names_deduped, name_counts) - - -def _letter_to_number(letter): - """converts letters to numbers between 1 and 27""" - # ord of lower case 'a' is 97 - return ord(letter) - 96 - - -def namignizer_iterator(names, counts, batch_size, num_steps, epoch_size): - """Takes a list of names and counts like those output from read_names, and - makes an iterator yielding a batch_size by num_steps array of random names - separated by an end of name token. The names are chosen randomly according - to their counts. 
The batch may end mid-name - - Args: - names: a set of lowercase names composed of 26 characters - counts: a list of the frequency of those names - batch_size: int - num_steps: int - epoch_size: number of batches to yield - Yields: - (x, y): a batch_size by num_steps array of ints representing letters, where - x will be the input and y will be the target - """ - name_distribution = counts / counts.sum() - - for i in range(epoch_size): - data = np.zeros(batch_size * num_steps + 1) - samples = np.random.choice(names, size=batch_size * num_steps // 2, - replace=True, p=name_distribution) - - data_index = 0 - for sample in samples: - if data_index >= batch_size * num_steps: - break - for letter in map(_letter_to_number, sample) + [_EON]: - if data_index >= batch_size * num_steps: - break - data[data_index] = letter - data_index += 1 - - x = data[:batch_size * num_steps].reshape((batch_size, num_steps)) - y = data[1:batch_size * num_steps + 1].reshape((batch_size, num_steps)) - - yield (x, y) - - -def name_to_batch(name, batch_size, num_steps): - """ Takes a single name and fills a batch with it - - Args: - name: lowercase composed of 26 characters - batch_size: int - num_steps: int - Returns: - x, y: a batch_size by num_steps array of ints representing letters, where - x will be the input and y will be the target. 
The array is filled up - to the length of the string, the rest is filled with zeros - """ - data = np.zeros(batch_size * num_steps + 1) - - data_index = 0 - for letter in map(_letter_to_number, name) + [_EON]: - data[data_index] = letter - data_index += 1 - - x = data[:batch_size * num_steps].reshape((batch_size, num_steps)) - y = data[1:batch_size * num_steps + 1].reshape((batch_size, num_steps)) - - return x, y diff --git a/research/namignizer/model.py b/research/namignizer/model.py deleted file mode 100644 index 72c5c5ecb61e8a92ec2e74b8cc7ca13bb6ace817..0000000000000000000000000000000000000000 --- a/research/namignizer/model.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""RNN model with embeddings""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -class NamignizerModel(object): - """The Namignizer model ~ strongly based on PTB""" - - def __init__(self, is_training, config): - self.batch_size = batch_size = config.batch_size - self.num_steps = num_steps = config.num_steps - size = config.hidden_size - # will always be 27 - vocab_size = config.vocab_size - - # placeholders for inputs - self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) - self._targets = tf.placeholder(tf.int32, [batch_size, num_steps]) - # weights for the loss function - self._weights = tf.placeholder(tf.float32, [batch_size * num_steps]) - - # lstm for our RNN cell (GRU supported too) - lstm_cells = [] - for layer in range(config.num_layers): - lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0) - if is_training and config.keep_prob < 1: - lstm_cell = tf.contrib.rnn.DropoutWrapper( - lstm_cell, output_keep_prob=config.keep_prob) - lstm_cells.append(lstm_cell) - cell = tf.contrib.rnn.MultiRNNCell(lstm_cells) - - self._initial_state = cell.zero_state(batch_size, tf.float32) - - with tf.device("/cpu:0"): - embedding = tf.get_variable("embedding", [vocab_size, size]) - inputs = tf.nn.embedding_lookup(embedding, self._input_data) - - if is_training and config.keep_prob < 1: - inputs = tf.nn.dropout(inputs, config.keep_prob) - - outputs = [] - state = self._initial_state - with tf.variable_scope("RNN"): - for time_step in range(num_steps): - if time_step > 0: - tf.get_variable_scope().reuse_variables() - (cell_output, state) = cell(inputs[:, time_step, :], state) - outputs.append(cell_output) - - output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, size]) - softmax_w = tf.get_variable("softmax_w", [size, vocab_size]) - softmax_b = tf.get_variable("softmax_b", [vocab_size]) - logits = tf.matmul(output, softmax_w) + softmax_b - loss = 
tf.contrib.legacy_seq2seq.sequence_loss_by_example( - [logits], - [tf.reshape(self._targets, [-1])], - [self._weights]) - self._loss = loss - self._cost = cost = tf.reduce_sum(loss) / batch_size - self._final_state = state - - # probabilities of each letter - self._activations = tf.nn.softmax(logits) - - # ability to save the model - self.saver = tf.train.Saver(tf.global_variables()) - - if not is_training: - return - - self._lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), - config.max_grad_norm) - optimizer = tf.train.GradientDescentOptimizer(self.lr) - self._train_op = optimizer.apply_gradients(zip(grads, tvars)) - - def assign_lr(self, session, lr_value): - session.run(tf.assign(self.lr, lr_value)) - - @property - def input_data(self): - return self._input_data - - @property - def targets(self): - return self._targets - - @property - def activations(self): - return self._activations - - @property - def weights(self): - return self._weights - - @property - def initial_state(self): - return self._initial_state - - @property - def cost(self): - return self._cost - - @property - def loss(self): - return self._loss - - @property - def final_state(self): - return self._final_state - - @property - def lr(self): - return self._lr - - @property - def train_op(self): - return self._train_op diff --git a/research/namignizer/names.py b/research/namignizer/names.py deleted file mode 100644 index 253742716391f2f4b7a0c0cf4987e40a2aaa808f..0000000000000000000000000000000000000000 --- a/research/namignizer/names.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A library showing off sequence recognition and generation with the simple -example of names. - -We use recurrent neural nets to learn complex functions able to recognize and -generate sequences of a given form. This can be used for natural language -syntax recognition, dynamically generating maps or puzzles and of course -baby name generation. - -Before using this module, it is recommended to read the Tensorflow tutorial on -recurrent neural nets, as it explains the basic concepts of this model, and -will show off another module, the PTB module on which this model bases itself. 
- -Here is an overview of the functions available in this module: - -* RNN Module for sequence functions based on PTB - -* Name recognition specifically for recognizing names, but can be adapted to - recognizing sequence patterns - -* Name generations specifically for generating names, but can be adapted to - generating arbitrary sequence patterns -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time - -import tensorflow as tf -import numpy as np - -from model import NamignizerModel -import data_utils - - -class SmallConfig(object): - """Small config.""" - init_scale = 0.1 - learning_rate = 1.0 - max_grad_norm = 5 - num_layers = 2 - num_steps = 20 - hidden_size = 200 - max_epoch = 4 - max_max_epoch = 13 - keep_prob = 1.0 - lr_decay = 0.5 - batch_size = 20 - vocab_size = 27 - epoch_size = 100 - - -class LargeConfig(object): - """Medium config.""" - init_scale = 0.05 - learning_rate = 1.0 - max_grad_norm = 5 - num_layers = 2 - num_steps = 35 - hidden_size = 650 - max_epoch = 6 - max_max_epoch = 39 - keep_prob = 0.5 - lr_decay = 0.8 - batch_size = 20 - vocab_size = 27 - epoch_size = 100 - - -class TestConfig(object): - """Tiny config, for testing.""" - init_scale = 0.1 - learning_rate = 1.0 - max_grad_norm = 1 - num_layers = 1 - num_steps = 2 - hidden_size = 2 - max_epoch = 1 - max_max_epoch = 1 - keep_prob = 1.0 - lr_decay = 0.5 - batch_size = 20 - vocab_size = 27 - epoch_size = 100 - - -def run_epoch(session, m, names, counts, epoch_size, eval_op, verbose=False): - """Runs the model on the given data for one epoch - - Args: - session: the tf session holding the model graph - m: an instance of the NamignizerModel - names: a set of lowercase names of 26 characters - counts: a list of the frequency of the above names - epoch_size: the number of batches to run - eval_op: whether to change the params or not, and how to do it - Kwargs: - verbose: whether to print out state of training during 
the epoch - Returns: - cost: the average cost during the last stage of the epoch - """ - start_time = time.time() - costs = 0.0 - iters = 0 - for step, (x, y) in enumerate(data_utils.namignizer_iterator(names, counts, - m.batch_size, m.num_steps, epoch_size)): - - cost, _ = session.run([m.cost, eval_op], - {m.input_data: x, - m.targets: y, - m.weights: np.ones(m.batch_size * m.num_steps)}) - costs += cost - iters += m.num_steps - - if verbose and step % (epoch_size // 10) == 9: - print("%.3f perplexity: %.3f speed: %.0f lps" % - (step * 1.0 / epoch_size, np.exp(costs / iters), - iters * m.batch_size / (time.time() - start_time))) - - if step >= epoch_size: - break - - return np.exp(costs / iters) - - -def train(data_dir, checkpoint_path, config): - """Trains the model with the given data - - Args: - data_dir: path to the data for the model (see data_utils for data - format) - checkpoint_path: the path to save the trained model checkpoints - config: one of the above configs that specify the model and how it - should be run and trained - Returns: - None - """ - # Prepare Name data. 
- print("Reading Name data in %s" % data_dir) - names, counts = data_utils.read_names(data_dir) - - with tf.Graph().as_default(), tf.Session() as session: - initializer = tf.random_uniform_initializer(-config.init_scale, - config.init_scale) - with tf.variable_scope("model", reuse=None, initializer=initializer): - m = NamignizerModel(is_training=True, config=config) - - tf.global_variables_initializer().run() - - for i in range(config.max_max_epoch): - lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0) - m.assign_lr(session, config.learning_rate * lr_decay) - - print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr))) - train_perplexity = run_epoch(session, m, names, counts, config.epoch_size, m.train_op, - verbose=True) - print("Epoch: %d Train Perplexity: %.3f" % - (i + 1, train_perplexity)) - - m.saver.save(session, checkpoint_path, global_step=i) - - -def namignize(names, checkpoint_path, config): - """Recognizes names and prints the Perplexity of the model for each names - in the list - - Args: - names: a list of names in the model format - checkpoint_path: the path to restore the trained model from, should not - include the model name, just the path to - config: one of the above configs that specify the model and how it - should be run and trained - Returns: - None - """ - with tf.Graph().as_default(), tf.Session() as session: - - with tf.variable_scope("model"): - m = NamignizerModel(is_training=False, config=config) - - m.saver.restore(session, checkpoint_path) - - for name in names: - x, y = data_utils.name_to_batch(name, m.batch_size, m.num_steps) - - cost, loss, _ = session.run([m.cost, m.loss, tf.no_op()], - {m.input_data: x, - m.targets: y, - m.weights: np.concatenate(( - np.ones(len(name)), np.zeros(m.batch_size * m.num_steps - len(name))))}) - - print("Name {} gives us a perplexity of {}".format( - name, np.exp(cost))) - - -def namignator(checkpoint_path, config): - """Generates names randomly according to a given model - - Args: 
- checkpoint_path: the path to restore the trained model from, should not - include the model name, just the path to - config: one of the above configs that specify the model and how it - should be run and trained - Returns: - None - """ - # mutate the config to become a name generator config - config.num_steps = 1 - config.batch_size = 1 - - with tf.Graph().as_default(), tf.Session() as session: - - with tf.variable_scope("model"): - m = NamignizerModel(is_training=False, config=config) - - m.saver.restore(session, checkpoint_path) - - activations, final_state, _ = session.run([m.activations, m.final_state, tf.no_op()], - {m.input_data: np.zeros((1, 1)), - m.targets: np.zeros((1, 1)), - m.weights: np.ones(1)}) - - # sample from our softmax activations - next_letter = np.random.choice(27, p=activations[0]) - name = [next_letter] - while next_letter != 0: - activations, final_state, _ = session.run([m.activations, m.final_state, tf.no_op()], - {m.input_data: [[next_letter]], - m.targets: np.zeros((1, 1)), - m.initial_state: final_state, - m.weights: np.ones(1)}) - - next_letter = np.random.choice(27, p=activations[0]) - name += [next_letter] - - print(map(lambda x: chr(x + 96), name)) - - -if __name__ == "__main__": - train("data/SmallNames.txt", "model/namignizer", SmallConfig) - - namignize(["mary", "ida", "gazorbazorb", "mmmhmm", "bob"], - tf.train.latest_checkpoint("model"), SmallConfig) - - namignator(tf.train.latest_checkpoint("model"), SmallConfig) diff --git a/research/neural_gpu/README.md b/research/neural_gpu/README.md deleted file mode 100644 index 097ef318c4e071f59e4212b0cd901907758d73e7..0000000000000000000000000000000000000000 --- a/research/neural_gpu/README.md +++ /dev/null @@ -1,87 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not 
Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# NeuralGPU -Code for the Neural GPU model described in http://arxiv.org/abs/1511.08228. -The extended version was described in https://arxiv.org/abs/1610.08613. - -Requirements: -* TensorFlow (see tensorflow.org for how to install) - -The model can be trained on the following algorithmic tasks: - -* `sort` - Sort a symbol list -* `kvsort` - Sort symbol keys in dictionary -* `id` - Return the same symbol list -* `rev` - Reverse a symbol list -* `rev2` - Reverse a symbol dictionary by key -* `incr` - Add one to a symbol value -* `add` - Long decimal addition -* `left` - First symbol in list -* `right` - Last symbol in list -* `left-shift` - Left shift a symbol list -* `right-shift` - Right shift a symbol list -* `bmul` - Long binary multiplication -* `mul` - Long decimal multiplication -* `dup` - Duplicate a symbol list with padding -* `badd` - Long binary addition -* `qadd` - Long quaternary addition -* `search` - Search for symbol key in dictionary - -It can also be trained on the WMT English-French translation task: - -* `wmt` - WMT English-French translation (data will be downloaded) - -The value range for symbols are defined by the `vocab_size` flag. -In particular, the values are in the range `vocab_size - 1`. -So if you set `--vocab_size=16` (the default) then `--problem=rev` -will be reversing lists of 15 symbols, and `--problem=id` will be identity -on a list of up to 15 symbols. - - -To train the model on the binary multiplication task run: - -``` -python neural_gpu_trainer.py --problem=bmul -``` - -This trains the Extended Neural GPU, to train the original model run: - -``` -python neural_gpu_trainer.py --problem=bmul --beam_size=0 -``` - -While training, interim / checkpoint model parameters will be -written to `/tmp/neural_gpu/`. - -Once the amount of error gets down to what you're comfortable -with, hit `Ctrl-C` to stop the training process. 
The latest -model parameters will be in `/tmp/neural_gpu/neural_gpu.ckpt-` -and used on any subsequent run. - -To evaluate a trained model on how well it decodes run: - -``` -python neural_gpu_trainer.py --problem=bmul --mode=1 -``` - -To interact with a model (experimental, see code) run: - -``` -python neural_gpu_trainer.py --problem=bmul --mode=2 -``` - -To train on WMT data, set a larger --nmaps and --vocab_size and avoid curriculum: - -``` -python neural_gpu_trainer.py --problem=wmt --vocab_size=32768 --nmaps=256 - --vec_size=256 --curriculum_seq=1.0 --max_length=60 --data_dir ~/wmt -``` - -With less memory, try lower batch size, e.g. `--batch_size=4`. With more GPUs -in your system, there will be a batch on every GPU so you can run larger models. -For example, `--batch_size=4 --num_gpus=4 --nmaps=512 --vec_size=512` will -run a large model (512-size) on 4 GPUs, with effective batches of 4*4=16. - -Maintained by Lukasz Kaiser (lukaszkaiser) diff --git a/research/neural_gpu/data_utils.py b/research/neural_gpu/data_utils.py deleted file mode 100644 index 3c14ff701fce79408fde6505239530dc5b848dd7..0000000000000000000000000000000000000000 --- a/research/neural_gpu/data_utils.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Neural GPU -- data generation and batching utilities.""" - -import math -import os -import random -import sys -import time - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import program_utils - -FLAGS = tf.app.flags.FLAGS - -bins = [2 + bin_idx_i for bin_idx_i in xrange(256)] -all_tasks = ["sort", "kvsort", "id", "rev", "rev2", "incr", "add", "left", - "right", "left-shift", "right-shift", "bmul", "mul", "dup", - "badd", "qadd", "search", "progeval", "progsynth"] -log_filename = "" -vocab, rev_vocab = None, None - - -def pad(l): - for b in bins: - if b >= l: return b - return bins[-1] - - -def bin_for(l): - for i, b in enumerate(bins): - if b >= l: return i - return len(bins) - 1 - - -train_set = {} -test_set = {} -for some_task in all_tasks: - train_set[some_task] = [] - test_set[some_task] = [] - for all_max_len in xrange(10000): - train_set[some_task].append([]) - test_set[some_task].append([]) - - -def read_tmp_file(name): - """Read from a file with the given name in our log directory or above.""" - dirname = os.path.dirname(log_filename) - fname = os.path.join(dirname, name + ".txt") - if not tf.gfile.Exists(fname): - print_out("== not found file: " + fname) - fname = os.path.join(dirname, "../" + name + ".txt") - if not tf.gfile.Exists(fname): - print_out("== not found file: " + fname) - fname = os.path.join(dirname, "../../" + name + ".txt") - if not tf.gfile.Exists(fname): - print_out("== not found file: " + fname) - return None - print_out("== found file: " + fname) - res = [] - with tf.gfile.GFile(fname, mode="r") as f: - for line in f: - res.append(line.strip()) - return res - - -def write_tmp_file(name, lines): - dirname = os.path.dirname(log_filename) - fname = os.path.join(dirname, name + ".txt") - with tf.gfile.GFile(fname, mode="w") as f: - for line in lines: - f.write(line + "\n") - - -def add(n1, n2, base=10): - """Add two numbers 
represented as lower-endian digit lists.""" - k = max(len(n1), len(n2)) + 1 - d1 = n1 + [0 for _ in xrange(k - len(n1))] - d2 = n2 + [0 for _ in xrange(k - len(n2))] - res = [] - carry = 0 - for i in xrange(k): - if d1[i] + d2[i] + carry < base: - res.append(d1[i] + d2[i] + carry) - carry = 0 - else: - res.append(d1[i] + d2[i] + carry - base) - carry = 1 - while res and res[-1] == 0: - res = res[:-1] - if res: return res - return [0] - - -def init_data(task, length, nbr_cases, nclass): - """Data initialization.""" - def rand_pair(l, task): - """Random data pair for a task. Total length should be <= l.""" - k = int((l-1)/2) - base = 10 - if task[0] == "b": base = 2 - if task[0] == "q": base = 4 - d1 = [np.random.randint(base) for _ in xrange(k)] - d2 = [np.random.randint(base) for _ in xrange(k)] - if task in ["add", "badd", "qadd"]: - res = add(d1, d2, base) - elif task in ["mul", "bmul"]: - d1n = sum([d * (base ** i) for i, d in enumerate(d1)]) - d2n = sum([d * (base ** i) for i, d in enumerate(d2)]) - if task == "bmul": - res = [int(x) for x in list(reversed(str(bin(d1n * d2n))))[:-2]] - else: - res = [int(x) for x in list(reversed(str(d1n * d2n)))] - else: - sys.exit() - sep = [12] - if task in ["add", "badd", "qadd"]: sep = [11] - inp = [d + 1 for d in d1] + sep + [d + 1 for d in d2] - return inp, [r + 1 for r in res] - - def rand_dup_pair(l): - """Random data pair for duplication task. Total length should be <= l.""" - k = int(l/2) - x = [np.random.randint(nclass - 1) + 1 for _ in xrange(k)] - inp = x + [0 for _ in xrange(l - k)] - res = x + x + [0 for _ in xrange(l - 2*k)] - return inp, res - - def rand_rev2_pair(l): - """Random data pair for reverse2 task. Total length should be <= l.""" - inp = [(np.random.randint(nclass - 1) + 1, - np.random.randint(nclass - 1) + 1) for _ in xrange(l/2)] - res = [i for i in reversed(inp)] - return [x for p in inp for x in p], [x for p in res for x in p] - - def rand_search_pair(l): - """Random data pair for search task. 
Total length should be <= l.""" - inp = [(np.random.randint(nclass - 1) + 1, - np.random.randint(nclass - 1) + 1) for _ in xrange(l-1/2)] - q = np.random.randint(nclass - 1) + 1 - res = 0 - for (k, v) in reversed(inp): - if k == q: - res = v - return [x for p in inp for x in p] + [q], [res] - - def rand_kvsort_pair(l): - """Random data pair for key-value sort. Total length should be <= l.""" - keys = [(np.random.randint(nclass - 1) + 1, i) for i in xrange(l/2)] - vals = [np.random.randint(nclass - 1) + 1 for _ in xrange(l/2)] - kv = [(k, vals[i]) for (k, i) in keys] - sorted_kv = [(k, vals[i]) for (k, i) in sorted(keys)] - return [x for p in kv for x in p], [x for p in sorted_kv for x in p] - - def prog_io_pair(prog, max_len, counter=0): - try: - ilen = np.random.randint(max_len - 3) + 1 - bound = max(15 - (counter / 20), 1) - inp = [random.choice(range(-bound, bound)) for _ in range(ilen)] - inp_toks = [program_utils.prog_rev_vocab[t] - for t in program_utils.tokenize(str(inp)) if t != ","] - out = program_utils.evaluate(prog, {"a": inp}) - out_toks = [program_utils.prog_rev_vocab[t] - for t in program_utils.tokenize(str(out)) if t != ","] - if counter > 400: - out_toks = [] - if (out_toks and out_toks[0] == program_utils.prog_rev_vocab["["] and - len(out_toks) != len([o for o in out if o == ","]) + 3): - raise ValueError("generated list with too long ints") - if (out_toks and out_toks[0] != program_utils.prog_rev_vocab["["] and - len(out_toks) > 1): - raise ValueError("generated one int but tokenized it to many") - if len(out_toks) > max_len: - raise ValueError("output too long") - return (inp_toks, out_toks) - except ValueError: - return prog_io_pair(prog, max_len, counter+1) - - def spec(inp): - """Return the target given the input for some tasks.""" - if task == "sort": - return sorted(inp) - elif task == "id": - return inp - elif task == "rev": - return [i for i in reversed(inp)] - elif task == "incr": - carry = 1 - res = [] - for i in xrange(len(inp)): - if 
inp[i] + carry < nclass: - res.append(inp[i] + carry) - carry = 0 - else: - res.append(1) - carry = 1 - return res - elif task == "left": - return [inp[0]] - elif task == "right": - return [inp[-1]] - elif task == "left-shift": - return [inp[l-1] for l in xrange(len(inp))] - elif task == "right-shift": - return [inp[l+1] for l in xrange(len(inp))] - else: - print_out("Unknown spec for task " + str(task)) - sys.exit() - - l = length - cur_time = time.time() - total_time = 0.0 - - is_prog = task in ["progeval", "progsynth"] - if is_prog: - inputs_per_prog = 5 - program_utils.make_vocab() - progs = read_tmp_file("programs_len%d" % (l / 10)) - if not progs: - progs = program_utils.gen(l / 10, 1.2 * nbr_cases / inputs_per_prog) - write_tmp_file("programs_len%d" % (l / 10), progs) - prog_ios = read_tmp_file("programs_len%d_io" % (l / 10)) - nbr_cases = min(nbr_cases, len(progs) * inputs_per_prog) / 1.2 - if not prog_ios: - # Generate program io data. - prog_ios = [] - for pidx, prog in enumerate(progs): - if pidx % 500 == 0: - print_out("== generating io pairs for program %d" % pidx) - if pidx * inputs_per_prog > nbr_cases * 1.2: - break - ptoks = [program_utils.prog_rev_vocab[t] - for t in program_utils.tokenize(prog)] - ptoks.append(program_utils.prog_rev_vocab["_EOS"]) - plen = len(ptoks) - for _ in xrange(inputs_per_prog): - if task == "progeval": - inp, out = prog_io_pair(prog, plen) - prog_ios.append(str(inp) + "\t" + str(out) + "\t" + prog) - elif task == "progsynth": - plen = max(len(ptoks), 8) - for _ in xrange(3): - inp, out = prog_io_pair(prog, plen / 2) - prog_ios.append(str(inp) + "\t" + str(out) + "\t" + prog) - write_tmp_file("programs_len%d_io" % (l / 10), prog_ios) - prog_ios_dict = {} - for s in prog_ios: - i, o, p = s.split("\t") - i_clean = "".join([c for c in i if c.isdigit() or c == " "]) - o_clean = "".join([c for c in o if c.isdigit() or c == " "]) - inp = [int(x) for x in i_clean.split()] - out = [int(x) for x in o_clean.split()] - if inp and 
out: - if p in prog_ios_dict: - prog_ios_dict[p].append([inp, out]) - else: - prog_ios_dict[p] = [[inp, out]] - # Use prog_ios_dict to create data. - progs = [] - for prog in prog_ios_dict: - if len([c for c in prog if c == ";"]) <= (l / 10): - progs.append(prog) - nbr_cases = min(nbr_cases, len(progs) * inputs_per_prog) / 1.2 - print_out("== %d training cases on %d progs" % (nbr_cases, len(progs))) - for pidx, prog in enumerate(progs): - if pidx * inputs_per_prog > nbr_cases * 1.2: - break - ptoks = [program_utils.prog_rev_vocab[t] - for t in program_utils.tokenize(prog)] - ptoks.append(program_utils.prog_rev_vocab["_EOS"]) - plen = len(ptoks) - dset = train_set if pidx < nbr_cases / inputs_per_prog else test_set - for _ in xrange(inputs_per_prog): - if task == "progeval": - inp, out = prog_ios_dict[prog].pop() - dset[task][bin_for(plen)].append([[ptoks, inp, [], []], [out]]) - elif task == "progsynth": - plen, ilist = max(len(ptoks), 8), [[]] - for _ in xrange(3): - inp, out = prog_ios_dict[prog].pop() - ilist.append(inp + out) - dset[task][bin_for(plen)].append([ilist, [ptoks]]) - - for case in xrange(0 if is_prog else nbr_cases): - total_time += time.time() - cur_time - cur_time = time.time() - if l > 10000 and case % 100 == 1: - print_out(" avg gen time %.4f s" % (total_time / float(case))) - if task in ["add", "badd", "qadd", "bmul", "mul"]: - i, t = rand_pair(l, task) - train_set[task][bin_for(len(i))].append([[[], i, [], []], [t]]) - i, t = rand_pair(l, task) - test_set[task][bin_for(len(i))].append([[[], i, [], []], [t]]) - elif task == "dup": - i, t = rand_dup_pair(l) - train_set[task][bin_for(len(i))].append([[i], [t]]) - i, t = rand_dup_pair(l) - test_set[task][bin_for(len(i))].append([[i], [t]]) - elif task == "rev2": - i, t = rand_rev2_pair(l) - train_set[task][bin_for(len(i))].append([[i], [t]]) - i, t = rand_rev2_pair(l) - test_set[task][bin_for(len(i))].append([[i], [t]]) - elif task == "search": - i, t = rand_search_pair(l) - 
train_set[task][bin_for(len(i))].append([[i], [t]]) - i, t = rand_search_pair(l) - test_set[task][bin_for(len(i))].append([[i], [t]]) - elif task == "kvsort": - i, t = rand_kvsort_pair(l) - train_set[task][bin_for(len(i))].append([[i], [t]]) - i, t = rand_kvsort_pair(l) - test_set[task][bin_for(len(i))].append([[i], [t]]) - elif task not in ["progeval", "progsynth"]: - inp = [np.random.randint(nclass - 1) + 1 for i in xrange(l)] - target = spec(inp) - train_set[task][bin_for(l)].append([[inp], [target]]) - inp = [np.random.randint(nclass - 1) + 1 for i in xrange(l)] - target = spec(inp) - test_set[task][bin_for(l)].append([[inp], [target]]) - - -def to_symbol(i): - """Covert ids to text.""" - if i == 0: return "" - if i == 11: return "+" - if i == 12: return "*" - return str(i-1) - - -def to_id(s): - """Covert text to ids.""" - if s == "+": return 11 - if s == "*": return 12 - return int(s) + 1 - - -def get_batch(bin_id, batch_size, data_set, height, offset=None, preset=None): - """Get a batch of data, training or testing.""" - inputs, targets = [], [] - pad_length = bins[bin_id] - for b in xrange(batch_size): - if preset is None: - elem = random.choice(data_set[bin_id]) - if offset is not None and offset + b < len(data_set[bin_id]): - elem = data_set[bin_id][offset + b] - else: - elem = preset - inpt, targett, inpl, targetl = elem[0], elem[1], [], [] - for inp in inpt: - inpl.append(inp + [0 for _ in xrange(pad_length - len(inp))]) - if len(inpl) == 1: - for _ in xrange(height - 1): - inpl.append([0 for _ in xrange(pad_length)]) - for target in targett: - targetl.append(target + [0 for _ in xrange(pad_length - len(target))]) - inputs.append(inpl) - targets.append(targetl) - res_input = np.array(inputs, dtype=np.int32) - res_target = np.array(targets, dtype=np.int32) - assert list(res_input.shape) == [batch_size, height, pad_length] - assert list(res_target.shape) == [batch_size, 1, pad_length] - return res_input, res_target - - -def print_out(s, newline=True): - 
"""Print a message out and log it to file.""" - if log_filename: - try: - with tf.gfile.GFile(log_filename, mode="a") as f: - f.write(s + ("\n" if newline else "")) - # pylint: disable=bare-except - except: - sys.stderr.write("Error appending to %s\n" % log_filename) - sys.stdout.write(s + ("\n" if newline else "")) - sys.stdout.flush() - - -def decode(output): - return [np.argmax(o, axis=1) for o in output] - - -def accuracy(inpt_t, output, target_t, batch_size, nprint, - beam_out=None, beam_scores=None): - """Calculate output accuracy given target.""" - assert nprint < batch_size + 1 - inpt = [] - for h in xrange(inpt_t.shape[1]): - inpt.extend([inpt_t[:, h, l] for l in xrange(inpt_t.shape[2])]) - target = [target_t[:, 0, l] for l in xrange(target_t.shape[2])] - def tok(i): - if rev_vocab and i < len(rev_vocab): - return rev_vocab[i] - return str(i - 1) - def task_print(inp, output, target): - stop_bound = 0 - print_len = 0 - while print_len < len(target) and target[print_len] > stop_bound: - print_len += 1 - print_out(" i: " + " ".join([tok(i) for i in inp if i > 0])) - print_out(" o: " + - " ".join([tok(output[l]) for l in xrange(print_len)])) - print_out(" t: " + - " ".join([tok(target[l]) for l in xrange(print_len)])) - decoded_target = target - decoded_output = decode(output) - # Use beam output if given and score is high enough. 
- if beam_out is not None: - for b in xrange(batch_size): - if beam_scores[b] >= 10.0: - for l in xrange(min(len(decoded_output), beam_out.shape[2])): - decoded_output[l][b] = int(beam_out[b, 0, l]) - total = 0 - errors = 0 - seq = [0 for b in xrange(batch_size)] - for l in xrange(len(decoded_output)): - for b in xrange(batch_size): - if decoded_target[l][b] > 0: - total += 1 - if decoded_output[l][b] != decoded_target[l][b]: - seq[b] = 1 - errors += 1 - e = 0 # Previous error index - for _ in xrange(min(nprint, sum(seq))): - while seq[e] == 0: - e += 1 - task_print([inpt[l][e] for l in xrange(len(inpt))], - [decoded_output[l][e] for l in xrange(len(decoded_target))], - [decoded_target[l][e] for l in xrange(len(decoded_target))]) - e += 1 - for b in xrange(nprint - errors): - task_print([inpt[l][b] for l in xrange(len(inpt))], - [decoded_output[l][b] for l in xrange(len(decoded_target))], - [decoded_target[l][b] for l in xrange(len(decoded_target))]) - return errors, total, sum(seq) - - -def safe_exp(x): - perp = 10000 - x = float(x) - if x < 100: perp = math.exp(x) - if perp > 10000: return 10000 - return perp diff --git a/research/neural_gpu/neural_gpu.py b/research/neural_gpu/neural_gpu.py deleted file mode 100644 index 55b2b3e99224b31c672014195e9ef23fa1e892f7..0000000000000000000000000000000000000000 --- a/research/neural_gpu/neural_gpu.py +++ /dev/null @@ -1,747 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""The Neural GPU Model.""" - -import time - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from tensorflow.python.framework import function -import data_utils as data - -do_jit = False # Gives more speed but experimental for now. -jit_scope = tf.contrib.compiler.jit.experimental_jit_scope - - -def conv_linear(args, kw, kh, nin, nout, rate, do_bias, bias_start, prefix): - """Convolutional linear map.""" - if not isinstance(args, (list, tuple)): - args = [args] - with tf.variable_scope(prefix): - with tf.device("/cpu:0"): - k = tf.get_variable("CvK", [kw, kh, nin, nout]) - if len(args) == 1: - arg = args[0] - else: - arg = tf.concat(axis=3, values=args) - res = tf.nn.convolution(arg, k, dilation_rate=(rate, 1), padding="SAME") - if not do_bias: return res - with tf.device("/cpu:0"): - bias_term = tf.get_variable( - "CvB", [nout], initializer=tf.constant_initializer(bias_start)) - bias_term = tf.reshape(bias_term, [1, 1, 1, nout]) - return res + bias_term - - -def sigmoid_cutoff(x, cutoff): - """Sigmoid with cutoff, e.g., 1.2sigmoid(x) - 0.1.""" - y = tf.sigmoid(x) - if cutoff < 1.01: return y - d = (cutoff - 1.0) / 2.0 - return tf.minimum(1.0, tf.maximum(0.0, cutoff * y - d), name="cutoff_min") - - -@function.Defun(tf.float32, noinline=True) -def sigmoid_cutoff_12(x): - """Sigmoid with cutoff 1.2, specialized for speed and memory use.""" - y = tf.sigmoid(x) - return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1), name="cutoff_min_12") - - -@function.Defun(tf.float32, noinline=True) -def sigmoid_hard(x): - """Hard sigmoid.""" - return tf.minimum(1.0, tf.maximum(0.0, 0.25 * x + 0.5)) - - -def place_at14(decided, selected, it): - """Place selected at it-th coordinate of decided, dim=1 of 4.""" - slice1 = decided[:, :it, :, :] - slice2 = decided[:, it + 
1:, :, :] - return tf.concat(axis=1, values=[slice1, selected, slice2]) - - -def place_at13(decided, selected, it): - """Place selected at it-th coordinate of decided, dim=1 of 3.""" - slice1 = decided[:, :it, :] - slice2 = decided[:, it + 1:, :] - return tf.concat(axis=1, values=[slice1, selected, slice2]) - - -def tanh_cutoff(x, cutoff): - """Tanh with cutoff, e.g., 1.1tanh(x) cut to [-1. 1].""" - y = tf.tanh(x) - if cutoff < 1.01: return y - d = (cutoff - 1.0) / 2.0 - return tf.minimum(1.0, tf.maximum(-1.0, (1.0 + d) * y)) - - -@function.Defun(tf.float32, noinline=True) -def tanh_hard(x): - """Hard tanh.""" - return tf.minimum(1.0, tf.maximum(0.0, x)) - - -def layer_norm(x, nmaps, prefix, epsilon=1e-5): - """Layer normalize the 4D tensor x, averaging over the last dimension.""" - with tf.variable_scope(prefix): - scale = tf.get_variable("layer_norm_scale", [nmaps], - initializer=tf.ones_initializer()) - bias = tf.get_variable("layer_norm_bias", [nmaps], - initializer=tf.zeros_initializer()) - mean, variance = tf.nn.moments(x, [3], keep_dims=True) - norm_x = (x - mean) / tf.sqrt(variance + epsilon) - return norm_x * scale + bias - - -def conv_gru(inpts, mem, kw, kh, nmaps, rate, cutoff, prefix, do_layer_norm, - args_len=None): - """Convolutional GRU.""" - def conv_lin(args, suffix, bias_start): - total_args_len = args_len or len(args) * nmaps - res = conv_linear(args, kw, kh, total_args_len, nmaps, rate, True, - bias_start, prefix + "/" + suffix) - if do_layer_norm: - return layer_norm(res, nmaps, prefix + "/" + suffix) - else: - return res - if cutoff == 1.2: - reset = sigmoid_cutoff_12(conv_lin(inpts + [mem], "r", 1.0)) - gate = sigmoid_cutoff_12(conv_lin(inpts + [mem], "g", 1.0)) - elif cutoff > 10: - reset = sigmoid_hard(conv_lin(inpts + [mem], "r", 1.0)) - gate = sigmoid_hard(conv_lin(inpts + [mem], "g", 1.0)) - else: - reset = sigmoid_cutoff(conv_lin(inpts + [mem], "r", 1.0), cutoff) - gate = sigmoid_cutoff(conv_lin(inpts + [mem], "g", 1.0), cutoff) - if 
cutoff > 10: - candidate = tanh_hard(conv_lin(inpts + [reset * mem], "c", 0.0)) - else: - # candidate = tanh_cutoff(conv_lin(inpts + [reset * mem], "c", 0.0), cutoff) - candidate = tf.tanh(conv_lin(inpts + [reset * mem], "c", 0.0)) - return gate * mem + (1 - gate) * candidate - - -CHOOSE_K = 256 - - -def memory_call(q, l, nmaps, mem_size, vocab_size, num_gpus, update_mem): - raise ValueError("Fill for experiments with additional memory structures.") - - -def memory_run(step, nmaps, mem_size, batch_size, vocab_size, - global_step, do_training, update_mem, decay_factor, num_gpus, - target_emb_weights, output_w, gpu_targets_tn, it): - """Run memory.""" - q = step[:, 0, it, :] - mlabels = gpu_targets_tn[:, it, 0] - res, mask, mem_loss = memory_call( - q, mlabels, nmaps, mem_size, vocab_size, num_gpus, update_mem) - res = tf.gather(target_emb_weights, res) * tf.expand_dims(mask[:, 0], 1) - - # Mix gold and original in the first steps, 20% later. - gold = tf.nn.dropout(tf.gather(target_emb_weights, mlabels), 0.7) - use_gold = 1.0 - tf.cast(global_step, tf.float32) / (1000. 
* decay_factor) - use_gold = tf.maximum(use_gold, 0.2) * do_training - mem = tf.cond(tf.less(tf.random_uniform([]), use_gold), - lambda: use_gold * gold + (1.0 - use_gold) * res, - lambda: res) - mem = tf.reshape(mem, [-1, 1, 1, nmaps]) - return mem, mem_loss, update_mem - - -@tf.RegisterGradient("CustomIdG") -def _custom_id_grad(_, grads): - return grads - - -def quantize(t, quant_scale, max_value=1.0): - """Quantize a tensor t with each element in [-max_value, max_value].""" - t = tf.minimum(max_value, tf.maximum(t, -max_value)) - big = quant_scale * (t + max_value) + 0.5 - with tf.get_default_graph().gradient_override_map({"Floor": "CustomIdG"}): - res = (tf.floor(big) / quant_scale) - max_value - return res - - -def quantize_weights_op(quant_scale, max_value): - ops = [v.assign(quantize(v, quant_scale, float(max_value))) - for v in tf.trainable_variables()] - return tf.group(*ops) - - -def autoenc_quantize(x, nbits, nmaps, do_training, layers=1): - """Autoencoder into nbits vectors of bits, using noise and sigmoids.""" - enc_x = tf.reshape(x, [-1, nmaps]) - for i in xrange(layers - 1): - enc_x = tf.layers.dense(enc_x, nmaps, name="autoenc_%d" % i) - enc_x = tf.layers.dense(enc_x, nbits, name="autoenc_%d" % (layers - 1)) - noise = tf.truncated_normal(tf.shape(enc_x), stddev=2.0) - dec_x = sigmoid_cutoff_12(enc_x + noise * do_training) - dec_x = tf.reshape(dec_x, [-1, nbits]) - for i in xrange(layers): - dec_x = tf.layers.dense(dec_x, nmaps, name="autodec_%d" % i) - return tf.reshape(dec_x, tf.shape(x)) - - -def make_dense(targets, noclass, low_param): - """Move a batch of targets to a dense 1-hot representation.""" - low = low_param / float(noclass - 1) - high = 1.0 - low * (noclass - 1) - targets = tf.cast(targets, tf.int64) - return tf.one_hot(targets, depth=noclass, on_value=high, off_value=low) - - -def reorder_beam(beam_size, batch_size, beam_val, output, is_first, - tensors_to_reorder): - """Reorder to minimize beam costs.""" - # beam_val is [batch_size x 
beam_size]; let b = batch_size * beam_size - # decided is len x b x a x b - # output is b x out_size; step is b x len x a x b; - outputs = tf.split(axis=0, num_or_size_splits=beam_size, value=tf.nn.log_softmax(output)) - all_beam_vals, all_beam_idx = [], [] - beam_range = 1 if is_first else beam_size - for i in xrange(beam_range): - top_out, top_out_idx = tf.nn.top_k(outputs[i], k=beam_size) - cur_beam_val = beam_val[:, i] - top_out = tf.Print(top_out, [top_out, top_out_idx, beam_val, i, - cur_beam_val], "GREPO", summarize=8) - all_beam_vals.append(top_out + tf.expand_dims(cur_beam_val, 1)) - all_beam_idx.append(top_out_idx) - all_beam_idx = tf.reshape(tf.transpose(tf.concat(axis=1, values=all_beam_idx), [1, 0]), - [-1]) - top_beam, top_beam_idx = tf.nn.top_k(tf.concat(axis=1, values=all_beam_vals), k=beam_size) - top_beam_idx = tf.Print(top_beam_idx, [top_beam, top_beam_idx], - "GREP", summarize=8) - reordered = [[] for _ in xrange(len(tensors_to_reorder) + 1)] - top_out_idx = [] - for i in xrange(beam_size): - which_idx = top_beam_idx[:, i] * batch_size + tf.range(batch_size) - top_out_idx.append(tf.gather(all_beam_idx, which_idx)) - which_beam = top_beam_idx[:, i] / beam_size # [batch] - which_beam = which_beam * batch_size + tf.range(batch_size) - reordered[0].append(tf.gather(output, which_beam)) - for i, t in enumerate(tensors_to_reorder): - reordered[i + 1].append(tf.gather(t, which_beam)) - new_tensors = [tf.concat(axis=0, values=t) for t in reordered] - top_out_idx = tf.concat(axis=0, values=top_out_idx) - return (top_beam, new_tensors[0], top_out_idx, new_tensors[1:]) - - -class NeuralGPU(object): - """Neural GPU Model.""" - - def __init__(self, nmaps, vec_size, niclass, noclass, dropout, - max_grad_norm, cutoff, nconvs, kw, kh, height, mem_size, - learning_rate, min_length, num_gpus, num_replicas, - grad_noise_scale, sampling_rate, act_noise=0.0, do_rnn=False, - atrous=False, beam_size=1, backward=True, do_layer_norm=False, - autoenc_decay=1.0): - # 
Feeds for parameters and ops to update them. - self.nmaps = nmaps - if backward: - self.global_step = tf.Variable(0, trainable=False, name="global_step") - self.cur_length = tf.Variable(min_length, trainable=False) - self.cur_length_incr_op = self.cur_length.assign_add(1) - self.lr = tf.Variable(learning_rate, trainable=False) - self.lr_decay_op = self.lr.assign(self.lr * 0.995) - self.do_training = tf.placeholder(tf.float32, name="do_training") - self.update_mem = tf.placeholder(tf.int32, name="update_mem") - self.noise_param = tf.placeholder(tf.float32, name="noise_param") - - # Feeds for inputs, targets, outputs, losses, etc. - self.input = tf.placeholder(tf.int32, name="inp") - self.target = tf.placeholder(tf.int32, name="tgt") - self.prev_step = tf.placeholder(tf.float32, name="prev_step") - gpu_input = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.input) - gpu_target = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.target) - gpu_prev_step = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.prev_step) - batch_size = tf.shape(gpu_input[0])[0] - - if backward: - adam_lr = 0.005 * self.lr - adam = tf.train.AdamOptimizer(adam_lr, epsilon=1e-3) - - def adam_update(grads): - return adam.apply_gradients(zip(grads, tf.trainable_variables()), - global_step=self.global_step, - name="adam_update") - - # When switching from Adam to SGD we perform reverse-decay. - if backward: - global_step_float = tf.cast(self.global_step, tf.float32) - sampling_decay_exponent = global_step_float / 100000.0 - sampling_decay = tf.maximum(0.05, tf.pow(0.5, sampling_decay_exponent)) - self.sampling = sampling_rate * 0.05 / sampling_decay - else: - self.sampling = tf.constant(0.0) - - # Cache variables on cpu if needed. 
- if num_replicas > 1 or num_gpus > 1: - with tf.device("/cpu:0"): - caching_const = tf.constant(0) - tf.get_variable_scope().set_caching_device(caching_const.op.device) - # partitioner = tf.variable_axis_size_partitioner(1024*256*4) - # tf.get_variable_scope().set_partitioner(partitioner) - - def gpu_avg(l): - if l[0] is None: - for elem in l: - assert elem is None - return 0.0 - if len(l) < 2: - return l[0] - return sum(l) / float(num_gpus) - - self.length_tensor = tf.placeholder(tf.int32, name="length") - - with tf.device("/cpu:0"): - emb_weights = tf.get_variable( - "embedding", [niclass, vec_size], - initializer=tf.random_uniform_initializer(-1.7, 1.7)) - if beam_size > 0: - target_emb_weights = tf.get_variable( - "target_embedding", [noclass, nmaps], - initializer=tf.random_uniform_initializer(-1.7, 1.7)) - e0 = tf.scatter_update(emb_weights, - tf.constant(0, dtype=tf.int32, shape=[1]), - tf.zeros([1, vec_size])) - output_w = tf.get_variable("output_w", [nmaps, noclass], tf.float32) - - def conv_rate(layer): - if atrous: - return 2**layer - return 1 - - # pylint: disable=cell-var-from-loop - def enc_step(step): - """Encoder step.""" - if autoenc_decay < 1.0: - quant_step = autoenc_quantize(step, 16, nmaps, self.do_training) - if backward: - exp_glob = tf.train.exponential_decay(1.0, self.global_step - 10000, - 1000, autoenc_decay) - dec_factor = 1.0 - exp_glob # * self.do_training - dec_factor = tf.cond(tf.less(self.global_step, 10500), - lambda: tf.constant(0.05), lambda: dec_factor) - else: - dec_factor = 1.0 - cur = tf.cond(tf.less(tf.random_uniform([]), dec_factor), - lambda: quant_step, lambda: step) - else: - cur = step - if dropout > 0.0001: - cur = tf.nn.dropout(cur, keep_prob) - if act_noise > 0.00001: - cur += tf.truncated_normal(tf.shape(cur)) * act_noise_scale - # Do nconvs-many CGRU steps. 
- if do_jit and tf.get_variable_scope().reuse: - with jit_scope(): - for layer in xrange(nconvs): - cur = conv_gru([], cur, kw, kh, nmaps, conv_rate(layer), - cutoff, "ecgru_%d" % layer, do_layer_norm) - else: - for layer in xrange(nconvs): - cur = conv_gru([], cur, kw, kh, nmaps, conv_rate(layer), - cutoff, "ecgru_%d" % layer, do_layer_norm) - return cur - - zero_tgt = tf.zeros([batch_size, nmaps, 1]) - zero_tgt.set_shape([None, nmaps, 1]) - - def dec_substep(step, decided): - """Decoder sub-step.""" - cur = step - if dropout > 0.0001: - cur = tf.nn.dropout(cur, keep_prob) - if act_noise > 0.00001: - cur += tf.truncated_normal(tf.shape(cur)) * act_noise_scale - # Do nconvs-many CGRU steps. - if do_jit and tf.get_variable_scope().reuse: - with jit_scope(): - for layer in xrange(nconvs): - cur = conv_gru([decided], cur, kw, kh, nmaps, conv_rate(layer), - cutoff, "dcgru_%d" % layer, do_layer_norm) - else: - for layer in xrange(nconvs): - cur = conv_gru([decided], cur, kw, kh, nmaps, conv_rate(layer), - cutoff, "dcgru_%d" % layer, do_layer_norm) - return cur - # pylint: enable=cell-var-from-loop - - def dec_step(step, it, it_int, decided, output_ta, tgts, - mloss, nupd_in, out_idx, beam_cost): - """Decoder step.""" - nupd, mem_loss = 0, 0.0 - if mem_size > 0: - it_incr = tf.minimum(it+1, length - 1) - mem, mem_loss, nupd = memory_run( - step, nmaps, mem_size, batch_size, noclass, self.global_step, - self.do_training, self.update_mem, 10, num_gpus, - target_emb_weights, output_w, gpu_targets_tn, it_incr) - step = dec_substep(step, decided) - output_l = tf.expand_dims(tf.expand_dims(step[:, it, 0, :], 1), 1) - # Calculate argmax output. 
- output = tf.reshape(output_l, [-1, nmaps]) - # pylint: disable=cell-var-from-loop - output = tf.matmul(output, output_w) - if beam_size > 1: - beam_cost, output, out, reordered = reorder_beam( - beam_size, batch_size, beam_cost, output, it_int == 0, - [output_l, out_idx, step, decided]) - [output_l, out_idx, step, decided] = reordered - else: - # Scheduled sampling. - out = tf.multinomial(tf.stop_gradient(output), 1) - out = tf.to_int32(tf.squeeze(out, [1])) - out_write = output_ta.write(it, output_l[:batch_size, :, :, :]) - output = tf.gather(target_emb_weights, out) - output = tf.reshape(output, [-1, 1, nmaps]) - output = tf.concat(axis=1, values=[output] * height) - tgt = tgts[it, :, :, :] - selected = tf.cond(tf.less(tf.random_uniform([]), self.sampling), - lambda: output, lambda: tgt) - # pylint: enable=cell-var-from-loop - dec_write = place_at14(decided, tf.expand_dims(selected, 1), it) - out_idx = place_at13( - out_idx, tf.reshape(out, [beam_size * batch_size, 1, 1]), it) - if mem_size > 0: - mem = tf.concat(axis=2, values=[mem] * height) - dec_write = place_at14(dec_write, mem, it_incr) - return (step, dec_write, out_write, mloss + mem_loss, nupd_in + nupd, - out_idx, beam_cost) - - # Main model construction. - gpu_outputs = [] - gpu_losses = [] - gpu_grad_norms = [] - grads_list = [] - gpu_out_idx = [] - self.after_enc_step = [] - for gpu in xrange(num_gpus): # Multi-GPU towers, average gradients later. - length = self.length_tensor - length_float = tf.cast(length, tf.float32) - if gpu > 0: - tf.get_variable_scope().reuse_variables() - gpu_outputs.append([]) - gpu_losses.append([]) - gpu_grad_norms.append([]) - with tf.name_scope("gpu%d" % gpu), tf.device("/gpu:%d" % gpu): - # Main graph creation loop. - data.print_out("Creating model.") - start_time = time.time() - - # Embed inputs and calculate mask. 
- with tf.device("/cpu:0"): - tgt_shape = tf.shape(tf.squeeze(gpu_target[gpu], [1])) - weights = tf.where(tf.squeeze(gpu_target[gpu], [1]) > 0, - tf.ones(tgt_shape), tf.zeros(tgt_shape)) - - # Embed inputs and targets. - with tf.control_dependencies([e0]): - start = tf.gather(emb_weights, gpu_input[gpu]) # b x h x l x nmaps - gpu_targets_tn = gpu_target[gpu] # b x 1 x len - if beam_size > 0: - embedded_targets_tn = tf.gather(target_emb_weights, - gpu_targets_tn) - embedded_targets_tn = tf.transpose( - embedded_targets_tn, [2, 0, 1, 3]) # len x b x 1 x nmaps - embedded_targets_tn = tf.concat(axis=2, values=[embedded_targets_tn] * height) - - # First image comes from start by applying convolution and adding 0s. - start = tf.transpose(start, [0, 2, 1, 3]) # Now b x len x h x vec_s - first = conv_linear(start, 1, 1, vec_size, nmaps, 1, True, 0.0, "input") - first = layer_norm(first, nmaps, "input") - - # Computation steps. - keep_prob = dropout * 3.0 / tf.sqrt(length_float) - keep_prob = 1.0 - self.do_training * keep_prob - act_noise_scale = act_noise * self.do_training - - # Start with a convolutional gate merging previous step. - step = conv_gru([gpu_prev_step[gpu]], first, - kw, kh, nmaps, 1, cutoff, "first", do_layer_norm) - - # This is just for running a baseline RNN seq2seq model. - if do_rnn: - self.after_enc_step.append(step) # Not meaningful here, but needed. - def lstm_cell(): - return tf.contrib.rnn.BasicLSTMCell(height * nmaps) - cell = tf.contrib.rnn.MultiRNNCell( - [lstm_cell() for _ in range(nconvs)]) - with tf.variable_scope("encoder"): - encoder_outputs, encoder_state = tf.nn.dynamic_rnn( - cell, tf.reshape(step, [batch_size, length, height * nmaps]), - dtype=tf.float32, time_major=False) - - # Attention. 
- attn = tf.layers.dense( - encoder_outputs, height * nmaps, name="attn1") - - # pylint: disable=cell-var-from-loop - @function.Defun(noinline=True) - def attention_query(query, attn_v): - vecs = tf.tanh(attn + tf.expand_dims(query, 1)) - mask = tf.reduce_sum(vecs * tf.reshape(attn_v, [1, 1, -1]), 2) - mask = tf.nn.softmax(mask) - return tf.reduce_sum(encoder_outputs * tf.expand_dims(mask, 2), 1) - - with tf.variable_scope("decoder"): - def decoder_loop_fn(state__prev_cell_out__unused, cell_inp__cur_tgt): - """Decoder loop function.""" - state, prev_cell_out, _ = state__prev_cell_out__unused - cell_inp, cur_tgt = cell_inp__cur_tgt - attn_q = tf.layers.dense(prev_cell_out, height * nmaps, - name="attn_query") - attn_res = attention_query(attn_q, tf.get_variable( - "attn_v", [height * nmaps], - initializer=tf.random_uniform_initializer(-0.1, 0.1))) - concatenated = tf.reshape(tf.concat(axis=1, values=[cell_inp, attn_res]), - [batch_size, 2 * height * nmaps]) - cell_inp = tf.layers.dense( - concatenated, height * nmaps, name="attn_merge") - output, new_state = cell(cell_inp, state) - - mem_loss = 0.0 - if mem_size > 0: - res, mask, mem_loss = memory_call( - output, cur_tgt, height * nmaps, mem_size, noclass, - num_gpus, self.update_mem) - res = tf.gather(target_emb_weights, res) - res *= tf.expand_dims(mask[:, 0], 1) - output = tf.layers.dense( - tf.concat(axis=1, values=[output, res]), height * nmaps, name="rnnmem") - - return new_state, output, mem_loss - # pylint: enable=cell-var-from-loop - gpu_targets = tf.squeeze(gpu_target[gpu], [1]) # b x len - gpu_tgt_trans = tf.transpose(gpu_targets, [1, 0]) - dec_zero = tf.zeros([batch_size, 1], dtype=tf.int32) - dec_inp = tf.concat(axis=1, values=[dec_zero, gpu_targets]) - dec_inp = dec_inp[:, :length] - embedded_dec_inp = tf.gather(target_emb_weights, dec_inp) - embedded_dec_inp_proj = tf.layers.dense( - embedded_dec_inp, height * nmaps, name="dec_proj") - embedded_dec_inp_proj = tf.transpose(embedded_dec_inp_proj, - [1, 
0, 2]) - init_vals = (encoder_state, - tf.zeros([batch_size, height * nmaps]), 0.0) - _, dec_outputs, mem_losses = tf.scan( - decoder_loop_fn, (embedded_dec_inp_proj, gpu_tgt_trans), - initializer=init_vals) - mem_loss = tf.reduce_mean(mem_losses) - outputs = tf.layers.dense(dec_outputs, nmaps, name="out_proj") - # Final convolution to get logits, list outputs. - outputs = tf.matmul(tf.reshape(outputs, [-1, nmaps]), output_w) - outputs = tf.reshape(outputs, [length, batch_size, noclass]) - gpu_out_idx.append(tf.argmax(outputs, 2)) - else: # Here we go with the Neural GPU. - # Encoder. - enc_length = length - step = enc_step(step) # First step hard-coded. - # pylint: disable=cell-var-from-loop - i = tf.constant(1) - c = lambda i, _s: tf.less(i, enc_length) - def enc_step_lambda(i, step): - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - new_step = enc_step(step) - return (i + 1, new_step) - _, step = tf.while_loop( - c, enc_step_lambda, [i, step], - parallel_iterations=1, swap_memory=True) - # pylint: enable=cell-var-from-loop - - self.after_enc_step.append(step) - - # Decoder. - if beam_size > 0: - output_ta = tf.TensorArray( - dtype=tf.float32, size=length, dynamic_size=False, - infer_shape=False, name="outputs") - out_idx = tf.zeros([beam_size * batch_size, length, 1], - dtype=tf.int32) - decided_t = tf.zeros([beam_size * batch_size, length, - height, vec_size]) - - # Prepare for beam search. - tgts = tf.concat(axis=1, values=[embedded_targets_tn] * beam_size) - beam_cost = tf.zeros([batch_size, beam_size]) - step = tf.concat(axis=0, values=[step] * beam_size) - # First step hard-coded. 
- step, decided_t, output_ta, mem_loss, nupd, oi, bc = dec_step( - step, 0, 0, decided_t, output_ta, tgts, 0.0, 0, out_idx, - beam_cost) - tf.get_variable_scope().reuse_variables() - # pylint: disable=cell-var-from-loop - def step_lambda(i, step, dec_t, out_ta, ml, nu, oi, bc): - with tf.variable_scope(tf.get_variable_scope(), reuse=True): - s, d, t, nml, nu, oi, bc = dec_step( - step, i, 1, dec_t, out_ta, tgts, ml, nu, oi, bc) - return (i + 1, s, d, t, nml, nu, oi, bc) - i = tf.constant(1) - c = lambda i, _s, _d, _o, _ml, _nu, _oi, _bc: tf.less(i, length) - _, step, _, output_ta, mem_loss, nupd, out_idx, _ = tf.while_loop( - c, step_lambda, - [i, step, decided_t, output_ta, mem_loss, nupd, oi, bc], - parallel_iterations=1, swap_memory=True) - # pylint: enable=cell-var-from-loop - gpu_out_idx.append(tf.squeeze(out_idx, [2])) - outputs = output_ta.stack() - outputs = tf.squeeze(outputs, [2, 3]) # Now l x b x nmaps - else: - # If beam_size is 0 or less, we don't have a decoder. - mem_loss = 0.0 - outputs = tf.transpose(step[:, :, 1, :], [1, 0, 2]) - gpu_out_idx.append(tf.argmax(outputs, 2)) - - # Final convolution to get logits, list outputs. - outputs = tf.matmul(tf.reshape(outputs, [-1, nmaps]), output_w) - outputs = tf.reshape(outputs, [length, batch_size, noclass]) - gpu_outputs[gpu] = tf.nn.softmax(outputs) - - # Calculate cross-entropy loss and normalize it. 
- targets_soft = make_dense(tf.squeeze(gpu_target[gpu], [1]), - noclass, 0.1) - targets_soft = tf.reshape(targets_soft, [-1, noclass]) - targets_hard = make_dense(tf.squeeze(gpu_target[gpu], [1]), - noclass, 0.0) - targets_hard = tf.reshape(targets_hard, [-1, noclass]) - output = tf.transpose(outputs, [1, 0, 2]) - xent_soft = tf.reshape(tf.nn.softmax_cross_entropy_with_logits( - logits=tf.reshape(output, [-1, noclass]), labels=targets_soft), - [batch_size, length]) - xent_hard = tf.reshape(tf.nn.softmax_cross_entropy_with_logits( - logits=tf.reshape(output, [-1, noclass]), labels=targets_hard), - [batch_size, length]) - low, high = 0.1 / float(noclass - 1), 0.9 - const = high * tf.log(high) + float(noclass - 1) * low * tf.log(low) - weight_sum = tf.reduce_sum(weights) + 1e-20 - true_perp = tf.reduce_sum(xent_hard * weights) / weight_sum - soft_loss = tf.reduce_sum(xent_soft * weights) / weight_sum - perp_loss = soft_loss + const - # Final loss: cross-entropy + shared parameter relaxation part + extra. - mem_loss = 0.5 * tf.reduce_mean(mem_loss) / length_float - total_loss = perp_loss + mem_loss - gpu_losses[gpu].append(true_perp) - - # Gradients. - if backward: - data.print_out("Creating backward pass for the model.") - grads = tf.gradients( - total_loss, tf.trainable_variables(), - colocate_gradients_with_ops=True) - for g_i, g in enumerate(grads): - if isinstance(g, tf.IndexedSlices): - grads[g_i] = tf.convert_to_tensor(g) - grads, norm = tf.clip_by_global_norm(grads, max_grad_norm) - gpu_grad_norms[gpu].append(norm) - for g in grads: - if grad_noise_scale > 0.001: - g += tf.truncated_normal(tf.shape(g)) * self.noise_param - grads_list.append(grads) - else: - gpu_grad_norms[gpu].append(0.0) - data.print_out("Created model for gpu %d in %.2f s." - % (gpu, time.time() - start_time)) - - self.updates = [] - self.after_enc_step = tf.concat(axis=0, values=self.after_enc_step) # Concat GPUs. 
- if backward: - tf.get_variable_scope()._reuse = False - tf.get_variable_scope().set_caching_device(None) - grads = [gpu_avg([grads_list[g][i] for g in xrange(num_gpus)]) - for i in xrange(len(grads_list[0]))] - update = adam_update(grads) - self.updates.append(update) - else: - self.updates.append(tf.no_op()) - - self.losses = [gpu_avg([gpu_losses[g][i] for g in xrange(num_gpus)]) - for i in xrange(len(gpu_losses[0]))] - self.out_idx = tf.concat(axis=0, values=gpu_out_idx) - self.grad_norms = [gpu_avg([gpu_grad_norms[g][i] for g in xrange(num_gpus)]) - for i in xrange(len(gpu_grad_norms[0]))] - self.outputs = [tf.concat(axis=1, values=[gpu_outputs[g] for g in xrange(num_gpus)])] - self.quantize_op = quantize_weights_op(512, 8) - if backward: - self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10) - - def step(self, sess, inp, target, do_backward_in, noise_param=None, - beam_size=2, eos_id=2, eos_cost=0.0, update_mem=None, state=None): - """Run a step of the network.""" - batch_size, height, length = inp.shape[0], inp.shape[1], inp.shape[2] - do_backward = do_backward_in - train_mode = True - if do_backward_in is None: - do_backward = False - train_mode = False - if update_mem is None: - update_mem = do_backward - feed_in = {} - # print " feeding sequences of length %d" % length - if state is None: - state = np.zeros([batch_size, length, height, self.nmaps]) - feed_in[self.prev_step.name] = state - feed_in[self.length_tensor.name] = length - feed_in[self.noise_param.name] = noise_param if noise_param else 0.0 - feed_in[self.do_training.name] = 1.0 if do_backward else 0.0 - feed_in[self.update_mem.name] = 1 if update_mem else 0 - if do_backward_in is False: - feed_in[self.sampling.name] = 0.0 - index = 0 # We're dynamic now. 
- feed_out = [] - if do_backward: - feed_out.append(self.updates[index]) - feed_out.append(self.grad_norms[index]) - if train_mode: - feed_out.append(self.losses[index]) - feed_in[self.input.name] = inp - feed_in[self.target.name] = target - feed_out.append(self.outputs[index]) - if train_mode: - # Make a full-sequence training step with one call to session.run. - res = sess.run([self.after_enc_step] + feed_out, feed_in) - after_enc_state, res = res[0], res[1:] - else: - # Make a full-sequence decoding step with one call to session.run. - feed_in[self.sampling.name] = 1.1 # Sample every time. - res = sess.run([self.after_enc_step, self.out_idx] + feed_out, feed_in) - after_enc_state, out_idx = res[0], res[1] - res = [res[2][l] for l in xrange(length)] - outputs = [out_idx[:, i] for i in xrange(length)] - cost = [0.0 for _ in xrange(beam_size * batch_size)] - seen_eos = [0 for _ in xrange(beam_size * batch_size)] - for idx, logit in enumerate(res): - best = outputs[idx] - for b in xrange(batch_size): - if seen_eos[b] > 1: - cost[b] -= eos_cost - else: - cost[b] += np.log(logit[b][best[b]]) - if best[b] in [eos_id]: - seen_eos[b] += 1 - res = [[-c for c in cost]] + outputs - # Collect and output results. - offset = 0 - norm = None - if do_backward: - offset = 2 - norm = res[1] - if train_mode: - outputs = res[offset + 1] - outputs = [outputs[l] for l in xrange(length)] - return res[offset], outputs, norm, after_enc_state diff --git a/research/neural_gpu/neural_gpu_trainer.py b/research/neural_gpu/neural_gpu_trainer.py deleted file mode 100644 index 1f704b0da880dbde4b09bf2cc108edb034d7b1a0..0000000000000000000000000000000000000000 --- a/research/neural_gpu/neural_gpu_trainer.py +++ /dev/null @@ -1,1027 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Neural GPU.""" - -from __future__ import print_function - -import math -import os -import random -import sys -import threading -import time - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import program_utils -import data_utils as data -import neural_gpu as ngpu -import wmt_utils as wmt - -tf.app.flags.DEFINE_float("lr", 0.1, "Learning rate.") -tf.app.flags.DEFINE_float("init_weight", 0.8, "Initial weights deviation.") -tf.app.flags.DEFINE_float("max_grad_norm", 4.0, "Clip gradients to this norm.") -tf.app.flags.DEFINE_float("cutoff", 1.2, "Cutoff at the gates.") -tf.app.flags.DEFINE_float("curriculum_ppx", 9.9, "Move curriculum if ppl < X.") -tf.app.flags.DEFINE_float("curriculum_seq", 0.3, "Move curriculum if seq < X.") -tf.app.flags.DEFINE_float("dropout", 0.1, "Dropout that much.") -tf.app.flags.DEFINE_float("grad_noise_scale", 0.0, "Gradient noise scale.") -tf.app.flags.DEFINE_float("max_sampling_rate", 0.1, "Maximal sampling rate.") -tf.app.flags.DEFINE_float("length_norm", 0.0, "Length normalization.") -tf.app.flags.DEFINE_float("train_beam_freq", 0.0, "Beam-based training.") -tf.app.flags.DEFINE_float("train_beam_anneal", 20000, "How many steps anneal.") -tf.app.flags.DEFINE_integer("eval_beam_steps", 4, "How many beam steps eval.") -tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size.") -tf.app.flags.DEFINE_integer("steps_per_checkpoint", 100, "Steps per epoch.") -tf.app.flags.DEFINE_integer("nmaps", 64, "Number of floats in 
each cell.") -tf.app.flags.DEFINE_integer("vec_size", 64, "Size of word vectors.") -tf.app.flags.DEFINE_integer("train_data_size", 1000, "Training examples/len.") -tf.app.flags.DEFINE_integer("max_length", 40, "Maximum length.") -tf.app.flags.DEFINE_integer("random_seed", 125459, "Random seed.") -tf.app.flags.DEFINE_integer("nconvs", 2, "How many convolutions / 1 step.") -tf.app.flags.DEFINE_integer("kw", 3, "Kernel width.") -tf.app.flags.DEFINE_integer("kh", 3, "Kernel height.") -tf.app.flags.DEFINE_integer("height", 4, "Height.") -tf.app.flags.DEFINE_integer("mem_size", -1, "Memory size (sqrt)") -tf.app.flags.DEFINE_integer("soft_mem_size", 1024, "Softmax memory this size.") -tf.app.flags.DEFINE_integer("num_gpus", 1, "Number of GPUs to use.") -tf.app.flags.DEFINE_integer("num_replicas", 1, "Number of replicas in use.") -tf.app.flags.DEFINE_integer("beam_size", 1, "Beam size during decoding. " - "If 0, no decoder, the non-extended Neural GPU.") -tf.app.flags.DEFINE_integer("max_target_vocab", 0, - "Maximal size of target vocabulary.") -tf.app.flags.DEFINE_integer("decode_offset", 0, "Offset for decoding.") -tf.app.flags.DEFINE_integer("task", -1, "Task id when running on borg.") -tf.app.flags.DEFINE_integer("nprint", 0, "How many test examples to print out.") -tf.app.flags.DEFINE_integer("eval_bin_print", 3, "How many bins step in eval.") -tf.app.flags.DEFINE_integer("mode", 0, "Mode: 0-train other-decode.") -tf.app.flags.DEFINE_bool("atrous", False, "Whether to use atrous convs.") -tf.app.flags.DEFINE_bool("layer_norm", False, "Do layer normalization.") -tf.app.flags.DEFINE_bool("quantize", False, "Whether to quantize variables.") -tf.app.flags.DEFINE_bool("do_train", True, "If false, only update memory.") -tf.app.flags.DEFINE_bool("rnn_baseline", False, "If true build an RNN instead.") -tf.app.flags.DEFINE_bool("simple_tokenizer", False, - "If true, tokenize on spaces only, digits are 0.") -tf.app.flags.DEFINE_bool("normalize_digits", True, - "Whether to 
normalize digits with simple tokenizer.") -tf.app.flags.DEFINE_integer("vocab_size", 16, "Joint vocabulary size.") -tf.app.flags.DEFINE_string("data_dir", "/tmp", "Data directory") -tf.app.flags.DEFINE_string("train_dir", "/tmp/", "Directory to store models.") -tf.app.flags.DEFINE_string("test_file_prefix", "", "Files to test (.en,.fr).") -tf.app.flags.DEFINE_integer("max_train_data_size", 0, - "Limit on the size of training data (0: no limit).") -tf.app.flags.DEFINE_string("word_vector_file_en", "", - "Optional file with word vectors to start training.") -tf.app.flags.DEFINE_string("word_vector_file_fr", "", - "Optional file with word vectors to start training.") -tf.app.flags.DEFINE_string("problem", "wmt", "What problem are we solving?.") - -tf.app.flags.DEFINE_integer("ps_tasks", 0, "Number of ps tasks used.") -tf.app.flags.DEFINE_string("master", "", "Name of the TensorFlow master.") - -FLAGS = tf.app.flags.FLAGS -EXTRA_EVAL = 10 -EVAL_LEN_INCR = 8 -MAXLEN_F = 2.0 - - -def zero_split(tok_list, append=None): - """Split tok_list (list of ints) on 0s, append int to all parts if given.""" - res, cur, l = [], [], 0 - for tok in tok_list: - if tok == 0: - if append is not None: - cur.append(append) - res.append(cur) - l = max(l, len(cur)) - cur = [] - else: - cur.append(tok) - if append is not None: - cur.append(append) - res.append(cur) - l = max(l, len(cur)) - return res, l - - -def read_data(source_path, target_path, buckets, max_size=None, print_out=True): - """Read data from source and target files and put into buckets. - - Args: - source_path: path to the files with token-ids for the source language. - target_path: path to the file with token-ids for the target language; - it must be aligned with the source file: n-th line contains the desired - output for n-th line from the source_path. - buckets: the buckets to use. - max_size: maximum number of lines to read, all other will be ignored; - if 0 or None, data files will be read completely (no limit). 
- If set to 1, no data will be returned (empty lists of the right form). - print_out: whether to print out status or not. - - Returns: - data_set: a list of length len(_buckets); data_set[n] contains a list of - (source, target) pairs read from the provided data files that fit - into the n-th bucket, i.e., such that len(source) < _buckets[n][0] and - len(target) < _buckets[n][1]; source and target are lists of token-ids. - """ - data_set = [[] for _ in buckets] - counter = 0 - if max_size != 1: - with tf.gfile.GFile(source_path, mode="r") as source_file: - with tf.gfile.GFile(target_path, mode="r") as target_file: - source, target = source_file.readline(), target_file.readline() - while source and target and (not max_size or counter < max_size): - counter += 1 - if counter % 100000 == 0 and print_out: - print(" reading data line %d" % counter) - sys.stdout.flush() - source_ids = [int(x) for x in source.split()] - target_ids = [int(x) for x in target.split()] - source_ids, source_len = zero_split(source_ids) - target_ids, target_len = zero_split(target_ids, append=wmt.EOS_ID) - for bucket_id, size in enumerate(buckets): - if source_len <= size and target_len <= size: - data_set[bucket_id].append([source_ids, target_ids]) - break - source, target = source_file.readline(), target_file.readline() - return data_set - - -global_train_set = {"wmt": []} -train_buckets_scale = {"wmt": []} - - -def calculate_buckets_scale(data_set, buckets, problem): - """Calculate buckets scales for the given data set.""" - train_bucket_sizes = [len(data_set[b]) for b in xrange(len(buckets))] - train_total_size = max(1, float(sum(train_bucket_sizes))) - - # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use - # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to - # the size if i-th training bucket, as used later. 
- if problem not in train_buckets_scale: - train_buckets_scale[problem] = [] - train_buckets_scale[problem].append( - [sum(train_bucket_sizes[:i + 1]) / train_total_size - for i in xrange(len(train_bucket_sizes))]) - return train_total_size - - -def read_data_into_global(source_path, target_path, buckets, - max_size=None, print_out=True): - """Read data into the global variables (can be in a separate thread).""" - # pylint: disable=global-variable-not-assigned - global global_train_set, train_buckets_scale - # pylint: enable=global-variable-not-assigned - data_set = read_data(source_path, target_path, buckets, max_size, print_out) - global_train_set["wmt"].append(data_set) - train_total_size = calculate_buckets_scale(data_set, buckets, "wmt") - if print_out: - print(" Finished global data reading (%d)." % train_total_size) - - -def initialize(sess=None): - """Initialize data and model.""" - global MAXLEN_F - # Create training directory if it does not exist. - if not tf.gfile.IsDirectory(FLAGS.train_dir): - data.print_out("Creating training directory %s." % FLAGS.train_dir) - tf.gfile.MkDir(FLAGS.train_dir) - decode_suffix = "beam%dln%d" % (FLAGS.beam_size, - int(100 * FLAGS.length_norm)) - if FLAGS.mode == 0: - decode_suffix = "" - if FLAGS.task >= 0: - data.log_filename = os.path.join(FLAGS.train_dir, - "log%d%s" % (FLAGS.task, decode_suffix)) - else: - data.log_filename = os.path.join(FLAGS.train_dir, "neural_gpu/log") - - # Set random seed. - if FLAGS.random_seed > 0: - seed = FLAGS.random_seed + max(0, FLAGS.task) - tf.set_random_seed(seed) - random.seed(seed) - np.random.seed(seed) - - # Check data sizes. - assert data.bins - max_length = min(FLAGS.max_length, data.bins[-1]) - while len(data.bins) > 1 and data.bins[-2] >= max_length + EXTRA_EVAL: - data.bins = data.bins[:-1] - if sess is None and FLAGS.task == 0 and FLAGS.num_replicas > 1: - if max_length > 60: - max_length = max_length * 1 / 2 # Save memory on chief. 
- min_length = min(14, max_length - 3) if FLAGS.problem == "wmt" else 3 - for p in FLAGS.problem.split("-"): - if p in ["progeval", "progsynth"]: - min_length = max(26, min_length) - assert max_length + 1 > min_length - while len(data.bins) > 1 and data.bins[-2] >= max_length + EXTRA_EVAL: - data.bins = data.bins[:-1] - - # Create checkpoint directory if it does not exist. - if FLAGS.mode == 0 or FLAGS.task < 0: - checkpoint_dir = os.path.join(FLAGS.train_dir, "neural_gpu%s" - % ("" if FLAGS.task < 0 else str(FLAGS.task))) - else: - checkpoint_dir = FLAGS.train_dir - if not tf.gfile.IsDirectory(checkpoint_dir): - data.print_out("Creating checkpoint directory %s." % checkpoint_dir) - tf.gfile.MkDir(checkpoint_dir) - - # Prepare data. - if FLAGS.problem == "wmt": - # Prepare WMT data. - data.print_out("Preparing WMT data in %s" % FLAGS.data_dir) - if FLAGS.simple_tokenizer: - MAXLEN_F = 3.5 - (en_train, fr_train, en_dev, fr_dev, - en_path, fr_path) = wmt.prepare_wmt_data( - FLAGS.data_dir, FLAGS.vocab_size, - tokenizer=wmt.space_tokenizer, - normalize_digits=FLAGS.normalize_digits) - else: - (en_train, fr_train, en_dev, fr_dev, - en_path, fr_path) = wmt.prepare_wmt_data( - FLAGS.data_dir, FLAGS.vocab_size) - - # Read data into buckets and compute their sizes. - fr_vocab, rev_fr_vocab = wmt.initialize_vocabulary(fr_path) - data.vocab = fr_vocab - data.rev_vocab = rev_fr_vocab - data.print_out("Reading development and training data (limit: %d)." 
- % FLAGS.max_train_data_size) - dev_set = {} - dev_set["wmt"] = read_data(en_dev, fr_dev, data.bins) - def data_read(size, print_out): - read_data_into_global(en_train, fr_train, data.bins, size, print_out) - data_read(50000, False) - read_thread_small = threading.Thread( - name="reading-data-small", target=lambda: data_read(900000, False)) - read_thread_small.start() - read_thread_full = threading.Thread( - name="reading-data-full", - target=lambda: data_read(FLAGS.max_train_data_size, True)) - read_thread_full.start() - data.print_out("Data reading set up.") - else: - # Prepare algorithmic data. - en_path, fr_path = None, None - tasks = FLAGS.problem.split("-") - data_size = FLAGS.train_data_size - for t in tasks: - data.print_out("Generating data for %s." % t) - if t in ["progeval", "progsynth"]: - data.init_data(t, data.bins[-1], 20 * data_size, FLAGS.vocab_size) - if len(program_utils.prog_vocab) > FLAGS.vocab_size - 2: - raise ValueError("Increase vocab_size to %d for prog-tasks." - % (len(program_utils.prog_vocab) + 2)) - data.rev_vocab = program_utils.prog_vocab - data.vocab = program_utils.prog_rev_vocab - else: - for l in xrange(max_length + EXTRA_EVAL - 1): - data.init_data(t, l, data_size, FLAGS.vocab_size) - data.init_data(t, data.bins[-2], data_size, FLAGS.vocab_size) - data.init_data(t, data.bins[-1], data_size, FLAGS.vocab_size) - if t not in global_train_set: - global_train_set[t] = [] - global_train_set[t].append(data.train_set[t]) - calculate_buckets_scale(data.train_set[t], data.bins, t) - dev_set = data.test_set - - # Grid-search parameters. 
- lr = FLAGS.lr - init_weight = FLAGS.init_weight - max_grad_norm = FLAGS.max_grad_norm - if sess is not None and FLAGS.task > -1: - def job_id_factor(step): - """If jobid / step mod 3 is 0, 1, 2: say 0, 1, -1.""" - return ((((FLAGS.task / step) % 3) + 1) % 3) - 1 - lr *= math.pow(2, job_id_factor(1)) - init_weight *= math.pow(1.5, job_id_factor(3)) - max_grad_norm *= math.pow(2, job_id_factor(9)) - - # Print out parameters. - curriculum = FLAGS.curriculum_seq - msg1 = ("layers %d kw %d h %d kh %d batch %d noise %.2f" - % (FLAGS.nconvs, FLAGS.kw, FLAGS.height, FLAGS.kh, - FLAGS.batch_size, FLAGS.grad_noise_scale)) - msg2 = ("cut %.2f lr %.3f iw %.2f cr %.2f nm %d d%.4f gn %.2f %s" - % (FLAGS.cutoff, lr, init_weight, curriculum, FLAGS.nmaps, - FLAGS.dropout, max_grad_norm, msg1)) - data.print_out(msg2) - - # Create model and initialize it. - tf.get_variable_scope().set_initializer( - tf.orthogonal_initializer(gain=1.8 * init_weight)) - max_sampling_rate = FLAGS.max_sampling_rate if FLAGS.mode == 0 else 0.0 - o = FLAGS.vocab_size if FLAGS.max_target_vocab < 1 else FLAGS.max_target_vocab - ngpu.CHOOSE_K = FLAGS.soft_mem_size - do_beam_model = FLAGS.train_beam_freq > 0.0001 and FLAGS.beam_size > 1 - beam_size = FLAGS.beam_size if FLAGS.mode > 0 and not do_beam_model else 1 - beam_size = min(beam_size, FLAGS.beam_size) - beam_model = None - def make_ngpu(cur_beam_size, back): - return ngpu.NeuralGPU( - FLAGS.nmaps, FLAGS.vec_size, FLAGS.vocab_size, o, - FLAGS.dropout, max_grad_norm, FLAGS.cutoff, FLAGS.nconvs, - FLAGS.kw, FLAGS.kh, FLAGS.height, FLAGS.mem_size, - lr / math.sqrt(FLAGS.num_replicas), min_length + 3, FLAGS.num_gpus, - FLAGS.num_replicas, FLAGS.grad_noise_scale, max_sampling_rate, - atrous=FLAGS.atrous, do_rnn=FLAGS.rnn_baseline, - do_layer_norm=FLAGS.layer_norm, beam_size=cur_beam_size, backward=back) - if sess is None: - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - model = make_ngpu(beam_size, True) - if do_beam_model: - 
tf.get_variable_scope().reuse_variables() - beam_model = make_ngpu(FLAGS.beam_size, False) - else: - model = make_ngpu(beam_size, True) - if do_beam_model: - tf.get_variable_scope().reuse_variables() - beam_model = make_ngpu(FLAGS.beam_size, False) - - sv = None - if sess is None: - # The supervisor configuration has a few overriden options. - sv = tf.train.Supervisor(logdir=checkpoint_dir, - is_chief=(FLAGS.task < 1), - saver=model.saver, - summary_op=None, - save_summaries_secs=60, - save_model_secs=15 * 60, - global_step=model.global_step) - - config = tf.ConfigProto(allow_soft_placement=True) - sess = sv.PrepareSession(FLAGS.master, config=config) - - data.print_out("Created model. Checkpoint dir %s" % checkpoint_dir) - - # Load model from parameters if a checkpoint exists. - ckpt = tf.train.get_checkpoint_state(checkpoint_dir) - if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path + ".index"): - data.print_out("Reading model parameters from %s" - % ckpt.model_checkpoint_path) - model.saver.restore(sess, ckpt.model_checkpoint_path) - elif sv is None: - sess.run(tf.global_variables_initializer()) - data.print_out("Initialized variables (no supervisor mode).") - elif FLAGS.task < 1 and FLAGS.mem_size > 0: - # sess.run(model.mem_norm_op) - data.print_out("Created new model and normalized mem (on chief).") - - # Return the model and needed variables. - return (model, beam_model, min_length, max_length, checkpoint_dir, - (global_train_set, dev_set, en_path, fr_path), sv, sess) - - -def m_step(model, beam_model, sess, batch_size, inp, target, bucket, nsteps, p): - """Evaluation multi-step for program synthesis.""" - state, scores, hist = None, [[-11.0 for _ in xrange(batch_size)]], [] - for _ in xrange(nsteps): - # Get the best beam (no training, just forward model). 
- new_target, new_first, new_inp, new_scores = get_best_beam( - beam_model, sess, inp, target, - batch_size, FLAGS.beam_size, bucket, hist, p, test_mode=True) - hist.append(new_first) - _, _, _, state = model.step(sess, inp, new_target, False, state=state) - inp = new_inp - scores.append([max(scores[-1][i], new_scores[i]) - for i in xrange(batch_size)]) - # The final step with the true target. - loss, res, _, _ = model.step(sess, inp, target, False, state=state) - return loss, res, new_target, scores[1:] - - -def single_test(bin_id, model, sess, nprint, batch_size, dev, p, print_out=True, - offset=None, beam_model=None): - """Test model on test data of length l using the given session.""" - if not dev[p][bin_id]: - data.print_out(" bin %d (%d)\t%s\tppl NA errors NA seq-errors NA" - % (bin_id, data.bins[bin_id], p)) - return 1.0, 1.0, 0.0 - inpt, target = data.get_batch( - bin_id, batch_size, dev[p], FLAGS.height, offset) - if FLAGS.beam_size > 1 and beam_model: - loss, res, new_tgt, scores = m_step( - model, beam_model, sess, batch_size, inpt, target, bin_id, - FLAGS.eval_beam_steps, p) - score_avgs = [sum(s) / float(len(s)) for s in scores] - score_maxs = [max(s) for s in scores] - score_str = ["(%.2f, %.2f)" % (score_avgs[i], score_maxs[i]) - for i in xrange(FLAGS.eval_beam_steps)] - data.print_out(" == scores (avg, max): %s" % "; ".join(score_str)) - errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, - nprint, new_tgt, scores[-1]) - else: - loss, res, _, _ = model.step(sess, inpt, target, False) - errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, - nprint) - seq_err = float(seq_err) / batch_size - if total > 0: - errors = float(errors) / total - if print_out: - data.print_out(" bin %d (%d)\t%s\tppl %.2f errors %.2f seq-errors %.2f" - % (bin_id, data.bins[bin_id], p, data.safe_exp(loss), - 100 * errors, 100 * seq_err)) - return (errors, seq_err, loss) - - -def assign_vectors(word_vector_file, embedding_key, vocab_path, 
sess): - """Assign the embedding_key variable from the given word vectors file.""" - # For words in the word vector file, set their embedding at start. - if not tf.gfile.Exists(word_vector_file): - data.print_out("Word vector file does not exist: %s" % word_vector_file) - sys.exit(1) - vocab, _ = wmt.initialize_vocabulary(vocab_path) - vectors_variable = [v for v in tf.trainable_variables() - if embedding_key == v.name] - if len(vectors_variable) != 1: - data.print_out("Word vector variable not found or too many.") - sys.exit(1) - vectors_variable = vectors_variable[0] - vectors = vectors_variable.eval() - data.print_out("Pre-setting word vectors from %s" % word_vector_file) - with tf.gfile.GFile(word_vector_file, mode="r") as f: - # Lines have format: dog 0.045123 -0.61323 0.413667 ... - for line in f: - line_parts = line.split() - # The first part is the word. - word = line_parts[0] - if word in vocab: - # Remaining parts are components of the vector. - word_vector = np.array(map(float, line_parts[1:])) - if len(word_vector) != FLAGS.vec_size: - data.print_out("Warn: Word '%s', Expecting vector size %d, " - "found %d" % (word, FLAGS.vec_size, - len(word_vector))) - else: - vectors[vocab[word]] = word_vector - # Assign the modified vectors to the vectors_variable in the graph. - sess.run([vectors_variable.initializer], - {vectors_variable.initializer.inputs[1]: vectors}) - - -def print_vectors(embedding_key, vocab_path, word_vector_file): - """Print vectors from the given variable.""" - _, rev_vocab = wmt.initialize_vocabulary(vocab_path) - vectors_variable = [v for v in tf.trainable_variables() - if embedding_key == v.name] - if len(vectors_variable) != 1: - data.print_out("Word vector variable not found or too many.") - sys.exit(1) - vectors_variable = vectors_variable[0] - vectors = vectors_variable.eval() - l, s = vectors.shape[0], vectors.shape[1] - data.print_out("Printing %d word vectors from %s to %s." 
- % (l, embedding_key, word_vector_file)) - with tf.gfile.GFile(word_vector_file, mode="w") as f: - # Lines have format: dog 0.045123 -0.61323 0.413667 ... - for i in xrange(l): - f.write(rev_vocab[i]) - for j in xrange(s): - f.write(" %.8f" % vectors[i][j]) - f.write("\n") - - -def get_bucket_id(train_buckets_scale_c, max_cur_length, data_set): - """Get a random bucket id.""" - # Choose a bucket according to data distribution. Pick a random number - # in [0, 1] and use the corresponding interval in train_buckets_scale. - random_number_01 = np.random.random_sample() - bucket_id = min([i for i in xrange(len(train_buckets_scale_c)) - if train_buckets_scale_c[i] > random_number_01]) - while bucket_id > 0 and not data_set[bucket_id]: - bucket_id -= 1 - for _ in xrange(10 if np.random.random_sample() < 0.9 else 1): - if data.bins[bucket_id] > max_cur_length: - random_number_01 = min(random_number_01, np.random.random_sample()) - bucket_id = min([i for i in xrange(len(train_buckets_scale_c)) - if train_buckets_scale_c[i] > random_number_01]) - while bucket_id > 0 and not data_set[bucket_id]: - bucket_id -= 1 - return bucket_id - - -def score_beams(beams, target, inp, history, p, - print_out=False, test_mode=False): - """Score beams.""" - if p == "progsynth": - return score_beams_prog(beams, target, inp, history, print_out, test_mode) - elif test_mode: - return beams[0], 10.0 if str(beams[0][:len(target)]) == str(target) else 0.0 - else: - history_s = [str(h) for h in history] - best, best_score, tgt, eos_id = None, -1000.0, target, None - if p == "wmt": - eos_id = wmt.EOS_ID - if eos_id and eos_id in target: - tgt = target[:target.index(eos_id)] - for beam in beams: - if eos_id and eos_id in beam: - beam = beam[:beam.index(eos_id)] - l = min(len(tgt), len(beam)) - score = len([i for i in xrange(l) if tgt[i] == beam[i]]) / float(len(tgt)) - hist_score = 20.0 if str([b for b in beam if b > 0]) in history_s else 0.0 - if score < 1.0: - score -= hist_score - if score > 
best_score: - best = beam - best_score = score - return best, best_score - - -def score_beams_prog(beams, target, inp, history, print_out=False, - test_mode=False): - """Score beams for program synthesis.""" - tgt_prog = linearize(target, program_utils.prog_vocab, True, 1) - hist_progs = [linearize(h, program_utils.prog_vocab, True, 1) - for h in history] - tgt_set = set(target) - if print_out: - print("target: ", tgt_prog) - inps, tgt_outs = [], [] - for i in xrange(3): - ilist = [inp[i + 1, l] for l in xrange(inp.shape[1])] - clist = [program_utils.prog_vocab[x] for x in ilist if x > 0] - olist = clist[clist.index("]") + 1:] # outputs - clist = clist[1:clist.index("]")] # inputs - inps.append([int(x) for x in clist]) - if olist[0] == "[": # olist may be [int] or just int - tgt_outs.append(str([int(x) for x in olist[1:-1]])) - else: - if len(olist) == 1: - tgt_outs.append(olist[0]) - else: - print([program_utils.prog_vocab[x] for x in ilist if x > 0]) - print(olist) - print(tgt_prog) - print(program_utils.evaluate(tgt_prog, {"a": inps[-1]})) - print("AAAAA") - tgt_outs.append(olist[0]) - if not test_mode: - for _ in xrange(7): - ilen = np.random.randint(len(target) - 3) + 1 - inps.append([random.choice(range(-15, 15)) for _ in range(ilen)]) - tgt_outs.extend([program_utils.evaluate(tgt_prog, {"a": inp}) - for inp in inps[3:]]) - best, best_prog, best_score = None, "", -1000.0 - for beam in beams: - b_prog = linearize(beam, program_utils.prog_vocab, True, 1) - b_set = set(beam) - jsim = len(tgt_set & b_set) / float(len(tgt_set | b_set)) - b_outs = [program_utils.evaluate(b_prog, {"a": inp}) for inp in inps] - errs = len([x for x in b_outs if x == "ERROR"]) - imatches = len([i for i in xrange(3) if b_outs[i] == tgt_outs[i]]) - perfect = 10.0 if imatches == 3 else 0.0 - hist_score = 20.0 if b_prog in hist_progs else 0.0 - if test_mode: - score = perfect - errs - else: - matches = len([i for i in xrange(10) if b_outs[i] == tgt_outs[i]]) - score = perfect + matches + 
jsim - errs - if score < 10.0: - score -= hist_score - # print b_prog - # print "jsim: ", jsim, " errs: ", errs, " mtchs: ", matches, " s: ", score - if score > best_score: - best = beam - best_prog = b_prog - best_score = score - if print_out: - print("best score: ", best_score, " best prog: ", best_prog) - return best, best_score - - -def get_best_beam(beam_model, sess, inp, target, batch_size, beam_size, - bucket, history, p, test_mode=False): - """Run beam_model, score beams, and return the best as target and in input.""" - _, output_logits, _, _ = beam_model.step( - sess, inp, target, None, beam_size=FLAGS.beam_size) - new_targets, new_firsts, scores, new_inp = [], [], [], np.copy(inp) - for b in xrange(batch_size): - outputs = [] - history_b = [[h[b, 0, l] for l in xrange(data.bins[bucket])] - for h in history] - for beam_idx in xrange(beam_size): - outputs.append([int(o[beam_idx * batch_size + b]) - for o in output_logits]) - target_t = [target[b, 0, l] for l in xrange(data.bins[bucket])] - best, best_score = score_beams( - outputs, [t for t in target_t if t > 0], inp[b, :, :], - [[t for t in h if t > 0] for h in history_b], p, test_mode=test_mode) - scores.append(best_score) - if 1 in best: # Only until _EOS. - best = best[:best.index(1) + 1] - best += [0 for _ in xrange(len(target_t) - len(best))] - new_targets.append([best]) - first, _ = score_beams( - outputs, [t for t in target_t if t > 0], inp[b, :, :], - [[t for t in h if t > 0] for h in history_b], p, test_mode=True) - if 1 in first: # Only until _EOS. - first = first[:first.index(1) + 1] - first += [0 for _ in xrange(len(target_t) - len(first))] - new_inp[b, 0, :] = np.array(first, dtype=np.int32) - new_firsts.append([first]) - # Change target if we found a great answer. 
- new_target = np.array(new_targets, dtype=np.int32) - for b in xrange(batch_size): - if scores[b] >= 10.0: - target[b, 0, :] = new_target[b, 0, :] - new_first = np.array(new_firsts, dtype=np.int32) - return new_target, new_first, new_inp, scores - - -def train(): - """Train the model.""" - batch_size = FLAGS.batch_size * FLAGS.num_gpus - (model, beam_model, min_length, max_length, checkpoint_dir, - (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize() - with sess.as_default(): - quant_op = model.quantize_op - max_cur_length = min(min_length + 3, max_length) - prev_acc_perp = [1000000 for _ in xrange(5)] - prev_seq_err = 1.0 - is_chief = FLAGS.task < 1 - do_report = False - - # Main traning loop. - while not sv.ShouldStop(): - global_step, max_cur_length, learning_rate = sess.run( - [model.global_step, model.cur_length, model.lr]) - acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0 - acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0 - - # For words in the word vector file, set their embedding at start. 
- bound1 = FLAGS.steps_per_checkpoint - 1 - if FLAGS.word_vector_file_en and global_step < bound1 and is_chief: - assign_vectors(FLAGS.word_vector_file_en, "embedding:0", - en_vocab_path, sess) - if FLAGS.max_target_vocab < 1: - assign_vectors(FLAGS.word_vector_file_en, "target_embedding:0", - en_vocab_path, sess) - - if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief: - assign_vectors(FLAGS.word_vector_file_fr, "embedding:0", - fr_vocab_path, sess) - if FLAGS.max_target_vocab < 1: - assign_vectors(FLAGS.word_vector_file_fr, "target_embedding:0", - fr_vocab_path, sess) - - for _ in xrange(FLAGS.steps_per_checkpoint): - step_count += 1 - step_c1 += 1 - global_step = int(model.global_step.eval()) - train_beam_anneal = global_step / float(FLAGS.train_beam_anneal) - train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal) - p = random.choice(FLAGS.problem.split("-")) - train_set = global_train_set[p][-1] - bucket_id = get_bucket_id(train_buckets_scale[p][-1], max_cur_length, - train_set) - # Prefer longer stuff 60% of time if not wmt. - if np.random.randint(100) < 60 and FLAGS.problem != "wmt": - bucket1 = get_bucket_id(train_buckets_scale[p][-1], max_cur_length, - train_set) - bucket_id = max(bucket1, bucket_id) - - # Run a step and time it. - start_time = time.time() - inp, target = data.get_batch(bucket_id, batch_size, train_set, - FLAGS.height) - noise_param = math.sqrt(math.pow(global_step + 1, -0.55) * - prev_seq_err) * FLAGS.grad_noise_scale - # In multi-step mode, we use best from beam for middle steps. - state, new_target, scores, history = None, None, None, [] - while (FLAGS.beam_size > 1 and - train_beam_freq > np.random.random_sample()): - # Get the best beam (no training, just forward model). 
- new_target, new_first, new_inp, scores = get_best_beam( - beam_model, sess, inp, target, - batch_size, FLAGS.beam_size, bucket_id, history, p) - history.append(new_first) - # Training step with the previous input and the best beam as target. - _, _, _, state = model.step(sess, inp, new_target, FLAGS.do_train, - noise_param, update_mem=True, state=state) - # Change input to the new one for the next step. - inp = new_inp - # If all results are great, stop (todo: not to wait for all?). - if FLAGS.nprint > 1: - print(scores) - if sum(scores) / float(len(scores)) >= 10.0: - break - # The final step with the true target. - loss, res, gnorm, _ = model.step( - sess, inp, target, FLAGS.do_train, noise_param, - update_mem=True, state=state) - step_time += time.time() - start_time - acc_grad_norm += 0.0 if gnorm is None else float(gnorm) - - # Accumulate statistics. - acc_loss += loss - acc_l1 += loss - errors, total, seq_err = data.accuracy( - inp, res, target, batch_size, 0, new_target, scores) - if FLAGS.nprint > 1: - print("seq_err: ", seq_err) - acc_total += total - acc_errors += errors - acc_seq_err += seq_err - - # Report summary every 10 steps. - if step_count + 3 > FLAGS.steps_per_checkpoint: - do_report = True # Don't polute plot too early. - if is_chief and step_count % 10 == 1 and do_report: - cur_loss = acc_l1 / float(step_c1) - acc_l1, step_c1 = 0.0, 0 - cur_perp = data.safe_exp(cur_loss) - summary = tf.Summary() - summary.value.extend( - [tf.Summary.Value(tag="log_perplexity", simple_value=cur_loss), - tf.Summary.Value(tag="perplexity", simple_value=cur_perp)]) - sv.SummaryComputed(sess, summary, global_step) - - # Normalize and print out accumulated statistics. - acc_loss /= step_count - step_time /= FLAGS.steps_per_checkpoint - acc_seq_err = float(acc_seq_err) / (step_count * batch_size) - prev_seq_err = max(0.0, acc_seq_err - 0.02) # No noise at error < 2%. 
- acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0 - t_size = float(sum([len(x) for x in train_set])) / float(1000000) - msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f" - % (global_step + 1, step_time, t_size, learning_rate, - acc_grad_norm / FLAGS.steps_per_checkpoint)) - data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f" % - (msg, max_cur_length, data.safe_exp(acc_loss), - 100*acc_errors, 100*acc_seq_err)) - - # If errors are below the curriculum threshold, move curriculum forward. - is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss) - is_good = is_good and FLAGS.curriculum_seq > acc_seq_err - if is_good and is_chief: - if FLAGS.quantize: - # Quantize weights. - data.print_out(" Quantizing parameters.") - sess.run([quant_op]) - # Increase current length (until the next with training data). - sess.run(model.cur_length_incr_op) - # Forget last perplexities if we're not yet at the end. - if max_cur_length < max_length: - prev_acc_perp.append(1000000) - - # Lower learning rate if we're worse than the last 5 checkpoints. - acc_perp = data.safe_exp(acc_loss) - if acc_perp > max(prev_acc_perp[-5:]) and is_chief: - sess.run(model.lr_decay_op) - prev_acc_perp.append(acc_perp) - - # Save checkpoint. - if is_chief: - checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt") - model.saver.save(sess, checkpoint_path, - global_step=model.global_step) - - # Run evaluation. 
- bin_bound = 4 - for p in FLAGS.problem.split("-"): - total_loss, total_err, tl_counter = 0.0, 0.0, 0 - for bin_id in xrange(len(data.bins)): - if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1: - err, _, loss = single_test(bin_id, model, sess, FLAGS.nprint, - batch_size * 4, dev_set, p, - beam_model=beam_model) - if loss > 0.0: - total_loss += loss - total_err += err - tl_counter += 1 - test_loss = total_loss / max(1, tl_counter) - test_err = total_err / max(1, tl_counter) - test_perp = data.safe_exp(test_loss) - summary = tf.Summary() - summary.value.extend( - [tf.Summary.Value(tag="test/%s/loss" % p, simple_value=test_loss), - tf.Summary.Value(tag="test/%s/error" % p, simple_value=test_err), - tf.Summary.Value(tag="test/%s/perplexity" % p, - simple_value=test_perp)]) - sv.SummaryComputed(sess, summary, global_step) - - -def linearize(output, rev_fr_vocab, simple_tokenizer=None, eos_id=wmt.EOS_ID): - # If there is an EOS symbol in outputs, cut them at that point (WMT). - if eos_id in output: - output = output[:output.index(eos_id)] - # Print out French sentence corresponding to outputs. 
- if simple_tokenizer or FLAGS.simple_tokenizer: - vlen = len(rev_fr_vocab) - def vget(o): - if o < vlen: - return rev_fr_vocab[o] - return "UNK" - return " ".join([vget(o) for o in output]) - else: - return wmt.basic_detokenizer([rev_fr_vocab[o] for o in output]) - - -def evaluate(): - """Evaluate an existing model.""" - batch_size = FLAGS.batch_size * FLAGS.num_gpus - with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: - (model, beam_model, _, _, _, - (_, dev_set, en_vocab_path, fr_vocab_path), _, sess) = initialize(sess) - for p in FLAGS.problem.split("-"): - for bin_id in xrange(len(data.bins)): - if (FLAGS.task >= 0 and bin_id > 4) or (FLAGS.nprint == 0 and - bin_id > 8 and p == "wmt"): - break - single_test(bin_id, model, sess, FLAGS.nprint, batch_size, dev_set, p, - beam_model=beam_model) - path = FLAGS.test_file_prefix - xid = "" if FLAGS.task < 0 else ("%.4d" % (FLAGS.task+FLAGS.decode_offset)) - en_path, fr_path = path + ".en" + xid, path + ".fr" + xid - # Evaluate the test file if they exist. - if path and tf.gfile.Exists(en_path) and tf.gfile.Exists(fr_path): - data.print_out("Translating test set %s" % en_path) - # Read lines. - en_lines, fr_lines = [], [] - with tf.gfile.GFile(en_path, mode="r") as f: - for line in f: - en_lines.append(line.strip()) - with tf.gfile.GFile(fr_path, mode="r") as f: - for line in f: - fr_lines.append(line.strip()) - # Tokenize and convert to ids. - en_vocab, _ = wmt.initialize_vocabulary(en_vocab_path) - _, rev_fr_vocab = wmt.initialize_vocabulary(fr_vocab_path) - if FLAGS.simple_tokenizer: - en_ids = [wmt.sentence_to_token_ids( - l, en_vocab, tokenizer=wmt.space_tokenizer, - normalize_digits=FLAGS.normalize_digits) - for l in en_lines] - else: - en_ids = [wmt.sentence_to_token_ids(l, en_vocab) for l in en_lines] - # Translate. - results = [] - for idx, token_ids in enumerate(en_ids): - if idx % 5 == 0: - data.print_out("Translating example %d of %d." 
% (idx, len(en_ids))) - # Which bucket does it belong to? - buckets = [b for b in xrange(len(data.bins)) - if data.bins[b] >= len(token_ids)] - if buckets: - result, result_cost = [], 100000000.0 - for bucket_id in buckets: - if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR: - break - # Get a 1-element batch to feed the sentence to the model. - used_batch_size = 1 # batch_size - inp, target = data.get_batch( - bucket_id, used_batch_size, None, FLAGS.height, - preset=([token_ids], [[]])) - loss, output_logits, _, _ = model.step( - sess, inp, target, None, beam_size=FLAGS.beam_size) - outputs = [int(o[0]) for o in output_logits] - loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm) - if FLAGS.simple_tokenizer: - cur_out = outputs - if wmt.EOS_ID in cur_out: - cur_out = cur_out[:cur_out.index(wmt.EOS_ID)] - res_tags = [rev_fr_vocab[o] for o in cur_out] - bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags) - loss += 1000.0 * bad_words + 100.0 * bad_brack - # print (bucket_id, loss) - if loss < result_cost: - result = outputs - result_cost = loss - final = linearize(result, rev_fr_vocab) - results.append("%s\t%s\n" % (final, fr_lines[idx])) - # print result_cost - sys.stderr.write(results[-1]) - sys.stderr.flush() - else: - sys.stderr.write("TOOO_LONG\t%s\n" % fr_lines[idx]) - sys.stderr.flush() - if xid: - decode_suffix = "beam%dln%dn" % (FLAGS.beam_size, - int(100 * FLAGS.length_norm)) - with tf.gfile.GFile(path + ".res" + decode_suffix + xid, mode="w") as f: - for line in results: - f.write(line) - - -def mul(l): - res = 1.0 - for s in l: - res *= s - return res - - -def interactive(): - """Interactively probe an existing model.""" - with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: - # Initialize model. - (model, _, _, _, _, (_, _, en_path, fr_path), _, _) = initialize(sess) - # Load vocabularies. 
- en_vocab, rev_en_vocab = wmt.initialize_vocabulary(en_path) - _, rev_fr_vocab = wmt.initialize_vocabulary(fr_path) - # Print out vectors and variables. - if FLAGS.nprint > 0 and FLAGS.word_vector_file_en: - print_vectors("embedding:0", en_path, FLAGS.word_vector_file_en) - if FLAGS.nprint > 0 and FLAGS.word_vector_file_fr: - print_vectors("target_embedding:0", fr_path, FLAGS.word_vector_file_fr) - total = 0 - for v in tf.trainable_variables(): - shape = v.get_shape().as_list() - total += mul(shape) - print(v.name, shape, mul(shape)) - print(total) - # Start interactive loop. - sys.stdout.write("Input to Neural GPU Translation Model.\n") - sys.stdout.write("> ") - sys.stdout.flush() - inpt = sys.stdin.readline(), "" - while inpt: - cures = [] - # Get token-ids for the input sentence. - if FLAGS.simple_tokenizer: - token_ids = wmt.sentence_to_token_ids( - inpt, en_vocab, tokenizer=wmt.space_tokenizer, - normalize_digits=FLAGS.normalize_digits) - else: - token_ids = wmt.sentence_to_token_ids(inpt, en_vocab) - print([rev_en_vocab[t] for t in token_ids]) - # Which bucket does it belong to? - buckets = [b for b in xrange(len(data.bins)) - if data.bins[b] >= max(len(token_ids), len(cures))] - if cures: - buckets = [buckets[0]] - if buckets: - result, result_cost = [], 10000000.0 - for bucket_id in buckets: - if data.bins[bucket_id] > MAXLEN_F * len(token_ids) + EVAL_LEN_INCR: - break - glen = 1 - for gen_idx in xrange(glen): - # Get a 1-element batch to feed the sentence to the model. - inp, target = data.get_batch( - bucket_id, 1, None, FLAGS.height, preset=([token_ids], [cures])) - loss, output_logits, _, _ = model.step( - sess, inp, target, None, beam_size=FLAGS.beam_size, - update_mem=False) - # If it is a greedy decoder, outputs are argmaxes of output_logits. 
- if FLAGS.beam_size > 1: - outputs = [int(o) for o in output_logits] - else: - loss = loss[0] - (data.bins[bucket_id] * FLAGS.length_norm) - outputs = [int(np.argmax(logit, axis=1)) - for logit in output_logits] - print([rev_fr_vocab[t] for t in outputs]) - print(loss, data.bins[bucket_id]) - print(linearize(outputs, rev_fr_vocab)) - cures.append(outputs[gen_idx]) - print(cures) - print(linearize(cures, rev_fr_vocab)) - if FLAGS.simple_tokenizer: - cur_out = outputs - if wmt.EOS_ID in cur_out: - cur_out = cur_out[:cur_out.index(wmt.EOS_ID)] - res_tags = [rev_fr_vocab[o] for o in cur_out] - bad_words, bad_brack = wmt.parse_constraints(token_ids, res_tags) - loss += 1000.0 * bad_words + 100.0 * bad_brack - if loss < result_cost: - result = outputs - result_cost = loss - print("FINAL", result_cost) - print([rev_fr_vocab[t] for t in result]) - print(linearize(result, rev_fr_vocab)) - else: - print("TOOO_LONG") - sys.stdout.write("> ") - sys.stdout.flush() - inpt = sys.stdin.readline(), "" - - -def main(_): - if FLAGS.mode == 0: - train() - elif FLAGS.mode == 1: - evaluate() - else: - interactive() - -if __name__ == "__main__": - tf.app.run() diff --git a/research/neural_gpu/program_utils.py b/research/neural_gpu/program_utils.py deleted file mode 100644 index 1f49d01292012487c4a01a5832fb044a378645ff..0000000000000000000000000000000000000000 --- a/research/neural_gpu/program_utils.py +++ /dev/null @@ -1,444 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for generating program synthesis and evaluation data.""" - -import contextlib -import sys -import random -import os - -try: - import StringIO -except ImportError: - from io import StringIO - -class ListType(object): - def __init__(self, arg): - self.arg = arg - - def __str__(self): - return "[" + str(self.arg) + "]" - - def __eq__(self, other): - if not isinstance(other, ListType): - return False - return self.arg == other.arg - - def __hash__(self): - return hash(self.arg) - -class VarType(object): - def __init__(self, arg): - self.arg = arg - - def __str__(self): - return str(self.arg) - - def __eq__(self, other): - if not isinstance(other, VarType): - return False - return self.arg == other.arg - - def __hash__(self): - return hash(self.arg) - -class FunctionType(object): - def __init__(self, args): - self.args = args - - def __str__(self): - return str(self.args[0]) + " -> " + str(self.args[1]) - - def __eq__(self, other): - if not isinstance(other, FunctionType): - return False - return self.args == other.args - - def __hash__(self): - return hash(tuple(self.args)) - - -class Function(object): - def __init__(self, name, arg_types, output_type, fn_arg_types = None): - self.name = name - self.arg_types = arg_types - self.fn_arg_types = fn_arg_types or [] - self.output_type = output_type - -Null = 100 -## Functions -f_head = Function("c_head", [ListType("Int")], "Int") -def c_head(xs): return xs[0] if len(xs) > 0 else Null - -f_last = Function("c_last", [ListType("Int")], "Int") -def c_last(xs): return xs[-1] if len(xs) > 0 else Null - -f_take = Function("c_take", ["Int", ListType("Int")], ListType("Int")) -def c_take(n, xs): return xs[:n] - -f_drop = Function("c_drop", ["Int", ListType("Int")], ListType("Int")) -def c_drop(n, xs): return xs[n:] - -f_access = 
Function("c_access", ["Int", ListType("Int")], "Int") -def c_access(n, xs): return xs[n] if n >= 0 and len(xs) > n else Null - -f_max = Function("c_max", [ListType("Int")], "Int") -def c_max(xs): return max(xs) if len(xs) > 0 else Null - -f_min = Function("c_min", [ListType("Int")], "Int") -def c_min(xs): return min(xs) if len(xs) > 0 else Null - -f_reverse = Function("c_reverse", [ListType("Int")], ListType("Int")) -def c_reverse(xs): return list(reversed(xs)) - -f_sort = Function("sorted", [ListType("Int")], ListType("Int")) -# def c_sort(xs): return sorted(xs) - -f_sum = Function("sum", [ListType("Int")], "Int") -# def c_sum(xs): return sum(xs) - - -## Lambdas -# Int -> Int -def plus_one(x): return x + 1 -def minus_one(x): return x - 1 -def times_two(x): return x * 2 -def neg(x): return x * (-1) -def div_two(x): return int(x/2) -def sq(x): return x**2 -def times_three(x): return x * 3 -def div_three(x): return int(x/3) -def times_four(x): return x * 4 -def div_four(x): return int(x/4) - -# Int -> Bool -def pos(x): return x > 0 -def neg(x): return x < 0 -def even(x): return x%2 == 0 -def odd(x): return x%2 == 1 - -# Int -> Int -> Int -def add(x, y): return x + y -def sub(x, y): return x - y -def mul(x, y): return x * y - -# HOFs -f_map = Function("map", [ListType("Int")], - ListType("Int"), - [FunctionType(["Int", "Int"])]) -f_filter = Function("filter", [ListType("Int")], - ListType("Int"), - [FunctionType(["Int", "Bool"])]) -f_count = Function("c_count", [ListType("Int")], - "Int", - [FunctionType(["Int", "Bool"])]) -def c_count(f, xs): return len([x for x in xs if f(x)]) - -f_zipwith = Function("c_zipwith", [ListType("Int"), ListType("Int")], - ListType("Int"), - [FunctionType(["Int", "Int", "Int"])]) #FIX -def c_zipwith(f, xs, ys): return [f(x, y) for (x, y) in zip(xs, ys)] - -f_scan = Function("c_scan", [ListType("Int")], - ListType("Int"), - [FunctionType(["Int", "Int", "Int"])]) -def c_scan(f, xs): - out = xs - for i in range(1, len(xs)): - out[i] = 
f(xs[i], xs[i -1]) - return out - -@contextlib.contextmanager -def stdoutIO(stdout=None): - old = sys.stdout - if stdout is None: - stdout = StringIO.StringIO() - sys.stdout = stdout - yield stdout - sys.stdout = old - - -def evaluate(program_str, input_names_to_vals, default="ERROR"): - exec_str = [] - for name, val in input_names_to_vals.iteritems(): - exec_str += name + " = " + str(val) + "; " - exec_str += program_str - if type(exec_str) is list: - exec_str = "".join(exec_str) - - with stdoutIO() as s: - # pylint: disable=bare-except - try: - exec(exec_str + " print(out)") - return s.getvalue()[:-1] - except: - return default - # pylint: enable=bare-except - - -class Statement(object): - """Statement class.""" - - def __init__(self, fn, output_var, arg_vars, fn_args=None): - self.fn = fn - self.output_var = output_var - self.arg_vars = arg_vars - self.fn_args = fn_args or [] - - def __str__(self): - return "%s = %s(%s%s%s)"%(self.output_var, - self.fn.name, - ", ".join(self.fn_args), - ", " if self.fn_args else "", - ", ".join(self.arg_vars)) - - def substitute(self, env): - self.output_var = env.get(self.output_var, self.output_var) - self.arg_vars = [env.get(v, v) for v in self.arg_vars] - - -class ProgramGrower(object): - """Grow programs.""" - - def __init__(self, functions, types_to_lambdas): - self.functions = functions - self.types_to_lambdas = types_to_lambdas - - def grow_body(self, new_var_name, dependencies, types_to_vars): - """Grow the program body.""" - choices = [] - for f in self.functions: - if all([a in types_to_vars.keys() for a in f.arg_types]): - choices.append(f) - - f = random.choice(choices) - args = [] - for t in f.arg_types: - possible_vars = random.choice(types_to_vars[t]) - var = random.choice(possible_vars) - args.append(var) - dependencies.setdefault(new_var_name, []).extend( - [var] + (dependencies[var])) - - fn_args = [random.choice(self.types_to_lambdas[t]) for t in f.fn_arg_types] - types_to_vars.setdefault(f.output_type, 
[]).append(new_var_name) - - return Statement(f, new_var_name, args, fn_args) - - def grow(self, program_len, input_types): - """Grow the program.""" - var_names = list(reversed(map(chr, range(97, 123)))) - dependencies = dict() - types_to_vars = dict() - input_names = [] - for t in input_types: - var = var_names.pop() - dependencies[var] = [] - types_to_vars.setdefault(t, []).append(var) - input_names.append(var) - - statements = [] - for _ in range(program_len - 1): - var = var_names.pop() - statements.append(self.grow_body(var, dependencies, types_to_vars)) - statements.append(self.grow_body("out", dependencies, types_to_vars)) - - new_var_names = [c for c in map(chr, range(97, 123)) - if c not in input_names] - new_var_names.reverse() - keep_statements = [] - env = dict() - for s in statements: - if s.output_var in dependencies["out"]: - keep_statements.append(s) - env[s.output_var] = new_var_names.pop() - if s.output_var == "out": - keep_statements.append(s) - - for k in keep_statements: - k.substitute(env) - - return Program(input_names, input_types, ";".join( - [str(k) for k in keep_statements])) - - -class Program(object): - """The program class.""" - - def __init__(self, input_names, input_types, body): - self.input_names = input_names - self.input_types = input_types - self.body = body - - def evaluate(self, inputs): - """Evaluate this program.""" - if len(inputs) != len(self.input_names): - raise AssertionError("inputs and input_names have to" - "have the same len. 
inp: %s , names: %s" % - (str(inputs), str(self.input_names))) - inp_str = "" - for (name, inp) in zip(self.input_names, inputs): - inp_str += name + " = " + str(inp) + "; " - - with stdoutIO() as s: - # pylint: disable=exec-used - exec(inp_str + self.body + "; print(out)") - # pylint: enable=exec-used - return s.getvalue()[:-1] - - def flat_str(self): - out = "" - for s in self.body.split(";"): - out += s + ";" - return out - - def __str__(self): - out = "" - for (n, t) in zip(self.input_names, self.input_types): - out += n + " = " + str(t) + "\n" - for s in self.body.split(";"): - out += s + "\n" - return out - - -prog_vocab = [] -prog_rev_vocab = {} - - -def tokenize(string, tokens=None): - """Tokenize the program string.""" - if tokens is None: - tokens = prog_vocab - tokens = sorted(tokens, key=len, reverse=True) - out = [] - string = string.strip() - while string: - found = False - for t in tokens: - if string.startswith(t): - out.append(t) - string = string[len(t):] - found = True - break - if not found: - raise ValueError("Couldn't tokenize this: " + string) - string = string.strip() - return out - - -def clean_up(output, max_val=100): - o = eval(str(output)) - if isinstance(o, bool): - return o - if isinstance(o, int): - if o >= 0: - return min(o, max_val) - else: - return max(o, -1 * max_val) - if isinstance(o, list): - return [clean_up(l) for l in o] - - -def make_vocab(): - gen(2, 0) - - -def gen(max_len, how_many): - """Generate some programs.""" - functions = [f_head, f_last, f_take, f_drop, f_access, f_max, f_min, - f_reverse, f_sort, f_sum, f_map, f_filter, f_count, f_zipwith, - f_scan] - - types_to_lambdas = { - FunctionType(["Int", "Int"]): ["plus_one", "minus_one", "times_two", - "div_two", "sq", "times_three", - "div_three", "times_four", "div_four"], - FunctionType(["Int", "Bool"]): ["pos", "neg", "even", "odd"], - FunctionType(["Int", "Int", "Int"]): ["add", "sub", "mul"] - } - - tokens = [] - for f in functions: - tokens.append(f.name) - for 
v in types_to_lambdas.values(): - tokens.extend(v) - tokens.extend(["=", ";", ",", "(", ")", "[", "]", "Int", "out"]) - tokens.extend(map(chr, range(97, 123))) - - io_tokens = map(str, range(-220, 220)) - if not prog_vocab: - prog_vocab.extend(["_PAD", "_EOS"] + tokens + io_tokens) - for i, t in enumerate(prog_vocab): - prog_rev_vocab[t] = i - - io_tokens += [",", "[", "]", ")", "(", "None"] - grower = ProgramGrower(functions=functions, - types_to_lambdas=types_to_lambdas) - - def mk_inp(l): - return [random.choice(range(-5, 5)) for _ in range(l)] - - tar = [ListType("Int")] - inps = [[mk_inp(3)], [mk_inp(5)], [mk_inp(7)], [mk_inp(15)]] - - save_prefix = None - outcomes_to_programs = dict() - tried = set() - counter = 0 - choices = [0] if max_len == 0 else range(max_len) - while counter < 100 * how_many and len(outcomes_to_programs) < how_many: - counter += 1 - length = random.choice(choices) - t = grower.grow(length, tar) - while t in tried: - length = random.choice(choices) - t = grower.grow(length, tar) - # print(t.flat_str()) - tried.add(t) - outcomes = [clean_up(t.evaluate(i)) for i in inps] - outcome_str = str(zip(inps, outcomes)) - if outcome_str in outcomes_to_programs: - outcomes_to_programs[outcome_str] = min( - [t.flat_str(), outcomes_to_programs[outcome_str]], - key=lambda x: len(tokenize(x, tokens))) - else: - outcomes_to_programs[outcome_str] = t.flat_str() - if counter % 5000 == 0: - print("== proggen: tried: " + str(counter)) - print("== proggen: kept: " + str(len(outcomes_to_programs))) - - if counter % 250000 == 0 and save_prefix is not None: - print("saving...") - save_counter = 0 - progfilename = os.path.join(save_prefix, "prog_" + str(counter) + ".txt") - iofilename = os.path.join(save_prefix, "io_" + str(counter) + ".txt") - prog_token_filename = os.path.join(save_prefix, - "prog_tokens_" + str(counter) + ".txt") - io_token_filename = os.path.join(save_prefix, - "io_tokens_" + str(counter) + ".txt") - with open(progfilename, "a+") as fp, \ - 
open(iofilename, "a+") as fi, \ - open(prog_token_filename, "a+") as ftp, \ - open(io_token_filename, "a+") as fti: - for (o, p) in outcomes_to_programs.iteritems(): - save_counter += 1 - if save_counter % 500 == 0: - print("saving %d of %d" % (save_counter, len(outcomes_to_programs))) - fp.write(p+"\n") - fi.write(o+"\n") - ftp.write(str(tokenize(p, tokens))+"\n") - fti.write(str(tokenize(o, io_tokens))+"\n") - - return list(outcomes_to_programs.values()) diff --git a/research/neural_gpu/wmt_utils.py b/research/neural_gpu/wmt_utils.py deleted file mode 100644 index ef831918f9c9279eb1c6e560e5730739e5fe9521..0000000000000000000000000000000000000000 --- a/research/neural_gpu/wmt_utils.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright 2015 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for downloading data from WMT, tokenizing, vocabularies.""" - -from __future__ import print_function - -import gzip -import os -import re -import tarfile - -from six.moves import urllib -import tensorflow as tf - -# Special vocabulary symbols - we always put them at the start. -_PAD = b"_PAD" -_GO = b"_GO" -_EOS = b"_EOS" -_UNK = b"_CHAR_UNK" -_SPACE = b"_SPACE" -_START_VOCAB = [_PAD, _GO, _EOS, _UNK, _SPACE] - -PAD_ID = 0 -GO_ID = 1 -EOS_ID = 2 -UNK_ID = 3 -SPACE_ID = 4 - -# Regular expressions used to tokenize. 
-_CHAR_MARKER = "_CHAR_" -_CHAR_MARKER_LEN = len(_CHAR_MARKER) -_SPEC_CHARS = "" + chr(226) + chr(153) + chr(128) -_PUNCTUATION = "][.,!?\"':;%$#@&*+}{|><=/^~)(_`,0123456789" + _SPEC_CHARS + "-" -_WORD_SPLIT = re.compile("([" + _PUNCTUATION + "])") -_OLD_WORD_SPLIT = re.compile(b"([.,!?\"':;)(])") -_DIGIT_RE = re.compile(br"\d") - -# URLs for WMT data. -_WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/training-giga-fren.tar" -_WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/dev-v2.tgz" - - -def maybe_download(directory, filename, url): - """Download filename from url unless it's already in directory.""" - if not tf.gfile.Exists(directory): - print("Creating directory %s" % directory) - os.mkdir(directory) - filepath = os.path.join(directory, filename) - if not tf.gfile.Exists(filepath): - print("Downloading %s to %s" % (url, filepath)) - filepath, _ = urllib.request.urlretrieve(url, filepath) - statinfo = os.stat(filepath) - print("Successfully downloaded", filename, statinfo.st_size, "bytes") - return filepath - - -def gunzip_file(gz_path, new_path): - """Unzips from gz_path into new_path.""" - print("Unpacking %s to %s" % (gz_path, new_path)) - with gzip.open(gz_path, "rb") as gz_file: - with open(new_path, "wb") as new_file: - for line in gz_file: - new_file.write(line) - - -def get_wmt_enfr_train_set(directory): - """Download the WMT en-fr training corpus to directory unless it's there.""" - train_path = os.path.join(directory, "giga-fren.release2.fixed") - if not (tf.gfile.Exists(train_path +".fr") and - tf.gfile.Exists(train_path +".en")): - corpus_file = maybe_download(directory, "training-giga-fren.tar", - _WMT_ENFR_TRAIN_URL) - print("Extracting tar file %s" % corpus_file) - with tarfile.open(corpus_file, "r") as corpus_tar: - corpus_tar.extractall(directory) - gunzip_file(train_path + ".fr.gz", train_path + ".fr") - gunzip_file(train_path + ".en.gz", train_path + ".en") - return train_path - - -def get_wmt_enfr_dev_set(directory): - """Download the WMT 
en-fr training corpus to directory unless it's there.""" - dev_name = "newstest2013" - dev_path = os.path.join(directory, dev_name) - if not (tf.gfile.Exists(dev_path + ".fr") and - tf.gfile.Exists(dev_path + ".en")): - dev_file = maybe_download(directory, "dev-v2.tgz", _WMT_ENFR_DEV_URL) - print("Extracting tgz file %s" % dev_file) - with tarfile.open(dev_file, "r:gz") as dev_tar: - fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") - en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") - fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. - en_dev_file.name = dev_name + ".en" - dev_tar.extract(fr_dev_file, directory) - dev_tar.extract(en_dev_file, directory) - return dev_path - - -def is_char(token): - if len(token) > _CHAR_MARKER_LEN: - if token[:_CHAR_MARKER_LEN] == _CHAR_MARKER: - return True - return False - - -def basic_detokenizer(tokens): - """Reverse the process of the basic tokenizer below.""" - result = [] - previous_nospace = True - for t in tokens: - if is_char(t): - result.append(t[_CHAR_MARKER_LEN:]) - previous_nospace = True - elif t == _SPACE: - result.append(" ") - previous_nospace = True - elif previous_nospace: - result.append(t) - previous_nospace = False - else: - result.extend([" ", t]) - previous_nospace = False - return "".join(result) - - -old_style = False - - -def basic_tokenizer(sentence): - """Very basic tokenizer: split the sentence into a list of tokens.""" - words = [] - if old_style: - for space_separated_fragment in sentence.strip().split(): - words.extend(re.split(_OLD_WORD_SPLIT, space_separated_fragment)) - return [w for w in words if w] - for space_separated_fragment in sentence.strip().split(): - tokens = [t for t in re.split(_WORD_SPLIT, space_separated_fragment) if t] - first_is_char = False - for i, t in enumerate(tokens): - if len(t) == 1 and t in _PUNCTUATION: - tokens[i] = _CHAR_MARKER + t - if i == 0: - first_is_char = True - if words and words[-1] != _SPACE and (first_is_char or 
is_char(words[-1])): - tokens = [_SPACE] + tokens - spaced_tokens = [] - for i, tok in enumerate(tokens): - spaced_tokens.append(tokens[i]) - if i < len(tokens) - 1: - if tok != _SPACE and not (is_char(tok) or is_char(tokens[i+1])): - spaced_tokens.append(_SPACE) - words.extend(spaced_tokens) - return words - - -def space_tokenizer(sentence): - return sentence.strip().split() - - -def is_pos_tag(token): - """Check if token is a part-of-speech tag.""" - return(token in ["CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", - "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", - "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", - "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", - "WP$", "WRB", ".", ",", ":", ")", "-LRB-", "(", "-RRB-", - "HYPH", "$", "``", "''", "ADD", "AFX", "QTR", "BES", "-DFL-", - "GW", "HVS", "NFP"]) - - -def parse_constraints(inpt, res): - ntags = len(res) - nwords = len(inpt) - npostags = len([x for x in res if is_pos_tag(x)]) - nclose = len([x for x in res if x[0] == "/"]) - nopen = ntags - nclose - npostags - return (abs(npostags - nwords), abs(nclose - nopen)) - - -def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, - tokenizer=None, normalize_digits=False): - """Create vocabulary file (if it does not exist yet) from data file. - - Data file is assumed to contain one sentence per line. Each sentence is - tokenized and digits are normalized (if normalize_digits is set). - Vocabulary contains the most-frequent tokens up to max_vocabulary_size. - We write it to vocabulary_path in a one-token-per-line format, so that later - token in the first line gets id=0, second line gets id=1, and so on. - - Args: - vocabulary_path: path where the vocabulary will be created. - data_path: data file that will be used to create vocabulary. - max_vocabulary_size: limit on the size of the created vocabulary. - tokenizer: a function to use to tokenize each data sentence; - if None, basic_tokenizer will be used. 
- normalize_digits: Boolean; if true, all digits are replaced by 0s. - """ - if not tf.gfile.Exists(vocabulary_path): - print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) - vocab, chars = {}, {} - for c in _PUNCTUATION: - chars[c] = 1 - - # Read French file. - with tf.gfile.GFile(data_path + ".fr", mode="rb") as f: - counter = 0 - for line_in in f: - line = " ".join(line_in.split()) - counter += 1 - if counter % 100000 == 0: - print(" processing fr line %d" % counter) - for c in line: - if c in chars: - chars[c] += 1 - else: - chars[c] = 1 - tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) - tokens = [t for t in tokens if not is_char(t) and t != _SPACE] - for w in tokens: - word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w - if word in vocab: - vocab[word] += 1000000000 # We want target words first. - else: - vocab[word] = 1000000000 - - # Read English file. - with tf.gfile.GFile(data_path + ".en", mode="rb") as f: - counter = 0 - for line_in in f: - line = " ".join(line_in.split()) - counter += 1 - if counter % 100000 == 0: - print(" processing en line %d" % counter) - for c in line: - if c in chars: - chars[c] += 1 - else: - chars[c] = 1 - tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) - tokens = [t for t in tokens if not is_char(t) and t != _SPACE] - for w in tokens: - word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w - if word in vocab: - vocab[word] += 1 - else: - vocab[word] = 1 - - sorted_vocab = sorted(vocab, key=vocab.get, reverse=True) - sorted_chars = sorted(chars, key=vocab.get, reverse=True) - sorted_chars = [_CHAR_MARKER + c for c in sorted_chars] - vocab_list = _START_VOCAB + sorted_chars + sorted_vocab - if tokenizer: - vocab_list = _START_VOCAB + sorted_vocab - if len(vocab_list) > max_vocabulary_size: - vocab_list = vocab_list[:max_vocabulary_size] - with tf.gfile.GFile(vocabulary_path, mode="wb") as vocab_file: - for w in vocab_list: - vocab_file.write(w + b"\n") - - 
-def initialize_vocabulary(vocabulary_path): - """Initialize vocabulary from file. - - We assume the vocabulary is stored one-item-per-line, so a file: - dog - cat - will result in a vocabulary {"dog": 0, "cat": 1}, and this function will - also return the reversed-vocabulary ["dog", "cat"]. - - Args: - vocabulary_path: path to the file containing the vocabulary. - - Returns: - a pair: the vocabulary (a dictionary mapping string to integers), and - the reversed vocabulary (a list, which reverses the vocabulary mapping). - - Raises: - ValueError: if the provided vocabulary_path does not exist. - """ - if tf.gfile.Exists(vocabulary_path): - rev_vocab = [] - with tf.gfile.GFile(vocabulary_path, mode="rb") as f: - rev_vocab.extend(f.readlines()) - rev_vocab = [line.strip() for line in rev_vocab] - vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) - return vocab, rev_vocab - else: - raise ValueError("Vocabulary file %s not found.", vocabulary_path) - - -def sentence_to_token_ids_raw(sentence, vocabulary, - tokenizer=None, normalize_digits=old_style): - """Convert a string to list of integers representing token-ids. - - For example, a sentence "I have a dog" may become tokenized into - ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2, - "a": 4, "dog": 7"} this function will return [1, 2, 4, 7]. - - Args: - sentence: the sentence in bytes format to convert to token-ids. - vocabulary: a dictionary mapping tokens to integers. - tokenizer: a function to use to tokenize each sentence; - if None, basic_tokenizer will be used. - normalize_digits: Boolean; if true, all digits are replaced by 0s. - - Returns: - a list of integers, the token-ids for the sentence. 
- """ - if tokenizer: - words = tokenizer(sentence) - else: - words = basic_tokenizer(sentence) - result = [] - for w in words: - if normalize_digits: - w = re.sub(_DIGIT_RE, b"0", w) - if w in vocabulary: - result.append(vocabulary[w]) - else: - if tokenizer: - result.append(UNK_ID) - else: - result.append(SPACE_ID) - for c in w: - result.append(vocabulary.get(_CHAR_MARKER + c, UNK_ID)) - result.append(SPACE_ID) - while result and result[0] == SPACE_ID: - result = result[1:] - while result and result[-1] == SPACE_ID: - result = result[:-1] - return result - - -def sentence_to_token_ids(sentence, vocabulary, - tokenizer=None, normalize_digits=old_style): - """Convert a string to list of integers representing token-ids, tab=0.""" - tab_parts = sentence.strip().split("\t") - toks = [sentence_to_token_ids_raw(t, vocabulary, tokenizer, normalize_digits) - for t in tab_parts] - res = [] - for t in toks: - res.extend(t) - res.append(0) - return res[:-1] - - -def data_to_token_ids(data_path, target_path, vocabulary_path, - tokenizer=None, normalize_digits=False): - """Tokenize data file and turn into token-ids using given vocabulary file. - - This function loads data line-by-line from data_path, calls the above - sentence_to_token_ids, and saves the result to target_path. See comment - for sentence_to_token_ids on the details of token-ids format. - - Args: - data_path: path to the data file in one-sentence-per-line format. - target_path: path where the file with token-ids will be created. - vocabulary_path: path to the vocabulary file. - tokenizer: a function to use to tokenize each sentence; - if None, basic_tokenizer will be used. - normalize_digits: Boolean; if true, all digits are replaced by 0s. 
- """ - if not tf.gfile.Exists(target_path): - print("Tokenizing data in %s" % data_path) - vocab, _ = initialize_vocabulary(vocabulary_path) - with tf.gfile.GFile(data_path, mode="rb") as data_file: - with tf.gfile.GFile(target_path, mode="w") as tokens_file: - counter = 0 - for line in data_file: - counter += 1 - if counter % 100000 == 0: - print(" tokenizing line %d" % counter) - token_ids = sentence_to_token_ids(line, vocab, tokenizer, - normalize_digits) - tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") - - -def prepare_wmt_data(data_dir, vocabulary_size, - tokenizer=None, normalize_digits=False): - """Get WMT data into data_dir, create vocabularies and tokenize data. - - Args: - data_dir: directory in which the data sets will be stored. - vocabulary_size: size of the joint vocabulary to create and use. - tokenizer: a function to use to tokenize each data sentence; - if None, basic_tokenizer will be used. - normalize_digits: Boolean; if true, all digits are replaced by 0s. - - Returns: - A tuple of 6 elements: - (1) path to the token-ids for English training data-set, - (2) path to the token-ids for French training data-set, - (3) path to the token-ids for English development data-set, - (4) path to the token-ids for French development data-set, - (5) path to the vocabulary file, - (6) path to the vocabulary file (for compatibility with non-joint vocab). - """ - # Get wmt data to the specified directory. - train_path = get_wmt_enfr_train_set(data_dir) - dev_path = get_wmt_enfr_dev_set(data_dir) - - # Create vocabularies of the appropriate sizes. - vocab_path = os.path.join(data_dir, "vocab%d.txt" % vocabulary_size) - create_vocabulary(vocab_path, train_path, vocabulary_size, - tokenizer=tokenizer, normalize_digits=normalize_digits) - - # Create token ids for the training data. 
- fr_train_ids_path = train_path + (".ids%d.fr" % vocabulary_size) - en_train_ids_path = train_path + (".ids%d.en" % vocabulary_size) - data_to_token_ids(train_path + ".fr", fr_train_ids_path, vocab_path, - tokenizer=tokenizer, normalize_digits=normalize_digits) - data_to_token_ids(train_path + ".en", en_train_ids_path, vocab_path, - tokenizer=tokenizer, normalize_digits=normalize_digits) - - # Create token ids for the development data. - fr_dev_ids_path = dev_path + (".ids%d.fr" % vocabulary_size) - en_dev_ids_path = dev_path + (".ids%d.en" % vocabulary_size) - data_to_token_ids(dev_path + ".fr", fr_dev_ids_path, vocab_path, - tokenizer=tokenizer, normalize_digits=normalize_digits) - data_to_token_ids(dev_path + ".en", en_dev_ids_path, vocab_path, - tokenizer=tokenizer, normalize_digits=normalize_digits) - - return (en_train_ids_path, fr_train_ids_path, - en_dev_ids_path, fr_dev_ids_path, - vocab_path, vocab_path) diff --git a/research/neural_programmer/README.md b/research/neural_programmer/README.md deleted file mode 100644 index dcc27f6fb015ec625935a0ea37d814a2ba10d2e3..0000000000000000000000000000000000000000 --- a/research/neural_programmer/README.md +++ /dev/null @@ -1,26 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Neural Programmer - -Implementation of the Neural Programmer model as described in this [paper](https://openreview.net/pdf?id=ry2YOrcge). - -Download and extract the data from the [WikiTableQuestions](https://ppasupat.github.io/WikiTableQuestions/) site. The dataset contains -11321, 2831, and 4344 examples for training, development, and testing respectively. We use their tokenization, number and date pre-processing. 
Please note that the above paper used the [initial release](https://github.com/ppasupat/WikiTableQuestions/releases/tag/v0.2) for training, development and testing. - -Change the `data_dir FLAG` to the location of the data. - -### Training -Run `python neural_programmer.py` - -The models are written to `FLAGS.output_dir`. - -### Testing -Run `python neural_programmer.py --evaluator_job=True` - -The models are loaded from `FLAGS.output_dir`. The evaluation is done on development data. - -In case of errors because of encoding, add `"# -*- coding: utf-8 -*-"` as the first line in `wiki_data.py` - -Maintained by Arvind Neelakantan (arvind2505) diff --git a/research/neural_programmer/data_utils.py b/research/neural_programmer/data_utils.py deleted file mode 100644 index 4df80c66ad21d2e046fabf78446dd199ae117b44..0000000000000000000000000000000000000000 --- a/research/neural_programmer/data_utils.py +++ /dev/null @@ -1,666 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Functions for constructing vocabulary, converting the examples to integer format and building the required masks for batch computation Author: aneelakantan (Arvind Neelakantan) -""" - -from __future__ import print_function - -import copy -import numbers -import numpy as np -import wiki_data - - -def return_index(a): - for i in range(len(a)): - if (a[i] == 1.0): - return i - - -def construct_vocab(data, utility, add_word=False): - ans = [] - for example in data: - sent = "" - for word in example.question: - if (not (isinstance(word, numbers.Number))): - sent += word + " " - example.original_nc = copy.deepcopy(example.number_columns) - example.original_wc = copy.deepcopy(example.word_columns) - example.original_nc_names = copy.deepcopy(example.number_column_names) - example.original_wc_names = copy.deepcopy(example.word_column_names) - if (add_word): - continue - number_found = 0 - if (not (example.is_bad_example)): - for word in example.question: - if (isinstance(word, numbers.Number)): - number_found += 1 - else: - if (not (utility.word_ids.has_key(word))): - utility.words.append(word) - utility.word_count[word] = 1 - utility.word_ids[word] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[word]] = word - else: - utility.word_count[word] += 1 - for col_name in example.word_column_names: - for word in col_name: - if (isinstance(word, numbers.Number)): - number_found += 1 - else: - if (not (utility.word_ids.has_key(word))): - utility.words.append(word) - utility.word_count[word] = 1 - utility.word_ids[word] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[word]] = word - else: - utility.word_count[word] += 1 - for col_name in example.number_column_names: - for word in col_name: - if (isinstance(word, numbers.Number)): - number_found += 1 - else: - if (not (utility.word_ids.has_key(word))): - utility.words.append(word) - utility.word_count[word] = 
1 - utility.word_ids[word] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[word]] = word - else: - utility.word_count[word] += 1 - - -def word_lookup(word, utility): - if (utility.word_ids.has_key(word)): - return word - else: - return utility.unk_token - - -def convert_to_int_2d_and_pad(a, utility): - ans = [] - #print a - for b in a: - temp = [] - if (len(b) > utility.FLAGS.max_entry_length): - b = b[0:utility.FLAGS.max_entry_length] - for remaining in range(len(b), utility.FLAGS.max_entry_length): - b.append(utility.dummy_token) - assert len(b) == utility.FLAGS.max_entry_length - for word in b: - temp.append(utility.word_ids[word_lookup(word, utility)]) - ans.append(temp) - #print ans - return ans - - -def convert_to_bool_and_pad(a, utility): - a = a.tolist() - for i in range(len(a)): - for j in range(len(a[i])): - if (a[i][j] < 1): - a[i][j] = False - else: - a[i][j] = True - a[i] = a[i] + [False] * (utility.FLAGS.max_elements - len(a[i])) - return a - - -seen_tables = {} - - -def partial_match(question, table, number): - answer = [] - match = {} - for i in range(len(table)): - temp = [] - for j in range(len(table[i])): - temp.append(0) - answer.append(temp) - for i in range(len(table)): - for j in range(len(table[i])): - for word in question: - if (number): - if (word == table[i][j]): - answer[i][j] = 1.0 - match[i] = 1.0 - else: - if (word in table[i][j]): - answer[i][j] = 1.0 - match[i] = 1.0 - return answer, match - - -def exact_match(question, table, number): - #performs exact match operation - answer = [] - match = {} - matched_indices = [] - for i in range(len(table)): - temp = [] - for j in range(len(table[i])): - temp.append(0) - answer.append(temp) - for i in range(len(table)): - for j in range(len(table[i])): - if (number): - for word in question: - if (word == table[i][j]): - match[i] = 1.0 - answer[i][j] = 1.0 - else: - table_entry = table[i][j] - for k in range(len(question)): - if (k + len(table_entry) <= len(question)): - if 
(table_entry == question[k:(k + len(table_entry))]): - #if(len(table_entry) == 1): - #print "match: ", table_entry, question - match[i] = 1.0 - answer[i][j] = 1.0 - matched_indices.append((k, len(table_entry))) - return answer, match, matched_indices - - -def partial_column_match(question, table, number): - answer = [] - for i in range(len(table)): - answer.append(0) - for i in range(len(table)): - for word in question: - if (word in table[i]): - answer[i] = 1.0 - return answer - - -def exact_column_match(question, table, number): - #performs exact match on column names - answer = [] - matched_indices = [] - for i in range(len(table)): - answer.append(0) - for i in range(len(table)): - table_entry = table[i] - for k in range(len(question)): - if (k + len(table_entry) <= len(question)): - if (table_entry == question[k:(k + len(table_entry))]): - answer[i] = 1.0 - matched_indices.append((k, len(table_entry))) - return answer, matched_indices - - -def get_max_entry(a): - e = {} - for w in a: - if (w != "UNK, "): - if (e.has_key(w)): - e[w] += 1 - else: - e[w] = 1 - if (len(e) > 0): - (key, val) = sorted(e.items(), key=lambda x: -1 * x[1])[0] - if (val > 1): - return key - else: - return -1.0 - else: - return -1.0 - - -def list_join(a): - ans = "" - for w in a: - ans += str(w) + ", " - return ans - - -def group_by_max(table, number): - #computes the most frequently occurring entry in a column - answer = [] - for i in range(len(table)): - temp = [] - for j in range(len(table[i])): - temp.append(0) - answer.append(temp) - for i in range(len(table)): - if (number): - curr = table[i] - else: - curr = [list_join(w) for w in table[i]] - max_entry = get_max_entry(curr) - #print i, max_entry - for j in range(len(curr)): - if (max_entry == curr[j]): - answer[i][j] = 1.0 - else: - answer[i][j] = 0.0 - return answer - - -def pick_one(a): - for i in range(len(a)): - if (1.0 in a[i]): - return True - return False - - -def check_processed_cols(col, utility): - return True in [ - 
True for y in col - if (y != utility.FLAGS.pad_int and y != - utility.FLAGS.bad_number_pre_process) - ] - - -def complete_wiki_processing(data, utility, train=True): - #convert to integers and padding - processed_data = [] - num_bad_examples = 0 - for example in data: - number_found = 0 - if (example.is_bad_example): - num_bad_examples += 1 - if (not (example.is_bad_example)): - example.string_question = example.question[:] - #entry match - example.processed_number_columns = example.processed_number_columns[:] - example.processed_word_columns = example.processed_word_columns[:] - example.word_exact_match, word_match, matched_indices = exact_match( - example.string_question, example.original_wc, number=False) - example.number_exact_match, number_match, _ = exact_match( - example.string_question, example.original_nc, number=True) - if (not (pick_one(example.word_exact_match)) and not ( - pick_one(example.number_exact_match))): - assert len(word_match) == 0 - assert len(number_match) == 0 - example.word_exact_match, word_match = partial_match( - example.string_question, example.original_wc, number=False) - #group by max - example.word_group_by_max = group_by_max(example.original_wc, False) - example.number_group_by_max = group_by_max(example.original_nc, True) - #column name match - example.word_column_exact_match, wcol_matched_indices = exact_column_match( - example.string_question, example.original_wc_names, number=False) - example.number_column_exact_match, ncol_matched_indices = exact_column_match( - example.string_question, example.original_nc_names, number=False) - if (not (1.0 in example.word_column_exact_match) and not ( - 1.0 in example.number_column_exact_match)): - example.word_column_exact_match = partial_column_match( - example.string_question, example.original_wc_names, number=False) - example.number_column_exact_match = partial_column_match( - example.string_question, example.original_nc_names, number=False) - if (len(word_match) > 0 or 
len(number_match) > 0): - example.question.append(utility.entry_match_token) - if (1.0 in example.word_column_exact_match or - 1.0 in example.number_column_exact_match): - example.question.append(utility.column_match_token) - example.string_question = example.question[:] - example.number_lookup_matrix = np.transpose( - example.number_lookup_matrix)[:] - example.word_lookup_matrix = np.transpose(example.word_lookup_matrix)[:] - example.columns = example.number_columns[:] - example.word_columns = example.word_columns[:] - example.len_total_cols = len(example.word_column_names) + len( - example.number_column_names) - example.column_names = example.number_column_names[:] - example.word_column_names = example.word_column_names[:] - example.string_column_names = example.number_column_names[:] - example.string_word_column_names = example.word_column_names[:] - example.sorted_number_index = [] - example.sorted_word_index = [] - example.column_mask = [] - example.word_column_mask = [] - example.processed_column_mask = [] - example.processed_word_column_mask = [] - example.word_column_entry_mask = [] - example.question_attention_mask = [] - example.question_number = example.question_number_1 = -1 - example.question_attention_mask = [] - example.ordinal_question = [] - example.ordinal_question_one = [] - new_question = [] - if (len(example.number_columns) > 0): - example.len_col = len(example.number_columns[0]) - else: - example.len_col = len(example.word_columns[0]) - for (start, length) in matched_indices: - for j in range(length): - example.question[start + j] = utility.unk_token - #print example.question - for word in example.question: - if (isinstance(word, numbers.Number) or wiki_data.is_date(word)): - if (not (isinstance(word, numbers.Number)) and - wiki_data.is_date(word)): - word = word.replace("X", "").replace("-", "") - number_found += 1 - if (number_found == 1): - example.question_number = word - if (len(example.ordinal_question) > 0): - 
example.ordinal_question[len(example.ordinal_question) - 1] = 1.0 - else: - example.ordinal_question.append(1.0) - elif (number_found == 2): - example.question_number_1 = word - if (len(example.ordinal_question_one) > 0): - example.ordinal_question_one[len(example.ordinal_question_one) - - 1] = 1.0 - else: - example.ordinal_question_one.append(1.0) - else: - new_question.append(word) - example.ordinal_question.append(0.0) - example.ordinal_question_one.append(0.0) - example.question = [ - utility.word_ids[word_lookup(w, utility)] for w in new_question - ] - example.question_attention_mask = [0.0] * len(example.question) - #when the first question number occurs before a word - example.ordinal_question = example.ordinal_question[0:len( - example.question)] - example.ordinal_question_one = example.ordinal_question_one[0:len( - example.question)] - #question-padding - example.question = [utility.word_ids[utility.dummy_token]] * ( - utility.FLAGS.question_length - len(example.question) - ) + example.question - example.question_attention_mask = [-10000.0] * ( - utility.FLAGS.question_length - len(example.question_attention_mask) - ) + example.question_attention_mask - example.ordinal_question = [0.0] * (utility.FLAGS.question_length - - len(example.ordinal_question) - ) + example.ordinal_question - example.ordinal_question_one = [0.0] * (utility.FLAGS.question_length - - len(example.ordinal_question_one) - ) + example.ordinal_question_one - if (True): - #number columns and related-padding - num_cols = len(example.columns) - start = 0 - for column in example.number_columns: - if (check_processed_cols(example.processed_number_columns[start], - utility)): - example.processed_column_mask.append(0.0) - sorted_index = sorted( - range(len(example.processed_number_columns[start])), - key=lambda k: example.processed_number_columns[start][k], - reverse=True) - sorted_index = sorted_index + [utility.FLAGS.pad_int] * ( - utility.FLAGS.max_elements - len(sorted_index)) - 
example.sorted_number_index.append(sorted_index) - example.columns[start] = column + [utility.FLAGS.pad_int] * ( - utility.FLAGS.max_elements - len(column)) - example.processed_number_columns[start] += [utility.FLAGS.pad_int] * ( - utility.FLAGS.max_elements - - len(example.processed_number_columns[start])) - start += 1 - example.column_mask.append(0.0) - for remaining in range(num_cols, utility.FLAGS.max_number_cols): - example.sorted_number_index.append([utility.FLAGS.pad_int] * - (utility.FLAGS.max_elements)) - example.columns.append([utility.FLAGS.pad_int] * - (utility.FLAGS.max_elements)) - example.processed_number_columns.append([utility.FLAGS.pad_int] * - (utility.FLAGS.max_elements)) - example.number_exact_match.append([0.0] * - (utility.FLAGS.max_elements)) - example.number_group_by_max.append([0.0] * - (utility.FLAGS.max_elements)) - example.column_mask.append(-100000000.0) - example.processed_column_mask.append(-100000000.0) - example.number_column_exact_match.append(0.0) - example.column_names.append([utility.dummy_token]) - #word column and related-padding - start = 0 - word_num_cols = len(example.word_columns) - for column in example.word_columns: - if (check_processed_cols(example.processed_word_columns[start], - utility)): - example.processed_word_column_mask.append(0.0) - sorted_index = sorted( - range(len(example.processed_word_columns[start])), - key=lambda k: example.processed_word_columns[start][k], - reverse=True) - sorted_index = sorted_index + [utility.FLAGS.pad_int] * ( - utility.FLAGS.max_elements - len(sorted_index)) - example.sorted_word_index.append(sorted_index) - column = convert_to_int_2d_and_pad(column, utility) - example.word_columns[start] = column + [[ - utility.word_ids[utility.dummy_token] - ] * utility.FLAGS.max_entry_length] * (utility.FLAGS.max_elements - - len(column)) - example.processed_word_columns[start] += [utility.FLAGS.pad_int] * ( - utility.FLAGS.max_elements - - len(example.processed_word_columns[start])) - 
example.word_column_entry_mask.append([0] * len(column) + [ - utility.word_ids[utility.dummy_token] - ] * (utility.FLAGS.max_elements - len(column))) - start += 1 - example.word_column_mask.append(0.0) - for remaining in range(word_num_cols, utility.FLAGS.max_word_cols): - example.sorted_word_index.append([utility.FLAGS.pad_int] * - (utility.FLAGS.max_elements)) - example.word_columns.append([[utility.word_ids[utility.dummy_token]] * - utility.FLAGS.max_entry_length] * - (utility.FLAGS.max_elements)) - example.word_column_entry_mask.append( - [utility.word_ids[utility.dummy_token]] * - (utility.FLAGS.max_elements)) - example.word_exact_match.append([0.0] * (utility.FLAGS.max_elements)) - example.word_group_by_max.append([0.0] * (utility.FLAGS.max_elements)) - example.processed_word_columns.append([utility.FLAGS.pad_int] * - (utility.FLAGS.max_elements)) - example.word_column_mask.append(-100000000.0) - example.processed_word_column_mask.append(-100000000.0) - example.word_column_exact_match.append(0.0) - example.word_column_names.append([utility.dummy_token] * - utility.FLAGS.max_entry_length) - seen_tables[example.table_key] = 1 - #convert column and word column names to integers - example.column_ids = convert_to_int_2d_and_pad(example.column_names, - utility) - example.word_column_ids = convert_to_int_2d_and_pad( - example.word_column_names, utility) - for i_em in range(len(example.number_exact_match)): - example.number_exact_match[i_em] = example.number_exact_match[ - i_em] + [0.0] * (utility.FLAGS.max_elements - - len(example.number_exact_match[i_em])) - example.number_group_by_max[i_em] = example.number_group_by_max[ - i_em] + [0.0] * (utility.FLAGS.max_elements - - len(example.number_group_by_max[i_em])) - for i_em in range(len(example.word_exact_match)): - example.word_exact_match[i_em] = example.word_exact_match[ - i_em] + [0.0] * (utility.FLAGS.max_elements - - len(example.word_exact_match[i_em])) - example.word_group_by_max[i_em] = 
example.word_group_by_max[ - i_em] + [0.0] * (utility.FLAGS.max_elements - - len(example.word_group_by_max[i_em])) - example.exact_match = example.number_exact_match + example.word_exact_match - example.group_by_max = example.number_group_by_max + example.word_group_by_max - example.exact_column_match = example.number_column_exact_match + example.word_column_exact_match - #answer and related mask, padding - if (example.is_lookup): - example.answer = example.calc_answer - example.number_print_answer = example.number_lookup_matrix.tolist() - example.word_print_answer = example.word_lookup_matrix.tolist() - for i_answer in range(len(example.number_print_answer)): - example.number_print_answer[i_answer] = example.number_print_answer[ - i_answer] + [0.0] * (utility.FLAGS.max_elements - - len(example.number_print_answer[i_answer])) - for i_answer in range(len(example.word_print_answer)): - example.word_print_answer[i_answer] = example.word_print_answer[ - i_answer] + [0.0] * (utility.FLAGS.max_elements - - len(example.word_print_answer[i_answer])) - example.number_lookup_matrix = convert_to_bool_and_pad( - example.number_lookup_matrix, utility) - example.word_lookup_matrix = convert_to_bool_and_pad( - example.word_lookup_matrix, utility) - for remaining in range(num_cols, utility.FLAGS.max_number_cols): - example.number_lookup_matrix.append([False] * - utility.FLAGS.max_elements) - example.number_print_answer.append([0.0] * utility.FLAGS.max_elements) - for remaining in range(word_num_cols, utility.FLAGS.max_word_cols): - example.word_lookup_matrix.append([False] * - utility.FLAGS.max_elements) - example.word_print_answer.append([0.0] * utility.FLAGS.max_elements) - example.print_answer = example.number_print_answer + example.word_print_answer - else: - example.answer = example.calc_answer - example.print_answer = [[0.0] * (utility.FLAGS.max_elements)] * ( - utility.FLAGS.max_number_cols + utility.FLAGS.max_word_cols) - #question_number masks - if 
(example.question_number == -1): - example.question_number_mask = np.zeros([utility.FLAGS.max_elements]) - else: - example.question_number_mask = np.ones([utility.FLAGS.max_elements]) - if (example.question_number_1 == -1): - example.question_number_one_mask = -10000.0 - else: - example.question_number_one_mask = np.float64(0.0) - if (example.len_col > utility.FLAGS.max_elements): - continue - processed_data.append(example) - return processed_data - - -def add_special_words(utility): - utility.words.append(utility.entry_match_token) - utility.word_ids[utility.entry_match_token] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[ - utility.entry_match_token]] = utility.entry_match_token - utility.entry_match_token_id = utility.word_ids[utility.entry_match_token] - print("entry match token: ", utility.word_ids[ - utility.entry_match_token], utility.entry_match_token_id) - utility.words.append(utility.column_match_token) - utility.word_ids[utility.column_match_token] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[ - utility.column_match_token]] = utility.column_match_token - utility.column_match_token_id = utility.word_ids[utility.column_match_token] - print("entry match token: ", utility.word_ids[ - utility.column_match_token], utility.column_match_token_id) - utility.words.append(utility.dummy_token) - utility.word_ids[utility.dummy_token] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[ - utility.dummy_token]] = utility.dummy_token - utility.dummy_token_id = utility.word_ids[utility.dummy_token] - utility.words.append(utility.unk_token) - utility.word_ids[utility.unk_token] = len(utility.word_ids) - utility.reverse_word_ids[utility.word_ids[ - utility.unk_token]] = utility.unk_token - - -def perform_word_cutoff(utility): - if (utility.FLAGS.word_cutoff > 0): - for word in utility.word_ids.keys(): - if (utility.word_count.has_key(word) and utility.word_count[word] < - utility.FLAGS.word_cutoff and word != 
utility.unk_token and - word != utility.dummy_token and word != utility.entry_match_token and - word != utility.column_match_token): - utility.word_ids.pop(word) - utility.words.remove(word) - - -def word_dropout(question, utility): - if (utility.FLAGS.word_dropout_prob > 0.0): - new_question = [] - for i in range(len(question)): - if (question[i] != utility.dummy_token_id and - utility.random.random() > utility.FLAGS.word_dropout_prob): - new_question.append(utility.word_ids[utility.unk_token]) - else: - new_question.append(question[i]) - return new_question - else: - return question - - -def generate_feed_dict(data, curr, batch_size, gr, train=False, utility=None): - #prepare feed dict dictionary - feed_dict = {} - feed_examples = [] - for j in range(batch_size): - feed_examples.append(data[curr + j]) - if (train): - feed_dict[gr.batch_question] = [ - word_dropout(feed_examples[j].question, utility) - for j in range(batch_size) - ] - else: - feed_dict[gr.batch_question] = [ - feed_examples[j].question for j in range(batch_size) - ] - feed_dict[gr.batch_question_attention_mask] = [ - feed_examples[j].question_attention_mask for j in range(batch_size) - ] - feed_dict[ - gr.batch_answer] = [feed_examples[j].answer for j in range(batch_size)] - feed_dict[gr.batch_number_column] = [ - feed_examples[j].columns for j in range(batch_size) - ] - feed_dict[gr.batch_processed_number_column] = [ - feed_examples[j].processed_number_columns for j in range(batch_size) - ] - feed_dict[gr.batch_processed_sorted_index_number_column] = [ - feed_examples[j].sorted_number_index for j in range(batch_size) - ] - feed_dict[gr.batch_processed_sorted_index_word_column] = [ - feed_examples[j].sorted_word_index for j in range(batch_size) - ] - feed_dict[gr.batch_question_number] = np.array( - [feed_examples[j].question_number for j in range(batch_size)]).reshape( - (batch_size, 1)) - feed_dict[gr.batch_question_number_one] = np.array( - [feed_examples[j].question_number_1 for j in 
range(batch_size)]).reshape( - (batch_size, 1)) - feed_dict[gr.batch_question_number_mask] = [ - feed_examples[j].question_number_mask for j in range(batch_size) - ] - feed_dict[gr.batch_question_number_one_mask] = np.array( - [feed_examples[j].question_number_one_mask for j in range(batch_size) - ]).reshape((batch_size, 1)) - feed_dict[gr.batch_print_answer] = [ - feed_examples[j].print_answer for j in range(batch_size) - ] - feed_dict[gr.batch_exact_match] = [ - feed_examples[j].exact_match for j in range(batch_size) - ] - feed_dict[gr.batch_group_by_max] = [ - feed_examples[j].group_by_max for j in range(batch_size) - ] - feed_dict[gr.batch_column_exact_match] = [ - feed_examples[j].exact_column_match for j in range(batch_size) - ] - feed_dict[gr.batch_ordinal_question] = [ - feed_examples[j].ordinal_question for j in range(batch_size) - ] - feed_dict[gr.batch_ordinal_question_one] = [ - feed_examples[j].ordinal_question_one for j in range(batch_size) - ] - feed_dict[gr.batch_number_column_mask] = [ - feed_examples[j].column_mask for j in range(batch_size) - ] - feed_dict[gr.batch_number_column_names] = [ - feed_examples[j].column_ids for j in range(batch_size) - ] - feed_dict[gr.batch_processed_word_column] = [ - feed_examples[j].processed_word_columns for j in range(batch_size) - ] - feed_dict[gr.batch_word_column_mask] = [ - feed_examples[j].word_column_mask for j in range(batch_size) - ] - feed_dict[gr.batch_word_column_names] = [ - feed_examples[j].word_column_ids for j in range(batch_size) - ] - feed_dict[gr.batch_word_column_entry_mask] = [ - feed_examples[j].word_column_entry_mask for j in range(batch_size) - ] - return feed_dict diff --git a/research/neural_programmer/model.py b/research/neural_programmer/model.py deleted file mode 100644 index 610d66699e6e41188be58cc1f623c030d243c689..0000000000000000000000000000000000000000 --- a/research/neural_programmer/model.py +++ /dev/null @@ -1,679 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Author: aneelakantan (Arvind Neelakantan) -""" - -from __future__ import print_function - -import numpy as np -import tensorflow as tf -import nn_utils - - -class Graph(): - - def __init__(self, utility, batch_size, max_passes, mode="train"): - self.utility = utility - self.data_type = self.utility.tf_data_type[self.utility.FLAGS.data_type] - self.max_elements = self.utility.FLAGS.max_elements - max_elements = self.utility.FLAGS.max_elements - self.num_cols = self.utility.FLAGS.max_number_cols - self.num_word_cols = self.utility.FLAGS.max_word_cols - self.question_length = self.utility.FLAGS.question_length - self.batch_size = batch_size - self.max_passes = max_passes - self.mode = mode - self.embedding_dims = self.utility.FLAGS.embedding_dims - #input question and a mask - self.batch_question = tf.placeholder(tf.int32, - [batch_size, self.question_length]) - self.batch_question_attention_mask = tf.placeholder( - self.data_type, [batch_size, self.question_length]) - #ground truth scalar answer and lookup answer - self.batch_answer = tf.placeholder(self.data_type, [batch_size]) - self.batch_print_answer = tf.placeholder( - self.data_type, - [batch_size, self.num_cols + self.num_word_cols, max_elements]) - #number columns and its processed version - self.batch_number_column = tf.placeholder( - self.data_type, [batch_size, self.num_cols, 
max_elements - ]) #columns with numeric entries - self.batch_processed_number_column = tf.placeholder( - self.data_type, [batch_size, self.num_cols, max_elements]) - self.batch_processed_sorted_index_number_column = tf.placeholder( - tf.int32, [batch_size, self.num_cols, max_elements]) - #word columns and its processed version - self.batch_processed_word_column = tf.placeholder( - self.data_type, [batch_size, self.num_word_cols, max_elements]) - self.batch_processed_sorted_index_word_column = tf.placeholder( - tf.int32, [batch_size, self.num_word_cols, max_elements]) - self.batch_word_column_entry_mask = tf.placeholder( - tf.int32, [batch_size, self.num_word_cols, max_elements]) - #names of word and number columns along with their mask - self.batch_word_column_names = tf.placeholder( - tf.int32, - [batch_size, self.num_word_cols, self.utility.FLAGS.max_entry_length]) - self.batch_word_column_mask = tf.placeholder( - self.data_type, [batch_size, self.num_word_cols]) - self.batch_number_column_names = tf.placeholder( - tf.int32, - [batch_size, self.num_cols, self.utility.FLAGS.max_entry_length]) - self.batch_number_column_mask = tf.placeholder(self.data_type, - [batch_size, self.num_cols]) - #exact match and group by max operation - self.batch_exact_match = tf.placeholder( - self.data_type, - [batch_size, self.num_cols + self.num_word_cols, max_elements]) - self.batch_column_exact_match = tf.placeholder( - self.data_type, [batch_size, self.num_cols + self.num_word_cols]) - self.batch_group_by_max = tf.placeholder( - self.data_type, - [batch_size, self.num_cols + self.num_word_cols, max_elements]) - #numbers in the question along with their position. 
This is used to compute arguments to the comparison operations - self.batch_question_number = tf.placeholder(self.data_type, [batch_size, 1]) - self.batch_question_number_one = tf.placeholder(self.data_type, - [batch_size, 1]) - self.batch_question_number_mask = tf.placeholder( - self.data_type, [batch_size, max_elements]) - self.batch_question_number_one_mask = tf.placeholder(self.data_type, - [batch_size, 1]) - self.batch_ordinal_question = tf.placeholder( - self.data_type, [batch_size, self.question_length]) - self.batch_ordinal_question_one = tf.placeholder( - self.data_type, [batch_size, self.question_length]) - - def LSTM_question_embedding(self, sentence, sentence_length): - #LSTM processes the input question - lstm_params = "question_lstm" - hidden_vectors = [] - sentence = self.batch_question - question_hidden = tf.zeros( - [self.batch_size, self.utility.FLAGS.embedding_dims], self.data_type) - question_c_hidden = tf.zeros( - [self.batch_size, self.utility.FLAGS.embedding_dims], self.data_type) - if (self.utility.FLAGS.rnn_dropout > 0.0): - if (self.mode == "train"): - rnn_dropout_mask = tf.cast( - tf.random_uniform( - tf.shape(question_hidden), minval=0.0, maxval=1.0) < - self.utility.FLAGS.rnn_dropout, - self.data_type) / self.utility.FLAGS.rnn_dropout - else: - rnn_dropout_mask = tf.ones_like(question_hidden) - for question_iterator in range(self.question_length): - curr_word = sentence[:, question_iterator] - question_vector = nn_utils.apply_dropout( - nn_utils.get_embedding(curr_word, self.utility, self.params), - self.utility.FLAGS.dropout, self.mode) - question_hidden, question_c_hidden = nn_utils.LSTMCell( - question_vector, question_hidden, question_c_hidden, lstm_params, - self.params) - if (self.utility.FLAGS.rnn_dropout > 0.0): - question_hidden = question_hidden * rnn_dropout_mask - hidden_vectors.append(tf.expand_dims(question_hidden, 0)) - hidden_vectors = tf.concat(axis=0, values=hidden_vectors) - return question_hidden, hidden_vectors - - 
def history_recurrent_step(self, curr_hprev, hprev): - #A single RNN step for controller or history RNN - return tf.tanh( - tf.matmul( - tf.concat(axis=1, values=[hprev, curr_hprev]), self.params[ - "history_recurrent"])) + self.params["history_recurrent_bias"] - - def question_number_softmax(self, hidden_vectors): - #Attention on quetsion to decide the question number to passed to comparison ops - def compute_ans(op_embedding, comparison): - op_embedding = tf.expand_dims(op_embedding, 0) - #dot product of operation embedding with hidden state to the left of the number occurrence - first = tf.transpose( - tf.matmul(op_embedding, - tf.transpose( - tf.reduce_sum(hidden_vectors * tf.tile( - tf.expand_dims( - tf.transpose(self.batch_ordinal_question), 2), - [1, 1, self.utility.FLAGS.embedding_dims]), 0)))) - second = self.batch_question_number_one_mask + tf.transpose( - tf.matmul(op_embedding, - tf.transpose( - tf.reduce_sum(hidden_vectors * tf.tile( - tf.expand_dims( - tf.transpose(self.batch_ordinal_question_one), 2 - ), [1, 1, self.utility.FLAGS.embedding_dims]), 0)))) - question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second])) - if (self.mode == "test"): - cond = tf.equal(question_number_softmax, - tf.reshape( - tf.reduce_max(question_number_softmax, 1), - [self.batch_size, 1])) - question_number_softmax = tf.where( - cond, - tf.fill(tf.shape(question_number_softmax), 1.0), - tf.fill(tf.shape(question_number_softmax), 0.0)) - question_number_softmax = tf.cast(question_number_softmax, - self.data_type) - ans = tf.reshape( - tf.reduce_sum(question_number_softmax * tf.concat( - axis=1, values=[self.batch_question_number, self.batch_question_number_one]), - 1), [self.batch_size, 1]) - return ans - - def compute_op_position(op_name): - for i in range(len(self.utility.operations_set)): - if (op_name == self.utility.operations_set[i]): - return i - - def compute_question_number(op_name): - op_embedding = tf.nn.embedding_lookup(self.params_unit, - 
compute_op_position(op_name)) - return compute_ans(op_embedding, op_name) - - curr_greater_question_number = compute_question_number("greater") - curr_lesser_question_number = compute_question_number("lesser") - curr_geq_question_number = compute_question_number("geq") - curr_leq_question_number = compute_question_number("leq") - return curr_greater_question_number, curr_lesser_question_number, curr_geq_question_number, curr_leq_question_number - - def perform_attention(self, context_vector, hidden_vectors, length, mask): - #Performs attention on hiddent_vectors using context vector - context_vector = tf.tile( - tf.expand_dims(context_vector, 0), [length, 1, 1]) #time * bs * d - attention_softmax = tf.nn.softmax( - tf.transpose(tf.reduce_sum(context_vector * hidden_vectors, 2)) + - mask) #batch_size * time - attention_softmax = tf.tile( - tf.expand_dims(tf.transpose(attention_softmax), 2), - [1, 1, self.embedding_dims]) - ans_vector = tf.reduce_sum(attention_softmax * hidden_vectors, 0) - return ans_vector - - #computes embeddings for column names using parameters of question module - def get_column_hidden_vectors(self): - #vector representations for the column names - self.column_hidden_vectors = tf.reduce_sum( - nn_utils.get_embedding(self.batch_number_column_names, self.utility, - self.params), 2) - self.word_column_hidden_vectors = tf.reduce_sum( - nn_utils.get_embedding(self.batch_word_column_names, self.utility, - self.params), 2) - - def create_summary_embeddings(self): - #embeddings for each text entry in the table using parameters of the question module - self.summary_text_entry_embeddings = tf.reduce_sum( - tf.expand_dims(self.batch_exact_match, 3) * tf.expand_dims( - tf.expand_dims( - tf.expand_dims( - nn_utils.get_embedding(self.utility.entry_match_token_id, - self.utility, self.params), 0), 1), - 2), 2) - - def compute_column_softmax(self, column_controller_vector, time_step): - #compute softmax over all the columns using column controller vector - 
column_controller_vector = tf.tile( - tf.expand_dims(column_controller_vector, 1), - [1, self.num_cols + self.num_word_cols, 1]) #max_cols * bs * d - column_controller_vector = nn_utils.apply_dropout( - column_controller_vector, self.utility.FLAGS.dropout, self.mode) - self.full_column_hidden_vectors = tf.concat( - axis=1, values=[self.column_hidden_vectors, self.word_column_hidden_vectors]) - self.full_column_hidden_vectors += self.summary_text_entry_embeddings - self.full_column_hidden_vectors = nn_utils.apply_dropout( - self.full_column_hidden_vectors, self.utility.FLAGS.dropout, self.mode) - column_logits = tf.reduce_sum( - column_controller_vector * self.full_column_hidden_vectors, 2) + ( - self.params["word_match_feature_column_name"] * - self.batch_column_exact_match) + self.full_column_mask - column_softmax = tf.nn.softmax(column_logits) #batch_size * max_cols - return column_softmax - - def compute_first_or_last(self, select, first=True): - #perform first ot last operation on row select with probabilistic row selection - answer = tf.zeros_like(select) - running_sum = tf.zeros([self.batch_size, 1], self.data_type) - for i in range(self.max_elements): - if (first): - current = tf.slice(select, [0, i], [self.batch_size, 1]) - else: - current = tf.slice(select, [0, self.max_elements - 1 - i], - [self.batch_size, 1]) - curr_prob = current * (1 - running_sum) - curr_prob = curr_prob * tf.cast(curr_prob >= 0.0, self.data_type) - running_sum += curr_prob - temp_ans = [] - curr_prob = tf.expand_dims(tf.reshape(curr_prob, [self.batch_size]), 0) - for i_ans in range(self.max_elements): - if (not (first) and i_ans == self.max_elements - 1 - i): - temp_ans.append(curr_prob) - elif (first and i_ans == i): - temp_ans.append(curr_prob) - else: - temp_ans.append(tf.zeros_like(curr_prob)) - temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans)) - answer += temp_ans - return answer - - def make_hard_softmax(self, softmax): - #converts soft selection to hard selection. 
used at test time - cond = tf.equal( - softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1])) - softmax = tf.where( - cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0)) - softmax = tf.cast(softmax, self.data_type) - return softmax - - def compute_max_or_min(self, select, maxi=True): - #computes the argmax and argmin of a column with probabilistic row selection - answer = tf.zeros([ - self.batch_size, self.num_cols + self.num_word_cols, self.max_elements - ], self.data_type) - sum_prob = tf.zeros([self.batch_size, self.num_cols + self.num_word_cols], - self.data_type) - for j in range(self.max_elements): - if (maxi): - curr_pos = j - else: - curr_pos = self.max_elements - 1 - j - select_index = tf.slice(self.full_processed_sorted_index_column, - [0, 0, curr_pos], [self.batch_size, -1, 1]) - select_mask = tf.equal( - tf.tile( - tf.expand_dims( - tf.tile( - tf.expand_dims(tf.range(self.max_elements), 0), - [self.batch_size, 1]), 1), - [1, self.num_cols + self.num_word_cols, 1]), select_index) - curr_prob = tf.expand_dims(select, 1) * tf.cast( - select_mask, self.data_type) * self.select_bad_number_mask - curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2) - curr_prob = curr_prob * tf.expand_dims( - tf.cast((1 - sum_prob) > 0.0, self.data_type), 2) - answer = tf.where(select_mask, curr_prob, answer) - sum_prob += tf.reduce_sum(curr_prob, 2) - return answer - - def perform_operations(self, softmax, full_column_softmax, select, - prev_select_1, curr_pass): - #performs all the 15 operations. 
computes scalar output, lookup answer and row selector - column_softmax = tf.slice(full_column_softmax, [0, 0], - [self.batch_size, self.num_cols]) - word_column_softmax = tf.slice(full_column_softmax, [0, self.num_cols], - [self.batch_size, self.num_word_cols]) - init_max = self.compute_max_or_min(select, maxi=True) - init_min = self.compute_max_or_min(select, maxi=False) - #operations that are column independent - count = tf.reshape(tf.reduce_sum(select, 1), [self.batch_size, 1]) - select_full_column_softmax = tf.tile( - tf.expand_dims(full_column_softmax, 2), - [1, 1, self.max_elements - ]) #BS * (max_cols + max_word_cols) * max_elements - select_word_column_softmax = tf.tile( - tf.expand_dims(word_column_softmax, 2), - [1, 1, self.max_elements]) #BS * max_word_cols * max_elements - select_greater = tf.reduce_sum( - self.init_select_greater * select_full_column_softmax, - 1) * self.batch_question_number_mask #BS * max_elements - select_lesser = tf.reduce_sum( - self.init_select_lesser * select_full_column_softmax, - 1) * self.batch_question_number_mask #BS * max_elements - select_geq = tf.reduce_sum( - self.init_select_geq * select_full_column_softmax, - 1) * self.batch_question_number_mask #BS * max_elements - select_leq = tf.reduce_sum( - self.init_select_leq * select_full_column_softmax, - 1) * self.batch_question_number_mask #BS * max_elements - select_max = tf.reduce_sum(init_max * select_full_column_softmax, - 1) #BS * max_elements - select_min = tf.reduce_sum(init_min * select_full_column_softmax, - 1) #BS * max_elements - select_prev = tf.concat(axis=1, values=[ - tf.slice(select, [0, 1], [self.batch_size, self.max_elements - 1]), - tf.cast(tf.zeros([self.batch_size, 1]), self.data_type) - ]) - select_next = tf.concat(axis=1, values=[ - tf.cast(tf.zeros([self.batch_size, 1]), self.data_type), tf.slice( - select, [0, 0], [self.batch_size, self.max_elements - 1]) - ]) - select_last_rs = self.compute_first_or_last(select, False) - select_first_rs = 
self.compute_first_or_last(select, True) - select_word_match = tf.reduce_sum(self.batch_exact_match * - select_full_column_softmax, 1) - select_group_by_max = tf.reduce_sum(self.batch_group_by_max * - select_full_column_softmax, 1) - length_content = 1 - length_select = 13 - length_print = 1 - values = tf.concat(axis=1, values=[count]) - softmax_content = tf.slice(softmax, [0, 0], - [self.batch_size, length_content]) - #compute scalar output - output = tf.reduce_sum(tf.multiply(softmax_content, values), 1) - #compute lookup answer - softmax_print = tf.slice(softmax, [0, length_content + length_select], - [self.batch_size, length_print]) - curr_print = select_full_column_softmax * tf.tile( - tf.expand_dims(select, 1), - [1, self.num_cols + self.num_word_cols, 1 - ]) #BS * max_cols * max_elements (conisders only column) - self.batch_lookup_answer = curr_print * tf.tile( - tf.expand_dims(softmax_print, 2), - [1, self.num_cols + self.num_word_cols, self.max_elements - ]) #BS * max_cols * max_elements - self.batch_lookup_answer = self.batch_lookup_answer * self.select_full_mask - #compute row select - softmax_select = tf.slice(softmax, [0, length_content], - [self.batch_size, length_select]) - select_lists = [ - tf.expand_dims(select_prev, 1), tf.expand_dims(select_next, 1), - tf.expand_dims(select_first_rs, 1), tf.expand_dims(select_last_rs, 1), - tf.expand_dims(select_group_by_max, 1), - tf.expand_dims(select_greater, 1), tf.expand_dims(select_lesser, 1), - tf.expand_dims(select_geq, 1), tf.expand_dims(select_leq, 1), - tf.expand_dims(select_max, 1), tf.expand_dims(select_min, 1), - tf.expand_dims(select_word_match, 1), - tf.expand_dims(self.reset_select, 1) - ] - select = tf.reduce_sum( - tf.tile(tf.expand_dims(softmax_select, 2), [1, 1, self.max_elements]) * - tf.concat(axis=1, values=select_lists), 1) - select = select * self.select_whole_mask - return output, select - - def one_pass(self, select, question_embedding, hidden_vectors, hprev, - prev_select_1, 
curr_pass): - #Performs one timestep which involves selecting an operation and a column - attention_vector = self.perform_attention( - hprev, hidden_vectors, self.question_length, - self.batch_question_attention_mask) #batch_size * embedding_dims - controller_vector = tf.nn.relu( - tf.matmul(hprev, self.params["controller_prev"]) + tf.matmul( - tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[ - "controller"])) - column_controller_vector = tf.nn.relu( - tf.matmul(hprev, self.params["column_controller_prev"]) + tf.matmul( - tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[ - "column_controller"])) - controller_vector = nn_utils.apply_dropout( - controller_vector, self.utility.FLAGS.dropout, self.mode) - self.operation_logits = tf.matmul(controller_vector, - tf.transpose(self.params_unit)) - softmax = tf.nn.softmax(self.operation_logits) - soft_softmax = softmax - #compute column softmax: bs * max_columns - weighted_op_representation = tf.transpose( - tf.matmul(tf.transpose(self.params_unit), tf.transpose(softmax))) - column_controller_vector = tf.nn.relu( - tf.matmul( - tf.concat(axis=1, values=[ - column_controller_vector, weighted_op_representation - ]), self.params["break_conditional"])) - full_column_softmax = self.compute_column_softmax(column_controller_vector, - curr_pass) - soft_column_softmax = full_column_softmax - if (self.mode == "test"): - full_column_softmax = self.make_hard_softmax(full_column_softmax) - softmax = self.make_hard_softmax(softmax) - output, select = self.perform_operations(softmax, full_column_softmax, - select, prev_select_1, curr_pass) - return output, select, softmax, soft_softmax, full_column_softmax, soft_column_softmax - - def compute_lookup_error(self, val): - #computes lookup error. 
- cond = tf.equal(self.batch_print_answer, val) - inter = tf.where( - cond, self.init_print_error, - tf.tile( - tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [ - self.batch_size, self.utility.FLAGS.max_word_cols + - self.utility.FLAGS.max_number_cols, - self.utility.FLAGS.max_elements - ])) - return tf.reduce_min(tf.reduce_min(inter, 1), 1) * tf.cast( - tf.greater( - tf.reduce_sum(tf.reduce_sum(tf.cast(cond, self.data_type), 1), 1), - 0.0), self.data_type) - - def soft_min(self, x, y): - return tf.maximum(-1.0 * (1 / ( - self.utility.FLAGS.soft_min_value + 0.0)) * tf.log( - tf.exp(-self.utility.FLAGS.soft_min_value * x) + tf.exp( - -self.utility.FLAGS.soft_min_value * y)), tf.zeros_like(x)) - - def error_computation(self): - #computes the error of each example in a batch - math_error = 0.5 * tf.square(tf.subtract(self.scalar_output, self.batch_answer)) - #scale math error - math_error = math_error / self.rows - math_error = tf.minimum(math_error, self.utility.FLAGS.max_math_error * - tf.ones(tf.shape(math_error), self.data_type)) - self.init_print_error = tf.where( - self.batch_gold_select, -1 * tf.log(self.batch_lookup_answer + 1e-300 + - self.invert_select_full_mask), -1 * - tf.log(1 - self.batch_lookup_answer)) * self.select_full_mask - print_error_1 = self.init_print_error * tf.cast( - tf.equal(self.batch_print_answer, 0.0), self.data_type) - print_error = tf.reduce_sum(tf.reduce_sum((print_error_1), 1), 1) - for val in range(1, 58): - print_error += self.compute_lookup_error(val + 0.0) - print_error = print_error * self.utility.FLAGS.print_cost / self.num_entries - if (self.mode == "train"): - error = tf.where( - tf.logical_and( - tf.not_equal(self.batch_answer, 0.0), - tf.not_equal( - tf.reduce_sum(tf.reduce_sum(self.batch_print_answer, 1), 1), - 0.0)), - self.soft_min(math_error, print_error), - tf.where( - tf.not_equal(self.batch_answer, 0.0), math_error, print_error)) - else: - error = tf.where( - tf.logical_and( - tf.equal(self.scalar_output, 
0.0), - tf.equal( - tf.reduce_sum(tf.reduce_sum(self.batch_lookup_answer, 1), 1), - 0.0)), - tf.ones_like(math_error), - tf.where( - tf.equal(self.scalar_output, 0.0), print_error, math_error)) - return error - - def batch_process(self): - #Computes loss and fraction of correct examples in a batch. - self.params_unit = nn_utils.apply_dropout( - self.params["unit"], self.utility.FLAGS.dropout, self.mode) - batch_size = self.batch_size - max_passes = self.max_passes - num_timesteps = 1 - max_elements = self.max_elements - select = tf.cast( - tf.fill([self.batch_size, max_elements], 1.0), self.data_type) - hprev = tf.cast( - tf.fill([self.batch_size, self.embedding_dims], 0.0), - self.data_type) #running sum of the hidden states of the model - output = tf.cast(tf.fill([self.batch_size, 1], 0.0), - self.data_type) #output of the model - correct = tf.cast( - tf.fill([1], 0.0), self.data_type - ) #to compute accuracy, returns number of correct examples for this batch - total_error = 0.0 - prev_select_1 = tf.zeros_like(select) - self.create_summary_embeddings() - self.get_column_hidden_vectors() - #get question embedding - question_embedding, hidden_vectors = self.LSTM_question_embedding( - self.batch_question, self.question_length) - #compute arguments for comparison operation - greater_question_number, lesser_question_number, geq_question_number, leq_question_number = self.question_number_softmax( - hidden_vectors) - self.init_select_greater = tf.cast( - tf.greater(self.full_processed_column, - tf.expand_dims(greater_question_number, 2)), self. - data_type) * self.select_bad_number_mask #bs * max_cols * max_elements - self.init_select_lesser = tf.cast( - tf.less(self.full_processed_column, - tf.expand_dims(lesser_question_number, 2)), self. - data_type) * self.select_bad_number_mask #bs * max_cols * max_elements - self.init_select_geq = tf.cast( - tf.greater_equal(self.full_processed_column, - tf.expand_dims(geq_question_number, 2)), self. 
- data_type) * self.select_bad_number_mask #bs * max_cols * max_elements - self.init_select_leq = tf.cast( - tf.less_equal(self.full_processed_column, - tf.expand_dims(leq_question_number, 2)), self. - data_type) * self.select_bad_number_mask #bs * max_cols * max_elements - self.init_select_word_match = 0 - if (self.utility.FLAGS.rnn_dropout > 0.0): - if (self.mode == "train"): - history_rnn_dropout_mask = tf.cast( - tf.random_uniform( - tf.shape(hprev), minval=0.0, maxval=1.0) < - self.utility.FLAGS.rnn_dropout, - self.data_type) / self.utility.FLAGS.rnn_dropout - else: - history_rnn_dropout_mask = tf.ones_like(hprev) - select = select * self.select_whole_mask - self.batch_log_prob = tf.zeros([self.batch_size], dtype=self.data_type) - #Perform max_passes and at each pass select operation and column - for curr_pass in range(max_passes): - print("step: ", curr_pass) - output, select, softmax, soft_softmax, column_softmax, soft_column_softmax = self.one_pass( - select, question_embedding, hidden_vectors, hprev, prev_select_1, - curr_pass) - prev_select_1 = select - #compute input to history RNN - input_op = tf.transpose( - tf.matmul( - tf.transpose(self.params_unit), tf.transpose( - soft_softmax))) #weighted average of emebdding of operations - input_col = tf.reduce_sum( - tf.expand_dims(soft_column_softmax, 2) * - self.full_column_hidden_vectors, 1) - history_input = tf.concat(axis=1, values=[input_op, input_col]) - history_input = nn_utils.apply_dropout( - history_input, self.utility.FLAGS.dropout, self.mode) - hprev = self.history_recurrent_step(history_input, hprev) - if (self.utility.FLAGS.rnn_dropout > 0.0): - hprev = hprev * history_rnn_dropout_mask - self.scalar_output = output - error = self.error_computation() - cond = tf.less(error, 0.0001, name="cond") - correct_add = tf.where( - cond, tf.fill(tf.shape(cond), 1.0), tf.fill(tf.shape(cond), 0.0)) - correct = tf.reduce_sum(correct_add) - error = error / batch_size - total_error = tf.reduce_sum(error) - 
total_correct = correct / batch_size - return total_error, total_correct - - def compute_error(self): - #Sets mask variables and performs batch processing - self.batch_gold_select = self.batch_print_answer > 0.0 - self.full_column_mask = tf.concat( - axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask]) - self.full_processed_column = tf.concat( - axis=1, - values=[self.batch_processed_number_column, self.batch_processed_word_column]) - self.full_processed_sorted_index_column = tf.concat(axis=1, values=[ - self.batch_processed_sorted_index_number_column, - self.batch_processed_sorted_index_word_column - ]) - self.select_bad_number_mask = tf.cast( - tf.logical_and( - tf.not_equal(self.full_processed_column, - self.utility.FLAGS.pad_int), - tf.not_equal(self.full_processed_column, - self.utility.FLAGS.bad_number_pre_process)), - self.data_type) - self.select_mask = tf.cast( - tf.logical_not( - tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int)), - self.data_type) - self.select_word_mask = tf.cast( - tf.logical_not( - tf.equal(self.batch_word_column_entry_mask, - self.utility.dummy_token_id)), self.data_type) - self.select_full_mask = tf.concat( - axis=1, values=[self.select_mask, self.select_word_mask]) - self.select_whole_mask = tf.maximum( - tf.reshape( - tf.slice(self.select_mask, [0, 0, 0], - [self.batch_size, 1, self.max_elements]), - [self.batch_size, self.max_elements]), - tf.reshape( - tf.slice(self.select_word_mask, [0, 0, 0], - [self.batch_size, 1, self.max_elements]), - [self.batch_size, self.max_elements])) - self.invert_select_full_mask = tf.cast( - tf.concat(axis=1, values=[ - tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int), - tf.equal(self.batch_word_column_entry_mask, - self.utility.dummy_token_id) - ]), self.data_type) - self.batch_lookup_answer = tf.zeros(tf.shape(self.batch_gold_select)) - self.reset_select = self.select_whole_mask - self.rows = tf.reduce_sum(self.select_whole_mask, 1) - 
self.num_entries = tf.reshape( - tf.reduce_sum(tf.reduce_sum(self.select_full_mask, 1), 1), - [self.batch_size]) - self.final_error, self.final_correct = self.batch_process() - return self.final_error - - def create_graph(self, params, global_step): - #Creates the graph to compute error, gradient computation and updates parameters - self.params = params - batch_size = self.batch_size - learning_rate = tf.cast(self.utility.FLAGS.learning_rate, self.data_type) - self.total_cost = self.compute_error() - optimize_params = self.params.values() - optimize_names = self.params.keys() - print("optimize params ", optimize_names) - if (self.utility.FLAGS.l2_regularizer > 0.0): - reg_cost = 0.0 - for ind_param in self.params.keys(): - reg_cost += tf.nn.l2_loss(self.params[ind_param]) - self.total_cost += self.utility.FLAGS.l2_regularizer * reg_cost - grads = tf.gradients(self.total_cost, optimize_params, name="gradients") - grad_norm = 0.0 - for p, name in zip(grads, optimize_names): - print("grads: ", p, name) - if isinstance(p, tf.IndexedSlices): - grad_norm += tf.reduce_sum(p.values * p.values) - elif not (p == None): - grad_norm += tf.reduce_sum(p * p) - grad_norm = tf.sqrt(grad_norm) - max_grad_norm = np.float32(self.utility.FLAGS.clip_gradients).astype( - self.utility.np_data_type[self.utility.FLAGS.data_type]) - grad_scale = tf.minimum( - tf.cast(1.0, self.data_type), max_grad_norm / grad_norm) - clipped_grads = list() - for p in grads: - if isinstance(p, tf.IndexedSlices): - tmp = p.values * grad_scale - clipped_grads.append(tf.IndexedSlices(tmp, p.indices)) - elif not (p == None): - clipped_grads.append(p * grad_scale) - else: - clipped_grads.append(p) - grads = clipped_grads - self.global_step = global_step - params_list = self.params.values() - params_list.append(self.global_step) - adam = tf.train.AdamOptimizer( - learning_rate, - epsilon=tf.cast(self.utility.FLAGS.eps, self.data_type), - use_locking=True) - self.step = adam.apply_gradients(zip(grads, 
optimize_params), - global_step=self.global_step) - self.init_op = tf.global_variables_initializer() diff --git a/research/neural_programmer/neural_programmer.py b/research/neural_programmer/neural_programmer.py deleted file mode 100644 index 145ca13d6ac8ce80d651f902440bfb3240f1c7a2..0000000000000000000000000000000000000000 --- a/research/neural_programmer/neural_programmer.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Implementation of the Neural Programmer model described in https://openreview.net/pdf?id=ry2YOrcge - -This file calls functions to load & pre-process data, construct the TF graph -and performs training or evaluation as specified by the flag evaluator_job -Author: aneelakantan (Arvind Neelakantan) -""" -from __future__ import print_function - -import time -from random import Random -import numpy as np -import tensorflow as tf -import model -import wiki_data -import parameters -import data_utils - -tf.flags.DEFINE_integer("train_steps", 100001, "Number of steps to train") -tf.flags.DEFINE_integer("eval_cycle", 500, - "Evaluate model at every eval_cycle steps") -tf.flags.DEFINE_integer("max_elements", 100, - "maximum rows that are considered for processing") -tf.flags.DEFINE_integer( - "max_number_cols", 15, - "maximum number columns that are considered for processing") -tf.flags.DEFINE_integer( - "max_word_cols", 25, - "maximum number columns that are considered for processing") -tf.flags.DEFINE_integer("question_length", 62, "maximum question length") -tf.flags.DEFINE_integer("max_entry_length", 1, "") -tf.flags.DEFINE_integer("max_passes", 4, "number of operation passes") -tf.flags.DEFINE_integer("embedding_dims", 256, "") -tf.flags.DEFINE_integer("batch_size", 20, "") -tf.flags.DEFINE_float("clip_gradients", 1.0, "") -tf.flags.DEFINE_float("eps", 1e-6, "") -tf.flags.DEFINE_float("param_init", 0.1, "") -tf.flags.DEFINE_float("learning_rate", 0.001, "") -tf.flags.DEFINE_float("l2_regularizer", 0.0001, "") -tf.flags.DEFINE_float("print_cost", 50.0, - "weighting factor in the objective function") -tf.flags.DEFINE_string("job_id", "temp", """job id""") -tf.flags.DEFINE_string("output_dir", "../model/", - """output_dir""") -tf.flags.DEFINE_string("data_dir", "../data/", - """data_dir""") -tf.flags.DEFINE_integer("write_every", 500, "wrtie every N") 
-tf.flags.DEFINE_integer("param_seed", 150, "") -tf.flags.DEFINE_integer("python_seed", 200, "") -tf.flags.DEFINE_float("dropout", 0.8, "dropout keep probability") -tf.flags.DEFINE_float("rnn_dropout", 0.9, - "dropout keep probability for rnn connections") -tf.flags.DEFINE_float("pad_int", -20000.0, - "number columns are padded with pad_int") -tf.flags.DEFINE_string("data_type", "double", "float or double") -tf.flags.DEFINE_float("word_dropout_prob", 0.9, "word dropout keep prob") -tf.flags.DEFINE_integer("word_cutoff", 10, "") -tf.flags.DEFINE_integer("vocab_size", 10800, "") -tf.flags.DEFINE_boolean("evaluator_job", False, - "wehther to run as trainer/evaluator") -tf.flags.DEFINE_float( - "bad_number_pre_process", -200000.0, - "number that is added to a corrupted table entry in a number column") -tf.flags.DEFINE_float("max_math_error", 3.0, - "max square loss error that is considered") -tf.flags.DEFINE_float("soft_min_value", 5.0, "") -FLAGS = tf.flags.FLAGS - - -class Utility: - #holds FLAGS and other variables that are used in different files - def __init__(self): - global FLAGS - self.FLAGS = FLAGS - self.unk_token = "UNK" - self.entry_match_token = "entry_match" - self.column_match_token = "column_match" - self.dummy_token = "dummy_token" - self.tf_data_type = {} - self.tf_data_type["double"] = tf.float64 - self.tf_data_type["float"] = tf.float32 - self.np_data_type = {} - self.np_data_type["double"] = np.float64 - self.np_data_type["float"] = np.float32 - self.operations_set = ["count"] + [ - "prev", "next", "first_rs", "last_rs", "group_by_max", "greater", - "lesser", "geq", "leq", "max", "min", "word-match" - ] + ["reset_select"] + ["print"] - self.word_ids = {} - self.reverse_word_ids = {} - self.word_count = {} - self.random = Random(FLAGS.python_seed) - - -def evaluate(sess, data, batch_size, graph, i): - #computes accuracy - num_examples = 0.0 - gc = 0.0 - for j in range(0, len(data) - batch_size + 1, batch_size): - [ct] = 
sess.run([graph.final_correct], - feed_dict=data_utils.generate_feed_dict(data, j, batch_size, - graph)) - gc += ct * batch_size - num_examples += batch_size - print("dev set accuracy after ", i, " : ", gc / num_examples) - print(num_examples, len(data)) - print("--------") - - -def Train(graph, utility, batch_size, train_data, sess, model_dir, - saver): - #performs training - curr = 0 - train_set_loss = 0.0 - utility.random.shuffle(train_data) - start = time.time() - for i in range(utility.FLAGS.train_steps): - curr_step = i - if (i > 0 and i % FLAGS.write_every == 0): - model_file = model_dir + "/model_" + str(i) - saver.save(sess, model_file) - if curr + batch_size >= len(train_data): - curr = 0 - utility.random.shuffle(train_data) - step, cost_value = sess.run( - [graph.step, graph.total_cost], - feed_dict=data_utils.generate_feed_dict( - train_data, curr, batch_size, graph, train=True, utility=utility)) - curr = curr + batch_size - train_set_loss += cost_value - if (i > 0 and i % FLAGS.eval_cycle == 0): - end = time.time() - time_taken = end - start - print("step ", i, " ", time_taken, " seconds ") - start = end - print(" printing train set loss: ", train_set_loss / utility.FLAGS.eval_cycle) - train_set_loss = 0.0 - - -def master(train_data, dev_data, utility): - #creates TF graph and calls trainer or evaluator - batch_size = utility.FLAGS.batch_size - model_dir = utility.FLAGS.output_dir + "/model" + utility.FLAGS.job_id + "/" - #create all paramters of the model - param_class = parameters.Parameters(utility) - params, global_step, init = param_class.parameters(utility) - key = "test" if (FLAGS.evaluator_job) else "train" - graph = model.Graph(utility, batch_size, utility.FLAGS.max_passes, mode=key) - graph.create_graph(params, global_step) - prev_dev_error = 0.0 - final_loss = 0.0 - final_accuracy = 0.0 - #start session - with tf.Session() as sess: - sess.run(init.name) - sess.run(graph.init_op.name) - to_save = params.copy() - saver = 
tf.train.Saver(to_save, max_to_keep=500) - if (FLAGS.evaluator_job): - while True: - selected_models = {} - file_list = tf.gfile.ListDirectory(model_dir) - for model_file in file_list: - if ("checkpoint" in model_file or "index" in model_file or - "meta" in model_file): - continue - if ("data" in model_file): - model_file = model_file.split(".")[0] - model_step = int( - model_file.split("_")[len(model_file.split("_")) - 1]) - selected_models[model_step] = model_file - file_list = sorted(selected_models.items(), key=lambda x: x[0]) - if (len(file_list) > 0): - file_list = file_list[0:len(file_list) - 1] - print("list of models: ", file_list) - for model_file in file_list: - model_file = model_file[1] - print("restoring: ", model_file) - saver.restore(sess, model_dir + "/" + model_file) - model_step = int( - model_file.split("_")[len(model_file.split("_")) - 1]) - print("evaluating on dev ", model_file, model_step) - evaluate(sess, dev_data, batch_size, graph, model_step) - else: - ckpt = tf.train.get_checkpoint_state(model_dir) - print("model dir: ", model_dir) - if (not (tf.gfile.IsDirectory(utility.FLAGS.output_dir))): - print("create dir: ", utility.FLAGS.output_dir) - tf.gfile.MkDir(utility.FLAGS.output_dir) - if (not (tf.gfile.IsDirectory(model_dir))): - print("create dir: ", model_dir) - tf.gfile.MkDir(model_dir) - Train(graph, utility, batch_size, train_data, sess, model_dir, - saver) - -def main(args): - utility = Utility() - train_name = "random-split-1-train.examples" - dev_name = "random-split-1-dev.examples" - test_name = "pristine-unseen-tables.examples" - #load data - dat = wiki_data.WikiQuestionGenerator(train_name, dev_name, test_name, FLAGS.data_dir) - train_data, dev_data, test_data = dat.load() - utility.words = [] - utility.word_ids = {} - utility.reverse_word_ids = {} - #construct vocabulary - data_utils.construct_vocab(train_data, utility) - data_utils.construct_vocab(dev_data, utility, True) - data_utils.construct_vocab(test_data, utility, 
True) - data_utils.add_special_words(utility) - data_utils.perform_word_cutoff(utility) - #convert data to int format and pad the inputs - train_data = data_utils.complete_wiki_processing(train_data, utility, True) - dev_data = data_utils.complete_wiki_processing(dev_data, utility, False) - test_data = data_utils.complete_wiki_processing(test_data, utility, False) - print("# train examples ", len(train_data)) - print("# dev examples ", len(dev_data)) - print("# test examples ", len(test_data)) - print("running open source") - #construct TF graph and train or evaluate - master(train_data, dev_data, utility) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/neural_programmer/nn_utils.py b/research/neural_programmer/nn_utils.py deleted file mode 100644 index 2f3a1a98bf7f71631410fc88982b336d33a02f52..0000000000000000000000000000000000000000 --- a/research/neural_programmer/nn_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Author: aneelakantan (Arvind Neelakantan) -""" - -import tensorflow as tf - -def get_embedding(word, utility, params): - return tf.nn.embedding_lookup(params["word"], word) - - -def apply_dropout(x, dropout_rate, mode): - if (dropout_rate > 0.0): - if (mode == "train"): - x = tf.nn.dropout(x, dropout_rate) - else: - x = x - return x - - -def LSTMCell(x, mprev, cprev, key, params): - """Create an LSTM cell. - - Implements the equations in pg.2 from - "Long Short-Term Memory Based Recurrent Neural Network Architectures - For Large Vocabulary Speech Recognition", - Hasim Sak, Andrew Senior, Francoise Beaufays. - - Args: - w: A dictionary of the weights and optional biases as returned - by LSTMParametersSplit(). - x: Inputs to this cell. - mprev: m_{t-1}, the recurrent activations (same as the output) - from the previous cell. - cprev: c_{t-1}, the cell activations from the previous cell. - keep_prob: Keep probability on the input and the outputs of a cell. - - Returns: - m: Outputs of this cell. - c: Cell Activations. 
- """ - - i = tf.matmul(x, params[key + "_ix"]) + tf.matmul(mprev, params[key + "_im"]) - i = tf.nn.bias_add(i, params[key + "_i"]) - f = tf.matmul(x, params[key + "_fx"]) + tf.matmul(mprev, params[key + "_fm"]) - f = tf.nn.bias_add(f, params[key + "_f"]) - c = tf.matmul(x, params[key + "_cx"]) + tf.matmul(mprev, params[key + "_cm"]) - c = tf.nn.bias_add(c, params[key + "_c"]) - o = tf.matmul(x, params[key + "_ox"]) + tf.matmul(mprev, params[key + "_om"]) - o = tf.nn.bias_add(o, params[key + "_o"]) - i = tf.sigmoid(i, name="i_gate") - f = tf.sigmoid(f, name="f_gate") - o = tf.sigmoid(o, name="o_gate") - c = f * cprev + i * tf.tanh(c) - m = o * c - return m, c diff --git a/research/neural_programmer/parameters.py b/research/neural_programmer/parameters.py deleted file mode 100644 index c576ae822b2d93c561381e27fe65afd2902b564e..0000000000000000000000000000000000000000 --- a/research/neural_programmer/parameters.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Author: aneelakantan (Arvind Neelakantan) -""" - -import numpy as np -import tensorflow as tf - - -class Parameters: - - def __init__(self, u): - self.utility = u - self.init_seed_counter = 0 - self.word_init = {} - - def parameters(self, utility): - params = {} - inits = [] - embedding_dims = self.utility.FLAGS.embedding_dims - params["unit"] = tf.Variable( - self.RandomUniformInit([len(utility.operations_set), embedding_dims])) - params["word"] = tf.Variable( - self.RandomUniformInit([utility.FLAGS.vocab_size, embedding_dims])) - params["word_match_feature_column_name"] = tf.Variable( - self.RandomUniformInit([1])) - params["controller"] = tf.Variable( - self.RandomUniformInit([2 * embedding_dims, embedding_dims])) - params["column_controller"] = tf.Variable( - self.RandomUniformInit([2 * embedding_dims, embedding_dims])) - params["column_controller_prev"] = tf.Variable( - self.RandomUniformInit([embedding_dims, embedding_dims])) - params["controller_prev"] = tf.Variable( - self.RandomUniformInit([embedding_dims, embedding_dims])) - global_step = tf.Variable(1, name="global_step") - #weigths of question and history RNN (or LSTM) - key_list = ["question_lstm"] - for key in key_list: - # Weights going from inputs to nodes. - for wgts in ["ix", "fx", "cx", "ox"]: - params[key + "_" + wgts] = tf.Variable( - self.RandomUniformInit([embedding_dims, embedding_dims])) - # Weights going from nodes to nodes. - for wgts in ["im", "fm", "cm", "om"]: - params[key + "_" + wgts] = tf.Variable( - self.RandomUniformInit([embedding_dims, embedding_dims])) - #Biases for the gates and cell - for bias in ["i", "f", "c", "o"]: - if (bias == "f"): - print("forget gate bias") - params[key + "_" + bias] = tf.Variable( - tf.random_uniform([embedding_dims], 1.0, 1.1, self.utility. 
- tf_data_type[self.utility.FLAGS.data_type])) - else: - params[key + "_" + bias] = tf.Variable( - self.RandomUniformInit([embedding_dims])) - params["history_recurrent"] = tf.Variable( - self.RandomUniformInit([3 * embedding_dims, embedding_dims])) - params["history_recurrent_bias"] = tf.Variable( - self.RandomUniformInit([1, embedding_dims])) - params["break_conditional"] = tf.Variable( - self.RandomUniformInit([2 * embedding_dims, embedding_dims])) - init = tf.global_variables_initializer() - return params, global_step, init - - def RandomUniformInit(self, shape): - """Returns a RandomUniform Tensor between -param_init and param_init.""" - param_seed = self.utility.FLAGS.param_seed - self.init_seed_counter += 1 - return tf.random_uniform( - shape, -1.0 * - (np.float32(self.utility.FLAGS.param_init) - ).astype(self.utility.np_data_type[self.utility.FLAGS.data_type]), - (np.float32(self.utility.FLAGS.param_init) - ).astype(self.utility.np_data_type[self.utility.FLAGS.data_type]), - self.utility.tf_data_type[self.utility.FLAGS.data_type], - param_seed + self.init_seed_counter) diff --git a/research/neural_programmer/wiki_data.py b/research/neural_programmer/wiki_data.py deleted file mode 100644 index c91637ca1ae537526ebddf4408b0fccd22d0f5e1..0000000000000000000000000000000000000000 --- a/research/neural_programmer/wiki_data.py +++ /dev/null @@ -1,532 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Loads the WikiQuestions dataset. - -An example consists of question, table. Additionally, we store the processed -columns which store the entries after performing number, date and other -preprocessing as done in the baseline. -columns, column names and processed columns are split into word and number -columns. -lookup answer (or matrix) is also split into number and word lookup matrix -Author: aneelakantan (Arvind Neelakantan) -""" -from __future__ import print_function - -import math -import os -import re -import numpy as np -import unicodedata as ud -import tensorflow as tf - -bad_number = -200000.0 #number that is added to a corrupted table entry in a number column - -def is_nan_or_inf(number): - return math.isnan(number) or math.isinf(number) - -def strip_accents(s): - u = unicode(s, "utf-8") - u_new = ''.join(c for c in ud.normalize('NFKD', u) if ud.category(c) != 'Mn') - return u_new.encode("utf-8") - - -def correct_unicode(string): - string = strip_accents(string) - string = re.sub("\xc2\xa0", " ", string).strip() - string = re.sub("\xe2\x80\x93", "-", string).strip() - #string = re.sub(ur'[\u0300-\u036F]', "", string) - string = re.sub("‚", ",", string) - string = re.sub("…", "...", string) - #string = re.sub("[·・]", ".", string) - string = re.sub("ˆ", "^", string) - string = re.sub("Ëœ", "~", string) - string = re.sub("‹", "<", string) - string = re.sub("›", ">", string) - #string = re.sub("[‘’´`]", "'", string) - #string = re.sub("[“”«»]", "\"", string) - #string = re.sub("[•†‡]", "", string) - #string = re.sub("[‐‑–—]", "-", string) - string = re.sub(r'[\u2E00-\uFFFF]', "", string) - string = re.sub("\\s+", " ", string).strip() - return string - - -def simple_normalize(string): - string = correct_unicode(string) - # Citations - string = re.sub("\[(nb ?)?\d+\]", "", string) - string = re.sub("\*+$", "", string) - # Year in parenthesis - string = re.sub("\(\d* ?-? 
?\d*\)", "", string) - string = re.sub("^\"(.*)\"$", "", string) - return string - - -def full_normalize(string): - #print "an: ", string - string = simple_normalize(string) - # Remove trailing info in brackets - string = re.sub("\[[^\]]*\]", "", string) - # Remove most unicode characters in other languages - string = re.sub(r'[\u007F-\uFFFF]', "", string.strip()) - # Remove trailing info in parenthesis - string = re.sub("\([^)]*\)$", "", string.strip()) - string = final_normalize(string) - # Get rid of question marks - string = re.sub("\?", "", string).strip() - # Get rid of trailing colons (usually occur in column titles) - string = re.sub("\:$", " ", string).strip() - # Get rid of slashes - string = re.sub(r"/", " ", string).strip() - string = re.sub(r"\\", " ", string).strip() - # Replace colon, slash, and dash with space - # Note: need better replacement for this when parsing time - string = re.sub(r"\:", " ", string).strip() - string = re.sub("/", " ", string).strip() - string = re.sub("-", " ", string).strip() - # Convert empty strings to UNK - # Important to do this last or near last - if not string: - string = "UNK" - return string - -def final_normalize(string): - # Remove leading and trailing whitespace - string = re.sub("\\s+", " ", string).strip() - # Convert entirely to lowercase - string = string.lower() - # Get rid of strangely escaped newline characters - string = re.sub("\\\\n", " ", string).strip() - # Get rid of quotation marks - string = re.sub(r"\"", "", string).strip() - string = re.sub(r"\'", "", string).strip() - string = re.sub(r"`", "", string).strip() - # Get rid of * - string = re.sub("\*", "", string).strip() - return string - -def is_number(x): - try: - f = float(x) - return not is_nan_or_inf(f) - except ValueError: - return False - except TypeError: - return False - - -class WikiExample(object): - - def __init__(self, id, question, answer, table_key): - self.question_id = id - self.question = question - self.answer = answer - 
self.table_key = table_key - self.lookup_matrix = [] - self.is_bad_example = False - self.is_word_lookup = False - self.is_ambiguous_word_lookup = False - self.is_number_lookup = False - self.is_number_calc = False - self.is_unknown_answer = False - - -class TableInfo(object): - - def __init__(self, word_columns, word_column_names, word_column_indices, - number_columns, number_column_names, number_column_indices, - processed_word_columns, processed_number_columns, orig_columns): - self.word_columns = word_columns - self.word_column_names = word_column_names - self.word_column_indices = word_column_indices - self.number_columns = number_columns - self.number_column_names = number_column_names - self.number_column_indices = number_column_indices - self.processed_word_columns = processed_word_columns - self.processed_number_columns = processed_number_columns - self.orig_columns = orig_columns - - -class WikiQuestionLoader(object): - - def __init__(self, data_name, root_folder): - self.root_folder = root_folder - self.data_folder = os.path.join(self.root_folder, "data") - self.examples = [] - self.data_name = data_name - - def num_questions(self): - return len(self.examples) - - def load_qa(self): - data_source = os.path.join(self.data_folder, self.data_name) - f = tf.gfile.GFile(data_source, "r") - id_regex = re.compile("\(id ([^\)]*)\)") - for line in f: - id_match = id_regex.search(line) - id = id_match.group(1) - self.examples.append(id) - - def load(self): - self.load_qa() - - -def is_date(word): - if (not (bool(re.search("[a-z0-9]", word, re.IGNORECASE)))): - return False - if (len(word) != 10): - return False - if (word[4] != "-"): - return False - if (word[7] != "-"): - return False - for i in range(len(word)): - if (not (word[i] == "X" or word[i] == "x" or word[i] == "-" or re.search( - "[0-9]", word[i]))): - return False - return True - - -class WikiQuestionGenerator(object): - - def __init__(self, train_name, dev_name, test_name, root_folder): - 
self.train_name = train_name - self.dev_name = dev_name - self.test_name = test_name - self.train_loader = WikiQuestionLoader(train_name, root_folder) - self.dev_loader = WikiQuestionLoader(dev_name, root_folder) - self.test_loader = WikiQuestionLoader(test_name, root_folder) - self.bad_examples = 0 - self.root_folder = root_folder - self.data_folder = os.path.join(self.root_folder, "annotated/data") - self.annotated_examples = {} - self.annotated_tables = {} - self.annotated_word_reject = {} - self.annotated_word_reject["-lrb-"] = 1 - self.annotated_word_reject["-rrb-"] = 1 - self.annotated_word_reject["UNK"] = 1 - - def is_money(self, word): - if (not (bool(re.search("[a-z0-9]", word, re.IGNORECASE)))): - return False - for i in range(len(word)): - if (not (word[i] == "E" or word[i] == "." or re.search("[0-9]", - word[i]))): - return False - return True - - def remove_consecutive(self, ner_tags, ner_values): - for i in range(len(ner_tags)): - if ((ner_tags[i] == "NUMBER" or ner_tags[i] == "MONEY" or - ner_tags[i] == "PERCENT" or ner_tags[i] == "DATE") and - i + 1 < len(ner_tags) and ner_tags[i] == ner_tags[i + 1] and - ner_values[i] == ner_values[i + 1] and ner_values[i] != ""): - word = ner_values[i] - word = word.replace(">", "").replace("<", "").replace("=", "").replace( - "%", "").replace("~", "").replace("$", "").replace("£", "").replace( - "€", "") - if (re.search("[A-Z]", word) and not (is_date(word)) and not ( - self.is_money(word))): - ner_values[i] = "A" - else: - ner_values[i] = "," - return ner_tags, ner_values - - def pre_process_sentence(self, tokens, ner_tags, ner_values): - sentence = [] - tokens = tokens.split("|") - ner_tags = ner_tags.split("|") - ner_values = ner_values.split("|") - ner_tags, ner_values = self.remove_consecutive(ner_tags, ner_values) - #print "old: ", tokens - for i in range(len(tokens)): - word = tokens[i] - if (ner_values[i] != "" and - (ner_tags[i] == "NUMBER" or ner_tags[i] == "MONEY" or - ner_tags[i] == "PERCENT" or 
ner_tags[i] == "DATE")): - word = ner_values[i] - word = word.replace(">", "").replace("<", "").replace("=", "").replace( - "%", "").replace("~", "").replace("$", "").replace("£", "").replace( - "€", "") - if (re.search("[A-Z]", word) and not (is_date(word)) and not ( - self.is_money(word))): - word = tokens[i] - if (is_number(ner_values[i])): - word = float(ner_values[i]) - elif (is_number(word)): - word = float(word) - if (tokens[i] == "score"): - word = "score" - if (is_number(word)): - word = float(word) - if (not (self.annotated_word_reject.has_key(word))): - if (is_number(word) or is_date(word) or self.is_money(word)): - sentence.append(word) - else: - word = full_normalize(word) - if (not (self.annotated_word_reject.has_key(word)) and - bool(re.search("[a-z0-9]", word, re.IGNORECASE))): - m = re.search(",", word) - sentence.append(word.replace(",", "")) - if (len(sentence) == 0): - sentence.append("UNK") - return sentence - - def load_annotated_data(self, in_file): - self.annotated_examples = {} - self.annotated_tables = {} - f = tf.gfile.GFile(in_file, "r") - counter = 0 - for line in f: - if (counter > 0): - line = line.strip() - (question_id, utterance, context, target_value, tokens, lemma_tokens, - pos_tags, ner_tags, ner_values, target_canon) = line.split("\t") - question = self.pre_process_sentence(tokens, ner_tags, ner_values) - target_canon = target_canon.split("|") - self.annotated_examples[question_id] = WikiExample( - question_id, question, target_canon, context) - self.annotated_tables[context] = [] - counter += 1 - print("Annotated examples loaded ", len(self.annotated_examples)) - f.close() - - def is_number_column(self, a): - for w in a: - if (len(w) != 1): - return False - if (not (is_number(w[0]))): - return False - return True - - def convert_table(self, table): - answer = [] - for i in range(len(table)): - temp = [] - for j in range(len(table[i])): - temp.append(" ".join([str(w) for w in table[i][j]])) - answer.append(temp) - return answer 
- - def load_annotated_tables(self): - for table in self.annotated_tables.keys(): - annotated_table = table.replace("csv", "annotated") - orig_columns = [] - processed_columns = [] - f = tf.gfile.GFile(os.path.join(self.root_folder, annotated_table), "r") - counter = 0 - for line in f: - if (counter > 0): - line = line.strip() - line = line + "\t" * (13 - len(line.split("\t"))) - (row, col, read_id, content, tokens, lemma_tokens, pos_tags, ner_tags, - ner_values, number, date, num2, read_list) = line.split("\t") - counter += 1 - f.close() - max_row = int(row) - max_col = int(col) - for i in range(max_col + 1): - orig_columns.append([]) - processed_columns.append([]) - for j in range(max_row + 1): - orig_columns[i].append(bad_number) - processed_columns[i].append(bad_number) - #print orig_columns - f = tf.gfile.GFile(os.path.join(self.root_folder, annotated_table), "r") - counter = 0 - column_names = [] - for line in f: - if (counter > 0): - line = line.strip() - line = line + "\t" * (13 - len(line.split("\t"))) - (row, col, read_id, content, tokens, lemma_tokens, pos_tags, ner_tags, - ner_values, number, date, num2, read_list) = line.split("\t") - entry = self.pre_process_sentence(tokens, ner_tags, ner_values) - if (row == "-1"): - column_names.append(entry) - else: - orig_columns[int(col)][int(row)] = entry - if (len(entry) == 1 and is_number(entry[0])): - processed_columns[int(col)][int(row)] = float(entry[0]) - else: - for single_entry in entry: - if (is_number(single_entry)): - processed_columns[int(col)][int(row)] = float(single_entry) - break - nt = ner_tags.split("|") - nv = ner_values.split("|") - for i_entry in range(len(tokens.split("|"))): - if (nt[i_entry] == "DATE" and - is_number(nv[i_entry].replace("-", "").replace("X", ""))): - processed_columns[int(col)][int(row)] = float(nv[ - i_entry].replace("-", "").replace("X", "")) - #processed_columns[int(col)][int(row)] = float(nv[i_entry]) - if (len(entry) == 1 and (is_number(entry[0]) or is_date(entry[0]) 
or - self.is_money(entry[0]))): - if (len(entry) == 1 and not (is_number(entry[0])) and - is_date(entry[0])): - entry[0] = entry[0].replace("X", "x") - counter += 1 - word_columns = [] - processed_word_columns = [] - word_column_names = [] - word_column_indices = [] - number_columns = [] - processed_number_columns = [] - number_column_names = [] - number_column_indices = [] - for i in range(max_col + 1): - if (self.is_number_column(orig_columns[i])): - number_column_indices.append(i) - number_column_names.append(column_names[i]) - temp = [] - for w in orig_columns[i]: - if (is_number(w[0])): - temp.append(w[0]) - number_columns.append(temp) - processed_number_columns.append(processed_columns[i]) - else: - word_column_indices.append(i) - word_column_names.append(column_names[i]) - word_columns.append(orig_columns[i]) - processed_word_columns.append(processed_columns[i]) - table_info = TableInfo( - word_columns, word_column_names, word_column_indices, number_columns, - number_column_names, number_column_indices, processed_word_columns, - processed_number_columns, orig_columns) - self.annotated_tables[table] = table_info - f.close() - - def answer_classification(self): - lookup_questions = 0 - number_lookup_questions = 0 - word_lookup_questions = 0 - ambiguous_lookup_questions = 0 - number_questions = 0 - bad_questions = 0 - ice_bad_questions = 0 - tot = 0 - got = 0 - ice = {} - with tf.gfile.GFile( - self.root_folder + "/arvind-with-norms-2.tsv", mode="r") as f: - lines = f.readlines() - for line in lines: - line = line.strip() - if (not (self.annotated_examples.has_key(line.split("\t")[0]))): - continue - if (len(line.split("\t")) == 4): - line = line + "\t" * (5 - len(line.split("\t"))) - if (not (is_number(line.split("\t")[2]))): - ice_bad_questions += 1 - (example_id, ans_index, ans_raw, process_answer, - matched_cells) = line.split("\t") - if (ice.has_key(example_id)): - ice[example_id].append(line.split("\t")) - else: - ice[example_id] = [line.split("\t")] - 
for q_id in self.annotated_examples.keys(): - tot += 1 - example = self.annotated_examples[q_id] - table_info = self.annotated_tables[example.table_key] - # Figure out if the answer is numerical or lookup - n_cols = len(table_info.orig_columns) - n_rows = len(table_info.orig_columns[0]) - example.lookup_matrix = np.zeros((n_rows, n_cols)) - exact_matches = {} - for (example_id, ans_index, ans_raw, process_answer, - matched_cells) in ice[q_id]: - for match_cell in matched_cells.split("|"): - if (len(match_cell.split(",")) == 2): - (row, col) = match_cell.split(",") - row = int(row) - col = int(col) - if (row >= 0): - exact_matches[ans_index] = 1 - answer_is_in_table = len(exact_matches) == len(example.answer) - if (answer_is_in_table): - for (example_id, ans_index, ans_raw, process_answer, - matched_cells) in ice[q_id]: - for match_cell in matched_cells.split("|"): - if (len(match_cell.split(",")) == 2): - (row, col) = match_cell.split(",") - row = int(row) - col = int(col) - example.lookup_matrix[row, col] = float(ans_index) + 1.0 - example.lookup_number_answer = 0.0 - if (answer_is_in_table): - lookup_questions += 1 - if len(example.answer) == 1 and is_number(example.answer[0]): - example.number_answer = float(example.answer[0]) - number_lookup_questions += 1 - example.is_number_lookup = True - else: - #print "word lookup" - example.calc_answer = example.number_answer = 0.0 - word_lookup_questions += 1 - example.is_word_lookup = True - else: - if (len(example.answer) == 1 and is_number(example.answer[0])): - example.number_answer = example.answer[0] - example.is_number_calc = True - else: - bad_questions += 1 - example.is_bad_example = True - example.is_unknown_answer = True - example.is_lookup = example.is_word_lookup or example.is_number_lookup - if not example.is_word_lookup and not example.is_bad_example: - number_questions += 1 - example.calc_answer = example.answer[0] - example.lookup_number_answer = example.calc_answer - # Split up the lookup matrix into 
word part and number part - number_column_indices = table_info.number_column_indices - word_column_indices = table_info.word_column_indices - example.word_columns = table_info.word_columns - example.number_columns = table_info.number_columns - example.word_column_names = table_info.word_column_names - example.processed_number_columns = table_info.processed_number_columns - example.processed_word_columns = table_info.processed_word_columns - example.number_column_names = table_info.number_column_names - example.number_lookup_matrix = example.lookup_matrix[:, - number_column_indices] - example.word_lookup_matrix = example.lookup_matrix[:, word_column_indices] - - def load(self): - train_data = [] - dev_data = [] - test_data = [] - self.load_annotated_data( - os.path.join(self.data_folder, "training.annotated")) - self.load_annotated_tables() - self.answer_classification() - self.train_loader.load() - self.dev_loader.load() - for i in range(self.train_loader.num_questions()): - example = self.train_loader.examples[i] - example = self.annotated_examples[example] - train_data.append(example) - for i in range(self.dev_loader.num_questions()): - example = self.dev_loader.examples[i] - dev_data.append(self.annotated_examples[example]) - - self.load_annotated_data( - os.path.join(self.data_folder, "pristine-unseen-tables.annotated")) - self.load_annotated_tables() - self.answer_classification() - self.test_loader.load() - for i in range(self.test_loader.num_questions()): - example = self.test_loader.examples[i] - test_data.append(self.annotated_examples[example]) - return train_data, dev_data, test_data diff --git a/research/next_frame_prediction/README.md b/research/next_frame_prediction/README.md deleted file mode 100644 index 9aa9b6fc5a3146a5e24ce53422d985570891d42b..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/README.md +++ /dev/null @@ -1,89 +0,0 @@ -![No Maintenance 
Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks. - -Introduction - -https://arxiv.org/pdf/1607.02586v1.pdf - -This is an implementation based on my understanding, with small -variations. It doesn't necessarily represents the paper published -by the original authors. - -Authors: Xin Pan, Anelia Angelova - -Results: - -![Sample1](g3doc/cross_conv.png) - -![Sample2](g3doc/cross_conv2.png) - -![Loss](g3doc/cross_conv3.png) - -Prerequisite: - -1. Install TensorFlow (r0.12), Bazel. - -2. Download the Sprites dataset or generate moving object dataset. - -Sprites data is located here: - -http://www.scottreed.info/files/nips2015-analogy-data.tar.gz - -Convert .mat files into images and use sprites_gen.py to convert them -to tf.SequenceExample. - -How to run: - -```shell -$ ls -R -.: -data next_frame_prediction WORKSPACE - -./data: -tfrecords tfrecords_test - -./next_frame_prediction: -cross_conv g3doc README.md - -./next_frame_prediction/cross_conv: -BUILD eval.py objects_gen.py model.py reader.py sprites_gen.py train.py - -./next_frame_prediction/g3doc: -cross_conv2.png cross_conv3.png cross_conv.png - - -# Build everything. -$ bazel build -c opt next_frame_prediction/... - -# The following example runs the generated 2d objects. -# For Sprites dataset, image_size should be 60, norm_scale should be 255.0. -# Batch size is normally 16~64, depending on your memory size. - -# Run training. 
-$ bazel-bin/next_frame_prediction/cross_conv/train \ - --batch_size=1 \ - --data_filepattern=data/tfrecords \ - --image_size=64 \ - --log_root=/tmp/predict - -step: 1, loss: 24.428671 -step: 2, loss: 19.211605 -step: 3, loss: 5.543143 -step: 4, loss: 3.035339 -step: 5, loss: 1.771392 -step: 6, loss: 2.099824 -step: 7, loss: 1.747665 -step: 8, loss: 1.572436 -step: 9, loss: 1.586816 -step: 10, loss: 1.434191 - -# Run eval. -$ bazel-bin/next_frame_prediction/cross_conv/eval \ - --batch_size=1 \ - --data_filepattern=data/tfrecords_test \ - --image_size=64 \ - --log_root=/tmp/predict -``` diff --git a/research/next_frame_prediction/cross_conv/BUILD b/research/next_frame_prediction/cross_conv/BUILD deleted file mode 100644 index b435087f34f6ffbeba016119c60724d8ac3eb180..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -package_group( - name = "internal", - packages = [ - "//next_frame_prediction/...", - ], -) - -package(default_visibility = [":internal"]) - -py_library( - name = "model", - srcs = ["model.py"], -) - -py_library( - name = "reader", - srcs = ["reader.py"], -) - -py_binary( - name = "train", - srcs = ["train.py"], - deps = [ - ":model", - ":reader", - ], -) - -py_binary( - name = "eval", - srcs = ["eval.py"], - deps = [ - ":model", - ":reader", - ], -) - -py_binary( - name = "example_gen", - srcs = ["example_gen.py"], -) - -py_binary( - name = "sprites_gen", - srcs = ["sprites_gen.py"], -) diff --git a/research/next_frame_prediction/cross_conv/eval.py b/research/next_frame_prediction/cross_conv/eval.py deleted file mode 100644 index 17ebc0e0edd2911f828cbb145ee40a06db8795b5..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/eval.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Eval Cross Convolutional Model.""" -import io -import os -import sys -import time - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import model as cross_conv_model -import reader - -FLAGS = tf.flags.FLAGS -tf.flags.DEFINE_string('log_root', '/tmp/moving_obj', 'The root dir of output.') -tf.flags.DEFINE_string('data_filepattern', - 'est', - 'training data file pattern.') -tf.flags.DEFINE_integer('batch_size', 1, 'Batch size.') -tf.flags.DEFINE_integer('image_size', 64, 'Image height and width.') -tf.flags.DEFINE_float('norm_scale', 1.0, 'Normalize the original image') -tf.flags.DEFINE_float('scale', 10.0, - 'Scale the image after norm_scale and move the diff ' - 'to the positive realm.') -tf.flags.DEFINE_integer('sequence_length', 2, 'tf.SequenceExample length.') -tf.flags.DEFINE_integer('eval_batch_count', 100, - 'Average the result this number of examples.') -tf.flags.DEFINE_bool('l2_loss', True, 'If true, include l2_loss.') -tf.flags.DEFINE_bool('reconstr_loss', False, 'If true, include reconstr_loss.') -tf.flags.DEFINE_bool('kl_loss', True, 'If true, include KL loss.') - -slim = tf.contrib.slim - - -def _Eval(): - params = dict() - params['batch_size'] = FLAGS.batch_size - params['seq_len'] = FLAGS.sequence_length - params['image_size'] = FLAGS.image_size - params['is_training'] = False - 
params['norm_scale'] = FLAGS.norm_scale - params['scale'] = FLAGS.scale - params['l2_loss'] = FLAGS.l2_loss - params['reconstr_loss'] = FLAGS.reconstr_loss - params['kl_loss'] = FLAGS.kl_loss - - eval_dir = os.path.join(FLAGS.log_root, 'eval') - - images = reader.ReadInput( - FLAGS.data_filepattern, shuffle=False, params=params) - images *= params['scale'] - # Increase the value makes training much faster. - image_diff_list = reader.SequenceToImageAndDiff(images) - model = cross_conv_model.CrossConvModel(image_diff_list, params) - model.Build() - - summary_writer = tf.summary.FileWriter(eval_dir) - saver = tf.train.Saver() - sess = tf.Session('', config=tf.ConfigProto(allow_soft_placement=True)) - tf.train.start_queue_runners(sess) - - while True: - time.sleep(60) - try: - ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root) - except tf.errors.OutOfRangeError as e: - sys.stderr.write('Cannot restore checkpoint: %s\n' % e) - continue - if not (ckpt_state and ckpt_state.model_checkpoint_path): - sys.stderr.write('No model to eval yet at %s\n' % FLAGS.log_root) - continue - sys.stderr.write('Loading checkpoint %s\n' % - ckpt_state.model_checkpoint_path) - saver.restore(sess, ckpt_state.model_checkpoint_path) - # Use the empirical distribution of z from training set. 
- if not tf.gfile.Exists(os.path.join(FLAGS.log_root, 'z_mean.npy')): - sys.stderr.write('No z at %s\n' % FLAGS.log_root) - continue - - with tf.gfile.Open(os.path.join(FLAGS.log_root, 'z_mean.npy')) as f: - sample_z_mean = np.load(io.BytesIO(f.read())) - with tf.gfile.Open( - os.path.join(FLAGS.log_root, 'z_stddev_log.npy')) as f: - sample_z_stddev_log = np.load(io.BytesIO(f.read())) - - total_loss = 0.0 - for _ in xrange(FLAGS.eval_batch_count): - loss_val, total_steps, summaries = sess.run( - [model.loss, model.global_step, model.summary_op], - feed_dict={model.z_mean: sample_z_mean, - model.z_stddev_log: sample_z_stddev_log}) - total_loss += loss_val - - summary_writer.add_summary(summaries, total_steps) - sys.stderr.write('steps: %d, loss: %f\n' % - (total_steps, total_loss / FLAGS.eval_batch_count)) - - -def main(_): - _Eval() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/next_frame_prediction/cross_conv/example_gen.py b/research/next_frame_prediction/cross_conv/example_gen.py deleted file mode 100644 index bcda0bc405a60c3116e8c488cae92f502720fec4..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/example_gen.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Generate examples of two objects moving in different directions.""" -import random -import sys - -import numpy as np -from six.moves import xrange -import tensorflow as tf - - -tf.flags.DEFINE_string('out_file', '', - 'Output file for the tfrecords.') - - -def _add_object(obj_type, image, image2, xpos, ypos): - """Add a moving obj to two consecutive images.""" - obj_size = random.randint(8, 10) - channel = random.randint(0, 2) - move = random.randint(6, 10) - - obj = np.zeros([obj_size, obj_size, 3]) - if obj_type == 'rectangle': - xpos2 = xpos + move - ypos2 = ypos - for i in xrange(obj_size): - obj[i, 0:i+1, channel] = [1.0 for _ in xrange(i+1)] - elif obj_type == 'square': - xpos2 = xpos - ypos2 = ypos + move - obj[:, :, channel] = 1.0 - - for x in xrange(obj_size): - for y in xrange(obj_size): - if obj[x, y, channel] == 1.0: - image[xpos+x, ypos+y, channel] = 1.0 - image2[xpos2+x, ypos2+y, channel] = 1.0 - - -def _images_to_example(image, image2): - """Convert two consecutive images to SequenceExample.""" - example = tf.SequenceExample() - feature_list = example.feature_lists.feature_list['moving_objs'] - feature = feature_list.feature.add() - feature.float_list.value.extend(np.reshape(image, [-1]).tolist()) - feature = feature_list.feature.add() - feature.float_list.value.extend(np.reshape(image2, [-1]).tolist()) - return example - - -def generate_input(): - """Generate tfrecords.""" - writer = tf.python_io.TFRecordWriter(tf.flags.FLAGS.out_file) - writer2 = tf.python_io.TFRecordWriter(tf.flags.FLAGS.out_file + '_test') - - examples = [] - for xpos in xrange(0, 40, 3): - for ypos in xrange(0, 40, 3): - for xpos2 in xrange(0, 40, 3): - for ypos2 in xrange(0, 40, 3): - image = np.zeros([64, 64, 3]) - image2 = np.zeros([64, 64, 3]) - _add_object('rectangle', image, image2, xpos, ypos) - _add_object('square', image, image2, xpos2, ypos2) - 
examples.append(_images_to_example(image, image2)) - - sys.stderr.write('Finish generating examples.\n') - random.shuffle(examples) - for count, ex in enumerate(examples): - if count % 10 == 0: - writer2.write(ex.SerializeToString()) - else: - writer.write(ex.SerializeToString()) - -def main(_): - generate_input() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/next_frame_prediction/cross_conv/model.py b/research/next_frame_prediction/cross_conv/model.py deleted file mode 100644 index 7b48e446e18b70fec87142f6834f33332287d02e..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/model.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Cross Convolutional Model. - -https://arxiv.org/pdf/1607.02586v1.pdf -""" -import math -import sys - -from six.moves import xrange -import tensorflow as tf - -slim = tf.contrib.slim - - -class CrossConvModel(object): - - def __init__(self, image_diff_list, params): - """Constructor. - - Args: - image_diff_list: A list of (image, diff) tuples, with shape - [batch_size, image_size, image_size, 3] and image_sizes as - [32, 64, 128, 256]. - params: Dict of parameters. - """ - self.images = [i for (i, _) in image_diff_list] - # Move the diff to the positive realm. 
- self.diffs = [(d + params['scale']) / 2 for (i, d) in image_diff_list] - self.params = params - - def Build(self): - with tf.device('/gpu:0'): - with slim.arg_scope([slim.conv2d], - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params={'is_training': - self.params['is_training']}): - self._BuildMotionKernel() - encoded_images = self._BuildImageEncoder() - cross_conved_images = self._CrossConv(encoded_images) - self._BuildImageDecoder(cross_conved_images) - self._BuildLoss() - - image = self.images[1] - diff = self.diffs[1] - - self.global_step = tf.Variable(0, name='global_step', trainable=False) - - if self.params['is_training']: - self._BuildTrainOp() - - diff = diff * 2.0 - self.params['scale'] - diff_output = self.diff_output * 2.0 - self.params['scale'] - concat_image = tf.concat( - axis=1, values=[image, image + diff_output, image + diff, diff_output]) - tf.summary.image('origin_predict_expect_predictdiff', concat_image) - self.summary_op = tf.summary.merge_all() - return self.loss - - def _BuildTrainOp(self): - lrn_rate = tf.maximum( - 0.01, # min_lr_rate. - tf.train.exponential_decay( - self.params['learning_rate'], self.global_step, 10000, 0.5)) - tf.summary.scalar('learning rate', lrn_rate) - optimizer = tf.train.GradientDescentOptimizer(lrn_rate) - self.train_op = slim.learning.create_train_op( - self.loss, optimizer, global_step=self.global_step) - - def _BuildLoss(self): - # 1. reconstr_loss seems doesn't do better than l2 loss. - # 2. Only works when using reduce_mean. reduce_sum doesn't work. - # 3. It seems kl loss doesn't play an important role. 
- self.loss = 0 - with tf.variable_scope('loss'): - if self.params['l2_loss']: - l2_loss = tf.reduce_mean(tf.square(self.diff_output - self.diffs[1])) - tf.summary.scalar('l2_loss', l2_loss) - self.loss += l2_loss - if self.params['reconstr_loss']: - reconstr_loss = (-tf.reduce_mean( - self.diffs[1] * (1e-10 + self.diff_output) + - (1-self.diffs[1]) * tf.log(1e-10 + 1 - self.diff_output))) - reconstr_loss = tf.check_numerics(reconstr_loss, 'reconstr_loss') - tf.summary.scalar('reconstr_loss', reconstr_loss) - self.loss += reconstr_loss - if self.params['kl_loss']: - kl_loss = (0.5 * tf.reduce_mean( - tf.square(self.z_mean) + tf.square(self.z_stddev) - - 2 * self.z_stddev_log - 1)) - tf.summary.scalar('kl_loss', kl_loss) - self.loss += kl_loss - - tf.summary.scalar('loss', self.loss) - - def _BuildMotionKernel(self): - image = self.images[-2] - diff = self.diffs[-2] - shape = image.get_shape().as_list() - assert shape[1] == shape[2] and shape[1] == 128 - batch_size = shape[0] - - net = tf.concat(axis=3, values=[image, diff]) - with tf.variable_scope('motion_encoder'): - with slim.arg_scope([slim.conv2d], padding='VALID'): - net = slim.conv2d(net, 96, [5, 5], stride=1) - net = slim.max_pool2d(net, [2, 2]) - net = slim.conv2d(net, 96, [5, 5], stride=1) - net = slim.max_pool2d(net, [2, 2]) - net = slim.conv2d(net, 128, [5, 5], stride=1) - net = slim.conv2d(net, 128, [5, 5], stride=1) - net = slim.max_pool2d(net, [2, 2]) - net = slim.conv2d(net, 256, [4, 4], stride=1) - net = slim.conv2d(net, 256, [3, 3], stride=1) - - z = tf.reshape(net, shape=[batch_size, -1]) - self.z_mean, self.z_stddev_log = tf.split( - axis=1, num_or_size_splits=2, value=z) - self.z_stddev = tf.exp(self.z_stddev_log) - - epsilon = tf.random_normal( - self.z_mean.get_shape().as_list(), 0, 1, dtype=tf.float32) - kernel = self.z_mean + tf.multiply(self.z_stddev, epsilon) - - width = int(math.sqrt(kernel.get_shape().as_list()[1] // 128)) - kernel = tf.reshape(kernel, [batch_size, width, width, 128]) - 
with tf.variable_scope('kernel_decoder'): - with slim.arg_scope([slim.conv2d], padding='SAME'): - kernel = slim.conv2d(kernel, 128, [5, 5], stride=1) - self.kernel = slim.conv2d(kernel, 128, [5, 5], stride=1) - - sys.stderr.write('kernel shape: %s\n' % kernel.get_shape()) - - def _BuildImageEncoder(self): - feature_maps = [] - for (i, image) in enumerate(self.images): - with tf.variable_scope('image_encoder_%d' % i): - with slim.arg_scope([slim.conv2d, slim.max_pool2d], padding='SAME'): - net = slim.conv2d(image, 64, [5, 5], stride=1) - net = slim.conv2d(net, 64, [5, 5], stride=1) - net = slim.max_pool2d(net, [5, 5]) - net = slim.conv2d(net, 64, [5, 5], stride=1) - net = slim.conv2d(net, 32, [5, 5], stride=1) - net = slim.max_pool2d(net, [2, 2]) - sys.stderr.write('image_conv shape: %s\n' % net.get_shape()) - feature_maps.append(net) - return feature_maps - - def _CrossConvHelper(self, encoded_image, kernel): - """Cross Convolution. - - The encoded image and kernel are of the same shape. Namely - [batch_size, image_size, image_size, channels]. They are split - into [image_size, image_size] image squares [kernel_size, kernel_size] - kernel squares. kernel squares are used to convolute image squares. 
- """ - images = tf.expand_dims(encoded_image, 0) - kernels = tf.expand_dims(kernel, 3) - return tf.nn.depthwise_conv2d(images, kernels, [1, 1, 1, 1], 'SAME') - - def _CrossConv(self, encoded_images): - """Apply the motion kernel on the encoded_images.""" - cross_conved_images = [] - kernels = tf.split(axis=3, num_or_size_splits=4, value=self.kernel) - for (i, encoded_image) in enumerate(encoded_images): - with tf.variable_scope('cross_conv_%d' % i): - kernel = kernels[i] - - encoded_image = tf.unstack(encoded_image, axis=0) - kernel = tf.unstack(kernel, axis=0) - assert len(encoded_image) == len(kernel) - assert len(encoded_image) == self.params['batch_size'] - conved_image = [] - for j in xrange(len(encoded_image)): - conved_image.append(self._CrossConvHelper( - encoded_image[j], kernel[j])) - cross_conved_images.append(tf.concat(axis=0, values=conved_image)) - sys.stderr.write('cross_conved shape: %s\n' % - cross_conved_images[-1].get_shape()) - return cross_conved_images - - def _Deconv(self, net, out_filters, kernel_size, stride): - shape = net.get_shape().as_list() - in_filters = shape[3] - kernel_shape = [kernel_size, kernel_size, out_filters, in_filters] - - weights = tf.get_variable( - name='weights', - shape=kernel_shape, - dtype=tf.float32, - initializer=tf.truncated_normal_initializer(stddev=0.01)) - - - out_height = shape[1] * stride - out_width = shape[2] * stride - batch_size = shape[0] - - output_shape = [batch_size, out_height, out_width, out_filters] - net = tf.nn.conv2d_transpose(net, weights, output_shape, - [1, stride, stride, 1], padding='SAME') - slim.batch_norm(net) - return net - - def _BuildImageDecoder(self, cross_conved_images): - """Decode the cross_conved feature maps into the predicted images.""" - nets = [] - for i, cross_conved_image in enumerate(cross_conved_images): - with tf.variable_scope('image_decoder_%d' % i): - stride = 64 / cross_conved_image.get_shape().as_list()[1] - # TODO(xpan): Alternative solution for upsampling? 
- nets.append(self._Deconv( - cross_conved_image, 64, kernel_size=3, stride=stride)) - - net = tf.concat(axis=3, values=nets) - net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1) - net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1) - net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1) - self.diff_output = net - sys.stderr.write('diff_output shape: %s\n' % self.diff_output.get_shape()) diff --git a/research/next_frame_prediction/cross_conv/reader.py b/research/next_frame_prediction/cross_conv/reader.py deleted file mode 100644 index ab4ab698dda938f182be0019168aa132c1e3c5af..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/reader.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Read image sequence.""" - -from six.moves import xrange -import tensorflow as tf - - -def SequenceToImageAndDiff(images): - """Convert image sequence batch into image and diff batch. - - Each image pair is converted to the first image and their diff. - Batch size will increase if sequence length is larger than 2. - - Args: - images: Image sequence with shape - [batch_size, seq_len, image_size, image_size, channel] - - Returns: - the list of (image, diff) tuples with shape - [batch_size2, image_size, image_size, channel]. 
image_sizes are - [32, 64, 128, 256]. - """ - image_diff_list = [] - image_seq = tf.unstack(images, axis=1) - for size in [32, 64, 128, 256]: - resized_images = [ - tf.image.resize_images(i, [size, size]) for i in image_seq] - diffs = [] - for i in xrange(0, len(resized_images)-1): - diffs.append(resized_images[i+1] - resized_images[i]) - image_diff_list.append( - (tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs))) - return image_diff_list - - -def ReadInput(data_filepattern, shuffle, params): - """Read the tf.SequenceExample tfrecord files. - - Args: - data_filepattern: tf.SequenceExample tfrecord filepattern. - shuffle: Whether to shuffle the examples. - params: parameter dict. - - Returns: - image sequence batch [batch_size, seq_len, image_size, image_size, channel]. - """ - image_size = params['image_size'] - filenames = tf.gfile.Glob(data_filepattern) - filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle) - reader = tf.TFRecordReader() - _, example = reader.read(filename_queue) - feature_sepc = { - 'moving_objs': tf.FixedLenSequenceFeature( - shape=[image_size * image_size * 3], dtype=tf.float32)} - _, features = tf.parse_single_sequence_example( - example, sequence_features=feature_sepc) - moving_objs = tf.reshape( - features['moving_objs'], [params['seq_len'], image_size, image_size, 3]) - if shuffle: - examples = tf.train.shuffle_batch( - [moving_objs], - batch_size=params['batch_size'], - num_threads=64, - capacity=params['batch_size'] * 100, - min_after_dequeue=params['batch_size'] * 4) - else: - examples = tf.train.batch([moving_objs], - batch_size=params['batch_size'], - num_threads=16, - capacity=params['batch_size']) - examples /= params['norm_scale'] - return examples diff --git a/research/next_frame_prediction/cross_conv/sprites_gen.py b/research/next_frame_prediction/cross_conv/sprites_gen.py deleted file mode 100644 index 
0d36c255cd93a90797272d7a80389f16fc6f3702..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/sprites_gen.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Generate the sprites tfrecords from raw_images.""" -import os -import random -import re -import sys - -import numpy as np -import scipy.misc -from six.moves import xrange -import tensorflow as tf - - -tf.flags.DEFINE_string('data_filepattern', '', 'The raw images.') -tf.flags.DEFINE_string('out_file', '', - 'File name for the tfrecord output.') - - -def _read_images(): - """Read images from image files into data structure.""" - sprites = dict() - files = tf.gfile.Glob(tf.flags.FLAGS.data_filepattern) - for f in files: - image = scipy.misc.imread(f) - m = re.search('image_([0-9]+)_([0-9]+)_([0-9]+).jpg', os.path.basename(f)) - if m.group(1) not in sprites: - sprites[m.group(1)] = dict() - character = sprites[m.group(1)] - if m.group(2) not in character: - character[m.group(2)] = dict() - pose = character[m.group(2)] - pose[int(m.group(3))] = image - return sprites - - -def _images_to_example(image, image2): - """Convert 2 consecutive image to a SequenceExample.""" - example = tf.SequenceExample() - feature_list = example.feature_lists.feature_list['moving_objs'] - feature = feature_list.feature.add() - 
feature.float_list.value.extend(np.reshape(image, [-1]).tolist()) - feature = feature_list.feature.add() - feature.float_list.value.extend(np.reshape(image2, [-1]).tolist()) - return example - - -def generate_input(): - """Generate tfrecords.""" - sprites = _read_images() - sys.stderr.write('Finish reading images.\n') - train_writer = tf.python_io.TFRecordWriter( - tf.flags.FLAGS.out_file.replace('sprites', 'sprites_train')) - test_writer = tf.python_io.TFRecordWriter( - tf.flags.FLAGS.out_file.replace('sprites', 'sprites_test')) - - train_examples = [] - test_examples = [] - for i in sprites: - if int(i) < 24: - examples = test_examples - else: - examples = train_examples - - character = sprites[i] - for j in character.keys(): - pose = character[j] - for k in xrange(1, len(pose), 1): - image = pose[k] - image2 = pose[k+1] - examples.append(_images_to_example(image, image2)) - - sys.stderr.write('Finish generating examples: %d, %d.\n' % - (len(train_examples), len(test_examples))) - random.shuffle(train_examples) - _ = [train_writer.write(ex.SerializeToString()) for ex in train_examples] - _ = [test_writer.write(ex.SerializeToString()) for ex in test_examples] - - -def main(_): - generate_input() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/next_frame_prediction/cross_conv/train.py b/research/next_frame_prediction/cross_conv/train.py deleted file mode 100644 index 5b9973f52cc3946b3396c1e0b87fda19901735f6..0000000000000000000000000000000000000000 --- a/research/next_frame_prediction/cross_conv/train.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Train the cross convolutional model.""" -import os -import sys - -import numpy as np -import tensorflow as tf - -import model as cross_conv_model -import reader - -FLAGS = tf.flags.FLAGS -tf.flags.DEFINE_string('master', '', 'Session address.') -tf.flags.DEFINE_string('log_root', '/tmp/moving_obj', 'The root dir of output.') -tf.flags.DEFINE_string('data_filepattern', '', - 'training data file pattern.') -tf.flags.DEFINE_integer('image_size', 64, 'Image height and width.') -tf.flags.DEFINE_integer('batch_size', 1, 'Batch size.') -tf.flags.DEFINE_float('norm_scale', 1.0, 'Normalize the original image') -tf.flags.DEFINE_float('scale', 10.0, - 'Scale the image after norm_scale and move the diff ' - 'to the positive realm.') -tf.flags.DEFINE_integer('sequence_length', 2, 'tf.SequenceExample length.') -tf.flags.DEFINE_float('learning_rate', 0.8, 'Learning rate.') -tf.flags.DEFINE_bool('l2_loss', True, 'If true, include l2_loss.') -tf.flags.DEFINE_bool('reconstr_loss', False, 'If true, include reconstr_loss.') -tf.flags.DEFINE_bool('kl_loss', True, 'If true, include KL loss.') - -slim = tf.contrib.slim - - -def _Train(): - params = dict() - params['batch_size'] = FLAGS.batch_size - params['seq_len'] = FLAGS.sequence_length - params['image_size'] = FLAGS.image_size - params['is_training'] = True - params['norm_scale'] = FLAGS.norm_scale - params['scale'] = FLAGS.scale - params['learning_rate'] = FLAGS.learning_rate - params['l2_loss'] = FLAGS.l2_loss - params['reconstr_loss'] = 
FLAGS.reconstr_loss - params['kl_loss'] = FLAGS.kl_loss - - train_dir = os.path.join(FLAGS.log_root, 'train') - - images = reader.ReadInput(FLAGS.data_filepattern, shuffle=True, params=params) - images *= params['scale'] - # Increase the value makes training much faster. - image_diff_list = reader.SequenceToImageAndDiff(images) - model = cross_conv_model.CrossConvModel(image_diff_list, params) - model.Build() - tf.contrib.tfprof.model_analyzer.print_model_analysis(tf.get_default_graph()) - - summary_writer = tf.summary.FileWriter(train_dir) - sv = tf.train.Supervisor(logdir=FLAGS.log_root, - summary_op=None, - is_chief=True, - save_model_secs=60, - global_step=model.global_step) - sess = sv.prepare_or_wait_for_session( - FLAGS.master, config=tf.ConfigProto(allow_soft_placement=True)) - - total_loss = 0.0 - step = 0 - sample_z_mean = np.zeros(model.z_mean.get_shape().as_list()) - sample_z_stddev_log = np.zeros(model.z_stddev_log.get_shape().as_list()) - sample_step = 0 - - while True: - _, loss_val, total_steps, summaries, z_mean, z_stddev_log = sess.run( - [model.train_op, model.loss, model.global_step, - model.summary_op, - model.z_mean, model.z_stddev_log]) - - sample_z_mean += z_mean - sample_z_stddev_log += z_stddev_log - total_loss += loss_val - step += 1 - sample_step += 1 - - if step % 100 == 0: - summary_writer.add_summary(summaries, total_steps) - sys.stderr.write('step: %d, loss: %f\n' % - (total_steps, total_loss / step)) - total_loss = 0.0 - step = 0 - - # Sampled z is used for eval. - # It seems 10k is better than 1k. Maybe try 100k next? 
- if sample_step % 10000 == 0: - with tf.gfile.Open(os.path.join(FLAGS.log_root, 'z_mean.npy'), 'w') as f: - np.save(f, sample_z_mean / sample_step) - with tf.gfile.Open( - os.path.join(FLAGS.log_root, 'z_stddev_log.npy'), 'w') as f: - np.save(f, sample_z_stddev_log / sample_step) - sample_z_mean = np.zeros(model.z_mean.get_shape().as_list()) - sample_z_stddev_log = np.zeros( - model.z_stddev_log.get_shape().as_list()) - sample_step = 0 - - -def main(_): - _Train() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/next_frame_prediction/g3doc/cross_conv.png b/research/next_frame_prediction/g3doc/cross_conv.png deleted file mode 100644 index 13915f944188adf0b0a3dc85219fce7bcb5e7de9..0000000000000000000000000000000000000000 Binary files a/research/next_frame_prediction/g3doc/cross_conv.png and /dev/null differ diff --git a/research/next_frame_prediction/g3doc/cross_conv2.png b/research/next_frame_prediction/g3doc/cross_conv2.png deleted file mode 100644 index c4b5e8e9d6169a1a908858a91fdc6467ae03ea2a..0000000000000000000000000000000000000000 Binary files a/research/next_frame_prediction/g3doc/cross_conv2.png and /dev/null differ diff --git a/research/next_frame_prediction/g3doc/cross_conv3.png b/research/next_frame_prediction/g3doc/cross_conv3.png deleted file mode 100644 index 054d7d1edf2043c50a3ea8d332cc83a8dcb32c9b..0000000000000000000000000000000000000000 Binary files a/research/next_frame_prediction/g3doc/cross_conv3.png and /dev/null differ diff --git a/research/object_detection/README.md b/research/object_detection/README.md index 9e9bf549861c2ae5aff7c386ce2efe6ca2bb1ff9..5a79bcc456884fe63a55d30eb6926a39f3d94d73 100644 --- a/research/object_detection/README.md +++ b/research/object_detection/README.md @@ -54,19 +54,55 @@ Note: The models we provide in [TF2 Zoo](g3doc/tf2_detection_zoo.md) and [TF1 Zoo](g3doc/tf1_detection_zoo.md) are specific to the TensorFlow major version and are not interoperable. 
-Please select one of the two links below for TensorFlow version specific +Please select one of the links below for TensorFlow version-specific documentation of the Object Detection API: +### Tensorflow 2.x + * + Object Detection API TensorFlow 2
+ * + TensorFlow 2 Model Zoo
+ +### Tensorflow 1.x + * + Object Detection API TensorFlow 1
+ * + TensorFlow 1 Model Zoo
+ +## Whats New -| [![Object Detection API TensorFlow 2](https://img.shields.io/badge/Object%20Detection%20API-TensorFlow%202-orange)](g3doc/tf2.md) | [![TensorFlow 2 Model Zoo](https://img.shields.io/badge/Model%20Zoo-TensorFlow%202-Orange)](g3doc/tf2_detection_zoo.md) | -|---|---| -| [![Object Detection API TensorFlow 1](https://img.shields.io/badge/Object%20Detection%20API-TensorFlow%201-orange)](g3doc/tf1.md) | [![TensorFlow 1 Model Zoo](https://img.shields.io/badge/Model%20Zoo-TensorFlow%201-Orange)](g3doc/tf1_detection_zoo.md) | +### DeepMAC architecture - +We have released our new architecture, **DeepMAC**, desgined for partially +supervised instance segmentation. DeepMAC stands for Deep Mask-heads +Above CenterNet, and is based on our CenterNet implementation. In our +[paper](https://arxiv.org/abs/2104.00613) we show that DeepMAC achieves +state-of-the-art results for the partially supervised instance segmentation +task without using any specialty modules or losses; just better mask-head +architectures. The findings from our paper are not specific to CenterNet and +can also be applied to Mask R-CNN or without any detector at all. +Please see links below for more details -## Whats New +* [DeepMAC documentation](g3doc/deepmac.md). +* [Mask RCNN code](https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/deepmac_maskrcnn) + in TF Model garden code base. +* [DeepMAC Colab](./colab_tutorials/deepmac_colab.ipynb) that lets you run a + pre-trained DeepMAC model on user-specified boxes. Note that you are not + restricted to COCO classes! +* Project website - [git.io/deepmac](https://git.io/deepmac) + +Thanks to contributors: Vighnesh Birodkar, Zhichao Lu, Siyang Li, + Vivek Rathod, Jonathan Huang + + +### Mobile Inference for TF2 models + +TF2 OD API models can now be converted to TensorFlow Lite! Only SSD models +currently supported. See documentation. 
+ +**Thanks to contributors**: Sachin Joglekar ### TensorFlow 2 Support @@ -104,6 +140,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi, Yixin Shi, Yu-hui Chen, Zhichao Lu. +### MobileDet GPU + +We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP +higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson +Xavier at comparable latency (3.2ms vs 3.3ms). + +Along with the model definition, we are also releasing model checkpoints trained +on the COCO dataset. + +Thanks to contributors: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An +(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA). + ### Context R-CNN We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that diff --git a/research/object_detection/builders/box_predictor_builder.py b/research/object_detection/builders/box_predictor_builder.py index 029649d8d9dd68877adac6bb971d5fd024f62246..d0f994c8eca1313e34c3c5133c56abe8be14f784 100644 --- a/research/object_detection/builders/box_predictor_builder.py +++ b/research/object_detection/builders/box_predictor_builder.py @@ -329,6 +329,8 @@ def build_weight_shared_convolutional_keras_box_predictor( share_prediction_tower=False, apply_batch_norm=True, use_depthwise=False, + apply_conv_hyperparams_to_heads=False, + apply_conv_hyperparams_pointwise=False, score_converter_fn=tf.identity, box_encodings_clip_range=None, name='WeightSharedConvolutionalBoxPredictor', @@ -369,6 +371,14 @@ def build_weight_shared_convolutional_keras_box_predictor( apply_batch_norm: Whether to apply batch normalization to conv layers in this predictor. use_depthwise: Whether to use depthwise separable conv2d instead of conv2d. + apply_conv_hyperparams_to_heads: Whether to apply conv_hyperparams to + depthwise seperable convolution layers in the box and class heads. 
By + default, the conv_hyperparams are only applied to layers in the predictor + tower when using depthwise separable convolutions. + apply_conv_hyperparams_pointwise: Whether to apply the conv_hyperparams to + the pointwise_initializer and pointwise_regularizer when using depthwise + separable convolutions. By default, conv_hyperparams are only applied to + the depthwise initializer and regularizer when use_depthwise is true. score_converter_fn: Callable score converter to perform elementwise op on class scores. box_encodings_clip_range: Min and max values for clipping the box_encodings. @@ -391,6 +401,7 @@ def build_weight_shared_convolutional_keras_box_predictor( conv_hyperparams=conv_hyperparams, num_predictions_per_location=num_predictions_per_location, use_depthwise=use_depthwise, + apply_conv_hyperparams_to_heads=apply_conv_hyperparams_to_heads, box_encodings_clip_range=box_encodings_clip_range, name='WeightSharedConvolutionalBoxHead') class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead( @@ -403,6 +414,7 @@ def build_weight_shared_convolutional_keras_box_predictor( num_predictions_per_location=num_predictions_per_location, class_prediction_bias_init=class_prediction_bias_init, use_depthwise=use_depthwise, + apply_conv_hyperparams_to_heads=apply_conv_hyperparams_to_heads, score_converter_fn=score_converter_fn, name='WeightSharedConvolutionalClassHead') other_heads = {} @@ -423,6 +435,7 @@ def build_weight_shared_convolutional_keras_box_predictor( apply_batch_norm=apply_batch_norm, share_prediction_tower=share_prediction_tower, use_depthwise=use_depthwise, + apply_conv_hyperparams_pointwise=apply_conv_hyperparams_pointwise, name=name)) @@ -920,6 +933,10 @@ def build_keras(hyperparams_fn, freeze_batchnorm, inplace_batchnorm_update, share_prediction_tower=config_box_predictor.share_prediction_tower, apply_batch_norm=apply_batch_norm, use_depthwise=config_box_predictor.use_depthwise, + apply_conv_hyperparams_to_heads=( + 
config_box_predictor.apply_conv_hyperparams_to_heads), + apply_conv_hyperparams_pointwise=( + config_box_predictor.apply_conv_hyperparams_pointwise), score_converter_fn=score_converter_fn, box_encodings_clip_range=box_encodings_clip_range, keyword_args=keyword_args) diff --git a/research/object_detection/builders/dataset_builder.py b/research/object_detection/builders/dataset_builder.py index c1c1ce3ecd17c2625585cd83f080b49c0150151a..2a47c628645145be0b11425d5d7ec97f59b80544 100644 --- a/research/object_detection/builders/dataset_builder.py +++ b/research/object_detection/builders/dataset_builder.py @@ -27,6 +27,7 @@ from __future__ import division from __future__ import print_function import functools +import math import tensorflow.compat.v1 as tf from object_detection.builders import decoder_builder @@ -50,20 +51,24 @@ def make_initializable_iterator(dataset): return iterator -def read_dataset(file_read_func, input_files, config, - filename_shard_fn=None): +def _read_dataset_internal(file_read_func, + input_files, + num_readers, + config, + filename_shard_fn=None): """Reads a dataset, and handles repetition and shuffling. Args: - file_read_func: Function to use in tf_data.parallel_interleave, to - read every individual file into a tf.data.Dataset. + file_read_func: Function to use in tf_data.parallel_interleave, to read + every individual file into a tf.data.Dataset. input_files: A list of file paths to read. + num_readers: Number of readers to use. config: A input_reader_builder.InputReader object. - filename_shard_fn: optional, A funciton used to shard filenames across - replicas. This function takes as input a TF dataset of filenames and - is expected to return its sharded version. It is useful when the - dataset is being loaded on one of possibly many replicas and we want - to evenly shard the files between the replicas. + filename_shard_fn: optional, A function used to shard filenames across + replicas. 
This function takes as input a TF dataset of filenames and is + expected to return its sharded version. It is useful when the dataset is + being loaded on one of possibly many replicas and we want to evenly shard + the files between the replicas. Returns: A tf.data.Dataset of (undecoded) tf-records based on config. @@ -71,12 +76,12 @@ def read_dataset(file_read_func, input_files, config, Raises: RuntimeError: If no files are found at the supplied path(s). """ - # Shard, shuffle, and read files. filenames = tf.gfile.Glob(input_files) + tf.logging.info('Reading record datasets for input file: %s' % input_files) + tf.logging.info('Number of filenames to read: %s' % len(filenames)) if not filenames: raise RuntimeError('Did not find any input files matching the glob pattern ' '{}'.format(input_files)) - num_readers = config.num_readers if num_readers > len(filenames): num_readers = len(filenames) tf.logging.warning('num_readers has been reduced to %d to match input file ' @@ -103,6 +108,63 @@ def read_dataset(file_read_func, input_files, config, return records_dataset +def read_dataset(file_read_func, input_files, config, filename_shard_fn=None): + """Reads multiple datasets with sampling. + + Args: + file_read_func: Function to use in tf_data.parallel_interleave, to read + every individual file into a tf.data.Dataset. + input_files: A list of file paths to read. + config: A input_reader_builder.InputReader object. + filename_shard_fn: optional, A function used to shard filenames across + replicas. This function takes as input a TF dataset of filenames and is + expected to return its sharded version. It is useful when the dataset is + being loaded on one of possibly many replicas and we want to evenly shard + the files between the replicas. + + Returns: + A tf.data.Dataset of (undecoded) tf-records based on config. + + Raises: + RuntimeError: If no files are found at the supplied path(s). 
+ """ + if config.sample_from_datasets_weights: + tf.logging.info('Reading weighted datasets: %s' % input_files) + if len(input_files) != len(config.sample_from_datasets_weights): + raise ValueError('Expected the number of input files to be the same as ' + 'the number of dataset sample weights. But got ' + '[input_files, sample_from_datasets_weights]: [' + + input_files + ', ' + + str(config.sample_from_datasets_weights) + ']') + tf.logging.info('Sampling from datasets %s with weights %s' % + (input_files, config.sample_from_datasets_weights)) + records_datasets = [] + dataset_weights = [] + for i, input_file in enumerate(input_files): + weight = config.sample_from_datasets_weights[i] + num_readers = math.ceil(config.num_readers * + weight / + sum(config.sample_from_datasets_weights)) + tf.logging.info( + 'Num readers for dataset [%s]: %d', input_file, num_readers) + if num_readers == 0: + tf.logging.info('Skipping dataset due to zero weights: %s', input_file) + continue + tf.logging.info( + 'Num readers for dataset [%s]: %d', input_file, num_readers) + records_dataset = _read_dataset_internal(file_read_func, [input_file], + num_readers, config, + filename_shard_fn) + dataset_weights.append(weight) + records_datasets.append(records_dataset) + return tf.data.experimental.sample_from_datasets(records_datasets, + dataset_weights) + else: + tf.logging.info('Reading unweighted datasets: %s' % input_files) + return _read_dataset_internal(file_read_func, input_files, + config.num_readers, config, filename_shard_fn) + + def shard_function_for_context(input_context): """Returns a function that shards filenames based on the input context.""" @@ -195,7 +257,8 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None, dataset = dataset_map_fn(dataset, transform_input_data_fn, batch_size, input_reader_config) if batch_size: - dataset = dataset.batch(batch_size, drop_remainder=True) + dataset = dataset.batch(batch_size, + 
drop_remainder=input_reader_config.drop_remainder) dataset = dataset.prefetch(input_reader_config.num_prefetch_batches) return dataset diff --git a/research/object_detection/builders/dataset_builder_test.py b/research/object_detection/builders/dataset_builder_test.py index eb2cdb3ccbd891e5f089281d9b506d636d26d6a9..c47218a008d2ae192a78614be8063184f35e1629 100644 --- a/research/object_detection/builders/dataset_builder_test.py +++ b/research/object_detection/builders/dataset_builder_test.py @@ -532,6 +532,9 @@ class ReadDatasetTest(test_case.TestCase): return get_iterator_next_for_testing(dataset, self.is_tf2()) + def _assert_item_count(self, data, item, percentage): + self.assertAlmostEqual(data.count(item)/len(data), percentage, places=1) + def test_make_initializable_iterator_with_hashTable(self): def graph_fn(): @@ -554,6 +557,88 @@ class ReadDatasetTest(test_case.TestCase): result = self.execute(graph_fn, []) self.assertAllEqual(result, [-1, 100, 1, 100]) + def test_read_dataset_sample_from_datasets_weights_equal_weight(self): + """Ensure that the files' values are equally-weighted.""" + config = input_reader_pb2.InputReader() + config.num_readers = 2 + config.shuffle = False + config.sample_from_datasets_weights.extend([0.5, 0.5]) + + def graph_fn(): + return self._get_dataset_next( + [self._path_template % '0', self._path_template % '1'], + config, + batch_size=1000) + + data = list(self.execute(graph_fn, [])) + self.assertEqual(len(data), 1000) + self._assert_item_count(data, 1, 0.25) + self._assert_item_count(data, 10, 0.25) + self._assert_item_count(data, 2, 0.25) + self._assert_item_count(data, 20, 0.25) + + def test_read_dataset_sample_from_datasets_weights_non_normalized(self): + """Ensure that the values are equally-weighted when not normalized.""" + config = input_reader_pb2.InputReader() + config.num_readers = 2 + config.shuffle = False + # Values are not normalized to sum to 1. 
In this case, it's a 50/50 split + # with each dataset having weight of 1. + config.sample_from_datasets_weights.extend([1, 1]) + + def graph_fn(): + return self._get_dataset_next( + [self._path_template % '0', self._path_template % '1'], + config, + batch_size=1000) + + data = list(self.execute(graph_fn, [])) + self.assertEqual(len(data), 1000) + self._assert_item_count(data, 1, 0.25) + self._assert_item_count(data, 10, 0.25) + self._assert_item_count(data, 2, 0.25) + self._assert_item_count(data, 20, 0.25) + + def test_read_dataset_sample_from_datasets_weights_zero_weight(self): + """Ensure that the files' values are equally-weighted.""" + config = input_reader_pb2.InputReader() + config.num_readers = 2 + config.shuffle = False + config.sample_from_datasets_weights.extend([1.0, 0.0]) + + def graph_fn(): + return self._get_dataset_next( + [self._path_template % '0', self._path_template % '1'], + config, + batch_size=1000) + + data = list(self.execute(graph_fn, [])) + self.assertEqual(len(data), 1000) + self._assert_item_count(data, 1, 0.5) + self._assert_item_count(data, 10, 0.5) + self._assert_item_count(data, 2, 0.0) + self._assert_item_count(data, 20, 0.0) + + def test_read_dataset_sample_from_datasets_weights_unbalanced(self): + """Ensure that the files' values are equally-weighted.""" + config = input_reader_pb2.InputReader() + config.num_readers = 2 + config.shuffle = False + config.sample_from_datasets_weights.extend([0.1, 0.9]) + + def graph_fn(): + return self._get_dataset_next( + [self._path_template % '0', self._path_template % '1'], + config, + batch_size=1000) + + data = list(self.execute(graph_fn, [])) + self.assertEqual(len(data), 1000) + self._assert_item_count(data, 1, 0.05) + self._assert_item_count(data, 10, 0.05) + self._assert_item_count(data, 2, 0.45) + self._assert_item_count(data, 20, 0.45) + def test_read_dataset(self): config = input_reader_pb2.InputReader() config.num_readers = 1 diff --git 
a/research/object_detection/builders/decoder_builder.py b/research/object_detection/builders/decoder_builder.py index c0895051ac1a5dcdd8bd1528f13a7d5f909b306f..43986c35e248160b9c5741d4f8460b2f41655276 100644 --- a/research/object_detection/builders/decoder_builder.py +++ b/research/object_detection/builders/decoder_builder.py @@ -59,12 +59,16 @@ def build(input_reader_config): num_additional_channels=input_reader_config.num_additional_channels, num_keypoints=input_reader_config.num_keypoints, expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy, - load_dense_pose=input_reader_config.load_dense_pose) + load_dense_pose=input_reader_config.load_dense_pose, + load_track_id=input_reader_config.load_track_id, + load_keypoint_depth_features=input_reader_config + .load_keypoint_depth_features) return decoder elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'): decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder( label_map_proto_file=label_map_proto_file, - load_context_features=input_reader_config.load_context_features) + load_context_features=input_reader_config.load_context_features, + load_context_image_ids=input_reader_config.load_context_image_ids) return decoder raise ValueError('Unsupported input_type in config.') diff --git a/research/object_detection/builders/decoder_builder_test.py b/research/object_detection/builders/decoder_builder_test.py index d45285fd19f7648ab4d9365b155ba35a2ce0d3ed..6b13a3b3e70a5ff539a02b3d7c0aa182a61e090c 100644 --- a/research/object_detection/builders/decoder_builder_test.py +++ b/research/object_detection/builders/decoder_builder_test.py @@ -65,6 +65,8 @@ class DecoderBuilderTest(test_case.TestCase): 'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]), 'image/object/class/label': dataset_util.int64_list_feature([2]), 'image/object/mask': dataset_util.float_list_feature(flat_mask), + 'image/object/keypoint/x': dataset_util.float_list_feature([1.0, 1.0]), + 
'image/object/keypoint/y': dataset_util.float_list_feature([1.0, 1.0]) } if has_additional_channels: additional_channels_key = 'image/additional_channels/encoded' @@ -188,6 +190,28 @@ class DecoderBuilderTest(test_case.TestCase): masks = self.execute_cpu(graph_fn, []) self.assertAllEqual((1, 4, 5), masks.shape) + def test_build_tf_record_input_reader_and_load_keypoint_depth(self): + input_reader_text_proto = """ + load_keypoint_depth_features: true + num_keypoints: 2 + tf_record_input_reader {} + """ + input_reader_proto = input_reader_pb2.InputReader() + text_format.Parse(input_reader_text_proto, input_reader_proto) + + decoder = decoder_builder.build(input_reader_proto) + serialized_example = self._make_serialized_tf_example() + + def graph_fn(): + tensor_dict = decoder.decode(serialized_example) + return (tensor_dict[fields.InputDataFields.groundtruth_keypoint_depths], + tensor_dict[ + fields.InputDataFields.groundtruth_keypoint_depth_weights]) + + (kpts_depths, kpts_depth_weights) = self.execute_cpu(graph_fn, []) + self.assertAllEqual((1, 2), kpts_depths.shape) + self.assertAllEqual((1, 2), kpts_depth_weights.shape) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/builders/hyperparams_builder.py b/research/object_detection/builders/hyperparams_builder.py index 90aef43ac1bd92fb86dbd730cdb0420858572c18..9fdf4450abd919c9d09aefbf22ff1a24a4726078 100644 --- a/research/object_detection/builders/hyperparams_builder.py +++ b/research/object_detection/builders/hyperparams_builder.py @@ -20,7 +20,11 @@ import tf_slim as slim from object_detection.core import freezable_batch_norm from object_detection.protos import hyperparams_pb2 from object_detection.utils import context_manager +from object_detection.utils import tf_version +# pylint: disable=g-import-not-at-top +if tf_version.is_tf2(): + from object_detection.core import freezable_sync_batch_norm # pylint: enable=g-import-not-at-top @@ -60,9 +64,14 @@ class 
KerasLayerHyperparams(object): 'hyperparams_pb.Hyperparams.') self._batch_norm_params = None + self._use_sync_batch_norm = False if hyperparams_config.HasField('batch_norm'): self._batch_norm_params = _build_keras_batch_norm_params( hyperparams_config.batch_norm) + elif hyperparams_config.HasField('sync_batch_norm'): + self._use_sync_batch_norm = True + self._batch_norm_params = _build_keras_batch_norm_params( + hyperparams_config.sync_batch_norm) self._force_use_bias = hyperparams_config.force_use_bias self._activation_fn = _build_activation_fn(hyperparams_config.activation) @@ -81,6 +90,9 @@ class KerasLayerHyperparams(object): def use_batch_norm(self): return self._batch_norm_params is not None + def use_sync_batch_norm(self): + return self._use_sync_batch_norm + def force_use_bias(self): return self._force_use_bias @@ -133,10 +145,12 @@ class KerasLayerHyperparams(object): is False) """ if self.use_batch_norm(): - return freezable_batch_norm.FreezableBatchNorm( - training=training, - **self.batch_norm_params(**overrides) - ) + if self._use_sync_batch_norm: + return freezable_sync_batch_norm.FreezableSyncBatchNorm( + training=training, **self.batch_norm_params(**overrides)) + else: + return freezable_batch_norm.FreezableBatchNorm( + training=training, **self.batch_norm_params(**overrides)) else: return tf.keras.layers.Lambda(tf.identity) @@ -154,6 +168,20 @@ class KerasLayerHyperparams(object): else: return tf.keras.layers.Lambda(tf.identity, name=name) + def get_regularizer_weight(self): + """Returns the l1 or l2 regularizer weight. + + Returns: A float value corresponding to the l1 or l2 regularization weight, + or None if neither l1 or l2 regularization is defined. 
+ """ + regularizer = self._op_params['kernel_regularizer'] + if hasattr(regularizer, 'l1'): + return float(regularizer.l1) + elif hasattr(regularizer, 'l2'): + return float(regularizer.l2) + else: + return None + def params(self, include_activation=False, **overrides): """Returns a dict containing the layer construction hyperparameters to use. @@ -219,6 +247,10 @@ def build(hyperparams_config, is_training): raise ValueError('Hyperparams force_use_bias only supported by ' 'KerasLayerHyperparams.') + if hyperparams_config.HasField('sync_batch_norm'): + raise ValueError('Hyperparams sync_batch_norm only supported by ' + 'KerasLayerHyperparams.') + normalizer_fn = None batch_norm_params = None if hyperparams_config.HasField('batch_norm'): @@ -327,7 +359,7 @@ def _build_initializer(initializer, build_for_keras=False): operators. If false builds for Slim. Returns: - tf initializer. + tf initializer or string corresponding to the tf keras initializer name. Raises: ValueError: On unknown initializer. 
@@ -383,6 +415,13 @@ def _build_initializer(initializer, build_for_keras=False): factor=initializer.variance_scaling_initializer.factor, mode=mode, uniform=initializer.variance_scaling_initializer.uniform) + if initializer_oneof == 'keras_initializer_by_name': + if build_for_keras: + return initializer.keras_initializer_by_name + else: + raise ValueError( + 'Unsupported non-Keras usage of keras_initializer_by_name: {}'.format( + initializer.keras_initializer_by_name)) if initializer_oneof is None: return None raise ValueError('Unknown initializer function: {}'.format( diff --git a/research/object_detection/builders/hyperparams_builder_test.py b/research/object_detection/builders/hyperparams_builder_test.py index e48ac23bcb547c9729038b901a9612d3712d69cb..d21f52dc5fab256cd86a0f0cd379f3fb6702a38c 100644 --- a/research/object_detection/builders/hyperparams_builder_test.py +++ b/research/object_detection/builders/hyperparams_builder_test.py @@ -558,7 +558,7 @@ class KerasHyperparamsBuilderTest(tf.test.TestCase): result = regularizer(tf.constant(weights)).numpy() self.assertAllClose(np.abs(weights).sum() * 0.5, result) - def test_return_l2_regularizer_weights_keras(self): + def test_return_l2_regularized_weights_keras(self): conv_hyperparams_text_proto = """ regularizer { l2_regularizer { @@ -580,6 +580,63 @@ class KerasHyperparamsBuilderTest(tf.test.TestCase): result = regularizer(tf.constant(weights)).numpy() self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result) + def test_return_l1_regularizer_weight_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l1_regularizer { + weight: 0.5 + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + regularizer_weight = keras_config.get_regularizer_weight() + 
self.assertIsInstance(regularizer_weight, float) + self.assertAlmostEqual(regularizer_weight, 0.5) + + def test_return_l2_regularizer_weight_keras(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + weight: 0.5 + } + } + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + regularizer_weight = keras_config.get_regularizer_weight() + self.assertIsInstance(regularizer_weight, float) + self.assertAlmostEqual(regularizer_weight, 0.25) + + def test_return_undefined_regularizer_weight_keras(self): + conv_hyperparams_text_proto = """ + initializer { + truncated_normal_initializer { + } + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + + regularizer_weight = keras_config.get_regularizer_weight() + self.assertIsNone(regularizer_weight) + def test_return_non_default_batch_norm_params_keras( self): conv_hyperparams_text_proto = """ @@ -973,5 +1030,26 @@ class KerasHyperparamsBuilderTest(tf.test.TestCase): self._assert_variance_in_range(initializer, shape=[100, 40], variance=0.64, tol=1e-1) + def test_keras_initializer_by_name(self): + conv_hyperparams_text_proto = """ + regularizer { + l2_regularizer { + } + } + initializer { + keras_initializer_by_name: "glorot_uniform" + } + """ + conv_hyperparams_proto = hyperparams_pb2.Hyperparams() + text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams_proto) + keras_config = hyperparams_builder.KerasLayerHyperparams( + conv_hyperparams_proto) + initializer_arg = keras_config.params()['kernel_initializer'] + conv_layer = tf.keras.layers.Conv2D( + filters=16, kernel_size=3, **keras_config.params()) + 
self.assertEqual(initializer_arg, 'glorot_uniform') + self.assertIsInstance(conv_layer.kernel_initializer, + type(tf.keras.initializers.get('glorot_uniform'))) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/builders/input_reader_builder.py b/research/object_detection/builders/input_reader_builder.py index c7755177e70d528984ea425f21fb9afaf11d9eaa..50a8becbe0542cfe916946a7c31aa4c6af4b9ac0 100644 --- a/research/object_detection/builders/input_reader_builder.py +++ b/research/object_detection/builders/input_reader_builder.py @@ -85,7 +85,8 @@ def build(input_reader_config): elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'): decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder( label_map_proto_file=label_map_proto_file, - load_context_features=input_reader_config.load_context_features) + load_context_features=input_reader_config.load_context_features, + load_context_image_ids=input_reader_config.load_context_image_ids) return decoder.decode(string_tensor) raise ValueError('Unsupported input_type.') raise ValueError('Unsupported input_reader_config.') diff --git a/research/object_detection/builders/losses_builder.py b/research/object_detection/builders/losses_builder.py index 5a69c9b602c95ab6c8368638b2e38448ae113b9c..5aec7e192104cda8672f34bc2230efb725938a7c 100644 --- a/research/object_detection/builders/losses_builder.py +++ b/research/object_detection/builders/losses_builder.py @@ -204,6 +204,9 @@ def _build_localization_loss(loss_config): if loss_type == 'l1_localization_loss': return losses.L1LocalizationLoss() + if loss_type == 'weighted_giou': + return losses.WeightedGIOULocalizationLoss() + raise ValueError('Empty loss config.') @@ -227,7 +230,7 @@ def _build_classification_loss(loss_config): if loss_type == 'weighted_sigmoid': return losses.WeightedSigmoidClassificationLoss() - if loss_type == 'weighted_sigmoid_focal': + elif loss_type == 'weighted_sigmoid_focal': config = 
loss_config.weighted_sigmoid_focal alpha = None if config.HasField('alpha'): @@ -236,25 +239,31 @@ def _build_classification_loss(loss_config): gamma=config.gamma, alpha=alpha) - if loss_type == 'weighted_softmax': + elif loss_type == 'weighted_softmax': config = loss_config.weighted_softmax return losses.WeightedSoftmaxClassificationLoss( logit_scale=config.logit_scale) - if loss_type == 'weighted_logits_softmax': + elif loss_type == 'weighted_logits_softmax': config = loss_config.weighted_logits_softmax return losses.WeightedSoftmaxClassificationAgainstLogitsLoss( logit_scale=config.logit_scale) - if loss_type == 'bootstrapped_sigmoid': + elif loss_type == 'bootstrapped_sigmoid': config = loss_config.bootstrapped_sigmoid return losses.BootstrappedSigmoidClassificationLoss( alpha=config.alpha, bootstrap_type=('hard' if config.hard_bootstrap else 'soft')) - if loss_type == 'penalty_reduced_logistic_focal_loss': + elif loss_type == 'penalty_reduced_logistic_focal_loss': config = loss_config.penalty_reduced_logistic_focal_loss return losses.PenaltyReducedLogisticFocalLoss( alpha=config.alpha, beta=config.beta) - raise ValueError('Empty loss config.') + elif loss_type == 'weighted_dice_classification_loss': + config = loss_config.weighted_dice_classification_loss + return losses.WeightedDiceClassificationLoss( + squared_normalization=config.squared_normalization) + + else: + raise ValueError('Empty loss config.') diff --git a/research/object_detection/builders/losses_builder_test.py b/research/object_detection/builders/losses_builder_test.py index b37b7f3195427b951e2c508f0df191f176b9d835..07c01653b20532782e808030e452e8df84e533a1 100644 --- a/research/object_detection/builders/losses_builder_test.py +++ b/research/object_detection/builders/losses_builder_test.py @@ -97,6 +97,23 @@ class LocalizationLossBuilderTest(tf.test.TestCase): self.assertIsInstance(localization_loss, losses.WeightedIOULocalizationLoss) + def test_build_weighted_giou_localization_loss(self): + 
losses_text_proto = """ + localization_loss { + weighted_giou { + } + } + classification_loss { + weighted_softmax { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertIsInstance(localization_loss, + losses.WeightedGIOULocalizationLoss) + def test_anchorwise_output(self): losses_text_proto = """ localization_loss { @@ -298,6 +315,45 @@ class ClassificationLossBuilderTest(tf.test.TestCase): with self.assertRaises(ValueError): losses_builder.build(losses_proto) + def test_build_penalty_reduced_logistic_focal_loss(self): + losses_text_proto = """ + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + localization_loss { + l1_localization_loss { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertIsInstance(classification_loss, + losses.PenaltyReducedLogisticFocalLoss) + self.assertAlmostEqual(classification_loss._alpha, 2.0) + self.assertAlmostEqual(classification_loss._beta, 4.0) + + def test_build_dice_loss(self): + losses_text_proto = """ + classification_loss { + weighted_dice_classification_loss { + squared_normalization: true + } + } + localization_loss { + l1_localization_loss { + } + } + """ + losses_proto = losses_pb2.Loss() + text_format.Merge(losses_text_proto, losses_proto) + classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto) + self.assertIsInstance(classification_loss, + losses.WeightedDiceClassificationLoss) + assert classification_loss._squared_normalization class HardExampleMinerBuilderTest(tf.test.TestCase): diff --git a/research/object_detection/builders/model_builder.py b/research/object_detection/builders/model_builder.py index 
d69b7bc7c2546a6461a2d75d3576d725d748d409..08c65e0763ab2450a368f280859c6b329b31d7a6 100644 --- a/research/object_detection/builders/model_builder.py +++ b/research/object_detection/builders/model_builder.py @@ -17,6 +17,9 @@ import functools import sys + +from absl import logging + from object_detection.builders import anchor_generator_builder from object_detection.builders import box_coder_builder from object_detection.builders import box_predictor_builder @@ -39,6 +42,7 @@ from object_detection.protos import losses_pb2 from object_detection.protos import model_pb2 from object_detection.utils import label_map_util from object_detection.utils import ops +from object_detection.utils import spatial_transform_ops as spatial_ops from object_detection.utils import tf_version ## Feature Extractors for TF @@ -48,6 +52,8 @@ from object_detection.utils import tf_version # pylint: disable=g-import-not-at-top if tf_version.is_tf2(): from object_detection.models import center_net_hourglass_feature_extractor + from object_detection.models import center_net_mobilenet_v2_feature_extractor + from object_detection.models import center_net_mobilenet_v2_fpn_feature_extractor from object_detection.models import center_net_resnet_feature_extractor from object_detection.models import center_net_resnet_v1_fpn_feature_extractor from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras @@ -138,13 +144,34 @@ if tf_version.is_tf2(): } CENTER_NET_EXTRACTOR_FUNCTION_MAP = { - 'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50, - 'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101, + 'resnet_v2_50': + center_net_resnet_feature_extractor.resnet_v2_50, + 'resnet_v2_101': + center_net_resnet_feature_extractor.resnet_v2_101, + 'resnet_v1_18_fpn': + center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn, + 'resnet_v1_34_fpn': + center_net_resnet_v1_fpn_feature_extractor.resnet_v1_34_fpn, 'resnet_v1_50_fpn': 
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn, 'resnet_v1_101_fpn': center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn, - 'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104, + 'hourglass_10': + center_net_hourglass_feature_extractor.hourglass_10, + 'hourglass_20': + center_net_hourglass_feature_extractor.hourglass_20, + 'hourglass_32': + center_net_hourglass_feature_extractor.hourglass_32, + 'hourglass_52': + center_net_hourglass_feature_extractor.hourglass_52, + 'hourglass_104': + center_net_hourglass_feature_extractor.hourglass_104, + 'mobilenet_v2': + center_net_mobilenet_v2_feature_extractor.mobilenet_v2, + 'mobilenet_v2_fpn': + center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn, + 'mobilenet_v2_fpn_sep_conv': + center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn, } FEATURE_EXTRACTOR_MAPS = [ @@ -220,9 +247,12 @@ if tf_version.is_tf1(): frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor, } + CENTER_NET_EXTRACTOR_FUNCTION_MAP = {} + FEATURE_EXTRACTOR_MAPS = [ SSD_FEATURE_EXTRACTOR_CLASS_MAP, - FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP + FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP, + CENTER_NET_EXTRACTOR_FUNCTION_MAP ] @@ -515,9 +545,31 @@ def _build_faster_rcnn_keras_feature_extractor( feature_type)) feature_extractor_class = FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[ feature_type] + + kwargs = {} + + if feature_extractor_config.HasField('conv_hyperparams'): + kwargs.update({ + 'conv_hyperparams': + hyperparams_builder.KerasLayerHyperparams( + feature_extractor_config.conv_hyperparams), + 'override_base_feature_extractor_hyperparams': + feature_extractor_config.override_base_feature_extractor_hyperparams + }) + + if feature_extractor_config.HasField('fpn'): + kwargs.update({ + 'fpn_min_level': + feature_extractor_config.fpn.min_level, + 'fpn_max_level': + feature_extractor_config.fpn.max_level, + 'additional_layer_depth': + feature_extractor_config.fpn.additional_layer_depth, + }) + return 
feature_extractor_class( is_training, first_stage_features_stride, - batch_norm_trainable) + batch_norm_trainable, **kwargs) def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): @@ -648,8 +700,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): second_stage_localization_loss_weight) crop_and_resize_fn = ( - ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize - else ops.native_crop_and_resize) + spatial_ops.multilevel_matmul_crop_and_resize + if frcnn_config.use_matmul_crop_and_resize + else spatial_ops.multilevel_native_crop_and_resize) clip_anchors_to_image = ( frcnn_config.clip_anchors_to_image) @@ -719,7 +772,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): 'return_raw_detections_during_predict': frcnn_config.return_raw_detections_during_predict, 'output_final_box_features': - frcnn_config.output_final_box_features + frcnn_config.output_final_box_features, + 'output_final_box_rpn_features': + frcnn_config.output_final_box_rpn_features, } if ((not is_keras and isinstance(second_stage_box_predictor, @@ -736,7 +791,19 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): 'attention_bottleneck_dimension': context_config.attention_bottleneck_dimension, 'attention_temperature': - context_config.attention_temperature + context_config.attention_temperature, + 'use_self_attention': + context_config.use_self_attention, + 'use_long_term_attention': + context_config.use_long_term_attention, + 'self_attention_in_sequence': + context_config.self_attention_in_sequence, + 'num_attention_heads': + context_config.num_attention_heads, + 'num_attention_layers': + context_config.num_attention_layers, + 'attention_position': + context_config.attention_position }) return context_rcnn_meta_arch.ContextRCNNMetaArch( initial_crop_size=initial_crop_size, @@ -792,6 +859,25 @@ def keypoint_proto_to_params(kp_config, keypoint_map_dict): for label, value in 
kp_config.keypoint_label_to_std.items(): keypoint_std_dev_dict[label] = value keypoint_std_dev = [keypoint_std_dev_dict[label] for label in keypoint_labels] + if kp_config.HasField('heatmap_head_params'): + heatmap_head_num_filters = list(kp_config.heatmap_head_params.num_filters) + heatmap_head_kernel_sizes = list(kp_config.heatmap_head_params.kernel_sizes) + else: + heatmap_head_num_filters = [256] + heatmap_head_kernel_sizes = [3] + if kp_config.HasField('offset_head_params'): + offset_head_num_filters = list(kp_config.offset_head_params.num_filters) + offset_head_kernel_sizes = list(kp_config.offset_head_params.kernel_sizes) + else: + offset_head_num_filters = [256] + offset_head_kernel_sizes = [3] + if kp_config.HasField('regress_head_params'): + regress_head_num_filters = list(kp_config.regress_head_params.num_filters) + regress_head_kernel_sizes = list( + kp_config.regress_head_params.kernel_sizes) + else: + regress_head_num_filters = [256] + regress_head_kernel_sizes = [3] return center_net_meta_arch.KeypointEstimationParams( task_name=kp_config.task_name, class_id=label_map_item.id - CLASS_ID_OFFSET, @@ -814,7 +900,19 @@ def keypoint_proto_to_params(kp_config, keypoint_map_dict): candidate_search_scale=kp_config.candidate_search_scale, candidate_ranking_mode=kp_config.candidate_ranking_mode, offset_peak_radius=kp_config.offset_peak_radius, - per_keypoint_offset=kp_config.per_keypoint_offset) + per_keypoint_offset=kp_config.per_keypoint_offset, + predict_depth=kp_config.predict_depth, + per_keypoint_depth=kp_config.per_keypoint_depth, + keypoint_depth_loss_weight=kp_config.keypoint_depth_loss_weight, + score_distance_offset=kp_config.score_distance_offset, + clip_out_of_frame_keypoints=kp_config.clip_out_of_frame_keypoints, + rescore_instances=kp_config.rescore_instances, + heatmap_head_num_filters=heatmap_head_num_filters, + heatmap_head_kernel_sizes=heatmap_head_kernel_sizes, + offset_head_num_filters=offset_head_num_filters, + 
offset_head_kernel_sizes=offset_head_kernel_sizes, + regress_head_num_filters=regress_head_num_filters, + regress_head_kernel_sizes=regress_head_kernel_sizes) def object_detection_proto_to_params(od_config): @@ -844,13 +942,26 @@ def object_center_proto_to_params(oc_config): losses_pb2.WeightedL2LocalizationLoss()) loss.classification_loss.CopyFrom(oc_config.classification_loss) classification_loss, _, _, _, _, _, _ = (losses_builder.build(loss)) + keypoint_weights_for_center = [] + if oc_config.keypoint_weights_for_center: + keypoint_weights_for_center = list(oc_config.keypoint_weights_for_center) + + if oc_config.HasField('center_head_params'): + center_head_num_filters = list(oc_config.center_head_params.num_filters) + center_head_kernel_sizes = list(oc_config.center_head_params.kernel_sizes) + else: + center_head_num_filters = [256] + center_head_kernel_sizes = [3] return center_net_meta_arch.ObjectCenterParams( classification_loss=classification_loss, object_center_loss_weight=oc_config.object_center_loss_weight, heatmap_bias_init=oc_config.heatmap_bias_init, min_box_overlap_iou=oc_config.min_box_overlap_iou, max_box_predictions=oc_config.max_box_predictions, - use_labeled_classes=oc_config.use_labeled_classes) + use_labeled_classes=oc_config.use_labeled_classes, + keypoint_weights_for_center=keypoint_weights_for_center, + center_head_num_filters=center_head_num_filters, + center_head_kernel_sizes=center_head_kernel_sizes) def mask_proto_to_params(mask_config): @@ -886,6 +997,39 @@ def densepose_proto_to_params(densepose_config): heatmap_bias_init=densepose_config.heatmap_bias_init) +def tracking_proto_to_params(tracking_config): + """Converts CenterNet.TrackEstimation proto to parameter namedtuple.""" + loss = losses_pb2.Loss() + # Add dummy localization loss to avoid the loss_builder throwing error. + # TODO(yuhuic): update the loss builder to take the localization loss + # directly. 
+ loss.localization_loss.weighted_l2.CopyFrom( + losses_pb2.WeightedL2LocalizationLoss()) + loss.classification_loss.CopyFrom(tracking_config.classification_loss) + classification_loss, _, _, _, _, _, _ = losses_builder.build(loss) + return center_net_meta_arch.TrackParams( + num_track_ids=tracking_config.num_track_ids, + reid_embed_size=tracking_config.reid_embed_size, + classification_loss=classification_loss, + num_fc_layers=tracking_config.num_fc_layers, + task_loss_weight=tracking_config.task_loss_weight) + + +def temporal_offset_proto_to_params(temporal_offset_config): + """Converts CenterNet.TemporalOffsetEstimation proto to param-tuple.""" + loss = losses_pb2.Loss() + # Add dummy classification loss to avoid the loss_builder throwing error. + # TODO(yuhuic): update the loss builder to take the classification loss + # directly. + loss.classification_loss.weighted_sigmoid.CopyFrom( + losses_pb2.WeightedSigmoidClassificationLoss()) + loss.localization_loss.CopyFrom(temporal_offset_config.localization_loss) + _, localization_loss, _, _, _, _, _ = losses_builder.build(loss) + return center_net_meta_arch.TemporalOffsetParams( + localization_loss=localization_loss, + task_loss_weight=temporal_offset_config.task_loss_weight) + + def _build_center_net_model(center_net_config, is_training, add_summaries): """Build a CenterNet detection model. 
@@ -903,7 +1047,7 @@ def _build_center_net_model(center_net_config, is_training, add_summaries): center_net_config.image_resizer) _check_feature_extractor_exists(center_net_config.feature_extractor.type) feature_extractor = _build_center_net_feature_extractor( - center_net_config.feature_extractor) + center_net_config.feature_extractor, is_training) object_center_params = object_center_proto_to_params( center_net_config.object_center_params) @@ -943,6 +1087,20 @@ def _build_center_net_model(center_net_config, is_training, add_summaries): densepose_params = densepose_proto_to_params( center_net_config.densepose_estimation_task) + track_params = None + if center_net_config.HasField('track_estimation_task'): + track_params = tracking_proto_to_params( + center_net_config.track_estimation_task) + + temporal_offset_params = None + if center_net_config.HasField('temporal_offset_task'): + temporal_offset_params = temporal_offset_proto_to_params( + center_net_config.temporal_offset_task) + non_max_suppression_fn = None + if center_net_config.HasField('post_processing'): + non_max_suppression_fn, _ = post_processing_builder.build( + center_net_config.post_processing) + return center_net_meta_arch.CenterNetMetaArch( is_training=is_training, add_summaries=add_summaries, @@ -953,22 +1111,35 @@ def _build_center_net_model(center_net_config, is_training, add_summaries): object_detection_params=object_detection_params, keypoint_params_dict=keypoint_params_dict, mask_params=mask_params, - densepose_params=densepose_params) + densepose_params=densepose_params, + track_params=track_params, + temporal_offset_params=temporal_offset_params, + use_depthwise=center_net_config.use_depthwise, + compute_heatmap_sparse=center_net_config.compute_heatmap_sparse, + non_max_suppression_fn=non_max_suppression_fn) -def _build_center_net_feature_extractor( - feature_extractor_config): +def _build_center_net_feature_extractor(feature_extractor_config, is_training): """Build a CenterNet feature 
extractor from the given config.""" if feature_extractor_config.type not in CENTER_NET_EXTRACTOR_FUNCTION_MAP: raise ValueError('\'{}\' is not a known CenterNet feature extractor type' .format(feature_extractor_config.type)) + # For backwards compatibility: + use_separable_conv = ( + feature_extractor_config.use_separable_conv or + feature_extractor_config.type == 'mobilenet_v2_fpn_sep_conv') + kwargs = { + 'channel_means': list(feature_extractor_config.channel_means), + 'channel_stds': list(feature_extractor_config.channel_stds), + 'bgr_ordering': feature_extractor_config.bgr_ordering, + 'depth_multiplier': feature_extractor_config.depth_multiplier, + 'use_separable_conv': use_separable_conv, + } + return CENTER_NET_EXTRACTOR_FUNCTION_MAP[feature_extractor_config.type]( - channel_means=list(feature_extractor_config.channel_means), - channel_stds=list(feature_extractor_config.channel_stds), - bgr_ordering=feature_extractor_config.bgr_ordering - ) + **kwargs) META_ARCH_BUILDER_MAP = { diff --git a/research/object_detection/builders/model_builder_tf2_test.py b/research/object_detection/builders/model_builder_tf2_test.py index 9cbefdc0f1f598b380570d0b0ab140c29855d8d0..9d386fdd353f51a404f2b1bc5592d9f06446b792 100644 --- a/research/object_detection/builders/model_builder_tf2_test.py +++ b/research/object_detection/builders/model_builder_tf2_test.py @@ -18,20 +18,23 @@ import os import unittest +from absl.testing import parameterized import tensorflow.compat.v1 as tf from google.protobuf import text_format from object_detection.builders import model_builder from object_detection.builders import model_builder_test from object_detection.core import losses -from object_detection.models import center_net_resnet_feature_extractor +from object_detection.models import center_net_hourglass_feature_extractor +from object_detection.models.keras_models import hourglass_network from object_detection.protos import center_net_pb2 from object_detection.protos import model_pb2 from 
object_detection.utils import tf_version @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') -class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): +class ModelBuilderTF2Test( + model_builder_test.ModelBuilderTest, parameterized.TestCase): def default_ssd_feature_extractor(self): return 'ssd_resnet50_v1_fpn_keras' @@ -78,7 +81,7 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): f.write(keypoint_spec_text) return keypoint_label_map_path - def get_fake_keypoint_proto(self): + def get_fake_keypoint_proto(self, customize_head_params=False): task_proto_txt = """ task_name: "human_pose" task_loss_weight: 0.9 @@ -116,12 +119,30 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): candidate_ranking_mode: "score_distance_ratio" offset_peak_radius: 3 per_keypoint_offset: true + predict_depth: true + per_keypoint_depth: true + keypoint_depth_loss_weight: 0.3 """ + if customize_head_params: + task_proto_txt += """ + heatmap_head_params { + num_filters: 64 + num_filters: 32 + kernel_sizes: 5 + kernel_sizes: 3 + } + offset_head_params { + num_filters: 128 + num_filters: 64 + kernel_sizes: 5 + kernel_sizes: 3 + } + """ config = text_format.Merge(task_proto_txt, center_net_pb2.CenterNet.KeypointEstimation()) return config - def get_fake_object_center_proto(self): + def get_fake_object_center_proto(self, customize_head_params=False): proto_txt = """ object_center_loss_weight: 0.5 heatmap_bias_init: 3.14 @@ -134,6 +155,35 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): } } """ + if customize_head_params: + proto_txt += """ + center_head_params { + num_filters: 64 + num_filters: 32 + kernel_sizes: 5 + kernel_sizes: 3 + } + """ + return text_format.Merge(proto_txt, + center_net_pb2.CenterNet.ObjectCenterParams()) + + def get_fake_object_center_from_keypoints_proto(self): + proto_txt = """ + object_center_loss_weight: 0.5 + heatmap_bias_init: 3.14 + min_box_overlap_iou: 0.2 + max_box_predictions: 15 + 
classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 3.0 + beta: 4.0 + } + } + keypoint_weights_for_center: 1.0 + keypoint_weights_for_center: 0.0 + keypoint_weights_for_center: 1.0 + keypoint_weights_for_center: 0.0 + """ return text_format.Merge(proto_txt, center_net_pb2.CenterNet.ObjectCenterParams()) @@ -186,13 +236,17 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): return text_format.Merge(proto_txt, center_net_pb2.CenterNet.DensePoseEstimation()) - def test_create_center_net_model(self): + @parameterized.parameters( + {'customize_head_params': True}, + {'customize_head_params': False} + ) + def test_create_center_net_model(self, customize_head_params): """Test building a CenterNet model from proto txt.""" proto_txt = """ center_net { num_classes: 10 feature_extractor { - type: "resnet_v2_101" + type: "hourglass_52" channel_stds: [4, 5, 6] bgr_ordering: true } @@ -208,11 +262,13 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): # Set up the configuration proto. 
config = text_format.Merge(proto_txt, model_pb2.DetectionModel()) config.center_net.object_center_params.CopyFrom( - self.get_fake_object_center_proto()) + self.get_fake_object_center_proto( + customize_head_params=customize_head_params)) config.center_net.object_detection_task.CopyFrom( self.get_fake_object_detection_proto()) config.center_net.keypoint_estimation_task.append( - self.get_fake_keypoint_proto()) + self.get_fake_keypoint_proto( + customize_head_params=customize_head_params)) config.center_net.keypoint_label_map_path = ( self.get_fake_label_map_file_path()) config.center_net.mask_estimation_task.CopyFrom( @@ -233,6 +289,12 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): self.assertAlmostEqual( model._center_params.heatmap_bias_init, 3.14, places=4) self.assertEqual(model._center_params.max_box_predictions, 15) + if customize_head_params: + self.assertEqual(model._center_params.center_head_num_filters, [64, 32]) + self.assertEqual(model._center_params.center_head_kernel_sizes, [5, 3]) + else: + self.assertEqual(model._center_params.center_head_num_filters, [256]) + self.assertEqual(model._center_params.center_head_kernel_sizes, [3]) # Check object detection related parameters. self.assertAlmostEqual(model._od_params.offset_loss_weight, 0.1) @@ -264,6 +326,21 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): self.assertEqual(kp_params.candidate_ranking_mode, 'score_distance_ratio') self.assertEqual(kp_params.offset_peak_radius, 3) self.assertEqual(kp_params.per_keypoint_offset, True) + self.assertEqual(kp_params.predict_depth, True) + self.assertEqual(kp_params.per_keypoint_depth, True) + self.assertAlmostEqual(kp_params.keypoint_depth_loss_weight, 0.3) + if customize_head_params: + # Set by the config. 
+ self.assertEqual(kp_params.heatmap_head_num_filters, [64, 32]) + self.assertEqual(kp_params.heatmap_head_kernel_sizes, [5, 3]) + self.assertEqual(kp_params.offset_head_num_filters, [128, 64]) + self.assertEqual(kp_params.offset_head_kernel_sizes, [5, 3]) + else: + # Default values: + self.assertEqual(kp_params.heatmap_head_num_filters, [256]) + self.assertEqual(kp_params.heatmap_head_kernel_sizes, [3]) + self.assertEqual(kp_params.offset_head_num_filters, [256]) + self.assertEqual(kp_params.offset_head_kernel_sizes, [3]) # Check mask related parameters. self.assertAlmostEqual(model._mask_params.task_loss_weight, 0.7) @@ -292,11 +369,58 @@ class ModelBuilderTF2Test(model_builder_test.ModelBuilderTest): # Check feature extractor parameters. self.assertIsInstance( - model._feature_extractor, - center_net_resnet_feature_extractor.CenterNetResnetFeatureExtractor) + model._feature_extractor, center_net_hourglass_feature_extractor + .CenterNetHourglassFeatureExtractor) self.assertAllClose(model._feature_extractor._channel_means, [0, 0, 0]) self.assertAllClose(model._feature_extractor._channel_stds, [4, 5, 6]) self.assertTrue(model._feature_extractor._bgr_ordering) + backbone = model._feature_extractor._network + self.assertIsInstance(backbone, hourglass_network.HourglassNetwork) + self.assertTrue(backbone.num_hourglasses, 1) + + def test_create_center_net_model_from_keypoints(self): + """Test building a CenterNet model from proto txt.""" + proto_txt = """ + center_net { + num_classes: 10 + feature_extractor { + type: "hourglass_52" + channel_stds: [4, 5, 6] + bgr_ordering: true + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 512 + max_dimension: 512 + pad_to_max_dimension: true + } + } + } + """ + # Set up the configuration proto. + config = text_format.Merge(proto_txt, model_pb2.DetectionModel()) + # Only add object center and keypoint estimation configs here. 
+ config.center_net.object_center_params.CopyFrom( + self.get_fake_object_center_from_keypoints_proto()) + config.center_net.keypoint_estimation_task.append( + self.get_fake_keypoint_proto()) + config.center_net.keypoint_label_map_path = ( + self.get_fake_label_map_file_path()) + + # Build the model from the configuration. + model = model_builder.build(config, is_training=True) + + # Check object center related parameters. + self.assertEqual(model._num_classes, 10) + self.assertEqual(model._center_params.keypoint_weights_for_center, + [1.0, 0.0, 1.0, 0.0]) + + # Check keypoint estimation related parameters. + kp_params = model._kp_params_dict['human_pose'] + self.assertAlmostEqual(kp_params.task_loss_weight, 0.9) + self.assertEqual(kp_params.keypoint_indices, [0, 1, 2, 3]) + self.assertEqual(kp_params.keypoint_labels, + ['nose', 'left_shoulder', 'right_shoulder', 'hip']) if __name__ == '__main__': diff --git a/research/object_detection/builders/optimizer_builder.py b/research/object_detection/builders/optimizer_builder.py index d602bad1292e222b5cbc532a873299dd918ef011..f24747aa9bac11574612db938d87d9bc50d2e9ea 100644 --- a/research/object_detection/builders/optimizer_builder.py +++ b/research/object_detection/builders/optimizer_builder.py @@ -18,6 +18,12 @@ import tensorflow.compat.v1 as tf from object_detection.utils import learning_schedules +from object_detection.utils import tf_version + +# pylint: disable=g-import-not-at-top +if tf_version.is_tf2(): + from official.modeling.optimization import ema_optimizer +# pylint: enable=g-import-not-at-top try: from tensorflow.contrib import opt as tf_opt # pylint: disable=g-import-not-at-top @@ -130,7 +136,9 @@ def build_optimizers_tf_v2(optimizer_config, global_step=None): raise ValueError('Optimizer %s not supported.' 
% optimizer_type) if optimizer_config.use_moving_average: - raise ValueError('Moving average not supported in eager mode.') + optimizer = ema_optimizer.ExponentialMovingAverage( + optimizer=optimizer, + average_decay=optimizer_config.moving_average_decay) return optimizer, summary_vars diff --git a/research/object_detection/builders/optimizer_builder_tf2_test.py b/research/object_detection/builders/optimizer_builder_tf2_test.py index 2c555f9a0f4c22b7c27955c92eaa3655c8fae5c6..5ae125fa0489b80fa76b76d2af4521aed15ae6e2 100644 --- a/research/object_detection/builders/optimizer_builder_tf2_test.py +++ b/research/object_detection/builders/optimizer_builder_tf2_test.py @@ -82,7 +82,7 @@ class OptimizerBuilderV2Test(tf.test.TestCase): optimizer, _ = optimizer_builder.build(optimizer_proto) self.assertIsInstance(optimizer, tf.keras.optimizers.Adam) - def testMovingAverageOptimizerUnsupported(self): + def testBuildMovingAverageOptimizer(self): optimizer_text_proto = """ adam_optimizer: { learning_rate: { @@ -95,8 +95,8 @@ class OptimizerBuilderV2Test(tf.test.TestCase): """ optimizer_proto = optimizer_pb2.Optimizer() text_format.Merge(optimizer_text_proto, optimizer_proto) - with self.assertRaises(ValueError): - optimizer_builder.build(optimizer_proto) + optimizer, _ = optimizer_builder.build(optimizer_proto) + self.assertIsInstance(optimizer, tf.keras.optimizers.Optimizer) if __name__ == '__main__': diff --git a/research/object_detection/builders/post_processing_builder.py b/research/object_detection/builders/post_processing_builder.py index 18795f58ccb4a382bdc457c2d15e069d1bb52662..c61f6891e29eeced8ba5fbe3f78fe1c95eb60501 100644 --- a/research/object_detection/builders/post_processing_builder.py +++ b/research/object_detection/builders/post_processing_builder.py @@ -103,7 +103,8 @@ def _build_non_max_suppressor(nms_config): use_partitioned_nms=nms_config.use_partitioned_nms, use_combined_nms=nms_config.use_combined_nms, 
change_coordinate_frame=nms_config.change_coordinate_frame, - use_hard_nms=nms_config.use_hard_nms) + use_hard_nms=nms_config.use_hard_nms, + use_cpu_nms=nms_config.use_cpu_nms) return non_max_suppressor_fn diff --git a/research/object_detection/builders/preprocessor_builder.py b/research/object_detection/builders/preprocessor_builder.py index b61239d2e1ec87c232fbfdb53d0cb3c39da26e3e..9e59d94aa72a884806ac2e9f41244bccd8632aaf 100644 --- a/research/object_detection/builders/preprocessor_builder.py +++ b/research/object_detection/builders/preprocessor_builder.py @@ -89,8 +89,6 @@ PREPROCESSING_FUNCTION_MAP = { preprocessor.random_adjust_saturation, 'random_distort_color': preprocessor.random_distort_color, - 'random_jitter_boxes': - preprocessor.random_jitter_boxes, 'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio, 'random_black_patches': @@ -109,6 +107,8 @@ PREPROCESSING_FUNCTION_MAP = { preprocessor.subtract_channel_mean, 'convert_class_logits_to_softmax': preprocessor.convert_class_logits_to_softmax, + 'adjust_gamma': + preprocessor.adjust_gamma, } @@ -123,6 +123,16 @@ RESIZE_METHOD_MAP = { } +def get_random_jitter_kwargs(proto): + return { + 'ratio': + proto.ratio, + 'jitter_mode': + preprocessor_pb2.RandomJitterBoxes.JitterMode.Name(proto.jitter_mode + ).lower() + } + + def build(preprocessor_step_config): """Builds preprocessing step based on the configuration. 
@@ -425,4 +435,8 @@ def build(preprocessor_step_config): 'output_size': config.output_size, } + if step_type == 'random_jitter_boxes': + config = preprocessor_step_config.random_jitter_boxes + kwargs = get_random_jitter_kwargs(config) + return preprocessor.random_jitter_boxes, kwargs raise ValueError('Unknown preprocessing step.') diff --git a/research/object_detection/builders/preprocessor_builder_test.py b/research/object_detection/builders/preprocessor_builder_test.py index 9e90344d0478229fa95355b53ecfa5f876325936..396adec5673f817478578500d8c7728c06c25855 100644 --- a/research/object_detection/builders/preprocessor_builder_test.py +++ b/research/object_detection/builders/preprocessor_builder_test.py @@ -216,13 +216,14 @@ class PreprocessorBuilderTest(tf.test.TestCase): preprocessor_text_proto = """ random_jitter_boxes { ratio: 0.1 + jitter_mode: SHRINK } """ preprocessor_proto = preprocessor_pb2.PreprocessingStep() text_format.Merge(preprocessor_text_proto, preprocessor_proto) function, args = preprocessor_builder.build(preprocessor_proto) self.assertEqual(function, preprocessor.random_jitter_boxes) - self.assert_dictionary_close(args, {'ratio': 0.1}) + self.assert_dictionary_close(args, {'ratio': 0.1, 'jitter_mode': 'shrink'}) def test_build_random_crop_image(self): preprocessor_text_proto = """ @@ -753,6 +754,19 @@ class PreprocessorBuilderTest(tf.test.TestCase): 'max_border': 128 }) + def test_adjust_gamma(self): + preprocessor_text_proto = """ + adjust_gamma { + gamma: 2.2 + gain: 2.0 + } + """ + preprocessor_proto = preprocessor_pb2.PreprocessingStep() + text_format.Parse(preprocessor_text_proto, preprocessor_proto) + function, args = preprocessor_builder.build(preprocessor_proto) + self.assertEqual(function, preprocessor.adjust_gamma) + self.assert_dictionary_close(args, {'gamma': 2.2, 'gain': 2.0}) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/colab_tutorials/centernet_on_device.ipynb 
b/research/object_detection/colab_tutorials/centernet_on_device.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..bb3af3034490b12fc68cb09e8903c01d07ac31d6 --- /dev/null +++ b/research/object_detection/colab_tutorials/centernet_on_device.ipynb @@ -0,0 +1,762 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "centernet_on_mobile.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pDIqEDDxWAh2" + }, + "source": [ + "#Introduction\r\n", + "\r\n", + "Welcome to the **CenterNet on-device with TensorFlow Lite** Colab. Here, we demonstrate how you can run a mobile-optimized version of the [CenterNet](https://arxiv.org/abs/1904.08189) architecture with [TensorFlow Lite](https://www.tensorflow.org/lite) (a.k.a. TFLite). \r\n", + "\r\n", + "Users can use this notebook as a reference for obtaining TFLite version of CenterNet for *Object Detection* or [*Keypoint detection*](https://cocodataset.org/#keypoints-2020). The code also shows how to perform pre-/post-processing & inference with TFLite's Python API.\r\n", + "\r\n", + "**NOTE:** CenterNet support in TFLite is still experimental, and currently works with floating-point inference only." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LQWTJ-BWzmW" + }, + "source": [ + "# Set Up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gx84EpH7INPj" + }, + "source": [ + "## Libraries & Imports" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EU_hXi7IW9QC" + }, + "source": [ + "!pip install tf-nightly" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZTU9_JcOZz-J" + }, + "source": [ + "import os\r\n", + "import pathlib\r\n", + "\r\n", + "# Clone the tensorflow models repository if it doesn't already exist\r\n", + "if \"models\" in pathlib.Path.cwd().parts:\r\n", + " while \"models\" in pathlib.Path.cwd().parts:\r\n", + " os.chdir('..')\r\n", + "elif not pathlib.Path('models').exists():\r\n", + " !git clone --depth 1 https://github.com/tensorflow/models" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "laJxis1WZ2xj" + }, + "source": [ + "# Install the Object Detection API\r\n", + "%%bash\r\n", + "cd models/research/\r\n", + "protoc object_detection/protos/*.proto --python_out=.\r\n", + "cp object_detection/packages/tf2/setup.py .\r\n", + "python -m pip install ." 
+ ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "je0LrJNjDsk9" + }, + "source": [ + "import matplotlib\r\n", + "import matplotlib.pyplot as plt\r\n", + "\r\n", + "import os\r\n", + "import random\r\n", + "import io\r\n", + "import imageio\r\n", + "import glob\r\n", + "import scipy.misc\r\n", + "import numpy as np\r\n", + "from six import BytesIO\r\n", + "from PIL import Image, ImageDraw, ImageFont\r\n", + "from IPython.display import display, Javascript\r\n", + "from IPython.display import Image as IPyImage\r\n", + "\r\n", + "import tensorflow as tf\r\n", + "\r\n", + "from object_detection.utils import label_map_util\r\n", + "from object_detection.utils import config_util\r\n", + "from object_detection.utils import visualization_utils as viz_utils\r\n", + "from object_detection.utils import colab_utils\r\n", + "from object_detection.utils import config_util\r\n", + "from object_detection.builders import model_builder\r\n", + "\r\n", + "%matplotlib inline" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O5IXwbhhH0bs" + }, + "source": [ + "## Test Image from COCO\r\n", + "\r\n", + "We use a sample image from the COCO'17 validation dataset that contains people, to showcase inference with CenterNet." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "h-JuG84HDvm-" + }, + "source": [ + "# Download COCO'17 validation set for test image\r\n", + "%%bash\r\n", + "mkdir -p coco && cd coco\r\n", + "wget -q -N http://images.cocodataset.org/zips/val2017.zip\r\n", + "unzip -q -o val2017.zip && rm *.zip\r\n", + "cd .." 
+ ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "peX5mPGmEj8s" + }, + "source": [ + "# Print the image we are going to test on as a sanity check.\r\n", + "\r\n", + "def load_image_into_numpy_array(path):\r\n", + " \"\"\"Load an image from file into a numpy array.\r\n", + "\r\n", + " Puts image into numpy array to feed into tensorflow graph.\r\n", + " Note that by convention we put it into a numpy array with shape\r\n", + " (height, width, channels), where channels=3 for RGB.\r\n", + "\r\n", + " Args:\r\n", + " path: a file path.\r\n", + "\r\n", + " Returns:\r\n", + " uint8 numpy array with shape (img_height, img_width, 3)\r\n", + " \"\"\"\r\n", + " img_data = tf.io.gfile.GFile(path, 'rb').read()\r\n", + " image = Image.open(BytesIO(img_data))\r\n", + " (im_width, im_height) = image.size\r\n", + " return np.array(image.getdata()).reshape(\r\n", + " (im_height, im_width, 3)).astype(np.uint8)\r\n", + "\r\n", + "image_path = 'coco/val2017/000000013729.jpg'\r\n", + "plt.figure(figsize = (30, 20))\r\n", + "plt.imshow(load_image_into_numpy_array(image_path))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6cqOdvfrR1vW" + }, + "source": [ + "## Utilities for Inference\r\n", + "\r\n", + "The `detect` function shown below describes how input and output tensors from CenterNet (obtained in subsequent sections) can be processed. This logic can be ported to other languages depending on your application (for e.g. to Java for Android apps)." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "13Ouc2C3RyqR" + }, + "source": [ + "def detect(interpreter, input_tensor, include_keypoint=False):\r\n", + " \"\"\"Run detection on an input image.\r\n", + "\r\n", + " Args:\r\n", + " interpreter: tf.lite.Interpreter\r\n", + " input_tensor: A [1, height, width, 3] Tensor of type tf.float32.\r\n", + " Note that height and width can be anything since the image will be\r\n", + " immediately resized according to the needs of the model within this\r\n", + " function.\r\n", + " include_keypoint: True if model supports keypoints output. See\r\n", + " https://cocodataset.org/#keypoints-2020\r\n", + "\r\n", + " Returns:\r\n", + " A sequence containing the following output tensors:\r\n", + " boxes: a numpy array of shape [N, 4]\r\n", + " classes: a numpy array of shape [N]. Note that class indices are \r\n", + " 1-based, and match the keys in the label map.\r\n", + " scores: a numpy array of shape [N] or None. If scores=None, then\r\n", + " this function assumes that the boxes to be plotted are groundtruth\r\n", + " boxes and plot all boxes as black with no classes or scores.\r\n", + " category_index: a dict containing category dictionaries (each holding\r\n", + " category index `id` and category name `name`) keyed by category \r\n", + " indices.\r\n", + " If include_keypoints is True, the following are also returned:\r\n", + " keypoints: (optional) a numpy array of shape [N, 17, 2] representing\r\n", + " the yx-coordinates of the detection 17 COCO human keypoints\r\n", + " (https://cocodataset.org/#keypoints-2020) in normalized image frame\r\n", + " (i.e. [0.0, 1.0]). 
\r\n", + " keypoint_scores: (optional) a numpy array of shape [N, 17] representing the\r\n", + " keypoint prediction confidence scores.\r\n", + " \"\"\"\r\n", + " input_details = interpreter.get_input_details()\r\n", + " output_details = interpreter.get_output_details()\r\n", + "\r\n", + " interpreter.set_tensor(input_details[0]['index'], input_tensor.numpy())\r\n", + "\r\n", + " interpreter.invoke()\r\n", + "\r\n", + " boxes = interpreter.get_tensor(output_details[0]['index'])\r\n", + " classes = interpreter.get_tensor(output_details[1]['index'])\r\n", + " scores = interpreter.get_tensor(output_details[2]['index'])\r\n", + " num_detections = interpreter.get_tensor(output_details[3]['index'])\r\n", + "\r\n", + " if include_keypoint:\r\n", + " kpts = interpreter.get_tensor(output_details[4]['index'])\r\n", + " kpts_scores = interpreter.get_tensor(output_details[5]['index'])\r\n", + " return boxes, classes, scores, num_detections, kpts, kpts_scores\r\n", + " else:\r\n", + " return boxes, classes, scores, num_detections\r\n", + "\r\n", + "# Utility for visualizing results\r\n", + "def plot_detections(image_np,\r\n", + " boxes,\r\n", + " classes,\r\n", + " scores,\r\n", + " category_index,\r\n", + " keypoints=None,\r\n", + " keypoint_scores=None,\r\n", + " figsize=(12, 16),\r\n", + " image_name=None):\r\n", + " \"\"\"Wrapper function to visualize detections.\r\n", + "\r\n", + " Args:\r\n", + " image_np: uint8 numpy array with shape (img_height, img_width, 3)\r\n", + " boxes: a numpy array of shape [N, 4]\r\n", + " classes: a numpy array of shape [N]. Note that class indices are 1-based,\r\n", + " and match the keys in the label map.\r\n", + " scores: a numpy array of shape [N] or None. 
If scores=None, then\r\n", + " this function assumes that the boxes to be plotted are groundtruth\r\n", + " boxes and plot all boxes as black with no classes or scores.\r\n", + " category_index: a dict containing category dictionaries (each holding\r\n", + " category index `id` and category name `name`) keyed by category indices.\r\n", + " keypoints: (optional) a numpy array of shape [N, 17, 2] representing the \r\n", + " yx-coordinates of the detection 17 COCO human keypoints\r\n", + " (https://cocodataset.org/#keypoints-2020) in normalized image frame\r\n", + " (i.e. [0.0, 1.0]). \r\n", + " keypoint_scores: (optional) anumpy array of shape [N, 17] representing the\r\n", + " keypoint prediction confidence scores.\r\n", + " figsize: size for the figure.\r\n", + " image_name: a name for the image file.\r\n", + " \"\"\"\r\n", + "\r\n", + " keypoint_edges = [(0, 1),\r\n", + " (0, 2),\r\n", + " (1, 3),\r\n", + " (2, 4),\r\n", + " (0, 5),\r\n", + " (0, 6),\r\n", + " (5, 7),\r\n", + " (7, 9),\r\n", + " (6, 8),\r\n", + " (8, 10),\r\n", + " (5, 6),\r\n", + " (5, 11),\r\n", + " (6, 12),\r\n", + " (11, 12),\r\n", + " (11, 13),\r\n", + " (13, 15),\r\n", + " (12, 14),\r\n", + " (14, 16)]\r\n", + " image_np_with_annotations = image_np.copy()\r\n", + " # Only visualize objects that get a score > 0.3.\r\n", + " viz_utils.visualize_boxes_and_labels_on_image_array(\r\n", + " image_np_with_annotations,\r\n", + " boxes,\r\n", + " classes,\r\n", + " scores,\r\n", + " category_index,\r\n", + " keypoints=keypoints,\r\n", + " keypoint_scores=keypoint_scores,\r\n", + " keypoint_edges=keypoint_edges,\r\n", + " use_normalized_coordinates=True,\r\n", + " min_score_thresh=0.3)\r\n", + " if image_name:\r\n", + " plt.imsave(image_name, image_np_with_annotations)\r\n", + " else:\r\n", + " return image_np_with_annotations" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3cNYi8HuIWzO" + }, + "source": [ + "# Object Detection" + ] + }, + 
{ + "cell_type": "markdown", + "metadata": { + "id": "azdDCdWQMSoH" + }, + "source": [ + "## Download Model from Detection Zoo\r\n", + "\r\n", + "**NOTE:** Not all CenterNet models from the [TF2 Detection Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md) work with TFLite, only the [MobileNet-based version](http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz) does.\r\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Sywt8MKzIeOi" + }, + "source": [ + "# Get mobile-friendly CenterNet for Object Detection\r\n", + "# See TensorFlow 2 Detection Model Zoo for more details:\r\n", + "# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md\r\n", + "\r\n", + "%%bash\r\n", + "wget http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz\r\n", + "tar -xf centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz\r\n", + "rm centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz*" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MiRrVpTnLvsk" + }, + "source": [ + "Now that we have downloaded the CenterNet model that uses MobileNet as a backbone, we can obtain a TensorFlow Lite model from it. \r\n", + "\r\n", + "The downloaded archive already contains `model.tflite` that works with TensorFlow Lite, but we re-generate the model in the next sub-section to account for cases where you might re-train the model on your own dataset (with corresponding changes to `pipeline.config` & `checkpoint` directory)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jT0bruuxM496" + }, + "source": [ + "## Generate TensorFlow Lite Model\r\n", + "\r\n", + "First, we invoke `export_tflite_graph_tf2.py` to generate a TFLite-friendly intermediate SavedModel. 
This will then be passed to the TensorFlow Lite Converter for generating the final model.\r\n", + "\r\n", + "This is similar to what we do for [SSD architectures](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_on_mobile_tf2.md)." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jpcCjiQ_JrU5", + "collapsed": true + }, + "source": [ + "%%bash\r\n", + "# Export the intermediate SavedModel that outputs 10 detections & takes in an \r\n", + "# image of dim 320x320.\r\n", + "# Modify these parameters according to your needs.\r\n", + "\r\n", + "python models/research/object_detection/export_tflite_graph_tf2.py \\\r\n", + " --pipeline_config_path=centernet_mobilenetv2_fpn_od/pipeline.config \\\r\n", + " --trained_checkpoint_dir=centernet_mobilenetv2_fpn_od/checkpoint \\\r\n", + " --output_directory=centernet_mobilenetv2_fpn_od/tflite \\\r\n", + " --centernet_include_keypoints=false \\\r\n", + " --max_detections=10 \\\r\n", + " --config_override=\" \\\r\n", + " model{ \\\r\n", + " center_net { \\\r\n", + " image_resizer { \\\r\n", + " fixed_shape_resizer { \\\r\n", + " height: 320 \\\r\n", + " width: 320 \\\r\n", + " } \\\r\n", + " } \\\r\n", + " } \\\r\n", + " }\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "zhhP6HL8PUGq" + }, + "source": [ + "# Generate TensorFlow Lite model using the converter.\r\n", + "%%bash\r\n", + "tflite_convert --output_file=centernet_mobilenetv2_fpn_od/model.tflite \\\r\n", + " --saved_model_dir=centernet_mobilenetv2_fpn_od/tflite/saved_model" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gj1Q_e_2Rn5i" + }, + "source": [ + "## TensorFlow Lite Inference\r\n", + "\r\n", + "Use a TensorFlow Lite Interpreter to detect objects in the test image." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uV9t9icURsei" + }, + "source": [ + "%matplotlib inline\r\n", + "\r\n", + "# Load the TFLite model and allocate tensors.\r\n", + "model_path = 'centernet_mobilenetv2_fpn_od/model.tflite'\r\n", + "label_map_path = 'centernet_mobilenetv2_fpn_od/label_map.txt'\r\n", + "image_path = 'coco/val2017/000000013729.jpg'\r\n", + "\r\n", + "# Initialize TensorFlow Lite Interpreter.\r\n", + "interpreter = tf.lite.Interpreter(model_path=model_path)\r\n", + "interpreter.allocate_tensors()\r\n", + "\r\n", + "# Label map can be used to figure out what class ID maps to what\r\n", + "# label. `label_map.txt` is human-readable.\r\n", + "category_index = label_map_util.create_category_index_from_labelmap(\r\n", + " label_map_path)\r\n", + "\r\n", + "label_id_offset = 1\r\n", + "\r\n", + "image = tf.io.read_file(image_path)\r\n", + "image = tf.compat.v1.image.decode_jpeg(image)\r\n", + "image = tf.expand_dims(image, axis=0)\r\n", + "image_numpy = image.numpy()\r\n", + "\r\n", + "input_tensor = tf.convert_to_tensor(image_numpy, dtype=tf.float32)\r\n", + "# Note that CenterNet doesn't require any pre-processing except resizing to the\r\n", + "# input size that the TensorFlow Lite Interpreter was generated with.\r\n", + "input_tensor = tf.image.resize(input_tensor, (320, 320))\r\n", + "boxes, classes, scores, num_detections = detect(interpreter, input_tensor)\r\n", + "\r\n", + "vis_image = plot_detections(\r\n", + " image_numpy[0],\r\n", + " boxes[0],\r\n", + " classes[0].astype(np.uint32) + label_id_offset,\r\n", + " scores[0],\r\n", + " category_index)\r\n", + "plt.figure(figsize = (30, 20))\r\n", + "plt.imshow(vis_image)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DefXu4JXVxPD" + }, + "source": [ + "# Keypoints\r\n", + "\r\n", + "Unlike SSDs, CenterNet also supports COCO [Keypoint detection](https://cocodataset.org/#keypoints-2020). 
To be more specific, the 'keypoints' version of CenterNet shown here provides keypoints as a `[N, 17, 2]`-shaped tensor representing the (normalized) yx-coordinates of 17 COCO human keypoints.\r\n", + "\r\n", + "See the `detect()` function in the **Utilities for Inference** section to better understand the keypoints output." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xu47DkrDV18O" + }, + "source": [ + "## Download Model from Detection Zoo\r\n", + "\r\n", + "**NOTE:** Not all CenterNet models from the [TF2 Detection Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md) work with TFLite, only the [MobileNet-based version](http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz) does." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "sd7f64WjWD7z" + }, + "source": [ + "# Get mobile-friendly CenterNet for Keypoint detection task.\r\n", + "# See TensorFlow 2 Detection Model Zoo for more details:\r\n", + "# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md\r\n", + "\r\n", + "%%bash\r\n", + "wget http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_kpts.tar.gz\r\n", + "tar -xf centernet_mobilenetv2fpn_512x512_coco17_kpts.tar.gz\r\n", + "rm centernet_mobilenetv2fpn_512x512_coco17_kpts.tar.gz*" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NSFc-xSLX1ZC" + }, + "source": [ + "## Generate TensorFlow Lite Model\r\n", + "\r\n", + "As before, we leverage `export_tflite_graph_tf2.py` to generate a TFLite-friendly intermediate SavedModel. This will then be passed to the TFLite converter to generating the final model.\r\n", + "\r\n", + "Note that we need to include an additional `keypoint_label_map_path` parameter for exporting the keypoints outputs." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8kEhwYynX-cD" + }, + "source": [ + "%%bash\r\n", + "# Export the intermediate SavedModel that outputs 10 detections & takes in an \r\n", + "# image of dim 320x320.\r\n", + "# Modify these parameters according to your needs.\r\n", + "\r\n", + "python models/research/object_detection/export_tflite_graph_tf2.py \\\r\n", + " --pipeline_config_path=centernet_mobilenetv2_fpn_kpts/pipeline.config \\\r\n", + " --trained_checkpoint_dir=centernet_mobilenetv2_fpn_kpts/checkpoint \\\r\n", + " --output_directory=centernet_mobilenetv2_fpn_kpts/tflite \\\r\n", + " --centernet_include_keypoints=true \\\r\n", + " --keypoint_label_map_path=centernet_mobilenetv2_fpn_kpts/label_map.txt \\\r\n", + " --max_detections=10 \\\r\n", + " --config_override=\" \\\r\n", + " model{ \\\r\n", + " center_net { \\\r\n", + " image_resizer { \\\r\n", + " fixed_shape_resizer { \\\r\n", + " height: 320 \\\r\n", + " width: 320 \\\r\n", + " } \\\r\n", + " } \\\r\n", + " } \\\r\n", + " }\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "TJtsyMlLY1DU" + }, + "source": [ + "# Generate TensorFlow Lite model using the converter.\r\n", + "\r\n", + "%%bash\r\n", + "tflite_convert --output_file=centernet_mobilenetv2_fpn_kpts/model.tflite \\\r\n", + " --saved_model_dir=centernet_mobilenetv2_fpn_kpts/tflite/saved_model" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJCxPBjYZSk6" + }, + "source": [ + "## TensorFlow Lite Inference\r\n", + "\r\n", + "Use a TensorFlow Lite Interpreter to detect people & their keypoints in the test image." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "F2GpD7r8ZUzx" + }, + "source": [ + "%matplotlib inline\r\n", + "\r\n", + "# Load the TFLite model and allocate tensors.\r\n", + "model_path = 'centernet_mobilenetv2_fpn_kpts/model.tflite'\r\n", + "image_path = 'coco/val2017/000000013729.jpg'\r\n", + "\r\n", + "# Initialize TensorFlow Lite Interpreter.\r\n", + "interpreter = tf.lite.Interpreter(model_path=model_path)\r\n", + "interpreter.allocate_tensors()\r\n", + "\r\n", + "# Keypoints are only relevant for people, so we only care about that\r\n", + "# category Id here.\r\n", + "category_index = {1: {'id': 1, 'name': 'person'}}\r\n", + "\r\n", + "label_id_offset = 1\r\n", + "\r\n", + "image = tf.io.read_file(image_path)\r\n", + "image = tf.compat.v1.image.decode_jpeg(image)\r\n", + "image = tf.expand_dims(image, axis=0)\r\n", + "image_numpy = image.numpy()\r\n", + "\r\n", + "input_tensor = tf.convert_to_tensor(image_numpy, dtype=tf.float32)\r\n", + "# Note that CenterNet doesn't require any pre-processing except resizing to\r\n", + "# input size that the TensorFlow Lite Interpreter was generated with.\r\n", + "input_tensor = tf.image.resize(input_tensor, (320, 320))\r\n", + "(boxes, classes, scores, num_detections, kpts, kpts_scores) = detect(\r\n", + " interpreter, input_tensor, include_keypoint=True)\r\n", + "\r\n", + "vis_image = plot_detections(\r\n", + " image_numpy[0],\r\n", + " boxes[0],\r\n", + " classes[0].astype(np.uint32) + label_id_offset,\r\n", + " scores[0],\r\n", + " category_index,\r\n", + " keypoints=kpts[0],\r\n", + " keypoint_scores=kpts_scores[0])\r\n", + "plt.figure(figsize = (30, 20))\r\n", + "plt.imshow(vis_image)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "59Y3q6AC9C7s" + }, + "source": [ + "# Running On Mobile\r\n", + "\r\n", + "As mentioned earlier, both the above models can be run on mobile phones with TensorFlow Lite. 
See our [**inference documentation**](https://www.tensorflow.org/lite/guide/inference) for general guidelines on platform-specific APIs & leveraging hardware acceleration. Both the object-detection & keypoint-detection versions of CenterNet are compatible with our [GPU delegate](https://www.tensorflow.org/lite/performance/gpu). *We are working on developing quantized versions of this model.*\r\n", + "\r\n", + "To leverage *object-detection* in your Android app, the simplest way is to use TFLite's [**ObjectDetector Task API**](https://www.tensorflow.org/lite/inference_with_metadata/task_library/object_detector). It is a high-level API that encapsulates complex but common image processing and post processing logic. Inference can be done in 5 lines of code. It is supported in Java for Android and C++ for native code. *We are working on building the Swift API for iOS, as well as the support for the keypoint-detection model.*\r\n", + "\r\n", + "To use the Task API, the model needs to be packed with [TFLite Metadata](https://www.tensorflow.org/lite/convert/metadata). This metadata helps the inference code perform the correct pre & post processing as required by the model. Use the following code to create the metadata." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8T_qzv6lDN_a" + }, + "source": [ + "!pip install tflite_support_nightly" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "CTZhBmfWDQ3z" + }, + "source": [ + "from tflite_support.metadata_writers import object_detector\n", + "from tflite_support.metadata_writers import writer_utils\n", + "\n", + "ObjectDetectorWriter = object_detector.MetadataWriter\n", + "\n", + "_MODEL_PATH = \"centernet_mobilenetv2_fpn_od/model.tflite\"\n", + "_SAVE_TO_PATH = \"centernet_mobilenetv2_fpn_od/model_with_metadata.tflite\"\n", + "_LABEL_PATH = \"centernet_mobilenetv2_fpn_od/tflite_label_map.txt\"\n", + "\n", + "# We need to convert Detection API's labelmap into what the Task API needs:\n", + "# a txt file with one class name on each line from index 0 to N.\n", + "# The first '0' class indicates the background.\n", + "# This code assumes COCO detection which has 90 classes, you can write a label\n", + "# map file for your model if re-trained.\n", + "od_label_map_path = 'centernet_mobilenetv2_fpn_od/label_map.txt'\n", + "category_index = label_map_util.create_category_index_from_labelmap(\n", + " label_map_path)\n", + "f = open(_LABEL_PATH, 'w')\n", + "for class_id in range(1, 91):\n", + " if class_id not in category_index:\n", + " f.write('???\\n')\n", + " continue\n", + " name = category_index[class_id]['name']\n", + " f.write(name+'\\n')\n", + "f.close()\n", + "\n", + "writer = ObjectDetectorWriter.create_for_inference(\n", + " writer_utils.load_file(_MODEL_PATH), input_norm_mean=[0], \n", + " input_norm_std=[1], label_file_paths=[_LABEL_PATH])\n", + "writer_utils.save_file(writer.populate(), _SAVE_TO_PATH)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b2tc7awzDUHr" + }, + "source": [ + "Visualize the metadata just created by the following code:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"_SRqVdZNDYF1" + }, + "source": [ + "from tflite_support import metadata\n", + "\n", + "displayer = metadata.MetadataDisplayer.with_model_file(_SAVE_TO_PATH)\n", + "print(\"Metadata populated:\")\n", + "print(displayer.get_metadata_json())\n", + "print(\"=============================\")\n", + "print(\"Associated file(s) populated:\")\n", + "print(displayer.get_packed_associated_file_list())" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SPUNsg9eDjWT" + }, + "source": [ + "See more information about *object-detection* models from our [public documentation](https://www.tensorflow.org/lite/examples/object_detection/overview). The [Object Detection example app](https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection) is a good starting point for integrating that model into your Android and iOS app. You can find [examples](https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection/android#switch-between-inference-solutions-task-library-vs-tflite-interpreter) of using both the TFLite Task Library and TFLite Interpreter API." 
+ ] + } + ] +} \ No newline at end of file diff --git a/research/object_detection/colab_tutorials/deepmac_colab.ipynb b/research/object_detection/colab_tutorials/deepmac_colab.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..cc2bd1dff248a5ff68758cf346269da6ff3ad3fc --- /dev/null +++ b/research/object_detection/colab_tutorials/deepmac_colab.ipynb @@ -0,0 +1,341 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "deepmac_demo.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "P-esW81yhfCN" + }, + "source": [ + "# Novel class segmentation demo with Deep-MAC\n", + "\n", + "Welcome to the Novel class segmentation (with Deep-MAC) demo --- this colab loads a Deep-MAC model and tests it interactively with user-specified boxes. Deep-MAC was only trained to detect and segment COCO classes, but generalizes well when segmenting within user-specified boxes of unseen classes.\n", + "\n", + "Estimated time to run through this colab (with GPU): 10-15 minutes.\n", + "Note that the bulk of this time is in installing Tensorflow and downloading\n", + "the checkpoint then running inference for the first time. Once you've done\n", + "all that, running on new images is very fast." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kq1eGNssiW31" + }, + "source": [ + "# Prerequisites\n", + "\n", + "Please change runtime to GPU." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UT7N0HJhiRKr" + }, + "source": [ + "# Installation and Imports\n", + "\n", + "This takes 3-4 minutes." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nNdls0Pe0UPK" + }, + "source": [ + "!pip install -U --pre tensorflow==\"2.2.0\"\n", + "\n", + "import os\n", + "import pathlib\n", + "\n", + "# Clone the tensorflow models repository if it doesn't already exist\n", + "if \"models\" in pathlib.Path.cwd().parts:\n", + " while \"models\" in pathlib.Path.cwd().parts:\n", + " os.chdir('..')\n", + "elif not pathlib.Path('models').exists():\n", + " !git clone --depth 1 https://github.com/tensorflow/models\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "WwjV9clX0n7S" + }, + "source": [ + "# Install the Object Detection API\n", + "%%bash\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install ." + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "sfrrno2L0sRR" + }, + "source": [ + "import glob\n", + "import io\n", + "import logging\n", + "import os\n", + "import random\n", + "import warnings\n", + "\n", + "import imageio\n", + "from IPython.display import display, Javascript\n", + "from IPython.display import Image as IPyImage\n", + "import matplotlib\n", + "from matplotlib import patches\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from object_detection.utils import colab_utils\n", + "from object_detection.utils import ops\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "import scipy.misc\n", + "from six import BytesIO\n", + "from skimage import color\n", + "from skimage import transform\n", + "from skimage import util\n", + "from skimage.color import rgb_colors\n", + "import tensorflow as tf\n", + "\n", + "%matplotlib inline\n", + "\n", + "COLORS = ([rgb_colors.cyan, rgb_colors.orange, rgb_colors.pink,\n", + " rgb_colors.purple, 
rgb_colors.limegreen , rgb_colors.crimson] +\n", + " [(color) for (name, color) in color.color_dict.items()])\n", + "random.shuffle(COLORS)\n", + "\n", + "logging.disable(logging.WARNING)\n", + "\n", + "\n", + "def read_image(path):\n", + " \"\"\"Read an image and optionally resize it for better plotting.\"\"\"\n", + " with tf.io.gfile.GFile(path, 'rb') as f:\n", + " img = Image.open(f)\n", + " return np.array(img, dtype=np.uint8)\n", + "\n", + "\n", + "def resize_for_display(image, max_height=600):\n", + " height, width, _ = image.shape\n", + " width = int(width * max_height / height)\n", + " with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\", UserWarning)\n", + " return util.img_as_ubyte(transform.resize(image, (height, width)))\n", + "\n", + "\n", + "def get_mask_prediction_function(model):\n", + " \"\"\"Get single image mask prediction function using a model.\"\"\"\n", + "\n", + " @tf.function\n", + " def predict_masks(image, boxes):\n", + " height, width, _ = image.shape.as_list()\n", + " batch = image[tf.newaxis]\n", + " boxes = boxes[tf.newaxis]\n", + "\n", + " detections = model(batch, boxes)\n", + " masks = detections['detection_masks']\n", + "\n", + " return ops.reframe_box_masks_to_image_masks(masks[0], boxes[0],\n", + " height, width)\n", + "\n", + " return predict_masks\n", + "\n", + "\n", + "def plot_image_annotations(image, boxes, masks, darken_image=0.5):\n", + " fig, ax = plt.subplots(figsize=(16, 12))\n", + " ax.set_axis_off()\n", + " image = (image * darken_image).astype(np.uint8)\n", + " ax.imshow(image)\n", + "\n", + " height, width, _ = image.shape\n", + "\n", + " num_colors = len(COLORS)\n", + " color_index = 0\n", + "\n", + " for box, mask in zip(boxes, masks):\n", + " ymin, xmin, ymax, xmax = box\n", + " ymin *= height\n", + " ymax *= height\n", + " xmin *= width\n", + " xmax *= width\n", + "\n", + " color = COLORS[color_index]\n", + " color = np.array(color)\n", + " rect = patches.Rectangle((xmin, ymin), xmax - xmin, 
ymax - ymin,\n", + " linewidth=2.5, edgecolor=color, facecolor='none')\n", + " ax.add_patch(rect)\n", + " mask = (mask > 0.5).astype(np.float32)\n", + " color_image = np.ones_like(image) * color[np.newaxis, np.newaxis, :]\n", + " color_and_mask = np.concatenate(\n", + " [color_image, mask[:, :, np.newaxis]], axis=2)\n", + "\n", + " ax.imshow(color_and_mask, alpha=0.5)\n", + "\n", + " color_index = (color_index + 1) % num_colors\n", + "\n", + " return ax" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ry9yq8zsi0Gg" + }, + "source": [ + "# Load Deep-MAC Model\n", + "\n", + "This can take up to 5 minutes." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PZ-wnbYu05K8" + }, + "source": [ + "print('Downloading and untarring model')\n", + "!wget http://download.tensorflow.org/models/object_detection/tf2/20210329/deepmac_1024x1024_coco17.tar.gz\n", + "!cp deepmac_1024x1024_coco17.tar.gz models/research/object_detection/test_data/\n", + "!tar -xzf models/research/object_detection/test_data/deepmac_1024x1024_coco17.tar.gz\n", + "!mv deepmac_1024x1024_coco17 models/research/object_detection/test_data/\n", + "model_path = 'models/research/object_detection/test_data/deepmac_1024x1024_coco17/saved_model'\n", + "\n", + "print('Loading SavedModel')\n", + "model = tf.keras.models.load_model(model_path)\n", + "prediction_function = get_mask_prediction_function(model)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ilXkYOB_NUSc" + }, + "source": [ + "# Load image" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "txj4UkoDNaOq" + }, + "source": [ + "image_path = 'models/research/object_detection/test_images/image3.jpg'\n", + "image = read_image(image_path)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zyhudgYUjcvE" + }, + "source": [ + "# Annotate an image with one or more 
boxes\n", + "\n", + "This model is trained on COCO categories, but we encourage you to try segmenting\n", + "anything you want!\n", + "\n", + "Don't forget to hit **submit** when done." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "aZvY4At0074j" + }, + "source": [ + "display_image = resize_for_display(image)\n", + "\n", + "boxes_list = []\n", + "colab_utils.annotate([display_image], boxes_list)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gUUG7NPBJMoa" + }, + "source": [ + "# In case you didn't want to label...\n", + "\n", + "Run this cell only if you didn't annotate anything above and would prefer to just use our preannotated boxes. Don't forget to uncomment.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lupqTv1HJK5K" + }, + "source": [ + "# boxes_list = [np.array([[0.000, 0.160, 0.362, 0.812],\n", + "# [0.340, 0.286, 0.472, 0.619],\n", + "# [0.437, 0.008, 0.650, 0.263],\n", + "# [0.382, 0.003, 0.538, 0.594],\n", + "# [0.518, 0.444, 0.625,0.554]], dtype=np.float32)]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ak1WO93NjvN-" + }, + "source": [ + "# Visualize mask predictions" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vdzuKnpj1A3L" + }, + "source": [ + "%matplotlib inline\n", + "\n", + "boxes = boxes_list[0]\n", + "masks = prediction_function(tf.convert_to_tensor(image),\n", + " tf.convert_to_tensor(boxes, dtype=tf.float32))\n", + "plot_image_annotations(image, boxes, masks.numpy())\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/research/object_detection/colab_tutorials/eager_few_shot_od_training_tflite.ipynb b/research/object_detection/colab_tutorials/eager_few_shot_od_training_tflite.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b47d4bdb4f1bc81c8e8727721fbb9db732de8e22 --- 
/dev/null +++ b/research/object_detection/colab_tutorials/eager_few_shot_od_training_tflite.ipynb @@ -0,0 +1,730 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rOvvWAVTkMR7" + }, + "source": [ + "# Introduction\n", + "\n", + "Welcome to the **Few Shot Object Detection for TensorFlow Lite** Colab. Here, we demonstrate fine tuning of a SSD architecture (pre-trained on COCO) on very few examples of a *novel* class. We will then generate a (downloadable) TensorFlow Lite model for on-device inference.\n", + "\n", + "**NOTE:** This Colab is meant for the few-shot detection use-case. To train a model on a large dataset, please follow the [TF2 training](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_training_and_evaluation.md#training) documentation and then [convert](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_on_mobile_tf2.md) the model to TensorFlow Lite." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3U2sv0upw04O" + }, + "source": [ + "# Set Up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vPs64QA1Zdov" + }, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H0rKBV4uZacD" + }, + "outputs": [], + "source": [ + "# Support for TF2 models was added after TF 2.3.\n", + "!pip install tf-nightly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi28cqGGFWnY" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# Clone the tensorflow models repository if it doesn't already exist\n", + "if \"models\" in pathlib.Path.cwd().parts:\n", + " while \"models\" in pathlib.Path.cwd().parts:\n", + " os.chdir('..')\n", + "elif not pathlib.Path('models').exists():\n", + " !git clone --depth 1 https://github.com/tensorflow/models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { 
+ "id": "NwdsBdGhFanc" + }, + "outputs": [], + "source": [ + "# Install the Object Detection API\n", + "%%bash\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install ." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uZcqD4NLdnf4" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import os\n", + "import random\n", + "import io\n", + "import imageio\n", + "import glob\n", + "import scipy.misc\n", + "import numpy as np\n", + "from six import BytesIO\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "from IPython.display import display, Javascript\n", + "from IPython.display import Image as IPyImage\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from object_detection.utils import label_map_util\n", + "from object_detection.utils import config_util\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "from object_detection.utils import colab_utils\n", + "from object_detection.utils import config_util\n", + "from object_detection.builders import model_builder\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IogyryF2lFBL" + }, + "source": [ + "##Utilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-y9R0Xllefec" + }, + "outputs": [], + "source": [ + "def load_image_into_numpy_array(path):\n", + " \"\"\"Load an image from file into a numpy array.\n", + "\n", + " Puts image into numpy array to feed into tensorflow graph.\n", + " Note that by convention we put it into a numpy array with shape\n", + " (height, width, channels), where channels=3 for RGB.\n", + "\n", + " Args:\n", + " path: a file path.\n", + "\n", + " Returns:\n", + " uint8 numpy array with shape (img_height, img_width, 3)\n", + " \"\"\"\n", + " img_data = 
tf.io.gfile.GFile(path, 'rb').read()\n", + " image = Image.open(BytesIO(img_data))\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape(\n", + " (im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "def plot_detections(image_np,\n", + " boxes,\n", + " classes,\n", + " scores,\n", + " category_index,\n", + " figsize=(12, 16),\n", + " image_name=None):\n", + " \"\"\"Wrapper function to visualize detections.\n", + "\n", + " Args:\n", + " image_np: uint8 numpy array with shape (img_height, img_width, 3)\n", + " boxes: a numpy array of shape [N, 4]\n", + " classes: a numpy array of shape [N]. Note that class indices are 1-based,\n", + " and match the keys in the label map.\n", + " scores: a numpy array of shape [N] or None. If scores=None, then\n", + " this function assumes that the boxes to be plotted are groundtruth\n", + " boxes and plot all boxes as black with no classes or scores.\n", + " category_index: a dict containing category dictionaries (each holding\n", + " category index `id` and category name `name`) keyed by category indices.\n", + " figsize: size for the figure.\n", + " image_name: a name for the image file.\n", + " \"\"\"\n", + " image_np_with_annotations = image_np.copy()\n", + " viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_annotations,\n", + " boxes,\n", + " classes,\n", + " scores,\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " min_score_thresh=0.8)\n", + " if image_name:\n", + " plt.imsave(image_name, image_np_with_annotations)\n", + " else:\n", + " plt.imshow(image_np_with_annotations)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sSaXL28TZfk1" + }, + "source": [ + "## Rubber Ducky data\n", + "\n", + "We will start with some toy data consisting of 5 images of a rubber\n", + "ducky. 
Note that the [COCO](https://cocodataset.org/#explore) dataset contains a number of animals, but notably, it does *not* contain rubber duckies (or even ducks for that matter), so this is a novel class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SQy3ND7EpFQM" + }, + "outputs": [], + "source": [ + "# Load images and visualize\n", + "train_image_dir = 'models/research/object_detection/test_images/ducky/train/'\n", + "train_images_np = []\n", + "for i in range(1, 6):\n", + " image_path = os.path.join(train_image_dir, 'robertducky' + str(i) + '.jpg')\n", + " train_images_np.append(load_image_into_numpy_array(image_path))\n", + "\n", + "plt.rcParams['axes.grid'] = False\n", + "plt.rcParams['xtick.labelsize'] = False\n", + "plt.rcParams['ytick.labelsize'] = False\n", + "plt.rcParams['xtick.top'] = False\n", + "plt.rcParams['xtick.bottom'] = False\n", + "plt.rcParams['ytick.left'] = False\n", + "plt.rcParams['ytick.right'] = False\n", + "plt.rcParams['figure.figsize'] = [14, 7]\n", + "\n", + "for idx, train_image_np in enumerate(train_images_np):\n", + " plt.subplot(2, 3, idx+1)\n", + " plt.imshow(train_image_np)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LbOe9Ym7xMGV" + }, + "source": [ + "# Transfer Learning\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dqb_yjAo3cO_" + }, + "source": [ + "## Data Preparation\n", + "\n", + "First, we populate the groundtruth with pre-annotated bounding boxes.\n", + "\n", + "We then add the class annotations (for simplicity, we assume a single 'Duck' class in this colab; though it should be straightforward to extend this to handle multiple classes). We also convert everything to the format that the training\n", + "loop below expects (e.g., everything converted to tensors, classes converted to one-hot representations, etc.)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wIAT6ZUmdHOC" + }, + "outputs": [], + "source": [ + "gt_boxes = [\n", + " np.array([[0.436, 0.591, 0.629, 0.712]], dtype=np.float32),\n", + " np.array([[0.539, 0.583, 0.73, 0.71]], dtype=np.float32),\n", + " np.array([[0.464, 0.414, 0.626, 0.548]], dtype=np.float32),\n", + " np.array([[0.313, 0.308, 0.648, 0.526]], dtype=np.float32),\n", + " np.array([[0.256, 0.444, 0.484, 0.629]], dtype=np.float32)\n", + "]\n", + "\n", + "# By convention, our non-background classes start counting at 1. Given\n", + "# that we will be predicting just one class, we will therefore assign it a\n", + "# `class id` of 1.\n", + "duck_class_id = 1\n", + "num_classes = 1\n", + "\n", + "category_index = {duck_class_id: {'id': duck_class_id, 'name': 'rubber_ducky'}}\n", + "\n", + "# Convert class labels to one-hot; convert everything to tensors.\n", + "# The `label_id_offset` here shifts all classes by a certain number of indices;\n", + "# we do this here so that the model receives one-hot labels where non-background\n", + "# classes start counting at the zeroth index. 
This is ordinarily just handled\n", + "# automatically in our training binaries, but we need to reproduce it here.\n", + "label_id_offset = 1\n", + "train_image_tensors = []\n", + "gt_classes_one_hot_tensors = []\n", + "gt_box_tensors = []\n", + "for (train_image_np, gt_box_np) in zip(\n", + " train_images_np, gt_boxes):\n", + " train_image_tensors.append(tf.expand_dims(tf.convert_to_tensor(\n", + " train_image_np, dtype=tf.float32), axis=0))\n", + " gt_box_tensors.append(tf.convert_to_tensor(gt_box_np, dtype=tf.float32))\n", + " zero_indexed_groundtruth_classes = tf.convert_to_tensor(\n", + " np.ones(shape=[gt_box_np.shape[0]], dtype=np.int32) - label_id_offset)\n", + " gt_classes_one_hot_tensors.append(tf.one_hot(\n", + " zero_indexed_groundtruth_classes, num_classes))\n", + "print('Done prepping data.')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b3_Z3mJWN9KJ" + }, + "source": [ + "Let's just visualize the rubber duckies as a sanity check\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YBD6l-E4N71y" + }, + "outputs": [], + "source": [ + "dummy_scores = np.array([1.0], dtype=np.float32) # give boxes a score of 100%\n", + "\n", + "plt.figure(figsize=(30, 15))\n", + "for idx in range(5):\n", + " plt.subplot(2, 3, idx+1)\n", + " plot_detections(\n", + " train_images_np[idx],\n", + " gt_boxes[idx],\n", + " np.ones(shape=[gt_boxes[idx].shape[0]], dtype=np.int32),\n", + " dummy_scores, category_index)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ghDAsqfoZvPh" + }, + "source": [ + "## Load mobile-friendly model\n", + "\n", + "In this cell we build a mobile-friendly single-stage detection architecture (SSD MobileNet V2 FPN-Lite) and restore all but the classification layer at the top (which will be randomly initialized).\n", + "\n", + "**NOTE**: TensorFlow Lite only supports SSD models for now.\n", + "\n", + "For simplicity, we have hardcoded a number of things in this 
colab for the specific SSD architecture at hand (including assuming that the image size will always be 320x320), however it is not difficult to generalize to other model configurations (`pipeline.config` in the zip downloaded from the [Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md)).\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9J16r3NChD-7" + }, + "outputs": [], + "source": [ + "# Download the checkpoint and put it into models/research/object_detection/test_data/\n", + "\n", + "!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz\n", + "!tar -xf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz\n", + "!if [ -d \"models/research/object_detection/test_data/checkpoint\" ]; then rm -Rf models/research/object_detection/test_data/checkpoint; fi\n", + "!mkdir models/research/object_detection/test_data/checkpoint\n", + "!mv ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint models/research/object_detection/test_data/" + ], + "execution_count": null, + "metadata": { + "id": "RyT4BUbaMeG-" + }, + "outputs": [], + "source": [ + "tf.keras.backend.clear_session()\n", + "\n", + "print('Building model and restoring weights for fine-tuning...', flush=True)\n", + "num_classes = 1\n", + "pipeline_config = 'models/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config'\n", + "checkpoint_path = 'models/research/object_detection/test_data/checkpoint/ckpt-0'\n", + "\n", + "# This will be where we save checkpoint \u0026 config for TFLite conversion later.\n", + "output_directory = 'output/'\n", + "output_checkpoint_dir = os.path.join(output_directory, 'checkpoint')\n", + "\n", + "# Load pipeline config and build a detection model.\n", + "#\n", + "# Since we are working off of a COCO architecture which predicts 
90\n", + "# class slots by default, we override the `num_classes` field here to be just\n", + "# one (for our new rubber ducky class).\n", + "configs = config_util.get_configs_from_pipeline_file(pipeline_config)\n", + "model_config = configs['model']\n", + "model_config.ssd.num_classes = num_classes\n", + "model_config.ssd.freeze_batchnorm = True\n", + "detection_model = model_builder.build(\n", + " model_config=model_config, is_training=True)\n", + "# Save new pipeline config\n", + "pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)\n", + "config_util.save_pipeline_config(pipeline_proto, output_directory)\n", + "\n", + "# Set up object-based checkpoint restore --- SSD has two prediction\n", + "# `heads` --- one for classification, the other for box regression. We will\n", + "# restore the box regression head but initialize the classification head\n", + "# from scratch (we show the omission below by commenting out the line that\n", + "# we would add if we wanted to restore both heads)\n", + "fake_box_predictor = tf.compat.v2.train.Checkpoint(\n", + " _base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,\n", + " # _prediction_heads=detection_model._box_predictor._prediction_heads,\n", + " # (i.e., the classification head that we *will not* restore)\n", + " _box_prediction_head=detection_model._box_predictor._box_prediction_head,\n", + " )\n", + "fake_model = tf.compat.v2.train.Checkpoint(\n", + " _feature_extractor=detection_model._feature_extractor,\n", + " _box_predictor=fake_box_predictor)\n", + "ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)\n", + "ckpt.restore(checkpoint_path).expect_partial()\n", + "\n", + "# To save checkpoint for TFLite conversion.\n", + "exported_ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)\n", + "ckpt_manager = tf.train.CheckpointManager(\n", + " exported_ckpt, output_checkpoint_dir, max_to_keep=1)\n", + "\n", + "# Run model through a dummy image so that 
variables are created\n", + "image, shapes = detection_model.preprocess(tf.zeros([1, 320, 320, 3]))\n", + "prediction_dict = detection_model.predict(image, shapes)\n", + "_ = detection_model.postprocess(prediction_dict, shapes)\n", + "print('Weights restored!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pCkWmdoZZ0zJ" + }, + "source": [ + "## Eager training loop (Fine-tuning)\n", + "\n", + "Some of the parameters in this block have been set empirically: for example, `learning_rate`, `num_batches` \u0026 `momentum` for SGD. These are just a starting point, you will have to tune these for your data \u0026 model architecture to get the best results.\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nyHoF4mUrv5-" + }, + "outputs": [], + "source": [ + "tf.keras.backend.set_learning_phase(True)\n", + "\n", + "# These parameters can be tuned; since our training set has 5 images\n", + "# it doesn't make sense to have a much larger batch size, though we could\n", + "# fit more examples in memory if we wanted to.\n", + "batch_size = 5\n", + "learning_rate = 0.15\n", + "num_batches = 1000\n", + "\n", + "# Select variables in top layers to fine-tune.\n", + "trainable_variables = detection_model.trainable_variables\n", + "to_fine_tune = []\n", + "prefixes_to_train = [\n", + " 'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',\n", + " 'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']\n", + "for var in trainable_variables:\n", + " if any([var.name.startswith(prefix) for prefix in prefixes_to_train]):\n", + " to_fine_tune.append(var)\n", + "\n", + "# Set up forward + backward pass for a single train step.\n", + "def get_model_train_step_function(model, optimizer, vars_to_fine_tune):\n", + " \"\"\"Get a tf.function for training step.\"\"\"\n", + "\n", + " # Use tf.function for a bit of speed.\n", + " # Comment out the tf.function decorator if you 
want the inside of the\n", + " # function to run eagerly.\n", + " @tf.function\n", + " def train_step_fn(image_tensors,\n", + " groundtruth_boxes_list,\n", + " groundtruth_classes_list):\n", + " \"\"\"A single training iteration.\n", + "\n", + " Args:\n", + " image_tensors: A list of [1, height, width, 3] Tensor of type tf.float32.\n", + " Note that the height and width can vary across images, as they are\n", + " reshaped within this function to be 320x320.\n", + " groundtruth_boxes_list: A list of Tensors of shape [N_i, 4] with type\n", + " tf.float32 representing groundtruth boxes for each image in the batch.\n", + " groundtruth_classes_list: A list of Tensors of shape [N_i, num_classes]\n", + " with type tf.float32 representing groundtruth boxes for each image in\n", + " the batch.\n", + "\n", + " Returns:\n", + " A scalar tensor representing the total loss for the input batch.\n", + " \"\"\"\n", + " shapes = tf.constant(batch_size * [[320, 320, 3]], dtype=tf.int32)\n", + " model.provide_groundtruth(\n", + " groundtruth_boxes_list=groundtruth_boxes_list,\n", + " groundtruth_classes_list=groundtruth_classes_list)\n", + " with tf.GradientTape() as tape:\n", + " preprocessed_images = tf.concat(\n", + " [detection_model.preprocess(image_tensor)[0]\n", + " for image_tensor in image_tensors], axis=0)\n", + " prediction_dict = model.predict(preprocessed_images, shapes)\n", + " losses_dict = model.loss(prediction_dict, shapes)\n", + " total_loss = losses_dict['Loss/localization_loss'] + losses_dict['Loss/classification_loss']\n", + " gradients = tape.gradient(total_loss, vars_to_fine_tune)\n", + " optimizer.apply_gradients(zip(gradients, vars_to_fine_tune))\n", + " return total_loss\n", + "\n", + " return train_step_fn\n", + "\n", + "optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)\n", + "train_step_fn = get_model_train_step_function(\n", + " detection_model, optimizer, to_fine_tune)\n", + "\n", + "print('Start fine-tuning!', 
flush=True)\n", + "for idx in range(num_batches):\n", + " # Grab keys for a random subset of examples\n", + " all_keys = list(range(len(train_images_np)))\n", + " random.shuffle(all_keys)\n", + " example_keys = all_keys[:batch_size]\n", + "\n", + " # Note that we do not do data augmentation in this demo. If you want a\n", + " # a fun exercise, we recommend experimenting with random horizontal flipping\n", + " # and random cropping :)\n", + " gt_boxes_list = [gt_box_tensors[key] for key in example_keys]\n", + " gt_classes_list = [gt_classes_one_hot_tensors[key] for key in example_keys]\n", + " image_tensors = [train_image_tensors[key] for key in example_keys]\n", + "\n", + " # Training step (forward pass + backwards pass)\n", + " total_loss = train_step_fn(image_tensors, gt_boxes_list, gt_classes_list)\n", + "\n", + " if idx % 100 == 0:\n", + " print('batch ' + str(idx) + ' of ' + str(num_batches)\n", + " + ', loss=' + str(total_loss.numpy()), flush=True)\n", + "\n", + "print('Done fine-tuning!')\n", + "\n", + "ckpt_manager.save()\n", + "print('Checkpoint saved!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cYk1_9Fc2lZO" + }, + "source": [ + "# Export \u0026 run with TensorFlow Lite\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y0nsDVEd9SuX" + }, + "source": [ + "## Model Conversion\n", + "\n", + "First, we invoke the `export_tflite_graph_tf2.py` script to generate a TFLite-friendly intermediate SavedModel. This will then be passed to the TensorFlow Lite Converter for generating the final model.\n", + "\n", + "To know more about this process, please look at [this documentation](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_on_mobile_tf2.md)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dyrqHSQQ7WKE" + }, + "outputs": [], + "source": [ + "%%bash\n", + "python models/research/object_detection/export_tflite_graph_tf2.py \\\n", + " --pipeline_config_path output/pipeline.config \\\n", + " --trained_checkpoint_dir output/checkpoint \\\n", + " --output_directory tflite" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m5hjPyR78bgs" + }, + "outputs": [], + "source": [ + "!tflite_convert --saved_model_dir=tflite/saved_model --output_file=tflite/model.tflite" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WHlXL1x_Z3tc" + }, + "source": [ + "## Test .tflite model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WcE6OwrHQJya" + }, + "outputs": [], + "source": [ + "test_image_dir = 'models/research/object_detection/test_images/ducky/test/'\n", + "test_images_np = []\n", + "for i in range(1, 50):\n", + " image_path = os.path.join(test_image_dir, 'out' + str(i) + '.jpg')\n", + " test_images_np.append(np.expand_dims(\n", + " load_image_into_numpy_array(image_path), axis=0))\n", + "\n", + "# Again, uncomment this decorator if you want to run inference eagerly\n", + "def detect(interpreter, input_tensor):\n", + " \"\"\"Run detection on an input image.\n", + "\n", + " Args:\n", + " interpreter: tf.lite.Interpreter\n", + " input_tensor: A [1, height, width, 3] Tensor of type tf.float32.\n", + " Note that height and width can be anything since the image will be\n", + " immediately resized according to the needs of the model within this\n", + " function.\n", + "\n", + " Returns:\n", + " A dict containing 3 Tensors (`detection_boxes`, `detection_classes`,\n", + " and `detection_scores`).\n", + " \"\"\"\n", + " input_details = interpreter.get_input_details()\n", + " output_details = interpreter.get_output_details()\n", + "\n", + " # We use the original model for pre-processing, since the 
TFLite model doesn't\n", + " # include pre-processing.\n", + " preprocessed_image, shapes = detection_model.preprocess(input_tensor)\n", + " interpreter.set_tensor(input_details[0]['index'], preprocessed_image.numpy())\n", + "\n", + " interpreter.invoke()\n", + "\n", + " boxes = interpreter.get_tensor(output_details[0]['index'])\n", + " classes = interpreter.get_tensor(output_details[1]['index'])\n", + " scores = interpreter.get_tensor(output_details[2]['index'])\n", + " return boxes, classes, scores\n", + "\n", + "# Load the TFLite model and allocate tensors.\n", + "interpreter = tf.lite.Interpreter(model_path=\"tflite/model.tflite\")\n", + "interpreter.allocate_tensors()\n", + "\n", + "# Note that the first frame will trigger tracing of the tf.function, which will\n", + "# take some time, after which inference should be fast.\n", + "\n", + "label_id_offset = 1\n", + "for i in range(len(test_images_np)):\n", + " input_tensor = tf.convert_to_tensor(test_images_np[i], dtype=tf.float32)\n", + " boxes, classes, scores = detect(interpreter, input_tensor)\n", + "\n", + " plot_detections(\n", + " test_images_np[i][0],\n", + " boxes[0],\n", + " classes[0].astype(np.uint32) + label_id_offset,\n", + " scores[0],\n", + " category_index, figsize=(15, 20), image_name=\"gif_frame_\" + ('%02d' % i) + \".jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZkMPOSQE0x8C" + }, + "outputs": [], + "source": [ + "imageio.plugins.freeimage.download()\n", + "\n", + "anim_file = 'duckies_test.gif'\n", + "\n", + "filenames = glob.glob('gif_frame_*.jpg')\n", + "filenames = sorted(filenames)\n", + "last = -1\n", + "images = []\n", + "for filename in filenames:\n", + " image = imageio.imread(filename)\n", + " images.append(image)\n", + "\n", + "imageio.mimsave(anim_file, images, 'GIF-FI', fps=5)\n", + "\n", + "display(IPyImage(open(anim_file, 'rb').read()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yzaHWsS58_PQ" + }, + 
"source": [ + "## (Optional) Download model\n", + "\n", + "This model can be run on-device with **TensorFlow Lite**. Look at [our SSD model signature](https://www.tensorflow.org/lite/models/object_detection/overview#uses_and_limitations) to understand how to interpret the model IO tensors. Our [Object Detection example](https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection) is a good starting point for integrating the model into your mobile app.\n", + "\n", + "Refer to TFLite's [inference documentation](https://www.tensorflow.org/lite/guide/inference) for more details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gZ6vac3RAY3j" + }, + "outputs": [], + "source": [ + "from google.colab import files\n", + "files.download('tflite/model.tflite') " + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "eager_few_shot_od_training_tflite.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/research/object_detection/colab_tutorials/inference_from_saved_model_tf2_colab.ipynb b/research/object_detection/colab_tutorials/inference_from_saved_model_tf2_colab.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1e88f4c5d52435297bd2ba8c8bb47a2f3c346f30 --- /dev/null +++ b/research/object_detection/colab_tutorials/inference_from_saved_model_tf2_colab.ipynb @@ -0,0 +1,313 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "inference_from_saved_model_tf2_colab.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cT5cdSLPX0ui" + }, + "source": [ + "# Intro to Object Detection Colab\n", + "\n", + "Welcome 
to the object detection colab! This demo will take you through the steps of running an \"out-of-the-box\" detection model in SavedModel format on a collection of images.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vPs64QA1Zdov" + }, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OBzb04bdNGM8", + "colab_type": "code", + "colab": {} + }, + "source": [ + "!pip install -U --pre tensorflow==\"2.2.0\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NgSXyvKSNHIl", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# Clone the tensorflow models repository if it doesn't already exist\n", + "if \"models\" in pathlib.Path.cwd().parts:\n", + " while \"models\" in pathlib.Path.cwd().parts:\n", + " os.chdir('..')\n", + "elif not pathlib.Path('models').exists():\n", + " !git clone --depth 1 https://github.com/tensorflow/models" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "rhpPgW7TNLs6", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Install the Object Detection API\n", + "%%bash\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install ." 
+ ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "yn5_uV1HLvaz", + "colab": {} + }, + "source": [ + "import io\n", + "import os\n", + "import scipy.misc\n", + "import numpy as np\n", + "import six\n", + "import time\n", + "\n", + "from six import BytesIO\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "\n", + "import tensorflow as tf\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "\n", + "%matplotlib inline" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "-y9R0Xllefec", + "colab": {} + }, + "source": [ + "def load_image_into_numpy_array(path):\n", + " \"\"\"Load an image from file into a numpy array.\n", + "\n", + " Puts image into numpy array to feed into tensorflow graph.\n", + " Note that by convention we put it into a numpy array with shape\n", + " (height, width, channels), where channels=3 for RGB.\n", + "\n", + " Args:\n", + " path: a file path (this can be local or on colossus)\n", + "\n", + " Returns:\n", + " uint8 numpy array with shape (img_height, img_width, 3)\n", + " \"\"\"\n", + " img_data = tf.io.gfile.GFile(path, 'rb').read()\n", + " image = Image.open(BytesIO(img_data))\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape(\n", + " (im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "# Load the COCO Label Map\n", + "category_index = {\n", + " 1: {'id': 1, 'name': 'person'},\n", + " 2: {'id': 2, 'name': 'bicycle'},\n", + " 3: {'id': 3, 'name': 'car'},\n", + " 4: {'id': 4, 'name': 'motorcycle'},\n", + " 5: {'id': 5, 'name': 'airplane'},\n", + " 6: {'id': 6, 'name': 'bus'},\n", + " 7: {'id': 7, 'name': 'train'},\n", + " 8: {'id': 8, 'name': 'truck'},\n", + " 9: {'id': 9, 'name': 'boat'},\n", + " 10: {'id': 10, 'name': 'traffic 
light'},\n", + " 11: {'id': 11, 'name': 'fire hydrant'},\n", + " 13: {'id': 13, 'name': 'stop sign'},\n", + " 14: {'id': 14, 'name': 'parking meter'},\n", + " 15: {'id': 15, 'name': 'bench'},\n", + " 16: {'id': 16, 'name': 'bird'},\n", + " 17: {'id': 17, 'name': 'cat'},\n", + " 18: {'id': 18, 'name': 'dog'},\n", + " 19: {'id': 19, 'name': 'horse'},\n", + " 20: {'id': 20, 'name': 'sheep'},\n", + " 21: {'id': 21, 'name': 'cow'},\n", + " 22: {'id': 22, 'name': 'elephant'},\n", + " 23: {'id': 23, 'name': 'bear'},\n", + " 24: {'id': 24, 'name': 'zebra'},\n", + " 25: {'id': 25, 'name': 'giraffe'},\n", + " 27: {'id': 27, 'name': 'backpack'},\n", + " 28: {'id': 28, 'name': 'umbrella'},\n", + " 31: {'id': 31, 'name': 'handbag'},\n", + " 32: {'id': 32, 'name': 'tie'},\n", + " 33: {'id': 33, 'name': 'suitcase'},\n", + " 34: {'id': 34, 'name': 'frisbee'},\n", + " 35: {'id': 35, 'name': 'skis'},\n", + " 36: {'id': 36, 'name': 'snowboard'},\n", + " 37: {'id': 37, 'name': 'sports ball'},\n", + " 38: {'id': 38, 'name': 'kite'},\n", + " 39: {'id': 39, 'name': 'baseball bat'},\n", + " 40: {'id': 40, 'name': 'baseball glove'},\n", + " 41: {'id': 41, 'name': 'skateboard'},\n", + " 42: {'id': 42, 'name': 'surfboard'},\n", + " 43: {'id': 43, 'name': 'tennis racket'},\n", + " 44: {'id': 44, 'name': 'bottle'},\n", + " 46: {'id': 46, 'name': 'wine glass'},\n", + " 47: {'id': 47, 'name': 'cup'},\n", + " 48: {'id': 48, 'name': 'fork'},\n", + " 49: {'id': 49, 'name': 'knife'},\n", + " 50: {'id': 50, 'name': 'spoon'},\n", + " 51: {'id': 51, 'name': 'bowl'},\n", + " 52: {'id': 52, 'name': 'banana'},\n", + " 53: {'id': 53, 'name': 'apple'},\n", + " 54: {'id': 54, 'name': 'sandwich'},\n", + " 55: {'id': 55, 'name': 'orange'},\n", + " 56: {'id': 56, 'name': 'broccoli'},\n", + " 57: {'id': 57, 'name': 'carrot'},\n", + " 58: {'id': 58, 'name': 'hot dog'},\n", + " 59: {'id': 59, 'name': 'pizza'},\n", + " 60: {'id': 60, 'name': 'donut'},\n", + " 61: {'id': 61, 'name': 'cake'},\n", + " 62: {'id': 62, 
'name': 'chair'},\n", + " 63: {'id': 63, 'name': 'couch'},\n", + " 64: {'id': 64, 'name': 'potted plant'},\n", + " 65: {'id': 65, 'name': 'bed'},\n", + " 67: {'id': 67, 'name': 'dining table'},\n", + " 70: {'id': 70, 'name': 'toilet'},\n", + " 72: {'id': 72, 'name': 'tv'},\n", + " 73: {'id': 73, 'name': 'laptop'},\n", + " 74: {'id': 74, 'name': 'mouse'},\n", + " 75: {'id': 75, 'name': 'remote'},\n", + " 76: {'id': 76, 'name': 'keyboard'},\n", + " 77: {'id': 77, 'name': 'cell phone'},\n", + " 78: {'id': 78, 'name': 'microwave'},\n", + " 79: {'id': 79, 'name': 'oven'},\n", + " 80: {'id': 80, 'name': 'toaster'},\n", + " 81: {'id': 81, 'name': 'sink'},\n", + " 82: {'id': 82, 'name': 'refrigerator'},\n", + " 84: {'id': 84, 'name': 'book'},\n", + " 85: {'id': 85, 'name': 'clock'},\n", + " 86: {'id': 86, 'name': 'vase'},\n", + " 87: {'id': 87, 'name': 'scissors'},\n", + " 88: {'id': 88, 'name': 'teddy bear'},\n", + " 89: {'id': 89, 'name': 'hair drier'},\n", + " 90: {'id': 90, 'name': 'toothbrush'},\n", + "}" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "QwcBC2TlPSwg", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Download the saved model and put it into models/research/object_detection/test_data/\n", + "!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d5_coco17_tpu-32.tar.gz\n", + "!tar -xf efficientdet_d5_coco17_tpu-32.tar.gz\n", + "!mv efficientdet_d5_coco17_tpu-32/ models/research/object_detection/test_data/" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "Z2p-PmKLYCVU", + "colab": {} + }, + "source": [ + "start_time = time.time()\n", + "tf.keras.backend.clear_session()\n", + "detect_fn = tf.saved_model.load('models/research/object_detection/test_data/efficientdet_d5_coco17_tpu-32/saved_model/')\n", + "end_time = time.time()\n", + "elapsed_time = end_time - start_time\n", + 
"print('Elapsed time: ' + str(elapsed_time) + 's')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "vukkhd5-9NSL", + "colab": {} + }, + "source": [ + "import time\n", + "\n", + "image_dir = 'models/research/object_detection/test_images'\n", + "\n", + "elapsed = []\n", + "for i in range(2):\n", + " image_path = os.path.join(image_dir, 'image' + str(i + 1) + '.jpg')\n", + " image_np = load_image_into_numpy_array(image_path)\n", + " input_tensor = np.expand_dims(image_np, 0)\n", + " start_time = time.time()\n", + " detections = detect_fn(input_tensor)\n", + " end_time = time.time()\n", + " elapsed.append(end_time - start_time)\n", + "\n", + " plt.rcParams['figure.figsize'] = [42, 21]\n", + " label_id_offset = 1\n", + " image_np_with_detections = image_np.copy()\n", + " viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_detections,\n", + " detections['detection_boxes'][0].numpy(),\n", + " detections['detection_classes'][0].numpy().astype(np.int32),\n", + " detections['detection_scores'][0].numpy(),\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.40,\n", + " agnostic_mode=False)\n", + " plt.subplot(2, 1, i+1)\n", + " plt.imshow(image_np_with_detections)\n", + "\n", + "mean_elapsed = sum(elapsed) / float(len(elapsed))\n", + "print('Elapsed time: ' + str(mean_elapsed) + ' second per image')" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb b/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb index b4eac1ade9a65bf3dd1c8ba584d368afcfe9003b..0dd1253207f26f31aa7a23471399553574b6eae3 100644 --- a/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb +++ b/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb @@ 
-10,11 +10,11 @@ "# Object Detection API Demo\n", "\n", "\u003ctable align=\"left\"\u003e\u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n", + " \u003ca target=\"_blank\" href=\"https://colab.sandbox.google.com/github/tensorflow/models/blob/master/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n", " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\n", " \u003c/a\u003e\n", "\u003c/td\u003e\u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n", + " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/object_detection_tutorial.ipynb\"\u003e\n", " \u003cimg width=32px src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", "\u003c/td\u003e\u003c/table\u003e" ] diff --git a/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_coco_tpu-128.config b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_coco_tpu-128.config new file mode 100644 index 0000000000000000000000000000000000000000..a97db04ede0bc751c8b58fe99ec5bd0943f22ffc --- /dev/null +++ b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_coco_tpu-128.config @@ -0,0 +1,210 @@ +# DeepMAC meta architecture from the "The surprising impact of mask-head +# architecture on novel class segmentation" [1] paper with an Hourglass-100[2] +# mask head. This config is trained on all COCO classes and achieves a +# mask mAP of 39.4% on the COCO testdev-2017 set. 
+# [1]: https://arxiv.org/abs/2104.00613 +# [2]: https://arxiv.org/abs/1904.07850 + +# Train on TPU-128 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + bgr_ordering: true + channel_means: [104.01362025, 114.03422265, 119.9165958 ] + channel_stds: [73.6027665 , 69.89082075, 70.9150767 ] + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 1024 + max_dimension: 1024 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + deepmac_mask_estimation { + dim: 32 + task_loss_weight: 5.0 + pixel_embedding_dim: 16 + mask_size: 32 + use_xy: true + use_instance_embedding: true + network_type: "hourglass100" + classification_loss { + weighted_sigmoid {} + } + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 50000 + + data_augmentation_options { + random_horizontal_flip { + } + } + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_square_crop_by_scale { + scale_min: 0.6 + scale_max: 1.3 + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default. 
+ learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 50000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51" + fine_tune_checkpoint_type: "fine_tune" +} + +train_input_reader: { + load_instance_masks: true + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + mask_type: PNG_MASKS + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } +} + +eval_config: { + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + super_categories { + key: "VOC" + value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat," + "dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv" + } + super_categories { + key: "NonVOC" + value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench," + "elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase," + "frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove," + "skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl," + "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed," + "toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier," + "toothbrush" + } + super_categories { + key: "person" + value: "person" + } + super_categories { + key: "vehicle" + value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat" + } + super_categories { + key: "outdoor" + value: "traffic light,fire hydrant,stop sign,parking meter,bench" + } + super_categories { + key: "animal" + value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe" + } + super_categories { + key: "accessory" + 
value: "backpack,umbrella,handbag,tie,suitcase" + } + super_categories { + key: "sports" + value: "frisbee,skis,snowboard,sports ball,kite,baseball bat," + "baseball glove,skateboard,surfboard,tennis racket" + } + super_categories { + key: "kitchen" + value: "bottle,wine glass,cup,fork,knife,spoon,bowl" + } + super_categories { + key: "food" + value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut," + "cake" + } + super_categories { + key: "furniture" + value: "chair,couch,potted plant,bed,dining table,toilet" + } + super_categories { + key: "electronic" + value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator" + } + super_categories { + key: "indoor" + value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush" + } +} + +eval_input_reader: { + load_instance_masks: true + mask_type: PNG_MASKS + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } +} diff --git a/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_non_voc_only_tpu-128.config b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_non_voc_only_tpu-128.config new file mode 100644 index 0000000000000000000000000000000000000000..8262c444e2eecd4982a2bc1ee81edf18fc81ba99 --- /dev/null +++ b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_non_voc_only_tpu-128.config @@ -0,0 +1,273 @@ +# DeepMAC meta architecture from the "The surprising impact of mask-head +# architecture on novel class segmentation" [1] paper with an Hourglass-100[2] +# mask head. This config is trained on masks from the non-VOC classes and +# achieves a mask mAP of 39.1% on the VOC classes. 
+# [1]: https://arxiv.org/abs/2104.00613 +# [2]: https://arxiv.org/abs/1904.07850 + +# Train on TPU-128 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + bgr_ordering: true + channel_means: [104.01362025, 114.03422265, 119.9165958 ] + channel_stds: [73.6027665 , 69.89082075, 70.9150767 ] + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 1024 + max_dimension: 1024 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + deepmac_mask_estimation { + dim: 32 + task_loss_weight: 5.0 + pixel_embedding_dim: 16 + mask_size: 32 + use_xy: true + use_instance_embedding: true + network_type: "hourglass100" + classification_loss { + weighted_sigmoid {} + } + + allowed_masked_classes_ids: [ + 8, + 10, + 11, + 13, + 14, + 15, + 22, + 23, + 24, + 25, + 27, + 28, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 65, + 70, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 84, + 85, + 86, + 87, + 88, + 89, + 90 + ] + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 50000 + + data_augmentation_options { + random_horizontal_flip { + } + } + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_square_crop_by_scale { + scale_min: 0.6 + scale_max: 1.3 + } + } + + optimizer { + 
adam_optimizer: { + epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default. + learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 50000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51" + fine_tune_checkpoint_type: "fine_tune" +} + +train_input_reader: { + load_instance_masks: true + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + mask_type: PNG_MASKS + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } +} + +eval_config: { + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + super_categories { + key: "VOC" + value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat," + "dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv" + } + super_categories { + key: "NonVOC" + value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench," + "elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase," + "frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove," + "skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl," + "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed," + "toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier," + "toothbrush" + } + super_categories { + key: "person" + value: "person" + } + super_categories { + key: "vehicle" + value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat" + } + super_categories { + key: "outdoor" + value: "traffic light,fire hydrant,stop sign,parking meter,bench" + } + super_categories { + key: "animal" + value: 
"bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe" + } + super_categories { + key: "accessory" + value: "backpack,umbrella,handbag,tie,suitcase" + } + super_categories { + key: "sports" + value: "frisbee,skis,snowboard,sports ball,kite,baseball bat," + "baseball glove,skateboard,surfboard,tennis racket" + } + super_categories { + key: "kitchen" + value: "bottle,wine glass,cup,fork,knife,spoon,bowl" + } + super_categories { + key: "food" + value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut," + "cake" + } + super_categories { + key: "furniture" + value: "chair,couch,potted plant,bed,dining table,toilet" + } + super_categories { + key: "electronic" + value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator" + } + super_categories { + key: "indoor" + value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush" + } +} + +eval_input_reader: { + load_instance_masks: true + mask_type: PNG_MASKS + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } +} diff --git a/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_voc_only_tpu-128.config b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_voc_only_tpu-128.config new file mode 100644 index 0000000000000000000000000000000000000000..f2c7f7c3bcd1af9d65920c21e8df295af10b2875 --- /dev/null +++ b/research/object_detection/configs/tf2/center_net_deepmac_1024x1024_voc_only_tpu-128.config @@ -0,0 +1,234 @@ +# DeepMAC meta architecture from the "The surprising impact of mask-head +# architecture on novel class segmentation" [1] paper with an Hourglass-100[2] +# mask head. This config is only trained on masks from the VOC classes in COCO +# and achieves a mask mAP of 35.5% on non-VOC classes. 
+# [1]: https://arxiv.org/abs/2104.00613 +# [2]: https://arxiv.org/abs/1904.07850 + +# Train on TPU-128 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + bgr_ordering: true + channel_means: [104.01362025, 114.03422265, 119.9165958 ] + channel_stds: [73.6027665 , 69.89082075, 70.9150767 ] + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 1024 + max_dimension: 1024 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + deepmac_mask_estimation { + dim: 32 + task_loss_weight: 5.0 + pixel_embedding_dim: 16 + mask_size: 32 + use_xy: true + use_instance_embedding: true + network_type: "hourglass100" + classification_loss { + weighted_sigmoid {} + } + + allowed_masked_classes_ids: [ + 1, # person + 2, # bicycle + 3, # car + 4, # motorcycle/motorbike + 5, # airplane/aeroplane, + 6, # bus + 7, # train + 9, # boat + 16, # bird + 17, # cat + 18, # dog + 19, # horse + 20, # sheep + 21, # cow + 44, # bottle + 62, # chair + 63, # couch/sofa + 64, # potted plant + 67, # dining table + 72 # tvmonitor + ] + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 50000 + + data_augmentation_options { + random_horizontal_flip { + } + } + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_square_crop_by_scale { + scale_min: 0.6 + scale_max: 1.3 + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match 
tf.keras.optimizers.Adam's default. + learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 50000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51" + fine_tune_checkpoint_type: "fine_tune" +} + +train_input_reader: { + load_instance_masks: true + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + mask_type: PNG_MASKS + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } +} + +eval_config: { + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + super_categories { + key: "VOC" + value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat," + "dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv" + } + super_categories { + key: "NonVOC" + value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench," + "elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase," + "frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove," + "skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl," + "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed," + "toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier," + "toothbrush" + } + super_categories { + key: "person" + value: "person" + } + super_categories { + key: "vehicle" + value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat" + } + super_categories { + key: "outdoor" + value: "traffic light,fire hydrant,stop sign,parking meter,bench" + } + super_categories { + key: "animal" + value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe" + } + 
super_categories { + key: "accessory" + value: "backpack,umbrella,handbag,tie,suitcase" + } + super_categories { + key: "sports" + value: "frisbee,skis,snowboard,sports ball,kite,baseball bat," + "baseball glove,skateboard,surfboard,tennis racket" + } + super_categories { + key: "kitchen" + value: "bottle,wine glass,cup,fork,knife,spoon,bowl" + } + super_categories { + key: "food" + value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut," + "cake" + } + super_categories { + key: "furniture" + value: "chair,couch,potted plant,bed,dining table,toilet" + } + super_categories { + key: "electronic" + value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator" + } + super_categories { + key: "indoor" + value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush" + } +} + +eval_input_reader: { + load_instance_masks: true + mask_type: PNG_MASKS + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } +} + diff --git a/research/object_detection/configs/tf2/center_net_deepmac_512x512_voc_only_tpu-32.config b/research/object_detection/configs/tf2/center_net_deepmac_512x512_voc_only_tpu-32.config new file mode 100644 index 0000000000000000000000000000000000000000..1b5891e21362d0de016afc88d24e0a8956100b84 --- /dev/null +++ b/research/object_detection/configs/tf2/center_net_deepmac_512x512_voc_only_tpu-32.config @@ -0,0 +1,233 @@ +# DeepMAC meta architecture from the "The surprising impact of mask-head +# architecture on novel class segmentation" [1] paper with an Hourglass-52[2] +# mask head. This config is only trained on masks from the VOC classes in COCO +# and achieves a mask mAP of 32.5% on non-VOC classes. 
+# [1]: https://arxiv.org/abs/2104.00613 +# [2]: https://arxiv.org/abs/1904.07850 + +# Train on TPU-32 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + bgr_ordering: true + channel_means: [104.01362025, 114.03422265, 119.9165958 ] + channel_stds: [73.6027665 , 69.89082075, 70.9150767 ] + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 512 + max_dimension: 512 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + deepmac_mask_estimation { + dim: 32 + task_loss_weight: 5.0 + pixel_embedding_dim: 16 + mask_size: 32 + use_xy: true + use_instance_embedding: true + network_type: "hourglass52" + classification_loss { + weighted_sigmoid {} + } + + allowed_masked_classes_ids: [ + 1, # person + 2, # bicycle + 3, # car + 4, # motorcycle/motorbike + 5, # airplane/aeroplane, + 6, # bus + 7, # train + 9, # boat + 16, # bird + 17, # cat + 18, # dog + 19, # horse + 20, # sheep + 21, # cow + 44, # bottle + 62, # chair + 63, # couch/sofa + 64, # potted plant + 67, # dining table + 72 # tvmonitor + ] + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 50000 + + data_augmentation_options { + random_horizontal_flip { + } + } + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_square_crop_by_scale { + scale_min: 0.6 + scale_max: 1.3 + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match 
tf.keras.optimizers.Adam's default. + learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 50000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-1" + fine_tune_checkpoint_type: "detection" +} + +train_input_reader: { + load_instance_masks: true + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + mask_type: PNG_MASKS + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } +} + +eval_config: { + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + super_categories { + key: "VOC" + value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat," + "dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv" + } + super_categories { + key: "NonVOC" + value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench," + "elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase," + "frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove," + "skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl," + "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed," + "toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier," + "toothbrush" + } + super_categories { + key: "person" + value: "person" + } + super_categories { + key: "vehicle" + value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat" + } + super_categories { + key: "outdoor" + value: "traffic light,fire hydrant,stop sign,parking meter,bench" + } + super_categories { + key: "animal" + value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe" + } + 
super_categories { + key: "accessory" + value: "backpack,umbrella,handbag,tie,suitcase" + } + super_categories { + key: "sports" + value: "frisbee,skis,snowboard,sports ball,kite,baseball bat," + "baseball glove,skateboard,surfboard,tennis racket" + } + super_categories { + key: "kitchen" + value: "bottle,wine glass,cup,fork,knife,spoon,bowl" + } + super_categories { + key: "food" + value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut," + "cake" + } + super_categories { + key: "furniture" + value: "chair,couch,potted plant,bed,dining table,toilet" + } + super_categories { + key: "electronic" + value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster," + "sink,refrigerator" + } + super_categories { + key: "indoor" + value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush" + } +} + +eval_input_reader: { + load_instance_masks: true + mask_type: PNG_MASKS + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } +} diff --git a/research/object_detection/configs/tf2/center_net_hourglass104_1024x1024_coco17_tpu-32.config b/research/object_detection/configs/tf2/centernet_hourglass104_1024x1024_coco17_tpu-32.config similarity index 100% rename from research/object_detection/configs/tf2/center_net_hourglass104_1024x1024_coco17_tpu-32.config rename to research/object_detection/configs/tf2/centernet_hourglass104_1024x1024_coco17_tpu-32.config diff --git a/research/object_detection/configs/tf2/centernet_hourglass104_1024x1024_kpts_coco17_tpu-32.config b/research/object_detection/configs/tf2/centernet_hourglass104_1024x1024_kpts_coco17_tpu-32.config new file mode 100644 index 0000000000000000000000000000000000000000..da7136f15db8a7a6201700ff761b4cab1387fdd2 --- /dev/null +++ b/research/object_detection/configs/tf2/centernet_hourglass104_1024x1024_kpts_coco17_tpu-32.config @@ -0,0 +1,374 @@ +# CenterNet 
meta-architecture from the "Objects as Points" [2] paper with the +# hourglass[1] backbone. This config achieves an mAP of 42.8/64.5 +/- 0.16 on +# COCO 17 (averaged over 5 runs). This config is TPU compatible. +# [1]: https://arxiv.org/abs/1603.06937 +# [2]: https://arxiv.org/abs/1904.07850 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + channel_means: 104.01361846923828 + channel_means: 114.03422546386719 + channel_means: 119.91659545898438 + channel_stds: 73.60276794433594 + channel_stds: 69.89082336425781 + channel_stds: 70.91507720947266 + bgr_ordering: true + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 1024 + max_dimension: 1024 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.10000000149011612 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + min_box_overlap_iou: 0.699999988079071 + max_box_predictions: 100 + } + keypoint_label_map_path: "PATH_TO_BE_CONFIGURED" + keypoint_estimation_task { + task_name: "human_pose" + task_loss_weight: 1.0 + loss { + localization_loss { + l1_localization_loss { + } + } + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + keypoint_class_name: "/m/01g317" + keypoint_label_to_std { + key: "left_ankle" + value: 0.8899999856948853 + } + keypoint_label_to_std { + key: "left_ear" + value: 0.3499999940395355 + } + keypoint_label_to_std { + key: "left_elbow" + value: 0.7200000286102295 + } + keypoint_label_to_std { + key: "left_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "left_hip" + value: 1.0700000524520874 + } + keypoint_label_to_std { + key: "left_knee" + value: 0.8899999856948853 + } + keypoint_label_to_std { + key: "left_shoulder" + value: 
0.7900000214576721 + } + keypoint_label_to_std { + key: "left_wrist" + value: 0.6200000047683716 + } + keypoint_label_to_std { + key: "nose" + value: 0.25999999046325684 + } + keypoint_label_to_std { + key: "right_ankle" + value: 0.8899999856948853 + } + keypoint_label_to_std { + key: "right_ear" + value: 0.3499999940395355 + } + keypoint_label_to_std { + key: "right_elbow" + value: 0.7200000286102295 + } + keypoint_label_to_std { + key: "right_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "right_hip" + value: 1.0700000524520874 + } + keypoint_label_to_std { + key: "right_knee" + value: 0.8899999856948853 + } + keypoint_label_to_std { + key: "right_shoulder" + value: 0.7900000214576721 + } + keypoint_label_to_std { + key: "right_wrist" + value: 0.6200000047683716 + } + keypoint_regression_loss_weight: 0.10000000149011612 + keypoint_heatmap_loss_weight: 1.0 + keypoint_offset_loss_weight: 1.0 + offset_peak_radius: 3 + per_keypoint_offset: true + } + } +} +train_config { + batch_size: 128 + data_augmentation_options { + random_horizontal_flip { + keypoint_flip_permutation: 0 + keypoint_flip_permutation: 2 + keypoint_flip_permutation: 1 + keypoint_flip_permutation: 4 + keypoint_flip_permutation: 3 + keypoint_flip_permutation: 6 + keypoint_flip_permutation: 5 + keypoint_flip_permutation: 8 + keypoint_flip_permutation: 7 + keypoint_flip_permutation: 10 + keypoint_flip_permutation: 9 + keypoint_flip_permutation: 12 + keypoint_flip_permutation: 11 + keypoint_flip_permutation: 14 + keypoint_flip_permutation: 13 + keypoint_flip_permutation: 16 + keypoint_flip_permutation: 15 + } + } + data_augmentation_options { + random_adjust_hue { + } + } + data_augmentation_options { + random_adjust_contrast { + } + } + data_augmentation_options { + random_adjust_saturation { + } + } + data_augmentation_options { + random_adjust_brightness { + } + } + data_augmentation_options { + random_square_crop_by_scale { + scale_min: 0.6000000238418579 + scale_max: 1.2999999523162842 + } 
+ } + optimizer { + adam_optimizer { + learning_rate { + cosine_decay_learning_rate { + learning_rate_base: 0.0010000000474974513 + total_steps: 250000 + warmup_learning_rate: 0.0002500000118743628 + warmup_steps: 5000 + } + } + epsilon: 1.0000000116860974e-07 + } + use_moving_average: false + } + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED" + num_steps: 250000 + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + fine_tune_checkpoint_type: "detection" + fine_tune_checkpoint_version: V2 +} +train_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + num_keypoints: 17 +} +eval_config { + num_visualizations: 10 + metrics_set: "coco_detection_metrics" + use_moving_averages: false + min_score_threshold: 0.20000000298023224 + max_num_boxes_to_visualize: 20 + batch_size: 1 + parameterized_metric { + coco_keypoint_metrics { + class_label: "person" + keypoint_label_to_sigmas { + key: "left_ankle" + value: 0.08900000154972076 + } + keypoint_label_to_sigmas { + key: "left_ear" + value: 0.03500000014901161 + } + keypoint_label_to_sigmas { + key: "left_elbow" + value: 0.07199999690055847 + } + keypoint_label_to_sigmas { + key: "left_eye" + value: 0.02500000037252903 + } + keypoint_label_to_sigmas { + key: "left_hip" + value: 0.10700000077486038 + } + keypoint_label_to_sigmas { + key: "left_knee" + value: 0.08699999749660492 + } + keypoint_label_to_sigmas { + key: "left_shoulder" + value: 0.07900000363588333 + } + keypoint_label_to_sigmas { + key: "left_wrist" + value: 0.06199999898672104 + } + keypoint_label_to_sigmas { + key: "nose" + value: 0.026000000536441803 + } + keypoint_label_to_sigmas { + key: "right_ankle" + value: 0.08900000154972076 + } + keypoint_label_to_sigmas { + key: "right_ear" + value: 0.03500000014901161 + } + keypoint_label_to_sigmas { + key: "right_elbow" + value: 0.07199999690055847 + } + keypoint_label_to_sigmas { + key: 
"right_eye" + value: 0.02500000037252903 + } + keypoint_label_to_sigmas { + key: "right_hip" + value: 0.10700000077486038 + } + keypoint_label_to_sigmas { + key: "right_knee" + value: 0.08699999749660492 + } + keypoint_label_to_sigmas { + key: "right_shoulder" + value: 0.07900000363588333 + } + keypoint_label_to_sigmas { + key: "right_wrist" + value: 0.06199999898672104 + } + } + } + keypoint_edge { + start: 0 + end: 1 + } + keypoint_edge { + start: 0 + end: 2 + } + keypoint_edge { + start: 1 + end: 3 + } + keypoint_edge { + start: 2 + end: 4 + } + keypoint_edge { + start: 0 + end: 5 + } + keypoint_edge { + start: 0 + end: 6 + } + keypoint_edge { + start: 5 + end: 7 + } + keypoint_edge { + start: 7 + end: 9 + } + keypoint_edge { + start: 6 + end: 8 + } + keypoint_edge { + start: 8 + end: 10 + } + keypoint_edge { + start: 5 + end: 6 + } + keypoint_edge { + start: 5 + end: 11 + } + keypoint_edge { + start: 6 + end: 12 + } + keypoint_edge { + start: 11 + end: 12 + } + keypoint_edge { + start: 11 + end: 13 + } + keypoint_edge { + start: 13 + end: 15 + } + keypoint_edge { + start: 12 + end: 14 + } + keypoint_edge { + start: 14 + end: 16 + } +} +eval_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } + num_keypoints: 17 +} diff --git a/research/object_detection/configs/tf2/center_net_hourglass104_512x512_coco17_tpu-8.config b/research/object_detection/configs/tf2/centernet_hourglass104_512x512_coco17_tpu-8.config similarity index 100% rename from research/object_detection/configs/tf2/center_net_hourglass104_512x512_coco17_tpu-8.config rename to research/object_detection/configs/tf2/centernet_hourglass104_512x512_coco17_tpu-8.config diff --git a/research/object_detection/configs/tf2/centernet_hourglass104_512x512_kpts_coco17_tpu-32.config 
b/research/object_detection/configs/tf2/centernet_hourglass104_512x512_kpts_coco17_tpu-32.config new file mode 100644 index 0000000000000000000000000000000000000000..ce5652895f9331261f28be1e23eca4ccb916d1e1 --- /dev/null +++ b/research/object_detection/configs/tf2/centernet_hourglass104_512x512_kpts_coco17_tpu-32.config @@ -0,0 +1,395 @@ +# CenterNet meta-architecture from the "Objects as Points" [2] paper with the +# hourglass[1] backbone. This config achieves an mAP of 40.0/61.4 +/- 0.16 on +# COCO 17 (averaged over 5 runs). This config is TPU compatible. +# [1]: https://arxiv.org/abs/1603.06937 +# [2]: https://arxiv.org/abs/1904.07850 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "hourglass_104" + bgr_ordering: true + channel_means: [104.01362025, 114.03422265, 119.9165958 ] + channel_stds: [73.6027665 , 69.89082075, 70.9150767 ] + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 512 + max_dimension: 512 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + keypoint_label_map_path: "PATH_TO_BE_CONFIGURED" + keypoint_estimation_task { + task_name: "human_pose" + task_loss_weight: 1.0 + loss { + localization_loss { + l1_localization_loss { + } + } + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + keypoint_class_name: "/m/01g317" + keypoint_label_to_std { + key: "left_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "left_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "left_elbow" + value: 0.72 + } + keypoint_label_to_std { + key: "left_eye" + value: 0.25 + } + keypoint_label_to_std { + key: 
"left_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "left_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "left_shoulder" + value: 0.79 + } + keypoint_label_to_std { + key: "left_wrist" + value: 0.62 + } + keypoint_label_to_std { + key: "nose" + value: 0.26 + } + keypoint_label_to_std { + key: "right_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "right_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "right_elbow" + value: 0.72 + } + keypoint_label_to_std { + key: "right_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "right_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "right_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "right_shoulder" + value: 0.79 + } + keypoint_label_to_std { + key: "right_wrist" + value: 0.62 + } + keypoint_regression_loss_weight: 0.1 + keypoint_heatmap_loss_weight: 1.0 + keypoint_offset_loss_weight: 1.0 + offset_peak_radius: 3 + per_keypoint_offset: true + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 250000 + + data_augmentation_options { + random_horizontal_flip { + keypoint_flip_permutation: 0 + keypoint_flip_permutation: 2 + keypoint_flip_permutation: 1 + keypoint_flip_permutation: 4 + keypoint_flip_permutation: 3 + keypoint_flip_permutation: 6 + keypoint_flip_permutation: 5 + keypoint_flip_permutation: 8 + keypoint_flip_permutation: 7 + keypoint_flip_permutation: 10 + keypoint_flip_permutation: 9 + keypoint_flip_permutation: 12 + keypoint_flip_permutation: 11 + keypoint_flip_permutation: 14 + keypoint_flip_permutation: 13 + keypoint_flip_permutation: 16 + keypoint_flip_permutation: 15 + } + } + + data_augmentation_options { + random_crop_image { + min_aspect_ratio: 0.5 + max_aspect_ratio: 1.7 + random_coef: 0.25 + } + } + + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + 
random_adjust_brightness { + } + } + + data_augmentation_options { + random_absolute_pad_image { + max_height_padding: 200 + max_width_padding: 200 + pad_color: [0, 0, 0] + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default. + learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 250000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED" + fine_tune_checkpoint_type: "detection" +} + +train_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + num_keypoints: 17 +} + +eval_config: { + metrics_set: "coco_detection_metrics" + use_moving_averages: false + num_visualizations: 10 + max_num_boxes_to_visualize: 20 + min_score_threshold: 0.2 + batch_size: 1; + parameterized_metric { + coco_keypoint_metrics { + class_label: "person" + keypoint_label_to_sigmas { + key: "nose" + value: 0.026 + } + keypoint_label_to_sigmas { + key: "left_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "right_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "left_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "right_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "left_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "right_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "left_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "right_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "left_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "right_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "left_hip" + value: 0.107 + } + keypoint_label_to_sigmas { + key: "right_hip" + value: 
0.107 + } + keypoint_label_to_sigmas { + key: "left_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "right_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "left_ankle" + value: 0.089 + } + keypoint_label_to_sigmas { + key: "right_ankle" + value: 0.089 + } + } + } + # Provide the edges to connect the keypoints. The setting is suitable for + # COCO's 17 human pose keypoints. + keypoint_edge { # nose-left eye + start: 0 + end: 1 + } + keypoint_edge { # nose-right eye + start: 0 + end: 2 + } + keypoint_edge { # left eye-left ear + start: 1 + end: 3 + } + keypoint_edge { # right eye-right ear + start: 2 + end: 4 + } + keypoint_edge { # nose-left shoulder + start: 0 + end: 5 + } + keypoint_edge { # nose-right shoulder + start: 0 + end: 6 + } + keypoint_edge { # left shoulder-left elbow + start: 5 + end: 7 + } + keypoint_edge { # left elbow-left wrist + start: 7 + end: 9 + } + keypoint_edge { # right shoulder-right elbow + start: 6 + end: 8 + } + keypoint_edge { # right elbow-right wrist + start: 8 + end: 10 + } + keypoint_edge { # left shoulder-right shoulder + start: 5 + end: 6 + } + keypoint_edge { # left shoulder-left hip + start: 5 + end: 11 + } + keypoint_edge { # right shoulder-right hip + start: 6 + end: 12 + } + keypoint_edge { # left hip-right hip + start: 11 + end: 12 + } + keypoint_edge { # left hip-left knee + start: 11 + end: 13 + } + keypoint_edge { # left knee-left ankle + start: 13 + end: 15 + } + keypoint_edge { # right hip-right knee + start: 12 + end: 14 + } + keypoint_edge { # right knee-right ankle + start: 14 + end: 16 + } +} + +eval_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + num_keypoints: 17 +} + diff --git a/research/object_detection/configs/tf2/center_net_resnet101_v1_fpn_512x512_coco17_tpu-8.config 
b/research/object_detection/configs/tf2/centernet_resnet101_v1_fpn_512x512_coco17_tpu-8.config similarity index 100% rename from research/object_detection/configs/tf2/center_net_resnet101_v1_fpn_512x512_coco17_tpu-8.config rename to research/object_detection/configs/tf2/centernet_resnet101_v1_fpn_512x512_coco17_tpu-8.config diff --git a/research/object_detection/configs/tf2/centernet_resnet50_v1_fpn_512x512_kpts_coco17_tpu-8.config b/research/object_detection/configs/tf2/centernet_resnet50_v1_fpn_512x512_kpts_coco17_tpu-8.config new file mode 100644 index 0000000000000000000000000000000000000000..ad25d5c347dafc7f442c62e534e1a7b551c1d728 --- /dev/null +++ b/research/object_detection/configs/tf2/centernet_resnet50_v1_fpn_512x512_kpts_coco17_tpu-8.config @@ -0,0 +1,392 @@ +# CenterNet meta-architecture from the "Objects as Points" [1] paper +# with the ResNet-v1-50 backbone. The ResNet backbone has a few differences +# as compared to the one mentioned in the paper, hence the performance is +# slightly worse. This config is TPU compatible.
+# [1]: https://arxiv.org/abs/1904.07850 +# + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "resnet_v1_50_fpn" + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 512 + max_dimension: 512 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + keypoint_label_map_path: "PATH_TO_BE_CONFIGURED" + keypoint_estimation_task { + task_name: "human_pose" + task_loss_weight: 1.0 + loss { + localization_loss { + l1_localization_loss { + } + } + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + keypoint_class_name: "/m/01g317" + keypoint_label_to_std { + key: "left_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "left_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "left_elbow" + value: 0.72 + } + keypoint_label_to_std { + key: "left_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "left_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "left_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "left_shoulder" + value: 0.79 + } + keypoint_label_to_std { + key: "left_wrist" + value: 0.62 + } + keypoint_label_to_std { + key: "nose" + value: 0.26 + } + keypoint_label_to_std { + key: "right_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "right_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "right_elbow" + value: 0.72 + } + keypoint_label_to_std { + key: "right_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "right_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "right_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "right_shoulder" + value: 0.79 + } + 
keypoint_label_to_std { + key: "right_wrist" + value: 0.62 + } + keypoint_regression_loss_weight: 0.1 + keypoint_heatmap_loss_weight: 1.0 + keypoint_offset_loss_weight: 1.0 + offset_peak_radius: 3 + per_keypoint_offset: true + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 250000 + + data_augmentation_options { + random_horizontal_flip { + keypoint_flip_permutation: 0 + keypoint_flip_permutation: 2 + keypoint_flip_permutation: 1 + keypoint_flip_permutation: 4 + keypoint_flip_permutation: 3 + keypoint_flip_permutation: 6 + keypoint_flip_permutation: 5 + keypoint_flip_permutation: 8 + keypoint_flip_permutation: 7 + keypoint_flip_permutation: 10 + keypoint_flip_permutation: 9 + keypoint_flip_permutation: 12 + keypoint_flip_permutation: 11 + keypoint_flip_permutation: 14 + keypoint_flip_permutation: 13 + keypoint_flip_permutation: 16 + keypoint_flip_permutation: 15 + } + } + + data_augmentation_options { + random_crop_image { + min_aspect_ratio: 0.5 + max_aspect_ratio: 1.7 + random_coef: 0.25 + } + } + + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_absolute_pad_image { + max_height_padding: 200 + max_width_padding: 200 + pad_color: [0, 0, 0] + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default. 
+ learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 250000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED" + fine_tune_checkpoint_type: "classification" +} + +train_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + num_keypoints: 17 +} + +eval_config: { + metrics_set: "coco_detection_metrics" + use_moving_averages: false + num_visualizations: 10 + max_num_boxes_to_visualize: 20 + min_score_threshold: 0.2 + batch_size: 1; + parameterized_metric { + coco_keypoint_metrics { + class_label: "person" + keypoint_label_to_sigmas { + key: "nose" + value: 0.026 + } + keypoint_label_to_sigmas { + key: "left_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "right_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "left_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "right_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "left_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "right_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "left_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "right_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "left_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "right_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "left_hip" + value: 0.107 + } + keypoint_label_to_sigmas { + key: "right_hip" + value: 0.107 + } + keypoint_label_to_sigmas { + key: "left_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "right_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "left_ankle" + value: 0.089 + } + keypoint_label_to_sigmas { + key: "right_ankle" + 
value: 0.089 + } + } + } + # Provide the edges to connect the keypoints. The setting is suitable for + # COCO's 17 human pose keypoints. + keypoint_edge { # nose-left eye + start: 0 + end: 1 + } + keypoint_edge { # nose-right eye + start: 0 + end: 2 + } + keypoint_edge { # left eye-left ear + start: 1 + end: 3 + } + keypoint_edge { # right eye-right ear + start: 2 + end: 4 + } + keypoint_edge { # nose-left shoulder + start: 0 + end: 5 + } + keypoint_edge { # nose-right shoulder + start: 0 + end: 6 + } + keypoint_edge { # left shoulder-left elbow + start: 5 + end: 7 + } + keypoint_edge { # left elbow-left wrist + start: 7 + end: 9 + } + keypoint_edge { # right shoulder-right elbow + start: 6 + end: 8 + } + keypoint_edge { # right elbow-right wrist + start: 8 + end: 10 + } + keypoint_edge { # left shoulder-right shoulder + start: 5 + end: 6 + } + keypoint_edge { # left shoulder-left hip + start: 5 + end: 11 + } + keypoint_edge { # right shoulder-right hip + start: 6 + end: 12 + } + keypoint_edge { # left hip-right hip + start: 11 + end: 12 + } + keypoint_edge { # left hip-left knee + start: 11 + end: 13 + } + keypoint_edge { # left knee-left ankle + start: 13 + end: 15 + } + keypoint_edge { # right hip-right knee + start: 12 + end: 14 + } + keypoint_edge { # right knee-right ankle + start: 14 + end: 16 + } +} +eval_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } + num_keypoints: 17 +} diff --git a/research/object_detection/configs/tf2/centernet_resnet50_v2_512x512_kpts_coco17_tpu-8.config b/research/object_detection/configs/tf2/centernet_resnet50_v2_512x512_kpts_coco17_tpu-8.config new file mode 100644 index 0000000000000000000000000000000000000000..3067ed417b1898b0b2b7839647d138c462c329c9 --- /dev/null +++ b/research/object_detection/configs/tf2/centernet_resnet50_v2_512x512_kpts_coco17_tpu-8.config @@ -0,0 
+1,393 @@ +# CenterNet meta-architecture from the "Objects as Points" [1] paper +# with the ResNet-v2-50 backbone. The ResNet backbone has a few differences +# as compared to the one mentioned in the paper, hence the performance is +# slightly worse. This config is TPU comptatible. +# [1]: https://arxiv.org/abs/1904.07850 + +model { + center_net { + num_classes: 90 + feature_extractor { + type: "resnet_v2_50" + } + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 512 + max_dimension: 512 + pad_to_max_dimension: true + } + } + object_detection_task { + task_loss_weight: 1.0 + offset_loss_weight: 1.0 + scale_loss_weight: 0.1 + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + object_center_loss_weight: 1.0 + min_box_overlap_iou: 0.7 + max_box_predictions: 100 + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + + keypoint_label_map_path: "PATH_TO_BE_CONFIGURED" + keypoint_estimation_task { + task_name: "human_pose" + task_loss_weight: 1.0 + loss { + localization_loss { + l1_localization_loss { + } + } + classification_loss { + penalty_reduced_logistic_focal_loss { + alpha: 2.0 + beta: 4.0 + } + } + } + keypoint_class_name: "/m/01g317" + keypoint_label_to_std { + key: "left_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "left_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "left_elbow" + value: 0.72 + } + keypoint_label_to_std { + key: "left_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "left_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "left_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "left_shoulder" + value: 0.79 + } + keypoint_label_to_std { + key: "left_wrist" + value: 0.62 + } + keypoint_label_to_std { + key: "nose" + value: 0.26 + } + keypoint_label_to_std { + key: "right_ankle" + value: 0.89 + } + keypoint_label_to_std { + key: "right_ear" + value: 0.35 + } + keypoint_label_to_std { + key: "right_elbow" + value: 
0.72 + } + keypoint_label_to_std { + key: "right_eye" + value: 0.25 + } + keypoint_label_to_std { + key: "right_hip" + value: 1.07 + } + keypoint_label_to_std { + key: "right_knee" + value: 0.89 + } + keypoint_label_to_std { + key: "right_shoulder" + value: 0.79 + } + keypoint_label_to_std { + key: "right_wrist" + value: 0.62 + } + keypoint_regression_loss_weight: 0.1 + keypoint_heatmap_loss_weight: 1.0 + keypoint_offset_loss_weight: 1.0 + offset_peak_radius: 3 + per_keypoint_offset: true + } + } +} + +train_config: { + + batch_size: 128 + num_steps: 250000 + + data_augmentation_options { + random_horizontal_flip { + keypoint_flip_permutation: 0 + keypoint_flip_permutation: 2 + keypoint_flip_permutation: 1 + keypoint_flip_permutation: 4 + keypoint_flip_permutation: 3 + keypoint_flip_permutation: 6 + keypoint_flip_permutation: 5 + keypoint_flip_permutation: 8 + keypoint_flip_permutation: 7 + keypoint_flip_permutation: 10 + keypoint_flip_permutation: 9 + keypoint_flip_permutation: 12 + keypoint_flip_permutation: 11 + keypoint_flip_permutation: 14 + keypoint_flip_permutation: 13 + keypoint_flip_permutation: 16 + keypoint_flip_permutation: 15 + } + } + + data_augmentation_options { + random_crop_image { + min_aspect_ratio: 0.5 + max_aspect_ratio: 1.7 + random_coef: 0.25 + } + } + + + data_augmentation_options { + random_adjust_hue { + } + } + + data_augmentation_options { + random_adjust_contrast { + } + } + + data_augmentation_options { + random_adjust_saturation { + } + } + + data_augmentation_options { + random_adjust_brightness { + } + } + + data_augmentation_options { + random_absolute_pad_image { + max_height_padding: 200 + max_width_padding: 200 + pad_color: [0, 0, 0] + } + } + + optimizer { + adam_optimizer: { + epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default. 
+ learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 1e-3 + total_steps: 250000 + warmup_learning_rate: 2.5e-4 + warmup_steps: 5000 + } + } + } + use_moving_average: false + } + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED" + fine_tune_checkpoint_type: "classification" +} + +train_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + num_keypoints: 17 +} + +eval_config: { + metrics_set: "coco_detection_metrics" + use_moving_averages: false + num_visualizations: 10 + max_num_boxes_to_visualize: 20 + min_score_threshold: 0.2 + batch_size: 1; + parameterized_metric { + coco_keypoint_metrics { + class_label: "person" + keypoint_label_to_sigmas { + key: "nose" + value: 0.026 + } + keypoint_label_to_sigmas { + key: "left_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "right_eye" + value: 0.025 + } + keypoint_label_to_sigmas { + key: "left_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "right_ear" + value: 0.035 + } + keypoint_label_to_sigmas { + key: "left_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "right_shoulder" + value: 0.079 + } + keypoint_label_to_sigmas { + key: "left_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "right_elbow" + value: 0.072 + } + keypoint_label_to_sigmas { + key: "left_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "right_wrist" + value: 0.062 + } + keypoint_label_to_sigmas { + key: "left_hip" + value: 0.107 + } + keypoint_label_to_sigmas { + key: "right_hip" + value: 0.107 + } + keypoint_label_to_sigmas { + key: "left_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "right_knee" + value: 0.087 + } + keypoint_label_to_sigmas { + key: "left_ankle" + value: 0.089 + } + keypoint_label_to_sigmas { + key: "right_ankle" + 
value: 0.089 + } + } + } + # Provide the edges to connect the keypoints. The setting is suitable for + # COCO's 17 human pose keypoints. + keypoint_edge { # nose-left eye + start: 0 + end: 1 + } + keypoint_edge { # nose-right eye + start: 0 + end: 2 + } + keypoint_edge { # left eye-left ear + start: 1 + end: 3 + } + keypoint_edge { # right eye-right ear + start: 2 + end: 4 + } + keypoint_edge { # nose-left shoulder + start: 0 + end: 5 + } + keypoint_edge { # nose-right shoulder + start: 0 + end: 6 + } + keypoint_edge { # left shoulder-left elbow + start: 5 + end: 7 + } + keypoint_edge { # left elbow-left wrist + start: 7 + end: 9 + } + keypoint_edge { # right shoulder-right elbow + start: 6 + end: 8 + } + keypoint_edge { # right elbow-right wrist + start: 8 + end: 10 + } + keypoint_edge { # left shoulder-right shoulder + start: 5 + end: 6 + } + keypoint_edge { # left shoulder-left hip + start: 5 + end: 11 + } + keypoint_edge { # right shoulder-right hip + start: 6 + end: 12 + } + keypoint_edge { # left hip-right hip + start: 11 + end: 12 + } + keypoint_edge { # left hip-left knee + start: 11 + end: 13 + } + keypoint_edge { # left knee-left ankle + start: 13 + end: 15 + } + keypoint_edge { # right hip-right knee + start: 12 + end: 14 + } + keypoint_edge { # right knee-right ankle + start: 14 + end: 16 + } +} + +eval_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } + num_keypoints: 17 +} diff --git a/research/object_detection/configs/tf2/faster_rcnn_resnet50_v1_fpn_640x640_coco17_tpu-8.config b/research/object_detection/configs/tf2/faster_rcnn_resnet50_v1_fpn_640x640_coco17_tpu-8.config new file mode 100644 index 0000000000000000000000000000000000000000..acb5a91359bd3d0349f628d6f284c19e4dc0e326 --- /dev/null +++ b/research/object_detection/configs/tf2/faster_rcnn_resnet50_v1_fpn_640x640_coco17_tpu-8.config @@ 
-0,0 +1,173 @@ +# Faster RCNN with Resnet 50 v1 FPN feature extractor. +# See Lin et al, https://arxiv.org/abs/1612.03144 +# Trained on COCO, initialized from Imagenet classification checkpoint +# Train on TPU-8 +# +# Achieves 31.4 mAP on COCO17 Val + +model { + faster_rcnn { + num_classes: 90 + image_resizer { + keep_aspect_ratio_resizer { + min_dimension: 640 + max_dimension: 640 + pad_to_max_dimension: true + } + } + feature_extractor { + type: 'faster_rcnn_resnet50_fpn_keras' + batch_norm_trainable: true + fpn { + min_level: 2 + max_level: 6 + } + conv_hyperparams { + activation: RELU_6, + regularizer { + l2_regularizer { + weight: 0.0004 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.03 + mean: 0.0 + } + } + batch_norm { + scale: true, + decay: 0.997, + epsilon: 0.001, + } + } + override_base_feature_extractor_hyperparams: true + } + first_stage_anchor_generator { + multiscale_anchor_generator { + min_level: 2 + max_level: 6 + # According to the origial paper the value should be 8.0 + anchor_scale: 4.0 + aspect_ratios: [1.0, 2.0, 0.5] + # According to the original paper the value should be 1 + scales_per_octave: 2 + normalize_coordinates: false + } + } + first_stage_box_predictor_conv_hyperparams { + op: CONV + regularizer { + l2_regularizer { + weight: 0.0 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.01 + } + } + } + first_stage_nms_score_threshold: 0.0 + first_stage_nms_iou_threshold: 0.7 + first_stage_max_proposals: 300 + first_stage_localization_loss_weight: 2.0 + first_stage_objectness_loss_weight: 1.0 + # According to the origial paper, value should be 7. 
+ initial_crop_size: 14 + maxpool_kernel_size: 2 + maxpool_stride: 2 + second_stage_box_predictor { + mask_rcnn_box_predictor { + use_dropout: false + dropout_keep_probability: 1.0 + fc_hyperparams { + op: FC + regularizer { + l2_regularizer { + weight: 0.0 + } + } + initializer { + variance_scaling_initializer { + factor: 1.0 + uniform: true + mode: FAN_AVG + } + } + } + } + } + second_stage_post_processing { + batch_non_max_suppression { + score_threshold: 0.0 + iou_threshold: 0.6 + max_detections_per_class: 100 + max_total_detections: 300 + } + score_converter: SOFTMAX + } + second_stage_localization_loss_weight: 2.0 + second_stage_classification_loss_weight: 1.0 + use_static_shapes: true + use_matmul_crop_and_resize: true + clip_anchors_to_image: true + use_static_balanced_label_sampler: true + use_matmul_gather_in_matcher: true + } +} + +train_config: { + batch_size: 64 + sync_replicas: true + startup_delay_steps: 0 + replicas_to_aggregate: 8 + num_steps: 25000 + optimizer { + momentum_optimizer: { + learning_rate: { + cosine_decay_learning_rate { + learning_rate_base: 0.04 + total_steps: 25000 + warmup_learning_rate: .013333 + warmup_steps: 2000 + } + } + momentum_optimizer_value: 0.9 + } + use_moving_average: false + } + fine_tune_checkpoint_version: V2 + fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1" + fine_tune_checkpoint_type: "classification" + data_augmentation_options { + random_horizontal_flip { + } + } + + max_number_of_boxes: 100 + unpad_groundtruth_tensors: false + use_bfloat16: true +} + +train_input_reader: { + tf_record_input_reader { + input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord" + } + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" +} + +eval_config: { + metrics_set: "coco_detection_metrics" + use_moving_averages: false + batch_size: 1; +} + +eval_input_reader: { + label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt" + shuffle: false + num_epochs: 1 + tf_record_input_reader { + input_path: 
"PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord" + } +} diff --git a/research/object_detection/core/box_list_ops.py b/research/object_detection/core/box_list_ops.py index 159845b690d5f10ac4e38ca167faa2a6051cc023..cb457b728449cc637de8ab28f5f60711954d3c27 100644 --- a/research/object_detection/core/box_list_ops.py +++ b/research/object_detection/core/box_list_ops.py @@ -151,7 +151,10 @@ def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None): with tf.name_scope(scope, 'ClipToWindow'): y_min, x_min, y_max, x_max = tf.split( value=boxlist.get(), num_or_size_splits=4, axis=1) - win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) + win_y_min = window[0] + win_x_min = window[1] + win_y_max = window[2] + win_x_max = window[3] y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min) y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min) x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min) @@ -304,6 +307,50 @@ def iou(boxlist1, boxlist2, scope=None): tf.zeros_like(intersections), tf.truediv(intersections, unions)) +def l1(boxlist1, boxlist2, scope=None): + """Computes l1 loss (pairwise) between two boxlists. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing the pairwise L1 loss. 
+ """ + with tf.name_scope(scope, 'PairwiseL1'): + ycenter1, xcenter1, h1, w1 = boxlist1.get_center_coordinates_and_sizes() + ycenter2, xcenter2, h2, w2 = boxlist2.get_center_coordinates_and_sizes() + ycenters = tf.abs(tf.expand_dims(ycenter2, axis=0) - tf.expand_dims( + tf.transpose(ycenter1), axis=1)) + xcenters = tf.abs(tf.expand_dims(xcenter2, axis=0) - tf.expand_dims( + tf.transpose(xcenter1), axis=1)) + heights = tf.abs(tf.expand_dims(h2, axis=0) - tf.expand_dims( + tf.transpose(h1), axis=1)) + widths = tf.abs(tf.expand_dims(w2, axis=0) - tf.expand_dims( + tf.transpose(w1), axis=1)) + return ycenters + xcenters + heights + widths + + +def giou(boxlist1, boxlist2, scope=None): + """Computes pairwise generalized IOU between two boxlists. + + Args: + boxlist1: BoxList holding N boxes + boxlist2: BoxList holding M boxes + scope: name scope. + + Returns: + a tensor with shape [N, M] representing the pairwise GIoU loss. + """ + with tf.name_scope(scope, 'PairwiseGIoU'): + n = boxlist1.num_boxes() + m = boxlist2.num_boxes() + boxes1 = tf.repeat(boxlist1.get(), repeats=m, axis=0) + boxes2 = tf.tile(boxlist2.get(), multiples=[n, 1]) + return tf.reshape(ops.giou(boxes1, boxes2), [n, m]) + + def matched_iou(boxlist1, boxlist2, scope=None): """Compute intersection-over-union between corresponding boxes in boxlists. 
diff --git a/research/object_detection/core/box_list_ops_test.py b/research/object_detection/core/box_list_ops_test.py index b572dff9e1cdd1b7ae59a63df3ae294d4b01a9a5..767c1899727b9e0280c096dc8e4a440c535d5839 100644 --- a/research/object_detection/core/box_list_ops_test.py +++ b/research/object_detection/core/box_list_ops_test.py @@ -229,6 +229,31 @@ class BoxListOpsTest(test_case.TestCase): iou_output = self.execute(graph_fn, []) self.assertAllClose(iou_output, exp_output) + def test_l1(self): + def graph_fn(): + corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) + corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0], + [0.0, 0.0, 20.0, 20.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + l1 = box_list_ops.l1(boxes1, boxes2) + return l1 + exp_output = [[5.0, 22.5, 45.5], [8.5, 19.0, 40.0]] + l1_output = self.execute(graph_fn, []) + self.assertAllClose(l1_output, exp_output) + + def test_giou(self): + def graph_fn(): + corners1 = tf.constant([[5.0, 7.0, 7.0, 9.0]]) + corners2 = tf.constant([[5.0, 7.0, 7.0, 9.0], [5.0, 11.0, 7.0, 13.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + giou = box_list_ops.giou(boxes1, boxes2) + return giou + exp_output = [[1.0, -1.0 / 3.0]] + giou_output = self.execute(graph_fn, []) + self.assertAllClose(giou_output, exp_output) + def test_matched_iou(self): def graph_fn(): corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]) diff --git a/research/object_detection/core/densepose_ops.py b/research/object_detection/core/densepose_ops.py index 0ad852fb4e2de10d3545bf5575050c4f0fc3cefb..8dd8f39bafa357f242170b9ae2ec6c29cd24ef4f 100644 --- a/research/object_detection/core/densepose_ops.py +++ b/research/object_detection/core/densepose_ops.py @@ -26,6 +26,7 @@ points for an instance in the example. 
""" import os +import numpy as np import scipy.io import tensorflow.compat.v1 as tf @@ -278,8 +279,10 @@ class DensePoseHorizontalFlip(object): for key in ('U_transforms', 'V_transforms'): uv_symmetry_map_per_part = [] for i in range(data[key].shape[1]): - # The following tensor has shape [256, 256]. - map_per_part = tf.constant(data[key][0, i], dtype=tf.float32) + # The following tensor has shape [256, 256]. The raw data is stored as + # uint8 values, so convert to float and scale to the range [0., 1.] + data_normalized = data[key][0, i].astype(np.float32) / 255. + map_per_part = tf.constant(data_normalized, dtype=tf.float32) uv_symmetry_map_per_part.append(map_per_part) uv_symmetry_map[key] = tf.reshape( tf.stack(uv_symmetry_map_per_part, axis=0), [-1]) diff --git a/research/object_detection/core/freezable_batch_norm.py b/research/object_detection/core/freezable_batch_norm.py index 7f08fa5df12163e8178f233dbb1d766fe27d8742..295aa7b3a50e3c79e8ece4d1e4d75bcb4d688ed3 100644 --- a/research/object_detection/core/freezable_batch_norm.py +++ b/research/object_detection/core/freezable_batch_norm.py @@ -35,7 +35,7 @@ class FreezableBatchNorm(tf.keras.layers.BatchNormalization): i.e. applies a transformation that maintains the mean activation close to 0 and the activation standard deviation close to 1. - Arguments: + Args: training: If False, the layer will normalize using the moving average and std. dev, without updating the learned avg and std. dev. 
If None or True, the layer will follow the keras BatchNormalization layer diff --git a/research/object_detection/core/freezable_batch_norm_tf2_test.py b/research/object_detection/core/freezable_batch_norm_tf2_test.py index 4cc42ae3ef7da9b3412d2f461d7f9db62420e603..48b131d6279d34b2c050db46da0112b8e839d1c2 100644 --- a/research/object_detection/core/freezable_batch_norm_tf2_test.py +++ b/research/object_detection/core/freezable_batch_norm_tf2_test.py @@ -17,25 +17,40 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + import unittest + +from absl.testing import parameterized import numpy as np from six.moves import zip -import tensorflow.compat.v1 as tf +import tensorflow as tf from object_detection.core import freezable_batch_norm from object_detection.utils import tf_version +# pylint: disable=g-import-not-at-top +if tf_version.is_tf2(): + from object_detection.core import freezable_sync_batch_norm +# pylint: enable=g-import-not-at-top + @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') -class FreezableBatchNormTest(tf.test.TestCase): +class FreezableBatchNormTest(tf.test.TestCase, parameterized.TestCase): """Tests for FreezableBatchNorm operations.""" - def _build_model(self, training=None): + def _build_model(self, use_sync_batch_norm, training=None): model = tf.keras.models.Sequential() - norm = freezable_batch_norm.FreezableBatchNorm(training=training, - input_shape=(10,), - momentum=0.8) + norm = None + if use_sync_batch_norm: + norm = freezable_sync_batch_norm.FreezableSyncBatchNorm(training=training, + input_shape=(10,), + momentum=0.8) + else: + norm = freezable_batch_norm.FreezableBatchNorm(training=training, + input_shape=(10,), + momentum=0.8) + model.add(norm) return model, norm @@ -43,8 +58,9 @@ class FreezableBatchNormTest(tf.test.TestCase): for source, target in zip(source_weights, target_weights): target.assign(source) - def _train_freezable_batch_norm(self, 
training_mean, training_var): - model, _ = self._build_model() + def _train_freezable_batch_norm(self, training_mean, training_var, + use_sync_batch_norm): + model, _ = self._build_model(use_sync_batch_norm=use_sync_batch_norm) model.compile(loss='mse', optimizer='sgd') # centered on training_mean, variance training_var @@ -72,7 +88,8 @@ class FreezableBatchNormTest(tf.test.TestCase): np.testing.assert_allclose(out.numpy().mean(), 0.0, atol=1.5e-1) np.testing.assert_allclose(out.numpy().std(), 1.0, atol=1.5e-1) - def test_batchnorm_freezing_training_none(self): + @parameterized.parameters(True, False) + def test_batchnorm_freezing_training_none(self, use_sync_batch_norm): training_mean = 5.0 training_var = 10.0 @@ -81,12 +98,13 @@ class FreezableBatchNormTest(tf.test.TestCase): # Initially train the batch norm, and save the weights trained_weights = self._train_freezable_batch_norm(training_mean, - training_var) + training_var, + use_sync_batch_norm) # Load the batch norm weights, freezing training to True. # Apply the batch norm layer to testing data and ensure it is normalized # according to the batch statistics. 
- model, norm = self._build_model(training=True) + model, norm = self._build_model(use_sync_batch_norm, training=True) self._copy_weights(trained_weights, model.weights) # centered on testing_mean, variance testing_var @@ -136,7 +154,8 @@ class FreezableBatchNormTest(tf.test.TestCase): testing_mean, testing_var, training_arg, training_mean, training_var) - def test_batchnorm_freezing_training_false(self): + @parameterized.parameters(True, False) + def test_batchnorm_freezing_training_false(self, use_sync_batch_norm): training_mean = 5.0 training_var = 10.0 @@ -145,12 +164,13 @@ class FreezableBatchNormTest(tf.test.TestCase): # Initially train the batch norm, and save the weights trained_weights = self._train_freezable_batch_norm(training_mean, - training_var) + training_var, + use_sync_batch_norm) # Load the batch norm back up, freezing training to False. # Apply the batch norm layer to testing data and ensure it is normalized # according to the training data's statistics. - model, norm = self._build_model(training=False) + model, norm = self._build_model(use_sync_batch_norm, training=False) self._copy_weights(trained_weights, model.weights) # centered on testing_mean, variance testing_var diff --git a/research/object_detection/core/freezable_sync_batch_norm.py b/research/object_detection/core/freezable_sync_batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..f95a106498366dd028951686e3cac1b9d5f15802 --- /dev/null +++ b/research/object_detection/core/freezable_sync_batch_norm.py @@ -0,0 +1,70 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""A freezable batch norm layer that uses Keras sync batch normalization.""" +import tensorflow as tf + + +class FreezableSyncBatchNorm(tf.keras.layers.experimental.SyncBatchNormalization + ): + """Sync Batch normalization layer (Ioffe and Szegedy, 2014). + + This is a `freezable` batch norm layer that supports setting the `training` + parameter in the __init__ method rather than having to set it either via + the Keras learning phase or via the `call` method parameter. This layer will + forward all other parameters to the Keras `SyncBatchNormalization` layer + + This is class is necessary because Object Detection model training sometimes + requires batch normalization layers to be `frozen` and used as if it was + evaluation time, despite still training (and potentially using dropout layers) + + Like the default Keras SyncBatchNormalization layer, this will normalize the + activations of the previous layer at each batch, + i.e. applies a transformation that maintains the mean activation + close to 0 and the activation standard deviation close to 1. + + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + + Output shape: + Same shape as input. 
+ + References: + - [Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift](https://arxiv.org/abs/1502.03167) + """ + + def __init__(self, training=None, **kwargs): + """Constructor. + + Args: + training: If False, the layer will normalize using the moving average and + std. dev, without updating the learned avg and std. dev. + If None or True, the layer will follow the keras SyncBatchNormalization + layer strategy of checking the Keras learning phase at `call` time to + decide what to do. + **kwargs: The keyword arguments to forward to the keras + SyncBatchNormalization layer constructor. + """ + super(FreezableSyncBatchNorm, self).__init__(**kwargs) + self._training = training + + def call(self, inputs, training=None): + # Override the call arg only if the batchnorm is frozen. (Ignore None) + if self._training is False: # pylint: disable=g-bool-id-comparison + training = self._training + return super(FreezableSyncBatchNorm, self).call(inputs, training=training) diff --git a/research/object_detection/core/keypoint_ops.py b/research/object_detection/core/keypoint_ops.py index 1b0c4ccfed42aae492550331e870173c624f0316..521b9a7e9235040d8f1c6ebbd0ab0e23081ee5a8 100644 --- a/research/object_detection/core/keypoint_ops.py +++ b/research/object_detection/core/keypoint_ops.py @@ -56,6 +56,7 @@ def clip_to_window(keypoints, window, scope=None): Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ + keypoints.get_shape().assert_has_rank(3) with tf.name_scope(scope, 'ClipToWindow'): y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) @@ -81,6 +82,7 @@ def prune_outside_window(keypoints, window, scope=None): Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ + keypoints.get_shape().assert_has_rank(3) with tf.name_scope(scope, 'PruneOutsideWindow'): y, x = tf.split(value=keypoints, num_or_size_splits=2, 
axis=2) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) @@ -125,22 +127,24 @@ def change_coordinate_frame(keypoints, window, scope=None): return new_keypoints -def keypoints_to_enclosing_bounding_boxes(keypoints): +def keypoints_to_enclosing_bounding_boxes(keypoints, keypoints_axis=1): """Creates enclosing bounding boxes from keypoints. Args: keypoints: a [num_instances, num_keypoints, 2] float32 tensor with keypoints in [y, x] format. + keypoints_axis: An integer indicating the axis that correspond to the + keypoint dimension. Returns: A [num_instances, 4] float32 tensor that tightly covers all the keypoints for each instance. """ - ymin = tf.math.reduce_min(keypoints[:, :, 0], axis=1) - xmin = tf.math.reduce_min(keypoints[:, :, 1], axis=1) - ymax = tf.math.reduce_max(keypoints[:, :, 0], axis=1) - xmax = tf.math.reduce_max(keypoints[:, :, 1], axis=1) - return tf.stack([ymin, xmin, ymax, xmax], axis=1) + ymin = tf.math.reduce_min(keypoints[..., 0], axis=keypoints_axis) + xmin = tf.math.reduce_min(keypoints[..., 1], axis=keypoints_axis) + ymax = tf.math.reduce_max(keypoints[..., 0], axis=keypoints_axis) + xmax = tf.math.reduce_max(keypoints[..., 1], axis=keypoints_axis) + return tf.stack([ymin, xmin, ymax, xmax], axis=keypoints_axis) def to_normalized_coordinates(keypoints, height, width, @@ -240,6 +244,7 @@ def flip_horizontal(keypoints, flip_point, flip_permutation=None, scope=None): Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ + keypoints.get_shape().assert_has_rank(3) with tf.name_scope(scope, 'FlipHorizontal'): keypoints = tf.transpose(keypoints, [1, 0, 2]) if flip_permutation: @@ -274,6 +279,7 @@ def flip_vertical(keypoints, flip_point, flip_permutation=None, scope=None): Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ + keypoints.get_shape().assert_has_rank(3) with tf.name_scope(scope, 'FlipVertical'): keypoints = tf.transpose(keypoints, [1, 0, 2]) if flip_permutation: @@ 
-299,6 +305,7 @@ def rot90(keypoints, rotation_permutation=None, scope=None): Returns: new_keypoints: a tensor of shape [num_instances, num_keypoints, 2] """ + keypoints.get_shape().assert_has_rank(3) with tf.name_scope(scope, 'Rot90'): keypoints = tf.transpose(keypoints, [1, 0, 2]) if rotation_permutation: @@ -334,6 +341,7 @@ def keypoint_weights_from_visibilities(keypoint_visibilities, keypoints deemed visible will have the provided per-keypoint weight, and all others will be set to zero. """ + keypoint_visibilities.get_shape().assert_has_rank(2) if per_keypoint_weights is None: num_keypoints = keypoint_visibilities.shape.as_list()[1] per_keypoint_weight_mult = tf.ones((1, num_keypoints,), dtype=tf.float32) @@ -363,6 +371,7 @@ def set_keypoint_visibilities(keypoints, initial_keypoint_visibilities=None): keypoint_visibilities: a bool tensor of shape [num_instances, num_keypoints] indicating whether a keypoint is visible or not. """ + keypoints.get_shape().assert_has_rank(3) if initial_keypoint_visibilities is not None: keypoint_visibilities = tf.cast(initial_keypoint_visibilities, tf.bool) else: diff --git a/research/object_detection/core/keypoint_ops_test.py b/research/object_detection/core/keypoint_ops_test.py index bbdcf01940dcaf96da283bd6bcf73e91b633f0ee..729bfe8a37797295258abeff627a9c8ef3dc4513 100644 --- a/research/object_detection/core/keypoint_ops_test.py +++ b/research/object_detection/core/keypoint_ops_test.py @@ -116,6 +116,35 @@ class KeypointOpsTest(test_case.TestCase): ]) self.assertAllClose(expected_bboxes, output) + def test_keypoints_to_enclosing_bounding_boxes_axis2(self): + def graph_fn(): + keypoints = tf.constant( + [ + [ # Instance 0. + [5., 10.], + [3., 20.], + [8., 4.], + ], + [ # Instance 1. 
+ [2., 12.], + [0., 3.], + [5., 19.], + ], + ], dtype=tf.float32) + keypoints = tf.stack([keypoints, keypoints], axis=0) + bboxes = keypoint_ops.keypoints_to_enclosing_bounding_boxes( + keypoints, keypoints_axis=2) + return bboxes + output = self.execute(graph_fn, []) + + expected_bboxes = np.array( + [ + [3., 4., 8., 20.], + [0., 3., 5., 19.] + ]) + self.assertAllClose(expected_bboxes, output[0]) + self.assertAllClose(expected_bboxes, output[1]) + def test_to_normalized_coordinates(self): def graph_fn(): keypoints = tf.constant([ diff --git a/research/object_detection/core/losses.py b/research/object_detection/core/losses.py index c4d499e7e6c4ed5da803c48ff3d8908e713a3c2e..73d26b2123cdeafbf798ec452bfc5be742cca35a 100644 --- a/research/object_detection/core/losses.py +++ b/research/object_detection/core/losses.py @@ -36,6 +36,7 @@ import tensorflow.compat.v1 as tf from object_detection.core import box_list from object_detection.core import box_list_ops from object_detection.utils import ops +from object_detection.utils import shape_utils class Loss(six.with_metaclass(abc.ABCMeta, object)): @@ -210,6 +211,38 @@ class WeightedIOULocalizationLoss(Loss): return tf.reshape(weights, [-1]) * per_anchor_iou_loss +class WeightedGIOULocalizationLoss(Loss): + """GIOU localization loss function. + + Sums the GIOU loss for corresponding pairs of predicted/groundtruth boxes + and for each pair assign a loss of 1 - GIOU. We then compute a weighted + sum over all pairs which is returned as the total loss. + """ + + def _compute_loss(self, prediction_tensor, target_tensor, weights): + """Compute loss function. 
+ + Args: + prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4] + representing the decoded predicted boxes + target_tensor: A float tensor of shape [batch_size, num_anchors, 4] + representing the decoded target boxes + weights: a float tensor of shape [batch_size, num_anchors] + + Returns: + loss: a float tensor of shape [batch_size, num_anchors] tensor + representing the value of the loss function. + """ + batch_size, num_anchors, _ = shape_utils.combined_static_and_dynamic_shape( + prediction_tensor) + predicted_boxes = tf.reshape(prediction_tensor, [-1, 4]) + target_boxes = tf.reshape(target_tensor, [-1, 4]) + + per_anchor_iou_loss = 1 - ops.giou(predicted_boxes, target_boxes) + return tf.reshape(tf.reshape(weights, [-1]) * per_anchor_iou_loss, + [batch_size, num_anchors]) + + class WeightedSigmoidClassificationLoss(Loss): """Sigmoid cross entropy classification loss function.""" @@ -245,6 +278,79 @@ class WeightedSigmoidClassificationLoss(Loss): return per_entry_cross_ent * weights +class WeightedDiceClassificationLoss(Loss): + """Dice loss for classification [1][2]. + + [1]: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient + [2]: https://arxiv.org/abs/1606.04797 + + """ + + def __init__(self, squared_normalization): + """Initializes the loss object. + + Args: + squared_normalization: boolean, if set, we square the probabilities in the + denominator term used for normalization. + """ + + self._squared_normalization = squared_normalization + super(WeightedDiceClassificationLoss, self).__init__() + + def _compute_loss(self, + prediction_tensor, + target_tensor, + weights, + class_indices=None): + """Computes the loss value. + + Dice loss uses the area of the ground truth and prediction tensors for + normalization. We compute area by summing along the anchors (2nd) dimension. + + Args: + prediction_tensor: A float tensor of shape [batch_size, num_pixels, + num_classes] representing the predicted logits for each class. 
+ num_pixels denotes the total number of pixels in the spatial dimensions + of the mask after flattening. + target_tensor: A float tensor of shape [batch_size, num_pixels, + num_classes] representing one-hot encoded classification targets. + num_pixels denotes the total number of pixels in the spatial dimensions + of the mask after flattening. + weights: a float tensor of shape, either [batch_size, num_anchors, + num_classes] or [batch_size, num_anchors, 1]. If the shape is + [batch_size, num_anchors, 1], all the classses are equally weighted. + class_indices: (Optional) A 1-D integer tensor of class indices. + If provided, computes loss only for the specified class indices. + + Returns: + loss: a float tensor of shape [batch_size, num_classes] + representing the value of the loss function. + """ + if class_indices is not None: + weights *= tf.reshape( + ops.indices_to_dense_vector(class_indices, + tf.shape(prediction_tensor)[2]), + [1, 1, -1]) + + prob_tensor = tf.nn.sigmoid(prediction_tensor) + + if self._squared_normalization: + prob_tensor = tf.pow(prob_tensor, 2) + target_tensor = tf.pow(target_tensor, 2) + + prob_tensor *= weights + target_tensor *= weights + + prediction_area = tf.reduce_sum(prob_tensor, axis=1) + gt_area = tf.reduce_sum(target_tensor, axis=1) + + intersection = tf.reduce_sum(prob_tensor * target_tensor, axis=1) + dice_coeff = 2 * intersection / tf.maximum(gt_area + prediction_area, 1.0) + dice_loss = 1 - dice_coeff + + return dice_loss + + class SigmoidFocalClassificationLoss(Loss): """Sigmoid focal cross entropy loss. 
diff --git a/research/object_detection/core/losses_test.py b/research/object_detection/core/losses_test.py index 5957052ee7ed9cc9f682e960afa8514faff30e07..eb2f845f844e0d8354152498895be6eb1c6db1cc 100644 --- a/research/object_detection/core/losses_test.py +++ b/research/object_detection/core/losses_test.py @@ -198,6 +198,47 @@ class WeightedIOULocalizationLossTest(test_case.TestCase): self.assertAllClose(loss_output, exp_loss) +class WeightedGIOULocalizationLossTest(test_case.TestCase): + + def testReturnsCorrectLoss(self): + def graph_fn(): + prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1], + [0, 0, 1, 1], + [0, 0, 0, 0]]]) + target_tensor = tf.constant([[[1.5, 0, 2.4, 1], + [0, 0, 1, 1], + [5, 5, 10, 10]]]) + weights = [[1.0, .5, 2.0]] + loss_op = losses.WeightedGIOULocalizationLoss() + loss = loss_op(prediction_tensor, + target_tensor, + weights=weights) + loss = tf.reduce_sum(loss) + return loss + exp_loss = 3.5 + loss_output = self.execute(graph_fn, []) + self.assertAllClose(loss_output, exp_loss) + + def testReturnsCorrectLossWithNoLabels(self): + def graph_fn(): + prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1], + [0, 0, 1, 1], + [0, 0, .5, .25]]]) + target_tensor = tf.constant([[[1.5, 0, 2.4, 1], + [0, 0, 1, 1], + [50, 50, 500.5, 100.25]]]) + weights = [[1.0, .5, 2.0]] + losses_mask = tf.constant([False], tf.bool) + loss_op = losses.WeightedGIOULocalizationLoss() + loss = loss_op(prediction_tensor, target_tensor, weights=weights, + losses_mask=losses_mask) + loss = tf.reduce_sum(loss) + return loss + exp_loss = 0.0 + loss_output = self.execute(graph_fn, []) + self.assertAllClose(loss_output, exp_loss) + + class WeightedSigmoidClassificationLossTest(test_case.TestCase): def testReturnsCorrectLoss(self): @@ -1406,5 +1447,111 @@ class L1LocalizationLossTest(test_case.TestCase): self.assertAllClose(computed_value, [[0.8, 0.0], [0.6, 0.1]], rtol=1e-6) +class WeightedDiceClassificationLoss(test_case.TestCase): + + def test_compute_weights_1(self): + def 
graph_fn(): + loss = losses.WeightedDiceClassificationLoss(squared_normalization=False) + pred = np.zeros((2, 3, 4), dtype=np.float32) + target = np.zeros((2, 3, 4), dtype=np.float32) + + pred[0, 1, 0] = _logit(0.9) + pred[0, 2, 0] = _logit(0.1) + pred[0, 2, 2] = _logit(0.5) + pred[0, 1, 3] = _logit(0.1) + + pred[1, 2, 3] = _logit(0.2) + pred[1, 1, 1] = _logit(0.3) + pred[1, 0, 2] = _logit(0.1) + + target[0, 1, 0] = 1.0 + target[0, 2, 2] = 1.0 + target[0, 1, 3] = 1.0 + + target[1, 2, 3] = 1.0 + target[1, 1, 1] = 0.0 + target[1, 0, 2] = 0.0 + + weights = np.ones_like(target) + return loss._compute_loss(pred, target, weights) + + dice_coeff = np.zeros((2, 4)) + dice_coeff[0, 0] = 2 * 0.9 / 2.5 + dice_coeff[0, 2] = 2 * 0.5 / 2.5 + dice_coeff[0, 3] = 2 * 0.1 / 2.1 + dice_coeff[1, 3] = 2 * 0.2 / 2.2 + + computed_value = self.execute(graph_fn, []) + self.assertAllClose(computed_value, 1 - dice_coeff, rtol=1e-6) + + def test_compute_weights_set(self): + + def graph_fn(): + loss = losses.WeightedDiceClassificationLoss(squared_normalization=False) + pred = np.zeros((2, 3, 4), dtype=np.float32) + target = np.zeros((2, 3, 4), dtype=np.float32) + + pred[0, 1, 0] = _logit(0.9) + pred[0, 2, 0] = _logit(0.1) + pred[0, 2, 2] = _logit(0.5) + pred[0, 1, 3] = _logit(0.1) + + pred[1, 2, 3] = _logit(0.2) + pred[1, 1, 1] = _logit(0.3) + pred[1, 0, 2] = _logit(0.1) + + target[0, 1, 0] = 1.0 + target[0, 2, 2] = 1.0 + target[0, 1, 3] = 1.0 + + target[1, 2, 3] = 1.0 + target[1, 1, 1] = 0.0 + target[1, 0, 2] = 0.0 + + weights = np.ones_like(target) + weights[:, :, 0] = 0.0 + return loss._compute_loss(pred, target, weights) + + dice_coeff = np.zeros((2, 4)) + dice_coeff[0, 2] = 2 * 0.5 / 2.5 + dice_coeff[0, 3] = 2 * 0.1 / 2.1 + dice_coeff[1, 3] = 2 * 0.2 / 2.2 + + computed_value = self.execute(graph_fn, []) + self.assertAllClose(computed_value, 1 - dice_coeff, rtol=1e-6) + + def test_class_indices(self): + def graph_fn(): + loss = 
losses.WeightedDiceClassificationLoss(squared_normalization=False) + pred = np.zeros((2, 3, 4), dtype=np.float32) + target = np.zeros((2, 3, 4), dtype=np.float32) + + pred[0, 1, 0] = _logit(0.9) + pred[0, 2, 0] = _logit(0.1) + pred[0, 2, 2] = _logit(0.5) + pred[0, 1, 3] = _logit(0.1) + + pred[1, 2, 3] = _logit(0.2) + pred[1, 1, 1] = _logit(0.3) + pred[1, 0, 2] = _logit(0.1) + + target[0, 1, 0] = 1.0 + target[0, 2, 2] = 1.0 + target[0, 1, 3] = 1.0 + + target[1, 2, 3] = 1.0 + target[1, 1, 1] = 0.0 + target[1, 0, 2] = 0.0 + + weights = np.ones_like(target) + return loss._compute_loss(pred, target, weights, class_indices=[0]) + + dice_coeff = np.zeros((2, 4)) + dice_coeff[0, 0] = 2 * 0.9 / 2.5 + + computed_value = self.execute(graph_fn, []) + self.assertAllClose(computed_value, 1 - dice_coeff, rtol=1e-6) + + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/core/model.py b/research/object_detection/core/model.py index f0ace5d050c3df472b6eb53fa5c277523376573b..bb96038dabf53006a21ee19fec1c954f1ab6b035 100644 --- a/research/object_detection/core/model.py +++ b/research/object_detection/core/model.py @@ -102,7 +102,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): Args: field: a string key, options are fields.BoxListFields.{boxes,classes,masks,keypoints, - keypoint_visibilities, densepose_*} + keypoint_visibilities, densepose_*, track_ids, + temporal_offsets, track_match_flags} fields.InputDataFields.is_annotated. Returns: @@ -123,7 +124,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): Args: field: a string key, options are fields.BoxListFields.{boxes,classes,masks,keypoints, - keypoint_visibilities, densepose_*} or + keypoint_visibilities, densepose_*, track_ids} or fields.InputDataFields.is_annotated. 
Returns: @@ -303,13 +304,20 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): groundtruth_dp_num_points_list=None, groundtruth_dp_part_ids_list=None, groundtruth_dp_surface_coords_list=None, + groundtruth_track_ids_list=None, + groundtruth_temporal_offsets_list=None, + groundtruth_track_match_flags_list=None, groundtruth_weights_list=None, groundtruth_confidences_list=None, groundtruth_is_crowd_list=None, groundtruth_group_of_list=None, groundtruth_area_list=None, is_annotated_list=None, - groundtruth_labeled_classes=None): + groundtruth_labeled_classes=None, + groundtruth_verified_neg_classes=None, + groundtruth_not_exhaustive_classes=None, + groundtruth_keypoint_depths_list=None, + groundtruth_keypoint_depth_weights_list=None): """Provide groundtruth tensors. Args: @@ -342,6 +350,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): shape [num_boxes, max_sampled_points, 4] containing the DensePose surface coordinates for each sampled point. Note that there may be padding. + groundtruth_track_ids_list: a list of 1-D tf.int32 tensors of shape + [num_boxes] containing the track IDs of groundtruth objects. + groundtruth_temporal_offsets_list: a list of 2-D tf.float32 tensors + of shape [num_boxes, 2] containing the spatial offsets of objects' + centers compared with the previous frame. + groundtruth_track_match_flags_list: a list of 1-D tf.float32 tensors + of shape [num_boxes] containing 0-1 flags that indicate if an object + has existed in the previous frame. groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape [num_boxes] containing weights for groundtruth boxes. groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape @@ -359,6 +375,17 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): groundtruth_labeled_classes: A list of 1-D tf.float32 tensors of shape [num_classes], containing label indices encoded as k-hot of the classes that are exhaustively annotated. 
+ groundtruth_verified_neg_classes: A list of 1-D tf.float32 tensors of + shape [num_classes], containing a K-hot representation of classes + which were verified as not present in the image. + groundtruth_not_exhaustive_classes: A list of 1-D tf.float32 tensors of + shape [num_classes], containing a K-hot representation of classes + which don't have all of their instances marked exhaustively. + groundtruth_keypoint_depths_list: a list of 2-D tf.float32 tensors + of shape [num_boxes, num_keypoints] containing keypoint relative depths. + groundtruth_keypoint_depth_weights_list: a list of 2-D tf.float32 tensors + of shape [num_boxes, num_keypoints] containing the weights of the + relative depths. """ self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list self._groundtruth_lists[ @@ -379,6 +406,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): self._groundtruth_lists[ fields.BoxListFields.keypoint_visibilities] = ( groundtruth_keypoint_visibilities_list) + if groundtruth_keypoint_depths_list: + self._groundtruth_lists[ + fields.BoxListFields.keypoint_depths] = ( + groundtruth_keypoint_depths_list) + if groundtruth_keypoint_depth_weights_list: + self._groundtruth_lists[ + fields.BoxListFields.keypoint_depth_weights] = ( + groundtruth_keypoint_depth_weights_list) if groundtruth_dp_num_points_list: self._groundtruth_lists[ fields.BoxListFields.densepose_num_points] = ( @@ -391,6 +426,17 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): self._groundtruth_lists[ fields.BoxListFields.densepose_surface_coords] = ( groundtruth_dp_surface_coords_list) + if groundtruth_track_ids_list: + self._groundtruth_lists[ + fields.BoxListFields.track_ids] = groundtruth_track_ids_list + if groundtruth_temporal_offsets_list: + self._groundtruth_lists[ + fields.BoxListFields.temporal_offsets] = ( + groundtruth_temporal_offsets_list) + if groundtruth_track_match_flags_list: + self._groundtruth_lists[ + 
fields.BoxListFields.track_match_flags] = ( + groundtruth_track_match_flags_list) if groundtruth_is_crowd_list: self._groundtruth_lists[ fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list @@ -407,6 +453,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)): self._groundtruth_lists[ fields.InputDataFields .groundtruth_labeled_classes] = groundtruth_labeled_classes + if groundtruth_verified_neg_classes: + self._groundtruth_lists[ + fields.InputDataFields + .groundtruth_verified_neg_classes] = groundtruth_verified_neg_classes + if groundtruth_not_exhaustive_classes: + self._groundtruth_lists[ + fields.InputDataFields + .groundtruth_not_exhaustive_classes] = ( + groundtruth_not_exhaustive_classes) @abc.abstractmethod def regularization_losses(self): diff --git a/research/object_detection/core/post_processing.py b/research/object_detection/core/post_processing.py index e425cd08a2f11b4ebcc999b01ac4025101a93a8c..c4c42fc9345176ea1ef81d4e4a316cfa1148b0b5 100644 --- a/research/object_detection/core/post_processing.py +++ b/research/object_detection/core/post_processing.py @@ -26,6 +26,7 @@ import tensorflow.compat.v1 as tf from object_detection.core import box_list from object_detection.core import box_list_ops +from object_detection.core import keypoint_ops from object_detection.core import standard_fields as fields from object_detection.utils import shape_utils @@ -379,9 +380,23 @@ def _clip_window_prune_boxes(sorted_boxes, clip_window, pad_to_max_output_size, if change_coordinate_frame: sorted_boxes = box_list_ops.change_coordinate_frame(sorted_boxes, clip_window) + if sorted_boxes.has_field(fields.BoxListFields.keypoints): + sorted_keypoints = sorted_boxes.get_field(fields.BoxListFields.keypoints) + sorted_keypoints = keypoint_ops.change_coordinate_frame(sorted_keypoints, + clip_window) + sorted_boxes.set_field(fields.BoxListFields.keypoints, sorted_keypoints) return sorted_boxes, num_valid_nms_boxes_cumulative +class NullContextmanager(object): 
+ + def __enter__(self): + pass + + def __exit__(self, type_arg, value_arg, traceback_arg): + return False + + def multiclass_non_max_suppression(boxes, scores, score_thresh, @@ -397,6 +412,7 @@ def multiclass_non_max_suppression(boxes, additional_fields=None, soft_nms_sigma=0.0, use_hard_nms=False, + use_cpu_nms=False, scope=None): """Multi-class version of non maximum suppression. @@ -452,6 +468,7 @@ def multiclass_non_max_suppression(boxes, NMS. Soft NMS is currently only supported when pad_to_max_output_size is False. use_hard_nms: Enforce the usage of hard NMS. + use_cpu_nms: Enforce NMS to run on CPU. scope: name scope. Returns: @@ -474,7 +491,8 @@ def multiclass_non_max_suppression(boxes, raise ValueError('Soft NMS (soft_nms_sigma != 0.0) is currently not ' 'supported when pad_to_max_output_size is True.') - with tf.name_scope(scope, 'MultiClassNonMaxSuppression'): + with tf.name_scope(scope, 'MultiClassNonMaxSuppression'), tf.device( + 'cpu:0') if use_cpu_nms else NullContextmanager(): num_scores = tf.shape(scores)[0] num_classes = shape_utils.get_dim_as_int(scores.get_shape()[1]) @@ -855,7 +873,8 @@ def batch_multiclass_non_max_suppression(boxes, max_classes_per_detection=1, use_dynamic_map_fn=False, use_combined_nms=False, - use_hard_nms=False): + use_hard_nms=False, + use_cpu_nms=False): """Multi-class version of non maximum suppression that operates on a batch. This op is similar to `multiclass_non_max_suppression` but operates on a batch @@ -927,6 +946,7 @@ def batch_multiclass_non_max_suppression(boxes, Masks and additional fields are not supported. See argument checks in the code below for unsupported arguments. use_hard_nms: Enforce the usage of hard NMS. + use_cpu_nms: Enforce NMS to run on CPU. 
Returns: 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor @@ -1162,7 +1182,8 @@ def batch_multiclass_non_max_suppression(boxes, use_partitioned_nms=use_partitioned_nms, additional_fields=per_image_additional_fields, soft_nms_sigma=soft_nms_sigma, - use_hard_nms=use_hard_nms) + use_hard_nms=use_hard_nms, + use_cpu_nms=use_cpu_nms) if not use_static_shapes: nmsed_boxlist = box_list_ops.pad_or_clip_box_list( diff --git a/research/object_detection/core/preprocessor.py b/research/object_detection/core/preprocessor.py index 6cebfd99112cb9542e97626c917778c9be3a7de9..3c98596a24ed0286bb7e369e1c8862ef9133b3bd 100644 --- a/research/object_detection/core/preprocessor.py +++ b/research/object_detection/core/preprocessor.py @@ -571,6 +571,8 @@ def random_horizontal_flip(image, keypoint_visibilities=None, densepose_part_ids=None, densepose_surface_coords=None, + keypoint_depths=None, + keypoint_depth_weights=None, keypoint_flip_permutation=None, probability=0.5, seed=None, @@ -602,6 +604,12 @@ def random_horizontal_flip(image, (y, x) are the normalized image coordinates for a sampled point, and (v, u) is the surface coordinate for the part. + keypoint_depths: (optional) rank 2 float32 tensor with shape [num_instances, + num_keypoints] representing the relative depth of the + keypoints. + keypoint_depth_weights: (optional) rank 2 float32 tensor with shape + [num_instances, num_keypoints] representing the + weights of the relative depth of the keypoints. keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip permutation. probability: the probability of performing this augmentation. @@ -631,6 +639,10 @@ def random_horizontal_flip(image, [num_instances, num_points]. densepose_surface_coords: rank 3 float32 tensor with shape [num_instances, num_points, 4]. + keypoint_depths: rank 2 float32 tensor with shape [num_instances, + num_keypoints] + keypoint_depth_weights: rank 2 float32 tensor with shape [num_instances, + num_keypoints]. 
Raises: ValueError: if keypoints are provided but keypoint_flip_permutation is not. @@ -708,6 +720,21 @@ def random_horizontal_flip(image, lambda: (densepose_part_ids, densepose_surface_coords)) result.extend(densepose_tensors) + # flip keypoint depths and weights. + if (keypoint_depths is not None and + keypoint_flip_permutation is not None): + kpt_flip_perm = keypoint_flip_permutation + keypoint_depths = tf.cond( + do_a_flip_random, + lambda: tf.gather(keypoint_depths, kpt_flip_perm, axis=1), + lambda: keypoint_depths) + keypoint_depth_weights = tf.cond( + do_a_flip_random, + lambda: tf.gather(keypoint_depth_weights, kpt_flip_perm, axis=1), + lambda: keypoint_depth_weights) + result.append(keypoint_depths) + result.append(keypoint_depth_weights) + return tuple(result) @@ -1049,6 +1076,28 @@ def random_rgb_to_gray(image, return image +def adjust_gamma(image, gamma=1.0, gain=1.0): + """Adjusts the gamma. + + Args: + image: rank 3 float32 tensor contains 1 image -> [height, width, channels] + with pixel values varying between [0, 255]. + gamma: the gamma value. Must be a non-negative real number. + gain: a constant multiplier. + + Returns: + image: image which is the same shape as input image. + """ + with tf.name_scope('AdjustGamma', values=[image]): + def _adjust_gamma(image): + image = tf.image.adjust_gamma(image / 255, gamma, gain) * 255 + image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0) + return image + + image = _augment_only_rgb_channels(image, _adjust_gamma) + return image + + def random_adjust_brightness(image, max_delta=0.2, seed=None, @@ -1069,7 +1118,6 @@ def random_adjust_brightness(image, Returns: image: image which is the same shape as input image. - boxes: boxes which is the same shape as input boxes. 
""" with tf.name_scope('RandomAdjustBrightness', values=[image]): generator_func = functools.partial(tf.random_uniform, [], @@ -1258,7 +1306,7 @@ def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None): return image -def random_jitter_boxes(boxes, ratio=0.05, seed=None): +def random_jitter_boxes(boxes, ratio=0.05, jitter_mode='random', seed=None): """Randomly jitter boxes in image. Args: @@ -1269,45 +1317,46 @@ def random_jitter_boxes(boxes, ratio=0.05, seed=None): ratio: The ratio of the box width and height that the corners can jitter. For example if the width is 100 pixels and ratio is 0.05, the corners can jitter up to 5 pixels in the x direction. + jitter_mode: One of + shrink - Only shrinks boxes. + expand - Only expands boxes. + default - Randomly and independently perturbs each box boundary. seed: random seed. Returns: boxes: boxes which is the same shape as input boxes. """ - def random_jitter_box(box, ratio, seed): - """Randomly jitter box. + with tf.name_scope('RandomJitterBoxes', values=[boxes]): + ymin, xmin, ymax, xmax = (boxes[:, i] for i in range(4)) - Args: - box: bounding box [1, 1, 4]. - ratio: max ratio between jittered box and original box, - a number between [0, 0.5]. - seed: random seed. + height, width = ymax - ymin, xmax - xmin + ycenter, xcenter = (ymin + ymax) / 2.0, (xmin + xmax) / 2.0 - Returns: - jittered_box: jittered box. - """ - rand_numbers = tf.random_uniform( - [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed) - box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1]) - box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0]) - hw_coefs = tf.stack([box_height, box_width, box_height, box_width]) - hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers) - jittered_box = tf.add(box, hw_rand_coefs) - jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0) - return jittered_box + height = tf.abs(height) + width = tf.abs(width) - with tf.name_scope('RandomJitterBoxes', values=[boxes]): - # boxes are [N, 4]. 
Lets first make them [N, 1, 1, 4] - boxes_shape = tf.shape(boxes) - boxes = tf.expand_dims(boxes, 1) - boxes = tf.expand_dims(boxes, 2) + if jitter_mode == 'shrink': + min_ratio, max_ratio = -ratio, 0 + elif jitter_mode == 'expand': + min_ratio, max_ratio = 0, ratio + else: + min_ratio, max_ratio = -ratio, ratio + + num_boxes = tf.shape(boxes)[0] + distortion = 1.0 + tf.random_uniform( + [num_boxes, 4], minval=min_ratio, maxval=max_ratio, dtype=tf.float32, + seed=seed) - distorted_boxes = tf.map_fn( - lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32) + ymin_jitter = height * distortion[:, 0] + xmin_jitter = width * distortion[:, 1] + ymax_jitter = height * distortion[:, 2] + xmax_jitter = width * distortion[:, 3] - distorted_boxes = tf.reshape(distorted_boxes, boxes_shape) + ymin, ymax = ycenter - (ymin_jitter / 2.0), ycenter + (ymax_jitter / 2.0) + xmin, xmax = xcenter - (xmin_jitter / 2.0), xcenter + (xmax_jitter / 2.0) - return distorted_boxes + boxes = tf.stack([ymin, xmin, ymax, xmax], axis=1) + return tf.clip_by_value(boxes, 0.0, 1.0) def _strict_random_crop_image(image, @@ -2937,7 +2986,7 @@ def resize_to_range(image, for i in range(len(channels)) ], axis=2) - new_image.set_shape([max_dimension, max_dimension, 3]) + new_image.set_shape([max_dimension, max_dimension, len(channels)]) result = [new_image] if masks is not None: @@ -3111,6 +3160,7 @@ def resize_pad_to_multiple(image, masks=None, multiple=1): image_height, image_width, num_channels = _get_image_info(image) image = image[tf.newaxis, :, :, :] image = ops.pad_to_multiple(image, multiple)[0, :, :, :] + result = [image] if masks is not None: masks = tf.transpose(masks, (1, 2, 0)) @@ -3118,11 +3168,10 @@ def resize_pad_to_multiple(image, masks=None, multiple=1): masks = ops.pad_to_multiple(masks, multiple)[0, :, :, :] masks = tf.transpose(masks, (2, 0, 1)) + result.append(masks) - if masks is None: - return image, tf.stack([image_height, image_width, num_channels]) - else: - return 
image, masks, tf.stack([image_height, image_width, num_channels]) + result.append(tf.stack([image_height, image_width, num_channels])) + return result def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): @@ -3971,9 +4020,10 @@ def _get_crop_border(border, size): def random_square_crop_by_scale(image, boxes, labels, label_weights, - masks=None, keypoints=None, max_border=128, - scale_min=0.6, scale_max=1.3, num_scales=8, - seed=None, preprocess_vars_cache=None): + label_confidences=None, masks=None, + keypoints=None, max_border=128, scale_min=0.6, + scale_max=1.3, num_scales=8, seed=None, + preprocess_vars_cache=None): """Randomly crop a square in proportion to scale and image size. Extract a square sized crop from an image whose side length is sampled by @@ -3993,6 +4043,8 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights, labels: rank 1 int32 tensor containing the object classes. label_weights: float32 tensor of shape [num_instances] representing the weight for each box. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidence for each box. masks: (optional) rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. The masks are of the same height, width as the input `image`. @@ -4021,6 +4073,8 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights, Boxes are in normalized form. labels: new labels. label_weights: rank 1 float32 tensor with shape [num_instances]. + label_confidences: (optional) float32 tensor of shape [num_instances] + representing the confidence for each box. masks: rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. 
@@ -4110,6 +4164,9 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights, tf.gather(labels, indices), tf.gather(label_weights, indices)] + if label_confidences is not None: + return_values.append(tf.gather(label_confidences, indices)) + if masks is not None: new_masks = tf.expand_dims(masks, -1) new_masks = new_masks[:, ymin:ymax, xmin:xmax] @@ -4135,6 +4192,7 @@ def random_scale_crop_and_pad_to_square( label_weights, masks=None, keypoints=None, + label_confidences=None, scale_min=0.1, scale_max=2.0, output_size=512, @@ -4168,6 +4226,8 @@ def random_scale_crop_and_pad_to_square( as the input `image`. keypoints: (optional) rank 3 float32 tensor with shape [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized coordinates. + label_confidences: (optional) float32 tensor of shape [num_instance] + representing the confidence for each box. scale_min: float, the minimum value for the random scale factor. scale_max: float, the maximum value for the random scale factor. output_size: int, the desired (square) output image size. @@ -4183,9 +4243,8 @@ def random_scale_crop_and_pad_to_square( label_weights: rank 1 float32 tensor with shape [num_instances]. masks: rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. - + label_confidences: confidences for retained boxes. 
""" - img_shape = tf.shape(image) input_height, input_width = img_shape[0], img_shape[1] random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed) @@ -4250,6 +4309,9 @@ def random_scale_crop_and_pad_to_square( keypoints, [0.0, 0.0, 1.0, 1.0]) return_values.append(keypoints) + if label_confidences is not None: + return_values.append(tf.gather(label_confidences, indices)) + return return_values @@ -4259,7 +4321,8 @@ def get_default_func_arg_map(include_label_weights=True, include_instance_masks=False, include_keypoints=False, include_keypoint_visibilities=False, - include_dense_pose=False): + include_dense_pose=False, + include_keypoint_depths=False): """Returns the default mapping from a preprocessor function to its args. Args: @@ -4277,6 +4340,8 @@ def get_default_func_arg_map(include_label_weights=True, the keypoint visibilities, too. include_dense_pose: If True, preprocessing functions will modify the DensePose labels, too. + include_keypoint_depths: If True, preprocessing functions will modify the + keypoint depth labels, too. Returns: A map from preprocessing functions to the arguments they receive. 
@@ -4319,6 +4384,13 @@ def get_default_func_arg_map(include_label_weights=True, fields.InputDataFields.groundtruth_dp_part_ids) groundtruth_dp_surface_coords = ( fields.InputDataFields.groundtruth_dp_surface_coords) + groundtruth_keypoint_depths = None + groundtruth_keypoint_depth_weights = None + if include_keypoint_depths: + groundtruth_keypoint_depths = ( + fields.InputDataFields.groundtruth_keypoint_depths) + groundtruth_keypoint_depth_weights = ( + fields.InputDataFields.groundtruth_keypoint_depth_weights) prep_func_arg_map = { normalize_image: (fields.InputDataFields.image,), @@ -4330,6 +4402,8 @@ def get_default_func_arg_map(include_label_weights=True, groundtruth_keypoint_visibilities, groundtruth_dp_part_ids, groundtruth_dp_surface_coords, + groundtruth_keypoint_depths, + groundtruth_keypoint_depth_weights, ), random_vertical_flip: ( fields.InputDataFields.image, @@ -4483,14 +4557,15 @@ def get_default_func_arg_map(include_label_weights=True, (fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_classes, - groundtruth_label_weights, groundtruth_instance_masks, - groundtruth_keypoints), + groundtruth_label_weights, groundtruth_label_confidences, + groundtruth_instance_masks, groundtruth_keypoints), random_scale_crop_and_pad_to_square: (fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_classes, groundtruth_label_weights, groundtruth_instance_masks, - groundtruth_keypoints), + groundtruth_keypoints, groundtruth_label_confidences), + adjust_gamma: (fields.InputDataFields.image,), } return prep_func_arg_map @@ -4541,7 +4616,6 @@ def preprocess(tensor_dict, """ if func_arg_map is None: func_arg_map = get_default_func_arg_map() - # changes the images to image (rank 4 to rank 3) since the functions # receive rank 3 tensor for image if fields.InputDataFields.image in tensor_dict: diff --git a/research/object_detection/core/preprocessor_test.py 
b/research/object_detection/core/preprocessor_test.py index 396ff96da95948d0adebac14634e8f6ecbfcd7fc..91c6922f196e26bfcc332430a6857011a3bebdfd 100644 --- a/research/object_detection/core/preprocessor_test.py +++ b/research/object_detection/core/preprocessor_test.py @@ -105,6 +105,17 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): ]) return keypoints, keypoint_visibilities + def createTestKeypointDepths(self): + keypoint_depths = tf.constant([ + [1.0, 0.9, 0.8], + [0.7, 0.6, 0.5] + ], dtype=tf.float32) + keypoint_depth_weights = tf.constant([ + [0.5, 0.6, 0.7], + [0.8, 0.9, 1.0] + ], dtype=tf.float32) + return keypoint_depths, keypoint_depth_weights + def createTestKeypointsInsideCrop(self): keypoints = np.array([ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]], @@ -713,6 +724,59 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): test_keypoints=True) + def testRunRandomHorizontalFlipWithKeypointDepth(self): + + def graph_fn(): + preprocess_options = [(preprocessor.random_horizontal_flip, {})] + image_height = 3 + image_width = 3 + images = tf.random_uniform([1, image_height, image_width, 3]) + boxes = self.createTestBoxes() + masks = self.createTestMasks() + keypoints, keypoint_visibilities = self.createTestKeypoints() + keypoint_depths, keypoint_depth_weights = self.createTestKeypointDepths() + keypoint_flip_permutation = self.createKeypointFlipPermutation() + tensor_dict = { + fields.InputDataFields.image: + images, + fields.InputDataFields.groundtruth_boxes: + boxes, + fields.InputDataFields.groundtruth_instance_masks: + masks, + fields.InputDataFields.groundtruth_keypoints: + keypoints, + fields.InputDataFields.groundtruth_keypoint_visibilities: + keypoint_visibilities, + fields.InputDataFields.groundtruth_keypoint_depths: + keypoint_depths, + fields.InputDataFields.groundtruth_keypoint_depth_weights: + keypoint_depth_weights, + } + preprocess_options = [(preprocessor.random_horizontal_flip, { + 'keypoint_flip_permutation': 
keypoint_flip_permutation, + 'probability': 1.0 + })] + preprocessor_arg_map = preprocessor.get_default_func_arg_map( + include_instance_masks=True, + include_keypoints=True, + include_keypoint_visibilities=True, + include_dense_pose=False, + include_keypoint_depths=True) + tensor_dict = preprocessor.preprocess( + tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map) + keypoint_depths = tensor_dict[ + fields.InputDataFields.groundtruth_keypoint_depths] + keypoint_depth_weights = tensor_dict[ + fields.InputDataFields.groundtruth_keypoint_depth_weights] + output_tensors = [keypoint_depths, keypoint_depth_weights] + return output_tensors + + output_tensors = self.execute_cpu(graph_fn, []) + expected_keypoint_depths = [[1.0, 0.8, 0.9], [0.7, 0.5, 0.6]] + expected_keypoint_depth_weights = [[0.5, 0.7, 0.6], [0.8, 1.0, 0.9]] + self.assertAllClose(expected_keypoint_depths, output_tensors[0]) + self.assertAllClose(expected_keypoint_depth_weights, output_tensors[1]) + def testRandomVerticalFlip(self): def graph_fn(): @@ -1199,6 +1263,67 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): (boxes_shape_, distorted_boxes_shape_) = self.execute_cpu(graph_fn, []) self.assertAllEqual(boxes_shape_, distorted_boxes_shape_) + def testRandomJitterBoxesZeroRatio(self): + + def graph_fn(): + preprocessing_options = [] + preprocessing_options.append((preprocessor.random_jitter_boxes, + {'ratio': 0.0})) + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + return [boxes, distorted_boxes] + + (boxes, distorted_boxes) = self.execute_cpu(graph_fn, []) + self.assertAllEqual(boxes, distorted_boxes) + + def testRandomJitterBoxesExpand(self): + + def graph_fn(): + preprocessing_options = [] + preprocessing_options.append((preprocessor.random_jitter_boxes, + 
{'jitter_mode': 'expand'})) + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + return [boxes, distorted_boxes] + + boxes, distorted_boxes = self.execute_cpu(graph_fn, []) + ymin, xmin, ymax, xmax = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] + distorted_ymin, distorted_xmin, distorted_ymax, distorted_xmax = ( + distorted_boxes[:, 0], distorted_boxes[:, 1], distorted_boxes[0, 2], + distorted_boxes[:, 3]) + + self.assertTrue(np.all(distorted_ymin <= ymin)) + self.assertTrue(np.all(distorted_xmin <= xmin)) + self.assertTrue(np.all(distorted_ymax >= ymax)) + self.assertTrue(np.all(distorted_xmax >= xmax)) + + def testRandomJitterBoxesShrink(self): + + def graph_fn(): + preprocessing_options = [] + preprocessing_options.append((preprocessor.random_jitter_boxes, + {'jitter_mode': 'shrink'})) + boxes = self.createTestBoxes() + tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes] + return [boxes, distorted_boxes] + + boxes, distorted_boxes = self.execute_cpu(graph_fn, []) + ymin, xmin, ymax, xmax = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] + distorted_ymin, distorted_xmin, distorted_ymax, distorted_xmax = ( + distorted_boxes[:, 0], distorted_boxes[:, 1], distorted_boxes[0, 2], + distorted_boxes[:, 3]) + + self.assertTrue(np.all(distorted_ymin >= ymin)) + self.assertTrue(np.all(distorted_xmin >= xmin)) + self.assertTrue(np.all(distorted_ymax <= ymax)) + self.assertTrue(np.all(distorted_xmax <= xmax)) + def testRandomCropImage(self): def graph_fn(): @@ -3814,21 +3939,23 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): boxes = tf.constant([[0.25, .25, .75, .75]]) labels = 
tf.constant([[1]]) + label_confidences = tf.constant([0.75]) label_weights = tf.constant([[1.]]) - (new_image, new_boxes, _, _, new_masks, + (new_image, new_boxes, _, _, new_confidences, new_masks, new_keypoints) = preprocessor.random_square_crop_by_scale( image, boxes, labels, label_weights, + label_confidences, masks=masks, keypoints=keypoints, max_border=256, scale_min=scale, scale_max=scale) - return new_image, new_boxes, new_masks, new_keypoints - image, boxes, masks, keypoints = self.execute_cpu(graph_fn, []) + return new_image, new_boxes, new_confidences, new_masks, new_keypoints + image, boxes, confidences, masks, keypoints = self.execute_cpu(graph_fn, []) ymin, xmin, ymax, xmax = boxes[0] self.assertAlmostEqual(ymax - ymin, 0.5 / scale) self.assertAlmostEqual(xmax - xmin, 0.5 / scale) @@ -3842,6 +3969,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): self.assertAlmostEqual(scale * 256.0, size) self.assertAllClose(image[:, :, 0], masks[0, :, :]) + self.assertAllClose(confidences, [0.75]) @parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0), ('scale_2_0', 2.0)) @@ -3928,6 +4056,54 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): self.assertAllClose(image[:, :, 0], masks[0, :, :]) + def test_random_scale_crop_and_pad_to_square_handles_confidences(self): + + def graph_fn(): + image = tf.zeros([10, 10, 1]) + boxes = tf.constant([[0, 0, 0.5, 0.5], [0.5, 0.5, 0.75, 0.75]]) + label_weights = tf.constant([1.0, 1.0]) + box_labels = tf.constant([0, 1]) + box_confidences = tf.constant([-1.0, 1.0]) + + (_, new_boxes, _, _, + new_confidences) = preprocessor.random_scale_crop_and_pad_to_square( + image, + boxes, + box_labels, + label_weights, + label_confidences=box_confidences, + scale_min=0.8, + scale_max=0.9, + output_size=10) + return new_boxes, new_confidences + + boxes, confidences = self.execute_cpu(graph_fn, []) + + self.assertLen(boxes, 2) + self.assertAllEqual(confidences, [-1.0, 1.0]) + + def 
testAdjustGamma(self): + + def graph_fn(): + preprocessing_options = [] + preprocessing_options.append((preprocessor.normalize_image, { + 'original_minval': 0, + 'original_maxval': 255, + 'target_minval': 0, + 'target_maxval': 1 + })) + preprocessing_options.append((preprocessor.adjust_gamma, {})) + images_original = self.createTestImages() + tensor_dict = {fields.InputDataFields.image: images_original} + tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options) + images_gamma = tensor_dict[fields.InputDataFields.image] + image_original_shape = tf.shape(images_original) + image_gamma_shape = tf.shape(images_gamma) + return [image_original_shape, image_gamma_shape] + + (image_original_shape_, image_gamma_shape_) = self.execute_cpu(graph_fn, []) + self.assertAllEqual(image_original_shape_, image_gamma_shape_) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/core/region_similarity_calculator.py b/research/object_detection/core/region_similarity_calculator.py index fd75d52f9f7b2d79e846f733fc312ec24176bf95..fcaba76104fbf6d706c838aa93a1c2b8db9886fe 100644 --- a/research/object_detection/core/region_similarity_calculator.py +++ b/research/object_detection/core/region_similarity_calculator.py @@ -1,3 +1,4 @@ +# Lint as: python3 # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -79,6 +80,39 @@ class IouSimilarity(RegionSimilarityCalculator): return box_list_ops.iou(boxlist1, boxlist2) +class DETRSimilarity(RegionSimilarityCalculator): + """Class to compute similarity for the Detection Transformer model. + + This class computes pairwise DETR similarity between two BoxLists using a + weighted combination of GIOU, classification scores, and the L1 loss. 
+ """ + + def __init__(self, l1_weight=5, giou_weight=2): + super().__init__() + self.l1_weight = l1_weight + self.giou_weight = giou_weight + + def _compare(self, boxlist1, boxlist2): + """Compute pairwise DETR similarity between the two BoxLists. + + Args: + boxlist1: BoxList holding N groundtruth boxes. + boxlist2: BoxList holding M predicted boxes. + + Returns: + A tensor with shape [N, M] representing pairwise DETR similarity scores. + """ + groundtruth_labels = boxlist1.get_field(fields.BoxListFields.classes) + predicted_labels = boxlist2.get_field(fields.BoxListFields.classes) + classification_scores = tf.matmul(groundtruth_labels, + predicted_labels, + transpose_b=True) + loss = self.l1_weight * box_list_ops.l1( + boxlist1, boxlist2) + self.giou_weight * (1 - box_list_ops.giou( + boxlist1, boxlist2)) - classification_scores + return -loss + + class NegSqDistSimilarity(RegionSimilarityCalculator): """Class to compute similarity based on the squared distance metric. diff --git a/research/object_detection/core/region_similarity_calculator_test.py b/research/object_detection/core/region_similarity_calculator_test.py index 9f9a10b637fa127dfd83637f43508f9dbdb80c18..ec1de45be14772a9f56e32eec67632095c629235 100644 --- a/research/object_detection/core/region_similarity_calculator_test.py +++ b/research/object_detection/core/region_similarity_calculator_test.py @@ -93,6 +93,25 @@ class RegionSimilarityCalculatorTest(test_case.TestCase): iou_output = self.execute(graph_fn, []) self.assertAllClose(iou_output, exp_output) + def test_detr_similarity(self): + def graph_fn(): + corners1 = tf.constant([[5.0, 7.0, 7.0, 9.0]]) + corners2 = tf.constant([[5.0, 7.0, 7.0, 9.0], [5.0, 11.0, 7.0, 13.0]]) + groundtruth_labels = tf.constant([[1.0, 0.0]]) + predicted_labels = tf.constant([[0.0, 1000.0], [1000.0, 0.0]]) + boxes1 = box_list.BoxList(corners1) + boxes2 = box_list.BoxList(corners2) + boxes1.add_field(fields.BoxListFields.classes, groundtruth_labels) + 
boxes2.add_field(fields.BoxListFields.classes, predicted_labels) + detr_similarity_calculator = \ + region_similarity_calculator.DETRSimilarity() + detr_similarity = detr_similarity_calculator.compare( + boxes1, boxes2, None) + return detr_similarity + exp_output = [[0.0, -20 - 8.0/3.0 + 1000.0]] + sim_output = self.execute(graph_fn, []) + self.assertAllClose(sim_output, exp_output) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/core/standard_fields.py b/research/object_detection/core/standard_fields.py index ddb4a842db2a70c8d9497b1032249785ae539eef..1925c550615fd9c48eab538f1e042a9ceff82a1d 100644 --- a/research/object_detection/core/standard_fields.py +++ b/research/object_detection/core/standard_fields.py @@ -46,6 +46,11 @@ class InputDataFields(object): classes for which an image has been labeled. groundtruth_boxes: coordinates of the ground truth boxes in the image. groundtruth_classes: box-level class labels. + groundtruth_track_ids: box-level track ID labels. + groundtruth_temporal_offset: box-level temporal offsets, i.e., + movement of the box center in adjacent frames. + groundtruth_track_match_flags: box-level flags indicating if objects + exist in the previous frame. groundtruth_confidences: box-level class confidences. The shape should be the same as the shape of groundtruth_classes. groundtruth_label_types: box-level label types (e.g. explicit negative). @@ -62,9 +67,15 @@ class InputDataFields(object): groundtruth_instance_boundaries: ground truth instance boundaries. groundtruth_instance_classes: instance mask-level class labels. groundtruth_keypoints: ground truth keypoints. + groundtruth_keypoint_depths: Relative depth of the keypoints. + groundtruth_keypoint_depth_weights: Weights of the relative depth of the + keypoints. groundtruth_keypoint_visibilities: ground truth keypoint visibilities. groundtruth_keypoint_weights: groundtruth weight factor for keypoints. 
groundtruth_label_weights: groundtruth label weights. + groundtruth_verified_negative_classes: groundtruth verified negative classes + groundtruth_not_exhaustive_classes: groundtruth not-exhaustively labeled + classes. groundtruth_weights: groundtruth weight factor for bounding boxes. groundtruth_dp_num_points: The number of DensePose sampled points for each instance. @@ -81,6 +92,8 @@ class InputDataFields(object): context_features, used for reshaping. valid_context_size: the valid context size, used in filtering the padded context features. + context_features_image_id_list: the list of image source ids corresponding + to the features in context_features image_format: format for the images, used to decode image_height: height of images, used to decode image_width: width of images, used to decode @@ -97,6 +110,9 @@ class InputDataFields(object): groundtruth_labeled_classes = 'groundtruth_labeled_classes' groundtruth_boxes = 'groundtruth_boxes' groundtruth_classes = 'groundtruth_classes' + groundtruth_track_ids = 'groundtruth_track_ids' + groundtruth_temporal_offset = 'groundtruth_temporal_offset' + groundtruth_track_match_flags = 'groundtruth_track_match_flags' groundtruth_confidences = 'groundtruth_confidences' groundtruth_label_types = 'groundtruth_label_types' groundtruth_is_crowd = 'groundtruth_is_crowd' @@ -109,9 +125,13 @@ class InputDataFields(object): groundtruth_instance_boundaries = 'groundtruth_instance_boundaries' groundtruth_instance_classes = 'groundtruth_instance_classes' groundtruth_keypoints = 'groundtruth_keypoints' + groundtruth_keypoint_depths = 'groundtruth_keypoint_depths' + groundtruth_keypoint_depth_weights = 'groundtruth_keypoint_depth_weights' groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities' groundtruth_keypoint_weights = 'groundtruth_keypoint_weights' groundtruth_label_weights = 'groundtruth_label_weights' + groundtruth_verified_neg_classes = 'groundtruth_verified_neg_classes' + groundtruth_not_exhaustive_classes = 
'groundtruth_not_exhaustive_classes' groundtruth_weights = 'groundtruth_weights' groundtruth_dp_num_points = 'groundtruth_dp_num_points' groundtruth_dp_part_ids = 'groundtruth_dp_part_ids' @@ -123,6 +143,7 @@ class InputDataFields(object): context_features = 'context_features' context_feature_length = 'context_feature_length' valid_context_size = 'valid_context_size' + context_features_image_id_list = 'context_features_image_id_list' image_timestamps = 'image_timestamps' image_format = 'image_format' image_height = 'image_height' @@ -146,6 +167,7 @@ class DetectionResultFields(object): detection_boundaries: contains an object boundary for each detection box. detection_keypoints: contains detection keypoints for each detection box. detection_keypoint_scores: contains detection keypoint scores. + detection_keypoint_depths: contains detection keypoint depths. num_detections: number of detections in the batch. raw_detection_boxes: contains decoded detection boxes without Non-Max suppression. @@ -167,6 +189,9 @@ class DetectionResultFields(object): detection_boundaries = 'detection_boundaries' detection_keypoints = 'detection_keypoints' detection_keypoint_scores = 'detection_keypoint_scores' + detection_keypoint_depths = 'detection_keypoint_depths' + detection_embeddings = 'detection_embeddings' + detection_offsets = 'detection_temporal_offsets' num_detections = 'num_detections' raw_detection_boxes = 'raw_detection_boxes' raw_detection_scores = 'raw_detection_scores' @@ -187,10 +212,14 @@ class BoxListFields(object): keypoints: keypoints per bounding box. keypoint_visibilities: keypoint visibilities per bounding box. keypoint_heatmaps: keypoint heatmaps per bounding box. + keypoint_depths: keypoint depths per bounding box. + keypoint_depth_weights: keypoint depth weights per bounding box. densepose_num_points: number of DensePose points per bounding box. densepose_part_ids: DensePose part ids per bounding box. 
densepose_surface_coords: DensePose surface coordinates per bounding box. is_crowd: is_crowd annotation per bounding box. + temporal_offsets: temporal center offsets per bounding box. + track_match_flags: match flags per bounding box. """ boxes = 'boxes' classes = 'classes' @@ -203,11 +232,16 @@ class BoxListFields(object): keypoints = 'keypoints' keypoint_visibilities = 'keypoint_visibilities' keypoint_heatmaps = 'keypoint_heatmaps' + keypoint_depths = 'keypoint_depths' + keypoint_depth_weights = 'keypoint_depth_weights' densepose_num_points = 'densepose_num_points' densepose_part_ids = 'densepose_part_ids' densepose_surface_coords = 'densepose_surface_coords' is_crowd = 'is_crowd' group_of = 'group_of' + track_ids = 'track_ids' + temporal_offsets = 'temporal_offsets' + track_match_flags = 'track_match_flags' class PredictionFields(object): diff --git a/research/object_detection/core/target_assigner.py b/research/object_detection/core/target_assigner.py index 4a450c5c076f7197d097211af95a361388a4d717..e491bfcfb5978e9db8e333e21461f4cd5dafd508 100644 --- a/research/object_detection/core/target_assigner.py +++ b/research/object_detection/core/target_assigner.py @@ -51,6 +51,7 @@ from object_detection.core import matcher as mat from object_detection.core import region_similarity_calculator as sim_calc from object_detection.core import standard_fields as fields from object_detection.matchers import argmax_matcher +from object_detection.matchers import hungarian_matcher from object_detection.utils import shape_utils from object_detection.utils import target_assigner_utils as ta_utils from object_detection.utils import tf_version @@ -510,7 +511,8 @@ def batch_assign(target_assigner, anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch): (cls_targets, cls_weights, reg_targets, reg_weights, match) = target_assigner.assign( - anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights) + anchors, gt_boxes, gt_class_targets, unmatched_class_label, 
+ gt_weights) cls_targets_list.append(cls_targets) cls_weights_list.append(cls_weights) reg_targets_list.append(reg_targets) @@ -810,7 +812,20 @@ def get_batch_predictions_from_indices(batch_predictions, indices): values: A tensor of shape [num_instances, channels] holding the predicted values at the given indices. """ - return tf.gather_nd(batch_predictions, indices) + # Note, gather_nd (and its gradient scatter_nd) runs significantly slower (on + # TPU) than gather with flattened inputs, so reshape the tensor, flatten the + # indices, and run gather. + shape = shape_utils.combined_static_and_dynamic_shape(batch_predictions) + + # [B, H, W, C] -> [H*W, W, 1] or [B, H, W, N, C] -> [H*W*N, W*N, N, 1] + rev_cum_interior_indices = tf.reverse(tf.math.cumprod(shape[-2:0:-1]), [0]) + rev_cum_interior_indices = tf.concat([rev_cum_interior_indices, [1]], axis=0) + + # Compute flattened indices and gather. + flattened_inds = tf.linalg.matmul( + indices, rev_cum_interior_indices[:, tf.newaxis])[:, 0] + batch_predictions_2d = tf.reshape(batch_predictions, [-1, shape[-1]]) + return tf.gather(batch_predictions_2d, flattened_inds, axis=0) def _compute_std_dev_from_box_size(boxes_height, boxes_width, min_overlap): @@ -835,20 +850,111 @@ def _compute_std_dev_from_box_size(boxes_height, boxes_width, min_overlap): return sigma +def _preprocess_keypoints_and_weights(out_height, out_width, keypoints, + class_onehot, class_weights, + keypoint_weights, class_id, + keypoint_indices): + """Preprocesses the keypoints and the corresponding keypoint weights. + + This function performs several common steps to preprocess the keypoints and + keypoint weights features, including: + 1) Select the subset of keypoints based on the keypoint indices, fill the + keypoint NaN values with zeros and convert to absolute coordinates. + 2) Generate the weights of the keypoint using the following information: + a. The class of the instance. + b. The NaN value of the keypoint coordinates. + c. 
The provided keypoint weights. + + Args: + out_height: An integer or an integer tensor indicating the output height + of the model. + out_width: An integer or an integer tensor indicating the output width of + the model. + keypoints: A float tensor of shape [num_instances, num_total_keypoints, 2] + representing the original keypoint grountruth coordinates. + class_onehot: A float tensor of shape [num_instances, num_classes] + containing the class targets with the 0th index assumed to map to the + first non-background class. + class_weights: A float tensor of shape [num_instances] containing weights + for groundtruth instances. + keypoint_weights: A float tensor of shape + [num_instances, num_total_keypoints] representing the weights of each + keypoints. + class_id: int, the ID of the class (0-indexed) that contains the target + keypoints to consider in this task. + keypoint_indices: A list of integers representing the indices of the + keypoints to be considered in this task. This is used to retrieve the + subset of the keypoints that should be considered in this task. + + Returns: + A tuple of two tensors: + keypoint_absolute: A float tensor of shape + [num_instances, num_keypoints, 2] which is the selected and updated + keypoint coordinates. + keypoint_weights: A float tensor of shape [num_instances, num_keypoints] + representing the updated weight of each keypoint. + """ + # Select the targets keypoints by their type ids and generate the mask + # of valid elements. + valid_mask, keypoints = ta_utils.get_valid_keypoint_mask_for_class( + keypoint_coordinates=keypoints, + class_id=class_id, + class_onehot=class_onehot, + class_weights=class_weights, + keypoint_indices=keypoint_indices) + # Keypoint coordinates in absolute coordinate system. + # The shape of the tensors: [num_instances, num_keypoints, 2]. + keypoints_absolute = keypoint_ops.to_absolute_coordinates( + keypoints, out_height, out_width) + # Assign default weights for the keypoints. 
+ if keypoint_weights is None: + keypoint_weights = tf.ones_like(keypoints[:, :, 0]) + else: + keypoint_weights = tf.gather( + keypoint_weights, indices=keypoint_indices, axis=1) + keypoint_weights = keypoint_weights * valid_mask + return keypoints_absolute, keypoint_weights + + class CenterNetCenterHeatmapTargetAssigner(object): """Wrapper to compute the object center heatmap.""" - def __init__(self, stride, min_overlap=0.7): + def __init__(self, + stride, + min_overlap=0.7, + compute_heatmap_sparse=False, + keypoint_class_id=None, + keypoint_indices=None, + keypoint_weights_for_center=None): """Initializes the target assigner. Args: stride: int, the stride of the network in output pixels. min_overlap: The minimum IOU overlap that boxes need to have to not be penalized. + compute_heatmap_sparse: bool, indicating whether or not to use the sparse + version of the Op that computes the heatmap. The sparse version scales + better with number of classes, but in some cases is known to cause + OOM error. See (b/170989061). + keypoint_class_id: int, the ID of the class (0-indexed) that contains the + target keypoints to consider in this task. + keypoint_indices: A list of integers representing the indices of the + keypoints to be considered in this task. This is used to retrieve the + subset of the keypoints from gt_keypoints that should be considered in + this task. + keypoint_weights_for_center: The keypoint weights used for calculating the + location of object center. The number of weights need to be the same as + the number of keypoints. The object center is calculated by the weighted + mean of the keypoint locations. If not provided, the object center is + determined by the center of the bounding box (default behavior). 
""" self._stride = stride self._min_overlap = min_overlap + self._compute_heatmap_sparse = compute_heatmap_sparse + self._keypoint_class_id = keypoint_class_id + self._keypoint_indices = keypoint_indices + self._keypoint_weights_for_center = keypoint_weights_for_center def assign_center_targets_from_boxes(self, height, @@ -879,8 +985,8 @@ class CenterNetCenterHeatmapTargetAssigner(object): the stride specified during initialization. """ - out_height = tf.cast(height // self._stride, tf.float32) - out_width = tf.cast(width // self._stride, tf.float32) + out_height = tf.cast(tf.maximum(height // self._stride, 1), tf.float32) + out_width = tf.cast(tf.maximum(width // self._stride, 1), tf.float32) # Compute the yx-grid to be used to generate the heatmap. Each returned # tensor has shape of [out_height, out_width] (y_grid, x_grid) = ta_utils.image_shape_to_grids(out_height, out_width) @@ -893,9 +999,10 @@ class CenterNetCenterHeatmapTargetAssigner(object): gt_weights_list): boxes = box_list.BoxList(boxes) # Convert the box coordinates to absolute output image dimension space. - boxes = box_list_ops.to_absolute_coordinates(boxes, - height // self._stride, - width // self._stride) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) # Get the box center coordinates. Each returned tensors have the shape of # [num_instances] (y_center, x_center, boxes_height, @@ -913,7 +1020,147 @@ class CenterNetCenterHeatmapTargetAssigner(object): x_coordinates=x_center, sigma=sigma, channel_onehot=class_targets, - channel_weights=weights) + channel_weights=weights, + sparse=self._compute_heatmap_sparse) + heatmaps.append(heatmap) + + # Return the stacked heatmaps over the batch. 
+ return tf.stack(heatmaps, axis=0) + + def assign_center_targets_from_keypoints(self, + height, + width, + gt_classes_list, + gt_keypoints_list, + gt_weights_list=None, + gt_keypoints_weights_list=None): + """Computes the object center heatmap target using keypoint locations. + + Args: + height: int, height of input to the model. This is used to + determine the height of the output. + width: int, width of the input to the model. This is used to + determine the width of the output. + gt_classes_list: A list of float tensors with shape [num_boxes, + num_classes] representing the one-hot encoded class labels for each box + in the gt_boxes_list. + gt_keypoints_list: A list of float tensors with shape [num_boxes, 4] + representing the groundtruth detection bounding boxes for each sample in + the batch. The box coordinates are expected in normalized coordinates. + gt_weights_list: A list of float tensors with shape [num_boxes] + representing the weight of each groundtruth detection box. + gt_keypoints_weights_list: [Optional] a list of 3D tf.float32 tensors of + shape [num_instances, num_total_keypoints] representing the weights of + each keypoints. If not provided, then all not NaN keypoints will be + equally weighted. + + Returns: + heatmap: A Tensor of size [batch_size, output_height, output_width, + num_classes] representing the per class center heatmap. output_height + and output_width are computed by dividing the input height and width by + the stride specified during initialization. + """ + assert (self._keypoint_weights_for_center is not None and + self._keypoint_class_id is not None and + self._keypoint_indices is not None) + out_height = tf.cast(tf.maximum(height // self._stride, 1), tf.float32) + out_width = tf.cast(tf.maximum(width // self._stride, 1), tf.float32) + # Compute the yx-grid to be used to generate the heatmap. 
Each returned + # tensor has shape of [out_height, out_width] + (y_grid, x_grid) = ta_utils.image_shape_to_grids(out_height, out_width) + + heatmaps = [] + if gt_weights_list is None: + gt_weights_list = [None] * len(gt_classes_list) + if gt_keypoints_weights_list is None: + gt_keypoints_weights_list = [None] * len(gt_keypoints_list) + + for keypoints, classes, kp_weights, weights in zip( + gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list, + gt_weights_list): + + keypoints_absolute, kp_weights = _preprocess_keypoints_and_weights( + out_height=out_height, + out_width=out_width, + keypoints=keypoints, + class_onehot=classes, + class_weights=weights, + keypoint_weights=kp_weights, + class_id=self._keypoint_class_id, + keypoint_indices=self._keypoint_indices) + # _, num_keypoints, _ = ( + # shape_utils.combined_static_and_dynamic_shape(keypoints_absolute)) + + # Update the keypoint weights by the specified keypoints weights. + kp_loc_weights = tf.constant( + self._keypoint_weights_for_center, dtype=tf.float32) + updated_kp_weights = kp_weights * kp_loc_weights[tf.newaxis, :] + + # Obtain the sum of the weights for each instance. + # instance_weight_sum has shape: [num_instance]. + instance_weight_sum = tf.reduce_sum(updated_kp_weights, axis=1) + + # Weight the keypoint coordinates by updated_kp_weights. + # weighted_keypoints has shape: [num_instance, num_keypoints, 2] + weighted_keypoints = keypoints_absolute * tf.expand_dims( + updated_kp_weights, axis=2) + + # Compute the mean of the keypoint coordinates over the weighted + # keypoints. + # keypoint_mean has shape: [num_instance, 2] + keypoint_mean = tf.math.divide( + tf.reduce_sum(weighted_keypoints, axis=1), + tf.expand_dims(instance_weight_sum, axis=-1)) + + # Replace the NaN values (due to divided by zeros in the above operation) + # by 0.0 where the sum of instance weight is zero. 
+ # keypoint_mean has shape: [num_instance, 2] + keypoint_mean = tf.where( + tf.stack([instance_weight_sum, instance_weight_sum], axis=1) > 0.0, + keypoint_mean, tf.zeros_like(keypoint_mean)) + + # Compute the distance from each keypoint to the mean location using + # broadcasting and weighted by updated_kp_weights. + # keypoint_dist has shape: [num_instance, num_keypoints] + keypoint_mean = tf.expand_dims(keypoint_mean, axis=1) + keypoint_dist = tf.math.sqrt( + tf.reduce_sum( + tf.math.square(keypoints_absolute - keypoint_mean), axis=2)) + keypoint_dist = keypoint_dist * updated_kp_weights + + # Compute the average of the distances from each keypoint to the mean + # location and update the average value by zero when the instance weight + # is zero. + # avg_radius has shape: [num_instance] + avg_radius = tf.math.divide( + tf.reduce_sum(keypoint_dist, axis=1), instance_weight_sum) + avg_radius = tf.where( + instance_weight_sum > 0.0, avg_radius, tf.zeros_like(avg_radius)) + + # Update the class instance weight. If the instance doesn't contain enough + # valid keypoint values (i.e. instance_weight_sum == 0.0), then set the + # instance weight to zero. + # updated_class_weights has shape: [num_instance] + updated_class_weights = tf.where( + instance_weight_sum > 0.0, weights, tf.zeros_like(weights)) + + # Compute the sigma from average distance. We use 2 * average distance to + # to approximate the width/height of the bounding box. + # sigma has shape: [num_instances]. + sigma = _compute_std_dev_from_box_size(2 * avg_radius, 2 * avg_radius, + self._min_overlap) + + # Apply the Gaussian kernel to the center coordinates. 
Returned heatmap + # has shape of [out_height, out_width, num_classes] + heatmap = ta_utils.coordinates_to_heatmap( + y_grid=y_grid, + x_grid=x_grid, + y_coordinates=keypoint_mean[:, 0, 0], + x_coordinates=keypoint_mean[:, 0, 1], + sigma=sigma, + channel_onehot=classes, + channel_weights=updated_class_weights, + sparse=self._compute_heatmap_sparse) heatmaps.append(heatmap) # Return the stacked heatmaps over the batch. @@ -984,9 +1231,10 @@ class CenterNetBoxTargetAssigner(object): for i, (boxes, weights) in enumerate(zip(gt_boxes_list, gt_weights_list)): boxes = box_list.BoxList(boxes) - boxes = box_list_ops.to_absolute_coordinates(boxes, - height // self._stride, - width // self._stride) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) # Get the box center coordinates. Each returned tensors have the shape of # [num_boxes] (y_center, x_center, boxes_height, @@ -1071,7 +1319,9 @@ class CenterNetKeypointTargetAssigner(object): keypoint_indices, keypoint_std_dev=None, per_keypoint_offset=False, - peak_radius=0): + peak_radius=0, + compute_heatmap_sparse=False, + per_keypoint_depth=False): """Initializes a CenterNet keypoints target assigner. Args: @@ -1098,13 +1348,22 @@ class CenterNetKeypointTargetAssigner(object): out_width, 2 * num_keypoints]. peak_radius: int, the radius (in the unit of output pixel) around heatmap peak to assign the offset targets. + compute_heatmap_sparse: bool, indicating whether or not to use the sparse + version of the Op that computes the heatmap. The sparse version scales + better with number of keypoint types, but in some cases is known to + cause an OOM error. See (b/170989061). + per_keypoint_depth: A bool indicates whether the model predicts the depth + of each keypoints in independent channels. Similar to + per_keypoint_offset but for the keypoint depth. 
""" self._stride = stride self._class_id = class_id self._keypoint_indices = keypoint_indices self._per_keypoint_offset = per_keypoint_offset + self._per_keypoint_depth = per_keypoint_depth self._peak_radius = peak_radius + self._compute_heatmap_sparse = compute_heatmap_sparse if keypoint_std_dev is None: self._keypoint_std_dev = ([_DEFAULT_KEYPOINT_OFFSET_STD_DEV] * len(keypoint_indices)) @@ -1112,65 +1371,6 @@ class CenterNetKeypointTargetAssigner(object): assert len(keypoint_indices) == len(keypoint_std_dev) self._keypoint_std_dev = keypoint_std_dev - def _preprocess_keypoints_and_weights(self, out_height, out_width, keypoints, - class_onehot, class_weights, - keypoint_weights): - """Preprocesses the keypoints and the corresponding keypoint weights. - - This function performs several common steps to preprocess the keypoints and - keypoint weights features, including: - 1) Select the subset of keypoints based on the keypoint indices, fill the - keypoint NaN values with zeros and convert to absoluate coordinates. - 2) Generate the weights of the keypoint using the following information: - a. The class of the instance. - b. The NaN value of the keypoint coordinates. - c. The provided keypoint weights. - - Args: - out_height: An integer or an interger tensor indicating the output height - of the model. - out_width: An integer or an interger tensor indicating the output width of - the model. - keypoints: A float tensor of shape [num_instances, num_total_keypoints, 2] - representing the original keypoint grountruth coordinates. - class_onehot: A float tensor of shape [num_instances, num_classes] - containing the class targets with the 0th index assumed to map to the - first non-background class. - class_weights: A float tensor of shape [num_instances] containing weights - for groundtruth instances. - keypoint_weights: A float tensor of shape - [num_instances, num_total_keypoints] representing the weights of each - keypoints. 
- - Returns: - A tuple of two tensors: - keypoint_absolute: A float tensor of shape - [num_instances, num_keypoints, 2] which is the selected and updated - keypoint coordinates. - keypoint_weights: A float tensor of shape [num_instances, num_keypoints] - representing the updated weight of each keypoint. - """ - # Select the targets keypoints by their type ids and generate the mask - # of valid elements. - valid_mask, keypoints = ta_utils.get_valid_keypoint_mask_for_class( - keypoint_coordinates=keypoints, - class_id=self._class_id, - class_onehot=class_onehot, - class_weights=class_weights, - keypoint_indices=self._keypoint_indices) - # Keypoint coordinates in absolute coordinate system. - # The shape of the tensors: [num_instances, num_keypoints, 2]. - keypoints_absolute = keypoint_ops.to_absolute_coordinates( - keypoints, out_height, out_width) - # Assign default weights for the keypoints. - if keypoint_weights is None: - keypoint_weights = tf.ones_like(keypoints[:, :, 0]) - else: - keypoint_weights = tf.gather( - keypoint_weights, indices=self._keypoint_indices, axis=1) - keypoint_weights = keypoint_weights * valid_mask - return keypoints_absolute, keypoint_weights - def assign_keypoint_heatmap_targets(self, height, width, @@ -1212,8 +1412,8 @@ class CenterNetKeypointTargetAssigner(object): output_width] where all values within the regions of the blackout boxes are 0.0 and 1.0 else where. """ - out_width = tf.cast(width // self._stride, tf.float32) - out_height = tf.cast(height // self._stride, tf.float32) + out_width = tf.cast(tf.maximum(width // self._stride, 1), tf.float32) + out_height = tf.cast(tf.maximum(height // self._stride, 1), tf.float32) # Compute the yx-grid to be used to generate the heatmap. 
Each returned # tensor has shape of [out_height, out_width] y_grid, x_grid = ta_utils.image_shape_to_grids(out_height, out_width) @@ -1231,13 +1431,15 @@ class CenterNetKeypointTargetAssigner(object): for keypoints, classes, kp_weights, weights, boxes in zip( gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list, gt_weights_list, gt_boxes_list): - keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights( + keypoints_absolute, kp_weights = _preprocess_keypoints_and_weights( out_height=out_height, out_width=out_width, keypoints=keypoints, class_onehot=classes, class_weights=weights, - keypoint_weights=kp_weights) + keypoint_weights=kp_weights, + class_id=self._class_id, + keypoint_indices=self._keypoint_indices) num_instances, num_keypoints, _ = ( shape_utils.combined_static_and_dynamic_shape(keypoints_absolute)) @@ -1264,9 +1466,10 @@ class CenterNetKeypointTargetAssigner(object): if boxes is not None: boxes = box_list.BoxList(boxes) # Convert the box coordinates to absolute output image dimension space. - boxes = box_list_ops.to_absolute_coordinates(boxes, - height // self._stride, - width // self._stride) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) # Get the box height and width. 
Each returned tensors have the shape # of [num_instances] (_, _, boxes_height, @@ -1385,13 +1588,15 @@ class CenterNetKeypointTargetAssigner(object): for i, (keypoints, classes, kp_weights, weights) in enumerate( zip(gt_keypoints_list, gt_classes_list, gt_keypoints_weights_list, gt_weights_list)): - keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights( - out_height=height // self._stride, - out_width=width // self._stride, + keypoints_absolute, kp_weights = _preprocess_keypoints_and_weights( + out_height=tf.maximum(height // self._stride, 1), + out_width=tf.maximum(width // self._stride, 1), keypoints=keypoints, class_onehot=classes, class_weights=weights, - keypoint_weights=kp_weights) + keypoint_weights=kp_weights, + class_id=self._class_id, + keypoint_indices=self._keypoint_indices) num_instances, num_keypoints, _ = ( shape_utils.combined_static_and_dynamic_shape(keypoints_absolute)) @@ -1402,10 +1607,11 @@ class CenterNetKeypointTargetAssigner(object): # All keypoint coordinates and their neighbors: # [num_instance * num_keypoints, num_neighbors] (y_source_neighbors, x_source_neighbors, - valid_sources) = ta_utils.get_surrounding_grids(height // self._stride, - width // self._stride, - y_source, x_source, - self._peak_radius) + valid_sources) = ta_utils.get_surrounding_grids( + tf.cast(tf.maximum(height // self._stride, 1), tf.float32), + tf.cast(tf.maximum(width // self._stride, 1), tf.float32), + y_source, x_source, + self._peak_radius) _, num_neighbors = shape_utils.combined_static_and_dynamic_shape( y_source_neighbors) @@ -1454,6 +1660,179 @@ class CenterNetKeypointTargetAssigner(object): batch_offsets = tf.concat(batch_offsets, axis=0) return (batch_indices, batch_offsets, batch_weights) + def assign_keypoints_depth_targets(self, + height, + width, + gt_keypoints_list, + gt_classes_list, + gt_keypoint_depths_list, + gt_keypoint_depth_weights_list, + gt_keypoints_weights_list=None, + gt_weights_list=None): + """Returns the target depths 
of the keypoints. + + The returned values are the relative depth information of each keypoints. + + Args: + height: int, height of input to the CenterNet model. This is used to + determine the height of the output. + width: int, width of the input to the CenterNet model. This is used to + determine the width of the output. + gt_keypoints_list: A list of tensors with shape [num_instances, + num_total_keypoints, 2]. See class-level description for more detail. + gt_classes_list: A list of tensors with shape [num_instances, + num_classes]. See class-level description for more detail. + gt_keypoint_depths_list: A list of tensors with shape [num_instances, + num_total_keypoints] corresponding to the relative depth of the + keypoints. + gt_keypoint_depth_weights_list: A list of tensors with shape + [num_instances, num_total_keypoints] corresponding to the weights of + the relative depth. + gt_keypoints_weights_list: A list of tensors with shape [num_instances, + num_total_keypoints] corresponding to the weight of each keypoint. + gt_weights_list: A list of float tensors with shape [num_instances]. See + class-level description for more detail. + + Returns: + batch_indices: an integer tensor of shape [num_total_instances, 3] (or + [num_total_instances, 4] if 'per_keypoint_depth' is set True) holding + the indices inside the predicted tensor which should be penalized. The + first column indicates the index along the batch dimension and the + second and third columns indicate the index along the y and x + dimensions respectively. The fourth column corresponds to the channel + dimension (if 'per_keypoint_offset' is set True). + batch_depths: a float tensor of shape [num_total_instances, 1] (or + [num_total_instances, num_keypoints] if per_keypoint_depth is set True) + indicating the target depth of each keypoint. + batch_weights: a float tensor of shape [num_total_instances] indicating + the weight of each prediction. 
+ Note that num_total_instances = batch_size * num_instances * + num_keypoints * num_neighbors + """ + + batch_indices = [] + batch_weights = [] + batch_depths = [] + + if gt_keypoints_weights_list is None: + gt_keypoints_weights_list = [None] * len(gt_keypoints_list) + if gt_weights_list is None: + gt_weights_list = [None] * len(gt_classes_list) + if gt_keypoint_depths_list is None: + gt_keypoint_depths_list = [None] * len(gt_classes_list) + for i, (keypoints, classes, kp_weights, weights, + keypoint_depths, keypoint_depth_weights) in enumerate( + zip(gt_keypoints_list, gt_classes_list, + gt_keypoints_weights_list, gt_weights_list, + gt_keypoint_depths_list, gt_keypoint_depth_weights_list)): + keypoints_absolute, kp_weights = _preprocess_keypoints_and_weights( + out_height=tf.maximum(height // self._stride, 1), + out_width=tf.maximum(width // self._stride, 1), + keypoints=keypoints, + class_onehot=classes, + class_weights=weights, + keypoint_weights=kp_weights, + class_id=self._class_id, + keypoint_indices=self._keypoint_indices) + num_instances, num_keypoints, _ = ( + shape_utils.combined_static_and_dynamic_shape(keypoints_absolute)) + + # [num_instances * num_keypoints] + y_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 0]) + x_source = tf.keras.backend.flatten(keypoints_absolute[:, :, 1]) + + # All keypoint coordinates and their neighbors: + # [num_instance * num_keypoints, num_neighbors] + (y_source_neighbors, x_source_neighbors, + valid_sources) = ta_utils.get_surrounding_grids( + tf.cast(tf.maximum(height // self._stride, 1), tf.float32), + tf.cast(tf.maximum(width // self._stride, 1), tf.float32), + y_source, x_source, + self._peak_radius) + _, num_neighbors = shape_utils.combined_static_and_dynamic_shape( + y_source_neighbors) + + # Update the valid keypoint weights. 
+ # [num_instance * num_keypoints, num_neighbors] + valid_keypoints = tf.cast( + valid_sources, dtype=tf.float32) * tf.stack( + [tf.keras.backend.flatten(kp_weights)] * num_neighbors, axis=-1) + + # Compute the offsets and indices of the box centers. Shape: + # indices: [num_instances * num_keypoints, num_neighbors, 2] + _, indices = ta_utils.compute_floor_offsets_with_indices( + y_source=y_source_neighbors, + x_source=x_source_neighbors, + y_target=y_source, + x_target=x_source) + # Reshape to: + # indices: [num_instances * num_keypoints * num_neighbors, 2] + indices = tf.reshape(indices, [-1, 2]) + + # Gather the keypoint depth from corresponding keypoint indices: + # [num_instances, num_keypoints] + keypoint_depths = tf.gather( + keypoint_depths, self._keypoint_indices, axis=1) + # Tile the depth target to surrounding pixels. + # [num_instances, num_keypoints, num_neighbors] + tiled_keypoint_depths = tf.tile( + tf.expand_dims(keypoint_depths, axis=-1), + multiples=[1, 1, num_neighbors]) + + # [num_instances, num_keypoints] + keypoint_depth_weights = tf.gather( + keypoint_depth_weights, self._keypoint_indices, axis=1) + # [num_instances, num_keypoints, num_neighbors] + keypoint_depth_weights = tf.tile( + tf.expand_dims(keypoint_depth_weights, axis=-1), + multiples=[1, 1, num_neighbors]) + # Update the weights of keypoint depth by the weights of the keypoints. + # A keypoint depth target is valid only if its corresponding keypoint + # target is also valid. + # [num_instances, num_keypoints, num_neighbors] + tiled_depth_weights = ( + tf.reshape(valid_keypoints, + [num_instances, num_keypoints, num_neighbors]) * + keypoint_depth_weights) + invalid_depths = tf.logical_or( + tf.math.is_nan(tiled_depth_weights), + tf.math.is_nan(tiled_keypoint_depths)) + # Assign zero values and weights to NaN values. 
+ final_keypoint_depths = tf.where(invalid_depths, + tf.zeros_like(tiled_keypoint_depths), + tiled_keypoint_depths) + final_keypoint_depth_weights = tf.where( + invalid_depths, + tf.zeros_like(tiled_depth_weights), + tiled_depth_weights) + # [num_instances * num_keypoints * num_neighbors, 1] + batch_depths.append(tf.reshape(final_keypoint_depths, [-1, 1])) + + # Prepare the batch indices to be prepended. + batch_index = tf.fill( + [num_instances * num_keypoints * num_neighbors, 1], i) + if self._per_keypoint_depth: + tiled_keypoint_types = self._get_keypoint_types( + num_instances, num_keypoints, num_neighbors) + batch_indices.append( + tf.concat([batch_index, indices, + tf.reshape(tiled_keypoint_types, [-1, 1])], axis=1)) + else: + batch_indices.append(tf.concat([batch_index, indices], axis=1)) + batch_weights.append( + tf.keras.backend.flatten(final_keypoint_depth_weights)) + + # Concatenate the tensors in the batch in the first dimension: + # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 3] or + # [batch_size * num_instances * num_keypoints * num_neighbors, 4] if + # 'per_keypoint_offset' is set to True. 
+ batch_indices = tf.concat(batch_indices, axis=0) + # shape: [batch_size * num_instances * num_keypoints * num_neighbors] + batch_weights = tf.concat(batch_weights, axis=0) + # shape: [batch_size * num_instances * num_keypoints * num_neighbors, 1] + batch_depths = tf.concat(batch_depths, axis=0) + return (batch_indices, batch_depths, batch_weights) + def assign_joint_regression_targets(self, height, width, @@ -1519,13 +1898,15 @@ class CenterNetKeypointTargetAssigner(object): for i, (keypoints, classes, boxes, kp_weights, weights) in enumerate( zip(gt_keypoints_list, gt_classes_list, gt_boxes_list, gt_keypoints_weights_list, gt_weights_list)): - keypoints_absolute, kp_weights = self._preprocess_keypoints_and_weights( - out_height=height // self._stride, - out_width=width // self._stride, + keypoints_absolute, kp_weights = _preprocess_keypoints_and_weights( + out_height=tf.maximum(height // self._stride, 1), + out_width=tf.maximum(width // self._stride, 1), keypoints=keypoints, class_onehot=classes, class_weights=weights, - keypoint_weights=kp_weights) + keypoint_weights=kp_weights, + class_id=self._class_id, + keypoint_indices=self._keypoint_indices) num_instances, num_keypoints, _ = ( shape_utils.combined_static_and_dynamic_shape(keypoints_absolute)) @@ -1533,9 +1914,10 @@ class CenterNetKeypointTargetAssigner(object): if boxes is not None: # Compute joint center from boxes. boxes = box_list.BoxList(boxes) - boxes = box_list_ops.to_absolute_coordinates(boxes, - height // self._stride, - width // self._stride) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) y_center, x_center, _, _ = boxes.get_center_coordinates_and_sizes() else: # TODO(yuhuic): Add the logic to generate object centers from keypoints. 
@@ -1554,7 +1936,8 @@ class CenterNetKeypointTargetAssigner(object): # [num_instance * num_keypoints, num_neighbors] (y_source_neighbors, x_source_neighbors, valid_sources) = ta_utils.get_surrounding_grids( - height // self._stride, width // self._stride, + tf.cast(tf.maximum(height // self._stride, 1), tf.float32), + tf.cast(tf.maximum(width // self._stride, 1), tf.float32), tf.keras.backend.flatten(y_center_tiled), tf.keras.backend.flatten(x_center_tiled), self._peak_radius) @@ -1600,6 +1983,17 @@ class CenterNetKeypointTargetAssigner(object): return (batch_indices, batch_offsets, batch_weights) +def _resize_masks(masks, height, width, method): + # Resize segmentation masks to conform to output dimensions. Use TF2 + # image resize because TF1's version is buggy: + # https://yaqs.corp.google.com/eng/q/4970450458378240 + masks = tf2.image.resize( + masks[:, :, :, tf.newaxis], + size=(height, width), + method=method) + return masks[:, :, :, 0] + + class CenterNetMaskTargetAssigner(object): """Wrapper to compute targets for segmentation masks.""" @@ -1636,22 +2030,20 @@ class CenterNetMaskTargetAssigner(object): _, input_height, input_width = ( shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0])) - output_height = input_height // self._stride - output_width = input_width // self._stride + output_height = tf.maximum(input_height // self._stride, 1) + output_width = tf.maximum(input_width // self._stride, 1) segmentation_targets_list = [] for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list): - # Resize segmentation masks to conform to output dimensions. 
Use TF2 - # image resize because TF1's version is buggy: - # https://yaqs.corp.google.com/eng/q/4970450458378240 - gt_masks = tf2.image.resize( - gt_masks[:, :, :, tf.newaxis], - size=(output_height, output_width), - method=mask_resize_method) + gt_masks = _resize_masks(gt_masks, output_height, output_width, + mask_resize_method) + gt_masks = gt_masks[:, :, :, tf.newaxis] gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes]) # Shape: [h, w, num_classes]. segmentations_for_image = tf.reduce_max( gt_masks * gt_classes_reshaped, axis=0) + # Avoid the case where max of an empty array is -inf. + segmentations_for_image = tf.maximum(segmentations_for_image, 0.0) segmentation_targets_list.append(segmentations_for_image) segmentation_target = tf.stack(segmentation_targets_list, axis=0) @@ -1729,7 +2121,9 @@ class CenterNetDensePoseTargetAssigner(object): part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts) # Get DensePose coordinates in the output space. surface_coords_abs = densepose_ops.to_absolute_coordinates( - surface_coords, height // self._stride, width // self._stride) + surface_coords, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4]) # Each tensor has shape [num_boxes * max_sampled_points]. yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1) @@ -1771,3 +2165,495 @@ class CenterNetDensePoseTargetAssigner(object): batch_surface_coords = tf.concat(batch_surface_coords, axis=0) batch_weights = tf.concat(batch_weights, axis=0) return batch_indices, batch_part_ids, batch_surface_coords, batch_weights + + +class CenterNetTrackTargetAssigner(object): + """Wrapper to compute targets for tracking task. 
+ + Reference paper: A Simple Baseline for Multi-Object Tracking [1] + [1]: https://arxiv.org/abs/2004.01888 + """ + + def __init__(self, stride, num_track_ids): + self._stride = stride + self._num_track_ids = num_track_ids + + def assign_track_targets(self, + height, + width, + gt_track_ids_list, + gt_boxes_list, + gt_weights_list=None): + """Computes the track ID targets. + + Args: + height: int, height of input to the model. This is used to determine the + height of the output. + width: int, width of the input to the model. This is used to determine the + width of the output. + gt_track_ids_list: A list of 1-D tensors with shape [num_boxes] + corresponding to the track ID of each groundtruth detection box. + gt_boxes_list: A list of float tensors with shape [num_boxes, 4] + representing the groundtruth detection bounding boxes for each sample in + the batch. The coordinates are expected in normalized coordinates. + gt_weights_list: A list of 1-D tensors with shape [num_boxes] + corresponding to the weight of each groundtruth detection box. + + Returns: + batch_indices: an integer tensor of shape [batch_size, num_boxes, 3] + holding the indices inside the predicted tensor which should be + penalized. The first column indicates the index along the batch + dimension and the second and third columns indicate the index + along the y and x dimensions respectively. + batch_weights: a float tensor of shape [batch_size, num_boxes] indicating + the weight of each prediction. + track_id_targets: An int32 tensor of size [batch_size, num_boxes, + num_track_ids] containing the one-hot track ID vector of each + groundtruth detection box. 
+ """ + track_id_targets = tf.one_hot( + gt_track_ids_list, depth=self._num_track_ids, axis=-1) + + if gt_weights_list is None: + gt_weights_list = [None] * len(gt_boxes_list) + + batch_indices = [] + batch_weights = [] + + for i, (boxes, weights) in enumerate(zip(gt_boxes_list, gt_weights_list)): + boxes = box_list.BoxList(boxes) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) + # Get the box center coordinates. Each returned tensors have the shape of + # [num_boxes] + (y_center, x_center, _, _) = boxes.get_center_coordinates_and_sizes() + num_boxes = tf.shape(x_center) + + # Compute the indices of the box centers. Shape: + # indices: [num_boxes, 2] + (_, indices) = ta_utils.compute_floor_offsets_with_indices( + y_source=y_center, x_source=x_center) + + # Assign ones if weights are not provided. + if weights is None: + weights = tf.ones(num_boxes, dtype=tf.float32) + + # Shape of [num_boxes, 1] integer tensor filled with current batch index. + batch_index = i * tf.ones_like(indices[:, 0:1], dtype=tf.int32) + batch_indices.append(tf.concat([batch_index, indices], axis=1)) + batch_weights.append(weights) + + batch_indices = tf.stack(batch_indices, axis=0) + batch_weights = tf.stack(batch_weights, axis=0) + + return batch_indices, batch_weights, track_id_targets + + +def filter_mask_overlap_min_area(masks): + """If a pixel belongs to 2 instances, remove it from the larger instance.""" + + num_instances = tf.shape(masks)[0] + def _filter_min_area(): + """Helper function to filter non empty masks.""" + areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True) + per_pixel_area = masks * areas + # Make sure background is ignored in argmin. 
+ per_pixel_area = (masks * per_pixel_area + + (1 - masks) * per_pixel_area.dtype.max) + min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32) + + filtered_masks = ( + tf.range(num_instances)[:, tf.newaxis, tf.newaxis] + == + min_index[tf.newaxis, :, :] + ) + + return tf.cast(filtered_masks, tf.float32) * masks + + return tf.cond(num_instances > 0, _filter_min_area, + lambda: masks) + + +def filter_mask_overlap(masks, method='min_area'): + + if method == 'min_area': + return filter_mask_overlap_min_area(masks) + else: + raise ValueError('Unknown mask overlap filter type - {}'.format(method)) + + +class CenterNetCornerOffsetTargetAssigner(object): + """Wrapper to compute corner offsets for boxes using masks.""" + + def __init__(self, stride, overlap_resolution='min_area'): + """Initializes the corner offset target assigner. + + Args: + stride: int, the stride of the network in output pixels. + overlap_resolution: string, specifies how we handle overlapping + instance masks. Currently only 'min_area' is supported which assigns + overlapping pixels to the instance with the minimum area. + """ + + self._stride = stride + self._overlap_resolution = overlap_resolution + + def assign_corner_offset_targets( + self, gt_boxes_list, gt_masks_list): + """Computes the corner offset targets and foreground map. + + For each pixel that is part of any object's foreground, this function + computes the relative offsets to the top-left and bottom-right corners of + that instance's bounding box. It also returns a foreground map to indicate + which pixels contain valid corner offsets. + + Args: + gt_boxes_list: A list of float tensors with shape [num_boxes, 4] + representing the groundtruth detection bounding boxes for each sample in + the batch. The coordinates are expected in normalized coordinates. + gt_masks_list: A list of float tensors with shape [num_boxes, + input_height, input_width] with values in {0, 1} representing instance + masks for each object. 
+ + Returns: + corner_offsets: A float tensor of shape [batch_size, height, width, 4] + containing, in order, the (y, x) offsets to the top left corner and + the (y, x) offsets to the bottom right corner for each foregroung pixel + foreground: A float tensor of shape [batch_size, height, width] in which + each pixel is set to 1 if it is a part of any instance's foreground + (and thus contains valid corner offsets) and 0 otherwise. + + """ + _, input_height, input_width = ( + shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0])) + output_height = tf.maximum(input_height // self._stride, 1) + output_width = tf.maximum(input_width // self._stride, 1) + y_grid, x_grid = tf.meshgrid( + tf.range(output_height), tf.range(output_width), + indexing='ij') + y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32) + + corner_targets = [] + foreground_targets = [] + for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list): + gt_masks = _resize_masks(gt_masks, output_height, output_width, + method=ResizeMethod.NEAREST_NEIGHBOR) + gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution) + + output_height = tf.cast(output_height, tf.float32) + output_width = tf.cast(output_width, tf.float32) + ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1) + ymin, ymax = ymin * output_height, ymax * output_height + xmin, xmax = xmin * output_width, xmax * output_width + + top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis] + left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis] + bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis] + right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis] + + foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5, + tf.float32) + foreground_targets.append(foreground_target) + + corner_target = tf.stack([ + tf.reduce_sum(top_y * gt_masks, axis=0), + tf.reduce_sum(left_x * gt_masks, axis=0), + tf.reduce_sum(bottom_y * gt_masks, axis=0), + tf.reduce_sum(right_x * gt_masks, 
axis=0), + ], axis=2) + + corner_targets.append(corner_target) + + return (tf.stack(corner_targets, axis=0), + tf.stack(foreground_targets, axis=0)) + + +class CenterNetTemporalOffsetTargetAssigner(object): + """Wrapper to compute target tensors for the temporal offset task. + + This class has methods that take as input a batch of ground truth tensors + (in the form of a list) and returns the targets required to train the + temporal offset task. + """ + + def __init__(self, stride): + """Initializes the target assigner. + + Args: + stride: int, the stride of the network in output pixels. + """ + + self._stride = stride + + def assign_temporal_offset_targets(self, + height, + width, + gt_boxes_list, + gt_offsets_list, + gt_match_list, + gt_weights_list=None): + """Returns the temporal offset targets and their indices. + + For each ground truth box, this function assigns it the corresponding + temporal offset to train the model. + + Args: + height: int, height of input to the model. This is used to determine the + height of the output. + width: int, width of the input to the model. This is used to determine the + width of the output. + gt_boxes_list: A list of float tensors with shape [num_boxes, 4] + representing the groundtruth detection bounding boxes for each sample in + the batch. The coordinates are expected in normalized coordinates. + gt_offsets_list: A list of 2-D tf.float32 tensors of shape [num_boxes, 2] + containing the spatial offsets of objects' centers compared with the + previous frame. + gt_match_list: A list of 1-D tf.float32 tensors of shape [num_boxes] + containing flags that indicate if an object has existed in the + previous frame. + gt_weights_list: A list of tensors with shape [num_boxes] corresponding to + the weight of each groundtruth detection box. + + Returns: + batch_indices: an integer tensor of shape [num_boxes, 3] holding the + indices inside the predicted tensor which should be penalized. 
The + first column indicates the index along the batch dimension and the + second and third columns indicate the index along the y and x + dimensions respectively. + batch_temporal_offsets: a float tensor of shape [num_boxes, 2] of the + expected y and x temporal offset of each object center in the + output space. + batch_weights: a float tensor of shape [num_boxes] indicating the + weight of each prediction. + """ + + if gt_weights_list is None: + gt_weights_list = [None] * len(gt_boxes_list) + + batch_indices = [] + batch_weights = [] + batch_temporal_offsets = [] + + for i, (boxes, offsets, match_flags, weights) in enumerate(zip( + gt_boxes_list, gt_offsets_list, gt_match_list, gt_weights_list)): + boxes = box_list.BoxList(boxes) + boxes = box_list_ops.to_absolute_coordinates( + boxes, + tf.maximum(height // self._stride, 1), + tf.maximum(width // self._stride, 1)) + # Get the box center coordinates. Each returned tensors have the shape of + # [num_boxes] + (y_center, x_center, _, _) = boxes.get_center_coordinates_and_sizes() + num_boxes = tf.shape(x_center) + + # Compute the offsets and indices of the box centers. Shape: + # offsets: [num_boxes, 2] + # indices: [num_boxes, 2] + (_, indices) = ta_utils.compute_floor_offsets_with_indices( + y_source=y_center, x_source=x_center) + + # Assign ones if weights are not provided. + # if an object is not matched, its weight becomes zero. + if weights is None: + weights = tf.ones(num_boxes, dtype=tf.float32) + weights *= match_flags + + # Shape of [num_boxes, 1] integer tensor filled with current batch index. 
+ batch_index = i * tf.ones_like(indices[:, 0:1], dtype=tf.int32) + batch_indices.append(tf.concat([batch_index, indices], axis=1)) + batch_weights.append(weights) + batch_temporal_offsets.append(offsets) + + batch_indices = tf.concat(batch_indices, axis=0) + batch_weights = tf.concat(batch_weights, axis=0) + batch_temporal_offsets = tf.concat(batch_temporal_offsets, axis=0) + return (batch_indices, batch_temporal_offsets, batch_weights) + + +class DETRTargetAssigner(object): + """Target assigner for DETR (https://arxiv.org/abs/2005.12872). + + Detection Transformer (DETR) matches predicted boxes to groundtruth directly + to determine targets instead of matching anchors to groundtruth. Hence, the + new target assigner. + """ + + def __init__(self): + """Construct Object Detection Target Assigner.""" + self._similarity_calc = sim_calc.DETRSimilarity() + self._matcher = hungarian_matcher.HungarianBipartiteMatcher() + + def batch_assign(self, + pred_box_batch, + gt_box_batch, + pred_class_batch, + gt_class_targets_batch, + gt_weights_batch=None, + unmatched_class_label_batch=None): + """Batched assignment of classification and regression targets. + + Args: + pred_box_batch: a tensor of shape [batch_size, num_queries, 4] + representing predicted bounding boxes. + gt_box_batch: a tensor of shape [batch_size, num_queries, 4] + representing groundtruth bounding boxes. + pred_class_batch: A list of tensors with length batch_size, where each + each tensor has shape [num_queries, num_classes] to be used + by certain similarity calculators. + gt_class_targets_batch: a list of tensors with length batch_size, where + each tensor has shape [num_gt_boxes_i, num_classes] and + num_gt_boxes_i is the number of boxes in the ith boxlist of + gt_box_batch. + gt_weights_batch: A list of 1-D tf.float32 tensors of shape + [num_boxes] containing weights for groundtruth boxes. 
+ unmatched_class_label_batch: a float32 tensor with shape + [d_1, d_2, ..., d_k] which is consistent with the classification target + for each anchor (and can be empty for scalar targets). This shape must + thus be compatible with the `gt_class_targets_batch`. + + Returns: + batch_cls_targets: a tensor with shape [batch_size, num_pred_boxes, + num_classes], + batch_cls_weights: a tensor with shape [batch_size, num_pred_boxes, + num_classes], + batch_reg_targets: a tensor with shape [batch_size, num_pred_boxes, + box_code_dimension] + batch_reg_weights: a tensor with shape [batch_size, num_pred_boxes]. + """ + pred_box_batch = [ + box_list.BoxList(pred_box) + for pred_box in tf.unstack(pred_box_batch)] + gt_box_batch = [ + box_list.BoxList(gt_box) + for gt_box in tf.unstack(gt_box_batch)] + + cls_targets_list = [] + cls_weights_list = [] + reg_targets_list = [] + reg_weights_list = [] + if gt_weights_batch is None: + gt_weights_batch = [None] * len(gt_class_targets_batch) + if unmatched_class_label_batch is None: + unmatched_class_label_batch = [None] * len(gt_class_targets_batch) + pred_class_batch = tf.unstack(pred_class_batch) + for (pred_boxes, gt_boxes, pred_class_batch, gt_class_targets, gt_weights, + unmatched_class_label) in zip(pred_box_batch, gt_box_batch, + pred_class_batch, gt_class_targets_batch, + gt_weights_batch, + unmatched_class_label_batch): + (cls_targets, cls_weights, reg_targets, + reg_weights) = self.assign(pred_boxes, gt_boxes, pred_class_batch, + gt_class_targets, gt_weights, + unmatched_class_label) + cls_targets_list.append(cls_targets) + cls_weights_list.append(cls_weights) + reg_targets_list.append(reg_targets) + reg_weights_list.append(reg_weights) + batch_cls_targets = tf.stack(cls_targets_list) + batch_cls_weights = tf.stack(cls_weights_list) + batch_reg_targets = tf.stack(reg_targets_list) + batch_reg_weights = tf.stack(reg_weights_list) + return (batch_cls_targets, batch_cls_weights, batch_reg_targets, + batch_reg_weights) + + def 
assign(self, + pred_boxes, + gt_boxes, + pred_classes, + gt_labels, + gt_weights=None, + unmatched_class_label=None): + """Assign classification and regression targets to each box_pred. + + For a given set of pred_boxes and groundtruth detections, match pred_boxes + to gt_boxes and assign classification and regression targets to + each box_pred as well as weights based on the resulting match (specifying, + e.g., which pred_boxes should not contribute to training loss). + + pred_boxes that are not matched to anything are given a classification + target of `unmatched_cls_target`. + + Args: + pred_boxes: a BoxList representing N pred_boxes + gt_boxes: a BoxList representing M groundtruth boxes + pred_classes: A tensor with shape [max_num_boxes, num_classes] + to be used by certain similarity calculators. + gt_labels: a tensor of shape [M, num_classes] + with labels for each of the ground_truth boxes. The subshape + [num_classes] can be empty (corresponding to scalar inputs). When set + to None, gt_labels assumes a binary problem where all + ground_truth boxes get a positive label (of 1). + gt_weights: a float tensor of shape [M] indicating the weight to + assign to all pred_boxes match to a particular groundtruth box. The + weights must be in [0., 1.]. If None, all weights are set to 1. + Generally no groundtruth boxes with zero weight match to any pred_boxes + as matchers are aware of groundtruth weights. Additionally, + `cls_weights` and `reg_weights` are calculated using groundtruth + weights as an added safety. + unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k] + which is consistent with the classification target for each + anchor (and can be empty for scalar targets). This shape must thus be + compatible with the groundtruth labels that are passed to the "assign" + function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). 
+ + Returns: + cls_targets: a float32 tensor with shape [num_pred_boxes, num_classes], + where the subshape [num_classes] is compatible with gt_labels + which has shape [num_gt_boxes, num_classes]. + cls_weights: a float32 tensor with shape [num_pred_boxes, num_classes], + representing weights for each element in cls_targets. + reg_targets: a float32 tensor with shape [num_pred_boxes, + box_code_dimension] + reg_weights: a float32 tensor with shape [num_pred_boxes] + + """ + if not unmatched_class_label: + unmatched_class_label = tf.constant( + [1] + [0] * (gt_labels.shape[1] - 1), tf.float32) + + if gt_weights is None: + num_gt_boxes = gt_boxes.num_boxes_static() + if not num_gt_boxes: + num_gt_boxes = gt_boxes.num_boxes() + gt_weights = tf.ones([num_gt_boxes], dtype=tf.float32) + + gt_boxes.add_field(fields.BoxListFields.classes, gt_labels) + pred_boxes.add_field(fields.BoxListFields.classes, pred_classes) + + match_quality_matrix = self._similarity_calc.compare( + gt_boxes, + pred_boxes) + match = self._matcher.match(match_quality_matrix, + valid_rows=tf.greater(gt_weights, 0)) + + matched_gt_boxes = match.gather_based_on_match( + gt_boxes.get(), + unmatched_value=tf.zeros(4), + ignored_value=tf.zeros(4)) + matched_gt_boxlist = box_list.BoxList(matched_gt_boxes) + ty, tx, th, tw = matched_gt_boxlist.get_center_coordinates_and_sizes() + reg_targets = tf.transpose(tf.stack([ty, tx, th, tw])) + cls_targets = match.gather_based_on_match( + gt_labels, + unmatched_value=unmatched_class_label, + ignored_value=unmatched_class_label) + reg_weights = match.gather_based_on_match( + gt_weights, + ignored_value=0., + unmatched_value=0.) + cls_weights = match.gather_based_on_match( + gt_weights, + ignored_value=0., + unmatched_value=1) + + # convert cls_weights from per-box_pred to per-class. 
+ class_label_shape = tf.shape(cls_targets)[1:] + weights_multiple = tf.concat( + [tf.constant([1]), class_label_shape], + axis=0) + cls_weights = tf.expand_dims(cls_weights, -1) + cls_weights = tf.tile(cls_weights, weights_multiple) + + return (cls_targets, cls_weights, reg_targets, reg_weights) diff --git a/research/object_detection/core/target_assigner_test.py b/research/object_detection/core/target_assigner_test.py index 4dec1eb778c901a2d6ec5ed108db58d8b399a1cd..ad0eaa82006e4f97987fe14fae9a1e0473f83cc6 100644 --- a/research/object_detection/core/target_assigner_test.py +++ b/research/object_detection/core/target_assigner_test.py @@ -14,6 +14,7 @@ # ============================================================================== """Tests for object_detection.core.target_assigner.""" +from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf @@ -115,6 +116,7 @@ class TargetAssignerTest(test_case.TestCase): self.assertEqual(reg_weights_out.dtype, np.float32) def test_assign_agnostic_with_keypoints(self): + def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_keypoints): similarity_calc = region_similarity_calculator.IouSimilarity() @@ -1234,7 +1236,8 @@ def _array_argmax(array): return np.unravel_index(np.argmax(array), array.shape) -class CenterNetCenterHeatmapTargetAssignerTest(test_case.TestCase): +class CenterNetCenterHeatmapTargetAssignerTest(test_case.TestCase, + parameterized.TestCase): def setUp(self): super(CenterNetCenterHeatmapTargetAssignerTest, self).setUp() @@ -1262,6 +1265,66 @@ class CenterNetCenterHeatmapTargetAssignerTest(test_case.TestCase): self.assertEqual((15, 5), _array_argmax(targets[0, :, :, 1])) self.assertAlmostEqual(1.0, targets[0, 15, 5, 1]) + @parameterized.parameters( + {'keypoint_weights_for_center': [1.0, 1.0, 1.0, 1.0]}, + {'keypoint_weights_for_center': [0.0, 0.0, 1.0, 1.0]}, + ) + def test_center_location_by_keypoints(self, keypoint_weights_for_center): + """Test that the centers are at 
the correct location.""" + kpts_y = [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.0, 0.0, 0.0, 0.0]] + kpts_x = [[0.5, 0.6, 0.7, 0.8], [0.1, 0.2, 0.3, 0.4], [0.0, 0.0, 0.0, 0.0]] + gt_keypoints_list = [ + tf.stack([tf.constant(kpts_y), tf.constant(kpts_x)], axis=2) + ] + kpts_weight = [[1.0, 1.0, 1.0, 1.0], [1.0, 0.0, 1.0, 0.0], + [1.0, 0.0, 1.0, 0.0]] + gt_keypoints_weights_list = [tf.constant(kpts_weight)] + gt_classes_list = [ + tf.one_hot([0, 0, 0], depth=1), + ] + gt_weights_list = [tf.constant([1.0, 1.0, 0.0])] + + def graph_fn(): + assigner = targetassigner.CenterNetCenterHeatmapTargetAssigner( + 4, + keypoint_class_id=0, + keypoint_indices=[0, 1, 2, 3], + keypoint_weights_for_center=keypoint_weights_for_center) + targets = assigner.assign_center_targets_from_keypoints( + 80, + 80, + gt_classes_list=gt_classes_list, + gt_keypoints_list=gt_keypoints_list, + gt_weights_list=gt_weights_list, + gt_keypoints_weights_list=gt_keypoints_weights_list) + return targets + + targets = self.execute(graph_fn, []) + + if sum(keypoint_weights_for_center) == 4.0: + # There should be two peaks at location (5, 13), and (12, 4). + # (5, 13) = ((0.1 + 0.2 + 0.3 + 0.4) / 4 * 80 / 4, + # (0.5 + 0.6 + 0.7 + 0.8) / 4 * 80 / 4) + # (12, 4) = ((0.5 + 0.7) / 2 * 80 / 4, + # (0.1 + 0.3) / 2 * 80 / 4) + self.assertEqual((5, 13), _array_argmax(targets[0, :, :, 0])) + self.assertAlmostEqual(1.0, targets[0, 5, 13, 0]) + self.assertEqual((1, 20, 20, 1), targets.shape) + targets[0, 5, 13, 0] = 0.0 + self.assertEqual((12, 4), _array_argmax(targets[0, :, :, 0])) + self.assertAlmostEqual(1.0, targets[0, 12, 4, 0]) + else: + # There should be two peaks at location (7, 15), and (14, 6). 
+ # (7, 15) = ((0.3 + 0.4) / 2 * 80 / 4, + # (0.7 + 0.8) / 2 * 80 / 4) + # (14, 6) = (0.7 * 80 / 4, 0.3 * 80 / 4) + self.assertEqual((7, 15), _array_argmax(targets[0, :, :, 0])) + self.assertAlmostEqual(1.0, targets[0, 7, 15, 0]) + self.assertEqual((1, 20, 20, 1), targets.shape) + targets[0, 7, 15, 0] = 0.0 + self.assertEqual((14, 6), _array_argmax(targets[0, :, :, 0])) + self.assertAlmostEqual(1.0, targets[0, 14, 6, 0]) + def test_center_batch_shape(self): """Test that the shape of the target for a batch is correct.""" def graph_fn(): @@ -1565,22 +1628,48 @@ class CenterNetBoxTargetAssignerTest(test_case.TestCase): """ def graph_fn(): - box_batch = [ - tf.constant([self._box_center, self._box_lower_left]), - tf.constant([self._box_center_small, self._box_odd_coordinates]), - ] - pred_array = np.ones((2, 40, 20, 2), dtype=np.int32) * -1000 pred_array[0, 20, 10] = [1, 2] pred_array[0, 30, 5] = [3, 4] pred_array[1, 20, 10] = [5, 6] pred_array[1, 14, 11] = [7, 8] + pred_tensor = tf.constant(pred_array) + + indices = tf.constant([ + [0, 20, 10], + [0, 30, 5], + [1, 20, 10], + [1, 14, 11] + ], dtype=tf.int32) + + preds = targetassigner.get_batch_predictions_from_indices( + pred_tensor, indices) + return preds + preds = self.execute(graph_fn, []) + np.testing.assert_array_equal(preds, [[1, 2], [3, 4], [5, 6], [7, 8]]) + + def test_get_batch_predictions_from_indices_with_class(self): + """Test the get_batch_predictions_from_indices function with class axis. + + This test verifies that the indices returned by + assign_size_and_offset_targets function work as expected with a predicted + tensor. 
+ """ + def graph_fn(): + pred_array = np.ones((2, 40, 20, 5, 2), dtype=np.int32) * -1000 + pred_array[0, 20, 10, 0] = [1, 2] + pred_array[0, 30, 5, 2] = [3, 4] + pred_array[1, 20, 10, 1] = [5, 6] + pred_array[1, 14, 11, 4] = [7, 8] pred_tensor = tf.constant(pred_array) - cn_assigner = targetassigner.CenterNetBoxTargetAssigner(4) - indices, _, _, _ = cn_assigner.assign_size_and_offset_targets( - 160, 80, box_batch) + indices = tf.constant([ + [0, 20, 10, 0], + [0, 30, 5, 2], + [1, 20, 10, 1], + [1, 14, 11, 4] + ], dtype=tf.int32) preds = targetassigner.get_batch_predictions_from_indices( pred_tensor, indices) @@ -1682,6 +1771,121 @@ class CenterNetKeypointTargetAssignerTest(test_case.TestCase): np.testing.assert_array_equal([0, 3, 2], indices[7, :]) np.testing.assert_array_almost_equal([0.6, 0.4], offsets[7, :]) + def test_assign_keypoint_depths_target(self): + def graph_fn(): + gt_classes_list = [ + tf.one_hot([0, 1, 0, 1], depth=4), + ] + coordinates = tf.expand_dims( + tf.constant( + np.array([[0.1, 0.2, 0.3, 0.4, 0.5], + [float('nan'), 0.7, 0.7, 0.9, 0.4], + [0.4, 0.1, 0.4, 0.2, 0.0], + [float('nan'), 0.0, 0.12, 0.7, 0.4]]), + dtype=tf.float32), + axis=2) + gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)] + depths = tf.constant( + np.array([[0.1, 0.2, 0.3, 0.4, 0.5], + [float('nan'), 0.7, float('nan'), 0.9, 0.4], + [0.4, 0.1, 0.4, 0.2, 0.0], + [0.5, 0.0, 7.0, 0.7, 0.4]]), + dtype=tf.float32) + gt_keypoint_depths_list = [depths] + + gt_keypoint_depth_weights = tf.constant( + np.array([[1.0, 1.0, 1.0, 1.0, 1.0], + [float('nan'), 0.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 0.5, 1.0, 1.0]]), + dtype=tf.float32) + gt_keypoint_depth_weights_list = [gt_keypoint_depth_weights] + + cn_assigner = targetassigner.CenterNetKeypointTargetAssigner( + stride=4, + class_id=1, + keypoint_indices=[0, 2], + peak_radius=1) + (indices, depths, weights) = cn_assigner.assign_keypoints_depth_targets( + height=120, + width=80, + 
gt_keypoints_list=gt_keypoints_list, + gt_classes_list=gt_classes_list, + gt_keypoint_depths_list=gt_keypoint_depths_list, + gt_keypoint_depth_weights_list=gt_keypoint_depth_weights_list) + return indices, depths, weights + indices, depths, weights = self.execute(graph_fn, []) + + # Only the last 5 elements has positive weight. + np.testing.assert_array_almost_equal([ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5 + ], weights) + # Validate the last 5 elements' depth value. + np.testing.assert_array_almost_equal( + [7.0, 7.0, 7.0, 7.0, 7.0], depths[35:, 0]) + self.assertEqual((40, 3), indices.shape) + np.testing.assert_array_equal([0, 2, 2], indices[35, :]) + + def test_assign_keypoint_depths_per_keypoints(self): + def graph_fn(): + gt_classes_list = [ + tf.one_hot([0, 1, 0, 1], depth=4), + ] + coordinates = tf.expand_dims( + tf.constant( + np.array([[0.1, 0.2, 0.3, 0.4, 0.5], + [float('nan'), 0.7, 0.7, 0.9, 0.4], + [0.4, 0.1, 0.4, 0.2, 0.0], + [float('nan'), 0.0, 0.12, 0.7, 0.4]]), + dtype=tf.float32), + axis=2) + gt_keypoints_list = [tf.concat([coordinates, coordinates], axis=2)] + depths = tf.constant( + np.array([[0.1, 0.2, 0.3, 0.4, 0.5], + [float('nan'), 0.7, float('nan'), 0.9, 0.4], + [0.4, 0.1, 0.4, 0.2, 0.0], + [0.5, 0.0, 7.0, 0.7, 0.4]]), + dtype=tf.float32) + gt_keypoint_depths_list = [depths] + + gt_keypoint_depth_weights = tf.constant( + np.array([[1.0, 1.0, 1.0, 1.0, 1.0], + [float('nan'), 0.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 0.5, 1.0, 1.0]]), + dtype=tf.float32) + gt_keypoint_depth_weights_list = [gt_keypoint_depth_weights] + + cn_assigner = targetassigner.CenterNetKeypointTargetAssigner( + stride=4, + class_id=1, + keypoint_indices=[0, 2], + peak_radius=1, + per_keypoint_depth=True) + (indices, depths, weights) = cn_assigner.assign_keypoints_depth_targets( + 
height=120, + width=80, + gt_keypoints_list=gt_keypoints_list, + gt_classes_list=gt_classes_list, + gt_keypoint_depths_list=gt_keypoint_depths_list, + gt_keypoint_depth_weights_list=gt_keypoint_depth_weights_list) + return indices, depths, weights + indices, depths, weights = self.execute(graph_fn, []) + + # Only the last 5 elements has positive weight. + np.testing.assert_array_almost_equal([ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5 + ], weights) + # Validate the last 5 elements' depth value. + np.testing.assert_array_almost_equal( + [7.0, 7.0, 7.0, 7.0, 7.0], depths[35:, 0]) + self.assertEqual((40, 4), indices.shape) + np.testing.assert_array_equal([0, 2, 2, 1], indices[35, :]) + def test_assign_keypoints_offset_targets_radius(self): def graph_fn(): gt_classes_list = [ @@ -1905,6 +2109,22 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase): np.testing.assert_array_almost_equal( expected_seg_target, segmentation_target) + def test_assign_segmentation_targets_no_objects(self): + def graph_fn(): + gt_masks_list = [tf.zeros((0, 5, 5))] + gt_classes_list = [tf.zeros((0, 10))] + cn_assigner = targetassigner.CenterNetMaskTargetAssigner(stride=1) + segmentation_target = cn_assigner.assign_segmentation_targets( + gt_masks_list=gt_masks_list, + gt_classes_list=gt_classes_list, + mask_resize_method=targetassigner.ResizeMethod.NEAREST_NEIGHBOR) + return segmentation_target + + segmentation_target = self.execute(graph_fn, []) + expected_seg_target = np.zeros((1, 5, 5, 10)) + np.testing.assert_array_almost_equal( + expected_seg_target, segmentation_target) + class CenterNetDensePoseTargetAssignerTest(test_case.TestCase): @@ -1999,6 +2219,490 @@ class CenterNetDensePoseTargetAssignerTest(test_case.TestCase): self.assertAllClose(expected_batch_weights, batch_weights) +class 
CenterNetTrackTargetAssignerTest(test_case.TestCase): + + def setUp(self): + super(CenterNetTrackTargetAssignerTest, self).setUp() + self._box_center = [0.0, 0.0, 1.0, 1.0] + self._box_center_small = [0.25, 0.25, 0.75, 0.75] + self._box_lower_left = [0.5, 0.0, 1.0, 0.5] + self._box_center_offset = [0.1, 0.05, 1.0, 1.0] + self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625] + + def test_assign_track_targets(self): + """Test the assign_track_targets function.""" + def graph_fn(): + box_batch = [ + tf.constant([self._box_center, self._box_lower_left]), + tf.constant([self._box_lower_left, self._box_center_small]), + tf.constant([self._box_center_small, self._box_odd_coordinates]), + ] + track_id_batch = [ + tf.constant([0, 1]), + tf.constant([1, 0]), + tf.constant([0, 2]), + ] + + assigner = targetassigner.CenterNetTrackTargetAssigner( + stride=4, num_track_ids=3) + + (batch_indices, batch_weights, + track_targets) = assigner.assign_track_targets( + height=80, + width=80, + gt_track_ids_list=track_id_batch, + gt_boxes_list=box_batch) + return batch_indices, batch_weights, track_targets + + indices, weights, track_ids = self.execute(graph_fn, []) + + self.assertEqual(indices.shape, (3, 2, 3)) + self.assertEqual(track_ids.shape, (3, 2, 3)) + self.assertEqual(weights.shape, (3, 2)) + + np.testing.assert_array_equal(indices, + [[[0, 10, 10], [0, 15, 5]], + [[1, 15, 5], [1, 10, 10]], + [[2, 10, 10], [2, 7, 11]]]) + np.testing.assert_array_equal(track_ids, + [[[1, 0, 0], [0, 1, 0]], + [[0, 1, 0], [1, 0, 0]], + [[1, 0, 0], [0, 0, 1]]]) + np.testing.assert_array_equal(weights, [[1, 1], [1, 1], [1, 1]]) + + def test_assign_track_targets_weights(self): + """Test the assign_track_targets function with box weights.""" + def graph_fn(): + box_batch = [ + tf.constant([self._box_center, self._box_lower_left]), + tf.constant([self._box_lower_left, self._box_center_small]), + tf.constant([self._box_center_small, self._box_odd_coordinates]), + ] + track_id_batch = [ + 
tf.constant([0, 1]), + tf.constant([1, 0]), + tf.constant([0, 2]), + ] + weights_batch = [ + tf.constant([0.0, 1.0]), + tf.constant([1.0, 1.0]), + tf.constant([0.0, 0.0]) + ] + + assigner = targetassigner.CenterNetTrackTargetAssigner( + stride=4, num_track_ids=3) + + (batch_indices, batch_weights, + track_targets) = assigner.assign_track_targets( + height=80, + width=80, + gt_track_ids_list=track_id_batch, + gt_boxes_list=box_batch, + gt_weights_list=weights_batch) + return batch_indices, batch_weights, track_targets + + indices, weights, track_ids = self.execute(graph_fn, []) + + self.assertEqual(indices.shape, (3, 2, 3)) + self.assertEqual(track_ids.shape, (3, 2, 3)) + self.assertEqual(weights.shape, (3, 2)) + + np.testing.assert_array_equal(indices, + [[[0, 10, 10], [0, 15, 5]], + [[1, 15, 5], [1, 10, 10]], + [[2, 10, 10], [2, 7, 11]]]) + np.testing.assert_array_equal(track_ids, + [[[1, 0, 0], [0, 1, 0]], + [[0, 1, 0], [1, 0, 0]], + [[1, 0, 0], [0, 0, 1]]]) + np.testing.assert_array_equal(weights, [[0, 1], [1, 1], [0, 0]]) + # TODO(xwwang): Add a test for the case when no objects are detected. + + +class CornerOffsetTargetAssignerTest(test_case.TestCase): + + def test_filter_overlap_min_area_empty(self): + """Test that empty masks work on CPU.""" + def graph_fn(masks): + return targetassigner.filter_mask_overlap_min_area(masks) + + masks = self.execute_cpu(graph_fn, [np.zeros((0, 5, 5), dtype=np.float32)]) + self.assertEqual(masks.shape, (0, 5, 5)) + + def test_filter_overlap_min_area(self): + """Test the object with min. 
area is selected instead of overlap.""" + def graph_fn(masks): + return targetassigner.filter_mask_overlap_min_area(masks) + + masks = np.zeros((3, 4, 4), dtype=np.float32) + masks[0, :2, :2] = 1.0 + masks[1, :3, :3] = 1.0 + masks[2, 3, 3] = 1.0 + + masks = self.execute(graph_fn, [masks]) + + self.assertAllClose(masks[0], + [[1, 1, 0, 0], + [1, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]) + self.assertAllClose(masks[1], + [[0, 0, 1, 0], + [0, 0, 1, 0], + [1, 1, 1, 0], + [0, 0, 0, 0]]) + + self.assertAllClose(masks[2], + [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 1]]) + + def test_assign_corner_offset_single_object(self): + """Test that corner offsets are correct with a single object.""" + assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1) + + def graph_fn(): + boxes = [ + tf.constant([[0., 0., 1., 1.]]) + ] + mask = np.zeros((1, 4, 4), dtype=np.float32) + mask[0, 1:3, 1:3] = 1.0 + + masks = [tf.constant(mask)] + return assigner.assign_corner_offset_targets(boxes, masks) + + corner_offsets, foreground = self.execute(graph_fn, []) + self.assertAllClose(foreground[0], + [[0, 0, 0, 0], + [0, 1, 1, 0], + [0, 1, 1, 0], + [0, 0, 0, 0]]) + + self.assertAllClose(corner_offsets[0, :, :, 0], + [[0, 0, 0, 0], + [0, -1, -1, 0], + [0, -2, -2, 0], + [0, 0, 0, 0]]) + self.assertAllClose(corner_offsets[0, :, :, 1], + [[0, 0, 0, 0], + [0, -1, -2, 0], + [0, -1, -2, 0], + [0, 0, 0, 0]]) + self.assertAllClose(corner_offsets[0, :, :, 2], + [[0, 0, 0, 0], + [0, 3, 3, 0], + [0, 2, 2, 0], + [0, 0, 0, 0]]) + self.assertAllClose(corner_offsets[0, :, :, 3], + [[0, 0, 0, 0], + [0, 3, 2, 0], + [0, 3, 2, 0], + [0, 0, 0, 0]]) + + def test_assign_corner_offset_multiple_objects(self): + """Test corner offsets are correct with multiple objects.""" + assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1) + + def graph_fn(): + boxes = [ + tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]), + tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]]) + ] + 
mask1 = np.zeros((2, 4, 4), dtype=np.float32) + mask1[0, 0, 0] = 1.0 + mask1[0, 3, 3] = 1.0 + + mask2 = np.zeros((2, 4, 4), dtype=np.float32) + mask2[0, :2, :2] = 1.0 + mask2[1, 1:, 1:] = 1.0 + + masks = [tf.constant(mask1), tf.constant(mask2)] + return assigner.assign_corner_offset_targets(boxes, masks) + + corner_offsets, foreground = self.execute(graph_fn, []) + self.assertEqual(corner_offsets.shape, (2, 4, 4, 4)) + self.assertEqual(foreground.shape, (2, 4, 4)) + + self.assertAllClose(foreground[0], + [[1, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 1]]) + + self.assertAllClose(corner_offsets[0, :, :, 0], + [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, -3]]) + self.assertAllClose(corner_offsets[0, :, :, 1], + [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, -3]]) + self.assertAllClose(corner_offsets[0, :, :, 2], + [[4, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 1]]) + self.assertAllClose(corner_offsets[0, :, :, 3], + [[4, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 1]]) + + self.assertAllClose(foreground[1], + [[1, 1, 0, 0], + [1, 1, 1, 1], + [0, 1, 1, 1], + [0, 1, 1, 1]]) + + self.assertAllClose(corner_offsets[1, :, :, 0], + [[0, 0, 0, 0], + [-1, -1, 0, 0], + [0, -1, -1, -1], + [0, -2, -2, -2]]) + self.assertAllClose(corner_offsets[1, :, :, 1], + [[0, -1, 0, 0], + [0, -1, -1, -2], + [0, 0, -1, -2], + [0, 0, -1, -2]]) + self.assertAllClose(corner_offsets[1, :, :, 2], + [[1, 1, 0, 0], + [0, 0, 3, 3], + [0, 2, 2, 2], + [0, 1, 1, 1]]) + self.assertAllClose(corner_offsets[1, :, :, 3], + [[1, 0, 0, 0], + [1, 0, 2, 1], + [0, 3, 2, 1], + [0, 3, 2, 1]]) + + def test_assign_corner_offsets_no_objects(self): + """Test assignment works with empty input on cpu.""" + assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1) + + def graph_fn(): + boxes = [ + tf.zeros((0, 4), dtype=tf.float32) + ] + masks = [tf.zeros((0, 5, 5), dtype=tf.float32)] + return assigner.assign_corner_offset_targets(boxes, masks) + + 
corner_offsets, foreground = self.execute_cpu(graph_fn, []) + self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4))) + self.assertAllClose(foreground, np.zeros((1, 5, 5))) + + +class CenterNetTemporalOffsetTargetAssigner(test_case.TestCase): + + def setUp(self): + super(CenterNetTemporalOffsetTargetAssigner, self).setUp() + self._box_center = [0.0, 0.0, 1.0, 1.0] + self._box_center_small = [0.25, 0.25, 0.75, 0.75] + self._box_lower_left = [0.5, 0.0, 1.0, 0.5] + self._box_center_offset = [0.1, 0.05, 1.0, 1.0] + self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625] + self._offset_center = [0.5, 0.4] + self._offset_center_small = [0.1, 0.1] + self._offset_lower_left = [-0.1, 0.1] + self._offset_center_offset = [0.4, 0.3] + self._offset_odd_coord = [0.125, -0.125] + + def test_assign_empty_groundtruths(self): + """Tests the assign_offset_targets function with empty inputs.""" + def graph_fn(): + box_batch = [ + tf.zeros((0, 4), dtype=tf.float32), + ] + + offset_batch = [ + tf.zeros((0, 2), dtype=tf.float32), + ] + + match_flag_batch = [ + tf.zeros((0), dtype=tf.float32), + ] + + assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4) + indices, temporal_offset, weights = assigner.assign_temporal_offset_targets( + 80, 80, box_batch, offset_batch, match_flag_batch) + return indices, temporal_offset, weights + indices, temporal_offset, weights = self.execute(graph_fn, []) + self.assertEqual(indices.shape, (0, 3)) + self.assertEqual(temporal_offset.shape, (0, 2)) + self.assertEqual(weights.shape, (0,)) + + def test_assign_offset_targets(self): + """Tests the assign_offset_targets function.""" + def graph_fn(): + box_batch = [ + tf.constant([self._box_center, self._box_lower_left]), + tf.constant([self._box_center_offset]), + tf.constant([self._box_center_small, self._box_odd_coordinates]), + ] + + offset_batch = [ + tf.constant([self._offset_center, self._offset_lower_left]), + tf.constant([self._offset_center_offset]), + 
tf.constant([self._offset_center_small, self._offset_odd_coord]), + ] + + match_flag_batch = [ + tf.constant([1.0, 1.0]), + tf.constant([1.0]), + tf.constant([1.0, 1.0]), + ] + + assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4) + indices, temporal_offset, weights = assigner.assign_temporal_offset_targets( + 80, 80, box_batch, offset_batch, match_flag_batch) + return indices, temporal_offset, weights + indices, temporal_offset, weights = self.execute(graph_fn, []) + self.assertEqual(indices.shape, (5, 3)) + self.assertEqual(temporal_offset.shape, (5, 2)) + self.assertEqual(weights.shape, (5,)) + np.testing.assert_array_equal( + indices, + [[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]]) + np.testing.assert_array_almost_equal( + temporal_offset, + [[0.5, 0.4], [-0.1, 0.1], [0.4, 0.3], [0.1, 0.1], [0.125, -0.125]]) + np.testing.assert_array_equal(weights, 1) + + def test_assign_offset_targets_with_match_flags(self): + """Tests the assign_offset_targets function with match flags.""" + def graph_fn(): + box_batch = [ + tf.constant([self._box_center, self._box_lower_left]), + tf.constant([self._box_center_offset]), + tf.constant([self._box_center_small, self._box_odd_coordinates]), + ] + + offset_batch = [ + tf.constant([self._offset_center, self._offset_lower_left]), + tf.constant([self._offset_center_offset]), + tf.constant([self._offset_center_small, self._offset_odd_coord]), + ] + + match_flag_batch = [ + tf.constant([0.0, 1.0]), + tf.constant([1.0]), + tf.constant([1.0, 1.0]), + ] + + cn_assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4) + weights_batch = [ + tf.constant([1.0, 0.0]), + tf.constant([1.0]), + tf.constant([1.0, 1.0]) + ] + indices, temporal_offset, weights = cn_assigner.assign_temporal_offset_targets( + 80, 80, box_batch, offset_batch, match_flag_batch, weights_batch) + return indices, temporal_offset, weights + indices, temporal_offset, weights = self.execute(graph_fn, []) + 
self.assertEqual(indices.shape, (5, 3)) + self.assertEqual(temporal_offset.shape, (5, 2)) + self.assertEqual(weights.shape, (5,)) + + np.testing.assert_array_equal( + indices, + [[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]]) + np.testing.assert_array_almost_equal( + temporal_offset, + [[0.5, 0.4], [-0.1, 0.1], [0.4, 0.3], [0.1, 0.1], [0.125, -0.125]]) + np.testing.assert_array_equal(weights, [0, 0, 1, 1, 1]) + + +class DETRTargetAssignerTest(test_case.TestCase): + + def test_assign_detr(self): + def graph_fn(pred_corners, groundtruth_box_corners, + groundtruth_labels, predicted_labels): + detr_target_assigner = targetassigner.DETRTargetAssigner() + pred_boxlist = box_list.BoxList(pred_corners) + groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners) + result = detr_target_assigner.assign( + pred_boxlist, groundtruth_boxlist, + predicted_labels, groundtruth_labels) + (cls_targets, cls_weights, reg_targets, reg_weights) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + pred_corners = np.array([[0.25, 0.25, 0.4, 0.2], + [0.5, 0.8, 1.0, 0.8], + [0.9, 0.5, 0.1, 1.0]], dtype=np.float32) + groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9]], + dtype=np.float32) + predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]], + dtype=np.float32) + groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]], + dtype=np.float32) + + exp_cls_targets = [[0, 1], [0, 1], [1, 0]] + exp_cls_weights = [[1, 1], [1, 1], [1, 1]] + exp_reg_targets = [[0.25, 0.25, 0.5, 0.5], + [0.7, 0.7, 0.4, 0.4], + [0, 0, 0, 0]] + exp_reg_weights = [1, 1, 0] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute_cpu( + graph_fn, [pred_corners, groundtruth_box_corners, + groundtruth_labels, predicted_labels]) + + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + 
self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEqual(cls_targets_out.dtype, np.float32) + self.assertEqual(cls_weights_out.dtype, np.float32) + self.assertEqual(reg_targets_out.dtype, np.float32) + self.assertEqual(reg_weights_out.dtype, np.float32) + + def test_batch_assign_detr(self): + def graph_fn(pred_corners, groundtruth_box_corners, + groundtruth_labels, predicted_labels): + detr_target_assigner = targetassigner.DETRTargetAssigner() + result = detr_target_assigner.batch_assign( + pred_corners, groundtruth_box_corners, + [predicted_labels], [groundtruth_labels]) + (cls_targets, cls_weights, reg_targets, reg_weights) = result + return (cls_targets, cls_weights, reg_targets, reg_weights) + + pred_corners = np.array([[[0.25, 0.25, 0.4, 0.2], + [0.5, 0.8, 1.0, 0.8], + [0.9, 0.5, 0.1, 1.0]]], dtype=np.float32) + groundtruth_box_corners = np.array([[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.9, 0.9]]], + dtype=np.float32) + predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]], + dtype=np.float32) + groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]], + dtype=np.float32) + + exp_cls_targets = [[[0, 1], [0, 1], [1, 0]]] + exp_cls_weights = [[[1, 1], [1, 1], [1, 1]]] + exp_reg_targets = [[[0.25, 0.25, 0.5, 0.5], + [0.7, 0.7, 0.4, 0.4], + [0, 0, 0, 0]]] + exp_reg_weights = [[1, 1, 0]] + + (cls_targets_out, + cls_weights_out, reg_targets_out, reg_weights_out) = self.execute_cpu( + graph_fn, [pred_corners, groundtruth_box_corners, + groundtruth_labels, predicted_labels]) + + self.assertAllClose(cls_targets_out, exp_cls_targets) + self.assertAllClose(cls_weights_out, exp_cls_weights) + self.assertAllClose(reg_targets_out, exp_reg_targets) + self.assertAllClose(reg_weights_out, exp_reg_weights) + self.assertEqual(cls_targets_out.dtype, np.float32) + self.assertEqual(cls_weights_out.dtype, np.float32) + self.assertEqual(reg_targets_out.dtype, np.float32) + self.assertEqual(reg_weights_out.dtype, np.float32) + + if __name__ == '__main__': 
tf.enable_v2_behavior() tf.test.main() diff --git a/research/object_detection/data_decoders/tf_example_decoder.py b/research/object_detection/data_decoders/tf_example_decoder.py index 04cc4db59988161345c5cacd2e6f513b2707b0a1..acd48750fd9b390b84c6df2e8ad80ced19adc928 100644 --- a/research/object_detection/data_decoders/tf_example_decoder.py +++ b/research/object_detection/data_decoders/tf_example_decoder.py @@ -124,40 +124,6 @@ class _ClassTensorHandler(slim_example_decoder.Tensor): self._display_name_to_id_table.lookup(unmapped_tensor)) -class _BackupHandler(slim_example_decoder.ItemHandler): - """An ItemHandler that tries two ItemHandlers in order.""" - - def __init__(self, handler, backup): - """Initializes the BackupHandler handler. - - If the first Handler's tensors_to_item returns a Tensor with no elements, - the second Handler is used. - - Args: - handler: The primary ItemHandler. - backup: The backup ItemHandler. - - Raises: - ValueError: if either is not an ItemHandler. - """ - if not isinstance(handler, slim_example_decoder.ItemHandler): - raise ValueError('Primary handler is of type %s instead of ItemHandler' % - type(handler)) - if not isinstance(backup, slim_example_decoder.ItemHandler): - raise ValueError( - 'Backup handler is of type %s instead of ItemHandler' % type(backup)) - self._handler = handler - self._backup = backup - super(_BackupHandler, self).__init__(handler.keys + backup.keys) - - def tensors_to_item(self, keys_to_tensors): - item = self._handler.tensors_to_item(keys_to_tensors) - return tf.cond( - pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0), - true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors), - false_fn=lambda: item) - - class TfExampleDecoder(data_decoder.DataDecoder): """Tensorflow Example proto decoder.""" @@ -172,7 +138,9 @@ class TfExampleDecoder(data_decoder.DataDecoder): load_multiclass_scores=False, load_context_features=False, expand_hierarchy_labels=False, - load_dense_pose=False): + load_dense_pose=False, + 
load_track_id=False, + load_keypoint_depth_features=False): """Constructor sets keys_to_features and items_to_handlers. Args: @@ -204,6 +172,11 @@ class TfExampleDecoder(data_decoder.DataDecoder): classes, the labels are extended to ancestor. For negative classes, the labels are expanded to descendants. load_dense_pose: Whether to load DensePose annotations. + load_track_id: Whether to load tracking annotations. + load_keypoint_depth_features: Whether to load the keypoint depth features + including keypoint relative depths and weights. If this field is set to + True but no keypoint depth features are in the input tf.Example, then + default values will be populated. Raises: ValueError: If `instance_mask_type` option is not one of @@ -212,6 +185,7 @@ class TfExampleDecoder(data_decoder.DataDecoder): ValueError: If `expand_labels_hierarchy` is True, but the `label_map_proto_file` is not provided. """ + # TODO(rathodv): delete unused `use_display_name` argument once we change # other decoders to handle label maps similarly. del use_display_name @@ -235,6 +209,10 @@ class TfExampleDecoder(data_decoder.DataDecoder): tf.VarLenFeature(tf.string), 'image/class/label': tf.VarLenFeature(tf.int64), + 'image/neg_category_ids': + tf.VarLenFeature(tf.int64), + 'image/not_exhaustive_category_ids': + tf.VarLenFeature(tf.int64), 'image/class/confidence': tf.VarLenFeature(tf.float32), # Object boxes and classes. @@ -296,6 +274,10 @@ class TfExampleDecoder(data_decoder.DataDecoder): # Image-level labels. fields.InputDataFields.groundtruth_image_confidences: ( slim_example_decoder.Tensor('image/class/confidence')), + fields.InputDataFields.groundtruth_verified_neg_classes: ( + slim_example_decoder.Tensor('image/neg_category_ids')), + fields.InputDataFields.groundtruth_not_exhaustive_classes: ( + slim_example_decoder.Tensor('image/not_exhaustive_category_ids')), # Object boxes and classes. 
fields.InputDataFields.groundtruth_boxes: ( slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], @@ -355,6 +337,23 @@ class TfExampleDecoder(data_decoder.DataDecoder): slim_example_decoder.ItemHandlerCallback( ['image/object/keypoint/x', 'image/object/keypoint/visibility'], self._reshape_keypoint_visibilities)) + if load_keypoint_depth_features: + self.keys_to_features['image/object/keypoint/z'] = ( + tf.VarLenFeature(tf.float32)) + self.keys_to_features['image/object/keypoint/z/weights'] = ( + tf.VarLenFeature(tf.float32)) + self.items_to_handlers[ + fields.InputDataFields.groundtruth_keypoint_depths] = ( + slim_example_decoder.ItemHandlerCallback( + ['image/object/keypoint/x', 'image/object/keypoint/z'], + self._reshape_keypoint_depths)) + self.items_to_handlers[ + fields.InputDataFields.groundtruth_keypoint_depth_weights] = ( + slim_example_decoder.ItemHandlerCallback( + ['image/object/keypoint/x', + 'image/object/keypoint/z/weights'], + self._reshape_keypoint_depth_weights)) + if load_instance_masks: if instance_mask_type in (input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS): @@ -401,16 +400,22 @@ class TfExampleDecoder(data_decoder.DataDecoder): 'image/object/densepose/u', 'image/object/densepose/v', 'image/object/densepose/num'], self._dense_pose_surface_coordinates)) + if load_track_id: + self.keys_to_features['image/object/track/label'] = ( + tf.VarLenFeature(tf.int64)) + self.items_to_handlers[ + fields.InputDataFields.groundtruth_track_ids] = ( + slim_example_decoder.Tensor('image/object/track/label')) if label_map_proto_file: # If the label_map_proto is provided, try to use it in conjunction with # the class text, and fall back to a materialized ID. 
- label_handler = _BackupHandler( + label_handler = slim_example_decoder.BackupHandler( _ClassTensorHandler( 'image/object/class/text', label_map_proto_file, default_value=''), slim_example_decoder.Tensor('image/object/class/label')) - image_label_handler = _BackupHandler( + image_label_handler = slim_example_decoder.BackupHandler( _ClassTensorHandler( fields.TfExampleFields.image_class_text, label_map_proto_file, @@ -586,6 +591,11 @@ class TfExampleDecoder(data_decoder.DataDecoder): tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids], dtype=tf.int32) + if fields.InputDataFields.groundtruth_track_ids in tensor_dict: + tensor_dict[fields.InputDataFields.groundtruth_track_ids] = tf.cast( + tensor_dict[fields.InputDataFields.groundtruth_track_ids], + dtype=tf.int32) + return tensor_dict def _reshape_keypoints(self, keys_to_tensors): @@ -614,6 +624,73 @@ class TfExampleDecoder(data_decoder.DataDecoder): keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2]) return keypoints + def _reshape_keypoint_depths(self, keys_to_tensors): + """Reshape keypoint depths. + + The keypoint depths are reshaped to [num_instances, num_keypoints]. The + keypoint depth tensor is expected to have the same shape as the keypoint x + (or y) tensors. If not (usually because the example does not have the depth + groundtruth), then default depth values (zero) are provided. + + Args: + keys_to_tensors: a dictionary from keys to tensors. Expected keys are: + 'image/object/keypoint/x' + 'image/object/keypoint/z' + + Returns: + A 2-D float tensor of shape [num_instances, num_keypoints] with values + representing the keypoint depths. 
+ """ + x = keys_to_tensors['image/object/keypoint/x'] + z = keys_to_tensors['image/object/keypoint/z'] + if isinstance(z, tf.SparseTensor): + z = tf.sparse_tensor_to_dense(z) + if isinstance(x, tf.SparseTensor): + x = tf.sparse_tensor_to_dense(x) + + default_z = tf.zeros_like(x) + # Use keypoint depth groundtruth if provided, otherwise use the default + # depth value. + z = tf.cond(tf.equal(tf.size(x), tf.size(z)), + true_fn=lambda: z, + false_fn=lambda: default_z) + z = tf.reshape(z, [-1, self._num_keypoints]) + return z + + def _reshape_keypoint_depth_weights(self, keys_to_tensors): + """Reshape keypoint depth weights. + + The keypoint depth weights are reshaped to [num_instances, num_keypoints]. + The keypoint depth weights tensor is expected to have the same shape as the + keypoint x (or y) tensors. If not (usually because the example does not have + the depth weights groundtruth), then default weight values (zero) are + provided. + + Args: + keys_to_tensors: a dictionary from keys to tensors. Expected keys are: + 'image/object/keypoint/x' + 'image/object/keypoint/z/weights' + + Returns: + A 2-D float tensor of shape [num_instances, num_keypoints] with values + representing the keypoint depth weights. + """ + x = keys_to_tensors['image/object/keypoint/x'] + z = keys_to_tensors['image/object/keypoint/z/weights'] + if isinstance(z, tf.SparseTensor): + z = tf.sparse_tensor_to_dense(z) + if isinstance(x, tf.SparseTensor): + x = tf.sparse_tensor_to_dense(x) + + default_z = tf.zeros_like(x) + # Use keypoint depth weights if provided, otherwise use the default + # values. + z = tf.cond(tf.equal(tf.size(x), tf.size(z)), + true_fn=lambda: z, + false_fn=lambda: default_z) + z = tf.reshape(z, [-1, self._num_keypoints]) + return z + def _reshape_keypoint_visibilities(self, keys_to_tensors): """Reshape keypoint visibilities. 
diff --git a/research/object_detection/data_decoders/tf_example_decoder_test.py b/research/object_detection/data_decoders/tf_example_decoder_test.py index 81ed9258e650d7534bd9e3ae76aa574bc2a06b61..5311bdf4dfe6e2813dcf2c28b40dad10195c1693 100644 --- a/research/object_detection/data_decoders/tf_example_decoder_test.py +++ b/research/object_detection/data_decoders/tf_example_decoder_test.py @@ -275,6 +275,124 @@ class TfExampleDecoderTest(test_case.TestCase): self.assertAllEqual(expected_boxes, tensor_dict[fields.InputDataFields.groundtruth_boxes]) + def testDecodeKeypointDepth(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg, _ = self._create_encoded_and_decoded_data( + image_tensor, 'jpeg') + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] + keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + keypoint_visibility = [1, 2, 0, 1, 0, 2] + keypoint_depths = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6] + keypoint_depth_weights = [1.0, 0.9, 0.8, 0.7, 0.6, 0.5] + + def graph_fn(): + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + 'image/object/keypoint/y': + dataset_util.float_list_feature(keypoint_ys), + 'image/object/keypoint/x': + dataset_util.float_list_feature(keypoint_xs), + 'image/object/keypoint/z': + dataset_util.float_list_feature(keypoint_depths), + 'image/object/keypoint/z/weights': + dataset_util.float_list_feature(keypoint_depth_weights), + 'image/object/keypoint/visibility': + 
dataset_util.int64_list_feature(keypoint_visibility), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + num_keypoints=3, load_keypoint_depth_features=True) + output = example_decoder.decode(tf.convert_to_tensor(example)) + + self.assertAllEqual( + (output[fields.InputDataFields.groundtruth_keypoint_depths].get_shape( + ).as_list()), [2, 3]) + self.assertAllEqual( + (output[fields.InputDataFields.groundtruth_keypoint_depth_weights] + .get_shape().as_list()), [2, 3]) + return output + + tensor_dict = self.execute_cpu(graph_fn, []) + + expected_keypoint_depths = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] + self.assertAllClose( + expected_keypoint_depths, + tensor_dict[fields.InputDataFields.groundtruth_keypoint_depths]) + + expected_keypoint_depth_weights = [[1.0, 0.9, 0.8], [0.7, 0.6, 0.5]] + self.assertAllClose( + expected_keypoint_depth_weights, + tensor_dict[fields.InputDataFields.groundtruth_keypoint_depth_weights]) + + def testDecodeKeypointDepthNoDepth(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg, _ = self._create_encoded_and_decoded_data( + image_tensor, 'jpeg') + bbox_ymins = [0.0, 4.0] + bbox_xmins = [1.0, 5.0] + bbox_ymaxs = [2.0, 6.0] + bbox_xmaxs = [3.0, 7.0] + keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] + keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + keypoint_visibility = [1, 2, 0, 1, 0, 2] + + def graph_fn(): + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + 'image/object/keypoint/y': + dataset_util.float_list_feature(keypoint_ys), 
+ 'image/object/keypoint/x': + dataset_util.float_list_feature(keypoint_xs), + 'image/object/keypoint/visibility': + dataset_util.int64_list_feature(keypoint_visibility), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + num_keypoints=3, load_keypoint_depth_features=True) + output = example_decoder.decode(tf.convert_to_tensor(example)) + + return output + + tensor_dict = self.execute_cpu(graph_fn, []) + + expected_keypoints_depth_default = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] + self.assertAllClose( + expected_keypoints_depth_default, + tensor_dict[fields.InputDataFields.groundtruth_keypoint_depths]) + self.assertAllClose( + expected_keypoints_depth_default, + tensor_dict[fields.InputDataFields.groundtruth_keypoint_depth_weights]) + def testDecodeKeypoint(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg, _ = self._create_encoded_and_decoded_data( @@ -841,6 +959,61 @@ class TfExampleDecoderTest(test_case.TestCase): self.assertAllEqual(object_area, tensor_dict[fields.InputDataFields.groundtruth_area]) + def testDecodeVerifiedNegClasses(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg, _ = self._create_encoded_and_decoded_data( + image_tensor, 'jpeg') + neg_category_ids = [0, 5, 8] + + def graph_fn(): + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/neg_category_ids': + dataset_util.int64_list_feature(neg_category_ids), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + output = example_decoder.decode(tf.convert_to_tensor(example)) + return output + + tensor_dict = self.execute_cpu(graph_fn, []) + self.assertAllEqual( + neg_category_ids, + tensor_dict[fields.InputDataFields.groundtruth_verified_neg_classes]) + + def testDecodeNotExhaustiveClasses(self): + 
image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg, _ = self._create_encoded_and_decoded_data( + image_tensor, 'jpeg') + not_exhaustive_category_ids = [0, 5, 8] + + def graph_fn(): + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/not_exhaustive_category_ids': + dataset_util.int64_list_feature( + not_exhaustive_category_ids), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder() + output = example_decoder.decode(tf.convert_to_tensor(example)) + return output + + tensor_dict = self.execute_cpu(graph_fn, []) + self.assertAllEqual( + not_exhaustive_category_ids, + tensor_dict[fields.InputDataFields.groundtruth_not_exhaustive_classes]) + def testDecodeObjectIsCrowd(self): image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) encoded_jpeg, _ = self._create_encoded_and_decoded_data( @@ -1430,6 +1603,48 @@ class TfExampleDecoderTest(test_case.TestCase): self.assertAllEqual(dp_part_ids, expected_dp_part_ids) self.assertAllClose(dp_surface_coords, expected_dp_surface_coords) + def testDecodeTrack(self): + image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8) + encoded_jpeg, _ = self._create_encoded_and_decoded_data( + image_tensor, 'jpeg') + bbox_ymins = [0.0, 4.0, 2.0] + bbox_xmins = [1.0, 5.0, 8.0] + bbox_ymaxs = [2.0, 6.0, 1.0] + bbox_xmaxs = [3.0, 7.0, 3.3] + track_labels = [0, 1, 2] + + def graph_fn(): + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(bbox_ymins), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(bbox_xmins), + 'image/object/bbox/ymax': + 
dataset_util.float_list_feature(bbox_ymaxs), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(bbox_xmaxs), + 'image/object/track/label': + dataset_util.int64_list_feature(track_labels), + })).SerializeToString() + + example_decoder = tf_example_decoder.TfExampleDecoder( + load_track_id=True) + output = example_decoder.decode(tf.convert_to_tensor(example)) + track_ids = output[fields.InputDataFields.groundtruth_track_ids] + return track_ids + + track_ids = self.execute_cpu(graph_fn, []) + + expected_track_labels = [0, 1, 2] + + self.assertAllEqual(track_ids, expected_track_labels) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/data_decoders/tf_sequence_example_decoder.py b/research/object_detection/data_decoders/tf_sequence_example_decoder.py index 1565a910eb1726ce0846e9c78488a7e8d4f97fdf..9bed15970353255c0293057db3a5933d3c3a961f 100644 --- a/research/object_detection/data_decoders/tf_sequence_example_decoder.py +++ b/research/object_detection/data_decoders/tf_sequence_example_decoder.py @@ -117,11 +117,13 @@ class TfSequenceExampleDecoder(data_decoder.DataDecoder): Context R-CNN (see https://arxiv.org/abs/1912.03538): 'image/context_features' 'image/context_feature_length' + 'image/context_features_image_id_list' """ def __init__(self, label_map_proto_file, load_context_features=False, + load_context_image_ids=False, use_display_name=False, fully_annotated=False): """Constructs `TfSequenceExampleDecoder` object. @@ -134,6 +136,8 @@ class TfSequenceExampleDecoder(data_decoder.DataDecoder): load_context_features: Whether to load information from context_features, to provide additional context to a detection model for training and/or inference + load_context_image_ids: Whether to load the corresponding image ids for + the context_features in order to visualize attention. use_display_name: whether or not to use the `display_name` for label mapping (instead of `name`). Only used if label_map_proto_file is provided. 
@@ -207,6 +211,16 @@ class TfSequenceExampleDecoder(data_decoder.DataDecoder): tf.FixedLenFeature((), tf.int64)) self._items_to_handlers[fields.InputDataFields.context_feature_length] = ( slim_example_decoder.Tensor('image/context_feature_length')) + + if load_context_image_ids: + self._context_keys_to_features['image/context_features_image_id_list'] = ( + tf.VarLenFeature(dtype=tf.string)) + self._items_to_handlers[ + fields.InputDataFields.context_features_image_id_list] = ( + slim_example_decoder.Tensor( + 'image/context_features_image_id_list', + default_value='')) + self._fully_annotated = fully_annotated def decode(self, tf_seq_example_string_tensor): @@ -239,6 +253,8 @@ class TfSequenceExampleDecoder(data_decoder.DataDecoder): the length of each feature in context_features fields.InputDataFields.image: a [num_frames] string tensor with the encoded images. + fields.inputDataFields.context_features_image_id_list: a 1D vector + of shape [num_context_features] containing string tensors. 
""" serialized_example = tf.reshape(tf_seq_example_string_tensor, shape=[]) decoder = slim_example_decoder.TFSequenceExampleDecoder( diff --git a/research/object_detection/data_decoders/tf_sequence_example_decoder_test.py b/research/object_detection/data_decoders/tf_sequence_example_decoder_test.py index 2ea1c6163454cf2d05065713b2e0657f24af5e64..4aa3afbe073a8df2f221ef741cdfb4adc4207cf4 100644 --- a/research/object_detection/data_decoders/tf_sequence_example_decoder_test.py +++ b/research/object_detection/data_decoders/tf_sequence_example_decoder_test.py @@ -120,6 +120,145 @@ class TfSequenceExampleDecoderTest(test_case.TestCase): self.assertAllEqual(expected_groundtruth_classes, tensor_dict_out[flds.groundtruth_classes]) + def test_decode_sequence_example_context(self): + num_frames = 4 + image_height = 20 + image_width = 30 + + expected_groundtruth_boxes = [ + [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]], + [[0.2, 0.2, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], + [[0.0, 0.0, 1.0, 1.0], [0.1, 0.1, 0.2, 0.2]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]] + ] + expected_groundtruth_classes = [ + [-1, -1], + [-1, 1], + [1, 2], + [-1, -1] + ] + + expected_context_features = np.array( + [[0.0, 0.1, 0.2], [0.3, 0.4, 0.5]], dtype=np.float32) + + flds = fields.InputDataFields + encoded_images = self._make_random_serialized_jpeg_images( + num_frames, image_height, image_width) + + def graph_fn(): + label_map_proto_file = os.path.join(self.get_temp_dir(), 'labelmap.pbtxt') + self._create_label_map(label_map_proto_file) + decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder( + label_map_proto_file=label_map_proto_file, + load_context_features=True) + sequence_example_serialized = seq_example_util.make_sequence_example( + dataset_name='video_dataset', + video_id='video', + encoded_images=encoded_images, + image_height=image_height, + image_width=image_width, + image_format='JPEG', + image_source_ids=[str(i) for i in range(num_frames)], + is_annotated=[[1], [1], [1], [1]], + 
bboxes=[ + [[0., 0., 1., 1.]], # Frame 0. + [[0.2, 0.2, 1., 1.], + [0., 0., 1., 1.]], # Frame 1. + [[0., 0., 1., 1.], # Frame 2. + [0.1, 0.1, 0.2, 0.2]], + [[]], # Frame 3. + ], + label_strings=[ + ['fox'], # Frame 0. Fox will be filtered out. + ['fox', 'dog'], # Frame 1. Fox will be filtered out. + ['dog', 'cat'], # Frame 2. + [], # Frame 3 + ], + context_features=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5], + context_feature_length=[3], + context_features_image_id_list=[b'im_1', b'im_2'] + ).SerializeToString() + + example_string_tensor = tf.convert_to_tensor(sequence_example_serialized) + return decoder.decode(example_string_tensor) + + tensor_dict_out = self.execute(graph_fn, []) + self.assertAllClose(expected_groundtruth_boxes, + tensor_dict_out[flds.groundtruth_boxes]) + self.assertAllEqual(expected_groundtruth_classes, + tensor_dict_out[flds.groundtruth_classes]) + self.assertAllClose(expected_context_features, + tensor_dict_out[flds.context_features]) + + def test_decode_sequence_example_context_image_id_list(self): + num_frames = 4 + image_height = 20 + image_width = 30 + + expected_groundtruth_boxes = [ + [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]], + [[0.2, 0.2, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]], + [[0.0, 0.0, 1.0, 1.0], [0.1, 0.1, 0.2, 0.2]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]] + ] + expected_groundtruth_classes = [ + [-1, -1], + [-1, 1], + [1, 2], + [-1, -1] + ] + + expected_context_image_ids = [b'im_1', b'im_2'] + + flds = fields.InputDataFields + encoded_images = self._make_random_serialized_jpeg_images( + num_frames, image_height, image_width) + + def graph_fn(): + label_map_proto_file = os.path.join(self.get_temp_dir(), 'labelmap.pbtxt') + self._create_label_map(label_map_proto_file) + decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder( + label_map_proto_file=label_map_proto_file, + load_context_image_ids=True) + sequence_example_serialized = seq_example_util.make_sequence_example( + dataset_name='video_dataset', + video_id='video', + 
encoded_images=encoded_images, + image_height=image_height, + image_width=image_width, + image_format='JPEG', + image_source_ids=[str(i) for i in range(num_frames)], + is_annotated=[[1], [1], [1], [1]], + bboxes=[ + [[0., 0., 1., 1.]], # Frame 0. + [[0.2, 0.2, 1., 1.], + [0., 0., 1., 1.]], # Frame 1. + [[0., 0., 1., 1.], # Frame 2. + [0.1, 0.1, 0.2, 0.2]], + [[]], # Frame 3. + ], + label_strings=[ + ['fox'], # Frame 0. Fox will be filtered out. + ['fox', 'dog'], # Frame 1. Fox will be filtered out. + ['dog', 'cat'], # Frame 2. + [], # Frame 3 + ], + context_features=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5], + context_feature_length=[3], + context_features_image_id_list=[b'im_1', b'im_2'] + ).SerializeToString() + + example_string_tensor = tf.convert_to_tensor(sequence_example_serialized) + return decoder.decode(example_string_tensor) + + tensor_dict_out = self.execute(graph_fn, []) + self.assertAllClose(expected_groundtruth_boxes, + tensor_dict_out[flds.groundtruth_boxes]) + self.assertAllEqual(expected_groundtruth_classes, + tensor_dict_out[flds.groundtruth_classes]) + self.assertAllEqual(expected_context_image_ids, + tensor_dict_out[flds.context_features_image_id_list]) + def test_decode_sequence_example_negative_clip(self): num_frames = 4 image_height = 20 diff --git a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py index 89f89467c5e887f1410a6c706e10c2ab9002c48d..21890aa9a02362316a9b9a2377b005fc783ee0fe 100644 --- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py +++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py @@ -53,7 +53,7 @@ import os import numpy as np import PIL.Image import six -import tensorflow.compat.v1 as tf +import tensorflow as tf try: import apache_beam as beam # pylint:disable=g-import-not-at-top @@ -294,20 +294,46 @@ class SortGroupedDataFn(beam.DoFn): sorted_example_list = 
sorted(example_list, key=sorting_fn) + num_embeddings = 0 + for example in sorted_example_list: + num_embeddings += example.features.feature[ + 'image/embedding_count'].int64_list.value[0] + self._num_examples_processed.inc(1) - if len(sorted_example_list) > self._max_num_elements_in_context_features: + # To handle cases where there are more context embeddings within + # the time horizon than the specified maximum, we split the context group + # into subsets sequentially in time, with each subset having the maximum + # number of context embeddings except the final one, which holds the + # remainder. + if num_embeddings > self._max_num_elements_in_context_features: leftovers = sorted_example_list output_list = [] count = 0 self._too_many_elements.inc(1) - while len(leftovers) > self._max_num_elements_in_context_features: + num_embeddings = 0 + max_idx = 0 + for idx, example in enumerate(leftovers): + num_embeddings += example.features.feature[ + 'image/embedding_count'].int64_list.value[0] + if num_embeddings <= self._max_num_elements_in_context_features: + max_idx = idx + while num_embeddings > self._max_num_elements_in_context_features: self._split_elements.inc(1) new_key = key + six.ensure_binary('_' + str(count)) - new_list = leftovers[:self._max_num_elements_in_context_features] + new_list = leftovers[:max_idx] output_list.append((new_key, new_list)) - leftovers = leftovers[:self._max_num_elements_in_context_features] + leftovers = leftovers[max_idx:] count += 1 + num_embeddings = 0 + max_idx = 0 + for idx, example in enumerate(leftovers): + num_embeddings += example.features.feature[ + 'image/embedding_count'].int64_list.value[0] + if num_embeddings <= self._max_num_elements_in_context_features: + max_idx = idx + new_key = key + six.ensure_binary('_' + str(count)) + output_list.append((new_key, leftovers)) else: output_list = [(key, sorted_example_list)] @@ -454,12 +480,15 @@ class GenerateContextFn(beam.DoFn): example_embedding = 
list(example.features.feature[ 'image/embedding'].float_list.value) context_features.extend(example_embedding) - example.features.feature[ - 'context_features_idx'].int64_list.value.append(count) - count += 1 + num_embeddings = example.features.feature[ + 'image/embedding_count'].int64_list.value[0] example_image_id = example.features.feature[ 'image/source_id'].bytes_list.value[0] - context_features_image_id_list.append(example_image_id) + for _ in range(num_embeddings): + example.features.feature[ + 'context_features_idx'].int64_list.value.append(count) + count += 1 + context_features_image_id_list.append(example_image_id) if not example_embedding: example_embedding.append(np.zeros(self._context_feature_length)) @@ -926,6 +955,7 @@ def main(argv=None, save_main_session=True): args.context_features_score_threshold, args.keep_only_positives_gt, args.max_num_elements_in_context_features, + args.num_shards, args.output_type, args.max_clip_length, args.context_feature_length) diff --git a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py deleted file mode 100644 index 42f970c226dce687f6d99b30912e5673208d15dd..0000000000000000000000000000000000000000 --- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py +++ /dev/null @@ -1,396 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for add_context_to_examples.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import contextlib -import datetime -import os -import tempfile -import unittest - -import numpy as np -import six -import tensorflow.compat.v1 as tf - -from object_detection.dataset_tools.context_rcnn import add_context_to_examples -from object_detection.utils import tf_version - - -try: - import apache_beam as beam # pylint:disable=g-import-not-at-top -except ModuleNotFoundError: - pass - - -@contextlib.contextmanager -def InMemoryTFRecord(entries): - temp = tempfile.NamedTemporaryFile(delete=False) - filename = temp.name - try: - with tf.python_io.TFRecordWriter(filename) as writer: - for value in entries: - writer.write(value) - yield filename - finally: - os.unlink(temp.name) - - -def BytesFeature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def BytesListFeature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) - - -def Int64Feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def Int64ListFeature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) - - -def FloatListFeature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=value)) - - -@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') -class GenerateContextDataTest(tf.test.TestCase): - - def _create_first_tf_example(self): - with self.test_session(): - encoded_image = tf.image.encode_jpeg( - tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval() - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': BytesFeature(encoded_image), - 'image/source_id': 
BytesFeature(six.ensure_binary('image_id_1')), - 'image/height': Int64Feature(4), - 'image/width': Int64Feature(4), - 'image/object/class/label': Int64ListFeature([5, 5]), - 'image/object/class/text': BytesListFeature([six.ensure_binary('hyena'), - six.ensure_binary('hyena') - ]), - 'image/object/bbox/xmin': FloatListFeature([0.0, 0.1]), - 'image/object/bbox/xmax': FloatListFeature([0.2, 0.3]), - 'image/object/bbox/ymin': FloatListFeature([0.4, 0.5]), - 'image/object/bbox/ymax': FloatListFeature([0.6, 0.7]), - 'image/seq_id': BytesFeature(six.ensure_binary('01')), - 'image/seq_num_frames': Int64Feature(2), - 'image/seq_frame_num': Int64Feature(0), - 'image/date_captured': BytesFeature( - six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 0, 0)))), - 'image/embedding': FloatListFeature([0.1, 0.2, 0.3]), - 'image/embedding_score': FloatListFeature([0.9]), - 'image/embedding_length': Int64Feature(3) - - })) - - return example.SerializeToString() - - def _create_second_tf_example(self): - with self.test_session(): - encoded_image = tf.image.encode_jpeg( - tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval() - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': BytesFeature(encoded_image), - 'image/source_id': BytesFeature(six.ensure_binary('image_id_2')), - 'image/height': Int64Feature(4), - 'image/width': Int64Feature(4), - 'image/object/class/label': Int64ListFeature([5]), - 'image/object/class/text': BytesListFeature([six.ensure_binary('hyena') - ]), - 'image/object/bbox/xmin': FloatListFeature([0.0]), - 'image/object/bbox/xmax': FloatListFeature([0.1]), - 'image/object/bbox/ymin': FloatListFeature([0.2]), - 'image/object/bbox/ymax': FloatListFeature([0.3]), - 'image/seq_id': BytesFeature(six.ensure_binary('01')), - 'image/seq_num_frames': Int64Feature(2), - 'image/seq_frame_num': Int64Feature(1), - 'image/date_captured': BytesFeature( - six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 1, 0)))), - 'image/embedding': 
FloatListFeature([0.4, 0.5, 0.6]), - 'image/embedding_score': FloatListFeature([0.9]), - 'image/embedding_length': Int64Feature(3) - })) - - return example.SerializeToString() - - def assert_expected_examples(self, tf_example_list): - self.assertAllEqual( - {tf_example.features.feature['image/source_id'].bytes_list.value[0] - for tf_example in tf_example_list}, - {six.ensure_binary('image_id_1'), six.ensure_binary('image_id_2')}) - self.assertAllClose( - tf_example_list[0].features.feature[ - 'image/context_features'].float_list.value, - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) - self.assertAllClose( - tf_example_list[1].features.feature[ - 'image/context_features'].float_list.value, - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) - - def assert_expected_sequence_example(self, tf_sequence_example_list): - tf_sequence_example = tf_sequence_example_list[0] - num_frames = 2 - - self.assertAllEqual( - tf_sequence_example.context.feature[ - 'clip/media_id'].bytes_list.value[0], six.ensure_binary( - '01_0')) - self.assertAllClose( - tf_sequence_example.context.feature[ - 'image/context_features'].float_list.value, - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) - - seq_feature_dict = tf_sequence_example.feature_lists.feature_list - - self.assertLen( - seq_feature_dict['image/encoded'].feature[:], - num_frames) - actual_timestamps = [ - feature.int64_list.value[0] for feature - in seq_feature_dict['image/timestamp'].feature] - timestamps = [0, 1] - self.assertAllEqual(timestamps, actual_timestamps) - - # First image. 
- self.assertAllClose( - [0.4, 0.5], - seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:]) - self.assertAllClose( - [0.0, 0.1], - seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:]) - self.assertAllClose( - [0.6, 0.7], - seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:]) - self.assertAllClose( - [0.2, 0.3], - seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:]) - self.assertAllEqual( - [six.ensure_binary('hyena'), six.ensure_binary('hyena')], - seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:]) - - # Second example. - self.assertAllClose( - [0.2], - seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:]) - self.assertAllClose( - [0.0], - seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:]) - self.assertAllClose( - [0.3], - seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:]) - self.assertAllClose( - [0.1], - seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:]) - self.assertAllEqual( - [six.ensure_binary('hyena')], - seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:]) - - def assert_expected_key(self, key): - self.assertAllEqual(key, b'01') - - def assert_sorted(self, example_collection): - example_list = list(example_collection) - counter = 0 - for example in example_list: - frame_num = example.features.feature[ - 'image/seq_frame_num'].int64_list.value[0] - self.assertGreaterEqual(frame_num, counter) - counter = frame_num - - def assert_context(self, example_collection): - example_list = list(example_collection) - for example in example_list: - context = example.features.feature[ - 'image/context_features'].float_list.value - self.assertAllClose([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], context) - - def assert_resized(self, example): - width = example.features.feature['image/width'].int64_list.value[0] - self.assertAllEqual(width, 2) - height = example.features.feature['image/height'].int64_list.value[0] - 
self.assertAllEqual(height, 2) - - def assert_size(self, example): - width = example.features.feature['image/width'].int64_list.value[0] - self.assertAllEqual(width, 4) - height = example.features.feature['image/height'].int64_list.value[0] - self.assertAllEqual(height, 4) - - def test_sliding_window(self): - example_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] - max_clip_length = 3 - stride_length = 3 - out_list = [list(i) for i in add_context_to_examples.get_sliding_window( - example_list, max_clip_length, stride_length)] - self.assertAllEqual(out_list, [['a', 'b', 'c'], - ['d', 'e', 'f'], - ['g']]) - - def test_rekey_data_fn(self): - sequence_key = 'image/seq_id' - time_horizon = None - reduce_image_size = False - max_dim = None - - rekey_fn = add_context_to_examples.ReKeyDataFn( - sequence_key, time_horizon, - reduce_image_size, max_dim) - output = rekey_fn.process(self._create_first_tf_example()) - - self.assert_expected_key(output[0][0]) - self.assert_size(output[0][1]) - - def test_rekey_data_fn_w_resize(self): - sequence_key = 'image/seq_id' - time_horizon = None - reduce_image_size = True - max_dim = 2 - - rekey_fn = add_context_to_examples.ReKeyDataFn( - sequence_key, time_horizon, - reduce_image_size, max_dim) - output = rekey_fn.process(self._create_first_tf_example()) - - self.assert_expected_key(output[0][0]) - self.assert_resized(output[0][1]) - - def test_sort_fn(self): - sequence_key = 'image/seq_id' - sorted_image_ids = False - max_num_elements_in_context_features = 10 - sort_fn = add_context_to_examples.SortGroupedDataFn( - sequence_key, sorted_image_ids, max_num_elements_in_context_features) - output = sort_fn.process( - ('dummy_key', [tf.train.Example.FromString( - self._create_second_tf_example()), - tf.train.Example.FromString( - self._create_first_tf_example())])) - - self.assert_sorted(output[0][1]) - - def test_add_context_fn(self): - sequence_key = 'image/seq_id' - add_context_features = True - image_ids_to_keep = 'All' - context_fn = 
add_context_to_examples.GenerateContextFn( - sequence_key, add_context_features, image_ids_to_keep) - output = context_fn.process( - ('dummy_key', [tf.train.Example.FromString( - self._create_first_tf_example()), - tf.train.Example.FromString( - self._create_second_tf_example())])) - - self.assertEqual(len(output), 2) - self.assert_context(output) - - def test_add_context_fn_output_sequence_example(self): - sequence_key = 'image/seq_id' - add_context_features = True - image_ids_to_keep = 'All' - context_fn = add_context_to_examples.GenerateContextFn( - sequence_key, add_context_features, image_ids_to_keep, - output_type='tf_sequence_example') - output = context_fn.process( - ('01', - [tf.train.Example.FromString(self._create_first_tf_example()), - tf.train.Example.FromString(self._create_second_tf_example())])) - - self.assertEqual(len(output), 1) - self.assert_expected_sequence_example(output) - - def test_add_context_fn_output_sequence_example_cliplen(self): - sequence_key = 'image/seq_id' - add_context_features = True - image_ids_to_keep = 'All' - context_fn = add_context_to_examples.GenerateContextFn( - sequence_key, add_context_features, image_ids_to_keep, - output_type='tf_sequence_example', max_clip_length=1) - output = context_fn.process( - ('01', - [tf.train.Example.FromString(self._create_first_tf_example()), - tf.train.Example.FromString(self._create_second_tf_example())])) - self.assertEqual(len(output), 2) - - def test_beam_pipeline(self): - with InMemoryTFRecord( - [self._create_first_tf_example(), - self._create_second_tf_example()]) as input_tfrecord: - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') - sequence_key = six.ensure_binary('image/seq_id') - max_num_elements = 10 - num_shards = 1 - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - add_context_to_examples.construct_pipeline( - 
p, - input_tfrecord, - output_tfrecord, - sequence_key, - max_num_elements_in_context_features=max_num_elements, - num_shards=num_shards) - p.run() - filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') - actual_output = [] - record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), 2) - self.assert_expected_examples([tf.train.Example.FromString( - tf_example) for tf_example in actual_output]) - - def test_beam_pipeline_sequence_example(self): - with InMemoryTFRecord( - [self._create_first_tf_example(), - self._create_second_tf_example()]) as input_tfrecord: - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') - sequence_key = six.ensure_binary('image/seq_id') - max_num_elements = 10 - num_shards = 1 - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - add_context_to_examples.construct_pipeline( - p, - input_tfrecord, - output_tfrecord, - sequence_key, - max_num_elements_in_context_features=max_num_elements, - num_shards=num_shards, - output_type='tf_sequence_example') - p.run() - filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') - actual_output = [] - record_iterator = tf.python_io.tf_record_iterator( - path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), 1) - self.assert_expected_sequence_example( - [tf.train.SequenceExample.FromString( - tf_example) for tf_example in actual_output]) - -if __name__ == '__main__': - tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf2_test.py b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf2_test.py new file mode 100644 index 
0000000000000000000000000000000000000000..61020b008bcf464630a55a29d56928e6b8cd41cf --- /dev/null +++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf2_test.py @@ -0,0 +1,398 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for add_context_to_examples.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import contextlib +import datetime +import os +import tempfile +import unittest + +import numpy as np +import six +import tensorflow as tf + +from object_detection.utils import tf_version + +if tf_version.is_tf2(): + from object_detection.dataset_tools.context_rcnn import add_context_to_examples # pylint:disable=g-import-not-at-top + +try: + import apache_beam as beam # pylint:disable=g-import-not-at-top +except ModuleNotFoundError: + pass + + +@contextlib.contextmanager +def InMemoryTFRecord(entries): + temp = tempfile.NamedTemporaryFile(delete=False) + filename = temp.name + try: + with tf.io.TFRecordWriter(filename) as writer: + for value in entries: + writer.write(value) + yield filename + finally: + os.unlink(temp.name) + + +def BytesFeature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def BytesListFeature(value): + return 
tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + + +def Int64Feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def Int64ListFeature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + +def FloatListFeature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class GenerateContextDataTest(tf.test.TestCase): + + def _create_first_tf_example(self): + encoded_image = tf.io.encode_jpeg( + tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).numpy() + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': BytesFeature(encoded_image), + 'image/source_id': BytesFeature(six.ensure_binary('image_id_1')), + 'image/height': Int64Feature(4), + 'image/width': Int64Feature(4), + 'image/object/class/label': Int64ListFeature([5, 5]), + 'image/object/class/text': BytesListFeature([six.ensure_binary('hyena'), + six.ensure_binary('hyena') + ]), + 'image/object/bbox/xmin': FloatListFeature([0.0, 0.1]), + 'image/object/bbox/xmax': FloatListFeature([0.2, 0.3]), + 'image/object/bbox/ymin': FloatListFeature([0.4, 0.5]), + 'image/object/bbox/ymax': FloatListFeature([0.6, 0.7]), + 'image/seq_id': BytesFeature(six.ensure_binary('01')), + 'image/seq_num_frames': Int64Feature(2), + 'image/seq_frame_num': Int64Feature(0), + 'image/date_captured': BytesFeature( + six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 0, 0)))), + 'image/embedding': FloatListFeature([0.1, 0.2, 0.3]), + 'image/embedding_score': FloatListFeature([0.9]), + 'image/embedding_length': Int64Feature(3), + 'image/embedding_count': Int64Feature(1) + + })) + + return example.SerializeToString() + + def _create_second_tf_example(self): + encoded_image = tf.io.encode_jpeg( + tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).numpy() + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': 
BytesFeature(encoded_image), + 'image/source_id': BytesFeature(six.ensure_binary('image_id_2')), + 'image/height': Int64Feature(4), + 'image/width': Int64Feature(4), + 'image/object/class/label': Int64ListFeature([5]), + 'image/object/class/text': BytesListFeature([six.ensure_binary('hyena') + ]), + 'image/object/bbox/xmin': FloatListFeature([0.0]), + 'image/object/bbox/xmax': FloatListFeature([0.1]), + 'image/object/bbox/ymin': FloatListFeature([0.2]), + 'image/object/bbox/ymax': FloatListFeature([0.3]), + 'image/seq_id': BytesFeature(six.ensure_binary('01')), + 'image/seq_num_frames': Int64Feature(2), + 'image/seq_frame_num': Int64Feature(1), + 'image/date_captured': BytesFeature( + six.ensure_binary(str(datetime.datetime(2020, 1, 1, 1, 1, 0)))), + 'image/embedding': FloatListFeature([0.4, 0.5, 0.6]), + 'image/embedding_score': FloatListFeature([0.9]), + 'image/embedding_length': Int64Feature(3), + 'image/embedding_count': Int64Feature(1) + })) + + return example.SerializeToString() + + def assert_expected_examples(self, tf_example_list): + self.assertAllEqual( + {tf_example.features.feature['image/source_id'].bytes_list.value[0] + for tf_example in tf_example_list}, + {six.ensure_binary('image_id_1'), six.ensure_binary('image_id_2')}) + self.assertAllClose( + tf_example_list[0].features.feature[ + 'image/context_features'].float_list.value, + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + self.assertAllClose( + tf_example_list[1].features.feature[ + 'image/context_features'].float_list.value, + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + + def assert_expected_sequence_example(self, tf_sequence_example_list): + tf_sequence_example = tf_sequence_example_list[0] + num_frames = 2 + + self.assertAllEqual( + tf_sequence_example.context.feature[ + 'clip/media_id'].bytes_list.value[0], six.ensure_binary( + '01_0')) + self.assertAllClose( + tf_sequence_example.context.feature[ + 'image/context_features'].float_list.value, + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]) + + seq_feature_dict = 
tf_sequence_example.feature_lists.feature_list + + self.assertLen( + seq_feature_dict['image/encoded'].feature[:], + num_frames) + actual_timestamps = [ + feature.int64_list.value[0] for feature + in seq_feature_dict['image/timestamp'].feature] + timestamps = [0, 1] + self.assertAllEqual(timestamps, actual_timestamps) + + # First image. + self.assertAllClose( + [0.4, 0.5], + seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:]) + self.assertAllClose( + [0.0, 0.1], + seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:]) + self.assertAllClose( + [0.6, 0.7], + seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:]) + self.assertAllClose( + [0.2, 0.3], + seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:]) + self.assertAllEqual( + [six.ensure_binary('hyena'), six.ensure_binary('hyena')], + seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:]) + + # Second example. + self.assertAllClose( + [0.2], + seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:]) + self.assertAllClose( + [0.0], + seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:]) + self.assertAllClose( + [0.3], + seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:]) + self.assertAllClose( + [0.1], + seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:]) + self.assertAllEqual( + [six.ensure_binary('hyena')], + seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:]) + + def assert_expected_key(self, key): + self.assertAllEqual(key, b'01') + + def assert_sorted(self, example_collection): + example_list = list(example_collection) + counter = 0 + for example in example_list: + frame_num = example.features.feature[ + 'image/seq_frame_num'].int64_list.value[0] + self.assertGreaterEqual(frame_num, counter) + counter = frame_num + + def assert_context(self, example_collection): + example_list = list(example_collection) + for example in example_list: + context = 
example.features.feature[ + 'image/context_features'].float_list.value + self.assertAllClose([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], context) + + def assert_resized(self, example): + width = example.features.feature['image/width'].int64_list.value[0] + self.assertAllEqual(width, 2) + height = example.features.feature['image/height'].int64_list.value[0] + self.assertAllEqual(height, 2) + + def assert_size(self, example): + width = example.features.feature['image/width'].int64_list.value[0] + self.assertAllEqual(width, 4) + height = example.features.feature['image/height'].int64_list.value[0] + self.assertAllEqual(height, 4) + + def test_sliding_window(self): + example_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + max_clip_length = 3 + stride_length = 3 + out_list = [list(i) for i in add_context_to_examples.get_sliding_window( + example_list, max_clip_length, stride_length)] + self.assertAllEqual(out_list, [['a', 'b', 'c'], + ['d', 'e', 'f'], + ['g']]) + + def test_rekey_data_fn(self): + sequence_key = 'image/seq_id' + time_horizon = None + reduce_image_size = False + max_dim = None + + rekey_fn = add_context_to_examples.ReKeyDataFn( + sequence_key, time_horizon, + reduce_image_size, max_dim) + output = rekey_fn.process(self._create_first_tf_example()) + + self.assert_expected_key(output[0][0]) + self.assert_size(output[0][1]) + + def test_rekey_data_fn_w_resize(self): + sequence_key = 'image/seq_id' + time_horizon = None + reduce_image_size = True + max_dim = 2 + + rekey_fn = add_context_to_examples.ReKeyDataFn( + sequence_key, time_horizon, + reduce_image_size, max_dim) + output = rekey_fn.process(self._create_first_tf_example()) + + self.assert_expected_key(output[0][0]) + self.assert_resized(output[0][1]) + + def test_sort_fn(self): + sequence_key = 'image/seq_id' + sorted_image_ids = False + max_num_elements_in_context_features = 10 + sort_fn = add_context_to_examples.SortGroupedDataFn( + sequence_key, sorted_image_ids, max_num_elements_in_context_features) + output = 
sort_fn.process( + ('dummy_key', [tf.train.Example.FromString( + self._create_second_tf_example()), + tf.train.Example.FromString( + self._create_first_tf_example())])) + + self.assert_sorted(output[0][1]) + + def test_add_context_fn(self): + sequence_key = 'image/seq_id' + add_context_features = True + image_ids_to_keep = 'All' + context_fn = add_context_to_examples.GenerateContextFn( + sequence_key, add_context_features, image_ids_to_keep) + output = context_fn.process( + ('dummy_key', [tf.train.Example.FromString( + self._create_first_tf_example()), + tf.train.Example.FromString( + self._create_second_tf_example())])) + + self.assertEqual(len(output), 2) + self.assert_context(output) + + def test_add_context_fn_output_sequence_example(self): + sequence_key = 'image/seq_id' + add_context_features = True + image_ids_to_keep = 'All' + context_fn = add_context_to_examples.GenerateContextFn( + sequence_key, add_context_features, image_ids_to_keep, + output_type='tf_sequence_example') + output = context_fn.process( + ('01', + [tf.train.Example.FromString(self._create_first_tf_example()), + tf.train.Example.FromString(self._create_second_tf_example())])) + + self.assertEqual(len(output), 1) + self.assert_expected_sequence_example(output) + + def test_add_context_fn_output_sequence_example_cliplen(self): + sequence_key = 'image/seq_id' + add_context_features = True + image_ids_to_keep = 'All' + context_fn = add_context_to_examples.GenerateContextFn( + sequence_key, add_context_features, image_ids_to_keep, + output_type='tf_sequence_example', max_clip_length=1) + output = context_fn.process( + ('01', + [tf.train.Example.FromString(self._create_first_tf_example()), + tf.train.Example.FromString(self._create_second_tf_example())])) + self.assertEqual(len(output), 2) + + def test_beam_pipeline(self): + with InMemoryTFRecord( + [self._create_first_tf_example(), + self._create_second_tf_example()]) as input_tfrecord: + temp_dir = 
tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') + sequence_key = six.ensure_binary('image/seq_id') + max_num_elements = 10 + num_shards = 1 + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + add_context_to_examples.construct_pipeline( + p, + input_tfrecord, + output_tfrecord, + sequence_key, + max_num_elements_in_context_features=max_num_elements, + num_shards=num_shards) + p.run() + filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') + actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), 2) + self.assert_expected_examples([tf.train.Example.FromString( + tf_example) for tf_example in actual_output]) + + def test_beam_pipeline_sequence_example(self): + with InMemoryTFRecord( + [self._create_first_tf_example(), + self._create_second_tf_example()]) as input_tfrecord: + temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') + sequence_key = six.ensure_binary('image/seq_id') + max_num_elements = 10 + num_shards = 1 + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + add_context_to_examples.construct_pipeline( + p, + input_tfrecord, + output_tfrecord, + sequence_key, + max_num_elements_in_context_features=max_num_elements, + num_shards=num_shards, + output_type='tf_sequence_example') + p.run() + filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') + actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), 1) 
+ self.assert_expected_sequence_example( + [tf.train.SequenceExample.FromString( + tf_example) for tf_example in actual_output]) + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py index bafc406be12ad3f7a8e4d83cde3707bc0b48b23e..dbf3cad0eacaa4883aba340e34bb623a96d3af50 100644 --- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py +++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py @@ -37,11 +37,10 @@ import argparse import hashlib import io import json -import logging import os import numpy as np import PIL.Image -import tensorflow.compat.v1 as tf +import tensorflow as tf from object_detection.utils import dataset_util try: @@ -110,16 +109,9 @@ class ParseImage(beam.DoFn): encoded_jpg = fid.read() encoded_jpg_io = io.BytesIO(encoded_jpg) image = PIL.Image.open(encoded_jpg_io) - # Ensure the image can be read by tf - with tf.Graph().as_default(): - image = tf.image.decode_jpeg(encoded_jpg, channels=3) - init_op = tf.initialize_all_tables() - with tf.Session() as sess: - sess.run(init_op) - sess.run(image) - except Exception as e: # pylint: disable=broad-except + image = tf.io.decode_jpeg(encoded_jpg, channels=3) + except Exception: # pylint: disable=broad-except # The image file is missing or corrupt - tf.logging.error(str(e)) return [] key = hashlib.sha256(encoded_jpg).hexdigest() @@ -257,8 +249,6 @@ def create_pipeline(pipeline, keep_bboxes: Whether to keep any bounding boxes that exist in the json file """ - logging.info('Reading data from COCO-CameraTraps Dataset.') - data = load_json_data(input_annotations_file) num_shards = int(np.ceil(float(len(data['images']))/num_images_per_shard)) diff --git 
a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py deleted file mode 100644 index 19018a3a1158d3fdebd9b81f5af3e24b6327cd74..0000000000000000000000000000000000000000 --- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for create_cococameratraps_tfexample_main.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import datetime -import json -import os -import tempfile -import unittest - -import numpy as np - -from PIL import Image -import tensorflow.compat.v1 as tf -from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main -from object_detection.utils import tf_version - -try: - import apache_beam as beam # pylint:disable=g-import-not-at-top -except ModuleNotFoundError: - pass - - -@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') -class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase): - - IMAGE_HEIGHT = 360 - IMAGE_WIDTH = 480 - - def _write_random_images_to_directory(self, directory, num_frames): - for frame_num in range(num_frames): - img = np.random.randint(0, high=256, - size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3), - dtype=np.uint8) - pil_image = Image.fromarray(img) - fname = 'im_' + str(frame_num) + '.jpg' - pil_image.save(os.path.join(directory, fname), 'JPEG') - - def _create_json_file(self, directory, num_frames, keep_bboxes=False): - json_dict = {'images': [], 'annotations': []} - json_dict['categories'] = [{'id': 0, 'name': 'empty'}, - {'id': 1, 'name': 'animal'}] - for idx in range(num_frames): - im = {'id': 'im_' + str(idx), - 'file_name': 'im_' + str(idx) + '.jpg', - 'height': self.IMAGE_HEIGHT, - 'width': self.IMAGE_WIDTH, - 'seq_id': 'seq_1', - 'seq_num_frames': num_frames, - 'frame_num': idx, - 'location': 'loc_' + str(idx), - 'date_captured': str(datetime.datetime.now()) - } - json_dict['images'].append(im) - ann = {'id': 'ann' + str(idx), - 'image_id': 'im_' + str(idx), - 'category_id': 1, - } - if keep_bboxes: - ann['bbox'] = [0.0 * self.IMAGE_WIDTH, - 0.1 * self.IMAGE_HEIGHT, - 0.5 * self.IMAGE_WIDTH, - 0.5 * self.IMAGE_HEIGHT] - 
json_dict['annotations'].append(ann) - - json_path = os.path.join(directory, 'test_file.json') - with tf.io.gfile.GFile(json_path, 'w') as f: - json.dump(json_dict, f) - return json_path - - def assert_expected_example_bbox(self, example): - self.assertAllClose( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.1]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.0]) - self.assertAllClose( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.6]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.5]) - self.assertAllClose( - example.features.feature['image/object/class/label'] - .int64_list.value, [1]) - self.assertAllEqual( - example.features.feature['image/object/class/text'] - .bytes_list.value, [b'animal']) - self.assertAllClose( - example.features.feature['image/class/label'] - .int64_list.value, [1]) - self.assertAllEqual( - example.features.feature['image/class/text'] - .bytes_list.value, [b'animal']) - - # Check other essential attributes. 
- self.assertAllEqual( - example.features.feature['image/height'].int64_list.value, - [self.IMAGE_HEIGHT]) - self.assertAllEqual( - example.features.feature['image/width'].int64_list.value, - [self.IMAGE_WIDTH]) - self.assertAllEqual( - example.features.feature['image/source_id'].bytes_list.value, - [b'im_0']) - self.assertTrue( - example.features.feature['image/encoded'].bytes_list.value) - - def assert_expected_example(self, example): - self.assertAllClose( - example.features.feature['image/object/bbox/ymin'].float_list.value, - []) - self.assertAllClose( - example.features.feature['image/object/bbox/xmin'].float_list.value, - []) - self.assertAllClose( - example.features.feature['image/object/bbox/ymax'].float_list.value, - []) - self.assertAllClose( - example.features.feature['image/object/bbox/xmax'].float_list.value, - []) - self.assertAllClose( - example.features.feature['image/object/class/label'] - .int64_list.value, [1]) - self.assertAllEqual( - example.features.feature['image/object/class/text'] - .bytes_list.value, [b'animal']) - self.assertAllClose( - example.features.feature['image/class/label'] - .int64_list.value, [1]) - self.assertAllEqual( - example.features.feature['image/class/text'] - .bytes_list.value, [b'animal']) - - # Check other essential attributes. 
- self.assertAllEqual( - example.features.feature['image/height'].int64_list.value, - [self.IMAGE_HEIGHT]) - self.assertAllEqual( - example.features.feature['image/width'].int64_list.value, - [self.IMAGE_WIDTH]) - self.assertAllEqual( - example.features.feature['image/source_id'].bytes_list.value, - [b'im_0']) - self.assertTrue( - example.features.feature['image/encoded'].bytes_list.value) - - def test_beam_pipeline(self): - num_frames = 1 - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - json_path = self._create_json_file(temp_dir, num_frames) - output_tfrecord = temp_dir+'/output' - self._write_random_images_to_directory(temp_dir, num_frames) - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - create_cococameratraps_tfexample_main.create_pipeline( - p, temp_dir, json_path, - output_tfrecord_prefix=output_tfrecord) - p.run() - filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') - actual_output = [] - record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), num_frames) - self.assert_expected_example(tf.train.Example.FromString( - actual_output[0])) - - def test_beam_pipeline_bbox(self): - num_frames = 1 - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True) - output_tfrecord = temp_dir+'/output' - self._write_random_images_to_directory(temp_dir, num_frames) - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - create_cococameratraps_tfexample_main.create_pipeline( - p, temp_dir, json_path, - output_tfrecord_prefix=output_tfrecord, - keep_bboxes=True) - p.run() - filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????') - actual_output = [] - record_iterator 
= tf.python_io.tf_record_iterator(path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), num_frames) - self.assert_expected_example_bbox(tf.train.Example.FromString( - actual_output[0])) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf2_test.py b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..0a1ac203f334574a3b09654fd736047b8236fa38 --- /dev/null +++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf2_test.py @@ -0,0 +1,214 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for create_cococameratraps_tfexample_main.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import datetime +import json +import os +import tempfile +import unittest + +import numpy as np + +from PIL import Image +import tensorflow as tf +from object_detection.utils import tf_version + +if tf_version.is_tf2(): + from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main # pylint:disable=g-import-not-at-top + +try: + import apache_beam as beam # pylint:disable=g-import-not-at-top +except ModuleNotFoundError: + pass + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase): + + IMAGE_HEIGHT = 360 + IMAGE_WIDTH = 480 + + def _write_random_images_to_directory(self, directory, num_frames): + for frame_num in range(num_frames): + img = np.random.randint(0, high=256, + size=(self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 3), + dtype=np.uint8) + pil_image = Image.fromarray(img) + fname = 'im_' + str(frame_num) + '.jpg' + pil_image.save(os.path.join(directory, fname), 'JPEG') + + def _create_json_file(self, directory, num_frames, keep_bboxes=False): + json_dict = {'images': [], 'annotations': []} + json_dict['categories'] = [{'id': 0, 'name': 'empty'}, + {'id': 1, 'name': 'animal'}] + for idx in range(num_frames): + im = {'id': 'im_' + str(idx), + 'file_name': 'im_' + str(idx) + '.jpg', + 'height': self.IMAGE_HEIGHT, + 'width': self.IMAGE_WIDTH, + 'seq_id': 'seq_1', + 'seq_num_frames': num_frames, + 'frame_num': idx, + 'location': 'loc_' + str(idx), + 'date_captured': str(datetime.datetime.now()) + } + json_dict['images'].append(im) + ann = {'id': 'ann' + str(idx), + 'image_id': 'im_' + str(idx), + 'category_id': 1, + } + if keep_bboxes: + ann['bbox'] = [0.0 * self.IMAGE_WIDTH, + 0.1 * self.IMAGE_HEIGHT, + 0.5 * 
self.IMAGE_WIDTH, + 0.5 * self.IMAGE_HEIGHT] + json_dict['annotations'].append(ann) + + json_path = os.path.join(directory, 'test_file.json') + with tf.io.gfile.GFile(json_path, 'w') as f: + json.dump(json_dict, f) + return json_path + + def assert_expected_example_bbox(self, example): + self.assertAllClose( + example.features.feature['image/object/bbox/ymin'].float_list.value, + [0.1]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmin'].float_list.value, + [0.0]) + self.assertAllClose( + example.features.feature['image/object/bbox/ymax'].float_list.value, + [0.6]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmax'].float_list.value, + [0.5]) + self.assertAllClose( + example.features.feature['image/object/class/label'] + .int64_list.value, [1]) + self.assertAllEqual( + example.features.feature['image/object/class/text'] + .bytes_list.value, [b'animal']) + self.assertAllClose( + example.features.feature['image/class/label'] + .int64_list.value, [1]) + self.assertAllEqual( + example.features.feature['image/class/text'] + .bytes_list.value, [b'animal']) + + # Check other essential attributes. 
+ self.assertAllEqual( + example.features.feature['image/height'].int64_list.value, + [self.IMAGE_HEIGHT]) + self.assertAllEqual( + example.features.feature['image/width'].int64_list.value, + [self.IMAGE_WIDTH]) + self.assertAllEqual( + example.features.feature['image/source_id'].bytes_list.value, + [b'im_0']) + self.assertTrue( + example.features.feature['image/encoded'].bytes_list.value) + + def assert_expected_example(self, example): + self.assertAllClose( + example.features.feature['image/object/bbox/ymin'].float_list.value, + []) + self.assertAllClose( + example.features.feature['image/object/bbox/xmin'].float_list.value, + []) + self.assertAllClose( + example.features.feature['image/object/bbox/ymax'].float_list.value, + []) + self.assertAllClose( + example.features.feature['image/object/bbox/xmax'].float_list.value, + []) + self.assertAllClose( + example.features.feature['image/object/class/label'] + .int64_list.value, [1]) + self.assertAllEqual( + example.features.feature['image/object/class/text'] + .bytes_list.value, [b'animal']) + self.assertAllClose( + example.features.feature['image/class/label'] + .int64_list.value, [1]) + self.assertAllEqual( + example.features.feature['image/class/text'] + .bytes_list.value, [b'animal']) + + # Check other essential attributes. 
+ self.assertAllEqual( + example.features.feature['image/height'].int64_list.value, + [self.IMAGE_HEIGHT]) + self.assertAllEqual( + example.features.feature['image/width'].int64_list.value, + [self.IMAGE_WIDTH]) + self.assertAllEqual( + example.features.feature['image/source_id'].bytes_list.value, + [b'im_0']) + self.assertTrue( + example.features.feature['image/encoded'].bytes_list.value) + + def test_beam_pipeline(self): + num_frames = 1 + temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + json_path = self._create_json_file(temp_dir, num_frames) + output_tfrecord = temp_dir+'/output' + self._write_random_images_to_directory(temp_dir, num_frames) + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + create_cococameratraps_tfexample_main.create_pipeline( + p, temp_dir, json_path, + output_tfrecord_prefix=output_tfrecord) + p.run() + filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') + actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), num_frames) + self.assert_expected_example(tf.train.Example.FromString( + actual_output[0])) + + def test_beam_pipeline_bbox(self): + num_frames = 1 + temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + json_path = self._create_json_file(temp_dir, num_frames, keep_bboxes=True) + output_tfrecord = temp_dir+'/output' + self._write_random_images_to_directory(temp_dir, num_frames) + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + create_cococameratraps_tfexample_main.create_pipeline( + p, temp_dir, json_path, + output_tfrecord_prefix=output_tfrecord, + keep_bboxes=True) + p.run() + filenames = tf.io.gfile.glob(output_tfrecord+'-?????-of-?????') + 
actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), num_frames) + self.assert_expected_example_bbox(tf.train.Example.FromString( + actual_output[0])) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py index aafac9edf8388e33e95ebe9e61c1fa424930676a..c826873802f09ffbc48788576eb9c02038ceeb65 100644 --- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py +++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py @@ -48,7 +48,8 @@ from __future__ import print_function import argparse import os import threading -import tensorflow.compat.v1 as tf +import tensorflow as tf + try: import apache_beam as beam # pylint:disable=g-import-not-at-top except ModuleNotFoundError: @@ -77,7 +78,7 @@ class GenerateDetectionDataFn(beam.DoFn): self._num_examples_processed = beam.metrics.Metrics.counter( 'detection_data_generation', 'num_tf_examples_processed') - def start_bundle(self): + def setup(self): self._load_inference_model() def _load_inference_model(self): @@ -85,22 +86,7 @@ class GenerateDetectionDataFn(beam.DoFn): # one instance across all threads in the worker. This is possible since # tf.Session.run() is thread safe. 
with self.session_lock: - if self._session is None: - graph = tf.Graph() - self._session = tf.Session(graph=graph) - with graph.as_default(): - meta_graph = tf.saved_model.loader.load( - self._session, [tf.saved_model.tag_constants.SERVING], - self._model_dir) - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - self._input = graph.get_tensor_by_name(input_tensor_name) - self._boxes_node = graph.get_tensor_by_name( - signature.outputs['detection_boxes'].name) - self._scores_node = graph.get_tensor_by_name( - signature.outputs['detection_scores'].name) - self._num_detections_node = graph.get_tensor_by_name( - signature.outputs['num_detections'].name) + self._detect_fn = tf.saved_model.load(self._model_dir) def process(self, tfrecord_entry): return self._run_inference_and_generate_detections(tfrecord_entry) @@ -112,9 +98,11 @@ class GenerateDetectionDataFn(beam.DoFn): # There are already ground truth boxes for this image, just keep them. return [input_example] - detection_boxes, detection_scores, num_detections = self._session.run( - [self._boxes_node, self._scores_node, self._num_detections_node], - feed_dict={self._input: [tfrecord_entry]}) + detections = self._detect_fn.signatures['serving_default']( + (tf.expand_dims(tf.convert_to_tensor(tfrecord_entry), 0))) + detection_boxes = detections['detection_boxes'] + num_detections = detections['num_detections'] + detection_scores = detections['detection_scores'] example = tf.train.Example() diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py deleted file mode 100644 index 545e832338f156d7cc6426bdc92e36350be1c24c..0000000000000000000000000000000000000000 --- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for generate_detection_data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import os -import tempfile -import unittest -import numpy as np -import six -import tensorflow.compat.v1 as tf - -from object_detection import exporter -from object_detection.builders import model_builder -from object_detection.core import model -from object_detection.dataset_tools.context_rcnn import generate_detection_data -from object_detection.protos import pipeline_pb2 -from object_detection.utils import tf_version - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - mock = unittest.mock - -try: - import apache_beam as beam # pylint:disable=g-import-not-at-top -except ModuleNotFoundError: - pass - - -class FakeModel(model.DetectionModel): - """A Fake Detection model with expected output nodes from post-processing.""" - - def preprocess(self, inputs): - true_image_shapes = [] # Doesn't matter for the fake model. 
- return tf.identity(inputs), true_image_shapes - - def predict(self, preprocessed_inputs, true_image_shapes): - return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)} - - def postprocess(self, prediction_dict, true_image_shapes): - with tf.control_dependencies(prediction_dict.values()): - postprocessed_tensors = { - 'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6], - [0.5, 0.5, 0.8, 0.8]]], tf.float32), - 'detection_scores': tf.constant([[0.95, 0.6]], tf.float32), - 'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2], - [0.3, 0.1, 0.6]]], - tf.float32), - 'detection_classes': tf.constant([[0, 1]], tf.float32), - 'num_detections': tf.constant([2], tf.float32) - } - return postprocessed_tensors - - def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): - pass - - def restore_from_objects(self, fine_tune_checkpoint_type): - pass - - def loss(self, prediction_dict, true_image_shapes): - pass - - def regularization_losses(self): - pass - - def updates(self): - pass - - -@contextlib.contextmanager -def InMemoryTFRecord(entries): - temp = tempfile.NamedTemporaryFile(delete=False) - filename = temp.name - try: - with tf.python_io.TFRecordWriter(filename) as writer: - for value in entries: - writer.write(value) - yield filename - finally: - os.unlink(filename) - - -@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') -class GenerateDetectionDataTest(tf.test.TestCase): - - def _save_checkpoint_from_mock_model(self, checkpoint_path): - """A function to save checkpoint from a fake Detection Model. - - Args: - checkpoint_path: Path to save checkpoint from Fake model. 
- """ - g = tf.Graph() - with g.as_default(): - mock_model = FakeModel(num_classes=5) - preprocessed_inputs, true_image_shapes = mock_model.preprocess( - tf.placeholder(tf.float32, shape=[None, None, None, 3])) - predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) - mock_model.postprocess(predictions, true_image_shapes) - tf.train.get_or_create_global_step() - saver = tf.train.Saver() - init = tf.global_variables_initializer() - with self.test_session(graph=g) as sess: - sess.run(init) - saver.save(sess, checkpoint_path) - - def _export_saved_model(self): - tmp_dir = self.get_temp_dir() - checkpoint_path = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(checkpoint_path) - output_directory = os.path.join(tmp_dir, 'output') - saved_model_path = os.path.join(output_directory, 'saved_model') - tf.io.gfile.makedirs(output_directory) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(num_classes=5) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - outputs, placeholder_tensor = exporter.build_detection_graph( - input_type='tf_example', - detection_model=detection_model, - input_shape=None, - output_collection_name='inference_op', - graph_hook_fn=None) - output_node_names = ','.join(outputs.keys()) - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - frozen_graph_def = exporter.freeze_graph_with_def_protos( - input_graph_def=tf.get_default_graph().as_graph_def(), - input_saver_def=input_saver_def, - input_checkpoint=checkpoint_path, - output_node_names=output_node_names, - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - output_graph='', - clear_devices=True, - initializer_nodes='') - exporter.write_saved_model( - saved_model_path=saved_model_path, - 
frozen_graph_def=frozen_graph_def, - inputs=placeholder_tensor, - outputs=outputs) - return saved_model_path - - def _create_tf_example(self): - with self.test_session(): - encoded_image = tf.image.encode_jpeg( - tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).eval() - - def BytesFeature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def Int64Feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': BytesFeature(encoded_image), - 'image/source_id': BytesFeature(b'image_id'), - 'image/height': Int64Feature(4), - 'image/width': Int64Feature(6), - 'image/object/class/label': Int64Feature(5), - 'image/object/class/text': BytesFeature(b'hyena'), - 'image/class/label': Int64Feature(5), - 'image/class/text': BytesFeature(b'hyena'), - })) - - return example.SerializeToString() - - def assert_expected_example(self, example): - self.assertAllClose( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.0]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.1]) - self.assertAllClose( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.5]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.6]) - self.assertAllClose( - example.features.feature['image/object/class/score'] - .float_list.value, [0.95]) - self.assertAllClose( - example.features.feature['image/object/class/label'] - .int64_list.value, [5]) - self.assertAllEqual( - example.features.feature['image/object/class/text'] - .bytes_list.value, [b'hyena']) - self.assertAllClose( - example.features.feature['image/class/label'] - .int64_list.value, [5]) - self.assertAllEqual( - example.features.feature['image/class/text'] - .bytes_list.value, [b'hyena']) - - # Check other essential attributes. 
- self.assertAllEqual( - example.features.feature['image/height'].int64_list.value, [4]) - self.assertAllEqual( - example.features.feature['image/width'].int64_list.value, [6]) - self.assertAllEqual( - example.features.feature['image/source_id'].bytes_list.value, - [b'image_id']) - self.assertTrue( - example.features.feature['image/encoded'].bytes_list.value) - - def test_generate_detection_data_fn(self): - saved_model_path = self._export_saved_model() - confidence_threshold = 0.8 - inference_fn = generate_detection_data.GenerateDetectionDataFn( - saved_model_path, confidence_threshold) - inference_fn.start_bundle() - generated_example = self._create_tf_example() - self.assertAllEqual(tf.train.Example.FromString( - generated_example).features.feature['image/object/class/label'] - .int64_list.value, [5]) - self.assertAllEqual(tf.train.Example.FromString( - generated_example).features.feature['image/object/class/text'] - .bytes_list.value, [b'hyena']) - output = inference_fn.process(generated_example) - output_example = output[0] - - self.assertAllEqual( - output_example.features.feature['image/object/class/label'] - .int64_list.value, [5]) - self.assertAllEqual(output_example.features.feature['image/width'] - .int64_list.value, [6]) - - self.assert_expected_example(output_example) - - def test_beam_pipeline(self): - with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') - saved_model_path = self._export_saved_model() - confidence_threshold = 0.8 - num_shards = 1 - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - generate_detection_data.construct_pipeline( - p, input_tfrecord, output_tfrecord, saved_model_path, - confidence_threshold, num_shards) - p.run() - filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') - actual_output 
= [] - record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), 1) - self.assert_expected_example(tf.train.Example.FromString( - actual_output[0])) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf2_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3350eb2df7ff51147434b019658d8588ab1521d5 --- /dev/null +++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf2_test.py @@ -0,0 +1,260 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for generate_detection_data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import os +import tempfile +import unittest +import numpy as np +import six +import tensorflow as tf + +from object_detection import exporter_lib_v2 +from object_detection.builders import model_builder +from object_detection.core import model +from object_detection.protos import pipeline_pb2 +from object_detection.utils import tf_version + +if tf_version.is_tf2(): + from object_detection.dataset_tools.context_rcnn import generate_detection_data # pylint:disable=g-import-not-at-top + +if six.PY2: + import mock # pylint: disable=g-import-not-at-top +else: + mock = unittest.mock + +try: + import apache_beam as beam # pylint:disable=g-import-not-at-top +except ModuleNotFoundError: + pass + + +class FakeModel(model.DetectionModel): + + def __init__(self, conv_weight_scalar=1.0): + super(FakeModel, self).__init__(num_classes=5) + self._conv = tf.keras.layers.Conv2D( + filters=1, kernel_size=1, strides=(1, 1), padding='valid', + kernel_initializer=tf.keras.initializers.Constant( + value=conv_weight_scalar)) + + def preprocess(self, inputs): + return tf.identity(inputs), exporter_lib_v2.get_true_shapes(inputs) + + def predict(self, preprocessed_inputs, true_image_shapes): + return {'image': self._conv(preprocessed_inputs)} + + def postprocess(self, prediction_dict, true_image_shapes): + with tf.control_dependencies(list(prediction_dict.values())): + postprocessed_tensors = { + 'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6], + [0.5, 0.5, 0.8, 0.8]]], tf.float32), + 'detection_scores': tf.constant([[0.95, 0.6]], tf.float32), + 'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2], + [0.3, 0.1, 0.6]]], + tf.float32), + 'detection_classes': tf.constant([[0, 1]], tf.float32), + 'num_detections': 
tf.constant([2], tf.float32) + } + return postprocessed_tensors + + def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): + pass + + def restore_from_objects(self, fine_tune_checkpoint_type): + pass + + def loss(self, prediction_dict, true_image_shapes): + pass + + def regularization_losses(self): + pass + + def updates(self): + pass + + +@contextlib.contextmanager +def InMemoryTFRecord(entries): + temp = tempfile.NamedTemporaryFile(delete=False) + filename = temp.name + try: + with tf.io.TFRecordWriter(filename) as writer: + for value in entries: + writer.write(value) + yield filename + finally: + os.unlink(filename) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class GenerateDetectionDataTest(tf.test.TestCase): + + def _save_checkpoint_from_mock_model(self, checkpoint_path): + """A function to save checkpoint from a fake Detection Model. + + Args: + checkpoint_path: Path to save checkpoint from Fake model. + """ + mock_model = FakeModel() + fake_image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image) + predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) + mock_model.postprocess(predictions, true_image_shapes) + ckpt = tf.train.Checkpoint(model=mock_model) + exported_checkpoint_manager = tf.train.CheckpointManager( + ckpt, checkpoint_path, max_to_keep=1) + exported_checkpoint_manager.save(checkpoint_number=0) + + def _export_saved_model(self): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + output_directory = os.path.join(tmp_dir, 'output') + saved_model_path = os.path.join(output_directory, 'saved_model') + tf.io.gfile.makedirs(output_directory) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder + output_directory = os.path.join(tmp_dir, 'output') + 
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter_lib_v2.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory) + saved_model_path = os.path.join(output_directory, 'saved_model') + return saved_model_path + + def _create_tf_example(self): + with self.test_session(): + encoded_image = tf.io.encode_jpeg( + tf.constant(np.ones((4, 6, 3)).astype(np.uint8))).numpy() + + def BytesFeature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + def Int64Feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/encoded': BytesFeature(encoded_image), + 'image/source_id': BytesFeature(b'image_id'), + 'image/height': Int64Feature(4), + 'image/width': Int64Feature(6), + 'image/object/class/label': Int64Feature(5), + 'image/object/class/text': BytesFeature(b'hyena'), + 'image/class/label': Int64Feature(5), + 'image/class/text': BytesFeature(b'hyena'), + })) + + return example.SerializeToString() + + def assert_expected_example(self, example): + self.assertAllClose( + example.features.feature['image/object/bbox/ymin'].float_list.value, + [0.0]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmin'].float_list.value, + [0.1]) + self.assertAllClose( + example.features.feature['image/object/bbox/ymax'].float_list.value, + [0.5]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmax'].float_list.value, + [0.6]) + self.assertAllClose( + example.features.feature['image/object/class/score'] + .float_list.value, [0.95]) + self.assertAllClose( + example.features.feature['image/object/class/label'] + .int64_list.value, [5]) + self.assertAllEqual( + example.features.feature['image/object/class/text'] + .bytes_list.value, [b'hyena']) + self.assertAllClose( + 
example.features.feature['image/class/label'] + .int64_list.value, [5]) + self.assertAllEqual( + example.features.feature['image/class/text'] + .bytes_list.value, [b'hyena']) + + # Check other essential attributes. + self.assertAllEqual( + example.features.feature['image/height'].int64_list.value, [4]) + self.assertAllEqual( + example.features.feature['image/width'].int64_list.value, [6]) + self.assertAllEqual( + example.features.feature['image/source_id'].bytes_list.value, + [b'image_id']) + self.assertTrue( + example.features.feature['image/encoded'].bytes_list.value) + + def test_generate_detection_data_fn(self): + saved_model_path = self._export_saved_model() + confidence_threshold = 0.8 + inference_fn = generate_detection_data.GenerateDetectionDataFn( + saved_model_path, confidence_threshold) + inference_fn.setup() + generated_example = self._create_tf_example() + self.assertAllEqual(tf.train.Example.FromString( + generated_example).features.feature['image/object/class/label'] + .int64_list.value, [5]) + self.assertAllEqual(tf.train.Example.FromString( + generated_example).features.feature['image/object/class/text'] + .bytes_list.value, [b'hyena']) + output = inference_fn.process(generated_example) + output_example = output[0] + + self.assertAllEqual( + output_example.features.feature['image/object/class/label'] + .int64_list.value, [5]) + self.assertAllEqual(output_example.features.feature['image/width'] + .int64_list.value, [6]) + + self.assert_expected_example(output_example) + + def test_beam_pipeline(self): + with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: + temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') + saved_model_path = self._export_saved_model() + confidence_threshold = 0.8 + num_shards = 1 + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + 
generate_detection_data.construct_pipeline( + p, input_tfrecord, output_tfrecord, saved_model_path, + confidence_threshold, num_shards) + p.run() + filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????') + actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), 1) + self.assert_expected_example(tf.train.Example.FromString( + actual_output[0])) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py index 74d15901dec3706d39e6feaaad3d5ad99d9f45d7..dac1168c14a469a72fc2ad796caaf62c06f5b5fc 100644 --- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py +++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py @@ -55,7 +55,7 @@ import threading import numpy as np import six -import tensorflow.compat.v1 as tf +import tensorflow as tf try: import apache_beam as beam # pylint:disable=g-import-not-at-top @@ -63,6 +63,76 @@ except ModuleNotFoundError: pass +def add_keys(serialized_example): + key = hash(serialized_example) + return key, serialized_example + + +def drop_keys(key_value_tuple): + return key_value_tuple[1] + + +def get_date_captured(example): + date_captured = datetime.datetime.strptime( + six.ensure_str( + example.features.feature['image/date_captured'].bytes_list.value[0]), + '%Y-%m-%d %H:%M:%S') + return date_captured + + +def embed_date_captured(date_captured): + """Encodes the datetime of the image.""" + embedded_date_captured = [] + month_max = 12.0 + day_max = 31.0 + hour_max = 24.0 + minute_max = 60.0 + min_year = 1990.0 + max_year = 2030.0 + + year = (date_captured.year - min_year) / float(max_year - min_year) + embedded_date_captured.append(year) + + month = 
(date_captured.month - 1) / month_max + embedded_date_captured.append(month) + + day = (date_captured.day - 1) / day_max + embedded_date_captured.append(day) + + hour = date_captured.hour / hour_max + embedded_date_captured.append(hour) + + minute = date_captured.minute / minute_max + embedded_date_captured.append(minute) + + return np.asarray(embedded_date_captured) + + +def embed_position_and_size(box): + """Encodes the bounding box of the object of interest.""" + ymin = box[0] + xmin = box[1] + ymax = box[2] + xmax = box[3] + w = xmax - xmin + h = ymax - ymin + x = xmin + w / 2.0 + y = ymin + h / 2.0 + return np.asarray([x, y, w, h]) + + +def get_bb_embedding(detection_features, detection_boxes, detection_scores, + index): + embedding = detection_features[0][index] + pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0) + + box = detection_boxes[0][index] + position_embedding = embed_position_and_size(box) + + score = detection_scores[0][index] + return np.concatenate((pooled_embedding, position_embedding)), score + + class GenerateEmbeddingDataFn(beam.DoFn): """Generates embedding data for camera trap images. @@ -72,13 +142,14 @@ class GenerateEmbeddingDataFn(beam.DoFn): session_lock = threading.Lock() def __init__(self, model_dir, top_k_embedding_count, - bottom_k_embedding_count): + bottom_k_embedding_count, embedding_type='final_box_features'): """Initialization function. Args: model_dir: A directory containing saved model. 
top_k_embedding_count: the number of high-confidence embeddings to store bottom_k_embedding_count: the number of low-confidence embeddings to store + embedding_type: One of 'final_box_features', 'rpn_box_features' """ self._model_dir = model_dir self._session = None @@ -86,8 +157,9 @@ class GenerateEmbeddingDataFn(beam.DoFn): 'embedding_data_generation', 'num_tf_examples_processed') self._top_k_embedding_count = top_k_embedding_count self._bottom_k_embedding_count = bottom_k_embedding_count + self._embedding_type = embedding_type - def start_bundle(self): + def setup(self): self._load_inference_model() def _load_inference_model(self): @@ -95,102 +167,38 @@ class GenerateEmbeddingDataFn(beam.DoFn): # one instance across all threads in the worker. This is possible since # tf.Session.run() is thread safe. with self.session_lock: - if self._session is None: - graph = tf.Graph() - self._session = tf.Session(graph=graph) - with graph.as_default(): - meta_graph = tf.saved_model.loader.load( - self._session, [tf.saved_model.tag_constants.SERVING], - self._model_dir) - signature = meta_graph.signature_def['serving_default'] - input_tensor_name = signature.inputs['inputs'].name - detection_features_name = signature.outputs['detection_features'].name - detection_boxes_name = signature.outputs['detection_boxes'].name - num_detections_name = signature.outputs['num_detections'].name - self._input = graph.get_tensor_by_name(input_tensor_name) - self._embedding_node = graph.get_tensor_by_name(detection_features_name) - self._box_node = graph.get_tensor_by_name(detection_boxes_name) - self._scores_node = graph.get_tensor_by_name( - signature.outputs['detection_scores'].name) - self._num_detections = graph.get_tensor_by_name(num_detections_name) - tf.logging.info(signature.outputs['detection_features'].name) - tf.logging.info(signature.outputs['detection_boxes'].name) - tf.logging.info(signature.outputs['num_detections'].name) - - def process(self, tfrecord_entry): - return 
self._run_inference_and_generate_embedding(tfrecord_entry) - - def _run_inference_and_generate_embedding(self, tfrecord_entry): - input_example = tf.train.Example.FromString(tfrecord_entry) - # Convert date_captured datetime string to unix time integer and store - - def get_date_captured(example): - date_captured = datetime.datetime.strptime( - six.ensure_str( - example.features.feature[ - 'image/date_captured'].bytes_list.value[0]), - '%Y-%m-%d %H:%M:%S') - return date_captured + self._detect_fn = tf.saved_model.load(self._model_dir) - try: - date_captured = get_date_captured(input_example) - except Exception: # pylint: disable=broad-except - # we require date_captured to be available for all images - return [] - - def embed_date_captured(date_captured): - """Encodes the datetime of the image.""" - embedded_date_captured = [] - month_max = 12.0 - day_max = 31.0 - hour_max = 24.0 - minute_max = 60.0 - min_year = 1990.0 - max_year = 2030.0 - - year = (date_captured.year-min_year)/float(max_year-min_year) - embedded_date_captured.append(year) - - month = (date_captured.month-1)/month_max - embedded_date_captured.append(month) - - day = (date_captured.day-1)/day_max - embedded_date_captured.append(day) - - hour = date_captured.hour/hour_max - embedded_date_captured.append(hour) - - minute = date_captured.minute/minute_max - embedded_date_captured.append(minute) - - return np.asarray(embedded_date_captured) - - def embed_position_and_size(box): - """Encodes the bounding box of the object of interest.""" - ymin = box[0] - xmin = box[1] - ymax = box[2] - xmax = box[3] - w = xmax - xmin - h = ymax - ymin - x = xmin + w / 2.0 - y = ymin + h / 2.0 - return np.asarray([x, y, w, h]) - - unix_time = ( - (date_captured - datetime.datetime.fromtimestamp(0)).total_seconds()) + def process(self, tfexample_key_value): + return self._run_inference_and_generate_embedding(tfexample_key_value) + def _run_inference_and_generate_embedding(self, tfexample_key_value): + key, tfexample = 
tfexample_key_value + input_example = tf.train.Example.FromString(tfexample) example = tf.train.Example() - example.features.feature['image/unix_time'].float_list.value.extend( - [unix_time]) + example.CopyFrom(input_example) - (detection_features, detection_boxes, num_detections, - detection_scores) = self._session.run( - [ - self._embedding_node, self._box_node, self._num_detections[0], - self._scores_node - ], - feed_dict={self._input: [tfrecord_entry]}) + try: + date_captured = get_date_captured(input_example) + unix_time = ((date_captured - + datetime.datetime.fromtimestamp(0)).total_seconds()) + example.features.feature['image/unix_time'].float_list.value.extend( + [unix_time]) + temporal_embedding = embed_date_captured(date_captured) + except Exception: # pylint: disable=broad-except + temporal_embedding = None + + detections = self._detect_fn.signatures['serving_default']( + (tf.expand_dims(tf.convert_to_tensor(tfexample), 0))) + if self._embedding_type == 'final_box_features': + detection_features = detections['detection_features'] + elif self._embedding_type == 'rpn_box_features': + detection_features = detections['cropped_rpn_box_features'] + else: + raise ValueError('embedding type not supported') + detection_boxes = detections['detection_boxes'] + num_detections = detections['num_detections'] + detection_scores = detections['detection_scores'] num_detections = int(num_detections) embed_all = [] @@ -198,25 +206,12 @@ class GenerateEmbeddingDataFn(beam.DoFn): detection_features = np.asarray(detection_features) - def get_bb_embedding(detection_features, detection_boxes, detection_scores, - index): - embedding = detection_features[0][index] - pooled_embedding = np.mean(np.mean(embedding, axis=1), axis=0) - - box = detection_boxes[0][index] - position_embedding = embed_position_and_size(box) - - score = detection_scores[0][index] - return np.concatenate((pooled_embedding, position_embedding)), score - - temporal_embedding = 
embed_date_captured(date_captured) - embedding_count = 0 for index in range(min(num_detections, self._top_k_embedding_count)): bb_embedding, score = get_bb_embedding( detection_features, detection_boxes, detection_scores, index) embed_all.extend(bb_embedding) - embed_all.extend(temporal_embedding) + if temporal_embedding is not None: embed_all.extend(temporal_embedding) score_all.append(score) embedding_count += 1 @@ -226,7 +221,7 @@ class GenerateEmbeddingDataFn(beam.DoFn): bb_embedding, score = get_bb_embedding( detection_features, detection_boxes, detection_scores, index) embed_all.extend(bb_embedding) - embed_all.extend(temporal_embedding) + if temporal_embedding is not None: embed_all.extend(temporal_embedding) score_all.append(score) embedding_count += 1 @@ -234,7 +229,7 @@ class GenerateEmbeddingDataFn(beam.DoFn): bb_embedding, score = get_bb_embedding( detection_features, detection_boxes, detection_scores, 0) embed_all.extend(bb_embedding) - embed_all.extend(temporal_embedding) + if temporal_embedding is not None: embed_all.extend(temporal_embedding) score_all.append(score) # Takes max in case embedding_count is 0. 
@@ -251,65 +246,13 @@ class GenerateEmbeddingDataFn(beam.DoFn): example.features.feature['image/embedding_count'].int64_list.value.append( embedding_count) - # Add other essential example attributes - example.features.feature['image/encoded'].bytes_list.value.extend( - input_example.features.feature['image/encoded'].bytes_list.value) - example.features.feature['image/height'].int64_list.value.extend( - input_example.features.feature['image/height'].int64_list.value) - example.features.feature['image/width'].int64_list.value.extend( - input_example.features.feature['image/width'].int64_list.value) - example.features.feature['image/source_id'].bytes_list.value.extend( - input_example.features.feature['image/source_id'].bytes_list.value) - example.features.feature['image/location'].bytes_list.value.extend( - input_example.features.feature['image/location'].bytes_list.value) - - example.features.feature['image/date_captured'].bytes_list.value.extend( - input_example.features.feature['image/date_captured'].bytes_list.value) - - example.features.feature['image/class/text'].bytes_list.value.extend( - input_example.features.feature['image/class/text'].bytes_list.value) - example.features.feature['image/class/label'].int64_list.value.extend( - input_example.features.feature['image/class/label'].int64_list.value) - - example.features.feature['image/seq_id'].bytes_list.value.extend( - input_example.features.feature['image/seq_id'].bytes_list.value) - example.features.feature['image/seq_num_frames'].int64_list.value.extend( - input_example.features.feature['image/seq_num_frames'].int64_list.value) - example.features.feature['image/seq_frame_num'].int64_list.value.extend( - input_example.features.feature['image/seq_frame_num'].int64_list.value) - - example.features.feature['image/object/bbox/ymax'].float_list.value.extend( - input_example.features.feature[ - 'image/object/bbox/ymax'].float_list.value) - example.features.feature['image/object/bbox/ymin'].float_list.value.extend( 
- input_example.features.feature[ - 'image/object/bbox/ymin'].float_list.value) - example.features.feature['image/object/bbox/xmax'].float_list.value.extend( - input_example.features.feature[ - 'image/object/bbox/xmax'].float_list.value) - example.features.feature['image/object/bbox/xmin'].float_list.value.extend( - input_example.features.feature[ - 'image/object/bbox/xmin'].float_list.value) - example.features.feature[ - 'image/object/class/score'].float_list.value.extend( - input_example.features.feature[ - 'image/object/class/score'].float_list.value) - example.features.feature[ - 'image/object/class/label'].int64_list.value.extend( - input_example.features.feature[ - 'image/object/class/label'].int64_list.value) - example.features.feature[ - 'image/object/class/text'].bytes_list.value.extend( - input_example.features.feature[ - 'image/object/class/text'].bytes_list.value) - self._num_examples_processed.inc(1) - return [example] + return [(key, example)] def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir, top_k_embedding_count, bottom_k_embedding_count, - num_shards): + num_shards, embedding_type): """Returns a beam pipeline to run object detection inference. Args: @@ -321,19 +264,21 @@ def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir, top_k_embedding_count: The number of high-confidence embeddings to store. bottom_k_embedding_count: The number of low-confidence embeddings to store. num_shards: The number of output shards. + embedding_type: Which features to embed. 
""" input_collection = ( pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord( - input_tfrecord, - coder=beam.coders.BytesCoder())) + input_tfrecord, coder=beam.coders.BytesCoder()) + | 'AddKeys' >> beam.Map(add_keys)) output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo( GenerateEmbeddingDataFn(model_dir, top_k_embedding_count, - bottom_k_embedding_count)) + bottom_k_embedding_count, embedding_type)) output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle() - _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord( - output_tfrecord, - num_shards=num_shards, - coder=beam.coders.ProtoCoder(tf.train.Example)) + _ = output_collection | 'DropKeys' >> beam.Map( + drop_keys) | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord( + output_tfrecord, + num_shards=num_shards, + coder=beam.coders.ProtoCoder(tf.train.Example)) def parse_args(argv): @@ -378,6 +323,12 @@ def parse_args(argv): dest='num_shards', default=0, help='Number of output shards.') + parser.add_argument( + '--embedding_type', + dest='embedding_type', + default='final_box_features', + help='What features to embed, supports `final_box_features`, ' + '`rpn_box_features`.') beam_args, pipeline_args = parser.parse_known_args(argv) return beam_args, pipeline_args @@ -409,11 +360,11 @@ def main(argv=None, save_main_session=True): args.embedding_model_dir, args.top_k_embedding_count, args.bottom_k_embedding_count, - args.num_shards) + args.num_shards, + args.embedding_type) p.run() if __name__ == '__main__': main() - diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py deleted file mode 100644 index 71d1d600d8fa933751c615db3c06dfadfe0b28f4..0000000000000000000000000000000000000000 --- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py +++ /dev/null @@ -1,347 +0,0 @@ 
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for generate_embedding_data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import contextlib -import os -import tempfile -import unittest -import numpy as np -import six -import tensorflow.compat.v1 as tf -from object_detection import exporter -from object_detection.builders import model_builder -from object_detection.core import model -from object_detection.dataset_tools.context_rcnn import generate_embedding_data -from object_detection.protos import pipeline_pb2 -from object_detection.utils import tf_version - - -if six.PY2: - import mock # pylint: disable=g-import-not-at-top -else: - mock = unittest.mock - -try: - import apache_beam as beam # pylint:disable=g-import-not-at-top -except ModuleNotFoundError: - pass - - -class FakeModel(model.DetectionModel): - """A Fake Detection model with expected output nodes from post-processing.""" - - def preprocess(self, inputs): - true_image_shapes = [] # Doesn't matter for the fake model. 
- return tf.identity(inputs), true_image_shapes - - def predict(self, preprocessed_inputs, true_image_shapes): - return {'image': tf.layers.conv2d(preprocessed_inputs, 3, 1)} - - def postprocess(self, prediction_dict, true_image_shapes): - with tf.control_dependencies(prediction_dict.values()): - num_features = 100 - feature_dims = 10 - classifier_feature = np.ones( - (2, feature_dims, feature_dims, num_features), - dtype=np.float32).tolist() - postprocessed_tensors = { - 'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6], - [0.5, 0.5, 0.8, 0.8]]], tf.float32), - 'detection_scores': tf.constant([[0.95, 0.6]], tf.float32), - 'detection_multiclass_scores': tf.constant([[[0.1, 0.7, 0.2], - [0.3, 0.1, 0.6]]], - tf.float32), - 'detection_classes': tf.constant([[0, 1]], tf.float32), - 'num_detections': tf.constant([2], tf.float32), - 'detection_features': - tf.constant([classifier_feature], - tf.float32) - } - return postprocessed_tensors - - def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): - pass - - def restore_from_objects(self, fine_tune_checkpoint_type): - pass - - def loss(self, prediction_dict, true_image_shapes): - pass - - def regularization_losses(self): - pass - - def updates(self): - pass - - -@contextlib.contextmanager -def InMemoryTFRecord(entries): - temp = tempfile.NamedTemporaryFile(delete=False) - filename = temp.name - try: - with tf.python_io.TFRecordWriter(filename) as writer: - for value in entries: - writer.write(value) - yield filename - finally: - os.unlink(temp.name) - - -@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') -class GenerateEmbeddingData(tf.test.TestCase): - - def _save_checkpoint_from_mock_model(self, checkpoint_path): - """A function to save checkpoint from a fake Detection Model. - - Args: - checkpoint_path: Path to save checkpoint from Fake model. 
- """ - g = tf.Graph() - with g.as_default(): - mock_model = FakeModel(num_classes=5) - preprocessed_inputs, true_image_shapes = mock_model.preprocess( - tf.placeholder(tf.float32, shape=[None, None, None, 3])) - predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) - mock_model.postprocess(predictions, true_image_shapes) - tf.train.get_or_create_global_step() - saver = tf.train.Saver() - init = tf.global_variables_initializer() - with self.test_session(graph=g) as sess: - sess.run(init) - saver.save(sess, checkpoint_path) - - def _export_saved_model(self): - tmp_dir = self.get_temp_dir() - checkpoint_path = os.path.join(tmp_dir, 'model.ckpt') - self._save_checkpoint_from_mock_model(checkpoint_path) - output_directory = os.path.join(tmp_dir, 'output') - saved_model_path = os.path.join(output_directory, 'saved_model') - tf.io.gfile.makedirs(output_directory) - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - mock_builder.return_value = FakeModel(num_classes=5) - pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() - pipeline_config.eval_config.use_moving_averages = False - detection_model = model_builder.build(pipeline_config.model, - is_training=False) - outputs, placeholder_tensor = exporter.build_detection_graph( - input_type='tf_example', - detection_model=detection_model, - input_shape=None, - output_collection_name='inference_op', - graph_hook_fn=None) - output_node_names = ','.join(outputs.keys()) - saver = tf.train.Saver() - input_saver_def = saver.as_saver_def() - frozen_graph_def = exporter.freeze_graph_with_def_protos( - input_graph_def=tf.get_default_graph().as_graph_def(), - input_saver_def=input_saver_def, - input_checkpoint=checkpoint_path, - output_node_names=output_node_names, - restore_op_name='save/restore_all', - filename_tensor_name='save/Const:0', - output_graph='', - clear_devices=True, - initializer_nodes='') - exporter.write_saved_model( - saved_model_path=saved_model_path, - 
frozen_graph_def=frozen_graph_def, - inputs=placeholder_tensor, - outputs=outputs) - return saved_model_path - - def _create_tf_example(self): - with self.test_session(): - encoded_image = tf.image.encode_jpeg( - tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).eval() - - def BytesFeature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - def Int64Feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - def FloatFeature(value): - return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) - - example = tf.train.Example( - features=tf.train.Features( - feature={ - 'image/encoded': BytesFeature(encoded_image), - 'image/source_id': BytesFeature(b'image_id'), - 'image/height': Int64Feature(400), - 'image/width': Int64Feature(600), - 'image/class/label': Int64Feature(5), - 'image/class/text': BytesFeature(b'hyena'), - 'image/object/bbox/xmin': FloatFeature(0.1), - 'image/object/bbox/xmax': FloatFeature(0.6), - 'image/object/bbox/ymin': FloatFeature(0.0), - 'image/object/bbox/ymax': FloatFeature(0.5), - 'image/object/class/score': FloatFeature(0.95), - 'image/object/class/label': Int64Feature(5), - 'image/object/class/text': BytesFeature(b'hyena'), - 'image/date_captured': BytesFeature(b'2019-10-20 12:12:12') - })) - - return example.SerializeToString() - - def assert_expected_example(self, example, topk=False, botk=False): - # Check embeddings - if topk or botk: - self.assertEqual(len( - example.features.feature['image/embedding'].float_list.value), - 218) - self.assertAllEqual( - example.features.feature['image/embedding_count'].int64_list.value, - [2]) - else: - self.assertEqual(len( - example.features.feature['image/embedding'].float_list.value), - 109) - self.assertAllEqual( - example.features.feature['image/embedding_count'].int64_list.value, - [1]) - - self.assertAllEqual( - example.features.feature['image/embedding_length'].int64_list.value, - [109]) - - # Check annotations - 
self.assertAllClose( - example.features.feature['image/object/bbox/ymin'].float_list.value, - [0.0]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmin'].float_list.value, - [0.1]) - self.assertAllClose( - example.features.feature['image/object/bbox/ymax'].float_list.value, - [0.5]) - self.assertAllClose( - example.features.feature['image/object/bbox/xmax'].float_list.value, - [0.6]) - self.assertAllClose( - example.features.feature['image/object/class/score'] - .float_list.value, [0.95]) - self.assertAllClose( - example.features.feature['image/object/class/label'] - .int64_list.value, [5]) - self.assertAllEqual( - example.features.feature['image/object/class/text'] - .bytes_list.value, [b'hyena']) - self.assertAllClose( - example.features.feature['image/class/label'] - .int64_list.value, [5]) - self.assertAllEqual( - example.features.feature['image/class/text'] - .bytes_list.value, [b'hyena']) - - # Check other essential attributes. - self.assertAllEqual( - example.features.feature['image/height'].int64_list.value, [400]) - self.assertAllEqual( - example.features.feature['image/width'].int64_list.value, [600]) - self.assertAllEqual( - example.features.feature['image/source_id'].bytes_list.value, - [b'image_id']) - self.assertTrue( - example.features.feature['image/encoded'].bytes_list.value) - - def test_generate_embedding_data_fn(self): - saved_model_path = self._export_saved_model() - top_k_embedding_count = 1 - bottom_k_embedding_count = 0 - inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( - saved_model_path, top_k_embedding_count, bottom_k_embedding_count) - inference_fn.start_bundle() - generated_example = self._create_tf_example() - self.assertAllEqual(tf.train.Example.FromString( - generated_example).features.feature['image/object/class/label'] - .int64_list.value, [5]) - self.assertAllEqual(tf.train.Example.FromString( - generated_example).features.feature['image/object/class/text'] - .bytes_list.value, [b'hyena']) - 
output = inference_fn.process(generated_example) - output_example = output[0] - self.assert_expected_example(output_example) - - def test_generate_embedding_data_with_top_k_boxes(self): - saved_model_path = self._export_saved_model() - top_k_embedding_count = 2 - bottom_k_embedding_count = 0 - inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( - saved_model_path, top_k_embedding_count, bottom_k_embedding_count) - inference_fn.start_bundle() - generated_example = self._create_tf_example() - self.assertAllEqual( - tf.train.Example.FromString(generated_example).features - .feature['image/object/class/label'].int64_list.value, [5]) - self.assertAllEqual( - tf.train.Example.FromString(generated_example).features - .feature['image/object/class/text'].bytes_list.value, [b'hyena']) - output = inference_fn.process(generated_example) - output_example = output[0] - self.assert_expected_example(output_example, topk=True) - - def test_generate_embedding_data_with_bottom_k_boxes(self): - saved_model_path = self._export_saved_model() - top_k_embedding_count = 0 - bottom_k_embedding_count = 2 - inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( - saved_model_path, top_k_embedding_count, bottom_k_embedding_count) - inference_fn.start_bundle() - generated_example = self._create_tf_example() - self.assertAllEqual( - tf.train.Example.FromString(generated_example).features - .feature['image/object/class/label'].int64_list.value, [5]) - self.assertAllEqual( - tf.train.Example.FromString(generated_example).features - .feature['image/object/class/text'].bytes_list.value, [b'hyena']) - output = inference_fn.process(generated_example) - output_example = output[0] - self.assert_expected_example(output_example, botk=True) - - def test_beam_pipeline(self): - with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: - temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) - output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') - 
saved_model_path = self._export_saved_model() - top_k_embedding_count = 1 - bottom_k_embedding_count = 0 - num_shards = 1 - pipeline_options = beam.options.pipeline_options.PipelineOptions( - runner='DirectRunner') - p = beam.Pipeline(options=pipeline_options) - generate_embedding_data.construct_pipeline( - p, input_tfrecord, output_tfrecord, saved_model_path, - top_k_embedding_count, bottom_k_embedding_count, num_shards) - p.run() - filenames = tf.io.gfile.glob( - output_tfrecord + '-?????-of-?????') - actual_output = [] - record_iterator = tf.python_io.tf_record_iterator(path=filenames[0]) - for record in record_iterator: - actual_output.append(record) - self.assertEqual(len(actual_output), 1) - self.assert_expected_example(tf.train.Example.FromString( - actual_output[0])) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf2_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..156e283eff5936b3e0d909c7fb9a2b940a21f7c2 --- /dev/null +++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf2_test.py @@ -0,0 +1,331 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for generate_embedding_data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import contextlib +import os +import tempfile +import unittest +import numpy as np +import six +import tensorflow as tf +from object_detection import exporter_lib_v2 +from object_detection.builders import model_builder +from object_detection.core import model +from object_detection.protos import pipeline_pb2 +from object_detection.utils import tf_version + +if tf_version.is_tf2(): + from object_detection.dataset_tools.context_rcnn import generate_embedding_data # pylint:disable=g-import-not-at-top + +if six.PY2: + import mock # pylint: disable=g-import-not-at-top +else: + mock = unittest.mock + +try: + import apache_beam as beam # pylint:disable=g-import-not-at-top +except ModuleNotFoundError: + pass + + +class FakeModel(model.DetectionModel): + + def __init__(self, conv_weight_scalar=1.0): + super(FakeModel, self).__init__(num_classes=5) + self._conv = tf.keras.layers.Conv2D( + filters=1, kernel_size=1, strides=(1, 1), padding='valid', + kernel_initializer=tf.keras.initializers.Constant( + value=conv_weight_scalar)) + + def preprocess(self, inputs): + return tf.identity(inputs), exporter_lib_v2.get_true_shapes(inputs) + + def predict(self, preprocessed_inputs, true_image_shapes): + return {'image': self._conv(preprocessed_inputs)} + + def postprocess(self, prediction_dict, true_image_shapes): + with tf.control_dependencies(prediction_dict.values()): + num_features = 100 + feature_dims = 10 + classifier_feature = np.ones( + (2, feature_dims, feature_dims, num_features), + dtype=np.float32).tolist() + postprocessed_tensors = { + 'detection_boxes': tf.constant([[[0.0, 0.1, 0.5, 0.6], + [0.5, 0.5, 0.8, 0.8]]], tf.float32), + 'detection_scores': tf.constant([[0.95, 0.6]], tf.float32), + 'detection_multiclass_scores': 
tf.constant([[[0.1, 0.7, 0.2], + [0.3, 0.1, 0.6]]], + tf.float32), + 'detection_classes': tf.constant([[0, 1]], tf.float32), + 'num_detections': tf.constant([2], tf.float32), + 'detection_features': + tf.constant([classifier_feature], + tf.float32) + } + return postprocessed_tensors + + def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): + pass + + def restore_from_objects(self, fine_tune_checkpoint_type): + pass + + def loss(self, prediction_dict, true_image_shapes): + pass + + def regularization_losses(self): + pass + + def updates(self): + pass + + +@contextlib.contextmanager +def InMemoryTFRecord(entries): + temp = tempfile.NamedTemporaryFile(delete=False) + filename = temp.name + try: + with tf.io.TFRecordWriter(filename) as writer: + for value in entries: + writer.write(value) + yield filename + finally: + os.unlink(temp.name) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class GenerateEmbeddingData(tf.test.TestCase): + + def _save_checkpoint_from_mock_model(self, checkpoint_path): + """A function to save checkpoint from a fake Detection Model. + + Args: + checkpoint_path: Path to save checkpoint from Fake model. 
+ """ + mock_model = FakeModel() + fake_image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image) + predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) + mock_model.postprocess(predictions, true_image_shapes) + ckpt = tf.train.Checkpoint(model=mock_model) + exported_checkpoint_manager = tf.train.CheckpointManager( + ckpt, checkpoint_path, max_to_keep=1) + exported_checkpoint_manager.save(checkpoint_number=0) + + def _export_saved_model(self): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + output_directory = os.path.join(tmp_dir, 'output') + saved_model_path = os.path.join(output_directory, 'saved_model') + tf.io.gfile.makedirs(output_directory) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter_lib_v2.export_inference_graph( + input_type='tf_example', + pipeline_config=pipeline_config, + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory) + saved_model_path = os.path.join(output_directory, 'saved_model') + return saved_model_path + + def _create_tf_example(self): + encoded_image = tf.io.encode_jpeg( + tf.constant(np.ones((4, 4, 3)).astype(np.uint8))).numpy() + + def BytesFeature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + def Int64Feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + def FloatFeature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) + + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': BytesFeature(encoded_image), + 'image/source_id': BytesFeature(b'image_id'), + 'image/height': 
Int64Feature(400), + 'image/width': Int64Feature(600), + 'image/class/label': Int64Feature(5), + 'image/class/text': BytesFeature(b'hyena'), + 'image/object/bbox/xmin': FloatFeature(0.1), + 'image/object/bbox/xmax': FloatFeature(0.6), + 'image/object/bbox/ymin': FloatFeature(0.0), + 'image/object/bbox/ymax': FloatFeature(0.5), + 'image/object/class/score': FloatFeature(0.95), + 'image/object/class/label': Int64Feature(5), + 'image/object/class/text': BytesFeature(b'hyena'), + 'image/date_captured': BytesFeature(b'2019-10-20 12:12:12') + })) + + return example.SerializeToString() + + def assert_expected_example(self, example, topk=False, botk=False): + # Check embeddings + if topk or botk: + self.assertEqual(len( + example.features.feature['image/embedding'].float_list.value), + 218) + self.assertAllEqual( + example.features.feature['image/embedding_count'].int64_list.value, + [2]) + else: + self.assertEqual(len( + example.features.feature['image/embedding'].float_list.value), + 109) + self.assertAllEqual( + example.features.feature['image/embedding_count'].int64_list.value, + [1]) + + self.assertAllEqual( + example.features.feature['image/embedding_length'].int64_list.value, + [109]) + + # Check annotations + self.assertAllClose( + example.features.feature['image/object/bbox/ymin'].float_list.value, + [0.0]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmin'].float_list.value, + [0.1]) + self.assertAllClose( + example.features.feature['image/object/bbox/ymax'].float_list.value, + [0.5]) + self.assertAllClose( + example.features.feature['image/object/bbox/xmax'].float_list.value, + [0.6]) + self.assertAllClose( + example.features.feature['image/object/class/score'] + .float_list.value, [0.95]) + self.assertAllClose( + example.features.feature['image/object/class/label'] + .int64_list.value, [5]) + self.assertAllEqual( + example.features.feature['image/object/class/text'] + .bytes_list.value, [b'hyena']) + self.assertAllClose( + 
example.features.feature['image/class/label'] + .int64_list.value, [5]) + self.assertAllEqual( + example.features.feature['image/class/text'] + .bytes_list.value, [b'hyena']) + + # Check other essential attributes. + self.assertAllEqual( + example.features.feature['image/height'].int64_list.value, [400]) + self.assertAllEqual( + example.features.feature['image/width'].int64_list.value, [600]) + self.assertAllEqual( + example.features.feature['image/source_id'].bytes_list.value, + [b'image_id']) + self.assertTrue( + example.features.feature['image/encoded'].bytes_list.value) + + def test_generate_embedding_data_fn(self): + saved_model_path = self._export_saved_model() + top_k_embedding_count = 1 + bottom_k_embedding_count = 0 + inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( + saved_model_path, top_k_embedding_count, bottom_k_embedding_count) + inference_fn.setup() + generated_example = self._create_tf_example() + self.assertAllEqual(tf.train.Example.FromString( + generated_example).features.feature['image/object/class/label'] + .int64_list.value, [5]) + self.assertAllEqual(tf.train.Example.FromString( + generated_example).features.feature['image/object/class/text'] + .bytes_list.value, [b'hyena']) + output = inference_fn.process(('dummy_key', generated_example)) + output_example = output[0][1] + self.assert_expected_example(output_example) + + def test_generate_embedding_data_with_top_k_boxes(self): + saved_model_path = self._export_saved_model() + top_k_embedding_count = 2 + bottom_k_embedding_count = 0 + inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( + saved_model_path, top_k_embedding_count, bottom_k_embedding_count) + inference_fn.setup() + generated_example = self._create_tf_example() + self.assertAllEqual( + tf.train.Example.FromString(generated_example).features + .feature['image/object/class/label'].int64_list.value, [5]) + self.assertAllEqual( + tf.train.Example.FromString(generated_example).features + 
.feature['image/object/class/text'].bytes_list.value, [b'hyena']) + output = inference_fn.process(('dummy_key', generated_example)) + output_example = output[0][1] + self.assert_expected_example(output_example, topk=True) + + def test_generate_embedding_data_with_bottom_k_boxes(self): + saved_model_path = self._export_saved_model() + top_k_embedding_count = 0 + bottom_k_embedding_count = 2 + inference_fn = generate_embedding_data.GenerateEmbeddingDataFn( + saved_model_path, top_k_embedding_count, bottom_k_embedding_count) + inference_fn.setup() + generated_example = self._create_tf_example() + self.assertAllEqual( + tf.train.Example.FromString(generated_example).features + .feature['image/object/class/label'].int64_list.value, [5]) + self.assertAllEqual( + tf.train.Example.FromString(generated_example).features + .feature['image/object/class/text'].bytes_list.value, [b'hyena']) + output = inference_fn.process(('dummy_key', generated_example)) + output_example = output[0][1] + self.assert_expected_example(output_example, botk=True) + + def test_beam_pipeline(self): + with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord: + temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR')) + output_tfrecord = os.path.join(temp_dir, 'output_tfrecord') + saved_model_path = self._export_saved_model() + top_k_embedding_count = 1 + bottom_k_embedding_count = 0 + num_shards = 1 + embedding_type = 'final_box_features' + pipeline_options = beam.options.pipeline_options.PipelineOptions( + runner='DirectRunner') + p = beam.Pipeline(options=pipeline_options) + generate_embedding_data.construct_pipeline( + p, input_tfrecord, output_tfrecord, saved_model_path, + top_k_embedding_count, bottom_k_embedding_count, num_shards, + embedding_type) + p.run() + filenames = tf.io.gfile.glob( + output_tfrecord + '-?????-of-?????') + actual_output = [] + record_iterator = tf.data.TFRecordDataset( + tf.convert_to_tensor(filenames)).as_numpy_iterator() + for record in 
record_iterator: + actual_output.append(record) + self.assertEqual(len(actual_output), 1) + self.assert_expected_example(tf.train.Example.FromString( + actual_output[0])) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/dataset_tools/create_ava_actions_tf_record.py b/research/object_detection/dataset_tools/create_ava_actions_tf_record.py new file mode 100644 index 0000000000000000000000000000000000000000..a27001d879c48e1e10194015f20eecb0724dfdf9 --- /dev/null +++ b/research/object_detection/dataset_tools/create_ava_actions_tf_record.py @@ -0,0 +1,540 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +r"""Code to download and parse the AVA Actions dataset for TensorFlow models. + +The [AVA Actions data set]( +https://research.google.com/ava/index.html) +is a dataset for human action recognition. + +This script downloads the annotations and prepares data from similar annotations +if local video files are available. The video files can be downloaded +from the following website: +https://github.com/cvdfoundation/ava-dataset + +Prior to running this script, please run download_and_preprocess_ava.sh to +download input videos. + +Running this code as a module generates the data set on disk. First, the +required files are downloaded (_download_data) which enables constructing the +label map. 
Then (in generate_examples), for each split in the data set, the +metadata and image frames are generated from the annotations for each sequence +example (_generate_examples). The data set is written to disk as a set of +numbered TFRecord files. + +Generating the data on disk can take considerable time and disk space. +(Image compression quality is the primary determiner of disk usage. + +If using the Tensorflow Object Detection API, set the input_type field +in the input_reader to TF_SEQUENCE_EXAMPLE. If using this script to generate +data for Context R-CNN scripts, the --examples_for_context flag should be +set to true, so that properly-formatted tf.example objects are written to disk. + +This data is structured for per-clip action classification where images is +the sequence of images and labels are a one-hot encoded value. See +as_dataset() for more details. + +Note that the number of videos changes in the data set over time, so it will +likely be necessary to change the expected number of examples. 
+ +The argument video_path_format_string expects a value as such: + '/path/to/videos/{0}' + +""" +import collections +import contextlib +import csv +import glob +import hashlib +import os +import random +import sys +import zipfile + +from absl import app +from absl import flags +from absl import logging +import cv2 +from six.moves import range +from six.moves import urllib +import tensorflow.compat.v1 as tf + +from object_detection.dataset_tools import seq_example_util +from object_detection.utils import dataset_util +from object_detection.utils import label_map_util + + +POSSIBLE_TIMESTAMPS = range(902, 1798) +ANNOTATION_URL = 'https://research.google.com/ava/download/ava_v2.2.zip' +SECONDS_TO_MILLI = 1000 +FILEPATTERN = 'ava_actions_%s_1fps_rgb' +SPLITS = { + 'train': { + 'shards': 1000, + 'examples': 862663, + 'csv': '', + 'excluded-csv': '' + }, + 'val': { + 'shards': 100, + 'examples': 243029, + 'csv': '', + 'excluded-csv': '' + }, + # Test doesn't have ground truth, so TF Records can't be created + 'test': { + 'shards': 100, + 'examples': 0, + 'csv': '', + 'excluded-csv': '' + } +} + +NUM_CLASSES = 80 + + +def feature_list_feature(value): + return tf.train.FeatureList(feature=value) + + +class Ava(object): + """Generates and loads the AVA Actions 2.2 data set.""" + + def __init__(self, path_to_output_dir, path_to_data_download): + if not path_to_output_dir: + raise ValueError('You must supply the path to the data directory.') + self.path_to_data_download = path_to_data_download + self.path_to_output_dir = path_to_output_dir + + def generate_and_write_records(self, + splits_to_process='train,val,test', + video_path_format_string=None, + seconds_per_sequence=10, + hop_between_sequences=10, + examples_for_context=False): + """Downloads data and generates sharded TFRecords. + + Downloads the data files, generates metadata, and processes the metadata + with MediaPipe to produce tf.SequenceExamples for training. The resulting + files can be read with as_dataset(). 
After running this function the + original data files can be deleted. + + Args: + splits_to_process: csv string of which splits to process. Allows + providing a custom CSV with the CSV flag. The original data is still + downloaded to generate the label_map. + video_path_format_string: The format string for the path to local files. + seconds_per_sequence: The length of each sequence, in seconds. + hop_between_sequences: The gap between the centers of + successive sequences. + examples_for_context: Whether to generate sequence examples with context + for context R-CNN. + """ + example_function = self._generate_sequence_examples + if examples_for_context: + example_function = self._generate_examples + + logging.info('Downloading data.') + download_output = self._download_data() + for key in splits_to_process.split(','): + logging.info('Generating examples for split: %s', key) + all_metadata = list(example_function( + download_output[0][key][0], download_output[0][key][1], + download_output[1], seconds_per_sequence, hop_between_sequences, + video_path_format_string)) + logging.info('An example of the metadata: ') + logging.info(all_metadata[0]) + random.seed(47) + random.shuffle(all_metadata) + shards = SPLITS[key]['shards'] + shard_names = [os.path.join( + self.path_to_output_dir, FILEPATTERN % key + '-%05d-of-%05d' % ( + i, shards)) for i in range(shards)] + writers = [tf.io.TFRecordWriter(shard) for shard in shard_names] + with _close_on_exit(writers) as writers: + for i, seq_ex in enumerate(all_metadata): + writers[i % len(writers)].write(seq_ex.SerializeToString()) + logging.info('Data extraction complete.') + + def _generate_sequence_examples(self, annotation_file, excluded_file, + label_map, seconds_per_sequence, + hop_between_sequences, + video_path_format_string): + """For each row in the annotation CSV, generates corresponding examples. + + When iterating through frames for a single sequence example, skips over + excluded frames. 
When moving to the next sequence example, also skips over + excluded frames as if they don't exist. Generates equal-length sequence + examples, each with length seconds_per_sequence (1 fps) and gaps of + hop_between_sequences frames (and seconds) between them, possible greater + due to excluded frames. + + Args: + annotation_file: path to the file of AVA CSV annotations. + excluded_file: path to a CSV file of excluded timestamps for each video. + label_map: an {int: string} label map. + seconds_per_sequence: The number of seconds per example in each example. + hop_between_sequences: The hop between sequences. If less than + seconds_per_sequence, will overlap. + video_path_format_string: File path format to glob video files. + + Yields: + Each prepared tf.SequenceExample of metadata also containing video frames + """ + fieldnames = ['id', 'timestamp_seconds', 'xmin', 'ymin', 'xmax', 'ymax', + 'action_label'] + frame_excluded = {} + # create a sparse, nested map of videos and frame indices. 
+ with open(excluded_file, 'r') as excluded: + reader = csv.reader(excluded) + for row in reader: + frame_excluded[(row[0], int(float(row[1])))] = True + with open(annotation_file, 'r') as annotations: + reader = csv.DictReader(annotations, fieldnames) + frame_annotations = collections.defaultdict(list) + ids = set() + # aggreggate by video and timestamp: + for row in reader: + ids.add(row['id']) + key = (row['id'], int(float(row['timestamp_seconds']))) + frame_annotations[key].append(row) + # for each video, find aggregates near each sampled frame.: + logging.info('Generating metadata...') + media_num = 1 + for media_id in ids: + logging.info('%d/%d, ignore warnings.\n', media_num, len(ids)) + media_num += 1 + + filepath = glob.glob( + video_path_format_string.format(media_id) + '*')[0] + cur_vid = cv2.VideoCapture(filepath) + width = cur_vid.get(cv2.CAP_PROP_FRAME_WIDTH) + height = cur_vid.get(cv2.CAP_PROP_FRAME_HEIGHT) + middle_frame_time = POSSIBLE_TIMESTAMPS[0] + while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]: + start_time = middle_frame_time - seconds_per_sequence // 2 - ( + 0 if seconds_per_sequence % 2 == 0 else 1) + end_time = middle_frame_time + (seconds_per_sequence // 2) + + total_boxes = [] + total_labels = [] + total_label_strings = [] + total_images = [] + total_source_ids = [] + total_confidences = [] + total_is_annotated = [] + windowed_timestamp = start_time + + while windowed_timestamp < end_time: + if (media_id, windowed_timestamp) in frame_excluded: + end_time += 1 + windowed_timestamp += 1 + logging.info('Ignoring and skipping excluded frame.') + continue + + cur_vid.set(cv2.CAP_PROP_POS_MSEC, + (windowed_timestamp) * SECONDS_TO_MILLI) + _, image = cur_vid.read() + _, buffer = cv2.imencode('.jpg', image) + + bufstring = buffer.tostring() + total_images.append(bufstring) + source_id = str(windowed_timestamp) + '_' + media_id + total_source_ids.append(source_id) + total_is_annotated.append(1) + + boxes = [] + labels = [] + label_strings = [] 
+ confidences = [] + for row in frame_annotations[(media_id, windowed_timestamp)]: + if len(row) > 2 and int(row['action_label']) in label_map: + boxes.append([float(row['ymin']), float(row['xmin']), + float(row['ymax']), float(row['xmax'])]) + labels.append(int(row['action_label'])) + label_strings.append(label_map[int(row['action_label'])]) + confidences.append(1) + else: + logging.warning('Unknown label: %s', row['action_label']) + + total_boxes.append(boxes) + total_labels.append(labels) + total_label_strings.append(label_strings) + total_confidences.append(confidences) + windowed_timestamp += 1 + + if total_boxes: + yield seq_example_util.make_sequence_example( + 'AVA', media_id, total_images, int(height), int(width), 'jpeg', + total_source_ids, None, total_is_annotated, total_boxes, + total_label_strings, use_strs_for_source_id=True) + + # Move middle_time_frame, skipping excluded frames + frames_mv = 0 + frames_excluded_count = 0 + while (frames_mv < hop_between_sequences + frames_excluded_count + and middle_frame_time + frames_mv < POSSIBLE_TIMESTAMPS[-1]): + frames_mv += 1 + if (media_id, windowed_timestamp + frames_mv) in frame_excluded: + frames_excluded_count += 1 + middle_frame_time += frames_mv + + cur_vid.release() + + def _generate_examples(self, annotation_file, excluded_file, label_map, + seconds_per_sequence, hop_between_sequences, + video_path_format_string): + """For each row in the annotation CSV, generates examples. + + When iterating through frames for a single example, skips + over excluded frames. Generates equal-length sequence examples, each with + length seconds_per_sequence (1 fps) and gaps of hop_between_sequences + frames (and seconds) between them, possible greater due to excluded frames. + + Args: + annotation_file: path to the file of AVA CSV annotations. + excluded_file: path to a CSV file of excluded timestamps for each video. + label_map: an {int: string} label map. 
+ seconds_per_sequence: The number of seconds per example in each example. + hop_between_sequences: The hop between sequences. If less than + seconds_per_sequence, will overlap. + video_path_format_string: File path format to glob video files. + + Yields: + Each prepared tf.Example of metadata also containing video frames + """ + del seconds_per_sequence + del hop_between_sequences + fieldnames = ['id', 'timestamp_seconds', 'xmin', 'ymin', 'xmax', 'ymax', + 'action_label'] + frame_excluded = {} + # create a sparse, nested map of videos and frame indices. + with open(excluded_file, 'r') as excluded: + reader = csv.reader(excluded) + for row in reader: + frame_excluded[(row[0], int(float(row[1])))] = True + with open(annotation_file, 'r') as annotations: + reader = csv.DictReader(annotations, fieldnames) + frame_annotations = collections.defaultdict(list) + ids = set() + # aggreggate by video and timestamp: + for row in reader: + ids.add(row['id']) + key = (row['id'], int(float(row['timestamp_seconds']))) + frame_annotations[key].append(row) + # for each video, find aggreggates near each sampled frame.: + logging.info('Generating metadata...') + media_num = 1 + for media_id in ids: + logging.info('%d/%d, ignore warnings.\n', media_num, len(ids)) + media_num += 1 + + filepath = glob.glob( + video_path_format_string.format(media_id) + '*')[0] + cur_vid = cv2.VideoCapture(filepath) + width = cur_vid.get(cv2.CAP_PROP_FRAME_WIDTH) + height = cur_vid.get(cv2.CAP_PROP_FRAME_HEIGHT) + middle_frame_time = POSSIBLE_TIMESTAMPS[0] + total_non_excluded = 0 + while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]: + if (media_id, middle_frame_time) not in frame_excluded: + total_non_excluded += 1 + middle_frame_time += 1 + + middle_frame_time = POSSIBLE_TIMESTAMPS[0] + cur_frame_num = 0 + while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]: + cur_vid.set(cv2.CAP_PROP_POS_MSEC, + middle_frame_time * SECONDS_TO_MILLI) + _, image = cur_vid.read() + _, buffer = cv2.imencode('.jpg', image) + 
+ bufstring = buffer.tostring() + + if (media_id, middle_frame_time) in frame_excluded: + middle_frame_time += 1 + logging.info('Ignoring and skipping excluded frame.') + continue + + cur_frame_num += 1 + source_id = str(middle_frame_time) + '_' + media_id + + xmins = [] + xmaxs = [] + ymins = [] + ymaxs = [] + areas = [] + labels = [] + label_strings = [] + confidences = [] + for row in frame_annotations[(media_id, middle_frame_time)]: + if len(row) > 2 and int(row['action_label']) in label_map: + xmins.append(float(row['xmin'])) + xmaxs.append(float(row['xmax'])) + ymins.append(float(row['ymin'])) + ymaxs.append(float(row['ymax'])) + areas.append(float((xmaxs[-1] - xmins[-1]) * + (ymaxs[-1] - ymins[-1])) / 2) + labels.append(int(row['action_label'])) + label_strings.append(label_map[int(row['action_label'])]) + confidences.append(1) + else: + logging.warning('Unknown label: %s', row['action_label']) + + middle_frame_time += 1/3 + if abs(middle_frame_time - round(middle_frame_time) < 0.0001): + middle_frame_time = round(middle_frame_time) + + key = hashlib.sha256(bufstring).hexdigest() + date_captured_feature = ( + '2020-06-17 00:%02d:%02d' % ((middle_frame_time - 900)*3 // 60, + (middle_frame_time - 900)*3 % 60)) + context_feature_dict = { + 'image/height': + dataset_util.int64_feature(int(height)), + 'image/width': + dataset_util.int64_feature(int(width)), + 'image/format': + dataset_util.bytes_feature('jpeg'.encode('utf8')), + 'image/source_id': + dataset_util.bytes_feature(source_id.encode('utf8')), + 'image/filename': + dataset_util.bytes_feature(source_id.encode('utf8')), + 'image/encoded': + dataset_util.bytes_feature(bufstring), + 'image/key/sha256': + dataset_util.bytes_feature(key.encode('utf8')), + 'image/object/bbox/xmin': + dataset_util.float_list_feature(xmins), + 'image/object/bbox/xmax': + dataset_util.float_list_feature(xmaxs), + 'image/object/bbox/ymin': + dataset_util.float_list_feature(ymins), + 'image/object/bbox/ymax': + 
dataset_util.float_list_feature(ymaxs), + 'image/object/area': + dataset_util.float_list_feature(areas), + 'image/object/class/label': + dataset_util.int64_list_feature(labels), + 'image/object/class/text': + dataset_util.bytes_list_feature(label_strings), + 'image/location': + dataset_util.bytes_feature(media_id.encode('utf8')), + 'image/date_captured': + dataset_util.bytes_feature( + date_captured_feature.encode('utf8')), + 'image/seq_num_frames': + dataset_util.int64_feature(total_non_excluded), + 'image/seq_frame_num': + dataset_util.int64_feature(cur_frame_num), + 'image/seq_id': + dataset_util.bytes_feature(media_id.encode('utf8')), + } + + yield tf.train.Example( + features=tf.train.Features(feature=context_feature_dict)) + + cur_vid.release() + + def _download_data(self): + """Downloads and extracts data if not already available.""" + if sys.version_info >= (3, 0): + urlretrieve = urllib.request.urlretrieve + else: + urlretrieve = urllib.request.urlretrieve + logging.info('Creating data directory.') + tf.io.gfile.makedirs(self.path_to_data_download) + logging.info('Downloading annotations.') + paths = {} + + zip_path = os.path.join(self.path_to_data_download, + ANNOTATION_URL.split('/')[-1]) + urlretrieve(ANNOTATION_URL, zip_path) + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(self.path_to_data_download) + for split in ['train', 'test', 'val']: + csv_path = os.path.join(self.path_to_data_download, + 'ava_%s_v2.2.csv' % split) + excl_name = 'ava_%s_excluded_timestamps_v2.2.csv' % split + excluded_csv_path = os.path.join(self.path_to_data_download, excl_name) + SPLITS[split]['csv'] = csv_path + SPLITS[split]['excluded-csv'] = excluded_csv_path + paths[split] = (csv_path, excluded_csv_path) + + label_map = self.get_label_map(os.path.join( + self.path_to_data_download, + 'ava_action_list_v2.2_for_activitynet_2019.pbtxt')) + return paths, label_map + + def get_label_map(self, path): + """Parses a label map into {integer:string} format.""" 
+ label_map_dict = label_map_util.get_label_map_dict(path) + label_map_dict = {v: bytes(k, 'utf8') for k, v in label_map_dict.items()} + logging.info(label_map_dict) + return label_map_dict + + +@contextlib.contextmanager +def _close_on_exit(writers): + """Call close on all writers on exit.""" + try: + yield writers + finally: + for writer in writers: + writer.close() + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + Ava(flags.FLAGS.path_to_output_dir, + flags.FLAGS.path_to_download_data).generate_and_write_records( + flags.FLAGS.splits_to_process, + flags.FLAGS.video_path_format_string, + flags.FLAGS.seconds_per_sequence, + flags.FLAGS.hop_between_sequences, + flags.FLAGS.examples_for_context) + +if __name__ == '__main__': + flags.DEFINE_string('path_to_download_data', + '', + 'Path to directory to download data to.') + flags.DEFINE_string('path_to_output_dir', + '', + 'Path to directory to write data to.') + flags.DEFINE_string('splits_to_process', + 'train,val', + 'Process these splits. Useful for custom data splits.') + flags.DEFINE_string('video_path_format_string', + None, + 'The format string for the path to local video files. ' + 'Uses the Python string.format() syntax with possible ' + 'arguments of {video}, {start}, {end}, {label_name}, and ' + '{split}, corresponding to columns of the data csvs.') + flags.DEFINE_integer('seconds_per_sequence', + 10, + 'The number of seconds per example in each example.' + 'Always 1 when examples_for_context is True.') + flags.DEFINE_integer('hop_between_sequences', + 10, + 'The hop between sequences. If less than ' + 'seconds_per_sequence, will overlap. Always 1 when ' + 'examples_for_context is True.') + flags.DEFINE_boolean('examples_for_context', + False, + 'Whether to generate examples instead of sequence ' + 'examples. 
If true, will generate tf.Example objects ' + 'for use in Context R-CNN.') + app.run(main) diff --git a/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat b/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat index d09d70fb1264efe1d151c6c7e557c020ae569af1..2836cac4d6b37a16fbff8ac6efda1d7ecb88a711 100644 Binary files a/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat and b/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat differ diff --git a/research/object_detection/dataset_tools/download_and_preprocess_ava.sh b/research/object_detection/dataset_tools/download_and_preprocess_ava.sh new file mode 100755 index 0000000000000000000000000000000000000000..723f6a7fcf5421e4bbbd015b8579b14cf3b0d61f --- /dev/null +++ b/research/object_detection/dataset_tools/download_and_preprocess_ava.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# This script downloads the videos for the AVA dataset. There are no arguments. +# Copy this script into the desired parent directory of the ava_vids_raw/ +# directory created in this script to store the raw videos. + +mkdir ava_vids_raw +cd ava_vids_raw + +curl -O s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt + +echo "Downloading all videos." + +cat "ava_file_names_trainval_v2.1.txt" | while read line +do + curl -O s3.amazonaws.com/ava-dataset/trainval/$line + echo "Downloaded " $line +done + +rm "ava_file_names_trainval_v2.1.txt" +cd .. + +# Trimming causes issues with frame seeking in the python script, so it is best left out. +# If included, need to modify the python script to subtract 900 seconds wheen seeking. + +# echo "Trimming all videos." 
+ +# mkdir ava_vids_trimmed +# for filename in ava_vids_raw/*; do +# ffmpeg -ss 900 -to 1800 -i $filename -c copy ava_vids_trimmed/${filename##*/} +# done diff --git a/research/object_detection/dataset_tools/seq_example_util.py b/research/object_detection/dataset_tools/seq_example_util.py index 84573ec7eff5c2217693bd777386533c2c164af0..d4e160a29d7f27b03cccedfcdce0414980915091 100644 --- a/research/object_detection/dataset_tools/seq_example_util.py +++ b/research/object_detection/dataset_tools/seq_example_util.py @@ -1,4 +1,3 @@ -# Lint as: python2, python3 # Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -123,6 +122,15 @@ def sequence_bytes_feature(ndarray): return feature_list +def sequence_strings_feature(strings): + new_str_arr = [] + for single_str in strings: + new_str_arr.append(tf.train.Feature( + bytes_list=tf.train.BytesList( + value=[single_str.encode('utf8')]))) + return tf.train.FeatureList(feature=new_str_arr) + + def boxes_to_box_components(bboxes): """Converts a list of numpy arrays (boxes) to box components. 
@@ -137,8 +145,11 @@ def boxes_to_box_components(bboxes): ymax_list = [] xmax_list = [] for bbox in bboxes: - bbox = np.array(bbox).astype(np.float32) - ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1) + if bbox != []: # pylint: disable=g-explicit-bool-comparison + bbox = np.array(bbox).astype(np.float32) + ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1) + else: + ymin, xmin, ymax, xmax = [], [], [], [] ymin_list.append(np.reshape(ymin, [-1])) xmin_list.append(np.reshape(xmin, [-1])) ymax_list.append(np.reshape(ymax, [-1])) @@ -159,7 +170,11 @@ def make_sequence_example(dataset_name, label_strings=None, detection_bboxes=None, detection_classes=None, - detection_scores=None): + detection_scores=None, + use_strs_for_source_id=False, + context_features=None, + context_feature_length=None, + context_features_image_id_list=None): """Constructs tf.SequenceExamples. Args: @@ -189,6 +204,14 @@ def make_sequence_example(dataset_name, detection_scores: (Optional) A list (with num_frames_elements) of [num_boxes_i] numpy float32 arrays holding predicted object scores for each frame. + use_strs_for_source_id: (Optional) Whether to write the source IDs as + strings rather than byte lists of characters. + context_features: (Optional) A list or numpy array of features to use in + Context R-CNN, of length num_context_features * context_feature_length. + context_feature_length: (Optional) The length of each context feature, used + for reshaping. + context_features_image_id_list: (Optional) A list of image ids of length + num_context_features corresponding to the context features. Returns: A tf.train.SequenceExample. 
@@ -221,7 +244,11 @@ def make_sequence_example(dataset_name, if image_format is not None: context_dict['image/format'] = context_bytes_feature([image_format]) if image_source_ids is not None: - feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids) + if use_strs_for_source_id: + feature_list['image/source_id'] = sequence_strings_feature( + image_source_ids) + else: + feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids) if bboxes is not None: bbox_ymin, bbox_xmin, bbox_ymax, bbox_xmax = boxes_to_box_components(bboxes) feature_list['region/bbox/xmin'] = sequence_float_feature(bbox_xmin) @@ -255,6 +282,16 @@ def make_sequence_example(dataset_name, feature_list['predicted/region/label/confidence'] = sequence_float_feature( detection_scores) + if context_features is not None: + context_dict['image/context_features'] = context_float_feature( + context_features) + if context_feature_length is not None: + context_dict['image/context_feature_length'] = context_int64_feature( + context_feature_length) + if context_features_image_id_list is not None: + context_dict['image/context_features_image_id_list'] = ( + context_bytes_feature(context_features_image_id_list)) + context = tf.train.Features(feature=context_dict) feature_lists = tf.train.FeatureLists(feature_list=feature_list) diff --git a/research/object_detection/dataset_tools/seq_example_util_test.py b/research/object_detection/dataset_tools/seq_example_util_test.py index fd721954be896b4044735dd67928044e413422e7..cb36f7753d110b9b1fad75e13a4311cf259e1f40 100644 --- a/research/object_detection/dataset_tools/seq_example_util_test.py +++ b/research/object_detection/dataset_tools/seq_example_util_test.py @@ -104,6 +104,107 @@ class SeqExampleUtilTest(tf.test.TestCase): source_ids) def test_make_labeled_example(self): + num_frames = 3 + image_height = 100 + image_width = 200 + dataset_name = b'unlabeled_dataset' + video_id = b'video_000' + labels = [b'dog', b'cat', b'wolf'] + images = 
tf.cast(tf.random.uniform( + [num_frames, image_height, image_width, 3], + maxval=256, + dtype=tf.int32), dtype=tf.uint8) + images_list = tf.unstack(images, axis=0) + encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list] + encoded_images = self.materialize_tensors(encoded_images_list) + timestamps = [100000, 110000, 120000] + is_annotated = [1, 0, 1] + bboxes = [ + np.array([[0., 0., 0., 0.], + [0., 0., 1., 1.]], dtype=np.float32), + np.zeros([0, 4], dtype=np.float32), + np.array([], dtype=np.float32) + ] + label_strings = [ + np.array(labels), + np.array([]), + np.array([]) + ] + + seq_example = seq_example_util.make_sequence_example( + dataset_name=dataset_name, + video_id=video_id, + encoded_images=encoded_images, + image_height=image_height, + image_width=image_width, + timestamps=timestamps, + is_annotated=is_annotated, + bboxes=bboxes, + label_strings=label_strings) + + context_feature_dict = seq_example.context.feature + self.assertEqual( + dataset_name, + context_feature_dict['example/dataset_name'].bytes_list.value[0]) + self.assertEqual( + timestamps[0], + context_feature_dict['clip/start/timestamp'].int64_list.value[0]) + self.assertEqual( + timestamps[-1], + context_feature_dict['clip/end/timestamp'].int64_list.value[0]) + self.assertEqual( + num_frames, + context_feature_dict['clip/frames'].int64_list.value[0]) + + seq_feature_dict = seq_example.feature_lists.feature_list + self.assertLen( + seq_feature_dict['image/encoded'].feature[:], + num_frames) + actual_timestamps = [ + feature.int64_list.value[0] for feature + in seq_feature_dict['image/timestamp'].feature] + self.assertAllEqual(timestamps, actual_timestamps) + # Frame 0. 
+ self.assertAllEqual( + is_annotated[0], + seq_feature_dict['region/is_annotated'].feature[0].int64_list.value[0]) + self.assertAllClose( + [0., 0.], + seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:]) + self.assertAllClose( + [0., 0.], + seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:]) + self.assertAllClose( + [0., 1.], + seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:]) + self.assertAllClose( + [0., 1.], + seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:]) + self.assertAllEqual( + labels, + seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:]) + + # Frame 1. + self.assertAllEqual( + is_annotated[1], + seq_feature_dict['region/is_annotated'].feature[1].int64_list.value[0]) + self.assertAllClose( + [], + seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:]) + self.assertAllClose( + [], + seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:]) + self.assertAllClose( + [], + seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:]) + self.assertAllClose( + [], + seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:]) + self.assertAllEqual( + [], + seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:]) + + def test_make_labeled_example_with_context_features(self): num_frames = 2 image_height = 100 image_width = 200 @@ -128,6 +229,9 @@ class SeqExampleUtilTest(tf.test.TestCase): np.array(labels), np.array([]) ] + context_features = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5] + context_feature_length = [3] + context_features_image_id_list = [b'im_1', b'im_2'] seq_example = seq_example_util.make_sequence_example( dataset_name=dataset_name, @@ -138,7 +242,10 @@ class SeqExampleUtilTest(tf.test.TestCase): timestamps=timestamps, is_annotated=is_annotated, bboxes=bboxes, - label_strings=label_strings) + label_strings=label_strings, + context_features=context_features, + context_feature_length=context_feature_length, + 
context_features_image_id_list=context_features_image_id_list) context_feature_dict = seq_example.context.feature self.assertEqual( @@ -154,6 +261,18 @@ class SeqExampleUtilTest(tf.test.TestCase): num_frames, context_feature_dict['clip/frames'].int64_list.value[0]) + self.assertAllClose( + context_features, + context_feature_dict['image/context_features'].float_list.value[:]) + self.assertEqual( + context_feature_length[0], + context_feature_dict[ + 'image/context_feature_length'].int64_list.value[0]) + self.assertEqual( + context_features_image_id_list, + context_feature_dict[ + 'image/context_features_image_id_list'].bytes_list.value[:]) + seq_feature_dict = seq_example.feature_lists.feature_list self.assertLen( seq_feature_dict['image/encoded'].feature[:], diff --git a/research/object_detection/eval_util.py b/research/object_detection/eval_util.py index b6d57f738942d740277add29c15aabd0de4c9c18..f5e7fef600bfe79de22c92f12323a56e0923ac76 100644 --- a/research/object_detection/eval_util.py +++ b/research/object_detection/eval_util.py @@ -33,6 +33,7 @@ from object_detection.core import box_list_ops from object_detection.core import keypoint_ops from object_detection.core import standard_fields as fields from object_detection.metrics import coco_evaluation +from object_detection.metrics import lvis_evaluation from object_detection.protos import eval_pb2 from object_detection.utils import label_map_util from object_detection.utils import object_detection_evaluation @@ -54,6 +55,8 @@ EVAL_METRICS_CLASS_DICT = { coco_evaluation.CocoMaskEvaluator, 'coco_panoptic_metrics': coco_evaluation.CocoPanopticSegmentationEvaluator, + 'lvis_mask_metrics': + lvis_evaluation.LVISMaskEvaluator, 'oid_challenge_detection_metrics': object_detection_evaluation.OpenImagesDetectionChallengeEvaluator, 'oid_challenge_segmentation_metrics': @@ -548,10 +551,38 @@ def _scale_box_to_absolute(args): box_list.BoxList(boxes), image_shape[0], image_shape[1]).get() -def _resize_detection_masks(args): - 
detection_boxes, detection_masks, image_shape = args +def _resize_detection_masks(arg_tuple): + """Resizes detection masks. + + Args: + arg_tuple: A (detection_boxes, detection_masks, image_shape, pad_shape) + tuple where + detection_boxes is a tf.float32 tensor of size [num_masks, 4] containing + the box corners. Row i contains [ymin, xmin, ymax, xmax] of the box + corresponding to mask i. Note that the box corners are in + normalized coordinates. + detection_masks is a tensor of size + [num_masks, mask_height, mask_width]. + image_shape is a tensor of shape [2] + pad_shape is a tensor of shape [2] --- this is assumed to be greater + than or equal to image_shape along both dimensions and represents a + shape to-be-padded-to. + + Returns: + """ + + detection_boxes, detection_masks, image_shape, pad_shape = arg_tuple + detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[0], image_shape[1]) + + pad_instance_dim = tf.zeros([3, 1], dtype=tf.int32) + pad_hw_dim = tf.concat([tf.zeros([1], dtype=tf.int32), + pad_shape - image_shape], axis=0) + pad_hw_dim = tf.expand_dims(pad_hw_dim, 1) + paddings = tf.concat([pad_instance_dim, pad_hw_dim], axis=1) + detection_masks_reframed = tf.pad(detection_masks_reframed, paddings) + # If the masks are currently float, binarize them. Otherwise keep them as # integers, since they have already been thresholded. if detection_masks_reframed.dtype == tf.float32: @@ -559,9 +590,44 @@ def _resize_detection_masks(args): return tf.cast(detection_masks_reframed, tf.uint8) +def resize_detection_masks(detection_boxes, detection_masks, + original_image_spatial_shapes): + """Resizes per-box detection masks to be relative to the entire image. + + Note that this function only works when the spatial size of all images in + the batch is the same. If not, this function should be used with batch_size=1. 
+ + Args: + detection_boxes: A [batch_size, num_instances, 4] float tensor containing + bounding boxes. + detection_masks: A [batch_size, num_instances, height, width] float tensor + containing binary instance masks per box. + original_image_spatial_shapes: a [batch_size, 3] shaped int tensor + holding the spatial dimensions of each image in the batch. + Returns: + masks: Masks resized to the spatial extents given by + (original_image_spatial_shapes[0, 0], original_image_spatial_shapes[0, 1]) + """ + # modify original image spatial shapes to be max along each dim + # in evaluator, should have access to original_image_spatial_shape field + # in add_Eval_Dict + max_spatial_shape = tf.reduce_max( + original_image_spatial_shapes, axis=0, keep_dims=True) + tiled_max_spatial_shape = tf.tile( + max_spatial_shape, + multiples=[tf.shape(original_image_spatial_shapes)[0], 1]) + return shape_utils.static_or_dynamic_map_fn( + _resize_detection_masks, + elems=[detection_boxes, + detection_masks, + original_image_spatial_shapes, + tiled_max_spatial_shape], + dtype=tf.uint8) + + def _resize_groundtruth_masks(args): - """Resizes groundgtruth masks to the original image size.""" - mask, true_image_shape, original_image_shape = args + """Resizes groundtruth masks to the original image size.""" + mask, true_image_shape, original_image_shape, pad_shape = args true_height = true_image_shape[0] true_width = true_image_shape[1] mask = mask[:, :true_height, :true_width] @@ -571,7 +637,15 @@ def _resize_groundtruth_masks(args): original_image_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, align_corners=True) - return tf.cast(tf.squeeze(mask, 3), tf.uint8) + + paddings = tf.concat( + [tf.zeros([3, 1], dtype=tf.int32), + tf.expand_dims( + tf.concat([tf.zeros([1], dtype=tf.int32), + pad_shape-original_image_shape], axis=0), + 1)], axis=1) + mask = tf.pad(tf.squeeze(mask, 3), paddings) + return tf.cast(mask, tf.uint8) def _resize_surface_coordinate_masks(args): @@ -698,7 +772,8 @@ def 
result_dict_for_batched_example(images, scale_to_absolute=False, original_image_spatial_shapes=None, true_image_shapes=None, - max_gt_boxes=None): + max_gt_boxes=None, + label_id_offset=1): """Merges all detection and groundtruth information for a single example. Note that evaluation tools require classes that are 1-indexed, and so this @@ -753,6 +828,7 @@ def result_dict_for_batched_example(images, containing the size of the unpadded original_image. max_gt_boxes: [batch_size] tensor representing the maximum number of groundtruth boxes to pad. + label_id_offset: offset for class ids. Returns: A dictionary with: @@ -807,8 +883,6 @@ def result_dict_for_batched_example(images, ValueError: if true_image_shapes is not 2D int32 tensor of shape [3]. """ - label_id_offset = 1 # Applying label id offset (b/63711816) - input_data_fields = fields.InputDataFields if original_image_spatial_shapes is None: original_image_spatial_shapes = tf.tile( @@ -869,12 +943,9 @@ def result_dict_for_batched_example(images, if detection_fields.detection_masks in detections: detection_masks = detections[detection_fields.detection_masks] - output_dict[detection_fields.detection_masks] = ( - shape_utils.static_or_dynamic_map_fn( - _resize_detection_masks, - elems=[detection_boxes, detection_masks, - original_image_spatial_shapes], - dtype=tf.uint8)) + output_dict[detection_fields.detection_masks] = resize_detection_masks( + detection_boxes, detection_masks, original_image_spatial_shapes) + if detection_fields.detection_surface_coords in detections: detection_surface_coords = detections[ detection_fields.detection_surface_coords] @@ -911,10 +982,17 @@ def result_dict_for_batched_example(images, if input_data_fields.groundtruth_instance_masks in groundtruth: masks = groundtruth[input_data_fields.groundtruth_instance_masks] + max_spatial_shape = tf.reduce_max( + original_image_spatial_shapes, axis=0, keep_dims=True) + tiled_max_spatial_shape = tf.tile( + max_spatial_shape, + 
multiples=[tf.shape(original_image_spatial_shapes)[0], 1]) groundtruth[input_data_fields.groundtruth_instance_masks] = ( shape_utils.static_or_dynamic_map_fn( _resize_groundtruth_masks, - elems=[masks, true_image_shapes, original_image_spatial_shapes], + elems=[masks, true_image_shapes, + original_image_spatial_shapes, + tiled_max_spatial_shape], dtype=tf.uint8)) output_dict.update(groundtruth) @@ -1095,11 +1173,39 @@ def evaluator_options_from_eval_config(eval_config): eval_metric_fn_keys = eval_config.metrics_set evaluator_options = {} for eval_metric_fn_key in eval_metric_fn_keys: - if eval_metric_fn_key in ('coco_detection_metrics', 'coco_mask_metrics'): + if eval_metric_fn_key in ( + 'coco_detection_metrics', 'coco_mask_metrics', 'lvis_mask_metrics'): evaluator_options[eval_metric_fn_key] = { 'include_metrics_per_category': ( eval_config.include_metrics_per_category) } + + if (hasattr(eval_config, 'all_metrics_per_category') and + eval_config.all_metrics_per_category): + evaluator_options[eval_metric_fn_key].update({ + 'all_metrics_per_category': eval_config.all_metrics_per_category + }) + # For coco detection eval, if the eval_config proto contains the + # "skip_predictions_for_unlabeled_class" field, include this field in + # evaluator_options. 
+ if eval_metric_fn_key == 'coco_detection_metrics' and hasattr( + eval_config, 'skip_predictions_for_unlabeled_class'): + evaluator_options[eval_metric_fn_key].update({ + 'skip_predictions_for_unlabeled_class': + (eval_config.skip_predictions_for_unlabeled_class) + }) + for super_category in eval_config.super_categories: + if 'super_categories' not in evaluator_options[eval_metric_fn_key]: + evaluator_options[eval_metric_fn_key]['super_categories'] = {} + key = super_category + value = eval_config.super_categories[key].split(',') + evaluator_options[eval_metric_fn_key]['super_categories'][key] = value + if eval_metric_fn_key == 'lvis_mask_metrics' and hasattr( + eval_config, 'export_path'): + evaluator_options[eval_metric_fn_key].update({ + 'export_path': eval_config.export_path + }) + elif eval_metric_fn_key == 'precision_at_recall_detection_metrics': evaluator_options[eval_metric_fn_key] = { 'recall_lower_bound': (eval_config.recall_lower_bound), diff --git a/research/object_detection/eval_util_test.py b/research/object_detection/eval_util_test.py index d0623f1fcda50482ee98eccb2e2e62ef10b88be3..a39a5ff16749fdfbb091448c444c02de5d524b36 100644 --- a/research/object_detection/eval_util_test.py +++ b/research/object_detection/eval_util_test.py @@ -25,6 +25,7 @@ import numpy as np import six from six.moves import range import tensorflow.compat.v1 as tf +from google.protobuf import text_format from object_detection import eval_util from object_detection.core import standard_fields as fields @@ -84,6 +85,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): groundtruth_boxes = tf.constant([[0., 0., 1., 1.]]) groundtruth_classes = tf.constant([1]) groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8) + original_image_spatial_shapes = tf.constant([[20, 20]], dtype=tf.int32) + groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]) if resized_groundtruth_masks: groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], 
dtype=tf.uint8) @@ -99,6 +102,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): groundtruth_keypoints = tf.tile( tf.expand_dims(groundtruth_keypoints, 0), multiples=[batch_size, 1, 1]) + original_image_spatial_shapes = tf.tile(original_image_spatial_shapes, + multiples=[batch_size, 1]) detections = { detection_fields.detection_boxes: detection_boxes, @@ -111,7 +116,10 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes, input_data_fields.groundtruth_keypoints: groundtruth_keypoints, - input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks + input_data_fields.groundtruth_instance_masks: + groundtruth_instance_masks, + input_data_fields.original_image_spatial_shape: + original_image_spatial_shapes } if batch_size > 1: return eval_util.result_dict_for_batched_example( @@ -239,6 +247,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): eval_config) self.assertTrue(evaluator_options['coco_detection_metrics'] ['include_metrics_per_category']) + self.assertFalse(evaluator_options['coco_detection_metrics'] + ['skip_predictions_for_unlabeled_class']) self.assertTrue( evaluator_options['coco_mask_metrics']['include_metrics_per_category']) self.assertAlmostEqual( @@ -253,6 +263,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): eval_config.metrics_set.extend( ['coco_detection_metrics', 'precision_at_recall_detection_metrics']) eval_config.include_metrics_per_category = True + eval_config.skip_predictions_for_unlabeled_class = True eval_config.recall_lower_bound = 0.2 eval_config.recall_upper_bound = 0.6 categories = self._get_categories_list() @@ -263,6 +274,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): evaluator_options) self.assertTrue(evaluator[0]._include_metrics_per_category) + self.assertTrue(evaluator[0]._skip_predictions_for_unlabeled_class) 
self.assertAlmostEqual(evaluator[1]._recall_lower_bound, eval_config.recall_lower_bound) self.assertAlmostEqual(evaluator[1]._recall_upper_bound, @@ -402,6 +414,48 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase): [[[0., 0.], [75., 150.], [150., 300.]]]], detection_keypoints) + def test_evaluator_options_from_eval_config_no_super_categories(self): + eval_config_text_proto = """ + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + """ + eval_config = eval_pb2.EvalConfig() + text_format.Merge(eval_config_text_proto, eval_config) + evaluator_options = eval_util.evaluator_options_from_eval_config( + eval_config) + self.assertNotIn('super_categories', evaluator_options['coco_mask_metrics']) + + def test_evaluator_options_from_eval_config_with_super_categories(self): + eval_config_text_proto = """ + metrics_set: "coco_detection_metrics" + metrics_set: "coco_mask_metrics" + include_metrics_per_category: true + use_moving_averages: false + batch_size: 1; + super_categories { + key: "supercat1" + value: "a,b,c" + } + super_categories { + key: "supercat2" + value: "d,e,f" + } + """ + eval_config = eval_pb2.EvalConfig() + text_format.Merge(eval_config_text_proto, eval_config) + evaluator_options = eval_util.evaluator_options_from_eval_config( + eval_config) + self.assertIn('super_categories', evaluator_options['coco_mask_metrics']) + super_categories = evaluator_options[ + 'coco_mask_metrics']['super_categories'] + self.assertIn('supercat1', super_categories) + self.assertIn('supercat2', super_categories) + self.assertAllEqual(super_categories['supercat1'], ['a', 'b', 'c']) + self.assertAllEqual(super_categories['supercat2'], ['d', 'e', 'f']) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/export_tflite_graph_lib_tf2.py b/research/object_detection/export_tflite_graph_lib_tf2.py new file mode 100644 index 
0000000000000000000000000000000000000000..66bc8ecf03c1156ca96e44418c906963ac159ac5 --- /dev/null +++ b/research/object_detection/export_tflite_graph_lib_tf2.py @@ -0,0 +1,375 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library to export TFLite-compatible SavedModel from TF2 detection models.""" +import os +import numpy as np +import tensorflow.compat.v1 as tf1 +import tensorflow.compat.v2 as tf + +from object_detection.builders import model_builder +from object_detection.builders import post_processing_builder +from object_detection.core import box_list +from object_detection.core import standard_fields as fields + +_DEFAULT_NUM_CHANNELS = 3 +_DEFAULT_NUM_COORD_BOX = 4 +_MAX_CLASSES_PER_DETECTION = 1 +_DETECTION_POSTPROCESS_FUNC = 'TFLite_Detection_PostProcess' + + +def get_const_center_size_encoded_anchors(anchors): + """Exports center-size encoded anchors as a constant tensor. + + Args: + anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor + boxes + + Returns: + encoded_anchors: a float32 constant tensor of shape [num_anchors, 4] + containing the anchor boxes. 
+ """ + anchor_boxlist = box_list.BoxList(anchors) + y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes() + num_anchors = y.get_shape().as_list() + + with tf1.Session() as sess: + y_out, x_out, h_out, w_out = sess.run([y, x, h, w]) + encoded_anchors = tf1.constant( + np.transpose(np.stack((y_out, x_out, h_out, w_out))), + dtype=tf1.float32, + shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX], + name='anchors') + return num_anchors[0], encoded_anchors + + +class SSDModule(tf.Module): + """Inference Module for TFLite-friendly SSD models.""" + + def __init__(self, pipeline_config, detection_model, max_detections, + use_regular_nms): + """Initialization. + + Args: + pipeline_config: The original pipeline_pb2.TrainEvalPipelineConfig + detection_model: The detection model to use for inference. + max_detections: Max detections desired from the TFLite model. + use_regular_nms: If True, TFLite model uses the (slower) multi-class NMS. + """ + self._process_config(pipeline_config) + self._pipeline_config = pipeline_config + self._model = detection_model + self._max_detections = max_detections + self._use_regular_nms = use_regular_nms + + def _process_config(self, pipeline_config): + self._num_classes = pipeline_config.model.ssd.num_classes + self._nms_score_threshold = pipeline_config.model.ssd.post_processing.batch_non_max_suppression.score_threshold + self._nms_iou_threshold = pipeline_config.model.ssd.post_processing.batch_non_max_suppression.iou_threshold + self._scale_values = {} + self._scale_values[ + 'y_scale'] = pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale + self._scale_values[ + 'x_scale'] = pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale + self._scale_values[ + 'h_scale'] = pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale + self._scale_values[ + 'w_scale'] = pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale + + image_resizer_config = pipeline_config.model.ssd.image_resizer + 
image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof') + self._num_channels = _DEFAULT_NUM_CHANNELS + + if image_resizer == 'fixed_shape_resizer': + self._height = image_resizer_config.fixed_shape_resizer.height + self._width = image_resizer_config.fixed_shape_resizer.width + if image_resizer_config.fixed_shape_resizer.convert_to_grayscale: + self._num_channels = 1 + else: + raise ValueError( + 'Only fixed_shape_resizer' + 'is supported with tflite. Found {}'.format( + image_resizer_config.WhichOneof('image_resizer_oneof'))) + + def input_shape(self): + """Returns shape of TFLite model input.""" + return [1, self._height, self._width, self._num_channels] + + def postprocess_implements_signature(self): + """Returns tf.implements signature for MLIR legalization of TFLite NMS.""" + implements_signature = [ + 'name: "%s"' % _DETECTION_POSTPROCESS_FUNC, + 'attr { key: "max_detections" value { i: %d } }' % self._max_detections, + 'attr { key: "max_classes_per_detection" value { i: %d } }' % + _MAX_CLASSES_PER_DETECTION, + 'attr { key: "use_regular_nms" value { b: %s } }' % + str(self._use_regular_nms).lower(), + 'attr { key: "nms_score_threshold" value { f: %f } }' % + self._nms_score_threshold, + 'attr { key: "nms_iou_threshold" value { f: %f } }' % + self._nms_iou_threshold, + 'attr { key: "y_scale" value { f: %f } }' % + self._scale_values['y_scale'], + 'attr { key: "x_scale" value { f: %f } }' % + self._scale_values['x_scale'], + 'attr { key: "h_scale" value { f: %f } }' % + self._scale_values['h_scale'], + 'attr { key: "w_scale" value { f: %f } }' % + self._scale_values['w_scale'], + 'attr { key: "num_classes" value { i: %d } }' % self._num_classes + ] + implements_signature = ' '.join(implements_signature) + return implements_signature + + def _get_postprocess_fn(self, num_anchors, num_classes): + # There is no TF equivalent for TFLite's custom post-processing op. 
+ # So we add an 'empty' composite function here, that is legalized to the + # custom op with MLIR. + @tf.function( + experimental_implements=self.postprocess_implements_signature()) + # pylint: disable=g-unused-argument,unused-argument + def dummy_post_processing(box_encodings, class_predictions, anchors): + boxes = tf.constant(0.0, dtype=tf.float32, name='boxes') + scores = tf.constant(0.0, dtype=tf.float32, name='scores') + classes = tf.constant(0.0, dtype=tf.float32, name='classes') + num_detections = tf.constant(0.0, dtype=tf.float32, name='num_detections') + return boxes, classes, scores, num_detections + + return dummy_post_processing + + @tf.function + def inference_fn(self, image): + """Encapsulates SSD inference for TFLite conversion. + + NOTE: The Args & Returns sections below indicate the TFLite model signature, + and not what the TF graph does (since the latter does not include the custom + NMS op used by TFLite) + + Args: + image: a float32 tensor of shape [num_anchors, 4] containing the anchor + boxes + + Returns: + num_detections: a float32 scalar denoting number of total detections. + classes: a float32 tensor denoting class ID for each detection. + scores: a float32 tensor denoting score for each detection. + boxes: a float32 tensor denoting coordinates of each detected box. + """ + predicted_tensors = self._model.predict(image, true_image_shapes=None) + # The score conversion occurs before the post-processing custom op + _, score_conversion_fn = post_processing_builder.build( + self._pipeline_config.model.ssd.post_processing) + class_predictions = score_conversion_fn( + predicted_tensors['class_predictions_with_background']) + + with tf.name_scope('raw_outputs'): + # 'raw_outputs/box_encodings': a float32 tensor of shape + # [1, num_anchors, 4] containing the encoded box predictions. Note that + # these are raw predictions and no Non-Max suppression is applied on + # them and no decode center size boxes is applied to them. 
+ box_encodings = tf.identity( + predicted_tensors['box_encodings'], name='box_encodings') + # 'raw_outputs/class_predictions': a float32 tensor of shape + # [1, num_anchors, num_classes] containing the class scores for each + # anchor after applying score conversion. + class_predictions = tf.identity( + class_predictions, name='class_predictions') + # 'anchors': a float32 tensor of shape + # [4, num_anchors] containing the anchors as a constant node. + num_anchors, anchors = get_const_center_size_encoded_anchors( + predicted_tensors['anchors']) + anchors = tf.identity(anchors, name='anchors') + + # tf.function@ seems to reverse order of inputs, so reverse them here. + return self._get_postprocess_fn(num_anchors, + self._num_classes)(box_encodings, + class_predictions, + anchors)[::-1] + + +class CenterNetModule(tf.Module): + """Inference Module for TFLite-friendly CenterNet models. + + The exported CenterNet model includes the preprocessing and postprocessing + logics so the caller should pass in the raw image pixel values. It supports + both object detection and keypoint estimation task. + """ + + def __init__(self, pipeline_config, max_detections, include_keypoints, + label_map_path=''): + """Initialization. + + Args: + pipeline_config: The original pipeline_pb2.TrainEvalPipelineConfig + max_detections: Max detections desired from the TFLite model. + include_keypoints: If set true, the output dictionary will include the + keypoint coordinates and keypoint confidence scores. + label_map_path: Path to the label map which is used by CenterNet keypoint + estimation task. If provided, the label_map_path in the configuration + will be replaced by this one. 
+ """ + self._max_detections = max_detections + self._include_keypoints = include_keypoints + self._process_config(pipeline_config) + if include_keypoints and label_map_path: + pipeline_config.model.center_net.keypoint_label_map_path = label_map_path + self._pipeline_config = pipeline_config + self._model = model_builder.build( + self._pipeline_config.model, is_training=False) + + def get_model(self): + return self._model + + def _process_config(self, pipeline_config): + self._num_classes = pipeline_config.model.center_net.num_classes + + center_net_config = pipeline_config.model.center_net + image_resizer_config = center_net_config.image_resizer + image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof') + self._num_channels = _DEFAULT_NUM_CHANNELS + + if image_resizer == 'fixed_shape_resizer': + self._height = image_resizer_config.fixed_shape_resizer.height + self._width = image_resizer_config.fixed_shape_resizer.width + if image_resizer_config.fixed_shape_resizer.convert_to_grayscale: + self._num_channels = 1 + else: + raise ValueError( + 'Only fixed_shape_resizer' + 'is supported with tflite. Found {}'.format(image_resizer)) + + center_net_config.object_center_params.max_box_predictions = ( + self._max_detections) + + if not self._include_keypoints: + del center_net_config.keypoint_estimation_task[:] + + def input_shape(self): + """Returns shape of TFLite model input.""" + return [1, self._height, self._width, self._num_channels] + + @tf.function + def inference_fn(self, image): + """Encapsulates CenterNet inference for TFLite conversion. + + Args: + image: a float32 tensor of shape [1, image_height, image_width, channel] + denoting the image pixel values. + + Returns: + A dictionary of predicted tensors: + classes: a float32 tensor with shape [1, max_detections] denoting class + ID for each detection. + scores: a float32 tensor with shape [1, max_detections] denoting score + for each detection. 
+ boxes: a float32 tensor with shape [1, max_detections, 4] denoting + coordinates of each detected box. + keypoints: a float32 with shape [1, max_detections, num_keypoints, 2] + denoting the predicted keypoint coordinates (normalized in between + 0-1). Note that [:, :, :, 0] represents the y coordinates and + [:, :, :, 1] represents the x coordinates. + keypoint_scores: a float32 with shape [1, max_detections, num_keypoints] + denoting keypoint confidence scores. + """ + image = tf.cast(image, tf.float32) + image, shapes = self._model.preprocess(image) + prediction_dict = self._model.predict(image, None) + detections = self._model.postprocess( + prediction_dict, true_image_shapes=shapes) + + field_names = fields.DetectionResultFields + classes_field = field_names.detection_classes + classes = tf.cast(detections[classes_field], tf.float32) + num_detections = tf.cast(detections[field_names.num_detections], tf.float32) + + if self._include_keypoints: + model_outputs = (detections[field_names.detection_boxes], classes, + detections[field_names.detection_scores], num_detections, + detections[field_names.detection_keypoints], + detections[field_names.detection_keypoint_scores]) + else: + model_outputs = (detections[field_names.detection_boxes], classes, + detections[field_names.detection_scores], num_detections) + + # tf.function@ seems to reverse order of inputs, so reverse them here. + return model_outputs[::-1] + + +def export_tflite_model(pipeline_config, trained_checkpoint_dir, + output_directory, max_detections, use_regular_nms, + include_keypoints=False, label_map_path=''): + """Exports inference SavedModel for TFLite conversion. + + NOTE: Only supports SSD meta-architectures for now, and the output model will + have static-shaped, single-batch input. + + This function creates `output_directory` if it does not already exist, + which will hold the intermediate SavedModel that can be used with the TFLite + converter. 
+ + Args: + pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto. + trained_checkpoint_dir: Path to the trained checkpoint file. + output_directory: Path to write outputs. + max_detections: Max detections desired from the TFLite model. + use_regular_nms: If True, TFLite model uses the (slower) multi-class NMS. + Note that this argument is only used by the SSD model. + include_keypoints: Decides whether to also output the keypoint predictions. + Note that this argument is only used by the CenterNet model. + label_map_path: Path to the label map which is used by CenterNet keypoint + estimation task. If provided, the label_map_path in the configuration will + be replaced by this one. + + Raises: + ValueError: if pipeline is invalid. + """ + output_saved_model_directory = os.path.join(output_directory, 'saved_model') + + # Build the underlying model using pipeline config. + # TODO(b/162842801): Add support for other architectures. + if pipeline_config.model.WhichOneof('model') == 'ssd': + detection_model = model_builder.build( + pipeline_config.model, is_training=False) + ckpt = tf.train.Checkpoint(model=detection_model) + # The module helps build a TF SavedModel appropriate for TFLite conversion. + detection_module = SSDModule(pipeline_config, detection_model, + max_detections, use_regular_nms) + elif pipeline_config.model.WhichOneof('model') == 'center_net': + detection_module = CenterNetModule( + pipeline_config, max_detections, include_keypoints, + label_map_path=label_map_path) + ckpt = tf.train.Checkpoint(model=detection_module.get_model()) + else: + raise ValueError('Only ssd or center_net models are supported in tflite. 
' + 'Found {} in config'.format( + pipeline_config.model.WhichOneof('model'))) + + manager = tf.train.CheckpointManager( + ckpt, trained_checkpoint_dir, max_to_keep=1) + status = ckpt.restore(manager.latest_checkpoint).expect_partial() + + # Getting the concrete function traces the graph and forces variables to + # be constructed; only after this can we save the saved model. + status.assert_existing_objects_matched() + concrete_function = detection_module.inference_fn.get_concrete_function( + tf.TensorSpec( + shape=detection_module.input_shape(), dtype=tf.float32, name='input')) + status.assert_existing_objects_matched() + + # Export SavedModel. + tf.saved_model.save( + detection_module, + output_saved_model_directory, + signatures=concrete_function) diff --git a/research/object_detection/export_tflite_graph_lib_tf2_test.py b/research/object_detection/export_tflite_graph_lib_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..6bb151fbda49dc6a67960d5bd998cd3aa91640cf --- /dev/null +++ b/research/object_detection/export_tflite_graph_lib_tf2_test.py @@ -0,0 +1,342 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Test for export_tflite_graph_lib_tf2.py.""" + +from __future__ import division +import os +import unittest +import six + +import tensorflow.compat.v2 as tf + +from object_detection import export_tflite_graph_lib_tf2 +from object_detection.builders import model_builder +from object_detection.core import model +from object_detection.protos import pipeline_pb2 +from object_detection.utils import tf_version +from google.protobuf import text_format + +if six.PY2: + import mock # pylint: disable=g-importing-member,g-import-not-at-top +else: + from unittest import mock # pylint: disable=g-importing-member,g-import-not-at-top + + +class FakeModel(model.DetectionModel): + + def __init__(self): + super(FakeModel, self).__init__(num_classes=2) + self._conv = tf.keras.layers.Conv2D( + filters=1, + kernel_size=1, + strides=(1, 1), + padding='valid', + kernel_initializer=tf.keras.initializers.Constant(value=1.0)) + + def preprocess(self, inputs): + true_image_shapes = [] # Doesn't matter for the fake model. 
+ return tf.identity(inputs), true_image_shapes + + def predict(self, preprocessed_inputs, true_image_shapes): + prediction_tensors = {'image': self._conv(preprocessed_inputs)} + with tf.control_dependencies([prediction_tensors['image']]): + prediction_tensors['box_encodings'] = tf.constant( + [[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]]], tf.float32) + prediction_tensors['class_predictions_with_background'] = tf.constant( + [[[0.7, 0.6], [0.9, 0.0]]], tf.float32) + with tf.control_dependencies([ + tf.convert_to_tensor( + prediction_tensors['image'].get_shape().as_list()[1:3]) + ]): + prediction_tensors['anchors'] = tf.constant( + [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 1.0]], tf.float32) + return prediction_tensors + + def postprocess(self, prediction_dict, true_image_shapes): + predict_tensor_sum = tf.reduce_sum(prediction_dict['image']) + with tf.control_dependencies(list(prediction_dict.values())): + postprocessed_tensors = { + 'detection_boxes': + tf.constant([[[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]]], + tf.float32), + 'detection_scores': + predict_tensor_sum + + tf.constant([[0.7, 0.6], [0.9, 0.0]], tf.float32), + 'detection_classes': + tf.constant([[0, 1], [1, 0]], tf.float32), + 'num_detections': + tf.constant([2, 1], tf.float32), + 'detection_keypoints': + tf.zeros([2, 17, 2], tf.float32), + 'detection_keypoint_scores': + tf.zeros([2, 17], tf.float32), + } + return postprocessed_tensors + + def restore_map(self, checkpoint_path, from_detection_checkpoint): + pass + + def restore_from_objects(self, fine_tune_checkpoint_type): + pass + + def loss(self, prediction_dict, true_image_shapes): + pass + + def regularization_losses(self): + pass + + def updates(self): + pass + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class ExportTfLiteGraphTest(tf.test.TestCase): + + def _save_checkpoint_from_mock_model(self, checkpoint_dir): + mock_model = FakeModel() + fake_image = 
tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image) + predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) + mock_model.postprocess(predictions, true_image_shapes) + + ckpt = tf.train.Checkpoint(model=mock_model) + exported_checkpoint_manager = tf.train.CheckpointManager( + ckpt, checkpoint_dir, max_to_keep=1) + exported_checkpoint_manager.save(checkpoint_number=0) + + def _get_ssd_config(self): + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.height = 10 + pipeline_config.model.ssd.image_resizer.fixed_shape_resizer.width = 10 + pipeline_config.model.ssd.num_classes = 2 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.y_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.x_scale = 10.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.height_scale = 5.0 + pipeline_config.model.ssd.box_coder.faster_rcnn_box_coder.width_scale = 5.0 + pipeline_config.model.ssd.post_processing.batch_non_max_suppression.iou_threshold = 0.5 + return pipeline_config + + def _get_center_net_config(self): + pipeline_config_text = """ +model { + center_net { + num_classes: 1 + feature_extractor { + type: "mobilenet_v2_fpn" + } + image_resizer { + fixed_shape_resizer { + height: 10 + width: 10 + } + } + object_detection_task { + localization_loss { + l1_localization_loss { + } + } + } + object_center_params { + classification_loss { + } + max_box_predictions: 20 + } + keypoint_estimation_task { + loss { + localization_loss { + l1_localization_loss { + } + } + classification_loss { + penalty_reduced_logistic_focal_loss { + } + } + } + } + } +} + """ + return text_format.Parse( + pipeline_config_text, pipeline_pb2.TrainEvalPipelineConfig()) + + # The tf.implements signature is important since it ensures MLIR legalization, + # so we test it here. 
+ def test_postprocess_implements_signature(self): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + pipeline_config = self._get_ssd_config() + + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + + detection_model = model_builder.build( + pipeline_config.model, is_training=False) + + ckpt = tf.train.Checkpoint(model=detection_model) + manager = tf.train.CheckpointManager(ckpt, tmp_dir, max_to_keep=1) + ckpt.restore(manager.latest_checkpoint).expect_partial() + + # The module helps build a TF graph appropriate for TFLite conversion. + detection_module = export_tflite_graph_lib_tf2.SSDModule( + pipeline_config=pipeline_config, + detection_model=detection_model, + max_detections=20, + use_regular_nms=True) + + expected_signature = ('name: "TFLite_Detection_PostProcess" attr { key: ' + '"max_detections" value { i: 20 } } attr { key: ' + '"max_classes_per_detection" value { i: 1 } } attr ' + '{ key: "use_regular_nms" value { b: true } } attr ' + '{ key: "nms_score_threshold" value { f: 0.000000 }' + ' } attr { key: "nms_iou_threshold" value { f: ' + '0.500000 } } attr { key: "y_scale" value { f: ' + '10.000000 } } attr { key: "x_scale" value { f: ' + '10.000000 } } attr { key: "h_scale" value { f: ' + '5.000000 } } attr { key: "w_scale" value { f: ' + '5.000000 } } attr { key: "num_classes" value { i: ' + '2 } }') + + self.assertEqual(expected_signature, + detection_module.postprocess_implements_signature()) + + def test_unsupported_architecture(self): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.model.faster_rcnn.num_classes = 10 + + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + expected_message = 'Only ssd or 
center_net models are supported in tflite' + try: + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config=pipeline_config, + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + max_detections=10, + use_regular_nms=False) + except ValueError as e: + if expected_message not in str(e): + raise + else: + raise AssertionError('Exception not raised: %s' % expected_message) + + def test_export_yields_saved_model(self): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + output_directory = os.path.join(tmp_dir, 'output') + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config=self._get_ssd_config(), + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + max_detections=10, + use_regular_nms=False) + self.assertTrue( + os.path.exists( + os.path.join(output_directory, 'saved_model', 'saved_model.pb'))) + self.assertTrue( + os.path.exists( + os.path.join(output_directory, 'saved_model', 'variables', + 'variables.index'))) + self.assertTrue( + os.path.exists( + os.path.join(output_directory, 'saved_model', 'variables', + 'variables.data-00000-of-00001'))) + + def test_exported_model_inference(self): + tmp_dir = self.get_temp_dir() + output_directory = os.path.join(tmp_dir, 'output') + self._save_checkpoint_from_mock_model(tmp_dir) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config=self._get_ssd_config(), + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + max_detections=10, + use_regular_nms=False) + + saved_model_path = os.path.join(output_directory, 'saved_model') + detect_fn = tf.saved_model.load(saved_model_path) + detect_fn_sig = detect_fn.signatures['serving_default'] + image = 
tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + detections = detect_fn_sig(image) + + # The exported graph doesn't have numerically correct outputs, but there + # should be 4. + self.assertEqual(4, len(detections)) + + def test_center_net_inference_object_detection(self): + tmp_dir = self.get_temp_dir() + output_directory = os.path.join(tmp_dir, 'output') + self._save_checkpoint_from_mock_model(tmp_dir) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config=self._get_center_net_config(), + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + max_detections=10, + use_regular_nms=False) + + saved_model_path = os.path.join(output_directory, 'saved_model') + detect_fn = tf.saved_model.load(saved_model_path) + detect_fn_sig = detect_fn.signatures['serving_default'] + image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + detections = detect_fn_sig(image) + + # The exported graph doesn't have numerically correct outputs, but there + # should be 4. 
+ self.assertEqual(4, len(detections)) + + def test_center_net_inference_keypoint(self): + tmp_dir = self.get_temp_dir() + output_directory = os.path.join(tmp_dir, 'output') + self._save_checkpoint_from_mock_model(tmp_dir) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config=self._get_center_net_config(), + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + max_detections=10, + use_regular_nms=False, + include_keypoints=True) + + saved_model_path = os.path.join(output_directory, 'saved_model') + detect_fn = tf.saved_model.load(saved_model_path) + detect_fn_sig = detect_fn.signatures['serving_default'] + image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + detections = detect_fn_sig(image) + + # The exported graph doesn't have numerically correct outputs, but there + # should be 6 (4 for boxes, 2 for keypoints). + self.assertEqual(6, len(detections)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/export_tflite_graph_tf2.py b/research/object_detection/export_tflite_graph_tf2.py new file mode 100644 index 0000000000000000000000000000000000000000..6bb40b15d8d69e72b802f931396072e1469bb445 --- /dev/null +++ b/research/object_detection/export_tflite_graph_tf2.py @@ -0,0 +1,161 @@ +# Lint as: python2, python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Exports TF2 detection SavedModel for conversion to TensorFlow Lite. + +Link to the TF2 Detection Zoo: +https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md +The output folder will contain an intermediate SavedModel that can be used with +the TfLite converter. + +NOTE: This only supports SSD meta-architectures for now. + +One input: + image: a float32 tensor of shape[1, height, width, 3] containing the + *normalized* input image. + NOTE: See the `preprocess` function defined in the feature extractor class + in the object_detection/models directory. + +Four Outputs: + detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box + locations + detection_classes: a float32 tensor of shape [1, num_boxes] + with class indices + detection_scores: a float32 tensor of shape [1, num_boxes] + with class scores + num_boxes: a float32 tensor of size 1 containing the number of detected boxes + +Example Usage: +-------------- +python object_detection/export_tflite_graph_tf2.py \ + --pipeline_config_path path/to/ssd_model/pipeline.config \ + --trained_checkpoint_dir path/to/ssd_model/checkpoint \ + --output_directory path/to/exported_model_directory + +The expected output SavedModel would be in the directory +path/to/exported_model_directory (which is created if it does not exist). + +Config overrides (see the `config_override` flag) are text protobufs +(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override +certain fields in the provided pipeline_config_path. These are useful for +making small changes to the inference graph that differ from the training or +eval config. 
+ +Example Usage 1 (in which we change the NMS iou_threshold to be 0.5 and +NMS score_threshold to be 0.0): +python object_detection/export_tflite_model_tf2.py \ + --pipeline_config_path path/to/ssd_model/pipeline.config \ + --trained_checkpoint_dir path/to/ssd_model/checkpoint \ + --output_directory path/to/exported_model_directory + --config_override " \ + model{ \ + ssd{ \ + post_processing { \ + batch_non_max_suppression { \ + score_threshold: 0.0 \ + iou_threshold: 0.5 \ + } \ + } \ + } \ + } \ + " + +Example Usage 2 (export CenterNet model for keypoint estimation task with fixed +shape resizer and customized input resolution): +python object_detection/export_tflite_model_tf2.py \ + --pipeline_config_path path/to/ssd_model/pipeline.config \ + --trained_checkpoint_dir path/to/ssd_model/checkpoint \ + --output_directory path/to/exported_model_directory \ + --keypoint_label_map_path path/to/label_map.txt \ + --max_detections 10 \ + --centernet_include_keypoints true \ + --config_override " \ + model{ \ + center_net { \ + image_resizer { \ + fixed_shape_resizer { \ + height: 320 \ + width: 320 \ + } \ + } \ + } \ + }" \ +""" +from absl import app +from absl import flags + +import tensorflow.compat.v2 as tf +from google.protobuf import text_format +from object_detection import export_tflite_graph_lib_tf2 +from object_detection.protos import pipeline_pb2 + +tf.enable_v2_behavior() + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'pipeline_config_path', None, + 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' + 'file.') +flags.DEFINE_string('trained_checkpoint_dir', None, + 'Path to trained checkpoint directory') +flags.DEFINE_string('output_directory', None, 'Path to write outputs.') +flags.DEFINE_string( + 'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig ' + 'text proto to override pipeline_config_path.') +flags.DEFINE_integer('max_detections', 10, + 'Maximum number of detections (boxes) to return.') +# SSD-specific flags +flags.DEFINE_bool( + 
'ssd_use_regular_nms', False, + 'Flag to set postprocessing op to use Regular NMS instead of Fast NMS ' + '(Default false).') +# CenterNet-specific flags +flags.DEFINE_bool( + 'centernet_include_keypoints', False, + 'Whether to export the predicted keypoint tensors. Only CenterNet model' + ' supports this flag.' +) +flags.DEFINE_string( + 'keypoint_label_map_path', None, + 'Path of the label map used by CenterNet keypoint estimation task. If' + ' provided, the label map path in the pipeline config will be replaced by' + ' this one. Note that it is only used when exporting CenterNet model for' + ' keypoint estimation task.' +) + + +def main(argv): + del argv # Unused. + flags.mark_flag_as_required('pipeline_config_path') + flags.mark_flag_as_required('trained_checkpoint_dir') + flags.mark_flag_as_required('output_directory') + + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + + with tf.io.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f: + text_format.Parse(f.read(), pipeline_config) + override_config = pipeline_pb2.TrainEvalPipelineConfig() + text_format.Parse(FLAGS.config_override, override_config) + pipeline_config.MergeFrom(override_config) + + export_tflite_graph_lib_tf2.export_tflite_model( + pipeline_config, FLAGS.trained_checkpoint_dir, FLAGS.output_directory, + FLAGS.max_detections, FLAGS.ssd_use_regular_nms, + FLAGS.centernet_include_keypoints, FLAGS.keypoint_label_map_path) + + +if __name__ == '__main__': + app.run(main) diff --git a/research/object_detection/exporter.py b/research/object_detection/exporter.py index 61c5f7f22db46c88c8bc5c1803b281da4c020967..a4848e35b52bc87d1e752927b0faeaf41dc9cca3 100644 --- a/research/object_detection/exporter.py +++ b/research/object_detection/exporter.py @@ -170,7 +170,7 @@ def replace_variable_values_with_moving_averages(graph, with graph.as_default(): variable_averages = tf.train.ExponentialMovingAverage(0.0) ema_variables_to_restore = variable_averages.variables_to_restore() - ema_variables_to_restore = 
config_util.remove_unecessary_ema( + ema_variables_to_restore = config_util.remove_unnecessary_ema( ema_variables_to_restore, no_ema_collection) with tf.Session() as sess: read_saver = tf.train.Saver(ema_variables_to_restore) diff --git a/research/object_detection/exporter_lib_tf2_test.py b/research/object_detection/exporter_lib_tf2_test.py index 99cbf263bece871d1a7d3b5a9f92e22c3f356412..cf8973fdf9d63183a5470a16ad64836ef504b24c 100644 --- a/research/object_detection/exporter_lib_tf2_test.py +++ b/research/object_detection/exporter_lib_tf2_test.py @@ -51,11 +51,13 @@ class FakeModel(model.DetectionModel): value=conv_weight_scalar)) def preprocess(self, inputs): - true_image_shapes = [] # Doesn't matter for the fake model. - return tf.identity(inputs), true_image_shapes + return tf.identity(inputs), exporter_lib_v2.get_true_shapes(inputs) - def predict(self, preprocessed_inputs, true_image_shapes): - return {'image': self._conv(preprocessed_inputs)} + def predict(self, preprocessed_inputs, true_image_shapes, **side_inputs): + return_dict = {'image': self._conv(preprocessed_inputs)} + if 'side_inp_1' in side_inputs: + return_dict['image'] += side_inputs['side_inp_1'] + return return_dict def postprocess(self, prediction_dict, true_image_shapes): predict_tensor_sum = tf.reduce_sum(prediction_dict['image']) @@ -73,6 +75,13 @@ class FakeModel(model.DetectionModel): } return postprocessed_tensors + def predict_masks_from_boxes(self, prediction_dict, true_image_shapes, boxes): + output_dict = self.postprocess(prediction_dict, true_image_shapes) + output_dict.update({ + 'detection_masks': tf.ones(shape=(1, 2, 16), dtype=tf.float32), + }) + return output_dict + def restore_map(self, checkpoint_path, fine_tune_checkpoint_type): pass @@ -117,6 +126,7 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): with mock.patch.object( model_builder, 'build', autospec=True) as mock_builder: mock_builder.return_value = FakeModel() + 
exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder output_directory = os.path.join(tmp_dir, 'output') pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() exporter_lib_v2.export_inference_graph( @@ -142,9 +152,9 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): """Get dummy input for the given input type.""" if input_type == 'image_tensor': - return np.zeros(shape=(1, 20, 20, 3), dtype=np.uint8) + return np.zeros((1, 20, 20, 3), dtype=np.uint8) if input_type == 'float_image_tensor': - return np.zeros(shape=(1, 20, 20, 3), dtype=np.float32) + return np.zeros((1, 20, 20, 3), dtype=np.float32) elif input_type == 'encoded_image_string_tensor': image = Image.new('RGB', (20, 20)) byte_io = io.BytesIO() @@ -178,6 +188,7 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): with mock.patch.object( model_builder, 'build', autospec=True) as mock_builder: mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder output_directory = os.path.join(tmp_dir, 'output') pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() exporter_lib_v2.export_inference_graph( @@ -189,7 +200,7 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): saved_model_path = os.path.join(output_directory, 'saved_model') detect_fn = tf.saved_model.load(saved_model_path) image = self.get_dummy_input(input_type) - detections = detect_fn(image) + detections = detect_fn(tf.constant(image)) detection_fields = fields.DetectionResultFields self.assertAllClose(detections[detection_fields.detection_boxes], @@ -203,12 +214,64 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): [[1, 2], [2, 1]]) self.assertAllClose(detections[detection_fields.num_detections], [2, 1]) + @parameterized.parameters( + {'use_default_serving': True}, + {'use_default_serving': False} + ) + def test_export_saved_model_and_run_inference_with_side_inputs( + self, 
input_type='image_tensor', use_default_serving=True): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter_lib_v2.export_inference_graph( + input_type=input_type, + pipeline_config=pipeline_config, + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory, + use_side_inputs=True, + side_input_shapes='1/2,2', + side_input_names='side_inp_1,side_inp_2', + side_input_types='tf.float32,tf.uint8') + + saved_model_path = os.path.join(output_directory, 'saved_model') + detect_fn = tf.saved_model.load(saved_model_path) + detect_fn_sig = detect_fn.signatures['serving_default'] + image = tf.constant(self.get_dummy_input(input_type)) + side_input_1 = np.ones((1,), dtype=np.float32) + side_input_2 = np.ones((2, 2), dtype=np.uint8) + if use_default_serving: + detections = detect_fn_sig(input_tensor=image, + side_inp_1=tf.constant(side_input_1), + side_inp_2=tf.constant(side_input_2)) + else: + detections = detect_fn(image, + tf.constant(side_input_1), + tf.constant(side_input_2)) + + detection_fields = fields.DetectionResultFields + self.assertAllClose(detections[detection_fields.detection_boxes], + [[[0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8]], + [[0.5, 0.5, 1.0, 1.0], + [0.0, 0.0, 0.0, 0.0]]]) + self.assertAllClose(detections[detection_fields.detection_scores], + [[400.7, 400.6], [400.9, 400.0]]) + self.assertAllClose(detections[detection_fields.detection_classes], + [[1, 2], [2, 1]]) + self.assertAllClose(detections[detection_fields.num_detections], [2, 1]) + def test_export_checkpoint_and_run_inference_with_image(self): tmp_dir = self.get_temp_dir() self._save_checkpoint_from_mock_model(tmp_dir, conv_weight_scalar=2.0) 
with mock.patch.object( model_builder, 'build', autospec=True) as mock_builder: mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder output_directory = os.path.join(tmp_dir, 'output') pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() exporter_lib_v2.export_inference_graph( @@ -235,6 +298,83 @@ class ExportInferenceGraphTest(tf.test.TestCase, parameterized.TestCase): [[150 + 0.7, 150 + 0.6], [150 + 0.9, 150 + 0.0]]) +class DetectionFromImageAndBoxModuleTest(tf.test.TestCase): + + def get_dummy_input(self, input_type): + """Get dummy input for the given input type.""" + + if input_type == 'image_tensor' or input_type == 'image_and_boxes_tensor': + return np.zeros((1, 20, 20, 3), dtype=np.uint8) + if input_type == 'float_image_tensor': + return np.zeros((1, 20, 20, 3), dtype=np.float32) + elif input_type == 'encoded_image_string_tensor': + image = Image.new('RGB', (20, 20)) + byte_io = io.BytesIO() + image.save(byte_io, 'PNG') + return [byte_io.getvalue()] + elif input_type == 'tf_example': + image_tensor = tf.zeros((20, 20, 3), dtype=tf.uint8) + encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).numpy() + example = tf.train.Example( + features=tf.train.Features( + feature={ + 'image/encoded': + dataset_util.bytes_feature(encoded_jpeg), + 'image/format': + dataset_util.bytes_feature(six.b('jpeg')), + 'image/source_id': + dataset_util.bytes_feature(six.b('image_id')), + })).SerializeToString() + return [example] + + def _save_checkpoint_from_mock_model(self, + checkpoint_dir, + conv_weight_scalar=6.0): + mock_model = FakeModel(conv_weight_scalar) + fake_image = tf.zeros(shape=[1, 10, 10, 3], dtype=tf.float32) + preprocessed_inputs, true_image_shapes = mock_model.preprocess(fake_image) + predictions = mock_model.predict(preprocessed_inputs, true_image_shapes) + mock_model.postprocess(predictions, true_image_shapes) + + ckpt = tf.train.Checkpoint(model=mock_model) + exported_checkpoint_manager = 
tf.train.CheckpointManager( + ckpt, checkpoint_dir, max_to_keep=1) + exported_checkpoint_manager.save(checkpoint_number=0) + + def test_export_saved_model_and_run_inference_for_segmentation( + self, input_type='image_and_boxes_tensor'): + tmp_dir = self.get_temp_dir() + self._save_checkpoint_from_mock_model(tmp_dir) + + with mock.patch.object( + model_builder, 'build', autospec=True) as mock_builder: + mock_builder.return_value = FakeModel() + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP['model_build'] = mock_builder + output_directory = os.path.join(tmp_dir, 'output') + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + exporter_lib_v2.export_inference_graph( + input_type=input_type, + pipeline_config=pipeline_config, + trained_checkpoint_dir=tmp_dir, + output_directory=output_directory) + + saved_model_path = os.path.join(output_directory, 'saved_model') + detect_fn = tf.saved_model.load(saved_model_path) + image = self.get_dummy_input(input_type) + boxes = tf.constant([ + [ + [0.0, 0.0, 0.5, 0.5], + [0.5, 0.5, 0.8, 0.8], + ], + ]) + detections = detect_fn(tf.constant(image), boxes) + + detection_fields = fields.DetectionResultFields + self.assertIn(detection_fields.detection_masks, detections) + self.assertListEqual( + list(detections[detection_fields.detection_masks].shape), [1, 2, 16]) + + if __name__ == '__main__': tf.enable_v2_behavior() tf.test.main() diff --git a/research/object_detection/exporter_lib_v2.py b/research/object_detection/exporter_lib_v2.py index a7ecb45adb14f1b20c2291a3cf67376ad07194eb..fd10187cf4e61543c552628bf29279b48f4323c8 100644 --- a/research/object_detection/exporter_lib_v2.py +++ b/research/object_detection/exporter_lib_v2.py @@ -15,7 +15,9 @@ # ============================================================================== """Functions to export object detection inference graph.""" +import ast import os + import tensorflow.compat.v2 as tf from object_detection.builders import model_builder from object_detection.core import 
standard_fields as fields @@ -23,6 +25,11 @@ from object_detection.data_decoders import tf_example_decoder from object_detection.utils import config_util +INPUT_BUILDER_UTIL_MAP = { + 'model_build': model_builder.build, +} + + def _decode_image(encoded_image_string_tensor): image_tensor = tf.image.decode_image(encoded_image_string_tensor, channels=3) @@ -37,31 +44,87 @@ def _decode_tf_example(tf_example_string_tensor): return image_tensor +def _combine_side_inputs(side_input_shapes='', + side_input_types='', + side_input_names=''): + """Zips the side inputs together. + + Args: + side_input_shapes: forward-slash-separated list of comma-separated lists + describing input shapes. + side_input_types: comma-separated list of the types of the inputs. + side_input_names: comma-separated list of the names of the inputs. + + Returns: + a zipped list of side input tuples. + """ + side_input_shapes = [ + ast.literal_eval('[' + x + ']') for x in side_input_shapes.split('/') + ] + side_input_types = eval('[' + side_input_types + ']') # pylint: disable=eval-used + side_input_names = side_input_names.split(',') + return zip(side_input_shapes, side_input_types, side_input_names) + + class DetectionInferenceModule(tf.Module): """Detection Inference Module.""" - def __init__(self, detection_model): + def __init__(self, detection_model, + use_side_inputs=False, + zipped_side_inputs=None): """Initializes a module for detection. Args: - detection_model: The detection model to use for inference. + detection_model: the detection model to use for inference. + use_side_inputs: whether to use side inputs. + zipped_side_inputs: the zipped side inputs. 
""" self._model = detection_model - def _run_inference_on_images(self, image): + def _get_side_input_signature(self, zipped_side_inputs): + sig = [] + side_input_names = [] + for info in zipped_side_inputs: + sig.append(tf.TensorSpec(shape=info[0], + dtype=info[1], + name=info[2])) + side_input_names.append(info[2]) + return sig + + def _get_side_names_from_zip(self, zipped_side_inputs): + return [side[2] for side in zipped_side_inputs] + + def _preprocess_input(self, batch_input, decode_fn): + # Input preprocessing happends on the CPU. We don't need to use the device + # placement as it is automatically handled by TF. + def _decode_and_preprocess(single_input): + image = decode_fn(single_input) + image = tf.cast(image, tf.float32) + image, true_shape = self._model.preprocess(image[tf.newaxis, :, :, :]) + return image[0], true_shape[0] + + images, true_shapes = tf.map_fn( + _decode_and_preprocess, + elems=batch_input, + parallel_iterations=32, + back_prop=False, + fn_output_signature=(tf.float32, tf.int32)) + return images, true_shapes + + def _run_inference_on_images(self, images, true_shapes, **kwargs): """Cast image to float and run inference. Args: - image: uint8 Tensor of shape [1, None, None, 3] + images: float32 Tensor of shape [None, None, None, 3]. + true_shapes: int32 Tensor of form [batch, 3] + **kwargs: additional keyword arguments. + Returns: Tensor dictionary holding detections. 
""" label_id_offset = 1 - - image = tf.cast(image, tf.float32) - image, shapes = self._model.preprocess(image) - prediction_dict = self._model.predict(image, shapes) - detections = self._model.postprocess(prediction_dict, shapes) + prediction_dict = self._model.predict(images, true_shapes, **kwargs) + detections = self._model.postprocess(prediction_dict, true_shapes) classes_field = fields.DetectionResultFields.detection_classes detections[classes_field] = ( tf.cast(detections[classes_field], tf.float32) + label_id_offset) @@ -75,11 +138,44 @@ class DetectionInferenceModule(tf.Module): class DetectionFromImageModule(DetectionInferenceModule): """Detection Inference Module for image inputs.""" - @tf.function( - input_signature=[ - tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.uint8)]) - def __call__(self, input_tensor): - return self._run_inference_on_images(input_tensor) + def __init__(self, detection_model, + use_side_inputs=False, + zipped_side_inputs=None): + """Initializes a module for detection. + + Args: + detection_model: the detection model to use for inference. + use_side_inputs: whether to use side inputs. + zipped_side_inputs: the zipped side inputs. + """ + if zipped_side_inputs is None: + zipped_side_inputs = [] + sig = [tf.TensorSpec(shape=[1, None, None, 3], + dtype=tf.uint8, + name='input_tensor')] + if use_side_inputs: + sig.extend(self._get_side_input_signature(zipped_side_inputs)) + self._side_input_names = self._get_side_names_from_zip(zipped_side_inputs) + + def call_func(input_tensor, *side_inputs): + kwargs = dict(zip(self._side_input_names, side_inputs)) + images, true_shapes = self._preprocess_input(input_tensor, lambda x: x) + return self._run_inference_on_images(images, true_shapes, **kwargs) + + self.__call__ = tf.function(call_func, input_signature=sig) + + # TODO(kaushikshiv): Check if omitting the signature also works. 
+ super(DetectionFromImageModule, self).__init__(detection_model, + use_side_inputs, + zipped_side_inputs) + + +def get_true_shapes(input_tensor): + input_shape = tf.shape(input_tensor) + batch = input_shape[0] + image_shape = input_shape[1:] + true_shapes = tf.tile(image_shape[tf.newaxis, :], [batch, 1]) + return true_shapes class DetectionFromFloatImageModule(DetectionInferenceModule): @@ -87,53 +183,40 @@ class DetectionFromFloatImageModule(DetectionInferenceModule): @tf.function( input_signature=[ - tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.float32)]) + tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32)]) def __call__(self, input_tensor): - return self._run_inference_on_images(input_tensor) + images, true_shapes = self._preprocess_input(input_tensor, lambda x: x) + return self._run_inference_on_images(images, + true_shapes) class DetectionFromEncodedImageModule(DetectionInferenceModule): """Detection Inference Module for encoded image string inputs.""" - @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)]) + @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)]) def __call__(self, input_tensor): - with tf.device('cpu:0'): - image = tf.map_fn( - _decode_image, - elems=input_tensor, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False) - return self._run_inference_on_images(image) + images, true_shapes = self._preprocess_input(input_tensor, _decode_image) + return self._run_inference_on_images(images, true_shapes) class DetectionFromTFExampleModule(DetectionInferenceModule): """Detection Inference Module for TF.Example inputs.""" - @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.string)]) + @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)]) def __call__(self, input_tensor): - with tf.device('cpu:0'): - image = tf.map_fn( - _decode_tf_example, - elems=input_tensor, - dtype=tf.uint8, - parallel_iterations=32, - back_prop=False) - return 
self._run_inference_on_images(image) - -DETECTION_MODULE_MAP = { - 'image_tensor': DetectionFromImageModule, - 'encoded_image_string_tensor': - DetectionFromEncodedImageModule, - 'tf_example': DetectionFromTFExampleModule, - 'float_image_tensor': DetectionFromFloatImageModule -} + images, true_shapes = self._preprocess_input(input_tensor, + _decode_tf_example) + return self._run_inference_on_images(images, true_shapes) def export_inference_graph(input_type, pipeline_config, trained_checkpoint_dir, - output_directory): + output_directory, + use_side_inputs=False, + side_input_shapes='', + side_input_types='', + side_input_names=''): """Exports inference graph for the model specified in the pipeline config. This function creates `output_directory` if it does not already exist, @@ -147,14 +230,20 @@ def export_inference_graph(input_type, pipeline_config: pipeline_pb2.TrainAndEvalPipelineConfig proto. trained_checkpoint_dir: Path to the trained checkpoint file. output_directory: Path to write outputs. + use_side_inputs: boolean that determines whether side inputs should be + included in the input signature. + side_input_shapes: forward-slash-separated list of comma-separated lists + describing input shapes. + side_input_types: comma-separated list of the types of the inputs. + side_input_names: comma-separated list of the names of the inputs. Raises: ValueError: if input_type is invalid. 
""" output_checkpoint_directory = os.path.join(output_directory, 'checkpoint') output_saved_model_directory = os.path.join(output_directory, 'saved_model') - detection_model = model_builder.build(pipeline_config.model, - is_training=False) + detection_model = INPUT_BUILDER_UTIL_MAP['model_build']( + pipeline_config.model, is_training=False) ckpt = tf.train.Checkpoint( model=detection_model) @@ -164,7 +253,18 @@ def export_inference_graph(input_type, if input_type not in DETECTION_MODULE_MAP: raise ValueError('Unrecognized `input_type`') - detection_module = DETECTION_MODULE_MAP[input_type](detection_model) + if use_side_inputs and input_type != 'image_tensor': + raise ValueError('Side inputs supported for image_tensor input type only.') + + zipped_side_inputs = [] + if use_side_inputs: + zipped_side_inputs = _combine_side_inputs(side_input_shapes, + side_input_types, + side_input_names) + + detection_module = DETECTION_MODULE_MAP[input_type](detection_model, + use_side_inputs, + list(zipped_side_inputs)) # Getting the concrete function traces the graph and forces variables to # be constructed --- only after this can we save the checkpoint and # saved model. @@ -180,3 +280,78 @@ def export_inference_graph(input_type, signatures=concrete_function) config_util.save_pipeline_config(pipeline_config, output_directory) + + +class DetectionFromImageAndBoxModule(DetectionInferenceModule): + """Detection Inference Module for image with bounding box inputs. + + The saved model will require two inputs (image and normalized boxes) and run + per-box mask prediction. To be compatible with this exporter, the detection + model has to implement a called predict_masks_from_boxes( + prediction_dict, true_image_shapes, provided_boxes, **params), where + - prediciton_dict is a dict returned by the predict method. + - true_image_shapes is a tensor of size [batch_size, 3], containing the + true shape of each image in case it is padded. 
+ - provided_boxes is a [batch_size, num_boxes, 4] size tensor containing + boxes specified in normalized coordinates. + """ + + def __init__(self, + detection_model, + use_side_inputs=False, + zipped_side_inputs=None): + """Initializes a module for detection. + + Args: + detection_model: the detection model to use for inference. + use_side_inputs: whether to use side inputs. + zipped_side_inputs: the zipped side inputs. + """ + assert hasattr(detection_model, 'predict_masks_from_boxes') + super(DetectionFromImageAndBoxModule, + self).__init__(detection_model, use_side_inputs, zipped_side_inputs) + + def _run_segmentation_on_images(self, image, boxes, **kwargs): + """Run segmentation on images with provided boxes. + + Args: + image: uint8 Tensor of shape [1, None, None, 3]. + boxes: float32 tensor of shape [1, None, 4] containing normalized box + coordinates. + **kwargs: additional keyword arguments. + + Returns: + Tensor dictionary holding detections (including masks). + """ + label_id_offset = 1 + + image = tf.cast(image, tf.float32) + image, shapes = self._model.preprocess(image) + prediction_dict = self._model.predict(image, shapes, **kwargs) + detections = self._model.predict_masks_from_boxes(prediction_dict, shapes, + boxes) + classes_field = fields.DetectionResultFields.detection_classes + detections[classes_field] = ( + tf.cast(detections[classes_field], tf.float32) + label_id_offset) + + for key, val in detections.items(): + detections[key] = tf.cast(val, tf.float32) + + return detections + + @tf.function(input_signature=[ + tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.uint8), + tf.TensorSpec(shape=[1, None, 4], dtype=tf.float32) + ]) + def __call__(self, input_tensor, boxes): + return self._run_segmentation_on_images(input_tensor, boxes) + + +DETECTION_MODULE_MAP = { + 'image_tensor': DetectionFromImageModule, + 'encoded_image_string_tensor': + DetectionFromEncodedImageModule, + 'tf_example': DetectionFromTFExampleModule, + 'float_image_tensor': 
DetectionFromFloatImageModule, + 'image_and_boxes_tensor': DetectionFromImageAndBoxModule, +} diff --git a/research/object_detection/exporter_main_v2.py b/research/object_detection/exporter_main_v2.py index a2ba8456039d4584e5998d619f36747d58018418..39630b65d2467ff759c3af4b4171fe8fd1733397 100644 --- a/research/object_detection/exporter_main_v2.py +++ b/research/object_detection/exporter_main_v2.py @@ -31,6 +31,11 @@ specified option. * `tf_example`: Accepts a 1-D string tensor of shape [None] containing serialized TFExample protos. Image resolutions are expected to be the same if more than 1 image is provided. + * `image_and_boxes_tensor`: Accepts a 4-D image tensor of size + [1, None, None, 3] and a boxes tensor of size [1, None, 4] of normalized + bounding boxes. To be able to support this option, the model needs + to implement a predict_masks_from_boxes method. See the documentation + for DetectionFromImageAndBoxModule for details. and the following output nodes returned by the model.postprocess(..): * `num_detections`: Outputs float32 tensors of the form [batch] @@ -50,6 +55,10 @@ python exporter_main_v2.py \ --pipeline_config_path path/to/ssd_inception_v2.config \ --trained_checkpoint_dir path/to/checkpoint \ --output_directory path/to/exported_model_directory + --use_side_inputs True/False \ + --side_input_shapes dim_0,dim_1,...dim_a/.../dim_0,dim_1,...,dim_z \ + --side_input_names name_a,name_b,...,name_c \ + --side_input_types type_1,type_2 The expected output would be in the directory path/to/exported_model_directory (which is created if it does not exist) @@ -80,6 +89,13 @@ python exporter_main_v2.py \ } \ } \ }" + +If side inputs are desired, the following arguments could be appended +(the example below is for Context R-CNN). 
+ --use_side_inputs True \ + --side_input_shapes 1,2000,2057/1 \ + --side_input_names context_features,valid_context_size \ + --side_input_types tf.float32,tf.int32 """ from absl import app from absl import flags @@ -96,7 +112,8 @@ FLAGS = flags.FLAGS flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be ' 'one of [`image_tensor`, `encoded_image_string_tensor`, ' - '`tf_example`, `float_image_tensor`]') + '`tf_example`, `float_image_tensor`, ' + '`image_and_boxes_tensor`]') flags.DEFINE_string('pipeline_config_path', None, 'Path to a pipeline_pb2.TrainEvalPipelineConfig config ' 'file.') @@ -106,6 +123,27 @@ flags.DEFINE_string('output_directory', None, 'Path to write outputs.') flags.DEFINE_string('config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig ' 'text proto to override pipeline_config_path.') +flags.DEFINE_boolean('use_side_inputs', False, + 'If True, uses side inputs as well as image inputs.') +flags.DEFINE_string('side_input_shapes', '', + 'If use_side_inputs is True, this explicitly sets ' + 'the shape of the side input tensors to a fixed size. The ' + 'dimensions are to be provided as a comma-separated list ' + 'of integers. A value of -1 can be used for unknown ' + 'dimensions. A `/` denotes a break, starting the shape of ' + 'the next side input tensor. This flag is required if ' + 'using side inputs.') +flags.DEFINE_string('side_input_types', '', + 'If use_side_inputs is True, this explicitly sets ' + 'the type of the side input tensors. The ' + 'dimensions are to be provided as a comma-separated list ' + 'of types, each of `string`, `integer`, or `float`. ' + 'This flag is required if using side inputs.') +flags.DEFINE_string('side_input_names', '', + 'If use_side_inputs is True, this explicitly sets ' + 'the names of the side input tensors required by the model ' + 'assuming the names will be a comma-separated list of ' + 'strings. 
This flag is required if using side inputs.') flags.mark_flag_as_required('pipeline_config_path') flags.mark_flag_as_required('trained_checkpoint_dir') @@ -119,7 +157,8 @@ def main(_): text_format.Merge(FLAGS.config_override, pipeline_config) exporter_lib_v2.export_inference_graph( FLAGS.input_type, pipeline_config, FLAGS.trained_checkpoint_dir, - FLAGS.output_directory) + FLAGS.output_directory, FLAGS.use_side_inputs, FLAGS.side_input_shapes, + FLAGS.side_input_types, FLAGS.side_input_names) if __name__ == '__main__': diff --git a/research/object_detection/g3doc/deepmac.md b/research/object_detection/g3doc/deepmac.md new file mode 100644 index 0000000000000000000000000000000000000000..effffbbba6d06bbfdc1ab4b23a10a5d533c733d7 --- /dev/null +++ b/research/object_detection/g3doc/deepmac.md @@ -0,0 +1,97 @@ +# DeepMAC model + + + +**DeepMAC** (Deep Mask heads Above CenterNet) is a neural network architecture +that is designed for the partially supervised instance segmentation task. For +details see the +[The surprising impact of mask-head architecture on novel class segmentation](https://arxiv.org/abs/2104.00613) +paper. The figure below shows improved mask predictions for unseen classes as we +use better mask-head architectures. + +

+ +

+ +Just by using better mask-head architectures (no extra losses or modules) we +achieve state-of-the-art performance in the partially supervised instance +segmentation task. + +## Code structure + +* `deepmac_meta_arch.py` implements our main architecture, DeepMAC, on top of + the CenterNet detection architecture. +* The proto message `DeepMACMaskEstimation` in `center_net.proto` controls the + configuration of the mask head used. +* The field `allowed_masked_classes_ids` controls which classes receive mask + supervision during training. +* Mask R-CNN based ablations in the paper are implemented in the + [TF model garden](../../../official/vision/beta/projects/deepmac_maskrcnn) + code base. + +## Prerequisites + +1. Follow [TF2 install instructions](tf2.md) to install Object Detection API. +2. Generate COCO dataset by using + [create_coco_tf_record.py](../../../official/vision/beta/data/create_coco_tf_record.py) + +## Configurations + +We provide pre-defined configs which can be run as a +[TF2 training pipeline](tf2_training_and_evaluation.md). Each of these +configurations needs to be passed as the `pipeline_config_path` argument to the +`object_detection/model_main_tf2.py` binary. Note that the `512x512` resolution +models require a TPU `v3-32` and the `1024x1024` resolution models require a TPU +`v3-128` to train. The configs can be found in the [configs/tf2](../configs/tf2) +directory. In the table below `X->Y` indicates that we train with masks from `X` +and evaluate with masks from `Y`. Performance is measured on the `coco-val2017` +set. 
+ +### Partially supervised models + +Resolution | Mask head | Train->Eval | Config name | Mask mAP +:--------- | :------------ | :------------- | :------------------------------------------------- | -------: +512x512 | Hourglass-52 | VOC -> Non-VOC | `center_net_deepmac_512x512_voc_only.config` | 32.5 +1024x1024 | Hourglass-100 | VOC -> Non-VOC | `center_net_deepmac_1024x1024_voc_only.config` | 35.5 +1024x1024 | Hourglass-100 | Non-VOC -> VOC | `center_net_deepmac_1024x1024_non_voc_only.config` | 39.1 + +### Fully supervised models + +Here we report the Mask mAP averaged over all COCO classes on the `test-dev2017` +set . + +Resolution | Mask head | Config name | Mask mAP +:--------- | :------------ | :----------------------------------------- | -------: +1024x1024 | Hourglass-100 | `center_net_deepmac_1024x1024_coco.config` | 39.4 + +## Demos + +* [DeepMAC Colab](../colab_tutorials/deepmac_colab.ipynb) lets you run a + pre-trained DeepMAC model on user-specified boxes. Note that you are not + restricted to COCO classes! +* [iWildCam Notebook](https://www.kaggle.com/vighneshbgoogle/iwildcam-visualize-instance-masks) + to visualize instance masks generated by DeepMAC on the iWildCam dataset. + +## Pre-trained models + +* [COCO Checkpoint](http://download.tensorflow.org/models/object_detection/tf2/20210329/deepmac_1024x1024_coco17.tar.gz) - + Takes as input Image + Boxes and produces per-box instance masks as output. + +## See also + +* [Mask RCNN code](https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/deepmac_maskrcnn) + in TF Model garden code base. 
+* Project website - [git.io/deepmac](https://git.io/deepmac) + +## Citation + +``` +@misc{birodkar2021surprising, + title={The surprising impact of mask-head architecture on novel class segmentation}, + author={Vighnesh Birodkar and Zhichao Lu and Siyang Li and Vivek Rathod and Jonathan Huang}, + year={2021}, + eprint={2104.00613}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/research/object_detection/g3doc/img/mask_improvement.png b/research/object_detection/g3doc/img/mask_improvement.png new file mode 100644 index 0000000000000000000000000000000000000000..3e35501b8b59083ba60a87ab917f1919c06cef27 Binary files /dev/null and b/research/object_detection/g3doc/img/mask_improvement.png differ diff --git a/research/object_detection/g3doc/release_notes.md b/research/object_detection/g3doc/release_notes.md index f69727d5d8547e908b6de76b43a678943c5bee09..21512397c9991e74bbe7a009e5cae40e11de63c6 100644 --- a/research/object_detection/g3doc/release_notes.md +++ b/research/object_detection/g3doc/release_notes.md @@ -1,5 +1,12 @@ # Release Notes +### September 3rd, 2020 + +TF2 OD API models can now be converted to TensorFlow Lite! Only SSD models +currently supported. See documentation. + +**Thanks to contributors**: Sachin Joglekar + ### July 10th, 2020 We are happy to announce that the TF OD API officially supports TF2! Our release @@ -40,6 +47,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi, Yixin Shi, Yu-hui Chen, Zhichao Lu. +### June 26th, 2020 + +We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP +higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson +Xavier at comparable latency (3.2ms vs 3.3ms). + +Along with the model definition, we are also releasing model checkpoints trained +on the COCO dataset. 
+ +Thanks to contributors: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An +(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA). + ### June 17th, 2020 We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that diff --git a/research/object_detection/g3doc/running_on_mobile_tensorflowlite.md b/research/object_detection/g3doc/running_on_mobile_tensorflowlite.md index 379652e34cb2241d6294679548c988e8916510bc..0acbf5dd34a5de1311b4dfb7fd0966c34c895ef1 100644 --- a/research/object_detection/g3doc/running_on_mobile_tensorflowlite.md +++ b/research/object_detection/g3doc/running_on_mobile_tensorflowlite.md @@ -51,22 +51,23 @@ will output the frozen graph that we can input to TensorFlow Lite directly and is the one we’ll be using. Next we’ll use TensorFlow Lite to get the optimized model by using -[TOCO](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/toco), +[TfLite Converter](https://www.tensorflow.org/lite/convert), the TensorFlow Lite Optimizing Converter. This will convert the resulting frozen graph (tflite_graph.pb) to the TensorFlow Lite flatbuffer format (detect.tflite) via the following command. 
For a quantized model, run this from the tensorflow/ directory: ```shell -bazel run -c opt tensorflow/lite/toco:toco -- \ ---input_file=$OUTPUT_DIR/tflite_graph.pb \ +bazel run -c opt tensorflow/lite/python:tflite_convert -- \ +--enable_v1_converter \ +--graph_def_file=$OUTPUT_DIR/tflite_graph.pb \ --output_file=$OUTPUT_DIR/detect.tflite \ --input_shapes=1,300,300,3 \ --input_arrays=normalized_input_image_tensor \ --output_arrays='TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1','TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3' \ --inference_type=QUANTIZED_UINT8 \ --mean_values=128 \ ---std_values=128 \ +--std_dev_values=128 \ --change_concat_input_ranges=false \ --allow_custom_ops ``` @@ -84,8 +85,9 @@ parameters and can be run via the TensorFlow Lite interpreter on the Android device. For a floating point model, run this from the tensorflow/ directory: ```shell -bazel run -c opt tensorflow/lite/toco:toco -- \ ---input_file=$OUTPUT_DIR/tflite_graph.pb \ +bazel run -c opt tensorflow/lite/python:tflite_convert -- \ +--enable_v1_converter \ +--graph_def_file=$OUTPUT_DIR/tflite_graph.pb \ --output_file=$OUTPUT_DIR/detect.tflite \ --input_shapes=1,300,300,3 \ --input_arrays=normalized_input_image_tensor \ diff --git a/research/object_detection/g3doc/running_on_mobile_tf2.md b/research/object_detection/g3doc/running_on_mobile_tf2.md new file mode 100644 index 0000000000000000000000000000000000000000..efa335c17b83590e76ada787b3ea76e3e97e66d9 --- /dev/null +++ b/research/object_detection/g3doc/running_on_mobile_tf2.md @@ -0,0 +1,145 @@ +# Running TF2 Detection API Models on mobile + +[![TensorFlow 2.4](https://img.shields.io/badge/TensorFlow-2.4-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.4.0) +[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/) + +**NOTE:** This document talks about the *SSD* models in the detection zoo. 
For +details on our (experimental) CenterNet support, see +[this notebook](../colab_tutorials/centernet_on_device.ipynb). + +[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) (TFLite) is +TensorFlow’s lightweight solution for mobile and embedded devices. It enables +on-device machine learning inference with low latency and a small binary size. +TensorFlow Lite uses many techniques for this such as quantized kernels that +allow smaller and faster (fixed-point math) models. + +This document shows how eligible models from the +[TF2 Detection zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md) +can be converted for inference with TFLite. + +For an end-to-end Python guide on how to fine-tune an SSD model for mobile +inference, look at +[this Colab](../colab_tutorials/eager_few_shot_od_training_tflite.ipynb). + +**NOTE:** TFLite currently only supports **SSD Architectures** (excluding +EfficientDet) for boxes-based detection. Support for EfficientDet is coming +soon. + +The output model has the following inputs & outputs: + +``` +One input: + image: a float32 tensor of shape[1, height, width, 3] containing the + *normalized* input image. + NOTE: See the `preprocess` function defined in the feature extractor class + in the object_detection/models directory. + +Four Outputs: + detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box + locations + detection_classes: a float32 tensor of shape [1, num_boxes] + with class indices + detection_scores: a float32 tensor of shape [1, num_boxes] + with class scores + num_boxes: a float32 tensor of size 1 containing the number of detected boxes +``` + +There are two steps to TFLite conversion: + +### Step 1: Export TFLite inference graph + +This step generates an intermediate SavedModel that can be used with the +[TFLite Converter](https://www.tensorflow.org/lite/convert) via commandline or +Python API. 
+ +To use the script: + +```bash +# From the tensorflow/models/research/ directory +python object_detection/export_tflite_graph_tf2.py \ + --pipeline_config_path path/to/ssd_model/pipeline.config \ + --trained_checkpoint_dir path/to/ssd_model/checkpoint \ + --output_directory path/to/exported_model_directory +``` + +Use `--help` with the above script to get the full list of supported parameters. +These can fine-tune accuracy and speed for your model. + +### Step 2: Convert to TFLite + +Use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert) to +convert the `SavedModel` to TFLite. Note that you need to use `from_saved_model` +for TFLite conversion with the Python API. + +You can also leverage +[Post-training Quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) +to +[optimize performance](https://www.tensorflow.org/lite/performance/model_optimization) +and obtain a smaller model. Note that this is only possible from the *Python +API*. Be sure to use a +[representative dataset](https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization) +and set the following options on the converter: + +```python +converter.optimizations = [tf.lite.Optimize.DEFAULT] +converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, + tf.lite.OpsSet.TFLITE_BUILTINS] +converter.representative_dataset = <...> +``` + +## Running our model on Android + +To run our TensorFlow Lite model on device, we will use Android Studio to build +and run the TensorFlow Lite detection example with the new model. The example is +found in the +[TensorFlow examples repository](https://github.com/tensorflow/examples) under +`/lite/examples/object_detection`. The example can be built with +[Android Studio](https://developer.android.com/studio/index.html), and requires +the +[Android SDK with build tools](https://developer.android.com/tools/revisions/build-tools.html) +that support API >= 21. 
Additional details are available on the +[TensorFlow Lite example page](https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection/android). + +Next we need to point the app to our new detect.tflite file and give it the +names of our new labels. Specifically, we will copy our TensorFlow Lite +flatbuffer to the app assets directory with the following command: + +```shell +mkdir $TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/assets +cp /tmp/tflite/detect.tflite \ + $TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/assets +``` + +You will also need to copy your new labelmap labelmap.txt to the assets +directory. + +We will now edit the gradle build file to use these assets. First, open the +`build.gradle` file +`$TF_EXAMPLES/lite/examples/object_detection/android/app/build.gradle`. Comment +out the model download script to avoid your assets being overwritten: + +```shell +// apply from:'download_model.gradle' +``` + +If your model is named `detect.tflite`, and your labels file `labelmap.txt`, the +example will use them automatically as long as they've been properly copied into +the base assets directory. If you need to use a custom path or filename, open up +the +$TF_EXAMPLES/lite/examples/object_detection/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java +file in a text editor and find the definition of TF_OD_API_LABELS_FILE. Update +this path to point to your new label map file: "labels_list.txt". Note that if +your model is quantized, the flag TF_OD_API_IS_QUANTIZED is set to true, and if +your model is floating point, the flag TF_OD_API_IS_QUANTIZED is set to false. 
+This new section of DetectorActivity.java should now look as follows for a +quantized model: + +```java + private static final boolean TF_OD_API_IS_QUANTIZED = true; + private static final String TF_OD_API_MODEL_FILE = "detect.tflite"; + private static final String TF_OD_API_LABELS_FILE = "labels_list.txt"; +``` + +Once you’ve copied the TensorFlow Lite model and edited the gradle build script +to not use the downloaded assets, you can build and deploy the app using the +usual Android Studio build process. diff --git a/research/object_detection/g3doc/tf1.md b/research/object_detection/g3doc/tf1.md index f973ef38c3a70dbd2bc996b90141ae50eed84713..f1577600963e1af99b6fdd192028a12622240cc2 100644 --- a/research/object_detection/g3doc/tf1.md +++ b/research/object_detection/g3doc/tf1.md @@ -35,7 +35,7 @@ cd models/research protoc object_detection/protos/*.proto --python_out=. # Install TensorFlow Object Detection API. cp object_detection/packages/tf1/setup.py . -python -m pip install . +python -m pip install --use-feature=2020-resolver . ``` ```bash @@ -73,6 +73,8 @@ the [Model Zoo](tf1_detection_zoo.md). Supported object detection evaluation protocols
* TPU compatible detection pipelines
+* + Training and evaluation guide (CPU, GPU, or TPU)
## Extras: diff --git a/research/object_detection/g3doc/tf1_detection_zoo.md b/research/object_detection/g3doc/tf1_detection_zoo.md index 15416bb7aec947933097af1ef26205f3b8d60e20..6f002cd09bb8208e02af3d4960212470cd291555 100644 --- a/research/object_detection/g3doc/tf1_detection_zoo.md +++ b/research/object_detection/g3doc/tf1_detection_zoo.md @@ -67,8 +67,7 @@ Some remarks on frozen inference graphs: metrics. * Our frozen inference graphs are generated using the [v1.12.0](https://github.com/tensorflow/tensorflow/tree/v1.12.0) release - version of TensorFlow and we do not guarantee that these will work with - other versions; this being said, each frozen inference graph can be + version of TensorFlow; this being said, each frozen inference graph can be regenerated using your current version of TensorFlow by re-running the [exporter](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/exporting_models.md), pointing it at the model directory as well as the corresponding config file diff --git a/research/object_detection/g3doc/tf2.md b/research/object_detection/g3doc/tf2.md index 3860c7ea2f302816fd1c38068862b5a8f837ff53..d45d157f3b94cff36a3a76fd18a9fe7b4f7a2d9c 100644 --- a/research/object_detection/g3doc/tf2.md +++ b/research/object_detection/g3doc/tf2.md @@ -35,7 +35,7 @@ cd models/research protoc object_detection/protos/*.proto --python_out=. # Install TensorFlow Object Detection API. cp object_detection/packages/tf2/setup.py . -python -m pip install . +python -m pip install --use-feature=2020-resolver . 
``` ```bash @@ -55,6 +55,9 @@ python object_detection/builders/model_builder_tf2_test.py * Inference - [Run inference with models from the zoo](../colab_tutorials/inference_tf2_colab.ipynb) +* Few Shot Learning for Mobile Inference - + [Fine-tune a pre-trained detector for use with TensorFlow Lite](../colab_tutorials/eager_few_shot_od_training_tflite.ipynb) + ## Training and Evaluation @@ -80,3 +83,5 @@ We provide a large collection of models that are trained on COCO 2017 in the Supported object detection evaluation protocols
* TPU compatible detection pipelines
+* + Training and evaluation guide (CPU, GPU, or TPU)
diff --git a/research/object_detection/g3doc/tf2_detection_zoo.md b/research/object_detection/g3doc/tf2_detection_zoo.md index b129b80003a9093f9bfa565f8b86625a3de6b747..249e5fa1bf8d0f304043050c5c65299ca705b390 100644 --- a/research/object_detection/g3doc/tf2_detection_zoo.md +++ b/research/object_detection/g3doc/tf2_detection_zoo.md @@ -15,6 +15,8 @@ They are also useful for initializing your models when training on novel datasets. You can try this out on our few-shot training [colab](../colab_tutorials/eager_few_shot_od_training_tf2_colab.ipynb). +Please look at [this guide](running_on_mobile_tf2.md) for mobile inference. + Finally, if you would like to train these models from scratch, you can find the @@ -23,15 +25,17 @@ model configs in this [directory](../configs/tf2) (also in the linked Model name | Speed (ms) | COCO mAP | Outputs --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------: | :----------: | :-----: -[CenterNet HourGlass104 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_hg104_512x512_coco17_tpu-8.tar.gz) | 70 | 41.6 | Boxes +[CenterNet HourGlass104 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200713/centernet_hg104_512x512_coco17_tpu-8.tar.gz) | 70 | 41.9 | Boxes [CenterNet HourGlass104 Keypoints 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_hg104_512x512_kpts_coco17_tpu-32.tar.gz) | 76 | 40.0/61.4 | Boxes/Keypoints -[CenterNet HourGlass104 1024x1024](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_hg104_1024x1024_coco17_tpu-32.tar.gz) | 197 | 43.5 | Boxes +[CenterNet HourGlass104 1024x1024](http://download.tensorflow.org/models/object_detection/tf2/20200713/centernet_hg104_1024x1024_coco17_tpu-32.tar.gz) | 197 | 44.5 | Boxes [CenterNet HourGlass104 Keypoints 
1024x1024](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_hg104_1024x1024_kpts_coco17_tpu-32.tar.gz) | 211 | 42.8/64.5 | Boxes/Keypoints [CenterNet Resnet50 V1 FPN 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_resnet50_v1_fpn_512x512_coco17_tpu-8.tar.gz) | 27 | 31.2 | Boxes [CenterNet Resnet50 V1 FPN Keypoints 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_resnet50_v1_fpn_512x512_kpts_coco17_tpu-8.tar.gz) | 30 | 29.3/50.7 | Boxes/Keypoints [CenterNet Resnet101 V1 FPN 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_resnet101_v1_fpn_512x512_coco17_tpu-8.tar.gz) | 34 | 34.2 | Boxes [CenterNet Resnet50 V2 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_resnet50_v2_512x512_coco17_tpu-8.tar.gz) | 27 | 29.5 | Boxes [CenterNet Resnet50 V2 Keypoints 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/centernet_resnet50_v2_512x512_kpts_coco17_tpu-8.tar.gz) | 30 | 27.6/48.2 | Boxes/Keypoints +[CenterNet MobileNetV2 FPN 512x512](http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_od.tar.gz) | 6 | 23.4 | Boxes +[CenterNet MobileNetV2 FPN Keypoints 512x512](http://download.tensorflow.org/models/object_detection/tf2/20210210/centernet_mobilenetv2fpn_512x512_coco17_kpts.tar.gz) | 6 | 41.7 | Keypoints [EfficientDet D0 512x512](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz) | 39 | 33.6 | Boxes [EfficientDet D1 640x640](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz) | 54 | 38.4 | Boxes [EfficientDet D2 768x768](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d2_coco17_tpu-32.tar.gz) | 67 | 41.8 | Boxes diff --git a/research/object_detection/inference/infer_detections.py 
b/research/object_detection/inference/infer_detections.py index 3579142fc9add1c439164f0d938d4efe9089c692..7bc662f4297436024dd2f9632fdd92133116d482 100644 --- a/research/object_detection/inference/infer_detections.py +++ b/research/object_detection/inference/infer_detections.py @@ -17,7 +17,7 @@ r"""Infers detections on a TFRecord of TFExamples given an inference graph. Example usage: ./infer_detections \ --input_tfrecord_paths=/path/to/input/tfrecord1,/path/to/input/tfrecord2 \ - --output_tfrecord_path_prefix=/path/to/output/detections.tfrecord \ + --output_tfrecord_path=/path/to/output/detections.tfrecord \ --inference_graph=/path/to/frozen_weights_inference_graph.pb The output is a TFRecord of TFExamples. Each TFExample from the input is first diff --git a/research/object_detection/inputs.py b/research/object_detection/inputs.py index 59ed22d0531476c7550e7cabb3961480fe7e4f4d..bdb219b08cc1e2adaa1aa58eec02588733cd3c4d 100644 --- a/research/object_detection/inputs.py +++ b/research/object_detection/inputs.py @@ -68,8 +68,25 @@ def _multiclass_scores_or_one_hot_labels(multiclass_scores, return tf.cond(tf.size(multiclass_scores) > 0, true_fn, false_fn) -def _convert_labeled_classes_to_k_hot(groundtruth_labeled_classes, num_classes): - """Returns k-hot encoding of the labeled classes.""" +def convert_labeled_classes_to_k_hot(groundtruth_labeled_classes, + num_classes, + map_empty_to_ones=False): + """Returns k-hot encoding of the labeled classes. + + If map_empty_to_ones is enabled and the input labeled_classes is empty, + this function assumes all classes are exhaustively labeled, thus returning + an all-one encoding. + + Args: + groundtruth_labeled_classes: a Tensor holding a sparse representation of + labeled classes. + num_classes: an integer representing the number of classes + map_empty_to_ones: boolean (default: False). Set this to be True to default + to an all-ones result if given an empty `groundtruth_labeled_classes`. 
+ Returns: + A k-hot (and 0-indexed) tensor representation of + `groundtruth_labeled_classes`. + """ # If the input labeled_classes is empty, it assumes all classes are # exhaustively labeled, thus returning an all-one encoding. @@ -82,13 +99,16 @@ def _convert_labeled_classes_to_k_hot(groundtruth_labeled_classes, num_classes): def false_fn(): return tf.ones(num_classes, dtype=tf.float32) - return tf.cond(tf.size(groundtruth_labeled_classes) > 0, true_fn, false_fn) + if map_empty_to_ones: + return tf.cond(tf.size(groundtruth_labeled_classes) > 0, true_fn, false_fn) + return true_fn() def _remove_unrecognized_classes(class_ids, unrecognized_label): """Returns class ids with unrecognized classes filtered out.""" - recognized_indices = tf.where(tf.greater(class_ids, unrecognized_label)) + recognized_indices = tf.squeeze( + tf.where(tf.greater(class_ids, unrecognized_label)), -1) return tf.gather(class_ids, recognized_indices) @@ -197,51 +217,54 @@ def transform_input_data(tensor_dict, """ out_tensor_dict = tensor_dict.copy() - labeled_classes_field = fields.InputDataFields.groundtruth_labeled_classes - image_classes_field = fields.InputDataFields.groundtruth_image_classes + input_fields = fields.InputDataFields + labeled_classes_field = input_fields.groundtruth_labeled_classes + image_classes_field = input_fields.groundtruth_image_classes + verified_neg_classes_field = input_fields.groundtruth_verified_neg_classes + not_exhaustive_field = input_fields.groundtruth_not_exhaustive_classes + if (labeled_classes_field in out_tensor_dict and image_classes_field in out_tensor_dict): raise KeyError('groundtruth_labeled_classes and groundtruth_image_classes' 'are provided by the decoder, but only one should be set.') - if labeled_classes_field in out_tensor_dict: - # tf_example_decoder casts unrecognized labels to -1. Remove these - # unrecognized labels before converting labeled_classes to k-hot vector. 
- out_tensor_dict[labeled_classes_field] = _remove_unrecognized_classes( - out_tensor_dict[labeled_classes_field], unrecognized_label=-1) - out_tensor_dict[labeled_classes_field] = _convert_labeled_classes_to_k_hot( - out_tensor_dict[labeled_classes_field], num_classes) - - if image_classes_field in out_tensor_dict: - out_tensor_dict[labeled_classes_field] = _convert_labeled_classes_to_k_hot( - out_tensor_dict[image_classes_field], num_classes) - - if fields.InputDataFields.multiclass_scores in out_tensor_dict: + for field, map_empty_to_ones in [ + (labeled_classes_field, True), + (image_classes_field, True), + (verified_neg_classes_field, False), + (not_exhaustive_field, False)]: + if field in out_tensor_dict: + out_tensor_dict[field] = _remove_unrecognized_classes( + out_tensor_dict[field], unrecognized_label=-1) + out_tensor_dict[field] = convert_labeled_classes_to_k_hot( + out_tensor_dict[field], num_classes, map_empty_to_ones) + + if input_fields.multiclass_scores in out_tensor_dict: out_tensor_dict[ - fields.InputDataFields + input_fields .multiclass_scores] = _multiclass_scores_or_one_hot_labels( - out_tensor_dict[fields.InputDataFields.multiclass_scores], - out_tensor_dict[fields.InputDataFields.groundtruth_boxes], - out_tensor_dict[fields.InputDataFields.groundtruth_classes], + out_tensor_dict[input_fields.multiclass_scores], + out_tensor_dict[input_fields.groundtruth_boxes], + out_tensor_dict[input_fields.groundtruth_classes], num_classes) - if fields.InputDataFields.groundtruth_boxes in out_tensor_dict: + if input_fields.groundtruth_boxes in out_tensor_dict: out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates( out_tensor_dict) out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict) if retain_original_image: - out_tensor_dict[fields.InputDataFields.original_image] = tf.cast( - image_resizer_fn(out_tensor_dict[fields.InputDataFields.image], + out_tensor_dict[input_fields.original_image] = tf.cast( + 
image_resizer_fn(out_tensor_dict[input_fields.image], None)[0], tf.uint8) - if fields.InputDataFields.image_additional_channels in out_tensor_dict: - channels = out_tensor_dict[fields.InputDataFields.image_additional_channels] - out_tensor_dict[fields.InputDataFields.image] = tf.concat( - [out_tensor_dict[fields.InputDataFields.image], channels], axis=2) + if input_fields.image_additional_channels in out_tensor_dict: + channels = out_tensor_dict[input_fields.image_additional_channels] + out_tensor_dict[input_fields.image] = tf.concat( + [out_tensor_dict[input_fields.image], channels], axis=2) if retain_original_image_additional_channels: out_tensor_dict[ - fields.InputDataFields.image_additional_channels] = tf.cast( + input_fields.image_additional_channels] = tf.cast( image_resizer_fn(channels, None)[0], tf.uint8) # Apply data augmentation ops. @@ -249,7 +272,7 @@ def transform_input_data(tensor_dict, out_tensor_dict = data_augmentation_fn(out_tensor_dict) # Apply model preprocessing ops and resize instance masks. 
- image = out_tensor_dict[fields.InputDataFields.image] + image = out_tensor_dict[input_fields.image] preprocessed_resized_image, true_image_shape = model_preprocess_fn( tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0)) @@ -262,35 +285,43 @@ def transform_input_data(tensor_dict, tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1]) ]) - if fields.InputDataFields.groundtruth_boxes in tensor_dict: - bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes] + if input_fields.groundtruth_boxes in tensor_dict: + bboxes = out_tensor_dict[input_fields.groundtruth_boxes] boxlist = box_list.BoxList(bboxes) realigned_bboxes = box_list_ops.change_coordinate_frame(boxlist, im_box) realigned_boxes_tensor = realigned_bboxes.get() valid_boxes_tensor = assert_or_prune_invalid_boxes(realigned_boxes_tensor) out_tensor_dict[ - fields.InputDataFields.groundtruth_boxes] = valid_boxes_tensor + input_fields.groundtruth_boxes] = valid_boxes_tensor - if fields.InputDataFields.groundtruth_keypoints in tensor_dict: - keypoints = out_tensor_dict[fields.InputDataFields.groundtruth_keypoints] + if input_fields.groundtruth_keypoints in tensor_dict: + keypoints = out_tensor_dict[input_fields.groundtruth_keypoints] realigned_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box) out_tensor_dict[ - fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints - flds_gt_kpt = fields.InputDataFields.groundtruth_keypoints - flds_gt_kpt_vis = fields.InputDataFields.groundtruth_keypoint_visibilities - flds_gt_kpt_weights = fields.InputDataFields.groundtruth_keypoint_weights + input_fields.groundtruth_keypoints] = realigned_keypoints + flds_gt_kpt = input_fields.groundtruth_keypoints + flds_gt_kpt_vis = input_fields.groundtruth_keypoint_visibilities + flds_gt_kpt_weights = input_fields.groundtruth_keypoint_weights if flds_gt_kpt_vis not in out_tensor_dict: out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like( out_tensor_dict[flds_gt_kpt][:, :, 0], dtype=tf.bool) + 
flds_gt_kpt_depth = fields.InputDataFields.groundtruth_keypoint_depths + flds_gt_kpt_depth_weight = ( + fields.InputDataFields.groundtruth_keypoint_depth_weights) + if flds_gt_kpt_depth in out_tensor_dict: + out_tensor_dict[flds_gt_kpt_depth] = out_tensor_dict[flds_gt_kpt_depth] + out_tensor_dict[flds_gt_kpt_depth_weight] = out_tensor_dict[ + flds_gt_kpt_depth_weight] + out_tensor_dict[flds_gt_kpt_weights] = ( keypoint_ops.keypoint_weights_from_visibilities( out_tensor_dict[flds_gt_kpt_vis], keypoint_type_weight)) - dp_surface_coords_fld = fields.InputDataFields.groundtruth_dp_surface_coords + dp_surface_coords_fld = input_fields.groundtruth_dp_surface_coords if dp_surface_coords_fld in tensor_dict: dp_surface_coords = out_tensor_dict[dp_surface_coords_fld] realigned_dp_surface_coords = densepose_ops.change_coordinate_frame( @@ -300,60 +331,60 @@ def transform_input_data(tensor_dict, if use_bfloat16: preprocessed_resized_image = tf.cast( preprocessed_resized_image, tf.bfloat16) - if fields.InputDataFields.context_features in out_tensor_dict: - out_tensor_dict[fields.InputDataFields.context_features] = tf.cast( - out_tensor_dict[fields.InputDataFields.context_features], tf.bfloat16) - out_tensor_dict[fields.InputDataFields.image] = tf.squeeze( + if input_fields.context_features in out_tensor_dict: + out_tensor_dict[input_fields.context_features] = tf.cast( + out_tensor_dict[input_fields.context_features], tf.bfloat16) + out_tensor_dict[input_fields.image] = tf.squeeze( preprocessed_resized_image, axis=0) - out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze( + out_tensor_dict[input_fields.true_image_shape] = tf.squeeze( true_image_shape, axis=0) - if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict: - masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks] + if input_fields.groundtruth_instance_masks in out_tensor_dict: + masks = out_tensor_dict[input_fields.groundtruth_instance_masks] _, resized_masks, _ = 
image_resizer_fn(image, masks) if use_bfloat16: resized_masks = tf.cast(resized_masks, tf.bfloat16) out_tensor_dict[ - fields.InputDataFields.groundtruth_instance_masks] = resized_masks + input_fields.groundtruth_instance_masks] = resized_masks zero_indexed_groundtruth_classes = out_tensor_dict[ - fields.InputDataFields.groundtruth_classes] - _LABEL_OFFSET + input_fields.groundtruth_classes] - _LABEL_OFFSET if use_multiclass_scores: out_tensor_dict[ - fields.InputDataFields.groundtruth_classes] = out_tensor_dict[ - fields.InputDataFields.multiclass_scores] + input_fields.groundtruth_classes] = out_tensor_dict[ + input_fields.multiclass_scores] else: - out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot( + out_tensor_dict[input_fields.groundtruth_classes] = tf.one_hot( zero_indexed_groundtruth_classes, num_classes) - out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None) + out_tensor_dict.pop(input_fields.multiclass_scores, None) - if fields.InputDataFields.groundtruth_confidences in out_tensor_dict: + if input_fields.groundtruth_confidences in out_tensor_dict: groundtruth_confidences = out_tensor_dict[ - fields.InputDataFields.groundtruth_confidences] + input_fields.groundtruth_confidences] # Map the confidences to the one-hot encoding of classes - out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( + out_tensor_dict[input_fields.groundtruth_confidences] = ( tf.reshape(groundtruth_confidences, [-1, 1]) * - out_tensor_dict[fields.InputDataFields.groundtruth_classes]) + out_tensor_dict[input_fields.groundtruth_classes]) else: groundtruth_confidences = tf.ones_like( zero_indexed_groundtruth_classes, dtype=tf.float32) - out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( - out_tensor_dict[fields.InputDataFields.groundtruth_classes]) + out_tensor_dict[input_fields.groundtruth_confidences] = ( + out_tensor_dict[input_fields.groundtruth_classes]) if merge_multiple_boxes: merged_boxes, merged_classes, 
merged_confidences, _ = ( util_ops.merge_boxes_with_multiple_labels( - out_tensor_dict[fields.InputDataFields.groundtruth_boxes], + out_tensor_dict[input_fields.groundtruth_boxes], zero_indexed_groundtruth_classes, groundtruth_confidences, num_classes)) merged_classes = tf.cast(merged_classes, tf.float32) - out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes - out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes - out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = ( + out_tensor_dict[input_fields.groundtruth_boxes] = merged_boxes + out_tensor_dict[input_fields.groundtruth_classes] = merged_classes + out_tensor_dict[input_fields.groundtruth_confidences] = ( merged_confidences) - if fields.InputDataFields.groundtruth_boxes in out_tensor_dict: - out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape( - out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0] + if input_fields.groundtruth_boxes in out_tensor_dict: + out_tensor_dict[input_fields.num_groundtruth_boxes] = tf.shape( + out_tensor_dict[input_fields.groundtruth_boxes])[0] return out_tensor_dict @@ -397,113 +428,132 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_context_features is not specified and context_features is in the tensor dict. 
""" - if not spatial_image_shape or spatial_image_shape == [-1, -1]: height, width = None, None else: height, width = spatial_image_shape # pylint: disable=unpacking-non-sequence + input_fields = fields.InputDataFields num_additional_channels = 0 - if fields.InputDataFields.image_additional_channels in tensor_dict: + if input_fields.image_additional_channels in tensor_dict: num_additional_channels = shape_utils.get_dim_as_int(tensor_dict[ - fields.InputDataFields.image_additional_channels].shape[2]) + input_fields.image_additional_channels].shape[2]) # We assume that if num_additional_channels > 0, then it has already been # concatenated to the base image (but not the ground truth). num_channels = 3 - if fields.InputDataFields.image in tensor_dict: + if input_fields.image in tensor_dict: num_channels = shape_utils.get_dim_as_int( - tensor_dict[fields.InputDataFields.image].shape[2]) + tensor_dict[input_fields.image].shape[2]) if num_additional_channels: if num_additional_channels >= num_channels: raise ValueError( 'Image must be already concatenated with additional channels.') - if (fields.InputDataFields.original_image in tensor_dict and + if (input_fields.original_image in tensor_dict and shape_utils.get_dim_as_int( - tensor_dict[fields.InputDataFields.original_image].shape[2]) == + tensor_dict[input_fields.original_image].shape[2]) == num_channels): raise ValueError( 'Image must be already concatenated with additional channels.') - if fields.InputDataFields.context_features in tensor_dict and ( + if input_fields.context_features in tensor_dict and ( max_num_context_features is None): raise ValueError('max_num_context_features must be specified in the model ' 'config if include_context is specified in the input ' 'config') padding_shapes = { - fields.InputDataFields.image: [height, width, num_channels], - fields.InputDataFields.original_image_spatial_shape: [2], - fields.InputDataFields.image_additional_channels: [ + input_fields.image: [height, width, 
num_channels], + input_fields.original_image_spatial_shape: [2], + input_fields.image_additional_channels: [ height, width, num_additional_channels ], - fields.InputDataFields.source_id: [], - fields.InputDataFields.filename: [], - fields.InputDataFields.key: [], - fields.InputDataFields.groundtruth_difficult: [max_num_boxes], - fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4], - fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes], - fields.InputDataFields.groundtruth_instance_masks: [ + input_fields.source_id: [], + input_fields.filename: [], + input_fields.key: [], + input_fields.groundtruth_difficult: [max_num_boxes], + input_fields.groundtruth_boxes: [max_num_boxes, 4], + input_fields.groundtruth_classes: [max_num_boxes, num_classes], + input_fields.groundtruth_instance_masks: [ max_num_boxes, height, width ], - fields.InputDataFields.groundtruth_is_crowd: [max_num_boxes], - fields.InputDataFields.groundtruth_group_of: [max_num_boxes], - fields.InputDataFields.groundtruth_area: [max_num_boxes], - fields.InputDataFields.groundtruth_weights: [max_num_boxes], - fields.InputDataFields.groundtruth_confidences: [ + input_fields.groundtruth_is_crowd: [max_num_boxes], + input_fields.groundtruth_group_of: [max_num_boxes], + input_fields.groundtruth_area: [max_num_boxes], + input_fields.groundtruth_weights: [max_num_boxes], + input_fields.groundtruth_confidences: [ max_num_boxes, num_classes ], - fields.InputDataFields.num_groundtruth_boxes: [], - fields.InputDataFields.groundtruth_label_types: [max_num_boxes], - fields.InputDataFields.groundtruth_label_weights: [max_num_boxes], - fields.InputDataFields.true_image_shape: [3], - fields.InputDataFields.groundtruth_image_classes: [num_classes], - fields.InputDataFields.groundtruth_image_confidences: [num_classes], - fields.InputDataFields.groundtruth_labeled_classes: [num_classes], + input_fields.num_groundtruth_boxes: [], + input_fields.groundtruth_label_types: [max_num_boxes], + 
input_fields.groundtruth_label_weights: [max_num_boxes], + input_fields.true_image_shape: [3], + input_fields.groundtruth_image_classes: [num_classes], + input_fields.groundtruth_image_confidences: [num_classes], + input_fields.groundtruth_labeled_classes: [num_classes], } - if fields.InputDataFields.original_image in tensor_dict: - padding_shapes[fields.InputDataFields.original_image] = [ + if input_fields.original_image in tensor_dict: + padding_shapes[input_fields.original_image] = [ height, width, - shape_utils.get_dim_as_int(tensor_dict[fields.InputDataFields. + shape_utils.get_dim_as_int(tensor_dict[input_fields. original_image].shape[2]) ] - if fields.InputDataFields.groundtruth_keypoints in tensor_dict: + if input_fields.groundtruth_keypoints in tensor_dict: tensor_shape = ( - tensor_dict[fields.InputDataFields.groundtruth_keypoints].shape) + tensor_dict[input_fields.groundtruth_keypoints].shape) padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1]), shape_utils.get_dim_as_int(tensor_shape[2])] - padding_shapes[fields.InputDataFields.groundtruth_keypoints] = padding_shape - if fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict: - tensor_shape = tensor_dict[fields.InputDataFields. + padding_shapes[input_fields.groundtruth_keypoints] = padding_shape + if input_fields.groundtruth_keypoint_visibilities in tensor_dict: + tensor_shape = tensor_dict[input_fields. groundtruth_keypoint_visibilities].shape padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])] - padding_shapes[fields.InputDataFields. + padding_shapes[input_fields. groundtruth_keypoint_visibilities] = padding_shape - if fields.InputDataFields.groundtruth_keypoint_weights in tensor_dict: - tensor_shape = ( - tensor_dict[fields.InputDataFields.groundtruth_keypoint_weights].shape) + if fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict: + tensor_shape = tensor_dict[fields.InputDataFields. 
+ groundtruth_keypoint_depths].shape padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])] padding_shapes[fields.InputDataFields. + groundtruth_keypoint_depths] = padding_shape + padding_shapes[fields.InputDataFields. + groundtruth_keypoint_depth_weights] = padding_shape + + if input_fields.groundtruth_keypoint_weights in tensor_dict: + tensor_shape = ( + tensor_dict[input_fields.groundtruth_keypoint_weights].shape) + padding_shape = [max_num_boxes, shape_utils.get_dim_as_int(tensor_shape[1])] + padding_shapes[input_fields. groundtruth_keypoint_weights] = padding_shape - if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict: + if input_fields.groundtruth_dp_num_points in tensor_dict: padding_shapes[ - fields.InputDataFields.groundtruth_dp_num_points] = [max_num_boxes] + input_fields.groundtruth_dp_num_points] = [max_num_boxes] padding_shapes[ - fields.InputDataFields.groundtruth_dp_part_ids] = [ + input_fields.groundtruth_dp_part_ids] = [ max_num_boxes, max_dp_points] padding_shapes[ - fields.InputDataFields.groundtruth_dp_surface_coords] = [ + input_fields.groundtruth_dp_surface_coords] = [ max_num_boxes, max_dp_points, 4] + if input_fields.groundtruth_track_ids in tensor_dict: + padding_shapes[ + input_fields.groundtruth_track_ids] = [max_num_boxes] + + if input_fields.groundtruth_verified_neg_classes in tensor_dict: + padding_shapes[ + input_fields.groundtruth_verified_neg_classes] = [num_classes] + if input_fields.groundtruth_not_exhaustive_classes in tensor_dict: + padding_shapes[ + input_fields.groundtruth_not_exhaustive_classes] = [num_classes] # Prepare for ContextRCNN related fields. 
- if fields.InputDataFields.context_features in tensor_dict: + if input_fields.context_features in tensor_dict: padding_shape = [max_num_context_features, context_feature_length] - padding_shapes[fields.InputDataFields.context_features] = padding_shape + padding_shapes[input_fields.context_features] = padding_shape tensor_shape = tf.shape( tensor_dict[fields.InputDataFields.context_features]) @@ -511,9 +561,12 @@ def pad_input_data_to_static_shapes(tensor_dict, padding_shapes[fields.InputDataFields.valid_context_size] = [] if fields.InputDataFields.context_feature_length in tensor_dict: padding_shapes[fields.InputDataFields.context_feature_length] = [] + if fields.InputDataFields.context_features_image_id_list in tensor_dict: + padding_shapes[fields.InputDataFields.context_features_image_id_list] = [ + max_num_context_features] - if fields.InputDataFields.is_annotated in tensor_dict: - padding_shapes[fields.InputDataFields.is_annotated] = [] + if input_fields.is_annotated in tensor_dict: + padding_shapes[input_fields.is_annotated] = [] padded_tensor_dict = {} for tensor_name in tensor_dict: @@ -522,10 +575,10 @@ def pad_input_data_to_static_shapes(tensor_dict, # Make sure that the number of groundtruth boxes now reflects the # padded/clipped tensors. 
- if fields.InputDataFields.num_groundtruth_boxes in padded_tensor_dict: - padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = ( + if input_fields.num_groundtruth_boxes in padded_tensor_dict: + padded_tensor_dict[input_fields.num_groundtruth_boxes] = ( tf.minimum( - padded_tensor_dict[fields.InputDataFields.num_groundtruth_boxes], + padded_tensor_dict[input_fields.num_groundtruth_boxes], max_num_boxes)) return padded_tensor_dict @@ -552,6 +605,8 @@ def augment_input_data(tensor_dict, data_augmentation_options): in tensor_dict) include_keypoint_visibilities = ( fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict) + include_keypoint_depths = ( + fields.InputDataFields.groundtruth_keypoint_depths in tensor_dict) include_label_weights = (fields.InputDataFields.groundtruth_weights in tensor_dict) include_label_confidences = (fields.InputDataFields.groundtruth_confidences @@ -571,7 +626,8 @@ def augment_input_data(tensor_dict, data_augmentation_options): include_instance_masks=include_instance_masks, include_keypoints=include_keypoints, include_keypoint_visibilities=include_keypoint_visibilities, - include_dense_pose=include_dense_pose)) + include_dense_pose=include_dense_pose, + include_keypoint_depths=include_keypoint_depths)) tensor_dict[fields.InputDataFields.image] = tf.squeeze( tensor_dict[fields.InputDataFields.image], axis=0) return tensor_dict @@ -593,6 +649,8 @@ def _get_labels_dict(input_dict): fields.InputDataFields.groundtruth_confidences, fields.InputDataFields.groundtruth_labeled_classes, fields.InputDataFields.groundtruth_keypoints, + fields.InputDataFields.groundtruth_keypoint_depths, + fields.InputDataFields.groundtruth_keypoint_depth_weights, fields.InputDataFields.groundtruth_instance_masks, fields.InputDataFields.groundtruth_area, fields.InputDataFields.groundtruth_is_crowd, @@ -602,7 +660,10 @@ def _get_labels_dict(input_dict): fields.InputDataFields.groundtruth_keypoint_weights, 
fields.InputDataFields.groundtruth_dp_num_points, fields.InputDataFields.groundtruth_dp_part_ids, - fields.InputDataFields.groundtruth_dp_surface_coords + fields.InputDataFields.groundtruth_dp_surface_coords, + fields.InputDataFields.groundtruth_track_ids, + fields.InputDataFields.groundtruth_verified_neg_classes, + fields.InputDataFields.groundtruth_not_exhaustive_classes ] for key in optional_label_keys: @@ -673,6 +734,9 @@ def _get_features_dict(input_dict, include_source_id=False): if fields.InputDataFields.valid_context_size in input_dict: features[fields.InputDataFields.valid_context_size] = input_dict[ fields.InputDataFields.valid_context_size] + if fields.InputDataFields.context_features_image_id_list in input_dict: + features[fields.InputDataFields.context_features_image_id_list] = ( + input_dict[fields.InputDataFields.context_features_image_id_list]) return features @@ -762,6 +826,8 @@ def train_input(train_config, train_input_config, DensePose surface coordinates. The format is (y, x, v, u), where (y, x) are normalized image coordinates and (v, u) are normalized surface part coordinates. + labels[fields.InputDataFields.groundtruth_track_ids] is a + [batch_size, num_boxes] int32 tensor with the track ID for each object. Raises: TypeError: if the `train_config`, `train_input_config` or `model_config` @@ -853,7 +919,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config): def eval_input(eval_config, eval_input_config, model_config, - model=None, params=None): + model=None, params=None, input_context=None): """Returns `features` and `labels` tensor dictionaries for evaluation. Args: @@ -863,6 +929,9 @@ def eval_input(eval_config, eval_input_config, model_config, model: A pre-constructed Detection Model. If None, one will be created from the config. params: Parameter dictionary passed from the estimator. 
+ input_context: optional, A tf.distribute.InputContext object used to + shard filenames and compute per-replica batch_size when this function + is being called per-replica. Returns: A tf.data.Dataset that holds (features, labels) tuple. @@ -914,6 +983,8 @@ def eval_input(eval_config, eval_input_config, model_config, DensePose surface coordinates. The format is (y, x, v, u), where (y, x) are normalized image coordinates and (v, u) are normalized surface part coordinates. + labels[fields.InputDataFields.groundtruth_track_ids] is a + [batch_size, num_boxes] int32 tensor with the track ID for each object. Raises: TypeError: if the `eval_config`, `eval_input_config` or `model_config` @@ -981,6 +1052,7 @@ def eval_input(eval_config, eval_input_config, model_config, eval_input_config, batch_size=params['batch_size'] if params else eval_config.batch_size, transform_input_data_fn=transform_and_pad_input_data_fn, + input_context=input_context, reduce_to_frame_fn=reduce_to_frame_fn) return dataset @@ -1094,8 +1166,12 @@ def get_reduce_to_frame_fn(input_reader_config, is_training): num_frames = tf.cast( tf.shape(tensor_dict[fields.InputDataFields.source_id])[0], dtype=tf.int32) - frame_index = tf.random.uniform((), minval=0, maxval=num_frames, - dtype=tf.int32) + if input_reader_config.frame_index == -1: + frame_index = tf.random.uniform((), minval=0, maxval=num_frames, + dtype=tf.int32) + else: + frame_index = tf.constant(input_reader_config.frame_index, + dtype=tf.int32) out_tensor_dict = {} for key in tensor_dict: if key in fields.SEQUENCE_FIELDS: diff --git a/research/object_detection/inputs_test.py b/research/object_detection/inputs_test.py index ec44d04d81b4ff5097bdfd0b2670949e92f00a99..4716882e9a34dc1124d1ea881087ff8da70f28f5 100644 --- a/research/object_detection/inputs_test.py +++ b/research/object_detection/inputs_test.py @@ -61,7 +61,7 @@ def _get_configs_for_model(model_name): configs, kwargs_dict=override_dict) -def 
_get_configs_for_model_sequence_example(model_name): +def _get_configs_for_model_sequence_example(model_name, frame_index=-1): """Returns configurations for model.""" fname = os.path.join(tf.resource_loader.get_data_files_path(), 'test_data/' + model_name + '.config') @@ -74,7 +74,8 @@ def _get_configs_for_model_sequence_example(model_name): override_dict = { 'train_input_path': data_path, 'eval_input_path': data_path, - 'label_map_path': label_map_path + 'label_map_path': label_map_path, + 'frame_index': frame_index } return config_util.merge_external_params_with_configs( configs, kwargs_dict=override_dict) @@ -312,6 +313,87 @@ class InputFnTest(test_case.TestCase, parameterized.TestCase): tf.float32, labels[fields.InputDataFields.groundtruth_weights].dtype) + def test_context_rcnn_resnet50_eval_input_with_sequence_example_image_id_list( + self, eval_batch_size=8): + """Tests the eval input function for FasterRcnnResnet50.""" + configs = _get_configs_for_model_sequence_example( + 'context_rcnn_camera_trap') + model_config = configs['model'] + eval_config = configs['eval_config'] + eval_config.batch_size = eval_batch_size + eval_input_config = configs['eval_input_configs'][0] + eval_input_config.load_context_image_ids = True + eval_input_fn = inputs.create_eval_input_fn( + eval_config, eval_input_config, model_config) + features, labels = _make_initializable_iterator(eval_input_fn()).get_next() + self.assertAllEqual([eval_batch_size, 640, 640, 3], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual( + [eval_batch_size, 640, 640, 3], + features[fields.InputDataFields.original_image].shape.as_list()) + self.assertEqual(tf.uint8, + features[fields.InputDataFields.original_image].dtype) + self.assertAllEqual([eval_batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + 
[eval_batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [eval_batch_size, 100, model_config.faster_rcnn.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [eval_batch_size, 100], + labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + + def test_context_rcnn_resnet50_train_input_with_sequence_example_frame_index( + self, train_batch_size=8): + """Tests the training input function for FasterRcnnResnet50.""" + configs = _get_configs_for_model_sequence_example( + 'context_rcnn_camera_trap', frame_index=2) + model_config = configs['model'] + train_config = configs['train_config'] + train_config.batch_size = train_batch_size + train_input_fn = inputs.create_train_input_fn( + train_config, configs['train_input_config'], model_config) + features, labels = _make_initializable_iterator(train_input_fn()).get_next() + + self.assertAllEqual([train_batch_size, 640, 640, 3], + features[fields.InputDataFields.image].shape.as_list()) + self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype) + self.assertAllEqual([train_batch_size], + features[inputs.HASH_KEY].shape.as_list()) + self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype) + self.assertAllEqual( + [train_batch_size, 100, 4], + labels[fields.InputDataFields.groundtruth_boxes].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_boxes].dtype) + self.assertAllEqual( + [train_batch_size, 100, model_config.faster_rcnn.num_classes], + labels[fields.InputDataFields.groundtruth_classes].shape.as_list()) + self.assertEqual(tf.float32, + 
labels[fields.InputDataFields.groundtruth_classes].dtype) + self.assertAllEqual( + [train_batch_size, 100], + labels[fields.InputDataFields.groundtruth_weights].shape.as_list()) + self.assertEqual(tf.float32, + labels[fields.InputDataFields.groundtruth_weights].dtype) + self.assertAllEqual( + [train_batch_size, 100, model_config.faster_rcnn.num_classes], + labels[fields.InputDataFields.groundtruth_confidences].shape.as_list()) + self.assertEqual( + tf.float32, + labels[fields.InputDataFields.groundtruth_confidences].dtype) + def test_ssd_inceptionV2_train_input(self): """Tests the training input function for SSDInceptionV2.""" configs = _get_configs_for_model('ssd_inception_v2_pets') @@ -1338,6 +1420,49 @@ class DataTransformationFnTest(test_case.TestCase, parameterized.TestCase): [[[0., 0., 0., 0.,], [0., 0., 0., 0.,]], [[0.1, 0.1, 0.3, 0.4,], [0.6, 0.4, 0.6, 0.7,]]]) + def test_groundtruth_keypoint_depths(self): + def graph_fn(): + tensor_dict = { + fields.InputDataFields.image: + tf.constant(np.random.rand(100, 50, 3).astype(np.float32)), + fields.InputDataFields.groundtruth_boxes: + tf.constant(np.array([[.5, .5, 1, 1], [.0, .0, .5, .5]], + np.float32)), + fields.InputDataFields.groundtruth_classes: + tf.constant(np.array([1, 2], np.int32)), + fields.InputDataFields.groundtruth_keypoints: + tf.constant([[[0.1, 0.2], [0.3, 0.4]], + [[0.5, 0.6], [0.7, 0.8]]]), + fields.InputDataFields.groundtruth_keypoint_visibilities: + tf.constant([[True, False], [True, True]]), + fields.InputDataFields.groundtruth_keypoint_depths: + tf.constant([[1.0, 0.9], [0.8, 0.7]]), + fields.InputDataFields.groundtruth_keypoint_depth_weights: + tf.constant([[0.7, 0.8], [0.9, 1.0]]), + } + + num_classes = 3 + keypoint_type_weight = [1.0, 2.0] + input_transformation_fn = functools.partial( + inputs.transform_input_data, + model_preprocess_fn=_fake_resize50_preprocess_fn, + image_resizer_fn=_fake_image_resizer_fn, + num_classes=num_classes, + keypoint_type_weight=keypoint_type_weight) + 
transformed_inputs = input_transformation_fn(tensor_dict=tensor_dict) + return (transformed_inputs[ + fields.InputDataFields.groundtruth_keypoint_depths], + transformed_inputs[ + fields.InputDataFields.groundtruth_keypoint_depth_weights]) + + keypoint_depths, keypoint_depth_weights = self.execute_cpu(graph_fn, []) + self.assertAllClose( + keypoint_depths, + [[1.0, 0.9], [0.8, 0.7]]) + self.assertAllClose( + keypoint_depth_weights, + [[0.7, 0.8], [0.9, 1.0]]) + class PadInputDataToStaticShapesFnTest(test_case.TestCase): @@ -1528,6 +1653,22 @@ class PadInputDataToStaticShapesFnTest(test_case.TestCase): padded_tensor_dict[fields.InputDataFields.groundtruth_dp_surface_coords] .shape.as_list(), [3, 200, 4]) + def test_pad_input_data_to_static_shapes_for_trackid(self): + input_tensor_dict = { + fields.InputDataFields.groundtruth_track_ids: + tf.constant([0, 1], dtype=tf.int32), + } + + padded_tensor_dict = inputs.pad_input_data_to_static_shapes( + tensor_dict=input_tensor_dict, + max_num_boxes=3, + num_classes=1, + spatial_image_shape=[128, 128]) + + self.assertAllEqual( + padded_tensor_dict[fields.InputDataFields.groundtruth_track_ids] + .shape.as_list(), [3]) + def test_context_features(self): context_memory_size = 8 context_feature_length = 10 diff --git a/research/object_detection/matchers/hungarian_matcher.py b/research/object_detection/matchers/hungarian_matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..63ee5d9f228a94406b1b3c1707eb493572749a91 --- /dev/null +++ b/research/object_detection/matchers/hungarian_matcher.py @@ -0,0 +1,58 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class HungarianBipartiteMatcher(matcher.Matcher):
  """Wraps a Hungarian bipartite matcher into TensorFlow."""

  def _match(self, similarity_matrix, valid_rows):
    """Optimally bipartite matches a collection of rows and columns.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher values mean more similar.
      valid_rows: A boolean tensor of shape [N] indicating the rows that are
        valid.

    Returns:
      match_results: int32 tensor of shape [M] with match_results[i]=-1
        meaning that column i is not matched and otherwise that it is matched
        to row match_results[i].
    """
    # Keep only the rows flagged valid, then negate similarities: the
    # assignment solver minimizes cost, so minimizing -similarity maximizes
    # similarity.
    valid_indices = tf.squeeze(tf.where(valid_rows), axis=-1)
    cost_matrix = -1 * tf.gather(similarity_matrix, valid_indices)

    def run_assignment(inputs):
      def solve(cost):
        # scipy's Hungarian solver returns the optimal (row, col) pairing.
        rows, cols = linear_sum_assignment(cost)
        # Unmatched columns keep the sentinel value -1.
        results = np.full(cost.shape[1], -1)
        results[cols] = rows
        return results.astype(np.int32)

      return tf.numpy_function(solve, inputs, Tout=[tf.int32])

    # do_not_convert keeps autograph from tracing into the numpy closure.
    match_results = tf.autograph.experimental.do_not_convert(
        run_assignment)([cost_matrix])
    return tf.reshape(match_results, [-1])
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class HungarianBipartiteMatcherTest(test_case.TestCase):
  """Tests for the Hungarian bipartite matcher.

  Note: `np.bool` (an alias of the builtin `bool`) was deprecated in
  NumPy 1.20 and removed in 1.24; the builtin `bool` is the drop-in
  replacement and is used throughout.
  """

  def test_get_expected_matches_when_all_rows_are_valid(self):
    similarity_matrix = np.array([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]],
                                 dtype=np.float32)
    valid_rows = np.ones([2], dtype=bool)
    expected_match_results = [-1, 1, 0]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix, valid_rows=valid_rows)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)

  def test_get_expected_matches_with_all_rows_be_default(self):
    similarity_matrix = np.array([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]],
                                 dtype=np.float32)
    expected_match_results = [-1, 1, 0]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)

  def test_get_no_matches_with_zero_valid_rows(self):
    similarity_matrix = np.array([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]],
                                 dtype=np.float32)
    valid_rows = np.zeros([2], dtype=bool)
    expected_match_results = [-1, -1, -1]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix, valid_rows=valid_rows)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)

  def test_get_expected_matches_with_only_one_valid_row(self):
    similarity_matrix = np.array([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]],
                                 dtype=np.float32)
    valid_rows = np.array([True, False], dtype=bool)
    expected_match_results = [-1, -1, 0]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix, valid_rows=valid_rows)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)

  def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
    similarity_matrix = np.array([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]],
                                 dtype=np.float32)
    valid_rows = np.array([False, True], dtype=bool)
    expected_match_results = [-1, -1, 0]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix, valid_rows=valid_rows)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)

  def test_get_expected_matches_with_two_valid_rows(self):
    similarity_matrix = np.array([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8],
                                  [0.84, 0.32, 0.2]],
                                 dtype=np.float32)
    valid_rows = np.array([True, False, True], dtype=bool)
    expected_match_results = [1, -1, 0]

    matcher = hungarian_matcher.HungarianBipartiteMatcher()
    match_results_out = matcher.match(similarity_matrix, valid_rows=valid_rows)

    self.assertAllEqual(match_results_out._match_results.numpy(),
                        expected_match_results)


if __name__ == '__main__':
  tf.test.main()
def make_prediction_net(num_out_channels, kernel_sizes=(3), num_filters=(256),
                        bias_fill=None, use_depthwise=False, name=None,
                        unit_height_conv=True):
  """Creates a network to predict the given number of output channels.

  This function is intended to make the prediction heads for the CenterNet
  meta architecture.

  Args:
    num_out_channels: Number of output channels.
    kernel_sizes: An int or a list representing the sizes of the conv kernel
      in the intermediate layer. Note that the length of the list indicates
      the number of intermediate conv layers and it must be the same as the
      length of the num_filters.
    num_filters: An int or a list representing the number of filters in the
      intermediate conv layer. Note that the length of the list indicates the
      number of intermediate conv layers.
    bias_fill: If not None, is used to initialize the bias in the final conv
      layer.
    use_depthwise: If true, use SeparableConv2D to construct the Sequential
      layers instead of Conv2D.
    name: Optional name for the prediction net.
    unit_height_conv: If True, Conv2Ds have asymmetric kernels with height=1.

  Returns:
    net: A keras module which when called on an input tensor of size
      [batch_size, height, width, num_in_channels] returns an output
      of size [batch_size, height, width, num_out_channels]

  Raises:
    ValueError: If `kernel_sizes` and `num_filters` have different lengths.
  """
  # Normalize each scalar argument independently. The previous version only
  # wrapped the scalars when *both* arguments were ints, so a mixed call such
  # as kernel_sizes=3, num_filters=[256] crashed with `len(int)` instead of a
  # clear error.
  if isinstance(kernel_sizes, int):
    kernel_sizes = [kernel_sizes]
  if isinstance(num_filters, int):
    num_filters = [num_filters]
  # Raise ValueError rather than a bare assert so the check also fires when
  # Python runs with -O (assertions stripped) and gives a useful message.
  if len(kernel_sizes) != len(num_filters):
    raise ValueError(
        'kernel_sizes and num_filters must have the same length, got '
        '%d and %d.' % (len(kernel_sizes), len(num_filters)))

  if use_depthwise:
    conv_fn = tf.keras.layers.SeparableConv2D
  else:
    conv_fn = tf.keras.layers.Conv2D

  # We name the convolution operations explicitly because Keras, by default,
  # uses different names during training and evaluation. By setting the names
  # here, we avoid unexpected pipeline breakage in TF1.
  out_conv = tf.keras.layers.Conv2D(
      num_out_channels,
      kernel_size=1,
      name='conv1' if tf_version.is_tf1() else None)
  if bias_fill is not None:
    out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)

  layers = []
  for idx, (kernel_size,
            num_filter) in enumerate(zip(kernel_sizes, num_filters)):
    layers.append(
        conv_fn(
            num_filter,
            kernel_size=[1, kernel_size] if unit_height_conv else kernel_size,
            padding='same',
            name='conv2_%d' % idx if tf_version.is_tf1() else None))
    layers.append(tf.keras.layers.ReLU())
  layers.append(out_conv)
  net = tf.keras.Sequential(layers, name=name)
  return net
+ + Returns: + A 1-D tensor of type `dtype` and size + [`limit` * `value_repetitions` * `range_repetitions`] that contains the + specified range with given repetitions. + """ + return tf.reshape( + tf.tile( + tf.expand_dims(tf.range(limit, dtype=dtype), axis=-1), + multiples=[range_repetitions, value_repetitions]), [-1]) + + def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100, per_channel=False): """Returns the top k scores and their locations in a feature map. Given a feature map, the top k values (based on activation) are returned. If - `per_channel` is True, the top k values **per channel** are returned. + `per_channel` is True, the top k values **per channel** are returned. Note + that when k equals to 1, ths function uses reduce_max and argmax instead of + top_k to make the logics more efficient. The `max_pool_kernel_size` argument allows for selecting local peaks in a region. This filtering is done per channel, so nothing prevents two values at @@ -224,12 +302,21 @@ def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100, batch_size, _, width, num_channels = _get_shape(feature_map, 4) if per_channel: - # Perform top k over batch and channels. - feature_map_peaks_transposed = tf.transpose(feature_map_peaks, - perm=[0, 3, 1, 2]) - feature_map_peaks_transposed = tf.reshape( - feature_map_peaks_transposed, [batch_size, num_channels, -1]) - scores, peak_flat_indices = tf.math.top_k(feature_map_peaks_transposed, k=k) + if k == 1: + feature_map_flattened = tf.reshape( + feature_map_peaks, [batch_size, -1, num_channels]) + scores = tf.math.reduce_max(feature_map_flattened, axis=1) + peak_flat_indices = tf.math.argmax( + feature_map_flattened, axis=1, output_type=tf.dtypes.int32) + peak_flat_indices = tf.expand_dims(peak_flat_indices, axis=-1) + else: + # Perform top k over batch and channels. 
+ feature_map_peaks_transposed = tf.transpose(feature_map_peaks, + perm=[0, 3, 1, 2]) + feature_map_peaks_transposed = tf.reshape( + feature_map_peaks_transposed, [batch_size, num_channels, -1]) + scores, peak_flat_indices = tf.math.top_k( + feature_map_peaks_transposed, k=k) # Convert the indices such that they represent the location in the full # (flattened) feature map of size [batch, height * width * channels]. channel_idx = tf.range(num_channels)[tf.newaxis, :, tf.newaxis] @@ -237,8 +324,14 @@ def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100, scores = tf.reshape(scores, [batch_size, -1]) peak_flat_indices = tf.reshape(peak_flat_indices, [batch_size, -1]) else: - feature_map_peaks_flat = tf.reshape(feature_map_peaks, [batch_size, -1]) - scores, peak_flat_indices = tf.math.top_k(feature_map_peaks_flat, k=k) + if k == 1: + feature_map_peaks_flat = tf.reshape(feature_map_peaks, [batch_size, -1]) + scores = tf.math.reduce_max(feature_map_peaks_flat, axis=1, keepdims=True) + peak_flat_indices = tf.expand_dims(tf.math.argmax( + feature_map_peaks_flat, axis=1, output_type=tf.dtypes.int32), axis=-1) + else: + feature_map_peaks_flat = tf.reshape(feature_map_peaks, [batch_size, -1]) + scores, peak_flat_indices = tf.math.top_k(feature_map_peaks_flat, k=k) # Get x, y and channel indices corresponding to the top indices in the flat # array. @@ -248,20 +341,15 @@ def top_k_feature_map_locations(feature_map, max_pool_kernel_size=3, k=100, return scores, y_indices, x_indices, channel_indices -def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices, - channel_indices, height_width_predictions, +def prediction_tensors_to_boxes(y_indices, x_indices, height_width_predictions, offset_predictions): """Converts CenterNet class-center, offset and size predictions to boxes. Args: - detection_scores: A [batch, num_boxes] float32 tensor with detection - scores in range [0, 1]. 
y_indices: A [batch, num_boxes] int32 tensor with y indices corresponding to object center locations (expressed in output coordinate frame). x_indices: A [batch, num_boxes] int32 tensor with x indices corresponding to object center locations (expressed in output coordinate frame). - channel_indices: A [batch, num_boxes] int32 tensor with channel indices - corresponding to object classes. height_width_predictions: A float tensor of shape [batch_size, height, width, 2] representing the height and width of a box centered at each pixel. @@ -272,49 +360,87 @@ def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices, Returns: detection_boxes: A tensor of shape [batch_size, num_boxes, 4] holding the the raw bounding box coordinates of boxes. - detection_classes: An integer tensor of shape [batch_size, num_boxes] - indicating the predicted class for each box. - detection_scores: A float tensor of shape [batch_size, num_boxes] indicating - the score for each box. - num_detections: An integer tensor of shape [batch_size,] indicating the - number of boxes detected for each sample in the batch. - """ - _, _, width, _ = _get_shape(height_width_predictions, 4) + batch_size, num_boxes = _get_shape(y_indices, 2) + _, height, width, _ = _get_shape(height_width_predictions, 4) + height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32) + + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. 
def prediction_tensors_to_temporal_offsets(
    y_indices, x_indices, offset_predictions):
  """Converts CenterNet temporal offset map predictions to batched format.

  This function is similar to the box offset conversion function, as both
  temporal offsets and box offsets are size-2 vectors.

  Args:
    y_indices: A [batch, num_boxes] int32 tensor with y indices corresponding
      to object center locations (expressed in output coordinate frame).
    x_indices: A [batch, num_boxes] int32 tensor with x indices corresponding
      to object center locations (expressed in output coordinate frame).
    offset_predictions: A float tensor of shape [batch_size, height, width, 2]
      representing the y and x offsets of a box's center across adjacent
      frames.

  Returns:
    offsets: A tensor of shape [batch_size, num_boxes, 2] holding the
      the object temporal offsets of (y, x) dimensions.
  """
  batch_size, num_boxes = _get_shape(y_indices, 2)

  # TF Lite does not support tf.gather with batch_dims > 0, so build explicit
  # [batch, y, x] index triples and use tf.gather_nd instead.
  gather_indices = tf.stack([
      _multi_range(batch_size, value_repetitions=num_boxes),
      tf.reshape(y_indices, [-1]),
      tf.reshape(x_indices, [-1])
  ], axis=1)

  flat_offsets = tf.gather_nd(offset_predictions, gather_indices)
  return tf.reshape(flat_offsets, [batch_size, num_boxes, -1])
Args: @@ -323,14 +449,17 @@ def prediction_tensors_to_keypoint_candidates( keypoint_heatmap_offsets: A float tensor of shape [batch_size, height, width, 2] (or [batch_size, height, width, 2 * num_keypoints] if 'per_keypoint_offset' is set True) representing the per-keypoint offsets. - keypoint_score_threshold: float, the threshold for considering a keypoint - a candidate. + keypoint_score_threshold: float, the threshold for considering a keypoint a + candidate. max_pool_kernel_size: integer, the max pool kernel size to use to pull off peak score locations in a neighborhood. For example, to make sure no two neighboring values for the same keypoint are returned, set max_pool_kernel_size=3. If None or 1, will not apply any local filtering. - max_candidates: integer, maximum number of keypoint candidates per - keypoint type. + max_candidates: integer, maximum number of keypoint candidates per keypoint + type. + keypoint_depths: (optional) A float tensor of shape [batch_size, height, + width, 1] (or [batch_size, height, width, num_keypoints] if + 'per_keypoint_depth' is set True) representing the per-keypoint depths. Returns: keypoint_candidates: A tensor of shape @@ -344,9 +473,11 @@ def prediction_tensors_to_keypoint_candidates( [batch_size, num_keypoints] with the number of candidates for each keypoint type, as it's possible to filter some candidates due to the score threshold. + depth_candidates: A tensor of shape [batch_size, max_candidates, + num_keypoints] representing the estimated depth of each keypoint + candidate. Return None if the input keypoint_depths is None. """ - batch_size, _, width, num_keypoints = _get_shape( - keypoint_heatmap_predictions, 4) + batch_size, _, _, num_keypoints = _get_shape(keypoint_heatmap_predictions, 4) # Get x, y and channel indices corresponding to the top indices in the # keypoint heatmap predictions. # Note that the top k candidates are produced for **each keypoint type**. 
@@ -358,19 +489,42 @@ def prediction_tensors_to_keypoint_candidates( k=max_candidates, per_channel=True)) - peak_spatial_indices = flattened_indices_from_row_col_indices( - y_indices, x_indices, width) + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. + _, num_indices = _get_shape(y_indices, 2) + combined_indices = tf.stack([ + _multi_range(batch_size, value_repetitions=num_indices), + tf.reshape(y_indices, [-1]), + tf.reshape(x_indices, [-1]) + ], axis=1) + + selected_offsets_flat = tf.gather_nd(keypoint_heatmap_offsets, + combined_indices) + selected_offsets = tf.reshape(selected_offsets_flat, + [batch_size, num_indices, -1]) + y_indices = _to_float32(y_indices) x_indices = _to_float32(x_indices) - offsets_flat = _flatten_spatial_dimensions(keypoint_heatmap_offsets) - - selected_offsets = tf.gather(offsets_flat, peak_spatial_indices, batch_dims=1) - _, num_indices, num_channels = _get_shape(selected_offsets, 3) + _, _, num_channels = _get_shape(selected_offsets, 3) if num_channels > 2: + # Offsets are per keypoint and the last dimension of selected_offsets + # contains all those offsets, so reshape the offsets to make sure that the + # last dimension contains (y_offset, x_offset) for a single keypoint. reshaped_offsets = tf.reshape(selected_offsets, [batch_size, num_indices, -1, 2]) - offsets = tf.gather(reshaped_offsets, channel_indices, batch_dims=2) + + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. In this + # case, channel_indices indicates which keypoint to use the offset from. 
def argmax_feature_map_locations(feature_map):
  """Returns the peak locations in the feature map.

  For each channel, the single highest-scoring spatial location is selected
  (argmax over the flattened height * width axis).

  Args:
    feature_map: A float tensor of shape [batch_size, height, width,
      num_channels].

  Returns:
    y_indices: An int32 tensor of shape [batch_size, num_channels] with the
      row of each per-channel peak.
    x_indices: An int32 tensor of shape [batch_size, num_channels] with the
      column of each per-channel peak.
    channel_indices: An int32 tensor of shape [batch_size, num_channels]
      enumerating the channels.
  """
  batch_size, _, width, num_channels = _get_shape(feature_map, 4)

  flattened = tf.reshape(feature_map, [batch_size, -1, num_channels])
  flat_peak_indices = tf.math.argmax(
      flattened, axis=1, output_type=tf.dtypes.int32)
  # Recover (y, x) coordinates from the flattened spatial index.
  y_indices, x_indices = row_col_indices_from_flattened_indices(
      flat_peak_indices, width)
  # Each channel contributes exactly one peak, so the channel index is just
  # [0, 1, ..., num_channels - 1] repeated per batch element.
  channel_indices = tf.tile(
      tf.range(num_channels)[tf.newaxis, :], [batch_size, 1])
  return y_indices, x_indices, channel_indices
+ y_indices, x_indices, channel_indices = argmax_feature_map_locations( + keypoint_heatmap_predictions) + + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. + _, num_keypoints = _get_shape(y_indices, 2) + combined_indices = tf.stack([ + _multi_range(batch_size, value_repetitions=num_keypoints), + tf.reshape(y_indices, [-1]), + tf.reshape(x_indices, [-1]), + tf.reshape(channel_indices, [-1]) + ], axis=1) + + # Reshape the offsets predictions to shape: + # [batch_size, height, width, num_keypoints, 2] + keypoint_heatmap_offsets = tf.reshape( + keypoint_heatmap_offsets, [batch_size, height, width, num_keypoints, -1]) + + # shape: [num_keypoints, 2] + selected_offsets_flat = tf.gather_nd(keypoint_heatmap_offsets, + combined_indices) + y_offsets, x_offsets = tf.unstack(selected_offsets_flat, axis=1) + + keypoint_candidates = tf.stack([ + tf.cast(y_indices, dtype=tf.float32) + tf.expand_dims(y_offsets, axis=0), + tf.cast(x_indices, dtype=tf.float32) + tf.expand_dims(x_offsets, axis=0) + ], axis=2) + keypoint_candidates = tf.expand_dims(keypoint_candidates, axis=0) + if keypoint_score_heatmap is None: + keypoint_scores = tf.gather_nd( + keypoint_heatmap_predictions, combined_indices) + else: + keypoint_scores = tf.gather_nd(keypoint_score_heatmap, combined_indices) + keypoint_scores = tf.expand_dims( + tf.expand_dims(keypoint_scores, axis=0), axis=0) + return keypoint_candidates, keypoint_scores + + +def _score_to_distance_map(y_grid, x_grid, heatmap, points_y, points_x, + score_distance_offset): + """Rescores heatmap using the distance information. + + Rescore the heatmap scores using the formula: + score / (d + score_distance_offset), where the d is the distance from each + pixel location to the target point location. + + Args: + y_grid: A float tensor with shape [height, width] representing the + y-coordinate of each pixel grid. 
def prediction_to_single_instance_keypoints(
    object_heatmap,
    keypoint_heatmap,
    keypoint_offset,
    keypoint_regression,
    kp_params,
    keypoint_depths=None):
  """Postprocess function to predict single instance keypoints.

  This is a simplified postprocessing function based on the assumption that
  there is only one instance in the image. If there are multiple instances in
  the image, the model prefers to predict the one that is closest to the image
  center. Here is a high-level description of what this function does:
    1) Object heatmap re-weighted by the distance between each pixel to the
       image center is used to determine the instance center.
    2) Regressed keypoint locations are retrieved from the instance center.
       The Gaussian kernel is applied to the regressed keypoint locations to
       re-weight the keypoint heatmap. This is to select the keypoints that
       are associated with the center instance without using top_k op.
    3) The keypoint locations are computed by the re-weighted keypoint heatmap
       and the keypoint offset.

  Args:
    object_heatmap: A float tensor of shape [1, height, width, 1] representing
      the heatmap of the class.
    keypoint_heatmap: A float tensor of shape [1, height, width, num_keypoints]
      representing the per-keypoint heatmaps.
    keypoint_offset: A float tensor of shape [1, height, width, 2] (or [1,
      height, width, 2 * num_keypoints] if 'per_keypoint_offset' is set True)
      representing the per-keypoint offsets.
    keypoint_regression: A float tensor of shape [1, height, width, 2 *
      num_keypoints] representing the joint regression prediction.
    kp_params: A `KeypointEstimationParams` object with parameters for a
      single keypoint class.
    keypoint_depths: (optional) A float tensor of shape [batch_size, height,
      width, 1] (or [batch_size, height, width, num_keypoints] if
      'per_keypoint_depth' is set True) representing the per-keypoint depths.
      Currently unused.

  Returns:
    A tuple of two tensors:
      keypoint_candidates: A float tensor with shape [1, 1, num_keypoints, 2]
        representing the yx-coordinates of the keypoints in the output feature
        map space.
      keypoint_scores: A float tensor with shape [1, 1, num_keypoints]
        representing the keypoint prediction scores.

  Raises:
    ValueError: if the candidate_ranking_mode is not supported.
  """
  # TODO(yuhuic): add the keypoint depth prediction logics in the browser
  # postprocessing back.
  del keypoint_depths

  num_keypoints = len(kp_params.keypoint_std_dev)
  batch_size, height, width, _ = _get_shape(keypoint_heatmap, 4)

  # Rescore the object heatmap by each pixel's distance to the image center,
  # so the instance closest to the center dominates.
  center_y = tf.convert_to_tensor([0.5 * height], dtype=tf.float32)
  center_x = tf.convert_to_tensor([0.5 * width], dtype=tf.float32)
  (y_grid, x_grid) = ta_utils.image_shape_to_grids(height, width)
  object_heatmap = _score_to_distance_map(
      y_grid, x_grid, object_heatmap, center_y,
      center_x, kp_params.score_distance_offset)

  # Locate the highest score of the rescored object heatmap; that peak is the
  # assumed instance center.
  y_indices, x_indices, _ = argmax_feature_map_locations(object_heatmap)
  _, num_indices = _get_shape(y_indices, 2)
  center_indices = tf.stack([
      _multi_range(batch_size, value_repetitions=num_indices),
      tf.reshape(y_indices, [-1]),
      tf.reshape(x_indices, [-1])
  ], axis=1)

  # Read the keypoint regression vectors at the instance center and turn them
  # into absolute regressed keypoint coordinates.
  flat_regression = tf.gather_nd(keypoint_regression, center_indices)
  # shape: [num_keypoints, 2]
  per_kpt_offsets = tf.reshape(flat_regression, [num_keypoints, -1])
  (dy, dx) = tf.unstack(per_kpt_offsets, axis=1)
  y_regressed = tf.cast(y_indices, dtype=tf.float32) + dy
  x_regressed = tf.cast(x_indices, dtype=tf.float32) + dx

  if kp_params.candidate_ranking_mode == 'score_distance_ratio':
    rescored_kpt_heatmap = _score_to_distance_map(
        y_grid, x_grid, keypoint_heatmap, y_regressed, x_regressed,
        kp_params.score_distance_offset)
  else:
    raise ValueError('Unsupported candidate_ranking_mode: %s' %
                     kp_params.candidate_ranking_mode)

  # Get the keypoint locations/scores:
  # keypoint_candidates: [1, 1, num_keypoints, 2]
  # keypoint_scores: [1, 1, num_keypoints]
  (keypoint_candidates, keypoint_scores
  ) = prediction_tensors_to_single_instance_kpts(
      rescored_kpt_heatmap,
      keypoint_offset,
      keypoint_score_heatmap=keypoint_heatmap)
  return keypoint_candidates, keypoint_scores, None
""" - batch_size, _, width, _ = _get_shape(regressed_keypoint_predictions, 4) - flattened_indices = flattened_indices_from_row_col_indices( - y_indices, x_indices, width) - _, num_instances = _get_shape(flattened_indices, 2) - - regressed_keypoints_flat = _flatten_spatial_dimensions( - regressed_keypoint_predictions) - - relative_regressed_keypoints = tf.gather( - regressed_keypoints_flat, flattened_indices, batch_dims=1) + batch_size, num_instances = _get_shape(y_indices, 2) + + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. + combined_indices = tf.stack([ + _multi_range(batch_size, value_repetitions=num_instances), + tf.reshape(y_indices, [-1]), + tf.reshape(x_indices, [-1]) + ], axis=1) + + relative_regressed_keypoints = tf.gather_nd(regressed_keypoint_predictions, + combined_indices) relative_regressed_keypoints = tf.reshape( relative_regressed_keypoints, [batch_size, num_instances, -1, 2]) @@ -440,11 +839,18 @@ def regressed_keypoints_at_object_centers(regressed_keypoint_predictions, [batch_size, num_instances, -1]) -def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, - num_keypoint_candidates, bboxes=None, - unmatched_keypoint_score=0.1, box_scale=1.2, +def refine_keypoints(regressed_keypoints, + keypoint_candidates, + keypoint_scores, + num_keypoint_candidates, + bboxes=None, + unmatched_keypoint_score=0.1, + box_scale=1.2, candidate_search_scale=0.3, - candidate_ranking_mode='min_distance'): + candidate_ranking_mode='min_distance', + score_distance_offset=1e-6, + keypoint_depth_candidates=None, + keypoint_score_threshold=0.1): """Refines regressed keypoints by snapping to the nearest candidate keypoints. 
The initial regressed keypoints represent a full set of keypoints regressed @@ -500,6 +906,16 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, candidate_ranking_mode: A string as one of ['min_distance', 'score_distance_ratio'] indicating how to select the candidate. If invalid value is provided, an ValueError will be raised. + score_distance_offset: The distance offset to apply in the denominator when + candidate_ranking_mode is 'score_distance_ratio'. The metric to maximize + in this scenario is score / (distance + score_distance_offset). Larger + values of score_distance_offset make the keypoint score gain more relative + importance. + keypoint_depth_candidates: (optional) A float tensor of shape + [batch_size, max_candidates, num_keypoints] indicating the depths for + keypoint candidates. + keypoint_score_threshold: float, The heatmap score threshold for + a keypoint to become a valid candidate. Returns: A tuple with: @@ -526,26 +942,39 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, num_candidates_tiled = tf.tile(tf.expand_dims(num_keypoint_candidates, 1), [1, max_candidates, 1]) invalid_candidates = range_tiled >= num_candidates_tiled - nan_mask = tf.where( - invalid_candidates, - np.nan * tf.ones_like(invalid_candidates, dtype=tf.float32), - tf.ones_like(invalid_candidates, dtype=tf.float32)) - keypoint_candidates_with_nans = tf.math.multiply( - keypoint_candidates, tf.expand_dims(nan_mask, -1)) # Pairwise squared distances between regressed keypoints and candidate # keypoints (for a single keypoint type). - # Shape [batch_size, num_instances, max_candidates, num_keypoints]. + # Shape [batch_size, num_instances, 1, num_keypoints, 2]. regressed_keypoint_expanded = tf.expand_dims(regressed_keypoints, axis=2) + # Shape [batch_size, 1, max_candidates, num_keypoints, 2]. 
keypoint_candidates_expanded = tf.expand_dims( - keypoint_candidates_with_nans, axis=1) - sqrd_distances = tf.math.reduce_sum( - tf.math.squared_difference(regressed_keypoint_expanded, - keypoint_candidates_expanded), - axis=-1) + keypoint_candidates, axis=1) + # Use explicit tensor shape broadcasting (since the tensor dimensions are + # expanded to 5D) to make it tf.lite compatible. + regressed_keypoint_expanded = tf.tile( + regressed_keypoint_expanded, multiples=[1, 1, max_candidates, 1, 1]) + keypoint_candidates_expanded = tf.tile( + keypoint_candidates_expanded, multiples=[1, num_instances, 1, 1, 1]) + # Replace tf.math.squared_difference by "-" operator and tf.multiply ops since + # tf.lite converter doesn't support squared_difference with undetermined + # dimension. + diff = regressed_keypoint_expanded - keypoint_candidates_expanded + sqrd_distances = tf.math.reduce_sum(tf.multiply(diff, diff), axis=-1) distances = tf.math.sqrt(sqrd_distances) + # Replace the invalid candidates with large constant (10^5) to make sure the + # following reduce_min/argmin behaves properly. + max_dist = 1e5 + distances = tf.where( + tf.tile( + tf.expand_dims(invalid_candidates, axis=1), + multiples=[1, num_instances, 1, 1]), + tf.ones_like(distances) * max_dist, + distances + ) + + # Determine the candidates that have the minimum distance to the regressed + # keypoints. Shape [batch_size, num_instances, num_keypoints]. 
min_distances = tf.math.reduce_min(distances, axis=2) @@ -557,7 +986,7 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, tiled_keypoint_scores = tf.tile( tf.expand_dims(keypoint_scores, axis=1), multiples=[1, num_instances, 1, 1]) - ranking_scores = tiled_keypoint_scores / (distances + 1e-6) + ranking_scores = tiled_keypoint_scores / (distances + score_distance_offset) nearby_candidate_inds = tf.math.argmax(ranking_scores, axis=2) else: raise ValueError('Not recognized candidate_ranking_mode: %s' % @@ -566,47 +995,47 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, # Gather the coordinates and scores corresponding to the closest candidates. # Shape of tensors are [batch_size, num_instances, num_keypoints, 2] and # [batch_size, num_instances, num_keypoints], respectively. - nearby_candidate_coords, nearby_candidate_scores = ( - _gather_candidates_at_indices(keypoint_candidates, keypoint_scores, - nearby_candidate_inds)) + (nearby_candidate_coords, nearby_candidate_scores, + nearby_candidate_depths) = ( + _gather_candidates_at_indices(keypoint_candidates, keypoint_scores, + nearby_candidate_inds, + keypoint_depth_candidates)) if bboxes is None: - # Create bboxes from regressed keypoints. - # Shape [batch_size * num_instances, 4]. - regressed_keypoints_flattened = tf.reshape( - regressed_keypoints, [-1, num_keypoints, 2]) - bboxes_flattened = keypoint_ops.keypoints_to_enclosing_bounding_boxes( - regressed_keypoints_flattened) + # Filter out the chosen candidate with score lower than unmatched + # keypoint score. + mask = tf.cast(nearby_candidate_scores < + keypoint_score_threshold, tf.int32) else: bboxes_flattened = tf.reshape(bboxes, [-1, 4]) - # Scale the bounding boxes. - # Shape [batch_size, num_instances, 4]. 
- boxlist = box_list.BoxList(bboxes_flattened) - boxlist_scaled = box_list_ops.scale_height_width( - boxlist, box_scale, box_scale) - bboxes_scaled = boxlist_scaled.get() - bboxes = tf.reshape(bboxes_scaled, [batch_size, num_instances, 4]) - - # Get ymin, xmin, ymax, xmax bounding box coordinates, tiled per keypoint. - # Shape [batch_size, num_instances, num_keypoints]. - bboxes_tiled = tf.tile(tf.expand_dims(bboxes, 2), [1, 1, num_keypoints, 1]) - ymin, xmin, ymax, xmax = tf.unstack(bboxes_tiled, axis=3) - - # Produce a mask that indicates whether the original regressed keypoint - # should be used instead of a candidate keypoint. - # Shape [batch_size, num_instances, num_keypoints]. - search_radius = ( - tf.math.maximum(ymax - ymin, xmax - xmin) * candidate_search_scale) - mask = (tf.cast(nearby_candidate_coords[:, :, :, 0] < ymin, tf.int32) + - tf.cast(nearby_candidate_coords[:, :, :, 0] > ymax, tf.int32) + - tf.cast(nearby_candidate_coords[:, :, :, 1] < xmin, tf.int32) + - tf.cast(nearby_candidate_coords[:, :, :, 1] > xmax, tf.int32) + - # Filter out the chosen candidate with score lower than unmatched - # keypoint score. - tf.cast(nearby_candidate_scores < - unmatched_keypoint_score, tf.int32) + - tf.cast(min_distances > search_radius, tf.int32)) + # Scale the bounding boxes. + # Shape [batch_size, num_instances, 4]. + boxlist = box_list.BoxList(bboxes_flattened) + boxlist_scaled = box_list_ops.scale_height_width( + boxlist, box_scale, box_scale) + bboxes_scaled = boxlist_scaled.get() + bboxes = tf.reshape(bboxes_scaled, [batch_size, num_instances, 4]) + + # Get ymin, xmin, ymax, xmax bounding box coordinates, tiled per keypoint. + # Shape [batch_size, num_instances, num_keypoints]. + bboxes_tiled = tf.tile(tf.expand_dims(bboxes, 2), [1, 1, num_keypoints, 1]) + ymin, xmin, ymax, xmax = tf.unstack(bboxes_tiled, axis=3) + + # Produce a mask that indicates whether the original regressed keypoint + # should be used instead of a candidate keypoint. 
+ # Shape [batch_size, num_instances, num_keypoints]. + search_radius = ( + tf.math.maximum(ymax - ymin, xmax - xmin) * candidate_search_scale) + mask = (tf.cast(nearby_candidate_coords[:, :, :, 0] < ymin, tf.int32) + + tf.cast(nearby_candidate_coords[:, :, :, 0] > ymax, tf.int32) + + tf.cast(nearby_candidate_coords[:, :, :, 1] < xmin, tf.int32) + + tf.cast(nearby_candidate_coords[:, :, :, 1] > xmax, tf.int32) + + # Filter out the chosen candidate with score lower than unmatched + # keypoint score. + tf.cast(nearby_candidate_scores < + keypoint_score_threshold, tf.int32) + + tf.cast(min_distances > search_radius, tf.int32)) mask = mask > 0 # Create refined keypoints where candidate keypoints replace original @@ -625,7 +1054,12 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores, unmatched_keypoint_score * tf.ones_like(nearby_candidate_scores), nearby_candidate_scores) - return refined_keypoints, refined_scores + refined_depths = None + if nearby_candidate_depths is not None: + refined_depths = tf.where(mask, tf.zeros_like(nearby_candidate_depths), + nearby_candidate_depths) + + return refined_keypoints, refined_scores, refined_depths def _pad_to_full_keypoint_dim(keypoint_coords, keypoint_scores, keypoint_inds, @@ -706,8 +1140,10 @@ def _pad_to_full_instance_dim(keypoint_coords, keypoint_scores, instance_inds, return keypoint_coords_padded, keypoint_scores_padded -def _gather_candidates_at_indices(keypoint_candidates, keypoint_scores, - indices): +def _gather_candidates_at_indices(keypoint_candidates, + keypoint_scores, + indices, + keypoint_depth_candidates=None): """Gathers keypoint candidate coordinates and scores at indices. Args: @@ -717,31 +1153,72 @@ def _gather_candidates_at_indices(keypoint_candidates, keypoint_scores, num_keypoints] with keypoint scores. indices: an integer tensor of shape [batch_size, num_indices, num_keypoints] with indices. 
+ keypoint_depth_candidates: (optional) a float tensor of shape [batch_size, + max_candidates, num_keypoints] with keypoint depths. Returns: A tuple with gathered_keypoint_candidates: a float tensor of shape [batch_size, num_indices, num_keypoints, 2] with gathered coordinates. gathered_keypoint_scores: a float tensor of shape [batch_size, - num_indices, num_keypoints, 2]. + num_indices, num_keypoints]. + gathered_keypoint_depths: a float tensor of shape [batch_size, + num_indices, num_keypoints]. Return None if the input + keypoint_depth_candidates is None. """ + batch_size, num_indices, num_keypoints = _get_shape(indices, 3) + # Transpose tensors so that all batch dimensions are up front. keypoint_candidates_transposed = tf.transpose(keypoint_candidates, [0, 2, 1, 3]) keypoint_scores_transposed = tf.transpose(keypoint_scores, [0, 2, 1]) - nearby_candidate_inds_transposed = tf.transpose(indices, - [0, 2, 1]) - nearby_candidate_coords_tranposed = tf.gather( - keypoint_candidates_transposed, nearby_candidate_inds_transposed, - batch_dims=2) - nearby_candidate_scores_transposed = tf.gather( - keypoint_scores_transposed, nearby_candidate_inds_transposed, - batch_dims=2) - gathered_keypoint_candidates = tf.transpose(nearby_candidate_coords_tranposed, - [0, 2, 1, 3]) + nearby_candidate_inds_transposed = tf.transpose(indices, [0, 2, 1]) + + # TF Lite does not support tf.gather with batch_dims > 0, so we need to use + # tf_gather_nd instead and here we prepare the indices for that. 
+ combined_indices = tf.stack([ + _multi_range( + batch_size, + value_repetitions=num_keypoints * num_indices, + dtype=tf.int64), + _multi_range( + num_keypoints, + value_repetitions=num_indices, + range_repetitions=batch_size, + dtype=tf.int64), + tf.reshape(nearby_candidate_inds_transposed, [-1]) + ], axis=1) + + nearby_candidate_coords_transposed = tf.gather_nd( + keypoint_candidates_transposed, combined_indices) + nearby_candidate_coords_transposed = tf.reshape( + nearby_candidate_coords_transposed, + [batch_size, num_keypoints, num_indices, -1]) + + nearby_candidate_scores_transposed = tf.gather_nd(keypoint_scores_transposed, + combined_indices) + nearby_candidate_scores_transposed = tf.reshape( + nearby_candidate_scores_transposed, + [batch_size, num_keypoints, num_indices]) + + gathered_keypoint_candidates = tf.transpose( + nearby_candidate_coords_transposed, [0, 2, 1, 3]) gathered_keypoint_scores = tf.transpose(nearby_candidate_scores_transposed, [0, 2, 1]) - return gathered_keypoint_candidates, gathered_keypoint_scores + + gathered_keypoint_depths = None + if keypoint_depth_candidates is not None: + keypoint_depths_transposed = tf.transpose(keypoint_depth_candidates, + [0, 2, 1]) + nearby_candidate_depths_transposed = tf.gather_nd( + keypoint_depths_transposed, combined_indices) + nearby_candidate_depths_transposed = tf.reshape( + nearby_candidate_depths_transposed, + [batch_size, num_keypoints, num_indices]) + gathered_keypoint_depths = tf.transpose(nearby_candidate_depths_transposed, + [0, 2, 1]) + return (gathered_keypoint_candidates, gathered_keypoint_scores, + gathered_keypoint_depths) def flattened_indices_from_row_col_indices(row_indices, col_indices, num_cols): @@ -768,13 +1245,45 @@ def row_col_channel_indices_from_flattened_indices(indices, num_cols, indices. """ + # Be careful with this function when running a model in float16 precision + # (e.g. 
TF.js with WebGL) because the array indices may not be represented + # accurately if they are too large, resulting in incorrect channel indices. + # See: + # https://en.wikipedia.org/wiki/Half-precision_floating-point_format#Precision_limitations_on_integer_values + # + # Avoid using mod operator to make the ops more easy to be compatible with + # different environments, e.g. WASM. row_indices = (indices // num_channels) // num_cols - col_indices = (indices // num_channels) % num_cols - channel_indices = indices % num_channels + col_indices = (indices // num_channels) - row_indices * num_cols + channel_indices_temp = indices // num_channels + channel_indices = indices - channel_indices_temp * num_channels return row_indices, col_indices, channel_indices +def row_col_indices_from_flattened_indices(indices, num_cols): + """Computes row and column indices from flattened indices. + + Args: + indices: An integer tensor of any shape holding the indices in the flattened + space. + num_cols: Number of columns in the image (width). + + Returns: + row_indices: The row indices corresponding to each of the input indices. + Same shape as indices. + col_indices: The column indices corresponding to each of the input indices. + Same shape as indices. + + """ + # Avoid using mod operator to make the ops more easy to be compatible with + # different environments, e.g. WASM. + row_indices = indices // num_cols + col_indices = indices - row_indices * num_cols + + return row_indices, col_indices + + def get_valid_anchor_weights_in_flattened_image(true_image_shapes, height, width): """Computes valid anchor weights for an image assuming pixels will be flattened. @@ -833,27 +1342,12 @@ def convert_strided_predictions_to_normalized_boxes(boxes, stride, boxes: A tensor of shape [batch_size, num_boxes, 4] representing the coordinates of the normalized boxes. 
""" - - def _normalize_boxlist(args): - - boxes, height, width = args - boxes = box_list_ops.scale(boxes, stride, stride) - boxes = box_list_ops.to_normalized_coordinates(boxes, height, width) - boxes = box_list_ops.clip_to_window(boxes, [0., 0., 1., 1.], - filter_nonoverlapping=False) - return boxes - - box_lists = [box_list.BoxList(boxes) for boxes in tf.unstack(boxes, axis=0)] - true_heights, true_widths, _ = tf.unstack(true_image_shapes, axis=1) - - true_heights_list = tf.unstack(true_heights, axis=0) - true_widths_list = tf.unstack(true_widths, axis=0) - - box_lists = list(map(_normalize_boxlist, - zip(box_lists, true_heights_list, true_widths_list))) - boxes = tf.stack([box_list_instance.get() for - box_list_instance in box_lists], axis=0) - + # Note: We use tf ops instead of functions in box_list_ops to make this + # function compatible with dynamic batch size. + boxes = boxes * stride + true_image_shapes = tf.tile(true_image_shapes[:, tf.newaxis, :2], [1, 1, 2]) + boxes = boxes / tf.cast(true_image_shapes, tf.float32) + boxes = tf.clip_by_value(boxes, 0.0, 1.0) return boxes @@ -915,9 +1409,16 @@ def convert_strided_predictions_to_normalized_keypoints( def clip_to_window(inputs): keypoints, window = inputs return keypoint_ops.clip_to_window(keypoints, window) + + # Specify the TensorSpec explicitly in the tf.map_fn to make it tf.lite + # compatible. 
+ kpts_dims = _get_shape(keypoint_coords_normalized, 4) + output_spec = tf.TensorSpec( + shape=[kpts_dims[1], kpts_dims[2], kpts_dims[3]], dtype=tf.float32) keypoint_coords_normalized = tf.map_fn( clip_to_window, (keypoint_coords_normalized, batch_window), - dtype=tf.float32, back_prop=False) + dtype=tf.float32, back_prop=False, + fn_output_signature=output_spec) keypoint_scores = tf.where(valid_indices, keypoint_scores, tf.zeros_like(keypoint_scores)) return keypoint_coords_normalized, keypoint_scores @@ -1151,6 +1652,33 @@ def gather_surface_coords_for_parts(surface_coords_cropped, return tf.reshape(vu_coords_flattened, [max_detections, height, width, 2]) +def predicted_embeddings_at_object_centers(embedding_predictions, + y_indices, x_indices): + """Returns the predicted embeddings at specified object centers. + + Args: + embedding_predictions: A float tensor of shape [batch_size, height, width, + reid_embed_size] holding predicted embeddings. + y_indices: A [batch, num_instances] int tensor holding y indices for object + centers. These indices correspond to locations in the output feature map. + x_indices: A [batch, num_instances] int tensor holding x indices for object + centers. These indices correspond to locations in the output feature map. + + Returns: + A float tensor of shape [batch_size, num_objects, reid_embed_size] where + predicted embeddings are gathered at the provided locations. 
+ """ + batch_size, _, width, _ = _get_shape(embedding_predictions, 4) + flattened_indices = flattened_indices_from_row_col_indices( + y_indices, x_indices, width) + _, num_instances = _get_shape(flattened_indices, 2) + embeddings_flat = _flatten_spatial_dimensions(embedding_predictions) + embeddings = tf.gather(embeddings_flat, flattened_indices, batch_dims=1) + embeddings = tf.reshape(embeddings, [batch_size, num_instances, -1]) + + return embeddings + + class ObjectDetectionParams( collections.namedtuple('ObjectDetectionParams', [ 'localization_loss', 'scale_loss_weight', 'offset_loss_weight', @@ -1201,7 +1729,13 @@ class KeypointEstimationParams( 'heatmap_bias_init', 'num_candidates_per_keypoint', 'task_loss_weight', 'peak_max_pool_kernel_size', 'unmatched_keypoint_score', 'box_scale', 'candidate_search_scale', 'candidate_ranking_mode', - 'offset_peak_radius', 'per_keypoint_offset' + 'offset_peak_radius', 'per_keypoint_offset', 'predict_depth', + 'per_keypoint_depth', 'keypoint_depth_loss_weight', + 'score_distance_offset', 'clip_out_of_frame_keypoints', + 'rescore_instances', 'heatmap_head_num_filters', + 'heatmap_head_kernel_sizes', 'offset_head_num_filters', + 'offset_head_kernel_sizes', 'regress_head_num_filters', + 'regress_head_kernel_sizes' ])): """Namedtuple to host object detection related parameters. @@ -1234,7 +1768,19 @@ class KeypointEstimationParams( candidate_search_scale=0.3, candidate_ranking_mode='min_distance', offset_peak_radius=0, - per_keypoint_offset=False): + per_keypoint_offset=False, + predict_depth=False, + per_keypoint_depth=False, + keypoint_depth_loss_weight=1.0, + score_distance_offset=1e-6, + clip_out_of_frame_keypoints=False, + rescore_instances=False, + heatmap_head_num_filters=(256), + heatmap_head_kernel_sizes=(3), + offset_head_num_filters=(256), + offset_head_kernel_sizes=(3), + regress_head_num_filters=(256), + regress_head_kernel_sizes=(3)): """Constructor with default values for KeypointEstimationParams. 
Args: @@ -1298,6 +1844,34 @@ class KeypointEstimationParams( original paper). If set True, the output offset target has the shape [batch_size, out_height, out_width, 2 * num_keypoints] (recommended when the offset_peak_radius is not zero). + predict_depth: A bool indicates whether to predict the depth of each + keypoints. + per_keypoint_depth: A bool indicates whether the model predicts the depth + of each keypoints in independent channels. Similar to + per_keypoint_offset but for the keypoint depth. + keypoint_depth_loss_weight: The weight of the keypoint depth loss. + score_distance_offset: The distance offset to apply in the denominator + when candidate_ranking_mode is 'score_distance_ratio'. The metric to + maximize in this scenario is score / (distance + score_distance_offset). + Larger values of score_distance_offset make the keypoint score gain more + relative importance. + clip_out_of_frame_keypoints: Whether keypoints outside the image frame + should be clipped back to the image boundary. If True, the keypoints + that are clipped have scores set to 0.0. + rescore_instances: Whether to rescore instances based on a combination of + detection score and keypoint scores. + heatmap_head_num_filters: filter numbers of the convolutional layers used + by the keypoint heatmap prediction head. + heatmap_head_kernel_sizes: kernel size of the convolutional layers used + by the keypoint heatmap prediction head. + offset_head_num_filters: filter numbers of the convolutional layers used + by the keypoint offset prediction head. + offset_head_kernel_sizes: kernel size of the convolutional layers used + by the keypoint offset prediction head. + regress_head_num_filters: filter numbers of the convolutional layers used + by the keypoint regression prediction head. + regress_head_kernel_sizes: kernel size of the convolutional layers used + by the keypoint regression prediction head. Returns: An initialized KeypointEstimationParams namedtuple. 
@@ -1310,13 +1884,20 @@ class KeypointEstimationParams( heatmap_bias_init, num_candidates_per_keypoint, task_loss_weight, peak_max_pool_kernel_size, unmatched_keypoint_score, box_scale, candidate_search_scale, candidate_ranking_mode, offset_peak_radius, - per_keypoint_offset) + per_keypoint_offset, predict_depth, per_keypoint_depth, + keypoint_depth_loss_weight, score_distance_offset, + clip_out_of_frame_keypoints, rescore_instances, + heatmap_head_num_filters, heatmap_head_kernel_sizes, + offset_head_num_filters, offset_head_kernel_sizes, + regress_head_num_filters, regress_head_kernel_sizes) class ObjectCenterParams( collections.namedtuple('ObjectCenterParams', [ 'classification_loss', 'object_center_loss_weight', 'heatmap_bias_init', - 'min_box_overlap_iou', 'max_box_predictions', 'use_only_known_classes' + 'min_box_overlap_iou', 'max_box_predictions', 'use_labeled_classes', + 'keypoint_weights_for_center', 'center_head_num_filters', + 'center_head_kernel_sizes' ])): """Namedtuple to store object center prediction related parameters.""" @@ -1328,7 +1909,10 @@ class ObjectCenterParams( heatmap_bias_init=-2.19, min_box_overlap_iou=0.7, max_box_predictions=100, - use_labeled_classes=False): + use_labeled_classes=False, + keypoint_weights_for_center=None, + center_head_num_filters=(256), + center_head_kernel_sizes=(3)): """Constructor with default values for ObjectCenterParams. Args: @@ -1343,7 +1927,16 @@ class ObjectCenterParams( computing the class specific center heatmaps. max_box_predictions: int, the maximum number of boxes to predict. use_labeled_classes: boolean, compute the loss only labeled classes. - + keypoint_weights_for_center: (optional) The keypoint weights used for + calculating the location of object center. If provided, the number of + weights need to be the same as the number of keypoints. The object + center is calculated by the weighted mean of the keypoint locations. 
If + not provided, the object center is determined by the center of the + bounding box (default behavior). + center_head_num_filters: filter numbers of the convolutional layers used + by the object center prediction head. + center_head_kernel_sizes: kernel size of the convolutional layers used + by the object center prediction head. Returns: An initialized ObjectCenterParams namedtuple. """ @@ -1351,7 +1944,8 @@ class ObjectCenterParams( cls).__new__(cls, classification_loss, object_center_loss_weight, heatmap_bias_init, min_box_overlap_iou, max_box_predictions, - use_labeled_classes) + use_labeled_classes, keypoint_weights_for_center, + center_head_num_filters, center_head_kernel_sizes) class MaskParams( @@ -1451,6 +2045,68 @@ class DensePoseParams( task_loss_weight, upsample_to_input_res, upsample_method, heatmap_bias_init) + +class TrackParams( + collections.namedtuple('TrackParams', [ + 'num_track_ids', 'reid_embed_size', 'num_fc_layers', + 'classification_loss', 'task_loss_weight' + ])): + """Namedtuple to store tracking prediction related parameters.""" + + __slots__ = () + + def __new__(cls, + num_track_ids, + reid_embed_size, + num_fc_layers, + classification_loss, + task_loss_weight=1.0): + """Constructor with default values for TrackParams. + + Args: + num_track_ids: int. The maximum track ID in the dataset. Used for ReID + embedding classification task. + reid_embed_size: int. The embedding size for ReID task. + num_fc_layers: int. The number of (fully-connected, batch-norm, relu) + layers for track ID classification head. + classification_loss: an object_detection.core.losses.Loss object to + compute the loss for the ReID embedding in CenterNet. + task_loss_weight: float, the loss weight for the tracking task. + + Returns: + An initialized TrackParams namedtuple. 
+ """ + return super(TrackParams, + cls).__new__(cls, num_track_ids, reid_embed_size, + num_fc_layers, classification_loss, + task_loss_weight) + + +class TemporalOffsetParams( + collections.namedtuple('TemporalOffsetParams', [ + 'localization_loss', 'task_loss_weight' + ])): + """Namedtuple to store temporal offset related parameters.""" + + __slots__ = () + + def __new__(cls, + localization_loss, + task_loss_weight=1.0): + """Constructor with default values for TrackParams. + + Args: + localization_loss: an object_detection.core.losses.Loss object to + compute the loss for the temporal offset in CenterNet. + task_loss_weight: float, the loss weight for the temporal offset + task. + + Returns: + An initialized TemporalOffsetParams namedtuple. + """ + return super(TemporalOffsetParams, + cls).__new__(cls, localization_loss, task_loss_weight) + # The following constants are used to generate the keys of the # (prediction, loss, target assigner,...) dictionaries used in CenterNetMetaArch # class. 
@@ -1461,12 +2117,17 @@ BOX_OFFSET = 'box/offset' KEYPOINT_REGRESSION = 'keypoint/regression' KEYPOINT_HEATMAP = 'keypoint/heatmap' KEYPOINT_OFFSET = 'keypoint/offset' +KEYPOINT_DEPTH = 'keypoint/depth' SEGMENTATION_TASK = 'segmentation_task' SEGMENTATION_HEATMAP = 'segmentation/heatmap' DENSEPOSE_TASK = 'densepose_task' DENSEPOSE_HEATMAP = 'densepose/heatmap' DENSEPOSE_REGRESSION = 'densepose/regression' LOSS_KEY_PREFIX = 'Loss' +TRACK_TASK = 'track_task' +TRACK_REID = 'track/reid' +TEMPORALOFFSET_TASK = 'temporal_offset_task' +TEMPORAL_OFFSET = 'track/offset' def get_keypoint_name(task_name, head_name): @@ -1510,7 +2171,13 @@ class CenterNetMetaArch(model.DetectionModel): object_detection_params=None, keypoint_params_dict=None, mask_params=None, - densepose_params=None): + densepose_params=None, + track_params=None, + temporal_offset_params=None, + use_depthwise=False, + compute_heatmap_sparse=False, + non_max_suppression_fn=None, + unit_height_conv=False): """Initializes a CenterNet model. Args: @@ -1542,6 +2209,20 @@ class CenterNetMetaArch(model.DetectionModel): hyper-parameters for DensePose prediction. Please see the class definition for more details. Note that if this is provided, it is expected that `mask_params` is also provided. + track_params: A TrackParams namedtuple. This object + holds the hyper-parameters for tracking. Please see the class + definition for more details. + temporal_offset_params: A TemporalOffsetParams namedtuple. This object + holds the hyper-parameters for offset prediction based tracking. + use_depthwise: If true, all task heads will be constructed using + separable_conv. Otherwise, standard convoltuions will be used. + compute_heatmap_sparse: bool, whether or not to use the sparse version of + the Op that computes the center heatmaps. The sparse version scales + better with number of channels in the heatmap, but in some cases is + known to cause an OOM error. See b/170989061. 
+ non_max_suppression_fn: Optional Non Max Suppression function to apply. + unit_height_conv: If True, Conv2Ds in prediction heads have asymmetric + kernels with height=1. """ assert object_detection_params or keypoint_params_dict # Shorten the name for convenience and better formatting. @@ -1549,6 +2230,7 @@ class CenterNetMetaArch(model.DetectionModel): # The Objects as Points paper attaches loss functions to multiple # (`num_feature_outputs`) feature maps in the the backbone. E.g. # for the hourglass backbone, `num_feature_outputs` is 2. + self._num_classes = num_classes self._feature_extractor = feature_extractor self._num_feature_outputs = feature_extractor.num_feature_outputs self._stride = self._feature_extractor.out_stride @@ -1561,12 +2243,21 @@ class CenterNetMetaArch(model.DetectionModel): raise ValueError('To run DensePose prediction, `mask_params` must also ' 'be supplied.') self._densepose_params = densepose_params + self._track_params = track_params + self._temporal_offset_params = temporal_offset_params + + self._use_depthwise = use_depthwise + self._compute_heatmap_sparse = compute_heatmap_sparse + # subclasses may not implement the unit_height_conv arg, so only provide it + # as a kwarg if it is True. + kwargs = {'unit_height_conv': unit_height_conv} if unit_height_conv else {} # Construct the prediction head nets. self._prediction_head_dict = self._construct_prediction_heads( num_classes, self._num_feature_outputs, - class_prediction_bias_init=self._center_params.heatmap_bias_init) + class_prediction_bias_init=self._center_params.heatmap_bias_init, + **kwargs) # Initialize the target assigners. self._target_assigner_dict = self._initialize_target_assigners( stride=self._stride, @@ -1574,6 +2265,7 @@ class CenterNetMetaArch(model.DetectionModel): # Will be used in VOD single_frame_meta_arch for tensor reshape. 
self._batched_prediction_tensor_names = [] + self._non_max_suppression_fn = non_max_suppression_fn super(CenterNetMetaArch, self).__init__(num_classes) @@ -1584,8 +2276,26 @@ class CenterNetMetaArch(model.DetectionModel): 'tensor names.') return self._batched_prediction_tensor_names + def _make_prediction_net_list(self, num_feature_outputs, num_out_channels, + kernel_sizes=(3), num_filters=(256), + bias_fill=None, name=None, + unit_height_conv=False): + prediction_net_list = [] + for i in range(num_feature_outputs): + prediction_net_list.append( + make_prediction_net( + num_out_channels, + kernel_sizes=kernel_sizes, + num_filters=num_filters, + bias_fill=bias_fill, + use_depthwise=self._use_depthwise, + name='{}_{}'.format(name, i) if name else name, + unit_height_conv=unit_height_conv)) + return prediction_net_list + def _construct_prediction_heads(self, num_classes, num_feature_outputs, - class_prediction_bias_init): + class_prediction_bias_init, + unit_height_conv=False): """Constructs the prediction heads based on the specific parameters. Args: @@ -1597,62 +2307,126 @@ class CenterNetMetaArch(model.DetectionModel): class_prediction_bias_init: float, the initial value of bias in the convolutional kernel of the class prediction head. If set to None, the bias is initialized with zeros. + unit_height_conv: If True, Conv2Ds have asymmetric kernels with height=1. Returns: A dictionary of keras modules generated by calling make_prediction_net - function. + function. It will also create and set a private member of the class when + learning the tracking task. 
""" prediction_heads = {} - prediction_heads[OBJECT_CENTER] = [ - make_prediction_net(num_classes, bias_fill=class_prediction_bias_init) - for _ in range(num_feature_outputs) - ] + prediction_heads[OBJECT_CENTER] = self._make_prediction_net_list( + num_feature_outputs, + num_classes, + kernel_sizes=self._center_params.center_head_kernel_sizes, + num_filters=self._center_params.center_head_num_filters, + bias_fill=class_prediction_bias_init, + name='center', + unit_height_conv=unit_height_conv) + if self._od_params is not None: - prediction_heads[BOX_SCALE] = [ - make_prediction_net(NUM_SIZE_CHANNELS) - for _ in range(num_feature_outputs) - ] - prediction_heads[BOX_OFFSET] = [ - make_prediction_net(NUM_OFFSET_CHANNELS) - for _ in range(num_feature_outputs) - ] + prediction_heads[BOX_SCALE] = self._make_prediction_net_list( + num_feature_outputs, NUM_SIZE_CHANNELS, name='box_scale', + unit_height_conv=unit_height_conv) + prediction_heads[BOX_OFFSET] = self._make_prediction_net_list( + num_feature_outputs, NUM_OFFSET_CHANNELS, name='box_offset', + unit_height_conv=unit_height_conv) + if self._kp_params_dict is not None: for task_name, kp_params in self._kp_params_dict.items(): num_keypoints = len(kp_params.keypoint_indices) - prediction_heads[get_keypoint_name(task_name, KEYPOINT_HEATMAP)] = [ - make_prediction_net( - num_keypoints, bias_fill=kp_params.heatmap_bias_init) - for _ in range(num_feature_outputs) - ] - prediction_heads[get_keypoint_name(task_name, KEYPOINT_REGRESSION)] = [ - make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints) - for _ in range(num_feature_outputs) - ] + prediction_heads[get_keypoint_name( + task_name, KEYPOINT_HEATMAP)] = self._make_prediction_net_list( + num_feature_outputs, + num_keypoints, + kernel_sizes=kp_params.heatmap_head_kernel_sizes, + num_filters=kp_params.heatmap_head_num_filters, + bias_fill=kp_params.heatmap_bias_init, + name='kpt_heatmap', + unit_height_conv=unit_height_conv) + prediction_heads[get_keypoint_name( + 
task_name, KEYPOINT_REGRESSION)] = self._make_prediction_net_list( + num_feature_outputs, + NUM_OFFSET_CHANNELS * num_keypoints, + kernel_sizes=kp_params.regress_head_kernel_sizes, + num_filters=kp_params.regress_head_num_filters, + name='kpt_regress', + unit_height_conv=unit_height_conv) + if kp_params.per_keypoint_offset: - prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [ - make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints) - for _ in range(num_feature_outputs) - ] + prediction_heads[get_keypoint_name( + task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list( + num_feature_outputs, + NUM_OFFSET_CHANNELS * num_keypoints, + kernel_sizes=kp_params.offset_head_kernel_sizes, + num_filters=kp_params.offset_head_num_filters, + name='kpt_offset', + unit_height_conv=unit_height_conv) else: - prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [ - make_prediction_net(NUM_OFFSET_CHANNELS) - for _ in range(num_feature_outputs) - ] + prediction_heads[get_keypoint_name( + task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list( + num_feature_outputs, + NUM_OFFSET_CHANNELS, + kernel_sizes=kp_params.offset_head_kernel_sizes, + num_filters=kp_params.offset_head_num_filters, + name='kpt_offset', + unit_height_conv=unit_height_conv) + + if kp_params.predict_depth: + num_depth_channel = ( + num_keypoints if kp_params.per_keypoint_depth else 1) + prediction_heads[get_keypoint_name( + task_name, KEYPOINT_DEPTH)] = self._make_prediction_net_list( + num_feature_outputs, num_depth_channel, name='kpt_depth', + unit_height_conv=unit_height_conv) + if self._mask_params is not None: - prediction_heads[SEGMENTATION_HEATMAP] = [ - make_prediction_net(num_classes, - bias_fill=self._mask_params.heatmap_bias_init) - for _ in range(num_feature_outputs)] + prediction_heads[SEGMENTATION_HEATMAP] = self._make_prediction_net_list( + num_feature_outputs, + num_classes, + bias_fill=self._mask_params.heatmap_bias_init, + name='seg_heatmap', + 
unit_height_conv=unit_height_conv) + if self._densepose_params is not None: - prediction_heads[DENSEPOSE_HEATMAP] = [ - make_prediction_net( # pylint: disable=g-complex-comprehension - self._densepose_params.num_parts, - bias_fill=self._densepose_params.heatmap_bias_init) - for _ in range(num_feature_outputs)] - prediction_heads[DENSEPOSE_REGRESSION] = [ - make_prediction_net(2 * self._densepose_params.num_parts) - for _ in range(num_feature_outputs) - ] + prediction_heads[DENSEPOSE_HEATMAP] = self._make_prediction_net_list( + num_feature_outputs, + self._densepose_params.num_parts, + bias_fill=self._densepose_params.heatmap_bias_init, + name='dense_pose_heatmap', + unit_height_conv=unit_height_conv) + prediction_heads[DENSEPOSE_REGRESSION] = self._make_prediction_net_list( + num_feature_outputs, + 2 * self._densepose_params.num_parts, + name='dense_pose_regress', + unit_height_conv=unit_height_conv) + + if self._track_params is not None: + prediction_heads[TRACK_REID] = self._make_prediction_net_list( + num_feature_outputs, + self._track_params.reid_embed_size, + name='track_reid', + unit_height_conv=unit_height_conv) + + # Creates a classification network to train object embeddings by learning + # a projection from embedding space to object track ID space. 
+ self.track_reid_classification_net = tf.keras.Sequential() + for _ in range(self._track_params.num_fc_layers - 1): + self.track_reid_classification_net.add( + tf.keras.layers.Dense(self._track_params.reid_embed_size, + input_shape=( + self._track_params.reid_embed_size,))) + self.track_reid_classification_net.add( + tf.keras.layers.BatchNormalization()) + self.track_reid_classification_net.add(tf.keras.layers.ReLU()) + self.track_reid_classification_net.add( + tf.keras.layers.Dense(self._track_params.num_track_ids, + input_shape=( + self._track_params.reid_embed_size,))) + if self._temporal_offset_params is not None: + prediction_heads[TEMPORAL_OFFSET] = self._make_prediction_net_list( + num_feature_outputs, NUM_OFFSET_CHANNELS, name='temporal_offset', + unit_height_conv=unit_height_conv) return prediction_heads def _initialize_target_assigners(self, stride, min_box_overlap_iou): @@ -1668,9 +2442,31 @@ class CenterNetMetaArch(model.DetectionModel): A dictionary of initialized target assigners for each task. """ target_assigners = {} - target_assigners[OBJECT_CENTER] = ( - cn_assigner.CenterNetCenterHeatmapTargetAssigner( - stride, min_box_overlap_iou)) + keypoint_weights_for_center = ( + self._center_params.keypoint_weights_for_center) + if not keypoint_weights_for_center: + target_assigners[OBJECT_CENTER] = ( + cn_assigner.CenterNetCenterHeatmapTargetAssigner( + stride, min_box_overlap_iou, self._compute_heatmap_sparse)) + self._center_from_keypoints = False + else: + # Determining the object center location by keypoint location is only + # supported when there is exactly one keypoint prediction task and no + # object detection task is specified. + assert len(self._kp_params_dict) == 1 and self._od_params is None + kp_params = next(iter(self._kp_params_dict.values())) + # The number of keypoint_weights_for_center needs to be the same as the + # number of keypoints. 
+ assert len(keypoint_weights_for_center) == len(kp_params.keypoint_indices) + target_assigners[OBJECT_CENTER] = ( + cn_assigner.CenterNetCenterHeatmapTargetAssigner( + stride, + min_box_overlap_iou, + self._compute_heatmap_sparse, + keypoint_class_id=kp_params.class_id, + keypoint_indices=kp_params.keypoint_indices, + keypoint_weights_for_center=keypoint_weights_for_center)) + self._center_from_keypoints = True if self._od_params is not None: target_assigners[DETECTION_TASK] = ( cn_assigner.CenterNetBoxTargetAssigner(stride)) @@ -1683,7 +2479,9 @@ class CenterNetMetaArch(model.DetectionModel): keypoint_indices=kp_params.keypoint_indices, keypoint_std_dev=kp_params.keypoint_std_dev, peak_radius=kp_params.offset_peak_radius, - per_keypoint_offset=kp_params.per_keypoint_offset)) + per_keypoint_offset=kp_params.per_keypoint_offset, + compute_heatmap_sparse=self._compute_heatmap_sparse, + per_keypoint_depth=kp_params.per_keypoint_depth)) if self._mask_params is not None: target_assigners[SEGMENTATION_TASK] = ( cn_assigner.CenterNetMaskTargetAssigner(stride)) @@ -1691,6 +2489,13 @@ class CenterNetMetaArch(model.DetectionModel): dp_stride = 1 if self._densepose_params.upsample_to_input_res else stride target_assigners[DENSEPOSE_TASK] = ( cn_assigner.CenterNetDensePoseTargetAssigner(dp_stride)) + if self._track_params is not None: + target_assigners[TRACK_TASK] = ( + cn_assigner.CenterNetTrackTargetAssigner( + stride, self._track_params.num_track_ids)) + if self._temporal_offset_params is not None: + target_assigners[TEMPORALOFFSET_TASK] = ( + cn_assigner.CenterNetTemporalOffsetTargetAssigner(stride)) return target_assigners @@ -1711,11 +2516,10 @@ class CenterNetMetaArch(model.DetectionModel): Returns: A float scalar tensor representing the object center loss per instance. 
""" - gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes) gt_classes_list = self.groundtruth_lists(fields.BoxListFields.classes) gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights) - if self._center_params.use_only_known_classes: + if self._center_params.use_labeled_classes: gt_labeled_classes_list = self.groundtruth_lists( fields.InputDataFields.groundtruth_labeled_classes) batch_labeled_classes = tf.stack(gt_labeled_classes_list, axis=0) @@ -1727,12 +2531,22 @@ class CenterNetMetaArch(model.DetectionModel): # Convert the groundtruth to targets. assigner = self._target_assigner_dict[OBJECT_CENTER] - heatmap_targets = assigner.assign_center_targets_from_boxes( - height=input_height, - width=input_width, - gt_boxes_list=gt_boxes_list, - gt_classes_list=gt_classes_list, - gt_weights_list=gt_weights_list) + if self._center_from_keypoints: + gt_keypoints_list = self.groundtruth_lists(fields.BoxListFields.keypoints) + heatmap_targets = assigner.assign_center_targets_from_keypoints( + height=input_height, + width=input_width, + gt_classes_list=gt_classes_list, + gt_keypoints_list=gt_keypoints_list, + gt_weights_list=gt_weights_list) + else: + gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes) + heatmap_targets = assigner.assign_center_targets_from_boxes( + height=input_height, + width=input_width, + gt_boxes_list=gt_boxes_list, + gt_classes_list=gt_classes_list, + gt_weights_list=gt_weights_list) flattened_heatmap_targets = _flatten_spatial_dimensions(heatmap_targets) num_boxes = _to_float32(get_num_instances_from_weights(gt_weights_list)) @@ -1847,6 +2661,7 @@ class CenterNetMetaArch(model.DetectionModel): heatmap_key = get_keypoint_name(task_name, KEYPOINT_HEATMAP) offset_key = get_keypoint_name(task_name, KEYPOINT_OFFSET) regression_key = get_keypoint_name(task_name, KEYPOINT_REGRESSION) + depth_key = get_keypoint_name(task_name, KEYPOINT_DEPTH) heatmap_loss = self._compute_kp_heatmap_loss( input_height=input_height, 
input_width=input_width, @@ -1874,6 +2689,14 @@ class CenterNetMetaArch(model.DetectionModel): kp_params.keypoint_offset_loss_weight * offset_loss) loss_dict[regression_key] = ( kp_params.keypoint_regression_loss_weight * reg_loss) + if kp_params.predict_depth: + depth_loss = self._compute_kp_depth_loss( + input_height=input_height, + input_width=input_width, + task_name=task_name, + depth_predictions=prediction_dict[depth_key], + localization_loss_fn=kp_params.localization_loss) + loss_dict[depth_key] = kp_params.keypoint_depth_loss_weight * depth_loss return loss_dict def _compute_kp_heatmap_loss(self, input_height, input_width, task_name, @@ -2043,6 +2866,65 @@ class CenterNetMetaArch(model.DetectionModel): tf.maximum(tf.reduce_sum(batch_weights), 1.0)) return loss + def _compute_kp_depth_loss(self, input_height, input_width, task_name, + depth_predictions, localization_loss_fn): + """Computes the loss of the keypoint depth estimation. + + Args: + input_height: An integer scalar tensor representing input image height. + input_width: An integer scalar tensor representing input image width. + task_name: A string representing the name of the keypoint task. + depth_predictions: A list of float tensors of shape [batch_size, + out_height, out_width, 1 (or num_keypoints)] representing the prediction + heads of the model for keypoint depth. + localization_loss_fn: An object_detection.core.losses.Loss object to + compute the loss for the keypoint offset predictions in CenterNet. + + Returns: + loss: A float scalar tensor representing the keypoint depth loss + normalized by number of total keypoints. 
+ """ + kp_params = self._kp_params_dict[task_name] + gt_keypoints_list = self.groundtruth_lists(fields.BoxListFields.keypoints) + gt_classes_list = self.groundtruth_lists(fields.BoxListFields.classes) + gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights) + gt_keypoint_depths_list = self.groundtruth_lists( + fields.BoxListFields.keypoint_depths) + gt_keypoint_depth_weights_list = self.groundtruth_lists( + fields.BoxListFields.keypoint_depth_weights) + + assigner = self._target_assigner_dict[task_name] + (batch_indices, batch_depths, + batch_weights) = assigner.assign_keypoints_depth_targets( + height=input_height, + width=input_width, + gt_keypoints_list=gt_keypoints_list, + gt_weights_list=gt_weights_list, + gt_classes_list=gt_classes_list, + gt_keypoint_depths_list=gt_keypoint_depths_list, + gt_keypoint_depth_weights_list=gt_keypoint_depth_weights_list) + + # Keypoint offset loss. + loss = 0.0 + for prediction in depth_predictions: + if kp_params.per_keypoint_depth: + prediction = tf.expand_dims(prediction, axis=-1) + selected_depths = cn_assigner.get_batch_predictions_from_indices( + prediction, batch_indices) + # The dimensions passed are not as per the doc string but the loss + # still computes the correct value. + unweighted_loss = localization_loss_fn( + selected_depths, + batch_depths, + weights=tf.expand_dims(tf.ones_like(batch_weights), -1)) + # Apply the weights after the loss function to have full control over it. + loss += batch_weights * tf.squeeze(unweighted_loss, axis=1) + + loss = tf.reduce_sum(loss) / ( + float(len(depth_predictions)) * + tf.maximum(tf.reduce_sum(batch_weights), 1.0)) + return loss + def _compute_segmentation_losses(self, prediction_dict, per_pixel_weights): """Computes all the losses associated with segmentation. 
@@ -2209,6 +3091,193 @@ class CenterNetMetaArch(model.DetectionModel): num_predictions * num_valid_points) return part_prediction_loss, surface_coord_loss + def _compute_track_losses(self, input_height, input_width, prediction_dict): + """Computes all the losses associated with tracking. + + Args: + input_height: An integer scalar tensor representing input image height. + input_width: An integer scalar tensor representing input image width. + prediction_dict: The dictionary returned from the predict() method. + + Returns: + A dictionary with tracking losses. + """ + object_reid_predictions = prediction_dict[TRACK_REID] + embedding_loss = self._compute_track_embedding_loss( + input_height=input_height, + input_width=input_width, + object_reid_predictions=object_reid_predictions) + losses = { + TRACK_REID: embedding_loss + } + return losses + + def _compute_track_embedding_loss(self, input_height, input_width, + object_reid_predictions): + """Computes the object ReID loss. + + The embedding is trained as a classification task where the target is the + ID of each track among all tracks in the whole dataset. + + Args: + input_height: An integer scalar tensor representing input image height. + input_width: An integer scalar tensor representing input image width. + object_reid_predictions: A list of float tensors of shape [batch_size, + out_height, out_width, reid_embed_size] representing the object + embedding feature maps. + + Returns: + A float scalar tensor representing the object ReID loss per instance. + """ + gt_track_ids_list = self.groundtruth_lists(fields.BoxListFields.track_ids) + gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes) + gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights) + num_boxes = _to_float32(get_num_instances_from_weights(gt_weights_list)) + + # Convert the groundtruth to targets. 
+ assigner = self._target_assigner_dict[TRACK_TASK] + batch_indices, batch_weights, track_targets = assigner.assign_track_targets( + height=input_height, + width=input_width, + gt_track_ids_list=gt_track_ids_list, + gt_boxes_list=gt_boxes_list, + gt_weights_list=gt_weights_list) + batch_weights = tf.expand_dims(batch_weights, -1) + + loss = 0.0 + object_reid_loss = self._track_params.classification_loss + # Loop through each feature output head. + for pred in object_reid_predictions: + embedding_pred = cn_assigner.get_batch_predictions_from_indices( + pred, batch_indices) + + reid_classification = self.track_reid_classification_net(embedding_pred) + + loss += object_reid_loss( + reid_classification, track_targets, weights=batch_weights) + + loss_per_instance = tf.reduce_sum(loss) / ( + float(len(object_reid_predictions)) * num_boxes) + + return loss_per_instance + + def _compute_temporal_offset_loss(self, input_height, + input_width, prediction_dict): + """Computes the temporal offset loss for tracking. + + Args: + input_height: An integer scalar tensor representing input image height. + input_width: An integer scalar tensor representing input image width. + prediction_dict: The dictionary returned from the predict() method. + + Returns: + A dictionary with track/temporal_offset losses. 
+ """ + gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes) + gt_offsets_list = self.groundtruth_lists( + fields.BoxListFields.temporal_offsets) + gt_match_list = self.groundtruth_lists( + fields.BoxListFields.track_match_flags) + gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights) + num_boxes = tf.cast( + get_num_instances_from_weights(gt_weights_list), tf.float32) + + offset_predictions = prediction_dict[TEMPORAL_OFFSET] + num_predictions = float(len(offset_predictions)) + + assigner = self._target_assigner_dict[TEMPORALOFFSET_TASK] + (batch_indices, batch_offset_targets, + batch_weights) = assigner.assign_temporal_offset_targets( + height=input_height, + width=input_width, + gt_boxes_list=gt_boxes_list, + gt_offsets_list=gt_offsets_list, + gt_match_list=gt_match_list, + gt_weights_list=gt_weights_list) + batch_weights = tf.expand_dims(batch_weights, -1) + + offset_loss_fn = self._temporal_offset_params.localization_loss + loss_dict = {} + offset_loss = 0 + for offset_pred in offset_predictions: + offset_pred = cn_assigner.get_batch_predictions_from_indices( + offset_pred, batch_indices) + offset_loss += offset_loss_fn(offset_pred[:, None], + batch_offset_targets[:, None], + weights=batch_weights) + offset_loss = tf.reduce_sum(offset_loss) / (num_predictions * num_boxes) + loss_dict[TEMPORAL_OFFSET] = offset_loss + return loss_dict + + def _should_clip_keypoints(self): + """Returns a boolean indicating whether keypoint clipping should occur. + + If there is only one keypoint task, clipping is controlled by the field + `clip_out_of_frame_keypoints`. If there are multiple keypoint tasks, + clipping logic is defined based on unanimous agreement of keypoint + parameters. If there is any ambiguity, clip_out_of_frame_keypoints is set + to False (default). 
+ """ + kp_params_iterator = iter(self._kp_params_dict.values()) + if len(self._kp_params_dict) == 1: + kp_params = next(kp_params_iterator) + return kp_params.clip_out_of_frame_keypoints + + # Multi-task setting. + kp_params = next(kp_params_iterator) + should_clip = kp_params.clip_out_of_frame_keypoints + for kp_params in kp_params_iterator: + if kp_params.clip_out_of_frame_keypoints != should_clip: + return False + return should_clip + + def _rescore_instances(self, classes, scores, keypoint_scores): + """Rescores instances based on detection and keypoint scores. + + Args: + classes: A [batch, max_detections] int32 tensor with detection classes. + scores: A [batch, max_detections] float32 tensor with detection scores. + keypoint_scores: A [batch, max_detections, total_num_keypoints] float32 + tensor with keypoint scores. + + Returns: + A [batch, max_detections] float32 tensor with possibly altered detection + scores. + """ + batch, max_detections, total_num_keypoints = ( + shape_utils.combined_static_and_dynamic_shape(keypoint_scores)) + classes_tiled = tf.tile(classes[:, :, tf.newaxis], + multiples=[1, 1, total_num_keypoints]) + # TODO(yuhuic): Investigate whether this function will create subgraphs in + # tflite that will cause the model to run slower at inference. 
+ for kp_params in self._kp_params_dict.values(): + if not kp_params.rescore_instances: + continue + class_id = kp_params.class_id + keypoint_indices = kp_params.keypoint_indices + kpt_mask = tf.reduce_sum( + tf.one_hot(keypoint_indices, depth=total_num_keypoints), axis=0) + kpt_mask_tiled = tf.tile(kpt_mask[tf.newaxis, tf.newaxis, :], + multiples=[batch, max_detections, 1]) + class_and_keypoint_mask = tf.math.logical_and( + classes_tiled == class_id, + kpt_mask_tiled == 1.0) + class_and_keypoint_mask_float = tf.cast(class_and_keypoint_mask, + dtype=tf.float32) + visible_keypoints = tf.math.greater(keypoint_scores, 0.0) + num_visible_keypoints = tf.reduce_sum( + class_and_keypoint_mask_float * + tf.cast(visible_keypoints, tf.float32), axis=-1) + num_visible_keypoints = tf.math.maximum(num_visible_keypoints, 1.0) + scores_for_class = (1./num_visible_keypoints) * ( + tf.reduce_sum(class_and_keypoint_mask_float * + scores[:, :, tf.newaxis] * + keypoint_scores, axis=-1)) + scores = tf.where(classes == class_id, + scores_for_class, + scores) + return scores + def preprocess(self, inputs): outputs = shape_utils.resize_images_and_return_shapes( inputs, self._image_resizer_fn) @@ -2303,7 +3372,9 @@ class CenterNetMetaArch(model.DetectionModel): 'Loss/$TASK_NAME/keypoint/regression', (optional) 'Loss/segmentation/heatmap', (optional) 'Loss/densepose/heatmap', (optional) - 'Loss/densepose/regression]' (optional) + 'Loss/densepose/regression', (optional) + 'Loss/track/reid'] (optional) + 'Loss/track/offset'] (optional) scalar tensors corresponding to the losses for different tasks. Note the $TASK_NAME is provided by the KeypointEstimation namedtuple used to differentiate between different keypoint tasks. 
@@ -2312,8 +3383,8 @@ class CenterNetMetaArch(model.DetectionModel): _, input_height, input_width, _ = _get_shape( prediction_dict['preprocessed_inputs'], 4) - output_height, output_width = (input_height // self._stride, - input_width // self._stride) + output_height, output_width = (tf.maximum(input_height // self._stride, 1), + tf.maximum(input_width // self._stride, 1)) # TODO(vighneshb) Explore whether using floor here is safe. output_true_image_shapes = tf.ceil( @@ -2371,6 +3442,26 @@ class CenterNetMetaArch(model.DetectionModel): densepose_losses[key] * self._densepose_params.task_loss_weight) losses.update(densepose_losses) + if self._track_params is not None: + track_losses = self._compute_track_losses( + input_height=input_height, + input_width=input_width, + prediction_dict=prediction_dict) + for key in track_losses: + track_losses[key] = ( + track_losses[key] * self._track_params.task_loss_weight) + losses.update(track_losses) + + if self._temporal_offset_params is not None: + offset_losses = self._compute_temporal_offset_loss( + input_height=input_height, + input_width=input_width, + prediction_dict=prediction_dict) + for key in offset_losses: + offset_losses[key] = ( + offset_losses[key] * self._temporal_offset_params.task_loss_weight) + losses.update(offset_losses) + # Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the # losses will be grouped together in Tensorboard. return dict([('%s/%s' % (LOSS_KEY_PREFIX, key), val) @@ -2394,8 +3485,13 @@ class CenterNetMetaArch(model.DetectionModel): detections: a dictionary containing the following fields detection_boxes - A tensor of shape [batch, max_detections, 4] holding the predicted boxes. + detection_boxes_strided: A tensor of shape [batch_size, num_detections, + 4] holding the predicted boxes in absolute coordinates of the + feature extractor's final layer output. detection_scores: A tensor of shape [batch, max_detections] holding the predicted score for each box. 
+ detection_multiclass_scores: A tensor of shape [batch, max_detection, + num_classes] holding multiclass score for each box. detection_classes: An integer tensor of shape [batch, max_detections] containing the detected class for each box. num_detections: An integer tensor of shape [batch] containing the @@ -2413,6 +3509,8 @@ class CenterNetMetaArch(model.DetectionModel): detection_surface_coords: (Optional) A float32 tensor of shape [batch, max_detection, mask_height, mask_width, 2] with DensePose surface coordinates, in (v, u) format. + detection_embeddings: (Optional) A float tensor of shape [batch, + max_detections, reid_embed_size] containing object embeddings. """ object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1]) # Get x, y and channel indices corresponding to the top indices in the class @@ -2421,35 +3519,84 @@ class CenterNetMetaArch(model.DetectionModel): top_k_feature_map_locations( object_center_prob, max_pool_kernel_size=3, k=self._center_params.max_box_predictions)) + multiclass_scores = tf.gather_nd( + object_center_prob, tf.stack([y_indices, x_indices], -1), batch_dims=1) - boxes_strided, classes, scores, num_detections = ( - prediction_tensors_to_boxes( - detection_scores, y_indices, x_indices, channel_indices, - prediction_dict[BOX_SCALE][-1], prediction_dict[BOX_OFFSET][-1])) - - boxes = convert_strided_predictions_to_normalized_boxes( - boxes_strided, self._stride, true_image_shapes) - + num_detections = tf.reduce_sum(tf.to_int32(detection_scores > 0), axis=1) postprocess_dict = { - fields.DetectionResultFields.detection_boxes: boxes, - fields.DetectionResultFields.detection_scores: scores, - fields.DetectionResultFields.detection_classes: classes, + fields.DetectionResultFields.detection_scores: detection_scores, + fields.DetectionResultFields.detection_multiclass_scores: + multiclass_scores, + fields.DetectionResultFields.detection_classes: channel_indices, fields.DetectionResultFields.num_detections: num_detections, } + 
boxes_strided = None + if self._od_params: + boxes_strided = ( + prediction_tensors_to_boxes(y_indices, x_indices, + prediction_dict[BOX_SCALE][-1], + prediction_dict[BOX_OFFSET][-1])) + + boxes = convert_strided_predictions_to_normalized_boxes( + boxes_strided, self._stride, true_image_shapes) + + postprocess_dict.update({ + fields.DetectionResultFields.detection_boxes: boxes, + 'detection_boxes_strided': boxes_strided + }) + if self._kp_params_dict: - keypoints, keypoint_scores = self._postprocess_keypoints( - prediction_dict, classes, y_indices, x_indices, - boxes_strided, num_detections) - keypoints, keypoint_scores = ( - convert_strided_predictions_to_normalized_keypoints( - keypoints, keypoint_scores, self._stride, true_image_shapes, - clip_out_of_frame_keypoints=True)) + # If the model is trained to predict only one class of object and its + # keypoint, we fall back to a simpler postprocessing function which uses + # the ops that are supported by tf.lite on GPU. + clip_keypoints = self._should_clip_keypoints() + if len(self._kp_params_dict) == 1 and self._num_classes == 1: + (keypoints, keypoint_scores, + keypoint_depths) = self._postprocess_keypoints_single_class( + prediction_dict, channel_indices, y_indices, x_indices, + boxes_strided, num_detections) + keypoints, keypoint_scores = ( + convert_strided_predictions_to_normalized_keypoints( + keypoints, keypoint_scores, self._stride, true_image_shapes, + clip_out_of_frame_keypoints=clip_keypoints)) + if keypoint_depths is not None: + postprocess_dict.update({ + fields.DetectionResultFields.detection_keypoint_depths: + keypoint_depths + }) + else: + # Multi-class keypoint estimation task does not support depth + # estimation. 
+ assert all([ + not kp_dict.predict_depth + for kp_dict in self._kp_params_dict.values() + ]) + keypoints, keypoint_scores = self._postprocess_keypoints_multi_class( + prediction_dict, channel_indices, y_indices, x_indices, + None, num_detections) + keypoints, keypoint_scores = ( + convert_strided_predictions_to_normalized_keypoints( + keypoints, keypoint_scores, self._stride, true_image_shapes, + clip_out_of_frame_keypoints=clip_keypoints)) + + # Update instance scores based on keypoints. + scores = self._rescore_instances( + channel_indices, detection_scores, keypoint_scores) postprocess_dict.update({ + fields.DetectionResultFields.detection_scores: scores, fields.DetectionResultFields.detection_keypoints: keypoints, fields.DetectionResultFields.detection_keypoint_scores: keypoint_scores }) + if self._od_params is None: + # Still output the box prediction by enclosing the keypoints for + # evaluation purpose. + boxes = keypoint_ops.keypoints_to_enclosing_bounding_boxes( + keypoints, keypoints_axis=2) + postprocess_dict.update({ + fields.DetectionResultFields.detection_boxes: boxes, + }) if self._mask_params: masks = tf.nn.sigmoid(prediction_dict[SEGMENTATION_HEATMAP][-1]) @@ -2461,7 +3608,7 @@ class CenterNetMetaArch(model.DetectionModel): densepose_class_index = self._densepose_params.class_id instance_masks, surface_coords = ( convert_strided_predictions_to_instance_masks( - boxes, classes, masks, true_image_shapes, + boxes, channel_indices, masks, true_image_shapes, densepose_part_heatmap, densepose_surface_coords, stride=self._stride, mask_height=self._mask_params.mask_height, mask_width=self._mask_params.mask_width, @@ -2474,12 +3621,196 @@ class CenterNetMetaArch(model.DetectionModel): fields.DetectionResultFields.detection_surface_coords] = ( surface_coords) + if self._track_params: + embeddings = self._postprocess_embeddings(prediction_dict, + y_indices, x_indices) + postprocess_dict.update({ + fields.DetectionResultFields.detection_embeddings: 
embeddings + }) + + if self._temporal_offset_params: + offsets = prediction_tensors_to_temporal_offsets( + y_indices, x_indices, + prediction_dict[TEMPORAL_OFFSET][-1]) + postprocess_dict[fields.DetectionResultFields.detection_offsets] = offsets + + if self._non_max_suppression_fn: + boxes = tf.expand_dims( + postprocess_dict.pop(fields.DetectionResultFields.detection_boxes), + axis=-2) + multiclass_scores = postprocess_dict[ + fields.DetectionResultFields.detection_multiclass_scores] + num_valid_boxes = postprocess_dict.pop( + fields.DetectionResultFields.num_detections) + # Remove scores and classes as NMS will compute these form multiclass + # scores. + postprocess_dict.pop(fields.DetectionResultFields.detection_scores) + postprocess_dict.pop(fields.DetectionResultFields.detection_classes) + (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields, + num_detections) = self._non_max_suppression_fn( + boxes, + multiclass_scores, + additional_fields=postprocess_dict, + num_valid_boxes=num_valid_boxes) + postprocess_dict = nmsed_additional_fields + postprocess_dict[ + fields.DetectionResultFields.detection_boxes] = nmsed_boxes + postprocess_dict[ + fields.DetectionResultFields.detection_scores] = nmsed_scores + postprocess_dict[ + fields.DetectionResultFields.detection_classes] = nmsed_classes + postprocess_dict[ + fields.DetectionResultFields.num_detections] = num_detections + postprocess_dict.update(nmsed_additional_fields) + return postprocess_dict + + def postprocess_single_instance_keypoints( + self, + prediction_dict, + true_image_shapes): + """Postprocess for predicting single instance keypoints. + + This postprocess function is a special case of predicting the keypoint of + a single instance in the image (original CenterNet postprocess supports + multi-instance prediction). Due to the simplification assumption, this + postprocessing function achieves much faster inference time. 
+ Here is a short list of the modifications made in this function: + + 1) Assume the model predicts only single class keypoint. + 2) Assume there is only one instance in the image. If multiple instances + appear in the image, the model tends to predict the one that is closer + to the image center (the other ones are considered as background and + are rejected by the model). + 3) Avoid using top_k ops in the postprocessing logics since it is slower + than using argmax. + 4) The predictions other than the keypoints are ignored, e.g. boxes. + 5) The input batch size is assumed to be 1. + + Args: + prediction_dict: a dictionary holding predicted tensors from "predict" + function. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is of + the form [height, width, channels] indicating the shapes of true images + in the resized images, as resized images can be padded with zeros. + + Returns: + detections: a dictionary containing the following fields + detection_keypoints: A float tensor of shape + [1, 1, num_keypoints, 2] with normalized keypoints. Any invalid + keypoints have their coordinates and scores set to 0.0. + detection_keypoint_scores: A float tensor of shape + [1, 1, num_keypoints] with scores for each keypoint. + """ + # The number of keypoint task is expected to be 1. 
+ assert len(self._kp_params_dict) == 1 + task_name, kp_params = next(iter(self._kp_params_dict.items())) + keypoint_heatmap = tf.nn.sigmoid(prediction_dict[get_keypoint_name( + task_name, KEYPOINT_HEATMAP)][-1]) + keypoint_offset = prediction_dict[get_keypoint_name(task_name, + KEYPOINT_OFFSET)][-1] + keypoint_regression = prediction_dict[get_keypoint_name( + task_name, KEYPOINT_REGRESSION)][-1] + object_heatmap = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1]) + + keypoint_depths = None + if kp_params.predict_depth: + keypoint_depths = prediction_dict[get_keypoint_name( + task_name, KEYPOINT_DEPTH)][-1] + keypoints, keypoint_scores, keypoint_depths = ( + prediction_to_single_instance_keypoints( + object_heatmap=object_heatmap, + keypoint_heatmap=keypoint_heatmap, + keypoint_offset=keypoint_offset, + keypoint_regression=keypoint_regression, + kp_params=kp_params, + keypoint_depths=keypoint_depths)) + + keypoints, keypoint_scores = ( + convert_strided_predictions_to_normalized_keypoints( + keypoints, + keypoint_scores, + self._stride, + true_image_shapes, + clip_out_of_frame_keypoints=False)) + postprocess_dict = { + fields.DetectionResultFields.detection_keypoints: keypoints, + fields.DetectionResultFields.detection_keypoint_scores: keypoint_scores + } + + if kp_params.predict_depth: + postprocess_dict.update({ + fields.DetectionResultFields.detection_keypoint_depths: + keypoint_depths + }) return postprocess_dict - def _postprocess_keypoints(self, prediction_dict, classes, y_indices, - x_indices, boxes, num_detections): + def _postprocess_embeddings(self, prediction_dict, y_indices, x_indices): + """Performs postprocessing on embedding predictions. + + Args: + prediction_dict: a dictionary holding predicted tensors, returned from the + predict() method. This dictionary should contain embedding prediction + feature maps for tracking task. + y_indices: A [batch_size, max_detections] int tensor with y indices for + all object centers. 
+ x_indices: A [batch_size, max_detections] int tensor with x indices for + all object centers. + + Returns: + embeddings: A [batch_size, max_detection, reid_embed_size] float32 + tensor with L2 normalized embeddings extracted from detection box + centers. + """ + embedding_predictions = prediction_dict[TRACK_REID][-1] + embeddings = predicted_embeddings_at_object_centers( + embedding_predictions, y_indices, x_indices) + embeddings, _ = tf.linalg.normalize(embeddings, axis=-1) + + return embeddings + + def _scatter_keypoints_to_batch(self, num_ind, kpt_coords_for_example, + kpt_scores_for_example, + instance_inds_for_example, max_detections, + total_num_keypoints): + """Helper function to convert scattered keypoints into batch.""" + def left_fn(kpt_coords_for_example, kpt_scores_for_example, + instance_inds_for_example): + # Scatter into tensor where instances align with original detection + # instances. New shape of keypoint coordinates and scores are + # [1, max_detections, num_total_keypoints, 2] and + # [1, max_detections, num_total_keypoints], respectively. + return _pad_to_full_instance_dim( + kpt_coords_for_example, kpt_scores_for_example, + instance_inds_for_example, + self._center_params.max_box_predictions) + + def right_fn(): + kpt_coords_for_example_all_det = tf.zeros( + [1, max_detections, total_num_keypoints, 2], dtype=tf.float32) + kpt_scores_for_example_all_det = tf.zeros( + [1, max_detections, total_num_keypoints], dtype=tf.float32) + return (kpt_coords_for_example_all_det, + kpt_scores_for_example_all_det) + + left_fn = functools.partial(left_fn, kpt_coords_for_example, + kpt_scores_for_example, + instance_inds_for_example) + + # Use dimension values instead of tf.size for tf.lite compatibility. + return tf.cond(num_ind[0] > 0, left_fn, right_fn) + + def _postprocess_keypoints_multi_class(self, prediction_dict, classes, + y_indices, x_indices, boxes, + num_detections): """Performs postprocessing on keypoint predictions. 
+ This is the most general keypoint postprocessing function which supports + multiple keypoint tasks (e.g. human and dog keypoints) and multiple object + detection classes. Note that it is the most expensive postprocessing logics + and is currently not tf.lite/tf.js compatible. See + _postprocess_keypoints_single_class if you plan to export the model in more + portable format. + Args: prediction_dict: a dictionary holding predicted tensors, returned from the predict() method. This dictionary should contain keypoint prediction @@ -2504,7 +3835,7 @@ class CenterNetMetaArch(model.DetectionModel): """ total_num_keypoints = sum(len(kp_dict.keypoint_indices) for kp_dict in self._kp_params_dict.values()) - batch_size, max_detections, _ = _get_shape(boxes, 3) + batch_size, max_detections = _get_shape(classes, 2) kpt_coords_for_example_list = [] kpt_scores_for_example_list = [] for ex_ind in range(batch_size): @@ -2520,29 +3851,39 @@ class CenterNetMetaArch(model.DetectionModel): get_keypoint_name(task_name, KEYPOINT_REGRESSION)][-1] instance_inds = self._get_instance_indices( classes, num_detections, ex_ind, kp_params.class_id) + num_ind = _get_shape(instance_inds, 1) - def true_fn( - keypoint_heatmap, keypoint_offsets, keypoint_regression, - classes, y_indices, x_indices, boxes, instance_inds, - ex_ind, kp_params): + def true_fn(keypoint_heatmap, keypoint_offsets, keypoint_regression, + classes, y_indices, x_indices, boxes, instance_inds, ex_ind, + kp_params): """Logics to execute when instance_inds is not an empty set.""" + # Gather the feature map locations corresponding to the object class. + y_indices_for_kpt_class = tf.gather(y_indices, instance_inds, axis=1) + x_indices_for_kpt_class = tf.gather(x_indices, instance_inds, axis=1) + if boxes is None: + boxes_for_kpt_class = None + else: + boxes_for_kpt_class = tf.gather(boxes, instance_inds, axis=1) + # Postprocess keypoints and scores for class and single image. 
Shapes # are [1, num_instances_i, num_keypoints_i, 2] and # [1, num_instances_i, num_keypoints_i], respectively. Note that # num_instances_i and num_keypoints_i refers to the number of # instances and keypoints for class i, respectively. - kpt_coords_for_class, kpt_scores_for_class = ( + (kpt_coords_for_class, kpt_scores_for_class, _) = ( self._postprocess_keypoints_for_class_and_image( keypoint_heatmap, keypoint_offsets, keypoint_regression, - classes, y_indices, x_indices, boxes, instance_inds, - ex_ind, kp_params)) + classes, y_indices_for_kpt_class, x_indices_for_kpt_class, + boxes_for_kpt_class, ex_ind, kp_params)) + # Expand keypoint dimension (with padding) so that coordinates and # scores have shape [1, num_instances_i, num_total_keypoints, 2] and # [1, num_instances_i, num_total_keypoints], respectively. kpts_coords_for_class_padded, kpt_scores_for_class_padded = ( - _pad_to_full_keypoint_dim( - kpt_coords_for_class, kpt_scores_for_class, - kp_params.keypoint_indices, total_num_keypoints)) + _pad_to_full_keypoint_dim(kpt_coords_for_class, + kpt_scores_for_class, + kp_params.keypoint_indices, + total_num_keypoints)) return kpts_coords_for_class_padded, kpt_scores_for_class_padded def false_fn(): @@ -2554,7 +3895,8 @@ class CenterNetMetaArch(model.DetectionModel): true_fn, keypoint_heatmap, keypoint_offsets, keypoint_regression, classes, y_indices, x_indices, boxes, instance_inds, ex_ind, kp_params) - results = tf.cond(tf.size(instance_inds) > 0, true_fn, false_fn) + # Use dimension values instead of tf.size for tf.lite compatibility. + results = tf.cond(num_ind[0] > 0, true_fn, false_fn) kpt_coords_for_class_list.append(results[0]) kpt_scores_for_class_list.append(results[1]) @@ -2566,24 +3908,13 @@ class CenterNetMetaArch(model.DetectionModel): instance_inds_for_example = tf.concat(instance_inds_for_class_list, axis=0) - if tf.size(instance_inds_for_example) > 0: - # Scatter into tensor where instances align with original detection - # instances. 
New shape of keypoint coordinates and scores are - # [1, max_detections, num_total_keypoints, 2] and - # [1, max_detections, num_total_keypoints], respectively. - kpt_coords_for_example_all_det, kpt_scores_for_example_all_det = ( - _pad_to_full_instance_dim( - kpt_coords_for_example, kpt_scores_for_example, - instance_inds_for_example, - self._center_params.max_box_predictions)) - else: - kpt_coords_for_example_all_det = tf.zeros( - [1, max_detections, total_num_keypoints, 2], dtype=tf.float32) - kpt_scores_for_example_all_det = tf.zeros( - [1, max_detections, total_num_keypoints], dtype=tf.float32) + (kpt_coords_for_example_all_det, + kpt_scores_for_example_all_det) = self._scatter_keypoints_to_batch( + num_ind, kpt_coords_for_example, kpt_scores_for_example, + instance_inds_for_example, max_detections, total_num_keypoints) - kpt_coords_for_example_list.append(kpt_coords_for_example_all_det) - kpt_scores_for_example_list.append(kpt_scores_for_example_all_det) + kpt_coords_for_example_list.append(kpt_coords_for_example_all_det) + kpt_scores_for_example_list.append(kpt_scores_for_example_all_det) # Concatenate all keypoints and scores from all examples in the batch. # Shapes are [batch_size, max_detections, num_total_keypoints, 2] and @@ -2593,6 +3924,91 @@ class CenterNetMetaArch(model.DetectionModel): return keypoints, keypoint_scores + def _postprocess_keypoints_single_class(self, prediction_dict, classes, + y_indices, x_indices, boxes, + num_detections): + """Performs postprocessing on keypoint predictions (single class only). + + This function handles the special case of keypoint task that the model + predicts only one class of the bounding box/keypoint (e.g. person). By the + assumption, the function uses only tf.lite supported ops and should run + faster. + + Args: + prediction_dict: a dictionary holding predicted tensors, returned from the + predict() method. This dictionary should contain keypoint prediction + feature maps for each keypoint task. 
+ classes: A [batch_size, max_detections] int tensor with class indices for + all detected objects. + y_indices: A [batch_size, max_detections] int tensor with y indices for + all object centers. + x_indices: A [batch_size, max_detections] int tensor with x indices for + all object centers. + boxes: A [batch_size, max_detections, 4] float32 tensor with bounding + boxes in (un-normalized) output space. + num_detections: A [batch_size] int tensor with the number of valid + detections for each image. + + Returns: + A tuple of + keypoints: a [batch_size, max_detection, num_total_keypoints, 2] float32 + tensor with keypoints in the output (strided) coordinate frame. + keypoint_scores: a [batch_size, max_detections, num_total_keypoints] + float32 tensor with keypoint scores. + """ + # This function only works when there is only one keypoint task and the + # number of classes equal to one. For more general use cases, please use + # _postprocess_keypoints instead. + assert len(self._kp_params_dict) == 1 and self._num_classes == 1 + task_name, kp_params = next(iter(self._kp_params_dict.items())) + keypoint_heatmap = prediction_dict[ + get_keypoint_name(task_name, KEYPOINT_HEATMAP)][-1] + keypoint_offsets = prediction_dict[ + get_keypoint_name(task_name, KEYPOINT_OFFSET)][-1] + keypoint_regression = prediction_dict[ + get_keypoint_name(task_name, KEYPOINT_REGRESSION)][-1] + keypoint_depth_predictions = None + if kp_params.predict_depth: + keypoint_depth_predictions = prediction_dict[get_keypoint_name( + task_name, KEYPOINT_DEPTH)][-1] + + batch_size, _ = _get_shape(classes, 2) + kpt_coords_for_example_list = [] + kpt_scores_for_example_list = [] + kpt_depths_for_example_list = [] + for ex_ind in range(batch_size): + # Postprocess keypoints and scores for class and single image. Shapes + # are [1, max_detections, num_keypoints, 2] and + # [1, max_detections, num_keypoints], respectively. 
+ (kpt_coords_for_class, kpt_scores_for_class, kpt_depths_for_class) = ( + self._postprocess_keypoints_for_class_and_image( + keypoint_heatmap, + keypoint_offsets, + keypoint_regression, + classes, + y_indices, + x_indices, + boxes, + ex_ind, + kp_params, + keypoint_depth_predictions=keypoint_depth_predictions)) + + kpt_coords_for_example_list.append(kpt_coords_for_class) + kpt_scores_for_example_list.append(kpt_scores_for_class) + kpt_depths_for_example_list.append(kpt_depths_for_class) + + # Concatenate all keypoints and scores from all examples in the batch. + # Shapes are [batch_size, max_detections, num_keypoints, 2] and + # [batch_size, max_detections, num_keypoints], respectively. + keypoints = tf.concat(kpt_coords_for_example_list, axis=0) + keypoint_scores = tf.concat(kpt_scores_for_example_list, axis=0) + + keypoint_depths = None + if kp_params.predict_depth: + keypoint_depths = tf.concat(kpt_depths_for_example_list, axis=0) + + return keypoints, keypoint_scores, keypoint_depths + def _get_instance_indices(self, classes, num_detections, batch_index, class_id): """Gets the instance indices that match the target class ID. @@ -2606,7 +4022,7 @@ class CenterNetMetaArch(model.DetectionModel): class_id: Class id Returns: - instance_inds: A [num_instances] int tensor where each element indicates + instance_inds: A [num_instances] int32 tensor where each element indicates the instance location within the `classes` tensor. This is useful to associate the refined keypoints with the original detections (i.e. boxes) @@ -2615,26 +4031,29 @@ class CenterNetMetaArch(model.DetectionModel): _, max_detections = shape_utils.combined_static_and_dynamic_shape( classes) # Get the detection indices corresponding to the target class. + # Call tf.math.equal with matched tensor shape to make it tf.lite + # compatible. 
valid_detections_with_kpt_class = tf.math.logical_and( tf.range(max_detections) < num_detections[batch_index], - classes[0] == class_id) + tf.math.equal(classes[0], tf.fill(classes[0].shape, class_id))) instance_inds = tf.where(valid_detections_with_kpt_class)[:, 0] - return instance_inds + # Cast the indices tensor to int32 for tf.lite compatibility. + return tf.cast(instance_inds, tf.int32) def _postprocess_keypoints_for_class_and_image( - self, keypoint_heatmap, keypoint_offsets, keypoint_regression, classes, - y_indices, x_indices, boxes, indices_with_kpt_class, batch_index, - kp_params): + self, + keypoint_heatmap, + keypoint_offsets, + keypoint_regression, + classes, + y_indices, + x_indices, + boxes, + batch_index, + kp_params, + keypoint_depth_predictions=None): """Postprocess keypoints for a single image and class. - This function performs the following postprocessing operations on a single - image and single keypoint class: - - Converts keypoints scores to range [0, 1] with sigmoid. - - Determines the detections that correspond to the specified keypoint class. - - Gathers the regressed keypoints at the detection (i.e. box) centers. - - Gathers keypoint candidates from the keypoint heatmaps. - - Snaps regressed keypoints to nearby keypoint candidates. - Args: keypoint_heatmap: A [batch_size, height, width, num_keypoints] float32 tensor with keypoint heatmaps. @@ -2650,13 +4069,11 @@ class CenterNetMetaArch(model.DetectionModel): all object centers. boxes: A [batch_size, max_detections, 4] float32 tensor with detected boxes in the output (strided) frame. - indices_with_kpt_class: A [num_instances] int tensor where each element - indicates the instance location within the `classes` tensor. This is - useful to associate the refined keypoints with the original detections - (i.e. boxes) batch_index: An integer specifying the index for an example in the batch. kp_params: A `KeypointEstimationParams` object with parameters for a single keypoint class. 
+ keypoint_depth_predictions: (optional) A [batch_size, height, width, 1] + float32 tensor representing the keypoint depth prediction. Returns: A tuple of @@ -2667,28 +4084,31 @@ class CenterNetMetaArch(model.DetectionModel): for the specific class. refined_scores: A [1, num_instances, num_keypoints] float32 tensor with keypoint scores. + refined_depths: A [1, num_instances, num_keypoints] float32 tensor with + keypoint depths. Return None if the input keypoint_depth_predictions is + None. """ - keypoint_indices = kp_params.keypoint_indices - num_keypoints = len(keypoint_indices) + num_keypoints = len(kp_params.keypoint_indices) keypoint_heatmap = tf.nn.sigmoid( keypoint_heatmap[batch_index:batch_index+1, ...]) keypoint_offsets = keypoint_offsets[batch_index:batch_index+1, ...] keypoint_regression = keypoint_regression[batch_index:batch_index+1, ...] + keypoint_depths = None + if keypoint_depth_predictions is not None: + keypoint_depths = keypoint_depth_predictions[batch_index:batch_index + 1, + ...] y_indices = y_indices[batch_index:batch_index+1, ...] x_indices = x_indices[batch_index:batch_index+1, ...] - - # Gather the feature map locations corresponding to the object class. - y_indices_for_kpt_class = tf.gather(y_indices, indices_with_kpt_class, - axis=1) - x_indices_for_kpt_class = tf.gather(x_indices, indices_with_kpt_class, - axis=1) - boxes_for_kpt_class = tf.gather(boxes, indices_with_kpt_class, axis=1) + if boxes is None: + boxes_slice = None + else: + boxes_slice = boxes[batch_index:batch_index+1, ...] # Gather the regressed keypoints. Final tensor has shape # [1, num_instances, num_keypoints, 2]. 
regressed_keypoints_for_objects = regressed_keypoints_at_object_centers( - keypoint_regression, y_indices_for_kpt_class, x_indices_for_kpt_class) + keypoint_regression, y_indices, x_indices) regressed_keypoints_for_objects = tf.reshape( regressed_keypoints_for_objects, [1, -1, num_keypoints, 2]) @@ -2696,26 +4116,36 @@ class CenterNetMetaArch(model.DetectionModel): # The shape of keypoint_candidates and keypoint_scores is: # [1, num_candidates_per_keypoint, num_keypoints, 2] and # [1, num_candidates_per_keypoint, num_keypoints], respectively. - keypoint_candidates, keypoint_scores, num_keypoint_candidates = ( - prediction_tensors_to_keypoint_candidates( - keypoint_heatmap, keypoint_offsets, - keypoint_score_threshold=( - kp_params.keypoint_candidate_score_threshold), - max_pool_kernel_size=kp_params.peak_max_pool_kernel_size, - max_candidates=kp_params.num_candidates_per_keypoint)) + (keypoint_candidates, keypoint_scores, num_keypoint_candidates, + keypoint_depth_candidates) = ( + prediction_tensors_to_keypoint_candidates( + keypoint_heatmap, + keypoint_offsets, + keypoint_score_threshold=( + kp_params.keypoint_candidate_score_threshold), + max_pool_kernel_size=kp_params.peak_max_pool_kernel_size, + max_candidates=kp_params.num_candidates_per_keypoint, + keypoint_depths=keypoint_depths)) # Get the refined keypoints and scores, of shape # [1, num_instances, num_keypoints, 2] and # [1, num_instances, num_keypoints], respectively. 
- refined_keypoints, refined_scores = refine_keypoints( - regressed_keypoints_for_objects, keypoint_candidates, keypoint_scores, - num_keypoint_candidates, bboxes=boxes_for_kpt_class, + (refined_keypoints, refined_scores, refined_depths) = refine_keypoints( + regressed_keypoints_for_objects, + keypoint_candidates, + keypoint_scores, + num_keypoint_candidates, + bboxes=boxes_slice, unmatched_keypoint_score=kp_params.unmatched_keypoint_score, box_scale=kp_params.box_scale, candidate_search_scale=kp_params.candidate_search_scale, - candidate_ranking_mode=kp_params.candidate_ranking_mode) + candidate_ranking_mode=kp_params.candidate_ranking_mode, + score_distance_offset=kp_params.score_distance_offset, + keypoint_depth_candidates=keypoint_depth_candidates, + keypoint_score_threshold=( + kp_params.keypoint_candidate_score_threshold)) - return refined_keypoints, refined_scores + return refined_keypoints, refined_scores, refined_depths def regularization_losses(self): return [] @@ -2748,25 +4178,38 @@ class CenterNetMetaArch(model.DetectionModel): fine_tune_checkpoint_type: whether to restore from a full detection checkpoint (with compatible variable names) or to restore from a classification checkpoint for initialization prior to training. - Valid values: `detection`, `classification`. Default 'detection'. - 'detection': used when loading in the Hourglass model pre-trained on - other detection task. - 'classification': used when loading in the ResNet model pre-trained on - image classification task. Note that only the image feature encoding - part is loaded but not those upsampling layers. + Valid values: `detection`, `classification`, `fine_tune`. + Default 'detection'. + 'detection': used when loading models pre-trained on other detection + tasks. With this checkpoint type the weights of the feature extractor + are expected under the attribute 'feature_extractor'. + 'classification': used when loading models pre-trained on an image + classification task. 
Note that only the encoder section of the network + is loaded and not the upsampling layers. With this checkpoint type, + the weights of only the encoder section are expected under the + attribute 'feature_extractor'. 'fine_tune': used when loading the entire CenterNet feature extractor pre-trained on other tasks. The checkpoints saved during CenterNet - model training can be directly loaded using this mode. + model training can be directly loaded using this type. With this + checkpoint type, the weights of the feature extractor are expected + under the attribute 'model._feature_extractor'. + For more details, see the tensorflow section on Loading mechanics. + https://www.tensorflow.org/guide/checkpoint#loading_mechanics Returns: A dict mapping keys to Trackable objects (tf.Module or Checkpoint). """ - if fine_tune_checkpoint_type == 'classification': - return {'feature_extractor': self._feature_extractor.get_base_model()} + supported_types = self._feature_extractor.supported_sub_model_types + supported_types += ['fine_tune'] - elif fine_tune_checkpoint_type == 'detection': - return {'feature_extractor': self._feature_extractor.get_model()} + if fine_tune_checkpoint_type not in supported_types: + message = ('Checkpoint type "{}" not supported for {}. 
' + 'Supported types are {}') + raise ValueError( + message.format(fine_tune_checkpoint_type, + self._feature_extractor.__class__.__name__, + supported_types)) elif fine_tune_checkpoint_type == 'fine_tune': feature_extractor_model = tf.train.Checkpoint( @@ -2774,9 +4217,17 @@ class CenterNetMetaArch(model.DetectionModel): return {'model': feature_extractor_model} else: - raise ValueError('Not supported fine tune checkpoint type - {}'.format( - fine_tune_checkpoint_type)) + return {'feature_extractor': self._feature_extractor.get_sub_model( + fine_tune_checkpoint_type)} def updates(self): - raise RuntimeError('This model is intended to be used with model_lib_v2 ' - 'which does not support updates()') + if tf_version.is_tf2(): + raise RuntimeError('This model is intended to be used with model_lib_v2 ' + 'which does not support updates()') + else: + update_ops = [] + slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + # Copy the slim ops to avoid modifying the collection + if slim_update_ops: + update_ops.extend(slim_update_ops) + return update_ops diff --git a/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py b/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py index c2474b53ceff59e824a2e9975afb436ed5de9c5c..d3784ef8419fba6ad1577becec5571be29b222ea 100644 --- a/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py +++ b/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py @@ -17,27 +17,35 @@ from __future__ import division import functools +import re import unittest + from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf +from object_detection.builders import post_processing_builder +from object_detection.core import keypoint_ops from object_detection.core import losses from object_detection.core import preprocessor from object_detection.core import standard_fields as fields from object_detection.core import 
target_assigner as cn_assigner from object_detection.meta_architectures import center_net_meta_arch as cnma from object_detection.models import center_net_resnet_feature_extractor +from object_detection.protos import post_processing_pb2 from object_detection.utils import test_case from object_detection.utils import tf_version @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') -class CenterNetMetaArchPredictionHeadTest(test_case.TestCase): +class CenterNetMetaArchPredictionHeadTest( + test_case.TestCase, parameterized.TestCase): """Test CenterNet meta architecture prediction head.""" - def test_prediction_head(self): - head = cnma.make_prediction_net(num_out_channels=7) + @parameterized.parameters([True, False]) + def test_prediction_head(self, use_depthwise): + head = cnma.make_prediction_net(num_out_channels=7, + use_depthwise=use_depthwise) output = head(np.zeros((4, 128, 128, 8))) self.assertEqual((4, 128, 128, 7), output.shape) @@ -47,7 +55,7 @@ class CenterNetMetaArchPredictionHeadTest(test_case.TestCase): class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): """Test for CenterNet meta architecture related functions.""" - def test_row_col_indices_from_flattened_indices(self): + def test_row_col_channel_indices_from_flattened_indices(self): """Tests that the computation of row, col, channel indices is correct.""" r_grid, c_grid, ch_grid = (np.zeros((5, 4, 3), dtype=np.int), @@ -81,6 +89,21 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_array_equal(ci, c_grid.flatten()) np.testing.assert_array_equal(chi, ch_grid.flatten()) + def test_row_col_indices_from_flattened_indices(self): + """Tests that the computation of row, col indices is correct.""" + + r_grid = np.array([[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], + [4, 4, 4, 4]]) + + c_grid = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3], + [0, 1, 2, 3]]) + + indices = np.arange(20) + ri, 
ci, = cnma.row_col_indices_from_flattened_indices(indices, 4) + + np.testing.assert_array_equal(ri, r_grid.flatten()) + np.testing.assert_array_equal(ci, c_grid.flatten()) + def test_flattened_indices_from_row_col_indices(self): r = np.array( @@ -479,6 +502,70 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_array_equal([1, 0, 0, 2], x_inds[1]) np.testing.assert_array_equal([0, 0, 1, 1], channel_inds[1]) + def test_top_k_feature_map_locations_k1(self): + feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32) + feature_map_np[0, 2, 0, 0] = 1.0 # Selected. + feature_map_np[0, 2, 1, 0] = 0.9 + feature_map_np[0, 0, 1, 0] = 0.7 + feature_map_np[0, 2, 2, 1] = 0.5 + feature_map_np[0, 0, 0, 1] = 0.3 + feature_map_np[1, 2, 1, 0] = 0.7 + feature_map_np[1, 1, 0, 0] = 0.4 + feature_map_np[1, 1, 2, 0] = 0.3 + feature_map_np[1, 1, 0, 1] = 0.8 # Selected. + feature_map_np[1, 1, 2, 1] = 0.3 + + def graph_fn(): + feature_map = tf.constant(feature_map_np) + scores, y_inds, x_inds, channel_inds = ( + cnma.top_k_feature_map_locations( + feature_map, max_pool_kernel_size=3, k=1, per_channel=False)) + return scores, y_inds, x_inds, channel_inds + + scores, y_inds, x_inds, channel_inds = self.execute(graph_fn, []) + + np.testing.assert_allclose([1.0], scores[0]) + np.testing.assert_array_equal([2], y_inds[0]) + np.testing.assert_array_equal([0], x_inds[0]) + np.testing.assert_array_equal([0], channel_inds[0]) + + np.testing.assert_allclose([0.8], scores[1]) + np.testing.assert_array_equal([1], y_inds[1]) + np.testing.assert_array_equal([0], x_inds[1]) + np.testing.assert_array_equal([1], channel_inds[1]) + + def test_top_k_feature_map_locations_k1_per_channel(self): + feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32) + feature_map_np[0, 2, 0, 0] = 1.0 # Selected. + feature_map_np[0, 2, 1, 0] = 0.9 + feature_map_np[0, 0, 1, 0] = 0.7 + feature_map_np[0, 2, 2, 1] = 0.5 # Selected. 
+ feature_map_np[0, 0, 0, 1] = 0.3 + feature_map_np[1, 2, 1, 0] = 0.7 # Selected. + feature_map_np[1, 1, 0, 0] = 0.4 + feature_map_np[1, 1, 2, 0] = 0.3 + feature_map_np[1, 1, 0, 1] = 0.8 # Selected. + feature_map_np[1, 1, 2, 1] = 0.3 + + def graph_fn(): + feature_map = tf.constant(feature_map_np) + scores, y_inds, x_inds, channel_inds = ( + cnma.top_k_feature_map_locations( + feature_map, max_pool_kernel_size=3, k=1, per_channel=True)) + return scores, y_inds, x_inds, channel_inds + + scores, y_inds, x_inds, channel_inds = self.execute(graph_fn, []) + + np.testing.assert_allclose([1.0, 0.5], scores[0]) + np.testing.assert_array_equal([2, 2], y_inds[0]) + np.testing.assert_array_equal([0, 2], x_inds[0]) + np.testing.assert_array_equal([0, 1], channel_inds[0]) + + np.testing.assert_allclose([0.7, 0.8], scores[1]) + np.testing.assert_array_equal([2, 1], y_inds[1]) + np.testing.assert_array_equal([1, 0], x_inds[1]) + np.testing.assert_array_equal([0, 1], channel_inds[1]) + def test_box_prediction(self): class_pred = np.zeros((3, 128, 128, 5), dtype=np.float32) @@ -517,33 +604,69 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): hw_pred_tensor = tf.constant(hw_pred) offset_pred_tensor = tf.constant(offset_pred) - detection_scores, y_indices, x_indices, channel_indices = ( + _, y_indices, x_indices, _ = ( cnma.top_k_feature_map_locations( class_pred_tensor, max_pool_kernel_size=3, k=2)) - boxes, classes, scores, num_dets = cnma.prediction_tensors_to_boxes( - detection_scores, y_indices, x_indices, channel_indices, - hw_pred_tensor, offset_pred_tensor) - return boxes, classes, scores, num_dets + boxes = cnma.prediction_tensors_to_boxes( + y_indices, x_indices, hw_pred_tensor, offset_pred_tensor) + return boxes - boxes, classes, scores, num_dets = self.execute(graph_fn, []) - - np.testing.assert_array_equal(num_dets, [2, 2, 2]) + boxes = self.execute(graph_fn, []) np.testing.assert_allclose( - [[-9, -8, 31, 52], [25, 35, 75, 85]], boxes[0]) 
+ [[0, 0, 31, 52], [25, 35, 75, 85]], boxes[0]) np.testing.assert_allclose( [[96, 98, 106, 108], [96, 98, 106, 108]], boxes[1]) np.testing.assert_allclose( [[69.5, 74.5, 90.5, 99.5], [40, 75, 80, 105]], boxes[2]) - np.testing.assert_array_equal(classes[0], [1, 0]) - np.testing.assert_array_equal(classes[1], [2, 1]) - np.testing.assert_array_equal(classes[2], [0, 4]) + def test_offset_prediction(self): + + class_pred = np.zeros((3, 128, 128, 5), dtype=np.float32) + offset_pred = np.zeros((3, 128, 128, 2), dtype=np.float32) + + # Sample 1, 2 boxes + class_pred[0, 10, 20] = [0.3, .7, 0.0, 0.0, 0.0] + offset_pred[0, 10, 20] = [1, 2] + + class_pred[0, 50, 60] = [0.55, 0.0, 0.0, 0.0, 0.45] + offset_pred[0, 50, 60] = [0, 0] + + # Sample 2, 2 boxes (at same location) + class_pred[1, 100, 100] = [0.0, 0.1, 0.9, 0.0, 0.0] + offset_pred[1, 100, 100] = [1, 3] + + # Sample 3, 3 boxes + class_pred[2, 60, 90] = [0.0, 0.0, 0.0, 0.2, 0.8] + offset_pred[2, 60, 90] = [0, 0] + + class_pred[2, 65, 95] = [0.0, 0.7, 0.3, 0.0, 0.0] + offset_pred[2, 65, 95] = [1, 2] + + class_pred[2, 75, 85] = [1.0, 0.0, 0.0, 0.0, 0.0] + offset_pred[2, 75, 85] = [5, 2] + + def graph_fn(): + class_pred_tensor = tf.constant(class_pred) + offset_pred_tensor = tf.constant(offset_pred) + + _, y_indices, x_indices, _ = ( + cnma.top_k_feature_map_locations( + class_pred_tensor, max_pool_kernel_size=3, k=2)) - np.testing.assert_allclose(scores[0], [.7, .55]) - np.testing.assert_allclose(scores[1][:1], [.9]) - np.testing.assert_allclose(scores[2], [1., .8]) + offsets = cnma.prediction_tensors_to_temporal_offsets( + y_indices, x_indices, offset_pred_tensor) + return offsets + + offsets = self.execute(graph_fn, []) + + np.testing.assert_allclose( + [[1, 2], [0, 0]], offsets[0]) + np.testing.assert_allclose( + [[1, 3], [1, 3]], offsets[1]) + np.testing.assert_allclose( + [[5, 2], [0, 0]], offsets[2]) def test_keypoint_candidate_prediction(self): keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32) @@ -577,7 
+700,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): keypoint_heatmap_offsets = tf.constant( keypoint_heatmap_offsets_np, dtype=tf.float32) - keypoint_cands, keypoint_scores, num_keypoint_candidates = ( + (keypoint_cands, keypoint_scores, num_keypoint_candidates, _) = ( cnma.prediction_tensors_to_keypoint_candidates( keypoint_heatmap, keypoint_heatmap_offsets, @@ -618,6 +741,73 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_array_equal(expected_num_keypoint_candidates, num_keypoint_candidates) + def test_prediction_to_single_instance_keypoints(self): + image_size = (9, 9) + object_heatmap_np = np.zeros((1, image_size[0], image_size[1], 1), + dtype=np.float32) + # This should be picked. + object_heatmap_np[0, 4, 4, 0] = 0.9 + # This shouldn't be picked since it's farther away from the center. + object_heatmap_np[0, 2, 2, 0] = 1.0 + + keypoint_heatmap_np = np.zeros((1, image_size[0], image_size[1], 4), + dtype=np.float32) + # Top-left corner should be picked. + keypoint_heatmap_np[0, 1, 1, 0] = 0.9 + keypoint_heatmap_np[0, 4, 4, 0] = 1.0 + # Top-right corner should be picked. + keypoint_heatmap_np[0, 1, 7, 1] = 0.9 + keypoint_heatmap_np[0, 4, 4, 1] = 1.0 + # Bottom-left corner should be picked. + keypoint_heatmap_np[0, 7, 1, 2] = 0.9 + keypoint_heatmap_np[0, 4, 4, 2] = 1.0 + # Bottom-right corner should be picked. 
+ keypoint_heatmap_np[0, 7, 7, 3] = 0.9 + keypoint_heatmap_np[0, 4, 4, 3] = 1.0 + + keypoint_offset_np = np.zeros((1, image_size[0], image_size[1], 8), + dtype=np.float32) + keypoint_offset_np[0, 1, 1] = [0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + keypoint_offset_np[0, 1, 7] = [0.0, 0.0, 0.5, -0.5, 0.0, 0.0, 0.0, 0.0] + keypoint_offset_np[0, 7, 1] = [0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 0.0, 0.0] + keypoint_offset_np[0, 7, 7] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, -0.5] + + keypoint_regression_np = np.zeros((1, image_size[0], image_size[1], 8), + dtype=np.float32) + keypoint_regression_np[0, 4, 4] = [-3, -3, -3, 3, 3, -3, 3, 3] + + kp_params = get_fake_kp_params( + candidate_ranking_mode='score_distance_ratio') + + def graph_fn(): + object_heatmap = tf.constant(object_heatmap_np, dtype=tf.float32) + keypoint_heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32) + keypoint_offset = tf.constant(keypoint_offset_np, dtype=tf.float32) + keypoint_regression = tf.constant( + keypoint_regression_np, dtype=tf.float32) + + (keypoint_cands, keypoint_scores, _) = ( + cnma.prediction_to_single_instance_keypoints( + object_heatmap, + keypoint_heatmap, + keypoint_offset, + keypoint_regression, + kp_params=kp_params)) + + return keypoint_cands, keypoint_scores + + (keypoint_cands, keypoint_scores) = self.execute(graph_fn, []) + + expected_keypoint_candidates = [[[ + [1.5, 1.5], # top-left + [1.5, 6.5], # top-right + [6.5, 1.5], # bottom-left + [6.5, 6.5], # bottom-right + ]]] + expected_keypoint_scores = [[[0.9, 0.9, 0.9, 0.9]]] + np.testing.assert_allclose(expected_keypoint_candidates, keypoint_cands) + np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores) + def test_keypoint_candidate_prediction_per_keypoints(self): keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32) keypoint_heatmap_np[0, 0, 0, 0] = 1.0 @@ -652,7 +842,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): keypoint_heatmap_offsets = tf.constant( 
keypoint_heatmap_offsets_np, dtype=tf.float32) - keypoint_cands, keypoint_scores, num_keypoint_candidates = ( + (keypoint_cands, keypoint_scores, num_keypoint_candidates, _) = ( cnma.prediction_tensors_to_keypoint_candidates( keypoint_heatmap, keypoint_heatmap_offsets, @@ -693,6 +883,89 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_array_equal(expected_num_keypoint_candidates, num_keypoint_candidates) + @parameterized.parameters({'per_keypoint_depth': True}, + {'per_keypoint_depth': False}) + def test_keypoint_candidate_prediction_depth(self, per_keypoint_depth): + keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32) + keypoint_heatmap_np[0, 0, 0, 0] = 1.0 + keypoint_heatmap_np[0, 2, 1, 0] = 0.7 + keypoint_heatmap_np[0, 1, 1, 0] = 0.6 + keypoint_heatmap_np[0, 0, 2, 1] = 0.7 + keypoint_heatmap_np[0, 1, 1, 1] = 0.3 # Filtered by low score. + keypoint_heatmap_np[0, 2, 2, 1] = 0.2 + keypoint_heatmap_np[1, 1, 0, 0] = 0.6 + keypoint_heatmap_np[1, 2, 1, 0] = 0.5 + keypoint_heatmap_np[1, 0, 0, 0] = 0.4 + keypoint_heatmap_np[1, 0, 0, 1] = 1.0 + keypoint_heatmap_np[1, 0, 1, 1] = 0.9 + keypoint_heatmap_np[1, 2, 0, 1] = 0.8 + + if per_keypoint_depth: + keypoint_depths_np = np.zeros((2, 3, 3, 2), dtype=np.float32) + keypoint_depths_np[0, 0, 0, 0] = -1.5 + keypoint_depths_np[0, 2, 1, 0] = -1.0 + keypoint_depths_np[0, 0, 2, 1] = 1.5 + else: + keypoint_depths_np = np.zeros((2, 3, 3, 1), dtype=np.float32) + keypoint_depths_np[0, 0, 0, 0] = -1.5 + keypoint_depths_np[0, 2, 1, 0] = -1.0 + keypoint_depths_np[0, 0, 2, 0] = 1.5 + + keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 2), dtype=np.float32) + keypoint_heatmap_offsets_np[0, 0, 0] = [0.5, 0.25] + keypoint_heatmap_offsets_np[0, 2, 1] = [-0.25, 0.5] + keypoint_heatmap_offsets_np[0, 1, 1] = [0.0, 0.0] + keypoint_heatmap_offsets_np[0, 0, 2] = [1.0, 0.0] + keypoint_heatmap_offsets_np[0, 2, 2] = [1.0, 1.0] + keypoint_heatmap_offsets_np[1, 1, 0] = [0.25, 0.5] + 
keypoint_heatmap_offsets_np[1, 2, 1] = [0.5, 0.0] + keypoint_heatmap_offsets_np[1, 0, 0] = [0.0, -0.5] + keypoint_heatmap_offsets_np[1, 0, 1] = [0.5, -0.5] + keypoint_heatmap_offsets_np[1, 2, 0] = [-1.0, -0.5] + + def graph_fn(): + keypoint_heatmap = tf.constant(keypoint_heatmap_np, dtype=tf.float32) + keypoint_heatmap_offsets = tf.constant( + keypoint_heatmap_offsets_np, dtype=tf.float32) + + keypoint_depths = tf.constant(keypoint_depths_np, dtype=tf.float32) + (keypoint_cands, keypoint_scores, num_keypoint_candidates, + keypoint_depths) = ( + cnma.prediction_tensors_to_keypoint_candidates( + keypoint_heatmap, + keypoint_heatmap_offsets, + keypoint_score_threshold=0.5, + max_pool_kernel_size=1, + max_candidates=2, + keypoint_depths=keypoint_depths)) + return (keypoint_cands, keypoint_scores, num_keypoint_candidates, + keypoint_depths) + + (_, keypoint_scores, _, keypoint_depths) = self.execute(graph_fn, []) + + expected_keypoint_scores = [ + [ # Example 0. + [1.0, 0.7], # Keypoint 1. + [0.7, 0.3], # Keypoint 2. + ], + [ # Example 1. + [0.6, 1.0], # Keypoint 1. + [0.5, 0.9], # Keypoint 2. + ], + ] + expected_keypoint_depths = [ + [ + [-1.5, 1.5], + [-1.0, 0.0], + ], + [ + [0., 0.], + [0., 0.], + ], + ] + np.testing.assert_allclose(expected_keypoint_scores, keypoint_scores) + np.testing.assert_allclose(expected_keypoint_depths, keypoint_depths) + def test_regressed_keypoints_at_object_centers(self): batch_size = 2 num_keypoints = 5 @@ -798,11 +1071,22 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32) num_keypoint_candidates = tf.constant(num_keypoints_candidates_np, dtype=tf.int32) - refined_keypoints, refined_scores = cnma.refine_keypoints( - regressed_keypoints, keypoint_candidates, keypoint_scores, - num_keypoint_candidates, bboxes=None, + # The behavior of bboxes=None is different now. 
We provide the bboxes + # explicitly by using the regressed keypoints to create the same + # behavior. + regressed_keypoints_flattened = tf.reshape( + regressed_keypoints, [-1, 3, 2]) + bboxes_flattened = keypoint_ops.keypoints_to_enclosing_bounding_boxes( + regressed_keypoints_flattened) + (refined_keypoints, refined_scores, _) = cnma.refine_keypoints( + regressed_keypoints, + keypoint_candidates, + keypoint_scores, + num_keypoint_candidates, + bboxes=bboxes_flattened, unmatched_keypoint_score=unmatched_keypoint_score, - box_scale=1.2, candidate_search_scale=0.3, + box_scale=1.2, + candidate_search_scale=0.3, candidate_ranking_mode=candidate_ranking_mode) return refined_keypoints, refined_scores @@ -870,7 +1154,87 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints) np.testing.assert_allclose(expected_refined_scores, refined_scores) - def test_refine_keypoints_with_bboxes(self): + def test_refine_keypoints_without_bbox(self): + regressed_keypoints_np = np.array( + [ + # Example 0. + [ + [[2.0, 2.0], [6.0, 10.0], [14.0, 7.0]], # Instance 0. + [[0.0, 6.0], [3.0, 3.0], [5.0, 7.0]], # Instance 1. + ], + ], dtype=np.float32) + keypoint_candidates_np = np.array( + [ + # Example 0. + [ + [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]], # Candidate 0. + [[1.0, 8.0], [0.0, 0.0], [2.0, 2.0]], # Candidate 1. + [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], # Candidate 2. + ], + ], dtype=np.float32) + keypoint_scores_np = np.array( + [ + # Example 0. + [ + [0.8, 0.9, 1.0], # Candidate 0. + [0.6, 0.1, 0.9], # Candidate 1. + [0.0, 0.0, 0.0], # Candidate 1. + ], + ], dtype=np.float32) + num_keypoints_candidates_np = np.array( + [ + # Example 0. 
+ [2, 2, 2], + ], dtype=np.int32) + unmatched_keypoint_score = 0.1 + + def graph_fn(): + regressed_keypoints = tf.constant( + regressed_keypoints_np, dtype=tf.float32) + keypoint_candidates = tf.constant( + keypoint_candidates_np, dtype=tf.float32) + keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32) + num_keypoint_candidates = tf.constant(num_keypoints_candidates_np, + dtype=tf.int32) + (refined_keypoints, refined_scores, _) = cnma.refine_keypoints( + regressed_keypoints, + keypoint_candidates, + keypoint_scores, + num_keypoint_candidates, + bboxes=None, + unmatched_keypoint_score=unmatched_keypoint_score, + box_scale=1.2, + candidate_search_scale=0.3, + candidate_ranking_mode='min_distance') + return refined_keypoints, refined_scores + + refined_keypoints, refined_scores = self.execute(graph_fn, []) + + # The expected refined keypoints pick the ones that are closest to the + # regressed keypoint locations without filtering out the candidates which + # are outside of the bounding box. + expected_refined_keypoints = np.array( + [ + # Example 0. + [ + [[2.0, 2.5], [6.0, 10.5], [4.0, 7.0]], # Instance 0. + [[1.0, 8.0], [0.0, 0.0], [4.0, 7.0]], # Instance 1. + ], + ], dtype=np.float32) + expected_refined_scores = np.array( + [ + # Example 0. + [ + [0.8, 0.9, 1.0], # Instance 0. + [0.6, 0.1, 1.0], # Instance 1. + ], + ], dtype=np.float32) + + np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints) + np.testing.assert_allclose(expected_refined_scores, refined_scores) + + @parameterized.parameters({'predict_depth': True}, {'predict_depth': False}) + def test_refine_keypoints_with_bboxes(self, predict_depth): regressed_keypoints_np = np.array( [ # Example 0. @@ -909,7 +1273,22 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): [0.7, 0.4, 0.0], # Candidate 0. [0.6, 0.1, 0.0], # Candidate 1. ] - ], dtype=np.float32) + ], + dtype=np.float32) + keypoint_depths_np = np.array( + [ + # Example 0. 
+ [ + [-0.8, -0.9, -1.0], # Candidate 0. + [-0.6, -0.1, -0.9], # Candidate 1. + ], + # Example 1. + [ + [-0.7, -0.4, -0.0], # Candidate 0. + [-0.6, -0.1, -0.0], # Candidate 1. + ] + ], + dtype=np.float32) num_keypoints_candidates_np = np.array( [ # Example 0. @@ -938,17 +1317,28 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): keypoint_candidates = tf.constant( keypoint_candidates_np, dtype=tf.float32) keypoint_scores = tf.constant(keypoint_scores_np, dtype=tf.float32) + if predict_depth: + keypoint_depths = tf.constant(keypoint_depths_np, dtype=tf.float32) + else: + keypoint_depths = None num_keypoint_candidates = tf.constant(num_keypoints_candidates_np, dtype=tf.int32) bboxes = tf.constant(bboxes_np, dtype=tf.float32) - refined_keypoints, refined_scores = cnma.refine_keypoints( - regressed_keypoints, keypoint_candidates, keypoint_scores, - num_keypoint_candidates, bboxes=bboxes, - unmatched_keypoint_score=unmatched_keypoint_score, - box_scale=1.0, candidate_search_scale=0.3) - return refined_keypoints, refined_scores - - refined_keypoints, refined_scores = self.execute(graph_fn, []) + (refined_keypoints, refined_scores, + refined_depths) = cnma.refine_keypoints( + regressed_keypoints, + keypoint_candidates, + keypoint_scores, + num_keypoint_candidates, + bboxes=bboxes, + unmatched_keypoint_score=unmatched_keypoint_score, + box_scale=1.0, + candidate_search_scale=0.3, + keypoint_depth_candidates=keypoint_depths) + if predict_depth: + return refined_keypoints, refined_scores, refined_depths + else: + return refined_keypoints, refined_scores expected_refined_keypoints = np.array( [ @@ -979,8 +1369,17 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): ], ], dtype=np.float32) - np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints) - np.testing.assert_allclose(expected_refined_scores, refined_scores) + if predict_depth: + refined_keypoints, refined_scores, refined_depths = 
self.execute( + graph_fn, []) + expected_refined_depths = np.array([[[-0.8, 0.0, 0.0], [0.0, 0.0, -1.0]], + [[-0.7, -0.1, 0.0], [-0.7, -0.4, + 0.0]]]) + np.testing.assert_allclose(expected_refined_depths, refined_depths) + else: + refined_keypoints, refined_scores = self.execute(graph_fn, []) + np.testing.assert_allclose(expected_refined_keypoints, refined_keypoints) + np.testing.assert_allclose(expected_refined_scores, refined_scores) def test_pad_to_full_keypoint_dim(self): batch_size = 4 @@ -1044,6 +1443,41 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase): np.testing.assert_allclose(kpt_scores_np[:, i, :], kpt_scores_padded[:, inst_ind, :]) + def test_predicted_embeddings_at_object_centers(self): + batch_size = 2 + embedding_size = 5 + num_instances = 6 + predicted_embedding_feature_map_np = np.random.randn( + batch_size, 10, 10, embedding_size).astype(np.float32) + y_indices = np.random.choice(10, (batch_size, num_instances)) + x_indices = np.random.choice(10, (batch_size, num_instances)) + + def graph_fn(): + predicted_embedding_feature_map = tf.constant( + predicted_embedding_feature_map_np, dtype=tf.float32) + + gathered_predicted_embeddings = ( + cnma.predicted_embeddings_at_object_centers( + predicted_embedding_feature_map, + tf.constant(y_indices, dtype=tf.int32), + tf.constant(x_indices, dtype=tf.int32))) + return gathered_predicted_embeddings + + gathered_predicted_embeddings = self.execute(graph_fn, []) + + expected_gathered_embeddings_0 = predicted_embedding_feature_map_np[ + 0, y_indices[0], x_indices[0], :] + expected_gathered_embeddings_1 = predicted_embedding_feature_map_np[ + 1, y_indices[1], x_indices[1], :] + expected_gathered_embeddings = np.stack([ + expected_gathered_embeddings_0, + expected_gathered_embeddings_1], axis=0) + expected_gathered_embeddings = np.reshape( + expected_gathered_embeddings, + [batch_size, num_instances, embedding_size]) + np.testing.assert_allclose(expected_gathered_embeddings, + 
gathered_predicted_embeddings) + # Common parameters for setting up testing examples across tests. _NUM_CLASSES = 10 @@ -1051,16 +1485,21 @@ _KEYPOINT_INDICES = [0, 1, 2, 3] _NUM_KEYPOINTS = len(_KEYPOINT_INDICES) _DENSEPOSE_NUM_PARTS = 24 _TASK_NAME = 'human_pose' +_NUM_TRACK_IDS = 3 +_REID_EMBED_SIZE = 2 +_NUM_FC_LAYERS = 1 -def get_fake_center_params(): +def get_fake_center_params(max_box_predictions=5): """Returns the fake object center parameter namedtuple.""" return cnma.ObjectCenterParams( classification_loss=losses.WeightedSigmoidClassificationLoss(), object_center_loss_weight=1.0, min_box_overlap_iou=1.0, - max_box_predictions=5, - use_labeled_classes=False) + max_box_predictions=max_box_predictions, + use_labeled_classes=False, + center_head_num_filters=[128], + center_head_kernel_sizes=[5]) def get_fake_od_params(): @@ -1071,7 +1510,12 @@ def get_fake_od_params(): scale_loss_weight=0.1) -def get_fake_kp_params(): +def get_fake_kp_params(num_candidates_per_keypoint=100, + per_keypoint_offset=False, + predict_depth=False, + per_keypoint_depth=False, + peak_radius=0, + candidate_ranking_mode='min_distance'): """Returns the fake keypoint estimation parameter namedtuple.""" return cnma.KeypointEstimationParams( task_name=_TASK_NAME, @@ -1080,7 +1524,14 @@ def get_fake_kp_params(): keypoint_std_dev=[0.00001] * len(_KEYPOINT_INDICES), classification_loss=losses.WeightedSigmoidClassificationLoss(), localization_loss=losses.L1LocalizationLoss(), - keypoint_candidate_score_threshold=0.1) + unmatched_keypoint_score=0.1, + keypoint_candidate_score_threshold=0.1, + num_candidates_per_keypoint=num_candidates_per_keypoint, + per_keypoint_offset=per_keypoint_offset, + predict_depth=predict_depth, + per_keypoint_depth=per_keypoint_depth, + offset_peak_radius=peak_radius, + candidate_ranking_mode=candidate_ranking_mode) def get_fake_mask_params(): @@ -1106,7 +1557,34 @@ def get_fake_densepose_params(): upsample_method='nearest') -def 
build_center_net_meta_arch(build_resnet=False): +def get_fake_track_params(): + """Returns the fake object tracking parameter namedtuple.""" + return cnma.TrackParams( + num_track_ids=_NUM_TRACK_IDS, + reid_embed_size=_REID_EMBED_SIZE, + num_fc_layers=_NUM_FC_LAYERS, + classification_loss=losses.WeightedSoftmaxClassificationLoss(), + task_loss_weight=1.0) + + +def get_fake_temporal_offset_params(): + """Returns the fake temporal offset parameter namedtuple.""" + return cnma.TemporalOffsetParams( + localization_loss=losses.WeightedSmoothL1LocalizationLoss(), + task_loss_weight=1.0) + + +def build_center_net_meta_arch(build_resnet=False, + num_classes=_NUM_CLASSES, + max_box_predictions=5, + apply_non_max_suppression=False, + detection_only=False, + per_keypoint_offset=False, + predict_depth=False, + per_keypoint_depth=False, + peak_radius=0, + keypoint_only=False, + candidate_ranking_mode='min_distance'): """Builds the CenterNet meta architecture.""" if build_resnet: feature_extractor = ( @@ -1124,17 +1602,82 @@ def build_center_net_meta_arch(build_resnet=False): min_dimension=128, max_dimension=128, pad_to_max_dimesnion=True) - return cnma.CenterNetMetaArch( - is_training=True, - add_summaries=False, - num_classes=_NUM_CLASSES, - feature_extractor=feature_extractor, - image_resizer_fn=image_resizer_fn, - object_center_params=get_fake_center_params(), - object_detection_params=get_fake_od_params(), - keypoint_params_dict={_TASK_NAME: get_fake_kp_params()}, - mask_params=get_fake_mask_params(), - densepose_params=get_fake_densepose_params()) + + non_max_suppression_fn = None + if apply_non_max_suppression: + post_processing_proto = post_processing_pb2.PostProcessing() + post_processing_proto.batch_non_max_suppression.iou_threshold = 1.0 + post_processing_proto.batch_non_max_suppression.score_threshold = 0.6 + (post_processing_proto.batch_non_max_suppression.max_total_detections + ) = max_box_predictions + 
(post_processing_proto.batch_non_max_suppression.max_detections_per_class + ) = max_box_predictions + (post_processing_proto.batch_non_max_suppression.change_coordinate_frame + ) = False + non_max_suppression_fn, _ = post_processing_builder.build( + post_processing_proto) + + if keypoint_only: + num_candidates_per_keypoint = 100 if max_box_predictions > 1 else 1 + return cnma.CenterNetMetaArch( + is_training=True, + add_summaries=False, + num_classes=num_classes, + feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=get_fake_center_params(max_box_predictions), + keypoint_params_dict={ + _TASK_NAME: + get_fake_kp_params(num_candidates_per_keypoint, + per_keypoint_offset, predict_depth, + per_keypoint_depth, peak_radius, + candidate_ranking_mode) + }, + non_max_suppression_fn=non_max_suppression_fn) + elif detection_only: + return cnma.CenterNetMetaArch( + is_training=True, + add_summaries=False, + num_classes=num_classes, + feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=get_fake_center_params(max_box_predictions), + object_detection_params=get_fake_od_params(), + non_max_suppression_fn=non_max_suppression_fn) + elif num_classes == 1: + num_candidates_per_keypoint = 100 if max_box_predictions > 1 else 1 + return cnma.CenterNetMetaArch( + is_training=True, + add_summaries=False, + num_classes=num_classes, + feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=get_fake_center_params(max_box_predictions), + object_detection_params=get_fake_od_params(), + keypoint_params_dict={ + _TASK_NAME: + get_fake_kp_params(num_candidates_per_keypoint, + per_keypoint_offset, predict_depth, + per_keypoint_depth, peak_radius, + candidate_ranking_mode) + }, + non_max_suppression_fn=non_max_suppression_fn) + else: + return cnma.CenterNetMetaArch( + is_training=True, + add_summaries=False, + num_classes=num_classes, + 
feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=get_fake_center_params(), + object_detection_params=get_fake_od_params(), + keypoint_params_dict={_TASK_NAME: get_fake_kp_params( + candidate_ranking_mode=candidate_ranking_mode)}, + mask_params=get_fake_mask_params(), + densepose_params=get_fake_densepose_params(), + track_params=get_fake_track_params(), + temporal_offset_params=get_fake_temporal_offset_params(), + non_max_suppression_fn=non_max_suppression_fn) def _logit(p): @@ -1228,6 +1771,16 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): fake_feature_map) self.assertEqual((4, 128, 128, 2 * _DENSEPOSE_NUM_PARTS), output.shape) + # "track embedding" head: + output = model._prediction_head_dict[cnma.TRACK_REID][-1]( + fake_feature_map) + self.assertEqual((4, 128, 128, _REID_EMBED_SIZE), output.shape) + + # "temporal offset" head: + output = model._prediction_head_dict[cnma.TEMPORAL_OFFSET][-1]( + fake_feature_map) + self.assertEqual((4, 128, 128, 2), output.shape) + def test_initialize_target_assigners(self): model = build_center_net_meta_arch() assigner_dict = model._initialize_target_assigners( @@ -1255,6 +1808,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): self.assertIsInstance(assigner_dict[cnma.DENSEPOSE_TASK], cn_assigner.CenterNetDensePoseTargetAssigner) + # Track estimation target assigner: + self.assertIsInstance(assigner_dict[cnma.TRACK_TASK], + cn_assigner.CenterNetTrackTargetAssigner) + + # Temporal Offset target assigner: + self.assertIsInstance(assigner_dict[cnma.TEMPORALOFFSET_TASK], + cn_assigner.CenterNetTemporalOffsetTargetAssigner) + def test_predict(self): """Test the predict function.""" @@ -1279,6 +1840,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): (2, 32, 32, _DENSEPOSE_NUM_PARTS)) self.assertEqual(prediction_dict[cnma.DENSEPOSE_REGRESSION][0].shape, (2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS)) + 
self.assertEqual(prediction_dict[cnma.TRACK_REID][0].shape, + (2, 32, 32, _REID_EMBED_SIZE)) + self.assertEqual(prediction_dict[cnma.TEMPORAL_OFFSET][0].shape, + (2, 32, 32, 2)) def test_loss(self): """Test the loss function.""" @@ -1297,7 +1862,20 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): groundtruth_dp_part_ids_list=groundtruth_dict[ fields.BoxListFields.densepose_part_ids], groundtruth_dp_surface_coords_list=groundtruth_dict[ - fields.BoxListFields.densepose_surface_coords]) + fields.BoxListFields.densepose_surface_coords], + groundtruth_track_ids_list=groundtruth_dict[ + fields.BoxListFields.track_ids], + groundtruth_track_match_flags_list=groundtruth_dict[ + fields.BoxListFields.track_match_flags], + groundtruth_temporal_offsets_list=groundtruth_dict[ + fields.BoxListFields.temporal_offsets]) + + kernel_initializer = tf.constant_initializer( + [[1, 1, 0], [-1000000, -1000000, 1000000]]) + model.track_reid_classification_net = tf.keras.layers.Dense( + _NUM_TRACK_IDS, + kernel_initializer=kernel_initializer, + input_shape=(_REID_EMBED_SIZE,)) prediction_dict = get_fake_prediction_dict( input_height=16, input_width=32, stride=4) @@ -1339,6 +1917,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): self.assertGreater( 0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.DENSEPOSE_REGRESSION)]) + self.assertGreater( + 0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, + cnma.TRACK_REID)]) + self.assertGreater( + 0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX, + cnma.TEMPORAL_OFFSET)]) @parameterized.parameters( {'target_class_id': 1}, @@ -1349,15 +1933,18 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): model = build_center_net_meta_arch() max_detection = model._center_params.max_box_predictions num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices) + unmatched_keypoint_score = ( + model._kp_params_dict[_TASK_NAME].unmatched_keypoint_score) class_center = 
np.zeros((1, 32, 32, 10), dtype=np.float32) height_width = np.zeros((1, 32, 32, 2), dtype=np.float32) offset = np.zeros((1, 32, 32, 2), dtype=np.float32) - keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32) + keypoint_heatmaps = np.ones( + (1, 32, 32, num_keypoints), dtype=np.float32) * _logit(0.001) keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32) keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2) - class_probs = np.zeros(10) + class_probs = np.ones(10) * _logit(0.25) class_probs[target_class_id] = _logit(0.75) class_center[0, 16, 16] = class_probs height_width[0, 16, 16] = [5, 10] @@ -1384,6 +1971,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): dp_surf_coords = np.random.randn(1, 32, 32, 2 * _DENSEPOSE_NUM_PARTS) + embedding_size = 100 + track_reid_embedding = np.zeros((1, 32, 32, embedding_size), + dtype=np.float32) + track_reid_embedding[0, 16, 16, :] = np.ones(embedding_size) + + temporal_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32) + temporal_offsets[..., 1] = 1 + class_center = tf.constant(class_center) height_width = tf.constant(height_width) offset = tf.constant(offset) @@ -1393,6 +1988,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32) dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32) dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32) + track_reid_embedding = tf.constant(track_reid_embedding, dtype=tf.float32) + temporal_offsets = tf.constant(temporal_offsets, dtype=tf.float32) prediction_dict = { cnma.OBJECT_CENTER: [class_center], @@ -1406,7 +2003,9 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): [keypoint_regression], cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap], cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap], - cnma.DENSEPOSE_REGRESSION: [dp_surf_coords] + cnma.DENSEPOSE_REGRESSION: [dp_surf_coords], + 
cnma.TRACK_REID: [track_reid_embedding], + cnma.TEMPORAL_OFFSET: [temporal_offsets], } def graph_fn(): @@ -1415,11 +2014,23 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): return detections detections = self.execute_cpu(graph_fn, []) - self.assertAllClose(detections['detection_boxes'][0, 0], np.array([55, 46, 75, 86]) / 128.0) self.assertAllClose(detections['detection_scores'][0], [.75, .5, .5, .5, .5]) + expected_multiclass_scores = [.25] * 10 + expected_multiclass_scores[target_class_id] = .75 + self.assertAllClose(expected_multiclass_scores, + detections['detection_multiclass_scores'][0][0]) + + # The output embedding extracted at the object center will be a 3-D array of + # shape [batch, num_boxes, embedding_size]. The valid predicted embedding + # will be the first embedding in the first batch. It is a 1-D array of + # shape [embedding_size] with values all ones. All the values of the + # embedding will then be divided by the square root of 'embedding_size' + # after the L2 normalization. + self.assertAllClose(detections['detection_embeddings'][0, 0], + np.ones(embedding_size) / embedding_size**0.5) self.assertEqual(detections['detection_classes'][0, 0], target_class_id) self.assertEqual(detections['num_detections'], [5]) self.assertAllEqual([1, max_detection, num_keypoints, 2], @@ -1428,6 +2039,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): detections['detection_keypoint_scores'].shape) self.assertAllEqual([1, max_detection, 4, 4], detections['detection_masks'].shape) + self.assertAllEqual([1, max_detection, embedding_size], + detections['detection_embeddings'].shape) + self.assertAllEqual([1, max_detection, 2], + detections['detection_temporal_offsets'].shape) # Masks should be empty for everything but the first detection. 
self.assertAllEqual( @@ -1441,7 +2056,7 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): expected_kpts_for_obj_0 = np.array( [[14., 14.], [14., 18.], [18., 14.], [17., 17.]]) / 32. expected_kpt_scores_for_obj_0 = np.array( - [0.9, 0.9, 0.9, cnma.UNMATCHED_KEYPOINT_SCORE]) + [0.9, 0.9, 0.9, unmatched_keypoint_score]) np.testing.assert_allclose(detections['detection_keypoints'][0][0], expected_kpts_for_obj_0, rtol=1e-6) np.testing.assert_allclose(detections['detection_keypoint_scores'][0][0], @@ -1471,7 +2086,308 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): detections['detection_surface_coords'][0, 0, :, :], np.zeros_like(detections['detection_surface_coords'][0, 0, :, :])) - def test_get_instance_indices(self): + def test_postprocess_kpts_no_od(self): + """Test the postprocess function.""" + target_class_id = 1 + model = build_center_net_meta_arch(keypoint_only=True) + max_detection = model._center_params.max_box_predictions + num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices) + + class_center = np.zeros((1, 32, 32, 10), dtype=np.float32) + keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32) + keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32) + keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2) + + class_probs = np.ones(10) * _logit(0.25) + class_probs[target_class_id] = _logit(0.75) + class_center[0, 16, 16] = class_probs + keypoint_regression[0, 16, 16] = [ + -1., -1., + -1., 1., + 1., -1., + 1., 1.] + keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9) + keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9) + keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9) + keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score. 
+ + class_center = tf.constant(class_center) + keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32) + keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32) + keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32) + + prediction_dict = { + cnma.OBJECT_CENTER: [class_center], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP): + [keypoint_heatmaps], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET): + [keypoint_offsets], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION): + [keypoint_regression], + } + + # def graph_fn(): + detections = model.postprocess(prediction_dict, + tf.constant([[128, 128, 3]])) + # return detections + + # detections = self.execute_cpu(graph_fn, []) + self.assertAllClose(detections['detection_scores'][0], + [.75, .5, .5, .5, .5]) + expected_multiclass_scores = [.25] * 10 + expected_multiclass_scores[target_class_id] = .75 + self.assertAllClose(expected_multiclass_scores, + detections['detection_multiclass_scores'][0][0]) + + self.assertEqual(detections['detection_classes'][0, 0], target_class_id) + self.assertEqual(detections['num_detections'], [5]) + self.assertAllEqual([1, max_detection, num_keypoints, 2], + detections['detection_keypoints'].shape) + self.assertAllEqual([1, max_detection, num_keypoints], + detections['detection_keypoint_scores'].shape) + + def test_non_max_suppression(self): + """Tests application of NMS on CenterNet detections.""" + target_class_id = 1 + model = build_center_net_meta_arch(apply_non_max_suppression=True, + detection_only=True) + + class_center = np.zeros((1, 32, 32, 10), dtype=np.float32) + height_width = np.zeros((1, 32, 32, 2), dtype=np.float32) + offset = np.zeros((1, 32, 32, 2), dtype=np.float32) + + class_probs = np.ones(10) * _logit(0.25) + class_probs[target_class_id] = _logit(0.75) + class_center[0, 16, 16] = class_probs + height_width[0, 16, 16] = [5, 10] + offset[0, 16, 16] = [.25, .5] + + class_center = 
tf.constant(class_center) + height_width = tf.constant(height_width) + offset = tf.constant(offset) + + prediction_dict = { + cnma.OBJECT_CENTER: [class_center], + cnma.BOX_SCALE: [height_width], + cnma.BOX_OFFSET: [offset], + } + + def graph_fn(): + detections = model.postprocess(prediction_dict, + tf.constant([[128, 128, 3]])) + return detections + + detections = self.execute_cpu(graph_fn, []) + num_detections = int(detections['num_detections']) + self.assertEqual(num_detections, 1) + self.assertAllClose(detections['detection_boxes'][0, 0], + np.array([55, 46, 75, 86]) / 128.0) + self.assertAllClose(detections['detection_scores'][0][:num_detections], + [.75]) + expected_multiclass_scores = [.25] * 10 + expected_multiclass_scores[target_class_id] = .75 + self.assertAllClose(expected_multiclass_scores, + detections['detection_multiclass_scores'][0][0]) + + def test_postprocess_single_class(self): + """Test the postprocess function.""" + model = build_center_net_meta_arch(num_classes=1) + max_detection = model._center_params.max_box_predictions + num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices) + + class_center = np.zeros((1, 32, 32, 1), dtype=np.float32) + height_width = np.zeros((1, 32, 32, 2), dtype=np.float32) + offset = np.zeros((1, 32, 32, 2), dtype=np.float32) + keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32) + keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32) + keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2) + + class_probs = np.zeros(1) + class_probs[0] = _logit(0.75) + class_center[0, 16, 16] = class_probs + height_width[0, 16, 16] = [5, 10] + offset[0, 16, 16] = [.25, .5] + keypoint_regression[0, 16, 16] = [ + -1., -1., + -1., 1., + 1., -1., + 1., 1.] + keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9) + keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9) + keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9) + keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score. 
+ + class_center = tf.constant(class_center) + height_width = tf.constant(height_width) + offset = tf.constant(offset) + keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32) + keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32) + keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32) + + prediction_dict = { + cnma.OBJECT_CENTER: [class_center], + cnma.BOX_SCALE: [height_width], + cnma.BOX_OFFSET: [offset], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP): + [keypoint_heatmaps], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET): + [keypoint_offsets], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION): + [keypoint_regression], + } + + def graph_fn(): + detections = model.postprocess(prediction_dict, + tf.constant([[128, 128, 3]])) + return detections + + detections = self.execute_cpu(graph_fn, []) + + self.assertAllClose(detections['detection_boxes'][0, 0], + np.array([55, 46, 75, 86]) / 128.0) + self.assertAllClose(detections['detection_scores'][0], + [.75, .5, .5, .5, .5]) + + self.assertEqual(detections['detection_classes'][0, 0], 0) + self.assertEqual(detections['num_detections'], [5]) + self.assertAllEqual([1, max_detection, num_keypoints, 2], + detections['detection_keypoints'].shape) + self.assertAllEqual([1, max_detection, num_keypoints], + detections['detection_keypoint_scores'].shape) + + def test_postprocess_single_instance(self): + """Test the postprocess single instance function.""" + model = build_center_net_meta_arch( + num_classes=1, candidate_ranking_mode='score_distance_ratio') + num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices) + + class_center = np.zeros((1, 32, 32, 1), dtype=np.float32) + keypoint_heatmaps = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32) + keypoint_offsets = np.zeros( + (1, 32, 32, num_keypoints * 2), dtype=np.float32) + keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2) + + class_probs = np.zeros(1) + 
class_probs[0] = _logit(0.75) + class_center[0, 16, 16] = class_probs + keypoint_regression[0, 16, 16] = [ + -1., -1., + -1., 1., + 1., -1., + 1., 1.] + keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9) + keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9) + keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9) + keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score. + + class_center = tf.constant(class_center) + keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32) + keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32) + keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32) + + prediction_dict = { + cnma.OBJECT_CENTER: [class_center], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP): + [keypoint_heatmaps], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_OFFSET): + [keypoint_offsets], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION): + [keypoint_regression], + } + + def graph_fn(): + detections = model.postprocess_single_instance_keypoints( + prediction_dict, + tf.constant([[128, 128, 3]])) + return detections + + detections = self.execute_cpu(graph_fn, []) + + self.assertAllEqual([1, 1, num_keypoints, 2], + detections['detection_keypoints'].shape) + self.assertAllEqual([1, 1, num_keypoints], + detections['detection_keypoint_scores'].shape) + + @parameterized.parameters( + {'per_keypoint_depth': False}, + {'per_keypoint_depth': True}, + ) + def test_postprocess_single_class_depth(self, per_keypoint_depth): + """Test the postprocess function.""" + model = build_center_net_meta_arch( + num_classes=1, + per_keypoint_offset=per_keypoint_depth, + predict_depth=True, + per_keypoint_depth=per_keypoint_depth) + num_keypoints = len(model._kp_params_dict[_TASK_NAME].keypoint_indices) + + class_center = np.zeros((1, 32, 32, 1), dtype=np.float32) + height_width = np.zeros((1, 32, 32, 2), dtype=np.float32) + offset = np.zeros((1, 32, 32, 2), dtype=np.float32) + keypoint_heatmaps = np.ones( + (1, 32, 32, 
num_keypoints), dtype=np.float32) * _logit(0.001) + keypoint_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32) + keypoint_regression = np.random.randn(1, 32, 32, num_keypoints * 2) + + class_probs = np.zeros(1) + class_probs[0] = _logit(0.75) + class_center[0, 16, 16] = class_probs + height_width[0, 16, 16] = [5, 10] + offset[0, 16, 16] = [.25, .5] + keypoint_regression[0, 16, 16] = [-1., -1., -1., 1., 1., -1., 1., 1.] + keypoint_heatmaps[0, 14, 14, 0] = _logit(0.9) + keypoint_heatmaps[0, 14, 18, 1] = _logit(0.9) + keypoint_heatmaps[0, 18, 14, 2] = _logit(0.9) + keypoint_heatmaps[0, 18, 18, 3] = _logit(0.05) # Note the low score. + + if per_keypoint_depth: + keypoint_depth = np.zeros((1, 32, 32, num_keypoints), dtype=np.float32) + keypoint_depth[0, 14, 14, 0] = -1.0 + keypoint_depth[0, 14, 18, 1] = -1.1 + keypoint_depth[0, 18, 14, 2] = -1.2 + keypoint_depth[0, 18, 18, 3] = -1.3 + else: + keypoint_depth = np.zeros((1, 32, 32, 1), dtype=np.float32) + keypoint_depth[0, 14, 14, 0] = -1.0 + keypoint_depth[0, 14, 18, 0] = -1.1 + keypoint_depth[0, 18, 14, 0] = -1.2 + keypoint_depth[0, 18, 18, 0] = -1.3 + + class_center = tf.constant(class_center) + height_width = tf.constant(height_width) + offset = tf.constant(offset) + keypoint_heatmaps = tf.constant(keypoint_heatmaps, dtype=tf.float32) + keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32) + keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32) + keypoint_depth = tf.constant(keypoint_depth, dtype=tf.float32) + + prediction_dict = { + cnma.OBJECT_CENTER: [class_center], + cnma.BOX_SCALE: [height_width], + cnma.BOX_OFFSET: [offset], + cnma.get_keypoint_name(_TASK_NAME, + cnma.KEYPOINT_HEATMAP): [keypoint_heatmaps], + cnma.get_keypoint_name(_TASK_NAME, + cnma.KEYPOINT_OFFSET): [keypoint_offsets], + cnma.get_keypoint_name(_TASK_NAME, + cnma.KEYPOINT_REGRESSION): [keypoint_regression], + cnma.get_keypoint_name(_TASK_NAME, + cnma.KEYPOINT_DEPTH): [keypoint_depth] + } + + def graph_fn(): + 
detections = model.postprocess(prediction_dict, + tf.constant([[128, 128, 3]])) + return detections + + detections = self.execute_cpu(graph_fn, []) + + self.assertAllClose(detections['detection_keypoint_depths'][0, 0], + np.array([-1.0, -1.1, -1.2, 0.0])) + self.assertAllClose(detections['detection_keypoint_scores'][0, 0], + np.array([0.9, 0.9, 0.9, 0.1])) + + def test_get_instance_indices(self): classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32) num_detections = tf.constant([1, 3], dtype=tf.int32) batch_index = 1 @@ -1481,8 +2397,72 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase): classes, num_detections, batch_index, class_id) self.assertAllEqual(valid_indices.numpy(), [0, 2]) + def test_rescore_instances(self): + feature_extractor = DummyFeatureExtractor( + channel_means=(1.0, 2.0, 3.0), + channel_stds=(10., 20., 30.), + bgr_ordering=False, + num_feature_outputs=2, + stride=4) + image_resizer_fn = functools.partial( + preprocessor.resize_to_range, + min_dimension=128, + max_dimension=128, + pad_to_max_dimesnion=True) + + kp_params_1 = cnma.KeypointEstimationParams( + task_name='kpt_task_1', + class_id=0, + keypoint_indices=[0, 1, 2], + keypoint_std_dev=[0.00001] * 3, + classification_loss=losses.WeightedSigmoidClassificationLoss(), + localization_loss=losses.L1LocalizationLoss(), + keypoint_candidate_score_threshold=0.1, + rescore_instances=True) # Note rescoring for class_id = 0. 
+ kp_params_2 = cnma.KeypointEstimationParams( + task_name='kpt_task_2', + class_id=1, + keypoint_indices=[3, 4], + keypoint_std_dev=[0.00001] * 2, + classification_loss=losses.WeightedSigmoidClassificationLoss(), + localization_loss=losses.L1LocalizationLoss(), + keypoint_candidate_score_threshold=0.1, + rescore_instances=False) + model = cnma.CenterNetMetaArch( + is_training=True, + add_summaries=False, + num_classes=2, + feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=get_fake_center_params(), + object_detection_params=get_fake_od_params(), + keypoint_params_dict={ + 'kpt_task_1': kp_params_1, + 'kpt_task_2': kp_params_2, + }) -def get_fake_prediction_dict(input_height, input_width, stride): + def graph_fn(): + classes = tf.constant([[1, 0]], dtype=tf.int32) + scores = tf.constant([[0.5, 0.75]], dtype=tf.float32) + keypoint_scores = tf.constant( + [ + [[0.1, 0.0, 0.3, 0.4, 0.5], + [0.1, 0.2, 0.3, 0.4, 0.5]], + ]) + new_scores = model._rescore_instances(classes, scores, keypoint_scores) + return new_scores + + new_scores = self.execute_cpu(graph_fn, []) + expected_scores = np.array( + [[0.5, 0.75 * (0.1 + 0.3)/2]] + ) + self.assertAllClose(expected_scores, new_scores) + + +def get_fake_prediction_dict(input_height, + input_width, + stride, + per_keypoint_depth=False): """Prepares the fake prediction dictionary.""" output_height = input_height // stride output_width = input_width // stride @@ -1517,6 +2497,11 @@ def get_fake_prediction_dict(input_height, input_width, stride): dtype=np.float32) keypoint_offset[0, 2, 4] = 0.2, 0.4 + keypoint_depth = np.zeros((2, output_height, output_width, + _NUM_KEYPOINTS if per_keypoint_depth else 1), + dtype=np.float32) + keypoint_depth[0, 2, 4] = 3.0 + keypoint_regression = np.zeros( (2, output_height, output_width, 2 * _NUM_KEYPOINTS), dtype=np.float32) keypoint_regression[0, 2, 4] = 0.0, 0.0, 0.2, 0.4, 0.0, 0.0, 0.2, 0.4 @@ -1537,6 +2522,14 @@ def 
get_fake_prediction_dict(input_height, input_width, stride): # (5 * 2, 5 * 2 + 1), or (10, 11). densepose_regression[0, 2, 4, 10:12] = 0.4, 0.7 + track_reid_embedding = np.zeros((2, output_height, output_width, + _REID_EMBED_SIZE), dtype=np.float32) + track_reid_embedding[0, 2, 4, :] = np.arange(_REID_EMBED_SIZE) + + temporal_offsets = np.zeros((2, output_height, output_width, 2), + dtype=np.float32) + temporal_offsets[0, 2, 4, :] = 5 + prediction_dict = { 'preprocessed_inputs': tf.zeros((2, input_height, input_width, 3)), @@ -1544,14 +2537,10 @@ def get_fake_prediction_dict(input_height, input_width, stride): tf.constant(object_center), tf.constant(object_center) ], - cnma.BOX_SCALE: [ - tf.constant(object_scale), - tf.constant(object_scale) - ], - cnma.BOX_OFFSET: [ - tf.constant(object_offset), - tf.constant(object_offset) - ], + cnma.BOX_SCALE: [tf.constant(object_scale), + tf.constant(object_scale)], + cnma.BOX_OFFSET: [tf.constant(object_offset), + tf.constant(object_offset)], cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_HEATMAP): [ tf.constant(keypoint_heatmap), tf.constant(keypoint_heatmap) @@ -1564,6 +2553,10 @@ def get_fake_prediction_dict(input_height, input_width, stride): tf.constant(keypoint_regression), tf.constant(keypoint_regression) ], + cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_DEPTH): [ + tf.constant(keypoint_depth), + tf.constant(keypoint_depth) + ], cnma.SEGMENTATION_HEATMAP: [ tf.constant(mask_heatmap), tf.constant(mask_heatmap) @@ -1575,12 +2568,23 @@ def get_fake_prediction_dict(input_height, input_width, stride): cnma.DENSEPOSE_REGRESSION: [ tf.constant(densepose_regression), tf.constant(densepose_regression), - ] + ], + cnma.TRACK_REID: [ + tf.constant(track_reid_embedding), + tf.constant(track_reid_embedding), + ], + cnma.TEMPORAL_OFFSET: [ + tf.constant(temporal_offsets), + tf.constant(temporal_offsets), + ], } return prediction_dict -def get_fake_groundtruth_dict(input_height, input_width, stride): +def 
get_fake_groundtruth_dict(input_height, + input_width, + stride, + has_depth=False): """Prepares the fake groundtruth dictionary.""" # A small box with center at (0.55, 0.55). boxes = [ @@ -1609,6 +2613,26 @@ def get_fake_groundtruth_dict(input_height, input_width, stride): axis=2), multiples=[1, 1, 2]), ] + if has_depth: + keypoint_depths = [ + tf.constant([[float('nan'), 3.0, + float('nan'), 3.0, 0.55, 0.0]]), + tf.constant([[float('nan'), 0.55, + float('nan'), 0.55, 0.55, 0.0]]) + ] + keypoint_depth_weights = [ + tf.constant([[1.0, 1.0, 1.0, 1.0, 0.0, 0.0]]), + tf.constant([[1.0, 1.0, 1.0, 1.0, 0.0, 0.0]]) + ] + else: + keypoint_depths = [ + tf.constant([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), + tf.constant([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) + ] + keypoint_depth_weights = [ + tf.constant([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), + tf.constant([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]) + ] labeled_classes = [ tf.one_hot([1], depth=_NUM_CLASSES) + tf.one_hot([2], depth=_NUM_CLASSES), tf.one_hot([0], depth=_NUM_CLASSES) + tf.one_hot([1], depth=_NUM_CLASSES), @@ -1633,23 +2657,39 @@ def get_fake_groundtruth_dict(input_height, input_width, stride): tf.constant(densepose_surface_coords_np), tf.zeros_like(densepose_surface_coords_np) ] + track_ids = [ + tf.constant([2], dtype=tf.int32), + tf.constant([1], dtype=tf.int32), + ] + temporal_offsets = [ + tf.constant([[5.0, 5.0]], dtype=tf.float32), + tf.constant([[2.0, 3.0]], dtype=tf.float32), + ] + track_match_flags = [ + tf.constant([1.0], dtype=tf.float32), + tf.constant([1.0], dtype=tf.float32), + ] groundtruth_dict = { fields.BoxListFields.boxes: boxes, fields.BoxListFields.weights: weights, fields.BoxListFields.classes: classes, fields.BoxListFields.keypoints: keypoints, + fields.BoxListFields.keypoint_depths: keypoint_depths, + fields.BoxListFields.keypoint_depth_weights: keypoint_depth_weights, fields.BoxListFields.masks: masks, fields.BoxListFields.densepose_num_points: densepose_num_points, fields.BoxListFields.densepose_part_ids: 
densepose_part_ids, - fields.BoxListFields.densepose_surface_coords: - densepose_surface_coords, + fields.BoxListFields.densepose_surface_coords: densepose_surface_coords, + fields.BoxListFields.track_ids: track_ids, + fields.BoxListFields.temporal_offsets: temporal_offsets, + fields.BoxListFields.track_match_flags: track_match_flags, fields.InputDataFields.groundtruth_labeled_classes: labeled_classes, } return groundtruth_dict @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') -class CenterNetMetaComputeLossTest(test_case.TestCase): +class CenterNetMetaComputeLossTest(test_case.TestCase, parameterized.TestCase): """Test for CenterNet loss compuation related functions.""" def setUp(self): @@ -1697,17 +2737,16 @@ class CenterNetMetaComputeLossTest(test_case.TestCase): # The prediction and groundtruth are curated to produce very low loss. self.assertGreater(0.01, loss) - default_value = self.model._center_params.use_only_known_classes + default_value = self.model._center_params.use_labeled_classes self.model._center_params = ( - self.model._center_params._replace(use_only_known_classes=True)) + self.model._center_params._replace(use_labeled_classes=True)) loss = self.model._compute_object_center_loss( object_center_predictions=self.prediction_dict[cnma.OBJECT_CENTER], input_height=self.input_height, input_width=self.input_width, per_pixel_weights=self.per_pixel_weights) self.model._center_params = ( - self.model._center_params._replace( - use_only_known_classes=default_value)) + self.model._center_params._replace(use_labeled_classes=default_value)) # The prediction and groundtruth are curated to produce very low loss. self.assertGreater(0.01, loss) @@ -1776,6 +2815,66 @@ class CenterNetMetaComputeLossTest(test_case.TestCase): # The prediction and groundtruth are curated to produce very low loss. 
self.assertGreater(0.01, loss) + @parameterized.parameters( + {'per_keypoint_depth': False}, + {'per_keypoint_depth': True}, + ) + def test_compute_kp_depth_loss(self, per_keypoint_depth): + prediction_dict = get_fake_prediction_dict( + self.input_height, + self.input_width, + self.stride, + per_keypoint_depth=per_keypoint_depth) + model = build_center_net_meta_arch( + num_classes=1, + per_keypoint_offset=per_keypoint_depth, + predict_depth=True, + per_keypoint_depth=per_keypoint_depth, + peak_radius=1 if per_keypoint_depth else 0) + model._groundtruth_lists = get_fake_groundtruth_dict( + self.input_height, self.input_width, self.stride, has_depth=True) + + def graph_fn(): + loss = model._compute_kp_depth_loss( + input_height=self.input_height, + input_width=self.input_width, + task_name=_TASK_NAME, + depth_predictions=prediction_dict[cnma.get_keypoint_name( + _TASK_NAME, cnma.KEYPOINT_DEPTH)], + localization_loss_fn=self.localization_loss_fn) + return loss + + loss = self.execute(graph_fn, []) + + if per_keypoint_depth: + # The loss is computed on a disk with radius 1 but only the center pixel + # has the accurate prediction. The final loss is (4 * |3-0|) / 5 = 2.4 + self.assertAlmostEqual(2.4, loss, delta=1e-4) + else: + # The prediction and groundtruth are curated to produce very low loss. + self.assertGreater(0.01, loss) + + def test_compute_track_embedding_loss(self): + default_fc = self.model.track_reid_classification_net + # Initialize the kernel to extreme values so that the classification score + # is close to (0, 0, 1) after the softmax layer. 
+ kernel_initializer = tf.constant_initializer( + [[1, 1, 0], [-1000000, -1000000, 1000000]]) + self.model.track_reid_classification_net = tf.keras.layers.Dense( + _NUM_TRACK_IDS, + kernel_initializer=kernel_initializer, + input_shape=(_REID_EMBED_SIZE,)) + + loss = self.model._compute_track_embedding_loss( + input_height=self.input_height, + input_width=self.input_width, + object_reid_predictions=self.prediction_dict[cnma.TRACK_REID]) + + self.model.track_reid_classification_net = default_fc + + # The prediction and groundtruth are curated to produce very low loss. + self.assertGreater(0.01, loss) + @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') class CenterNetMetaArchRestoreTest(test_case.TestCase): @@ -1788,6 +2887,16 @@ class CenterNetMetaArchRestoreTest(test_case.TestCase): self.assertIsInstance(restore_from_objects_map['feature_extractor'], tf.keras.Model) + def test_retore_map_error(self): + """Test that restoring unsupported checkpoint type raises an error.""" + + model = build_center_net_meta_arch(build_resnet=True) + msg = ("Checkpoint type \"detection\" not supported for " + "CenterNetResnetFeatureExtractor. Supported types are " + "['classification', 'fine_tune']") + with self.assertRaisesRegex(ValueError, re.escape(msg)): + model.restore_from_objects('detection') + class DummyFeatureExtractor(cnma.CenterNetFeatureExtractor): @@ -1886,6 +2995,162 @@ class CenterNetFeatureExtractorTest(test_case.TestCase): self.assertAllClose(output[..., 2], 3 * np.ones((2, 32, 32))) +class Dummy1dFeatureExtractor(cnma.CenterNetFeatureExtractor): + """Returns a static tensor.""" + + def __init__(self, tensor, out_stride=1, channel_means=(0., 0., 0.), + channel_stds=(1., 1., 1.), bgr_ordering=False): + """Intializes the feature extractor. + + Args: + tensor: The tensor to return as the processed feature. + out_stride: The out_stride to return if asked. + channel_means: Ignored, but provided for API compatability. 
+ channel_stds: Ignored, but provided for API compatability. + bgr_ordering: Ignored, but provided for API compatability. + """ + + super().__init__( + channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + self._tensor = tensor + self._out_stride = out_stride + + def call(self, inputs): + return [self._tensor] + + @property + def out_stride(self): + """The stride in the output image of the network.""" + return self._out_stride + + @property + def num_feature_outputs(self): + """Ther number of feature outputs returned by the feature extractor.""" + return 1 + + @property + def supported_sub_model_types(self): + return ['detection'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'detection': + return self._network + else: + ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class CenterNetMetaArch1dTest(test_case.TestCase, parameterized.TestCase): + + @parameterized.parameters([1, 2]) + def test_outputs_with_correct_shape(self, stride): + # The 1D case reuses code from the 2D cases. These tests only check that + # the output shapes are correct, and relies on other tests for correctness. 
+ batch_size = 2 + height = 1 + width = 32 + channels = 16 + unstrided_inputs = np.random.randn( + batch_size, height, width, channels) + fixed_output_features = np.random.randn( + batch_size, height, width // stride, channels) + max_boxes = 10 + num_classes = 3 + feature_extractor = Dummy1dFeatureExtractor(fixed_output_features, stride) + arch = cnma.CenterNetMetaArch( + is_training=True, + add_summaries=True, + num_classes=num_classes, + feature_extractor=feature_extractor, + image_resizer_fn=None, + object_center_params=cnma.ObjectCenterParams( + classification_loss=losses.PenaltyReducedLogisticFocalLoss(), + object_center_loss_weight=1.0, + max_box_predictions=max_boxes, + ), + object_detection_params=cnma.ObjectDetectionParams( + localization_loss=losses.L1LocalizationLoss(), + scale_loss_weight=1.0, + offset_loss_weight=1.0, + ), + keypoint_params_dict=None, + mask_params=None, + densepose_params=None, + track_params=None, + temporal_offset_params=None, + use_depthwise=False, + compute_heatmap_sparse=False, + non_max_suppression_fn=None, + unit_height_conv=True) + arch.provide_groundtruth( + groundtruth_boxes_list=[ + tf.constant([[0, 0.5, 1.0, 0.75], + [0, 0.1, 1.0, 0.25]], tf.float32), + tf.constant([[0, 0, 1.0, 1.0], + [0, 0, 0.0, 0.0]], tf.float32) + ], + groundtruth_classes_list=[ + tf.constant([[0, 0, 1], + [0, 1, 0]], tf.float32), + tf.constant([[1, 0, 0], + [0, 0, 0]], tf.float32) + ], + groundtruth_weights_list=[ + tf.constant([1.0, 1.0]), + tf.constant([1.0, 0.0])] + ) + + predictions = arch.predict(None, None) # input is hardcoded above. 
+ predictions['preprocessed_inputs'] = tf.constant(unstrided_inputs) + true_shapes = tf.constant([[1, 32, 16], [1, 24, 16]], tf.int32) + postprocess_output = arch.postprocess(predictions, true_shapes) + losses_output = arch.loss(predictions, true_shapes) + + self.assertIn('%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.OBJECT_CENTER), + losses_output) + self.assertEqual((), losses_output['%s/%s' % ( + cnma.LOSS_KEY_PREFIX, cnma.OBJECT_CENTER)].shape) + self.assertIn('%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.BOX_SCALE), + losses_output) + self.assertEqual((), losses_output['%s/%s' % ( + cnma.LOSS_KEY_PREFIX, cnma.BOX_SCALE)].shape) + self.assertIn('%s/%s' % (cnma.LOSS_KEY_PREFIX, cnma.BOX_OFFSET), + losses_output) + self.assertEqual((), losses_output['%s/%s' % ( + cnma.LOSS_KEY_PREFIX, cnma.BOX_OFFSET)].shape) + + self.assertIn('detection_scores', postprocess_output) + self.assertEqual(postprocess_output['detection_scores'].shape, + (batch_size, max_boxes)) + self.assertIn('detection_multiclass_scores', postprocess_output) + self.assertEqual(postprocess_output['detection_multiclass_scores'].shape, + (batch_size, max_boxes, num_classes)) + self.assertIn('detection_classes', postprocess_output) + self.assertEqual(postprocess_output['detection_classes'].shape, + (batch_size, max_boxes)) + self.assertIn('num_detections', postprocess_output) + self.assertEqual(postprocess_output['num_detections'].shape, + (batch_size,)) + self.assertIn('detection_boxes', postprocess_output) + self.assertEqual(postprocess_output['detection_boxes'].shape, + (batch_size, max_boxes, 4)) + self.assertIn('detection_boxes_strided', postprocess_output) + self.assertEqual(postprocess_output['detection_boxes_strided'].shape, + (batch_size, max_boxes, 4)) + + self.assertIn(cnma.OBJECT_CENTER, predictions) + self.assertEqual(predictions[cnma.OBJECT_CENTER][0].shape, + (batch_size, height, width // stride, num_classes)) + self.assertIn(cnma.BOX_SCALE, predictions) + 
self.assertEqual(predictions[cnma.BOX_SCALE][0].shape, + (batch_size, height, width // stride, 2)) + self.assertIn(cnma.BOX_OFFSET, predictions) + self.assertEqual(predictions[cnma.BOX_OFFSET][0].shape, + (batch_size, height, width // stride, 2)) + self.assertIn('preprocessed_inputs', predictions) + + if __name__ == '__main__': tf.enable_v2_behavior() tf.test.main() diff --git a/research/object_detection/meta_architectures/context_rcnn_lib.py b/research/object_detection/meta_architectures/context_rcnn_lib.py index 902a88c77669cd27eb36490d645740041600fcac..30e5f848e7b2d1b62fbfa1fc7fec7b8c5c58783f 100644 --- a/research/object_detection/meta_architectures/context_rcnn_lib.py +++ b/research/object_detection/meta_architectures/context_rcnn_lib.py @@ -67,10 +67,13 @@ def filter_weight_value(weights, values, valid_mask): # Force the invalid weights to be very negative so it won't contribute to # the softmax. - weights += tf.transpose( - tf.cast(tf.math.logical_not(valid_mask), weights.dtype) * - _NEGATIVE_PADDING_VALUE, - perm=[0, 2, 1]) + + very_negative_mask = tf.ones( + weights.shape, dtype=weights.dtype) * _NEGATIVE_PADDING_VALUE + valid_weight_mask = tf.tile(tf.transpose(valid_mask, perm=[0, 2, 1]), + [1, weights.shape[1], 1]) + weights = tf.where(valid_weight_mask, + x=weights, y=very_negative_mask) # Force the invalid values to be 0. values *= tf.cast(valid_mask, values.dtype) @@ -140,8 +143,9 @@ def project_features(features, projection_dimension, is_training, normalize): def attention_block(input_features, context_features, bottleneck_dimension, - output_dimension, attention_temperature, valid_mask, - is_training): + output_dimension, attention_temperature, + keys_values_valid_mask, queries_valid_mask, + is_training, block_name="AttentionBlock"): """Generic attention block. Args: @@ -156,14 +160,18 @@ def attention_block(input_features, context_features, bottleneck_dimension, attention_temperature: A float Tensor. 
It controls the temperature of the softmax for weights calculation. The formula for calculation as follows: weights = exp(weights / temperature) / sum(exp(weights / temperature)) - valid_mask: A boolean Tensor of shape [batch_size, context_size]. + keys_values_valid_mask: A boolean Tensor of shape + [batch_size, context_size]. + queries_valid_mask: A boolean Tensor of shape + [batch_size, max_num_proposals]. is_training: A boolean Tensor (affecting batch normalization). + block_name: A string to specify names for different attention blocks Returns: A float Tensor of shape [batch_size, input_size, output_dimension]. """ - with tf.variable_scope("AttentionBlock"): + with tf.variable_scope(block_name): queries = project_features( input_features, bottleneck_dimension, is_training, normalize=True) keys = project_features( @@ -171,27 +179,42 @@ def attention_block(input_features, context_features, bottleneck_dimension, values = project_features( context_features, bottleneck_dimension, is_training, normalize=True) - weights = tf.matmul(queries, keys, transpose_b=True) + # masking out any keys which are padding + keys *= tf.cast(keys_values_valid_mask[..., tf.newaxis], keys.dtype) + queries *= tf.cast(queries_valid_mask[..., tf.newaxis], queries.dtype) + + weights = tf.matmul(queries, keys, transpose_b=True) + + weights, values = filter_weight_value(weights, values, + keys_values_valid_mask) - weights, values = filter_weight_value(weights, values, valid_mask) + weights = tf.identity(tf.nn.softmax(weights / attention_temperature), + name=block_name+"AttentionWeights") - weights = tf.nn.softmax(weights / attention_temperature) + features = tf.matmul(weights, values) - features = tf.matmul(weights, values) output_features = project_features( features, output_dimension, is_training, normalize=False) return output_features -def compute_box_context_attention(box_features, context_features, - valid_context_size, bottleneck_dimension, - attention_temperature, is_training): +def 
_compute_box_context_attention(box_features, num_proposals,
+ context_features, valid_context_size,
+ bottleneck_dimension,
+ attention_temperature, is_training,
+ max_num_proposals,
+ use_self_attention=False,
+ use_long_term_attention=True,
+ self_attention_in_sequence=False,
+ num_attention_heads=1,
+ num_attention_layers=1):
 """Computes the attention feature from the context given a batch of box.
 Args:
- box_features: A float Tensor of shape [batch_size, max_num_proposals,
+ box_features: A float Tensor of shape [batch_size * max_num_proposals,
 height, width, channels]. It is pooled features from first stage
 proposals.
+ num_proposals: The number of valid box proposals.
 context_features: A float Tensor of shape [batch_size, context_size,
 num_context_features].
 valid_context_size: A int32 Tensor of shape [batch_size].
@@ -201,22 +224,78 @@ def compute_box_context_attention(box_features, context_features,
 softmax for weights calculation. The formula for calculation as follows:
 weights = exp(weights / temperature) / sum(exp(weights / temperature))
 is_training: A boolean Tensor (affecting batch normalization).
+ max_num_proposals: The number of box proposals for each image.
+ use_self_attention: Whether to use an attention block across the
+ first stage predicted box features for the input image.
+ use_long_term_attention: Whether to use an attention block into the context
+ features.
+ self_attention_in_sequence: Whether self-attention and long term attention
+ should be in sequence or parallel.
+ num_attention_heads: Number of heads for multi-headed attention.
+ num_attention_layers: Number of layers for multi-layered attention.
 Returns:
 A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
""" _, context_size, _ = context_features.shape - valid_mask = compute_valid_mask(valid_context_size, context_size) + context_valid_mask = compute_valid_mask(valid_context_size, context_size) + + total_proposals, height, width, channels = box_features.shape + + batch_size = total_proposals // max_num_proposals + box_features = tf.reshape( + box_features, + [batch_size, + max_num_proposals, + height, + width, + channels]) - channels = box_features.shape[-1] # Average pools over height and width dimension so that the shape of # box_features becomes [batch_size, max_num_proposals, channels]. box_features = tf.reduce_mean(box_features, [2, 3]) - - output_features = attention_block(box_features, context_features, - bottleneck_dimension, channels.value, - attention_temperature, valid_mask, - is_training) + box_valid_mask = compute_valid_mask( + num_proposals, + box_features.shape[1]) + + if use_self_attention: + self_attention_box_features = attention_block( + box_features, box_features, bottleneck_dimension, channels.value, + attention_temperature, keys_values_valid_mask=box_valid_mask, + queries_valid_mask=box_valid_mask, is_training=is_training, + block_name="SelfAttentionBlock") + + if use_long_term_attention: + if use_self_attention and self_attention_in_sequence: + input_features = tf.add(self_attention_box_features, box_features) + input_features = tf.divide(input_features, 2) + else: + input_features = box_features + original_input_features = input_features + for jdx in range(num_attention_layers): + layer_features = tf.zeros_like(input_features) + for idx in range(num_attention_heads): + block_name = "AttentionBlock" + str(idx) + "_AttentionLayer" +str(jdx) + attention_features = attention_block( + input_features, + context_features, + bottleneck_dimension, + channels.value, + attention_temperature, + keys_values_valid_mask=context_valid_mask, + queries_valid_mask=box_valid_mask, + is_training=is_training, + block_name=block_name) + layer_features = 
tf.add(layer_features, attention_features) + layer_features = tf.divide(layer_features, num_attention_heads) + input_features = tf.add(input_features, layer_features) + output_features = tf.add(input_features, original_input_features) + if not self_attention_in_sequence and use_self_attention: + output_features = tf.add(self_attention_box_features, output_features) + elif use_self_attention: + output_features = self_attention_box_features + else: + output_features = tf.zeros(self_attention_box_features.shape) # Expands the dimension back to match with the original feature map. output_features = output_features[:, :, tf.newaxis, tf.newaxis, :] diff --git a/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py b/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py index a0b3b848d835dcad37f6c75f05b869fbaec4facb..a4c9404f6ee9bbb98dccaf305692ddde185971be 100644 --- a/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py +++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf1_test.py @@ -50,9 +50,9 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value( weights, values, valid_mask) expected_weights = tf.constant([[[4, 4], [4, 4], [4, 4]], - [[4, _NEGATIVE_PADDING_VALUE + 4], - [4, _NEGATIVE_PADDING_VALUE + 4], - [4, _NEGATIVE_PADDING_VALUE + 4]]]) + [[4, _NEGATIVE_PADDING_VALUE], + [4, _NEGATIVE_PADDING_VALUE], + [4, _NEGATIVE_PADDING_VALUE]]]) expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]]) @@ -66,9 +66,9 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, weights, values, valid_mask) expected_weights = tf.constant( [[[4, 4], [4, 4], [4, 4]], - [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4], - [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4], - [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]]) + 
[[_NEGATIVE_PADDING_VALUE, _NEGATIVE_PADDING_VALUE], + [_NEGATIVE_PADDING_VALUE, _NEGATIVE_PADDING_VALUE], + [_NEGATIVE_PADDING_VALUE, _NEGATIVE_PADDING_VALUE]]]) expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]], [[0, 0, 0, 0], [0, 0, 0, 0]]]) @@ -100,27 +100,67 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase, input_features = tf.ones([2, 3, 4], tf.float32) context_features = tf.ones([2, 2, 3], tf.float32) valid_mask = tf.constant([[True, True], [False, False]], tf.bool) + box_valid_mask = tf.constant([[True, True, True], [False, False, False]], + tf.bool) is_training = False output_features = context_rcnn_lib.attention_block( input_features, context_features, bottleneck_dimension, - output_dimension, attention_temperature, valid_mask, is_training) + output_dimension, attention_temperature, + keys_values_valid_mask=valid_mask, + queries_valid_mask=box_valid_mask, + is_training=is_training) # Makes sure the shape is correct. self.assertAllEqual(output_features.shape, [2, 3, output_dimension]) @parameterized.parameters(True, False) def test_compute_box_context_attention(self, is_training): - box_features = tf.ones([2, 3, 4, 4, 4], tf.float32) + box_features = tf.ones([2 * 3, 4, 4, 4], tf.float32) context_features = tf.ones([2, 5, 6], tf.float32) valid_context_size = tf.constant((2, 3), tf.int32) + num_proposals = tf.constant((2, 3), tf.int32) bottleneck_dimension = 10 attention_temperature = 1 - attention_features = context_rcnn_lib.compute_box_context_attention( - box_features, context_features, valid_context_size, - bottleneck_dimension, attention_temperature, is_training) + attention_features = context_rcnn_lib._compute_box_context_attention( + box_features, num_proposals, context_features, valid_context_size, + bottleneck_dimension, attention_temperature, is_training, + max_num_proposals=3) # Makes sure the shape is correct. 
self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4]) + @parameterized.parameters(True, False) + def test_compute_box_context_attention_with_self_attention(self, is_training): + box_features = tf.ones([2 * 3, 4, 4, 4], tf.float32) + context_features = tf.ones([2, 5, 6], tf.float32) + valid_context_size = tf.constant((2, 3), tf.int32) + num_proposals = tf.constant((2, 3), tf.int32) + bottleneck_dimension = 10 + attention_temperature = 1 + attention_features = context_rcnn_lib._compute_box_context_attention( + box_features, num_proposals, context_features, valid_context_size, + bottleneck_dimension, attention_temperature, is_training, + max_num_proposals=3, + use_self_attention=True) + # Makes sure the shape is correct. + self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4]) + + @parameterized.parameters(True, False) + def test_compute_box_context_attention_with_layers_and_heads( + self, is_training): + box_features = tf.ones([2 * 3, 4, 4, 4], tf.float32) + context_features = tf.ones([2, 5, 6], tf.float32) + valid_context_size = tf.constant((2, 3), tf.int32) + num_proposals = tf.constant((2, 3), tf.int32) + bottleneck_dimension = 10 + attention_temperature = 1 + attention_features = context_rcnn_lib._compute_box_context_attention( + box_features, num_proposals, context_features, valid_context_size, + bottleneck_dimension, attention_temperature, is_training, + max_num_proposals=3, + num_attention_layers=3, + num_attention_heads=3) + # Makes sure the shape is correct. 
+ self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4]) if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/meta_architectures/context_rcnn_lib_tf2.py b/research/object_detection/meta_architectures/context_rcnn_lib_tf2.py new file mode 100644 index 0000000000000000000000000000000000000000..b795d60a91a0874a8d40dc1e140216a126901b0b --- /dev/null +++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf2.py @@ -0,0 +1,264 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library functions for Context R-CNN.""" +import tensorflow as tf + +from object_detection.core import freezable_batch_norm + +# The negative value used in padding the invalid weights. 
+_NEGATIVE_PADDING_VALUE = -100000 + + +class ContextProjection(tf.keras.layers.Layer): + """Custom layer to do batch normalization and projection.""" + + def __init__(self, projection_dimension, **kwargs): + self.batch_norm = freezable_batch_norm.FreezableBatchNorm( + epsilon=0.001, + center=True, + scale=True, + momentum=0.97, + trainable=True) + self.projection = tf.keras.layers.Dense(units=projection_dimension, + use_bias=True) + self.projection_dimension = projection_dimension + super(ContextProjection, self).__init__(**kwargs) + + def build(self, input_shape): + self.projection.build(input_shape) + self.batch_norm.build(input_shape[:1] + [self.projection_dimension]) + + def call(self, input_features, is_training=False): + return tf.nn.relu6(self.batch_norm(self.projection(input_features), + is_training)) + + +class AttentionBlock(tf.keras.layers.Layer): + """Custom layer to perform all attention.""" + + def __init__(self, bottleneck_dimension, attention_temperature, + output_dimension=None, is_training=False, + name='AttentionBlock', max_num_proposals=100, + **kwargs): + """Constructs an attention block. + + Args: + bottleneck_dimension: A int32 Tensor representing the bottleneck dimension + for intermediate projections. + attention_temperature: A float Tensor. It controls the temperature of the + softmax for weights calculation. The formula for calculation as follows: + weights = exp(weights / temperature) / sum(exp(weights / temperature)) + output_dimension: A int32 Tensor representing the last dimension of the + output feature. + is_training: A boolean Tensor (affecting batch normalization). + name: A string describing what to name the variables in this block. + max_num_proposals: The number of box proposals for each image + **kwargs: Additional keyword arguments. 
+ """ + + self._key_proj = ContextProjection(bottleneck_dimension) + self._val_proj = ContextProjection(bottleneck_dimension) + self._query_proj = ContextProjection(bottleneck_dimension) + self._feature_proj = None + self._attention_temperature = attention_temperature + self._bottleneck_dimension = bottleneck_dimension + self._is_training = is_training + self._output_dimension = output_dimension + self._max_num_proposals = max_num_proposals + if self._output_dimension: + self._feature_proj = ContextProjection(self._output_dimension) + super(AttentionBlock, self).__init__(name=name, **kwargs) + + def build(self, input_shapes): + """Finishes building the attention block. + + Args: + input_shapes: the shape of the primary input box features. + """ + if not self._feature_proj: + self._output_dimension = input_shapes[-1] + self._feature_proj = ContextProjection(self._output_dimension) + + def call(self, box_features, context_features, valid_context_size, + num_proposals): + """Handles a call by performing attention. + + Args: + box_features: A float Tensor of shape [batch_size * input_size, height, + width, num_input_features]. + context_features: A float Tensor of shape [batch_size, context_size, + num_context_features]. + valid_context_size: A int32 Tensor of shape [batch_size]. + num_proposals: A [batch_size] int32 Tensor specifying the number of valid + proposals per image in the batch. + + Returns: + A float Tensor with shape [batch_size, input_size, num_input_features] + containing output features after attention with context features. 
+ """ + + _, context_size, _ = context_features.shape + keys_values_valid_mask = compute_valid_mask( + valid_context_size, context_size) + + total_proposals, height, width, channels = box_features.shape + batch_size = total_proposals // self._max_num_proposals + box_features = tf.reshape( + box_features, + [batch_size, + self._max_num_proposals, + height, + width, + channels]) + + # Average pools over height and width dimension so that the shape of + # box_features becomes [batch_size, max_num_proposals, channels]. + box_features = tf.reduce_mean(box_features, [2, 3]) + + queries_valid_mask = compute_valid_mask(num_proposals, + box_features.shape[1]) + + queries = project_features( + box_features, self._bottleneck_dimension, self._is_training, + self._query_proj, normalize=True) + keys = project_features( + context_features, self._bottleneck_dimension, self._is_training, + self._key_proj, normalize=True) + values = project_features( + context_features, self._bottleneck_dimension, self._is_training, + self._val_proj, normalize=True) + + # masking out any keys which are padding + keys *= tf.cast(keys_values_valid_mask[..., tf.newaxis], keys.dtype) + queries *= tf.cast(queries_valid_mask[..., tf.newaxis], queries.dtype) + + weights = tf.matmul(queries, keys, transpose_b=True) + weights, values = filter_weight_value(weights, values, + keys_values_valid_mask) + weights = tf.nn.softmax(weights / self._attention_temperature) + + features = tf.matmul(weights, values) + output_features = project_features( + features, self._output_dimension, self._is_training, + self._feature_proj, normalize=False) + + output_features = output_features[:, :, tf.newaxis, tf.newaxis, :] + + return output_features + + +def filter_weight_value(weights, values, valid_mask): + """Filters weights and values based on valid_mask. + + _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to + avoid their contribution in softmax. 
0 will be set for the invalid elements in
+ the values.
+
+ Args:
+ weights: A float Tensor of shape [batch_size, input_size, context_size].
+ values: A float Tensor of shape [batch_size, context_size,
+ projected_dimension].
+ valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
+ valid and False means invalid.
+
+ Returns:
+ weights: A float Tensor of shape [batch_size, input_size, context_size].
+ values: A float Tensor of shape [batch_size, context_size,
+ projected_dimension].
+
+ Raises:
+ ValueError: If the shapes of the inputs don't match.
+ """
+ w_batch_size, _, w_context_size = weights.shape
+ v_batch_size, v_context_size, _ = values.shape
+ m_batch_size, m_context_size = valid_mask.shape
+ if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
+ raise ValueError('Please make sure the first dimension of the input'
+ ' tensors are the same.')
+
+ if w_context_size != v_context_size:
+ raise ValueError('Please make sure the third dimension of weights matches'
+ ' the second dimension of values.')
+
+ if w_context_size != m_context_size:
+ raise ValueError('Please make sure the third dimension of the weights'
+ ' matches the second dimension of the valid_mask.')
+
+ valid_mask = valid_mask[..., tf.newaxis]
+
+ # Force the invalid weights to be very negative so it won't contribute to
+ # the softmax.
+ weights += tf.transpose(
+ tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
+ _NEGATIVE_PADDING_VALUE,
+ perm=[0, 2, 1])
+
+ # Force the invalid values to be 0.
+ values *= tf.cast(valid_mask, values.dtype)
+
+ return weights, values
+
+
+def project_features(features, bottleneck_dimension, is_training,
+ layer, normalize=True):
+ """Projects features to another feature space.
+
+ Args:
+ features: A float Tensor of shape [batch_size, features_size,
+ num_features].
+ bottleneck_dimension: A int32 Tensor.
+ is_training: A boolean Tensor (affecting batch normalization).
+ layer: Contains a custom layer specific to the particular operation + being performed (key, value, query, features) + normalize: A boolean Tensor. If true, the output features will be l2 + normalized on the last dimension. + + Returns: + A float Tensor of shape [batch, features_size, projection_dimension]. + """ + shape_arr = features.shape + batch_size, _, num_features = shape_arr + features = tf.reshape(features, [-1, num_features]) + + projected_features = layer(features, is_training) + + projected_features = tf.reshape(projected_features, + [batch_size, -1, bottleneck_dimension]) + + if normalize: + projected_features = tf.keras.backend.l2_normalize(projected_features, + axis=-1) + + return projected_features + + +def compute_valid_mask(num_valid_elements, num_elements): + """Computes mask of valid entries within padded context feature. + + Args: + num_valid_elements: A int32 Tensor of shape [batch_size]. + num_elements: An int32 Tensor. + + Returns: + A boolean Tensor of the shape [batch_size, num_elements]. True means + valid and False means invalid. + """ + batch_size = num_valid_elements.shape[0] + element_idxs = tf.range(num_elements, dtype=tf.int32) + batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1]) + num_valid_elements = num_valid_elements[..., tf.newaxis] + valid_mask = tf.less(batch_element_idxs, num_valid_elements) + return valid_mask diff --git a/research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py b/research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e6b0559d7d119fb12232741fd9c9e4d4a1534d --- /dev/null +++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py @@ -0,0 +1,122 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for context_rcnn_lib.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +from absl.testing import parameterized +import tensorflow.compat.v1 as tf + +from object_detection.meta_architectures import context_rcnn_lib_tf2 as context_rcnn_lib +from object_detection.utils import test_case +from object_detection.utils import tf_version + +_NEGATIVE_PADDING_VALUE = -100000 + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase): + """Tests for the functions in context_rcnn_lib.""" + + def test_compute_valid_mask(self): + num_elements = tf.constant(3, tf.int32) + num_valid_elementss = tf.constant((1, 2), tf.int32) + valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elementss, + num_elements) + expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32) + self.assertAllEqual(valid_mask, expected_valid_mask) + + def test_filter_weight_value(self): + weights = tf.ones((2, 3, 2), tf.float32) * 4 + values = tf.ones((2, 2, 4), tf.float32) + valid_mask = tf.constant([[True, True], [True, False]], tf.bool) + + filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value( + weights, values, valid_mask) + expected_weights = tf.constant([[[4, 4], [4, 
4], [4, 4]], + [[4, _NEGATIVE_PADDING_VALUE + 4], + [4, _NEGATIVE_PADDING_VALUE + 4], + [4, _NEGATIVE_PADDING_VALUE + 4]]]) + + expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]], + [[1, 1, 1, 1], [0, 0, 0, 0]]]) + self.assertAllEqual(filtered_weights, expected_weights) + self.assertAllEqual(filtered_values, expected_values) + + # Changes the valid_mask so the results will be different. + valid_mask = tf.constant([[True, True], [False, False]], tf.bool) + + filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value( + weights, values, valid_mask) + expected_weights = tf.constant( + [[[4, 4], [4, 4], [4, 4]], + [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4], + [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4], + [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]]) + + expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]], + [[0, 0, 0, 0], [0, 0, 0, 0]]]) + self.assertAllEqual(filtered_weights, expected_weights) + self.assertAllEqual(filtered_values, expected_values) + + @parameterized.parameters((2, True, True), (2, False, True), + (10, True, False), (10, False, False)) + def test_project_features(self, projection_dimension, is_training, normalize): + features = tf.ones([2, 3, 4], tf.float32) + projected_features = context_rcnn_lib.project_features( + features, + projection_dimension, + is_training, + context_rcnn_lib.ContextProjection(projection_dimension), + normalize=normalize) + + # Makes sure the shape is correct. 
+ self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension]) + + @parameterized.parameters( + (2, 10, 1), + (3, 10, 2), + (4, None, 3), + (5, 20, 4), + (7, None, 5), + ) + def test_attention_block(self, bottleneck_dimension, output_dimension, + attention_temperature): + input_features = tf.ones([2 * 8, 3, 3, 3], tf.float32) + context_features = tf.ones([2, 20, 10], tf.float32) + num_proposals = tf.convert_to_tensor([6, 3]) + attention_block = context_rcnn_lib.AttentionBlock( + bottleneck_dimension, + attention_temperature, + output_dimension=output_dimension, + is_training=False, + max_num_proposals=8) + valid_context_size = tf.random_uniform((2,), + minval=0, + maxval=10, + dtype=tf.int32) + output_features = attention_block(input_features, context_features, + valid_context_size, num_proposals) + + # Makes sure the shape is correct. + self.assertAllEqual(output_features.shape, + [2, 8, 1, 1, (output_dimension or 3)]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/meta_architectures/context_rcnn_meta_arch.py b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py index abe30558b01218df8999b3f0f7698e57f67f8ff2..dc7cad1e47489ea3b238a33648b5d065852c5f5c 100644 --- a/research/object_detection/meta_architectures/context_rcnn_meta_arch.py +++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py @@ -25,9 +25,18 @@ from __future__ import print_function import functools +import tensorflow.compat.v1 as tf + +from object_detection.core import box_predictor from object_detection.core import standard_fields as fields from object_detection.meta_architectures import context_rcnn_lib +from object_detection.meta_architectures import context_rcnn_lib_tf2 from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.protos import faster_rcnn_pb2 +from object_detection.utils import ops +from object_detection.utils import tf_version + 
+_UNINITIALIZED_FEATURE_EXTRACTOR = '__uninitialized__' class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): @@ -74,8 +83,17 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): freeze_batchnorm=False, return_raw_detections_during_predict=False, output_final_box_features=False, + output_final_box_rpn_features=False, attention_bottleneck_dimension=None, - attention_temperature=None): + attention_temperature=None, + use_self_attention=False, + use_long_term_attention=True, + self_attention_in_sequence=False, + num_attention_heads=1, + num_attention_layers=1, + attention_position=( + faster_rcnn_pb2.AttentionPosition.POST_BOX_CLASSIFIER) + ): """ContextRCNNMetaArch Constructor. Args: @@ -208,11 +226,25 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): boxes in the predict() method. These are decoded boxes that have not been through postprocessing (i.e. NMS). Default False. output_final_box_features: Whether to output final box features. If true, - it crops the feauture map based on the final box prediction and returns - in the dict as detection_features. + it crops the feature map based on the final box prediction and returns + it in the output dict as detection_features. + output_final_box_rpn_features: Whether to output rpn box features. If + true, it crops the rpn feature map based on the final box prediction and + returns it in the output dict as detection_features. attention_bottleneck_dimension: A single integer. The bottleneck feature dimension of the attention block. attention_temperature: A single float. The attention temperature. + use_self_attention: Whether to use self-attention within the box features + in the current frame. + use_long_term_attention: Whether to use attention into the context + features. + self_attention_in_sequence: Whether self attention and long term attention + are in sequence or parallel. + num_attention_heads: The number of attention heads to use. 
+ num_attention_layers: The number of attention layers to use. + attention_position: Whether attention should occur post rpn or post + box classifier. Options are specified in the faster rcnn proto, + default is post box classifier. Raises: ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at @@ -262,13 +294,40 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): freeze_batchnorm=freeze_batchnorm, return_raw_detections_during_predict=( return_raw_detections_during_predict), - output_final_box_features=output_final_box_features) + output_final_box_features=output_final_box_features, + output_final_box_rpn_features=output_final_box_rpn_features) + + self._attention_position = attention_position - self._context_feature_extract_fn = functools.partial( - context_rcnn_lib.compute_box_context_attention, - bottleneck_dimension=attention_bottleneck_dimension, - attention_temperature=attention_temperature, - is_training=is_training) + if tf_version.is_tf1(): + self._context_feature_extract_fn = functools.partial( + context_rcnn_lib._compute_box_context_attention, + bottleneck_dimension=attention_bottleneck_dimension, + attention_temperature=attention_temperature, + is_training=is_training, + max_num_proposals=self.max_num_proposals, + use_self_attention=use_self_attention, + use_long_term_attention=use_long_term_attention, + self_attention_in_sequence=self_attention_in_sequence, + num_attention_heads=num_attention_heads, + num_attention_layers=num_attention_layers) + else: + if use_self_attention: + raise NotImplementedError + if self_attention_in_sequence: + raise NotImplementedError + if not use_long_term_attention: + raise NotImplementedError + if num_attention_heads > 1: + raise NotImplementedError + if num_attention_layers > 1: + raise NotImplementedError + + self._context_feature_extract_fn = context_rcnn_lib_tf2.AttentionBlock( + bottleneck_dimension=attention_bottleneck_dimension, + attention_temperature=attention_temperature, 
+ is_training=is_training, + max_num_proposals=self.max_num_proposals) @staticmethod def get_side_inputs(features): @@ -290,8 +349,8 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): if (fields.InputDataFields.context_features not in features or fields.InputDataFields.valid_context_size not in features): raise ValueError( - "Please make sure context_features and valid_context_size are in the " - "features") + 'Please make sure context_features and valid_context_size are in the ' + 'features') return { fields.InputDataFields.context_features: @@ -300,8 +359,189 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): features[fields.InputDataFields.valid_context_size] } + def _predict_second_stage(self, rpn_box_encodings, + rpn_objectness_predictions_with_background, + rpn_features_to_crop, anchors, image_shape, + true_image_shapes, **side_inputs): + """Predicts the output tensors from second stage of Faster R-CNN. + + Args: + rpn_box_encodings: 3-D float tensor of shape + [batch_size, num_valid_anchors, self._box_coder.code_size] containing + predicted boxes. + rpn_objectness_predictions_with_background: 2-D float tensor of shape + [batch_size, num_valid_anchors, 2] containing class + predictions (logits) for each of the anchors. Note that this + tensor *includes* background class predictions (at class index 0). + rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensor with shape + [batch_size, height_i, width_i, depth] representing image features to + crop using the proposal boxes predicted by the RPN. + anchors: 2-D float tensor of shape + [num_anchors, self._box_coder.code_size]. + image_shape: A 1D int32 tensors of size [4] containing the image shape. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. 
+ **side_inputs: additional tensors that are required by the network. + + Returns: + prediction_dict: a dictionary holding "raw" prediction tensors: + 1) refined_box_encodings: a 3-D float32 tensor with shape + [total_num_proposals, num_classes, self._box_coder.code_size] + representing predicted (final) refined box encodings, where + total_num_proposals=batch_size*self._max_num_proposals. If using a + shared box across classes the shape will instead be + [total_num_proposals, 1, self._box_coder.code_size]. + 2) class_predictions_with_background: a 3-D float32 tensor with shape + [total_num_proposals, num_classes + 1] containing class + predictions (logits) for each of the anchors, where + total_num_proposals=batch_size*self._max_num_proposals. + Note that this tensor *includes* background class predictions + (at class index 0). + 3) num_proposals: An int32 tensor of shape [batch_size] representing the + number of proposals generated by the RPN. `num_proposals` allows us + to keep track of which entries are to be treated as zero paddings and + which are not since we always pad the number of proposals to be + `self.max_num_proposals` for each image. + 4) proposal_boxes: A float32 tensor of shape + [batch_size, self.max_num_proposals, 4] representing + decoded proposal bounding boxes in absolute coordinates. + 5) proposal_boxes_normalized: A float32 tensor of shape + [batch_size, self.max_num_proposals, 4] representing decoded proposal + bounding boxes in normalized coordinates. Can be used to override the + boxes proposed by the RPN, thus enabling one to extract features and + get box classification and prediction for externally selected areas + of the image. + 6) box_classifier_features: a 4-D float32/bfloat16 tensor + representing the features for each proposal. 
+ If self._return_raw_detections_during_predict is True, the dictionary + will also contain: + 7) raw_detection_boxes: a 4-D float32 tensor with shape + [batch_size, self.max_num_proposals, num_classes, 4] in normalized + coordinates. + 8) raw_detection_feature_map_indices: a 3-D int32 tensor with shape + [batch_size, self.max_num_proposals, num_classes]. + """ + proposal_boxes_normalized, num_proposals = self._proposal_postprocess( + rpn_box_encodings, rpn_objectness_predictions_with_background, anchors, + image_shape, true_image_shapes) + + prediction_dict = self._box_prediction(rpn_features_to_crop, + proposal_boxes_normalized, + image_shape, true_image_shapes, + num_proposals, + **side_inputs) + prediction_dict['num_proposals'] = num_proposals + return prediction_dict + + def _box_prediction(self, rpn_features_to_crop, proposal_boxes_normalized, + image_shape, true_image_shapes, num_proposals, + **side_inputs): + """Predicts the output tensors from second stage of Faster R-CNN. + + Args: + rpn_features_to_crop: A list 4-D float32 or bfloat16 tensor with shape + [batch_size, height_i, width_i, depth] representing image features to + crop using the proposal boxes predicted by the RPN. + proposal_boxes_normalized: A float tensor with shape [batch_size, + max_num_proposals, 4] representing the (potentially zero padded) + proposal boxes for all images in the batch. These boxes are represented + as normalized coordinates. + image_shape: A 1D int32 tensors of size [4] containing the image shape. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is + of the form [height, width, channels] indicating the shapes + of true images in the resized images, as resized images can be padded + with zeros. + num_proposals: The number of valid box proposals. + **side_inputs: additional tensors that are required by the network. 
+ + Returns: + prediction_dict: a dictionary holding "raw" prediction tensors: + 1) refined_box_encodings: a 3-D float32 tensor with shape + [total_num_proposals, num_classes, self._box_coder.code_size] + representing predicted (final) refined box encodings, where + total_num_proposals=batch_size*self._max_num_proposals. If using a + shared box across classes the shape will instead be + [total_num_proposals, 1, self._box_coder.code_size]. + 2) class_predictions_with_background: a 3-D float32 tensor with shape + [total_num_proposals, num_classes + 1] containing class + predictions (logits) for each of the anchors, where + total_num_proposals=batch_size*self._max_num_proposals. + Note that this tensor *includes* background class predictions + (at class index 0). + 3) proposal_boxes: A float32 tensor of shape + [batch_size, self.max_num_proposals, 4] representing + decoded proposal bounding boxes in absolute coordinates. + 4) proposal_boxes_normalized: A float32 tensor of shape + [batch_size, self.max_num_proposals, 4] representing decoded proposal + bounding boxes in normalized coordinates. Can be used to override the + boxes proposed by the RPN, thus enabling one to extract features and + get box classification and prediction for externally selected areas + of the image. + 5) box_classifier_features: a 4-D float32/bfloat16 tensor + representing the features for each proposal. + If self._return_raw_detections_during_predict is True, the dictionary + will also contain: + 6) raw_detection_boxes: a 4-D float32 tensor with shape + [batch_size, self.max_num_proposals, num_classes, 4] in normalized + coordinates. + 7) raw_detection_feature_map_indices: a 3-D int32 tensor with shape + [batch_size, self.max_num_proposals, num_classes]. + 8) final_anchors: a 3-D float tensor of shape [batch_size, + self.max_num_proposals, 4] containing the reference anchors for raw + detection boxes in normalized coordinates. 
+ """ + flattened_proposal_feature_maps = ( + self._compute_second_stage_input_feature_maps( + rpn_features_to_crop, proposal_boxes_normalized, + image_shape, num_proposals, **side_inputs)) + + box_classifier_features = self._extract_box_classifier_features( + flattened_proposal_feature_maps, num_proposals, **side_inputs) + + if self._mask_rcnn_box_predictor.is_keras_model: + box_predictions = self._mask_rcnn_box_predictor( + [box_classifier_features], + prediction_stage=2) + else: + box_predictions = self._mask_rcnn_box_predictor.predict( + [box_classifier_features], + num_predictions_per_location=[1], + scope=self.second_stage_box_predictor_scope, + prediction_stage=2) + + refined_box_encodings = tf.squeeze( + box_predictions[box_predictor.BOX_ENCODINGS], + axis=1, name='all_refined_box_encodings') + class_predictions_with_background = tf.squeeze( + box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], + axis=1, name='all_class_predictions_with_background') + + absolute_proposal_boxes = ops.normalized_to_image_coordinates( + proposal_boxes_normalized, image_shape, self._parallel_iterations) + + prediction_dict = { + 'refined_box_encodings': tf.cast(refined_box_encodings, + dtype=tf.float32), + 'class_predictions_with_background': + tf.cast(class_predictions_with_background, dtype=tf.float32), + 'proposal_boxes': absolute_proposal_boxes, + 'box_classifier_features': box_classifier_features, + 'proposal_boxes_normalized': proposal_boxes_normalized, + 'final_anchors': proposal_boxes_normalized + } + + if self._return_raw_detections_during_predict: + prediction_dict.update(self._raw_detections_and_feature_map_inds( + refined_box_encodings, absolute_proposal_boxes, true_image_shapes)) + + return prediction_dict + def _compute_second_stage_input_feature_maps(self, features_to_crop, proposal_boxes_normalized, + image_shape, + num_proposals, context_features, valid_context_size): """Crops to a set of proposals from the feature map for a batch of images. 
@@ -316,6 +556,8 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): proposal_boxes_normalized: A float32 Tensor with shape [batch_size, num_proposals, box_code_size] containing proposal boxes in normalized coordinates. + image_shape: A 1D int32 tensors of size [4] containing the image shape. + num_proposals: The number of valid box proposals. context_features: A float Tensor of shape [batch_size, context_size, num_context_features]. valid_context_size: A int32 Tensor of shape [batch_size]. @@ -323,18 +565,60 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): Returns: A float32 Tensor with shape [K, new_height, new_width, depth]. """ + del image_shape box_features = self._crop_and_resize_fn( - features_to_crop, proposal_boxes_normalized, + features_to_crop, proposal_boxes_normalized, None, [self._initial_crop_size, self._initial_crop_size]) - attention_features = self._context_feature_extract_fn( - box_features=box_features, - context_features=context_features, - valid_context_size=valid_context_size) + flattened_box_features = self._flatten_first_two_dimensions(box_features) + + flattened_box_features = self._maxpool_layer(flattened_box_features) + + if self._attention_position == ( + faster_rcnn_pb2.AttentionPosition.POST_RPN): + attention_features = self._context_feature_extract_fn( + box_features=flattened_box_features, + num_proposals=num_proposals, + context_features=context_features, + valid_context_size=valid_context_size) + + # Adds box features with attention features. 
+ flattened_box_features += self._flatten_first_two_dimensions( + attention_features) + + return flattened_box_features + + def _extract_box_classifier_features( + self, flattened_box_features, num_proposals, context_features, + valid_context_size, + attention_position=( + faster_rcnn_pb2.AttentionPosition.POST_BOX_CLASSIFIER)): + if self._feature_extractor_for_box_classifier_features == ( + _UNINITIALIZED_FEATURE_EXTRACTOR): + self._feature_extractor_for_box_classifier_features = ( + self._feature_extractor.get_box_classifier_feature_extractor_model( + name=self.second_stage_feature_extractor_scope)) + + if self._feature_extractor_for_box_classifier_features: + box_classifier_features = ( + self._feature_extractor_for_box_classifier_features( + flattened_box_features)) + else: + box_classifier_features = ( + self._feature_extractor.extract_box_classifier_features( + flattened_box_features, + scope=self.second_stage_feature_extractor_scope)) - # Adds box features with attention features. - box_features += attention_features + if self._attention_position == ( + faster_rcnn_pb2.AttentionPosition.POST_BOX_CLASSIFIER): + attention_features = self._context_feature_extract_fn( + box_features=box_classifier_features, + num_proposals=num_proposals, + context_features=context_features, + valid_context_size=valid_context_size) - flattened_feature_maps = self._flatten_first_two_dimensions(box_features) + # Adds box features with attention features. 
+ box_classifier_features += self._flatten_first_two_dimensions( + attention_features) - return self._maxpool_layer(flattened_feature_maps) + return box_classifier_features diff --git a/research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py b/research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7ee8209c7d4803bb92627eedd341e8217dcd2366 --- /dev/null +++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py @@ -0,0 +1,540 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for object_detection.meta_architectures.context_meta_arch.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import unittest +from unittest import mock # pylint: disable=g-importing-member +from absl.testing import parameterized +import tensorflow.compat.v1 as tf +import tf_slim as slim + +from google.protobuf import text_format + +from object_detection.anchor_generators import grid_anchor_generator +from object_detection.builders import box_predictor_builder +from object_detection.builders import hyperparams_builder +from object_detection.builders import post_processing_builder +from object_detection.core import balanced_positive_negative_sampler as sampler +from object_detection.core import losses +from object_detection.core import post_processing +from object_detection.core import standard_fields as fields +from object_detection.core import target_assigner +from object_detection.meta_architectures import context_rcnn_meta_arch +from object_detection.meta_architectures import faster_rcnn_meta_arch +from object_detection.protos import box_predictor_pb2 +from object_detection.protos import hyperparams_pb2 +from object_detection.protos import post_processing_pb2 +from object_detection.utils import spatial_transform_ops as spatial_ops +from object_detection.utils import test_case +from object_detection.utils import test_utils +from object_detection.utils import tf_version + + +class FakeFasterRCNNFeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): + """Fake feature extractor to use in tests.""" + + def __init__(self): + super(FakeFasterRCNNFeatureExtractor, self).__init__( + is_training=False, + first_stage_features_stride=32, + reuse_weights=None, + weight_decay=0.0) + + def preprocess(self, resized_inputs): + return tf.identity(resized_inputs) + + def 
_extract_proposal_features(self, preprocessed_inputs, scope): + with tf.variable_scope('mock_model'): + proposal_features = 0 * slim.conv2d( + preprocessed_inputs, num_outputs=3, kernel_size=1, scope='layer1') + return proposal_features, {} + + def _extract_box_classifier_features(self, proposal_feature_maps, scope): + with tf.variable_scope('mock_model'): + return 0 * slim.conv2d( + proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer2') + + +class FakeFasterRCNNKerasFeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor): + """Fake feature extractor to use in tests.""" + + def __init__(self): + super(FakeFasterRCNNKerasFeatureExtractor, self).__init__( + is_training=False, first_stage_features_stride=32, weight_decay=0.0) + + def preprocess(self, resized_inputs): + return tf.identity(resized_inputs) + + def get_proposal_feature_extractor_model(self, name): + + class ProposalFeatureExtractor(tf.keras.Model): + """Dummy proposal feature extraction.""" + + def __init__(self, name): + super(ProposalFeatureExtractor, self).__init__(name=name) + self.conv = None + + def build(self, input_shape): + self.conv = tf.keras.layers.Conv2D( + 3, kernel_size=1, padding='SAME', name='layer1') + + def call(self, inputs): + return self.conv(inputs) + + return ProposalFeatureExtractor(name=name) + + def get_box_classifier_feature_extractor_model(self, name): + return tf.keras.Sequential([ + tf.keras.layers.Conv2D( + 3, kernel_size=1, padding='SAME', name=name + '_layer2') + ]) + + +class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase): + + def _get_model(self, box_predictor, **common_kwargs): + return context_rcnn_meta_arch.ContextRCNNMetaArch( + initial_crop_size=3, + maxpool_kernel_size=1, + maxpool_stride=1, + second_stage_mask_rcnn_box_predictor=box_predictor, + attention_bottleneck_dimension=10, + attention_temperature=0.2, + **common_kwargs) + + def _build_arg_scope_with_hyperparams(self, hyperparams_text_proto, + 
is_training): + hyperparams = hyperparams_pb2.Hyperparams() + text_format.Merge(hyperparams_text_proto, hyperparams) + return hyperparams_builder.build(hyperparams, is_training=is_training) + + def _build_keras_layer_hyperparams(self, hyperparams_text_proto): + hyperparams = hyperparams_pb2.Hyperparams() + text_format.Merge(hyperparams_text_proto, hyperparams) + return hyperparams_builder.KerasLayerHyperparams(hyperparams) + + def _get_second_stage_box_predictor_text_proto(self, + share_box_across_classes=False + ): + share_box_field = 'true' if share_box_across_classes else 'false' + box_predictor_text_proto = """ + mask_rcnn_box_predictor {{ + fc_hyperparams {{ + op: FC + activation: NONE + regularizer {{ + l2_regularizer {{ + weight: 0.0005 + }} + }} + initializer {{ + variance_scaling_initializer {{ + factor: 1.0 + uniform: true + mode: FAN_AVG + }} + }} + }} + share_box_across_classes: {share_box_across_classes} + }} + """.format(share_box_across_classes=share_box_field) + return box_predictor_text_proto + + def _get_box_classifier_features_shape(self, + image_size, + batch_size, + max_num_proposals, + initial_crop_size, + maxpool_stride, + num_features): + return (batch_size * max_num_proposals, + initial_crop_size/maxpool_stride, + initial_crop_size/maxpool_stride, + num_features) + + def _get_second_stage_box_predictor(self, + num_classes, + is_training, + predict_masks, + masks_are_class_agnostic, + share_box_across_classes=False, + use_keras=False): + box_predictor_proto = box_predictor_pb2.BoxPredictor() + text_format.Merge( + self._get_second_stage_box_predictor_text_proto( + share_box_across_classes), box_predictor_proto) + if predict_masks: + text_format.Merge( + self._add_mask_to_second_stage_box_predictor_text_proto( + masks_are_class_agnostic), box_predictor_proto) + + if use_keras: + return box_predictor_builder.build_keras( + hyperparams_builder.KerasLayerHyperparams, + inplace_batchnorm_update=False, + freeze_batchnorm=False, + 
box_predictor_config=box_predictor_proto, + num_classes=num_classes, + num_predictions_per_location_list=None, + is_training=is_training) + else: + return box_predictor_builder.build( + hyperparams_builder.build, + box_predictor_proto, + num_classes=num_classes, + is_training=is_training) + + def _build_model(self, + is_training, + number_of_stages, + second_stage_batch_size, + first_stage_max_proposals=8, + num_classes=2, + hard_mining=False, + softmax_second_stage_classification_loss=True, + predict_masks=False, + pad_to_max_dimension=None, + masks_are_class_agnostic=False, + use_matmul_crop_and_resize=False, + clip_anchors_to_image=False, + use_matmul_gather_in_matcher=False, + use_static_shapes=False, + calibration_mapping_value=None, + share_box_across_classes=False, + return_raw_detections_during_predict=False): + use_keras = tf_version.is_tf2() + def image_resizer_fn(image, masks=None): + """Fake image resizer function.""" + resized_inputs = [] + resized_image = tf.identity(image) + if pad_to_max_dimension is not None: + resized_image = tf.image.pad_to_bounding_box(image, 0, 0, + pad_to_max_dimension, + pad_to_max_dimension) + resized_inputs.append(resized_image) + if masks is not None: + resized_masks = tf.identity(masks) + if pad_to_max_dimension is not None: + resized_masks = tf.image.pad_to_bounding_box( + tf.transpose(masks, [1, 2, 0]), 0, 0, pad_to_max_dimension, + pad_to_max_dimension) + resized_masks = tf.transpose(resized_masks, [2, 0, 1]) + resized_inputs.append(resized_masks) + resized_inputs.append(tf.shape(image)) + return resized_inputs + + # anchors in this test are designed so that a subset of anchors are inside + # the image and a subset of anchors are outside. 
+ first_stage_anchor_scales = (0.001, 0.005, 0.1) + first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) + first_stage_anchor_strides = (1, 1) + first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( + first_stage_anchor_scales, + first_stage_anchor_aspect_ratios, + anchor_stride=first_stage_anchor_strides) + first_stage_target_assigner = target_assigner.create_target_assigner( + 'FasterRCNN', + 'proposal', + use_matmul_gather=use_matmul_gather_in_matcher) + + if use_keras: + fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor() + else: + fake_feature_extractor = FakeFasterRCNNFeatureExtractor() + + first_stage_box_predictor_hyperparams_text_proto = """ + op: CONV + activation: RELU + regularizer { + l2_regularizer { + weight: 0.00004 + } + } + initializer { + truncated_normal_initializer { + stddev: 0.03 + } + } + """ + if use_keras: + first_stage_box_predictor_arg_scope_fn = ( + self._build_keras_layer_hyperparams( + first_stage_box_predictor_hyperparams_text_proto)) + else: + first_stage_box_predictor_arg_scope_fn = ( + self._build_arg_scope_with_hyperparams( + first_stage_box_predictor_hyperparams_text_proto, is_training)) + + first_stage_box_predictor_kernel_size = 3 + first_stage_atrous_rate = 1 + first_stage_box_predictor_depth = 512 + first_stage_minibatch_size = 3 + first_stage_sampler = sampler.BalancedPositiveNegativeSampler( + positive_fraction=0.5, is_static=use_static_shapes) + + first_stage_nms_score_threshold = -1.0 + first_stage_nms_iou_threshold = 1.0 + first_stage_non_max_suppression_fn = functools.partial( + post_processing.batch_multiclass_non_max_suppression, + score_thresh=first_stage_nms_score_threshold, + iou_thresh=first_stage_nms_iou_threshold, + max_size_per_class=first_stage_max_proposals, + max_total_size=first_stage_max_proposals, + use_static_shapes=use_static_shapes) + + first_stage_localization_loss_weight = 1.0 + first_stage_objectness_loss_weight = 1.0 + + post_processing_config = 
post_processing_pb2.PostProcessing() + post_processing_text_proto = """ + score_converter: IDENTITY + batch_non_max_suppression { + score_threshold: -20.0 + iou_threshold: 1.0 + max_detections_per_class: 5 + max_total_detections: 5 + use_static_shapes: """ + '{}'.format(use_static_shapes) + """ + } + """ + if calibration_mapping_value: + calibration_text_proto = """ + calibration_config { + function_approximation { + x_y_pairs { + x_y_pair { + x: 0.0 + y: %f + } + x_y_pair { + x: 1.0 + y: %f + }}}}""" % (calibration_mapping_value, calibration_mapping_value) + post_processing_text_proto = ( + post_processing_text_proto + ' ' + calibration_text_proto) + text_format.Merge(post_processing_text_proto, post_processing_config) + second_stage_non_max_suppression_fn, second_stage_score_conversion_fn = ( + post_processing_builder.build(post_processing_config)) + + second_stage_target_assigner = target_assigner.create_target_assigner( + 'FasterRCNN', + 'detection', + use_matmul_gather=use_matmul_gather_in_matcher) + second_stage_sampler = sampler.BalancedPositiveNegativeSampler( + positive_fraction=1.0, is_static=use_static_shapes) + + second_stage_localization_loss_weight = 1.0 + second_stage_classification_loss_weight = 1.0 + if softmax_second_stage_classification_loss: + second_stage_classification_loss = ( + losses.WeightedSoftmaxClassificationLoss()) + else: + second_stage_classification_loss = ( + losses.WeightedSigmoidClassificationLoss()) + + hard_example_miner = None + if hard_mining: + hard_example_miner = losses.HardExampleMiner( + num_hard_examples=1, + iou_threshold=0.99, + loss_type='both', + cls_loss_weight=second_stage_classification_loss_weight, + loc_loss_weight=second_stage_localization_loss_weight, + max_negatives_per_positive=None) + + crop_and_resize_fn = ( + spatial_ops.multilevel_matmul_crop_and_resize + if use_matmul_crop_and_resize + else spatial_ops.multilevel_native_crop_and_resize) + common_kwargs = { + 'is_training': + is_training, + 
'num_classes': + num_classes, + 'image_resizer_fn': + image_resizer_fn, + 'feature_extractor': + fake_feature_extractor, + 'number_of_stages': + number_of_stages, + 'first_stage_anchor_generator': + first_stage_anchor_generator, + 'first_stage_target_assigner': + first_stage_target_assigner, + 'first_stage_atrous_rate': + first_stage_atrous_rate, + 'first_stage_box_predictor_arg_scope_fn': + first_stage_box_predictor_arg_scope_fn, + 'first_stage_box_predictor_kernel_size': + first_stage_box_predictor_kernel_size, + 'first_stage_box_predictor_depth': + first_stage_box_predictor_depth, + 'first_stage_minibatch_size': + first_stage_minibatch_size, + 'first_stage_sampler': + first_stage_sampler, + 'first_stage_non_max_suppression_fn': + first_stage_non_max_suppression_fn, + 'first_stage_max_proposals': + first_stage_max_proposals, + 'first_stage_localization_loss_weight': + first_stage_localization_loss_weight, + 'first_stage_objectness_loss_weight': + first_stage_objectness_loss_weight, + 'second_stage_target_assigner': + second_stage_target_assigner, + 'second_stage_batch_size': + second_stage_batch_size, + 'second_stage_sampler': + second_stage_sampler, + 'second_stage_non_max_suppression_fn': + second_stage_non_max_suppression_fn, + 'second_stage_score_conversion_fn': + second_stage_score_conversion_fn, + 'second_stage_localization_loss_weight': + second_stage_localization_loss_weight, + 'second_stage_classification_loss_weight': + second_stage_classification_loss_weight, + 'second_stage_classification_loss': + second_stage_classification_loss, + 'hard_example_miner': + hard_example_miner, + 'crop_and_resize_fn': + crop_and_resize_fn, + 'clip_anchors_to_image': + clip_anchors_to_image, + 'use_static_shapes': + use_static_shapes, + 'resize_masks': + True, + 'return_raw_detections_during_predict': + return_raw_detections_during_predict + } + + return self._get_model( + self._get_second_stage_box_predictor( + num_classes=num_classes, + is_training=is_training, + 
use_keras=use_keras, + predict_masks=predict_masks, + masks_are_class_agnostic=masks_are_class_agnostic, + share_box_across_classes=share_box_across_classes), **common_kwargs) + + @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') + @mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib') + def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1): + """Mocks the context_rcnn_lib_v1 module to test the prediction. + + Using mock object so that we can ensure _compute_box_context_attention is + called in side the prediction function. + + Args: + mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1. + """ + model = self._build_model( + is_training=False, + number_of_stages=2, + second_stage_batch_size=6, + num_classes=42) + mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32) + + mock_context_rcnn_lib_v1._compute_box_context_attention.return_value = mock_tensor + inputs_shape = (2, 20, 20, 3) + inputs = tf.cast( + tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32), + dtype=tf.float32) + preprocessed_inputs, true_image_shapes = model.preprocess(inputs) + context_features = tf.random_uniform((2, 20, 10), + minval=0, + maxval=255, + dtype=tf.float32) + valid_context_size = tf.random_uniform((2,), + minval=0, + maxval=10, + dtype=tf.int32) + features = { + fields.InputDataFields.context_features: context_features, + fields.InputDataFields.valid_context_size: valid_context_size + } + + side_inputs = model.get_side_inputs(features) + + _ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs) + mock_context_rcnn_lib_v1._compute_box_context_attention.assert_called_once() + + @parameterized.named_parameters( + {'testcase_name': 'static_shapes', 'static_shapes': True}, + {'testcase_name': 'nostatic_shapes', 'static_shapes': False}, + ) + def test_prediction_end_to_end(self, static_shapes): + """Runs prediction end to end and test the shape of the results.""" + with test_utils.GraphContextOrNone() as g: + 
model = self._build_model( + is_training=False, + number_of_stages=2, + second_stage_batch_size=6, + use_matmul_crop_and_resize=static_shapes, + clip_anchors_to_image=static_shapes, + use_matmul_gather_in_matcher=static_shapes, + use_static_shapes=static_shapes, + num_classes=42) + + def graph_fn(): + inputs_shape = (2, 20, 20, 3) + inputs = tf.cast( + tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32), + dtype=tf.float32) + preprocessed_inputs, true_image_shapes = model.preprocess(inputs) + context_features = tf.random_uniform((2, 20, 10), + minval=0, + maxval=255, + dtype=tf.float32) + valid_context_size = tf.random_uniform((2,), + minval=0, + maxval=10, + dtype=tf.int32) + features = { + fields.InputDataFields.context_features: context_features, + fields.InputDataFields.valid_context_size: valid_context_size + } + + side_inputs = model.get_side_inputs(features) + prediction_dict = model.predict(preprocessed_inputs, true_image_shapes, + **side_inputs) + return (prediction_dict['rpn_box_predictor_features'], + prediction_dict['rpn_box_encodings'], + prediction_dict['refined_box_encodings'], + prediction_dict['proposal_boxes_normalized'], + prediction_dict['proposal_boxes']) + execute_fn = self.execute if static_shapes else self.execute_cpu + (rpn_box_predictor_features, rpn_box_encodings, refined_box_encodings, + proposal_boxes_normalized, proposal_boxes) = execute_fn(graph_fn, [], + graph=g) + self.assertAllEqual(len(rpn_box_predictor_features), 1) + self.assertAllEqual(rpn_box_predictor_features[0].shape, [2, 20, 20, 512]) + self.assertAllEqual(rpn_box_encodings.shape, [2, 3600, 4]) + self.assertAllEqual(refined_box_encodings.shape, [16, 42, 4]) + self.assertAllEqual(proposal_boxes_normalized.shape, [2, 8, 4]) + self.assertAllEqual(proposal_boxes.shape, [2, 8, 4]) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py 
b/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py deleted file mode 100644 index d80404f45c91252be4a631897fe52976b9109b13..0000000000000000000000000000000000000000 --- a/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py +++ /dev/null @@ -1,540 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for object_detection.meta_architectures.context_meta_arch.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import unittest -from unittest import mock # pylint: disable=g-importing-member -from absl.testing import parameterized -import tensorflow.compat.v1 as tf -import tf_slim as slim - -from google.protobuf import text_format - -from object_detection.anchor_generators import grid_anchor_generator -from object_detection.builders import box_predictor_builder -from object_detection.builders import hyperparams_builder -from object_detection.builders import post_processing_builder -from object_detection.core import balanced_positive_negative_sampler as sampler -from object_detection.core import losses -from object_detection.core import post_processing -from object_detection.core import standard_fields as fields -from object_detection.core import target_assigner -from 
object_detection.meta_architectures import context_rcnn_meta_arch -from object_detection.meta_architectures import faster_rcnn_meta_arch -from object_detection.protos import box_predictor_pb2 -from object_detection.protos import hyperparams_pb2 -from object_detection.protos import post_processing_pb2 -from object_detection.utils import ops -from object_detection.utils import test_case -from object_detection.utils import test_utils -from object_detection.utils import tf_version - - -class FakeFasterRCNNFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): - """Fake feature extractor to use in tests.""" - - def __init__(self): - super(FakeFasterRCNNFeatureExtractor, self).__init__( - is_training=False, - first_stage_features_stride=32, - reuse_weights=None, - weight_decay=0.0) - - def preprocess(self, resized_inputs): - return tf.identity(resized_inputs) - - def _extract_proposal_features(self, preprocessed_inputs, scope): - with tf.variable_scope('mock_model'): - proposal_features = 0 * slim.conv2d( - preprocessed_inputs, num_outputs=3, kernel_size=1, scope='layer1') - return proposal_features, {} - - def _extract_box_classifier_features(self, proposal_feature_maps, scope): - with tf.variable_scope('mock_model'): - return 0 * slim.conv2d( - proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer2') - - -class FakeFasterRCNNKerasFeatureExtractor( - faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor): - """Fake feature extractor to use in tests.""" - - def __init__(self): - super(FakeFasterRCNNKerasFeatureExtractor, self).__init__( - is_training=False, first_stage_features_stride=32, weight_decay=0.0) - - def preprocess(self, resized_inputs): - return tf.identity(resized_inputs) - - def get_proposal_feature_extractor_model(self, name): - - class ProposalFeatureExtractor(tf.keras.Model): - """Dummy proposal feature extraction.""" - - def __init__(self, name): - super(ProposalFeatureExtractor, self).__init__(name=name) - self.conv = None - 
- def build(self, input_shape): - self.conv = tf.keras.layers.Conv2D( - 3, kernel_size=1, padding='SAME', name='layer1') - - def call(self, inputs): - return self.conv(inputs) - - return ProposalFeatureExtractor(name=name) - - def get_box_classifier_feature_extractor_model(self, name): - return tf.keras.Sequential([ - tf.keras.layers.Conv2D( - 3, kernel_size=1, padding='SAME', name=name + '_layer2') - ]) - - -@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.') -class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase): - - def _get_model(self, box_predictor, **common_kwargs): - return context_rcnn_meta_arch.ContextRCNNMetaArch( - initial_crop_size=3, - maxpool_kernel_size=1, - maxpool_stride=1, - second_stage_mask_rcnn_box_predictor=box_predictor, - attention_bottleneck_dimension=10, - attention_temperature=0.2, - **common_kwargs) - - def _build_arg_scope_with_hyperparams(self, hyperparams_text_proto, - is_training): - hyperparams = hyperparams_pb2.Hyperparams() - text_format.Merge(hyperparams_text_proto, hyperparams) - return hyperparams_builder.build(hyperparams, is_training=is_training) - - def _build_keras_layer_hyperparams(self, hyperparams_text_proto): - hyperparams = hyperparams_pb2.Hyperparams() - text_format.Merge(hyperparams_text_proto, hyperparams) - return hyperparams_builder.KerasLayerHyperparams(hyperparams) - - def _get_second_stage_box_predictor_text_proto(self, - share_box_across_classes=False - ): - share_box_field = 'true' if share_box_across_classes else 'false' - box_predictor_text_proto = """ - mask_rcnn_box_predictor {{ - fc_hyperparams {{ - op: FC - activation: NONE - regularizer {{ - l2_regularizer {{ - weight: 0.0005 - }} - }} - initializer {{ - variance_scaling_initializer {{ - factor: 1.0 - uniform: true - mode: FAN_AVG - }} - }} - }} - share_box_across_classes: {share_box_across_classes} - }} - """.format(share_box_across_classes=share_box_field) - return box_predictor_text_proto - - def 
_get_box_classifier_features_shape(self, - image_size, - batch_size, - max_num_proposals, - initial_crop_size, - maxpool_stride, - num_features): - return (batch_size * max_num_proposals, - initial_crop_size/maxpool_stride, - initial_crop_size/maxpool_stride, - num_features) - - def _get_second_stage_box_predictor(self, - num_classes, - is_training, - predict_masks, - masks_are_class_agnostic, - share_box_across_classes=False, - use_keras=False): - box_predictor_proto = box_predictor_pb2.BoxPredictor() - text_format.Merge( - self._get_second_stage_box_predictor_text_proto( - share_box_across_classes), box_predictor_proto) - if predict_masks: - text_format.Merge( - self._add_mask_to_second_stage_box_predictor_text_proto( - masks_are_class_agnostic), box_predictor_proto) - - if use_keras: - return box_predictor_builder.build_keras( - hyperparams_builder.KerasLayerHyperparams, - inplace_batchnorm_update=False, - freeze_batchnorm=False, - box_predictor_config=box_predictor_proto, - num_classes=num_classes, - num_predictions_per_location_list=None, - is_training=is_training) - else: - return box_predictor_builder.build( - hyperparams_builder.build, - box_predictor_proto, - num_classes=num_classes, - is_training=is_training) - - def _build_model(self, - is_training, - number_of_stages, - second_stage_batch_size, - first_stage_max_proposals=8, - num_classes=2, - hard_mining=False, - softmax_second_stage_classification_loss=True, - predict_masks=False, - pad_to_max_dimension=None, - masks_are_class_agnostic=False, - use_matmul_crop_and_resize=False, - clip_anchors_to_image=False, - use_matmul_gather_in_matcher=False, - use_static_shapes=False, - calibration_mapping_value=None, - share_box_across_classes=False, - return_raw_detections_during_predict=False): - use_keras = tf_version.is_tf2() - def image_resizer_fn(image, masks=None): - """Fake image resizer function.""" - resized_inputs = [] - resized_image = tf.identity(image) - if pad_to_max_dimension is not None: - 
resized_image = tf.image.pad_to_bounding_box(image, 0, 0, - pad_to_max_dimension, - pad_to_max_dimension) - resized_inputs.append(resized_image) - if masks is not None: - resized_masks = tf.identity(masks) - if pad_to_max_dimension is not None: - resized_masks = tf.image.pad_to_bounding_box( - tf.transpose(masks, [1, 2, 0]), 0, 0, pad_to_max_dimension, - pad_to_max_dimension) - resized_masks = tf.transpose(resized_masks, [2, 0, 1]) - resized_inputs.append(resized_masks) - resized_inputs.append(tf.shape(image)) - return resized_inputs - - # anchors in this test are designed so that a subset of anchors are inside - # the image and a subset of anchors are outside. - first_stage_anchor_scales = (0.001, 0.005, 0.1) - first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) - first_stage_anchor_strides = (1, 1) - first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( - first_stage_anchor_scales, - first_stage_anchor_aspect_ratios, - anchor_stride=first_stage_anchor_strides) - first_stage_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', - 'proposal', - use_matmul_gather=use_matmul_gather_in_matcher) - - if use_keras: - fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor() - else: - fake_feature_extractor = FakeFasterRCNNFeatureExtractor() - - first_stage_box_predictor_hyperparams_text_proto = """ - op: CONV - activation: RELU - regularizer { - l2_regularizer { - weight: 0.00004 - } - } - initializer { - truncated_normal_initializer { - stddev: 0.03 - } - } - """ - if use_keras: - first_stage_box_predictor_arg_scope_fn = ( - self._build_keras_layer_hyperparams( - first_stage_box_predictor_hyperparams_text_proto)) - else: - first_stage_box_predictor_arg_scope_fn = ( - self._build_arg_scope_with_hyperparams( - first_stage_box_predictor_hyperparams_text_proto, is_training)) - - first_stage_box_predictor_kernel_size = 3 - first_stage_atrous_rate = 1 - first_stage_box_predictor_depth = 512 - first_stage_minibatch_size = 3 - 
first_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=0.5, is_static=use_static_shapes) - - first_stage_nms_score_threshold = -1.0 - first_stage_nms_iou_threshold = 1.0 - first_stage_max_proposals = first_stage_max_proposals - first_stage_non_max_suppression_fn = functools.partial( - post_processing.batch_multiclass_non_max_suppression, - score_thresh=first_stage_nms_score_threshold, - iou_thresh=first_stage_nms_iou_threshold, - max_size_per_class=first_stage_max_proposals, - max_total_size=first_stage_max_proposals, - use_static_shapes=use_static_shapes) - - first_stage_localization_loss_weight = 1.0 - first_stage_objectness_loss_weight = 1.0 - - post_processing_config = post_processing_pb2.PostProcessing() - post_processing_text_proto = """ - score_converter: IDENTITY - batch_non_max_suppression { - score_threshold: -20.0 - iou_threshold: 1.0 - max_detections_per_class: 5 - max_total_detections: 5 - use_static_shapes: """ + '{}'.format(use_static_shapes) + """ - } - """ - if calibration_mapping_value: - calibration_text_proto = """ - calibration_config { - function_approximation { - x_y_pairs { - x_y_pair { - x: 0.0 - y: %f - } - x_y_pair { - x: 1.0 - y: %f - }}}}""" % (calibration_mapping_value, calibration_mapping_value) - post_processing_text_proto = ( - post_processing_text_proto + ' ' + calibration_text_proto) - text_format.Merge(post_processing_text_proto, post_processing_config) - second_stage_non_max_suppression_fn, second_stage_score_conversion_fn = ( - post_processing_builder.build(post_processing_config)) - - second_stage_target_assigner = target_assigner.create_target_assigner( - 'FasterRCNN', - 'detection', - use_matmul_gather=use_matmul_gather_in_matcher) - second_stage_sampler = sampler.BalancedPositiveNegativeSampler( - positive_fraction=1.0, is_static=use_static_shapes) - - second_stage_localization_loss_weight = 1.0 - second_stage_classification_loss_weight = 1.0 - if softmax_second_stage_classification_loss: - 
second_stage_classification_loss = ( - losses.WeightedSoftmaxClassificationLoss()) - else: - second_stage_classification_loss = ( - losses.WeightedSigmoidClassificationLoss()) - - hard_example_miner = None - if hard_mining: - hard_example_miner = losses.HardExampleMiner( - num_hard_examples=1, - iou_threshold=0.99, - loss_type='both', - cls_loss_weight=second_stage_classification_loss_weight, - loc_loss_weight=second_stage_localization_loss_weight, - max_negatives_per_positive=None) - - crop_and_resize_fn = ( - ops.matmul_crop_and_resize - if use_matmul_crop_and_resize else ops.native_crop_and_resize) - common_kwargs = { - 'is_training': - is_training, - 'num_classes': - num_classes, - 'image_resizer_fn': - image_resizer_fn, - 'feature_extractor': - fake_feature_extractor, - 'number_of_stages': - number_of_stages, - 'first_stage_anchor_generator': - first_stage_anchor_generator, - 'first_stage_target_assigner': - first_stage_target_assigner, - 'first_stage_atrous_rate': - first_stage_atrous_rate, - 'first_stage_box_predictor_arg_scope_fn': - first_stage_box_predictor_arg_scope_fn, - 'first_stage_box_predictor_kernel_size': - first_stage_box_predictor_kernel_size, - 'first_stage_box_predictor_depth': - first_stage_box_predictor_depth, - 'first_stage_minibatch_size': - first_stage_minibatch_size, - 'first_stage_sampler': - first_stage_sampler, - 'first_stage_non_max_suppression_fn': - first_stage_non_max_suppression_fn, - 'first_stage_max_proposals': - first_stage_max_proposals, - 'first_stage_localization_loss_weight': - first_stage_localization_loss_weight, - 'first_stage_objectness_loss_weight': - first_stage_objectness_loss_weight, - 'second_stage_target_assigner': - second_stage_target_assigner, - 'second_stage_batch_size': - second_stage_batch_size, - 'second_stage_sampler': - second_stage_sampler, - 'second_stage_non_max_suppression_fn': - second_stage_non_max_suppression_fn, - 'second_stage_score_conversion_fn': - second_stage_score_conversion_fn, - 
'second_stage_localization_loss_weight': - second_stage_localization_loss_weight, - 'second_stage_classification_loss_weight': - second_stage_classification_loss_weight, - 'second_stage_classification_loss': - second_stage_classification_loss, - 'hard_example_miner': - hard_example_miner, - 'crop_and_resize_fn': - crop_and_resize_fn, - 'clip_anchors_to_image': - clip_anchors_to_image, - 'use_static_shapes': - use_static_shapes, - 'resize_masks': - True, - 'return_raw_detections_during_predict': - return_raw_detections_during_predict - } - - return self._get_model( - self._get_second_stage_box_predictor( - num_classes=num_classes, - is_training=is_training, - use_keras=use_keras, - predict_masks=predict_masks, - masks_are_class_agnostic=masks_are_class_agnostic, - share_box_across_classes=share_box_across_classes), **common_kwargs) - - @mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib') - def test_prediction_mock(self, mock_context_rcnn_lib): - """Mocks the context_rcnn_lib module to test the prediction. - - Using mock object so that we can ensure compute_box_context_attention is - called in side the prediction function. - - Args: - mock_context_rcnn_lib: mock module for the context_rcnn_lib. 
- """ - model = self._build_model( - is_training=False, - number_of_stages=2, - second_stage_batch_size=6, - num_classes=42) - mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32) - - mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor - inputs_shape = (2, 20, 20, 3) - inputs = tf.cast( - tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32), - dtype=tf.float32) - preprocessed_inputs, true_image_shapes = model.preprocess(inputs) - context_features = tf.random_uniform((2, 20, 10), - minval=0, - maxval=255, - dtype=tf.float32) - valid_context_size = tf.random_uniform((2,), - minval=0, - maxval=10, - dtype=tf.int32) - features = { - fields.InputDataFields.context_features: context_features, - fields.InputDataFields.valid_context_size: valid_context_size - } - - side_inputs = model.get_side_inputs(features) - - _ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs) - mock_context_rcnn_lib.compute_box_context_attention.assert_called_once() - - @parameterized.named_parameters( - {'testcase_name': 'static_shapes', 'static_shapes': True}, - {'testcase_name': 'nostatic_shapes', 'static_shapes': False}, - ) - def test_prediction_end_to_end(self, static_shapes): - """Runs prediction end to end and test the shape of the results.""" - with test_utils.GraphContextOrNone() as g: - model = self._build_model( - is_training=False, - number_of_stages=2, - second_stage_batch_size=6, - use_matmul_crop_and_resize=static_shapes, - clip_anchors_to_image=static_shapes, - use_matmul_gather_in_matcher=static_shapes, - use_static_shapes=static_shapes, - num_classes=42) - - def graph_fn(): - inputs_shape = (2, 20, 20, 3) - inputs = tf.cast( - tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32), - dtype=tf.float32) - preprocessed_inputs, true_image_shapes = model.preprocess(inputs) - context_features = tf.random_uniform((2, 20, 10), - minval=0, - maxval=255, - dtype=tf.float32) - valid_context_size = 
tf.random_uniform((2,), - minval=0, - maxval=10, - dtype=tf.int32) - features = { - fields.InputDataFields.context_features: context_features, - fields.InputDataFields.valid_context_size: valid_context_size - } - - side_inputs = model.get_side_inputs(features) - - prediction_dict = model.predict(preprocessed_inputs, true_image_shapes, - **side_inputs) - return (prediction_dict['rpn_box_predictor_features'], - prediction_dict['rpn_box_encodings'], - prediction_dict['refined_box_encodings'], - prediction_dict['proposal_boxes_normalized'], - prediction_dict['proposal_boxes']) - execute_fn = self.execute if static_shapes else self.execute_cpu - (rpn_box_predictor_features, rpn_box_encodings, refined_box_encodings, - proposal_boxes_normalized, proposal_boxes) = execute_fn(graph_fn, [], - graph=g) - self.assertAllEqual(rpn_box_predictor_features.shape, [2, 20, 20, 512]) - self.assertAllEqual(rpn_box_encodings.shape, [2, 3600, 4]) - self.assertAllEqual(refined_box_encodings.shape, [16, 42, 4]) - self.assertAllEqual(proposal_boxes_normalized.shape, [2, 8, 4]) - self.assertAllEqual(proposal_boxes.shape, [2, 8, 4]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/object_detection/meta_architectures/deepmac_meta_arch.py b/research/object_detection/meta_architectures/deepmac_meta_arch.py new file mode 100644 index 0000000000000000000000000000000000000000..cf689a0fcec4a9df6fd3ae8bc03021ff8090158a --- /dev/null +++ b/research/object_detection/meta_architectures/deepmac_meta_arch.py @@ -0,0 +1,862 @@ +"""Deep Mask heads above CenterNet (DeepMAC) architecture. + +TODO(vighneshb) Add link to paper when done. 
+""" + +import collections + +import numpy as np +import tensorflow as tf + +from object_detection.builders import losses_builder +from object_detection.core import box_list +from object_detection.core import box_list_ops +from object_detection.core import losses +from object_detection.core import preprocessor +from object_detection.core import standard_fields as fields +from object_detection.meta_architectures import center_net_meta_arch +from object_detection.models.keras_models import hourglass_network +from object_detection.models.keras_models import resnet_v1 +from object_detection.protos import losses_pb2 +from object_detection.protos import preprocessor_pb2 +from object_detection.utils import shape_utils +from object_detection.utils import spatial_transform_ops + + +INSTANCE_EMBEDDING = 'INSTANCE_EMBEDDING' +PIXEL_EMBEDDING = 'PIXEL_EMBEDDING' +DEEP_MASK_ESTIMATION = 'deep_mask_estimation' +LOSS_KEY_PREFIX = center_net_meta_arch.LOSS_KEY_PREFIX + + +class DeepMACParams( + collections.namedtuple('DeepMACParams', [ + 'classification_loss', 'dim', 'task_loss_weight', 'pixel_embedding_dim', + 'allowed_masked_classes_ids', 'mask_size', 'mask_num_subsamples', + 'use_xy', 'network_type', 'use_instance_embedding', 'num_init_channels', + 'predict_full_resolution_masks', 'postprocess_crop_size', + 'max_roi_jitter_ratio', 'roi_jitter_mode' + ])): + """Class holding the DeepMAC network configutration.""" + + __slots__ = () + + def __new__(cls, classification_loss, dim, task_loss_weight, + pixel_embedding_dim, allowed_masked_classes_ids, mask_size, + mask_num_subsamples, use_xy, network_type, use_instance_embedding, + num_init_channels, predict_full_resolution_masks, + postprocess_crop_size, max_roi_jitter_ratio, + roi_jitter_mode): + return super(DeepMACParams, + cls).__new__(cls, classification_loss, dim, + task_loss_weight, pixel_embedding_dim, + allowed_masked_classes_ids, mask_size, + mask_num_subsamples, use_xy, network_type, + use_instance_embedding, 
num_init_channels, + predict_full_resolution_masks, + postprocess_crop_size, max_roi_jitter_ratio, + roi_jitter_mode) + + +def subsample_instances(classes, weights, boxes, masks, num_subsamples): + """Randomly subsamples instances to the desired number. + + Args: + classes: [num_instances, num_classes] float tensor of one-hot encoded + classes. + weights: [num_instances] float tensor of weights of each instance. + boxes: [num_instances, 4] tensor of box coordinates. + masks: [num_instances, height, width] tensor of per-instance masks. + num_subsamples: int, the desired number of samples. + + Returns: + classes: [num_subsamples, num_classes] float tensor of classes. + weights: [num_subsamples] float tensor of weights. + boxes: [num_subsamples, 4] float tensor of box coordinates. + masks: [num_subsamples, height, width] float tensor of per-instance masks. + + """ + + if num_subsamples <= -1: + return classes, weights, boxes, masks + + num_instances = tf.reduce_sum(tf.cast(weights > 0.5, tf.int32)) + + if num_instances <= num_subsamples: + return (classes[:num_subsamples], weights[:num_subsamples], + boxes[:num_subsamples], masks[:num_subsamples]) + + else: + random_index = tf.random.uniform([num_subsamples], 0, num_instances, + dtype=tf.int32) + + return (tf.gather(classes, random_index), tf.gather(weights, random_index), + tf.gather(boxes, random_index), tf.gather(masks, random_index)) + + +def _get_deepmac_network_by_type(name, num_init_channels, mask_size=None): + """Get DeepMAC network model given a string type.""" + + if name.startswith('hourglass'): + if name == 'hourglass10': + return hourglass_network.hourglass_10(num_init_channels, + initial_downsample=False) + elif name == 'hourglass20': + return hourglass_network.hourglass_20(num_init_channels, + initial_downsample=False) + elif name == 'hourglass32': + return hourglass_network.hourglass_32(num_init_channels, + initial_downsample=False) + elif name == 'hourglass52': + return 
hourglass_network.hourglass_52(num_init_channels, + initial_downsample=False) + elif name == 'hourglass100': + return hourglass_network.hourglass_100(num_init_channels, + initial_downsample=False) + elif name == 'hourglass20_uniform_size': + return hourglass_network.hourglass_20_uniform_size(num_init_channels) + + elif name == 'hourglass20_no_shortcut': + return hourglass_network.hourglass_20_no_shortcut(num_init_channels) + + elif name == 'fully_connected': + if not mask_size: + raise ValueError('Mask size must be set.') + return FullyConnectedMaskHead(num_init_channels, mask_size) + + elif name.startswith('resnet'): + return ResNetMaskNetwork(name, num_init_channels) + + raise ValueError('Unknown network type {}'.format(name)) + + +def crop_masks_within_boxes(masks, boxes, output_size): + """Crops masks to lie tightly within the boxes. + + Args: + masks: A [num_instances, height, width] float tensor of masks. + boxes: A [num_instances, 4] sized tensor of normalized bounding boxes. + output_size: The height and width of the output masks. + + Returns: + masks: A [num_instances, output_size, output_size] tensor of masks which + are cropped to be tightly within the gives boxes and resized. + + """ + masks = spatial_transform_ops.matmul_crop_and_resize( + masks[:, :, :, tf.newaxis], boxes[:, tf.newaxis, :], + [output_size, output_size]) + return masks[:, 0, :, :, 0] + + +def resize_instance_masks(masks, shape): + height, width = shape + masks_ex = masks[:, :, :, tf.newaxis] + masks_ex = tf.image.resize(masks_ex, (height, width), + method=tf.image.ResizeMethod.BILINEAR) + masks = masks_ex[:, :, :, 0] + + return masks + + +def filter_masked_classes(masked_class_ids, classes, weights, masks): + """Filter out masks whose class IDs are not present in masked_class_ids. + + Args: + masked_class_ids: A list of class IDs allowed to have masks. These class IDs + are 1-indexed. + classes: A [num_instances, num_classes] float tensor containing the one-hot + encoded classes. 
+ weights: A [num_instances] float tensor containing the weights of each + sample. + masks: A [num_instances, height, width] tensor containing the mask per + instance. + + Returns: + classes_filtered: A [num_instances, num_classes] float tensor containing the + one-hot encoded classes with classes not in masked_class_ids zeroed out. + weights_filtered: A [num_instances] float tensor containing the weights of + each sample with instances whose classes aren't in masked_class_ids + zeroed out. + masks_filtered: A [num_instances, height, width] tensor containing the mask + per instance with masks not belonging to masked_class_ids zeroed out. + """ + + if len(masked_class_ids) == 0: # pylint:disable=g-explicit-length-test + return classes, weights, masks + + if tf.shape(classes)[0] == 0: + return classes, weights, masks + + masked_class_ids = tf.constant(np.array(masked_class_ids, dtype=np.int32)) + label_id_offset = 1 + masked_class_ids -= label_id_offset + class_ids = tf.argmax(classes, axis=1, output_type=tf.int32) + matched_classes = tf.equal( + class_ids[:, tf.newaxis], masked_class_ids[tf.newaxis, :] + ) + + matched_classes = tf.reduce_any(matched_classes, axis=1) + matched_classes = tf.cast(matched_classes, tf.float32) + + return ( + classes * matched_classes[:, tf.newaxis], + weights * matched_classes, + masks * matched_classes[:, tf.newaxis, tf.newaxis] + ) + + +class ResNetMaskNetwork(tf.keras.layers.Layer): + """A small wrapper around ResNet blocks to predict masks.""" + + def __init__(self, resnet_type, num_init_channels): + """Creates the ResNet mask network. + + Args: + resnet_type: A string of the for resnetN where N where N is in + [4, 8, 12, 16, 20] + num_init_channels: Number of filters in the ResNet block. 
+ """ + + super(ResNetMaskNetwork, self).__init__() + nc = num_init_channels + + if resnet_type == 'resnet4': + channel_dims = [nc * 2] + blocks = [2] + elif resnet_type == 'resnet8': + channel_dims = [nc * 2] + blocks = [4] + elif resnet_type == 'resnet12': + channel_dims = [nc * 2] + blocks = [6] + elif resnet_type == 'resnet16': + channel_dims = [nc * 2] + blocks = [8] + # Defined such that the channels are roughly similar to the hourglass20. + elif resnet_type == 'resnet20': + channel_dims = [nc * 2, nc * 3] + blocks = [8, 2] + else: + raise ValueError('Unknown resnet type "{}"'.format(resnet_type)) + + self.input_layer = tf.keras.layers.Conv2D(nc, 1, 1) + + # Last channel has to be defined so that batch norm can initialize properly. + model_input = tf.keras.layers.Input([None, None, nc]) + output = model_input + + for i, (num_blocks, channels) in enumerate(zip(blocks, channel_dims)): + output = resnet_v1.stack_basic(output, filters=channels, + blocks=num_blocks, stride1=1, + name='resnet_mask_block_%d' % i) + self.model = tf.keras.Model(inputs=model_input, outputs=output) + + def __call__(self, inputs): + return self.model(self.input_layer(inputs)) + + +class FullyConnectedMaskHead(tf.keras.layers.Layer): + """A 2 layer fully connected mask head.""" + + def __init__(self, num_init_channels, mask_size): + super(FullyConnectedMaskHead, self).__init__() + self.fc1 = tf.keras.layers.Dense(units=1024, activation='relu') + self.fc2 = tf.keras.layers.Dense(units=mask_size*mask_size) + self.mask_size = mask_size + self.num_input_channels = num_init_channels + self.input_layer = tf.keras.layers.Conv2D(num_init_channels, 1, 1) + model_input = tf.keras.layers.Input( + [mask_size * mask_size * num_init_channels,]) + output = self.fc2(self.fc1(model_input)) + self.model = tf.keras.Model(inputs=model_input, outputs=output) + + def __call__(self, inputs): + inputs = self.input_layer(inputs) + inputs_shape = tf.shape(inputs) + num_instances = inputs_shape[0] + height = 
inputs_shape[1] + width = inputs_shape[2] + dims = inputs_shape[3] + flattened_inputs = tf.reshape(inputs, + [num_instances, height * width * dims]) + flattened_masks = self.model(flattened_inputs) + return tf.reshape(flattened_masks, + [num_instances, self.mask_size, self.mask_size, 1]) + + +class MaskHeadNetwork(tf.keras.layers.Layer): + """Mask head class for DeepMAC.""" + + def __init__(self, network_type, num_init_channels=64, + use_instance_embedding=True, mask_size=None): + """Initializes the network. + + Args: + network_type: A string denoting the kind of network we want to use + internally. + num_init_channels: int, the number of channels in the first block. The + number of channels in the following blocks depend on the network type + used. + use_instance_embedding: bool, if set, we concatenate the instance + embedding to the input while predicting the mask. + mask_size: int, size of the output mask. Required only with + `fully_connected` mask type. + """ + + super(MaskHeadNetwork, self).__init__() + + self._net = _get_deepmac_network_by_type( + network_type, num_init_channels, mask_size) + self._use_instance_embedding = use_instance_embedding + + self.project_out = tf.keras.layers.Conv2D( + filters=1, kernel_size=1, activation=None) + + def __call__(self, instance_embedding, pixel_embedding, training): + """Returns mask logits given object center and spatial embeddings. + + Args: + instance_embedding: A [num_instances, embedding_size] float tensor + representing the center emedding vector of each instance. + pixel_embedding: A [num_instances, height, width, pixel_embedding_size] + float tensor representing the per-pixel spatial embedding for each + instance. + training: boolean flag indicating training or testing mode. + + Returns: + mask: A [num_instances, height, width] float tensor containing the mask + logits for each instance. 
+ """ + + height = tf.shape(pixel_embedding)[1] + width = tf.shape(pixel_embedding)[2] + + instance_embedding = instance_embedding[:, tf.newaxis, tf.newaxis, :] + instance_embedding = tf.tile(instance_embedding, [1, height, width, 1]) + + if self._use_instance_embedding: + inputs = tf.concat([pixel_embedding, instance_embedding], axis=3) + else: + inputs = pixel_embedding + + out = self._net(inputs) + if isinstance(out, list): + out = out[-1] + + if out.shape[-1] > 1: + out = self.project_out(out) + + return tf.squeeze(out, axis=-1) + + +def deepmac_proto_to_params(deepmac_config): + """Convert proto to named tuple.""" + + loss = losses_pb2.Loss() + # Add dummy localization loss to avoid the loss_builder throwing error. + loss.localization_loss.weighted_l2.CopyFrom( + losses_pb2.WeightedL2LocalizationLoss()) + loss.classification_loss.CopyFrom(deepmac_config.classification_loss) + classification_loss, _, _, _, _, _, _ = (losses_builder.build(loss)) + + jitter_mode = preprocessor_pb2.RandomJitterBoxes.JitterMode.Name( + deepmac_config.jitter_mode).lower() + + return DeepMACParams( + dim=deepmac_config.dim, + classification_loss=classification_loss, + task_loss_weight=deepmac_config.task_loss_weight, + pixel_embedding_dim=deepmac_config.pixel_embedding_dim, + allowed_masked_classes_ids=deepmac_config.allowed_masked_classes_ids, + mask_size=deepmac_config.mask_size, + mask_num_subsamples=deepmac_config.mask_num_subsamples, + use_xy=deepmac_config.use_xy, + network_type=deepmac_config.network_type, + use_instance_embedding=deepmac_config.use_instance_embedding, + num_init_channels=deepmac_config.num_init_channels, + predict_full_resolution_masks= + deepmac_config.predict_full_resolution_masks, + postprocess_crop_size=deepmac_config.postprocess_crop_size, + max_roi_jitter_ratio=deepmac_config.max_roi_jitter_ratio, + roi_jitter_mode=jitter_mode + ) + + +class DeepMACMetaArch(center_net_meta_arch.CenterNetMetaArch): + """The experimental CenterNet DeepMAC[1] model. 
+ + [1]: https://arxiv.org/abs/2104.00613 + """ + + def __init__(self, + is_training, + add_summaries, + num_classes, + feature_extractor, + image_resizer_fn, + object_center_params, + object_detection_params, + deepmac_params, + compute_heatmap_sparse=False): + """Constructs the super class with object center & detection params only.""" + + self._deepmac_params = deepmac_params + super(DeepMACMetaArch, self).__init__( + is_training=is_training, add_summaries=add_summaries, + num_classes=num_classes, feature_extractor=feature_extractor, + image_resizer_fn=image_resizer_fn, + object_center_params=object_center_params, + object_detection_params=object_detection_params, + compute_heatmap_sparse=compute_heatmap_sparse) + + def _construct_prediction_heads(self, num_classes, num_feature_outputs, + class_prediction_bias_init): + super_instance = super(DeepMACMetaArch, self) + prediction_heads = super_instance._construct_prediction_heads( # pylint:disable=protected-access + num_classes, num_feature_outputs, class_prediction_bias_init) + + if self._deepmac_params is not None: + prediction_heads[INSTANCE_EMBEDDING] = [ + center_net_meta_arch.make_prediction_net(self._deepmac_params.dim) + for _ in range(num_feature_outputs) + ] + + prediction_heads[PIXEL_EMBEDDING] = [ + center_net_meta_arch.make_prediction_net( + self._deepmac_params.pixel_embedding_dim) + for _ in range(num_feature_outputs) + ] + + self._mask_net = MaskHeadNetwork( + network_type=self._deepmac_params.network_type, + use_instance_embedding=self._deepmac_params.use_instance_embedding, + num_init_channels=self._deepmac_params.num_init_channels) + + return prediction_heads + + def _get_mask_head_input(self, boxes, pixel_embedding): + """Get the input to the mask network, given bounding boxes. + + Args: + boxes: A [num_instances, 4] float tensor containing bounding boxes in + normalized coordinates. + pixel_embedding: A [height, width, embedding_size] float tensor + containing spatial pixel embeddings. 
+ + Returns: + embedding: A [num_instances, mask_height, mask_width, embedding_size + 2] + float tensor containing the inputs to the mask network. For each + bounding box, we concatenate the normalized box coordinates to the + cropped pixel embeddings. If predict_full_resolution_masks is set, + mask_height and mask_width are the same as height and width of + pixel_embedding. If not, mask_height and mask_width are the same as + mask_size. + """ + + num_instances = tf.shape(boxes)[0] + mask_size = self._deepmac_params.mask_size + + if self._deepmac_params.predict_full_resolution_masks: + num_instances = tf.shape(boxes)[0] + pixel_embedding = pixel_embedding[tf.newaxis, :, :, :] + pixel_embeddings_processed = tf.tile(pixel_embedding, + [num_instances, 1, 1, 1]) + else: + # TODO(vighneshb) Explore multilevel_roi_align and align_corners=False. + pixel_embeddings_cropped = spatial_transform_ops.matmul_crop_and_resize( + pixel_embedding[tf.newaxis], boxes[tf.newaxis], + [mask_size, mask_size]) + pixel_embeddings_processed = pixel_embeddings_cropped[0] + + mask_shape = tf.shape(pixel_embeddings_processed) + mask_height, mask_width = mask_shape[1], mask_shape[2] + y_grid, x_grid = tf.meshgrid(tf.linspace(-1.0, 1.0, mask_height), + tf.linspace(-1.0, 1.0, mask_width), + indexing='ij') + coords = tf.stack([y_grid, x_grid], axis=2) + coords = coords[tf.newaxis, :, :, :] + coords = tf.tile(coords, [num_instances, 1, 1, 1]) + + if self._deepmac_params.use_xy: + return tf.concat([coords, pixel_embeddings_processed], axis=3) + else: + return pixel_embeddings_processed + + def _get_instance_embeddings(self, boxes, instance_embedding): + """Return the instance embeddings from bounding box centers. + + Args: + boxes: A [num_instances, 4] float tensor holding bounding boxes. The + coordinates are in normalized input space. + instance_embedding: A [height, width, embedding_size] float tensor + containing the instance embeddings. 
+ + Returns: + instance_embeddings: A [num_instances, embedding_size] shaped float tensor + containing the center embedding for each instance. + """ + blist = box_list.BoxList(boxes) + output_height = tf.shape(instance_embedding)[0] + output_width = tf.shape(instance_embedding)[1] + + blist_output = box_list_ops.to_absolute_coordinates( + blist, output_height, output_width, check_range=False) + (y_center_output, x_center_output, + _, _) = blist_output.get_center_coordinates_and_sizes() + center_coords_output = tf.stack([y_center_output, x_center_output], axis=1) + center_coords_output_int = tf.cast(center_coords_output, tf.int32) + center_latents = tf.gather_nd(instance_embedding, center_coords_output_int) + + return center_latents + + def _get_groundtruth_mask_output(self, boxes, masks): + """Get the expected mask output for each box. + + Args: + boxes: A [num_instances, 4] float tensor containing bounding boxes in + normalized coordinates. + masks: A [num_instances, height, width] float tensor containing binary + ground truth masks. + + Returns: + masks: If predict_full_resolution_masks is set, masks are not resized + and the size of this tensor is [num_instances, input_height, input_width]. + Otherwise, returns a tensor of size [num_instances, mask_size, mask_size]. + """ + mask_size = self._deepmac_params.mask_size + if self._deepmac_params.predict_full_resolution_masks: + return masks + else: + cropped_masks = spatial_transform_ops.matmul_crop_and_resize( + masks[:, :, :, tf.newaxis], boxes[:, tf.newaxis, :], + [mask_size, mask_size]) + cropped_masks = tf.stop_gradient(cropped_masks) + cropped_masks = tf.squeeze(cropped_masks, axis=[1, 4]) + + # TODO(vighneshb) should we discretize masks? 
+ return cropped_masks + + def _resize_logits_like_gt(self, logits, gt): + + height, width = tf.shape(gt)[1], tf.shape(gt)[2] + + return resize_instance_masks(logits, (height, width)) + + def _compute_per_instance_mask_loss( + self, boxes, masks, instance_embedding, pixel_embedding): + """Returns the mask loss per instance. + + Args: + boxes: A [num_instances, 4] float tensor holding bounding boxes. The + coordinates are in normalized input space. + masks: A [num_instances, input_height, input_width] float tensor + containing the instance masks. + instance_embedding: A [output_height, output_width, embedding_size] + float tensor containing the instance embeddings. + pixel_embedding: optional [output_height, output_width, + pixel_embedding_size] float tensor containing the per-pixel embeddings. + + Returns: + mask_loss: A [num_instances] shaped float tensor containing the + mask loss for each instance. + """ + + num_instances = tf.shape(boxes)[0] + + if tf.keras.backend.learning_phase(): + boxes = preprocessor.random_jitter_boxes( + boxes, self._deepmac_params.max_roi_jitter_ratio, + jitter_mode=self._deepmac_params.roi_jitter_mode) + mask_input = self._get_mask_head_input( + boxes, pixel_embedding) + instance_embeddings = self._get_instance_embeddings( + boxes, instance_embedding) + + mask_logits = self._mask_net( + instance_embeddings, mask_input, + training=tf.keras.backend.learning_phase()) + mask_gt = self._get_groundtruth_mask_output(boxes, masks) + mask_logits = self._resize_logits_like_gt(mask_logits, mask_gt) + + mask_logits = tf.reshape(mask_logits, [num_instances, -1, 1]) + mask_gt = tf.reshape(mask_gt, [num_instances, -1, 1]) + loss = self._deepmac_params.classification_loss( + prediction_tensor=mask_logits, + target_tensor=mask_gt, + weights=tf.ones_like(mask_logits)) + + # TODO(vighneshb) Make this configurable via config. 
+ if isinstance(self._deepmac_params.classification_loss, + losses.WeightedDiceClassificationLoss): + return tf.reduce_sum(loss, axis=1) + else: + return tf.reduce_mean(loss, axis=[1, 2]) + + def _compute_instance_masks_loss(self, prediction_dict): + """Computes the mask loss. + + Args: + prediction_dict: dict from predict() method containing + INSTANCE_EMBEDDING and PIXEL_EMBEDDING prediction. + Both of these are lists of tensors, each of size + [batch_size, height, width, embedding_size]. + + Returns: + loss: float, the mask loss as a scalar. + """ + gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes) + gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights) + gt_masks_list = self.groundtruth_lists(fields.BoxListFields.masks) + gt_classes_list = self.groundtruth_lists(fields.BoxListFields.classes) + + allowed_masked_classes_ids = ( + self._deepmac_params.allowed_masked_classes_ids) + + total_loss = 0.0 + + # Iterate over multiple predictions by backbone (for hourglass length=2) + for instance_pred, pixel_pred in zip( + prediction_dict[INSTANCE_EMBEDDING], + prediction_dict[PIXEL_EMBEDDING]): + # Iterate over samples in batch + # TODO(vighneshb) find out how autograph is handling this. 
Converting + # to a single op may give speed improvements + for i, (boxes, weights, classes, masks) in enumerate( + zip(gt_boxes_list, gt_weights_list, gt_classes_list, gt_masks_list)): + + _, weights, masks = filter_masked_classes(allowed_masked_classes_ids, + classes, weights, masks) + num_subsample = self._deepmac_params.mask_num_subsamples + _, weights, boxes, masks = subsample_instances( + classes, weights, boxes, masks, num_subsample) + + per_instance_loss = self._compute_per_instance_mask_loss( + boxes, masks, instance_pred[i], pixel_pred[i]) + per_instance_loss *= weights + + num_instances = tf.maximum(tf.reduce_sum(weights), 1.0) + + total_loss += tf.reduce_sum(per_instance_loss) / num_instances + + batch_size = len(gt_boxes_list) + num_predictions = len(prediction_dict[INSTANCE_EMBEDDING]) + + return total_loss / float(batch_size * num_predictions) + + def loss(self, prediction_dict, true_image_shapes, scope=None): + + losses_dict = super(DeepMACMetaArch, self).loss( + prediction_dict, true_image_shapes, scope) + + if self._deepmac_params is not None: + mask_loss = self._compute_instance_masks_loss( + prediction_dict=prediction_dict) + key = LOSS_KEY_PREFIX + '/' + DEEP_MASK_ESTIMATION + losses_dict[key] = ( + self._deepmac_params.task_loss_weight * mask_loss + ) + + return losses_dict + + def postprocess(self, prediction_dict, true_image_shapes, **params): + """Produces boxes given a prediction dict returned by predict(). + + Args: + prediction_dict: a dictionary holding predicted tensors from "predict" + function. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is of + the form [height, width, channels] indicating the shapes of true images + in the resized images, as resized images can be padded with zeros. + **params: Currently ignored. 
+ + Returns: + detections: a dictionary containing the following fields + detection_masks: (Optional) A uint8 tensor of shape [batch, + max_detections, mask_height, mask_width] with masks for each + detection. Background is specified with 0, and foreground is specified + with positive integers (1 for standard instance segmentation mask, and + 1-indexed parts for DensePose task). + And all other fields returned by the super class method. + """ + postprocess_dict = super(DeepMACMetaArch, self).postprocess( + prediction_dict, true_image_shapes, **params) + boxes_strided = postprocess_dict['detection_boxes_strided'] + + if self._deepmac_params is not None: + masks = self._postprocess_masks( + boxes_strided, prediction_dict[INSTANCE_EMBEDDING][-1], + prediction_dict[PIXEL_EMBEDDING][-1]) + postprocess_dict[fields.DetectionResultFields.detection_masks] = masks + + return postprocess_dict + + def _postprocess_masks(self, boxes_output_stride, + instance_embedding, pixel_embedding): + """Postprocess masks with the deep mask network. + + Args: + boxes_output_stride: A [batch_size, num_instances, 4] float tensor + containing the batch of boxes in the absolute output space of the + feature extractor. + instance_embedding: A [batch_size, output_height, output_width, + embedding_size] float tensor containing instance embeddings. + pixel_embedding: A [batch_size, output_height, output_width, + pixel_embedding_size] float tensor containing the per-pixel embedding. + + Returns: + masks: A float tensor of size [batch_size, num_instances, mask_size, + mask_size] containing binary per-box instance masks. 
+ """ + + def process(elems): + boxes, instance_embedding, pixel_embedding = elems + return self._postprocess_sample(boxes, instance_embedding, + pixel_embedding) + + max_instances = self._center_params.max_box_predictions + return tf.map_fn(process, [boxes_output_stride, instance_embedding, + pixel_embedding], + dtype=tf.float32, parallel_iterations=max_instances) + + def _postprocess_sample(self, boxes_output_stride, + instance_embedding, pixel_embedding): + """Post process masks for a single sample. + + Args: + boxes_output_stride: A [num_instances, 4] float tensor containing + bounding boxes in the absolute output space. + instance_embedding: A [output_height, output_width, embedding_size] + float tensor containing instance embeddings. + pixel_embedding: A [batch_size, output_height, output_width, + pixel_embedding_size] float tensor containing the per-pixel embedding. + + Returns: + masks: A float tensor of size [num_instances, mask_height, mask_width] + containing binary per-box instance masks. If + predict_full_resolution_masks is set, the masks will be resized to + postprocess_crop_size. Otherwise, mask_height=mask_width=mask_size + """ + + height, width = (tf.shape(instance_embedding)[0], + tf.shape(instance_embedding)[1]) + height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32) + blist = box_list.BoxList(boxes_output_stride) + blist = box_list_ops.to_normalized_coordinates( + blist, height, width, check_range=False) + boxes = blist.get() + + mask_input = self._get_mask_head_input(boxes, pixel_embedding) + instance_embeddings = self._get_instance_embeddings( + boxes, instance_embedding) + + mask_logits = self._mask_net( + instance_embeddings, mask_input, + training=tf.keras.backend.learning_phase()) + + # TODO(vighneshb) Explore sweeping mask thresholds. 
+ + if self._deepmac_params.predict_full_resolution_masks: + + height, width = tf.shape(mask_logits)[1], tf.shape(mask_logits)[2] + height *= self._stride + width *= self._stride + mask_logits = resize_instance_masks(mask_logits, (height, width)) + mask_logits = crop_masks_within_boxes( + mask_logits, boxes, self._deepmac_params.postprocess_crop_size) + + masks_prob = tf.nn.sigmoid(mask_logits) + + return masks_prob + + def _transform_boxes_to_feature_coordinates(self, provided_boxes, + true_image_shapes, + resized_image_shape, + instance_embedding): + """Transforms normalized boxes to feature map coordinates. + + Args: + provided_boxes: A [batch, num_instances, 4] float tensor containing + normalized bounding boxes. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is of + the form [height, width, channels] indicating the shapes of true images + in the resized images, as resized images can be padded with zeros. + resized_image_shape: A 4D int32 tensor containing shapes of the + preprocessed inputs (N, H, W, C). + instance_embedding: A [batch, output_height, output_width, embedding_size] + float tensor containing instance embeddings. + + Returns: + A float tensor of size [batch, num_instances, 4] containing boxes whose + coordinates have been transformed to the absolute output space of the + feature extractor. + """ + # Input boxes must be normalized. + shape_utils.assert_box_normalized(provided_boxes) + + # Transform the provided boxes to the absolute output space of the feature + # extractor. + height, width = (tf.shape(instance_embedding)[1], + tf.shape(instance_embedding)[2]) + + resized_image_height = resized_image_shape[1] + resized_image_width = resized_image_shape[2] + + def transform_boxes(elems): + boxes_per_image, true_image_shape = elems + blist = box_list.BoxList(boxes_per_image) + # First transform boxes from image space to resized image space since + there may be padding in the resized images. 
+ blist = box_list_ops.scale(blist, + true_image_shape[0] / resized_image_height, + true_image_shape[1] / resized_image_width) + # Then transform boxes from resized image space (normalized) to the + # feature map space (absolute). + blist = box_list_ops.to_absolute_coordinates( + blist, height, width, check_range=False) + return blist.get() + + return tf.map_fn( + transform_boxes, [provided_boxes, true_image_shapes], dtype=tf.float32) + + def predict_masks_from_boxes(self, prediction_dict, true_image_shapes, + provided_boxes, **params): + """Produces masks for the provided boxes. + + Args: + prediction_dict: a dictionary holding predicted tensors from "predict" + function. + true_image_shapes: int32 tensor of shape [batch, 3] where each row is of + the form [height, width, channels] indicating the shapes of true images + in the resized images, as resized images can be padded with zeros. + provided_boxes: float tensor of shape [batch, num_boxes, 4] containing + boxes coordinates (normalized) from which we will produce masks. + **params: Currently ignored. + + Returns: + detections: a dictionary containing the following fields + detection_masks: (Optional) A uint8 tensor of shape [batch, + max_detections, mask_height, mask_width] with masks for each + detection. Background is specified with 0, and foreground is specified + with positive integers (1 for standard instance segmentation mask, and + 1-indexed parts for DensePose task). + And all other fields returned by the super class method. 
+ """ + postprocess_dict = super(DeepMACMetaArch, + self).postprocess(prediction_dict, + true_image_shapes, **params) + + instance_embedding = prediction_dict[INSTANCE_EMBEDDING][-1] + resized_image_shapes = shape_utils.combined_static_and_dynamic_shape( + prediction_dict['preprocessed_inputs']) + boxes_strided = self._transform_boxes_to_feature_coordinates( + provided_boxes, true_image_shapes, resized_image_shapes, + instance_embedding) + + if self._deepmac_params is not None: + masks = self._postprocess_masks( + boxes_strided, instance_embedding, + prediction_dict[PIXEL_EMBEDDING][-1]) + postprocess_dict[fields.DetectionResultFields.detection_masks] = masks + + return postprocess_dict diff --git a/research/object_detection/meta_architectures/deepmac_meta_arch_test.py b/research/object_detection/meta_architectures/deepmac_meta_arch_test.py new file mode 100644 index 0000000000000000000000000000000000000000..3a2dd7447a65129921ebdb0535e6ffcc67fec041 --- /dev/null +++ b/research/object_detection/meta_architectures/deepmac_meta_arch_test.py @@ -0,0 +1,419 @@ +"""Tests for google3.third_party.tensorflow_models.object_detection.meta_architectures.deepmac_meta_arch.""" + +import functools +import unittest + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from object_detection.core import losses +from object_detection.core import preprocessor +from object_detection.meta_architectures import center_net_meta_arch +from object_detection.meta_architectures import deepmac_meta_arch +from object_detection.utils import tf_version + + +class DummyFeatureExtractor(center_net_meta_arch.CenterNetFeatureExtractor): + + def __init__(self, + channel_means, + channel_stds, + bgr_ordering, + num_feature_outputs, + stride): + self._num_feature_outputs = num_feature_outputs + self._stride = stride + super(DummyFeatureExtractor, self).__init__( + channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + + def 
predict(self): + pass + + def loss(self): + pass + + def postprocess(self): + pass + + def call(self, inputs): + batch_size, input_height, input_width, _ = inputs.shape + fake_output = tf.ones([ + batch_size, input_height // self._stride, input_width // self._stride, + 64 + ], dtype=tf.float32) + return [fake_output] * self._num_feature_outputs + + @property + def out_stride(self): + return self._stride + + @property + def num_feature_outputs(self): + return self._num_feature_outputs + + +class MockMaskNet(tf.keras.layers.Layer): + + def __call__(self, instance_embedding, pixel_embedding, training): + return tf.zeros_like(pixel_embedding[:, :, :, 0]) + 0.9 + + +def build_meta_arch(predict_full_resolution_masks=False, use_dice_loss=False): + """Builds the DeepMAC meta architecture.""" + + feature_extractor = DummyFeatureExtractor( + channel_means=(1.0, 2.0, 3.0), + channel_stds=(10., 20., 30.), + bgr_ordering=False, + num_feature_outputs=2, + stride=4) + image_resizer_fn = functools.partial( + preprocessor.resize_to_range, + min_dimension=128, + max_dimension=128, + pad_to_max_dimesnion=True) + + object_center_params = center_net_meta_arch.ObjectCenterParams( + classification_loss=losses.WeightedSigmoidClassificationLoss(), + object_center_loss_weight=1.0, + min_box_overlap_iou=1.0, + max_box_predictions=5, + use_labeled_classes=False) + + if use_dice_loss: + classification_loss = losses.WeightedDiceClassificationLoss(False) + else: + classification_loss = losses.WeightedSigmoidClassificationLoss() + + deepmac_params = deepmac_meta_arch.DeepMACParams( + classification_loss=classification_loss, + dim=8, + task_loss_weight=1.0, + pixel_embedding_dim=2, + allowed_masked_classes_ids=[], + mask_size=16, + mask_num_subsamples=-1, + use_xy=True, + network_type='hourglass10', + use_instance_embedding=True, + num_init_channels=8, + predict_full_resolution_masks=predict_full_resolution_masks, + postprocess_crop_size=128, + max_roi_jitter_ratio=0.0, + roi_jitter_mode='random' 
+ ) + + object_detection_params = center_net_meta_arch.ObjectDetectionParams( + localization_loss=losses.L1LocalizationLoss(), + offset_loss_weight=1.0, + scale_loss_weight=0.1 + ) + + return deepmac_meta_arch.DeepMACMetaArch( + is_training=True, + add_summaries=False, + num_classes=6, + feature_extractor=feature_extractor, + object_center_params=object_center_params, + deepmac_params=deepmac_params, + object_detection_params=object_detection_params, + image_resizer_fn=image_resizer_fn) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class DeepMACUtilsTest(tf.test.TestCase): + + def test_subsample_trivial(self): + """Test subsampling masks.""" + + boxes = np.arange(4).reshape(4, 1) * np.ones((4, 4)) + masks = np.arange(4).reshape(4, 1, 1) * np.ones((4, 32, 32)) + weights = np.ones(4) + classes = tf.one_hot(tf.range(4), depth=4) + + result = deepmac_meta_arch.subsample_instances( + classes, weights, boxes, masks, 4) + self.assertAllClose(result[0], classes) + self.assertAllClose(result[1], weights) + self.assertAllClose(result[2], boxes) + self.assertAllClose(result[3], masks) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class DeepMACMetaArchTest(tf.test.TestCase): + + def setUp(self): # pylint:disable=g-missing-super-call + self.model = build_meta_arch() + + def test_mask_network(self): + net = deepmac_meta_arch.MaskHeadNetwork('hourglass10', 8) + + out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True) + self.assertEqual(out.shape, (2, 32, 32)) + + def test_mask_network_hourglass20(self): + net = deepmac_meta_arch.MaskHeadNetwork('hourglass20', 8) + + out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True) + self.assertEqual(out.shape, (2, 32, 32)) + + def test_mask_network_resnet(self): + + net = deepmac_meta_arch.MaskHeadNetwork('resnet4') + + out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True) + self.assertEqual(out.shape, (2, 32, 32)) + + def 
test_mask_network_resnet_tf_function(self): + + net = deepmac_meta_arch.MaskHeadNetwork('resnet8') + call_func = tf.function(net.__call__) + + out = call_func(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True) + self.assertEqual(out.shape, (2, 32, 32)) + + def test_get_mask_head_input(self): + + boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]], + dtype=tf.float32) + + pixel_embedding = np.zeros((32, 32, 4), dtype=np.float32) + pixel_embedding[:16, :16] = 1.0 + pixel_embedding[16:, 16:] = 2.0 + pixel_embedding = tf.constant(pixel_embedding) + + mask_inputs = self.model._get_mask_head_input(boxes, pixel_embedding) + self.assertEqual(mask_inputs.shape, (2, 16, 16, 6)) + + y_grid, x_grid = tf.meshgrid(np.linspace(-1.0, 1.0, 16), + np.linspace(-1.0, 1.0, 16), indexing='ij') + for i in range(2): + mask_input = mask_inputs[i] + self.assertAllClose(y_grid, mask_input[:, :, 0]) + self.assertAllClose(x_grid, mask_input[:, :, 1]) + pixel_embedding = mask_input[:, :, 2:] + self.assertAllClose(np.zeros((16, 16, 4)) + i + 1, pixel_embedding) + + def test_get_mask_head_input_no_crop_resize(self): + + model = build_meta_arch(predict_full_resolution_masks=True) + boxes = tf.constant([[0., 0., 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + dtype=tf.float32) + + pixel_embedding_np = np.random.randn(32, 32, 4).astype(np.float32) + pixel_embedding = tf.constant(pixel_embedding_np) + + mask_inputs = model._get_mask_head_input(boxes, pixel_embedding) + self.assertEqual(mask_inputs.shape, (2, 32, 32, 6)) + + y_grid, x_grid = tf.meshgrid(np.linspace(-1.0, 1.0, 32), + np.linspace(-1.0, 1.0, 32), indexing='ij') + for i in range(2): + mask_input = mask_inputs[i] + self.assertAllClose(y_grid, mask_input[:, :, 0]) + self.assertAllClose(x_grid, mask_input[:, :, 1]) + pixel_embedding = mask_input[:, :, 2:] + self.assertAllClose(pixel_embedding_np, pixel_embedding) + + def test_get_instance_embeddings(self): + + embeddings = np.zeros((32, 32, 2)) + embeddings[8, 8] = 1.0 + 
embeddings[24, 16] = 2.0 + embeddings = tf.constant(embeddings) + + boxes = tf.constant([[0., 0., 0.5, 0.5], [0.5, 0.0, 1.0, 1.0]]) + + center_embeddings = self.model._get_instance_embeddings(boxes, embeddings) + + self.assertAllClose(center_embeddings, [[1.0, 1.0], [2.0, 2.0]]) + + def test_get_groundtruth_mask_output(self): + + boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]], + dtype=tf.float32) + masks = np.zeros((2, 32, 32), dtype=np.float32) + masks[0, :16, :16] = 0.5 + masks[1, 16:, 16:] = 0.1 + masks = self.model._get_groundtruth_mask_output(boxes, masks) + self.assertEqual(masks.shape, (2, 16, 16)) + + self.assertAllClose(masks[0], np.zeros((16, 16)) + 0.5) + self.assertAllClose(masks[1], np.zeros((16, 16)) + 0.1) + + def test_get_groundtruth_mask_output_crop_resize(self): + + model = build_meta_arch(predict_full_resolution_masks=True) + boxes = tf.constant([[0., 0., 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + dtype=tf.float32) + masks = tf.ones((2, 32, 32)) + masks = model._get_groundtruth_mask_output(boxes, masks) + self.assertAllClose(masks, np.ones((2, 32, 32))) + + def test_per_instance_loss(self): + + model = build_meta_arch() + model._mask_net = MockMaskNet() + boxes = tf.constant([[0.0, 0.0, 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]]) + masks = np.zeros((2, 32, 32), dtype=np.float32) + masks[0, :16, :16] = 1.0 + masks[1, 16:, 16:] = 1.0 + masks = tf.constant(masks) + + loss = model._compute_per_instance_mask_loss( + boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2))) + self.assertAllClose( + loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9))) + + def test_per_instance_loss_no_crop_resize(self): + + model = build_meta_arch(predict_full_resolution_masks=True) + model._mask_net = MockMaskNet() + boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) + masks = np.ones((2, 128, 128), dtype=np.float32) + masks = tf.constant(masks) + + loss = model._compute_per_instance_mask_loss( + boxes, masks, tf.zeros((32, 32, 2)), 
tf.zeros((32, 32, 2))) + self.assertAllClose( + loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9))) + + def test_per_instance_loss_no_crop_resize_dice(self): + + model = build_meta_arch(predict_full_resolution_masks=True, + use_dice_loss=True) + model._mask_net = MockMaskNet() + boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) + masks = np.ones((2, 128, 128), dtype=np.float32) + masks = tf.constant(masks) + + loss = model._compute_per_instance_mask_loss( + boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2))) + pred = tf.nn.sigmoid(0.9) + expected = (1.0 - ((2.0 * pred) / (1.0 + pred))) + self.assertAllClose(loss, [expected, expected], rtol=1e-3) + + def test_empty_masks(self): + boxes = tf.zeros([0, 4]) + masks = tf.zeros([0, 128, 128]) + + loss = self.model._compute_per_instance_mask_loss( + boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2))) + self.assertEqual(loss.shape, (0,)) + + def test_postprocess(self): + + model = build_meta_arch() + model._mask_net = MockMaskNet() + boxes = np.zeros((2, 3, 4), dtype=np.float32) + boxes[:, :, [0, 2]] = 0.0 + boxes[:, :, [1, 3]] = 8.0 + boxes = tf.constant(boxes) + + masks = model._postprocess_masks( + boxes, tf.zeros((2, 32, 32, 2)), tf.zeros((2, 32, 32, 2))) + prob = tf.nn.sigmoid(0.9).numpy() + self.assertAllClose(masks, prob * np.ones((2, 3, 16, 16))) + + def test_postprocess_no_crop_resize_shape(self): + + model = build_meta_arch(predict_full_resolution_masks=True) + model._mask_net = MockMaskNet() + boxes = np.zeros((2, 3, 4), dtype=np.float32) + boxes[:, :, [0, 2]] = 0.0 + boxes[:, :, [1, 3]] = 8.0 + boxes = tf.constant(boxes) + + masks = model._postprocess_masks( + boxes, tf.zeros((2, 32, 32, 2)), tf.zeros((2, 32, 32, 2))) + prob = tf.nn.sigmoid(0.9).numpy() + self.assertAllClose(masks, prob * np.ones((2, 3, 128, 128))) + + def test_crop_masks_within_boxes(self): + masks = np.zeros((2, 32, 32)) + masks[0, :16, :16] = 1.0 + masks[1, 16:, 16:] = 1.0 + boxes = tf.constant([[0.0, 0.0, 
15.0 / 32, 15.0 / 32], + [0.5, 0.5, 1.0, 1]]) + masks = deepmac_meta_arch.crop_masks_within_boxes( + masks, boxes, 128) + masks = (masks.numpy() > 0.0).astype(np.float32) + self.assertAlmostEqual(masks.sum(), 2 * 128 * 128) + + def test_transform_boxes_to_feature_coordinates(self): + batch_size = 2 + model = build_meta_arch() + model._mask_net = MockMaskNet() + boxes = np.zeros((batch_size, 3, 4), dtype=np.float32) + boxes[:, :, [0, 2]] = 0.1 + boxes[:, :, [1, 3]] = 0.5 + boxes = tf.constant(boxes) + true_image_shapes = tf.constant([ + [64, 32, 3], # Image 1 is padded during resizing. + [64, 64, 3], # Image 2 is not padded. + ]) + resized_image_height = 64 + resized_image_width = 64 + resized_image_shape = [ + batch_size, resized_image_height, resized_image_width, 3 + ] + + feature_map_height = 32 + feature_map_width = 32 + instance_embedding = tf.zeros( + (batch_size, feature_map_height, feature_map_width, 2)) + + expected_boxes = np.array([ + [ # Image 1 + # 0.1 * (64 / resized_image_height) * feature_map_height -> 3.2 + # 0.5 * (32 / resized_image_width) * feature_map_width -> 8.0 + [3.2, 8., 3.2, 8.], + [3.2, 8., 3.2, 8.], + [3.2, 8., 3.2, 8.], + ], + [ # Image 2 + # 0.1 * (64 / resized_image_height) * feature_map_height -> 3.2 + # 0.5 * (64 / resized_image_width) * feature_map_width -> 16 + [3.2, 16., 3.2, 16.], + [3.2, 16., 3.2, 16.], + [3.2, 16., 3.2, 16.], + ], + ]) + + box_strided = model._transform_boxes_to_feature_coordinates( + boxes, true_image_shapes, resized_image_shape, instance_embedding) + self.assertAllClose(box_strided, expected_boxes) + + def test_fc_tf_function(self): + + net = deepmac_meta_arch.MaskHeadNetwork('fully_connected', 8, mask_size=32) + call_func = tf.function(net.__call__) + + out = call_func(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 8)), training=True) + self.assertEqual(out.shape, (2, 32, 32)) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class FullyConnectedMaskHeadTest(tf.test.TestCase): + + def 
test_fc_mask_head(self): + head = deepmac_meta_arch.FullyConnectedMaskHead(512, 16) + inputs = tf.random.uniform([100, 16, 16, 512]) + output = head(inputs) + self.assertAllEqual([100, 16, 16, 1], output.numpy().shape) + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class ResNetMaskHeadTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.parameters(['resnet4', 'resnet8', 'resnet20']) + def test_pass(self, name): + net = deepmac_meta_arch.ResNetMaskNetwork(name, 8) + out = net(tf.zeros((3, 32, 32, 16))) + self.assertEqual(out.shape[:3], (3, 32, 32)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py b/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py index 4f944eda6c414f83d4b1bcea1d08fd22b25f7f62..6f88b2df02d568a4da80612d92680e2f5a3ca2c7 100644 --- a/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py +++ b/research/object_detection/meta_architectures/faster_rcnn_meta_arch.py @@ -99,7 +99,6 @@ import functools import tensorflow.compat.v1 as tf import tf_slim as slim -from object_detection.anchor_generators import grid_anchor_generator from object_detection.builders import box_predictor_builder from object_detection.builders import hyperparams_builder from object_detection.core import box_list @@ -305,7 +304,8 @@ class FasterRCNNMetaArch(model.DetectionModel): resize_masks=True, freeze_batchnorm=False, return_raw_detections_during_predict=False, - output_final_box_features=False): + output_final_box_features=False, + output_final_box_rpn_features=False): """FasterRCNNMetaArch Constructor. Args: @@ -438,8 +438,11 @@ class FasterRCNNMetaArch(model.DetectionModel): boxes in the predict() method. These are decoded boxes that have not been through postprocessing (i.e. NMS). Default False. output_final_box_features: Whether to output final box features. 
If true, - it crops the feauture map based on the final box prediction and returns - in the dict as detection_features. + it crops the rpn feature map and passes it through box_classifier then + returns in the output dict as `detection_features`. + output_final_box_rpn_features: Whether to output rpn box features. If + true, it crops the rpn feature map and returns in the output dict as + `detection_features`. Raises: ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at @@ -451,11 +454,6 @@ class FasterRCNNMetaArch(model.DetectionModel): # in the future. super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes) - if not isinstance(first_stage_anchor_generator, - grid_anchor_generator.GridAnchorGenerator): - raise ValueError('first_stage_anchor_generator must be of type ' - 'grid_anchor_generator.GridAnchorGenerator.') - self._is_training = is_training self._image_resizer_fn = image_resizer_fn self._resize_masks = resize_masks @@ -492,9 +490,7 @@ class FasterRCNNMetaArch(model.DetectionModel): hyperparams_builder.KerasLayerHyperparams): num_anchors_per_location = ( self._first_stage_anchor_generator.num_anchors_per_location()) - if len(num_anchors_per_location) != 1: - raise ValueError('anchor_generator is expected to generate anchors ' - 'corresponding to a single feature map.') + conv_hyperparams = ( first_stage_box_predictor_arg_scope_fn) self._first_stage_box_predictor_first_conv = ( @@ -533,11 +529,10 @@ class FasterRCNNMetaArch(model.DetectionModel): else: self._first_stage_box_predictor_arg_scope_fn = ( first_stage_box_predictor_arg_scope_fn) - def rpn_box_predictor_feature_extractor(rpn_features_to_crop): + def rpn_box_predictor_feature_extractor(single_rpn_features_to_crop): with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()): - reuse = tf.get_variable_scope().reuse return slim.conv2d( - rpn_features_to_crop, + single_rpn_features_to_crop, self._first_stage_box_predictor_depth, kernel_size=[ 
self._first_stage_box_predictor_kernel_size, @@ -546,7 +541,7 @@ class FasterRCNNMetaArch(model.DetectionModel): rate=self._first_stage_atrous_rate, activation_fn=tf.nn.relu6, scope='Conv', - reuse=reuse) + reuse=tf.AUTO_REUSE) self._first_stage_box_predictor_first_conv = ( rpn_box_predictor_feature_extractor) self._first_stage_box_predictor = ( @@ -613,6 +608,7 @@ class FasterRCNNMetaArch(model.DetectionModel): self._return_raw_detections_during_predict = ( return_raw_detections_during_predict) self._output_final_box_features = output_final_box_features + self._output_final_box_rpn_features = output_final_box_rpn_features @property def first_stage_feature_extractor_scope(self): @@ -762,10 +758,10 @@ class FasterRCNNMetaArch(model.DetectionModel): Returns: prediction_dict: a dictionary holding "raw" prediction tensors: - 1) rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch_size, height, width, depth] to be used for predicting proposal - boxes and corresponding objectness scores. - 2) rpn_features_to_crop: A 4-D float32 tensor with shape + 1) rpn_box_predictor_features: A list of 4-D float32 tensor with shape + [batch_size, height_i, width_j, depth] to be used for predicting + proposal boxes and corresponding objectness scores. + 2) rpn_features_to_crop: A list of 4-D float32 tensor with shape [batch_size, height, width, depth] representing image features to crop using the proposal boxes predicted by the RPN. 
3) image_shape: a 1-D tensor of shape [4] representing the input @@ -830,7 +826,8 @@ class FasterRCNNMetaArch(model.DetectionModel): prediction_dict['rpn_objectness_predictions_with_background'], prediction_dict['rpn_features_to_crop'], prediction_dict['anchors'], prediction_dict['image_shape'], - true_image_shapes, **side_inputs)) + true_image_shapes, + **side_inputs)) if self._number_of_stages == 3: prediction_dict = self._predict_third_stage(prediction_dict, @@ -850,12 +847,12 @@ class FasterRCNNMetaArch(model.DetectionModel): Returns: prediction_dict: a dictionary holding "raw" prediction tensors: - 1) rpn_box_predictor_features: A 4-D float32/bfloat16 tensor with shape - [batch_size, height, width, depth] to be used for predicting proposal - boxes and corresponding objectness scores. - 2) rpn_features_to_crop: A 4-D float32/bfloat16 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. + 1) rpn_box_predictor_features: A list of 4-D float32/bfloat16 tensor + with shape [batch_size, height_i, width_j, depth] to be used for + predicting proposal boxes and corresponding objectness scores. + 2) rpn_features_to_crop: A list of 4-D float32/bfloat16 tensor with + shape [batch_size, height, width, depth] representing image features + to crop using the proposal boxes predicted by the RPN. 3) image_shape: a 1-D tensor of shape [4] representing the input image shape. 4) rpn_box_encodings: 3-D float32 tensor of shape @@ -911,7 +908,7 @@ class FasterRCNNMetaArch(model.DetectionModel): dtype=tf.float32), 'anchors': anchors_boxlist.data['boxes'], - fields.PredictionFields.feature_maps: [rpn_features_to_crop] + fields.PredictionFields.feature_maps: rpn_features_to_crop } return prediction_dict @@ -947,9 +944,9 @@ class FasterRCNNMetaArch(model.DetectionModel): [batch_size, num_valid_anchors, 2] containing class predictions (logits) for each of the anchors. 
Note that this tensor *includes* background class predictions (at class index 0). - rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. + rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensor with shape + [batch_size, height_i, width_i, depth] representing image features to + crop using the proposal boxes predicted by the RPN. anchors: 2-D float tensor of shape [num_anchors, self._box_coder.code_size]. image_shape: A 1D int32 tensors of size [4] containing the image shape. @@ -1012,9 +1009,9 @@ class FasterRCNNMetaArch(model.DetectionModel): """Predicts the output tensors from second stage of Faster R-CNN. Args: - rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape - [batch_size, height, width, depth] representing image features to crop - using the proposal boxes predicted by the RPN. + rpn_features_to_crop: A list 4-D float32 or bfloat16 tensor with shape + [batch_size, height_i, width_i, depth] representing image features to + crop using the proposal boxes predicted by the RPN. proposal_boxes_normalized: A float tensor with shape [batch_size, max_num_proposals, 4] representing the (potentially zero padded) proposal boxes for all images in the batch. 
These boxes are represented @@ -1064,10 +1061,11 @@ class FasterRCNNMetaArch(model.DetectionModel): """ flattened_proposal_feature_maps = ( self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, proposal_boxes_normalized, **side_inputs)) + rpn_features_to_crop, proposal_boxes_normalized, + image_shape, **side_inputs)) box_classifier_features = self._extract_box_classifier_features( - flattened_proposal_feature_maps) + flattened_proposal_feature_maps, **side_inputs) if self._mask_rcnn_box_predictor.is_keras_model: box_predictions = self._mask_rcnn_box_predictor( @@ -1196,6 +1194,8 @@ class FasterRCNNMetaArch(model.DetectionModel): decoded proposal bounding boxes in absolute coordinates. 5) box_classifier_features: a 4-D float32 tensor representing the features for each proposal. + 6) image_shape: a 1-D tensor of shape [4] representing the input + image shape. image_shapes: A 2-D int32 tensors of shape [batch_size, 3] containing shapes of images in the batch. @@ -1234,11 +1234,12 @@ class FasterRCNNMetaArch(model.DetectionModel): detection_classes = detections_dict[ fields.DetectionResultFields.detection_classes] rpn_features_to_crop = prediction_dict['rpn_features_to_crop'] + image_shape = prediction_dict['image_shape'] batch_size = tf.shape(detection_boxes)[0] max_detection = tf.shape(detection_boxes)[1] flattened_detected_feature_maps = ( self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, detection_boxes)) + rpn_features_to_crop, detection_boxes, image_shape)) curr_box_classifier_features = self._extract_box_classifier_features( flattened_detected_feature_maps) @@ -1302,13 +1303,13 @@ class FasterRCNNMetaArch(model.DetectionModel): preprocessed_inputs: a [batch, height, width, channels] image tensor. Returns: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. 
- rpn_features_to_crop: A 4-D float32 tensor with shape + rpn_box_predictor_features: A list of 4-D float32 tensor with shape + [batch, height_i, width_j, depth] to be used for predicting proposal + boxes and corresponding objectness scores. + rpn_features_to_crop: A list of 4-D float32 tensor with shape [batch, height, width, depth] representing image features to crop using the proposals boxes. - anchors: A BoxList representing anchors (for the RPN) in + anchors: A list of BoxList representing anchors (for the RPN) in absolute coordinates. image_shape: A 1-D tensor representing the input image shape. """ @@ -1317,12 +1318,21 @@ class FasterRCNNMetaArch(model.DetectionModel): rpn_features_to_crop, self.endpoints = self._extract_proposal_features( preprocessed_inputs) - feature_map_shape = tf.shape(rpn_features_to_crop) + # Decide if rpn_features_to_crop is a list. If not make it a list + if not isinstance(rpn_features_to_crop, list): + rpn_features_to_crop = [rpn_features_to_crop] + + feature_map_shapes = [] + rpn_box_predictor_features = [] + for single_rpn_features_to_crop in rpn_features_to_crop: + single_shape = tf.shape(single_rpn_features_to_crop) + feature_map_shapes.append((single_shape[1], single_shape[2])) + single_rpn_box_predictor_features = ( + self._first_stage_box_predictor_first_conv( + single_rpn_features_to_crop)) + rpn_box_predictor_features.append(single_rpn_box_predictor_features) anchors = box_list_ops.concatenate( - self._first_stage_anchor_generator.generate([(feature_map_shape[1], - feature_map_shape[2])])) - rpn_box_predictor_features = ( - self._first_stage_box_predictor_first_conv(rpn_features_to_crop)) + self._first_stage_anchor_generator.generate(feature_map_shapes)) return (rpn_box_predictor_features, rpn_features_to_crop, anchors, image_shape) @@ -1349,9 +1359,9 @@ class FasterRCNNMetaArch(model.DetectionModel): Note resulting tensors will not have been postprocessed. 
Args: - rpn_box_predictor_features: A 4-D float32 tensor with shape - [batch, height, width, depth] to be used for predicting proposal boxes - and corresponding objectness scores. + rpn_box_predictor_features: A list of 4-D float32 tensor with shape + [batch, height_i, width_j, depth] to be used for predicting proposal + boxes and corresponding objectness scores. Returns: box_encodings: 3-D float tensor of shape @@ -1369,15 +1379,13 @@ class FasterRCNNMetaArch(model.DetectionModel): """ num_anchors_per_location = ( self._first_stage_anchor_generator.num_anchors_per_location()) - if len(num_anchors_per_location) != 1: - raise RuntimeError('anchor_generator is expected to generate anchors ' - 'corresponding to a single feature map.') + if self._first_stage_box_predictor.is_keras_model: box_predictions = self._first_stage_box_predictor( - [rpn_box_predictor_features]) + rpn_box_predictor_features) else: box_predictions = self._first_stage_box_predictor.predict( - [rpn_box_predictor_features], + rpn_box_predictor_features, num_anchors_per_location, scope=self.first_stage_box_predictor_scope) @@ -1545,9 +1553,22 @@ class FasterRCNNMetaArch(model.DetectionModel): 'Please make sure rpn_features_to_crop is in the prediction_dict.' ) detections_dict[ - 'detection_features'] = self._add_detection_features_output_node( + 'detection_features'] = ( + self._add_detection_box_boxclassifier_features_output_node( + detections_dict[ + fields.DetectionResultFields.detection_boxes], + prediction_dict['rpn_features_to_crop'], + prediction_dict['image_shape'])) + if self._output_final_box_rpn_features: + if 'rpn_features_to_crop' not in prediction_dict: + raise ValueError( + 'Please make sure rpn_features_to_crop is in the prediction_dict.' 
+ ) + detections_dict['cropped_rpn_box_features'] = ( + self._add_detection_box_rpn_features_output_node( detections_dict[fields.DetectionResultFields.detection_boxes], - prediction_dict['rpn_features_to_crop']) + prediction_dict['rpn_features_to_crop'], + prediction_dict['image_shape'])) return detections_dict @@ -1563,8 +1584,8 @@ class FasterRCNNMetaArch(model.DetectionModel): prediction_dict.pop(k) return prediction_dict - def _add_detection_features_output_node(self, detection_boxes, - rpn_features_to_crop): + def _add_detection_box_boxclassifier_features_output_node( + self, detection_boxes, rpn_features_to_crop, image_shape): """Add detection features to outputs. This function extracts box features for each box in rpn_features_to_crop. @@ -1576,9 +1597,10 @@ class FasterRCNNMetaArch(model.DetectionModel): Args: detection_boxes: a 3-D float32 tensor of shape [batch_size, max_detections, 4] which represents the bounding boxes. - rpn_features_to_crop: A 4-D float32 tensor with shape + rpn_features_to_crop: A list of 4-D float32 tensor with shape [batch, height, width, depth] representing image features to crop using the proposals boxes. + image_shape: a 1-D tensor of shape [4] representing the image shape. Returns: detection_features: a 4-D float32 tensor of shape @@ -1588,7 +1610,7 @@ class FasterRCNNMetaArch(model.DetectionModel): with tf.name_scope('SecondStageDetectionFeaturesExtract'): flattened_detected_feature_maps = ( self._compute_second_stage_input_feature_maps( - rpn_features_to_crop, detection_boxes)) + rpn_features_to_crop, detection_boxes, image_shape)) detection_features_unpooled = self._extract_box_classifier_features( flattened_detected_feature_maps) @@ -1602,6 +1624,8 @@ class FasterRCNNMetaArch(model.DetectionModel): reshaped_detection_features_pool = tf.identity( reshaped_detection_features_pool, 'pooled_detection_features') + # TODO(sbeery) add node to extract rpn features here!! 
+ reshaped_detection_features = tf.reshape( detection_features_unpooled, [batch_size, max_detections, @@ -1611,6 +1635,44 @@ class FasterRCNNMetaArch(model.DetectionModel): return reshaped_detection_features + def _add_detection_box_rpn_features_output_node(self, detection_boxes, + rpn_features_to_crop, + image_shape): + """Add detection features to outputs. + + This function extracts box features for each box in rpn_features_to_crop. + It returns the extracted box features, reshaped to + [batch size, max_detections, height, width, depth] + + Args: + detection_boxes: a 3-D float32 tensor of shape + [batch_size, max_detections, 4] which represents the bounding boxes. + rpn_features_to_crop: A list of 4-D float32 tensor with shape + [batch, height, width, depth] representing image features to crop using + the proposals boxes. + image_shape: a 1-D tensor of shape [4] representing the image shape. + + Returns: + detection_features: a 4-D float32 tensor of shape + [batch size, max_detections, height, width, depth] representing + cropped image features + """ + with tf.name_scope('FirstStageDetectionFeaturesExtract'): + flattened_detected_feature_maps = ( + self._compute_second_stage_input_feature_maps( + rpn_features_to_crop, detection_boxes, image_shape)) + + batch_size = tf.shape(detection_boxes)[0] + max_detections = tf.shape(detection_boxes)[1] + reshaped_detection_features = tf.reshape( + flattened_detected_feature_maps, + [batch_size, max_detections, + tf.shape(flattened_detected_feature_maps)[1], + tf.shape(flattened_detected_feature_maps)[2], + tf.shape(flattened_detected_feature_maps)[3]]) + + return reshaped_detection_features + def _postprocess_rpn(self, rpn_box_encodings_batch, rpn_objectness_predictions_with_background_batch, @@ -1930,6 +1992,7 @@ class FasterRCNNMetaArch(model.DetectionModel): def _compute_second_stage_input_feature_maps(self, features_to_crop, proposal_boxes_normalized, + image_shape, **side_inputs): """Crops to a set of proposals from the 
feature map for a batch of images. @@ -1943,14 +2006,27 @@ class FasterRCNNMetaArch(model.DetectionModel): proposal_boxes_normalized: A float32 tensor with shape [batch_size, num_proposals, box_code_size] containing proposal boxes in normalized coordinates. + image_shape: A 1D int32 tensors of size [4] containing the image shape. **side_inputs: additional tensors that are required by the network. Returns: A float32 tensor with shape [K, new_height, new_width, depth]. """ + num_levels = len(features_to_crop) + box_levels = None + if num_levels != 1: + # If there are multiple levels to select, get the box levels + # unit_scale_index: num_levels-2 is chosen based on section 4.2 of + # https://arxiv.org/pdf/1612.03144.pdf and works best for Resnet based + # feature extractor. + box_levels = ops.fpn_feature_levels( + num_levels, num_levels - 2, + tf.sqrt(tf.cast(image_shape[1] * image_shape[2], tf.float32)) / 224.0, + proposal_boxes_normalized) + cropped_regions = self._flatten_first_two_dimensions( self._crop_and_resize_fn( - features_to_crop, proposal_boxes_normalized, + features_to_crop, proposal_boxes_normalized, box_levels, [self._initial_crop_size, self._initial_crop_size])) return self._maxpool_layer(cropped_regions) @@ -2394,7 +2470,15 @@ class FasterRCNNMetaArch(model.DetectionModel): unmatched_class_label=tf.constant( [1] + self._num_classes * [0], dtype=tf.float32), gt_weights_batch=groundtruth_weights_list) - + if self.groundtruth_has_field( + fields.InputDataFields.groundtruth_labeled_classes): + gt_labeled_classes = self.groundtruth_lists( + fields.InputDataFields.groundtruth_labeled_classes) + gt_labeled_classes = tf.pad( + gt_labeled_classes, [[0, 0], [1, 0]], + mode='CONSTANT', + constant_values=1) + batch_cls_weights *= tf.expand_dims(gt_labeled_classes, 1) class_predictions_with_background = tf.reshape( class_predictions_with_background, [batch_size, self.max_num_proposals, -1]) @@ -2517,8 +2601,8 @@ class FasterRCNNMetaArch(model.DetectionModel): 
image_shape[1], image_shape[2], check_range=False).get() flat_cropped_gt_mask = self._crop_and_resize_fn( - tf.expand_dims(flat_gt_masks, -1), - tf.expand_dims(flat_normalized_proposals, axis=1), + [tf.expand_dims(flat_gt_masks, -1)], + tf.expand_dims(flat_normalized_proposals, axis=1), None, [mask_height, mask_width]) # Without stopping gradients into cropped groundtruth masks the # performance with 100-padded groundtruth masks when batch size > 1 is diff --git a/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py b/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py index 6c830b32d58cff5756521abc5bfeecacc7118531..d935c99fad63dcdecb67b310430f97e2c51a9ed6 100644 --- a/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py +++ b/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py @@ -484,7 +484,7 @@ class FasterRCNNMetaArchTest( 'mask_predictions': mask_predictions, 'rpn_features_to_crop': - rpn_features_to_crop + [rpn_features_to_crop] }, true_image_shapes) self.assertIn('detection_features', detections) return (detections['detection_boxes'], detections['detection_scores'], diff --git a/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py b/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py index beead134d515a1084b4ed1a57f63d601e07a02b2..d5d454de9f964933ef2f902e3687b1b6d8cc0500 100644 --- a/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py +++ b/research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py @@ -23,6 +23,7 @@ import tensorflow.compat.v1 as tf from google.protobuf import text_format from object_detection.anchor_generators import grid_anchor_generator +from object_detection.anchor_generators import multiscale_grid_anchor_generator from object_detection.builders import box_predictor_builder from object_detection.builders import hyperparams_builder from 
object_detection.builders import post_processing_builder @@ -34,7 +35,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch from object_detection.protos import box_predictor_pb2 from object_detection.protos import hyperparams_pb2 from object_detection.protos import post_processing_pb2 -from object_detection.utils import ops +from object_detection.utils import spatial_transform_ops as spatial_ops from object_detection.utils import test_case from object_detection.utils import test_utils from object_detection.utils import tf_version @@ -76,6 +77,36 @@ class FakeFasterRCNNFeatureExtractor( proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer2') +class FakeFasterRCNNMultiLevelFeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): + """Fake feature extractor to use in tests.""" + + def __init__(self): + super(FakeFasterRCNNMultiLevelFeatureExtractor, self).__init__( + is_training=False, + first_stage_features_stride=32, + reuse_weights=None, + weight_decay=0.0) + + def preprocess(self, resized_inputs): + return tf.identity(resized_inputs) + + def _extract_proposal_features(self, preprocessed_inputs, scope): + with tf.variable_scope('mock_model'): + proposal_features_1 = 0 * slim.conv2d( + preprocessed_inputs, num_outputs=3, kernel_size=3, scope='layer1', + padding='VALID') + proposal_features_2 = 0 * slim.conv2d( + proposal_features_1, num_outputs=3, kernel_size=3, scope='layer2', + padding='VALID') + return [proposal_features_1, proposal_features_2], {} + + def _extract_box_classifier_features(self, proposal_feature_maps, scope): + with tf.variable_scope('mock_model'): + return 0 * slim.conv2d( + proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer3') + + class FakeFasterRCNNKerasFeatureExtractor( faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor): """Fake feature extractor to use in tests.""" @@ -112,6 +143,42 @@ class FakeFasterRCNNKerasFeatureExtractor( 3, kernel_size=1, padding='SAME', name=name + 
'_layer2')]) +class FakeFasterRCNNKerasMultilevelFeatureExtractor( + faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor): + """Fake feature extractor to use in tests.""" + + def __init__(self): + super(FakeFasterRCNNKerasMultilevelFeatureExtractor, self).__init__( + is_training=False, + first_stage_features_stride=32, + weight_decay=0.0) + + def preprocess(self, resized_inputs): + return tf.identity(resized_inputs) + + def get_proposal_feature_extractor_model(self, name): + + class ProposalFeatureExtractor(tf.keras.Model): + """Dummy proposal feature extraction.""" + + def __init__(self, name): + super(ProposalFeatureExtractor, self).__init__(name=name) + self.conv = None + + def build(self, input_shape): + self.conv = tf.keras.layers.Conv2D( + 3, kernel_size=3, name='layer1') + self.conv_1 = tf.keras.layers.Conv2D( + 3, kernel_size=3, name='layer1') + + def call(self, inputs): + output_1 = self.conv(inputs) + output_2 = self.conv_1(output_1) + return [output_1, output_2] + + return ProposalFeatureExtractor(name=name) + + class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): """Base class to test Faster R-CNN and R-FCN meta architectures.""" @@ -234,7 +301,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): calibration_mapping_value=None, share_box_across_classes=False, return_raw_detections_during_predict=False, - output_final_box_features=False): + output_final_box_features=False, + multi_level=False): use_keras = tf_version.is_tf2() def image_resizer_fn(image, masks=None): """Fake image resizer function.""" @@ -260,22 +328,41 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): # anchors in this test are designed so that a subset of anchors are inside # the image and a subset of anchors are outside. 
- first_stage_anchor_scales = (0.001, 0.005, 0.1) - first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) - first_stage_anchor_strides = (1, 1) - first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( - first_stage_anchor_scales, - first_stage_anchor_aspect_ratios, - anchor_stride=first_stage_anchor_strides) + first_stage_anchor_generator = None + if multi_level: + min_level = 0 + max_level = 1 + anchor_scale = 0.1 + aspect_ratios = [1.0, 2.0, 0.5] + scales_per_octave = 2 + normalize_coordinates = False + (first_stage_anchor_generator + ) = multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator( + min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave, + normalize_coordinates) + else: + first_stage_anchor_scales = (0.001, 0.005, 0.1) + first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0) + first_stage_anchor_strides = (1, 1) + first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator( + first_stage_anchor_scales, + first_stage_anchor_aspect_ratios, + anchor_stride=first_stage_anchor_strides) first_stage_target_assigner = target_assigner.create_target_assigner( 'FasterRCNN', 'proposal', use_matmul_gather=use_matmul_gather_in_matcher) if use_keras: - fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor() + if multi_level: + fake_feature_extractor = FakeFasterRCNNKerasMultilevelFeatureExtractor() + else: + fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor() else: - fake_feature_extractor = FakeFasterRCNNFeatureExtractor() + if multi_level: + fake_feature_extractor = FakeFasterRCNNMultiLevelFeatureExtractor() + else: + fake_feature_extractor = FakeFasterRCNNFeatureExtractor() first_stage_box_predictor_hyperparams_text_proto = """ op: CONV @@ -377,8 +464,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): max_negatives_per_positive=None) crop_and_resize_fn = ( - ops.matmul_crop_and_resize - if use_matmul_crop_and_resize else ops.native_crop_and_resize) + 
spatial_ops.multilevel_matmul_crop_and_resize + if use_matmul_crop_and_resize + else spatial_ops.multilevel_native_crop_and_resize) common_kwargs = { 'is_training': is_training, @@ -478,8 +566,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): preprocessed_inputs, true_image_shapes = model.preprocess(images) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) - return (prediction_dict['rpn_box_predictor_features'], - prediction_dict['rpn_features_to_crop'], + return (prediction_dict['rpn_box_predictor_features'][0], + prediction_dict['rpn_features_to_crop'][0], prediction_dict['image_shape'], prediction_dict['rpn_box_encodings'], prediction_dict['rpn_objectness_predictions_with_background'], @@ -528,6 +616,92 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) + @parameterized.parameters( + {'use_static_shapes': False}, + {'use_static_shapes': True}, + ) + def test_predict_shape_in_inference_mode_first_stage_only_multi_level( + self, use_static_shapes): + batch_size = 2 + height = 50 + width = 52 + input_image_shape = (batch_size, height, width, 3) + + with test_utils.GraphContextOrNone() as g: + model = self._build_model( + is_training=False, + number_of_stages=1, + second_stage_batch_size=2, + clip_anchors_to_image=use_static_shapes, + use_static_shapes=use_static_shapes, + multi_level=True) + def graph_fn(images): + """Function to construct tf graph for the test.""" + + preprocessed_inputs, true_image_shapes = model.preprocess(images) + prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) + return (prediction_dict['rpn_box_predictor_features'][0], + prediction_dict['rpn_box_predictor_features'][1], + prediction_dict['rpn_features_to_crop'][0], + prediction_dict['rpn_features_to_crop'][1], + prediction_dict['image_shape'], + 
prediction_dict['rpn_box_encodings'], + prediction_dict['rpn_objectness_predictions_with_background'], + prediction_dict['anchors']) + + images = np.zeros(input_image_shape, dtype=np.float32) + + # In inference mode, anchors are clipped to the image window, but not + # pruned. Since MockFasterRCNN.extract_proposal_features returns a + # tensor with the same shape as its input, the expected number of anchors + # is height * width * the number of anchors per location (i.e. 3x3). + expected_num_anchors = ((height-2) * (width-2) + (height-4) * (width-4)) * 6 + expected_output_shapes = { + 'rpn_box_predictor_features_0': (batch_size, height-2, width-2, 512), + 'rpn_box_predictor_features_1': (batch_size, height-4, width-4, 512), + 'rpn_features_to_crop_0': (batch_size, height-2, width-2, 3), + 'rpn_features_to_crop_1': (batch_size, height-4, width-4, 3), + 'rpn_box_encodings': (batch_size, expected_num_anchors, 4), + 'rpn_objectness_predictions_with_background': + (batch_size, expected_num_anchors, 2), + } + + if use_static_shapes: + expected_output_shapes['anchors'] = (expected_num_anchors, 4) + else: + expected_output_shapes['anchors'] = (18300, 4) + + if use_static_shapes: + results = self.execute(graph_fn, [images], graph=g) + else: + results = self.execute_cpu(graph_fn, [images], graph=g) + + self.assertAllEqual(results[0].shape, + expected_output_shapes['rpn_box_predictor_features_0']) + self.assertAllEqual(results[1].shape, + expected_output_shapes['rpn_box_predictor_features_1']) + self.assertAllEqual(results[2].shape, + expected_output_shapes['rpn_features_to_crop_0']) + self.assertAllEqual(results[3].shape, + expected_output_shapes['rpn_features_to_crop_1']) + self.assertAllEqual(results[4], + input_image_shape) + self.assertAllEqual(results[5].shape, + expected_output_shapes['rpn_box_encodings']) + self.assertAllEqual( + results[6].shape, + expected_output_shapes['rpn_objectness_predictions_with_background']) + self.assertAllEqual(results[7].shape, + 
expected_output_shapes['anchors']) + + # Check that anchors are clipped to window. + anchors = results[5] + self.assertTrue(np.all(np.greater_equal(anchors, 0))) + self.assertTrue(np.all(np.less_equal(anchors[:, 0], height))) + self.assertTrue(np.all(np.less_equal(anchors[:, 1], width))) + self.assertTrue(np.all(np.less_equal(anchors[:, 2], height))) + self.assertTrue(np.all(np.less_equal(anchors[:, 3], width))) + def test_regularization_losses(self): with test_utils.GraphContextOrNone() as g: model = self._build_model( @@ -600,9 +774,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): def compare_results(results, expected_output_shapes): """Checks if the shape of the predictions are as expected.""" - self.assertAllEqual(results[0].shape, + self.assertAllEqual(results[0][0].shape, expected_output_shapes['rpn_box_predictor_features']) - self.assertAllEqual(results[1].shape, + self.assertAllEqual(results[1][0].shape, expected_output_shapes['rpn_features_to_crop']) self.assertAllEqual(results[2].shape, expected_output_shapes['image_shape']) @@ -745,8 +919,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): result_tensor_dict['anchors'], result_tensor_dict['rpn_box_encodings'], result_tensor_dict['rpn_objectness_predictions_with_background'], - result_tensor_dict['rpn_features_to_crop'], - result_tensor_dict['rpn_box_predictor_features'], + result_tensor_dict['rpn_features_to_crop'][0], + result_tensor_dict['rpn_box_predictor_features'][0], result_tensor_dict['final_anchors'], ) diff --git a/research/object_detection/meta_architectures/rfcn_meta_arch.py b/research/object_detection/meta_architectures/rfcn_meta_arch.py index 1228a4b90a79039aa6519ffe2d899bc80541aedc..c19dc04d6cad120140593114dccf81c5683aa306 100644 --- a/research/object_detection/meta_architectures/rfcn_meta_arch.py +++ b/research/object_detection/meta_architectures/rfcn_meta_arch.py @@ -84,7 +84,8 @@ class 
RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): resize_masks=False, freeze_batchnorm=False, return_raw_detections_during_predict=False, - output_final_box_features=False): + output_final_box_features=False, + output_final_box_rpn_features=False): """RFCNMetaArch Constructor. Args: @@ -194,8 +195,11 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): boxes in the predict() method. These are decoded boxes that have not been through postprocessing (i.e. NMS). Default False. output_final_box_features: Whether to output final box features. If true, - it crops the feauture map based on the final box prediction and returns - in the dict as detection_features. + it crops the feature map based on the final box prediction and returns + it in the dict as detection_features. + output_final_box_rpn_features: Whether to output rpn box features. If + true, it crops the rpn feature map based on the final box prediction and + returns it in the dict as detection_features. Raises: ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` @@ -245,7 +249,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): freeze_batchnorm=freeze_batchnorm, return_raw_detections_during_predict=( return_raw_detections_during_predict), - output_final_box_features=output_final_box_features) + output_final_box_features=output_final_box_features, + output_final_box_rpn_features=output_final_box_rpn_features) self._rfcn_box_predictor = second_stage_rfcn_box_predictor @@ -265,7 +270,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): [batch_size, num_valid_anchors, 2] containing class predictions (logits) for each of the anchors. Note that this tensor *includes* background class predictions (at class index 0). - rpn_features: A 4-D float32 tensor with shape + rpn_features: A list of single 4-D float32 tensor with shape [batch_size, height, width, depth] representing image features from the RPN. 
anchors: 2-D float tensor of shape @@ -313,6 +318,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): rpn_objectness_predictions_with_background, anchors, image_shape_2d, true_image_shapes) + rpn_features = rpn_features[0] box_classifier_features = ( self._extract_box_classifier_features(rpn_features)) diff --git a/research/object_detection/meta_architectures/ssd_meta_arch.py b/research/object_detection/meta_architectures/ssd_meta_arch.py index eb1fd320d7061a72fe6fa48955b421eee3b0f96e..055e6185fd19d1e95f3a80bfee06789694d26031 100644 --- a/research/object_detection/meta_architectures/ssd_meta_arch.py +++ b/research/object_detection/meta_architectures/ssd_meta_arch.py @@ -1308,10 +1308,17 @@ class SSDMetaArch(model.DetectionModel): to be used to restore Slim-based models when running Tensorflow 1.x. Args: - fine_tune_checkpoint_type: whether to restore from a full detection - checkpoint (with compatible variable names) or to restore from a - classification checkpoint for initialization prior to training. - Valid values: `detection`, `classification`. Default 'detection'. + fine_tune_checkpoint_type: A string inidicating the subset of variables + to load. Valid values: `detection`, `classification`, `full`. Default + `detection`. + An SSD checkpoint has three parts: + 1) Classification Network (like ResNet) + 2) DeConv layers (for FPN) + 3) Box/Class prediction parameters + The parameters will be loaded using the following strategy: + `classification` - will load #1 + `detection` - will load #1, #2 + `full` - will load #1, #2, #3 Returns: A dict mapping keys to Trackable objects (tf.Module or Checkpoint). 
@@ -1325,6 +1332,10 @@ class SSDMetaArch(model.DetectionModel): fake_model = tf.train.Checkpoint( _feature_extractor=self._feature_extractor) return {'model': fake_model} + + elif fine_tune_checkpoint_type == 'full': + return {'model': self} + else: raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format( fine_tune_checkpoint_type)) diff --git a/research/object_detection/meta_architectures/ssd_meta_arch_test.py b/research/object_detection/meta_architectures/ssd_meta_arch_test.py index 585eb1778f72deae1aeee45bfbf1d18fa3af1212..7ad061e8d40bbb261bd7c51d9cf8350ecb8883be 100644 --- a/research/object_detection/meta_architectures/ssd_meta_arch_test.py +++ b/research/object_detection/meta_architectures/ssd_meta_arch_test.py @@ -615,7 +615,6 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, self.assertNotIn(six.ensure_binary('FeatureExtractor'), var) def test_load_all_det_checkpoint_vars(self): - # TODO(rathodv): Support TF2.X if self.is_tf2(): return test_graph_detection = tf.Graph() with test_graph_detection.as_default(): @@ -634,6 +633,39 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase, self.assertIsInstance(var_map, dict) self.assertIn('another_variable', var_map) + def test_load_checkpoint_vars_tf2(self): + + if not self.is_tf2(): + self.skipTest('Not running TF2 checkpoint test with TF1.') + + model, _, _, _ = self._create_model() + inputs_shape = [2, 2, 2, 3] + inputs = tf.cast( + tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32), + dtype=tf.float32) + model(inputs) + + detection_var_names = sorted([ + var.name for var in model.restore_from_objects('detection')[ + 'model']._feature_extractor.weights + ]) + expected_detection_names = [ + 'ssd_meta_arch/fake_ssd_keras_feature_extractor/mock_model/layer1/bias:0', + 'ssd_meta_arch/fake_ssd_keras_feature_extractor/mock_model/layer1/kernel:0' + ] + self.assertEqual(detection_var_names, expected_detection_names) + + full_var_names = sorted([ + var.name 
for var in + model.restore_from_objects('full')['model'].weights + ]) + + exepcted_full_names = ['box_predictor_var:0'] + expected_detection_names + self.assertEqual(exepcted_full_names, full_var_names) + # TODO(vighneshb) Add similar test for classification checkpoint type. + # TODO(vighneshb) Test loading a checkpoint from disk to verify that + # checkpoints are loaded correctly. + def test_loss_results_are_correct_with_random_example_sampling(self): with test_utils.GraphContextOrNone() as g: model, num_classes, _, _ = self._create_model( diff --git a/research/object_detection/metrics/coco_evaluation.py b/research/object_detection/metrics/coco_evaluation.py index f721bbe3a503666d938fe4233b4619c044301e09..89437bd1fb3854d0de8b9ea8ff07ce5f14b7d1aa 100644 --- a/research/object_detection/metrics/coco_evaluation.py +++ b/research/object_detection/metrics/coco_evaluation.py @@ -34,7 +34,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): def __init__(self, categories, include_metrics_per_category=False, - all_metrics_per_category=False): + all_metrics_per_category=False, + skip_predictions_for_unlabeled_class=False, + super_categories=None): """Constructor. Args: @@ -46,6 +48,13 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): each category in per_category_ap. Be careful with setting it to true if you have more than handful of categories, because it will pollute your mldash. + skip_predictions_for_unlabeled_class: Skip predictions that do not match + with the labeled classes for the image. + super_categories: None or a python dict mapping super-category names + (strings) to lists of categories (corresponding to category names + in the label_map). Metrics are aggregated along these super-categories + and added to the `per_category_ap` and are associated with the name + `PerformanceBySuperCategory/`. 
""" super(CocoDetectionEvaluator, self).__init__(categories) # _image_ids is a dictionary that maps unique image ids to Booleans which @@ -58,6 +67,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): self._metrics = None self._include_metrics_per_category = include_metrics_per_category self._all_metrics_per_category = all_metrics_per_category + self._skip_predictions_for_unlabeled_class = skip_predictions_for_unlabeled_class + self._groundtruth_labeled_classes = {} + self._super_categories = super_categories def clear(self): """Clears the state to prepare for a fresh evaluation.""" @@ -92,6 +104,10 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): numpy array of keypoint visibilities with shape [num_gt_boxes, num_keypoints]. Integer is treated as an enum with 0=not labeled, 1=labeled but not visible and 2=labeled and visible. + InputDataFields.groundtruth_labeled_classes (optional): a tensor of + shape [num_classes + 1] containing the multi-hot tensor indicating the + classes that each image is labeled for. Note that the classes labels + are 1-indexed. """ if image_id in self._image_ids: tf.logging.warning('Ignoring ground truth with image id %s since it was ' @@ -134,6 +150,19 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. 
groundtruth_boxes].shape[0] + if (standard_fields.InputDataFields.groundtruth_labeled_classes + ) in groundtruth_dict: + labeled_classes = groundtruth_dict[ + standard_fields.InputDataFields.groundtruth_labeled_classes] + if labeled_classes.shape != (len(self._category_id_set) + 1,): + raise ValueError('Invalid shape for groundtruth labeled classes: {}, ' + 'num_categories_including_background: {}'.format( + labeled_classes, + len(self._category_id_set) + 1)) + self._groundtruth_labeled_classes[image_id] = np.flatnonzero( + groundtruth_dict[standard_fields.InputDataFields + .groundtruth_labeled_classes] == 1).tolist() + # Boolean to indicate whether a detection has been added for this image. self._image_ids[image_id] = False @@ -173,17 +202,41 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): standard_fields.DetectionResultFields.detection_keypoints) if detection_keypoints is not None and not detection_keypoints.shape[0]: detection_keypoints = None - self._detection_boxes_list.extend( - coco_tools.ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self._category_id_set, - detection_boxes=detections_dict[ - standard_fields.DetectionResultFields.detection_boxes], - detection_scores=detections_dict[ - standard_fields.DetectionResultFields.detection_scores], - detection_classes=detections_dict[ - standard_fields.DetectionResultFields.detection_classes], - detection_keypoints=detection_keypoints)) + + if self._skip_predictions_for_unlabeled_class: + det_classes = detections_dict[ + standard_fields.DetectionResultFields.detection_classes] + num_det_boxes = det_classes.shape[0] + keep_box_ids = [] + for box_id in range(num_det_boxes): + if det_classes[box_id] in self._groundtruth_labeled_classes[image_id]: + keep_box_ids.append(box_id) + self._detection_boxes_list.extend( + coco_tools.ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self._category_id_set, + 
detection_boxes=detections_dict[ + standard_fields.DetectionResultFields.detection_boxes] + [keep_box_ids], + detection_scores=detections_dict[ + standard_fields.DetectionResultFields.detection_scores] + [keep_box_ids], + detection_classes=detections_dict[ + standard_fields.DetectionResultFields.detection_classes] + [keep_box_ids], + detection_keypoints=detection_keypoints)) + else: + self._detection_boxes_list.extend( + coco_tools.ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self._category_id_set, + detection_boxes=detections_dict[ + standard_fields.DetectionResultFields.detection_boxes], + detection_scores=detections_dict[ + standard_fields.DetectionResultFields.detection_scores], + detection_classes=detections_dict[ + standard_fields.DetectionResultFields.detection_classes], + detection_keypoints=detection_keypoints)) self._image_ids[image_id] = True def dump_detections_to_json_file(self, json_output_path): @@ -233,6 +286,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): no supercategories exist). For backward compatibility 'PerformanceByCategory' is included in the output regardless of all_metrics_per_category. 
+ If super_categories are provided, then this will additionally include + metrics aggregated along the super_categories with keys of the form: + `PerformanceBySuperCategory/` """ tf.logging.info('Performing evaluation on %d images.', len(self._image_ids)) groundtruth_dict = { @@ -247,7 +303,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): coco_wrapped_groundtruth, coco_wrapped_detections, agnostic_mode=False) box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( include_metrics_per_category=self._include_metrics_per_category, - all_metrics_per_category=self._all_metrics_per_category) + all_metrics_per_category=self._all_metrics_per_category, + super_categories=self._super_categories) box_metrics.update(box_per_category_ap) box_metrics = {'DetectionBoxes_'+ key: value for key, value in iter(box_metrics.items())} @@ -271,24 +328,20 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): None when executing eagerly, or an update_op that can be used to update the eval metrics in `tf.estimator.EstimatorSpec`. 
""" - def update_op( - image_id_batched, - groundtruth_boxes_batched, - groundtruth_classes_batched, - groundtruth_is_crowd_batched, - num_gt_boxes_per_image, - detection_boxes_batched, - detection_scores_batched, - detection_classes_batched, - num_det_boxes_per_image, - is_annotated_batched): - """Update operation for adding batch of images to Coco evaluator.""" - for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box, - det_score, det_class, num_det_box, is_annotated) in zip( + def update_op(image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, groundtruth_is_crowd_batched, + groundtruth_labeled_classes_batched, num_gt_boxes_per_image, + detection_boxes_batched, detection_scores_batched, + detection_classes_batched, num_det_boxes_per_image, + is_annotated_batched): + """Update operation for adding batch of images to Coco evaluator.""" + for (image_id, gt_box, gt_class, gt_is_crowd, gt_labeled_classes, + num_gt_box, det_box, det_score, det_class, + num_det_box, is_annotated) in zip( image_id_batched, groundtruth_boxes_batched, groundtruth_classes_batched, groundtruth_is_crowd_batched, - num_gt_boxes_per_image, + groundtruth_labeled_classes_batched, num_gt_boxes_per_image, detection_boxes_batched, detection_scores_batched, detection_classes_batched, num_det_boxes_per_image, is_annotated_batched): @@ -297,7 +350,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): image_id, { 'groundtruth_boxes': gt_box[:num_gt_box], 'groundtruth_classes': gt_class[:num_gt_box], - 'groundtruth_is_crowd': gt_is_crowd[:num_gt_box] + 'groundtruth_is_crowd': gt_is_crowd[:num_gt_box], + 'groundtruth_labeled_classes': gt_labeled_classes }) self.add_single_detected_image_info( image_id, @@ -313,22 +367,38 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] groundtruth_is_crowd = eval_dict.get( 
input_data_fields.groundtruth_is_crowd, None) + groundtruth_labeled_classes = eval_dict.get( + input_data_fields.groundtruth_labeled_classes, None) detection_boxes = eval_dict[detection_fields.detection_boxes] detection_scores = eval_dict[detection_fields.detection_scores] detection_classes = eval_dict[detection_fields.detection_classes] num_gt_boxes_per_image = eval_dict.get( - 'num_groundtruth_boxes_per_image', None) - num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None) + input_data_fields.num_groundtruth_boxes, None) + num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections, + None) is_annotated = eval_dict.get('is_annotated', None) if groundtruth_is_crowd is None: groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool) + + # If groundtruth_labeled_classes is not provided, make it equal to the + # detection_classes. This assumes that all predictions will be kept to + # compute eval metrics. + if groundtruth_labeled_classes is None: + groundtruth_labeled_classes = tf.reduce_max( + tf.one_hot( + tf.cast(detection_classes, tf.int32), + len(self._category_id_set) + 1), + axis=-2) + if not image_id.shape.as_list(): # Apply a batch dimension to all tensors. 
image_id = tf.expand_dims(image_id, 0) groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0) + groundtruth_labeled_classes = tf.expand_dims(groundtruth_labeled_classes, + 0) detection_boxes = tf.expand_dims(detection_boxes, 0) detection_scores = tf.expand_dims(detection_scores, 0) detection_classes = tf.expand_dims(detection_classes, 0) @@ -359,16 +429,12 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator): if is_annotated is None: is_annotated = tf.ones_like(image_id, dtype=tf.bool) - return tf.py_func(update_op, [image_id, - groundtruth_boxes, - groundtruth_classes, - groundtruth_is_crowd, - num_gt_boxes_per_image, - detection_boxes, - detection_scores, - detection_classes, - num_det_boxes_per_image, - is_annotated], []) + return tf.py_func(update_op, [ + image_id, groundtruth_boxes, groundtruth_classes, groundtruth_is_crowd, + groundtruth_labeled_classes, num_gt_boxes_per_image, detection_boxes, + detection_scores, detection_classes, num_det_boxes_per_image, + is_annotated + ], []) def get_estimator_eval_metric_ops(self, eval_dict): """Returns a dictionary of eval metric ops. @@ -894,7 +960,10 @@ class CocoKeypointEvaluator(CocoDetectionEvaluator): class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): """Class to evaluate COCO detection metrics.""" - def __init__(self, categories, include_metrics_per_category=False): + def __init__(self, categories, + include_metrics_per_category=False, + all_metrics_per_category=False, + super_categories=None): """Constructor. Args: @@ -902,6 +971,15 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): 'id': (required) an integer id uniquely identifying this category. 'name': (required) string representing category name e.g., 'cat', 'dog'. include_metrics_per_category: If True, include metrics for each category. 
+ all_metrics_per_category: Whether to include all the summary metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. + super_categories: None or a python dict mapping super-category names + (strings) to lists of categories (corresponding to category names + in the label_map). Metrics are aggregated along these super-categories + and added to the `per_category_ap` and are associated with the name + `PerformanceBySuperCategory/`. """ super(CocoMaskEvaluator, self).__init__(categories) self._image_id_to_mask_shape_map = {} @@ -911,6 +989,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): self._category_id_set = set([cat['id'] for cat in self._categories]) self._annotation_id = 1 self._include_metrics_per_category = include_metrics_per_category + self._super_categories = super_categories + self._all_metrics_per_category = all_metrics_per_category def clear(self): """Clears the state to prepare for a fresh evaluation.""" @@ -939,12 +1019,27 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): [num_boxes, image_height, image_width] containing groundtruth masks corresponding to the boxes. The elements of the array must be in {0, 1}. + InputDataFields.groundtruth_is_crowd (optional): integer numpy array of + shape [num_boxes] containing iscrowd flag for groundtruth boxes. + InputDataFields.groundtruth_area (optional): float numpy array of + shape [num_boxes] containing the area (in the original absolute + coordinates) of the annotated object. """ if image_id in self._image_id_to_mask_shape_map: tf.logging.warning('Ignoring ground truth with image id %s since it was ' 'previously added', image_id) return + # Drop optional fields if empty tensor. 
+ groundtruth_is_crowd = groundtruth_dict.get( + standard_fields.InputDataFields.groundtruth_is_crowd) + groundtruth_area = groundtruth_dict.get( + standard_fields.InputDataFields.groundtruth_area) + if groundtruth_is_crowd is not None and not groundtruth_is_crowd.shape[0]: + groundtruth_is_crowd = None + if groundtruth_area is not None and not groundtruth_area.shape[0]: + groundtruth_area = None + groundtruth_instance_masks = groundtruth_dict[ standard_fields.InputDataFields.groundtruth_instance_masks] groundtruth_instance_masks = convert_masks_to_binary( @@ -960,7 +1055,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): groundtruth_classes=groundtruth_dict[standard_fields. InputDataFields. groundtruth_classes], - groundtruth_masks=groundtruth_instance_masks)) + groundtruth_masks=groundtruth_instance_masks, + groundtruth_is_crowd=groundtruth_is_crowd, + groundtruth_area=groundtruth_area)) self._annotation_id += groundtruth_dict[standard_fields.InputDataFields. groundtruth_boxes].shape[0] self._image_id_to_mask_shape_map[image_id] = groundtruth_dict[ @@ -1067,6 +1164,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): no supercategories exist). For backward compatibility 'PerformanceByCategory' is included in the output regardless of all_metrics_per_category. 
+ If super_categories are provided, then this will additionally include + metrics aggregated along the super_categories with keys of the form: + `PerformanceBySuperCategory/` """ groundtruth_dict = { 'annotations': self._groundtruth_list, @@ -1083,7 +1183,9 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): coco_wrapped_groundtruth, coco_wrapped_detection_masks, agnostic_mode=False, iou_type='segm') mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics( - include_metrics_per_category=self._include_metrics_per_category) + include_metrics_per_category=self._include_metrics_per_category, + super_categories=self._super_categories, + all_metrics_per_category=self._all_metrics_per_category) mask_metrics.update(mask_per_category_ap) mask_metrics = {'DetectionMasks_'+ key: value for key, value in mask_metrics.items()} @@ -1112,18 +1214,20 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): groundtruth_instance_masks_batched, groundtruth_is_crowd_batched, num_gt_boxes_per_image, detection_scores_batched, detection_classes_batched, - detection_masks_batched, num_det_boxes_per_image): + detection_masks_batched, num_det_boxes_per_image, + original_image_spatial_shape): """Update op for metrics.""" for (image_id, groundtruth_boxes, groundtruth_classes, groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box, detection_scores, detection_classes, - detection_masks, num_det_box) in zip( + detection_masks, num_det_box, original_image_shape) in zip( image_id_batched, groundtruth_boxes_batched, groundtruth_classes_batched, groundtruth_instance_masks_batched, groundtruth_is_crowd_batched, num_gt_boxes_per_image, detection_scores_batched, detection_classes_batched, - detection_masks_batched, num_det_boxes_per_image): + detection_masks_batched, num_det_boxes_per_image, + original_image_spatial_shape): self.add_single_ground_truth_image_info( image_id, { 'groundtruth_boxes': @@ -1131,7 +1235,10 @@ class 
CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): 'groundtruth_classes': groundtruth_classes[:num_gt_box], 'groundtruth_instance_masks': - groundtruth_instance_masks[:num_gt_box], + groundtruth_instance_masks[ + :num_gt_box, + :original_image_shape[0], + :original_image_shape[1]], 'groundtruth_is_crowd': groundtruth_is_crowd[:num_gt_box] }) @@ -1139,13 +1246,18 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): image_id, { 'detection_scores': detection_scores[:num_det_box], 'detection_classes': detection_classes[:num_det_box], - 'detection_masks': detection_masks[:num_det_box] + 'detection_masks': detection_masks[ + :num_det_box, + :original_image_shape[0], + :original_image_shape[1]] }) # Unpack items from the evaluation dictionary. input_data_fields = standard_fields.InputDataFields detection_fields = standard_fields.DetectionResultFields image_id = eval_dict[input_data_fields.key] + original_image_spatial_shape = eval_dict[ + input_data_fields.original_image_spatial_shape] groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] groundtruth_instance_masks = eval_dict[ @@ -1197,7 +1309,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): image_id, groundtruth_boxes, groundtruth_classes, groundtruth_instance_masks, groundtruth_is_crowd, num_gt_boxes_per_image, detection_scores, detection_classes, - detection_masks, num_det_boxes_per_image + detection_masks, num_det_boxes_per_image, original_image_spatial_shape ], []) def get_estimator_eval_metric_ops(self, eval_dict): @@ -1224,15 +1336,15 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator): metric_names = ['DetectionMasks_Precision/mAP', 'DetectionMasks_Precision/mAP@.50IOU', 'DetectionMasks_Precision/mAP@.75IOU', - 'DetectionMasks_Precision/mAP (large)', - 'DetectionMasks_Precision/mAP (medium)', 'DetectionMasks_Precision/mAP (small)', + 
'DetectionMasks_Precision/mAP (medium)', + 'DetectionMasks_Precision/mAP (large)', 'DetectionMasks_Recall/AR@1', 'DetectionMasks_Recall/AR@10', 'DetectionMasks_Recall/AR@100', - 'DetectionMasks_Recall/AR@100 (large)', + 'DetectionMasks_Recall/AR@100 (small)', 'DetectionMasks_Recall/AR@100 (medium)', - 'DetectionMasks_Recall/AR@100 (small)'] + 'DetectionMasks_Recall/AR@100 (large)'] if self._include_metrics_per_category: for category_dict in self._categories: metric_names.append('DetectionMasks_PerformanceByCategory/mAP/' + diff --git a/research/object_detection/metrics/coco_evaluation_test.py b/research/object_detection/metrics/coco_evaluation_test.py index 110690bf211fb9ab903c3df433e542a977c64ff0..8cfb3ee5a19ea242e625cba50841ff107e918d95 100644 --- a/research/object_detection/metrics/coco_evaluation_test.py +++ b/research/object_detection/metrics/coco_evaluation_test.py @@ -255,9 +255,7 @@ class CocoDetectionEvaluationTest(tf.test.TestCase): @unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X') class CocoEvaluationPyFuncTest(tf.test.TestCase): - def testGetOneMAPWithMatchingGroundtruthAndDetections(self): - coco_evaluator = coco_evaluation.CocoDetectionEvaluator( - _get_categories_list()) + def _MatchingGroundtruthAndDetections(self, coco_evaluator): image_id = tf.placeholder(tf.string, shape=()) groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) @@ -330,6 +328,121 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase): self.assertFalse(coco_evaluator._detection_boxes_list) self.assertFalse(coco_evaluator._image_ids) + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list()) + self._MatchingGroundtruthAndDetections(coco_evaluator) + + # Configured to skip unmatched detector predictions with + # groundtruth_labeled_classes, but reverts to fully-labeled eval since there + # are no 
groundtruth_labeled_classes set. + def testGetMAPWithSkipUnmatchedPredictionsIgnoreGrountruthLabeledClasses( + self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list(), skip_predictions_for_unlabeled_class=True) + self._MatchingGroundtruthAndDetections(coco_evaluator) + + # Test skipping unmatched detector predictions with + # groundtruth_labeled_classes. + def testGetMAPWithSkipUnmatchedPredictions(self): + coco_evaluator = coco_evaluation.CocoDetectionEvaluator( + _get_categories_list(), skip_predictions_for_unlabeled_class=True) + image_id = tf.placeholder(tf.string, shape=()) + groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) + groundtruth_labeled_classes = tf.placeholder(tf.float32, shape=(None)) + detection_boxes = tf.placeholder(tf.float32, shape=(None, 4)) + detection_scores = tf.placeholder(tf.float32, shape=(None)) + detection_classes = tf.placeholder(tf.float32, shape=(None)) + + input_data_fields = standard_fields.InputDataFields + detection_fields = standard_fields.DetectionResultFields + eval_dict = { + input_data_fields.key: + image_id, + input_data_fields.groundtruth_boxes: + groundtruth_boxes, + input_data_fields.groundtruth_classes: + groundtruth_classes, + input_data_fields.groundtruth_labeled_classes: + groundtruth_labeled_classes, + detection_fields.detection_boxes: + detection_boxes, + detection_fields.detection_scores: + detection_scores, + detection_fields.detection_classes: + detection_classes + } + + eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict) + + _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP'] + + with self.test_session() as sess: + sess.run( + update_op, + feed_dict={ + image_id: + 'image1', + groundtruth_boxes: + np.array([[100., 100., 200., 200.]]), + groundtruth_classes: + np.array([1]), + # Only class 1 is exhaustively labeled for image1. 
+ groundtruth_labeled_classes: + np.array([0., 1., 0., 0.]), + detection_boxes: + np.array([[100., 100., 200., 200.], [100., 100., 200., + 200.]]), + detection_scores: + np.array([.8, .95]), + detection_classes: + np.array([1, 2]) + }) + sess.run( + update_op, + feed_dict={ + image_id: 'image2', + groundtruth_boxes: np.array([[50., 50., 100., 100.]]), + groundtruth_classes: np.array([3]), + groundtruth_labeled_classes: np.array([0., 0., 0., 1.]), + detection_boxes: np.array([[50., 50., 100., 100.]]), + detection_scores: np.array([.7]), + detection_classes: np.array([3]) + }) + sess.run( + update_op, + feed_dict={ + image_id: 'image3', + groundtruth_boxes: np.array([[25., 25., 50., 50.]]), + groundtruth_classes: np.array([2]), + groundtruth_labeled_classes: np.array([0., 0., 1., 0.]), + detection_boxes: np.array([[25., 25., 50., 50.]]), + detection_scores: np.array([.9]), + detection_classes: np.array([2]) + }) + metrics = {} + for key, (value_op, _) in eval_metric_ops.items(): + metrics[key] = value_op + metrics = sess.run(metrics) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], + 1.0) + self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0) + 
self.assertFalse(coco_evaluator._groundtruth_list) + self.assertFalse(coco_evaluator._detection_boxes_list) + self.assertFalse(coco_evaluator._image_ids) + def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self): coco_evaluator = coco_evaluation.CocoDetectionEvaluator( _get_categories_list()) @@ -1443,6 +1556,41 @@ class CocoMaskEvaluationTest(tf.test.TestCase): self.assertFalse(coco_evaluator._groundtruth_list) self.assertFalse(coco_evaluator._detection_masks_list) + def testGetOneMAPWithMatchingGroundtruthAndDetectionsSkipCrowd(self): + """Tests computing mAP with is_crowd GT boxes skipped.""" + coco_evaluator = coco_evaluation.CocoMaskEvaluator( + _get_categories_list()) + coco_evaluator.add_single_ground_truth_image_info( + image_id='image1', + groundtruth_dict={ + standard_fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.], [99., 99., 200., 200.]]), + standard_fields.InputDataFields.groundtruth_classes: + np.array([1, 2]), + standard_fields.InputDataFields.groundtruth_is_crowd: + np.array([0, 1]), + standard_fields.InputDataFields.groundtruth_instance_masks: + np.concatenate( + [np.pad(np.ones([1, 100, 100], dtype=np.uint8), + ((0, 0), (100, 56), (100, 56)), mode='constant'), + np.pad(np.ones([1, 101, 101], dtype=np.uint8), + ((0, 0), (99, 56), (99, 56)), mode='constant')], + axis=0) + }) + coco_evaluator.add_single_detected_image_info( + image_id='image1', + detections_dict={ + standard_fields.DetectionResultFields.detection_scores: + np.array([.8]), + standard_fields.DetectionResultFields.detection_classes: + np.array([1]), + standard_fields.DetectionResultFields.detection_masks: + np.pad(np.ones([1, 100, 100], dtype=np.uint8), + ((0, 0), (100, 56), (100, 56)), mode='constant') + }) + metrics = coco_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0) + @unittest.skipIf(tf_version.is_tf2(), 'Only Supported in TF1.X') class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): @@ 
-1453,6 +1601,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) + original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2)) detection_scores = tf.placeholder(tf.float32, shape=(None)) detection_classes = tf.placeholder(tf.float32, shape=(None)) detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) @@ -1464,6 +1613,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes, input_data_fields.groundtruth_instance_masks: groundtruth_masks, + input_data_fields.original_image_spatial_shape: + original_image_spatial_shape, detection_fields.detection_scores: detection_scores, detection_fields.detection_classes: detection_classes, detection_fields.detection_masks: detection_masks, @@ -1489,6 +1640,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), mode='constant') ]), + original_image_spatial_shape: np.array([[120, 120]]), detection_scores: np.array([.9, .8]), detection_classes: @@ -1513,6 +1665,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4)) groundtruth_classes = tf.placeholder(tf.float32, shape=(None)) groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) + original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2)) detection_scores = tf.placeholder(tf.float32, shape=(None)) detection_classes = tf.placeholder(tf.float32, shape=(None)) detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None)) @@ -1524,6 +1677,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): input_data_fields.groundtruth_boxes: groundtruth_boxes, 
input_data_fields.groundtruth_classes: groundtruth_classes, input_data_fields.groundtruth_instance_masks: groundtruth_masks, + input_data_fields.original_image_spatial_shape: + original_image_spatial_shape, detection_fields.detection_scores: detection_scores, detection_fields.detection_classes: detection_classes, detection_fields.detection_masks: detection_masks, @@ -1553,6 +1708,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), mode='constant') ]), + original_image_spatial_shape: np.array([[120, 120], [120, 120]]), detection_scores: np.array([.9, .8]), detection_classes: @@ -1577,6 +1733,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): dtype=np.uint8), ((0, 0), (10, 10), (10, 10)), mode='constant'), + original_image_spatial_shape: np.array([[70, 70]]), detection_scores: np.array([.8]), detection_classes: np.array([1]), detection_masks: np.pad(np.ones([1, 50, 50], dtype=np.uint8), @@ -1592,6 +1749,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): dtype=np.uint8), ((0, 0), (10, 10), (10, 10)), mode='constant'), + original_image_spatial_shape: np.array([[45, 45]]), detection_scores: np.array([.8]), detection_classes: np.array([1]), detection_masks: np.pad(np.ones([1, 25, 25], @@ -1630,6 +1788,7 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) groundtruth_masks = tf.placeholder( tf.uint8, shape=(batch_size, None, None, None)) + original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2)) detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None)) detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None)) detection_masks = tf.placeholder( @@ -1642,6 +1801,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes, 
input_data_fields.groundtruth_instance_masks: groundtruth_masks, + input_data_fields.original_image_spatial_shape: + original_image_spatial_shape, detection_fields.detection_scores: detection_scores, detection_fields.detection_classes: detection_classes, detection_fields.detection_masks: detection_masks, @@ -1678,6 +1839,8 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase): mode='constant') ], axis=0), + original_image_spatial_shape: np.array( + [[100, 100], [100, 100], [100, 100]]), detection_scores: np.array([[.8], [.8], [.8]]), detection_classes: diff --git a/research/object_detection/metrics/coco_tools.py b/research/object_detection/metrics/coco_tools.py index 790d5bdef23bef149e8eb1afa9cdecb9ce458e6e..b3c5a92765fa3e8d49eb0701c3284d3fdab0c6fe 100644 --- a/research/object_detection/metrics/coco_tools.py +++ b/research/object_detection/metrics/coco_tools.py @@ -142,6 +142,35 @@ class COCOWrapper(coco.COCO): return results +COCO_METRIC_NAMES_AND_INDEX = ( + ('Precision/mAP', 0), + ('Precision/mAP@.50IOU', 1), + ('Precision/mAP@.75IOU', 2), + ('Precision/mAP (small)', 3), + ('Precision/mAP (medium)', 4), + ('Precision/mAP (large)', 5), + ('Recall/AR@1', 6), + ('Recall/AR@10', 7), + ('Recall/AR@100', 8), + ('Recall/AR@100 (small)', 9), + ('Recall/AR@100 (medium)', 10), + ('Recall/AR@100 (large)', 11) +) + +COCO_KEYPOINT_METRIC_NAMES_AND_INDEX = ( + ('Precision/mAP', 0), + ('Precision/mAP@.50IOU', 1), + ('Precision/mAP@.75IOU', 2), + ('Precision/mAP (medium)', 3), + ('Precision/mAP (large)', 4), + ('Recall/AR@1', 5), + ('Recall/AR@10', 6), + ('Recall/AR@100', 7), + ('Recall/AR@100 (medium)', 8), + ('Recall/AR@100 (large)', 9) +) + + class COCOEvalWrapper(cocoeval.COCOeval): """Wrapper for the pycocotools COCOeval class. 
@@ -202,7 +231,8 @@ class COCOEvalWrapper(cocoeval.COCOeval): def ComputeMetrics(self, include_metrics_per_category=False, - all_metrics_per_category=False): + all_metrics_per_category=False, + super_categories=None): """Computes detection/keypoint metrics. Args: @@ -211,6 +241,11 @@ class COCOEvalWrapper(cocoeval.COCOeval): each category in per_category_ap. Be careful with setting it to true if you have more than handful of categories, because it will pollute your mldash. + super_categories: None or a python dict mapping super-category names + (strings) to lists of categories (corresponding to category names + in the label_map). Metrics are aggregated along these super-categories + and added to the `per_category_ap` and are associated with the name + `PerformanceBySuperCategory/`. Returns: 1. summary_metrics: a dictionary holding: @@ -240,6 +275,9 @@ class COCOEvalWrapper(cocoeval.COCOeval): output regardless of all_metrics_per_category. If evaluating class-agnostic mode, per_category_ap is an empty dictionary. + If super_categories are provided, then this will additionally include + metrics aggregated along the super_categories with keys of the form: + `PerformanceBySuperCategory/` Raises: ValueError: If category_stats does not exist. 
@@ -250,80 +288,71 @@ class COCOEvalWrapper(cocoeval.COCOeval): summary_metrics = {} if self._iou_type in ['bbox', 'segm']: - summary_metrics = OrderedDict([('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), - ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11])]) + summary_metrics = OrderedDict( + [(name, self.stats[index]) for name, index in + COCO_METRIC_NAMES_AND_INDEX]) elif self._iou_type == 'keypoints': category_id = self.GetCategoryIdList()[0] category_name = self.GetCategory(category_id)['name'] summary_metrics = OrderedDict([]) - summary_metrics['Precision/mAP ByCategory/{}'.format( - category_name)] = self.stats[0] - summary_metrics['Precision/mAP@.50IOU ByCategory/{}'.format( - category_name)] = self.stats[1] - summary_metrics['Precision/mAP@.75IOU ByCategory/{}'.format( - category_name)] = self.stats[2] - summary_metrics['Precision/mAP (medium) ByCategory/{}'.format( - category_name)] = self.stats[3] - summary_metrics['Precision/mAP (large) ByCategory/{}'.format( - category_name)] = self.stats[4] - summary_metrics['Recall/AR@1 ByCategory/{}'.format( - category_name)] = self.stats[5] - summary_metrics['Recall/AR@10 ByCategory/{}'.format( - category_name)] = self.stats[6] - summary_metrics['Recall/AR@100 ByCategory/{}'.format( - category_name)] = self.stats[7] - summary_metrics['Recall/AR@100 (medium) ByCategory/{}'.format( - category_name)] = self.stats[8] - summary_metrics['Recall/AR@100 (large) ByCategory/{}'.format( - category_name)] = self.stats[9] + for metric_name, index in COCO_KEYPOINT_METRIC_NAMES_AND_INDEX: + value = self.stats[index] + summary_metrics['{} 
ByCategory/{}'.format( + metric_name, category_name)] = value if not include_metrics_per_category: return summary_metrics, {} if not hasattr(self, 'category_stats'): raise ValueError('Category stats do not exist') per_category_ap = OrderedDict([]) + super_category_ap = OrderedDict([]) if self.GetAgnosticMode(): return summary_metrics, per_category_ap + + if super_categories: + for key in super_categories: + super_category_ap['PerformanceBySuperCategory/{}'.format(key)] = 0 + + if all_metrics_per_category: + for metric_name, _ in COCO_METRIC_NAMES_AND_INDEX: + metric_key = '{} BySuperCategory/{}'.format(metric_name, key) + super_category_ap[metric_key] = 0 + for category_index, category_id in enumerate(self.GetCategoryIdList()): category = self.GetCategory(category_id)['name'] # Kept for backward compatilbility per_category_ap['PerformanceByCategory/mAP/{}'.format( category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - 
per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] + if all_metrics_per_category: + for metric_name, index in COCO_METRIC_NAMES_AND_INDEX: + metric_key = '{} ByCategory/{}'.format(metric_name, category) + per_category_ap[metric_key] = self.category_stats[index][ + category_index] + + if super_categories: + for key in super_categories: + if category in super_categories[key]: + metric_key = 'PerformanceBySuperCategory/{}'.format(key) + super_category_ap[metric_key] += self.category_stats[0][ + category_index] + if all_metrics_per_category: + for metric_name, index in COCO_METRIC_NAMES_AND_INDEX: + metric_key = '{} BySuperCategory/{}'.format(metric_name, key) + super_category_ap[metric_key] += ( + self.category_stats[index][category_index]) + + if super_categories: + for key in super_categories: + length = len(super_categories[key]) + super_category_ap['PerformanceBySuperCategory/{}'.format( + key)] /= length + + if all_metrics_per_category: + for metric_name, _ in COCO_METRIC_NAMES_AND_INDEX: + super_category_ap['{} BySuperCategory/{}'.format( + metric_name, key)] /= length + + per_category_ap.update(super_category_ap) return summary_metrics, per_category_ap diff --git a/research/object_detection/metrics/lvis_evaluation.py b/research/object_detection/metrics/lvis_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..4fbd6e427143e96739571ad05c54bd22b31fcc39 --- /dev/null +++ b/research/object_detection/metrics/lvis_evaluation.py @@ -0,0 +1,463 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Class for evaluating object detections with LVIS metrics.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import re + +from lvis import results as lvis_results + +import numpy as np +from six.moves import zip +import tensorflow.compat.v1 as tf + +from object_detection.core import standard_fields as fields +from object_detection.metrics import lvis_tools +from object_detection.utils import object_detection_evaluation + + +def convert_masks_to_binary(masks): + """Converts masks to 0 or 1 and uint8 type.""" + return (masks > 0).astype(np.uint8) + + +class LVISMaskEvaluator(object_detection_evaluation.DetectionEvaluator): + """Class to evaluate LVIS mask metrics.""" + + def __init__(self, + categories, + include_metrics_per_category=False, + export_path=None): + """Constructor. + + Args: + categories: A list of dicts, each of which has the following keys - + 'id': (required) an integer id uniquely identifying this category. + 'name': (required) string representing category name e.g., 'cat', 'dog'. + include_metrics_per_category: Additionally include per-category metrics + (this option is currently unsupported). + export_path: Path to export detections to LVIS compatible JSON format. 
+ """ + super(LVISMaskEvaluator, self).__init__(categories) + self._image_ids_with_detections = set([]) + self._groundtruth_list = [] + self._detection_masks_list = [] + self._category_id_set = set([cat['id'] for cat in self._categories]) + self._annotation_id = 1 + self._image_id_to_mask_shape_map = {} + self._image_id_to_verified_neg_classes = {} + self._image_id_to_not_exhaustive_classes = {} + if include_metrics_per_category: + raise ValueError('include_metrics_per_category not yet supported ' + 'for LVISMaskEvaluator.') + self._export_path = export_path + + def clear(self): + """Clears the state to prepare for a fresh evaluation.""" + self._image_id_to_mask_shape_map.clear() + self._image_ids_with_detections.clear() + self._image_id_to_verified_neg_classes.clear() + self._image_id_to_not_exhaustive_classes.clear() + self._groundtruth_list = [] + self._detection_masks_list = [] + + def add_single_ground_truth_image_info(self, + image_id, + groundtruth_dict): + """Adds groundtruth for a single image to be used for evaluation. + + If the image has already been added, a warning is logged, and groundtruth is + ignored. + + Args: + image_id: A unique string/integer identifier for the image. + groundtruth_dict: A dictionary containing - + InputDataFields.groundtruth_boxes: float32 numpy array of shape + [num_boxes, 4] containing `num_boxes` groundtruth boxes of the format + [ymin, xmin, ymax, xmax] in absolute image coordinates. + InputDataFields.groundtruth_classes: integer numpy array of shape + [num_boxes] containing 1-indexed groundtruth classes for the boxes. + InputDataFields.groundtruth_instance_masks: uint8 numpy array of shape + [num_masks, image_height, image_width] containing groundtruth masks. + The elements of the array must be in {0, 1}. + InputDataFields.groundtruth_verified_neg_classes: [num_classes + 1] + float indicator vector with values in {0, 1}. The length is + num_classes + 1 so as to be compatible with the 1-indexed groundtruth + classes. 
+ InputDataFields.groundtruth_not_exhaustive_classes: [num_classes + 1] + float indicator vector with values in {0, 1}. The length is + num_classes + 1 so as to be compatible with the 1-indexed groundtruth + classes. + InputDataFields.groundtruth_area (optional): float numpy array of + shape [num_boxes] containing the area (in the original absolute + coordinates) of the annotated object. + Raises: + ValueError: if groundtruth_dict is missing a required field + """ + if image_id in self._image_id_to_mask_shape_map: + tf.logging.warning('Ignoring ground truth with image id %s since it was ' + 'previously added', image_id) + return + for key in [fields.InputDataFields.groundtruth_boxes, + fields.InputDataFields.groundtruth_classes, + fields.InputDataFields.groundtruth_instance_masks, + fields.InputDataFields.groundtruth_verified_neg_classes, + fields.InputDataFields.groundtruth_not_exhaustive_classes]: + if key not in groundtruth_dict.keys(): + raise ValueError('groundtruth_dict missing entry: {}'.format(key)) + + groundtruth_instance_masks = groundtruth_dict[ + fields.InputDataFields.groundtruth_instance_masks] + groundtruth_instance_masks = convert_masks_to_binary( + groundtruth_instance_masks) + verified_neg_classes_shape = groundtruth_dict[ + fields.InputDataFields.groundtruth_verified_neg_classes].shape + not_exhaustive_classes_shape = groundtruth_dict[ + fields.InputDataFields.groundtruth_not_exhaustive_classes].shape + if verified_neg_classes_shape != (len(self._category_id_set) + 1,): + raise ValueError('Invalid shape for verified_neg_classes_shape.') + if not_exhaustive_classes_shape != (len(self._category_id_set) + 1,): + raise ValueError('Invalid shape for not_exhaustive_classes_shape.') + self._image_id_to_verified_neg_classes[image_id] = np.flatnonzero( + groundtruth_dict[ + fields.InputDataFields.groundtruth_verified_neg_classes] + == 1).tolist() + self._image_id_to_not_exhaustive_classes[image_id] = np.flatnonzero( + groundtruth_dict[ + 
fields.InputDataFields.groundtruth_not_exhaustive_classes] + == 1).tolist() + + # Drop optional fields if empty tensor. + groundtruth_area = groundtruth_dict.get( + fields.InputDataFields.groundtruth_area) + if groundtruth_area is not None and not groundtruth_area.shape[0]: + groundtruth_area = None + + self._groundtruth_list.extend( + lvis_tools.ExportSingleImageGroundtruthToLVIS( + image_id=image_id, + next_annotation_id=self._annotation_id, + category_id_set=self._category_id_set, + groundtruth_boxes=groundtruth_dict[ + fields.InputDataFields.groundtruth_boxes], + groundtruth_classes=groundtruth_dict[ + fields.InputDataFields.groundtruth_classes], + groundtruth_masks=groundtruth_instance_masks, + groundtruth_area=groundtruth_area) + ) + + self._annotation_id += groundtruth_dict[fields.InputDataFields. + groundtruth_boxes].shape[0] + self._image_id_to_mask_shape_map[image_id] = groundtruth_dict[ + fields.InputDataFields.groundtruth_instance_masks].shape + + def add_single_detected_image_info(self, + image_id, + detections_dict): + """Adds detections for a single image to be used for evaluation. + + If a detection has already been added for this image id, a warning is + logged, and the detection is skipped. + + Args: + image_id: A unique string/integer identifier for the image. + detections_dict: A dictionary containing - + DetectionResultFields.detection_scores: float32 numpy array of shape + [num_boxes] containing detection scores for the boxes. + DetectionResultFields.detection_classes: integer numpy array of shape + [num_boxes] containing 1-indexed detection classes for the boxes. + DetectionResultFields.detection_masks: optional uint8 numpy array of + shape [num_boxes, image_height, image_width] containing instance + masks corresponding to the boxes. The elements of the array must be + in {0, 1}. + Raises: + ValueError: If groundtruth for the image_id is not available. 
+ """ + if image_id not in self._image_id_to_mask_shape_map: + raise ValueError('Missing groundtruth for image id: {}'.format(image_id)) + + if image_id in self._image_ids_with_detections: + tf.logging.warning('Ignoring detection with image id %s since it was ' + 'previously added', image_id) + return + + groundtruth_masks_shape = self._image_id_to_mask_shape_map[image_id] + detection_masks = detections_dict[fields.DetectionResultFields. + detection_masks] + if groundtruth_masks_shape[1:] != detection_masks.shape[1:]: + raise ValueError('Spatial shape of groundtruth masks and detection masks ' + 'are incompatible: {} vs {}'.format( + groundtruth_masks_shape, + detection_masks.shape)) + detection_masks = convert_masks_to_binary(detection_masks) + + self._detection_masks_list.extend( + lvis_tools.ExportSingleImageDetectionMasksToLVIS( + image_id=image_id, + category_id_set=self._category_id_set, + detection_masks=detection_masks, + detection_scores=detections_dict[ + fields.DetectionResultFields.detection_scores], + detection_classes=detections_dict[ + fields.DetectionResultFields.detection_classes])) + self._image_ids_with_detections.update([image_id]) + + def evaluate(self): + """Evaluates the detection boxes and returns a dictionary of coco metrics. 
+ + Returns: + A dictionary holding + """ + if self._export_path: + tf.logging.info('Dumping detections to json.') + self.dump_detections_to_json_file(self._export_path) + tf.logging.info('Performing evaluation on %d images.', + len(self._image_id_to_mask_shape_map.keys())) + # pylint: disable=g-complex-comprehension + groundtruth_dict = { + 'annotations': self._groundtruth_list, + 'images': [ + { + 'id': int(image_id), + 'height': shape[1], + 'width': shape[2], + 'neg_category_ids': + self._image_id_to_verified_neg_classes[image_id], + 'not_exhaustive_category_ids': + self._image_id_to_not_exhaustive_classes[image_id] + } for image_id, shape in self._image_id_to_mask_shape_map.items()], + 'categories': self._categories + } + # pylint: enable=g-complex-comprehension + lvis_wrapped_groundtruth = lvis_tools.LVISWrapper(groundtruth_dict) + detections = lvis_results.LVISResults(lvis_wrapped_groundtruth, + self._detection_masks_list) + mask_evaluator = lvis_tools.LVISEvalWrapper( + lvis_wrapped_groundtruth, detections, iou_type='segm') + mask_metrics = mask_evaluator.ComputeMetrics() + mask_metrics = {'DetectionMasks_'+ key: value + for key, value in iter(mask_metrics.items())} + return mask_metrics + + def add_eval_dict(self, eval_dict): + """Observes an evaluation result dict for a single example. + + When executing eagerly, once all observations have been observed by this + method you can use `.evaluate()` to get the final metrics. + + When using `tf.estimator.Estimator` for evaluation this function is used by + `get_estimator_eval_metric_ops()` to construct the metric update op. + + Args: + eval_dict: A dictionary that holds tensors for evaluating an object + detection model, returned from + eval_util.result_dict_for_single_example(). + + Returns: + None when executing eagerly, or an update_op that can be used to update + the eval metrics in `tf.estimator.EstimatorSpec`. 
+ """ + def update_op(image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, + groundtruth_instance_masks_batched, + groundtruth_verified_neg_classes_batched, + groundtruth_not_exhaustive_classes_batched, + num_gt_boxes_per_image, + detection_scores_batched, detection_classes_batched, + detection_masks_batched, num_det_boxes_per_image, + original_image_spatial_shape): + """Update op for metrics.""" + + for (image_id, groundtruth_boxes, groundtruth_classes, + groundtruth_instance_masks, groundtruth_verified_neg_classes, + groundtruth_not_exhaustive_classes, num_gt_box, + detection_scores, detection_classes, + detection_masks, num_det_box, original_image_shape) in zip( + image_id_batched, groundtruth_boxes_batched, + groundtruth_classes_batched, groundtruth_instance_masks_batched, + groundtruth_verified_neg_classes_batched, + groundtruth_not_exhaustive_classes_batched, + num_gt_boxes_per_image, + detection_scores_batched, detection_classes_batched, + detection_masks_batched, num_det_boxes_per_image, + original_image_spatial_shape): + self.add_single_ground_truth_image_info( + image_id, { + input_data_fields.groundtruth_boxes: + groundtruth_boxes[:num_gt_box], + input_data_fields.groundtruth_classes: + groundtruth_classes[:num_gt_box], + input_data_fields.groundtruth_instance_masks: + groundtruth_instance_masks[ + :num_gt_box, + :original_image_shape[0], + :original_image_shape[1]], + input_data_fields.groundtruth_verified_neg_classes: + groundtruth_verified_neg_classes, + input_data_fields.groundtruth_not_exhaustive_classes: + groundtruth_not_exhaustive_classes + }) + self.add_single_detected_image_info( + image_id, { + 'detection_scores': detection_scores[:num_det_box], + 'detection_classes': detection_classes[:num_det_box], + 'detection_masks': detection_masks[ + :num_det_box, + :original_image_shape[0], + :original_image_shape[1]] + }) + + # Unpack items from the evaluation dictionary. 
+ input_data_fields = fields.InputDataFields + detection_fields = fields.DetectionResultFields + image_id = eval_dict[input_data_fields.key] + original_image_spatial_shape = eval_dict[ + input_data_fields.original_image_spatial_shape] + groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes] + groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes] + groundtruth_instance_masks = eval_dict[ + input_data_fields.groundtruth_instance_masks] + groundtruth_verified_neg_classes = eval_dict[ + input_data_fields.groundtruth_verified_neg_classes] + groundtruth_not_exhaustive_classes = eval_dict[ + input_data_fields.groundtruth_not_exhaustive_classes] + + num_gt_boxes_per_image = eval_dict.get( + input_data_fields.num_groundtruth_boxes, None) + detection_scores = eval_dict[detection_fields.detection_scores] + detection_classes = eval_dict[detection_fields.detection_classes] + detection_masks = eval_dict[detection_fields.detection_masks] + num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections, + None) + + if not image_id.shape.as_list(): + # Apply a batch dimension to all tensors. 
+ image_id = tf.expand_dims(image_id, 0) + groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0) + groundtruth_classes = tf.expand_dims(groundtruth_classes, 0) + groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0) + groundtruth_verified_neg_classes = tf.expand_dims( + groundtruth_verified_neg_classes, 0) + groundtruth_not_exhaustive_classes = tf.expand_dims( + groundtruth_not_exhaustive_classes, 0) + detection_scores = tf.expand_dims(detection_scores, 0) + detection_classes = tf.expand_dims(detection_classes, 0) + detection_masks = tf.expand_dims(detection_masks, 0) + + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2] + else: + num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0) + + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.shape(detection_scores)[1:2] + else: + num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0) + else: + if num_gt_boxes_per_image is None: + num_gt_boxes_per_image = tf.tile( + tf.shape(groundtruth_boxes)[1:2], + multiples=tf.shape(groundtruth_boxes)[0:1]) + if num_det_boxes_per_image is None: + num_det_boxes_per_image = tf.tile( + tf.shape(detection_scores)[1:2], + multiples=tf.shape(detection_scores)[0:1]) + + return tf.py_func(update_op, [ + image_id, groundtruth_boxes, groundtruth_classes, + groundtruth_instance_masks, groundtruth_verified_neg_classes, + groundtruth_not_exhaustive_classes, + num_gt_boxes_per_image, detection_scores, detection_classes, + detection_masks, num_det_boxes_per_image, original_image_spatial_shape + ], []) + + def get_estimator_eval_metric_ops(self, eval_dict): + """Returns a dictionary of eval metric ops. + + Note that once value_op is called, the detections and groundtruth added via + update_op are cleared. + + Args: + eval_dict: A dictionary that holds tensors for evaluating object detection + performance. 
For single-image evaluation, this dictionary may be + produced from eval_util.result_dict_for_single_example(). If multi-image + evaluation, `eval_dict` should contain the fields + 'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to + properly unpad the tensors from the batch. + + Returns: + a dictionary of metric names to tuple of value_op and update_op that can + be used as eval metric ops in tf.estimator.EstimatorSpec. Note that all + update ops must be run together and similarly all value ops must be run + together to guarantee correct behaviour. + """ + update_op = self.add_eval_dict(eval_dict) + metric_names = ['DetectionMasks_Precision/mAP', + 'DetectionMasks_Precision/mAP@.50IOU', + 'DetectionMasks_Precision/mAP@.75IOU', + 'DetectionMasks_Precision/mAP (small)', + 'DetectionMasks_Precision/mAP (medium)', + 'DetectionMasks_Precision/mAP (large)', + 'DetectionMasks_Recall/AR@1', + 'DetectionMasks_Recall/AR@10', + 'DetectionMasks_Recall/AR@100', + 'DetectionMasks_Recall/AR@100 (small)', + 'DetectionMasks_Recall/AR@100 (medium)', + 'DetectionMasks_Recall/AR@100 (large)'] + if self._include_metrics_per_category: + for category_dict in self._categories: + metric_names.append('DetectionMasks_PerformanceByCategory/mAP/' + + category_dict['name']) + + def first_value_func(): + self._metrics = self.evaluate() + self.clear() + return np.float32(self._metrics[metric_names[0]]) + + def value_func_factory(metric_name): + def value_func(): + return np.float32(self._metrics[metric_name]) + return value_func + + # Ensure that the metrics are only evaluated once. 
+ first_value_op = tf.py_func(first_value_func, [], tf.float32) + eval_metric_ops = {metric_names[0]: (first_value_op, update_op)} + with tf.control_dependencies([first_value_op]): + for metric_name in metric_names[1:]: + eval_metric_ops[metric_name] = (tf.py_func( + value_func_factory(metric_name), [], np.float32), update_op) + return eval_metric_ops + + def dump_detections_to_json_file(self, json_output_path): + """Saves the detections into json_output_path in the format used by MS COCO. + + Args: + json_output_path: String containing the output file's path. It can be also + None. In that case nothing will be written to the output file. + """ + if json_output_path and json_output_path is not None: + pattern = re.compile(r'\d+\.\d{8,}') + def mround(match): + return '{:.2f}'.format(float(match.group())) + + with tf.io.gfile.GFile(json_output_path, 'w') as fid: + json_string = json.dumps(self._detection_masks_list) + fid.write(re.sub(pattern, mround, json_string)) + + tf.logging.info('Dumping detections to output json file: %s', + json_output_path) diff --git a/research/object_detection/metrics/lvis_evaluation_test.py b/research/object_detection/metrics/lvis_evaluation_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2a612e5c93af03acb8823eebc010f51b585e4c41 --- /dev/null +++ b/research/object_detection/metrics/lvis_evaluation_test.py @@ -0,0 +1,182 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow_models.object_detection.metrics.coco_evaluation.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +import numpy as np +import tensorflow.compat.v1 as tf +from object_detection.core import standard_fields as fields +from object_detection.metrics import lvis_evaluation +from object_detection.utils import tf_version + + +def _get_categories_list(): + return [{ + 'id': 1, + 'name': 'person', + 'frequency': 'f' + }, { + 'id': 2, + 'name': 'dog', + 'frequency': 'c' + }, { + 'id': 3, + 'name': 'cat', + 'frequency': 'r' + }] + + +class LvisMaskEvaluationTest(tf.test.TestCase): + + def testGetOneMAPWithMatchingGroundtruthAndDetections(self): + """Tests that mAP is calculated correctly on GT and Detections.""" + masks1 = np.expand_dims(np.pad( + np.ones([100, 100], dtype=np.uint8), + ((100, 56), (100, 56)), mode='constant'), axis=0) + masks2 = np.expand_dims(np.pad( + np.ones([50, 50], dtype=np.uint8), + ((50, 156), (50, 156)), mode='constant'), axis=0) + masks3 = np.expand_dims(np.pad( + np.ones([25, 25], dtype=np.uint8), + ((25, 206), (25, 206)), mode='constant'), axis=0) + + lvis_evaluator = lvis_evaluation.LVISMaskEvaluator( + _get_categories_list()) + lvis_evaluator.add_single_ground_truth_image_info( + image_id=1, + groundtruth_dict={ + fields.InputDataFields.groundtruth_boxes: + np.array([[100., 100., 200., 200.]]), + fields.InputDataFields.groundtruth_classes: np.array([1]), + fields.InputDataFields.groundtruth_instance_masks: masks1, + fields.InputDataFields.groundtruth_verified_neg_classes: + np.array([0, 0, 0, 0]), + fields.InputDataFields.groundtruth_not_exhaustive_classes: + np.array([0, 0, 0, 0]) + }) + lvis_evaluator.add_single_detected_image_info( + image_id=1, + 
detections_dict={ + fields.DetectionResultFields.detection_masks: masks1, + fields.DetectionResultFields.detection_scores: + np.array([.8]), + fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + lvis_evaluator.add_single_ground_truth_image_info( + image_id=2, + groundtruth_dict={ + fields.InputDataFields.groundtruth_boxes: + np.array([[50., 50., 100., 100.]]), + fields.InputDataFields.groundtruth_classes: np.array([1]), + fields.InputDataFields.groundtruth_instance_masks: masks2, + fields.InputDataFields.groundtruth_verified_neg_classes: + np.array([0, 0, 0, 0]), + fields.InputDataFields.groundtruth_not_exhaustive_classes: + np.array([0, 0, 0, 0]) + }) + lvis_evaluator.add_single_detected_image_info( + image_id=2, + detections_dict={ + fields.DetectionResultFields.detection_masks: masks2, + fields.DetectionResultFields.detection_scores: + np.array([.8]), + fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + lvis_evaluator.add_single_ground_truth_image_info( + image_id=3, + groundtruth_dict={ + fields.InputDataFields.groundtruth_boxes: + np.array([[25., 25., 50., 50.]]), + fields.InputDataFields.groundtruth_classes: np.array([1]), + fields.InputDataFields.groundtruth_instance_masks: masks3, + fields.InputDataFields.groundtruth_verified_neg_classes: + np.array([0, 0, 0, 0]), + fields.InputDataFields.groundtruth_not_exhaustive_classes: + np.array([0, 0, 0, 0]) + }) + lvis_evaluator.add_single_detected_image_info( + image_id=3, + detections_dict={ + fields.DetectionResultFields.detection_masks: masks3, + fields.DetectionResultFields.detection_scores: + np.array([.8]), + fields.DetectionResultFields.detection_classes: + np.array([1]) + }) + metrics = lvis_evaluator.evaluate() + self.assertAlmostEqual(metrics['DetectionMasks_AP'], 1.0) + + +@unittest.skipIf(tf_version.is_tf1(), 'Only Supported in TF2.X') +class LVISMaskEvaluationPyFuncTest(tf.test.TestCase): + + def testAddEvalDict(self): + lvis_evaluator = 
lvis_evaluation.LVISMaskEvaluator(_get_categories_list()) + image_id = tf.constant(1, dtype=tf.int32) + groundtruth_boxes = tf.constant( + np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]), + dtype=tf.float32) + groundtruth_classes = tf.constant(np.array([1, 2]), dtype=tf.float32) + groundtruth_masks = tf.constant(np.stack([ + np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)), + mode='constant'), + np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), + mode='constant') + ]), dtype=tf.uint8) + original_image_spatial_shapes = tf.constant([[120, 120], [120, 120]], + dtype=tf.int32) + groundtruth_verified_neg_classes = tf.constant(np.array([0, 0, 0, 0]), + dtype=tf.float32) + groundtruth_not_exhaustive_classes = tf.constant(np.array([0, 0, 0, 0]), + dtype=tf.float32) + detection_scores = tf.constant(np.array([.9, .8]), dtype=tf.float32) + detection_classes = tf.constant(np.array([2, 1]), dtype=tf.float32) + detection_masks = tf.constant(np.stack([ + np.pad(np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)), + mode='constant'), + np.pad(np.ones([100, 100], dtype=np.uint8), ((10, 10), (10, 10)), + mode='constant'), + ]), dtype=tf.uint8) + + input_data_fields = fields.InputDataFields + detection_fields = fields.DetectionResultFields + eval_dict = { + input_data_fields.key: image_id, + input_data_fields.groundtruth_boxes: groundtruth_boxes, + input_data_fields.groundtruth_classes: groundtruth_classes, + input_data_fields.groundtruth_instance_masks: groundtruth_masks, + input_data_fields.groundtruth_verified_neg_classes: + groundtruth_verified_neg_classes, + input_data_fields.groundtruth_not_exhaustive_classes: + groundtruth_not_exhaustive_classes, + input_data_fields.original_image_spatial_shape: + original_image_spatial_shapes, + detection_fields.detection_scores: detection_scores, + detection_fields.detection_classes: detection_classes, + detection_fields.detection_masks: detection_masks + } + lvis_evaluator.add_eval_dict(eval_dict) 
+ self.assertLen(lvis_evaluator._groundtruth_list, 2) + self.assertLen(lvis_evaluator._detection_masks_list, 2) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/metrics/lvis_tools.py b/research/object_detection/metrics/lvis_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..86f3a234b74d7f08172c2f6dc321038d7a0425f8 --- /dev/null +++ b/research/object_detection/metrics/lvis_tools.py @@ -0,0 +1,260 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Wrappers for third party lvis to be used within object_detection. + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes, +masks and classes) and detections (masks, scores and classes), where +elements of each list correspond to detections/annotations of a single image, +then evaluation can be invoked as follows: + + groundtruth = lvis_tools.LVISWrapper(groundtruth_dict) + detections = lvis_results.LVISResults(groundtruth, detections_list) + evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections, + iou_type='segm') + summary_metrics = evaluator.ComputeMetrics() + +TODO(jonathanhuang): Add support for exporting to JSON. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging + +from lvis import eval as lvis_eval +from lvis import lvis +import numpy as np +from pycocotools import mask +import six +from six.moves import range + + +def RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + rle = mask.encode(np.asfortranarray(masks)) + rle['counts'] = six.ensure_str(rle['counts']) + return rle + + +def _ConvertBoxToCOCOFormat(box): + """Converts a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a [ymin, xmin, ymax, xmax] numpy array + + Returns: + a list of floats representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), + float(box[2] - box[0])] + + +class LVISWrapper(lvis.LVIS): + """Wrapper for the lvis.LVIS class.""" + + def __init__(self, dataset, detection_type='bbox'): + """LVISWrapper constructor. + + See https://www.lvisdataset.org/dataset for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. 
+ """ + self.logger = logging.getLogger(__name__) + self.logger.info('Loading annotations.') + self.dataset = dataset + self._create_index() + + +class LVISEvalWrapper(lvis_eval.LVISEval): + """LVISEval wrapper.""" + + def __init__(self, groundtruth=None, detections=None, iou_type='bbox'): + lvis_eval.LVISEval.__init__( + self, groundtruth, detections, iou_type=iou_type) + self._iou_type = iou_type + + def ComputeMetrics(self): + self.run() + summary_metrics = {} + summary_metrics = self.results + return summary_metrics + + +def ExportSingleImageGroundtruthToLVIS(image_id, + next_annotation_id, + category_id_set, + groundtruth_boxes, + groundtruth_classes, + groundtruth_masks=None, + groundtruth_area=None): + """Export groundtruth of a single image to LVIS format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the LVIS evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionMasksToLVIS. We assume that boxes, classes and masks + are in correspondence - that is, e.g., groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier castable to integer. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. 
+ groundtruth_area: numpy array (float32) with shape [num_gt_boxes]. If + provided, then the area values (in the original absolute coordinates) will + be populated instead of calculated from bounding box coordinates. + + Returns: + a list of groundtruth annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' + 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' + 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' + 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError('Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. 
Image ID: %s' % ( + groundtruth_classes.shape[0], + groundtruth_boxes.shape[0], image_id)) + + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + if groundtruth_area is not None and groundtruth_area[i] > 0: + area = float(groundtruth_area[i]) + else: + area = float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])) + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + int(image_id), + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': area, + } + if groundtruth_masks is not None: + export_dict['segmentation'] = RleCompress(groundtruth_masks[i]) + + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionMasksToLVIS(image_id, + category_id_set, + detection_masks, + detection_scores, + detection_classes): + """Export detection masks of a single image to LVIS format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the LVIS evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier castable to integer. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + a list of detection mask annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError('All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. ' + 'Scores length: %d' % ( + detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0] + )) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': int(image_id), + 'category_id': int(detection_classes[i]), + 'segmentation': RleCompress(detection_masks[i]), + 'score': float(detection_scores[i]) + }) + + return detections_list diff --git a/research/object_detection/metrics/lvis_tools_test.py b/research/object_detection/metrics/lvis_tools_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5a5585acda9874e034464a63e1cdfdb881254a4f --- /dev/null +++ b/research/object_detection/metrics/lvis_tools_test.py @@ -0,0 +1,158 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow_model.object_detection.metrics.lvis_tools.""" +from lvis import results as lvis_results +import numpy as np +from pycocotools import mask +import tensorflow.compat.v1 as tf +from object_detection.metrics import lvis_tools + + +class LVISToolsTest(tf.test.TestCase): + + def setUp(self): + super(LVISToolsTest, self).setUp() + mask1 = np.pad( + np.ones([100, 100], dtype=np.uint8), + ((100, 56), (100, 56)), mode='constant') + mask2 = np.pad( + np.ones([50, 50], dtype=np.uint8), + ((50, 156), (50, 156)), mode='constant') + mask1_rle = lvis_tools.RleCompress(mask1) + mask2_rle = lvis_tools.RleCompress(mask2) + groundtruth_annotations_list = [ + { + 'id': 1, + 'image_id': 1, + 'category_id': 1, + 'bbox': [100., 100., 100., 100.], + 'area': 100.**2, + 'segmentation': mask1_rle + }, + { + 'id': 2, + 'image_id': 2, + 'category_id': 1, + 'bbox': [50., 50., 50., 50.], + 'area': 50.**2, + 'segmentation': mask2_rle + }, + ] + image_list = [ + { + 'id': 1, + 'neg_category_ids': [], + 'not_exhaustive_category_ids': [], + 'height': 256, + 'width': 256 + }, + { + 'id': 2, + 'neg_category_ids': [], + 'not_exhaustive_category_ids': [], + 'height': 256, + 'width': 256 + } + ] + category_list = [{'id': 0, 'name': 'person', 'frequency': 'f'}, + {'id': 1, 'name': 'cat', 'frequency': 'c'}, + {'id': 2, 'name': 'dog', 'frequency': 'r'}] + self._groundtruth_dict = { + 'annotations': groundtruth_annotations_list, + 'images': image_list, + 'categories': category_list + } + + 
self._detections_list = [ + { + 'image_id': 1, + 'category_id': 1, + 'segmentation': mask1_rle, + 'score': .8 + }, + { + 'image_id': 2, + 'category_id': 1, + 'segmentation': mask2_rle, + 'score': .7 + }, + ] + + def testLVISWrappers(self): + groundtruth = lvis_tools.LVISWrapper(self._groundtruth_dict) + detections = lvis_results.LVISResults(groundtruth, self._detections_list) + evaluator = lvis_tools.LVISEvalWrapper(groundtruth, detections, + iou_type='segm') + summary_metrics = evaluator.ComputeMetrics() + self.assertAlmostEqual(1.0, summary_metrics['AP']) + + def testSingleImageDetectionMaskExport(self): + masks = np.array( + [[[1, 1,], [1, 1]], + [[0, 0], [0, 1]], + [[0, 0], [0, 0]]], dtype=np.uint8) + classes = np.array([1, 2, 3], dtype=np.int32) + scores = np.array([0.8, 0.2, 0.7], dtype=np.float32) + lvis_annotations = lvis_tools.ExportSingleImageDetectionMasksToLVIS( + image_id=1, + category_id_set=set([1, 2, 3]), + detection_classes=classes, + detection_scores=scores, + detection_masks=masks) + expected_counts = ['04', '31', '4'] + for i, mask_annotation in enumerate(lvis_annotations): + self.assertEqual(mask_annotation['segmentation']['counts'], + expected_counts[i]) + self.assertTrue(np.all(np.equal(mask.decode( + mask_annotation['segmentation']), masks[i]))) + self.assertEqual(mask_annotation['image_id'], 1) + self.assertEqual(mask_annotation['category_id'], classes[i]) + self.assertAlmostEqual(mask_annotation['score'], scores[i]) + + def testSingleImageGroundtruthExport(self): + masks = np.array( + [[[1, 1,], [1, 1]], + [[0, 0], [0, 1]], + [[0, 0], [0, 0]]], dtype=np.uint8) + boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, 1, 1]], dtype=np.float32) + lvis_boxes = np.array([[0, 0, 1, 1], + [0, 0, .5, .5], + [.5, .5, .5, .5]], dtype=np.float32) + classes = np.array([1, 2, 3], dtype=np.int32) + next_annotation_id = 1 + expected_counts = ['04', '31', '4'] + + lvis_annotations = lvis_tools.ExportSingleImageGroundtruthToLVIS( + image_id=1, + 
category_id_set=set([1, 2, 3]), + next_annotation_id=next_annotation_id, + groundtruth_boxes=boxes, + groundtruth_classes=classes, + groundtruth_masks=masks) + for i, annotation in enumerate(lvis_annotations): + self.assertEqual(annotation['segmentation']['counts'], + expected_counts[i]) + self.assertTrue(np.all(np.equal(mask.decode( + annotation['segmentation']), masks[i]))) + self.assertTrue(np.all(np.isclose(annotation['bbox'], lvis_boxes[i]))) + self.assertEqual(annotation['image_id'], 1) + self.assertEqual(annotation['category_id'], classes[i]) + self.assertEqual(annotation['id'], i + next_annotation_id) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/metrics/oid_challenge_evaluation_utils.py b/research/object_detection/metrics/oid_challenge_evaluation_utils.py index 844cce3e8f362c2c15403269584149878f60bc51..86746912336831c9193d9b1a25272f0fc7283592 100644 --- a/research/object_detection/metrics/oid_challenge_evaluation_utils.py +++ b/research/object_detection/metrics/oid_challenge_evaluation_utils.py @@ -56,10 +56,9 @@ def _decode_raw_data_into_masks_and_boxes(segments, image_widths, """Decods binary segmentation masks into np.arrays and boxes. Args: - segments: pandas Series object containing either - None entries, or strings with - base64, zlib compressed, COCO RLE-encoded binary masks. - All masks are expected to be the same size. + segments: pandas Series object containing either None entries, or strings + with base64, zlib compressed, COCO RLE-encoded binary masks. All masks are + expected to be the same size. image_widths: pandas Series of mask widths. image_heights: pandas Series of mask heights. 
@@ -136,15 +135,15 @@ def build_groundtruth_dictionary(data, class_label_map): dictionary = { standard_fields.InputDataFields.groundtruth_boxes: - data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(), + data_location[['YMin', 'XMin', 'YMax', + 'XMax']].to_numpy().astype(float), standard_fields.InputDataFields.groundtruth_classes: data_location['LabelName'].map(lambda x: class_label_map[x] ).to_numpy(), standard_fields.InputDataFields.groundtruth_group_of: data_location['IsGroupOf'].to_numpy().astype(int), standard_fields.InputDataFields.groundtruth_image_classes: - data_labels['LabelName'].map(lambda x: class_label_map[x] - ).to_numpy(), + data_labels['LabelName'].map(lambda x: class_label_map[x]).to_numpy(), } if 'Mask' in data_location: @@ -181,7 +180,7 @@ def build_predictions_dictionary(data, class_label_map): standard_fields.DetectionResultFields.detection_classes: data['LabelName'].map(lambda x: class_label_map[x]).to_numpy(), standard_fields.DetectionResultFields.detection_scores: - data['Score'].to_numpy() + data['Score'].to_numpy().astype(float) } if 'Mask' in data: @@ -192,6 +191,6 @@ def build_predictions_dictionary(data, class_label_map): else: dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[ 'YMin', 'XMin', 'YMax', 'XMax' - ]].to_numpy() + ]].to_numpy().astype(float) return dictionary diff --git a/research/object_detection/model_lib.py b/research/object_detection/model_lib.py index 1eb482d19bdd546b5f7e4ab49bde2039421ead39..111be9cb4a7db5bc4838467d080204ec00125fb5 100644 --- a/research/object_detection/model_lib.py +++ b/research/object_detection/model_lib.py @@ -102,10 +102,18 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, 'groundtruth_dp_surface_coords_list': [batch_size, num_boxes, max_sampled_points, 4] containing the DensePose surface coordinates for each sampled point (if provided in groundtruth). 
+ 'groundtruth_track_ids_list': [batch_size, num_boxes] int32 tensor + with track ID for each instance (if provided in groundtruth). 'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating group_of annotations (if provided in groundtruth). 'groundtruth_labeled_classes': [batch_size, num_classes] int64 tensor of 1-indexed classes. + 'groundtruth_verified_neg_classes': [batch_size, num_classes] float32 + K-hot representation of 1-indexed classes which were verified as not + present in the image. + 'groundtruth_not_exhaustive_classes': [batch_size, num_classes] K-hot + representation of 1-indexed classes which don't have all of their + instances marked exhaustively. class_agnostic: Boolean indicating whether detections are class agnostic. """ input_data_fields = fields.InputDataFields() @@ -127,6 +135,7 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, input_data_fields.groundtruth_boxes: groundtruth_boxes, input_data_fields.groundtruth_classes: groundtruth_classes } + if detection_model.groundtruth_has_field(fields.BoxListFields.masks): groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.masks)) @@ -143,6 +152,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.keypoints)) + if detection_model.groundtruth_has_field( + fields.BoxListFields.keypoint_depths): + groundtruth[input_data_fields.groundtruth_keypoint_depths] = tf.stack( + detection_model.groundtruth_lists(fields.BoxListFields.keypoint_depths)) + groundtruth[ + input_data_fields.groundtruth_keypoint_depth_weights] = tf.stack( + detection_model.groundtruth_lists( + fields.BoxListFields.keypoint_depth_weights)) + if detection_model.groundtruth_has_field( fields.BoxListFields.keypoint_visibilities): groundtruth[input_data_fields.groundtruth_keypoint_visibilities] = 
tf.stack( @@ -153,24 +171,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, groundtruth[input_data_fields.groundtruth_group_of] = tf.stack( detection_model.groundtruth_lists(fields.BoxListFields.group_of)) + label_id_offset_paddings = tf.constant([[0, 0], [1, 0]]) if detection_model.groundtruth_has_field( - fields.InputDataFields.groundtruth_labeled_classes): - labeled_classes_list = detection_model.groundtruth_lists( - fields.InputDataFields.groundtruth_labeled_classes) - labeled_classes = [ - tf.where(x)[:, 0] + label_id_offset for x in labeled_classes_list - ] - if len(labeled_classes) > 1: - num_classes = labeled_classes_list[0].shape[0] - padded_labeled_classes = [] - for x in labeled_classes: - padding = num_classes - tf.shape(x)[0] - padded_labeled_classes.append(tf.pad(x, [[0, padding]])) - groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack( - padded_labeled_classes) - else: - groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack( - labeled_classes) + input_data_fields.groundtruth_verified_neg_classes): + groundtruth[input_data_fields.groundtruth_verified_neg_classes] = tf.pad( + tf.stack(detection_model.groundtruth_lists( + input_data_fields.groundtruth_verified_neg_classes)), + label_id_offset_paddings) + + if detection_model.groundtruth_has_field( + input_data_fields.groundtruth_not_exhaustive_classes): + groundtruth[ + input_data_fields.groundtruth_not_exhaustive_classes] = tf.pad( + tf.stack(detection_model.groundtruth_lists( + input_data_fields.groundtruth_not_exhaustive_classes)), + label_id_offset_paddings) if detection_model.groundtruth_has_field( fields.BoxListFields.densepose_num_points): @@ -187,6 +202,19 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic, groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack( detection_model.groundtruth_lists( fields.BoxListFields.densepose_surface_coords)) + + if 
detection_model.groundtruth_has_field(fields.BoxListFields.track_ids): + groundtruth[input_data_fields.groundtruth_track_ids] = tf.stack( + detection_model.groundtruth_lists(fields.BoxListFields.track_ids)) + + if detection_model.groundtruth_has_field( + input_data_fields.groundtruth_labeled_classes): + groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.pad( + tf.stack( + detection_model.groundtruth_lists( + input_data_fields.groundtruth_labeled_classes)), + label_id_offset_paddings) + groundtruth[input_data_fields.num_groundtruth_boxes] = ( tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]])) return groundtruth @@ -241,10 +269,13 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True): fields.InputDataFields.groundtruth_classes, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_keypoints, + fields.InputDataFields.groundtruth_keypoint_depths, + fields.InputDataFields.groundtruth_keypoint_depth_weights, fields.InputDataFields.groundtruth_keypoint_visibilities, fields.InputDataFields.groundtruth_dp_num_points, fields.InputDataFields.groundtruth_dp_part_ids, fields.InputDataFields.groundtruth_dp_surface_coords, + fields.InputDataFields.groundtruth_track_ids, fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_is_crowd, @@ -291,6 +322,13 @@ def provide_groundtruth(model, labels): gt_keypoints_list = None if fields.InputDataFields.groundtruth_keypoints in labels: gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] + gt_keypoint_depths_list = None + gt_keypoint_depth_weights_list = None + if fields.InputDataFields.groundtruth_keypoint_depths in labels: + gt_keypoint_depths_list = ( + labels[fields.InputDataFields.groundtruth_keypoint_depths]) + gt_keypoint_depth_weights_list = ( + labels[fields.InputDataFields.groundtruth_keypoint_depth_weights]) gt_keypoint_visibilities_list = None if 
fields.InputDataFields.groundtruth_keypoint_visibilities in labels: gt_keypoint_visibilities_list = labels[ @@ -307,6 +345,10 @@ def provide_groundtruth(model, labels): if fields.InputDataFields.groundtruth_dp_surface_coords in labels: gt_dp_surface_coords_list = labels[ fields.InputDataFields.groundtruth_dp_surface_coords] + gt_track_ids_list = None + if fields.InputDataFields.groundtruth_track_ids in labels: + gt_track_ids_list = labels[ + fields.InputDataFields.groundtruth_track_ids] gt_weights_list = None if fields.InputDataFields.groundtruth_weights in labels: gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] @@ -327,6 +369,14 @@ def provide_groundtruth(model, labels): if fields.InputDataFields.groundtruth_labeled_classes in labels: gt_labeled_classes = labels[ fields.InputDataFields.groundtruth_labeled_classes] + gt_verified_neg_classes = None + if fields.InputDataFields.groundtruth_verified_neg_classes in labels: + gt_verified_neg_classes = labels[ + fields.InputDataFields.groundtruth_verified_neg_classes] + gt_not_exhaustive_classes = None + if fields.InputDataFields.groundtruth_not_exhaustive_classes in labels: + gt_not_exhaustive_classes = labels[ + fields.InputDataFields.groundtruth_not_exhaustive_classes] model.provide_groundtruth( groundtruth_boxes_list=gt_boxes_list, groundtruth_classes_list=gt_classes_list, @@ -341,7 +391,12 @@ def provide_groundtruth(model, labels): groundtruth_weights_list=gt_weights_list, groundtruth_is_crowd_list=gt_is_crowd_list, groundtruth_group_of_list=gt_group_of_list, - groundtruth_area_list=gt_area_list) + groundtruth_area_list=gt_area_list, + groundtruth_track_ids_list=gt_track_ids_list, + groundtruth_verified_neg_classes=gt_verified_neg_classes, + groundtruth_not_exhaustive_classes=gt_not_exhaustive_classes, + groundtruth_keypoint_depths_list=gt_keypoint_depths_list, + groundtruth_keypoint_depth_weights_list=gt_keypoint_depth_weights_list) def create_model_fn(detection_model_fn, configs, hparams=None, 
use_tpu=False, @@ -390,8 +445,7 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False, from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0. base_layer_utils.enable_v2_dtype_behavior() - tf2.keras.mixed_precision.experimental.set_policy( - 'mixed_bfloat16') + tf2.keras.mixed_precision.set_global_policy('mixed_bfloat16') detection_model = detection_model_fn( is_training=is_training, add_summaries=(not use_tpu)) scaffold_fn = None @@ -786,12 +840,14 @@ def create_estimator_and_inputs(run_config, train_config=train_config, train_input_config=train_input_config, model_config=model_config) - eval_input_fns = [ - create_eval_input_fn( - eval_config=eval_config, - eval_input_config=eval_input_config, - model_config=model_config) for eval_input_config in eval_input_configs - ] + eval_input_fns = [] + for eval_input_config in eval_input_configs: + eval_input_fns.append( + create_eval_input_fn( + eval_config=eval_config, + eval_input_config=eval_input_config, + model_config=model_config)) + eval_input_names = [ eval_input_config.name for eval_input_config in eval_input_configs ] @@ -934,12 +990,12 @@ def _evaluate_checkpoint(estimator, raise e -def continuous_eval(estimator, - model_dir, - input_fn, - train_steps, - name, - max_retries=0): +def continuous_eval_generator(estimator, + model_dir, + input_fn, + train_steps, + name, + max_retries=0): """Perform continuous evaluation on checkpoints written to a model directory. Args: @@ -952,6 +1008,9 @@ def continuous_eval(estimator, max_retries: Maximum number of times to retry the evaluation on encountering a tf.errors.InvalidArgumentError. If negative, will always retry the evaluation. + + Yields: + Pair of current step and eval_results. 
""" def terminate_eval(): @@ -974,6 +1033,7 @@ def continuous_eval(estimator, # Terminate eval job when final checkpoint is reached current_step = int(os.path.basename(ckpt).split('-')[1]) + yield (current_step, eval_results) if current_step >= train_steps: tf.logging.info( 'Evaluation finished after training step %d' % current_step) @@ -984,6 +1044,30 @@ def continuous_eval(estimator, 'Checkpoint %s no longer exists, skipping checkpoint' % ckpt) +def continuous_eval(estimator, + model_dir, + input_fn, + train_steps, + name, + max_retries=0): + """Performs continuous evaluation on checkpoints written to a model directory. + + Args: + estimator: Estimator object to use for evaluation. + model_dir: Model directory to read checkpoints for continuous evaluation. + input_fn: Input function to use for evaluation. + train_steps: Number of training steps. This is used to infer the last + checkpoint and stop evaluation loop. + name: Namescope for eval summary. + max_retries: Maximum number of times to retry the evaluation on encountering + a tf.errors.InvalidArgumentError. If negative, will always retry the + evaluation. 
+ """ + for current_step, eval_results in continuous_eval_generator( + estimator, model_dir, input_fn, train_steps, name, max_retries): + tf.logging.info('Step %s, Eval results: %s', current_step, eval_results) + + def populate_experiment(run_config, hparams, pipeline_config_path, diff --git a/research/object_detection/model_lib_tf2_test.py b/research/object_detection/model_lib_tf2_test.py index f65273660195752227b2bcc90dceb04184a6eb62..12330dbc7fa2f4022823c72a75878aac0d731455 100644 --- a/research/object_detection/model_lib_tf2_test.py +++ b/research/object_detection/model_lib_tf2_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import json import os import tempfile import unittest @@ -26,9 +27,9 @@ import six import tensorflow.compat.v1 as tf import tensorflow.compat.v2 as tf2 +from object_detection import exporter_lib_v2 from object_detection import inputs from object_detection import model_lib_v2 -from object_detection.builders import model_builder from object_detection.core import model from object_detection.protos import train_pb2 from object_detection.utils import config_util @@ -69,7 +70,8 @@ def _get_config_kwarg_overrides(): return { 'train_input_path': data_path, 'eval_input_path': data_path, - 'label_map_path': label_map_path + 'label_map_path': label_map_path, + 'train_input_reader': {'batch_size': 1} } @@ -90,13 +92,14 @@ class ModelLibTest(tf.test.TestCase): config_kwarg_overrides = _get_config_kwarg_overrides() train_steps = 2 - strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0') + strategy = tf2.distribute.MirroredStrategy(['/cpu:0', '/cpu:1']) with strategy.scope(): model_lib_v2.train_loop( new_pipeline_config_path, model_dir=model_dir, train_steps=train_steps, checkpoint_every_n=1, + num_steps_per_iteration=1, **config_kwarg_overrides) model_lib_v2.eval_continuously( @@ -145,6 +148,12 @@ class SimpleModel(model.DetectionModel): return [] +def 
fake_model_builder(*_, **__): + return SimpleModel() + +FAKE_BUILDER_MAP = {'detection_model_fn_base': fake_model_builder} + + @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') class ModelCheckpointTest(tf.test.TestCase): """Test for model checkpoint related functionality.""" @@ -153,10 +162,9 @@ class ModelCheckpointTest(tf.test.TestCase): """Test that only the most recent checkpoints are kept.""" strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0') - with mock.patch.object( - model_builder, 'build', autospec=True) as mock_builder: - with strategy.scope(): - mock_builder.return_value = SimpleModel() + with mock.patch.dict( + model_lib_v2.MODEL_BUILD_UTIL_MAP, FAKE_BUILDER_MAP): + model_dir = tempfile.mkdtemp(dir=self.get_temp_dir()) pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) new_pipeline_config_path = os.path.join(model_dir, 'new_pipeline.config') @@ -167,8 +175,8 @@ class ModelCheckpointTest(tf.test.TestCase): with strategy.scope(): model_lib_v2.train_loop( new_pipeline_config_path, model_dir=model_dir, - train_steps=20, checkpoint_every_n=2, checkpoint_max_to_keep=3, - **config_kwarg_overrides + train_steps=5, checkpoint_every_n=2, checkpoint_max_to_keep=3, + num_steps_per_iteration=1, **config_kwarg_overrides ) ckpt_files = tf.io.gfile.glob(os.path.join(model_dir, 'ckpt-*.index')) self.assertEqual(len(ckpt_files), 3, @@ -210,6 +218,7 @@ class CheckpointV2Test(tf.test.TestCase): model_lib_v2.load_fine_tune_checkpoint( self._model, self._ckpt_path, checkpoint_type='', checkpoint_version=train_pb2.CheckpointVersion.V2, + run_model_on_dummy_input=True, input_dataset=self._train_input_fn(), unpad_groundtruth_tensors=True) np.testing.assert_allclose(self._model.weight.numpy(), 42) @@ -222,9 +231,46 @@ class CheckpointV2Test(tf.test.TestCase): model_lib_v2.load_fine_tune_checkpoint( IncompatibleModel(), self._ckpt_path, checkpoint_type='', checkpoint_version=train_pb2.CheckpointVersion.V2, + 
run_model_on_dummy_input=True, input_dataset=self._train_input_fn(), unpad_groundtruth_tensors=True) +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class MetricsExportTest(tf.test.TestCase): + + @classmethod + def setUpClass(cls): # pylint:disable=g-missing-super-call + tf.keras.backend.clear_session() + + def test_export_metrics_json_serializable(self): + """Tests that Estimator and input function are constructed correctly.""" + + strategy = tf2.distribute.OneDeviceStrategy(device='/cpu:0') + + def export(data, _): + json.dumps(data) + + with mock.patch.dict( + exporter_lib_v2.INPUT_BUILDER_UTIL_MAP, FAKE_BUILDER_MAP): + with strategy.scope(): + model_dir = tf.test.get_temp_dir() + new_pipeline_config_path = os.path.join(model_dir, + 'new_pipeline.config') + pipeline_config_path = get_pipeline_config_path(MODEL_NAME_FOR_TEST) + config_util.clear_fine_tune_checkpoint(pipeline_config_path, + new_pipeline_config_path) + train_steps = 2 + with strategy.scope(): + model_lib_v2.train_loop( + new_pipeline_config_path, + model_dir=model_dir, + train_steps=train_steps, + checkpoint_every_n=100, + performance_summary_exporter=export, + num_steps_per_iteration=1, + **_get_config_kwarg_overrides()) + + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/model_lib_v2.py b/research/object_detection/model_lib_v2.py index d2d24e77ec72b654282212df52f3d0709ebce14d..ee4c1b8d78ed3063cf794db315eaddefdb116c9b 100644 --- a/research/object_detection/model_lib_v2.py +++ b/research/object_detection/model_lib_v2.py @@ -21,13 +21,14 @@ from __future__ import print_function import copy import os import time +import numpy as np import tensorflow.compat.v1 as tf +import tensorflow.compat.v2 as tf2 from object_detection import eval_util from object_detection import inputs from object_detection import model_lib -from object_detection.builders import model_builder from object_detection.builders import optimizer_builder from object_detection.core 
import standard_fields as fields from object_detection.protos import train_pb2 @@ -36,15 +37,9 @@ from object_detection.utils import label_map_util from object_detection.utils import ops from object_detection.utils import visualization_utils as vutils -# pylint: disable=g-import-not-at-top -try: - from tensorflow.contrib import tpu as contrib_tpu -except ImportError: - # TF 2.0 doesn't ship with contrib. - pass -# pylint: enable=g-import-not-at-top MODEL_BUILD_UTIL_MAP = model_lib.MODEL_BUILD_UTIL_MAP +NUM_STEPS_PER_ITERATION = 100 RESTORE_MAP_ERROR_TEMPLATE = ( @@ -103,6 +98,12 @@ def _compute_losses_and_predictions_dicts( containing group_of annotations. labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 k-hot tensor of classes. + labels[fields.InputDataFields.groundtruth_track_ids] is a int32 + tensor of track IDs. + labels[fields.InputDataFields.groundtruth_keypoint_depths] is a + float32 tensor containing keypoint depths information. + labels[fields.InputDataFields.groundtruth_keypoint_depth_weights] is a + float32 tensor containing the weights of the keypoint depth feature. add_regularization_loss: Whether or not to include the model's regularization loss in the losses dictionary. @@ -117,7 +118,8 @@ def _compute_losses_and_predictions_dicts( prediction_dict = model.predict( preprocessed_images, - features[fields.InputDataFields.true_image_shape]) + features[fields.InputDataFields.true_image_shape], + **model.get_side_inputs(features)) prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict) losses_dict = model.loss( @@ -142,6 +144,42 @@ def _compute_losses_and_predictions_dicts( return losses_dict, prediction_dict +def _ensure_model_is_built(model, input_dataset, unpad_groundtruth_tensors): + """Ensures that model variables are all built, by running on a dummy input. + + Args: + model: A DetectionModel to be built. + input_dataset: The tf.data Dataset the model is being trained on. 
Needed to + get the shapes for the dummy loss computation. + unpad_groundtruth_tensors: A parameter passed to unstack_batch. + """ + features, labels = iter(input_dataset).next() + + @tf.function + def _dummy_computation_fn(features, labels): + model._is_training = False # pylint: disable=protected-access + tf.keras.backend.set_learning_phase(False) + + labels = model_lib.unstack_batch( + labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) + + return _compute_losses_and_predictions_dicts(model, features, labels) + + strategy = tf.compat.v2.distribute.get_strategy() + if hasattr(tf.distribute.Strategy, 'run'): + strategy.run( + _dummy_computation_fn, args=( + features, + labels, + )) + else: + strategy.experimental_run_v2( + _dummy_computation_fn, args=( + features, + labels, + )) + + # TODO(kaftan): Explore removing learning_rate from this method & returning ## The full losses dict instead of just total_loss, then doing all summaries ## saving in a utility method called by the outer training loop. @@ -214,6 +252,12 @@ def eager_train_step(detection_model, (v, u) are part-relative normalized surface coordinates. labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32 k-hot tensor of classes. + labels[fields.InputDataFields.groundtruth_track_ids] is a int32 + tensor of track IDs. + labels[fields.InputDataFields.groundtruth_keypoint_depths] is a + float32 tensor containing keypoint depths information. + labels[fields.InputDataFields.groundtruth_keypoint_depth_weights] is a + float32 tensor containing the weights of the keypoint depth feature. unpad_groundtruth_tensors: A parameter passed to unstack_batch. optimizer: The training optimizer that will update the variables. learning_rate: The learning rate tensor for the current training step. @@ -277,7 +321,8 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map): """Ensure that given dict is a valid TF v2 style restore map. 
Args: - checkpoint_restore_map: A dict mapping strings to tf.keras.Model objects. + checkpoint_restore_map: A nested dict mapping strings to + tf.keras.Model objects. Raises: ValueError: If they keys in checkpoint_restore_map are not strings or if @@ -289,8 +334,12 @@ def validate_tf_v2_checkpoint_restore_map(checkpoint_restore_map): if not (isinstance(key, str) and (isinstance(value, tf.Module) or isinstance(value, tf.train.Checkpoint))): - raise TypeError(RESTORE_MAP_ERROR_TEMPLATE.format( - key.__class__.__name__, value.__class__.__name__)) + if isinstance(key, str) and isinstance(value, dict): + validate_tf_v2_checkpoint_restore_map(value) + else: + raise TypeError( + RESTORE_MAP_ERROR_TEMPLATE.format(key.__class__.__name__, + value.__class__.__name__)) def is_object_based_checkpoint(checkpoint_path): @@ -299,9 +348,9 @@ def is_object_based_checkpoint(checkpoint_path): return '_CHECKPOINTABLE_OBJECT_GRAPH' in var_names -def load_fine_tune_checkpoint( - model, checkpoint_path, checkpoint_type, checkpoint_version, input_dataset, - unpad_groundtruth_tensors): +def load_fine_tune_checkpoint(model, checkpoint_path, checkpoint_type, + checkpoint_version, run_model_on_dummy_input, + input_dataset, unpad_groundtruth_tensors): """Load a fine tuning classification or detection checkpoint. To make sure the model variables are all built, this method first executes @@ -323,6 +372,9 @@ def load_fine_tune_checkpoint( checkpoint_version: train_pb2.CheckpointVersion.V1 or V2 enum indicating whether to load checkpoints in V1 style or V2 style. In this binary we only support V2 style (object-based) checkpoints. + run_model_on_dummy_input: Whether to run the model on a dummy input in order + to ensure that all model variables have been built successfully before + loading the fine_tune_checkpoint. input_dataset: The tf.data Dataset the model is being trained on. Needed to get the shapes for the dummy loss computation. unpad_groundtruth_tensors: A parameter passed to unstack_batch. 
@@ -337,34 +389,8 @@ def load_fine_tune_checkpoint( if checkpoint_version == train_pb2.CheckpointVersion.V1: raise ValueError('Checkpoint version should be V2') - features, labels = iter(input_dataset).next() - - @tf.function - def _dummy_computation_fn(features, labels): - model._is_training = False # pylint: disable=protected-access - tf.keras.backend.set_learning_phase(False) - - labels = model_lib.unstack_batch( - labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) - - return _compute_losses_and_predictions_dicts( - model, - features, - labels) - - strategy = tf.compat.v2.distribute.get_strategy() - if hasattr(tf.distribute.Strategy, 'run'): - strategy.run( - _dummy_computation_fn, args=( - features, - labels, - )) - else: - strategy.experimental_run_v2( - _dummy_computation_fn, args=( - features, - labels, - )) + if run_model_on_dummy_input: + _ensure_model_is_built(model, input_dataset, unpad_groundtruth_tensors) restore_from_objects_dict = model.restore_from_objects( fine_tune_checkpoint_type=checkpoint_type) @@ -415,6 +441,9 @@ def train_loop( save_final_config=False, checkpoint_every_n=1000, checkpoint_max_to_keep=7, + record_summaries=True, + performance_summary_exporter=None, + num_steps_per_iteration=NUM_STEPS_PER_ITERATION, **kwargs): """Trains a model using eager + functions. @@ -444,6 +473,10 @@ def train_loop( Checkpoint every n training steps. checkpoint_max_to_keep: int, the number of most recent checkpoints to keep in the model directory. + record_summaries: Boolean, whether or not to record summaries. + performance_summary_exporter: function for exporting performance metrics. + num_steps_per_iteration: int, The number of training steps to perform + in each iteration. **kwargs: Additional keyword arguments for configuration override. 
""" ## Parse the configs @@ -453,6 +486,7 @@ def train_loop( 'merge_external_params_with_configs'] create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ 'create_pipeline_proto_from_configs'] + steps_per_sec_list = [] configs = get_configs_from_pipeline_file( pipeline_config_path, config_override=config_override) @@ -477,7 +511,7 @@ def train_loop( train_steps = train_config.num_steps if kwargs['use_bfloat16']: - tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') + tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16') if train_config.load_all_detection_checkpoint_vars: raise ValueError('train_pb2.load_all_detection_checkpoint_vars ' @@ -489,13 +523,15 @@ def train_loop( # Write the as-run pipeline config to disk. if save_final_config: + tf.logging.info('Saving pipeline config file to directory {}'.format( + model_dir)) pipeline_config_final = create_pipeline_proto_from_configs(configs) config_util.save_pipeline_config(pipeline_config_final, model_dir) # Build the model, optimizer, and training input strategy = tf.compat.v2.distribute.get_strategy() with strategy.scope(): - detection_model = model_builder.build( + detection_model = MODEL_BUILD_UTIL_MAP['detection_model_fn_base']( model_config=model_config, is_training=True) def train_dataset_fn(input_context): @@ -520,6 +556,15 @@ def train_loop( optimizer, (learning_rate,) = optimizer_builder.build( train_config.optimizer, global_step=global_step) + # We run the detection_model on dummy inputs in order to ensure that the + # model and all its variables have been properly constructed. Specifically, + # this is currently necessary prior to (potentially) creating shadow copies + # of the model variables for the EMA optimizer. 
+ if train_config.optimizer.use_moving_average: + _ensure_model_is_built(detection_model, train_input, + unpad_groundtruth_tensors) + optimizer.shadow_copy(detection_model) + if callable(learning_rate): learning_rate_fn = learning_rate else: @@ -530,15 +575,11 @@ def train_loop( # is the chief. summary_writer_filepath = get_filepath(strategy, os.path.join(model_dir, 'train')) - summary_writer = tf.compat.v2.summary.create_file_writer( - summary_writer_filepath) - - if use_tpu: - num_steps_per_iteration = 100 + if record_summaries: + summary_writer = tf.compat.v2.summary.create_file_writer( + summary_writer_filepath) else: - # TODO(b/135933080) Explore setting to 100 when GPU performance issues - # are fixed. - num_steps_per_iteration = 1 + summary_writer = tf2.summary.create_noop_writer() with summary_writer.as_default(): with strategy.scope(): @@ -546,12 +587,11 @@ def train_loop( lambda: global_step % num_steps_per_iteration == 0): # Load a fine-tuning checkpoint. if train_config.fine_tune_checkpoint: - load_fine_tune_checkpoint(detection_model, - train_config.fine_tune_checkpoint, - fine_tune_checkpoint_type, - fine_tune_checkpoint_version, - train_input, - unpad_groundtruth_tensors) + load_fine_tune_checkpoint( + detection_model, train_config.fine_tune_checkpoint, + fine_tune_checkpoint_type, fine_tune_checkpoint_version, + train_config.run_fine_tune_checkpoint_dummy_computation, + train_input, unpad_groundtruth_tensors) ckpt = tf.compat.v2.train.Checkpoint( step=global_step, model=detection_model, optimizer=optimizer) @@ -603,7 +643,9 @@ def train_loop( if num_steps_per_iteration > 1: for _ in tf.range(num_steps_per_iteration - 1): - _sample_and_train(strategy, train_step_fn, data_iterator) + # Following suggestion on yaqs/5402607292645376 + with tf.name_scope(''): + _sample_and_train(strategy, train_step_fn, data_iterator) return _sample_and_train(strategy, train_step_fn, data_iterator) @@ -623,10 +665,12 @@ def train_loop( time_taken = time.time() - 
last_step_time last_step_time = time.time() + steps_per_sec = num_steps_per_iteration * 1.0 / time_taken tf.compat.v2.summary.scalar( - 'steps_per_sec', num_steps_per_iteration * 1.0 / time_taken, - step=global_step) + 'steps_per_sec', steps_per_sec, step=global_step) + + steps_per_sec_list.append(steps_per_sec) if global_step.value() - logged_step >= 100: tf.logging.info( @@ -645,6 +689,116 @@ def train_loop( # training. clean_temporary_directories(strategy, manager_dir) clean_temporary_directories(strategy, summary_writer_filepath) + # TODO(pkanwar): add accuracy metrics. + if performance_summary_exporter is not None: + metrics = { + 'steps_per_sec': np.mean(steps_per_sec_list), + 'steps_per_sec_p50': np.median(steps_per_sec_list), + 'steps_per_sec_max': max(steps_per_sec_list), + 'last_batch_loss': float(loss) + } + mixed_precision = 'bf16' if kwargs['use_bfloat16'] else 'fp32' + performance_summary_exporter(metrics, mixed_precision) + + +def prepare_eval_dict(detections, groundtruth, features): + """Prepares eval dictionary containing detections and groundtruth. + + Takes in `detections` from the model, `groundtruth` and `features` returned + from the eval tf.data.dataset and creates a dictionary of tensors suitable + for detection eval modules. + + Args: + detections: A dictionary of tensors returned by `model.postprocess`. + groundtruth: `inputs.eval_input` returns an eval dataset of (features, + labels) tuple. `groundtruth` must be set to `labels`. + Please note that: + * fields.InputDataFields.groundtruth_classes must be 0-indexed and + in its 1-hot representation. + * fields.InputDataFields.groundtruth_verified_neg_classes must be + 0-indexed and in its multi-hot repesentation. + * fields.InputDataFields.groundtruth_not_exhaustive_classes must be + 0-indexed and in its multi-hot repesentation. + * fields.InputDataFields.groundtruth_labeled_classes must be + 0-indexed and in its multi-hot repesentation. 
+ features: `inputs.eval_input` returns an eval dataset of (features, labels) + tuple. This argument must be set to a dictionary containing the following + keys and their corresponding values from `features` -- + * fields.InputDataFields.image + * fields.InputDataFields.original_image + * fields.InputDataFields.original_image_spatial_shape + * fields.InputDataFields.true_image_shape + * inputs.HASH_KEY + + Returns: + eval_dict: A dictionary of tensors to pass to eval module. + class_agnostic: Whether to evaluate detection in class agnostic mode. + """ + + groundtruth_boxes = groundtruth[fields.InputDataFields.groundtruth_boxes] + groundtruth_boxes_shape = tf.shape(groundtruth_boxes) + # For class-agnostic models, groundtruth one-hot encodings collapse to all + # ones. + class_agnostic = ( + fields.DetectionResultFields.detection_classes not in detections) + if class_agnostic: + groundtruth_classes_one_hot = tf.ones( + [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1]) + else: + groundtruth_classes_one_hot = groundtruth[ + fields.InputDataFields.groundtruth_classes] + label_id_offset = 1 # Applying label id offset (b/63711816) + groundtruth_classes = ( + tf.argmax(groundtruth_classes_one_hot, axis=2) + label_id_offset) + groundtruth[fields.InputDataFields.groundtruth_classes] = groundtruth_classes + + label_id_offset_paddings = tf.constant([[0, 0], [1, 0]]) + if fields.InputDataFields.groundtruth_verified_neg_classes in groundtruth: + groundtruth[ + fields.InputDataFields.groundtruth_verified_neg_classes] = tf.pad( + groundtruth[ + fields.InputDataFields.groundtruth_verified_neg_classes], + label_id_offset_paddings) + if fields.InputDataFields.groundtruth_not_exhaustive_classes in groundtruth: + groundtruth[ + fields.InputDataFields.groundtruth_not_exhaustive_classes] = tf.pad( + groundtruth[ + fields.InputDataFields.groundtruth_not_exhaustive_classes], + label_id_offset_paddings) + if fields.InputDataFields.groundtruth_labeled_classes in groundtruth: + 
groundtruth[fields.InputDataFields.groundtruth_labeled_classes] = tf.pad( + groundtruth[fields.InputDataFields.groundtruth_labeled_classes], + label_id_offset_paddings) + + use_original_images = fields.InputDataFields.original_image in features + if use_original_images: + eval_images = features[fields.InputDataFields.original_image] + true_image_shapes = features[fields.InputDataFields.true_image_shape][:, :3] + original_image_spatial_shapes = features[ + fields.InputDataFields.original_image_spatial_shape] + else: + eval_images = features[fields.InputDataFields.image] + true_image_shapes = None + original_image_spatial_shapes = None + + eval_dict = eval_util.result_dict_for_batched_example( + eval_images, + features[inputs.HASH_KEY], + detections, + groundtruth, + class_agnostic=class_agnostic, + scale_to_absolute=True, + original_image_spatial_shapes=original_image_spatial_shapes, + true_image_shapes=true_image_shapes) + + return eval_dict, class_agnostic + + +def concat_replica_results(tensor_dict): + new_tensor_dict = {} + for key, values in tensor_dict.items(): + new_tensor_dict[key] = tf.concat(values, axis=0) + return new_tensor_dict def eager_eval_loop( @@ -653,7 +807,8 @@ def eager_eval_loop( eval_dataset, use_tpu=False, postprocess_on_cpu=False, - global_step=None): + global_step=None, + ): """Evaluate the model eagerly on the evaluation dataset. This method will compute the evaluation metrics specified in the configs on @@ -675,6 +830,7 @@ def eager_eval_loop( Returns: A dict of evaluation metrics representing the results of this evaluation. 
""" + del postprocess_on_cpu train_config = configs['train_config'] eval_input_config = configs['eval_input_config'] eval_config = configs['eval_config'] @@ -686,6 +842,7 @@ def eager_eval_loop( evaluator_options = eval_util.evaluator_options_from_eval_config( eval_config) + batch_size = eval_config.batch_size class_agnostic_category_index = ( label_map_util.create_class_agnostic_category_index()) @@ -714,55 +871,29 @@ def eager_eval_loop( # must be unpadded. boxes_shape = ( labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list()) - unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu + unpad_groundtruth_tensors = (boxes_shape[1] is not None + and not use_tpu + and batch_size == 1) + groundtruth_dict = labels labels = model_lib.unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) losses_dict, prediction_dict = _compute_losses_and_predictions_dicts( detection_model, features, labels, add_regularization_loss) - - def postprocess_wrapper(args): - return detection_model.postprocess(args[0], args[1]) - - # TODO(kaftan): Depending on how postprocessing will work for TPUS w/ - ## TPUStrategy, may be good to move wrapping to a utility method - if use_tpu and postprocess_on_cpu: - detections = contrib_tpu.outside_compilation( - postprocess_wrapper, - (prediction_dict, features[fields.InputDataFields.true_image_shape])) - else: - detections = postprocess_wrapper( - (prediction_dict, features[fields.InputDataFields.true_image_shape])) - - class_agnostic = ( - fields.DetectionResultFields.detection_classes not in detections) - # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval to eval_util - ## and call this from there. 
- groundtruth = model_lib._prepare_groundtruth_for_eval( # pylint: disable=protected-access - detection_model, class_agnostic, eval_input_config.max_number_of_boxes) - use_original_images = fields.InputDataFields.original_image in features - if use_original_images: - eval_images = features[fields.InputDataFields.original_image] - true_image_shapes = tf.slice( - features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]) - original_image_spatial_shapes = features[ - fields.InputDataFields.original_image_spatial_shape] - else: - eval_images = features[fields.InputDataFields.image] - true_image_shapes = None - original_image_spatial_shapes = None - - eval_dict = eval_util.result_dict_for_batched_example( - eval_images, - features[inputs.HASH_KEY], - detections, - groundtruth, - class_agnostic=class_agnostic, - scale_to_absolute=True, - original_image_spatial_shapes=original_image_spatial_shapes, - true_image_shapes=true_image_shapes) - - return eval_dict, losses_dict, class_agnostic + prediction_dict = detection_model.postprocess( + prediction_dict, features[fields.InputDataFields.true_image_shape]) + eval_features = { + fields.InputDataFields.image: + features[fields.InputDataFields.image], + fields.InputDataFields.original_image: + features[fields.InputDataFields.original_image], + fields.InputDataFields.original_image_spatial_shape: + features[fields.InputDataFields.original_image_spatial_shape], + fields.InputDataFields.true_image_shape: + features[fields.InputDataFields.true_image_shape], + inputs.HASH_KEY: features[inputs.HASH_KEY], + } + return losses_dict, prediction_dict, groundtruth_dict, eval_features agnostic_categories = label_map_util.create_class_agnostic_category_index() per_class_categories = label_map_util.create_category_index_from_labelmap( @@ -770,9 +901,32 @@ def eager_eval_loop( keypoint_edges = [ (kp.start, kp.end) for kp in eval_config.keypoint_edge] - for i, (features, labels) in enumerate(eval_dataset): - eval_dict, losses_dict, 
class_agnostic = compute_eval_dict(features, labels) + strategy = tf.compat.v2.distribute.get_strategy() + for i, (features, labels) in enumerate(eval_dataset): + try: + (losses_dict, prediction_dict, groundtruth_dict, + eval_features) = strategy.run( + compute_eval_dict, args=(features, labels)) + except Exception as exc: # pylint:disable=broad-except + tf.logging.info('Encountered %s exception.', exc) + tf.logging.info('A replica probably exhausted all examples. Skipping ' + 'pending examples on other replicas.') + break + (local_prediction_dict, local_groundtruth_dict, + local_eval_features) = tf.nest.map_structure( + strategy.experimental_local_results, + [prediction_dict, groundtruth_dict, eval_features]) + local_prediction_dict = concat_replica_results(local_prediction_dict) + local_groundtruth_dict = concat_replica_results(local_groundtruth_dict) + local_eval_features = concat_replica_results(local_eval_features) + + eval_dict, class_agnostic = prepare_eval_dict(local_prediction_dict, + local_groundtruth_dict, + local_eval_features) + for loss_key, loss_tensor in iter(losses_dict.items()): + losses_dict[loss_key] = strategy.reduce(tf.distribute.ReduceOp.MEAN, + loss_tensor, None) if class_agnostic: category_index = agnostic_categories else: @@ -782,7 +936,7 @@ def eager_eval_loop( tf.logging.info('Finished eval step %d', i) use_original_images = fields.InputDataFields.original_image in features - if use_original_images and i < eval_config.num_visualizations: + if (use_original_images and i < eval_config.num_visualizations): sbys_image_list = vutils.draw_side_by_side_evaluation_image( eval_dict, category_index=category_index, @@ -790,21 +944,21 @@ def eager_eval_loop( min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False, keypoint_edges=keypoint_edges or None) - sbys_images = tf.concat(sbys_image_list, axis=0) - tf.compat.v2.summary.image( - name='eval_side_by_side_' + str(i), - step=global_step, - data=sbys_images, - 
max_outputs=eval_config.num_visualizations) - if eval_util.has_densepose(eval_dict): - dp_image_list = vutils.draw_densepose_visualizations( - eval_dict) - dp_images = tf.concat(dp_image_list, axis=0) + for j, sbys_image in enumerate(sbys_image_list): tf.compat.v2.summary.image( - name='densepose_detections_' + str(i), + name='eval_side_by_side_{}_{}'.format(i, j), step=global_step, - data=dp_images, + data=sbys_image, max_outputs=eval_config.num_visualizations) + if eval_util.has_densepose(eval_dict): + dp_image_list = vutils.draw_densepose_visualizations( + eval_dict) + for j, dp_image in enumerate(dp_image_list): + tf.compat.v2.summary.image( + name='densepose_detections_{}_{}'.format(i, j), + step=global_step, + data=dp_image, + max_outputs=eval_config.num_visualizations) if evaluators is None: if class_agnostic: @@ -817,27 +971,21 @@ def eager_eval_loop( for loss_key, loss_tensor in iter(losses_dict.items()): if loss_key not in loss_metrics: - loss_metrics[loss_key] = tf.keras.metrics.Mean() - # Skip the loss with value equal or lower than 0.0 when calculating the - # average loss since they don't usually reflect the normal loss values - # causing spurious average loss value. 
- if loss_tensor <= 0.0: - continue - loss_metrics[loss_key].update_state(loss_tensor) + loss_metrics[loss_key] = [] + loss_metrics[loss_key].append(loss_tensor) eval_metrics = {} for evaluator in evaluators: eval_metrics.update(evaluator.evaluate()) for loss_key in loss_metrics: - eval_metrics[loss_key] = loss_metrics[loss_key].result() + eval_metrics[loss_key] = tf.reduce_mean(loss_metrics[loss_key]) eval_metrics = {str(k): v for k, v in eval_metrics.items()} - tf.logging.info('Eval metrics at step %d', global_step) + tf.logging.info('Eval metrics at step %d', global_step.numpy()) for k in eval_metrics: tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step) tf.logging.info('\t+ %s: %f', k, eval_metrics[k]) - return eval_metrics @@ -854,6 +1002,8 @@ def eval_continuously( checkpoint_dir=None, wait_interval=180, timeout=3600, + eval_index=0, + save_final_config=False, **kwargs): """Run continuous evaluation of a detection model eagerly. @@ -883,11 +1033,16 @@ def eval_continuously( new checkpoint. timeout: The maximum number of seconds to wait for a checkpoint. Execution will terminate if no new checkpoints are found after these many seconds. - + eval_index: int, If given, only evaluate the dataset at the given + index. By default, evaluates dataset at 0'th index. + save_final_config: Whether to save the pipeline config file to the model + directory. **kwargs: Additional keyword arguments for configuration override. 
""" get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[ 'get_configs_from_pipeline_file'] + create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[ + 'create_pipeline_proto_from_configs'] merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[ 'merge_external_params_with_configs'] @@ -905,6 +1060,12 @@ def eval_continuously( 'Forced number of epochs for all eval validations to be 1.') configs = merge_external_params_with_configs( configs, None, kwargs_dict=kwargs) + if model_dir and save_final_config: + tf.logging.info('Saving pipeline config file to directory {}'.format( + model_dir)) + pipeline_config_final = create_pipeline_proto_from_configs(configs) + config_util.save_pipeline_config(pipeline_config_final, model_dir) + model_config = configs['model'] train_input_config = configs['train_input_config'] eval_config = configs['eval_config'] @@ -921,39 +1082,55 @@ def eval_continuously( eval_on_train_input_config.num_epochs = 1 if kwargs['use_bfloat16']: - tf.compat.v2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') + tf.compat.v2.keras.mixed_precision.set_global_policy('mixed_bfloat16') - detection_model = model_builder.build( - model_config=model_config, is_training=True) + eval_input_config = eval_input_configs[eval_index] + strategy = tf.compat.v2.distribute.get_strategy() + with strategy.scope(): + detection_model = MODEL_BUILD_UTIL_MAP['detection_model_fn_base']( + model_config=model_config, is_training=True) - # Create the inputs. 
- eval_inputs = [] - for eval_input_config in eval_input_configs: - next_eval_input = inputs.eval_input( - eval_config=eval_config, - eval_input_config=eval_input_config, - model_config=model_config, - model=detection_model) - eval_inputs.append((eval_input_config.name, next_eval_input)) + eval_input = strategy.experimental_distribute_dataset( + inputs.eval_input( + eval_config=eval_config, + eval_input_config=eval_input_config, + model_config=model_config, + model=detection_model)) global_step = tf.compat.v2.Variable( 0, trainable=False, dtype=tf.compat.v2.dtypes.int64) + optimizer, _ = optimizer_builder.build( + configs['train_config'].optimizer, global_step=global_step) + for latest_checkpoint in tf.train.checkpoints_iterator( checkpoint_dir, timeout=timeout, min_interval_secs=wait_interval): ckpt = tf.compat.v2.train.Checkpoint( - step=global_step, model=detection_model) + step=global_step, model=detection_model, optimizer=optimizer) + + # We run the detection_model on dummy inputs in order to ensure that the + # model and all its variables have been properly constructed. Specifically, + # this is currently necessary prior to (potentially) creating shadow copies + # of the model variables for the EMA optimizer. 
+ if eval_config.use_moving_averages: + unpad_groundtruth_tensors = (eval_config.batch_size == 1 and not use_tpu) + _ensure_model_is_built(detection_model, eval_input, + unpad_groundtruth_tensors) + optimizer.shadow_copy(detection_model) ckpt.restore(latest_checkpoint).expect_partial() - for eval_name, eval_input in eval_inputs: - summary_writer = tf.compat.v2.summary.create_file_writer( - os.path.join(model_dir, 'eval', eval_name)) - with summary_writer.as_default(): - eager_eval_loop( - detection_model, - configs, - eval_input, - use_tpu=use_tpu, - postprocess_on_cpu=postprocess_on_cpu, - global_step=global_step) + if eval_config.use_moving_averages: + optimizer.swap_weights() + + summary_writer = tf.compat.v2.summary.create_file_writer( + os.path.join(model_dir, 'eval', eval_input_config.name)) + with summary_writer.as_default(): + eager_eval_loop( + detection_model, + configs, + eval_input, + use_tpu=use_tpu, + postprocess_on_cpu=postprocess_on_cpu, + global_step=global_step, + ) diff --git a/research/object_detection/model_main_tf2.py b/research/object_detection/model_main_tf2.py index 82c7c7acfc52ca8a8040095560b5f001eddf3625..0cf053039ec16461fef0c1eb2f94df66fad2b70c 100644 --- a/research/object_detection/model_main_tf2.py +++ b/research/object_detection/model_main_tf2.py @@ -62,6 +62,11 @@ flags.DEFINE_integer( 'num_workers', 1, 'When num_workers > 1, training uses ' 'MultiWorkerMirroredStrategy. 
When num_workers = 1 it uses ' 'MirroredStrategy.') +flags.DEFINE_integer( + 'checkpoint_every_n', 1000, 'Integer defining how often we checkpoint.') +flags.DEFINE_boolean('record_summaries', True, + ('Whether or not to record summaries during' + ' training.')) FLAGS = flags.FLAGS @@ -100,7 +105,9 @@ def main(unused_argv): pipeline_config_path=FLAGS.pipeline_config_path, model_dir=FLAGS.model_dir, train_steps=FLAGS.num_train_steps, - use_tpu=FLAGS.use_tpu) + use_tpu=FLAGS.use_tpu, + checkpoint_every_n=FLAGS.checkpoint_every_n, + record_summaries=FLAGS.record_summaries) if __name__ == '__main__': tf.compat.v1.app.run() diff --git a/research/object_detection/models/center_net_hourglass_feature_extractor.py b/research/object_detection/models/center_net_hourglass_feature_extractor.py index 4761915aa5ad0023673199f2083ff355816f7bb1..785041e89d26bfc34808cd5c40486b9182db263e 100644 --- a/research/object_detection/models/center_net_hourglass_feature_extractor.py +++ b/research/object_detection/models/center_net_hourglass_feature_extractor.py @@ -62,13 +62,63 @@ class CenterNetHourglassFeatureExtractor( """Ther number of feature outputs returned by the feature extractor.""" return self._network.num_hourglasses - def get_model(self): - return self._network + @property + def supported_sub_model_types(self): + return ['detection'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'detection': + return self._network + else: + ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + + +def hourglass_10(channel_means, channel_stds, bgr_ordering, **kwargs): + """The Hourglass-10 backbone for CenterNet.""" + del kwargs + + network = hourglass_network.hourglass_10(num_channels=32) + return CenterNetHourglassFeatureExtractor( + network, channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + + +def hourglass_20(channel_means, channel_stds, bgr_ordering, **kwargs): + """The Hourglass-20 backbone for CenterNet.""" + del 
kwargs + + network = hourglass_network.hourglass_20(num_channels=48) + return CenterNetHourglassFeatureExtractor( + network, channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + + +def hourglass_32(channel_means, channel_stds, bgr_ordering, **kwargs): + """The Hourglass-32 backbone for CenterNet.""" + del kwargs + + network = hourglass_network.hourglass_32(num_channels=48) + return CenterNetHourglassFeatureExtractor( + network, channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + + +def hourglass_52(channel_means, channel_stds, bgr_ordering, **kwargs): + """The Hourglass-52 backbone for CenterNet.""" + del kwargs + + network = hourglass_network.hourglass_52(num_channels=64) + return CenterNetHourglassFeatureExtractor( + network, channel_means=channel_means, channel_stds=channel_stds, + bgr_ordering=bgr_ordering) -def hourglass_104(channel_means, channel_stds, bgr_ordering): +def hourglass_104(channel_means, channel_stds, bgr_ordering, **kwargs): """The Hourglass-104 backbone for CenterNet.""" + del kwargs + # TODO(vighneshb): update hourglass_104 signature to match with other + # hourglass networks. 
network = hourglass_network.hourglass_104() return CenterNetHourglassFeatureExtractor( network, channel_means=channel_means, channel_stds=channel_stds, diff --git a/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py index 19d5cbe9843ff03d6d1499a02980a067dc305579..31c26c5ab9efddc99518e92a2320ed409d737ea3 100644 --- a/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py +++ b/research/object_detection/models/center_net_hourglass_feature_extractor_tf2_test.py @@ -30,7 +30,8 @@ class CenterNetHourglassFeatureExtractorTest(test_case.TestCase): net = hourglass_network.HourglassNetwork( num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6], - channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2) + input_channel_dims=4, channel_dims_per_stage=[6, 8, 10, 12, 14], + num_hourglasses=2) model = hourglass.CenterNetHourglassFeatureExtractor(net) def graph_fn(): diff --git a/research/object_detection/models/center_net_mobilenet_v2_feature_extractor.py b/research/object_detection/models/center_net_mobilenet_v2_feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..63ce95d6c524c10ba08fff4e3f06d14ca36c7a91 --- /dev/null +++ b/research/object_detection/models/center_net_mobilenet_v2_feature_extractor.py @@ -0,0 +1,128 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""MobileNet V2[1] feature extractor for CenterNet[2] meta architecture. + +[1]: https://arxiv.org/abs/1801.04381 +[2]: https://arxiv.org/abs/1904.07850 +""" + +import tensorflow.compat.v1 as tf + +from object_detection.meta_architectures import center_net_meta_arch +from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2 + + +class CenterNetMobileNetV2FeatureExtractor( + center_net_meta_arch.CenterNetFeatureExtractor): + """The MobileNet V2 feature extractor for CenterNet.""" + + def __init__(self, + mobilenet_v2_net, + channel_means=(0., 0., 0.), + channel_stds=(1., 1., 1.), + bgr_ordering=False): + """Intializes the feature extractor. + + Args: + mobilenet_v2_net: The underlying mobilenet_v2 network to use. + channel_means: A tuple of floats, denoting the mean of each channel + which will be subtracted from it. + channel_stds: A tuple of floats, denoting the standard deviation of each + channel. Each channel will be divided by its standard deviation value. + bgr_ordering: bool, if set will change the channel ordering to be in the + [blue, red, green] order. + """ + + super(CenterNetMobileNetV2FeatureExtractor, self).__init__( + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + self._network = mobilenet_v2_net + + output = self._network(self._network.input) + + # MobileNet by itself transforms a 224x224x3 volume into a 7x7x1280, which + # leads to a stride of 32. We perform upsampling to get it to a target + # stride of 4. + for num_filters in [256, 128, 64]: + # 1. 
We use a simple convolution instead of a deformable convolution + conv = tf.keras.layers.Conv2D( + filters=num_filters, kernel_size=1, strides=1, padding='same') + output = conv(output) + output = tf.keras.layers.BatchNormalization()(output) + output = tf.keras.layers.ReLU()(output) + + # 2. We use the default initialization for the convolution layers + # instead of initializing it to do bilinear upsampling. + conv_transpose = tf.keras.layers.Conv2DTranspose( + filters=num_filters, kernel_size=3, strides=2, padding='same') + output = conv_transpose(output) + output = tf.keras.layers.BatchNormalization()(output) + output = tf.keras.layers.ReLU()(output) + + self._network = tf.keras.models.Model( + inputs=self._network.input, outputs=output) + + def preprocess(self, resized_inputs): + resized_inputs = super(CenterNetMobileNetV2FeatureExtractor, + self).preprocess(resized_inputs) + return tf.keras.applications.mobilenet_v2.preprocess_input(resized_inputs) + + def load_feature_extractor_weights(self, path): + self._network.load_weights(path) + + def get_base_model(self): + return self._network + + def call(self, inputs): + return [self._network(inputs)] + + @property + def out_stride(self): + """The stride in the output image of the network.""" + return 4 + + @property + def num_feature_outputs(self): + """The number of feature outputs returned by the feature extractor.""" + return 1 + + @property + def supported_sub_model_types(self): + return ['detection'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'detection': + return self._network + else: + raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + + +def mobilenet_v2(channel_means, channel_stds, bgr_ordering, + depth_multiplier=1.0, **kwargs): + """The MobileNetV2 backbone for CenterNet.""" + del kwargs + + # We set 'is_training' to True for now. 
+ network = mobilenetv2.mobilenet_v2( + batchnorm_training=True, + alpha=depth_multiplier, + include_top=False, + weights='imagenet' if depth_multiplier == 1.0 else None) + return CenterNetMobileNetV2FeatureExtractor( + network, + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering) diff --git a/research/object_detection/models/center_net_mobilenet_v2_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_mobilenet_v2_feature_extractor_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..5211701138d8e134bba7c2ff6b247cf19d156691 --- /dev/null +++ b/research/object_detection/models/center_net_mobilenet_v2_feature_extractor_tf2_test.py @@ -0,0 +1,46 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Testing mobilenet_v2 feature extractor for CenterNet.""" +import unittest +import numpy as np +import tensorflow.compat.v1 as tf + +from object_detection.models import center_net_mobilenet_v2_feature_extractor +from object_detection.models.keras_models import mobilenet_v2 +from object_detection.utils import test_case +from object_detection.utils import tf_version + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class CenterNetMobileNetV2FeatureExtractorTest(test_case.TestCase): + + def test_center_net_mobilenet_v2_feature_extractor(self): + + net = mobilenet_v2.mobilenet_v2(True, include_top=False) + + model = center_net_mobilenet_v2_feature_extractor.CenterNetMobileNetV2FeatureExtractor( + net) + + def graph_fn(): + img = np.zeros((8, 224, 224, 3), dtype=np.float32) + processed_img = model.preprocess(img) + return model(processed_img) + + outputs = self.execute(graph_fn, []) + self.assertEqual(outputs.shape, (8, 56, 56, 64)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor.py b/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..15ba3d85cf6160b6dddefa8116939c88cd812dc0 --- /dev/null +++ b/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor.py @@ -0,0 +1,162 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""MobileNet V2[1] + FPN[2] feature extractor for CenterNet[3] meta architecture. + +[1]: https://arxiv.org/abs/1801.04381 +[2]: https://arxiv.org/abs/1612.03144. +[3]: https://arxiv.org/abs/1904.07850 +""" + +import tensorflow.compat.v1 as tf + +from object_detection.meta_architectures import center_net_meta_arch +from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2 + + +_MOBILENET_V2_FPN_SKIP_LAYERS = [ + 'block_2_add', 'block_5_add', 'block_9_add', 'out_relu' +] + + +class CenterNetMobileNetV2FPNFeatureExtractor( + center_net_meta_arch.CenterNetFeatureExtractor): + """The MobileNet V2 with FPN skip layers feature extractor for CenterNet.""" + + def __init__(self, + mobilenet_v2_net, + channel_means=(0., 0., 0.), + channel_stds=(1., 1., 1.), + bgr_ordering=False, + use_separable_conv=False): + """Intializes the feature extractor. + + Args: + mobilenet_v2_net: The underlying mobilenet_v2 network to use. + channel_means: A tuple of floats, denoting the mean of each channel + which will be subtracted from it. + channel_stds: A tuple of floats, denoting the standard deviation of each + channel. Each channel will be divided by its standard deviation value. + bgr_ordering: bool, if set will change the channel ordering to be in the + [blue, red, green] order. + use_separable_conv: If set to True, all convolutional layers in the FPN + network will be replaced by separable convolutions. 
+ """ + + super(CenterNetMobileNetV2FPNFeatureExtractor, self).__init__( + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering) + self._base_model = mobilenet_v2_net + + output = self._base_model(self._base_model.input) + + # Add pyramid feature network on every layer that has stride 2. + skip_outputs = [ + self._base_model.get_layer(skip_layer_name).output + for skip_layer_name in _MOBILENET_V2_FPN_SKIP_LAYERS + ] + self._fpn_model = tf.keras.models.Model( + inputs=self._base_model.input, outputs=skip_outputs) + fpn_outputs = self._fpn_model(self._base_model.input) + + # Construct the top-down feature maps -- we start with an output of + # 7x7x1280, which we continually upsample, apply a residual on and merge. + # This results in a 56x56x24 output volume. + top_layer = fpn_outputs[-1] + # Use normal convolutional layer since the kernel_size is 1. + residual_op = tf.keras.layers.Conv2D( + filters=64, kernel_size=1, strides=1, padding='same') + top_down = residual_op(top_layer) + + num_filters_list = [64, 32, 24] + for i, num_filters in enumerate(num_filters_list): + level_ind = len(num_filters_list) - 1 - i + # Upsample. + upsample_op = tf.keras.layers.UpSampling2D(2, interpolation='nearest') + top_down = upsample_op(top_down) + + # Residual (skip-connection) from bottom-up pathway. + # Use normal convolutional layer since the kernel_size is 1. + residual_op = tf.keras.layers.Conv2D( + filters=num_filters, kernel_size=1, strides=1, padding='same') + residual = residual_op(fpn_outputs[level_ind]) + + # Merge. 
+ top_down = top_down + residual + next_num_filters = num_filters_list[i + 1] if i + 1 <= 2 else 24 + if use_separable_conv: + conv = tf.keras.layers.SeparableConv2D( + filters=next_num_filters, kernel_size=3, strides=1, padding='same') + else: + conv = tf.keras.layers.Conv2D( + filters=next_num_filters, kernel_size=3, strides=1, padding='same') + top_down = conv(top_down) + top_down = tf.keras.layers.BatchNormalization()(top_down) + top_down = tf.keras.layers.ReLU()(top_down) + + output = top_down + + self._feature_extractor_model = tf.keras.models.Model( + inputs=self._base_model.input, outputs=output) + + def preprocess(self, resized_inputs): + resized_inputs = super(CenterNetMobileNetV2FPNFeatureExtractor, + self).preprocess(resized_inputs) + return tf.keras.applications.mobilenet_v2.preprocess_input(resized_inputs) + + def load_feature_extractor_weights(self, path): + self._base_model.load_weights(path) + + @property + def supported_sub_model_types(self): + return ['classification'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'classification': + return self._base_model + else: + raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + + def call(self, inputs): + return [self._feature_extractor_model(inputs)] + + @property + def out_stride(self): + """The stride in the output image of the network.""" + return 4 + + @property + def num_feature_outputs(self): + """The number of feature outputs returned by the feature extractor.""" + return 1 + + +def mobilenet_v2_fpn(channel_means, channel_stds, bgr_ordering, + use_separable_conv=False, depth_multiplier=1.0, **kwargs): + """The MobileNetV2+FPN backbone for CenterNet.""" + del kwargs + + # Set to batchnorm_training to True for now. 
+ network = mobilenetv2.mobilenet_v2( + batchnorm_training=True, + alpha=depth_multiplier, + include_top=False, + weights='imagenet' if depth_multiplier == 1.0 else None) + return CenterNetMobileNetV2FPNFeatureExtractor( + network, + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering, + use_separable_conv=use_separable_conv) diff --git a/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor_tf2_test.py new file mode 100644 index 0000000000000000000000000000000000000000..82e2ca02167b1882bef7260e1a5b233914add8bb --- /dev/null +++ b/research/object_detection/models/center_net_mobilenet_v2_fpn_feature_extractor_tf2_test.py @@ -0,0 +1,108 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Testing mobilenet_v2+FPN feature extractor for CenterNet.""" +import unittest +import numpy as np +import tensorflow.compat.v1 as tf + +from object_detection.models import center_net_mobilenet_v2_fpn_feature_extractor +from object_detection.utils import test_case +from object_detection.utils import tf_version + + +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class CenterNetMobileNetV2FPNFeatureExtractorTest(test_case.TestCase): + + def test_center_net_mobilenet_v2_fpn_feature_extractor(self): + + channel_means = (0., 0., 0.) + channel_stds = (1., 1., 1.) + bgr_ordering = False + model = ( + center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn( + channel_means, channel_stds, bgr_ordering, + use_separable_conv=False)) + + def graph_fn(): + img = np.zeros((8, 224, 224, 3), dtype=np.float32) + processed_img = model.preprocess(img) + return model(processed_img) + + outputs = self.execute(graph_fn, []) + self.assertEqual(outputs.shape, (8, 56, 56, 24)) + + # Pull out the FPN network. + output = model.get_layer('model_1') + for layer in output.layers: + # All convolution layers should be normal 2D convolutions. + if 'conv' in layer.name: + self.assertIsInstance(layer, tf.keras.layers.Conv2D) + + def test_center_net_mobilenet_v2_fpn_feature_extractor_sep_conv(self): + + channel_means = (0., 0., 0.) + channel_stds = (1., 1., 1.) + bgr_ordering = False + model = ( + center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn( + channel_means, channel_stds, bgr_ordering, use_separable_conv=True)) + + def graph_fn(): + img = np.zeros((8, 224, 224, 3), dtype=np.float32) + processed_img = model.preprocess(img) + return model(processed_img) + + outputs = self.execute(graph_fn, []) + self.assertEqual(outputs.shape, (8, 56, 56, 24)) + # Pull out the FPN network. 
+ backbone = model.get_layer('model') + first_conv = backbone.get_layer('Conv1') + self.assertEqual(32, first_conv.filters) + + # Pull out the FPN network. + output = model.get_layer('model_1') + for layer in output.layers: + # Convolution layers with kernel size not equal to (1, 1) should be + # separable 2D convolutions. + if 'conv' in layer.name and layer.kernel_size != (1, 1): + self.assertIsInstance(layer, tf.keras.layers.SeparableConv2D) + + def test_center_net_mobilenet_v2_fpn_feature_extractor_depth_multiplier(self): + + channel_means = (0., 0., 0.) + channel_stds = (1., 1., 1.) + bgr_ordering = False + model = ( + center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn( + channel_means, channel_stds, bgr_ordering, use_separable_conv=True, + depth_multiplier=2.0)) + + def graph_fn(): + img = np.zeros((8, 224, 224, 3), dtype=np.float32) + processed_img = model.preprocess(img) + return model(processed_img) + + outputs = self.execute(graph_fn, []) + self.assertEqual(outputs.shape, (8, 56, 56, 24)) + # Pull out the FPN network. + backbone = model.get_layer('model') + first_conv = backbone.get_layer('Conv1') + # Note that the first layer typically has 32 filters, but this model has + # a depth multiplier of 2. 
+ self.assertEqual(64, first_conv.filters) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/object_detection/models/center_net_resnet_feature_extractor.py b/research/object_detection/models/center_net_resnet_feature_extractor.py index 477fa4c50ea9e0bc62b43a75c1674acfef7a183c..d8fbe7126563c3ef0d34602d86df3061fbb366a5 100644 --- a/research/object_detection/models/center_net_resnet_feature_extractor.py +++ b/research/object_detection/models/center_net_resnet_feature_extractor.py @@ -46,10 +46,12 @@ class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor): channel_means=channel_means, channel_stds=channel_stds, bgr_ordering=bgr_ordering) if resnet_type == 'resnet_v2_101': - self._base_model = tf.keras.applications.ResNet101V2(weights=None) + self._base_model = tf.keras.applications.ResNet101V2(weights=None, + include_top=False) output_layer = 'conv5_block3_out' elif resnet_type == 'resnet_v2_50': - self._base_model = tf.keras.applications.ResNet50V2(weights=None) + self._base_model = tf.keras.applications.ResNet50V2(weights=None, + include_top=False) output_layer = 'conv5_block3_out' else: raise ValueError('Unknown Resnet Model {}'.format(resnet_type)) @@ -101,10 +103,6 @@ class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor): def load_feature_extractor_weights(self, path): self._base_model.load_weights(path) - def get_base_model(self): - """Get base resnet model for inspection and testing.""" - return self._base_model - def call(self, inputs): """Returns image features extracted by the backbone. 
@@ -127,9 +125,20 @@ class CenterNetResnetFeatureExtractor(CenterNetFeatureExtractor): def out_stride(self): return 4 + @property + def supported_sub_model_types(self): + return ['classification'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'classification': + return self._base_model + else: + raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + -def resnet_v2_101(channel_means, channel_stds, bgr_ordering): +def resnet_v2_101(channel_means, channel_stds, bgr_ordering, **kwargs): """The ResNet v2 101 feature extractor.""" + del kwargs return CenterNetResnetFeatureExtractor( resnet_type='resnet_v2_101', @@ -139,8 +148,9 @@ def resnet_v2_101(channel_means, channel_stds, bgr_ordering): ) -def resnet_v2_50(channel_means, channel_stds, bgr_ordering): +def resnet_v2_50(channel_means, channel_stds, bgr_ordering, **kwargs): """The ResNet v2 50 feature extractor.""" + del kwargs return CenterNetResnetFeatureExtractor( resnet_type='resnet_v2_50', diff --git a/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py index 3429c0442053982d3d3d9502508ede3177cbf102..d8f9b22a746cbd6da862f9a37f4ef2e57f10b451 100644 --- a/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py +++ b/research/object_detection/models/center_net_resnet_feature_extractor_tf2_test.py @@ -31,11 +31,11 @@ class CenterNetResnetFeatureExtractorTest(test_case.TestCase): model = center_net_resnet_feature_extractor.\ CenterNetResnetFeatureExtractor('resnet_v2_101') def graph_fn(): - img = np.zeros((8, 224, 224, 3), dtype=np.float32) + img = np.zeros((8, 512, 512, 3), dtype=np.float32) processed_img = model.preprocess(img) return model(processed_img) outputs = self.execute(graph_fn, []) - self.assertEqual(outputs.shape, (8, 56, 56, 64)) + self.assertEqual(outputs.shape, (8, 128, 128, 64)) def test_output_size_resnet50(self): """Verify 
that shape of features returned by the backbone is correct.""" diff --git a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py index 842e9cf1b2e5393a6bc87df3989f173d0409de70..c78091e8d04b76bd383092078d0e08c0f05bbdb4 100644 --- a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py +++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor.py @@ -21,9 +21,14 @@ import tensorflow.compat.v1 as tf from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor +from object_detection.models.keras_models import resnet_v1 _RESNET_MODEL_OUTPUT_LAYERS = { + 'resnet_v1_18': ['conv2_block2_out', 'conv3_block2_out', + 'conv4_block2_out', 'conv5_block2_out'], + 'resnet_v1_34': ['conv2_block3_out', 'conv3_block4_out', + 'conv4_block6_out', 'conv5_block3_out'], 'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out'], 'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out', @@ -66,9 +71,15 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor): channel_means=channel_means, channel_stds=channel_stds, bgr_ordering=bgr_ordering) if resnet_type == 'resnet_v1_50': - self._base_model = tf.keras.applications.ResNet50(weights=None) + self._base_model = tf.keras.applications.ResNet50(weights=None, + include_top=False) elif resnet_type == 'resnet_v1_101': - self._base_model = tf.keras.applications.ResNet101(weights=None) + self._base_model = tf.keras.applications.ResNet101(weights=None, + include_top=False) + elif resnet_type == 'resnet_v1_18': + self._base_model = resnet_v1.resnet_v1_18(weights=None, include_top=False) + elif resnet_type == 'resnet_v1_34': + self._base_model = resnet_v1.resnet_v1_34(weights=None, include_top=False) else: raise ValueError('Unknown Resnet Model {}'.format(resnet_type)) output_layers = 
_RESNET_MODEL_OUTPUT_LAYERS[resnet_type] @@ -128,10 +139,6 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor): def load_feature_extractor_weights(self, path): self._base_model.load_weights(path) - def get_base_model(self): - """Get base resnet model for inspection and testing.""" - return self._base_model - def call(self, inputs): """Returns image features extracted by the backbone. @@ -154,9 +161,20 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor): def out_stride(self): return 4 + @property + def supported_sub_model_types(self): + return ['classification'] + + def get_sub_model(self, sub_model_type): + if sub_model_type == 'classification': + return self._base_model + else: + raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type)) + -def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering): +def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering, **kwargs): """The ResNet v1 101 FPN feature extractor.""" + del kwargs return CenterNetResnetV1FpnFeatureExtractor( resnet_type='resnet_v1_101', @@ -166,11 +184,35 @@ def resnet_v1_101_fpn(channel_means, channel_stds, bgr_ordering): ) -def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering): +def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering, **kwargs): """The ResNet v1 50 FPN feature extractor.""" + del kwargs return CenterNetResnetV1FpnFeatureExtractor( resnet_type='resnet_v1_50', channel_means=channel_means, channel_stds=channel_stds, bgr_ordering=bgr_ordering) + + +def resnet_v1_34_fpn(channel_means, channel_stds, bgr_ordering, **kwargs): + """The ResNet v1 34 FPN feature extractor.""" + del kwargs + + return CenterNetResnetV1FpnFeatureExtractor( + resnet_type='resnet_v1_34', + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering + ) + + +def resnet_v1_18_fpn(channel_means, channel_stds, bgr_ordering, **kwargs): + """The ResNet v1 18 FPN feature extractor.""" + del kwargs + + return 
CenterNetResnetV1FpnFeatureExtractor( + resnet_type='resnet_v1_18', + channel_means=channel_means, + channel_stds=channel_stds, + bgr_ordering=bgr_ordering) diff --git a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py index 3f1524904f0a055e48342d09febdd7bd3ec6fb3c..2508e52f793157c9bf3b644601e7772f38511534 100644 --- a/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py +++ b/research/object_detection/models/center_net_resnet_v1_fpn_feature_extractor_tf2_test.py @@ -31,6 +31,8 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase, @parameterized.parameters( {'resnet_type': 'resnet_v1_50'}, {'resnet_type': 'resnet_v1_101'}, + {'resnet_type': 'resnet_v1_18'}, + {'resnet_type': 'resnet_v1_34'}, ) def test_correct_output_size(self, resnet_type): """Verify that shape of features returned by the backbone is correct.""" @@ -38,11 +40,11 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase, model = center_net_resnet_v1_fpn_feature_extractor.\ CenterNetResnetV1FpnFeatureExtractor(resnet_type) def graph_fn(): - img = np.zeros((8, 224, 224, 3), dtype=np.float32) + img = np.zeros((8, 512, 512, 3), dtype=np.float32) processed_img = model.preprocess(img) return model(processed_img) - self.assertEqual(self.execute(graph_fn, []).shape, (8, 56, 56, 64)) + self.assertEqual(self.execute(graph_fn, []).shape, (8, 128, 128, 64)) if __name__ == '__main__': diff --git a/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor.py b/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor.py index e3a161e01f7c2f6d13a390c039070855393b82bd..27d8844b7b765e1d195f1a40580c5b3863637b12 100644 --- a/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor.py +++ 
b/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor.py @@ -20,6 +20,7 @@ import tensorflow.compat.v1 as tf from object_detection.meta_architectures import faster_rcnn_meta_arch from object_detection.models import feature_map_generators from object_detection.models.keras_models import resnet_v1 +from object_detection.utils import ops _RESNET_MODEL_OUTPUT_LAYERS = { @@ -32,6 +33,78 @@ _RESNET_MODEL_OUTPUT_LAYERS = { } +class _ResnetFPN(tf.keras.layers.Layer): + """Construct Resnet FPN layer.""" + + def __init__(self, + backbone_classifier, + fpn_features_generator, + coarse_feature_layers, + pad_to_multiple, + fpn_min_level, + resnet_block_names, + base_fpn_max_level): + """Constructor. + + Args: + backbone_classifier: Classifier backbone. Should be one of 'resnet_v1_50', + 'resnet_v1_101', 'resnet_v1_152'. + fpn_features_generator: KerasFpnTopDownFeatureMaps that accepts a + dictionary of features and returns a ordered dictionary of fpn features. + coarse_feature_layers: Coarse feature layers for fpn. + pad_to_multiple: An integer multiple to pad input image. + fpn_min_level: the highest resolution feature map to use in FPN. The valid + values are {2, 3, 4, 5} which map to Resnet v1 layers. + resnet_block_names: a list of block names of resnet. + base_fpn_max_level: maximum level of fpn without coarse feature layers. + """ + super(_ResnetFPN, self).__init__() + self.classification_backbone = backbone_classifier + self.fpn_features_generator = fpn_features_generator + self.coarse_feature_layers = coarse_feature_layers + self.pad_to_multiple = pad_to_multiple + self._fpn_min_level = fpn_min_level + self._resnet_block_names = resnet_block_names + self._base_fpn_max_level = base_fpn_max_level + + def call(self, inputs): + """Create internal Resnet FPN layer. + + Args: + inputs: A [batch, height_out, width_out, channels] float32 tensor + representing a batch of images. 
+ + Returns: + feature_maps: A list of tensors with shape [batch, height, width, depth] + represent extracted features. + """ + inputs = ops.pad_to_multiple(inputs, self.pad_to_multiple) + backbone_outputs = self.classification_backbone(inputs) + + feature_block_list = [] + for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): + feature_block_list.append('block{}'.format(level - 1)) + feature_block_map = dict( + list(zip(self._resnet_block_names, backbone_outputs))) + fpn_input_image_features = [ + (feature_block, feature_block_map[feature_block]) + for feature_block in feature_block_list] + fpn_features = self.fpn_features_generator(fpn_input_image_features) + + feature_maps = [] + for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): + feature_maps.append(fpn_features['top_down_block{}'.format(level-1)]) + last_feature_map = fpn_features['top_down_block{}'.format( + self._base_fpn_max_level - 1)] + + for coarse_feature_layers in self.coarse_feature_layers: + for layer in coarse_feature_layers: + last_feature_map = layer(last_feature_map) + feature_maps.append(last_feature_map) + + return feature_maps + + class FasterRCNNResnetV1FpnKerasFeatureExtractor( faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor): """Faster RCNN Feature Extractor using Keras-based Resnet V1 FPN features.""" @@ -42,7 +115,8 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( resnet_v1_base_model_name, first_stage_features_stride, conv_hyperparams, - batch_norm_trainable=False, + batch_norm_trainable=True, + pad_to_multiple=32, weight_decay=0.0, fpn_min_level=2, fpn_max_level=6, @@ -60,6 +134,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( containing convolution hyperparameters for the layers added on top of the base feature extractor. batch_norm_trainable: See base class. + pad_to_multiple: An integer multiple to pad input image. weight_decay: See base class. fpn_min_level: the highest resolution feature map to use in FPN. 
The valid values are {2, 3, 4, 5} which map to Resnet v1 layers. @@ -93,6 +168,8 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( self._fpn_max_level = fpn_max_level self._additional_layer_depth = additional_layer_depth self._freeze_batchnorm = (not batch_norm_trainable) + self._pad_to_multiple = pad_to_multiple + self._override_base_feature_extractor_hyperparams = \ override_base_feature_extractor_hyperparams self._resnet_block_names = ['block1', 'block2', 'block3', 'block4'] @@ -156,10 +233,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( self.classification_backbone = tf.keras.Model( inputs=full_resnet_v1_model.inputs, outputs=outputs) - backbone_outputs = self.classification_backbone( - full_resnet_v1_model.inputs) - # construct FPN feature generator self._base_fpn_max_level = min(self._fpn_max_level, 5) self._num_levels = self._base_fpn_max_level + 1 - self._fpn_min_level self._fpn_features_generator = ( @@ -171,16 +245,6 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( freeze_batchnorm=self._freeze_batchnorm, name='FeatureMaps')) - feature_block_list = [] - for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): - feature_block_list.append('block{}'.format(level - 1)) - feature_block_map = dict( - list(zip(self._resnet_block_names, backbone_outputs))) - fpn_input_image_features = [ - (feature_block, feature_block_map[feature_block]) - for feature_block in feature_block_list] - fpn_features = self._fpn_features_generator(fpn_input_image_features) - # Construct coarse feature layers for i in range(self._base_fpn_max_level, self._fpn_max_level): layers = [] @@ -202,19 +266,13 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( name=layer_name)) self._coarse_feature_layers.append(layers) - feature_maps = [] - for level in range(self._fpn_min_level, self._base_fpn_max_level + 1): - feature_maps.append(fpn_features['top_down_block{}'.format(level-1)]) - last_feature_map = fpn_features['top_down_block{}'.format( - 
self._base_fpn_max_level - 1)] - - for coarse_feature_layers in self._coarse_feature_layers: - for layer in coarse_feature_layers: - last_feature_map = layer(last_feature_map) - feature_maps.append(last_feature_map) - - feature_extractor_model = tf.keras.models.Model( - inputs=full_resnet_v1_model.inputs, outputs=feature_maps) + feature_extractor_model = _ResnetFPN(self.classification_backbone, + self._fpn_features_generator, + self._coarse_feature_layers, + self._pad_to_multiple, + self._fpn_min_level, + self._resnet_block_names, + self._base_fpn_max_level) return feature_extractor_model def get_box_classifier_feature_extractor_model(self, name=None): @@ -233,16 +291,18 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor( And returns proposal_classifier_features: A 4-D float tensor with shape - [batch_size * self.max_num_proposals, 1024] + [batch_size * self.max_num_proposals, 1, 1, 1024] representing box classifier features for each proposal. """ with tf.name_scope(name): with tf.name_scope('ResnetV1FPN'): - # TODO(yiming): Add a batchnorm layer between two fc layers. feature_extractor_model = tf.keras.models.Sequential([ tf.keras.layers.Flatten(), tf.keras.layers.Dense(units=1024, activation='relu'), - tf.keras.layers.Dense(units=1024, activation='relu') + self._conv_hyperparams.build_batch_norm( + training=(self._is_training and not self._freeze_batchnorm)), + tf.keras.layers.Dense(units=1024, activation='relu'), + tf.keras.layers.Reshape((1, 1, 1024)) ]) return feature_extractor_model @@ -254,8 +314,8 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor( def __init__(self, is_training, first_stage_features_stride=16, + batch_norm_trainable=True, conv_hyperparams=None, - batch_norm_trainable=False, weight_decay=0.0, fpn_min_level=2, fpn_max_level=6, @@ -266,8 +326,8 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor( Args: is_training: See base class. first_stage_features_stride: See base class. - conv_hyperparams: See base class. 
batch_norm_trainable: See base class. + conv_hyperparams: See base class. weight_decay: See base class. fpn_min_level: See base class. fpn_max_level: See base class. @@ -297,8 +357,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor( def __init__(self, is_training, first_stage_features_stride=16, + batch_norm_trainable=True, conv_hyperparams=None, - batch_norm_trainable=False, weight_decay=0.0, fpn_min_level=2, fpn_max_level=6, @@ -309,8 +369,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor( Args: is_training: See base class. first_stage_features_stride: See base class. - conv_hyperparams: See base class. batch_norm_trainable: See base class. + conv_hyperparams: See base class. weight_decay: See base class. fpn_min_level: See base class. fpn_max_level: See base class. @@ -339,8 +399,8 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor( def __init__(self, is_training, first_stage_features_stride=16, + batch_norm_trainable=True, conv_hyperparams=None, - batch_norm_trainable=False, weight_decay=0.0, fpn_min_level=2, fpn_max_level=6, @@ -351,8 +411,8 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor( Args: is_training: See base class. first_stage_features_stride: See base class. - conv_hyperparams: See base class. batch_norm_trainable: See base class. + conv_hyperparams: See base class. weight_decay: See base class. fpn_min_level: See base class. fpn_max_level: See base class. 
diff --git a/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py b/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py index 5407f2cfa9aafa7ffc44eb93c1c775cd5bb2784d..d0a0813cf65e873a4109fc8bc33add099c1ab87c 100644 --- a/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py +++ b/research/object_detection/models/faster_rcnn_resnet_v1_fpn_keras_feature_extractor_tf2_test.py @@ -91,4 +91,4 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase): model(proposal_feature_maps)) features_shape = tf.shape(proposal_classifier_features) - self.assertAllEqual(features_shape.numpy(), [3, 1024]) + self.assertAllEqual(features_shape.numpy(), [3, 1, 1, 1024]) diff --git a/research/object_detection/models/feature_map_generators.py b/research/object_detection/models/feature_map_generators.py index 87d15e968390446a4332e20e5b737e04d573d98a..f343f317d86e984cc441570a4a7681fca1be2c77 100644 --- a/research/object_detection/models/feature_map_generators.py +++ b/research/object_detection/models/feature_map_generators.py @@ -467,9 +467,11 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier, stride=1, scope=layer_name) if pool_residual and pre_layer_depth == depth_fn(layer_depth): + if use_explicit_padding: + pre_layer = ops.fixed_padding(pre_layer, conv_kernel_size) feature_map += slim.avg_pool2d( - pre_layer, [3, 3], - padding='SAME', + pre_layer, [conv_kernel_size, conv_kernel_size], + padding=padding, stride=2, scope=layer_name + '_pool') else: diff --git a/research/object_detection/models/keras_models/base_models/original_mobilenet_v2.py b/research/object_detection/models/keras_models/base_models/original_mobilenet_v2.py index cf7f95724e86c422b568921b77dcf094d901d11f..42b40caf1c3de5dcd143c1d7200abf8ee16b7253 100644 --- a/research/object_detection/models/keras_models/base_models/original_mobilenet_v2.py +++ 
b/research/object_detection/models/keras_models/base_models/original_mobilenet_v2.py @@ -117,7 +117,7 @@ def _obtain_input_shape( require_flatten): """Internal utility to compute/validate an ImageNet model's input shape. - Arguments: + Args: input_shape: either None (will return the default network input shape), or a user-provided shape to be validated. default_size: default input width/height for the model. @@ -198,7 +198,7 @@ def preprocess_input(x): the RGB values from [0, 255] to [-1, 1]. Note that this preprocessing function is different from `imagenet_utils.preprocess_input()`. - Arguments: + Args: x: a 4D numpy array consists of RGB values within [0, 255]. Returns: @@ -237,7 +237,7 @@ def mobilenet_v2(input_shape=None, model = load_model('mobilenet.h5', custom_objects={ 'relu6': mobilenet.relu6}) - Arguments: + Args: input_shape: optional shape tuple, to be specified if you would like to use a model with an input img resolution that is not (224, 224, 3). diff --git a/research/object_detection/models/keras_models/hourglass_network.py b/research/object_detection/models/keras_models/hourglass_network.py index 09fb8ed4f4fb6f4b2712d8403ba1b94985ad25ad..e6e71545c401f0bd2df723581734d85969f400f7 100644 --- a/research/object_detection/models/keras_models/hourglass_network.py +++ b/research/object_detection/models/keras_models/hourglass_network.py @@ -174,8 +174,38 @@ class InputDownsampleBlock(tf.keras.layers.Layer): return self.residual_block(self.conv_block(inputs)) +class InputConvBlock(tf.keras.layers.Layer): + """Block for the initial feature convolution. + + This block is used in the hourglass network when we don't want to downsample + the input. + """ + + def __init__(self, out_channels_initial_conv, out_channels_residual_block): + """Initializes the downsample block. + + Args: + out_channels_initial_conv: int, the desired number of output channels + in the initial conv layer. 
+ out_channels_residual_block: int, the desired number of output channels + in the underlying residual block. + """ + + super(InputConvBlock, self).__init__() + + self.conv_block = ConvolutionalBlock( + kernel_size=3, out_channels=out_channels_initial_conv, stride=1, + padding='valid') + self.residual_block = ResidualBlock( + out_channels=out_channels_residual_block, stride=1, skip_conv=True) + + def call(self, inputs): + return self.residual_block(self.conv_block(inputs)) + + def _make_repeated_residual_blocks(out_channels, num_blocks, - initial_stride=1, residual_channels=None): + initial_stride=1, residual_channels=None, + initial_skip_conv=False): """Stack Residual blocks one after the other. Args: @@ -184,6 +214,9 @@ def _make_repeated_residual_blocks(out_channels, num_blocks, initial_stride: int, the stride of the initial residual block. residual_channels: int, the desired number of output channels in the intermediate residual blocks. If not specifed, we use out_channels. + initial_skip_conv: bool, if set, the first residual block uses a skip + convolution. This is useful when the number of channels in the input + are not the same as residual_channels. Returns: blocks: A list of residual blocks to be applied in sequence. @@ -196,16 +229,34 @@ def _make_repeated_residual_blocks(out_channels, num_blocks, residual_channels = out_channels for i in range(num_blocks - 1): + # Only use the stride at the first block so we don't repeatedly downsample + # the input stride = initial_stride if i == 0 else 1 + + # If the stide is more than 1, we cannot use an identity layer for the + # skip connection and are forced to use a conv for the skip connection. 
skip_conv = stride > 1 + if i == 0 and initial_skip_conv: + skip_conv = True + blocks.append( ResidualBlock(out_channels=residual_channels, stride=stride, skip_conv=skip_conv) ) - skip_conv = residual_channels != out_channels - blocks.append(ResidualBlock(out_channels=out_channels, skip_conv=skip_conv)) + if num_blocks == 1: + # If there is only 1 block, the for loop above is not run, + # therefore we honor the requested stride in the last residual block + stride = initial_stride + # We are forced to use a conv in the skip connection if stride > 1 + skip_conv = stride > 1 + else: + stride = 1 + skip_conv = residual_channels != out_channels + + blocks.append(ResidualBlock(out_channels=out_channels, skip_conv=skip_conv, + stride=stride)) return blocks @@ -222,7 +273,8 @@ def _apply_blocks(inputs, blocks): class EncoderDecoderBlock(tf.keras.layers.Layer): """An encoder-decoder block which recursively defines the hourglass network.""" - def __init__(self, num_stages, channel_dims, blocks_per_stage): + def __init__(self, num_stages, channel_dims, blocks_per_stage, + stagewise_downsample=True, encoder_decoder_shortcut=True): """Initializes the encoder-decoder block. Args: @@ -237,6 +289,10 @@ class EncoderDecoderBlock(tf.keras.layers.Layer): blocks_per_stage: int list, number of residual blocks to use at each stage. `blocks_per_stage[0]` defines the number of blocks at the current stage and `blocks_per_stage[1:]` is used at further stages. + stagewise_downsample: bool, whether or not to downsample before passing + inputs to the next stage. + encoder_decoder_shortcut: bool, whether or not to use shortcut + connections between encoder and decoder. 
""" super(EncoderDecoderBlock, self).__init__() @@ -244,17 +300,26 @@ class EncoderDecoderBlock(tf.keras.layers.Layer): out_channels = channel_dims[0] out_channels_downsampled = channel_dims[1] - self.encoder_block1 = _make_repeated_residual_blocks( - out_channels=out_channels, num_blocks=blocks_per_stage[0], - initial_stride=1) + self.encoder_decoder_shortcut = encoder_decoder_shortcut + + if encoder_decoder_shortcut: + self.merge_features = tf.keras.layers.Add() + self.encoder_block1 = _make_repeated_residual_blocks( + out_channels=out_channels, num_blocks=blocks_per_stage[0], + initial_stride=1) + + initial_stride = 2 if stagewise_downsample else 1 self.encoder_block2 = _make_repeated_residual_blocks( out_channels=out_channels_downsampled, - num_blocks=blocks_per_stage[0], initial_stride=2) + num_blocks=blocks_per_stage[0], initial_stride=initial_stride, + initial_skip_conv=out_channels != out_channels_downsampled) if num_stages > 1: self.inner_block = [ EncoderDecoderBlock(num_stages - 1, channel_dims[1:], - blocks_per_stage[1:]) + blocks_per_stage[1:], + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut) ] else: self.inner_block = _make_repeated_residual_blocks( @@ -264,13 +329,13 @@ class EncoderDecoderBlock(tf.keras.layers.Layer): self.decoder_block = _make_repeated_residual_blocks( residual_channels=out_channels_downsampled, out_channels=out_channels, num_blocks=blocks_per_stage[0]) - self.upsample = tf.keras.layers.UpSampling2D(2) - self.merge_features = tf.keras.layers.Add() + self.upsample = tf.keras.layers.UpSampling2D(initial_stride) def call(self, inputs): - encoded_outputs = _apply_blocks(inputs, self.encoder_block1) + if self.encoder_decoder_shortcut: + encoded_outputs = _apply_blocks(inputs, self.encoder_block1) encoded_downsampled_outputs = _apply_blocks(inputs, self.encoder_block2) inner_block_outputs = _apply_blocks( encoded_downsampled_outputs, self.inner_block) @@ -278,48 +343,68 @@ class 
EncoderDecoderBlock(tf.keras.layers.Layer): decoded_outputs = _apply_blocks(inner_block_outputs, self.decoder_block) upsampled_outputs = self.upsample(decoded_outputs) - return self.merge_features([encoded_outputs, upsampled_outputs]) + if self.encoder_decoder_shortcut: + return self.merge_features([encoded_outputs, upsampled_outputs]) + else: + return upsampled_outputs class HourglassNetwork(tf.keras.Model): """The hourglass network.""" - def __init__(self, num_stages, channel_dims, blocks_per_stage, - num_hourglasses): + def __init__(self, num_stages, input_channel_dims, channel_dims_per_stage, + blocks_per_stage, num_hourglasses, initial_downsample=True, + stagewise_downsample=True, encoder_decoder_shortcut=True): """Intializes the feature extractor. Args: num_stages: int, Number of stages in the network. At each stage we have 2 encoder and 1 decoder blocks. The second encoder block downsamples the input. - channel_dims: int list, the output channel dimensions of stages in - the network. `channel_dims[0]` and `channel_dims[1]` are used to define - the initial downsampling block. `channel_dims[1:]` is used to define - the hourglass network(s) which follow(s). + input_channel_dims: int, the number of channels in the input conv blocks. + channel_dims_per_stage: int list, the output channel dimensions of each + stage in the hourglass network. blocks_per_stage: int list, number of residual blocks to use at each stage in the hourglass network num_hourglasses: int, number of hourglas networks to stack sequentially. + initial_downsample: bool, if set, downsamples the input by a factor of 4 + before applying the rest of the network. Downsampling is done with a 7x7 + convolution kernel, otherwise a 3x3 kernel is used. + stagewise_downsample: bool, whether or not to downsample before passing + inputs to the next stage. + encoder_decoder_shortcut: bool, whether or not to use shortcut + connections between encoder and decoder. 
""" super(HourglassNetwork, self).__init__() self.num_hourglasses = num_hourglasses - self.downsample_input = InputDownsampleBlock( - out_channels_initial_conv=channel_dims[0], - out_channels_residual_block=channel_dims[1] - ) + self.initial_downsample = initial_downsample + if initial_downsample: + self.downsample_input = InputDownsampleBlock( + out_channels_initial_conv=input_channel_dims, + out_channels_residual_block=channel_dims_per_stage[0] + ) + else: + self.conv_input = InputConvBlock( + out_channels_initial_conv=input_channel_dims, + out_channels_residual_block=channel_dims_per_stage[0] + ) self.hourglass_network = [] self.output_conv = [] for _ in range(self.num_hourglasses): self.hourglass_network.append( EncoderDecoderBlock( - num_stages=num_stages, channel_dims=channel_dims[1:], - blocks_per_stage=blocks_per_stage) + num_stages=num_stages, channel_dims=channel_dims_per_stage, + blocks_per_stage=blocks_per_stage, + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut) ) self.output_conv.append( - ConvolutionalBlock(kernel_size=3, out_channels=channel_dims[1]) + ConvolutionalBlock(kernel_size=3, + out_channels=channel_dims_per_stage[0]) ) self.intermediate_conv1 = [] @@ -329,21 +414,25 @@ class HourglassNetwork(tf.keras.Model): for _ in range(self.num_hourglasses - 1): self.intermediate_conv1.append( ConvolutionalBlock( - kernel_size=1, out_channels=channel_dims[1], relu=False) + kernel_size=1, out_channels=channel_dims_per_stage[0], relu=False) ) self.intermediate_conv2.append( ConvolutionalBlock( - kernel_size=1, out_channels=channel_dims[1], relu=False) + kernel_size=1, out_channels=channel_dims_per_stage[0], relu=False) ) self.intermediate_residual.append( - ResidualBlock(out_channels=channel_dims[1]) + ResidualBlock(out_channels=channel_dims_per_stage[0]) ) self.intermediate_relu = tf.keras.layers.ReLU() def call(self, inputs): - inputs = self.downsample_input(inputs) + if self.initial_downsample: + inputs = 
self.downsample_input(inputs) + else: + inputs = self.conv_input(inputs) + outputs = [] for i in range(self.num_hourglasses): @@ -372,12 +461,164 @@ class HourglassNetwork(tf.keras.Model): return self.num_hourglasses +def _layer_depth(layer): + """Compute depth of Conv/Residual blocks or lists of them.""" + + if isinstance(layer, list): + return sum([_layer_depth(l) for l in layer]) + + elif isinstance(layer, ConvolutionalBlock): + return 1 + + elif isinstance(layer, ResidualBlock): + return 2 + + else: + raise ValueError('Unknown layer - {}'.format(layer)) + + +def _encoder_decoder_depth(network): + """Helper function to compute depth of encoder-decoder blocks.""" + + encoder_block2_layers = _layer_depth(network.encoder_block2) + decoder_block_layers = _layer_depth(network.decoder_block) + + if isinstance(network.inner_block[0], EncoderDecoderBlock): + + assert len(network.inner_block) == 1, 'Inner block is expected as length 1.' + inner_block_layers = _encoder_decoder_depth(network.inner_block[0]) + + return inner_block_layers + encoder_block2_layers + decoder_block_layers + + elif isinstance(network.inner_block[0], ResidualBlock): + return (encoder_block2_layers + decoder_block_layers + + _layer_depth(network.inner_block)) + + else: + raise ValueError('Unknown inner block type.') + + +def hourglass_depth(network): + """Helper function to verify depth of hourglass backbone.""" + + input_conv_layers = 3 # 1 ResidualBlock and 1 ConvBlock + + # Only intermediate_conv2 and intermediate_residual are applied before + # sending inputs to the later stages. 
+ intermediate_layers = ( + _layer_depth(network.intermediate_conv2) + + _layer_depth(network.intermediate_residual) + ) + + # network.output_conv is applied before sending input to the later stages + output_layers = _layer_depth(network.output_conv) + + encoder_decoder_layers = sum(_encoder_decoder_depth(net) for net in + network.hourglass_network) + + return (input_conv_layers + encoder_decoder_layers + intermediate_layers + + output_layers) + + def hourglass_104(): - """The Hourglass-104 backbone.""" + """The Hourglass-104 backbone. + + The architecture parameters are taken from [1]. + + Returns: + network: An HourglassNetwork object implementing the Hourglass-104 + backbone. + + [1]: https://arxiv.org/abs/1904.07850 + """ return HourglassNetwork( - channel_dims=[128, 256, 256, 384, 384, 384, 512], + input_channel_dims=128, + channel_dims_per_stage=[256, 256, 384, 384, 384, 512], num_hourglasses=2, num_stages=5, blocks_per_stage=[2, 2, 2, 2, 2, 4], ) + + +def single_stage_hourglass(input_channel_dims, channel_dims_per_stage, + blocks_per_stage, initial_downsample=True, + stagewise_downsample=True, + encoder_decoder_shortcut=True): + assert len(channel_dims_per_stage) == len(blocks_per_stage) + + return HourglassNetwork( + input_channel_dims=input_channel_dims, + channel_dims_per_stage=channel_dims_per_stage, + num_hourglasses=1, + num_stages=len(channel_dims_per_stage) - 1, + blocks_per_stage=blocks_per_stage, + initial_downsample=initial_downsample, + stagewise_downsample=stagewise_downsample, + encoder_decoder_shortcut=encoder_decoder_shortcut + ) + + +def hourglass_10(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[1, 1], + channel_dims_per_stage=[nc * 2, nc * 2]) + + +def hourglass_20(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + 
initial_downsample=initial_downsample, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3]) + + +def hourglass_32(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[2, 2, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3]) + + +def hourglass_52(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[2, 2, 2, 2, 2, 4], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3, nc * 3, nc*4]) + + +def hourglass_100(num_channels, initial_downsample=True): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + initial_downsample=initial_downsample, + blocks_per_stage=[4, 4, 4, 4, 4, 8], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3, nc * 3, nc * 3, nc*4]) + + +def hourglass_20_uniform_size(num_channels): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3], + initial_downsample=False, + stagewise_downsample=False) + + +def hourglass_20_no_shortcut(num_channels): + nc = num_channels + return single_stage_hourglass( + input_channel_dims=nc, + blocks_per_stage=[1, 2, 2], + channel_dims_per_stage=[nc * 2, nc * 2, nc * 3], + initial_downsample=False, + encoder_decoder_shortcut=False) diff --git a/research/object_detection/models/keras_models/hourglass_network_tf2_test.py b/research/object_detection/models/keras_models/hourglass_network_tf2_test.py index d90b950ecd4102a260643391de6a4475ed959c0f..d1813703c7c6debc049711551031800985b8431d 100644 --- a/research/object_detection/models/keras_models/hourglass_network_tf2_test.py +++ b/research/object_detection/models/keras_models/hourglass_network_tf2_test.py @@ -78,6 +78,12 @@ class 
HourglassFeatureExtractorTest(tf.test.TestCase, parameterized.TestCase): output = layer(np.zeros((2, 32, 32, 8), dtype=np.float32)) self.assertEqual(output.shape, (2, 8, 8, 8)) + def test_input_conv_block(self): + layer = hourglass.InputConvBlock( + out_channels_initial_conv=4, out_channels_residual_block=8) + output = layer(np.zeros((2, 32, 32, 8), dtype=np.float32)) + self.assertEqual(output.shape, (2, 32, 32, 8)) + def test_encoder_decoder_block(self): layer = hourglass.EncoderDecoderBlock( @@ -89,12 +95,64 @@ class HourglassFeatureExtractorTest(tf.test.TestCase, parameterized.TestCase): def test_hourglass_feature_extractor(self): model = hourglass.HourglassNetwork( - num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6], - channel_dims=[4, 6, 8, 10, 12, 14], num_hourglasses=2) + num_stages=4, blocks_per_stage=[2, 3, 4, 5, 6], input_channel_dims=4, + channel_dims_per_stage=[6, 8, 10, 12, 14], num_hourglasses=2) outputs = model(np.zeros((2, 64, 64, 3), dtype=np.float32)) self.assertEqual(outputs[0].shape, (2, 16, 16, 6)) self.assertEqual(outputs[1].shape, (2, 16, 16, 6)) +@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') +class HourglassDepthTest(tf.test.TestCase): + + def test_hourglass_104(self): + + net = hourglass.hourglass_104() + self.assertEqual(hourglass.hourglass_depth(net), 104) + + def test_hourglass_10(self): + net = hourglass.hourglass_10(2, initial_downsample=False) + self.assertEqual(hourglass.hourglass_depth(net), 10) + + outputs = net(tf.zeros((2, 32, 32, 3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + def test_hourglass_20(self): + net = hourglass.hourglass_20(2, initial_downsample=False) + self.assertEqual(hourglass.hourglass_depth(net), 20) + + outputs = net(tf.zeros((2, 32, 32, 3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + def test_hourglass_32(self): + net = hourglass.hourglass_32(2, initial_downsample=False) + self.assertEqual(hourglass.hourglass_depth(net), 32) + + outputs = net(tf.zeros((2, 32, 32, 
3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + def test_hourglass_52(self): + net = hourglass.hourglass_52(2, initial_downsample=False) + self.assertEqual(hourglass.hourglass_depth(net), 52) + + outputs = net(tf.zeros((2, 32, 32, 3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + def test_hourglass_20_uniform_size(self): + net = hourglass.hourglass_20_uniform_size(2) + self.assertEqual(hourglass.hourglass_depth(net), 20) + + outputs = net(tf.zeros((2, 32, 32, 3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + def test_hourglass_100(self): + net = hourglass.hourglass_100(2, initial_downsample=False) + self.assertEqual(hourglass.hourglass_depth(net), 100) + + outputs = net(tf.zeros((2, 32, 32, 3))) + self.assertEqual(outputs[0].shape, (2, 32, 32, 4)) + + if __name__ == '__main__': tf.test.main() + diff --git a/research/object_detection/models/keras_models/resnet_v1.py b/research/object_detection/models/keras_models/resnet_v1.py index d5426ad6b5e499171dbd955dc9c3fe465c4b6051..62660d4a70d5887c4a5084117ae5c2e2cfc3f888 100644 --- a/research/object_detection/models/keras_models/resnet_v1.py +++ b/research/object_detection/models/keras_models/resnet_v1.py @@ -21,6 +21,7 @@ from __future__ import print_function import tensorflow.compat.v1 as tf +from tensorflow.python.keras.applications import resnet from object_detection.core import freezable_batch_norm from object_detection.models.keras_models import model_utils @@ -95,11 +96,11 @@ class _LayersOverride(object): self.regularizer = tf.keras.regularizers.l2(weight_decay) self.initializer = tf.variance_scaling_initializer() - def _FixedPaddingLayer(self, kernel_size, rate=1): + def _FixedPaddingLayer(self, kernel_size, rate=1): # pylint: disable=invalid-name return tf.keras.layers.Lambda( lambda x: _fixed_padding(x, kernel_size, rate)) - def Conv2D(self, filters, kernel_size, **kwargs): + def Conv2D(self, filters, kernel_size, **kwargs): # pylint: disable=invalid-name """Builds a Conv2D 
layer according to the current Object Detection config. Overrides the Keras Resnet application's convolutions with ones that @@ -141,7 +142,7 @@ class _LayersOverride(object): else: return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs) - def Activation(self, *args, **kwargs): # pylint: disable=unused-argument + def Activation(self, *args, **kwargs): # pylint: disable=unused-argument,invalid-name """Builds an activation layer. Overrides the Keras application Activation layer specified by the @@ -163,7 +164,7 @@ class _LayersOverride(object): else: return tf.keras.layers.Lambda(tf.nn.relu, name=name) - def BatchNormalization(self, **kwargs): + def BatchNormalization(self, **kwargs): # pylint: disable=invalid-name """Builds a normalization layer. Overrides the Keras application batch norm with the norm specified by the @@ -191,7 +192,7 @@ class _LayersOverride(object): momentum=self._default_batchnorm_momentum, **kwargs) - def Input(self, shape): + def Input(self, shape): # pylint: disable=invalid-name """Builds an Input layer. Overrides the Keras application Input layer with one that uses a @@ -219,7 +220,7 @@ class _LayersOverride(object): input=input_tensor, shape=[None] + shape) return model_utils.input_layer(shape, placeholder_with_default) - def MaxPooling2D(self, pool_size, **kwargs): + def MaxPooling2D(self, pool_size, **kwargs): # pylint: disable=invalid-name """Builds a MaxPooling2D layer with default padding as 'SAME'. This is specified by the default resnet arg_scope in slim. @@ -237,7 +238,7 @@ class _LayersOverride(object): # Add alias as Keras also has it. MaxPool2D = MaxPooling2D # pylint: disable=invalid-name - def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument + def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument,invalid-name """Replaces explicit padding in the Keras application with a no-op. 
Args: @@ -395,3 +396,146 @@ def resnet_v1_152(batchnorm_training, return tf.keras.applications.resnet.ResNet152( layers=layers_override, **kwargs) # pylint: enable=invalid-name + + +# The following codes are based on the existing keras ResNet model pattern: +# google3/third_party/tensorflow/python/keras/applications/resnet.py +def block_basic(x, + filters, + kernel_size=3, + stride=1, + conv_shortcut=False, + name=None): + """A residual block for ResNet18/34. + + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer. + kernel_size: default 3, kernel size of the bottleneck layer. + stride: default 1, stride of the first layer. + conv_shortcut: default False, use convolution shortcut if True, otherwise + identity shortcut. + name: string, block label. + + Returns: + Output tensor for the residual block. + """ + layers = tf.keras.layers + bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1 + + preact = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')( + x) + preact = layers.Activation('relu', name=name + '_preact_relu')(preact) + + if conv_shortcut: + shortcut = layers.Conv2D( + filters, 1, strides=1, name=name + '_0_conv')( + preact) + else: + shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x + + x = layers.ZeroPadding2D( + padding=((1, 1), (1, 1)), name=name + '_1_pad')( + preact) + x = layers.Conv2D( + filters, kernel_size, strides=1, use_bias=False, name=name + '_1_conv')( + x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')( + x) + x = layers.Activation('relu', name=name + '_1_relu')(x) + + x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) + x = layers.Conv2D( + filters, + kernel_size, + strides=stride, + use_bias=False, + name=name + '_2_conv')( + x) + x = layers.BatchNormalization( + axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')( + x) + x = layers.Activation('relu', name=name + 
'_2_relu')(x) + x = layers.Add(name=name + '_out')([shortcut, x]) + return x + + +def stack_basic(x, filters, blocks, stride1=2, name=None): + """A set of stacked residual blocks for ResNet18/34. + + Args: + x: input tensor. + filters: integer, filters of the bottleneck layer in a block. + blocks: integer, blocks in the stacked blocks. + stride1: default 2, stride of the first layer in the first block. + name: string, stack label. + + Returns: + Output tensor for the stacked blocks. + """ + x = block_basic(x, filters, conv_shortcut=True, name=name + '_block1') + for i in range(2, blocks): + x = block_basic(x, filters, name=name + '_block' + str(i)) + x = block_basic( + x, filters, stride=stride1, name=name + '_block' + str(blocks)) + return x + + +def resnet_v1_18(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax'): + """Instantiates the ResNet18 architecture.""" + + def stack_fn(x): + x = stack_basic(x, 64, 2, stride1=1, name='conv2') + x = stack_basic(x, 128, 2, name='conv3') + x = stack_basic(x, 256, 2, name='conv4') + return stack_basic(x, 512, 2, name='conv5') + + return resnet.ResNet( + stack_fn, + True, + True, + 'resnet18', + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation=classifier_activation) + + +def resnet_v1_34(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax'): + """Instantiates the ResNet34 architecture.""" + + def stack_fn(x): + x = stack_basic(x, 64, 3, stride1=1, name='conv2') + x = stack_basic(x, 128, 4, name='conv3') + x = stack_basic(x, 256, 6, name='conv4') + return stack_basic(x, 512, 3, name='conv5') + + return resnet.ResNet( + stack_fn, + True, + True, + 'resnet34', + include_top, + weights, + input_tensor, + input_shape, + pooling, + classes, + classifier_activation=classifier_activation) diff 
--git a/research/object_detection/models/keras_models/resnet_v1_tf2_test.py b/research/object_detection/models/keras_models/resnet_v1_tf2_test.py index 71cc5f22bd994b6432957bf5b34837f829c9b8da..4566bc8ddda664fd7698f17b4951fca48612307b 100644 --- a/research/object_detection/models/keras_models/resnet_v1_tf2_test.py +++ b/research/object_detection/models/keras_models/resnet_v1_tf2_test.py @@ -20,12 +20,13 @@ object detection. To verify the consistency of the two models, we compare: 2. Number of global variables. """ import unittest + +from absl.testing import parameterized import numpy as np from six.moves import zip import tensorflow.compat.v1 as tf from google.protobuf import text_format - from object_detection.builders import hyperparams_builder from object_detection.models.keras_models import resnet_v1 from object_detection.protos import hyperparams_pb2 @@ -180,5 +181,46 @@ class ResnetV1Test(test_case.TestCase): self.assertEqual(len(variables), var_num) +class ResnetShapeTest(test_case.TestCase, parameterized.TestCase): + + @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.') + @parameterized.parameters( + { + 'resnet_type': + 'resnet_v1_34', + 'output_layer_names': [ + 'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out', + 'conv5_block3_out' + ] + }, { + 'resnet_type': + 'resnet_v1_18', + 'output_layer_names': [ + 'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out', + 'conv5_block2_out' + ] + }) + def test_output_shapes(self, resnet_type, output_layer_names): + if resnet_type == 'resnet_v1_34': + model = resnet_v1.resnet_v1_34(input_shape=(64, 64, 3), weights=None) + else: + model = resnet_v1.resnet_v1_18(input_shape=(64, 64, 3), weights=None) + outputs = [ + model.get_layer(output_layer_name).output + for output_layer_name in output_layer_names + ] + resnet_model = tf.keras.models.Model(inputs=model.input, outputs=outputs) + outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32)) + + # Check the shape of 
'conv2_block3_out': + self.assertEqual(outputs[0].shape, [2, 16, 16, 64]) + # Check the shape of 'conv3_block4_out': + self.assertEqual(outputs[1].shape, [2, 8, 8, 128]) + # Check the shape of 'conv4_block6_out': + self.assertEqual(outputs[2].shape, [2, 4, 4, 256]) + # Check the shape of 'conv5_block3_out': + self.assertEqual(outputs[3].shape, [2, 2, 2, 512]) + + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py index 2ecf8fb01b2dba536b8d6c531a3e8becac75091c..3184a1c682dc4df68f31dd8554db846698c667e0 100644 --- a/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py +++ b/research/object_detection/models/ssd_efficientnet_bifpn_feature_extractor.py @@ -23,6 +23,7 @@ from six.moves import range from six.moves import zip import tensorflow.compat.v2 as tf +from tensorflow.python.keras import backend as keras_backend from object_detection.meta_architectures import ssd_meta_arch from object_detection.models import bidirectional_feature_pyramid_generators as bifpn_generators from object_detection.utils import ops @@ -103,9 +104,10 @@ class SSDEfficientNetBiFPNKerasFeatureExtractor( use_depthwise: unsupported by EfficientNetBiFPN, since BiFPN uses regular convolutions when inputs to a node have a differing number of channels, and use separable convolutions after combine operations. - override_base_feature_extractor_hyperparams: unsupported. Whether to - override hyperparameters of the base feature extractor with the one from - `conv_hyperparams`. + override_base_feature_extractor_hyperparams: Whether to override the + efficientnet backbone's default weight decay with the weight decay + defined by `conv_hyperparams`. Note, only overriding of weight decay is + currently supported. name: a string name scope to assign to the model. If 'None', Keras will auto-generate one from the class name. 
""" @@ -129,9 +131,6 @@ class SSDEfficientNetBiFPNKerasFeatureExtractor( raise ValueError('EfficientNetBiFPN does not support explicit padding.') if use_depthwise: raise ValueError('EfficientNetBiFPN does not support use_depthwise.') - if override_base_feature_extractor_hyperparams: - raise ValueError('EfficientNetBiFPN does not support ' - 'override_base_feature_extractor_hyperparams.') self._bifpn_min_level = bifpn_min_level self._bifpn_max_level = bifpn_max_level @@ -158,9 +157,15 @@ class SSDEfficientNetBiFPNKerasFeatureExtractor( # Initialize the EfficientNet backbone. # Note, this is currently done in the init method rather than in the build # method, since doing so introduces an error which is not well understood. + efficientnet_overrides = {'rescale_input': False} + if override_base_feature_extractor_hyperparams: + efficientnet_overrides[ + 'weight_decay'] = conv_hyperparams.get_regularizer_weight() + if (conv_hyperparams.use_sync_batch_norm() and + keras_backend.is_tpu_strategy(tf.distribute.get_strategy())): + efficientnet_overrides['batch_norm'] = 'tpu' efficientnet_base = efficientnet_model.EfficientNet.from_name( - model_name=self._efficientnet_version, - overrides={'rescale_input': False}) + model_name=self._efficientnet_version, overrides=efficientnet_overrides) outputs = [efficientnet_base.get_layer(output_layer_name).output for output_layer_name in self._output_layer_names] self._efficientnet = tf.keras.Model( diff --git a/research/object_detection/packages/tf1/setup.py b/research/object_detection/packages/tf1/setup.py index 1cd4923cbc14b9dc1ae4f2c7087cc5e22991ea69..a40a368a6f5fddbccfc13b4d76f38a49d3c1c8d3 100644 --- a/research/object_detection/packages/tf1/setup.py +++ b/research/object_detection/packages/tf1/setup.py @@ -3,9 +3,9 @@ import os from setuptools import find_packages from setuptools import setup -REQUIRED_PACKAGES = ['apache-beam', 'pillow', 'lxml', 'matplotlib', 'Cython', - 'contextlib2', 'tf-slim', 'six', 'pycocotools', 'scipy', - 
'pandas'] +REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython', + 'contextlib2', 'tf-slim', 'six', 'pycocotools', 'lvis', + 'scipy', 'pandas'] setup( name='object_detection', diff --git a/research/object_detection/packages/tf2/setup.py b/research/object_detection/packages/tf2/setup.py index 09738ee079c8551f3403f4ea06600a3a284f42d3..3f9f0e35363cde03bee00641f3fb53ccc85c55ad 100644 --- a/research/object_detection/packages/tf2/setup.py +++ b/research/object_detection/packages/tf2/setup.py @@ -6,9 +6,23 @@ from setuptools import setup # Note: adding apache-beam to required packages causes conflict with # tf-models-offical requirements. These packages request for incompatible # oauth2client package. -REQUIRED_PACKAGES = ['pillow', 'lxml', 'matplotlib', 'Cython', 'contextlib2', - 'tf-slim', 'six', 'pycocotools', 'scipy', 'pandas', - 'tf-models-official'] +REQUIRED_PACKAGES = [ + # Required for apache-beam with PY3 + 'avro-python3', + 'apache-beam', + 'pillow', + 'lxml', + 'matplotlib', + 'Cython', + 'contextlib2', + 'tf-slim', + 'six', + 'pycocotools', + 'lvis', + 'scipy', + 'pandas', + 'tf-models-official' +] setup( name='object_detection', diff --git a/research/object_detection/predictors/convolutional_keras_box_predictor.py b/research/object_detection/predictors/convolutional_keras_box_predictor.py index fc72fb04c2d47301b1ac5fc185ca98c6b00073c0..cdc90fca697865ee78b4b8b21ae0eca9470a9183 100644 --- a/research/object_detection/predictors/convolutional_keras_box_predictor.py +++ b/research/object_detection/predictors/convolutional_keras_box_predictor.py @@ -236,6 +236,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): apply_batch_norm=False, share_prediction_tower=False, use_depthwise=False, + apply_conv_hyperparams_pointwise=False, name=None): """Constructor. @@ -269,6 +270,10 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): prediction head, class prediction head and other heads. 
use_depthwise: Whether to use depthwise separable conv2d instead of regular conv2d. + apply_conv_hyperparams_pointwise: Whether to apply the conv_hyperparams to + the pointwise_initializer and pointwise_regularizer when using depthwise + separable convolutions. By default, conv_hyperparams are only applied to + the depthwise initializer and regularizer when use_depthwise is true. name: A string name scope to assign to the model. If `None`, Keras will auto-generate one from the class name. """ @@ -294,6 +299,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): self._apply_batch_norm = apply_batch_norm self._share_prediction_tower = share_prediction_tower self._use_depthwise = use_depthwise + self._apply_conv_hyperparams_pointwise = apply_conv_hyperparams_pointwise # Additional projection layers to bring all feature maps to uniform # channels. @@ -344,6 +350,9 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor): # so we remap the kernel_* to depthwise_* here. 
kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer'] kwargs['depthwise_initializer'] = kwargs['kernel_initializer'] + if self._apply_conv_hyperparams_pointwise: + kwargs['pointwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['pointwise_initializer'] = kwargs['kernel_initializer'] conv_layers.append( tf.keras.layers.SeparableConv2D( self._depth, [self._kernel_size, self._kernel_size], diff --git a/research/object_detection/predictors/heads/class_head.py b/research/object_detection/predictors/heads/class_head.py index 604859313de84a783953e67dbe47e301a740cb96..d7abc23c20cf8fab5e3686ca6335ea76298b99fd 100644 --- a/research/object_detection/predictors/heads/class_head.py +++ b/research/object_detection/predictors/heads/class_head.py @@ -24,6 +24,7 @@ import tensorflow.compat.v1 as tf import tf_slim as slim from object_detection.predictors.heads import head +from object_detection.utils import shape_utils class MaskRCNNClassHead(head.Head): @@ -303,13 +304,23 @@ class WeightSharedConvolutionalClassHead(head.Head): biases_initializer=tf.constant_initializer( self._class_prediction_bias_init), scope=self._scope) - batch_size = features.get_shape().as_list()[0] - if batch_size is None: - batch_size = tf.shape(features)[0] + batch_size, height, width = shape_utils.combined_static_and_dynamic_shape( + features)[0:3] + class_predictions_with_background = tf.reshape( + class_predictions_with_background, [ + batch_size, height, width, num_predictions_per_location, + self._num_class_slots + ]) class_predictions_with_background = self._score_converter_fn( class_predictions_with_background) if self._return_flat_predictions: class_predictions_with_background = tf.reshape( class_predictions_with_background, [batch_size, -1, self._num_class_slots]) + else: + class_predictions_with_background = tf.reshape( + class_predictions_with_background, [ + batch_size, height, width, + num_predictions_per_location * self._num_class_slots + ]) return 
class_predictions_with_background diff --git a/research/object_detection/predictors/heads/class_head_tf1_test.py b/research/object_detection/predictors/heads/class_head_tf1_test.py index 3dc8fb120cb9a4c19ff2d595d31dc3645f6e06d0..986a383c1a782fe24c320ca76a5d11a4ac531d65 100644 --- a/research/object_detection/predictors/heads/class_head_tf1_test.py +++ b/research/object_detection/predictors/heads/class_head_tf1_test.py @@ -15,6 +15,7 @@ """Tests for object_detection.predictors.heads.class_head.""" import unittest +import numpy as np import tensorflow.compat.v1 as tf from google.protobuf import text_format @@ -194,6 +195,37 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase): ]) self.assertSetEqual(expected_var_names, actual_variable_set) + def test_softmax_score_converter(self): + num_class_slots = 10 + batch_size = 2 + height = 17 + width = 19 + num_predictions_per_location = 2 + assert num_predictions_per_location != 1 + + def graph_fn(): + class_prediction_head = ( + class_head.WeightSharedConvolutionalClassHead( + num_class_slots=num_class_slots, + score_converter_fn=tf.nn.softmax)) + image_feature = tf.random_uniform([batch_size, height, width, 1024], + minval=-10.0, + maxval=10.0, + dtype=tf.float32) + class_predictions = class_prediction_head.predict( + features=image_feature, + num_predictions_per_location=num_predictions_per_location) + return class_predictions + + class_predictions_out = self.execute(graph_fn, []) + class_predictions_sum = np.sum(class_predictions_out, axis=-1) + num_anchors = height * width * num_predictions_per_location + exp_class_predictions_sum = np.ones((batch_size, num_anchors), + dtype=np.float32) + self.assertAllEqual((batch_size, num_anchors, num_class_slots), + class_predictions_out.shape) + self.assertAllClose(class_predictions_sum, exp_class_predictions_sum) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/predictors/heads/keras_box_head.py 
b/research/object_detection/predictors/heads/keras_box_head.py index b8def7fc1b01291d92ce545c8c3c29d9a24c646a..daf730646b8d8797b1600e16caeb0d533d1bcd54 100644 --- a/research/object_detection/predictors/heads/keras_box_head.py +++ b/research/object_detection/predictors/heads/keras_box_head.py @@ -248,6 +248,7 @@ class WeightSharedConvolutionalBoxHead(head.KerasHead): conv_hyperparams, kernel_size=3, use_depthwise=False, + apply_conv_hyperparams_to_heads=False, box_encodings_clip_range=None, return_flat_predictions=True, name=None): @@ -262,6 +263,10 @@ class WeightSharedConvolutionalBoxHead(head.KerasHead): kernel_size: Size of final convolution kernel. use_depthwise: Whether to use depthwise convolutions for prediction steps. Default is False. + apply_conv_hyperparams_to_heads: Whether to apply conv_hyperparams to + depthwise seperable convolution layers in the box and class heads. By + default, the conv_hyperparams are only applied to layers in the + predictor tower when using depthwise separable convolutions. box_encodings_clip_range: Min and max values for clipping box_encodings. 
return_flat_predictions: If true, returns flattened prediction tensor of shape [batch, height * width * num_predictions_per_location, @@ -282,19 +287,26 @@ class WeightSharedConvolutionalBoxHead(head.KerasHead): self._kernel_size = kernel_size self._num_predictions_per_location = num_predictions_per_location self._use_depthwise = use_depthwise + self._apply_conv_hyperparams_to_heads = apply_conv_hyperparams_to_heads self._box_encodings_clip_range = box_encodings_clip_range self._return_flat_predictions = return_flat_predictions self._box_encoder_layers = [] if self._use_depthwise: + kwargs = conv_hyperparams.params(use_bias=True) + if self._apply_conv_hyperparams_to_heads: + kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['depthwise_initializer'] = kwargs['kernel_initializer'] + kwargs['pointwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['pointwise_initializer'] = kwargs['kernel_initializer'] self._box_encoder_layers.append( tf.keras.layers.SeparableConv2D( num_predictions_per_location * self._box_code_size, [self._kernel_size, self._kernel_size], padding='SAME', name='BoxPredictor', - **conv_hyperparams.params(use_bias=True))) + **kwargs)) else: self._box_encoder_layers.append( tf.keras.layers.Conv2D( diff --git a/research/object_detection/predictors/heads/keras_class_head.py b/research/object_detection/predictors/heads/keras_class_head.py index 988ebb2ee720f5db137ade0aef9919a942a57a5b..596f951d42e6790fdf31b32271e48d6741481a54 100644 --- a/research/object_detection/predictors/heads/keras_class_head.py +++ b/research/object_detection/predictors/heads/keras_class_head.py @@ -22,6 +22,7 @@ All the class prediction heads have a predict function that receives the import tensorflow.compat.v1 as tf from object_detection.predictors.heads import head +from object_detection.utils import shape_utils class ConvolutionalClassHead(head.KerasHead): @@ -250,6 +251,7 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): use_dropout=False, 
dropout_keep_prob=0.8, use_depthwise=False, + apply_conv_hyperparams_to_heads=False, score_converter_fn=tf.identity, return_flat_predictions=True, name=None): @@ -269,6 +271,10 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): dropout_keep_prob: Probability of keeping activiations. use_depthwise: Whether to use depthwise convolutions for prediction steps. Default is False. + apply_conv_hyperparams_to_heads: Whether to apply conv_hyperparams to + depthwise seperable convolution layers in the box and class heads. By + default, the conv_hyperparams are only applied to layers in the + predictor tower when using depthwise separable convolutions. score_converter_fn: Callable elementwise nonlinearity (that takes tensors as inputs and returns tensors). return_flat_predictions: If true, returns flattened prediction tensor @@ -287,11 +293,13 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): super(WeightSharedConvolutionalClassHead, self).__init__(name=name) self._num_class_slots = num_class_slots + self._num_predictions_per_location = num_predictions_per_location self._kernel_size = kernel_size self._class_prediction_bias_init = class_prediction_bias_init self._use_dropout = use_dropout self._dropout_keep_prob = dropout_keep_prob self._use_depthwise = use_depthwise + self._apply_conv_hyperparams_to_heads = apply_conv_hyperparams_to_heads self._score_converter_fn = score_converter_fn self._return_flat_predictions = return_flat_predictions @@ -301,6 +309,12 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): self._class_predictor_layers.append( tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob)) if self._use_depthwise: + kwargs = conv_hyperparams.params(use_bias=True) + if self._apply_conv_hyperparams_to_heads: + kwargs['depthwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['depthwise_initializer'] = kwargs['kernel_initializer'] + kwargs['pointwise_regularizer'] = kwargs['kernel_regularizer'] + kwargs['pointwise_initializer'] = 
kwargs['kernel_initializer'] self._class_predictor_layers.append( tf.keras.layers.SeparableConv2D( num_predictions_per_location * self._num_class_slots, @@ -311,7 +325,7 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): name='ClassPredictor', bias_initializer=tf.constant_initializer( self._class_prediction_bias_init), - **conv_hyperparams.params(use_bias=True))) + **kwargs)) else: self._class_predictor_layers.append( tf.keras.layers.Conv2D( @@ -339,13 +353,23 @@ class WeightSharedConvolutionalClassHead(head.KerasHead): for layer in self._class_predictor_layers: class_predictions_with_background = layer( class_predictions_with_background) - batch_size = features.get_shape().as_list()[0] - if batch_size is None: - batch_size = tf.shape(features)[0] + batch_size, height, width = shape_utils.combined_static_and_dynamic_shape( + features)[0:3] + class_predictions_with_background = tf.reshape( + class_predictions_with_background, [ + batch_size, height, width, self._num_predictions_per_location, + self._num_class_slots + ]) class_predictions_with_background = self._score_converter_fn( class_predictions_with_background) if self._return_flat_predictions: class_predictions_with_background = tf.reshape( class_predictions_with_background, [batch_size, -1, self._num_class_slots]) + else: + class_predictions_with_background = tf.reshape( + class_predictions_with_background, [ + batch_size, height, width, + self._num_predictions_per_location * self._num_class_slots + ]) return class_predictions_with_background diff --git a/research/object_detection/predictors/heads/keras_class_head_tf2_test.py b/research/object_detection/predictors/heads/keras_class_head_tf2_test.py index aa890ce522defb6ec4c97965846e8f20529bc24b..6aa240e98ed6995bd95f8502761b2a66ec6c2c7f 100644 --- a/research/object_detection/predictors/heads/keras_class_head_tf2_test.py +++ b/research/object_detection/predictors/heads/keras_class_head_tf2_test.py @@ -15,6 +15,7 @@ """Tests for 
object_detection.predictors.heads.class_head.""" import unittest +import numpy as np import tensorflow.compat.v1 as tf from google.protobuf import text_format @@ -198,6 +199,38 @@ class WeightSharedConvolutionalKerasClassPredictorTest(test_case.TestCase): class_prediction_head(image_feature) self.assertEqual(len(class_prediction_head.variables), 2) + def test_softmax_score_converter(self): + num_class_slots = 10 + batch_size = 2 + height = 17 + width = 19 + num_predictions_per_location = 2 + assert num_predictions_per_location != 1 + + conv_hyperparams = self._build_conv_hyperparams() + class_prediction_head = keras_class_head.WeightSharedConvolutionalClassHead( + num_class_slots=num_class_slots, + conv_hyperparams=conv_hyperparams, + num_predictions_per_location=num_predictions_per_location, + score_converter_fn=tf.nn.softmax) + + def graph_fn(): + image_feature = tf.random_uniform([batch_size, height, width, 1024], + minval=-10.0, + maxval=10.0, + dtype=tf.float32) + class_predictions = class_prediction_head(image_feature) + return class_predictions + + class_predictions_out = self.execute(graph_fn, []) + class_predictions_sum = np.sum(class_predictions_out, axis=-1) + num_anchors = height * width * num_predictions_per_location + exp_class_predictions_sum = np.ones((batch_size, num_anchors), + dtype=np.float32) + self.assertAllEqual((batch_size, num_anchors, num_class_slots), + class_predictions_out.shape) + self.assertAllClose(class_predictions_sum, exp_class_predictions_sum) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/protos/box_predictor.proto b/research/object_detection/protos/box_predictor.proto index 0b0fadd7977eb799c2920adb0f79c5e535b68534..c4926502a4fe022b2ebedb78c14f3a2431aa7d1b 100644 --- a/research/object_detection/protos/box_predictor.proto +++ b/research/object_detection/protos/box_predictor.proto @@ -66,11 +66,23 @@ message ConvolutionalBoxPredictor { } // Configuration proto for weight shared convolutional box 
predictor. -// Next id: 19 +// Next id: 21 message WeightSharedConvolutionalBoxPredictor { // Hyperparameters for convolution ops used in the box predictor. optional Hyperparams conv_hyperparams = 1; + // Whether the `conv_hyperparams` should apply to depthwise separable + // convolution layers in the box and class heads, in addition to the layers in + // the predictor tower. By default, the `conv_hyperparams` are only applied to + // layers in the predictor tower when use_depthwise is true. + optional bool apply_conv_hyperparams_to_heads = 19 [default = false]; + + // Whether the `conv_hyperparams` should apply to the `pointwise_initializer` + // and `pointwise_regularizer` when using depthwise separable convolutions in + // the prediction tower layers. By default, the `conv_hyperparams` only apply + // to the `depthwise_initializer` and `depthwise_regularizer`. + optional bool apply_conv_hyperparams_pointwise = 20 [default = false]; + // Number of the additional conv layers before the predictor. optional int32 num_layers_before_predictor = 4 [default = 0]; diff --git a/research/object_detection/protos/center_net.proto b/research/object_detection/protos/center_net.proto index a4ad0beef1688af23072d473705ee0dca6052173..bff4183bb11c23ebd48ef144a3e8ad2c00d2a238 100644 --- a/research/object_detection/protos/center_net.proto +++ b/research/object_detection/protos/center_net.proto @@ -4,11 +4,14 @@ package object_detection.protos; import "object_detection/protos/image_resizer.proto"; import "object_detection/protos/losses.proto"; +import "object_detection/protos/post_processing.proto"; +import "object_detection/protos/preprocessor.proto"; // Configuration for the CenterNet meta architecture from the "Objects as // Points" paper [1] // [1]: https://arxiv.org/abs/1904.07850 +// Next Id = 16 message CenterNet { // Number of classes to predict. optional int32 num_classes = 1; @@ -19,6 +22,32 @@ message CenterNet { // Image resizer for preprocessing the input image. 
optional ImageResizer image_resizer = 3; + // If set, all task heads will be constructed with separable convolutions. + optional bool use_depthwise = 13 [default = false]; + + // Indicates whether or not to use the sparse version of the Op that computes + // the center heatmaps. The sparse version scales better with number of + // channels in the heatmap, but in some cases is known to cause an OOM error. + // TODO(b/170989061) When bug is fixed, make this the default behavior. + optional bool compute_heatmap_sparse = 15 [default = false]; + + // Parameters to determine the model architecture/layers of the prediction + // heads. + message PredictionHeadParams { + // The two fields: num_filters, kernel_sizes correspond to the parameters of + // the convolutional layers used by the prediction head. If provided, the + // length of the two repeated fields need to be the same and represents the + // number of convolutional layers. + + // Corresponds to the "filters" argument in tf.keras.layers.Conv2D. If not + // provided, the default value [256] will be used. + repeated int32 num_filters = 1; + + // Corresponds to the "kernel_size" argument in tf.keras.layers.Conv2D. If + // not provided, the default value [3] will be used. + repeated int32 kernel_sizes = 2; + } + // Parameters which are related to object detection task. message ObjectDetection { // The original fields are moved to ObjectCenterParams or deleted. @@ -62,6 +91,18 @@ message CenterNet { // If set, loss is only computed for the labeled classes. optional bool use_labeled_classes = 6 [default = false]; + + // The keypoint weights used for calculating the location of object center. + // When the field is provided, the number of weights need to be the same as + // the number of keypoints. The object center is calculated by the weighted + // mean of the keypoint locations. When the field is not provided, the + // object center is determined by the bounding box groundtruth annotations + // (default behavior). 
+ repeated float keypoint_weights_for_center = 7; + + // Parameters to determine the architecture of the object center prediction + // head. + optional PredictionHeadParams center_head_params = 8; } optional ObjectCenterParams object_center_params = 5; @@ -142,6 +183,12 @@ message CenterNet { // the keypoint candidate. optional string candidate_ranking_mode = 16 [default = "min_distance"]; + // The score distance ratio offset, only used if candidate_ranking_mode is + // 'score_distance_ratio'. The offset is used in the maximization of score + // distance ratio, defined as: + // keypoint_score / (distance + score_distance_offset) + optional float score_distance_offset = 22 [default = 1.0]; + // The radius (in the unit of output pixel) around heatmap peak to assign // the offset targets. If set 0, then the offset target will only be // assigned to the heatmap peak (same behavior as the original paper). @@ -154,6 +201,46 @@ message CenterNet { // out_height, out_width, 2 * num_keypoints] (recommended when the // offset_peak_radius is not zero). optional bool per_keypoint_offset = 18 [default = false]; + + // Indicates whether to predict the depth of each keypoints. Note that this + // is only supported in the single class keypoint task. + optional bool predict_depth = 19 [default = false]; + + // Indicates whether to predict depths for each keypoint channel + // separately. If set False, the output depth target has the shape + // [batch_size, out_height, out_width, 1]. If set True, the output depth + // target has the shape [batch_size, out_height, out_width, + // num_keypoints]. Recommend to set this value and "per_keypoint_offset" to + // both be True at the same time. + optional bool per_keypoint_depth = 20 [default = false]; + + // The weight of the keypoint depth loss. + optional float keypoint_depth_loss_weight = 21 [default = 1.0]; + + // Whether keypoints outside the image frame should be clipped back to the + // image boundary. 
If true, the keypoints that are clipped have scores set + // to 0.0. + optional bool clip_out_of_frame_keypoints = 23 [default = false]; + + // Whether instances should be rescored based on keypoint confidences. If + // False, will use the detection score (from the object center heatmap). If + // True, will compute new scores with: + // new_score = o * (1/k) sum {s_i} + // where o is the object score, s_i is the score for keypoint i, and k is + // the number of keypoints for that class. + optional bool rescore_instances = 24 [default = false]; + + // Parameters to determine the architecture of the keypoint heatmap + // prediction head. + optional PredictionHeadParams heatmap_head_params = 25; + + // Parameters to determine the architecture of the keypoint offset + // prediction head. + optional PredictionHeadParams offset_head_params = 26; + + // Parameters to determine the architecture of the keypoint regression + // prediction head. + optional PredictionHeadParams regress_head_params = 27; } repeated KeypointEstimation keypoint_estimation_task = 7; @@ -218,6 +305,117 @@ message CenterNet { optional float heatmap_bias_init = 8 [default = -2.19]; } optional DensePoseEstimation densepose_estimation_task = 9; + + // Parameters which are related to tracking embedding estimation task. + // A Simple Baseline for Multi-Object Tracking [2] + // [2]: https://arxiv.org/abs/2004.01888 + message TrackEstimation { + // Weight of the task loss. The total loss of the model will be the + // summation of task losses weighted by the weights. + optional float task_loss_weight = 1 [default = 1.0]; + + // The maximun track ID of the datset. + optional int32 num_track_ids = 2; + + // The embedding size for re-identification (ReID) task in tracking. + optional int32 reid_embed_size = 3 [default = 128]; + + // The number of (fully-connected, batch-norm, relu) layers for track ID + // classification head. 
The output dimension of each intermediate FC layer + // will all be 'reid_embed_size'. The last FC layer will directly project to + // the track ID classification space of size 'num_track_ids' without + // batch-norm and relu layers. + optional int32 num_fc_layers = 4 [default = 1]; + + // Classification loss configuration for ReID loss. + optional ClassificationLoss classification_loss = 5; + } + optional TrackEstimation track_estimation_task = 10; + + // Temporal offset prediction head similar to CenterTrack. + // Currently our implementation adopts LSTM, different from original paper. + // See go/lstd-centernet for more details. + // Tracking Objects as Points [3] + // [3]: https://arxiv.org/abs/2004.01177 + message TemporalOffsetEstimation { + // Weight of the task loss. The total loss of the model will be the + // summation of task losses weighted by the weights. + optional float task_loss_weight = 1 [default = 1.0]; + + // Localization loss configuration for offset loss. + optional LocalizationLoss localization_loss = 2; + } + optional TemporalOffsetEstimation temporal_offset_task = 12; + + + // Mask prediction support using DeepMAC. See https://arxiv.org/abs/2104.00613 + message DeepMACMaskEstimation { + // The loss used for penalizing mask predictions. + optional ClassificationLoss classification_loss = 1; + + // Weight of mask prediction loss + optional float task_loss_weight = 2 [default = 1.0]; + + // The dimension of the per-instance embedding. + optional int32 dim = 3 [default = 256]; + + // The dimension of the per-pixel embedding + optional int32 pixel_embedding_dim = 4 [default=16]; + + // If set, masks are only kept for classes listed here. Masks are deleted + // for all other classes. Note that this is only done at training time, eval + // behavior is unchanged. + repeated int32 allowed_masked_classes_ids = 5; + + // The size of cropped pixel embedding that goes into the 2D mask prediction + // network (RoI align). 
+ optional int32 mask_size = 6 [default=32]; + + // If set to a positive value, we subsample instances by this amount to + // save memory during training. + optional int32 mask_num_subsamples = 67[default=-1]; + + // Whether or not to use (x, y) coordinates as input to mask net. + optional bool use_xy = 8 [default=true]; + + // Defines the kind of architecture we want to use for mask network. + optional string network_type = 9 [default="hourglass52"]; + + // Whether or not we want to use instance embedding in mask network. + optional bool use_instance_embedding = 10 [default=true]; + + // Number of channels in the inital block of the mask prediction network. + optional int32 num_init_channels = 11 [default=64]; + + // Whether or not to predict masks at full resolution. If true, we predict + // masks at the resolution of the output stride. Otherwise, masks are + // predicted at resolution defined by mask_size + optional bool predict_full_resolution_masks = 12 [default=false]; + + // If predict_full_resolution_masks is set, this parameter controls the size + // of cropped masks returned by post-process. To be compatible with the rest + // of the API, masks are always cropped and resized according to detected + // boxes in postprocess. + optional int32 postprocess_crop_size = 13 [default=256]; + + // The maximum relative amount by which boxes will be jittered before + // RoI crop happens. The x and y coordinates of the box are jittered + // relative to width and height respectively. + optional float max_roi_jitter_ratio = 14 [default=0.0]; + + // The mode for jitterting box ROIs. See RandomJitterBoxes in + // preprocessor.proto for more details + optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT]; + } + + optional DeepMACMaskEstimation deepmac_mask_estimation = 14; + + // CenterNet does not apply conventional post processing operations such as + // non max suppression as it applies a max-pool operator on box centers. 
+ // However, in some cases we observe the need to remove duplicate predictions + // from CenterNet. Use this optional parameter to apply traditional non max + // suppression and score thresholding. + optional PostProcessing post_processing = 24; } message CenterNetFeatureExtractor { @@ -235,4 +433,18 @@ message CenterNetFeatureExtractor { // If set, will change channel order to be [blue, green, red]. This can be // useful to be compatible with some pre-trained feature extractors. optional bool bgr_ordering = 4 [default = false]; + + // If set, the feature upsampling layers will be constructed with + // separable convolutions. This is typically applied to feature pyramid + // network if any. + optional bool use_depthwise = 5 [default = false]; + + + // Depth multiplier. Only valid for specific models (e.g. MobileNet). See subclasses of `CenterNetFeatureExtractor`. + optional float depth_multiplier = 9 [default = 1.0]; + + // Whether to use separable convolutions. Only valid for specific + // models. See subclasses of `CenterNetFeatureExtractor`. + optional bool use_separable_conv = 10 [default = false]; } + diff --git a/research/object_detection/protos/eval.proto b/research/object_detection/protos/eval.proto index b1b99881c266da3643c4620efa55184de60e35d7..07ad0189b4e3f313597857a6813b29c0716f0a8c 100644 --- a/research/object_detection/protos/eval.proto +++ b/research/object_detection/protos/eval.proto @@ -3,7 +3,7 @@ syntax = "proto2"; package object_detection.protos; // Message for configuring DetectionModel evaluation jobs (eval.py). -// Next id - 33 +// Next id - 36 message EvalConfig { optional uint32 batch_size = 25 [default = 1]; // Number of visualization images to generate. @@ -82,6 +82,14 @@ message EvalConfig { // If True, additionally include per-category metrics. optional bool include_metrics_per_category = 24 [default = false]; + // If true, includes all metrics per category. 
+ optional bool all_metrics_per_category = 35 [default=false];
+
+ // Optional super-category definitions: keys are super-category names;
+ // values are comma-separated categories (assumed to correspond to category
+ // names (`display_name`) in the label map).
+ map<string, string> super_categories = 34;
+
 // Recall range within which precision should be computed.
 optional float recall_lower_bound = 26 [default = 0.0];
 optional float recall_upper_bound = 27 [default = 1.0];
@@ -103,6 +111,13 @@ message EvalConfig {
 // visualization. An example would be human pose estimation where certain
 // joints can be connected.
 repeated KeypointEdge keypoint_edge = 32;
+
+ // The "groundtruth_labeled_classes" field indicates which classes have been
+ // labeled on the images. If skip_predictions_for_unlabeled_class is set,
+ // detector predictions that do not match to the groundtruth_labeled_classes
+ // will be ignored. This is useful for evaluating on test data that are not
+ // exhaustively labeled.
+ optional bool skip_predictions_for_unlabeled_class = 33 [default = false];
 }
 // A message to configure parameterized evaluation metric.
diff --git a/research/object_detection/protos/faster_rcnn.proto b/research/object_detection/protos/faster_rcnn.proto
index 486cc77ea8b156fb54500b0bbf7a01d4b17ac7b6..7509c6a3f5e81a756bf3e1fda1d8e07ae6f1f3ec 100644
--- a/research/object_detection/protos/faster_rcnn.proto
+++ b/research/object_detection/protos/faster_rcnn.proto
@@ -8,6 +8,7 @@ import "object_detection/protos/hyperparams.proto";
 import "object_detection/protos/image_resizer.proto";
 import "object_detection/protos/losses.proto";
 import "object_detection/protos/post_processing.proto";
+import "object_detection/protos/fpn.proto";
 // Configuration for Faster R-CNN models.
 // See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
@@ -17,6 +18,7 @@ import "object_detection/protos/post_processing.proto";
 // (or RPN) and a second stage box classifier.
We thus use the prefixes // `first_stage_` and `second_stage_` to indicate the stage to which each // parameter pertains when relevant. + message FasterRcnn { // Whether to construct only the Region Proposal Network (RPN). optional int32 number_of_stages = 1 [default = 2]; @@ -175,17 +177,30 @@ message FasterRcnn { // Whether to use tf.image.combined_non_max_suppression. optional bool use_combined_nms_in_first_stage = 40 [default = false]; - // Whether to output final box feature. If true, it will crop the feature map - // in the postprocess() method based on the final predictions. + // Whether to output final box feature. If true, it will crop the rpn feature + // map based on the final prediction boxes, then pass the crops through the + // box_classifier to compute the final features in the postprocess() method. optional bool output_final_box_features = 42 [default = false]; + // Whether to output final box rpn features. If true, it will crop the rpn + // feature map in the postprocess() method based on the final prediction + // boxes. + optional bool output_final_box_rpn_features = 43 [default = false]; + // Configs for context model. optional Context context_config = 41; } +// Input type format: whether inputs are TfExamples or TfSequenceExamples. +enum AttentionPosition { + ATTENTION_DEFAULT = 0; // Default, currently post box classifier + POST_BOX_CLASSIFIER = 1; // Post box classifier + POST_RPN = 2; // Post RPN, pre box classifier +} + message Context { - // Configuration proto for Context . - // Next id: 4 + // Configuration proto for Context R-CNN. + // Next id: 12 // The maximum number of contextual features per-image, used for padding optional int32 max_num_context_features = 1 [default = 2000]; @@ -198,6 +213,30 @@ message Context { // The context feature length. optional int32 context_feature_length = 4 [default = 2057]; + + // Whether to use self-attention from box proposals to themselves, TF1 only. 
+ optional bool use_self_attention = 6 [default = false]; + + // Whether to use attention into context features, setting to false is only + // implemented in TF1. + optional bool use_long_term_attention = 7 [default = true]; + + // Whether the self-attention block and the long term attention block should + // be in sequence or parallel, ie whether the outputs of the self-attention + // block should be the inputs into the long term attention block (sequence) + // or whether the self attention block and long term attention block should + // happen in parallel, with outputs summed. + optional bool self_attention_in_sequence = 8 [default = false]; + + // Number of attention heads + optional int32 num_attention_heads = 9 [default = 1]; + + // Number of attention layers + optional int32 num_attention_layers = 11 [default = 1]; + + // Where the attention goes, 0 is pre-second-stage, 1 is post-second-stage + optional AttentionPosition attention_position = 10 [ + default = POST_BOX_CLASSIFIER]; } message FasterRcnnFeatureExtractor { @@ -212,4 +251,21 @@ message FasterRcnnFeatureExtractor { // When training with a relative large batch size (e.g. 8), it could be // desirable to enable batch norm update. optional bool batch_norm_trainable = 3 [default = false]; + + // Hyperparameters that affect the layers of feature extractor added on top + // of the base feature extractor. + optional Hyperparams conv_hyperparams = 4; + + // if the value is set to true, the base feature extractor's hyperparams will + // be overridden with the `conv_hyperparams`. + optional bool override_base_feature_extractor_hyperparams = 5 + [default = false]; + + // The nearest multiple to zero-pad the input height and width dimensions to. + // For example, if pad_to_multiple = 2, input dimensions are zero-padded + // until the resulting dimensions are even. + optional int32 pad_to_multiple = 6 [default = 32]; + + // Feature Pyramid Networks config. 
+ optional FeaturePyramidNetworks fpn = 7; } diff --git a/research/object_detection/protos/fpn.proto b/research/object_detection/protos/fpn.proto new file mode 100644 index 0000000000000000000000000000000000000000..568aa848de67a899709918de235d9939c776ec93 --- /dev/null +++ b/research/object_detection/protos/fpn.proto @@ -0,0 +1,50 @@ +syntax = "proto2"; + +package object_detection.protos; + +// Configuration for Feature Pyramid Networks. +message FeaturePyramidNetworks { + // We recommend to use multi_resolution_feature_map_generator with FPN, and + // the levels there must match the levels defined below for better + // performance. + // Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps: + // FPN Level Resnet Feature Map Mobilenet-V1 Feature Map + // 2 Block 1 Conv2d_3_pointwise + // 3 Block 2 Conv2d_5_pointwise + // 4 Block 3 Conv2d_11_pointwise + // 5 Block 4 Conv2d_13_pointwise + // 6 Bottomup_5 bottom_up_Conv2d_14 + // 7 Bottomup_6 bottom_up_Conv2d_15 + // 8 Bottomup_7 bottom_up_Conv2d_16 + // 9 Bottomup_8 bottom_up_Conv2d_17 + + // minimum level in feature pyramid + optional int32 min_level = 1 [default = 3]; + + // maximum level in feature pyramid + optional int32 max_level = 2 [default = 7]; + + // channel depth for additional coarse feature layers. + optional int32 additional_layer_depth = 3 [default = 256]; + +} + +// Configuration for Bidirectional Feature Pyramid Networks. +message BidirectionalFeaturePyramidNetworks { + // minimum level in the feature pyramid. + optional int32 min_level = 1 [default = 3]; + + // maximum level in the feature pyramid. + optional int32 max_level = 2 [default = 7]; + + // The number of repeated top-down bottom-up iterations for BiFPN-based + // feature extractors (bidirectional feature pyramid networks). + optional int32 num_iterations = 3; + + // The number of filters (channels) to use in feature pyramid layers for + // BiFPN-based feature extractors (bidirectional feature pyramid networks). 
+ optional int32 num_filters = 4; + + // Method used to combine inputs to BiFPN nodes. + optional string combine_method = 5 [default = 'fast_attention']; +} diff --git a/research/object_detection/protos/hyperparams.proto b/research/object_detection/protos/hyperparams.proto index e2fee247ca1303dfdbb9bdb69f187b7520c4e89c..fba7359f45fde3911c0ae547d1e6b61a5ae39d45 100644 --- a/research/object_detection/protos/hyperparams.proto +++ b/research/object_detection/protos/hyperparams.proto @@ -42,6 +42,8 @@ message Hyperparams { // Note that if nothing below is selected, then no normalization is applied // BatchNorm hyperparameters. BatchNorm batch_norm = 5; + // SyncBatchNorm hyperparameters (KerasLayerHyperparams only). + BatchNorm sync_batch_norm = 9; // GroupNorm hyperparameters. This is only supported on a subset of models. // Note that the current implementation of group norm instantiated in // tf.contrib.group.layers.group_norm() only supports fixed_size_resizer @@ -86,6 +88,11 @@ message Initializer { TruncatedNormalInitializer truncated_normal_initializer = 1; VarianceScalingInitializer variance_scaling_initializer = 2; RandomNormalInitializer random_normal_initializer = 3; + // Allows specifying initializers by name, as a string, which will be passed + // directly as an argument during layer construction. Currently, this is + // only supported when using KerasLayerHyperparams, and for valid Keras + // initializers, e.g. `glorot_uniform`, `variance_scaling`, etc. 
+ string keras_initializer_by_name = 4; } } diff --git a/research/object_detection/protos/input_reader.proto b/research/object_detection/protos/input_reader.proto index 27d022532dc14fffc2b8078a500933d44ae5bf68..053bcfa559e0ba2e93ccf7dbe31e58745ea6ca3d 100644 --- a/research/object_detection/protos/input_reader.proto +++ b/research/object_detection/protos/input_reader.proto @@ -2,7 +2,6 @@ syntax = "proto2"; package object_detection.protos; -import "object_detection/protos/image_resizer.proto"; // Configuration proto for defining input readers that generate Object Detection // Examples from input sources. Input readers are expected to generate a @@ -26,12 +25,12 @@ enum InstanceMaskType { // Input type format: whether inputs are TfExamples or TfSequenceExamples. enum InputType { - INPUT_DEFAULT = 0; // Default implementation, currently TF_EXAMPLE - TF_EXAMPLE = 1; // TfExample input - TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input + INPUT_DEFAULT = 0; // Default implementation, currently TF_EXAMPLE + TF_EXAMPLE = 1; // TfExample input + TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input } -// Next id: 32 +// Next id: 38 message InputReader { // Name of input reader. Typically used to describe the dataset that is read // by this input reader. @@ -62,6 +61,9 @@ message InputReader { optional uint32 sample_1_of_n_examples = 22 [default = 1]; // Number of file shards to read in parallel. + // + // When sample_from_datasets_weights are configured, num_readers is applied + // for each dataset. optional uint32 num_readers = 6 [default = 64]; // Number of batches to produce in parallel. If this is run on a 2x2 TPU set @@ -91,6 +93,9 @@ message InputReader { // Number of parallel decode ops to apply. optional uint32 num_parallel_map_calls = 14 [default = 64, deprecated = true]; + // Drop remainder when batch size does not divide dataset size. 
+ optional bool drop_remainder = 35 [default = true]; + // If positive, TfExampleDecoder will try to decode rasters of additional // channels from tf.Examples. optional int32 num_additional_channels = 18 [default = 0]; @@ -113,6 +118,9 @@ message InputReader { // Whether to load context features from the dataset. optional bool load_context_features = 25 [default = false]; + // Whether to load context image ids from the dataset. + optional bool load_context_image_ids = 36 [default = false]; + // Whether to load groundtruth instance masks. optional bool load_instance_masks = 7 [default = false]; @@ -123,6 +131,12 @@ message InputReader { // to true. optional bool load_dense_pose = 31 [default = false]; + // Whether to load track information. + optional bool load_track_id = 33 [default = false]; + + // Whether to load keypoint depth features. + optional bool load_keypoint_depth_features = 37 [default = false]; + // Whether to use the display name when decoding examples. This is only used // when mapping class text strings to integers. optional bool use_display_name = 17 [default = false]; @@ -133,11 +147,47 @@ message InputReader { // Whether input data type is tf.Examples or tf.SequenceExamples optional InputType input_type = 30 [default = TF_EXAMPLE]; + // Which frame to choose from the input if Sequence Example. -1 indicates + // random choice. + optional int32 frame_index = 32 [default = -1]; + oneof input_reader { TFRecordInputReader tf_record_input_reader = 8; ExternalInputReader external_input_reader = 9; } + // When multiple input files are configured, we can sample across them based + // on weights. + // + // The number of weights must match the number of input files configured. + // + // The number of input readers per dataset is num_readers, scaled relative to + // the dataset weight. + // + // When set, shuffling and shuffle buffer size, settings are + // applied individually to each dataset. 
+ // + // Implementation follows tf.data.experimental.sample_from_datasets sampling + // strategy. Weights may take any value - only relative weights matter. + // + // Zero weights will result in a dataset not being sampled and no input + // readers spawned. + // + // Examples, assuming two input files configured: + // + // Equal weighting: + // sample_from_datasets_weights: 0.5 + // sample_from_datasets_weights: 0.5 + // + // 2:1 weighting: + // sample_from_datasets_weights: 2 + // sample_from_datasets_weights: 1 + // + // Exclude the second dataset: + // sample_from_datasets_weights: 1 + // sample_from_datasets_weights: 0 + repeated float sample_from_datasets_weights = 34; + // Expand labels to ancestors or descendants in the hierarchy for // for positive and negative labels, respectively. diff --git a/research/object_detection/protos/losses.proto b/research/object_detection/protos/losses.proto index 2342fb24fceb68c58b9e06f4439cedd89efd933d..adb77c07555436b1d6179ffdd40062e399dd4037 100644 --- a/research/object_detection/protos/losses.proto +++ b/research/object_detection/protos/losses.proto @@ -70,6 +70,7 @@ message LocalizationLoss { WeightedSmoothL1LocalizationLoss weighted_smooth_l1 = 2; WeightedIOULocalizationLoss weighted_iou = 3; L1LocalizationLoss l1_localization_loss = 4; + WeightedGIOULocalizationLoss weighted_giou = 5; } } @@ -101,6 +102,10 @@ message WeightedIOULocalizationLoss { message L1LocalizationLoss { } +// Generalized intersection over union location loss: 1 - GIOU +message WeightedGIOULocalizationLoss { +} + // Configuration for class prediction loss function. 
message ClassificationLoss { oneof classification_loss { @@ -110,6 +115,7 @@ message ClassificationLoss { BootstrappedSigmoidClassificationLoss bootstrapped_sigmoid = 3; SigmoidFocalClassificationLoss weighted_sigmoid_focal = 4; PenaltyReducedLogisticFocalLoss penalty_reduced_logistic_focal_loss = 6; + WeightedDiceClassificationLoss weighted_dice_classification_loss = 7; } } @@ -217,3 +223,14 @@ message RandomExampleSampler { // example sampling. optional float positive_sample_fraction = 1 [default = 0.01]; } + +// Dice loss for training instance masks[1][2]. +// [1]: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient +// [2]: https://arxiv.org/abs/1606.04797 +message WeightedDiceClassificationLoss { + // If set, we square the probabilities in the denominator term used for + // normalization. + optional bool squared_normalization = 1 [default=false]; +} + + diff --git a/research/object_detection/protos/post_processing.proto b/research/object_detection/protos/post_processing.proto index 80f75b18d1a1753ad672d0d84b8fbaf6e655adf8..314faab155bef5f72f316350ea69597e5b9bd5fd 100644 --- a/research/object_detection/protos/post_processing.proto +++ b/research/object_detection/protos/post_processing.proto @@ -27,10 +27,10 @@ message BatchNonMaxSuppression { // Class-agnostic NMS function implements a class-agnostic version // of Non Maximal Suppression where if max_classes_per_detection=k, // 1) we keep the top-k scores for each detection and - // 2) during NMS, each detection only uses the highest class score for sorting. - // 3) Compared to regular NMS, the worst runtime of this version is O(N^2) - // instead of O(KN^2) where N is the number of detections and K the number of - // classes. + // 2) during NMS, each detection only uses the highest class score for + // sorting. 3) Compared to regular NMS, the worst runtime of this version is + // O(N^2) instead of O(KN^2) where N is the number of detections and K the + // number of classes. 
optional bool use_class_agnostic_nms = 7 [default = false]; // Number of classes retained per detection in class agnostic NMS. @@ -57,6 +57,12 @@ message BatchNonMaxSuppression { // export models for older versions of TF. optional bool use_hard_nms = 13 [default = false]; + // Use cpu NMS. NMSV3/NMSV4 by default runs on GPU, which may cause OOM issue + // if the model is large and/or batch size is large during training. + // Setting this flag to false moves the nms op to CPU when OOM happens. + // The flag is not needed if use_hard_nms = false, as soft NMS currently + // runs on CPU by default. + optional bool use_cpu_nms = 14 [default = false]; } // Configuration proto for post-processing predicted boxes and diff --git a/research/object_detection/protos/preprocessor.proto b/research/object_detection/protos/preprocessor.proto index a99be94194a1d412676e6d4d387cba53a8667c02..6afcc3e70f67d3f905ff8cee193b2cf89e174483 100644 --- a/research/object_detection/protos/preprocessor.proto +++ b/research/object_detection/protos/preprocessor.proto @@ -4,7 +4,7 @@ package object_detection.protos; // Message for defining a preprocessing operation on input data. // See: //third_party/tensorflow_models/object_detection/core/preprocessor.py -// Next ID: 39 +// Next ID: 40 message PreprocessingStep { oneof preprocessing_step { NormalizeImage normalize_image = 1; @@ -45,6 +45,7 @@ message PreprocessingStep { RandomPatchGaussian random_patch_gaussian = 36; RandomSquareCropByScale random_square_crop_by_scale = 37; RandomScaleCropAndPadToSquare random_scale_crop_and_pad_to_square = 38; + AdjustGamma adjust_gamma = 39; } } @@ -164,6 +165,18 @@ message RandomDistortColor { // ie. If a box is [100, 200] and ratio is 0.02, the corners can move by [1, 4]. 
message RandomJitterBoxes { optional float ratio = 1 [default=0.05]; + + enum JitterMode { + DEFAULT = 0; + EXPAND = 1; + SHRINK = 2; + } + // The mode of jittering + // EXPAND - Only expands boxes + // SHRINK - Only shrinks boxes + // DEFAULT - Jitters each box boundary independently + optional JitterMode jitter_mode = 2 [default=DEFAULT]; + } // Randomly crops the image and bounding boxes. @@ -590,3 +603,9 @@ message RandomScaleCropAndPadToSquare { optional float scale_min = 2 [default=0.1]; optional float scale_max = 3 [default=2.0]; } + +// Adjusts the gamma of the image using the specified gamma and gain values. +message AdjustGamma { + optional float gamma = 1 [default=1.0]; + optional float gain = 2 [default=1.0]; +} diff --git a/research/object_detection/protos/ssd.proto b/research/object_detection/protos/ssd.proto index 3fdcd99370a396d0fe1a9123edc00bbed4af0cf9..e4b6ffa18c24d4430587f40ebfb7cc6b183ee9ee 100644 --- a/research/object_detection/protos/ssd.proto +++ b/research/object_detection/protos/ssd.proto @@ -11,6 +11,7 @@ import "object_detection/protos/losses.proto"; import "object_detection/protos/matcher.proto"; import "object_detection/protos/post_processing.proto"; import "object_detection/protos/region_similarity_calculator.proto"; +import "object_detection/protos/fpn.proto"; // Configuration for Single Shot Detection (SSD) models. // Next id: 27 @@ -203,50 +204,3 @@ message SsdFeatureExtractor { } -// Configuration for Feature Pyramid Networks. -message FeaturePyramidNetworks { - // We recommend to use multi_resolution_feature_map_generator with FPN, and - // the levels there must match the levels defined below for better - // performance. 
- // Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps: - // FPN Level Resnet Feature Map Mobilenet-V1 Feature Map - // 2 Block 1 Conv2d_3_pointwise - // 3 Block 2 Conv2d_5_pointwise - // 4 Block 3 Conv2d_11_pointwise - // 5 Block 4 Conv2d_13_pointwise - // 6 Bottomup_5 bottom_up_Conv2d_14 - // 7 Bottomup_6 bottom_up_Conv2d_15 - // 8 Bottomup_7 bottom_up_Conv2d_16 - // 9 Bottomup_8 bottom_up_Conv2d_17 - - // minimum level in feature pyramid - optional int32 min_level = 1 [default = 3]; - - // maximum level in feature pyramid - optional int32 max_level = 2 [default = 7]; - - // channel depth for additional coarse feature layers. - optional int32 additional_layer_depth = 3 [default = 256]; - -} - -// Configuration for Bidirectional Feature Pyramid Networks. -message BidirectionalFeaturePyramidNetworks { - // minimum level in the feature pyramid. - optional int32 min_level = 1 [default = 3]; - - // maximum level in the feature pyramid. - optional int32 max_level = 2 [default = 7]; - - // The number of repeated top-down bottom-up iterations for BiFPN-based - // feature extractors (bidirectional feature pyramid networks). - optional int32 num_iterations = 3; - - // The number of filters (channels) to use in feature pyramid layers for - // BiFPN-based feature extractors (bidirectional feature pyramid networks). - optional int32 num_filters = 4; - - // Method used to combine inputs to BiFPN nodes. 
- optional string combine_method = 5 [default = 'fast_attention']; -} - diff --git a/research/object_detection/protos/string_int_label_map.proto b/research/object_detection/protos/string_int_label_map.proto index c9095a9d73cf51a669c7b106ad34fb8cc0a24b43..d77dd92af8534fd0b25ed0ff2d6e2faa24d03b84 100644 --- a/research/object_detection/protos/string_int_label_map.proto +++ b/research/object_detection/protos/string_int_label_map.proto @@ -6,6 +6,14 @@ syntax = "proto2"; package object_detection.protos; +// LVIS frequency: +enum LVISFrequency { + UNSPECIFIED = 0; + FREQUENT = 1; + COMMON = 2; + RARE = 3; +} + message StringIntLabelMapItem { // String name. The most common practice is to set this to a MID or synsets // id. @@ -38,6 +46,10 @@ message StringIntLabelMapItem { // current element. Value should correspond to another label id element. repeated int32 ancestor_ids = 5; repeated int32 descendant_ids = 6; + + // LVIS specific label map fields + optional LVISFrequency frequency = 7; + optional int32 instance_count = 8; }; message StringIntLabelMap { diff --git a/research/object_detection/protos/train.proto b/research/object_detection/protos/train.proto index 62d326cdf67c7329ddaa22250a4f2734a4f43066..14a63404b460db3830aa045cb43ba6bbaf5df070 100644 --- a/research/object_detection/protos/train.proto +++ b/research/object_detection/protos/train.proto @@ -14,7 +14,7 @@ enum CheckpointVersion { // Message for configuring DetectionModel training jobs (train.py). -// Next id: 30 +// Next id: 31 message TrainConfig { // Effective batch size to use for training. // For TPU (or sync SGD jobs), the batch size per core (or GPU) is going to be @@ -40,9 +40,12 @@ message TrainConfig { // extractor variables trained outside of object detection. optional string fine_tune_checkpoint = 7 [default=""]; - // Type of checkpoint to restore variables from, e.g. 'classification' or - // 'detection'. Provides extensibility to from_detection_checkpoint. 
- // Typically used to load feature extractor variables from trained models. + // Type of checkpoint to restore variables from, e.g. 'classification' + // 'detection', `fine_tune`, `full`. Controls which variables are restored + // from the pre-trained checkpoint. For meta architecture specific valid + // values of this parameter, see the restore_map (TF1) or + // restore_from_object (TF2) function documentation in the + // /meta_architectures/*meta_arch.py files optional string fine_tune_checkpoint_type = 22 [default=""]; // Either "v1" or "v2". If v1, restores the checkpoint using the tensorflow @@ -60,9 +63,21 @@ message TrainConfig { // Whether to load all checkpoint vars that match model variable names and // sizes. This option is only available if `from_detection_checkpoint` is // True. This option is *not* supported for TF2 --- setting it to true - // will raise an error. + // will raise an error. Instead, set fine_tune_checkpoint_type: 'full'. optional bool load_all_detection_checkpoint_vars = 19 [default = false]; + // Whether to run dummy computation when loading a `fine_tune_checkpoint`. + // This option is true by default since it is often necessary to run the model + // on a dummy input before loading a `fine_tune_checkpoint`, in order to + // ensure that all the model variables have alread been built successfully. + // Some meta architectures, like CenterNet, do not require dummy computation + // to successfully load all checkpoint variables, and in these cases this + // flag may be set to false to reduce startup time and memory consumption. + // Note, this flag only affects dummy computation when loading a + // `fine_tune_checkpoint`, e.g. it does not affect the dummy computation that + // is run when creating shadow copies of model variables when using EMA. + optional bool run_fine_tune_checkpoint_dummy_computation = 30 [default=true]; + // Number of steps to train the DetectionModel for. If 0, will train the model // indefinitely. 
optional uint32 num_steps = 9 [default=0]; diff --git a/research/object_detection/test_images/image3.jpg b/research/object_detection/test_images/image3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..44a7b0322689a172ca10005bb27df1bc0e590b69 Binary files /dev/null and b/research/object_detection/test_images/image3.jpg differ diff --git a/research/object_detection/test_images/image_info.txt b/research/object_detection/test_images/image_info.txt index 6f805cbcd27405940398f24f2a1a4538e197e108..656af904f46462cb04097f89568342927798826a 100644 --- a/research/object_detection/test_images/image_info.txt +++ b/research/object_detection/test_images/image_info.txt @@ -3,4 +3,4 @@ Image provenance: image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg image2.jpg: Michael Miley, https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4 - +image3.jpg: Chris Briggs, https://unsplash.com/photos/ILBrHd6PFJA diff --git a/research/object_detection/utils/config_util.py b/research/object_detection/utils/config_util.py index 662d42e1305538534e1cb6671086e4faa6cdf00c..05f7f7ef063809f56a4a0af6084f4dbca5e8af89 100644 --- a/research/object_detection/utils/config_util.py +++ b/research/object_detection/utils/config_util.py @@ -19,9 +19,9 @@ from __future__ import division from __future__ import print_function import os -import tensorflow.compat.v1 as tf from google.protobuf import text_format +import tensorflow.compat.v1 as tf from tensorflow.python.lib.io import file_io @@ -621,6 +621,24 @@ def _maybe_update_config_with_key_value(configs, key, value): value) elif field_name == "num_classes": _update_num_classes(configs["model"], value) + elif field_name == "sample_from_datasets_weights": + _update_sample_from_datasets_weights(configs["train_input_config"], value) + elif field_name == "peak_max_pool_kernel_size": + _update_peak_max_pool_kernel_size(configs["model"], value) + elif field_name == 
"candidate_search_scale": + _update_candidate_search_scale(configs["model"], value) + elif field_name == "candidate_ranking_mode": + _update_candidate_ranking_mode(configs["model"], value) + elif field_name == "score_distance_offset": + _update_score_distance_offset(configs["model"], value) + elif field_name == "box_scale": + _update_box_scale(configs["model"], value) + elif field_name == "keypoint_candidate_score_threshold": + _update_keypoint_candidate_score_threshold(configs["model"], value) + elif field_name == "rescore_instances": + _update_rescore_instances(configs["model"], value) + elif field_name == "unmatched_keypoint_score": + _update_unmatched_keypoint_score(configs["model"], value) else: return False return True @@ -1024,7 +1042,7 @@ def _update_retain_original_image_additional_channels( retain_original_image_additional_channels) -def remove_unecessary_ema(variables_to_restore, no_ema_collection=None): +def remove_unnecessary_ema(variables_to_restore, no_ema_collection=None): """Remap and Remove EMA variable that are not created during training. ExponentialMovingAverage.variables_to_restore() returns a map of EMA names @@ -1036,9 +1054,8 @@ def remove_unecessary_ema(variables_to_restore, no_ema_collection=None): } This function takes care of the extra ExponentialMovingAverage variables that get created during eval but aren't available in the checkpoint, by - remapping the key to the shallow copy of the variable itself, and remove - the entry of its EMA from the variables to restore. An example resulting - dictionary would look like: + remapping the key to the variable itself, and remove the entry of its EMA from + the variables to restore. 
An example resulting dictionary would look like: { conv/batchnorm/gamma: conv/batchnorm/gamma, conv_4/conv2d_params: conv_4/conv2d_params, @@ -1057,14 +1074,15 @@ def remove_unecessary_ema(variables_to_restore, no_ema_collection=None): if no_ema_collection is None: return variables_to_restore + restore_map = {} for key in variables_to_restore: - if "ExponentialMovingAverage" in key: - for name in no_ema_collection: - if name in key: - variables_to_restore[key.replace("/ExponentialMovingAverage", - "")] = variables_to_restore[key] - del variables_to_restore[key] - return variables_to_restore + if ("ExponentialMovingAverage" in key + and any([name in key for name in no_ema_collection])): + new_key = key.replace("/ExponentialMovingAverage", "") + else: + new_key = key + restore_map[new_key] = variables_to_restore[key] + return restore_map def _update_num_classes(model_config, num_classes): @@ -1073,3 +1091,126 @@ def _update_num_classes(model_config, num_classes): model_config.faster_rcnn.num_classes = num_classes if meta_architecture == "ssd": model_config.ssd.num_classes = num_classes + + +def _update_sample_from_datasets_weights(input_reader_config, weights): + """Updated sample_from_datasets_weights with overrides.""" + if len(weights) != len(input_reader_config.sample_from_datasets_weights): + raise ValueError( + "sample_from_datasets_weights override has a different number of values" + " ({}) than the configured dataset weights ({})." 
+ .format( + len(input_reader_config.sample_from_datasets_weights), + len(weights))) + + del input_reader_config.sample_from_datasets_weights[:] + input_reader_config.sample_from_datasets_weights.extend(weights) + + +def _update_peak_max_pool_kernel_size(model_config, kernel_size): + """Updates the max pool kernel size (NMS) for keypoints in CenterNet.""" + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.peak_max_pool_kernel_size = kernel_size + else: + tf.logging.warning("Ignoring config override key for " + "peak_max_pool_kernel_size since there are multiple " + "keypoint estimation tasks") + + +def _update_candidate_search_scale(model_config, search_scale): + """Updates the keypoint candidate search scale in CenterNet.""" + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.candidate_search_scale = search_scale + else: + tf.logging.warning("Ignoring config override key for " + "candidate_search_scale since there are multiple " + "keypoint estimation tasks") + + +def _update_candidate_ranking_mode(model_config, mode): + """Updates how keypoints are snapped to candidates in CenterNet.""" + if mode not in ("min_distance", "score_distance_ratio"): + raise ValueError("Attempting to set the keypoint candidate ranking mode " + "to {}, but the only options are 'min_distance' and " + "'score_distance_ratio'.".format(mode)) + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + 
kpt_estimation_task.candidate_ranking_mode = mode + else: + tf.logging.warning("Ignoring config override key for " + "candidate_ranking_mode since there are multiple " + "keypoint estimation tasks") + + +def _update_score_distance_offset(model_config, offset): + """Updates the keypoint candidate selection metric. See CenterNet proto.""" + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.score_distance_offset = offset + else: + tf.logging.warning("Ignoring config override key for " + "score_distance_offset since there are multiple " + "keypoint estimation tasks") + + +def _update_box_scale(model_config, box_scale): + """Updates the keypoint candidate search region. See CenterNet proto.""" + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.box_scale = box_scale + else: + tf.logging.warning("Ignoring config override key for box_scale since " + "there are multiple keypoint estimation tasks") + + +def _update_keypoint_candidate_score_threshold(model_config, threshold): + """Updates the keypoint candidate score threshold. 
See CenterNet proto.""" + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.keypoint_candidate_score_threshold = threshold + else: + tf.logging.warning("Ignoring config override key for " + "keypoint_candidate_score_threshold since there are " + "multiple keypoint estimation tasks") + + +def _update_rescore_instances(model_config, should_rescore): + """Updates whether boxes should be rescored based on keypoint confidences.""" + if isinstance(should_rescore, str): + should_rescore = True if should_rescore == "True" else False + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.rescore_instances = should_rescore + else: + tf.logging.warning("Ignoring config override key for " + "rescore_instances since there are multiple keypoint " + "estimation tasks") + + +def _update_unmatched_keypoint_score(model_config, score): + meta_architecture = model_config.WhichOneof("model") + if meta_architecture == "center_net": + if len(model_config.center_net.keypoint_estimation_task) == 1: + kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0] + kpt_estimation_task.unmatched_keypoint_score = score + else: + tf.logging.warning("Ignoring config override key for " + "unmatched_keypoint_score since there are multiple " + "keypoint estimation tasks") + diff --git a/research/object_detection/utils/config_util_test.py b/research/object_detection/utils/config_util_test.py index f36970c11078b222710427b46ffd502be608c109..196685e53ecd9bcb17a4786d80eb5297f4f41cee 100644 --- a/research/object_detection/utils/config_util_test.py +++ 
b/research/object_detection/utils/config_util_test.py @@ -377,6 +377,45 @@ class ConfigUtilTest(tf.test.TestCase): new_batch_size = configs["train_config"].batch_size self.assertEqual(10, new_batch_size) + @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.") + def testOverwriteSampleFromDatasetWeights(self): + """Tests config override for sample_from_datasets_weights.""" + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.train_input_reader.sample_from_datasets_weights.extend( + [1, 2]) + pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") + _write_config(pipeline_config, pipeline_config_path) + + # Override parameters: + configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) + hparams = contrib_training.HParams(sample_from_datasets_weights=[0.5, 0.5]) + configs = config_util.merge_external_params_with_configs(configs, hparams) + + # Ensure that the parameters have the overridden values: + self.assertListEqual( + [0.5, 0.5], + list(configs["train_input_config"].sample_from_datasets_weights)) + + @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.") + def testOverwriteSampleFromDatasetWeightsWrongLength(self): + """Tests config override for sample_from_datasets_weights.""" + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + pipeline_config.train_input_reader.sample_from_datasets_weights.extend( + [1, 2]) + pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") + _write_config(pipeline_config, pipeline_config_path) + + # Try to override parameter with too many weights: + configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) + hparams = contrib_training.HParams( + sample_from_datasets_weights=[0.5, 0.5, 0.5]) + with self.assertRaises( + ValueError, + msg="sample_from_datasets_weights override has a different number of" + " values (3) than the configured dataset weights (2)." 
+ ): + config_util.merge_external_params_with_configs(configs, hparams) + @unittest.skipIf(tf_version.is_tf2(), "Skipping TF1.X only test.") def testKeyValueOverrideBadKey(self): """Tests that overwriting with a bad key causes an exception.""" @@ -946,7 +985,7 @@ class ConfigUtilTest(tf.test.TestCase): self.assertEqual(config_util.get_number_of_classes(configs["model"]), 2) - def testRemoveUnecessaryEma(self): + def testRemoveUnnecessaryEma(self): input_dict = { "expanded_conv_10/project/act_quant/min": 1, @@ -977,7 +1016,68 @@ class ConfigUtilTest(tf.test.TestCase): self.assertEqual( output_dict, - config_util.remove_unecessary_ema(input_dict, no_ema_collection)) + config_util.remove_unnecessary_ema(input_dict, no_ema_collection)) + + def testUpdateRescoreInstances(self): + pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + kpt_task = pipeline_config.model.center_net.keypoint_estimation_task.add() + kpt_task.rescore_instances = True + + _write_config(pipeline_config, pipeline_config_path) + + configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) + cn_config = configs["model"].center_net + self.assertEqual( + True, cn_config.keypoint_estimation_task[0].rescore_instances) + + config_util.merge_external_params_with_configs( + configs, kwargs_dict={"rescore_instances": False}) + cn_config = configs["model"].center_net + self.assertEqual( + False, cn_config.keypoint_estimation_task[0].rescore_instances) + + def testUpdateRescoreInstancesWithBooleanString(self): + pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + kpt_task = pipeline_config.model.center_net.keypoint_estimation_task.add() + kpt_task.rescore_instances = True + + _write_config(pipeline_config, pipeline_config_path) + + configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) + cn_config = 
configs["model"].center_net + self.assertEqual( + True, cn_config.keypoint_estimation_task[0].rescore_instances) + + config_util.merge_external_params_with_configs( + configs, kwargs_dict={"rescore_instances": "False"}) + cn_config = configs["model"].center_net + self.assertEqual( + False, cn_config.keypoint_estimation_task[0].rescore_instances) + + def testUpdateRescoreInstancesWithMultipleTasks(self): + pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config") + pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() + kpt_task = pipeline_config.model.center_net.keypoint_estimation_task.add() + kpt_task.rescore_instances = True + kpt_task = pipeline_config.model.center_net.keypoint_estimation_task.add() + kpt_task.rescore_instances = True + + _write_config(pipeline_config, pipeline_config_path) + + configs = config_util.get_configs_from_pipeline_file(pipeline_config_path) + cn_config = configs["model"].center_net + self.assertEqual( + True, cn_config.keypoint_estimation_task[0].rescore_instances) + + config_util.merge_external_params_with_configs( + configs, kwargs_dict={"rescore_instances": False}) + cn_config = configs["model"].center_net + self.assertEqual( + True, cn_config.keypoint_estimation_task[0].rescore_instances) + self.assertEqual( + True, cn_config.keypoint_estimation_task[1].rescore_instances) if __name__ == "__main__": diff --git a/research/object_detection/utils/dataset_util.py b/research/object_detection/utils/dataset_util.py index 65e7f7feeac0b4015b6fb31833f8d0590e1095ce..1cf007de8da7b8ae9b99b289f46c14a4238cc1d4 100644 --- a/research/object_detection/utils/dataset_util.py +++ b/research/object_detection/utils/dataset_util.py @@ -38,6 +38,10 @@ def bytes_list_feature(value): return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) +def float_feature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) + + def float_list_feature(value): return 
tf.train.Feature(float_list=tf.train.FloatList(value=value)) diff --git a/research/object_detection/utils/label_map_util.py b/research/object_detection/utils/label_map_util.py index 37c823a8d39cd89801f1f3d7b957d31ede2f6c06..ecf7d82fbf873a7758f6f7c534d1f86a4bd60767 100644 --- a/research/object_detection/utils/label_map_util.py +++ b/research/object_detection/utils/label_map_util.py @@ -130,6 +130,18 @@ def convert_label_map_to_categories(label_map, if item.id not in list_of_ids_already_added: list_of_ids_already_added.append(item.id) category = {'id': item.id, 'name': name} + if item.HasField('frequency'): + if item.frequency == string_int_label_map_pb2.LVISFrequency.Value( + 'FREQUENT'): + category['frequency'] = 'f' + elif item.frequency == string_int_label_map_pb2.LVISFrequency.Value( + 'COMMON'): + category['frequency'] = 'c' + elif item.frequency == string_int_label_map_pb2.LVISFrequency.Value( + 'RARE'): + category['frequency'] = 'r' + if item.HasField('instance_count'): + category['instance_count'] = item.instance_count if item.keypoints: keypoints = {} list_of_keypoint_ids = [] diff --git a/research/object_detection/utils/label_map_util_test.py b/research/object_detection/utils/label_map_util_test.py index 969f3258baf6eadaf9017dc06a8aa1f188eb51b9..cd5bb4169c1e9ffb5375080b37ead6f74d458adb 100644 --- a/research/object_detection/utils/label_map_util_test.py +++ b/research/object_detection/utils/label_map_util_test.py @@ -201,7 +201,7 @@ class LabelMapUtilTest(tf.test.TestCase): name:'n00007846' } """ - text_format.Merge(label_map_string, label_map_proto) + text_format.Parse(label_map_string, label_map_proto) categories = label_map_util.convert_label_map_to_categories( label_map_proto, max_num_classes=3) self.assertListEqual([{ @@ -227,19 +227,61 @@ class LabelMapUtilTest(tf.test.TestCase): }] self.assertListEqual(expected_categories_list, categories) + def test_convert_label_map_to_categories_lvis_frequency_and_counts(self): + label_map_proto = 
string_int_label_map_pb2.StringIntLabelMap() + label_map_string = """ + item { + id:1 + name:'person' + frequency: FREQUENT + instance_count: 1000 + } + item { + id:2 + name:'dog' + frequency: COMMON + instance_count: 100 + } + item { + id:3 + name:'cat' + frequency: RARE + instance_count: 10 + } + """ + text_format.Parse(label_map_string, label_map_proto) + categories = label_map_util.convert_label_map_to_categories( + label_map_proto, max_num_classes=3) + self.assertListEqual([{ + 'id': 1, + 'name': u'person', + 'frequency': 'f', + 'instance_count': 1000 + }, { + 'id': 2, + 'name': u'dog', + 'frequency': 'c', + 'instance_count': 100 + }, { + 'id': 3, + 'name': u'cat', + 'frequency': 'r', + 'instance_count': 10 + }], categories) + def test_convert_label_map_to_categories(self): label_map_proto = self._generate_label_map(num_classes=4) categories = label_map_util.convert_label_map_to_categories( label_map_proto, max_num_classes=3) expected_categories_list = [{ 'name': u'1', - 'id': 1 + 'id': 1, }, { 'name': u'2', - 'id': 2 + 'id': 2, }, { 'name': u'3', - 'id': 3 + 'id': 3, }] self.assertListEqual(expected_categories_list, categories) @@ -259,7 +301,7 @@ class LabelMapUtilTest(tf.test.TestCase): } """ label_map_proto = string_int_label_map_pb2.StringIntLabelMap() - text_format.Merge(label_map_str, label_map_proto) + text_format.Parse(label_map_str, label_map_proto) categories = label_map_util.convert_label_map_to_categories( label_map_proto, max_num_classes=1) self.assertEqual('person', categories[0]['name']) @@ -291,7 +333,7 @@ class LabelMapUtilTest(tf.test.TestCase): } """ label_map_proto = string_int_label_map_pb2.StringIntLabelMap() - text_format.Merge(label_map_str, label_map_proto) + text_format.Parse(label_map_str, label_map_proto) with self.assertRaises(ValueError): label_map_util.convert_label_map_to_categories( label_map_proto, max_num_classes=2) diff --git a/research/object_detection/utils/learning_schedules.py 
b/research/object_detection/utils/learning_schedules.py index 167be22f70e8627cac86d84c905a0b0e15588ad6..678cda6bdf8609bebcfa55fcde889d8dac09b174 100644 --- a/research/object_detection/utils/learning_schedules.py +++ b/research/object_detection/utils/learning_schedules.py @@ -23,6 +23,14 @@ from six.moves import zip import tensorflow.compat.v1 as tf +def _learning_rate_return_value(eager_decay_rate): + """Helper function to return proper learning rate based on tf version.""" + if tf.executing_eagerly(): + return eager_decay_rate + else: + return eager_decay_rate() + + def exponential_decay_with_burnin(global_step, learning_rate_base, learning_rate_decay_steps, @@ -76,10 +84,65 @@ def exponential_decay_with_burnin(global_step, tf.constant(burnin_learning_rate), post_burnin_learning_rate), min_learning_rate, name='learning_rate') - if tf.executing_eagerly(): - return eager_decay_rate - else: - return eager_decay_rate() + return _learning_rate_return_value(eager_decay_rate) + + +def exponential_decay_with_warmup(global_step, + learning_rate_base, + learning_rate_decay_steps, + learning_rate_decay_factor, + warmup_learning_rate=0.0, + warmup_steps=0, + min_learning_rate=0.0, + staircase=True): + """Exponential decay schedule with warm up period. + + Args: + global_step: int tensor representing global step. + learning_rate_base: base learning rate. + learning_rate_decay_steps: steps to take between decaying the learning rate. + Note that this includes the number of burn-in steps. + learning_rate_decay_factor: multiplicative factor by which to decay learning + rate. + warmup_learning_rate: initial learning rate during warmup period. + warmup_steps: number of steps to use warmup learning rate. + min_learning_rate: the minimum learning rate. + staircase: whether use staircase decay. + + Returns: + If executing eagerly: + returns a no-arg callable that outputs the (scalar) + float tensor learning rate given the current value of global_step. 
+ If in a graph: + immediately returns a (scalar) float tensor representing learning rate. + """ + + def eager_decay_rate(): + """Callable to compute the learning rate.""" + post_warmup_learning_rate = tf.train.exponential_decay( + learning_rate_base, + global_step - warmup_steps, + learning_rate_decay_steps, + learning_rate_decay_factor, + staircase=staircase) + if callable(post_warmup_learning_rate): + post_warmup_learning_rate = post_warmup_learning_rate() + + if learning_rate_base < warmup_learning_rate: + raise ValueError('learning_rate_base must be larger or equal to ' + 'warmup_learning_rate.') + slope = (learning_rate_base - warmup_learning_rate) / warmup_steps + warmup_rate = slope * tf.cast(global_step, + tf.float32) + warmup_learning_rate + learning_rate = tf.where( + tf.less(tf.cast(global_step, tf.int32), tf.constant(warmup_steps)), + warmup_rate, + tf.maximum(post_warmup_learning_rate, min_learning_rate), + name='learning_rate') + + return learning_rate + + return _learning_rate_return_value(eager_decay_rate) def cosine_decay_with_warmup(global_step, @@ -142,10 +205,7 @@ def cosine_decay_with_warmup(global_step, return tf.where(global_step > total_steps, 0.0, learning_rate, name='learning_rate') - if tf.executing_eagerly(): - return eager_decay_rate - else: - return eager_decay_rate() + return _learning_rate_return_value(eager_decay_rate) def manual_stepping(global_step, boundaries, rates, warmup=False): @@ -212,7 +272,5 @@ def manual_stepping(global_step, boundaries, rates, warmup=False): [0] * num_boundaries)) return tf.reduce_sum(rates * tf.one_hot(rate_index, depth=num_boundaries), name='learning_rate') - if tf.executing_eagerly(): - return eager_decay_rate - else: - return eager_decay_rate() + + return _learning_rate_return_value(eager_decay_rate) diff --git a/research/object_detection/utils/learning_schedules_test.py b/research/object_detection/utils/learning_schedules_test.py index 
2b6012d111c702a8cfd9f767b6f6fc2819f36667..d7a1c9fc4a92a7779e1bb9410bbe956c0c4eeb01 100644 --- a/research/object_detection/utils/learning_schedules_test.py +++ b/research/object_detection/utils/learning_schedules_test.py @@ -50,6 +50,28 @@ class LearningSchedulesTest(test_case.TestCase): exp_rates = [.5, .5, 1, 1, 1, .1, .1, .1, .05] self.assertAllClose(output_rates, exp_rates, rtol=1e-4) + def testExponentialDecayWithWarmup(self): + def graph_fn(global_step): + learning_rate_base = 1.0 + learning_rate_decay_steps = 3 + learning_rate_decay_factor = .1 + warmup_learning_rate = .5 + warmup_steps = 2 + min_learning_rate = .05 + learning_rate = learning_schedules.exponential_decay_with_warmup( + global_step, learning_rate_base, learning_rate_decay_steps, + learning_rate_decay_factor, warmup_learning_rate, warmup_steps, + min_learning_rate) + assert learning_rate.op.name.endswith('learning_rate') + return (learning_rate,) + + output_rates = [ + self.execute(graph_fn, [np.array(i).astype(np.int64)]) for i in range(9) + ] + + exp_rates = [.5, .75, 1, 1, 1, .1, .1, .1, .05] + self.assertAllClose(output_rates, exp_rates, rtol=1e-4) + def testCosineDecayWithWarmup(self): def graph_fn(global_step): learning_rate_base = 1.0 diff --git a/research/object_detection/utils/object_detection_evaluation.py b/research/object_detection/utils/object_detection_evaluation.py index e0b51101205ce7c813cced02a453e863c3d4e90f..89a8af022aaf5cba6833a5ff3229f58a6ba618c4 100644 --- a/research/object_detection/utils/object_detection_evaluation.py +++ b/research/object_detection/utils/object_detection_evaluation.py @@ -159,7 +159,9 @@ class ObjectDetectionEvaluator(DetectionEvaluator): metric_prefix=None, use_weighted_mean_ap=False, evaluate_masks=False, - group_of_weight=0.0): + group_of_weight=0.0, + nms_iou_threshold=1.0, + nms_max_output_boxes=10000): """Constructor. 
Args: @@ -187,6 +189,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator): matching_iou_threshold, weight group_of_weight is added to true positives. Consequently, if no detection falls within a group-of box, weight group_of_weight is added to false negatives. + nms_iou_threshold: NMS IoU threashold. + nms_max_output_boxes: maximal number of boxes after NMS. Raises: ValueError: If the category ids are not 1-indexed. @@ -202,6 +206,8 @@ class ObjectDetectionEvaluator(DetectionEvaluator): self._label_id_offset = 1 self._evaluate_masks = evaluate_masks self._group_of_weight = group_of_weight + self._nms_iou_threshold = nms_iou_threshold + self._nms_max_output_boxes = nms_max_output_boxes self._evaluation = ObjectDetectionEvaluation( num_groundtruth_classes=self._num_classes, matching_iou_threshold=self._matching_iou_threshold, @@ -209,7 +215,9 @@ class ObjectDetectionEvaluator(DetectionEvaluator): recall_upper_bound=self._recall_upper_bound, use_weighted_mean_ap=self._use_weighted_mean_ap, label_id_offset=self._label_id_offset, - group_of_weight=self._group_of_weight) + group_of_weight=self._group_of_weight, + nms_iou_threshold=self._nms_iou_threshold, + nms_max_output_boxes=self._nms_max_output_boxes) self._image_ids = set([]) self._evaluate_corlocs = evaluate_corlocs self._evaluate_precision_recall = evaluate_precision_recall @@ -246,7 +254,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator): """ for image_id in image_ids: if image_id in self._image_ids: - raise ValueError('Image with id {} already added.'.format(image_id)) + logging.warning('Image with id %s already added.', image_id) self._evaluation.merge_internal_state(state_tuple) @@ -313,7 +321,7 @@ class ObjectDetectionEvaluator(DetectionEvaluator): raise error if instance masks are not in groundtruth dictionary. 
""" if image_id in self._image_ids: - raise ValueError('Image with id {} already added.'.format(image_id)) + logging.warning('Image with id %s already added.', image_id) groundtruth_classes = ( groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] - @@ -454,7 +462,10 @@ class ObjectDetectionEvaluator(DetectionEvaluator): num_groundtruth_classes=self._num_classes, matching_iou_threshold=self._matching_iou_threshold, use_weighted_mean_ap=self._use_weighted_mean_ap, - label_id_offset=self._label_id_offset) + label_id_offset=self._label_id_offset, + nms_iou_threshold=self._nms_iou_threshold, + nms_max_output_boxes=self._nms_max_output_boxes, + ) self._image_ids.clear() def add_eval_dict(self, eval_dict): @@ -549,13 +560,19 @@ class ObjectDetectionEvaluator(DetectionEvaluator): class PascalDetectionEvaluator(ObjectDetectionEvaluator): """A class to evaluate detections using PASCAL metrics.""" - def __init__(self, categories, matching_iou_threshold=0.5): + def __init__(self, + categories, + matching_iou_threshold=0.5, + nms_iou_threshold=1.0, + nms_max_output_boxes=10000): super(PascalDetectionEvaluator, self).__init__( categories, matching_iou_threshold=matching_iou_threshold, evaluate_corlocs=False, metric_prefix='PascalBoxes', - use_weighted_mean_ap=False) + use_weighted_mean_ap=False, + nms_iou_threshold=nms_iou_threshold, + nms_max_output_boxes=nms_max_output_boxes) class WeightedPascalDetectionEvaluator(ObjectDetectionEvaluator): @@ -712,7 +729,7 @@ class OpenImagesDetectionEvaluator(ObjectDetectionEvaluator): ValueError: On adding groundtruth for an image more than once. 
""" if image_id in self._image_ids: - raise ValueError('Image with id {} already added.'.format(image_id)) + logging.warning('Image with id %s already added.', image_id) groundtruth_classes = ( groundtruth_dict[standard_fields.InputDataFields.groundtruth_classes] - diff --git a/research/object_detection/utils/object_detection_evaluation_test.py b/research/object_detection/utils/object_detection_evaluation_test.py index ff399ed4bad4d4872eb135685789678237f0e0b1..4e61ee4ff2169759bb62dc5c457339cba9b6ad8c 100644 --- a/research/object_detection/utils/object_detection_evaluation_test.py +++ b/research/object_detection/utils/object_detection_evaluation_test.py @@ -524,30 +524,6 @@ class PascalEvaluationTest(tf.test.TestCase): pascal_evaluator.clear() self.assertFalse(pascal_evaluator._image_ids) - def test_value_error_on_duplicate_images(self): - categories = [{'id': 1, 'name': 'cat'}, - {'id': 2, 'name': 'dog'}, - {'id': 3, 'name': 'elephant'}] - # Add groundtruth - pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator( - categories) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - pascal_evaluator.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - with self.assertRaises(ValueError): - pascal_evaluator.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - class WeightedPascalEvaluationTest(tf.test.TestCase): @@ -659,28 +635,6 @@ class WeightedPascalEvaluationTest(tf.test.TestCase): self.wp_eval.clear() self.assertFalse(self.wp_eval._image_ids) - def test_value_error_on_duplicate_images(self): - # Add groundtruth - 
self.wp_eval = ( - object_detection_evaluation.WeightedPascalDetectionEvaluator( - self.categories)) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - with self.assertRaises(ValueError): - self.wp_eval.add_single_ground_truth_image_info( - image_key1, - {standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1}) - class PrecisionAtRecallEvaluationTest(tf.test.TestCase): @@ -807,31 +761,6 @@ class PrecisionAtRecallEvaluationTest(tf.test.TestCase): self.wp_eval.clear() self.assertFalse(self.wp_eval._image_ids) - def test_value_error_on_duplicate_images(self): - # Add groundtruth - self.wp_eval = ( - object_detection_evaluation.PrecisionAtRecallDetectionEvaluator( - self.categories, recall_lower_bound=0.0, recall_upper_bound=0.5)) - image_key1 = 'img1' - groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], - dtype=float) - groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int) - self.wp_eval.add_single_ground_truth_image_info( - image_key1, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1 - }) - with self.assertRaises(ValueError): - self.wp_eval.add_single_ground_truth_image_info( - image_key1, { - standard_fields.InputDataFields.groundtruth_boxes: - groundtruth_boxes1, - standard_fields.InputDataFields.groundtruth_classes: - groundtruth_class_labels1 - }) - class ObjectDetectionEvaluationTest(tf.test.TestCase): diff --git a/research/object_detection/utils/ops.py 
b/research/object_detection/utils/ops.py index f345fa3889c58a4c0ede56a07f3b9fdb731faf98..e1897933cd866313f556ead13ec0a5f186af7ecd 100644 --- a/research/object_detection/utils/ops.py +++ b/research/object_detection/utils/ops.py @@ -216,13 +216,13 @@ def pad_to_multiple(tensor, multiple): height_pad = tf.zeros([ batch_size, padded_tensor_height - tensor_height, tensor_width, tensor_depth - ]) + ], dtype=tensor.dtype) tensor = tf.concat([tensor, height_pad], 1) if padded_tensor_width != tensor_width: width_pad = tf.zeros([ batch_size, padded_tensor_height, padded_tensor_width - tensor_width, tensor_depth - ]) + ], dtype=tensor.dtype) tensor = tf.concat([tensor, width_pad], 2) return tensor @@ -1134,3 +1134,57 @@ def decode_image(tensor_dict): tensor_dict[fields.InputDataFields.image], channels=3) tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) return tensor_dict + + +def giou(boxes1, boxes2): + """Computes generalized IOU between two tensors. + + Each box should be represented as [ymin, xmin, ymax, xmax]. 
+ + Args: + boxes1: a tensor with shape [num_boxes, 4] + boxes2: a tensor with shape [num_boxes, 4] + + Returns: + a tensor of shape [num_boxes] containing GIoUs + + """ + pred_ymin, pred_xmin, pred_ymax, pred_xmax = tf.unstack(boxes1, axis=1) + gt_ymin, gt_xmin, gt_ymax, gt_xmax = tf.unstack(boxes2, axis=1) + + gt_area = (gt_ymax - gt_ymin) * (gt_xmax - gt_xmin) + pred_area = (pred_ymax - pred_ymin) * (pred_xmax - pred_xmin) + + x1_i = tf.maximum(pred_xmin, gt_xmin) + x2_i = tf.minimum(pred_xmax, gt_xmax) + y1_i = tf.maximum(pred_ymin, gt_ymin) + y2_i = tf.minimum(pred_ymax, gt_ymax) + intersection_area = tf.maximum(0.0, y2_i - y1_i) * tf.maximum(0.0, + x2_i - x1_i) + + x1_c = tf.minimum(pred_xmin, gt_xmin) + x2_c = tf.maximum(pred_xmax, gt_xmax) + y1_c = tf.minimum(pred_ymin, gt_ymin) + y2_c = tf.maximum(pred_ymax, gt_ymax) + hull_area = (y2_c - y1_c) * (x2_c - x1_c) + + union_area = gt_area + pred_area - intersection_area + iou = tf.where(tf.equal(union_area, 0.0), + tf.zeros_like(union_area), intersection_area / union_area) + giou_ = iou - tf.where(hull_area > 0.0, + (hull_area - union_area) / hull_area, iou) + return giou_ + + +def center_to_corner_coordinate(input_tensor): + """Converts input boxes from center to corner representation.""" + reshaped_encodings = tf.reshape(input_tensor, [-1, 4]) + ycenter = tf.gather(reshaped_encodings, [0], axis=1) + xcenter = tf.gather(reshaped_encodings, [1], axis=1) + h = tf.gather(reshaped_encodings, [2], axis=1) + w = tf.gather(reshaped_encodings, [3], axis=1) + ymin = ycenter - h / 2. + xmin = xcenter - w / 2. + ymax = ycenter + h / 2. + xmax = xcenter + w / 2. 
+ return tf.squeeze(tf.stack([ymin, xmin, ymax, xmax], axis=1)) diff --git a/research/object_detection/utils/ops_test.py b/research/object_detection/utils/ops_test.py index c5252d644ccd90a4518ad32d32656100c6f3c4f9..9a7ded91d5422a27f8ecea1a50441d562bb591c3 100644 --- a/research/object_detection/utils/ops_test.py +++ b/research/object_detection/utils/ops_test.py @@ -1635,5 +1635,119 @@ class TestGatherWithPaddingValues(test_case.TestCase): + +class TestGIoU(test_case.TestCase): + + def test_giou_with_no_overlap(self): + expected_giou_tensor = [ + 0, -1/3, -3/4, 0, -98/100 + ] + + def graph_fn(): + boxes1 = tf.constant([[3, 4, 5, 6], [3, 3, 5, 5], + [0, 0, 0, 0], [3, 3, 5, 5], + [9, 9, 10, 10]], + dtype=tf.float32) + boxes2 = tf.constant([[3, 2, 5, 4], [3, 7, 5, 9], + [5, 5, 10, 10], [3, 5, 5, 7], + [0, 0, 1, 1]], dtype=tf.float32) + + giou = ops.giou(boxes1, boxes2) + self.assertEqual(giou.dtype, tf.float32) + + return giou + + giou = self.execute(graph_fn, []) + self.assertAllClose(expected_giou_tensor, giou) + + def test_giou_with_overlaps(self): + expected_giou_tensor = [ + 1/25, 1/4, 1/3, 1/7 - 2/9 + ] + + def graph_fn(): + boxes1 = tf.constant([[2, 1, 7, 6], [2, 2, 4, 4], + [2, 2, 4, 4], [2, 2, 4, 4]], + dtype=tf.float32) + boxes2 = tf.constant([[4, 3, 5, 4], [3, 3, 4, 4], + [2, 3, 4, 5], [3, 3, 5, 5]], dtype=tf.float32) + + giou = ops.giou(boxes1, boxes2) + self.assertEqual(giou.dtype, tf.float32) + + return giou + + giou = self.execute(graph_fn, []) + self.assertAllClose(expected_giou_tensor, giou) + + def test_giou_with_perfect_overlap(self): + expected_giou_tensor = [1] + + def graph_fn(): + boxes1 = tf.constant([[3, 3, 5, 5]], dtype=tf.float32) + boxes2 = tf.constant([[3, 3, 5, 5]], dtype=tf.float32) + + giou = ops.giou(boxes1, boxes2) + self.assertEqual(giou.dtype, tf.float32) + + return giou + + giou = self.execute(graph_fn, []) + self.assertAllClose(expected_giou_tensor, giou) + + def test_giou_with_zero_area_boxes(self): + expected_giou_tensor = [0] + + 
def graph_fn(): + boxes1 = tf.constant([[1, 1, 1, 1]], dtype=tf.float32) + boxes2 = tf.constant([[1, 1, 1, 1]], dtype=tf.float32) + + giou = ops.giou(boxes1, boxes2) + self.assertEqual(giou.dtype, tf.float32) + + return giou + + giou = self.execute(graph_fn, []) + self.assertAllClose(expected_giou_tensor, giou) + + def test_giou_different_with_l1_same(self): + expected_giou_tensor = [ + 2/3, 3/5 + ] + + def graph_fn(): + boxes1 = tf.constant([[3, 3, 5, 5], [3, 3, 5, 5]], dtype=tf.float32) + boxes2 = tf.constant([[3, 2.5, 5, 5.5], [3, 2.5, 5, 4.5]], + dtype=tf.float32) + + giou = ops.giou(boxes1, boxes2) + self.assertEqual(giou.dtype, tf.float32) + + return giou + + giou = self.execute(graph_fn, []) + self.assertAllClose(expected_giou_tensor, giou) + + +class TestCoordinateConversion(test_case.TestCase): + + def test_coord_conv(self): + expected_box_tensor = [ + [0.5, 0.5, 5.5, 5.5], [2, 1, 4, 7], [0, 0, 0, 0] + ] + + def graph_fn(): + boxes = tf.constant([[3, 3, 5, 5], [3, 4, 2, 6], [0, 0, 0, 0]], + dtype=tf.float32) + + converted = ops.center_to_corner_coordinate(boxes) + self.assertEqual(converted.dtype, tf.float32) + + return converted + + converted = self.execute(graph_fn, []) + self.assertAllClose(expected_box_tensor, converted) + + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/utils/spatial_transform_ops.py b/research/object_detection/utils/spatial_transform_ops.py index 95aaf967984172c6ef49a5c9de98b5a8ed0cdc0b..1880dffea1a4a6ddd21c60aac31dc1fda0a9ff30 100644 --- a/research/object_detection/utils/spatial_transform_ops.py +++ b/research/object_detection/utils/spatial_transform_ops.py @@ -411,6 +411,56 @@ def multilevel_roi_align(features, boxes, box_levels, output_size, return features_per_box +def multilevel_native_crop_and_resize(images, boxes, box_levels, + crop_size, scope=None): + """Multilevel native crop and resize. + + Same as `multilevel_matmul_crop_and_resize` but uses tf.image.crop_and_resize. 
+ + Args: + images: A list of 4-D tensor of shape + [batch, image_height, image_width, depth] representing features of + different size. + boxes: A `Tensor` of type `float32`. + A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in + normalized coordinates and are of the form `[y1, x1, y2, x2]`. A + normalized coordinate value of `y` is mapped to the image coordinate at + `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image + height is mapped to `[0, image_height - 1] in image height coordinates. + We do allow y1 > y2, in which case the sampled crop is an up-down flipped + version of the original image. The width dimension is treated similarly. + Normalized coordinates outside the `[0, 1]` range are allowed, in which + case we use `extrapolation_value` to extrapolate the input image values. + box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level + of the box. + crop_size: A list of two integers `[crop_height, crop_width]`. All + cropped image patches are resized to this size. The aspect ratio of the + image content is not preserved. Both `crop_height` and `crop_width` need + to be positive. + scope: A name for the operation (optional). + + Returns: + A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width, + depth]` + """ + if box_levels is None: + return native_crop_and_resize(images[0], boxes, crop_size, scope) + with tf.name_scope('MultiLevelNativeCropAndResize'): + cropped_feature_list = [] + for level, image in enumerate(images): + # For each level, crop the feature according to all boxes + # set the cropped feature not at this level to 0 tensor. + # Consider more efficient way of computing cropped features. 
+ cropped = native_crop_and_resize(image, boxes, crop_size, scope) + cond = tf.tile( + tf.equal(box_levels, level)[:, :, tf.newaxis], + [1, 1] + [tf.math.reduce_prod(cropped.shape.as_list()[2:])]) + cond = tf.reshape(cond, cropped.shape) + cropped_final = tf.where(cond, cropped, tf.zeros_like(cropped)) + cropped_feature_list.append(cropped_final) + return tf.math.reduce_sum(cropped_feature_list, axis=0) + + def native_crop_and_resize(image, boxes, crop_size, scope=None): """Same as `matmul_crop_and_resize` but uses tf.image.crop_and_resize.""" def get_box_inds(proposals): @@ -431,6 +481,50 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None): return tf.reshape(cropped_regions, final_shape) +def multilevel_matmul_crop_and_resize(images, boxes, box_levels, crop_size, + extrapolation_value=0.0, scope=None): + """Multilevel matmul crop and resize. + + Same as `matmul_crop_and_resize` but crop images according to box levels. + + Args: + images: A list of 4-D tensor of shape + [batch, image_height, image_width, depth] representing features of + different size. + boxes: A `Tensor` of type `float32` or 'bfloat16'. + A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in + normalized coordinates and are of the form `[y1, x1, y2, x2]`. A + normalized coordinate value of `y` is mapped to the image coordinate at + `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image + height is mapped to `[0, image_height - 1] in image height coordinates. + We do allow y1 > y2, in which case the sampled crop is an up-down flipped + version of the original image. The width dimension is treated similarly. + Normalized coordinates outside the `[0, 1]` range are allowed, in which + case we use `extrapolation_value` to extrapolate the input image values. + box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level + of the box. + crop_size: A list of two integers `[crop_height, crop_width]`. 
All + cropped image patches are resized to this size. The aspect ratio of the + image content is not preserved. Both `crop_height` and `crop_width` need + to be positive. + extrapolation_value: A float value to use for extrapolation. + scope: A name for the operation (optional). + + Returns: + A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width, + depth]` + """ + with tf.name_scope(scope, 'MultiLevelMatMulCropAndResize'): + if box_levels is None: + box_levels = tf.zeros(tf.shape(boxes)[:2], dtype=tf.int32) + return multilevel_roi_align(images, + boxes, + box_levels, + crop_size, + align_corners=True, + extrapolation_value=extrapolation_value) + + def matmul_crop_and_resize(image, boxes, crop_size, extrapolation_value=0.0, scope=None): """Matrix multiplication based implementation of the crop and resize op. diff --git a/research/object_detection/utils/spatial_transform_ops_test.py b/research/object_detection/utils/spatial_transform_ops_test.py index d18456640673cf736ee152b19291ed91226fcd31..2261f078ed2bfac9adf1e147a8ea15bef900c5f2 100644 --- a/research/object_detection/utils/spatial_transform_ops_test.py +++ b/research/object_detection/utils/spatial_transform_ops_test.py @@ -512,6 +512,38 @@ class MatMulCropAndResizeTest(test_case.TestCase): crop_output = self.execute(graph_fn, [image, boxes]) self.assertAllClose(crop_output, expected_output) + def testMultilevelMatMulCropAndResize(self): + + def graph_fn(image1, image2, boxes, box_levels): + return spatial_ops.multilevel_matmul_crop_and_resize([image1, image2], + boxes, + box_levels, + crop_size=[2, 2]) + + image = [np.array([[[[1, 0], [2, 0], [3, 0]], + [[4, 0], [5, 0], [6, 0]], + [[7, 0], [8, 0], [9, 0]]], + [[[1, 0], [2, 0], [3, 0]], + [[4, 0], [5, 0], [6, 0]], + [[7, 0], [8, 0], [9, 0]]]], dtype=np.float32), + np.array([[[[1, 0], [2, 1], [3, 2]], + [[4, 3], [5, 4], [6, 5]], + [[7, 6], [8, 7], [9, 8]]], + [[[1, 0], [2, 1], [3, 2]], + [[4, 3], [5, 4], [6, 5]], + [[7, 6], [8, 7], [9, 8]]]], 
dtype=np.float32)] + boxes = np.array([[[1, 1, 0, 0], + [.5, .5, 0, 0]], + [[0, 0, 1, 1], + [0, 0, .5, .5]]], dtype=np.float32) + box_levels = np.array([[0, 1], [1, 1]], dtype=np.int32) + expected_output = [[[[[9, 0], [7, 0]], [[3, 0], [1, 0]]], + [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]], + [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]], + [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]] + crop_output = self.execute(graph_fn, image + [boxes, box_levels]) + self.assertAllClose(crop_output, expected_output) + class NativeCropAndResizeTest(test_case.TestCase): @@ -537,6 +569,35 @@ class NativeCropAndResizeTest(test_case.TestCase): crop_output = self.execute_cpu(graph_fn, [image, boxes]) self.assertAllClose(crop_output, expected_output) + def testMultilevelBatchCropAndResize3x3To2x2_2Channels(self): + + def graph_fn(image1, image2, boxes, box_levels): + return spatial_ops.multilevel_native_crop_and_resize([image1, image2], + boxes, + box_levels, + crop_size=[2, 2]) + image = [np.array([[[[1, 0], [2, 1], [3, 2]], + [[4, 3], [5, 4], [6, 5]], + [[7, 6], [8, 7], [9, 8]]], + [[[1, 0], [2, 1], [3, 2]], + [[4, 3], [5, 4], [6, 5]], + [[7, 6], [8, 7], [9, 8]]]], dtype=np.float32), + np.array([[[[1, 0], [2, 1]], + [[4, 3], [5, 4]]], + [[[1, 0], [2, 1]], + [[4, 3], [5, 4]]]], dtype=np.float32)] + boxes = np.array([[[0, 0, 1, 1], + [0, 0, .5, .5]], + [[1, 1, 0, 0], + [.5, .5, 0, 0]]], dtype=np.float32) + box_levels = np.array([[0, 1], [0, 0]], dtype=np.float32) + expected_output = [[[[[1, 0], [3, 2]], [[7, 6], [9, 8]]], + [[[1, 0], [1.5, 0.5]], [[2.5, 1.5], [3, 2]]]], + [[[[9, 8], [7, 6]], [[3, 2], [1, 0]]], + [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]]] + crop_output = self.execute_cpu(graph_fn, image + [boxes, box_levels]) + self.assertAllClose(crop_output, expected_output) + if __name__ == '__main__': tf.test.main() diff --git a/research/object_detection/utils/target_assigner_utils.py b/research/object_detection/utils/target_assigner_utils.py index 
0aa26a47ed75ac918a82aaee184fa2bb0dfa7127..7ac61e8a84d387b1e4c4a139ffcd9d5c336f19dd 100644 --- a/research/object_detection/utils/target_assigner_utils.py +++ b/research/object_detection/utils/target_assigner_utils.py @@ -41,13 +41,88 @@ def image_shape_to_grids(height, width): return (y_grid, x_grid) +def _coordinates_to_heatmap_dense(y_grid, x_grid, y_coordinates, x_coordinates, + sigma, channel_onehot, channel_weights=None): + """Dense version of coordinates to heatmap that uses an outer product.""" + num_instances, num_channels = ( + shape_utils.combined_static_and_dynamic_shape(channel_onehot)) + + x_grid = tf.expand_dims(x_grid, 2) + y_grid = tf.expand_dims(y_grid, 2) + # The raw center coordinates in the output space. + x_diff = x_grid - tf.math.floor(x_coordinates) + y_diff = y_grid - tf.math.floor(y_coordinates) + squared_distance = x_diff**2 + y_diff**2 + + gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma)) + + reshaped_gaussian_map = tf.expand_dims(gaussian_map, axis=-1) + reshaped_channel_onehot = tf.reshape(channel_onehot, + (1, 1, num_instances, num_channels)) + gaussian_per_box_per_class_map = ( + reshaped_gaussian_map * reshaped_channel_onehot) + + if channel_weights is not None: + reshaped_weights = tf.reshape(channel_weights, (1, 1, num_instances, 1)) + gaussian_per_box_per_class_map *= reshaped_weights + + # Take maximum along the "instance" dimension so that all per-instance + # heatmaps of the same class are merged together. + heatmap = tf.reduce_max(gaussian_per_box_per_class_map, axis=2) + + # Maximum of an empty tensor is -inf, the following is to avoid that. 
+ heatmap = tf.maximum(heatmap, 0) + + return tf.stop_gradient(heatmap) + + +def _coordinates_to_heatmap_sparse(y_grid, x_grid, y_coordinates, x_coordinates, + sigma, channel_onehot, channel_weights=None): + """Sparse version of coordinates to heatmap using tf.scatter.""" + + if not hasattr(tf, 'tensor_scatter_nd_max'): + raise RuntimeError( + ('Please upgrade tensowflow to use `tensor_scatter_nd_max` or set ' + 'compute_heatmap_sparse=False')) + _, num_channels = ( + shape_utils.combined_static_and_dynamic_shape(channel_onehot)) + + height, width = shape_utils.combined_static_and_dynamic_shape(y_grid) + x_grid = tf.expand_dims(x_grid, 2) + y_grid = tf.expand_dims(y_grid, 2) + # The raw center coordinates in the output space. + x_diff = x_grid - tf.math.floor(x_coordinates) + y_diff = y_grid - tf.math.floor(y_coordinates) + squared_distance = x_diff**2 + y_diff**2 + + gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma)) + + if channel_weights is not None: + gaussian_map = gaussian_map * channel_weights[tf.newaxis, tf.newaxis, :] + + channel_indices = tf.argmax(channel_onehot, axis=1) + + channel_indices = channel_indices[:, tf.newaxis] + heatmap_init = tf.zeros((num_channels, height, width)) + + gaussian_map = tf.transpose(gaussian_map, (2, 0, 1)) + heatmap = tf.tensor_scatter_nd_max( + heatmap_init, channel_indices, gaussian_map) + + # Maximum of an empty tensor is -inf, the following is to avoid that. + heatmap = tf.maximum(heatmap, 0) + + return tf.stop_gradient(tf.transpose(heatmap, (1, 2, 0))) + + def coordinates_to_heatmap(y_grid, x_grid, y_coordinates, x_coordinates, sigma, channel_onehot, - channel_weights=None): + channel_weights=None, + sparse=False): """Returns the heatmap targets from a set of point coordinates. This function maps a set of point coordinates to the output heatmap image @@ -71,41 +146,23 @@ def coordinates_to_heatmap(y_grid, representing the one-hot encoded channel labels for each point. 
channel_weights: A 1D tensor with shape [num_instances] corresponding to the weight of each instance. + sparse: bool, indicating whether or not to use the sparse implementation + of the function. The sparse version scales better with number of channels, + but in some cases is known to cause OOM error. See (b/170989061). Returns: heatmap: A tensor of size [height, width, num_channels] representing the heatmap. Output (height, width) match the dimensions of the input grids. """ - num_instances, num_channels = ( - shape_utils.combined_static_and_dynamic_shape(channel_onehot)) - - x_grid = tf.expand_dims(x_grid, 2) - y_grid = tf.expand_dims(y_grid, 2) - # The raw center coordinates in the output space. - x_diff = x_grid - tf.math.floor(x_coordinates) - y_diff = y_grid - tf.math.floor(y_coordinates) - squared_distance = x_diff**2 + y_diff**2 - gaussian_map = tf.exp(-squared_distance / (2 * sigma * sigma)) - - reshaped_gaussian_map = tf.expand_dims(gaussian_map, axis=-1) - reshaped_channel_onehot = tf.reshape(channel_onehot, - (1, 1, num_instances, num_channels)) - gaussian_per_box_per_class_map = ( - reshaped_gaussian_map * reshaped_channel_onehot) - - if channel_weights is not None: - reshaped_weights = tf.reshape(channel_weights, (1, 1, num_instances, 1)) - gaussian_per_box_per_class_map *= reshaped_weights - - # Take maximum along the "instance" dimension so that all per-instance - # heatmaps of the same class are merged together. - heatmap = tf.reduce_max(gaussian_per_box_per_class_map, axis=2) - - # Maximum of an empty tensor is -inf, the following is to avoid that. 
- heatmap = tf.maximum(heatmap, 0) - - return heatmap + if sparse: + return _coordinates_to_heatmap_sparse( + y_grid, x_grid, y_coordinates, x_coordinates, sigma, channel_onehot, + channel_weights) + else: + return _coordinates_to_heatmap_dense( + y_grid, x_grid, y_coordinates, x_coordinates, sigma, channel_onehot, + channel_weights) def compute_floor_offsets_with_indices(y_source, diff --git a/research/object_detection/utils/target_assigner_utils_test.py b/research/object_detection/utils/target_assigner_utils_test.py index f663445324d7ee648130018b522fdcbaaeb74d54..ef0f3420e01b84b35ecd785a176e35d8c8871cfb 100644 --- a/research/object_detection/utils/target_assigner_utils_test.py +++ b/research/object_detection/utils/target_assigner_utils_test.py @@ -14,6 +14,7 @@ # ============================================================================== """Tests for utils.target_assigner_utils.""" +from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf @@ -21,7 +22,7 @@ from object_detection.utils import target_assigner_utils as ta_utils from object_detection.utils import test_case -class TargetUtilTest(test_case.TestCase): +class TargetUtilTest(parameterized.TestCase, test_case.TestCase): def test_image_shape_to_grids(self): def graph_fn(): @@ -36,7 +37,11 @@ class TargetUtilTest(test_case.TestCase): np.testing.assert_array_equal(y_grid, expected_y_grid) np.testing.assert_array_equal(x_grid, expected_x_grid) - def test_coordinates_to_heatmap(self): + @parameterized.parameters((False,), (True,)) + def test_coordinates_to_heatmap(self, sparse): + if not hasattr(tf, 'tensor_scatter_nd_max'): + self.skipTest('Cannot test function due to old TF version.') + def graph_fn(): (y_grid, x_grid) = ta_utils.image_shape_to_grids(height=3, width=5) y_coordinates = tf.constant([1.5, 0.5], dtype=tf.float32) @@ -46,7 +51,8 @@ class TargetUtilTest(test_case.TestCase): channel_weights = tf.constant([1, 1], dtype=tf.float32) heatmap = 
ta_utils.coordinates_to_heatmap(y_grid, x_grid, y_coordinates, x_coordinates, sigma, - channel_onehot, channel_weights) + channel_onehot, + channel_weights, sparse=sparse) return heatmap heatmap = self.execute(graph_fn, []) diff --git a/research/object_detection/utils/test_utils.py b/research/object_detection/utils/test_utils.py index 666a29adbad262054e039c30fb9deb52e66ac665..5cefe3f8a14ce51924adceb3524cfa15afa432af 100644 --- a/research/object_detection/utils/test_utils.py +++ b/research/object_detection/utils/test_utils.py @@ -101,6 +101,10 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor): is_training, num_classes, False, False) self._add_background_class = add_background_class + # Dummy variable so that box predictor registers some variables. + self._dummy_var = tf.Variable(0.0, trainable=True, + name='box_predictor_var') + def _predict(self, image_features, **kwargs): image_feature = image_features[0] combined_feature_shape = shape_utils.combined_static_and_dynamic_shape( diff --git a/research/object_detection/utils/visualization_utils.py b/research/object_detection/utils/visualization_utils.py index 756d98e30ba71349685e04f3b2376b6ad6b76fc8..b5532a1d29b60d02efc9215e3685a273730a58ee 100644 --- a/research/object_detection/utils/visualization_utils.py +++ b/research/object_detection/utils/visualization_utils.py @@ -664,6 +664,10 @@ def draw_side_by_side_evaluation_image(eval_dict, key != input_data_fields.image_additional_channels): eval_dict[key] = tf.expand_dims(eval_dict[key], 0) + num_gt_boxes = [-1] * eval_dict[input_data_fields.original_image].shape[0] + if input_data_fields.num_groundtruth_boxes in eval_dict: + num_gt_boxes = tf.cast(eval_dict[input_data_fields.num_groundtruth_boxes], + tf.int32) for indx in range(eval_dict[input_data_fields.original_image].shape[0]): instance_masks = None if detection_fields.detection_masks in eval_dict: @@ -680,8 +684,10 @@ def draw_side_by_side_evaluation_image(eval_dict, keypoint_scores = tf.expand_dims( 
eval_dict[detection_fields.detection_keypoint_scores][indx], axis=0) else: - keypoint_scores = tf.cast(keypoint_ops.set_keypoint_visibilities( - keypoints), dtype=tf.float32) + keypoint_scores = tf.expand_dims(tf.cast( + keypoint_ops.set_keypoint_visibilities( + eval_dict[detection_fields.detection_keypoints][indx]), + dtype=tf.float32), axis=0) groundtruth_instance_masks = None if input_data_fields.groundtruth_instance_masks in eval_dict: @@ -699,10 +705,10 @@ def draw_side_by_side_evaluation_image(eval_dict, groundtruth_keypoint_scores = tf.expand_dims( tf.cast(eval_dict[gt_kpt_vis_fld][indx], dtype=tf.float32), axis=0) else: - groundtruth_keypoint_scores = tf.cast( + groundtruth_keypoint_scores = tf.expand_dims(tf.cast( keypoint_ops.set_keypoint_visibilities( - groundtruth_keypoints), dtype=tf.float32) - + eval_dict[input_data_fields.groundtruth_keypoints][indx]), + dtype=tf.float32), axis=0) images_with_detections = draw_bounding_boxes_on_image_tensors( tf.expand_dims( eval_dict[input_data_fields.original_image][indx], axis=0), @@ -725,16 +731,23 @@ def draw_side_by_side_evaluation_image(eval_dict, max_boxes_to_draw=max_boxes_to_draw, min_score_thresh=min_score_thresh, use_normalized_coordinates=use_normalized_coordinates) + num_gt_boxes_i = num_gt_boxes[indx] images_with_groundtruth = draw_bounding_boxes_on_image_tensors( tf.expand_dims( - eval_dict[input_data_fields.original_image][indx], axis=0), + eval_dict[input_data_fields.original_image][indx], + axis=0), tf.expand_dims( - eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), + eval_dict[input_data_fields.groundtruth_boxes][indx] + [:num_gt_boxes_i], + axis=0), tf.expand_dims( - eval_dict[input_data_fields.groundtruth_classes][indx], axis=0), + eval_dict[input_data_fields.groundtruth_classes][indx] + [:num_gt_boxes_i], + axis=0), tf.expand_dims( tf.ones_like( - eval_dict[input_data_fields.groundtruth_classes][indx], + eval_dict[input_data_fields.groundtruth_classes][indx] + [:num_gt_boxes_i], 
dtype=tf.float32), axis=0), category_index, @@ -760,13 +773,17 @@ def draw_side_by_side_evaluation_image(eval_dict, eval_dict[input_data_fields.image_additional_channels][indx], axis=0), tf.expand_dims( - eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), + eval_dict[input_data_fields.groundtruth_boxes][indx] + [:num_gt_boxes_i], + axis=0), tf.expand_dims( - eval_dict[input_data_fields.groundtruth_classes][indx], + eval_dict[input_data_fields.groundtruth_classes][indx] + [:num_gt_boxes_i], axis=0), tf.expand_dims( tf.ones_like( - eval_dict[input_data_fields.groundtruth_classes][indx], + eval_dict[input_data_fields.groundtruth_classes][indx] + [num_gt_boxes_i], dtype=tf.float32), axis=0), category_index, @@ -1098,6 +1115,7 @@ def visualize_boxes_and_labels_on_image_array( min_score_thresh=.5, agnostic_mode=False, line_thickness=4, + mask_alpha=.4, groundtruth_box_visualization_color='black', skip_boxes=False, skip_scores=False, @@ -1143,6 +1161,7 @@ def visualize_boxes_and_labels_on_image_array( class-agnostic mode or not. This mode will display scores but ignore classes. line_thickness: integer (default: 4) controlling line width of the boxes. + mask_alpha: transparency value between 0 and 1 (default: 0.4). groundtruth_box_visualization_color: box color for visualizing groundtruth boxes skip_boxes: whether to skip the drawing of bounding boxes. 
@@ -1218,7 +1237,8 @@ def visualize_boxes_and_labels_on_image_array( draw_mask_on_image_array( image, box_to_instance_masks_map[box], - color=color + color=color, + alpha=mask_alpha ) if instance_boundaries is not None: draw_mask_on_image_array( diff --git a/research/ptn/.gitignore b/research/ptn/.gitignore deleted file mode 100644 index 8479374e96149a2e772046da042820dae34ba305..0000000000000000000000000000000000000000 --- a/research/ptn/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -bazel -.idea -bazel-bin -bazel-out -bazel-genfiles -bazel-ptn -bazel-testlogs -*.pyc diff --git a/research/ptn/BUILD b/research/ptn/BUILD deleted file mode 100644 index f08c6172c4e899de77d3c10dc8e9d61519e750a8..0000000000000000000000000000000000000000 --- a/research/ptn/BUILD +++ /dev/null @@ -1,94 +0,0 @@ -py_library( - name = "input_generator", - srcs = ["input_generator.py"], - deps = [ - ], -) - -py_library( - name = "losses", - srcs = ["losses.py"], - deps = [ - ], -) - -py_library( - name = "metrics", - srcs = ["metrics.py"], - deps = [ - ], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - deps = [ - ], -) - -# Defines the Rotator model here -py_library( - name = "model_rotator", - srcs = ["model_rotator.py"], - deps = [ - ":input_generator", - ":losses", - ":metrics", - ":utils", - "//nets:deeprotator_factory", - ], -) - -# Defines the Im2vox model here -py_library( - name = "model_voxel_generation", - srcs = ["model_voxel_generation.py"], - deps = [ - ":input_generator", - "//nets:im2vox_factory", - ], -) - -py_library( - name = "model_ptn", - srcs = ["model_ptn.py"], - deps = [ - ":losses", - ":metrics", - ":model_voxel_generation", - ":utils", - "//nets:im2vox_factory", - ], -) - -py_binary( - name = "train_ptn", - srcs = ["train_ptn.py"], - deps = [ - ":model_ptn", - ], -) - -py_binary( - name = "eval_ptn", - srcs = ["eval_ptn.py"], - deps = [ - ":model_ptn", - ], -) - -py_binary( - name = "pretrain_rotator", - srcs = ["pretrain_rotator.py"], - deps = [ - 
":model_rotator", - ], -) - -py_binary( - name = "eval_rotator", - srcs = ["eval_rotator.py"], - deps = [ - ":model_rotator", - ], -) diff --git a/research/ptn/README.md b/research/ptn/README.md deleted file mode 100644 index e9558cd097f7c5f8dd30321222d777172cf5276d..0000000000000000000000000000000000000000 --- a/research/ptn/README.md +++ /dev/null @@ -1,75 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Perspective Transformer Nets - -## Introduction -This is the TensorFlow implementation for the NIPS 2016 work ["Perspective Transformer Nets: Learning Single-View 3D Object Reconstrution without 3D Supervision"](https://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf) - -Re-implemented by Xinchen Yan, Arkanath Pathak, Jasmine Hsu, Honglak Lee - -Reference: [Orginal implementation in Torch](https://github.com/xcyan/nips16_PTN) - -## How to run this code - -This implementation is ready to be run locally or ["distributed across multiple machines/tasks"](https://www.tensorflow.org/deploy/distributed). -You will need to set the task number flag for each task when running in a distributed fashion. -Please refer to the original paper for parameter explanations and training details. - -### Installation -* TensorFlow - * This code requires the latest open-source TensorFlow that you will need to build manually. - The [documentation](https://www.tensorflow.org/install/install_sources) provides the steps required for that. -* Bazel - * Follow the instructions [here](http://bazel.build/docs/install.html). 
- * Alternately, Download bazel from - [https://github.com/bazelbuild/bazel/releases](https://github.com/bazelbuild/bazel/releases) - for your system configuration. - * Check for the bazel version using this command: bazel version -* matplotlib - * Follow the instructions [here](https://matplotlib.org/users/installing.html). - * You can use a package repository like pip. -* scikit-image - * Follow the instructions [here](http://scikit-image.org/docs/dev/install.html). - * You can use a package repository like pip. -* PIL - * Install from [here](https://pypi.python.org/pypi/Pillow/2.2.1). - -### Dataset - -This code requires the dataset to be in *tfrecords* format with the following features: -* image - * Flattened list of image (float representations) for each view point. -* mask - * Flattened list of image masks (float representations) for each view point. -* vox - * Flattened list of voxels (float representations) for the object. - * This is needed for using vox loss and for prediction comparison. - -You can download the ShapeNet Dataset in tfrecords format from [here](https://drive.google.com/file/d/0B12XukcbU7T7OHQ4MGh6d25qQlk)*. - -* Disclaimer: This data is hosted personally by Arkanath Pathak for non-commercial research purposes. Please cite the [ShapeNet paper](https://arxiv.org/pdf/1512.03012.pdf) in your works when using ShapeNet for non-commercial research purposes. - -### Pretraining: pretrain_rotator.py for each RNN step -$ bazel run -c opt :pretrain_rotator -- --step_size={} --init_model={} - -Pass the init_model as the checkpoint path for the last step trained model. -You'll also need to set the inp_dir flag to where your data resides. - -### Training: train_ptn.py with last pretrained model. -$ bazel run -c opt :train_ptn -- --init_model={} - -### Example TensorBoard Visualizations - -To compare the visualizations make sure to set the model_name flag different for each parametric setting: - -This code adds summaries for each loss. 
For instance, these are the losses we encountered in the distributed pretraining for ShapeNet Chair Dataset with 10 workers and 16 parameter servers: -![ShapeNet Chair Pretraining](https://drive.google.com/uc?export=view&id=0B12XukcbU7T7bWdlTjhzbGJVaWs "ShapeNet Chair Experiment Pretraining Losses") - -You can expect such images after fine tuning the training as "grid_vis" under **Image** summaries in TensorBoard: -![ShapeNet Chair experiments with projection weight of 1](https://drive.google.com/uc?export=view&id=0B12XukcbU7T7ZFV6aEVBSDdCMjQ "ShapeNet Chair Dataset Predictions") -Here the third and fifth columns are the predicted masks and voxels respectively, alongside their ground truth values. - -A similar image for when trained on all ShapeNet Categories (Voxel visualizations might be skewed): -![ShapeNet All Categories experiments](https://drive.google.com/uc?export=view&id=0B12XukcbU7T7bDZKNFlkTVAzZmM "ShapeNet All Categories Dataset Predictions") diff --git a/research/ptn/WORKSPACE b/research/ptn/WORKSPACE deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/ptn/eval_ptn.py b/research/ptn/eval_ptn.py deleted file mode 100644 index 2f8dd96b1938452083253832a586eaf525d5e072..0000000000000000000000000000000000000000 --- a/research/ptn/eval_ptn.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains evaluation plan for the Im2vox model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf -from tensorflow import app - -import model_ptn - -flags = tf.app.flags -slim = tf.contrib.slim - -flags.DEFINE_string('inp_dir', - '', - 'Directory path containing the input data (tfrecords).') -flags.DEFINE_string( - 'dataset_name', 'shapenet_chair', - 'Dataset name that is to be used for training and evaluation.') -flags.DEFINE_integer('z_dim', 512, '') -flags.DEFINE_integer('f_dim', 64, '') -flags.DEFINE_integer('fc_dim', 1024, '') -flags.DEFINE_integer('num_views', 24, 'Num of viewpoints in the input data.') -flags.DEFINE_integer('image_size', 64, - 'Input images dimension (pixels) - width & height.') -flags.DEFINE_integer('vox_size', 32, 'Voxel prediction dimension.') -flags.DEFINE_integer('step_size', 24, '') -flags.DEFINE_integer('batch_size', 1, 'Batch size while training.') -flags.DEFINE_float('focal_length', 0.866, '') -flags.DEFINE_float('focal_range', 1.732, '') -flags.DEFINE_string('encoder_name', 'ptn_encoder', - 'Name of the encoder network being used.') -flags.DEFINE_string('decoder_name', 'ptn_vox_decoder', - 'Name of the decoder network being used.') -flags.DEFINE_string('projector_name', 'ptn_projector', - 'Name of the projector network being used.') -# Save options -flags.DEFINE_string('checkpoint_dir', '/tmp/ptn/eval/', - 'Directory path for saving trained models and other data.') -flags.DEFINE_string('model_name', 'ptn_proj', - 'Name of the model used in naming the TF job. 
Must be different for each run.') -flags.DEFINE_string('eval_set', 'val', 'Data partition to form evaluation on.') -# Optimization -flags.DEFINE_float('proj_weight', 10, 'Weighting factor for projection loss.') -flags.DEFINE_float('volume_weight', 0, 'Weighting factor for volume loss.') -flags.DEFINE_float('viewpoint_weight', 1, - 'Weighting factor for viewpoint loss.') -flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.') -flags.DEFINE_float('weight_decay', 0.001, '') -flags.DEFINE_float('clip_gradient_norm', 0, '') -# Summary -flags.DEFINE_integer('save_summaries_secs', 15, '') -flags.DEFINE_integer('eval_interval_secs', 60 * 5, '') -# Distribution -flags.DEFINE_string('master', '', '') - -FLAGS = flags.FLAGS - - -def main(argv=()): - del argv # Unused. - eval_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name, 'train') - log_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name, - 'eval_%s' % FLAGS.eval_set) - if not os.path.exists(eval_dir): - os.makedirs(eval_dir) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - g = tf.Graph() - - with g.as_default(): - eval_params = FLAGS - eval_params.batch_size = 1 - eval_params.step_size = FLAGS.num_views - ########### - ## model ## - ########### - model = model_ptn.model_PTN(eval_params) - ########## - ## data ## - ########## - eval_data = model.get_inputs( - FLAGS.inp_dir, - FLAGS.dataset_name, - eval_params.eval_set, - eval_params.batch_size, - eval_params.image_size, - eval_params.vox_size, - is_training=False) - inputs = model.preprocess_with_all_views(eval_data) - ############## - ## model_fn ## - ############## - model_fn = model.get_model_fn(is_training=False, run_projection=False) - outputs = model_fn(inputs) - ############# - ## metrics ## - ############# - names_to_values, names_to_updates = model.get_metrics(inputs, outputs) - del names_to_values - ################ - ## evaluation ## - ################ - num_batches = eval_data['num_samples'] - slim.evaluation.evaluation_loop( - 
master=FLAGS.master, - checkpoint_dir=eval_dir, - logdir=log_dir, - num_evals=num_batches, - eval_op=names_to_updates.values(), - eval_interval_secs=FLAGS.eval_interval_secs) - - -if __name__ == '__main__': - app.run() diff --git a/research/ptn/eval_rotator.py b/research/ptn/eval_rotator.py deleted file mode 100644 index b7fcf0fe4ab2b98754ffbc0d75efa64828db6e25..0000000000000000000000000000000000000000 --- a/research/ptn/eval_rotator.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Contains evaluation plan for the Rotator model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf -from tensorflow import app - -import model_rotator as model - -flags = tf.app.flags -slim = tf.contrib.slim - -flags.DEFINE_string('inp_dir', - '', - 'Directory path containing the input data (tfrecords).') -flags.DEFINE_string( - 'dataset_name', 'shapenet_chair', - 'Dataset name that is to be used for training and evaluation.') -flags.DEFINE_integer('z_dim', 512, '') -flags.DEFINE_integer('a_dim', 3, '') -flags.DEFINE_integer('f_dim', 64, '') -flags.DEFINE_integer('fc_dim', 1024, '') -flags.DEFINE_integer('num_views', 24, 'Num of viewpoints in the input data.') -flags.DEFINE_integer('image_size', 64, - 'Input images dimension (pixels) - width & height.') -flags.DEFINE_integer('step_size', 24, '') -flags.DEFINE_integer('batch_size', 2, '') -flags.DEFINE_string('encoder_name', 'ptn_encoder', - 'Name of the encoder network being used.') -flags.DEFINE_string('decoder_name', 'ptn_im_decoder', - 'Name of the decoder network being used.') -flags.DEFINE_string('rotator_name', 'ptn_rotator', - 'Name of the rotator network being used.') -# Save options -flags.DEFINE_string('checkpoint_dir', '/tmp/ptn_train/', - 'Directory path for saving trained models and other data.') -flags.DEFINE_string('model_name', 'ptn_proj', - 'Name of the model used in naming the TF job. 
Must be different for each run.') -# Optimization -flags.DEFINE_float('image_weight', 10, '') -flags.DEFINE_float('mask_weight', 1, '') -flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.') -flags.DEFINE_float('weight_decay', 0.001, '') -flags.DEFINE_float('clip_gradient_norm', 0, '') -# Summary -flags.DEFINE_integer('save_summaries_secs', 15, '') -flags.DEFINE_integer('eval_interval_secs', 60 * 5, '') -# Scheduling -flags.DEFINE_string('master', '', '') - -FLAGS = flags.FLAGS - - -def main(argv=()): - del argv # Unused. - eval_dir = os.path.join(FLAGS.checkpoint_dir, - FLAGS.model_name, 'train') - log_dir = os.path.join(FLAGS.checkpoint_dir, - FLAGS.model_name, 'eval') - - if not os.path.exists(eval_dir): - os.makedirs(eval_dir) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - g = tf.Graph() - - if FLAGS.step_size < FLAGS.num_views: - raise ValueError('Impossible step_size, must not be less than num_views.') - - g = tf.Graph() - with g.as_default(): - ########## - ## data ## - ########## - val_data = model.get_inputs( - FLAGS.inp_dir, - FLAGS.dataset_name, - 'val', - FLAGS.batch_size, - FLAGS.image_size, - is_training=False) - inputs = model.preprocess(val_data, FLAGS.step_size) - ########### - ## model ## - ########### - model_fn = model.get_model_fn(FLAGS, is_training=False) - outputs = model_fn(inputs) - ############# - ## metrics ## - ############# - names_to_values, names_to_updates = model.get_metrics( - inputs, outputs, FLAGS) - del names_to_values - ################ - ## evaluation ## - ################ - num_batches = int(val_data['num_samples'] / FLAGS.batch_size) - slim.evaluation.evaluation_loop( - master=FLAGS.master, - checkpoint_dir=eval_dir, - logdir=log_dir, - num_evals=num_batches, - eval_op=names_to_updates.values(), - eval_interval_secs=FLAGS.eval_interval_secs) - - -if __name__ == '__main__': - app.run() diff --git a/research/ptn/input_generator.py b/research/ptn/input_generator.py deleted file mode 100644 index 
7047d6483030b8b6ad0e68897121822030f84769..0000000000000000000000000000000000000000 --- a/research/ptn/input_generator.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Provides dataset dictionaries as used in our network models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf -import tensorflow.contrib.slim as slim - -from tensorflow.contrib.slim.python.slim.data import dataset -from tensorflow.contrib.slim.python.slim.data import dataset_data_provider -from tensorflow.contrib.slim.python.slim.data import tfexample_decoder - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'Images', - 'mask': 'Masks', - 'vox': 'Voxels' -} - - -def _get_split(file_pattern, num_samples, num_views, image_size, vox_size): - """Get dataset.Dataset for the given dataset file pattern and properties.""" - - # A dictionary from TF-Example keys to tf.FixedLenFeature instance. 
- keys_to_features = { - 'image': tf.FixedLenFeature( - shape=[num_views, image_size, image_size, 3], - dtype=tf.float32, default_value=None), - 'mask': tf.FixedLenFeature( - shape=[num_views, image_size, image_size, 1], - dtype=tf.float32, default_value=None), - 'vox': tf.FixedLenFeature( - shape=[vox_size, vox_size, vox_size, 1], - dtype=tf.float32, default_value=None), - } - - items_to_handler = { - 'image': tfexample_decoder.Tensor( - 'image', shape=[num_views, image_size, image_size, 3]), - 'mask': tfexample_decoder.Tensor( - 'mask', shape=[num_views, image_size, image_size, 1]), - 'vox': tfexample_decoder.Tensor( - 'vox', shape=[vox_size, vox_size, vox_size, 1]) - } - - decoder = tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handler) - - return dataset.Dataset( - data_sources=file_pattern, - reader=tf.TFRecordReader, - decoder=decoder, - num_samples=num_samples, - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS) - - -def get(dataset_dir, - dataset_name, - split_name, - shuffle=True, - num_readers=1, - common_queue_capacity=64, - common_queue_min=50): - """Provides input data for a specified dataset and split.""" - - dataset_to_kwargs = { - 'shapenet_chair': { - 'file_pattern': '03001627_%s.tfrecords' % split_name, - 'num_views': 24, - 'image_size': 64, - 'vox_size': 32, - }, 'shapenet_all': { - 'file_pattern': '*_%s.tfrecords' % split_name, - 'num_views': 24, - 'image_size': 64, - 'vox_size': 32, - }, - } - - split_sizes = { - 'shapenet_chair': { - 'train': 4744, - 'val': 678, - 'test': 1356, - }, - 'shapenet_all': { - 'train': 30643, - 'val': 4378, - 'test': 8762, - } - } - - kwargs = dataset_to_kwargs[dataset_name] - kwargs['file_pattern'] = os.path.join(dataset_dir, kwargs['file_pattern']) - kwargs['num_samples'] = split_sizes[dataset_name][split_name] - - dataset_split = _get_split(**kwargs) - data_provider = dataset_data_provider.DatasetDataProvider( - dataset_split, - num_readers=num_readers, - 
common_queue_capacity=common_queue_capacity, - common_queue_min=common_queue_min, - shuffle=shuffle) - - inputs = { - 'num_samples': dataset_split.num_samples, - } - - [image, mask, vox] = data_provider.get(['image', 'mask', 'vox']) - inputs['image'] = image - inputs['mask'] = mask - inputs['voxel'] = vox - - return inputs diff --git a/research/ptn/losses.py b/research/ptn/losses.py deleted file mode 100644 index 53cc28847a32af88b718fbb9e53ca287b48a8b65..0000000000000000000000000000000000000000 --- a/research/ptn/losses.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Defines the various loss functions in use by the PTN model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def add_rotator_image_loss(inputs, outputs, step_size, weight_scale): - """Computes the image loss of deep rotator model. - - Args: - inputs: Input dictionary to the model containing keys - such as `images_k'. - outputs: Output dictionary returned by the model containing keys - such as `images_k'. - step_size: A scalar representing the number of recurrent - steps (number of repeated out-of-plane rotations) - in the deep rotator network (int). 
- weight_scale: A reweighting factor applied over the image loss (float). - - Returns: - A `Tensor' scalar that returns averaged L2 loss - (divided by batch_size and step_size) between the - ground-truth images (RGB) and predicted images (tf.float32). - - """ - batch_size = tf.shape(inputs['images_0'])[0] - image_loss = 0 - for k in range(1, step_size + 1): - image_loss += tf.nn.l2_loss( - inputs['images_%d' % k] - outputs['images_%d' % k]) - - image_loss /= tf.to_float(step_size * batch_size) - slim.summaries.add_scalar_summary( - image_loss, 'image_loss', prefix='losses') - image_loss *= weight_scale - return image_loss - - -def add_rotator_mask_loss(inputs, outputs, step_size, weight_scale): - """Computes the mask loss of deep rotator model. - - Args: - inputs: Input dictionary to the model containing keys - such as `masks_k'. - outputs: Output dictionary returned by the model containing - keys such as `masks_k'. - step_size: A scalar representing the number of recurrent - steps (number of repeated out-of-plane rotations) - in the deep rotator network (int). - weight_scale: A reweighting factor applied over the mask loss (float). - - Returns: - A `Tensor' that returns averaged L2 loss - (divided by batch_size and step_size) between the ground-truth masks - (object silhouettes) and predicted masks (tf.float32). - - """ - batch_size = tf.shape(inputs['images_0'])[0] - mask_loss = 0 - for k in range(1, step_size + 1): - mask_loss += tf.nn.l2_loss( - inputs['masks_%d' % k] - outputs['masks_%d' % k]) - - mask_loss /= tf.to_float(step_size * batch_size) - slim.summaries.add_scalar_summary( - mask_loss, 'mask_loss', prefix='losses') - mask_loss *= weight_scale - return mask_loss - - -def add_volume_proj_loss(inputs, outputs, num_views, weight_scale): - """Computes the projection loss of voxel generation model. - - Args: - inputs: Input dictionary to the model containing keys such as - `images_1'. 
- outputs: Output dictionary returned by the model containing keys - such as `masks_k' and ``projs_k'. - num_views: A integer scalar represents the total number of - viewpoints for each of the object (int). - weight_scale: A reweighting factor applied over the projection loss (float). - - Returns: - A `Tensor' that returns the averaged L2 loss - (divided by batch_size and num_views) between the ground-truth - masks (object silhouettes) and predicted masks (tf.float32). - - """ - batch_size = tf.shape(inputs['images_1'])[0] - proj_loss = 0 - for k in range(num_views): - proj_loss += tf.nn.l2_loss( - outputs['masks_%d' % (k + 1)] - outputs['projs_%d' % (k + 1)]) - proj_loss /= tf.to_float(num_views * batch_size) - slim.summaries.add_scalar_summary( - proj_loss, 'proj_loss', prefix='losses') - proj_loss *= weight_scale - return proj_loss - - -def add_volume_loss(inputs, outputs, num_views, weight_scale): - """Computes the volume loss of voxel generation model. - - Args: - inputs: Input dictionary to the model containing keys such as - `images_1' and `voxels'. - outputs: Output dictionary returned by the model containing keys - such as `voxels_k'. - num_views: A scalar representing the total number of - viewpoints for each object (int). - weight_scale: A reweighting factor applied over the volume - loss (tf.float32). - - Returns: - A `Tensor' that returns the averaged L2 loss - (divided by batch_size and num_views) between the ground-truth - volumes and predicted volumes (tf.float32). - - """ - batch_size = tf.shape(inputs['images_1'])[0] - vol_loss = 0 - for k in range(num_views): - vol_loss += tf.nn.l2_loss( - inputs['voxels'] - outputs['voxels_%d' % (k + 1)]) - vol_loss /= tf.to_float(num_views * batch_size) - slim.summaries.add_scalar_summary( - vol_loss, 'vol_loss', prefix='losses') - vol_loss *= weight_scale - return vol_loss - - -def regularization_loss(scopes, params): - """Computes the weight decay as regularization during training. 
- - Args: - scopes: A list of different components of the model such as - ``encoder'', ``decoder'' and ``projector''. - params: Parameters of the model. - - Returns: - Regularization loss (tf.float32). - """ - - reg_loss = tf.zeros(dtype=tf.float32, shape=[]) - if params.weight_decay > 0: - is_trainable = lambda x: x in tf.trainable_variables() - is_weights = lambda x: 'weights' in x.name - for scope in scopes: - scope_vars = filter(is_trainable, - tf.contrib.framework.get_model_variables(scope)) - scope_vars = filter(is_weights, scope_vars) - if scope_vars: - reg_loss += tf.add_n([tf.nn.l2_loss(var) for var in scope_vars]) - - slim.summaries.add_scalar_summary( - reg_loss, 'reg_loss', prefix='losses') - reg_loss *= params.weight_decay - return reg_loss diff --git a/research/ptn/metrics.py b/research/ptn/metrics.py deleted file mode 100644 index 5f31dd5fd5a67af3f5cc94de88493c25750e413c..0000000000000000000000000000000000000000 --- a/research/ptn/metrics.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Provides metrics used by PTN.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -slim = tf.contrib.slim - - -def add_image_pred_metrics( - inputs, outputs, num_views, upscale_factor): - """Computes the image prediction metrics. - - Args: - inputs: Input dictionary of the deep rotator model (model_rotator.py). - outputs: Output dictionary of the deep rotator model (model_rotator.py). - num_views: An integer scalar representing the total number - of different viewpoints for each object in the dataset. - upscale_factor: A float scalar representing the number of pixels - per image (num_channels x image_height x image_width). - - Returns: - names_to_values: A dictionary representing the current value - of the metric. - names_to_updates: A dictionary representing the operation - that accumulates the error from a batch of data. - """ - names_to_values = dict() - names_to_updates = dict() - for k in xrange(num_views): - tmp_value, tmp_update = tf.contrib.metrics.streaming_mean_squared_error( - outputs['images_%d' % (k + 1)], inputs['images_%d' % (k + 1)]) - name = 'image_pred/rnn_%d' % (k + 1) - names_to_values.update({name: tmp_value * upscale_factor}) - names_to_updates.update({name: tmp_update}) - return names_to_values, names_to_updates - - -def add_mask_pred_metrics( - inputs, outputs, num_views, upscale_factor): - """Computes the mask prediction metrics. - - Args: - inputs: Input dictionary of the deep rotator model (model_rotator.py). - outputs: Output dictionary of the deep rotator model (model_rotator.py). - num_views: An integer scalar representing the total number - of different viewpoints for each object in the dataset. - upscale_factor: A float scalar representing the number of pixels - per image (num_channels x image_height x image_width). 
- - Returns: - names_to_values: A dictionary representing the current value - of the metric. - names_to_updates: A dictionary representing the operation - that accumulates the error from a batch of data. - - """ - names_to_values = dict() - names_to_updates = dict() - for k in xrange(num_views): - tmp_value, tmp_update = tf.contrib.metrics.streaming_mean_squared_error( - outputs['masks_%d' % (k + 1)], inputs['masks_%d' % (k + 1)]) - name = 'mask_pred/rnn_%d' % (k + 1) - names_to_values.update({name: tmp_value * upscale_factor}) - names_to_updates.update({name: tmp_update}) - return names_to_values, names_to_updates - - -def add_volume_iou_metrics(inputs, outputs): - """Computes the per-instance volume IOU. - - Args: - inputs: Input dictionary of the voxel generation model. - outputs: Output dictionary returned by the voxel generation model. - - Returns: - names_to_values: metrics->values (dict). - names_to_updates: metrics->ops (dict). - - """ - names_to_values = dict() - names_to_updates = dict() - labels = tf.greater_equal(inputs['voxels'], 0.5) - predictions = tf.greater_equal(outputs['voxels_1'], 0.5) - labels = (2 - tf.to_int32(labels)) - 1 - predictions = (3 - tf.to_int32(predictions) * 2) - 1 - tmp_values, tmp_updates = tf.metrics.mean_iou( - labels=labels, - predictions=predictions, - num_classes=3) - names_to_values['volume_iou'] = tmp_values * 3.0 - names_to_updates['volume_iou'] = tmp_updates - return names_to_values, names_to_updates diff --git a/research/ptn/model_ptn.py b/research/ptn/model_ptn.py deleted file mode 100644 index cc0fc4fa38e5479d78307e764ae490e124e34ab7..0000000000000000000000000000000000000000 --- a/research/ptn/model_ptn.py +++ /dev/null @@ -1,232 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Implementations for Im2Vox PTN (NIPS16) model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import losses -import metrics -import model_voxel_generation -import utils -from nets import im2vox_factory - -slim = tf.contrib.slim - - -class model_PTN(model_voxel_generation.Im2Vox): # pylint:disable=invalid-name - """Inherits the generic Im2Vox model class and implements the functions.""" - - def __init__(self, params): - super(model_PTN, self).__init__(params) - - # For testing, this selects all views in input - def preprocess_with_all_views(self, raw_inputs): - (quantity, num_views) = raw_inputs['images'].get_shape().as_list()[:2] - - inputs = dict() - inputs['voxels'] = [] - inputs['images_1'] = [] - for k in xrange(num_views): - inputs['matrix_%d' % (k + 1)] = [] - inputs['matrix_1'] = [] - for n in xrange(quantity): - for k in xrange(num_views): - inputs['images_1'].append(raw_inputs['images'][n, k, :, :, :]) - inputs['voxels'].append(raw_inputs['voxels'][n, :, :, :, :]) - tf_matrix = self.get_transform_matrix(k) - inputs['matrix_%d' % (k + 1)].append(tf_matrix) - - inputs['images_1'] = tf.stack(inputs['images_1']) - inputs['voxels'] = tf.stack(inputs['voxels']) - for k in xrange(num_views): - inputs['matrix_%d' % (k + 1)] = tf.stack(inputs['matrix_%d' % (k + 1)]) - - return inputs - - def get_model_fn(self, 
is_training=True, reuse=False, run_projection=True): - return im2vox_factory.get(self._params, is_training, reuse, run_projection) - - def get_regularization_loss(self, scopes): - return losses.regularization_loss(scopes, self._params) - - def get_loss(self, inputs, outputs): - """Computes the loss used for PTN paper (projection + volume loss).""" - g_loss = tf.zeros(dtype=tf.float32, shape=[]) - - if self._params.proj_weight: - g_loss += losses.add_volume_proj_loss( - inputs, outputs, self._params.step_size, self._params.proj_weight) - - if self._params.volume_weight: - g_loss += losses.add_volume_loss(inputs, outputs, 1, - self._params.volume_weight) - - slim.summaries.add_scalar_summary(g_loss, 'im2vox_loss', prefix='losses') - - return g_loss - - def get_metrics(self, inputs, outputs): - """Aggregate the metrics for voxel generation model. - - Args: - inputs: Input dictionary of the voxel generation model. - outputs: Output dictionary returned by the voxel generation model. - - Returns: - names_to_values: metrics->values (dict). - names_to_updates: metrics->ops (dict). 
- """ - names_to_values = dict() - names_to_updates = dict() - - tmp_values, tmp_updates = metrics.add_volume_iou_metrics(inputs, outputs) - - names_to_values.update(tmp_values) - names_to_updates.update(tmp_updates) - - for name, value in names_to_values.iteritems(): - slim.summaries.add_scalar_summary( - value, name, prefix='eval', print_summary=True) - - return names_to_values, names_to_updates - - def write_disk_grid(self, - global_step, - log_dir, - input_images, - gt_projs, - pred_projs, - input_voxels=None, - output_voxels=None): - """Function called by TF to save the prediction periodically.""" - summary_freq = self._params.save_every - - def write_grid(input_images, gt_projs, pred_projs, global_step, - input_voxels, output_voxels): - """Native python function to call for writing images to files.""" - grid = _build_image_grid( - input_images, - gt_projs, - pred_projs, - input_voxels=input_voxels, - output_voxels=output_voxels) - - if global_step % summary_freq == 0: - img_path = os.path.join(log_dir, '%s.jpg' % str(global_step)) - utils.save_image(grid, img_path) - return grid - - save_op = tf.py_func(write_grid, [ - input_images, gt_projs, pred_projs, global_step, input_voxels, - output_voxels - ], [tf.uint8], 'write_grid')[0] - slim.summaries.add_image_summary( - tf.expand_dims(save_op, axis=0), name='grid_vis') - return save_op - - def get_transform_matrix(self, view_out): - """Get the 4x4 Perspective Transfromation matrix used for PTN.""" - num_views = self._params.num_views - focal_length = self._params.focal_length - focal_range = self._params.focal_range - phi = 30 - theta_interval = 360.0 / num_views - theta = theta_interval * view_out - - # pylint: disable=invalid-name - camera_matrix = np.zeros((4, 4), dtype=np.float32) - intrinsic_matrix = np.eye(4, dtype=np.float32) - extrinsic_matrix = np.eye(4, dtype=np.float32) - - sin_phi = np.sin(float(phi) / 180.0 * np.pi) - cos_phi = np.cos(float(phi) / 180.0 * np.pi) - sin_theta = np.sin(float(-theta) / 
180.0 * np.pi) - cos_theta = np.cos(float(-theta) / 180.0 * np.pi) - - rotation_azimuth = np.zeros((3, 3), dtype=np.float32) - rotation_azimuth[0, 0] = cos_theta - rotation_azimuth[2, 2] = cos_theta - rotation_azimuth[0, 2] = -sin_theta - rotation_azimuth[2, 0] = sin_theta - rotation_azimuth[1, 1] = 1.0 - - ## rotation axis -- x - rotation_elevation = np.zeros((3, 3), dtype=np.float32) - rotation_elevation[0, 0] = cos_phi - rotation_elevation[0, 1] = sin_phi - rotation_elevation[1, 0] = -sin_phi - rotation_elevation[1, 1] = cos_phi - rotation_elevation[2, 2] = 1.0 - - rotation_matrix = np.matmul(rotation_azimuth, rotation_elevation) - displacement = np.zeros((3, 1), dtype=np.float32) - displacement[0, 0] = float(focal_length) + float(focal_range) / 2.0 - displacement = np.matmul(rotation_matrix, displacement) - - extrinsic_matrix[0:3, 0:3] = rotation_matrix - extrinsic_matrix[0:3, 3:4] = -displacement - - intrinsic_matrix[2, 2] = 1.0 / float(focal_length) - intrinsic_matrix[1, 1] = 1.0 / float(focal_length) - - camera_matrix = np.matmul(extrinsic_matrix, intrinsic_matrix) - return camera_matrix - - -def _build_image_grid(input_images, - gt_projs, - pred_projs, - input_voxels, - output_voxels, - vis_size=128): - """Builds a grid image by concatenating the input images.""" - quantity = input_images.shape[0] - - for row in xrange(int(quantity / 3)): - for col in xrange(3): - index = row * 3 + col - input_img_ = utils.resize_image(input_images[index, :, :, :], vis_size, - vis_size) - gt_proj_ = utils.resize_image(gt_projs[index, :, :, :], vis_size, - vis_size) - pred_proj_ = utils.resize_image(pred_projs[index, :, :, :], vis_size, - vis_size) - gt_voxel_vis = utils.resize_image( - utils.display_voxel(input_voxels[index, :, :, :, 0]), vis_size, - vis_size) - pred_voxel_vis = utils.resize_image( - utils.display_voxel(output_voxels[index, :, :, :, 0]), vis_size, - vis_size) - if col == 0: - tmp_ = np.concatenate( - [input_img_, gt_proj_, pred_proj_, gt_voxel_vis, 
pred_voxel_vis], 1) - else: - tmp_ = np.concatenate([ - tmp_, input_img_, gt_proj_, pred_proj_, gt_voxel_vis, pred_voxel_vis - ], 1) - if row == 0: - out_grid = tmp_ - else: - out_grid = np.concatenate([out_grid, tmp_], 0) - - return out_grid diff --git a/research/ptn/model_rotator.py b/research/ptn/model_rotator.py deleted file mode 100644 index 28860bc1025f78b5413b84d0c2bbf632874e0853..0000000000000000000000000000000000000000 --- a/research/ptn/model_rotator.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Helper functions for pretraining (rotator) as described in PTN paper.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import input_generator -import losses -import metrics -import utils -from nets import deeprotator_factory - -slim = tf.contrib.slim - - -def _get_data_from_provider(inputs, batch_size, split_name): - """Returns dictionary of batch input data processed by tf.train.batch.""" - images, masks = tf.train.batch( - [inputs['image'], inputs['mask']], - batch_size=batch_size, - num_threads=8, - capacity=8 * batch_size, - name='batching_queues/%s' % (split_name)) - - outputs = dict() - outputs['images'] = images - outputs['masks'] = masks - outputs['num_samples'] = inputs['num_samples'] - - return outputs - - -def get_inputs(dataset_dir, dataset_name, split_name, batch_size, image_size, - is_training): - """Loads the given dataset and split.""" - del image_size # Unused - with tf.variable_scope('data_loading_%s/%s' % (dataset_name, split_name)): - common_queue_min = 50 - common_queue_capacity = 256 - num_readers = 4 - - inputs = input_generator.get( - dataset_dir, - dataset_name, - split_name, - shuffle=is_training, - num_readers=num_readers, - common_queue_min=common_queue_min, - common_queue_capacity=common_queue_capacity) - - return _get_data_from_provider(inputs, batch_size, split_name) - - -def preprocess(raw_inputs, step_size): - """Selects the subset of viewpoints to train on.""" - shp = raw_inputs['images'].get_shape().as_list() - quantity = shp[0] - num_views = shp[1] - image_size = shp[2] - del image_size # Unused - - batch_rot = np.zeros((quantity, 3), dtype=np.float32) - inputs = dict() - for n in xrange(step_size + 1): - inputs['images_%d' % n] = [] - inputs['masks_%d' % n] = [] - - for n in 
xrange(quantity): - view_in = np.random.randint(0, num_views) - rng_rot = np.random.randint(0, 2) - if step_size == 1: - rng_rot = np.random.randint(0, 3) - - delta = 0 - if rng_rot == 0: - delta = -1 - batch_rot[n, 2] = 1 - elif rng_rot == 1: - delta = 1 - batch_rot[n, 0] = 1 - else: - delta = 0 - batch_rot[n, 1] = 1 - - inputs['images_0'].append(raw_inputs['images'][n, view_in, :, :, :]) - inputs['masks_0'].append(raw_inputs['masks'][n, view_in, :, :, :]) - - view_out = view_in - for k in xrange(1, step_size + 1): - view_out += delta - if view_out >= num_views: - view_out = 0 - if view_out < 0: - view_out = num_views - 1 - - inputs['images_%d' % k].append(raw_inputs['images'][n, view_out, :, :, :]) - inputs['masks_%d' % k].append(raw_inputs['masks'][n, view_out, :, :, :]) - - for n in xrange(step_size + 1): - inputs['images_%d' % n] = tf.stack(inputs['images_%d' % n]) - inputs['masks_%d' % n] = tf.stack(inputs['masks_%d' % n]) - - inputs['actions'] = tf.constant(batch_rot, dtype=tf.float32) - return inputs - - -def get_init_fn(scopes, params): - """Initialization assignment operator function used while training.""" - if not params.init_model: - return None - - is_trainable = lambda x: x in tf.trainable_variables() - var_list = [] - for scope in scopes: - var_list.extend( - filter(is_trainable, tf.contrib.framework.get_model_variables(scope))) - - init_assign_op, init_feed_dict = slim.assign_from_checkpoint( - params.init_model, var_list) - - def init_assign_function(sess): - sess.run(init_assign_op, init_feed_dict) - - return init_assign_function - - -def get_model_fn(params, is_training, reuse=False): - return deeprotator_factory.get(params, is_training, reuse) - - -def get_regularization_loss(scopes, params): - return losses.regularization_loss(scopes, params) - - -def get_loss(inputs, outputs, params): - """Computes the rotator loss.""" - g_loss = tf.zeros(dtype=tf.float32, shape=[]) - - if hasattr(params, 'image_weight'): - g_loss += 
losses.add_rotator_image_loss(inputs, outputs, params.step_size, - params.image_weight) - - if hasattr(params, 'mask_weight'): - g_loss += losses.add_rotator_mask_loss(inputs, outputs, params.step_size, - params.mask_weight) - - slim.summaries.add_scalar_summary( - g_loss, 'rotator_loss', prefix='losses') - - return g_loss - - -def get_train_op_for_scope(loss, optimizer, scopes, params): - """Train operation function for the given scope used file training.""" - is_trainable = lambda x: x in tf.trainable_variables() - - var_list = [] - update_ops = [] - - for scope in scopes: - var_list.extend( - filter(is_trainable, tf.contrib.framework.get_model_variables(scope))) - update_ops.extend(tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) - - return slim.learning.create_train_op( - loss, - optimizer, - update_ops=update_ops, - variables_to_train=var_list, - clip_gradient_norm=params.clip_gradient_norm) - - -def get_metrics(inputs, outputs, params): - """Aggregate the metrics for rotator model. - - Args: - inputs: Input dictionary of the rotator model. - outputs: Output dictionary returned by the rotator model. - params: Hyperparameters of the rotator model. - - Returns: - names_to_values: metrics->values (dict). - names_to_updates: metrics->ops (dict). 
- """ - names_to_values = dict() - names_to_updates = dict() - - tmp_values, tmp_updates = metrics.add_image_pred_metrics( - inputs, outputs, params.num_views, 3*params.image_size**2) - names_to_values.update(tmp_values) - names_to_updates.update(tmp_updates) - - tmp_values, tmp_updates = metrics.add_mask_pred_metrics( - inputs, outputs, params.num_views, params.image_size**2) - names_to_values.update(tmp_values) - names_to_updates.update(tmp_updates) - - for name, value in names_to_values.iteritems(): - slim.summaries.add_scalar_summary( - value, name, prefix='eval', print_summary=True) - - return names_to_values, names_to_updates - - -def write_disk_grid(global_step, summary_freq, log_dir, input_images, - output_images, pred_images, pred_masks): - """Function called by TF to save the prediction periodically.""" - - def write_grid(grid, global_step): - """Native python function to call for writing images to files.""" - if global_step % summary_freq == 0: - img_path = os.path.join(log_dir, '%s.jpg' % str(global_step)) - utils.save_image(grid, img_path) - return 0 - - grid = _build_image_grid(input_images, output_images, pred_images, pred_masks) - slim.summaries.add_image_summary( - tf.expand_dims(grid, axis=0), name='grid_vis') - save_op = tf.py_func(write_grid, [grid, global_step], [tf.int64], - 'write_grid')[0] - return save_op - - -def _build_image_grid(input_images, output_images, pred_images, pred_masks): - """Builds a grid image by concatenating the input images.""" - quantity = input_images.get_shape().as_list()[0] - - for row in xrange(int(quantity / 4)): - for col in xrange(4): - index = row * 4 + col - input_img_ = input_images[index, :, :, :] - output_img_ = output_images[index, :, :, :] - pred_img_ = pred_images[index, :, :, :] - pred_mask_ = tf.tile(pred_masks[index, :, :, :], [1, 1, 3]) - if col == 0: - tmp_ = tf.concat([input_img_, output_img_, pred_img_, pred_mask_], - 1) ## to the right - else: - tmp_ = tf.concat([tmp_, input_img_, output_img_, 
pred_img_, pred_mask_], - 1) - if row == 0: - out_grid = tmp_ - else: - out_grid = tf.concat([out_grid, tmp_], 0) - - return out_grid diff --git a/research/ptn/model_voxel_generation.py b/research/ptn/model_voxel_generation.py deleted file mode 100644 index 0c8fc84669a7fccc0f3a33ecb4c847162d99521e..0000000000000000000000000000000000000000 --- a/research/ptn/model_voxel_generation.py +++ /dev/null @@ -1,222 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Base class for voxel generation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -import input_generator -import utils - -slim = tf.contrib.slim - - -class Im2Vox(object): - """Defines the voxel generation model.""" - - __metaclass__ = abc.ABCMeta - - def __init__(self, params): - self._params = params - - @abc.abstractmethod - def get_metrics(self, inputs, outputs): - """Gets dictionaries from metrics to value `Tensors` & update `Tensors`.""" - pass - - @abc.abstractmethod - def get_loss(self, inputs, outputs): - pass - - @abc.abstractmethod - def get_regularization_loss(self, scopes): - pass - - def set_params(self, params): - self._params = params - - def get_inputs(self, - dataset_dir, - dataset_name, - split_name, - batch_size, - image_size, - vox_size, - is_training=True): - """Loads data for a specified dataset and split.""" - del image_size, vox_size - with tf.variable_scope('data_loading_%s/%s' % (dataset_name, split_name)): - common_queue_min = 64 - common_queue_capacity = 256 - num_readers = 4 - - inputs = input_generator.get( - dataset_dir, - dataset_name, - split_name, - shuffle=is_training, - num_readers=num_readers, - common_queue_min=common_queue_min, - common_queue_capacity=common_queue_capacity) - - images, voxels = tf.train.batch( - [inputs['image'], inputs['voxel']], - batch_size=batch_size, - num_threads=8, - capacity=8 * batch_size, - name='batching_queues/%s/%s' % (dataset_name, split_name)) - - outputs = dict() - outputs['images'] = images - outputs['voxels'] = voxels - outputs['num_samples'] = inputs['num_samples'] - - return outputs - - def preprocess(self, raw_inputs, step_size): - """Selects the subset of viewpoints to train on.""" - (quantity, num_views) = 
raw_inputs['images'].get_shape().as_list()[:2] - - inputs = dict() - inputs['voxels'] = raw_inputs['voxels'] - - for k in xrange(step_size): - inputs['images_%d' % (k + 1)] = [] - inputs['matrix_%d' % (k + 1)] = [] - - for n in xrange(quantity): - selected_views = np.random.choice(num_views, step_size, replace=False) - for k in xrange(step_size): - view_selected = selected_views[k] - inputs['images_%d' % - (k + 1)].append(raw_inputs['images'][n, view_selected, :, :, :]) - tf_matrix = self.get_transform_matrix(view_selected) - inputs['matrix_%d' % (k + 1)].append(tf_matrix) - - for k in xrange(step_size): - inputs['images_%d' % (k + 1)] = tf.stack(inputs['images_%d' % (k + 1)]) - inputs['matrix_%d' % (k + 1)] = tf.stack(inputs['matrix_%d' % (k + 1)]) - - return inputs - - def get_init_fn(self, scopes): - """Initialization assignment operator function used while training.""" - if not self._params.init_model: - return None - - is_trainable = lambda x: x in tf.trainable_variables() - var_list = [] - for scope in scopes: - var_list.extend( - filter(is_trainable, tf.contrib.framework.get_model_variables(scope))) - - init_assign_op, init_feed_dict = slim.assign_from_checkpoint( - self._params.init_model, var_list) - - def init_assign_function(sess): - sess.run(init_assign_op, init_feed_dict) - - return init_assign_function - - def get_train_op_for_scope(self, loss, optimizer, scopes): - """Train operation function for the given scope used file training.""" - is_trainable = lambda x: x in tf.trainable_variables() - - var_list = [] - update_ops = [] - - for scope in scopes: - var_list.extend( - filter(is_trainable, tf.contrib.framework.get_model_variables(scope))) - update_ops.extend(tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) - - return slim.learning.create_train_op( - loss, - optimizer, - update_ops=update_ops, - variables_to_train=var_list, - clip_gradient_norm=self._params.clip_gradient_norm) - - def write_disk_grid(self, - global_step, - log_dir, - input_images, 
- gt_projs, - pred_projs, - pred_voxels=None): - """Function called by TF to save the prediction periodically.""" - summary_freq = self._params.save_every - - def write_grid(input_images, gt_projs, pred_projs, pred_voxels, - global_step): - """Native python function to call for writing images to files.""" - grid = _build_image_grid(input_images, gt_projs, pred_projs, pred_voxels) - - if global_step % summary_freq == 0: - img_path = os.path.join(log_dir, '%s.jpg' % str(global_step)) - utils.save_image(grid, img_path) - with open( - os.path.join(log_dir, 'pred_voxels_%s' % str(global_step)), - 'w') as fout: - np.save(fout, pred_voxels) - with open( - os.path.join(log_dir, 'input_images_%s' % str(global_step)), - 'w') as fout: - np.save(fout, input_images) - - return grid - - py_func_args = [ - input_images, gt_projs, pred_projs, pred_voxels, global_step - ] - save_grid_op = tf.py_func(write_grid, py_func_args, [tf.uint8], - 'wrtie_grid')[0] - slim.summaries.add_image_summary( - tf.expand_dims(save_grid_op, axis=0), name='grid_vis') - return save_grid_op - - -def _build_image_grid(input_images, gt_projs, pred_projs, pred_voxels): - """Build the visualization grid with py_func.""" - quantity, img_height, img_width = input_images.shape[:3] - for row in xrange(int(quantity / 3)): - for col in xrange(3): - index = row * 3 + col - input_img_ = input_images[index, :, :, :] - gt_proj_ = gt_projs[index, :, :, :] - pred_proj_ = pred_projs[index, :, :, :] - pred_voxel_ = utils.display_voxel(pred_voxels[index, :, :, :, 0]) - pred_voxel_ = utils.resize_image(pred_voxel_, img_height, img_width) - if col == 0: - tmp_ = np.concatenate([input_img_, gt_proj_, pred_proj_, pred_voxel_], - 1) - else: - tmp_ = np.concatenate( - [tmp_, input_img_, gt_proj_, pred_proj_, pred_voxel_], 1) - if row == 0: - out_grid = tmp_ - else: - out_grid = np.concatenate([out_grid, tmp_], 0) - - out_grid = out_grid.astype(np.uint8) - return out_grid diff --git a/research/ptn/nets/BUILD 
b/research/ptn/nets/BUILD deleted file mode 100644 index 987499341ef25c8e19d532fdb14d3a9e842d0909..0000000000000000000000000000000000000000 --- a/research/ptn/nets/BUILD +++ /dev/null @@ -1,64 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -py_library( - name = "deeprotator_factory", - srcs = ["deeprotator_factory.py"], - deps = [ - ":ptn_encoder", - ":ptn_im_decoder", - ":ptn_rotator", - ], -) - -py_library( - name = "im2vox_factory", - srcs = ["im2vox_factory.py"], - deps = [ - ":perspective_projector", - ":ptn_encoder", - ":ptn_vox_decoder", - ], -) - -py_library( - name = "perspective_projector", - srcs = ["perspective_projector.py"], - deps = [ - ":perspective_transform", - ], -) - -py_library( - name = "perspective_transform", - srcs = ["perspective_transform.py"], - deps = [ - ], -) - -py_library( - name = "ptn_encoder", - srcs = ["ptn_encoder.py"], - deps = [ - ], -) - -py_library( - name = "ptn_im_decoder", - srcs = ["ptn_im_decoder.py"], - deps = [ - ], -) - -py_library( - name = "ptn_rotator", - srcs = ["ptn_rotator.py"], - deps = [ - ], -) - -py_library( - name = "ptn_vox_decoder", - srcs = ["ptn_vox_decoder.py"], - deps = [ - ], -) diff --git a/research/ptn/nets/deeprotator_factory.py b/research/ptn/nets/deeprotator_factory.py deleted file mode 100644 index e16170c41b7daae4a00068183cd0a4056ff279c3..0000000000000000000000000000000000000000 --- a/research/ptn/nets/deeprotator_factory.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Factory module for different encoder/decoder network models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import ptn_encoder -from nets import ptn_im_decoder -from nets import ptn_rotator - -_NAME_TO_NETS = { - 'ptn_encoder': ptn_encoder, - 'ptn_rotator': ptn_rotator, - 'ptn_im_decoder': ptn_im_decoder, -} - - -def _get_network(name): - """Gets a single network component.""" - - if name not in _NAME_TO_NETS: - raise ValueError('Network name [%s] not recognized.' % name) - return _NAME_TO_NETS[name].model - - -def get(params, is_training=False, reuse=False): - """Factory function to retrieve a network model. - - Args: - params: Different parameters used througout ptn, typically FLAGS (dict) - is_training: Set to True if while training (boolean) - reuse: Set as True if either using a pre-trained model or - in the training loop while the graph has already been built (boolean) - Returns: - Model function for network (inputs to outputs) - """ - - def model(inputs): - """Model function corresponding to a specific network architecture.""" - outputs = {} - - # First, build the encoder. - encoder_fn = _get_network(params.encoder_name) - with tf.variable_scope('encoder', reuse=reuse): - # Produces id/pose units - features = encoder_fn(inputs['images_0'], params, is_training) - outputs['ids'] = features['ids'] - outputs['poses_0'] = features['poses'] - - # Second, build the rotator and decoder. 
- rotator_fn = _get_network(params.rotator_name) - with tf.variable_scope('rotator', reuse=reuse): - outputs['poses_1'] = rotator_fn(outputs['poses_0'], inputs['actions'], - params, is_training) - decoder_fn = _get_network(params.decoder_name) - with tf.variable_scope('decoder', reuse=reuse): - dec_output = decoder_fn(outputs['ids'], outputs['poses_1'], params, - is_training) - outputs['images_1'] = dec_output['images'] - outputs['masks_1'] = dec_output['masks'] - - # Third, build the recurrent connection - for k in range(1, params.step_size): - with tf.variable_scope('rotator', reuse=True): - outputs['poses_%d' % (k + 1)] = rotator_fn( - outputs['poses_%d' % k], inputs['actions'], params, is_training) - with tf.variable_scope('decoder', reuse=True): - dec_output = decoder_fn(outputs['ids'], outputs['poses_%d' % (k + 1)], - params, is_training) - outputs['images_%d' % (k + 1)] = dec_output['images'] - outputs['masks_%d' % (k + 1)] = dec_output['masks'] - - return outputs - - return model diff --git a/research/ptn/nets/im2vox_factory.py b/research/ptn/nets/im2vox_factory.py deleted file mode 100644 index c54b96c24a56a8fa796cbc4f881a78cfbf41b86d..0000000000000000000000000000000000000000 --- a/research/ptn/nets/im2vox_factory.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Factory module for getting the complete image to voxel generation network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import perspective_projector -from nets import ptn_encoder -from nets import ptn_vox_decoder - -_NAME_TO_NETS = { - 'ptn_encoder': ptn_encoder, - 'ptn_vox_decoder': ptn_vox_decoder, - 'perspective_projector': perspective_projector, -} - - -def _get_network(name): - """Gets a single encoder/decoder network model.""" - - if name not in _NAME_TO_NETS: - raise ValueError('Network name [%s] not recognized.' % name) - return _NAME_TO_NETS[name].model - - -def get(params, is_training=False, reuse=False, run_projection=True): - """Factory function to get the training/pretraining im->vox model (NIPS16). - - Args: - params: Different parameters used througout ptn, typically FLAGS (dict). - is_training: Set to True if while training (boolean). - reuse: Set as True if sharing variables with a model that has already - been built (boolean). - run_projection: Set as False if not interested in mask and projection - images. Useful in evaluation routine (boolean). - Returns: - Model function for network (inputs to outputs). 
- """ - def model(inputs): - """Model function corresponding to a specific network architecture.""" - outputs = {} - - # First, build the encoder - encoder_fn = _get_network(params.encoder_name) - with tf.variable_scope('encoder', reuse=reuse): - # Produces id/pose units - enc_outputs = encoder_fn(inputs['images_1'], params, is_training) - outputs['ids_1'] = enc_outputs['ids'] - - # Second, build the decoder and projector - decoder_fn = _get_network(params.decoder_name) - with tf.variable_scope('decoder', reuse=reuse): - outputs['voxels_1'] = decoder_fn(outputs['ids_1'], params, is_training) - if run_projection: - projector_fn = _get_network(params.projector_name) - with tf.variable_scope('projector', reuse=reuse): - outputs['projs_1'] = projector_fn( - outputs['voxels_1'], inputs['matrix_1'], params, is_training) - # Infer the ground-truth mask - with tf.variable_scope('oracle', reuse=reuse): - outputs['masks_1'] = projector_fn(inputs['voxels'], inputs['matrix_1'], - params, False) - - # Third, build the entire graph (bundled strategy described in PTN paper) - for k in range(1, params.step_size): - with tf.variable_scope('projector', reuse=True): - outputs['projs_%d' % (k + 1)] = projector_fn( - outputs['voxels_1'], inputs['matrix_%d' % - (k + 1)], params, is_training) - with tf.variable_scope('oracle', reuse=True): - outputs['masks_%d' % (k + 1)] = projector_fn( - inputs['voxels'], inputs['matrix_%d' % (k + 1)], params, False) - - return outputs - - return model diff --git a/research/ptn/nets/perspective_projector.py b/research/ptn/nets/perspective_projector.py deleted file mode 100644 index 38c7df86b203884327d4c4eda5f02b7fc1b16323..0000000000000000000000000000000000000000 --- a/research/ptn/nets/perspective_projector.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""3D->2D projector model as used in PTN (NIPS16).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import perspective_transform - - -def model(voxels, transform_matrix, params, is_training): - """Model transforming the 3D voxels into 2D projections. - - Args: - voxels: A tensor of size [batch, depth, height, width, channel] - representing the input of projection layer (tf.float32). - transform_matrix: A tensor of size [batch, 16] representing - the flattened 4-by-4 matrix for transformation (tf.float32). - params: Model parameters (dict). - is_training: Set to True if while training (boolean). - - Returns: - A transformed tensor (tf.float32) - - """ - del is_training # Doesn't make a difference for projector - # Rearrangement (batch, z, y, x, channel) --> (batch, y, z, x, channel). - # By the standard, projection happens along z-axis but the voxels - # are stored in a different way. So we need to switch the y and z - # axis for transformation operation. 
- voxels = tf.transpose(voxels, [0, 2, 1, 3, 4]) - z_near = params.focal_length - z_far = params.focal_length + params.focal_range - transformed_voxels = perspective_transform.transformer( - voxels, transform_matrix, [params.vox_size] * 3, z_near, z_far) - views = tf.reduce_max(transformed_voxels, [1]) - views = tf.reverse(views, [1]) - return views diff --git a/research/ptn/nets/perspective_transform.py b/research/ptn/nets/perspective_transform.py deleted file mode 100644 index 1c01f15f21d4f6f36c4638d0b8ad916e895aa0b2..0000000000000000000000000000000000000000 --- a/research/ptn/nets/perspective_transform.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Perspective Transformer Layer Implementation. - -Transform the volume based on 4 x 4 perspective projection matrix. - -Reference: -(1) "Perspective Transformer Nets: Perspective Transformer Nets: -Learning Single-View 3D Object Reconstruction without 3D Supervision." -Xinchen Yan, Jimei Yang, Ersin Yumer, Yijie Guo, Honglak Lee. 
In NIPS 2016 -https://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf - -(2) Official implementation in Torch: https://github.com/xcyan/ptnbhwd - -(3) 2D Transformer implementation in TF: -github.com/tensorflow/models/tree/master/research/transformer - -""" - -import tensorflow as tf - - -def transformer(voxels, - theta, - out_size, - z_near, - z_far, - name='PerspectiveTransformer'): - """Perspective Transformer Layer. - - Args: - voxels: A tensor of size [num_batch, depth, height, width, num_channels]. - It is the output of a deconv/upsampling conv network (tf.float32). - theta: A tensor of size [num_batch, 16]. - It is the inverse camera transformation matrix (tf.float32). - out_size: A tuple representing the size of output of - transformer layer (float). - z_near: A number representing the near clipping plane (float). - z_far: A number representing the far clipping plane (float). - - Returns: - A transformed tensor (tf.float32). - - """ - def _repeat(x, n_repeats): - with tf.variable_scope('_repeat'): - rep = tf.transpose( - tf.expand_dims(tf.ones(shape=tf.stack([ - n_repeats, - ])), 1), [1, 0]) - rep = tf.to_int32(rep) - x = tf.matmul(tf.reshape(x, (-1, 1)), rep) - return tf.reshape(x, [-1]) - - def _interpolate(im, x, y, z, out_size): - """Bilinear interploation layer. - - Args: - im: A 5D tensor of size [num_batch, depth, height, width, num_channels]. - It is the input volume for the transformation layer (tf.float32). - x: A tensor of size [num_batch, out_depth, out_height, out_width] - representing the inverse coordinate mapping for x (tf.float32). - y: A tensor of size [num_batch, out_depth, out_height, out_width] - representing the inverse coordinate mapping for y (tf.float32). - z: A tensor of size [num_batch, out_depth, out_height, out_width] - representing the inverse coordinate mapping for z (tf.float32). 
- out_size: A tuple representing the output size of transformation layer - (float). - - Returns: - A transformed tensor (tf.float32). - - """ - with tf.variable_scope('_interpolate'): - num_batch = im.get_shape().as_list()[0] - depth = im.get_shape().as_list()[1] - height = im.get_shape().as_list()[2] - width = im.get_shape().as_list()[3] - channels = im.get_shape().as_list()[4] - - x = tf.to_float(x) - y = tf.to_float(y) - z = tf.to_float(z) - depth_f = tf.to_float(depth) - height_f = tf.to_float(height) - width_f = tf.to_float(width) - # Number of disparity interpolated. - out_depth = out_size[0] - out_height = out_size[1] - out_width = out_size[2] - zero = tf.zeros([], dtype='int32') - # 0 <= z < depth, 0 <= y < height & 0 <= x < width. - max_z = tf.to_int32(tf.shape(im)[1] - 1) - max_y = tf.to_int32(tf.shape(im)[2] - 1) - max_x = tf.to_int32(tf.shape(im)[3] - 1) - - # Converts scale indices from [-1, 1] to [0, width/height/depth]. - x = (x + 1.0) * (width_f) / 2.0 - y = (y + 1.0) * (height_f) / 2.0 - z = (z + 1.0) * (depth_f) / 2.0 - - x0 = tf.to_int32(tf.floor(x)) - x1 = x0 + 1 - y0 = tf.to_int32(tf.floor(y)) - y1 = y0 + 1 - z0 = tf.to_int32(tf.floor(z)) - z1 = z0 + 1 - - x0_clip = tf.clip_by_value(x0, zero, max_x) - x1_clip = tf.clip_by_value(x1, zero, max_x) - y0_clip = tf.clip_by_value(y0, zero, max_y) - y1_clip = tf.clip_by_value(y1, zero, max_y) - z0_clip = tf.clip_by_value(z0, zero, max_z) - z1_clip = tf.clip_by_value(z1, zero, max_z) - dim3 = width - dim2 = width * height - dim1 = width * height * depth - base = _repeat( - tf.range(num_batch) * dim1, out_depth * out_height * out_width) - base_z0_y0 = base + z0_clip * dim2 + y0_clip * dim3 - base_z0_y1 = base + z0_clip * dim2 + y1_clip * dim3 - base_z1_y0 = base + z1_clip * dim2 + y0_clip * dim3 - base_z1_y1 = base + z1_clip * dim2 + y1_clip * dim3 - - idx_z0_y0_x0 = base_z0_y0 + x0_clip - idx_z0_y0_x1 = base_z0_y0 + x1_clip - idx_z0_y1_x0 = base_z0_y1 + x0_clip - idx_z0_y1_x1 = base_z0_y1 + x1_clip - 
idx_z1_y0_x0 = base_z1_y0 + x0_clip - idx_z1_y0_x1 = base_z1_y0 + x1_clip - idx_z1_y1_x0 = base_z1_y1 + x0_clip - idx_z1_y1_x1 = base_z1_y1 + x1_clip - - # Use indices to lookup pixels in the flat image and restore - # channels dim - im_flat = tf.reshape(im, tf.stack([-1, channels])) - im_flat = tf.to_float(im_flat) - i_z0_y0_x0 = tf.gather(im_flat, idx_z0_y0_x0) - i_z0_y0_x1 = tf.gather(im_flat, idx_z0_y0_x1) - i_z0_y1_x0 = tf.gather(im_flat, idx_z0_y1_x0) - i_z0_y1_x1 = tf.gather(im_flat, idx_z0_y1_x1) - i_z1_y0_x0 = tf.gather(im_flat, idx_z1_y0_x0) - i_z1_y0_x1 = tf.gather(im_flat, idx_z1_y0_x1) - i_z1_y1_x0 = tf.gather(im_flat, idx_z1_y1_x0) - i_z1_y1_x1 = tf.gather(im_flat, idx_z1_y1_x1) - - # Finally calculate interpolated values. - x0_f = tf.to_float(x0) - x1_f = tf.to_float(x1) - y0_f = tf.to_float(y0) - y1_f = tf.to_float(y1) - z0_f = tf.to_float(z0) - z1_f = tf.to_float(z1) - # Check the out-of-boundary case. - x0_valid = tf.to_float( - tf.less_equal(x0, max_x) & tf.greater_equal(x0, 0)) - x1_valid = tf.to_float( - tf.less_equal(x1, max_x) & tf.greater_equal(x1, 0)) - y0_valid = tf.to_float( - tf.less_equal(y0, max_y) & tf.greater_equal(y0, 0)) - y1_valid = tf.to_float( - tf.less_equal(y1, max_y) & tf.greater_equal(y1, 0)) - z0_valid = tf.to_float( - tf.less_equal(z0, max_z) & tf.greater_equal(z0, 0)) - z1_valid = tf.to_float( - tf.less_equal(z1, max_z) & tf.greater_equal(z1, 0)) - - w_z0_y0_x0 = tf.expand_dims(((x1_f - x) * (y1_f - y) * - (z1_f - z) * x1_valid * y1_valid * z1_valid), - 1) - w_z0_y0_x1 = tf.expand_dims(((x - x0_f) * (y1_f - y) * - (z1_f - z) * x0_valid * y1_valid * z1_valid), - 1) - w_z0_y1_x0 = tf.expand_dims(((x1_f - x) * (y - y0_f) * - (z1_f - z) * x1_valid * y0_valid * z1_valid), - 1) - w_z0_y1_x1 = tf.expand_dims(((x - x0_f) * (y - y0_f) * - (z1_f - z) * x0_valid * y0_valid * z1_valid), - 1) - w_z1_y0_x0 = tf.expand_dims(((x1_f - x) * (y1_f - y) * - (z - z0_f) * x1_valid * y1_valid * z0_valid), - 1) - w_z1_y0_x1 = tf.expand_dims(((x 
- x0_f) * (y1_f - y) * - (z - z0_f) * x0_valid * y1_valid * z0_valid), - 1) - w_z1_y1_x0 = tf.expand_dims(((x1_f - x) * (y - y0_f) * - (z - z0_f) * x1_valid * y0_valid * z0_valid), - 1) - w_z1_y1_x1 = tf.expand_dims(((x - x0_f) * (y - y0_f) * - (z - z0_f) * x0_valid * y0_valid * z0_valid), - 1) - - output = tf.add_n([ - w_z0_y0_x0 * i_z0_y0_x0, w_z0_y0_x1 * i_z0_y0_x1, - w_z0_y1_x0 * i_z0_y1_x0, w_z0_y1_x1 * i_z0_y1_x1, - w_z1_y0_x0 * i_z1_y0_x0, w_z1_y0_x1 * i_z1_y0_x1, - w_z1_y1_x0 * i_z1_y1_x0, w_z1_y1_x1 * i_z1_y1_x1 - ]) - return output - - def _meshgrid(depth, height, width, z_near, z_far): - with tf.variable_scope('_meshgrid'): - x_t = tf.reshape( - tf.tile(tf.linspace(-1.0, 1.0, width), [height * depth]), - [depth, height, width]) - y_t = tf.reshape( - tf.tile(tf.linspace(-1.0, 1.0, height), [width * depth]), - [depth, width, height]) - y_t = tf.transpose(y_t, [0, 2, 1]) - sample_grid = tf.tile( - tf.linspace(float(z_near), float(z_far), depth), [width * height]) - z_t = tf.reshape(sample_grid, [height, width, depth]) - z_t = tf.transpose(z_t, [2, 0, 1]) - - z_t = 1 / z_t - d_t = 1 / z_t - x_t /= z_t - y_t /= z_t - - x_t_flat = tf.reshape(x_t, (1, -1)) - y_t_flat = tf.reshape(y_t, (1, -1)) - d_t_flat = tf.reshape(d_t, (1, -1)) - - ones = tf.ones_like(x_t_flat) - grid = tf.concat([d_t_flat, y_t_flat, x_t_flat, ones], 0) - return grid - - def _transform(theta, input_dim, out_size, z_near, z_far): - with tf.variable_scope('_transform'): - num_batch = input_dim.get_shape().as_list()[0] - num_channels = input_dim.get_shape().as_list()[4] - theta = tf.reshape(theta, (-1, 4, 4)) - theta = tf.cast(theta, 'float32') - - out_depth = out_size[0] - out_height = out_size[1] - out_width = out_size[2] - grid = _meshgrid(out_depth, out_height, out_width, z_near, z_far) - grid = tf.expand_dims(grid, 0) - grid = tf.reshape(grid, [-1]) - grid = tf.tile(grid, tf.stack([num_batch])) - grid = tf.reshape(grid, tf.stack([num_batch, 4, -1])) - - # Transform A x (x_t', y_t', 1, 
d_t)^T -> (x_s, y_s, z_s, 1). - t_g = tf.matmul(theta, grid) - z_s = tf.slice(t_g, [0, 0, 0], [-1, 1, -1]) - y_s = tf.slice(t_g, [0, 1, 0], [-1, 1, -1]) - x_s = tf.slice(t_g, [0, 2, 0], [-1, 1, -1]) - - z_s_flat = tf.reshape(z_s, [-1]) - y_s_flat = tf.reshape(y_s, [-1]) - x_s_flat = tf.reshape(x_s, [-1]) - - input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, z_s_flat, - out_size) - - output = tf.reshape( - input_transformed, - tf.stack([num_batch, out_depth, out_height, out_width, num_channels])) - - return output - - with tf.variable_scope(name): - output = _transform(theta, voxels, out_size, z_near, z_far) - return output diff --git a/research/ptn/nets/ptn_encoder.py b/research/ptn/nets/ptn_encoder.py deleted file mode 100644 index ede556834e6ea42e9e0a266bc0d525a679924077..0000000000000000000000000000000000000000 --- a/research/ptn/nets/ptn_encoder.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Training/Pretraining encoder as used in PTN (NIPS16).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def _preprocess(images): - return images * 2 - 1 - - -def model(images, params, is_training): - """Model encoding the images into view-invariant embedding.""" - del is_training # Unused - image_size = images.get_shape().as_list()[1] - f_dim = params.f_dim - fc_dim = params.fc_dim - z_dim = params.z_dim - outputs = dict() - - images = _preprocess(images) - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)): - h0 = slim.conv2d(images, f_dim, [5, 5], stride=2, activation_fn=tf.nn.relu) - h1 = slim.conv2d(h0, f_dim * 2, [5, 5], stride=2, activation_fn=tf.nn.relu) - h2 = slim.conv2d(h1, f_dim * 4, [5, 5], stride=2, activation_fn=tf.nn.relu) - # Reshape layer - s8 = image_size // 8 - h2 = tf.reshape(h2, [-1, s8 * s8 * f_dim * 4]) - h3 = slim.fully_connected(h2, fc_dim, activation_fn=tf.nn.relu) - h4 = slim.fully_connected(h3, fc_dim, activation_fn=tf.nn.relu) - - outputs['ids'] = slim.fully_connected(h4, z_dim, activation_fn=tf.nn.relu) - outputs['poses'] = slim.fully_connected(h4, z_dim, activation_fn=tf.nn.relu) - return outputs diff --git a/research/ptn/nets/ptn_im_decoder.py b/research/ptn/nets/ptn_im_decoder.py deleted file mode 100644 index 8ee512e878d549c2d16f13f58dee8786e129e751..0000000000000000000000000000000000000000 --- a/research/ptn/nets/ptn_im_decoder.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Image/Mask decoder used while pretraining the network.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - -_FEATURE_MAP_SIZE = 8 - - -def _postprocess_im(images): - """Performs post-processing for the images returned from conv net. - - Transforms the value from [-1, 1] to [0, 1]. - """ - return (images + 1) * 0.5 - - -def model(identities, poses, params, is_training): - """Decoder model to get image and mask from latent embedding.""" - del is_training - f_dim = params.f_dim - fc_dim = params.fc_dim - - outputs = dict() - - with slim.arg_scope( - [slim.fully_connected, slim.conv2d_transpose], - weights_initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)): - # Concatenate the identity and pose units - h0 = tf.concat([identities, poses], 1) - h0 = slim.fully_connected(h0, fc_dim, activation_fn=tf.nn.relu) - h1 = slim.fully_connected(h0, fc_dim, activation_fn=tf.nn.relu) - - # Mask decoder - dec_m0 = slim.fully_connected( - h1, (_FEATURE_MAP_SIZE**2) * f_dim * 2, activation_fn=tf.nn.relu) - dec_m0 = tf.reshape( - dec_m0, [-1, _FEATURE_MAP_SIZE, _FEATURE_MAP_SIZE, f_dim * 2]) - - dec_m1 = slim.conv2d_transpose( - dec_m0, f_dim, [5, 5], stride=2, activation_fn=tf.nn.relu) - dec_m2 = slim.conv2d_transpose( - dec_m1, int(f_dim / 2), [5, 5], stride=2, activation_fn=tf.nn.relu) - dec_m3 = slim.conv2d_transpose( - dec_m2, 1, [5, 5], stride=2, 
activation_fn=tf.nn.sigmoid) - - # Image decoder - dec_i0 = slim.fully_connected( - h1, (_FEATURE_MAP_SIZE**2) * f_dim * 4, activation_fn=tf.nn.relu) - dec_i0 = tf.reshape( - dec_i0, [-1, _FEATURE_MAP_SIZE, _FEATURE_MAP_SIZE, f_dim * 4]) - - dec_i1 = slim.conv2d_transpose( - dec_i0, f_dim * 2, [5, 5], stride=2, activation_fn=tf.nn.relu) - dec_i2 = slim.conv2d_transpose( - dec_i1, f_dim * 2, [5, 5], stride=2, activation_fn=tf.nn.relu) - dec_i3 = slim.conv2d_transpose( - dec_i2, 3, [5, 5], stride=2, activation_fn=tf.nn.tanh) - - outputs = dict() - outputs['images'] = _postprocess_im(dec_i3) - outputs['masks'] = dec_m3 - return outputs diff --git a/research/ptn/nets/ptn_rotator.py b/research/ptn/nets/ptn_rotator.py deleted file mode 100644 index 2cc73bb8dfe2edb624d9fa56ecd95347fbf0cf3f..0000000000000000000000000000000000000000 --- a/research/ptn/nets/ptn_rotator.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Creates rotator network model. - -This model performs the out-of-plane rotations given input image and action. -The action is either no-op, rotate clockwise or rotate counter-clockwise. 
- -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def bilinear(input_x, input_y, output_size): - """Define the bilinear transformation layer.""" - shape_x = input_x.get_shape().as_list() - shape_y = input_y.get_shape().as_list() - - weights_initializer = tf.truncated_normal_initializer(stddev=0.02, - seed=1) - biases_initializer = tf.constant_initializer(0.0) - - matrix = tf.get_variable("Matrix", [shape_x[1], shape_y[1], output_size], - tf.float32, initializer=weights_initializer) - bias = tf.get_variable("Bias", [output_size], - initializer=biases_initializer) - # Add to GraphKeys.MODEL_VARIABLES - tf.contrib.framework.add_model_variable(matrix) - tf.contrib.framework.add_model_variable(bias) - # Define the transformation - h0 = tf.matmul(input_x, tf.reshape(matrix, - [shape_x[1], shape_y[1]*output_size])) - h0 = tf.reshape(h0, [-1, shape_y[1], output_size]) - h1 = tf.tile(tf.reshape(input_y, [-1, shape_y[1], 1]), - [1, 1, output_size]) - h1 = tf.multiply(h0, h1) - return tf.reduce_sum(h1, 1) + bias - - -def model(poses, actions, params, is_training): - """Model for performing rotation.""" - del is_training # Unused - return bilinear(poses, actions, params.z_dim) diff --git a/research/ptn/nets/ptn_vox_decoder.py b/research/ptn/nets/ptn_vox_decoder.py deleted file mode 100644 index 87ea27fa2bfcffc6fd9b9292686096e06f36e6a6..0000000000000000000000000000000000000000 --- a/research/ptn/nets/ptn_vox_decoder.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Training decoder as used in PTN (NIPS16).""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -@tf.contrib.framework.add_arg_scope -def conv3d_transpose(inputs, - num_outputs, - kernel_size, - stride=1, - padding='SAME', - activation_fn=tf.nn.relu, - weights_initializer=tf.contrib.layers.xavier_initializer(), - biases_initializer=tf.zeros_initializer(), - reuse=None, - trainable=True, - scope=None): - """Wrapper for conv3d_transpose layer. - - This function wraps the tf.conv3d_transpose with basic non-linearity. - Tt creates a variable called `weights`, representing the kernel, - that is convoled with the input. A second varibale called `biases' - is added to the result of operation. 
- """ - with tf.variable_scope( - scope, 'Conv3d_transpose', [inputs], reuse=reuse): - dtype = inputs.dtype.base_dtype - kernel_d, kernel_h, kernel_w = kernel_size[0:3] - num_filters_in = inputs.get_shape()[4] - - weights_shape = [kernel_d, kernel_h, kernel_w, num_outputs, num_filters_in] - weights = tf.get_variable('weights', - shape=weights_shape, - dtype=dtype, - initializer=weights_initializer, - trainable=trainable) - tf.contrib.framework.add_model_variable(weights) - - input_shape = inputs.get_shape().as_list() - batch_size = input_shape[0] - depth = input_shape[1] - height = input_shape[2] - width = input_shape[3] - - def get_deconv_dim(dim_size, stride_size): - # Only support padding='SAME'. - if isinstance(dim_size, tf.Tensor): - dim_size = tf.multiply(dim_size, stride_size) - elif dim_size is not None: - dim_size *= stride_size - return dim_size - - out_depth = get_deconv_dim(depth, stride) - out_height = get_deconv_dim(height, stride) - out_width = get_deconv_dim(width, stride) - - out_shape = [batch_size, out_depth, out_height, out_width, num_outputs] - outputs = tf.nn.conv3d_transpose(inputs, weights, out_shape, - [1, stride, stride, stride, 1], - padding=padding) - - outputs.set_shape(out_shape) - - if biases_initializer is not None: - biases = tf.get_variable('biases', - shape=[num_outputs,], - dtype=dtype, - initializer=biases_initializer, - trainable=trainable) - tf.contrib.framework.add_model_variable(biases) - outputs = tf.nn.bias_add(outputs, biases) - - if activation_fn: - outputs = activation_fn(outputs) - return outputs - - -def model(identities, params, is_training): - """Model transforming embedding to voxels.""" - del is_training # Unused - f_dim = params.f_dim - - # Please refer to the original implementation: github.com/xcyan/nips16_PTN - # In TF replication, we use a slightly different architecture. 
- with slim.arg_scope( - [slim.fully_connected, conv3d_transpose], - weights_initializer=tf.truncated_normal_initializer(stddev=0.02, seed=1)): - h0 = slim.fully_connected( - identities, 4 * 4 * 4 * f_dim * 8, activation_fn=tf.nn.relu) - h1 = tf.reshape(h0, [-1, 4, 4, 4, f_dim * 8]) - h1 = conv3d_transpose( - h1, f_dim * 4, [4, 4, 4], stride=2, activation_fn=tf.nn.relu) - h2 = conv3d_transpose( - h1, int(f_dim * 3 / 2), [5, 5, 5], stride=2, activation_fn=tf.nn.relu) - h3 = conv3d_transpose( - h2, 1, [6, 6, 6], stride=2, activation_fn=tf.nn.sigmoid) - return h3 diff --git a/research/ptn/pretrain_rotator.py b/research/ptn/pretrain_rotator.py deleted file mode 100644 index 6307f2d4f6ad4341105de7e265411d10e55f61c9..0000000000000000000000000000000000000000 --- a/research/ptn/pretrain_rotator.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Contains training plan for the Rotator model (Pretraining in NIPS16).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -from six.moves import xrange -import tensorflow as tf - -from tensorflow import app - -import model_rotator as model - -flags = tf.app.flags -slim = tf.contrib.slim - -flags.DEFINE_string('inp_dir', '', - 'Directory path containing the input data (tfrecords).') -flags.DEFINE_string( - 'dataset_name', 'shapenet_chair', - 'Dataset name that is to be used for training and evaluation.') -flags.DEFINE_integer('z_dim', 512, '') -flags.DEFINE_integer('a_dim', 3, '') -flags.DEFINE_integer('f_dim', 64, '') -flags.DEFINE_integer('fc_dim', 1024, '') -flags.DEFINE_integer('num_views', 24, 'Num of viewpoints in the input data.') -flags.DEFINE_integer('image_size', 64, - 'Input images dimension (pixels) - width & height.') -flags.DEFINE_integer('step_size', 1, 'Steps to take for rotation in pretraining.') -flags.DEFINE_integer('batch_size', 32, 'Batch size for training.') -flags.DEFINE_string('encoder_name', 'ptn_encoder', - 'Name of the encoder network being used.') -flags.DEFINE_string('decoder_name', 'ptn_im_decoder', - 'Name of the decoder network being used.') -flags.DEFINE_string('rotator_name', 'ptn_rotator', - 'Name of the rotator network being used.') -# Save options -flags.DEFINE_string('checkpoint_dir', '/tmp/ptn_train/', - 'Directory path for saving trained models and other data.') -flags.DEFINE_string('model_name', 'deeprotator_pretrain', - 'Name of the model used in naming the TF job. 
Must be different for each run.') -flags.DEFINE_string('init_model', None, - 'Checkpoint path of the model to initialize with.') -flags.DEFINE_integer('save_every', 1000, - 'Average period of steps after which we save a model.') -# Optimization -flags.DEFINE_float('image_weight', 10, 'Weighting factor for image loss.') -flags.DEFINE_float('mask_weight', 1, 'Weighting factor for mask loss.') -flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.') -flags.DEFINE_float('weight_decay', 0.001, 'Weight decay parameter while training.') -flags.DEFINE_float('clip_gradient_norm', 0, 'Gradient clim norm, leave 0 if no gradient clipping.') -flags.DEFINE_integer('max_number_of_steps', 320000, 'Maximum number of steps for training.') -# Summary -flags.DEFINE_integer('save_summaries_secs', 15, 'Seconds interval for dumping TF summaries.') -flags.DEFINE_integer('save_interval_secs', 60 * 5, 'Seconds interval to save models.') -# Distribution -flags.DEFINE_string('master', '', 'The address of the tensorflow master if running distributed.') -flags.DEFINE_bool('sync_replicas', False, 'Whether to sync gradients between replicas for optimizer.') -flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas (train tasks).') -flags.DEFINE_integer('backup_workers', 0, 'Number of backup workers.') -flags.DEFINE_integer('ps_tasks', 0, 'Number of ps tasks.') -flags.DEFINE_integer('task', 0, - 'Task identifier flag to be set for each task running in distributed manner. 
Task number 0 ' - 'will be chosen as the chief.') - -FLAGS = flags.FLAGS - - -def main(_): - train_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name, 'train') - save_image_dir = os.path.join(train_dir, 'images') - if not os.path.exists(train_dir): - os.makedirs(train_dir) - if not os.path.exists(save_image_dir): - os.makedirs(save_image_dir) - - g = tf.Graph() - with g.as_default(): - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - global_step = slim.get_or_create_global_step() - ########## - ## data ## - ########## - train_data = model.get_inputs( - FLAGS.inp_dir, - FLAGS.dataset_name, - 'train', - FLAGS.batch_size, - FLAGS.image_size, - is_training=True) - inputs = model.preprocess(train_data, FLAGS.step_size) - ########### - ## model ## - ########### - model_fn = model.get_model_fn(FLAGS, is_training=True) - outputs = model_fn(inputs) - ########## - ## loss ## - ########## - task_loss = model.get_loss(inputs, outputs, FLAGS) - regularization_loss = model.get_regularization_loss( - ['encoder', 'rotator', 'decoder'], FLAGS) - loss = task_loss + regularization_loss - ############### - ## optimizer ## - ############### - optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) - if FLAGS.sync_replicas: - optimizer = tf.train.SyncReplicasOptimizer( - optimizer, - replicas_to_aggregate=FLAGS.workers_replicas - FLAGS.backup_workers, - total_num_replicas=FLAGS.worker_replicas) - - ############## - ## train_op ## - ############## - train_op = model.get_train_op_for_scope( - loss, optimizer, ['encoder', 'rotator', 'decoder'], FLAGS) - ########### - ## saver ## - ########### - saver = tf.train.Saver(max_to_keep=np.minimum(5, - FLAGS.worker_replicas + 1)) - - if FLAGS.task == 0: - val_data = model.get_inputs( - FLAGS.inp_dir, - FLAGS.dataset_name, - 'val', - FLAGS.batch_size, - FLAGS.image_size, - is_training=False) - val_inputs = model.preprocess(val_data, FLAGS.step_size) - # Note: don't compute loss here - reused_model_fn = model.get_model_fn( - 
FLAGS, is_training=False, reuse=True) - val_outputs = reused_model_fn(val_inputs) - with tf.device(tf.DeviceSpec(device_type='CPU')): - if FLAGS.step_size == 1: - vis_input_images = val_inputs['images_0'] * 255.0 - vis_output_images = val_inputs['images_1'] * 255.0 - vis_pred_images = val_outputs['images_1'] * 255.0 - vis_pred_masks = (val_outputs['masks_1'] * (-1) + 1) * 255.0 - else: - rep_times = int(np.ceil(32.0 / float(FLAGS.step_size))) - vis_list_1 = [] - vis_list_2 = [] - vis_list_3 = [] - vis_list_4 = [] - for j in xrange(rep_times): - for k in xrange(FLAGS.step_size): - vis_input_image = val_inputs['images_0'][j], - vis_output_image = val_inputs['images_%d' % (k + 1)][j] - vis_pred_image = val_outputs['images_%d' % (k + 1)][j] - vis_pred_mask = val_outputs['masks_%d' % (k + 1)][j] - vis_list_1.append(tf.expand_dims(vis_input_image, 0)) - vis_list_2.append(tf.expand_dims(vis_output_image, 0)) - vis_list_3.append(tf.expand_dims(vis_pred_image, 0)) - vis_list_4.append(tf.expand_dims(vis_pred_mask, 0)) - - vis_list_1 = tf.reshape( - tf.stack(vis_list_1), [ - rep_times * FLAGS.step_size, FLAGS.image_size, - FLAGS.image_size, 3 - ]) - vis_list_2 = tf.reshape( - tf.stack(vis_list_2), [ - rep_times * FLAGS.step_size, FLAGS.image_size, - FLAGS.image_size, 3 - ]) - vis_list_3 = tf.reshape( - tf.stack(vis_list_3), [ - rep_times * FLAGS.step_size, FLAGS.image_size, - FLAGS.image_size, 3 - ]) - vis_list_4 = tf.reshape( - tf.stack(vis_list_4), [ - rep_times * FLAGS.step_size, FLAGS.image_size, - FLAGS.image_size, 1 - ]) - - vis_input_images = vis_list_1 * 255.0 - vis_output_images = vis_list_2 * 255.0 - vis_pred_images = vis_list_3 * 255.0 - vis_pred_masks = (vis_list_4 * (-1) + 1) * 255.0 - - write_disk_op = model.write_disk_grid( - global_step=global_step, - summary_freq=FLAGS.save_every, - log_dir=save_image_dir, - input_images=vis_input_images, - output_images=vis_output_images, - pred_images=vis_pred_images, - pred_masks=vis_pred_masks) - with 
tf.control_dependencies([write_disk_op]): - train_op = tf.identity(train_op) - - ############# - ## init_fn ## - ############# - init_fn = model.get_init_fn(['encoder, ' 'rotator', 'decoder'], FLAGS) - - ############## - ## training ## - ############## - slim.learning.train( - train_op=train_op, - logdir=train_dir, - init_fn=init_fn, - master=FLAGS.master, - is_chief=(FLAGS.task == 0), - number_of_steps=FLAGS.max_number_of_steps, - saver=saver, - save_summaries_secs=FLAGS.save_summaries_secs, - save_interval_secs=FLAGS.save_interval_secs) - - -if __name__ == '__main__': - app.run() diff --git a/research/ptn/train_ptn.py b/research/ptn/train_ptn.py deleted file mode 100644 index 1b42245d4c2d7fc445e275aa4e933e89e6108699..0000000000000000000000000000000000000000 --- a/research/ptn/train_ptn.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Contains training plan for the Im2vox model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -from tensorflow import app - -import model_ptn - -flags = tf.app.flags -slim = tf.contrib.slim - -flags.DEFINE_string('inp_dir', - '', - 'Directory path containing the input data (tfrecords).') -flags.DEFINE_string( - 'dataset_name', 'shapenet_chair', - 'Dataset name that is to be used for training and evaluation.') -flags.DEFINE_integer('z_dim', 512, '') -flags.DEFINE_integer('f_dim', 64, '') -flags.DEFINE_integer('fc_dim', 1024, '') -flags.DEFINE_integer('num_views', 24, 'Num of viewpoints in the input data.') -flags.DEFINE_integer('image_size', 64, - 'Input images dimension (pixels) - width & height.') -flags.DEFINE_integer('vox_size', 32, 'Voxel prediction dimension.') -flags.DEFINE_integer('step_size', 24, 'Steps to take in rotation to fetch viewpoints.') -flags.DEFINE_integer('batch_size', 6, 'Batch size while training.') -flags.DEFINE_float('focal_length', 0.866, 'Focal length parameter used in perspective projection.') -flags.DEFINE_float('focal_range', 1.732, 'Focal length parameter used in perspective projection.') -flags.DEFINE_string('encoder_name', 'ptn_encoder', - 'Name of the encoder network being used.') -flags.DEFINE_string('decoder_name', 'ptn_vox_decoder', - 'Name of the decoder network being used.') -flags.DEFINE_string('projector_name', 'perspective_projector', - 'Name of the projector network being used.') -# Save options -flags.DEFINE_string('checkpoint_dir', '/tmp/ptn_train/', - 'Directory path for saving trained models and other data.') -flags.DEFINE_string('model_name', 'ptn_finetune', - 'Name of the model used in naming the TF job. 
Must be different for each run.') -flags.DEFINE_string('init_model', None, - 'Checkpoint path of the model to initialize with.') -flags.DEFINE_integer('save_every', 1000, - 'Average period of steps after which we save a model.') -# Optimization -flags.DEFINE_float('proj_weight', 10, 'Weighting factor for projection loss.') -flags.DEFINE_float('volume_weight', 0, 'Weighting factor for volume loss.') -flags.DEFINE_float('viewpoint_weight', 1, 'Weighting factor for viewpoint loss.') -flags.DEFINE_float('learning_rate', 0.0001, 'Learning rate.') -flags.DEFINE_float('weight_decay', 0.001, 'Weight decay parameter while training.') -flags.DEFINE_float('clip_gradient_norm', 0, 'Gradient clim norm, leave 0 if no gradient clipping.') -flags.DEFINE_integer('max_number_of_steps', 10000, 'Maximum number of steps for training.') -# Summary -flags.DEFINE_integer('save_summaries_secs', 15, 'Seconds interval for dumping TF summaries.') -flags.DEFINE_integer('save_interval_secs', 60 * 5, 'Seconds interval to save models.') - -# Scheduling -flags.DEFINE_string('master', '', 'The address of the tensorflow master') -flags.DEFINE_bool('sync_replicas', False, 'Whether to sync gradients between replicas for optimizer.') -flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas (train tasks).') -flags.DEFINE_integer('backup_workers', 0, 'Number of backup workers.') -flags.DEFINE_integer('ps_tasks', 0, 'Number of ps tasks.') -flags.DEFINE_integer('task', 0, - 'Task identifier flag to be set for each task running in distributed manner. 
Task number 0 ' - 'will be chosen as the chief.') - -FLAGS = flags.FLAGS - - -def main(_): - train_dir = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name, 'train') - save_image_dir = os.path.join(train_dir, 'images') - if not os.path.exists(train_dir): - os.makedirs(train_dir) - if not os.path.exists(save_image_dir): - os.makedirs(save_image_dir) - - g = tf.Graph() - with g.as_default(): - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - global_step = slim.get_or_create_global_step() - ########### - ## model ## - ########### - model = model_ptn.model_PTN(FLAGS) - ########## - ## data ## - ########## - train_data = model.get_inputs( - FLAGS.inp_dir, - FLAGS.dataset_name, - 'train', - FLAGS.batch_size, - FLAGS.image_size, - FLAGS.vox_size, - is_training=True) - inputs = model.preprocess(train_data, FLAGS.step_size) - ############## - ## model_fn ## - ############## - model_fn = model.get_model_fn( - is_training=True, reuse=False, run_projection=True) - outputs = model_fn(inputs) - ################## - ## train_scopes ## - ################## - if FLAGS.init_model: - train_scopes = ['decoder'] - init_scopes = ['encoder'] - else: - train_scopes = ['encoder', 'decoder'] - - ########## - ## loss ## - ########## - task_loss = model.get_loss(inputs, outputs) - - regularization_loss = model.get_regularization_loss(train_scopes) - loss = task_loss + regularization_loss - ############### - ## optimizer ## - ############### - optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) - if FLAGS.sync_replicas: - optimizer = tf.train.SyncReplicasOptimizer( - optimizer, - replicas_to_aggregate=FLAGS.workers_replicas - FLAGS.backup_workers, - total_num_replicas=FLAGS.worker_replicas) - - ############## - ## train_op ## - ############## - train_op = model.get_train_op_for_scope(loss, optimizer, train_scopes) - ########### - ## saver ## - ########### - saver = tf.train.Saver(max_to_keep=np.minimum(5, - FLAGS.worker_replicas + 1)) - - if FLAGS.task == 0: - params = 
FLAGS - params.batch_size = params.num_views - params.step_size = 1 - model.set_params(params) - val_data = model.get_inputs( - params.inp_dir, - params.dataset_name, - 'val', - params.batch_size, - params.image_size, - params.vox_size, - is_training=False) - val_inputs = model.preprocess(val_data, params.step_size) - # Note: don't compute loss here - reused_model_fn = model.get_model_fn(is_training=False, reuse=True) - val_outputs = reused_model_fn(val_inputs) - - with tf.device(tf.DeviceSpec(device_type='CPU')): - vis_input_images = val_inputs['images_1'] * 255.0 - vis_gt_projs = (val_outputs['masks_1'] * (-1) + 1) * 255.0 - vis_pred_projs = (val_outputs['projs_1'] * (-1) + 1) * 255.0 - - vis_gt_projs = tf.concat([vis_gt_projs] * 3, axis=3) - vis_pred_projs = tf.concat([vis_pred_projs] * 3, axis=3) - # rescale - new_size = [FLAGS.image_size] * 2 - vis_gt_projs = tf.image.resize_nearest_neighbor( - vis_gt_projs, new_size) - vis_pred_projs = tf.image.resize_nearest_neighbor( - vis_pred_projs, new_size) - # flip - # vis_gt_projs = utils.image_flipud(vis_gt_projs) - # vis_pred_projs = utils.image_flipud(vis_pred_projs) - # vis_gt_projs is of shape [batch, height, width, channels] - write_disk_op = model.write_disk_grid( - global_step=global_step, - log_dir=save_image_dir, - input_images=vis_input_images, - gt_projs=vis_gt_projs, - pred_projs=vis_pred_projs, - input_voxels=val_inputs['voxels'], - output_voxels=val_outputs['voxels_1']) - with tf.control_dependencies([write_disk_op]): - train_op = tf.identity(train_op) - - ############# - ## init_fn ## - ############# - if FLAGS.init_model: - init_fn = model.get_init_fn(init_scopes) - else: - init_fn = None - - ############## - ## training ## - ############## - slim.learning.train( - train_op=train_op, - logdir=train_dir, - init_fn=init_fn, - master=FLAGS.master, - is_chief=(FLAGS.task == 0), - number_of_steps=FLAGS.max_number_of_steps, - saver=saver, - save_summaries_secs=FLAGS.save_summaries_secs, - 
save_interval_secs=FLAGS.save_interval_secs) - - -if __name__ == '__main__': - app.run() diff --git a/research/ptn/utils.py b/research/ptn/utils.py deleted file mode 100644 index adf71731edb78740c6716d7abddfd77b557aaecd..0000000000000000000000000000000000000000 --- a/research/ptn/utils.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import StringIO -import matplotlib -matplotlib.use('Agg') -from matplotlib import pylab as p -# axes3d is being used implictly for visualization. 
-from mpl_toolkits.mplot3d import axes3d as p3 # pylint:disable=unused-import -import numpy as np -from PIL import Image -from skimage import measure -from six.moves import xrange - -import tensorflow as tf - - -def save_image(inp_array, image_file): - """Function that dumps the image to disk.""" - inp_array = np.clip(inp_array, 0, 255).astype(np.uint8) - image = Image.fromarray(inp_array) - buf = StringIO.StringIO() - image.save(buf, format='JPEG') - with open(image_file, 'w') as f: - f.write(buf.getvalue()) - return None - - -def image_flipud(images): - """Function that flip (up-down) the np image.""" - quantity = images.get_shape().as_list()[0] - image_list = [] - for k in xrange(quantity): - image_list.append(tf.image.flip_up_down(images[k, :, :, :])) - outputs = tf.stack(image_list) - return outputs - - -def resize_image(inp_array, new_height, new_width): - """Function that resize the np image.""" - inp_array = np.clip(inp_array, 0, 255).astype(np.uint8) - image = Image.fromarray(inp_array) - # Reverse order - image = image.resize((new_width, new_height)) - return np.array(image) - - -def display_voxel(points, vis_size=128): - """Function to display 3D voxel.""" - try: - data = visualize_voxel_spectral(points, vis_size) - except ValueError: - data = visualize_voxel_scatter(points, vis_size) - return data - - -def visualize_voxel_spectral(points, vis_size=128): - """Function to visualize voxel (spectral).""" - points = np.rint(points) - points = np.swapaxes(points, 0, 2) - fig = p.figure(figsize=(1, 1), dpi=vis_size) - verts, faces = measure.marching_cubes_classic(points, 0, spacing=(0.1, 0.1, 0.1)) - ax = fig.add_subplot(111, projection='3d') - ax.plot_trisurf( - verts[:, 0], verts[:, 1], faces, verts[:, 2], cmap='Spectral_r', lw=0.1) - ax.set_axis_off() - fig.tight_layout(pad=0) - fig.canvas.draw() - data = np.fromstring( - fig.canvas.tostring_rgb(), dtype=np.uint8, sep='').reshape( - vis_size, vis_size, 3) - p.close('all') - return data - - -def 
visualize_voxel_scatter(points, vis_size=128): - """Function to visualize voxel (scatter).""" - points = np.rint(points) - points = np.swapaxes(points, 0, 2) - fig = p.figure(figsize=(1, 1), dpi=vis_size) - ax = fig.add_subplot(111, projection='3d') - x = [] - y = [] - z = [] - (x_dimension, y_dimension, z_dimension) = points.shape - for i in range(x_dimension): - for j in range(y_dimension): - for k in range(z_dimension): - if points[i, j, k]: - x.append(i) - y.append(j) - z.append(k) - ax.scatter3D(x, y, z) - ax.set_axis_off() - fig.tight_layout(pad=0) - fig.canvas.draw() - data = np.fromstring( - fig.canvas.tostring_rgb(), dtype=np.uint8, sep='').reshape( - vis_size, vis_size, 3) - p.close('all') - return data diff --git a/research/qa_kg/README.md b/research/qa_kg/README.md deleted file mode 100644 index 7224ac8f8f5ff2f6c0003c5bbd9ac1717fe7addf..0000000000000000000000000000000000000000 --- a/research/qa_kg/README.md +++ /dev/null @@ -1,83 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Module networks for question answering on knowledge graph - -This code repository contains a TensorFlow model for question answering on -knowledge graph with end-to-end module networks. The original paper describing -end-to-end module networks is as follows. - -R. Hu, J. Andreas, M. Rohrbach, T. Darrell, K. Saenko, *Learning to Reason: -End-to-End Module Networks for Visual Question Answering*. in arXiv preprint -arXiv:1704.05526, 2017. 
([PDF](https://arxiv.org/pdf/1704.05526.pdf)) - -``` -@article{hu2017learning, - title={Learning to Reason: End-to-End Module Networks for Visual Question Answering}, - author={Hu, Ronghang and Andreas, Jacob and Rohrbach, Marcus and Darrell, Trevor and Saenko, Kate}, - journal={arXiv preprint arXiv:1704.05526}, - year={2017} -} -``` - -The code in this repository is based on the original -[implementation](https://github.com/ronghanghu/n2nmn) for this paper. - -## Requirements - -1. Install TensorFlow 1.0.0. Follow the [official - guide](https://www.tensorflow.org/install/). Please note that newer or older - versions of TensorFlow may fail to work due to incompatibility with - TensorFlow Fold. -2. Install TensorFlow Fold. Follow the - [setup instructions](https://github.com/tensorflow/fold/blob/master/tensorflow_fold/g3doc/setup.md). - TensorFlow Fold only supports Linux platform. We have not tested - the code on other platforms. - -## Data - -1. Download the [MetaQA dataset](https://goo.gl/f3AmcY). Click the button - `MetaQA` and then click `Download` in the drop-down list. Extract the zip - file after downloading completed. Read the documents there for dataset - details. -2. Move the `MetaQA` folder to the root directory of this repository. - -## How to use this code - -We provide an experiment folder `exp_1_hop`, which applies the implemented model -to the 1-hop vanilla dataset in MetaQA. More experiment folders are coming soon. - -Currently, we provide code for training with ground truth layout, and testing -the saved model. Configurations can be modified in `config.py`. They can also be -set via command line parameters. - -To train the model: - -``` -python exp_1_hop/train_gt_layout.py -``` - -To test the saved model (need to provide the snapshot name): - -``` -python exp_1_hop/test.py --snapshot_name 00010000 -``` - -## Model introduction - -1. In this model, we store the knowledge graph in a key-value based memory. 
For - each knowledge graph edge (subject, relation, object), we use the (subject, - relation) as the key and the object as the value. -2. All entities and relations are embedded as fixed-dimension vectors. These - embeddings are also end-to-end learned. -3. Neural modules can separately operate on either the key side or the value - side. -4. The attention is shared between keys and corresponding values. -5. The answer output is based on the attention-weighted sum over keys or - values, depending on the output module. - -## Contact -Authors: Yuyu Zhang, Xin Pan - -Pull requests and issues: @yuyuz diff --git a/research/qa_kg/exp_1_hop/config.py b/research/qa_kg/exp_1_hop/config.py deleted file mode 100644 index 95d8cf5f512243279365ad99c9f582c945d6c8e2..0000000000000000000000000000000000000000 --- a/research/qa_kg/exp_1_hop/config.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import argparse -import os - - -def str2bool(v): - return v.lower() in ('true', '1') - - -def add_argument_group(name): - arg = parser.add_argument_group(name) - arg_lists.append(arg) - return arg - - -def get_config(): - config, unparsed = parser.parse_known_args() - return config, unparsed - - -arg_lists = [] -parser = argparse.ArgumentParser() -work_dir = os.path.abspath(os.path.join(__file__, '../../')) - -net_arg = add_argument_group('Network') -net_arg.add_argument('--lstm_dim', type=int, default=128) -net_arg.add_argument('--num_layers', type=int, default=1) -net_arg.add_argument('--embed_dim_txt', type=int, default=128) -net_arg.add_argument('--embed_dim_nmn', type=int, default=128) -net_arg.add_argument( - '--T_encoder', type=int, default=0) # will be updated when reading data -net_arg.add_argument('--T_decoder', type=int, default=5) - -train_arg = add_argument_group('Training') -train_arg.add_argument('--train_tag', type=str, default='n2nmn') -train_arg.add_argument('--batch_size', type=int, default=128) -train_arg.add_argument('--max_iter', type=int, default=1000000) -train_arg.add_argument('--weight_decay', type=float, default=1e-5) -train_arg.add_argument('--baseline_decay', type=float, default=0.99) -train_arg.add_argument('--max_grad_norm', type=float, default=10) -train_arg.add_argument('--random_seed', type=int, default=123) - -data_arg = add_argument_group('Data') -data_path = work_dir + '/MetaQA/' -data_arg.add_argument('--KB_file', type=str, default=data_path + 'kb.txt') -data_arg.add_argument( - '--data_dir', type=str, default=data_path + '1-hop/vanilla/') -data_arg.add_argument('--train_data_file', type=str, default='qa_train.txt') -data_arg.add_argument('--dev_data_file', type=str, default='qa_dev.txt') -data_arg.add_argument('--test_data_file', type=str, default='qa_test.txt') - -exp_arg = add_argument_group('Experiment') -exp_path = work_dir + '/exp_1_hop/' 
-exp_arg.add_argument('--exp_dir', type=str, default=exp_path) - -log_arg = add_argument_group('Log') -log_arg.add_argument('--log_dir', type=str, default='logs') -log_arg.add_argument('--log_interval', type=int, default=1000) -log_arg.add_argument('--num_log_samples', type=int, default=3) -log_arg.add_argument( - '--log_level', type=str, default='INFO', choices=['INFO', 'DEBUG', 'WARN']) - -io_arg = add_argument_group('IO') -io_arg.add_argument('--model_dir', type=str, default='model') -io_arg.add_argument('--snapshot_interval', type=int, default=1000) -io_arg.add_argument('--output_dir', type=str, default='output') diff --git a/research/qa_kg/exp_1_hop/test.py b/research/qa_kg/exp_1_hop/test.py deleted file mode 100644 index 2937c0d582a54f6de2702a4f741963e0fe2b2f72..0000000000000000000000000000000000000000 --- a/research/qa_kg/exp_1_hop/test.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import os -import sys -sys.path.append(os.path.abspath(os.path.join(__file__, '../../'))) -import numpy as np -import tensorflow as tf -from config import get_config -from model_n2nmn.assembler import Assembler -from model_n2nmn.model import Model -from util.data_reader import DataReader -from util.data_reader import SampleBuilder -from util.misc import prepare_dirs_and_logger - -FLAGS = tf.flags.FLAGS -tf.flags.DEFINE_string('snapshot_name', '00001000', 'snapshot file name') - - -def main(_): - config = prepare_dirs_and_logger(config_raw) - - rng = np.random.RandomState(config.random_seed) - tf.set_random_seed(config.random_seed) - config.rng = rng - - config.module_names = ['_key_find', '_key_filter', '_val_desc', ''] - config.gt_layout_tokens = ['_key_find', '_key_filter', '_val_desc', ''] - assembler = Assembler(config) - - sample_builder = SampleBuilder(config) - config = sample_builder.config # update T_encoder according to data - data_test = sample_builder.data_all['test'] - data_reader_test = DataReader( - config, data_test, assembler, shuffle=False, one_pass=True) - - num_vocab_txt = len(sample_builder.dict_all) - num_vocab_nmn = len(assembler.module_names) - num_choices = len(sample_builder.dict_all) - - # Network inputs - text_seq_batch = tf.placeholder(tf.int32, [None, None]) - seq_len_batch = tf.placeholder(tf.int32, [None]) - - # The model - model = Model( - config, - sample_builder.kb, - text_seq_batch, - seq_len_batch, - num_vocab_txt=num_vocab_txt, - num_vocab_nmn=num_vocab_nmn, - EOS_idx=assembler.EOS_idx, - num_choices=num_choices, - decoder_sampling=False) - compiler = model.compiler - scores = model.scores - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - snapshot_file = os.path.join(config.model_dir, FLAGS.snapshot_name) - tf.logging.info('Snapshot file: %s' % snapshot_file) - - snapshot_saver = tf.train.Saver() - 
snapshot_saver.restore(sess, snapshot_file) - - # Evaluation metrics - num_questions = len(data_test.Y) - tf.logging.info('# of test questions: %d' % num_questions) - - answer_correct = 0 - layout_correct = 0 - layout_valid = 0 - for batch in data_reader_test.batches(): - # set up input and output tensors - h = sess.partial_run_setup( - fetches=[model.predicted_tokens, scores], - feeds=[text_seq_batch, seq_len_batch, compiler.loom_input_tensor]) - - # Part 1: Generate module layout - tokens = sess.partial_run( - h, - fetches=model.predicted_tokens, - feed_dict={ - text_seq_batch: batch['input_seq_batch'], - seq_len_batch: batch['seq_len_batch'] - }) - - # Compute accuracy of the predicted layout - gt_tokens = batch['gt_layout_batch'] - layout_correct += np.sum( - np.all( - np.logical_or(tokens == gt_tokens, gt_tokens == assembler.EOS_idx), - axis=0)) - - # Assemble the layout tokens into network structure - expr_list, expr_validity_array = assembler.assemble(tokens) - layout_valid += np.sum(expr_validity_array) - labels = batch['ans_label_batch'] - # Build TensorFlow Fold input for NMN - expr_feed = compiler.build_feed_dict(expr_list) - - # Part 2: Run NMN and learning steps - scores_val = sess.partial_run(h, scores, feed_dict=expr_feed) - - # Compute accuracy - predictions = np.argmax(scores_val, axis=1) - answer_correct += np.sum( - np.logical_and(expr_validity_array, predictions == labels)) - - answer_accuracy = answer_correct * 1.0 / num_questions - layout_accuracy = layout_correct * 1.0 / num_questions - layout_validity = layout_valid * 1.0 / num_questions - - tf.logging.info('test answer accuracy = %f, ' - 'test layout accuracy = %f, ' - 'test layout validity = %f' % - (answer_accuracy, layout_accuracy, layout_validity)) - - -if __name__ == '__main__': - config_raw, unparsed = get_config() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/research/qa_kg/exp_1_hop/train_gt_layout.py b/research/qa_kg/exp_1_hop/train_gt_layout.py deleted file 
mode 100644 index 02bafc428afc4d1f2b39d6bd56e6098d4b1b8ca7..0000000000000000000000000000000000000000 --- a/research/qa_kg/exp_1_hop/train_gt_layout.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import os -import sys -sys.path.append(os.path.abspath(os.path.join(__file__, '../../'))) -import numpy as np -import tensorflow as tf -from config import get_config -from model_n2nmn.assembler import Assembler -from model_n2nmn.model import Model -from util.data_reader import DataReader -from util.data_reader import SampleBuilder -from util.misc import prepare_dirs_and_logger -from util.misc import save_config -from util.misc import show_all_variables - - -def main(_): - config = prepare_dirs_and_logger(config_raw) - save_config(config) - - rng = np.random.RandomState(config.random_seed) - tf.set_random_seed(config.random_seed) - config.rng = rng - - config.module_names = ['_key_find', '_key_filter', '_val_desc', ''] - config.gt_layout_tokens = ['_key_find', '_key_filter', '_val_desc', ''] - assembler = Assembler(config) - - sample_builder = SampleBuilder(config) - config = sample_builder.config # update T_encoder according to data - data_train = sample_builder.data_all['train'] - data_reader_train = DataReader( - config, data_train, assembler, shuffle=True, one_pass=False) - - num_vocab_txt = 
len(sample_builder.dict_all) - num_vocab_nmn = len(assembler.module_names) - num_choices = len(sample_builder.dict_all) - - # Network inputs - text_seq_batch = tf.placeholder(tf.int32, [None, None]) - seq_len_batch = tf.placeholder(tf.int32, [None]) - ans_label_batch = tf.placeholder(tf.int32, [None]) - use_gt_layout = tf.constant(True, dtype=tf.bool) - gt_layout_batch = tf.placeholder(tf.int32, [None, None]) - - # The model for training - model = Model( - config, - sample_builder.kb, - text_seq_batch, - seq_len_batch, - num_vocab_txt=num_vocab_txt, - num_vocab_nmn=num_vocab_nmn, - EOS_idx=assembler.EOS_idx, - num_choices=num_choices, - decoder_sampling=True, - use_gt_layout=use_gt_layout, - gt_layout_batch=gt_layout_batch) - compiler = model.compiler - scores = model.scores - log_seq_prob = model.log_seq_prob - - # Loss function - softmax_loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=scores, labels=ans_label_batch) - # The final per-sample loss, which is loss for valid expr - # and invalid_expr_loss for invalid expr - final_loss_per_sample = softmax_loss_per_sample # All exprs are valid - - avg_sample_loss = tf.reduce_mean(final_loss_per_sample) - seq_likelihood_loss = tf.reduce_mean(-log_seq_prob) - - total_training_loss = seq_likelihood_loss + avg_sample_loss - total_loss = total_training_loss + config.weight_decay * model.l2_reg - - # Train with Adam optimizer - solver = tf.train.AdamOptimizer() - gradients = solver.compute_gradients(total_loss) - - # Clip gradient by L2 norm - gradients = [(tf.clip_by_norm(g, config.max_grad_norm), v) - for g, v in gradients] - solver_op = solver.apply_gradients(gradients) - - # Training operation - with tf.control_dependencies([solver_op]): - train_step = tf.constant(0) - - # Write summary to TensorBoard - log_writer = tf.summary.FileWriter(config.log_dir, tf.get_default_graph()) - - loss_ph = tf.placeholder(tf.float32, []) - entropy_ph = tf.placeholder(tf.float32, []) - accuracy_ph = 
tf.placeholder(tf.float32, []) - summary_train = [ - tf.summary.scalar('avg_sample_loss', loss_ph), - tf.summary.scalar('entropy', entropy_ph), - tf.summary.scalar('avg_accuracy', accuracy_ph) - ] - log_step_train = tf.summary.merge(summary_train) - - # Training - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - snapshot_saver = tf.train.Saver(max_to_keep=None) # keep all snapshots - show_all_variables() - - avg_accuracy = 0 - accuracy_decay = 0.99 - for n_iter, batch in enumerate(data_reader_train.batches()): - if n_iter >= config.max_iter: - break - - # set up input and output tensors - h = sess.partial_run_setup( - fetches=[ - model.predicted_tokens, model.entropy_reg, scores, avg_sample_loss, - train_step - ], - feeds=[ - text_seq_batch, seq_len_batch, gt_layout_batch, - compiler.loom_input_tensor, ans_label_batch - ]) - - # Part 1: Generate module layout - tokens, entropy_reg_val = sess.partial_run( - h, - fetches=(model.predicted_tokens, model.entropy_reg), - feed_dict={ - text_seq_batch: batch['input_seq_batch'], - seq_len_batch: batch['seq_len_batch'], - gt_layout_batch: batch['gt_layout_batch'] - }) - # Assemble the layout tokens into network structure - expr_list, expr_validity_array = assembler.assemble(tokens) - # all exprs should be valid (since they are ground-truth) - assert np.all(expr_validity_array) - labels = batch['ans_label_batch'] - # Build TensorFlow Fold input for NMN - expr_feed = compiler.build_feed_dict(expr_list) - expr_feed[ans_label_batch] = labels - - # Part 2: Run NMN and learning steps - scores_val, avg_sample_loss_val, _ = sess.partial_run( - h, fetches=(scores, avg_sample_loss, train_step), feed_dict=expr_feed) - - # Compute accuracy - predictions = np.argmax(scores_val, axis=1) - accuracy = np.mean( - np.logical_and(expr_validity_array, predictions == labels)) - avg_accuracy += (1 - accuracy_decay) * (accuracy - avg_accuracy) - - # Add to TensorBoard summary - if (n_iter + 1) % config.log_interval == 0: - 
tf.logging.info('iter = %d\n\t' - 'loss = %f, accuracy (cur) = %f, ' - 'accuracy (avg) = %f, entropy = %f' % - (n_iter + 1, avg_sample_loss_val, accuracy, avg_accuracy, - -entropy_reg_val)) - summary = sess.run( - fetches=log_step_train, - feed_dict={ - loss_ph: avg_sample_loss_val, - entropy_ph: -entropy_reg_val, - accuracy_ph: avg_accuracy - }) - log_writer.add_summary(summary, n_iter + 1) - - # Save snapshot - if (n_iter + 1) % config.snapshot_interval == 0: - snapshot_file = os.path.join(config.model_dir, '%08d' % (n_iter + 1)) - snapshot_saver.save(sess, snapshot_file, write_meta_graph=False) - tf.logging.info('Snapshot saved to %s' % snapshot_file) - - tf.logging.info('Run finished.') - - -if __name__ == '__main__': - config_raw, unparsed = get_config() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/research/qa_kg/model_n2nmn/__init__.py b/research/qa_kg/model_n2nmn/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/qa_kg/model_n2nmn/assembler.py b/research/qa_kg/model_n2nmn/assembler.py deleted file mode 100644 index f5839f6f49d7d30774195749405d43ed014a0049..0000000000000000000000000000000000000000 --- a/research/qa_kg/model_n2nmn/assembler.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import numpy as np - -# the number of attention input to each module -_module_input_num = { - '_key_find': 0, - '_key_filter': 1, - '_val_desc': 1} -_module_output_type = { - '_key_find': 'att', - '_key_filter': 'att', - '_val_desc': 'ans' -} - -INVALID_EXPR = 'INVALID_EXPR' - - -class Assembler: - - def __init__(self, config): - # read the module list, and record the index of each module and - self.module_names = config.module_names - # find the index of - for n_s in range(len(self.module_names)): - if self.module_names[n_s] == '': - self.EOS_idx = n_s - break - # build a dictionary from module name to token index - self.name2idx_dict = { - name: n_s - for n_s, name in enumerate(self.module_names) - } - - def module_list2tokens(self, module_list, max_len=None): - layout_tokens = [self.name2idx_dict[name] for name in module_list] - if max_len is not None: - if len(module_list) >= max_len: - raise ValueError('Not enough time steps to add ') - layout_tokens += [self.EOS_idx] * (max_len - len(module_list)) - return layout_tokens - - def _layout_tokens2str(self, layout_tokens): - return ' '.join([self.module_names[idx] for idx in layout_tokens]) - - def _invalid_expr(self, layout_tokens, error_str): - return { - 'module': INVALID_EXPR, - 'expr_str': self._layout_tokens2str(layout_tokens), - 'error': error_str - } - - def _assemble_layout_tokens(self, layout_tokens, batch_idx): - # Every module takes a time_idx as the index from LSTM hidden states - # (even if it doesn't need it, like _and), and different arity of - # attention inputs. 
The output type can be either attention or answer - # - # The final assembled expression for each instance is as follows: - # expr_type := - # {'module': '_find', 'output_type': 'att', 'time_idx': idx} - # | {'module': '_relocate', 'output_type': 'att', 'time_idx': idx, - # 'inputs_0': } - # | {'module': '_and', 'output_type': 'att', 'time_idx': idx, - # 'inputs_0': , 'inputs_1': )} - # | {'module': '_describe', 'output_type': 'ans', 'time_idx': idx, - # 'inputs_0': } - # | {'module': INVALID_EXPR, 'expr_str': '...', 'error': '...', - # 'assembly_loss': } (for invalid expressions) - # - - # A valid layout must contain . Assembly fails if it doesn't. - if not np.any(layout_tokens == self.EOS_idx): - return self._invalid_expr(layout_tokens, 'cannot find ') - - # Decoding Reverse Polish Notation with a stack - decoding_stack = [] - for t in range(len(layout_tokens)): - # decode a module/operation - module_idx = layout_tokens[t] - if module_idx == self.EOS_idx: - break - module_name = self.module_names[module_idx] - expr = { - 'module': module_name, - 'output_type': _module_output_type[module_name], - 'time_idx': t, - 'batch_idx': batch_idx - } - - input_num = _module_input_num[module_name] - # Check if there are enough input in the stack - if len(decoding_stack) < input_num: - # Invalid expression. Not enough input. - return self._invalid_expr(layout_tokens, - 'not enough input for ' + module_name) - - # Get the input from stack - for n_input in range(input_num - 1, -1, -1): - stack_top = decoding_stack.pop() - if stack_top['output_type'] != 'att': - # Invalid expression. 
Input must be attention - return self._invalid_expr(layout_tokens, - 'input incompatible for ' + module_name) - expr['input_%d' % n_input] = stack_top - - decoding_stack.append(expr) - - # After decoding the reverse polish expression, there should be exactly - # one expression in the stack - if len(decoding_stack) != 1: - return self._invalid_expr( - layout_tokens, - 'final stack size not equal to 1 (%d remains)' % len(decoding_stack)) - - result = decoding_stack[0] - # The result type should be answer, not attention - if result['output_type'] != 'ans': - return self._invalid_expr(layout_tokens, - 'result type must be ans, not att') - return result - - def assemble(self, layout_tokens_batch): - # layout_tokens_batch is a numpy array with shape [max_dec_len, batch_size], - # containing module tokens and , in Reverse Polish Notation. - _, batch_size = layout_tokens_batch.shape - expr_list = [ - self._assemble_layout_tokens(layout_tokens_batch[:, batch_i], batch_i) - for batch_i in range(batch_size) - ] - expr_validity = np.array( - [expr['module'] != INVALID_EXPR for expr in expr_list], np.bool) - return expr_list, expr_validity diff --git a/research/qa_kg/model_n2nmn/model.py b/research/qa_kg/model_n2nmn/model.py deleted file mode 100644 index 56896f438006ac28c82d8e92ded2d7bbf8cf3863..0000000000000000000000000000000000000000 --- a/research/qa_kg/model_n2nmn/model.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import numpy as np -import tensorflow as tf -import tensorflow_fold as td -from model_n2nmn import netgen_att -from model_n2nmn import assembler -from model_n2nmn.modules import Modules - - -class Model: - - def __init__(self, - config, - kb, - text_seq_batch, - seq_length_batch, - num_vocab_txt, - num_vocab_nmn, - EOS_idx, - num_choices, - decoder_sampling, - use_gt_layout=None, - gt_layout_batch=None, - scope='neural_module_network', - reuse=None): - with tf.variable_scope(scope, reuse=reuse): - # Part 1: Seq2seq RNN to generate module layout tokens - - embedding_mat = tf.get_variable( - 'embedding_mat', [num_vocab_txt, config.embed_dim_txt], - initializer=tf.contrib.layers.xavier_initializer()) - - with tf.variable_scope('layout_generation'): - att_seq2seq = netgen_att.AttentionSeq2Seq( - config, text_seq_batch, seq_length_batch, num_vocab_txt, - num_vocab_nmn, EOS_idx, decoder_sampling, embedding_mat, - use_gt_layout, gt_layout_batch) - self.att_seq2seq = att_seq2seq - predicted_tokens = att_seq2seq.predicted_tokens - token_probs = att_seq2seq.token_probs - word_vecs = att_seq2seq.word_vecs - neg_entropy = att_seq2seq.neg_entropy - self.atts = att_seq2seq.atts - - self.predicted_tokens = predicted_tokens - self.token_probs = token_probs - self.word_vecs = word_vecs - self.neg_entropy = neg_entropy - - # log probability of each generated sequence - self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0) - - # Part 2: Neural Module Network - with tf.variable_scope('layout_execution'): - modules = Modules(config, kb, word_vecs, num_choices, embedding_mat) - self.modules = modules - # Recursion of modules - att_shape = [len(kb)] - # Forward declaration of module recursion - att_expr_decl = td.ForwardDeclaration(td.PyObjectType(), - td.TensorType(att_shape)) - # _key_find 
- case_key_find = td.Record([('time_idx', td.Scalar(dtype='int32')), - ('batch_idx', td.Scalar(dtype='int32'))]) - case_key_find = case_key_find >> td.ScopedLayer( - modules.KeyFindModule, name_or_scope='KeyFindModule') - # _key_filter - case_key_filter = td.Record([('input_0', att_expr_decl()), - ('time_idx', td.Scalar('int32')), - ('batch_idx', td.Scalar('int32'))]) - case_key_filter = case_key_filter >> td.ScopedLayer( - modules.KeyFilterModule, name_or_scope='KeyFilterModule') - recursion_cases = td.OneOf( - td.GetItem('module'), - {'_key_find': case_key_find, - '_key_filter': case_key_filter}) - att_expr_decl.resolve_to(recursion_cases) - # _val_desc: output scores for choice (for valid expressions) - predicted_scores = td.Record([('input_0', recursion_cases), - ('time_idx', td.Scalar('int32')), - ('batch_idx', td.Scalar('int32'))]) - predicted_scores = predicted_scores >> td.ScopedLayer( - modules.ValDescribeModule, name_or_scope='ValDescribeModule') - - # For invalid expressions, define a dummy answer - # so that all answers have the same form - INVALID = assembler.INVALID_EXPR - dummy_scores = td.Void() >> td.FromTensor( - np.zeros(num_choices, np.float32)) - output_scores = td.OneOf( - td.GetItem('module'), - {'_val_desc': predicted_scores, - INVALID: dummy_scores}) - - # compile and get the output scores - self.compiler = td.Compiler.create(output_scores) - self.scores = self.compiler.output_tensors[0] - - # Regularization: Entropy + L2 - self.entropy_reg = tf.reduce_mean(neg_entropy) - module_weights = [ - v for v in tf.trainable_variables() - if (scope in v.op.name and v.op.name.endswith('weights')) - ] - self.l2_reg = tf.add_n([tf.nn.l2_loss(v) for v in module_weights]) diff --git a/research/qa_kg/model_n2nmn/modules.py b/research/qa_kg/model_n2nmn/modules.py deleted file mode 100644 index 8c7a7370f81a5b81d08c87136688765dce556ada..0000000000000000000000000000000000000000 --- a/research/qa_kg/model_n2nmn/modules.py +++ /dev/null @@ -1,131 +0,0 @@ -# 
Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf - - -class Modules: - - def __init__(self, config, kb, word_vecs, num_choices, embedding_mat): - self.config = config - - self.embedding_mat = embedding_mat - - # kb has shape [N_kb, 3] - self.kb = kb - self.embed_keys_e, self.embed_keys_r, self.embed_vals_e = self.embed_kb() - - # word_vecs has shape [T_decoder, N, D_txt] - self.word_vecs = word_vecs - self.num_choices = num_choices - - def embed_kb(self): - keys_e, keys_r, vals_e = [], [], [] - for idx_sub, idx_rel, idx_obj in self.kb: - keys_e.append(idx_sub) - keys_r.append(idx_rel) - vals_e.append(idx_obj) - embed_keys_e = tf.nn.embedding_lookup(self.embedding_mat, keys_e) - embed_keys_r = tf.nn.embedding_lookup(self.embedding_mat, keys_r) - embed_vals_e = tf.nn.embedding_lookup(self.embedding_mat, vals_e) - return embed_keys_e, embed_keys_r, embed_vals_e - - def _slice_word_vecs(self, time_idx, batch_idx): - # this callable will be wrapped into a td.Function - # In TF Fold, batch_idx and time_idx are both [N_batch, 1] tensors - # time is highest dim in word_vecs - joint_index = tf.stack([time_idx, batch_idx], axis=1) - return tf.gather_nd(self.word_vecs, joint_index) - - # All the layers are wrapped with td.ScopedLayer - def KeyFindModule(self, - time_idx, - batch_idx, - scope='KeyFindModule', - 
reuse=None): - # In TF Fold, batch_idx and time_idx are both [N_batch, 1] tensors - text_param = self._slice_word_vecs(time_idx, batch_idx) - - # Mapping: embed_keys_e x text_param -> att - # Input: - # embed_keys_e: [N_kb, D_txt] - # text_param: [N, D_txt] - # Output: - # att: [N, N_kb] - # - # Implementation: - # 1. Elementwise multiplication between embed_key_e and text_param - # 2. L2-normalization - with tf.variable_scope(scope, reuse=reuse): - m = tf.matmul(text_param, self.embed_keys_e, transpose_b=True) - att = tf.nn.l2_normalize(m, dim=1) - return att - - def KeyFilterModule(self, - input_0, - time_idx, - batch_idx, - scope='KeyFilterModule', - reuse=None): - att_0 = input_0 - text_param = self._slice_word_vecs(time_idx, batch_idx) - - # Mapping: and(embed_keys_r x text_param, att) -> att - # Input: - # embed_keys_r: [N_kb, D_txt] - # text_param: [N, D_txt] - # att_0: [N, N_kb] - # Output: - # att: [N, N_kb] - # - # Implementation: - # 1. Elementwise multiplication between embed_key_r and text_param - # 2. L2-normalization - # 3. Take the elementwise-min - with tf.variable_scope(scope, reuse=reuse): - m = tf.matmul(text_param, self.embed_keys_r, transpose_b=True) - att_1 = tf.nn.l2_normalize(m, dim=1) - att = tf.minimum(att_0, att_1) - return att - - def ValDescribeModule(self, - input_0, - time_idx, - batch_idx, - scope='ValDescribeModule', - reuse=None): - att = input_0 - - # Mapping: att -> answer probs - # Input: - # embed_vals_e: [N_kb, D_txt] - # att: [N, N_kb] - # embedding_mat: [self.num_choices, D_txt] - # Output: - # answer_scores: [N, self.num_choices] - # - # Implementation: - # 1. Attention-weighted sum over values - # 2. 
Compute cosine similarity scores between the weighted sum and - # each candidate answer - with tf.variable_scope(scope, reuse=reuse): - # weighted_sum has shape [N, D_txt] - weighted_sum = tf.matmul(att, self.embed_vals_e) - # scores has shape [N, self.num_choices] - scores = tf.matmul( - weighted_sum, - tf.nn.l2_normalize(self.embedding_mat, dim=1), - transpose_b=True) - return scores diff --git a/research/qa_kg/model_n2nmn/netgen_att.py b/research/qa_kg/model_n2nmn/netgen_att.py deleted file mode 100644 index df6509946a5457bb07f2dfdcfab44aaf67447d0f..0000000000000000000000000000000000000000 --- a/research/qa_kg/model_n2nmn/netgen_att.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import tensorflow as tf -from util.nn import fc_layer as fc - - -def _get_lstm_cell(num_layers, lstm_dim): - cell_list = [ - tf.contrib.rnn.BasicLSTMCell(lstm_dim, state_is_tuple=True) - for _ in range(num_layers) - ] - cell = tf.contrib.rnn.MultiRNNCell(cell_list, state_is_tuple=True) - return cell - - -class AttentionSeq2Seq: - - def __init__(self, - config, - text_seq_batch, - seq_length_batch, - num_vocab_txt, - num_vocab_nmn, - EOS_token, - decoder_sampling, - embedding_mat, - use_gt_layout=None, - gt_layout_batch=None, - scope='encoder_decoder', - reuse=None): - self.T_decoder = config.T_decoder - self.encoder_num_vocab = num_vocab_txt - self.encoder_embed_dim = config.embed_dim_txt - self.decoder_num_vocab = num_vocab_nmn - self.decoder_embed_dim = config.embed_dim_nmn - self.lstm_dim = config.lstm_dim - self.num_layers = config.num_layers - self.EOS_token = EOS_token - self.decoder_sampling = decoder_sampling - self.embedding_mat = embedding_mat - - with tf.variable_scope(scope, reuse=reuse): - self._build_encoder(text_seq_batch, seq_length_batch) - self._build_decoder(use_gt_layout, gt_layout_batch) - - def _build_encoder(self, - text_seq_batch, - seq_length_batch, - scope='encoder', - reuse=None): - lstm_dim = self.lstm_dim - num_layers = self.num_layers - - with tf.variable_scope(scope, reuse=reuse): - T = tf.shape(text_seq_batch)[0] - N = tf.shape(text_seq_batch)[1] - self.T_encoder = T - self.N = N - - # text_seq has shape [T, N] and embedded_seq has shape [T, N, D] - embedded_seq = tf.nn.embedding_lookup(self.embedding_mat, text_seq_batch) - self.embedded_input_seq = embedded_seq - - # The RNN - cell = _get_lstm_cell(num_layers, lstm_dim) - - # encoder_outputs has shape [T, N, lstm_dim] - encoder_outputs, encoder_states = tf.nn.dynamic_rnn( - cell, - embedded_seq, - seq_length_batch, - dtype=tf.float32, - time_major=True, - scope='lstm') - self.encoder_outputs = 
encoder_outputs - self.encoder_states = encoder_states - - # transform the encoder outputs for further attention alignments - # encoder_outputs_flat has shape [T, N, lstm_dim] - encoder_h_transformed = fc( - 'encoder_h_transform', - tf.reshape(encoder_outputs, [-1, lstm_dim]), - output_dim=lstm_dim) - encoder_h_transformed = tf.reshape(encoder_h_transformed, - [T, N, lstm_dim]) - self.encoder_h_transformed = encoder_h_transformed - - # seq_not_finished is a shape [T, N, 1] tensor, - # where seq_not_finished[t, n] - # is 1 iff sequence n is not finished at time t, and 0 otherwise - seq_not_finished = tf.less( - tf.range(T)[:, tf.newaxis, tf.newaxis], - seq_length_batch[:, tf.newaxis]) - seq_not_finished = tf.cast(seq_not_finished, tf.float32) - self.seq_not_finished = seq_not_finished - - def _build_decoder(self, - use_gt_layout, - gt_layout_batch, - scope='decoder', - reuse=None): - # The main difference from before is that the decoders now takes another - # input (the attention) when computing the next step - # T_max is the maximum length of decoded sequence (including ) - # - # This function is for decoding only. It performs greedy search or sampling. - # the first input is (its embedding vector) and the subsequent inputs - # are the outputs from previous time step - # num_vocab does not include - # - # use_gt_layout is None or a bool tensor, and gt_layout_batch is a tensor - # with shape [T_max, N]. - # If use_gt_layout is not None, then when use_gt_layout is true, predict - # exactly the tokens in gt_layout_batch, regardless of actual probability. 
- # Otherwise, if sampling is True, sample from the token probability - # If sampling is False, do greedy decoding (beam size 1) - N = self.N - encoder_states = self.encoder_states - T_max = self.T_decoder - lstm_dim = self.lstm_dim - num_layers = self.num_layers - EOS_token = self.EOS_token - sampling = self.decoder_sampling - - with tf.variable_scope(scope, reuse=reuse): - embedding_mat = tf.get_variable( - 'embedding_mat', [self.decoder_num_vocab, self.decoder_embed_dim]) - # we use a separate embedding for , as it is only used in the - # beginning of the sequence - go_embedding = tf.get_variable('go_embedding', - [1, self.decoder_embed_dim]) - - with tf.variable_scope('att_prediction'): - v = tf.get_variable('v', [lstm_dim]) - W_a = tf.get_variable( - 'weights', [lstm_dim, lstm_dim], - initializer=tf.contrib.layers.xavier_initializer()) - b_a = tf.get_variable( - 'biases', lstm_dim, initializer=tf.constant_initializer(0.)) - - # The parameters to predict the next token - with tf.variable_scope('token_prediction'): - W_y = tf.get_variable( - 'weights', [lstm_dim * 2, self.decoder_num_vocab], - initializer=tf.contrib.layers.xavier_initializer()) - b_y = tf.get_variable( - 'biases', - self.decoder_num_vocab, - initializer=tf.constant_initializer(0.)) - - # Attentional decoding - # Loop function is called at time t BEFORE the cell execution at time t, - # and its next_input is used as the input at time t (not t+1) - # c.f. 
https://www.tensorflow.org/api_docs/python/tf/nn/raw_rnn - mask_range = tf.reshape( - tf.range(self.decoder_num_vocab, dtype=tf.int32), [1, -1]) - all_eos_pred = EOS_token * tf.ones([N], tf.int32) - all_one_prob = tf.ones([N], tf.float32) - all_zero_entropy = tf.zeros([N], tf.float32) - if use_gt_layout is not None: - gt_layout_mult = tf.cast(use_gt_layout, tf.int32) - pred_layout_mult = 1 - gt_layout_mult - - def loop_fn(time, cell_output, cell_state, loop_state): - if cell_output is None: # time == 0 - next_cell_state = encoder_states - next_input = tf.tile(go_embedding, [N, 1]) - else: # time > 0 - next_cell_state = cell_state - - # compute the attention map over the input sequence - # a_raw has shape [T, N, 1] - att_raw = tf.reduce_sum( - tf.tanh( - tf.nn.xw_plus_b(cell_output, W_a, b_a) + - self.encoder_h_transformed) * v, - axis=2, - keep_dims=True) - # softmax along the first dimension (T) over not finished examples - # att has shape [T, N, 1] - att = tf.nn.softmax(att_raw, dim=0) * self.seq_not_finished - att = att / tf.reduce_sum(att, axis=0, keep_dims=True) - # d has shape [N, lstm_dim] - d2 = tf.reduce_sum(att * self.encoder_outputs, axis=0) - - # token_scores has shape [N, num_vocab] - token_scores = tf.nn.xw_plus_b( - tf.concat([cell_output, d2], axis=1), W_y, b_y) - # predict the next token (behavior depending on parameters) - if sampling: - # predicted_token has shape [N] - logits = token_scores - predicted_token = tf.cast( - tf.reshape(tf.multinomial(token_scores, 1), [-1]), tf.int32) - else: - # predicted_token has shape [N] - predicted_token = tf.cast(tf.argmax(token_scores, 1), tf.int32) - if use_gt_layout is not None: - predicted_token = (gt_layout_batch[time - 1] * gt_layout_mult + - predicted_token * pred_layout_mult) - - # token_prob has shape [N], the probability of the predicted token - # although token_prob is not needed for predicting the next token - # it is needed in output (for policy gradient training) - # [N, num_vocab] - # mask has 
shape [N, num_vocab] - mask = tf.equal(mask_range, tf.reshape(predicted_token, [-1, 1])) - all_token_probs = tf.nn.softmax(token_scores) - token_prob = tf.reduce_sum( - all_token_probs * tf.cast(mask, tf.float32), axis=1) - neg_entropy = tf.reduce_sum( - all_token_probs * tf.log(all_token_probs), axis=1) - - # is_eos_predicted is a [N] bool tensor, indicating whether - # has already been predicted previously in each sequence - is_eos_predicted = loop_state[2] - predicted_token_old = predicted_token - # if has already been predicted, now predict with - # prob 1 - predicted_token = tf.where(is_eos_predicted, all_eos_pred, - predicted_token) - token_prob = tf.where(is_eos_predicted, all_one_prob, token_prob) - neg_entropy = tf.where(is_eos_predicted, all_zero_entropy, - neg_entropy) - is_eos_predicted = tf.logical_or(is_eos_predicted, - tf.equal(predicted_token_old, - EOS_token)) - - # the prediction is from the cell output of the last step - # timestep (t-1), feed it as input into timestep t - next_input = tf.nn.embedding_lookup(embedding_mat, predicted_token) - - elements_finished = tf.greater_equal(time, T_max) - - # loop_state is a 5-tuple, representing - # 1) the predicted_tokens - # 2) the prob of predicted_tokens - # 3) whether has already been predicted - # 4) the negative entropy of policy (accumulated across timesteps) - # 5) the attention - if loop_state is None: # time == 0 - # Write the predicted token into the output - predicted_token_array = tf.TensorArray( - dtype=tf.int32, size=T_max, infer_shape=False) - token_prob_array = tf.TensorArray( - dtype=tf.float32, size=T_max, infer_shape=False) - att_array = tf.TensorArray( - dtype=tf.float32, size=T_max, infer_shape=False) - next_loop_state = (predicted_token_array, token_prob_array, tf.zeros( - [N], dtype=tf.bool), tf.zeros([N], dtype=tf.float32), att_array) - else: # time > 0 - t_write = time - 1 - next_loop_state = ( - loop_state[0].write(t_write, predicted_token), - loop_state[1].write(t_write, 
token_prob), - is_eos_predicted, - loop_state[3] + neg_entropy, - loop_state[4].write(t_write, att)) - return (elements_finished, next_input, next_cell_state, cell_output, - next_loop_state) - - # The RNN - cell = _get_lstm_cell(num_layers, lstm_dim) - _, _, decodes_ta = tf.nn.raw_rnn(cell, loop_fn, scope='lstm') - predicted_tokens = decodes_ta[0].stack() - token_probs = decodes_ta[1].stack() - neg_entropy = decodes_ta[3] - # atts has shape [T_decoder, T_encoder, N, 1] - atts = decodes_ta[4].stack() - self.atts = atts - # word_vec has shape [T_decoder, N, D] - word_vecs = tf.reduce_sum(atts * self.embedded_input_seq, axis=1) - - predicted_tokens.set_shape([None, None]) - token_probs.set_shape([None, None]) - neg_entropy.set_shape([None]) - word_vecs.set_shape([None, None, self.encoder_embed_dim]) - - self.predicted_tokens = predicted_tokens - self.token_probs = token_probs - self.neg_entropy = neg_entropy - self.word_vecs = word_vecs diff --git a/research/qa_kg/util/__init__.py b/research/qa_kg/util/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/qa_kg/util/data_reader.py b/research/qa_kg/util/data_reader.py deleted file mode 100644 index 397390af6d95b350559fbd20cc55e85a12ce03c0..0000000000000000000000000000000000000000 --- a/research/qa_kg/util/data_reader.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from collections import namedtuple -try: - from queue import Queue # Python 3 -except ImportError: - from Queue import Queue # Python 2 -import re -import threading -import numpy as np -import tensorflow as tf - -Data = namedtuple('Data', ['X', 'Y', 'MultiYs', 'qid']) - - -class SampleBuilder: - - def __init__(self, config): - self.config = config - - self.kb_raw = self.read_kb() - self.data_raw = self.read_raw_data() - - # dictionary of entities, normal words, and relations - self.dict_all = self.gen_dict() - self.reverse_dict_all = dict( - zip(self.dict_all.values(), self.dict_all.keys())) - - tf.logging.info('size of dict: %d' % len(self.dict_all)) - - self.kb = self.build_kb() - self.data_all = self.build_samples() - - def read_kb(self): - kb_raw = [] - for line in open(self.config.KB_file): - sub, rel, obj = line.strip().split('|') - kb_raw.append((sub, rel, obj)) - tf.logging.info('# of KB records: %d' % len(kb_raw)) - return kb_raw - - def read_raw_data(self): - data = dict() - for name in self.config.data_files: - raw = [] - tf.logging.info( - 'Reading data file {}'.format(self.config.data_files[name])) - for line in open(self.config.data_files[name]): - question, answers = line.strip().split('\t') - question = question.replace('],', ']') # ignore ',' in the template - raw.append((question, answers)) - data[name] = raw - return data - - def build_kb(self): - tf.logging.info('Indexing KB...') - kb = [] - for sub, rel, obj in self.kb_raw: - kb.append([self.dict_all[sub], self.dict_all[rel], self.dict_all[obj]]) - return kb - - def gen_dict(self): - s = set() - for sub, rel, obj in self.kb_raw: - s.add(sub) - s.add(rel) - s.add(obj) - for name in self.data_raw: - for question, answers in self.data_raw[name]: - normal = re.split('\[[^\]]+\]', question) - for phrase in 
normal: - for word in phrase.split(): - s.add(word) - s = list(s) - d = {s[idx]: idx for idx in range(len(s))} - return d - - def build_samples(self): - - def map_entity_idx(text): - entities = re.findall('\[[^\]]+\]', text) - for entity in entities: - entity = entity[1:-1] - index = self.dict_all[entity] - text = text.replace('[%s]' % entity, '@%d' % index) - return text - - data_all = dict() - - for name in self.data_raw: - X, Y, MultiYs, qid = [], [], [], [] - for i, (question, answers) in enumerate(self.data_raw[name]): - qdata, labels = [], [] - question = map_entity_idx(question) - for word in question.split(): - if word[0] == '@': - qdata.append(int(word[1:])) - else: - qdata.append(self.dict_all[word]) - for answer in answers.split('|'): - labels.append(self.dict_all[answer]) - if len(qdata) > self.config.T_encoder: - self.config.T_encoder = len(qdata) - for label in labels: - X.append(qdata) - Y.append(label) - MultiYs.append(set(labels)) - qid.append(i) - data_all[name] = Data(X=X, Y=Y, MultiYs=MultiYs, qid=qid) - - return data_all - - -def _run_prefetch(prefetch_queue, batch_loader, data, shuffle, one_pass, - config): - assert len(data.X) == len(data.Y) == len(data.MultiYs) == len(data.qid) - num_samples = len(data.X) - batch_size = config.batch_size - - n_sample = 0 - fetch_order = config.rng.permutation(num_samples) - while True: - sample_ids = fetch_order[n_sample:n_sample + batch_size] - batch = batch_loader.load_one_batch(sample_ids) - prefetch_queue.put(batch, block=True) - - n_sample += len(sample_ids) - if n_sample >= num_samples: - if one_pass: - prefetch_queue.put(None, block=True) - n_sample = 0 - if shuffle: - fetch_order = config.rng.permutation(num_samples) - - -class DataReader: - def __init__(self, - config, - data, - assembler, - shuffle=True, - one_pass=False, - prefetch_num=10): - self.config = config - - self.data = data - self.assembler = assembler - self.batch_loader = BatchLoader(self.config, - self.data, self.assembler) - - 
self.shuffle = shuffle - self.one_pass = one_pass - self.prefetch_queue = Queue(maxsize=prefetch_num) - self.prefetch_thread = threading.Thread(target=_run_prefetch, - args=(self.prefetch_queue, - self.batch_loader, self.data, - self.shuffle, self.one_pass, - self.config)) - self.prefetch_thread.daemon = True - self.prefetch_thread.start() - - def batches(self): - while True: - if self.prefetch_queue.empty(): - tf.logging.warning('Waiting for data loading (IO is slow)...') - batch = self.prefetch_queue.get(block=True) - if batch is None: - assert self.one_pass - tf.logging.info('One pass finished!') - raise StopIteration() - yield batch - - -class BatchLoader: - def __init__(self, config, - data, assembler): - self.config = config - - self.data = data - self.assembler = assembler - - self.T_encoder = config.T_encoder - self.T_decoder = config.T_decoder - - tf.logging.info('T_encoder: %d' % self.T_encoder) - tf.logging.info('T_decoder: %d' % self.T_decoder) - tf.logging.info('batch size: %d' % self.config.batch_size) - - self.gt_layout_tokens = config.gt_layout_tokens - - def load_one_batch(self, sample_ids): - actual_batch_size = len(sample_ids) - input_seq_batch = np.zeros((self.T_encoder, actual_batch_size), np.int32) - seq_len_batch = np.zeros(actual_batch_size, np.int32) - ans_label_batch = np.zeros(actual_batch_size, np.int32) - ans_set_labels_list = [None] * actual_batch_size - question_id_list = [None] * actual_batch_size - gt_layout_batch = np.zeros((self.T_decoder, actual_batch_size), np.int32) - - for batch_i in range(actual_batch_size): - idx = sample_ids[batch_i] - seq_len = len(self.data.X[idx]) - seq_len_batch[batch_i] = seq_len - input_seq_batch[:seq_len, batch_i] = self.data.X[idx] - ans_label_batch[batch_i] = self.data.Y[idx] - ans_set_labels_list[batch_i] = self.data.MultiYs[idx] - question_id_list[batch_i] = self.data.qid[idx] - - gt_layout_batch[:, batch_i] = self.assembler.module_list2tokens( - self.gt_layout_tokens, self.T_decoder) - - batch = 
dict(input_seq_batch=input_seq_batch, - seq_len_batch=seq_len_batch, - ans_label_batch=ans_label_batch, - gt_layout_batch=gt_layout_batch, - ans_set_labels_list=ans_set_labels_list, - question_id_list=question_id_list) - return batch diff --git a/research/qa_kg/util/misc.py b/research/qa_kg/util/misc.py deleted file mode 100644 index 9a0199bb403709f3c04e58c17951459febdb40f4..0000000000000000000000000000000000000000 --- a/research/qa_kg/util/misc.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from datetime import datetime -import json -import logging -import os -import tensorflow as tf -import tensorflow.contrib.slim as slim - - -def prepare_dirs_and_logger(config): - formatter = logging.Formatter('%(asctime)s:%(levelname)s::%(message)s') - logger = logging.getLogger('tensorflow') - - for hdlr in logger.handlers: - logger.removeHandler(hdlr) - - handler = logging.StreamHandler() - handler.setFormatter(formatter) - - logger.addHandler(handler) - logger.setLevel(tf.logging.INFO) - - config.log_dir = os.path.join(config.exp_dir, config.log_dir, - config.train_tag) - config.model_dir = os.path.join(config.exp_dir, config.model_dir, - config.train_tag) - config.output_dir = os.path.join(config.exp_dir, config.output_dir, - config.train_tag) - - for path in [ - config.log_dir, config.model_dir, config.output_dir - ]: - if not os.path.exists(path): - os.makedirs(path) - - config.data_files = { - 'train': os.path.join(config.data_dir, config.train_data_file), - 'dev': os.path.join(config.data_dir, config.dev_data_file), - 'test': os.path.join(config.data_dir, config.test_data_file) - } - - return config - - -def get_time(): - return datetime.now().strftime('%Y-%m-%d_%H-%M-%S') - - -def show_all_variables(): - model_vars = tf.trainable_variables() - slim.model_analyzer.analyze_vars(model_vars, print_info=True) - - -def save_config(config): - param_path = os.path.join(config.model_dir, 'params.json') - - tf.logging.info('log dir: %s' % config.log_dir) - tf.logging.info('model dir: %s' % config.model_dir) - tf.logging.info('param path: %s' % param_path) - tf.logging.info('output dir: %s' % config.output_dir) - - with open(param_path, 'w') as f: - f.write(json.dumps(config.__dict__, indent=4, sort_keys=True)) diff --git a/research/qa_kg/util/nn.py b/research/qa_kg/util/nn.py deleted file mode 100644 index 
38ba02b2ecac1cea51308287a48e3062ee51fa81..0000000000000000000000000000000000000000 --- a/research/qa_kg/util/nn.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf - - -def fc_layer(name, - bottom, - output_dim, - bias_term=True, - weights_initializer=None, - biases_initializer=None, - reuse=None): - # flatten bottom input - shape = bottom.get_shape().as_list() - input_dim = 1 - for d in shape[1:]: - input_dim *= d - flat_bottom = tf.reshape(bottom, [-1, input_dim]) - - # weights and biases variables - with tf.variable_scope(name, reuse=reuse): - # initialize the variables - if weights_initializer is None: - weights_initializer = tf.contrib.layers.xavier_initializer() - if bias_term and biases_initializer is None: - biases_initializer = tf.constant_initializer(0.) 
- - # weights has shape [input_dim, output_dim] - weights = tf.get_variable( - 'weights', [input_dim, output_dim], initializer=weights_initializer) - if bias_term: - biases = tf.get_variable( - 'biases', output_dim, initializer=biases_initializer) - if not reuse: - tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, - tf.nn.l2_loss(weights)) - - if bias_term: - fc = tf.nn.xw_plus_b(flat_bottom, weights, biases) - else: - fc = tf.matmul(flat_bottom, weights) - return fc diff --git a/research/real_nvp/README.md b/research/real_nvp/README.md deleted file mode 100644 index c20ef111eb070be94bf6c11ea15ec1ce9d2ad686..0000000000000000000000000000000000000000 --- a/research/real_nvp/README.md +++ /dev/null @@ -1,282 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Real NVP in TensorFlow - -*A Tensorflow implementation of the training procedure of* -[*Density estimation using Real NVP*](https://arxiv.org/abs/1605.08803)*, by -Laurent Dinh, Jascha Sohl-Dickstein and Samy Bengio, for Imagenet -(32x32 and 64x64), CelebA and LSUN Including the scripts to -put the datasets in `.tfrecords` format.* - -We are happy to open source the code for *Real NVP*, a novel approach to -density estimation using deep neural networks that enables tractable density -estimation and efficient one-pass inference and sampling. This model -successfully decomposes images into hierarchical features ranging from -high-level concepts to low-resolution details. Visualizations are available -[here](http://goo.gl/yco14s). 
- -## Installation -* python 2.7: - * python 3 support is not available yet -* pip (python package manager) - * `apt-get install python-pip` on Ubuntu - * `brew` installs pip along with python on OSX -* Install the dependencies for [LSUN](https://github.com/fyu/lsun.git) - * Install [OpenCV](http://opencv.org/) - * `pip install numpy lmdb` -* Install the python dependencies - * `pip install scipy scikit-image Pillow` -* Install the -[latest Tensorflow Pip package](https://www.tensorflow.org/get_started/os_setup.html#using-pip) -for Python 2.7 - -## Getting Started -Once you have successfully installed the dependencies, you can start by -downloading the repository: -```shell -git clone --recursive https://github.com/tensorflow/models.git -``` -Afterward, you can use the utilities in this folder prepare the datasets. - -## Preparing datasets -### CelebA -For [*CelebA*](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html), download -`img_align_celeba.zip` from the Dropbox link on this -[page](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) under the -link *Align&Cropped Images* in the *Img* directory and `list_eval_partition.txt` -under the link *Train/Val/Test Partitions* in the *Eval* directory. Then do: - -```shell -mkdir celeba -cd celeba -unzip img_align_celeba.zip -``` - -We'll format the training subset: -```shell -python2.7 ../models/real_nvp/celeba_formatting.py \ - --partition_fn list_eval_partition.txt \ - --file_out celeba_train \ - --fn_root img_align_celeba \ - --set 0 -``` - -Then the validation subset: -```shell -python2.7 ../models/real_nvp/celeba_formatting.py \ - --partition_fn list_eval_partition.txt \ - --file_out celeba_valid \ - --fn_root img_align_celeba \ - --set 1 -``` - -And finally the test subset: -```shell -python2.7 ../models/real_nvp/celeba_formatting.py \ - --partition_fn list_eval_partition.txt \ - --file_out celeba_test \ - --fn_root img_align_celeba \ - --set 2 -``` - -Afterward: -```shell -cd .. 
-``` - -### Small Imagenet -Downloading the [*small Imagenet*](http://image-net.org/small/download.php) -dataset is more straightforward and can be done -entirely in Shell: -```shell -mkdir small_imnet -cd small_imnet -for FILENAME in train_32x32.tar valid_32x32.tar train_64x64.tar valid_64x64.tar -do - curl -O http://image-net.org/small/$FILENAME - tar -xvf $FILENAME -done -``` - -Then, you can format the datasets as follow: -```shell -for DIRNAME in train_32x32 valid_32x32 train_64x64 valid_64x64 -do - python2.7 ../models/real_nvp/imnet_formatting.py \ - --file_out $DIRNAME \ - --fn_root $DIRNAME -done -cd .. -``` - -### LSUN -To prepare the [*LSUN*](http://lsun.cs.princeton.edu/2016/) dataset, we will -need to use the code associated: -```shell -git clone https://github.com/fyu/lsun.git -cd lsun -``` -Then we'll download the db files: -```shell -for CATEGORY in bedroom church_outdoor tower -do - python2.7 download.py -c $CATEGORY - unzip "$CATEGORY"_train_lmdb.zip - unzip "$CATEGORY"_val_lmdb.zip - python2.7 data.py export "$CATEGORY"_train_lmdb \ - --out_dir "$CATEGORY"_train --flat - python2.7 data.py export "$CATEGORY"_val_lmdb \ - --out_dir "$CATEGORY"_val --flat -done -``` - -Finally, we then format the dataset into `.tfrecords`: -```shell -for CATEGORY in bedroom church_outdoor tower -do - python2.7 ../models/real_nvp/lsun_formatting.py \ - --file_out "$CATEGORY"_train \ - --fn_root "$CATEGORY"_train - python2.7 ../models/real_nvp/lsun_formatting.py \ - --file_out "$CATEGORY"_val \ - --fn_root "$CATEGORY"_val -done -cd .. 
-``` - - -## Training -We'll give an example on how to train a model on the small Imagenet -dataset (32x32): -```shell -cd models/real_nvp/ -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 32 \ ---hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset imnet \ ---traindir /tmp/real_nvp_imnet32/train \ ---logdir /tmp/real_nvp_imnet32/train \ ---data_path ../../small_imnet/train_32x32_?????.tfrecords -``` -In parallel, you can run the script to generate visualization from the model: -```shell -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 32 \ ---hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset imnet \ ---traindir /tmp/real_nvp_imnet32/train \ ---logdir /tmp/real_nvp_imnet32/sample \ ---data_path ../../small_imnet/valid_32x32_?????.tfrecords \ ---mode sample -``` -Additionally, you can also run in the script to evaluate the model on the -validation set: -```shell -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 32 \ ---hpconfig=n_scale=4,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset imnet \ ---traindir /tmp/real_nvp_imnet32/train \ ---logdir /tmp/real_nvp_imnet32/eval \ ---data_path ../../small_imnet/valid_32x32_?????.tfrecords \ ---eval_set_size 50000 ---mode eval -``` -The visualizations and validation set evaluation can be seen through -[Tensorboard](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md). 
- -Another example would be how to run the model on LSUN (bedroom category): -```shell -# train the model -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset lsun \ ---traindir /tmp/real_nvp_church_outdoor/train \ ---logdir /tmp/real_nvp_church_outdoor/train \ ---data_path ../../lsun/church_outdoor_train_?????.tfrecords -``` - -```shell -# sample from the model -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset lsun \ ---traindir /tmp/real_nvp_church_outdoor/train \ ---logdir /tmp/real_nvp_church_outdoor/sample \ ---data_path ../../lsun/church_outdoor_val_?????.tfrecords \ ---mode sample -``` - -```shell -# evaluate the model -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset lsun \ ---traindir /tmp/real_nvp_church_outdoor/train \ ---logdir /tmp/real_nvp_church_outdoor/eval \ ---data_path ../../lsun/church_outdoor_val_?????.tfrecords \ ---eval_set_size 300 ---mode eval -``` - -Finally, we'll give the commands to run the model on the CelebA dataset: -```shell -# train the model -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset lsun \ ---traindir /tmp/real_nvp_celeba/train \ ---logdir /tmp/real_nvp_celeba/train \ ---data_path ../../celeba/celeba_train.tfrecords -``` - -```shell -# sample from the model -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset celeba \ ---traindir /tmp/real_nvp_celeba/train \ ---logdir /tmp/real_nvp_celeba/sample \ ---data_path ../../celeba/celeba_valid.tfrecords \ ---mode sample -``` - -```shell -# evaluate the model on validation set -python2.7 
real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset celeba \ ---traindir /tmp/real_nvp_celeba/train \ ---logdir /tmp/real_nvp_celeba/eval_valid \ ---data_path ../../celeba/celeba_valid.tfrecords \ ---eval_set_size 19867 ---mode eval - -# evaluate the model on test set -python2.7 real_nvp_multiscale_dataset.py \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=32,clip_gradient=100,residual_blocks=4 \ ---dataset celeba \ ---traindir /tmp/real_nvp_celeba/train \ ---logdir /tmp/real_nvp_celeba/eval_test \ ---data_path ../../celeba/celeba_test.tfrecords \ ---eval_set_size 19962 ---mode eval -``` - -## Credits -This code was written by Laurent Dinh -([@laurent-dinh](https://github.com/laurent-dinh)) with -the help of -Jascha Sohl-Dickstein ([@Sohl-Dickstein](https://github.com/Sohl-Dickstein) -and [jaschasd@google.com](mailto:jaschasd@google.com)), -Samy Bengio, Jon Shlens, Sherry Moore and -David Andersen. diff --git a/research/real_nvp/__init__.py b/research/real_nvp/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/real_nvp/celeba_formatting.py b/research/real_nvp/celeba_formatting.py deleted file mode 100644 index e03571086d88763264d7660aa5e9db5e9074dec5..0000000000000000000000000000000000000000 --- a/research/real_nvp/celeba_formatting.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""CelebA dataset formating. - -Download img_align_celeba.zip from -http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html under the -link "Align&Cropped Images" in the "Img" directory and list_eval_partition.txt -under the link "Train/Val/Test Partitions" in the "Eval" directory. Then do: -unzip img_align_celeba.zip - -Use the script as follow: -python celeba_formatting.py \ - --partition_fn [PARTITION_FILE_PATH] \ - --file_out [OUTPUT_FILE_PATH_PREFIX] \ - --fn_root [CELEBA_FOLDER] \ - --set [SUBSET_INDEX] - -""" - -from __future__ import print_function - -import os -import os.path - -import scipy.io -import scipy.io.wavfile -import scipy.ndimage -import tensorflow as tf - - -tf.flags.DEFINE_string("file_out", "", - "Filename of the output .tfrecords file.") -tf.flags.DEFINE_string("fn_root", "", "Name of root file path.") -tf.flags.DEFINE_string("partition_fn", "", "Partition file path.") -tf.flags.DEFINE_string("set", "", "Name of subset.") - -FLAGS = tf.flags.FLAGS - - -def _int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def _bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def main(): - """Main converter function.""" - # Celeb A - with open(FLAGS.partition_fn, "r") as infile: - img_fn_list = infile.readlines() - img_fn_list = [elem.strip().split() for elem in img_fn_list] - img_fn_list = [elem[0] for elem in img_fn_list if elem[1] == FLAGS.set] - fn_root = FLAGS.fn_root - num_examples = len(img_fn_list) - - file_out = "%s.tfrecords" % FLAGS.file_out - writer = tf.python_io.TFRecordWriter(file_out) - for example_idx, img_fn in enumerate(img_fn_list): - if example_idx % 1000 == 0: - print(example_idx, "/", num_examples) - image_raw = scipy.ndimage.imread(os.path.join(fn_root, 
img_fn)) - rows = image_raw.shape[0] - cols = image_raw.shape[1] - depth = image_raw.shape[2] - image_raw = image_raw.tostring() - example = tf.train.Example( - features=tf.train.Features( - feature={ - "height": _int64_feature(rows), - "width": _int64_feature(cols), - "depth": _int64_feature(depth), - "image_raw": _bytes_feature(image_raw) - } - ) - ) - writer.write(example.SerializeToString()) - writer.close() - - -if __name__ == "__main__": - main() diff --git a/research/real_nvp/imnet_formatting.py b/research/real_nvp/imnet_formatting.py deleted file mode 100644 index 1775dd54d368b62d047d1428d4bcf79ad4a68ae0..0000000000000000000000000000000000000000 --- a/research/real_nvp/imnet_formatting.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""LSUN dataset formatting. 
- -Download and format the Imagenet dataset as follow: -mkdir [IMAGENET_PATH] -cd [IMAGENET_PATH] -for FILENAME in train_32x32.tar valid_32x32.tar train_64x64.tar valid_64x64.tar -do - curl -O http://image-net.org/small/$FILENAME - tar -xvf $FILENAME -done - -Then use the script as follow: -for DIRNAME in train_32x32 valid_32x32 train_64x64 valid_64x64 -do - python imnet_formatting.py \ - --file_out $DIRNAME \ - --fn_root $DIRNAME -done - -""" - -from __future__ import print_function - -import os -import os.path - -import scipy.io -import scipy.io.wavfile -import scipy.ndimage -import tensorflow as tf - - -tf.flags.DEFINE_string("file_out", "", - "Filename of the output .tfrecords file.") -tf.flags.DEFINE_string("fn_root", "", "Name of root file path.") - -FLAGS = tf.flags.FLAGS - - -def _int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def _bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def main(): - """Main converter function.""" - # LSUN - fn_root = FLAGS.fn_root - img_fn_list = os.listdir(fn_root) - img_fn_list = [img_fn for img_fn in img_fn_list - if img_fn.endswith('.png')] - num_examples = len(img_fn_list) - - n_examples_per_file = 10000 - for example_idx, img_fn in enumerate(img_fn_list): - if example_idx % n_examples_per_file == 0: - file_out = "%s_%05d.tfrecords" - file_out = file_out % (FLAGS.file_out, - example_idx // n_examples_per_file) - print("Writing on:", file_out) - writer = tf.python_io.TFRecordWriter(file_out) - if example_idx % 1000 == 0: - print(example_idx, "/", num_examples) - image_raw = scipy.ndimage.imread(os.path.join(fn_root, img_fn)) - rows = image_raw.shape[0] - cols = image_raw.shape[1] - depth = image_raw.shape[2] - image_raw = image_raw.astype("uint8") - image_raw = image_raw.tostring() - example = tf.train.Example( - features=tf.train.Features( - feature={ - "height": _int64_feature(rows), - "width": _int64_feature(cols), - 
"depth": _int64_feature(depth), - "image_raw": _bytes_feature(image_raw) - } - ) - ) - writer.write(example.SerializeToString()) - if example_idx % n_examples_per_file == (n_examples_per_file - 1): - writer.close() - writer.close() - - -if __name__ == "__main__": - main() diff --git a/research/real_nvp/lsun_formatting.py b/research/real_nvp/lsun_formatting.py deleted file mode 100644 index 13a21c5e90f86dcdea777419d1df848fcea03d45..0000000000000000000000000000000000000000 --- a/research/real_nvp/lsun_formatting.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""LSUN dataset formatting. 
- -Download and format the LSUN dataset as follow: -git clone https://github.com/fyu/lsun.git -cd lsun -python2.7 download.py -c [CATEGORY] - -Then unzip the downloaded .zip files before executing: -python2.7 data.py export [IMAGE_DB_PATH] --out_dir [LSUN_FOLDER] --flat - -Then use the script as follow: -python lsun_formatting.py \ - --file_out [OUTPUT_FILE_PATH_PREFIX] \ - --fn_root [LSUN_FOLDER] - -""" -from __future__ import print_function - -import os -import os.path - -import numpy -import skimage.transform -from PIL import Image -import tensorflow as tf - - -tf.flags.DEFINE_string("file_out", "", - "Filename of the output .tfrecords file.") -tf.flags.DEFINE_string("fn_root", "", "Name of root file path.") - -FLAGS = tf.flags.FLAGS - - -def _int64_feature(value): - return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) - - -def _bytes_feature(value): - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -def main(): - """Main converter function.""" - fn_root = FLAGS.fn_root - img_fn_list = os.listdir(fn_root) - img_fn_list = [img_fn for img_fn in img_fn_list - if img_fn.endswith('.webp')] - num_examples = len(img_fn_list) - - n_examples_per_file = 10000 - for example_idx, img_fn in enumerate(img_fn_list): - if example_idx % n_examples_per_file == 0: - file_out = "%s_%05d.tfrecords" - file_out = file_out % (FLAGS.file_out, - example_idx // n_examples_per_file) - print("Writing on:", file_out) - writer = tf.python_io.TFRecordWriter(file_out) - if example_idx % 1000 == 0: - print(example_idx, "/", num_examples) - image_raw = numpy.array(Image.open(os.path.join(fn_root, img_fn))) - rows = image_raw.shape[0] - cols = image_raw.shape[1] - depth = image_raw.shape[2] - downscale = min(rows / 96., cols / 96.) - image_raw = skimage.transform.pyramid_reduce(image_raw, downscale) - image_raw *= 255. 
- image_raw = image_raw.astype("uint8") - rows = image_raw.shape[0] - cols = image_raw.shape[1] - depth = image_raw.shape[2] - image_raw = image_raw.tostring() - example = tf.train.Example( - features=tf.train.Features( - feature={ - "height": _int64_feature(rows), - "width": _int64_feature(cols), - "depth": _int64_feature(depth), - "image_raw": _bytes_feature(image_raw) - } - ) - ) - writer.write(example.SerializeToString()) - if example_idx % n_examples_per_file == (n_examples_per_file - 1): - writer.close() - writer.close() - - -if __name__ == "__main__": - main() diff --git a/research/real_nvp/real_nvp_multiscale_dataset.py b/research/real_nvp/real_nvp_multiscale_dataset.py deleted file mode 100644 index c0e1864f1988cd983cdba14ced2462dae1b67e29..0000000000000000000000000000000000000000 --- a/research/real_nvp/real_nvp_multiscale_dataset.py +++ /dev/null @@ -1,1639 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Script for training, evaluation and sampling for Real NVP. 
- -$ python real_nvp_multiscale_dataset.py \ ---alsologtostderr \ ---image_size 64 \ ---hpconfig=n_scale=5,base_dim=8 \ ---dataset imnet \ ---data_path [DATA_PATH] -""" - -from __future__ import print_function - -import time -from datetime import datetime -import os - -import numpy -from six.moves import xrange -import tensorflow as tf - -from tensorflow import gfile - -from real_nvp_utils import ( - batch_norm, batch_norm_log_diff, conv_layer, - squeeze_2x2, squeeze_2x2_ordered, standard_normal_ll, - standard_normal_sample, unsqueeze_2x2, variable_on_cpu) - - -tf.flags.DEFINE_string("master", "local", - "BNS name of the TensorFlow master, or local.") - -tf.flags.DEFINE_string("logdir", "/tmp/real_nvp_multiscale", - "Directory to which writes logs.") - -tf.flags.DEFINE_string("traindir", "/tmp/real_nvp_multiscale", - "Directory to which writes logs.") - -tf.flags.DEFINE_integer("train_steps", 1000000000000000000, - "Number of steps to train for.") - -tf.flags.DEFINE_string("data_path", "", "Path to the data.") - -tf.flags.DEFINE_string("mode", "train", - "Mode of execution. Must be 'train', " - "'sample' or 'eval'.") - -tf.flags.DEFINE_string("dataset", "imnet", - "Dataset used. Must be 'imnet', " - "'celeba' or 'lsun'.") - -tf.flags.DEFINE_integer("recursion_type", 2, - "Type of the recursion.") - -tf.flags.DEFINE_integer("image_size", 64, - "Size of the input image.") - -tf.flags.DEFINE_integer("eval_set_size", 0, - "Size of evaluation dataset.") - -tf.flags.DEFINE_string( - "hpconfig", "", - "A comma separated list of hyperparameters for the model. Format is " - "hp1=value1,hp2=value2,etc. 
If this FLAG is set, the model will be trained " - "with the specified hyperparameters, filling in missing hyperparameters " - "from the default_values in |hyper_params|.") - -FLAGS = tf.flags.FLAGS - -class HParams(object): - """Dictionary of hyperparameters.""" - def __init__(self, **kwargs): - self.dict_ = kwargs - self.__dict__.update(self.dict_) - - def update_config(self, in_string): - """Update the dictionary with a comma separated list.""" - pairs = in_string.split(",") - pairs = [pair.split("=") for pair in pairs] - for key, val in pairs: - self.dict_[key] = type(self.dict_[key])(val) - self.__dict__.update(self.dict_) - return self - - def __getitem__(self, key): - return self.dict_[key] - - def __setitem__(self, key, val): - self.dict_[key] = val - self.__dict__.update(self.dict_) - - -def get_default_hparams(): - """Get the default hyperparameters.""" - return HParams( - batch_size=64, - residual_blocks=2, - n_couplings=2, - n_scale=4, - learning_rate=0.001, - momentum=1e-1, - decay=1e-3, - l2_coeff=0.00005, - clip_gradient=100., - optimizer="adam", - dropout_mask=0, - base_dim=32, - bottleneck=0, - use_batch_norm=1, - alternate=1, - use_aff=1, - skip=1, - data_constraint=.9, - n_opt=0) - - -# RESNET UTILS -def residual_block(input_, dim, name, use_batch_norm=True, - train=True, weight_norm=True, bottleneck=False): - """Residual convolutional block.""" - with tf.variable_scope(name): - res = input_ - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_in", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - if bottleneck: - res = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim, - name="h_0", stddev=numpy.sqrt(2. 
/ (dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, - name="bn_0", scale=False, train=train, - epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim, - dim_out=dim, name="h_1", stddev=numpy.sqrt(2. / (1. * dim)), - strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None, - bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_1", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim, - name="out", stddev=numpy.sqrt(2. / (1. * dim)), - strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None, - bias=True, weight_norm=weight_norm, scale=True) - else: - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim, - name="h_0", stddev=numpy.sqrt(2. / (dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_0", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim, - name="out", stddev=numpy.sqrt(2. / (1. 
* dim)), - strides=[1, 1, 1, 1], padding="SAME", nonlinearity=None, - bias=True, weight_norm=weight_norm, scale=True) - res += input_ - - return res - - -def resnet(input_, dim_in, dim, dim_out, name, use_batch_norm=True, - train=True, weight_norm=True, residual_blocks=5, - bottleneck=False, skip=True): - """Residual convolutional network.""" - with tf.variable_scope(name): - res = input_ - if residual_blocks != 0: - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim_in, dim_out=dim, - name="h_in", stddev=numpy.sqrt(2. / (dim_in)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=False) - if skip: - out = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim, - name="skip_in", stddev=numpy.sqrt(2. / (dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=True) - - # residual blocks - for idx_block in xrange(residual_blocks): - res = residual_block(res, dim, "block_%d" % idx_block, - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - bottleneck=bottleneck) - if skip: - out += conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim, - name="skip_%d" % idx_block, stddev=numpy.sqrt(2. / (dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=True) - # outputs - if skip: - res = out - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_pre_out", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, - dim_out=dim_out, - name="out", stddev=numpy.sqrt(2. / (1. * dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=True) - else: - if bottleneck: - res = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim_in, dim_out=dim, - name="h_0", stddev=numpy.sqrt(2. 
/ (dim_in)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_0", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim, - dim_out=dim, name="h_1", stddev=numpy.sqrt(2. / (1. * dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, - bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_1", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[1, 1], dim_in=dim, dim_out=dim_out, - name="out", stddev=numpy.sqrt(2. / (1. * dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=True) - else: - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim_in, dim_out=dim, - name="h_0", stddev=numpy.sqrt(2. / (dim_in)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=(not use_batch_norm), - weight_norm=weight_norm, scale=False) - if use_batch_norm: - res = batch_norm( - input_=res, dim=dim, name="bn_0", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.nn.relu(res) - res = conv_layer( - input_=res, filter_size=[3, 3], dim_in=dim, dim_out=dim_out, - name="out", stddev=numpy.sqrt(2. / (1. 
* dim)), - strides=[1, 1, 1, 1], padding="SAME", - nonlinearity=None, bias=True, - weight_norm=weight_norm, scale=True) - return res - - -# COUPLING LAYERS -# masked convolution implementations -def masked_conv_aff_coupling(input_, mask_in, dim, name, - use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, use_width=1., use_height=1., - mask_channel=0., skip=True): - """Affine coupling with masked convolution.""" - with tf.variable_scope(name) as scope: - if reverse or (not train): - scope.reuse_variables() - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - - # build mask - mask = use_width * numpy.arange(width) - mask = use_height * numpy.arange(height).reshape((-1, 1)) + mask - mask = mask.astype("float32") - mask = tf.mod(mask_in + mask, 2) - mask = tf.reshape(mask, [-1, height, width, 1]) - if mask.get_shape().as_list()[0] == 1: - mask = tf.tile(mask, [batch_size, 1, 1, 1]) - res = input_ * tf.mod(mask_channel + mask, 2) - - # initial input - if use_batch_norm: - res = batch_norm( - input_=res, dim=channels, name="bn_in", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res *= 2. - res = tf.concat([res, -res], 3) - res = tf.concat([res, mask], 3) - dim_in = 2. 
* channels + 1 - res = tf.nn.relu(res) - res = resnet(input_=res, dim_in=dim_in, dim=dim, - dim_out=2 * channels, - name="resnet", use_batch_norm=use_batch_norm, - train=train, weight_norm=weight_norm, - residual_blocks=residual_blocks, - bottleneck=bottleneck, skip=skip) - mask = tf.mod(mask_channel + mask, 2) - res = tf.split(axis=3, num_or_size_splits=2, value=res) - shift, log_rescaling = res[-2], res[-1] - scale = variable_on_cpu( - "rescaling_scale", [], - tf.constant_initializer(0.)) - shift = tf.reshape( - shift, [batch_size, height, width, channels]) - log_rescaling = tf.reshape( - log_rescaling, [batch_size, height, width, channels]) - log_rescaling = scale * tf.tanh(log_rescaling) - if not use_batch_norm: - scale_shift = variable_on_cpu( - "scale_shift", [], - tf.constant_initializer(0.)) - log_rescaling += scale_shift - shift *= (1. - mask) - log_rescaling *= (1. - mask) - if reverse: - res = input_ - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res * (1. - mask), dim=channels, name="bn_out", - train=False, epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res *= tf.exp(.5 * log_var * (1. - mask)) - res += mean * (1. - mask) - res *= tf.exp(-log_rescaling) - res -= shift - log_diff = -log_rescaling - if use_batch_norm: - log_diff += .5 * log_var * (1. - mask) - else: - res = input_ - res += shift - res *= tf.exp(log_rescaling) - log_diff = log_rescaling - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res * (1. - mask), dim=channels, name="bn_out", - train=train, epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res -= mean * (1. - mask) - res *= tf.exp(-.5 * log_var * (1. - mask)) - log_diff -= .5 * log_var * (1. 
- mask) - - return res, log_diff - - -def masked_conv_add_coupling(input_, mask_in, dim, name, - use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, use_width=1., use_height=1., - mask_channel=0., skip=True): - """Additive coupling with masked convolution.""" - with tf.variable_scope(name) as scope: - if reverse or (not train): - scope.reuse_variables() - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - - # build mask - mask = use_width * numpy.arange(width) - mask = use_height * numpy.arange(height).reshape((-1, 1)) + mask - mask = mask.astype("float32") - mask = tf.mod(mask_in + mask, 2) - mask = tf.reshape(mask, [-1, height, width, 1]) - if mask.get_shape().as_list()[0] == 1: - mask = tf.tile(mask, [batch_size, 1, 1, 1]) - res = input_ * tf.mod(mask_channel + mask, 2) - - # initial input - if use_batch_norm: - res = batch_norm( - input_=res, dim=channels, name="bn_in", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res *= 2. - res = tf.concat([res, -res], 3) - res = tf.concat([res, mask], 3) - dim_in = 2. * channels + 1 - res = tf.nn.relu(res) - shift = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=channels, - name="resnet", use_batch_norm=use_batch_norm, - train=train, weight_norm=weight_norm, - residual_blocks=residual_blocks, - bottleneck=bottleneck, skip=skip) - mask = tf.mod(mask_channel + mask, 2) - shift *= (1. - mask) - # use_batch_norm = False - if reverse: - res = input_ - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res * (1. - mask), - dim=channels, name="bn_out", train=False, epsilon=1e-4) - log_var = tf.log(var) - res *= tf.exp(.5 * log_var * (1. - mask)) - res += mean * (1. - mask) - res -= shift - log_diff = tf.zeros_like(res) - if use_batch_norm: - log_diff += .5 * log_var * (1. 
- mask) - else: - res = input_ - res += shift - log_diff = tf.zeros_like(res) - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res * (1. - mask), dim=channels, - name="bn_out", train=train, epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res -= mean * (1. - mask) - res *= tf.exp(-.5 * log_var * (1. - mask)) - log_diff -= .5 * log_var * (1. - mask) - - return res, log_diff - - -def masked_conv_coupling(input_, mask_in, dim, name, - use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, use_aff=True, - use_width=1., use_height=1., - mask_channel=0., skip=True): - """Coupling with masked convolution.""" - if use_aff: - return masked_conv_aff_coupling( - input_=input_, mask_in=mask_in, dim=dim, name=name, - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=reverse, residual_blocks=residual_blocks, - bottleneck=bottleneck, use_width=use_width, use_height=use_height, - mask_channel=mask_channel, skip=skip) - else: - return masked_conv_add_coupling( - input_=input_, mask_in=mask_in, dim=dim, name=name, - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=reverse, residual_blocks=residual_blocks, - bottleneck=bottleneck, use_width=use_width, use_height=use_height, - mask_channel=mask_channel, skip=skip) - - -# channel-axis splitting implementations -def conv_ch_aff_coupling(input_, dim, name, - use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, change_bottom=True, skip=True): - """Affine coupling with channel-wise splitting.""" - with tf.variable_scope(name) as scope: - if reverse or (not train): - scope.reuse_variables() - - if change_bottom: - input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_) - else: - canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_) - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = 
shape[2] - channels = shape[3] - res = input_ - - # initial input - if use_batch_norm: - res = batch_norm( - input_=res, dim=channels, name="bn_in", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.concat([res, -res], 3) - dim_in = 2. * channels - res = tf.nn.relu(res) - res = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=2 * channels, - name="resnet", use_batch_norm=use_batch_norm, - train=train, weight_norm=weight_norm, - residual_blocks=residual_blocks, - bottleneck=bottleneck, skip=skip) - shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res) - scale = variable_on_cpu( - "scale", [], - tf.constant_initializer(1.)) - shift = tf.reshape( - shift, [batch_size, height, width, channels]) - log_rescaling = tf.reshape( - log_rescaling, [batch_size, height, width, channels]) - log_rescaling = scale * tf.tanh(log_rescaling) - if not use_batch_norm: - scale_shift = variable_on_cpu( - "scale_shift", [], - tf.constant_initializer(0.)) - log_rescaling += scale_shift - if reverse: - res = canvas - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res, dim=channels, name="bn_out", train=False, - epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res *= tf.exp(.5 * log_var) - res += mean - res *= tf.exp(-log_rescaling) - res -= shift - log_diff = -log_rescaling - if use_batch_norm: - log_diff += .5 * log_var - else: - res = canvas - res += shift - res *= tf.exp(log_rescaling) - log_diff = log_rescaling - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res, dim=channels, name="bn_out", train=train, - epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res -= mean - res *= tf.exp(-.5 * log_var) - log_diff -= .5 * log_var - if change_bottom: - res = tf.concat([input_, res], 3) - log_diff = tf.concat([tf.zeros_like(log_diff), log_diff], 3) - else: - res = tf.concat([res, input_], 3) - log_diff = tf.concat([log_diff, tf.zeros_like(log_diff)], 3) - - return res, log_diff - - -def 
conv_ch_add_coupling(input_, dim, name, - use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, change_bottom=True, skip=True): - """Additive coupling with channel-wise splitting.""" - with tf.variable_scope(name) as scope: - if reverse or (not train): - scope.reuse_variables() - - if change_bottom: - input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_) - else: - canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_) - shape = input_.get_shape().as_list() - channels = shape[3] - res = input_ - - # initial input - if use_batch_norm: - res = batch_norm( - input_=res, dim=channels, name="bn_in", scale=False, - train=train, epsilon=1e-4, axes=[0, 1, 2]) - res = tf.concat([res, -res], 3) - dim_in = 2. * channels - res = tf.nn.relu(res) - shift = resnet(input_=res, dim_in=dim_in, dim=dim, dim_out=channels, - name="resnet", use_batch_norm=use_batch_norm, - train=train, weight_norm=weight_norm, - residual_blocks=residual_blocks, - bottleneck=bottleneck, skip=skip) - if reverse: - res = canvas - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res, dim=channels, name="bn_out", train=False, - epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res *= tf.exp(.5 * log_var) - res += mean - res -= shift - log_diff = tf.zeros_like(res) - if use_batch_norm: - log_diff += .5 * log_var - else: - res = canvas - res += shift - log_diff = tf.zeros_like(res) - if use_batch_norm: - mean, var = batch_norm_log_diff( - input_=res, dim=channels, name="bn_out", train=train, - epsilon=1e-4, axes=[0, 1, 2]) - log_var = tf.log(var) - res -= mean - res *= tf.exp(-.5 * log_var) - log_diff -= .5 * log_var - if change_bottom: - res = tf.concat([input_, res], 3) - log_diff = tf.concat([tf.zeros_like(log_diff), log_diff], 3) - else: - res = tf.concat([res, input_], 3) - log_diff = tf.concat([log_diff, tf.zeros_like(log_diff)], 3) - - return res, log_diff - - -def conv_ch_coupling(input_, dim, name, - 
use_batch_norm=True, train=True, weight_norm=True, - reverse=False, residual_blocks=5, - bottleneck=False, use_aff=True, change_bottom=True, - skip=True): - """Coupling with channel-wise splitting.""" - if use_aff: - return conv_ch_aff_coupling( - input_=input_, dim=dim, name=name, - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=reverse, residual_blocks=residual_blocks, - bottleneck=bottleneck, change_bottom=change_bottom, skip=skip) - else: - return conv_ch_add_coupling( - input_=input_, dim=dim, name=name, - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=reverse, residual_blocks=residual_blocks, - bottleneck=bottleneck, change_bottom=change_bottom, skip=skip) - - -# RECURSIVE USE OF COUPLING LAYERS -def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale, - use_batch_norm=True, weight_norm=True, - train=True): - """Recursion on coupling layers.""" - shape = input_.get_shape().as_list() - channels = shape[3] - residual_blocks = hps.residual_blocks - base_dim = hps.base_dim - mask = 1. - use_aff = hps.use_aff - res = input_ - skip = hps.skip - log_diff = tf.zeros_like(input_) - dim = base_dim - if FLAGS.recursion_type < 4: - dim *= 2 ** scale_idx - with tf.variable_scope("scale_%d" % scale_idx): - # initial coupling layers - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=mask, dim=dim, - name="coupling_0", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=1. 
- mask, dim=dim, - name="coupling_1", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=mask, dim=dim, - name="coupling_2", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - if scale_idx < (n_scale - 1): - with tf.variable_scope("scale_%d" % scale_idx): - res = squeeze_2x2(res) - log_diff = squeeze_2x2(log_diff) - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=True, dim=2 * dim, - name="coupling_4", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=False, dim=2 * dim, - name="coupling_5", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=True, dim=2 * dim, - name="coupling_6", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, skip=skip) - log_diff += inc_log_diff - res = unsqueeze_2x2(res) - log_diff = unsqueeze_2x2(log_diff) - if FLAGS.recursion_type > 1: - res = squeeze_2x2_ordered(res) - log_diff = squeeze_2x2_ordered(log_diff) - if FLAGS.recursion_type > 2: - res_1 = res[:, :, :, :channels] - res_2 = res[:, :, :, channels:] - log_diff_1 = 
log_diff[:, :, :, :channels] - log_diff_2 = log_diff[:, :, :, channels:] - else: - res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res) - log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff) - res_1, inc_log_diff = rec_masked_conv_coupling( - input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - res = tf.concat([res_1, res_2], 3) - log_diff_1 += inc_log_diff - log_diff = tf.concat([log_diff_1, log_diff_2], 3) - res = squeeze_2x2_ordered(res, reverse=True) - log_diff = squeeze_2x2_ordered(log_diff, reverse=True) - else: - res = squeeze_2x2_ordered(res) - log_diff = squeeze_2x2_ordered(log_diff) - res, inc_log_diff = rec_masked_conv_coupling( - input_=res, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - log_diff += inc_log_diff - res = squeeze_2x2_ordered(res, reverse=True) - log_diff = squeeze_2x2_ordered(log_diff, reverse=True) - else: - with tf.variable_scope("scale_%d" % scale_idx): - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=1. - mask, dim=dim, - name="coupling_3", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=False, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - return res, log_diff - - -def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale, - use_batch_norm=True, weight_norm=True, - train=True): - """Recursion on inverting coupling layers.""" - shape = input_.get_shape().as_list() - channels = shape[3] - residual_blocks = hps.residual_blocks - base_dim = hps.base_dim - mask = 1. 
- use_aff = hps.use_aff - res = input_ - log_diff = tf.zeros_like(input_) - skip = hps.skip - dim = base_dim - if FLAGS.recursion_type < 4: - dim *= 2 ** scale_idx - if scale_idx < (n_scale - 1): - if FLAGS.recursion_type > 1: - res = squeeze_2x2_ordered(res) - log_diff = squeeze_2x2_ordered(log_diff) - if FLAGS.recursion_type > 2: - res_1 = res[:, :, :, :channels] - res_2 = res[:, :, :, channels:] - log_diff_1 = log_diff[:, :, :, :channels] - log_diff_2 = log_diff[:, :, :, channels:] - else: - res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res) - log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff) - res_1, log_diff_1 = rec_masked_deconv_coupling( - input_=res_1, hps=hps, - scale_idx=scale_idx + 1, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - res = tf.concat([res_1, res_2], 3) - log_diff = tf.concat([log_diff_1, log_diff_2], 3) - res = squeeze_2x2_ordered(res, reverse=True) - log_diff = squeeze_2x2_ordered(log_diff, reverse=True) - else: - res = squeeze_2x2_ordered(res) - log_diff = squeeze_2x2_ordered(log_diff) - res, log_diff = rec_masked_deconv_coupling( - input_=res, hps=hps, - scale_idx=scale_idx + 1, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - res = squeeze_2x2_ordered(res, reverse=True) - log_diff = squeeze_2x2_ordered(log_diff, reverse=True) - with tf.variable_scope("scale_%d" % scale_idx): - res = squeeze_2x2(res) - log_diff = squeeze_2x2(log_diff) - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=True, dim=2 * dim, - name="coupling_6", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=False, dim=2 * dim, - name="coupling_5", - use_batch_norm=use_batch_norm, train=train, - 
weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = conv_ch_coupling( - input_=res, - change_bottom=True, dim=2 * dim, - name="coupling_4", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, skip=skip) - log_diff += inc_log_diff - res = unsqueeze_2x2(res) - log_diff = unsqueeze_2x2(log_diff) - else: - with tf.variable_scope("scale_%d" % scale_idx): - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=1. - mask, dim=dim, - name="coupling_3", - use_batch_norm=use_batch_norm, train=train, - weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - - with tf.variable_scope("scale_%d" % scale_idx): - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=mask, dim=dim, - name="coupling_2", - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=True, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=1. 
- mask, dim=dim, - name="coupling_1", - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - res, inc_log_diff = masked_conv_coupling( - input_=res, - mask_in=mask, dim=dim, - name="coupling_0", - use_batch_norm=use_batch_norm, train=train, weight_norm=weight_norm, - reverse=True, residual_blocks=residual_blocks, - bottleneck=hps.bottleneck, use_aff=use_aff, - use_width=1., use_height=1., skip=skip) - log_diff += inc_log_diff - - return res, log_diff - - -# ENCODER AND DECODER IMPLEMENTATIONS -# start the recursions -def encoder(input_, hps, n_scale, use_batch_norm=True, - weight_norm=True, train=True): - """Encoding/gaussianization function.""" - res = input_ - log_diff = tf.zeros_like(input_) - res, inc_log_diff = rec_masked_conv_coupling( - input_=res, hps=hps, scale_idx=0, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - log_diff += inc_log_diff - - return res, log_diff - - -def decoder(input_, hps, n_scale, use_batch_norm=True, - weight_norm=True, train=True): - """Decoding/generator function.""" - res, log_diff = rec_masked_deconv_coupling( - input_=input_, hps=hps, scale_idx=0, n_scale=n_scale, - use_batch_norm=use_batch_norm, weight_norm=weight_norm, - train=train) - - return res, log_diff - - -class RealNVP(object): - """Real NVP model.""" - - def __init__(self, hps, sampling=False): - # DATA TENSOR INSTANTIATION - device = "/cpu:0" - if FLAGS.dataset == "imnet": - with tf.device( - tf.train.replica_device_setter(0, worker_device=device)): - filename_queue = tf.train.string_input_producer( - gfile.Glob(FLAGS.data_path), num_epochs=None) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example, - features={ - "image_raw": tf.FixedLenFeature([], 
tf.string), - }) - image = tf.decode_raw(features["image_raw"], tf.uint8) - image.set_shape([FLAGS.image_size * FLAGS.image_size * 3]) - image = tf.cast(image, tf.float32) - if FLAGS.mode == "train": - images = tf.train.shuffle_batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size, - # Ensures a minimum amount of shuffling of examples. - min_after_dequeue=1000) - else: - images = tf.train.batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size) - self.x_orig = x_orig = images - image_size = FLAGS.image_size - x_in = tf.reshape( - x_orig, - [hps.batch_size, FLAGS.image_size, FLAGS.image_size, 3]) - x_in = tf.clip_by_value(x_in, 0, 255) - x_in = (tf.cast(x_in, tf.float32) - + tf.random_uniform(tf.shape(x_in))) / 256. - elif FLAGS.dataset == "celeba": - with tf.device( - tf.train.replica_device_setter(0, worker_device=device)): - filename_queue = tf.train.string_input_producer( - gfile.Glob(FLAGS.data_path), num_epochs=None) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example, - features={ - "image_raw": tf.FixedLenFeature([], tf.string), - }) - image = tf.decode_raw(features["image_raw"], tf.uint8) - image.set_shape([218 * 178 * 3]) # 218, 178 - image = tf.cast(image, tf.float32) - image = tf.reshape(image, [218, 178, 3]) - image = image[40:188, 15:163, :] - if FLAGS.mode == "train": - image = tf.image.random_flip_left_right(image) - images = tf.train.shuffle_batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size, - min_after_dequeue=1000) - else: - images = tf.train.batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size) - self.x_orig = x_orig = images - image_size = 64 - x_in = tf.reshape(x_orig, [hps.batch_size, 148, 148, 3]) - x_in = tf.image.resize_images( - x_in, [64, 64], method=0, align_corners=False) - 
x_in = (tf.cast(x_in, tf.float32) - + tf.random_uniform(tf.shape(x_in))) / 256. - elif FLAGS.dataset == "lsun": - with tf.device( - tf.train.replica_device_setter(0, worker_device=device)): - filename_queue = tf.train.string_input_producer( - gfile.Glob(FLAGS.data_path), num_epochs=None) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example, - features={ - "image_raw": tf.FixedLenFeature([], tf.string), - "height": tf.FixedLenFeature([], tf.int64), - "width": tf.FixedLenFeature([], tf.int64), - "depth": tf.FixedLenFeature([], tf.int64) - }) - image = tf.decode_raw(features["image_raw"], tf.uint8) - height = tf.reshape((features["height"], tf.int64)[0], [1]) - height = tf.cast(height, tf.int32) - width = tf.reshape((features["width"], tf.int64)[0], [1]) - width = tf.cast(width, tf.int32) - depth = tf.reshape((features["depth"], tf.int64)[0], [1]) - depth = tf.cast(depth, tf.int32) - image = tf.reshape(image, tf.concat([height, width, depth], 0)) - image = tf.random_crop(image, [64, 64, 3]) - if FLAGS.mode == "train": - image = tf.image.random_flip_left_right(image) - images = tf.train.shuffle_batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size, - # Ensures a minimum amount of shuffling of examples. - min_after_dequeue=1000) - else: - images = tf.train.batch( - [image], batch_size=hps.batch_size, num_threads=1, - capacity=1000 + 3 * hps.batch_size) - self.x_orig = x_orig = images - image_size = 64 - x_in = tf.reshape(x_orig, [hps.batch_size, 64, 64, 3]) - x_in = (tf.cast(x_in, tf.float32) - + tf.random_uniform(tf.shape(x_in))) / 256. 
- else: - raise ValueError("Unknown dataset.") - x_in = tf.reshape(x_in, [hps.batch_size, image_size, image_size, 3]) - side_shown = int(numpy.sqrt(hps.batch_size)) - shown_x = tf.transpose( - tf.reshape( - x_in[:(side_shown * side_shown), :, :, :], - [side_shown, image_size * side_shown, image_size, 3]), - [0, 2, 1, 3]) - shown_x = tf.transpose( - tf.reshape( - shown_x, - [1, image_size * side_shown, image_size * side_shown, 3]), - [0, 2, 1, 3]) * 255. - tf.summary.image( - "inputs", - tf.cast(shown_x, tf.uint8), - max_outputs=1) - - # restrict the data - FLAGS.image_size = image_size - data_constraint = hps.data_constraint - pre_logit_scale = numpy.log(data_constraint) - pre_logit_scale -= numpy.log(1. - data_constraint) - pre_logit_scale = tf.cast(pre_logit_scale, tf.float32) - logit_x_in = 2. * x_in # [0, 2] - logit_x_in -= 1. # [-1, 1] - logit_x_in *= data_constraint # [-.9, .9] - logit_x_in += 1. # [.1, 1.9] - logit_x_in /= 2. # [.05, .95] - # logit the data - logit_x_in = tf.log(logit_x_in) - tf.log(1. 
- logit_x_in) - transform_cost = tf.reduce_sum( - tf.nn.softplus(logit_x_in) + tf.nn.softplus(-logit_x_in) - - tf.nn.softplus(-pre_logit_scale), - [1, 2, 3]) - - # INFERENCE AND COSTS - z_out, log_diff = encoder( - input_=logit_x_in, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=True) - if FLAGS.mode != "train": - z_out, log_diff = encoder( - input_=logit_x_in, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - final_shape = [image_size, image_size, 3] - prior_ll = standard_normal_ll(z_out) - prior_ll = tf.reduce_sum(prior_ll, [1, 2, 3]) - log_diff = tf.reduce_sum(log_diff, [1, 2, 3]) - log_diff += transform_cost - cost = -(prior_ll + log_diff) - - self.x_in = x_in - self.z_out = z_out - self.cost = cost = tf.reduce_mean(cost) - - l2_reg = sum( - [tf.reduce_sum(tf.square(v)) for v in tf.trainable_variables() - if ("magnitude" in v.name) or ("rescaling_scale" in v.name)]) - - bit_per_dim = ((cost + numpy.log(256.) * image_size * image_size * 3.) - / (image_size * image_size * 3. * numpy.log(2.))) - self.bit_per_dim = bit_per_dim - - # OPTIMIZATION - momentum = 1. - hps.momentum - decay = 1. 
- hps.decay - if hps.optimizer == "adam": - optimizer = tf.train.AdamOptimizer( - learning_rate=hps.learning_rate, - beta1=momentum, beta2=decay, epsilon=1e-08, - use_locking=False, name="Adam") - elif hps.optimizer == "rmsprop": - optimizer = tf.train.RMSPropOptimizer( - learning_rate=hps.learning_rate, decay=decay, - momentum=momentum, epsilon=1e-04, - use_locking=False, name="RMSProp") - else: - optimizer = tf.train.MomentumOptimizer(hps.learning_rate, - momentum=momentum) - - step = tf.get_variable( - "global_step", [], tf.int64, - tf.zeros_initializer(), - trainable=False) - self.step = step - grads_and_vars = optimizer.compute_gradients( - cost + hps.l2_coeff * l2_reg, - tf.trainable_variables()) - grads, vars_ = zip(*grads_and_vars) - capped_grads, gradient_norm = tf.clip_by_global_norm( - grads, clip_norm=hps.clip_gradient) - gradient_norm = tf.check_numerics(gradient_norm, - "Gradient norm is NaN or Inf.") - - l2_z = tf.reduce_sum(tf.square(z_out), [1, 2, 3]) - if not sampling: - tf.summary.scalar("negative_log_likelihood", tf.reshape(cost, [])) - tf.summary.scalar("gradient_norm", tf.reshape(gradient_norm, [])) - tf.summary.scalar("bit_per_dim", tf.reshape(bit_per_dim, [])) - tf.summary.scalar("log_diff", tf.reshape(tf.reduce_mean(log_diff), [])) - tf.summary.scalar("prior_ll", tf.reshape(tf.reduce_mean(prior_ll), [])) - tf.summary.scalar( - "log_diff_var", - tf.reshape(tf.reduce_mean(tf.square(log_diff)) - - tf.square(tf.reduce_mean(log_diff)), [])) - tf.summary.scalar( - "prior_ll_var", - tf.reshape(tf.reduce_mean(tf.square(prior_ll)) - - tf.square(tf.reduce_mean(prior_ll)), [])) - tf.summary.scalar("l2_z_mean", tf.reshape(tf.reduce_mean(l2_z), [])) - tf.summary.scalar( - "l2_z_var", - tf.reshape(tf.reduce_mean(tf.square(l2_z)) - - tf.square(tf.reduce_mean(l2_z)), [])) - - - capped_grads_and_vars = zip(capped_grads, vars_) - self.train_step = optimizer.apply_gradients( - capped_grads_and_vars, global_step=step) - - # SAMPLING AND VISUALIZATION - if 
sampling: - # SAMPLES - sample = standard_normal_sample([100] + final_shape) - sample, _ = decoder( - input_=sample, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=True) - sample = tf.nn.sigmoid(sample) - - sample = tf.clip_by_value(sample, 0, 1) * 255. - sample = tf.reshape(sample, [100, image_size, image_size, 3]) - sample = tf.transpose( - tf.reshape(sample, [10, image_size * 10, image_size, 3]), - [0, 2, 1, 3]) - sample = tf.transpose( - tf.reshape(sample, [1, image_size * 10, image_size * 10, 3]), - [0, 2, 1, 3]) - tf.summary.image( - "samples", - tf.cast(sample, tf.uint8), - max_outputs=1) - - # CONCATENATION - concatenation, _ = encoder( - input_=logit_x_in, hps=hps, - n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - concatenation = tf.reshape( - concatenation, - [(side_shown * side_shown), image_size, image_size, 3]) - concatenation = tf.transpose( - tf.reshape( - concatenation, - [side_shown, image_size * side_shown, image_size, 3]), - [0, 2, 1, 3]) - concatenation = tf.transpose( - tf.reshape( - concatenation, - [1, image_size * side_shown, image_size * side_shown, 3]), - [0, 2, 1, 3]) - concatenation, _ = decoder( - input_=concatenation, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - concatenation = tf.nn.sigmoid(concatenation) * 255. 
- tf.summary.image( - "concatenation", - tf.cast(concatenation, tf.uint8), - max_outputs=1) - - # MANIFOLD - - # Data basis - z_u, _ = encoder( - input_=logit_x_in[:8, :, :, :], hps=hps, - n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - u_1 = tf.reshape(z_u[0, :, :, :], [-1]) - u_2 = tf.reshape(z_u[1, :, :, :], [-1]) - u_3 = tf.reshape(z_u[2, :, :, :], [-1]) - u_4 = tf.reshape(z_u[3, :, :, :], [-1]) - u_5 = tf.reshape(z_u[4, :, :, :], [-1]) - u_6 = tf.reshape(z_u[5, :, :, :], [-1]) - u_7 = tf.reshape(z_u[6, :, :, :], [-1]) - u_8 = tf.reshape(z_u[7, :, :, :], [-1]) - - # 3D dome - manifold_side = 8 - angle_1 = numpy.arange(manifold_side) * 1. / manifold_side - angle_2 = numpy.arange(manifold_side) * 1. / manifold_side - angle_1 *= 2. * numpy.pi - angle_2 *= 2. * numpy.pi - angle_1 = angle_1.astype("float32") - angle_2 = angle_2.astype("float32") - angle_1 = tf.reshape(angle_1, [1, -1, 1]) - angle_1 += tf.zeros([manifold_side, manifold_side, 1]) - angle_2 = tf.reshape(angle_2, [-1, 1, 1]) - angle_2 += tf.zeros([manifold_side, manifold_side, 1]) - n_angle_3 = 40 - angle_3 = numpy.arange(n_angle_3) * 1. 
/ n_angle_3 - angle_3 *= 2 * numpy.pi - angle_3 = angle_3.astype("float32") - angle_3 = tf.reshape(angle_3, [-1, 1, 1, 1]) - angle_3 += tf.zeros([n_angle_3, manifold_side, manifold_side, 1]) - manifold = tf.cos(angle_1) * ( - tf.cos(angle_2) * ( - tf.cos(angle_3) * u_1 + tf.sin(angle_3) * u_2) - + tf.sin(angle_2) * ( - tf.cos(angle_3) * u_3 + tf.sin(angle_3) * u_4)) - manifold += tf.sin(angle_1) * ( - tf.cos(angle_2) * ( - tf.cos(angle_3) * u_5 + tf.sin(angle_3) * u_6) - + tf.sin(angle_2) * ( - tf.cos(angle_3) * u_7 + tf.sin(angle_3) * u_8)) - manifold = tf.reshape( - manifold, - [n_angle_3 * manifold_side * manifold_side] + final_shape) - manifold, _ = decoder( - input_=manifold, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - manifold = tf.nn.sigmoid(manifold) - - manifold = tf.clip_by_value(manifold, 0, 1) * 255. - manifold = tf.reshape( - manifold, - [n_angle_3, - manifold_side * manifold_side, - image_size, - image_size, - 3]) - manifold = tf.transpose( - tf.reshape( - manifold, - [n_angle_3, manifold_side, - image_size * manifold_side, image_size, 3]), [0, 1, 3, 2, 4]) - manifold = tf.transpose( - tf.reshape( - manifold, - [n_angle_3, image_size * manifold_side, - image_size * manifold_side, 3]), - [0, 2, 1, 3]) - manifold = tf.transpose(manifold, [1, 2, 0, 3]) - manifold = tf.reshape( - manifold, - [1, image_size * manifold_side, - image_size * manifold_side, 3 * n_angle_3]) - tf.summary.image( - "manifold", - tf.cast(manifold[:, :, :, :3], tf.uint8), - max_outputs=1) - - # COMPRESSION - z_complete, _ = encoder( - input_=logit_x_in[:hps.n_scale, :, :, :], hps=hps, - n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - z_compressed_list = [z_complete] - z_noisy_list = [z_complete] - z_lost = z_complete - for scale_idx in xrange(hps.n_scale - 1): - z_lost = squeeze_2x2_ordered(z_lost) - z_lost, _ = tf.split(axis=3, num_or_size_splits=2, value=z_lost) - z_compressed = 
z_lost - z_noisy = z_lost - for _ in xrange(scale_idx + 1): - z_compressed = tf.concat( - [z_compressed, tf.zeros_like(z_compressed)], 3) - z_compressed = squeeze_2x2_ordered( - z_compressed, reverse=True) - z_noisy = tf.concat( - [z_noisy, tf.random_normal( - z_noisy.get_shape().as_list())], 3) - z_noisy = squeeze_2x2_ordered(z_noisy, reverse=True) - z_compressed_list.append(z_compressed) - z_noisy_list.append(z_noisy) - self.z_reduced = z_lost - z_compressed = tf.concat(z_compressed_list, 0) - z_noisy = tf.concat(z_noisy_list, 0) - noisy_images, _ = decoder( - input_=z_noisy, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - compressed_images, _ = decoder( - input_=z_compressed, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=False) - noisy_images = tf.nn.sigmoid(noisy_images) - compressed_images = tf.nn.sigmoid(compressed_images) - - noisy_images = tf.clip_by_value(noisy_images, 0, 1) * 255. - noisy_images = tf.reshape( - noisy_images, - [(hps.n_scale * hps.n_scale), image_size, image_size, 3]) - noisy_images = tf.transpose( - tf.reshape( - noisy_images, - [hps.n_scale, image_size * hps.n_scale, image_size, 3]), - [0, 2, 1, 3]) - noisy_images = tf.transpose( - tf.reshape( - noisy_images, - [1, image_size * hps.n_scale, image_size * hps.n_scale, 3]), - [0, 2, 1, 3]) - tf.summary.image( - "noise", - tf.cast(noisy_images, tf.uint8), - max_outputs=1) - compressed_images = tf.clip_by_value(compressed_images, 0, 1) * 255. 
- compressed_images = tf.reshape( - compressed_images, - [(hps.n_scale * hps.n_scale), image_size, image_size, 3]) - compressed_images = tf.transpose( - tf.reshape( - compressed_images, - [hps.n_scale, image_size * hps.n_scale, image_size, 3]), - [0, 2, 1, 3]) - compressed_images = tf.transpose( - tf.reshape( - compressed_images, - [1, image_size * hps.n_scale, image_size * hps.n_scale, 3]), - [0, 2, 1, 3]) - tf.summary.image( - "compression", - tf.cast(compressed_images, tf.uint8), - max_outputs=1) - - # SAMPLES x2 - final_shape[0] *= 2 - final_shape[1] *= 2 - big_sample = standard_normal_sample([25] + final_shape) - big_sample, _ = decoder( - input_=big_sample, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=True) - big_sample = tf.nn.sigmoid(big_sample) - - big_sample = tf.clip_by_value(big_sample, 0, 1) * 255. - big_sample = tf.reshape( - big_sample, - [25, image_size * 2, image_size * 2, 3]) - big_sample = tf.transpose( - tf.reshape( - big_sample, - [5, image_size * 10, image_size * 2, 3]), [0, 2, 1, 3]) - big_sample = tf.transpose( - tf.reshape( - big_sample, - [1, image_size * 10, image_size * 10, 3]), - [0, 2, 1, 3]) - tf.summary.image( - "big_sample", - tf.cast(big_sample, tf.uint8), - max_outputs=1) - - # SAMPLES x10 - final_shape[0] *= 5 - final_shape[1] *= 5 - extra_large = standard_normal_sample([1] + final_shape) - extra_large, _ = decoder( - input_=extra_large, hps=hps, n_scale=hps.n_scale, - use_batch_norm=hps.use_batch_norm, weight_norm=True, - train=True) - extra_large = tf.nn.sigmoid(extra_large) - - extra_large = tf.clip_by_value(extra_large, 0, 1) * 255. 
- tf.summary.image( - "extra_large", - tf.cast(extra_large, tf.uint8), - max_outputs=1) - - def eval_epoch(self, hps): - """Evaluate bits/dim.""" - n_eval_dict = { - "imnet": 50000, - "lsun": 300, - "celeba": 19962, - "svhn": 26032, - } - if FLAGS.eval_set_size == 0: - num_examples_eval = n_eval_dict[FLAGS.dataset] - else: - num_examples_eval = FLAGS.eval_set_size - n_epoch = num_examples_eval / hps.batch_size - eval_costs = [] - bar_len = 70 - for epoch_idx in xrange(n_epoch): - n_equal = epoch_idx * bar_len * 1. / n_epoch - n_equal = numpy.ceil(n_equal) - n_equal = int(n_equal) - n_dash = bar_len - n_equal - progress_bar = "[" + "=" * n_equal + "-" * n_dash + "]\r" - print(progress_bar, end=' ') - cost = self.bit_per_dim.eval() - eval_costs.append(cost) - print("") - return float(numpy.mean(eval_costs)) - - -def train_model(hps, logdir): - """Training.""" - with tf.Graph().as_default(): - with tf.device(tf.train.replica_device_setter(0)): - with tf.variable_scope("model"): - model = RealNVP(hps) - - saver = tf.train.Saver(tf.global_variables()) - - # Build the summary operation from the last tower summaries. - summary_op = tf.summary.merge_all() - - # Build an initialization operation to run below. - init = tf.global_variables_initializer() - - # Start running operations on the Graph. allow_soft_placement must be set to - # True to build towers on GPU, as some of the ops do not have GPU - # implementations. - sess = tf.Session(config=tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=True)) - sess.run(init) - - ckpt_state = tf.train.get_checkpoint_state(logdir) - if ckpt_state and ckpt_state.model_checkpoint_path: - print("Loading file %s" % ckpt_state.model_checkpoint_path) - saver.restore(sess, ckpt_state.model_checkpoint_path) - - # Start the queue runners. 
- tf.train.start_queue_runners(sess=sess) - - summary_writer = tf.summary.FileWriter( - logdir, - graph=sess.graph) - - local_step = 0 - while True: - fetches = [model.step, model.bit_per_dim, model.train_step] - # The chief worker evaluates the summaries every 10 steps. - should_eval_summaries = local_step % 100 == 0 - if should_eval_summaries: - fetches += [summary_op] - - - start_time = time.time() - outputs = sess.run(fetches) - global_step_val = outputs[0] - loss = outputs[1] - duration = time.time() - start_time - assert not numpy.isnan( - loss), 'Model diverged with loss = NaN' - - if local_step % 10 == 0: - examples_per_sec = hps.batch_size / float(duration) - format_str = ('%s: step %d, loss = %.2f ' - '(%.1f examples/sec; %.3f ' - 'sec/batch)') - print(format_str % (datetime.now(), global_step_val, loss, - examples_per_sec, duration)) - - if should_eval_summaries: - summary_str = outputs[-1] - summary_writer.add_summary(summary_str, global_step_val) - - # Save the model checkpoint periodically. 
- if local_step % 1000 == 0 or (local_step + 1) == FLAGS.train_steps: - checkpoint_path = os.path.join(logdir, 'model.ckpt') - saver.save( - sess, - checkpoint_path, - global_step=global_step_val) - - if outputs[0] >= FLAGS.train_steps: - break - - local_step += 1 - - -def evaluate(hps, logdir, traindir, subset="valid", return_val=False): - """Evaluation.""" - hps.batch_size = 100 - with tf.Graph().as_default(): - with tf.device("/cpu:0"): - with tf.variable_scope("model") as var_scope: - eval_model = RealNVP(hps) - summary_writer = tf.summary.FileWriter(logdir) - var_scope.reuse_variables() - - saver = tf.train.Saver() - sess = tf.Session(config=tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=True)) - tf.train.start_queue_runners(sess) - - previous_global_step = 0 # don"t run eval for step = 0 - - with sess.as_default(): - while True: - ckpt_state = tf.train.get_checkpoint_state(traindir) - if not (ckpt_state and ckpt_state.model_checkpoint_path): - print("No model to eval yet at %s" % traindir) - time.sleep(30) - continue - print("Loading file %s" % ckpt_state.model_checkpoint_path) - saver.restore(sess, ckpt_state.model_checkpoint_path) - - current_step = tf.train.global_step(sess, eval_model.step) - if current_step == previous_global_step: - print("Waiting for the checkpoint to be updated.") - time.sleep(30) - continue - previous_global_step = current_step - - print("Evaluating...") - bit_per_dim = eval_model.eval_epoch(hps) - print("Epoch: %d, %s -> %.3f bits/dim" - % (current_step, subset, bit_per_dim)) - print("Writing summary...") - summary = tf.Summary() - summary.value.extend( - [tf.Summary.Value( - tag="bit_per_dim", - simple_value=bit_per_dim)]) - summary_writer.add_summary(summary, current_step) - - if return_val: - return current_step, bit_per_dim - - -def sample_from_model(hps, logdir, traindir): - """Sampling.""" - hps.batch_size = 100 - with tf.Graph().as_default(): - with tf.device("/cpu:0"): - with tf.variable_scope("model") 
as var_scope: - eval_model = RealNVP(hps, sampling=True) - summary_writer = tf.summary.FileWriter(logdir) - var_scope.reuse_variables() - - summary_op = tf.summary.merge_all() - saver = tf.train.Saver() - sess = tf.Session(config=tf.ConfigProto( - allow_soft_placement=True, - log_device_placement=True)) - coord = tf.train.Coordinator() - threads = tf.train.start_queue_runners(sess=sess, coord=coord) - - previous_global_step = 0 # don"t run eval for step = 0 - - initialized = False - with sess.as_default(): - while True: - ckpt_state = tf.train.get_checkpoint_state(traindir) - if not (ckpt_state and ckpt_state.model_checkpoint_path): - if not initialized: - print("No model to eval yet at %s" % traindir) - time.sleep(30) - continue - else: - print ("Loading file %s" - % ckpt_state.model_checkpoint_path) - saver.restore(sess, ckpt_state.model_checkpoint_path) - - current_step = tf.train.global_step(sess, eval_model.step) - if current_step == previous_global_step: - print("Waiting for the checkpoint to be updated.") - time.sleep(30) - continue - previous_global_step = current_step - - fetches = [summary_op] - - outputs = sess.run(fetches) - summary_writer.add_summary(outputs[0], current_step) - coord.request_stop() - coord.join(threads) - - -def main(unused_argv): - hps = get_default_hparams().update_config(FLAGS.hpconfig) - if FLAGS.mode == "train": - train_model(hps=hps, logdir=FLAGS.logdir) - elif FLAGS.mode == "sample": - sample_from_model(hps=hps, logdir=FLAGS.logdir, - traindir=FLAGS.traindir) - else: - hps.batch_size = 100 - evaluate(hps=hps, logdir=FLAGS.logdir, - traindir=FLAGS.traindir, subset=FLAGS.mode) - -if __name__ == "__main__": - tf.app.run() diff --git a/research/real_nvp/real_nvp_utils.py b/research/real_nvp/real_nvp_utils.py deleted file mode 100644 index d8240f0e98d5b0d91bab8a9027ac737eb425a999..0000000000000000000000000000000000000000 --- a/research/real_nvp/real_nvp_utils.py +++ /dev/null @@ -1,475 +0,0 @@ -# Copyright 2016 Google Inc. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Utility functions for Real NVP. -""" - -# pylint: disable=dangerous-default-value - -import numpy -from six.moves import xrange -import tensorflow as tf -from tensorflow.python.framework import ops - -DEFAULT_BN_LAG = .0 - - -def stable_var(input_, mean=None, axes=[0]): - """Numerically more stable variance computation.""" - if mean is None: - mean = tf.reduce_mean(input_, axes) - res = tf.square(input_ - mean) - max_sqr = tf.reduce_max(res, axes) - res /= max_sqr - res = tf.reduce_mean(res, axes) - res *= max_sqr - - return res - - -def variable_on_cpu(name, shape, initializer, trainable=True): - """Helper to create a Variable stored on CPU memory. 
- - Args: - name: name of the variable - shape: list of ints - initializer: initializer for Variable - trainable: boolean defining if the variable is for training - Returns: - Variable Tensor - """ - var = tf.get_variable( - name, shape, initializer=initializer, trainable=trainable) - return var - - -# layers -def conv_layer(input_, - filter_size, - dim_in, - dim_out, - name, - stddev=1e-2, - strides=[1, 1, 1, 1], - padding="SAME", - nonlinearity=None, - bias=False, - weight_norm=False, - scale=False): - """Convolutional layer.""" - with tf.variable_scope(name) as scope: - weights = variable_on_cpu( - "weights", - filter_size + [dim_in, dim_out], - tf.random_uniform_initializer( - minval=-stddev, maxval=stddev)) - # weight normalization - if weight_norm: - weights /= tf.sqrt(tf.reduce_sum(tf.square(weights), [0, 1, 2])) - if scale: - magnitude = variable_on_cpu( - "magnitude", [dim_out], - tf.constant_initializer( - stddev * numpy.sqrt(dim_in * numpy.prod(filter_size) / 12.))) - weights *= magnitude - res = input_ - # handling filter size bigger than image size - if hasattr(input_, "shape"): - if input_.get_shape().as_list()[1] < filter_size[0]: - pad_1 = tf.zeros([ - input_.get_shape().as_list()[0], - filter_size[0] - input_.get_shape().as_list()[1], - input_.get_shape().as_list()[2], - input_.get_shape().as_list()[3] - ]) - pad_2 = tf.zeros([ - input_.get_shape().as_list[0], - filter_size[0], - filter_size[1] - input_.get_shape().as_list()[2], - input_.get_shape().as_list()[3] - ]) - res = tf.concat(axis=1, values=[pad_1, res]) - res = tf.concat(axis=2, values=[pad_2, res]) - res = tf.nn.conv2d( - input=res, - filter=weights, - strides=strides, - padding=padding, - name=scope.name) - - if hasattr(input_, "shape"): - if input_.get_shape().as_list()[1] < filter_size[0]: - res = tf.slice(res, [ - 0, filter_size[0] - input_.get_shape().as_list()[1], - filter_size[1] - input_.get_shape().as_list()[2], 0 - ], [-1, -1, -1, -1]) - - if bias: - biases = 
variable_on_cpu("biases", [dim_out], tf.constant_initializer(0.)) - res = tf.nn.bias_add(res, biases) - if nonlinearity is not None: - res = nonlinearity(res) - - return res - - -def max_pool_2x2(input_): - """Max pooling.""" - return tf.nn.max_pool( - input_, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") - - -def depool_2x2(input_, stride=2): - """Depooling.""" - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels]) - res = tf.concat( - axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])]) - res = tf.concat(axis=4, values=[ - res, tf.zeros([batch_size, height, stride, width, stride - 1, channels]) - ]) - res = tf.reshape(res, [batch_size, stride * height, stride * width, channels]) - - return res - - -# random flip on a batch of images -def batch_random_flip(input_): - """Simultaneous horizontal random flip.""" - if isinstance(input_, (float, int)): - return input_ - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - res = tf.split(axis=0, num_or_size_splits=batch_size, value=input_) - res = [elem[0, :, :, :] for elem in res] - res = [tf.image.random_flip_left_right(elem) for elem in res] - res = [tf.reshape(elem, [1, height, width, channels]) for elem in res] - res = tf.concat(axis=0, values=res) - - return res - - -# build a one hot representation corresponding to the integer tensor -# the one-hot dimension is appended to the integer tensor shape -def as_one_hot(input_, n_indices): - """Convert indices to one-hot.""" - shape = input_.get_shape().as_list() - n_elem = numpy.prod(shape) - indices = tf.range(n_elem) - indices = tf.cast(indices, tf.int64) - indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])]) - indices_input = tf.reshape(indices_input, [2, -1]) - indices_input = 
tf.transpose(indices_input) - res = tf.sparse_to_dense( - indices_input, [n_elem, n_indices], 1., 0., name="flat_one_hot") - res = tf.reshape(res, [elem for elem in shape] + [n_indices]) - - return res - - -def squeeze_2x2(input_): - """Squeezing operation: reshape to convert space to channels.""" - return squeeze_nxn(input_, n_factor=2) - - -def squeeze_nxn(input_, n_factor=2): - """Squeezing operation: reshape to convert space to channels.""" - if isinstance(input_, (float, int)): - return input_ - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - if height % n_factor != 0: - raise ValueError("Height not divisible by %d." % n_factor) - if width % n_factor != 0: - raise ValueError("Width not divisible by %d." % n_factor) - res = tf.reshape( - input_, - [batch_size, - height // n_factor, - n_factor, width // n_factor, - n_factor, channels]) - res = tf.transpose(res, [0, 1, 3, 5, 2, 4]) - res = tf.reshape( - res, - [batch_size, - height // n_factor, - width // n_factor, - channels * n_factor * n_factor]) - - return res - - -def unsqueeze_2x2(input_): - """Unsqueezing operation: reshape to convert channels into space.""" - if isinstance(input_, (float, int)): - return input_ - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - if channels % 4 != 0: - raise ValueError("Number of channels not divisible by 4.") - res = tf.reshape(input_, [batch_size, height, width, channels // 4, 2, 2]) - res = tf.transpose(res, [0, 1, 4, 2, 5, 3]) - res = tf.reshape(res, [batch_size, 2 * height, 2 * width, channels // 4]) - - return res - - -# batch norm -def batch_norm(input_, - dim, - name, - scale=True, - train=True, - epsilon=1e-8, - decay=.1, - axes=[0], - bn_lag=DEFAULT_BN_LAG): - """Batch normalization.""" - # create variables - with tf.variable_scope(name): - var = variable_on_cpu( - "var", [dim], tf.constant_initializer(1.), 
trainable=False) - mean = variable_on_cpu( - "mean", [dim], tf.constant_initializer(0.), trainable=False) - step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False) - if scale: - gamma = variable_on_cpu("gamma", [dim], tf.constant_initializer(1.)) - beta = variable_on_cpu("beta", [dim], tf.constant_initializer(0.)) - # choose the appropriate moments - if train: - used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm") - cur_mean, cur_var = used_mean, used_var - if bn_lag > 0.: - used_mean -= (1. - bn_lag) * (used_mean - tf.stop_gradient(mean)) - used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var)) - used_mean /= (1. - bn_lag**(step + 1)) - used_var /= (1. - bn_lag**(step + 1)) - else: - used_mean, used_var = mean, var - cur_mean, cur_var = used_mean, used_var - - # normalize - res = (input_ - used_mean) / tf.sqrt(used_var + epsilon) - # de-normalize - if scale: - res *= gamma - res += beta - - # update variables - if train: - with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]): - with ops.colocate_with(mean): - new_mean = tf.assign_sub( - mean, - tf.check_numerics(decay * (mean - cur_mean), "NaN in moving mean.")) - with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]): - with ops.colocate_with(var): - new_var = tf.assign_sub( - var, - tf.check_numerics(decay * (var - cur_var), - "NaN in moving variance.")) - with tf.name_scope(name, "IncrementTime", [step]): - with ops.colocate_with(step): - new_step = tf.assign_add(step, 1.) - res += 0. 
* new_mean * new_var * new_step - - return res - - -# batch normalization taking into account the volume transformation -def batch_norm_log_diff(input_, - dim, - name, - train=True, - epsilon=1e-8, - decay=.1, - axes=[0], - reuse=None, - bn_lag=DEFAULT_BN_LAG): - """Batch normalization with corresponding log determinant Jacobian.""" - if reuse is None: - reuse = not train - # create variables - with tf.variable_scope(name) as scope: - if reuse: - scope.reuse_variables() - var = variable_on_cpu( - "var", [dim], tf.constant_initializer(1.), trainable=False) - mean = variable_on_cpu( - "mean", [dim], tf.constant_initializer(0.), trainable=False) - step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False) - # choose the appropriate moments - if train: - used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm") - cur_mean, cur_var = used_mean, used_var - if bn_lag > 0.: - used_var = stable_var(input_=input_, mean=used_mean, axes=axes) - cur_var = used_var - used_mean -= (1 - bn_lag) * (used_mean - tf.stop_gradient(mean)) - used_mean /= (1. - bn_lag**(step + 1)) - used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var)) - used_var /= (1. - bn_lag**(step + 1)) - else: - used_mean, used_var = mean, var - cur_mean, cur_var = used_mean, used_var - - # update variables - if train: - with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]): - with ops.colocate_with(mean): - new_mean = tf.assign_sub( - mean, - tf.check_numerics( - decay * (mean - cur_mean), "NaN in moving mean.")) - with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]): - with ops.colocate_with(var): - new_var = tf.assign_sub( - var, - tf.check_numerics(decay * (var - cur_var), - "NaN in moving variance.")) - with tf.name_scope(name, "IncrementTime", [step]): - with ops.colocate_with(step): - new_step = tf.assign_add(step, 1.) - used_var += 0. 
* new_mean * new_var * new_step - used_var += epsilon - - return used_mean, used_var - - -def convnet(input_, - dim_in, - dim_hid, - filter_sizes, - dim_out, - name, - use_batch_norm=True, - train=True, - nonlinearity=tf.nn.relu): - """Chaining of convolutional layers.""" - dims_in = [dim_in] + dim_hid[:-1] - dims_out = dim_hid - res = input_ - - bias = (not use_batch_norm) - with tf.variable_scope(name): - for layer_idx in xrange(len(dim_hid)): - res = conv_layer( - input_=res, - filter_size=filter_sizes[layer_idx], - dim_in=dims_in[layer_idx], - dim_out=dims_out[layer_idx], - name="h_%d" % layer_idx, - stddev=1e-2, - nonlinearity=None, - bias=bias) - if use_batch_norm: - res = batch_norm( - input_=res, - dim=dims_out[layer_idx], - name="bn_%d" % layer_idx, - scale=(nonlinearity == tf.nn.relu), - train=train, - epsilon=1e-8, - axes=[0, 1, 2]) - if nonlinearity is not None: - res = nonlinearity(res) - - res = conv_layer( - input_=res, - filter_size=filter_sizes[-1], - dim_in=dims_out[-1], - dim_out=dim_out, - name="out", - stddev=1e-2, - nonlinearity=None) - - return res - - -# distributions -# log-likelihood estimation -def standard_normal_ll(input_): - """Log-likelihood of standard Gaussian distribution.""" - res = -.5 * (tf.square(input_) + numpy.log(2. 
* numpy.pi)) - - return res - - -def standard_normal_sample(shape): - """Samples from standard Gaussian distribution.""" - return tf.random_normal(shape) - - -SQUEEZE_MATRIX = numpy.array([[[[1., 0., 0., 0.]], [[0., 0., 1., 0.]]], - [[[0., 0., 0., 1.]], [[0., 1., 0., 0.]]]]) - - -def squeeze_2x2_ordered(input_, reverse=False): - """Squeezing operation with a controlled ordering.""" - shape = input_.get_shape().as_list() - batch_size = shape[0] - height = shape[1] - width = shape[2] - channels = shape[3] - if reverse: - if channels % 4 != 0: - raise ValueError("Number of channels not divisible by 4.") - channels /= 4 - else: - if height % 2 != 0: - raise ValueError("Height not divisible by 2.") - if width % 2 != 0: - raise ValueError("Width not divisible by 2.") - weights = numpy.zeros((2, 2, channels, 4 * channels)) - for idx_ch in xrange(channels): - slice_2 = slice(idx_ch, (idx_ch + 1)) - slice_3 = slice((idx_ch * 4), ((idx_ch + 1) * 4)) - weights[:, :, slice_2, slice_3] = SQUEEZE_MATRIX - shuffle_channels = [idx_ch * 4 for idx_ch in xrange(channels)] - shuffle_channels += [idx_ch * 4 + 1 for idx_ch in xrange(channels)] - shuffle_channels += [idx_ch * 4 + 2 for idx_ch in xrange(channels)] - shuffle_channels += [idx_ch * 4 + 3 for idx_ch in xrange(channels)] - shuffle_channels = numpy.array(shuffle_channels) - weights = weights[:, :, :, shuffle_channels].astype("float32") - if reverse: - res = tf.nn.conv2d_transpose( - value=input_, - filter=weights, - output_shape=[batch_size, height * 2, width * 2, channels], - strides=[1, 2, 2, 1], - padding="SAME", - name="unsqueeze_2x2") - else: - res = tf.nn.conv2d( - input=input_, - filter=weights, - strides=[1, 2, 2, 1], - padding="SAME", - name="squeeze_2x2") - - return res diff --git a/research/sentiment_analysis/README.md b/research/sentiment_analysis/README.md deleted file mode 100644 index f98c42751df2475b4dbdc1db0cc303b42d2b7b4c..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/README.md +++ 
/dev/null @@ -1,26 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Sentiment Analysis -## Overview -This is an implementation of the Sentiment Analysis model as described in the [this paper](https://arxiv.org/abs/1412.1058). The implementation is with the reference to [paddle version](https://github.com/mlperf/reference/tree/master/sentiment_analysis/paddle). - -The model makes use of concatenation of two CNN layers with different kernel sizes. Batch normalization and dropout layers are used to prevent over-fitting. - -## Dataset -The [keras](https://keras.io)'s [IMDB Movie reviews sentiment classification](https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification) dataset is used. The dataset file download is handled by keras module, and the downloaded files are stored at ``~/.keras/datasets` directory. The compressed file's filesize as of June 15 2018 is 17MB. - -## Running Code -### Train and evaluate model -To train and evaluate the model, issue the following command: -``` -python sentiment_main.py -``` -Arguments: - * `--dataset`: The dataset name to be downloaded and preprocessed. By default, it is `imdb`. - -There are other arguments about models and training process. Use the `--help` or `-h` flag to get a full list of possible arguments with detailed descriptions. - -## Benchmarks -The model was recorded to have the accuracy of 90.1% for the IMDB dataset. 
diff --git a/research/sentiment_analysis/__init__.py b/research/sentiment_analysis/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/sentiment_analysis/data/__init__.py b/research/sentiment_analysis/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/sentiment_analysis/data/dataset.py b/research/sentiment_analysis/data/dataset.py deleted file mode 100644 index 9ba4b9ac677296fbc7d1db549c5dd011117a16c4..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/data/dataset.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Dataset module for sentiment analysis. - -Currently imdb dataset is available. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import data.imdb as imdb - -DATASET_IMDB = "imdb" - - -def load(dataset, vocabulary_size, sentence_length): - """Returns training and evaluation input. - - Args: - dataset: Dataset to be trained and evaluated. - Currently only imdb is supported. - vocabulary_size: The number of the most frequent tokens - to be used from the corpus. - sentence_length: The number of words in each sentence. - Longer sentences get cut, shorter ones padded. - Raises: - ValueError: if the dataset value is not valid. - Returns: - A tuple of length 4, for training sentences, labels, - evaluation sentences, and evaluation labels, - each being an numpy array. - """ - if dataset == DATASET_IMDB: - return imdb.load(vocabulary_size, sentence_length) - else: - raise ValueError("unsupported dataset: " + dataset) - - -def get_num_class(dataset): - """Returns an integer for the number of label classes. - - Args: - dataset: Dataset to be trained and evaluated. - Currently only imdb is supported. - Raises: - ValueError: if the dataset value is not valid. 
- Returns: - int: The number of label classes. - """ - if dataset == DATASET_IMDB: - return imdb.NUM_CLASS - else: - raise ValueError("unsupported dataset: " + dataset) diff --git a/research/sentiment_analysis/data/imdb.py b/research/sentiment_analysis/data/imdb.py deleted file mode 100644 index f8160ca2f71ac158a4ee42119a04e2dafec033ee..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/data/imdb.py +++ /dev/null @@ -1,54 +0,0 @@ -"""IMDB Dataset module for sentiment analysis.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from data.util import OOV_CHAR -from data.util import pad_sentence -from data.util import START_CHAR - -NUM_CLASS = 2 - - -def load(vocabulary_size, sentence_length): - """Returns training and evaluation input for imdb dataset. - - Args: - vocabulary_size: The number of the most frequent tokens - to be used from the corpus. - sentence_length: The number of words in each sentence. - Longer sentences get cut, shorter ones padded. - Raises: - ValueError: if the dataset value is not valid. - Returns: - A tuple of length 4, for training and evaluation data, - each being an numpy array. 
- """ - (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data( - path="imdb.npz", - num_words=vocabulary_size, - skip_top=0, - maxlen=None, - seed=113, - start_char=START_CHAR, - oov_char=OOV_CHAR, - index_from=OOV_CHAR+1) - - x_train_processed = [] - for sen in x_train: - sen = pad_sentence(sen, sentence_length) - x_train_processed.append(np.array(sen)) - x_train_processed = np.array(x_train_processed) - - x_test_processed = [] - for sen in x_test: - sen = pad_sentence(sen, sentence_length) - x_test_processed.append(np.array(sen)) - x_test_processed = np.array(x_test_processed) - - return x_train_processed, np.eye(NUM_CLASS)[y_train], \ - x_test_processed, np.eye(NUM_CLASS)[y_test] diff --git a/research/sentiment_analysis/data/util.py b/research/sentiment_analysis/data/util.py deleted file mode 100644 index c8f8808f7e1d7f26876d052c108d1948cbd2fe9f..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/data/util.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Utility module for sentiment analysis.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -START_CHAR = 1 -END_CHAR = 2 -OOV_CHAR = 3 - - -def pad_sentence(sentence, sentence_length): - """Pad the given sentense at the end. - - If the input is longer than sentence_length, - the remaining portion is dropped. - END_CHAR is used for the padding. - - Args: - sentence: A numpy array of integers. - sentence_length: The length of the input after the padding. - Returns: - A numpy array of integers of the given length. 
- """ - sentence = sentence[:sentence_length] - if len(sentence) < sentence_length: - sentence = np.pad(sentence, (0, sentence_length - len(sentence)), - "constant", constant_values=(START_CHAR, END_CHAR)) - - return sentence diff --git a/research/sentiment_analysis/sentiment_main.py b/research/sentiment_analysis/sentiment_main.py deleted file mode 100644 index 8b9ba5f921eef72377b480669bd81087fd1b160a..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/sentiment_main.py +++ /dev/null @@ -1,115 +0,0 @@ -"""Main function for the sentiment analysis model. - -The model makes use of concatenation of two CNN layers with -different kernel sizes. See `sentiment_model.py` -for more details about the models. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import os - -import tensorflow as tf - -from data import dataset -import sentiment_model - - - -_DROPOUT_RATE = 0.95 - - -def run_model(dataset_name, emb_dim, voc_size, sen_len, - hid_dim, batch_size, epochs, model_save_dir): - """Run training loop and an evaluation at the end. - - Args: - dataset_name: Dataset name to be trained and evaluated. - emb_dim: The dimension of the Embedding layer. - voc_size: The number of the most frequent tokens - to be used from the corpus. - sen_len: The number of words in each sentence. - Longer sentences get cut, shorter ones padded. - hid_dim: The dimension of the Embedding layer. - batch_size: The size of each batch during training. - epochs: The number of the iteration over the training set for training. 
- """ - - model = sentiment_model.CNN(emb_dim, voc_size, sen_len, - hid_dim, dataset.get_num_class(dataset_name), - _DROPOUT_RATE) - model.summary() - - model.compile(loss="categorical_crossentropy", - optimizer="rmsprop", - metrics=["accuracy"]) - - tf.logging.info("Loading the data") - x_train, y_train, x_test, y_test = dataset.load( - dataset_name, voc_size, sen_len) - - if not os.path.exists(model_save_dir): - os.makedirs(model_save_dir) - - filepath=model_save_dir+"/model-{epoch:02d}.hdf5" - - checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', - verbose=1,save_best_only=True, - save_weights_only=True,mode='auto') - - - model.fit(x_train, y_train, batch_size=batch_size, - validation_split=0.4, epochs=epochs, callbacks=[checkpoint_callback]) - - score = model.evaluate(x_test, y_test, batch_size=batch_size) - - model.save(os.path.join(model_save_dir, "full-model.h5")) - - tf.logging.info("Score: {}".format(score)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--dataset", help="Dataset to be trained " - "and evaluated.", - type=str, choices=["imdb"], default="imdb") - - parser.add_argument("-e", "--embedding_dim", - help="The dimension of the Embedding layer.", - type=int, default=512) - - parser.add_argument("-v", "--vocabulary_size", - help="The number of the words to be considered " - "in the dataset corpus.", - type=int, default=6000) - - parser.add_argument("-s", "--sentence_length", - help="The number of words in a data point." 
- "Entries of smaller length are padded.", - type=int, default=600) - - parser.add_argument("-c", "--hidden_dim", - help="The number of the CNN layer filters.", - type=int, default=512) - - parser.add_argument("-b", "--batch_size", - help="The size of each batch for training.", - type=int, default=500) - - parser.add_argument("-p", "--epochs", - help="The number of epochs for training.", - type=int, default=55) - - parser.add_argument("-f", "--folder", - help="folder/dir to save trained model", - type=str, default=None) - args = parser.parse_args() - - if args.folder is None: - parser.error("-f argument folder/dir to save is None,provide path to save model.") - - run_model(args.dataset, args.embedding_dim, args.vocabulary_size, - args.sentence_length, args.hidden_dim, - args.batch_size, args.epochs, args.folder) diff --git a/research/sentiment_analysis/sentiment_model.py b/research/sentiment_analysis/sentiment_model.py deleted file mode 100644 index 586474992ab4521512bbbb5156be0b306f94c3bb..0000000000000000000000000000000000000000 --- a/research/sentiment_analysis/sentiment_model.py +++ /dev/null @@ -1,50 +0,0 @@ -"""Model for sentiment analysis. - -The model makes use of concatenation of two CNN layers with -different kernel sizes. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -class CNN(tf.keras.models.Model): - """CNN for sentimental analysis.""" - - def __init__(self, emb_dim, num_words, sentence_length, hid_dim, - class_dim, dropout_rate): - """Initialize CNN model. - - Args: - emb_dim: The dimension of the Embedding layer. - num_words: The number of the most frequent tokens - to be used from the corpus. - sentence_length: The number of words in each sentence. - Longer sentences get cut, shorter ones padded. - hid_dim: The dimension of the Embedding layer. - class_dim: The number of the CNN layer filters. 
- dropout_rate: The portion of kept value in the Dropout layer. - Returns: - tf.keras.models.Model: A Keras model. - """ - - input_layer = tf.keras.layers.Input(shape=(sentence_length,), dtype=tf.int32) - - layer = tf.keras.layers.Embedding(num_words, output_dim=emb_dim)(input_layer) - - layer_conv3 = tf.keras.layers.Conv1D(hid_dim, 3, activation="relu")(layer) - layer_conv3 = tf.keras.layers.GlobalMaxPooling1D()(layer_conv3) - - layer_conv4 = tf.keras.layers.Conv1D(hid_dim, 2, activation="relu")(layer) - layer_conv4 = tf.keras.layers.GlobalMaxPooling1D()(layer_conv4) - - layer = tf.keras.layers.concatenate([layer_conv4, layer_conv3], axis=1) - layer = tf.keras.layers.BatchNormalization()(layer) - layer = tf.keras.layers.Dropout(dropout_rate)(layer) - - output = tf.keras.layers.Dense(class_dim, activation="softmax")(layer) - - super(CNN, self).__init__(inputs=[input_layer], outputs=output) diff --git a/research/seq2species/README.md b/research/seq2species/README.md deleted file mode 100644 index dbe473131d59fe990e1ff344ea69bb5bd05972e4..0000000000000000000000000000000000000000 --- a/research/seq2species/README.md +++ /dev/null @@ -1,187 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Seq2Species: Neural Network Models for Species Classification - -*A deep learning solution for read-level taxonomic classification with 16s.* - -Recent improvements in sequencing technology have made possible large, public -databases of biological sequencing data, bringing about new data richness for -many important problems in bioinformatics. However, this growing availability of -data creates a need for analysis methods capable of efficiently handling these -large sequencing datasets. 
We on the [Genomics team in Google -Brain](https://ai.google/research/teams/brain/healthcare-biosciences) are -particularly interested in the class of problems which can be framed as -assigning meaningful labels to short biological sequences, and are exploring the -possiblity of creating a general deep learning solution for solving this class -of sequence-labeling problems. We are excited to share our initial progress in -this direction by releasing Seq2Species, an open-source neural network framework -for [TensorFlow](https://www.tensorflow.org/) for predicting read-level -taxonomic labels from genomic sequence. Our release includes all the code -necessary to train new Seq2Species models. - -## About Seq2Species - -Briefly, Seq2Species provides a framework for training deep neural networks to -predict database-derived labels directly from short reads of DNA. Thus far, our -research has focused predominantly on demonstrating the value of this deep -learning approach on the problem of determining the species of origin of -next-generation sequencing reads from [16S ribosomal -DNA](https://en.wikipedia.org/wiki/16S_ribosomal_RNA). We used this -Seq2Species framework to train depthwise separable convolutional neural networks -on short subsequences from the 16S genes of more than 13 thousand distinct -species. The resulting classification model assign species-level probabilities -to individual 16S reads. - -For more information about the use cases we have explored, or for technical -details describing how Seq2Species work, please see our -[preprint](https://www.biorxiv.org/content/early/2018/06/22/353474). - -## Installation - -Training Seq2Species models requires installing the following dependencies: - -* python 2.7 - -* protocol buffers - -* numpy - -* absl - -### Dependencies - -Detailed instructions for installing TensorFlow are available on the [Installing -TensorFlow](https://www.tensorflow.org/install/) website. 
Please follow the -full instructions for installing TensorFlow with GPU support. For most -users, the following command will suffice for continuing with CPU support only: -```bash -# For CPU -pip install --upgrade tensorflow -``` - -The TensorFlow installation should also include installation of the numpy and -absl libraries, which are two of TensorFlow's python dependencies. If -necessary, instructions for standalone installation are available: - -* [numpy](https://scipy.org/install.html) - -* [absl](https://github.com/abseil/abseil-py) - -Information about protocol buffers, as well as download and installation -intructions for the protocol buffer (protobuf) compiler, are available on the [Google -Developers website](https://developers.google.com/protocol-buffers/). A typical -Ubuntu user can install this library using `apt-get`: -```bash -sudo apt-get install protobuf-compiler -``` - -### Clone - -Now, clone `tensorflow/models` to start working with the code: -```bash -git clone https://github.com/tensorflow/models.git -``` - -### Protobuf Compilation - -Seq2Species uses protobufs to store and save dataset and model metadata. Before -the framework can be used to build and train models, the protobuf libraries must -be compiled. This can be accomplished using the following command: -```bash -# From tensorflow/models/research -protoc seq2species/protos/seq2label.proto --python_out=. -``` - -### Testing the Installation - -One can test that Seq2Species has been installed correctly by running the -following command: -```bash -python seq2species/run_training_test.py -``` - -## Usage Information - -Input data to Seq2Species models should be [tf.train.Example protocol messages](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto) stored in -[TFRecord format](https://www.tensorflow.org/versions/r1.0/api_guides/python/python_io#tfrecords_format_details). 
-Specifically, the input pipeline expects tf.train.Examples with a 'sequence' field -containing a genomic sequence as an upper-case string, as one field for each -target label (e.g. 'species'). There should also be an accompanying -Seq2LabelDatasetInfo text protobuf containing metadata about the input, including -the possible label values for each target. - -Below, we give an example command that could be used to launch training for 1000 -steps, assuming that appropriate data and metadata files are stored at -`${TFRECORD}` and `${DATASET_INFO}`: -```bash -python seq2species/run_training.py --train_files ${TFRECORD} ---metadata_path ${DATASET_INFO} --hparams 'train_steps=1000' ---logdir $HOME/seq2species -``` -This will output [TensorBoard -summaries](https://www.tensorflow.org/guide/summaries_and_tensorboard), [TensorFlow -checkpoints](https://www.tensorflow.org/guide/variables#checkpoint_files), Seq2LabelModelInfo and -Seq2LabelExperimentMeasures metadata to the logdir `$HOME/seq2species`. - -### Preprocessed Seq2Species Data - -We have provided preprocessed data based on 16S reference sequences from the -[NCBI RefSeq Targeted Loci -Project](https://www.ncbi.nlm.nih.gov/refseq/targetedloci/) in a Seq2Species -bucket on Google Cloud Storage. After installing the -[Cloud SDK](https://cloud.google.com/sdk/install), -one can download those data (roughly 25 GB) to a local directory `${DEST}` using -the `gsutil` command: -```bash -BUCKET=gs://brain-genomics-public/research/seq2species -mkdir -p ${DEST} -gsutil -m cp ${BUCKET}/* ${DEST} -``` - -To check if the copy has completed successsfully, check the `${DEST}` directory: -```bash -ls -1 ${DEST} -``` -which should produce: -```bash -ncbi_100bp_revcomp.dataset_info.pbtxt -ncbi_100bp_revcomp.tfrecord -``` - -The following command can be used to train a copy of one of our best-perfoming -deep neural network models for 100 base pair (bp) data. 
This command also -illustrates how to set hyperparameter values explicitly from the commandline. -The file `configuration.py` provides a full list of hyperparameters, their descriptions, -and their default values. Additional flags are described at the top of -`run_training.py`. -```bash -python seq2species/run_training.py \ ---num_filters 3 \ ---noise_rate 0.04 \ ---train_files ${DEST}/ncbi_100bp_revcomp.tfrecord \ ---metadata_path ${DEST}/ncbi_100bp_revcomp.dataset_info.pbtxt \ ---logdir $HOME/seq2species \ ---hparams 'filter_depths=[1,1,1],filter_widths=[5,9,13],grad_clip_norm=20.0,keep_prob=0.94017831318, -lr_decay=0.0655052811,lr_init=0.000469689635793,lrelu_slope=0.0125376069918,min_read_length=100,num_fc_layers=2,num_fc_units=2828,optimizer=adam,optimizer_hp=0.885769367218,pointwise_depths=[84,58,180],pooling_type=avg,train_steps=3000000,use_depthwise_separable=true,weight_scale=1.18409526348' -``` - -### Visualization - -[TensorBoard](https://github.com/tensorflow/tensorboard) can be used to -visualize training curves and other metrics stored in the summary files produced -by `run_training.py`. Use the following command to launch a TensorBoard instance -for the example model directory `$HOME/seq2species`: -```bash -tensorboard --logdir=$HOME/seq2species -``` - -## Contact - -Any issues with the Seq2Species framework should be filed with the -[TensorFlow/models issue tracker](https://github.com/tensorflow/models/issues). -Questions regarding Seq2Species capabilities can be directed to -[seq2species-interest@google.com](mailto:seq2species-interest@google.com). This -code is maintained by [@apbusia](https://github.com/apbusia) and -[@depristo](https://github.com/depristo). 
diff --git a/research/seq2species/build_model.py b/research/seq2species/build_model.py deleted file mode 100644 index 9f4ae6b2eb2c1b4b4deeba99a0fbcf3be0ca644a..0000000000000000000000000000000000000000 --- a/research/seq2species/build_model.py +++ /dev/null @@ -1,506 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Defines convolutional model graph for Seq2Species. - -Builds TensorFlow computation graph for predicting the given taxonomic target -labels from short reads of DNA using convolutional filters, followed by -fully-connected layers and a softmax output layer. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import math - -import tensorflow as tf - -import input as seq2species_input -import seq2label_utils - - -class ConvolutionalNet(object): - """Class to build and store the model's computational graph and operations. - - Attributes: - read_length: int; the length in basepairs of the input reads of DNA. - placeholders: dict; mapping from name to tf.Placeholder. - global_step: tf.Variable tracking number of training iterations performed. - train_op: operation to perform one training step by gradient descent. - summary_op: operation to log model's performance metrics to TF event files. 
- accuracy: tf.Variable giving the model's read-level accuracy for the - current inputs. - weighted_accuracy: tf.Variable giving the model's read-level weighted - accuracy for the current inputs. - loss: tf.Variable giving the model's current cross entropy loss. - logits: tf.Variable containing the model's logits for the current inputs. - predictions: tf.Variable containing the model's current predicted - probability distributions for the current inputs. - possible_labels: a dict of possible label values (list of strings), keyed by - target name. Labels in the lists are the order used for integer encoding. - use_tpu: whether model is to be run on TPU. - """ - - def __init__(self, hparams, dataset_info, targets, use_tpu=False): - """Initializes the ConvolutionalNet according to provided hyperparameters. - - Does not build the graph---this is done by calling `build_graph` on the - constructed object or using `model_fn`. - - Args: - hparams: tf.contrib.training.Hparams object containing the model's - hyperparamters; see configuration.py for hyperparameter definitions. - dataset_info: a `Seq2LabelDatasetInfo` message reflecting the dataset - metadata. - targets: list of strings: the names of the prediction targets. - use_tpu: whether we are running on TPU; if True, summaries will be - disabled. 
- """ - self._placeholders = {} - self._targets = targets - self._dataset_info = dataset_info - self._hparams = hparams - all_label_values = seq2label_utils.get_all_label_values(self.dataset_info) - self._possible_labels = { - target: all_label_values[target] - for target in self.targets - } - self._use_tpu = use_tpu - - @property - def hparams(self): - return self._hparams - - @property - def dataset_info(self): - return self._dataset_info - - @property - def possible_labels(self): - return self._possible_labels - - @property - def bases(self): - return seq2species_input.DNA_BASES - - @property - def n_bases(self): - return seq2species_input.NUM_DNA_BASES - - @property - def targets(self): - return self._targets - - @property - def read_length(self): - return self.dataset_info.read_length - - @property - def placeholders(self): - return self._placeholders - - @property - def global_step(self): - return self._global_step - - @property - def train_op(self): - return self._train_op - - @property - def summary_op(self): - return self._summary_op - - @property - def accuracy(self): - return self._accuracy - - @property - def weighted_accuracy(self): - return self._weighted_accuracy - - @property - def loss(self): - return self._loss - - @property - def total_loss(self): - return self._total_loss - - @property - def logits(self): - return self._logits - - @property - def predictions(self): - return self._predictions - - @property - def use_tpu(self): - return self._use_tpu - - def _summary_scalar(self, name, scalar): - """Adds a summary scalar, if the platform supports summaries.""" - if not self.use_tpu: - return tf.summary.scalar(name, scalar) - else: - return None - - def _summary_histogram(self, name, values): - """Adds a summary histogram, if the platform supports summaries.""" - if not self.use_tpu: - return tf.summary.histogram(name, values) - else: - return None - - def _init_weights(self, shape, scale=1.0, name='weights'): - """Randomly initializes a weight 
Tensor of the given shape. - - Args: - shape: list; desired Tensor dimensions. - scale: float; standard deviation scale with which to initialize weights. - name: string name for the variable. - - Returns: - TF Variable contining truncated random Normal initialized weights. - """ - num_inputs = shape[0] if len(shape) < 3 else shape[0] * shape[1] * shape[2] - stddev = scale / math.sqrt(num_inputs) - return tf.get_variable( - name, - shape=shape, - initializer=tf.truncated_normal_initializer(0., stddev)) - - def _init_bias(self, size): - """Initializes bias vector of given shape as zeros. - - Args: - size: int; desired size of bias Tensor. - - Returns: - TF Variable containing the initialized biases. - """ - return tf.get_variable( - name='b_{}'.format(size), - shape=[size], - initializer=tf.zeros_initializer()) - - def _add_summaries(self, mode, gradient_norm, parameter_norm): - """Defines TensorFlow operation for logging summaries to event files. - - Args: - mode: the ModeKey string. - gradient_norm: Tensor; norm of gradients produced during the current - training operation. - parameter_norm: Tensor; norm of the model parameters produced during the - current training operation. - """ - # Log summaries for TensorBoard. 
- if mode == tf.estimator.ModeKeys.TRAIN: - self._summary_scalar('norm_of_gradients', gradient_norm) - self._summary_scalar('norm_of_parameters', parameter_norm) - self._summary_scalar('total_loss', self.total_loss) - self._summary_scalar('learning_rate', self._learn_rate) - for target in self.targets: - self._summary_scalar('per_read_weighted_accuracy/{}'.format(target), - self.weighted_accuracy[target]) - self._summary_scalar('per_read_accuracy/{}'.format(target), - self.accuracy[target]) - self._summary_histogram('prediction_frequency/{}'.format(target), - self._predictions[target]) - self._summary_scalar('cross_entropy_loss/{}'.format(target), - self._loss[target]) - self._summary_op = tf.summary.merge_all() - else: - # Log average performance metrics over many batches using placeholders. - summaries = [] - for target in self.targets: - accuracy_ph = tf.placeholder(tf.float32, shape=()) - weighted_accuracy_ph = tf.placeholder(tf.float32, shape=()) - cross_entropy_ph = tf.placeholder(tf.float32, shape=()) - self._placeholders.update({ - 'accuracy/{}'.format(target): accuracy_ph, - 'weighted_accuracy/{}'.format(target): weighted_accuracy_ph, - 'cross_entropy/{}'.format(target): cross_entropy_ph, - }) - summaries += [ - self._summary_scalar('cross_entropy_loss/{}'.format(target), - cross_entropy_ph), - self._summary_scalar('per_read_accuracy/{}'.format(target), - accuracy_ph), - self._summary_scalar('per_read_weighted_accuracy/{}'.format(target), - weighted_accuracy_ph) - ] - - self._summary_op = tf.summary.merge(summaries) - - def _convolution(self, - inputs, - filter_dim, - pointwise_dim=None, - scale=1.0, - padding='SAME'): - """Applies convolutional filter of given dimensions to given input Tensor. - - If a pointwise dimension is specified, a depthwise separable convolution is - performed. - - Args: - inputs: 4D Tensor of shape (# reads, 1, # basepairs, # bases). - filter_dim: integer tuple of the form (width, depth). 
- pointwise_dim: int; output dimension for pointwise convolution. - scale: float; standard deviation scale with which to initialize weights. - padding: string; type of padding to use. One of "SAME" or "VALID". - - Returns: - 4D Tensor result of applying the convolutional filter to the inputs. - """ - in_channels = inputs.get_shape()[3].value - filter_width, filter_depth = filter_dim - filters = self._init_weights([1, filter_width, in_channels, filter_depth], - scale) - self._summary_histogram(filters.name.split(':')[0].split('/')[1], filters) - if pointwise_dim is None: - return tf.nn.conv2d( - inputs, - filters, - strides=[1, 1, 1, 1], - padding=padding, - name='weights') - pointwise_filters = self._init_weights( - [1, 1, filter_depth * in_channels, pointwise_dim], - scale, - name='pointwise_weights') - self._summary_histogram( - pointwise_filters.name.split(':')[0].split('/')[1], pointwise_filters) - return tf.nn.separable_conv2d( - inputs, - filters, - pointwise_filters, - strides=[1, 1, 1, 1], - padding=padding) - - def _pool(self, inputs, pooling_type): - """Performs pooling across width and height of the given inputs. - - Args: - inputs: Tensor shaped (batch, height, width, channels) over which to pool. - In our case, height is a unitary dimension and width can be thought of - as the read dimension. - pooling_type: string; one of "avg" or "max". - - Returns: - Tensor result of performing pooling of the given pooling_type over the - height and width dimensions of the given inputs. - """ - if pooling_type == 'max': - return tf.reduce_max(inputs, axis=[1, 2]) - if pooling_type == 'avg': - return tf.reduce_sum( - inputs, axis=[1, 2]) / tf.to_float(tf.shape(inputs)[2]) - - def _leaky_relu(self, lrelu_slope, inputs): - """Applies leaky ReLu activation to the given inputs with the given slope. - - Args: - lrelu_slope: float; slope value for the activation function. - A slope of 0.0 defines a standard ReLu activation, while a positive - slope defines a leaky ReLu. 
- inputs: Tensor upon which to apply the activation function. - - Returns: - Tensor result of applying the activation function to the given inputs. - """ - with tf.variable_scope('leaky_relu_activation'): - return tf.maximum(lrelu_slope * inputs, inputs) - - def _dropout(self, inputs, keep_prob): - """Applies dropout to the given inputs. - - Args: - inputs: Tensor upon which to apply dropout. - keep_prob: float; probability with which to randomly retain values in - the given input. - - Returns: - Tensor result of applying dropout to the given inputs. - """ - with tf.variable_scope('dropout'): - if keep_prob < 1.0: - return tf.nn.dropout(inputs, keep_prob) - return inputs - - def build_graph(self, features, labels, mode, batch_size): - """Creates TensorFlow model graph. - - Args: - features: a dict of input features Tensors. - labels: a dict (by target name) of prediction labels. - mode: the ModeKey string. - batch_size: the integer batch size. - - Side Effect: - Adds the following key Tensors and operations as class attributes: - placeholders, global_step, train_op, summary_op, accuracy, - weighted_accuracy, loss, logits, and predictions. - """ - is_train = (mode == tf.estimator.ModeKeys.TRAIN) - read = features['sequence'] - - # Add a unitary dimension, so we can use conv2d. 
- read = tf.expand_dims(read, 1) - prev_out = read - - filters = zip(self.hparams.filter_widths, self.hparams.filter_depths) - for i, f in enumerate(filters): - with tf.variable_scope('convolution_' + str(i)): - if self.hparams.use_depthwise_separable: - p = self.hparams.pointwise_depths[i] - else: - p = None - conv_out = self._convolution( - prev_out, f, pointwise_dim=p, scale=self.hparams.weight_scale) - conv_act_out = self._leaky_relu(self.hparams.lrelu_slope, conv_out) - prev_out = ( - self._dropout(conv_act_out, self.hparams.keep_prob) - if is_train else conv_act_out) - - for i in xrange(self.hparams.num_fc_layers): - with tf.variable_scope('fully_connected_' + str(i)): - # Create a convolutional layer which is equivalent to a fully-connected - # layer when reads have length self.hparams.min_read_length. - # The convolution will tile the layer appropriately for longer reads. - biases = self._init_bias(self.hparams.num_fc_units) - if i == 0: - # Take entire min_read_length segment as input. - # Output a single value per min_read_length_segment. - filter_dimensions = (self.hparams.min_read_length, - self.hparams.num_fc_units) - else: - # Take single output value of previous layer as input. - filter_dimensions = (1, self.hparams.num_fc_units) - fc_out = biases + self._convolution( - prev_out, - filter_dimensions, - scale=self.hparams.weight_scale, - padding='VALID') - self._summary_histogram(biases.name.split(':')[0].split('/')[1], biases) - fc_act_out = self._leaky_relu(self.hparams.lrelu_slope, fc_out) - prev_out = ( - self._dropout(fc_act_out, self.hparams.keep_prob) - if is_train else fc_act_out) - - # Pool to collapse tiling for reads longer than hparams.min_read_length. 
- with tf.variable_scope('pool'): - pool_out = self._pool(prev_out, self.hparams.pooling_type) - - with tf.variable_scope('output'): - self._logits = {} - self._predictions = {} - self._weighted_accuracy = {} - self._accuracy = {} - self._loss = collections.OrderedDict() - - for target in self.targets: - with tf.variable_scope(target): - label = labels[target] - possible_labels = self.possible_labels[target] - weights = self._init_weights( - [pool_out.get_shape()[1].value, - len(possible_labels)], - self.hparams.weight_scale, - name='weights') - biases = self._init_bias(len(possible_labels)) - self._summary_histogram( - weights.name.split(':')[0].split('/')[1], weights) - self._summary_histogram( - biases.name.split(':')[0].split('/')[1], biases) - logits = tf.matmul(pool_out, weights) + biases - predictions = tf.nn.softmax(logits) - - gather_inds = tf.stack([tf.range(batch_size), label], axis=1) - self._weighted_accuracy[target] = tf.reduce_mean( - tf.gather_nd(predictions, gather_inds)) - argmax_prediction = tf.cast(tf.argmax(predictions, axis=1), tf.int32) - self._accuracy[target] = tf.reduce_mean( - tf.to_float(tf.equal(label, argmax_prediction))) - - losses = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=label, logits=logits) - self._loss[target] = tf.reduce_mean(losses) - self._logits[target] = logits - self._predictions[target] = predictions - - # Compute total loss - self._total_loss = tf.add_n(self._loss.values()) - - # Define the optimizer. - - # tf.estimator framework builds the global_step for us, but if we aren't - # using the framework we have to make it ourselves. 
- self._global_step = tf.train.get_or_create_global_step() - if self.hparams.lr_decay < 0: - self._learn_rate = self.hparams.lr_init - else: - self._learn_rate = tf.train.exponential_decay( - self.hparams.lr_init, - self._global_step, - int(self.hparams.train_steps), - self.hparams.lr_decay, - staircase=False) - if self.hparams.optimizer == 'adam': - opt = tf.train.AdamOptimizer(self._learn_rate, self.hparams.optimizer_hp) - elif self.hparams.optimizer == 'momentum': - opt = tf.train.MomentumOptimizer(self._learn_rate, - self.hparams.optimizer_hp) - if self.use_tpu: - opt = tf.contrib.tpu.CrossShardOptimizer(opt) - - gradients, variables = zip(*opt.compute_gradients(self._total_loss)) - clipped_gradients, _ = tf.clip_by_global_norm(gradients, - self.hparams.grad_clip_norm) - with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): - self._train_op = opt.apply_gradients( - zip(clipped_gradients, variables), global_step=self._global_step) - - if not self.use_tpu: - grad_norm = tf.global_norm(gradients) if is_train else None - param_norm = tf.global_norm(variables) if is_train else None - self._add_summaries(mode, grad_norm, param_norm) - - def model_fn(self, features, labels, mode, params): - """Function fulfilling the tf.estimator model_fn interface. - - Args: - features: a dict containing the input features for prediction. - labels: a dict from target name to Tensor-value prediction. - mode: the ModeKey string. - params: a dictionary of parameters for building the model; current params - are params["batch_size"]: the integer batch size. - - Returns: - A tf.estimator.EstimatorSpec object ready for use in training, inference. - or evaluation. 
- """ - self.build_graph(features, labels, mode, params['batch_size']) - - return tf.estimator.EstimatorSpec( - mode, - predictions=self.predictions, - loss=self.total_loss, - train_op=self.train_op, - eval_metric_ops={}) diff --git a/research/seq2species/configuration.py b/research/seq2species/configuration.py deleted file mode 100644 index a4dd626e27246a79292a80d165b199c42a154df5..0000000000000000000000000000000000000000 --- a/research/seq2species/configuration.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Defines hyperparameter configuration for ConvolutionalNet models. - -Specifically, provides methods for defining and initializing TensorFlow -hyperparameters objects for a convolutional model as defined in: -seq2species.build_model -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def parse_hparams(hparam_values='', num_filters=1): - """Initializes TensorFlow hyperparameters object with default values. - - In addition, default hyperparameter values are overwritten with the specified - ones, where necessary. - - Args: - hparam_values: comma-separated string of name=value pairs for setting - particular hyperparameters. - num_filters: int; number of filters in the model. 
- Must be fixed outside of hyperparameter/study object as Vizier does not - support having inter-hyperparameter dependencies. - - Returns: - tf.contrib.training.Hparams object containing the model's hyperparameters. - """ - hparams = tf.contrib.training.HParams() - - # Specify model architecture option. - hparams.add_hparam('use_depthwise_separable', True) - - # Specify number of model parameters. - hparams.add_hparam('filter_widths', [3] * num_filters) - hparams.add_hparam('filter_depths', [1] * num_filters) - hparams.add_hparam('pointwise_depths', [64] * num_filters) - hparams.add_hparam('num_fc_layers', 2) - hparams.add_hparam('num_fc_units', 455) - hparams.add_hparam('min_read_length', 100) - hparams.add_hparam('pooling_type', 'avg') - - # Specify activation options. - hparams.add_hparam('lrelu_slope', 0.0) # Negative slope for leaky relu. - - # Specify training options. - hparams.add_hparam('keep_prob', 1.0) - hparams.add_hparam('weight_scale', 1.0) - hparams.add_hparam('grad_clip_norm', 20.0) - hparams.add_hparam('lr_init', 0.001) - hparams.add_hparam('lr_decay', 0.1) - hparams.add_hparam('optimizer', 'adam') - # optimizer_hp is decay rate for 1st moment estimates for ADAM, and - # momentum for SGD. - hparams.add_hparam('optimizer_hp', 0.9) - hparams.add_hparam('train_steps', 400000) - - # Overwrite defaults with specified values. - hparams.parse(hparam_values) - return hparams diff --git a/research/seq2species/input.py b/research/seq2species/input.py deleted file mode 100644 index f1636c87501175d7a6940693e00ae5713a1780ff..0000000000000000000000000000000000000000 --- a/research/seq2species/input.py +++ /dev/null @@ -1,325 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Input pipe for feeding examples to a Seq2Label model graph.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -from google.protobuf import text_format - -from protos import seq2label_pb2 -import seq2label_utils - -DNA_BASES = tuple('ACGT') -NUM_DNA_BASES = len(DNA_BASES) -# Possible FASTA characters/IUPAC ambiguity codes. -# See https://en.wikipedia.org/wiki/Nucleic_acid_notation. -AMBIGUITY_CODES = { - 'K': 'GT', - 'M': 'AC', - 'R': 'AG', - 'Y': 'CT', - 'S': 'CG', - 'W': 'AT', - 'B': 'CGT', - 'V': 'ACG', - 'H': 'ACT', - 'D': 'AGT', - 'X': 'ACGT', - 'N': 'ACGT' -} - - -def load_dataset_info(dataset_info_path): - """Load a `Seq2LabelDatasetInfo` from a serialized text proto file.""" - dataset_info = seq2label_pb2.Seq2LabelDatasetInfo() - with tf.gfile.Open(dataset_info_path, 'r') as f: - text_format.Parse(f.read(), dataset_info) - return dataset_info - - -class _InputEncoding(object): - """A helper class providing the graph operations needed to encode input. - - Instantiation of an _InputEncoding will write on the default TF graph, so it - should only be instantiated inside the `input_fn`. - - Attributes: - mode: `tf.estimator.ModeKeys`; the execution mode {TRAIN, EVAL, INFER}. - targets: list of strings; the names of the labels of interest (e.g. - "species"). - dna_bases: a tuple of the recognized DNA alphabet. - n_bases: the size of the DNA alphabet. 
- all_characters: list of recognized alphabet, including ambiguity codes. - label_values: a tuple of strings, the possible label values of the - prediction target. - n_labels: the size of label_values - fixed_read_length: an integer value of the statically-known read length, or - None if the read length is to be determined dynamically. - """ - - def __init__(self, - dataset_info, - mode, - targets, - noise_rate=0.0, - fixed_read_length=None): - self.mode = mode - self.targets = targets - self.dna_bases = DNA_BASES - self.n_bases = NUM_DNA_BASES - self.all_characters = list(DNA_BASES) + sorted(AMBIGUITY_CODES.keys()) - self.character_encodings = np.concatenate( - [[self._character_to_base_distribution(char)] - for char in self.all_characters], - axis=0) - all_legal_label_values = seq2label_utils.get_all_label_values(dataset_info) - # TF lookup tables. - self.characters_table = tf.contrib.lookup.index_table_from_tensor( - mapping=self.all_characters) - self.label_tables = { - target: tf.contrib.lookup.index_table_from_tensor( - all_legal_label_values[target]) - for target in targets - } - self.fixed_read_length = fixed_read_length - self.noise_rate = noise_rate - - def _character_to_base_distribution(self, char): - """Maps the given character to a probability distribution over DNA bases. - - Args: - char: character to be encoded as a probability distribution over bases. - - Returns: - Array of size (self.n_bases,) representing the identity of the given - character as a distribution over the possible DNA bases, self.dna_bases. - - Raises: - ValueError: if the given character is not contained in the recognized - alphabet, self.all_characters. - """ - if char not in self.all_characters: - raise ValueError( - 'Base distribution requested for unrecognized character %s.' 
% char) - possible_bases = AMBIGUITY_CODES[char] if char in AMBIGUITY_CODES else char - base_indices = [self.dna_bases.index(base) for base in possible_bases] - probability_weight = 1.0 / len(possible_bases) - distribution = np.zeros((self.n_bases)) - distribution[base_indices] = probability_weight - return distribution - - def encode_read(self, string_seq): - """Converts the input read sequence to one-hot encoding. - - Args: - string_seq: tf.String; input read sequence. - - Returns: - Input read sequence as a one-hot encoded Tensor, with depth and ordering - of one-hot encoding determined by the given bases. Ambiguous characters - such as "N" and "S" are encoded as a probability distribution over the - possible bases they represent. - """ - with tf.variable_scope('encode_read'): - read = tf.string_split([string_seq], delimiter='').values - read = self.characters_table.lookup(read) - read = tf.cast(tf.gather(self.character_encodings, read), tf.float32) - if self.fixed_read_length: - read = tf.reshape(read, (self.fixed_read_length, self.n_bases)) - return read - - def encode_label(self, target, string_label): - """Converts the label value to an integer encoding. - - Args: - target: str; the target name. - string_label: tf.String; value of the label for the current input read. - - Returns: - Given label value as an index into the possible_target_values. - """ - with tf.variable_scope('encode_label/{}'.format(target)): - return tf.cast(self.label_tables[target].lookup(string_label), tf.int32) - - def _empty_label(self): - return tf.constant((), dtype=tf.int32, shape=()) - - def parse_single_tfexample(self, serialized_example): - """Parses a tf.train.Example proto to a one-hot encoded read, label pair. - - Injects noise into the incoming tf.train.Example's read sequence - when noise_rate is non-zero. - - Args: - serialized_example: string; the serialized tf.train.Example proto - containing the read sequence and label value of interest as - tf.FixedLenFeatures. 
- - Returns: - Tuple (features, labels) of dicts for the input features and prediction - targets. - """ - with tf.variable_scope('parse_single_tfexample'): - features_spec = {'sequence': tf.FixedLenFeature([], tf.string)} - for target in self.targets: - features_spec[target] = tf.FixedLenFeature([], tf.string) - features = tf.parse_single_example( - serialized_example, features=features_spec) - if self.noise_rate > 0.0: - read_sequence = tf.py_func(seq2label_utils.add_read_noise, - [features['sequence'], self.noise_rate], - (tf.string)) - else: - read_sequence = features['sequence'] - read_sequence = self.encode_read(read_sequence) - read_features = {'sequence': read_sequence} - if self.mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): - label = { - target: self.encode_label(target, features[target]) - for target in self.targets - } - else: - label = {target: self._empty_label() for target in self.targets} - return read_features, label - - -class InputDataset(object): - """A class providing access to input data for the Seq2Label model. - - Attributes: - mode: `tf.estimator.ModeKeys`; the execution mode {TRAIN, EVAL, INFER}. - targets: list of strings; the names of the labels of interest (e.g. - "species"). - dataset_info: a `Seq2LabelDatasetInfo` message reflecting the dataset - metadata. - initializer: the TF initializer op for the underlying iterator, which - will rewind the iterator. - is_train: Boolean indicating whether or not the execution mode is TRAIN. - """ - - def __init__(self, - mode, - targets, - dataset_info, - train_epochs=None, - noise_rate=0.0, - random_seed=None, - input_tfrecord_files=None, - fixed_read_length=None, - ensure_constant_batch_size=False, - num_parallel_calls=32): - """Constructor for InputDataset. - - Args: - mode: `tf.estimator.ModeKeys`; the execution mode {TRAIN, EVAL, INFER}. - targets: list of strings; the names of the labels of interest (e.g. - "species"). 
- dataset_info: a `Seq2LabelDatasetInfo` message reflecting the dataset - metadata. - train_epochs: the number of training epochs to perform, if mode==TRAIN. - noise_rate: float [0.0, 1.0] specifying rate at which to inject - base-flipping noise into the read sequences. - random_seed: seed to be used for shuffling, if mode==TRAIN. - input_tfrecord_files: a list of filenames for TFRecords of TF examples. - fixed_read_length: an integer value of the statically-known read length, - or None if the read length is to be determined dynamically. The read - length must be known statically for TPU execution. - ensure_constant_batch_size: ensure a constant batch size at the expense of - discarding the last "short" batch. This also gives us a statically - constant batch size, which is essential for e.g. the TPU platform. - num_parallel_calls: the number of dataset elements to process in parallel. - If None, elements will be processed sequentially. - """ - self.input_tfrecord_files = input_tfrecord_files - self.mode = mode - self.targets = targets - self.dataset_info = dataset_info - self._train_epochs = train_epochs - self._noise_rate = noise_rate - self._random_seed = random_seed - if random_seed is not None: - np.random.seed(random_seed) - self._fixed_read_length = fixed_read_length - self._ensure_constant_batch_size = ensure_constant_batch_size - self._num_parallel_calls = num_parallel_calls - - @staticmethod - def from_tfrecord_files(input_tfrecord_files, *args, **kwargs): - return InputDataset( - *args, input_tfrecord_files=input_tfrecord_files, **kwargs) - - @property - def is_train(self): - return self.mode == tf.estimator.ModeKeys.TRAIN - - def input_fn(self, params): - """Supplies input for the model. - - This function supplies input to our model as a function of the mode. - - Args: - params: a dictionary, containing: - - params['batch_size']: the integer batch size. 
- - Returns: - A tuple of two values as follows: - 1) the *features* dict, containing a tensor value for keys as follows: - - "sequence" - the encoded read input sequence. - 2) the *labels* dict. containing a key for `target`, whose value is: - - a string Tensor value (in TRAIN/EVAL mode), or - - a blank Tensor (PREDICT mode). - """ - randomize_input = self.is_train - batch_size = params['batch_size'] - - encoding = _InputEncoding( - self.dataset_info, - self.mode, - self.targets, - noise_rate=self._noise_rate, - fixed_read_length=self._fixed_read_length) - - dataset = tf.data.TFRecordDataset(self.input_tfrecord_files) - dataset = dataset.map( - encoding.parse_single_tfexample, - num_parallel_calls=self._num_parallel_calls) - - dataset = dataset.repeat(self._train_epochs if self.is_train else 1) - if randomize_input: - dataset = dataset.shuffle( - buffer_size=max(1000, batch_size), seed=self._random_seed) - - if self._ensure_constant_batch_size: - # Only take batches of *exactly* size batch_size; then we get a - # statically knowable batch shape. - dataset = dataset.batch(batch_size, drop_remainder=True) - else: - dataset = dataset.batch(batch_size) - - # Prefetch to allow infeed to be in parallel with model computations. - dataset = dataset.prefetch(2) - - # Use initializable iterator to support table lookups. - iterator = dataset.make_initializable_iterator() - self.initializer = iterator.initializer - tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer) - - features, labels = iterator.get_next() - return (features, labels) diff --git a/research/seq2species/protos/BUILD b/research/seq2species/protos/BUILD deleted file mode 100644 index 5628d4c41a79fc3fb6fb61fc9089f1524c49f175..0000000000000000000000000000000000000000 --- a/research/seq2species/protos/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -# Protos for Tensorflow Seq2Species API. 
- -package( - default_visibility = ["//visibility:public"], -) - -py_proto_library( - name = "seq2label_py_pb2", - api_version = 2, - deps = [":seq2label_proto"], -) - -proto_library( - name = "seq2label_proto", - srcs = ["seq2label.proto"], -) diff --git a/research/seq2species/protos/__init__.py b/research/seq2species/protos/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/seq2species/protos/seq2label.proto b/research/seq2species/protos/seq2label.proto deleted file mode 100644 index 531c4ad75e06db119c547cde34323c08f772e7fa..0000000000000000000000000000000000000000 --- a/research/seq2species/protos/seq2label.proto +++ /dev/null @@ -1,49 +0,0 @@ -syntax = "proto2"; - -package seq2species.protos; - -// Summarizes metadata information for a dataset that can be used for running -// training or inference. -message Seq2LabelDatasetInfo { - // Summarizes all possible values for a given label in the dataset. - message LabelInfo { - optional string name = 1; - repeated string values = 2; - // Per-value weights used to normalize the classes in a dataset. - repeated float weights = 3; - } - repeated LabelInfo labels = 3; - // Length (in basepairs) of the reads in the dataset. - optional int32 read_length = 4; - // Stride (in number of basepairs) in the moving window. - optional int32 read_stride = 7; - // Total number of examples in the dataset. - optional int64 num_examples = 5; - // Full path to the dataset. - optional string dataset_path = 6; -} - -// Summarizes metadata information about a model trained on a Seq2Label dataset. -message Seq2LabelModelInfo { - optional string hparams_string = 1; - optional string model_type = 2; - repeated string targets = 3; - optional int32 num_filters = 4; - optional int32 batch_size = 5; - optional string metadata_path = 6; - optional float training_noise_rate = 7; -} - -// Summarizes resulting measures of modelling experiments. 
-message Seq2LabelExperimentMeasures { - optional string checkpoint_path = 1; - optional int64 steps = 2; - optional float wall_time = 3; - optional bool experiment_infeasible = 4; - - message Measure { - optional string name = 1; - optional float value = 2; - } - repeated Measure measures = 5; -} diff --git a/research/seq2species/run_training.py b/research/seq2species/run_training.py deleted file mode 100644 index f03bb09ecf8b1b65f9899f20cb91e3f0363a2f18..0000000000000000000000000000000000000000 --- a/research/seq2species/run_training.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Defines training scheme for neural networks for Seq2Species prediction. - -Defines and runs the loop for training a (optionally) depthwise separable -convolutional model for predicting taxonomic labels from short reads of DNA. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -from absl import flags -import numpy as np -import tensorflow as tf -from google.protobuf import text_format - -import build_model -import configuration -import input as seq2species_input -from protos import seq2label_pb2 -import seq2label_utils - -# Define non-tunable parameters. 
-flags.DEFINE_integer('num_filters', 1, 'Number of filters for conv model') -flags.DEFINE_string('hparams', '', - 'Comma-separated list of name=value hyperparameter ' - "pairs ('hp1=value1,hp2=value2'). Unspecified " - 'hyperparameters will be filled with defaults.') -flags.DEFINE_integer('batch_size', 512, 'Size of batches during training.') -flags.DEFINE_integer('min_train_steps', 1000, - 'Minimum number of training steps to run.') -flags.DEFINE_float('max_task_loss', 10.0, - "Terminate trial if task loss doesn't fall below this " - 'within --min_train_steps.') -flags.DEFINE_integer('n_print_progress_every', 1000, - 'Print training progress every ' - '--n_print_progress_every global steps.') -flags.DEFINE_list('targets', ['species'], - 'Names of taxonomic ranks to use as training targets.') -flags.DEFINE_float( - 'noise_rate', 0.0, 'Rate [0.0, 1.0] at which to inject ' - 'base-flipping noise into input read sequences.') - -# Define paths to logs and data. -flags.DEFINE_list( - 'train_files', [], 'Full paths to the TFRecords containing the ' - 'training examples.') -flags.DEFINE_string( - 'metadata_path', '', 'Full path of the text proto containing configuration ' - 'information about the set of training examples.') -flags.DEFINE_string('logdir', '/tmp/seq2species', - 'Directory to which to write logs.') - -# Define supervisor/checkpointing options. -flags.DEFINE_integer('task', 0, 'Task ID of the replica running the training.') -flags.DEFINE_string('master', '', 'Name of the TF master to use.') -flags.DEFINE_integer( - 'save_model_secs', 900, 'Rate at which to save model parameters. 
' - 'Set to 0 to disable checkpointing.') -flags.DEFINE_integer('recovery_wait_secs', 30, - 'Wait to recover model from checkpoint ' - 'before timing out.') -flags.DEFINE_integer('save_summaries_secs', 900, - 'Rate at which to save Tensorboard summaries.') -flags.DEFINE_integer('ps_tasks', 0, - 'Number of tasks in the ps job; 0 if no ps is used.') - -FLAGS = flags.FLAGS -RANDOM_SEED = 42 - - -def wait_until(time_sec): - """Stalls execution until a given time. - - Args: - time_sec: time, in seconds, until which to loop idly. - """ - while time.time() < time_sec: - pass - - -def update_measures(measures, new_measures, loss_val, max_loss=None): - """Updates tracking of experimental measures and infeasibilty. - - Args: - measures: dict; mapping from measure name to measure value. - new_measures: dict; mapping from measure name to new measure values. - loss_val: float; value of loss metric by which to determine fesibility. - max_loss: float; maximum value at which to consider the loss feasible. - - Side Effects: - Updates the given mapping of measures and values based on the current - experimental metrics stored in new_measures, and determines current - feasibility of the experiment based on the provided loss value. - """ - max_loss = max_loss if max_loss else np.finfo('f').max - measures['is_infeasible'] = ( - loss_val >= max_loss or not np.isfinite(loss_val)) - measures.update(new_measures) - - -def run_training(model, hparams, training_dataset, logdir, batch_size): - """Trains the given model on random mini-batches of reads. - - Args: - model: ConvolutionalNet instance containing the model graph and operations. - hparams: tf.contrib.training.Hparams object containing the model's - hyperparamters; see configuration.py for hyperparameter definitions. - training_dataset: an `InputDataset` that can feed labelled examples. - logdir: string; full path of directory to which to save checkpoints. - batch_size: integer batch size. 
- - Yields: - Tuple comprising a dictionary of experimental measures and the save path - for train checkpoints and summaries. - """ - input_params = dict(batch_size=batch_size) - features, labels = training_dataset.input_fn(input_params) - model.build_graph(features, labels, tf.estimator.ModeKeys.TRAIN, batch_size) - - is_chief = FLAGS.task == 0 - scaffold = tf.train.Scaffold( - saver=tf.train.Saver( - tf.global_variables(), - max_to_keep=5, - keep_checkpoint_every_n_hours=1.0), - init_op=tf.global_variables_initializer(), - summary_op=model.summary_op) - with tf.train.MonitoredTrainingSession( - master=FLAGS.master, - checkpoint_dir=logdir, - is_chief=is_chief, - scaffold=scaffold, - save_summaries_secs=FLAGS.save_summaries_secs, - save_checkpoint_secs=FLAGS.save_model_secs, - max_wait_secs=FLAGS.recovery_wait_secs) as sess: - global_step = sess.run(model.global_step) - print('Initialized model at global step ', global_step) - init_time = time.time() - measures = {'is_infeasible': False} - - if is_chief: - model_info = seq2label_utils.construct_seq2label_model_info( - hparams, 'conv', FLAGS.targets, FLAGS.metadata_path, FLAGS.batch_size, - FLAGS.num_filters, FLAGS.noise_rate) - write_message(model_info, os.path.join(logdir, 'model_info.pbtxt')) - - ops = [ - model.accuracy, model.weighted_accuracy, model.total_loss, - model.global_step, model.train_op - ] - - while not sess.should_stop() and global_step < hparams.train_steps: - accuracy, weighted_accuracy, loss, global_step, _ = sess.run(ops) - - def gather_measures(): - """Updates the measures dictionary from this batch.""" - new_measures = {'train_loss': loss, 'global_step': global_step} - for target in FLAGS.targets: - new_measures.update({ - ('train_accuracy/%s' % target): accuracy[target], - ('train_weighted_accuracy/%s' % target): weighted_accuracy[target] - }) - update_measures( - measures, new_measures, loss, max_loss=FLAGS.max_task_loss) - - # Periodically track measures according to current mini-batch 
performance. - - # Log a message. - if global_step % FLAGS.n_print_progress_every == 0: - log_message = ('\tstep: %d (%d sec), loss: %f' % - (global_step, time.time() - init_time, loss)) - for target in FLAGS.targets: - log_message += (', accuracy/%s: %f ' % (target, accuracy[target])) - log_message += (', weighted_accuracy/%s: %f ' % - (target, weighted_accuracy[target])) - print(log_message) - - # Gather new measures and update the measures dictionary. - gather_measures() - yield measures, scaffold.saver.last_checkpoints[-1] - - # Check for additional stopping criteria. - if not np.isfinite(loss) or (loss >= FLAGS.max_task_loss and - global_step > FLAGS.min_train_steps): - break - - # Always yield once at the end. - gather_measures() - yield measures, scaffold.saver.last_checkpoints[-1] - - -def write_message(message, filename): - """Writes contents of the given message to the given filename as a text proto. - - Args: - message: the proto message to save. - filename: full path of file to which to save the text proto. - - Side Effects: - Outputs a text proto file to the given filename. - """ - message_string = text_format.MessageToString(message) - with tf.gfile.GFile(filename, 'w') as f: - f.write(message_string) - - -def write_measures(measures, checkpoint_file, init_time): - """Writes performance measures to file. - - Args: - measures: dict; mapping from measure name to measure value. - checkpoint_file: string; full save path for checkpoints and summaries. - init_time: int; start time for work on the current experiment. - - Side Effects: - Writes given dictionary of performance measures for the current experiment - to a 'measures.pbtxt' file in the checkpoint directory. - """ - # Save experiment measures. 
- print('global_step: ', measures['global_step']) - experiment_measures = seq2label_pb2.Seq2LabelExperimentMeasures( - checkpoint_path=checkpoint_file, - steps=measures['global_step'], - experiment_infeasible=measures['is_infeasible'], - wall_time=time.time() - init_time) # Inaccurate for restarts. - for name, value in measures.iteritems(): - if name not in ['is_infeasible', 'global_step']: - experiment_measures.measures.add(name=name, value=value) - measures_file = os.path.join( - os.path.dirname(checkpoint_file), 'measures.pbtxt') - write_message(experiment_measures, measures_file) - print('Wrote ', measures_file, - ' containing the following experiment measures:\n', experiment_measures) - - -def main(unused_argv): - dataset_info = seq2species_input.load_dataset_info(FLAGS.metadata_path) - - init_time = time.time() - - # Determine model hyperparameters. - hparams = configuration.parse_hparams(FLAGS.hparams, FLAGS.num_filters) - print('Current Hyperparameters:') - for hp_name, hp_val in hparams.values().items(): - print('\t', hp_name, ': ', hp_val) - - # Initialize the model graph. - print('Constructing TensorFlow Graph.') - tf.reset_default_graph() - - input_dataset = seq2species_input.InputDataset.from_tfrecord_files( - FLAGS.train_files, - 'train', - FLAGS.targets, - dataset_info, - noise_rate=FLAGS.noise_rate, - random_seed=RANDOM_SEED) - - with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)): - model = build_model.ConvolutionalNet( - hparams, dataset_info, targets=FLAGS.targets) - - # Run the experiment. - measures, checkpoint_file = None, None - print('Starting model training.') - for cur_measures, cur_file in run_training( - model, hparams, input_dataset, FLAGS.logdir, batch_size=FLAGS.batch_size): - measures, checkpoint_file = cur_measures, cur_file - - # Save experiment results. 
- write_measures(measures, checkpoint_file, init_time) - - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/seq2species/run_training_test.py b/research/seq2species/run_training_test.py deleted file mode 100644 index 754d2e0174e730f57309a52ab2b80d1e84e7ab15..0000000000000000000000000000000000000000 --- a/research/seq2species/run_training_test.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for run_training.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time - -from absl import flags -from absl.testing import absltest -from absl.testing import flagsaver -from absl.testing import parameterized -import numpy as np -import tensorflow as tf -from google.protobuf import text_format - -import run_training -from protos import seq2label_pb2 -import test_utils - -FLAGS = flags.FLAGS - - -class RunTrainingTest(parameterized.TestCase): - - @parameterized.parameters(2, 4, 7) - def test_wait_until(self, wait_sec): - end_time = time.time() + wait_sec - run_training.wait_until(end_time) - self.assertEqual(round(time.time() - end_time), 0) - - @parameterized.parameters( - ({}, {'a': 0.7, 'b': 12.3}, 12.3, None, - {'a': 0.7, 'b': 12.3, 'is_infeasible': False}), - ({'a': 0.42}, {'b': 24.5}, 24.5, 32.0, - {'a': 0.42, 'b': 24.5, 'is_infeasible': False}), - ({'a': 0.503}, {'a': 0.82, 'b': 7.2}, 7.2, 0.1, - {'a': 0.82, 'b': 7.2, 'is_infeasible': True}), - ({}, {'a': 0.7, 'b': 12.3}, float('Inf'), None, - {'a': 0.7, 'b': 12.3, 'is_infeasible': True}) - ) - def test_update_measures(self, measures, new_measures, loss, max_loss, - expected): - run_training.update_measures(measures, new_measures, loss, max_loss) - self.assertEqual(measures, expected) - - def test_write_measures(self): - init_time = time.time() - measures = { - 'global_step': 311448, - 'train_loss': np.float32(18.36), - 'train_weighted_accuracy': np.float32(0.3295), - 'train_accuracy': 0.8243, - 'is_infeasible': False - } - tmp_path = os.path.join(FLAGS.test_tmpdir, 'measures.pbtxt') - run_training.write_measures(measures, tmp_path, init_time) - experiment_measures = seq2label_pb2.Seq2LabelExperimentMeasures() - with tf.gfile.Open(tmp_path) as f: - text_format.Parse(f.read(), experiment_measures) - 
self.assertEqual(experiment_measures.checkpoint_path, tmp_path) - self.assertFalse(experiment_measures.experiment_infeasible) - self.assertEqual(experiment_measures.steps, measures['global_step']) - self.assertGreater(experiment_measures.wall_time, 0) - self.assertEqual(len(experiment_measures.measures), 3) - for measure in experiment_measures.measures: - self.assertAlmostEqual(measure.value, measures[measure.name]) - - @parameterized.parameters((test_utils.TEST_TARGETS[:1],), - (test_utils.TEST_TARGETS,)) - def test_run_training(self, targets): - """Tests whether the training loop can be run successfully. - - Generates test input files and runs the main driving code. - - Args: - targets: the targets to train on. - """ - # Create test input and metadata files. - num_examples, read_len = 20, 5 - train_file = test_utils.create_tmp_train_file(num_examples, read_len) - metadata_path = test_utils.create_tmp_metadata(num_examples, read_len) - - # Check that the training loop runs as expected. - logdir = os.path.join(FLAGS.test_tmpdir, 'train:{}'.format(len(targets))) - with flagsaver.flagsaver( - train_files=train_file, - metadata_path=metadata_path, - targets=targets, - logdir=logdir, - hparams='train_steps=10,min_read_length=5', - batch_size=10): - run_training.main(FLAGS) - # Check training loop ran by confirming existence of a checkpoint file. - self.assertIsNotNone(tf.train.latest_checkpoint(FLAGS.logdir)) - # Check training loop ran by confiming existence of a measures file. - self.assertTrue( - os.path.exists(os.path.join(FLAGS.logdir, 'measures.pbtxt'))) - - -if __name__ == '__main__': - absltest.main() diff --git a/research/seq2species/seq2label_utils.py b/research/seq2species/seq2label_utils.py deleted file mode 100644 index b975b7f17d0dfd4798ed798509e7b3d3447cfd6b..0000000000000000000000000000000000000000 --- a/research/seq2species/seq2label_utils.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utilities for working with Seq2Label datasets and models. - -This library provides utilities for parsing and generating Seq2Label protos. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from protos import seq2label_pb2 - - -def get_all_label_values(dataset_info): - """Retrieves possible values for modeled labels from a `Seq2LabelDatasetInfo`. - - Args: - dataset_info: a `Seq2LabelDatasetInfo` message. - - Returns: - A dictionary mapping each label name to a tuple of its permissible values. - """ - return { - label_info.name: tuple(label_info.values) - for label_info in dataset_info.labels - } - - -def construct_seq2label_model_info(hparams, model_type, targets, metadata_path, - batch_size, num_filters, - training_noise_rate): - """Constructs a Seq2LabelModelInfo proto with the given properties. - - Args: - hparams: initialized tf.contrib.training.Hparams object. - model_type: string; descriptive tag indicating type of model, ie. "conv". - targets: list of names of the targets the model is trained to predict. - metadata_path: string; full path to Seq2LabelDatasetInfo text proto used - to initialize the model. - batch_size: int; number of reads per mini-batch. - num_filters: int; number of filters for convolutional model. 
- training_noise_rate: float; rate [0.0, 1.0] of base-flipping noise injected - into input read sequenced at training time. - - Returns: - The Seq2LabelModelInfo proto with the hparams, model_type, targets, - num_filters, batch_size, metadata_path, and training_noise_rate fields - set to the given values. - """ - return seq2label_pb2.Seq2LabelModelInfo( - hparams_string=hparams.to_json(), - model_type=model_type, - targets=sorted(targets), - num_filters=num_filters, - batch_size=batch_size, - metadata_path=metadata_path, - training_noise_rate=training_noise_rate) - - -def add_read_noise(read, base_flip_probability=0.01): - """Adds base-flipping noise to the given read sequence. - - Args: - read: string; the read sequence to which to add noise. - base_flip_probability: float; probability of a base flip at each position. - - Returns: - The given read with base-flipping noise added at the provided - base_flip_probability rate. - """ - base_flips = np.random.binomial(1, base_flip_probability, len(read)) - if sum(base_flips) == 0: - return read - - read = np.array(list(read)) - possible_mutations = np.char.replace(['ACTG'] * sum(base_flips), - read[base_flips == 1], '') - mutations = map(np.random.choice, map(list, possible_mutations)) - read[base_flips == 1] = mutations - return ''.join(read) diff --git a/research/seq2species/test_utils.py b/research/seq2species/test_utils.py deleted file mode 100644 index f02798fb533c99c8545e3a51f56de31b629ca2f4..0000000000000000000000000000000000000000 --- a/research/seq2species/test_utils.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Utility methods for accessing and operating on test data.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from absl import flags -import tensorflow as tf -from google.protobuf import text_format - -import input as seq2species_input -from protos import seq2label_pb2 - -FLAGS = flags.FLAGS - -# Target names included in the example inputs. -TEST_TARGETS = ['test_target_1', 'test_target_2'] - - -def _as_bytes_feature(in_string): - """Converts the given string to a tf.train.BytesList feature. - - Args: - in_string: string to be converted to BytesList Feature. - - Returns: - The TF BytesList Feature representing the given string. - """ - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[in_string])) - - -def create_tmp_train_file(num_examples, - read_len, - characters=seq2species_input.DNA_BASES, - name='test.tfrecord'): - """Write a test TFRecord of input examples to temporary test directory. - - The generated input examples are test tf.train.Example protos, each comprised - of a toy sequence of length read_len and non-meaningful labels for targets in - TEST_TARGETS. - - Args: - num_examples: int; number of examples to write to test input file. - read_len: int; length of test read sequences. - characters: string; set of characters from which to construct test reads. - Defaults to canonical DNA bases. - name: string; filename for the test input file. 
- - Returns: - Full path to the generated temporary test input file. - """ - tmp_path = os.path.join(FLAGS.test_tmpdir, name) - with tf.python_io.TFRecordWriter(tmp_path) as writer: - for i in xrange(num_examples): - char = characters[i % len(characters)] - features_dict = {'sequence': _as_bytes_feature(char * read_len)} - for target_name in TEST_TARGETS: - nonsense_label = _as_bytes_feature(str(i)) - features_dict[target_name] = nonsense_label - tf_features = tf.train.Features(feature=features_dict) - example = tf.train.Example(features=tf_features) - writer.write(example.SerializeToString()) - return tmp_path - - -def create_tmp_metadata(num_examples, read_len): - """Write a test Seq2LabelDatasetInfo test proto to temporary test directory. - - Args: - num_examples: int; number of example labels to write into test metadata. - read_len: int; length of test read sequences. - - Returns: - Full path to the generated temporary test file containing the - Seq2LabelDatasetInfo text proto. - """ - dataset_info = seq2label_pb2.Seq2LabelDatasetInfo( - read_length=read_len, - num_examples=num_examples, - read_stride=1, - dataset_path='test.tfrecord') - - for target in TEST_TARGETS: - dataset_info.labels.add( - name=target, values=[str(i) for i in xrange(num_examples)]) - - tmp_path = os.path.join(FLAGS.test_tmpdir, 'test.pbtxt') - with tf.gfile.GFile(tmp_path, 'w') as f: - f.write(text_format.MessageToString(dataset_info)) - return tmp_path diff --git a/research/seq_flow_lite/.bazelrc b/research/seq_flow_lite/.bazelrc new file mode 100644 index 0000000000000000000000000000000000000000..73cf8a5a7a9ddcdee386e5ece97dcf4c004ecd60 --- /dev/null +++ b/research/seq_flow_lite/.bazelrc @@ -0,0 +1,82 @@ +# gRPC using libcares in opensource has some issues. +build --define=grpc_no_ares=true + +# Suppress all warning messages. 
+build:short_logs --output_filter=DONT_MATCH_ANYTHING + +# Force python3 +build --action_env=PYTHON_BIN_PATH=/usr/bin/python3 +build --repo_env=PYTHON_BIN_PATH=/usr/bin/python3 +build --python_path=/usr/bin/python3 + +# Enable using platform specific build settings +build --enable_platform_specific_config + +# Flag to enable remote config. Required starting from TF 2.2. +common --experimental_repo_remote_exec + +build:manylinux2010 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:toolchain + +build -c opt +build --cxxopt="-std=c++14" +build --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" +build --auto_output_filter=subpackages +build --copt="-Wall" --copt="-Wno-sign-compare" +build --linkopt="-lrt -lm" + +# TF isn't built in dbg mode, so our dbg builds will segfault due to inconsistency +# of defines when using tf's headers. In particular in refcount.h. +build --cxxopt="-DNDEBUG" + + +build --define=use_fast_cpp_protos=true +build --define=allow_oversize_protos=true + +build --spawn_strategy=standalone +build -c opt + +# Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF +# compilation options. It also addresses memory use due to +# copy-on-write semantics of std::strings of the older ABI. +build --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 + +# Make Bazel print out all options from rc files. +build --announce_rc + +# Other build flags. +build --define=grpc_no_ares=true + +# See https://github.com/bazelbuild/bazel/issues/7362 for information on what +# --incompatible_remove_legacy_whole_archive flag does. +# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate +# Tensorflow to the default, however test coverage wasn't enough to catch the +# errors. +# There is ongoing work on Bazel team's side to provide support for transitive +# shared libraries. As part of migrating to transitive shared libraries, we +# hope to provide a better mechanism for control over symbol exporting, and +# then tackle this issue again. 
+# +# TODO: Remove this line once TF doesn't depend on Bazel wrapping all library +# archives in -whole_archive -no_whole_archive. +build --noincompatible_remove_legacy_whole_archive + +# These are bazel 2.0's incompatible flags. Tensorflow needs to use bazel 2.0.0 +# to use cc_shared_library, as part of the Tensorflow Build Improvements RFC: +# https://github.com/tensorflow/community/pull/179 +build --noincompatible_prohibit_aapt1 + +# Build TF with C++ 17 features. +build:c++17 --cxxopt=-std=c++1z +build:c++17 --cxxopt=-stdlib=libc++ +build:c++1z --config=c++17 + +# Enable using platform specific build settings, except when cross-compiling for +# mobile platforms. +build --enable_platform_specific_config + + +# Options from ./configure +try-import %workspace%/.tf_configure.bazelrc + +# Put user-specific options in .bazelrc.user +try-import %workspace%/.bazelrc.user diff --git a/research/seq_flow_lite/BUILD b/research/seq_flow_lite/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..e3d4564aa86e0e577a726dbcf90087752c334999 --- /dev/null +++ b/research/seq_flow_lite/BUILD @@ -0,0 +1,54 @@ +licenses(["notice"]) + +package( + default_visibility = [":friends"], +) + +package_group( + name = "friends", + packages = [ + "//...", + ], +) + +py_library( + name = "metric_functions", + srcs = ["metric_functions.py"], + srcs_version = "PY3", +) + +py_library( + name = "input_fn_reader", + srcs = ["input_fn_reader.py"], + srcs_version = "PY3", + deps = [ + "//layers:projection_layers", + ], +) + +py_binary( + name = "trainer", + srcs = ["trainer.py"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":input_fn_reader", + ":metric_functions", + "//models:prado", + ], +) + +py_binary( + name = "export_to_tflite", + srcs = ["export_to_tflite.py"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":input_fn_reader", + ":metric_functions", + "//layers:base_layers", + "//layers:projection_layers", + "//models:prado", + 
"//utils:tflite_utils", + ], +) diff --git a/research/seq_flow_lite/CONTRIBUTING.md b/research/seq_flow_lite/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..9a86ba025d83dfc39db62d84ee7cac0a9da1ac08 --- /dev/null +++ b/research/seq_flow_lite/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). diff --git a/research/seq_flow_lite/README.md b/research/seq_flow_lite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..93ca02cccb00f83100d4d61b9b2b5ae5c9f63310 --- /dev/null +++ b/research/seq_flow_lite/README.md @@ -0,0 +1,87 @@ +# Sequence Projection Models + +This repository contains implementation of the following papers. 
+ +* [*PRADO: Projection Attention Networks for Document Classification On-Device*](https://www.aclweb.org/anthology/D19-1506/) +* [*Self-Governing Neural Networks for On-Device Short Text Classification*](https://www.aclweb.org/anthology/D18-1105/) + +## Description + +We provide a family of models that projects sequence to fixed sized features. +The idea behind is to build embedding-free models that minimize the model size. +Instead of using embedding table to lookup embeddings, sequence projection +models computes them on the fly. + + +## History + +### August 24, 2020 +* Add PRADO and SGNN implementation. + +## Authors or Maintainers + +* Prabhu Kaliamoorthi +* Yicheng Fan ([@thunderfyc](https://github.com/thunderfyc)) + + +## Requirements + +[![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0) +[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/) + + +## Training + +Train a PRADO model on civil comments dataset + +```shell +bazel run -c opt :trainer -- \ +--config_path=$(pwd)/configs/civil_comments_prado.txt \ +--runner_mode=train --logtostderr --output_dir=/tmp/prado +``` + +Train a SGNN model to detect languages: + +```shell +bazel run -c opt sgnn:train -- --logtostderr --output_dir=/tmp/sgnn +``` + +## Evaluation + +Evaluate PRADO model: + +```shell +bazel run -c opt :trainer -- \ +--config_path=$(pwd)/configs/civil_comments_prado.txt \ +--runner_mode=eval --logtostderr --output_dir=/tmp/prado +``` + +Evaluate SGNN model: +```shell +bazel run -c opt sgnn:run_tflite -- --model=/tmp/sgnn/model.tflite "Hello world" +``` + + +## References + +1. **Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift**
+ Sergey Ioffe, Christian Szegedy
+ [[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015. + +2. **Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference**
+ Benoit Jacob, Skirmantas Kligys, Bo Chen, Menglong Zhu, Matthew Tang, Andrew Howard, Hartwig Adam, Dmitry Kalenichenko
+ [[link]](https://arxiv.org/abs/1712.05877). In CVPR, 2018. + +3. **PRADO: Projection Attention Networks for Document Classification On-Device**
+ Prabhu Kaliamoorthi, Sujith Ravi, Zornitsa Kozareva
+ [[link]](https://www.aclweb.org/anthology/D19-1506/). In EMNLP-IJCNLP, 2019 + +4. **Self-Governing Neural Networks for On-Device Short Text Classification**
+ Sujith Ravi, Zornitsa Kozareva
+ [[link]](https://www.aclweb.org/anthology/D18-1105). In EMNLP, 2018 + +## License + +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +This project is licensed under the terms of the **Apache License 2.0**. diff --git a/research/seq_flow_lite/WORKSPACE b/research/seq_flow_lite/WORKSPACE new file mode 100644 index 0000000000000000000000000000000000000000..d29ee07c65b810015f3f54c9aff18c8f3b302db6 --- /dev/null +++ b/research/seq_flow_lite/WORKSPACE @@ -0,0 +1,185 @@ +workspace(name = "tensorflow_models_seq_flow_lite") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@//third_party/py:python_configure.bzl", "python_configure") + + +http_archive( + name = "io_bazel_rules_closure", + sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9", + strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13 + ], +) + +http_archive( + name = "org_tensorflow", + sha256 = "fc6d7c57cd9427e695a38ad00fb6ecc3f623bac792dd44ad73a3f85b338b68be", + strip_prefix = "tensorflow-8a4ffe2e1ae722cff5306778df0cfca8b7f503fe", + urls = [ + "https://github.com/tensorflow/tensorflow/archive/8a4ffe2e1ae722cff5306778df0cfca8b7f503fe.tar.gz", + ], +) + + +http_archive( + name = "org_tflite_support", + strip_prefix = "tflite-support-0861599711ef31de58f62ed3ff6bbcc1e4817ef6", + sha256 = "ef5e33d00930f3b0bad843d550476049faa3c77ca598dbb94acf81d01ba8badd", + urls = ["https://github.com/tensorflow/tflite-support/archive/0861599711ef31de58f62ed3ff6bbcc1e4817ef6.zip"], +) + +http_archive( + name = "org_tensorflow_text", + sha256 = 
"f64647276f7288d1b1fe4c89581d51404d0ce4ae97f2bcc4c19bd667549adca8", + strip_prefix = "text-2.2.0", + urls = [ + "https://github.com/tensorflow/text/archive/v2.2.0.zip", + ], + patches = ["@//third_party:tensorflow_text_fix_local_config_tf.patch"], + patch_args = ["-p1"], + repo_mapping = {"@com_google_re2": "@com_googlesource_code_re2"}, +) + +load("//tf_ops:repo.bzl", "cc_tf_configure", "reverb_protoc_deps") +cc_tf_configure() +PROTOC_VERSION = "3.9.0" +PROTOC_SHA256 = "15e395b648a1a6dda8fd66868824a396e9d3e89bc2c8648e3b9ab9801bea5d55" +reverb_protoc_deps(version = PROTOC_VERSION, sha256 = PROTOC_SHA256) + +# ABSL cpp library. +http_archive( + name = "com_google_absl", + sha256 = "f368a8476f4e2e0eccf8a7318b98dafbe30b2600f4e3cf52636e5eb145aba06a", # SHARED_ABSL_SHA + strip_prefix = "abseil-cpp-df3ea785d8c30a9503321a3d35ee7d35808f190d", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz", + "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz", + ], +) + +http_archive( + name = "rules_cc", + strip_prefix = "rules_cc-master", + urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"], +) + +# GoogleTest/GoogleMock framework. Used by most unit-tests. 
+http_archive( + name = "com_google_googletest", + urls = ["https://github.com/google/googletest/archive/master.zip"], + strip_prefix = "googletest-master", +) + +# gflags needed by glog +http_archive( + name = "com_github_gflags_gflags", + sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe", + strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a", + urls = [ + "https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz", + "https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz", + ], +) + +# glog +http_archive( + name = "com_google_glog", + sha256 = "f28359aeba12f30d73d9e4711ef356dc842886968112162bc73002645139c39c", + strip_prefix = "glog-0.4.0", + urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"], +) + +http_archive( + name = "absl_py", + sha256 = "603febc9b95a8f2979a7bdb77d2f5e4d9b30d4e0d59579f88eba67d4e4cc5462", + strip_prefix = "abseil-py-pypi-v0.9.0", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz", + "https://github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz", + ], +) + +http_archive( + name = "utf_archive", + build_file = "@//third_party:utf.BUILD", + sha256 = "262a902f622dcd28e05b8a4be10da0aa3899050d0be8f4a71780eed6b2ea65ca", + urls = [ + "https://mirror.bazel.build/9fans.github.io/plan9port/unix/libutf.tgz", + "https://9fans.github.io/plan9port/unix/libutf.tgz", + ], +) + + +#----------------------------------------------------------------------------- +# proto +#----------------------------------------------------------------------------- +# proto_library, cc_proto_library and java_proto_library rules implicitly depend +# on @com_google_protobuf//:proto, @com_google_protobuf//:cc_toolchain and +# @com_google_protobuf//:java_toolchain, respectively. +# This statement defines the @com_google_protobuf repo. 
+http_archive( + name = "com_google_protobuf", + strip_prefix = "protobuf-3.8.0", + urls = ["https://github.com/google/protobuf/archive/v3.8.0.zip"], + sha256 = "1e622ce4b84b88b6d2cdf1db38d1a634fe2392d74f0b7b74ff98f3a51838ee53", +) + +load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") +flatbuffers() + +load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") +tf_workspace(tf_repo_name = "org_tensorflow") + + +# TF submodule compilation doesn't take care of grpc deps. Do it manually here. +load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") +grpc_deps() + +load( + "@build_bazel_rules_apple//apple:repositories.bzl", + "apple_rules_dependencies", +) +apple_rules_dependencies() + +load( + "@build_bazel_apple_support//lib:repositories.bzl", + "apple_support_dependencies", +) +apple_support_dependencies() + +load("@upb//bazel:repository_defs.bzl", "bazel_version_repository") +bazel_version_repository(name = "bazel_version") + + +# Set up Android. +load("//third_party/android:android_configure.bzl", "android_configure") +android_configure(name="local_config_android") +load("@local_config_android//:android.bzl", "android_workspace") +android_workspace() + +python_configure(name = "local_config_python") + +new_git_repository( + name = "icu4c", + tag = "release-66-1", + remote = "https://github.com/unicode-org/icu", + build_file = "@//third_party:icu.BUILD", + patch_cmds = [ + "find . 
-type f -exec sed -i 's/#\s*include \"unicode/#include \"icu4c\/source\/common\/unicode/g' {} \;", + ], +) + + +http_archive( + name = "farmhash_archive", + build_file = "//third_party:farmhash.BUILD", + sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", # SHARED_FARMHASH_SHA + strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", + "https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", + ], +) diff --git a/research/seq_flow_lite/colab/BUILD b/research/seq_flow_lite/colab/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..b234bf3ca67bafd5b11573dabef22ad5967fbea2 --- /dev/null +++ b/research/seq_flow_lite/colab/BUILD @@ -0,0 +1,7 @@ +sh_binary( + name = "move_ops", + srcs = ["move_ops.sh"], + data = [ + "//tf_ops:sequence_string_projection_op_py", + ], +) diff --git a/research/seq_flow_lite/colab/move_ops.sh b/research/seq_flow_lite/colab/move_ops.sh new file mode 100755 index 0000000000000000000000000000000000000000..a1300bf62137ae12780dcb1a9f941ff946873aba --- /dev/null +++ b/research/seq_flow_lite/colab/move_ops.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+RUNFILES_DIR=$(pwd)
+cp -f "${RUNFILES_DIR}/tf_ops/libsequence_string_projection_op_py_gen_op.so" \
+  "${BUILD_WORKSPACE_DIRECTORY}/tf_ops"
+cp -f "${RUNFILES_DIR}/tf_ops/sequence_string_projection_op.py" \
+  "${BUILD_WORKSPACE_DIRECTORY}/tf_ops"
+
diff --git a/research/seq_flow_lite/colab/setup.py b/research/seq_flow_lite/colab/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4604e3d0618f9e458ea47601f82b8dcc7ccb304
--- /dev/null
+++ b/research/seq_flow_lite/colab/setup.py
@@ -0,0 +1,57 @@
+# Copyright 2020 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import os
+import subprocess
+import setuptools  # required: _BazelBuildCommand subclasses setuptools.Command
+from setuptools import find_packages
+from setuptools import setup
+from distutils import spawn
+from distutils.command import build
+
+
+# Inserts the bazel build step ahead of the standard distutils build steps.
+class _BuildCommand(build.build):
+  sub_commands = [
+      ('bazel_build', lambda self: True),
+  ] + build.build.sub_commands
+
+
+# Runs `bazel run -c opt //colab:move_ops` to build and copy the custom ops.
+class _BazelBuildCommand(setuptools.Command):
+
+  def initialize_options(self):
+    pass
+
+  def finalize_options(self):
+    # Locate the bazel binary on PATH; check_call below fails if it is absent.
+    self._bazel_cmd = spawn.find_executable('bazel')
+
+  def run(self):
+    subprocess.check_call(
+        [self._bazel_cmd, 'run', '-c', 'opt', '//colab:move_ops'],
+        cwd=os.path.dirname(os.path.realpath(__file__)))
+
+
+setup(
+    name='seq_flow_lite',
+    version='0.1',
+    packages=['tf_ops'],
+    package_data={'': ['*.so']},
+    cmdclass={
+        'build': _BuildCommand,
+        'bazel_build': _BazelBuildCommand,
+    },
+    description='Test')
diff --git a/research/seq_flow_lite/colab/setup_workspace.sh b/research/seq_flow_lite/colab/setup_workspace.sh
new file mode 100755
index 0000000000000000000000000000000000000000..de76d02a55296c03087f6c6099e0cc65dfa6e652
--- /dev/null
+++ b/research/seq_flow_lite/colab/setup_workspace.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Copyright 2020 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+cd "$(dirname "$0")"
+mv setup.py ..
+touch ../tf_ops/__init__.py diff --git a/research/seq_flow_lite/configs/civil_comments_prado.txt b/research/seq_flow_lite/configs/civil_comments_prado.txt new file mode 100644 index 0000000000000000000000000000000000000000..b15f5b91741f2557abed89c48592921dc2c4ef9a --- /dev/null +++ b/research/seq_flow_lite/configs/civil_comments_prado.txt @@ -0,0 +1,27 @@ +{ + "model_config" : { + "labels": ["identity_attack", "insult", "obscene", "severe_toxicity", "sexual_explicit", "threat", "toxicity"], + "multilabel": true, + "quantize": true, + "max_seq_len": 128, + "max_seq_len_inference": 128, + "split_on_space": true, + "embedding_regularizer_scale": 35e-3, + "embedding_size": 64, + "bigram_channels": 64, + "trigram_channels": 64, + "feature_size": 512, + "network_regularizer_scale": 1e-4, + "keep_prob": 0.5, + "distortion_probability": 0.25 + }, + "name": "models.prado", + "batch_size": 1024, + "save_checkpoints_steps": 100, + "train_steps": 100000, + "learning_rate": 1e-3, + "learning_rate_decay_steps": 42000, + "learning_rate_decay_rate": 0.7, + "iterations_per_loop": 100, + "dataset": "civil_comments" +} diff --git a/research/seq_flow_lite/configs/go_emotion_prado.txt b/research/seq_flow_lite/configs/go_emotion_prado.txt new file mode 100644 index 0000000000000000000000000000000000000000..c159c50ee9a0a3c1d6e0834cab51a08ba1e60055 --- /dev/null +++ b/research/seq_flow_lite/configs/go_emotion_prado.txt @@ -0,0 +1,28 @@ +{ + "model_config" : { + "labels": ["admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"], + "multilabel": true, + "quantize": true, + "max_seq_len": 128, + "max_seq_len_inference": 128, + "split_on_space": true, + "embedding_regularizer_scale": 35e-3, + "embedding_size": 64, + 
"bigram_channels": 64, + "trigram_channels": 64, + "feature_size": 512, + "network_regularizer_scale": 1e-4, + "keep_prob": 0.5, + "distortion_probability": 0.0 + }, + "name": "models.prado", + "batch_size": 1024, + "save_checkpoints_steps": 100, + "train_steps": 100000, + "learning_rate": 0.0006, + "learning_rate_decay_steps": 340, + "learning_rate_decay_rate": 0.7, + "iterations_per_loop": 100, + "dataset": "goemotions" +} + diff --git a/research/seq_flow_lite/demo/colab/BUILD b/research/seq_flow_lite/demo/colab/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..b23440bda97c9f2ba34d558503196bdff1c938ce --- /dev/null +++ b/research/seq_flow_lite/demo/colab/BUILD @@ -0,0 +1,9 @@ +sh_binary( + name = "move_ops", + srcs = ["move_ops.sh"], + data = [ + "//tf_ops:sequence_string_projection_op_py", + "//tf_ops:sequence_string_projection_op_v2_py", + "//tf_ops:tf_custom_ops_py", + ], +) diff --git a/research/seq_flow_lite/demo/colab/move_ops.sh b/research/seq_flow_lite/demo/colab/move_ops.sh new file mode 100755 index 0000000000000000000000000000000000000000..5018797cf198295ff6b86ef6bdd2832a4fe966f8 --- /dev/null +++ b/research/seq_flow_lite/demo/colab/move_ops.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +RUNFILES_DIR=$(pwd) +cp -f "${RUNFILES_DIR}/tf_ops/libsequence_string_projection_op_py_gen_op.so" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" +cp -f "${RUNFILES_DIR}/tf_ops/sequence_string_projection_op.py" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" + +cp -f "${RUNFILES_DIR}/tf_ops/libsequence_string_projection_op_v2_py_gen_op.so" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" +cp -f "${RUNFILES_DIR}/tf_ops/sequence_string_projection_op_v2.py" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" + +cp -f "${RUNFILES_DIR}/tf_ops/libtf_custom_ops_py_gen_op.so" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" +cp -f "${RUNFILES_DIR}/tf_ops/tf_custom_ops_py.py" \ + "${BUILD_WORKSPACE_DIRECTORY}/tf_ops" + diff --git a/research/seq_flow_lite/demo/colab/setup.py b/research/seq_flow_lite/demo/colab/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..691e4c78c5c2805eb1f588dae9c57d47080313f2 --- /dev/null +++ b/research/seq_flow_lite/demo/colab/setup.py @@ -0,0 +1,53 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from distutils import spawn +from distutils.command import build +import os +import subprocess + +import setuptools + + +class _BuildCommand(build.build): + sub_commands = [ + ('bazel_build', lambda self: True), + ] + build.build.sub_commands + + +class _BazelBuildCommand(setuptools.Command): + + def initialize_options(self): + pass + + def finalize_options(self): + self._bazel_cmd = spawn.find_executable('bazel') + + def run(self): + subprocess.check_call( + [self._bazel_cmd, 'run', '-c', 'opt', '//demo/colab:move_ops'], + cwd=os.path.dirname(os.path.realpath(__file__))) + + +setuptools.setup( + name='seq_flow_lite', + version='0.1', + packages=['tf_ops'], + package_data={'': ['*.so']}, + cmdclass={ + 'build': _BuildCommand, + 'bazel_build': _BazelBuildCommand, + }, + description='Test') diff --git a/research/seq_flow_lite/demo/colab/setup_workspace.sh b/research/seq_flow_lite/demo/colab/setup_workspace.sh new file mode 100755 index 0000000000000000000000000000000000000000..ec71325c57f5e90750b2d378016ff7a8327c31ce --- /dev/null +++ b/research/seq_flow_lite/demo/colab/setup_workspace.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +cd "$(dirname "$0")" +mv setup.py ../.. 
+touch ../../tf_ops/__init__.py diff --git a/research/seq_flow_lite/demo/prado/BUILD b/research/seq_flow_lite/demo/prado/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..26f2e718151ca97861b578cfa7dd96fd2cb65a76 --- /dev/null +++ b/research/seq_flow_lite/demo/prado/BUILD @@ -0,0 +1,22 @@ +# A demo app for invoking a PRADO TFLite model. + +licenses(["notice"]) + +package( + default_visibility = ["//:friends"], # sequence projection +) + +cc_binary( + name = "prado_tflite_example", + srcs = ["prado_tflite_example.cc"], + data = [ + "data/tflite.fb", + ], + deps = [ + "@org_tensorflow//tensorflow/lite:framework", + "@org_tensorflow//tensorflow/lite:string_util", + "//tflite_ops:expected_value", # sequence projection + "//tflite_ops:quantization_util", # sequence projection + "//tflite_ops:sequence_string_projection", # sequence projection + ], +) diff --git a/research/seq_flow_lite/demo/prado/data/tflite.fb b/research/seq_flow_lite/demo/prado/data/tflite.fb new file mode 100644 index 0000000000000000000000000000000000000000..70fb9c2bb28b5cf5fae5a39c95e576145f7b19ad Binary files /dev/null and b/research/seq_flow_lite/demo/prado/data/tflite.fb differ diff --git a/research/seq_flow_lite/demo/prado/prado_tflite_example.cc b/research/seq_flow_lite/demo/prado/prado_tflite_example.cc new file mode 100644 index 0000000000000000000000000000000000000000..1fa523a892bcf77ad32c3283b98baae61e985e6e --- /dev/null +++ b/research/seq_flow_lite/demo/prado/prado_tflite_example.cc @@ -0,0 +1,150 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstddef>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/string_util.h"
+#include "tflite_ops/expected_value.h"  // seq_flow_lite
+#include "tflite_ops/quantization_util.h"  // seq_flow_lite
+#include "tflite_ops/sequence_string_projection.h"  // seq_flow_lite
+
+namespace {
+const int kTextInput = 0;
+const int kClassOutput = 0;
+const int kNumberOfInputs = 1;
+const int kNumberOfOutputs = 1;
+const int kClassOutputRank = 2;
+const int kClassOutputBatchSizeIndex = 0;
+const int kBatchSize = 1;
+const int kClassOutputClassIndex = 1;
+constexpr char kTfliteDemoFile[] =
+    "demo/prado/data/tflite.fb";
+
+std::unique_ptr<tflite::Interpreter> CreateInterpreter(
+    const std::string& tflite_flat_buffer) {
+  // This pointer points to a memory location contained in tflite_flat_buffer,
+  // hence it need not be deleted.
+ const tflite::Model* model = tflite::GetModel(tflite_flat_buffer.data()); + std::unique_ptr interpreter; + tflite::ops::builtin::BuiltinOpResolver resolver; + resolver.AddCustom( + "SEQUENCE_STRING_PROJECTION", + tflite::ops::custom::Register_SEQUENCE_STRING_PROJECTION()); + resolver.AddCustom("ExpectedValueOp", + tflite::ops::custom::Register_EXPECTED_VALUE()); + tflite::InterpreterBuilder(model, resolver, + /*error_reporter=*/nullptr)(&interpreter); + if (!interpreter) { + std::cout << "Unable to create tflite interpreter\n"; + } + return interpreter; +} + +std::vector InvokeModel( + const std::string& text, + std::unique_ptr& interpreter) { + std::vector classes; + auto inputs = interpreter->inputs(); + if (inputs.size() != kNumberOfInputs) { + std::cerr << "Model does not accept the right number of inputs."; + return classes; + } + // Set input to the model. + TfLiteTensor* input = interpreter->tensor(inputs[kTextInput]); + tflite::DynamicBuffer buf; + buf.AddString(text.data(), text.length()); + buf.WriteToTensorAsVector(input); + + // Allocate buffers. + interpreter->AllocateTensors(); + + // Invoke inference on the model. + interpreter->Invoke(); + + // Extract outputs and perform sanity checks on them. 
+ auto outputs = interpreter->outputs(); + if (outputs.size() != kNumberOfOutputs) { + std::cerr << "Model does not produce right number of outputs."; + return classes; + } + TfLiteTensor* class_output = interpreter->tensor(outputs[kClassOutput]); + if (class_output->type != kTfLiteUInt8) { + std::cerr << "Tensor output types are not as expected."; + return classes; + } + if (class_output->dims->size != kClassOutputRank) { + std::cerr << "Tensor output should be rank " << kClassOutputRank; + return classes; + } + const auto output_dims = class_output->dims->data; + if (output_dims[kClassOutputBatchSizeIndex] != kBatchSize) { + std::cerr << "Batch size is expected to be " << kBatchSize; + return classes; + } + + // Extract output from the output tensor and populate results. + const size_t num_classes = output_dims[kClassOutputClassIndex]; + for (int i = 0; i < num_classes; ++i) { + // Find class probability or log probability for the class index + classes.push_back(tflite::PodDequantize(*class_output, i)); + } + return classes; +} + +std::string GetTfliteDemoFile() { + std::string tflite_flat_buffer; + std::ifstream file(kTfliteDemoFile, + std::ios::in | std::ios::binary | std::ios::ate); + if (!file.is_open()) { + std::cerr << "Unable to open demo tflite file.\n"; + return tflite_flat_buffer; + } + size_t size = file.tellg(); + file.seekg(0, file.beg); + tflite_flat_buffer.resize(size); + file.read(const_cast(tflite_flat_buffer.data()), size); + file.close(); + return tflite_flat_buffer; +} +} // namespace + +int main(int argc, char** argv) { + // The flatbuffer must remain valid until the interpreter is destroyed. 
+  std::string tflite_flat_buffer = GetTfliteDemoFile();
+  if (tflite_flat_buffer.empty()) {
+    return EXIT_FAILURE;
+  }
+  auto interpreter = CreateInterpreter(tflite_flat_buffer);
+  if (!interpreter) {
+    return EXIT_FAILURE;
+  }
+  while (true) {
+    std::string sentence;
+    std::cout << "Enter input: ";
+    std::getline(std::cin, sentence);
+    std::vector<float> classes = InvokeModel(sentence, interpreter);
+    for (float class_value : classes) {
+      std::cout << class_value << std::endl;
+    }
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/research/seq_flow_lite/export_to_tflite.py b/research/seq_flow_lite/export_to_tflite.py
new file mode 100644
index 0000000000000000000000000000000000000000..41bebb87877502a35171f8b22bd75380bbe676e0
--- /dev/null
+++ b/research/seq_flow_lite/export_to_tflite.py
@@ -0,0 +1,70 @@
+# Copyright 2020 The TensorFlow Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Lint as: python3
+"""A tool to export TFLite model."""
+
+import importlib
+import json
+import os
+
+from absl import app
+from absl import flags
+import tensorflow.compat.v1 as tf
+
+from layers import base_layers # import seq_flow_lite module
+from layers import projection_layers # import seq_flow_lite module
+from utils import tflite_utils # import seq_flow_lite module
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string("output_dir", None, "The output or model directory.")
+# Fail fast with a clear flag error instead of os.path.join(None, ...) below.
+flags.mark_flag_as_required("output_dir")
+
+
+def load_runner_config():
+  """Loads the JSON runner config stored alongside the model checkpoint."""
+  config = os.path.join(FLAGS.output_dir, "runner_config.txt")
+  with tf.gfile.Open(config, "r") as f:
+    return json.loads(f.read())
+
+
+def main(_):
+  """Rebuilds the model in TFLITE mode, restores weights, writes tflite.fb."""
+  runner_config = load_runner_config()
+  model_config = runner_config["model_config"]
+  rel_module_path = ""  # empty base dir
+  model = importlib.import_module(rel_module_path + runner_config["name"])
+  with tf.Graph().as_default() as graph:
+    with tf.Session(graph=graph) as session:
+      text = tf.placeholder(tf.string, shape=[1], name="Input")
+      prxlayer = projection_layers.ProjectionLayer(model_config,
+                                                   base_layers.TFLITE)
+      encoder = model.Encoder(model_config, base_layers.TFLITE)
+      projection, seq_length = prxlayer(text)
+      logits = encoder(projection, seq_length)
+
+      session.run(tf.global_variables_initializer())
+      session.run(tf.local_variables_initializer())
+      saver = tf.train.Saver()
+      saver.restore(session, tf.train.latest_checkpoint(FLAGS.output_dir))
+      tflite_fb = tflite_utils.generate_tflite(session, graph, [text], [logits])
+      output_file_name = os.path.join(FLAGS.output_dir, "tflite.fb")
+      with tf.gfile.Open(output_file_name, "wb") as f:
+        f.write(tflite_fb)
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/research/seq_flow_lite/input_fn_reader.py b/research/seq_flow_lite/input_fn_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e17ae5331af122371c5fb386d64a1040f93465f
--- /dev/null
+++ 
b/research/seq_flow_lite/input_fn_reader.py @@ -0,0 +1,81 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Methods related to input datasets and readers.""" + +import functools +import sys + +from absl import logging + +import tensorflow as tf +import tensorflow_datasets as tfds + +from layers import projection_layers # import seq_flow_lite module +from utils import misc_utils # import seq_flow_lite module + + +def imdb_reviews(features, _): + return features["text"], features["label"] + + +def civil_comments(features, runner_config): + labels = runner_config["model_config"]["labels"] + label_tensor = tf.stack([features[label] for label in labels], axis=1) + label_tensor = tf.floor(label_tensor + 0.5) + return features["text"], label_tensor + + +def goemotions(features, runner_config): + labels = runner_config["model_config"]["labels"] + label_tensor = tf.stack([features[label] for label in labels], axis=1) + return features["comment_text"], tf.cast(label_tensor, tf.float32) + + +def create_input_fn(runner_config, mode, drop_remainder): + """Returns an input function to use in the instantiation of tf.estimator.*.""" + + def _post_processor(features, batch_size): + """Post process the data to a form expected by model_fn.""" + data_processor = getattr(sys.modules[__name__], runner_config["dataset"]) + 
text, label = data_processor(features, runner_config) + model_config = runner_config["model_config"] + if "max_seq_len" in model_config: + max_seq_len = model_config["max_seq_len"] + logging.info("Truncating text to have at most %d tokens", max_seq_len) + text = misc_utils.random_substr(text, max_seq_len) + text = tf.reshape(text, [batch_size]) + num_classes = len(model_config["labels"]) + label = tf.reshape(label, [batch_size, num_classes]) + prxlayer = projection_layers.ProjectionLayer(model_config, mode) + projection, seq_length = prxlayer(text) + return {"projection": projection, "seq_length": seq_length, "label": label} + + def _input_fn(params): + """Method to be used for reading the data.""" + assert mode != tf.estimator.ModeKeys.PREDICT + split = "train" if mode == tf.estimator.ModeKeys.TRAIN else "test" + ds = tfds.load(runner_config["dataset"], split=split) + ds = ds.batch(params["batch_size"], drop_remainder=drop_remainder) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + ds = ds.shuffle(buffer_size=100) + ds = ds.repeat(count=1 if mode == tf.estimator.ModeKeys.EVAL else None) + ds = ds.map( + functools.partial(_post_processor, batch_size=params["batch_size"]), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + deterministic=False) + return ds + + return _input_fn diff --git a/research/seq_flow_lite/layers/BUILD b/research/seq_flow_lite/layers/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..48e70a22038ae90b4d5173053b5983ef75773baa --- /dev/null +++ b/research/seq_flow_lite/layers/BUILD @@ -0,0 +1,78 @@ +py_strict_library = py_library + +licenses(["notice"]) + +package( + default_visibility = ["//:friends"], # sequence projection +) + +py_strict_library( + name = "base_layers", + srcs = ["base_layers.py"], + srcs_version = "PY3", + deps = [ + # package tensorflow + ], +) + +py_strict_library( + name = "quantization_layers", + srcs = ["quantization_layers.py"], + srcs_version = "PY3", + deps = [ + 
":base_layers", + # package tensorflow + ], +) + +py_strict_library( + name = "normalization_layers", + srcs = ["normalization_layers.py"], + srcs_version = "PY3", + deps = [ + ":base_layers", + ":quantization_layers", + # package tensorflow + # "//tf_ops:tf_custom_ops" # sequence projection + "//tf_ops:tf_custom_ops_py", # sequence projection + ], +) + +py_strict_library( + name = "dense_layers", + srcs = ["dense_layers.py"], + srcs_version = "PY3", + deps = [ + ":base_layers", + ":normalization_layers", + ":quantization_layers", + # package tensorflow + ], +) + +py_strict_library( + name = "conv_layers", + srcs = ["conv_layers.py"], + srcs_version = "PY3", + deps = [ + ":base_layers", + ":normalization_layers", + ":quantization_layers", + # package tensorflow + ], +) + +py_strict_library( + name = "projection_layers", + srcs = ["projection_layers.py"], + srcs_version = "PY3", + deps = [ + ":base_layers", + # package absl/logging + # package tensorflow + # "//tf_ops:sequence_string_projection_op" # sequence projection + "//tf_ops:sequence_string_projection_op_py", # sequence projection + # "//tf_ops:sequence_string_projection_op_v2" # sequence projection + "//tf_ops:sequence_string_projection_op_v2_py", # sequence projection + ], +) diff --git a/research/seq_flow_lite/layers/base_layers.py b/research/seq_flow_lite/layers/base_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..40e339986a6d7f7c8ce691d46136aa6e18cafb25 --- /dev/null +++ b/research/seq_flow_lite/layers/base_layers.py @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Base layer for building models trained with quantization.""" + +import tensorflow as tf + +TRAIN = "train" +EVAL = "eval" +PREDICT = "infer" +TFLITE = "tflite" +_MODE = [TRAIN, EVAL, PREDICT, TFLITE] + + +class Parameters: + """A class that encapsulates parameters.""" + + def __init__(self, + mode, + quantize=True, + regularizer_scale=0.0, + invalid_logit=-1e6, + initializer=None): + assert isinstance(quantize, bool) + self.quantize = quantize + assert mode in _MODE + self.mode = mode + self.regularizer_scale = regularizer_scale + self.invalid_logit = invalid_logit + self.initializer = initializer + + +class BaseLayer(tf.keras.layers.Layer): + """Base class for encoders.""" + + def __init__(self, parameters, **kwargs): + assert isinstance(parameters, Parameters) + self.parameters = parameters + super(BaseLayer, self).__init__(**kwargs) + + def _assert_rank_and_type(self, tensor, rank, dtype=tf.float32): + assert len(tensor.get_shape().as_list()) == rank + assert tensor.dtype == dtype + + def add_qweight(self, shape, num_bits=8): + """Return a quantized weight variable for the given shape.""" + if self.parameters.initializer is not None: + initializer = self.parameters.initializer + else: + initializer = tf.keras.initializers.GlorotUniform() + weight = self.add_weight( + "weight", shape, initializer=initializer, trainable=True) + self.add_reg_loss(weight) + return self._weight_quantization(weight, num_bits=num_bits) + + def _weight_quantization(self, 
tensor, num_bits=8): + """Quantize weights when enabled.""" + # For infer mode, toco computes the min/max from the weights offline to + # quantize it. During train/eval this is computed from the current value + # in the session by the graph itself. + if self.parameters.quantize and self.parameters.mode in [TRAIN, EVAL]: + # Toco expects 0.0 to be part of the quantization range. + batch_min = tf.minimum(tf.reduce_min(tensor), 0.0) + batch_max = tf.maximum(tf.reduce_max(tensor), 0.0) + + return tf.quantization.fake_quant_with_min_max_vars( + tensor, batch_min, batch_max, num_bits=num_bits) + else: + return tensor + + def add_bias(self, shape): + weight = self.add_weight( + "bias", + shape, + initializer=tf.keras.initializers.Zeros(), + trainable=True) + self.add_reg_loss(weight) + return weight + + def add_reg_loss(self, weight): + if self.parameters.regularizer_scale > 0.0: + reg_scale = tf.convert_to_tensor(self.parameters.regularizer_scale) + reg_loss = tf.nn.l2_loss(weight) * reg_scale + self.add_loss(reg_loss) + + def assign_moving_average(self, var, update, ema_decay): + return var.assign(var.read_value() * (1 - ema_decay) + (ema_decay) * update) + + def qrange_sigmoid(self, tensor): + if self.parameters.quantize: + return tf.quantization.fake_quant_with_min_max_args(tensor, 0.0, 1.0) + return tensor + + def qrange_tanh(self, tensor): + if self.parameters.quantize: + return tf.quantization.fake_quant_with_min_max_args(tensor, -1.0, 1.0) + return tensor + + def quantized_tanh(self, tensor): + return self.qrange_tanh(tf.tanh(tensor)) + + def quantized_sigmoid(self, tensor): + return self.qrange_sigmoid(tf.sigmoid(tensor)) + + def get_batch_dimension(self, tensor): + return tensor.get_shape().as_list()[0] or tf.shape(tensor)[0] diff --git a/research/seq_flow_lite/layers/conv_layers.py b/research/seq_flow_lite/layers/conv_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..19c2adbcb8051307119527f7eb4ce72175f33003 --- /dev/null +++ 
b/research/seq_flow_lite/layers/conv_layers.py @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Base layer for convolution.""" +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module +from layers import normalization_layers # import seq_flow_lite module +from layers import quantization_layers # import seq_flow_lite module + + +class EncoderQConvolution(base_layers.BaseLayer): + """Quantized encoder convolution layers.""" + + def __init__(self, + filters, + ksize, + stride=1, + padding="SAME", + dilations=None, + activation=tf.keras.layers.ReLU(), + bias=True, + rank=4, + **kwargs): + self.out_filters = filters + assert rank >= 3 and rank <= 4 + self.rank = rank + self.ksize = self._unpack(ksize) + self.strides = self._unpack(stride) + self.dilations = [1] + self._unpack(dilations) + [1] if dilations else None + self.activation = activation + self.bias = bias + self.padding = padding + self.qoutput = quantization_layers.ActivationQuantization(**kwargs) + self._create_normalizer(**kwargs) + super(EncoderQConvolution, self).__init__(**kwargs) + + def _unpack(self, value): + if not isinstance(value, list): + assert isinstance(value, int) + return [1 if self.rank == 3 else value, value] + else: + assert len(value) == 2 and self.rank == 4 + assert 
isinstance(value[0], int) and isinstance(value[1], int) + return value + + def build(self, input_shapes): + assert len(input_shapes) == self.rank + self.in_filters = input_shapes[-1] + shape = self.ksize + [self.in_filters, self.out_filters] + self.filters = self.add_qweight(shape=shape) + if self.bias: + self.b = self.add_bias(shape=[self.out_filters]) + + def _create_normalizer(self, **kwargs): + self.normalization = normalization_layers.BatchNormalization(**kwargs) + + def _conv_r4(self, inputs, normalize_method): + outputs = tf.nn.conv2d( + inputs, + self.filters, + strides=self.strides, + padding=self.padding, + dilations=self.dilations) + if self.bias: + outputs = tf.nn.bias_add(outputs, self.b) + outputs = normalize_method(outputs) + if self.activation: + outputs = self.activation(outputs) + return self.qoutput(outputs) + + def _conv_r3(self, inputs, normalize_method): + bsz = self.get_batch_dimension(inputs) + inputs_r4 = tf.reshape(inputs, [bsz, 1, -1, self.in_filters]) + outputs = self._conv_r4(inputs_r4, normalize_method) + return tf.reshape(outputs, [bsz, -1, self.out_filters]) + + def call(self, inputs): + + def normalize_method(tensor): + return self.normalization(tensor) + + return self._do_call(inputs, normalize_method) + + def _do_call(self, inputs, normalize_method): + if self.rank == 3: + return self._conv_r3(inputs, normalize_method) + return self._conv_r4(inputs, normalize_method) + + def quantize_using_output_range(self, tensor): + return self.qoutput.quantize_using_range(tensor) + + +class EncoderQConvolutionVarLen(EncoderQConvolution): + """Convolution on variable length sequence.""" + + def _create_normalizer(self, **kwargs): + self.normalization = normalization_layers.VarLenBatchNormalization( + rank=4, **kwargs) + + def call(self, inputs, mask, inverse_normalizer): + + def normalize_method(tensor): + return self.normalization(tensor, mask, inverse_normalizer) + + return self._do_call(inputs, normalize_method) diff --git 
a/research/seq_flow_lite/layers/dense_layers.py b/research/seq_flow_lite/layers/dense_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..3c55d215fdbbdc939060bc5887d8426ea926f6d3 --- /dev/null +++ b/research/seq_flow_lite/layers/dense_layers.py @@ -0,0 +1,107 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Basic dense layers.""" +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module +from layers import normalization_layers # import seq_flow_lite module +from layers import quantization_layers # import seq_flow_lite module + + +class BaseQDense(base_layers.BaseLayer): + """Quantized encoder dense layers.""" + + def __init__(self, + units, + activation=tf.keras.layers.ReLU(), + bias=True, + rank=2, + normalize=True, + **kwargs): + self.units = units + self.rank = rank + assert rank >= 2 and rank <= 4 + self.activation = activation + self.bias = bias + self.normalize = normalize + self.qoutput = quantization_layers.ActivationQuantization(**kwargs) + self._create_normalizer(**kwargs) + super(BaseQDense, self).__init__(**kwargs) + + def build(self, input_shapes): + assert len(input_shapes) == self.rank + if self.rank == 4: + assert input_shapes[1] == 1 or input_shapes[2] == 1 + self.in_units = input_shapes[-1] + shape = [self.in_units, self.units] 
+ self.w = self.add_qweight(shape=shape) + if self.bias: + self.b = self.add_bias(shape=[self.units]) + + def _create_normalizer(self, **kwargs): + self.normalization = normalization_layers.BatchNormalization(**kwargs) + + def _dense_r2(self, inputs, normalize_method): + outputs = tf.matmul(inputs, self.w) + if self.bias: + outputs = tf.nn.bias_add(outputs, self.b) + if self.normalize: + outputs = normalize_method(outputs) + if self.activation: + outputs = self.activation(outputs) + return self.qoutput(outputs) + + def _dense_r34(self, inputs, normalize_method): + bsz = self.get_batch_dimension(inputs) + outputs = tf.reshape(inputs, [-1, self.in_units]) + outputs = self._dense_r2(outputs, normalize_method) + if self.rank == 3: + return tf.reshape(outputs, [bsz, -1, self.units]) + elif inputs.get_shape().as_list()[1] == 1: + return tf.reshape(outputs, [bsz, 1, -1, self.units]) + else: + return tf.reshape(outputs, [bsz, -1, 1, self.units]) + + def call(self, inputs): + + def normalize_method(tensor): + return self.normalization(tensor) + + return self._do_call(inputs, normalize_method) + + def _do_call(self, inputs, normalize_method): + if self.rank == 2: + return self._dense_r2(inputs, normalize_method) + return self._dense_r34(inputs, normalize_method) + + def quantize_using_output_range(self, tensor): + return self.qoutput.quantize_using_range(tensor) + + +class BaseQDenseVarLen(BaseQDense): + """Dense on variable length sequence.""" + + def _create_normalizer(self, **kwargs): + self.normalization = normalization_layers.VarLenBatchNormalization( + rank=2, **kwargs) + + def call(self, inputs, mask, inverse_normalizer): + + def normalize_method(tensor): + maskr2 = tf.reshape(mask, [-1, 1]) + return self.normalization(tensor, maskr2, inverse_normalizer) + + return self._do_call(inputs, normalize_method) diff --git a/research/seq_flow_lite/layers/normalization_layers.py b/research/seq_flow_lite/layers/normalization_layers.py new file mode 100644 index 
0000000000000000000000000000000000000000..248143a096f536d2eb539bfa9a8d39fab15a547d --- /dev/null +++ b/research/seq_flow_lite/layers/normalization_layers.py @@ -0,0 +1,140 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Layers for normalization.""" +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module +from layers import quantization_layers # import seq_flow_lite module +from tf_ops import tf_custom_ops_py # import seq_flow_lite module + + +class BatchNormalization(base_layers.BaseLayer): + """A class that applies batch normalization to the input tensor.""" + + def __init__(self, ema_decay=0.999, **kwargs): + self.ema_decay = ema_decay + super(BatchNormalization, self).__init__(**kwargs) + + def build(self, input_shapes): + self.reduce_dims = list(range(len(input_shapes) - 1)) + shape = [input_shapes[-1]] + self.offset = self.add_weight( + "offset", + shape=shape, + initializer=tf.keras.initializers.Zeros(), + trainable=True) + self.scale = self.add_weight( + "scale", + shape=shape, + initializer=tf.keras.initializers.Ones(), + trainable=True) + self.mva_mean = self.add_weight( + "mva_mean", + shape=shape, + initializer=tf.keras.initializers.Zeros(), + trainable=False) + self.mva_var = self.add_weight( + "mva_variance", + shape=shape, + 
initializer=tf.keras.initializers.Ones(), + trainable=False) + + def call(self, inputs): + mean_mom, var_mom = None, None + if self.parameters.mode == base_layers.TRAIN: + mean_mom, var_mom = tf.nn.moments(inputs, self.reduce_dims) + return self._batch_norm(inputs, mean_mom, var_mom) + + def _batch_norm(self, inputs, mean_mom, var_mom): + if self.parameters.mode == base_layers.TRAIN: + # During training compute summay stats, update them to moving average + # variables and use the summary stas for batch normalization. + with tf.control_dependencies([ + self.assign_moving_average(self.mva_mean, mean_mom, self.ema_decay), + self.assign_moving_average(self.mva_var, var_mom, self.ema_decay) + ]): + tensor = tf.nn.batch_normalization(inputs, mean_mom, var_mom, + self.offset, self.scale, 1e-9) + else: + # During eval/inference use the moving average variable for batch + # normalization. The variables would be frozen to constants before + # saving graph. + tensor = tf.nn.batch_normalization(inputs, self.mva_mean, self.mva_var, + self.offset, self.scale, 1e-9) + return tensor + + +class VarLenBatchNormalization(BatchNormalization): + """A class that applies batch normalization to the input tensor.""" + + def __init__(self, rank=2, **kwargs): + self.rank = rank + assert rank == 2 or rank == 4 + super(VarLenBatchNormalization, self).__init__(**kwargs) + + def _reduce(self, tensor, multiplier): + return tf.reduce_sum(tensor, axis=self.reduce_dims) * multiplier + + def call(self, inputs, mask, inverse_normalizer): + if self.parameters.mode == base_layers.TRAIN: + self._assert_rank_and_type(inputs, self.rank) + self._assert_rank_and_type(mask, self.rank) + inputs = mask * inputs + mean_mom = self._reduce(inputs, inverse_normalizer) + var_mom = self._reduce(inputs * inputs, inverse_normalizer) + return mask * self._batch_norm(inputs, mean_mom, var_mom) + elif self.parameters.mode == base_layers.EVAL: + return mask * self._batch_norm(inputs, None, None) + return 
self._batch_norm(inputs, None, None) + + +class LayerNormalization(base_layers.BaseLayer): + """A class that applies layer normalization to the input tensor.""" + + def __init__(self, axes=None, **kwargs): + self.axes = axes or [-1] + self.qactivation = quantization_layers.ActivationQuantization(**kwargs) + super(LayerNormalization, self).__init__(**kwargs) + + def build(self, input_shape): + self.rank = len(input_shape) + for i, axis in enumerate(self.axes): + if axis < 0: + self.axes[i] += self.rank + assert (self.axes[i] > 0 and self.axes[i] < self.rank) + self.offset = self.add_weight( + "offset", + shape=[1], + initializer=tf.keras.initializers.Zeros(), + trainable=True) + self.scale = self.add_weight( + "scale", + shape=[1], + initializer=tf.keras.initializers.Ones(), + trainable=True) + + def call(self, tensor): + tensor = self.qactivation(tensor) + if self.parameters.mode != base_layers.TFLITE: + mean, variance = tf.nn.moments(tensor, self.axes, keepdims=True) + # If all the values in the tensor are same, variance will be 0. Adding a + # small epsilon to variance ensures that we get 0 as the normalized result + # instead of NaN in the resulting tensor. + tensor = (tensor - mean) / tf.sqrt(variance + 1e-6) + return tensor * self.scale + self.offset + else: + return tf_custom_ops_py.layer_norm( + tensor, self.scale, self.offset, axes=self.axes) diff --git a/research/seq_flow_lite/layers/projection_layers.py b/research/seq_flow_lite/layers/projection_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..9dff8adc85274fb5c4656065c406229ebf7e8145 --- /dev/null +++ b/research/seq_flow_lite/layers/projection_layers.py @@ -0,0 +1,119 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tensorflow projection creator for PRADO model.""" + +from absl import logging +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module +from tf_ops import sequence_string_projection_op as ssp # import seq_flow_lite module +from tf_ops import sequence_string_projection_op_v2 as sspv2 # import seq_flow_lite module + + +class ProjectionLayer(base_layers.BaseLayer): + """Base class for encoders.""" + + def __init__(self, model_config, mode): + """Create projection.""" + + def _get_params(varname, default_value=None): + value = model_config[varname] if varname in model_config else default_value + default = "" if varname in model_config else " (default)" + logging.info("%s = %s%s", varname, value, default) + setattr(self, varname, value) + + self.mode = mode + _get_params("feature_size") + _get_params("max_seq_len", 0) + _get_params("add_eos_tag", False) + _get_params("add_bos_tag", False) + _get_params("hashtype", "murmur") + _get_params("split_on_space", True) + _get_params("token_separators", "") + _get_params("vocabulary", "") + _get_params("quantize") + _get_params("word_novelty_bits", 0) + _get_params("doc_size_levels", 0) + self.distortion_probability = 0.0 + if mode == base_layers.TRAIN: + _get_params("distortion_probability", 0.0) + parameters = base_layers.Parameters(mode, self.quantize) + super(ProjectionLayer, self).__init__(parameters=parameters) + + def call(self, inputs): + projection, _, seq_length = ssp.sequence_string_projection( 
+ input=inputs, + feature_size=self.feature_size, + max_splits=self.max_seq_len - 1, + hashtype=self.hashtype, + distortion_probability=self.distortion_probability, + split_on_space=self.split_on_space, + token_separators=self.token_separators, + word_novelty_bits=self.word_novelty_bits, + doc_size_levels=self.doc_size_levels, + add_eos_tag=self.add_eos_tag, + add_bos_tag=self.add_bos_tag, + vocabulary=self.vocabulary) + + modes = [base_layers.PREDICT, base_layers.TFLITE] + if self.mode not in modes and self.max_seq_len > 0: + short_by = self.max_seq_len - tf.shape(projection)[1] + projection = tf.pad(projection, [[0, 0], [0, short_by], [0, 0]]) + batch_size = self.get_batch_dimension(inputs) + projection = tf.reshape(projection, + [batch_size, self.max_seq_len, self.feature_size]) + if self.mode in modes: + projection = self.qrange_tanh(projection) + return projection, seq_length + + +class ProjectionLayerPreSegmented(base_layers.BaseLayer): + """Base class for encoders.""" + + def __init__(self, model_config, mode): + """Create projection.""" + + def _get_params(varname, default_value=None): + value = model_config[varname] if varname in model_config else default_value + default = "" if varname in model_config else " (default)" + logging.info("%s = %s%s", varname, value, default) + setattr(self, varname, value) + + self.mode = mode + _get_params("feature_size") + _get_params("add_eos_tag", False) + _get_params("add_bos_tag", False) + _get_params("vocabulary", "") + _get_params("quantize") + self.distortion_probability = 0.0 + if mode == base_layers.TRAIN: + _get_params("distortion_probability", 0.0) + parameters = base_layers.Parameters(mode, self.quantize) + super(ProjectionLayerPreSegmented, self).__init__(parameters=parameters) + + def call(self, inputs, sequence_length): + projection = sspv2.sequence_string_projection_v2( + input=inputs, + sequence_length=sequence_length, + feature_size=self.feature_size, + distortion_probability=self.distortion_probability, + 
add_eos_tag=self.add_eos_tag, + add_bos_tag=self.add_bos_tag, + vocabulary=self.vocabulary) + + modes = [base_layers.PREDICT, base_layers.TFLITE] + if self.mode in modes: + projection = self.qrange_tanh(projection) + return projection diff --git a/research/seq_flow_lite/layers/quantization_layers.py b/research/seq_flow_lite/layers/quantization_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..9990eb5b74b616b4d9be3aeb8387ce0ff63996ad --- /dev/null +++ b/research/seq_flow_lite/layers/quantization_layers.py @@ -0,0 +1,97 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Lint as: python3 +"""Layers for quantization.""" + +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module + + +class ActivationQuantization(base_layers.BaseLayer): + """A class that applies quantization to a activation tensor.""" + + def __init__(self, ema_decay=0.99, num_bits=8, **kwargs): + self.ema_decay = ema_decay + self.num_bits = num_bits + super(ActivationQuantization, self).__init__(**kwargs) + if self.parameters.quantize: + self.min_var = self.add_weight( + "min", initializer=tf.keras.initializers.Zeros(), trainable=False) + self.max_var = self.add_weight( + "max", initializer=tf.keras.initializers.Ones(), trainable=False) + + def call(self, inputs): + if self.parameters.quantize: + if self.parameters.mode == base_layers.TRAIN: + # Toco expects 0.0 to be part of the quantization range. + batch_min = tf.minimum(tf.reduce_min(inputs), 0.0) + min_var = self.assign_moving_average(self.min_var, batch_min, + self.ema_decay) + + batch_max = tf.maximum(tf.reduce_max(inputs), 0.0) + max_var = self.assign_moving_average(self.max_var, batch_max, + self.ema_decay) + with tf.control_dependencies([min_var, max_var]): + return tf.quantization.fake_quant_with_min_max_vars( + inputs, batch_min, batch_max, num_bits=self.num_bits) + else: + return tf.quantization.fake_quant_with_min_max_vars( + inputs, self.min_var, self.max_var, num_bits=self.num_bits) + return inputs + + def quantize_using_range(self, inputs): + if self.parameters.quantize: + return tf.quantization.fake_quant_with_min_max_vars( + inputs, self.min_var, self.max_var, num_bits=self.num_bits) + return inputs + + +class ConcatQuantization(ActivationQuantization): + """A class that applies quantization to a activation tensor.""" + + def __init__(self, axis=2, **kwargs): + self.axis = axis + super(ConcatQuantization, self).__init__(**kwargs) + + def reduce_list(self, tensor_list, functor): + 
reduce_result = [functor(tensor) for tensor in tensor_list] + # Toco expects 0.0 to be part of the quantization range. + reduce_result.append(tf.constant(0.0)) + return functor(tf.stack(reduce_result)) + + def call(self, tensors): + if self.parameters.quantize: + if self.parameters.mode == base_layers.TRAIN: + # Toco expects 0.0 to be part of the quantization range. + batch_min = self.reduce_list(tensors, tf.reduce_min) + min_var = self.assign_moving_average(self.min_var, batch_min, + self.ema_decay) + + batch_max = self.reduce_list(tensors, tf.reduce_max) + max_var = self.assign_moving_average(self.max_var, batch_max, + self.ema_decay) + else: + min_var, max_var = self.min_var, self.max_var + + tensors = [ + tf.quantization.fake_quant_with_min_max_vars( + tensor, min_var, max_var, num_bits=self.num_bits) + for tensor in tensors + ] + tensor = tf.concat(tensors, axis=self.axis) + return tf.quantization.fake_quant_with_min_max_vars( + tensor, min_var, max_var, num_bits=self.num_bits) + return tf.concat(tensors, axis=self.axis) diff --git a/research/seq_flow_lite/metric_functions.py b/research/seq_flow_lite/metric_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..819fd69516fbc8d9691d82b73dc390278f18fba8 --- /dev/null +++ b/research/seq_flow_lite/metric_functions.py @@ -0,0 +1,47 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Lint as: python3 +"""Metric functions.""" +import tensorflow.compat.v1 as tf + + +def classification_metric(per_example_loss, label_ids, logits): + """Compute eval metrics.""" + return { + "accuracy": + tf.metrics.accuracy(label_ids, tf.math.argmax(logits, axis=-1)), + "eval_loss": + tf.metrics.mean(per_example_loss) + } + + +THRESHOLDS = [0.5] + + +def labeling_metric(per_example_loss, label_ids, logits): + """Compute eval metrics.""" + scores = tf.math.sigmoid(logits) + num_classes = label_ids.get_shape().as_list()[-1] + return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)} + for idx in range(num_classes): + return_dict["auc/" + str(idx)] = tf.metrics.auc(label_ids[:, idx], + scores[:, idx]) + return_dict["precision@" + str(THRESHOLDS) + "/" + + str(idx)] = tf.metrics.precision_at_thresholds( + label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS) + return_dict["recall@" + str(THRESHOLDS) + "/" + + str(idx)] = tf.metrics.recall_at_thresholds( + label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS) + return return_dict diff --git a/research/seq_flow_lite/models/BUILD b/research/seq_flow_lite/models/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..cb8c75fe8d2d7e518648e5012c935890844f2bab --- /dev/null +++ b/research/seq_flow_lite/models/BUILD @@ -0,0 +1,22 @@ +licenses(["notice"]) + +package( + default_visibility = ["//:friends"], # sequence projection +) + +py_library( + name = "prado", + srcs = ["prado.py"], + srcs_version = "PY3", + deps = [ + # package absl/logging + # package tensorflow + "//layers:base_layers", # sequence projection + "//layers:conv_layers", # sequence projection + "//layers:dense_layers", # sequence projection + "//layers:projection_layers", # sequence projection + "//layers:quantization_layers", # sequence projection + # "//tf_ops:tf_custom_ops" # sequence projection + "//tf_ops:tf_custom_ops_py", # sequence 
projection + ], +) diff --git a/research/seq_flow_lite/models/prado.py b/research/seq_flow_lite/models/prado.py new file mode 100644 index 0000000000000000000000000000000000000000..366b774a1f55ee6bd429b8654e6e5f3a2acf0446 --- /dev/null +++ b/research/seq_flow_lite/models/prado.py @@ -0,0 +1,193 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +"""Implementation of PRADO model.""" + +import copy +from absl import logging +import numpy as np +import tensorflow as tf + +from layers import base_layers # import seq_flow_lite module +from layers import conv_layers # import seq_flow_lite module +from layers import dense_layers # import seq_flow_lite module +from layers import projection_layers # import seq_flow_lite module +from layers import quantization_layers # import seq_flow_lite module +from tf_ops import tf_custom_ops_py # import seq_flow_lite module + + +class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen): + """A layer that performs padded masked convolution.""" + + def __init__(self, invalid_value, ngram=2, skip_bigram=None, **kwargs): + self.invalid_value = invalid_value + assert ngram is None or (ngram >= 1 and ngram <= 5) + assert skip_bigram is None or skip_bigram == 1 or skip_bigram == 2 + assert bool(ngram is None) != bool(skip_bigram is None) + self.kwidth = ngram if ngram is not 
None else (skip_bigram + 2) + mask = [1] * self.kwidth + if skip_bigram is not None: + mask[1], mask[skip_bigram] = 0, 0 + self.mask = np.array(mask, dtype="float32").reshape((1, self.kwidth, 1, 1)) + self.zero_pad = tf.keras.layers.ZeroPadding1D(padding=[0, self.kwidth - 1]) + super(PaddedMaskedVarLenConv, self).__init__( + ksize=self.kwidth, rank=3, padding="VALID", activation=None, **kwargs) + + def call(self, inputs, mask, inverse_normalizer): + self._assert_rank_and_type(inputs, 3) + self._assert_rank_and_type(mask, 3) + maskr4 = tf.expand_dims(mask, axis=1) + inputs_padded = self.zero_pad(inputs) + result = super(PaddedMaskedVarLenConv, self).call(inputs_padded, maskr4, + inverse_normalizer) + if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]: + return result * mask + (1 - mask) * self.invalid_value + return result + + def add_qweight(self, shape, num_bits=8): + weight = super(PaddedMaskedVarLenConv, self).add_qweight( + shape=shape, num_bits=num_bits) + return weight * tf.convert_to_tensor(self.mask) + + +class AttentionPoolReduce(base_layers.BaseLayer): + """Attention pooling and reduce.""" + + def __init__(self, filters, ngram=2, skip_bigram=None, **kwargs): + super(AttentionPoolReduce, self).__init__(**kwargs) + self.filters = filters + self.value = PaddedMaskedVarLenConv( + 0, filters=filters, ngram=ngram, skip_bigram=skip_bigram, **kwargs) + self.attention_logits = PaddedMaskedVarLenConv( + self.parameters.invalid_logit, + filters=filters, + ngram=ngram, + skip_bigram=skip_bigram, + **kwargs) + + def call(self, values_in, attention_in, mask, inverse_normalizer): + self._assert_rank_and_type(values_in, 3) + self._assert_rank_and_type(attention_in, 3) + self._assert_rank_and_type(mask, 3) + values = self.value(values_in, mask, inverse_normalizer) + attention_logits = self.attention_logits(attention_in, mask, + inverse_normalizer) + + if self.parameters.mode == base_layers.TFLITE: + return 
tf_custom_ops_py.expected_value_op(attention_logits, values) + else: + attention_logits = tf.transpose(attention_logits, [0, 2, 1]) + values = tf.transpose(values, [0, 2, 1]) + attention = tf.nn.softmax(attention_logits) + return tf.reduce_sum(attention * values, axis=2) + + +class Encoder(tf.keras.layers.Layer): + """A PRADO keras model.""" + + def __init__(self, config, mode): + super(Encoder, self).__init__() + + def _get_params(varname, default_value=None): + value = config[varname] if varname in config else default_value + default = "" if varname in config else " (default)" + logging.info("%s = %s%s", varname, value, default) + setattr(self, varname, value) + + _get_params("labels") + _get_params("quantize", True) + _get_params("embedding_regularizer_scale", 35e-3) + _get_params("embedding_size", 64) + _get_params("unigram_channels", 0) + _get_params("bigram_channels", 0) + _get_params("trigram_channels", 0) + _get_params("fourgram_channels", 0) + _get_params("fivegram_channels", 0) + _get_params("skip1bigram_channels", 0) + _get_params("skip2bigram_channels", 0) + _get_params("network_regularizer_scale", 1e-4) + _get_params("keep_prob", 0.5) + self.num_classes = len(self.labels) + + self.parameters = base_layers.Parameters( + mode, + quantize=self.quantize, + regularizer_scale=self.embedding_regularizer_scale) + self.values_fc = dense_layers.BaseQDenseVarLen( + units=self.embedding_size, rank=3, parameters=self.parameters) + self.attention_fc = dense_layers.BaseQDenseVarLen( + units=self.embedding_size, rank=3, parameters=self.parameters) + self.dropout = tf.keras.layers.Dropout(rate=(1 - self.keep_prob)) + + self.parameters = copy.copy(self.parameters) + self.parameters.regularizer_scale = self.network_regularizer_scale + self.attention_pool_layers = [] + self._add_attention_pool_layer(self.unigram_channels, 1) + self._add_attention_pool_layer(self.bigram_channels, 2) + self._add_attention_pool_layer(self.trigram_channels, 3) + 
self._add_attention_pool_layer(self.fourgram_channels, 4) + self._add_attention_pool_layer(self.fivegram_channels, 5) + self._add_attention_pool_layer(self.skip1bigram_channels, None, 1) + self._add_attention_pool_layer(self.skip2bigram_channels, None, 2) + + self.concat_quantizer = quantization_layers.ConcatQuantization( + axis=1, parameters=self.parameters) + self.final_fc = dense_layers.BaseQDense( + units=self.num_classes, + rank=2, + parameters=self.parameters, + activation=None) + + def _add_attention_pool_layer(self, channels, ngram, skip_bigram=None): + if channels > 0: + self.attention_pool_layers.append( + AttentionPoolReduce( + filters=channels, + skip_bigram=skip_bigram, + ngram=ngram, + parameters=self.parameters)) + + def _apply_fc_dropout(self, layer, inputs, mask, inverse_normalizer): + outputs = layer(inputs, mask, inverse_normalizer) + if self.parameters.mode == base_layers.TRAIN: + return self.dropout(outputs) + return outputs + + def call(self, projection, seq_length): + mask = tf.sequence_mask( + seq_length, tf.shape(projection)[1], dtype=tf.float32) + inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask)) + maskr3 = tf.expand_dims(mask, axis=2) + values_in = self._apply_fc_dropout(self.values_fc, projection, mask, + inverse_normalizer) + attention_in = self._apply_fc_dropout(self.attention_fc, projection, mask, + inverse_normalizer) + tensors = [ + layer(values_in, attention_in, maskr3, inverse_normalizer) + for layer in self.attention_pool_layers + ] + pre_logits = self.concat_quantizer(tensors) + return self.final_fc(pre_logits) + + +class Model(Encoder): + + def __init__(self, config, mode): + super(Model, self).__init__(config, mode) + self.projection = projection_layers.ProjectionLayer(config, mode) + + def call(self, inputs): + projection, seq_length = self.projection(inputs) + return super(Model, self).call(projection, seq_length) diff --git a/research/seq_flow_lite/models/sgnn/BUILD b/research/seq_flow_lite/models/sgnn/BUILD new 
file mode 100644 index 0000000000000000000000000000000000000000..8c63240438ab8bfac11da5947124630e55e818e7 --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/BUILD @@ -0,0 +1,110 @@ +licenses(["notice"]) + +package( + default_visibility = [ + "//visibility:public", + ], +) + +cc_library( + name = "sgnn_projection", + srcs = ["sgnn_projection.cc"], + hdrs = ["sgnn_projection.h"], + deps = [ + "@org_tensorflow//tensorflow/lite:context", + "@org_tensorflow//tensorflow/lite:string_util", + "@org_tensorflow//tensorflow/lite/kernels:kernel_util", + "@org_tensorflow//tensorflow/lite/kernels/internal:tensor", + "@farmhash_archive//:farmhash", + "@flatbuffers", + ], +) + +cc_library( + name = "sgnn_projection_op_resolver", + srcs = ["sgnn_projection_op_resolver.cc"], + hdrs = ["sgnn_projection_op_resolver.h"], + visibility = ["//visibility:public"], + deps = [ + ":sgnn_projection", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + +cc_test( + name = "sgnn_projection_test", + srcs = ["sgnn_projection_test.cc"], + deps = [ + ":sgnn_projection", + "@org_tensorflow//tensorflow/lite:string_util", + "@org_tensorflow//tensorflow/lite/kernels:test_util", + "@org_tensorflow//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest_main", + "@flatbuffers", + ], +) + +py_library( + name = "sgnn", + srcs = [ + "sgnn.py", + ], + srcs_version = "PY3", + deps = [ + # package tensorflow + "@org_tflite_support//tensorflow_lite_support/custom_ops/python:tflite_text_api", + # Expect tensorflow text installed + ], +) + +py_test( + name = "sgnn_test", + srcs = [ + "sgnn_test.py", + ], + deps = [ + ":sgnn", + # package tensorflow + # Expect tensorflow text installed + ], +) + +py_binary( + name = "train", + srcs = [ + "train.py", + ], + main = "train.py", + python_version = "PY3", + deps = [ + ":sgnn", + # package tensorflow + # package tensorflow_datasets + ], +) + +py_binary( + name = "run_tflite", + srcs = ["run_tflite.py"], + main = "run_tflite.py", 
+ python_version = "PY3", + deps = [ + # Expect numpy installed + # package TFLite flex delegate + # package TFLite interpreter + "@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:ngrams_op_resolver", + "@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:whitespace_tokenizer_op_resolver", + # Expect tensorflow text installed + ], +) + +# pip install numpy +py_library( + name = "expect_numpy_installed", +) + +# pip install tensroflow_text +py_library( + name = "expect_tensorflow_text_installed", +) diff --git a/research/seq_flow_lite/models/sgnn/run_tflite.py b/research/seq_flow_lite/models/sgnn/run_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..d7b74448e5bb91a09efe2e5821157a152cf9f055 --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/run_tflite.py @@ -0,0 +1,54 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Script to run a langid TFLite model.""" + +from absl import app +from absl import flags +import numpy as np +from tensorflow.lite.python import interpreter as interpreter_wrapper # pylint: disable=g-direct-tensorflow-import + +FLAGS = flags.FLAGS +flags.DEFINE_string('model', '/tmp/langid/model.tflite', + 'Path to LangID TFLite model.') + +LANGIDS = ['ar', 'en', 'es', 'fr', 'ru', 'zh', 'unk'] + + +def main(argv): + with open(FLAGS.model, 'rb') as file: + model = file.read() + interpreter = interpreter_wrapper.InterpreterWithCustomOps( + model_content=model, + custom_op_registerers=[ + 'AddWhitespaceTokenizerCustomOp', 'AddNgramsCustomOp', + 'AddSgnnProjectionCustomOp', + ]) + interpreter.resize_tensor_input(0, [1, 1]) + interpreter.allocate_tensors() + input_string = ' '.join(argv[1:]) + print('Input: "{}"'.format(input_string)) + input_array = np.array([[input_string]], dtype=np.str) + interpreter.set_tensor(interpreter.get_input_details()[0]['index'], + input_array) + interpreter.invoke() + output = interpreter.get_tensor(interpreter.get_output_details()[0]['index']) + for x in range(output.shape[0]): + for y in range(output.shape[1]): + print('{:>3s}: {:.4f}'.format(LANGIDS[y], output[x][y])) + + +if __name__ == '__main__': + app.run(main) diff --git a/research/seq_flow_lite/models/sgnn/sgnn.py b/research/seq_flow_lite/models/sgnn/sgnn.py new file mode 100644 index 0000000000000000000000000000000000000000..240558cbdc0d923bfd91ce272d35f92bb2ea4413 --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn.py @@ -0,0 +1,228 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Builds SGNN model. + +[1] Sujith Ravi and Zornitsa Kozareva. 2018. "Self-governing neural networks for +on-device short text +classification." In Proceedings of the 2018 Conference on Empirical Methods in +Natural Language +Processing, pages 887-893. Association for Computational Linguistics + +The model will be constructed in this way: +* Projects text to float features, the size is defined by projection_size +* Fully connected layer predicts the class of predictions. +""" + +import collections +import tensorflow.compat.v2 as tf +import tensorflow_text as tf_text + +from tensorflow_lite_support.custom_ops.python import tflite_text_api + +# Hparam collections that will be used to tune the model. +Hparams = collections.namedtuple( + 'Hparams', + [ + # Learning rate for the optimizer. + 'learning_rate' + ]) + + +def preprocess(text): + """Normalize the text, and return tokens.""" + assert len(text.get_shape().as_list()) == 2 + assert text.get_shape().as_list()[-1] == 1 + text = tf.reshape(text, [-1]) + text = tf_text.case_fold_utf8(text) + tokenizer = tflite_text_api.WhitespaceTokenizer() + return tokenizer.tokenize(text) + + +def get_ngrams(tokens, n): + """Generates character ngrams from tokens. + + Args: + tokens: A string ragged tensor for tokens, in shape of [batch_size, + num_token]. + n: ngram size for char ngrams. + + Returns: + A string ragged tensor for ngrams, in shape of [batch_size, num_token, + ngrams]. 
+ """ + chars_split = tf.strings.unicode_split('^' + tokens + '$', 'UTF-8') + chars_joined = tflite_text_api.ngrams( + chars_split, + width=n, + axis=-1, + reduction_type=tf_text.Reduction.STRING_JOIN, + string_separator='') + flat_row_splits = tf.nn.embedding_lookup(chars_joined.values.row_splits, + chars_joined.row_splits) + return tf.RaggedTensor.from_row_splits(chars_joined.values.values, + flat_row_splits) + + +def project(ngrams, hash_seed, buckets): + """Projects a ngram RaggedTensor to float tensor. + + Args: + ngrams: A string ragged tensor, in shape of [batch_size, num_token, ngrams]. + hash_seed: A python int list, in shape of [num_hash]. + buckets: An int for the max value of projected integers. + + Returns: + A float tensor that projects ngrams to the space represented by hash_seed, + in shape of [batch_size, num_hash]. + """ + num_hash = len(hash_seed) + # Hash ngrams string tensor to hash signatures. + signatures = tf.ragged.map_flat_values(tf.strings.to_hash_bucket_fast, ngrams, + buckets) + + # Each ngram signature will be multiplied by a different hash seed, + # mod by hash buckets, and linear mapping. 
+ # value = abs(signature * seed % bucket) + # if value > bucket / 2: value -= buckets + hash_tensor = tf.constant(hash_seed, dtype=tf.int64) + value = tf.math.floormod( + tf.abs(signatures.values * tf.reshape(hash_tensor, [-1, 1])), buckets) + value = value - tf.cast(tf.greater(value, buckets >> 1), tf.int64) * buckets + + # Wrap values to ragged tensor, and calculates + # output_i,j = mean(value_i,j,k) for k-th ngram in i-th text + # computed with j-th hash seed + row_lengths = tf.repeat( + tf.reshape(signatures.row_lengths(), [1, -1]), num_hash, axis=0) + row_lengths = tf.cast(tf.reshape(row_lengths, [-1]), tf.int32) + result = tf.RaggedTensor.from_row_lengths( + tf.RaggedTensor.from_row_lengths(tf.reshape(value, [-1]), row_lengths), + tf.repeat(tf.shape(signatures.row_lengths()), num_hash)) + result = tf.reduce_mean(result, 2) / (buckets >> 1) + return tf.transpose(tf.reshape(result.values, [num_hash, -1])) + + +def fused_project(ngrams, hash_seed, buckets): + """A wrapper to fuse project method when converting to TFLite model. + + Args: + ngrams: A string ragged tensor, in shape of [batch_size, num_token, ngrams]. + hash_seed: A python int list, in shape of [num_hash]. + buckets: An int for the max value of projected integers. + + Returns: + A float tensor that projects ngrams to the space represented by hash_seed, + in shape of [batch_size, num_hash]. 
+  """
+  hash_seed_attr = ' '.join(['i: %d' % seed for seed in hash_seed])
+  experimental_implements = [
+      'name: "tftext:custom:SgnnProjection"',
+      'attr { key: "hash_seed" value { list {%s} } }' % hash_seed_attr,
+      'attr { key: "buckets" value { i: %d } }' % buckets,
+  ]
+  experimental_implements = ' '.join(experimental_implements)
+
+  @tf.function(experimental_implements=experimental_implements)
+  def func(ngrams_values, *ngrams_row_splits):
+    ngrams = tf.RaggedTensor.from_nested_row_splits(
+        flat_values=ngrams_values, nested_row_splits=ngrams_row_splits)
+    return project(ngrams, hash_seed, buckets)
+  return func(ngrams.flat_values, *ngrams.nested_row_splits)
+
+
+def sgnn(texts, hash_seed, ngram_size):
+  """Projects the string text to float features.
+
+  It first generates N ngrams of the tokens from the given text,
+  then projects each ngram tensor with a partition of the seeds.
+
+  Args:
+    texts: a string tensor, in shape of [batch_size].
+    hash_seed: a list of integers, in shape of [projection_size].
+    ngram_size: max size of ngram to generate features.
+
+  Returns:
+    A float tensor that projects ngrams to the space represented by hash_seed,
+    in shape of [batch_size, projection_size].
+ """ + projection_size = len(hash_seed) + partition_size = int(projection_size / ((ngram_size + 1) * ngram_size / 2)) + if partition_size == 0: + raise ValueError( + 'projection size %d is not enough for %d ngram partitions' % + (projection_size, ngram_size)) + indices = [int(i * (i + 1) / 2) * partition_size for i in range(ngram_size)] + indices.append(projection_size) + projection_layer = [] + tokens = preprocess(texts) + + for i in range(ngram_size): + ngram = get_ngrams(tokens, i + 1) + projection = fused_project(ngram, hash_seed[indices[i]:indices[i + 1]], + 0x7FFFFFFF) + projection_layer.append(projection) + + return tf.cast(tf.concat(projection_layer, -1), tf.float32) + + +class ProjectLayer(tf.keras.layers.Layer): + """Projects the texts to a fixed sized features.""" + + def __init__(self, seed, ngram_size, **kwargs): + self.seed = seed + self.ngram_size = ngram_size + super(ProjectLayer, self).__init__(**kwargs) + + def get_config(self): + return { + 'seed': self.seed, + 'ngram_size': self.ngram_size, + } + + def call(self, x): + return sgnn(x, self.seed, self.ngram_size) + + def compute_output_shape(self, input_shape): + return (input_shape[0], len(self.seed)) + + +def keras_model(hash_seed, ngram_size, fc_size_list, hparams): + """Compiles a keras model from projected features to labels. + + Args: + hash_seed: a list of int used to project the feature. + ngram_size: maximum size of ngram to generate features from texts. + fc_size_list: a list of int, sizes of each fully connected layer. + hparams: hyper parameters for the model. + + Returns: + A keras model that predicts the language id. 
+ + """ + if not fc_size_list: + raise ValueError( + 'Must specify one or more fully connected layers via fc_size_list') + model = tf.keras.Sequential() + model.add(ProjectLayer(hash_seed, ngram_size)) + for size in fc_size_list[:-1]: + model.add(tf.keras.layers.Dense(size)) + model.add(tf.keras.layers.Dense(fc_size_list[-1], activation='softmax')) + + model.compile( + optimizer=tf.keras.optimizers.Adam(lr=hparams.learning_rate), + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) + return model diff --git a/research/seq_flow_lite/models/sgnn/sgnn_projection.cc b/research/seq_flow_lite/models/sgnn/sgnn_projection.cc new file mode 100644 index 0000000000000000000000000000000000000000..a805bd82c3a9a40370f82990979a15f7f22a6028 --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_projection.cc @@ -0,0 +1,139 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "models/sgnn/sgnn_projection.h" // seq_flow_lite + +#include +#include + +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "farmhash.h" +#include "tensorflow/lite/context.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace sgnn { + +// This TFLite op implements the SGNN Projection +// +// Input: +// * data: A ragged string tensor of rank 2 (a 1D string value tensor and +// a 1D int64 row_split tensor). +// +// Attributes: +// * hash_seed: list of integers +// Hash seeds to project features +// * buckets: scalar integer +// Bucketize computed hash signatures. +// +// Output: +// * output: A 2D float tensor, 1st dimension is the batch of `data`, +// 2nd dimension is the size of `hash_seed`. + +constexpr int kValues = 0; +constexpr int kRowSplits = 1; + +struct SgnnProjectionAttributes { + int buckets; + std::vector hash_seed; + + explicit SgnnProjectionAttributes(const flexbuffers::Map& m) + : buckets(m["buckets"].AsInt32()) { + buckets = m["buckets"].AsInt32(); + auto hash_seed_attr = m["hash_seed"].AsTypedVector(); + hash_seed = std::vector(hash_seed_attr.size()); + for (int i = 0; i < hash_seed_attr.size(); ++i) { + hash_seed[i] = hash_seed_attr[i].AsInt32(); + } + } +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + const uint8_t* buffer_t = reinterpret_cast(buffer); + return new SgnnProjectionAttributes( + flexbuffers::GetRoot(buffer_t, length).AsMap()); +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + const auto& attributes = + *reinterpret_cast(node->user_data); + const TfLiteTensor* input_row_splits = GetInput(context, node, kRowSplits); + 
TfLiteTensor* output = GetOutput(context, node, 0); + TfLiteIntArray* output_shape = TfLiteIntArrayCreate(2); + output_shape->data[0] = SizeOfDimension(input_row_splits, 0) - 1; + output_shape->data[1] = attributes.hash_seed.size(); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_shape)); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const auto& attributes = + *reinterpret_cast(node->user_data); + const TfLiteTensor* ngrams = GetInput(context, node, kValues); + const TfLiteTensor* row_splits = GetInput(context, node, kRowSplits); + + auto row_splits_values = GetTensorData(row_splits); + auto output_values = GetTensorData(GetOutput(context, node, 0)); + int output_idx = 0; + for (int i = 1; i < SizeOfDimension(row_splits, 0); ++i) { + int len = row_splits_values[i] - row_splits_values[i - 1]; + std::vector hash_signature(len); + + // Follow the implementation from + // tensorflow/core/kernels/string_to_hash_bucket_op.h + for (int j = 0; j < len; ++j) { + int index = row_splits->data.i64[i - 1] + j; + StringRef str = GetString(ngrams, index); + hash_signature[j] = + util::Fingerprint64(str.str, str.len) % attributes.buckets; + } + for (int k = 0; k < attributes.hash_seed.size(); ++k) { + double result = 0; + for (int j = 0; j < len; ++j) { + int64_t tmp = hash_signature[j] * attributes.hash_seed[k]; + int64_t value = abs(tmp) % attributes.buckets; + if (value > attributes.buckets / 2) { + value -= attributes.buckets; + } + result += value; + } + output_values[output_idx] = + static_cast(result) / (attributes.buckets / 2) / len; + output_idx++; + } + } + return kTfLiteOk; +} + +} // namespace sgnn + +TfLiteRegistration* Register_tftext_SGNN_PROJECTION() { + static TfLiteRegistration r = {sgnn::Init, sgnn::Free, sgnn::Prepare, + sgnn::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/models/sgnn/sgnn_projection.h 
b/research/seq_flow_lite/models/sgnn/sgnn_projection.h new file mode 100644 index 0000000000000000000000000000000000000000..af4a25c1f883e3c9f60ece6cc81e8fc2ce5f4eb6 --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_projection.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_ + +#include "tensorflow/lite/context.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_tftext_SGNN_PROJECTION(); + +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_ diff --git a/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.cc b/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.cc new file mode 100644 index 0000000000000000000000000000000000000000..040e1d1f4bc727eea72e89985949ddedc4a3572e --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.cc @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "models/sgnn/sgnn_projection_op_resolver.h" // seq_flow_lite + +#include "tensorflow/lite/mutable_op_resolver.h" +#include "models/sgnn/sgnn_projection.h" // seq_flow_lite + +namespace tflite { +namespace ops { +namespace custom { + +void AddSgnnProjectionCustomOp(MutableOpResolver* resolver) { + resolver->AddCustom("tftext:custom:SgnnProjection", + Register_tftext_SGNN_PROJECTION()); +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.h b/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.h new file mode 100644 index 0000000000000000000000000000000000000000..0d5a7f15da4ecaa0dab20ba0ffca59943aa0c6ce --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_projection_op_resolver.h @@ -0,0 +1,34 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_OP_RESOLVER_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_OP_RESOLVER_H_ + +#include "tensorflow/lite/mutable_op_resolver.h" + +namespace tflite { +namespace ops { +namespace custom { + +// Adds the SgnnProjection custom op to an op resolver. +// This function can be loaded using dlopen. Since C++ function names get +// mangled, declare this function as extern C, so its name is unchanged. +extern "C" void AddSgnnProjectionCustomOp(MutableOpResolver* resolver); + +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_OP_RESOLVER_H_ diff --git a/research/seq_flow_lite/models/sgnn/sgnn_projection_test.cc b/research/seq_flow_lite/models/sgnn/sgnn_projection_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..2fb25eb1160adf873d65c50b89881afc8d685a8f --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_projection_test.cc @@ -0,0 +1,101 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "models/sgnn/sgnn_projection.h" // seq_flow_lite + +#include +#include + +#include +#include +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace sgnn_projection { +namespace test { +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +} // namespace + +class SgnnProjectionModel : public SingleOpModel { + public: + // Constructor for testing the op with a tf.Tensor + SgnnProjectionModel(const std::vector& input_values, + const std::vector& input_row_splits, + const std::vector& hash_seed, int64_t buckets) { + input_values_index_ = AddInput(TensorType_STRING); + input_row_splits_index_ = AddInput(TensorType_INT64); + output_values_index_ = AddOutput(TensorType_FLOAT32); + BuildCustomOp(hash_seed, buckets); + BuildInterpreter({{static_cast(input_values.size())}, + {static_cast(input_row_splits.size())}}); + PopulateStringTensor(input_values_index_, input_values); + PopulateTensor(input_row_splits_index_, input_row_splits); + Invoke(); + } + + std::vector GetOutputShape() { + return GetTensorShape(output_values_index_); + } + + std::vector ExtractOutputValue() { + return ExtractVector(output_values_index_); + } + + private: + void BuildCustomOp(const std::vector& hash_seed, int64_t buckets) { + flexbuffers::Builder fbb; + size_t start_map = fbb.StartMap(); + auto vector_start = fbb.StartVector("hash_seed"); + for (int i = 0; i < hash_seed.size(); i++) { + fbb.Add(hash_seed[i]); + } + fbb.EndVector(vector_start, /*typed=*/true, /*fixed=*/false); + fbb.Int("buckets", buckets); + fbb.EndMap(start_map); + fbb.Finish(); + SetCustomOp("tftext:custom:SgnnProjection", fbb.GetBuffer(), + Register_tftext_SGNN_PROJECTION); + } + + int 
input_values_index_; + int input_row_splits_index_; + int output_values_index_; +}; + +// Keep same result of test_projection in sgnn_test.py +TEST(SgnnProjectionTest, TensorSgnnProjection) { + SgnnProjectionModel m({"^h", "he", "el", "ll", "lo", "o$", "^h", "hi", "i$"}, + /*input_row_splits=*/{0, 6, 9}, /*hash_seed=*/{5, 7}, + /*buckets=*/0x7FFFFFFF); + EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 2)); + EXPECT_THAT(m.ExtractOutputValue(), + ElementsAreArray(ArrayFloatNear( + { 0.448691, -0.238499, -0.037561, 0.080748}))); +} + +} // namespace test +} // namespace sgnn_projection +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/models/sgnn/sgnn_test.py b/research/seq_flow_lite/models/sgnn/sgnn_test.py new file mode 100644 index 0000000000000000000000000000000000000000..7e2db82e82cfeaeb8b6eeba3c93b95100cfca8bc --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/sgnn_test.py @@ -0,0 +1,73 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +# Lint as: python3 +"""Tests for seq_flow_lite.sgnn.""" + +import tensorflow as tf +from tensorflow.python.framework import test_util # pylint: disable=g-direct-tensorflow-import +from models import sgnn # import seq_flow_lite module + + +@test_util.run_all_in_graph_and_eager_modes +class SgnnTest(tf.test.TestCase): + + def test_preprocess(self): + self.assertAllEqual( + sgnn.preprocess( + tf.constant([['Hello World!'], [u'你好'], + [u'مرحبا بالعالم']])), + [['hello'.encode(), 'world!'.encode()], [u'你好'.encode()], + [u'مرحبا'.encode(), u'بالعالم'.encode()]]) + + def test_get_ngram(self): + tokens = tf.ragged.constant([['hello', 'world'], [u'你好'], + [u'مرحبا', u'بالعالم']]) + self.assertAllEqual( + sgnn.get_ngrams(tokens, 3), + [[ + b'^he', b'hel', b'ell', b'llo', b'lo$', b'^wo', b'wor', b'orl', + b'rld', b'ld$' + ], [u'^你好'.encode(), u'你好$'.encode()], + [ + u'^مر'.encode(), u'مرح'.encode(), u'رحب'.encode(), + u'حبا'.encode(), u'با$'.encode(), u'^با'.encode(), + u'بال'.encode(), u'الع'.encode(), u'لعا'.encode(), + u'عال'.encode(), u'الم'.encode(), u'لم$'.encode() + ]]) + + def test_project(self): + ngrams = tf.ragged.constant([[b'^h', b'he', b'el', b'll', b'lo', b'o$'], + [b'^h', b'hi', b'i$']]) + self.assertAllClose( + sgnn.fused_project(ngrams, [5, 7], 0x7FFFFFFF), + [[0.448691, -0.238499], [-0.037561, 0.080748]]) + self.assertAllClose( + sgnn.fused_project(ngrams, [5, 7], 0x7FFFFFFF), + sgnn.project(ngrams, [5, 7], 0x7FFFFFFF)) + + def test_sgnn(self): + self.assertAllClose( + sgnn.sgnn(tf.constant([['hello'], ['hi']]), [3, 5, 7], 2), + [[0.268503, 0.448691, -0.238499], [0.093143, -0.037561, 0.080748]]) + + def test_keras_model(self): + hparams = sgnn.Hparams(learning_rate=2e-4) + model = sgnn.keras_model([1, 2, 3, 4], 2, [100, 50], hparams) + self.assertIsNotNone(model) + + +if __name__ == '__main__': + tf.test.main() diff --git a/research/seq_flow_lite/models/sgnn/train.py 
b/research/seq_flow_lite/models/sgnn/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d6fd9f6913fdbbae8e089d1e4d260c75b638736a --- /dev/null +++ b/research/seq_flow_lite/models/sgnn/train.py @@ -0,0 +1,121 @@ +# Copyright 2020 The TensorFlow Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Script to train langid model. + +The script builds language detection from wikipedia dataset, +builds SGNN model to train an on-device model to +predict the language of the given text. 
+""" + +import os +from absl import app +from absl import flags +import numpy as np +import tensorflow.compat.v2 as tf +import tensorflow_datasets as tfds + +from models import sgnn # import seq_flow_lite module + +FLAGS = flags.FLAGS +flags.DEFINE_string('output_dir', '/tmp/langid', + 'Path for the output directory.') + +flags.DEFINE_integer('projection_size', 600, 'Size of projection layer.') +flags.DEFINE_integer('ngram_size', 3, 'Max size of ngram to project features.') +flags.DEFINE_string('fc_layer', '256,128', + 'Size of fully connected layer, separated by comma.') + +flags.DEFINE_integer('batch_size', 160, 'Batch size for training.') +flags.DEFINE_integer('epochs', 10, 'Num of epochs for training.') +flags.DEFINE_float('learning_rate', 2e-4, 'learning rate for optimizer.') + +LANGIDS = ['ar', 'en', 'es', 'fr', 'ru', 'zh'] + + +def dataset_fn(batch_size, is_training, split, try_gcs, max_input_len): + """Creates dataset to train and evaluate. + + Args: + batch_size: Batch size for training or evaluation. + is_training: True if the dataset is for training. + split: Split of dataset, follow the pattern defined in + https://www.tensorflow.org/datasets/splits + try_gcs: True if loading the data from gcs. + max_input_len: Max length of input string. + + Returns: + Dataset object. + """ + + def _get_text(item): + return tf.strings.substr(item['text'], 0, max_input_len) + + all_data = [] + for idx, langid in enumerate(LANGIDS): + dataset = tfds.load( + 'wikipedia/20190301.%s' % langid, try_gcs=try_gcs, split=split) + + map_fn = lambda item: (_get_text(item), idx) # pylint: disable=cell-var-from-loop + dataset = dataset.map(map_fn) + all_data.append(dataset) + + datasets = tf.data.experimental.sample_from_datasets( + all_data, [1. 
/ len(all_data)] * len(LANGIDS)) + repeat_count = None if is_training else 1 + return datasets.cache().shuffle(100000).batch(batch_size).repeat(repeat_count) + + +def save_and_convert(model, output_dir): + """Save keras model and convert to tflite.""" + saved_model_path = os.path.join(output_dir, 'saved_model') + tf.saved_model.save(model, saved_model_path) + converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path) + converter.allow_custom_ops = True + converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS + ] + data = converter.convert() + with open(os.path.join(output_dir, 'model.tflite'), 'wb') as f: + f.write(data) + + +def train_and_evaluate(): + """Train and evaluate the model.""" + hash_seed = np.random.uniform(-1, 1, FLAGS.projection_size) * 0x7FFFFFFF + fc_layer = [int(fc) for fc in FLAGS.fc_layer.split(',')] + fc_layer.append(len(LANGIDS) + 1) + hparams = sgnn.Hparams(learning_rate=FLAGS.learning_rate) + + model = sgnn.keras_model(hash_seed, FLAGS.ngram_size, fc_layer, hparams) + model.fit( + dataset_fn(FLAGS.batch_size, True, 'train[:10%]', True, 100), + epochs=FLAGS.epochs, + steps_per_epoch=1000, + validation_steps=100, + validation_data=dataset_fn(FLAGS.batch_size, False, 'train[10:11%]', True, + 100), + ) + save_and_convert(model, FLAGS.output_dir) + + +def main(_): + if not os.path.exists(FLAGS.output_dir): + os.mkdir(FLAGS.output_dir) + + train_and_evaluate() + + +if __name__ == '__main__': + app.run(main) diff --git a/research/seq_flow_lite/tf_ops/BUILD b/research/seq_flow_lite/tf_ops/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..f6254d33d47791bc039ee4798e8a8fc3d6348367 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/BUILD @@ -0,0 +1,147 @@ +# Tensorflow ops for sequence string projection. 
+ +load("//tf_ops:build_def.bzl", "gen_op_wrapper_py") + +licenses(["notice"]) + +package( + default_visibility = [ + "//:__subpackages__", + ], +) + +py_library( + name = "text_projection", + srcs = ["text_projection.py"], + srcs_version = "PY3", + deps = [ + ":sequence_string_projection_op_py", + ], +) + +cc_library( + name = "sequence_string_projection_op", + srcs = [ + "sequence_string_projection.cc", + ], + deps = [ + ":projection_normalizer_util", + ":projection_tokenizer_util", + ":projection_util", + ":text_distorter", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/random", + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + ], + alwayslink = 1, +) + +cc_library( + name = "projection_util", + srcs = ["projection_util.cc"], + hdrs = ["projection_util.h"], + deps = [ + "@utf_archive//:utf", + ], +) + +cc_library( + name = "projection_tokenizer_util", + srcs = ["projection_tokenizer_util.cc"], + hdrs = ["projection_tokenizer_util.h"], + deps = [ + ":projection_util", + "@utf_archive//:utf", + ], +) + +cc_library( + name = "projection_normalizer_util", + srcs = ["projection_normalizer_util.cc"], + hdrs = ["projection_normalizer_util.h"], + deps = [ + ":projection_util", + "@utf_archive//:utf", + ], +) + +cc_library( + name = "text_distorter", + srcs = ["text_distorter.cc"], + hdrs = ["text_distorter.h"], + deps = [ + "@com_google_absl//absl/strings", + "@icu4c", + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + "@utf_archive//:utf", + ], +) + +cc_test( + name = "sequence_string_projection_test", + size = "small", + srcs = ["sequence_string_projection_test.cc"], + deps = [ + ":sequence_string_projection_op", + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + ], +) + +cc_library( + name = "sequence_string_projection_op_v2", + srcs = [ + "sequence_string_projection_op_v2.cc", + ], + deps = [ + ":projection_normalizer_util", + ":projection_util", + 
":text_distorter", + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/random", + ], + alwayslink = 1, +) + +cc_test( + name = "sequence_string_projection_op_v2_test", + size = "small", + srcs = ["sequence_string_projection_op_v2_test.cc"], + deps = [ + ":sequence_string_projection_op_v2", + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + ], +) + +gen_op_wrapper_py( + name = "sequence_string_projection_op_v2_py", + out = "sequence_string_projection_op_v2.py", + kernel_lib = ":sequence_string_projection_op_v2", +) + +gen_op_wrapper_py( + name = "sequence_string_projection_op_py", + out = "sequence_string_projection_op.py", + kernel_lib = ":sequence_string_projection_op", +) + +cc_library( + name = "tf_custom_ops", + srcs = ["tf_custom_ops.cc"], + deps = [ + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + ], + alwayslink = 1, +) + +gen_op_wrapper_py( + name = "tf_custom_ops_py", + out = "tf_custom_ops_py.py", + kernel_lib = ":tf_custom_ops", +) diff --git a/research/seq_flow_lite/tf_ops/build_def.bzl b/research/seq_flow_lite/tf_ops/build_def.bzl new file mode 100644 index 0000000000000000000000000000000000000000..cb945f330664f63fe960ffcd1048172897c17bf5 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/build_def.bzl @@ -0,0 +1,86 @@ +def tf_deps(): + return [ + "@tensorflow_includes//:includes", + "@tensorflow_solib//:framework_lib", + ] + +def tf_copts(): + return ["-Wno-sign-compare"] + +def _make_search_paths(prefix, levels_to_root): + return ",".join( + [ + "-rpath,%s/%s" % (prefix, "/".join([".."] * search_level)) + for search_level in range(levels_to_root + 1) + ], + ) + +def _rpath_linkopts(name): + # Search parent directories up to the TensorFlow root directory for shared + # object dependencies, even if this op shared object is deeply nested + # (e.g. tensorflow/contrib/package:python/ops/_op_lib.so). 
tensorflow/ is then + # the root and tensorflow/libtensorflow_framework.so should exist when + # deployed. Other shared object dependencies (e.g. shared between contrib/ + # ops) are picked up as long as they are in either the same or a parent + # directory in the tensorflow/ tree. + levels_to_root = native.package_name().count("/") + name.count("/") + return ["-Wl,%s" % (_make_search_paths("$$ORIGIN", levels_to_root),)] + +def gen_op_wrapper_py(name, out, kernel_lib, linkopts = [], **kwargs): + """Generates the py_library `name` with a data dep on the ops in kernel_lib. + + The resulting py_library creates file `$out`, and has a dependency on a + symbolic library called lib{$name}_gen_op.so, which contains the kernels + and ops and can be loaded via `tf.load_op_library`. + + Args: + name: The name of the py_library. + out: The name of the python file. Use "gen_{name}_ops.py". + kernel_lib: A cc_kernel_library target to generate for. + **kwargs: Any args to the `cc_binary` and `py_library` internal rules. + """ + if not out.endswith(".py"): + fail("Argument out must end with '.py', but saw: {}".format(out)) + + module_name = "lib{}_gen_op".format(name) + version_script_file = "%s-version-script.lds" % module_name + native.genrule( + name = module_name + "_version_script", + outs = [version_script_file], + cmd = "echo '{global:\n *tensorflow*;\n *deepmind*;\n local: *;};' >$@", + output_licenses = ["unencumbered"], + visibility = ["//visibility:private"], + ) + native.cc_binary( + name = "{}.so".format(module_name), + deps = [kernel_lib] + tf_deps() + [version_script_file], + copts = tf_copts() + [ + "-fno-strict-aliasing", # allow a wider range of code [aliasing] to compile. + "-fvisibility=hidden", # avoid symbol clashes between DSOs. 
+ ], + linkshared = 1, + linkopts = linkopts + _rpath_linkopts(module_name) + [ + "-Wl,--version-script", + "$(location %s)" % version_script_file, + ], + **kwargs + ) + native.genrule( + name = "{}_genrule".format(out), + outs = [out], + cmd = """ + echo 'import tensorflow as tf +_reverb_gen_op = tf.load_op_library( + tf.compat.v1.resource_loader.get_path_to_datafile( + "lib{}_gen_op.so")) +_locals = locals() +for k in dir(_reverb_gen_op): + _locals[k] = getattr(_reverb_gen_op, k) +del _locals' > $@""".format(name), + ) + native.py_library( + name = name, + srcs = [out], + data = [":lib{}_gen_op.so".format(name)], + **kwargs + ) diff --git a/research/seq_flow_lite/tf_ops/projection_normalizer_util.cc b/research/seq_flow_lite/tf_ops/projection_normalizer_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..3303ade4523b0072255e35e5e09517ca341f20c8 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_normalizer_util.cc @@ -0,0 +1,158 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite + +#include +#include +#include +#include +#include + +#include "tf_ops/projection_util.h" // seq_flow_lite + +// Returns true if the given text contains a number. 
+bool IsDigit(const std::string& text) { + Rune rune; + for (size_t i = 0; i < text.length();) { + const int bytes_read = chartorune(&rune, const_cast(text.data())); + if (rune == Runeerror || bytes_read == 0) break; + if (rune >= static_cast('0') && rune <= static_cast('9')) { + return true; + } + i += bytes_read; + } + return false; +} + +// Gets the string containing |num_chars| characters from |start| position. +std::string GetCharToken(const std::vector& char_tokens, + size_t start, size_t num_chars) { + std::string char_token = ""; + if (start + num_chars <= char_tokens.size()) { + for (size_t i = 0; i < num_chars; ++i) { + char_token.append(char_tokens[start + i]); + } + } + return char_token; +} + +// Counts how many times |pattern| appeared from |start| position. +int GetNumPattern(const std::vector& char_tokens, size_t start, + size_t num_chars, const std::string& pattern) { + int count = 0; + for (size_t i = start; i < char_tokens.size(); i += num_chars) { + std::string cur_pattern = GetCharToken(char_tokens, i, num_chars); + if (pattern == cur_pattern) { + ++count; + } else { + break; + } + } + return count; +} + +std::string ContractToken(const char* input_ptr, size_t len, size_t num_chars) { + // This function contracts patterns whose length is |num_chars| and appeared + // more than twice. So if the input is shorter than 3 * |num_chars|, do not + // apply any contraction. + if (len < 3 * num_chars) { + return input_ptr; + } + std::vector char_tokens = SplitByChar(input_ptr, len, len); + + std::string token; + token.reserve(len); + for (size_t i = 0; i < char_tokens.size();) { + std::string cur_pattern = GetCharToken(char_tokens, i, num_chars); + + // Count how many times this pattern appeared. 
+ int num_cur_patterns = 0; + if (cur_pattern.find(" ") == std::string::npos && !IsDigit(cur_pattern)) { + num_cur_patterns = + GetNumPattern(char_tokens, i + num_chars, num_chars, cur_pattern); + } + + if (num_cur_patterns >= 2) { + // If this pattern is repeated, store it only twice. + token.append(cur_pattern); + token.append(cur_pattern); + i += (num_cur_patterns + 1) * num_chars; + } else { + token.append(char_tokens[i]); + ++i; + } + } + + return token; +} + +void ProjectionNormalizer::InitializeSeparators(const std::string& separators) { + for (size_t i = 0; i < separators.length(); ++i) { + if (separators[i] != ' ') { + separators_.insert(separators[i]); + } + } +} + +std::string ProjectionNormalizer::NormalizeInternal(const char* input_ptr, + size_t len) { + std::string normalized; + normalized.reserve(len * 2); + for (size_t i = 0; i < len; ++i) { + char c = input_ptr[i]; + bool matched_separator = separators_.find(c) != separators_.end(); + if (matched_separator) { + if (i > 0 && input_ptr[i - 1] != ' ' && normalized.back() != ' ') { + normalized.append(" "); + } + } + normalized.append(1, c); + if (matched_separator) { + if (i + 1 < len && input_ptr[i + 1] != ' ' && c != '\'') { + normalized.append(" "); + } + } + } + return normalized; +} + +std::string ProjectionNormalizer::Normalize(const std::string& input, + size_t max_input) { + return Normalize(input.c_str(), input.size(), max_input); +} + +std::string ProjectionNormalizer::Normalize(const char* input_ptr, size_t len, + size_t max_input) { + std::string normalized(input_ptr, std::min(len, max_input)); + + if (normalize_repetition_) { + // Remove repeated 1 char (e.g. soooo => soo) + normalized = ContractToken(normalized.data(), normalized.length(), 1); + + // Remove repeated 2 chars from the beginning (e.g. hahaha => + // haha, xhahaha => xhaha, xyhahaha => xyhaha). 
+ normalized = ContractToken(normalized.data(), normalized.length(), 2); + + // Remove repeated 3 chars from the beginning + // (e.g. wowwowwow => wowwow, abcdbcdbcd => abcdbcd). + normalized = ContractToken(normalized.data(), normalized.length(), 3); + } + + if (!separators_.empty()) { + // Add space around separators_. + normalized = NormalizeInternal(normalized.data(), normalized.length()); + } + return normalized; +} diff --git a/research/seq_flow_lite/tf_ops/projection_normalizer_util.h b/research/seq_flow_lite/tf_ops/projection_normalizer_util.h new file mode 100644 index 0000000000000000000000000000000000000000..4a7bed706c5c933748e5bed2b3e98b53a70bdf6f --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_normalizer_util.h @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_NORMALIZER_UTIL_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_NORMALIZER_UTIL_H_ + +#include +#include +#include + +#include "libutf/utf.h" + +// Normalizes the input with the given |separators| by adding a space before and +// after each separator. When |normalize_repetition| is true, it removes the +// repeated characters (except numbers) which consecutively appeared more than +// twice in a word. +// Examples: arwwwww -> arww, good!!!!! 
-> good!!, hahaha => haha. +class ProjectionNormalizer { + public: + explicit ProjectionNormalizer(const std::string& separators, + bool normalize_repetition = false) { + InitializeSeparators(separators); + normalize_repetition_ = normalize_repetition; + } + + // Normalizes the repeated characters (except numbers) which consecutively + // appeared more than twice in a word. + std::string Normalize(const std::string& input, size_t max_input = 300); + std::string Normalize(const char* input_ptr, size_t len, + size_t max_input = 300); + + private: + // Parses and extracts supported separators. + void InitializeSeparators(const std::string& separators); + + // Removes repeated chars. + std::string NormalizeInternal(const char* input_ptr, size_t len); + + std::unordered_set separators_; + bool normalize_repetition_; +}; + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_NORMALIZER_UTIL_H_ diff --git a/research/seq_flow_lite/tf_ops/projection_tokenizer_util.cc b/research/seq_flow_lite/tf_ops/projection_tokenizer_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..25885630a60de41108dd88d3d31141c957df9e0e --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_tokenizer_util.cc @@ -0,0 +1,101 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tf_ops/projection_tokenizer_util.h" // seq_flow_lite + +#include +#include +#include +#include +#include + +#include "tf_ops/projection_util.h" // seq_flow_lite + + +namespace { +constexpr char kApostrophe = '\''; +constexpr char kSpace = ' '; +constexpr char kComma = ','; +constexpr char kDot = '.'; +constexpr size_t kInvalid = -1; +} // namespace + +// Returns true if the input |c| is ascii number. +bool is_numeric(char c) { return c >= '0' && c <= '9'; } + +// Returns true if we want to prepend the separator to the next token. +bool prepend_separator(char separator) { return separator == kApostrophe; } + +void ProjectionTokenizer::InitializeSeparators(const std::string& separators) { + for (size_t i = 0; i < separators.length(); ++i) { + separators_.insert(separators[i]); + } +} + +size_t ProjectionTokenizer::FindNextSeparator(const char* input_ptr, + size_t from, + size_t length) const { + auto index = from; + while (index < length) { + char c = input_ptr[index]; + // Do not break a number (e.g. "10,000", "0.23"). + if (c == kComma || c == kDot) { + if (index + 1 < length && is_numeric(input_ptr[index + 1])) { + c = input_ptr[++index]; + } + } + if (separators_.find(c) != separators_.end()) { + break; + } + ++index; + } + return index == length ? kInvalid : index; +} + +std::vector ProjectionTokenizer::Tokenize( + const char* input_ptr, size_t len, size_t max_input, + size_t max_tokens) const { + // If separators_ is not given, tokenize the input with a space. + if (separators_.empty()) { + return SplitBySpace(input_ptr, len, max_input, max_tokens); + } + + std::vector tokens; + size_t last_index = + max_input == kEntireString ? len : (len < max_input ? len : max_input); + size_t start = 0; + // Skip leading spaces. 
+ while (start < last_index && input_ptr[start] == kSpace) { + start++; + } + auto end = FindNextSeparator(input_ptr, start, last_index); + + while (end != kInvalid && + (max_tokens == kAllTokens || tokens.size() < max_tokens - 1)) { + auto length = end - start; + if (length > 0) tokens.emplace_back(input_ptr + start, length); + + // Add the separator (except space and apostrophe) as a token + char separator = input_ptr[end]; + if (separator != kSpace && separator != kApostrophe) { + tokens.emplace_back(input_ptr + end, 1); + } + + start = end + (prepend_separator(separator) ? 0 : 1); + end = FindNextSeparator(input_ptr, end + 1, last_index); + } + auto length = end == kInvalid ? (last_index - start) : (end - start); + if (length > 0) tokens.emplace_back(input_ptr + start, length); + return tokens; +} diff --git a/research/seq_flow_lite/tf_ops/projection_tokenizer_util.h b/research/seq_flow_lite/tf_ops/projection_tokenizer_util.h new file mode 100644 index 0000000000000000000000000000000000000000..ca6ac553198b5bce9b5fce2d783e58da321519dd --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_tokenizer_util.h @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_TOKENIZER_UTIL_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_TOKENIZER_UTIL_H_ + +#include +#include +#include + +#include "libutf/utf.h" + +// Tokenizes the input with the given separators. To properly tokenize a text +// containing contractions in English (e.g. I'm), it combines the apostrophe +// with the token coming after it. For example, the text "I'm happy" is +// tokenized into three tokens: "I", "'m", "happy". When |separators| is not +// given, use the space to tokenize the input. +// Note) This tokenization supports only English. +class ProjectionTokenizer { + public: + explicit ProjectionTokenizer(const std::string& separators) { + InitializeSeparators(separators); + } + + // Tokenizes the input by separators_. Limit to max_tokens, when it is not -1. + std::vector Tokenize(const std::string& input, size_t max_input, + size_t max_tokens) const { + return Tokenize(input.c_str(), input.size(), max_input, max_tokens); + } + + std::vector Tokenize(const char* input_ptr, size_t len, + size_t max_input, size_t max_tokens) const; + + private: + // Parses and extracts supported separators. + void InitializeSeparators(const std::string& separators); + + // Starting from input_ptr[from], search for the next occurrence of + // separators_. Don't search beyond input_ptr[length](non-inclusive). Return + // -1 if not found. 
+ size_t FindNextSeparator(const char* input_ptr, size_t from, + size_t length) const; + + std::unordered_set separators_; +}; + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_TOKENIZER_UTIL_H_ diff --git a/research/seq_flow_lite/tf_ops/projection_util.cc b/research/seq_flow_lite/tf_ops/projection_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..0434af090449f74cbd0c741feba7bf73344761f7 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_util.cc @@ -0,0 +1,412 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tf_ops/projection_util.h" // seq_flow_lite + +#include +#include +#include +#include +#include +#include +#include + +namespace { +constexpr int kInvalid = -1; +constexpr char kSpace = ' '; +} // namespace + +class MurmurHash : public HashEngine { + public: + void GetHashCodes(const std::string& word, std::vector* hash_codes, + int feature_size) override { + uint64_t hash_low = 0; + uint64_t hash_high = 0; + for (int i = 0; i < feature_size; i += 64) { + if (i == 0) { + auto hash = MurmurHash128(word.c_str(), word.size()); + hash_low = hash.first; + hash_high = hash.second; + } else { + GetMoreBits(hash_low, hash_high, &hash_low, &hash_high); + } + hash_codes->push_back(hash_low); + hash_codes->push_back(hash_high); + } + } + + private: + static constexpr uint64_t kMul = 0xc6a4a7935bd1e995ULL; + static constexpr uint64_t kMul2 = 0x9e3779b97f4a7835ULL; + inline uint64_t ShiftMix(uint64_t val) { return val ^ (val >> 47); } + inline uint64_t MurmurStep(uint64_t hash, uint64_t data) { + hash ^= ShiftMix(data * kMul) * kMul; + hash *= kMul; + return hash; + } + inline uint64_t Load64VariableLength(const void* p, int len) { + assert(len >= 1 && len <= 8); + const char* buf = static_cast(p); + uint64_t val = 0; + --len; + do { + val = (val << 8) | buf[len]; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. + } while (--len >= 0); + // No ToHost64(...) needed. The bytes are accessed in little-endian manner + // on every architecture. + return val; + } + void GetMoreBits(uint64_t hash, uint64_t hash2, uint64_t* rlow, + uint64_t* rhigh) { + hash = ShiftMix(hash) * kMul; + hash2 ^= hash; + *rhigh = ShiftMix(hash); + *rlow = ShiftMix(hash2 * kMul2) * kMul2; + } + std::pair MurmurHash128(const char* buf, + const size_t len) { + // Initialize the hashing value. + uint64_t hash = len * kMul; + // hash2 will be xored by hash during the hash computation iterations. 
+ // In the end we use an alternative mixture multiplier for mixing + // the bits in hash2. + uint64_t hash2 = 0; + // Let's remove the bytes not divisible by the sizeof(uint64_t). + // This allows the inner loop to process the data as 64 bit integers. + const size_t len_aligned = len & ~0x7; + const char* end = buf + len_aligned; + + for (const char* p = buf; p != end; p += 8) { + // Manually unrolling this loop 2x did not help on Intel Core 2. + hash = MurmurStep(hash, Load64VariableLength(p, 8)); + hash2 ^= hash; + } + if ((len & 0x7) != 0) { + const uint64_t data = Load64VariableLength(end, len & 0x7); + hash ^= data; + hash *= kMul; + hash2 ^= hash; + } + hash = ShiftMix(hash) * kMul; + hash2 ^= hash; + hash = ShiftMix(hash); + + // mul2 is a prime just above golden ratio. mul2 is used to ensure that the + // impact of the last few bytes is different to the upper and lower 64 bits. + hash2 = ShiftMix(hash2 * kMul2) * kMul2; + + return std::make_pair(hash, hash2); + } +}; + +class XFixHash : public HashEngine { + public: + explicit XFixHash(int bits_per_char) + : bits_per_char_(bits_per_char), bit_mask_((1ULL << bits_per_char) - 1) {} + + void GetHashCodes(const std::string& word, std::vector* hash_codes, + int feature_size) override { + auto token_ptr = reinterpret_cast(word.c_str()); + size_t token_size = word.size(); + int token_idx = 0; + uint64_t hash_low = token_size * kMul; + uint64_t hash_high = token_size * kMul2; + uint64_t frhash = kMul; + uint64_t brhash = kMul2; + for (int i = 0; i < feature_size; i += 64) { + for (int j = i ? 
0 : bits_per_char_; j < 64; + j += bits_per_char_, token_idx = (token_idx + 1) % token_size) { + frhash = ((frhash << 8) | token_ptr[token_idx]) * kMul; + brhash = + ((brhash << 8) | token_ptr[token_size - 1 - token_idx]) * kMul2; + hash_low = (hash_low << bits_per_char_) | (frhash & bit_mask_); + hash_high = (hash_high << bits_per_char_) | (brhash & bit_mask_); + } + hash_codes->push_back(hash_low); + hash_codes->push_back(hash_high); + } + } + + private: + const uint64_t kMul = 0xc6a4a7935bd1e995ULL; + const uint64_t kMul2 = 0x9e3779b97f4a7835ULL; + const int bits_per_char_; + const uint64_t bit_mask_; +}; + +class UnicodeHash : public HashEngine { + public: + // bits_per_unicode should be a divisor of 64. + explicit UnicodeHash(int bits_per_unicode) + : bits_per_unicode_(bits_per_unicode), + bit_mask_(((1ULL << bits_per_unicode) - 1) << (64 - bits_per_unicode)) { + } + + void GetHashCodes(const std::string& word, std::vector* hash_codes, + int feature_size) override { + auto word_ptr = word.c_str(); + int utflength = utflen(const_cast(word_ptr)); + // Both `feature_size` and `bits_per_unicode` are bit lengths. 
+ const int max_usable_runes = feature_size * 2 / bits_per_unicode_; + if (max_usable_runes < utflength) { + const int unicode_skip = (utflength - max_usable_runes) / 2; + for (int i = 0; i < unicode_skip; ++i) { + Rune rune; + word_ptr += chartorune(&rune, const_cast(word_ptr)); + } + utflength = max_usable_runes; + } + + std::vector unicode_hashes; + unicode_hashes.reserve(utflength); + for (int i = 0; i < utflength; ++i) { + Rune rune; + word_ptr += chartorune(&rune, const_cast(word_ptr)); + unicode_hashes.push_back((rune * kMul) & bit_mask_); + } + + uint64_t hash = 0; + int k = 0; + for (int i = 0; i < feature_size * 2; i += 64) { + for (int j = 0; j < 64; j += bits_per_unicode_) { + if (k < unicode_hashes.size()) { + hash = (hash >> bits_per_unicode_) | unicode_hashes[k++]; + } else { + hash = hash >> bits_per_unicode_; + } + } + hash_codes->push_back(hash); + } + } + + private: + const uint64_t kMul = 0xc6a4a7935bd1e995ULL; + const int bits_per_unicode_; + const uint64_t bit_mask_; +}; + +bool Hasher::SupportedHashType(const std::string& hash_type) { + std::unordered_set supported({kMurmurHash, kUnicodeHash8, + kUnicodeHash16, kXfixHash8, + kXfixHash16, kXfixHash32}); + return supported.find(hash_type) != supported.end(); +} + +Hasher* Hasher::CreateHasher(int feature_size, const std::string& hash_type) { + if (SupportedHashType(hash_type)) { + if (hash_type == kMurmurHash) { + return new Hasher(feature_size, new MurmurHash()); + } else if (hash_type == kUnicodeHash8) { + return new Hasher(feature_size, new UnicodeHash(8)); + } else if (hash_type == kUnicodeHash16) { + return new Hasher(feature_size, new UnicodeHash(16)); + } else if (hash_type == kXfixHash8) { + return new Hasher(feature_size, new XFixHash(8)); + } else if (hash_type == kXfixHash16) { + return new Hasher(feature_size, new XFixHash(16)); + } else { + return new Hasher(feature_size, new XFixHash(32)); + } + } + return nullptr; +} + +Hasher::Hasher(int feature_size, HashEngine* hash_engine) + 
: feature_size_(feature_size), hash_engine_(hash_engine) { + hash_engine_->GetHashCodes(empty_string_, &null_hash_codes_, feature_size_); +} + +std::string ProjectionUnicodeHandler::LowerCaseUTF8WithSupportedUnicodes( + const std::pair& source, bool* first_cap, + bool* all_caps) const { + // Ideally the size of target should be less than or equal to source. But + // when we do to_lower the number of bytes needed to encode a unicode + // character could increase. To account for this 4 times the source length + // is allocated for target. + const char* csource = source.first; + int len = source.second; + auto target = std::unique_ptr(new char[len * 4]); + auto target_ptr = target.get(); + int i = 0; + bool first_char = true; + bool first_cap_value = false; + bool all_caps_value = false; + while (i < len) { + Rune rune; + const int bytes_read = chartorune(&rune, const_cast(csource + i)); + if (bytes_read == 0 || bytes_read > len - i) { + break; + } + i += bytes_read; + if (rune != Runeerror) { + Rune lower = tolowerrune(rune); + // Skip processing the unicode if exclude_nonalphaspace_unicodes_ is + // true and the unicode is not alpha and not space. 
+ const Rune kSpaceRune = ' '; + if (exclude_nonalphaspace_unicodes_ && !isalpharune(lower) && + lower != kSpaceRune) { + continue; + } + if (IsUnrestrictedVocabulary() || IsValidUnicode(lower)) { + const int bytes_written = runetochar(target_ptr, &lower); + target_ptr += bytes_written; + + const bool lower_case = (lower == rune); + if (first_char) { + first_cap_value = !lower_case; + all_caps_value = !lower_case; + } else { + first_cap_value &= lower_case; + all_caps_value &= !lower_case; + } + first_char = false; + } + } + } + if (first_cap) { + *first_cap = first_cap_value; + } + if (all_caps) { + *all_caps = all_caps_value; + } + return std::string(target.get(), target_ptr); +} + +void ProjectionUnicodeHandler::InitializeVocabulary( + const std::string& vocabulary) { + for (size_t i = 0, index = 0; i < vocabulary.length();) { + Rune rune; + const int bytes_read = + chartorune(&rune, const_cast(vocabulary.c_str() + i)); + if (!bytes_read || bytes_read > (vocabulary.length() - i)) { + break; + } + i += bytes_read; + // Include novel lower case unicode segments as part of valid chars. + if (rune == Runeerror) { + std::clog << "Invalid rune in vocabulary."; + } else if (IsValidUnicode(rune)) { + std::clog << "Duplicate rune " << rune << " found in vocabulary."; + } else if (rune != tolowerrune(rune)) { + std::clog << "Upper case rune " << rune << " found in vocabulary."; + } else { + valid_chars_[rune] = index++; + } + } +} + +// Starting from input_ptr[from], search for the next occurrence of ' ', +// Don't search beyond input_ptr[length](non-inclusive), return -1 if not +// found. +inline size_t FindNextSpace(const char* input_ptr, size_t from, size_t length) { + size_t space_index; + for (space_index = from; space_index < length; space_index++) { + if (input_ptr[space_index] == kSpace) { + break; + } + } + return space_index == length ? 
kInvalid : space_index; +} + +template +void SplitBySpaceInternal(std::vector* tokens, const char* input_ptr, + size_t len, size_t max_input, size_t max_tokens) { + size_t last_index = + max_input == kEntireString ? len : (len < max_input ? len : max_input); + size_t start = 0; + // skip leading spaces + while (start < last_index && input_ptr[start] == kSpace) { + start++; + } + auto end = FindNextSpace(input_ptr, start, last_index); + while (end != kInvalid && + (max_tokens == kAllTokens || tokens->size() < max_tokens - 1)) { + auto length = end - start; + if (length > 0) { + tokens->emplace_back(input_ptr + start, length); + } + + start = end + 1; + end = FindNextSpace(input_ptr, start, last_index); + } + auto length = end == kInvalid ? (last_index - start) : (end - start); + if (length > 0) { + tokens->emplace_back(input_ptr + start, length); + } +} + +std::vector> SplitBySpaceAsPairs( + const char* input_ptr, size_t len, size_t max_tokens) { + std::vector> tokens; + SplitBySpaceInternal(&tokens, input_ptr, len, kEntireString, max_tokens); + return tokens; +} + +std::vector SplitBySpace(const char* input_ptr, size_t len, + size_t max_input, size_t max_tokens) { + std::vector tokens; + SplitBySpaceInternal(&tokens, input_ptr, len, max_input, max_tokens); + return tokens; +} + +template +void SplitByCharInternal(std::vector* tokens, const char* input_ptr, + size_t len, size_t max_tokens) { + Rune rune; + for (size_t i = 0; i < len;) { + auto bytes_read = chartorune(&rune, const_cast(input_ptr + i)); + if (bytes_read == 0 || bytes_read > (len - i)) break; + tokens->emplace_back(input_ptr + i, bytes_read); + if (max_tokens != kInvalid && tokens->size() == max_tokens) { + break; + } + i += bytes_read; + } +} + +std::vector> SplitByCharAsPairs( + const char* input_ptr, size_t len, size_t max_tokens) { + std::vector> tokens; + SplitByCharInternal(&tokens, input_ptr, len, max_tokens); + return tokens; +} + +std::vector SplitByChar(const char* input_ptr, size_t len, + 
size_t max_tokens) { + std::vector tokens; + SplitByCharInternal(&tokens, input_ptr, len, max_tokens); + return tokens; +} + +std::string JoinPairsBySpace( + std::vector> words) { + std::stringstream ss; + bool first = true; + for (auto& str_pair : words) { + if (first) { + ss << std::string(str_pair.first, str_pair.second); + first = false; + } else { + ss << kSpace << std::string(str_pair.first, str_pair.second); + } + } + return ss.str(); +} + +std::vector> ProjectionUnicodeHandler::Tokenize( + const char* str, size_t len, bool by_space, int max_tokens) { + return by_space ? SplitBySpaceAsPairs(str, len, max_tokens) + : SplitByCharAsPairs(str, len, max_tokens); +} diff --git a/research/seq_flow_lite/tf_ops/projection_util.h b/research/seq_flow_lite/tf_ops/projection_util.h new file mode 100644 index 0000000000000000000000000000000000000000..5cfa0f2de14a140729b455e7dea4781742e3a8a4 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/projection_util.h @@ -0,0 +1,146 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_UTIL_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_UTIL_H_ +#include +#include +#include +#include + +#include "libutf/utf.h" + +constexpr int kFirstCapOffset = 3; +constexpr int kAllCapsOffset = 4; +constexpr int kWordNoveltyOffset = 1; +constexpr int kDocSizeOffset = 2; + +const char kMurmurHash[] = "murmur"; +const char kXfixHash8[] = "xfixhash8"; +const char kXfixHash16[] = "xfixhash16"; +const char kXfixHash32[] = "xfixhash32"; +const char kUnicodeHash8[] = "unicodehash8"; +const char kUnicodeHash16[] = "unicodehash16"; + +class HashEngine { + public: + virtual void GetHashCodes(const std::string& word, + std::vector* hash_codes, + int feature_size) = 0; + virtual ~HashEngine() {} +}; + +// A hashing wrapper class that can hash a string and generate a hash code with +// requested number of features (two bit values). Some of the implementations +// are copied from murmurhash. +class Hasher { + public: + static Hasher* CreateHasher(int feature_size, + const std::string& hash_type = kMurmurHash); + static bool SupportedHashType(const std::string& hash_type); + bool GetHashCodes(const std::string& word, + std::vector* hash_codes) { + if (!hash_engine_) return false; + if (word.empty()) { + *hash_codes = null_hash_codes_; + } else { + hash_codes->clear(); + hash_engine_->GetHashCodes(word, hash_codes, feature_size_); + } + return true; + } + + private: + explicit Hasher(int feature_size, HashEngine* hash_engine); + const std::string empty_string_ = ""; + const int feature_size_; + std::unique_ptr hash_engine_; + std::vector null_hash_codes_; +}; + +// Unicode processor for tensorflow and tflite string projection ops. +class ProjectionUnicodeHandler { + public: + // Takes an utf8 string which lists the unicodes that are supported and are + // part of the vocabulary of this instance. 
When the utf8 string is empty, + // all unicode segments are supported by this instance. The boolean + // flag exclude_nonalphaspace_unicodes is used to indicate if nonalpha and + // space unicode segments from the input should be stripped out. + // Another way to analyse the filtering logic is as below. + // Vocabulary acts as a allowlist when provided and all unicode set when + // empty. The flag exclude_nonalphaspace_unicodes when true acts as a + // allowlist on all alpha characters and space. It includes the entire unicode + // set when false. Valid unicode segments are the intersection of these 2 + // sets. + explicit ProjectionUnicodeHandler(const std::string& vocabulary, + bool exclude_nonalphaspace_unicodes = false) + : exclude_nonalphaspace_unicodes_(exclude_nonalphaspace_unicodes) { + InitializeVocabulary(vocabulary); + } + + // Performs language independent lower case and returns a string with + // supported unicode segments. + std::string LowerCaseUTF8WithSupportedUnicodes( + const std::pair& source, bool* first_cap = nullptr, + bool* all_caps = nullptr) const; + + // Returns a boolean flag indicating if the unicode segment is part of the + // vocabulary. + bool IsValidUnicode(Rune rune) const { + return valid_chars_.find(rune) != valid_chars_.end(); + } + + // Returns an index in [0, |vocabulary|), if the unicode is part of the + // vocabulary and -1 if it's not. + int UnicodeIndex(Rune rune) const { + return IsValidUnicode(rune) ? valid_chars_.at(rune) : -1; + } + + // Returns |vocabulary|. + size_t NumberOfValidUnicodes() const { return valid_chars_.size(); } + + // Returns true if the vocabulary is empty which means all unicode segments + // are supported. + bool IsUnrestrictedVocabulary() const { return valid_chars_.empty(); } + + // Tokenizes input by space or unicode point segmentation. Limit to + // max_tokens, when it is not -1. 
+ static std::vector> Tokenize( + const std::string& input, bool by_space, int max_tokens) { + return Tokenize(input.c_str(), input.size(), by_space, max_tokens); + } + static std::vector> Tokenize(const char* str, + size_t len, + bool by_space, + int max_tokens); + + private: + // Parses and extracts supported unicode segments from a utf8 string. + void InitializeVocabulary(const std::string& vocabulary); + std::unordered_map valid_chars_; + bool exclude_nonalphaspace_unicodes_; +}; + +static constexpr size_t kEntireString = SIZE_MAX; +static constexpr size_t kAllTokens = SIZE_MAX; + +std::vector SplitBySpace(const char* input_ptr, size_t len, + size_t max_input, size_t max_tokens); + +std::vector SplitByChar(const char* input_ptr, size_t len, + size_t max_tokens); + +std::string JoinPairsBySpace(std::vector> words); + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_PROJECTION_UTIL_H_ diff --git a/research/seq_flow_lite/tf_ops/repo.bzl b/research/seq_flow_lite/tf_ops/repo.bzl new file mode 100644 index 0000000000000000000000000000000000000000..862f3fa5114ec7ab8d291c528c4e5ad839cc7ed8 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/repo.bzl @@ -0,0 +1,335 @@ +"""Reverb custom external dependencies.""" + +# Sanitize a dependency so that it works correctly from code that includes +# reverb as a submodule. +def clean_dep(dep): + return str(Label(dep)) + +def get_python_path(ctx): + path = ctx.os.environ.get("PYTHON_BIN_PATH") + if not path: + fail( + "Could not get environment variable PYTHON_BIN_PATH. 
" + + "Check your .bazelrc file.", + ) + return path + +def _find_tf_include_path(repo_ctx): + exec_result = repo_ctx.execute( + [ + get_python_path(repo_ctx), + "-c", + "import tensorflow as tf; import sys; " + + "sys.stdout.write(tf.sysconfig.get_include())", + ], + quiet = True, + ) + if exec_result.return_code != 0: + fail("Could not locate tensorflow installation path:\n{}" + .format(exec_result.stderr)) + return exec_result.stdout.splitlines()[-1] + +def _find_tf_lib_path(repo_ctx): + exec_result = repo_ctx.execute( + [ + get_python_path(repo_ctx), + "-c", + "import tensorflow as tf; import sys; " + + "sys.stdout.write(tf.sysconfig.get_lib())", + ], + quiet = True, + ) + if exec_result.return_code != 0: + fail("Could not locate tensorflow installation path:\n{}" + .format(exec_result.stderr)) + return exec_result.stdout.splitlines()[-1] + +def _find_numpy_include_path(repo_ctx): + exec_result = repo_ctx.execute( + [ + get_python_path(repo_ctx), + "-c", + "import numpy; import sys; " + + "sys.stdout.write(numpy.get_include())", + ], + quiet = True, + ) + if exec_result.return_code != 0: + fail("Could not locate numpy includes path:\n{}" + .format(exec_result.stderr)) + return exec_result.stdout.splitlines()[-1] + +def _find_python_include_path(repo_ctx): + exec_result = repo_ctx.execute( + [ + get_python_path(repo_ctx), + "-c", + "from distutils import sysconfig; import sys; " + + "sys.stdout.write(sysconfig.get_python_inc())", + ], + quiet = True, + ) + if exec_result.return_code != 0: + fail("Could not locate python includes path:\n{}" + .format(exec_result.stderr)) + return exec_result.stdout.splitlines()[-1] + +def _find_python_solib_path(repo_ctx): + exec_result = repo_ctx.execute( + [ + get_python_path(repo_ctx), + "-c", + "import sys; vi = sys.version_info; " + + "sys.stdout.write('python{}.{}'.format(vi.major, vi.minor))", + ], + ) + if exec_result.return_code != 0: + fail("Could not locate python shared library path:\n{}" + 
.format(exec_result.stderr)) + version = exec_result.stdout.splitlines()[-1] + basename = "lib{}.so".format(version) + exec_result = repo_ctx.execute( + ["{}-config".format(version), "--configdir"], + quiet = True, + ) + if exec_result.return_code != 0: + fail("Could not locate python shared library path:\n{}" + .format(exec_result.stderr)) + solib_dir = exec_result.stdout.splitlines()[-1] + full_path = repo_ctx.path("{}/{}".format(solib_dir, basename)) + if not full_path.exists: + fail("Unable to find python shared library file:\n{}/{}" + .format(solib_dir, basename)) + return struct(dir = solib_dir, basename = basename) + +def _eigen_archive_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink(tf_include_path, "tf_includes") + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "includes", + hdrs = glob(["tf_includes/Eigen/**/*.h", + "tf_includes/Eigen/**", + "tf_includes/unsupported/Eigen/**/*.h", + "tf_includes/unsupported/Eigen/**"]), + # https://groups.google.com/forum/#!topic/bazel-discuss/HyyuuqTxKok + includes = ["tf_includes"], + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +def _nsync_includes_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink(tf_include_path + "/external", "nsync_includes") + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "includes", + hdrs = glob(["nsync_includes/nsync/public/*.h"]), + includes = ["nsync_includes"], + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +def _zlib_includes_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink( + tf_include_path + "/external/zlib", + "zlib", + ) + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "includes", + hdrs = glob(["zlib/**/*.h"]), + includes = ["zlib"], + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +def 
_snappy_includes_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink( + tf_include_path + "/external/snappy", + "snappy", + ) + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "includes", + hdrs = glob(["snappy/*.h"]), + includes = ["snappy"], + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +def _protobuf_includes_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink(tf_include_path, "tf_includes") + repo_ctx.symlink(Label("//third_party:protobuf.BUILD"), "BUILD") + +def _tensorflow_includes_repo_impl(repo_ctx): + tf_include_path = _find_tf_include_path(repo_ctx) + repo_ctx.symlink(tf_include_path, "tensorflow_includes") + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "includes", + hdrs = glob( + [ + "tensorflow_includes/**/*.h", + "tensorflow_includes/third_party/eigen3/**", + ], + exclude = ["tensorflow_includes/absl/**/*.h"], + ), + includes = ["tensorflow_includes"], + deps = [ + "@eigen_archive//:eigen", + "@protobuf_archive//:includes", + "@zlib_includes//:includes", + "@snappy_includes//:includes", + ], + visibility = ["//visibility:public"], +) +filegroup( + name = "protos", + srcs = glob(["tensorflow_includes/**/*.proto"]), + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +def _tensorflow_solib_repo_impl(repo_ctx): + tf_lib_path = _find_tf_lib_path(repo_ctx) + repo_ctx.symlink(tf_lib_path, "tensorflow_solib") + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "framework_lib", + srcs = ["tensorflow_solib/libtensorflow_framework.so.2"], + deps = ["@python_includes", "@python_includes//:numpy_includes"], + visibility = ["//visibility:public"], +) +""", + ) + +def _python_includes_repo_impl(repo_ctx): + python_include_path = _find_python_include_path(repo_ctx) + python_solib = _find_python_solib_path(repo_ctx) + repo_ctx.symlink(python_include_path, "python_includes") + 
numpy_include_path = _find_numpy_include_path(repo_ctx) + repo_ctx.symlink(numpy_include_path, "numpy_includes") + repo_ctx.symlink( + "{}/{}".format(python_solib.dir, python_solib.basename), + python_solib.basename, + ) + + # Note, "@python_includes" is a misnomer since we include the + # libpythonX.Y.so in the srcs, so we can get access to python's various + # symbols at link time. + repo_ctx.file( + "BUILD", + content = """ +cc_library( + name = "python_includes", + hdrs = glob(["python_includes/**/*.h"]), + srcs = ["{}"], + includes = ["python_includes"], + visibility = ["//visibility:public"], +) +cc_library( + name = "numpy_includes", + hdrs = glob(["numpy_includes/**/*.h"]), + includes = ["numpy_includes"], + visibility = ["//visibility:public"], +) +""".format(python_solib.basename), + executable = False, + ) + +def cc_tf_configure(): + """Autoconf pre-installed tensorflow repo.""" + make_nsync_repo = repository_rule( + implementation = _nsync_includes_repo_impl, + ) + make_nsync_repo(name = "nsync_includes") + make_zlib_repo = repository_rule( + implementation = _zlib_includes_repo_impl, + ) + make_zlib_repo(name = "zlib_includes") + make_snappy_repo = repository_rule( + implementation = _snappy_includes_repo_impl, + ) + make_snappy_repo(name = "snappy_includes") + make_protobuf_repo = repository_rule( + implementation = _protobuf_includes_repo_impl, + ) + make_protobuf_repo(name = "protobuf_archive") + make_tfinc_repo = repository_rule( + implementation = _tensorflow_includes_repo_impl, + ) + make_tfinc_repo(name = "tensorflow_includes") + make_tflib_repo = repository_rule( + implementation = _tensorflow_solib_repo_impl, + ) + make_tflib_repo(name = "tensorflow_solib") + make_python_inc_repo = repository_rule( + implementation = _python_includes_repo_impl, + ) + make_python_inc_repo(name = "python_includes") + +def _reverb_protoc_archive(ctx): + version = ctx.attr.version + sha256 = ctx.attr.sha256 + + override_version = 
ctx.os.environ.get("REVERB_PROTOC_VERSION") + if override_version: + sha256 = "" + version = override_version + + urls = [ + "https://github.com/protocolbuffers/protobuf/releases/download/v%s/protoc-%s-linux-x86_64.zip" % (version, version), + ] + ctx.download_and_extract( + url = urls, + sha256 = sha256, + ) + + ctx.file( + "BUILD", + content = """ +filegroup( + name = "protoc_bin", + srcs = ["bin/protoc"], + visibility = ["//visibility:public"], +) +""", + executable = False, + ) + +reverb_protoc_archive = repository_rule( + implementation = _reverb_protoc_archive, + attrs = { + "version": attr.string(mandatory = True), + "sha256": attr.string(mandatory = True), + }, +) + +def reverb_protoc_deps(version, sha256): + reverb_protoc_archive(name = "protobuf_protoc", version = version, sha256 = sha256) diff --git a/research/seq_flow_lite/tf_ops/sequence_string_projection.cc b/research/seq_flow_lite/tf_ops/sequence_string_projection.cc new file mode 100644 index 0000000000000000000000000000000000000000..dd99bbe6af2af9976b59d77c7442ad86ad8bd086 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/sequence_string_projection.cc @@ -0,0 +1,362 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite +#include "tf_ops/projection_tokenizer_util.h" // seq_flow_lite +#include "tf_ops/projection_util.h" // seq_flow_lite +#include "tf_ops/text_distorter.h" // seq_flow_lite +#include "absl/container/flat_hash_map.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" + +using ::tensorflow::int32; +using ::tensorflow::int64; +using ::tensorflow::OpKernel; +using ::tensorflow::OpKernelConstruction; +using ::tensorflow::OpKernelContext; +using ::tensorflow::Tensor; +using ::tensorflow::TensorShape; +using ::tensorflow::TensorShapeUtils; +using ::tensorflow::uint64; +using ::tensorflow::errors::InvalidArgument; + +using tensorflow::shape_inference::DimensionHandle; +using tensorflow::shape_inference::InferenceContext; + +constexpr char kBeginTokenTSP[] = ""; +constexpr char kEndTokenTSP[] = ""; + +float* AllocateTensor(OpKernelContext* ctx, const std::string& tensor_name, + const TensorShape& tensor_shape) { + Tensor* tensor = nullptr; + auto status = ctx->allocate_output(tensor_name, tensor_shape, &tensor); + if (!TF_PREDICT_TRUE(status.ok())) { + ctx->CtxFailureWithWarning(__FILE__, __LINE__, status); + return nullptr; + } + return &tensor->flat()(0); +} + +class SequenceStringProjectionOp : public OpKernel { + public: + explicit SequenceStringProjectionOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("feature_size", &feature_size_)); + std::string hashtype; + OP_REQUIRES_OK(context, context->GetAttr("hashtype", &hashtype)); + hasher_ = + absl::WrapUnique(Hasher::CreateHasher(feature_size_, hashtype)); + CHECK(hasher_); + float distortion_probability = 0.0; + 
OP_REQUIRES_OK(context, context->GetAttr("distortion_probability", + &distortion_probability)); + text_distorter_ = absl::make_unique(distortion_probability); + OP_REQUIRES_OK(context, + context->GetAttr("split_on_space", &split_on_space_)); + OP_REQUIRES_OK(context, context->GetAttr("max_splits", &max_splits_)); + OP_REQUIRES_OK(context, context->GetAttr("vocabulary", &vocabulary_)); + bool add_bos_tag; + OP_REQUIRES_OK(context, context->GetAttr("add_bos_tag", &add_bos_tag)); + bos_tag_ = add_bos_tag ? 1 : 0; + bool add_eos_tag; + OP_REQUIRES_OK(context, context->GetAttr("add_eos_tag", &add_eos_tag)); + eos_tag_ = add_eos_tag ? 1 : 0; + // When word_novelty_bits is set to a positive integer, the last feature + // generated by the op captures the token frequency. + OP_REQUIRES_OK(context, + context->GetAttr("word_novelty_bits", &word_novelty_bits_)); + CHECK_GE(word_novelty_bits_, 0); + CHECK_LE(word_novelty_bits_, 7); + if (word_novelty_bits_ != 0) { + CHECK_GE(feature_size_, 1); + } + // When doc_size_levels is set to a positive integer, the second to last + // feature generated by the op is derived from the log of the document + // size. 
+ OP_REQUIRES_OK(context, + context->GetAttr("doc_size_levels", &doc_size_levels_)); + CHECK_GE(doc_size_levels_, 0); + CHECK_LE(doc_size_levels_, 16); + if (doc_size_levels_ != 0) { + CHECK_GE(feature_size_, 2); + } + word_novelty_offset_ = 1.0f / (1 << word_novelty_bits_); + bool exclude_nonalphaspace_unicodes; + OP_REQUIRES_OK(context, context->GetAttr("exclude_nonalphaspace_unicodes", + &exclude_nonalphaspace_unicodes)); + if (!vocabulary_.empty()) { + CHECK(!exclude_nonalphaspace_unicodes); + } + unicode_handler_ = absl::make_unique( + vocabulary_, exclude_nonalphaspace_unicodes); + vocabulary_size_ = unicode_handler_->NumberOfValidUnicodes(); + + bool normalize_repetition; + OP_REQUIRES_OK(context, context->GetAttr("normalize_repetition", + &normalize_repetition)); + std::string separators; + OP_REQUIRES_OK(context, context->GetAttr("token_separators", &separators)); + if (!separators.empty() || normalize_repetition) { + projection_normalizer_ = absl::make_unique( + separators, normalize_repetition); + } + + OP_REQUIRES_OK(context, context->GetAttr("add_first_cap_feature", + &add_first_cap_feature_)); + CHECK_GE(add_first_cap_feature_, 0.0); + CHECK_LE(add_first_cap_feature_, 1.0); + if (add_first_cap_feature_ > 0.0) { + CHECK_GE(feature_size_, 3); + } + + OP_REQUIRES_OK(context, context->GetAttr("add_all_caps_feature", + &add_all_caps_feature_)); + CHECK_GE(add_all_caps_feature_, 0.0); + CHECK_LE(add_all_caps_feature_, 1.0); + if (add_all_caps_feature_ > 0.0) { + CHECK_GE(feature_size_, 4); + } + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_tensor->shape()), + InvalidArgument("input must be a vector, got shape: ", + input_tensor->shape().DebugString())); + + auto input_vec = input_tensor->vec<::tensorflow::tstring>(); + const int64 batch_size = input_vec.dimension(0); + std::vector>> words_batches; + int64 
max_seq_len = 0; + words_batches.reserve(batch_size); + std::vector normalized_input_vec(batch_size); + for (int64 i = 0; i < batch_size; ++i) { + std::vector> words; + if (projection_normalizer_ == nullptr) { + words = + unicode_handler_->Tokenize(input_vec(i).data(), input_vec(i).size(), + split_on_space_, max_splits_); + } else { + normalized_input_vec[i] = projection_normalizer_->Normalize( + input_vec(i).data(), input_vec(i).size(), SIZE_MAX); + words = unicode_handler_->Tokenize(normalized_input_vec[i], + split_on_space_, max_splits_); + } + const int64 seq_len = + static_cast(bos_tag_ + words.size() + eos_tag_); + CHECK_GT(seq_len, 0); + max_seq_len = std::max(max_seq_len, seq_len); + words_batches.emplace_back(std::move(words)); + } + + auto projection = + AllocateTensor(ctx, "projection", + TensorShape({batch_size, max_seq_len, feature_size_})); + AllocateTensor(ctx, "dummy_output", TensorShape({1})); + auto sequence_length = + AllocateTensor(ctx, "sequence_length", TensorShape({batch_size})); + if (!projection || !sequence_length) { + LOG(ERROR) << "Unable to create buffer!"; + return; + } + + const float mapping_table[4] = {0, 1, -1, 0}; + const int increment = 32; + std::vector hash_codes; + absl::flat_hash_map word_counter; + for (int64 i = 0; i < batch_size; ++i) { + word_counter.clear(); + const int64 num_tokens = words_batches[i].size(); + sequence_length[i] = bos_tag_ + num_tokens + eos_tag_; + int64 offset0 = i * max_seq_len * feature_size_; + // Calculate doc_size_feature in [0, infinity) + float doc_size_feature = + (doc_size_levels_ != 0) + ? std::log2(static_cast(num_tokens)) / doc_size_levels_ + : 0.0f; + // Rescale doc_size_feature to [-1, 1]. + doc_size_feature = std::min(doc_size_feature, 1.0f) * 2.0f - 1.0f; + for (int64 j = -bos_tag_; j < num_tokens + eos_tag_; ++j) { + std::string word; + bool first_cap = false; + bool all_caps = false; + if (j < 0) { + // Use a special tag for begin of sentence. 
+ word = kBeginTokenTSP; + } else if (j < num_tokens) { + auto uword = icu::UnicodeString::fromUTF8( + unicode_handler_->LowerCaseUTF8WithSupportedUnicodes( + words_batches[i][j], &first_cap, &all_caps)); + word = text_distorter_->DistortText(&uword); + } else { + // Use a special tag for end of sentence. + CHECK_EQ(eos_tag_, 1); + word = kEndTokenTSP; + } + hasher_->GetHashCodes(word, &hash_codes); + for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) { + auto hash = hash_codes[hindex]; + for (int kmax = std::min(k + increment, feature_size_); k < kmax;) { + projection[offset0 + k++] = mapping_table[hash & 0x3]; + hash >>= 2; + } + } + if (word_novelty_bits_ != 0 && !hash_codes.empty()) { + const auto word_hash = hash_codes[0]; + projection[offset0 + feature_size_ - kWordNoveltyOffset] = + std::min((word_counter[word_hash]++ * word_novelty_offset_), + 1.0f) * + 2.0f - + 1.0f; + } + if (doc_size_levels_ != 0) { + projection[offset0 + feature_size_ - kDocSizeOffset] = + doc_size_feature; + } + if (add_first_cap_feature_ > 0.0f) { + if (text_distorter_->BernouilleSample(add_first_cap_feature_)) { + projection[offset0 + feature_size_ - kFirstCapOffset] = + first_cap ? 1.0 : -1.0; + } else { + projection[offset0 + feature_size_ - kFirstCapOffset] = 0.0; + } + } + if (add_all_caps_feature_ > 0.0f) { + if (text_distorter_->BernouilleSample(add_all_caps_feature_)) { + projection[offset0 + feature_size_ - kAllCapsOffset] = + all_caps ? 
1.0 : -1.0; + } else { + projection[offset0 + feature_size_ - kAllCapsOffset] = 0.0; + } + } + offset0 += feature_size_; + } + const int pending = (max_seq_len - (bos_tag_ + num_tokens + eos_tag_)); + memset(projection + offset0, 0, pending * feature_size_ * sizeof(float)); + } + } + + private: + int32 feature_size_; + std::unique_ptr hasher_; + std::unique_ptr text_distorter_; + std::unique_ptr unicode_handler_; + std::unique_ptr projection_normalizer_; + std::string vocabulary_; + int vocabulary_size_; + int32 max_splits_; + bool split_on_space_; + int eos_tag_; + int bos_tag_; + int word_novelty_bits_; + int doc_size_levels_; + float word_novelty_offset_; + float add_first_cap_feature_; + float add_all_caps_feature_; +}; + +REGISTER_KERNEL_BUILDER( + Name("SequenceStringProjection").Device(::tensorflow::DEVICE_CPU), + SequenceStringProjectionOp); + +REGISTER_OP("SequenceStringProjection") + .Input("input: string") + .Output("projection: float32") + .Output("dummy_output: float32") + .Output("sequence_length: float32") + .Attr("feature_size: int") + .Attr("distortion_probability: float = 0.0") + .Attr("vocabulary: string = ''") + .Attr("hashtype: string = 'murmur'") + .Attr("max_splits: int = -1") + .Attr("exclude_nonalphaspace_unicodes: bool = False") + .Attr("add_bos_tag: bool = False") + .Attr("add_eos_tag: bool = True") + .Attr("add_first_cap_feature: float = 0.0") + .Attr("add_all_caps_feature: float = 0.0") + .Attr("word_novelty_bits: int = 0") + .Attr("doc_size_levels: int = 0") + .Attr("split_on_space: bool = True") + .Attr("token_separators: string = ''") + .Attr("normalize_repetition: bool = false") + .SetShapeFn([](InferenceContext* c) { + DimensionHandle size; + + int32 feature_size; + TF_RETURN_IF_ERROR(c->GetAttr("feature_size", &feature_size)); + const int kMaxFeatureSize = 4096; + CHECK_GE(feature_size, 0); + CHECK_LE(feature_size, kMaxFeatureSize); + auto batch_size = c->Dim(c->input(0), 0); + c->set_output(0, c->MakeShape({batch_size, 
InferenceContext::kUnknownDim, + feature_size})); + c->set_output(1, c->MakeShape({1})); + c->set_output(2, c->MakeShape({batch_size})); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +This op referred to as Ternary Sequence String Projection op (TSP), tokenizes +input text either on space or unicode boundary. Fingerprint for each token is +computed using murmur hash and bit features are extracted from the fingerprint +that maps every 2 bits to the ternary output {-1, 0, 1}. This effectively turns +a batch of text input into a ternary rank 3 tensor (in float format) of shape +[batch size, max token length, requested number of features]. + +Input(s): +- input: A string tensor with batch size number of elements. + +Attribute(s): +- feature_size: Length of the ternary vector generated for each token. +- distortion_probability: When non zero distort the input text with this + probability. Helps as a regularization method when training data set is + small. +- vocabulary: When not empty provides a list of unique unicode characters that + will be allowed in the input text before fingerprinting. Another way to + say it is that the vocabulary is an optional character allowlist for the + input text. It helps normalize the text. +- hashtype: Hashing method to use for projection. +- max_splits: Maximum number of tokens that are allowed. It helps restrict the + max token length of the projection output. When the value is -1 the op + does not restrict the number of tokens in the output. +- exclude_nonalphaspace_unicodes: When true excludes all unicodes that are + not alphabets or space character. This is multilingual. Though the effect + of this flag can be achieved using vocabulary, the vocabulary will have to + be very large for multilingual input. +- add_bos_tag: When true inserts a begin of sentence tag. +- add_eos_tag: When true inserts a end of sentence tag. 
+- word_novelty_bits: When true adds a special feature to the ternary output + that captures the frequency of occurrence of a particular token. This is an + experimental feature. +- doc_size_levels: When true adds a special feature to the ternary projection + output the document size in log scale. This is an experimental feature. +- split_on_space: When true tokenization is done on space segmentation. + Otherwise tokenization is done by segmenting on unicode boundary. +- add_first_cap_feature: Specifies the probability with which a feature to the + resulting projection tensor that helps discriminate if the input token is + Camel case will be added. +- add_all_caps_feature: Specifies the probability with which a feature to the + resulting projection tensor that helps discriminate if the input token is + ALLCAPS will be added. + +Output(s): +- projection: Floating point tensor with ternary values of shape + [batch size, max token length, requested number of features]. +- dummy_output: Ignore this output, will be eliminated in a subsequent version. +- sequence_length: Batch size length vector containing the number of tokens for + each input text entry. +)doc"); diff --git a/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2.cc b/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2.cc new file mode 100644 index 0000000000000000000000000000000000000000..ac3ec3f4f0ed4f205ae45c92eda8b699180714b8 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2.cc @@ -0,0 +1,233 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite +#include "tf_ops/projection_util.h" // seq_flow_lite +#include "tf_ops/text_distorter.h" // seq_flow_lite + +using ::tensorflow::int32; +using ::tensorflow::int64; +using ::tensorflow::OpKernel; +using ::tensorflow::OpKernelConstruction; +using ::tensorflow::OpKernelContext; +using ::tensorflow::Tensor; +using ::tensorflow::TensorShapeUtils; +using ::tensorflow::uint64; +using ::tensorflow::errors::InvalidArgument; + +using ::tensorflow::shape_inference::DimensionHandle; +using ::tensorflow::shape_inference::InferenceContext; +using ::tensorflow::shape_inference::ShapeHandle; + +constexpr char kBeginTokenTSP[] = ""; +constexpr char kEndTokenTSP[] = ""; +constexpr float kMappingTable[4] = {0, 1, -1, 0}; +constexpr int kIncrement = 32; + +template +class SequenceStringProjectionOpV2 : public OpKernel { + public: + explicit SequenceStringProjectionOpV2(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("feature_size", &feature_size_)); + std::string hashtype; + OP_REQUIRES_OK(context, context->GetAttr("hashtype", &hashtype)); + hasher_ = + absl::WrapUnique(Hasher::CreateHasher(feature_size_, hashtype)); + + float distortion_probability = 
0.0; + OP_REQUIRES_OK(context, context->GetAttr("distortion_probability", + &distortion_probability)); + text_distorter_ = absl::make_unique(distortion_probability); + + OP_REQUIRES_OK(context, context->GetAttr("vocabulary", &vocabulary_)); + unicode_handler_ = absl::make_unique(vocabulary_); + + bool add_bos_tag; + OP_REQUIRES_OK(context, context->GetAttr("add_bos_tag", &add_bos_tag)); + bos_tag_ = add_bos_tag ? 1 : 0; + + bool add_eos_tag; + OP_REQUIRES_OK(context, context->GetAttr("add_eos_tag", &add_eos_tag)); + eos_tag_ = add_eos_tag ? 1 : 0; + + bool normalize_repetition; + OP_REQUIRES_OK(context, context->GetAttr("normalize_repetition", + &normalize_repetition)); + if (normalize_repetition) { + projection_normalizer_ = absl::make_unique( + std::string(), normalize_repetition); + } + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(input_tensor->shape()), + InvalidArgument("`input` must be a matrix, got shape: ", + input_tensor->shape().DebugString())); + auto input_matrix = input_tensor->matrix<::tensorflow::tstring>(); + const int64 batch_size = input_matrix.dimension(0); + const int64 max_seq_len = input_matrix.dimension(1); + + const Tensor* seq_len; + OP_REQUIRES_OK(ctx, ctx->input("sequence_length", &seq_len)); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(seq_len->shape()), + InvalidArgument("`sequence_length` must be a vector, got shape: ", + seq_len->shape().DebugString())); + auto seq_len_flat = seq_len->flat(); + + OP_REQUIRES( + ctx, seq_len_flat.size() == batch_size, + InvalidArgument("`sequence_length` should have batch size number " + "of elements, got size ", + seq_len_flat.size(), ", batch size is ", batch_size)); + + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK( + ctx, ctx->allocate_output( + "projection", + {batch_size, bos_tag_ + max_seq_len + eos_tag_, feature_size_}, + &output_tensor)); + 
float* projection = &output_tensor->flat()(0); + + std::vector hash_codes; + for (int64 i = 0; i < batch_size; ++i) { + const int64 num_tokens = seq_len_flat(i); + OP_REQUIRES(ctx, num_tokens >= 0, + InvalidArgument("`sequence_length` should have values " + "greater than or equal to 0")); + OP_REQUIRES(ctx, num_tokens <= max_seq_len, + InvalidArgument("`sequence_length` should have values less " + "than or equal to max_seq_len")); + + int64 offset0 = i * (bos_tag_ + max_seq_len + eos_tag_) * feature_size_; + for (int64 j = -bos_tag_; j < num_tokens + eos_tag_; ++j) { + std::string word; + if (j < 0) { + word = kBeginTokenTSP; + } else if (j < num_tokens) { + auto token = std::pair(input_matrix(i, j).data(), + input_matrix(i, j).size()); + auto uword = icu::UnicodeString::fromUTF8( + unicode_handler_->LowerCaseUTF8WithSupportedUnicodes(token)); + word = text_distorter_->DistortText(&uword); + if (projection_normalizer_) { + word = projection_normalizer_->Normalize(word.data(), word.size(), + SIZE_MAX); + } + } else { + word = kEndTokenTSP; + } + hasher_->GetHashCodes(word, &hash_codes); + for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) { + auto hash = hash_codes[hindex]; + for (int kmax = std::min(k + kIncrement, feature_size_); k < kmax;) { + projection[offset0 + k++] = kMappingTable[hash & 0x3]; + hash >>= 2; + } + } + offset0 += feature_size_; + } + const int fill_length = (max_seq_len - num_tokens) * feature_size_; + float* fill_start = projection + offset0; + std::fill(fill_start, fill_start + fill_length, 0.0f); + } + } + + private: + int32 feature_size_; + std::unique_ptr hasher_; + std::unique_ptr text_distorter_; + std::unique_ptr unicode_handler_; + std::unique_ptr projection_normalizer_; + std::string vocabulary_; + int eos_tag_; + int bos_tag_; +}; + +REGISTER_KERNEL_BUILDER(Name("SequenceStringProjectionV2") + .Device(::tensorflow::DEVICE_CPU) + .TypeConstraint("Tsequence_length"), + SequenceStringProjectionOpV2); 
+REGISTER_KERNEL_BUILDER(Name("SequenceStringProjectionV2") + .Device(::tensorflow::DEVICE_CPU) + .TypeConstraint("Tsequence_length"), + SequenceStringProjectionOpV2); + +REGISTER_OP("SequenceStringProjectionV2") + .Input("input: string") + .Input("sequence_length: Tsequence_length") + .Output("projection: float32") + .Attr("feature_size: int") + .Attr("distortion_probability: float = 0.0") + .Attr("vocabulary: string = ''") + .Attr("hashtype: string = 'murmur'") + .Attr("add_bos_tag: bool = False") + .Attr("add_eos_tag: bool = False") + .Attr("normalize_repetition: bool = False") + .Attr("Tsequence_length: {int32, int64}") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + DimensionHandle size; + + int32 feature_size; + TF_RETURN_IF_ERROR(c->GetAttr("feature_size", &feature_size)); + const int kMaxFeatureSize = 4096; + CHECK_GT(feature_size, 0); + CHECK_LE(feature_size, kMaxFeatureSize); + ShapeHandle output_shape; + TF_RETURN_IF_ERROR(c->Concatenate( + c->input(0), c->MakeShape({feature_size}), &output_shape)); + c->set_output(0, output_shape); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +This op referred to as Ternary Sequence String Projection Op V2 (TSPV2), +works with presegmented string `input`. It fingerprints each token using murmur +hash and extracts bit features from the fingerprint that maps every 2 bits to +the ternary output {-1, 0, 1}. This effectively turns a batch of text segments +into a ternary rank 3 tensor (in float format) of shape +[batch size, max sequence length, requested number of features]. + +Input(s): +- input: A string tensor with [batch size, max sequence length] tokens. +- sequence_length: A vector with batch size number of integers, where each + integer is in (0, max sequence length], and represents the number of valid + text segments in each batch entry. + +Attribute(s): +- feature_size: Length of the ternary vector generated for each token. 
+- distortion_probability: When non zero distort the input tokens with this + probability. Helps as a regularization method when training data set is + small. +- vocabulary: When not empty provides a list of unique unicode characters that + will be allowed in the input text before fingerprinting. Expressed another + way the vocabulary is an optional character allowlist for the + input tokens. It helps normalize the text. +- hashtype: Hashing method to use for projection. +- add_bos_tag: When true inserts a begin of sentence tag. +- add_eos_tag: When true inserts a end of sentence tag. +- normalize_repetition: When true normalizes repetition in text tokens before + fingerprinting. + +Output(s): +- projection: Floating point tensor with ternary values of shape + [batch size, max sequence length, requested number of features]. +)doc"); diff --git a/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2_test.cc b/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4319009be1012108e1f9294a46252bf8b59732aa --- /dev/null +++ b/research/seq_flow_lite/tf_ops/sequence_string_projection_op_v2_test.cc @@ -0,0 +1,278 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" + +namespace { + +using ::tensorflow::DT_INT32; +using ::tensorflow::DT_STRING; +using ::tensorflow::int32; +using ::tensorflow::NodeDefBuilder; +using ::tensorflow::OpsTestBase; +using ::tensorflow::Tensor; +using ::tensorflow::TensorShape; + +class SequenceStringProjectionOpV2Test : public OpsTestBase { + protected: + bool FeatureMatches(const Tensor& output, int i1, int j1, int i2, int j2) { + bool all_matches = true; + auto output_tensor = output.tensor(); + for (int k = 0; k < output.dim_size(2); ++k) { + all_matches &= (output_tensor(i1, j1, k) == output_tensor(i2, j2, k)); + } + return all_matches; + } + bool FeatureIsZero(const Tensor& output, int i, int j) { + auto output_tensor = output.tensor(); + bool all_zeros = true; + for (int k = 0; k < output.dim_size(2); ++k) { + all_zeros &= (output_tensor(i, j, k) == 0.0f); + } + return all_zeros; + } +}; + +TEST_F(SequenceStringProjectionOpV2Test, TestOutput) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjectionV2") + .Input({"input", 1, DT_STRING}) + .Input({"sequence_length", 1, DT_INT32}) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2, 8, 1}), + {"hello", "world", "147", "dog", "xyz", "abc", "efg", "hij", "quick", + "hel1lo", "123", "jumped", "over", "the", "lazy", "dog"}); + + AddInputFromArray(TensorShape({3, 1}), {9, 0, 9}); + + EXPECT_EQ(RunOpKernel().error_message(), + "`input` must be a matrix, got shape: [2,8,1]"); + + auto old = *mutable_input(0).tensor; + *mutable_input(0).tensor = Tensor(DT_STRING, TensorShape({2, 8})); + 
(*mutable_input(0).tensor).flat<::tensorflow::tstring>() = + old.flat<::tensorflow::tstring>(); + + EXPECT_EQ(RunOpKernel().error_message(), + "`sequence_length` must be a vector, got shape: [3,1]"); + + *mutable_input(1).tensor = Tensor(DT_INT32, TensorShape({3})); + + EXPECT_EQ(RunOpKernel().error_message(), + "`sequence_length` should have batch size number of elements, got " + "size 3, batch size is 2"); + + *mutable_input(1).tensor = Tensor(DT_INT32, TensorShape({2})); + (*mutable_input(1).tensor).flat()(0) = 9; + (*mutable_input(1).tensor).flat()(1) = -1; + + EXPECT_EQ( + RunOpKernel().error_message(), + "`sequence_length` should have values less than or equal to max_seq_len"); + + (*mutable_input(1).tensor).flat()(0) = 4; + + EXPECT_EQ(RunOpKernel().error_message(), + "`sequence_length` should have values greater than or equal to 0"); + + (*mutable_input(1).tensor).flat()(1) = 8; + + TF_EXPECT_OK(RunOpKernel()); + + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 8); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 7)); // dog == dog. + // Check zero padding for first sentence. 
+ for (int i = 4; i < 8; ++i) { + EXPECT_TRUE(FeatureIsZero(output, 0, i)); + } +} + +TEST_F(SequenceStringProjectionOpV2Test, TestOutputBoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjectionV2") + .Input({"input", 1, DT_STRING}) + .Input({"sequence_length", 1, DT_INT32}) + .Attr("add_bos_tag", true) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2, 8}), + {"hello", "world", "147", "dog", "", "", "", "", "quick", "hel1lo", "123", + "jumped", "over", "the", "lazy", "dog"}); + + AddInputFromArray(TensorShape({2}), {4, 8}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 9); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // == . + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 2, 1, 2)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 2)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // dog == dog. + // Check zero padding for first sentence. 
+ for (int i = 5; i < 9; ++i) { + EXPECT_TRUE(FeatureIsZero(output, 0, i)); + } +} + +TEST_F(SequenceStringProjectionOpV2Test, TestOutputEoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjectionV2") + .Input({"input", 1, DT_STRING}) + .Input({"sequence_length", 1, DT_INT32}) + .Attr("add_eos_tag", true) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2, 8}), + {"hello", "world", "147", "dog", "", "", "", "", "quick", "hel1lo", "123", + "jumped", "over", "the", "lazy", "dog"}); + + AddInputFromArray(TensorShape({2}), {4, 8}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 9); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 7)); // dog == dog. + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // == . + // Check zero padding for first sentence. 
+ for (int i = 5; i < 9; ++i) { + EXPECT_TRUE(FeatureIsZero(output, 0, i)); + } +} + +TEST_F(SequenceStringProjectionOpV2Test, TestOutputBoSEoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjectionV2") + .Input({"input", 1, DT_STRING}) + .Input({"sequence_length", 1, DT_INT32}) + .Attr("add_bos_tag", true) + .Attr("add_eos_tag", true) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz.") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2, 8}), + {"hello", "world", "147", "dog", "...", "..", "", "", "quick", "hel1lo", + "123", "jumped", "over", "the", "lazy", "dog"}); + + AddInputFromArray(TensorShape({2}), {6, 8}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 10); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // == . + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 2, 1, 2)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 2)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // dog == dog. + EXPECT_TRUE(FeatureMatches(output, 0, 7, 1, 9)); // == . + // Check for default normalize_repetition=false + EXPECT_FALSE(FeatureMatches(output, 0, 4, 0, 5)); // ... != .. + // Check zero padding for first sentence. 
+ for (int i = 8; i < 10; ++i) { + EXPECT_TRUE(FeatureIsZero(output, 0, i)); + } +} + +TEST_F(SequenceStringProjectionOpV2Test, TestOutputNormalize) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjectionV2") + .Input({"input", 1, DT_STRING}) + .Input({"sequence_length", 1, DT_INT32}) + .Attr("normalize_repetition", true) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz.") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2, 8}), + {"hello", "world", "..", "....", "", "", "", "", "quick", "hel1lo", "123", + "jumped", "over", "...", ".....", "dog"}); + + AddInputFromArray(TensorShape({2}), {4, 8}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 8); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 2, 0, 3)); // .. == .... + EXPECT_TRUE(FeatureMatches(output, 1, 5, 1, 6)); // ... == .. + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 6)); // .... == ... + // Check zero padding for first sentence. + for (int i = 4; i < 8; ++i) { + EXPECT_TRUE(FeatureIsZero(output, 0, i)); + } +} + +} // namespace + +int main(int argc, char** argv) { + // On Linux, add: absl::SetFlag(&FLAGS_logtostderr, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/research/seq_flow_lite/tf_ops/sequence_string_projection_test.cc b/research/seq_flow_lite/tf_ops/sequence_string_projection_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d33640fae789ef971c701005e5c20b9a5b3063a9 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/sequence_string_projection_test.cc @@ -0,0 +1,594 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/shape_inference_testutil.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +using ::tensorflow::DT_STRING; +using ::tensorflow::NodeDefBuilder; +using ::tensorflow::OpsTestBase; +using ::tensorflow::Tensor; +using ::tensorflow::TensorShape; + +class SequenceStringProjectionOpTest : public OpsTestBase { + protected: + const float* FeatureIndex(const Tensor& output, int i, int j) { + return &output.flat()((i * output.dim_size(2) * output.dim_size(1)) + + (j * output.dim_size(2))); + } + bool FeatureMatches(const Tensor& output, int i1, int j1, int i2, int j2) { + const float* feature1 = FeatureIndex(output, i1, j1); + const float* feature2 = FeatureIndex(output, i2, j2); + bool all_matches = true; + for (int i = 0; i < output.dim_size(2); ++i) { + all_matches 
&= (feature1[i] == feature2[i]); + } + return all_matches; + } + void FeatureIsZero(const Tensor& output, int i, int j) { + const float* feature = FeatureIndex(output, i, j); + bool all_zeros = true; + for (int i = 0; i < output.dim_size(2); ++i) { + all_zeros &= (feature[i] == 0.0f); + } + EXPECT_TRUE(all_zeros); + } +}; + +TEST_F(SequenceStringProjectionOpTest, TestOutputEoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 9); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 7)); // dog == dog. + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // == . + // Check zero padding for features after "" for first sentence. 
+ for (int i = 5; i < 9; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 5); + EXPECT_EQ(sequence_length.flat()(1), 9); +} + +TEST_F(SequenceStringProjectionOpTest, TestOutputBoSEoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("add_bos_tag", true) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 10); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // bos == bos. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 2, 1, 2)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 2)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // dog == dog. + EXPECT_TRUE(FeatureMatches(output, 0, 5, 1, 9)); // == . + // Check zero padding for features after "" for first sentence. 
+ for (int i = 6; i < 10; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 6); + EXPECT_EQ(sequence_length.flat()(1), 10); +} + +TEST_F(SequenceStringProjectionOpTest, TestOutputBoS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("add_bos_tag", true) + .Attr("add_eos_tag", false) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 9); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // bos == bos. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 2, 1, 2)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 2)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // dog == dog. + // Check zero padding for features after "" for first sentence. 
+ for (int i = 6; i < 9; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 5); + EXPECT_EQ(sequence_length.flat()(1), 9); +} + +TEST_F(SequenceStringProjectionOpTest, TestOutput) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("add_bos_tag", false) + .Attr("add_eos_tag", false) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 8); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 7)); // dog == dog. + // Check zero padding for features after "" for first sentence. 
+ for (int i = 5; i < 8; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 4); + EXPECT_EQ(sequence_length.flat()(1), 8); +} + +TEST_F(SequenceStringProjectionOpTest, DocSize) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("doc_size_levels", 4) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({4}), {"dog", "dog dog", "dog dog dog dog", "dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 4); // Batch size + EXPECT_EQ(output.dim_size(1), 5); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 3, 0)); // dog(0) == dog(3). + EXPECT_TRUE(FeatureMatches(output, 0, 1, 3, 1)); // EOS == EOS. + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // dog(0) != dog(1). + EXPECT_FALSE(FeatureMatches(output, 0, 0, 2, 0)); // dog(0) != dog(2). 
+ + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 4); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 2); + EXPECT_EQ(sequence_length.flat()(1), 3); + EXPECT_EQ(sequence_length.flat()(2), 5); + EXPECT_EQ(sequence_length.flat()(3), 2); +} + +TEST_F(SequenceStringProjectionOpTest, WordNovelty) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("distortion_probability", 0.0f) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("word_novelty_bits", 3) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), {"dog", "dog dog dog dog dog dog dog dog dog dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 11); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // dog(0) == dog(0). + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 10)); // EOS == EOS. + for (int i = 0; i < 8; ++i) { + for (int j = i + 1; j < 8; ++j) { + EXPECT_FALSE(FeatureMatches(output, 1, i, 1, j)); // dog(i) != dog(j). + } + } + // Check novel word feature saturates after 9 steps + EXPECT_TRUE(FeatureMatches(output, 1, 8, 1, 9)); // dog(8) == dog(9). + // Check zero padding for features after "" for first sentence. 
+ for (int i = 2; i < 11; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 2); + EXPECT_EQ(sequence_length.flat()(1), 11); +} + +TEST_F(SequenceStringProjectionOpTest, TestMaxSplits) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("max_splits", 3) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 4); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // == . 
+ + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 4); + EXPECT_EQ(sequence_length.flat()(1), 4); +} + +TEST_F(SequenceStringProjectionOpTest, TestNoEOS) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz") + .Attr("add_eos_tag", false) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), {"hello world 147 dog", "quick hel1lo 123"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 4); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 1)); // hello == hel1lo. + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // 147 == 123 (oov values). 
+ + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 4); + EXPECT_EQ(sequence_length.flat()(1), 3); +} + +TEST_F(SequenceStringProjectionOpTest, TestNonSpaceMaxSplit) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz⺁") + .Attr("split_on_space", false) + .Attr("max_splits", 4) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>(TensorShape({2}), + {"hel world", "⺁leh1ho"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 5); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 1)); // h != l. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 2)); // e == e. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 3)); // h == h. + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 4)); // == . 
+ + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 5); + EXPECT_EQ(sequence_length.flat()(1), 5); +} + +TEST_F(SequenceStringProjectionOpTest, TestNonSpace) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("vocabulary", "abcdefghijklmnopqrstuvwxyz⺁") + .Attr("split_on_space", false) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>(TensorShape({2}), + {"hello world", "leh1ho⺁"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 12); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // h != l. + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 1)); // e == e. + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 2)); // h == h. + EXPECT_TRUE(FeatureMatches(output, 0, 5, 1, 3)); // oov == oov. + EXPECT_TRUE(FeatureMatches(output, 0, 11, 1, 7)); // == . + // Check zero padding for features after "" for first sentence. 
+ for (int i = 8; i < 12; ++i) { + FeatureIsZero(output, 1, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 12); + EXPECT_EQ(sequence_length.flat()(1), 8); +} + +TEST_F(SequenceStringProjectionOpTest, TestNonSpaceExcludeNonAlpha) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("vocabulary", "") + .Attr("split_on_space", false) + .Attr("exclude_nonalphaspace_unicodes", true) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>(TensorShape({2}), + {"hello world4", "leh1ho⺁"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 13); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches( + output, 0, 11, 1, + 3)); // 1 == 4 (nonalpha and space mapped to same feature). + EXPECT_TRUE(FeatureMatches(output, 0, 12, 1, 7)); // == . + // Check zero padding for features after "" for first sentence. 
+ for (int i = 8; i < 13; ++i) { + FeatureIsZero(output, 1, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 13); + EXPECT_EQ(sequence_length.flat()(1), 8); +} + +TEST_F(SequenceStringProjectionOpTest, TestEmptyVocabulary) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("feature_size", 16) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>( + TensorShape({2}), + {"hello world 147 dog", "quick hel1lo 123 jumped over the lazy dog"}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 9); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 0)); // hello != quick. + EXPECT_FALSE(FeatureMatches(output, 0, 1, 1, 1)); // world != hello. + EXPECT_FALSE(FeatureMatches(output, 0, 0, 1, 1)); // hello != hel1lo. + EXPECT_FALSE(FeatureMatches(output, 0, 2, 1, 2)); // 147 != 123 (no oov). + EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 7)); // dog == dog. + EXPECT_TRUE(FeatureMatches(output, 0, 4, 1, 8)); // == . + // Check zero padding for features after "" for first sentence. 
+ for (int i = 5; i < 9; ++i) { + FeatureIsZero(output, 0, i); + } + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 5); + EXPECT_EQ(sequence_length.flat()(1), 9); +} + +TEST_F(SequenceStringProjectionOpTest, Normalization) { + TF_ASSERT_OK(NodeDefBuilder("test_op", "SequenceStringProjection") + .Input({"input", 1, DT_STRING}) + .Attr("feature_size", 16) + .Attr("normalize_repetition", true) + .Attr("token_separators", " ") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + AddInputFromArray<::tensorflow::tstring>(TensorShape({2}), + {"hello 147 .....", "hello 147 .."}); + + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output = *GetOutput(0); + + // First checks dimensions. + ASSERT_EQ(output.dims(), 3); + EXPECT_EQ(output.dim_size(0), 2); // Batch size + EXPECT_EQ(output.dim_size(1), 4); // Max sequence length + EXPECT_EQ(output.dim_size(2), 16); // Feature size + + EXPECT_TRUE(FeatureMatches(output, 0, 0, 1, 0)); // hello = hello + EXPECT_TRUE(FeatureMatches(output, 0, 1, 1, 1)); // 147 = 147 + EXPECT_TRUE(FeatureMatches(output, 0, 2, 1, 2)); // ..... = .. 
+ EXPECT_TRUE(FeatureMatches(output, 0, 3, 1, 3)); // == + + const Tensor& bag_of_chars = *GetOutput(1); + ASSERT_EQ(bag_of_chars.dims(), 1); + EXPECT_EQ(bag_of_chars.dim_size(0), 1); // Dummy output + + const Tensor& sequence_length = *GetOutput(2); + EXPECT_EQ(sequence_length.dim_size(0), 2); // Batch size + EXPECT_EQ(sequence_length.flat()(0), 4); + EXPECT_EQ(sequence_length.flat()(1), 4); +} + +int main(int argc, char** argv) { + // On Linux, add: absl::SetFlag(&FLAGS_logtostderr, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/research/seq_flow_lite/tf_ops/text_distorter.cc b/research/seq_flow_lite/tf_ops/text_distorter.cc new file mode 100644 index 0000000000000000000000000000000000000000..3b62ba47db7e173c017796fdf65aa47b633c4dd2 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/text_distorter.cc @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tf_ops/text_distorter.h" // seq_flow_lite + +using tensorflow::uint32; + +// Distorts the words in the text by inserting, deleting and swapping +// unicodes randomly with probability one third of distortion_probability. 
+std::string TextDistorter::DistortText(icu::UnicodeString* uword) { + if (distortion_probability_ > 0.0 && + generator_.RandFloat() < distortion_probability_ && uword->length()) { + // Distort text with non zero length with distortion_probability_. + float distortion_type = generator_.RandFloat(); + uint32 rindex = generator_.Rand32() % uword->length(); + if (distortion_type < 0.33f) { + // Remove character with one third probability. + random_char_ = (*uword)[rindex]; + uword->remove(rindex, 1); + } else if (distortion_type < 0.66f) { + // Swap character with one third probability if there are more than 2 + // characters. + if (uword->length() > 2) { + random_char_ = (*uword)[rindex]; + uword->remove(rindex, 1); + uword->insert(generator_.Rand32() % uword->length(), random_char_); + } + } else if (random_char_) { + // Insert character with one third probability. + uword->insert(rindex, random_char_); + } + } + // Convert unicode sequence back to string. + std::string word; + icu::StringByteSink sink(&word); + uword->toUTF8(sink); + return word; +} diff --git a/research/seq_flow_lite/tf_ops/text_distorter.h b/research/seq_flow_lite/tf_ops/text_distorter.h new file mode 100644 index 0000000000000000000000000000000000000000..562bcd18404c9b8cdcefd45cf7b577707466de70 --- /dev/null +++ b/research/seq_flow_lite/tf_ops/text_distorter.h @@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_TEXT_DISTORTER_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_TEXT_DISTORTER_H_ + +#include + +#include "icu4c/source/common/unicode/unistr.h" +#include "tensorflow/core/lib/random/simple_philox.h" + +// A class that can be used to distort text randomly. +class TextDistorter { + public: + // Add a random seed for PhiloxRandom constructor + explicit TextDistorter(float distortion_probability) + : philox_(171), + generator_(&philox_), + distortion_probability_(distortion_probability) { + assert(distortion_probability_ >= 0.0); + assert(distortion_probability_ <= 1.0); + } + std::string DistortText(icu::UnicodeString* uword); + bool BernouilleSample(float p) { return (generator_.RandFloat() <= p); } + + private: + tensorflow::random::PhiloxRandom philox_; + tensorflow::random::SimplePhilox generator_; + float distortion_probability_; + UChar32 random_char_ = 0; +}; + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TF_OPS_TEXT_DISTORTER_H_ diff --git a/research/seq_flow_lite/tf_ops/tf_custom_ops.cc b/research/seq_flow_lite/tf_ops/tf_custom_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..6574649c9734faf270fcd87926554aa3ccecb09d --- /dev/null +++ b/research/seq_flow_lite/tf_ops/tf_custom_ops.cc @@ -0,0 +1,125 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" + +using ::tensorflow::int32; + +class PoolingOp : public tensorflow::OpKernel { + public: + explicit PoolingOp(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) {} + + void Compute(tensorflow::OpKernelContext* ctx) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("PoolingOp").Device(::tensorflow::DEVICE_CPU), + PoolingOp); + +REGISTER_OP("PoolingOp") + .Input("multiplier: float32") + .Input("constant: float32") + .Input("forward: float32") + .Output("state: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +Dummy pooling op. +)doc"); + +class ExpectedValueOp : public tensorflow::OpKernel { + public: + explicit ExpectedValueOp(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) {} + + void Compute(tensorflow::OpKernelContext* ctx) override {} +}; + +REGISTER_KERNEL_BUILDER( + Name("ExpectedValueOp").Device(::tensorflow::DEVICE_CPU), ExpectedValueOp); + +REGISTER_OP("ExpectedValueOp") + .Input("attention_logits: float32") + .Input("values: float32") + .Output("evalue: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + auto batch_size = c->Dim(c->input(0), 0); + auto feature_size = c->Dim(c->input(0), 2); + c->set_output(0, c->MakeShape({batch_size, feature_size})); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +Dummy pooling op. 
+)doc"); + +class LayerNormOp : public tensorflow::OpKernel { + public: + explicit LayerNormOp(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) {} + + void Compute(tensorflow::OpKernelContext* ctx) override {} +}; + +REGISTER_KERNEL_BUILDER(Name("LayerNorm").Device(::tensorflow::DEVICE_CPU), + LayerNormOp); + +REGISTER_OP("LayerNorm") + .Input("tensor: float32") + .Input("scale: float32") + .Input("offset: float32") + .Input("axes: int32") + .Output("result: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +Dummy layer norm op. +)doc"); + +class UniformCausalAttnOp : public tensorflow::OpKernel { + public: + explicit UniformCausalAttnOp(tensorflow::OpKernelConstruction* context) + : tensorflow::OpKernel(context) {} + + void Compute(tensorflow::OpKernelContext* ctx) override {} +}; + +REGISTER_KERNEL_BUILDER( + Name("UniformCausalAttn").Device(::tensorflow::DEVICE_CPU), + UniformCausalAttnOp); + +REGISTER_OP("UniformCausalAttn") + .Input("input: float32") + .Input("time_step: int32") + .Input("selected_beams: int32") + .Attr("feature_size: int") + .Attr("beam_size: int") + .Output("output: float32") + .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { + auto batch_size = c->Dim(c->input(0), 0); + int32 feature_size; + TF_RETURN_IF_ERROR(c->GetAttr("feature_size", &feature_size)); + c->set_output(0, c->MakeShape({batch_size, 1, feature_size})); + return tensorflow::Status::OK(); + }) + .Doc(R"doc( +Dummy uniform causal attn op. +)doc"); diff --git a/research/seq_flow_lite/tflite_ops/BUILD b/research/seq_flow_lite/tflite_ops/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..f4d61046bb4c26454757be2cabd6df4af552db9e --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/BUILD @@ -0,0 +1,102 @@ +# TFLite ops for sequence string projection. 
+load("@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_copts") + +licenses(["notice"]) + +package( + default_visibility = ["//:friends"], # sequence projection +) + +cc_library( + name = "sequence_string_projection", + srcs = ["sequence_string_projection.cc"], + hdrs = ["sequence_string_projection.h"], + copts = tflite_copts(), + deps = [ + ":quantization_util", + "@org_tensorflow//tensorflow/lite:string_util", + "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", + "//tf_ops:projection_normalizer_util", # sequence projection + "//tf_ops:projection_util", # sequence projection + "@flatbuffers", + ], + alwayslink = 1, +) + +cc_test( + name = "sequence_string_projection_test", + size = "small", + srcs = ["sequence_string_projection_test.cc"], + deps = [ + ":sequence_string_projection", + ":tf_tflite_diff_test_util", + "@flatbuffers", + "@org_tensorflow//tensorflow/lite:framework", + "@org_tensorflow//tensorflow/lite:string_util", + "@org_tensorflow//tensorflow/lite/core/api", + "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", + "@org_tensorflow//tensorflow/lite/kernels:test_util", + "//tf_ops:projection_util", # sequence projection + # "//tf_ops:sequence_string_projection_op" # sequence projection + # "//tf_ops:sequence_string_projection_op_v2" # sequence projection + ], +) + +cc_library( + name = "tf_tflite_diff_test_util", + testonly = 1, + srcs = ["tf_tflite_diff_test_util.cc"], + hdrs = ["tf_tflite_diff_test_util.h"], + deps = [ + "@org_tensorflow//tensorflow/core:framework", + "@org_tensorflow//tensorflow/core:lib", + "@org_tensorflow//tensorflow/core:test", + "@org_tensorflow//tensorflow/core/kernels:ops_testutil", + "@org_tensorflow//tensorflow/lite/kernels:test_util", + "@com_google_absl//absl/container:flat_hash_map", + "@flatbuffers", + ], +) + +cc_library( + name = "quantization_util", + hdrs = ["quantization_util.h"], + deps = ["@org_tensorflow//tensorflow/lite:context"], +) + +cc_library( + name = "expected_value", + srcs = 
["expected_value.cc"], + hdrs = ["expected_value.h"], + copts = tflite_copts(), + deps = [ + ":quantization_util", + "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", + ], + alwayslink = 1, +) + +cc_library( + name = "layer_norm", + srcs = ["layer_norm.cc"], + hdrs = ["layer_norm.h"], + copts = tflite_copts(), + deps = [ + ":quantization_util", + "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", + "@org_tensorflow//tensorflow/lite/kernels:kernel_util", + ], + alwayslink = 1, +) + +cc_test( + name = "layer_norm_test", + size = "small", + srcs = ["layer_norm_test.cc"], + deps = [ + ":layer_norm", + "@org_tensorflow//tensorflow/lite/kernels:test_util", + "@com_google_googletest//:gtest_main", + "@flatbuffers", + ], +) diff --git a/research/seq_flow_lite/tflite_ops/expected_value.cc b/research/seq_flow_lite/tflite_ops/expected_value.cc new file mode 100644 index 0000000000000000000000000000000000000000..10f05d863ff39f7b7d4a81332e5f13444cd7dbd8 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/expected_value.cc @@ -0,0 +1,159 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tflite_ops/expected_value.h" // seq_flow_lite + +#include + +#include "tflite_ops/quantization_util.h" // seq_flow_lite + +namespace tflite { +namespace ops { +namespace custom { + +namespace { + +constexpr int kInputAttentionLogits = 0; +constexpr int kInputValues = 1; +constexpr int kOutputExpectedValue = 0; + +class ExpectedValueParams { + public: + // Get precomputed exponential table for the quantization range of the tensor. + // The table is precomputed during first lookup and used till the tflite + // interpreter is destroyed. + float* GetPrecomputedTable(const TfLiteTensor& tensor) { + if (!initialized_) { + initialized_ = true; + const float scale = tensor.params.scale; + for (int i = 0; + i < sizeof(precomputed_table_) / sizeof(precomputed_table_[0]); + ++i) { + precomputed_table_[i] = expf(-i * scale); + } + } + return precomputed_table_; + } + + private: + bool initialized_ = false; + float precomputed_table_[256]; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + return new ExpectedValueParams(); +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, node->inputs->size, 2); + TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); + + TfLiteTensor* attention_logits = + &context->tensors[node->inputs->data[kInputAttentionLogits]]; + TfLiteTensor* values = &context->tensors[node->inputs->data[kInputValues]]; + // Currently only 8-bit input tensors are supported. + TF_LITE_ENSURE_EQ(context, attention_logits->type, kTfLiteUInt8); + TF_LITE_ENSURE_EQ(context, values->type, kTfLiteUInt8); + // Both the input tensors are expected to be rank 3. 
+ TF_LITE_ENSURE_EQ(context, attention_logits->dims->size, 3); + TF_LITE_ENSURE_EQ(context, attention_logits->dims->size, values->dims->size); + // Currently batch size is expected to be 1. + TF_LITE_ENSURE_EQ(context, attention_logits->dims->data[0], 1); + // Dimensions of both the input tensors should match. + for (int i = 0; i < values->dims->size; ++i) { + TF_LITE_ENSURE_EQ(context, attention_logits->dims->data[i], + values->dims->data[i]); + } + + TfLiteTensor* output = + &context->tensors[node->outputs->data[kOutputExpectedValue]]; + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteUInt8); + TfLiteIntArray* output_size = TfLiteIntArrayCreate(2); + // Expectation is over dimension 2 leaving a rank 2 output tensor with first + // and last dimension as the input. + output_size->data[0] = values->dims->data[0]; + output_size->data[1] = values->dims->data[2]; + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto logits_t = &context->tensors[node->inputs->data[kInputAttentionLogits]]; + auto values_t = &context->tensors[node->inputs->data[kInputValues]]; + auto output_t = &context->tensors[node->outputs->data[kOutputExpectedValue]]; + const int out_channels = logits_t->dims->data[2]; + const int sequence_length = logits_t->dims->data[1]; + + const float out_inverse_scale = 1.0f / output_t->params.scale; + const int32_t out_zero_point = output_t->params.zero_point; + uint8_t* output = output_t->data.uint8; + auto* params = reinterpret_cast(node->user_data); + const float* table = params->GetPrecomputedTable(*logits_t); + // Memory layout of the input tensor is row-major, hence the inner loops have + // a pitch of out_channels instead of 1. The inner loop runs over this array + // two times for logits and once for values. 
If the out_channels increases + // beyond a reasonable value, the entire content of logits/values won't fit in + // L1 cache, which would make these loops very inefficient. If the last + // dimension increases, this handler should be rewritten to do transpose first + // in a cache efficient manner before performing the compute. + for (int i = 0; i < out_channels; ++i) { + // Find max logit, max logit is subtracted to ensure numerical stability + // when computing softmax. + auto slogits = &logits_t->data.uint8[i]; + auto elogits = slogits + (sequence_length * out_channels); + int32_t maxval = 0; + for (auto logits = slogits; logits < elogits; logits += out_channels) { + maxval = std::max(static_cast(*logits), maxval); + } + // Find normalizer to compute softmax (sum of exponential over logits). + // Compute the softmax output (attention), perform the elementwise + // multiplication and reduce by summing in a single loop. This results in + // the unnormalized expected value, which is normalized later. + float normalizer = 0.0f; + float unnormalized_expected_value = 0.0f; + auto values = &values_t->data.uint8[i]; + for (auto logits = slogits; logits < elogits; + logits += out_channels, values += out_channels) { + const float unnormalized_attention = table[maxval - *logits]; + normalizer += unnormalized_attention; + unnormalized_expected_value += + unnormalized_attention * PodDequantizeValue(*values_t, *values); + } + const float expected_value = unnormalized_expected_value / normalizer; + // Quantize and set the expected value in the output buffer. + output[i] = PodQuantize(expected_value, out_zero_point, out_inverse_scale); + } + return kTfLiteOk; +} + +} // namespace + +// This tflite fused op takes two input tensors (logits and values), which are +// expected to be rank 3 tensors of the form [batch size, sequence, channels]. 
+// The op performs softmax on the sequence dimension of logits input, performs +// an element-wise multiplication with the values tensor, reduces the sequence +// dimension to a scalar value using sum operation and returns a tensor of the +// form [batch size, channels]. Batch size is assumed to be 1 in the current +// implementation. +TfLiteRegistration* Register_EXPECTED_VALUE() { + static TfLiteRegistration r = {Init, Free, Resize, Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/tflite_ops/expected_value.h b/research/seq_flow_lite/tflite_ops/expected_value.h new file mode 100644 index 0000000000000000000000000000000000000000..8ee7e94efd672a29a982cd5919dc581bc16e4059 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/expected_value.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_EXPECTED_VALUE_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_EXPECTED_VALUE_H_ + +#include "tensorflow/lite/kernels/register.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_EXPECTED_VALUE(); + +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_EXPECTED_VALUE_H_ diff --git a/research/seq_flow_lite/tflite_ops/layer_norm.cc b/research/seq_flow_lite/tflite_ops/layer_norm.cc new file mode 100644 index 0000000000000000000000000000000000000000..77f5abf9a13d2acdc178bc236d2961d79cd196a3 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/layer_norm.cc @@ -0,0 +1,331 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tflite_ops/layer_norm.h" // seq_flow_lite + +#include +#include + +#include "tflite_ops/quantization_util.h" // seq_flow_lite +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace custom { + +namespace { + +const int kInputIndex = 0; +const int kScaleIndex = 1; +const int kOffsetIndex = 2; +const int kAxisIndex = 3; +const int kOutputIndex = 0; + +TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { + if (node->outputs->size != 1) { + return kTfLiteError; + } + + TfLiteTensor* input = &context->tensors[node->inputs->data[kInputIndex]]; + TfLiteTensor* scale = &context->tensors[node->inputs->data[kScaleIndex]]; + TfLiteTensor* offset = &context->tensors[node->inputs->data[kOffsetIndex]]; + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteUInt8); + TF_LITE_ENSURE_EQ(context, offset->dims->data[0], 1); + TF_LITE_ENSURE_EQ(context, offset->dims->size, 1); + TF_LITE_ENSURE_EQ(context, offset->type, kTfLiteUInt8); + TF_LITE_ENSURE_EQ(context, scale->dims->data[0], 1); + TF_LITE_ENSURE_EQ(context, scale->dims->size, 1); + TF_LITE_ENSURE_EQ(context, scale->type, kTfLiteUInt8); + if (node->inputs->size == 4) { + TfLiteTensor* axis = &context->tensors[node->inputs->data[kAxisIndex]]; + TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32); + } + + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputIndex]]; + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteUInt8); + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +int GetNumberOfSteps(const TfLiteTensor* input) { + int number_of_steps = 1; + for (int i = 0; i < input->dims->size; ++i) { + number_of_steps *= input->dims->data[i]; + } + return number_of_steps; +} + +inline int GetNumberOfFeatures(const TfLiteTensor* input, const int* axis, + const int num_axis) { + int num_features = 1; + for (int i = 0; i < num_axis; ++i) { + 
num_features *= input->dims->data[axis[i]]; + } + return num_features; +} + +// Performs sanity checks on input axis and resolves into valid dimensions. +inline bool ResolveAxis(const int num_dims, const int* axis, const int num_axis, + int* out_axis, int* out_num_axis) { + *out_num_axis = 0; + // Short-circuit axis resolution for scalars; the axis will go unused. + if (num_dims == 0) { + return true; + } + + // Using an unordered set to reduce complexity in looking up duplicates. + std::unordered_set unique_indices; + for (int64_t idx = 0; idx < num_axis; ++idx) { + // Handle negative index. + int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx]; + assert(current >= 0 && current < num_dims); + // Only adding the axis if it wasn't added before. + if (unique_indices.find(current) == unique_indices.end()) { + unique_indices.insert(current); + out_axis[*out_num_axis] = current; + *out_num_axis += 1; + } + } + return true; +} + +// Given current position in the input array, the api computes the next valid +// index. +bool ValidIndex(const int* input_dims, const int input_dims_size, + int* curr_pos) { + if (input_dims_size == 0) { + return false; + } + assert(input_dims != nullptr); + assert(curr_pos != nullptr); + for (int idx = input_dims_size - 1; idx >= 0; --idx) { + int current_val = curr_pos[idx] + 1; + assert(input_dims[idx] >= current_val); + if (input_dims[idx] == current_val) { + curr_pos[idx] = 0; + } else { + curr_pos[idx] = current_val; + return true; + } + } + return false; +} + +// Gets next offset depending on reduction axis. Implementation borrowed from +// tflite reduce mean implementation. 
+int GetOffset(const int* input_dims, const int input_dims_size, + const int* curr_pos, const int* axis, const int axis_size) { + if (input_dims_size == 0) return 0; + assert(input_dims != nullptr); + assert(curr_pos != nullptr); + int offset = 0; + for (int idx = 0; idx < input_dims_size; ++idx) { + // if idx is part of reduction axes, we skip offset calculation. + bool is_axis = false; + if (axis != nullptr) { + for (int redux = 0; redux < axis_size; ++redux) { + if (idx == axis[redux]) { + is_axis = true; + break; + } + } + } + if (!is_axis) offset = offset * input_dims[idx] + curr_pos[idx]; + } + + return offset; +} + +// TODO(b/132896827): Current implementation needs further evaluation to reduce +// space time complexities. +TfLiteStatus FlexibleLayerNorm(const TfLiteTensor* input, const float scale, + const float offset, const int* axis, + const int num_axis, TfLiteTensor* output) { + int num_features = GetNumberOfFeatures(input, &axis[0], num_axis); + int time_steps = static_cast(GetNumberOfSteps(input) / num_features); + + std::vector sum_x(time_steps, 0.0f); + std::vector sum_xx(time_steps, 0.0f); + std::vector index_iter(input->dims->size, 0); + + // Computing sum and squared sum for features across the reduction axes. + do { + // Not passing reduction axes to get the input offset as we are simply + // iterating through the multidimensional array. + int input_offset = GetOffset(input->dims->data, input->dims->size, + &index_iter[0], nullptr, 0); + // Passing in the valid reduction axes as we would like to get the output + // offset after reduction. 
+ int stats_offset = GetOffset(input->dims->data, input->dims->size, + &index_iter[0], &axis[0], num_axis); + float input_val = PodDequantize(*input, input_offset); + sum_x[stats_offset] += input_val; + sum_xx[stats_offset] += input_val * input_val; + } while (ValidIndex(input->dims->data, input->dims->size, &index_iter[0])); + + std::vector multiplier(time_steps, 1.0f); + std::vector bias(time_steps, 0.0f); + + // Computing stats for the reduction axes. + for (int i = 0; i < time_steps; ++i) { + sum_x[i] = sum_x[i] / num_features; + sum_xx[i] = sum_xx[i] / num_features; + const float variance = sum_xx[i] - sum_x[i] * sum_x[i]; + const float inverse_stddev = 1 / sqrt(variance + 1e-6); + multiplier[i] = inverse_stddev * scale; + bias[i] = offset - sum_x[i] * inverse_stddev * scale; + } + + const float out_inverse_scale = 1.0f / output->params.scale; + const int32_t out_zero_point = output->params.zero_point; + uint8_t* out_ptr = output->data.uint8; + std::fill(index_iter.begin(), index_iter.end(), 0); + + // Using the stats to fill the output pointer. + do { + // Not passing reduction axes to get the input offset as we are simply + // iterating through the multidimensional array. + int input_offset = GetOffset(input->dims->data, input->dims->size, + &index_iter[0], nullptr, 0); + // Passing in the valid reduction axes as we would like to get the output + // offset after reduction. 
+ int stats_offset = GetOffset(input->dims->data, input->dims->size, + &index_iter[0], &axis[0], num_axis); + float input_val = PodDequantize(*input, input_offset); + + const float value = + input_val * multiplier[stats_offset] + bias[stats_offset]; + out_ptr[input_offset] = + PodQuantize(value, out_zero_point, out_inverse_scale); + } while (ValidIndex(input->dims->data, input->dims->size, &index_iter[0])); + + return kTfLiteOk; +} + +TfLiteStatus DefaultLayerNormFloat(const TfLiteTensor* input, const float scale, + const float offset, TfLiteTensor* output) { + const int input_rank = input->dims->size; + const int num_features = input->dims->data[input_rank - 1]; + const int time_steps = + static_cast(GetNumberOfSteps(input) / num_features); + float* out_ptr = output->data.f; + for (int i = 0; i < time_steps; ++i) { + float sum_x = 0; + float sum_xx = 0; + for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) { + sum_x += input->data.f[index]; + sum_xx += input->data.f[index] * input->data.f[index]; + } + const float exp_xx = sum_xx / num_features; + const float exp_x = sum_x / num_features; + const float variance = exp_xx - exp_x * exp_x; + const float inverse_stddev = 1 / sqrt(variance + 1e-6); + const float multiplier = inverse_stddev * scale; + + const float bias = offset - exp_x * inverse_stddev * scale; + for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) { + out_ptr[index] = input->data.f[index] * multiplier + bias; + } + } + return kTfLiteOk; +} + +TfLiteStatus DefaultLayerNorm(const TfLiteTensor* input, const float scale, + const float offset, TfLiteTensor* output) { + const int input_rank = input->dims->size; + const int num_features = input->dims->data[input_rank - 1]; + const int time_steps = + static_cast(GetNumberOfSteps(input) / num_features); + + std::vector temp_buffer(num_features, 0.0f); + const float out_inverse_scale = 1.0f / output->params.scale; + const int32_t out_zero_point = 
output->params.zero_point; + uint8_t* out_ptr = output->data.uint8; + for (int i = 0; i < time_steps; ++i) { + float sum_x = 0; + float sum_xx = 0; + for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) { + temp_buffer[j] = PodDequantize(*input, index); + sum_x += temp_buffer[j]; + sum_xx += temp_buffer[j] * temp_buffer[j]; + } + const float exp_xx = sum_xx / num_features; + const float exp_x = sum_x / num_features; + const float variance = exp_xx - exp_x * exp_x; + const float inverse_stddev = 1 / sqrt(variance + 1e-6); + const float multiplier = inverse_stddev * scale; + const float bias = offset - exp_x * inverse_stddev * scale; + for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) { + const float value = temp_buffer[j] * multiplier + bias; + out_ptr[index] = PodQuantize(value, out_zero_point, out_inverse_scale); + } + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = + &context->tensors[node->inputs->data[kInputIndex]]; + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputIndex]]; + TfLiteTensor scale_tensor = context->tensors[node->inputs->data[kScaleIndex]]; + TfLiteTensor offset_tensor = + context->tensors[node->inputs->data[kOffsetIndex]]; + float scale = 1.0; + float offset = 0.0; + if (input->type == kTfLiteUInt8) { + scale = PodDequantize(scale_tensor, 0); + offset = PodDequantize(offset_tensor, 0); + } else { + scale = scale_tensor.data.f[0]; + offset = offset_tensor.data.f[0]; + } + + TfLiteTensor* axis = &context->tensors[node->inputs->data[kAxisIndex]]; + int num_axis = static_cast(tflite::NumElements(axis)); + // For backward compatibility reasons, we handle the default layer norm for + // last channel as below. 
+ if (num_axis == 1 && (axis->data.i32[0] == -1 || + axis->data.i32[0] == (input->dims->size - 1))) { + if (input->type == kTfLiteUInt8) { + return DefaultLayerNorm(input, scale, offset, output); + } else if (input->type == kTfLiteFloat32) { + return DefaultLayerNormFloat(input, scale, offset, output); + } else { + TF_LITE_ENSURE_MSG(context, false, + "Input should be eith Uint8 or Float32."); + } + } + + std::vector resolved_axis(num_axis); + // Resolve axis. + int num_resolved_axis = 0; + if (!ResolveAxis(input->dims->size, axis->data.i32, num_axis, + &resolved_axis[0], &num_resolved_axis)) { + return kTfLiteError; + } + + return FlexibleLayerNorm(input, scale, offset, &resolved_axis[0], + num_resolved_axis, output); +} + +} // namespace + +TfLiteRegistration* Register_LAYER_NORM() { + static TfLiteRegistration r = {nullptr, nullptr, Resize, Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/tflite_ops/layer_norm.h b/research/seq_flow_lite/tflite_ops/layer_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..6e87e1995d52fd84ad83d52cfc0a6d70562a7ed3 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/layer_norm.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef LEARNING_EXPANDER_POD_DEEP_POD_TFLITE_HANDLERS_LAYER_NORM_H_ +#define LEARNING_EXPANDER_POD_DEEP_POD_TFLITE_HANDLERS_LAYER_NORM_H_ + +#include "tensorflow/lite/kernels/register.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_LAYER_NORM(); + +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // LEARNING_EXPANDER_POD_DEEP_POD_TFLITE_HANDLERS_LAYER_NORM_H_ diff --git a/research/seq_flow_lite/tflite_ops/layer_norm_test.cc b/research/seq_flow_lite/tflite_ops/layer_norm_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..b8a935c3a6d2191279d77353652f7985dca5f524 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/layer_norm_test.cc @@ -0,0 +1,214 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tflite_ops/layer_norm.h" // seq_flow_lite + +#include + +#include +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "tensorflow/lite/kernels/test_util.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace { + +class LayerNormModel : public SingleOpModel { + public: + explicit LayerNormModel(const TensorData& input, float output_min, + float output_max, float scale, float offset, + std::initializer_list axis_shape, + std::initializer_list axis) + : scale_value_(scale), offset_value_(offset) { + input_ = AddInput(input); + scale_ = AddInput( + {TensorType_UINT8, {1}, std::min(scale, 0.0f), std::max(scale, 0.0f)}); + offset_ = AddInput({TensorType_UINT8, + {1}, + std::min(offset, 0.0f), + std::max(offset, 0.0f)}); + axis_ = AddConstInput(TensorType_INT32, axis, axis_shape); + output_ = AddOutput({TensorType_UINT8, {}, output_min, output_max}); + flexbuffers::Builder fbb; + fbb.Map([&] { + { + size_t start = fbb.StartVector("axes"); + for (const int& aval : axis) { + fbb.Int(aval); + } + fbb.EndVector(start, /*typed=*/true, /*fixed=*/false); + } + }); + fbb.Finish(); + + SetCustomOp("LayerNorm", fbb.GetBuffer(), Register_LAYER_NORM); + BuildInterpreter({GetShape(input_)}); + } + + void SetInput(const std::vector& data) { + QuantizeAndPopulate(input_, data); + QuantizeAndPopulate(scale_, {scale_value_}); + QuantizeAndPopulate(offset_, {offset_value_}); + } + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } + + private: + int input_; + int scale_; + int offset_; + int axis_; + float scale_value_; + float offset_value_; + int output_; +}; + +TEST(LayerNormModelTest, RegularInput) { + const float kQuantizedTolerance = 20 * (1. 
/ 256); + const std::vector input = { + 0, -6, 2, 4, 3, -2, 10, 1, + }; + // Mean values are 0.0, 3.0 + // Standard deviation values are 3.74, 4.41 + const std::vector expected_output = {0.0, -1.6, 0.53, 1.07, + 0.0, -1.13, 1.59, -0.45}; + + LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, 1.0, 0.0, + {1}, {2}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, NegativeScale) { + const float kQuantizedTolerance = 20 * (1. / 256); + const std::vector input = { + 0, -6, 2, 4, 3, -2, 10, 1, + }; + // Mean values are 0.0, 3.0 + // Standard deviation values are 3.74, 4.41 + const std::vector expected_output = {0.0, 1.6, -0.53, -1.07, + 0.0, 1.13, -1.59, 0.45}; + LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, -1.0, 0.0, + {1}, {2}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, NegativeOffset) { + const float kQuantizedTolerance = 20 * (1. / 256); + const std::vector input = { + 0, -6, 2, 4, 3, -2, 10, 1, + }; + // Mean values are 0.0, 3.0 + // Standard deviation values are 3.74, 4.41 + const std::vector expected_output = {-1.0, -2.6, -0.53, 0.07, + -1.0, -2.13, 0.59, -1.45}; + LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, 1.0, -1.0, + {1}, {2}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, NegativeScaleAndOffset) { + const float kQuantizedTolerance = 20 * (1. 
/ 256); + const std::vector input = { + 0, -6, 2, 4, 3, -2, 10, 1, + }; + // Mean values are 0.0, 3.0 + // Standard deviation values are 3.74, 4.41 + const std::vector expected_output = {-1.0, 0.6, -1.53, -2.07, + -1.0, 0.13, -2.59, -0.55}; + LayerNormModel m({TensorType_UINT8, {1, 2, 4}, -10, 10}, -10, 10, -1.0, -1.0, + {1}, {2}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, MultipleAxis) { + const float kQuantizedTolerance = 6 * (1. / 256); + const std::vector input = {0, 1, 2, 3, 2, 2, 3, 3, 2, -3, 1, 0, + -2, -3, -2, 0, -1, 0, -3, -2, -1, 0, 1, 2}; + const std::vector expected_output = { + 0.06, 0.57, 1.08, 1.59, 0.69, 0.69, 1.15, 1.15, + 1.12, -2.08, 0.48, -0.16, -0.95, -1.46, -0.95, 0.06, + -0.69, -0.23, -1.60, -1.15, -0.80, -0.16, 0.48, 1.12}; + + LayerNormModel m({TensorType_UINT8, {1, 2, 3, 4}, -3, 3}, -3, 3, 1.0, 0.0, + {2}, {1, 3}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, MultipleNegativeAxis) { + const float kQuantizedTolerance = 6 * (1. / 256); + const std::vector input = {0, 1, 2, 3, 2, 2, 3, 3, 2, -3, 1, 0, + -2, -3, -2, 0, -1, 0, -3, -2, -1, 0, 1, 2}; + const std::vector expected_output = { + 0.06, 0.57, 1.08, 1.59, 0.69, 0.69, 1.15, 1.15, + 1.12, -2.08, 0.48, -0.16, -0.95, -1.46, -0.95, 0.06, + -0.69, -0.23, -1.60, -1.15, -0.80, -0.16, 0.48, 1.12}; + + LayerNormModel m({TensorType_UINT8, {1, 2, 3, 4}, -3, 3}, -3, 3, 1.0, 0.0, + {2}, {-3, -1}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +TEST(LayerNormModelTest, MultipleAxisWithLargeDepth) { + const float kQuantizedTolerance = 7 * (1. 
/ 256); + const std::vector input = { + 0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1, 0.1, 0.1, 0.1, + 0.4, 0.2, 0.2, 0.2, 0.9, 0.9, 0.9, 0.9, 0.2, 0.3, 0.7, 0.7, + 0.1, 0.1, 0.3, 0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4}; + const std::vector expected_output = { + -1.06, -0.67, -0.28, 0.11, -0.67, -0.28, 0.11, 0.50, -1.06, + -0.85, -0.85, -0.85, 0.42, -0.42, -0.42, -0.42, 2.55, 2.55, + 2.05, 2.05, -0.67, -0.28, 1.27, 1.27, -1.06, -1.06, -0.28, + 0., -0.85, -0.42, 0., 0.42, -0.85, -0.42, 0., 0.42}; + + LayerNormModel m({TensorType_UINT8, {1, 2, 2, 9}, -1.0, 1.0}, -3.0, 3.0, 1.0, + 0.0, {2}, {1, 3}); + m.SetInput(input); + m.Invoke(); + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/tflite_ops/quantization_util.h b/research/seq_flow_lite/tflite_ops/quantization_util.h new file mode 100644 index 0000000000000000000000000000000000000000..a9f98c2eb578e413462500976af2d57b380bf36e --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/quantization_util.h @@ -0,0 +1,53 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_QUANTIZATION_UTIL_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_QUANTIZATION_UTIL_H_ + +#include +#include + +#include "tensorflow/lite/context.h" + +namespace tflite { + +// Returns the original (dequantized) value of 8bit value. +inline float PodDequantizeValue(const TfLiteTensor& tensor, uint8_t value) { + const int32_t zero_point = tensor.params.zero_point; + const float scale = tensor.params.scale; + return (static_cast(value) - zero_point) * scale; +} + +// Returns the original (dequantized) value of the 'index'-th element of +// 'tensor. +inline float PodDequantize(const TfLiteTensor& tensor, int index) { + return PodDequantizeValue(tensor, tensor.data.uint8[index]); +} + +// Quantizes 'value' to 8bit, given the quantization bias (zero_point) and +// factor (inverse_scale). +inline uint8_t PodQuantize(float value, int32_t zero_point, + float inverse_scale) { + const float integer_value_in_float = value * inverse_scale; + const float offset = (integer_value_in_float >= 0.0) ? 0.5f : -0.5f; + // NOTE(sfeuz): This assumes value * inverse_scale is within [INT_MIN, + // INT_MAX]. + int32_t integer_value = + static_cast(integer_value_in_float + offset) + zero_point; + return static_cast(std::max(std::min(255, integer_value), 0)); +} + +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_QUANTIZATION_UTIL_H_ diff --git a/research/seq_flow_lite/tflite_ops/sequence_string_projection.cc b/research/seq_flow_lite/tflite_ops/sequence_string_projection.cc new file mode 100644 index 0000000000000000000000000000000000000000..a5e178e326a4117d6b1075abd192b9edef734d46 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/sequence_string_projection.cc @@ -0,0 +1,508 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +/** + * Sequence String projection op used in PRADO. + */ +#include "tflite_ops/sequence_string_projection.h" // seq_flow_lite + +#include +#include +#include +#include +#include +#include +#include + +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "tensorflow/lite/string_util.h" +#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite +#include "tf_ops/projection_util.h" // seq_flow_lite +#include "tflite_ops/quantization_util.h" // seq_flow_lite + +namespace tflite { +namespace ops { +namespace custom { + +namespace sequence_string_projection { +/** + * This op referred to as Ternary Sequence String Projection op (TSP), tokenizes + * input text either on space or unicode boundary. Fingerprint for each token is + * computed using murmur hash and bit features are extracted from fingerprint + * that maps every 2 bits to the ternary output {-1, 0, 1}. This effectively + * turns a text input into a ternary rank 3 tensor (in 8bit/float format) of + * shape [1, max token length, requested number of features]. + * + * Input: + * tensor[0]: Input message, string[num_batch] + * attribute[0]: feature size + * attribute[1]: vocabulary, a set of allowed characters in utf8 format. + * attribute[2]: split_on_space, a boolean specifying the tokenization method. + * attribute[3]: max_splits, maximum number of splits allowed during + * tokenization. 
When max_splits is set to -1, no limit on + * number of tokens is imposed. When it is set to a positive + * integer, number of tokens is truncated beyond that integer. + * An end of input token is always added after tokenization, + * hence the number of tokens is one more than the true number + * of tokens. As a result, the number of tokens returned by this + * op is not the same as absl::StrSplit. + * attribute[4]: word_novelty_bits, when set to a positive value less than 8, + * generates a word specific novelty feature in the last feature + * index. + * attribute[5]: doc_size_levels, when set to a positive value less than 17, + * generates a feature proportional to the logarithm of the + * number of tokens in the second to last feature index. + * attribute[6]: add_eos_tag, add an end of sequence tag to the output when + * true. Defaults to true. + * attribute[7]: add_bos_tag, add a begin of sequence tag to the output when + * true. Defaults to false. + * attribute[8]: add_first_cap_feature, when set to 1.0f add a feature to the + * resulting projection tensor that helps discriminate if the + * input token is Camel case. Otherwise leaves the projection + * output unmodified. + * attribute[9]: add_all_caps_feature, when set to 1.0f add a feature to the + * resulting projection tensor that helps discriminate if the + * input token is ALLCAPS. Otherwise leaves the projection + * output unmodified. + * Output: + * tensor[0]: computed projections. + * float32[true number of tokens][feature size] + * true number of tokens is number of tokens + 1. (for end of + * sequence). 
+ */ + +namespace { + +constexpr char kBeginToken[] = ""; +constexpr char kEndToken[] = ""; +constexpr int kInputMessage = 0; +constexpr int kOutputLabel = 0; + +enum class BosTag { kGenerate, kNone }; +enum class EosTag { kGenerate, kNone }; + +class ProjectionParams { + public: + ProjectionParams(int feature_size, const std::string& vocabulary, + const std::string& hashtype, int max_splits, + bool split_on_space, int word_novelty_bits, + int doc_size_levels, BosTag add_bos_tag, EosTag add_eos_tag, + bool exclude_nonalphaspace_unicodes, + const std::string& token_separators, + bool normalize_repetition, bool add_first_cap_feature, + bool add_all_caps_feature) + : feature_size_(feature_size), + unicode_handler_(vocabulary, exclude_nonalphaspace_unicodes), + hasher_(Hasher::CreateHasher(feature_size, hashtype)), + max_splits_(max_splits), + split_on_space_(split_on_space), + word_novelty_bits_(word_novelty_bits), + doc_size_levels_(doc_size_levels), + add_bos_tag_(add_bos_tag == BosTag::kGenerate), + add_eos_tag_(add_eos_tag == EosTag::kGenerate), + add_first_cap_feature_(add_first_cap_feature), + add_all_caps_feature_(add_all_caps_feature) { + assert(max_splits_ == -1 || max_splits_ > 0); + assert(word_novelty_bits >= 0 && word_novelty_bits <= 7); + // hasher_ can be nullptr if the hashtype is invalid. But there is a similar + // check in tensorflow op when the model is created. So this failure will + // never happen if the model was successfully trained. Still adding a check + // here since you can edit the model post training, which is the only + // situation when this assertion will fail. 
+ assert(hasher_ != nullptr); + if (word_novelty_bits_ != 0) { + assert(feature_size_ >= 1); + } + assert(doc_size_levels >= 0 && doc_size_levels <= 16); + if (doc_size_levels_ != 0) { + assert(feature_size_ >= 2); + } + word_novelty_offset_ = 2.0f / (1 << word_novelty_bits_); + + if (!token_separators.empty() || normalize_repetition) { + projection_normalizer_ = std::make_unique( + token_separators, normalize_repetition); + } + } + virtual ~ProjectionParams() {} + int FeatureSize() const { return feature_size_; } + bool WordNoveltyEnabled() const { return word_novelty_bits_ != 0; } + void WordNoveltyFeature(float* data, int word_count) const { + *data = std::min((word_count * word_novelty_offset_) - 1.0f, 1.0f); + } + void WordNoveltyFeature(uint8_t* data, int word_count) const { + float word_novelty_feature; + WordNoveltyFeature(&word_novelty_feature, word_count); + *data = PodQuantize(word_novelty_feature, 127.0f, 127); + } + bool DocSizeFeatureEnabled() const { return (doc_size_levels_ != 0); } + bool FirstCap() const { return add_first_cap_feature_; } + bool AllCaps() const { return add_all_caps_feature_; } + int BosToken() const { return add_bos_tag_ ? 1 : 0; } + int EosToken() const { return add_eos_tag_ ? 1 : 0; } + void DocSizeFeature(float* data, int num_tokens) { + float doc_size_feature = + (doc_size_levels_ != 0) + ? std::log2(static_cast(num_tokens)) / doc_size_levels_ + : 0.0f; + *data = std::min(doc_size_feature, 1.0f) * 2.0f - 1.0f; + } + void DocSizeFeature(uint8_t* data, int num_tokens) { + float doc_size_feature; + DocSizeFeature(&doc_size_feature, num_tokens); + *data = PodQuantize(doc_size_feature, 127.0f, 127); + } + void Hash(const std::string& word, std::vector* hash_codes) { + hasher_->GetHashCodes(word, hash_codes); + } + // Lower cases the input text and eliminates all unsupported + // unicodes in it if a vocabulary is provided. 
+ std::string LowerCaseUTF8WithSupportedUnicodes( + std::pair source, bool* first_cap, + bool* all_caps) const { + return unicode_handler_.LowerCaseUTF8WithSupportedUnicodes( + source, first_cap, all_caps); + } + // Splits the input text into a set of tokens. Uses space as the delimiter + // when split_on_space is True and unicode boundaries as the delimiter + // otherwise. When max_splits is set to -1, no limit on number of tokens is + // imposed. When it is set to a positive integer, number of tokens is + // truncated beyond that integer. An end of input token is always added after + // tokenization, hence the number of tokens is one more than the true number + // of tokens. + virtual TfLiteStatus PreprocessInput(TfLiteTensor* input_t, + TfLiteContext* context) { + if (input_t->bytes == 0) { + context->ReportError(context, "Empty input not supported."); + return kTfLiteError; + } + tflite::StringRef inputref = tflite::GetString(input_t, /*string_index=*/0); + if (projection_normalizer_ == nullptr) { + tokens_ = unicode_handler_.Tokenize(inputref.str, inputref.len, + split_on_space_, max_splits_); + } else { + normalized_input_ = projection_normalizer_->Normalize( + inputref.str, inputref.len, SIZE_MAX); + tokens_ = unicode_handler_.Tokenize(normalized_input_, split_on_space_, + max_splits_); + } + if (GetNumTokens() == 0 && !add_bos_tag_ && !add_eos_tag_) { + context->ReportError(context, "No tokens found."); + return kTfLiteError; + } + return kTfLiteOk; + } + int GetNumTokens() const { return tokens_.size(); } + const std::vector>& GetTokens() const { + return tokens_; + } + virtual std::string PreprocessToken(const std::string& word) { return word; } + + private: + int feature_size_; + ProjectionUnicodeHandler unicode_handler_; + std::unique_ptr hasher_; + int max_splits_; + bool split_on_space_; + int word_novelty_bits_; + int doc_size_levels_; + bool add_bos_tag_; + bool add_eos_tag_; + bool add_first_cap_feature_; + bool add_all_caps_feature_; + float 
word_novelty_offset_; + std::string normalized_input_; + + protected: + std::unique_ptr projection_normalizer_; + std::vector> tokens_; +}; + +class ProjectionParamsV2 : public ProjectionParams { + public: + ProjectionParamsV2(int feature_size, const std::string& vocabulary, + const std::string& hashtype, BosTag add_bos_tag, + EosTag add_eos_tag, bool normalize_repetition) + : ProjectionParams(feature_size, vocabulary, hashtype, + /*max_splits = */ -1, + /* split_on_space = */ true, + /*word_novelty_bits = */ 0, /*doc_size_levels = */ 0, + add_bos_tag, add_eos_tag, + /*exclude_nonalphaspace_unicodes = */ false, + /*token_separators = */ "", normalize_repetition, + /*add_first_cap_feature = */ false, + /*add_all_caps_feature = */ false) {} + ~ProjectionParamsV2() override {} + + TfLiteStatus PreprocessInput(TfLiteTensor* input_t, + TfLiteContext* context) override { + const TfLiteIntArray* const dims = input_t->dims; + const int num_tokens = tflite::GetStringCount(input_t); + if (num_tokens == 0) { + context->ReportError(context, "Empty input not supported."); + return kTfLiteError; + } + if (dims->size != 2) { + context->ReportError( + context, "Input tensor is expected to be rank 2, got rank %d.", + dims->size); + return kTfLiteError; + } else if (dims->data[0] != 1) { + context->ReportError(context, + "Input tensor batch size should be 1, got %d.", + dims->data[0]); + return kTfLiteError; + } else if (num_tokens != dims->data[1]) { + context->ReportError(context, + "Inconsistent number of input tokens %d != %d.", + num_tokens, dims->data[1]); + return kTfLiteError; + } + for (int i = 0; i < num_tokens; ++i) { + const tflite::StringRef strref = tflite::GetString(input_t, i); + tokens_.push_back(std::pair(strref.str, strref.len)); + } + return kTfLiteOk; + } + std::string PreprocessToken(const std::string& word) override { + return projection_normalizer_ ? 
projection_normalizer_->Normalize( + word.data(), word.length(), SIZE_MAX) + : word; + } +}; + +inline void SetTensorToDynamic(TfLiteTensor* tensor) { + if (tensor->allocation_type != kTfLiteDynamic) { + tensor->allocation_type = kTfLiteDynamic; + tensor->data.raw = nullptr; + } +} + +// Determines whether tensor is dynamic. Note that a tensor can be non-const and +// not dynamic. This function specifically checks for a dynamic tensor. +inline bool IsDynamicTensor(const TfLiteTensor* tensor) { + return tensor->allocation_type == kTfLiteDynamic; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + const uint8_t* buffer_t = reinterpret_cast(buffer); + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + const std::string hashtype = + m["hashtype"].IsNull() ? kMurmurHash : m["hashtype"].AsString().str(); + const int word_novelty_bits = + m["word_novelty_bits"].IsNull() ? 0 : m["word_novelty_bits"].AsInt32(); + const int doc_size_levels = + m["doc_size_levels"].IsNull() ? 0 : m["doc_size_levels"].AsInt32(); + const bool add_bos_tag = + m["add_bos_tag"].IsNull() ? false : m["add_bos_tag"].AsBool(); + const bool add_eos_tag = + m["add_eos_tag"].IsNull() ? true : m["add_eos_tag"].AsBool(); + float add_first_cap_feature = m["add_first_cap_feature"].IsNull() + ? 0.0f + : m["add_first_cap_feature"].AsFloat(); + float add_all_caps_feature = m["add_all_caps_feature"].IsNull() + ? 
 0.0f + : m["add_all_caps_feature"].AsFloat(); + if (add_first_cap_feature != 0.0f && add_first_cap_feature != 1.0f) { + context->ReportError( + context, + "add_first_cap_feature is %f, it should be 0.0 or 1.0, " + "resetting it to 1.0f\n", + add_first_cap_feature); + add_first_cap_feature = 1.0f; + } + if (add_all_caps_feature != 0.0f && add_all_caps_feature != 1.0f) { + context->ReportError( + context, + "add_all_caps_feature is %f, it should be 0.0 or 1.0, " + "resetting it to 1.0f\n", + add_all_caps_feature); + add_all_caps_feature = 1.0f; + } + // Old models that use the op may not have this attribute set, for those + // models the default value of false will be used. + const bool exclude_nonalphaspace_unicodes = + m["exclude_nonalphaspace_unicodes"].IsNull() + ? false + : m["exclude_nonalphaspace_unicodes"].AsBool(); + const std::string token_separators = + m["token_separators"].IsNull() ? "" : m["token_separators"].ToString(); + const bool normalize_repetition = m["normalize_repetition"].AsBool(); + if (!Hasher::SupportedHashType(hashtype)) { + context->ReportError(context, "Unsupported hashtype %s\n", + hashtype.c_str()); + return nullptr; + } + + return new ProjectionParams( + m["feature_size"].AsInt32(), m["vocabulary"].AsString().str(), hashtype, + m["max_splits"].AsInt32(), m["split_on_space"].AsBool(), + word_novelty_bits, doc_size_levels, + add_bos_tag ? BosTag::kGenerate : BosTag::kNone, + add_eos_tag ? EosTag::kGenerate : EosTag::kNone, + exclude_nonalphaspace_unicodes, token_separators, normalize_repetition, + add_first_cap_feature == 1.0f, add_all_caps_feature == 1.0f); +} + +void* InitV2(TfLiteContext* context, const char* buffer, size_t length) { + const uint8_t* buffer_t = reinterpret_cast(buffer); + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + const std::string hashtype = + m["hashtype"].IsNull() ? 
kMurmurHash : m["hashtype"].AsString().str(); + if (!Hasher::SupportedHashType(hashtype)) { + context->ReportError(context, "Unsupported hashtype %s\n", + hashtype.c_str()); + return nullptr; + } + + return new ProjectionParamsV2( + m["feature_size"].AsInt32(), m["vocabulary"].AsString().str(), hashtype, + m["add_bos_tag"].AsBool() ? BosTag::kGenerate : BosTag::kNone, + m["add_eos_tag"].AsBool() ? EosTag::kGenerate : EosTag::kNone, + m["normalize_repetition"].AsBool()); +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputLabel]]; + SetTensorToDynamic(output); + return kTfLiteOk; +} + +constexpr int kHashCodeBits = 64; +constexpr int kMapBits = 2; +constexpr int kIncrement = kHashCodeBits / kMapBits; +constexpr int kMapHigh = 1; +constexpr int kMapLow = 2; + +template +void TypedEval(const T* mapping_table, ProjectionParams* params, T* data) { + auto tokens = params->GetTokens(); + std::vector hash_codes; + std::unordered_map word_counter; + + T doc_size_feature = T{0}; + if (params->DocSizeFeatureEnabled()) { + params->DocSizeFeature(&doc_size_feature, tokens.size()); + } + const int num_tokens = tokens.size() + params->EosToken(); + for (int j = -params->BosToken(), offset0 = 0; j < num_tokens; ++j) { + std::string word; + bool first_cap, all_caps; + if (j < 0) { + word = kBeginToken; + } else if (j < tokens.size()) { + word = params->LowerCaseUTF8WithSupportedUnicodes(tokens[j], &first_cap, + &all_caps); + word = params->PreprocessToken(word); + } else { + word = kEndToken; + } + params->Hash(word, &hash_codes); + for (int hindex = 0, k = 0; hindex < hash_codes.size(); hindex++) { + auto hash = hash_codes[hindex]; + for (int kmax = std::min(k + kIncrement, params->FeatureSize()); + k < kmax;) { + data[offset0 + k++] = mapping_table[hash & ((1 << kMapBits) - 1)]; + hash >>= kMapBits; + } 
 + } + offset0 += params->FeatureSize(); + if (params->WordNoveltyEnabled() && !hash_codes.empty()) { + params->WordNoveltyFeature(&data[offset0 - kWordNoveltyOffset], + word_counter[hash_codes[0]]++); + } + if (params->DocSizeFeatureEnabled()) { + data[offset0 - kDocSizeOffset] = doc_size_feature; + } + if (params->FirstCap()) { + data[offset0 - kFirstCapOffset] = + mapping_table[first_cap ? kMapHigh : kMapLow]; + } + if (params->AllCaps()) { + data[offset0 - kAllCapsOffset] = + mapping_table[all_caps ? kMapHigh : kMapLow]; + } + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->user_data); + if (params == nullptr) { + context->ReportError(context, "Empty user data."); + return kTfLiteError; + } + TF_LITE_ENSURE_OK( + context, + params->PreprocessInput( + &context->tensors[node->inputs->data[kInputMessage]], context)); + + TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputLabel]]; + if (IsDynamicTensor(output)) { + TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); + output_size->data[0] = 1; + output_size->data[1] = + params->BosToken() + params->GetNumTokens() + params->EosToken(); + output_size->data[2] = params->FeatureSize(); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size)); + } else { + context->ReportError(context, "Output must be dynamic."); + return kTfLiteError; + } + + if (output->type == kTfLiteUInt8) { + const uint8_t kMappingTable[1 << kMapBits] = {127, 255, 0, 127}; + TypedEval(kMappingTable, params, output->data.uint8); + } else if (output->type == kTfLiteFloat32) { + const float kMappingTable[1 << kMapBits] = {0.0, 1.0, -1.0, 0.0}; + TypedEval(kMappingTable, params, output->data.f); + } else { + context->ReportError(context, "Output type must be UInt8 or Float32."); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace +} // namespace sequence_string_projection + +const char kSequenceStringProjection[] = 
"SEQUENCE_STRING_PROJECTION"; + +// This op converts a list of strings to a sequence of features using hashing. +TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION() { + static TfLiteRegistration r = { + sequence_string_projection::Init, sequence_string_projection::Free, + sequence_string_projection::Resize, sequence_string_projection::Eval}; + return &r; +} + +const char kSequenceStringProjectionV2[] = "SEQUENCE_STRING_PROJECTION_V2"; + +// This op converts a sequence of tokens to a sequence of projected features +// using hashing. +TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION_V2() { + static TfLiteRegistration r = { + sequence_string_projection::InitV2, sequence_string_projection::Free, + sequence_string_projection::Resize, sequence_string_projection::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/research/seq_flow_lite/tflite_ops/sequence_string_projection.h b/research/seq_flow_lite/tflite_ops/sequence_string_projection.h new file mode 100644 index 0000000000000000000000000000000000000000..55b83611ce5ec8eb0c794c415b0758d7f40c98a9 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/sequence_string_projection.h @@ -0,0 +1,34 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_SEQUENCE_STRING_PROJECTION_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_SEQUENCE_STRING_PROJECTION_H_ +#include "tensorflow/lite/kernels/register.h" + +namespace tflite { +namespace ops { +namespace custom { + +extern const char kSequenceStringProjection[]; + +TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION(); + +extern const char kSequenceStringProjectionV2[]; + +TfLiteRegistration* Register_SEQUENCE_STRING_PROJECTION_V2(); +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_SEQUENCE_STRING_PROJECTION_H_ diff --git a/research/seq_flow_lite/tflite_ops/sequence_string_projection_test.cc b/research/seq_flow_lite/tflite_ops/sequence_string_projection_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..23aabcdb234280d914b1cafdf35c1b4ca76b6c1e --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/sequence_string_projection_test.cc @@ -0,0 +1,995 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tflite_ops/sequence_string_projection.h" // seq_flow_lite + +#include + +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/string_util.h" +#include "tf_ops/projection_util.h" // seq_flow_lite +#include "tflite_ops/tf_tflite_diff_test_util.h" // seq_flow_lite + +namespace tflite { + +namespace ops { +namespace custom { + +namespace { + +using ::testing::ElementsAreArray; +using ::tflite::testing::AttrValue; +using ::tflite::testing::FloatTensor; +using ::tflite::testing::IntTensor; +using ::tflite::testing::OpEquivTestCase; +using ::tflite::testing::StringTensor; +using ::tflite::testing::TensorflowTfLiteOpTest; + +class SequenceStringProjectionModel : public SingleOpModel { + public: + explicit SequenceStringProjectionModel( + bool split_on_space, int max_splits, int word_novelty_bits, + int doc_size_levels, bool add_eos_tag, TensorType output_type, + const std::string& token_separators = "", + bool normalize_repetition = false, float add_first_cap = 0.0, + float add_all_caps = 0.0, const string& hashtype = kMurmurHash) { + flexbuffers::Builder fbb; + fbb.Map([&] { + fbb.Int("feature_size", 4); + fbb.String("vocabulary", "abcdefghijklmnopqrstuvwxyz"); + fbb.Int("word_novelty_bits", word_novelty_bits); + fbb.Int("doc_size_levels", doc_size_levels); + fbb.Int("max_splits", max_splits); + fbb.Bool("split_on_space", split_on_space); + fbb.Bool("add_eos_tag", add_eos_tag); + fbb.String("token_separators", token_separators); + fbb.String("hashtype", hashtype); + fbb.Bool("normalize_repetition", normalize_repetition); + fbb.Float("add_first_cap_feature", add_first_cap); + fbb.Float("add_all_caps_feature", add_all_caps); + }); + fbb.Finish(); + output_ = AddOutput({output_type, {}}); 
+ SetCustomOp(kSequenceStringProjection, fbb.GetBuffer(), + Register_SEQUENCE_STRING_PROJECTION); + BuildInterpreter({GetShape(input_)}); + } + void Invoke(const std::string& input) { + PopulateStringTensor(input_, {input}); + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) + << "Cannot allocate tensors"; + SingleOpModel::Invoke(); + } + TfLiteStatus InvokeFailable(const std::string& input) { + PopulateStringTensor(input_, {input}); + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) + << "Cannot allocate tensors"; + return SingleOpModel::InvokeUnchecked(); + } + + template + std::vector GetOutput() { + return ExtractVector(output_); + } + + void CheckOutputTensorShape(const std::vector& expected_shape) { + EXPECT_EQ(GetTensorShape(output_), expected_shape); + } + + private: + int input_ = AddInput(TensorType_STRING); + int output_; +}; + +TEST(SequenceStringProjectionTest, IncorrectHashtype) { + SequenceStringProjectionModel m(true, -1, 0, 0, true, TensorType_UINT8, "", + false, 0.0, 0.0, "unsupported"); + EXPECT_EQ(m.InvokeFailable(" "), kTfLiteError); +} + +TEST(SequenceStringProjectionTest, RegularInputUint8) { + std::vector>> testcase = { + {"hello", {127, 255, 255, 127, 127, 255, 127, 127}}, + {"world", {127, 255, 127, 127, 127, 255, 127, 127}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 0, 0, true, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputUint8NoEOSTag) { + std::vector>> testcase = { + {"hello", {127, 255, 255, 127}}, + {"world", {127, 255, 127, 127}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 0, 0, false, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputUint8DocSize) { + std::vector>> testcase = { + {"hello", {127, 255, 0, 127, 127, 
255, 0, 127}}, + {"world", {127, 255, 0, 127, 127, 255, 0, 127}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 0, 8, true, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputUint8DocSizeWordNovelty) { + std::vector>> testcase = { + {"hello", {127, 255, 0, 0, 127, 255, 0, 0}}, + {"world", {127, 255, 0, 0, 127, 255, 0, 0}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 4, 8, true, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputUint8WordNovelty) { + std::vector>> testcase = { + {"hello", {127, 255, 255, 0, 127, 255, 127, 0}}, + {"world", {127, 255, 127, 0, 127, 255, 127, 0}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 3, 0, true, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputFloat) { + std::vector>> testcase = { + {"hello", {0, 1, 1, 0, 0, 1, 0, 0}}, + {"world", {0, 1, 0, 0, 0, 1, 0, 0}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 0, 0, true, TensorType_FLOAT32); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputFloatNoEOSTag) { + std::vector>> testcase = { + {"hello", {0, 1, 1, 0}}, + {"world", {0, 1, 0, 0}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, -1, 0, 0, false, TensorType_FLOAT32); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, RegularInputWithoutSplitOnSpace) { + std::vector>> testcase = { + {"h", {127, 127, 255, 127, 127, 255, 127, 127}}, + {"w", 
{255, 127, 255, 127, 127, 255, 127, 127}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(false, -1, 0, 0, true, TensorType_UINT8); + m.Invoke(test.first); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(test.second)); + } +} + +TEST(SequenceStringProjectionTest, CheckSequenceLimit) { + std::string input; + for (int i = 0; i < 600; ++i) { + input += "hello world "; + } + SequenceStringProjectionModel m(true, 511, 0, 0, true, TensorType_UINT8); + m.Invoke(input); + const std::vector expected_shape = {1, 512, 4}; + m.CheckOutputTensorShape(expected_shape); +} + +TEST(SequenceStringProjectionTest, CheckSequenceLimitBoundary) { + std::vector>> testcase = { + {"hello", {1, 2, 4}}, + {"hello ", {1, 2, 4}}, + {"hello world", {1, 3, 4}}, + {"hellow world ", {1, 3, 4}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(true, 2, 0, 0, true, TensorType_FLOAT32); + m.Invoke(test.first); + m.CheckOutputTensorShape(test.second); + } +} + +TEST(SequenceStringProjectionTest, CheckSequenceLimitBoundaryWithoutSpace) { + std::vector>> testcase = { + {"h", {1, 2, 4}}, + {"he", {1, 3, 4}}, + {"hel", {1, 3, 4}}, + {"hello ", {1, 3, 4}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(false, 2, 0, 0, true, TensorType_UINT8); + m.Invoke(test.first); + m.CheckOutputTensorShape(test.second); + } +} + +TEST(SequenceStringProjectionTest, + CheckSequenceLimitBoundaryWithoutSpaceNoEOS) { + std::vector>> testcase = { + {"h", {1, 1, 4}}, + {"he", {1, 2, 4}}, + {"hel", {1, 2, 4}}, + {"hello ", {1, 2, 4}}, + }; + for (const auto& test : testcase) { + SequenceStringProjectionModel m(false, 2, 0, 0, false, TensorType_UINT8); + m.Invoke(test.first); + m.CheckOutputTensorShape(test.second); + } +} + +TEST(SequenceStringProjectionTest, TokenSeparators) { + // Separate the input using "!". 
+ SequenceStringProjectionModel m1(true, -1, 0, 0, true, TensorType_UINT8, "!", + false); + m1.Invoke("great!!!"); + auto output1 = m1.GetOutput(); + + SequenceStringProjectionModel m2(true, -1, 0, 0, true, TensorType_UINT8, "!", + false); + m2.Invoke("great ! ! !"); + auto output2 = m2.GetOutput(); + + EXPECT_THAT(output1, ElementsAreArray(output2)); +} + +TEST(SequenceStringProjectionTest, EmptyInput) { + // Separate the input using "!". + SequenceStringProjectionModel no_eos(true, -1, 0, 0, false, TensorType_UINT8, + " ", false); + EXPECT_EQ(no_eos.InvokeFailable(" "), kTfLiteError); + EXPECT_EQ(no_eos.InvokeFailable(" "), kTfLiteError); + EXPECT_EQ(no_eos.InvokeFailable(""), kTfLiteError); + EXPECT_EQ(no_eos.InvokeFailable("hello"), kTfLiteOk); + + SequenceStringProjectionModel with_eos(true, -1, 0, 0, true, TensorType_UINT8, + " ", false); + EXPECT_EQ(with_eos.InvokeFailable(" "), kTfLiteOk); + EXPECT_EQ(with_eos.InvokeFailable(" "), kTfLiteOk); + EXPECT_EQ(with_eos.InvokeFailable(""), kTfLiteOk); + EXPECT_EQ(with_eos.InvokeFailable("hello"), kTfLiteOk); +} + +TEST(SequenceStringProjectionTest, FirstCap) { + SequenceStringProjectionModel op(/*split_on_space=*/true, /*max_splits=*/-1, + /*word_novelty_bits=*/0, + /*doc_size_levels=*/0, /*add_eos_tag=*/false, + /*output_type=*/TensorType_UINT8, + /*token_separators=*/" ", + /*normalize_repetition=*/false, + /*add_first_cap=*/0.5); + op.Invoke("hello"); + auto output1 = op.GetOutput(); + + op.Invoke("Hello"); + auto output2 = op.GetOutput(); + + EXPECT_NE(output1[1], output2[1]); +} + +TEST(SequenceStringProjectionTest, AllCaps) { + SequenceStringProjectionModel op( + /*split_on_space=*/true, /*max_splits=*/-1, /*word_novelty_bits=*/0, + /*doc_size_levels=*/0, /*add_eos_tag=*/false, + /*output_type=*/TensorType_UINT8, /*token_separators=*/" ", + /*normalize_repetition=*/false, /*add_first_cap=*/0.0, + /*add_all_caps=*/0.5); + op.Invoke("hello"); + auto output1 = op.GetOutput(); + + op.Invoke("HELLO"); + auto 
output2 = op.GetOutput(); + + EXPECT_NE(output1[0], output2[0]); +} + +TEST(SequenceStringProjectionTest, NormalizeRepetition) { + // Normalize the repeated special tokens. Used for the emotion models. + SequenceStringProjectionModel m1(true, -1, 0, 0, true, TensorType_UINT8, "", + true); + m1.Invoke("hello.."); + auto output1 = m1.GetOutput(); + + SequenceStringProjectionModel m2(true, -1, 0, 0, true, TensorType_UINT8, "", + true); + m2.Invoke("hello....."); + auto output2 = m2.GetOutput(); + + EXPECT_THAT(output1, ElementsAreArray(output2)); +} + +class SequenceStringProjectionTest : public TensorflowTfLiteOpTest { + std::function TfLiteOpRegistration() override { + return ops::custom::Register_SEQUENCE_STRING_PROJECTION; + } + + std::string TensorflowOpName() override { return "SequenceStringProjection"; } +}; + +TEST_P(SequenceStringProjectionTest, TensorflowTfLiteSame) { + RunTensorflowOp(); + RunTfLiteOp(); + CompareOpOutput(); +} + +std::vector SequenceStringProjectionTestCases() { + std::vector test_cases; + constexpr float kScale = 2.0 / 255; + constexpr int kZero = 127; + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoBoSNoEoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoBoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(true); + 
test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoEoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEquality"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(true); + test_case.attributes["add_bos_tag"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpace"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello 
world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpace"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithMax"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithMax"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(4); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(0); + 
test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello World"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(6); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(7); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithMaxSplitsAndDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + 
test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(8); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithMaxSplitsAndDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(4); + test_case.attributes["word_novelty_bits"] = AttrValue(0); + test_case.attributes["doc_size_levels"] = AttrValue(4); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithWordNovelty"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(2); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, 
{}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithWordNovelty"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(3); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithMaxSplitsAndWordNovelty"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(4); + test_case.attributes["doc_size_levels"] = AttrValue(0); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithMaxSplitsAndWordNovelty"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(4); + test_case.attributes["word_novelty_bits"] = AttrValue(5); + test_case.attributes["doc_size_levels"] = AttrValue(0); + 
test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello World"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithWordNoveltyAndDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(2); + test_case.attributes["doc_size_levels"] = AttrValue(8); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithWordNoveltyAndDocSize"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(-1); + test_case.attributes["word_novelty_bits"] = AttrValue(3); + test_case.attributes["doc_size_levels"] = AttrValue(6); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithEverything"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + 
test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(5); + test_case.attributes["doc_size_levels"] = AttrValue(8); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World hello world"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithEverything"; + test_case.attributes["vocabulary"] = + AttrValue("abcdefghijklmnopqrstuvwxyz"); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(4); + test_case.attributes["word_novelty_bits"] = AttrValue(7); + test_case.attributes["doc_size_levels"] = AttrValue(9); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(false); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello World"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "SplitOnSpaceWithEverythingAndExclude"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(5); + test_case.attributes["doc_size_levels"] = AttrValue(8); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + 
test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NoSplitOnSpaceWithEverythingAndExclude"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(false); + test_case.attributes["max_splits"] = AttrValue(2); + test_case.attributes["word_novelty_bits"] = AttrValue(5); + test_case.attributes["doc_size_levels"] = AttrValue(8); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["exclude_nonalphaspace_unicodes"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1}, {"Hello World 7153845&^$&^$&"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + { + OpEquivTestCase test_case; + test_case.test_name = "NormalizeRepetition"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["normalize_repetition"] = AttrValue(true); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello World ..."})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "TokenSeparator"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["token_separators"] = AttrValue("-"); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello-World"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + 
+ { + OpEquivTestCase test_case; + test_case.test_name = "CapBaseline"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello hello HELLO"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "FirstCap"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["add_first_cap_feature"] = AttrValue(1.0); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello hello HELLO"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "AllCaps"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["add_all_caps_feature"] = AttrValue(1.0); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello hello HELLO"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "FirstCapAllCaps"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["split_on_space"] = AttrValue(true); + 
test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["add_first_cap_feature"] = AttrValue(1.0); + test_case.attributes["add_all_caps_feature"] = AttrValue(1.0); + test_case.input_tensors.push_back(StringTensor({1}, {"Hello hello HELLO"})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + return test_cases; +} + +INSTANTIATE_TEST_SUITE_P( + SequenceStringProjectionTests, SequenceStringProjectionTest, + ::testing::ValuesIn(SequenceStringProjectionTestCases())); + +class SequenceStringProjectionV2Model : public SingleOpModel { + public: + explicit SequenceStringProjectionV2Model( + std::vector> input_shapes, + const string& hashtype = kMurmurHash) { + flexbuffers::Builder fbb; + fbb.Map([&] { + fbb.Int("feature_size", 4); + fbb.String("hashtype", hashtype); + }); + fbb.Finish(); + input_ = AddInput(TensorType_STRING); + output_ = AddOutput({TensorType_UINT8, {}}); + SetCustomOp(kSequenceStringProjectionV2, fbb.GetBuffer(), + Register_SEQUENCE_STRING_PROJECTION_V2); + BuildInterpreter(input_shapes); + } + void Invoke(const std::vector& input, TfLiteStatus expected) { + PopulateStringTensor(input_, input); + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) + << "Cannot allocate tensors"; + ASSERT_EQ(SingleOpModel::InvokeUnchecked(), expected); + } + TfLiteStatus InvokeFailable(const std::string& input) { + PopulateStringTensor(input_, {input}); + CHECK(interpreter_->AllocateTensors() == kTfLiteOk) + << "Cannot allocate tensors"; + return SingleOpModel::InvokeUnchecked(); + } + + private: + int input_; + int output_; +}; + +TEST(SequenceStringProjectionV2Test, IncorrectHashtype) { + SequenceStringProjectionV2Model m({{1, 0}}, "unsupported"); + EXPECT_EQ(m.InvokeFailable(" "), kTfLiteError); +} + +TEST(SequenceStringProjectionV2Test, 
RegularInputUint8EmptyNotSupported) { + // TFLite test infratructure currently does not let the error message to be + // extracted on failure. As a result just the return error code is tested + // as all other TFLite op handler tests. The error message each test invokes + // is captured in a comment though. + // ERROR: Empty input not supported. + SequenceStringProjectionV2Model m({{1, 0}}); + m.Invoke({}, kTfLiteError); +} + +TEST(SequenceStringProjectionV2Test, RegularInputUint8BatchNotSupported) { + // TFLite test infratructure currently does not let the error message to be + // extracted on failure. As a result just the return error code is tested + // as all other TFLite op handler tests. The error message each test invokes + // is captured in a comment though. + // ERROR: Input tensor batch size should be 1, got 2. + SequenceStringProjectionV2Model m({{2, 1}}); + m.Invoke({"hello", "world"}, kTfLiteError); +} + +TEST(SequenceStringProjectionV2Test, RegularInputUint8RankNot2NotSupported) { + // TFLite test infratructure currently does not let the error message to be + // extracted on failure. As a result just the return error code is tested + // as all other TFLite op handler tests. The error message each test invokes + // is captured in a comment though. + // ERROR: Input tensor is expected to be rank 2, got rank 3. + SequenceStringProjectionV2Model m({{2, 1, 1}}); + m.Invoke({"hello", "world"}, kTfLiteError); +} + +TEST(SequenceStringProjectionV2Test, RegularInputUint8InconsistentInput) { + // TFLite test infratructure currently does not let the error message to be + // extracted on failure. As a result just the return error code is tested + // as all other TFLite op handler tests. The error message each test invokes + // is captured in a comment though. + // ERROR: Inconsistent number of input tokens 3 != 2. 
+ SequenceStringProjectionV2Model m({{1, 2}}); + m.Invoke({"hello", "world", "goodbye"}, kTfLiteError); +} + +TEST(SequenceStringProjectionV2Test, RegularInputUint8) { + // OK + SequenceStringProjectionV2Model m({{1, 2}}); + m.Invoke({"hello", "world"}, kTfLiteOk); +} + +class SequenceStringProjectionV2Test : public TensorflowTfLiteOpTest { + std::function TfLiteOpRegistration() override { + return ops::custom::Register_SEQUENCE_STRING_PROJECTION_V2; + } + + std::string TensorflowOpName() override { + return "SequenceStringProjectionV2"; + } +}; + +TEST_P(SequenceStringProjectionV2Test, TensorflowTfLiteSame) { + RunTensorflowOp(); + RunTfLiteOp(); + CompareOpOutput(); +} + +std::vector SequenceStringProjectionV2TestCases() { + std::vector test_cases; + constexpr float kScale = 2.0 / 255; + constexpr int kZero = 127; + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoBoSNoEoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1, 5}, {"Hello", "World", "7153845", "&^$&", "^$&"})); + test_case.input_tensors.push_back(IntTensor({1}, {5})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoBoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(true); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.input_tensors.push_back( + StringTensor({1, 4}, {"Hello", "World", "7153845", "&^$&^$&"})); + test_case.input_tensors.push_back(IntTensor({1}, {4})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); 
+ } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEqualityNoEoS"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1, 3}, {"Hello", "World", "7153845&^$&^$&"})); + test_case.input_tensors.push_back(IntTensor({1}, {3})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "CheckEquality"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(true); + test_case.attributes["add_bos_tag"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1, 3}, {"Hello", "Worldddd", "7153845&^$&^$&"})); + test_case.input_tensors.push_back(IntTensor({1}, {3})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + { + OpEquivTestCase test_case; + test_case.test_name = "NormalizeRepetition"; + test_case.attributes["vocabulary"] = AttrValue(""); + test_case.attributes["feature_size"] = AttrValue(8); + test_case.attributes["add_eos_tag"] = AttrValue(false); + test_case.attributes["add_bos_tag"] = AttrValue(false); + test_case.attributes["normalize_repetition"] = AttrValue(true); + test_case.input_tensors.push_back( + StringTensor({1, 6}, {"Hello", "World", "...", "..", ".", "...."})); + test_case.input_tensors.push_back(IntTensor({1}, {6})); + test_case.output_tensors.emplace_back(FloatTensor({}, {}), kScale, kZero); + test_cases.push_back(test_case); + } + + return test_cases; +} + +INSTANTIATE_TEST_SUITE_P( + SequenceStringProjectionV2Tests, SequenceStringProjectionV2Test, + ::testing::ValuesIn(SequenceStringProjectionV2TestCases())); 
+ +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + // On Linux, add: absl::SetFlag(&FLAGS_logtostderr, true); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.cc b/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..389152afd6fbf24a0c95cba48bb6969fedd64b55 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.cc @@ -0,0 +1,382 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tflite_ops/tf_tflite_diff_test_util.h" // seq_flow_lite + +#include "flatbuffers/flexbuffers.h" // flatbuffer +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tflite { +namespace testing { + +using ::tensorflow::TensorProto; +using ::testing::FloatNear; + +::tflite::TensorType TfTypeToTfLiteType(::tensorflow::DataType dtype) { + switch (dtype) { + case ::tensorflow::DT_FLOAT: + return TensorType_FLOAT32; + + case ::tensorflow::DT_INT32: + return TensorType_INT32; + + case ::tensorflow::DT_STRING: + return TensorType_STRING; + + case ::tensorflow::DT_BOOL: + return TensorType_BOOL; + + default: + LOG(FATAL) << "Unrecognized dtype: " << dtype; + } +} + +void SetTensorProtoShape(const std::vector& shape, TensorProto* tensor) { + auto* tensor_shape = tensor->mutable_tensor_shape(); + for (int dim : shape) { + tensor_shape->add_dim()->set_size(dim); + } +} + +TensorProto BoolTensor(const std::vector& shape, + const std::vector& values) { + TensorProto tensor; + SetTensorProtoShape(shape, &tensor); + tensor.set_dtype(::tensorflow::DT_BOOL); + for (bool b : values) { + tensor.add_bool_val(b); + } + return tensor; +} + +TensorProto IntTensor(const std::vector& shape, + const std::vector& values) { + TensorProto tensor; + tensor.set_dtype(::tensorflow::DT_INT32); + SetTensorProtoShape(shape, &tensor); + for (int i : values) { + tensor.add_int_val(i); + } + return tensor; +} + +TensorProto FloatTensor(const std::vector& shape, + const std::vector& values) { + TensorProto tensor; + tensor.set_dtype(::tensorflow::DT_FLOAT); + SetTensorProtoShape(shape, &tensor); + for (float f : values) { + tensor.add_float_val(f); + } + return tensor; +} + +TensorProto StringTensor(const std::vector& shape, + const std::vector& values) { + TensorProto tensor; + 
tensor.set_dtype(::tensorflow::DT_STRING); + SetTensorProtoShape(shape, &tensor); + for (const std::string& s : values) { + tensor.add_string_val(s); + } + return tensor; +} + +void TensorflowTfLiteOpTest::SetUp() { + ConstructTensorflowOp(); + ConstructTfLiteOp(); +} + +void TensorflowTfLiteOpTest::ConstructTensorflowOp() { + ::tensorflow::NodeDefBuilder builder("test_op", TensorflowOpName()); + for (const auto& attribute : GetParam().attributes) { + builder.Attr(attribute.first, attribute.second); + } + + int index = 0; + for (const auto& input_tensor : GetParam().input_tensors) { + builder.Input("input", index, input_tensor.dtype()); + index++; + } + + TF_ASSERT_OK(builder.Finalize(node_def())); + TF_ASSERT_OK(InitOp()); +} + +void TensorflowTfLiteOpTest::RunTensorflowOp() { + for (const auto& input_tensor : GetParam().input_tensors) { + switch (input_tensor.dtype()) { + case ::tensorflow::DT_FLOAT: + AddInput(input_tensor.tensor_shape(), + [&input_tensor](int x) -> float { + return input_tensor.float_val(x); + }); + break; + + case ::tensorflow::DT_INT32: + AddInput( + input_tensor.tensor_shape(), + [&input_tensor](int x) -> int { return input_tensor.int_val(x); }); + break; + + case ::tensorflow::DT_STRING: + AddInput<::tensorflow::tstring>( + input_tensor.tensor_shape(), + [&input_tensor](int x) -> ::tensorflow::tstring { + return input_tensor.string_val(x); + }); + break; + + case ::tensorflow::DT_BOOL: + AddInput(input_tensor.tensor_shape(), + [&input_tensor](int x) -> bool { + return input_tensor.bool_val(x); + }); + break; + + default: + LOG(FATAL) << "Unrecognized dtype: " << input_tensor.DebugString(); + } + } + + TF_ASSERT_OK(RunOpKernel()); +} + +std::vector ConstructTfLiteCustomOptions( + absl::flat_hash_map attributes, + const std::string& tensorflow_op) { + // Get the default attributes of the Tensorflow op. 
+ const ::tensorflow::OpDef* tf_op_def; + TF_CHECK_OK(::tensorflow::OpRegistry::Global()->LookUpOpDef(tensorflow_op, + &tf_op_def)); + for (const auto& tf_attribute : tf_op_def->attr()) { + if (tf_attribute.has_default_value() && + !attributes.contains(tf_attribute.name())) { + attributes[tf_attribute.name()] = tf_attribute.default_value(); + } + } + + ::flexbuffers::Builder fbb; + size_t map_start = fbb.StartMap(); + for (const auto& attribute : attributes) { + switch (attribute.second.value_case()) { + case ::tensorflow::AttrValue::kS: + fbb.String(attribute.first.c_str(), attribute.second.s()); + break; + + case ::tensorflow::AttrValue::kI: + fbb.Int(attribute.first.c_str(), attribute.second.i()); + break; + + case ::tensorflow::AttrValue::kF: + fbb.Float(attribute.first.c_str(), attribute.second.f()); + break; + + case ::tensorflow::AttrValue::kB: + fbb.Bool(attribute.first.c_str(), attribute.second.b()); + break; + + case ::tensorflow::AttrValue::kList: { + int start = fbb.StartVector(attribute.first.c_str()); + if (attribute.second.list().s_size() > 0) { + for (const std::string& s : attribute.second.list().s()) { + fbb.String(s); + } + } else if (attribute.second.list().i_size() > 0) { + for (int i : attribute.second.list().i()) { + fbb.Int(i); + } + } else if (attribute.second.list().f_size() > 0) { + for (float f : attribute.second.list().f()) { + fbb.Float(f); + } + } else if (attribute.second.list().b_size() > 0) { + for (bool b : attribute.second.list().b()) { + fbb.Bool(b); + } + } + fbb.EndVector(start, /*typed=*/true, /*fixed=*/false); + break; + } + + default: + LOG(FATAL) << "Unrecognized AttrValue type: " + << attribute.second.DebugString(); + } + } + fbb.EndMap(map_start); + fbb.Finish(); + + return std::vector(fbb.GetBuffer()); +} + +void TensorflowTfLiteOpTest::ConstructTfLiteOp() { + std::vector> input_shapes; + for (const auto& input_tensor : GetParam().input_tensors) { + std::vector shape; + for (const auto& dim : 
input_tensor.tensor_shape().dim()) { + shape.push_back(dim.size()); + } + input_shapes.push_back(shape); + + tflite_inputs_.push_back( + tflite_op_.AddInput(TfTypeToTfLiteType(input_tensor.dtype()))); + } + + for (const auto& output_tensor : GetParam().output_tensors) { + std::vector shape; + for (const auto& dim : output_tensor.tensor.tensor_shape().dim()) { + shape.push_back(dim.size()); + } + if (output_tensor.quantization_params.scale != 0.0) { + ASSERT_EQ(output_tensor.tensor.dtype(), ::tensorflow::DT_FLOAT) + << "Quantization attempted on non-float tensor: " + << output_tensor.tensor.DebugString(); + // We can safely use as zero min and max, as they'll be ignored and + // the scale and zero_point will be used instead. + tflite_outputs_.push_back(tflite_op_.AddOutput( + {TensorType_UINT8, shape, /*min=*/0.0, /*max=*/0.0, + output_tensor.quantization_params.scale, + output_tensor.quantization_params.zero_point})); + } else { + tflite_outputs_.push_back(tflite_op_.AddOutput( + {TfTypeToTfLiteType(output_tensor.tensor.dtype()), shape})); + } + } + + tflite_op_.SetCustomOp( + TfLiteOpName(), + ConstructTfLiteCustomOptions(GetParam().attributes, TensorflowOpName()), + TfLiteOpRegistration()); + tflite_op_.BuildInterpreter(input_shapes); +} + +void TensorflowTfLiteOpTest::RunTfLiteOp() { + int input_index = 0; + for (const auto& input_tensor : GetParam().input_tensors) { + switch (input_tensor.dtype()) { + case ::tensorflow::DT_FLOAT: { + std::vector float_val(input_tensor.float_val().begin(), + input_tensor.float_val().end()); + tflite_op_.PopulateTensor(tflite_inputs_[input_index], + float_val); + break; + } + + case ::tensorflow::DT_INT32: { + std::vector int_val(input_tensor.int_val().begin(), + input_tensor.int_val().end()); + tflite_op_.PopulateTensor(tflite_inputs_[input_index], int_val); + break; + } + + case ::tensorflow::DT_STRING: { + std::vector string_val(input_tensor.string_val().begin(), + input_tensor.string_val().end()); + 
tflite_op_.PopulateStringTensor(tflite_inputs_[input_index], + string_val); + break; + } + + case ::tensorflow::DT_BOOL: { + std::vector bool_val(input_tensor.bool_val().begin(), + input_tensor.bool_val().end()); + tflite_op_.PopulateTensor(tflite_inputs_[input_index], bool_val); + break; + } + + default: + LOG(FATAL) << "Unrecognized dtype: " << input_tensor.DebugString(); + } + input_index++; + } + + tflite_op_.Invoke(); +} + +void TensorflowTfLiteOpTest::CompareOpOutput() { + for (int i = 0; i < tflite_outputs_.size(); i++) { + const ::tensorflow::Tensor& tf_output = *GetOutput(i); + std::vector tflite_output_shape = + tflite_op_.GetTensorShape(tflite_outputs_[i]); + auto tf_output_shape = tf_output.shape(); + EXPECT_EQ(tf_output_shape.dims(), tflite_output_shape.size()); + for (int j = 0; j < tf_output_shape.dims(); j++) { + EXPECT_EQ(tf_output_shape.dim_size(j), tflite_output_shape[j]); + } + + switch (tf_output.dtype()) { + case ::tensorflow::DT_FLOAT: { + auto tf_output_values = tf_output.flat(); + const auto& quantization_params = + GetParam().output_tensors[i].quantization_params; + if (quantization_params.scale != 0.0) { + auto tflite_output_values = Dequantize( + tflite_op_.ExtractVector(tflite_outputs_[i]), + quantization_params.scale, quantization_params.zero_point); + for (int i = 0; i < tf_output_values.size(); i++) { + EXPECT_THAT( + tf_output_values(i), + FloatNear(tflite_output_values[i], quantization_params.scale)); + } + } else { + auto tflite_output_values = + tflite_op_.ExtractVector(tflite_outputs_[i]); + for (int i = 0; i < tf_output_values.size(); i++) { + EXPECT_EQ(tf_output_values(i), tflite_output_values[i]); + } + } + break; + } + + case ::tensorflow::DT_INT32: { + auto tf_output_values = tf_output.flat(); + auto tflite_output_values = + tflite_op_.ExtractVector(tflite_outputs_[i]); + for (int i = 0; i < tf_output_values.size(); i++) { + EXPECT_EQ(tf_output_values(i), tflite_output_values[i]); + } + break; + } + + case 
::tensorflow::DT_BOOL: { + auto tf_output_values = tf_output.flat(); + auto tflite_output_values = + tflite_op_.ExtractVector(tflite_outputs_[i]); + for (int i = 0; i < tf_output_values.size(); i++) { + EXPECT_EQ(tf_output_values(i), tflite_output_values[i]); + } + break; + } + + case ::tensorflow::DT_STRING: { + auto tf_output_values = tf_output.flat<::tensorflow::tstring>(); + auto tflite_output_values = + tflite_op_.ExtractVector(tflite_outputs_[i]); + for (int i = 0; i < tf_output_values.size(); i++) { + EXPECT_EQ(tf_output_values(i), tflite_output_values[i]); + } + break; + } + + default: + LOG(FATAL) << "Unrecognized dtype: " << tf_output.dtype(); + } + } +} + +} // namespace testing +} // namespace tflite diff --git a/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.h b/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.h new file mode 100644 index 0000000000000000000000000000000000000000..53eff044736976c4182d7bb9e3c7cf622cc02b00 --- /dev/null +++ b/research/seq_flow_lite/tflite_ops/tf_tflite_diff_test_util.h @@ -0,0 +1,149 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Tests equivalence between TF and TFLite versions of an op. 
+ +#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_TF_TFLITE_DIFF_TEST_UTIL_H_ +#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_TF_TFLITE_DIFF_TEST_UTIL_H_ + +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/lite/kernels/test_util.h" + +namespace tflite { +namespace testing { + +// Convenience constructors. +template +::tensorflow::AttrValue AttrValue(T value) { + ::tensorflow::AttrValue attr_value; + ::tensorflow::SetAttrValue(value, &attr_value); + return attr_value; +} +::tensorflow::TensorProto BoolTensor(const std::vector& shape, + const std::vector& values); +::tensorflow::TensorProto IntTensor(const std::vector& shape, + const std::vector& values); +::tensorflow::TensorProto FloatTensor(const std::vector& shape, + const std::vector& values); +::tensorflow::TensorProto StringTensor(const std::vector& shape, + const std::vector& values); + +struct OutputTensor { + explicit OutputTensor(const ::tensorflow::TensorProto& tensor) + : tensor(tensor) { + quantization_params.scale = 0.0; + } + OutputTensor(const ::tensorflow::TensorProto& tensor, float scale, + int zero_point) + : tensor(tensor) { + quantization_params.scale = scale; + quantization_params.zero_point = zero_point; + } + + ::tensorflow::TensorProto tensor; + TfLiteQuantizationParams quantization_params; +}; + +struct OpEquivTestCase { + std::string test_name; + absl::flat_hash_map attributes; + std::vector<::tensorflow::TensorProto> input_tensors; + std::vector output_tensors; +}; + +// Convert Tensorflow attributes into an equivalent TFLite flatbuffer. Adds the +// default attribute values from `tensorflow_op`, if they are not set in +// `attributes`. 
+std::vector ConstructTfLiteCustomOptions( + absl::flat_hash_map attributes, + const std::string& tensorflow_op); + +// A test class that can be used to compare that a Tensorflow op and a +// TFLite op are producing the same output. +// +// To use: +// 1) Sub-class TensorflowTfLiteOpTest. +// Define TfLiteOpRegistration() and TensorflowOpName(). +// +// class NewOpEquivTest : public TensorflowTfLiteOpTest { +// std::function TfLiteOpRegistration() override { +// return ::tflite::custom::Register_NEW_OP; +// } +// std::string TensorflowOpName() override { return "NewOp"; } +// }; +// +// 2) Declare a TEST_P (parameterized test) to perform the comparison. +// +// TEST_P(NewOpEquivTest, Compare) { +// RunTensorflowOp(); +// RunTfLiteOp(); +// CompareOpOutput(); +// } +// +// 3) Define your test cases. +// +// std::vector NewEquivOpTestCases() { +// std::vector test_cases; +// { +// OpEquivTestCase test_case; +// test_case.test_name = "Simple"; +// test_case.attributes["int_attr"] = AttrValue(1); +// test_case.attributes["bool_attr"] = AttrValue(true); +// test_case.input_tensor.push_back(StringTensor({1, 2}, {"a", "b"})); +// test_case.output_tensors.emplace_back(FloatTensor({}, {})); +// test_cases.push_back(test_case); +// } +// return test_cases; +// } +// +// 4) Instantiate your tests. 
+// +// INSTANTIATE_TEST_SUITE_P( +// NewOpEquivTest, +// NewOpEquivTest, +// ::testing::ValuesIn(NewOpEquivTestCases()), +// ::expander::GetTestName()); +class TensorflowTfLiteOpTest + : public ::tensorflow::OpsTestBase, + public ::testing::WithParamInterface { + protected: + void SetUp() override; + + virtual void ConstructTensorflowOp(); + virtual void RunTensorflowOp(); + + virtual void ConstructTfLiteOp(); + virtual void RunTfLiteOp(); + + virtual void CompareOpOutput(); + + virtual std::function TfLiteOpRegistration() = 0; + virtual std::string TfLiteOpName() { return "TestOp"; } + virtual std::string TensorflowOpName() = 0; + + private: + ::tflite::SingleOpModel tflite_op_; + std::vector tflite_inputs_; + std::vector tflite_outputs_; +}; + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_TFLITE_OPS_TF_TFLITE_DIFF_TEST_UTIL_H_ diff --git a/research/seq_flow_lite/third_party/BUILD b/research/seq_flow_lite/third_party/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..5b01f6e3e4cfd195327e08ff6a957acce4e21c71 --- /dev/null +++ b/research/seq_flow_lite/third_party/BUILD @@ -0,0 +1 @@ +licenses(["notice"]) diff --git a/research/seq_flow_lite/third_party/android/BUILD b/research/seq_flow_lite/third_party/android/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..fd69d4baa8517f24ba30999c1fd67c13e44e7557 --- /dev/null +++ b/research/seq_flow_lite/third_party/android/BUILD @@ -0,0 +1 @@ +# Placeholder to make bazel treat it as a package. 
diff --git a/research/seq_flow_lite/third_party/android/android.bzl.tpl b/research/seq_flow_lite/third_party/android/android.bzl.tpl
new file mode 100644
index 0000000000000000000000000000000000000000..e6ed4994f3ba6d721d717a04b0bd22f54dbb1d79
--- /dev/null
+++ b/research/seq_flow_lite/third_party/android/android.bzl.tpl
"""Set up configurable Android SDK and NDK dependencies."""

def android_workspace():
    # String for replacement in Bazel template.
    # These will either be replaced by android_sdk_repository if various ENV
    # variables are set when `local_config_android` repo_rule is run, or they
    # will be replaced by noops otherwise.
    MAYBE_ANDROID_SDK_REPOSITORY
    MAYBE_ANDROID_NDK_REPOSITORY
diff --git a/official/benchmark/__init__.py b/research/seq_flow_lite/third_party/android/android_configure.BUILD.tpl
similarity index 100%
rename from official/benchmark/__init__.py
rename to research/seq_flow_lite/third_party/android/android_configure.BUILD.tpl
diff --git a/research/seq_flow_lite/third_party/android/android_configure.bzl b/research/seq_flow_lite/third_party/android/android_configure.bzl
new file mode 100644
index 0000000000000000000000000000000000000000..2fd2d8071a38c0e9ec8429bff95b3dd4813b8b77
--- /dev/null
+++ b/research/seq_flow_lite/third_party/android/android_configure.bzl
"""Repository rule for Android SDK and NDK autoconfiguration.

`android_configure` depends on the following environment variables:

  * `ANDROID_NDK_HOME`: Location of Android NDK root.
  * `ANDROID_SDK_HOME`: Location of Android SDK root.
  * `ANDROID_SDK_API_LEVEL`: Desired Android SDK API version.
  * `ANDROID_NDK_API_LEVEL`: Desired Android NDK API version.
  * `ANDROID_BUILD_TOOLS_VERSION`: Desired Android build tools version.


Writes Android SDK and NDK rules.

Add the following to your WORKSPACE FILE:

```python
android_configure(name = "local_config_android")
```

Args:
  name: A unique name for this workspace rule.
"""

_ANDROID_NDK_HOME = "ANDROID_NDK_HOME"
_ANDROID_SDK_HOME = "ANDROID_SDK_HOME"
_ANDROID_NDK_API_VERSION = "ANDROID_NDK_API_LEVEL"
_ANDROID_SDK_API_VERSION = "ANDROID_SDK_API_LEVEL"
_ANDROID_BUILD_TOOLS_VERSION = "ANDROID_BUILD_TOOLS_VERSION"

_ANDROID_SDK_REPO_TEMPLATE = """
    native.android_sdk_repository(
        name="androidsdk",
        path="%s",
        api_level=%s,
        build_tools_version="%s",
    )
"""

_ANDROID_NDK_REPO_TEMPLATE = """
    native.android_ndk_repository(
        name="androidndk",
        path="%s",
        api_level=%s,
    )
"""

def _android_autoconf_impl(repository_ctx):
    """Implementation of the android_autoconf repository rule."""
    sdk_home = repository_ctx.os.environ.get(_ANDROID_SDK_HOME)
    sdk_api_level = repository_ctx.os.environ.get(_ANDROID_SDK_API_VERSION)
    build_tools_version = repository_ctx.os.environ.get(
        _ANDROID_BUILD_TOOLS_VERSION,
    )
    ndk_home = repository_ctx.os.environ.get(_ANDROID_NDK_HOME)
    ndk_api_level = repository_ctx.os.environ.get(_ANDROID_NDK_API_VERSION)

    # Emit the SDK repository rule only when every SDK variable is present.
    sdk_rule = ""
    if all([sdk_home, sdk_api_level, build_tools_version]):
        sdk_rule = _ANDROID_SDK_REPO_TEMPLATE % (
            sdk_home,
            sdk_api_level,
            build_tools_version,
        )

    # Emit the NDK repository rule only when every NDK variable is present.
    ndk_rule = ""
    if all([ndk_home, ndk_api_level]):
        ndk_rule = _ANDROID_NDK_REPO_TEMPLATE % (ndk_home, ndk_api_level)

    # android_workspace() must have a non-empty body even when nothing is
    # configured, so substitute a bare `pass`.
    if ndk_rule == "" and sdk_rule == "":
        sdk_rule = "pass"
        # TODO(xunkai): Add interactive configure script.

    repository_ctx.template(
        "BUILD",
        Label("//third_party/android:android_configure.BUILD.tpl"),
    )
    repository_ctx.template(
        "android.bzl",
        Label("//third_party/android:android.bzl.tpl"),
        substitutions = {
            "MAYBE_ANDROID_SDK_REPOSITORY": sdk_rule,
            "MAYBE_ANDROID_NDK_REPOSITORY": ndk_rule,
        },
    )

android_configure = repository_rule(
    implementation = _android_autoconf_impl,
    environ = [
        _ANDROID_SDK_API_VERSION,
        _ANDROID_NDK_API_VERSION,
        _ANDROID_BUILD_TOOLS_VERSION,
        _ANDROID_NDK_HOME,
        _ANDROID_SDK_HOME,
    ],
)
diff --git a/research/seq_flow_lite/third_party/farmhash.BUILD b/research/seq_flow_lite/third_party/farmhash.BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..4b8464684ae61a7650262fe1d00f439a149ed358
--- /dev/null
+++ b/research/seq_flow_lite/third_party/farmhash.BUILD
licenses(["notice"])  # MIT

exports_files(["COPYING"])

config_setting(
    name = "windows",
    values = {
        "cpu": "x64_windows",
    },
)

cc_library(
    name = "farmhash",
    srcs = ["src/farmhash.cc"],
    hdrs = ["src/farmhash.h"],
    # Disable __builtin_expect support on Windows
    copts = select({
        ":windows": ["/DFARMHASH_OPTIONAL_BUILTIN_EXPECT"],
        "//conditions:default": [],
    }),
    includes = ["src/."],
    visibility = ["//visibility:public"],
)
diff --git a/research/seq_flow_lite/third_party/flatbuffers/BUILD b/research/seq_flow_lite/third_party/flatbuffers/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..82bab3ffd9646371869aafa09115ef0bb46d2862
--- /dev/null
+++ b/research/seq_flow_lite/third_party/flatbuffers/BUILD
# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/research/seq_flow_lite/third_party/flatbuffers/BUILD.bazel b/research/seq_flow_lite/third_party/flatbuffers/BUILD.bazel new file mode 100644 index 0000000000000000000000000000000000000000..1ee46f05235bc01cb464b2555fec8776800532c7 --- /dev/null +++ b/research/seq_flow_lite/third_party/flatbuffers/BUILD.bazel @@ -0,0 +1,137 @@ +# All load() statements must precede any other statement in a BUILD file
+# (enforced by --incompatible_bzl_disallow_load_after_statement).
+load("@build_bazel_rules_android//android:rules.bzl", "android_library")
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"]) # Apache 2.0
+
+exports_files(["LICENSE.txt"])
+
+config_setting(
+ name = "freebsd",
+ values = {"cpu": "freebsd"},
+)
+
+config_setting(
+ name = "windows",
+ values = {"cpu": "x64_windows"},
+)
+
+# Public flatc library to compile flatbuffer files at runtime.
+cc_library(
+ name = "flatbuffers",
+ hdrs = ["//:public_headers"],
+ linkstatic = 1,
+ strip_include_prefix = "/include",
+ visibility = ["//visibility:public"],
+ deps = ["//src:flatbuffers"],
+)
+
+# Public C++ headers for the Flatbuffers library.
+filegroup(
+ name = "public_headers",
+ srcs = [
+ "include/flatbuffers/base.h",
+ "include/flatbuffers/code_generators.h",
+ "include/flatbuffers/flatbuffers.h",
+ "include/flatbuffers/flexbuffers.h",
+ "include/flatbuffers/hash.h",
+ "include/flatbuffers/idl.h",
+ "include/flatbuffers/minireflect.h",
+ "include/flatbuffers/reflection.h",
+ "include/flatbuffers/reflection_generated.h",
+ "include/flatbuffers/registry.h",
+ "include/flatbuffers/stl_emulation.h",
+ "include/flatbuffers/util.h",
+ ],
+ visibility = ["//:__subpackages__"],
+)
+
+# Public flatc compiler library.
+cc_library(
+ name = "flatc_library",
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = [
+ "@flatbuffers//src:flatc_library",
+ ],
+)
+
+# Public flatc compiler. 
+cc_binary( + name = "flatc", + linkopts = select({ + ":freebsd": [ + "-lm", + ], + ":windows": [], + "//conditions:default": [ + "-lm", + "-ldl", + ], + }), + visibility = ["//visibility:public"], + deps = [ + "@flatbuffers//src:flatc", + ], +) + +filegroup( + name = "flatc_headers", + srcs = [ + "include/flatbuffers/flatc.h", + ], + visibility = ["//:__subpackages__"], +) + +# Library used by flatbuffer_cc_library rules. +cc_library( + name = "runtime_cc", + hdrs = [ + "include/flatbuffers/base.h", + "include/flatbuffers/flatbuffers.h", + "include/flatbuffers/flexbuffers.h", + "include/flatbuffers/stl_emulation.h", + "include/flatbuffers/util.h", + ], + linkstatic = 1, + strip_include_prefix = "/include", + visibility = ["//visibility:public"], +) + +filegroup( + name = "runtime_py_srcs", + srcs = [ + "python/flatbuffers/__init__.py", + "python/flatbuffers/builder.py", + "python/flatbuffers/compat.py", + "python/flatbuffers/encode.py", + "python/flatbuffers/number_types.py", + "python/flatbuffers/packer.py", + "python/flatbuffers/table.py", + "python/flatbuffers/util.py", + ], +) + +py_library( + name = "runtime_py", + srcs = [":runtime_py_srcs"], + visibility = ["//visibility:public"], +) + +filegroup( + name = "runtime_java_srcs", + srcs = glob(["java/com/google/flatbuffers/**/*.java"]), +) + +java_library( + name = "runtime_java", + srcs = [":runtime_java_srcs"], + visibility = ["//visibility:public"], +) + +android_library( + name = "runtime_android", + srcs = [":runtime_java_srcs"], + visibility = ["//visibility:public"], +) diff --git a/research/seq_flow_lite/third_party/flatbuffers/build_defs.bzl b/research/seq_flow_lite/third_party/flatbuffers/build_defs.bzl new file mode 100644 index 0000000000000000000000000000000000000000..767549fdc76f3ef34c370c31c35a72381db5c649 --- /dev/null +++ b/research/seq_flow_lite/third_party/flatbuffers/build_defs.bzl @@ -0,0 +1,604 @@ +"""BUILD rules for generating flatbuffer files.""" + 
+load("@build_bazel_rules_android//android:rules.bzl", "android_library") + +flatc_path = "@flatbuffers//:flatc" +zip_files = "@org_tflite_support//tensorflow_lite_support/tools:zip_files" + +DEFAULT_INCLUDE_PATHS = [ + "./", + "$(GENDIR)", + "$(BINDIR)", +] + +DEFAULT_FLATC_ARGS = [ + "--no-union-value-namespacing", + "--gen-object-api", +] + +def flatbuffer_library_public( + name, + srcs, + outs, + language_flag, + out_prefix = "", + includes = [], + include_paths = [], + flatc_args = DEFAULT_FLATC_ARGS, + reflection_name = "", + reflection_visibility = None, + output_to_bindir = False): + """Generates code files for reading/writing the given flatbuffers in the requested language using the public compiler. + + Outs: + filegroup(name): all generated source files. + Fileset([reflection_name]): (Optional) all generated reflection binaries. + + Args: + name: Rule name. + srcs: Source .fbs files. Sent in order to the compiler. + outs: Output files from flatc. + language_flag: Target language flag. One of [-c, -j, -js]. + out_prefix: Prepend this path to the front of all generated files except on + single source targets. Usually is a directory name. + includes: Optional, list of filegroups of schemas that the srcs depend on. + include_paths: Optional, list of paths the includes files can be found in. + flatc_args: Optional, list of additional arguments to pass to flatc. + reflection_name: Optional, if set this will generate the flatbuffer + reflection binaries for the schemas. + reflection_visibility: The visibility of the generated reflection Fileset. + output_to_bindir: Passed to genrule for output to bin directory. + """ + include_paths_cmd = ["-I %s" % (s) for s in include_paths] + + # '$(@D)' when given a single source target will give the appropriate + # directory. Appending 'out_prefix' is only necessary when given a build + # target with multiple sources. 
+ output_directory = ( + ("-o $(@D)/%s" % (out_prefix)) if len(srcs) > 1 else ("-o $(@D)") + ) + genrule_cmd = " ".join([ + "for f in $(SRCS); do", + "$(location %s)" % (flatc_path), + " ".join(flatc_args), + " ".join(include_paths_cmd), + language_flag, + output_directory, + "$$f;", + "done", + ]) + native.genrule( + name = name, + srcs = srcs, + outs = outs, + output_to_bindir = output_to_bindir, + tools = includes + [flatc_path], + cmd = genrule_cmd, + message = "Generating flatbuffer files for %s:" % (name), + ) + if reflection_name: + reflection_genrule_cmd = " ".join([ + "for f in $(SRCS); do", + "$(location %s)" % (flatc_path), + "-b --schema", + " ".join(flatc_args), + " ".join(include_paths_cmd), + language_flag, + output_directory, + "$$f;", + "done", + ]) + reflection_outs = [ + (out_prefix + "%s.bfbs") % (s.replace(".fbs", "").split("/")[-1]) + for s in srcs + ] + native.genrule( + name = "%s_srcs" % reflection_name, + srcs = srcs, + outs = reflection_outs, + output_to_bindir = output_to_bindir, + tools = includes + [flatc_path], + cmd = reflection_genrule_cmd, + message = "Generating flatbuffer reflection binary for %s:" % (name), + ) + # TODO(b/114456773): Make bazel rules proper and supported by flatbuffer + # Have to comment this since FilesetEntry is not supported in bazel + # skylark. + # native.Fileset( + # name = reflection_name, + # out = "%s_out" % reflection_name, + # entries = [ + # native.FilesetEntry(files = reflection_outs), + # ], + # visibility = reflection_visibility, + # ) + +def flatbuffer_cc_library( + name, + srcs, + srcs_filegroup_name = "", + out_prefix = "", + includes = [], + include_paths = [], + flatc_args = DEFAULT_FLATC_ARGS, + visibility = None, + srcs_filegroup_visibility = None, + gen_reflections = False): + '''A cc_library with the generated reader/writers for the given flatbuffer definitions. + + Outs: + filegroup([name]_srcs): all generated .h files. 
+ filegroup(srcs_filegroup_name if specified, or [name]_includes if not): + Other flatbuffer_cc_library's can pass this in for their `includes` + parameter, if they depend on the schemas in this library. + Fileset([name]_reflection): (Optional) all generated reflection binaries. + cc_library([name]): library with sources and flatbuffers deps. + + Remarks: + ** Because the genrule used to call flatc does not have any trivial way of + computing the output list of files transitively generated by includes and + --gen-includes (the default) being defined for flatc, the --gen-includes + flag will not work as expected. The way around this is to add a dependency + to the flatbuffer_cc_library defined alongside the flatc included Fileset. + For example you might define: + + flatbuffer_cc_library( + name = "my_fbs", + srcs = [ "schemas/foo.fbs" ], + includes = [ "//third_party/bazz:bazz_fbs_includes" ], + ) + + In which foo.fbs includes a few files from the Fileset defined at + //third_party/bazz:bazz_fbs_includes. When compiling the library that + includes foo_generated.h, and therefore has my_fbs as a dependency, it + will fail to find any of the bazz *_generated.h files unless you also + add bazz's flatbuffer_cc_library to your own dependency list, e.g.: + + cc_library( + name = "my_lib", + deps = [ + ":my_fbs", + "//third_party/bazz:bazz_fbs" + ], + ) + + Happy dependent Flatbuffering! + + Args: + name: Rule name. + srcs: Source .fbs files. Sent in order to the compiler. + srcs_filegroup_name: Name of the output filegroup that holds srcs. Pass this + filegroup into the `includes` parameter of any other + flatbuffer_cc_library that depends on this one's schemas. + out_prefix: Prepend this path to the front of all generated files. Usually + is a directory name. + includes: Optional, list of filegroups of schemas that the srcs depend on. + ** SEE REMARKS BELOW ** + include_paths: Optional, list of paths the includes files can be found in. 
+ flatc_args: Optional list of additional arguments to pass to flatc + (e.g. --gen-mutable). + visibility: The visibility of the generated cc_library. By default, use the + default visibility of the project. + srcs_filegroup_visibility: The visibility of the generated srcs filegroup. + By default, use the value of the visibility parameter above. + gen_reflections: Optional, if true this will generate the flatbuffer + reflection binaries for the schemas. + ''' + output_headers = [ + (out_prefix + "%s_generated.h") % (s.replace(".fbs", "").split("/")[-1]) + for s in srcs + ] + reflection_name = "%s_reflection" % name if gen_reflections else "" + + flatbuffer_library_public( + name = "%s_srcs" % (name), + srcs = srcs, + outs = output_headers, + language_flag = "-c", + out_prefix = out_prefix, + includes = includes, + include_paths = include_paths, + flatc_args = flatc_args, + reflection_name = reflection_name, + reflection_visibility = visibility, + ) + native.cc_library( + name = name, + hdrs = output_headers, + srcs = output_headers, + features = [ + "-parse_headers", + ], + deps = [ + "@flatbuffers//:runtime_cc", + ], + includes = ["."], + linkstatic = 1, + visibility = visibility, + ) + + # A filegroup for the `srcs`. That is, all the schema files for this + # Flatbuffer set. + native.filegroup( + name = srcs_filegroup_name if srcs_filegroup_name else "%s_includes" % (name), + srcs = srcs, + visibility = srcs_filegroup_visibility if srcs_filegroup_visibility != None else visibility, + ) + +# Custom provider to track dependencies transitively. 
+FlatbufferInfo = provider( + fields = { + "transitive_srcs": "flatbuffer schema definitions.", + }, +) + +def _flatbuffer_schemas_aspect_impl(target, ctx): + _ignore = [target] + transitive_srcs = depset() + if hasattr(ctx.rule.attr, "deps"): + for dep in ctx.rule.attr.deps: + if FlatbufferInfo in dep: + transitive_srcs = depset(dep[FlatbufferInfo].transitive_srcs, transitive = [transitive_srcs]) + if hasattr(ctx.rule.attr, "srcs"): + for src in ctx.rule.attr.srcs: + if FlatbufferInfo in src: + transitive_srcs = depset(src[FlatbufferInfo].transitive_srcs, transitive = [transitive_srcs]) + for f in src.files: + if f.extension == "fbs": + transitive_srcs = depset([f], transitive = [transitive_srcs]) + return [FlatbufferInfo(transitive_srcs = transitive_srcs)] + +# An aspect that runs over all dependencies and transitively collects +# flatbuffer schema files. +_flatbuffer_schemas_aspect = aspect( + attr_aspects = [ + "deps", + "srcs", + ], + implementation = _flatbuffer_schemas_aspect_impl, +) + +# Rule to invoke the flatbuffer compiler. +def _gen_flatbuffer_srcs_impl(ctx): + outputs = ctx.attr.outputs + include_paths = ctx.attr.include_paths + if ctx.attr.no_includes: + no_includes_statement = ["--no-includes"] + else: + no_includes_statement = [] + + # Need to generate all files in a directory. 
+ if not outputs: + outputs = [ctx.actions.declare_directory("{}_all".format(ctx.attr.name))] + output_directory = outputs[0].path + else: + outputs = [ctx.actions.declare_file(output) for output in outputs] + output_directory = outputs[0].dirname + + deps = depset(ctx.files.srcs + ctx.files.deps, transitive = [ + dep[FlatbufferInfo].transitive_srcs + for dep in ctx.attr.deps + if FlatbufferInfo in dep + ]) + + include_paths_cmd_line = [] + for s in include_paths: + include_paths_cmd_line.extend(["-I", s]) + + for src in ctx.files.srcs: + ctx.actions.run( + inputs = deps, + outputs = outputs, + executable = ctx.executable._flatc, + arguments = [ + ctx.attr.language_flag, + "-o", + output_directory, + # Allow for absolute imports and referencing of generated files. + "-I", + "./", + "-I", + ctx.genfiles_dir.path, + "-I", + ctx.bin_dir.path, + ] + no_includes_statement + + include_paths_cmd_line + [ + "--no-union-value-namespacing", + "--gen-object-api", + src.path, + ], + progress_message = "Generating flatbuffer files for {}:".format(src), + ) + return [ + DefaultInfo(files = depset(outputs)), + ] + +_gen_flatbuffer_srcs = rule( + _gen_flatbuffer_srcs_impl, + attrs = { + "srcs": attr.label_list( + allow_files = [".fbs"], + mandatory = True, + ), + "outputs": attr.string_list( + default = [], + mandatory = False, + ), + "deps": attr.label_list( + default = [], + mandatory = False, + aspects = [_flatbuffer_schemas_aspect], + ), + "include_paths": attr.string_list( + default = [], + mandatory = False, + ), + "language_flag": attr.string( + mandatory = True, + ), + "no_includes": attr.bool( + default = False, + mandatory = False, + ), + "_flatc": attr.label( + default = Label("@flatbuffers//:flatc"), + executable = True, + cfg = "host", + ), + }, + output_to_genfiles = True, +) + +def _concat_flatbuffer_py_srcs_impl(ctx): + # Merge all generated python files. The files are concatenated and the + # import statements are removed. 
Finally we import the flatbuffer runtime + # library. + command = "find '%s' -name '*.py' -exec cat {} + | sed '/import flatbuffers/d'" + command += " | sed '1s/^/import flatbuffers\\'$'\\n/' > %s" + ctx.actions.run_shell( + inputs = ctx.attr.deps[0].files, + outputs = [ctx.outputs.out], + command = command % ( + ctx.attr.deps[0].files.to_list()[0].path, + ctx.outputs.out.path, + ), + ) + +_concat_flatbuffer_py_srcs = rule( + _concat_flatbuffer_py_srcs_impl, + attrs = { + "deps": attr.label_list(mandatory = True), + }, + output_to_genfiles = True, + outputs = {"out": "%{name}.py"}, +) + +def flatbuffer_py_library( + name, + srcs, + deps = [], + include_paths = []): + """A py_library with the generated reader/writers for the given schema. + + This rule assumes that the schema files define non-conflicting names, so that + they can be merged in a single file. This is e.g. the case if only a single + namespace is used. + The rule call the flatbuffer compiler for all schema files and merges the + generated python files into a single file that is wrapped in a py_library. + + Args: + name: Rule name. (required) + srcs: List of source .fbs files. (required) + deps: List of dependencies. + include_paths: Optional, list of paths the includes files can be found in. 
+ """ + all_srcs = "{}_srcs".format(name) + _gen_flatbuffer_srcs( + name = all_srcs, + srcs = srcs, + language_flag = "--python", + deps = deps, + include_paths = include_paths, + ) + all_srcs_no_include = "{}_srcs_no_include".format(name) + _gen_flatbuffer_srcs( + name = all_srcs_no_include, + srcs = srcs, + language_flag = "--python", + deps = deps, + no_includes = True, + include_paths = include_paths, + ) + concat_py_srcs = "{}_generated".format(name) + _concat_flatbuffer_py_srcs( + name = concat_py_srcs, + deps = [ + ":{}".format(all_srcs_no_include), + ], + ) + native.py_library( + name = name, + srcs = [ + ":{}".format(concat_py_srcs), + ], + srcs_version = "PY2AND3", + deps = deps, + ) + +def flatbuffer_java_library( + name, + srcs, + custom_package = "", + package_prefix = "", + include_paths = DEFAULT_INCLUDE_PATHS, + flatc_args = DEFAULT_FLATC_ARGS, + visibility = None): + """A java library with the generated reader/writers for the given flatbuffer definitions. + + Args: + name: Rule name. (required) + srcs: List of source .fbs files including all includes. (required) + custom_package: Package name of generated Java files. If not specified + namespace in the schema files will be used. (optional) + package_prefix: like custom_package, but prefixes to the existing + namespace. (optional) + include_paths: List of paths that includes files can be found in. (optional) + flatc_args: List of additional arguments to pass to flatc. (optional) + visibility: Visibility setting for the java_library rule. 
(optional) + """ + out_srcjar = "java_%s_all.srcjar" % name + flatbuffer_java_srcjar( + name = "%s_srcjar" % name, + srcs = srcs, + out = out_srcjar, + custom_package = custom_package, + flatc_args = flatc_args, + include_paths = include_paths, + package_prefix = package_prefix, + ) + + native.filegroup( + name = "%s.srcjar" % name, + srcs = [out_srcjar], + ) + + native.java_library( + name = name, + srcs = [out_srcjar], + javacopts = ["-source 7 -target 7"], + deps = [ + "@flatbuffers//:runtime_java", + ], + visibility = visibility, + ) + +def flatbuffer_java_srcjar( + name, + srcs, + out, + custom_package = "", + package_prefix = "", + include_paths = DEFAULT_INCLUDE_PATHS, + flatc_args = DEFAULT_FLATC_ARGS): + """Generate flatbuffer Java source files. + + Args: + name: Rule name. (required) + srcs: List of source .fbs files including all includes. (required) + out: Output file name. (required) + custom_package: Package name of generated Java files. If not specified + namespace in the schema files will be used. (optional) + package_prefix: like custom_package, but prefixes to the existing + namespace. (optional) + include_paths: List of paths that includes files can be found in. (optional) + flatc_args: List of additional arguments to pass to flatc. 
(optional) + """ + command_fmt = """set -e + tmpdir=$(@D) + schemas=$$tmpdir/schemas + java_root=$$tmpdir/java + rm -rf $$schemas + rm -rf $$java_root + mkdir -p $$schemas + mkdir -p $$java_root + + for src in $(SRCS); do + dest=$$schemas/$$src + rm -rf $$(dirname $$dest) + mkdir -p $$(dirname $$dest) + if [ -z "{custom_package}" ] && [ -z "{package_prefix}" ]; then + cp -f $$src $$dest + else + if [ -z "{package_prefix}" ]; then + sed -e "s/namespace\\s.*/namespace {custom_package};/" $$src > $$dest + else + sed -e "s/namespace \\([^;]\\+\\);/namespace {package_prefix}.\\1;/" $$src > $$dest + fi + fi + done + + flatc_arg_I="-I $$tmpdir/schemas" + for include_path in {include_paths}; do + flatc_arg_I="$$flatc_arg_I -I $$schemas/$$include_path" + done + + flatc_additional_args= + for arg in {flatc_args}; do + flatc_additional_args="$$flatc_additional_args $$arg" + done + + for src in $(SRCS); do + $(location {flatc_path}) $$flatc_arg_I --java $$flatc_additional_args -o $$java_root $$schemas/$$src + done + + $(location {zip_files}) -export_zip_path=$@ -file_directory=$$java_root + """ + genrule_cmd = command_fmt.format( + package_name = native.package_name(), + custom_package = custom_package, + package_prefix = package_prefix, + flatc_path = flatc_path, + zip_files = zip_files, + include_paths = " ".join(include_paths), + flatc_args = " ".join(flatc_args), + ) + + native.genrule( + name = name, + srcs = srcs, + outs = [out], + tools = [flatc_path, zip_files], + cmd = genrule_cmd, + ) + +def flatbuffer_android_library( + name, + srcs, + custom_package = "", + package_prefix = "", + include_paths = DEFAULT_INCLUDE_PATHS, + flatc_args = DEFAULT_FLATC_ARGS, + visibility = None): + """An android_library with the generated reader/writers for the given flatbuffer definitions. + + Args: + name: Rule name. (required) + srcs: List of source .fbs files including all includes. (required) + custom_package: Package name of generated Java files. 
If not specified + namespace in the schema files will be used. (optional) + package_prefix: like custom_package, but prefixes to the existing + namespace. (optional) + include_paths: List of paths that includes files can be found in. (optional) + flatc_args: List of additional arguments to pass to flatc. (optional) + visibility: Visibility setting for the android_library rule. (optional) + """ + out_srcjar = "android_%s_all.srcjar" % name + flatbuffer_java_srcjar( + name = "%s_srcjar" % name, + srcs = srcs, + out = out_srcjar, + custom_package = custom_package, + flatc_args = flatc_args, + include_paths = include_paths, + package_prefix = package_prefix, + ) + + native.filegroup( + name = "%s.srcjar" % name, + srcs = [out_srcjar], + ) + + # To support org.checkerframework.dataflow.qual.Pure. + checkerframework_annotations = [ + "@org_checkerframework_qual", + ] if "--java-checkerframework" in flatc_args else [] + + android_library( + name = name, + srcs = [out_srcjar], + javacopts = ["-source 7 -target 7"], + visibility = visibility, + deps = [ + "@flatbuffers//:runtime_android", + ] + checkerframework_annotations, + ) diff --git a/research/seq_flow_lite/third_party/flatbuffers/workspace.bzl b/research/seq_flow_lite/third_party/flatbuffers/workspace.bzl new file mode 100644 index 0000000000000000000000000000000000000000..dea463f2e917c7257116846efa1738d2b1503a6f --- /dev/null +++ b/research/seq_flow_lite/third_party/flatbuffers/workspace.bzl @@ -0,0 +1,19 @@ +"""Loads the Flatbuffers library, used by TF Lite.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "flatbuffers", + strip_prefix = "flatbuffers-1.12.0", + sha256 = "62f2223fb9181d1d6338451375628975775f7522185266cd5296571ac152bc45", + urls = [ + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz", + "https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz", + ], + build_file = 
"//third_party/flatbuffers:BUILD.bazel", + delete = ["build_defs.bzl"], + link_files = { + "//third_party/flatbuffers:build_defs.bzl": "build_defs.bzl", + }, + ) diff --git a/research/seq_flow_lite/third_party/icu.BUILD b/research/seq_flow_lite/third_party/icu.BUILD new file mode 100644 index 0000000000000000000000000000000000000000..61f3326041d16235becc1b1e7150d5a8498cefa3 --- /dev/null +++ b/research/seq_flow_lite/third_party/icu.BUILD @@ -0,0 +1,29 @@ +licenses(["notice"]) +exports_files(["LICENSE"]) + +package( + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "icu4c", + srcs = glob( + [ + "icu4c/source/common/*.c", + "icu4c/source/common/*.cpp", + "icu4c/source/stubdata/*.cpp", + ], + ), + hdrs = glob([ + "icu4c/source/common/*.h", + ]) + glob([ + "icu4c/source/common/unicode/*.h", + ]), + copts = [ + "-DU_COMMON_IMPLEMENTATION", + "-Wno-deprecated-declarations", + ], + linkopts = [ + "-ldl", + ], +) diff --git a/research/seq_flow_lite/third_party/protobuf.BUILD b/research/seq_flow_lite/third_party/protobuf.BUILD new file mode 100644 index 0000000000000000000000000000000000000000..b8e94969c6827d7651147e080d15108ac3c3d48f --- /dev/null +++ b/research/seq_flow_lite/third_party/protobuf.BUILD @@ -0,0 +1,40 @@ +_CHECK_VERSION = """ +PROTOC_VERSION=$$($(location @protobuf_protoc//:protoc_bin) --version \ + | cut -d' ' -f2 | sed -e 's/\\./ /g') +PROTOC_VERSION=$$(printf '%d%03d%03d' $${PROTOC_VERSION}) +TF_PROTO_VERSION=$$(grep '#define PROTOBUF_MIN_PROTOC_VERSION' \ + $(location tf_includes/google/protobuf/port_def.inc) | cut -d' ' -f3) +if [ "$${PROTOC_VERSION}" -ne "$${TF_PROTO_VERSION}" ]; then + echo !!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1>&2 + echo Your protoc version does not match the tensorflow proto header \ + required version: "$${PROTOC_VERSION}" vs. "$${TF_PROTO_VERSION}" 1>&2 + echo Please update the PROTOC_VERSION in your WORKSPACE file. 1>&2 + echo !!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
1>&2
+ false
+else
+ touch $@
+fi
+"""
+
+# Fails the build (via _CHECK_VERSION above) when the installed protoc does
+# not match the protobuf headers' required version; emits a stamp file on
+# success so dependents can depend on the check.
+genrule(
+ name = "compare_protobuf_version",
+ outs = ["versions_compared"],
+ srcs = [
+ "tf_includes/google/protobuf/port_def.inc",
+ ],
+ tools = ["@protobuf_protoc//:protoc_bin"],
+ cmd = _CHECK_VERSION,
+)
+
+cc_library(
+ name = "includes",
+ data = [":versions_compared"],
+ hdrs = glob([
+ "tf_includes/google/protobuf/*.h",
+ "tf_includes/google/protobuf/*.inc",
+ "tf_includes/google/protobuf/**/*.h",
+ "tf_includes/google/protobuf/**/*.inc",
+ ]),
+ includes = ["tf_includes"],
+ visibility = ["//visibility:public"],
+) diff --git a/official/benchmark/models/__init__.py b/research/seq_flow_lite/third_party/py/BUILD similarity index 100% rename from official/benchmark/models/__init__.py rename to research/seq_flow_lite/third_party/py/BUILD diff --git a/research/seq_flow_lite/third_party/py/BUILD.tpl b/research/seq_flow_lite/third_party/py/BUILD.tpl new file mode 100644 index 0000000000000000000000000000000000000000..cc0e013bdbf58485bc5980ac18904a0d4a92d988 --- /dev/null +++ b/research/seq_flow_lite/third_party/py/BUILD.tpl @@ -0,0 +1,32 @@ +load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
+
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+# Point both runtimes to the same python binary to ensure we always
+# use the python binary specified by ./configure.py script.
+py_runtime(
+ name = "py2_runtime",
+ interpreter_path = "%{PYTHON_BIN_PATH}",
+ python_version = "PY2",
+)
+
+py_runtime(
+ name = "py3_runtime",
+ interpreter_path = "%{PYTHON_BIN_PATH}",
+ python_version = "PY3",
+)
+
+py_runtime_pair(
+ name = "py_runtime_pair",
+ py2_runtime = ":py2_runtime",
+ py3_runtime = ":py3_runtime",
+)
+
+# Registered by python_configure.bzl via register_toolchains().
+toolchain(
+ name = "py_toolchain",
+ toolchain = ":py_runtime_pair",
+ toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+) diff --git a/research/seq_flow_lite/third_party/py/python_configure.bzl b/research/seq_flow_lite/third_party/py/python_configure.bzl new file mode 100644 index 0000000000000000000000000000000000000000..6601d7f2a3860dd6044c7d0448bfbef82d6e1b1b --- /dev/null +++ b/research/seq_flow_lite/third_party/py/python_configure.bzl @@ -0,0 +1,71 @@ +"""Repository rule for Python autoconfiguration.
+
+`python_configure` depends on the following environment variables:
+
+ * `PYTHON_BIN_PATH`: location of python binary. 
+""" + +_PYTHON_BIN_PATH = "PYTHON_BIN_PATH" + +def _tpl(repository_ctx, tpl, substitutions = {}, out = None): + if not out: + out = tpl + repository_ctx.template( + out, + Label("//third_party/py:%s.tpl" % tpl), + substitutions, + ) + +def _fail(msg): + """Output failure message when auto configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) + +def _get_python_bin(repository_ctx): + """Gets the python bin path.""" + python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) + if python_bin != None: + return python_bin + python_bin_path = repository_ctx.which("python") + if python_bin_path != None: + return str(python_bin_path) + _fail("Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + _PYTHON_BIN_PATH, + repository_ctx.os.environ.get("PATH", ""), + )) + +def _create_local_python_repository(repository_ctx): + """Creates the repository containing files set up to build with Python.""" + python_bin = _get_python_bin(repository_ctx) + _tpl(repository_ctx, "BUILD", { + "%{PYTHON_BIN_PATH}": python_bin, + }) + +def _python_autoconf_impl(repository_ctx): + """Implementation of the python_autoconf repository rule.""" + _create_local_python_repository(repository_ctx) + +python_configure = repository_rule( + implementation = _python_autoconf_impl, + environ = [ + _PYTHON_BIN_PATH, + ], +) +"""Detects and configures the local Python toolchain. + +Add the following to your WORKSPACE FILE: + +```python +load("//third_party/py:python_configure.bzl", "python_configure") + +python_configure(name = "local_config_py_toolchain") + +register_toolchains("@local_config_py_toolchain//:py_toolchain") +``` + +Args: + name: A unique name for this workspace rule. 
+""" diff --git a/research/seq_flow_lite/third_party/pybind11.BUILD b/research/seq_flow_lite/third_party/pybind11.BUILD new file mode 100644 index 0000000000000000000000000000000000000000..2f1ada6193c761aec254e32433df81b606e87b71 --- /dev/null +++ b/research/seq_flow_lite/third_party/pybind11.BUILD @@ -0,0 +1,25 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "pybind11", + hdrs = glob( + include = [ + "include/pybind11/*.h", + "include/pybind11/detail/*.h", + ], + exclude = [ + "include/pybind11/common.h", + "include/pybind11/eigen.h", + ], + ), + copts = [ + "-fexceptions", + "-Wno-undefined-inline", + "-Wno-pragma-once-outside-header", + ], + includes = ["include"], + strip_include_prefix = "include", + deps = [ + "@org_tensorflow//third_party/python_runtime:headers", + ], +) diff --git a/research/seq_flow_lite/third_party/python_runtime/BUILD b/research/seq_flow_lite/third_party/python_runtime/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..8da75d22746d16d63986f74fd6fe6b0969c53d91 --- /dev/null +++ b/research/seq_flow_lite/third_party/python_runtime/BUILD @@ -0,0 +1,8 @@ +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +alias( + name = "headers", + actual = "@local_config_python//:python_headers", +) diff --git a/research/seq_flow_lite/third_party/repo.bzl b/research/seq_flow_lite/third_party/repo.bzl new file mode 100644 index 0000000000000000000000000000000000000000..c9c6a8347a37fcf16f5ea9a11484b5b5ba70ec67 --- /dev/null +++ b/research/seq_flow_lite/third_party/repo.bzl @@ -0,0 +1,152 @@ +# Copyright 2020 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for defining TensorFlow Lite Support Bazel dependencies.""" + +_SINGLE_URL_WHITELIST = [] + +def _is_windows(ctx): + return ctx.os.name.lower().find("windows") != -1 + +def _wrap_bash_cmd(ctx, cmd): + if _is_windows(ctx): + bazel_sh = _get_env_var(ctx, "BAZEL_SH") + if not bazel_sh: + fail("BAZEL_SH environment variable is not set") + cmd = [bazel_sh, "-l", "-c", " ".join(["\"%s\"" % s for s in cmd])] + return cmd + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +# Checks if we should use the system lib instead of the bundled one +def _use_system_lib(ctx, name): + syslibenv = _get_env_var(ctx, "TF_SYSTEM_LIBS") + if syslibenv: + for n in syslibenv.strip().split(","): + if n.strip() == name: + return True + return False + +# Executes specified command with arguments and calls 'fail' if it exited with +# non-zero code +def _execute_and_check_ret_code(repo_ctx, cmd_and_args): + result = repo_ctx.execute(cmd_and_args, timeout = 60) + if result.return_code != 0: + fail(("Non-zero return code({1}) when executing '{0}':\n" + "Stdout: {2}\n" + + "Stderr: {3}").format( + " ".join([str(x) for x in cmd_and_args]), + result.return_code, + result.stdout, + result.stderr, + )) + +# Apply a patch_file to the repository root directory +# Runs 'patch -p1' on both Windows and Unix. 
def _apply_patch(ctx, patch_file):
    """Applies `patch_file` with `patch -p1` at the repository root."""
    patch_command = ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)]
    cmd = _wrap_bash_cmd(ctx, patch_command)
    _execute_and_check_ret_code(ctx, cmd)

def _apply_delete(ctx, paths):
    """Deletes `paths` (repo-root relative) after sanity-checking each one."""
    for path in paths:
        # Guard against absolute or escaping paths before the rm -rf below.
        if path.startswith("/"):
            fail("refusing to rm -rf path starting with '/': " + path)
        if ".." in path:
            fail("refusing to rm -rf path containing '..': " + path)
    cmd = _wrap_bash_cmd(ctx, ["rm", "-rf"] + [ctx.path(path) for path in paths])
    _execute_and_check_ret_code(ctx, cmd)

def _third_party_http_archive(ctx):
    """Downloads and creates Bazel repos for dependencies.

    This is a swappable replacement for both http_archive() and
    new_http_archive() that offers some additional features. It also helps
    ensure best practices are followed.
    """
    # Require a mirror.tensorflow.org URL first unless the repo is explicitly
    # whitelisted for a single URL.
    # BUG FIX: _SINGLE_URL_WHITELIST is a plain Starlark list, not a depset,
    # so it has no .to_list() method; the previous
    # `_SINGLE_URL_WHITELIST.to_list()` call would fail whenever this branch
    # was evaluated.  Test membership on the list directly.
    if ("mirror.tensorflow.org" not in ctx.attr.urls[0] and
        (len(ctx.attr.urls) < 2 and
         ctx.attr.name not in _SINGLE_URL_WHITELIST)):
        fail("third_party_http_archive(urls) must have redundant URLs. The " +
             "mirror.tensorflow.org URL must be present and it must come first. " +
             "Even if you don't have permission to mirror the file, please " +
             "put the correctly formatted mirror URL there anyway, because " +
             "someone will come along shortly thereafter and mirror the file.")

    use_syslib = _use_system_lib(ctx, ctx.attr.name)

    # Use "BUILD.bazel" to avoid conflict with third party projects that contain a
    # file or directory called "BUILD"
    buildfile_path = ctx.path("BUILD.bazel")

    if use_syslib:
        # NOTE(review): attr.string() defaults to "" (not None) when unset, so
        # this equality check may never fire for an omitted attribute —
        # confirm against callers before relying on this error path.
        if ctx.attr.system_build_file == None:
            fail("Bazel was configured with TF_SYSTEM_LIBS to use a system " +
                 "library for %s, but no system build file for %s was configured. " +
                 "Please add a system_build_file attribute to the repository rule" +
                 "for %s." % (ctx.attr.name, ctx.attr.name, ctx.attr.name))
        ctx.symlink(Label(ctx.attr.system_build_file), buildfile_path)
    else:
        # Fetch and unpack the archive, then apply optional deletions and a
        # patch before symlinking in the overriding BUILD file.
        ctx.download_and_extract(
            ctx.attr.urls,
            "",
            ctx.attr.sha256,
            ctx.attr.type,
            ctx.attr.strip_prefix,
        )
        if ctx.attr.delete:
            _apply_delete(ctx, ctx.attr.delete)
        if ctx.attr.patch_file != None:
            _apply_patch(ctx, ctx.attr.patch_file)
        ctx.symlink(Label(ctx.attr.build_file), buildfile_path)

    link_dict = {}
    if use_syslib:
        link_dict.update(ctx.attr.system_link_files)

    for internal_src, external_dest in ctx.attr.link_files.items():
        # if syslib and link exists in both, use the system one
        if external_dest not in link_dict.values():
            link_dict[internal_src] = external_dest

    for internal_src, external_dest in link_dict.items():
        ctx.symlink(Label(internal_src), ctx.path(external_dest))

# For link_files, specify each dict entry as:
#   "//path/to/source:file": "localfile"
third_party_http_archive = repository_rule(
    attrs = {
        "sha256": attr.string(mandatory = True),
        "urls": attr.string_list(
            mandatory = True,
            allow_empty = False,
        ),
        "strip_prefix": attr.string(),
        "type": attr.string(),
        "delete": attr.string_list(),
        "build_file": attr.string(mandatory = True),
        "system_build_file": attr.string(mandatory = False),
        "patch_file": attr.label(),
        "link_files": attr.string_dict(),
        "system_link_files": attr.string_dict(),
    },
    environ = [
        "TF_SYSTEM_LIBS",
    ],
    implementation = _third_party_http_archive,
)

# ---------------------------------------------------------------------------
# research/seq_flow_lite/third_party/utf.BUILD
# ---------------------------------------------------------------------------

# Plan 9 libutf: UTF-8 / rune string routines.
cc_library(
    name = "utf",
    srcs = [
        "libutf/rune.c",
        "libutf/runestrcat.c",
        "libutf/runestrchr.c",
        "libutf/runestrcmp.c",
        "libutf/runestrcpy.c",
        "libutf/runestrdup.c",
        "libutf/runestrecpy.c",
        "libutf/runestrlen.c",
        "libutf/runestrncat.c",
        "libutf/runestrncmp.c",
        "libutf/runestrncpy.c",
        "libutf/runestrrchr.c",
        "libutf/runestrstr.c",
        "libutf/runetype.c",
        "libutf/utfecpy.c",
        "libutf/utflen.c",
        "libutf/utfnlen.c",
        "libutf/utfrrune.c",
        "libutf/utfrune.c",
        "libutf/utfutf.c",
    ],
    hdrs = [
        "libutf/utf.h",
        "libutf/utfdef.h",
        "libutf/plan9.h",
    ],
    includes = [
        ".",
        "libutf",
    ],
    copts = [
        "-Wno-parentheses",
    ],
    visibility = ["//visibility:public"],
)

# ---------------------------------------------------------------------------
# research/seq_flow_lite/trainer.py
# ---------------------------------------------------------------------------
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""A utility for PRADO model to do train, eval, inference and model export."""

import importlib
import json

from absl import app
from absl import flags
from absl import logging
import tensorflow.compat.v1 as tf

import input_fn_reader  # import root module
import metric_functions  # import root module

tf.disable_v2_behavior()

FLAGS = flags.FLAGS

flags.DEFINE_string("config_path", None, "Path to a RunnerConfig.")
# NOTE(review): "train_and_eval" is accepted here but main() only handles
# "train" and "eval"; selecting it is a silent no-op — confirm intent.
flags.DEFINE_enum("runner_mode", None, ["train", "train_and_eval", "eval"],
                  "Runner mode.")
flags.DEFINE_string("master", None, "TensorFlow master URL.")
flags.DEFINE_string(
    "output_dir", None,
    "The output directory where the model checkpoints will be written.")
flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
flags.DEFINE_integer(
    "num_tpu_cores", 8,
    "Only used if `use_tpu` is True. Total number of TPU cores to use.")


def load_runner_config():
  """Loads the JSON RunnerConfig from FLAGS.config_path."""
  with tf.gfile.GFile(FLAGS.config_path, "r") as f:
    return json.loads(f.read())


def create_model(model, model_config, features, mode):
  """Creates a sequence labeling model.

  Args:
    model: imported model module exposing an `Encoder` class.
    model_config: dict of model hyperparameters (reads "multilabel").
    features: dict with "projection", "seq_length" and (non-PREDICT) "label".
    mode: a tf.estimator.ModeKeys value.

  Returns:
    (loss, logits) tuple; loss is None in PREDICT mode.
  """
  keras_model = model.Encoder(model_config, mode)
  logits = keras_model(features["projection"], features["seq_length"])
  if mode != tf.estimator.ModeKeys.PREDICT:
    # Softmax cross-entropy for single-label, sigmoid for multilabel.
    if not model_config["multilabel"]:
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=features["label"], logits=logits)
    else:
      loss = tf.nn.sigmoid_cross_entropy_with_logits(
          labels=features["label"], logits=logits)
    loss = tf.reduce_mean(loss)
    # Include regularization losses registered on the Keras model.
    loss += tf.add_n(keras_model.losses)
  else:
    loss = None

  return (loss, logits)


def create_optimizer(loss, runner_config, params):
  """Returns a train_op using Adam optimizer.

  Uses staircase exponential learning-rate decay driven by the global step;
  wraps the optimizer in CrossShardOptimizer when running on TPU.
  """
  learning_rate = tf.train.exponential_decay(
      learning_rate=runner_config["learning_rate"],
      global_step=tf.train.get_global_step(),
      decay_steps=runner_config["learning_rate_decay_steps"],
      decay_rate=runner_config["learning_rate_decay_rate"],
      staircase=True)
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
  if params["use_tpu"]:
    optimizer = tf.tpu.CrossShardOptimizer(optimizer)

  return optimizer.minimize(loss, global_step=tf.train.get_global_step())


def model_fn_builder(runner_config):
  """Returns `model_fn` closure for TPUEstimator."""

  # The model module is resolved at build time by name from the config.
  rel_module_path = ""  # empty base dir
  model = importlib.import_module(rel_module_path + runner_config["name"])

  def model_fn(features, mode, params):
    """The `model_fn` for TPUEstimator."""
    label_ids = None
    if mode != tf.estimator.ModeKeys.PREDICT:
      label_ids = features["label"]

    model_config = runner_config["model_config"]
    loss, logits = create_model(model, model_config, features, mode)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = create_optimizer(loss, runner_config, params)
      return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
          mode=mode, loss=loss, train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # Metric function depends on whether the task is multilabel.
      if not runner_config["model_config"]["multilabel"]:
        metric_fn = metric_functions.classification_metric
      else:
        metric_fn = metric_functions.labeling_metric

      eval_metrics = (metric_fn, [loss, label_ids, logits])
      return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
          mode=mode, loss=loss, eval_metrics=eval_metrics)
    elif mode == tf.estimator.ModeKeys.PREDICT:
      # Emit both raw logits and normalized scores.
      predictions = {"logits": logits}
      if not runner_config["model_config"]["multilabel"]:
        predictions["predictions"] = tf.nn.softmax(logits)
      else:
        predictions["predictions"] = tf.math.sigmoid(logits)
      return tf.compat.v1.estimator.EstimatorSpec(
          mode=mode, predictions=predictions)
    else:
      assert False, "Expected to be called in TRAIN, EVAL, or PREDICT mode."

  return model_fn


def main(_):
  """Builds the (TPU)Estimator and runs the selected runner mode."""
  runner_config = load_runner_config()

  if FLAGS.output_dir:
    tf.gfile.MakeDirs(FLAGS.output_dir)

  is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.estimator.tpu.RunConfig(
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=runner_config["save_checkpoints_steps"],
      keep_checkpoint_max=20,
      tpu_config=tf.estimator.tpu.TPUConfig(
          iterations_per_loop=runner_config["iterations_per_loop"],
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  model_fn = model_fn_builder(runner_config)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  batch_size = runner_config["batch_size"]
  estimator = tf.estimator.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      predict_batch_size=batch_size)

  # TODO(review): FLAGS.runner_mode also allows "train_and_eval", which falls
  # through both branches below and does nothing — confirm whether it should
  # be implemented or removed from the flag's enum.
  if FLAGS.runner_mode == "train":
    train_input_fn = input_fn_reader.create_input_fn(
        runner_config=runner_config,
        mode=tf.estimator.ModeKeys.TRAIN,
        drop_remainder=True)
    estimator.train(
        input_fn=train_input_fn, max_steps=runner_config["train_steps"])
  elif FLAGS.runner_mode == "eval":
    # TPU needs fixed shapes, so if the last batch is smaller, we drop it.
    eval_input_fn = input_fn_reader.create_input_fn(
        runner_config=runner_config,
        mode=tf.estimator.ModeKeys.EVAL,
        drop_remainder=True)

    # Re-evaluate every time a new checkpoint appears (10-minute timeout).
    for _ in tf.train.checkpoints_iterator(FLAGS.output_dir, timeout=600):
      result = estimator.evaluate(input_fn=eval_input_fn)
      for key in sorted(result):
        logging.info("  %s = %s", key, str(result[key]))


if __name__ == "__main__":
  app.run(main)

# ---------------------------------------------------------------------------
# research/seq_flow_lite/utils/BUILD
# ---------------------------------------------------------------------------

py_strict_library = py_library

licenses(["notice"])

package(
    default_visibility = ["//:friends"],  # sequence projection
)

py_strict_library(
    name = "tflite_utils",
    srcs = ["tflite_utils.py"],
    srcs_version = "PY3",
    deps = [
        # package tensorflow
    ],
)

py_strict_library(
    name = "misc_utils",
    srcs = ["misc_utils.py"],
    srcs_version = "PY3",
    deps = [
        # package tensorflow
    ],
)

# ---------------------------------------------------------------------------
# research/seq_flow_lite/utils/misc_utils.py
# ---------------------------------------------------------------------------
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+# ============================================================================== +# Lint as: python3 +"""A module for miscelaneous utils.""" +import tensorflow as tf + + +def random_substr(str_tensor, max_words): + """Select random substring if the input has more than max_words.""" + word_batch_r = tf.strings.split(str_tensor) + row_splits = word_batch_r.row_splits + words = word_batch_r.values + start_idx = row_splits[:-1] + end_idx = row_splits[1:] + words_per_example = end_idx - start_idx + ones = tf.ones_like(end_idx) + max_val = tf.maximum(ones, words_per_example - max_words) + max_words_batch = tf.reduce_max(words_per_example) + rnd = tf.random.uniform( + tf.shape(start_idx), minval=0, maxval=max_words_batch, dtype=tf.int64) + off_start_idx = tf.math.floormod(rnd, max_val) + new_words_per_example = tf.where( + tf.equal(max_val, 1), words_per_example, ones * max_words) + new_start_idx = start_idx + off_start_idx + new_end_idx = new_start_idx + new_words_per_example + indices = tf.expand_dims(tf.range(tf.size(words), dtype=tf.int64), axis=0) + within_limit = tf.logical_and( + tf.greater_equal(indices, tf.expand_dims(new_start_idx, axis=1)), + tf.less(indices, tf.expand_dims(new_end_idx, axis=1))) + keep_indices = tf.reduce_any(within_limit, axis=0) + keep_indices = tf.cast(keep_indices, dtype=tf.int32) + _, selected_words = tf.dynamic_partition(words, keep_indices, 2) + row_splits = tf.math.cumsum(new_words_per_example) + row_splits = tf.concat([[0], row_splits], axis=0) + new_tensor = tf.RaggedTensor.from_row_splits( + values=selected_words, row_splits=row_splits) + return tf.strings.reduce_join(new_tensor, axis=1, separator=" ") diff --git a/research/seq_flow_lite/utils/tflite_utils.py b/research/seq_flow_lite/utils/tflite_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..daabefa837969099c958773b7fe752cfa4afb4c9 --- /dev/null +++ b/research/seq_flow_lite/utils/tflite_utils.py @@ -0,0 +1,87 @@ +# Copyright 2020 The TensorFlow 
# Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Utils to convert to a TFLite model."""
import tensorflow.compat.v1 as tf


def _dump_graph_in_text_format(filename, graph_def):
  """Dump a tensorflow graph in readable text format.

  Writes one line per node (name and op) followed by its inputs.

  Args:
    filename: path of the text file to write.
    graph_def: a GraphDef proto (anything exposing `.node`).
  """
  # FIX: use a context manager so the file handle is closed even if writing
  # raises (the previous open()/close() pair leaked the handle on error).
  with open(filename, 'w') as f:
    for node in graph_def.node:
      f.write('Node: %s (%s)\n' % (node.name, node.op))
      for input_name in node.input:
        f.write('\tInput: %s\n' % input_name)


class InterpreterWithCustomOps(tf.lite.Interpreter):
  """TFLite Interpreter that records custom-op registerers for the model."""

  def __init__(self, model_content, custom_op_registerers):
    # Stored before the base-class init so the registerers are available to
    # the interpreter machinery that reads this attribute.
    self._custom_op_registerers = custom_op_registerers
    super(InterpreterWithCustomOps, self).__init__(model_content=model_content)


def set_output_quantized_for_custom_ops(graph_def):
  """Set output types/quantized flag for custom/unsupported ops.

  Marks the listed custom ops as producing quantized output with float32
  output types, and renames the sequence-string-projection ops to their
  TFLite custom-op names.  Mutates `graph_def` in place.
  """
  quantized_custom_ops = {
      'SequenceStringProjection': [tf.float32.as_datatype_enum],
      'SequenceStringProjectionV2': [tf.float32.as_datatype_enum],
      'PoolingOp': [tf.float32.as_datatype_enum],
      'ExpectedValueOp': [tf.float32.as_datatype_enum],
      'LayerNorm': [tf.float32.as_datatype_enum],
      'UniformCausalAttn': [tf.float32.as_datatype_enum],
  }
  custom_op_renames = {
      'SequenceStringProjection': 'SEQUENCE_STRING_PROJECTION',
      'SequenceStringProjectionV2': 'SEQUENCE_STRING_PROJECTION_V2',
  }

  for node in graph_def.node:
    if node.op in quantized_custom_ops:
      node.attr['_output_quantized'].b = True
      node.attr['_output_types'].list.type[:] = quantized_custom_ops[node.op]
    if node.op in custom_op_renames:
      node.op = custom_op_renames[node.op]


def generate_tflite(session, graph, input_tensors, output_tensors):
  """Generate TFLite model from a session, graph and input/output tensors.

  Freezes variables to constants, tags custom ops, and converts with uint8
  quantized inference using the legacy (TOCO) converter.
  """
  output_nodes = [tensor.name.split(':')[0] for tensor in output_tensors]
  graph_def = tf.graph_util.convert_variables_to_constants(
      session, graph.as_graph_def(), output_nodes)

  set_output_quantized_for_custom_ops(graph_def)

  # TODO(b/171063452): Bug needs to be fixed to handle this correctly.
  # def _node_name(tensor):
  #   return tensor.name.split(':')[0]

  # input_arrays_with_shape = [
  #     (_node_name(tensor), None) for tensor in input_tensors
  # ]
  # output_arrays = [_node_name(tensor) for tensor in output_tensors]
  # converter = tf.lite.TFLiteConverter(graph_def, None, None,
  #                                     input_arrays_with_shape, output_arrays)
  converter = tf.lite.TFLiteConverter(graph_def, input_tensors, output_tensors)
  converter.inference_type = tf.uint8
  # (mean, stddev) stats mapping uint8 [0, 255] to roughly [-1, 1].
  converter.default_ranges_stats = (127.5, 127.5)
  converter.quantized_input_stats = {
      tensor.op.name: (127.5, 127.5) for tensor in input_tensors
  }
  converter.allow_custom_ops = True
  # Force the legacy TOCO converter; the new converter path is disabled here.
  converter.experimental_new_converter = False
  return converter.convert()

# ---------------------------------------------------------------------------
# research/setup.py — DELETED by this change.  Removed content (for the
# record):
#   """Setup script for object_detection."""
#   REQUIRED_PACKAGES = ['Pillow>=1.0', 'Matplotlib>=2.1', 'Cython>=0.28.1']
#   setup(name='object_detection', version='0.1',
#         install_requires=REQUIRED_PACKAGES, include_package_data=True,
#         packages=[p for p in find_packages() if
# ---------------------------------------------------------------------------
p.startswith('object_detection')], - description='Tensorflow Object Detection Library', -) diff --git a/research/skip_thoughts/.gitignore b/research/skip_thoughts/.gitignore deleted file mode 100644 index 91cb861a9c87147ac86eda5434e4af270ea7b1dc..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/bazel-bin -/bazel-ci_build-cache -/bazel-genfiles -/bazel-out -/bazel-skip_thoughts -/bazel-testlogs -/bazel-tf -*.pyc diff --git a/research/skip_thoughts/README.md b/research/skip_thoughts/README.md deleted file mode 100644 index b3a1de73fabfc055ad7a0741d9466b5409168dce..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/README.md +++ /dev/null @@ -1,479 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Skip-Thought Vectors - -This is a TensorFlow implementation of the model described in: - -Jamie Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel, -Antonio Torralba, Raquel Urtasun, Sanja Fidler. -[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf). 
-*In NIPS, 2015.* - - -## Contact -***Code author:*** Chris Shallue - -***Pull requests and issues:*** @cshallue - -## Contents -* [Model Overview](#model-overview) -* [Getting Started](#getting-started) - * [Install Required Packages](#install-required-packages) - * [Download Pretrained Models (Optional)](#download-pretrained-models-optional) -* [Training a Model](#training-a-model) - * [Prepare the Training Data](#prepare-the-training-data) - * [Run the Training Script](#run-the-training-script) - * [Track Training Progress](#track-training-progress) -* [Expanding the Vocabulary](#expanding-the-vocabulary) - * [Overview](#overview) - * [Preparation](#preparation) - * [Run the Vocabulary Expansion Script](#run-the-vocabulary-expansion-script) -* [Evaluating a Model](#evaluating-a-model) - * [Overview](#overview-1) - * [Preparation](#preparation-1) - * [Run the Evaluation Tasks](#run-the-evaluation-tasks) -* [Encoding Sentences](#encoding-sentences) - -## Model overview - -The *Skip-Thoughts* model is a sentence encoder. It learns to encode input -sentences into a fixed-dimensional vector representation that is useful for many -tasks, for example to detect paraphrases or to classify whether a product review -is positive or negative. See the -[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf) -paper for details of the model architecture and more example applications. - -A trained *Skip-Thoughts* model will encode similar sentences nearby each other -in the embedding vector space. The following examples show the nearest neighbor by -cosine similarity of some sentences from the -[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/). - - -| Input sentence | Nearest Neighbor | -|----------------|------------------| -| Simplistic, silly and tedious. | Trite, banal, cliched, mostly inoffensive. | -| Not so much farcical as sour. | Not only unfunny, but downright repellent. 
| -| A sensitive and astute first feature by Anne-Sophie Birot. | Absorbing character study by André Turpin . | -| An enthralling, entertaining feature. | A slick, engrossing melodrama. | - -## Getting Started - -### Install Required Packages -First ensure that you have installed the following required packages: - -* **Bazel** ([instructions](http://bazel.build/docs/install.html)) -* **TensorFlow** ([instructions](https://www.tensorflow.org/install/)) -* **NumPy** ([instructions](http://www.scipy.org/install.html)) -* **scikit-learn** ([instructions](http://scikit-learn.org/stable/install.html)) -* **Natural Language Toolkit (NLTK)** - * First install NLTK ([instructions](http://www.nltk.org/install.html)) - * Then install the NLTK data ([instructions](http://www.nltk.org/data.html)) -* **gensim** ([instructions](https://radimrehurek.com/gensim/install.html)) - * Only required if you will be expanding your vocabulary with the [word2vec](https://code.google.com/archive/p/word2vec/) model. - - -### Download Pretrained Models (Optional) - -You can download model checkpoints pretrained on the -[BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset in the following -configurations: - -* Unidirectional RNN encoder ("uni-skip" in the paper) -* Bidirectional RNN encoder ("bi-skip" in the paper) - -```shell -# Directory to download the pretrained models to. -PRETRAINED_MODELS_DIR="${HOME}/skip_thoughts/pretrained/" - -mkdir -p ${PRETRAINED_MODELS_DIR} -cd ${PRETRAINED_MODELS_DIR} - -# Download and extract the unidirectional model. -wget "http://download.tensorflow.org/models/skip_thoughts_uni_2017_02_02.tar.gz" -tar -xvf skip_thoughts_uni_2017_02_02.tar.gz -rm skip_thoughts_uni_2017_02_02.tar.gz - -# Download and extract the bidirectional model. 
-wget "http://download.tensorflow.org/models/skip_thoughts_bi_2017_02_16.tar.gz" -tar -xvf skip_thoughts_bi_2017_02_16.tar.gz -rm skip_thoughts_bi_2017_02_16.tar.gz -``` - -You can now skip to the sections [Evaluating a Model](#evaluating-a-model) and -[Encoding Sentences](#encoding-sentences). - - -## Training a Model - -### Prepare the Training Data - -To train a model you will need to provide training data in TFRecord format. The -TFRecord format consists of a set of sharded files containing serialized -`tf.Example` protocol buffers. Each `tf.Example` proto contains three -sentences: - - * `encode`: The sentence to encode. - * `decode_pre`: The sentence preceding `encode` in the original text. - * `decode_post`: The sentence following `encode` in the original text. - -Each sentence is a list of words. During preprocessing, a dictionary is created -that assigns each word in the vocabulary to an integer-valued id. Each sentence -is encoded as a list of integer word ids in the `tf.Example` protos. - -We have provided a script to preprocess any set of text-files into this format. -You may wish to use the [BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset. -Note that the preprocessing script may take **12 hours** or more to complete -on this large dataset. - -```shell -# Comma-separated list of globs matching the input input files. The format of -# the input files is assumed to be a list of newline-separated sentences, where -# each sentence is already tokenized. -INPUT_FILES="${HOME}/skip_thoughts/bookcorpus/*.txt" - -# Location to save the preprocessed training and validation data. -DATA_DIR="${HOME}/skip_thoughts/data" - -# Build the preprocessing script. -cd tensorflow-models/skip_thoughts -bazel build -c opt //skip_thoughts/data:preprocess_dataset - -# Run the preprocessing script. 
-bazel-bin/skip_thoughts/data/preprocess_dataset \ - --input_files=${INPUT_FILES} \ - --output_dir=${DATA_DIR} -``` - -When the script finishes you will find 100 training files and 1 validation file -in `DATA_DIR`. The files will match the patterns `train-?????-of-00100` and -`validation-00000-of-00001` respectively. - -The script will also produce a file named `vocab.txt`. The format of this file -is a list of newline-separated words where the word id is the corresponding 0- -based line index. Words are sorted by descending order of frequency in the input -data. Only the top 20,000 words are assigned unique ids; all other words are -assigned the "unknown id" of 1 in the processed data. - -### Run the Training Script - -Execute the following commands to start the training script. By default it will -run for 500k steps (around 9 days on a GeForce GTX 1080 GPU). - -```shell -# Directory containing the preprocessed data. -DATA_DIR="${HOME}/skip_thoughts/data" - -# Directory to save the model. -MODEL_DIR="${HOME}/skip_thoughts/model" - -# Build the model. -cd tensorflow-models/skip_thoughts -bazel build -c opt //skip_thoughts/... - -# Run the training script. -bazel-bin/skip_thoughts/train \ - --input_file_pattern="${DATA_DIR}/train-?????-of-00100" \ - --train_dir="${MODEL_DIR}/train" -``` - -### Track Training Progress - -Optionally, you can run the `track_perplexity` script in a separate process. -This will log per-word perplexity on the validation set which allows training -progress to be monitored on -[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). - -Note that you may run out of memory if you run the this script on the same GPU -as the training script. You can set the environment variable -`CUDA_VISIBLE_DEVICES=""` to force the script to run on CPU. If it runs too -slowly on CPU, you can decrease the value of `--num_eval_examples`. 
- -```shell -DATA_DIR="${HOME}/skip_thoughts/data" -MODEL_DIR="${HOME}/skip_thoughts/model" - -# Ignore GPU devices (only necessary if your GPU is currently memory -# constrained, for example, by running the training script). -export CUDA_VISIBLE_DEVICES="" - -# Run the evaluation script. This will run in a loop, periodically loading the -# latest model checkpoint file and computing evaluation metrics. -bazel-bin/skip_thoughts/track_perplexity \ - --input_file_pattern="${DATA_DIR}/validation-?????-of-00001" \ - --checkpoint_dir="${MODEL_DIR}/train" \ - --eval_dir="${MODEL_DIR}/val" \ - --num_eval_examples=50000 -``` - -If you started the `track_perplexity` script, run a -[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard) -server in a separate process for real-time monitoring of training summaries and -validation perplexity. - -```shell -MODEL_DIR="${HOME}/skip_thoughts/model" - -# Run a TensorBoard server. -tensorboard --logdir="${MODEL_DIR}" -``` - -## Expanding the Vocabulary - -### Overview - -The vocabulary generated by the preprocessing script contains only 20,000 words -which is insufficient for many tasks. For example, a sentence from Wikipedia -might contain nouns that do not appear in this vocabulary. - -A solution to this problem described in the -[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf) -paper is to learn a mapping that transfers word representations from one model to -another. This idea is based on the "Translation Matrix" method from the paper -[Exploiting Similarities Among Languages for Machine Translation](https://arxiv.org/abs/1309.4168). - - -Specifically, we will load the word embeddings from a trained *Skip-Thoughts* -model and from a trained [word2vec model](https://arxiv.org/pdf/1301.3781.pdf) -(which has a much larger vocabulary). 
We will train a linear regression model -without regularization to learn a linear mapping from the word2vec embedding -space to the *Skip-Thoughts* embedding space. We will then apply the linear -model to all words in the word2vec vocabulary, yielding vectors in the *Skip- -Thoughts* word embedding space for the union of the two vocabularies. - -The linear regression task is to learn a parameter matrix *W* to minimize -*|| X - Y \* W ||2*, where *X* is a matrix of *Skip-Thoughts* -embeddings of shape `[num_words, dim1]`, *Y* is a matrix of word2vec embeddings -of shape `[num_words, dim2]`, and *W* is a matrix of shape `[dim2, dim1]`. - -### Preparation - -First you will need to download and unpack a pretrained -[word2vec model](https://arxiv.org/pdf/1301.3781.pdf) from -[this website](https://code.google.com/archive/p/word2vec/) -([direct download link](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?usp=sharing)). -This model was trained on the Google News dataset (about 100 billion words). - - -Also ensure that you have already [installed gensim](https://radimrehurek.com/gensim/install.html). - -### Run the Vocabulary Expansion Script - -```shell -# Path to checkpoint file or a directory containing checkpoint files (the script -# will select the most recent). -CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train" - -# Vocabulary file generated by the preprocessing script. -SKIP_THOUGHTS_VOCAB="${HOME}/skip_thoughts/data/vocab.txt" - -# Path to downloaded word2vec model. -WORD2VEC_MODEL="${HOME}/skip_thoughts/googlenews/GoogleNews-vectors-negative300.bin" - -# Output directory. -EXP_VOCAB_DIR="${HOME}/skip_thoughts/exp_vocab" - -# Build the vocabulary expansion script. -cd tensorflow-models/skip_thoughts -bazel build -c opt //skip_thoughts:vocabulary_expansion - -# Run the vocabulary expansion script. 
-bazel-bin/skip_thoughts/vocabulary_expansion \ - --skip_thoughts_model=${CHECKPOINT_PATH} \ - --skip_thoughts_vocab=${SKIP_THOUGHTS_VOCAB} \ - --word2vec_model=${WORD2VEC_MODEL} \ - --output_dir=${EXP_VOCAB_DIR} -``` - -## Evaluating a Model - -### Overview - -The model can be evaluated using the benchmark tasks described in the -[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf) -paper. The following tasks are supported (refer to the paper for full details): - - * **SICK** semantic relatedness task. - * **MSRP** (Microsoft Research Paraphrase Corpus) paraphrase detection task. - * Binary classification tasks: - * **MR** movie review sentiment task. - * **CR** customer product review task. - * **SUBJ** subjectivity/objectivity task. - * **MPQA** opinion polarity task. - * **TREC** question-type classification task. - -### Preparation - -You will need to clone or download the -[skip-thoughts GitHub repository](https://github.com/ryankiros/skip-thoughts) by -[ryankiros](https://github.com/ryankiros) (the first author of the Skip-Thoughts -paper): - -```shell -# Folder to clone the repository to. -ST_KIROS_DIR="${HOME}/skip_thoughts/skipthoughts_kiros" - -# Clone the repository. -git clone git@github.com:ryankiros/skip-thoughts.git "${ST_KIROS_DIR}/skipthoughts" - -# Make the package importable. -export PYTHONPATH="${ST_KIROS_DIR}/:${PYTHONPATH}" -``` - -You will also need to download the data needed for each evaluation task. See the -instructions [here](https://github.com/ryankiros/skip-thoughts). - -For example, the CR (customer review) dataset is found [here](http://nlp.stanford.edu/~sidaw/home/projects:nbsvm). For this task we want the -files `custrev.pos` and `custrev.neg`. - -### Run the Evaluation Tasks - -In the following example we will evaluate a unidirectional model ("uni-skip" in -the paper) on the CR task. 
To use a bidirectional model ("bi-skip" in the -paper), simply pass the flags `--bi_vocab_file`, `--bi_embeddings_file` and -`--bi_checkpoint_path` instead. To use the "combine-skip" model described in the -paper you will need to pass both the unidirectional and bidirectional flags. - -```shell -# Path to checkpoint file or a directory containing checkpoint files (the script -# will select the most recent). -CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train" - -# Vocabulary file generated by the vocabulary expansion script. -VOCAB_FILE="${HOME}/skip_thoughts/exp_vocab/vocab.txt" - -# Embeddings file generated by the vocabulary expansion script. -EMBEDDINGS_FILE="${HOME}/skip_thoughts/exp_vocab/embeddings.npy" - -# Directory containing files custrev.pos and custrev.neg. -EVAL_DATA_DIR="${HOME}/skip_thoughts/eval_data" - -# Build the evaluation script. -cd tensorflow-models/skip_thoughts -bazel build -c opt //skip_thoughts:evaluate - -# Run the evaluation script. -bazel-bin/skip_thoughts/evaluate \ - --eval_task=CR \ - --data_dir=${EVAL_DATA_DIR} \ - --uni_vocab_file=${VOCAB_FILE} \ - --uni_embeddings_file=${EMBEDDINGS_FILE} \ - --uni_checkpoint_path=${CHECKPOINT_PATH} -``` - -Output: - -```python -[0.82539682539682535, 0.84084880636604775, 0.83023872679045096, - 0.86206896551724133, 0.83554376657824936, 0.85676392572944293, - 0.84084880636604775, 0.83023872679045096, 0.85145888594164454, - 0.82758620689655171] -``` - -The output is a list of accuracies of 10 cross-validation classification models. -To get a single number, simply take the average: - -```python -ipython # Launch iPython. 
- -In [0]: -import numpy as np -np.mean([0.82539682539682535, 0.84084880636604775, 0.83023872679045096, - 0.86206896551724133, 0.83554376657824936, 0.85676392572944293, - 0.84084880636604775, 0.83023872679045096, 0.85145888594164454, - 0.82758620689655171]) - -Out [0]: 0.84009936423729525 -``` - -## Encoding Sentences - -In this example we will encode data from the -[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/) -(specifically the [sentence polarity dataset v1.0](https://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz)). - -```python -ipython # Launch iPython. - -In [0]: - -# Imports. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np -import os.path -import scipy.spatial.distance as sd -from skip_thoughts import configuration -from skip_thoughts import encoder_manager - -In [1]: -# Set paths to the model. -VOCAB_FILE = "/path/to/vocab.txt" -EMBEDDING_MATRIX_FILE = "/path/to/embeddings.npy" -CHECKPOINT_PATH = "/path/to/model.ckpt-9999" -# The following directory should contain files rt-polarity.neg and -# rt-polarity.pos. -MR_DATA_DIR = "/dir/containing/mr/data" - -In [2]: -# Set up the encoder. Here we are using a single unidirectional model. -# To use a bidirectional model as well, call load_model() again with -# configuration.model_config(bidirectional_encoder=True) and paths to the -# bidirectional model's files. The encoder will use the concatenation of -# all loaded models. -encoder = encoder_manager.EncoderManager() -encoder.load_model(configuration.model_config(), - vocabulary_file=VOCAB_FILE, - embedding_matrix_file=EMBEDDING_MATRIX_FILE, - checkpoint_path=CHECKPOINT_PATH) - -In [3]: -# Load the movie review dataset. 
-data = [] -with open(os.path.join(MR_DATA_DIR, 'rt-polarity.neg'), 'rb') as f: - data.extend([line.decode('latin-1').strip() for line in f]) -with open(os.path.join(MR_DATA_DIR, 'rt-polarity.pos'), 'rb') as f: - data.extend([line.decode('latin-1').strip() for line in f]) - -In [4]: -# Generate Skip-Thought Vectors for each sentence in the dataset. -encodings = encoder.encode(data) - -In [5]: -# Define a helper function to generate nearest neighbors. -def get_nn(ind, num=10): - encoding = encodings[ind] - scores = sd.cdist([encoding], encodings, "cosine")[0] - sorted_ids = np.argsort(scores) - print("Sentence:") - print("", data[ind]) - print("\nNearest neighbors:") - for i in range(1, num + 1): - print(" %d. %s (%.3f)" % - (i, data[sorted_ids[i]], scores[sorted_ids[i]])) - -In [6]: -# Compute nearest neighbors of the first sentence in the dataset. -get_nn(0) -``` - -Output: - -``` -Sentence: - simplistic , silly and tedious . - -Nearest neighbors: - 1. trite , banal , cliched , mostly inoffensive . (0.247) - 2. banal and predictable . (0.253) - 3. witless , pointless , tasteless and idiotic . (0.272) - 4. loud , silly , stupid and pointless . (0.295) - 5. grating and tedious . (0.299) - 6. idiotic and ugly . (0.330) - 7. black-and-white and unrealistic . (0.335) - 8. hopelessly inane , humorless and under-inspired . (0.335) - 9. shallow , noisy and pretentious . (0.340) - 10. . . . unlikable , uninteresting , unfunny , and completely , utterly inept . 
(0.346) -``` diff --git a/research/skip_thoughts/WORKSPACE b/research/skip_thoughts/WORKSPACE deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/skip_thoughts/skip_thoughts/BUILD b/research/skip_thoughts/skip_thoughts/BUILD deleted file mode 100644 index 3ab642cace448b46f6ebe6e3d8a93b9f260bb08f..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/BUILD +++ /dev/null @@ -1,87 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "configuration", - srcs = ["configuration.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "skip_thoughts_model", - srcs = ["skip_thoughts_model.py"], - srcs_version = "PY2AND3", - deps = [ - "//skip_thoughts/ops:gru_cell", - "//skip_thoughts/ops:input_ops", - ], -) - -py_test( - name = "skip_thoughts_model_test", - size = "large", - srcs = ["skip_thoughts_model_test.py"], - deps = [ - ":configuration", - ":skip_thoughts_model", - ], -) - -py_binary( - name = "train", - srcs = ["train.py"], - srcs_version = "PY2AND3", - deps = [ - ":configuration", - ":skip_thoughts_model", - ], -) - -py_binary( - name = "track_perplexity", - srcs = ["track_perplexity.py"], - srcs_version = "PY2AND3", - deps = [ - ":configuration", - ":skip_thoughts_model", - ], -) - -py_binary( - name = "vocabulary_expansion", - srcs = ["vocabulary_expansion.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "skip_thoughts_encoder", - srcs = ["skip_thoughts_encoder.py"], - srcs_version = "PY2AND3", - deps = [ - ":skip_thoughts_model", - "//skip_thoughts/data:special_words", - ], -) - -py_library( - name = "encoder_manager", - srcs = ["encoder_manager.py"], - srcs_version = "PY2AND3", - deps = [ - ":skip_thoughts_encoder", - ], -) - -py_binary( - name = "evaluate", - srcs = ["evaluate.py"], - srcs_version = "PY2AND3", - deps = [ - 
":encoder_manager", - "//skip_thoughts:configuration", - ], -) - diff --git a/research/skip_thoughts/skip_thoughts/__init__.py b/research/skip_thoughts/skip_thoughts/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/skip_thoughts/skip_thoughts/configuration.py b/research/skip_thoughts/skip_thoughts/configuration.py deleted file mode 100644 index bc04d57983584a7026df890d472ff326891e1136..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/configuration.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Default configuration for model architecture and training.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -class _HParams(object): - """Wrapper for configuration parameters.""" - pass - - -def model_config(input_file_pattern=None, - input_queue_capacity=640000, - num_input_reader_threads=1, - shuffle_input_data=True, - uniform_init_scale=0.1, - vocab_size=20000, - batch_size=128, - word_embedding_dim=620, - bidirectional_encoder=False, - encoder_dim=2400): - """Creates a model configuration object. 
- - Args: - input_file_pattern: File pattern of sharded TFRecord files containing - tf.Example protobufs. - input_queue_capacity: Number of examples to keep in the input queue. - num_input_reader_threads: Number of threads for prefetching input - tf.Examples. - shuffle_input_data: Whether to shuffle the input data. - uniform_init_scale: Scale of random uniform initializer. - vocab_size: Number of unique words in the vocab. - batch_size: Batch size (training and evaluation only). - word_embedding_dim: Word embedding dimension. - bidirectional_encoder: Whether to use a bidirectional or unidirectional - encoder RNN. - encoder_dim: Number of output dimensions of the sentence encoder. - - Returns: - An object containing model configuration parameters. - """ - config = _HParams() - config.input_file_pattern = input_file_pattern - config.input_queue_capacity = input_queue_capacity - config.num_input_reader_threads = num_input_reader_threads - config.shuffle_input_data = shuffle_input_data - config.uniform_init_scale = uniform_init_scale - config.vocab_size = vocab_size - config.batch_size = batch_size - config.word_embedding_dim = word_embedding_dim - config.bidirectional_encoder = bidirectional_encoder - config.encoder_dim = encoder_dim - return config - - -def training_config(learning_rate=0.0008, - learning_rate_decay_factor=0.5, - learning_rate_decay_steps=400000, - number_of_steps=500000, - clip_gradient_norm=5.0, - save_model_secs=600, - save_summaries_secs=600): - """Creates a training configuration object. - - Args: - learning_rate: Initial learning rate. - learning_rate_decay_factor: If > 0, the learning rate decay factor. - learning_rate_decay_steps: The number of steps before the learning rate - decays by learning_rate_decay_factor. - number_of_steps: The total number of training steps to run. Passing None - will cause the training script to run indefinitely. - clip_gradient_norm: If not None, then clip gradients to this value. 
- save_model_secs: How often (in seconds) to save model checkpoints. - save_summaries_secs: How often (in seconds) to save model summaries. - - Returns: - An object containing training configuration parameters. - - Raises: - ValueError: If learning_rate_decay_factor is set and - learning_rate_decay_steps is unset. - """ - if learning_rate_decay_factor and not learning_rate_decay_steps: - raise ValueError( - "learning_rate_decay_factor requires learning_rate_decay_steps.") - - config = _HParams() - config.learning_rate = learning_rate - config.learning_rate_decay_factor = learning_rate_decay_factor - config.learning_rate_decay_steps = learning_rate_decay_steps - config.number_of_steps = number_of_steps - config.clip_gradient_norm = clip_gradient_norm - config.save_model_secs = save_model_secs - config.save_summaries_secs = save_summaries_secs - return config diff --git a/research/skip_thoughts/skip_thoughts/data/BUILD b/research/skip_thoughts/skip_thoughts/data/BUILD deleted file mode 100644 index a8b61bfdc88431b699d08df58813c13b3ddd3ff4..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/data/BUILD +++ /dev/null @@ -1,23 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "special_words", - srcs = ["special_words.py"], - srcs_version = "PY2AND3", - deps = [], -) - -py_binary( - name = "preprocess_dataset", - srcs = [ - "preprocess_dataset.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":special_words", - ], -) diff --git a/research/skip_thoughts/skip_thoughts/data/__init__.py b/research/skip_thoughts/skip_thoughts/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/skip_thoughts/skip_thoughts/data/preprocess_dataset.py b/research/skip_thoughts/skip_thoughts/data/preprocess_dataset.py deleted file mode 100644 index 
b6f304f5301808177ae0f8ee59fb466d285b9217..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/data/preprocess_dataset.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts a set of text files to TFRecord format with Example protos. - -Each Example proto in the output contains the following fields: - - decode_pre: list of int64 ids corresponding to the "previous" sentence. - encode: list of int64 ids corresponding to the "current" sentence. - decode_post: list of int64 ids corresponding to the "post" sentence. - -In addition, the following files are generated: - - vocab.txt: List of " " pairs, where is the integer - encoding of in the Example protos. - word_counts.txt: List of " " pairs, where is the number - of occurrences of in the input files. - -The vocabulary of word ids is constructed from the top --num_words by word -count. All other words get the word id. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os - - -import numpy as np -import tensorflow as tf - -from skip_thoughts.data import special_words - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("input_files", None, - "Comma-separated list of globs matching the input " - "files. 
The format of the input files is assumed to be " - "a list of newline-separated sentences, where each " - "sentence is already tokenized.") - -tf.flags.DEFINE_string("vocab_file", "", - "(Optional) existing vocab file. Otherwise, a new vocab " - "file is created and written to the output directory. " - "The file format is a list of newline-separated words, " - "where the word id is the corresponding 0-based index " - "in the file.") - -tf.flags.DEFINE_string("output_dir", None, "Output directory.") - -tf.flags.DEFINE_integer("train_output_shards", 100, - "Number of output shards for the training set.") - -tf.flags.DEFINE_integer("validation_output_shards", 1, - "Number of output shards for the validation set.") - -tf.flags.DEFINE_integer("num_validation_sentences", 50000, - "Number of output shards for the validation set.") - -tf.flags.DEFINE_integer("num_words", 20000, - "Number of words to include in the output.") - -tf.flags.DEFINE_integer("max_sentences", 0, - "If > 0, the maximum number of sentences to output.") - -tf.flags.DEFINE_integer("max_sentence_length", 30, - "If > 0, exclude sentences whose encode, decode_pre OR" - "decode_post sentence exceeds this length.") - -tf.flags.DEFINE_boolean("add_eos", True, - "Whether to add end-of-sentence ids to the output.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def _build_vocabulary(input_files): - """Loads or builds the model vocabulary. - - Args: - input_files: List of pre-tokenized input .txt files. - - Returns: - vocab: A dictionary of word to id. 
- """ - if FLAGS.vocab_file: - tf.logging.info("Loading existing vocab file.") - vocab = collections.OrderedDict() - with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f: - for i, line in enumerate(f): - word = line.decode("utf-8").strip() - assert word not in vocab, "Attempting to add word twice: %s" % word - vocab[word] = i - tf.logging.info("Read vocab of size %d from %s", - len(vocab), FLAGS.vocab_file) - return vocab - - tf.logging.info("Creating vocabulary.") - num = 0 - wordcount = collections.Counter() - for input_file in input_files: - tf.logging.info("Processing file: %s", input_file) - for sentence in tf.gfile.FastGFile(input_file): - wordcount.update(sentence.split()) - - num += 1 - if num % 1000000 == 0: - tf.logging.info("Processed %d sentences", num) - - tf.logging.info("Processed %d sentences total", num) - - words = list(wordcount) - freqs = list(wordcount.values()) - sorted_indices = np.argsort(freqs)[::-1] - - vocab = collections.OrderedDict() - vocab[special_words.EOS] = special_words.EOS_ID - vocab[special_words.UNK] = special_words.UNK_ID - for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]): - vocab[words[w_index]] = w_id + 2 # 0: EOS, 1: UNK. 
- - tf.logging.info("Created vocab with %d words", len(vocab)) - - vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt") - with tf.gfile.FastGFile(vocab_file, "w") as f: - f.write("\n".join(vocab.keys())) - tf.logging.info("Wrote vocab file to %s", vocab_file) - - word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt") - with tf.gfile.FastGFile(word_counts_file, "w") as f: - for i in sorted_indices: - f.write("%s %d\n" % (words[i], freqs[i])) - tf.logging.info("Wrote word counts file to %s", word_counts_file) - - return vocab - - -def _int64_feature(value): - """Helper for creating an Int64 Feature.""" - return tf.train.Feature(int64_list=tf.train.Int64List( - value=[int(v) for v in value])) - - -def _sentence_to_ids(sentence, vocab): - """Helper for converting a sentence (list of words) to a list of ids.""" - ids = [vocab.get(w, special_words.UNK_ID) for w in sentence] - if FLAGS.add_eos: - ids.append(special_words.EOS_ID) - return ids - - -def _create_serialized_example(predecessor, current, successor, vocab): - """Helper for creating a serialized Example proto.""" - example = tf.train.Example(features=tf.train.Features(feature={ - "decode_pre": _int64_feature(_sentence_to_ids(predecessor, vocab)), - "encode": _int64_feature(_sentence_to_ids(current, vocab)), - "decode_post": _int64_feature(_sentence_to_ids(successor, vocab)), - })) - - return example.SerializeToString() - - -def _process_input_file(filename, vocab, stats): - """Processes the sentences in an input file. - - Args: - filename: Path to a pre-tokenized input .txt file. - vocab: A dictionary of word to id. - stats: A Counter object for statistics. - - Returns: - processed: A list of serialized Example protos - """ - tf.logging.info("Processing input file: %s", filename) - processed = [] - - predecessor = None # Predecessor sentence (list of words). - current = None # Current sentence (list of words). - successor = None # Successor sentence (list of words). 
- - for successor_str in tf.gfile.FastGFile(filename): - stats.update(["sentences_seen"]) - successor = successor_str.split() - - # The first 2 sentences per file will be skipped. - if predecessor and current and successor: - stats.update(["sentences_considered"]) - - # Note that we are going to insert later, so we only allow - # sentences with strictly less than max_sentence_length to pass. - if FLAGS.max_sentence_length and ( - len(predecessor) >= FLAGS.max_sentence_length or len(current) >= - FLAGS.max_sentence_length or len(successor) >= - FLAGS.max_sentence_length): - stats.update(["sentences_too_long"]) - else: - serialized = _create_serialized_example(predecessor, current, successor, - vocab) - processed.append(serialized) - stats.update(["sentences_output"]) - - predecessor = current - current = successor - - sentences_seen = stats["sentences_seen"] - sentences_output = stats["sentences_output"] - if sentences_seen and sentences_seen % 100000 == 0: - tf.logging.info("Processed %d sentences (%d output)", sentences_seen, - sentences_output) - if FLAGS.max_sentences and sentences_output >= FLAGS.max_sentences: - break - - tf.logging.info("Completed processing file %s", filename) - return processed - - -def _write_shard(filename, dataset, indices): - """Writes a TFRecord shard.""" - with tf.python_io.TFRecordWriter(filename) as writer: - for j in indices: - writer.write(dataset[j]) - - -def _write_dataset(name, dataset, indices, num_shards): - """Writes a sharded TFRecord dataset. - - Args: - name: Name of the dataset (e.g. "train"). - dataset: List of serialized Example protos. - indices: List of indices of 'dataset' to be written. - num_shards: The number of output shards. 
- """ - tf.logging.info("Writing dataset %s", name) - borders = np.int32(np.linspace(0, len(indices), num_shards + 1)) - for i in range(num_shards): - filename = os.path.join(FLAGS.output_dir, "%s-%.5d-of-%.5d" % (name, i, - num_shards)) - shard_indices = indices[borders[i]:borders[i + 1]] - _write_shard(filename, dataset, shard_indices) - tf.logging.info("Wrote dataset indices [%d, %d) to output shard %s", - borders[i], borders[i + 1], filename) - tf.logging.info("Finished writing %d sentences in dataset %s.", - len(indices), name) - - -def main(unused_argv): - if not FLAGS.input_files: - raise ValueError("--input_files is required.") - if not FLAGS.output_dir: - raise ValueError("--output_dir is required.") - - if not tf.gfile.IsDirectory(FLAGS.output_dir): - tf.gfile.MakeDirs(FLAGS.output_dir) - - input_files = [] - for pattern in FLAGS.input_files.split(","): - match = tf.gfile.Glob(FLAGS.input_files) - if not match: - raise ValueError("Found no files matching %s" % pattern) - input_files.extend(match) - tf.logging.info("Found %d input files.", len(input_files)) - - vocab = _build_vocabulary(input_files) - - tf.logging.info("Generating dataset.") - stats = collections.Counter() - dataset = [] - for filename in input_files: - dataset.extend(_process_input_file(filename, vocab, stats)) - if FLAGS.max_sentences and stats["sentences_output"] >= FLAGS.max_sentences: - break - - tf.logging.info("Generated dataset with %d sentences.", len(dataset)) - for k, v in stats.items(): - tf.logging.info("%s: %d", k, v) - - tf.logging.info("Shuffling dataset.") - np.random.seed(123) - shuffled_indices = np.random.permutation(len(dataset)) - val_indices = shuffled_indices[:FLAGS.num_validation_sentences] - train_indices = shuffled_indices[FLAGS.num_validation_sentences:] - - _write_dataset("train", dataset, train_indices, FLAGS.train_output_shards) - _write_dataset("validation", dataset, val_indices, - FLAGS.validation_output_shards) - - -if __name__ == "__main__": - 
tf.app.run() diff --git a/research/skip_thoughts/skip_thoughts/data/special_words.py b/research/skip_thoughts/skip_thoughts/data/special_words.py deleted file mode 100644 index fb76b7a94d1655f49f6906aa42fb2913ba8eceb9..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/data/special_words.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Special word constants. - -NOTE: The ids of the EOS and UNK constants should not be modified. It is assumed -that these always occupy the first two ids. -""" - -# End of sentence. -EOS = "" -EOS_ID = 0 - -# Unknown. -UNK = "" -UNK_ID = 1 diff --git a/research/skip_thoughts/skip_thoughts/encoder_manager.py b/research/skip_thoughts/skip_thoughts/encoder_manager.py deleted file mode 100644 index 00b220245ab30d7c408a9becd146f8ff44c2e054..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/encoder_manager.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Manager class for loading and encoding with multiple skip-thoughts models. - -If multiple models are loaded at once then the encode() function returns the -concatenation of the outputs of each model. - -Example usage: - manager = EncoderManager() - manager.load_model(model_config_1, vocabulary_file_1, embedding_matrix_file_1, - checkpoint_path_1) - manager.load_model(model_config_2, vocabulary_file_2, embedding_matrix_file_2, - checkpoint_path_2) - encodings = manager.encode(data) -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - - -import numpy as np -import tensorflow as tf - -from skip_thoughts import skip_thoughts_encoder - - -class EncoderManager(object): - """Manager class for loading and encoding with skip-thoughts models.""" - - def __init__(self): - self.encoders = [] - self.sessions = [] - - def load_model(self, model_config, vocabulary_file, embedding_matrix_file, - checkpoint_path): - """Loads a skip-thoughts model. - - Args: - model_config: Object containing parameters for building the model. - vocabulary_file: Path to vocabulary file containing a list of newline- - separated words where the word id is the corresponding 0-based index in - the file. - embedding_matrix_file: Path to a serialized numpy array of shape - [vocab_size, embedding_dim]. - checkpoint_path: SkipThoughtsModel checkpoint file or a directory - containing a checkpoint file. 
- """ - tf.logging.info("Reading vocabulary from %s", vocabulary_file) - with tf.gfile.GFile(vocabulary_file, mode="rb") as f: - lines = list(f.readlines()) - reverse_vocab = [line.decode("utf-8").strip() for line in lines] - - tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab)) - - tf.logging.info("Loading embedding matrix from %s", embedding_matrix_file) - # Note: tf.gfile.GFile doesn't work here because np.load() calls f.seek() - # with 3 arguments. - embedding_matrix = np.load(embedding_matrix_file) - tf.logging.info("Loaded embedding matrix with shape %s", - embedding_matrix.shape) - - word_embeddings = collections.OrderedDict( - zip(reverse_vocab, embedding_matrix)) - - g = tf.Graph() - with g.as_default(): - encoder = skip_thoughts_encoder.SkipThoughtsEncoder(word_embeddings) - restore_model = encoder.build_graph_from_config(model_config, - checkpoint_path) - - sess = tf.Session(graph=g) - restore_model(sess) - - self.encoders.append(encoder) - self.sessions.append(sess) - - def encode(self, - data, - use_norm=True, - verbose=False, - batch_size=128, - use_eos=False): - """Encodes a sequence of sentences as skip-thought vectors. - - Args: - data: A list of input strings. - use_norm: If True, normalize output skip-thought vectors to unit L2 norm. - verbose: Whether to log every batch. - batch_size: Batch size for the RNN encoders. - use_eos: If True, append the end-of-sentence word to each input sentence. - - Returns: - thought_vectors: A list of numpy arrays corresponding to 'data'. - - Raises: - ValueError: If called before calling load_encoder. 
- """ - if not self.encoders: - raise ValueError( - "Must call load_model at least once before calling encode.") - - encoded = [] - for encoder, sess in zip(self.encoders, self.sessions): - encoded.append( - np.array( - encoder.encode( - sess, - data, - use_norm=use_norm, - verbose=verbose, - batch_size=batch_size, - use_eos=use_eos))) - - return np.concatenate(encoded, axis=1) - - def close(self): - """Closes the active TensorFlow Sessions.""" - for sess in self.sessions: - sess.close() diff --git a/research/skip_thoughts/skip_thoughts/evaluate.py b/research/skip_thoughts/skip_thoughts/evaluate.py deleted file mode 100644 index e840d9da9f5c2e7e223669388ef0f43ed4f63398..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/evaluate.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Script to evaluate a skip-thoughts model. - -This script can evaluate a model with a unidirectional encoder ("uni-skip" in -the paper); or a model with a bidirectional encoder ("bi-skip"); or the -combination of a model with a unidirectional encoder and a model with a -bidirectional encoder ("combine-skip"). - -The uni-skip model (if it exists) is specified by the flags ---uni_vocab_file, --uni_embeddings_file, --uni_checkpoint_path. 
- -The bi-skip model (if it exists) is specified by the flags ---bi_vocab_file, --bi_embeddings_path, --bi_checkpoint_path. - -The evaluation tasks have different running times. SICK may take 5-10 minutes. -MSRP, TREC and CR may take 20-60 minutes. SUBJ, MPQA and MR may take 2+ hours. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from skipthoughts import eval_classification -from skipthoughts import eval_msrp -from skipthoughts import eval_sick -from skipthoughts import eval_trec -import tensorflow as tf - -from skip_thoughts import configuration -from skip_thoughts import encoder_manager - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("eval_task", "CR", - "Name of the evaluation task to run. Available tasks: " - "MR, CR, SUBJ, MPQA, SICK, MSRP, TREC.") - -tf.flags.DEFINE_string("data_dir", None, "Directory containing training data.") - -tf.flags.DEFINE_string("uni_vocab_file", None, - "Path to vocabulary file containing a list of newline-" - "separated words where the word id is the " - "corresponding 0-based index in the file.") -tf.flags.DEFINE_string("bi_vocab_file", None, - "Path to vocabulary file containing a list of newline-" - "separated words where the word id is the " - "corresponding 0-based index in the file.") - -tf.flags.DEFINE_string("uni_embeddings_file", None, - "Path to serialized numpy array of shape " - "[vocab_size, embedding_dim].") -tf.flags.DEFINE_string("bi_embeddings_file", None, - "Path to serialized numpy array of shape " - "[vocab_size, embedding_dim].") - -tf.flags.DEFINE_string("uni_checkpoint_path", None, - "Checkpoint file or directory containing a checkpoint " - "file.") -tf.flags.DEFINE_string("bi_checkpoint_path", None, - "Checkpoint file or directory containing a checkpoint " - "file.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def main(unused_argv): - if not FLAGS.data_dir: - raise ValueError("--data_dir is required.") - - encoder = 
encoder_manager.EncoderManager() - - # Maybe load unidirectional encoder. - if FLAGS.uni_checkpoint_path: - print("Loading unidirectional model...") - uni_config = configuration.model_config() - encoder.load_model(uni_config, FLAGS.uni_vocab_file, - FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path) - - # Maybe load bidirectional encoder. - if FLAGS.bi_checkpoint_path: - print("Loading bidirectional model...") - bi_config = configuration.model_config(bidirectional_encoder=True) - encoder.load_model(bi_config, FLAGS.bi_vocab_file, FLAGS.bi_embeddings_file, - FLAGS.bi_checkpoint_path) - - if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]: - eval_classification.eval_nested_kfold( - encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False) - elif FLAGS.eval_task == "SICK": - eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir) - elif FLAGS.eval_task == "MSRP": - eval_msrp.evaluate( - encoder, evalcv=True, evaltest=True, use_feats=True, loc=FLAGS.data_dir) - elif FLAGS.eval_task == "TREC": - eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir) - else: - raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task) - - encoder.close() - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/skip_thoughts/skip_thoughts/ops/BUILD b/research/skip_thoughts/skip_thoughts/ops/BUILD deleted file mode 100644 index 896d54db76e743008dcd50fc7a5b44aabb61d251..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/ops/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "input_ops", - srcs = ["input_ops.py"], - srcs_version = "PY2AND3", -) - -py_library( - name = "gru_cell", - srcs = ["gru_cell.py"], - srcs_version = "PY2AND3", -) diff --git a/research/skip_thoughts/skip_thoughts/ops/__init__.py b/research/skip_thoughts/skip_thoughts/ops/__init__.py deleted file mode 100644 
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/research/skip_thoughts/skip_thoughts/ops/gru_cell.py b/research/skip_thoughts/skip_thoughts/ops/gru_cell.py deleted file mode 100644 index c4bee46d3a9f5faf1ec060a3b21f66b4fe51d0c9..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/ops/gru_cell.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""GRU cell implementation for the skip-thought vectors model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -_layer_norm = tf.contrib.layers.layer_norm - - -class LayerNormGRUCell(tf.contrib.rnn.RNNCell): - """GRU cell with layer normalization. - - The layer normalization implementation is based on: - - https://arxiv.org/abs/1607.06450. - - "Layer Normalization" - Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton - """ - - def __init__(self, - num_units, - w_initializer, - u_initializer, - b_initializer, - activation=tf.nn.tanh): - """Initializes the cell. - - Args: - num_units: Number of cell units. - w_initializer: Initializer for the "W" (input) parameter matrices. - u_initializer: Initializer for the "U" (recurrent) parameter matrices. 
- b_initializer: Initializer for the "b" (bias) parameter vectors. - activation: Cell activation function. - """ - self._num_units = num_units - self._w_initializer = w_initializer - self._u_initializer = u_initializer - self._b_initializer = b_initializer - self._activation = activation - - @property - def state_size(self): - return self._num_units - - @property - def output_size(self): - return self._num_units - - def _w_h_initializer(self): - """Returns an initializer for the "W_h" parameter matrix. - - See equation (23) in the paper. The "W_h" parameter matrix is the - concatenation of two parameter submatrices. The matrix returned is - [U_z, U_r]. - - Returns: - A Tensor with shape [num_units, 2 * num_units] as described above. - """ - - def _initializer(shape, dtype=tf.float32, partition_info=None): - num_units = self._num_units - assert shape == [num_units, 2 * num_units] - u_z = self._u_initializer([num_units, num_units], dtype, partition_info) - u_r = self._u_initializer([num_units, num_units], dtype, partition_info) - return tf.concat([u_z, u_r], 1) - - return _initializer - - def _w_x_initializer(self, input_dim): - """Returns an initializer for the "W_x" parameter matrix. - - See equation (23) in the paper. The "W_x" parameter matrix is the - concatenation of two parameter submatrices. The matrix returned is - [W_z, W_r]. - - Args: - input_dim: The dimension of the cell inputs. - - Returns: - A Tensor with shape [input_dim, 2 * num_units] as described above. 
- """ - - def _initializer(shape, dtype=tf.float32, partition_info=None): - num_units = self._num_units - assert shape == [input_dim, 2 * num_units] - w_z = self._w_initializer([input_dim, num_units], dtype, partition_info) - w_r = self._w_initializer([input_dim, num_units], dtype, partition_info) - return tf.concat([w_z, w_r], 1) - - return _initializer - - def __call__(self, inputs, state, scope=None): - """GRU cell with layer normalization.""" - input_dim = inputs.get_shape().as_list()[1] - num_units = self._num_units - - with tf.variable_scope(scope or "gru_cell"): - with tf.variable_scope("gates"): - w_h = tf.get_variable( - "w_h", [num_units, 2 * num_units], - initializer=self._w_h_initializer()) - w_x = tf.get_variable( - "w_x", [input_dim, 2 * num_units], - initializer=self._w_x_initializer(input_dim)) - z_and_r = (_layer_norm(tf.matmul(state, w_h), scope="layer_norm/w_h") + - _layer_norm(tf.matmul(inputs, w_x), scope="layer_norm/w_x")) - z, r = tf.split(tf.sigmoid(z_and_r), 2, 1) - with tf.variable_scope("candidate"): - w = tf.get_variable( - "w", [input_dim, num_units], initializer=self._w_initializer) - u = tf.get_variable( - "u", [num_units, num_units], initializer=self._u_initializer) - h_hat = (r * _layer_norm(tf.matmul(state, u), scope="layer_norm/u") + - _layer_norm(tf.matmul(inputs, w), scope="layer_norm/w")) - new_h = (1 - z) * state + z * self._activation(h_hat) - return new_h, new_h diff --git a/research/skip_thoughts/skip_thoughts/ops/input_ops.py b/research/skip_thoughts/skip_thoughts/ops/input_ops.py deleted file mode 100644 index 51b03fc5da335b78977d5c1b9234160f1c240e53..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/ops/input_ops.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Input ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - - -import tensorflow as tf - -# A SentenceBatch is a pair of Tensors: -# ids: Batch of input sentences represented as sequences of word ids: an int64 -# Tensor with shape [batch_size, padded_length]. -# mask: Boolean mask distinguishing real words (1) from padded words (0): an -# int32 Tensor with shape [batch_size, padded_length]. -SentenceBatch = collections.namedtuple("SentenceBatch", ("ids", "mask")) - - -def parse_example_batch(serialized): - """Parses a batch of tf.Example protos. - - Args: - serialized: A 1-D string Tensor; a batch of serialized tf.Example protos. - Returns: - encode: A SentenceBatch of encode sentences. - decode_pre: A SentenceBatch of "previous" sentences to decode. - decode_post: A SentenceBatch of "post" sentences to decode. - """ - features = tf.parse_example( - serialized, - features={ - "encode": tf.VarLenFeature(dtype=tf.int64), - "decode_pre": tf.VarLenFeature(dtype=tf.int64), - "decode_post": tf.VarLenFeature(dtype=tf.int64), - }) - - def _sparse_to_batch(sparse): - ids = tf.sparse_tensor_to_dense(sparse) # Padding with zeroes. 
- mask = tf.sparse_to_dense(sparse.indices, sparse.dense_shape, - tf.ones_like(sparse.values, dtype=tf.int32)) - return SentenceBatch(ids=ids, mask=mask) - - output_names = ("encode", "decode_pre", "decode_post") - return tuple(_sparse_to_batch(features[x]) for x in output_names) - - -def prefetch_input_data(reader, - file_pattern, - shuffle, - capacity, - num_reader_threads=1): - """Prefetches string values from disk into an input queue. - - Args: - reader: Instance of tf.ReaderBase. - file_pattern: Comma-separated list of file patterns (e.g. - "/tmp/train_data-?????-of-00100", where '?' acts as a wildcard that - matches any character). - shuffle: Boolean; whether to randomly shuffle the input data. - capacity: Queue capacity (number of records). - num_reader_threads: Number of reader threads feeding into the queue. - - Returns: - A Queue containing prefetched string values. - """ - data_files = [] - for pattern in file_pattern.split(","): - data_files.extend(tf.gfile.Glob(pattern)) - if not data_files: - tf.logging.fatal("Found no input files matching %s", file_pattern) - else: - tf.logging.info("Prefetching values from %d files matching %s", - len(data_files), file_pattern) - - filename_queue = tf.train.string_input_producer( - data_files, shuffle=shuffle, capacity=16, name="filename_queue") - - if shuffle: - min_after_dequeue = int(0.6 * capacity) - values_queue = tf.RandomShuffleQueue( - capacity=capacity, - min_after_dequeue=min_after_dequeue, - dtypes=[tf.string], - shapes=[[]], - name="random_input_queue") - else: - values_queue = tf.FIFOQueue( - capacity=capacity, - dtypes=[tf.string], - shapes=[[]], - name="fifo_input_queue") - - enqueue_ops = [] - for _ in range(num_reader_threads): - _, value = reader.read(filename_queue) - enqueue_ops.append(values_queue.enqueue([value])) - tf.train.queue_runner.add_queue_runner( - tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)) - tf.summary.scalar("queue/%s/fraction_of_%d_full" % (values_queue.name, - 
capacity), - tf.cast(values_queue.size(), tf.float32) * (1.0 / capacity)) - - return values_queue diff --git a/research/skip_thoughts/skip_thoughts/skip_thoughts_encoder.py b/research/skip_thoughts/skip_thoughts/skip_thoughts_encoder.py deleted file mode 100644 index 79c47c58813feb72f1b9bdb5c2f7bd7956f015c8..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/skip_thoughts_encoder.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Class for encoding text using a trained SkipThoughtsModel. - -Example usage: - g = tf.Graph() - with g.as_default(): - encoder = SkipThoughtsEncoder(embeddings) - restore_fn = encoder.build_graph_from_config(model_config, checkpoint_path) - - with tf.Session(graph=g) as sess: - restore_fn(sess) - skip_thought_vectors = encoder.encode(sess, data) -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path - - -import nltk -import nltk.tokenize -import numpy as np -import tensorflow as tf - -from skip_thoughts import skip_thoughts_model -from skip_thoughts.data import special_words - - -def _pad(seq, target_len): - """Pads a sequence of word embeddings up to the target length. - - Args: - seq: Sequence of word embeddings. 
- target_len: Desired padded sequence length. - - Returns: - embeddings: Input sequence padded with zero embeddings up to the target - length. - mask: A 0/1 vector with zeros corresponding to padded embeddings. - - Raises: - ValueError: If len(seq) is not in the interval (0, target_len]. - """ - seq_len = len(seq) - if seq_len <= 0 or seq_len > target_len: - raise ValueError("Expected 0 < len(seq) <= %d, got %d" % (target_len, - seq_len)) - - emb_dim = seq[0].shape[0] - padded_seq = np.zeros(shape=(target_len, emb_dim), dtype=seq[0].dtype) - mask = np.zeros(shape=(target_len,), dtype=np.int8) - for i in range(seq_len): - padded_seq[i] = seq[i] - mask[i] = 1 - return padded_seq, mask - - -def _batch_and_pad(sequences): - """Batches and pads sequences of word embeddings into a 2D array. - - Args: - sequences: A list of batch_size sequences of word embeddings. - - Returns: - embeddings: A numpy array with shape [batch_size, padded_length, emb_dim]. - mask: A numpy 0/1 array with shape [batch_size, padded_length] with zeros - corresponding to padded elements. - """ - batch_embeddings = [] - batch_mask = [] - batch_len = max([len(seq) for seq in sequences]) - for seq in sequences: - embeddings, mask = _pad(seq, batch_len) - batch_embeddings.append(embeddings) - batch_mask.append(mask) - return np.array(batch_embeddings), np.array(batch_mask) - - -class SkipThoughtsEncoder(object): - """Skip-thoughts sentence encoder.""" - - def __init__(self, embeddings): - """Initializes the encoder. - - Args: - embeddings: Dictionary of word to embedding vector (1D numpy array). - """ - self._sentence_detector = nltk.data.load("tokenizers/punkt/english.pickle") - self._embeddings = embeddings - - def _create_restore_fn(self, checkpoint_path, saver): - """Creates a function that restores a model from checkpoint. - - Args: - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. - saver: Saver for restoring variables from the checkpoint file. 
- - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - - Raises: - ValueError: If checkpoint_path does not refer to a checkpoint file or a - directory containing a checkpoint file. - """ - if tf.gfile.IsDirectory(checkpoint_path): - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path) - if not latest_checkpoint: - raise ValueError("No checkpoint file found in: %s" % checkpoint_path) - checkpoint_path = latest_checkpoint - - def _restore_fn(sess): - tf.logging.info("Loading model from checkpoint: %s", checkpoint_path) - saver.restore(sess, checkpoint_path) - tf.logging.info("Successfully loaded checkpoint: %s", - os.path.basename(checkpoint_path)) - - return _restore_fn - - def build_graph_from_config(self, model_config, checkpoint_path): - """Builds the inference graph from a configuration object. - - Args: - model_config: Object containing configuration for building the model. - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. - - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - """ - tf.logging.info("Building model.") - model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="encode") - model.build() - saver = tf.train.Saver() - - return self._create_restore_fn(checkpoint_path, saver) - - def build_graph_from_proto(self, graph_def_file, saver_def_file, - checkpoint_path): - """Builds the inference graph from serialized GraphDef and SaverDef protos. - - Args: - graph_def_file: File containing a serialized GraphDef proto. - saver_def_file: File containing a serialized SaverDef proto. - checkpoint_path: Checkpoint file or a directory containing a checkpoint - file. - - Returns: - restore_fn: A function such that restore_fn(sess) loads model variables - from the checkpoint file. - """ - # Load the Graph. 
- tf.logging.info("Loading GraphDef from file: %s", graph_def_file) - graph_def = tf.GraphDef() - with tf.gfile.FastGFile(graph_def_file, "rb") as f: - graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name="") - - # Load the Saver. - tf.logging.info("Loading SaverDef from file: %s", saver_def_file) - saver_def = tf.train.SaverDef() - with tf.gfile.FastGFile(saver_def_file, "rb") as f: - saver_def.ParseFromString(f.read()) - saver = tf.train.Saver(saver_def=saver_def) - - return self._create_restore_fn(checkpoint_path, saver) - - def _tokenize(self, item): - """Tokenizes an input string into a list of words.""" - tokenized = [] - for s in self._sentence_detector.tokenize(item): - tokenized.extend(nltk.tokenize.word_tokenize(s)) - - return tokenized - - def _word_to_embedding(self, w): - """Returns the embedding of a word.""" - return self._embeddings.get(w, self._embeddings[special_words.UNK]) - - def _preprocess(self, data, use_eos): - """Preprocesses text for the encoder. - - Args: - data: A list of input strings. - use_eos: Whether to append the end-of-sentence word to each sentence. - - Returns: - embeddings: A list of word embedding sequences corresponding to the input - strings. - """ - preprocessed_data = [] - for item in data: - tokenized = self._tokenize(item) - if use_eos: - tokenized.append(special_words.EOS) - preprocessed_data.append([self._word_to_embedding(w) for w in tokenized]) - return preprocessed_data - - def encode(self, - sess, - data, - use_norm=True, - verbose=True, - batch_size=128, - use_eos=False): - """Encodes a sequence of sentences as skip-thought vectors. - - Args: - sess: TensorFlow Session. - data: A list of input strings. - use_norm: Whether to normalize skip-thought vectors to unit L2 norm. - verbose: Whether to log every batch. - batch_size: Batch size for the encoder. - use_eos: Whether to append the end-of-sentence word to each input - sentence. 
- - Returns: - thought_vectors: A list of numpy arrays corresponding to the skip-thought - encodings of sentences in 'data'. - """ - data = self._preprocess(data, use_eos) - thought_vectors = [] - - batch_indices = np.arange(0, len(data), batch_size) - for batch, start_index in enumerate(batch_indices): - if verbose: - tf.logging.info("Batch %d / %d.", batch, len(batch_indices)) - - embeddings, mask = _batch_and_pad( - data[start_index:start_index + batch_size]) - feed_dict = { - "encode_emb:0": embeddings, - "encode_mask:0": mask, - } - thought_vectors.extend( - sess.run("encoder/thought_vectors:0", feed_dict=feed_dict)) - - if use_norm: - thought_vectors = [v / np.linalg.norm(v) for v in thought_vectors] - - return thought_vectors diff --git a/research/skip_thoughts/skip_thoughts/skip_thoughts_model.py b/research/skip_thoughts/skip_thoughts/skip_thoughts_model.py deleted file mode 100644 index 9a9a43a4fed0dbbb03affd26ffa1c635c386aa55..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/skip_thoughts_model.py +++ /dev/null @@ -1,369 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Skip-Thoughts model for learning sentence vectors. - -The model is based on the paper: - - "Skip-Thought Vectors" - Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. 
Zemel, - Antonio Torralba, Raquel Urtasun, Sanja Fidler. - https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf - -Layer normalization is applied based on the paper: - - "Layer Normalization" - Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton - https://arxiv.org/abs/1607.06450 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from skip_thoughts.ops import gru_cell -from skip_thoughts.ops import input_ops - - -def random_orthonormal_initializer(shape, dtype=tf.float32, - partition_info=None): # pylint: disable=unused-argument - """Variable initializer that produces a random orthonormal matrix.""" - if len(shape) != 2 or shape[0] != shape[1]: - raise ValueError("Expecting square shape, got %s" % shape) - _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True) - return u - - -class SkipThoughtsModel(object): - """Skip-thoughts model.""" - - def __init__(self, config, mode="train", input_reader=None): - """Basic setup. The actual TensorFlow graph is constructed in build(). - - Args: - config: Object containing configuration parameters. - mode: "train", "eval" or "encode". - input_reader: Subclass of tf.ReaderBase for reading the input serialized - tf.Example protocol buffers. Defaults to TFRecordReader. - - Raises: - ValueError: If mode is invalid. - """ - if mode not in ["train", "eval", "encode"]: - raise ValueError("Unrecognized mode: %s" % mode) - - self.config = config - self.mode = mode - self.reader = input_reader if input_reader else tf.TFRecordReader() - - # Initializer used for non-recurrent weights. - self.uniform_initializer = tf.random_uniform_initializer( - minval=-self.config.uniform_init_scale, - maxval=self.config.uniform_init_scale) - - # Input sentences represented as sequences of word ids. "encode" is the - # source sentence, "decode_pre" is the previous sentence and "decode_post" - # is the next sentence. 
- # Each is an int64 Tensor with shape [batch_size, padded_length]. - self.encode_ids = None - self.decode_pre_ids = None - self.decode_post_ids = None - - # Boolean masks distinguishing real words (1) from padded words (0). - # Each is an int32 Tensor with shape [batch_size, padded_length]. - self.encode_mask = None - self.decode_pre_mask = None - self.decode_post_mask = None - - # Input sentences represented as sequences of word embeddings. - # Each is a float32 Tensor with shape [batch_size, padded_length, emb_dim]. - self.encode_emb = None - self.decode_pre_emb = None - self.decode_post_emb = None - - # The output from the sentence encoder. - # A float32 Tensor with shape [batch_size, num_gru_units]. - self.thought_vectors = None - - # The cross entropy losses and corresponding weights of the decoders. Used - # for evaluation. - self.target_cross_entropy_losses = [] - self.target_cross_entropy_loss_weights = [] - - # The total loss to optimize. - self.total_loss = None - - def build_inputs(self): - """Builds the ops for reading input data. - - Outputs: - self.encode_ids - self.decode_pre_ids - self.decode_post_ids - self.encode_mask - self.decode_pre_mask - self.decode_post_mask - """ - if self.mode == "encode": - # Word embeddings are fed from an external vocabulary which has possibly - # been expanded (see vocabulary_expansion.py). - encode_ids = None - decode_pre_ids = None - decode_post_ids = None - encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask") - decode_pre_mask = None - decode_post_mask = None - else: - # Prefetch serialized tf.Example protos. - input_queue = input_ops.prefetch_input_data( - self.reader, - self.config.input_file_pattern, - shuffle=self.config.shuffle_input_data, - capacity=self.config.input_queue_capacity, - num_reader_threads=self.config.num_input_reader_threads) - - # Deserialize a batch. 
- serialized = input_queue.dequeue_many(self.config.batch_size) - encode, decode_pre, decode_post = input_ops.parse_example_batch( - serialized) - - encode_ids = encode.ids - decode_pre_ids = decode_pre.ids - decode_post_ids = decode_post.ids - - encode_mask = encode.mask - decode_pre_mask = decode_pre.mask - decode_post_mask = decode_post.mask - - self.encode_ids = encode_ids - self.decode_pre_ids = decode_pre_ids - self.decode_post_ids = decode_post_ids - - self.encode_mask = encode_mask - self.decode_pre_mask = decode_pre_mask - self.decode_post_mask = decode_post_mask - - def build_word_embeddings(self): - """Builds the word embeddings. - - Inputs: - self.encode_ids - self.decode_pre_ids - self.decode_post_ids - - Outputs: - self.encode_emb - self.decode_pre_emb - self.decode_post_emb - """ - if self.mode == "encode": - # Word embeddings are fed from an external vocabulary which has possibly - # been expanded (see vocabulary_expansion.py). - encode_emb = tf.placeholder(tf.float32, ( - None, None, self.config.word_embedding_dim), "encode_emb") - # No sequences to decode. - decode_pre_emb = None - decode_post_emb = None - else: - word_emb = tf.get_variable( - name="word_embedding", - shape=[self.config.vocab_size, self.config.word_embedding_dim], - initializer=self.uniform_initializer) - - encode_emb = tf.nn.embedding_lookup(word_emb, self.encode_ids) - decode_pre_emb = tf.nn.embedding_lookup(word_emb, self.decode_pre_ids) - decode_post_emb = tf.nn.embedding_lookup(word_emb, self.decode_post_ids) - - self.encode_emb = encode_emb - self.decode_pre_emb = decode_pre_emb - self.decode_post_emb = decode_post_emb - - def _initialize_gru_cell(self, num_units): - """Initializes a GRU cell. - - The Variables of the GRU cell are initialized in a way that exactly matches - the skip-thoughts paper: recurrent weights are initialized from random - orthonormal matrices and non-recurrent weights are initialized from random - uniform matrices. 
- - Args: - num_units: Number of output units. - - Returns: - cell: An instance of RNNCell with variable initializers that match the - skip-thoughts paper. - """ - return gru_cell.LayerNormGRUCell( - num_units, - w_initializer=self.uniform_initializer, - u_initializer=random_orthonormal_initializer, - b_initializer=tf.constant_initializer(0.0)) - - def build_encoder(self): - """Builds the sentence encoder. - - Inputs: - self.encode_emb - self.encode_mask - - Outputs: - self.thought_vectors - - Raises: - ValueError: if config.bidirectional_encoder is True and config.encoder_dim - is odd. - """ - with tf.variable_scope("encoder") as scope: - length = tf.to_int32(tf.reduce_sum(self.encode_mask, 1), name="length") - - if self.config.bidirectional_encoder: - if self.config.encoder_dim % 2: - raise ValueError( - "encoder_dim must be even when using a bidirectional encoder.") - num_units = self.config.encoder_dim // 2 - cell_fw = self._initialize_gru_cell(num_units) # Forward encoder - cell_bw = self._initialize_gru_cell(num_units) # Backward encoder - _, states = tf.nn.bidirectional_dynamic_rnn( - cell_fw=cell_fw, - cell_bw=cell_bw, - inputs=self.encode_emb, - sequence_length=length, - dtype=tf.float32, - scope=scope) - thought_vectors = tf.concat(states, 1, name="thought_vectors") - else: - cell = self._initialize_gru_cell(self.config.encoder_dim) - _, state = tf.nn.dynamic_rnn( - cell=cell, - inputs=self.encode_emb, - sequence_length=length, - dtype=tf.float32, - scope=scope) - # Use an identity operation to name the Tensor in the Graph. - thought_vectors = tf.identity(state, name="thought_vectors") - - self.thought_vectors = thought_vectors - - def _build_decoder(self, name, embeddings, targets, mask, initial_state, - reuse_logits): - """Builds a sentence decoder. - - Args: - name: Decoder name. - embeddings: Batch of sentences to decode; a float32 Tensor with shape - [batch_size, padded_length, emb_dim]. 
- targets: Batch of target word ids; an int64 Tensor with shape - [batch_size, padded_length]. - mask: A 0/1 Tensor with shape [batch_size, padded_length]. - initial_state: Initial state of the GRU. A float32 Tensor with shape - [batch_size, num_gru_cells]. - reuse_logits: Whether to reuse the logits weights. - """ - # Decoder RNN. - cell = self._initialize_gru_cell(self.config.encoder_dim) - with tf.variable_scope(name) as scope: - # Add a padding word at the start of each sentence (to correspond to the - # prediction of the first word) and remove the last word. - decoder_input = tf.pad( - embeddings[:, :-1, :], [[0, 0], [1, 0], [0, 0]], name="input") - length = tf.reduce_sum(mask, 1, name="length") - decoder_output, _ = tf.nn.dynamic_rnn( - cell=cell, - inputs=decoder_input, - sequence_length=length, - initial_state=initial_state, - scope=scope) - - # Stack batch vertically. - decoder_output = tf.reshape(decoder_output, [-1, self.config.encoder_dim]) - targets = tf.reshape(targets, [-1]) - weights = tf.to_float(tf.reshape(mask, [-1])) - - # Logits. - with tf.variable_scope("logits", reuse=reuse_logits) as scope: - logits = tf.contrib.layers.fully_connected( - inputs=decoder_output, - num_outputs=self.config.vocab_size, - activation_fn=None, - weights_initializer=self.uniform_initializer, - scope=scope) - - losses = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=targets, logits=logits) - batch_loss = tf.reduce_sum(losses * weights) - tf.losses.add_loss(batch_loss) - - tf.summary.scalar("losses/" + name, batch_loss) - - self.target_cross_entropy_losses.append(losses) - self.target_cross_entropy_loss_weights.append(weights) - - def build_decoders(self): - """Builds the sentence decoders. 
- - Inputs: - self.decode_pre_emb - self.decode_post_emb - self.decode_pre_ids - self.decode_post_ids - self.decode_pre_mask - self.decode_post_mask - self.thought_vectors - - Outputs: - self.target_cross_entropy_losses - self.target_cross_entropy_loss_weights - """ - if self.mode != "encode": - # Pre-sentence decoder. - self._build_decoder("decoder_pre", self.decode_pre_emb, - self.decode_pre_ids, self.decode_pre_mask, - self.thought_vectors, False) - - # Post-sentence decoder. Logits weights are reused. - self._build_decoder("decoder_post", self.decode_post_emb, - self.decode_post_ids, self.decode_post_mask, - self.thought_vectors, True) - - def build_loss(self): - """Builds the loss Tensor. - - Outputs: - self.total_loss - """ - if self.mode != "encode": - total_loss = tf.losses.get_total_loss() - tf.summary.scalar("losses/total", total_loss) - - self.total_loss = total_loss - - def build_global_step(self): - """Builds the global step Tensor. - - Outputs: - self.global_step - """ - self.global_step = tf.contrib.framework.create_global_step() - - def build(self): - """Creates all ops for training, evaluation or encoding.""" - self.build_inputs() - self.build_word_embeddings() - self.build_encoder() - self.build_decoders() - self.build_loss() - self.build_global_step() diff --git a/research/skip_thoughts/skip_thoughts/skip_thoughts_model_test.py b/research/skip_thoughts/skip_thoughts/skip_thoughts_model_test.py deleted file mode 100644 index 7bd64326d9d9cdcaae11d74ac8831adac915dfe2..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/skip_thoughts_model_test.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tensorflow_models.skip_thoughts.skip_thoughts_model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import numpy as np -import tensorflow as tf - -from skip_thoughts import configuration -from skip_thoughts import skip_thoughts_model - - -class SkipThoughtsModel(skip_thoughts_model.SkipThoughtsModel): - """Subclass of SkipThoughtsModel without the disk I/O.""" - - def build_inputs(self): - if self.mode == "encode": - # Encode mode doesn't read from disk, so defer to parent. - return super(SkipThoughtsModel, self).build_inputs() - else: - # Replace disk I/O with random Tensors. 
- self.encode_ids = tf.random_uniform( - [self.config.batch_size, 15], - minval=0, - maxval=self.config.vocab_size, - dtype=tf.int64) - self.decode_pre_ids = tf.random_uniform( - [self.config.batch_size, 15], - minval=0, - maxval=self.config.vocab_size, - dtype=tf.int64) - self.decode_post_ids = tf.random_uniform( - [self.config.batch_size, 15], - minval=0, - maxval=self.config.vocab_size, - dtype=tf.int64) - self.encode_mask = tf.ones_like(self.encode_ids) - self.decode_pre_mask = tf.ones_like(self.decode_pre_ids) - self.decode_post_mask = tf.ones_like(self.decode_post_ids) - - -class SkipThoughtsModelTest(tf.test.TestCase): - - def setUp(self): - super(SkipThoughtsModelTest, self).setUp() - self._model_config = configuration.model_config() - - def _countModelParameters(self): - """Counts the number of parameters in the model at top level scope.""" - counter = {} - for v in tf.global_variables(): - name = v.op.name.split("/")[0] - num_params = v.get_shape().num_elements() - if not num_params: - self.fail("Could not infer num_elements from Variable %s" % v.op.name) - counter[name] = counter.get(name, 0) + num_params - return counter - - def _checkModelParameters(self): - """Verifies the number of parameters in the model.""" - param_counts = self._countModelParameters() - expected_param_counts = { - # vocab_size * embedding_size - "word_embedding": 12400000, - # GRU Cells - "encoder": 21772800, - "decoder_pre": 21772800, - "decoder_post": 21772800, - # (encoder_dim + 1) * vocab_size - "logits": 48020000, - "global_step": 1, - } - self.assertDictEqual(expected_param_counts, param_counts) - - def _checkOutputs(self, expected_shapes, feed_dict=None): - """Verifies that the model produces expected outputs. - - Args: - expected_shapes: A dict mapping Tensor or Tensor name to expected output - shape. - feed_dict: Values of Tensors to feed into Session.run(). 
- """ - fetches = expected_shapes.keys() - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - outputs = sess.run(fetches, feed_dict) - - for index, output in enumerate(outputs): - tensor = fetches[index] - expected = expected_shapes[tensor] - actual = output.shape - if expected != actual: - self.fail("Tensor %s has shape %s (expected %s)." % (tensor, actual, - expected)) - - def testBuildForTraining(self): - model = SkipThoughtsModel(self._model_config, mode="train") - model.build() - - self._checkModelParameters() - - expected_shapes = { - # [batch_size, length] - model.encode_ids: (128, 15), - model.decode_pre_ids: (128, 15), - model.decode_post_ids: (128, 15), - model.encode_mask: (128, 15), - model.decode_pre_mask: (128, 15), - model.decode_post_mask: (128, 15), - # [batch_size, length, word_embedding_dim] - model.encode_emb: (128, 15, 620), - model.decode_pre_emb: (128, 15, 620), - model.decode_post_emb: (128, 15, 620), - # [batch_size, encoder_dim] - model.thought_vectors: (128, 2400), - # [batch_size * length] - model.target_cross_entropy_losses[0]: (1920,), - model.target_cross_entropy_losses[1]: (1920,), - # [batch_size * length] - model.target_cross_entropy_loss_weights[0]: (1920,), - model.target_cross_entropy_loss_weights[1]: (1920,), - # Scalar - model.total_loss: (), - } - self._checkOutputs(expected_shapes) - - def testBuildForEval(self): - model = SkipThoughtsModel(self._model_config, mode="eval") - model.build() - - self._checkModelParameters() - - expected_shapes = { - # [batch_size, length] - model.encode_ids: (128, 15), - model.decode_pre_ids: (128, 15), - model.decode_post_ids: (128, 15), - model.encode_mask: (128, 15), - model.decode_pre_mask: (128, 15), - model.decode_post_mask: (128, 15), - # [batch_size, length, word_embedding_dim] - model.encode_emb: (128, 15, 620), - model.decode_pre_emb: (128, 15, 620), - model.decode_post_emb: (128, 15, 620), - # [batch_size, encoder_dim] - model.thought_vectors: (128, 
2400), - # [batch_size * length] - model.target_cross_entropy_losses[0]: (1920,), - model.target_cross_entropy_losses[1]: (1920,), - # [batch_size * length] - model.target_cross_entropy_loss_weights[0]: (1920,), - model.target_cross_entropy_loss_weights[1]: (1920,), - # Scalar - model.total_loss: (), - } - self._checkOutputs(expected_shapes) - - def testBuildForEncode(self): - model = SkipThoughtsModel(self._model_config, mode="encode") - model.build() - - # Test feeding a batch of word embeddings to get skip thought vectors. - encode_emb = np.random.rand(64, 15, 620) - encode_mask = np.ones((64, 15), dtype=np.int64) - feed_dict = {model.encode_emb: encode_emb, model.encode_mask: encode_mask} - expected_shapes = { - # [batch_size, encoder_dim] - model.thought_vectors: (64, 2400), - } - self._checkOutputs(expected_shapes, feed_dict) - - -if __name__ == "__main__": - tf.test.main() diff --git a/research/skip_thoughts/skip_thoughts/track_perplexity.py b/research/skip_thoughts/skip_thoughts/track_perplexity.py deleted file mode 100644 index 637eaf2c07b5ea705441f146f020887392d9faf3..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/track_perplexity.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tracks training progress via per-word perplexity. 
- -This script should be run concurrently with training so that summaries show up -in TensorBoard. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import range - -import math -import os.path -import time - - -import numpy as np -import tensorflow as tf - -from skip_thoughts import configuration -from skip_thoughts import skip_thoughts_model - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("input_file_pattern", None, - "File pattern of sharded TFRecord input files.") -tf.flags.DEFINE_string("checkpoint_dir", None, - "Directory containing model checkpoints.") -tf.flags.DEFINE_string("eval_dir", None, "Directory to write event logs to.") - -tf.flags.DEFINE_integer("eval_interval_secs", 600, - "Interval between evaluation runs.") -tf.flags.DEFINE_integer("num_eval_examples", 50000, - "Number of examples for evaluation.") - -tf.flags.DEFINE_integer("min_global_step", 100, - "Minimum global step to run evaluation.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def evaluate_model(sess, losses, weights, num_batches, global_step, - summary_writer, summary_op): - """Computes perplexity-per-word over the evaluation dataset. - - Summaries and perplexity-per-word are written out to the eval directory. - - Args: - sess: Session object. - losses: A Tensor of any shape; the target cross entropy losses for the - current batch. - weights: A Tensor of weights corresponding to losses. - num_batches: Integer; the number of evaluation batches. - global_step: Integer; global step of the model checkpoint. - summary_writer: Instance of SummaryWriter. - summary_op: Op for generating model summaries. - """ - # Log model summaries on a single batch. 
- summary_str = sess.run(summary_op) - summary_writer.add_summary(summary_str, global_step) - - start_time = time.time() - sum_losses = 0.0 - sum_weights = 0.0 - for i in range(num_batches): - batch_losses, batch_weights = sess.run([losses, weights]) - sum_losses += np.sum(batch_losses * batch_weights) - sum_weights += np.sum(batch_weights) - if not i % 100: - tf.logging.info("Computed losses for %d of %d batches.", i + 1, - num_batches) - eval_time = time.time() - start_time - - perplexity = math.exp(sum_losses / sum_weights) - tf.logging.info("Perplexity = %f (%.2f sec)", perplexity, eval_time) - - # Log perplexity to the SummaryWriter. - summary = tf.Summary() - value = summary.value.add() - value.simple_value = perplexity - value.tag = "perplexity" - summary_writer.add_summary(summary, global_step) - - # Write the Events file to the eval directory. - summary_writer.flush() - tf.logging.info("Finished processing evaluation at global step %d.", - global_step) - - -def run_once(model, losses, weights, saver, summary_writer, summary_op): - """Evaluates the latest model checkpoint. - - Args: - model: Instance of SkipThoughtsModel; the model to evaluate. - losses: Tensor; the target cross entropy losses for the current batch. - weights: A Tensor of weights corresponding to losses. - saver: Instance of tf.train.Saver for restoring model Variables. - summary_writer: Instance of FileWriter. - summary_op: Op for generating model summaries. - """ - model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) - if not model_path: - tf.logging.info("Skipping evaluation. No checkpoint found in: %s", - FLAGS.checkpoint_dir) - return - - with tf.Session() as sess: - # Load model from checkpoint. 
- tf.logging.info("Loading model from checkpoint: %s", model_path) - saver.restore(sess, model_path) - global_step = tf.train.global_step(sess, model.global_step.name) - tf.logging.info("Successfully loaded %s at global step = %d.", - os.path.basename(model_path), global_step) - if global_step < FLAGS.min_global_step: - tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step, - FLAGS.min_global_step) - return - - # Start the queue runners. - coord = tf.train.Coordinator() - threads = tf.train.start_queue_runners(coord=coord) - - num_eval_batches = int( - math.ceil(FLAGS.num_eval_examples / model.config.batch_size)) - - # Run evaluation on the latest checkpoint. - try: - evaluate_model(sess, losses, weights, num_eval_batches, global_step, - summary_writer, summary_op) - except tf.InvalidArgumentError: - tf.logging.error( - "Evaluation raised InvalidArgumentError (e.g. due to Nans).") - finally: - coord.request_stop() - coord.join(threads, stop_grace_period_secs=10) - - -def main(unused_argv): - if not FLAGS.input_file_pattern: - raise ValueError("--input_file_pattern is required.") - if not FLAGS.checkpoint_dir: - raise ValueError("--checkpoint_dir is required.") - if not FLAGS.eval_dir: - raise ValueError("--eval_dir is required.") - - # Create the evaluation directory if it doesn't exist. - eval_dir = FLAGS.eval_dir - if not tf.gfile.IsDirectory(eval_dir): - tf.logging.info("Creating eval directory: %s", eval_dir) - tf.gfile.MakeDirs(eval_dir) - - g = tf.Graph() - with g.as_default(): - # Build the model for evaluation. 
- model_config = configuration.model_config( - input_file_pattern=FLAGS.input_file_pattern, - input_queue_capacity=FLAGS.num_eval_examples, - shuffle_input_data=False) - model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="eval") - model.build() - - losses = tf.concat(model.target_cross_entropy_losses, 0) - weights = tf.concat(model.target_cross_entropy_loss_weights, 0) - - # Create the Saver to restore model Variables. - saver = tf.train.Saver() - - # Create the summary operation and the summary writer. - summary_op = tf.summary.merge_all() - summary_writer = tf.summary.FileWriter(eval_dir) - - g.finalize() - - # Run a new evaluation run every eval_interval_secs. - while True: - start = time.time() - tf.logging.info("Starting evaluation at " + time.strftime( - "%Y-%m-%d-%H:%M:%S", time.localtime())) - run_once(model, losses, weights, saver, summary_writer, summary_op) - time_to_next_eval = start + FLAGS.eval_interval_secs - time.time() - if time_to_next_eval > 0: - time.sleep(time_to_next_eval) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/skip_thoughts/skip_thoughts/train.py b/research/skip_thoughts/skip_thoughts/train.py deleted file mode 100644 index 445f31c5a8fe9d1c6084ccacb2109449839f1bd5..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/train.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Train the skip-thoughts model.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -from skip_thoughts import configuration -from skip_thoughts import skip_thoughts_model - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("input_file_pattern", None, - "File pattern of sharded TFRecord files containing " - "tf.Example protos.") -tf.flags.DEFINE_string("train_dir", None, - "Directory for saving and loading checkpoints.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def _setup_learning_rate(config, global_step): - """Sets up the learning rate with optional exponential decay. - - Args: - config: Object containing learning rate configuration parameters. - global_step: Tensor; the global step. - - Returns: - learning_rate: Tensor; the learning rate with exponential decay. - """ - if config.learning_rate_decay_factor > 0: - learning_rate = tf.train.exponential_decay( - learning_rate=float(config.learning_rate), - global_step=global_step, - decay_steps=config.learning_rate_decay_steps, - decay_rate=config.learning_rate_decay_factor, - staircase=False) - else: - learning_rate = tf.constant(config.learning_rate) - return learning_rate - - -def main(unused_argv): - if not FLAGS.input_file_pattern: - raise ValueError("--input_file_pattern is required.") - if not FLAGS.train_dir: - raise ValueError("--train_dir is required.") - - model_config = configuration.model_config( - input_file_pattern=FLAGS.input_file_pattern) - training_config = configuration.training_config() - - tf.logging.info("Building training graph.") - g = tf.Graph() - with g.as_default(): - model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train") - model.build() - - learning_rate = _setup_learning_rate(training_config, model.global_step) - optimizer = tf.train.AdamOptimizer(learning_rate) - - train_tensor = 
tf.contrib.slim.learning.create_train_op( - total_loss=model.total_loss, - optimizer=optimizer, - global_step=model.global_step, - clip_gradient_norm=training_config.clip_gradient_norm) - - saver = tf.train.Saver() - - tf.contrib.slim.learning.train( - train_op=train_tensor, - logdir=FLAGS.train_dir, - graph=g, - global_step=model.global_step, - number_of_steps=training_config.number_of_steps, - save_summaries_secs=training_config.save_summaries_secs, - saver=saver, - save_interval_secs=training_config.save_model_secs) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/skip_thoughts/skip_thoughts/vocabulary_expansion.py b/research/skip_thoughts/skip_thoughts/vocabulary_expansion.py deleted file mode 100644 index 0d6c8e2bc227c4dd86e73fe9b00daafeffd4c2a5..0000000000000000000000000000000000000000 --- a/research/skip_thoughts/skip_thoughts/vocabulary_expansion.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Compute an expanded vocabulary of embeddings using a word2vec model. - -This script loads the word embeddings from a trained skip-thoughts model and -from a trained word2vec model (typically with a larger vocabulary). 
It trains a -linear regression model without regularization to learn a linear mapping from -the word2vec embedding space to the skip-thoughts embedding space. The model is -then applied to all words in the word2vec vocabulary, yielding vectors in the -skip-thoughts word embedding space for the union of the two vocabularies. - -The linear regression task is to learn a parameter matrix W to minimize - || X - Y * W ||^2, -where X is a matrix of skip-thoughts embeddings of shape [num_words, dim1], -Y is a matrix of word2vec embeddings of shape [num_words, dim2], and W is a -matrix of shape [dim2, dim1]. - -This is based on the "Translation Matrix" method from the paper: - - "Exploiting Similarities among Languages for Machine Translation" - Tomas Mikolov, Quoc V. Le, Ilya Sutskever - https://arxiv.org/abs/1309.4168 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os.path - - -import gensim.models -import numpy as np -import sklearn.linear_model -import tensorflow as tf - -FLAGS = tf.flags.FLAGS - -tf.flags.DEFINE_string("skip_thoughts_model", None, - "Checkpoint file or directory containing a checkpoint " - "file.") - -tf.flags.DEFINE_string("skip_thoughts_vocab", None, - "Path to vocabulary file containing a list of newline-" - "separated words where the word id is the " - "corresponding 0-based index in the file.") - -tf.flags.DEFINE_string("word2vec_model", None, - "File containing a word2vec model in binary format.") - -tf.flags.DEFINE_string("output_dir", None, "Output directory.") - -tf.logging.set_verbosity(tf.logging.INFO) - - -def _load_skip_thoughts_embeddings(checkpoint_path): - """Loads the embedding matrix from a skip-thoughts model checkpoint. - - Args: - checkpoint_path: Model checkpoint file or directory containing a checkpoint - file. - - Returns: - word_embedding: A numpy array of shape [vocab_size, embedding_dim]. 
- - Raises: - ValueError: If no checkpoint file matches checkpoint_path. - """ - if tf.gfile.IsDirectory(checkpoint_path): - checkpoint_file = tf.train.latest_checkpoint(checkpoint_path) - if not checkpoint_file: - raise ValueError("No checkpoint file found in %s" % checkpoint_path) - else: - checkpoint_file = checkpoint_path - - tf.logging.info("Loading skip-thoughts embedding matrix from %s", - checkpoint_file) - reader = tf.train.NewCheckpointReader(checkpoint_file) - word_embedding = reader.get_tensor("word_embedding") - tf.logging.info("Loaded skip-thoughts embedding matrix of shape %s", - word_embedding.shape) - - return word_embedding - - -def _load_vocabulary(filename): - """Loads a vocabulary file. - - Args: - filename: Path to text file containing newline-separated words. - - Returns: - vocab: A dictionary mapping word to word id. - """ - tf.logging.info("Reading vocabulary from %s", filename) - vocab = collections.OrderedDict() - with tf.gfile.GFile(filename, mode="rb") as f: - for i, line in enumerate(f): - word = line.decode("utf-8").strip() - assert word not in vocab, "Attempting to add word twice: %s" % word - vocab[word] = i - tf.logging.info("Read vocabulary of size %d", len(vocab)) - return vocab - - -def _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, word2vec): - """Runs vocabulary expansion on a skip-thoughts model using a word2vec model. - - Args: - skip_thoughts_emb: A numpy array of shape [skip_thoughts_vocab_size, - skip_thoughts_embedding_dim]. - skip_thoughts_vocab: A dictionary of word to id. - word2vec: An instance of gensim.models.Word2Vec. - - Returns: - combined_emb: A dictionary mapping words to embedding vectors. - """ - # Find words shared between the two vocabularies. - tf.logging.info("Finding shared words") - shared_words = [w for w in word2vec.vocab if w in skip_thoughts_vocab] - - # Select embedding vectors for shared words. 
- tf.logging.info("Selecting embeddings for %d shared words", len(shared_words)) - shared_st_emb = skip_thoughts_emb[[ - skip_thoughts_vocab[w] for w in shared_words - ]] - shared_w2v_emb = word2vec[shared_words] - - # Train a linear regression model on the shared embedding vectors. - tf.logging.info("Training linear regression model") - model = sklearn.linear_model.LinearRegression() - model.fit(shared_w2v_emb, shared_st_emb) - - # Create the expanded vocabulary. - tf.logging.info("Creating embeddings for expanded vocabuary") - combined_emb = collections.OrderedDict() - for w in word2vec.vocab: - # Ignore words with underscores (spaces). - if "_" not in w: - w_emb = model.predict(word2vec[w].reshape(1, -1)) - combined_emb[w] = w_emb.reshape(-1) - - for w in skip_thoughts_vocab: - combined_emb[w] = skip_thoughts_emb[skip_thoughts_vocab[w]] - - tf.logging.info("Created expanded vocabulary of %d words", len(combined_emb)) - - return combined_emb - - -def main(unused_argv): - if not FLAGS.skip_thoughts_model: - raise ValueError("--skip_thoughts_model is required.") - if not FLAGS.skip_thoughts_vocab: - raise ValueError("--skip_thoughts_vocab is required.") - if not FLAGS.word2vec_model: - raise ValueError("--word2vec_model is required.") - if not FLAGS.output_dir: - raise ValueError("--output_dir is required.") - - if not tf.gfile.IsDirectory(FLAGS.output_dir): - tf.gfile.MakeDirs(FLAGS.output_dir) - - # Load the skip-thoughts embeddings and vocabulary. - skip_thoughts_emb = _load_skip_thoughts_embeddings(FLAGS.skip_thoughts_model) - skip_thoughts_vocab = _load_vocabulary(FLAGS.skip_thoughts_vocab) - - # Load the Word2Vec model. - word2vec = gensim.models.KeyedVectors.load_word2vec_format( - FLAGS.word2vec_model, binary=True) - - # Run vocabulary expansion. - embedding_map = _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, - word2vec) - - # Save the output. 
- vocab = embedding_map.keys() - vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt") - with tf.gfile.GFile(vocab_file, "w") as f: - f.write("\n".join(vocab)) - tf.logging.info("Wrote vocabulary file to %s", vocab_file) - - embeddings = np.array(embedding_map.values()) - embeddings_file = os.path.join(FLAGS.output_dir, "embeddings.npy") - np.save(embeddings_file, embeddings) - tf.logging.info("Wrote embeddings file to %s", embeddings_file) - - -if __name__ == "__main__": - tf.app.run() diff --git a/research/slim/datasets/imagenet.py b/research/slim/datasets/imagenet.py index cd7a19e51e79d559c3783031f0e040d02737a770..14b3c551f3c95b89f279f3119d3103834fcb26a6 100644 --- a/research/slim/datasets/imagenet.py +++ b/research/slim/datasets/imagenet.py @@ -86,7 +86,7 @@ def create_readable_names_for_imagenet_labels(): """ # pylint: disable=g-line-too-long - base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/' + base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/' synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url) synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url) diff --git a/research/slim/nets/mobilenet/mobilenet.py b/research/slim/nets/mobilenet/mobilenet.py index 55912d31f440adb97b38556f7defde239058bcd0..3969124b3a4337b56a80b4fffcddb4ab4a0e0d03 100644 --- a/research/slim/nets/mobilenet/mobilenet.py +++ b/research/slim/nets/mobilenet/mobilenet.py @@ -127,7 +127,7 @@ class NoOpScope(object): def safe_arg_scope(funcs, **kwargs): """Returns `slim.arg_scope` with all None arguments removed. - Arguments: + Args: funcs: Functions to pass to `arg_scope`. **kwargs: Arguments to pass to `arg_scope`. 
diff --git a/research/slim/nets/mobilenet/mobilenet_example.ipynb b/research/slim/nets/mobilenet/mobilenet_example.ipynb index 6bbfec6f1ed60cabe5f6357346ac7ae53b9e84bd..d66eac098f05008850d45c2c6e21b7f3534df06b 100644 --- a/research/slim/nets/mobilenet/mobilenet_example.ipynb +++ b/research/slim/nets/mobilenet/mobilenet_example.ipynb @@ -116,7 +116,7 @@ "source": [ "from __future__ import print_function\n", "from IPython import display \n", - "checkpoint_name = 'mobilenet_v2_1.0_224' #@param\n", + "base_name = checkpoint_name = 'mobilenet_v2_1.0_224' #@param\n", "url = 'https://storage.googleapis.com/mobilenet_v2/checkpoints/' + checkpoint_name + '.tgz'\n", "print('Downloading from ', url)\n", "!wget {url}\n", diff --git a/research/slim/nets/mobilenet_v1.py b/research/slim/nets/mobilenet_v1.py index f714d330ffa499a156b0b00841a0f51b4e605071..77cedd07c139a03264b7a26ac52023afe6f9722e 100644 --- a/research/slim/nets/mobilenet_v1.py +++ b/research/slim/nets/mobilenet_v1.py @@ -155,7 +155,7 @@ def _fixed_padding(inputs, kernel_size, rate=1): input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). 
""" kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), - kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)] pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] pad_beg = [pad_total[0] // 2, pad_total[1] // 2] pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] diff --git a/research/steve/README.md b/research/steve/README.md deleted file mode 100644 index 363be719efe76ac9b70913252ec949cf83bfd906..0000000000000000000000000000000000000000 --- a/research/steve/README.md +++ /dev/null @@ -1,94 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Stochastic Ensemble Value Expansion - -*A hybrid model-based/model-free reinforcement learning algorithm for sample-efficient continuous control.* - -This is the code repository accompanying the paper Sample-Efficient Reinforcement Learning with -Stochastic Ensemble Value Expansion, by Buckman et al. (2018). - -#### Abstract: -Merging model-free and model-based approaches in reinforcement learning has the potential to achieve -the high performance of model-free algorithms with low sample complexity. This is difficult because -an imperfect dynamics model can degrade the performance of the learning algorithm, and in sufficiently -complex environments, the dynamics model will always be imperfect. As a result, a key challenge is to -combine model-based approaches with model-free learning in such a way that errors in the model do not -degrade performance. We propose *stochastic ensemble value expansion* (STEVE), a novel model-based -technique that addresses this issue. 
By dynamically interpolating between model rollouts of various horizon -lengths for each individual example, STEVE ensures that the model is only utilized when doing so does not -introduce significant errors. Our approach outperforms model-free baselines on challenging continuous -control benchmarks with an order-of-magnitude increase in sample efficiency, and in contrast to previous -model-based approaches, performance does not degrade as the environment gets more complex. - -## Installation -This code is compatible with Ubuntu 16.04 and Python 2.7. There are several prerequisites: -* Numpy, Scipy, and Portalocker: `pip install numpy scipy portalocker` -* TensorFlow 1.6 or above. Instructions can be found on the official TensorFlow page: - [https://www.tensorflow.org/install/install_linux](https://www.tensorflow.org/install/install_linux). - We suggest installing the GPU version of TensorFlow to speed up training. -* OpenAI Gym version 0.9.4. Instructions can be found in the OpenAI Gym repository: - [https://github.com/openai/gym#installation](https://github.com/openai/gym#installation). - Note that you need to replace "pip install gym[all]" with "pip install gym[all]==0.9.4", which - will ensure that you get the correct version of Gym. (The current version of Gym has deprecated - the -v1 MuJoCo environments, which are the environments studied in this paper.) -* MuJoCo version 1.31, which can be downloaded here: [https://www.roboti.us/download/mjpro131_linux.zip](https://www.roboti.us/download/mjpro131_linux.zip). - Simply run: ``` - cd ~; mkdir -p .mujoco; cd .mujoco/; wget https://www.roboti.us/download/mjpro131_linux.zip; unzip mjpro131_linux.zip``` - You also need to get a license, and put the license key in ~/.mujoco/ as well. -* Optionally, Roboschool version 1.1. This is needed only to replicate the Roboschool experiments. 
- Instructions can be found in the OpenAI Roboschool repository: - [https://github.com/openai/roboschool#installation](https://github.com/openai/roboschool#installation). -* Optionally, MoviePy to render trained agents. Instructions on the MoviePy homepage: - [https://zulko.github.io/moviepy/install.html](https://zulko.github.io/moviepy/install.html). - -## Running Experiments -To run an experiment, run master.py and pass in a config file and GPU ID. For example: ``` -python master.py config/experiments/speedruns/humanoid/speedy_steve0.json 0``` -The `config/experiments/` -directory contains configuration files for all of the experiments run in the paper. - -The GPU ID specifies the GPU that should be used to learn the policy. For model-based approaches, the -next GPU (i.e. GPU_ID+1) is used to learn the worldmodel in parallel. - -To resume an experiment that was interrupted, use the same config file and pass the `--resume` flag: ``` -python master.py config/experiments/speedruns/humanoid/speedy_steve0.json 0 --resume``` - -## Output -For each experiment, two folders are created in the output directory: `//log` -and `//checkpoints`. The log directory contains the following: - -* `hps.json` contains the accumulated hyperparameters of the config file used to generate these results -* `valuerl.log` and `worldmodel.log` contain the log output of the learners. `worldmodel.log` will not - exist if you are not learning a worldmodel. -* `.greedy.csv` records all of the scores of our evaluators. The four columns contain time (hours), - epochs, frames, and score. - -The checkpoints directory contains the most recent versions of the policy and worldmodel, as well as checkpoints -of the policy, worldmodel, and their respective replay buffers at various points throughout training. 
- -## Code Organization -`master.py` launches four types of processes: a ValueRlLearner to learn the policy, a WorldmodelLearner -to learn the dynamics model, several Interactors to gather data from the environment to train on, and -a few Evaluators to run the greedy policy in the environment and record the score. - -`learner.py` contains a general framework for models which learn from a replay buffer. This is where -most of the code for the overall training loop is located. `valuerl_learner.py` and `worldmodel_learner.py` -contain a small amount of model-specific training loop code. - -`valuerl.py` implements the core model for all value-function-based policy learning techniques studied -in the paper, including DDPG, MVE, STEVE, etc. Similarly, `worldmodel.py` contains the core model for -our dynamics model and reward function. - -`replay.py` contains the code for the replay buffer. `nn.py`, `envwrap.py`, `config.py`, and `util.py` -each contain various helper functions. - -`toy_demo.py` is a self-contained demo, written in numpy, that was used to generate the results for the -toy examples in the first segment of the paper. - -`visualizer.py` is a utility script for loading trained policies and inspecting them. In addition to a -config file and a GPU, it takes the filename of the model to load as a mandatory third argument. - -## Contact -Please contact GitHub user buckman-google (jacobbuckman@gmail.com) with any questions. diff --git a/research/steve/agent.py b/research/steve/agent.py deleted file mode 100644 index 25069e29be1659fdd0f3a19e1a37bc41c9668b8b..0000000000000000000000000000000000000000 --- a/research/steve/agent.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import print_function -from builtins import zip -from builtins import range -from builtins import object -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import numpy as np -import tensorflow as tf -import time, os, traceback, multiprocessing, portalocker - -import envwrap -import valuerl -import util -from config import config - - -def run_env(pipe): - env = envwrap.get_env(config["env"]["name"]) - reset = True - while True: - if reset is True: pipe.send(env.reset()) - action = pipe.recv() - obs, reward, done, reset = env.step(action) - pipe.send((obs, reward, done, reset)) - -class AgentManager(object): - """ - Interact with the environment according to the learned policy, - """ - def __init__(self, proc_num, evaluation, policy_lock, batch_size, config): - self.evaluation = evaluation - self.policy_lock = policy_lock - self.batch_size = batch_size - self.config = config - - self.log_path = util.create_directory("%s/%s/%s/%s" % (config["output_root"], config["env"]["name"], config["name"], config["log_path"])) + "/%s" % config["name"] - self.load_path = util.create_directory("%s/%s/%s/%s" % (config["output_root"], config["env"]["name"], config["name"], config["save_model_path"])) - - ## placeholders for intermediate states (basis for rollout) - self.obs_loader = tf.placeholder(tf.float32, [self.batch_size, np.prod(self.config["env"]["obs_dims"])]) - - ## build model - self.valuerl = valuerl.ValueRL(self.config["name"], self.config["env"], self.config["policy_config"]) - 
self.policy_actions = self.valuerl.build_evalution_graph(self.obs_loader, mode="exploit" if self.evaluation else "explore") - - # interactors - self.agent_pipes, self.agent_child_pipes = list(zip(*[multiprocessing.Pipe() for _ in range(self.batch_size)])) - self.agents = [multiprocessing.Process(target=run_env, args=(self.agent_child_pipes[i],)) for i in range(self.batch_size)] - for agent in self.agents: agent.start() - self.obs = [pipe.recv() for pipe in self.agent_pipes] - self.total_rewards = [0. for _ in self.agent_pipes] - self.loaded_policy = False - - self.sess = tf.Session() - self.sess.run(tf.global_variables_initializer()) - - self.rollout_i = 0 - self.proc_num = proc_num - self.epoch = -1 - self.frame_total = 0 - self.hours = 0. - - self.first = True - - def get_action(self, obs): - if self.loaded_policy: - all_actions = self.sess.run(self.policy_actions, feed_dict={self.obs_loader: obs}) - all_actions = np.clip(all_actions, -1., 1.) - return all_actions[:self.batch_size] - else: - return [self.get_random_action() for _ in range(obs.shape[0])] - - def get_random_action(self, *args, **kwargs): - return np.random.random(self.config["env"]["action_dim"]) * 2 - 1 - - def step(self): - actions = self.get_action(np.stack(self.obs)) - self.first = False - [pipe.send(action) for pipe, action in zip(self.agent_pipes, actions)] - next_obs, rewards, dones, resets = list(zip(*[pipe.recv() for pipe in self.agent_pipes])) - - frames = list(zip(self.obs, next_obs, actions, rewards, dones)) - - self.obs = [o if resets[i] is False else self.agent_pipes[i].recv() for i, o in enumerate(next_obs)] - - for i, (t,r,reset) in enumerate(zip(self.total_rewards, rewards, resets)): - if reset: - self.total_rewards[i] = 0. 
- if self.evaluation and self.loaded_policy: - with portalocker.Lock(self.log_path+'.greedy.csv', mode="a") as f: f.write("%2f,%d,%d,%2f\n" % (self.hours, self.epoch, self.frame_total, t+r)) - - else: - self.total_rewards[i] = t + r - - if self.evaluation and np.any(resets): self.reload() - - self.rollout_i += 1 - return frames - - def reload(self): - if not os.path.exists("%s/%s.params.index" % (self.load_path ,self.valuerl.saveid)): return False - with self.policy_lock: - self.valuerl.load(self.sess, self.load_path) - self.epoch, self.frame_total, self.hours = self.sess.run([self.valuerl.epoch_n, self.valuerl.frame_n, self.valuerl.hours]) - self.loaded_policy = True - self.first = True - return True - -def main(proc_num, evaluation, policy_replay_frame_queue, model_replay_frame_queue, policy_lock, config): - try: - np.random.seed((proc_num * int(time.time())) % (2 ** 32 - 1)) - agentmanager = AgentManager(proc_num, evaluation, policy_lock, config["evaluator_config"]["batch_size"] if evaluation else config["agent_config"]["batch_size"], config) - frame_i = 0 - while True: - new_frames = agentmanager.step() - if not evaluation: - policy_replay_frame_queue.put(new_frames) - if model_replay_frame_queue is not None: model_replay_frame_queue.put(new_frames) - if frame_i % config["agent_config"]["reload_every_n"] == 0: agentmanager.reload() - frame_i += len(new_frames) - - except Exception as e: - print('Caught exception in agent process %d' % proc_num) - traceback.print_exc() - print() - try: - for i in agentmanager.agents: i.join() - except: - pass - raise e diff --git a/research/steve/config.py b/research/steve/config.py deleted file mode 100644 index 4a6da98c375e9e78e2f4ffd63c70c66b09ad1631..0000000000000000000000000000000000000000 --- a/research/steve/config.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import print_function -from builtins import str -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import argparse, json, util, traceback - -parser = argparse.ArgumentParser() -parser.add_argument("config") -parser.add_argument("root_gpu", type=int) -parser.add_argument("--resume", action="store_true") -args = parser.parse_args() - -config_loc = args.config -config = util.ConfigDict(config_loc) - -config["name"] = config_loc.split("/")[-1][:-5] -config["resume"] = args.resume - -cstr = str(config) - -def log_config(): - HPS_PATH = util.create_directory("output/" + config["env"]["name"] + "/" + config["name"] + "/" + config["log_path"]) + "/hps.json" - print("ROOT GPU: " + str(args.root_gpu) + "\n" + str(cstr)) - with open(HPS_PATH, "w") as f: - f.write("ROOT GPU: " + str(args.root_gpu) + "\n" + str(cstr)) \ No newline at end of file diff --git a/research/steve/config/algos/ddpg.json b/research/steve/config/algos/ddpg.json deleted file mode 100644 index e76c106986dc46b1fb37697eafff0965390e2061..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/ddpg.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "inherits": ["config/core/basic.json"] -} \ No newline at end of file diff --git a/research/steve/config/algos/mve_mean.json b/research/steve/config/algos/mve_mean.json deleted file mode 100644 index 729bccc62efe4549041af8bdf13df17c1cebe78e..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/mve_mean.json 
+++ /dev/null @@ -1,14 +0,0 @@ -{ - "inherits": [ - "config/core/basic.json", - "config/core/model.json" - ], - "updates":{ - "policy_config": { - "value_expansion": { - "rollout_len": 3, - "mean_k_return": true - } - } - } -} \ No newline at end of file diff --git a/research/steve/config/algos/mve_tdk.json b/research/steve/config/algos/mve_tdk.json deleted file mode 100644 index 222fd40c3c928c4a26d0082797e9337e6edf45d2..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/mve_tdk.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "inherits": [ - "config/core/basic.json", - "config/core/model.json" - ], - "updates":{ - "policy_config": { - "value_expansion": { - "rollout_len": 3, - "tdk_trick": true - } - } - } -} \ No newline at end of file diff --git a/research/steve/config/algos/mve_tdlambda.json b/research/steve/config/algos/mve_tdlambda.json deleted file mode 100644 index 3414dda5d00aedce9673643e4b3d9911facde24d..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/mve_tdlambda.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "inherits": [ - "config/core/basic.json", - "config/core/model.json" - ], - "updates":{ - "policy_config": { - "value_expansion": { - "rollout_len": 3, - "lambda_return": 0.25 - } - } - } -} \ No newline at end of file diff --git a/research/steve/config/algos/steve.json b/research/steve/config/algos/steve.json deleted file mode 100644 index ca2bc0395f98eda6ce2460fc79014492bb771589..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/steve.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "inherits": [ - "config/core/basic.json", - "config/core/model.json", - "config/core/bayesian.json" - ], - "updates":{ - "policy_config": { - "value_expansion": { - "rollout_len": 3, - "steve_reweight": true - } - } - } -} \ No newline at end of file diff --git a/research/steve/config/algos/steve_cov.json b/research/steve/config/algos/steve_cov.json deleted file mode 100644 index 
4dbf46e19c13f479af4a2ea132b4019a991144a7..0000000000000000000000000000000000000000 --- a/research/steve/config/algos/steve_cov.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "inherits": [ - "config/core/basic.json", - "config/core/model.json", - "config/core/bayesian.json" - ], - "updates":{ - "policy_config": { - "value_expansion": { - "rollout_len": 3, - "steve_reweight": true, - "covariances": true - } - } - } -} \ No newline at end of file diff --git a/research/steve/config/core/basic.json b/research/steve/config/core/basic.json deleted file mode 100644 index 411e7b65e5303bf28c1a99b9dd0442575b6be74c..0000000000000000000000000000000000000000 --- a/research/steve/config/core/basic.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "updates": { - "output_root": "output", - "save_model_path": "checkpoints", - "log_path": "log", - - "agent_config": { - "count": 1, - "batch_size": 8, - "reload_every_n": 1, - "full_random_n": 10000 - }, - - "evaluator_config": { - "count": 2, - "batch_size": 1 - }, - - "policy_config": { - "algo": "ddpg", - "hidden_dim": 128, - "explore_chance": 0.05, - "batch_size": 512, - "replay_size": 1000000, - "frames_before_learning": 10000, - "log_every_n": 500, - "epoch_every_n": 500, - "backup_every_n": 2500000, - "frames_per_update": 0.25 - } - } -} \ No newline at end of file diff --git a/research/steve/config/core/bayesian.json b/research/steve/config/core/bayesian.json deleted file mode 100644 index ea7d955434c51c91376325ad7791836f59931086..0000000000000000000000000000000000000000 --- a/research/steve/config/core/bayesian.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "updates": { - "policy_config": { - "bayesian": { - "ensemble_size": 4, - "train_sample_count": 4, - "eval_sample_count": 4 - } - }, - - "*model_config": { - "bayesian": { - "transition": { - "ensemble_size": 4, - "train_sample_count": 4, - "eval_sample_count": 4 - }, - "reward": { - "ensemble_size": 4, - "train_sample_count": 4, - "eval_sample_count": 4 - } - } - } - } -} \ No newline at end of 
file diff --git a/research/steve/config/core/model.json b/research/steve/config/core/model.json deleted file mode 100644 index 485146ab0242fde093796b8c920a33aed0baaf48..0000000000000000000000000000000000000000 --- a/research/steve/config/core/model.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "updates": { - "model_config": { - "transition_hidden_dim": 512, - "aux_hidden_dim": 128, - "batch_size": 512, - "replay_size": 1000000, - "frames_before_learning": 10000, - "log_every_n": 500, - "epoch_every_n": 500, - "backup_every_n": 2500000, - "pretrain_n": 10000, - "frames_per_update": 0.25 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/flagrun.json b/research/steve/config/envs/flagrun.json deleted file mode 100644 index 09ecc7cdeb893dbe5e1ee1d264848768689b60e0..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/flagrun.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "RoboschoolHumanoidFlagrun-v1", - "obs_dims": [44], - "action_dim": 17, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/halfcheetah.json b/research/steve/config/envs/halfcheetah.json deleted file mode 100644 index e0c9b38971d49b27071d8fd875f40ba391e77fbf..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/halfcheetah.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "HalfCheetah-v1", - "obs_dims": [17], - "action_dim": 6, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/hardcore.json b/research/steve/config/envs/hardcore.json deleted file mode 100644 index af372b28b689422e7ff0e2ccd1d3809f4712cb08..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/hardcore.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "BipedalWalkerHardcore-v2", - "obs_dims": [24], - 
"action_dim": 4, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/hopper.json b/research/steve/config/envs/hopper.json deleted file mode 100644 index 012def18593b98bdc7978e532d758d59cc3dbb4d..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/hopper.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "Hopper-v1", - "obs_dims": [11], - "action_dim": 3, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/humanoid.json b/research/steve/config/envs/humanoid.json deleted file mode 100644 index 39aeeb29517a255b9c241c9b7fd37e6870b85bba..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/humanoid.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "Humanoid-v1", - "obs_dims": [376], - "action_dim": 17, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/rshum.json b/research/steve/config/envs/rshum.json deleted file mode 100644 index 0ad54b2bfc3087643156b2b855eb5dbd5202ef77..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/rshum.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "RoboschoolHumanoid-v1", - "obs_dims": [44], - "action_dim": 17, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/swimmer.json b/research/steve/config/envs/swimmer.json deleted file mode 100644 index 0fcf2f32e793dbbb61981b5a9bdd0b4636fcd636..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/swimmer.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "Swimmer-v1", - "obs_dims": [8], - "action_dim": 2, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 
1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/envs/walker2d.json b/research/steve/config/envs/walker2d.json deleted file mode 100644 index 03ed94f7466069f7c7ec1f1fc2e5c8f1be3b398b..0000000000000000000000000000000000000000 --- a/research/steve/config/envs/walker2d.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "updates": { - "env": { - "name": "Walker2d-v1", - "obs_dims": [17], - "action_dim": 6, - "reward_scale":1.0, - "discount":0.99, - "max_frames": 1000 - } - } -} \ No newline at end of file diff --git a/research/steve/config/experimental_setups/speedrun.json b/research/steve/config/experimental_setups/speedrun.json deleted file mode 100644 index b34a9b706f919bffb58edc3345995112905d7ef3..0000000000000000000000000000000000000000 --- a/research/steve/config/experimental_setups/speedrun.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "updates": { - "policy_config": { - "frames_per_update": false - }, - "*model_config":{ - "frames_per_update": false, - "pretrain_n": false - } - } -} \ No newline at end of file diff --git a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk0.json b/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk0.json deleted file mode 100644 index da54f6310d2d3920b5b7dc2b121c5b81dee5fd99..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/core/bayesian", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk1.json b/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk1.json deleted file mode 100644 index da54f6310d2d3920b5b7dc2b121c5b81dee5fd99..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/core/bayesian", 
"config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk2.json b/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk2.json deleted file mode 100644 index da54f6310d2d3920b5b7dc2b121c5b81dee5fd99..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/ensemble_mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/core/bayesian", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda0.json b/research/steve/config/experiments/ablations/baselines/mve_25tdlambda0.json deleted file mode 100644 index b9e3dcd4bcecddaac7f11c1547e522dae720fe0e..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda0.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.25 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda1.json b/research/steve/config/experiments/ablations/baselines/mve_25tdlambda1.json deleted file mode 100644 index b9e3dcd4bcecddaac7f11c1547e522dae720fe0e..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda1.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.25 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda2.json b/research/steve/config/experiments/ablations/baselines/mve_25tdlambda2.json deleted file mode 100644 index b9e3dcd4bcecddaac7f11c1547e522dae720fe0e..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/ablations/baselines/mve_25tdlambda2.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.25 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda0.json b/research/steve/config/experiments/ablations/baselines/mve_75tdlambda0.json deleted file mode 100644 index 7366ba77b4df755e819b4701d6bb2923fc1516dc..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda0.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.75 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda1.json b/research/steve/config/experiments/ablations/baselines/mve_75tdlambda1.json deleted file mode 100644 index 7366ba77b4df755e819b4701d6bb2923fc1516dc..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda1.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.75 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda2.json b/research/steve/config/experiments/ablations/baselines/mve_75tdlambda2.json deleted file mode 100644 index 7366ba77b4df755e819b4701d6bb2923fc1516dc..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_75tdlambda2.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/mve_tdlambda.json", "config/envs/humanoid.json"], - "updates":{ - "policy_config": { - "value_expansion": { - "lambda_return": 0.75 - } - } - } -} 
diff --git a/research/steve/config/experiments/ablations/baselines/mve_meank0.json b/research/steve/config/experiments/ablations/baselines/mve_meank0.json deleted file mode 100644 index ce7d9b1ea4fb0742f8947f1a54115ea5704c5a7e..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_meank0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_mean.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/mve_meank1.json b/research/steve/config/experiments/ablations/baselines/mve_meank1.json deleted file mode 100644 index ce7d9b1ea4fb0742f8947f1a54115ea5704c5a7e..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_meank1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_mean.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/mve_meank2.json b/research/steve/config/experiments/ablations/baselines/mve_meank2.json deleted file mode 100644 index ce7d9b1ea4fb0742f8947f1a54115ea5704c5a7e..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/mve_meank2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_mean.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/steve_cov0.json b/research/steve/config/experiments/ablations/baselines/steve_cov0.json deleted file mode 100644 index df2e8a0d8f8785200fd53fff4580fc12fe657c8f..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/steve_cov0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve_cov.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/steve_cov1.json b/research/steve/config/experiments/ablations/baselines/steve_cov1.json deleted file mode 100644 index 
df2e8a0d8f8785200fd53fff4580fc12fe657c8f..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/steve_cov1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve_cov.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/baselines/steve_cov2.json b/research/steve/config/experiments/ablations/baselines/steve_cov2.json deleted file mode 100644 index df2e8a0d8f8785200fd53fff4580fc12fe657c8f..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/baselines/steve_cov2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve_cov.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/ablations/horizons/steve_1h0.json b/research/steve/config/experiments/ablations/horizons/steve_1h0.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_1h0.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_1h1.json b/research/steve/config/experiments/ablations/horizons/steve_1h1.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_1h1.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_1h2.json b/research/steve/config/experiments/ablations/horizons/steve_1h2.json deleted file mode 100644 index 
48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_1h2.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_2h0.json b/research/steve/config/experiments/ablations/horizons/steve_2h0.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_2h0.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_2h1.json b/research/steve/config/experiments/ablations/horizons/steve_2h1.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_2h1.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_2h2.json b/research/steve/config/experiments/ablations/horizons/steve_2h2.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_2h2.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git 
a/research/steve/config/experiments/ablations/horizons/steve_5h0.json b/research/steve/config/experiments/ablations/horizons/steve_5h0.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_5h0.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_5h1.json b/research/steve/config/experiments/ablations/horizons/steve_5h1.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_5h1.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/ablations/horizons/steve_5h2.json b/research/steve/config/experiments/ablations/horizons/steve_5h2.json deleted file mode 100644 index 48b6730b7a0f2ae9777d1f06efdb9789a94f43b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/ablations/horizons/steve_5h2.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "inherits": ["config/algos/steve.json", "config/envs/humanoid.json"], - "updates": { - "policy_config": { - "value_expansion": { - "rollout_len": 1 - } - } - } -} diff --git a/research/steve/config/experiments/goodruns/flagrun/ddpg0.json b/research/steve/config/experiments/goodruns/flagrun/ddpg0.json deleted file mode 100644 index a68ee412d11e75821f21d49664977ff0ebc539a7..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", 
"config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/ddpg1.json b/research/steve/config/experiments/goodruns/flagrun/ddpg1.json deleted file mode 100644 index a68ee412d11e75821f21d49664977ff0ebc539a7..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/ddpg2.json b/research/steve/config/experiments/goodruns/flagrun/ddpg2.json deleted file mode 100644 index a68ee412d11e75821f21d49664977ff0ebc539a7..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/ddpg3.json b/research/steve/config/experiments/goodruns/flagrun/ddpg3.json deleted file mode 100644 index a68ee412d11e75821f21d49664977ff0ebc539a7..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/mve_tdk0.json b/research/steve/config/experiments/goodruns/flagrun/mve_tdk0.json deleted file mode 100644 index 8da85dd375077521f25f6b4f9d0926cf4d8b66b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/mve_tdk1.json b/research/steve/config/experiments/goodruns/flagrun/mve_tdk1.json deleted file mode 100644 index 8da85dd375077521f25f6b4f9d0926cf4d8b66b0..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/goodruns/flagrun/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/mve_tdk2.json b/research/steve/config/experiments/goodruns/flagrun/mve_tdk2.json deleted file mode 100644 index 8da85dd375077521f25f6b4f9d0926cf4d8b66b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/mve_tdk3.json b/research/steve/config/experiments/goodruns/flagrun/mve_tdk3.json deleted file mode 100644 index 8da85dd375077521f25f6b4f9d0926cf4d8b66b0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/steve0.json b/research/steve/config/experiments/goodruns/flagrun/steve0.json deleted file mode 100644 index 21d32930224e1e0632e88646654892a2229ab90b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/steve1.json b/research/steve/config/experiments/goodruns/flagrun/steve1.json deleted file mode 100644 index 21d32930224e1e0632e88646654892a2229ab90b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/steve2.json 
b/research/steve/config/experiments/goodruns/flagrun/steve2.json deleted file mode 100644 index 21d32930224e1e0632e88646654892a2229ab90b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/flagrun/steve3.json b/research/steve/config/experiments/goodruns/flagrun/steve3.json deleted file mode 100644 index 21d32930224e1e0632e88646654892a2229ab90b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/flagrun/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/ddpg0.json b/research/steve/config/experiments/goodruns/halfcheetah/ddpg0.json deleted file mode 100644 index fc9d9eef28dfd4d631e41a9f4aeb625c66af27d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/ddpg1.json b/research/steve/config/experiments/goodruns/halfcheetah/ddpg1.json deleted file mode 100644 index fc9d9eef28dfd4d631e41a9f4aeb625c66af27d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/ddpg2.json b/research/steve/config/experiments/goodruns/halfcheetah/ddpg2.json deleted file mode 100644 index fc9d9eef28dfd4d631e41a9f4aeb625c66af27d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/ddpg2.json +++ /dev/null @@ -1 +0,0 
@@ -{"inherits": ["config/algos/ddpg.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/ddpg3.json b/research/steve/config/experiments/goodruns/halfcheetah/ddpg3.json deleted file mode 100644 index fc9d9eef28dfd4d631e41a9f4aeb625c66af27d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk0.json b/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk0.json deleted file mode 100644 index dcae7eb48df84194e6cc7dfb6c29a5000f2c5a8a..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk1.json b/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk1.json deleted file mode 100644 index dcae7eb48df84194e6cc7dfb6c29a5000f2c5a8a..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk2.json b/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk2.json deleted file mode 100644 index dcae7eb48df84194e6cc7dfb6c29a5000f2c5a8a..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk3.json 
b/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk3.json deleted file mode 100644 index dcae7eb48df84194e6cc7dfb6c29a5000f2c5a8a..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/steve0.json b/research/steve/config/experiments/goodruns/halfcheetah/steve0.json deleted file mode 100644 index f2fd36d3bbf79a94bf241a2ac394a8a3652b5ad3..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/steve1.json b/research/steve/config/experiments/goodruns/halfcheetah/steve1.json deleted file mode 100644 index f2fd36d3bbf79a94bf241a2ac394a8a3652b5ad3..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/steve2.json b/research/steve/config/experiments/goodruns/halfcheetah/steve2.json deleted file mode 100644 index f2fd36d3bbf79a94bf241a2ac394a8a3652b5ad3..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/halfcheetah/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/halfcheetah/steve3.json b/research/steve/config/experiments/goodruns/halfcheetah/steve3.json deleted file mode 100644 index f2fd36d3bbf79a94bf241a2ac394a8a3652b5ad3..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/goodruns/halfcheetah/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/halfcheetah.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/ddpg0.json b/research/steve/config/experiments/goodruns/hardcore/ddpg0.json deleted file mode 100644 index 3dce87b15ce8079953529c417e0c25c30a734ca2..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/ddpg1.json b/research/steve/config/experiments/goodruns/hardcore/ddpg1.json deleted file mode 100644 index 3dce87b15ce8079953529c417e0c25c30a734ca2..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/ddpg2.json b/research/steve/config/experiments/goodruns/hardcore/ddpg2.json deleted file mode 100644 index 3dce87b15ce8079953529c417e0c25c30a734ca2..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/ddpg3.json b/research/steve/config/experiments/goodruns/hardcore/ddpg3.json deleted file mode 100644 index 3dce87b15ce8079953529c417e0c25c30a734ca2..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/mve_tdk0.json 
b/research/steve/config/experiments/goodruns/hardcore/mve_tdk0.json deleted file mode 100644 index 095d8763ac1ee32233ef08e5643f105eff67e6cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/mve_tdk1.json b/research/steve/config/experiments/goodruns/hardcore/mve_tdk1.json deleted file mode 100644 index 095d8763ac1ee32233ef08e5643f105eff67e6cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/mve_tdk2.json b/research/steve/config/experiments/goodruns/hardcore/mve_tdk2.json deleted file mode 100644 index 095d8763ac1ee32233ef08e5643f105eff67e6cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/mve_tdk3.json b/research/steve/config/experiments/goodruns/hardcore/mve_tdk3.json deleted file mode 100644 index 095d8763ac1ee32233ef08e5643f105eff67e6cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/steve0.json b/research/steve/config/experiments/goodruns/hardcore/steve0.json deleted file mode 100644 index f094208520fe3f93065f981e557571958370b7fb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/steve0.json +++ 
/dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/steve1.json b/research/steve/config/experiments/goodruns/hardcore/steve1.json deleted file mode 100644 index f094208520fe3f93065f981e557571958370b7fb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/steve2.json b/research/steve/config/experiments/goodruns/hardcore/steve2.json deleted file mode 100644 index f094208520fe3f93065f981e557571958370b7fb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hardcore/steve3.json b/research/steve/config/experiments/goodruns/hardcore/steve3.json deleted file mode 100644 index f094208520fe3f93065f981e557571958370b7fb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hardcore/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hardcore.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/ddpg0.json b/research/steve/config/experiments/goodruns/hopper/ddpg0.json deleted file mode 100644 index 4916ab1161349792f0c396904d109d33d0d59d59..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/ddpg1.json b/research/steve/config/experiments/goodruns/hopper/ddpg1.json deleted file mode 100644 index 
4916ab1161349792f0c396904d109d33d0d59d59..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/ddpg2.json b/research/steve/config/experiments/goodruns/hopper/ddpg2.json deleted file mode 100644 index 4916ab1161349792f0c396904d109d33d0d59d59..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/ddpg3.json b/research/steve/config/experiments/goodruns/hopper/ddpg3.json deleted file mode 100644 index 4916ab1161349792f0c396904d109d33d0d59d59..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/mve_tdk0.json b/research/steve/config/experiments/goodruns/hopper/mve_tdk0.json deleted file mode 100644 index 40663e8b9450fa0b831eaa6f6c0089545b02d4a8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/mve_tdk1.json b/research/steve/config/experiments/goodruns/hopper/mve_tdk1.json deleted file mode 100644 index 40663e8b9450fa0b831eaa6f6c0089545b02d4a8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hopper.json"]} diff --git 
a/research/steve/config/experiments/goodruns/hopper/mve_tdk2.json b/research/steve/config/experiments/goodruns/hopper/mve_tdk2.json deleted file mode 100644 index 40663e8b9450fa0b831eaa6f6c0089545b02d4a8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/mve_tdk3.json b/research/steve/config/experiments/goodruns/hopper/mve_tdk3.json deleted file mode 100644 index 40663e8b9450fa0b831eaa6f6c0089545b02d4a8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/steve0.json b/research/steve/config/experiments/goodruns/hopper/steve0.json deleted file mode 100644 index 708ce89132733c03f36d8ca7d58e1b91cd5ad9cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/steve1.json b/research/steve/config/experiments/goodruns/hopper/steve1.json deleted file mode 100644 index 708ce89132733c03f36d8ca7d58e1b91cd5ad9cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/steve2.json b/research/steve/config/experiments/goodruns/hopper/steve2.json deleted file mode 100644 index 708ce89132733c03f36d8ca7d58e1b91cd5ad9cd..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/goodruns/hopper/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/hopper/steve3.json b/research/steve/config/experiments/goodruns/hopper/steve3.json deleted file mode 100644 index 708ce89132733c03f36d8ca7d58e1b91cd5ad9cd..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/hopper/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/hopper.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/ddpg0.json b/research/steve/config/experiments/goodruns/humanoid/ddpg0.json deleted file mode 100644 index 3bd27e7d5f176340826691b17ee16849bb6ac708..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/ddpg1.json b/research/steve/config/experiments/goodruns/humanoid/ddpg1.json deleted file mode 100644 index 3bd27e7d5f176340826691b17ee16849bb6ac708..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/ddpg2.json b/research/steve/config/experiments/goodruns/humanoid/ddpg2.json deleted file mode 100644 index 3bd27e7d5f176340826691b17ee16849bb6ac708..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/ddpg3.json b/research/steve/config/experiments/goodruns/humanoid/ddpg3.json deleted 
file mode 100644 index 3bd27e7d5f176340826691b17ee16849bb6ac708..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/mve_tdk0.json b/research/steve/config/experiments/goodruns/humanoid/mve_tdk0.json deleted file mode 100644 index 542ed8d80cec5f3d653c512febbeba3291c579e5..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/mve_tdk1.json b/research/steve/config/experiments/goodruns/humanoid/mve_tdk1.json deleted file mode 100644 index 542ed8d80cec5f3d653c512febbeba3291c579e5..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/mve_tdk2.json b/research/steve/config/experiments/goodruns/humanoid/mve_tdk2.json deleted file mode 100644 index 542ed8d80cec5f3d653c512febbeba3291c579e5..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/mve_tdk3.json b/research/steve/config/experiments/goodruns/humanoid/mve_tdk3.json deleted file mode 100644 index 542ed8d80cec5f3d653c512febbeba3291c579e5..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", 
"config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/steve0.json b/research/steve/config/experiments/goodruns/humanoid/steve0.json deleted file mode 100644 index 835b3f6213490a83e016846001122d113f2b5b17..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/steve1.json b/research/steve/config/experiments/goodruns/humanoid/steve1.json deleted file mode 100644 index 835b3f6213490a83e016846001122d113f2b5b17..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/steve2.json b/research/steve/config/experiments/goodruns/humanoid/steve2.json deleted file mode 100644 index 835b3f6213490a83e016846001122d113f2b5b17..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/humanoid/steve3.json b/research/steve/config/experiments/goodruns/humanoid/steve3.json deleted file mode 100644 index 835b3f6213490a83e016846001122d113f2b5b17..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/humanoid/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/ddpg0.json b/research/steve/config/experiments/goodruns/rshum/ddpg0.json deleted file mode 100644 index 9fd98d11e712231750f8dce5cf6352522e92abd8..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/goodruns/rshum/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/ddpg1.json b/research/steve/config/experiments/goodruns/rshum/ddpg1.json deleted file mode 100644 index 9fd98d11e712231750f8dce5cf6352522e92abd8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/ddpg2.json b/research/steve/config/experiments/goodruns/rshum/ddpg2.json deleted file mode 100644 index 9fd98d11e712231750f8dce5cf6352522e92abd8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/ddpg3.json b/research/steve/config/experiments/goodruns/rshum/ddpg3.json deleted file mode 100644 index 9fd98d11e712231750f8dce5cf6352522e92abd8..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/mve_tdk0.json b/research/steve/config/experiments/goodruns/rshum/mve_tdk0.json deleted file mode 100644 index ade2434ee2fae17c53aaaec2bffab7983b45c0d4..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/mve_tdk1.json b/research/steve/config/experiments/goodruns/rshum/mve_tdk1.json deleted file mode 100644 index 
ade2434ee2fae17c53aaaec2bffab7983b45c0d4..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/mve_tdk2.json b/research/steve/config/experiments/goodruns/rshum/mve_tdk2.json deleted file mode 100644 index ade2434ee2fae17c53aaaec2bffab7983b45c0d4..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/mve_tdk3.json b/research/steve/config/experiments/goodruns/rshum/mve_tdk3.json deleted file mode 100644 index ade2434ee2fae17c53aaaec2bffab7983b45c0d4..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/steve0.json b/research/steve/config/experiments/goodruns/rshum/steve0.json deleted file mode 100644 index 510854fbf80a03ccfeab8e1a2379ffebf8cc91ab..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/steve1.json b/research/steve/config/experiments/goodruns/rshum/steve1.json deleted file mode 100644 index 510854fbf80a03ccfeab8e1a2379ffebf8cc91ab..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/rshum.json"]} diff --git 
a/research/steve/config/experiments/goodruns/rshum/steve2.json b/research/steve/config/experiments/goodruns/rshum/steve2.json deleted file mode 100644 index 510854fbf80a03ccfeab8e1a2379ffebf8cc91ab..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/rshum/steve3.json b/research/steve/config/experiments/goodruns/rshum/steve3.json deleted file mode 100644 index 510854fbf80a03ccfeab8e1a2379ffebf8cc91ab..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/rshum/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/rshum.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/ddpg0.json b/research/steve/config/experiments/goodruns/swimmer/ddpg0.json deleted file mode 100644 index a94fc7c52ac1f31b9f12b83c5ac10f49e78d66d6..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/ddpg1.json b/research/steve/config/experiments/goodruns/swimmer/ddpg1.json deleted file mode 100644 index a94fc7c52ac1f31b9f12b83c5ac10f49e78d66d6..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/ddpg2.json b/research/steve/config/experiments/goodruns/swimmer/ddpg2.json deleted file mode 100644 index a94fc7c52ac1f31b9f12b83c5ac10f49e78d66d6..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/ddpg2.json +++ /dev/null @@ -1 
+0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/ddpg3.json b/research/steve/config/experiments/goodruns/swimmer/ddpg3.json deleted file mode 100644 index a94fc7c52ac1f31b9f12b83c5ac10f49e78d66d6..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/mve_tdk0.json b/research/steve/config/experiments/goodruns/swimmer/mve_tdk0.json deleted file mode 100644 index 14210117887df275a89990d6844c3d9fc4c64c24..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/mve_tdk1.json b/research/steve/config/experiments/goodruns/swimmer/mve_tdk1.json deleted file mode 100644 index 14210117887df275a89990d6844c3d9fc4c64c24..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/mve_tdk2.json b/research/steve/config/experiments/goodruns/swimmer/mve_tdk2.json deleted file mode 100644 index 14210117887df275a89990d6844c3d9fc4c64c24..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/mve_tdk3.json b/research/steve/config/experiments/goodruns/swimmer/mve_tdk3.json deleted file mode 100644 index 
14210117887df275a89990d6844c3d9fc4c64c24..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/steve0.json b/research/steve/config/experiments/goodruns/swimmer/steve0.json deleted file mode 100644 index d33583283089eb38f038c5669ba330d3b0720fbb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/steve1.json b/research/steve/config/experiments/goodruns/swimmer/steve1.json deleted file mode 100644 index d33583283089eb38f038c5669ba330d3b0720fbb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/steve2.json b/research/steve/config/experiments/goodruns/swimmer/steve2.json deleted file mode 100644 index d33583283089eb38f038c5669ba330d3b0720fbb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/swimmer.json"]} diff --git a/research/steve/config/experiments/goodruns/swimmer/steve3.json b/research/steve/config/experiments/goodruns/swimmer/steve3.json deleted file mode 100644 index d33583283089eb38f038c5669ba330d3b0720fbb..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/swimmer/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/swimmer.json"]} diff --git 
a/research/steve/config/experiments/goodruns/walker2d/ddpg0.json b/research/steve/config/experiments/goodruns/walker2d/ddpg0.json deleted file mode 100644 index 81fe2ff5643d47dded60431bbbf39541cdfb3f30..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/ddpg1.json b/research/steve/config/experiments/goodruns/walker2d/ddpg1.json deleted file mode 100644 index 81fe2ff5643d47dded60431bbbf39541cdfb3f30..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/ddpg2.json b/research/steve/config/experiments/goodruns/walker2d/ddpg2.json deleted file mode 100644 index 81fe2ff5643d47dded60431bbbf39541cdfb3f30..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/ddpg2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/ddpg3.json b/research/steve/config/experiments/goodruns/walker2d/ddpg3.json deleted file mode 100644 index 81fe2ff5643d47dded60431bbbf39541cdfb3f30..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/ddpg3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/mve_tdk0.json b/research/steve/config/experiments/goodruns/walker2d/mve_tdk0.json deleted file mode 100644 index d8420effaf5059eb359bcbd5edc17a50824d4d94..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/goodruns/walker2d/mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/mve_tdk1.json b/research/steve/config/experiments/goodruns/walker2d/mve_tdk1.json deleted file mode 100644 index d8420effaf5059eb359bcbd5edc17a50824d4d94..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/mve_tdk2.json b/research/steve/config/experiments/goodruns/walker2d/mve_tdk2.json deleted file mode 100644 index d8420effaf5059eb359bcbd5edc17a50824d4d94..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/mve_tdk2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/mve_tdk3.json b/research/steve/config/experiments/goodruns/walker2d/mve_tdk3.json deleted file mode 100644 index d8420effaf5059eb359bcbd5edc17a50824d4d94..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/mve_tdk3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/steve0.json b/research/steve/config/experiments/goodruns/walker2d/steve0.json deleted file mode 100644 index a98c410cad3c219541663a6e76072f4e23888135..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/steve1.json 
b/research/steve/config/experiments/goodruns/walker2d/steve1.json deleted file mode 100644 index a98c410cad3c219541663a6e76072f4e23888135..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/steve2.json b/research/steve/config/experiments/goodruns/walker2d/steve2.json deleted file mode 100644 index a98c410cad3c219541663a6e76072f4e23888135..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/steve2.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/goodruns/walker2d/steve3.json b/research/steve/config/experiments/goodruns/walker2d/steve3.json deleted file mode 100644 index a98c410cad3c219541663a6e76072f4e23888135..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/goodruns/walker2d/steve3.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/walker2d.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg0.json b/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg0.json deleted file mode 100644 index b7280d71ef0a78f8e70dfe9c76575628853c9ff5..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg1.json b/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg1.json deleted file mode 100644 index b7280d71ef0a78f8e70dfe9c76575628853c9ff5..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/speedruns/flagrun/speedy_ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk0.json b/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk0.json deleted file mode 100644 index 73252566b5e8a6515f008e1ddbe5c939b2ee000b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk1.json b/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk1.json deleted file mode 100644 index 73252566b5e8a6515f008e1ddbe5c939b2ee000b..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/flagrun/speedy_mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_steve0.json b/research/steve/config/experiments/speedruns/flagrun/speedy_steve0.json deleted file mode 100644 index ba5708f1ffb59a20751474d999f8a8798e89e751..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/flagrun/speedy_steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/flagrun/speedy_steve1.json b/research/steve/config/experiments/speedruns/flagrun/speedy_steve1.json deleted file mode 100644 index ba5708f1ffb59a20751474d999f8a8798e89e751..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/speedruns/flagrun/speedy_steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/flagrun.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg0.json b/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg0.json deleted file mode 100644 index eb07f31dc90b40ae7d66046627b29b153d3f2de9..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg1.json b/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg1.json deleted file mode 100644 index eb07f31dc90b40ae7d66046627b29b153d3f2de9..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/humanoid/speedy_ddpg1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/ddpg.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk0.json b/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk0.json deleted file mode 100644 index 51a3bdcb5a3a18903d9671e48eee3b2c10922c95..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk1.json b/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk1.json deleted file mode 100644 index 51a3bdcb5a3a18903d9671e48eee3b2c10922c95..0000000000000000000000000000000000000000 --- 
a/research/steve/config/experiments/speedruns/humanoid/speedy_mve_tdk1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/mve_tdk.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_steve0.json b/research/steve/config/experiments/speedruns/humanoid/speedy_steve0.json deleted file mode 100644 index 0d2bfaa4e3dd03a0ab88d28b12c97964d6c409d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/humanoid/speedy_steve0.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/config/experiments/speedruns/humanoid/speedy_steve1.json b/research/steve/config/experiments/speedruns/humanoid/speedy_steve1.json deleted file mode 100644 index 0d2bfaa4e3dd03a0ab88d28b12c97964d6c409d0..0000000000000000000000000000000000000000 --- a/research/steve/config/experiments/speedruns/humanoid/speedy_steve1.json +++ /dev/null @@ -1 +0,0 @@ -{"inherits": ["config/algos/steve.json", "config/envs/humanoid.json", "config/experimental_setups/speedrun.json"]} diff --git a/research/steve/envwrap.py b/research/steve/envwrap.py deleted file mode 100644 index bd88c3035c0afd22d5074460ee51fa346f37a798..0000000000000000000000000000000000000000 --- a/research/steve/envwrap.py +++ /dev/null @@ -1,106 +0,0 @@ -from builtins import object -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -try: - import roboschool -except: - pass -import gym -import numpy as np - -from config import config - -MAX_FRAMES = config["env"]["max_frames"] - -gym.logger.level=40 - -def get_env(env_name, *args, **kwargs): - MAPPING = { - "CartPole-v0": CartPoleWrapper, - } - if env_name in MAPPING: return MAPPING[env_name](env_name, *args, **kwargs) - else: return NoTimeLimitMujocoWrapper(env_name, *args, **kwargs) - -class GymWrapper(object): - """ - Generic wrapper for OpenAI gym environments. - """ - def __init__(self, env_name): - self.internal_env = gym.make(env_name) - self.observation_space = self.internal_env.observation_space - self.action_space = self.internal_env.action_space - self.custom_init() - - def custom_init(self): - pass - - def reset(self): - self.clock = 0 - return self.preprocess_obs(self.internal_env.reset()) - - # returns normalized actions - def sample(self): - return self.action_space.sample() - - # this is used for converting continuous approximations back to the original domain - def normalize_actions(self, actions): - return actions - - # puts actions into a form where they can be predicted. by default, called after sample() - def unnormalize_actions(self, actions): - return actions - - def preprocess_obs(self, obs): - # return np.append(obs, [self.clock/float(MAX_FRAMES)]) - return obs - - def step(self, normalized_action): - out = self.internal_env.step(normalized_action) - self.clock += 1 - obs, reward, done = self.preprocess_obs(out[0]), out[1], float(out[2]) - reset = done == 1. 
or self.clock == MAX_FRAMES - return obs, reward, done, reset - - def render_rollout(self, states): - ## states is numpy array of size [timesteps, state] - self.internal_env.reset() - for state in states: - self.internal_env.env.state = state - self.internal_env.render() - -class CartPoleWrapper(GymWrapper): - """ - Wrap CartPole. - """ - def sample(self): - return np.array([np.random.uniform(0., 1.)]) - - def normalize_actions(self, action): - return 1 if action[0] >= 0 else 0 - - def unnormalize_actions(self, action): - return 2. * action - 1. - -class NoTimeLimitMujocoWrapper(GymWrapper): - """ - Wrap Mujoco-style environments, removing the termination condition after time. - This is needed to keep it Markovian. - """ - def __init__(self, env_name): - self.internal_env = gym.make(env_name).env - self.observation_space = self.internal_env.observation_space - self.action_space = self.internal_env.action_space - self.custom_init() diff --git a/research/steve/learner.py b/research/steve/learner.py deleted file mode 100644 index 8a4c074cd0b85f84ea19d52bf8b43331aab9c5c5..0000000000000000000000000000000000000000 --- a/research/steve/learner.py +++ /dev/null @@ -1,272 +0,0 @@ -from __future__ import division -from __future__ import print_function -from builtins import zip -from builtins import range -from builtins import object -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import traceback, threading, time, warnings -import tensorflow as tf -import numpy as np - -import util -from replay import ReplayBuffer - -class Learner(object): - """ - Generic object which runs the main training loop of anything that trains using - a replay buffer. Handles updating, logging, saving/loading, batching, etc. - """ - def __init__(self, interactor_queue, lock, config, env_config, learner_config, **bonus_kwargs): - self.learner_name = self.learner_name() - self.interactor_queue = interactor_queue - self.learner_lock = lock - self.config = config - self.env_config = env_config - self.learner_config = learner_config - self.bonus_kwargs = bonus_kwargs - self.kill_threads = False - self.permit_desync = False - self.need_frames_notification = threading.Condition() - self._reset_inspections() - self.total_frames = 0 - - self.save_path = util.create_directory("%s/%s/%s/%s" % (self.config["output_root"], self.config["env"]["name"], self.config["name"], self.config["save_model_path"])) - self.log_path = util.create_directory("%s/%s/%s/%s" % (self.config["output_root"], self.config["env"]["name"], self.config["name"], self.config["log_path"])) + "/%s.log" % self.learner_name - - # replay buffer to store data - self.replay_buffer_lock = threading.RLock() - self.replay_buffer = ReplayBuffer(self.learner_config["replay_size"], - np.prod(self.env_config["obs_dims"]), - self.env_config["action_dim"]) - - # data loaders pull data from the replay buffer and put it into the tfqueue for model usage - self.data_loaders = self.make_loader_placeholders() - queue_capacity = np.ceil(1./self.learner_config["frames_per_update"]) if self.learner_config["frames_per_update"] else 100 - self.tf_queue = tf.FIFOQueue(capacity=queue_capacity, dtypes=[dl.dtype for dl in self.data_loaders]) - self.enqueue_op = self.tf_queue.enqueue(self.data_loaders) - self.current_batch = self.tf_queue.dequeue() - - # 
build the TF graph for the actual model to train - self.core, self.train_losses, self.train_ops, self.inspect_losses = self.make_core_model() - self.sess = tf.Session() - self.sess.run(tf.global_variables_initializer()) - - ## Mandatory functions to override - def learner_name(self): raise Exception('unimplemented: learner_name') - def make_loader_placeholders(self): raise Exception('unimplemented: make_loader_placeholders') - def make_core_model(self): raise Exception('unimplemented: make_core_model') - - ## Optional functions to override - def initialize(self): warnings.warn('unimplemented: initialize') - def resume_from_checkpoint(self, epoch): warnings.warn('unimplemented: resume_from_checkpoint') - def checkpoint(self): warnings.warn('unimplemented: checkpoint') - def backup(self): warnings.warn('unimplemented: backup') - - ## Internal functions - def _start(self): - # fetch data from the interactors to pre-fill the replay buffer - self.prefetch_thread = threading.Thread(target=self._poll_interactors, args=(True, self.learner_config["frames_before_learning"],)) - self.prefetch_thread.start() - self.prefetch_thread.join() - - # start the interactor and data loader - self.data_load_thread = threading.Thread(target=self._run_enqueue_data) - self.data_load_thread.start() - - # initialize the learner, pretraining if needed - if self.config["resume"]: self._resume_from_checkpoint() - else: self._initialize() - - # re-sync everything, and start up interactions with the environment - self.interactor_poll_thread = threading.Thread(target=self._poll_interactors) - self.interactor_poll_thread.start() - - # start the clock - self._last_checkpoint_time = time.time() - - def _learn(self, permit_desync=False, log=True, checkpoint=True, backup=True): - # this is to keep the frames/update synced properly - if self.learner_config["frames_per_update"] is not False and not permit_desync: - if not self._have_enough_frames(): - with self.need_frames_notification: - 
self.need_frames_notification.notify() - return - - # log - if log and (self.update_i + 1) % self.learner_config["log_every_n"] == 0: - self._log() - - # checkpoint - if checkpoint and (self.update_i + 1) % self.learner_config["epoch_every_n"] == 0: - self._checkpoint() - - # backup - if backup and (self.update_i + 1) % self.learner_config["backup_every_n"] == 0: - self._backup() - - # train - self._training_step() - - def _have_enough_frames(self): - gathered_frames = self.total_frames - self.learner_config["frames_before_learning"] - return gathered_frames > self.learner_config["frames_per_update"] * self.update_i - - def _initialize(self): - self.epoch = 0 - self.update_i = 0 - self.hours = 0 - self._last_checkpoint_time = time.time() - - self.initialize() - - if self.learner_config["pretrain_n"]: self._pretrain() - self._checkpoint() - - def _pretrain(self): - for _ in range(self.learner_config["pretrain_n"]): - self._learn(permit_desync=True, checkpoint=False, backup=False) - self.epoch = 0 - self.update_i = 0 - - def _resume_from_checkpoint(self): - epoch = util.get_largest_epoch_in_dir(self.save_path, self.core.saveid) - if not self.config['keep_all_replay_buffers']: util.wipe_all_but_largest_epoch_in_dir(self.save_path, self.core.saveid) - if epoch is False: - raise Exception("Tried to reload but no model found") - with self.learner_lock: - self.core.load(self.sess, self.save_path, epoch) - self.epoch, self.update_i, self.total_frames, self.hours = self.sess.run([self.core.epoch_n, self.core.update_n, self.core.frame_n, self.core.hours]) - with self.replay_buffer_lock: - self.replay_buffer.load(self.save_path, '%09d_%s' % (epoch, self.learner_name)) - self.resume_from_checkpoint(epoch) - - def _log(self): - if self.denom > 0: - logstring = "(%3.2f sec) h%-8.2f e%-8d s%-8d f%-8d\t" % (time.time() - self._log_time, self.hours, self.epoch, self.update_i + 1, self.total_frames) + ', '.join(["%8f" % x for x in (self.running_total / self.denom).tolist()]) - 
print("%s\t%s" % (self.learner_name, logstring)) - with open(self.log_path, "a") as f: f.write(logstring + "\n") - self._reset_inspections() - - def _reset_inspections(self): - self.running_total = 0. - self.denom = 0. - self._log_time = time.time() - - def _checkpoint(self): - self.checkpoint() - self.epoch += 1 - self.hours += (time.time() - self._last_checkpoint_time) / 3600. - self._last_checkpoint_time = time.time() - self.core.update_epoch(self.sess, self.epoch, self.update_i, self.total_frames, self.hours) - with self.learner_lock: self.core.save(self.sess, self.save_path) - - def _backup(self): - self.backup() - if not self.learner_config['keep_all_replay_buffers']: util.wipe_all_but_largest_epoch_in_dir(self.save_path, self.core.saveid) - with self.learner_lock: - self.core.save(self.sess, self.save_path, self.epoch) - with self.replay_buffer_lock: - self.replay_buffer.save(self.save_path, '%09d_%s' % (self.epoch, self.learner_name)) - - def _training_step(self): - train_ops = tuple([op for op, loss in zip(self.train_ops, - self.train_losses) - if loss is not None]) - outs = self.sess.run(train_ops + self.inspect_losses) - self.running_total += np.array(outs[len(train_ops):]) - self.denom += 1. - self.update_i += 1 - - def _poll_interactors(self, continuous_poll=False, frames_before_terminate=None): - # poll the interactors for new frames. 
- # the synced_condition semaphore prevents this from consuming too much CPU - while not self.kill_threads: - if self.learner_config["frames_per_update"] is not False and not continuous_poll: - with self.need_frames_notification: self.need_frames_notification.wait() - while not self.interactor_queue.empty(): - new_frames = self.interactor_queue.get() - self._add_frames(new_frames) - if frames_before_terminate and self.total_frames >= frames_before_terminate: return - - def _add_frames(self, frames): - with self.replay_buffer_lock: - for frame in frames: - self.replay_buffer.add_replay(*frame) - self.total_frames = self.replay_buffer.count - return self.total_frames - - def _run_enqueue_data(self): - while not self.kill_threads: - data = self.replay_buffer.random_batch(self.learner_config["batch_size"]) - self.sess.run(self.enqueue_op, feed_dict=dict(list(zip(self.data_loaders, data)))) - - def _kill_threads(self): - self.kill_threads = True - - -class CoreModel(object): - """The base class for the "core" of learners.""" - def __init__(self, name, env_config, learner_config): - self.name = self.saveid + "/" + name - self.env_config = env_config - self.learner_config = learner_config - - with tf.variable_scope(self.name): - self.epoch_n = tf.get_variable('epoch_n', [], initializer=tf.constant_initializer(0), dtype=tf.int64, trainable=False) - self.update_n = tf.get_variable('update_n', [], initializer=tf.constant_initializer(0), dtype=tf.int64, trainable=False) - self.frame_n = tf.get_variable('frame_n', [], initializer=tf.constant_initializer(0), dtype=tf.int64, trainable=False) - self.hours = tf.get_variable('hours', [], initializer=tf.constant_initializer(0.), dtype=tf.float64, trainable=False) - self.epoch_n_placeholder = tf.placeholder(tf.int64, []) - self.update_n_placeholder = tf.placeholder(tf.int64, []) - self.frame_n_placeholder = tf.placeholder(tf.int64, []) - self.hours_placeholder = tf.placeholder(tf.float64, []) - self.assign_epoch_op = 
[tf.assign(self.epoch_n, self.epoch_n_placeholder), tf.assign(self.update_n, self.update_n_placeholder), tf.assign(self.frame_n, self.frame_n_placeholder), tf.assign(self.hours, self.hours_placeholder)] - - self.create_params(env_config, learner_config) - self.model_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) - self.saver = tf.train.Saver(self.model_params) - - @property - def saveid(self): - raise Exception("specify a save ID") - - def create_params(self, env_config, learner_config): - raise Exception("unimplemented") - - def update_epoch(self, sess, epoch, updates, frames, hours): - sess.run(self.assign_epoch_op, feed_dict={self.epoch_n_placeholder: int(epoch), self.update_n_placeholder: int(updates), self.frame_n_placeholder: int(frames), self.hours_placeholder: float(hours)}) - - def save(self, sess, path, epoch=None): - if epoch is None: self.saver.save(sess, path + "/%s.params" % self.saveid) - else: self.saver.save(sess, path + "/%09d_%s.params" % (epoch, self.saveid)) - - def load(self, sess, path, epoch=None): - if epoch is None: self.saver.restore(sess, path + "/%s.params" % self.saveid) - else: self.saver.restore(sess, path + "/%09d_%s.params" % (epoch, self.saveid)) - -def run_learner(learner_subclass, queue, lock, config, env_config, learner_config, **bonus_kwargs): - learner = learner_subclass(queue, lock, config, env_config, learner_config, **bonus_kwargs) - try: - learner._start() - while True: learner._learn() - - except Exception as e: - print('Caught exception in learner process') - traceback.print_exc() - learner._kill_threads() - print() - raise e diff --git a/research/steve/master.py b/research/steve/master.py deleted file mode 100644 index 4d08474724dff64980f16257b9fe7a13cceb2cc1..0000000000000000000000000000000000000000 --- a/research/steve/master.py +++ /dev/null @@ -1,85 +0,0 @@ -from builtins import str -from builtins import range -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import multiprocessing -import os, sys, time - -from config import config, log_config -import util - -AGENT_COUNT = config["agent_config"]["count"] -EVALUATOR_COUNT = config["evaluator_config"]["count"] -MODEL_AUGMENTED = config["model_config"] is not False -if config["resume"]: - ROOT_PATH = "output/" + config["env"]["name"] + "/" + config["name"] -else: - ROOT_PATH = util.create_and_wipe_directory("output/" + config["env"]["name"] + "/" + config["name"]) -log_config() -import learner, agent, valuerl_learner -if MODEL_AUGMENTED: import worldmodel_learner - -if __name__ == '__main__': - all_procs = set([]) - interaction_procs = set([]) - - # lock - policy_lock = multiprocessing.Lock() - model_lock = multiprocessing.Lock() if MODEL_AUGMENTED else None - - # queue - policy_replay_frame_queue = multiprocessing.Queue(1) - model_replay_frame_queue = multiprocessing.Queue(1) if MODEL_AUGMENTED else None - - # interactors - for interact_proc_i in range(AGENT_COUNT): - interact_proc = multiprocessing.Process(target=agent.main, args=(interact_proc_i, False, policy_replay_frame_queue, model_replay_frame_queue, policy_lock, config)) - all_procs.add(interact_proc) - interaction_procs.add(interact_proc) - - # evaluators - for interact_proc_i in range(EVALUATOR_COUNT): - interact_proc = multiprocessing.Process(target=agent.main, args=(interact_proc_i, 
True, policy_replay_frame_queue, model_replay_frame_queue, policy_lock, config)) - all_procs.add(interact_proc) - interaction_procs.add(interact_proc) - - # policy training - train_policy_proc = multiprocessing.Process(target=learner.run_learner, args=(valuerl_learner.ValueRLLearner, policy_replay_frame_queue, policy_lock, config, config["env"], config["policy_config"]), kwargs={"model_lock": model_lock}) - all_procs.add(train_policy_proc) - - # model training - if MODEL_AUGMENTED: - train_model_proc = multiprocessing.Process(target=learner.run_learner, args=(worldmodel_learner.WorldmodelLearner, model_replay_frame_queue, model_lock, config, config["env"], config["model_config"])) - all_procs.add(train_model_proc) - - # start all policies - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - for i, proc in enumerate(interaction_procs): - os.environ['CUDA_VISIBLE_DEVICES'] = '' - proc.start() - - os.environ['CUDA_VISIBLE_DEVICES'] = str(int(sys.argv[2])) - train_policy_proc.start() - - if MODEL_AUGMENTED: - os.environ['CUDA_VISIBLE_DEVICES'] = str(1+int(sys.argv[2])) - train_model_proc.start() - - while True: - try: - pass - except: - for proc in all_procs: proc.join() diff --git a/research/steve/nn.py b/research/steve/nn.py deleted file mode 100644 index c87c6eb834506d0cbb462f92d53bcf3af30a4567..0000000000000000000000000000000000000000 --- a/research/steve/nn.py +++ /dev/null @@ -1,189 +0,0 @@ -from builtins import range -from builtins import object -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf -import numpy as np -from itertools import product - -class FeedForwardNet(object): - """Custom feed-forward network layer.""" - def __init__(self, name, in_size, out_shape, layers=1, hidden_dim=32, final_nonlinearity=None, get_uncertainty=False): - self.name = name - self.in_size = in_size - self.out_shape = out_shape - self.out_size = np.prod(out_shape) - self.layers = layers - self.hidden_dim = hidden_dim - self.final_nonlinearity = (lambda x:x) if final_nonlinearity is None else final_nonlinearity - self.get_uncertainty = get_uncertainty - - self.weights = [None] * layers - self.biases = [None] * layers - - self.params_list = [] - - with tf.variable_scope(name): - for layer_i in range(self.layers): - in_size = self.hidden_dim - out_size = self.hidden_dim - if layer_i == 0: in_size = self.in_size - if layer_i == self.layers - 1: out_size = self.out_size - self.weights[layer_i] = tf.get_variable("weights%d" % layer_i, [in_size, out_size], initializer=tf.contrib.layers.xavier_initializer()) - self.biases[layer_i] = tf.get_variable("bias%d" % layer_i, [1, out_size], initializer=tf.constant_initializer(0.0)) - self.params_list += [self.weights[layer_i], self.biases[layer_i]] - - def __call__(self, x, stop_params_gradient=False, is_eval=True, ensemble_idxs=None, pre_expanded=None, reduce_mode="none"): - original_shape = tf.shape(x) - h = tf.reshape(x, [-1, self.in_size]) - for layer_i in range(self.layers): - nonlinearity = tf.nn.relu if layer_i + 1 < self.layers else self.final_nonlinearity - if stop_params_gradient: h = nonlinearity(tf.matmul(h, tf.stop_gradient(self.weights[layer_i])) + tf.stop_gradient(self.biases[layer_i])) - else: h = nonlinearity(tf.matmul(h, self.weights[layer_i]) + self.biases[layer_i]) - if len(self.out_shape) > 0: h = 
tf.reshape(h, tf.concat([original_shape[:-1], tf.constant(self.out_shape)], -1)) - else: h = tf.reshape(h, original_shape[:-1]) - if pre_expanded is None: pre_expanded = ensemble_idxs is not None - if reduce_mode == "none" and not pre_expanded and self.get_uncertainty: - if len(self.out_shape) > 0: h = tf.expand_dims(h, -2) - else: h = tf.expand_dims(h, -1) - return h - - def l2_loss(self): - return tf.add_n([tf.reduce_sum(.5 * tf.square(mu)) for mu in self.params_list]) - -class BayesianDropoutFeedForwardNet(FeedForwardNet): - """Custom feed-forward network layer, with dropout as a Bayesian approximation.""" - def __init__(self, name, in_size, out_shape, layers=1, hidden_dim=32, final_nonlinearity=None, get_uncertainty=False, keep_prob=.5, eval_sample_count=2, consistent_random_seed=False): - super(BayesianDropoutFeedForwardNet, self).__init__(name, in_size, out_shape, layers=layers, hidden_dim=hidden_dim, - final_nonlinearity=final_nonlinearity, get_uncertainty=get_uncertainty) - self.keep_prob = keep_prob - self.eval_sample_count = eval_sample_count - if eval_sample_count < 2: raise Exception("eval_sample_count must be at least 2 to estimate uncertainty") - self.dropout_seed = tf.random_uniform([layers], maxval=1e18, dtype=tf.int64) if consistent_random_seed else [None] * layers - - def __call__(self, x, stop_params_gradient=False, is_eval=True, pre_expanded=False, ensemble_idxs=None, reduce_mode="none"): - if is_eval: - x = tf.tile(tf.expand_dims(x,0), tf.concat([tf.constant([self.eval_sample_count]), tf.ones_like(tf.shape(x))], 0)) - original_shape = tf.shape(x) - h = tf.reshape(x, [-1, self.in_size]) - for layer_i in range(self.layers): - nonlinearity = tf.nn.relu if layer_i + 1 < self.layers else self.final_nonlinearity - if layer_i > 0: h = tf.nn.dropout(h, keep_prob=self.keep_prob, seed=self.dropout_seed[layer_i]) - if stop_params_gradient: h = nonlinearity(tf.matmul(h, tf.stop_gradient(self.weights[layer_i])) + tf.stop_gradient(self.biases[layer_i])) - 
else: h = nonlinearity(tf.matmul(h, self.weights[layer_i]) + self.biases[layer_i]) - if len(self.out_shape) > 0: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant(self.out_shape)], -1)) - else: h = tf.reshape(h, original_shape[:-1]) - if is_eval: - h, uncertainty = tf.nn.moments(h, 0) - if self.get_uncertainty: return h, uncertainty - else: return h - else: - return h - - -class EnsembleFeedForwardNet(FeedForwardNet): - """Custom feed-forward network layer with an ensemble.""" - def __init__(self, name, in_size, out_shape, layers=1, hidden_dim=32, final_nonlinearity=None, get_uncertainty=False, ensemble_size=2, train_sample_count=2, eval_sample_count=2): - if train_sample_count > ensemble_size: raise Exception("train_sample_count cannot be larger than ensemble size") - if eval_sample_count > ensemble_size: raise Exception("eval_sample_count cannot be larger than ensemble size") - self.name = name - self.in_size = in_size - self.out_shape = out_shape - self.out_size = np.prod(out_shape) - self.layers = layers - self.hidden_dim = hidden_dim - self.final_nonlinearity = (lambda x:x) if final_nonlinearity is None else final_nonlinearity - self.get_uncertainty = get_uncertainty - self.ensemble_size = ensemble_size - self.train_sample_count = train_sample_count - self.eval_sample_count = eval_sample_count - - self.weights = [None] * layers - self.biases = [None] * layers - - self.params_list = [] - - with tf.variable_scope(name): - for layer_i in range(self.layers): - in_size = self.hidden_dim - out_size = self.hidden_dim - if layer_i == 0: in_size = self.in_size - if layer_i == self.layers - 1: out_size = self.out_size - self.weights[layer_i] = tf.get_variable("weights%d" % layer_i, [ensemble_size, in_size, out_size], initializer=tf.contrib.layers.xavier_initializer()) - self.biases[layer_i] = tf.get_variable("bias%d" % layer_i, [ensemble_size, out_size], initializer=tf.constant_initializer(0.0)) - self.params_list += [self.weights[layer_i], 
self.biases[layer_i]] - - def __call__(self, x, stop_params_gradient=False, is_eval=True, ensemble_idxs=None, pre_expanded=None, reduce_mode="none"): - if pre_expanded is None: pre_expanded = ensemble_idxs is not None - if ensemble_idxs is None: - ensemble_idxs = tf.random_shuffle(tf.range(self.ensemble_size)) - ensemble_sample_n = self.eval_sample_count if is_eval else self.train_sample_count - ensemble_idxs = ensemble_idxs[:ensemble_sample_n] - else: - ensemble_sample_n = tf.shape(ensemble_idxs)[0] - - weights = [tf.gather(w, ensemble_idxs, axis=0) for w in self.weights] - biases = [tf.expand_dims(tf.gather(b, ensemble_idxs, axis=0),0) for b in self.biases] - - original_shape = tf.shape(x) - if pre_expanded: h = tf.reshape(x, [-1, ensemble_sample_n, self.in_size]) - else: h = tf.tile(tf.reshape(x, [-1, 1, self.in_size]), [1, ensemble_sample_n, 1]) - for layer_i in range(self.layers): - nonlinearity = tf.nn.relu if layer_i + 1 < self.layers else self.final_nonlinearity - if stop_params_gradient: h = nonlinearity(tf.einsum('bri,rij->brj', h, tf.stop_gradient(weights[layer_i])) + tf.stop_gradient(biases[layer_i])) - else: h = nonlinearity(tf.einsum('bri,rij->brj', h, weights[layer_i]) + biases[layer_i]) - - if pre_expanded: - if len(self.out_shape) > 0: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant(self.out_shape)], -1)) - else: h = tf.reshape(h, original_shape[:-1]) - else: - if len(self.out_shape) > 0: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant([ensemble_sample_n]), tf.constant(self.out_shape)], -1)) - else: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant([ensemble_sample_n])], -1)) - - if reduce_mode == "none": - pass - elif reduce_mode == "random": - if len(self.out_shape) > 0: h = tf.reduce_sum(h * tf.reshape(tf.one_hot(tf.random_uniform([tf.shape(h)[0]], 0, ensemble_sample_n, dtype=tf.int64), ensemble_sample_n), tf.concat([tf.shape(h)[:1], tf.ones_like(tf.shape(h)[1:-2]), tf.constant([ensemble_sample_n]), 
tf.constant([1])], 0)), -2) - else: h = tf.reduce_sum(h * tf.reshape(tf.one_hot(tf.random_uniform([tf.shape(h)[0]], 0, ensemble_sample_n, dtype=tf.int64), ensemble_sample_n), tf.concat([tf.shape(h)[:1], tf.ones_like(tf.shape(h)[1:-1]), tf.constant([ensemble_sample_n])], 0)), -1) - elif reduce_mode == "mean": - if len(self.out_shape) > 0: h = tf.reduce_mean(h, -2) - else: h = tf.reduce_mean(h, -1) - else: raise Exception("use a valid reduce mode: none, random, or mean") - - return h - - -class ReparamNormal(object): - """Wrapper to make a feedforward network that outputs both mu and logsigma, - for use in the reparameterization trick.""" - def __init__(self, base_net, name, in_size, out_shape, layers=2, hidden_dim=32, final_nonlinearity=None, ls_start_bias=0.0, final_net=FeedForwardNet, logsigma_min=-5., logsigma_max=2., **kwargs): - assert layers > 1 - self.main_encoder = base_net(name+"_base", in_size, [hidden_dim], layers, hidden_dim, final_nonlinearity=tf.nn.relu, **kwargs) - self.mu = final_net(name+"_mu", hidden_dim, out_shape, layers=1, final_nonlinearity=final_nonlinearity, **kwargs) - self.logsigma = final_net(name+"_logsigma", hidden_dim, out_shape, layers=1, final_nonlinearity=None, **kwargs) - self.ls_start_bias = ls_start_bias - self.params_list = self.main_encoder.params_list + self.mu.params_list + self.logsigma.params_list - self.logsigma_min = logsigma_min - self.logsigma_max = logsigma_max - - def __call__(self, x): - encoded = self.main_encoder(x) - mu = self.mu(encoded) - logsigma = tf.clip_by_value(self.logsigma(encoded) + self.ls_start_bias, self.logsigma_min, self.logsigma_max) - return mu, logsigma - - def l2_loss(self): - return self.main_encoder.l2_loss() + self.mu.l2_loss() + self.logsigma.l2_loss() diff --git a/research/steve/replay.py b/research/steve/replay.py deleted file mode 100644 index 989cc0b2a518398718f36fc6dd8e33cc35083a76..0000000000000000000000000000000000000000 --- a/research/steve/replay.py +++ /dev/null @@ -1,109 +0,0 @@ 
-from __future__ import print_function -from future import standard_library -standard_library.install_aliases() -from builtins import zip -from builtins import str -from builtins import object -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import numpy as np -import pickle -import multiprocessing - -class ReplayBuffer(object): - """ - Stores frames sampled from the environment, with the ability to sample a batch - for training. 
- """ - - def __init__(self, max_size, obs_dim, action_dim, roundrobin=True): - self.max_size = max_size - self.obs_dim = obs_dim - self.action_dim = action_dim - self.roundrobin = roundrobin - - self.obs_buffer = np.zeros([max_size, obs_dim]) - self.next_obs_buffer = np.zeros([max_size, obs_dim]) - self.action_buffer = np.zeros([max_size, action_dim]) - self.reward_buffer = np.zeros([max_size]) - self.done_buffer = np.zeros([max_size]) - - self.count = 0 - - def random_batch(self, batch_size): - indices = np.random.randint(0, min(self.count, self.max_size), batch_size) - - return ( - self.obs_buffer[indices], - self.next_obs_buffer[indices], - self.action_buffer[indices], - self.reward_buffer[indices], - self.done_buffer[indices], - self.count - ) - - def add_replay(self, obs, next_obs, action, reward, done): - if self.count >= self.max_size: - if self.roundrobin: index = self.count % self.max_size - else: index = np.random.randint(0, self.max_size) - else: - index = self.count - - self.obs_buffer[index] = obs - self.next_obs_buffer[index] = next_obs - self.action_buffer[index] = action - self.reward_buffer[index] = reward - self.done_buffer[index] = done - - self.count += 1 - - def save(self, path, name): - def _save(datas, fnames): - print("saving replay buffer...") - for data, fname in zip(datas, fnames): - with open("%s.npz"%fname, "wb") as f: - pickle.dump(data, f) - with open("%s/%s.count" % (path,name), "wb") as f: - f.write(str(self.count)) - print("...done saving.") - - datas = [ - self.obs_buffer, - self.next_obs_buffer, - self.action_buffer, - self.reward_buffer, - self.done_buffer - ] - - fnames = [ - "%s/%s.obs_buffer" % (path, name), - "%s/%s.next_obs_buffer" % (path, name), - "%s/%s.action_buffer" % (path, name), - "%s/%s.reward_buffer" % (path, name), - "%s/%s.done_buffer" % (path, name) - ] - - proc = multiprocessing.Process(target=_save, args=(datas, fnames)) - proc.start() - - def load(self, path, name): - print("Loading %s replay buffer (may 
take a while...)" % name) - with open("%s/%s.obs_buffer.npz" % (path,name)) as f: self.obs_buffer = pickle.load(f) - with open("%s/%s.next_obs_buffer.npz" % (path,name)) as f: self.next_obs_buffer = pickle.load(f) - with open("%s/%s.action_buffer.npz" % (path,name)) as f: self.action_buffer = pickle.load(f) - with open("%s/%s.reward_buffer.npz" % (path,name)) as f: self.reward_buffer = pickle.load(f) - with open("%s/%s.done_buffer.npz" % (path,name)) as f: self.done_buffer = pickle.load(f) - with open("%s/%s.count" % (path,name), "r") as f: self.count = int(f.read()) diff --git a/research/steve/toy_demo.py b/research/steve/toy_demo.py deleted file mode 100644 index 859a86f72a9f8162d9aa9ea1bf78e8baa8b15a4e..0000000000000000000000000000000000000000 --- a/research/steve/toy_demo.py +++ /dev/null @@ -1,430 +0,0 @@ -from __future__ import division -from __future__ import print_function -from builtins import range -from past.utils import old_div -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import numpy as np -import scipy -import matplotlib.pyplot as plt -import seaborn as sns - -### Hyperparameters - -NONTERMINAL_STATE_COUNT = 100 -NOISE_AMOUNT = 0.1 -TRAIN_STEPS = 10000 -Q_ENSEMBLE_SIZE = 8 -MODEL_ENSEMBLE_SIZE = 8 -HORIZON = 5 -TRIAL_N = 10 - -### Helper functions - -initial_state = 0 -terminal_state = NONTERMINAL_STATE_COUNT + 1 -nonterminal_state_count = NONTERMINAL_STATE_COUNT -state_count = NONTERMINAL_STATE_COUNT + 1 -final_reward = NONTERMINAL_STATE_COUNT -colors = sns.color_palette('husl', 4) -plt.rcParams["figure.figsize"] = (6,5) - -def step(state): - if state == terminal_state: next_state = terminal_state - else: next_state = state + 1 - - if state == terminal_state: reward = 0 - elif state+1 == terminal_state: reward = final_reward - else: reward = -1 - - return next_state, reward - -def noisy_step(state): - if state == terminal_state: next_state = terminal_state - elif np.random.random([]) < NOISE_AMOUNT: next_state = np.random.randint(0, state_count) - else: next_state = state + 1 - - if state == terminal_state: reward = 0 - elif state+1 == terminal_state: reward = final_reward - else: reward = -1 - - return next_state, reward - -def get_error(Q): - losses = np.square(np.arange(state_count) - Q[:-1]) - return np.mean(losses) - -def downsample(array, factor): - pad_size = np.ceil(old_div(float(array.size),factor))*factor - array.size - array_padded = np.append(array, np.zeros([pad_size.astype(np.int64)])*np.NaN) - return scipy.nanmean(array_padded.reshape(-1,factor), axis=1) - - -###################### -### Main experiments -###################### - -# Basic Q -if True: - print("Running basic Q-learning.") - trial_results = [] - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[state_count+1]).astype(np.float64) - Q[state_count] = 0 - losses = [] - for step_i in range(TRAIN_STEPS): - state = 
np.random.randint(0,state_count) - next_state, reward = step(state) - Q[state] = reward + Q[next_state] - losses.append(get_error(Q)) - trial_results.append(losses) - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="Basic Q-learning", color=colors[0]) - plt.fill_between(np.arange(TRAIN_STEPS), means - stdevs, means + stdevs, alpha=.2, color=colors[0]) - with open('Toy-v1/baseline.csv', 'w') as f: - data = [] - for frame_i in range(result.shape[0]): - for loss in result[frame_i]: - data.append("%f,%f,%f,%f" % (frame_i, frame_i, frame_i, loss)) - f.write("\n".join(data)) - -# Ensemble Q -if True: - print("Running ensemble Q-learning.") - trial_results = [] - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[Q_ENSEMBLE_SIZE, state_count+1]).astype(np.float64) - Q[:, state_count] = 0 - losses = [] - for step_i in range(TRAIN_STEPS): - for q_ensemble_i in range(Q_ENSEMBLE_SIZE): - state = np.random.randint(0,state_count) - next_state, reward = step(state) - Q[q_ensemble_i, state] = reward + np.mean(Q[:, next_state]) - losses.append(get_error(np.mean(Q, axis=0))) - trial_results.append(losses) - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="Ensemble Q-learning", color=colors[1]) - plt.fill_between(np.arange(TRAIN_STEPS), means - stdevs, means + stdevs, alpha=.2, color=colors[1]) - -# Ensemble MVE-Oracle -if True: - print("Running ensemble oracle MVE.") - trial_results = [] - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[Q_ENSEMBLE_SIZE, state_count+1]).astype(np.float64) - Q[:, state_count] = 0 - losses = [] - for step_i in range(TRAIN_STEPS): - for q_ensemble_i in range(Q_ENSEMBLE_SIZE): - state = np.random.randint(0,state_count) - next_state, reward 
= step(state) - - # MVE rollout - target = reward - for _ in range(HORIZON): - next_state, reward = step(next_state) - target += reward - target += np.mean(Q[:,next_state]) - - Q[q_ensemble_i, state] = target - losses.append(get_error(np.mean(Q, axis=0))) - trial_results.append(losses) - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="MVE-oracle", color=colors[2]) - plt.fill_between(np.arange(TRAIN_STEPS), means - stdevs, means + stdevs, alpha=.2, color=colors[2]) - with open('Toy-v1/mve_oracle.csv', 'w') as f: - data = [] - for frame_i in range(result.shape[0]): - for loss in result[frame_i]: - data.append("%f,%f,%f,%f" % (frame_i, frame_i, frame_i, loss)) - f.write("\n".join(data)) - -# Ensemble MVE-Noisy -if True: - print("Running ensemble noisy MVE.") - trial_results = [] - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[Q_ENSEMBLE_SIZE, state_count+1]).astype(np.float64) - Q[:, state_count] = 0 - losses = [] - for step_i in range(TRAIN_STEPS): - for q_ensemble_i in range(Q_ENSEMBLE_SIZE): - state = np.random.randint(0,state_count) - next_state, reward = step(state) - - # MVE rollout - targets = [] - first_next_state, first_reward = next_state, reward - for model_ensemble_i in range(MODEL_ENSEMBLE_SIZE): - next_state, reward = first_next_state, first_reward - target = reward - for _ in range(HORIZON): - next_state, reward = noisy_step(next_state) - target += reward - target += np.mean(Q[:,next_state]) - targets.append(target) - - Q[q_ensemble_i, state] = np.mean(targets) - losses.append(get_error(np.mean(Q, axis=0))) - trial_results.append(losses) - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="MVE-noisy", color=colors[2], linestyle='dotted') - plt.fill_between(np.arange(TRAIN_STEPS), 
means - stdevs, means + stdevs, alpha=.2, color=colors[2]) - with open('Toy-v1/mve_noisy.csv', 'w') as f: - data = [] - for frame_i in range(result.shape[0]): - for loss in result[frame_i]: - data.append("%f,%f,%f,%f" % (frame_i, frame_i, frame_i, loss)) - f.write("\n".join(data)) - -# STEVE-Oracle -if True: - print("Running ensemble oracle STEVE.") - trial_results = [] - - oracle_q_estimate_errors = [] - oracle_mve_estimate_errors = [] - oracle_steve_estimate_errors = [] - oracle_opt_estimate_errors = [] - - - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[Q_ENSEMBLE_SIZE, state_count+1]).astype(np.float64) - Q[:, state_count] = 0 - losses = [] - - q_estimate_errors = [] - mve_estimate_errors = [] - steve_estimate_errors = [] - opt_estimate_errors = [] - steve_beat_freq= [] - - for step_i in range(TRAIN_STEPS): - _q_estimate_errors = [] - _mve_estimate_errors = [] - _steve_estimate_errors = [] - _opt_estimate_errors = [] - _steve_beat_freq = [] - - for q_ensemble_i in range(Q_ENSEMBLE_SIZE): - state = np.random.randint(0,state_count) - next_state, reward = step(state) - - # STEVE rollout - Q_est_mat = np.zeros([HORIZON + 1, Q_ENSEMBLE_SIZE]) - reward_est_mat = np.zeros([HORIZON + 1, 1]) - first_next_state, first_reward = next_state, reward - next_state, reward = first_next_state, first_reward - Q_est_mat[0, :] = Q[:, next_state] - reward_est_mat[0, 0] = reward - for timestep_i in range(1,HORIZON+1): - next_state, reward = step(next_state) - Q_est_mat[timestep_i, :] = Q[:, next_state] - reward_est_mat[timestep_i, 0] = reward - all_targets = Q_est_mat + np.cumsum(reward_est_mat, axis=0) - - # STEVE weight calculation - estimates = np.mean(all_targets, axis=1) - confidences = old_div(1., (np.var(all_targets, axis=1) + 1e-8)) - coefficients = old_div(confidences, np.sum(confidences)) - target = np.sum(estimates * coefficients) - - Q[q_ensemble_i, state] = target - - true_target = state + 1. 
if state != terminal_state else 0. - _q_estimate_errors.append(np.square(estimates[0] - true_target)) - _mve_estimate_errors.append(np.square(estimates[-1] - true_target)) - _steve_estimate_errors.append(np.square(np.sum(estimates * coefficients) - true_target)) - _opt_estimate_errors.append(np.min(np.square(estimates - true_target))) - - losses.append(get_error(np.mean(Q, axis=0))) - q_estimate_errors.append(np.mean(_q_estimate_errors)) - mve_estimate_errors.append(np.mean(_mve_estimate_errors)) - steve_estimate_errors.append(np.mean(_steve_estimate_errors)) - opt_estimate_errors.append(np.mean(_opt_estimate_errors)) - trial_results.append(losses) - oracle_q_estimate_errors.append(q_estimate_errors) - oracle_mve_estimate_errors.append(mve_estimate_errors) - oracle_steve_estimate_errors.append(steve_estimate_errors) - oracle_opt_estimate_errors.append(opt_estimate_errors) - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="STEVE-oracle", color=colors[3]) - plt.fill_between(np.arange(TRAIN_STEPS), means - stdevs, means + stdevs, alpha=.2, color=colors[3]) - with open('Toy-v1/steve_oracle.csv', 'w') as f: - data = [] - for frame_i in range(result.shape[0]): - for loss in result[frame_i]: - data.append("%f,%f,%f,%f" % (frame_i, frame_i, frame_i, loss)) - f.write("\n".join(data)) - -# STEVE-Noisy -if True: - print("Running ensemble noisy STEVE.") - trial_results = [] - - noisy_q_estimate_errors = [] - noisy_mve_estimate_errors = [] - noisy_steve_estimate_errors = [] - noisy_opt_estimate_errors = [] - noisy_steve_beat_freq = [] - - for run_i in range(TRIAL_N): - print("Trial %d" % run_i) - Q = np.random.randint(0,state_count,[Q_ENSEMBLE_SIZE, state_count+1]).astype(np.float64) - Q[:, state_count] = 0 - losses = [] - - q_estimate_errors = [] - mve_estimate_errors = [] - steve_estimate_errors = [] - opt_estimate_errors = [] - steve_beat_freq= [] - - for step_i 
in range(TRAIN_STEPS): - _q_estimate_errors = [] - _mve_estimate_errors = [] - _steve_estimate_errors = [] - _opt_estimate_errors = [] - _steve_beat_freq = [] - for q_ensemble_i in range(Q_ENSEMBLE_SIZE): - state = np.random.randint(0,state_count) - next_state, reward = step(state) - - # STEVE rollout - Q_est_mat = np.zeros([HORIZON + 1, MODEL_ENSEMBLE_SIZE, Q_ENSEMBLE_SIZE]) - reward_est_mat = np.zeros([HORIZON + 1, MODEL_ENSEMBLE_SIZE, 1]) - first_next_state, first_reward = next_state, reward - for model_ensemble_i in range(MODEL_ENSEMBLE_SIZE): - next_state, reward = first_next_state, first_reward - Q_est_mat[0, model_ensemble_i, :] = Q[:, next_state] - reward_est_mat[0, model_ensemble_i, 0] = reward - for timestep_i in range(1,HORIZON+1): - next_state, reward = noisy_step(next_state) - Q_est_mat[timestep_i, model_ensemble_i, :] = Q[:, next_state] - reward_est_mat[timestep_i, model_ensemble_i, 0] = reward - all_targets = Q_est_mat + np.cumsum(reward_est_mat, axis=0) - - # STEVE weight calculation - all_targets = np.reshape(all_targets, [HORIZON+1, MODEL_ENSEMBLE_SIZE * Q_ENSEMBLE_SIZE]) - estimates = np.mean(all_targets, axis=1) - confidences = old_div(1., (np.var(all_targets, axis=1) + 1e-8)) - coefficients = old_div(confidences, np.sum(confidences)) - target = np.sum(estimates * coefficients) - # target = estimates[0] - - Q[q_ensemble_i, state] = target - - true_target = state + 1. if state != terminal_state else 0. 
- _q_estimate_errors.append(np.square(estimates[0] - true_target)) - _mve_estimate_errors.append(np.square(estimates[-1] - true_target)) - _steve_estimate_errors.append(np.square(np.sum(estimates * coefficients) - true_target)) - _opt_estimate_errors.append(np.min(np.square(estimates - true_target))) - _steve_beat_freq.append(float(np.square(estimates[0] - true_target) > np.square(target - true_target))) - - losses.append(get_error(np.mean(Q, axis=0))) - q_estimate_errors.append(np.mean(_q_estimate_errors)) - mve_estimate_errors.append(np.mean(_mve_estimate_errors)) - steve_estimate_errors.append(np.mean(_steve_estimate_errors)) - opt_estimate_errors.append(np.mean(_opt_estimate_errors)) - steve_beat_freq.append(np.mean(_steve_beat_freq)) - trial_results.append(losses) - noisy_q_estimate_errors.append(q_estimate_errors) - noisy_mve_estimate_errors.append(mve_estimate_errors) - noisy_steve_estimate_errors.append(steve_estimate_errors) - noisy_opt_estimate_errors.append(opt_estimate_errors) - noisy_steve_beat_freq.append(steve_beat_freq) - - print("...complete.\n") - - result = np.stack(trial_results, axis=1) - means = np.mean(result, axis=1) - stdevs = np.std(result, axis=1) - plt.plot(means, label="STEVE-noisy", color=colors[3], linestyle='dotted') - plt.fill_between(np.arange(TRAIN_STEPS), means - stdevs, means + stdevs, alpha=.2, color=colors[3]) - with open('Toy-v1/steve_noisy.csv', 'w') as f: - data = [] - for frame_i in range(result.shape[0]): - for loss in result[frame_i]: - data.append("%f,%f,%f,%f" % (frame_i, frame_i, frame_i, loss)) - f.write("\n".join(data)) - -# ### Display results -# plt.title("Comparison of convergence rates") -# plt.legend() -# plt.savefig("comparison.pdf") -# plt.show() -# -# ### Display secondary results - error comparison -# DOWNSAMPLE = 50 -# colors = sns.color_palette('husl', 8) -# for i, (error_curve, label) in enumerate([ -# (oracle_q_estimate_errors, "Oracle Q error"), -# (oracle_mve_estimate_errors, "Oracle MVE error"), -# 
(oracle_steve_estimate_errors, "Oracle STEVE error"), -# # (oracle_opt_estimate_errors, "Oracle minimum single-estimate error"), -# ]): -# result = np.stack(error_curve, axis=1) -# means = downsample(np.mean(result, axis=1), DOWNSAMPLE) -# stdevs = downsample(np.std(result, axis=1), DOWNSAMPLE) -# plt.plot(means, label=label, color=colors[i]) -# plt.fill_between(np.arange(means.shape[0]), means - stdevs, means + stdevs, alpha=.2, color=colors[i]) -# -# plt.title("Comparison of errors for oracle dynamics") -# plt.legend() -# plt.show() -# -# for i, (error_curve, label) in enumerate([ -# (noisy_q_estimate_errors, "Noisy Q error"), -# (noisy_mve_estimate_errors, "Noisy MVE error"), -# (noisy_steve_estimate_errors, "Noisy STEVE error"), -# # (noisy_opt_estimate_errors, "Noisy minimum single-estimate error"), -# # (trial_steve_beat_freq, "STEVE beat freq"), -# ]): -# result = np.stack(error_curve, axis=1) -# means = downsample(np.mean(result, axis=1), DOWNSAMPLE) -# stdevs = downsample(np.std(result, axis=1), DOWNSAMPLE) -# plt.plot(means, label=label, color=colors[i]) -# plt.fill_between(np.arange(means.shape[0]), means - stdevs, means + stdevs, alpha=.2, color=colors[i]) -# -# plt.title("Comparison of errors for noisy dynamics") -# plt.legend() -# plt.show() \ No newline at end of file diff --git a/research/steve/util.py b/research/steve/util.py deleted file mode 100644 index bf0abec0ae193a3ba067a47c895f7cd21593e7b8..0000000000000000000000000000000000000000 --- a/research/steve/util.py +++ /dev/null @@ -1,164 +0,0 @@ -from __future__ import division -from future import standard_library -standard_library.install_aliases() -from builtins import str -from builtins import range -from past.utils import old_div -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import numpy as np -import tensorflow as tf -import os, random, gc, math, re -import multiprocessing, types, shutil, pickle, json -from collections import defaultdict, MutableMapping - -def tanh_sample_info(mu, logsigma, stop_action_gradient=False, n_samples=1): - if n_samples > 1: - mu = tf.expand_dims(mu, 2) - logsigma = tf.expand_dims(logsigma, 2) - sample_shape = tf.concat([tf.shape(mu), n_samples], 0) - else: - sample_shape = tf.shape(mu) - - flat_act = mu + tf.random_normal(sample_shape) * tf.exp(logsigma) - if stop_action_gradient: flat_act = tf.stop_gradient(flat_act) - normalized_dist_t = (flat_act - mu) * tf.exp(-logsigma) # ... x D - quadratic = - 0.5 * tf.reduce_sum(normalized_dist_t ** 2, axis=-1) # ... x (None) - log_z = tf.reduce_sum(logsigma, axis=-1) # ... x (None) - D_t = tf.cast(tf.shape(mu)[-1], tf.float32) - log_z += 0.5 * D_t * np.log(2 * np.pi) - flat_ll = quadratic - log_z - - scaled_act = tf.tanh(flat_act) - corr = tf.reduce_sum(tf.log(1. 
- tf.square(scaled_act) + 1e-6), axis=-1) - scaled_ll = flat_ll - corr - return flat_act, flat_ll, scaled_act, scaled_ll - -def tf_cheating_contcartpole(state, action): - gravity = 9.8 - masscart = 1.0 - masspole = 0.1 - total_mass = (masspole + masscart) - length = 0.5 # actually half the pole's length - polemass_length = (masspole * length) - force_mag = 10.0 - tau = 0.02 # seconds between state updates - - # Angle at which to fail the episode - theta_threshold_radians = 12 * 2 * math.pi / 360 - x_threshold = 2.4 - - x, x_dot, theta, theta_dot = tf.split(state, 4, axis=-1) - done = tf.logical_or(x < -x_threshold, - tf.logical_or(x > x_threshold, - tf.logical_or(theta < -theta_threshold_radians, - theta > theta_threshold_radians))) - - force = force_mag * action - costheta = tf.cos(theta) - sintheta = tf.sin(theta) - temp = old_div((force + polemass_length * theta_dot * theta_dot * sintheta), total_mass) - thetaacc = old_div((gravity * sintheta - costheta* temp), (length * (old_div(4.0,3.0) - masspole * costheta * costheta / total_mass))) - xacc = temp - polemass_length * thetaacc * costheta / total_mass - x = x + tau * x_dot - x_dot = x_dot + tau * xacc - theta = theta + tau * theta_dot - theta_dot = theta_dot + tau * thetaacc - state = tf.concat([x,x_dot,theta,theta_dot], -1) - done = tf.squeeze(tf.cast(done, tf.float32), -1) - reward = 1.0 - done - done *= 0. 
- return state, reward, done - -def create_directory(dir): - dir_chunks = dir.split("/") - for i in range(len(dir_chunks)): - partial_dir = "/".join(dir_chunks[:i+1]) - try: - os.makedirs(partial_dir) - except OSError: - pass - return dir - -def create_and_wipe_directory(dir): - shutil.rmtree(create_directory(dir)) - create_directory(dir) - -def wipe_file(fname): - with open(fname, "w") as f: - f.write("") - return fname - -def get_largest_epoch_in_dir(dir, saveid): - reg_matches = [re.findall('\d+_%s'%saveid,filename) for filename in os.listdir(dir)] - epoch_labels = [int(regmatch[0].split("_")[0]) for regmatch in reg_matches if regmatch] - if len(epoch_labels) == 0: return False - return max(epoch_labels) - -def wipe_all_but_largest_epoch_in_dir(dir, saveid): - largest = get_largest_epoch_in_dir(dir, saveid) - reg_matches = [(filename, re.findall('\d+_%s'%saveid,filename)) for filename in os.listdir(dir)] - for filename, regmatch in reg_matches: - if regmatch and int(regmatch[0].split("_")[0]) != largest: - os.remove(os.path.join(dir,filename)) - -class ConfigDict(dict): - def __init__(self, loc=None, ghost=False): - self._dict = defaultdict(lambda :False) - self.ghost = ghost - if loc: - with open(loc) as f: raw = json.load(f) - if "inherits" in raw and raw["inherits"]: - for dep_loc in raw["inherits"]: - self.update(ConfigDict(dep_loc)) - if "updates" in raw and raw["updates"]: - self.update(raw["updates"], include_all=True) - - def __getitem__(self, key): - return self._dict[key] - - def __setitem__(self, key, value): - self._dict[key] = value - - def __str__(self): - return str(dict(self._dict)) - - def __repr__(self): - return str(dict(self._dict)) - - def __iter__(self): - return self._dict.__iter__() - - def __bool__(self): - return bool(self._dict) - - def __nonzero__(self): - return bool(self._dict) - - def update(self, dictlike, include_all=False): - for key in dictlike: - value = dictlike[key] - if isinstance(value, dict): - if key[0] == "*": # this 
means only override, do not set - key = key[1:] - ghost = True - else: - ghost = False - if not include_all and isinstance(value, ConfigDict) and key not in self._dict and value.ghost: continue - if key not in self._dict: self._dict[key] = ConfigDict(ghost=ghost) - self._dict[key].update(value) - else: - self._dict[key] = value diff --git a/research/steve/valuerl.py b/research/steve/valuerl.py deleted file mode 100644 index 4819dd08c2f813eb524c56ebadec25a24115223b..0000000000000000000000000000000000000000 --- a/research/steve/valuerl.py +++ /dev/null @@ -1,307 +0,0 @@ -from __future__ import division -from builtins import zip -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf -import numpy as np -import nn -import util -from learner import CoreModel - - -class ValueRL(CoreModel): - """ - Learn a state-action value function and its corresponding policy. 
- """ - - @property - def saveid(self): - return "valuerl" - - def create_params(self, env_config, learner_config): - self.obs_dim = np.prod(env_config["obs_dims"]) - self.action_dim = env_config["action_dim"] - self.reward_scale = env_config["reward_scale"] - self.discount = env_config["discount"] - - self.hidden_dim = learner_config["hidden_dim"] - self.bayesian_config = learner_config["bayesian"] - self.value_expansion = learner_config["value_expansion"] - self.explore_chance = learner_config["ddpg_explore_chance"] - - with tf.variable_scope(self.name): - self.policy = nn.FeedForwardNet('policy', self.obs_dim, [self.action_dim], layers=4, hidden_dim=self.hidden_dim, get_uncertainty=False) - - if self.bayesian_config: - self.Q = nn.EnsembleFeedForwardNet('Q', self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.hidden_dim, get_uncertainty=True, ensemble_size=self.bayesian_config["ensemble_size"], train_sample_count=self.bayesian_config["train_sample_count"], eval_sample_count=self.bayesian_config["eval_sample_count"]) - self.old_Q = nn.EnsembleFeedForwardNet('old_q', self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.hidden_dim, get_uncertainty=True, ensemble_size=self.bayesian_config["ensemble_size"], train_sample_count=self.bayesian_config["train_sample_count"], eval_sample_count=self.bayesian_config["eval_sample_count"]) - else: - self.Q = nn.FeedForwardNet('Q', self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.hidden_dim, get_uncertainty=True) - self.old_Q = nn.FeedForwardNet('old_q', self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.hidden_dim, get_uncertainty=True) - - self.policy_params = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) if "policy" in v.name] - self.Q_params = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) if "Q" in v.name] - self.agent_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) - - self.copy_to_old_ops = 
[tf.assign(p_old, p) for p_old, p in zip(self.old_Q.params_list, self.Q.params_list)] - self.assign_epoch_op = [tf.assign(self.epoch_n, self.epoch_n_placeholder), tf.assign(self.update_n, self.update_n_placeholder), tf.assign(self.frame_n, self.frame_n_placeholder), tf.assign(self.hours, self.hours_placeholder)] - - def update_epoch(self, sess, epoch, updates, frames, hours): - sess.run(self.assign_epoch_op, feed_dict={self.epoch_n_placeholder: int(epoch), self.update_n_placeholder: int(updates), self.frame_n_placeholder: int(frames), self.hours_placeholder: float(hours)}) - - def copy_to_old(self, sess): - sess.run(self.copy_to_old_ops) - - def build_evalution_graph(self, obs, get_full_info=False, mode="regular", n_samples=1): - assert mode in {"regular", "explore", "exploit"} - policy_actions_pretanh = self.policy(obs) - - if mode == "regular" or mode == "exploit": - policy_actions = tf.tanh(policy_actions_pretanh) - elif mode == "explore": - _, _, exploring_policy_actions, _ = util.tanh_sample_info(policy_actions_pretanh, tf.zeros_like(policy_actions_pretanh), n_samples=n_samples) - policy_actions = tf.where(tf.random_uniform(tf.shape(exploring_policy_actions)) < self.explore_chance, x=exploring_policy_actions, y=tf.tanh(policy_actions_pretanh)) - else: raise Exception('this should never happen') - - if get_full_info: return policy_actions_pretanh, policy_actions - else: return policy_actions - - def build_training_graph(self, obs, next_obs, empirical_actions, rewards, dones, data_size, worldmodel=None): - average_model_use = tf.constant(0.) 
- empirical_Q_info = tf.concat([obs, empirical_actions], 1) - - if worldmodel is None: - policy_action_pretanh, policy_actions = self.build_evalution_graph(obs, get_full_info=True) - policy_Q_info = tf.concat([obs, policy_actions], 1) - state_value_estimate = self.Q(policy_Q_info, reduce_mode="mean") - - next_policy_actions = self.build_evalution_graph(next_obs) - policy_next_Q_info = tf.concat([next_obs, next_policy_actions], 1) - next_Q_estimate = self.old_Q(policy_next_Q_info, reduce_mode="mean") - - Q_guess = self.Q(empirical_Q_info, is_eval=False, reduce_mode="random") - Q_target = rewards * self.reward_scale + self.discount * next_Q_estimate * (1. - dones) - - policy_losses = -state_value_estimate - Q_losses = .5 * tf.square( Q_guess - tf.stop_gradient(Q_target) ) - - else: - targets, confidence, Q_guesses, reach_probs = self.build_Q_expansion_graph(next_obs, rewards, dones, worldmodel, rollout_len=self.value_expansion["rollout_len"], model_ensembling=worldmodel.bayesian_config is not False) - - # targets is a 3D matrix: [batch_i, start_timestep, end_timestep]. here, we reduce out the last dimension, turning - # it into a [batch_i, start_timestep] matrix. in other words, we are taking a bunch of candidate targets and reducing - # them into a single target. the four options here correspond to the four ways to do that reduction. 
- if self.value_expansion["mean_k_return"]: - target_counts = self.value_expansion["rollout_len"]+1 - tf.reshape(tf.range(self.value_expansion["rollout_len"]+1), [1, self.value_expansion["rollout_len"]+1]) - k_returns = tf.reduce_sum(targets, 2) / tf.cast(target_counts, tf.float32) - elif self.value_expansion["lambda_return"]: - cont_coeffs = self.value_expansion["lambda_return"] ** tf.cast(tf.reshape(tf.range(self.value_expansion["rollout_len"]+1), [1,1,self.value_expansion["rollout_len"]+1]), tf.float32) - stop_coeffs = tf.concat([(1 - self.value_expansion["lambda_return"]) * tf.ones_like(targets)[:,:,:-1], tf.ones_like(targets)[:,:,-1:]], 2) - k_returns = tf.reduce_sum(targets * stop_coeffs * cont_coeffs, 2) - elif self.value_expansion["steve_reweight"]: - k_returns = tf.reduce_sum(targets * confidence, 2) - average_model_use = 1. - tf.reduce_mean(confidence[:,0,0]) - else: - # MVE objective: just take the last one - k_returns = targets[:,:,-1] - - # now we have [batch_i, start_timestep]. if we are using the TDK trick, then we want to use all of the targets, - # so we construct a corresponding [batch_i, start_timestep] matrix of guesses. otherwise, we just take the targets - # for the first timestep. 
- Q_guess = self.Q(empirical_Q_info, is_eval=False, reduce_mode="random") - if self.value_expansion["tdk_trick"]: - Q_guess = tf.concat([tf.expand_dims(Q_guess, 1), Q_guesses], 1) - reach_probs = tf.concat([tf.expand_dims(tf.ones_like(reach_probs[:,0]), 1), reach_probs[:,:-1]], 1) - Q_target = k_returns - else: - # non-TDK trick means we just take the first one - Q_target = k_returns[:,0] - - policy_action_pretanh, policy_actions = self.build_evalution_graph(obs, get_full_info=True) - policy_Q_info = tf.concat([obs, policy_actions], 1) - state_value_estimate = self.Q(policy_Q_info, stop_params_gradient=True, reduce_mode="mean") - - policy_losses = -state_value_estimate - Q_losses = .5 * tf.square( Q_guess - tf.stop_gradient(Q_target) ) - if self.value_expansion["tdk_trick"]: Q_losses *= reach_probs # we downscale the various TDK-trick losses by - # the likelihood of actually reaching the state - # from which the guess was made - policy_loss = tf.reduce_mean(policy_losses) - Q_loss = tf.reduce_mean(Q_losses) - policy_reg_loss = tf.reduce_mean(tf.square(policy_action_pretanh)) * .001 # a small regularization to make sure the - # tanh does not saturate - - # anything in inspect gets logged - inspect = (policy_loss, Q_loss, policy_reg_loss, average_model_use) - - return (policy_loss + policy_reg_loss, Q_loss), inspect - - - def build_Q_expansion_graph(self, obs, first_rewards, first_done, worldmodel, rollout_len=1, model_ensembling=False): - ### this sets up the machinery for having multiple parallel rollouts, each of which has a single consistent transition - ensemble_idxs, transition_sample_n, reward_sample_n = worldmodel.get_ensemble_idx_info() - q_sample_n = self.bayesian_config["eval_sample_count"] if self.bayesian_config is not False else 1 - first_rewards = tf.tile(tf.expand_dims(tf.expand_dims(first_rewards,1),1), [1,transition_sample_n,reward_sample_n]) - first_rewards.set_shape([None, transition_sample_n, reward_sample_n]) - if model_ensembling: - obs = 
tf.tile(tf.expand_dims(obs,1), [1,transition_sample_n,1]) - obs.set_shape([None, transition_sample_n, self.obs_dim]) - first_done = tf.tile(tf.expand_dims(first_done, 1), [1, transition_sample_n]) - first_done.set_shape([None, transition_sample_n]) - - ### below, we use a while loop to actually do the iterative model rollout - extra_info = worldmodel.init_extra_info(obs) - - action_ta = tf.TensorArray(size=rollout_len, dynamic_size=False, dtype=tf.float32) - obs_ta = tf.TensorArray(size=rollout_len, dynamic_size=False, dtype=tf.float32) - done_ta = tf.TensorArray(size=rollout_len, dynamic_size=False, dtype=tf.float32) - extra_info_ta =tf.TensorArray(size=rollout_len, dynamic_size=False, dtype=tf.float32) - - def rollout_loop_body(r_i, xxx_todo_changeme): - (obs, done, extra_info, action_ta, obs_ta, dones_ta, extra_info_ta) = xxx_todo_changeme - action_pretanh, action = self.build_evalution_graph(tf.stop_gradient(obs), get_full_info=True) - - if model_ensembling: - next_obs, next_dones, next_extra_info = worldmodel.transition(obs, action, extra_info, ensemble_idxs=ensemble_idxs) - else: - next_obs, next_dones, next_extra_info = worldmodel.transition(obs, action, extra_info) - next_obs = tf.reduce_mean(next_obs, -2) - next_dones = tf.reduce_mean(next_dones, -1) - - action_ta = action_ta.write(r_i, action) - obs_ta = obs_ta.write(r_i, obs) - dones_ta = dones_ta.write(r_i, done) - extra_info_ta = extra_info_ta.write(r_i, extra_info) - return r_i+1, (next_obs, next_dones, next_extra_info, action_ta, obs_ta, dones_ta, extra_info_ta) - - _, (final_obs, final_done, final_extra_info, action_ta, obs_ta, done_ta, extra_info_ta) = tf.while_loop( - lambda r_i, _: r_i < rollout_len, - rollout_loop_body, - [0, (obs, first_done, extra_info, action_ta, obs_ta, done_ta, extra_info_ta)] - ) - - final_action_pretanh, final_action = self.build_evalution_graph(tf.stop_gradient(final_obs), get_full_info=True) - - ### compile the TensorArrays into useful tensors - obss = obs_ta.stack() - 
obss = tf.reshape(obss, tf.stack([rollout_len, -1, transition_sample_n, self.obs_dim])) - obss = tf.transpose(obss, [1, 0, 2, 3]) - final_obs = tf.reshape(final_obs, tf.stack([-1, 1, transition_sample_n, self.obs_dim])) - all_obss = tf.concat([obss, final_obs],1) - next_obss = all_obss[:,1:] - - dones = done_ta.stack() - dones = tf.reshape(dones, tf.stack([rollout_len, -1, transition_sample_n])) - dones = tf.transpose(dones, [1, 0, 2]) - final_done = tf.reshape(final_done, tf.stack([-1, 1, transition_sample_n])) - all_dones = tf.concat([dones, final_done],1) - - actions = action_ta.stack() - actions = tf.reshape(actions, tf.stack([rollout_len, -1, transition_sample_n, self.action_dim])) - actions = tf.transpose(actions , [1, 0, 2, 3]) - final_action = tf.reshape(final_action, tf.stack([-1, 1, transition_sample_n, self.action_dim])) - all_actions = tf.concat([actions, final_action],1) - - continue_probs = tf.cumprod(1. - all_dones, axis=1) - rewards = worldmodel.get_rewards(obss, actions, next_obss) - rawrew = rewards = tf.concat([tf.expand_dims(first_rewards, 1), rewards],1) - - ### TDK trick means we have to guess at every timestep - if self.value_expansion["tdk_trick"]: - guess_info = tf.concat([obss,actions], -1) - Q_guesses = self.Q(guess_info, reduce_mode="random") - Q_guesses = tf.reduce_mean(Q_guesses, -1) # make it so there's only one guess per rollout length, which is the mean of the guesses under all the various model rollouts - reached_this_point_to_guess_prob = tf.reduce_mean(continue_probs, -1) - else: - Q_guesses = None - reached_this_point_to_guess_prob = None - - ### use the Q function at every timestep to get value estimates - target_info = tf.concat([all_obss, all_actions], -1) - Q_targets = self.old_Q(target_info, reduce_mode="none") - - rollout_frames = rollout_len + 1 # if we take N steps, we have N+1 frames - - ### create "decay-exponent matrix" of size [1,ROLLOUT_FRAMES,ROLLOUT_FRAMES,1]. 
the first ROLLOUT_FRAMES corresponds to the index of the source, the second to the target. - ts_count_mat = (tf.cast(tf.reshape(tf.range(rollout_frames), [1, rollout_frames]) - tf.reshape(tf.range(rollout_frames), [rollout_frames, 1]), tf.float32)) - reward_coeff_matrix = tf.matrix_band_part(tf.ones([rollout_frames, rollout_frames]), 0, -1) * self.discount ** ts_count_mat - value_coeff_matrix = tf.matrix_band_part(tf.ones([rollout_frames, rollout_frames]), 0, -1) * self.discount ** (1. + ts_count_mat) - reward_coeff_matrix = tf.reshape(reward_coeff_matrix, [1, rollout_frames, rollout_frames, 1, 1]) - value_coeff_matrix = tf.reshape(value_coeff_matrix, [1, rollout_frames, rollout_frames, 1, 1]) - - ### similarly, create a "done" matrix - shifted_continue_probs = tf.concat([tf.expand_dims(tf.ones_like(continue_probs[:,0]),1), continue_probs[:,:-1]], 1) - reward_continue_matrix = tf.expand_dims(shifted_continue_probs, 1) / tf.expand_dims(shifted_continue_probs+1e-8, 2) - value_continue_matrix = tf.expand_dims(continue_probs, 1) / tf.expand_dims(shifted_continue_probs+1e-8, 2) - reward_continue_matrix = tf.expand_dims(reward_continue_matrix, -1) - value_continue_matrix = tf.expand_dims(value_continue_matrix, -1) - - ### apply the discounting factors to the rewards and values - rewards = tf.expand_dims(rewards, 1) * reward_coeff_matrix * reward_continue_matrix - rewards = tf.cumsum(rewards, axis=2) - values = tf.expand_dims(Q_targets, 1) * value_coeff_matrix * value_continue_matrix - - ### compute the targets using the Bellman equation - sampled_targets = tf.expand_dims(rewards,-2) * self.reward_scale + tf.expand_dims(values,-1) - - ### flatten out the various sources of variance (transition, reward, and Q-function ensembles) to get a set of estimates for each candidate target - sampled_targets = tf.reshape(sampled_targets, tf.stack([-1, rollout_frames, rollout_frames, transition_sample_n * reward_sample_n * q_sample_n])) - - ### compute the mean and variance for each 
candidate target - target_means, target_variances = tf.nn.moments(sampled_targets, 3) - - ### compute the confidence, either using the full covariance matrix, or approximating all the estimators as independent - if self.value_expansion["covariances"]: - targetdiffs = sampled_targets - tf.expand_dims(target_means,3) - target_covariances = tf.einsum("abij,abjk->abik", targetdiffs, tf.transpose(targetdiffs, [0,1,3,2])) - target_confidence = tf.squeeze(tf.matrix_solve(target_covariances + tf.expand_dims(tf.expand_dims(tf.matrix_band_part(tf.ones(tf.shape(target_covariances)[-2:]),0,0) * 1e-3,0),0), tf.ones(tf.concat([tf.shape(target_covariances)[:-1], tf.constant([1])],0))),-1) - else: - target_confidence = 1./(target_variances + 1e-8) - - ### normalize so weights sum to 1 - target_confidence *= tf.matrix_band_part(tf.ones([1, rollout_frames, rollout_frames]), 0, -1) - target_confidence = target_confidence / tf.reduce_sum(target_confidence, axis=2, keepdims=True) - - ### below here is a bunch of debugging Print statements that I use as a sanity check: - # target_confidence = tf.Print(target_confidence, [], message="raw rewards") - # target_confidence = tf.Print(target_confidence, [rawrew[0,:,0,0]], summarize=rollout_len+1) - # target_means = tf.Print(target_means, [], message="\n", summarize=rollout_len+1) - # target_means = tf.Print(target_means, [(1. 
- all_dones)[0,:,0]], message="contin", summarize=rollout_len+1) - # target_means = tf.Print(target_means, [continue_probs[0,:,0]], message="cum_contin", summarize=rollout_len+1) - # target_means = tf.Print(target_means, [shifted_continue_probs[0,:,0]], message="shifted contin", summarize=rollout_len+1) - # target_means = tf.Print(target_means, [], message="reward_coeff") - # for i in range(rollout_len+1): target_means = tf.Print(target_means, [reward_coeff_matrix[0,i,:,0,0]], summarize=rollout_len+1) - # target_means = tf.Print(target_means, [], message="reward_continue") - # for i in range(rollout_len+1): target_means = tf.Print(target_means, [reward_continue_matrix[0,i,:,0,0]], summarize=rollout_len+1) - # target_means = tf.Print(target_means, [], message="value_coeff") - # for i in range(rollout_len+1): target_means = tf.Print(target_means, [value_coeff_matrix[0,i,:,0,0]], summarize=rollout_len+1) - # target_means = tf.Print(target_means, [], message="value_continue") - # for i in range(rollout_len+1): target_means = tf.Print(target_means, [value_continue_matrix[0,i,:,0,0]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="rewards") - # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [rewards[0,i,:,0,0]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="target Qs") - # target_confidence = tf.Print(target_confidence, [Q_targets[0,:,0,0]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="values") - # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [values[0,i,:,0,0]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="target_means") - # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_means[0,i,:]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="target_variance") 
- # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_variances[0,i,:]], summarize=rollout_len+1) - # target_confidence = tf.Print(target_confidence, [], message="target_confidence") - # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_confidence[0,i,:]], summarize=rollout_len+1) - # target_means = tf.Print(target_means, [target_confidence, action_lls, tf.shape(Q_targets)], message="\n\n", summarize=10) - - return target_means, target_confidence, Q_guesses, reached_this_point_to_guess_prob \ No newline at end of file diff --git a/research/steve/valuerl_learner.py b/research/steve/valuerl_learner.py deleted file mode 100644 index a3c6308f83b02f065757f4d7c56766911719f541..0000000000000000000000000000000000000000 --- a/research/steve/valuerl_learner.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf -import numpy as np -import os - -from learner import Learner -from valuerl import ValueRL -from worldmodel import DeterministicWorldModel - -class ValueRLLearner(Learner): - """ - ValueRL-specific training loop details. 
- """ - - def learner_name(self): return "valuerl" - - def make_loader_placeholders(self): - self.obs_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], np.prod(self.env_config["obs_dims"])]) - self.next_obs_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], np.prod(self.env_config["obs_dims"])]) - self.action_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], self.env_config["action_dim"]]) - self.reward_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"]]) - self.done_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"]]) - self.datasize_loader = tf.placeholder(tf.float64, []) - return [self.obs_loader, self.next_obs_loader, self.action_loader, self.reward_loader, self.done_loader, self.datasize_loader] - - def make_core_model(self): - if self.config["model_config"] is not False: - self.worldmodel = DeterministicWorldModel(self.config["name"], self.env_config, self.config["model_config"]) - else: - self.worldmodel = None - - valuerl = ValueRL(self.config["name"], self.env_config, self.learner_config) - (policy_loss, Q_loss), inspect_losses = valuerl.build_training_graph(*self.current_batch, worldmodel=self.worldmodel) - - policy_optimizer = tf.train.AdamOptimizer(3e-4) - policy_gvs = policy_optimizer.compute_gradients(policy_loss, var_list=valuerl.policy_params) - capped_policy_gvs = policy_gvs - policy_train_op = policy_optimizer.apply_gradients(capped_policy_gvs) - - Q_optimizer = tf.train.AdamOptimizer(3e-4) - Q_gvs = Q_optimizer.compute_gradients(Q_loss, var_list=valuerl.Q_params) - capped_Q_gvs = Q_gvs - Q_train_op = Q_optimizer.apply_gradients(capped_Q_gvs) - - return valuerl, (policy_loss, Q_loss), (policy_train_op, Q_train_op), inspect_losses - - ## Optional functions to override - def initialize(self): - if self.config["model_config"] is not False: - while not self.load_worldmodel(): pass - - def resume_from_checkpoint(self, epoch): - if 
self.config["model_config"] is not False: - with self.bonus_kwargs["model_lock"]: self.worldmodel.load(self.sess, self.save_path, epoch) - - def checkpoint(self): - self.core.copy_to_old(self.sess) - if self.config["model_config"] is not False: - self.load_worldmodel() - - def backup(self): pass - - # Other functions - def load_worldmodel(self): - if not os.path.exists("%s/%s.params.index" % (self.save_path, self.worldmodel.saveid)): return False - with self.bonus_kwargs["model_lock"]: self.worldmodel.load(self.sess, self.save_path) - return True diff --git a/research/steve/visualizer.py b/research/steve/visualizer.py deleted file mode 100644 index 825f1a238617d78dc2ada4611dcecc175f238eb5..0000000000000000000000000000000000000000 --- a/research/steve/visualizer.py +++ /dev/null @@ -1,107 +0,0 @@ -from __future__ import print_function -from builtins import range -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import numpy as np -import tensorflow as tf -# import moviepy.editor as mpy -import time, os, traceback, multiprocessing, portalocker, sys - -import envwrap -import util -import valuerl, worldmodel -from config import config - -MODEL_NAME = config["name"] -LOG_PATH = util.create_directory("output/" + config["env"] + "/" + MODEL_NAME + "/" + config["log_path"]) + "/" + MODEL_NAME -LOAD_PATH = util.create_directory("output/" + config["env"] + "/" + MODEL_NAME + "/" + config["save_model_path"]) -OBS_DIM = np.prod(config["obs_dims"]) -HIDDEN_DIM = config["hidden_dim"] -ACTION_DIM = config["action_dim"] -MAX_FRAMES = config["max_frames"] -REWARD_SCALE = config["reward_scale"] -DISCOUNT = config["discount"] -ALGO = config["policy_config"]["algo"] -AGENT_BATCH_SIZE = config["agent_config"]["batch_size"] -EVALUATOR_BATCH_SIZE = config["evaluator_config"]["batch_size"] -RELOAD_EVERY_N = config["agent_config"]["reload_every_n"] -FRAMES_BEFORE_LEARNING = config["policy_config"]["frames_before_learning"] -FRAMES_PER_UPDATE = config["policy_config"]["frames_per_update"] -LEARNER_EPOCH_N = config["policy_config"]["epoch_n"] -SYNC_UPDATES = config["policy_config"]["frames_per_update"] >= 0 -POLICY_BAYESIAN_CONFIG = config["policy_config"]["bayesian"] -AUX_CONFIG = config["aux_config"] -DDPG_EXPLORE_CHANCE = config["policy_config"]["explore_chance"] if ALGO == "ddpg" else 0. 
-MODEL_AUGMENTED = config["model_config"] is not False -if MODEL_AUGMENTED: MODEL_BAYESIAN_CONFIG = config["model_config"]["bayesian"] - -FILENAME = sys.argv[3] - -if __name__ == '__main__': - oprl = valuerl.ValueRL(MODEL_NAME, ALGO, OBS_DIM, ACTION_DIM, HIDDEN_DIM, REWARD_SCALE, DISCOUNT, POLICY_BAYESIAN_CONFIG, AUX_CONFIG, DDPG_EXPLORE_CHANCE) - - obs_loader = tf.placeholder(tf.float32, [1, OBS_DIM]) - policy_actions, _ = oprl.build_evalution_graph(obs_loader, mode="exploit") - - if MODEL_AUGMENTED: - next_obs_loader = tf.placeholder(tf.float32, [1, OBS_DIM]) - reward_loader = tf.placeholder(tf.float32, [1]) - done_loader = tf.placeholder(tf.float32, [1]) - worldmodel = worldmodel.DeterministicWorldModel(MODEL_NAME, OBS_DIM, ACTION_DIM, HIDDEN_DIM, REWARD_SCALE, DISCOUNT, MODEL_BAYESIAN_CONFIG) - _, _, _, _, _, confidence, _ = oprl.build_Q_expansion_graph(next_obs_loader, reward_loader, done_loader, worldmodel, rollout_len=3, model_ensembling=True) - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - - oprl.load(sess, FILENAME) - if MODEL_AUGMENTED: worldmodel.load(sess, FILENAME) - - env = envwrap.get_env(config["env"]) - - hist = np.zeros([4, 10]) - for _ in range(10): - ts = 0 - rgb_frames = [] - obs, reward, done, reset = env.reset(), 0, False, False - while not reset: - # env.internal_env.render() - # rgb_frames.append(env.internal_env.render(mode='rgb_array')) - # action = env.action_space.sample() - all_actions = sess.run(policy_actions, feed_dict={obs_loader: np.array([obs])}) - all_actions = np.clip(all_actions, -1., 1.) 
- action = all_actions[0] - obs, _reward, done, reset = env.step(action) - - if MODEL_AUGMENTED: - _confidences = sess.run(confidence, feed_dict={next_obs_loader: np.expand_dims(obs,0), - reward_loader: np.expand_dims(_reward,0), - done_loader: np.expand_dims(done,0)}) - # print "%.02f %.02f %.02f %.02f" % tuple(_confidences[0,0]) - for h in range(4): - bucket = int((_confidences[0,0,h]-1e-5)*10) - hist[h,bucket] += 1 - - reward += _reward - ts += 1 - # print ts, _reward, reward - print(ts, reward) - hist /= np.sum(hist, axis=1, keepdims=True) - for row in reversed(hist.T): print(' '.join(["%.02f"] * 4) % tuple(row)) - - #clip = mpy.ImageSequenceClip(rgb_frames, fps=100) - #clip.write_videofile(FILENAME + "/movie.mp4") - - diff --git a/research/steve/worldmodel.py b/research/steve/worldmodel.py deleted file mode 100644 index 613bc6cb3f69a534fe4a5cada67badf7d4052d4f..0000000000000000000000000000000000000000 --- a/research/steve/worldmodel.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import tensorflow as tf -import numpy as np -import nn - -from learner import CoreModel - -class DeterministicWorldModel(CoreModel): - """ - A simple feed-forward neural network world model, with an option for an ensemble. 
- """ - - @property - def saveid(self): - return "worldmodel" - - def create_params(self, env_config, learner_config): - self.obs_dim = np.prod(env_config["obs_dims"]) - self.action_dim = env_config["action_dim"] - self.reward_scale = env_config["reward_scale"] - self.discount = env_config["discount"] - - self.aux_hidden_dim = self.learner_config["aux_hidden_dim"] - self.transition_hidden_dim = self.learner_config["transition_hidden_dim"] - self.bayesian_config = self.learner_config["bayesian"] - - with tf.variable_scope(self.name): - if self.bayesian_config: - self.transition_predictor = nn.EnsembleFeedForwardNet('transition_predictor', self.obs_dim + self.action_dim, [self.obs_dim], layers=8, hidden_dim=self.transition_hidden_dim, get_uncertainty=True, ensemble_size=self.bayesian_config["transition"]["ensemble_size"], train_sample_count=self.bayesian_config["transition"]["train_sample_count"], eval_sample_count=self.bayesian_config["transition"]["eval_sample_count"]) - self.done_predictor = nn.EnsembleFeedForwardNet('done_predictor', self.obs_dim + self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.aux_hidden_dim, get_uncertainty=True, ensemble_size=self.bayesian_config["transition"]["ensemble_size"], train_sample_count=self.bayesian_config["transition"]["train_sample_count"], eval_sample_count=self.bayesian_config["transition"]["eval_sample_count"]) - self.reward_predictor = nn.EnsembleFeedForwardNet('reward_predictor', self.obs_dim + self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.aux_hidden_dim, get_uncertainty=True, ensemble_size=self.bayesian_config["reward"]["ensemble_size"], train_sample_count=self.bayesian_config["reward"]["train_sample_count"], eval_sample_count=self.bayesian_config["reward"]["eval_sample_count"]) - else: - self.transition_predictor = nn.FeedForwardNet('transition_predictor', self.obs_dim + self.action_dim, [self.obs_dim], layers=8, hidden_dim=self.transition_hidden_dim, get_uncertainty=True) - 
self.done_predictor = nn.FeedForwardNet('done_predictor', self.obs_dim + self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.aux_hidden_dim, get_uncertainty=True) - self.reward_predictor = nn.FeedForwardNet('reward_predictor', self.obs_dim + self.obs_dim + self.action_dim, [], layers=4, hidden_dim=self.aux_hidden_dim, get_uncertainty=True) - - def get_ensemble_idx_info(self): - if self.bayesian_config is not False: - ensemble_idxs = tf.random_shuffle(tf.range(self.transition_predictor.ensemble_size)) - transition_ensemble_sample_n = self.transition_predictor.eval_sample_count - reward_ensemble_sample_n = self.reward_predictor.eval_sample_count - ensemble_idxs = ensemble_idxs[:transition_ensemble_sample_n] - return ensemble_idxs, transition_ensemble_sample_n, reward_ensemble_sample_n - else: - return None, 1, 1 - - def build_training_graph(self, obs, next_obs, actions, rewards, dones, data_size): - info = tf.concat([obs, actions], -1) - predicted_next_obs = self.transition_predictor(info, is_eval=False, reduce_mode="random") + obs - next_info = tf.concat([next_obs, info], -1) - predicted_dones = self.done_predictor(next_info, is_eval=False, reduce_mode="random") - predicted_rewards = self.reward_predictor(next_info, is_eval=False, reduce_mode="random") - - done_losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=dones, logits=predicted_dones) - reward_losses = .5 * tf.square(rewards - predicted_rewards) - next_obs_losses = .5 * tf.reduce_sum(tf.square(next_obs - predicted_next_obs), -1) - - done_loss = tf.reduce_mean(done_losses) - reward_loss = tf.reduce_mean(reward_losses) - next_obs_loss = tf.reduce_mean(next_obs_losses) - reg_loss = .0001 * (self.done_predictor.l2_loss() + - self.reward_predictor.l2_loss() + - self.transition_predictor.l2_loss()) - - total_loss = done_loss + reward_loss + next_obs_loss + reg_loss - - inspect = (total_loss, done_loss, reward_loss, next_obs_loss, reg_loss) - - return total_loss, inspect - - def init_extra_info(self, 
obs): - return tf.zeros_like(obs) - - def transition(self, obs, action, extra_info, ensemble_idxs=None, pre_expanded=None): - info = tf.concat([obs, action], -1) - next_obs_delta = self.transition_predictor(info, reduce_mode="none", ensemble_idxs=ensemble_idxs, pre_expanded=pre_expanded) - if ensemble_idxs is None: - next_obs = tf.expand_dims(obs,-2) + next_obs_delta - next_info = tf.concat([next_obs, tf.expand_dims(info,-2)], -1) - else: - next_obs = obs + next_obs_delta - next_info = tf.concat([next_obs, info], -1) - done = tf.nn.sigmoid(self.done_predictor(next_info, reduce_mode="none", ensemble_idxs=ensemble_idxs, pre_expanded=True)) - extra_info = tf.zeros_like(obs) - return next_obs, done, extra_info - - def get_rewards(self, obs, action, next_obs): - next_info = tf.concat([next_obs, obs, action], -1) - reward = self.reward_predictor(next_info, reduce_mode="none") - return reward \ No newline at end of file diff --git a/research/steve/worldmodel_learner.py b/research/steve/worldmodel_learner.py deleted file mode 100644 index c36a50f6adff604dd44f961d0360540469d503e5..0000000000000000000000000000000000000000 --- a/research/steve/worldmodel_learner.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import tensorflow as tf -import numpy as np -from learner import Learner -from worldmodel import DeterministicWorldModel - -class WorldmodelLearner(Learner): - """ - Worldmodel-specific training loop details. - """ - def learner_name(self): return "worldmodel" - - def make_loader_placeholders(self): - self.obs_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], np.prod(self.env_config["obs_dims"])]) - self.next_obs_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], np.prod(self.env_config["obs_dims"])]) - self.action_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"], self.env_config["action_dim"]]) - self.reward_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"]]) - self.done_loader = tf.placeholder(tf.float32, [self.learner_config["batch_size"]]) - self.datasize_loader = tf.placeholder(tf.float64, []) - return [self.obs_loader, self.next_obs_loader, self.action_loader, self.reward_loader, self.done_loader, self.datasize_loader] - - def make_core_model(self): - worldmodel = DeterministicWorldModel(self.config["name"], self.env_config, self.learner_config) - worldmodel_loss, inspect_losses = worldmodel.build_training_graph(*self.current_batch) - - model_optimizer = tf.train.AdamOptimizer(3e-4) - model_gvs = model_optimizer.compute_gradients(worldmodel_loss, var_list=worldmodel.model_params) - capped_model_gvs = model_gvs - worldmodel_train_op = model_optimizer.apply_gradients(capped_model_gvs) - - return worldmodel, (worldmodel_loss,), (worldmodel_train_op,), inspect_losses - - ## Optional functions to override - def initialize(self): pass - def resume_from_checkpoint(self, epoch): pass - def checkpoint(self): pass - def backup(self): pass - - - - diff --git a/research/street/README.md b/research/street/README.md deleted file mode 100644 index 
fc2c4d01ce31bb8e0c7db88797d16de4974d9505..0000000000000000000000000000000000000000 --- a/research/street/README.md +++ /dev/null @@ -1,268 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# StreetView Tensorflow Recurrent End-to-End Transcription (STREET) Model. - -A TensorFlow implementation of the STREET model described in the paper: - -"End-to-End Interpretation of the French Street Name Signs Dataset" - -Raymond Smith, Chunhui Gu, Dar-Shyang Lee, Huiyi Hu, Ranjith -Unnikrishnan, Julian Ibarz, Sacha Arnoud, Sophia Lin. - -*International Workshop on Robust Reading, Amsterdam, 9 October 2016.* - -Available at: http://link.springer.com/chapter/10.1007%2F978-3-319-46604-0_30 - - -## Contact -***Author:*** Ray Smith (rays@google.com). - -***Pull requests and issues:*** @theraysmith. - -## Contents -* [Introduction](#introduction) -* [Installing and setting up the STREET model](#installing-and-setting-up-the-street-model) -* [Downloading the datasets](#downloading-the-datasets) -* [Confidence Tests](#confidence-tests) -* [Training a model](#training-a-model) -* [The Variable Graph Specification Language](#the-variable-graph-specification-language) - -## Introduction - -The *STREET* model is a deep recurrent neural network that learns how to -identify the name of a street (in France) from an image containing upto four -different views of the street name sign. The model merges information from the -different views and normalizes the text to the correct format. 
For example: - -![Example image](g3doc/avdessapins.png) - -Avenue des Sapins - - -## Installing and setting up the STREET model -[Install Tensorflow](https://www.tensorflow.org/install/) - -Install numpy: - -``` -sudo pip install numpy -``` - -Build the LSTM op: - -``` -cd cc -TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') -g++ -std=c++11 -shared rnn_ops.cc -o rnn_ops.so -fPIC -I $TF_INC -O3 -mavx -``` - -(Note: if running on Mac, add `-undefined dynamic_lookup` to your `g++` command. -If you are running a newer version of gcc, you may also need to add -`-D_GLIBCXX_USE_CXX11_ABI=0`.) - -Run the unittests: - -``` -cd ../python -python decoder_test.py -python errorcounter_test.py -python shapes_test.py -python vgslspecs_test.py -python vgsl_model_test.py -``` - -## Downloading the datasets - -The French Street Name Signs (FSNS) dataset is split into subsets, each -of which is composed of multiple files. -Note that these datasets are very large. The approximate sizes are: - -* Train: 512 files of 300MB each. -* Validation: 64 files of 40MB each. -* Test: 64 files of 50MB each. -* Testdata: some smaller data files of a few MB for testing. -* Total: ~158 Gb. - -Here is a list of the download paths: - -``` -https://download.tensorflow.org/data/fsns-20160927/charset_size=134.txt -https://download.tensorflow.org/data/fsns-20160927/test/test-00000-of-00064 -... -https://download.tensorflow.org/data/fsns-20160927/test/test-00063-of-00064 -https://download.tensorflow.org/data/fsns-20160927/testdata/arial-32-00000-of-00001 -https://download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001 -https://download.tensorflow.org/data/fsns-20160927/testdata/mnist-sample-00000-of-00001 -https://download.tensorflow.org/data/fsns-20160927/testdata/numbers-16-00000-of-00001 -https://download.tensorflow.org/data/fsns-20160927/train/train-00000-of-00512 -... 
-https://download.tensorflow.org/data/fsns-20160927/train/train-00511-of-00512 -https://download.tensorflow.org/data/fsns-20160927/validation/validation-00000-of-00064 -... -https://download.tensorflow.org/data/fsns-20160927/validation/validation-00063-of-00064 -``` - -All URLs are stored in the text file `python/fsns_urls.txt`, to download them in -parallel: - -``` -aria2c -c -j 20 -i fsns_urls.txt -``` -If you ctrl+c and re-execute the command it will continue the aborted download. - - -## Confidence Tests - -The datasets download includes a directory `testdata` that contains some small -datasets that are big enough to test that models can actually learn something. -Assuming that you have put the downloads in directory `data` alongside -`python` then you can run the following tests: - -### Mnist for zero-dimensional data - -``` -cd python -train_dir=/tmp/mnist -rm -rf $train_dir -python vgsl_train.py --model_str='16,0,0,1[Ct5,5,16 Mp3,3 Lfys32 Lfxs64]O0s12' \ - --max_steps=1024 --train_data=../data/testdata/mnist-sample-00000-of-00001 \ - --initial_learning_rate=0.001 --final_learning_rate=0.001 \ - --num_preprocess_threads=1 --train_dir=$train_dir -python vgsl_eval.py --model_str='16,0,0,1[Ct5,5,16 Mp3,3 Lfys32 Lfxs64]O0s12' \ - --num_steps=256 --eval_data=../data/testdata/mnist-sample-00000-of-00001 \ - --num_preprocess_threads=1 --decoder=../testdata/numbers.charset_size=12.txt \ - --eval_interval_secs=0 --train_dir=$train_dir --eval_dir=$train_dir/eval -``` - -Depending on your machine, this should run in about 1 minute, and should obtain -error rates below 50%. Actual error rates will vary according to random -initialization. 
- -### Fixed-length targets for number recognition - -``` -cd python -train_dir=/tmp/fixed -rm -rf $train_dir -python vgsl_train.py --model_str='8,16,0,1[S1(1x16)1,3 Lfx32 Lrx32 Lfx32]O1s12' \ - --max_steps=3072 --train_data=../data/testdata/numbers-16-00000-of-00001 \ - --initial_learning_rate=0.001 --final_learning_rate=0.001 \ - --num_preprocess_threads=1 --train_dir=$train_dir -python vgsl_eval.py --model_str='8,16,0,1[S1(1x16)1,3 Lfx32 Lrx32 Lfx32]O1s12' \ - --num_steps=256 --eval_data=../data/testdata/numbers-16-00000-of-00001 \ - --num_preprocess_threads=1 --decoder=../testdata/numbers.charset_size=12.txt \ - --eval_interval_secs=0 --train_dir=$train_dir --eval_dir=$train_dir/eval -``` - -Depending on your machine, this should run in about 1-2 minutes, and should -obtain a label error rate between 50 and 80%, with word error rates probably -not coming below 100%. Actual error rates will vary -according to random initialization. - -### OCR-style data with CTC - -``` -cd python -train_dir=/tmp/ctc -rm -rf $train_dir -python vgsl_train.py --model_str='1,32,0,1[S1(1x32)1,3 Lbx100]O1c105' \ - --max_steps=4096 --train_data=../data/testdata/arial-32-00000-of-00001 \ - --initial_learning_rate=0.001 --final_learning_rate=0.001 \ - --num_preprocess_threads=1 --train_dir=$train_dir & -python vgsl_eval.py --model_str='1,32,0,1[S1(1x32)1,3 Lbx100]O1c105' \ - --num_steps=256 --eval_data=../data/testdata/arial-32-00000-of-00001 \ - --num_preprocess_threads=1 --decoder=../testdata/arial.charset_size=105.txt \ - --eval_interval_secs=15 --train_dir=$train_dir --eval_dir=$train_dir/eval & -tensorboard --logdir=$train_dir -``` - -Depending on your machine, the background training should run for about 3-4 -minutes, and should obtain a label error rate between 10 and 50%, with -correspondingly higher word error rates and even higher sequence error rate. -Actual error rates will vary according to random initialization. 
-The background eval will run for ever, and will have to be terminated by hand. -The tensorboard command will run a visualizer that can be viewed with a -browser. Go to the link that it prints to view tensorboard and see the -training progress. See the [Tensorboard](https://www.tensorflow.org/versions/r0.10/how_tos/summaries_and_tensorboard/index.html) -introduction for more information. - - -### Mini FSNS dataset - -You can test the actual STREET model on a small FSNS data set. The model will -overfit to this small dataset, but will give some confidence that everything -is working correctly. *Note* that this test runs the training and evaluation -in parallel, which is something that you should do when training any substantial -system, so you can monitor progress. - - -``` -cd python -train_dir=/tmp/fsns -rm -rf $train_dir -python vgsl_train.py --max_steps=10000 --num_preprocess_threads=1 \ - --train_data=../data/testdata/fsns-00000-of-00001 \ - --initial_learning_rate=0.0001 --final_learning_rate=0.0001 \ - --train_dir=$train_dir & -python vgsl_eval.py --num_steps=256 --num_preprocess_threads=1 \ - --eval_data=../data/testdata/fsns-00000-of-00001 \ - --decoder=../testdata/charset_size=134.txt \ - --eval_interval_secs=300 --train_dir=$train_dir --eval_dir=$train_dir/eval & -tensorboard --logdir=$train_dir -``` - -Depending on your machine, the training should finish in about 1-2 *hours*. -As with the CTC testset above, the eval and tensorboard will have to be -terminated manually. - -## Training a full FSNS model - -After running the tests above, you are ready to train the real thing! -*Note* that you might want to use a `train_dir` somewhere other than `/tmp` as -you can stop the training, reboot if needed and continue if you keep the -data intact, but `/tmp` gets deleted on a reboot. 
- -``` -cd python -train_dir=/tmp/fsns -rm -rf $train_dir -python vgsl_train.py --max_steps=100000000 --train_data=../data/train/train* \ - --train_dir=$train_dir & -python vgsl_eval.py --num_steps=1000 \ - --eval_data=../data/validation/validation* \ - --decoder=../testdata/charset_size=134.txt \ - --eval_interval_secs=300 --train_dir=$train_dir --eval_dir=$train_dir/eval & -tensorboard --logdir=$train_dir -``` - -Training will take a very long time (probably many weeks) to reach minimum -error rate on a single machine, although it will probably take substantially -fewer iterations than with parallel training. Faster training can be obtained -with parallel training on a cluster. -Since the setup is likely to be very site-specific, please see the TensorFlow -documentation on -[Distributed TensorFlow](https://www.tensorflow.org/versions/r0.10/how_tos/distributed/index.html) -for more information. Some code changes may be needed in the `Train` function -in `vgsl_model.py`. - -With 40 parallel training workers, nearly optimal error rates (about 25% -sequence error on the validation set) are obtained in about 30 million steps, -although the error continues to fall slightly over the next 30 million, to -perhaps as low as 23%. - -With a single machine the number of steps could be substantially lower. -Although untested on this problem, on other problems the ratio is typically -5 to 1 so low error rates could be obtained as soon as 6 million iterations, -which could be reached in about 4 weeks. - - -## The Variable Graph Specification Language - -The STREET model makes use of a graph specification language (VGSL) that -enables rapid experimentation with different model architectures. The language -defines a Tensor Flow graph that can be used to process images of variable sizes -to output a 1-dimensional sequence, like a transcription/OCR problem, or a -0-dimensional label, as for image identification problems. 
For more information -see [vgslspecs](g3doc/vgslspecs.md) diff --git a/research/street/cc/rnn_ops.cc b/research/street/cc/rnn_ops.cc deleted file mode 100644 index 8e004d91c1e426d2eafcc52a390217bd67e40c9a..0000000000000000000000000000000000000000 --- a/research/street/cc/rnn_ops.cc +++ /dev/null @@ -1,538 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// OpKernel of LSTM Neural Networks: -// -// LSTM: VariableLSTMOp (VariableLSTMGradOp) -// -// where (.*) are the ops to compute gradients for the corresponding ops. - -#define EIGEN_USE_THREADS - -#include -#ifdef GOOGLE_INCLUDES -#include "third_party/eigen3/Eigen/Core" -#include "third_party/tensorflow/core/framework/op.h" -#include "third_party/tensorflow/core/framework/op_kernel.h" -#include "third_party/tensorflow/core/framework/tensor.h" -#else -#include "Eigen/Core" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#endif // GOOGLE_INCLUDES - -namespace tensorflow { - -using Eigen::array; -using Eigen::DenseIndex; -using IndexPair = Eigen::IndexPair; - -Status AreDimsEqual(int dim1, int dim2, const string& message) { - if (dim1 != dim2) { - return errors::InvalidArgument(message, ": ", dim1, " vs. 
", dim2); - } - return Status::OK(); -} - -// ------------------------------- VariableLSTMOp ----------------------------- - -// Kernel to compute the forward propagation of a Long Short-Term Memory -// network. See the doc of the op below for more detail. -class VariableLSTMOp : public OpKernel { - public: - explicit VariableLSTMOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("clip", &clip_)); - OP_REQUIRES( - ctx, clip_ >= 0.0, - errors::InvalidArgument("clip_ needs to be equal or greator than 0")); - } - - void Compute(OpKernelContext* ctx) override { - // Inputs. - const auto input = ctx->input(0).tensor(); - const auto initial_state = ctx->input(1).tensor(); - const auto initial_memory = ctx->input(2).tensor(); - const auto w_m_m = ctx->input(3).tensor(); - const int batch_size = input.dimension(0); - const int seq_len = input.dimension(1); - const int output_dim = input.dimension(3); - - // Sanity checks. - OP_REQUIRES_OK(ctx, AreDimsEqual(4, input.dimension(2), "Input num")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, initial_state.dimension(0), - "State batch")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, initial_state.dimension(1), "State dim")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, initial_memory.dimension(0), - "Memory batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, initial_memory.dimension(1), - "Memory dim")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, w_m_m.dimension(0), "Weight dim 0")); - OP_REQUIRES_OK(ctx, AreDimsEqual(4, w_m_m.dimension(1), "Weight dim 1")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, w_m_m.dimension(2), "Weight dim 2")); - - // Outputs. 
- Tensor* act_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output( - 0, {batch_size, seq_len, output_dim}, &act_tensor)); - auto act = act_tensor->tensor(); - act.setZero(); - - Tensor* gate_raw_act_tensor = nullptr; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(1, {batch_size, seq_len, 4, output_dim}, - &gate_raw_act_tensor)); - auto gate_raw_act = gate_raw_act_tensor->tensor(); - gate_raw_act.setZero(); - - Tensor* memory_tensor = nullptr; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(2, {batch_size, seq_len, output_dim}, - &memory_tensor)); - auto memory = memory_tensor->tensor(); - memory.setZero(); - - // Const and scratch tensors. - Tensor ones_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {batch_size, output_dim}, - &ones_tensor)); - auto ones = ones_tensor.tensor(); - ones.setConstant(1.0); - - Tensor state_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {batch_size, output_dim}, - &state_tensor)); - auto state = state_tensor.tensor(); - state = initial_state; - - Tensor scratch_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_FLOAT, {batch_size, 4, output_dim}, - &scratch_tensor)); - auto scratch = scratch_tensor.tensor(); - scratch.setZero(); - - // Uses the most efficient order for the contraction depending on the batch - // size. - - // This is the code shared by both cases. It is discouraged to use the - // implicit capture with lambda functions, but it should be clear that what - // is done here. - auto Forward = [&](int i) { - // Each pre-activation value is stored in the following order (See the - // comment of the op for the meaning): - // - // i: 0 - // j: 1 - // f: 2 - // o: 3 - - // Adds one to the pre-activation values of the forget gate. This is a - // heuristic to make the training easier. 
- scratch.chip(2, 1) += ones; - - gate_raw_act.chip(i, 1) = scratch; - - // c_t = f_t * c_{t-1} + i_t * j_t - if (i == 0) { - state = initial_memory * scratch.chip(2, 1).sigmoid(); - } else { - state = memory.chip(i - 1, 1) * scratch.chip(2, 1).sigmoid(); - } - state += scratch.chip(0, 1).sigmoid() * scratch.chip(1, 1).tanh(); - - if (clip_ > 0.0) { - // Clips the values if required. - state = state.cwiseMax(-clip_).cwiseMin(clip_); - } - - memory.chip(i, 1) = state; - - // h_t = o_t * tanh(c_t) - state = scratch.chip(3, 1).sigmoid() * state.tanh(); - - act.chip(i, 1) = state; - }; - if (batch_size == 1) { - // Reshapes the weight tensor to pretend as if it is a matrix - // multiplication which is more efficient. - auto w_m_m_r = - w_m_m.reshape(array{output_dim, 4 * output_dim}); - // Dimensions for the contraction. - const array m_m_dim = {IndexPair(1, 0)}; - for (int i = 0; i < seq_len; ++i) { - // Computes the pre-activation value of the input and each gate. - scratch = input.chip(i, 1) + - state.contract(w_m_m_r, m_m_dim) - .reshape(array{batch_size, 4, output_dim}); - Forward(i); - } - } else { - // Shuffles the dimensions of the weight tensor to be efficient when used - // in the left-hand side. Allocates memory for the shuffled tensor for - // efficiency. - Tensor w_m_m_s_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_FLOAT, {output_dim * 4, output_dim}, - &w_m_m_s_tensor)); - auto w_m_m_s = w_m_m_s_tensor.tensor(); - w_m_m_s = w_m_m.shuffle(array{2, 1, 0}) - .reshape(array{output_dim * 4, output_dim}); - // Dimensions for the contraction. - const array m_m_dim = {IndexPair(1, 1)}; - for (int i = 0; i < seq_len; ++i) { - // Computes the pre-activation value of the input and each gate. - scratch = input.chip(i, 1) + - w_m_m_s.contract(state, m_m_dim) - .reshape(array{output_dim, 4, batch_size}) - .shuffle(array{2, 1, 0}); - Forward(i); - } - } - } - - private: - // Threshold to clip the values of memory cells. 
- float clip_ = 0; -}; - -REGISTER_KERNEL_BUILDER(Name("VariableLSTM").Device(DEVICE_CPU), - VariableLSTMOp); -REGISTER_OP("VariableLSTM") - .Attr("clip: float = 0.0") - .Input("input: float32") - .Input("initial_state: float32") - .Input("initial_memory: float32") - .Input("w_m_m: float32") - .Output("activation: float32") - .Output("gate_raw_act: float32") - .Output("memory: float32") - .Doc(R"doc( -Computes the forward propagation of a Long Short-Term Memory Network. - -It computes the following equation recursively for `0 0 else c_t - h_t = o_t * tanh(c'_t) - -where - - a_{l,t} = w_{l,m,m} * h_{t-1} + x'_{l,t} - -where - - x'_{l,t} = w_{l,m,i} * x_{t}. - -`input` corresponds to the concatenation of `X'_i`, `X'_j`, `X'_f`, and `X'_o` -where `X'_l = (x'_{l,1}, x'_{l,2}, ..., x'_{l,T})`, `initial_state` corresponds -to `h_{0}`, `initial_memory` corresponds to `c_{0}` and `weight` corresponds to -`w_{l,m,m}`. `X'_l` (the transformed input) is computed outside of the op in -advance, so w_{l,m,i} is not passed in to the op. - -`activation` corresponds to `H = (h_1, h_2, ..., h_T)`, `gate_raw_activation` -corresponds to the concatanation of `A_i`, `A_j`, `A_f` and `A_o`, and `memory` -corresponds `C = (c_0, c_1, ..., c_T)`. - -All entries in the batch are propagated to the end, and are assumed to be the -same length. - -input: 4-D with shape `[batch_size, seq_len, 4, num_nodes]` -initial_state: 2-D with shape `[batch_size, num_nodes]` -initial_memory: 2-D with shape `[batch_size, num_nodes]` -w_m_m: 3-D with shape `[num_nodes, 4, num_nodes]` -activation: 3-D with shape `[batch_size, seq_len, num_nodes]` -gate_raw_act: 3-D with shape `[batch_size, seq_len, 4, num_nodes]` -memory: 3-D with shape `[batch_size, seq_len, num_nodes]` -)doc"); - -// ----------------------------- VariableLSTMGradOp ---------------------------- - -// Kernel to compute the gradient of VariableLSTMOp. 
-class VariableLSTMGradOp : public OpKernel { - public: - explicit VariableLSTMGradOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - // Inputs. - const auto initial_state = ctx->input(0).tensor(); - const auto initial_memory = ctx->input(1).tensor(); - const auto w_m_m = ctx->input(2).tensor(); - const auto act = ctx->input(3).tensor(); - const auto gate_raw_act = ctx->input(4).tensor(); - const auto memory = ctx->input(5).tensor(); - const auto act_grad = ctx->input(6).tensor(); - const auto gate_raw_act_grad = ctx->input(7).tensor(); - const auto memory_grad = ctx->input(8).tensor(); - const int batch_size = act.dimension(0); - const int seq_len = act.dimension(1); - const int output_dim = act.dimension(2); - - // Sanity checks. - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, initial_state.dimension(0), - "State batch")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, initial_state.dimension(1), "State dim")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, initial_memory.dimension(0), - "Memory batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, initial_memory.dimension(1), - "Memory dim")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, w_m_m.dimension(0), "Weight dim 0")); - OP_REQUIRES_OK(ctx, AreDimsEqual(4, w_m_m.dimension(1), "Weight dim 1")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(output_dim, w_m_m.dimension(2), "Weight dim 2")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, gate_raw_act.dimension(0), - "Gate raw activation batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(seq_len, gate_raw_act.dimension(1), - "Gate raw activation len")); - OP_REQUIRES_OK(ctx, AreDimsEqual(4, gate_raw_act.dimension(2), - "Gate raw activation num")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, gate_raw_act.dimension(3), - "Gate raw activation dim")); - OP_REQUIRES_OK( - ctx, AreDimsEqual(batch_size, memory.dimension(0), "Memory batch")); - OP_REQUIRES_OK(ctx, - AreDimsEqual(seq_len, memory.dimension(1), "Memory 
len")); - OP_REQUIRES_OK(ctx, - AreDimsEqual(output_dim, memory.dimension(2), "Memory dim")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, act_grad.dimension(0), - "Activation gradient batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(seq_len, act_grad.dimension(1), - "Activation gradient len")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, act_grad.dimension(2), - "Activation gradient dim")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, gate_raw_act_grad.dimension(0), - "Activation gradient batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(seq_len, gate_raw_act_grad.dimension(1), - "Activation gradient len")); - OP_REQUIRES_OK(ctx, AreDimsEqual(4, gate_raw_act_grad.dimension(2), - "Activation gradient num")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, gate_raw_act_grad.dimension(3), - "Activation gradient dim")); - OP_REQUIRES_OK(ctx, AreDimsEqual(batch_size, memory_grad.dimension(0), - "Memory gradient batch")); - OP_REQUIRES_OK(ctx, AreDimsEqual(seq_len, memory_grad.dimension(1), - "Memory gradient len")); - OP_REQUIRES_OK(ctx, AreDimsEqual(output_dim, memory_grad.dimension(2), - "Memory gradient dim")); - - // Outputs. - std::vector collections(4, nullptr); - OP_REQUIRES_OK(ctx, - ctx->allocate_output(0, {batch_size, seq_len, 4, output_dim}, - &collections[0])); - auto input_grad = collections[0]->tensor(); - input_grad.setZero(); - - OP_REQUIRES_OK(ctx, ctx->allocate_output(1, {batch_size, output_dim}, - &collections[1])); - auto init_state_grad = collections[1]->tensor(); - init_state_grad.setZero(); - - OP_REQUIRES_OK(ctx, ctx->allocate_output(2, {batch_size, output_dim}, - &collections[2])); - auto init_memory_grad = collections[2]->tensor(); - init_memory_grad.setZero(); - - OP_REQUIRES_OK(ctx, ctx->allocate_output(3, {output_dim, 4, output_dim}, - &collections[3])); - auto w_m_m_grad = collections[3]->tensor(); - w_m_m_grad.setZero(); - - // Const and scratch tensors. 
- Tensor ones_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {batch_size, output_dim}, - &ones_tensor)); - auto ones = ones_tensor.tensor(); - ones.setConstant(1.0); - - Tensor scratch_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_FLOAT, {batch_size, 4, output_dim}, - &scratch_tensor)); - auto scratch = scratch_tensor.tensor(); - scratch.setZero(); - - Tensor tmp1_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {batch_size, output_dim}, - &tmp1_tensor)); - auto tmp1 = tmp1_tensor.tensor(); - tmp1.setZero(); - - Tensor tmp2_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {batch_size, output_dim}, - &tmp2_tensor)); - auto tmp2 = tmp2_tensor.tensor(); - tmp2.setZero(); - - // Uses the most efficient order for the contraction depending on the batch - // size. - - // Shuffles the dimensions of the weight tensor to be efficient when used in - // the left-hand side. Allocates memory for the shuffled tensor for - // efficiency. - Tensor w_m_m_s_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_temp(DT_FLOAT, {4, output_dim, output_dim}, - &w_m_m_s_tensor)); - auto w_m_m_s = w_m_m_s_tensor.tensor(); - if (batch_size == 1) { - // Allocates memory only it is used. - w_m_m_s = w_m_m.shuffle(array{1, 2, 0}); - } - - // Dimensions for the contraction with the weight tensor. - const array m_m_dim = - batch_size == 1 ? array{IndexPair(1, 0)} - : array{IndexPair(1, 1)}; - // Dimensions for the contraction of the batch dimensions. 
- const array b_b_dim = {IndexPair(0, 0)}; - for (int i = seq_len - 1; i >= 0; --i) { - if (i == seq_len - 1) { - init_state_grad = act_grad.chip(i, 1); - } else { - w_m_m_grad += - act.chip(i, 1) - .contract(scratch.reshape( - array{batch_size, 4 * output_dim}), - b_b_dim) - .reshape(array{output_dim, 4, output_dim}); - if (batch_size == 1) { - init_state_grad.device(ctx->eigen_cpu_device()) = - scratch.chip(0, 1).contract(w_m_m_s.chip(0, 0), m_m_dim) + - scratch.chip(1, 1).contract(w_m_m_s.chip(1, 0), m_m_dim) + - scratch.chip(2, 1).contract(w_m_m_s.chip(2, 0), m_m_dim) + - scratch.chip(3, 1).contract(w_m_m_s.chip(3, 0), m_m_dim); - } else { - init_state_grad.device(ctx->eigen_cpu_device()) = - (w_m_m.chip(0, 1).contract(scratch.chip(0, 1), m_m_dim) + - w_m_m.chip(1, 1).contract(scratch.chip(1, 1), m_m_dim) + - w_m_m.chip(2, 1).contract(scratch.chip(2, 1), m_m_dim) + - w_m_m.chip(3, 1).contract(scratch.chip(3, 1), m_m_dim)) - .shuffle(array{1, 0}); - } - init_state_grad += act_grad.chip(i, 1); - } - - auto gate_raw_act_t = gate_raw_act.chip(i, 1); - auto gate_raw_act_grad_t = gate_raw_act_grad.chip(i, 1); - - // Output gate. - tmp1 = memory.chip(i, 1); - tmp1 = tmp1.tanh(); // y_t - tmp2 = gate_raw_act_t.chip(3, 1).sigmoid(); // o_t - scratch.chip(3, 1) = init_state_grad * tmp1 * tmp2 * (ones - tmp2) + - gate_raw_act_grad_t.chip(3, 1); - - init_memory_grad += init_state_grad * tmp2 * (ones - tmp1.square()) + - memory_grad.chip(i, 1); - - // Input gate. - tmp1 = gate_raw_act_t.chip(0, 1).sigmoid(); // i_t - tmp2 = gate_raw_act_t.chip(1, 1); - tmp2 = tmp2.tanh(); // j_t - scratch.chip(0, 1) = init_memory_grad * tmp2 * tmp1 * (ones - tmp1) + - gate_raw_act_grad_t.chip(0, 1); - - // Input. - scratch.chip(1, 1) = init_memory_grad * tmp1 * (ones - tmp2.square()) + - gate_raw_act_grad_t.chip(1, 1); - - // Forget gate. 
- tmp1 = gate_raw_act_t.chip(2, 1).sigmoid(); // f_t - if (i == 0) { - scratch.chip(2, 1) = - init_memory_grad * initial_memory * tmp1 * (ones - tmp1) + - gate_raw_act_grad_t.chip(2, 1); - } else { - scratch.chip(2, 1) = - init_memory_grad * memory.chip(i - 1, 1) * tmp1 * (ones - tmp1) + - gate_raw_act_grad_t.chip(2, 1); - } - - // Memory. - init_memory_grad *= tmp1; - - input_grad.chip(i, 1) = scratch; - } - w_m_m_grad += initial_state - .contract(scratch.reshape(array{ - batch_size, 4 * output_dim}), - b_b_dim) - .reshape(array{output_dim, 4, output_dim}); - if (batch_size == 1) { - init_state_grad.device(ctx->eigen_cpu_device()) = - (scratch.chip(0, 1).contract(w_m_m_s.chip(0, 0), m_m_dim) + - scratch.chip(1, 1).contract(w_m_m_s.chip(1, 0), m_m_dim) + - scratch.chip(2, 1).contract(w_m_m_s.chip(2, 0), m_m_dim) + - scratch.chip(3, 1).contract(w_m_m_s.chip(3, 0), m_m_dim)); - } else { - init_state_grad.device(ctx->eigen_cpu_device()) = - (w_m_m.chip(0, 1).contract(scratch.chip(0, 1), m_m_dim) + - w_m_m.chip(1, 1).contract(scratch.chip(1, 1), m_m_dim) + - w_m_m.chip(2, 1).contract(scratch.chip(2, 1), m_m_dim) + - w_m_m.chip(3, 1).contract(scratch.chip(3, 1), m_m_dim)) - .shuffle(array{1, 0}); - } - } -}; - -REGISTER_KERNEL_BUILDER(Name("VariableLSTMGrad").Device(DEVICE_CPU), - VariableLSTMGradOp); - -REGISTER_OP("VariableLSTMGrad") - .Input("initial_state: float32") - .Input("initial_memory: float32") - .Input("w_m_m: float32") - .Input("activation: float32") - .Input("gate_raw_act: float32") - .Input("memory: float32") - .Input("act_grad: float32") - .Input("gate_raw_act_grad: float32") - .Input("memory_grad: float32") - .Output("input_grad: float32") - .Output("initial_state_grad: float32") - .Output("initial_memory_grad: float32") - .Output("w_m_m_grad: float32") - .Doc(R"doc( -Computes the gradient for VariableLSTM. - -This is to be used conjunction with VariableLSTM. It ignores the clipping used -in the forward pass. 
- -initial_state: 2-D with shape `[batch_size, num_nodes]` -initial_memory: 2-D with shape `[batch_size, num_nodes]` -w_m_m: 3-D with shape `[num_nodes, 4, num_nodes]` -activation: 3-D with shape `[batch_size, seq_len, num_nodes]` -gate_raw_act: 3-D with shape `[batch_size, seq_len, 4, num_nodes]` -memory: 3-D with shape `[batch_size, seq_len, num_nodes]` -act_grad: 3-D with shape `[batch_size, seq_len, num_nodes]` -gate_raw_act_grad: 3-D with shape `[batch_size, seq_len, 4, num_nodes]` -memory_grad: 3-D with shape `[batch_size, seq_len, num_nodes]` -input_grad: 3-D with shape `[batch_size, seq_len, num_nodes]` -initial_state_grad: 2-D with shape `[batch_size, num_nodes]` -initial_memory_grad: 2-D with shape `[batch_size, num_nodes]` -w_m_m_grad: 3-D with shape `[num_nodes, 4, num_nodes]` -)doc"); - -} // namespace tensorflow diff --git a/research/street/g3doc/avdessapins.png b/research/street/g3doc/avdessapins.png deleted file mode 100644 index 7cdb9657a480979060e377e59906120d104680f4..0000000000000000000000000000000000000000 Binary files a/research/street/g3doc/avdessapins.png and /dev/null differ diff --git a/research/street/g3doc/vgslspecs.md b/research/street/g3doc/vgslspecs.md deleted file mode 100644 index 74294952b3ca006d957b3d2d9384b02a887ebab0..0000000000000000000000000000000000000000 --- a/research/street/g3doc/vgslspecs.md +++ /dev/null @@ -1,324 +0,0 @@ -# VGSL Specs - rapid prototyping of mixed conv/LSTM networks for images. - -Variable-size Graph Specification Language (VGSL) enables the specification of a -Tensor Flow graph, composed of convolutions and LSTMs, that can process -variable-sized images, from a very short definition string. - -## Applications: What is VGSL Specs good for? - -VGSL Specs are designed specifically to create TF graphs for: - -* Variable size images as the input. (In one or BOTH dimensions!) -* Output an image (heat map), sequence (like text), or a category. -* Convolutions and LSTMs are the main computing component. 
-* Fixed-size images are OK too! - -But wait, aren't there other systems that simplify generating TF graphs? There -are indeed, but something they all have in common is that they are designed for -fixed size images only. If you want to solve a real OCR problem, you either have -to cut the image into arbitrary sized pieces and try to stitch the results back -together, or use VGSL. - -## Basic Usage - -A full model, including input and the output layers, can be built using -vgsl_model.py. Alternatively you can supply your own tensors and add your own -loss function layer if you wish, using vgslspecs.py directly. - -### Building a full model - -Provided your problem matches the one addressed by vgsl_model, you are good to -go. - -Targeted problems: - -* Images for input, either 8 bit greyscale or 24 bit color. -* Output is 0-d (A category, like cat, dog, train, car.) -* Output is 1-d, with either variable length or a fixed length sequence, eg - OCR, transcription problems in general. - -Currently only softmax (1 of n) outputs are supported, but it would not be -difficult to extend to logistic. - -Use vgsl_train.py to train your model, and vgsl_eval.py to evaluate it. They -just call Train and Eval in vgsl_model.py. - -### Model string for a full model - -The model string for a full model includes the input spec, the output spec and -the layers spec in between. Example: - -``` -'1,0,0,3[Ct5,5,16 Mp3,3 Lfys64 Lfx128 Lrx128 Lfx256]O1c105' -``` - -The first 4 numbers specify the standard TF tensor dimensions: [batch, height, -width, depth], except that height and/or width may be zero, allowing them to be -variable. Batch is specific only to training, and may be a different value at -recognition/inference time. Depth needs to be 1 for greyscale and 3 for color. 
- -The model string in square brackets [] is the main model definition, which is -described [below.](#basic-layers-syntax) The output specification takes the -form: - -``` -O(2|1|0)(l|s|c)n output layer with n classes. - 2 (heatmap) Output is a 2-d vector map of the input (possibly at - different scale). (Not yet supported.) - 1 (sequence) Output is a 1-d sequence of vector values. - 0 (category) Output is a 0-d single vector value. - l uses a logistic non-linearity on the output, allowing multiple - hot elements in any output vector value. (Not yet supported.) - s uses a softmax non-linearity, with one-hot output in each value. - c uses a softmax with CTC. Can only be used with s (sequence). - NOTE Only O0s, O1s and O1c are currently supported. -``` - -The number of classes must match the encoding of the TF Example data set. - -### Layers only - providing your own input and loss layers - -You don't have to use the canned input/output modules, if you provide your -separate code to read TF Example and loss functions. First prepare your inputs: - -* A TF-conventional batch of: `images = tf.float32[batch, height, width, - depth]` -* A tensor of the width of each image in the batch: `widths = tf.int64[batch]` -* A tensor of the height of each image in the batch: `heights = - tf.int64[batch]` - -Note that these can be created from individual images using -`tf.train.batch_join` with `dynamic_pad=True.` - -```python -import vgslspecs -... -spec = '[Ct5,5,16 Mp3,3 Lfys64 Lfx128 Lrx128 Lfx256]' -vgsl = vgslspecs.VGSLSpecs(widths, heights, is_training=True) -last_layer = vgsl.Build(images, spec) -... -AddSomeLossFunction(last_layer).... -``` - -With some appropriate training data, this would create a world-class OCR engine! 
- -## Basic Layers Syntax - -NOTE that *all* ops input and output the standard TF convention of a 4-d tensor: -`[batch, height, width, depth]` *regardless of any collapsing of dimensions.* -This greatly simplifies things, and allows the VGSLSpecs class to track changes -to the values of widths and heights, so they can be correctly passed in to LSTM -operations, and used by any downstream CTC operation. - -NOTE: in the descriptions below, `` is a numeric value, and literals are -described using regular expression syntax. - -NOTE: Whitespace is allowed between ops. - -### Naming - -Each op gets a unique name by default, based on its spec string plus its -character position in the overall specification. All the Ops take an optional -name argument in braces after the mnemonic code, but before any numeric -arguments. - -### Functional ops - -``` -C(s|t|r|l|m)[{name}],, Convolves using a y,x window, with no shrinkage, - SAME infill, d outputs, with s|t|r|l|m non-linear layer. -F(s|t|r|l|m)[{name}] Fully-connected with s|t|r|l|m non-linearity and d - outputs. Reduces height, width to 1. Input height and width must be constant. -L(f|r|b)(x|y)[s][{name}] LSTM cell with n outputs. - The LSTM must have one of: - f runs the LSTM forward only. - r runs the LSTM reversed only. - b runs the LSTM bidirectionally. - It will operate on either the x- or y-dimension, treating the other dimension - independently (as if part of the batch). - (Full 2-d and grid are not yet supported). - s (optional) summarizes the output in the requested dimension, - outputting only the final step, collapsing the dimension to a - single element. -Do[{name}] Insert a dropout layer. -``` - -In the above, `(s|t|r|l|m)` specifies the type of the non-linearity: - -```python -s = sigmoid -t = tanh -r = relu -l = linear (i.e., None) -m = softmax -``` - -Examples: - -`Cr5,5,32` Runs a 5x5 Relu convolution with 32 depth/number of filters. 
- -`Lfx{MyLSTM}128` runs a forward-only LSTM, named 'MyLSTM' in the x-dimension -with 128 outputs, treating the y dimension independently. - -`Lfys64` runs a forward-only LSTM in the y-dimension with 64 outputs, treating -the x-dimension independently and collapses the y-dimension to 1 element. - -### Plumbing ops - -The plumbing ops allow the construction of arbitrarily complex graphs. Something -currently missing is the ability to define macros for generating say an -inception unit in multiple places. - -``` -[...] Execute ... networks in series (layers). -(...) Execute ... networks in parallel, with their output concatenated in depth. -S[{name}](x), Splits one dimension, moves one part to another - dimension. -Mp[{name}], Maxpool the input, reducing the (y,x) rectangle to a single - value. -``` - -In the `S` op, `, , , , ` are numbers. - -`S` is a generalized reshape. It splits input dimension `d` into `a` x `b`, -sending the high/most significant part `a` to the high/most significant side of -dimension `e`, and the low part `b` to the high side of dimension `f`. -Exception: if `d=e=f`, then then dimension `d` is internally transposed to -`bxa`. *At least one* of `e`, `f` must be equal to `d`, so no dimension can be -totally destroyed. Either `a` or `b` can be zero, meaning whatever is left after -taking out the other, allowing dimensions to be of variable size. - -NOTE: Remember the standard TF convention of a 4-d tensor: `[batch, height, -width, depth]`, so `batch=0, height=1, width=2, depth=3.` - -Eg. `S3(3x50)2,3` will split the 150-element depth into 3x50, with the 3 going -to the most significant part of the width, and the 50 part staying in depth. -This will rearrange a 3x50 output parallel operation to spread the 3 output sets -over width. - -### Full Examples - -Example 1: A graph capable of high quality OCR. - -`1,0,0,1[Ct5,5,16 Mp3,3 Lfys64 Lfx128 Lrx128 Lfx256]O1c105` - -As layer descriptions: (Input layer is at the bottom, output at the top.) 
- -``` -O1c105: Output layer produces 1-d (sequence) output, trained with CTC, - outputting 105 classes. -Lfx256: Forward-only LSTM in x with 256 outputs -Lrx128: Reverse-only LSTM in x with 128 outputs -Lfx128: Forward-only LSTM in x with 128 outputs -Lfys64: Dimension-summarizing LSTM, summarizing the y-dimension with 64 outputs -Mp3,3: 3 x 3 Maxpool -Ct5,5,16: 5 x 5 Convolution with 16 outputs and tanh non-linearity -[]: The body of the graph is alway expressed as a series of layers. -1,0,0,1: Input is a batch of 1 image of variable size in greyscale -``` - -Example 2: The STREET network for reading French street name signs end-to-end. -For a detailed description see the [FSNS dataset -paper](http://link.springer.com/chapter/10.1007%2F978-3-319-46604-0_30) - -``` -1,600,150,3[S2(4x150)0,2 Ct5,5,16 Mp2,2 Ct5,5,64 Mp3,3 - ([Lrys64 Lbx128][Lbys64 Lbx128][Lfys64 Lbx128]) S3(3x0)2,3 - Lfx128 Lrx128 S0(1x4)0,3 Lfx256]O1c134 -``` - -Since networks are usually illustrated with the input at the bottom, the input -layer is at the bottom, output at the top, with 'headings' *below* the section -they introduce. - -``` -O1c134: Output is a 1-d sequence, trained with CTC and 134 output softmax. -Lfx256: Forward-only LSTM with 256 outputs -S0(1x4)0,3: Reshape transferring the batch of 4 tiles to the depth dimension. 
-Lrx128: Reverse-only LSTM with 128 outputs -Lfx128: Forward-only LSTM with 128 outputs -(Final section above) -S3(3x0)2,3: Split the outputs of the 3 parallel summarizers and spread over the - x-dimension - [Lfys64 Lbx128]: Summarizing LSTM downwards on the y-dimension with 64 - outputs, followed by a bi-directional LSTM in the x-dimension with 128 - outputs - [Lbys64 Lbx128]: Summarizing bi-directional LSTM on the y-dimension with - 64 outputs, followed by a bi-directional LSTM in the x-dimension with 128 - outputs - [Lrys64 Lbx128]: Summarizing LSTM upwards on the y-dimension with 64 outputs, - followed by a bi-directional LSTM in the x-dimension with 128 outputs -(): In parallel (re-using the inputs and concatenating the outputs): -(Summarizing section above) -Mp3,3: 3 x 3 Maxpool -Ct5,5,64: 5 x 5 Convolution with 64 outputs and tanh non-linearity -Mp2,2: 2 x 2 Maxpool -Ct5,5,16: 5 x 5 Convolution with 16 outputs and tanh non-linearity -S2(4x150)0,2: Split the x-dimension into 4x150, converting each tiled 600x150 -image into a batch of 4 150x150 images -(Convolutional input section above) -[]: The body of the graph is alway expressed as a series of layers. -1,150,600,3: Input is a batch of 1, 600x150 image in 24 bit color -``` - -## Variable size Tensors Under the Hood - -Here are some notes about handling variable-sized images since they require some -consideration and a little bit of knowledge about what goes on inside. - -A variable-sized image is an input for which the width and/or height are not -known at graph-building time, so the tensor shape contains unknown/None/-1 -sizes. - -Many standard NN layers, such as convolutions, are designed to cope naturally -with variable-sized images in TF and produce a variable sized image as the -output. For other layers, such as 'Fully connected' variable size is -fundamentally difficult, if not impossible to deal with, since by definition, -*all* its inputs are connected via a weight to an output. 
The number of inputs -therefore must be fixed. - -It is possible to handle variable sized images by using sparse tensors. Some -implementations make a single variable dimension a list instead of part of the -tensor. Both these solutions suffer from completely segregating the world of -variable size from the world of fixed size, making models and their descriptions -completely non-interchangeable. - -In VGSL, we use a standard 4-d Tensor, `[batch, height, width, depth]` and -either use a batch size of 1 or put up with padding of the input images to the -largest size of any element of the batch. The other price paid for this -standardization is that the user must supply a pair of tensors of shape [batch] -specifying the width and height of each input in a batch. This allows the LSTMs -in the graph to know how many iterations to execute and how to correctly -back-propagate the gradients. - -The standard TF implementation of CTC also requires a tensor giving the sequence -lengths of its inputs. If the output of VGSL is going into CTC, the lengths can -be obtained using: - -```python -import vgslspecs -... -spec = '[Ct5,5,16 Mp3,3 Lfys64 Lfx128 Lrx128 Lfx256]' -vgsl = vgslspecs.VGSLSpecs(widths, heights, is_training=True) -last_layer = vgsl.Build(images, spec) -seq_lengths = vgsl.GetLengths() -``` - -The above will provide the widths that were given in the constructor, scaled -down by the max-pool operator. The heights may be obtained using -`vgsl.GetLengths(1)`, specifying the index of the y-dimension. - -NOTE that currently the only way of collapsing a dimension of unknown size to -known size (1) is through the use of a summarizing LSTM. A single summarizing -LSTM will collapse one dimension (x or y), leaving a 1-d sequence. The 1-d -sequence can then be collapsed in the other dimension to make a 0-d categorical -(softmax) or embedding (logistic) output. 
- -Using the (parallel) op it is entirely possible to run multiple [series] of ops -that collapse x first in one and y first in the other, reducing both eventually -to a single categorical value! For eample, the following description may do -something useful with ImageNet-like problems: - -```python -[Cr5,5,16 Mp2,2 Cr5,5,64 Mp3,3 ([Lfxs64 Lfys256] [Lfys64 Lfxs256]) Fr512 Fr512] -``` diff --git a/research/street/python/decoder.py b/research/street/python/decoder.py deleted file mode 100644 index 715146194c58d577c6b799a15baf8619211bc4cd..0000000000000000000000000000000000000000 --- a/research/street/python/decoder.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Basic CTC+recoder decoder. - -Decodes a sequence of class-ids into UTF-8 text. -For basic information on CTC See: -Alex Graves et al. Connectionist Temporal Classification: Labelling Unsegmented -Sequence Data with Recurrent Neural Networks. -http://www.cs.toronto.edu/~graves/icml_2006.pdf -""" -import collections -import re - -import errorcounter as ec -from six.moves import xrange -import tensorflow as tf - -# Named tuple Part describes a part of a multi (1 or more) part code that -# represents a utf-8 string. 
For example, Chinese character 'x' might be -# represented by 3 codes of which (utf8='x', index=1, num_codes3) would be the -# middle part. (The actual code is not stored in the tuple). -Part = collections.namedtuple('Part', 'utf8 index, num_codes') - - -# Class that decodes a sequence of class-ids into UTF-8 text. -class Decoder(object): - """Basic CTC+recoder decoder.""" - - def __init__(self, filename): - r"""Constructs a Decoder. - - Reads the text file describing the encoding and build the encoder. - The text file contains lines of the form: - [,]*\t - Each line defines a mapping from a sequence of one or more integer codes to - a corresponding utf-8 string. - Args: - filename: Name of file defining the decoding sequences. - """ - # self.decoder is a list of lists of Part(utf8, index, num_codes). - # The index to the top-level list is a code. The list given by the code - # index is a list of the parts represented by that code, Eg if the code 42 - # represents the 2nd (index 1) out of 3 part of Chinese character 'x', then - # self.decoder[42] = [..., (utf8='x', index=1, num_codes3), ...] where ... - # means all other uses of the code 42. - self.decoder = [] - if filename: - self._InitializeDecoder(filename) - - def SoftmaxEval(self, sess, model, num_steps): - """Evaluate a model in softmax mode. - - Adds char, word recall and sequence error rate events to the sw summary - writer, and returns them as well - TODO(rays) Add LogisticEval. - Args: - sess: A tensor flow Session. - model: The model to run in the session. Requires a VGSLImageModel or any - other class that has a using_ctc attribute and a RunAStep(sess) method - that reurns a softmax result with corresponding labels. - num_steps: Number of steps to evaluate for. - Returns: - ErrorRates named tuple. - Raises: - ValueError: If an unsupported number of dimensions is used. 
- """ - coord = tf.train.Coordinator() - threads = tf.train.start_queue_runners(sess=sess, coord=coord) - # Run the requested number of evaluation steps, gathering the outputs of the - # softmax and the true labels of the evaluation examples. - total_label_counts = ec.ErrorCounts(0, 0, 0, 0) - total_word_counts = ec.ErrorCounts(0, 0, 0, 0) - sequence_errors = 0 - for _ in xrange(num_steps): - softmax_result, labels = model.RunAStep(sess) - # Collapse softmax to same shape as labels. - predictions = softmax_result.argmax(axis=-1) - # Exclude batch from num_dims. - num_dims = len(predictions.shape) - 1 - batch_size = predictions.shape[0] - null_label = softmax_result.shape[-1] - 1 - for b in xrange(batch_size): - if num_dims == 2: - # TODO(rays) Support 2-d data. - raise ValueError('2-d label data not supported yet!') - else: - if num_dims == 1: - pred_batch = predictions[b, :] - labels_batch = labels[b, :] - else: - pred_batch = [predictions[b]] - labels_batch = [labels[b]] - text = self.StringFromCTC(pred_batch, model.using_ctc, null_label) - truth = self.StringFromCTC(labels_batch, False, null_label) - # Note that recall_errs is false negatives (fn) aka drops/deletions. - # Actual recall would be 1-fn/truth_words. - # Likewise precision_errs is false positives (fp) aka adds/insertions. - # Actual precision would be 1-fp/ocr_words. - total_word_counts = ec.AddErrors(total_word_counts, - ec.CountWordErrors(text, truth)) - total_label_counts = ec.AddErrors(total_label_counts, - ec.CountErrors(text, truth)) - if text != truth: - sequence_errors += 1 - - coord.request_stop() - coord.join(threads) - return ec.ComputeErrorRates(total_label_counts, total_word_counts, - sequence_errors, num_steps * batch_size) - - def StringFromCTC(self, ctc_labels, merge_dups, null_label): - """Decodes CTC output to a string. - - Extracts only sequences of codes that are allowed by self.decoder. - Labels that make illegal code sequences are dropped. 
- Note that, by its nature of taking only top choices, this is much weaker - than a full-blown beam search that considers all the softmax outputs. - For languages without many multi-code sequences, this doesn't make much - difference, but for complex scripts the accuracy will be much lower. - Args: - ctc_labels: List of class labels including null characters to remove. - merge_dups: If True, Duplicate labels will be merged - null_label: Label value to ignore. - - Returns: - Labels decoded to a string. - """ - # Run regular ctc on the labels, extracting a list of codes. - codes = self._CodesFromCTC(ctc_labels, merge_dups, null_label) - length = len(codes) - if length == 0: - return '' - # strings and partials are both indexed by the same index as codes. - # strings[i] is the best completed string upto position i, and - # partials[i] is a list of partial code sequences at position i. - # Warning: memory is squared-order in length. - strings = [] - partials = [] - for pos in xrange(length): - code = codes[pos] - parts = self.decoder[code] - partials.append([]) - strings.append('') - # Iterate over the parts that this code can represent. - for utf8, index, num_codes in parts: - if index > pos: - continue - # We can use code if it is an initial code (index==0) or continues a - # sequence in the partials list at the previous position. - if index == 0 or partials[pos - 1].count( - Part(utf8, index - 1, num_codes)) > 0: - if index < num_codes - 1: - # Save the partial sequence. - partials[-1].append(Part(utf8, index, num_codes)) - elif not strings[-1]: - # A code sequence is completed. Append to the best string that we - # had where it started. - if pos >= num_codes: - strings[-1] = strings[pos - num_codes] + utf8 - else: - strings[-1] = utf8 - if not strings[-1] and pos > 0: - # We didn't get anything here so copy the previous best string, skipping - # the current code, but it may just be a partial anyway. 
- strings[-1] = strings[-2] - return strings[-1] - - def _InitializeDecoder(self, filename): - """Reads the decoder file and initializes self.decoder from it. - - Args: - filename: Name of text file mapping codes to utf8 strings. - Raises: - ValueError: if the input file is not parsed correctly. - """ - line_re = re.compile(r'(?P\d+(,\d+)*)\t(?P.+)') - with tf.gfile.GFile(filename) as f: - for line in f: - m = line_re.match(line) - if m is None: - raise ValueError('Unmatched line:', line) - # codes is the sequence that maps to the string. - str_codes = m.groupdict()['codes'].split(',') - codes = [] - for code in str_codes: - codes.append(int(code)) - utf8 = m.groupdict()['utf8'] - num_codes = len(codes) - for index, code in enumerate(codes): - while code >= len(self.decoder): - self.decoder.append([]) - self.decoder[code].append(Part(utf8, index, num_codes)) - - def _CodesFromCTC(self, ctc_labels, merge_dups, null_label): - """Collapses CTC output to regular output. - - Args: - ctc_labels: List of class labels including null characters to remove. - merge_dups: If True, Duplicate labels will be merged. - null_label: Label value to ignore. - - All trailing zeros are removed!! - TODO(rays) This may become a problem with non-CTC models. - If using charset, this should not be a problem as zero is always space. - tf.pad can only append zero, so we have to be able to drop them, as a - non-ctc will have learned to output trailing zeros instead of trailing - nulls. This is awkward, as the stock ctc loss function requires that the - null character be num_classes-1. - Returns: - (List of) Labels with null characters removed. - """ - out_labels = [] - prev_label = -1 - zeros_needed = 0 - for label in ctc_labels: - if label == null_label: - prev_label = -1 - elif label != prev_label or not merge_dups: - if label == 0: - # Count zeros and only emit them when it is clear there is a non-zero - # after, so as to truncate away all trailing zeros. 
- zeros_needed += 1 - else: - if merge_dups and zeros_needed > 0: - out_labels.append(0) - else: - out_labels += [0] * zeros_needed - zeros_needed = 0 - out_labels.append(label) - prev_label = label - return out_labels diff --git a/research/street/python/decoder_test.py b/research/street/python/decoder_test.py deleted file mode 100644 index dc61f8b2a6e10d364e7e89b7d6492c36971015f7..0000000000000000000000000000000000000000 --- a/research/street/python/decoder_test.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for decoder.""" -import os - -import tensorflow as tf -import decoder - - -def _testdata(filename): - return os.path.join('../testdata/', filename) - - -class DecoderTest(tf.test.TestCase): - - def testCodesFromCTC(self): - """Tests that the simple CTC decoder drops nulls and duplicates. 
- """ - ctc_labels = [9, 9, 9, 1, 9, 2, 2, 3, 9, 9, 0, 0, 1, 9, 1, 9, 9, 9] - decode = decoder.Decoder(filename=None) - non_null_labels = decode._CodesFromCTC( - ctc_labels, merge_dups=False, null_label=9) - self.assertEqual(non_null_labels, [1, 2, 2, 3, 0, 0, 1, 1]) - idempotent_labels = decode._CodesFromCTC( - non_null_labels, merge_dups=False, null_label=9) - self.assertEqual(idempotent_labels, non_null_labels) - collapsed_labels = decode._CodesFromCTC( - ctc_labels, merge_dups=True, null_label=9) - self.assertEqual(collapsed_labels, [1, 2, 3, 0, 1, 1]) - non_idempotent_labels = decode._CodesFromCTC( - collapsed_labels, merge_dups=True, null_label=9) - self.assertEqual(non_idempotent_labels, [1, 2, 3, 0, 1]) - - def testStringFromCTC(self): - """Tests that the decoder can decode sequences including multi-codes. - """ - # - f - a r - m(1/2)m -junk sp b a r - n - - ctc_labels = [9, 6, 9, 1, 3, 9, 4, 9, 5, 5, 9, 5, 0, 2, 1, 3, 9, 4, 9] - decode = decoder.Decoder(filename=_testdata('charset_size_10.txt')) - text = decode.StringFromCTC(ctc_labels, merge_dups=True, null_label=9) - self.assertEqual(text, 'farm barn') - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/street/python/errorcounter.py b/research/street/python/errorcounter.py deleted file mode 100644 index affbf969532e6a1b865bca752705bbd6a2ef8f84..0000000000000000000000000000000000000000 --- a/research/street/python/errorcounter.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Some simple tools for error counting. - -""" -import collections - -# Named tuple Error counts describes the counts needed to accumulate errors -# over multiple trials: -# false negatives (aka drops or deletions), -# false positives: (aka adds or insertions), -# truth_count: number of elements in ground truth = denominator for fn, -# test_count: number of elements in test string = denominator for fp, -# Note that recall = 1 - fn/truth_count, precision = 1 - fp/test_count, -# accuracy = 1 - (fn + fp) / (truth_count + test_count). -ErrorCounts = collections.namedtuple('ErrorCounts', ['fn', 'fp', 'truth_count', - 'test_count']) - -# Named tuple for error rates, as a percentage. Accuracies are just 100-error. -ErrorRates = collections.namedtuple('ErrorRates', - ['label_error', 'word_recall_error', - 'word_precision_error', 'sequence_error']) - - -def CountWordErrors(ocr_text, truth_text): - """Counts the word drop and add errors as a bag of words. - - Args: - ocr_text: OCR text string. - truth_text: Truth text string. - - Returns: - ErrorCounts named tuple. - """ - # Convert to lists of words. - return CountErrors(ocr_text.split(), truth_text.split()) - - -def CountErrors(ocr_text, truth_text): - """Counts the drops and adds between 2 bags of iterables. - - Simple bag of objects count returns the number of dropped and added - elements, regardless of order, from anything that is iterable, eg - a pair of strings gives character errors, and a pair of word lists give - word errors. - Args: - ocr_text: OCR text iterable (eg string for chars, word list for words). - truth_text: Truth text iterable. - - Returns: - ErrorCounts named tuple. 
- """ - counts = collections.Counter(truth_text) - counts.subtract(ocr_text) - drops = sum(c for c in counts.values() if c > 0) - adds = sum(-c for c in counts.values() if c < 0) - return ErrorCounts(drops, adds, len(truth_text), len(ocr_text)) - - -def AddErrors(counts1, counts2): - """Adds the counts and returns a new sum tuple. - - Args: - counts1: ErrorCounts named tuples to sum. - counts2: ErrorCounts named tuples to sum. - Returns: - Sum of counts1, counts2. - """ - return ErrorCounts(counts1.fn + counts2.fn, counts1.fp + counts2.fp, - counts1.truth_count + counts2.truth_count, - counts1.test_count + counts2.test_count) - - -def ComputeErrorRates(label_counts, word_counts, seq_errors, num_seqs): - """Returns an ErrorRates corresponding to the given counts. - - Args: - label_counts: ErrorCounts for the character labels - word_counts: ErrorCounts for the words - seq_errors: Number of sequence errors - num_seqs: Total sequences - Returns: - ErrorRates corresponding to the given counts. - """ - label_errors = label_counts.fn + label_counts.fp - num_labels = label_counts.truth_count + label_counts.test_count - return ErrorRates( - ComputeErrorRate(label_errors, num_labels), - ComputeErrorRate(word_counts.fn, word_counts.truth_count), - ComputeErrorRate(word_counts.fp, word_counts.test_count), - ComputeErrorRate(seq_errors, num_seqs)) - - -def ComputeErrorRate(error_count, truth_count): - """Returns a sanitized percent error rate from the raw counts. - - Prevents div by 0 and clips return to 100%. - Args: - error_count: Number of errors. - truth_count: Number to divide by. - - Returns: - 100.0 * error_count / truth_count clipped to 100. 
- """ - if truth_count == 0: - truth_count = 1 - error_count = 1 - elif error_count > truth_count: - error_count = truth_count - return error_count * 100.0 / truth_count diff --git a/research/street/python/errorcounter_test.py b/research/street/python/errorcounter_test.py deleted file mode 100644 index aeaa36092bf43a4ca4186a136527113f4ebb4a03..0000000000000000000000000000000000000000 --- a/research/street/python/errorcounter_test.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for errorcounter.""" -import tensorflow as tf -import errorcounter as ec - - -class ErrorcounterTest(tf.test.TestCase): - - def testComputeErrorRate(self): - """Tests that the percent calculation works as expected. - """ - rate = ec.ComputeErrorRate(error_count=0, truth_count=0) - self.assertEqual(rate, 100.0) - rate = ec.ComputeErrorRate(error_count=1, truth_count=0) - self.assertEqual(rate, 100.0) - rate = ec.ComputeErrorRate(error_count=10, truth_count=1) - self.assertEqual(rate, 100.0) - rate = ec.ComputeErrorRate(error_count=0, truth_count=1) - self.assertEqual(rate, 0.0) - rate = ec.ComputeErrorRate(error_count=3, truth_count=12) - self.assertEqual(rate, 25.0) - - def testCountErrors(self): - """Tests that the error counter works as expected. 
- """ - truth_str = 'farm barn' - counts = ec.CountErrors(ocr_text=truth_str, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=0, truth_count=9, test_count=9)) - # With a period on the end, we get a char error. - dot_str = 'farm barn.' - counts = ec.CountErrors(ocr_text=dot_str, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=1, truth_count=9, test_count=10)) - counts = ec.CountErrors(ocr_text=truth_str, truth_text=dot_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=0, truth_count=10, test_count=9)) - # Space is just another char. - no_space = 'farmbarn' - counts = ec.CountErrors(ocr_text=no_space, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=0, truth_count=9, test_count=8)) - counts = ec.CountErrors(ocr_text=truth_str, truth_text=no_space) - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=1, truth_count=8, test_count=9)) - # Lose them all. - counts = ec.CountErrors(ocr_text='', truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=9, fp=0, truth_count=9, test_count=0)) - counts = ec.CountErrors(ocr_text=truth_str, truth_text='') - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=9, truth_count=0, test_count=9)) - - def testCountWordErrors(self): - """Tests that the error counter works as expected. - """ - truth_str = 'farm barn' - counts = ec.CountWordErrors(ocr_text=truth_str, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=0, truth_count=2, test_count=2)) - # With a period on the end, we get a word error. - dot_str = 'farm barn.' - counts = ec.CountWordErrors(ocr_text=dot_str, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=1, truth_count=2, test_count=2)) - counts = ec.CountWordErrors(ocr_text=truth_str, truth_text=dot_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=1, truth_count=2, test_count=2)) - # Space is special. 
- no_space = 'farmbarn' - counts = ec.CountWordErrors(ocr_text=no_space, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=2, fp=1, truth_count=2, test_count=1)) - counts = ec.CountWordErrors(ocr_text=truth_str, truth_text=no_space) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=2, truth_count=1, test_count=2)) - # Lose them all. - counts = ec.CountWordErrors(ocr_text='', truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=2, fp=0, truth_count=2, test_count=0)) - counts = ec.CountWordErrors(ocr_text=truth_str, truth_text='') - self.assertEqual( - counts, ec.ErrorCounts( - fn=0, fp=2, truth_count=0, test_count=2)) - # With a space in ba rn, there is an extra add. - sp_str = 'farm ba rn' - counts = ec.CountWordErrors(ocr_text=sp_str, truth_text=truth_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=1, fp=2, truth_count=2, test_count=3)) - counts = ec.CountWordErrors(ocr_text=truth_str, truth_text=sp_str) - self.assertEqual( - counts, ec.ErrorCounts( - fn=2, fp=1, truth_count=3, test_count=2)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/street/python/fsns_urls.py b/research/street/python/fsns_urls.py deleted file mode 100644 index bea547b9d57315e81ed69d290370f851b17784e0..0000000000000000000000000000000000000000 --- a/research/street/python/fsns_urls.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Creates a text file with URLs to download FSNS dataset using aria2c. - -The FSNS dataset has 640 files and takes 158Gb of the disk space. So it is -highly recommended to use some kind of a download manager to download it. - -Aria2c is a powerful download manager which can download multiple files in -parallel, re-try if encounter an error and continue previously unfinished -downloads. -""" - -import os - -_FSNS_BASE_URL = 'http://download.tensorflow.org/data/fsns-20160927/' -_SHARDS = {'test': 64, 'train': 512, 'validation':64} -_OUTPUT_FILE = "fsns_urls.txt" -_OUTPUT_DIR = "data/fsns" - -def fsns_paths(): - paths = ['charset_size=134.txt'] - for name, shards in _SHARDS.items(): - for i in range(shards): - paths.append('%s/%s-%05d-of-%05d' % (name, name, i, shards)) - return paths - - -if __name__ == "__main__": - with open(_OUTPUT_FILE, "w") as f: - for path in fsns_paths(): - url = _FSNS_BASE_URL + path - dst_path = os.path.join(_OUTPUT_DIR, path) - f.write("%s\n out=%s\n" % (url, dst_path)) - print("To download FSNS dataset execute:") - print("aria2c -c -j 20 -i %s" % _OUTPUT_FILE) - print("The downloaded FSNS dataset will be stored under %s" % _OUTPUT_DIR) diff --git a/research/street/python/fsns_urls.txt b/research/street/python/fsns_urls.txt deleted file mode 100644 index 959ffbd5d432105a2964ef2a4be07d046c7ab026..0000000000000000000000000000000000000000 --- a/research/street/python/fsns_urls.txt +++ /dev/null @@ -1,1282 +0,0 @@ -http://download.tensorflow.org/data/fsns-20160927/charset_size=134.txt - out=data/fsns/charset_size=134.txt -http://download.tensorflow.org/data/fsns-20160927/test/test-00000-of-00064 - out=data/fsns/test/test-00000-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00001-of-00064 - out=data/fsns/test/test-00001-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/test/test-00002-of-00064 - out=data/fsns/test/test-00002-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00003-of-00064 - out=data/fsns/test/test-00003-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00004-of-00064 - out=data/fsns/test/test-00004-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00005-of-00064 - out=data/fsns/test/test-00005-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00006-of-00064 - out=data/fsns/test/test-00006-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00007-of-00064 - out=data/fsns/test/test-00007-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00008-of-00064 - out=data/fsns/test/test-00008-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00009-of-00064 - out=data/fsns/test/test-00009-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00010-of-00064 - out=data/fsns/test/test-00010-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00011-of-00064 - out=data/fsns/test/test-00011-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00012-of-00064 - out=data/fsns/test/test-00012-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00013-of-00064 - out=data/fsns/test/test-00013-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00014-of-00064 - out=data/fsns/test/test-00014-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00015-of-00064 - out=data/fsns/test/test-00015-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00016-of-00064 - out=data/fsns/test/test-00016-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00017-of-00064 - out=data/fsns/test/test-00017-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00018-of-00064 - out=data/fsns/test/test-00018-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/test/test-00019-of-00064 - out=data/fsns/test/test-00019-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00020-of-00064 - out=data/fsns/test/test-00020-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00021-of-00064 - out=data/fsns/test/test-00021-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00022-of-00064 - out=data/fsns/test/test-00022-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00023-of-00064 - out=data/fsns/test/test-00023-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00024-of-00064 - out=data/fsns/test/test-00024-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00025-of-00064 - out=data/fsns/test/test-00025-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00026-of-00064 - out=data/fsns/test/test-00026-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00027-of-00064 - out=data/fsns/test/test-00027-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00028-of-00064 - out=data/fsns/test/test-00028-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00029-of-00064 - out=data/fsns/test/test-00029-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00030-of-00064 - out=data/fsns/test/test-00030-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00031-of-00064 - out=data/fsns/test/test-00031-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00032-of-00064 - out=data/fsns/test/test-00032-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00033-of-00064 - out=data/fsns/test/test-00033-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00034-of-00064 - out=data/fsns/test/test-00034-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00035-of-00064 - out=data/fsns/test/test-00035-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/test/test-00036-of-00064 - out=data/fsns/test/test-00036-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00037-of-00064 - out=data/fsns/test/test-00037-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00038-of-00064 - out=data/fsns/test/test-00038-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00039-of-00064 - out=data/fsns/test/test-00039-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00040-of-00064 - out=data/fsns/test/test-00040-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00041-of-00064 - out=data/fsns/test/test-00041-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00042-of-00064 - out=data/fsns/test/test-00042-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00043-of-00064 - out=data/fsns/test/test-00043-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00044-of-00064 - out=data/fsns/test/test-00044-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00045-of-00064 - out=data/fsns/test/test-00045-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00046-of-00064 - out=data/fsns/test/test-00046-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00047-of-00064 - out=data/fsns/test/test-00047-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00048-of-00064 - out=data/fsns/test/test-00048-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00049-of-00064 - out=data/fsns/test/test-00049-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00050-of-00064 - out=data/fsns/test/test-00050-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00051-of-00064 - out=data/fsns/test/test-00051-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00052-of-00064 - out=data/fsns/test/test-00052-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/test/test-00053-of-00064 - out=data/fsns/test/test-00053-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00054-of-00064 - out=data/fsns/test/test-00054-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00055-of-00064 - out=data/fsns/test/test-00055-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00056-of-00064 - out=data/fsns/test/test-00056-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00057-of-00064 - out=data/fsns/test/test-00057-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00058-of-00064 - out=data/fsns/test/test-00058-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00059-of-00064 - out=data/fsns/test/test-00059-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00060-of-00064 - out=data/fsns/test/test-00060-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00061-of-00064 - out=data/fsns/test/test-00061-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00062-of-00064 - out=data/fsns/test/test-00062-of-00064 -http://download.tensorflow.org/data/fsns-20160927/test/test-00063-of-00064 - out=data/fsns/test/test-00063-of-00064 -http://download.tensorflow.org/data/fsns-20160927/train/train-00000-of-00512 - out=data/fsns/train/train-00000-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00001-of-00512 - out=data/fsns/train/train-00001-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00002-of-00512 - out=data/fsns/train/train-00002-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00003-of-00512 - out=data/fsns/train/train-00003-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00004-of-00512 - out=data/fsns/train/train-00004-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00005-of-00512 - 
out=data/fsns/train/train-00005-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00006-of-00512 - out=data/fsns/train/train-00006-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00007-of-00512 - out=data/fsns/train/train-00007-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00008-of-00512 - out=data/fsns/train/train-00008-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00009-of-00512 - out=data/fsns/train/train-00009-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00010-of-00512 - out=data/fsns/train/train-00010-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00011-of-00512 - out=data/fsns/train/train-00011-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00012-of-00512 - out=data/fsns/train/train-00012-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00013-of-00512 - out=data/fsns/train/train-00013-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00014-of-00512 - out=data/fsns/train/train-00014-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00015-of-00512 - out=data/fsns/train/train-00015-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00016-of-00512 - out=data/fsns/train/train-00016-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00017-of-00512 - out=data/fsns/train/train-00017-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00018-of-00512 - out=data/fsns/train/train-00018-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00019-of-00512 - out=data/fsns/train/train-00019-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00020-of-00512 - out=data/fsns/train/train-00020-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00021-of-00512 - out=data/fsns/train/train-00021-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00022-of-00512 - out=data/fsns/train/train-00022-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00023-of-00512 - out=data/fsns/train/train-00023-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00024-of-00512 - out=data/fsns/train/train-00024-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00025-of-00512 - out=data/fsns/train/train-00025-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00026-of-00512 - out=data/fsns/train/train-00026-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00027-of-00512 - out=data/fsns/train/train-00027-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00028-of-00512 - out=data/fsns/train/train-00028-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00029-of-00512 - out=data/fsns/train/train-00029-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00030-of-00512 - out=data/fsns/train/train-00030-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00031-of-00512 - out=data/fsns/train/train-00031-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00032-of-00512 - out=data/fsns/train/train-00032-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00033-of-00512 - out=data/fsns/train/train-00033-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00034-of-00512 - out=data/fsns/train/train-00034-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00035-of-00512 - out=data/fsns/train/train-00035-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00036-of-00512 - out=data/fsns/train/train-00036-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00037-of-00512 - out=data/fsns/train/train-00037-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00038-of-00512 - out=data/fsns/train/train-00038-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00039-of-00512 - out=data/fsns/train/train-00039-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00040-of-00512 - out=data/fsns/train/train-00040-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00041-of-00512 - out=data/fsns/train/train-00041-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00042-of-00512 - out=data/fsns/train/train-00042-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00043-of-00512 - out=data/fsns/train/train-00043-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00044-of-00512 - out=data/fsns/train/train-00044-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00045-of-00512 - out=data/fsns/train/train-00045-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00046-of-00512 - out=data/fsns/train/train-00046-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00047-of-00512 - out=data/fsns/train/train-00047-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00048-of-00512 - out=data/fsns/train/train-00048-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00049-of-00512 - out=data/fsns/train/train-00049-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00050-of-00512 - out=data/fsns/train/train-00050-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00051-of-00512 - out=data/fsns/train/train-00051-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00052-of-00512 - out=data/fsns/train/train-00052-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00053-of-00512 - out=data/fsns/train/train-00053-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00054-of-00512 - out=data/fsns/train/train-00054-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00055-of-00512 - out=data/fsns/train/train-00055-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00056-of-00512 - out=data/fsns/train/train-00056-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00057-of-00512 - out=data/fsns/train/train-00057-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00058-of-00512 - out=data/fsns/train/train-00058-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00059-of-00512 - out=data/fsns/train/train-00059-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00060-of-00512 - out=data/fsns/train/train-00060-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00061-of-00512 - out=data/fsns/train/train-00061-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00062-of-00512 - out=data/fsns/train/train-00062-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00063-of-00512 - out=data/fsns/train/train-00063-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00064-of-00512 - out=data/fsns/train/train-00064-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00065-of-00512 - out=data/fsns/train/train-00065-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00066-of-00512 - out=data/fsns/train/train-00066-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00067-of-00512 - out=data/fsns/train/train-00067-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00068-of-00512 - out=data/fsns/train/train-00068-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00069-of-00512 - out=data/fsns/train/train-00069-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00070-of-00512 - out=data/fsns/train/train-00070-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00071-of-00512 - out=data/fsns/train/train-00071-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00072-of-00512 - out=data/fsns/train/train-00072-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00073-of-00512 - out=data/fsns/train/train-00073-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00074-of-00512 - out=data/fsns/train/train-00074-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00075-of-00512 - out=data/fsns/train/train-00075-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00076-of-00512 - out=data/fsns/train/train-00076-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00077-of-00512 - out=data/fsns/train/train-00077-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00078-of-00512 - out=data/fsns/train/train-00078-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00079-of-00512 - out=data/fsns/train/train-00079-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00080-of-00512 - out=data/fsns/train/train-00080-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00081-of-00512 - out=data/fsns/train/train-00081-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00082-of-00512 - out=data/fsns/train/train-00082-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00083-of-00512 - out=data/fsns/train/train-00083-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00084-of-00512 - out=data/fsns/train/train-00084-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00085-of-00512 - out=data/fsns/train/train-00085-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00086-of-00512 - out=data/fsns/train/train-00086-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00087-of-00512 - out=data/fsns/train/train-00087-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00088-of-00512 - out=data/fsns/train/train-00088-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00089-of-00512 - out=data/fsns/train/train-00089-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00090-of-00512 - out=data/fsns/train/train-00090-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00091-of-00512 - out=data/fsns/train/train-00091-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00092-of-00512 - out=data/fsns/train/train-00092-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00093-of-00512 - out=data/fsns/train/train-00093-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00094-of-00512 - out=data/fsns/train/train-00094-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00095-of-00512 - out=data/fsns/train/train-00095-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00096-of-00512 - out=data/fsns/train/train-00096-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00097-of-00512 - out=data/fsns/train/train-00097-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00098-of-00512 - out=data/fsns/train/train-00098-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00099-of-00512 - out=data/fsns/train/train-00099-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00100-of-00512 - out=data/fsns/train/train-00100-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00101-of-00512 - out=data/fsns/train/train-00101-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00102-of-00512 - out=data/fsns/train/train-00102-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00103-of-00512 - out=data/fsns/train/train-00103-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00104-of-00512 - out=data/fsns/train/train-00104-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00105-of-00512 - out=data/fsns/train/train-00105-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00106-of-00512 - out=data/fsns/train/train-00106-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00107-of-00512 - out=data/fsns/train/train-00107-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00108-of-00512 - out=data/fsns/train/train-00108-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00109-of-00512 - out=data/fsns/train/train-00109-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00110-of-00512 - out=data/fsns/train/train-00110-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00111-of-00512 - out=data/fsns/train/train-00111-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00112-of-00512 - out=data/fsns/train/train-00112-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00113-of-00512 - out=data/fsns/train/train-00113-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00114-of-00512 - out=data/fsns/train/train-00114-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00115-of-00512 - out=data/fsns/train/train-00115-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00116-of-00512 - out=data/fsns/train/train-00116-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00117-of-00512 - out=data/fsns/train/train-00117-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00118-of-00512 - out=data/fsns/train/train-00118-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00119-of-00512 - out=data/fsns/train/train-00119-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00120-of-00512 - out=data/fsns/train/train-00120-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00121-of-00512 - out=data/fsns/train/train-00121-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00122-of-00512 - out=data/fsns/train/train-00122-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00123-of-00512 - out=data/fsns/train/train-00123-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00124-of-00512 - out=data/fsns/train/train-00124-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00125-of-00512 - out=data/fsns/train/train-00125-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00126-of-00512 - out=data/fsns/train/train-00126-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00127-of-00512 - out=data/fsns/train/train-00127-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00128-of-00512 - out=data/fsns/train/train-00128-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00129-of-00512 - out=data/fsns/train/train-00129-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00130-of-00512 - out=data/fsns/train/train-00130-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00131-of-00512 - out=data/fsns/train/train-00131-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00132-of-00512 - out=data/fsns/train/train-00132-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00133-of-00512 - out=data/fsns/train/train-00133-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00134-of-00512 - out=data/fsns/train/train-00134-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00135-of-00512 - out=data/fsns/train/train-00135-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00136-of-00512 - out=data/fsns/train/train-00136-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00137-of-00512 - out=data/fsns/train/train-00137-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00138-of-00512 - out=data/fsns/train/train-00138-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00139-of-00512 - out=data/fsns/train/train-00139-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00140-of-00512 - out=data/fsns/train/train-00140-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00141-of-00512 - out=data/fsns/train/train-00141-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00142-of-00512 - out=data/fsns/train/train-00142-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00143-of-00512 - out=data/fsns/train/train-00143-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00144-of-00512 - out=data/fsns/train/train-00144-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00145-of-00512 - out=data/fsns/train/train-00145-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00146-of-00512 - out=data/fsns/train/train-00146-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00147-of-00512 - out=data/fsns/train/train-00147-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00148-of-00512 - out=data/fsns/train/train-00148-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00149-of-00512 - out=data/fsns/train/train-00149-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00150-of-00512 - out=data/fsns/train/train-00150-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00151-of-00512 - out=data/fsns/train/train-00151-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00152-of-00512 - out=data/fsns/train/train-00152-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00153-of-00512 - out=data/fsns/train/train-00153-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00154-of-00512 - out=data/fsns/train/train-00154-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00155-of-00512 - out=data/fsns/train/train-00155-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00156-of-00512 - out=data/fsns/train/train-00156-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00157-of-00512 - out=data/fsns/train/train-00157-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00158-of-00512 - out=data/fsns/train/train-00158-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00159-of-00512 - out=data/fsns/train/train-00159-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00160-of-00512 - out=data/fsns/train/train-00160-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00161-of-00512 - out=data/fsns/train/train-00161-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00162-of-00512 - out=data/fsns/train/train-00162-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00163-of-00512 - out=data/fsns/train/train-00163-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00164-of-00512 - out=data/fsns/train/train-00164-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00165-of-00512 - out=data/fsns/train/train-00165-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00166-of-00512 - out=data/fsns/train/train-00166-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00167-of-00512 - out=data/fsns/train/train-00167-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00168-of-00512 - out=data/fsns/train/train-00168-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00169-of-00512 - out=data/fsns/train/train-00169-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00170-of-00512 - out=data/fsns/train/train-00170-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00171-of-00512 - out=data/fsns/train/train-00171-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00172-of-00512 - out=data/fsns/train/train-00172-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00173-of-00512 - out=data/fsns/train/train-00173-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00174-of-00512 - out=data/fsns/train/train-00174-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00175-of-00512 - out=data/fsns/train/train-00175-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00176-of-00512 - out=data/fsns/train/train-00176-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00177-of-00512 - out=data/fsns/train/train-00177-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00178-of-00512 - out=data/fsns/train/train-00178-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00179-of-00512 - out=data/fsns/train/train-00179-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00180-of-00512 - out=data/fsns/train/train-00180-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00181-of-00512 - out=data/fsns/train/train-00181-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00182-of-00512 - out=data/fsns/train/train-00182-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00183-of-00512 - out=data/fsns/train/train-00183-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00184-of-00512 - out=data/fsns/train/train-00184-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00185-of-00512 - out=data/fsns/train/train-00185-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00186-of-00512 - out=data/fsns/train/train-00186-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00187-of-00512 - out=data/fsns/train/train-00187-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00188-of-00512 - out=data/fsns/train/train-00188-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00189-of-00512 - out=data/fsns/train/train-00189-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00190-of-00512 - out=data/fsns/train/train-00190-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00191-of-00512 - out=data/fsns/train/train-00191-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00192-of-00512 - out=data/fsns/train/train-00192-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00193-of-00512 - out=data/fsns/train/train-00193-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00194-of-00512 - out=data/fsns/train/train-00194-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00195-of-00512 - out=data/fsns/train/train-00195-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00196-of-00512 - out=data/fsns/train/train-00196-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00197-of-00512 - out=data/fsns/train/train-00197-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00198-of-00512 - out=data/fsns/train/train-00198-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00199-of-00512 - out=data/fsns/train/train-00199-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00200-of-00512 - out=data/fsns/train/train-00200-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00201-of-00512 - out=data/fsns/train/train-00201-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00202-of-00512 - out=data/fsns/train/train-00202-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00203-of-00512 - out=data/fsns/train/train-00203-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00204-of-00512 - out=data/fsns/train/train-00204-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00205-of-00512 - out=data/fsns/train/train-00205-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00206-of-00512 - out=data/fsns/train/train-00206-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00207-of-00512 - out=data/fsns/train/train-00207-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00208-of-00512 - out=data/fsns/train/train-00208-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00209-of-00512 - out=data/fsns/train/train-00209-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00210-of-00512 - out=data/fsns/train/train-00210-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00211-of-00512 - out=data/fsns/train/train-00211-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00212-of-00512 - out=data/fsns/train/train-00212-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00213-of-00512 - out=data/fsns/train/train-00213-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00214-of-00512 - out=data/fsns/train/train-00214-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00215-of-00512 - out=data/fsns/train/train-00215-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00216-of-00512 - out=data/fsns/train/train-00216-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00217-of-00512 - out=data/fsns/train/train-00217-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00218-of-00512 - out=data/fsns/train/train-00218-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00219-of-00512 - out=data/fsns/train/train-00219-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00220-of-00512 - out=data/fsns/train/train-00220-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00221-of-00512 - out=data/fsns/train/train-00221-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00222-of-00512 - out=data/fsns/train/train-00222-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00223-of-00512 - out=data/fsns/train/train-00223-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00224-of-00512 - out=data/fsns/train/train-00224-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00225-of-00512 - out=data/fsns/train/train-00225-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00226-of-00512 - out=data/fsns/train/train-00226-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00227-of-00512 - out=data/fsns/train/train-00227-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00228-of-00512 - out=data/fsns/train/train-00228-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00229-of-00512 - out=data/fsns/train/train-00229-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00230-of-00512 - out=data/fsns/train/train-00230-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00231-of-00512 - out=data/fsns/train/train-00231-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00232-of-00512 - out=data/fsns/train/train-00232-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00233-of-00512 - out=data/fsns/train/train-00233-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00234-of-00512 - out=data/fsns/train/train-00234-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00235-of-00512 - out=data/fsns/train/train-00235-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00236-of-00512 - out=data/fsns/train/train-00236-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00237-of-00512 - out=data/fsns/train/train-00237-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00238-of-00512 - out=data/fsns/train/train-00238-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00239-of-00512 - out=data/fsns/train/train-00239-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00240-of-00512 - out=data/fsns/train/train-00240-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00241-of-00512 - out=data/fsns/train/train-00241-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00242-of-00512 - out=data/fsns/train/train-00242-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00243-of-00512 - out=data/fsns/train/train-00243-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00244-of-00512 - out=data/fsns/train/train-00244-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00245-of-00512 - out=data/fsns/train/train-00245-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00246-of-00512 - out=data/fsns/train/train-00246-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00247-of-00512 - out=data/fsns/train/train-00247-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00248-of-00512 - out=data/fsns/train/train-00248-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00249-of-00512 - out=data/fsns/train/train-00249-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00250-of-00512 - out=data/fsns/train/train-00250-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00251-of-00512 - out=data/fsns/train/train-00251-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00252-of-00512 - out=data/fsns/train/train-00252-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00253-of-00512 - out=data/fsns/train/train-00253-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00254-of-00512 - out=data/fsns/train/train-00254-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00255-of-00512 - out=data/fsns/train/train-00255-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00256-of-00512 - out=data/fsns/train/train-00256-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00257-of-00512 - out=data/fsns/train/train-00257-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00258-of-00512 - out=data/fsns/train/train-00258-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00259-of-00512 - out=data/fsns/train/train-00259-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00260-of-00512 - out=data/fsns/train/train-00260-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00261-of-00512 - out=data/fsns/train/train-00261-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00262-of-00512 - out=data/fsns/train/train-00262-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00263-of-00512 - out=data/fsns/train/train-00263-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00264-of-00512 - out=data/fsns/train/train-00264-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00265-of-00512 - out=data/fsns/train/train-00265-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00266-of-00512 - out=data/fsns/train/train-00266-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00267-of-00512 - out=data/fsns/train/train-00267-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00268-of-00512 - out=data/fsns/train/train-00268-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00269-of-00512 - out=data/fsns/train/train-00269-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00270-of-00512 - out=data/fsns/train/train-00270-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00271-of-00512 - out=data/fsns/train/train-00271-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00272-of-00512 - out=data/fsns/train/train-00272-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00273-of-00512 - out=data/fsns/train/train-00273-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00274-of-00512 - out=data/fsns/train/train-00274-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00275-of-00512 - out=data/fsns/train/train-00275-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00276-of-00512 - out=data/fsns/train/train-00276-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00277-of-00512 - out=data/fsns/train/train-00277-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00278-of-00512 - out=data/fsns/train/train-00278-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00279-of-00512 - out=data/fsns/train/train-00279-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00280-of-00512 - out=data/fsns/train/train-00280-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00281-of-00512 - out=data/fsns/train/train-00281-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00282-of-00512 - out=data/fsns/train/train-00282-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00283-of-00512 - out=data/fsns/train/train-00283-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00284-of-00512 - out=data/fsns/train/train-00284-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00285-of-00512 - out=data/fsns/train/train-00285-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00286-of-00512 - out=data/fsns/train/train-00286-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00287-of-00512 - out=data/fsns/train/train-00287-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00288-of-00512 - out=data/fsns/train/train-00288-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00289-of-00512 - out=data/fsns/train/train-00289-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00290-of-00512 - out=data/fsns/train/train-00290-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00291-of-00512 - out=data/fsns/train/train-00291-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00292-of-00512 - out=data/fsns/train/train-00292-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00293-of-00512 - out=data/fsns/train/train-00293-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00294-of-00512 - out=data/fsns/train/train-00294-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00295-of-00512 - out=data/fsns/train/train-00295-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00296-of-00512 - out=data/fsns/train/train-00296-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00297-of-00512 - out=data/fsns/train/train-00297-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00298-of-00512 - out=data/fsns/train/train-00298-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00299-of-00512 - out=data/fsns/train/train-00299-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00300-of-00512 - out=data/fsns/train/train-00300-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00301-of-00512 - out=data/fsns/train/train-00301-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00302-of-00512 - out=data/fsns/train/train-00302-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00303-of-00512 - out=data/fsns/train/train-00303-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00304-of-00512 - out=data/fsns/train/train-00304-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00305-of-00512 - out=data/fsns/train/train-00305-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00306-of-00512 - out=data/fsns/train/train-00306-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00307-of-00512 - out=data/fsns/train/train-00307-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00308-of-00512 - out=data/fsns/train/train-00308-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00309-of-00512 - out=data/fsns/train/train-00309-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00310-of-00512 - out=data/fsns/train/train-00310-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00311-of-00512 - out=data/fsns/train/train-00311-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00312-of-00512 - out=data/fsns/train/train-00312-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00313-of-00512 - out=data/fsns/train/train-00313-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00314-of-00512 - out=data/fsns/train/train-00314-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00315-of-00512 - out=data/fsns/train/train-00315-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00316-of-00512 - out=data/fsns/train/train-00316-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00317-of-00512 - out=data/fsns/train/train-00317-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00318-of-00512 - out=data/fsns/train/train-00318-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00319-of-00512 - out=data/fsns/train/train-00319-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00320-of-00512 - out=data/fsns/train/train-00320-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00321-of-00512 - out=data/fsns/train/train-00321-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00322-of-00512 - out=data/fsns/train/train-00322-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00323-of-00512 - out=data/fsns/train/train-00323-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00324-of-00512 - out=data/fsns/train/train-00324-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00325-of-00512 - out=data/fsns/train/train-00325-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00326-of-00512 - out=data/fsns/train/train-00326-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00327-of-00512 - out=data/fsns/train/train-00327-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00328-of-00512 - out=data/fsns/train/train-00328-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00329-of-00512 - out=data/fsns/train/train-00329-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00330-of-00512 - out=data/fsns/train/train-00330-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00331-of-00512 - out=data/fsns/train/train-00331-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00332-of-00512 - out=data/fsns/train/train-00332-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00333-of-00512 - out=data/fsns/train/train-00333-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00334-of-00512 - out=data/fsns/train/train-00334-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00335-of-00512 - out=data/fsns/train/train-00335-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00336-of-00512 - out=data/fsns/train/train-00336-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00337-of-00512 - out=data/fsns/train/train-00337-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00338-of-00512 - out=data/fsns/train/train-00338-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00339-of-00512 - out=data/fsns/train/train-00339-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00340-of-00512 - out=data/fsns/train/train-00340-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00341-of-00512 - out=data/fsns/train/train-00341-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00342-of-00512 - out=data/fsns/train/train-00342-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00343-of-00512 - out=data/fsns/train/train-00343-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00344-of-00512 - out=data/fsns/train/train-00344-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00345-of-00512 - out=data/fsns/train/train-00345-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00346-of-00512 - out=data/fsns/train/train-00346-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00347-of-00512 - out=data/fsns/train/train-00347-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00348-of-00512 - out=data/fsns/train/train-00348-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00349-of-00512 - out=data/fsns/train/train-00349-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00350-of-00512 - out=data/fsns/train/train-00350-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00351-of-00512 - out=data/fsns/train/train-00351-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00352-of-00512 - out=data/fsns/train/train-00352-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00353-of-00512 - out=data/fsns/train/train-00353-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00354-of-00512 - out=data/fsns/train/train-00354-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00355-of-00512 - out=data/fsns/train/train-00355-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00356-of-00512 - out=data/fsns/train/train-00356-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00357-of-00512 - out=data/fsns/train/train-00357-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00358-of-00512 - out=data/fsns/train/train-00358-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00359-of-00512 - out=data/fsns/train/train-00359-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00360-of-00512 - out=data/fsns/train/train-00360-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00361-of-00512 - out=data/fsns/train/train-00361-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00362-of-00512 - out=data/fsns/train/train-00362-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00363-of-00512 - out=data/fsns/train/train-00363-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00364-of-00512 - out=data/fsns/train/train-00364-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00365-of-00512 - out=data/fsns/train/train-00365-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00366-of-00512 - out=data/fsns/train/train-00366-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00367-of-00512 - out=data/fsns/train/train-00367-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00368-of-00512 - out=data/fsns/train/train-00368-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00369-of-00512 - out=data/fsns/train/train-00369-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00370-of-00512 - out=data/fsns/train/train-00370-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00371-of-00512 - out=data/fsns/train/train-00371-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00372-of-00512 - out=data/fsns/train/train-00372-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00373-of-00512 - out=data/fsns/train/train-00373-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00374-of-00512 - out=data/fsns/train/train-00374-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00375-of-00512 - out=data/fsns/train/train-00375-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00376-of-00512 - out=data/fsns/train/train-00376-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00377-of-00512 - out=data/fsns/train/train-00377-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00378-of-00512 - out=data/fsns/train/train-00378-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00379-of-00512 - out=data/fsns/train/train-00379-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00380-of-00512 - out=data/fsns/train/train-00380-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00381-of-00512 - out=data/fsns/train/train-00381-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00382-of-00512 - out=data/fsns/train/train-00382-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00383-of-00512 - out=data/fsns/train/train-00383-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00384-of-00512 - out=data/fsns/train/train-00384-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00385-of-00512 - out=data/fsns/train/train-00385-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00386-of-00512 - out=data/fsns/train/train-00386-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00387-of-00512 - out=data/fsns/train/train-00387-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00388-of-00512 - out=data/fsns/train/train-00388-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00389-of-00512 - out=data/fsns/train/train-00389-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00390-of-00512 - out=data/fsns/train/train-00390-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00391-of-00512 - out=data/fsns/train/train-00391-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00392-of-00512 - out=data/fsns/train/train-00392-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00393-of-00512 - out=data/fsns/train/train-00393-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00394-of-00512 - out=data/fsns/train/train-00394-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00395-of-00512 - out=data/fsns/train/train-00395-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00396-of-00512 - out=data/fsns/train/train-00396-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00397-of-00512 - out=data/fsns/train/train-00397-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00398-of-00512 - out=data/fsns/train/train-00398-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00399-of-00512 - out=data/fsns/train/train-00399-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00400-of-00512 - out=data/fsns/train/train-00400-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00401-of-00512 - out=data/fsns/train/train-00401-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00402-of-00512 - out=data/fsns/train/train-00402-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00403-of-00512 - out=data/fsns/train/train-00403-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00404-of-00512 - out=data/fsns/train/train-00404-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00405-of-00512 - out=data/fsns/train/train-00405-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00406-of-00512 - out=data/fsns/train/train-00406-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00407-of-00512 - out=data/fsns/train/train-00407-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00408-of-00512 - out=data/fsns/train/train-00408-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00409-of-00512 - out=data/fsns/train/train-00409-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00410-of-00512 - out=data/fsns/train/train-00410-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00411-of-00512 - out=data/fsns/train/train-00411-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00412-of-00512 - out=data/fsns/train/train-00412-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00413-of-00512 - out=data/fsns/train/train-00413-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00414-of-00512 - out=data/fsns/train/train-00414-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00415-of-00512 - out=data/fsns/train/train-00415-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00416-of-00512 - out=data/fsns/train/train-00416-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00417-of-00512 - out=data/fsns/train/train-00417-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00418-of-00512 - out=data/fsns/train/train-00418-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00419-of-00512 - out=data/fsns/train/train-00419-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00420-of-00512 - out=data/fsns/train/train-00420-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00421-of-00512 - out=data/fsns/train/train-00421-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00422-of-00512 - out=data/fsns/train/train-00422-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00423-of-00512 - out=data/fsns/train/train-00423-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00424-of-00512 - out=data/fsns/train/train-00424-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00425-of-00512 - out=data/fsns/train/train-00425-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00426-of-00512 - out=data/fsns/train/train-00426-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00427-of-00512 - out=data/fsns/train/train-00427-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00428-of-00512 - out=data/fsns/train/train-00428-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00429-of-00512 - out=data/fsns/train/train-00429-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00430-of-00512 - out=data/fsns/train/train-00430-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00431-of-00512 - out=data/fsns/train/train-00431-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00432-of-00512 - out=data/fsns/train/train-00432-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00433-of-00512 - out=data/fsns/train/train-00433-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00434-of-00512 - out=data/fsns/train/train-00434-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00435-of-00512 - out=data/fsns/train/train-00435-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00436-of-00512 - out=data/fsns/train/train-00436-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00437-of-00512 - out=data/fsns/train/train-00437-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00438-of-00512 - out=data/fsns/train/train-00438-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00439-of-00512 - out=data/fsns/train/train-00439-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00440-of-00512 - out=data/fsns/train/train-00440-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00441-of-00512 - out=data/fsns/train/train-00441-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00442-of-00512 - out=data/fsns/train/train-00442-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00443-of-00512 - out=data/fsns/train/train-00443-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00444-of-00512 - out=data/fsns/train/train-00444-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00445-of-00512 - out=data/fsns/train/train-00445-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00446-of-00512 - out=data/fsns/train/train-00446-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00447-of-00512 - out=data/fsns/train/train-00447-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00448-of-00512 - out=data/fsns/train/train-00448-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00449-of-00512 - out=data/fsns/train/train-00449-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00450-of-00512 - out=data/fsns/train/train-00450-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00451-of-00512 - out=data/fsns/train/train-00451-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00452-of-00512 - out=data/fsns/train/train-00452-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00453-of-00512 - out=data/fsns/train/train-00453-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00454-of-00512 - out=data/fsns/train/train-00454-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00455-of-00512 - out=data/fsns/train/train-00455-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00456-of-00512 - out=data/fsns/train/train-00456-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00457-of-00512 - out=data/fsns/train/train-00457-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00458-of-00512 - out=data/fsns/train/train-00458-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00459-of-00512 - out=data/fsns/train/train-00459-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00460-of-00512 - out=data/fsns/train/train-00460-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00461-of-00512 - out=data/fsns/train/train-00461-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00462-of-00512 - out=data/fsns/train/train-00462-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00463-of-00512 - out=data/fsns/train/train-00463-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00464-of-00512 - out=data/fsns/train/train-00464-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00465-of-00512 - out=data/fsns/train/train-00465-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00466-of-00512 - out=data/fsns/train/train-00466-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00467-of-00512 - out=data/fsns/train/train-00467-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00468-of-00512 - out=data/fsns/train/train-00468-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00469-of-00512 - out=data/fsns/train/train-00469-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00470-of-00512 - out=data/fsns/train/train-00470-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00471-of-00512 - out=data/fsns/train/train-00471-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00472-of-00512 - out=data/fsns/train/train-00472-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00473-of-00512 - out=data/fsns/train/train-00473-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00474-of-00512 - out=data/fsns/train/train-00474-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00475-of-00512 - out=data/fsns/train/train-00475-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00476-of-00512 - out=data/fsns/train/train-00476-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00477-of-00512 - out=data/fsns/train/train-00477-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00478-of-00512 - out=data/fsns/train/train-00478-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00479-of-00512 - out=data/fsns/train/train-00479-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00480-of-00512 - out=data/fsns/train/train-00480-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00481-of-00512 - out=data/fsns/train/train-00481-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00482-of-00512 - out=data/fsns/train/train-00482-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00483-of-00512 - out=data/fsns/train/train-00483-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00484-of-00512 - out=data/fsns/train/train-00484-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00485-of-00512 - out=data/fsns/train/train-00485-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00486-of-00512 - out=data/fsns/train/train-00486-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00487-of-00512 - out=data/fsns/train/train-00487-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00488-of-00512 - out=data/fsns/train/train-00488-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00489-of-00512 - out=data/fsns/train/train-00489-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00490-of-00512 - out=data/fsns/train/train-00490-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00491-of-00512 - out=data/fsns/train/train-00491-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00492-of-00512 - out=data/fsns/train/train-00492-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00493-of-00512 - out=data/fsns/train/train-00493-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00494-of-00512 - out=data/fsns/train/train-00494-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00495-of-00512 - out=data/fsns/train/train-00495-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00496-of-00512 - out=data/fsns/train/train-00496-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00497-of-00512 - out=data/fsns/train/train-00497-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00498-of-00512 - out=data/fsns/train/train-00498-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00499-of-00512 - out=data/fsns/train/train-00499-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00500-of-00512 - out=data/fsns/train/train-00500-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00501-of-00512 - out=data/fsns/train/train-00501-of-00512 
-http://download.tensorflow.org/data/fsns-20160927/train/train-00502-of-00512 - out=data/fsns/train/train-00502-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00503-of-00512 - out=data/fsns/train/train-00503-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00504-of-00512 - out=data/fsns/train/train-00504-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00505-of-00512 - out=data/fsns/train/train-00505-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00506-of-00512 - out=data/fsns/train/train-00506-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00507-of-00512 - out=data/fsns/train/train-00507-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00508-of-00512 - out=data/fsns/train/train-00508-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00509-of-00512 - out=data/fsns/train/train-00509-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00510-of-00512 - out=data/fsns/train/train-00510-of-00512 -http://download.tensorflow.org/data/fsns-20160927/train/train-00511-of-00512 - out=data/fsns/train/train-00511-of-00512 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00000-of-00064 - out=data/fsns/validation/validation-00000-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00001-of-00064 - out=data/fsns/validation/validation-00001-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00002-of-00064 - out=data/fsns/validation/validation-00002-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00003-of-00064 - out=data/fsns/validation/validation-00003-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00004-of-00064 - out=data/fsns/validation/validation-00004-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/validation/validation-00005-of-00064 - out=data/fsns/validation/validation-00005-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00006-of-00064 - out=data/fsns/validation/validation-00006-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00007-of-00064 - out=data/fsns/validation/validation-00007-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00008-of-00064 - out=data/fsns/validation/validation-00008-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00009-of-00064 - out=data/fsns/validation/validation-00009-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00010-of-00064 - out=data/fsns/validation/validation-00010-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00011-of-00064 - out=data/fsns/validation/validation-00011-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00012-of-00064 - out=data/fsns/validation/validation-00012-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00013-of-00064 - out=data/fsns/validation/validation-00013-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00014-of-00064 - out=data/fsns/validation/validation-00014-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00015-of-00064 - out=data/fsns/validation/validation-00015-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00016-of-00064 - out=data/fsns/validation/validation-00016-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00017-of-00064 - out=data/fsns/validation/validation-00017-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00018-of-00064 - out=data/fsns/validation/validation-00018-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/validation/validation-00019-of-00064 - out=data/fsns/validation/validation-00019-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00020-of-00064 - out=data/fsns/validation/validation-00020-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00021-of-00064 - out=data/fsns/validation/validation-00021-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00022-of-00064 - out=data/fsns/validation/validation-00022-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00023-of-00064 - out=data/fsns/validation/validation-00023-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00024-of-00064 - out=data/fsns/validation/validation-00024-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00025-of-00064 - out=data/fsns/validation/validation-00025-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00026-of-00064 - out=data/fsns/validation/validation-00026-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00027-of-00064 - out=data/fsns/validation/validation-00027-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00028-of-00064 - out=data/fsns/validation/validation-00028-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00029-of-00064 - out=data/fsns/validation/validation-00029-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00030-of-00064 - out=data/fsns/validation/validation-00030-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00031-of-00064 - out=data/fsns/validation/validation-00031-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00032-of-00064 - out=data/fsns/validation/validation-00032-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/validation/validation-00033-of-00064 - out=data/fsns/validation/validation-00033-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00034-of-00064 - out=data/fsns/validation/validation-00034-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00035-of-00064 - out=data/fsns/validation/validation-00035-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00036-of-00064 - out=data/fsns/validation/validation-00036-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00037-of-00064 - out=data/fsns/validation/validation-00037-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00038-of-00064 - out=data/fsns/validation/validation-00038-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00039-of-00064 - out=data/fsns/validation/validation-00039-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00040-of-00064 - out=data/fsns/validation/validation-00040-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00041-of-00064 - out=data/fsns/validation/validation-00041-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00042-of-00064 - out=data/fsns/validation/validation-00042-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00043-of-00064 - out=data/fsns/validation/validation-00043-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00044-of-00064 - out=data/fsns/validation/validation-00044-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00045-of-00064 - out=data/fsns/validation/validation-00045-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00046-of-00064 - out=data/fsns/validation/validation-00046-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/validation/validation-00047-of-00064 - out=data/fsns/validation/validation-00047-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00048-of-00064 - out=data/fsns/validation/validation-00048-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00049-of-00064 - out=data/fsns/validation/validation-00049-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00050-of-00064 - out=data/fsns/validation/validation-00050-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00051-of-00064 - out=data/fsns/validation/validation-00051-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00052-of-00064 - out=data/fsns/validation/validation-00052-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00053-of-00064 - out=data/fsns/validation/validation-00053-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00054-of-00064 - out=data/fsns/validation/validation-00054-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00055-of-00064 - out=data/fsns/validation/validation-00055-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00056-of-00064 - out=data/fsns/validation/validation-00056-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00057-of-00064 - out=data/fsns/validation/validation-00057-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00058-of-00064 - out=data/fsns/validation/validation-00058-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00059-of-00064 - out=data/fsns/validation/validation-00059-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00060-of-00064 - out=data/fsns/validation/validation-00060-of-00064 
-http://download.tensorflow.org/data/fsns-20160927/validation/validation-00061-of-00064 - out=data/fsns/validation/validation-00061-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00062-of-00064 - out=data/fsns/validation/validation-00062-of-00064 -http://download.tensorflow.org/data/fsns-20160927/validation/validation-00063-of-00064 - out=data/fsns/validation/validation-00063-of-00064 diff --git a/research/street/python/nn_ops.py b/research/street/python/nn_ops.py deleted file mode 100644 index 20c3b502853bbec80f30e9d2aa915477fa674c62..0000000000000000000000000000000000000000 --- a/research/street/python/nn_ops.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Ops and utilities for neural networks. - -For now, just an LSTM layer. -""" -import shapes -import tensorflow as tf -rnn = tf.load_op_library("../cc/rnn_ops.so") - - -def rnn_helper(inp, - length, - cell_type=None, - direction="forward", - name=None, - *args, - **kwargs): - """Adds ops for a recurrent neural network layer. - - This function calls an actual implementation of a recurrent neural network - based on `cell_type`. - - There are three modes depending on the value of `direction`: - - forward: Adds a forward RNN. - backward: Adds a backward RNN. 
- bidirectional: Adds both forward and backward RNNs and creates a - bidirectional RNN. - - Args: - inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`]. - length: A 1-D tensor of shape [`batch_size`] and type int64. Each element - represents the length of the corresponding sequence in `inp`. - cell_type: Cell type of RNN. Currently can only be "lstm". - direction: One of "forward", "backward", "bidirectional". - name: Name of the op. - *args: Other arguments to the layer. - **kwargs: Keyword arugments to the layer. - - Returns: - A 3-D tensor of shape [`batch_size`, `max_length`, `num_nodes`]. - """ - - assert cell_type is not None - rnn_func = None - if cell_type == "lstm": - rnn_func = lstm_layer - assert rnn_func is not None - assert direction in ["forward", "backward", "bidirectional"] - - with tf.variable_scope(name): - if direction in ["forward", "bidirectional"]: - forward = rnn_func( - inp=inp, - length=length, - backward=False, - name="forward", - *args, - **kwargs) - if isinstance(forward, tuple): - # lstm_layer returns a tuple (output, memory). We only need the first - # element. - forward = forward[0] - if direction in ["backward", "bidirectional"]: - backward = rnn_func( - inp=inp, - length=length, - backward=True, - name="backward", - *args, - **kwargs) - if isinstance(backward, tuple): - # lstm_layer returns a tuple (output, memory). We only need the first - # element. 
- backward = backward[0] - if direction == "forward": - out = forward - elif direction == "backward": - out = backward - else: - out = tf.concat(axis=2, values=[forward, backward]) - return out - - -@tf.RegisterShape("VariableLSTM") -def _variable_lstm_shape(op): - """Shape function for the VariableLSTM op.""" - input_shape = op.inputs[0].get_shape().with_rank(4) - state_shape = op.inputs[1].get_shape().with_rank(2) - memory_shape = op.inputs[2].get_shape().with_rank(2) - w_m_m_shape = op.inputs[3].get_shape().with_rank(3) - batch_size = input_shape[0].merge_with(state_shape[0]) - batch_size = input_shape[0].merge_with(memory_shape[0]) - seq_len = input_shape[1] - gate_num = input_shape[2].merge_with(w_m_m_shape[1]) - output_dim = input_shape[3].merge_with(state_shape[1]) - output_dim = output_dim.merge_with(memory_shape[1]) - output_dim = output_dim.merge_with(w_m_m_shape[0]) - output_dim = output_dim.merge_with(w_m_m_shape[2]) - return [[batch_size, seq_len, output_dim], - [batch_size, seq_len, gate_num, output_dim], - [batch_size, seq_len, output_dim]] - - -@tf.RegisterGradient("VariableLSTM") -def _variable_lstm_grad(op, act_grad, gate_grad, mem_grad): - """Gradient function for the VariableLSTM op.""" - initial_state = op.inputs[1] - initial_memory = op.inputs[2] - w_m_m = op.inputs[3] - act = op.outputs[0] - gate_raw_act = op.outputs[1] - memory = op.outputs[2] - return rnn.variable_lstm_grad(initial_state, initial_memory, w_m_m, act, - gate_raw_act, memory, act_grad, gate_grad, - mem_grad) - - -def lstm_layer(inp, - length=None, - state=None, - memory=None, - num_nodes=None, - backward=False, - clip=50.0, - reg_func=tf.nn.l2_loss, - weight_reg=False, - weight_collection="LSTMWeights", - bias_reg=False, - stddev=None, - seed=None, - decode=False, - use_native_weights=False, - name=None): - """Adds ops for an LSTM layer. 
- - This adds ops for the following operations: - - input => (forward-LSTM|backward-LSTM) => output - - The direction of the LSTM is determined by `backward`. If it is false, the - forward LSTM is used, the backward one otherwise. - - Args: - inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`]. - length: A 1-D tensor of shape [`batch_size`] and type int64. Each element - represents the length of the corresponding sequence in `inp`. - state: If specified, uses it as the initial state. - memory: If specified, uses it as the initial memory. - num_nodes: The number of LSTM cells. - backward: If true, reverses the `inp` before adding the ops. The output is - also reversed so that the direction is the same as `inp`. - clip: Value used to clip the cell values. - reg_func: Function used for the weight regularization such as - `tf.nn.l2_loss`. - weight_reg: If true, regularize the filter weights with `reg_func`. - weight_collection: Collection to add the weights to for regularization. - bias_reg: If true, regularize the bias vector with `reg_func`. - stddev: Standard deviation used to initialize the variables. - seed: Seed used to initialize the variables. - decode: If true, does not add ops which are not used for inference. - use_native_weights: If true, uses weights in the same format as the native - implementations. - name: Name of the op. - - Returns: - A 3-D tensor of shape [`batch_size`, `max_length`, `num_nodes`]. 
- """ - with tf.variable_scope(name): - if backward: - if length is None: - inp = tf.reverse(inp, [1]) - else: - inp = tf.reverse_sequence(inp, length, 1, 0) - - num_prev = inp.get_shape()[2] - if stddev: - initializer = tf.truncated_normal_initializer(stddev=stddev, seed=seed) - else: - initializer = tf.uniform_unit_scaling_initializer(seed=seed) - - if use_native_weights: - with tf.variable_scope("LSTMCell"): - w = tf.get_variable( - "W_0", - shape=[num_prev + num_nodes, 4 * num_nodes], - initializer=initializer, - dtype=tf.float32) - w_i_m = tf.slice(w, [0, 0], [num_prev, 4 * num_nodes], name="w_i_m") - w_m_m = tf.reshape( - tf.slice(w, [num_prev, 0], [num_nodes, 4 * num_nodes]), - [num_nodes, 4, num_nodes], - name="w_m_m") - else: - w_i_m = tf.get_variable("w_i_m", [num_prev, 4 * num_nodes], - initializer=initializer) - w_m_m = tf.get_variable("w_m_m", [num_nodes, 4, num_nodes], - initializer=initializer) - - if not decode and weight_reg: - tf.add_to_collection(weight_collection, reg_func(w_i_m, name="w_i_m_reg")) - tf.add_to_collection(weight_collection, reg_func(w_m_m, name="w_m_m_reg")) - - batch_size = shapes.tensor_dim(inp, dim=0) - num_frames = shapes.tensor_dim(inp, dim=1) - prev = tf.reshape(inp, tf.stack([batch_size * num_frames, num_prev])) - - if use_native_weights: - with tf.variable_scope("LSTMCell"): - b = tf.get_variable( - "B", - shape=[4 * num_nodes], - initializer=tf.zeros_initializer(), - dtype=tf.float32) - biases = tf.identity(b, name="biases") - else: - biases = tf.get_variable( - "biases", [4 * num_nodes], initializer=tf.constant_initializer(0.0)) - if not decode and bias_reg: - tf.add_to_collection( - weight_collection, reg_func( - biases, name="biases_reg")) - prev = tf.nn.xw_plus_b(prev, w_i_m, biases) - - prev = tf.reshape(prev, tf.stack([batch_size, num_frames, 4, num_nodes])) - if state is None: - state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0) - if memory is None: - memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0) - 
- out, _, mem = rnn.variable_lstm(prev, state, memory, w_m_m, clip=clip) - - if backward: - if length is None: - out = tf.reverse(out, [1]) - else: - out = tf.reverse_sequence(out, length, 1, 0) - - return out, mem diff --git a/research/street/python/shapes.py b/research/street/python/shapes.py deleted file mode 100644 index 1f56ef05d5c59b6c626eabffa680999a777301fc..0000000000000000000000000000000000000000 --- a/research/street/python/shapes.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Shape manipulation functions. - -rotate_dimensions: prepares for a rotating transpose by returning a rotated - list of dimension indices. -transposing_reshape: allows a dimension to be factorized, with one of the pieces - transferred to another dimension, or to transpose factors within a single - dimension. -tensor_dim: gets a shape dimension as a constant integer if known otherwise a - runtime usable tensor value. -tensor_shape: returns the full shape of a tensor as the tensor_dim. -""" -from six.moves import xrange -import tensorflow as tf - - -def rotate_dimensions(num_dims, src_dim, dest_dim): - """Returns a list of dimension indices that will rotate src_dim to dest_dim. - - src_dim is moved to dest_dim, with all intervening dimensions shifted towards - the hole left by src_dim. 
Eg: - num_dims = 4, src_dim=3, dest_dim=1 - Returned list=[0, 3, 1, 2] - For a tensor with dims=[5, 4, 3, 2] a transpose would yield [5, 2, 4, 3]. - Args: - num_dims: The number of dimensions to handle. - src_dim: The dimension to move. - dest_dim: The dimension to move src_dim to. - - Returns: - A list of rotated dimension indices. - """ - # List of dimensions for transpose. - dim_list = range(num_dims) - # Shuffle src_dim to dest_dim by swapping to shuffle up the other dims. - step = 1 if dest_dim > src_dim else -1 - for x in xrange(src_dim, dest_dim, step): - dim_list[x], dim_list[x + step] = dim_list[x + step], dim_list[x] - return dim_list - - -def transposing_reshape(tensor, - src_dim, - part_a, - part_b, - dest_dim_a, - dest_dim_b, - name=None): - """Splits src_dim and sends one of the pieces to another dim. - - Terminology: - A matrix is often described as 'row-major' or 'column-major', which doesn't - help if you can't remember which is the row index and which is the column, - even if you know what 'major' means, so here is a simpler explanation of it: - When TF stores a tensor of size [d0, d1, d2, d3] indexed by [i0, i1, i2, i3], - the memory address of an element is calculated using: - ((i0 * d1 + i1) * d2 + i2) * d3 + i3, so, d0 is the MOST SIGNIFICANT dimension - and d3 the LEAST SIGNIFICANT, just like in the decimal number 1234, 1 is the - most significant digit and 4 the least significant. In both cases the most - significant is multiplied by the largest number to determine its 'value'. - Furthermore, if we reshape the tensor to [d0'=d0, d1'=d1 x d2, d2'=d3], then - the MOST SIGNIFICANT part of d1' is d1 and the LEAST SIGNIFICANT part of d1' - is d2. - - Action: - transposing_reshape splits src_dim into factors [part_a, part_b], and sends - the most significant part (of size part_a) to be the most significant part of - dest_dim_a*(Exception: see NOTE 2), and the least significant part (of size - part_b) to be the most significant part of dest_dim_b. 
- This is basically a combination of reshape, rotating transpose, reshape. - NOTE1: At least one of dest_dim_a and dest_dim_b must equal src_dim, ie one of - the parts always stays put, so src_dim is never totally destroyed and the - output number of dimensions is always the same as the input. - NOTE2: If dest_dim_a == dest_dim_b == src_dim, then parts a and b are simply - transposed within src_dim to become part_b x part_a, so the most significant - part becomes the least significant part and vice versa. Thus if you really - wanted to make one of the parts the least significant side of the destiantion, - the destination dimension can be internally transposed with a second call to - transposing_reshape. - NOTE3: One of part_a and part_b may be -1 to allow src_dim to be of unknown - size with one known-size factor. Otherwise part_a * part_b must equal the size - of src_dim. - NOTE4: The reshape preserves as many known-at-graph-build-time dimension sizes - as are available. - - Example: - Input dims=[5, 2, 6, 2] - tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - [[[24, 25]... - src_dim=2, part_a=2, part_b=3, dest_dim_a=3, dest_dim_b=2 - output dims =[5, 2, 3, 4] - output tensor=[[[[0, 1, 6, 7][2, 3, 8, 9][4, 5, 10, 11]] - [[12, 13, 18, 19][14, 15, 20, 21][16, 17, 22, 23]]] - [[[24, 26, 28]... 
- Example2: - Input dims=[phrases, words, letters]=[2, 6, x] - tensor=[[[the][cat][sat][on][the][mat]] - [[a][stitch][in][time][saves][nine]]] - We can factorize the 6 words into 3x2 = [[the][cat]][[sat][on]][[the][mat]] - or 2x3=[[the][cat][sat]][[on][the][mat]] and - src_dim=1, part_a=3, part_b=2, dest_dim_a=1, dest_dim_b=1 - would yield: - [[[the][sat][the][cat][on][mat]] - [[a][in][saves][stitch][time][nine]]], but - src_dim=1, part_a=2, part_b=3, dest_dim_a=1, dest_dim_b=1 - would yield: - [[[the][on][cat][the][sat][mat]] - [[a][time][stitch][saves][in][nine]]], and - src_dim=1, part_a=2, part_b=3, dest_dim_a=0, dest_dim_b=1 - would yield: - [[[the][cat][sat]] - [[a][stitch][in]] - [[on][the][mat]] - [[time][saves][nine]]] - Now remember that the words above represent any least-significant subset of - the input dimensions. - - Args: - tensor: A tensor to reshape. - src_dim: The dimension to split. - part_a: The first factor of the split. - part_b: The second factor of the split. - dest_dim_a: The dimension to move part_a of src_dim to. - dest_dim_b: The dimension to move part_b of src_dim to. - name: Optional base name for all the ops. - - Returns: - Reshaped tensor. - - Raises: - ValueError: If the args are invalid. - """ - if dest_dim_a != src_dim and dest_dim_b != src_dim: - raise ValueError( - 'At least one of dest_dim_a, dest_dim_b must equal src_dim!') - if part_a == 0 or part_b == 0: - raise ValueError('Zero not allowed for part_a or part_b!') - if part_a < 0 and part_b < 0: - raise ValueError('At least one of part_a and part_b must be positive!') - if not name: - name = 'transposing_reshape' - prev_shape = tensor_shape(tensor) - expanded = tf.reshape( - tensor, - prev_shape[:src_dim] + [part_a, part_b] + prev_shape[src_dim + 1:], - name=name + '_reshape_in') - dest = dest_dim_b - if dest_dim_a != src_dim: - # We are just moving part_a to dest_dim_a. - dest = dest_dim_a - else: - # We are moving part_b to dest_dim_b. 
- src_dim += 1 - dim_list = rotate_dimensions(len(expanded.get_shape()), src_dim, dest) - expanded = tf.transpose(expanded, dim_list, name=name + '_rot_transpose') - # Reshape identity except dest,dest+1, which get merged. - ex_shape = tensor_shape(expanded) - combined = ex_shape[dest] * ex_shape[dest + 1] - return tf.reshape( - expanded, - ex_shape[:dest] + [combined] + ex_shape[dest + 2:], - name=name + '_reshape_out') - - -def tensor_dim(tensor, dim): - """Returns int dimension if known at a graph build time else a tensor. - - If the size of the dim of tensor is known at graph building time, then that - known value is returned, otherwise (instead of None), a Tensor that will give - the size of the dimension when the graph is run. The return value will be - accepted by tf.reshape in multiple (or even all) dimensions, even when the - sizes are not known at graph building time, unlike -1, which can only be used - in one dimension. It is a bad idea to use tf.shape all the time, as some ops - demand a known (at graph build time) size. This function therefore returns - the best available, most useful dimension size. - Args: - tensor: Input tensor. - dim: Dimension to find the size of. - - Returns: - An integer if shape is known at build time, otherwise a tensor of int32. - """ - result = tensor.get_shape().as_list()[dim] - if result is None: - result = tf.shape(tensor)[dim] - return result - - -def tensor_shape(tensor): - """Returns a heterogeneous list of tensor_dim for the tensor. - - See tensor_dim for a more detailed explanation. - Args: - tensor: Input tensor. - - Returns: - A heterogeneous list of integers and int32 tensors. 
- """ - result = [] - for d in xrange(len(tensor.get_shape())): - result.append(tensor_dim(tensor, d)) - return result diff --git a/research/street/python/shapes_test.py b/research/street/python/shapes_test.py deleted file mode 100644 index 87b3c737fb0946dc4cf9596d3a04ddab754ca39a..0000000000000000000000000000000000000000 --- a/research/street/python/shapes_test.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for shapes.""" - -import numpy as np -import tensorflow as tf -import shapes - - -def _rand(*size): - return np.random.uniform(size=size).astype('f') - - -class ShapesTest(tf.test.TestCase): - """Tests just the shapes from a call to transposing_reshape.""" - - def __init__(self, other): - super(ShapesTest, self).__init__(other) - self.batch_size = 4 - self.im_height = 24 - self.im_width = 36 - self.depth = 20 - - def testReshapeTile(self): - """Tests that a tiled input can be reshaped to the batch dimension.""" - fake = tf.placeholder( - tf.float32, shape=(None, None, None, self.depth), name='inputs') - real = _rand(self.batch_size, self.im_height, self.im_width, self.depth) - with self.test_session() as sess: - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=3, part_b=-1, dest_dim_a=0, dest_dim_b=2) - res_image = sess.run([outputs], feed_dict={fake: real}) - self.assertEqual( - tuple(res_image[0].shape), - (self.batch_size * 3, self.im_height, self.im_width / 3, self.depth)) - - def testReshapeDepth(self): - """Tests that depth can be reshaped to the x dimension.""" - fake = tf.placeholder( - tf.float32, shape=(None, None, None, self.depth), name='inputs') - real = _rand(self.batch_size, self.im_height, self.im_width, self.depth) - with self.test_session() as sess: - outputs = shapes.transposing_reshape( - fake, src_dim=3, part_a=4, part_b=-1, dest_dim_a=2, dest_dim_b=3) - res_image = sess.run([outputs], feed_dict={fake: real}) - self.assertEqual( - tuple(res_image[0].shape), - (self.batch_size, self.im_height, self.im_width * 4, self.depth / 4)) - - -class DataTest(tf.test.TestCase): - """Tests that the data is moved correctly in a call to transposing_reshape. - - """ - - def testTransposingReshape_2_2_3_2_1(self): - """Case: dest_a == src, dest_b < src: Split with Least sig part going left. 
- """ - with self.test_session() as sess: - fake = tf.placeholder( - tf.float32, shape=(None, None, None, 2), name='inputs') - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=2, part_b=3, dest_dim_a=2, dest_dim_b=1) - # Make real inputs. The tensor looks like this: - # tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - # [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - # [[[24, 25]... - real = np.arange(120).reshape((5, 2, 6, 2)) - np_array = sess.run([outputs], feed_dict={fake: real})[0] - self.assertEqual(tuple(np_array.shape), (5, 6, 2, 2)) - self.assertAllEqual(np_array[0, :, :, :], - [[[0, 1], [6, 7]], [[12, 13], [18, 19]], - [[2, 3], [8, 9]], [[14, 15], [20, 21]], - [[4, 5], [10, 11]], [[16, 17], [22, 23]]]) - - def testTransposingReshape_2_2_3_2_3(self): - """Case: dest_a == src, dest_b > src: Split with Least sig part going right. - """ - with self.test_session() as sess: - fake = tf.placeholder( - tf.float32, shape=(None, None, None, 2), name='inputs') - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=2, part_b=3, dest_dim_a=2, dest_dim_b=3) - # Make real inputs. The tensor looks like this: - # tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - # [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - # [[[24, 25]... - real = np.arange(120).reshape((5, 2, 6, 2)) - np_array = sess.run([outputs], feed_dict={fake: real})[0] - self.assertEqual(tuple(np_array.shape), (5, 2, 2, 6)) - self.assertAllEqual( - np_array[0, :, :, :], - [[[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]], - [[12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]]]) - - def testTransposingReshape_2_2_3_2_2(self): - """Case: dest_a == src, dest_b == src. Transpose within dimension 2. - """ - with self.test_session() as sess: - fake = tf.placeholder( - tf.float32, shape=(None, None, None, 2), name='inputs') - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=2, part_b=3, dest_dim_a=2, dest_dim_b=2) - # Make real inputs. 
The tensor looks like this: - # tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - # [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - # [[[24, 25]... - real = np.arange(120).reshape((5, 2, 6, 2)) - np_array = sess.run([outputs], feed_dict={fake: real})[0] - self.assertEqual(tuple(np_array.shape), (5, 2, 6, 2)) - self.assertAllEqual( - np_array[0, :, :, :], - [[[0, 1], [6, 7], [2, 3], [8, 9], [4, 5], [10, 11]], - [[12, 13], [18, 19], [14, 15], [20, 21], [16, 17], [22, 23]]]) - - def testTransposingReshape_2_2_3_1_2(self): - """Case: dest_a < src, dest_b == src. Split with Most sig part going left. - """ - with self.test_session() as sess: - fake = tf.placeholder( - tf.float32, shape=(None, None, None, 2), name='inputs') - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=2, part_b=3, dest_dim_a=1, dest_dim_b=2) - # Make real inputs. The tensor looks like this: - # tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - # [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - # [[[24, 25]... - real = np.arange(120).reshape((5, 2, 6, 2)) - np_array = sess.run([outputs], feed_dict={fake: real})[0] - self.assertEqual(tuple(np_array.shape), (5, 4, 3, 2)) - self.assertAllEqual(np_array[0, :, :, :], - [[[0, 1], [2, 3], [4, 5]], - [[12, 13], [14, 15], [16, 17]], - [[6, 7], [8, 9], [10, 11]], - [[18, 19], [20, 21], [22, 23]]]) - - def testTransposingReshape_2_2_3_3_2(self): - """Case: dest_a < src, dest_b == src. Split with Most sig part going right. - """ - with self.test_session() as sess: - fake = tf.placeholder( - tf.float32, shape=(None, None, None, 2), name='inputs') - outputs = shapes.transposing_reshape( - fake, src_dim=2, part_a=2, part_b=3, dest_dim_a=3, dest_dim_b=2) - # Make real inputs. The tensor looks like this: - # tensor=[[[[0, 1][2, 3][4, 5][6, 7][8, 9][10, 11]] - # [[12, 13][14, 15][16, 17][18, 19][20, 21][22, 23]] - # [[[24, 25]... 
- real = np.arange(120).reshape((5, 2, 6, 2)) - np_array = sess.run([outputs], feed_dict={fake: real})[0] - self.assertEqual(tuple(np_array.shape), (5, 2, 3, 4)) - self.assertAllEqual( - np_array[0, :, :, :], - [[[0, 1, 6, 7], [2, 3, 8, 9], [4, 5, 10, 11]], - [[12, 13, 18, 19], [14, 15, 20, 21], [16, 17, 22, 23]]]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/street/python/vgsl_eval.py b/research/street/python/vgsl_eval.py deleted file mode 100644 index 7db00d6f067b24f9222a1d490f817116c5121726..0000000000000000000000000000000000000000 --- a/research/street/python/vgsl_eval.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Model eval separate from training.""" -from tensorflow import app -from tensorflow.python.platform import flags - -import vgsl_model - -flags.DEFINE_string('eval_dir', '/tmp/mdir/eval', - 'Directory where to write event logs.') -flags.DEFINE_string('graph_def_file', None, - 'Output eval graph definition file.') -flags.DEFINE_string('train_dir', '/tmp/mdir', - 'Directory where to find training checkpoints.') -flags.DEFINE_string('model_str', - '1,150,600,3[S2(4x150)0,2 Ct5,5,16 Mp2,2 Ct5,5,64 Mp3,3' - '([Lrys64 Lbx128][Lbys64 Lbx128][Lfys64 Lbx128])S3(3x0)2,3' - 'Lfx128 Lrx128 S0(1x4)0,3 Do Lfx256]O1c134', - 'Network description.') -flags.DEFINE_integer('num_steps', 1000, 'Number of steps to run evaluation.') -flags.DEFINE_integer('eval_interval_secs', 60, - 'Time interval between eval runs.') -flags.DEFINE_string('eval_data', None, 'Evaluation data filepattern') -flags.DEFINE_string('decoder', None, 'Charset decoder') - -FLAGS = flags.FLAGS - - -def main(argv): - del argv - vgsl_model.Eval(FLAGS.train_dir, FLAGS.eval_dir, FLAGS.model_str, - FLAGS.eval_data, FLAGS.decoder, FLAGS.num_steps, - FLAGS.graph_def_file, FLAGS.eval_interval_secs) - - -if __name__ == '__main__': - app.run() diff --git a/research/street/python/vgsl_input.py b/research/street/python/vgsl_input.py deleted file mode 100644 index e4495c680aa7c757d87e6cfe2fefc1e62bc7ae6f..0000000000000000000000000000000000000000 --- a/research/street/python/vgsl_input.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""String network description language to define network layouts.""" -import collections -import tensorflow as tf -from tensorflow.python.ops import parsing_ops - -# Named tuple for the standard tf image tensor Shape. -# batch_size: Number of images to batch-up for training. -# height: Fixed height of image or None for variable. -# width: Fixed width of image or None for variable. -# depth: Desired depth in bytes per pixel of input images. -ImageShape = collections.namedtuple('ImageTensorDims', - ['batch_size', 'height', 'width', 'depth']) - - -def ImageInput(input_pattern, num_threads, shape, using_ctc, reader=None): - """Creates an input image tensor from the input_pattern filenames. - - TODO(rays) Expand for 2-d labels, 0-d labels, and logistic targets. - Args: - input_pattern: Filenames of the dataset(s) to read. - num_threads: Number of preprocessing threads. - shape: ImageShape with the desired shape of the input. - using_ctc: Take the unpadded_class labels instead of padded. - reader: Function that returns an actual reader to read Examples from - input files. If None, uses tf.TFRecordReader(). - Returns: - images: Float Tensor containing the input image scaled to [-1.28, 1.27]. - heights: Tensor int64 containing the heights of the images. - widths: Tensor int64 containing the widths of the images. - labels: Serialized SparseTensor containing the int64 labels. - sparse_labels: Serialized SparseTensor containing the int64 labels. 
- truths: Tensor string of the utf8 truth texts. - Raises: - ValueError: if the optimizer type is unrecognized. - """ - data_files = tf.gfile.Glob(input_pattern) - assert data_files, 'no files found for dataset ' + input_pattern - queue_capacity = shape.batch_size * num_threads * 2 - filename_queue = tf.train.string_input_producer( - data_files, capacity=queue_capacity) - - # Create a subgraph with its own reader (but sharing the - # filename_queue) for each preprocessing thread. - images_and_label_lists = [] - for _ in range(num_threads): - image, height, width, labels, text = _ReadExamples(filename_queue, shape, - using_ctc, reader) - images_and_label_lists.append([image, height, width, labels, text]) - # Create a queue that produces the examples in batches. - images, heights, widths, labels, truths = tf.train.batch_join( - images_and_label_lists, - batch_size=shape.batch_size, - capacity=16 * shape.batch_size, - dynamic_pad=True) - # Deserialize back to sparse, because the batcher doesn't do sparse. - labels = tf.deserialize_many_sparse(labels, tf.int64) - sparse_labels = tf.cast(labels, tf.int32) - labels = tf.sparse_tensor_to_dense(labels) - labels = tf.reshape(labels, [shape.batch_size, -1], name='Labels') - # Crush the other shapes to just the batch dimension. - heights = tf.reshape(heights, [-1], name='Heights') - widths = tf.reshape(widths, [-1], name='Widths') - truths = tf.reshape(truths, [-1], name='Truths') - # Give the images a nice name as well. - images = tf.identity(images, name='Images') - - tf.summary.image('Images', images) - return images, heights, widths, labels, sparse_labels, truths - - -def _ReadExamples(filename_queue, shape, using_ctc, reader=None): - """Builds network input tensor ops for TF Example. - - Args: - filename_queue: Queue of filenames, from tf.train.string_input_producer - shape: ImageShape with the desired shape of the input. - using_ctc: Take the unpadded_class labels instead of padded. 
- reader: Function that returns an actual reader to read Examples from - input files. If None, uses tf.TFRecordReader(). - Returns: - image: Float Tensor containing the input image scaled to [-1.28, 1.27]. - height: Tensor int64 containing the height of the image. - width: Tensor int64 containing the width of the image. - labels: Serialized SparseTensor containing the int64 labels. - text: Tensor string of the utf8 truth text. - """ - if reader: - reader = reader() - else: - reader = tf.TFRecordReader() - _, example_serialized = reader.read(filename_queue) - example_serialized = tf.reshape(example_serialized, shape=[]) - features = tf.parse_single_example( - example_serialized, - {'image/encoded': parsing_ops.FixedLenFeature( - [1], dtype=tf.string, default_value=''), - 'image/text': parsing_ops.FixedLenFeature( - [1], dtype=tf.string, default_value=''), - 'image/class': parsing_ops.VarLenFeature(dtype=tf.int64), - 'image/unpadded_class': parsing_ops.VarLenFeature(dtype=tf.int64), - 'image/height': parsing_ops.FixedLenFeature( - [1], dtype=tf.int64, default_value=1), - 'image/width': parsing_ops.FixedLenFeature( - [1], dtype=tf.int64, default_value=1)}) - if using_ctc: - labels = features['image/unpadded_class'] - else: - labels = features['image/class'] - labels = tf.serialize_sparse(labels) - image = tf.reshape(features['image/encoded'], shape=[], name='encoded') - image = _ImageProcessing(image, shape) - height = tf.reshape(features['image/height'], [-1]) - width = tf.reshape(features['image/width'], [-1]) - text = tf.reshape(features['image/text'], shape=[]) - - return image, height, width, labels, text - - -def _ImageProcessing(image_buffer, shape): - """Convert a PNG string into an input tensor. - - We allow for fixed and variable sizes. - Does fixed conversion to floats in the range [-1.28, 1.27]. - Args: - image_buffer: Tensor containing a PNG encoded image. - shape: ImageShape with the desired shape of the input. 
- Returns: - image: Decoded, normalized image in the range [-1.28, 1.27]. - """ - image = tf.image.decode_png(image_buffer, channels=shape.depth) - image.set_shape([shape.height, shape.width, shape.depth]) - image = tf.cast(image, tf.float32) - image = tf.subtract(image, 128.0) - image = tf.multiply(image, 1 / 100.0) - return image diff --git a/research/street/python/vgsl_model.py b/research/street/python/vgsl_model.py deleted file mode 100644 index 7533cd8d5bb781102e9e9b58c4a3259b79404fd2..0000000000000000000000000000000000000000 --- a/research/street/python/vgsl_model.py +++ /dev/null @@ -1,601 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""String network description language to define network layouts.""" -from __future__ import print_function - -import re -import time - -import decoder -import errorcounter as ec -import shapes -import tensorflow as tf -import vgsl_input -import vgslspecs -import tensorflow.contrib.slim as slim -from tensorflow.core.framework import summary_pb2 -from tensorflow.python.platform import tf_logging as logging - - -# Parameters for rate decay. 
-# We divide the learning_rate_halflife by DECAY_STEPS_FACTOR and use DECAY_RATE -# as the decay factor for the learning rate, ie we use the DECAY_STEPS_FACTORth -# root of 2 as the decay rate every halflife/DECAY_STEPS_FACTOR to achieve the -# desired halflife. -DECAY_STEPS_FACTOR = 16 -DECAY_RATE = pow(0.5, 1.0 / DECAY_STEPS_FACTOR) - - -def Train(train_dir, - model_str, - train_data, - max_steps, - master='', - task=0, - ps_tasks=0, - initial_learning_rate=0.001, - final_learning_rate=0.001, - learning_rate_halflife=160000, - optimizer_type='Adam', - num_preprocess_threads=1, - reader=None): - """Testable trainer with no dependence on FLAGS. - - Args: - train_dir: Directory to write checkpoints. - model_str: Network specification string. - train_data: Training data file pattern. - max_steps: Number of training steps to run. - master: Name of the TensorFlow master to use. - task: Task id of this replica running the training. (0 will be master). - ps_tasks: Number of tasks in ps job, or 0 if no ps job. - initial_learning_rate: Learing rate at start of training. - final_learning_rate: Asymptotic minimum learning rate. - learning_rate_halflife: Number of steps over which to halve the difference - between initial and final learning rate. - optimizer_type: One of 'GradientDescent', 'AdaGrad', 'Momentum', 'Adam'. - num_preprocess_threads: Number of input threads. - reader: Function that returns an actual reader to read Examples from input - files. If None, uses tf.TFRecordReader(). - """ - if master.startswith('local'): - device = tf.ReplicaDeviceSetter(ps_tasks) - else: - device = '/cpu:0' - with tf.Graph().as_default(): - with tf.device(device): - model = InitNetwork(train_data, model_str, 'train', initial_learning_rate, - final_learning_rate, learning_rate_halflife, - optimizer_type, num_preprocess_threads, reader) - - # Create a Supervisor. It will take care of initialization, summaries, - # checkpoints, and recovery. 
- # - # When multiple replicas of this program are running, the first one, - # identified by --task=0 is the 'chief' supervisor. It is the only one - # that takes case of initialization, etc. - sv = tf.train.Supervisor( - logdir=train_dir, - is_chief=(task == 0), - saver=model.saver, - save_summaries_secs=10, - save_model_secs=30, - recovery_wait_secs=5) - - step = 0 - while step < max_steps: - try: - # Get an initialized, and possibly recovered session. Launch the - # services: Checkpointing, Summaries, step counting. - with sv.managed_session(master) as sess: - while step < max_steps: - _, step = model.TrainAStep(sess) - if sv.coord.should_stop(): - break - except tf.errors.AbortedError as e: - logging.error('Received error:%s', e) - continue - - -def Eval(train_dir, - eval_dir, - model_str, - eval_data, - decoder_file, - num_steps, - graph_def_file=None, - eval_interval_secs=0, - reader=None): - """Restores a model from a checkpoint and evaluates it. - - Args: - train_dir: Directory to find checkpoints. - eval_dir: Directory to write summary events. - model_str: Network specification string. - eval_data: Evaluation data file pattern. - decoder_file: File to read to decode the labels. - num_steps: Number of eval steps to run. - graph_def_file: File to write graph definition to for freezing. - eval_interval_secs: How often to run evaluations, or once if 0. - reader: Function that returns an actual reader to read Examples from input - files. If None, uses tf.TFRecordReader(). - Returns: - (char error rate, word recall error rate, sequence error rate) as percent. - Raises: - ValueError: If unimplemented feature is used. - """ - decode = None - if decoder_file: - decode = decoder.Decoder(decoder_file) - - # Run eval. 
- rates = ec.ErrorRates( - label_error=None, - word_recall_error=None, - word_precision_error=None, - sequence_error=None) - with tf.Graph().as_default(): - model = InitNetwork(eval_data, model_str, 'eval', reader=reader) - sw = tf.summary.FileWriter(eval_dir) - - while True: - sess = tf.Session('') - if graph_def_file is not None: - # Write the eval version of the graph to a file for freezing. - if not tf.gfile.Exists(graph_def_file): - with tf.gfile.FastGFile(graph_def_file, 'w') as f: - f.write( - sess.graph.as_graph_def(add_shapes=True).SerializeToString()) - ckpt = tf.train.get_checkpoint_state(train_dir) - if ckpt and ckpt.model_checkpoint_path: - step = model.Restore(ckpt.model_checkpoint_path, sess) - if decode: - rates = decode.SoftmaxEval(sess, model, num_steps) - _AddRateToSummary('Label error rate', rates.label_error, step, sw) - _AddRateToSummary('Word recall error rate', rates.word_recall_error, - step, sw) - _AddRateToSummary('Word precision error rate', - rates.word_precision_error, step, sw) - _AddRateToSummary('Sequence error rate', rates.sequence_error, step, - sw) - sw.flush() - print('Error rates=', rates) - else: - raise ValueError('Non-softmax decoder evaluation not implemented!') - if eval_interval_secs: - time.sleep(eval_interval_secs) - else: - break - return rates - - -def InitNetwork(input_pattern, - model_spec, - mode='eval', - initial_learning_rate=0.00005, - final_learning_rate=0.00005, - halflife=1600000, - optimizer_type='Adam', - num_preprocess_threads=1, - reader=None): - """Constructs a python tensor flow model defined by model_spec. - - Args: - input_pattern: File pattern of the data in tfrecords of Example. - model_spec: Concatenation of input spec, model spec and output spec. - See Build below for input/output spec. For model spec, see vgslspecs.py - mode: One of 'train', 'eval' - initial_learning_rate: Initial learning rate for the network. - final_learning_rate: Final learning rate for the network. 
- halflife: Number of steps over which to halve the difference between - initial and final learning rate for the network. - optimizer_type: One of 'GradientDescent', 'AdaGrad', 'Momentum', 'Adam'. - num_preprocess_threads: Number of threads to use for image processing. - reader: Function that returns an actual reader to read Examples from input - files. If None, uses tf.TFRecordReader(). - Eval tasks need only specify input_pattern and model_spec. - - Returns: - A VGSLImageModel class. - - Raises: - ValueError: if the model spec syntax is incorrect. - """ - model = VGSLImageModel(mode, model_spec, initial_learning_rate, - final_learning_rate, halflife) - left_bracket = model_spec.find('[') - right_bracket = model_spec.rfind(']') - if left_bracket < 0 or right_bracket < 0: - raise ValueError('Failed to find [] in model spec! ', model_spec) - input_spec = model_spec[:left_bracket] - layer_spec = model_spec[left_bracket:right_bracket + 1] - output_spec = model_spec[right_bracket + 1:] - model.Build(input_pattern, input_spec, layer_spec, output_spec, - optimizer_type, num_preprocess_threads, reader) - return model - - -class VGSLImageModel(object): - """Class that builds a tensor flow model for training or evaluation. - """ - - def __init__(self, mode, model_spec, initial_learning_rate, - final_learning_rate, halflife): - """Constructs a VGSLImageModel. - - Args: - mode: One of "train", "eval" - model_spec: Full model specification string, for reference only. - initial_learning_rate: Initial learning rate for the network. - final_learning_rate: Final learning rate for the network. - halflife: Number of steps over which to halve the difference between - initial and final learning rate for the network. - """ - # The string that was used to build this model. - self.model_spec = model_spec - # The layers between input and output. - self.layers = None - # The train/eval mode. - self.mode = mode - # The initial learning rate. 
- self.initial_learning_rate = initial_learning_rate - self.final_learning_rate = final_learning_rate - self.decay_steps = halflife / DECAY_STEPS_FACTOR - self.decay_rate = DECAY_RATE - # Tensor for the labels. - self.labels = None - self.sparse_labels = None - # Debug data containing the truth text. - self.truths = None - # Tensor for loss - self.loss = None - # Train operation - self.train_op = None - # Tensor for the global step counter - self.global_step = None - # Tensor for the output predictions (usually softmax) - self.output = None - # True if we are using CTC training mode. - self.using_ctc = False - # Saver object to load or restore the variables. - self.saver = None - - def Build(self, input_pattern, input_spec, model_spec, output_spec, - optimizer_type, num_preprocess_threads, reader): - """Builds the model from the separate input/layers/output spec strings. - - Args: - input_pattern: File pattern of the data in tfrecords of TF Example format. - input_spec: Specification of the input layer: - batchsize,height,width,depth (4 comma-separated integers) - Training will run with batches of batchsize images, but runtime can - use any batch size. - height and/or width can be 0 or -1, indicating variable size, - otherwise all images must be the given size. - depth must be 1 or 3 to indicate greyscale or color. - NOTE 1-d image input, treating the y image dimension as depth, can - be achieved using S1(1x0)1,3 as the first op in the model_spec, but - the y-size of the input must then be fixed. - model_spec: Model definition. See vgslspecs.py - output_spec: Output layer definition: - O(2|1|0)(l|s|c)n output layer with n classes. - 2 (heatmap) Output is a 2-d vector map of the input (possibly at - different scale). - 1 (sequence) Output is a 1-d sequence of vector values. - 0 (value) Output is a 0-d single vector value. - l uses a logistic non-linearity on the output, allowing multiple - hot elements in any output vector value. 
- s uses a softmax non-linearity, with one-hot output in each value. - c uses a softmax with CTC. Can only be used with s (sequence). - NOTE Only O1s and O1c are currently supported. - optimizer_type: One of 'GradientDescent', 'AdaGrad', 'Momentum', 'Adam'. - num_preprocess_threads: Number of threads to use for image processing. - reader: Function that returns an actual reader to read Examples from input - files. If None, uses tf.TFRecordReader(). - """ - self.global_step = tf.Variable(0, name='global_step', trainable=False) - shape = _ParseInputSpec(input_spec) - out_dims, out_func, num_classes = _ParseOutputSpec(output_spec) - self.using_ctc = out_func == 'c' - images, heights, widths, labels, sparse, _ = vgsl_input.ImageInput( - input_pattern, num_preprocess_threads, shape, self.using_ctc, reader) - self.labels = labels - self.sparse_labels = sparse - self.layers = vgslspecs.VGSLSpecs(widths, heights, self.mode == 'train') - last_layer = self.layers.Build(images, model_spec) - self._AddOutputs(last_layer, out_dims, out_func, num_classes) - if self.mode == 'train': - self._AddOptimizer(optimizer_type) - - # For saving the model across training and evaluation - self.saver = tf.train.Saver() - - def TrainAStep(self, sess): - """Runs a training step in the session. - - Args: - sess: Session in which to train the model. - Returns: - loss, global_step. - """ - _, loss, step = sess.run([self.train_op, self.loss, self.global_step]) - return loss, step - - def Restore(self, checkpoint_path, sess): - """Restores the model from the given checkpoint path into the session. - - Args: - checkpoint_path: File pathname of the checkpoint. - sess: Session in which to restore the model. - Returns: - global_step of the model. - """ - self.saver.restore(sess, checkpoint_path) - return tf.train.global_step(sess, self.global_step) - - def RunAStep(self, sess): - """Runs a step for eval in the session. - - Args: - sess: Session in which to run the model. 
- Returns: - output tensor result, labels tensor result. - """ - return sess.run([self.output, self.labels]) - - def _AddOutputs(self, prev_layer, out_dims, out_func, num_classes): - """Adds the output layer and loss function. - - Args: - prev_layer: Output of last layer of main network. - out_dims: Number of output dimensions, 0, 1 or 2. - out_func: Output non-linearity. 's' or 'c'=softmax, 'l'=logistic. - num_classes: Number of outputs/size of last output dimension. - """ - height_in = shapes.tensor_dim(prev_layer, dim=1) - logits, outputs = self._AddOutputLayer(prev_layer, out_dims, out_func, - num_classes) - if self.mode == 'train': - # Setup loss for training. - self.loss = self._AddLossFunction(logits, height_in, out_dims, out_func) - tf.summary.scalar('loss', self.loss) - elif out_dims == 0: - # Be sure the labels match the output, even in eval mode. - self.labels = tf.slice(self.labels, [0, 0], [-1, 1]) - self.labels = tf.reshape(self.labels, [-1]) - - logging.info('Final output=%s', outputs) - logging.info('Labels tensor=%s', self.labels) - self.output = outputs - - def _AddOutputLayer(self, prev_layer, out_dims, out_func, num_classes): - """Add the fully-connected logits and SoftMax/Logistic output Layer. - - Args: - prev_layer: Output of last layer of main network. - out_dims: Number of output dimensions, 0, 1 or 2. - out_func: Output non-linearity. 's' or 'c'=softmax, 'l'=logistic. - num_classes: Number of outputs/size of last output dimension. - - Returns: - logits: Pre-softmax/logistic fully-connected output shaped to out_dims. - outputs: Post-softmax/logistic shaped to out_dims. - - Raises: - ValueError: if syntax is incorrect. - """ - # Reduce dimensionality appropriate to the output dimensions. 
- batch_in = shapes.tensor_dim(prev_layer, dim=0) - height_in = shapes.tensor_dim(prev_layer, dim=1) - width_in = shapes.tensor_dim(prev_layer, dim=2) - depth_in = shapes.tensor_dim(prev_layer, dim=3) - if out_dims: - # Combine any remaining height and width with batch and unpack after. - shaped = tf.reshape(prev_layer, [-1, depth_in]) - else: - # Everything except batch goes to depth, and therefore has to be known. - shaped = tf.reshape(prev_layer, [-1, height_in * width_in * depth_in]) - logits = slim.fully_connected(shaped, num_classes, activation_fn=None) - if out_func == 'l': - raise ValueError('Logistic not yet supported!') - else: - output = tf.nn.softmax(logits) - # Reshape to the dessired output. - if out_dims == 2: - output_shape = [batch_in, height_in, width_in, num_classes] - elif out_dims == 1: - output_shape = [batch_in, height_in * width_in, num_classes] - else: - output_shape = [batch_in, num_classes] - output = tf.reshape(output, output_shape, name='Output') - logits = tf.reshape(logits, output_shape) - return logits, output - - def _AddLossFunction(self, logits, height_in, out_dims, out_func): - """Add the appropriate loss function. - - Args: - logits: Pre-softmax/logistic fully-connected output shaped to out_dims. - height_in: Height of logits before going into the softmax layer. - out_dims: Number of output dimensions, 0, 1 or 2. - out_func: Output non-linearity. 's' or 'c'=softmax, 'l'=logistic. - - Returns: - loss: That which is to be minimized. - - Raises: - ValueError: if logistic is used. - """ - if out_func == 'c': - # Transpose batch to the middle. - ctc_input = tf.transpose(logits, [1, 0, 2]) - # Compute the widths of each batch element from the input widths. 
- widths = self.layers.GetLengths(dim=2, factor=height_in) - cross_entropy = tf.nn.ctc_loss(ctc_input, self.sparse_labels, widths) - elif out_func == 's': - if out_dims == 2: - self.labels = _PadLabels3d(logits, self.labels) - elif out_dims == 1: - self.labels = _PadLabels2d( - shapes.tensor_dim( - logits, dim=1), self.labels) - else: - self.labels = tf.slice(self.labels, [0, 0], [-1, 1]) - self.labels = tf.reshape(self.labels, [-1]) - cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( - logits=logits, labels=self.labels, name='xent') - else: - # TODO(rays) Labels need an extra dimension for logistic, so different - # padding functions are needed, as well as a different loss function. - raise ValueError('Logistic not yet supported!') - return tf.reduce_sum(cross_entropy) - - def _AddOptimizer(self, optimizer_type): - """Adds an optimizer with learning rate decay to minimize self.loss. - - Args: - optimizer_type: One of 'GradientDescent', 'AdaGrad', 'Momentum', 'Adam'. - Raises: - ValueError: if the optimizer type is unrecognized. - """ - learn_rate_delta = self.initial_learning_rate - self.final_learning_rate - learn_rate_dec = tf.add( - tf.train.exponential_decay(learn_rate_delta, self.global_step, - self.decay_steps, self.decay_rate), - self.final_learning_rate) - if optimizer_type == 'GradientDescent': - opt = tf.train.GradientDescentOptimizer(learn_rate_dec) - elif optimizer_type == 'AdaGrad': - opt = tf.train.AdagradOptimizer(learn_rate_dec) - elif optimizer_type == 'Momentum': - opt = tf.train.MomentumOptimizer(learn_rate_dec, momentum=0.9) - elif optimizer_type == 'Adam': - opt = tf.train.AdamOptimizer(learning_rate=learn_rate_dec) - else: - raise ValueError('Invalid optimizer type: ' + optimizer_type) - tf.summary.scalar('learn_rate', learn_rate_dec) - - self.train_op = opt.minimize( - self.loss, global_step=self.global_step, name='train') - - -def _PadLabels3d(logits, labels): - """Pads or slices 3-d labels to match logits. 
- - Covers the case of 2-d softmax output, when labels is [batch, height, width] - and logits is [batch, height, width, onehot] - Args: - logits: 4-d Pre-softmax fully-connected output. - labels: 3-d, but not necessarily matching in size. - - Returns: - labels: Resized by padding or clipping to match logits. - """ - logits_shape = shapes.tensor_shape(logits) - labels_shape = shapes.tensor_shape(labels) - labels = tf.reshape(labels, [-1, labels_shape[2]]) - labels = _PadLabels2d(logits_shape[2], labels) - labels = tf.reshape(labels, [labels_shape[0], -1]) - labels = _PadLabels2d(logits_shape[1] * logits_shape[2], labels) - return tf.reshape(labels, [labels_shape[0], logits_shape[1], logits_shape[2]]) - - -def _PadLabels2d(logits_size, labels): - """Pads or slices the 2nd dimension of 2-d labels to match logits_size. - - Covers the case of 1-d softmax output, when labels is [batch, seq] and - logits is [batch, seq, onehot] - Args: - logits_size: Tensor returned from tf.shape giving the target size. - labels: 2-d, but not necessarily matching in size. - - Returns: - labels: Resized by padding or clipping the last dimension to logits_size. - """ - pad = logits_size - tf.shape(labels)[1] - - def _PadFn(): - return tf.pad(labels, [[0, 0], [0, pad]]) - - def _SliceFn(): - return tf.slice(labels, [0, 0], [-1, logits_size]) - - return tf.cond(tf.greater(pad, 0), _PadFn, _SliceFn) - - -def _ParseInputSpec(input_spec): - """Parses input_spec and returns the numbers obtained therefrom. - - Args: - input_spec: Specification of the input layer. See Build. - - Returns: - shape: ImageShape with the desired shape of the input. - - Raises: - ValueError: if syntax is incorrect. 
- """ - pattern = re.compile(R'(\d+),(\d+),(\d+),(\d+)') - m = pattern.match(input_spec) - if m is None: - raise ValueError('Failed to parse input spec:' + input_spec) - batch_size = int(m.group(1)) - y_size = int(m.group(2)) if int(m.group(2)) > 0 else None - x_size = int(m.group(3)) if int(m.group(3)) > 0 else None - depth = int(m.group(4)) - if depth not in [1, 3]: - raise ValueError('Depth must be 1 or 3, had:', depth) - return vgsl_input.ImageShape(batch_size, y_size, x_size, depth) - - -def _ParseOutputSpec(output_spec): - """Parses the output spec. - - Args: - output_spec: Output layer definition. See Build. - - Returns: - out_dims: 2|1|0 for 2-d, 1-d, 0-d. - out_func: l|s|c for logistic, softmax, softmax+CTC - num_classes: Number of classes in output. - - Raises: - ValueError: if syntax is incorrect. - """ - pattern = re.compile(R'(O)(0|1|2)(l|s|c)(\d+)') - m = pattern.match(output_spec) - if m is None: - raise ValueError('Failed to parse output spec:' + output_spec) - out_dims = int(m.group(2)) - out_func = m.group(3) - if out_func == 'c' and out_dims != 1: - raise ValueError('CTC can only be used with a 1-D sequence!') - num_classes = int(m.group(4)) - return out_dims, out_func, num_classes - - -def _AddRateToSummary(tag, rate, step, sw): - """Adds the given rate to the summary with the given tag. - - Args: - tag: Name for this value. - rate: Value to add to the summary. Perhaps an error rate. - step: Global step of the graph for the x-coordinate of the summary. - sw: Summary writer to which to write the rate value. 
- """ - sw.add_summary( - summary_pb2.Summary(value=[summary_pb2.Summary.Value( - tag=tag, simple_value=rate)]), step) diff --git a/research/street/python/vgsl_model_test.py b/research/street/python/vgsl_model_test.py deleted file mode 100644 index fd23961592a77136539f8110f82f1ada64a5eacf..0000000000000000000000000000000000000000 --- a/research/street/python/vgsl_model_test.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for vgsl_model.""" -import os - -import numpy as np -import tensorflow as tf -import vgsl_input -import vgsl_model - - -def _testdata(filename): - return os.path.join('../testdata/', filename) - - -def _rand(*size): - return np.random.uniform(size=size).astype('f') - - -class VgslModelTest(tf.test.TestCase): - - def testParseInputSpec(self): - """The parser must return the numbers in the correct order. - """ - shape = vgsl_model._ParseInputSpec(input_spec='32,42,256,3') - self.assertEqual( - shape, - vgsl_input.ImageShape( - batch_size=32, height=42, width=256, depth=3)) - # Nones must be inserted for zero sizes. 
- shape = vgsl_model._ParseInputSpec(input_spec='1,0,0,3') - self.assertEqual( - shape, - vgsl_input.ImageShape( - batch_size=1, height=None, width=None, depth=3)) - - def testParseOutputSpec(self): - """The parser must return the correct args in the correct order. - """ - out_dims, out_func, num_classes = vgsl_model._ParseOutputSpec( - output_spec='O1c142') - self.assertEqual(out_dims, 1) - self.assertEqual(out_func, 'c') - self.assertEqual(num_classes, 142) - out_dims, out_func, num_classes = vgsl_model._ParseOutputSpec( - output_spec='O2s99') - self.assertEqual(out_dims, 2) - self.assertEqual(out_func, 's') - self.assertEqual(num_classes, 99) - out_dims, out_func, num_classes = vgsl_model._ParseOutputSpec( - output_spec='O0l12') - self.assertEqual(out_dims, 0) - self.assertEqual(out_func, 'l') - self.assertEqual(num_classes, 12) - - def testPadLabels2d(self): - """Must pad timesteps in labels to match logits. - """ - with self.test_session() as sess: - # Make placeholders for logits and labels. - ph_logits = tf.placeholder(tf.float32, shape=(None, None, 42)) - ph_labels = tf.placeholder(tf.int64, shape=(None, None)) - padded_labels = vgsl_model._PadLabels2d(tf.shape(ph_logits)[1], ph_labels) - # Make actual inputs. - real_logits = _rand(4, 97, 42) - real_labels = _rand(4, 85) - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (4, 97)) - real_labels = _rand(4, 97) - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (4, 97)) - real_labels = _rand(4, 100) - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (4, 97)) - - def testPadLabels3d(self): - """Must pad height and width in labels to match logits. 
- - The tricky thing with 3-d is that the rows and columns need to remain - intact, so we'll test it with small known data. - """ - with self.test_session() as sess: - # Make placeholders for logits and labels. - ph_logits = tf.placeholder(tf.float32, shape=(None, None, None, 42)) - ph_labels = tf.placeholder(tf.int64, shape=(None, None, None)) - padded_labels = vgsl_model._PadLabels3d(ph_logits, ph_labels) - # Make actual inputs. - real_logits = _rand(1, 3, 4, 42) - # Test all 9 combinations of height x width in [small, ok, big] - real_labels = np.arange(6).reshape((1, 2, 3)) # Height small, width small - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 0], [3, 4, 5, 0], [0, 0, 0, 0]]) - real_labels = np.arange(8).reshape((1, 2, 4)) # Height small, width ok - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [4, 5, 6, 7], [0, 0, 0, 0]]) - real_labels = np.arange(10).reshape((1, 2, 5)) # Height small, width big - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [5, 6, 7, 8], [0, 0, 0, 0]]) - real_labels = np.arange(9).reshape((1, 3, 3)) # Height ok, width small - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0]]) - real_labels = np.arange(12).reshape((1, 3, 4)) # Height ok, width ok - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - 
self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) - real_labels = np.arange(15).reshape((1, 3, 5)) # Height ok, width big - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]]) - real_labels = np.arange(12).reshape((1, 4, 3)) # Height big, width small - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 0], [3, 4, 5, 0], [6, 7, 8, 0]]) - real_labels = np.arange(16).reshape((1, 4, 4)) # Height big, width ok - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]) - real_labels = np.arange(20).reshape((1, 4, 5)) # Height big, width big - np_array = sess.run([padded_labels], - feed_dict={ph_logits: real_logits, - ph_labels: real_labels})[0] - self.assertEqual(tuple(np_array.shape), (1, 3, 4)) - self.assertAllEqual(np_array[0, :, :], - [[0, 1, 2, 3], [5, 6, 7, 8], [10, 11, 12, 13]]) - - def testEndToEndSizes0d(self): - """Tests that the output sizes match when training/running real 0d data. - - Uses mnist with dual summarizing LSTMs to reduce to a single value. 
- """ - filename = _testdata('mnist-tiny') - with self.test_session() as sess: - model = vgsl_model.InitNetwork( - filename, - model_spec='4,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lfxs16]O0s12', - mode='train') - tf.global_variables_initializer().run(session=sess) - coord = tf.train.Coordinator() - tf.train.start_queue_runners(sess=sess, coord=coord) - _, step = model.TrainAStep(sess) - self.assertEqual(step, 1) - output, labels = model.RunAStep(sess) - self.assertEqual(len(output.shape), 2) - self.assertEqual(len(labels.shape), 1) - self.assertEqual(output.shape[0], labels.shape[0]) - self.assertEqual(output.shape[1], 12) - - # TODO(rays) Support logistic and test with Imagenet (as 0d, multi-object.) - - def testEndToEndSizes1dCTC(self): - """Tests that the output sizes match when training with CTC. - - Basic bidi LSTM on top of convolution and summarizing LSTM with CTC. - """ - filename = _testdata('arial-32-tiny') - with self.test_session() as sess: - model = vgsl_model.InitNetwork( - filename, - model_spec='2,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lbx100]O1c105', - mode='train') - tf.global_variables_initializer().run(session=sess) - coord = tf.train.Coordinator() - tf.train.start_queue_runners(sess=sess, coord=coord) - _, step = model.TrainAStep(sess) - self.assertEqual(step, 1) - output, labels = model.RunAStep(sess) - self.assertEqual(len(output.shape), 3) - self.assertEqual(len(labels.shape), 2) - self.assertEqual(output.shape[0], labels.shape[0]) - # This is ctc - the only cast-iron guarantee is labels <= output. - self.assertLessEqual(labels.shape[1], output.shape[1]) - self.assertEqual(output.shape[2], 105) - - def testEndToEndSizes1dFixed(self): - """Tests that the output sizes match when training/running 1 data. - - Convolution, summarizing LSTM with fwd rev fwd to allow no CTC. 
- """ - filename = _testdata('numbers-16-tiny') - with self.test_session() as sess: - model = vgsl_model.InitNetwork( - filename, - model_spec='8,0,0,1[Cr5,5,16 Mp3,3 Lfys16 Lfx64 Lrx64 Lfx64]O1s12', - mode='train') - tf.global_variables_initializer().run(session=sess) - coord = tf.train.Coordinator() - tf.train.start_queue_runners(sess=sess, coord=coord) - _, step = model.TrainAStep(sess) - self.assertEqual(step, 1) - output, labels = model.RunAStep(sess) - self.assertEqual(len(output.shape), 3) - self.assertEqual(len(labels.shape), 2) - self.assertEqual(output.shape[0], labels.shape[0]) - # Not CTC, output lengths match. - self.assertEqual(output.shape[1], labels.shape[1]) - self.assertEqual(output.shape[2], 12) - - # TODO(rays) Get a 2-d dataset and support 2d (heat map) outputs. - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/street/python/vgsl_train.py b/research/street/python/vgsl_train.py deleted file mode 100644 index 8dd83089730a7af93513204d5bf1630e68d11bf7..0000000000000000000000000000000000000000 --- a/research/street/python/vgsl_train.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Model trainer for single or multi-replica training.""" -from tensorflow import app -from tensorflow.python.platform import flags - -import vgsl_model - -flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.') -flags.DEFINE_string('train_dir', '/tmp/mdir', - 'Directory where to write event logs.') -flags.DEFINE_string('model_str', - '1,150,600,3[S2(4x150)0,2 Ct5,5,16 Mp2,2 Ct5,5,64 Mp3,3' - '([Lrys64 Lbx128][Lbys64 Lbx128][Lfys64 Lbx128])S3(3x0)2,3' - 'Lfx128 Lrx128 S0(1x4)0,3 Do Lfx256]O1c134', - 'Network description.') -flags.DEFINE_integer('max_steps', 10000, 'Number of steps to train for.') -flags.DEFINE_integer('task', 0, 'Task id of the replica running the training.') -flags.DEFINE_integer('ps_tasks', 0, 'Number of tasks in the ps job.' - 'If 0 no ps job is used.') -flags.DEFINE_string('train_data', None, 'Training data filepattern') -flags.DEFINE_float('initial_learning_rate', 0.00002, 'Initial learning rate') -flags.DEFINE_float('final_learning_rate', 0.00002, 'Final learning rate') -flags.DEFINE_integer('learning_rate_halflife', 1600000, - 'Halflife of learning rate') -flags.DEFINE_string('optimizer_type', 'Adam', - 'Optimizer from:GradientDescent, AdaGrad, Momentum, Adam') -flags.DEFINE_integer('num_preprocess_threads', 4, 'Number of input threads') - -FLAGS = flags.FLAGS - - -def main(argv): - del argv - vgsl_model.Train(FLAGS.train_dir, FLAGS.model_str, FLAGS.train_data, - FLAGS.max_steps, FLAGS.master, FLAGS.task, FLAGS.ps_tasks, - FLAGS.initial_learning_rate, FLAGS.final_learning_rate, - FLAGS.learning_rate_halflife, FLAGS.optimizer_type, - FLAGS.num_preprocess_threads) - - -if __name__ == '__main__': - app.run() diff --git a/research/street/python/vgslspecs.py b/research/street/python/vgslspecs.py deleted file mode 100644 index 36b5c668a3e5c6b11cf51f899d2a2dd358cca8b2..0000000000000000000000000000000000000000 --- 
a/research/street/python/vgslspecs.py +++ /dev/null @@ -1,534 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""String network description language mapping to TF-Slim calls where possible. - -See vglspecs.md for detailed description. -""" - -import re -from string import maketrans - -import nn_ops -import shapes -from six.moves import xrange -import tensorflow as tf -import tensorflow.contrib.slim as slim - - -# Class that builds a set of ops to manipulate variable-sized images. -class VGSLSpecs(object): - """Layers that can be built from a string definition.""" - - def __init__(self, widths, heights, is_training): - """Constructs a VGSLSpecs. - - Args: - widths: Tensor of size batch_size of the widths of the inputs. - heights: Tensor of size batch_size of the heights of the inputs. - is_training: True if the graph should be build for training. - """ - # The string that was used to build this model. - self.model_str = None - # True if we are training - self.is_training = is_training - # Tensor for the size of the images, of size batch_size. - self.widths = widths - self.heights = heights - # Overall reduction factors of this model so far for each dimension. - # TODO(rays) consider building a graph from widths and heights instead of - # computing a scale factor. 
- self.reduction_factors = [1.0, 1.0, 1.0, 1.0] - # List of Op parsers. - # TODO(rays) add more Op types as needed. - self.valid_ops = [self.AddSeries, self.AddParallel, self.AddConvLayer, - self.AddMaxPool, self.AddDropout, self.AddReShape, - self.AddFCLayer, self.AddLSTMLayer] - # Translation table to convert unacceptable characters that may occur - # in op strings that cannot be used as names. - self.transtab = maketrans('(,)', '___') - - def Build(self, prev_layer, model_str): - """Builds a network with input prev_layer from a VGSLSpecs description. - - Args: - prev_layer: The input tensor. - model_str: Model definition similar to Tesseract as follows: - ============ FUNCTIONAL OPS ============ - C(s|t|r|l|m)[{name}],, Convolves using a y,x window, with no - shrinkage, SAME infill, d outputs, with s|t|r|l|m non-linear layer. - (s|t|r|l|m) specifies the type of non-linearity: - s = sigmoid - t = tanh - r = relu - l = linear (i.e., None) - m = softmax - F(s|t|r|l|m)[{name}] Fully-connected with s|t|r|l|m non-linearity and - d outputs. Reduces height, width to 1. Input height and width must be - constant. - L(f|r|b)(x|y)[s][{name}] LSTM cell with n outputs. - f runs the LSTM forward only. - r runs the LSTM reversed only. - b runs the LSTM bidirectionally. - x runs the LSTM in the x-dimension (on data with or without the - y-dimension). - y runs the LSTM in the y-dimension (data must have a y dimension). - s (optional) summarizes the output in the requested dimension, - outputting only the final step, collapsing the dimension to a - single element. - Examples: - Lfx128 runs a forward-only LSTM in the x-dimension with 128 - outputs, treating any y dimension independently. - Lfys64 runs a forward-only LSTM in the y-dimension with 64 outputs - and collapses the y-dimension to 1 element. - NOTE that Lbxsn is implemented as (LfxsnLrxsn) since the summaries - need to be taken from opposite ends of the output - Do[{name}] Insert a dropout layer. 
- ============ PLUMBING OPS ============ - [...] Execute ... networks in series (layers). - (...) Execute ... networks in parallel, with their output concatenated - in depth. - S[{name}](x), Splits one dimension, moves one part to - another dimension. - Splits input dimension d into a x b, sending the high part (a) to the - high side of dimension e, and the low part (b) to the high side of - dimension f. Exception: if d=e=f, then then dimension d is internally - transposed to bxa. - Either a or b can be zero, meaning whatever is left after taking out - the other, allowing dimensions to be of variable size. - Eg. S3(3x50)2,3 will split the 150-element depth into 3x50, with the 3 - going to the most significant part of the width, and the 50 part - staying in depth. - This will rearrange a 3x50 output parallel operation to spread the 3 - output sets over width. - Mp[{name}], Maxpool the input, reducing the (y,x) rectangle to a - single vector value. - - Returns: - Output tensor - """ - self.model_str = model_str - final_layer, _ = self.BuildFromString(prev_layer, 0) - return final_layer - - def GetLengths(self, dim=2, factor=1): - """Returns the lengths of the batch of elements in the given dimension. - - WARNING: The returned sizes may not exactly match TF's calculation. - Args: - dim: dimension to get the sizes of, in [1,2]. batch, depth not allowed. - factor: A scalar value to multiply by. - - Returns: - The original heights/widths scaled by the current scaling of the model and - the given factor. - - Raises: - ValueError: If the args are invalid. 
- """ - if dim == 1: - lengths = self.heights - elif dim == 2: - lengths = self.widths - else: - raise ValueError('Invalid dimension given to GetLengths') - lengths = tf.cast(lengths, tf.float32) - if self.reduction_factors[dim] is not None: - lengths = tf.div(lengths, self.reduction_factors[dim]) - else: - lengths = tf.ones_like(lengths) - if factor != 1: - lengths = tf.multiply(lengths, tf.cast(factor, tf.float32)) - return tf.cast(lengths, tf.int32) - - def BuildFromString(self, prev_layer, index): - """Adds the layers defined by model_str[index:] to the model. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, next model_str index. - - Raises: - ValueError: If the model string is unrecognized. - """ - index = self._SkipWhitespace(index) - for op in self.valid_ops: - output_layer, next_index = op(prev_layer, index) - if output_layer is not None: - return output_layer, next_index - if output_layer is not None: - return output_layer, next_index - raise ValueError('Unrecognized model string:' + self.model_str[index:]) - - def AddSeries(self, prev_layer, index): - """Builds a sequence of layers for a VGSLSpecs model. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor of the series, end index in model_str. - - Raises: - ValueError: If [] are unbalanced. - """ - if self.model_str[index] != '[': - return None, None - index += 1 - while index < len(self.model_str) and self.model_str[index] != ']': - prev_layer, index = self.BuildFromString(prev_layer, index) - if index == len(self.model_str): - raise ValueError('Missing ] at end of series!' + self.model_str) - return prev_layer, index + 1 - - def AddParallel(self, prev_layer, index): - """tf.concats outputs of layers that run on the same inputs. - - Args: - prev_layer: Input tensor. 
- index: Position in model_str to start parsing - - Returns: - Output tensor of the parallel, end index in model_str. - - Raises: - ValueError: If () are unbalanced or the elements don't match. - """ - if self.model_str[index] != '(': - return None, None - index += 1 - layers = [] - num_dims = 0 - # Each parallel must output the same, including any reduction factor, in - # all dimensions except depth. - # We have to save the starting factors, so they don't get reduced by all - # the elements of the parallel, only once. - original_factors = self.reduction_factors - final_factors = None - while index < len(self.model_str) and self.model_str[index] != ')': - self.reduction_factors = original_factors - layer, index = self.BuildFromString(prev_layer, index) - if num_dims == 0: - num_dims = len(layer.get_shape()) - elif num_dims != len(layer.get_shape()): - raise ValueError('All elements of parallel must return same num dims') - layers.append(layer) - if final_factors: - if final_factors != self.reduction_factors: - raise ValueError('All elements of parallel must scale the same') - else: - final_factors = self.reduction_factors - if index == len(self.model_str): - raise ValueError('Missing ) at end of parallel!' + self.model_str) - return tf.concat(axis=num_dims - 1, values=layers), index + 1 - - def AddConvLayer(self, prev_layer, index): - """Add a single standard convolutional layer. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. 
- """ - pattern = re.compile(R'(C)(s|t|r|l|m)({\w+})?(\d+),(\d+),(\d+)') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - name = self._GetLayerName(m.group(0), index, m.group(3)) - width = int(m.group(4)) - height = int(m.group(5)) - depth = int(m.group(6)) - fn = self._NonLinearity(m.group(2)) - return slim.conv2d( - prev_layer, depth, [height, width], activation_fn=fn, - scope=name), m.end() - - def AddMaxPool(self, prev_layer, index): - """Add a maxpool layer. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. - """ - pattern = re.compile(R'(Mp)({\w+})?(\d+),(\d+)(?:,(\d+),(\d+))?') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - name = self._GetLayerName(m.group(0), index, m.group(2)) - height = int(m.group(3)) - width = int(m.group(4)) - y_stride = height if m.group(5) is None else m.group(5) - x_stride = width if m.group(6) is None else m.group(6) - self.reduction_factors[1] *= y_stride - self.reduction_factors[2] *= x_stride - return slim.max_pool2d( - prev_layer, [height, width], [y_stride, x_stride], - padding='SAME', - scope=name), m.end() - - def AddDropout(self, prev_layer, index): - """Adds a dropout layer. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. - """ - pattern = re.compile(R'(Do)({\w+})?') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - name = self._GetLayerName(m.group(0), index, m.group(2)) - layer = slim.dropout( - prev_layer, 0.5, is_training=self.is_training, scope=name) - return layer, m.end() - - def AddReShape(self, prev_layer, index): - """Reshapes the input tensor by moving each (x_scale,y_scale) rectangle to. - - the depth dimension. NOTE that the TF convention is that inputs are - [batch, y, x, depth]. - - Args: - prev_layer: Input tensor. 
- index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. - """ - pattern = re.compile(R'(S)(?:{(\w)})?(\d+)\((\d+)x(\d+)\)(\d+),(\d+)') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - name = self._GetLayerName(m.group(0), index, m.group(2)) - src_dim = int(m.group(3)) - part_a = int(m.group(4)) - part_b = int(m.group(5)) - dest_dim_a = int(m.group(6)) - dest_dim_b = int(m.group(7)) - if part_a == 0: - part_a = -1 - if part_b == 0: - part_b = -1 - prev_shape = tf.shape(prev_layer) - layer = shapes.transposing_reshape( - prev_layer, src_dim, part_a, part_b, dest_dim_a, dest_dim_b, name=name) - # Compute scale factors. - result_shape = tf.shape(layer) - for i in xrange(len(self.reduction_factors)): - if self.reduction_factors[i] is not None: - factor1 = tf.cast(self.reduction_factors[i], tf.float32) - factor2 = tf.cast(prev_shape[i], tf.float32) - divisor = tf.cast(result_shape[i], tf.float32) - self.reduction_factors[i] = tf.div(tf.multiply(factor1, factor2), divisor) - return layer, m.end() - - def AddFCLayer(self, prev_layer, index): - """Parse expression and add Fully Connected Layer. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. - """ - pattern = re.compile(R'(F)(s|t|r|l|m)({\w+})?(\d+)') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - fn = self._NonLinearity(m.group(2)) - name = self._GetLayerName(m.group(0), index, m.group(3)) - depth = int(m.group(4)) - input_depth = shapes.tensor_dim(prev_layer, 1) * shapes.tensor_dim( - prev_layer, 2) * shapes.tensor_dim(prev_layer, 3) - # The slim fully connected is actually a 1x1 conv, so we have to crush the - # dimensions on input. - # Everything except batch goes to depth, and therefore has to be known. 
- shaped = tf.reshape( - prev_layer, [-1, input_depth], name=name + '_reshape_in') - output = slim.fully_connected(shaped, depth, activation_fn=fn, scope=name) - # Width and height are collapsed to 1. - self.reduction_factors[1] = None - self.reduction_factors[2] = None - return tf.reshape( - output, [shapes.tensor_dim(prev_layer, 0), 1, 1, depth], - name=name + '_reshape_out'), m.end() - - def AddLSTMLayer(self, prev_layer, index): - """Parse expression and add LSTM Layer. - - Args: - prev_layer: Input tensor. - index: Position in model_str to start parsing - - Returns: - Output tensor, end index in model_str. - """ - pattern = re.compile(R'(L)(f|r|b)(x|y)(s)?({\w+})?(\d+)') - m = pattern.match(self.model_str, index) - if m is None: - return None, None - direction = m.group(2) - dim = m.group(3) - summarize = m.group(4) == 's' - name = self._GetLayerName(m.group(0), index, m.group(5)) - depth = int(m.group(6)) - if direction == 'b' and summarize: - fwd = self._LSTMLayer(prev_layer, 'forward', dim, True, depth, - name + '_forward') - back = self._LSTMLayer(prev_layer, 'backward', dim, True, depth, - name + '_reverse') - return tf.concat(axis=3, values=[fwd, back], name=name + '_concat'), m.end() - if direction == 'f': - direction = 'forward' - elif direction == 'r': - direction = 'backward' - else: - direction = 'bidirectional' - outputs = self._LSTMLayer(prev_layer, direction, dim, summarize, depth, - name) - if summarize: - # The x or y dimension is getting collapsed. - if dim == 'x': - self.reduction_factors[2] = None - else: - self.reduction_factors[1] = None - return outputs, m.end() - - def _LSTMLayer(self, prev_layer, direction, dim, summarize, depth, name): - """Adds an LSTM layer with the given pre-parsed attributes. - - Always maps 4-D to 4-D regardless of summarize. - Args: - prev_layer: Input tensor. - direction: 'forward' 'backward' or 'bidirectional' - dim: 'x' or 'y', dimension to consider as time. 
- summarize: True if we are to return only the last timestep. - depth: Output depth. - name: Some string naming the op. - - Returns: - Output tensor. - """ - # If the target dimension is y, we need to transpose. - if dim == 'x': - lengths = self.GetLengths(2, 1) - inputs = prev_layer - else: - lengths = self.GetLengths(1, 1) - inputs = tf.transpose(prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in') - input_batch = shapes.tensor_dim(inputs, 0) - num_slices = shapes.tensor_dim(inputs, 1) - num_steps = shapes.tensor_dim(inputs, 2) - input_depth = shapes.tensor_dim(inputs, 3) - # Reshape away the other dimension. - inputs = tf.reshape( - inputs, [-1, num_steps, input_depth], name=name + '_reshape_in') - # We need to replicate the lengths by the size of the other dimension, and - # any changes that have been made to the batch dimension. - tile_factor = tf.to_float(input_batch * - num_slices) / tf.to_float(tf.shape(lengths)[0]) - lengths = tf.tile(lengths, [tf.cast(tile_factor, tf.int32)]) - lengths = tf.cast(lengths, tf.int64) - outputs = nn_ops.rnn_helper( - inputs, - lengths, - cell_type='lstm', - num_nodes=depth, - direction=direction, - name=name, - stddev=0.1) - # Output depth is doubled if bi-directional. - if direction == 'bidirectional': - output_depth = depth * 2 - else: - output_depth = depth - # Restore the other dimension. - if summarize: - outputs = tf.slice( - outputs, [0, num_steps - 1, 0], [-1, 1, -1], name=name + '_sum_slice') - outputs = tf.reshape( - outputs, [input_batch, num_slices, 1, output_depth], - name=name + '_reshape_out') - else: - outputs = tf.reshape( - outputs, [input_batch, num_slices, num_steps, output_depth], - name=name + '_reshape_out') - if dim == 'y': - outputs = tf.transpose(outputs, [0, 2, 1, 3], name=name + '_ytrans_out') - return outputs - - def _NonLinearity(self, code): - """Returns the non-linearity function pointer for the given string code. 
- - For forwards compatibility, allows the full names for stand-alone - non-linearities, as well as the single-letter names used in ops like C,F. - Args: - code: String code representing a non-linearity function. - Returns: - non-linearity function represented by the code. - """ - if code in ['s', 'Sig']: - return tf.sigmoid - elif code in ['t', 'Tanh']: - return tf.tanh - elif code in ['r', 'Relu']: - return tf.nn.relu - elif code in ['m', 'Smax']: - return tf.nn.softmax - return None - - def _GetLayerName(self, op_str, index, name_str): - """Generates a name for the op, using a user-supplied name if possible. - - Args: - op_str: String representing the parsed op. - index: Position in model_str of the start of the op. - name_str: User-supplied {name} with {} that need removing or None. - - Returns: - Selected name. - """ - if name_str: - return name_str[1:-1] - else: - return op_str.translate(self.transtab) + '_' + str(index) - - def _SkipWhitespace(self, index): - """Skips any leading whitespace in the model description. - - Args: - index: Position in model_str to start parsing - - Returns: - end index in model_str of whitespace. - """ - pattern = re.compile(R'([ \t\n]+)') - m = pattern.match(self.model_str, index) - if m is None: - return index - return m.end() diff --git a/research/street/python/vgslspecs_test.py b/research/street/python/vgslspecs_test.py deleted file mode 100644 index 69ea28bf75c13700f063b1dc4303af6c0982889c..0000000000000000000000000000000000000000 --- a/research/street/python/vgslspecs_test.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for vgslspecs.""" - -import numpy as np -import tensorflow as tf -import vgslspecs - - -def _rand(*size): - return np.random.uniform(size=size).astype('f') - - -class VgslspecsTest(tf.test.TestCase): - - def __init__(self, other): - super(VgslspecsTest, self).__init__(other) - self.max_width = 36 - self.max_height = 24 - self.batch_size = 4 - - def SetupInputs(self): - # Make placeholders for standard inputs. - # Everything is variable in the input, except the depth. - self.ph_image = tf.placeholder( - tf.float32, shape=(None, None, None, 3), name='inputs') - self.ph_widths = tf.placeholder(tf.int64, shape=(None,), name='w') - self.ph_heights = tf.placeholder(tf.int64, shape=(None,), name='h') - # Make actual inputs. - self.in_image = _rand(self.batch_size, self.max_height, self.max_width, 3) - self.in_widths = [24, 12, self.max_width, 30] - self.in_heights = [self.max_height, 18, 12, 6] - - def ExpectScaledSize(self, spec, target_shape, factor=1): - """Tests that the output of the graph of the given spec has target_shape.""" - with tf.Graph().as_default(): - with self.test_session() as sess: - self.SetupInputs() - # Only the placeholders are given at construction time. - vgsl = vgslspecs.VGSLSpecs(self.ph_widths, self.ph_heights, True) - outputs = vgsl.Build(self.ph_image, spec) - # Compute the expected output widths from the given scale factor. 
- target_widths = tf.div(self.in_widths, factor).eval() - target_heights = tf.div(self.in_heights, factor).eval() - # Run with the 'real' data. - tf.global_variables_initializer().run() - res_image, res_widths, res_heights = sess.run( - [outputs, vgsl.GetLengths(2), vgsl.GetLengths(1)], - feed_dict={self.ph_image: self.in_image, - self.ph_widths: self.in_widths, - self.ph_heights: self.in_heights}) - self.assertEqual(tuple(res_image.shape), target_shape) - if target_shape[1] > 1: - self.assertEqual(tuple(res_heights), tuple(target_heights)) - if target_shape[2] > 1: - self.assertEqual(tuple(res_widths), tuple(target_widths)) - - def testSameSizeConv(self): - """Test all types of Conv. There is no scaling.""" - self.ExpectScaledSize( - '[Cs{MyConv}5,5,16 Ct3,3,12 Cr4,4,24 Cl5,5,64]', - (self.batch_size, self.max_height, self.max_width, 64)) - - def testSameSizeLSTM(self): - """Test all non-reducing LSTMs. Output depth is doubled with BiDi.""" - self.ExpectScaledSize('[Lfx16 Lrx8 Do Lbx24 Lfy12 Do{MyDo} Lry7 Lby32]', - (self.batch_size, self.max_height, self.max_width, - 64)) - - def testSameSizeParallel(self): - """Parallel affects depth, but not scale.""" - self.ExpectScaledSize('[Cs5,5,16 (Lfx{MyLSTM}32 Lrx32 Lbx16)]', - (self.batch_size, self.max_height, self.max_width, - 96)) - - def testScalingOps(self): - """Test a heterogeneous series with scaling.""" - self.ExpectScaledSize('[Cs5,5,16 Mp{MyPool}2,2 Ct3,3,32 Mp3,3 Lfx32 Lry64]', - (self.batch_size, self.max_height / 6, - self.max_width / 6, 64), 6) - - def testXReduction(self): - """Test a heterogeneous series with reduction of x-dimension.""" - self.ExpectScaledSize('[Cr5,5,16 Mp2,2 Ct3,3,32 Mp3,3 Lfxs32 Lry64]', - (self.batch_size, self.max_height / 6, 1, 64), 6) - - def testYReduction(self): - """Test a heterogeneous series with reduction of y-dimension.""" - self.ExpectScaledSize('[Cl5,5,16 Mp2,2 Ct3,3,32 Mp3,3 Lfys32 Lfx64]', - (self.batch_size, 1, self.max_width / 6, 64), 6) - - def 
testXYReduction(self): - """Test a heterogeneous series with reduction to 0-d.""" - self.ExpectScaledSize( - '[Cr5,5,16 Lfys32 Lfxs64 Fr{MyFC}16 Ft20 Fl12 Fs32 Fm40]', - (self.batch_size, 1, 1, 40)) - - def testReshapeTile(self): - """Tests that a tiled input can be reshaped to the batch dimension.""" - self.ExpectScaledSize('[S2(3x0)0,2 Cr5,5,16 Lfys16]', - (self.batch_size * 3, 1, self.max_width / 3, 16), 3) - - def testReshapeDepth(self): - """Tests that depth can be reshaped to the x dimension.""" - self.ExpectScaledSize('[Cl5,5,16 Mp3,3 (Lrys32 Lbys16 Lfys32) S3(3x0)2,3]', - (self.batch_size, 1, self.max_width, 32)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/street/testdata/arial-32-tiny b/research/street/testdata/arial-32-tiny deleted file mode 100644 index b551e2bde5fbfe10648af75bb90275d50fa6f201..0000000000000000000000000000000000000000 Binary files a/research/street/testdata/arial-32-tiny and /dev/null differ diff --git a/research/street/testdata/arial.charset_size=105.txt b/research/street/testdata/arial.charset_size=105.txt deleted file mode 100644 index feec47e0aabc60fca462570ea20ac525be761797..0000000000000000000000000000000000000000 --- a/research/street/testdata/arial.charset_size=105.txt +++ /dev/null @@ -1,112 +0,0 @@ -0 -104 -1 G -2 r -3 a -4 s -5 l -6 n -7 d -8 . -9 B -10 C -11 O -12 W -13 Y -14 , -15 ( -16 u -17 z -18 i -19 e -20 ) -21 1 -22 9 -23 2 -24 - -25 6 -26 o -27 L -28 P -29 ' -30 t -31 m -32 K -33 c -34 k -35 V -36 S -37 D -38 J -39 h -40 M -41 x -42 E -43 q -44 ; -45 A -46 y -47 f -48 5 -49 7 -50 b -51 4 -52 0 -53 3 -54 N -55 I -56 T -57 / -58 p -59 w -60 g -61 H -62 “ -63 F -62 ” -62 " -29 ’ -64 R -24 — -65 8 -66 v -67 ? -68 é -69 % -70 : -71 j -72 \ -73 { -74 } -75 | -76 U -77 $ -78 ° -79 * -80 ! 
-81 ] -82 Q -29 ‘ -83 Z -84 X -85 [ -86 = -87 + -88 § -89 _ -90 £ -91 & -92 # -93 > -94 < -95 ~ -96 € -97 @ -98 ¢ -99 » -100 « -47,5 fl -47,18 fi -101 ® -102 © -103 ¥ diff --git a/research/street/testdata/charset_size=134.txt b/research/street/testdata/charset_size=134.txt deleted file mode 100644 index 5c7fcde2ae0ab679f279a083d6de1c50d33ff90b..0000000000000000000000000000000000000000 --- a/research/street/testdata/charset_size=134.txt +++ /dev/null @@ -1,139 +0,0 @@ -0 -133 -1 l -2 ’ -3 é -4 t -5 e -6 i -7 n -8 s -9 x -10 g -11 u -12 o -13 1 -14 8 -15 7 -16 0 -17 - -18 . -19 p -20 a -21 r -22 è -23 d -24 c -25 V -26 v -27 b -28 m -29 ) -30 C -31 z -32 S -33 y -34 , -35 k -36 É -37 A -38 h -39 E -40 » -41 D -42 / -43 H -44 M -45 ( -46 G -47 P -48 ç -2 ' -49 R -50 f -51 " -52 2 -53 j -54 | -55 N -56 6 -57 ° -58 5 -59 T -60 O -61 U -62 3 -63 % -64 9 -65 q -66 Z -67 B -68 K -69 w -70 W -71 : -72 4 -73 L -74 F -75 ] -76 ï -2 ‘ -77 I -78 J -79 ä -80 î -81 ; -82 à -83 ê -84 X -85 ü -86 Y -87 ô -88 = -89 + -90 \ -91 { -92 } -93 _ -94 Q -95 œ -96 ñ -97 * -98 ! -99 Ü -51 “ -100 â -101 Ç -102 Œ -103 û -104 ? -105 $ -106 ë -107 « -108 € -109 & -110 < -51 ” -111 æ -112 # -113 ® -114  -115 È -116 > -117 [ -17 — -118 Æ -119 ù -120 Î -121 Ô -122 ÿ -123 À -124 Ê -125 @ -126 Ï -127 © -128 Ë -129 Ù -130 £ -131 Ÿ -132 Û diff --git a/research/street/testdata/charset_size_10.txt b/research/street/testdata/charset_size_10.txt deleted file mode 100644 index 93fffbd0f26d2a23f319f8765133a43fb25b7f46..0000000000000000000000000000000000000000 --- a/research/street/testdata/charset_size_10.txt +++ /dev/null @@ -1,10 +0,0 @@ -0 -9 -1 a -2 b -3 r -4 n -4,5 m -6 f -7 . 
-8 , diff --git a/research/street/testdata/mnist-tiny b/research/street/testdata/mnist-tiny deleted file mode 100644 index 1470119aff9891557dcef02981c40ae958e6bd11..0000000000000000000000000000000000000000 Binary files a/research/street/testdata/mnist-tiny and /dev/null differ diff --git a/research/street/testdata/numbers-16-tiny b/research/street/testdata/numbers-16-tiny deleted file mode 100644 index bb0c11fcd034fad1cf650b386bf94476c9c1ccd1..0000000000000000000000000000000000000000 Binary files a/research/street/testdata/numbers-16-tiny and /dev/null differ diff --git a/research/street/testdata/numbers.charset_size=12.txt b/research/street/testdata/numbers.charset_size=12.txt deleted file mode 100644 index 12fdc77ded0c74b05d0624bc9f690e7384bc5038..0000000000000000000000000000000000000000 --- a/research/street/testdata/numbers.charset_size=12.txt +++ /dev/null @@ -1,12 +0,0 @@ -0 -11 -1 9 -2 8 -3 7 -4 6 -5 1 -6 4 -7 0 -8 3 -9 5 -10 2 diff --git a/research/struct2depth/BUILD b/research/struct2depth/BUILD deleted file mode 100644 index ffd0fb0cdc5bc3ecb62d3cbb5b502b353073615f..0000000000000000000000000000000000000000 --- a/research/struct2depth/BUILD +++ /dev/null @@ -1 +0,0 @@ -package(default_visibility = ["//visibility:public"]) diff --git a/research/struct2depth/README.md b/research/struct2depth/README.md deleted file mode 100644 index de1d7e7f2a8864897ed0fdbe71a42171e47da5da..0000000000000000000000000000000000000000 --- a/research/struct2depth/README.md +++ /dev/null @@ -1,151 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# struct2depth - -This a method for unsupervised learning of depth and egomotion from monocular video, achieving new state-of-the-art results on both tasks by 
explicitly modeling 3D object motion, performing on-line refinement and improving quality for moving objects by novel loss formulations. It will appear in the following paper: - -**V. Casser, S. Pirk, R. Mahjourian, A. Angelova, Depth Prediction Without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos, AAAI Conference on Artificial Intelligence, 2019** -https://arxiv.org/pdf/1811.06152.pdf - -This code is implemented and supported by Vincent Casser (git username: VincentCa) and Anelia Angelova (git username: AneliaAngelova). Please contact anelia@google.com for questions. - -Project website: https://sites.google.com/view/struct2depth. - -## Quick start: Running training - -Before running training, run gen_data_* script for the respective dataset in order to generate the data in the appropriate format for KITTI or Cityscapes. It is assumed that motion masks are already generated and stored as images. -Models are trained from an Imagenet pretrained model. - -```shell - -ckpt_dir="your/checkpoint/folder" -data_dir="KITTI_SEQ2_LR/" # Set for KITTI -data_dir="CITYSCAPES_SEQ2_LR/" # Set for Cityscapes -imagenet_ckpt="resnet_pretrained/model.ckpt" - -python train.py \ - --logtostderr \ - --checkpoint_dir $ckpt_dir \ - --data_dir $data_dir \ - --architecture resnet \ - --imagenet_ckpt $imagenet_ckpt \ - --imagenet_norm true \ - --joint_encoder false -``` - - - -## Running depth/egomotion inference on an image folder - -KITTI is trained on the raw image data (resized to 416 x 128), but inputs are standardized before feeding them, and Cityscapes images are cropped using the following cropping parameters: (192, 1856, 256, 768). If using a different crop, it is likely that additional training is necessary. Therefore, please follow the inference example shown below when using one of the models. The right choice might depend on a variety of factors. 
For example, if a checkpoint should be used for odometry, be aware that for improved odometry on motion models, using segmentation masks could be advantageous (setting *use_masks=true* for inference). On the other hand, all models can be used for single-frame depth estimation without any additional information. - - -```shell - -input_dir="your/image/folder" -output_dir="your/output/folder" -model_checkpoint="your/model/checkpoint" - -python inference.py \ - --logtostderr \ - --file_extension png \ - --depth \ - --egomotion true \ - --input_dir $input_dir \ - --output_dir $output_dir \ - --model_ckpt $model_checkpoint -``` - -Note that the egomotion prediction expects the files in the input directory to be a consecutive sequence, and that sorting the filenames alphabetically is putting them in the right order. - -One can also run inference on KITTI by providing - -```shell ---input_list_file ~/kitti-raw-uncompressed/test_files_eigen.txt -``` - -and on Cityscapes by passing - -```shell ---input_list_file CITYSCAPES_FULL/test_files_cityscapes.txt -``` - -instead of *input_dir*. -Alternatively inference can also be ran on pre-processed images. - - - -## Running on-line refinement - -On-line refinement is executed on top of an existing inference folder, so make sure to run regular inference first. Then you can run the on-line fusion procedure as follows: - -```shell - -prediction_dir="some/prediction/dir" -model_ckpt="checkpoints/checkpoints_baseline/model-199160" -handle_motion="false" -size_constraint_weight="0" # This must be zero when not handling motion. 
- -# If running on KITTI, set as follows: -data_dir="KITTI_SEQ2_LR_EIGEN/" -triplet_list_file="$data_dir/test_files_eigen_triplets.txt" -triplet_list_file_remains="$data_dir/test_files_eigen_triplets_remains.txt" -ft_name="kitti" - -# If running on Cityscapes, set as follows: -data_dir="CITYSCAPES_SEQ2_LR_TEST/" # Set for Cityscapes -triplet_list_file="/CITYSCAPES_SEQ2_LR_TEST/test_files_cityscapes_triplets.txt" -triplet_list_file_remains="CITYSCAPES_SEQ2_LR_TEST/test_files_cityscapes_triplets_remains.txt" -ft_name="cityscapes" - -python optimize.py \ - --logtostderr \ - --output_dir $prediction_dir \ - --data_dir $data_dir \ - --triplet_list_file $triplet_list_file \ - --triplet_list_file_remains $triplet_list_file_remains \ - --ft_name $ft_name \ - --model_ckpt $model_ckpt \ - --file_extension png \ - --handle_motion $handle_motion \ - --size_constraint_weight $size_constraint_weight -``` - - - -## Running evaluation - -```shell - -prediction_dir="some/prediction/dir" - -# Use these settings for KITTI: -eval_list_file="KITTI_FULL/kitti-raw-uncompressed/test_files_eigen.txt" -eval_crop="garg" -eval_mode="kitti" - -# Use these settings for Cityscapes: -eval_list_file="CITYSCAPES_FULL/test_files_cityscapes.txt" -eval_crop="none" -eval_mode="cityscapes" - -python evaluate.py \ - --logtostderr \ - --prediction_dir $prediction_dir \ - --eval_list_file $eval_list_file \ - --eval_crop $eval_crop \ - --eval_mode $eval_mode -``` - - - -## Credits - -This code is implemented and supported by Vincent Casser and Anelia Angelova and can be found at -https://sites.google.com/view/struct2depth. 
-The core implementation is derived from [https://github.com/tensorflow/models/tree/master/research/vid2depth)](https://github.com/tensorflow/models/tree/master/research/vid2depth) -by [Reza Mahjourian](rezama@google.com), which in turn is based on [SfMLearner -(https://github.com/tinghuiz/SfMLearner)](https://github.com/tinghuiz/SfMLearner) -by [Tinghui Zhou](https://github.com/tinghuiz). diff --git a/research/struct2depth/alignment.py b/research/struct2depth/alignment.py deleted file mode 100644 index 0e9417d482e2a243af45faa40230ad38da9b1b89..0000000000000000000000000000000000000000 --- a/research/struct2depth/alignment.py +++ /dev/null @@ -1,54 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Common utilities for data pre-processing, e.g. matching moving object across frames.""" - -import numpy as np - -def compute_overlap(mask1, mask2): - # Use IoU here. - return np.sum(mask1 & mask2)/np.sum(mask1 | mask2) - -def align(seg_img1, seg_img2, seg_img3, threshold_same=0.3): - res_img1 = np.zeros_like(seg_img1) - res_img2 = np.zeros_like(seg_img2) - res_img3 = np.zeros_like(seg_img3) - remaining_objects2 = list(np.unique(seg_img2.flatten())) - remaining_objects3 = list(np.unique(seg_img3.flatten())) - for seg_id in np.unique(seg_img1): - # See if we can find correspondences to seg_id in seg_img2. 
- max_overlap2 = float('-inf') - max_segid2 = -1 - for seg_id2 in remaining_objects2: - overlap = compute_overlap(seg_img1==seg_id, seg_img2==seg_id2) - if overlap>max_overlap2: - max_overlap2 = overlap - max_segid2 = seg_id2 - if max_overlap2 > threshold_same: - max_overlap3 = float('-inf') - max_segid3 = -1 - for seg_id3 in remaining_objects3: - overlap = compute_overlap(seg_img2==max_segid2, seg_img3==seg_id3) - if overlap>max_overlap3: - max_overlap3 = overlap - max_segid3 = seg_id3 - if max_overlap3 > threshold_same: - res_img1[seg_img1==seg_id] = seg_id - res_img2[seg_img2==max_segid2] = seg_id - res_img3[seg_img3==max_segid3] = seg_id - remaining_objects2.remove(max_segid2) - remaining_objects3.remove(max_segid3) - return res_img1, res_img2, res_img3 diff --git a/research/struct2depth/gen_data_city.py b/research/struct2depth/gen_data_city.py deleted file mode 100644 index 7e18fe5acb978fe89a686b1ff8f44d030a62777d..0000000000000000000000000000000000000000 --- a/research/struct2depth/gen_data_city.py +++ /dev/null @@ -1,158 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -""" Offline data generation for the Cityscapes dataset.""" - -import os -from absl import app -from absl import flags -from absl import logging -import numpy as np -import cv2 -import os, glob - -import alignment -from alignment import compute_overlap -from alignment import align - - -SKIP = 2 -WIDTH = 416 -HEIGHT = 128 -SUB_FOLDER = 'train' -INPUT_DIR = '/usr/local/google/home/anelia/struct2depth/CITYSCAPES_FULL/' -OUTPUT_DIR = '/usr/local/google/home/anelia/struct2depth/CITYSCAPES_Processed/' - -def crop(img, segimg, fx, fy, cx, cy): - # Perform center cropping, preserving 50% vertically. - middle_perc = 0.50 - left = 1 - middle_perc - half = left / 2 - a = img[int(img.shape[0]*(half)):int(img.shape[0]*(1-half)), :] - aseg = segimg[int(segimg.shape[0]*(half)):int(segimg.shape[0]*(1-half)), :] - cy /= (1 / middle_perc) - - # Resize to match target height while preserving aspect ratio. - wdt = int((float(HEIGHT)*a.shape[1]/a.shape[0])) - x_scaling = float(wdt)/a.shape[1] - y_scaling = float(HEIGHT)/a.shape[0] - b = cv2.resize(a, (wdt, HEIGHT)) - bseg = cv2.resize(aseg, (wdt, HEIGHT)) - - # Adjust intrinsics. - fx*=x_scaling - fy*=y_scaling - cx*=x_scaling - cy*=y_scaling - - # Perform center cropping horizontally. 
- remain = b.shape[1] - WIDTH - cx /= (b.shape[1] / WIDTH) - c = b[:, int(remain/2):b.shape[1]-int(remain/2)] - cseg = bseg[:, int(remain/2):b.shape[1]-int(remain/2)] - - return c, cseg, fx, fy, cx, cy - - -def run_all(): - dir_name=INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*' - print('Processing directory', dir_name) - for location in glob.glob(INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*'): - location_name = os.path.basename(location) - print('Processing location', location_name) - files = sorted(glob.glob(location + '/*.png')) - files = [file for file in files if '-seg.png' not in file] - # Break down into sequences - sequences = {} - seq_nr = 0 - last_seq = '' - last_imgnr = -1 - - for i in range(len(files)): - seq = os.path.basename(files[i]).split('_')[1] - nr = int(os.path.basename(files[i]).split('_')[2]) - if seq!=last_seq or last_imgnr+1!=nr: - seq_nr+=1 - last_imgnr = nr - last_seq = seq - if not seq_nr in sequences: - sequences[seq_nr] = [] - sequences[seq_nr].append(files[i]) - - for (k,v) in sequences.items(): - print('Processing sequence', k, 'with', len(v), 'elements...') - output_dir = OUTPUT_DIR + '/' + location_name + '_' + str(k) - if not os.path.isdir(output_dir): - os.mkdir(output_dir) - files = sorted(v) - triplet = [] - seg_triplet = [] - ct = 1 - - # Find applicable intrinsics. - for j in range(len(files)): - osegname = os.path.basename(files[j]).split('_')[1] - oimgnr = os.path.basename(files[j]).split('_')[2] - applicable_intrinsics = INPUT_DIR + '/camera/' + SUB_FOLDER + '/' + location_name + '/' + location_name + '_' + osegname + '_' + oimgnr + '_camera.json' - # Get the intrinsics for one of the file of the sequence. 
- if os.path.isfile(applicable_intrinsics): - f = open(applicable_intrinsics, 'r') - lines = f.readlines() - f.close() - lines = [line.rstrip() for line in lines] - - fx = float(lines[11].split(': ')[1].replace(',', '')) - fy = float(lines[12].split(': ')[1].replace(',', '')) - cx = float(lines[13].split(': ')[1].replace(',', '')) - cy = float(lines[14].split(': ')[1].replace(',', '')) - - for j in range(0, len(files), SKIP): - img = cv2.imread(files[j]) - segimg = cv2.imread(files[j].replace('.png', '-seg.png')) - - smallimg, segimg, fx_this, fy_this, cx_this, cy_this = crop(img, segimg, fx, fy, cx, cy) - triplet.append(smallimg) - seg_triplet.append(segimg) - if len(triplet)==3: - cmb = np.hstack(triplet) - align1, align2, align3 = align(seg_triplet[0], seg_triplet[1], seg_triplet[2]) - cmb_seg = np.hstack([align1, align2, align3]) - cv2.imwrite(os.path.join(output_dir, str(ct).zfill(10) + '.png'), cmb) - cv2.imwrite(os.path.join(output_dir, str(ct).zfill(10) + '-fseg.png'), cmb_seg) - f = open(os.path.join(output_dir, str(ct).zfill(10) + '_cam.txt'), 'w') - f.write(str(fx_this) + ',0.0,' + str(cx_this) + ',0.0,' + str(fy_this) + ',' + str(cy_this) + ',0.0,0.0,1.0') - f.close() - del triplet[0] - del seg_triplet[0] - ct+=1 - -# Create file list for training. Be careful as it collects and includes all files recursively. 
-fn = open(OUTPUT_DIR + '/' + SUB_FOLDER + '.txt', 'w') -for f in glob.glob(OUTPUT_DIR + '/*/*.png'): - if '-seg.png' in f or '-fseg.png' in f: - continue - folder_name = f.split('/')[-2] - img_name = f.split('/')[-1].replace('.png', '') - fn.write(folder_name + ' ' + img_name + '\n') -fn.close() - - -def main(_): - run_all() - - -if __name__ == '__main__': - app.run(main) diff --git a/research/struct2depth/gen_data_kitti.py b/research/struct2depth/gen_data_kitti.py deleted file mode 100644 index 8577c4c677383d29e074f922f56fd1abaef23543..0000000000000000000000000000000000000000 --- a/research/struct2depth/gen_data_kitti.py +++ /dev/null @@ -1,149 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -""" Offline data generation for the KITTI dataset.""" - -import os -from absl import app -from absl import flags -from absl import logging -import numpy as np -import cv2 -import os, glob - -import alignment -from alignment import compute_overlap -from alignment import align - - -SEQ_LENGTH = 3 -WIDTH = 416 -HEIGHT = 128 -STEPSIZE = 1 -INPUT_DIR = '/usr/local/google/home/anelia/struct2depth/KITTI_FULL/kitti-raw-uncompressed' -OUTPUT_DIR = '/usr/local/google/home/anelia/struct2depth/KITTI_procesed/' - - -def get_line(file, start): - file = open(file, 'r') - lines = file.readlines() - lines = [line.rstrip() for line in lines] - ret = None - for line in lines: - nline = line.split(': ') - if nline[0]==start: - ret = nline[1].split(' ') - ret = np.array([float(r) for r in ret], dtype=float) - ret = ret.reshape((3,4))[0:3, 0:3] - break - file.close() - return ret - - -def crop(img, segimg, fx, fy, cx, cy): - # Perform center cropping, preserving 50% vertically. - middle_perc = 0.50 - left = 1-middle_perc - half = left/2 - a = img[int(img.shape[0]*(half)):int(img.shape[0]*(1-half)), :] - aseg = segimg[int(segimg.shape[0]*(half)):int(segimg.shape[0]*(1-half)), :] - cy /= (1/middle_perc) - - # Resize to match target height while preserving aspect ratio. - wdt = int((128*a.shape[1]/a.shape[0])) - x_scaling = float(wdt)/a.shape[1] - y_scaling = 128.0/a.shape[0] - b = cv2.resize(a, (wdt, 128)) - bseg = cv2.resize(aseg, (wdt, 128)) - - # Adjust intrinsics. - fx*=x_scaling - fy*=y_scaling - cx*=x_scaling - cy*=y_scaling - - # Perform center cropping horizontally. 
- remain = b.shape[1] - 416 - cx /= (b.shape[1]/416) - c = b[:, int(remain/2):b.shape[1]-int(remain/2)] - cseg = bseg[:, int(remain/2):b.shape[1]-int(remain/2)] - - return c, cseg, fx, fy, cx, cy - - -def run_all(): - ct = 0 -if not OUTPUT_DIR.endswith('/'): - OUTPUT_DIR = OUTPUT_DIR + '/' - -for d in glob.glob(INPUT_DIR + '/*/'): - date = d.split('/')[-2] - file_calibration = d + 'calib_cam_to_cam.txt' - calib_raw = [get_line(file_calibration, 'P_rect_02'), get_line(file_calibration, 'P_rect_03')] - - for d2 in glob.glob(d + '*/'): - seqname = d2.split('/')[-2] - print('Processing sequence', seqname) - for subfolder in ['image_02/data', 'image_03/data']: - ct = 1 - seqname = d2.split('/')[-2] + subfolder.replace('image', '').replace('/data', '') - if not os.path.exists(OUTPUT_DIR + seqname): - os.mkdir(OUTPUT_DIR + seqname) - - calib_camera = calib_raw[0] if subfolder=='image_02/data' else calib_raw[1] - folder = d2 + subfolder - files = glob.glob(folder + '/*.png') - files = [file for file in files if not 'disp' in file and not 'flip' in file and not 'seg' in file] - files = sorted(files) - for i in range(SEQ_LENGTH, len(files)+1, STEPSIZE): - imgnum = str(ct).zfill(10) - if os.path.exists(OUTPUT_DIR + seqname + '/' + imgnum + '.png'): - ct+=1 - continue - big_img = np.zeros(shape=(HEIGHT, WIDTH*SEQ_LENGTH, 3)) - wct = 0 - - for j in range(i-SEQ_LENGTH, i): # Collect frames for this sample. - img = cv2.imread(files[j]) - ORIGINAL_HEIGHT, ORIGINAL_WIDTH, _ = img.shape - - zoom_x = WIDTH/ORIGINAL_WIDTH - zoom_y = HEIGHT/ORIGINAL_HEIGHT - - # Adjust intrinsics. 
- calib_current = calib_camera.copy() - calib_current[0, 0] *= zoom_x - calib_current[0, 2] *= zoom_x - calib_current[1, 1] *= zoom_y - calib_current[1, 2] *= zoom_y - - calib_representation = ','.join([str(c) for c in calib_current.flatten()]) - - img = cv2.resize(img, (WIDTH, HEIGHT)) - big_img[:,wct*WIDTH:(wct+1)*WIDTH] = img - wct+=1 - cv2.imwrite(OUTPUT_DIR + seqname + '/' + imgnum + '.png', big_img) - f = open(OUTPUT_DIR + seqname + '/' + imgnum + '_cam.txt', 'w') - f.write(calib_representation) - f.close() - ct+=1 - -def main(_): - run_all() - - -if __name__ == '__main__': - app.run(main) diff --git a/research/struct2depth/inference.py b/research/struct2depth/inference.py deleted file mode 100644 index 042e2be17f736aa99235651452ca27931a83c9bd..0000000000000000000000000000000000000000 --- a/research/struct2depth/inference.py +++ /dev/null @@ -1,416 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Runs struct2depth at inference. 
Produces depth estimates, ego-motion and object motion.""" - -# Example usage: -# -# python inference.py \ -# --input_dir ~/struct2depth/kitti-raw-uncompressed/ \ -# --output_dir ~/struct2depth/output \ -# --model_ckpt ~/struct2depth/model/model-199160 -# --file_extension png \ -# --depth \ -# --egomotion true \ - - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from absl import app -from absl import flags -from absl import logging -#import matplotlib.pyplot as plt -import model -import numpy as np -import fnmatch -import tensorflow as tf -import nets -import util - -gfile = tf.gfile - -# CMAP = 'plasma' - -INFERENCE_MODE_SINGLE = 'single' # Take plain single-frame input. -INFERENCE_MODE_TRIPLETS = 'triplets' # Take image triplets as input. -# For KITTI, we just resize input images and do not perform cropping. For -# Cityscapes, the car hood and more image content has been cropped in order -# to fit aspect ratio, and remove static content from the images. This has to be -# kept at inference time. -INFERENCE_CROP_NONE = 'none' -INFERENCE_CROP_CITYSCAPES = 'cityscapes' - - -flags.DEFINE_string('output_dir', None, 'Directory to store predictions.') -flags.DEFINE_string('file_extension', 'png', 'Image data file extension of ' - 'files provided with input_dir. Also determines the output ' - 'file format of depth prediction images.') -flags.DEFINE_bool('depth', True, 'Determines if the depth prediction network ' - 'should be executed and its predictions be saved.') -flags.DEFINE_bool('egomotion', False, 'Determines if the egomotion prediction ' - 'network should be executed and its predictions be saved. 
If ' - 'inference is run in single inference mode, it is assumed ' - 'that files in the same directory belong in the same ' - 'sequence, and sorting them alphabetically establishes the ' - 'right temporal order.') -flags.DEFINE_string('model_ckpt', None, 'Model checkpoint to evaluate.') -flags.DEFINE_string('input_dir', None, 'Directory containing image files to ' - 'evaluate. This crawls recursively for images in the ' - 'directory, mirroring relative subdirectory structures ' - 'into the output directory.') -flags.DEFINE_string('input_list_file', None, 'Text file containing paths to ' - 'image files to process. Paths should be relative with ' - 'respect to the list file location. Relative path ' - 'structures will be mirrored in the output directory.') -flags.DEFINE_integer('batch_size', 1, 'The size of a sample batch') -flags.DEFINE_integer('img_height', 128, 'Input frame height.') -flags.DEFINE_integer('img_width', 416, 'Input frame width.') -flags.DEFINE_integer('seq_length', 3, 'Number of frames in sequence.') -flags.DEFINE_enum('architecture', nets.RESNET, nets.ARCHITECTURES, - 'Defines the architecture to use for the depth prediction ' - 'network. Defaults to ResNet-based encoder and accompanying ' - 'decoder.') -flags.DEFINE_boolean('imagenet_norm', True, 'Whether to normalize the input ' - 'images channel-wise so that they match the distribution ' - 'most ImageNet-models were trained on.') -flags.DEFINE_bool('use_skip', True, 'Whether to use skip connections in the ' - 'encoder-decoder architecture.') -flags.DEFINE_bool('joint_encoder', False, 'Whether to share parameters ' - 'between the depth and egomotion networks by using a joint ' - 'encoder architecture. 
The egomotion network is then ' - 'operating only on the hidden representation provided by the ' - 'joint encoder.') -flags.DEFINE_bool('shuffle', False, 'Whether to shuffle the order in which ' - 'images are processed.') -flags.DEFINE_bool('flip', False, 'Whether images should be flipped as well as ' - 'resulting predictions (for test-time augmentation). This ' - 'currently applies to the depth network only.') -flags.DEFINE_enum('inference_mode', INFERENCE_MODE_SINGLE, - [INFERENCE_MODE_SINGLE, - INFERENCE_MODE_TRIPLETS], - 'Whether to use triplet mode for inference, which accepts ' - 'triplets instead of single frames.') -flags.DEFINE_enum('inference_crop', INFERENCE_CROP_NONE, - [INFERENCE_CROP_NONE, - INFERENCE_CROP_CITYSCAPES], - 'Whether to apply a Cityscapes-specific crop on the input ' - 'images first before running inference.') -flags.DEFINE_bool('use_masks', False, 'Whether to mask out potentially ' - 'moving objects when feeding image input to the egomotion ' - 'network. This might improve odometry results when using ' - 'a motion model. For this, pre-computed segmentation ' - 'masks have to be available for every image, with the ' - 'background being zero.') - -FLAGS = flags.FLAGS - -flags.mark_flag_as_required('output_dir') -flags.mark_flag_as_required('model_ckpt') - - -def _run_inference(output_dir=None, - file_extension='png', - depth=True, - egomotion=False, - model_ckpt=None, - input_dir=None, - input_list_file=None, - batch_size=1, - img_height=128, - img_width=416, - seq_length=3, - architecture=nets.RESNET, - imagenet_norm=True, - use_skip=True, - joint_encoder=True, - shuffle=False, - flip_for_depth=False, - inference_mode=INFERENCE_MODE_SINGLE, - inference_crop=INFERENCE_CROP_NONE, - use_masks=False): - """Runs inference. 
Refer to flags in inference.py for details.""" - inference_model = model.Model(is_training=False, - batch_size=batch_size, - img_height=img_height, - img_width=img_width, - seq_length=seq_length, - architecture=architecture, - imagenet_norm=imagenet_norm, - use_skip=use_skip, - joint_encoder=joint_encoder) - vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt) - saver = tf.train.Saver(vars_to_restore) - sv = tf.train.Supervisor(logdir='/tmp/', saver=None) - with sv.managed_session() as sess: - saver.restore(sess, model_ckpt) - if not gfile.Exists(output_dir): - gfile.MakeDirs(output_dir) - logging.info('Predictions will be saved in %s.', output_dir) - - # Collect all images to run inference on. - im_files, basepath_in = collect_input_images(input_dir, input_list_file, - file_extension) - if shuffle: - logging.info('Shuffling data...') - np.random.shuffle(im_files) - logging.info('Running inference on %d files.', len(im_files)) - - # Create missing output folders and pre-compute target directories. - output_dirs = create_output_dirs(im_files, basepath_in, output_dir) - - # Run depth prediction network. - if depth: - im_batch = [] - for i in range(len(im_files)): - if i % 100 == 0: - logging.info('%s of %s files processed.', i, len(im_files)) - - # Read image and run inference. - if inference_mode == INFERENCE_MODE_SINGLE: - if inference_crop == INFERENCE_CROP_NONE: - im = util.load_image(im_files[i], resize=(img_width, img_height)) - elif inference_crop == INFERENCE_CROP_CITYSCAPES: - im = util.crop_cityscapes(util.load_image(im_files[i]), - resize=(img_width, img_height)) - elif inference_mode == INFERENCE_MODE_TRIPLETS: - im = util.load_image(im_files[i], resize=(img_width * 3, img_height)) - im = im[:, img_width:img_width*2] - if flip_for_depth: - im = np.flip(im, axis=1) - im_batch.append(im) - - if len(im_batch) == batch_size or i == len(im_files) - 1: - # Call inference on batch. - for _ in range(batch_size - len(im_batch)): # Fill up batch. 
- im_batch.append(np.zeros(shape=(img_height, img_width, 3), - dtype=np.float32)) - im_batch = np.stack(im_batch, axis=0) - est_depth = inference_model.inference_depth(im_batch, sess) - if flip_for_depth: - est_depth = np.flip(est_depth, axis=2) - im_batch = np.flip(im_batch, axis=2) - - for j in range(len(im_batch)): - color_map = util.normalize_depth_for_display( - np.squeeze(est_depth[j])) - visualization = np.concatenate((im_batch[j], color_map), axis=0) - # Save raw prediction and color visualization. Extract filename - # without extension from full path: e.g. path/to/input_dir/folder1/ - # file1.png -> file1 - k = i - len(im_batch) + 1 + j - filename_root = os.path.splitext(os.path.basename(im_files[k]))[0] - pref = '_flip' if flip_for_depth else '' - output_raw = os.path.join( - output_dirs[k], filename_root + pref + '.npy') - output_vis = os.path.join( - output_dirs[k], filename_root + pref + '.png') - with gfile.Open(output_raw, 'wb') as f: - np.save(f, est_depth[j]) - util.save_image(output_vis, visualization, file_extension) - im_batch = [] - - # Run egomotion network. - if egomotion: - if inference_mode == INFERENCE_MODE_SINGLE: - # Run regular egomotion inference loop. - input_image_seq = [] - input_seg_seq = [] - current_sequence_dir = None - current_output_handle = None - for i in range(len(im_files)): - sequence_dir = os.path.dirname(im_files[i]) - if sequence_dir != current_sequence_dir: - # Assume start of a new sequence, since this image lies in a - # different directory than the previous ones. - # Clear egomotion input buffer. 
- output_filepath = os.path.join(output_dirs[i], 'egomotion.txt') - if current_output_handle is not None: - current_output_handle.close() - current_sequence_dir = sequence_dir - logging.info('Writing egomotion sequence to %s.', output_filepath) - current_output_handle = gfile.Open(output_filepath, 'w') - input_image_seq = [] - im = util.load_image(im_files[i], resize=(img_width, img_height)) - input_image_seq.append(im) - if use_masks: - im_seg_path = im_files[i].replace('.%s' % file_extension, - '-seg.%s' % file_extension) - if not gfile.Exists(im_seg_path): - raise ValueError('No segmentation mask %s has been found for ' - 'image %s. If none are available, disable ' - 'use_masks.' % (im_seg_path, im_files[i])) - input_seg_seq.append(util.load_image(im_seg_path, - resize=(img_width, img_height), - interpolation='nn')) - - if len(input_image_seq) < seq_length: # Buffer not filled yet. - continue - if len(input_image_seq) > seq_length: # Remove oldest entry. - del input_image_seq[0] - if use_masks: - del input_seg_seq[0] - - input_image_stack = np.concatenate(input_image_seq, axis=2) - input_image_stack = np.expand_dims(input_image_stack, axis=0) - if use_masks: - input_image_stack = mask_image_stack(input_image_stack, - input_seg_seq) - est_egomotion = np.squeeze(inference_model.inference_egomotion( - input_image_stack, sess)) - egomotion_str = [] - for j in range(seq_length - 1): - egomotion_str.append(','.join([str(d) for d in est_egomotion[j]])) - current_output_handle.write( - str(i) + ' ' + ' '.join(egomotion_str) + '\n') - if current_output_handle is not None: - current_output_handle.close() - elif inference_mode == INFERENCE_MODE_TRIPLETS: - written_before = [] - for i in range(len(im_files)): - im = util.load_image(im_files[i], resize=(img_width * 3, img_height)) - input_image_stack = np.concatenate( - [im[:, :img_width], im[:, img_width:img_width*2], - im[:, img_width*2:]], axis=2) - input_image_stack = np.expand_dims(input_image_stack, axis=0) - if 
use_masks: - im_seg_path = im_files[i].replace('.%s' % file_extension, - '-seg.%s' % file_extension) - if not gfile.Exists(im_seg_path): - raise ValueError('No segmentation mask %s has been found for ' - 'image %s. If none are available, disable ' - 'use_masks.' % (im_seg_path, im_files[i])) - seg = util.load_image(im_seg_path, - resize=(img_width * 3, img_height), - interpolation='nn') - input_seg_seq = [seg[:, :img_width], seg[:, img_width:img_width*2], - seg[:, img_width*2:]] - input_image_stack = mask_image_stack(input_image_stack, - input_seg_seq) - est_egomotion = inference_model.inference_egomotion( - input_image_stack, sess) - est_egomotion = np.squeeze(est_egomotion) - egomotion_1_2 = ','.join([str(d) for d in est_egomotion[0]]) - egomotion_2_3 = ','.join([str(d) for d in est_egomotion[1]]) - - output_filepath = os.path.join(output_dirs[i], 'egomotion.txt') - file_mode = 'w' if output_filepath not in written_before else 'a' - with gfile.Open(output_filepath, file_mode) as current_output_handle: - current_output_handle.write(str(i) + ' ' + egomotion_1_2 + ' ' + - egomotion_2_3 + '\n') - written_before.append(output_filepath) - logging.info('Done.') - - -def mask_image_stack(input_image_stack, input_seg_seq): - """Masks out moving image contents by using the segmentation masks provided. - - This can lead to better odometry accuracy for motion models, but is optional - to use. Is only called if use_masks is enabled. - Args: - input_image_stack: The input image stack of shape (1, H, W, seq_length). - input_seg_seq: List of segmentation masks with seq_length elements of shape - (H, W, C) for some number of channels C. - - Returns: - Input image stack with detections provided by segmentation mask removed. - """ - background = [mask == 0 for mask in input_seg_seq] - background = reduce(lambda m1, m2: m1 & m2, background) - # If masks are RGB, assume all channels to be the same. Reduce to the first. 
- if background.ndim == 3 and background.shape[2] > 1: - background = np.expand_dims(background[:, :, 0], axis=2) - elif background.ndim == 2: # Expand. - background = np.expand_dism(background, axis=2) - # background is now of shape (H, W, 1). - background_stack = np.tile(background, [1, 1, input_image_stack.shape[3]]) - return np.multiply(input_image_stack, background_stack) - - -def collect_input_images(input_dir, input_list_file, file_extension): - """Collects all input images that are to be processed.""" - if input_dir is not None: - im_files = _recursive_glob(input_dir, '*.' + file_extension) - basepath_in = os.path.normpath(input_dir) - elif input_list_file is not None: - im_files = util.read_text_lines(input_list_file) - basepath_in = os.path.dirname(input_list_file) - im_files = [os.path.join(basepath_in, f) for f in im_files] - im_files = [f for f in im_files if 'disp' not in f and '-seg' not in f and - '-fseg' not in f and '-flip' not in f] - return sorted(im_files), basepath_in - - -def create_output_dirs(im_files, basepath_in, output_dir): - """Creates required directories, and returns output dir for each file.""" - output_dirs = [] - for i in range(len(im_files)): - relative_folder_in = os.path.relpath( - os.path.dirname(im_files[i]), basepath_in) - absolute_folder_out = os.path.join(output_dir, relative_folder_in) - if not gfile.IsDirectory(absolute_folder_out): - gfile.MakeDirs(absolute_folder_out) - output_dirs.append(absolute_folder_out) - return output_dirs - - -def _recursive_glob(treeroot, pattern): - results = [] - for base, _, files in os.walk(treeroot): - files = fnmatch.filter(files, pattern) - results.extend(os.path.join(base, f) for f in files) - return results - - -def main(_): - #if (flags.input_dir is None) == (flags.input_list_file is None): - # raise ValueError('Exactly one of either input_dir or input_list_file has ' - # 'to be provided.') - #if not flags.depth and not flags.egomotion: - # raise ValueError('At least one of the depth 
and egomotion network has to ' - # 'be called for inference.') - #if (flags.inference_mode == inference_lib.INFERENCE_MODE_TRIPLETS and - # flags.seq_length != 3): - # raise ValueError('For sequence lengths other than three, single inference ' - # 'mode has to be used.') - - _run_inference(output_dir=FLAGS.output_dir, - file_extension=FLAGS.file_extension, - depth=FLAGS.depth, - egomotion=FLAGS.egomotion, - model_ckpt=FLAGS.model_ckpt, - input_dir=FLAGS.input_dir, - input_list_file=FLAGS.input_list_file, - batch_size=FLAGS.batch_size, - img_height=FLAGS.img_height, - img_width=FLAGS.img_width, - seq_length=FLAGS.seq_length, - architecture=FLAGS.architecture, - imagenet_norm=FLAGS.imagenet_norm, - use_skip=FLAGS.use_skip, - joint_encoder=FLAGS.joint_encoder, - shuffle=FLAGS.shuffle, - flip_for_depth=FLAGS.flip, - inference_mode=FLAGS.inference_mode, - inference_crop=FLAGS.inference_crop, - use_masks=FLAGS.use_masks) - - -if __name__ == '__main__': - app.run(main) diff --git a/research/struct2depth/model.py b/research/struct2depth/model.py deleted file mode 100644 index 873be26bb941626c93a7bdbdc9052ede51aeaa8c..0000000000000000000000000000000000000000 --- a/research/struct2depth/model.py +++ /dev/null @@ -1,848 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Build model for inference or training.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import logging -import numpy as np -import tensorflow as tf - -import nets -import project -import reader -import util - -gfile = tf.gfile -slim = tf.contrib.slim - -NUM_SCALES = 4 - - -class Model(object): - """Model code based on SfMLearner.""" - - def __init__(self, - data_dir=None, - file_extension='png', - is_training=True, - learning_rate=0.0002, - beta1=0.9, - reconstr_weight=0.85, - smooth_weight=0.05, - ssim_weight=0.15, - icp_weight=0.0, - batch_size=4, - img_height=128, - img_width=416, - seq_length=3, - architecture=nets.RESNET, - imagenet_norm=True, - weight_reg=0.05, - exhaustive_mode=False, - random_scale_crop=False, - flipping_mode=reader.FLIP_RANDOM, - random_color=True, - depth_upsampling=True, - depth_normalization=True, - compute_minimum_loss=True, - use_skip=True, - joint_encoder=True, - build_sum=True, - shuffle=True, - input_file='train', - handle_motion=False, - equal_weighting=False, - size_constraint_weight=0.0, - train_global_scale_var=True): - self.data_dir = data_dir - self.file_extension = file_extension - self.is_training = is_training - self.learning_rate = learning_rate - self.reconstr_weight = reconstr_weight - self.smooth_weight = smooth_weight - self.ssim_weight = ssim_weight - self.icp_weight = icp_weight - self.beta1 = beta1 - self.batch_size = batch_size - self.img_height = img_height - self.img_width = img_width - self.seq_length = seq_length - self.architecture = architecture - self.imagenet_norm = imagenet_norm - self.weight_reg = weight_reg - self.exhaustive_mode = exhaustive_mode - self.random_scale_crop = random_scale_crop - self.flipping_mode = flipping_mode - self.random_color = random_color - self.depth_upsampling = depth_upsampling - self.depth_normalization = 
depth_normalization - self.compute_minimum_loss = compute_minimum_loss - self.use_skip = use_skip - self.joint_encoder = joint_encoder - self.build_sum = build_sum - self.shuffle = shuffle - self.input_file = input_file - self.handle_motion = handle_motion - self.equal_weighting = equal_weighting - self.size_constraint_weight = size_constraint_weight - self.train_global_scale_var = train_global_scale_var - - logging.info('data_dir: %s', data_dir) - logging.info('file_extension: %s', file_extension) - logging.info('is_training: %s', is_training) - logging.info('learning_rate: %s', learning_rate) - logging.info('reconstr_weight: %s', reconstr_weight) - logging.info('smooth_weight: %s', smooth_weight) - logging.info('ssim_weight: %s', ssim_weight) - logging.info('icp_weight: %s', icp_weight) - logging.info('size_constraint_weight: %s', size_constraint_weight) - logging.info('beta1: %s', beta1) - logging.info('batch_size: %s', batch_size) - logging.info('img_height: %s', img_height) - logging.info('img_width: %s', img_width) - logging.info('seq_length: %s', seq_length) - logging.info('architecture: %s', architecture) - logging.info('imagenet_norm: %s', imagenet_norm) - logging.info('weight_reg: %s', weight_reg) - logging.info('exhaustive_mode: %s', exhaustive_mode) - logging.info('random_scale_crop: %s', random_scale_crop) - logging.info('flipping_mode: %s', flipping_mode) - logging.info('random_color: %s', random_color) - logging.info('depth_upsampling: %s', depth_upsampling) - logging.info('depth_normalization: %s', depth_normalization) - logging.info('compute_minimum_loss: %s', compute_minimum_loss) - logging.info('use_skip: %s', use_skip) - logging.info('joint_encoder: %s', joint_encoder) - logging.info('build_sum: %s', build_sum) - logging.info('shuffle: %s', shuffle) - logging.info('input_file: %s', input_file) - logging.info('handle_motion: %s', handle_motion) - logging.info('equal_weighting: %s', equal_weighting) - logging.info('train_global_scale_var: %s', 
train_global_scale_var) - - if self.size_constraint_weight > 0 or not is_training: - self.global_scale_var = tf.Variable( - 0.1, name='global_scale_var', - trainable=self.is_training and train_global_scale_var, - dtype=tf.float32, - constraint=lambda x: tf.clip_by_value(x, 0, np.infty)) - - if self.is_training: - self.reader = reader.DataReader(self.data_dir, self.batch_size, - self.img_height, self.img_width, - self.seq_length, NUM_SCALES, - self.file_extension, - self.random_scale_crop, - self.flipping_mode, - self.random_color, - self.imagenet_norm, - self.shuffle, - self.input_file) - self.build_train_graph() - else: - self.build_depth_test_graph() - self.build_egomotion_test_graph() - if self.handle_motion: - self.build_objectmotion_test_graph() - - # At this point, the model is ready. Print some info on model params. - util.count_parameters() - - def build_train_graph(self): - self.build_inference_for_training() - self.build_loss() - self.build_train_op() - if self.build_sum: - self.build_summaries() - - def build_inference_for_training(self): - """Invokes depth and ego-motion networks and computes clouds if needed.""" - (self.image_stack, self.image_stack_norm, self.seg_stack, - self.intrinsic_mat, self.intrinsic_mat_inv) = self.reader.read_data() - with tf.variable_scope('depth_prediction'): - # Organized by ...[i][scale]. Note that the order is flipped in - # variables in build_loss() below. - self.disp = {} - self.depth = {} - self.depth_upsampled = {} - self.inf_loss = 0.0 - # Organized by [i]. 
- disp_bottlenecks = [None] * self.seq_length - - if self.icp_weight > 0: - self.cloud = {} - for i in range(self.seq_length): - image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)] - - multiscale_disps_i, disp_bottlenecks[i] = nets.disp_net( - self.architecture, image, self.use_skip, - self.weight_reg, True) - multiscale_depths_i = [1.0 / d for d in multiscale_disps_i] - self.disp[i] = multiscale_disps_i - self.depth[i] = multiscale_depths_i - if self.depth_upsampling: - self.depth_upsampled[i] = [] - # Upsample low-resolution depth maps using differentiable bilinear - # interpolation. - for s in range(len(multiscale_depths_i)): - self.depth_upsampled[i].append(tf.image.resize_bilinear( - multiscale_depths_i[s], [self.img_height, self.img_width], - align_corners=True)) - - if self.icp_weight > 0: - multiscale_clouds_i = [ - project.get_cloud(d, - self.intrinsic_mat_inv[:, s, :, :], - name='cloud%d_%d' % (s, i)) - for (s, d) in enumerate(multiscale_depths_i) - ] - self.cloud[i] = multiscale_clouds_i - # Reuse the same depth graph for all images. - tf.get_variable_scope().reuse_variables() - - if self.handle_motion: - # Define egomotion network. This network can see the whole scene except - # for any moving objects as indicated by the provided segmentation masks. - # To avoid the network getting clues of motion by tracking those masks, we - # define the segmentation masks as the union temporally. - with tf.variable_scope('egomotion_prediction'): - base_input = self.image_stack_norm # (B, H, W, 9) - seg_input = self.seg_stack # (B, H, W, 9) - ref_zero = tf.constant(0, dtype=tf.uint8) - # Motion model is currently defined for three-frame sequences. 
- object_mask1 = tf.equal(seg_input[:, :, :, 0], ref_zero) - object_mask2 = tf.equal(seg_input[:, :, :, 3], ref_zero) - object_mask3 = tf.equal(seg_input[:, :, :, 6], ref_zero) - mask_complete = tf.expand_dims(tf.logical_and( # (B, H, W, 1) - tf.logical_and(object_mask1, object_mask2), object_mask3), axis=3) - mask_complete = tf.tile(mask_complete, (1, 1, 1, 9)) # (B, H, W, 9) - # Now mask out base_input. - self.mask_complete = tf.to_float(mask_complete) - self.base_input_masked = base_input * self.mask_complete - self.egomotion = nets.egomotion_net( - image_stack=self.base_input_masked, - disp_bottleneck_stack=None, - joint_encoder=False, - seq_length=self.seq_length, - weight_reg=self.weight_reg) - - # Define object motion network for refinement. This network only sees - # one object at a time over the whole sequence, and tries to estimate its - # motion. The sequence of images are the respective warped frames. - - # For each scale, contains batch_size elements of shape (N, 2, 6). - self.object_transforms = {} - # For each scale, contains batch_size elements of shape (N, H, W, 9). - self.object_masks = {} - self.object_masks_warped = {} - # For each scale, contains batch_size elements of size N. - self.object_ids = {} - - self.egomotions_seq = {} - self.warped_seq = {} - self.inputs_objectmotion_net = {} - with tf.variable_scope('objectmotion_prediction'): - # First, warp raw images according to overall egomotion. 
- for s in range(NUM_SCALES): - self.warped_seq[s] = [] - self.egomotions_seq[s] = [] - for source_index in range(self.seq_length): - egomotion_mat_i_1 = project.get_transform_mat( - self.egomotion, source_index, 1) - warped_image_i_1, _ = ( - project.inverse_warp( - self.image_stack[ - :, :, :, source_index*3:(source_index+1)*3], - self.depth_upsampled[1][s], - egomotion_mat_i_1, - self.intrinsic_mat[:, 0, :, :], - self.intrinsic_mat_inv[:, 0, :, :])) - - self.warped_seq[s].append(warped_image_i_1) - self.egomotions_seq[s].append(egomotion_mat_i_1) - - # Second, for every object in the segmentation mask, take its mask and - # warp it according to the egomotion estimate. Then put a threshold to - # binarize the warped result. Use this mask to mask out background and - # other objects, and pass the filtered image to the object motion - # network. - self.object_transforms[s] = [] - self.object_masks[s] = [] - self.object_ids[s] = [] - self.object_masks_warped[s] = [] - self.inputs_objectmotion_net[s] = {} - - for i in range(self.batch_size): - seg_sequence = self.seg_stack[i] # (H, W, 9=3*3) - object_ids = tf.unique(tf.reshape(seg_sequence, [-1]))[0] - self.object_ids[s].append(object_ids) - color_stack = [] - mask_stack = [] - mask_stack_warped = [] - for j in range(self.seq_length): - current_image = self.warped_seq[s][j][i] # (H, W, 3) - current_seg = seg_sequence[:, :, j * 3:(j+1) * 3] # (H, W, 3) - - def process_obj_mask_warp(obj_id): - """Performs warping of the individual object masks.""" - obj_mask = tf.to_float(tf.equal(current_seg, obj_id)) - # Warp obj_mask according to overall egomotion. - obj_mask_warped, _ = ( - project.inverse_warp( - tf.expand_dims(obj_mask, axis=0), - # Middle frame, highest scale, batch element i: - tf.expand_dims(self.depth_upsampled[1][s][i], axis=0), - # Matrix for warping j into middle frame, batch elem. 
i: - tf.expand_dims(self.egomotions_seq[s][j][i], axis=0), - tf.expand_dims(self.intrinsic_mat[i, 0, :, :], axis=0), - tf.expand_dims(self.intrinsic_mat_inv[i, 0, :, :], - axis=0))) - obj_mask_warped = tf.squeeze(obj_mask_warped) - obj_mask_binarized = tf.greater( # Threshold to binarize mask. - obj_mask_warped, tf.constant(0.5)) - return tf.to_float(obj_mask_binarized) - - def process_obj_mask(obj_id): - """Returns the individual object masks separately.""" - return tf.to_float(tf.equal(current_seg, obj_id)) - object_masks = tf.map_fn( # (N, H, W, 3) - process_obj_mask, object_ids, dtype=tf.float32) - - if self.size_constraint_weight > 0: - # The object segmentation masks are all in object_masks. - # We need to measure the height of every of them, and get the - # approximate distance. - - # self.depth_upsampled of shape (seq_length, scale, B, H, W). - depth_pred = self.depth_upsampled[j][s][i] # (H, W) - def get_losses(obj_mask): - """Get motion constraint loss.""" - # Find height of segment. - coords = tf.where(tf.greater( # Shape (num_true, 2=yx) - obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32))) - y_max = tf.reduce_max(coords[:, 0]) - y_min = tf.reduce_min(coords[:, 0]) - seg_height = y_max - y_min - f_y = self.intrinsic_mat[i, 0, 1, 1] - approx_depth = ((f_y * self.global_scale_var) / - tf.to_float(seg_height)) - reference_pred = tf.boolean_mask( - depth_pred, tf.greater( - tf.reshape(obj_mask[:, :, 0], - (self.img_height, self.img_width, 1)), - tf.constant(0.5, dtype=tf.float32))) - - # Establish loss on approx_depth, a scalar, and - # reference_pred, our dense prediction. Normalize both to - # prevent degenerative depth shrinking. 
- global_mean_depth_pred = tf.reduce_mean(depth_pred) - reference_pred /= global_mean_depth_pred - approx_depth /= global_mean_depth_pred - spatial_err = tf.abs(reference_pred - approx_depth) - mean_spatial_err = tf.reduce_mean(spatial_err) - return mean_spatial_err - - losses = tf.map_fn( - get_losses, object_masks, dtype=tf.float32) - self.inf_loss += tf.reduce_mean(losses) - object_masks_warped = tf.map_fn( # (N, H, W, 3) - process_obj_mask_warp, object_ids, dtype=tf.float32) - filtered_images = tf.map_fn( - lambda mask: current_image * mask, object_masks_warped, - dtype=tf.float32) # (N, H, W, 3) - color_stack.append(filtered_images) - mask_stack.append(object_masks) - mask_stack_warped.append(object_masks_warped) - - # For this batch-element, if there are N moving objects, - # color_stack, mask_stack and mask_stack_warped contain both - # seq_length elements of shape (N, H, W, 3). - # We can now concatenate them on the last axis, creating a tensor of - # (N, H, W, 3*3 = 9), and, assuming N does not get too large so that - # we have enough memory, pass them in a single batch to the object - # motion network. - mask_stack = tf.concat(mask_stack, axis=3) # (N, H, W, 9) - mask_stack_warped = tf.concat(mask_stack_warped, axis=3) - color_stack = tf.concat(color_stack, axis=3) # (N, H, W, 9) - all_transforms = nets.objectmotion_net( - # We cut the gradient flow here as the object motion gradient - # should have no saying in how the egomotion network behaves. - # One could try just stopping the gradient for egomotion, but - # not for the depth prediction network. - image_stack=tf.stop_gradient(color_stack), - disp_bottleneck_stack=None, - joint_encoder=False, # Joint encoder not supported. - seq_length=self.seq_length, - weight_reg=self.weight_reg) - # all_transforms of shape (N, 2, 6). 
- self.object_transforms[s].append(all_transforms) - self.object_masks[s].append(mask_stack) - self.object_masks_warped[s].append(mask_stack_warped) - self.inputs_objectmotion_net[s][i] = color_stack - tf.get_variable_scope().reuse_variables() - else: - # Don't handle motion, classic model formulation. - with tf.name_scope('egomotion_prediction'): - if self.joint_encoder: - # Re-arrange disp_bottleneck_stack to be of shape - # [B, h_hid, w_hid, c_hid * seq_length]. Currently, it is a list with - # seq_length elements, each of dimension [B, h_hid, w_hid, c_hid]. - disp_bottleneck_stack = tf.concat(disp_bottlenecks, axis=3) - else: - disp_bottleneck_stack = None - self.egomotion = nets.egomotion_net( - image_stack=self.image_stack_norm, - disp_bottleneck_stack=disp_bottleneck_stack, - joint_encoder=self.joint_encoder, - seq_length=self.seq_length, - weight_reg=self.weight_reg) - - def build_loss(self): - """Adds ops for computing loss.""" - with tf.name_scope('compute_loss'): - self.reconstr_loss = 0 - self.smooth_loss = 0 - self.ssim_loss = 0 - self.icp_transform_loss = 0 - self.icp_residual_loss = 0 - - # self.images is organized by ...[scale][B, h, w, seq_len * 3]. - self.images = [None for _ in range(NUM_SCALES)] - # Following nested lists are organized by ...[scale][source-target]. - self.warped_image = [{} for _ in range(NUM_SCALES)] - self.warp_mask = [{} for _ in range(NUM_SCALES)] - self.warp_error = [{} for _ in range(NUM_SCALES)] - self.ssim_error = [{} for _ in range(NUM_SCALES)] - self.icp_transform = [{} for _ in range(NUM_SCALES)] - self.icp_residual = [{} for _ in range(NUM_SCALES)] - - self.middle_frame_index = util.get_seq_middle(self.seq_length) - - # Compute losses at each scale. - for s in range(NUM_SCALES): - # Scale image stack. - if s == 0: # Just as a precaution. TF often has interpolation bugs. 
- self.images[s] = self.image_stack - else: - height_s = int(self.img_height / (2**s)) - width_s = int(self.img_width / (2**s)) - self.images[s] = tf.image.resize_bilinear( - self.image_stack, [height_s, width_s], align_corners=True) - - # Smoothness. - if self.smooth_weight > 0: - for i in range(self.seq_length): - # When computing minimum loss, use the depth map from the middle - # frame only. - if not self.compute_minimum_loss or i == self.middle_frame_index: - disp_smoothing = self.disp[i][s] - if self.depth_normalization: - # Perform depth normalization, dividing by the mean. - mean_disp = tf.reduce_mean(disp_smoothing, axis=[1, 2, 3], - keep_dims=True) - disp_input = disp_smoothing / mean_disp - else: - disp_input = disp_smoothing - scaling_f = (1.0 if self.equal_weighting else 1.0 / (2**s)) - self.smooth_loss += scaling_f * self.depth_smoothness( - disp_input, self.images[s][:, :, :, 3 * i:3 * (i + 1)]) - - self.debug_all_warped_image_batches = [] - for i in range(self.seq_length): - for j in range(self.seq_length): - if i == j: - continue - - # When computing minimum loss, only consider the middle frame as - # target. - if self.compute_minimum_loss and j != self.middle_frame_index: - continue - # We only consider adjacent frames, unless either - # compute_minimum_loss is on (where the middle frame is matched with - # all other frames) or exhaustive_mode is on (where all frames are - # matched with each other). - if (not self.compute_minimum_loss and not self.exhaustive_mode and - abs(i - j) != 1): - continue - - selected_scale = 0 if self.depth_upsampling else s - source = self.images[selected_scale][:, :, :, 3 * i:3 * (i + 1)] - target = self.images[selected_scale][:, :, :, 3 * j:3 * (j + 1)] - - if self.depth_upsampling: - target_depth = self.depth_upsampled[j][s] - else: - target_depth = self.depth[j][s] - - key = '%d-%d' % (i, j) - - if self.handle_motion: - # self.seg_stack of shape (B, H, W, 9). 
- # target_depth corresponds to middle frame, of shape (B, H, W, 1). - - # Now incorporate the other warping results, performed according - # to the object motion network's predictions. - # self.object_masks batch_size elements of (N, H, W, 9). - # self.object_masks_warped batch_size elements of (N, H, W, 9). - # self.object_transforms batch_size elements of (N, 2, 6). - self.all_batches = [] - for batch_s in range(self.batch_size): - # To warp i into j, first take the base warping (this is the - # full image i warped into j using only the egomotion estimate). - base_warping = self.warped_seq[s][i][batch_s] - transform_matrices_thisbatch = tf.map_fn( - lambda transform: project.get_transform_mat( - tf.expand_dims(transform, axis=0), i, j)[0], - self.object_transforms[0][batch_s]) - - def inverse_warp_wrapper(matrix): - """Wrapper for inverse warping method.""" - warp_image, _ = ( - project.inverse_warp( - tf.expand_dims(base_warping, axis=0), - tf.expand_dims(target_depth[batch_s], axis=0), - tf.expand_dims(matrix, axis=0), - tf.expand_dims(self.intrinsic_mat[ - batch_s, selected_scale, :, :], axis=0), - tf.expand_dims(self.intrinsic_mat_inv[ - batch_s, selected_scale, :, :], axis=0))) - return warp_image - warped_images_thisbatch = tf.map_fn( - inverse_warp_wrapper, transform_matrices_thisbatch, - dtype=tf.float32) - warped_images_thisbatch = warped_images_thisbatch[:, 0, :, :, :] - # warped_images_thisbatch is now of shape (N, H, W, 9). - - # Combine warped frames into a single one, using the object - # masks. Result should be (1, 128, 416, 3). - # Essentially, we here want to sum them all up, filtered by the - # respective object masks. 
- mask_base_valid_source = tf.equal( - self.seg_stack[batch_s, :, :, i*3:(i+1)*3], - tf.constant(0, dtype=tf.uint8)) - mask_base_valid_target = tf.equal( - self.seg_stack[batch_s, :, :, j*3:(j+1)*3], - tf.constant(0, dtype=tf.uint8)) - mask_valid = tf.logical_and( - mask_base_valid_source, mask_base_valid_target) - self.base_warping = base_warping * tf.to_float(mask_valid) - background = tf.expand_dims(self.base_warping, axis=0) - def construct_const_filter_tensor(obj_id): - return tf.fill( - dims=[self.img_height, self.img_width, 3], - value=tf.sign(obj_id)) * tf.to_float( - tf.equal(self.seg_stack[batch_s, :, :, 3:6], - tf.cast(obj_id, dtype=tf.uint8))) - filter_tensor = tf.map_fn( - construct_const_filter_tensor, - tf.to_float(self.object_ids[s][batch_s])) - filter_tensor = tf.stack(filter_tensor, axis=0) - objects_to_add = tf.reduce_sum( - tf.multiply(warped_images_thisbatch, filter_tensor), - axis=0, keepdims=True) - combined = background + objects_to_add - self.all_batches.append(combined) - # Now of shape (B, 128, 416, 3). - self.warped_image[s][key] = tf.concat(self.all_batches, axis=0) - - else: - # Don't handle motion, classic model formulation. - egomotion_mat_i_j = project.get_transform_mat( - self.egomotion, i, j) - # Inverse warp the source image to the target image frame for - # photometric consistency loss. - self.warped_image[s][key], self.warp_mask[s][key] = ( - project.inverse_warp( - source, - target_depth, - egomotion_mat_i_j, - self.intrinsic_mat[:, selected_scale, :, :], - self.intrinsic_mat_inv[:, selected_scale, :, :])) - - # Reconstruction loss. - self.warp_error[s][key] = tf.abs(self.warped_image[s][key] - target) - if not self.compute_minimum_loss: - self.reconstr_loss += tf.reduce_mean( - self.warp_error[s][key] * self.warp_mask[s][key]) - # SSIM. - if self.ssim_weight > 0: - self.ssim_error[s][key] = self.ssim(self.warped_image[s][key], - target) - # TODO(rezama): This should be min_pool2d(). 
- if not self.compute_minimum_loss: - ssim_mask = slim.avg_pool2d(self.warp_mask[s][key], 3, 1, - 'VALID') - self.ssim_loss += tf.reduce_mean( - self.ssim_error[s][key] * ssim_mask) - - # If the minimum loss should be computed, the loss calculation has been - # postponed until here. - if self.compute_minimum_loss: - for frame_index in range(self.middle_frame_index): - key1 = '%d-%d' % (frame_index, self.middle_frame_index) - key2 = '%d-%d' % (self.seq_length - frame_index - 1, - self.middle_frame_index) - logging.info('computing min error between %s and %s', key1, key2) - min_error = tf.minimum(self.warp_error[s][key1], - self.warp_error[s][key2]) - self.reconstr_loss += tf.reduce_mean(min_error) - if self.ssim_weight > 0: # Also compute the minimum SSIM loss. - min_error_ssim = tf.minimum(self.ssim_error[s][key1], - self.ssim_error[s][key2]) - self.ssim_loss += tf.reduce_mean(min_error_ssim) - - # Build the total loss as composed of L1 reconstruction, SSIM, smoothing - # and object size constraint loss as appropriate. 
- self.reconstr_loss *= self.reconstr_weight - self.total_loss = self.reconstr_loss - if self.smooth_weight > 0: - self.smooth_loss *= self.smooth_weight - self.total_loss += self.smooth_loss - if self.ssim_weight > 0: - self.ssim_loss *= self.ssim_weight - self.total_loss += self.ssim_loss - if self.size_constraint_weight > 0: - self.inf_loss *= self.size_constraint_weight - self.total_loss += self.inf_loss - - def gradient_x(self, img): - return img[:, :, :-1, :] - img[:, :, 1:, :] - - def gradient_y(self, img): - return img[:, :-1, :, :] - img[:, 1:, :, :] - - def depth_smoothness(self, depth, img): - """Computes image-aware depth smoothness loss.""" - depth_dx = self.gradient_x(depth) - depth_dy = self.gradient_y(depth) - image_dx = self.gradient_x(img) - image_dy = self.gradient_y(img) - weights_x = tf.exp(-tf.reduce_mean(tf.abs(image_dx), 3, keepdims=True)) - weights_y = tf.exp(-tf.reduce_mean(tf.abs(image_dy), 3, keepdims=True)) - smoothness_x = depth_dx * weights_x - smoothness_y = depth_dy * weights_y - return tf.reduce_mean(abs(smoothness_x)) + tf.reduce_mean(abs(smoothness_y)) - - def ssim(self, x, y): - """Computes a differentiable structured image similarity measure.""" - c1 = 0.01**2 # As defined in SSIM to stabilize div. by small denominator. 
- c2 = 0.03**2 - mu_x = slim.avg_pool2d(x, 3, 1, 'VALID') - mu_y = slim.avg_pool2d(y, 3, 1, 'VALID') - sigma_x = slim.avg_pool2d(x**2, 3, 1, 'VALID') - mu_x**2 - sigma_y = slim.avg_pool2d(y**2, 3, 1, 'VALID') - mu_y**2 - sigma_xy = slim.avg_pool2d(x * y, 3, 1, 'VALID') - mu_x * mu_y - ssim_n = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2) - ssim_d = (mu_x**2 + mu_y**2 + c1) * (sigma_x + sigma_y + c2) - ssim = ssim_n / ssim_d - return tf.clip_by_value((1 - ssim) / 2, 0, 1) - - def build_train_op(self): - with tf.name_scope('train_op'): - optim = tf.train.AdamOptimizer(self.learning_rate, self.beta1) - self.train_op = slim.learning.create_train_op(self.total_loss, optim) - self.global_step = tf.Variable(0, name='global_step', trainable=False) - self.incr_global_step = tf.assign( - self.global_step, self.global_step + 1) - - def build_summaries(self): - """Adds scalar and image summaries for TensorBoard.""" - tf.summary.scalar('total_loss', self.total_loss) - tf.summary.scalar('reconstr_loss', self.reconstr_loss) - if self.smooth_weight > 0: - tf.summary.scalar('smooth_loss', self.smooth_loss) - if self.ssim_weight > 0: - tf.summary.scalar('ssim_loss', self.ssim_loss) - if self.icp_weight > 0: - tf.summary.scalar('icp_transform_loss', self.icp_transform_loss) - tf.summary.scalar('icp_residual_loss', self.icp_residual_loss) - - if self.size_constraint_weight > 0: - tf.summary.scalar('inf_loss', self.inf_loss) - tf.summary.histogram('global_scale_var', self.global_scale_var) - - if self.handle_motion: - for s in range(NUM_SCALES): - for batch_s in range(self.batch_size): - whole_strip = tf.concat([self.warped_seq[s][0][batch_s], - self.warped_seq[s][1][batch_s], - self.warped_seq[s][2][batch_s]], axis=1) - tf.summary.image('base_warp_batch%s_scale%s' % (batch_s, s), - tf.expand_dims(whole_strip, axis=0)) - - whole_strip_input = tf.concat( - [self.inputs_objectmotion_net[s][batch_s][:, :, :, 0:3], - self.inputs_objectmotion_net[s][batch_s][:, :, :, 3:6], - 
self.inputs_objectmotion_net[s][batch_s][:, :, :, 6:9]], axis=2) - tf.summary.image('input_objectmotion_batch%s_scale%s' % (batch_s, s), - whole_strip_input) # (B, H, 3*W, 3) - - for batch_s in range(self.batch_size): - whole_strip = tf.concat([self.base_input_masked[batch_s, :, :, 0:3], - self.base_input_masked[batch_s, :, :, 3:6], - self.base_input_masked[batch_s, :, :, 6:9]], - axis=1) - tf.summary.image('input_egomotion_batch%s' % batch_s, - tf.expand_dims(whole_strip, axis=0)) - - # Show transform predictions (of all objects). - for batch_s in range(self.batch_size): - for i in range(self.seq_length - 1): - # self.object_transforms contains batch_size elements of (N, 2, 6). - tf.summary.histogram('batch%d_tx%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 0]) - tf.summary.histogram('batch%d_ty%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 1]) - tf.summary.histogram('batch%d_tz%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 2]) - tf.summary.histogram('batch%d_rx%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 3]) - tf.summary.histogram('batch%d_ry%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 4]) - tf.summary.histogram('batch%d_rz%d' % (batch_s, i), - self.object_transforms[0][batch_s][:, i, 5]) - - for i in range(self.seq_length - 1): - tf.summary.histogram('tx%d' % i, self.egomotion[:, i, 0]) - tf.summary.histogram('ty%d' % i, self.egomotion[:, i, 1]) - tf.summary.histogram('tz%d' % i, self.egomotion[:, i, 2]) - tf.summary.histogram('rx%d' % i, self.egomotion[:, i, 3]) - tf.summary.histogram('ry%d' % i, self.egomotion[:, i, 4]) - tf.summary.histogram('rz%d' % i, self.egomotion[:, i, 5]) - - for s in range(NUM_SCALES): - for i in range(self.seq_length): - tf.summary.image('scale%d_image%d' % (s, i), - self.images[s][:, :, :, 3 * i:3 * (i + 1)]) - if i in self.depth: - tf.summary.histogram('scale%d_depth%d' % (s, i), self.depth[i][s]) - tf.summary.histogram('scale%d_disp%d' % (s, i), 
self.disp[i][s]) - tf.summary.image('scale%d_disparity%d' % (s, i), self.disp[i][s]) - - for key in self.warped_image[s]: - tf.summary.image('scale%d_warped_image%s' % (s, key), - self.warped_image[s][key]) - tf.summary.image('scale%d_warp_error%s' % (s, key), - self.warp_error[s][key]) - if self.ssim_weight > 0: - tf.summary.image('scale%d_ssim_error%s' % (s, key), - self.ssim_error[s][key]) - if self.icp_weight > 0: - tf.summary.image('scale%d_icp_residual%s' % (s, key), - self.icp_residual[s][key]) - transform = self.icp_transform[s][key] - tf.summary.histogram('scale%d_icp_tx%s' % (s, key), transform[:, 0]) - tf.summary.histogram('scale%d_icp_ty%s' % (s, key), transform[:, 1]) - tf.summary.histogram('scale%d_icp_tz%s' % (s, key), transform[:, 2]) - tf.summary.histogram('scale%d_icp_rx%s' % (s, key), transform[:, 3]) - tf.summary.histogram('scale%d_icp_ry%s' % (s, key), transform[:, 4]) - tf.summary.histogram('scale%d_icp_rz%s' % (s, key), transform[:, 5]) - - def build_depth_test_graph(self): - """Builds depth model reading from placeholders.""" - with tf.variable_scope('depth_prediction'): - input_image = tf.placeholder( - tf.float32, [self.batch_size, self.img_height, self.img_width, 3], - name='raw_input') - self.input_image = input_image - if self.imagenet_norm: - input_image = (input_image - reader.IMAGENET_MEAN) / reader.IMAGENET_SD - est_disp, _ = nets.disp_net(architecture=self.architecture, - image=input_image, - use_skip=self.use_skip, - weight_reg=self.weight_reg, - is_training=True) - est_depth = 1.0 / est_disp[0] - self.est_depth = est_depth - - def build_egomotion_test_graph(self): - """Builds egomotion model reading from placeholders.""" - input_image_stack = tf.placeholder( - tf.float32, - [1, self.img_height, self.img_width, self.seq_length * 3], - name='raw_input') - input_bottleneck_stack = None - - if self.imagenet_norm: - im_mean = tf.tile( - tf.constant(reader.IMAGENET_MEAN), multiples=[self.seq_length]) - im_sd = tf.tile( - 
tf.constant(reader.IMAGENET_SD), multiples=[self.seq_length]) - input_image_stack = (input_image_stack - im_mean) / im_sd - - if self.joint_encoder: - # Pre-compute embeddings here. - with tf.variable_scope('depth_prediction', reuse=True): - input_bottleneck_stack = [] - encoder_selected = nets.encoder(self.architecture) - for i in range(self.seq_length): - input_image = input_image_stack[:, :, :, i * 3:(i + 1) * 3] - tf.get_variable_scope().reuse_variables() - embedding, _ = encoder_selected( - target_image=input_image, - weight_reg=self.weight_reg, - is_training=True) - input_bottleneck_stack.append(embedding) - input_bottleneck_stack = tf.concat(input_bottleneck_stack, axis=3) - - with tf.variable_scope('egomotion_prediction'): - est_egomotion = nets.egomotion_net( - image_stack=input_image_stack, - disp_bottleneck_stack=input_bottleneck_stack, - joint_encoder=self.joint_encoder, - seq_length=self.seq_length, - weight_reg=self.weight_reg) - self.input_image_stack = input_image_stack - self.est_egomotion = est_egomotion - - def build_objectmotion_test_graph(self): - """Builds egomotion model reading from placeholders.""" - input_image_stack_om = tf.placeholder( - tf.float32, - [1, self.img_height, self.img_width, self.seq_length * 3], - name='raw_input') - - if self.imagenet_norm: - im_mean = tf.tile( - tf.constant(reader.IMAGENET_MEAN), multiples=[self.seq_length]) - im_sd = tf.tile( - tf.constant(reader.IMAGENET_SD), multiples=[self.seq_length]) - input_image_stack_om = (input_image_stack_om - im_mean) / im_sd - - with tf.variable_scope('objectmotion_prediction'): - est_objectmotion = nets.objectmotion_net( - image_stack=input_image_stack_om, - disp_bottleneck_stack=None, - joint_encoder=self.joint_encoder, - seq_length=self.seq_length, - weight_reg=self.weight_reg) - self.input_image_stack_om = input_image_stack_om - self.est_objectmotion = est_objectmotion - - def inference_depth(self, inputs, sess): - return sess.run(self.est_depth, 
feed_dict={self.input_image: inputs}) - - def inference_egomotion(self, inputs, sess): - return sess.run( - self.est_egomotion, feed_dict={self.input_image_stack: inputs}) - - def inference_objectmotion(self, inputs, sess): - return sess.run( - self.est_objectmotion, feed_dict={self.input_image_stack_om: inputs}) diff --git a/research/struct2depth/nets.py b/research/struct2depth/nets.py deleted file mode 100644 index 1cec1b36f3c44d3eaf11451ca3ae4d92839d5886..0000000000000000000000000000000000000000 --- a/research/struct2depth/nets.py +++ /dev/null @@ -1,525 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Depth and Ego-Motion networks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -slim = tf.contrib.slim - -SIMPLE = 'simple' -RESNET = 'resnet' -ARCHITECTURES = [SIMPLE, RESNET] - -SCALE_TRANSLATION = 0.001 -SCALE_ROTATION = 0.01 - -# Disparity (inverse depth) values range from 0.01 to 10. Note that effectively, -# this is undone if depth normalization is used, which scales the values to -# have a mean of 1. 
-DISP_SCALING = 10 -MIN_DISP = 0.01 -WEIGHT_DECAY_KEY = 'WEIGHT_DECAY' -EGOMOTION_VEC_SIZE = 6 - - -def egomotion_net(image_stack, disp_bottleneck_stack, joint_encoder, seq_length, - weight_reg): - """Predict ego-motion vectors from a stack of frames or embeddings. - - Args: - image_stack: Input tensor with shape [B, h, w, seq_length * 3] in order. - disp_bottleneck_stack: Input tensor with shape [B, h_hidden, w_hidden, - seq_length * c_hidden] in order. - joint_encoder: Determines if the same encoder is used for computing the - bottleneck layer of both the egomotion and the depth prediction - network. If enabled, disp_bottleneck_stack is used as input, and the - encoding steps are skipped. If disabled, a separate encoder is defined - on image_stack. - seq_length: The sequence length used. - weight_reg: The amount of weight regularization. - - Returns: - Egomotion vectors with shape [B, seq_length - 1, 6]. - """ - num_egomotion_vecs = seq_length - 1 - with tf.variable_scope('pose_exp_net') as sc: - end_points_collection = sc.original_name_scope + '_end_points' - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], - normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(weight_reg), - normalizer_params=None, - activation_fn=tf.nn.relu, - outputs_collections=end_points_collection): - if not joint_encoder: - # Define separate encoder. If sharing, we can skip the encoding step, - # as the bottleneck layer will already be passed as input. 
- cnv1 = slim.conv2d(image_stack, 16, [7, 7], stride=2, scope='cnv1') - cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2') - cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3') - cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4') - cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5') - - with tf.variable_scope('pose'): - inputs = disp_bottleneck_stack if joint_encoder else cnv5 - cnv6 = slim.conv2d(inputs, 256, [3, 3], stride=2, scope='cnv6') - cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7') - pred_channels = EGOMOTION_VEC_SIZE * num_egomotion_vecs - egomotion_pred = slim.conv2d(cnv7, pred_channels, [1, 1], scope='pred', - stride=1, normalizer_fn=None, - activation_fn=None) - egomotion_avg = tf.reduce_mean(egomotion_pred, [1, 2]) - egomotion_res = tf.reshape( - egomotion_avg, [-1, num_egomotion_vecs, EGOMOTION_VEC_SIZE]) - # Tinghui found that scaling by a small constant facilitates training. - egomotion_scaled = tf.concat([egomotion_res[:, 0:3] * SCALE_TRANSLATION, - egomotion_res[:, 3:6] * SCALE_ROTATION], - axis=1) - return egomotion_scaled - - -def objectmotion_net(image_stack, disp_bottleneck_stack, joint_encoder, - seq_length, weight_reg): - """Predict object-motion vectors from a stack of frames or embeddings. - - Args: - image_stack: Input tensor with shape [B, h, w, seq_length * 3] in order. - disp_bottleneck_stack: Input tensor with shape [B, h_hidden, w_hidden, - seq_length * c_hidden] in order. - joint_encoder: Determines if the same encoder is used for computing the - bottleneck layer of both the egomotion and the depth prediction - network. If enabled, disp_bottleneck_stack is used as input, and the - encoding steps are skipped. If disabled, a separate encoder is defined - on image_stack. - seq_length: The sequence length used. - weight_reg: The amount of weight regularization. - - Returns: - Egomotion vectors with shape [B, seq_length - 1, 6]. 
- """ - num_egomotion_vecs = seq_length - 1 - with tf.variable_scope('pose_exp_net') as sc: - end_points_collection = sc.original_name_scope + '_end_points' - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], - normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(weight_reg), - normalizer_params=None, - activation_fn=tf.nn.relu, - outputs_collections=end_points_collection): - if not joint_encoder: - # Define separate encoder. If sharing, we can skip the encoding step, - # as the bottleneck layer will already be passed as input. - cnv1 = slim.conv2d(image_stack, 16, [7, 7], stride=2, scope='cnv1') - cnv2 = slim.conv2d(cnv1, 32, [5, 5], stride=2, scope='cnv2') - cnv3 = slim.conv2d(cnv2, 64, [3, 3], stride=2, scope='cnv3') - cnv4 = slim.conv2d(cnv3, 128, [3, 3], stride=2, scope='cnv4') - cnv5 = slim.conv2d(cnv4, 256, [3, 3], stride=2, scope='cnv5') - - with tf.variable_scope('pose'): - inputs = disp_bottleneck_stack if joint_encoder else cnv5 - cnv6 = slim.conv2d(inputs, 256, [3, 3], stride=2, scope='cnv6') - cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2, scope='cnv7') - pred_channels = EGOMOTION_VEC_SIZE * num_egomotion_vecs - egomotion_pred = slim.conv2d(cnv7, pred_channels, [1, 1], scope='pred', - stride=1, normalizer_fn=None, - activation_fn=None) - egomotion_avg = tf.reduce_mean(egomotion_pred, [1, 2]) - egomotion_res = tf.reshape( - egomotion_avg, [-1, num_egomotion_vecs, EGOMOTION_VEC_SIZE]) - # Tinghui found that scaling by a small constant facilitates training. - egomotion_scaled = tf.concat([egomotion_res[:, 0:3] * SCALE_TRANSLATION, - egomotion_res[:, 3:6] * SCALE_ROTATION], - axis=1) - return egomotion_scaled - - -def disp_net(architecture, image, use_skip, weight_reg, is_training): - """Defines an encoder-decoder architecture for depth prediction.""" - if architecture not in ARCHITECTURES: - raise ValueError('Unknown architecture.') - encoder_selected = encoder(architecture) - decoder_selected = decoder(architecture) - - # Encode image. 
- bottleneck, skip_connections = encoder_selected(image, weight_reg, - is_training) - # Decode to depth. - multiscale_disps_i = decoder_selected(target_image=image, - bottleneck=bottleneck, - weight_reg=weight_reg, - use_skip=use_skip, - skip_connections=skip_connections) - return multiscale_disps_i, bottleneck - - -def encoder(architecture): - return encoder_resnet if architecture == RESNET else encoder_simple - - -def decoder(architecture): - return decoder_resnet if architecture == RESNET else decoder_simple - - -def encoder_simple(target_image, weight_reg, is_training): - """Defines the old encoding architecture.""" - del is_training - with slim.arg_scope([slim.conv2d], - normalizer_fn=None, - normalizer_params=None, - weights_regularizer=slim.l2_regularizer(weight_reg), - activation_fn=tf.nn.relu): - # Define (joint) encoder. - cnv1 = slim.conv2d(target_image, 32, [7, 7], stride=2, scope='cnv1') - cnv1b = slim.conv2d(cnv1, 32, [7, 7], stride=1, scope='cnv1b') - cnv2 = slim.conv2d(cnv1b, 64, [5, 5], stride=2, scope='cnv2') - cnv2b = slim.conv2d(cnv2, 64, [5, 5], stride=1, scope='cnv2b') - cnv3 = slim.conv2d(cnv2b, 128, [3, 3], stride=2, scope='cnv3') - cnv3b = slim.conv2d(cnv3, 128, [3, 3], stride=1, scope='cnv3b') - cnv4 = slim.conv2d(cnv3b, 256, [3, 3], stride=2, scope='cnv4') - cnv4b = slim.conv2d(cnv4, 256, [3, 3], stride=1, scope='cnv4b') - cnv5 = slim.conv2d(cnv4b, 512, [3, 3], stride=2, scope='cnv5') - cnv5b = slim.conv2d(cnv5, 512, [3, 3], stride=1, scope='cnv5b') - cnv6 = slim.conv2d(cnv5b, 512, [3, 3], stride=2, scope='cnv6') - cnv6b = slim.conv2d(cnv6, 512, [3, 3], stride=1, scope='cnv6b') - cnv7 = slim.conv2d(cnv6b, 512, [3, 3], stride=2, scope='cnv7') - cnv7b = slim.conv2d(cnv7, 512, [3, 3], stride=1, scope='cnv7b') - return cnv7b, (cnv6b, cnv5b, cnv4b, cnv3b, cnv2b, cnv1b) - - -def decoder_simple(target_image, bottleneck, weight_reg, use_skip, - skip_connections): - """Defines the old depth decoder architecture.""" - h = 
target_image.get_shape()[1].value - w = target_image.get_shape()[2].value - (cnv6b, cnv5b, cnv4b, cnv3b, cnv2b, cnv1b) = skip_connections - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], - normalizer_fn=None, - normalizer_params=None, - weights_regularizer=slim.l2_regularizer(weight_reg), - activation_fn=tf.nn.relu): - up7 = slim.conv2d_transpose(bottleneck, 512, [3, 3], stride=2, - scope='upcnv7') - up7 = _resize_like(up7, cnv6b) - if use_skip: - i7_in = tf.concat([up7, cnv6b], axis=3) - else: - i7_in = up7 - icnv7 = slim.conv2d(i7_in, 512, [3, 3], stride=1, scope='icnv7') - - up6 = slim.conv2d_transpose(icnv7, 512, [3, 3], stride=2, scope='upcnv6') - up6 = _resize_like(up6, cnv5b) - if use_skip: - i6_in = tf.concat([up6, cnv5b], axis=3) - else: - i6_in = up6 - icnv6 = slim.conv2d(i6_in, 512, [3, 3], stride=1, scope='icnv6') - - up5 = slim.conv2d_transpose(icnv6, 256, [3, 3], stride=2, scope='upcnv5') - up5 = _resize_like(up5, cnv4b) - if use_skip: - i5_in = tf.concat([up5, cnv4b], axis=3) - else: - i5_in = up5 - icnv5 = slim.conv2d(i5_in, 256, [3, 3], stride=1, scope='icnv5') - - up4 = slim.conv2d_transpose(icnv5, 128, [3, 3], stride=2, scope='upcnv4') - up4 = _resize_like(up4, cnv3b) - if use_skip: - i4_in = tf.concat([up4, cnv3b], axis=3) - else: - i4_in = up4 - icnv4 = slim.conv2d(i4_in, 128, [3, 3], stride=1, scope='icnv4') - disp4 = (slim.conv2d(icnv4, 1, [3, 3], stride=1, activation_fn=tf.sigmoid, - normalizer_fn=None, scope='disp4') - * DISP_SCALING + MIN_DISP) - disp4_up = tf.image.resize_bilinear(disp4, [np.int(h / 4), np.int(w / 4)], - align_corners=True) - - up3 = slim.conv2d_transpose(icnv4, 64, [3, 3], stride=2, scope='upcnv3') - up3 = _resize_like(up3, cnv2b) - if use_skip: - i3_in = tf.concat([up3, cnv2b, disp4_up], axis=3) - else: - i3_in = tf.concat([up3, disp4_up]) - icnv3 = slim.conv2d(i3_in, 64, [3, 3], stride=1, scope='icnv3') - disp3 = (slim.conv2d(icnv3, 1, [3, 3], stride=1, activation_fn=tf.sigmoid, - normalizer_fn=None, 
scope='disp3') - * DISP_SCALING + MIN_DISP) - disp3_up = tf.image.resize_bilinear(disp3, [np.int(h / 2), np.int(w / 2)], - align_corners=True) - - up2 = slim.conv2d_transpose(icnv3, 32, [3, 3], stride=2, scope='upcnv2') - up2 = _resize_like(up2, cnv1b) - if use_skip: - i2_in = tf.concat([up2, cnv1b, disp3_up], axis=3) - else: - i2_in = tf.concat([up2, disp3_up]) - icnv2 = slim.conv2d(i2_in, 32, [3, 3], stride=1, scope='icnv2') - disp2 = (slim.conv2d(icnv2, 1, [3, 3], stride=1, activation_fn=tf.sigmoid, - normalizer_fn=None, scope='disp2') - * DISP_SCALING + MIN_DISP) - disp2_up = tf.image.resize_bilinear(disp2, [h, w], align_corners=True) - - up1 = slim.conv2d_transpose(icnv2, 16, [3, 3], stride=2, scope='upcnv1') - i1_in = tf.concat([up1, disp2_up], axis=3) - icnv1 = slim.conv2d(i1_in, 16, [3, 3], stride=1, scope='icnv1') - disp1 = (slim.conv2d(icnv1, 1, [3, 3], stride=1, activation_fn=tf.sigmoid, - normalizer_fn=None, scope='disp1') - * DISP_SCALING + MIN_DISP) - return [disp1, disp2, disp3, disp4] - - -def encoder_resnet(target_image, weight_reg, is_training): - """Defines a ResNet18-based encoding architecture. - - This implementation follows Juyong Kim's implementation of ResNet18 on GitHub: - https://github.com/dalgu90/resnet-18-tensorflow - - Args: - target_image: Input tensor with shape [B, h, w, 3] to encode. - weight_reg: Parameter ignored. - is_training: Whether the model is being trained or not. - - Returns: - Tuple of tensors, with the first being the bottleneck layer as tensor of - size [B, h_hid, w_hid, c_hid], and others being intermediate layers - for building skip-connections. 
- """ - del weight_reg - encoder_filters = [64, 64, 128, 256, 512] - stride = 2 - - # conv1 - with tf.variable_scope('conv1'): - x = _conv(target_image, 7, encoder_filters[0], stride) - x = _bn(x, is_train=is_training) - econv1 = _relu(x) - x = tf.nn.max_pool(econv1, [1, 3, 3, 1], [1, 2, 2, 1], 'SAME') - - # conv2_x - x = _residual_block(x, is_training, name='conv2_1') - econv2 = _residual_block(x, is_training, name='conv2_2') - - # conv3_x - x = _residual_block_first(econv2, is_training, encoder_filters[2], stride, - name='conv3_1') - econv3 = _residual_block(x, is_training, name='conv3_2') - - # conv4_x - x = _residual_block_first(econv3, is_training, encoder_filters[3], stride, - name='conv4_1') - econv4 = _residual_block(x, is_training, name='conv4_2') - - # conv5_x - x = _residual_block_first(econv4, is_training, encoder_filters[4], stride, - name='conv5_1') - econv5 = _residual_block(x, is_training, name='conv5_2') - return econv5, (econv4, econv3, econv2, econv1) - - -def decoder_resnet(target_image, bottleneck, weight_reg, use_skip, - skip_connections): - """Defines the depth decoder architecture. - - Args: - target_image: The original encoder input tensor with shape [B, h, w, 3]. - Just the shape information is used here. - bottleneck: Bottleneck layer to be decoded. - weight_reg: The amount of weight regularization. - use_skip: Whether the passed skip connections econv1, econv2, econv3 and - econv4 should be used. - skip_connections: Tensors for building skip-connections. - - Returns: - Disparities at 4 different scales. 
- """ - (econv4, econv3, econv2, econv1) = skip_connections - decoder_filters = [16, 32, 64, 128, 256] - default_pad = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) - reg = slim.l2_regularizer(weight_reg) if weight_reg > 0.0 else None - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], - normalizer_fn=None, - normalizer_params=None, - activation_fn=tf.nn.relu, - weights_regularizer=reg): - upconv5 = slim.conv2d_transpose(bottleneck, decoder_filters[4], [3, 3], - stride=2, scope='upconv5') - upconv5 = _resize_like(upconv5, econv4) - if use_skip: - i5_in = tf.concat([upconv5, econv4], axis=3) - else: - i5_in = upconv5 - i5_in = tf.pad(i5_in, default_pad, mode='REFLECT') - iconv5 = slim.conv2d(i5_in, decoder_filters[4], [3, 3], stride=1, - scope='iconv5', padding='VALID') - - upconv4 = slim.conv2d_transpose(iconv5, decoder_filters[3], [3, 3], - stride=2, scope='upconv4') - upconv4 = _resize_like(upconv4, econv3) - if use_skip: - i4_in = tf.concat([upconv4, econv3], axis=3) - else: - i4_in = upconv4 - i4_in = tf.pad(i4_in, default_pad, mode='REFLECT') - iconv4 = slim.conv2d(i4_in, decoder_filters[3], [3, 3], stride=1, - scope='iconv4', padding='VALID') - - disp4_input = tf.pad(iconv4, default_pad, mode='REFLECT') - disp4 = (slim.conv2d(disp4_input, 1, [3, 3], stride=1, - activation_fn=tf.sigmoid, normalizer_fn=None, - scope='disp4', padding='VALID') - * DISP_SCALING + MIN_DISP) - - upconv3 = slim.conv2d_transpose(iconv4, decoder_filters[2], [3, 3], - stride=2, scope='upconv3') - upconv3 = _resize_like(upconv3, econv2) - if use_skip: - i3_in = tf.concat([upconv3, econv2], axis=3) - else: - i3_in = upconv3 - i3_in = tf.pad(i3_in, default_pad, mode='REFLECT') - iconv3 = slim.conv2d(i3_in, decoder_filters[2], [3, 3], stride=1, - scope='iconv3', padding='VALID') - disp3_input = tf.pad(iconv3, default_pad, mode='REFLECT') - disp3 = (slim.conv2d(disp3_input, 1, [3, 3], stride=1, - activation_fn=tf.sigmoid, normalizer_fn=None, - scope='disp3', padding='VALID') - * 
DISP_SCALING + MIN_DISP) - - upconv2 = slim.conv2d_transpose(iconv3, decoder_filters[1], [3, 3], - stride=2, scope='upconv2') - upconv2 = _resize_like(upconv2, econv1) - if use_skip: - i2_in = tf.concat([upconv2, econv1], axis=3) - else: - i2_in = upconv2 - i2_in = tf.pad(i2_in, default_pad, mode='REFLECT') - iconv2 = slim.conv2d(i2_in, decoder_filters[1], [3, 3], stride=1, - scope='iconv2', padding='VALID') - disp2_input = tf.pad(iconv2, default_pad, mode='REFLECT') - disp2 = (slim.conv2d(disp2_input, 1, [3, 3], stride=1, - activation_fn=tf.sigmoid, normalizer_fn=None, - scope='disp2', padding='VALID') - * DISP_SCALING + MIN_DISP) - - upconv1 = slim.conv2d_transpose(iconv2, decoder_filters[0], [3, 3], - stride=2, scope='upconv1') - upconv1 = _resize_like(upconv1, target_image) - upconv1 = tf.pad(upconv1, default_pad, mode='REFLECT') - iconv1 = slim.conv2d(upconv1, decoder_filters[0], [3, 3], stride=1, - scope='iconv1', padding='VALID') - disp1_input = tf.pad(iconv1, default_pad, mode='REFLECT') - disp1 = (slim.conv2d(disp1_input, 1, [3, 3], stride=1, - activation_fn=tf.sigmoid, normalizer_fn=None, - scope='disp1', padding='VALID') - * DISP_SCALING + MIN_DISP) - - return [disp1, disp2, disp3, disp4] - - -def _residual_block_first(x, is_training, out_channel, strides, name='unit'): - """Helper function for defining ResNet architecture.""" - in_channel = x.get_shape().as_list()[-1] - with tf.variable_scope(name): - # Shortcut connection - if in_channel == out_channel: - if strides == 1: - shortcut = tf.identity(x) - else: - shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], - [1, strides, strides, 1], 'VALID') - else: - shortcut = _conv(x, 1, out_channel, strides, name='shortcut') - # Residual - x = _conv(x, 3, out_channel, strides, name='conv_1') - x = _bn(x, is_train=is_training, name='bn_1') - x = _relu(x, name='relu_1') - x = _conv(x, 3, out_channel, 1, name='conv_2') - x = _bn(x, is_train=is_training, name='bn_2') - # Merge - x = x + shortcut - x = _relu(x, 
name='relu_2') - return x - - -def _residual_block(x, is_training, input_q=None, output_q=None, name='unit'): - """Helper function for defining ResNet architecture.""" - num_channel = x.get_shape().as_list()[-1] - with tf.variable_scope(name): - shortcut = x # Shortcut connection - # Residual - x = _conv(x, 3, num_channel, 1, input_q=input_q, output_q=output_q, - name='conv_1') - x = _bn(x, is_train=is_training, name='bn_1') - x = _relu(x, name='relu_1') - x = _conv(x, 3, num_channel, 1, input_q=output_q, output_q=output_q, - name='conv_2') - x = _bn(x, is_train=is_training, name='bn_2') - # Merge - x = x + shortcut - x = _relu(x, name='relu_2') - return x - - -def _conv(x, filter_size, out_channel, stride, pad='SAME', input_q=None, - output_q=None, name='conv'): - """Helper function for defining ResNet architecture.""" - if (input_q is None) ^ (output_q is None): - raise ValueError('Input/Output splits are not correctly given.') - - in_shape = x.get_shape() - with tf.variable_scope(name): - # Main operation: conv2d - with tf.device('/CPU:0'): - kernel = tf.get_variable( - 'kernel', [filter_size, filter_size, in_shape[3], out_channel], - tf.float32, initializer=tf.random_normal_initializer( - stddev=np.sqrt(2.0/filter_size/filter_size/out_channel))) - if kernel not in tf.get_collection(WEIGHT_DECAY_KEY): - tf.add_to_collection(WEIGHT_DECAY_KEY, kernel) - conv = tf.nn.conv2d(x, kernel, [1, stride, stride, 1], pad) - return conv - - -def _bn(x, is_train, name='bn'): - """Helper function for defining ResNet architecture.""" - bn = tf.layers.batch_normalization(x, training=is_train, name=name) - return bn - - -def _relu(x, name=None, leakness=0.0): - """Helper function for defining ResNet architecture.""" - if leakness > 0.0: - name = 'lrelu' if name is None else name - return tf.maximum(x, x*leakness, name='lrelu') - else: - name = 'relu' if name is None else name - return tf.nn.relu(x, name='relu') - - -def _resize_like(inputs, ref): - i_h, i_w = 
inputs.get_shape()[1], inputs.get_shape()[2] - r_h, r_w = ref.get_shape()[1], ref.get_shape()[2] - if i_h == r_h and i_w == r_w: - return inputs - else: - # TODO(casser): Other interpolation methods could be explored here. - return tf.image.resize_bilinear(inputs, [r_h.value, r_w.value], - align_corners=True) diff --git a/research/struct2depth/optimize.py b/research/struct2depth/optimize.py deleted file mode 100644 index becb3ab69f29dfb9dabe5e1534bd1d960f6037cb..0000000000000000000000000000000000000000 --- a/research/struct2depth/optimize.py +++ /dev/null @@ -1,383 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Applies online refinement while running inference. - -Instructions: Run static inference first before calling this script. Make sure -to point output_dir to the same folder where static inference results were -saved previously. - -For example use, please refer to README. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datetime -import os -import random -from absl import app -from absl import flags -from absl import logging -import numpy as np -import tensorflow as tf - -import model -import nets -import reader -import util - -gfile = tf.gfile -SAVE_EVERY = 1 # Defines the interval that predictions should be saved at. 
-SAVE_PREVIEWS = True # If set, while save image previews of depth predictions. -FIXED_SEED = 8964 # Fixed seed for repeatability. - -flags.DEFINE_string('output_dir', None, 'Directory to store predictions. ' - 'Assumes that regular inference has been executed before ' - 'and results were stored in this folder.') -flags.DEFINE_string('data_dir', None, 'Folder pointing to preprocessed ' - 'triplets to fine-tune on.') -flags.DEFINE_string('triplet_list_file', None, 'Text file containing paths to ' - 'image files to process. Paths should be relative with ' - 'respect to the list file location. Every line should be ' - 'of the form [input_folder_name] [input_frame_num] ' - '[output_path], where [output_path] is optional to specify ' - 'a different path to store the prediction.') -flags.DEFINE_string('triplet_list_file_remains', None, 'Optional text file ' - 'containing relative paths to image files which should not ' - 'be fine-tuned, e.g. because of missing adjacent frames. ' - 'For all files listed, the static prediction will be ' - 'copied instead. File can be empty. 
If not, every line ' - 'should be of the form [input_folder_name] ' - '[input_frame_num] [output_path], where [output_path] is ' - 'optional to specify a different path to take and store ' - 'the unrefined prediction from/to.') -flags.DEFINE_string('model_ckpt', None, 'Model checkpoint to optimize.') -flags.DEFINE_string('ft_name', '', 'Optional prefix for temporary files.') -flags.DEFINE_string('file_extension', 'png', 'Image data file extension.') -flags.DEFINE_float('learning_rate', 0.0001, 'Adam learning rate.') -flags.DEFINE_float('beta1', 0.9, 'Adam momentum.') -flags.DEFINE_float('reconstr_weight', 0.85, 'Frame reconstruction loss weight.') -flags.DEFINE_float('ssim_weight', 0.15, 'SSIM loss weight.') -flags.DEFINE_float('smooth_weight', 0.01, 'Smoothness loss weight.') -flags.DEFINE_float('icp_weight', 0.0, 'ICP loss weight.') -flags.DEFINE_float('size_constraint_weight', 0.0005, 'Weight of the object ' - 'size constraint loss. Use only with motion handling.') -flags.DEFINE_integer('batch_size', 1, 'The size of a sample batch') -flags.DEFINE_integer('img_height', 128, 'Input frame height.') -flags.DEFINE_integer('img_width', 416, 'Input frame width.') -flags.DEFINE_integer('seq_length', 3, 'Number of frames in sequence.') -flags.DEFINE_enum('architecture', nets.RESNET, nets.ARCHITECTURES, - 'Defines the architecture to use for the depth prediction ' - 'network. Defaults to ResNet-based encoder and accompanying ' - 'decoder.') -flags.DEFINE_boolean('imagenet_norm', True, 'Whether to normalize the input ' - 'images channel-wise so that they match the distribution ' - 'most ImageNet-models were trained on.') -flags.DEFINE_float('weight_reg', 0.05, 'The amount of weight regularization to ' - 'apply. This has no effect on the ResNet-based encoder ' - 'architecture.') -flags.DEFINE_boolean('exhaustive_mode', False, 'Whether to exhaustively warp ' - 'from any frame to any other instead of just considering ' - 'adjacent frames. 
Where necessary, multiple egomotion ' - 'estimates will be applied. Does not have an effect if ' - 'compute_minimum_loss is enabled.') -flags.DEFINE_boolean('random_scale_crop', False, 'Whether to apply random ' - 'image scaling and center cropping during training.') -flags.DEFINE_bool('depth_upsampling', True, 'Whether to apply depth ' - 'upsampling of lower-scale representations before warping to ' - 'compute reconstruction loss on full-resolution image.') -flags.DEFINE_bool('depth_normalization', True, 'Whether to apply depth ' - 'normalization, that is, normalizing inverse depth ' - 'prediction maps by their mean to avoid degeneration towards ' - 'small values.') -flags.DEFINE_bool('compute_minimum_loss', True, 'Whether to take the ' - 'element-wise minimum of the reconstruction/SSIM error in ' - 'order to avoid overly penalizing dis-occlusion effects.') -flags.DEFINE_bool('use_skip', True, 'Whether to use skip connections in the ' - 'encoder-decoder architecture.') -flags.DEFINE_bool('joint_encoder', False, 'Whether to share parameters ' - 'between the depth and egomotion networks by using a joint ' - 'encoder architecture. The egomotion network is then ' - 'operating only on the hidden representation provided by the ' - 'joint encoder.') -flags.DEFINE_float('egomotion_threshold', 0.01, 'Minimum egomotion magnitude ' - 'to apply finetuning. If lower, just forwards the ordinary ' - 'prediction.') -flags.DEFINE_integer('num_steps', 20, 'Number of optimization steps to run.') -flags.DEFINE_boolean('handle_motion', True, 'Whether the checkpoint was ' - 'trained with motion handling.') -flags.DEFINE_bool('flip', False, 'Whether images should be flipped as well as ' - 'resulting predictions (for test-time augmentation). 
This ' - 'currently applies to the depth network only.') - -FLAGS = flags.FLAGS -flags.mark_flag_as_required('output_dir') -flags.mark_flag_as_required('data_dir') -flags.mark_flag_as_required('model_ckpt') -flags.mark_flag_as_required('triplet_list_file') - - -def main(_): - """Runs fine-tuning and inference. - - There are three categories of images. - 1) Images where we have previous and next frame, and that are not filtered - out by the heuristic. For them, we will use the fine-tuned predictions. - 2) Images where we have previous and next frame, but that were filtered out - by our heuristic. For them, we will use the ordinary prediction instead. - 3) Images where we have at least one missing adjacent frame. For them, we will - use the ordinary prediction as indicated by triplet_list_file_remains (if - provided). They will also not be part of the generated inference list in - the first place. - - Raises: - ValueError: Invalid parameters have been passed. - """ - - if FLAGS.handle_motion and FLAGS.joint_encoder: - raise ValueError('Using a joint encoder is currently not supported when ' - 'modeling object motion.') - if FLAGS.handle_motion and FLAGS.seq_length != 3: - raise ValueError('The current motion model implementation only supports ' - 'using a sequence length of three.') - if FLAGS.handle_motion and not FLAGS.compute_minimum_loss: - raise ValueError('Computing the minimum photometric loss is required when ' - 'enabling object motion handling.') - if FLAGS.size_constraint_weight > 0 and not FLAGS.handle_motion: - raise ValueError('To enforce object size constraints, enable motion ' - 'handling.') - if FLAGS.icp_weight > 0.0: - raise ValueError('ICP is currently not supported.') - if FLAGS.compute_minimum_loss and FLAGS.seq_length % 2 != 1: - raise ValueError('Compute minimum loss requires using an odd number of ' - 'images in a sequence.') - if FLAGS.compute_minimum_loss and FLAGS.exhaustive_mode: - raise ValueError('Exhaustive mode has no effect when 
compute_minimum_loss ' - 'is enabled.') - if FLAGS.img_width % (2 ** 5) != 0 or FLAGS.img_height % (2 ** 5) != 0: - logging.warn('Image size is not divisible by 2^5. For the architecture ' - 'employed, this could cause artefacts caused by resizing in ' - 'lower dimensions.') - - if FLAGS.output_dir.endswith('/'): - FLAGS.output_dir = FLAGS.output_dir[:-1] - - # Create file lists to prepare fine-tuning, save it to unique_file. - unique_file_name = (str(datetime.datetime.now().date()) + '_' + - str(datetime.datetime.now().time()).replace(':', '_')) - unique_file = os.path.join(FLAGS.data_dir, unique_file_name + '.txt') - with gfile.FastGFile(FLAGS.triplet_list_file, 'r') as f: - files_to_process = f.readlines() - files_to_process = [line.rstrip() for line in files_to_process] - files_to_process = [line for line in files_to_process if len(line)] - logging.info('Creating unique file list %s with %s entries.', unique_file, - len(files_to_process)) - with gfile.FastGFile(unique_file, 'w') as f_out: - fetches_network = FLAGS.num_steps * FLAGS.batch_size - fetches_saves = FLAGS.batch_size * int(np.floor(FLAGS.num_steps/SAVE_EVERY)) - repetitions = fetches_network + 3 * fetches_saves - for i in range(len(files_to_process)): - for _ in range(repetitions): - f_out.write(files_to_process[i] + '\n') - - # Read remaining files. - remaining = [] - if gfile.Exists(FLAGS.triplet_list_file_remains): - with gfile.FastGFile(FLAGS.triplet_list_file_remains, 'r') as f: - remaining = f.readlines() - remaining = [line.rstrip() for line in remaining] - remaining = [line for line in remaining if len(line)] - logging.info('Running fine-tuning on %s files, %s files are remaining.', - len(files_to_process), len(remaining)) - - # Run fine-tuning process and save predictions in id-folders. 
- tf.set_random_seed(FIXED_SEED) - np.random.seed(FIXED_SEED) - random.seed(FIXED_SEED) - flipping_mode = reader.FLIP_ALWAYS if FLAGS.flip else reader.FLIP_NONE - train_model = model.Model(data_dir=FLAGS.data_dir, - file_extension=FLAGS.file_extension, - is_training=True, - learning_rate=FLAGS.learning_rate, - beta1=FLAGS.beta1, - reconstr_weight=FLAGS.reconstr_weight, - smooth_weight=FLAGS.smooth_weight, - ssim_weight=FLAGS.ssim_weight, - icp_weight=FLAGS.icp_weight, - batch_size=FLAGS.batch_size, - img_height=FLAGS.img_height, - img_width=FLAGS.img_width, - seq_length=FLAGS.seq_length, - architecture=FLAGS.architecture, - imagenet_norm=FLAGS.imagenet_norm, - weight_reg=FLAGS.weight_reg, - exhaustive_mode=FLAGS.exhaustive_mode, - random_scale_crop=FLAGS.random_scale_crop, - flipping_mode=flipping_mode, - random_color=False, - depth_upsampling=FLAGS.depth_upsampling, - depth_normalization=FLAGS.depth_normalization, - compute_minimum_loss=FLAGS.compute_minimum_loss, - use_skip=FLAGS.use_skip, - joint_encoder=FLAGS.joint_encoder, - build_sum=False, - shuffle=False, - input_file=unique_file_name, - handle_motion=FLAGS.handle_motion, - size_constraint_weight=FLAGS.size_constraint_weight, - train_global_scale_var=False) - - failed_heuristic_ids = finetune_inference(train_model, FLAGS.model_ckpt, - FLAGS.output_dir + '_ft') - logging.info('Fine-tuning completed, %s files were filtered out by ' - 'heuristic.', len(failed_heuristic_ids)) - for failed_id in failed_heuristic_ids: - failed_entry = files_to_process[failed_id] - remaining.append(failed_entry) - logging.info('In total, %s images were fine-tuned, while %s were not.', - len(files_to_process)-len(failed_heuristic_ids), len(remaining)) - - # Copy all results to have the same structural output as running ordinary - # inference. - for i in range(len(files_to_process)): - if files_to_process[i] not in remaining: # Use fine-tuned result. 
- elements = files_to_process[i].split(' ') - source_file = os.path.join(FLAGS.output_dir + '_ft', FLAGS.ft_name + - 'id_' + str(i), - str(FLAGS.num_steps).zfill(10) + - ('_flip' if FLAGS.flip else '')) - if len(elements) == 2: # No differing mapping defined. - target_dir = os.path.join(FLAGS.output_dir + '_ft', elements[0]) - target_file = os.path.join( - target_dir, elements[1] + ('_flip' if FLAGS.flip else '')) - else: # Other mapping for file defined, copy to this location instead. - target_dir = os.path.join( - FLAGS.output_dir + '_ft', os.path.dirname(elements[2])) - target_file = os.path.join( - target_dir, - os.path.basename(elements[2]) + ('_flip' if FLAGS.flip else '')) - if not gfile.Exists(target_dir): - gfile.MakeDirs(target_dir) - logging.info('Copy refined result %s to %s.', source_file, target_file) - gfile.Copy(source_file + '.npy', target_file + '.npy', overwrite=True) - gfile.Copy(source_file + '.txt', target_file + '.txt', overwrite=True) - gfile.Copy(source_file + '.%s' % FLAGS.file_extension, - target_file + '.%s' % FLAGS.file_extension, overwrite=True) - for j in range(len(remaining)): - elements = remaining[j].split(' ') - if len(elements) == 2: # No differing mapping defined. - target_dir = os.path.join(FLAGS.output_dir + '_ft', elements[0]) - target_file = os.path.join( - target_dir, elements[1] + ('_flip' if FLAGS.flip else '')) - else: # Other mapping for file defined, copy to this location instead. 
- target_dir = os.path.join( - FLAGS.output_dir + '_ft', os.path.dirname(elements[2])) - target_file = os.path.join( - target_dir, - os.path.basename(elements[2]) + ('_flip' if FLAGS.flip else '')) - if not gfile.Exists(target_dir): - gfile.MakeDirs(target_dir) - source_file = target_file.replace('_ft', '') - logging.info('Copy unrefined result %s to %s.', source_file, target_file) - gfile.Copy(source_file + '.npy', target_file + '.npy', overwrite=True) - gfile.Copy(source_file + '.%s' % FLAGS.file_extension, - target_file + '.%s' % FLAGS.file_extension, overwrite=True) - logging.info('Done, predictions saved in %s.', FLAGS.output_dir + '_ft') - - -def finetune_inference(train_model, model_ckpt, output_dir): - """Train model.""" - vars_to_restore = None - if model_ckpt is not None: - vars_to_restore = util.get_vars_to_save_and_restore(model_ckpt) - ckpt_path = model_ckpt - pretrain_restorer = tf.train.Saver(vars_to_restore) - sv = tf.train.Supervisor(logdir=None, save_summaries_secs=0, saver=None, - summary_op=None) - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - img_nr = 0 - failed_heuristic = [] - with sv.managed_session(config=config) as sess: - # TODO(casser): Caching the weights would be better to avoid I/O bottleneck. - while True: # Loop terminates when all examples have been processed. - if model_ckpt is not None: - logging.info('Restored weights from %s', ckpt_path) - pretrain_restorer.restore(sess, ckpt_path) - logging.info('Running fine-tuning, image %s...', img_nr) - img_pred_folder = os.path.join( - output_dir, FLAGS.ft_name + 'id_' + str(img_nr)) - if not gfile.Exists(img_pred_folder): - gfile.MakeDirs(img_pred_folder) - step = 1 - - # Run fine-tuning. 
- while step <= FLAGS.num_steps: - logging.info('Running step %s of %s.', step, FLAGS.num_steps) - fetches = { - 'train': train_model.train_op, - 'global_step': train_model.global_step, - 'incr_global_step': train_model.incr_global_step - } - _ = sess.run(fetches) - if step % SAVE_EVERY == 0: - # Get latest prediction for middle frame, highest scale. - pred = train_model.depth[1][0].eval(session=sess) - if FLAGS.flip: - pred = np.flip(pred, axis=2) - input_img = train_model.image_stack.eval(session=sess) - input_img_prev = input_img[0, :, :, 0:3] - input_img_center = input_img[0, :, :, 3:6] - input_img_next = input_img[0, :, :, 6:] - img_pred_file = os.path.join( - img_pred_folder, - str(step).zfill(10) + ('_flip' if FLAGS.flip else '') + '.npy') - motion = np.squeeze(train_model.egomotion.eval(session=sess)) - # motion of shape (seq_length - 1, 6). - motion = np.mean(motion, axis=0) # Average egomotion across frames. - - if SAVE_PREVIEWS or step == FLAGS.num_steps: - # Also save preview of depth map. - color_map = util.normalize_depth_for_display( - np.squeeze(pred[0, :, :])) - visualization = np.concatenate( - (input_img_prev, input_img_center, input_img_next, color_map)) - motion_s = [str(m) for m in motion] - s_rep = ','.join(motion_s) - with gfile.Open(img_pred_file.replace('.npy', '.txt'), 'w') as f: - f.write(s_rep) - util.save_image( - img_pred_file.replace('.npy', '.%s' % FLAGS.file_extension), - visualization, FLAGS.file_extension) - - with gfile.Open(img_pred_file, 'wb') as f: - np.save(f, pred) - - # Apply heuristic to not finetune if egomotion magnitude is too low. 
- ego_magnitude = np.linalg.norm(motion[:3], ord=2) - heuristic = ego_magnitude >= FLAGS.egomotion_threshold - if not heuristic and step == FLAGS.num_steps: - failed_heuristic.append(img_nr) - - step += 1 - img_nr += 1 - return failed_heuristic - - -if __name__ == '__main__': - app.run(main) diff --git a/research/struct2depth/project.py b/research/struct2depth/project.py deleted file mode 100644 index f249ebd3d6ab5837209b07f14f4e54f39f2baf8a..0000000000000000000000000000000000000000 --- a/research/struct2depth/project.py +++ /dev/null @@ -1,326 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Geometry utilities for projecting frames based on depth and motion. - -Modified from Spatial Transformer Networks: -https://github.com/tensorflow/models/blob/master/transformer/spatial_transformer.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import logging -import numpy as np -import tensorflow as tf - - -def inverse_warp(img, depth, egomotion_mat, intrinsic_mat, - intrinsic_mat_inv): - """Inverse warp a source image to the target image plane. - - Args: - img: The source image (to sample pixels from) -- [B, H, W, 3]. - depth: Depth map of the target image -- [B, H, W]. - egomotion_mat: Matrix defining egomotion transform -- [B, 4, 4]. 
- intrinsic_mat: Camera intrinsic matrix -- [B, 3, 3]. - intrinsic_mat_inv: Inverse of the intrinsic matrix -- [B, 3, 3]. - Returns: - Projected source image - """ - dims = tf.shape(img) - batch_size, img_height, img_width = dims[0], dims[1], dims[2] - depth = tf.reshape(depth, [batch_size, 1, img_height * img_width]) - grid = _meshgrid_abs(img_height, img_width) - grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1]) - cam_coords = _pixel2cam(depth, grid, intrinsic_mat_inv) - ones = tf.ones([batch_size, 1, img_height * img_width]) - cam_coords_hom = tf.concat([cam_coords, ones], axis=1) - - # Get projection matrix for target camera frame to source pixel frame - hom_filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) - hom_filler = tf.tile(hom_filler, [batch_size, 1, 1]) - intrinsic_mat_hom = tf.concat( - [intrinsic_mat, tf.zeros([batch_size, 3, 1])], axis=2) - intrinsic_mat_hom = tf.concat([intrinsic_mat_hom, hom_filler], axis=1) - proj_target_cam_to_source_pixel = tf.matmul(intrinsic_mat_hom, egomotion_mat) - source_pixel_coords = _cam2pixel(cam_coords_hom, - proj_target_cam_to_source_pixel) - source_pixel_coords = tf.reshape(source_pixel_coords, - [batch_size, 2, img_height, img_width]) - source_pixel_coords = tf.transpose(source_pixel_coords, perm=[0, 2, 3, 1]) - projected_img, mask = _spatial_transformer(img, source_pixel_coords) - return projected_img, mask - - -def get_transform_mat(egomotion_vecs, i, j): - """Returns a transform matrix defining the transform from frame i to j.""" - egomotion_transforms = [] - batchsize = tf.shape(egomotion_vecs)[0] - if i == j: - return tf.tile(tf.expand_dims(tf.eye(4, 4), axis=0), [batchsize, 1, 1]) - for k in range(min(i, j), max(i, j)): - transform_matrix = _egomotion_vec2mat(egomotion_vecs[:, k, :], batchsize) - if i > j: # Going back in sequence, need to invert egomotion. 
- egomotion_transforms.insert(0, tf.linalg.inv(transform_matrix)) - else: # Going forward in sequence - egomotion_transforms.append(transform_matrix) - - # Multiply all matrices. - egomotion_mat = egomotion_transforms[0] - for i in range(1, len(egomotion_transforms)): - egomotion_mat = tf.matmul(egomotion_mat, egomotion_transforms[i]) - return egomotion_mat - - -def _pixel2cam(depth, pixel_coords, intrinsic_mat_inv): - """Transform coordinates in the pixel frame to the camera frame.""" - cam_coords = tf.matmul(intrinsic_mat_inv, pixel_coords) * depth - return cam_coords - - -def _cam2pixel(cam_coords, proj_c2p): - """Transform coordinates in the camera frame to the pixel frame.""" - pcoords = tf.matmul(proj_c2p, cam_coords) - x = tf.slice(pcoords, [0, 0, 0], [-1, 1, -1]) - y = tf.slice(pcoords, [0, 1, 0], [-1, 1, -1]) - z = tf.slice(pcoords, [0, 2, 0], [-1, 1, -1]) - # Not tested if adding a small number is necessary - x_norm = x / (z + 1e-10) - y_norm = y / (z + 1e-10) - pixel_coords = tf.concat([x_norm, y_norm], axis=1) - return pixel_coords - - -def _meshgrid_abs(height, width): - """Meshgrid in the absolute coordinates.""" - x_t = tf.matmul( - tf.ones(shape=tf.stack([height, 1])), - tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) - y_t = tf.matmul( - tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), - tf.ones(shape=tf.stack([1, width]))) - x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32) - y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32) - x_t_flat = tf.reshape(x_t, (1, -1)) - y_t_flat = tf.reshape(y_t, (1, -1)) - ones = tf.ones_like(x_t_flat) - grid = tf.concat([x_t_flat, y_t_flat, ones], axis=0) - return grid - - -def _euler2mat(z, y, x): - """Converts euler angles to rotation matrix. - - From: - https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 - - TODO: Remove the dimension for 'N' (deprecated for converting all source - poses altogether). 
- - Args: - z: rotation angle along z axis (in radians) -- size = [B, n] - y: rotation angle along y axis (in radians) -- size = [B, n] - x: rotation angle along x axis (in radians) -- size = [B, n] - - Returns: - Rotation matrix corresponding to the euler angles, with shape [B, n, 3, 3]. - """ - batch_size = tf.shape(z)[0] - n = 1 - z = tf.clip_by_value(z, -np.pi, np.pi) - y = tf.clip_by_value(y, -np.pi, np.pi) - x = tf.clip_by_value(x, -np.pi, np.pi) - - # Expand to B x N x 1 x 1 - z = tf.expand_dims(tf.expand_dims(z, -1), -1) - y = tf.expand_dims(tf.expand_dims(y, -1), -1) - x = tf.expand_dims(tf.expand_dims(x, -1), -1) - - zeros = tf.zeros([batch_size, n, 1, 1]) - ones = tf.ones([batch_size, n, 1, 1]) - - cosz = tf.cos(z) - sinz = tf.sin(z) - rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) - rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) - rotz_3 = tf.concat([zeros, zeros, ones], axis=3) - zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) - - cosy = tf.cos(y) - siny = tf.sin(y) - roty_1 = tf.concat([cosy, zeros, siny], axis=3) - roty_2 = tf.concat([zeros, ones, zeros], axis=3) - roty_3 = tf.concat([-siny, zeros, cosy], axis=3) - ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) - - cosx = tf.cos(x) - sinx = tf.sin(x) - rotx_1 = tf.concat([ones, zeros, zeros], axis=3) - rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) - rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) - xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) - - return tf.matmul(tf.matmul(xmat, ymat), zmat) - - -def _egomotion_vec2mat(vec, batch_size): - """Converts 6DoF transform vector to transformation matrix. - - Args: - vec: 6DoF parameters [tx, ty, tz, rx, ry, rz] -- [B, 6]. - batch_size: Batch size. - - Returns: - A transformation matrix -- [B, 4, 4]. 
- """ - translation = tf.slice(vec, [0, 0], [-1, 3]) - translation = tf.expand_dims(translation, -1) - rx = tf.slice(vec, [0, 3], [-1, 1]) - ry = tf.slice(vec, [0, 4], [-1, 1]) - rz = tf.slice(vec, [0, 5], [-1, 1]) - rot_mat = _euler2mat(rz, ry, rx) - rot_mat = tf.squeeze(rot_mat, squeeze_dims=[1]) - filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) - filler = tf.tile(filler, [batch_size, 1, 1]) - transform_mat = tf.concat([rot_mat, translation], axis=2) - transform_mat = tf.concat([transform_mat, filler], axis=1) - return transform_mat - - -def _bilinear_sampler(im, x, y, name='blinear_sampler'): - """Perform bilinear sampling on im given list of x, y coordinates. - - Implements the differentiable sampling mechanism with bilinear kernel - in https://arxiv.org/abs/1506.02025. - - x,y are tensors specifying normalized coordinates [-1, 1] to be sampled on im. - For example, (-1, -1) in (x, y) corresponds to pixel location (0, 0) in im, - and (1, 1) in (x, y) corresponds to the bottom right pixel in im. - - Args: - im: Batch of images with shape [B, h, w, channels]. - x: Tensor of normalized x coordinates in [-1, 1], with shape [B, h, w, 1]. - y: Tensor of normalized y coordinates in [-1, 1], with shape [B, h, w, 1]. - name: Name scope for ops. - - Returns: - Sampled image with shape [B, h, w, channels]. - Principled mask with shape [B, h, w, 1], dtype:float32. A value of 1.0 - in the mask indicates that the corresponding coordinate in the sampled - image is valid. - """ - with tf.variable_scope(name): - x = tf.reshape(x, [-1]) - y = tf.reshape(y, [-1]) - - # Constants. 
- batch_size = tf.shape(im)[0] - _, height, width, channels = im.get_shape().as_list() - - x = tf.to_float(x) - y = tf.to_float(y) - height_f = tf.cast(height, 'float32') - width_f = tf.cast(width, 'float32') - zero = tf.constant(0, dtype=tf.int32) - max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') - max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') - - # Scale indices from [-1, 1] to [0, width - 1] or [0, height - 1]. - x = (x + 1.0) * (width_f - 1.0) / 2.0 - y = (y + 1.0) * (height_f - 1.0) / 2.0 - - # Compute the coordinates of the 4 pixels to sample from. - x0 = tf.cast(tf.floor(x), 'int32') - x1 = x0 + 1 - y0 = tf.cast(tf.floor(y), 'int32') - y1 = y0 + 1 - - mask = tf.logical_and( - tf.logical_and(x0 >= zero, x1 <= max_x), - tf.logical_and(y0 >= zero, y1 <= max_y)) - mask = tf.to_float(mask) - - x0 = tf.clip_by_value(x0, zero, max_x) - x1 = tf.clip_by_value(x1, zero, max_x) - y0 = tf.clip_by_value(y0, zero, max_y) - y1 = tf.clip_by_value(y1, zero, max_y) - dim2 = width - dim1 = width * height - - # Create base index. - base = tf.range(batch_size) * dim1 - base = tf.reshape(base, [-1, 1]) - base = tf.tile(base, [1, height * width]) - base = tf.reshape(base, [-1]) - - base_y0 = base + y0 * dim2 - base_y1 = base + y1 * dim2 - idx_a = base_y0 + x0 - idx_b = base_y1 + x0 - idx_c = base_y0 + x1 - idx_d = base_y1 + x1 - - # Use indices to lookup pixels in the flat image and restore channels dim. - im_flat = tf.reshape(im, tf.stack([-1, channels])) - im_flat = tf.to_float(im_flat) - pixel_a = tf.gather(im_flat, idx_a) - pixel_b = tf.gather(im_flat, idx_b) - pixel_c = tf.gather(im_flat, idx_c) - pixel_d = tf.gather(im_flat, idx_d) - - x1_f = tf.to_float(x1) - y1_f = tf.to_float(y1) - - # And finally calculate interpolated values. 
- wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) - wb = tf.expand_dims((x1_f - x) * (1.0 - (y1_f - y)), 1) - wc = tf.expand_dims(((1.0 - (x1_f - x)) * (y1_f - y)), 1) - wd = tf.expand_dims(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1) - - output = tf.add_n([wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d]) - output = tf.reshape(output, tf.stack([batch_size, height, width, channels])) - mask = tf.reshape(mask, tf.stack([batch_size, height, width, 1])) - return output, mask - - -def _spatial_transformer(img, coords): - """A wrapper over binlinear_sampler(), taking absolute coords as input.""" - img_height = tf.cast(tf.shape(img)[1], tf.float32) - img_width = tf.cast(tf.shape(img)[2], tf.float32) - px = coords[:, :, :, :1] - py = coords[:, :, :, 1:] - # Normalize coordinates to [-1, 1] to send to _bilinear_sampler. - px = px / (img_width - 1) * 2.0 - 1.0 - py = py / (img_height - 1) * 2.0 - 1.0 - output_img, mask = _bilinear_sampler(img, px, py) - return output_img, mask - - -def get_cloud(depth, intrinsics_inv, name=None): - """Convert depth map to 3D point cloud.""" - with tf.name_scope(name): - dims = depth.shape.as_list() - batch_size, img_height, img_width = dims[0], dims[1], dims[2] - depth = tf.reshape(depth, [batch_size, 1, img_height * img_width]) - grid = _meshgrid_abs(img_height, img_width) - grid = tf.tile(tf.expand_dims(grid, 0), [batch_size, 1, 1]) - cam_coords = _pixel2cam(depth, grid, intrinsics_inv) - cam_coords = tf.transpose(cam_coords, [0, 2, 1]) - cam_coords = tf.reshape(cam_coords, [batch_size, img_height, img_width, 3]) - logging.info('depth -> cloud: %s', cam_coords) - return cam_coords diff --git a/research/struct2depth/reader.py b/research/struct2depth/reader.py deleted file mode 100644 index 444e4bea90a9ac377c6e4789a0eebf947f8ee790..0000000000000000000000000000000000000000 --- a/research/struct2depth/reader.py +++ /dev/null @@ -1,344 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Reads data that is produced by dataset/gen_data.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import random -from absl import logging -import tensorflow as tf - -import util - -gfile = tf.gfile - -QUEUE_SIZE = 2000 -QUEUE_BUFFER = 3 -# See nets.encoder_resnet as reference for below input-normalizing constants. -IMAGENET_MEAN = (0.485, 0.456, 0.406) -IMAGENET_SD = (0.229, 0.224, 0.225) -FLIP_RANDOM = 'random' # Always perform random flipping. -FLIP_ALWAYS = 'always' # Always flip image input, used for test augmentation. -FLIP_NONE = 'none' # Always disables flipping. 
- - -class DataReader(object): - """Reads stored sequences which are produced by dataset/gen_data.py.""" - - def __init__(self, data_dir, batch_size, img_height, img_width, seq_length, - num_scales, file_extension, random_scale_crop, flipping_mode, - random_color, imagenet_norm, shuffle, input_file='train'): - self.data_dir = data_dir - self.batch_size = batch_size - self.img_height = img_height - self.img_width = img_width - self.seq_length = seq_length - self.num_scales = num_scales - self.file_extension = file_extension - self.random_scale_crop = random_scale_crop - self.flipping_mode = flipping_mode - self.random_color = random_color - self.imagenet_norm = imagenet_norm - self.shuffle = shuffle - self.input_file = input_file - - def read_data(self): - """Provides images and camera intrinsics.""" - with tf.name_scope('data_loading'): - with tf.name_scope('enqueue_paths'): - seed = random.randint(0, 2**31 - 1) - self.file_lists = self.compile_file_list(self.data_dir, self.input_file) - image_paths_queue = tf.train.string_input_producer( - self.file_lists['image_file_list'], seed=seed, - shuffle=self.shuffle, - num_epochs=(1 if not self.shuffle else None) - ) - seg_paths_queue = tf.train.string_input_producer( - self.file_lists['segment_file_list'], seed=seed, - shuffle=self.shuffle, - num_epochs=(1 if not self.shuffle else None)) - cam_paths_queue = tf.train.string_input_producer( - self.file_lists['cam_file_list'], seed=seed, - shuffle=self.shuffle, - num_epochs=(1 if not self.shuffle else None)) - img_reader = tf.WholeFileReader() - _, image_contents = img_reader.read(image_paths_queue) - seg_reader = tf.WholeFileReader() - _, seg_contents = seg_reader.read(seg_paths_queue) - if self.file_extension == 'jpg': - image_seq = tf.image.decode_jpeg(image_contents) - seg_seq = tf.image.decode_jpeg(seg_contents, channels=3) - elif self.file_extension == 'png': - image_seq = tf.image.decode_png(image_contents, channels=3) - seg_seq = tf.image.decode_png(seg_contents, 
channels=3) - - with tf.name_scope('load_intrinsics'): - cam_reader = tf.TextLineReader() - _, raw_cam_contents = cam_reader.read(cam_paths_queue) - rec_def = [] - for _ in range(9): - rec_def.append([1.0]) - raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def) - raw_cam_vec = tf.stack(raw_cam_vec) - intrinsics = tf.reshape(raw_cam_vec, [3, 3]) - - with tf.name_scope('convert_image'): - image_seq = self.preprocess_image(image_seq) # Converts to float. - - if self.random_color: - with tf.name_scope('image_augmentation'): - image_seq = self.augment_image_colorspace(image_seq) - - image_stack = self.unpack_images(image_seq) - seg_stack = self.unpack_images(seg_seq) - - if self.flipping_mode != FLIP_NONE: - random_flipping = (self.flipping_mode == FLIP_RANDOM) - with tf.name_scope('image_augmentation_flip'): - image_stack, seg_stack, intrinsics = self.augment_images_flip( - image_stack, seg_stack, intrinsics, - randomized=random_flipping) - - if self.random_scale_crop: - with tf.name_scope('image_augmentation_scale_crop'): - image_stack, seg_stack, intrinsics = self.augment_images_scale_crop( - image_stack, seg_stack, intrinsics, self.img_height, - self.img_width) - - with tf.name_scope('multi_scale_intrinsics'): - intrinsic_mat = self.get_multi_scale_intrinsics(intrinsics, - self.num_scales) - intrinsic_mat.set_shape([self.num_scales, 3, 3]) - intrinsic_mat_inv = tf.matrix_inverse(intrinsic_mat) - intrinsic_mat_inv.set_shape([self.num_scales, 3, 3]) - - if self.imagenet_norm: - im_mean = tf.tile( - tf.constant(IMAGENET_MEAN), multiples=[self.seq_length]) - im_sd = tf.tile( - tf.constant(IMAGENET_SD), multiples=[self.seq_length]) - image_stack_norm = (image_stack - im_mean) / im_sd - else: - image_stack_norm = image_stack - - with tf.name_scope('batching'): - if self.shuffle: - (image_stack, image_stack_norm, seg_stack, intrinsic_mat, - intrinsic_mat_inv) = tf.train.shuffle_batch( - [image_stack, image_stack_norm, seg_stack, intrinsic_mat, - 
intrinsic_mat_inv], - batch_size=self.batch_size, - capacity=QUEUE_SIZE + QUEUE_BUFFER * self.batch_size, - min_after_dequeue=QUEUE_SIZE) - else: - (image_stack, image_stack_norm, seg_stack, intrinsic_mat, - intrinsic_mat_inv) = tf.train.batch( - [image_stack, image_stack_norm, seg_stack, intrinsic_mat, - intrinsic_mat_inv], - batch_size=self.batch_size, - num_threads=1, - capacity=QUEUE_SIZE + QUEUE_BUFFER * self.batch_size) - logging.info('image_stack: %s', util.info(image_stack)) - return (image_stack, image_stack_norm, seg_stack, intrinsic_mat, - intrinsic_mat_inv) - - def unpack_images(self, image_seq): - """[h, w * seq_length, 3] -> [h, w, 3 * seq_length].""" - with tf.name_scope('unpack_images'): - image_list = [ - image_seq[:, i * self.img_width:(i + 1) * self.img_width, :] - for i in range(self.seq_length) - ] - image_stack = tf.concat(image_list, axis=2) - image_stack.set_shape( - [self.img_height, self.img_width, self.seq_length * 3]) - return image_stack - - @classmethod - def preprocess_image(cls, image): - # Convert from uint8 to float. - return tf.image.convert_image_dtype(image, dtype=tf.float32) - - @classmethod - def augment_image_colorspace(cls, image_stack): - """Apply data augmentation to inputs.""" - image_stack_aug = image_stack - # Randomly shift brightness. - apply_brightness = tf.less(tf.random_uniform( - shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5) - image_stack_aug = tf.cond( - apply_brightness, - lambda: tf.image.random_brightness(image_stack_aug, max_delta=0.1), - lambda: image_stack_aug) - - # Randomly shift contrast. - apply_contrast = tf.less(tf.random_uniform( - shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5) - image_stack_aug = tf.cond( - apply_contrast, - lambda: tf.image.random_contrast(image_stack_aug, 0.85, 1.15), - lambda: image_stack_aug) - - # Randomly change saturation. 
- apply_saturation = tf.less(tf.random_uniform( - shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5) - image_stack_aug = tf.cond( - apply_saturation, - lambda: tf.image.random_saturation(image_stack_aug, 0.85, 1.15), - lambda: image_stack_aug) - - # Randomly change hue. - apply_hue = tf.less(tf.random_uniform( - shape=[], minval=0.0, maxval=1.0, dtype=tf.float32), 0.5) - image_stack_aug = tf.cond( - apply_hue, - lambda: tf.image.random_hue(image_stack_aug, max_delta=0.1), - lambda: image_stack_aug) - - image_stack_aug = tf.clip_by_value(image_stack_aug, 0, 1) - return image_stack_aug - - @classmethod - def augment_images_flip(cls, image_stack, seg_stack, intrinsics, - randomized=True): - """Randomly flips the image horizontally.""" - - def flip(cls, image_stack, seg_stack, intrinsics): - _, in_w, _ = image_stack.get_shape().as_list() - fx = intrinsics[0, 0] - fy = intrinsics[1, 1] - cx = in_w - intrinsics[0, 2] - cy = intrinsics[1, 2] - intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy) - return (tf.image.flip_left_right(image_stack), - tf.image.flip_left_right(seg_stack), intrinsics) - - if randomized: - prob = tf.random_uniform(shape=[], minval=0.0, maxval=1.0, - dtype=tf.float32) - predicate = tf.less(prob, 0.5) - return tf.cond(predicate, - lambda: flip(cls, image_stack, seg_stack, intrinsics), - lambda: (image_stack, seg_stack, intrinsics)) - else: - return flip(cls, image_stack, seg_stack, intrinsics) - - @classmethod - def augment_images_scale_crop(cls, im, seg, intrinsics, out_h, out_w): - """Randomly scales and crops image.""" - - def scale_randomly(im, seg, intrinsics): - """Scales image and adjust intrinsics accordingly.""" - in_h, in_w, _ = im.get_shape().as_list() - scaling = tf.random_uniform([2], 1, 1.15) - x_scaling = scaling[0] - y_scaling = scaling[1] - out_h = tf.cast(in_h * y_scaling, dtype=tf.int32) - out_w = tf.cast(in_w * x_scaling, dtype=tf.int32) - # Add batch. 
- im = tf.expand_dims(im, 0) - im = tf.image.resize_area(im, [out_h, out_w]) - im = im[0] - seg = tf.expand_dims(seg, 0) - seg = tf.image.resize_area(seg, [out_h, out_w]) - seg = seg[0] - fx = intrinsics[0, 0] * x_scaling - fy = intrinsics[1, 1] * y_scaling - cx = intrinsics[0, 2] * x_scaling - cy = intrinsics[1, 2] * y_scaling - intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy) - return im, seg, intrinsics - - # Random cropping - def crop_randomly(im, seg, intrinsics, out_h, out_w): - """Crops image and adjust intrinsics accordingly.""" - # batch_size, in_h, in_w, _ = im.get_shape().as_list() - in_h, in_w, _ = tf.unstack(tf.shape(im)) - offset_y = tf.random_uniform([1], 0, in_h - out_h + 1, dtype=tf.int32)[0] - offset_x = tf.random_uniform([1], 0, in_w - out_w + 1, dtype=tf.int32)[0] - im = tf.image.crop_to_bounding_box(im, offset_y, offset_x, out_h, out_w) - seg = tf.image.crop_to_bounding_box(seg, offset_y, offset_x, out_h, out_w) - fx = intrinsics[0, 0] - fy = intrinsics[1, 1] - cx = intrinsics[0, 2] - tf.cast(offset_x, dtype=tf.float32) - cy = intrinsics[1, 2] - tf.cast(offset_y, dtype=tf.float32) - intrinsics = cls.make_intrinsics_matrix(fx, fy, cx, cy) - return im, seg, intrinsics - - im, seg, intrinsics = scale_randomly(im, seg, intrinsics) - im, seg, intrinsics = crop_randomly(im, seg, intrinsics, out_h, out_w) - return im, seg, intrinsics - - def compile_file_list(self, data_dir, split, load_pose=False): - """Creates a list of input files.""" - logging.info('data_dir: %s', data_dir) - with gfile.Open(os.path.join(data_dir, '%s.txt' % split), 'r') as f: - frames = f.readlines() - frames = [k.rstrip() for k in frames] - subfolders = [x.split(' ')[0] for x in frames] - frame_ids = [x.split(' ')[1] for x in frames] - image_file_list = [ - os.path.join(data_dir, subfolders[i], frame_ids[i] + '.' + - self.file_extension) - for i in range(len(frames)) - ] - segment_file_list = [ - os.path.join(data_dir, subfolders[i], frame_ids[i] + '-fseg.' 
+ - self.file_extension) - for i in range(len(frames)) - ] - cam_file_list = [ - os.path.join(data_dir, subfolders[i], frame_ids[i] + '_cam.txt') - for i in range(len(frames)) - ] - file_lists = {} - file_lists['image_file_list'] = image_file_list - file_lists['segment_file_list'] = segment_file_list - file_lists['cam_file_list'] = cam_file_list - if load_pose: - pose_file_list = [ - os.path.join(data_dir, subfolders[i], frame_ids[i] + '_pose.txt') - for i in range(len(frames)) - ] - file_lists['pose_file_list'] = pose_file_list - self.steps_per_epoch = len(image_file_list) // self.batch_size - return file_lists - - @classmethod - def make_intrinsics_matrix(cls, fx, fy, cx, cy): - r1 = tf.stack([fx, 0, cx]) - r2 = tf.stack([0, fy, cy]) - r3 = tf.constant([0., 0., 1.]) - intrinsics = tf.stack([r1, r2, r3]) - return intrinsics - - @classmethod - def get_multi_scale_intrinsics(cls, intrinsics, num_scales): - """Returns multiple intrinsic matrices for different scales.""" - intrinsics_multi_scale = [] - # Scale the intrinsics accordingly for each scale - for s in range(num_scales): - fx = intrinsics[0, 0] / (2**s) - fy = intrinsics[1, 1] / (2**s) - cx = intrinsics[0, 2] / (2**s) - cy = intrinsics[1, 2] / (2**s) - intrinsics_multi_scale.append(cls.make_intrinsics_matrix(fx, fy, cx, cy)) - intrinsics_multi_scale = tf.stack(intrinsics_multi_scale) - return intrinsics_multi_scale diff --git a/research/struct2depth/train.py b/research/struct2depth/train.py deleted file mode 100644 index 248c182fe5790ea1305cf887962bd26dd33d7b54..0000000000000000000000000000000000000000 --- a/research/struct2depth/train.py +++ /dev/null @@ -1,259 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Train the model. Please refer to README for example usage.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os -import random -import time -from absl import app -from absl import flags -from absl import logging -import numpy as np -import tensorflow as tf - -import model -import nets -import reader -import util - -gfile = tf.gfile -MAX_TO_KEEP = 1000000 # Maximum number of checkpoints to keep. - -flags.DEFINE_string('data_dir', None, 'Preprocessed data.') -flags.DEFINE_string('file_extension', 'png', 'Image data file extension.') -flags.DEFINE_float('learning_rate', 0.0002, 'Adam learning rate.') -flags.DEFINE_float('beta1', 0.9, 'Adam momentum.') -flags.DEFINE_float('reconstr_weight', 0.85, 'Frame reconstruction loss weight.') -flags.DEFINE_float('ssim_weight', 0.15, 'SSIM loss weight.') -flags.DEFINE_float('smooth_weight', 0.04, 'Smoothness loss weight.') -flags.DEFINE_float('icp_weight', 0.0, 'ICP loss weight.') -flags.DEFINE_float('size_constraint_weight', 0.0005, 'Weight of the object ' - 'size constraint loss. 
Use only when motion handling is ' - 'enabled.') -flags.DEFINE_integer('batch_size', 4, 'The size of a sample batch') -flags.DEFINE_integer('img_height', 128, 'Input frame height.') -flags.DEFINE_integer('img_width', 416, 'Input frame width.') -flags.DEFINE_integer('seq_length', 3, 'Number of frames in sequence.') -flags.DEFINE_enum('architecture', nets.RESNET, nets.ARCHITECTURES, - 'Defines the architecture to use for the depth prediction ' - 'network. Defaults to ResNet-based encoder and accompanying ' - 'decoder.') -flags.DEFINE_boolean('imagenet_norm', True, 'Whether to normalize the input ' - 'images channel-wise so that they match the distribution ' - 'most ImageNet-models were trained on.') -flags.DEFINE_float('weight_reg', 0.05, 'The amount of weight regularization to ' - 'apply. This has no effect on the ResNet-based encoder ' - 'architecture.') -flags.DEFINE_boolean('exhaustive_mode', False, 'Whether to exhaustively warp ' - 'from any frame to any other instead of just considering ' - 'adjacent frames. Where necessary, multiple egomotion ' - 'estimates will be applied. Does not have an effect if ' - 'compute_minimum_loss is enabled.') -flags.DEFINE_boolean('random_scale_crop', False, 'Whether to apply random ' - 'image scaling and center cropping during training.') -flags.DEFINE_enum('flipping_mode', reader.FLIP_RANDOM, - [reader.FLIP_RANDOM, reader.FLIP_ALWAYS, reader.FLIP_NONE], - 'Determines the image flipping mode: if random, performs ' - 'on-the-fly augmentation. Otherwise, flips the input images ' - 'always or never, respectively.') -flags.DEFINE_string('pretrained_ckpt', None, 'Path to checkpoint with ' - 'pretrained weights. Do not include .data* extension.') -flags.DEFINE_string('imagenet_ckpt', None, 'Initialize the weights according ' - 'to an ImageNet-pretrained checkpoint. 
Requires ' - 'architecture to be ResNet-18.') -flags.DEFINE_string('checkpoint_dir', None, 'Directory to save model ' - 'checkpoints.') -flags.DEFINE_integer('train_steps', 10000000, 'Number of training steps.') -flags.DEFINE_integer('summary_freq', 100, 'Save summaries every N steps.') -flags.DEFINE_bool('depth_upsampling', True, 'Whether to apply depth ' - 'upsampling of lower-scale representations before warping to ' - 'compute reconstruction loss on full-resolution image.') -flags.DEFINE_bool('depth_normalization', True, 'Whether to apply depth ' - 'normalization, that is, normalizing inverse depth ' - 'prediction maps by their mean to avoid degeneration towards ' - 'small values.') -flags.DEFINE_bool('compute_minimum_loss', True, 'Whether to take the ' - 'element-wise minimum of the reconstruction/SSIM error in ' - 'order to avoid overly penalizing dis-occlusion effects.') -flags.DEFINE_bool('use_skip', True, 'Whether to use skip connections in the ' - 'encoder-decoder architecture.') -flags.DEFINE_bool('equal_weighting', False, 'Whether to use equal weighting ' - 'of the smoothing loss term, regardless of resolution.') -flags.DEFINE_bool('joint_encoder', False, 'Whether to share parameters ' - 'between the depth and egomotion networks by using a joint ' - 'encoder architecture. 
The egomotion network is then ' - 'operating only on the hidden representation provided by the ' - 'joint encoder.') -flags.DEFINE_bool('handle_motion', True, 'Whether to try to handle motion by ' - 'using the provided segmentation masks.') -flags.DEFINE_string('master', 'local', 'Location of the session.') - -FLAGS = flags.FLAGS -flags.mark_flag_as_required('data_dir') -flags.mark_flag_as_required('checkpoint_dir') - - -def main(_): - # Fixed seed for repeatability - seed = 8964 - tf.set_random_seed(seed) - np.random.seed(seed) - random.seed(seed) - - if FLAGS.handle_motion and FLAGS.joint_encoder: - raise ValueError('Using a joint encoder is currently not supported when ' - 'modeling object motion.') - if FLAGS.handle_motion and FLAGS.seq_length != 3: - raise ValueError('The current motion model implementation only supports ' - 'using a sequence length of three.') - if FLAGS.handle_motion and not FLAGS.compute_minimum_loss: - raise ValueError('Computing the minimum photometric loss is required when ' - 'enabling object motion handling.') - if FLAGS.size_constraint_weight > 0 and not FLAGS.handle_motion: - raise ValueError('To enforce object size constraints, enable motion ' - 'handling.') - if FLAGS.imagenet_ckpt and not FLAGS.imagenet_norm: - logging.warn('When initializing with an ImageNet-pretrained model, it is ' - 'recommended to normalize the image inputs accordingly using ' - 'imagenet_norm.') - if FLAGS.compute_minimum_loss and FLAGS.seq_length % 2 != 1: - raise ValueError('Compute minimum loss requires using an odd number of ' - 'images in a sequence.') - if FLAGS.architecture != nets.RESNET and FLAGS.imagenet_ckpt: - raise ValueError('Can only load weights from pre-trained ImageNet model ' - 'when using ResNet-architecture.') - if FLAGS.compute_minimum_loss and FLAGS.exhaustive_mode: - raise ValueError('Exhaustive mode has no effect when compute_minimum_loss ' - 'is enabled.') - if FLAGS.img_width % (2 ** 5) != 0 or FLAGS.img_height % (2 ** 5) != 0: - 
logging.warn('Image size is not divisible by 2^5. For the architecture ' - 'employed, this could cause artefacts caused by resizing in ' - 'lower dimensions.') - if FLAGS.icp_weight > 0.0: - # TODO(casser): Change ICP interface to take matrix instead of vector. - raise ValueError('ICP is currently not supported.') - - if not gfile.Exists(FLAGS.checkpoint_dir): - gfile.MakeDirs(FLAGS.checkpoint_dir) - - train_model = model.Model(data_dir=FLAGS.data_dir, - file_extension=FLAGS.file_extension, - is_training=True, - learning_rate=FLAGS.learning_rate, - beta1=FLAGS.beta1, - reconstr_weight=FLAGS.reconstr_weight, - smooth_weight=FLAGS.smooth_weight, - ssim_weight=FLAGS.ssim_weight, - icp_weight=FLAGS.icp_weight, - batch_size=FLAGS.batch_size, - img_height=FLAGS.img_height, - img_width=FLAGS.img_width, - seq_length=FLAGS.seq_length, - architecture=FLAGS.architecture, - imagenet_norm=FLAGS.imagenet_norm, - weight_reg=FLAGS.weight_reg, - exhaustive_mode=FLAGS.exhaustive_mode, - random_scale_crop=FLAGS.random_scale_crop, - flipping_mode=FLAGS.flipping_mode, - depth_upsampling=FLAGS.depth_upsampling, - depth_normalization=FLAGS.depth_normalization, - compute_minimum_loss=FLAGS.compute_minimum_loss, - use_skip=FLAGS.use_skip, - joint_encoder=FLAGS.joint_encoder, - handle_motion=FLAGS.handle_motion, - equal_weighting=FLAGS.equal_weighting, - size_constraint_weight=FLAGS.size_constraint_weight) - - train(train_model, FLAGS.pretrained_ckpt, FLAGS.imagenet_ckpt, - FLAGS.checkpoint_dir, FLAGS.train_steps, FLAGS.summary_freq) - - -def train(train_model, pretrained_ckpt, imagenet_ckpt, checkpoint_dir, - train_steps, summary_freq): - """Train model.""" - vars_to_restore = None - if pretrained_ckpt is not None: - vars_to_restore = util.get_vars_to_save_and_restore(pretrained_ckpt) - ckpt_path = pretrained_ckpt - elif imagenet_ckpt: - vars_to_restore = util.get_imagenet_vars_to_restore(imagenet_ckpt) - ckpt_path = imagenet_ckpt - pretrain_restorer = tf.train.Saver(vars_to_restore) - 
vars_to_save = util.get_vars_to_save_and_restore() - vars_to_save[train_model.global_step.op.name] = train_model.global_step - saver = tf.train.Saver(vars_to_save, max_to_keep=MAX_TO_KEEP) - sv = tf.train.Supervisor(logdir=checkpoint_dir, save_summaries_secs=0, - saver=None) - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - with sv.managed_session(config=config) as sess: - if pretrained_ckpt is not None or imagenet_ckpt: - logging.info('Restoring pretrained weights from %s', ckpt_path) - pretrain_restorer.restore(sess, ckpt_path) - - logging.info('Attempting to resume training from %s...', checkpoint_dir) - checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - logging.info('Last checkpoint found: %s', checkpoint) - if checkpoint: - saver.restore(sess, checkpoint) - - logging.info('Training...') - start_time = time.time() - last_summary_time = time.time() - steps_per_epoch = train_model.reader.steps_per_epoch - step = 1 - while step <= train_steps: - fetches = { - 'train': train_model.train_op, - 'global_step': train_model.global_step, - 'incr_global_step': train_model.incr_global_step - } - if step % summary_freq == 0: - fetches['loss'] = train_model.total_loss - fetches['summary'] = sv.summary_op - - results = sess.run(fetches) - global_step = results['global_step'] - - if step % summary_freq == 0: - sv.summary_writer.add_summary(results['summary'], global_step) - train_epoch = math.ceil(global_step / steps_per_epoch) - train_step = global_step - (train_epoch - 1) * steps_per_epoch - this_cycle = time.time() - last_summary_time - last_summary_time += this_cycle - logging.info( - 'Epoch: [%2d] [%5d/%5d] time: %4.2fs (%ds total) loss: %.3f', - train_epoch, train_step, steps_per_epoch, this_cycle, - time.time() - start_time, results['loss']) - - if step % steps_per_epoch == 0: - logging.info('[*] Saving checkpoint to %s...', checkpoint_dir) - saver.save(sess, os.path.join(checkpoint_dir, 'model'), - global_step=global_step) - - # Setting step 
to global_step allows for training for a total of - # train_steps even if the program is restarted during training. - step = global_step + 1 - - -if __name__ == '__main__': - app.run(main) diff --git a/research/struct2depth/util.py b/research/struct2depth/util.py deleted file mode 100644 index 79376864685363ab886ae35c8fa614d9641a397d..0000000000000000000000000000000000000000 --- a/research/struct2depth/util.py +++ /dev/null @@ -1,252 +0,0 @@ - -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains common utilities and functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import locale -import os -import re -from absl import logging -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import numpy as np -import tensorflow as tf -import cv2 -gfile = tf.gfile - - -CMAP_DEFAULT = 'plasma' -# Defines the cropping that is applied to the Cityscapes dataset with respect to -# the original raw input resolution. 
-CITYSCAPES_CROP = [256, 768, 192, 1856] - - -def crop_cityscapes(im, resize=None): - ymin, ymax, xmin, xmax = CITYSCAPES_CROP - im = im[ymin:ymax, xmin:xmax] - if resize is not None: - im = cv2.resize(im, resize) - return im - - -def gray2rgb(im, cmap=CMAP_DEFAULT): - cmap = plt.get_cmap(cmap) - result_img = cmap(im.astype(np.float32)) - if result_img.shape[2] > 3: - result_img = np.delete(result_img, 3, 2) - return result_img - - -def load_image(img_file, resize=None, interpolation='linear'): - """Load image from disk. Output value range: [0,1].""" - im_data = np.fromstring(gfile.Open(img_file).read(), np.uint8) - im = cv2.imdecode(im_data, cv2.IMREAD_COLOR) - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - if resize and resize != im.shape[:2]: - ip = cv2.INTER_LINEAR if interpolation == 'linear' else cv2.INTER_NEAREST - im = cv2.resize(im, resize, interpolation=ip) - return np.array(im, dtype=np.float32) / 255.0 - - -def save_image(img_file, im, file_extension): - """Save image from disk. Expected input value range: [0,1].""" - im = (im * 255.0).astype(np.uint8) - with gfile.Open(img_file, 'w') as f: - im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR) - _, im_data = cv2.imencode('.%s' % file_extension, im) - f.write(im_data.tostring()) - - -def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None, - cmap=CMAP_DEFAULT): - """Converts a depth map to an RGB image.""" - # Convert to disparity. 
- - disp = 1.0 / (depth + 1e-6) - if normalizer is not None: - disp /= normalizer - else: - disp /= (np.percentile(disp, pc) + 1e-6) - disp = np.clip(disp, 0, 1) - disp = gray2rgb(disp, cmap=cmap) - keep_h = int(disp.shape[0] * (1 - crop_percent)) - disp = disp[:keep_h] - return disp - - -def get_seq_start_end(target_index, seq_length, sample_every=1): - """Returns absolute seq start and end indices for a given target frame.""" - half_offset = int((seq_length - 1) / 2) * sample_every - end_index = target_index + half_offset - start_index = end_index - (seq_length - 1) * sample_every - return start_index, end_index - - -def get_seq_middle(seq_length): - """Returns relative index for the middle frame in sequence.""" - half_offset = int((seq_length - 1) / 2) - return seq_length - 1 - half_offset - - -def info(obj): - """Return info on shape and dtype of a numpy array or TensorFlow tensor.""" - if obj is None: - return 'None.' - elif isinstance(obj, list): - if obj: - return 'List of %d... %s' % (len(obj), info(obj[0])) - else: - return 'Empty list.' - elif isinstance(obj, tuple): - if obj: - return 'Tuple of %d... %s' % (len(obj), info(obj[0])) - else: - return 'Empty tuple.' - else: - if is_a_numpy_array(obj): - return 'Array with shape: %s, dtype: %s' % (obj.shape, obj.dtype) - else: - return str(obj) - - -def is_a_numpy_array(obj): - """Returns true if obj is a numpy array.""" - return type(obj).__module__ == np.__name__ - - -def count_parameters(also_print=True): - """Cound the number of parameters in the model. - - Args: - also_print: Boolean. If True also print the numbers. - - Returns: - The total number of parameters. 
- """ - total = 0 - if also_print: - logging.info('Model Parameters:') - for (_, v) in get_vars_to_save_and_restore().items(): - shape = v.get_shape() - if also_print: - logging.info('%s %s: %s', v.op.name, shape, - format_number(shape.num_elements())) - total += shape.num_elements() - if also_print: - logging.info('Total: %s', format_number(total)) - return total - - -def get_vars_to_save_and_restore(ckpt=None): - """Returns list of variables that should be saved/restored. - - Args: - ckpt: Path to existing checkpoint. If present, returns only the subset of - variables that exist in given checkpoint. - - Returns: - List of all variables that need to be saved/restored. - """ - model_vars = tf.trainable_variables() - # Add batchnorm variables. - bn_vars = [v for v in tf.global_variables() - if 'moving_mean' in v.op.name or 'moving_variance' in v.op.name or - 'mu' in v.op.name or 'sigma' in v.op.name or - 'global_scale_var' in v.op.name] - model_vars.extend(bn_vars) - model_vars = sorted(model_vars, key=lambda x: x.op.name) - mapping = {} - if ckpt is not None: - ckpt_var = tf.contrib.framework.list_variables(ckpt) - ckpt_var_names = [name for (name, unused_shape) in ckpt_var] - ckpt_var_shapes = [shape for (unused_name, shape) in ckpt_var] - not_loaded = list(ckpt_var_names) - for v in model_vars: - if v.op.name not in ckpt_var_names: - # For backward compatibility, try additional matching. - v_additional_name = v.op.name.replace('egomotion_prediction/', '') - if v_additional_name in ckpt_var_names: - # Check if shapes match. - ind = ckpt_var_names.index(v_additional_name) - if ckpt_var_shapes[ind] == v.get_shape(): - mapping[v_additional_name] = v - not_loaded.remove(v_additional_name) - continue - else: - logging.warn('Shape mismatch, will not restore %s.', v.op.name) - logging.warn('Did not find var %s in checkpoint: %s', v.op.name, - os.path.basename(ckpt)) - else: - # Check if shapes match. 
- ind = ckpt_var_names.index(v.op.name) - if ckpt_var_shapes[ind] == v.get_shape(): - mapping[v.op.name] = v - not_loaded.remove(v.op.name) - else: - logging.warn('Shape mismatch, will not restore %s.', v.op.name) - if not_loaded: - logging.warn('The following variables in the checkpoint were not loaded:') - for varname_not_loaded in not_loaded: - logging.info('%s', varname_not_loaded) - else: # just get model vars. - for v in model_vars: - mapping[v.op.name] = v - return mapping - - -def get_imagenet_vars_to_restore(imagenet_ckpt): - """Returns dict of variables to restore from ImageNet-checkpoint.""" - vars_to_restore_imagenet = {} - ckpt_var_names = tf.contrib.framework.list_variables(imagenet_ckpt) - ckpt_var_names = [name for (name, unused_shape) in ckpt_var_names] - model_vars = tf.global_variables() - for v in model_vars: - if 'global_step' in v.op.name: continue - mvname_noprefix = v.op.name.replace('depth_prediction/', '') - mvname_noprefix = mvname_noprefix.replace('moving_mean', 'mu') - mvname_noprefix = mvname_noprefix.replace('moving_variance', 'sigma') - if mvname_noprefix in ckpt_var_names: - vars_to_restore_imagenet[mvname_noprefix] = v - else: - logging.info('The following variable will not be restored from ' - 'pretrained ImageNet-checkpoint: %s', mvname_noprefix) - return vars_to_restore_imagenet - - -def format_number(n): - """Formats number with thousands commas.""" - locale.setlocale(locale.LC_ALL, 'en_US') - return locale.format('%d', n, grouping=True) - - -def atoi(text): - return int(text) if text.isdigit() else text - - -def natural_keys(text): - return [atoi(c) for c in re.split(r'(\d+)', text)] - - -def read_text_lines(filepath): - with tf.gfile.Open(filepath, 'r') as f: - lines = f.readlines() - lines = [l.rstrip() for l in lines] - return lines diff --git a/research/swivel/.gitignore b/research/swivel/.gitignore deleted file mode 100644 index 215593fb25dfe5862662aafcade225a7c9d62b83..0000000000000000000000000000000000000000 --- 
a/research/swivel/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -*.an.tab -*.pyc -*.ws.tab -MEN.tar.gz -Mtruk.csv -SimLex-999.zip -analogy -fastprep -*.dSYM -questions-words.txt -word_relationship.* -tensorflow/ -rw.zip -ws353simrel.tar.gz diff --git a/research/swivel/README.md b/research/swivel/README.md deleted file mode 100644 index c5550a2d18ccabdc22dc271c5b0515b233497068..0000000000000000000000000000000000000000 --- a/research/swivel/README.md +++ /dev/null @@ -1,185 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Swivel in Tensorflow - -This is a [TensorFlow](http://www.tensorflow.org/) implementation of the -[Swivel algorithm](http://arxiv.org/abs/1602.02215) for generating word -embeddings. - -Swivel works as follows: - -1. Compute the co-occurrence statistics from a corpus; that is, determine how - often a word *c* appears the context (e.g., "within ten words") of a focus - word *f*. This results in a sparse *co-occurrence matrix* whose rows - represent the focus words, and whose columns represent the context - words. Each cell value is the number of times the focus and context words - were observed together. -2. Re-organize the co-occurrence matrix and chop it into smaller pieces. -3. Assign a random *embedding vector* of fixed dimension (say, 300) to each - focus word and to each context word. -4. Iteratively attempt to approximate the - [pointwise mutual information](https://en.wikipedia.org/wiki/Pointwise_mutual_information) - (PMI) between words with the dot product of the corresponding embedding - vectors. - -Note that the resulting co-occurrence matrix is very sparse (i.e., contains many -zeros) since most words won't have been observed in the context of other words. 
-In the case of very rare words, it seems reasonable to assume that you just -haven't sampled enough data to spot their co-occurrence yet. On the other hand, -if we've failed to observed two common words co-occuring, it seems likely that -they are *anti-correlated*. - -Swivel attempts to capture this intuition by using both the observed and the -un-observed co-occurrences to inform the way it iteratively adjusts vectors. -Empirically, this seems to lead to better embeddings, especially for rare words. - -# Contents - -This release includes the following programs. - -* `prep.py` is a program that takes a text corpus and pre-processes it for - training. Specifically, it computes a vocabulary and token co-occurrence - statistics for the corpus. It then outputs the information into a format that - can be digested by the TensorFlow trainer. -* `swivel.py` is a TensorFlow program that generates embeddings from the - co-occurrence statistics. It uses the files created by `prep.py` as input, - and generates two text files as output: the row and column embeddings. -* `distributed.sh` is a Bash script that is meant to act as a template for - launching "distributed" Swivel training; i.e., multiple processes that work in - parallel and communicate via a parameter server. -* `text2bin.py` combines the row and column vectors generated by Swivel into a - flat binary file that can be quickly loaded into memory to perform vector - arithmetic. This can also be used to convert embeddings from - [Glove](http://nlp.stanford.edu/projects/glove/) and - [word2vec](https://code.google.com/archive/p/word2vec/) into a form that can - be used by the following tools. -* `nearest.py` is a program that you can use to manually inspect binary - embeddings. -* `eval.mk` is a GNU makefile that fill retrieve and normalize several common - word similarity and analogy evaluation data sets. -* `wordsim.py` performs word similarity evaluation of the resulting vectors. 
-* `analogy` performs analogy evaluation of the resulting vectors. -* `fastprep` is a C++ program that works much more quickly that `prep.py`, but - also has some additional dependencies to build. - -# Building Embeddings with Swivel - -To build your own word embeddings with Swivel, you'll need the following: - -* A large corpus of text; for example, the - [dump of English Wikipedia](https://dumps.wikimedia.org/enwiki/). -* A working [TensorFlow](http://www.tensorflow.org/) implementation. -* A machine with plenty of disk space and, ideally, a beefy GPU card. (We've - experimented with the - [Nvidia Titan X](http://www.geforce.com/hardware/desktop-gpus/geforce-gtx-titan-x), - for example.) - -You'll then run `prep.py` (or `fastprep`) to prepare the data for Swivel and run -`swivel.py` to create the embeddings. The resulting embeddings will be output -into two large text files: one for the row vectors and one for the column -vectors. You can use those "as is", or convert them into a binary file using -`text2bin.py` and then use the tools here to experiment with the resulting -vectors. - -## Preparing the data for training - -Once you've downloaded the corpus (e.g., to `/tmp/wiki.txt`), run `prep.py` to -prepare the data for training: - - ./prep.py --output_dir /tmp/swivel_data --input /tmp/wiki.txt - -By default, `prep.py` will make one pass through the text file to compute a -"vocabulary" of the most frequent words, and then a second pass to compute the -co-occurrence statistics. The following options allow you to control this -behavior: - -| Option | Description | -|:--- |:--- | -| `--min_count ` | Only include words in the generated vocabulary that appear at least *n* times. | -| `--max_vocab ` | Admit at most *n* words into the vocabulary. | -| `--vocab ` | Use the specified filename as the vocabulary instead of computing it from the corpus. The file should contain one word per line. | - -The `prep.py` program is pretty simple. 
Notably, it does almost no text -processing: it does no case translation and simply breaks text into tokens by -splitting on spaces. Feel free to experiment with the `words` function if you'd -like to do something more sophisticated. - -Unfortunately, `prep.py` is pretty slow. Also included is `fastprep`, a C++ -equivalent that works much more quickly. Building `fastprep.cc` is a bit more -involved: it requires you to pull and build the Tensorflow source code in order -to provide the libraries and headers that it needs. See `fastprep.mk` for more -details. - -## Training the embeddings - -When `prep.py` completes, it will have produced a directory containing the data -that the Swivel trainer needs to run. Train embeddings as follows: - - ./swivel.py --input_base_path /tmp/swivel_data \ - --output_base_path /tmp/swivel_data - -There are a variety of parameters that you can fiddle with to customize the -embeddings; some that you may want to experiment with include: - -| Option | Description | -|:--- |:--- | -| `--embedding_size ` | The dimensionality of the embeddings that are created. By default, 300 dimensional embeddings are created. | -| `--num_epochs ` | The number of iterations through the data that are performed. By default, 40 epochs are trained. | - -As mentioned above, access to beefy GPU will dramatically reduce the amount of -time it takes Swivel to train embeddings. - -When complete, you should find `row_embeddings.tsv` and `col_embedding.tsv` in -the directory specified by `--ouput_base_path`. These files are tab-delimited -files that contain one embedding per line. Each line contains the token -followed by *dim* floating point numbers. - -## Exploring and evaluating the embeddings - -There are also some simple tools you can to explore the embeddings. These tools -work with a simple binary vector format that can be `mmap`-ed into memory along -with a separate vocabulary file. 
Use `text2bin.py` to generate these files: - - ./text2bin.py -o vecs.bin -v vocab.txt /tmp/swivel_data/*_embedding.tsv - -You can do some simple exploration using `nearest.py`: - - ./nearest.py -v vocab.txt -e vecs.bin - query> dog - dog - dogs - cat - ... - query> man woman king - king - queen - princess - ... - -To evaluate the embeddings using common word similarity and analogy datasets, -use `eval.mk` to retrieve the data sets and build the tools. Note that wordsim is currently not compatible with Python 3.x. - - make -f eval.mk - ./wordsim.py --vocab vocab.txt --embeddings vecs.bin *.ws.tab - ./analogy --vocab vocab.txt --embeddings vecs.bin *.an.tab - -The word similarity evaluation compares the embeddings' estimate of "similarity" -with human judgement using -[Spearman's rho](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) -as the measure of correlation. (Bigger numbers are better.) - -The analogy evaluation tests how well the embeddings can predict analogies like -"man is to woman as king is to queen". - -Note that `eval.mk` forces all evaluation data into lower case. From there, -both the word similarity and analogy evaluations assume that the eval data and -the embeddings use consistent capitalization: if you train embeddings using -mixed case and evaluate them using lower case, things won't work well. - -# Contact - -If you have any questions about Swivel, feel free to post to -[swivel-embeddings@googlegroups.com](https://groups.google.com/forum/#!forum/swivel-embeddings). - diff --git a/research/swivel/analogy.cc b/research/swivel/analogy.cc deleted file mode 100644 index 5a3ff9b3b3b46faa4bad255ddb282cc9478331bd..0000000000000000000000000000000000000000 --- a/research/swivel/analogy.cc +++ /dev/null @@ -1,365 +0,0 @@ -/* -*- Mode: C++ -*- */ - -/* - * Copyright 2016 Google Inc. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Computes embedding performance on analogy tasks. Accepts as input one or - * more files containing four words per line (A B C D), and determines if: - * - * vec(C) - vec(A) + vec(B) ~= vec(D) - * - * Cosine distance in the embedding space is used to retrieve neighbors. Any - * missing vocabulary items are scored as losses. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static const char usage[] = R"( -Performs analogy testing of embedding vectors. - -Usage: - - analogy --embeddings --vocab eval1.tab ... - -Options: - - --embeddings - The file containing the binary embedding vectors to evaluate. - - --vocab - The vocabulary file corresponding to the embedding vectors. - - --nthreads - The number of evaluation threads to run (default: 8) -)"; - -// Reads the vocabulary file into a map from token to vector index. -static std::unordered_map ReadVocab( - const std::string& vocab_filename) { - std::unordered_map vocab; - std::ifstream fin(vocab_filename); - - int index = 0; - for (std::string token; std::getline(fin, token); ++index) { - auto n = token.find('\t'); - if (n != std::string::npos) token = token.substr(n); - - vocab[token] = index; - } - - return vocab; -} - -// An analogy query: "A is to B as C is to D". 
-typedef std::tuple AnalogyQuery; - -std::vector ReadQueries( - const std::string &filename, - const std::unordered_map &vocab, int *total) { - std::ifstream fin(filename); - - std::vector queries; - int lineno = 0; - while (1) { - // Read the four words. - std::string words[4]; - int nread = 0; - for (int i = 0; i < 4; ++i) { - fin >> words[i]; - if (!words[i].empty()) ++nread; - } - - ++lineno; - if (nread == 0) break; - - if (nread < 4) { - std::cerr << "expected four words at line " << lineno << std::endl; - break; - } - - // Look up each word's index. - int ixs[4], nvalid; - for (nvalid = 0; nvalid < 4; ++nvalid) { - std::unordered_map::const_iterator it = - vocab.find(words[nvalid]); - - if (it == vocab.end()) break; - - ixs[nvalid] = it->second; - } - - // If we don't have all the words, count it as a loss. - if (nvalid >= 4) - queries.push_back(std::make_tuple(ixs[0], ixs[1], ixs[2], ixs[3])); - } - - *total = lineno; - return queries; -} - - -// A thread that evaluates some fraction of the analogies. -class AnalogyEvaluator { - public: - // Creates a new Analogy evaluator for a range of analogy queries. - AnalogyEvaluator(std::vector::const_iterator begin, - std::vector::const_iterator end, - const float *embeddings, const int num_embeddings, - const int dim) - : begin_(begin), - end_(end), - embeddings_(embeddings), - num_embeddings_(num_embeddings), - dim_(dim) {} - - // A thunk for pthreads. - static void* Run(void *param) { - AnalogyEvaluator *self = static_cast(param); - self->Evaluate(); - return nullptr; - } - - // Evaluates the analogies. - void Evaluate(); - - // Returns the number of correct analogies after evaluation is complete. - int GetNumCorrect() const { return correct_; } - - protected: - // The beginning of the range of queries to consider. - std::vector::const_iterator begin_; - - // The end of the range of queries to consider. - std::vector::const_iterator end_; - - // The raw embedding vectors. 
- const float *embeddings_; - - // The number of embedding vectors. - const int num_embeddings_; - - // The embedding vector dimensionality. - const int dim_; - - // The number of correct analogies. - int correct_; -}; - - -void AnalogyEvaluator::Evaluate() { - float* sum = new float[dim_]; - - correct_ = 0; - for (auto query = begin_; query < end_; ++query) { - const float* vec; - int a, b, c, d; - std::tie(a, b, c, d) = *query; - - // Compute C - A + B. - vec = embeddings_ + dim_ * c; - for (int i = 0; i < dim_; ++i) sum[i] = vec[i]; - - vec = embeddings_ + dim_ * a; - for (int i = 0; i < dim_; ++i) sum[i] -= vec[i]; - - vec = embeddings_ + dim_ * b; - for (int i = 0; i < dim_; ++i) sum[i] += vec[i]; - - // Find the nearest neighbor that isn't one of the query words. - int best_ix = -1; - float best_dot = -1.0; - for (int i = 0; i < num_embeddings_; ++i) { - if (i == a || i == b || i == c) continue; - - vec = embeddings_ + dim_ * i; - - float dot = 0; - for (int j = 0; j < dim_; ++j) dot += vec[j] * sum[j]; - - if (dot > best_dot) { - best_ix = i; - best_dot = dot; - } - } - - // The fourth word is the answer; did we get it right? 
- if (best_ix == d) ++correct_; - } - - delete[] sum; -} - - -int main(int argc, char *argv[]) { - if (argc <= 1) { - printf(usage); - return 2; - } - - std::string embeddings_filename, vocab_filename; - int nthreads = 8; - - std::vector input_filenames; - std::vector> queries; - - for (int i = 1; i < argc; ++i) { - std::string arg = argv[i]; - if (arg == "--embeddings") { - if (++i >= argc) goto argmissing; - embeddings_filename = argv[i]; - } else if (arg == "--vocab") { - if (++i >= argc) goto argmissing; - vocab_filename = argv[i]; - } else if (arg == "--nthreads") { - if (++i >= argc) goto argmissing; - if ((nthreads = atoi(argv[i])) <= 0) goto badarg; - } else if (arg == "--help") { - std::cout << usage << std::endl; - return 0; - } else if (arg[0] == '-') { - std::cerr << "unknown option: '" << arg << "'" << std::endl; - return 2; - } else { - input_filenames.push_back(arg); - } - - continue; - - argmissing: - std::cerr << "missing value for '" << argv[i - 1] << "' (--help for help)" - << std::endl; - return 2; - - badarg: - std::cerr << "invalid value '" << argv[i] << "' for '" << argv[i - 1] - << "' (--help for help)" << std::endl; - - return 2; - } - - // Read the vocabulary. - std::unordered_map vocab = ReadVocab(vocab_filename); - if (!vocab.size()) { - std::cerr << "unable to read vocabulary file '" << vocab_filename << "'" - << std::endl; - return 1; - } - - const int n = vocab.size(); - - // Read the vectors. 
- int fd; - if ((fd = open(embeddings_filename.c_str(), O_RDONLY)) < 0) { - std::cerr << "unable to open embeddings file '" << embeddings_filename - << "'" << std::endl; - return 1; - } - - off_t nbytes = lseek(fd, 0, SEEK_END); - if (nbytes == -1) { - std::cerr << "unable to determine file size for '" << embeddings_filename - << "'" << std::endl; - return 1; - } - - if (nbytes % (sizeof(float) * n) != 0) { - std::cerr << "'" << embeddings_filename - << "' has a strange file size; expected it to be " - "a multiple of the vocabulary size" - << std::endl; - - return 1; - } - - const int dim = nbytes / (sizeof(float) * n); - float *embeddings = static_cast(malloc(nbytes)); - lseek(fd, 0, SEEK_SET); - if (read(fd, embeddings, nbytes) < nbytes) { - std::cerr << "unable to read embeddings from " << embeddings_filename - << std::endl; - return 1; - } - - close(fd); - - /* Normalize the vectors. */ - for (int i = 0; i < n; ++i) { - float *vec = embeddings + dim * i; - float norm = 0; - for (int j = 0; j < dim; ++j) norm += vec[j] * vec[j]; - - norm = sqrt(norm); - for (int j = 0; j < dim; ++j) vec[j] /= norm; - } - - pthread_attr_t attr; - if (pthread_attr_init(&attr) != 0) { - std::cerr << "unable to initalize pthreads" << std::endl; - return 1; - } - - /* Read each input file. */ - for (const auto filename : input_filenames) { - int total = 0; - std::vector queries = - ReadQueries(filename.c_str(), vocab, &total); - - const int queries_per_thread = queries.size() / nthreads; - std::vector evaluators; - std::vector threads; - - for (int i = 0; i < nthreads; ++i) { - auto begin = queries.begin() + i * queries_per_thread; - auto end = (i + 1 < nthreads) - ? 
queries.begin() + (i + 1) * queries_per_thread - : queries.end(); - - AnalogyEvaluator *evaluator = - new AnalogyEvaluator(begin, end, embeddings, n, dim); - - pthread_t thread; - pthread_create(&thread, &attr, AnalogyEvaluator::Run, evaluator); - evaluators.push_back(evaluator); - threads.push_back(thread); - } - - for (auto &thread : threads) pthread_join(thread, 0); - - int correct = 0; - for (const AnalogyEvaluator* evaluator : evaluators) { - correct += evaluator->GetNumCorrect(); - delete evaluator; - } - - printf("%0.3f %s\n", static_cast(correct) / total, filename.c_str()); - } - - return 0; -} diff --git a/research/swivel/distributed.sh b/research/swivel/distributed.sh deleted file mode 100644 index 6aa59f751a8bbd3761a419f5f3242a9d1d5ce5e3..0000000000000000000000000000000000000000 --- a/research/swivel/distributed.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Copyright 2017 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script launches a multi-process version of Swivel on a single machine. -set -e - -# A comma-separated list of parameter server processes. -PS_HOSTS="localhost:4000" - -# A comma-separated list of worker processes. -WORKER_HOSTS="localhost:5000,localhost:5001,localhost:5002,localhost:5003" - -# Where the Swivel training data is located. All processes must be able to read -# from this directory, so it ought to be a network filesystem if you're running -# on multiple servers. 
-INPUT_BASE_PATH="${HOME}/tmp/swivel/in" - -# Where the output and working directory is located. -OUTPUT_BASE_PATH="${HOME}/tmp/swivel/out" - -# Location of evaluation data, if you want to observe evaluation while training. -EVAL_BASE_PATH="${HOME}/tmp/swivel/eval" - -ARGS="--ps_hosts ${PS_HOSTS} ---worker_hosts ${WORKER_HOSTS} ---input_base_path ${INPUT_BASE_PATH} ---output_base_path ${OUTPUT_BASE_PATH} ---eval_base_path ${EVAL_BASE_PATH}" - -# This configuration is for a two-GPU machine. It starts four worker -# processes, two for each GPU. -python swivel.py --job_name ps --task_index 0 ${ARGS} >& /tmp/ps.0 & -python swivel.py --job_name worker --task_index 0 --gpu_device 0 ${ARGS} >& /tmp/worker.0 & -python swivel.py --job_name worker --task_index 1 --gpu_device 1 ${ARGS} >& /tmp/worker.1 & -python swivel.py --job_name worker --task_index 2 --gpu_device 0 ${ARGS} >& /tmp/worker.2 & -python swivel.py --job_name worker --task_index 3 --gpu_device 1 ${ARGS} >& /tmp/worker.3 & - -# Perhaps there is a more clever way to clean up the parameter server once all -# the workers are done. -wait %2 %3 %4 %5 -kill %1 - diff --git a/research/swivel/eval.mk b/research/swivel/eval.mk deleted file mode 100644 index b8db8c86a025c0818001b4aae0c90716c5129a9d..0000000000000000000000000000000000000000 --- a/research/swivel/eval.mk +++ /dev/null @@ -1,101 +0,0 @@ -# -*- Mode: Makefile -*- -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This makefile pulls down the evaluation datasets and formats them uniformly. -# Word similarity evaluations are formatted to contain exactly three columns: -# the two words being compared and the human judgement. -# -# Use wordsim.py and analogy to run the actual evaluations. - -CXXFLAGS=-std=c++11 -m64 -mavx -g -Ofast -Wall -LDLIBS=-lpthread -lm - -WORDSIM_EVALS= ws353sim.ws.tab \ - ws353rel.ws.tab \ - men.ws.tab \ - mturk.ws.tab \ - rarewords.ws.tab \ - simlex999.ws.tab \ - $(NULL) - -ANALOGY_EVALS= mikolov.an.tab \ - msr.an.tab \ - $(NULL) - -all: $(WORDSIM_EVALS) $(ANALOGY_EVALS) analogy - -ws353sim.ws.tab: ws353simrel.tar.gz - tar Oxfz $^ wordsim353_sim_rel/wordsim_similarity_goldstandard.txt > $@ - -ws353rel.ws.tab: ws353simrel.tar.gz - tar Oxfz $^ wordsim353_sim_rel/wordsim_relatedness_goldstandard.txt > $@ - -men.ws.tab: MEN.tar.gz - tar Oxfz $^ MEN/MEN_dataset_natural_form_full | tr ' ' '\t' > $@ - -mturk.ws.tab: Mtruk.csv - cat $^ | tr -d '\r' | tr ',' '\t' > $@ - -rarewords.ws.tab: rw.zip - unzip -p $^ rw/rw.txt | cut -f1-3 -d $$'\t' > $@ - -simlex999.ws.tab: SimLex-999.zip - unzip -p $^ SimLex-999/SimLex-999.txt \ - | tail -n +2 | cut -f1,2,4 -d $$'\t' > $@ - -mikolov.an.tab: questions-words.txt - egrep -v -E '^:' $^ | tr '[A-Z] ' '[a-z]\t' > $@ - -msr.an.tab: word_relationship.questions word_relationship.answers - cat word_relationship.questions | tr ' ' '\t' > /tmp/q - cat word_relationship.answers | cut -f2 -d ' ' > /tmp/a - paste /tmp/q /tmp/a > $@ - rm -f /tmp/q /tmp/a - - -# wget commands to fetch the datasets. Please see the original datasets for -# appropriate references if you use these. 
-ws353simrel.tar.gz: - wget http://alfonseca.org/pubs/ws353simrel.tar.gz - -MEN.tar.gz: - wget http://clic.cimec.unitn.it/~elia.bruni/resources/MEN.tar.gz - -Mtruk.csv: - wget http://www.kiraradinsky.com/files/Mtruk.csv - -rw.zip: - wget http://www-nlp.stanford.edu/~lmthang/morphoNLM/rw.zip - -SimLex-999.zip: - wget http://www.cl.cam.ac.uk/~fh295/SimLex-999.zip - -questions-words.txt: - wget http://download.tensorflow.org/data/questions-words.txt - -word_relationship.questions: - wget https://github.com/darshanhegde/SNLPProject/raw/master/word2vec/eval/word_relationship.questions - -word_relationship.answers: - wget https://github.com/darshanhegde/SNLPProject/raw/master/word2vec/eval/word_relationship.answers - -analogy: analogy.cc - -clean: - rm -f *.ws.tab *.an.tab analogy *.pyc - -distclean: clean - rm -f *.tgz *.tar.gz *.zip Mtruk.csv questions-words.txt word_relationship.{questions,answers} diff --git a/research/swivel/fastprep.cc b/research/swivel/fastprep.cc deleted file mode 100644 index a4bd7feef470ab29c9c8eb89051fc763aaeb16ff..0000000000000000000000000000000000000000 --- a/research/swivel/fastprep.cc +++ /dev/null @@ -1,692 +0,0 @@ -/* -*- Mode: C++ -*- */ - -/* - * Copyright 2016 Google Inc. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This program starts with a text file (and optionally a vocabulary file) and - * computes co-occurrence statistics. 
It emits output in a format that can be - * consumed by the "swivel" program. It's functionally equivalent to "prep.py", - * but works much more quickly. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "google/protobuf/io/zero_copy_stream_impl.h" -#include "tensorflow/core/example/example.pb.h" -#include "tensorflow/core/example/feature.pb.h" - -static const char usage[] = R"( -Prepares a corpus for processing by Swivel. - -Usage: - - prep --output_dir --input - -Options: - - --input - The input text. - - --output_dir - Specifies the output directory where the various Swivel data - files should be placed. This directory must exist. - - --shard_size - Specifies the shard size; default 4096. - - --min_count - The minimum number of times a word should appear to be included in the - generated vocabulary; default 5. (Ignored if --vocab is used.) - - --max_vocab - The maximum vocabulary size to generate from the input corpus; default - 102,400. (Ignored if --vocab is used.) - - --vocab - Use the specified unigram vocabulary instead of generating - it from the corpus. - - --window_size - Specifies the window size for computing co-occurrence stats; - default 10. - - --num_threads - The number of workers to calculate the co-occurrence matrix; - default 4. -)"; - -struct cooc_t { - int row; - int col; - float cnt; -}; - -typedef std::map cooc_counts_t; - -// Retrieves the next word from the input stream, treating words as simply being -// delimited by whitespace. Returns true if this is the end of a "sentence"; -// i.e., a newline. -bool NextWord(std::ifstream &fin, std::string* word) { - std::string buf; - char c; - - if (fin.eof()) { - word->erase(); - return true; - } - - // Skip leading whitespace. 
- do { - c = fin.get(); - } while (!fin.eof() && std::isspace(c)); - - if (fin.eof()) { - word->erase(); - return true; - } - - // Read the next word. - do { - buf += c; - c = fin.get(); - } while (!fin.eof() && !std::isspace(c)); - - *word = buf; - if (c == '\n' || fin.eof()) return true; - - // Skip trailing whitespace. - do { - c = fin.get(); - } while (!fin.eof() && std::isspace(c)); - - if (fin.eof()) return true; - - fin.unget(); - return false; -} - -// Creates a vocabulary from the most frequent terms in the input file. -std::vector CreateVocabulary(const std::string input_filename, - const int shard_size, - const int min_vocab_count, - const int max_vocab_size) { - std::vector vocab; - - // Count all the distinct tokens in the file. (XXX this will eventually - // consume all memory and should be re-written to periodically trim the data.) - std::unordered_map counts; - - std::ifstream fin(input_filename, std::ifstream::ate); - - if (!fin) { - std::cerr << "couldn't read input file '" << input_filename << "'" - << std::endl; - - return vocab; - } - - const auto input_size = fin.tellg(); - fin.seekg(0); - - long long ntokens = 0; - while (!fin.eof()) { - std::string word; - NextWord(fin, &word); - counts[word] += 1; - - if (++ntokens % 1000000 == 0) { - const float pct = 100.0 * static_cast(fin.tellg()) / input_size; - fprintf(stdout, "\rComputing vocabulary: %0.1f%% complete...", pct); - std::flush(std::cout); - } - } - - std::cout << counts.size() << " distinct tokens" << std::endl; - - // Sort the vocabulary from most frequent to least frequent. 
- std::vector> buf; - std::copy(counts.begin(), counts.end(), std::back_inserter(buf)); - std::sort(buf.begin(), buf.end(), - [](const std::pair &a, - const std::pair &b) { - return b.second < a.second; - }); - - // Truncate to the maximum vocabulary size - if (static_cast(buf.size()) > max_vocab_size) buf.resize(max_vocab_size); - if (buf.empty()) return vocab; - - // Eliminate rare tokens and truncate to a size modulo the shard size. - int vocab_size = buf.size(); - while (vocab_size > 0 && buf[vocab_size - 1].second < min_vocab_count) - --vocab_size; - - vocab_size -= vocab_size % shard_size; - if (static_cast(buf.size()) > vocab_size) buf.resize(vocab_size); - - // Copy out the tokens. - for (const auto& pair : buf) vocab.push_back(pair.first); - - return vocab; -} - -std::vector ReadVocabulary(const std::string vocab_filename) { - std::vector vocab; - - std::ifstream fin(vocab_filename); - int index = 0; - for (std::string token; std::getline(fin, token); ++index) { - auto n = token.find('\t'); - if (n != std::string::npos) token = token.substr(n); - - vocab.push_back(token); - } - - return vocab; -} - -void WriteVocabulary(const std::vector &vocab, - const std::string &output_dirname) { - for (const std::string filename : {"row_vocab.txt", "col_vocab.txt"}) { - std::ofstream fout(output_dirname + "/" + filename); - for (const auto &token : vocab) fout << token << std::endl; - } -} - -// Manages accumulation of co-occurrence data into temporary disk buffer files. -class CoocBuffer { - public: - CoocBuffer(const std::string &output_dirname, const int num_shards, - const int shard_size); - - // Accumulate the co-occurrence counts to the buffer. - void AccumulateCoocs(const cooc_counts_t &coocs); - - // Read the buffer to produce shard files. - void WriteShards(); - - protected: - // The output directory. Also used for temporary buffer files. - const std::string output_dirname_; - - // The number of row/column shards. 
- const int num_shards_; - - // The number of elements per shard. - const int shard_size_; - - // Parallel arrays of temporary file paths and file descriptors. - std::vector paths_; - std::vector fds_; - - // Ensures that only one buffer file is getting written at a time. - std::mutex writer_mutex_; -}; - -CoocBuffer::CoocBuffer(const std::string &output_dirname, const int num_shards, - const int shard_size) - : output_dirname_(output_dirname), - num_shards_(num_shards), - shard_size_(shard_size) { - for (int row = 0; row < num_shards_; ++row) { - for (int col = 0; col < num_shards_; ++col) { - char filename[256]; - sprintf(filename, "shard-%03d-%03d.tmp", row, col); - - std::string path = output_dirname + "/" + filename; - int fd = open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666); - assert(fd > 0); - - paths_.push_back(path); - fds_.push_back(fd); - } - } -} - -void CoocBuffer::AccumulateCoocs(const cooc_counts_t &coocs) { - std::vector> bufs(fds_.size()); - - for (const auto &cooc : coocs) { - const int row_id = cooc.first >> 32; - const int col_id = cooc.first & 0xffffffff; - const float cnt = cooc.second; - - const int row_shard = row_id % num_shards_; - const int row_off = row_id / num_shards_; - const int col_shard = col_id % num_shards_; - const int col_off = col_id / num_shards_; - - const int top_shard_idx = row_shard * num_shards_ + col_shard; - bufs[top_shard_idx].push_back(cooc_t{row_off, col_off, cnt}); - - const int bot_shard_idx = col_shard * num_shards_ + row_shard; - bufs[bot_shard_idx].push_back(cooc_t{col_off, row_off, cnt}); - } - - for (int i = 0; i < static_cast(fds_.size()); ++i) { - std::lock_guard rv(writer_mutex_); - const int nbytes = bufs[i].size() * sizeof(cooc_t); - int nwritten = write(fds_[i], bufs[i].data(), nbytes); - assert(nwritten == nbytes); - } -} - -void CoocBuffer::WriteShards() { - for (int shard = 0; shard < static_cast(fds_.size()); ++shard) { - const int row_shard = shard / num_shards_; - const int col_shard = shard 
% num_shards_; - - std::cout << "\rwriting shard " << (shard + 1) << "/" - << (num_shards_ * num_shards_); - std::flush(std::cout); - - // Construct the tf::Example proto. First, we add the global rows and - // column that are present in the shard. - tensorflow::Example example; - - auto &feature = *example.mutable_features()->mutable_feature(); - auto global_row = feature["global_row"].mutable_int64_list(); - auto global_col = feature["global_col"].mutable_int64_list(); - - for (int i = 0; i < shard_size_; ++i) { - global_row->add_value(row_shard + i * num_shards_); - global_col->add_value(col_shard + i * num_shards_); - } - - // Next we add co-occurrences as a sparse representation. Map the - // co-occurrence counts that we've spooled off to disk: these are in - // arbitrary order and may contain duplicates. - const off_t nbytes = lseek(fds_[shard], 0, SEEK_END); - cooc_t *coocs = static_cast( - mmap(0, nbytes, PROT_READ | PROT_WRITE, MAP_SHARED, fds_[shard], 0)); - - const int ncoocs = nbytes / sizeof(cooc_t); - cooc_t* cur = coocs; - cooc_t* end = coocs + ncoocs; - - auto sparse_value = feature["sparse_value"].mutable_float_list(); - auto sparse_local_row = feature["sparse_local_row"].mutable_int64_list(); - auto sparse_local_col = feature["sparse_local_col"].mutable_int64_list(); - - std::sort(cur, end, [](const cooc_t &a, const cooc_t &b) { - return a.row < b.row || (a.row == b.row && a.col < b.col); - }); - - // Accumulate the counts into the protocol buffer. 
- int last_row = -1, last_col = -1; - float count = 0; - for (; cur != end; ++cur) { - if (cur->row != last_row || cur->col != last_col) { - if (last_row >= 0 && last_col >= 0) { - sparse_local_row->add_value(last_row); - sparse_local_col->add_value(last_col); - sparse_value->add_value(count); - } - - last_row = cur->row; - last_col = cur->col; - count = 0; - } - - count += cur->cnt; - } - - if (last_row >= 0 && last_col >= 0) { - sparse_local_row->add_value(last_row); - sparse_local_col->add_value(last_col); - sparse_value->add_value(count); - } - - munmap(coocs, nbytes); - close(fds_[shard]); - - if (sparse_local_row->value_size() * 8 >= (64 << 20)) { - std::cout << "Warning: you are likely to catch protobuf parsing errors " - "in TF 1.0 and older because the shard is too fat (>= 64MiB); see " - << std::endl << - "kDefaultTotalBytesLimit in src/google/protobuf/io/coded_stream.h " - " changed in protobuf/commit/5a76e633ea9b5adb215e93fdc11e1c0c08b3fc74" - << std::endl << - "https://github.com/tensorflow/tensorflow/issues/7311" - << std::endl << - "Consider increasing the number of shards."; - } - - // Write the protocol buffer as a binary blob to disk. - const int filename_max_size = 4096; - std::unique_ptr filename(new char[filename_max_size]); - snprintf(filename.get(), filename_max_size, "shard-%03d-%03d.pb", row_shard, - col_shard); - - const std::string path = output_dirname_ + "/" + filename.get(); - int fd = open(path.c_str(), O_WRONLY | O_TRUNC | O_CREAT, 0666); - assert(fd != -1); - - google::protobuf::io::FileOutputStream fout(fd); - example.SerializeToZeroCopyStream(&fout); - fout.Close(); - - // Remove the temporary file. - unlink(paths_[shard].c_str()); - } - - std::cout << std::endl; -} - -// Counts the co-occurrences in part of the file. 
-class CoocCounter { - public: - CoocCounter(const std::string &input_filename, const off_t start, - const off_t end, const int window_size, - const std::unordered_map &token_to_id_map, - CoocBuffer *coocbuf) - : fin_(input_filename, std::ifstream::ate), - start_(start), - end_(end), - window_size_(window_size), - token_to_id_map_(token_to_id_map), - coocbuf_(coocbuf), - marginals_(token_to_id_map.size()) {} - - // PTthreads-friendly thunk to Count. - static void* Run(void* param) { - CoocCounter* self = static_cast(param); - self->Count(); - return nullptr; - } - - // Counts the co-occurrences. - void Count(); - - const std::vector& Marginals() const { return marginals_; } - - protected: - // The input stream. - std::ifstream fin_; - - // The range of the file to which this counter should attend. - const off_t start_; - const off_t end_; - - // The window size for computing co-occurrences. - const int window_size_; - - // A reference to the mapping from tokens to IDs. - const std::unordered_map &token_to_id_map_; - - // The buffer into which counts are to be accumulated. - CoocBuffer* coocbuf_; - - // The marginal counts accumulated by this counter. - std::vector marginals_; -}; - -void CoocCounter::Count() { - const int max_coocs_size = 16 * 1024 * 1024; - - // A buffer of co-occurrence counts that we'll periodically sort into - // shards. - cooc_counts_t coocs; - - fin_.seekg(start_); - - int nlines = 0; - for (off_t filepos = start_; filepos < end_ && !fin_.eof(); filepos = fin_.tellg()) { - // Buffer a single sentence. - std::vector sentence; - bool eos; - do { - std::string word; - eos = NextWord(fin_, &word); - auto it = token_to_id_map_.find(word); - if (it != token_to_id_map_.end()) sentence.push_back(it->second); - } while (!eos); - - // Generate the co-occurrences for the sentence. 
- for (int pos = 0; pos < static_cast(sentence.size()); ++pos) { - const int left_id = sentence[pos]; - - const int window_extent = - std::min(static_cast(sentence.size()) - pos, 1 + window_size_); - - for (int off = 1; off < window_extent; ++off) { - const int right_id = sentence[pos + off]; - const double count = 1.0 / static_cast(off); - const long long lo = std::min(left_id, right_id); - const long long hi = std::max(left_id, right_id); - const long long key = (hi << 32) | lo; - coocs[key] += count; - - marginals_[left_id] += count; - marginals_[right_id] += count; - } - - marginals_[left_id] += 1.0; - const long long key = (static_cast(left_id) << 32) | - static_cast(left_id); - - coocs[key] += 0.5; - } - - // Periodically flush the co-occurrences to disk. - if (coocs.size() > max_coocs_size) { - coocbuf_->AccumulateCoocs(coocs); - coocs.clear(); - } - - if (start_ == 0 && ++nlines % 1000 == 0) { - const double pct = 100.0 * filepos / end_; - fprintf(stdout, "\rComputing co-occurrences: %0.1f%% complete...", pct); - std::flush(std::cout); - } - } - - // Accumulate anything we haven't flushed yet. - coocbuf_->AccumulateCoocs(coocs); - - if (start_ == 0) std::cout << "done." 
<< std::endl; -} - -void WriteMarginals(const std::vector &marginals, - const std::string &output_dirname) { - for (const std::string filename : {"row_sums.txt", "col_sums.txt"}) { - std::ofstream fout(output_dirname + "/" + filename); - fout.setf(std::ios::fixed); - for (double sum : marginals) fout << sum << std::endl; - } -} - -int main(int argc, char *argv[]) { - std::string input_filename; - std::string vocab_filename; - std::string output_dirname; - bool generate_vocab = true; - int max_vocab_size = 100 * 1024; - int min_vocab_count = 5; - int window_size = 10; - int shard_size = 4096; - int num_threads = 4; - - for (int i = 1; i < argc; ++i) { - std::string arg(argv[i]); - if (arg == "--vocab") { - if (++i >= argc) goto argmissing; - generate_vocab = false; - vocab_filename = argv[i]; - } else if (arg == "--max_vocab") { - if (++i >= argc) goto argmissing; - if ((max_vocab_size = atoi(argv[i])) <= 0) goto badarg; - } else if (arg == "--min_count") { - if (++i >= argc) goto argmissing; - if ((min_vocab_count = atoi(argv[i])) <= 0) goto badarg; - } else if (arg == "--window_size") { - if (++i >= argc) goto argmissing; - if ((window_size = atoi(argv[i])) <= 0) goto badarg; - } else if (arg == "--input") { - if (++i >= argc) goto argmissing; - input_filename = argv[i]; - } else if (arg == "--output_dir") { - if (++i >= argc) goto argmissing; - output_dirname = argv[i]; - } else if (arg == "--shard_size") { - if (++i >= argc) goto argmissing; - shard_size = atoi(argv[i]); - } else if (arg == "--num_threads") { - if (++i >= argc) goto argmissing; - num_threads = atoi(argv[i]); - } else if (arg == "--help") { - std::cout << usage << std::endl; - return 0; - } else { - std::cerr << "unknown arg '" << arg << "'; try --help?" << std::endl; - return 2; - } - - continue; - - badarg: - std::cerr << "'" << argv[i] << "' is not a valid value for '" << arg - << "'; try --help?" 
<< std::endl; - - return 2; - - argmissing: - std::cerr << arg << " requires an argument; try --help?" << std::endl; - } - - if (input_filename.empty()) { - std::cerr << "please specify the input text with '--input'; try --help?" - << std::endl; - return 2; - } - - if (output_dirname.empty()) { - std::cerr << "please specify the output directory with '--output_dir'" - << std::endl; - - return 2; - } - - struct stat sb; - if (lstat(output_dirname.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)) { - if (mkdir(output_dirname.c_str(), 0755) != 0) { - std::cerr << "output directory '" << output_dirname - << "' does not exist or is not a directory." << std::endl; - return 1; - } - } - - if (lstat(input_filename.c_str(), &sb) != 0 || !S_ISREG(sb.st_mode)) { - std::cerr << "input file '" << input_filename - << "' does not exist or is not a file." << std::endl; - - return 1; - } - - // The total size of the input. - const off_t input_size = sb.st_size; - - const std::vector vocab = - generate_vocab ? CreateVocabulary(input_filename, shard_size, - min_vocab_count, max_vocab_size) - : ReadVocabulary(vocab_filename); - - if (!vocab.size()) { - std::cerr << "Empty vocabulary." << std::endl; - return 1; - } - - std::cout << "Generating Swivel co-occurrence data into " << output_dirname - << std::endl; - - std::cout << "Shard size: " << shard_size << "x" << shard_size << std::endl; - std::cout << "Vocab size: " << vocab.size() << std::endl; - - // Write the vocabulary files into the output directory. - WriteVocabulary(vocab, output_dirname); - - const int num_shards = vocab.size() / shard_size; - CoocBuffer coocbuf(output_dirname, num_shards, shard_size); - - // Build a mapping from the token to its position in the vocabulary file. 
- std::unordered_map token_to_id_map; - for (int i = 0; i < static_cast(vocab.size()); ++i) - token_to_id_map[vocab[i]] = i; - - // Compute the co-occurrences - std::vector threads; - threads.reserve(num_threads); - std::vector counters; - const off_t nbytes_per_thread = input_size / num_threads; - std::cout << "Running " << num_threads << " threads, each on " - << nbytes_per_thread << " bytes" << std::endl; - - for (int i = 0; i < num_threads; ++i) { - // We could make this smarter and look around for newlines. But - // realistically that's not going to change things much. - const off_t start = i * nbytes_per_thread; - const off_t end = - i < num_threads - 1 ? (i + 1) * nbytes_per_thread : input_size; - - CoocCounter *counter = new CoocCounter( - input_filename, start, end, window_size, token_to_id_map, &coocbuf); - - counters.push_back(counter); - - threads.emplace_back(CoocCounter::Run, counter); - } - - // Wait for threads to finish and collect marginals. - std::vector marginals(vocab.size()); - for (int i = 0; i < num_threads; ++i) { - if (i > 0) { - std::cout << "joining thread #" << (i + 1) << std::endl; - } - threads[i].join(); - - const std::vector& counter_marginals = counters[i]->Marginals(); - for (int j = 0; j < static_cast(vocab.size()); ++j) - marginals[j] += counter_marginals[j]; - - delete counters[i]; - } - - std::cout << "writing marginals..." << std::endl; - WriteMarginals(marginals, output_dirname); - - std::cout << "writing shards..." << std::endl; - coocbuf.WriteShards(); - - return 0; -} diff --git a/research/swivel/fastprep.mk b/research/swivel/fastprep.mk deleted file mode 100644 index b1798d0b68a7ac53f47c3d8fa4f72babd1af89f2..0000000000000000000000000000000000000000 --- a/research/swivel/fastprep.mk +++ /dev/null @@ -1,60 +0,0 @@ -# -*- Mode: Makefile -*- - -# -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This makefile builds "fastprep", a faster version of prep.py that can be used -# to build training data for Swivel. -# -# = Step 1. Install protobuf v3 = -# -# Ubuntu 16.10+: sudo apt install libprotobuf-dev -# Ubuntu 16.04: https://launchpad.net/~maarten-fonville/+archive/ubuntu/ppa + replace xenial with yakkety in /etc/apt/sources.list.d/maarten-fonville-ubuntu-ppa-xenial.list -# macOS: brew install protobuf -# -# = Step 2. Build "fastprep". = -# -# make -f fastprep.mk -# -# If all goes well, you should have a program that is "flag compatible" with -# "prep.py" and runs significantly faster. Use it to generate the co-occurrence -# matrices and other files necessary to train a Swivel matrix. - - -CXXFLAGS=-std=c++11 -march=native -g -O2 -flto -Wall -I. -LDLIBS=-lprotobuf -pthread -lm - -FETCHER=curl -L -o -TF_URL=https://github.com/tensorflow/tensorflow/raw/master -PROTOC=protoc - - -%.proto: tensorflow/core/example - $(FETCHER) $@ $(TF_URL)/$@ - -%.pb.cc: %.proto - $(PROTOC) --cpp_out=. 
$< - -fastprep: fastprep.cc tensorflow/core/example/feature.pb.cc tensorflow/core/example/example.pb.cc - -tensorflow/core/example: - @mkdir -p tensorflow/core/example - -clean: - @rm -f fastprep - -mrproper: clean - @rm -rf tensorflow diff --git a/research/swivel/glove_to_shards.py b/research/swivel/glove_to_shards.py deleted file mode 100755 index 4a9cd23c79370796380871fc1874d0f45494e8b6..0000000000000000000000000000000000000000 --- a/research/swivel/glove_to_shards.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Converts a Glove binary co-occurrence matrix into Swivel shards. - -Usage: - - glove_to_shards.py --input --vocab --output_dir - -Options - - --input - The Glove co-occurrence file. - - --vocab - Path to the vocabulary text file, one token per line. - - --output_dir - Specifies the touput directory where the various Swivel data - files sohuld be placed. - - --shard_size - Specifies the shard size; default 4096. 
-""" - -from __future__ import print_function - -import itertools -import os -import struct -import sys - -import tensorflow as tf -from six.moves import xrange - -flags = tf.app.flags - -flags.DEFINE_string('input', 'coocurrences.bin', 'Vocabulary file') -flags.DEFINE_string('vocab', 'vocab.txt', 'Vocabulary file') -flags.DEFINE_string('output_dir', '/tmp/swivel_data', 'Output directory') -flags.DEFINE_integer('shard_size', 4096, 'Shard size') - -FLAGS = tf.app.flags.FLAGS - -glove_cooc_fmt = struct.Struct('iid') -shard_cooc_fmt = struct.Struct('if') - - -def make_shard_files(coocs, nshards, vocab_sz): - """Chops the binary Glove co-occurrence matrix into shards. - - This reads the Glove binary co-occurrence file and assigns individual - co-occurrence counts to the appropriate Swivel shard. - - Args: - coocs: the co-occurrnece file to read - nshards: the number of shards along one dimension of the square matrix - vocab_sz: the vocabulary size - - Returns: - A (shard_table, marginals) tuple. The shard_table maps the row and column - shard ID to a file handle containing the co-occurrences for that shard; the - marginals contain the marginal sums. - """ - row_sums = [0] * vocab_sz - col_sums = [0] * vocab_sz - - coocs.seek(0, os.SEEK_END) - ncoocs = coocs.tell() / glove_cooc_fmt.size - coocs.seek(0, os.SEEK_SET) - - shard_files = {} - - for row in range(nshards): - for col in range(nshards): - filename = os.path.join( - FLAGS.output_dir, 'shard-%03d-%03d.bin' % (row, col)) - - shard_files[(row, col)] = open(filename, 'w+') - - for ix in xrange(ncoocs): - if ix % 1000000 == 0: - sys.stdout.write('\rsharding co-occurrences: %0.1f%% (%d/%d)' % ( - 100.0 * ix / ncoocs, ix, ncoocs)) - - sys.stdout.flush() - - bits = coocs.read(glove_cooc_fmt.size) - if not bits: - break - - # Glove has 1-indexed IDs. 
- row_id, col_id, cnt = glove_cooc_fmt.unpack(bits) - if row_id > vocab_sz or col_id > vocab_sz: - continue - - row_id -= 1 - row_shard = row_id % nshards - row_off = row_id / nshards - - col_id -= 1 - col_shard = col_id % nshards - col_off = col_id / nshards - - shard_pos = row_off * FLAGS.shard_size + col_off # row major - - shard_files[(row_shard, col_shard)].write( - shard_cooc_fmt.pack(shard_pos, cnt)) - - # Accumulate marginals. - row_sums[row_id] += cnt - col_sums[col_id] += cnt - - sys.stdout.write('\n') - - if any(abs(r - c) > 0.1 for r, c in itertools.izip(row_sums, col_sums)): - print('WARNING! Row and column marginals differ; is your matrix symmetric?', - file=sys.stderr) - - return (shard_files, row_sums) - -def main(_): - with open(FLAGS.vocab, 'r') as lines: - orig_vocab_sz = sum(1 for _ in lines) - - shard_sz = FLAGS.shard_size - vocab_sz = orig_vocab_sz - orig_vocab_sz % shard_sz - nshards = vocab_sz / shard_sz - - print('vocab size is %d (originally %d), %d %dx%d-element shards' % ( - vocab_sz, orig_vocab_sz, nshards * nshards, shard_sz, shard_sz)) - - # Create the output directory, if necessary - if FLAGS.output_dir and not os.path.isdir(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - - with open(FLAGS.input, 'r') as coocs: - shard_files, marginals = make_shard_files(coocs, nshards, vocab_sz) - - # Now sort the shards and write the TFRecords. - filename = os.path.join(FLAGS.output_dir, 'shards.recs') - with tf.python_io.TFRecordWriter(filename) as writer: - ix = 0 - for (row, col), fh in shard_files.iteritems(): - ix += 1 - sys.stdout.write('\rwriting shard %d/%d' % (ix, len(shard_files))) - sys.stdout.flush() - - fh.seek(0) - buf = fh.read() - os.unlink(fh.name) - fh.close() - - coocs = [ - shard_cooc_fmt.unpack_from(buf, off) - for off in range(0, len(buf), shard_cooc_fmt.size)] - - # N.B. we assume that there aren't any duplicates here! 
- coocs.sort(key=lambda kv: kv[0]) - - def _int64s(xs): - return tf.train.Feature(int64_list=tf.train.Int64List(value=list(xs))) - - def _floats(xs): - return tf.train.Feature(float_list=tf.train.FloatList(value=list(xs))) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'global_row': _int64s(row + nshards * i for i in range(shard_sz)), - 'global_col': _int64s(col + nshards * i for i in range(shard_sz)), - 'sparse_local_row': _int64s(pos / shard_sz for pos, _ in coocs), - 'sparse_local_col': _int64s(pos % shard_sz for pos, _ in coocs), - 'sparse_value': _floats(cnt for _, cnt in coocs)})) - - writer.write(example.SerializeToString()) - - print('\nwriting marginals...') - - with open(os.path.join(FLAGS.output_dir, 'marginals.txt'), 'w') as fh: - for cnt in marginals: - fh.write('%0.1f\n' % cnt) - - print('done!') - -if __name__ == '__main__': - tf.app.run() diff --git a/research/swivel/nearest.py b/research/swivel/nearest.py deleted file mode 100644 index 0fa828b66b7958ae06f115216cea0106ec029033..0000000000000000000000000000000000000000 --- a/research/swivel/nearest.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Simple tool for inspecting nearest neighbors and analogies.""" - -from __future__ import print_function -import re -import sys -from getopt import GetoptError, getopt - -from vecs import Vecs - -try: - opts, args = getopt(sys.argv[1:], 'v:e:', ['vocab=', 'embeddings=']) -except GetoptError as e: - print(e, file=sys.stderr) - sys.exit(2) - -opt_vocab = 'vocab.txt' -opt_embeddings = None - -for o, a in opts: - if o in ('-v', '--vocab'): - opt_vocab = a - if o in ('-e', '--embeddings'): - opt_embeddings = a - -vecs = Vecs(opt_vocab, opt_embeddings) - -while True: - sys.stdout.write('query> ') - sys.stdout.flush() - - query = sys.stdin.readline().strip() - if not query: - break - - parts = re.split(r'\s+', query) - - if len(parts) == 1: - res = vecs.neighbors(parts[0]) - - elif len(parts) == 3: - vs = [vecs.lookup(w) for w in parts] - if any(v is None for v in vs): - print('not in vocabulary: %s' % ( - ', '.join(tok for tok, v in zip(parts, vs) if v is None))) - - continue - - res = vecs.neighbors(vs[2] - vs[0] + vs[1]) - - else: - print('use a single word to query neighbors, or three words for analogy') - continue - - if not res: - continue - - for word, sim in res[:20]: - print('%0.4f: %s' % (sim, word)) - - print() diff --git a/research/swivel/prep.py b/research/swivel/prep.py deleted file mode 100644 index b72a6fb2fe0c513303e00318b59b4ff3457add4a..0000000000000000000000000000000000000000 --- a/research/swivel/prep.py +++ /dev/null @@ -1,317 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Prepare a corpus for processing by swivel. - -Creates a sharded word co-occurrence matrix from a text file input corpus. - -Usage: - - prep.py --output_dir --input - -Options: - - --input - The input text. - - --output_dir - Specifies the output directory where the various Swivel data - files should be placed. - - --shard_size - Specifies the shard size; default 4096. - - --min_count - Specifies the minimum number of times a word should appear - to be included in the vocabulary; default 5. - - --max_vocab - Specifies the maximum vocabulary size; default shard size - times 1024. - - --vocab - Use the specified unigram vocabulary instead of generating - it from the corpus. - - --window_size - Specifies the window size for computing co-occurrence stats; - default 10. - - --bufsz - The number of co-occurrences that are buffered; default 16M. 
- -""" - -import itertools -import math -import os -import struct -import sys - -from six.moves import xrange -import tensorflow as tf - -flags = tf.app.flags - -flags.DEFINE_string('input', '', 'The input text.') -flags.DEFINE_string('output_dir', '/tmp/swivel_data', - 'Output directory for Swivel data') -flags.DEFINE_integer('shard_size', 4096, 'The size for each shard') -flags.DEFINE_integer('min_count', 5, - 'The minimum number of times a word should occur to be ' - 'included in the vocabulary') -flags.DEFINE_integer('max_vocab', 4096 * 64, 'The maximum vocabulary size') -flags.DEFINE_string('vocab', '', 'Vocabulary to use instead of generating one') -flags.DEFINE_integer('window_size', 10, 'The window size') -flags.DEFINE_integer('bufsz', 16 * 1024 * 1024, - 'The number of co-occurrences to buffer') - -FLAGS = flags.FLAGS - -shard_cooc_fmt = struct.Struct('iif') - - -def words(line): - """Splits a line of text into tokens.""" - return line.strip().split() - - -def create_vocabulary(lines): - """Reads text lines and generates a vocabulary.""" - lines.seek(0, os.SEEK_END) - nbytes = lines.tell() - lines.seek(0, os.SEEK_SET) - - vocab = {} - for lineno, line in enumerate(lines, start=1): - for word in words(line): - vocab.setdefault(word, 0) - vocab[word] += 1 - - if lineno % 100000 == 0: - pos = lines.tell() - sys.stdout.write('\rComputing vocabulary: %0.1f%% (%d/%d)...' 
% ( - 100.0 * pos / nbytes, pos, nbytes)) - sys.stdout.flush() - - sys.stdout.write('\n') - - vocab = [(tok, n) for tok, n in vocab.iteritems() if n >= FLAGS.min_count] - vocab.sort(key=lambda kv: (-kv[1], kv[0])) - - num_words = min(len(vocab), FLAGS.max_vocab) - if num_words % FLAGS.shard_size != 0: - num_words -= num_words % FLAGS.shard_size - - if not num_words: - raise Exception('empty vocabulary') - - print('vocabulary contains %d tokens' % num_words) - - vocab = vocab[:num_words] - return [tok for tok, n in vocab] - - -def write_vocab_and_sums(vocab, sums, vocab_filename, sums_filename): - """Writes vocabulary and marginal sum files.""" - with open(os.path.join(FLAGS.output_dir, vocab_filename), 'w') as vocab_out: - with open(os.path.join(FLAGS.output_dir, sums_filename), 'w') as sums_out: - for tok, cnt in itertools.izip(vocab, sums): - print >> vocab_out, tok - print >> sums_out, cnt - - -def compute_coocs(lines, vocab): - """Compute the co-occurrence statistics from the text. - - This generates a temporary file for each shard that contains the intermediate - counts from the shard: these counts must be subsequently sorted and collated. - - """ - word_to_id = {tok: idx for idx, tok in enumerate(vocab)} - - lines.seek(0, os.SEEK_END) - nbytes = lines.tell() - lines.seek(0, os.SEEK_SET) - - num_shards = len(vocab) / FLAGS.shard_size - - shardfiles = {} - for row in range(num_shards): - for col in range(num_shards): - filename = os.path.join( - FLAGS.output_dir, 'shard-%03d-%03d.tmp' % (row, col)) - - shardfiles[(row, col)] = open(filename, 'w+') - - def flush_coocs(): - for (row_id, col_id), cnt in coocs.iteritems(): - row_shard = row_id % num_shards - row_off = row_id / num_shards - col_shard = col_id % num_shards - col_off = col_id / num_shards - - # Since we only stored (a, b), we emit both (a, b) and (b, a). 
- shardfiles[(row_shard, col_shard)].write( - shard_cooc_fmt.pack(row_off, col_off, cnt)) - - shardfiles[(col_shard, row_shard)].write( - shard_cooc_fmt.pack(col_off, row_off, cnt)) - - coocs = {} - sums = [0.0] * len(vocab) - - for lineno, line in enumerate(lines, start=1): - # Computes the word IDs for each word in the sentence. This has the effect - # of "stretching" the window past OOV tokens. - wids = filter( - lambda wid: wid is not None, - (word_to_id.get(w) for w in words(line))) - - for pos in xrange(len(wids)): - lid = wids[pos] - window_extent = min(FLAGS.window_size + 1, len(wids) - pos) - for off in xrange(1, window_extent): - rid = wids[pos + off] - pair = (min(lid, rid), max(lid, rid)) - count = 1.0 / off - sums[lid] += count - sums[rid] += count - coocs.setdefault(pair, 0.0) - coocs[pair] += count - - sums[lid] += 1.0 - pair = (lid, lid) - coocs.setdefault(pair, 0.0) - coocs[pair] += 0.5 # Only add 1/2 since we output (a, b) and (b, a) - - if lineno % 10000 == 0: - pos = lines.tell() - sys.stdout.write('\rComputing co-occurrences: %0.1f%% (%d/%d)...' % ( - 100.0 * pos / nbytes, pos, nbytes)) - sys.stdout.flush() - - if len(coocs) > FLAGS.bufsz: - flush_coocs() - coocs = {} - - flush_coocs() - sys.stdout.write('\n') - - return shardfiles, sums - - -def write_shards(vocab, shardfiles): - """Processes the temporary files to generate the final shard data. - - The shard data is stored as a tf.Example protos using a TFRecordWriter. The - temporary files are removed from the filesystem once they've been processed. - - """ - num_shards = len(vocab) / FLAGS.shard_size - - ix = 0 - for (row, col), fh in shardfiles.iteritems(): - ix += 1 - sys.stdout.write('\rwriting shard %d/%d' % (ix, len(shardfiles))) - sys.stdout.flush() - - # Read the entire binary co-occurrence and unpack it into an array. 
- fh.seek(0) - buf = fh.read() - os.unlink(fh.name) - fh.close() - - coocs = [ - shard_cooc_fmt.unpack_from(buf, off) - for off in range(0, len(buf), shard_cooc_fmt.size)] - - # Sort and merge co-occurrences for the same pairs. - coocs.sort() - - if coocs: - current_pos = 0 - current_row_col = (coocs[current_pos][0], coocs[current_pos][1]) - for next_pos in range(1, len(coocs)): - next_row_col = (coocs[next_pos][0], coocs[next_pos][1]) - if current_row_col == next_row_col: - coocs[current_pos] = ( - coocs[current_pos][0], - coocs[current_pos][1], - coocs[current_pos][2] + coocs[next_pos][2]) - else: - current_pos += 1 - if current_pos < next_pos: - coocs[current_pos] = coocs[next_pos] - - current_row_col = (coocs[current_pos][0], coocs[current_pos][1]) - - coocs = coocs[:(1 + current_pos)] - - # Convert to a TF Example proto. - def _int64s(xs): - return tf.train.Feature(int64_list=tf.train.Int64List(value=list(xs))) - - def _floats(xs): - return tf.train.Feature(float_list=tf.train.FloatList(value=list(xs))) - - example = tf.train.Example(features=tf.train.Features(feature={ - 'global_row': _int64s( - row + num_shards * i for i in range(FLAGS.shard_size)), - 'global_col': _int64s( - col + num_shards * i for i in range(FLAGS.shard_size)), - - 'sparse_local_row': _int64s(cooc[0] for cooc in coocs), - 'sparse_local_col': _int64s(cooc[1] for cooc in coocs), - 'sparse_value': _floats(cooc[2] for cooc in coocs), - })) - - filename = os.path.join(FLAGS.output_dir, 'shard-%03d-%03d.pb' % (row, col)) - with open(filename, 'w') as out: - out.write(example.SerializeToString()) - - sys.stdout.write('\n') - - -def main(_): - # Create the output directory, if necessary - if FLAGS.output_dir and not os.path.isdir(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - - # Read the file onces to create the vocabulary. 
- if FLAGS.vocab: - with open(FLAGS.vocab, 'r') as lines: - vocab = [line.strip() for line in lines] - else: - with open(FLAGS.input, 'r') as lines: - vocab = create_vocabulary(lines) - - # Now read the file again to determine the co-occurrence stats. - with open(FLAGS.input, 'r') as lines: - shardfiles, sums = compute_coocs(lines, vocab) - - # Collect individual shards into the shards.recs file. - write_shards(vocab, shardfiles) - - # Now write the marginals. They're symmetric for this application. - write_vocab_and_sums(vocab, sums, 'row_vocab.txt', 'row_sums.txt') - write_vocab_and_sums(vocab, sums, 'col_vocab.txt', 'col_sums.txt') - - print('done!') - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/swivel/swivel.py b/research/swivel/swivel.py deleted file mode 100755 index c69660c09c18f54da654ca8a7341559f8b9bcc22..0000000000000000000000000000000000000000 --- a/research/swivel/swivel.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Submatrix-wise Vector Embedding Learner. - -Implementation of SwiVel algorithm described at: -http://arxiv.org/abs/1602.02215 - -This program expects an input directory that contains the following files. - - row_vocab.txt, col_vocab.txt - - The row an column vocabulary files. Each file should contain one token per - line; these will be used to generate a tab-separate file containing the - trained embeddings. 
- - row_sums.txt, col_sum.txt - - The matrix row and column marginal sums. Each file should contain one - decimal floating point number per line which corresponds to the marginal - count of the matrix for that row or column. - - shards.recs - - A file containing the sub-matrix shards, stored as TFRecords. Each shard is - expected to be a serialzed tf.Example protocol buffer with the following - properties: - - global_row: the global row indicies contained in the shard - global_col: the global column indicies contained in the shard - sparse_local_row, sparse_local_col, sparse_value: three parallel arrays - that are a sparse representation of the submatrix counts. - -It will generate embeddings, training from the input directory for the specified -number of epochs. When complete, it will output the trained vectors to a -tab-separated file that contains one line per embedding. Row and column -embeddings are stored in separate files. - -Swivel can be run "stand-alone" or "distributed". The latter involves running -at least one parameter server process, along with one or more worker processes. -""" - -from __future__ import division -from __future__ import print_function - -import glob -import itertools -import os -import random - -import numpy as np -import scipy.stats -import tensorflow as tf - -flags = tf.app.flags - -flags.DEFINE_string( - 'input_base_path', '/tmp/swivel_data', - 'Directory containing input shards, vocabularies, and marginals.') -flags.DEFINE_string( - 'output_base_path', '/tmp/swivel_data', - 'Path where to write the trained embeddings.') -flags.DEFINE_string('eval_base_path', '', 'Path to evaluation data') - -# Control for training. -flags.DEFINE_float('num_epochs', 40, 'Number epochs to train') -flags.DEFINE_string('hparams', '', 'Model hyper-parameters') - -# Model hyper-parameters. (Move these to tf.HParams once that gets integrated -# into TF from tf.contrib.) 
-flags.DEFINE_integer( - 'dim', 300, 'Embedding dimensionality') -flags.DEFINE_string( - 'optimizer', 'rmsprop', 'SGD optimizer; either "adagrad" or "rmsprop"') -flags.DEFINE_float( - 'learning_rate', 0.1, 'Optimizer learning rate') -flags.DEFINE_float( - 'momentum', 0.1, 'Optimizer momentum; used with RMSProp') -flags.DEFINE_float( - 'confidence_base', 0.0, 'Base for count weighting') -flags.DEFINE_float( - 'confidence_scale', 1.0, 'Scale for count weighting') -flags.DEFINE_float( - 'confidence_exponent', 0.5, 'Exponent for count weighting') -flags.DEFINE_integer( - 'submatrix_rows', 4096, 'Number of rows in each submatrix') -flags.DEFINE_integer( - 'submatrix_cols', 4096, 'Number of cols in each submatrix') - -# For distributed training. -flags.DEFINE_string( - 'ps_hosts', '', - 'Comma-separated list of parameter server host:port; if empty, run local') -flags.DEFINE_string( - 'worker_hosts', '', 'Comma-separated list of worker host:port') -flags.DEFINE_string( - 'job_name', '', 'The job this process will run, either "ps" or "worker"') -flags.DEFINE_integer( - 'task_index', 0, 'The task index for this process') -flags.DEFINE_integer( - 'gpu_device', 0, 'The GPU device to use.') - -FLAGS = flags.FLAGS - - -class Model(object): - """A Swivel model.""" - - def __init__(self, input_base_path, hparams): - """Creates a new Swivel model.""" - # Read vocab - self.row_ix_to_word, self.row_word_to_ix = self._read_vocab( - os.path.join(input_base_path, 'row_vocab.txt')) - self.col_ix_to_word, self.col_word_to_ix = self._read_vocab( - os.path.join(input_base_path, 'col_vocab.txt')) - - # Read marginals. - row_sums = self._read_marginals_file( - os.path.join(input_base_path, 'row_sums.txt')) - col_sums = self._read_marginals_file( - os.path.join(input_base_path, 'col_sums.txt')) - - # Construct input tensors. 
- count_matrix_files = glob.glob( - os.path.join(input_base_path, 'shard-*.pb')) - - global_rows, global_cols, counts = self._count_matrix_input( - count_matrix_files, hparams.submatrix_rows, hparams.submatrix_cols) - - # Create embedding variables. - sigma = 1.0 / np.sqrt(hparams.dim) - self.row_embedding = tf.get_variable( - 'row_embedding', - shape=[len(row_sums), hparams.dim], - initializer=tf.random_normal_initializer(0, sigma), - dtype=tf.float32) - self.col_embedding = tf.get_variable( - 'col_embedding', - shape=[len(col_sums), hparams.dim], - initializer=tf.random_normal_initializer(0, sigma), - dtype=tf.float32) - - matrix_log_sum = np.log(np.sum(row_sums) + 1) - row_bias = tf.constant( - [np.log(x + 1) for x in row_sums], dtype=tf.float32) - col_bias = tf.constant( - [np.log(x + 1) for x in col_sums], dtype=tf.float32) - - # Fetch embeddings. - selected_rows = tf.nn.embedding_lookup(self.row_embedding, global_rows) - selected_cols = tf.nn.embedding_lookup(self.col_embedding, global_cols) - - selected_row_bias = tf.gather(row_bias, global_rows) - selected_col_bias = tf.gather(col_bias, global_cols) - - predictions = tf.matmul(selected_rows, selected_cols, transpose_b=True) - - # These binary masks separate zero from non-zero values. - count_is_nonzero = tf.to_float(tf.cast(counts, tf.bool)) - count_is_zero = 1 - count_is_nonzero - - objectives = count_is_nonzero * tf.log(counts + 1e-30) - objectives -= tf.reshape(selected_row_bias, [-1, 1]) - objectives -= selected_col_bias - objectives += matrix_log_sum - - err = predictions - objectives - - # The confidence function scales the L2 loss based on the raw - # co-occurrence count. 
- l2_confidence = (hparams.confidence_base + - hparams.confidence_scale * tf.pow( - counts, hparams.confidence_exponent)) - - loss_multiplier = 1 / np.sqrt( - hparams.submatrix_rows * hparams.submatrix_cols) - - l2_loss = loss_multiplier * tf.reduce_sum( - 0.5 * l2_confidence * tf.square(err)) - - sigmoid_loss = loss_multiplier * tf.reduce_sum( - tf.nn.softplus(err) * count_is_zero) - - self.loss_op = l2_loss + sigmoid_loss - - if hparams.optimizer == 'adagrad': - opt = tf.train.AdagradOptimizer(hparams.learning_rate) - elif hparams.optimizer == 'rmsprop': - opt = tf.train.RMSPropOptimizer(hparams.learning_rate, hparams.momentum) - else: - raise ValueError('unknown optimizer "%s"' % hparams.optimizer) - - self.global_step = tf.get_variable( - 'global_step', initializer=0, trainable=False) - - self.train_op = opt.minimize(self.loss_op, global_step=self.global_step) - - # One epoch trains each submatrix once. - self.steps_per_epoch = ( - (len(row_sums) / hparams.submatrix_rows) * - (len(col_sums) / hparams.submatrix_cols)) - - def _read_vocab(self, filename): - """Reads the vocabulary file.""" - with open(filename) as lines: - ix_to_word = [line.strip() for line in lines] - word_to_ix = {word: ix for ix, word in enumerate(ix_to_word)} - return ix_to_word, word_to_ix - - def _read_marginals_file(self, filename): - """Reads text file with one number per line to an array.""" - with open(filename) as lines: - return [float(line.strip()) for line in lines] - - def _count_matrix_input(self, filenames, submatrix_rows, submatrix_cols): - """Creates ops that read submatrix shards from disk.""" - random.shuffle(filenames) - filename_queue = tf.train.string_input_producer(filenames) - reader = tf.WholeFileReader() - _, serialized_example = reader.read(filename_queue) - features = tf.parse_single_example( - serialized_example, - features={ - 'global_row': tf.FixedLenFeature([submatrix_rows], dtype=tf.int64), - 'global_col': tf.FixedLenFeature([submatrix_cols], dtype=tf.int64), - 
'sparse_local_row': tf.VarLenFeature(dtype=tf.int64), - 'sparse_local_col': tf.VarLenFeature(dtype=tf.int64), - 'sparse_value': tf.VarLenFeature(dtype=tf.float32) - }) - - global_row = features['global_row'] - global_col = features['global_col'] - - sparse_local_row = features['sparse_local_row'].values - sparse_local_col = features['sparse_local_col'].values - sparse_count = features['sparse_value'].values - - sparse_indices = tf.concat( - axis=1, values=[tf.expand_dims(sparse_local_row, 1), - tf.expand_dims(sparse_local_col, 1)]) - - count = tf.sparse_to_dense(sparse_indices, [submatrix_rows, submatrix_cols], - sparse_count) - - return global_row, global_col, count - - def wordsim_eval_op(self, filename): - """Returns an op that runs an eval on a word similarity dataset. - - The eval dataset is assumed to be tab-separated, one scored word pair per - line. The resulting value is Spearman's rho of the human judgements with - the cosine similarity of the word embeddings. - - Args: - filename: the filename containing the word similarity data. - - Returns: - An operator that will compute Spearman's rho of the current row - embeddings. - """ - with open(filename, 'r') as fh: - tuples = (line.strip().split('\t') for line in fh.read().splitlines()) - word1s, word2s, sims = zip(*tuples) - actuals = map(float, sims) - - v1s_t = tf.nn.embedding_lookup( - self.row_embedding, - [self.row_word_to_ix.get(w, 0) for w in word1s]) - - v2s_t = tf.nn.embedding_lookup( - self.row_embedding, - [self.row_word_to_ix.get(w, 0) for w in word2s]) - - # Compute the predicted word similarity as the cosine similarity between the - # embedding vectors. 
- preds_t = tf.reduce_sum( - tf.nn.l2_normalize(v1s_t, dim=1) * tf.nn.l2_normalize(v2s_t, dim=1), - axis=1) - - def _op(preds): - rho, _ = scipy.stats.spearmanr(preds, actuals) - return rho - - return tf.py_func(_op, [preds_t], tf.float64) - - def analogy_eval_op(self, filename, max_vocab_size=20000): - """Returns an op that runs an eval on an analogy dataset. - - The eval dataset is assumed to be tab-separated, with four tokens per - line. The first three tokens are query terms, the last is the expected - answer. For each line (e.g., "man king woman queen"), the vectors - corresponding to the query terms are added ("king - man + woman") to produce - a query vector. If the expected answer's vector is the nearest neighbor to - the query vector (not counting any of the query vectors themselves), then - the line is scored as correct. The reported accuracy is the number of - correct rows divided by the total number of rows. Missing terms are - replaced with an arbitrary vector and will almost certainly result in - incorrect answers. - - Note that the results are approximate: for efficiency's sake, only the first - `max_vocab_size` terms are included in the nearest neighbor search. - - Args: - filename: the filename containing the analogy data. - max_vocab_size: the maximum number of tokens to include in the nearest - neighbor search. By default, 20000. - - Returns: - The accuracy on the analogy task. - """ - analogy_ixs = [] - with open(filename, 'r') as lines: - for line in lines: - parts = line.strip().split('\t') - if len(parts) == 4: - analogy_ixs.append([self.row_word_to_ix.get(w, 0) for w in parts]) - - # man:king :: woman:queen => king - man + woman == queen - ix1s, ix2s, ix3s, _ = zip(*analogy_ixs) - v1s_t, v2s_t, v3s_t = ( - tf.nn.l2_normalize( - tf.nn.embedding_lookup(self.row_embedding, ixs), - dim=1) - for ixs in (ix1s, ix2s, ix3s)) - - preds_t = v2s_t - v1s_t + v3s_t - - # Compute the nearest neighbors as the cosine similarity. 
We only consider - # up to max_vocab_size to avoid a matmul that swamps the machine. - sims_t = tf.matmul( - preds_t, - tf.nn.l2_normalize(self.row_embedding[:max_vocab_size], dim=1), - transpose_b=True) - - # Take the four nearest neighbors, since the eval explicitly discards the - # query terms. - _, preds_ixs_t = tf.nn.top_k(sims_t, 4) - - def _op(preds_ixs): - correct, total = 0, 0 - for pred_ixs, actual_ixs in itertools.izip(preds_ixs, analogy_ixs): - pred_ixs = [ix for ix in pred_ixs if ix not in actual_ixs[:3]] - correct += pred_ixs[0] == actual_ixs[3] - total += 1 - - return correct / total - - return tf.py_func(_op, [preds_ixs_t], tf.float64) - - def _write_tensor(self, vocab_path, output_path, session, embedding): - """Writes tensor to output_path as tsv.""" - embeddings = session.run(embedding) - - with open(output_path, 'w') as out_f: - with open(vocab_path) as vocab_f: - for index, word in enumerate(vocab_f): - word = word.strip() - embedding = embeddings[index] - print('\t'.join([word.strip()] + [str(x) for x in embedding]), - file=out_f) - - def write_embeddings(self, config, session): - """Writes row and column embeddings disk.""" - self._write_tensor( - os.path.join(config.input_base_path, 'row_vocab.txt'), - os.path.join(config.output_base_path, 'row_embedding.tsv'), - session, self.row_embedding) - - self._write_tensor( - os.path.join(config.input_base_path, 'col_vocab.txt'), - os.path.join(config.output_base_path, 'col_embedding.tsv'), - session, self.col_embedding) - - -def main(_): - tf.logging.set_verbosity(tf.logging.INFO) - - # If we have ps_hosts, then we'll assume that this is going to be a - # distributed training run. Configure the cluster appropriately. Otherwise, - # we just do everything in-process. - if FLAGS.ps_hosts: - cluster = tf.train.ClusterSpec({ - 'ps': FLAGS.ps_hosts.split(','), - 'worker': FLAGS.worker_hosts.split(','), - }) - - if FLAGS.job_name == 'ps': - # Ignore the GPU if we're the parameter server. 
This let's the PS run on - # the same machine as a worker. - config = tf.ConfigProto(device_count={'GPU': 0}) - elif FLAGS.job_name == 'worker': - config = tf.ConfigProto(gpu_options=tf.GPUOptions( - visible_device_list='%d' % FLAGS.gpu_device, - allow_growth=True)) - else: - raise ValueError('unknown job name "%s"' % FLAGS.job_name) - - server = tf.train.Server( - cluster, - job_name=FLAGS.job_name, - task_index=FLAGS.task_index, - config=config) - - if FLAGS.job_name == 'ps': - return server.join() - - device_setter = tf.train.replica_device_setter( - worker_device='/job:worker/task:%d' % FLAGS.task_index, - cluster=cluster) - - else: - server = None - device_setter = tf.train.replica_device_setter(0) - - # Build the graph. - with tf.Graph().as_default(): - with tf.device(device_setter): - model = Model(FLAGS.input_base_path, FLAGS) - - # If an eval path is present, then create eval operators and set up scalar - # summaries to report on the results. Run the evals on the CPU since - # the analogy eval requires a fairly enormous tensor to be allocated to - # do the nearest neighbor search. - if FLAGS.eval_base_path: - wordsim_filenames = glob.glob( - os.path.join(FLAGS.eval_base_path, '*.ws.tab')) - - for filename in wordsim_filenames: - name = os.path.basename(filename).split('.')[0] - with tf.device(tf.DeviceSpec(device_type='CPU')): - op = model.wordsim_eval_op(filename) - tf.summary.scalar(name, op) - - analogy_filenames = glob.glob( - os.path.join(FLAGS.eval_base_path, '*.an.tab')) - - for filename in analogy_filenames: - name = os.path.basename(filename).split('.')[0] - with tf.device(tf.DeviceSpec(device_type='CPU')): - op = model.analogy_eval_op(filename) - tf.summary.scalar(name, op) - - tf.summary.scalar('loss', model.loss_op) - - # Train on, soldier. 
- supervisor = tf.train.Supervisor( - logdir=FLAGS.output_base_path, - is_chief=(FLAGS.task_index == 0), - save_summaries_secs=60, - recovery_wait_secs=5) - - max_step = FLAGS.num_epochs * model.steps_per_epoch - master = server.target if server else '' - with supervisor.managed_session(master) as session: - local_step = 0 - global_step = session.run(model.global_step) - while not supervisor.should_stop() and global_step < max_step: - global_step, loss, _ = session.run([ - model.global_step, model.loss_op, model.train_op]) - - if not np.isfinite(loss): - raise ValueError('non-finite cost at step %d' % global_step) - - local_step += 1 - if local_step % 10 == 0: - tf.logging.info( - 'local_step=%d global_step=%d loss=%.1f, %.1f%% complete', - local_step, global_step, loss, 100.0 * global_step / max_step) - - if FLAGS.task_index == 0: - supervisor.saver.save( - session, supervisor.save_path, global_step=global_step) - - model.write_embeddings(FLAGS, session) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/swivel/text2bin.py b/research/swivel/text2bin.py deleted file mode 100644 index 6ccb132955289dbc10a818382746a03f09e505a3..0000000000000000000000000000000000000000 --- a/research/swivel/text2bin.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Converts vectors from text to a binary format for quicker manipulation. 
- -Usage: - - text2bin.py -o -v vec1.txt [vec2.txt ...] - -Optiona: - - -o , --output - The name of the file into which the binary vectors are written. - - -v , --vocab - The name of the file into which the vocabulary is written. - -Description - -This program merges one or more whitespace separated vector files into a single -binary vector file that can be used by downstream evaluation tools in this -directory ("wordsim.py" and "analogy"). - -If more than one vector file is specified, then the files must be aligned -row-wise (i.e., each line must correspond to the same embedding), and they must -have the same number of columns (i.e., be the same dimension). - -""" - -from itertools import izip -from getopt import GetoptError, getopt -import os -import struct -import sys - -try: - opts, args = getopt( - sys.argv[1:], 'o:v:', ['output=', 'vocab=']) -except GetoptError as e: - print >> sys.stderr, e - sys.exit(2) - -opt_output = 'vecs.bin' -opt_vocab = 'vocab.txt' -for o, a in opts: - if o in ('-o', '--output'): - opt_output = a - if o in ('-v', '--vocab'): - opt_vocab = a - -def go(fhs): - fmt = None - with open(opt_vocab, 'w') as vocab_out: - with open(opt_output, 'w') as vecs_out: - for lines in izip(*fhs): - parts = [line.split() for line in lines] - token = parts[0][0] - if any(part[0] != token for part in parts[1:]): - raise IOError('vector files must be aligned') - - print >> vocab_out, token - - vec = [sum(float(x) for x in xs) for xs in zip(*parts)[1:]] - if not fmt: - fmt = struct.Struct('%df' % len(vec)) - - vecs_out.write(fmt.pack(*vec)) - -if args: - fhs = [open(filename) for filename in args] - go(fhs) - for fh in fhs: - fh.close() -else: - go([sys.stdin]) diff --git a/research/swivel/vecs.py b/research/swivel/vecs.py deleted file mode 100644 index 806173f6abe3457f8290c7f24cc99014f4556d82..0000000000000000000000000000000000000000 --- a/research/swivel/vecs.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2016 Google Inc. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import mmap -import numpy as np -import os - -from six import string_types - - -class Vecs(object): - def __init__(self, vocab_filename, rows_filename, cols_filename=None): - """Initializes the vectors from a text vocabulary and binary data.""" - with open(vocab_filename, 'r') as lines: - self.vocab = [line.split()[0] for line in lines] - self.word_to_idx = {word: idx for idx, word in enumerate(self.vocab)} - - n = len(self.vocab) - - with open(rows_filename, 'r') as rows_fh: - rows_fh.seek(0, os.SEEK_END) - size = rows_fh.tell() - - # Make sure that the file size seems reasonable. - if size % (4 * n) != 0: - raise IOError( - 'unexpected file size for binary vector file %s' % rows_filename) - - # Memory map the rows. - dim = round(size / (4 * n)) - rows_mm = mmap.mmap(rows_fh.fileno(), 0, prot=mmap.PROT_READ) - rows = np.matrix( - np.frombuffer(rows_mm, dtype=np.float32).reshape(n, dim)) - - # If column vectors were specified, then open them and add them to the - # row vectors. - if cols_filename: - with open(cols_filename, 'r') as cols_fh: - cols_mm = mmap.mmap(cols_fh.fileno(), 0, prot=mmap.PROT_READ) - cols_fh.seek(0, os.SEEK_END) - if cols_fh.tell() != size: - raise IOError('row and column vector files have different sizes') - - cols = np.matrix( - np.frombuffer(cols_mm, dtype=np.float32).reshape(n, dim)) - - rows += cols - cols_mm.close() - - # Normalize so that dot products are just cosine similarity. 
- self.vecs = rows / np.linalg.norm(rows, axis=1).reshape(n, 1) - rows_mm.close() - - def similarity(self, word1, word2): - """Computes the similarity of two tokens.""" - idx1 = self.word_to_idx.get(word1) - idx2 = self.word_to_idx.get(word2) - if not idx1 or not idx2: - return None - - return float(self.vecs[idx1] * self.vecs[idx2].transpose()) - - def neighbors(self, query): - """Returns the nearest neighbors to the query (a word or vector).""" - if isinstance(query, string_types): - idx = self.word_to_idx.get(query) - if idx is None: - return None - - query = self.vecs[idx] - - neighbors = self.vecs * query.transpose() - - return sorted( - zip(self.vocab, neighbors.flat), - key=lambda kv: kv[1], reverse=True) - - def lookup(self, word): - """Returns the embedding for a token, or None if no embedding exists.""" - idx = self.word_to_idx.get(word) - return None if idx is None else self.vecs[idx] diff --git a/research/swivel/wordsim.py b/research/swivel/wordsim.py deleted file mode 100644 index 2d27663f8cf47c95cf3c47ed7e7e95bb8130ea5d..0000000000000000000000000000000000000000 --- a/research/swivel/wordsim.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Computes Spearman's rho with respect to human judgements. 
- -Given a set of row (and potentially column) embeddings, this computes Spearman's -rho between the rank ordering of predicted word similarity and human judgements. - -Usage: - - wordim.py --embeddings= --vocab= eval1.tab eval2.tab ... - -Options: - - --embeddings=: the vectors to test - --vocab=: the vocabulary file - -Evaluation files are assumed to be tab-separated files with exactly three -columns. The first two columns contain the words, and the third column contains -the scored human judgement. - -""" - -from __future__ import print_function -import scipy.stats -import sys -from getopt import GetoptError, getopt - -from vecs import Vecs - -try: - opts, args = getopt(sys.argv[1:], '', ['embeddings=', 'vocab=']) -except GetoptError as e: - print(e, file=sys.stderr) - sys.exit(2) - -opt_embeddings = None -opt_vocab = None - -for o, a in opts: - if o == '--embeddings': - opt_embeddings = a - if o == '--vocab': - opt_vocab = a - -if not opt_vocab: - print('please specify a vocabulary file with "--vocab"', file=sys.stderr) - sys.exit(2) - -if not opt_embeddings: - print('please specify the embeddings with "--embeddings"', file=sys.stderr) - sys.exit(2) - -try: - vecs = Vecs(opt_vocab, opt_embeddings) -except IOError as e: - print(e, file=sys.stderr) - sys.exit(1) - - -def evaluate(lines): - acts, preds = [], [] - - with open(filename, 'r') as lines: - for line in lines: - w1, w2, act = line.strip().split('\t') - pred = vecs.similarity(w1, w2) - if pred is None: - continue - - acts.append(float(act)) - preds.append(pred) - - rho, _ = scipy.stats.spearmanr(acts, preds) - return rho - - -for filename in args: - with open(filename, 'r') as lines: - print('%0.3f %s' % (evaluate(lines), filename)) diff --git a/research/tcn/BUILD b/research/tcn/BUILD deleted file mode 100644 index 39297d4b7bfa6ba94bb91a79d555c3fce75c4e63..0000000000000000000000000000000000000000 --- a/research/tcn/BUILD +++ /dev/null @@ -1,213 +0,0 @@ -package(default_visibility = [":internal"]) - 
-licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//tcn/...", - ], -) - -py_binary( - name = "download_pretrained", - srcs = [ - "download_pretrained.py", - ], -) - -py_binary( - name = "generate_videos", - srcs = [ - "generate_videos.py", - ], - main = "generate_videos.py", - deps = [ - ":data_providers", - ":get_estimator", - ":util", - ], -) - -py_test( - name = "svtcn_loss_test", - size = "medium", - srcs = [ - "estimators/svtcn_loss.py", - "estimators/svtcn_loss_test.py", - ], - deps = [ - ":util", - ], -) - -py_library( - name = "data_providers", - srcs = [ - "data_providers.py", - ], - deps = [ - ":preprocessing", - ], -) - -py_test( - name = "data_providers_test", - size = "large", - srcs = ["data_providers_test.py"], - deps = [ - ":data_providers", - ], -) - -py_library( - name = "preprocessing", - srcs = [ - "preprocessing.py", - ], -) - -py_binary( - name = "get_estimator", - srcs = [ - "estimators/get_estimator.py", - ], - deps = [ - ":mvtcn_estimator", - ":svtcn_estimator", - ], -) - -py_binary( - name = "base_estimator", - srcs = [ - "estimators/base_estimator.py", - "model.py", - ], - deps = [ - ":data_providers", - ":util", - ], -) - -py_library( - name = "util", - srcs = [ - "utils/luatables.py", - "utils/progress.py", - "utils/util.py", - ], -) - -py_binary( - name = "mvtcn_estimator", - srcs = [ - "estimators/mvtcn_estimator.py", - ], - deps = [ - ":base_estimator", - ], -) - -py_binary( - name = "svtcn_estimator", - srcs = [ - "estimators/svtcn_estimator.py", - "estimators/svtcn_loss.py", - ], - deps = [ - ":base_estimator", - ], -) - -py_binary( - name = "train", - srcs = [ - "train.py", - ], - deps = [ - ":data_providers", - ":get_estimator", - ":util", - ], -) - -py_binary( - name = "labeled_eval", - srcs = [ - "labeled_eval.py", - ], - deps = [ - ":get_estimator", - ], -) - -py_test( - name = "labeled_eval_test", - size = "small", - srcs = ["labeled_eval_test.py"], - 
deps = [ - ":labeled_eval", - ], -) - -py_binary( - name = "eval", - srcs = [ - "eval.py", - ], - deps = [ - ":get_estimator", - ], -) - -py_binary( - name = "alignment", - srcs = [ - "alignment.py", - ], - deps = [ - ":get_estimator", - ], -) - -py_binary( - name = "visualize_embeddings", - srcs = [ - "visualize_embeddings.py", - ], - deps = [ - ":data_providers", - ":get_estimator", - ":util", - ], -) - -py_binary( - name = "webcam", - srcs = [ - "dataset/webcam.py", - ], - main = "dataset/webcam.py", -) - -py_binary( - name = "images_to_videos", - srcs = [ - "dataset/images_to_videos.py", - ], - main = "dataset/images_to_videos.py", -) - -py_binary( - name = "videos_to_tfrecords", - srcs = [ - "dataset/videos_to_tfrecords.py", - ], - main = "dataset/videos_to_tfrecords.py", - deps = [ - ":preprocessing", - ], -) diff --git a/research/tcn/README.md b/research/tcn/README.md deleted file mode 100644 index 6f96324576cb32f25cd8d3a2c70b9fbae754ccea..0000000000000000000000000000000000000000 --- a/research/tcn/README.md +++ /dev/null @@ -1,559 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Time Contrastive Networks - -This implements ["Time Contrastive Networks"](https://arxiv.org/abs/1704.06888), -which is part of the larger [Self-Supervised Imitation -Learning](https://sermanet.github.io/imitation/) project. 
- -![](https://sermanet.github.io/tcn/docs/figs/mvTCN.png) - -## Contacts - -Maintainers of TCN: - -* Corey Lynch: [github](https://github.com/coreylynch), - [twitter](https://twitter.com/coreylynch) -* Pierre Sermanet: [github](https://github.com/sermanet), - [twitter](https://twitter.com/psermanet) - -## Contents - -* [Getting Started](#getting-started) - * [Install Dependencies](#install-dependencies) - * [Download the Inception v3 - Checkpoint](#download-pretrained-inceptionv3-checkpoint) - * [Run all the tests](#run-all-the-tests) -* [Concepts](#concepts) - * [Multi-view Webcam Video](#multi-view-webcam-video) - * [Data Pipelines](#data-pipelines) - * [Estimators](#estimators) - * [Models](#models) - * [Losses](#losses) - * [Inference](#inference) - * [Configuration](#configuration) - * [Monitoring Training](#monitoring-training) - * [KNN Classification Error](#knn-classification-error) - * [KNN Classification Error](#multi-view-alignment) - * [Visualization](#visualization) - * [Nearest Neighbor Imitation - Videos](#nearest-neighbor-imitation-videos) - * [PCA & T-SNE Visualization](#pca-t-sne-visualization) -* [Tutorial Part I: Collecting Multi-View Webcam - Videos](#tutorial-part-i-collecting-multi-view-webcam-videos) - * [Collect Webcam Videos](#collect-webcam-videos) - * [Create TFRecords](#create-tfrecords) -* [Tutorial Part II: Training, Evaluation, and - Visualization](#tutorial-part-ii-training-evaluation-and-visualization) - * [Download Data](#download-data) - * [Download the Inception v3 - Checkpoint](#download-pretrained-inceptionv3-checkpoint) - * [Define a Config](#define-a-config) - * [Train](#train) - * [Evaluate](#evaluate) - * [Monitor training](#monior-training) - * [Visualize](#visualize) - * [Generate Imitation Videos](#generate-imitation-videos) - * [Run PCA & T-SNE Visualization](#t-sne-pca-visualization) - -## Getting started - -### Install Dependencies - -* [Tensorflow nightly build](https://pypi.python.org/pypi/tf-nightly-gpu) or - via 
`pip install tf-nightly-gpu`. -* [Bazel](http://bazel.io/docs/install.html) -* matplotlib -* sklearn -* opencv - -### Download Pretrained InceptionV3 Checkpoint - -Run the script that downloads the pretrained InceptionV3 checkpoint: - -```bash -cd tensorflow-models/tcn -python download_pretrained.py -``` - -### Run all the tests - -```bash -bazel test :all -``` - -## Concepts - -### Multi-View Webcam Video - -We provide utilities to collect your own multi-view videos in dataset/webcam.py. -See the [webcam tutorial](#tutorial-part-i-collecting-multi-view-webcam-videos) -for an end to end example of how to collect multi-view webcam data and convert -it to the TFRecord format expected by this library. - -## Data Pipelines - -We use the [tf.data.Dataset -API](https://www.tensorflow.org/guide/datasets) to construct input -pipelines that feed training, evaluation, and visualization. These pipelines are -defined in `data_providers.py`. - -## Estimators - -We define training, evaluation, and inference behavior using the -[tf.estimator.Estimator -API](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator). See -`estimators/mvtcn_estimator.py` for an example of how multi-view TCN training, -evaluation, and inference is implemented. - -## Models - -Different embedder architectures are implemented in model.py. We used the -`InceptionConvSSFCEmbedder` in the pouring experiments, but we're also -evaluating `Resnet` embedders. - -## Losses - -We use the -[tf.contrib.losses.metric_learning](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/losses/metric_learning) -library's implementations of triplet loss with semi-hard negative mining and -npairs loss. In our experiments, npairs loss has better empirical convergence -and produces the best qualitative visualizations, and will likely be our choice -for future experiments. 
See the -[paper](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf) -for details on the algorithm. - -## Inference - -We support 3 modes of inference for trained TCN models: - -* Mode 1: Input is a tf.Estimator input_fn (see - [this](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator#predict) - for details). Output is an iterator over embeddings and additional metadata. - See `labeled_eval.py` for a usage example. - -* Mode 2: Input is a TFRecord or (or list of TFRecords). This returns an - iterator over tuples of (embeddings, raw_image_strings, sequence_name), - where embeddings is the [num views, sequence length, embedding size] numpy - array holding the full embedded sequence (for all views), raw_image_strings - is a [num views, sequence length] string array holding the jpeg-encoded raw - image strings, and sequence_name is the name of the sequence. See - `generate_videos.py` for a usage example. - -* Mode 3: Input is a numpy array of size [num images, height, width, num - channels]. This returns a tuple of (embeddings, raw_image_strings), where - embeddings is a 2-D float32 numpy array holding [num_images, embedding_size] - image embeddings, and raw_image_strings is a 1-D string numpy array holding - [batch_size] jpeg-encoded image strings. This can be used as follows: - - ```python - images = np.random.uniform(0, 1, size=(batch_size, 1080, 1920, 3)) - embeddings, _ = estimator.inference( - images, checkpoint_path=checkpoint_path) - ``` - -See `estimators/base_estimator.py` for details. - -## Configuration - -Data pipelines, training, eval, and visualization are all configured using -key-value parameters passed as [YAML](https://en.wikipedia.org/wiki/YAML) files. 
-Configurations can be nested, e.g.: - -```yaml -learning: - optimizer: 'adam' - learning_rate: 0.001 -``` - -### T objects - -YAML configs are converted to LuaTable-like `T` object (see -`utils/luatables.py`), which behave like a python `dict`, but allow you to use -dot notation to access (nested) keys. For example we could access the learning -rate in the above config snippet via `config.learning.learning_rate`. - -### Multiple Configs - -Multiple configs can be passed to the various binaries as a comma separated list -of config paths via the `--config_paths` flag. This allows us to specify a -default config that applies to all experiments (e.g. how often to write -checkpoints, default embedder hyperparams) and one config per experiment holding -the just hyperparams specific to the experiment (path to data, etc.). - -See `configs/tcn_default.yml` for an example of our default config and -`configs/pouring.yml` for an example of how we define the pouring experiments. - -Configs are applied left to right. For example, consider two config files: - -default.yml - -```yaml -learning: - learning_rate: 0.001 # Default learning rate. - optimizer: 'adam' -``` - -myexperiment.yml - -```yaml -learning: - learning_rate: 1.0 # Experiment learning rate (overwrites default). -data: - training: '/path/to/myexperiment/training.tfrecord' -``` - -Running - -```bash -bazel run train.py --config_paths='default.yml,myexperiment.yml' -``` - -results in a final merged config called final_training_config.yml - -```yaml -learning: - optimizer: 'adam' - learning_rate: 1.0 -data: - training: '/path/to/myexperiment/training.tfrecord' -``` - -which is created automatically and stored in the experiment log directory -alongside model checkpoints and tensorboard summaries. This gives us a record of -the exact configs that went into each trial. - -## Monitoring training - -We usually look at two validation metrics during training: knn classification -error and multi-view alignment. 
- -### KNN-Classification Error - -In cases where we have labeled validation data, we can compute the average -cross-sequence KNN classification error (1.0 - recall@k=1) over all embedded -labeled images in the validation set. See `labeled_eval.py`. - -### Multi-view Alignment - -In cases where there is no labeled validation data, we can look at the how well -our model aligns multiple views of same embedded validation sequences. That is, -for each embedded validation sequence, for all cross-view pairs, we compute the -scaled absolute distance between ground truth time indices and knn time indices. -See `alignment.py`. - -## Visualization - -We visualize the embedding space learned by our models in two ways: nearest -neighbor imitation videos and PCA/T-SNE. - -### Nearest Neighbor Imitation Videos - -One of the easiest way to evaluate the understanding of your model is to see how -well the model can semantically align two videos via nearest neighbors in -embedding space. - -Consider the case where we have multiple validation demo videos of a human or -robot performing the same task. For example, in the pouring experiments, we -collected many different multiview validation videos of a person pouring the -contents of one container into another, then setting the container down. If we'd -like to see how well our embeddings generalize across viewpoint, object/agent -appearance, and background, we can construct what we call "Nearest Neighbor -Imitation" videos, by embedding some validation query sequence `i` from view 1, -and finding the nearest neighbor for each query frame in some embedded target -sequence `j` filmed from view 1. -[Here's](https://sermanet.github.io/tcn/docs/figs/pouring_human.mov.gif) an -example of the final product. - -See `generate_videos.py` for details. 
- -### PCA & T-SNE Visualization - -We can also embed a set of images taken randomly from validation videos and -visualize the embedding space using PCA projection and T-SNE in the tensorboard -projector. See `visualize_embeddings.py` for details. - -## Tutorial Part I: Collecting Multi-View Webcam Videos - -Here we give an end-to-end example of how to collect your own multiview webcam -videos and convert them to the TFRecord format expected by training. - -Note: This was tested with up to 8 concurrent [Logitech c930e -webcams](https://www.logitech.com/en-us/product/c930e-webcam) extended with -[Plugable 5 Meter (16 Foot) USB 2.0 Active Repeater Extension -Cables](https://www.amazon.com/gp/product/B006LFL4X0/ref=oh_aui_detailpage_o05_s00?ie=UTF8&psc=1). - -### Collect webcam videos - -Go to dataset/webcam.py - -1. Plug your webcams in and run - - ```bash - ls -ltrh /dev/video* - ``` - - You should see one device listed per connected webcam. - -2. Define some environment variables describing the dataset you're collecting. - - ```bash - dataset=tutorial # Name of the dataset. - mode=train # E.g. 'train', 'validation', 'test', 'demo'. - num_views=2 # Number of webcams. - viddir=/tmp/tcn/videos # Output directory for the videos. - tmp_imagedir=/tmp/tcn/tmp_images # Temp directory to hold images. - debug_vids=1 # Whether or not to generate side-by-side debug videos. - export DISPLAY=:0.0 # This allows real time matplotlib display. - ``` - -3. Run the webcam.py script. - - ```bash - bazel build -c opt --copt=-mavx webcam && \ - bazel-bin/webcam \ - --dataset $dataset \ - --mode $mode \ - --num_views $num_views \ - --tmp_imagedir $tmp_imagedir \ - --viddir $viddir \ - --debug_vids 1 - ``` - -4. Hit Ctrl-C when done collecting, upon which the script will compile videos - for each view and optionally a debug video concatenating multiple - simultaneous views. - -5. 
If `--seqname` flag isn't set, the script will name the first sequence '0', - the second sequence '1', and so on (meaning you can just keep rerunning step - 3.). When you are finished, you should see an output viddir with the - following structure: - - ```bash - videos/0_view0.mov - videos/0_view1.mov - ... - videos/0_viewM.mov - videos/1_viewM.mov - ... - videos/N_viewM.mov - for N sequences and M webcam views. - ``` - -### Create TFRecords - -Use `dataset/videos_to_tfrecords.py` to convert the directory of videos into a -directory of TFRecords files, one per multi-view sequence. - -```bash -viddir=/tmp/tcn/videos -dataset=tutorial -mode=train -videos=$viddir/$dataset - -bazel build -c opt videos_to_tfrecords && \ -bazel-bin/videos_to_tfrecords --logtostderr \ ---input_dir $videos/$mode \ ---output_dir ~/tcn_data/$dataset/$mode \ ---max_per_shard 400 -``` - -Setting `--max_per_shard` > 0 allows you to shard training data. We've observed -that sharding long training sequences provides better performance in terms of -global steps/sec. - -This should be left at the default of 0 for validation / test data. - -You should now have a directory of TFRecords files with the following structure: - -```bash -output_dir/0.tfrecord -... -output_dir/N.tfrecord - -1 TFRecord file for each of N multi-view sequences. -``` - -Now we're ready to move on to part II: training, evaluation, and visualization. - -## Tutorial Part II: Training, Evaluation, and Visualization - -Here we give an end-to-end example of how to train, evaluate, and visualize the -embedding space learned by TCN models. - -### Download Data - -We will be using the 'Multiview Pouring' dataset, which can be downloaded using -the download.sh script -[here.](https://sites.google.com/site/brainrobotdata/home/multiview-pouring) - -The rest of the tutorial will assume that you have your data downloaded to a -folder at `~/tcn_data`. 
- -```bash -mkdir ~/tcn_data -mv ~/Downloads/download.sh ~/tcn_data -./download.sh -``` - -You should now have the following path containing all the data: - -```bash -ls ~/tcn_data/multiview-pouring -labels README.txt tfrecords videos -``` - -### Download Pretrained Inception Checkpoint - -If you haven't already, run the script that downloads the pretrained InceptionV3 -checkpoint: - -```bash -python download_pretrained.py -``` - -### Define A Config - -For our experiment, we create 2 configs: - -* `configs/tcn_default.yml`: This contains all the default hyperparameters - that generally don't vary across experiments. -* `configs/pouring.yml`: This contains all the hyperparameters that are - specific to the pouring experiment. - -Important note about `configs/pouring.yml`: - -* data.eval_cropping: We use 'pad200' for the pouring dataset, which was - filmed rather close up on iphone cameras. A better choice for data filmed on - webcam is likely 'crop_center'. See preprocessing.py for options. - -### Train - -Run the training binary: - -```yaml -logdir=/tmp/tcn/pouring -c=configs -configs=$c/tcn_default.yml,$c/pouring.yml - -bazel build -c opt --copt=-mavx --config=cuda train && \ -bazel-bin/train \ ---config_paths $configs --logdir $logdir -``` - -### Evaluate - -Run the binary that computes running validation loss. Set `export -CUDA_VISIBLE_DEVICES=` to run on CPU. - -```bash -bazel build -c opt --copt=-mavx eval && \ -bazel-bin/eval \ ---config_paths $configs --logdir $logdir -``` - -Run the binary that computes running validation cross-view sequence alignment. -Set `export CUDA_VISIBLE_DEVICES=` to run on CPU. - -```bash -bazel build -c opt --copt=-mavx alignment && \ -bazel-bin/alignment \ ---config_paths $configs --checkpointdir $logdir --outdir $logdir -``` - -Run the binary that computes running labeled KNN validation error. Set `export -CUDA_VISIBLE_DEVICES=` to run on CPU. 
- -```bash -bazel build -c opt --copt=-mavx labeled_eval && \ -bazel-bin/labeled_eval \ ---config_paths $configs --checkpointdir $logdir --outdir $logdir -``` - -### Monitor training - -Run `tensorboard --logdir=$logdir`. After a bit of training, you should see -curves that look like this: - -#### Training loss - - - -#### Validation loss - - - -#### Validation Alignment - - - -#### Average Validation KNN Classification Error - - - -#### Individual Validation KNN Classification Errors - - - -### Visualize - -To visualize the embedding space learned by a model, we can: - -#### Generate Imitation Videos - -```bash -# Use the automatically generated final config file as config. -configs=$logdir/final_training_config.yml -# Visualize checkpoint 40001. -checkpoint_iter=40001 -# Use validation records for visualization. -records=~/tcn_data/multiview-pouring/tfrecords/val -# Write videos to this location. -outdir=$logdir/tcn_viz/imitation_vids -``` - -```bash -bazel build -c opt --config=cuda --copt=-mavx generate_videos && \ -bazel-bin/generate_videos \ ---config_paths $configs \ ---checkpointdir $logdir \ ---checkpoint_iter $checkpoint_iter \ ---query_records_dir $records \ ---target_records_dir $records \ ---outdir $outdir -``` - -After the script completes, you should see a directory of videos with names -like: - -`$outdir/qtrain_clearodwalla_to_clear1_realv1_imtrain_clearsoda_to_white13_realv0.mp4` - -that look like this: - -#### T-SNE / PCA Visualization - -Run the binary that generates embeddings and metadata. - -```bash -outdir=$logdir/tcn_viz/embedding_viz -bazel build -c opt --config=cuda --copt=-mavx visualize_embeddings && \ -bazel-bin/visualize_embeddings \ ---config_paths $configs \ ---checkpointdir $logdir \ ---checkpoint_iter $checkpoint_iter \ ---embedding_records $records \ ---outdir $outdir \ ---num_embed 1000 \ ---sprite_dim 64 -``` - -Run tensorboard, pointed at the embedding viz output directory. 
- -``` -tensorboard --logdir=$outdir -``` - -You should see something like this in tensorboard. - diff --git a/research/tcn/WORKSPACE b/research/tcn/WORKSPACE deleted file mode 100644 index 87d592329c07914a8feba80bbc96bf4f92305656..0000000000000000000000000000000000000000 --- a/research/tcn/WORKSPACE +++ /dev/null @@ -1,2 +0,0 @@ -workspace(name = "tcn") - diff --git a/research/tcn/alignment.py b/research/tcn/alignment.py deleted file mode 100644 index e6ee04c8e1c80d7ab394be4523b1408f1e3bd7b7..0000000000000000000000000000000000000000 --- a/research/tcn/alignment.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Calculates test sequence alignment score.""" -from __future__ import absolute_import -from __future__ import absolute_import -from __future__ import division - -import os -import numpy as np -from estimators.get_estimator import get_estimator -from utils import util -import tensorflow as tf -tf.logging.set_verbosity(tf.logging.INFO) - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. 
- """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string( - 'checkpoint_iter', '', 'Evaluate this specific checkpoint.') -tf.app.flags.DEFINE_string( - 'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.') -tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.') -FLAGS = tf.app.flags.FLAGS - - -def compute_average_alignment( - seqname_to_embeddings, num_views, summary_writer, training_step): - """Computes the average cross-view alignment for all sequence view pairs. - - Args: - seqname_to_embeddings: Dict, mapping sequence name to a - [num_views, embedding size] numpy matrix holding all embedded views. - num_views: Int, number of simultaneous views in the dataset. - summary_writer: A `SummaryWriter` object. - training_step: Int, the training step of the model used to embed images. - - Alignment is the scaled absolute difference between the ground truth time - and the knn aligned time. - abs(|time_i - knn_time|) / sequence_length - """ - all_alignments = [] - for _, view_embeddings in seqname_to_embeddings.iteritems(): - for idx_i in range(num_views): - for idx_j in range(idx_i+1, num_views): - embeddings_view_i = view_embeddings[idx_i] - embeddings_view_j = view_embeddings[idx_j] - - seq_len = len(embeddings_view_i) - - times_i = np.array(range(seq_len)) - # Get the nearest time_index for each embedding in view_i. - times_j = np.array([util.KNNIdsWithDistances( - q, embeddings_view_j, k=1)[0][0] for q in embeddings_view_i]) - - # Compute sequence view pair alignment. 
- alignment = np.mean( - np.abs(np.array(times_i)-np.array(times_j))/float(seq_len)) - all_alignments.append(alignment) - print('alignment so far %f' % alignment) - average_alignment = np.mean(all_alignments) - print('Average alignment %f' % average_alignment) - summ = tf.Summary(value=[tf.Summary.Value( - tag='validation/alignment', simple_value=average_alignment)]) - summary_writer.add_summary(summ, int(training_step)) - - -def evaluate_once( - config, checkpointdir, validation_records, checkpoint_path, batch_size, - num_views): - """Evaluates and reports the validation alignment.""" - # Choose an estimator based on training strategy. - estimator = get_estimator(config, checkpointdir) - - # Embed all validation sequences. - seqname_to_embeddings = {} - for (view_embeddings, _, seqname) in estimator.inference( - validation_records, checkpoint_path, batch_size): - seqname_to_embeddings[seqname] = view_embeddings - - # Compute and report alignment statistics. - ckpt_step = int(checkpoint_path.split('-')[-1]) - summary_dir = os.path.join(FLAGS.outdir, 'alignment_summaries') - summary_writer = tf.summary.FileWriter(summary_dir) - compute_average_alignment( - seqname_to_embeddings, num_views, summary_writer, ckpt_step) - - -def main(_): - # Parse config dict from yaml config files / command line flags. - config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params) - num_views = config.data.num_views - - validation_records = util.GetFilesRecursively(config.data.validation) - batch_size = config.data.batch_size - - checkpointdir = FLAGS.checkpointdir - - # If evaluating a specific checkpoint, do that. 
- if FLAGS.checkpoint_iter: - checkpoint_path = os.path.join( - '%s/model.ckpt-%s' % (checkpointdir, FLAGS.checkpoint_iter)) - evaluate_once( - config, checkpointdir, validation_records, checkpoint_path, batch_size, - num_views) - else: - for checkpoint_path in tf.contrib.training.checkpoints_iterator( - checkpointdir): - evaluate_once( - config, checkpointdir, validation_records, checkpoint_path, - batch_size, num_views) - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/configs/pouring.yml b/research/tcn/configs/pouring.yml deleted file mode 100644 index 4cfd962712cd2bfcb22c580e633bd8fdaed03c71..0000000000000000000000000000000000000000 --- a/research/tcn/configs/pouring.yml +++ /dev/null @@ -1,58 +0,0 @@ -# Train with Multi-View TCN. -training_strategy: 'mvtcn' - -# Use the 'inception_conv_ss_fc' embedder, which has the structure: -# InceptionV3 -> 2 conv adaptation layers -> spatial softmax -> fully connected -# -> embedding. -embedder_strategy: 'inception_conv_ss_fc' - -# Use npairs loss. -loss_strategy: 'npairs' - -learning: - learning_rate: 0.0001 - -# Set some hyperparameters for our embedder. -inception_conv_ss_fc: - # Don't finetune the pre-trained weights. - finetune_inception: false - dropout: - # Don't dropout convolutional activations. - keep_conv: 1.0 - # Use a dropout of 0.8 on the fully connected activations. - keep_fc: 0.8 - # Use a dropout of 0.8 on the inception activations. - keep_pretrained: 0.8 - -# Size of the TCN embedding. -embedding_size: 32 - -data: - raw_height: 480 - raw_width: 360 - batch_size: 32 - examples_per_sequence: 32 - num_views: 2 - preprocessing: - # Inference-time image cropping strategy. - eval_cropping: 'pad200' - augmentation: - # Do scale augmentation. - minscale: 0.8 # When downscaling, zoom in to 80% of the central bounding box. - maxscale: 3.0 # When upscaling, zoom out to 300% of the central bounding box. - proportion_scaled_up: 0.5 # Proportion of the time to scale up rather than down. 
- color: true # Do color augmentation. - fast_mode: true - # Paths to the data. - training: '~/tcn_data/multiview-pouring/tfrecords/train' - validation: '~/tcn_data/multiview-pouring/tfrecords/val' - test: 'path/to/test' - labeled: - image_attr_keys: ['image/view0', 'image/view1', 'task'] - label_attr_keys: ['contact', 'distance', 'liquid_flowing', 'has_liquid', 'container_angle'] - validation: '~/tcn_data/multiview-pouring/monolithic-labeled/val' - test: '~/tcn_data/multiview-pouring/monolithic-labeled/test' - -logging: - checkpoint: - save_checkpoints_steps: 1000 \ No newline at end of file diff --git a/research/tcn/configs/tcn_default.yml b/research/tcn/configs/tcn_default.yml deleted file mode 100644 index 992f36d77507719edd3f82aebc080af4b46eea14..0000000000000000000000000000000000000000 --- a/research/tcn/configs/tcn_default.yml +++ /dev/null @@ -1,115 +0,0 @@ -# These configs are the defaults we used for both the pouring and pose -# experiments. - -# Train on TPU? -use_tpu: false # Default is to run without TPU locally. -tpu: - num_shards: 1 - iterations: 100 - -# SGD / general learning hyperparameters. -learning: - max_step: 1000000 - learning_rate: 0.001 - decay_steps: 10000 - decay_factor: 1.00 - l2_reg_weight: 0.000001 - optimizer: 'adam' - -# Default metric learning loss hyperparameters. -triplet_semihard: - embedding_l2: true # Suggestion from Hyun Oh Song's slides. - margin: .2 # Default value for Facenet. -npairs: - embedding_l2: false # Suggestion from Hyun Oh Song's slides. -clustering_loss: - embedding_l2: true # Suggestion from Hyun Oh Song's slides. - margin: 1.0 # Default in deep_metric_learning. -lifted_struct: - embedding_l2: false # Suggestion from Hyun Oh Song's slides. - margin: 1.0 -contrastive: - embedding_l2: true # Suggestion from Hyun Oh Song's slides. - margin: 1.0 - -# Which method to use to train the embedding. -# Options are "mvtcn", "svtcn". -training_strategy: 'mvtcn' - -# Which embedder architecture to use. 
-# Options are 'inception_conv_ss_fc' (used in pouring / pose experiments), -# 'resnet'. -embedder_strategy: 'inception_conv_ss_fc' - -# Size of the TCN embedding. -embedding_size: 32 - -# Default hyperparameters for the different embedder architectures. -inception_conv_ss_fc: - pretrained_checkpoint: 'pretrained_checkpoints/inception/inception_v3.ckpt' - pretrained_layer: 'Mixed_5d' - additional_conv_sizes: [512, 512] - fc_hidden_sizes: [2048] - finetune: false - dropout: - keep_pretrained: 1.0 - keep_conv: 1.0 - keep_fc: 1.0 - -resnet: - pretrained_checkpoint: 'pretrained_checkpoints/resnet/resnet_v2_50.ckpt' - pretrained_layer: 4 - finetune: false - adaptation_blocks: '512_3-512_3' - emb_connection: 'conv' - fc_hidden_sizes: 'None' - dropout: - keep_pretrained: 1.0 - -# Loss hyperparameters. -mvtcn: - # Size of the window in timesteps to get random anchor-positive pairs for - # training. - window: 580 # 29fps * 20 seconds. - -svtcn: - pos_radius: 6 # 0.2 seconds * 29fps ~ 6 timesteps. - neg_radius: 12 # 2.0 * pos_radius. - -# Data configs. -data: - height: 299 - width: 299 - preprocessing: - # Strategy to use when cropping images at inference time. - # See preprocessing.py for options. - eval_cropping: 'crop_center' - # Training scale, color augmentation hyparameters. - augmentation: - # See preprocessing.py for a discussion of how to use these parameters. - minscale: 1.0 - maxscale: 1.0 - proportion_scaled_up: 0.5 - color: true - fast_mode: true - num_parallel_calls: 12 - sequence_prefetch_size: 12 - batch_prefetch_size: 12 - batch_size: 36 - eval_batch_size: 36 - embed_batch_size: 128 - -val: - recall_at_k_list: [1] - num_eval_samples: 1000 - eval_interval_secs: 300 - -logging: - summary: - image_summaries: false - save_summaries_steps: 100 - flush_secs: 600 - checkpoint: - num_to_keep: 0 # Keep all checkpoints. 
- save_checkpoints_steps: 1000 - secs: 1800 \ No newline at end of file diff --git a/research/tcn/configs/test_estimator.yml b/research/tcn/configs/test_estimator.yml deleted file mode 100644 index 4e4519477c0d723b7882d23130d13a5df22dbd84..0000000000000000000000000000000000000000 --- a/research/tcn/configs/test_estimator.yml +++ /dev/null @@ -1,29 +0,0 @@ -use_tpu: False -training_strategy: 'mvtcn' -loss_strategy: 'triplet_semihard' - -learning: - max_step: 2 - optimizer: 'adam' - -embedding_size: 8 - -data: - embed_batch_size: 12 - batch_size: 12 - examples_per_sequence: 12 - num_views: 2 - num_parallel_calls: 1 - sequence_prefetch_size: 1 - batch_prefetch_size: 1 - -logging: - summary: - image_summaries: false - save_summaries_steps: 100 - flush_secs: 600 - save_summaries_secs: 60 - checkpoint: - num_to_keep: 0 # Keep all checkpoints. - save_checkpoints_steps: 1000 - secs: 1800 \ No newline at end of file diff --git a/research/tcn/data_providers.py b/research/tcn/data_providers.py deleted file mode 100644 index aa2a5f4eb47cce5df83b46768b501430844a9cd5..0000000000000000000000000000000000000000 --- a/research/tcn/data_providers.py +++ /dev/null @@ -1,505 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Defines data providers used in training and evaluating TCNs.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import random -import numpy as np -import preprocessing -import tensorflow as tf - - -def record_dataset(filename): - """Generate a TFRecordDataset from a `filename`.""" - return tf.data.TFRecordDataset(filename) - - -def full_sequence_provider(file_list, num_views): - """Provides full preprocessed image sequences. - - Args: - file_list: List of strings, paths to TFRecords to preprocess. - num_views: Int, the number of simultaneous viewpoints at each timestep in - the dataset. - Returns: - preprocessed: A 4-D float32 `Tensor` holding a sequence of preprocessed - images. - raw_image_strings: A 2-D string `Tensor` holding a sequence of raw - jpeg-encoded image strings. - task: String, the name of the sequence. - seq_len: Int, the number of timesteps in the sequence. - """ - def _parse_sequence(x): - context, views, seq_len = parse_sequence_example(x, num_views) - task = context['task'] - return views, task, seq_len - - data_files = tf.contrib.slim.parallel_reader.get_data_files(file_list) - dataset = tf.data.Dataset.from_tensor_slices(data_files) - dataset = dataset.repeat(1) - # Get a dataset of sequences. - dataset = dataset.flat_map(record_dataset) - - # Build a dataset of TFRecord files. - dataset = dataset.repeat(1) - # Prefetch a number of opened files. - dataset = dataset.prefetch(12) - # Use _parse_sequence to deserialize (but not decode) image strings. - dataset = dataset.map(_parse_sequence, num_parallel_calls=12) - # Prefetch batches of images. 
- dataset = dataset.prefetch(12) - dataset = dataset.make_one_shot_iterator() - views, task, seq_len = dataset.get_next() - return views, task, seq_len - - -def parse_labeled_example( - example_proto, view_index, preprocess_fn, image_attr_keys, label_attr_keys): - """Parses a labeled test example from a specified view. - - Args: - example_proto: A scalar string Tensor. - view_index: Int, index on which view to parse. - preprocess_fn: A function with the signature (raw_images, is_training) -> - preprocessed_images, where raw_images is a 4-D float32 image `Tensor` - of raw images, is_training is a Boolean describing if we're in training, - and preprocessed_images is a 4-D float32 image `Tensor` holding - preprocessed images. - image_attr_keys: List of Strings, names for image keys. - label_attr_keys: List of Strings, names for label attributes. - Returns: - data: A tuple of images, attributes and tasks `Tensors`. - """ - features = {} - for attr_key in image_attr_keys: - features[attr_key] = tf.FixedLenFeature((), tf.string) - for attr_key in label_attr_keys: - features[attr_key] = tf.FixedLenFeature((), tf.int64) - parsed_features = tf.parse_single_example(example_proto, features) - image_only_keys = [i for i in image_attr_keys if 'image' in i] - view_image_key = image_only_keys[view_index] - image = preprocessing.decode_image(parsed_features[view_image_key]) - preprocessed = preprocess_fn(image, is_training=False) - attributes = [parsed_features[k] for k in label_attr_keys] - task = parsed_features['task'] - return tuple([preprocessed] + attributes + [task]) - - -def labeled_data_provider( - filenames, preprocess_fn, view_index, image_attr_keys, label_attr_keys, - batch_size=32, num_epochs=1): - """Gets a batched dataset iterator over annotated test images + labels. - - Provides a single view, specifed in `view_index`. - - Args: - filenames: List of Strings, paths to tfrecords on disk. 
- preprocess_fn: A function with the signature (raw_images, is_training) -> - preprocessed_images, where raw_images is a 4-D float32 image `Tensor` - of raw images, is_training is a Boolean describing if we're in training, - and preprocessed_images is a 4-D float32 image `Tensor` holding - preprocessed images. - view_index: Int, the index of the view to embed. - image_attr_keys: List of Strings, names for image keys. - label_attr_keys: List of Strings, names for label attributes. - batch_size: Int, size of the batch. - num_epochs: Int, number of epochs over the classification dataset. - Returns: - batch_images: 4-d float `Tensor` holding the batch images for the view. - labels: K-d int `Tensor` holding the K label attributes. - tasks: 1-D String `Tensor`, holding the task names for each batch element. - """ - dataset = tf.data.TFRecordDataset(filenames) - # pylint: disable=g-long-lambda - dataset = dataset.map( - lambda p: parse_labeled_example( - p, view_index, preprocess_fn, image_attr_keys, label_attr_keys)) - dataset = dataset.repeat(num_epochs) - dataset = dataset.batch(batch_size) - data_iterator = dataset.make_one_shot_iterator() - batch_data = data_iterator.get_next() - batch_images = batch_data[0] - - batch_labels = tf.stack(batch_data[1:-1], 1) - - batch_tasks = batch_data[-1] - - batch_images = set_image_tensor_batch_dim(batch_images, batch_size) - batch_labels.set_shape([batch_size, len(label_attr_keys)]) - batch_tasks.set_shape([batch_size]) - - return batch_images, batch_labels, batch_tasks - - -def parse_sequence_example(serialized_example, num_views): - """Parses a serialized sequence example into views, sequence length data.""" - context_features = { - 'task': tf.FixedLenFeature(shape=[], dtype=tf.string), - 'len': tf.FixedLenFeature(shape=[], dtype=tf.int64) - } - view_names = ['view%d' % i for i in range(num_views)] - fixed_features = [ - tf.FixedLenSequenceFeature( - shape=[], dtype=tf.string) for _ in range(len(view_names))] - sequence_features 
= dict(zip(view_names, fixed_features)) - context_parse, sequence_parse = tf.parse_single_sequence_example( - serialized=serialized_example, - context_features=context_features, - sequence_features=sequence_features) - views = tf.stack([sequence_parse[v] for v in view_names]) - lens = [sequence_parse[v].get_shape().as_list()[0] for v in view_names] - assert len(set(lens)) == 1 - seq_len = tf.shape(sequence_parse[view_names[-1]])[0] - return context_parse, views, seq_len - - -def get_shuffled_input_records(file_list): - """Build a tf.data.Dataset of shuffled input TFRecords that repeats.""" - dataset = tf.data.Dataset.from_tensor_slices(file_list) - dataset = dataset.shuffle(len(file_list)) - dataset = dataset.repeat() - dataset = dataset.flat_map(record_dataset) - dataset = dataset.repeat() - return dataset - - -def get_tcn_anchor_pos_indices(seq_len, num_views, num_pairs, window): - """Gets batch TCN anchor positive timestep and view indices. - - This gets random (anchor, positive) timesteps from a sequence, and chooses - 2 random differing viewpoints for each anchor positive pair. - - Args: - seq_len: Int, the size of the batch sequence in timesteps. - num_views: Int, the number of simultaneous viewpoints at each timestep. - num_pairs: Int, the number of pairs to build. - window: Int, the window (in frames) from which to take anchor, positive - and negative indices. - Returns: - ap_time_indices: 1-D Int `Tensor` with size [num_pairs], holding the - timestep for each (anchor,pos) pair. - a_view_indices: 1-D Int `Tensor` with size [num_pairs], holding the - view index for each anchor. - p_view_indices: 1-D Int `Tensor` with size [num_pairs], holding the - view index for each positive. - """ - # Get anchor, positive time indices. - def f1(): - # Choose a random window-length range from the sequence. 
- range_min = tf.random_shuffle(tf.range(seq_len-window))[0] - range_max = range_min+window - return tf.range(range_min, range_max) - def f2(): - # Consider the full sequence. - return tf.range(seq_len) - time_indices = tf.cond(tf.greater(seq_len, window), f1, f2) - shuffled_indices = tf.random_shuffle(time_indices) - num_pairs = tf.minimum(seq_len, num_pairs) - ap_time_indices = shuffled_indices[:num_pairs] - - # Get opposing anchor, positive view indices. - view_indices = tf.tile( - tf.expand_dims(tf.range(num_views), 0), (num_pairs, 1)) - shuffled_view_indices = tf.map_fn(tf.random_shuffle, view_indices) - a_view_indices = shuffled_view_indices[:, 0] - p_view_indices = shuffled_view_indices[:, 1] - return ap_time_indices, a_view_indices, p_view_indices - - -def set_image_tensor_batch_dim(tensor, batch_dim): - """Sets the batch dimension on an image tensor.""" - shape = tensor.get_shape() - tensor.set_shape([batch_dim, shape[1], shape[2], shape[3]]) - return tensor - - -def parse_sequence_to_pairs_batch( - serialized_example, preprocess_fn, is_training, num_views, batch_size, - window): - """Parses a serialized sequence example into a batch of preprocessed data. - - Args: - serialized_example: A serialized SequenceExample. - preprocess_fn: A function with the signature (raw_images, is_training) -> - preprocessed_images. - is_training: Boolean, whether or not we're in training. - num_views: Int, the number of simultaneous viewpoints at each timestep in - the dataset. - batch_size: Int, size of the batch to get. - window: Int, only take pairs from a maximium window of this size. - Returns: - preprocessed: A 4-D float32 `Tensor` holding preprocessed images. - anchor_images: A 4-D float32 `Tensor` holding raw anchor images. - pos_images: A 4-D float32 `Tensor` holding raw positive images. - """ - _, views, seq_len = parse_sequence_example(serialized_example, num_views) - - # Get random (anchor, positive) timestep and viewpoint indices. 
- num_pairs = batch_size // 2 - ap_time_indices, a_view_indices, p_view_indices = get_tcn_anchor_pos_indices( - seq_len, num_views, num_pairs, window) - - # Gather the image strings. - combined_anchor_indices = tf.concat( - [tf.expand_dims(a_view_indices, 1), - tf.expand_dims(ap_time_indices, 1)], 1) - combined_pos_indices = tf.concat( - [tf.expand_dims(p_view_indices, 1), - tf.expand_dims(ap_time_indices, 1)], 1) - anchor_images = tf.gather_nd(views, combined_anchor_indices) - pos_images = tf.gather_nd(views, combined_pos_indices) - - # Decode images. - anchor_images = tf.map_fn( - preprocessing.decode_image, anchor_images, dtype=tf.float32) - pos_images = tf.map_fn( - preprocessing.decode_image, pos_images, dtype=tf.float32) - - # Concatenate [anchor, postitive] images into a batch and preprocess it. - concatenated = tf.concat([anchor_images, pos_images], 0) - preprocessed = preprocess_fn(concatenated, is_training) - anchor_prepro, positive_prepro = tf.split(preprocessed, num_or_size_splits=2, - axis=0) - - # Set static batch dimensions for all image tensors - ims = [anchor_prepro, positive_prepro, anchor_images, pos_images] - ims = [set_image_tensor_batch_dim(i, num_pairs) for i in ims] - [anchor_prepro, positive_prepro, anchor_images, pos_images] = ims - - # Assign each anchor and positive the same label. - anchor_labels = tf.range(1, num_pairs+1) - positive_labels = tf.range(1, num_pairs+1) - - return (anchor_prepro, positive_prepro, anchor_images, pos_images, - anchor_labels, positive_labels, seq_len) - - -def multiview_pairs_provider(file_list, - preprocess_fn, - num_views, - window, - is_training, - batch_size, - examples_per_seq=2, - num_parallel_calls=12, - sequence_prefetch_size=12, - batch_prefetch_size=12): - """Provides multi-view TCN anchor-positive image pairs. - - Returns batches of Multi-view TCN pairs, where each pair consists of an - anchor and a positive coming from different views from the same timestep. 
- Batches are filled one entire sequence at a time until - batch_size is exhausted. Pairs are chosen randomly without replacement - within a sequence. - - Used by: - * triplet semihard loss. - * clustering loss. - * npairs loss. - * lifted struct loss. - * contrastive loss. - - Args: - file_list: List of Strings, paths to tfrecords. - preprocess_fn: A function with the signature (raw_images, is_training) -> - preprocessed_images, where raw_images is a 4-D float32 image `Tensor` - of raw images, is_training is a Boolean describing if we're in training, - and preprocessed_images is a 4-D float32 image `Tensor` holding - preprocessed images. - num_views: Int, the number of simultaneous viewpoints at each timestep. - window: Int, size of the window (in frames) from which to draw batch ids. - is_training: Boolean, whether or not we're in training. - batch_size: Int, how many examples in the batch (num pairs * 2). - examples_per_seq: Int, how many examples to take per sequence. - num_parallel_calls: Int, the number of elements to process in parallel by - mapper. - sequence_prefetch_size: Int, size of the buffer used to prefetch sequences. - batch_prefetch_size: Int, size of the buffer used to prefetch batches. - Returns: - batch_images: A 4-D float32 `Tensor` holding preprocessed batch images. - anchor_labels: A 1-D int32 `Tensor` holding anchor image labels. - anchor_images: A 4-D float32 `Tensor` holding raw anchor images. - positive_labels: A 1-D int32 `Tensor` holding positive image labels. - pos_images: A 4-D float32 `Tensor` holding raw positive images. - """ - def _parse_sequence(x): - return parse_sequence_to_pairs_batch( - x, preprocess_fn, is_training, num_views, examples_per_seq, window) - - # Build a buffer of shuffled input TFRecords that repeats forever. - dataset = get_shuffled_input_records(file_list) - - # Prefetch a number of opened TFRecords. 
- dataset = dataset.prefetch(sequence_prefetch_size) - - # Use _parse_sequence to map sequences to batches (one sequence per batch). - dataset = dataset.map( - _parse_sequence, num_parallel_calls=num_parallel_calls) - - # Filter out sequences that don't have at least examples_per_seq. - def seq_greater_than_min(seqlen, maximum): - return seqlen >= maximum - filter_fn = functools.partial(seq_greater_than_min, maximum=examples_per_seq) - dataset = dataset.filter(lambda a, b, c, d, e, f, seqlen: filter_fn(seqlen)) - - # Take a number of sequences for the batch. - assert batch_size % examples_per_seq == 0 - sequences_per_batch = batch_size // examples_per_seq - dataset = dataset.batch(sequences_per_batch) - - # Prefetch batches of images. - dataset = dataset.prefetch(batch_prefetch_size) - - iterator = dataset.make_one_shot_iterator() - data = iterator.get_next() - - # Pull out images, reshape to [batch_size, ...], concatenate anchor and pos. - ims = list(data[:4]) - anchor_labels, positive_labels = data[4:6] - - # Set labels shape. - anchor_labels.set_shape([sequences_per_batch, None]) - positive_labels.set_shape([sequences_per_batch, None]) - - def _reshape_to_batchsize(im): - """[num_sequences, num_per_seq, ...] images to [batch_size, ...].""" - sequence_ims = tf.split(im, num_or_size_splits=sequences_per_batch, axis=0) - sequence_ims = [tf.squeeze(i) for i in sequence_ims] - return tf.concat(sequence_ims, axis=0) - - # Reshape labels. - anchor_labels = _reshape_to_batchsize(anchor_labels) - positive_labels = _reshape_to_batchsize(positive_labels) - - def _set_shape(im): - """Sets a static shape for an image tensor of [sequences_per_batch,...] 
.""" - shape = im.get_shape() - im.set_shape([sequences_per_batch, shape[1], shape[2], shape[3], shape[4]]) - return im - ims = [_set_shape(im) for im in ims] - ims = [_reshape_to_batchsize(im) for im in ims] - - anchor_prepro, positive_prepro, anchor_images, pos_images = ims - batch_images = tf.concat([anchor_prepro, positive_prepro], axis=0) - - return batch_images, anchor_labels, positive_labels, anchor_images, pos_images - - -def get_svtcn_indices(seq_len, batch_size, num_views): - """Gets a random window of contiguous time indices from a sequence. - - Args: - seq_len: Int, number of timesteps in the image sequence. - batch_size: Int, size of the batch to construct. - num_views: Int, the number of simultaneous viewpoints at each - timestep in the dataset. - - Returns: - time_indices: 1-D Int `Tensor` with size [batch_size], holding the - timestep for each batch image. - view_indices: 1-D Int `Tensor` with size [batch_size], holding the - view for each batch image. This is consistent across the batch. - """ - # Get anchor, positive time indices. - def f1(): - # Choose a random contiguous range from within the sequence. - range_min = tf.random_shuffle(tf.range(seq_len-batch_size))[0] - range_max = range_min+batch_size - return tf.range(range_min, range_max) - def f2(): - # Consider the full sequence. - return tf.range(seq_len) - time_indices = tf.cond(tf.greater(seq_len, batch_size), f1, f2) - # Get opposing anchor, positive view indices. - random_view = tf.random_shuffle(tf.range(num_views))[0] - view_indices = tf.tile([random_view], (batch_size,)) - return time_indices, view_indices - - -def parse_sequence_to_svtcn_batch( - serialized_example, preprocess_fn, is_training, num_views, batch_size): - """Parses a serialized sequence example into a batch of SVTCN data.""" - _, views, seq_len = parse_sequence_example(serialized_example, num_views) - # Get svtcn indices. 
- time_indices, view_indices = get_svtcn_indices(seq_len, batch_size, num_views) - combined_indices = tf.concat( - [tf.expand_dims(view_indices, 1), - tf.expand_dims(time_indices, 1)], 1) - - # Gather the image strings. - images = tf.gather_nd(views, combined_indices) - - # Decode images. - images = tf.map_fn(preprocessing.decode_image, images, dtype=tf.float32) - - # Concatenate anchor and postitive images, preprocess the batch. - preprocessed = preprocess_fn(images, is_training) - - return preprocessed, images, time_indices - - -def singleview_tcn_provider(file_list, - preprocess_fn, - num_views, - is_training, - batch_size, - num_parallel_calls=12, - sequence_prefetch_size=12, - batch_prefetch_size=12): - """Provides data to train singleview TCNs. - - Args: - file_list: List of Strings, paths to tfrecords. - preprocess_fn: A function with the signature (raw_images, is_training) -> - preprocessed_images, where raw_images is a 4-D float32 image `Tensor` - of raw images, is_training is a Boolean describing if we're in training, - and preprocessed_images is a 4-D float32 image `Tensor` holding - preprocessed images. - num_views: Int, the number of simultaneous viewpoints at each timestep. - is_training: Boolean, whether or not we're in training. - batch_size: Int, how many examples in the batch. - num_parallel_calls: Int, the number of elements to process in parallel by - mapper. - sequence_prefetch_size: Int, size of the buffer used to prefetch sequences. - batch_prefetch_size: Int, size of the buffer used to prefetch batches. - - Returns: - batch_images: A 4-D float32 `Tensor` of preprocessed images. - raw_images: A 4-D float32 `Tensor` of raw images. - timesteps: A 1-D int32 `Tensor` of timesteps associated with each image. - """ - def _parse_sequence(x): - return parse_sequence_to_svtcn_batch( - x, preprocess_fn, is_training, num_views, batch_size) - - # Build a buffer of shuffled input TFRecords that repeats forever. 
- dataset = get_shuffled_input_records(file_list) - - # Prefetch a number of opened files. - dataset = dataset.prefetch(sequence_prefetch_size) - - # Use _parse_sequence to map sequences to image batches. - dataset = dataset.map( - _parse_sequence, num_parallel_calls=num_parallel_calls) - - # Prefetch batches of images. - dataset = dataset.prefetch(batch_prefetch_size) - dataset = dataset.make_one_shot_iterator() - batch_images, raw_images, timesteps = dataset.get_next() - return batch_images, raw_images, timesteps diff --git a/research/tcn/data_providers_test.py b/research/tcn/data_providers_test.py deleted file mode 100644 index e501231022d3ddb1401bbbfa55518e357890b4fd..0000000000000000000000000000000000000000 --- a/research/tcn/data_providers_test.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for data_providers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import data_providers -import tensorflow as tf - - -class DataTest(tf.test.TestCase): - - def testMVTripletIndices(self): - """Ensures anchor/pos indices for a TCN batch are valid.""" - tf.set_random_seed(0) - window = 580 - batch_size = 36 - num_pairs = batch_size // 2 - num_views = 2 - seq_len = 600 - # Get anchor time and view indices for this sequence. - (_, a_view_indices, - p_view_indices) = data_providers.get_tcn_anchor_pos_indices( - seq_len, num_views, num_pairs, window) - with self.test_session() as sess: - (np_a_view_indices, - np_p_view_indices) = sess.run([a_view_indices, p_view_indices]) - - # Assert no overlap between anchor and pos view indices. - np.testing.assert_equal( - np.any(np.not_equal(np_a_view_indices, np_p_view_indices)), True) - - # Assert set of view indices is a subset of expected set of view indices. - view_set = set(range(num_views)) - self.assertTrue(set(np_a_view_indices).issubset(view_set)) - self.assertTrue(set(np_p_view_indices).issubset(view_set)) - - def testSVTripletIndices(self): - """Ensures time indices for a SV triplet batch are valid.""" - seq_len = 600 - batch_size = 36 - num_views = 2 - time_indices, _ = data_providers.get_svtcn_indices( - seq_len, batch_size, num_views) - with self.test_session() as sess: - np_time_indices = sess.run(time_indices) - first = np_time_indices[0] - last = np_time_indices[-1] - # Make sure batch time indices are a contiguous range. 
- self.assertTrue(np.array_equal(np_time_indices, range(first, last+1))) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/tcn/dataset/images_to_videos.py b/research/tcn/dataset/images_to_videos.py deleted file mode 100644 index ad1a7387e7bf73b2e2d42b364f5f3a86c575e7cf..0000000000000000000000000000000000000000 --- a/research/tcn/dataset/images_to_videos.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Converts temp directories of images to videos.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import os -import shutil -# pylint: disable=invalid-name - -parser = argparse.ArgumentParser() -parser.add_argument( - '--view_dirs', type=str, default='', - help='Comma-separated list of temp view image directories.') -parser.add_argument( - '--vid_paths', type=str, default='', - help='Comma-separated list of video output paths.') -parser.add_argument( - '--debug_path', type=str, default='', - help='Output path to debug video.') - -parser.add_argument( - '--debug_lhs_view', type=str, default='', - help='Output path to debug video.') -parser.add_argument( - '--debug_rhs_view', type=str, default='', - help='Output path to debug video.') - - -def create_vids(view_dirs, vid_paths, debug_path=None, - debug_lhs_view=0, debug_rhs_view=1): - """Creates one video per view per sequence.""" - - # Create the view videos. - for (view_dir, vidpath) in zip(view_dirs, vid_paths): - encode_vid_cmd = r'mencoder mf://%s/*.png \ - -mf fps=29:type=png \ - -ovc lavc -lavcopts vcodec=mpeg4:mbd=2:trell \ - -oac copy -o %s' % (view_dir, vidpath) - os.system(encode_vid_cmd) - - # Optionally create a debug side-by-side video. 
- if debug_path: - lhs = vid_paths[int(debug_lhs_view)] - rhs = vid_paths[int(debug_rhs_view)] - os.system(r"avconv \ - -i %s \ - -i %s \ - -filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \ - -map [vid] \ - -c:v libx264 \ - -crf 23 \ - -preset veryfast \ - %s" % (lhs, rhs, debug_path)) - - -def main(): - FLAGS, _ = parser.parse_known_args() - assert FLAGS.view_dirs - assert FLAGS.vid_paths - view_dirs = FLAGS.view_dirs.split(',') - vid_paths = FLAGS.vid_paths.split(',') - create_vids(view_dirs, vid_paths, FLAGS.debug_path, - FLAGS.debug_lhs_view, FLAGS.debug_rhs_view) - - # Cleanup temp image dirs. - for i in view_dirs: - shutil.rmtree(i) - -if __name__ == '__main__': - main() diff --git a/research/tcn/dataset/videos_to_tfrecords.py b/research/tcn/dataset/videos_to_tfrecords.py deleted file mode 100644 index a17411f369286603c43c9adb522f06316cf97ab3..0000000000000000000000000000000000000000 --- a/research/tcn/dataset/videos_to_tfrecords.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Converts videos to training, validation, test, and debug tfrecords on cns. - -Example usage: - -# From phone videos. 
-x=learning/brain/research/tcn/videos_to_tfrecords && \ -blaze build -c opt $x && \ -set=tmp && videos=~/data/tcn/datasets/$set/ && \ -blaze-bin/$x --logtostderr --output_dir /cns/oi-d/home/$USER/tcn_data/$set \ ---input_dir $videos/train ---debug $dataset/debug --rotate 90 --max_per_shard 400 - -# From webcam videos. -mode=train -x=learning/brain/research/tcn/videos_to_tfrecords && \ -blaze build -c opt $x && \ -set=tmp && videos=/tmp/tcn/videos/$set/ && \ -blaze-bin/$x --logtostderr \ ---output_dir /cns/oi-d/home/$USER/tcn_data/$set/$mode \ ---input_dir $videos/$mode --max_per_shard 400 - -""" -import glob -import math -import multiprocessing -from multiprocessing.pool import ThreadPool -import os -from random import shuffle -import re -from StringIO import StringIO -import cv2 -from PIL import Image -from PIL import ImageFile -from preprocessing import cv2resizeminedge -from preprocessing import cv2rotateimage -from preprocessing import shapestring -from utils.progress import Progress -import tensorflow.google as tf -tf.logging.set_verbosity(tf.logging.INFO) - - -tf.app.flags.DEFINE_string('view_pattern', '_view[_]*[0]+[.].*', - 'view regexp pattern for first view') -tf.app.flags.DEFINE_string('input_dir', '', '''input data path''') -tf.app.flags.DEFINE_integer('resize_min_edge', 0, - '''resize the smallest edge to this size.''') -tf.app.flags.DEFINE_integer('rotate', 0, '''rotate the image in degrees.''') -tf.app.flags.DEFINE_string('rotate_if_matching', None, - 'rotate only if video path matches regexp.') -tf.app.flags.DEFINE_string('output_dir', '', 'output directory for the dataset') -tf.app.flags.DEFINE_integer( - 'max_per_shard', -1, 'max # of frames per data chunk') -tf.app.flags.DEFINE_integer('expected_views', 2, 'expected number of views') -tf.app.flags.DEFINE_integer('log_frequency', 50, 'frequency of logging') -tf.app.flags.DEFINE_integer( - 'max_views_discrepancy', 100, - 'Maximum length difference (in frames) allowed between views') 
-tf.app.flags.DEFINE_boolean('overwrite', False, 'overwrite output files') -FLAGS = tf.app.flags.FLAGS - -feature = tf.train.Feature -bytes_feature = lambda v: feature(bytes_list=tf.train.BytesList(value=v)) -int64_feature = lambda v: feature(int64_list=tf.train.Int64List(value=v)) -float_feature = lambda v: feature(float_list=tf.train.FloatList(value=v)) - - -def FindPatternFiles(path, view_pattern, errors): - """Recursively find all files matching a certain pattern.""" - if not path: - return None - tf.logging.info( - 'Recursively searching for files matching pattern \'%s\' in %s' % - (view_pattern, path)) - view_patt = re.compile('.*' + view_pattern) - sequences = [] - for root, _, filenames in os.walk(path, followlinks=True): - path_root = root[:len(path)] - assert path_root == path - - for filename in filenames: - if view_patt.match(filename): - fullpath = os.path.join(root, re.sub(view_pattern, '', filename)) - shortpath = re.sub(path, '', fullpath).lstrip('/') - - # Determine if this sequence should be sharded or not. - shard = False - if FLAGS.max_per_shard > 0: - shard = True - - # Retrieve number of frames for this sequence. - num_views, length, view_paths, num_frames = GetViewInfo( - fullpath + view_pattern[0] + '*') - if num_views != FLAGS.expected_views: - tf.logging.info('Expected %d views but found: %s' % - (FLAGS.expected_views, str(view_paths))) - assert num_views == FLAGS.expected_views - assert length > 0 - # Drop sequences if view lengths differ too much. - if max(num_frames) - min(num_frames) > FLAGS.max_views_discrepancy: - error_msg = ( - 'Error: ignoring sequence with views with length difference > %d:' - '%s in %s') % (FLAGS.max_views_discrepancy, str(num_frames), - fullpath) - errors.append(error_msg) - tf.logging.error(error_msg) - else: - # Append sequence info. 
- sequences.append({'full': fullpath, 'name': shortpath, 'len': length, - 'start': 0, 'end': length, 'num_views': num_views, - 'shard': shard}) - return sorted(sequences, key=lambda k: k['name']) - - -def ShardSequences(sequences, max_per_shard): - """Find all sequences, shard and randomize them.""" - total_shards_len = 0 - total_shards = 0 - assert max_per_shard > 0 - for sequence in sequences: - if sequence['shard']: - sequence['shard'] = False # Reset shard flag. - length = sequence['len'] - start = sequence['start'] - end = sequence['end'] - name = sequence['name'] - assert end - start == length - if length > max_per_shard: - # Dividing sequence into smaller shards. - num_shards = int(math.floor(length / max_per_shard)) + 1 - size = int(math.ceil(length / num_shards)) - tf.logging.info( - 'splitting sequence of length %d into %d shards of size %d' % - (length, num_shards, size)) - last_end = 0 - for i in range(num_shards): - shard_start = last_end - shard_end = min(length, shard_start + size) - if i == num_shards - 1: - shard_end = length - shard_len = shard_end - shard_start - total_shards_len += shard_len - shard_name = name + '_shard%02d' % i - last_end = shard_end - - # Enqueuing shard. - if i == 0: # Replace current sequence. - sequence['len'] = shard_len - sequence['start'] = shard_start - sequence['end'] = shard_end - sequence['name'] = shard_name - else: # Enqueue new sequence. - sequences.append( - {'full': sequence['full'], 'name': shard_name, - 'len': shard_len, 'start': shard_start, 'end': shard_end, - 'num_views': sequence['num_views'], 'shard': False}) - - total_shards += num_shards - assert last_end == length - - # Print resulting sharding. 
- if total_shards > 0: - tf.logging.info('%d shards of average length %d' % - (total_shards, total_shards_len / total_shards)) - return sorted(sequences, key=lambda k: k['name']) - - -def RandomizeSets(sets): - """Randomize each set.""" - for _, sequences in sorted(sets.iteritems()): - if sequences: - # Randomize order. - shuffle(sequences) - - -def GetSpecificFrame(vid_path, frame_index): - """Gets a frame at a specified index in a video.""" - cap = cv2.VideoCapture(vid_path) - cap.set(1, frame_index) - _, bgr = cap.read() - cap.release() - rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) - return rgb - - -def JpegString(image, jpeg_quality=90): - """Returns given PIL.Image instance as jpeg string. - - Args: - image: A PIL image. - jpeg_quality: The image quality, on a scale from 1 (worst) to 95 (best). - - Returns: - a jpeg_string. - """ - # This fix to PIL makes sure that we don't get an error when saving large - # jpeg files. This is a workaround for a bug in PIL. The value should be - # substantially larger than the size of the image being saved. - ImageFile.MAXBLOCK = 640 * 512 * 64 - - output_jpeg = StringIO() - image.save(output_jpeg, 'jpeg', quality=jpeg_quality, optimize=True) - return output_jpeg.getvalue() - - -def ParallelPreprocessing(args): - """Parallel preprocessing: rotation, resize and jpeg encoding to string.""" - (vid_path, timestep, num_timesteps, view) = args - try: - image = GetSpecificFrame(vid_path, timestep) - - # Resizing. - resize_str = '' - if FLAGS.resize_min_edge > 0: - resize_str += ', resize ' + shapestring(image) - image = cv2resizeminedge(image, FLAGS.resize_min_edge) - resize_str += ' => ' + shapestring(image) - - # Rotating. 
- rotate = None - if FLAGS.rotate: - rotate = FLAGS.rotate - if FLAGS.rotate_if_matching is not None: - rotate = None - patt = re.compile(FLAGS.rotate_if_matching) - if patt.match(vid_path) is not None: - rotate = FLAGS.rotate - if rotate is not None: - image = cv2rotateimage(image, FLAGS.rotate) - - # Jpeg encoding. - image = Image.fromarray(image) - im_string = bytes_feature([JpegString(image)]) - - if timestep % FLAGS.log_frequency == 0: - tf.logging.info('Loaded frame %d / %d for %s (rotation %s%s) from %s' % - (timestep, num_timesteps, view, str(rotate), resize_str, - vid_path)) - return im_string - except cv2.error as e: - tf.logging.error('Error while loading frame %d of %s: %s' % - (timestep, vid_path, str(e))) - return None - - -def GetNumFrames(vid_path): - """Gets the number of frames in a video.""" - cap = cv2.VideoCapture(vid_path) - total_frames = cap.get(7) - cap.release() - return int(total_frames) - - -def GetViewInfo(views_fullname): - """Return information about a group of views.""" - view_paths = sorted(glob.glob(views_fullname)) - num_frames = [GetNumFrames(i) for i in view_paths] - min_num_frames = min(num_frames) - num_views = len(view_paths) - return num_views, min_num_frames, view_paths, num_frames - - -def AddSequence(sequence, writer, progress, errors): - """Converts a sequence to a SequenceExample. - - Sequences have multiple viewpoint videos. Extract all frames from all - viewpoint videos in parallel, build a single SequenceExample containing - all viewpoint images for every timestep. - - Args: - sequence: a dict with information on a sequence. - writer: A TFRecordWriter. - progress: A Progress object to report processing progress. - errors: a list of string to append to in case of errors. - """ - fullname = sequence['full'] - shortname = sequence['name'] - start = sequence['start'] - end = sequence['end'] - num_timesteps = sequence['len'] - - # Build a list of all view paths for this fullname. 
- path = fullname + FLAGS.view_pattern[0] + '*' - tf.logging.info('Loading sequence from ' + path) - view_paths = sorted(glob.glob(path)) - # Extract all images for all views - num_frames = [GetNumFrames(i) for i in view_paths] - tf.logging.info('Loading %s with [%d, %d[ (%d frames) from: %s %s' % - (shortname, start, end, num_timesteps, - str(num_frames), str(view_paths))) - num_views = len(view_paths) - total_timesteps = int(min(num_frames)) - assert num_views == FLAGS.expected_views - assert num_views == sequence['num_views'] - - # Create a worker pool to parallelize loading/rotating - worker_pool = ThreadPool(multiprocessing.cpu_count()) - - # Collect all images for each view. - view_to_feature_list = {} - view_images = [] - for view_idx, view in enumerate( - ['view'+str(i) for i in range(num_views)]): - # Flatten list to process in parallel - work = [] - for i in range(start, end): - work.append((view_paths[view_idx], i, total_timesteps, view)) - # Load and rotate images in parallel - view_images.append(worker_pool.map(ParallelPreprocessing, work)) - # Report progress. - progress.Add(len(view_images[view_idx])) - tf.logging.info('%s' % str(progress)) - - # Remove error frames from all views - i = start - num_errors = 0 - while i < len(view_images[0]): - remove_frame = False - # Check if one or more views have an error for this frame. - for view_idx in range(num_views): - if view_images[view_idx][i] is None: - remove_frame = True - error_msg = 'Removing frame %d for all views for %s ' % (i, fullname) - errors.append(error_msg) - tf.logging.error(error_msg) - # Remove faulty frames. - if remove_frame: - num_errors += 1 - for view_idx in range(num_views): - del view_images[view_idx][i] - else: - i += 1 - - # Ignore sequences that have errors. - if num_errors > 0: - error_msg = 'Dropping sequence because of frame errors for %s' % fullname - errors.append(error_msg) - tf.logging.error(error_msg) - else: - # Build FeatureList objects for each view. 
- for view_idx, view in enumerate( - ['view'+str(i) for i in range(num_views)]): - # Construct FeatureList from repeated feature. - view_to_feature_list[view] = tf.train.FeatureList( - feature=view_images[view_idx]) - - context_features = tf.train.Features(feature={ - 'task': bytes_feature([shortname]), - 'len': int64_feature([num_timesteps]) - }) - feature_lists = tf.train.FeatureLists(feature_list=view_to_feature_list) - ex = tf.train.SequenceExample( - context=context_features, feature_lists=feature_lists) - writer.write(ex.SerializeToString()) - tf.logging.info('Done adding %s with %d timesteps' - % (fullname, num_timesteps)) - - -def PrintSequencesInfo(sequences, prefix): - """Print information about sequences and return the total number of frames.""" - tf.logging.info('') - tf.logging.info(prefix) - num_frames = 0 - for sequence in sequences: - shard_str = '' - if sequence['shard']: - shard_str = ' (sharding)' - tf.logging.info('frames [%d, %d[\t(%d frames * %d views)%s\t%s' % ( - sequence['start'], sequence['end'], sequence['len'], - sequence['num_views'], shard_str, sequence['name'])) - num_frames += sequence['len'] * sequence['num_views'] - tf.logging.info(('%d frames (all views), %d sequences, average sequence' - ' length (all views): %d') % - (num_frames, len(sequences), num_frames / len(sequences))) - tf.logging.info('') - return num_frames - - -def CheckRecord(filename, sequence): - """Check that an existing tfrecord corresponds to the expected sequence.""" - num_sequences = 0 - total_frames = 0 - for serialized_example in tf.python_io.tf_record_iterator(filename): - num_sequences += 1 - example = tf.train.SequenceExample() - example.ParseFromString(serialized_example) - length = example.context.feature['len'].int64_list.value[0] - name = example.context.feature['task'].bytes_list.value[0] - total_frames += len(example.feature_lists.feature_list) * length - if sequence['name'] != name or sequence['len'] != length: - return False, total_frames - if 
num_sequences == 0: - return False, total_frames - return True, total_frames - - -def AddSequences(): - """Creates one training, validation.""" - errors = [] - - # Generate datasets file lists. - sequences = FindPatternFiles(FLAGS.input_dir, FLAGS.view_pattern, errors) - num_frames = PrintSequencesInfo(sequences, - 'Found the following datasets and files:') - - # Sharding and randomizing sets. - if FLAGS.max_per_shard > 0: - sequences = ShardSequences(sequences, FLAGS.max_per_shard) - num_frames = PrintSequencesInfo(sequences, 'After sharding:') - tf.logging.info('') - - # Process sets. - progress = Progress(num_frames) - output_list = [] - for sequence in sequences: - record_name = os.path.join( - FLAGS.output_dir, '%s.tfrecord' % sequence['name']) - if tf.gfile.Exists(record_name) and not FLAGS.overwrite: - ok, num_frames = CheckRecord(record_name, sequence) - if ok: - progress.Add(num_frames) - tf.logging.info('Skipping existing output file: %s' % record_name) - continue - else: - tf.logging.info('File does not match sequence, reprocessing...') - output_dir = os.path.dirname(record_name) - if not tf.gfile.Exists(output_dir): - tf.logging.info('Creating output directory: %s' % output_dir) - tf.gfile.MakeDirs(output_dir) - output_list.append(record_name) - tf.logging.info('Writing to ' + record_name) - writer = tf.python_io.TFRecordWriter(record_name) - AddSequence(sequence, writer, progress, errors) - writer.close() - tf.logging.info('Wrote dataset files: ' + str(output_list)) - tf.logging.info('All errors (%d): %s' % (len(errors), str(errors))) - - -def main(_): - AddSequences() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/dataset/webcam.py b/research/tcn/dataset/webcam.py deleted file mode 100644 index 962813082a618a4b0fd970f94f04441ca69feb37..0000000000000000000000000000000000000000 --- a/research/tcn/dataset/webcam.py +++ /dev/null @@ -1,491 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Collect images from multiple simultaneous webcams. - -Usage: - -1. Define some environment variables that describe what you're collecting. -dataset=your_dataset_name -mode=train -num_views=2 -viddir=/tmp/tcn/videos -tmp_imagedir=/tmp/tcn/tmp_images -debug_vids=1 - -2. Run the script. -export DISPLAY=:0.0 && \ -root=learning/brain/research/tcn && \ -bazel build -c opt --copt=-mavx tcn/webcam && \ -bazel-bin/tcn/webcam \ ---dataset $dataset \ ---mode $mode \ ---num_views $num_views \ ---tmp_imagedir $tmp_imagedir \ ---viddir $viddir \ ---debug_vids 1 \ ---logtostderr - -3. Hit Ctrl-C when done collecting, upon which the script will compile videos -for each view and optionally a debug video concatenating multiple -simultaneous views. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import multiprocessing -from multiprocessing import Process -import os -import subprocess -import sys -import time -import cv2 -import matplotlib -matplotlib.use('TkAgg') -from matplotlib import animation # pylint: disable=g-import-not-at-top -import matplotlib.pyplot as plt -import numpy as np -from six.moves import input -import tensorflow as tf -tf.logging.set_verbosity(tf.logging.INFO) - - -tf.flags.DEFINE_string('dataset', '', 'Name of the dataset we`re collecting.') -tf.flags.DEFINE_string('mode', '', - 'What type of data we`re collecting. E.g.:' - '`train`,`valid`,`test`, or `demo`') -tf.flags.DEFINE_string('seqname', '', - 'Name of this sequence. If empty, the script will use' - 'the name seq_N+1 where seq_N is the latest' - 'integer-named sequence in the videos directory.') -tf.flags.DEFINE_integer('num_views', 2, - 'Number of webcams.') -tf.flags.DEFINE_string('tmp_imagedir', '/tmp/tcn/data', - 'Temporary outdir to write images.') -tf.flags.DEFINE_string('viddir', '/tmp/tcn/videos', - 'Base directory to write debug videos.') -tf.flags.DEFINE_boolean('debug_vids', True, - 'Whether to generate debug vids with multiple' - 'concatenated views.') -tf.flags.DEFINE_string('debug_lhs_view', '0', - 'Which viewpoint to use for the lhs video.') -tf.flags.DEFINE_string('debug_rhs_view', '1', - 'Which viewpoint to use for the rhs video.') -tf.flags.DEFINE_integer('height', 1080, 'Raw input height.') -tf.flags.DEFINE_integer('width', 1920, 'Raw input width.') -tf.flags.DEFINE_string('webcam_ports', None, - 'Comma-separated list of each webcam usb port.') -FLAGS = tf.app.flags.FLAGS - - -class ImageQueue(object): - """An image queue holding each stream's most recent image. - - Basically implements a process-safe collections.deque(maxlen=1). 
- """ - - def __init__(self): - self.lock = multiprocessing.Lock() - self._queue = multiprocessing.Queue(maxsize=1) - - def append(self, data): - with self.lock: - if self._queue.full(): - # Pop the first element. - _ = self._queue.get() - self._queue.put(data) - - def get(self): - with self.lock: - return self._queue.get() - - def empty(self): - return self._queue.empty() - - def close(self): - return self._queue.close() - - -class WebcamViewer(object): - """A class which displays a live stream from the webcams.""" - - def __init__(self, display_queues): - """Create a WebcamViewer instance.""" - self.height = FLAGS.height - self.width = FLAGS.width - self.queues = display_queues - - def _get_next_images(self): - """Gets the next image to display.""" - # Wait for one image per view. - not_found = True - while not_found: - if True in [q.empty() for q in self.queues]: - # At least one image queue is empty; wait. - continue - else: - # Retrieve the images. - latest = [q.get() for q in self.queues] - combined = np.concatenate(latest, axis=1) - not_found = False - return combined - - def run(self): - """Displays the Kcam live stream in a window. - - This function blocks until the window is closed. - """ - fig, rgb_axis = plt.subplots() - - image_rows = self.height - image_cols = self.width * FLAGS.num_views - initial_image = np.zeros((image_rows, image_cols, 3)) - rgb_image = rgb_axis.imshow(initial_image, interpolation='nearest') - - def update_figure(frame_index): - """Animation function for matplotlib FuncAnimation. Updates the image. - - Args: - frame_index: The frame number. - Returns: - An iterable of matplotlib drawables to clear. - """ - _ = frame_index - images = self._get_next_images() - images = images[..., [2, 1, 0]] - rgb_image.set_array(images) - return rgb_image, - - # We must keep a reference to this animation in order for it to work. 
- unused_animation = animation.FuncAnimation( - fig, update_figure, interval=50, blit=True) - mng = plt.get_current_fig_manager() - mng.resize(*mng.window.maxsize()) - plt.show() - - -def reconcile(queues, write_queue): - """Gets a list of concurrent images from each view queue. - - This waits for latest images to be available in all view queues, - then continuously: - - Creates a list of current images for each view. - - Writes the list to a queue of image lists to write to disk. - Args: - queues: A list of `ImageQueues`, holding the latest image from each webcam. - write_queue: A multiprocessing.Queue holding lists of concurrent images. - """ - # Loop forever. - while True: - # Wait till all queues have an image. - if True in [q.empty() for q in queues]: - continue - else: - # Retrieve all views' images. - latest = [q.get() for q in queues] - # Copy the list of all concurrent images to the write queue. - write_queue.put(latest) - - -def persist(write_queue, view_dirs): - """Pulls lists of concurrent images off a write queue, writes them to disk. - - Args: - write_queue: A multiprocessing.Queue holding lists of concurrent images; - one image per view. - view_dirs: A list of strings, holding the output image directories for each - view. - """ - timestep = 0 - while True: - # Wait till there is work in the queue. - if write_queue.empty(): - continue - # Get a list of concurrent images to write to disk. - view_ims = write_queue.get() - for view_idx, image in enumerate(view_ims): - view_base = view_dirs[view_idx] - # Assign all concurrent view images the same sequence timestep. - fname = os.path.join(view_base, '%s.png' % str(timestep).zfill(10)) - cv2.imwrite(fname, image) - # Move to the next timestep. 
- timestep += 1 - - -def get_image(camera): - """Captures a single image from the camera and returns it in PIL format.""" - data = camera.read() - _, im = data - return im - - -def capture_webcam(camera, display_queue, reconcile_queue): - """Captures images from simultaneous webcams, writes them to queues. - - Args: - camera: A cv2.VideoCapture object representing an open webcam stream. - display_queue: An ImageQueue. - reconcile_queue: An ImageQueue. - """ - # Take some ramp images to allow cams to adjust for brightness etc. - for i in range(60): - tf.logging.info('Taking ramp image %d.' % i) - get_image(camera) - - cnt = 0 - start = time.time() - while True: - # Get images for all cameras. - im = get_image(camera) - # Replace the current image in the display and reconcile queues. - display_queue.append(im) - reconcile_queue.append(im) - cnt += 1 - current = time.time() - if cnt % 100 == 0: - tf.logging.info('Collected %s of video, %d frames at ~%.2f fps.' % ( - timer(start, current), cnt, cnt/(current-start))) - - -def timer(start, end): - """Returns a formatted time elapsed.""" - hours, rem = divmod(end-start, 3600) - minutes, seconds = divmod(rem, 60) - return '{:0>2}:{:0>2}:{:05.2f}'.format(int(hours), int(minutes), seconds) - - -def display_webcams(display_queues): - """Builds an WebcamViewer to animate incoming images, runs it.""" - viewer = WebcamViewer(display_queues) - viewer.run() - - -def create_vids(view_dirs, seqname): - """Creates one video per view per sequence.""" - vidbase = os.path.join(FLAGS.viddir, FLAGS.dataset, FLAGS.mode) - if not os.path.exists(vidbase): - os.makedirs(vidbase) - vidpaths = [] - for idx, view_dir in enumerate(view_dirs): - vidname = os.path.join(vidbase, '%s_view%d.mp4' % (seqname, idx)) - encode_vid_cmd = r'mencoder mf://%s/*.png \ - -mf fps=29:type=png \ - -ovc lavc -lavcopts vcodec=mpeg4:mbd=2:trell \ - -oac copy -o %s' % (view_dir, vidname) - os.system(encode_vid_cmd) - vidpaths.append(vidname) - - debugpath = None - if 
FLAGS.debug_vids: - lhs = vidpaths[FLAGS.debug_lhs_view] - rhs = vidpaths[FLAGS.debug_rhs_view] - debug_base = os.path.join('%s_debug' % FLAGS.viddir, FLAGS.dataset, - FLAGS.mode) - if not os.path.exists(debug_base): - os.makedirs(debug_base) - debugpath = '%s/%s.mp4' % (debug_base, seqname) - os.system(r"avconv \ - -i %s \ - -i %s \ - -filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \ - -map [vid] \ - -c:v libx264 \ - -crf 23 \ - -preset veryfast \ - %s" % (lhs, rhs, debugpath)) - - return vidpaths, debugpath - - -def setup_paths(): - """Sets up the necessary paths to collect videos.""" - assert FLAGS.dataset - assert FLAGS.mode - assert FLAGS.num_views - - # Setup directory for final images used to create videos for this sequence. - tmp_imagedir = os.path.join(FLAGS.tmp_imagedir, FLAGS.dataset, FLAGS.mode) - if not os.path.exists(tmp_imagedir): - os.makedirs(tmp_imagedir) - - # Create a base directory to hold all sequence videos if it doesn't exist. - vidbase = os.path.join(FLAGS.viddir, FLAGS.dataset, FLAGS.mode) - if not os.path.exists(vidbase): - os.makedirs(vidbase) - - # Get one directory per concurrent view and a sequence name. - view_dirs, seqname = get_view_dirs(vidbase, tmp_imagedir) - - # Get an output path to each view's video. - vid_paths = [] - for idx, _ in enumerate(view_dirs): - vid_path = os.path.join(vidbase, '%s_view%d.mp4' % (seqname, idx)) - vid_paths.append(vid_path) - - # Optionally build paths to debug_videos. - debug_path = None - if FLAGS.debug_vids: - debug_base = os.path.join('%s_debug' % FLAGS.viddir, FLAGS.dataset, - FLAGS.mode) - if not os.path.exists(debug_base): - os.makedirs(debug_base) - debug_path = '%s/%s.mp4' % (debug_base, seqname) - - return view_dirs, vid_paths, debug_path - - -def get_view_dirs(vidbase, tmp_imagedir): - """Creates and returns one view directory per webcam.""" - # Create and append a sequence name. 
- if FLAGS.seqname: - seqname = FLAGS.seqname - else: - # If there's no video directory, this is the first sequence. - if not os.listdir(vidbase): - seqname = '0' - else: - # Otherwise, get the latest sequence name and increment it. - seq_names = [i.split('_')[0] for i in os.listdir(vidbase)] - latest_seq = sorted(map(int, seq_names), reverse=True)[0] - seqname = str(latest_seq+1) - tf.logging.info('No seqname specified, using: %s' % seqname) - view_dirs = [os.path.join( - tmp_imagedir, '%s_view%d' % (seqname, v)) for v in range(FLAGS.num_views)] - for d in view_dirs: - if not os.path.exists(d): - os.makedirs(d) - return view_dirs, seqname - - -def get_cameras(): - """Opens cameras using cv2, ensures they can take images.""" - # Try to get free webcam ports. - if FLAGS.webcam_ports: - ports = map(int, FLAGS.webcam_ports.split(',')) - else: - ports = range(FLAGS.num_views) - cameras = [cv2.VideoCapture(i) for i in ports] - - if not all([i.isOpened() for i in cameras]): - try: - # Try to find and kill hanging cv2 process_ids. - output = subprocess.check_output(['lsof -t /dev/video*'], shell=True) - tf.logging.info('Found hanging cv2 process_ids: \n') - tf.logging.info(output) - tf.logging.info('Killing hanging processes...') - for process_id in output.split('\n')[:-1]: - subprocess.call(['kill %s' % process_id], shell=True) - time.sleep(3) - # Recapture webcams. - cameras = [cv2.VideoCapture(i) for i in ports] - except subprocess.CalledProcessError: - raise ValueError( - 'Cannot connect to cameras. Try running: \n' - 'ls -ltrh /dev/video* \n ' - 'to see which ports your webcams are connected to. Then hand those ' - 'ports as a comma-separated list to --webcam_ports, e.g. ' - '--webcam_ports 0,1') - - # Verify each camera is able to capture images. 
- ims = map(get_image, cameras) - assert False not in [i is not None for i in ims] - return cameras - - -def launch_images_to_videos(view_dirs, vid_paths, debug_path): - """Launch job in separate process to convert images to videos.""" - - f = 'learning/brain/research/tcn/dataset/images_to_videos.py' - cmd = ['python %s ' % f] - cmd += ['--view_dirs %s ' % ','.join(i for i in view_dirs)] - cmd += ['--vid_paths %s ' % ','.join(i for i in vid_paths)] - cmd += ['--debug_path %s ' % debug_path] - cmd += ['--debug_lhs_view %s ' % FLAGS.debug_lhs_view] - cmd += ['--debug_rhs_view %s ' % FLAGS.debug_rhs_view] - cmd += [' & '] - cmd = ''.join(i for i in cmd) - - # Call images_to_videos asynchronously. - fnull = open(os.devnull, 'w') - subprocess.Popen([cmd], stdout=fnull, stderr=subprocess.STDOUT, shell=True) - - for p in vid_paths: - tf.logging.info('Writing final video to: %s' % p) - if debug_path: - tf.logging.info('Writing debug video to: %s' % debug_path) - - -def main(_): - # Initialize the camera capture objects. - cameras = get_cameras() - # Get one output directory per view. - view_dirs, vid_paths, debug_path = setup_paths() - try: - # Wait for user input. - try: - tf.logging.info('About to write to:') - for v in view_dirs: - tf.logging.info(v) - input('Press Enter to continue...') - except SyntaxError: - pass - - # Create a queue per view for displaying and saving images. - display_queues = [ImageQueue() for _ in range(FLAGS.num_views)] - reconcile_queues = [ImageQueue() for _ in range(FLAGS.num_views)] - - # Create a queue for collecting all tuples of multi-view images to write to - # disk. - write_queue = multiprocessing.Queue() - - processes = [] - # Create a process to display collected images in real time. - processes.append(Process(target=display_webcams, args=(display_queues,))) - # Create a process to collect the latest simultaneous images from each view. 
- processes.append(Process( - target=reconcile, args=(reconcile_queues, write_queue,))) - # Create a process to collect the latest simultaneous images from each view. - processes.append(Process( - target=persist, args=(write_queue, view_dirs,))) - - for (cam, dq, rq) in zip(cameras, display_queues, reconcile_queues): - processes.append(Process( - target=capture_webcam, args=(cam, dq, rq,))) - - for p in processes: - p.start() - for p in processes: - p.join() - - except KeyboardInterrupt: - # Close the queues. - for q in display_queues + reconcile_queues: - q.close() - # Release the cameras. - for cam in cameras: - cam.release() - - # Launch images_to_videos script asynchronously. - launch_images_to_videos(view_dirs, vid_paths, debug_path) - - try: - sys.exit(0) - except SystemExit: - os._exit(0) # pylint: disable=protected-access - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/download_pretrained.py b/research/tcn/download_pretrained.py deleted file mode 100644 index 4d42ee7328b67cf90f115c6b99c7bde5aae3780b..0000000000000000000000000000000000000000 --- a/research/tcn/download_pretrained.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Downloads pretrained InceptionV3 and ResnetV2-50 checkpoints.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tarfile -import urllib - -INCEPTION_URL = 'http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz' -RESNET_URL = 'http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz' - - -def DownloadWeights(model_dir, url): - os.makedirs(model_dir) - tar_path = os.path.join(model_dir, 'ckpt.tar.gz') - urllib.urlretrieve(url, tar_path) - tar = tarfile.open(os.path.join(model_dir, 'ckpt.tar.gz')) - tar.extractall(model_dir) - - -if __name__ == '__main__': - - # Create a directory for all pretrained checkpoints. - ckpt_dir = 'pretrained_checkpoints' - if not os.path.exists(ckpt_dir): - os.makedirs(ckpt_dir) - - # Download inception. - print('Downloading inception pretrained weights...') - inception_dir = os.path.join(ckpt_dir, 'inception') - DownloadWeights(inception_dir, INCEPTION_URL) - print('Done downloading inception pretrained weights.') - - print('Downloading resnet pretrained weights...') - resnet_dir = os.path.join(ckpt_dir, 'resnet') - DownloadWeights(resnet_dir, RESNET_URL) - print('Done downloading resnet pretrained weights.') - diff --git a/research/tcn/estimators/base_estimator.py b/research/tcn/estimators/base_estimator.py deleted file mode 100644 index f3832ff0a11388cb172f9cc9ba94e6743e2ec49a..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/base_estimator.py +++ /dev/null @@ -1,700 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Base estimator defining TCN training, test, and inference.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from abc import ABCMeta -from abc import abstractmethod -import os -import numpy as np -import numpy as np -import data_providers -import preprocessing -from utils import util -import tensorflow as tf -import tensorflow.contrib.slim as slim -from tensorflow.contrib.tpu.python.tpu import tpu_config -from tensorflow.contrib.tpu.python.tpu import tpu_estimator -from tensorflow.contrib.tpu.python.tpu import tpu_optimizer -from tensorflow.python.training import session_run_hook - -tf.app.flags.DEFINE_integer( - 'tf_random_seed', 0, 'Random seed.') -FLAGS = tf.app.flags.FLAGS - - -class InitFromPretrainedCheckpointHook(session_run_hook.SessionRunHook): - """Hook that can init graph from a pretrained checkpoint.""" - - def __init__(self, pretrained_checkpoint_dir): - """Initializes a `InitFromPretrainedCheckpointHook`. - - Args: - pretrained_checkpoint_dir: The dir of pretrained checkpoint. - - Raises: - ValueError: If pretrained_checkpoint_dir is invalid. 
- """ - if pretrained_checkpoint_dir is None: - raise ValueError('pretrained_checkpoint_dir must be specified.') - self._pretrained_checkpoint_dir = pretrained_checkpoint_dir - - def begin(self): - checkpoint_reader = tf.contrib.framework.load_checkpoint( - self._pretrained_checkpoint_dir) - variable_shape_map = checkpoint_reader.get_variable_to_shape_map() - - exclude_scopes = 'logits/,final_layer/,aux_' - # Skip restoring global_step as to run fine tuning from step=0. - exclusions = ['global_step'] - if exclude_scopes: - exclusions.extend([scope.strip() for scope in exclude_scopes.split(',')]) - - variable_to_restore = tf.contrib.framework.get_model_variables() - - # Variable filtering by given exclude_scopes. - filtered_variables_to_restore = {} - for v in variable_to_restore: - for exclusion in exclusions: - if v.name.startswith(exclusion): - break - else: - var_name = v.name.split(':')[0] - filtered_variables_to_restore[var_name] = v - - # Final filter by checking shape matching and skipping variables that - # are not in the checkpoint. - final_variables_to_restore = {} - for var_name, var_tensor in filtered_variables_to_restore.iteritems(): - if var_name not in variable_shape_map: - # Try moving average version of variable. - var_name = os.path.join(var_name, 'ExponentialMovingAverage') - if var_name not in variable_shape_map: - tf.logging.info( - 'Skip init [%s] because it is not in ckpt.', var_name) - # Skip variables not in the checkpoint. - continue - - if not var_tensor.get_shape().is_compatible_with( - variable_shape_map[var_name]): - # Skip init variable from ckpt if shape dismatch. 
- tf.logging.info( - 'Skip init [%s] from [%s] in ckpt because shape dismatch: %s vs %s', - var_tensor.name, var_name, - var_tensor.get_shape(), variable_shape_map[var_name]) - continue - - tf.logging.info('Init %s from %s in ckpt' % (var_tensor, var_name)) - final_variables_to_restore[var_name] = var_tensor - - self._init_fn = tf.contrib.framework.assign_from_checkpoint_fn( - self._pretrained_checkpoint_dir, - final_variables_to_restore) - - def after_create_session(self, session, coord): - tf.logging.info('Restoring InceptionV3 weights.') - self._init_fn(session) - tf.logging.info('Done restoring InceptionV3 weights.') - - -class BaseEstimator(object): - """Abstract TCN base estimator class.""" - __metaclass__ = ABCMeta - - def __init__(self, config, logdir): - """Constructor. - - Args: - config: A Luatable-like T object holding training config. - logdir: String, a directory where checkpoints and summaries are written. - """ - self._config = config - self._logdir = logdir - - @abstractmethod - def construct_input_fn(self, records, is_training): - """Builds an estimator input_fn. - - The input_fn is used to pass feature and target data to the train, - evaluate, and predict methods of the Estimator. - - Method to be overridden by implementations. - - Args: - records: A list of Strings, paths to TFRecords with image data. - is_training: Boolean, whether or not we're training. - - Returns: - Function, that has signature of ()->(dict of features, target). - features is a dict mapping feature names to `Tensors` - containing the corresponding feature data (typically, just a single - key/value pair 'raw_data' -> image `Tensor` for TCN. - labels is a 1-D int32 `Tensor` holding labels. - """ - pass - - def preprocess_data(self, images, is_training): - """Preprocesses raw images for either training or inference. - - Args: - images: A 4-D float32 `Tensor` holding images to preprocess. - is_training: Boolean, whether or not we're in training. 
- - Returns: - data_preprocessed: data after the preprocessor. - """ - config = self._config - height = config.data.height - width = config.data.width - min_scale = config.data.augmentation.minscale - max_scale = config.data.augmentation.maxscale - p_scale_up = config.data.augmentation.proportion_scaled_up - aug_color = config.data.augmentation.color - fast_mode = config.data.augmentation.fast_mode - crop_strategy = config.data.preprocessing.eval_cropping - preprocessed_images = preprocessing.preprocess_images( - images, is_training, height, width, - min_scale, max_scale, p_scale_up, - aug_color=aug_color, fast_mode=fast_mode, - crop_strategy=crop_strategy) - return preprocessed_images - - @abstractmethod - def forward(self, images, is_training, reuse=False): - """Defines the forward pass that converts batch images to embeddings. - - Method to be overridden by implementations. - - Args: - images: A 4-D float32 `Tensor` holding images to be embedded. - is_training: Boolean, whether or not we're in training mode. - reuse: Boolean, whether or not to reuse embedder. - Returns: - embeddings: A 2-D float32 `Tensor` holding embedded images. - """ - pass - - @abstractmethod - def define_loss(self, embeddings, labels, is_training): - """Defines the loss function on the embedding vectors. - - Method to be overridden by implementations. - - Args: - embeddings: A 2-D float32 `Tensor` holding embedded images. - labels: A 1-D int32 `Tensor` holding problem labels. - is_training: Boolean, whether or not we're in training mode. - - Returns: - loss: tf.float32 scalar. - """ - pass - - @abstractmethod - def define_eval_metric_ops(self): - """Defines the dictionary of eval metric tensors. - - Method to be overridden by implementations. - - Returns: - eval_metric_ops: A dict of name/value pairs specifying the - metrics that will be calculated when the model runs in EVAL mode. - """ - pass - - def get_train_op(self, loss): - """Creates a training op. 
- - Args: - loss: A float32 `Tensor` representing the total training loss. - Returns: - train_op: A slim.learning.create_train_op train_op. - Raises: - ValueError: If specified optimizer isn't supported. - """ - # Get variables to train (defined in subclass). - assert self.variables_to_train - - # Define a learning rate schedule. - decay_steps = self._config.learning.decay_steps - decay_factor = self._config.learning.decay_factor - learning_rate = float(self._config.learning.learning_rate) - - # Define a learning rate schedule. - global_step = slim.get_or_create_global_step() - learning_rate = tf.train.exponential_decay( - learning_rate, - global_step, - decay_steps, - decay_factor, - staircase=True) - - # Create an optimizer. - opt_type = self._config.learning.optimizer - if opt_type == 'adam': - opt = tf.train.AdamOptimizer(learning_rate) - elif opt_type == 'momentum': - opt = tf.train.MomentumOptimizer(learning_rate, 0.9) - elif opt_type == 'rmsprop': - opt = tf.train.RMSPropOptimizer(learning_rate, momentum=0.9, - epsilon=1.0, decay=0.9) - else: - raise ValueError('Unsupported optimizer %s' % opt_type) - - if self._config.use_tpu: - opt = tpu_optimizer.CrossShardOptimizer(opt) - - # Create a training op. - # train_op = opt.minimize(loss, var_list=self.variables_to_train) - # Create a training op. - train_op = slim.learning.create_train_op( - loss, - optimizer=opt, - variables_to_train=self.variables_to_train, - update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS)) - - return train_op - - def _get_model_fn(self): - """Defines behavior for training, evaluation, and inference (prediction). - - Returns: - `model_fn` for `Estimator`. - """ - # pylint: disable=unused-argument - def model_fn(features, labels, mode, params): - """Build the model based on features, labels, and mode. - - Args: - features: Dict, strings to `Tensor` input data, returned by the - input_fn. - labels: The labels Tensor returned by the input_fn. - mode: A string indicating the mode. 
This will be either - tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, - or tf.estimator.ModeKeys.EVAL. - params: A dict holding training parameters, passed in during TPU - training. - - Returns: - A tf.estimator.EstimatorSpec specifying train/test/inference behavior. - """ - is_training = mode == tf.estimator.ModeKeys.TRAIN - - # Get preprocessed images from the features dict. - batch_preprocessed = features['batch_preprocessed'] - - # Do a forward pass to embed data. - batch_encoded = self.forward(batch_preprocessed, is_training) - - # Optionally set the pretrained initialization function. - initializer_fn = None - if mode == tf.estimator.ModeKeys.TRAIN: - initializer_fn = self.pretrained_init_fn - - # If we're training or evaluating, define total loss. - total_loss = None - if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): - loss = self.define_loss(batch_encoded, labels, is_training) - tf.losses.add_loss(loss) - total_loss = tf.losses.get_total_loss() - - # If we're training, define a train op. - train_op = None - if mode == tf.estimator.ModeKeys.TRAIN: - train_op = self.get_train_op(total_loss) - - # If we're doing inference, set the output to be the embedded images. - predictions_dict = None - if mode == tf.estimator.ModeKeys.PREDICT: - predictions_dict = {'embeddings': batch_encoded} - # Pass through additional metadata stored in features. - for k, v in features.iteritems(): - predictions_dict[k] = v - - # If we're evaluating, define some eval metrics. - eval_metric_ops = None - if mode == tf.estimator.ModeKeys.EVAL: - eval_metric_ops = self.define_eval_metric_ops() - - # Define training scaffold to load pretrained weights. - num_checkpoint_to_keep = self._config.logging.checkpoint.num_to_keep - saver = tf.train.Saver( - max_to_keep=num_checkpoint_to_keep) - - if is_training and self._config.use_tpu: - # TPU doesn't have a scaffold option at the moment, so initialize - # pretrained weights using a custom train_hook instead. 
- return tpu_estimator.TPUEstimatorSpec( - mode, - loss=total_loss, - eval_metrics=None, - train_op=train_op, - predictions=predictions_dict) - else: - # Build a scaffold to initialize pretrained weights. - scaffold = tf.train.Scaffold( - init_fn=initializer_fn, - saver=saver, - summary_op=None) - return tf.estimator.EstimatorSpec( - mode=mode, - predictions=predictions_dict, - loss=total_loss, - train_op=train_op, - eval_metric_ops=eval_metric_ops, - scaffold=scaffold) - return model_fn - - def train(self): - """Runs training.""" - # Get a list of training tfrecords. - config = self._config - training_dir = config.data.training - training_records = util.GetFilesRecursively(training_dir) - - # Define batch size. - self._batch_size = config.data.batch_size - - # Create a subclass-defined training input function. - train_input_fn = self.construct_input_fn( - training_records, is_training=True) - - # Create the estimator. - estimator = self._build_estimator(is_training=True) - - train_hooks = None - if config.use_tpu: - # TPU training initializes pretrained weights using a custom train hook. - train_hooks = [] - if tf.train.latest_checkpoint(self._logdir) is None: - train_hooks.append( - InitFromPretrainedCheckpointHook( - config[config.embedder_strategy].pretrained_checkpoint)) - - # Run training. - estimator.train(input_fn=train_input_fn, hooks=train_hooks, - steps=config.learning.max_step) - - def _build_estimator(self, is_training): - """Returns an Estimator object. - - Args: - is_training: Boolean, whether or not we're in training mode. - - Returns: - A tf.estimator.Estimator. 
- """ - config = self._config - save_checkpoints_steps = config.logging.checkpoint.save_checkpoints_steps - keep_checkpoint_max = self._config.logging.checkpoint.num_to_keep - if is_training and config.use_tpu: - iterations = config.tpu.iterations - num_shards = config.tpu.num_shards - run_config = tpu_config.RunConfig( - save_checkpoints_secs=None, - save_checkpoints_steps=save_checkpoints_steps, - keep_checkpoint_max=keep_checkpoint_max, - master=FLAGS.master, - evaluation_master=FLAGS.master, - model_dir=self._logdir, - tpu_config=tpu_config.TPUConfig( - iterations_per_loop=iterations, - num_shards=num_shards, - per_host_input_for_training=num_shards <= 8), - tf_random_seed=FLAGS.tf_random_seed) - - batch_size = config.data.batch_size - return tpu_estimator.TPUEstimator( - model_fn=self._get_model_fn(), - config=run_config, - use_tpu=True, - train_batch_size=batch_size, - eval_batch_size=batch_size) - else: - run_config = tf.estimator.RunConfig().replace( - model_dir=self._logdir, - save_checkpoints_steps=save_checkpoints_steps, - keep_checkpoint_max=keep_checkpoint_max, - tf_random_seed=FLAGS.tf_random_seed) - return tf.estimator.Estimator( - model_fn=self._get_model_fn(), - config=run_config) - - def evaluate(self): - """Runs `Estimator` validation. - """ - config = self._config - - # Get a list of validation tfrecords. - validation_dir = config.data.validation - validation_records = util.GetFilesRecursively(validation_dir) - - # Define batch size. - self._batch_size = config.data.batch_size - - # Create a subclass-defined training input function. - validation_input_fn = self.construct_input_fn( - validation_records, False) - - # Create the estimator. - estimator = self._build_estimator(is_training=False) - - # Run validation. 
- eval_batch_size = config.data.batch_size - num_eval_samples = config.val.num_eval_samples - num_eval_batches = int(num_eval_samples / eval_batch_size) - estimator.evaluate(input_fn=validation_input_fn, steps=num_eval_batches) - - def inference( - self, inference_input, checkpoint_path, batch_size=None, **kwargs): - """Defines 3 of modes of inference. - - Inputs: - * Mode 1: Input is an input_fn. - * Mode 2: Input is a TFRecord (or list of TFRecords). - * Mode 3: Input is a numpy array holding an image (or array of images). - - Outputs: - * Mode 1: this returns an iterator over embeddings and additional - metadata. See - https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator#predict - for details. - * Mode 2: Returns an iterator over tuples of - (embeddings, raw_image_strings, sequence_name), where embeddings is a - 2-D float32 numpy array holding [sequence_size, embedding_size] image - embeddings, raw_image_strings is a 1-D string numpy array holding - [sequence_size] jpeg-encoded image strings, and sequence_name is a - string holding the name of the embedded sequence. - * Mode 3: Returns a tuple of (embeddings, raw_image_strings), where - embeddings is a 2-D float32 numpy array holding - [batch_size, embedding_size] image embeddings, raw_image_strings is a - 1-D string numpy array holding [batch_size] jpeg-encoded image strings. - - Args: - inference_input: This can be a tf.Estimator input_fn, a TFRecord path, - a list of TFRecord paths, a numpy image, or an array of numpy images. - checkpoint_path: String, path to the checkpoint to restore for inference. - batch_size: Int, the size of the batch to use for inference. - **kwargs: Additional keyword arguments, depending on the mode. - See _input_fn_inference, _tfrecord_inference, and _np_inference. - Returns: - inference_output: Inference output depending on mode, see above for - details. 
- Raises: - ValueError: If inference_input isn't a tf.Estimator input_fn, - a TFRecord path, a list of TFRecord paths, or a numpy array, - """ - # Mode 1: input is a callable tf.Estimator input_fn. - if callable(inference_input): - return self._input_fn_inference( - input_fn=inference_input, checkpoint_path=checkpoint_path, **kwargs) - # Mode 2: Input is a TFRecord path (or list of TFRecord paths). - elif util.is_tfrecord_input(inference_input): - return self._tfrecord_inference( - records=inference_input, checkpoint_path=checkpoint_path, - batch_size=batch_size, **kwargs) - # Mode 3: Input is a numpy array of raw images. - elif util.is_np_array(inference_input): - return self._np_inference( - np_images=inference_input, checkpoint_path=checkpoint_path, **kwargs) - else: - raise ValueError( - 'inference input must be a tf.Estimator input_fn, a TFRecord path,' - 'a list of TFRecord paths, or a numpy array. Got: %s' % str(type( - inference_input))) - - def _input_fn_inference(self, input_fn, checkpoint_path, predict_keys=None): - """Mode 1: tf.Estimator inference. - - Args: - input_fn: Function, that has signature of ()->(dict of features, None). - This is a function called by the estimator to get input tensors (stored - in the features dict) to do inference over. - checkpoint_path: String, path to a specific checkpoint to restore. - predict_keys: List of strings, the keys of the `Tensors` in the features - dict (returned by the input_fn) to evaluate during inference. - Returns: - predictions: An Iterator, yielding evaluated values of `Tensors` - specified in `predict_keys`. - """ - # Create the estimator. - estimator = self._build_estimator(is_training=False) - - # Create an iterator of predicted embeddings. 
- predictions = estimator.predict(input_fn=input_fn, - checkpoint_path=checkpoint_path, - predict_keys=predict_keys) - return predictions - - def _tfrecord_inference(self, records, checkpoint_path, batch_size, - num_sequences=-1, reuse=False): - """Mode 2: TFRecord inference. - - Args: - records: List of strings, paths to TFRecords. - checkpoint_path: String, path to a specific checkpoint to restore. - batch_size: Int, size of inference batch. - num_sequences: Int, number of sequences to embed. If -1, - embed everything. - reuse: Boolean, whether or not to reuse embedder weights. - Yields: - (embeddings, raw_image_strings, sequence_name): - embeddings is a 2-D float32 numpy array holding - [sequence_size, embedding_size] image embeddings. - raw_image_strings is a 1-D string numpy array holding - [sequence_size] jpeg-encoded image strings. - sequence_name is a string holding the name of the embedded sequence. - """ - tf.reset_default_graph() - if not isinstance(records, list): - records = list(records) - - # Map the list of tfrecords to a dataset of preprocessed images. - num_views = self._config.data.num_views - (views, task, seq_len) = data_providers.full_sequence_provider( - records, num_views) - tensor_dict = { - 'raw_image_strings': views, - 'task': task, - 'seq_len': seq_len - } - - # Create a preprocess function over raw image string placeholders. - image_str_placeholder = tf.placeholder(tf.string, shape=[None]) - decoded = preprocessing.decode_images(image_str_placeholder) - decoded.set_shape([batch_size, None, None, 3]) - preprocessed = self.preprocess_data(decoded, is_training=False) - - # Create an inference graph over preprocessed images. - embeddings = self.forward(preprocessed, is_training=False, reuse=reuse) - - # Create a saver to restore model variables. - tf.train.get_or_create_global_step() - saver = tf.train.Saver(tf.all_variables()) - - # Create a session and restore model variables. 
- with tf.train.MonitoredSession() as sess: - saver.restore(sess, checkpoint_path) - cnt = 0 - # If num_sequences is specified, embed that many sequences, else embed - # everything. - try: - while cnt < num_sequences if num_sequences != -1 else True: - # Get a preprocessed image sequence. - np_data = sess.run(tensor_dict) - np_raw_images = np_data['raw_image_strings'] - np_seq_len = np_data['seq_len'] - np_task = np_data['task'] - - # Embed each view. - embedding_size = self._config.embedding_size - view_embeddings = [ - np.zeros((0, embedding_size)) for _ in range(num_views)] - for view_index in range(num_views): - view_raw = np_raw_images[view_index] - # Embed the full sequence. - t = 0 - while t < np_seq_len: - # Decode and preprocess the batch of image strings. - embeddings_np = sess.run( - embeddings, feed_dict={ - image_str_placeholder: view_raw[t:t+batch_size]}) - view_embeddings[view_index] = np.append( - view_embeddings[view_index], embeddings_np, axis=0) - tf.logging.info('Embedded %d images for task %s' % (t, np_task)) - t += batch_size - - # Done embedding for all views. - view_raw_images = np_data['raw_image_strings'] - yield (view_embeddings, view_raw_images, np_task) - cnt += 1 - except tf.errors.OutOfRangeError: - tf.logging.info('Done embedding entire dataset.') - - def _np_inference(self, np_images, checkpoint_path): - """Mode 3: Call this repeatedly to do inference over numpy images. - - This mode is for when we we want to do real-time inference over - some stream of images (represented as numpy arrays). - - Args: - np_images: A float32 numpy array holding images to embed. - checkpoint_path: String, path to a specific checkpoint to restore. - Returns: - (embeddings, raw_image_strings): - embeddings is a 2-D float32 numpy array holding - [inferred batch_size, embedding_size] image embeddings. - raw_image_strings is a 1-D string numpy array holding - [inferred batch_size] jpeg-encoded image strings. 
- """ - if isinstance(np_images, list): - np_images = np.asarray(np_images) - # Add a batch dimension if only 3-dimensional. - if len(np_images.shape) == 3: - np_images = np.expand_dims(np_images, axis=0) - - # If np_images are in the range [0,255], convert to [0,1]. - assert np.min(np_images) >= 0. - if (np.min(np_images), np.max(np_images)) == (0, 255): - np_images = np_images.astype(np.float32) / 255. - assert (np.min(np_images), np.max(np_images)) == (0., 1.) - - # If this is the first pass, set up inference graph. - if not hasattr(self, '_np_inf_tensor_dict'): - self._setup_np_inference(np_images, checkpoint_path) - - # Convert np_images to embeddings. - np_tensor_dict = self._sess.run(self._np_inf_tensor_dict, feed_dict={ - self._image_placeholder: np_images - }) - return np_tensor_dict['embeddings'], np_tensor_dict['raw_image_strings'] - - def _setup_np_inference(self, np_images, checkpoint_path): - """Sets up and restores inference graph, creates and caches a Session.""" - tf.logging.info('Restoring model weights.') - - # Define inference over an image placeholder. - _, height, width, _ = np.shape(np_images) - image_placeholder = tf.placeholder( - tf.float32, shape=(None, height, width, 3)) - - # Preprocess batch. - preprocessed = self.preprocess_data(image_placeholder, is_training=False) - - # Unscale and jpeg encode preprocessed images for display purposes. - im_strings = preprocessing.unscale_jpeg_encode(preprocessed) - - # Do forward pass to get embeddings. - embeddings = self.forward(preprocessed, is_training=False) - - # Create a saver to restore model variables. - tf.train.get_or_create_global_step() - saver = tf.train.Saver(tf.all_variables()) - - self._image_placeholder = image_placeholder - self._batch_encoded = embeddings - - self._np_inf_tensor_dict = { - 'embeddings': embeddings, - 'raw_image_strings': im_strings, - } - - # Create a session and restore model variables. 
- self._sess = tf.Session() - saver.restore(self._sess, checkpoint_path) diff --git a/research/tcn/estimators/get_estimator.py b/research/tcn/estimators/get_estimator.py deleted file mode 100644 index 30b850edc6a26ea1712decad166227928b685866..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/get_estimator.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Get a configured estimator.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from estimators import mvtcn_estimator as mvtcn_estimators -from estimators import svtcn_estimator - - -def get_mvtcn_estimator(loss_strategy, config, logdir): - """Returns a configured MVTCN estimator.""" - loss_to_trainer = { - 'triplet_semihard': mvtcn_estimators.MVTCNTripletEstimator, - 'npairs': mvtcn_estimators.MVTCNNpairsEstimator, - } - if loss_strategy not in loss_to_trainer: - raise ValueError('Unknown loss for MVTCN: %s' % loss_strategy) - estimator = loss_to_trainer[loss_strategy](config, logdir) - return estimator - - -def get_estimator(config, logdir): - """Returns an unsupervised model trainer based on config. - - Args: - config: A T object holding training configs. - logdir: String, path to directory where model checkpoints and summaries - are saved. 
- Returns: - estimator: A configured `TCNEstimator` object. - Raises: - ValueError: If unknown training strategy is specified. - """ - # Get the training strategy. - training_strategy = config.training_strategy - if training_strategy == 'mvtcn': - loss_strategy = config.loss_strategy - estimator = get_mvtcn_estimator( - loss_strategy, config, logdir) - elif training_strategy == 'svtcn': - estimator = svtcn_estimator.SVTCNTripletEstimator(config, logdir) - else: - raise ValueError('Unknown training strategy: %s' % training_strategy) - return estimator diff --git a/research/tcn/estimators/mvtcn_estimator.py b/research/tcn/estimators/mvtcn_estimator.py deleted file mode 100644 index 4a036b43566730c5f2225a79c25c30abc3887f56..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/mvtcn_estimator.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""MVTCN trainer implementations with various metric learning losses.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import data_providers -import model as model_module -from estimators import base_estimator -import tensorflow as tf - - -class MVTCNEstimator(base_estimator.BaseEstimator): - """Multi-view TCN base class.""" - - def __init__(self, config, logdir): - super(MVTCNEstimator, self).__init__(config, logdir) - - def _pairs_provider(self, records, is_training): - config = self._config - num_views = config.data.num_views - window = config.mvtcn.window - num_parallel_calls = config.data.num_parallel_calls - sequence_prefetch_size = config.data.sequence_prefetch_size - batch_prefetch_size = config.data.batch_prefetch_size - examples_per_seq = config.data.examples_per_sequence - return functools.partial( - data_providers.multiview_pairs_provider, - file_list=records, - preprocess_fn=self.preprocess_data, - num_views=num_views, - window=window, - is_training=is_training, - examples_per_seq=examples_per_seq, - num_parallel_calls=num_parallel_calls, - sequence_prefetch_size=sequence_prefetch_size, - batch_prefetch_size=batch_prefetch_size) - - def forward(self, images_concat, is_training, reuse=False): - """See base class.""" - embedder_strategy = self._config.embedder_strategy - loss_strategy = self._config.loss_strategy - l2_normalize_embedding = self._config[loss_strategy].embedding_l2 - embedder = model_module.get_embedder( - embedder_strategy, - self._config, - images_concat, - is_training=is_training, - l2_normalize_embedding=l2_normalize_embedding, reuse=reuse) - embeddings_concat = embedder.construct_embedding() - variables_to_train = embedder.get_trainable_variables() - self.variables_to_train = variables_to_train - self.pretrained_init_fn = embedder.init_fn - return 
embeddings_concat - - def _collect_image_summaries(self, anchor_images, positive_images, - images_concat): - image_summaries = self._config.logging.summary.image_summaries - if image_summaries and not self._config.use_tpu: - batch_pairs_summary = tf.concat( - [anchor_images, positive_images], axis=2) - tf.summary.image('training/mvtcn_pairs', batch_pairs_summary) - tf.summary.image('training/images_preprocessed_concat', images_concat) - - -class MVTCNTripletEstimator(MVTCNEstimator): - """Multi-View TCN with semihard triplet loss.""" - - def __init__(self, config, logdir): - super(MVTCNTripletEstimator, self).__init__(config, logdir) - - def construct_input_fn(self, records, is_training): - """See base class.""" - def input_fn(params): - """Provides input to MVTCN models.""" - if is_training and self._config.use_tpu: - batch_size = params['batch_size'] - else: - batch_size = self._batch_size - (images_concat, - anchor_labels, - positive_labels, - anchor_images, - positive_images) = self._pairs_provider( - records, is_training)(batch_size=batch_size) - if is_training: - self._collect_image_summaries(anchor_images, positive_images, - images_concat) - labels = tf.concat([anchor_labels, positive_labels], axis=0) - features = {'batch_preprocessed': images_concat} - return (features, labels) - return input_fn - - def define_loss(self, embeddings, labels, is_training): - """See base class.""" - margin = self._config.triplet_semihard.margin - loss = tf.contrib.losses.metric_learning.triplet_semihard_loss( - labels=labels, embeddings=embeddings, margin=margin) - self._loss = loss - if is_training and not self._config.use_tpu: - tf.summary.scalar('training/triplet_semihard', loss) - return loss - - def define_eval_metric_ops(self): - """See base class.""" - return {'validation/triplet_semihard': tf.metrics.mean(self._loss)} - - -class MVTCNNpairsEstimator(MVTCNEstimator): - """Multi-View TCN with npairs loss.""" - - def __init__(self, config, logdir): - 
super(MVTCNNpairsEstimator, self).__init__(config, logdir) - - def construct_input_fn(self, records, is_training): - """See base class.""" - def input_fn(params): - """Provides input to MVTCN models.""" - if is_training and self._config.use_tpu: - batch_size = params['batch_size'] - else: - batch_size = self._batch_size - (images_concat, - npairs_labels, - _, - anchor_images, - positive_images) = self._pairs_provider( - records, is_training)(batch_size=batch_size) - if is_training: - self._collect_image_summaries(anchor_images, positive_images, - images_concat) - features = {'batch_preprocessed': images_concat} - return (features, npairs_labels) - return input_fn - - def define_loss(self, embeddings, labels, is_training): - """See base class.""" - embeddings_anchor, embeddings_positive = tf.split(embeddings, 2, axis=0) - loss = tf.contrib.losses.metric_learning.npairs_loss( - labels=labels, embeddings_anchor=embeddings_anchor, - embeddings_positive=embeddings_positive) - self._loss = loss - if is_training and not self._config.use_tpu: - tf.summary.scalar('training/npairs', loss) - return loss - - def define_eval_metric_ops(self): - """See base class.""" - return {'validation/npairs': tf.metrics.mean(self._loss)} diff --git a/research/tcn/estimators/svtcn_estimator.py b/research/tcn/estimators/svtcn_estimator.py deleted file mode 100644 index 069f7e8dd100082b99eaa2e8a8c0249566b4727f..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/svtcn_estimator.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""SVTCN estimator implementation.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import data_providers -import model as model_module -from estimators import base_estimator -from estimators import svtcn_loss -import tensorflow as tf - - -class SVTCNEstimator(base_estimator.BaseEstimator): - """Single-view TCN Estimator base class.""" - - def __init__(self, config, logdir): - super(SVTCNEstimator, self).__init__(config, logdir) - - def construct_input_fn(self, records, is_training): - """See base class.""" - config = self._config - num_views = config.data.num_views - num_parallel_calls = config.data.num_parallel_calls - sequence_prefetch_size = config.data.sequence_prefetch_size - batch_prefetch_size = config.data.batch_prefetch_size - - def input_fn(): - """Provides input to SVTCN models.""" - (images_preprocessed, - images_raw, - timesteps) = data_providers.singleview_tcn_provider( - file_list=records, - preprocess_fn=self.preprocess_data, - num_views=num_views, - is_training=is_training, - batch_size=self._batch_size, - num_parallel_calls=num_parallel_calls, - sequence_prefetch_size=sequence_prefetch_size, - batch_prefetch_size=batch_prefetch_size) - - if config.logging.summary.image_summaries and is_training: - tf.summary.image('training/svtcn_images', images_raw) - - features = {'batch_preprocessed': images_preprocessed} - return (features, timesteps) - return input_fn - - def forward(self, images, 
is_training, reuse=False): - """See base class.""" - embedder_strategy = self._config.embedder_strategy - embedder = model_module.get_embedder( - embedder_strategy, - self._config, - images, - is_training=is_training, reuse=reuse) - embeddings = embedder.construct_embedding() - - if is_training: - self.variables_to_train = embedder.get_trainable_variables() - self.pretrained_init_fn = embedder.init_fn - return embeddings - - -class SVTCNTripletEstimator(SVTCNEstimator): - """Single-View TCN with semihard triplet loss.""" - - def __init__(self, config, logdir): - super(SVTCNTripletEstimator, self).__init__(config, logdir) - - def define_loss(self, embeddings, timesteps, is_training): - """See base class.""" - pos_radius = self._config.svtcn.pos_radius - neg_radius = self._config.svtcn.neg_radius - margin = self._config.triplet_semihard.margin - loss = svtcn_loss.singleview_tcn_loss( - embeddings, timesteps, pos_radius, neg_radius, margin=margin) - self._loss = loss - if is_training: - tf.summary.scalar('training/svtcn_loss', loss) - return loss - - def define_eval_metric_ops(self): - """See base class.""" - return {'validation/svtcn_loss': tf.metrics.mean(self._loss)} diff --git a/research/tcn/estimators/svtcn_loss.py b/research/tcn/estimators/svtcn_loss.py deleted file mode 100644 index 2617803495a6d92971a466639ecc02aab3371e50..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/svtcn_loss.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""This implements single view TCN triplet loss.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -def pairwise_squared_distance(feature): - """Computes the squared pairwise distance matrix. - - output[i, j] = || feature[i, :] - feature[j, :] ||_2^2 - - Args: - feature: 2-D Tensor of size [number of data, feature dimension] - - Returns: - pairwise_squared_distances: 2-D Tensor of size - [number of data, number of data] - """ - pairwise_squared_distances = tf.add( - tf.reduce_sum( - tf.square(feature), axis=1, keep_dims=True), - tf.reduce_sum( - tf.square(tf.transpose(feature)), axis=0, - keep_dims=True)) - 2.0 * tf.matmul(feature, tf.transpose(feature)) - - # Deal with numerical inaccuracies. Set small negatives to zero. - pairwise_squared_distances = tf.maximum(pairwise_squared_distances, 0.0) - return pairwise_squared_distances - - -def masked_maximum(data, mask, dim=1): - """Computes the axis wise maximum over chosen elements. - - Args: - data: N-D Tensor. - mask: N-D Tensor of zeros or ones. - dim: The dimension over which to compute the maximum. - - Returns: - masked_maximums: N-D Tensor. - The maximized dimension is of size 1 after the operation. - """ - axis_minimums = tf.reduce_min(data, dim, keep_dims=True) - masked_maximums = tf.reduce_max( - tf.multiply( - data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums - return masked_maximums - - -def masked_minimum(data, mask, dim=1): - """Computes the axis wise minimum over chosen elements. - - Args: - data: 2-D Tensor of size [n, m]. - mask: 2-D Boolean Tensor of size [n, m]. - dim: The dimension over which to compute the minimum. - - Returns: - masked_minimums: N-D Tensor. 
- The minimized dimension is of size 1 after the operation. - """ - axis_maximums = tf.reduce_max(data, dim, keep_dims=True) - masked_minimums = tf.reduce_min( - tf.multiply( - data - axis_maximums, mask), dim, keep_dims=True) + axis_maximums - return masked_minimums - - -def singleview_tcn_loss( - embeddings, timesteps, pos_radius, neg_radius, margin=1.0, - sequence_ids=None, multiseq=False): - """Computes the single view triplet loss with semi-hard negative mining. - - The loss encourages the positive distances (between a pair of embeddings with - the same labels) to be smaller than the minimum negative distance among - which are at least greater than the positive distance plus the margin constant - (called semi-hard negative) in the mini-batch. If no such negative exists, - uses the largest negative distance instead. - - Anchor, positive, negative selection is as follow: - Anchors: We consider every embedding timestep as an anchor. - Positives: pos_radius defines a radius (in timesteps) around each anchor from - which positives can be drawn. E.g. An anchor with t=10 and a pos_radius of - 2 produces a set of 4 (anchor,pos) pairs [(a=10, p=8), ... (a=10, p=12)]. - Negatives: neg_radius defines a boundary (in timesteps) around each anchor, - outside of which negatives can be drawn. E.g. An anchor with t=10 and a - neg_radius of 4 means negatives can be any t_neg where t_neg < 6 and - t_neg > 14. - - Args: - embeddings: 2-D Tensor of embedding vectors. - timesteps: 1-D Tensor with shape [batch_size, 1] of sequence timesteps. - pos_radius: int32; the size of the window (in timesteps) around each anchor - timestep that a positive can be drawn from. - neg_radius: int32; the size of the window (in timesteps) around each anchor - timestep that defines a negative boundary. Negatives can only be chosen - where negative timestep t is < negative boundary min or > negative - boundary max. - margin: Float; the triplet loss margin hyperparameter. 
- sequence_ids: (Optional) 1-D Tensor with shape [batch_size, 1] of sequence - ids. Together (sequence_id, sequence_timestep) give us a unique index for - each image if we have multiple sequences in a batch. - multiseq: Boolean, whether or not the batch is composed of multiple - sequences (with possibly colliding timesteps). - - Returns: - triplet_loss: tf.float32 scalar. - """ - assert neg_radius > pos_radius - - # If timesteps shape isn't [batchsize, 1], reshape to [batch_size, 1]. - tshape = tf.shape(timesteps) - assert tshape.shape == 2 or tshape.shape == 1 - if tshape.shape == 1: - timesteps = tf.reshape(timesteps, [tshape[0], 1]) - - # Build pairwise squared distance matrix. - pdist_matrix = pairwise_squared_distance(embeddings) - - # Build pairwise binary adjacency matrix, where adjacency[i,j] is True - # if timestep j is inside the positive range for timestep i and both - # timesteps come from the same sequence. - pos_radius = tf.cast(pos_radius, tf.int32) - - if multiseq: - # If sequence_ids shape isn't [batchsize, 1], reshape to [batch_size, 1]. - tshape = tf.shape(sequence_ids) - assert tshape.shape == 2 or tshape.shape == 1 - if tshape.shape == 1: - sequence_ids = tf.reshape(sequence_ids, [tshape[0], 1]) - - # Build pairwise binary adjacency matrix based on sequence_ids - sequence_adjacency = tf.equal(sequence_ids, tf.transpose(sequence_ids)) - - # Invert so we can select negatives only. - sequence_adjacency_not = tf.logical_not(sequence_adjacency) - - in_pos_range = tf.logical_and( - tf.less_equal( - tf.abs(timesteps - tf.transpose(timesteps)), pos_radius), - sequence_adjacency) - # Build pairwise binary discordance matrix, where discordance[i,j] is True - # if timestep j is inside the negative range for timestep i or if the - # timesteps come from different sequences. 
- in_neg_range = tf.logical_or( - tf.greater(tf.abs(timesteps - tf.transpose(timesteps)), neg_radius), - sequence_adjacency_not - ) - else: - in_pos_range = tf.less_equal( - tf.abs(timesteps - tf.transpose(timesteps)), pos_radius) - in_neg_range = tf.greater(tf.abs(timesteps - tf.transpose(timesteps)), - neg_radius) - - batch_size = tf.size(timesteps) - - # compute the mask - pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1]) - mask = tf.logical_and( - tf.tile(in_neg_range, [batch_size, 1]), - tf.greater(pdist_matrix_tile, - tf.reshape(tf.transpose(pdist_matrix), [-1, 1]))) - mask_final = tf.reshape( - tf.greater( - tf.reduce_sum( - tf.cast( - mask, dtype=tf.float32), 1, keep_dims=True), - 0.0), [batch_size, batch_size]) - mask_final = tf.transpose(mask_final) - - in_neg_range = tf.cast(in_neg_range, dtype=tf.float32) - mask = tf.cast(mask, dtype=tf.float32) - - # negatives_outside: smallest D_an where D_an > D_ap - negatives_outside = tf.reshape( - masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]) - negatives_outside = tf.transpose(negatives_outside) - - # negatives_inside: largest D_an - negatives_inside = tf.tile( - masked_maximum(pdist_matrix, in_neg_range), [1, batch_size]) - semi_hard_negatives = tf.where( - mask_final, negatives_outside, negatives_inside) - - loss_mat = tf.add(margin, pdist_matrix - semi_hard_negatives) - - mask_positives = tf.cast( - in_pos_range, dtype=tf.float32) - tf.diag(tf.ones([batch_size])) - - # In lifted-struct, the authors multiply 0.5 for upper triangular - # in semihard, they take all positive pairs except the diagonal. 
- num_positives = tf.reduce_sum(mask_positives) - - triplet_loss = tf.truediv( - tf.reduce_sum(tf.maximum(tf.multiply(loss_mat, mask_positives), 0.0)), - num_positives, - name='triplet_svtcn_loss') - - return triplet_loss diff --git a/research/tcn/estimators/svtcn_loss_test.py b/research/tcn/estimators/svtcn_loss_test.py deleted file mode 100644 index f5bdfd980e5bdc596bf2e2e1102ccc256bbe2585..0000000000000000000000000000000000000000 --- a/research/tcn/estimators/svtcn_loss_test.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Tests for svtcn_loss.py.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from sklearn.metrics.pairwise import euclidean_distances -from estimators import svtcn_loss -import tensorflow as tf - - -class SVTCNLoss(tf.test.TestCase): - - def testSVTCNLoss(self): - with self.test_session(): - num_data = 64 - num_sequences = 2 - num_data_per_seq = num_data // num_sequences - feat_dim = 6 - margin = 1.0 - times = np.tile(np.arange(num_data_per_seq, dtype=np.int32), - num_sequences) - times = np.reshape(times, [times.shape[0], 1]) - sequence_ids = np.concatenate( - [np.ones(num_data_per_seq)*i for i in range(num_sequences)]) - sequence_ids = np.reshape(sequence_ids, [sequence_ids.shape[0], 1]) - - pos_radius = 6 - neg_radius = 12 - - embedding = np.random.rand(num_data, feat_dim).astype(np.float32) - - # Compute the loss in NP - - # Get a positive mask, i.e. indices for each time index - # that are inside the positive range. - in_pos_range = np.less_equal( - np.abs(times - times.transpose()), pos_radius) - - # Get a negative mask, i.e. indices for each time index - # that are inside the negative range (> t + (neg_mult * pos_radius) - # and < t - (neg_mult * pos_radius). 
- in_neg_range = np.greater(np.abs(times - times.transpose()), neg_radius) - - sequence_adjacency = sequence_ids == sequence_ids.T - sequence_adjacency_not = np.logical_not(sequence_adjacency) - - pdist_matrix = euclidean_distances(embedding, squared=True) - loss_np = 0.0 - num_positives = 0.0 - for i in range(num_data): - for j in range(num_data): - if in_pos_range[i, j] and i != j and sequence_adjacency[i, j]: - num_positives += 1.0 - - pos_distance = pdist_matrix[i][j] - neg_distances = [] - - for k in range(num_data): - if in_neg_range[i, k] or sequence_adjacency_not[i, k]: - neg_distances.append(pdist_matrix[i][k]) - - neg_distances.sort() # sort by distance - chosen_neg_distance = neg_distances[0] - - for l in range(len(neg_distances)): - chosen_neg_distance = neg_distances[l] - if chosen_neg_distance > pos_distance: - break - - loss_np += np.maximum( - 0.0, margin - chosen_neg_distance + pos_distance) - - loss_np /= num_positives - - # Compute the loss in TF - loss_tf = svtcn_loss.singleview_tcn_loss( - embeddings=tf.convert_to_tensor(embedding), - timesteps=tf.convert_to_tensor(times), - pos_radius=pos_radius, - neg_radius=neg_radius, - margin=margin, - sequence_ids=tf.convert_to_tensor(sequence_ids), - multiseq=True - ) - loss_tf = loss_tf.eval() - self.assertAllClose(loss_np, loss_tf) - - -if __name__ == '__main__': - tf.test.main() diff --git a/research/tcn/eval.py b/research/tcn/eval.py deleted file mode 100644 index de24e93e1985dee80c8b9865c68095062d78928f..0000000000000000000000000000000000000000 --- a/research/tcn/eval.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Calculates running validation of TCN models (and baseline comparisons).""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time -from estimators.get_estimator import get_estimator -from utils import util -import tensorflow as tf -tf.logging.set_verbosity(tf.logging.INFO) - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. - """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string('master', 'local', - 'BNS name of the TensorFlow master to use') -tf.app.flags.DEFINE_string( - 'logdir', '/tmp/tcn', 'Directory where to write event logs.') -FLAGS = tf.app.flags.FLAGS - - -def main(_): - """Runs main eval loop.""" - # Parse config dict from yaml config files / command line flags. - logdir = FLAGS.logdir - config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params) - - # Choose an estimator based on training strategy. - estimator = get_estimator(config, logdir) - - # Wait for the first checkpoint file to be written. - while not tf.train.latest_checkpoint(logdir): - tf.logging.info('Waiting for a checkpoint file...') - time.sleep(10) - - # Run validation. 
- while True: - estimator.evaluate() - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/g3doc/alignment.png b/research/tcn/g3doc/alignment.png deleted file mode 100644 index 7cfdfece274ec65fb9afbd1bf56b3a9c30597cef..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/alignment.png and /dev/null differ diff --git a/research/tcn/g3doc/all_error.png b/research/tcn/g3doc/all_error.png deleted file mode 100644 index c7b2d5b4fe8a89e9c492c6c943a20c84915afec5..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/all_error.png and /dev/null differ diff --git a/research/tcn/g3doc/avg_error.png b/research/tcn/g3doc/avg_error.png deleted file mode 100644 index 0b421824df123276e9de098e958a5e81860a1d20..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/avg_error.png and /dev/null differ diff --git a/research/tcn/g3doc/loss.png b/research/tcn/g3doc/loss.png deleted file mode 100644 index 44eaa6d6fd835ced99fd347517d5fbd54c4dc336..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/loss.png and /dev/null differ diff --git a/research/tcn/g3doc/pca.png b/research/tcn/g3doc/pca.png deleted file mode 100644 index 2a9ce8f335d50c59138ddcfc493afb6302505bf9..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/pca.png and /dev/null differ diff --git a/research/tcn/g3doc/val_loss.png b/research/tcn/g3doc/val_loss.png deleted file mode 100644 index 73ad725c36c86306ff58eb9db1267e6fed6c0c13..0000000000000000000000000000000000000000 Binary files a/research/tcn/g3doc/val_loss.png and /dev/null differ diff --git a/research/tcn/generate_videos.py b/research/tcn/generate_videos.py deleted file mode 100644 index 2b2ecba84038a01b88e3468ebfb83b7e13df4368..0000000000000000000000000000000000000000 --- a/research/tcn/generate_videos.py +++ /dev/null @@ -1,426 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Generates imitation videos. - -Generate single pairwise imitation videos: -blaze build -c opt --config=cuda --copt=-mavx \ -learning/brain/research/tcn/generate_videos && \ -blaze-bin/learning/brain/research/tcn/generate_videos \ ---logtostderr \ ---config_paths $config_paths \ ---checkpointdir $checkpointdir \ ---checkpoint_iter $checkpoint_iter \ ---query_records_dir $query_records_dir \ ---target_records_dir $target_records_dir \ ---outdir $outdir \ ---mode single \ ---num_query_sequences 1 \ ---num_target_sequences -1 - -# Generate imitation videos with multiple sequences in the target set: -query_records_path -blaze build -c opt --config=cuda --copt=-mavx \ -learning/brain/research/tcn/generate_videos && \ -blaze-bin/learning/brain/research/tcn/generate_videos \ ---logtostderr \ ---config_paths $config_paths \ ---checkpointdir $checkpointdir \ ---checkpoint_iter $checkpoint_iter \ ---query_records_dir $query_records_dir \ ---target_records_dir $target_records_dir \ ---outdir $outdir \ ---num_multi_targets 1 \ -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -import tensorflow as tf -import os -import matplotlib -matplotlib.use("pdf") -import matplotlib.animation as animation -import matplotlib.pyplot as plt -import numpy as np -from 
estimators.get_estimator import get_estimator -from utils import util -tf.logging.set_verbosity(tf.logging.INFO) - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. - """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string( - 'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.') -tf.app.flags.DEFINE_string( - 'checkpoint_iter', '', 'Checkpoint iter to use.') -tf.app.flags.DEFINE_integer( - 'num_multi_targets', -1, - 'Number of imitation vids in the target set per imitation video.') -tf.app.flags.DEFINE_string( - 'outdir', '/tmp/tcn', 'Path to write embeddings to.') -tf.app.flags.DEFINE_string( - 'mode', 'single', 'single | multi. Single means generate imitation vids' - 'where query is being imitated by single sequence. 
Multi' - 'means generate imitation vids where query is being' - 'imitated by multiple.') -tf.app.flags.DEFINE_string('query_records_dir', '', - 'Directory of image tfrecords.') -tf.app.flags.DEFINE_string('target_records_dir', '', - 'Directory of image tfrecords.') -tf.app.flags.DEFINE_integer('query_view', 1, - 'Viewpoint of the query video.') -tf.app.flags.DEFINE_integer('target_view', 0, - 'Viewpoint of the imitation video.') -tf.app.flags.DEFINE_integer('smoothing_window', 5, - 'Number of frames to smooth over.') -tf.app.flags.DEFINE_integer('num_query_sequences', -1, - 'Number of query sequences to embed.') -tf.app.flags.DEFINE_integer('num_target_sequences', -1, - 'Number of target sequences to embed.') -FLAGS = tf.app.flags.FLAGS - - -def SmoothEmbeddings(embs): - """Temporally smoothes a sequence of embeddings.""" - new_embs = [] - window = int(FLAGS.smoothing_window) - for i in range(len(embs)): - min_i = max(i-window, 0) - max_i = min(i+window, len(embs)) - new_embs.append(np.mean(embs[min_i:max_i, :], axis=0)) - return np.array(new_embs) - - -def MakeImitationVideo( - outdir, vidname, query_im_strs, knn_im_strs, height=640, width=360): - """Creates a KNN imitation video. - - For each frame in vid0, pair with the frame at index in knn_indices in - vids1. Write video to disk. - - Args: - outdir: String, directory to write videos. - vidname: String, name of video. - query_im_strs: Numpy array holding query image strings. - knn_im_strs: Numpy array holding knn image strings. - height: Int, height of raw images. - width: Int, width of raw images. - """ - if not tf.gfile.Exists(outdir): - tf.gfile.MakeDirs(outdir) - vid_path = os.path.join(outdir, vidname) - combined = zip(query_im_strs, knn_im_strs) - - # Create and write the video. 
- fig = plt.figure() - ax = fig.add_subplot(111) - ax.set_aspect('equal') - ax.get_xaxis().set_visible(False) - ax.get_yaxis().set_visible(False) - im = ax.imshow( - np.zeros((height, width*2, 3)), cmap='gray', interpolation='nearest') - im.set_clim([0, 1]) - plt.tight_layout(pad=0, w_pad=0, h_pad=0) - # pylint: disable=invalid-name - def update_img(pair): - """Decode pairs of image strings, update a video.""" - im_i, im_j = pair - nparr_i = np.fromstring(str(im_i), np.uint8) - img_np_i = cv2.imdecode(nparr_i, 1) - img_np_i = img_np_i[..., [2, 1, 0]] - nparr_j = np.fromstring(str(im_j), np.uint8) - img_np_j = cv2.imdecode(nparr_j, 1) - img_np_j = img_np_j[..., [2, 1, 0]] - - # Optionally reshape the images to be same size. - frame = np.concatenate([img_np_i, img_np_j], axis=1) - im.set_data(frame) - return im - ani = animation.FuncAnimation(fig, update_img, combined, interval=15) - writer = animation.writers['ffmpeg'](fps=15) - dpi = 100 - tf.logging.info('Writing video to:\n %s \n' % vid_path) - ani.save('%s.mp4' % vid_path, writer=writer, dpi=dpi) - - -def GenerateImitationVideo( - vid_name, query_ims, query_embs, target_ims, target_embs, height, width): - """Generates a single cross-sequence imitation video. - - For each frame in some query sequence, find the nearest neighbor from - some target sequence in embedding space. - - Args: - vid_name: String, the name of the video. - query_ims: Numpy array of shape [query sequence length, height, width, 3]. - query_embs: Numpy array of shape [query sequence length, embedding size]. - target_ims: Numpy array of shape [target sequence length, height, width, - 3]. - target_embs: Numpy array of shape [target sequence length, embedding - size]. - height: Int, height of the raw image. - width: Int, width of the raw image. - """ - # For each query frame, find the index of the nearest neighbor in the - # target video. 
- knn_indices = [util.KNNIds(q, target_embs, k=1)[0] for q in query_embs] - - # Create and write out the video. - assert knn_indices - knn_ims = np.array([target_ims[k] for k in knn_indices]) - MakeImitationVideo(FLAGS.outdir, vid_name, query_ims, knn_ims, height, width) - - -def SingleImitationVideos( - query_records, target_records, config, height, width): - """Generates pairwise imitation videos. - - This creates all pairs of target imitating query videos, where each frame - on the left is matched to a nearest neighbor coming a single - embedded target video. - - Args: - query_records: List of Strings, paths to tfrecord datasets to use as - queries. - target_records: List of Strings, paths to tfrecord datasets to use as - targets. - config: A T object describing training config. - height: Int, height of the raw image. - width: Int, width of the raw image. - """ - # Embed query and target data. - (query_sequences_to_data, - target_sequences_to_data) = EmbedQueryTargetData( - query_records, target_records, config) - - qview = FLAGS.query_view - tview = FLAGS.target_view - - # Loop over query videos. - for task_i, data_i in query_sequences_to_data.iteritems(): - for task_j, data_j in target_sequences_to_data.iteritems(): - i_ims = data_i['images'] - i_embs = data_i['embeddings'] - query_embs = SmoothEmbeddings(i_embs[qview]) - query_ims = i_ims[qview] - - j_ims = data_j['images'] - j_embs = data_j['embeddings'] - target_embs = SmoothEmbeddings(j_embs[tview]) - target_ims = j_ims[tview] - - tf.logging.info('Generating %s imitating %s video.' % (task_j, task_i)) - vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_j, tview) - vid_name = vid_name.replace('/', '_') - GenerateImitationVideo(vid_name, query_ims, query_embs, - target_ims, target_embs, height, width) - - -def MultiImitationVideos( - query_records, target_records, config, height, width): - """Creates multi-imitation videos. 
- - This creates videos where every frame on the left is matched to a nearest - neighbor coming from a set of multiple embedded target videos. - - Args: - query_records: List of Strings, paths to tfrecord datasets to use as - queries. - target_records: List of Strings, paths to tfrecord datasets to use as - targets. - config: A T object describing training config. - height: Int, height of the raw image. - width: Int, width of the raw image. - """ - # Embed query and target data. - (query_sequences_to_data, - target_sequences_to_data) = EmbedQueryTargetData( - query_records, target_records, config) - - qview = FLAGS.query_view - tview = FLAGS.target_view - - # Loop over query videos. - for task_i, data_i in query_sequences_to_data.iteritems(): - i_ims = data_i['images'] - i_embs = data_i['embeddings'] - query_embs = SmoothEmbeddings(i_embs[qview]) - query_ims = i_ims[qview] - - all_target_embs = [] - all_target_ims = [] - - # If num_imitation_vids is -1, add all seq embeddings to the target set. - if FLAGS.num_multi_targets == -1: - num_multi_targets = len(target_sequences_to_data) - else: - # Else, add some specified number of seq embeddings to the target set. - num_multi_targets = FLAGS.num_multi_targets - for j in range(num_multi_targets): - task_j = target_sequences_to_data.keys()[j] - data_j = target_sequences_to_data[task_j] - print('Adding %s to target set' % task_j) - j_ims = data_j['images'] - j_embs = data_j['embeddings'] - - target_embs = SmoothEmbeddings(j_embs[tview]) - target_ims = j_ims[tview] - all_target_embs.extend(target_embs) - all_target_ims.extend(target_ims) - - # Generate a "j imitating i" video. - tf.logging.info('Generating all imitating %s video.' 
% task_i) - vid_name = 'q%sv%s_multiv%s' % (task_i, qview, tview) - vid_name = vid_name.replace('/', '_') - GenerateImitationVideo(vid_name, query_ims, query_embs, - all_target_ims, all_target_embs, height, width) - - -def SameSequenceVideos(query_records, config, height, width): - """Generate same sequence, cross-view imitation videos.""" - batch_size = config.data.embed_batch_size - - # Choose an estimator based on training strategy. - estimator = get_estimator(config, FLAGS.checkpointdir) - - # Choose a checkpoint path to restore. - checkpointdir = FLAGS.checkpointdir - checkpoint_path = os.path.join(checkpointdir, - 'model.ckpt-%s' % FLAGS.checkpoint_iter) - - # Embed num_sequences query sequences, store embeddings and image strings in - # query_sequences_to_data. - sequences_to_data = {} - for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference( - query_records, checkpoint_path, batch_size, - num_sequences=FLAGS.num_query_sequences): - sequences_to_data[seqname] = { - 'embeddings': view_embeddings, - 'images': view_raw_image_strings, - } - - # Loop over query videos. - qview = FLAGS.query_view - tview = FLAGS.target_view - for task_i, data_i in sequences_to_data.iteritems(): - ims = data_i['images'] - embs = data_i['embeddings'] - query_embs = SmoothEmbeddings(embs[qview]) - query_ims = ims[qview] - - target_embs = SmoothEmbeddings(embs[tview]) - target_ims = ims[tview] - - tf.logging.info('Generating %s imitating %s video.' % (task_i, task_i)) - vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_i, tview) - vid_name = vid_name.replace('/', '_') - GenerateImitationVideo(vid_name, query_ims, query_embs, - target_ims, target_embs, height, width) - - -def EmbedQueryTargetData(query_records, target_records, config): - """Embeds the full set of query_records and target_records. - - Args: - query_records: List of Strings, paths to tfrecord datasets to use as - queries. 
- target_records: List of Strings, paths to tfrecord datasets to use as - targets. - config: A T object describing training config. - - Returns: - query_sequences_to_data: A dict holding 'embeddings' and 'images' - target_sequences_to_data: A dict holding 'embeddings' and 'images' - """ - batch_size = config.data.embed_batch_size - - # Choose an estimator based on training strategy. - estimator = get_estimator(config, FLAGS.checkpointdir) - - # Choose a checkpoint path to restore. - checkpointdir = FLAGS.checkpointdir - checkpoint_path = os.path.join(checkpointdir, - 'model.ckpt-%s' % FLAGS.checkpoint_iter) - - # Embed num_sequences query sequences, store embeddings and image strings in - # query_sequences_to_data. - num_query_sequences = FLAGS.num_query_sequences - num_target_sequences = FLAGS.num_target_sequences - query_sequences_to_data = {} - for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference( - query_records, checkpoint_path, batch_size, - num_sequences=num_query_sequences): - query_sequences_to_data[seqname] = { - 'embeddings': view_embeddings, - 'images': view_raw_image_strings, - } - - if (query_records == target_records) and ( - num_query_sequences == num_target_sequences): - target_sequences_to_data = query_sequences_to_data - else: - # Embed num_sequences target sequences, store embeddings and image strings - # in sequences_to_data. - target_sequences_to_data = {} - for (view_embeddings, view_raw_image_strings, - seqname) in estimator.inference( - target_records, checkpoint_path, batch_size, - num_sequences=num_target_sequences): - target_sequences_to_data[seqname] = { - 'embeddings': view_embeddings, - 'images': view_raw_image_strings, - } - return query_sequences_to_data, target_sequences_to_data - - -def main(_): - # Parse config dict from yaml config files / command line flags. - config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params) - - # Get tables to embed. 
- query_records_dir = FLAGS.query_records_dir - query_records = util.GetFilesRecursively(query_records_dir) - - target_records_dir = FLAGS.target_records_dir - target_records = util.GetFilesRecursively(target_records_dir) - - height = config.data.raw_height - width = config.data.raw_width - mode = FLAGS.mode - if mode == 'multi': - # Generate videos where target set is composed of multiple videos. - MultiImitationVideos(query_records, target_records, config, - height, width) - elif mode == 'single': - # Generate videos where target set is a single video. - SingleImitationVideos(query_records, target_records, config, - height, width) - elif mode == 'same': - # Generate videos where target set is the same as query, but diff view. - SameSequenceVideos(query_records, config, height, width) - else: - raise ValueError('Unknown mode %s' % mode) - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/labeled_eval.py b/research/tcn/labeled_eval.py deleted file mode 100644 index a28856a13cc8db03394a615a46cff90f4aff8c75..0000000000000000000000000000000000000000 --- a/research/tcn/labeled_eval.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Generates test Recall@K statistics on labeled classification problems.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict -import os -import numpy as np -from sklearn.metrics.pairwise import pairwise_distances -from six.moves import xrange -import data_providers -from estimators.get_estimator import get_estimator -from utils import util -import tensorflow as tf -tf.logging.set_verbosity(tf.logging.INFO) - - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. - """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string( - 'mode', 'validation', - 'Which dataset to evaluate: `validation` | `test`.') -tf.app.flags.DEFINE_string('master', 'local', - 'BNS name of the TensorFlow master to use') -tf.app.flags.DEFINE_string( - 'checkpoint_iter', '', 'Evaluate this specific checkpoint.') -tf.app.flags.DEFINE_string( - 'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.') -tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.') -FLAGS = tf.app.flags.FLAGS - - -def nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1): - """Computes the n_neighbors nearest neighbors for every row in data. - - Args: - data: A np.float32 array of shape [num_data, embedding size] holding - an embedded validation / test dataset. - tasks: A list of strings of size [num_data] holding the task or sequence - name that each row belongs to. - n_neighbors: The number of knn indices to return for each row. 
- Returns: - indices: an np.int32 array of size [num_data, n_neighbors] holding the - n_neighbors nearest indices for every row in data. These are - restricted to be from different named sequences (as defined in `tasks`). - """ - - # Compute the pairwise sequence adjacency matrix from `tasks`. - num_data = data.shape[0] - tasks = np.array(tasks) - tasks = np.reshape(tasks, (num_data, 1)) - assert len(tasks.shape) == 2 - not_adjacent = (tasks != tasks.T) - - # Compute the symmetric pairwise distance matrix. - pdist = pairwise_distances(data, metric='sqeuclidean') - - # For every row in the pairwise distance matrix, only consider - # cross-sequence columns. - indices = np.zeros((num_data, n_neighbors), dtype=np.int32) - for idx in range(num_data): - # Restrict to cross_sequence neighbors. - distances = [( - pdist[idx][i], i) for i in xrange(num_data) if not_adjacent[idx][i]] - _, nearest_indices = zip(*sorted( - distances, key=lambda x: x[0])[:n_neighbors]) - indices[idx] = nearest_indices - return indices - - -def compute_cross_sequence_recall_at_k(retrieved_labels, labels, k_list): - """Compute recall@k for a given list of k values. - - Recall is one if an example of the same class is retrieved among the - top k nearest neighbors given a query example and zero otherwise. - Counting the recall for all examples and averaging the counts returns - recall@k score. - - Args: - retrieved_labels: 2-D Numpy array of KNN labels for every embedding. - labels: 1-D Numpy array of shape [number of data]. - k_list: List of k values to evaluate recall@k. - - Returns: - recall_list: List of recall@k values. - """ - kvalue_to_recall = dict(zip(k_list, np.zeros(len(k_list)))) - - # For each value of K. - for k in k_list: - matches = defaultdict(float) - counts = defaultdict(float) - # For each (row index, label value) in the query labels. - for i, label_value in enumerate(labels): - # Loop over the K nearest retrieved labels. 
- if label_value in retrieved_labels[i][:k]: - matches[label_value] += 1. - # Increment the denominator. - counts[label_value] += 1. - kvalue_to_recall[k] = np.mean( - [matches[l]/counts[l] for l in matches]) - return [kvalue_to_recall[i] for i in k_list] - - -def compute_cross_sequence_recalls_at_k( - embeddings, labels, label_attr_keys, tasks, k_list, summary_writer, - training_step): - """Computes and reports the recall@k for each classification problem. - - This takes an embedding matrix and an array of multiclass labels - with size [num_data, number of classification problems], then - computes the average recall@k for each classification problem - as well as the average across problems. - - Args: - embeddings: A np.float32 array of size [num_data, embedding_size] - representing the embedded validation or test dataset. - labels: A np.int32 array of size [num_data, num_classification_problems] - holding multiclass labels for each embedding for each problem. - label_attr_keys: List of strings, holds the names of the classification - problems. - tasks: A list of strings describing the video sequence each row - belongs to. This is used to restrict the recall@k computation - to cross-sequence examples. - k_list: A list of ints, the k values to evaluate recall@k. - summary_writer: A tf.summary.FileWriter. - training_step: Int, the current training step we're evaluating. - """ - num_data = float(embeddings.shape[0]) - assert labels.shape[0] == num_data - - # Compute knn indices. - indices = nearest_cross_sequence_neighbors( - embeddings, tasks, n_neighbors=max(k_list)) - retrieved_labels = labels[indices] - - # Compute the recall@k for each classification problem. - recall_lists = [] - for idx, label_attr in enumerate(label_attr_keys): - problem_labels = labels[:, idx] - # Take all indices, all k labels for the problem indexed by idx. 
- problem_retrieved = retrieved_labels[:, :, idx] - recall_list = compute_cross_sequence_recall_at_k( - retrieved_labels=problem_retrieved, - labels=problem_labels, - k_list=k_list) - recall_lists.append(recall_list) - for (k, recall) in zip(k_list, recall_list): - recall_error = 1-recall - summ = tf.Summary(value=[tf.Summary.Value( - tag='validation/classification/%s error@top%d' % ( - label_attr, k), - simple_value=recall_error)]) - print('%s recall@K=%d' % (label_attr, k), recall_error) - summary_writer.add_summary(summ, int(training_step)) - - # Report an average recall@k across problems. - recall_lists = np.array(recall_lists) - for i in range(recall_lists.shape[1]): - average_recall = np.mean(recall_lists[:, i]) - recall_error = 1 - average_recall - summ = tf.Summary(value=[tf.Summary.Value( - tag='validation/classification/average error@top%d' % k_list[i], - simple_value=recall_error)]) - print('Average recall@K=%d' % k_list[i], recall_error) - summary_writer.add_summary(summ, int(training_step)) - - -def evaluate_once( - estimator, input_fn_by_view, batch_size, checkpoint_path, - label_attr_keys, embedding_size, num_views, k_list): - """Compute the recall@k for a given checkpoint path. - - Args: - estimator: an `Estimator` object to evaluate. - input_fn_by_view: An input_fn to an `Estimator's` predict method. Takes - a view index and returns a dict holding ops for getting raw images for - the view. - batch_size: Int, size of the labeled eval batch. - checkpoint_path: String, path to the specific checkpoint being evaluated. - label_attr_keys: A list of Strings, holding each attribute name. - embedding_size: Int, the size of the embedding. - num_views: Int, number of views in the dataset. - k_list: List of ints, list of K values to compute recall at K for. 
- """ - feat_matrix = np.zeros((0, embedding_size)) - label_vect = np.zeros((0, len(label_attr_keys))) - tasks = [] - eval_tensor_keys = ['embeddings', 'tasks', 'classification_labels'] - - # Iterate all views in the dataset. - for view_index in range(num_views): - # Set up a graph for embedding entire dataset. - predictions = estimator.inference( - input_fn_by_view(view_index), checkpoint_path, - batch_size, predict_keys=eval_tensor_keys) - - # Enumerate predictions. - for i, p in enumerate(predictions): - if i % 100 == 0: - tf.logging.info('Embedded %d images for view %d' % (i, view_index)) - - label = p['classification_labels'] - task = p['tasks'] - embedding = p['embeddings'] - - # Collect (embedding, label, task) data. - feat_matrix = np.append(feat_matrix, [embedding], axis=0) - label_vect = np.append(label_vect, [label], axis=0) - tasks.append(task) - - # Compute recall statistics. - ckpt_step = int(checkpoint_path.split('-')[-1]) - summary_dir = os.path.join(FLAGS.outdir, 'labeled_eval_summaries') - summary_writer = tf.summary.FileWriter(summary_dir) - compute_cross_sequence_recalls_at_k( - feat_matrix, label_vect, label_attr_keys, tasks, k_list, - summary_writer, ckpt_step) - - -def get_labeled_tables(config): - """Gets either labeled test or validation tables, based on flags.""" - # Get a list of filenames corresponding to labeled data. - mode = FLAGS.mode - if mode == 'validation': - labeled_tables = util.GetFilesRecursively(config.data.labeled.validation) - elif mode == 'test': - labeled_tables = util.GetFilesRecursively(config.data.labeled.test) - else: - raise ValueError('Unknown dataset: %s' % mode) - return labeled_tables - - -def main(_): - """Runs main labeled eval loop.""" - # Parse config dict from yaml config files / command line flags. - config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params) - - # Choose an estimator based on training strategy. 
- checkpointdir = FLAGS.checkpointdir - estimator = get_estimator(config, checkpointdir) - - # Get data configs. - image_attr_keys = config.data.labeled.image_attr_keys - label_attr_keys = config.data.labeled.label_attr_keys - embedding_size = config.embedding_size - num_views = config.data.num_views - k_list = config.val.recall_at_k_list - batch_size = config.data.batch_size - - # Get either labeled validation or test tables. - labeled_tables = get_labeled_tables(config) - - def input_fn_by_view(view_index): - """Returns an input_fn for use with a tf.Estimator by view.""" - def input_fn(): - # Get raw labeled images. - (preprocessed_images, labels, - tasks) = data_providers.labeled_data_provider( - labeled_tables, - estimator.preprocess_data, view_index, image_attr_keys, - label_attr_keys, batch_size=batch_size) - return { - 'batch_preprocessed': preprocessed_images, - 'tasks': tasks, - 'classification_labels': labels, - }, None - return input_fn - - # If evaluating a specific checkpoint, do that. - if FLAGS.checkpoint_iter: - checkpoint_path = os.path.join( - '%s/model.ckpt-%s' % (checkpointdir, FLAGS.checkpoint_iter)) - evaluate_once( - estimator, input_fn_by_view, batch_size, checkpoint_path, - label_attr_keys, embedding_size, num_views, k_list) - else: - for checkpoint_path in tf.contrib.training.checkpoints_iterator( - checkpointdir): - evaluate_once( - estimator, input_fn_by_view, batch_size, checkpoint_path, - label_attr_keys, embedding_size, num_views, k_list) - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/labeled_eval_test.py b/research/tcn/labeled_eval_test.py deleted file mode 100644 index e586e2181b61c2dca0651304a3772a9b22b24232..0000000000000000000000000000000000000000 --- a/research/tcn/labeled_eval_test.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Tests for tcn.labeled_eval.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import labeled_eval -import tensorflow as tf - - -class LabeledEvalTest(tf.test.TestCase): - - def testNearestCrossSequenceNeighbors(self): - # Generate embeddings. - num_data = 64 - embedding_size = 4 - num_tasks = 8 - n_neighbors = 2 - data = np.random.randn(num_data, embedding_size) - tasks = np.repeat(range(num_tasks), num_data // num_tasks) - - # Get nearest cross-sequence indices. - indices = labeled_eval.nearest_cross_sequence_neighbors( - data, tasks, n_neighbors=n_neighbors) - - # Assert that no nearest neighbor indices come from the same task. - repeated_tasks = np.tile(np.reshape(tasks, (num_data, 1)), n_neighbors) - self.assertTrue(np.all(np.not_equal(repeated_tasks, tasks[indices]))) - - def testPerfectCrossSequenceRecall(self): - # Make sure cross-sequence recall@k returns 1.0 for near-duplicate features. - embeddings = np.random.randn(10, 2) - embeddings[5:, :] = 0.00001 + embeddings[:5, :] - tasks = np.repeat([0, 1], 5) - labels = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) - # find k=1, k=2 nearest neighbors. - k_list = [1, 2] - - # Compute knn indices. 
- indices = labeled_eval.nearest_cross_sequence_neighbors( - embeddings, tasks, n_neighbors=max(k_list)) - retrieved_labels = labels[indices] - recall_list = labeled_eval.compute_cross_sequence_recall_at_k( - retrieved_labels=retrieved_labels, - labels=labels, - k_list=k_list) - self.assertTrue(np.allclose( - np.array(recall_list), np.array([1.0, 1.0]))) - - def testRelativeRecall(self): - # Make sure cross-sequence recall@k is strictly non-decreasing over k. - num_data = 100 - num_tasks = 10 - embeddings = np.random.randn(100, 5) - tasks = np.repeat(range(num_tasks), num_data // num_tasks) - labels = np.random.randint(0, 5, 100) - - k_list = [1, 2, 4, 8, 16, 32, 64] - indices = labeled_eval.nearest_cross_sequence_neighbors( - embeddings, tasks, n_neighbors=max(k_list)) - retrieved_labels = labels[indices] - recall_list = labeled_eval.compute_cross_sequence_recall_at_k( - retrieved_labels=retrieved_labels, - labels=labels, - k_list=k_list) - recall_list_sorted = sorted(recall_list) - self.assertTrue(np.allclose( - np.array(recall_list), np.array(recall_list_sorted))) - -if __name__ == "__main__": - tf.test.main() diff --git a/research/tcn/model.py b/research/tcn/model.py deleted file mode 100644 index 91db1b3e1125ce20d26892ac0f886bd8b4333e5e..0000000000000000000000000000000000000000 --- a/research/tcn/model.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Model implementations.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from abc import ABCMeta -from abc import abstractmethod -import tensorflow as tf -import tensorflow.contrib.slim as slim -from tensorflow.contrib.slim.python.slim.nets import inception -from tensorflow.contrib.slim.python.slim.nets import resnet_v2 as resnet_v2 -from tensorflow.contrib.slim.python.slim.nets import resnet_utils as resnet_utils - - -def get_embedder( - embedder_strategy, config, images, is_training, reuse=False, - l2_normalize_embedding=True): - """Returns an embedder based on config. - - Args: - embedder_strategy: String, name of embedder version to return. - config: LuaTable object, training config. - images: 4-D float `Tensor` containing batch images. - is_training: Boolean or placeholder for boolean, - indicator for whether or not we're training. - reuse: Boolean: Reuse embedder variable scope. - l2_normalize_embedding: Boolean, whether or not to l2 normalize the - embedding. - Returns: - embedder: An `Embedder` object. - Raises: - ValueError: if unknown embedder_strategy specified. 
- """ - if embedder_strategy == 'inception_baseline': - pretrained_ckpt = config.inception_conv_ss_fc.pretrained_checkpoint - return InceptionBaselineEmbedder( - images, - pretrained_ckpt, - config.random_projection, - config.random_projection_dim) - - strategy_to_embedder = { - 'inception_conv_ss_fc': InceptionConvSSFCEmbedder, - 'resnet': ResnetEmbedder, - } - if embedder_strategy not in strategy_to_embedder: - raise ValueError('unknown embedder_strategy', embedder_strategy) - - embedding_size = config.embedding_size - l2_reg_weight = config.learning.l2_reg_weight - embedder = strategy_to_embedder[embedder_strategy]( - config[embedder_strategy], images, embedding_size, - is_training, embedding_l2=l2_normalize_embedding, - l2_reg_weight=l2_reg_weight, reuse=reuse) - return embedder - - -def build_inceptionv3_graph(images, endpoint, is_training, checkpoint, - reuse=False): - """Builds an InceptionV3 model graph. - - Args: - images: A 4-D float32 `Tensor` of batch images. - endpoint: String, name of the InceptionV3 endpoint. - is_training: Boolean, whether or not to build a training or inference graph. - checkpoint: String, path to the pretrained model checkpoint. - reuse: Boolean, whether or not we are reusing the embedder. - Returns: - inception_output: `Tensor` holding the InceptionV3 output. - inception_variables: List of inception variables. - init_fn: Function to initialize the weights (if not reusing, then None). 
- """ - with slim.arg_scope(inception.inception_v3_arg_scope()): - _, endpoints = inception.inception_v3( - images, num_classes=1001, is_training=is_training) - inception_output = endpoints[endpoint] - inception_variables = slim.get_variables_to_restore() - inception_variables = [ - i for i in inception_variables if 'global_step' not in i.name] - if is_training and not reuse: - init_saver = tf.train.Saver(inception_variables) - def init_fn(scaffold, sess): - del scaffold - init_saver.restore(sess, checkpoint) - else: - init_fn = None - return inception_output, inception_variables, init_fn - - -class InceptionBaselineEmbedder(object): - """Produces pre-trained InceptionV3 embeddings.""" - - def __init__(self, images, pretrained_ckpt, reuse=False, - random_projection=False, random_projection_dim=32): - # Build InceptionV3 graph. - (inception_output, - self.inception_variables, - self.init_fn) = build_inceptionv3_graph( - images, 'Mixed_7c', False, pretrained_ckpt, reuse) - - # Pool 8x8x2048 -> 1x1x2048. - embedding = slim.avg_pool2d(inception_output, [8, 8], stride=1) - embedding = tf.squeeze(embedding, [1, 2]) - - if random_projection: - embedding = tf.matmul( - embedding, tf.random_normal( - shape=[2048, random_projection_dim], seed=123)) - self.embedding = embedding - - -class PretrainedEmbedder(object): - """Base class for embedders that take pre-trained networks as input.""" - __metaclass__ = ABCMeta - - def __init__(self, config, images, embedding_size, is_training, - embedding_l2=True, l2_reg_weight=1e-6, reuse=False): - """Constructor. - - Args: - config: A T object holding training config. - images: A 4-D float32 `Tensor` holding images to embed. - embedding_size: Int, the size of the embedding. - is_training: Boolean, whether or not this is a training or inference-time - graph. - embedding_l2: Boolean, whether or not to l2 normalize the embedding. - l2_reg_weight: Float, weight applied to l2 weight regularization. 
- reuse: Boolean, whether or not we're reusing this graph. - """ - # Pull out all the embedder hyperparameters. - self._config = config - self._embedding_size = embedding_size - self._l2_reg_weight = l2_reg_weight - self._embedding_l2 = embedding_l2 - self._is_training = is_training - self._reuse = reuse - - # Pull out pretrained hparams. - pretrained_checkpoint = config.pretrained_checkpoint - pretrained_layer = config.pretrained_layer - pretrained_keep_prob = config.dropout.keep_pretrained - - # Build pretrained graph. - (pretrained_output, - self._pretrained_variables, - self.init_fn) = self.build_pretrained_graph( - images, pretrained_layer, pretrained_checkpoint, is_training, reuse) - - # Optionally drop out the activations. - pretrained_output = slim.dropout( - pretrained_output, keep_prob=pretrained_keep_prob, - is_training=is_training) - self._pretrained_output = pretrained_output - - @abstractmethod - def build_pretrained_graph(self, images, layer, pretrained_checkpoint, - is_training, reuse): - """Builds the graph for the pre-trained network. - - Method to be overridden by implementations. - - Args: - images: A 4-D tf.float32 `Tensor` holding images to embed. - layer: String, defining which pretrained layer to take as input - to adaptation layers. - pretrained_checkpoint: String, path to a checkpoint used to load - pretrained weights. - is_training: Boolean, whether or not we're in training mode. - reuse: Boolean, whether or not to reuse embedder weights. - - Returns: - pretrained_output: A 2 or 3-d tf.float32 `Tensor` holding pretrained - activations. - """ - pass - - @abstractmethod - def construct_embedding(self): - """Builds an embedding function on top of images. - - Method to be overridden by implementations. - - Returns: - embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size] - holding the embedded images. 
- """ - pass - - def get_trainable_variables(self): - """Gets a list of variables to optimize.""" - if self._config.finetune: - return tf.trainable_variables() - else: - adaptation_only_vars = tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope) - return adaptation_only_vars - - -class ResnetEmbedder(PretrainedEmbedder): - """Resnet TCN. - - ResnetV2 -> resnet adaptation layers -> optional l2 normalize -> embedding. - """ - - def __init__(self, config, images, embedding_size, is_training, - embedding_l2=True, l2_reg_weight=1e-6, reuse=False): - super(ResnetEmbedder, self).__init__( - config, images, embedding_size, is_training, embedding_l2, - l2_reg_weight, reuse) - - def build_pretrained_graph( - self, images, resnet_layer, checkpoint, is_training, reuse=False): - """See baseclass.""" - with slim.arg_scope(resnet_v2.resnet_arg_scope()): - _, endpoints = resnet_v2.resnet_v2_50( - images, is_training=is_training, reuse=reuse) - resnet_layer = 'resnet_v2_50/block%d' % resnet_layer - resnet_output = endpoints[resnet_layer] - resnet_variables = slim.get_variables_to_restore() - resnet_variables = [ - i for i in resnet_variables if 'global_step' not in i.name] - if is_training and not reuse: - init_saver = tf.train.Saver(resnet_variables) - def init_fn(scaffold, sess): - del scaffold - init_saver.restore(sess, checkpoint) - else: - init_fn = None - - return resnet_output, resnet_variables, init_fn - - def construct_embedding(self): - """Builds an embedding function on top of images. - - Method to be overridden by implementations. - - Returns: - embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size] - holding the embedded images. - """ - with tf.variable_scope('tcn_net', reuse=self._reuse) as vs: - self._adaptation_scope = vs.name - net = self._pretrained_output - - # Define some adaptation blocks on top of the pre-trained resnet output. 
- adaptation_blocks = [] - adaptation_block_params = [map( - int, i.split('_')) for i in self._config.adaptation_blocks.split('-')] - for i, (depth, num_units) in enumerate(adaptation_block_params): - block = resnet_v2.resnet_v2_block( - 'adaptation_block_%d' % i, base_depth=depth, num_units=num_units, - stride=1) - adaptation_blocks.append(block) - - # Stack them on top of the resent output. - net = resnet_utils.stack_blocks_dense( - net, adaptation_blocks, output_stride=None) - - # Average pool the output. - net = tf.reduce_mean(net, [1, 2], name='adaptation_pool', keep_dims=True) - - if self._config.emb_connection == 'fc': - # Use fully connected layer to project to embedding layer. - fc_hidden_sizes = self._config.fc_hidden_sizes - if fc_hidden_sizes == 'None': - fc_hidden_sizes = [] - else: - fc_hidden_sizes = map(int, fc_hidden_sizes.split('_')) - fc_hidden_keep_prob = self._config.dropout.keep_fc - net = tf.squeeze(net) - for fc_hidden_size in fc_hidden_sizes: - net = slim.layers.fully_connected(net, fc_hidden_size) - if fc_hidden_keep_prob < 1.0: - net = slim.dropout(net, keep_prob=fc_hidden_keep_prob, - is_training=self._is_training) - - # Connect last FC layer to embedding. - embedding = slim.layers.fully_connected(net, self._embedding_size, - activation_fn=None) - else: - # Use 1x1 conv layer to project to embedding layer. - embedding = slim.conv2d( - net, self._embedding_size, [1, 1], activation_fn=None, - normalizer_fn=None, scope='embedding') - embedding = tf.squeeze(embedding) - - # Optionally L2 normalize the embedding. 
- if self._embedding_l2: - embedding = tf.nn.l2_normalize(embedding, dim=1) - - return embedding - - def get_trainable_variables(self): - """Gets a list of variables to optimize.""" - if self._config.finetune: - return tf.trainable_variables() - else: - adaptation_only_vars = tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope) - return adaptation_only_vars - - -class InceptionEmbedderBase(PretrainedEmbedder): - """Base class for embedders that take pre-trained InceptionV3 activations.""" - - def __init__(self, config, images, embedding_size, is_training, - embedding_l2=True, l2_reg_weight=1e-6, reuse=False): - super(InceptionEmbedderBase, self).__init__( - config, images, embedding_size, is_training, embedding_l2, - l2_reg_weight, reuse) - - def build_pretrained_graph( - self, images, inception_layer, checkpoint, is_training, reuse=False): - """See baseclass.""" - # Build InceptionV3 graph. - inception_output, inception_variables, init_fn = build_inceptionv3_graph( - images, inception_layer, is_training, checkpoint, reuse) - return inception_output, inception_variables, init_fn - - -class InceptionConvSSFCEmbedder(InceptionEmbedderBase): - """TCN Embedder V1. - - InceptionV3 (mixed_5d) -> conv layers -> spatial softmax -> - fully connected -> optional l2 normalize -> embedding. - """ - - def __init__(self, config, images, embedding_size, is_training, - embedding_l2=True, l2_reg_weight=1e-6, reuse=False): - super(InceptionConvSSFCEmbedder, self).__init__( - config, images, embedding_size, is_training, embedding_l2, - l2_reg_weight, reuse) - - # Pull out all the hyperparameters specific to this embedder. 
- self._additional_conv_sizes = config.additional_conv_sizes - self._conv_hidden_keep_prob = config.dropout.keep_conv - self._fc_hidden_sizes = config.fc_hidden_sizes - self._fc_hidden_keep_prob = config.dropout.keep_fc - - def construct_embedding(self): - """Builds a conv -> spatial softmax -> FC adaptation network.""" - is_training = self._is_training - normalizer_params = {'is_training': is_training} - with tf.variable_scope('tcn_net', reuse=self._reuse) as vs: - self._adaptation_scope = vs.name - with slim.arg_scope( - [slim.layers.conv2d], - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=slim.regularizers.l2_regularizer( - self._l2_reg_weight), - biases_regularizer=slim.regularizers.l2_regularizer( - self._l2_reg_weight)): - with slim.arg_scope( - [slim.layers.fully_connected], - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=slim.regularizers.l2_regularizer( - self._l2_reg_weight), - biases_regularizer=slim.regularizers.l2_regularizer( - self._l2_reg_weight)): - - # Input to embedder is pre-trained inception output. - net = self._pretrained_output - - # Optionally add more conv layers. - for num_filters in self._additional_conv_sizes: - net = slim.layers.conv2d( - net, num_filters, kernel_size=[3, 3], stride=[1, 1]) - net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob, - is_training=is_training) - - # Take the spatial soft arg-max of the last convolutional layer. - # This is a form of spatial attention over the activations. - # See more here: http://arxiv.org/abs/1509.06113. - net = tf.contrib.layers.spatial_softmax(net) - self.spatial_features = net - - # Add fully connected layers. 
- net = slim.layers.flatten(net) - for fc_hidden_size in self._fc_hidden_sizes: - net = slim.layers.fully_connected(net, fc_hidden_size) - if self._fc_hidden_keep_prob < 1.0: - net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob, - is_training=is_training) - - # Connect last FC layer to embedding. - net = slim.layers.fully_connected(net, self._embedding_size, - activation_fn=None) - - # Optionally L2 normalize the embedding. - if self._embedding_l2: - net = tf.nn.l2_normalize(net, dim=1) - - return net diff --git a/research/tcn/preprocessing.py b/research/tcn/preprocessing.py deleted file mode 100644 index 707625aaa76a12ca4c65edac7c921044b8a1bf90..0000000000000000000000000000000000000000 --- a/research/tcn/preprocessing.py +++ /dev/null @@ -1,686 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Image preprocessing helpers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cv2 -from scipy import ndimage -import tensorflow as tf -from tensorflow.python.ops import control_flow_ops - - -def apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - TODO(coreylynch): add as a dependency, when slim or tensorflow/models are - pipfied. 
- Source: - https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py - - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([ - func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) - for case in range(num_cases)])[0] - - -def distorted_bounding_box_crop(image, - bbox, - min_object_covered=0.1, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.05, 1.0), - max_attempts=100, - scope=None): - """Generates cropped_image using a one of the bboxes randomly distorted. - - TODO(coreylynch): add as a dependency, when slim or tensorflow/models are - pipfied. - Source: - https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py - - See `tf.image.sample_distorted_bounding_box` for more documentation. - - Args: - image: 3-D Tensor of image (it will be converted to floats in [0, 1]). - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole - image. - min_object_covered: An optional `float`. Defaults to `0.1`. The cropped - area of the image must contain at least this fraction of any bounding box - supplied. - aspect_ratio_range: An optional list of `floats`. The cropped area of the - image must have an aspect ratio = width / height within this range. - area_range: An optional list of `floats`. The cropped area of the image - must contain a fraction of the supplied image within in this range. - max_attempts: An optional `int`. 
Number of attempts at generating a cropped - region of the image of the specified constraints. After `max_attempts` - failures, return the entire image. - scope: Optional scope for name_scope. - Returns: - A tuple, a 3-D Tensor cropped_image and the distorted bbox - """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. - cropped_image = tf.slice(image, bbox_begin, bbox_size) - return cropped_image, distort_bbox - - -def distort_color(image, color_ordering=0, fast_mode=True, scope=None): - """Distort the color of a Tensor image. - - TODO(coreylynch): add as a dependency, when slim or tensorflow/models are - pipfied. - Source: - https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. 
- Rather than adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - Args: - image: 3-D Tensor containing single image in [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0-3). - fast_mode: Avoids slower ops (random_hue and random_contrast) - scope: Optional scope for name_scope. - Returns: - 3-D Tensor color-distorted image on range [0, 1] - Raises: - ValueError: if color_ordering not in [0, 3] - """ - with tf.name_scope(scope, 'distort_color', [image]): - if fast_mode: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - elif color_ordering == 2: - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - elif color_ordering == 3: - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) 
- else: - raise ValueError('color_ordering must be in [0, 3]') - - # The random_* ops do not necessarily clamp. - return tf.clip_by_value(image, 0.0, 1.0) - - -def crop_center(image): - """Returns a cropped square image.""" - shape = tf.shape(image) - new_shape = tf.minimum(shape[0], shape[1]) - offset_y = tf.maximum(shape[0] - shape[1], 0) // 2 - offset_x = tf.maximum(shape[1] - shape[0], 0) // 2 - image = tf.image.crop_to_bounding_box( - image, offset_y, offset_x, new_shape, new_shape) - return image - - -def pad(image): - """Returns an image padded to be square.""" - shape = tf.shape(image) - new_shape = tf.maximum(shape[0], shape[1]) - height = shape[0] - width = shape[1] - offset_x = tf.maximum((height-width), 0) // 2 - offset_y = tf.maximum((width-height), 0) // 2 - image = tf.image.pad_to_bounding_box( - image, offset_y, offset_x, new_shape, new_shape) - return image - - -def pad_200(image): - """Returns an image padded width-padded with 200 pixels.""" - shape = tf.shape(image) - image = tf.image.pad_to_bounding_box( - image, 0, 200, shape[0], shape[1]+400) - shape = tf.shape(image) - new_shape = tf.minimum(shape[0], shape[1]) - offset_y = tf.maximum(shape[0] - shape[1], 0) // 2 - offset_x = tf.maximum(shape[1] - shape[0], 0) // 2 - image = tf.image.crop_to_bounding_box( - image, offset_y, offset_x, new_shape, new_shape) - return image - - -def pad_crop_central(image, central_fraction=0.875): - """Pads the image to the maximum length, crops the central fraction.""" - # Pad the image to be square. - image = pad(image) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - image = tf.image.central_crop(image, central_fraction=central_fraction) - return image - - -def crop_image_by_strategy(image, cropping): - """Crops an image according to a strategy defined in config. - - Args: - image: 3-d image tensor. - cropping: str, name of cropping strategy. - Returns: - image: cropped image. 
- Raises: - ValueError: When unknown cropping strategy is specified. - """ - strategy_to_method = { - 'crop_center': crop_center, - 'pad': pad, - 'pad200': pad_200, - 'pad_crop_central': pad_crop_central - } - tf.logging.info('Cropping strategy: %s.' % cropping) - if cropping not in strategy_to_method: - raise ValueError('Unknown cropping strategy: %s' % cropping) - return strategy_to_method[cropping](image) - - -def scale_augment_crop(image, central_bbox, area_range, min_object_covered): - """Training time scale augmentation. - - Args: - image: 3-d float tensor. - central_bbox: Bounding box defining the central region of interest. - area_range: Range of allowed areas for the augmented bounding box. - min_object_covered: Constraint for the fraction of original image in - augmented bounding box. - Returns: - distort_image: The scaled, cropped image. - """ - (distorted_image, _) = distorted_bounding_box_crop( - image, central_bbox, area_range=area_range, - aspect_ratio_range=(1.0, 1.0), - min_object_covered=min_object_covered) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([None, None, 3]) - return distorted_image - - -def scale_to_inception_range(image): - """Scales an image in the range [0,1] to [-1,1] as expected by inception.""" - # Assert that incoming images have been properly scaled to [0,1]. - with tf.control_dependencies( - [tf.assert_less_equal(tf.reduce_max(image), 1.), - tf.assert_greater_equal(tf.reduce_min(image), 0.)]): - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - - -def resize_image(image, height, width): - """Resizes an image to a target height and width.""" - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], align_corners=False) - image = tf.squeeze(image, [0]) - return image - - -def crop_or_pad(image, curr_height, curr_width, new, height=True, crop=True): - """Crops or pads an image. 
- - Args: - image: 3-D float32 `Tensor` image. - curr_height: Int, current height. - curr_width: Int, current width. - new: Int, new width or height. - height: Boolean, cropping or padding for height. - crop: Boolean, True if we're cropping, False if we're padding. - Returns: - image: 3-D float32 `Tensor` image. - """ - # Crop the image to fit the new shape. - abs_diff = tf.abs(new-curr_height)//2 if height else tf.abs(new-curr_width)//2 - offset_x = 0 if height else abs_diff - offset_y = abs_diff if height else 0 - - # We process height first, so always pad/crop to new height. - target_height = new - # We process height first, so pad/crop to new width only if not doing height. - target_width = curr_width if height else new - - if crop: - image = tf.image.crop_to_bounding_box( - image, offset_y, offset_x, target_height, target_width) - else: - image = tf.image.pad_to_bounding_box( - image, offset_y, offset_x, target_height, target_width) - return image - - -def get_central_bbox(min_side, new_size): - """Gets the central bounding box for an image. - - If image is square, returns bounding box [0,0,1,1]. - Otherwise, returns the bounding box containing the central - smallest side x smallest side square. - - Args: - min_side: Int, size of smallest side in pixels. - new_size: Int, resize image to a square of new_size x new_size pixels. - Returns: - bbox: A 4-D Int `Tensor`, holding the coordinates of the central bounding - box. - """ - max_shape = tf.cast(new_size, tf.float32) - min_shape = tf.cast(min_side, tf.float32) - top_xy = ((max_shape-min_shape)/2)/max_shape - bottom_xy = (min_shape+(max_shape-min_shape)/2)/max_shape - # Create a bbox for the center region of interest. - bbox = tf.stack([[[top_xy, top_xy, bottom_xy, bottom_xy]]]) - bbox.set_shape([1, 1, 4]) - return bbox - - -def pad_to_max(image, max_scale): - """Pads an image to max_scale times the current center crop size. 
- - E.g.: For an image with dimensions 1920x1080 and a max_scale of 1.5, - returns an image that is 1.5 * (1080x1080). - - Args: - image: 3-D float32 `Tensor` image. - max_scale: Float, maximum scale of the image, as a multiplier on the - central bounding box. - Returns: - image: 3-D float32 `Tensor` image. - """ - orig_shape = tf.shape(image) - orig_height = orig_shape[0] - orig_width = orig_shape[1] - - # Find the smallest side and corresponding new size. - min_side = tf.cast(tf.minimum(orig_height, orig_width), tf.float32) - new_shape = tf.cast(tf.sqrt(max_scale*min_side*min_side), tf.int32) - - # Crop or pad height. - # pylint: disable=g-long-lambda - image = tf.cond( - orig_height >= new_shape, - lambda: crop_or_pad( - image, orig_height, orig_width, new_shape, height=True, crop=True), - lambda: crop_or_pad( - image, orig_height, orig_width, new_shape, height=True, crop=False)) - - # Crop or pad width. - image = tf.cond( - orig_width >= new_shape, - lambda: crop_or_pad( - image, orig_height, orig_width, new_shape, height=False, crop=True), - lambda: crop_or_pad( - image, orig_height, orig_width, new_shape, height=False, crop=False)) - - # Get the bounding box of the original centered box in the new resized image. - original_bounding_box = get_central_bbox(min_side, new_shape) - return image, original_bounding_box - - -def scale_up_augmentation(image, max_scale): - """Scales an image randomly >100% up to some max scale.""" - # Pad to max size. - image, original_central_bbox = pad_to_max(image, max_scale) - - # Determine area range of the augmented crop, as a percentage of the - # new max area. - # aug_max == 100% of new max area. - aug_max = 1.0 - # aug_min == original_area/new_area == original_area/(max_scale*original_area) - # == 1/max_scale. - aug_min = 1.0/max_scale - area_range = (aug_min, aug_max) - # Since we're doing >100% scale, always have the full original crop in frame. - min_object_covered = 1.0 - # Get a random scaled, cropped image. 
- image = scale_augment_crop(image, original_central_bbox, area_range, - min_object_covered) - return image - - -def scale_down_augmentation(image, min_scale): - """Scales an image randomly <100% down to some min scale.""" - # Crop the center, and consider the whole image the bounding box ROI. - image = crop_center(image) - bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) - # Determine area range of the augmented crop, as a percentage of the - # original crop center area. - # aug_max == 100% of original area. - area_range = (min_scale, 1.0) - # Get a random scaled, cropped image. - image = scale_augment_crop(image, bbox, area_range, min_scale) - return image - - -def augment_image_scale(image, min_scale, max_scale, p_scale_up): - """Training time scale augmentation. - - Args: - image: 3-d float tensor representing image. - min_scale: minimum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - max_scale: maximum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - p_scale_up: Fraction of images scaled up. - Returns: - image: The scale-augmented image. - """ - assert max_scale >= 1.0 - assert min_scale <= 1.0 - if min_scale == max_scale == 1.0: - tf.logging.info('Min and max scale are 1.0, don`t augment.') - # Do no augmentation, just crop the center. - return crop_center(image) - elif (max_scale == 1.0) and (min_scale < 1.0): - tf.logging.info('Max scale is 1.0, only scale down augment.') - # Always do <100% augmentation. - return scale_down_augmentation(image, min_scale) - elif (min_scale == 1.0) and (max_scale > 1.0): - tf.logging.info('Min scale is 1.0, only scale up augment.') - # Always do >100% augmentation. - return scale_up_augmentation(image, max_scale) - else: - tf.logging.info('Sample both augmentations.') - # Choose to scale image up or down. 
- rn = tf.random_uniform([], minval=0., maxval=1., dtype=tf.float32) - image = tf.cond(rn >= p_scale_up, - lambda: scale_up_augmentation(image, max_scale), - lambda: scale_down_augmentation(image, min_scale)) - return image - - -def decode_image(image_str): - """Decodes a jpeg-encoded image string into a image in range [0,1].""" - # Decode jpeg string into np.uint8 tensor. - image = tf.image.decode_jpeg(image_str, channels=3) - # Convert the image to range [0,1]. - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - return image - - -def decode_images(image_strs): - """Decodes a tensor of image strings.""" - return tf.map_fn(decode_image, image_strs, dtype=tf.float32) - - -def preprocess_training_images(images, height, width, min_scale, max_scale, - p_scale_up, aug_color=True, fast_mode=True): - """Preprocesses a batch of images for training. - - This applies training-time scale and color augmentation, crops/resizes, - and scales images to the [-1,1] range expected by pre-trained Inception nets. - - Args: - images: A 4-D float32 `Tensor` holding raw images to be preprocessed. - height: Int, height in pixels to resize image to. - width: Int, width in pixels to resize image to. - min_scale: Float, minimum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - max_scale: Float, maximum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - p_scale_up: Float, fraction of images scaled up. - aug_color: Whether or not to do color augmentation. - fast_mode: Boolean, avoids slower ops (random_hue and random_contrast). - Returns: - preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images. 
- """ - def _prepro_train(im): - """Map this preprocessing function over each image in the batch.""" - return preprocess_training_image( - im, height, width, min_scale, max_scale, p_scale_up, - aug_color=aug_color, fast_mode=fast_mode) - return tf.map_fn(_prepro_train, images) - - -def preprocess_training_image( - image, height, width, min_scale, max_scale, p_scale_up, - aug_color=True, fast_mode=True): - """Preprocesses an image for training. - - Args: - image: A 3-d float tensor representing the image. - height: Target image height. - width: Target image width. - min_scale: Minimum scale of bounding box (as a percentage of full - bounding box) used to crop image during scale augmentation. - max_scale: Minimum scale of bounding box (as a percentage of full - bounding box) used to crop image during scale augmentation. - p_scale_up: Fraction of images to scale >100%. - aug_color: Whether or not to do color augmentation. - fast_mode: Avoids slower ops (random_hue and random_contrast). - Returns: - scaled_image: An scaled image tensor in the range [-1,1]. - """ - # Get a random scaled, cropped image. - image = augment_image_scale(image, min_scale, max_scale, p_scale_up) - - # Resize image to desired height, width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], align_corners=False) - image = tf.squeeze(image, [0]) - - # Optionally augment the color. - # pylint: disable=g-long-lambda - if aug_color: - image = apply_with_random_selector( - image, - lambda x, ordering: distort_color( - x, ordering, fast_mode=fast_mode), num_cases=4) - - # Scale to [-1,1] range as expected by inception. - scaled_image = scale_to_inception_range(image) - return scaled_image - - -def preprocess_test_image(image, height, width, crop_strategy): - """Preprocesses an image for test/inference. - - Args: - image: A 3-d float tensor representing the image. - height: Target image height. - width: Target image width. 
- crop_strategy: String, name of the strategy used to crop test-time images. - Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'. - Returns: - scaled_image: An scaled image tensor in the range [-1,1]. - """ - image = crop_image_by_strategy(image, crop_strategy) - # Resize. - image = resize_image(image, height, width) - # Scale the input range to [-1,1] as expected by inception. - image = scale_to_inception_range(image) - return image - - -def preprocess_test_images(images, height, width, crop_strategy): - """Apply test-time preprocessing to a batch of images. - - This crops images (given a named strategy for doing so), resizes them, - and scales them to the [-1,1] range expected by pre-trained Inception nets. - - Args: - images: A 4-D float32 `Tensor` holding raw images to be preprocessed. - height: Int, height in pixels to resize image to. - width: Int, width in pixels to resize image to. - crop_strategy: String, name of the strategy used to crop test-time images. - Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'. - Returns: - preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images. - """ - def _prepro_test(im): - """Map this preprocessing function over each image in the batch.""" - return preprocess_test_image(im, height, width, crop_strategy) - if len(images.shape) == 3: - return _prepro_test(images) - else: - return tf.map_fn(_prepro_test, images) - - -def preprocess_images( - images, is_training, height, width, - min_scale=1.0, max_scale=1.0, p_scale_up=0.0, - aug_color=True, fast_mode=True, - crop_strategy='pad_crop_central'): - """Preprocess a batch of images. - - Args: - images: A 4-D float32 `Tensor` holding raw images to be preprocessed. - is_training: Boolean, whether to preprocess them for training or test. - height: Int, height in pixels to resize image to. - width: Int, width in pixels to resize image to. 
- min_scale: Float, minimum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - max_scale: Float, maximum scale augmentation allowed, as a fraction of the - central min_side * min_side area of the original image. - p_scale_up: Float, fraction of images scaled up. - aug_color: Whether or not to do color augmentation. - fast_mode: Boolean, avoids slower ops (random_hue and random_contrast). - crop_strategy: String, name of the strategy used to crop test-time images. - Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'. - Returns: - preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images. - """ - if is_training: - return preprocess_training_images( - images, height, width, min_scale, max_scale, - p_scale_up, aug_color, fast_mode) - else: - return preprocess_test_images( - images, height, width, crop_strategy) - - -def cv2rotateimage(image, angle): - """Efficient rotation if 90 degrees rotations, slow otherwise. - - Not a tensorflow function, using cv2 and scipy on numpy arrays. - - Args: - image: a numpy array with shape [height, width, channels]. - angle: the rotation angle in degrees in the range [-180, 180]. - Returns: - The rotated image. - """ - # Limit angle to [-180, 180] degrees. - assert angle <= 180 and angle >= -180 - if angle == 0: - return image - # Efficient rotations. - if angle == -90: - image = cv2.transpose(image) - image = cv2.flip(image, 0) - elif angle == 90: - image = cv2.transpose(image) - image = cv2.flip(image, 1) - elif angle == 180 or angle == -180: - image = cv2.flip(image, 0) - image = cv2.flip(image, 1) - else: # Slow rotation. 
- image = ndimage.interpolation.rotate(image, 270) - return image - - -def cv2resizeminedge(image, min_edge_size): - """Resize smallest edge of image to min_edge_size.""" - assert min_edge_size >= 0 - height, width = (image.shape[0], image.shape[1]) - new_height, new_width = (0, 0) - if height > width: - new_width = min_edge_size - new_height = int(height * new_width / float(width)) - else: - new_height = min_edge_size - new_width = int(width * new_height / float(height)) - return cv2.resize(image, (new_width, new_height), - interpolation=cv2.INTER_AREA) - - -def shapestring(array): - """Returns a compact string describing shape of an array.""" - shape = array.shape - s = str(shape[0]) - for i in range(1, len(shape)): - s += 'x' + str(shape[i]) - return s - - -def unscale_jpeg_encode(ims): - """Unscales pixel values and jpeg encodes preprocessed image. - - Args: - ims: A 4-D float32 `Tensor` holding preprocessed images. - Returns: - im_strings: A 1-D string `Tensor` holding images that have been unscaled - (reversing the inception [-1,1] scaling), and jpeg encoded. - """ - ims /= 2.0 - ims += 0.5 - ims *= 255.0 - ims = tf.clip_by_value(ims, 0, 255) - ims = tf.cast(ims, tf.uint8) - im_strings = tf.map_fn( - lambda x: tf.image.encode_jpeg(x, format='rgb', quality=100), - ims, dtype=tf.string) - return im_strings diff --git a/research/tcn/train.py b/research/tcn/train.py deleted file mode 100644 index f35cb4c6f4ea7346cf4af05bd164f896d063f073..0000000000000000000000000000000000000000 --- a/research/tcn/train.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Trains TCN models (and baseline comparisons).""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from estimators.get_estimator import get_estimator -from utils import util -import tensorflow as tf -tf.logging.set_verbosity(tf.logging.INFO) - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. - """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string('master', 'local', - 'BNS name of the TensorFlow master to use') -tf.app.flags.DEFINE_string( - 'logdir', '/tmp/tcn', 'Directory where to write event logs.') -tf.app.flags.DEFINE_integer( - 'task', 0, 'Task id of the replica running the training.') -tf.app.flags.DEFINE_integer( - 'ps_tasks', 0, 'Number of tasks in the ps job. If 0 no ps job is used.') -FLAGS = tf.app.flags.FLAGS - - -def main(_): - """Runs main training loop.""" - # Parse config dict from yaml config files / command line flags. - config = util.ParseConfigsToLuaTable( - FLAGS.config_paths, FLAGS.model_params, save=True, logdir=FLAGS.logdir) - - # Choose an estimator based on training strategy. 
- estimator = get_estimator(config, FLAGS.logdir) - - # Run training - estimator.train() - -if __name__ == '__main__': - tf.app.run() diff --git a/research/tcn/utils/luatables.py b/research/tcn/utils/luatables.py deleted file mode 100644 index 565d038626a9a8717e36d0701a2e61ab3532bd18..0000000000000000000000000000000000000000 --- a/research/tcn/utils/luatables.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# pylint: disable=line-too-long,g-explicit-length-test -"""A convenience class replicating some lua table syntax with a python dict. - -In general, should behave like a dictionary except that we can use dot notation - to access keys. Users should be careful to only provide keys suitable for - instance variable names. - -Nota bene: do not use the key "keys" since it will collide with the method keys. 
- -Usage example: - ->>> t = T(a=5,b='kaw', c=T(v=[],x=33)) ->>> t.a -5 ->>> t.z = None ->>> print t -T(a=5, z=None, c=T(x=33, v=[]), b='kaw') - ->>> t2 = T({'h':'f','x':4}) ->>> t2 -T(h='f', x=4) ->>> t2['x'] -4 -""" - - -class T(object): - """Class for emulating lua tables.""" - - def __init__(self, *args, **kwargs): - if len(args) > 1 or (len(args) == 1 and len(kwargs) > 0): - errmsg = '''constructor only allows a single dict as a positional - argument or keyword arguments''' - raise ValueError(errmsg) - if len(args) == 1 and isinstance(args[0], dict): - self.__dict__.update(args[0]) - else: - self.__dict__.update(kwargs) - - def __repr__(self): - fmt = ', '.join('%s=%s' for i in range(len(self.__dict__))) - kwargstr = fmt % tuple( - x for tup in self.__dict__.items() for x in [str(tup[0]), repr(tup[1])]) - return 'T(' + kwargstr + ')' - - def __getitem__(self, key): - return self.__dict__[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __delitem__(self, key): - del self.__dict__[key] - - def __iter__(self): - return iter(self.__dict__) - - def __len__(self): - return len(self.__dict__) - - def keys(self): # Needed for dict(T( ... )) to work. - return self.__dict__.keys() - - def iteritems(self): - return self.__dict__.iteritems() diff --git a/research/tcn/utils/progress.py b/research/tcn/utils/progress.py deleted file mode 100644 index 1043261b5be743ea6a8fd45e7e21219999090f3a..0000000000000000000000000000000000000000 --- a/research/tcn/utils/progress.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""A utility class for reporting processing progress.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import datetime - - -class Progress(object): - """A utility class for reporting processing progress.""" - - def __init__(self, target_size): - self.target_size = target_size - self.current_size = 0 - self.start_time = datetime.datetime.now() - - def Update(self, current_size): - """Replaces internal current_size with current_size.""" - self.current_size = current_size - - def Add(self, size): - """Increments internal current_size by size.""" - self.current_size += size - - def __str__(self): - processed = 1e-5 + self.current_size / float(self.target_size) - current_time = datetime.datetime.now() - elapsed = current_time - self.start_time - eta = datetime.timedelta( - seconds=elapsed.total_seconds() / processed - elapsed.total_seconds()) - return "%d / %d (elapsed %s eta %s)" % ( - self.current_size, self.target_size, - str(elapsed).split(".")[0], - str(eta).split(".")[0]) diff --git a/research/tcn/utils/util.py b/research/tcn/utils/util.py deleted file mode 100644 index 9f50366e5a1adc6e57ce5a559c324eed48773ba3..0000000000000000000000000000000000000000 --- a/research/tcn/utils/util.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""General utility functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import six -from utils.luatables import T -import tensorflow as tf -import yaml -from yaml.constructor import ConstructorError -# pylint: disable=invalid-name - - -def GetFilesRecursively(topdir): - """Gets all records recursively for some topdir. - - Args: - topdir: String, path to top directory. - Returns: - allpaths: List of Strings, full paths to all leaf records. - Raises: - ValueError: If there are no files found for this directory. 
- """ - assert topdir - topdir = os.path.expanduser(topdir) - allpaths = [] - for path, _, leaffiles in tf.gfile.Walk(topdir): - if leaffiles: - allpaths.extend([os.path.join(path, i) for i in leaffiles]) - if not allpaths: - raise ValueError('No files found for top directory %s' % topdir) - return allpaths - - -def NoDuplicatesConstructor(loader, node, deep=False): - """Check for duplicate keys.""" - mapping = {} - for key_node, value_node in node.value: - key = loader.construct_object(key_node, deep=deep) - value = loader.construct_object(value_node, deep=deep) - if key in mapping: - raise ConstructorError('while constructing a mapping', node.start_mark, - 'found duplicate key (%s)' % key, - key_node.start_mark) - mapping[key] = value - return loader.construct_mapping(node, deep) - - -def WriteConfigAsYaml(config, logdir, filename): - """Writes a config dict as yaml to logdir/experiment.yml.""" - if not tf.gfile.Exists(logdir): - tf.gfile.MakeDirs(logdir) - config_filename = os.path.join(logdir, filename) - with tf.gfile.GFile(config_filename, 'w') as f: - f.write(yaml.dump(config)) - tf.logging.info('wrote config to %s', config_filename) - - -def LoadConfigDict(config_paths, model_params): - """Loads config dictionary from specified yaml files or command line yaml.""" - - # Ensure that no duplicate keys can be loaded (causing pain). - yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, - NoDuplicatesConstructor) - - # Handle either ',' or '#' separated config lists, since borg will only - # accept '#'. - sep = ',' if ',' in config_paths else '#' - - # Load flags from config file. 
- final_config = {} - if config_paths: - for config_path in config_paths.split(sep): - config_path = config_path.strip() - if not config_path: - continue - config_path = os.path.abspath(config_path) - tf.logging.info('Loading config from %s', config_path) - with tf.gfile.GFile(config_path.strip()) as config_file: - config_flags = yaml.load(config_file) - final_config = DeepMergeDict(final_config, config_flags) - if model_params: - model_params = MaybeLoadYaml(model_params) - final_config = DeepMergeDict(final_config, model_params) - tf.logging.info('Final Config:\n%s', yaml.dump(final_config)) - return final_config - - -def MaybeLoadYaml(item): - """Parses item if it's a string. If it's a dictionary it's returned as-is.""" - if isinstance(item, six.string_types): - return yaml.load(item) - elif isinstance(item, dict): - return item - else: - raise ValueError('Got {}, expected YAML string or dict', type(item)) - - -def DeepMergeDict(dict_x, dict_y, path=None): - """Recursively merges dict_y into dict_x.""" - if path is None: path = [] - for key in dict_y: - if key in dict_x: - if isinstance(dict_x[key], dict) and isinstance(dict_y[key], dict): - DeepMergeDict(dict_x[key], dict_y[key], path + [str(key)]) - elif dict_x[key] == dict_y[key]: - pass # same leaf value - else: - dict_x[key] = dict_y[key] - else: - dict_x[key] = dict_y[key] - return dict_x - - -def ParseConfigsToLuaTable(config_paths, extra_model_params=None, - save=False, save_name='final_training_config.yml', - logdir=None): - """Maps config_paths and extra_model_params to a Luatable-like object.""" - # Parse config dict from yaml config files / command line flags. - config = LoadConfigDict(config_paths, extra_model_params) - if save: - WriteConfigAsYaml(config, logdir, save_name) - # Convert config dictionary to T object with dot notation. - config = RecursivelyConvertToLuatable(config) - return config - - -def SetNestedValue(d, keys, value): - """Sets a value in a nested dictionary. 
- - Example: - d = {}, keys = ['data','augmentation','minscale'], value = 1.0. - returns {'data': {'augmentation' : {'minscale': 1.0 }}} - - Args: - d: A dictionary to set a nested value in. - keys: list of dict keys nesting left to right. - value: the nested value to set. - Returns: - None - """ - for key in keys[:-1]: - d = d.setdefault(key, {}) - d[keys[-1]] = value - - -def RecursivelyConvertToLuatable(yaml_dict): - """Converts a dictionary to a LuaTable-like T object.""" - if isinstance(yaml_dict, dict): - yaml_dict = T(yaml_dict) - for key, item in yaml_dict.iteritems(): - if isinstance(item, dict): - yaml_dict[key] = RecursivelyConvertToLuatable(item) - return yaml_dict - - -def KNNIds(query_vec, target_seq, k=1): - """Gets the knn ids to the query vec from the target sequence.""" - sorted_distances = KNNIdsWithDistances(query_vec, target_seq, k) - return [i[0] for i in sorted_distances] - - -def KNNIdsWithDistances(query_vec, target_seq, k=1): - """Gets the knn ids to the query vec from the target sequence.""" - if not isinstance(np.array(target_seq), np.ndarray): - target_seq = np.array(target_seq) - assert np.shape(query_vec) == np.shape(target_seq[0]) - distances = [(i, np.linalg.norm(query_vec-target_vec)) for ( - i, target_vec) in enumerate(target_seq)] - sorted_distances = sorted(distances, key=lambda x: x[1]) - return sorted_distances[:k] - - -def CopyLocalConfigsToCNS(outdir, configs, gfs_user): - """Copies experiment yaml config files to the job_logdir on /cns.""" - assert configs - assert outdir - conf_files = configs.split(',') - for conf_file in conf_files: - copy_command = 'fileutil --gfs_user %s cp -f %s %s' % ( - gfs_user, conf_file, outdir) - tf.logging.info(copy_command) - os.system(copy_command) - - -def pairwise_distances(feature, squared=True): - """Computes the pairwise distance matrix in numpy. - - Args: - feature: 2-D numpy array of size [number of data, feature dimension] - squared: Boolean. 
If true, output is the pairwise squared euclidean - distance matrix; else, output is the pairwise euclidean distance matrix. - - Returns: - pdists: 2-D numpy array of size - [number of data, number of data]. - """ - triu = np.triu_indices(feature.shape[0], 1) - upper_tri_pdists = np.linalg.norm(feature[triu[1]] - feature[triu[0]], axis=1) - if squared: - upper_tri_pdists **= 2. - num_data = feature.shape[0] - pdists = np.zeros((num_data, num_data)) - pdists[np.triu_indices(num_data, 1)] = upper_tri_pdists - # Make symmetrical. - pdists = pdists + pdists.T - np.diag( - pdists.diagonal()) - return pdists - - -def is_tfrecord_input(inp): - """Checks if input is a TFRecord or list of TFRecords.""" - def _is_tfrecord(inp): - if not isinstance(inp, str): - return False - _, extension = os.path.splitext(inp) - return extension == '.tfrecord' - if isinstance(inp, str): - return _is_tfrecord(inp) - if isinstance(inp, list): - return all(map(_is_tfrecord, inp)) - return False - - -def is_np_array(inp): - if isinstance(inp, np.ndarray): - return True - if isinstance(inp, list): - return all([isinstance(i, np.ndarray) for i in inp]) - return False diff --git a/research/tcn/visualize_embeddings.py b/research/tcn/visualize_embeddings.py deleted file mode 100644 index 298c1ab11a9f8f38200fb3e316b4790485aa892f..0000000000000000000000000000000000000000 --- a/research/tcn/visualize_embeddings.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2017 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -r"""Visualizes embeddings in tensorboard. - -Usage: -root=experimental/users/sermanet/imitation/mirror && \ -blaze build -c opt --copt=-mavx --config=cuda $root:visualize_embeddings && \ -blaze-bin/$root/visualize_embeddings \ ---checkpointdir $checkpointdir \ ---checkpoint_iter $checkpoint_iter \ ---embedding_records $embedding_records \ ---outdir $outdir \ ---num_embed 1000 \ ---sprite_dim 64 \ ---config_paths $configs \ ---logtostderr - -blaze build third_party/tensorboard && \ -blaze-bin/third_party/tensorboard/tensorboard --logdir=$outdir -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import random -import cv2 -import numpy as np -from scipy.misc import imresize -from scipy.misc import imsave -from estimators.get_estimator import get_estimator -from utils import util -import tensorflow as tf -from tensorflow.contrib.tensorboard.plugins import projector -tf.logging.set_verbosity(tf.logging.INFO) - -tf.flags.DEFINE_string( - 'config_paths', '', - """ - Path to a YAML configuration files defining FLAG values. Multiple files - can be separated by the `#` symbol. Files are merged recursively. Setting - a key in these files is equivalent to setting the FLAG value with - the same name. 
- """) -tf.flags.DEFINE_string( - 'model_params', '{}', 'YAML configuration string for the model parameters.') -tf.app.flags.DEFINE_string( - 'checkpoint_iter', '', 'Evaluate this specific checkpoint.') -tf.app.flags.DEFINE_string( - 'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.') -tf.app.flags.DEFINE_string( - 'outdir', '/tmp/tcn', 'Path to write tensorboard info to.') -tf.app.flags.DEFINE_integer( - 'num_embed', 4000, 'Number of embeddings.') -tf.app.flags.DEFINE_integer( - 'num_sequences', -1, 'Number of sequences, -1 for all.') -tf.app.flags.DEFINE_integer( - 'sprite_dim', 64, 'Height, width of the square sprite image.') -tf.app.flags.DEFINE_string( - 'embedding_records', None, 'path to embedding records') -FLAGS = tf.app.flags.FLAGS - - -def images_to_sprite(data): - """Creates the sprite image along with any necessary padding. - - Taken from: https://github.com/tensorflow/tensorflow/issues/6322 - - Args: - data: NxHxW[x3] tensor containing the images. - - Returns: - data: Properly shaped HxWx3 image with any necessary padding. - """ - if len(data.shape) == 3: - data = np.tile(data[..., np.newaxis], (1, 1, 1, 3)) - data = data.astype(np.float32) - min_v = np.min(data.reshape((data.shape[0], -1)), axis=1) - data = (data.transpose(1, 2, 3, 0) - min_v).transpose(3, 0, 1, 2) - max_v = np.max(data.reshape((data.shape[0], -1)), axis=1) - data = (data.transpose(1, 2, 3, 0) / max_v).transpose(3, 0, 1, 2) - n = int(np.ceil(np.sqrt(data.shape[0]))) - padding = ((0, n ** 2 - data.shape[0]), (0, 0), - (0, 0)) + ((0, 0),) * (data.ndim - 3) - data = np.pad(data, padding, mode='constant', - constant_values=0) - # Tile the individual thumbnails into an image. 
- data = data.reshape((n, n) + data.shape[1:]).transpose( - (0, 2, 1, 3) + tuple(range(4, data.ndim + 1))) - data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:]) - data = (data * 255).astype(np.uint8) - return data - - -def main(_): - """Runs main labeled eval loop.""" - # Parse config dict from yaml config files / command line flags. - config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params) - - # Choose an estimator based on training strategy. - checkpointdir = FLAGS.checkpointdir - checkpoint_path = os.path.join( - '%s/model.ckpt-%s' % (checkpointdir, FLAGS.checkpoint_iter)) - estimator = get_estimator(config, checkpointdir) - - # Get records to embed. - validation_dir = FLAGS.embedding_records - validation_records = util.GetFilesRecursively(validation_dir) - - sequences_to_data = {} - for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference( - validation_records, checkpoint_path, config.data.embed_batch_size, - num_sequences=FLAGS.num_sequences): - sequences_to_data[seqname] = { - 'embeddings': view_embeddings, - 'images': view_raw_image_strings, - } - - all_embeddings = np.zeros((0, config.embedding_size)) - all_ims = [] - all_seqnames = [] - - num_embeddings = FLAGS.num_embed - # Concatenate all views from all sequences into a big flat list. - for seqname, data in sequences_to_data.iteritems(): - embs = data['embeddings'] - ims = data['images'] - for v in range(config.data.num_views): - for (emb, im) in zip(embs[v], ims[v]): - all_embeddings = np.append(all_embeddings, [emb], axis=0) - all_ims.append(im) - all_seqnames.append(seqname) - - # Choose N indices uniformly from all images. - random_indices = range(all_embeddings.shape[0]) - random.shuffle(random_indices) - viz_indices = random_indices[:num_embeddings] - - # Extract embs. - viz_embs = np.array(all_embeddings[viz_indices]) - - # Extract and decode ims. 
- viz_ims = list(np.array(all_ims)[viz_indices]) - decoded_ims = [] - - sprite_dim = FLAGS.sprite_dim - for i, im in enumerate(viz_ims): - if i % 100 == 0: - print('Decoding image %d/%d.' % (i, num_embeddings)) - nparr_i = np.fromstring(str(im), np.uint8) - img_np = cv2.imdecode(nparr_i, 1) - img_np = img_np[..., [2, 1, 0]] - - img_np = imresize(img_np, [sprite_dim, sprite_dim, 3]) - decoded_ims.append(img_np) - decoded_ims = np.array(decoded_ims) - - # Extract sequence names. - outdir = FLAGS.outdir - - # The embedding variable, which needs to be stored - # Note this must a Variable not a Tensor! - embedding_var = tf.Variable(viz_embs, name='viz_embs') - - with tf.Session() as sess: - sess.run(embedding_var.initializer) - summary_writer = tf.summary.FileWriter(outdir) - config = projector.ProjectorConfig() - embedding = config.embeddings.add() - embedding.tensor_name = embedding_var.name - - # Comment out if you don't want sprites - embedding.sprite.image_path = os.path.join(outdir, 'sprite.png') - embedding.sprite.single_image_dim.extend( - [decoded_ims.shape[1], decoded_ims.shape[1]]) - - projector.visualize_embeddings(summary_writer, config) - saver = tf.train.Saver([embedding_var]) - saver.save(sess, os.path.join(outdir, 'model2.ckpt'), 1) - - sprite = images_to_sprite(decoded_ims) - imsave(os.path.join(outdir, 'sprite.png'), sprite) - -if __name__ == '__main__': - tf.app.run(main) diff --git a/research/textsum/BUILD b/research/textsum/BUILD deleted file mode 100644 index ea062863911300097740781ba4bd847f6af07afa..0000000000000000000000000000000000000000 --- a/research/textsum/BUILD +++ /dev/null @@ -1,64 +0,0 @@ -package(default_visibility = [":internal"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package_group( - name = "internal", - packages = [ - "//textsum/...", - ], -) - -py_library( - name = "seq2seq_attention_model", - srcs = ["seq2seq_attention_model.py"], - deps = [ - ":seq2seq_lib", - ], -) - -py_library( - name = 
"seq2seq_lib", - srcs = ["seq2seq_lib.py"], -) - -py_binary( - name = "seq2seq_attention", - srcs = ["seq2seq_attention.py"], - deps = [ - ":batch_reader", - ":data", - ":seq2seq_attention_decode", - ":seq2seq_attention_model", - ], -) - -py_library( - name = "batch_reader", - srcs = ["batch_reader.py"], - deps = [ - ":data", - ":seq2seq_attention_model", - ], -) - -py_library( - name = "beam_search", - srcs = ["beam_search.py"], -) - -py_library( - name = "seq2seq_attention_decode", - srcs = ["seq2seq_attention_decode.py"], - deps = [ - ":beam_search", - ":data", - ], -) - -py_library( - name = "data", - srcs = ["data.py"], -) diff --git a/research/textsum/README.md b/research/textsum/README.md deleted file mode 100644 index ac2f55dd524bba3e2af5819a331cfa87c3f35786..0000000000000000000000000000000000000000 --- a/research/textsum/README.md +++ /dev/null @@ -1,171 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -Sequence-to-Sequence with Attention Model for Text Summarization. - -Authors: - -Xin Pan -Peter Liu (peterjliu@google.com, github:peterjliu) - -Introduction - -The core model is the traditional sequence-to-sequence model with attention. -It is customized (mostly inputs/outputs) for the text summarization task. The -model has been trained on Gigaword dataset and achieved state-of-the-art -results (as of June 2016). - -The results described below are based on model trained on multi-gpu and -multi-machine settings. It has been simplified to run on only one machine -for open source purpose. - -Dataset - -We used the Gigaword dataset described in [Rush et al. A Neural Attention Model -for Sentence Summarization](https://arxiv.org/abs/1509.00685). 
- -We cannot provide the dataset due to the license. See ExampleGen in data.py -about the data format. data/data contains a toy example. Also see data/vocab -for example vocabulary format. In How To Run below, users can use toy -data and vocab provided in the data/ directory to run the training by replacing -the data directory flag. - -data_convert_example.py contains example of convert between binary and text. - - -Experiment Result - -8000 examples from testset are sampled to generate summaries and rouge score is -calculated for the generated summaries. Here is the best rouge score on -Gigaword dataset: - -ROUGE-1 Average_R: 0.38272 (95%-conf.int. 0.37774 - 0.38755) - -ROUGE-1 Average_P: 0.50154 (95%-conf.int. 0.49509 - 0.50780) - -ROUGE-1 Average_F: 0.42568 (95%-conf.int. 0.42016 - 0.43099) - -ROUGE-2 Average_R: 0.20576 (95%-conf.int. 0.20060 - 0.21112) - -ROUGE-2 Average_P: 0.27565 (95%-conf.int. 0.26851 - 0.28257) - -ROUGE-2 Average_F: 0.23126 (95%-conf.int. 0.22539 - 0.23708) - -Configuration: - -Following is the configuration for the best trained model on Gigaword: - -batch_size: 64 - -bidirectional encoding layer: 4 - -article length: first 2 sentences, total words within 120. - -summary length: total words within 30. - -word embedding size: 128 - -LSTM hidden units: 256 - -Sampled softmax: 4096 - -vocabulary size: Most frequent 200k words from dataset's article and summaries. - -How To Run - -Prerequisite: install TensorFlow and Bazel. - -```shell -# cd to your workspace -# 1. Clone the textsum code to your workspace 'textsum' directory. -# 2. Create an empty 'WORKSPACE' file in your workspace. -# 3. Move the train/eval/test data to your workspace 'data' directory. -# In the following example, I named the data training-*, test-*, etc. -# If your data files have different names, update the --data_path. -# If you don't have data but want to try out the model, copy the toy -# data from the textsum/data/data to the data/ directory in the workspace. 
-$ ls -R -.: -data textsum WORKSPACE - -./data: -vocab test-0 training-0 training-1 validation-0 ...(omitted) - -./textsum: -batch_reader.py beam_search.py BUILD README.md seq2seq_attention_model.py data -data.py seq2seq_attention_decode.py seq2seq_attention.py seq2seq_lib.py - -./textsum/data: -data vocab - -$ bazel build -c opt --config=cuda textsum/... - -# Run the training. -$ bazel-bin/textsum/seq2seq_attention \ - --mode=train \ - --article_key=article \ - --abstract_key=abstract \ - --data_path=data/training-* \ - --vocab_path=data/vocab \ - --log_root=textsum/log_root \ - --train_dir=textsum/log_root/train - -# Run the eval. Try to avoid running on the same machine as training. -$ bazel-bin/textsum/seq2seq_attention \ - --mode=eval \ - --article_key=article \ - --abstract_key=abstract \ - --data_path=data/validation-* \ - --vocab_path=data/vocab \ - --log_root=textsum/log_root \ - --eval_dir=textsum/log_root/eval - -# Run the decode. Run it when the model is mostly converged. -$ bazel-bin/textsum/seq2seq_attention \ - --mode=decode \ - --article_key=article \ - --abstract_key=abstract \ - --data_path=data/test-* \ - --vocab_path=data/vocab \ - --log_root=textsum/log_root \ - --decode_dir=textsum/log_root/decode \ - --beam_size=8 -``` - - -Examples: - -The following are some text summarization examples, including experiments -using dataset other than Gigaword. - -article: novell inc. chief executive officer eric schmidt has been named chairman of the internet search-engine company google . - -human: novell ceo named google chairman - -machine: novell chief executive named to head internet company - -====================================== - -article: gulf newspapers voiced skepticism thursday over whether newly re - elected us president bill clinton could help revive the troubled middle east peace process but saw a glimmer of hope . 
- -human: gulf skeptical about whether clinton will revive peace process - -machine: gulf press skeptical over clinton 's prospects for peace process - -====================================== - -article: the european court of justice ( ecj ) recently ruled in lock v british gas trading ltd that eu law requires a worker 's statutory holiday pay to take commission payments into account - it should not be based solely on basic salary . the case is not over yet , but its outcome could potentially be costly for employers with workers who are entitled to commission . mr lock , an energy salesman for british gas , was paid a basic salary and sales commission on a monthly basis . his sales commission made up around 60 % of his remuneration package . when he took two weeks ' annual leave in december 2012 , he was paid his basic salary and also received commission from previous sales that fell due during that period . lock obviously did not generate new sales while he was on holiday , which meant that in the following period he suffered a reduced income through lack of commission . he brought an employment tribunal claim asserting that this amounted to a breach of the working time regulations 1998 .....deleted rest for readability... - -abstract: will british gas ecj ruling fuel holiday pay hike ? - -decode: eu law requires worker 's statutory holiday pay - -====================================== - -article: the junior all whites have been eliminated from the fifa u - 20 world cup in colombia with results on the final day of pool play confirming their exit . sitting on two points , new zealand needed results in one of the final two groups to go their way to join the last 16 as one of the four best third place teams . but while spain helped the kiwis ' cause with a 5 - 1 thrashing of australia , a 3 - 0 win for ecuador over costa rica saw the south americans climb to second in group c with costa rica 's three points also good enough to progress in third place . 
that left the junior all whites hopes hanging on the group d encounter between croatia and honduras finishing in a draw . a stalemate - and a place in the knockout stages for new zealand - appeared on the cards until midfielder marvin ceballos netted an 81st minute winner that sent guatemala through to the second round and left the junior all whites packing their bags . new zealand finishes the 24 - nation tournament in 17th place , having claimed their first ever points at this level in just their second appearance at the finals . - -abstract: junior all whites exit world cup - -decoded: junior all whites eliminated from u- 20 world cup - diff --git a/research/textsum/batch_reader.py b/research/textsum/batch_reader.py deleted file mode 100644 index 918551b4c2c5698a5640d11918199f2a6ff65d23..0000000000000000000000000000000000000000 --- a/research/textsum/batch_reader.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Batch reader to seq2seq attention model, with bucketing support.""" - -from collections import namedtuple -from random import shuffle -from threading import Thread -import time - -import numpy as np -import six -from six.moves import queue as Queue -from six.moves import xrange -import tensorflow as tf - -import data - -ModelInput = namedtuple('ModelInput', - 'enc_input dec_input target enc_len dec_len ' - 'origin_article origin_abstract') - -BUCKET_CACHE_BATCH = 100 -QUEUE_NUM_BATCH = 100 - - -class Batcher(object): - """Batch reader with shuffling and bucketing support.""" - - def __init__(self, data_path, vocab, hps, - article_key, abstract_key, max_article_sentences, - max_abstract_sentences, bucketing=True, truncate_input=False): - """Batcher constructor. - - Args: - data_path: tf.Example filepattern. - vocab: Vocabulary. - hps: Seq2SeqAttention model hyperparameters. - article_key: article feature key in tf.Example. - abstract_key: abstract feature key in tf.Example. - max_article_sentences: Max number of sentences used from article. - max_abstract_sentences: Max number of sentences used from abstract. - bucketing: Whether bucket articles of similar length into the same batch. - truncate_input: Whether to truncate input that is too long. Alternative is - to discard such examples. 
- """ - self._data_path = data_path - self._vocab = vocab - self._hps = hps - self._article_key = article_key - self._abstract_key = abstract_key - self._max_article_sentences = max_article_sentences - self._max_abstract_sentences = max_abstract_sentences - self._bucketing = bucketing - self._truncate_input = truncate_input - self._input_queue = Queue.Queue(QUEUE_NUM_BATCH * self._hps.batch_size) - self._bucket_input_queue = Queue.Queue(QUEUE_NUM_BATCH) - self._input_threads = [] - for _ in xrange(16): - self._input_threads.append(Thread(target=self._FillInputQueue)) - self._input_threads[-1].daemon = True - self._input_threads[-1].start() - self._bucketing_threads = [] - for _ in xrange(4): - self._bucketing_threads.append(Thread(target=self._FillBucketInputQueue)) - self._bucketing_threads[-1].daemon = True - self._bucketing_threads[-1].start() - - self._watch_thread = Thread(target=self._WatchThreads) - self._watch_thread.daemon = True - self._watch_thread.start() - - def NextBatch(self): - """Returns a batch of inputs for seq2seq attention model. - - Returns: - enc_batch: A batch of encoder inputs [batch_size, hps.enc_timestamps]. - dec_batch: A batch of decoder inputs [batch_size, hps.dec_timestamps]. - target_batch: A batch of targets [batch_size, hps.dec_timestamps]. - enc_input_len: encoder input lengths of the batch. - dec_input_len: decoder input lengths of the batch. - loss_weights: weights for loss function, 1 if not padded, 0 if padded. - origin_articles: original article words. - origin_abstracts: original abstract words. 
- """ - enc_batch = np.zeros( - (self._hps.batch_size, self._hps.enc_timesteps), dtype=np.int32) - enc_input_lens = np.zeros( - (self._hps.batch_size), dtype=np.int32) - dec_batch = np.zeros( - (self._hps.batch_size, self._hps.dec_timesteps), dtype=np.int32) - dec_output_lens = np.zeros( - (self._hps.batch_size), dtype=np.int32) - target_batch = np.zeros( - (self._hps.batch_size, self._hps.dec_timesteps), dtype=np.int32) - loss_weights = np.zeros( - (self._hps.batch_size, self._hps.dec_timesteps), dtype=np.float32) - origin_articles = ['None'] * self._hps.batch_size - origin_abstracts = ['None'] * self._hps.batch_size - - buckets = self._bucket_input_queue.get() - for i in xrange(self._hps.batch_size): - (enc_inputs, dec_inputs, targets, enc_input_len, dec_output_len, - article, abstract) = buckets[i] - - origin_articles[i] = article - origin_abstracts[i] = abstract - enc_input_lens[i] = enc_input_len - dec_output_lens[i] = dec_output_len - enc_batch[i, :] = enc_inputs[:] - dec_batch[i, :] = dec_inputs[:] - target_batch[i, :] = targets[:] - for j in xrange(dec_output_len): - loss_weights[i][j] = 1 - return (enc_batch, dec_batch, target_batch, enc_input_lens, dec_output_lens, - loss_weights, origin_articles, origin_abstracts) - - def _FillInputQueue(self): - """Fill input queue with ModelInput.""" - start_id = self._vocab.WordToId(data.SENTENCE_START) - end_id = self._vocab.WordToId(data.SENTENCE_END) - pad_id = self._vocab.WordToId(data.PAD_TOKEN) - input_gen = self._TextGenerator(data.ExampleGen(self._data_path)) - while True: - (article, abstract) = six.next(input_gen) - article_sentences = [sent.strip() for sent in - data.ToSentences(article, include_token=False)] - abstract_sentences = [sent.strip() for sent in - data.ToSentences(abstract, include_token=False)] - - enc_inputs = [] - # Use the as the symbol for decoder inputs. - dec_inputs = [start_id] - - # Convert first N sentences to word IDs, stripping existing and . 
- for i in xrange(min(self._max_article_sentences, - len(article_sentences))): - enc_inputs += data.GetWordIds(article_sentences[i], self._vocab) - for i in xrange(min(self._max_abstract_sentences, - len(abstract_sentences))): - dec_inputs += data.GetWordIds(abstract_sentences[i], self._vocab) - - # Filter out too-short input - if (len(enc_inputs) < self._hps.min_input_len or - len(dec_inputs) < self._hps.min_input_len): - tf.logging.warning('Drop an example - too short.\nenc:%d\ndec:%d', - len(enc_inputs), len(dec_inputs)) - continue - - # If we're not truncating input, throw out too-long input - if not self._truncate_input: - if (len(enc_inputs) > self._hps.enc_timesteps or - len(dec_inputs) > self._hps.dec_timesteps): - tf.logging.warning('Drop an example - too long.\nenc:%d\ndec:%d', - len(enc_inputs), len(dec_inputs)) - continue - # If we are truncating input, do so if necessary - else: - if len(enc_inputs) > self._hps.enc_timesteps: - enc_inputs = enc_inputs[:self._hps.enc_timesteps] - if len(dec_inputs) > self._hps.dec_timesteps: - dec_inputs = dec_inputs[:self._hps.dec_timesteps] - - # targets is dec_inputs without at beginning, plus at end - targets = dec_inputs[1:] - targets.append(end_id) - - # Now len(enc_inputs) should be <= enc_timesteps, and - # len(targets) = len(dec_inputs) should be <= dec_timesteps - - enc_input_len = len(enc_inputs) - dec_output_len = len(targets) - - # Pad if necessary - while len(enc_inputs) < self._hps.enc_timesteps: - enc_inputs.append(pad_id) - while len(dec_inputs) < self._hps.dec_timesteps: - dec_inputs.append(end_id) - while len(targets) < self._hps.dec_timesteps: - targets.append(end_id) - - element = ModelInput(enc_inputs, dec_inputs, targets, enc_input_len, - dec_output_len, ' '.join(article_sentences), - ' '.join(abstract_sentences)) - self._input_queue.put(element) - - def _FillBucketInputQueue(self): - """Fill bucketed batches into the bucket_input_queue.""" - while True: - inputs = [] - for _ in 
xrange(self._hps.batch_size * BUCKET_CACHE_BATCH): - inputs.append(self._input_queue.get()) - if self._bucketing: - inputs = sorted(inputs, key=lambda inp: inp.enc_len) - - batches = [] - for i in xrange(0, len(inputs), self._hps.batch_size): - batches.append(inputs[i:i+self._hps.batch_size]) - shuffle(batches) - for b in batches: - self._bucket_input_queue.put(b) - - def _WatchThreads(self): - """Watch the daemon input threads and restart if dead.""" - while True: - time.sleep(60) - input_threads = [] - for t in self._input_threads: - if t.is_alive(): - input_threads.append(t) - else: - tf.logging.error('Found input thread dead.') - new_t = Thread(target=self._FillInputQueue) - input_threads.append(new_t) - input_threads[-1].daemon = True - input_threads[-1].start() - self._input_threads = input_threads - - bucketing_threads = [] - for t in self._bucketing_threads: - if t.is_alive(): - bucketing_threads.append(t) - else: - tf.logging.error('Found bucketing thread dead.') - new_t = Thread(target=self._FillBucketInputQueue) - bucketing_threads.append(new_t) - bucketing_threads[-1].daemon = True - bucketing_threads[-1].start() - self._bucketing_threads = bucketing_threads - - def _TextGenerator(self, example_gen): - """Generates article and abstract text from tf.Example.""" - while True: - e = six.next(example_gen) - try: - article_text = self._GetExFeatureText(e, self._article_key) - abstract_text = self._GetExFeatureText(e, self._abstract_key) - except ValueError: - tf.logging.error('Failed to get article or abstract from example') - continue - - yield (article_text, abstract_text) - - def _GetExFeatureText(self, ex, key): - """Extract text for a feature from td.Example. - - Args: - ex: tf.Example. - key: key of the feature to be extracted. - Returns: - feature: a feature text extracted. 
- """ - return ex.features.feature[key].bytes_list.value[0] diff --git a/research/textsum/beam_search.py b/research/textsum/beam_search.py deleted file mode 100644 index 446799caa77abd292b326f10d37db952c7722165..0000000000000000000000000000000000000000 --- a/research/textsum/beam_search.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Beam search module. - -Beam search takes the top K results from the model, predicts the K results for -each of the previous K result, getting K*K results. Pick the top K results from -K*K results, and start over again until certain number of results are fully -decoded. -""" - -from six.moves import xrange -import tensorflow as tf - -FLAGS = tf.flags.FLAGS -tf.flags.DEFINE_bool('normalize_by_length', True, 'Whether to normalize') - - -class Hypothesis(object): - """Defines a hypothesis during beam search.""" - - def __init__(self, tokens, log_prob, state): - """Hypothesis constructor. - - Args: - tokens: start tokens for decoding. - log_prob: log prob of the start tokens, usually 1. - state: decoder initial states. - """ - self.tokens = tokens - self.log_prob = log_prob - self.state = state - - def Extend(self, token, log_prob, new_state): - """Extend the hypothesis with result from latest step. - - Args: - token: latest token from decoding. 
- log_prob: log prob of the latest decoded tokens. - new_state: decoder output state. Fed to the decoder for next step. - Returns: - New Hypothesis with the results from latest step. - """ - return Hypothesis(self.tokens + [token], self.log_prob + log_prob, - new_state) - - @property - def latest_token(self): - return self.tokens[-1] - - def __str__(self): - return ('Hypothesis(log prob = %.4f, tokens = %s)' % (self.log_prob, - self.tokens)) - - -class BeamSearch(object): - """Beam search.""" - - def __init__(self, model, beam_size, start_token, end_token, max_steps): - """Creates BeamSearch object. - - Args: - model: Seq2SeqAttentionModel. - beam_size: int. - start_token: int, id of the token to start decoding with - end_token: int, id of the token that completes an hypothesis - max_steps: int, upper limit on the size of the hypothesis - """ - self._model = model - self._beam_size = beam_size - self._start_token = start_token - self._end_token = end_token - self._max_steps = max_steps - - def BeamSearch(self, sess, enc_inputs, enc_seqlen): - """Performs beam search for decoding. - - Args: - sess: tf.Session, session - enc_inputs: ndarray of shape (enc_length, 1), the document ids to encode - enc_seqlen: ndarray of shape (1), the length of the sequnce - - Returns: - hyps: list of Hypothesis, the best hypotheses found by beam search, - ordered by score - """ - - # Run the encoder and extract the outputs and final state. - enc_top_states, dec_in_state = self._model.encode_top_state( - sess, enc_inputs, enc_seqlen) - # Replicate the initial states K times for the first step. 
- hyps = [Hypothesis([self._start_token], 0.0, dec_in_state) - ] * self._beam_size - results = [] - - steps = 0 - while steps < self._max_steps and len(results) < self._beam_size: - latest_tokens = [h.latest_token for h in hyps] - states = [h.state for h in hyps] - - topk_ids, topk_log_probs, new_states = self._model.decode_topk( - sess, latest_tokens, enc_top_states, states) - # Extend each hypothesis. - all_hyps = [] - # The first step takes the best K results from first hyps. Following - # steps take the best K results from K*K hyps. - num_beam_source = 1 if steps == 0 else len(hyps) - for i in xrange(num_beam_source): - h, ns = hyps[i], new_states[i] - for j in xrange(self._beam_size*2): - all_hyps.append(h.Extend(topk_ids[i, j], topk_log_probs[i, j], ns)) - - # Filter and collect any hypotheses that have the end token. - hyps = [] - for h in self._BestHyps(all_hyps): - if h.latest_token == self._end_token: - # Pull the hypothesis off the beam if the end token is reached. - results.append(h) - else: - # Otherwise continue to the extend the hypothesis. - hyps.append(h) - if len(hyps) == self._beam_size or len(results) == self._beam_size: - break - - steps += 1 - - if steps == self._max_steps: - results.extend(hyps) - - return self._BestHyps(results) - - def _BestHyps(self, hyps): - """Sort the hyps based on log probs and length. - - Args: - hyps: A list of hypothesis. - Returns: - hyps: A list of sorted hypothesis in reverse log_prob order. - """ - # This length normalization is only effective for the final results. 
- if FLAGS.normalize_by_length: - return sorted(hyps, key=lambda h: h.log_prob/len(h.tokens), reverse=True) - else: - return sorted(hyps, key=lambda h: h.log_prob, reverse=True) diff --git a/research/textsum/data.py b/research/textsum/data.py deleted file mode 100644 index 2baad0a12b7010673652c6c66b46fea960f69f0f..0000000000000000000000000000000000000000 --- a/research/textsum/data.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Data batchers for data described in ..//data_prep/README.md.""" - -import glob -import random -import struct -import sys - -from tensorflow.core.example import example_pb2 - - -# Special tokens -PARAGRAPH_START = '

' -PARAGRAPH_END = '

' -SENTENCE_START = '' -SENTENCE_END = '' -UNKNOWN_TOKEN = '' -PAD_TOKEN = '' -DOCUMENT_START = '' -DOCUMENT_END = '' - - -class Vocab(object): - """Vocabulary class for mapping words and ids.""" - - def __init__(self, vocab_file, max_size): - self._word_to_id = {} - self._id_to_word = {} - self._count = 0 - - with open(vocab_file, 'r') as vocab_f: - for line in vocab_f: - pieces = line.split() - if len(pieces) != 2: - sys.stderr.write('Bad line: %s\n' % line) - continue - if pieces[0] in self._word_to_id: - raise ValueError('Duplicated word: %s.' % pieces[0]) - self._word_to_id[pieces[0]] = self._count - self._id_to_word[self._count] = pieces[0] - self._count += 1 - if self._count > max_size: - raise ValueError('Too many words: >%d.' % max_size) - - def CheckVocab(self, word): - if word not in self._word_to_id: - return None - return self._word_to_id[word] - - def WordToId(self, word): - if word not in self._word_to_id: - return self._word_to_id[UNKNOWN_TOKEN] - return self._word_to_id[word] - - def IdToWord(self, word_id): - if word_id not in self._id_to_word: - raise ValueError('id not found in vocab: %d.' % word_id) - return self._id_to_word[word_id] - - def NumIds(self): - return self._count - - -def ExampleGen(data_path, num_epochs=None): - """Generates tf.Examples from path of data files. - - Binary data format: . represents the byte size - of . is serialized tf.Example proto. The tf.Example contains - the tokenized article text and summary. - - Args: - data_path: path to tf.Example data files. - num_epochs: Number of times to go through the data. None means infinite. - - Yields: - Deserialized tf.Example. - - If there are multiple files specified, they accessed in a random order. - """ - epoch = 0 - while True: - if num_epochs is not None and epoch >= num_epochs: - break - filelist = glob.glob(data_path) - assert filelist, 'Empty filelist.' 
- random.shuffle(filelist) - for f in filelist: - reader = open(f, 'rb') - while True: - len_bytes = reader.read(8) - if not len_bytes: break - str_len = struct.unpack('q', len_bytes)[0] - example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0] - yield example_pb2.Example.FromString(example_str) - - epoch += 1 - - -def Pad(ids, pad_id, length): - """Pad or trim list to len length. - - Args: - ids: list of ints to pad - pad_id: what to pad with - length: length to pad or trim to - - Returns: - ids trimmed or padded with pad_id - """ - assert pad_id is not None - assert length is not None - - if len(ids) < length: - a = [pad_id] * (length - len(ids)) - return ids + a - else: - return ids[:length] - - -def GetWordIds(text, vocab, pad_len=None, pad_id=None): - """Get ids corresponding to words in text. - - Assumes tokens separated by space. - - Args: - text: a string - vocab: TextVocabularyFile object - pad_len: int, length to pad to - pad_id: int, word id for pad symbol - - Returns: - A list of ints representing word ids. - """ - ids = [] - for w in text.split(): - i = vocab.WordToId(w) - if i >= 0: - ids.append(i) - else: - ids.append(vocab.WordToId(UNKNOWN_TOKEN)) - if pad_len is not None: - return Pad(ids, pad_id, pad_len) - return ids - - -def Ids2Words(ids_list, vocab): - """Get words from ids. - - Args: - ids_list: list of int32 - vocab: TextVocabulary object - - Returns: - List of words corresponding to ids. - """ - assert isinstance(ids_list, list), '%s is not a list' % ids_list - return [vocab.IdToWord(i) for i in ids_list] - - -def SnippetGen(text, start_tok, end_tok, inclusive=True): - """Generates consecutive snippets between start and end tokens. - - Args: - text: a string - start_tok: a string denoting the start of snippets - end_tok: a string denoting the end of snippets - inclusive: Whether include the tokens in the returned snippets. 
- - Yields: - String snippets - """ - cur = 0 - while True: - try: - start_p = text.index(start_tok, cur) - end_p = text.index(end_tok, start_p + 1) - cur = end_p + len(end_tok) - if inclusive: - yield text[start_p:cur] - else: - yield text[start_p+len(start_tok):end_p] - except ValueError as e: - raise StopIteration('no more snippets in text: %s' % e) - - -def GetExFeatureText(ex, key): - return ex.features.feature[key].bytes_list.value[0] - - -def ToSentences(paragraph, include_token=True): - """Takes tokens of a paragraph and returns list of sentences. - - Args: - paragraph: string, text of paragraph - include_token: Whether include the sentence separation tokens result. - - Returns: - List of sentence strings. - """ - s_gen = SnippetGen(paragraph, SENTENCE_START, SENTENCE_END, include_token) - return [s for s in s_gen] diff --git a/research/textsum/data/data b/research/textsum/data/data deleted file mode 100644 index b554873a62ad4a6504f596f498c368b6bfe4eb12..0000000000000000000000000000000000000000 Binary files a/research/textsum/data/data and /dev/null differ diff --git a/research/textsum/data/vocab b/research/textsum/data/vocab deleted file mode 100644 index 315740c28cf0bf1844a33799a491f8521d888cb6..0000000000000000000000000000000000000000 --- a/research/textsum/data/vocab +++ /dev/null @@ -1,10003 +0,0 @@ -the 135597564 -, 121400181 -. 98868076 -to 58429764 -of 56269484 -in 49820911 -a 49701084 -and 49378364 -'s 23787251 -'' 23227828 -`` 23116499 -that 21577263 -for 20998230 -said 20858620 -on 19106851 -## 16627320 -is 15661835 -was 14607055 -with 14265376 -he 13755120 -it 13588190 - 12263923 -at 12221539 -as 11657129 -by 11105584 -
11090708 - 11090708 -from 10275933 -his 9090323 -be 8939486 -have 8930288 -has 8880930 -but 8213981 -an 8035012 -fourmile 60 -zwart 60 -post-baby 60 -diasporas 60 -herzeg-bosna 60 -younkers 60 -rolfing 60 -cyclades 60 -lovas 60 -super-cheap 60 -johnsonglobe.com 60 -incarnates 60 -candis 60 -luzira 60 -toyota\/lola\/bridgestone 60 -caohc 60 -flatbeds 60 -pairat 60 -stubborness 60 -mogaka 60 -march-past 60 -alaba 60 -extravehicular 60 -conolly 60 -shelford 60 -snowblowers 60 -excoriation 60 -langoliers 60 -ayios 60 -rsm\/rw## 60 -ultralow 60 -kassire 60 -kirikkale 60 -rutaca 60 -hardys 60 -latigo 60 -aggressive-growth 60 -shankland 60 -tetrault 60 -ntf 60 -british-u.s. 60 -rbk 60 -hannagan 60 -pro-french 60 -macero 60 -bahrenburg 60 -recanvass 60 -hayrunisa 60 -educap 60 -tuoh 60 -x-#-#-#-# 60 -cravenly 60 -jent 60 -britain-farm-animals-disease 60 -white-power 60 -pongsidhirak 60 -fbc-ohiostate 60 -euro-arab 60 -prds 60 -sinkinson 60 -baugher 60 -fpd 60 -sakombi 60 -holyhead 60 -virusscan 60 -niboro 60 -cliffsnotes 60 -inhibitory 60 -nontariff 60 -huaraz 60 -qera 60 -icomos 60 -off-handed 60 -lumbee 60 -kututwa 60 -besra 60 -ownit 60 -out-of-area 60 -petrozuata 60 -wielinga 60 -roecker 60 -jeanneret 60 -ryukyus 60 -chocked 60 -syda 60 -rearers 60 -especialistas 60 -stoeltje 60 -tag-along 60 -pendulums 60 -land-hungry 60 -male-pattern 60 -ogbulafor 60 -jemil 60 -singlehanded 60 -bogaert 60 -brawnier 60 -picardie 60 -patsalides 60 -zvimba 60 -talamoni 60 -aristos 60 -reductionist 60 -sung-han 60 -ebby 60 -tcambanisglobe.com 60 -well-populated 60 -boguinskaia 60 -golfen 60 -fossmo 60 -leches 60 -madtv 60 -mirzapur 60 -dromey 60 -makowski 60 -bearzot 60 -fifth-day 60 -togoimi 60 -ethopia 60 -espoo-based 60 -cuppa 60 -cristin 60 -lambrechts 60 -eurosystem 60 -november-january 60 -home-security 60 -vengerov 60 -major-market 60 -obviates 60 -horster 60 -three-and-half 60 -sinyong 60 -cizek 60 -issyk-kul 60 -granatino 60 -ketones 60 -jenista 60 -mvovo 60 -shuttlecocks 60 
-behounek 60 -nonscholarship 60 -mutaz 60 -hermandad 60 -engelmayer 60 -mussallam 60 -lutein 60 -drag-queen 60 -independent-film 60 -u.s.-asia 60 -jiantang 60 -pinkins 60 -uplifts 60 -lifsher 60 -gree 60 -suit-clad 60 -donavan 60 -stracke 60 -hard-to-read 60 -merco 60 -clanks 60 -asvat 60 -kloof 60 -marsland 60 -caipirinhas 60 -army-style 60 -benhuri 60 -mokotedi 60 -toader 60 -hanarotelecom 60 -guodong 60 -cottone 60 -dominicis 60 -osuntokun 60 -planetall 60 -ik-rjm 60 -serhan 60 -delimiting 60 -chiaro 60 -optimizes 60 -kopay 60 -adakhan 60 -cretz 60 -liberato 60 -kandie 60 -abdur-raheem 60 -vodichkova 60 -schawlow 60 -kulivan 60 -tagtop 60 -stefanini 60 -wcec 60 -kabocha 60 -okresek 60 -martin-in-the-fields 60 -lawn-mowing 60 -cpds 60 -raso 60 -ciubuc 60 -match-winners 60 -eye-pleasing 60 -elorriaga 60 -marusa 60 -ford@globe.com 60 -dreger 60 -#x#k 60 -wsm 60 -hocutt 60 -macconnell 60 -out-of-service 60 -deep-fat 60 -body-builder 60 -streamline.com 60 -piatas 60 -scolavino 60 -technobabble 60 -war-ridden 60 -much-used 60 -colisee 60 -itty 60 -bhw 60 -undersubscribed 60 -cargraphics 60 -tourist-related 60 -fada 60 -sibomana 60 -shuger 60 -megawatt-hours 60 -webpad 60 -visine 60 -rewriteable 60 -madore 60 -fasanos 60 -weinhauer 60 -anti-mormon 60 -loudmouths 60 -broadness 60 -quixtar 60 -fancy-schmancy 60 -gangways 60 -aversive 60 -clingendael 60 -jonathans 60 -n#k 60 -schwald 60 -puxi 60 -abrahamic 60 -casebook 60 -clear-the-air 60 -islamised 60 -messeria 60 -vacation-home 60 -pro-tutsi 60 -kishkovsky 60 -portee 60 -awardee 60 -batsh 60 -latika 60 -bristol-meyers 60 -collum 60 -aprils 60 -telephia 60 -cloud-shrouded 60 -child-abusing 60 -hott 60 -bacteriophages 60 -ghosananda 60 -x-original-to 60 -aubuchon 60 -maldon 60 -owei 60 -stumper 60 -ghanaian-born 60 -cincinnatti 60 -spelich 60 -guoxing 60 -regulation-time 60 -scotiamcleod 60 -tesana 60 -seung-youn 60 -wen-ko 60 -stadt 60 -schroeders 60 -norin 60 -nung 60 -bank\/schroder 60 -relased 60 -ea-lm 60 -rubey 60 
-cfi 60 -kavaja 60 -bourgault 60 -behrakis 60 -suraphong 60 -homesteader 60 -wbur-fm 60 -whee 60 -afghan-kidnappings 60 -rybin 60 -near-starvation 60 -crippa 60 -sanlitun 60 -pro-monarchist 60 -mortalities 60 -anticoagulants 60 -unsual 60 -kaha 60 -kamte 60 -harakah 60 -scarfe 60 -kenoy 60 -sendov 60 -depasquale 60 -maddaloni 60 -jointly-funded 60 -nokwe 60 -liff 60 -kickstarter 60 -khakpour 60 -cribiore 60 -husbanded 60 -pedauye 60 -perrella 60 -far-post 60 -boruchowitz 60 -interational 60 -close-to-home 60 -sophoan 60 -luqa 60 -legislator-elect 60 -forgy 60 -leutar 60 -uosukainen 60 -mirisch 60 -havin 60 -scorekeeping 60 -englishness 60 -philippines-landslide 60 -economides 60 -etonian 60 -randburg 60 -aew 60 -maillet 60 -abogado 60 -well-aware 60 -stunners 60 -laras 60 -#-tatiana 60 -cicciaro 60 -ski-in 60 -##-billion-u.s. 60 -keteyian 60 -first-in-the-south 60 -full-rate 60 -palepoi 60 -self-deluded 60 -hiv-infection 60 -rios-martinez 60 -turbojet 60 -yike 60 -radmacher 60 -stir-crazy 60 -bucuane 60 -unidentifed 60 -richardo 60 -zygi 60 -spritely 60 -vaynerchuk 60 -meniere 60 -special-ed 60 -teenybopper 60 -jmckim@globe.com 60 -xade 60 -danish-owned 60 -ikuko 60 -#-chris 60 -kayapo 60 -zeckendorf 60 -steamrollers 60 -giroir 60 -jaidev 60 -eraserhead 60 -japanese-chinese 60 -reprocesses 60 -muros 60 -must-buy 60 -corsiglia 60 -yanqui 60 -basanez 60 -unplayed 60 -mimika 60 -electric-only 60 -scerbatihs 60 -hitzig 60 -zero-interest-rate 60 -populares 60 -iphigenia 60 -nutricia 60 -sape 60 -kopec 60 -iencsi 60 -alvan 60 -ho-nyoun 60 -mob-style 60 -frivolities 60 -post-ups 60 -percentiles 60 -coote 60 -sodowsky 60 -caglar 60 -sahaviriya 60 -leggat 60 -xueju 60 -compagnia 60 -zomax 60 -inter-school 60 -hugeness 60 -sleaziness 60 -langenhan 60 -business-news 60 -armenteros 60 -cup-clinching 60 -yelavich 60 -chao-ching 60 -salvadori 60 -ameronline 60 -sex-starved 60 -alifereti 60 -trosch 60 -recombined 60 -dwek 60 -emission-control 60 -pozdniakov 60 -shalaan 60 -uzelac 
60 -imzouren 60 -obolensk 60 -moshen 60 -commodified 60 -aleady 60 -barakaldo 60 -tumwesigye 60 -greiff 60 -walp 60 -r-palm 60 -julkipli 60 -managerless 60 -yabucoa 60 -zandl 60 -self-doubting 60 -kherman@statesman.com 60 -amol 60 -erck 60 -nitc 60 -hutcheon 60 -hornbills 60 -all-win 60 -heavy-weighted 60 -publick 60 -b&h 60 -kirchler 60 -cornella-el 60 -seefeldt 60 -sasse 60 -http://www.nra.org 60 -ball-striker 60 -fip 60 -concentra 60 -graywolf 60 -debbouze 60 -snowfields 60 -rabushka 60 -well-trimmed 60 -sofri 60 -jungle-shrouded 60 -beynon 60 -kotil 60 -snarly 60 -##-cent-per-gallon 60 -then-boss 60 -wakeling 60 -haft-e-tir 60 -majoros 60 -guglielminpietro 60 -hasnawi 60 -plachkov 60 -cabarrus 60 -consumer-advocacy 60 -xinhuanet 60 -karakul 60 -eight-iron 60 -rallis 60 -lighthizer 60 -vugar 60 -eighth-generation 60 -unitd 60 -x-anthony 60 -mwonzora 60 -prestart 60 -nadkarni 60 -phyapon 60 -tshuva 60 -kakuryu 60 -ist###-### 60 -golf-ryder 60 -delivered-to 60 -excelle 60 -###.#-yard 60 -less-visible 60 -elopement 60 -khenthong 60 -controllability 60 -venzuela 60 -soutine 60 -pongpen 60 -xhelili 60 -four-homer 60 -his-name 60 -dunnigan 60 -austalia 60 -djalma 60 -uher 60 -yaman 60 -kipng 60 -debt-related 60 -volgyes 60 -liposome 60 -diamoutene 60 -spragg 60 -sumahadi 60 -lebov 60 -blackly 60 -mutineering 60 -overstocking 60 -ryynaenen 60 -beraja 60 -hoklo 60 -uniphoenix 60 -cumbal 60 -redmayne 60 -sesana 60 -inu 60 -daresay 60 -salvinia 60 -egoistic 60 -nellies 60 -deep-frozen 60 -labor-relations 60 -weegen 60 -sino-kenyan 60 -cup-winner 60 -gimlet-eyed 60 -lendus 60 -rwisereka 60 -hugh-jones 60 -shenzhen-listed 60 -sunmonu 60 -zmago 60 -latonya 60 -tv\/ji 60 -troshin 60 -analista 60 -hysteric 60 -lindsay-hogg 60 -marmaro 60 -purvey 60 -kilovolt 60 -val-kill 60 -partership 60 -prostrating 60 -bhairahawa 60 -foc 60 -lyonpo 60 -yeardley 60 -detalles 60 -camas 60 -web-search 60 -kosminsky 60 -defund 60 -cowman 60 -al-kebir 60 -kasher 60 -kapolei 60 -fiords 60 
-brentjens 60 -teen-pregnancy 60 -qawi 60 -orquesta 60 -genotyping 60 -nonato 60 -nssa 60 -twyford 60 -yuling 60 -pearcy 60 -sisic 60 -defraying 60 -eisteddfod 60 -benson-pope 60 -barflies 60 -one-arm 60 -unitech 60 -wheats 60 -mid-town 60 -non-tour 60 -fiord 60 -jnr. 60 -bge 60 -mixmaster 60 -glanton 60 -oner 60 -tax-return 60 -embolisms 60 -willse 60 -ribon 60 -turbodiesel 60 -higher-performing 60 -cotswold 60 -early-summer 60 -romasko 60 -rulfo 60 -moad 60 -paradyne 60 -##-sided 60 -galili 60 -nabatean 60 -haarlemmermeer 60 -reanimated 60 -supo 60 -varujan 60 -demystifies 60 -gilgamesh 60 -bangabhaban 60 -ekin 60 -ctl 60 -bmv 60 -ipolito 60 -ovsyannikov 60 -archaeologically 60 -x-michael 60 -hko 60 -stollen 60 -veloute 60 -sung-young 60 -hydroplaned 60 -coogee 60 -tammert 60 -stambouli 60 -makama 60 -kilted 60 -tuqan 60 -dutch-german 60 -mids 60 -two-to-four 60 -gavan 60 -doorstops 60 -pazira 60 -halifa 60 -melwood 60 -techonology 60 -montecatini 60 -throat-clearing 60 -avakian 60 -hirschbiegel 60 -anticonvulsant 60 -disadvantaging 60 -sieger 60 -sealink 60 --##.### 60 -rabelo 60 -web-mail 60 -maulud 60 -inchworm 60 -tabtabai 60 -relata 60 -ktt 60 -dae-hyun 60 -witchdoctor 60 -astres 60 -yannett 60 -fixed-dose 60 -cadle 60 -tianfu 60 -necesitamos 60 -slemrod 60 -tamil-speaking 60 -rsm\/br## 60 -preisdent 60 -eitzmann 60 -akuffo 60 -blefary 60 -cyclophosphamide 60 -aprilla 60 -share-the-wealth 60 -yagmurdereli 60 -maumere 60 -embarek 60 -monday-thursday 60 -ostiglia 60 -seree 60 -smokefree 60 -nerio 60 -lactase 60 -harbour-felax 60 -belue 60 -al-jaz 60 -tamisuke 60 -twahir 60 -setian 60 -dmp 60 -kowske 60 -supermercados 60 -decalogue 60 -outpolls 60 -pinballed 60 -hezbullah 60 -daluwatte 60 -nintendogs 60 -mediocrities 60 -cercelletta 60 -lulzim 60 -youngstars 60 -mavrou 60 -gorgonio 60 -manie 60 -occassionally 60 -chikelu 60 -mcgettigan 60 -sinduhije 60 -produjo 60 -charvat 60 -once-communist 60 -good-tasting 60 -palexpo 60 -izak 60 -comedy\/drama 60 
-brusqueness 60 -anky 60 -#rd-#th 60 -ergasias 60 -jaxon 60 -miletti 60 -wangui 60 -heb 60 -exurb 60 -chalices 60 -sentimentalists 60 -###-turbine 60 -vermeers 60 -wipeouts 60 -rolff 60 -deguardia 60 -grosbard 60 -zegra 60 -kunder 60 -street-racing 60 -bullington 60 -wunderteam 60 -non-disabled 60 -leese 60 -weishan 60 -ten-time 60 -lactobacillus 60 -funkadelic 60 -zhengyu 60 -state-subsidised 60 -existen 60 -nakivubo 60 -dorinda 60 -popularly-elected 60 -wmf 60 -zrenjanin 60 -al-momen 60 -cesspools 60 -yacub 60 -zhuoru 60 -zouk 60 -tarnower 60 -sweet-potato 60 -lashonda 60 -fcdu 60 -tantalus 60 -nordling 60 -heidrun 60 -swigged 60 -tramontano 60 -spa\/fas 60 -invicta 60 -fenzl 60 -al-najar 60 -vinyl-coated 60 -stategy 60 -enewetak 60 -frost-free 60 -dstanford 60 -elwin 60 -kainer 60 -yugi 60 -cl### 60 -petitclerc 60 -margriet 60 -afterworld 60 -sirbu 60 -non-tradeable 60 -co-signing 60 -isiro 60 -jelacic 60 -tonghai 60 -narisetti 60 -whisperings 60 -pembangunan 60 -clatters 60 -kalashnikova 60 -health-flu-europe 60 -cross-license 60 -sarees 60 -minal 60 -retirement-age 60 -nitrogen-based 60 -sun-yup 60 -naief 60 -#,###-day 60 -rovs 60 -konia 60 -parasitology 60 -riz 60 -hankyu 60 -ebulliently 60 -franzblau 60 -apuuli 60 -muan 60 -ghoulishly 60 -six-gun 60 -recondite 60 -dukker 60 -zemmouri 60 -yumei 60 -http://www.centcom.mil 60 -one-night-only 60 -kovanda 60 -akhundzadeh 60 -worcester\/eng 60 -missiroli 60 -zhenyuan 60 -quarterpipe 60 -salix 60 -petrouchka 60 -mini-treaty 60 -epicure 60 -maintainance 60 -day-nighter 60 -ghengis 60 -pro-hitler 60 -hurleys 60 -editorialize 60 -henle 60 -sonson 60 -meals-ready-to-eat 60 -pratts 60 -pisetsky 60 -thonglao 60 -testings 60 -narcisa 60 -eight-night 60 -noster 60 -technology-stock 60 -dsquared 60 -nain 60 -bbn# 60 -explosively-formed 60 -b.l. 
60 -zucchero 60 -dulay 60 -mid-spring 60 -ionides 60 -prochazkova 60 -twinsburg 60 -jhollis 60 -pre-####s 60 -bouyabes 60 -safe-conduct 60 -drug-resistance 60 -payphones 60 -yongwei 60 -labor-law 60 -tualatin 60 -ehf 60 -hirooka 60 -mortgage-bond 60 -gaztelu 60 -mitidja 60 -kondratiev 60 -christain 60 -http://www.alcoa.com 60 -hand-raised 60 -shaner 60 -medeski 60 -scansoft 60 -multi-millionaires 60 -reder 60 -htin 60 -paillettes 60 -intershop 60 -carfagna 60 -beyazit 60 -annakin 60 -scaglione 60 -esdi 60 -mikie 60 -inferiors 60 -student-on-student 60 -regionalize 60 -luff 60 -demitasse 60 -cal-bred 60 -calzado 60 -agita 60 -balin 60 -highest-end 60 -sinyani 60 -all-industries 60 -reuses 60 -lagrimas 60 -herberts 60 -koves 60 -sub-surface 60 -al-anzi 60 -longer-running 60 -gallimore 60 -pluralities 60 -benedetta 60 -jiddah-based 60 -tomaselli 60 -kallestad 60 -souheil 60 -kanso 60 -privett 60 -koronka 60 -nonmonetary 60 -rangsan 60 -esztergom 60 -zettler 60 -land-management 60 -indie-film 60 -learysptimes.com 60 -demokratikong 60 -vishakhapatnam 60 -rutba 60 -pluss 60 -africare 60 -#.##-acre 60 -shabib 60 -florins 60 -jujiya 60 -compaign 60 -demant 60 -josiane 60 -d'andre 60 -skagerrak 60 -azapo 60 -http://www.ladieseuropeantour.com 60 -whisperers 60 -studdert 60 -overindulgent 60 -qabazard 60 -norcross-based 60 -ex-gurkha 60 -x-seattle 60 -hejiang 60 -mcglaughlin 60 -preciously 60 -geetha 60 -gardezi 60 -ikuta 60 -attaullah 60 -hensler 60 -tree-shaped 60 -subarctic 60 -starman 60 -hussar 60 -verburg 60 -tupurkovski 60 -jeelani 60 -avidan 60 -sancha 60 -poliakoff 60 -room-sized 60 -instable 60 -pejsek 60 -el-falali 60 -chachoengsao 60 -chinese-produced 60 -al-kinani 60 -knuth 60 -bunner 60 -shinholster 60 -sabiston 60 -dvdirect 60 -douroux 60 -zebley 60 -salicylic 60 -etian 60 -co-organised 60 -osmanagic 60 -khotang 60 -doswell 60 -zfa 60 -proscribing 60 -bbn-dodgernotes 60 -domzale 60 -chinguetti 60 -joseph-beth 60 -stirrer 60 -deviatovski 60 -wright-designed 60 
-spin-doctoring 60 -un-sudan 60 -contratos 60 -decimalization 60 -murugesu 60 -cleeman 60 -unbolted 60 -delphin 60 -twitter.com/gregauman 60 -depravation 60 -goen 60 -meckel 60 -kerr\/john 60 -nariaki 60 -co-ranked 60 -double-gold 60 -_______ 60 -binet 60 -perpere 60 -mahbuhbullah 60 -gendun 60 -zilla 60 -godowsky 60 -lycett 60 -ceramist 60 -damia 60 -oradell 60 -karimi-rad 60 -kreisler 60 -hard-hatted 60 -wahpeton 60 -christianne 60 -vesselin 60 -kivumbi 60 -life-supporting 60 -writing-directing 60 -valances 60 -chlorinate 60 -na-young 60 -nuclear-bomb 60 -decomissioning 60 -payam 60 -tuleh 60 -innova 60 -chih-hao 60 -dejohn 60 -democratic-farmer-labor 60 -lbs. 60 -small-school 60 -nur-pashi 60 -bollaert 60 -crookedly 60 -talloires 60 -kringen 60 -dopfer 60 -#-#-year 60 -defeatists 60 -madiot 60 -full-member 60 -vyachorka 60 -haixi 60 -sydenham 60 -#-emilie 60 -acuvue 60 -telser 60 -cumpston 60 -sokhina 60 -bancolombia 60 -crisis-prone 60 -gmhc 60 -pbf 60 -nisi 60 -revolutionizes 60 -azzahar 60 -money-draining 60 -double-bogeying 60 -gauvreau 60 -chabris 60 -leg-break 60 -tecbud 60 -ginnifer 60 -osoria 60 -nigeria-unrest 60 -turjanzadeh 60 -self-affirmation 60 -minuto 60 -private-property 60 -wambui 60 -bodao 60 -gwalia 60 -vladimirs 60 -natural-foods 60 -prolapsed 60 -vdis 60 -cullerton 60 -egawa 60 -aspl 60 -windu 60 -recession-fighting 60 -riss 60 -on-land 60 -ngap 60 -perjure 60 -cochaired 60 -mpumelelo 60 -fixed-wireless 60 -poots 60 -luneville 60 -hands-only 60 -bacashihua 60 -chancing 60 -mazrouei 60 -nabiyev 60 -mideast-summit 60 -baler 60 -genii 60 -bleustein 60 -unmixed 60 -phrenology 60 -iica 60 -shevaun 60 -orch 60 -zaramba 60 -china-latin 60 -ujc 60 -heeler 60 -sparseness 60 -percudani 60 -islam#uk 60 -rbos 60 -gb-lak 60 -cobbled-together 60 -half-owner 60 -d'entremont 60 -ation 60 -translucence 60 -luxgen 60 -ahhs 60 -posibles 60 -kulyab 60 -stalement 60 -anti-car 60 -o'crowley 60 -austria-crime-incest 60 -zuazo 60 -sugarbaker 60 -rougemont 60 
-brugmann 60 -seidensticker 60 -rambla 60 -pensonic 60 -gimbal 60 -naugahyde 60 -longneck 60 -bumpings 60 -puffball 60 -selolwane 60 -anti-climatic 60 -crucibles 60 -penetta 60 -fatehpur 60 -baixada 60 -ukrop 60 -udoto 60 -yulieski 60 -chidren 60 -karwan 60 -zonca 60 -rapid-ascent 60 -sayle 60 -actuator 60 -multireligious 60 -gotchas 60 -torossian 60 -bonici 60 -fellmeth 60 -smatterings 60 -glycerol 60 -batigol 60 -adamany 60 -longball 60 -duck-and-cover 60 -boxford 60 -flounce 60 -seabeds 60 -shui-tsai 60 -sreerema 60 -wentzville 60 -mckeel 60 -ecla 60 -cheol 60 -tosta 60 -stayaways 60 -qazigund 60 -zolkin 60 -lyuboslav 60 -ka-## 60 -bellaart 60 -marcovicci 60 -dream-come-true 60 -winnemucca 60 -delbarton 60 -filmtec 60 -no-drive 60 -election-reform 60 -escoto 60 -###,###-capacity 60 -supertyphoon 60 -wakaazuma 60 -unsurmountable 60 -empcar 60 -members-in-waiting 60 -virola 60 -g\/h 60 -manch 60 -ducker 60 -kornilov 60 -metsa-botnia 60 -moistening 60 -coequal 60 -acga 60 -rupesh 60 -wilkinsburg 60 -brashest 60 -khoramshahi 60 -inverter 60 -co-offensive 60 -oscar-night 60 -etymological 60 -tranter 60 -eckenrode 60 -sireta 60 -smtp 60 -dadey 60 -rehak 60 -gooneratne 60 -jurie 60 -amte 60 -troyens 60 -nametag 60 -lops 60 -petach 60 -soliev 60 -#,###-peso 60 -re-sentenced 60 -berdiyev 60 -phang-nga 60 -ptuj 60 -navickas 60 -glassmakers 60 -single-ticket 60 -holderman 60 -stratcom 60 -bhartiya 60 -portentously 60 -horlock 60 -##\/#-year-old 60 -hertog 60 -krissoff 60 -pinschers 60 -coverlets 60 -galaxie 60 -barmaids 60 -harmetz 60 -communist-bloc 60 -cavallier 60 -relizane 60 -stauring 60 -kousa 60 -#,###-car 60 -thobela 60 -disavowals 60 -bistrong 60 -relevent 60 -vibert 60 -return-path 60 -lamagna 60 -atthe 60 -satrio 60 -leebove 60 -sallai 60 -health-oriented 60 -isra 60 -bidvest 60 -gasperoni 60 -crispo 60 -gortat 60 -garavani 60 -torgau 60 -ikeyama 60 -kocian 60 -zhihe 60 -catapano 60 -taxmen 60 -quittner 60 -israelson 60 -troi 60 -toevs 60 -germany\/milram 60 
-odintsov 60 -chango 60 -latag 60 -balcytis 60 -antolini 60 -xingyi 60 -nienaber 60 -shiso 60 -boskov 60 -resource-hungry 60 -rouland 60 -mpe 60 -hindu-christian 60 -dodt 60 -koenigstein 60 -once-rich 60 -generator-powered 60 -bidemi 60 -khi 60 -kniazkov 60 -taborsky 60 -lehya 60 -zylstra 60 -schosberg 60 -emergency-management 60 -panne 60 -closedown 60 -freemason 60 -higginbottom 60 -erogenous 60 -plaatjes 60 -solyndra 60 -commerciality 60 -gorky-# 60 -i-opener 60 -#.##-carat 60 -alispahic 60 -porzio 60 -cummerbunds 60 -gaku 60 -borght 60 -valicevic 60 -icier 60 -otwell 60 -alighieri 60 -milsteins 60 -machi 60 -one-stop-shop 60 -herperger 60 -hypergrowth 60 -chaffey 60 -henare 60 -peterbilt 60 -sieben 60 -escarre 60 -cross-disciplinary 60 -tahj 60 -homestands 60 -ellenson 60 -perons 60 -vanoy 60 -hafed 60 -mingshan 60 -in-migration 60 -ivancic 60 -dramani 60 -asia-middle 60 -magoffin 60 -hertzfeld 60 -####\/### 60 -larena 60 -njue 60 -hela 60 -nigerian-registered 60 -chraidi 60 -saint-jean-de-maurienne 60 -onsale 60 -taneski 60 -blagoj 60 -pcij 60 -bomb-thrower 60 -tai-shan 60 -fertel 60 -trasvina 60 -shtml 60 -sakaiminato 60 -kilar 60 -sifang 60 -snorkeled 60 -gentian 60 -dito 60 -transbourse 60 -machinga 60 -growth-enhancing 60 -reclogging 60 -#-roque 60 -cmf 60 -putaway 60 -klinge 60 -pitching-rich 59 -alights 59 -#-mariano 59 -shatz 59 -koskoff 59 -sesssion 59 -dowthitt 59 -kriens 59 -revering 59 -nanhu 59 -menduh 59 -issers 59 -gabali 59 -aristolochia 59 -magrino 59 -sb# 59 -piledriver 59 -skywalks 59 -czapiewski 59 -centerstage 59 -blandest 59 -prescriptives 59 -heartlessly 59 -vitalia 59 -anchorages 59 -http://www.nrlc.org 59 -pittston 59 -high-payroll 59 -makahs 59 -shobokshi 59 -bandwagons 59 -krivenik 59 -gamey 59 -year-plus 59 -##-billion-yen 59 -antwun 59 -haniel 59 -longer-lived 59 -saheli 59 -minutewomen 59 -gray-and-white 59 -afran 59 -dysphonia 59 -anau 59 -flemmons 59 -kabushenga 59 -anti-france 59 -simecek 59 -greenwalt 59 -wavelet 59 -zarai 59 
-schweickart 59 -iicd 59 -trupin 59 -nuez 59 -tobiass 59 -outofthebox 59 -o'scannlain 59 -haqq 59 -wrcf##a 59 -holberton 59 -germplasm 59 -halide 59 -wine-colored 59 -authories 59 -harpal 59 -high-emission 59 -kombu 59 -cde 59 -horakova 59 -kinderhook 59 -ocksman 59 -ayd 59 -ubdina 59 -subhan 59 -moistness 59 -kampmeier 59 -turyk-wawrynowicz 59 -kawachi 59 -northerns 59 -mangga 59 -adv##-cox 59 -bukvich 59 -budhi 59 -d-word 59 -phap 59 -glimmerings 59 -recondition 59 -stuhlbarg 59 -obfuscated 59 -collingswood 59 -thiew 59 -varberg 59 -california-grown 59 -akbank 59 -#.#-feet 59 -esmie 59 -amanya 59 -re-filed 59 -bonani 59 -bednarz 59 -economatica 59 -tuf 59 -water-recycling 59 -athanasia 59 -jennekvist 59 -huidong 59 -issues-oriented 59 -woong-bae 59 -kochis 59 -air-dried 59 -brunken 59 -http://www.enron.com 59 -handgrenades 59 -deaf-mutes 59 -esops 59 -darulaman 59 -chanachai 59 -aponavicius 59 -upper-hand 59 -baszczynski 59 -culley 59 -ncaer 59 -calcagni 59 -bolarinwa 59 -brumos 59 -velha 59 -lathem 59 -sadoff 59 -volumen 59 -pm-elect 59 -town-house 59 -backpacked 59 -##-yarders 59 -borrelia 59 -agostinelli 59 -rundles 59 -sentir 59 -asjylyn 59 -chimalapas 59 -tetherow 59 -kinan 59 -tegan 59 -gjenero 59 -dl-hla 59 -estigarribia 59 -once-moribund 59 -biersack 59 -khitan 59 -koury 59 -cricket-ind-aus 59 -http://blogs.timesunion.com/mcguire 59 -muzahim 59 -woul 59 -sck 59 -dongzhimen 59 -fanciulla 59 -arianne 59 -koszics 59 -meadwestvaco 59 -staveley 59 -timb 59 -levs 59 -super-spy 59 -yaish 59 -araz 59 -tabman 59 -basanti 59 -peace-enforcement 59 -cesaria 59 -neopolitan 59 -mispronunciations 59 -counter-cyclical 59 -disarmement 59 -chavhanga 59 -gayler 59 -mercedez-benz 59 -recollects 59 -rundall 59 -chen-chung 59 -malusa 59 -gereida 59 -decha 59 -lemberger 59 -bakoyiannis 59 -gogi 59 -grinda 59 -jimson 59 -bradberry 59 -mervat 59 -bvg 59 -jingzhong 59 -endel 59 -allers 59 -zuoyun 59 -loevinger 59 -zntb 59 -mirnawan 59 -igniter 59 -x-shaped 59 -carlsson-paige 59 
-cobia 59 -veillette 59 -weed-killer 59 -larbaa 59 -prn 59 -victorinox 59 -household-name 59 -##-dnp 59 -xiangning 59 -minasian 59 -frappe 59 -ozone-friendly 59 -smita 59 -non-biological 59 -syllabuses 59 -twe 59 -northwick 59 -ivashkevich 59 -un-proposed 59 -muharrem 59 -calorie-free 59 -kolwezi 59 -barbados-born 59 -eu-serbia 59 -lish 59 -polyak 59 -http://www.nifc.gov/ 59 -kenro 59 -iran-unrest 59 -cothran 59 -basak 59 -beezer 59 -three-strike 59 -wilkomirski 59 -coovadia 59 -serreqi 59 -bythe 59 -noncompetition 59 -post-columbine 59 -minnix 59 -mickell 59 -spyro 59 -boire 59 -topcu 59 -boureij 59 -fengyang 59 -suntrajarn 59 -etre 59 -tarsy 59 -decors 59 -yerger 59 -non-internet 59 -stuntz 59 -lazer 59 -cahaba 59 -sabtu 59 -mut 59 -fadden 59 -eckstine 59 -wawrzyniak 59 -hip-swiveling 59 -plateaux 59 -shih-fang 59 -varah 59 -schwertner 59 -malfi 59 -scheeren 59 -rousselot 59 -bircher 59 -goldsberry 59 -charteau 59 -sullenness 59 -omoro 59 -alipov 59 -tax-cutter 59 -gayer 59 -cortinovis 59 -kettleman 59 -remodeler 59 -hard-punching 59 -magomedali 59 -telesleuth 59 -smajlovic 59 -nkem 59 -sea-green 59 -morobe 59 -nodia 59 -biss 59 -dawan 59 -abolfazl 59 -alberton 59 -faial 59 -picardi 59 -muchall 59 -no-change 59 -enfranchisement 59 -kijevo 59 -salpigidis 59 -imjingak 59 -anfrel 59 -mubarek 59 -chudy 59 -broadway-notes 59 -near-miraculous 59 -lefse 59 -cacace 59 -coffield 59 -out-of-hand 59 -nuhanovic 59 -chelule 59 -jumagulov 59 -elasticized 59 -non-existing 59 -sperl 59 -junri 59 -manders 59 -substructure 59 -cahaya 59 -segamat 59 -kharas 59 -dds 59 -snitched 59 -monforts 59 -marantha 59 -green-blue 59 -sukawaty 59 -troutt 59 -asics-cga 59 -part-ownership 59 -oline 59 -leanna 59 -merl 59 -rockhopper 59 -shakiba 59 -parlez-vous 59 -costilla 59 -industry.net 59 -logvinenko 59 -kaforey 59 -finnish-born 59 -error-plagued 59 -rodnina 59 -steel-mesh 59 -thrushes 59 -nesvig 59 -arcapita 59 -wellstream 59 -naushad 59 -enad 59 -marxist-leninists 59 -optimising 59 -jl-bg 
59 -pecina 59 -calamine 59 -brinner 59 -keion 59 -hatra 59 -sobota 59 -fifita 59 -akayeva 59 -#-million-acre 59 -##-marcelo 59 -curto 59 -battallion 59 -socko 59 -parviainen 59 -all-southeastern 59 -ekland 59 -chinotimba 59 -escamillo 59 -wartelle 59 -barely-there 59 -bookbuilding 59 -brightly-lit 59 -kopeck 59 -pno 59 -pelvises 59 -scuffs 59 -terminix 59 -snow\/cloudy 59 -perraud 59 -introversion 59 -thielemans 59 -ayalew 59 -sardo 59 -debunkers 59 -snow-topped 59 -inuktitut 59 -straberg 59 -keech 59 -voter-id 59 -leoncavallo 59 -harmes 59 -clay.robison@chron.com 59 -sujeeva 59 -image-maker 59 -leet 59 -wister 59 -four-tournament 59 -two-games-to-none 59 -consuela 59 -prokhorova 59 -okin 59 -cholakis 59 -policharki 59 -golf-ball 59 -bischof 59 -languorously 59 -bracanov 59 -joellen 59 -coryo 59 -party-hopping 59 -hangchow 59 -th# 59 -loetschberg 59 -yuxia 59 -anthrax-related 59 -radio-ready 59 -fethiye 59 -shak 59 -current-generation 59 -grotowski 59 -pakorn 59 -garonne 59 -mknobler@ajc.com 59 -buryak 59 -green-minded 59 -barril 59 -yamaki 59 -macheyo 59 -work\/life 59 -fessler 59 -rheingau 59 -wahdan 59 -hugel 59 -conoley 59 -hellcat 59 -poydras 59 -kazoos 59 -gollogly 59 -discoursing 59 -pungue 59 -twilson 59 -aivars 59 -torrelavega 59 -assadollah 59 -ornskoldsvik 59 -syaifudin 59 -doshisha 59 -difazio 59 -portioned 59 -esap 59 -pop\/contemporary 59 -#the 59 -belluscio 59 -afficionados 59 -shargin 59 -caulks 59 -barnesville 59 -al-mahmud 59 -topalli 59 -avowal 59 -skil 59 -jeschke 59 -pieke 59 -p&j 59 -matillano 59 -soft-loan 59 -soon-to-be-former 59 -pittle 59 -transposing 59 -parmelee 59 -fessel 59 -obudu 59 -island-republic 59 -coutry 59 -golpe 59 -vakhayev 59 -vllaznia 59 -karzai-appointed 59 -invs 59 -post-abortion 59 -chianwala 59 -ennoble 59 -neradko 59 -pujobroto 59 -chikowi 59 -ansah 59 -litang 59 -palaghiaccio 59 -euroepan 59 -y-net 59 -cnpp 59 -steroid-distribution 59 -maulawi 59 -grandison 59 -kamungozi 59 -tonneau 59 -torchings 59 -four-candidate 59 
-pallium 59 -kljajevic 59 -#.##-liter 59 -buy-to-let 59 -fanzhi 59 -sunanda 59 -taymour 59 -remanufactured 59 -schopenhauer 59 -anti-syrians 59 -anti-affirmative-action 59 -petrochemia 59 -kptc 59 -lobohombo 59 -mongomo 59 -janss 59 -sabharwal 59 -landholding 59 -##-per-month 59 -aftertax 59 -sardjoe 59 -placings\/standings 59 -garrec 59 -johnson-freese 59 -yablonsky 59 -stojkov 59 -wenzao 59 -water-conservation 59 -lawner 59 -front-loader 59 -anaheim-based 59 -doulos 59 -dimapur 59 -vashchuk 59 -yorked 59 -mewar 59 -five-pointer 59 -warrell 59 -ndiwa 59 -shrage 59 -poblanos 59 -black-run 59 -auslese 59 -banducci 59 -carless 59 -suhardjono 59 -extroverts 59 -bavarian-style 59 -calorie-laden 59 -sportbild 59 -robo-signers 59 -bucchi 59 -bourne\/victor 59 -oesterle 59 -renda 59 -mortgage-market 59 -hasibul 59 -bohren 59 -human-computer 59 -half-moons 59 -al-kadhimi 59 -reguera 59 -achike 59 -schnee 59 -atje 59 -esp\/eus 59 -lamplight 59 -solimoes 59 -loua 59 -elkan 59 -vitamin-enriched 59 -deskovic 59 -burgmans 59 -miaa 59 -ascorbic 59 -spain\/festina 59 -#-##-### 59 -agro-industries 59 -negociaciones 59 -skirt-chasing 59 -gph##bb 59 -minister-counselor 59 -penhall 59 -hat-in-hand 59 -autograph-seeking 59 -temodar 59 -othmani 59 -calida 59 -panichgul 59 -blameworthy 59 -burkitt 59 -foppert 59 -optionsxpress 59 -r-salem 59 -onassio 59 -teruyuki 59 -calstart 59 -fitzcarraldo 59 -tetrazzini 59 -bohme 59 -carinii 59 -devilliers 59 -artemyev 59 -bozkurt 59 -pacificor 59 -komisar 59 -weijun 59 -zehri 59 -challe 59 -kingi 59 -zurcher 59 -svansicklersptimes.com 59 -ziso 59 -personal-foul 59 -myanmar-language 59 -italian-french 59 -yung-ping 59 -sadbhawana 59 -waywardness 59 -lockard 59 -monib 59 -controlee 59 -maroga 59 -public-listed 59 -embrey 59 -ched 59 -five-power 59 -saynudin 59 -qasemi 59 -nuzum 59 -microdrive 59 -re-investigation 59 -conflicto 59 -dasanayake 59 -xynthia 59 -wormholes 59 -baoming 59 -rappleyea 59 -ecotech 59 -pre-oscars 59 -minx 59 -yator 59 -jobbed 
59 -peccadillos 59 -societa 59 -sadriya 59 -wackness 59 -a.b 59 -dychtwald 59 -libancell 59 -two-hundred 59 -egs 59 -contrada 59 -interferring 59 -shajiao 59 -zeidane 59 -tengger 59 -srinagar-muzaffarabad 59 -kurosaki 59 -yidong 59 -nchito 59 -abdille 59 -tid 59 -raw-vid###### 59 -ski-world 59 -blockings 59 -gedeck 59 -localhost 59 -dooky 59 -khafagy 59 -coppock 59 -non-gay 59 -simec 59 -#,###-yuan 59 -dementias 59 -galatioto 59 -titter 59 -nagyvary 59 -ayahuasca 59 -gordon-conwell 59 -alberto-culver 59 -houtan 59 -waspish 59 -unimaginatively 59 -dead-serious 59 -controvery 59 -ingels 59 -maxton 59 -forefingers 59 -ibata 59 -ticknor 59 -lazne 59 -gidel 59 -blache 59 -schormann 59 -drop-by 59 -nationalbank 59 -shuanghe 59 -freetel 59 -###-###-#-###-#### 59 -averitt 59 -anti-earthquake 59 -macefield 59 -trali 59 -witbooi 59 -patru 59 -unhappiest 59 -pervomaysk 59 -realy 59 -applera 59 -est\/a#r 59 -utada 59 -oil-trading 59 -al-sammarai 59 -muria 59 -trapasso 59 -diefenderfer 59 -movie-quality 59 -maslova 59 -gedz 59 -bright-yellow 59 -al-fahal 59 -razziq 59 -corbelli 59 -decanted 59 -black-and-silver 59 -cubanos 59 -towelettes 59 -http://www.weforum.org 59 -iia 59 -sirba 59 -cads 59 -kapetanos 59 -disses 59 -ltc 59 -debbasch 59 -ft.com 59 -graito 59 -staletovich 59 -debreu 59 -merisant 59 -near-collisions 59 -lokuge 59 -software-maker 59 -stephanus 59 -duba 59 -novik 59 -mayombo 59 -salvato 59 -mets-yankees 59 -oskars 59 -titmice 59 -nikolovski 59 -deanda 59 -restrictionist 59 -khadduri 59 -onterrio 59 -thodey 59 -perroud 59 -foping 59 -agboville 59 -isin 59 -wednesday-thursday 59 -ohmori 59 -jong-chan 59 -acocella 59 -buggery 59 -houphouet 59 -sanofi-pasteur 59 -xxxxxxxxend 59 -ohel 59 -suburbanized 59 -tekeda 59 -well-proportioned 59 -counter-punch 59 -selyem 59 -dawlat 59 -egas 59 -three-punch 59 -fouke 59 -gas-to-liquids 59 -edan 59 -hollopeter 59 -hintsa 59 -addenbrooke 59 -mountaga 59 -zogu 59 -bxe# 59 -bushinsky 59 -martinage 59 -kaufer 59 -pingeon 59 -mazahir 
59 -shao-haei 59 -briner 59 -nemea 59 -york-headquartered 59 -anytown 59 -kleptocratic 59 -non-supervisory 59 -aix-les-bains 59 -conocer 59 -heit 59 -reinhilt 59 -clear-cuts 59 -atong 59 -haefner 59 -al-taweel 59 -sholokhov 59 -nezavisne 59 -kamoga 59 -monsalve 59 -zanussi 59 -kaller 59 -fidal 59 -moloi 59 -neck-to-ankle 59 -shirvani 59 -non-saudis 59 -kharg 59 -c-murder 59 -fingernail-sized 59 -gjomle 59 -piccioli 59 -theater-in-the-round 59 -honnold 59 -shingirai 59 -vending-machine 59 -mini-applications 59 -krusty 59 -e-services 59 -adressing 59 -taht 59 -zacharatos 59 -coupler 59 -aparri 59 -edenton 59 -dicke 59 -one-###th 59 -flamme 59 -gladiolas 59 -mcnee 59 -carnero 59 -www.caiso.com 59 -grabar-kitarovic 59 -felsen 59 -memes 59 -reinjected 59 -edwar 59 -favier 59 -yokomine 59 -slamed 59 -poll-watchers 59 -azizulhasni 59 -gracin 59 -tank-automotive 59 -halvari 59 -gloversville 59 -aleksejs 59 -louisianan 59 -clerkships 59 -sharapov 59 -ill-chosen 59 -clerestory 59 -nasa-funded 59 -minella 59 -karnaugh 59 -phoned-in 59 -taibe 59 -schondelmeyer 59 -d'arby 59 -u.s-china 59 -one-hitters 59 -chephren 59 -caru 59 -bachleda 59 -nategh-nuri 59 -gaitley 59 -kiecolt-glaser 59 -watmore 59 -orangeman 59 -arch-villain 59 -http://www.amd.com 59 -powderject 59 -toker 59 -stubbled 59 -ohnishi 59 -requesters 59 -helicoptering 59 -tahoes 59 -morong 59 -rahmonov 59 -ledanois 59 -borodulina 59 -springmann 59 -keynotes 59 -saith 59 -pallidotomy 59 -lusso 59 -shabaa 59 -byrdak 59 -vance-owen 59 -ibertsberger 59 -massih 59 -artyukhov 59 -lip-synch 59 -benziman 59 -schaer 59 -yormark 59 -zuanazzi 59 -dejohnette 59 -maternally 59 -gyurkovics 59 -haleiwa 59 -unprecented 59 -totus 59 -dshir 59 -yeakel 59 -lavecchia 59 -turbo-generators 59 -sanoma 59 -simiyu 59 -hmnzs 59 -sabagh 59 -decoratifs 59 -lehder 59 -suryo 59 -samaraneftegaz 59 -kingfishers 59 -quasi-socialist 59 -jurnee 59 -fischbein 59 -maotai 59 -turgot 59 -karimirad 59 -osnes 59 -margarian 59 -krul 59 -raho 59 -bibring 59 
-shaktoolik 59 -wendler 59 -baim 59 -hellwig 59 -sugarcane-based 59 -al-shaar 59 -suomi 59 -guittard 59 -amirov 59 -chafer 59 -reconnoitering 59 -re-forming 59 -thweatt 59 -tax-writers 59 -oladapo 59 -sidelocks 59 -bedaux 59 -aravind 59 -fruit-growing 59 -koror 59 -light-year 59 -adat 59 -utm 59 -keenans 59 -grecia 59 -palestinian-jordanian 59 -pro-choicers 59 -liuhua 59 -sujit 59 -milchan 59 -weather-philippines-typhoon 59 -additonal 59 -leukemias 59 -http://www.continental.com 59 -parrothead 59 -afghanistan-unrest-taliban 59 -re-submitted 59 -cup-champion 59 -jitloff 59 -gungoren 59 -al-shall 59 -photochemical 59 -grandee 59 -chi-chao 59 -cash-raising 59 -nanri 59 -pro-prosecution 59 -maicosuel 59 -plourde 59 -tarell 59 -laquidara 59 -harkonen 59 -microdermabrasion 59 -shootarounds 59 -cryopreservation 59 -cockfield 59 -yb\/jh 59 -phromphan 59 -maxa 59 -flatliners 59 -extranjera 59 -calaio 59 -tjh-rjm 59 -post-performance 59 -apriantono 59 -rawn 59 -humadi 59 -lupsha 59 -bikita 59 -road-rail 59 -mantes-la-jolie 59 -jianyang 59 -shaweesh 59 -migliaro 59 -larrikin 59 -muju 59 -delahunty 59 -no\/okla 59 -gracen 59 -penalty-filled 59 -kitui 59 -ernsting-krienke 59 -retela 59 -ntust 59 -cordel 59 -verkhovsky 59 -hatha 59 -meat-eater 59 -musker 59 -less-publicized 59 -cityu 59 -tla 59 -fancying 59 -pankin 59 -simasiku 59 -spinaway 59 -hedija 59 -mersenne 59 -dunshee 59 -dalipi 59 -ducale 59 -bruene 59 -army-owned 59 -greatorex 59 -karsiyaka 59 -martinu 59 -shamala 59 -brimfield 59 -atlantics 59 -utuado 59 -conneally 59 -hye-youn 59 -sharpstown 59 -signore 59 -atlanta-journal 59 -negoesco 59 -plotnikov 59 -linscott 59 -despereaux 59 -ligang 59 -survivals 59 -okolski 59 -fang-yue 59 -distrigaz 59 -borana 59 -conable 59 -sorest 59 -rompers 59 -batubara 59 -kencell 59 -ibi 59 -tin-plated 59 -shlain 59 -soybean-based 59 -tallberg 59 -fibras 59 -selda 59 -inter-dependence 59 -chunkier 59 -college-jewish 59 -chiune 59 -bredahl 59 -half-starved 59 -dymchenko 59 -suspectedly 59 
-dimitrakos 59 -jf-## 59 -dury 59 -mcgivney 59 -cakl 59 -###-storey 59 -kishor 59 -vionnet 59 -torlen 59 -cciced 59 -car-bombs 59 -momsen 59 -coach-class 59 -badmouthed 59 -liebermans 59 -smain 59 -sun-loving 59 -squeaky-voiced 59 -three-pack 59 -primex 59 -remixer 59 -well-articulated 59 -run-first 59 -secret-agent 59 -wismar 59 -pendulous 59 -agro-technicians 59 -supercedes 59 -concepto 59 -back-to-the-land 59 -shamoon 59 -isnt 59 -vergina 59 -kolesar 59 -delpierre 59 -volchkov 59 -baixa 59 -pentacostal 59 -mpca 59 -rwindi 59 -technimetrics 59 -beijing-hangzhou 59 -dapa 59 -divisionism 59 -burcham 59 -bean-counters 59 -serravalle 59 -agni-ii 59 -texana 59 -two-masted 59 -yumen 59 -man-of-war 59 -well-nourished 59 -ktvu 59 -brialy 59 -vlashi 59 -palanggoon 59 -essi 59 -dreamlife 59 -culbreath 59 -gosingan 59 -tejinder 59 -million-to-one 59 -ferozabad 59 -movsisyan 59 -birdie-eagle 59 -taeb 59 -biomolecules 59 -cynara 59 -mogas 59 -halbrook 59 -finstrom 59 -upmc 59 -activist-journalist 59 -state-guided 59 -scitrek 59 -newyork 59 -rosenbauer 59 -braker 59 -bds 59 -chain-smokes 59 -divyang 59 -cinemania 59 -norwegian-based 59 -bowlegged 59 -tosoh 59 -isbin 59 -stamatis 59 -###zx 59 -stroem-erichsen 59 -re-organized 59 -moughniyah 59 -mulpuru 59 -naeringsliv 59 -zitko 59 -caudillos 59 -t-systems 59 -al-shayea 59 -awesomeness 59 -cityvote 59 -desanctis 59 -luckenbill 59 -www.tsa.gov 59 -quatrano 59 -tetuan 59 -seven-million-dollar 59 -bienville 59 -abou-treika 59 -akutagawa 59 -lasn 59 -ouseph 59 -brand-conscious 59 -ultra-cool 59 -supporting-acting 59 -vivar 59 -afanasyeva 59 -raes 59 -faliron 59 -soru 59 -jobski 59 -dodonnell@nycap.rr.com 59 -scadden 59 -arditi 59 -doornekamp 59 -nexabit 59 -livetv 59 -duplin 59 -endurance-booster 59 -radionuclides 59 -volmar 59 -gittings 59 -coile 59 -overtown 59 -lesi 59 -agarwalla 59 -brooksley 59 -trifonov 59 -bizera 59 -sino-chilean 59 -ip-based 59 -condren 59 -half-size 59 -kwu 59 -kangol 59 -arlys 59 -binstock 59 
-libyan-backed 59 -rayed 59 -alimentary 59 -grained 59 -atoki 59 -negations 59 -oasen 59 -arab-mediated 59 -mozena 59 -serioux 59 -bjervig 59 -hot-spring 59 -gruet 59 -#-inch-deep 59 -linker 59 -glycine 59 -chimere 59 -hamoked 59 -smyrek 59 -monkish 59 -peraliya 59 -gorospe 59 -eight-month-long 59 -galwey 59 -mastitis 59 -goupil 59 -megaresort 59 -nutraceuticals 59 -ransford 59 -subterfuges 59 -pronostica 59 -reinspection 59 -ho-yeol 59 -apperson 59 -thanomsak 59 -jadideh 59 -wevill 59 -cepelova 59 -aera 59 -kuehbacher 59 -ntumba 59 -iphitos 59 -yanis 59 -isolda 59 -o'shanter 59 -scheuring 59 -black-listed 59 -tan-colored 59 -kayhian 59 -pro-german 59 -stiletto-heeled 59 -mastan 59 -dhamar 59 -creisson 59 -tarasenko 59 -getahun 59 -pordenone 59 -hentunen 59 -underlayment 59 -al-muslimeen 59 -gardy 59 -leukocyte 59 -thermography 59 -matchpoint 59 -hongyi 59 -obertan 59 -primeeast 59 -castellvi 59 -isitolo 59 -pro-competition 59 -laryngeal 59 -xianghe 59 -eurozone-imf 59 -nigeria-oil-unrest-kidnap 59 -nader-camejo 59 -kyrastas 59 -seltsovsky 59 -spain\/astana 59 -beran 59 -foudre 59 -gaudier 59 -france\/cofidis 59 -interenergoservice 59 -gaar 59 -amberleigh 59 -shoora 59 -stehr 59 -dehoyos 59 -wait-listed 59 -meuleman 59 -kennerly 59 -yue\/niu 59 -ananiev 59 -komrskova 59 -jmh 59 -xianfeng 59 -xg### 59 -rane 59 -iturriaga 59 -bely 59 -sekondi 59 -gentex 59 -aleisha 59 -clenched-fist 59 -baraybar 59 -angolite 59 -camp-out 59 -quandry 59 -phymatrix 59 -sidecars 59 -henoch 59 -subandi 59 -baldin 59 -macia 59 -oynes 59 -pre-registered 59 -lanne 59 -garitano 59 -debarked 59 -duval-scott 59 -malambo 59 -hernreich 59 -sankofa 59 -pracharaj 59 -busies 59 -cly 59 -rassul 59 -keobounphanh 59 -per-year 59 -covenas 59 -on-wine 59 -nederlandse 59 -nontitle 59 -goave 59 -fuchsova 59 -monroes 59 -nadzmi 59 -joerres 59 -bogere 59 -shanhua 59 -szalay 59 -duf 59 -gate-crashers 59 -chidamabaram 59 -kirchpaytv 59 -ruweished 59 -andjelko 59 -ostrager 59 -atp-monte 59 -bridgers 59 -p#c 59 
-damba 59 -daisey 59 -non-insurance 59 -frappuccinos 59 -missile-equipped 59 -cunneyworth 59 -kostiantyn 59 -zubr 59 -kimmi 59 -gatorland 59 -waxen 59 -sonke 59 -gramm-rudman 59 -rabon 59 -cumani 59 -hirshson 59 -harouna 59 -multi-user 59 -mccleave 59 -nemerov 59 -ejegayehu 59 -rivenbark 59 -less-privileged 59 -rotundo 59 -duangchalerm 59 -speechmaker 59 -ching-piao 59 -underachieve 59 -bakala 59 -suweidi 59 -adriaenssens 59 -autobytel 59 -willimon 59 -clean-out 59 -mazdas 59 -mochida 59 -volkogonov 59 -jasen 59 -waple 59 -podlodowski 59 -cardia 59 -traffick 59 -carpentaria 59 -harrad 59 -foradil 59 -zaveryukha 59 -rueda-denvers 59 -esperan 59 -davis-monthan 59 -over-aggressive 59 -huracan-tres 59 -redaction 59 -begles 59 -kupusovic 59 -goskomstat 59 -cents-per-share 59 -slopped 59 -#-paolo 59 -#-paola 59 -shafiullah 59 -gold-digger 59 -rawle 59 -garnishment 59 -aguta 59 -chartchai 59 -debevec 59 -first-wave 59 -haidt 59 -neider 59 -bsheets 59 -sheja'eya 59 -omofuma 59 -jk-hla 59 -jila 59 -fomca 59 -kozel 59 -phuntsog 59 -soc-intlnotes 59 -neils 59 -jebril 59 -xiaojie 59 -enrica 59 -zhare 59 -sainvil 59 -fsh 59 -fsg 59 -dbu 59 -bocalandro 59 -terror-stricken 59 -nine-plus 59 -health-guestworkout 59 -adie 59 -sissinghurst 59 -subglacial 59 -praefcke 59 -over-the-head 59 -zerlentes 59 -geosystems 59 -mescheriakova 59 -apolloni 59 -batat 59 -shortest-serving 59 -red-dirt 59 -naziunalista 59 -pellucida 59 -near-deserted 59 -olanzapine 59 -once-beautiful 59 -woon-kwong 59 -fixit 59 -reekers 59 -surgically-repaired 59 -hatipoglu 59 -mannichl 59 -latessa 59 -naturists 59 -berquist 59 -veldakova 59 -detabali 59 -sleep-related 59 -asean-us 59 -hiscock 59 -much-reviled 59 -air-ground 59 -once-taboo 59 -romilly 59 -bossem-levy 59 -health-sars-taiwan 59 -dymock 59 -lynge 59 -piranesi 59 -consistencies 59 -nelson-bond 59 -shusaku 59 -vax 59 -rhythmical 59 -all-premier 59 -jorquera 59 -shikhar 59 -camel-colored 59 -philomene 59 -vancomycin-resistant 59 -einaugler 59 -mutrif 59 
-martitegi 59 -neustar 59 -ganzuri 59 -eufaula 59 -quichua 59 -thundercloud 59 -ex-christian 59 -havisham 59 -curcic 59 -talledega 59 -sung-man 59 -tr\/vls 59 -malgieri 59 -college-entrance 59 -bourkoff 59 -gambol 59 -swick 59 -calvinism 59 -ghettoization 59 -anti-rollover 59 -kamark 59 -forded 59 -repurpose 59 -gupte 59 -athans 59 -polhemus 59 -keshia 59 -ramachandra 59 -kl-gm 59 -guolla 59 -brazil-plane 59 -porato 59 -poms 59 -post-trauma 59 -ubud 59 -semi-circular 59 -blowhole 59 -inflation-related 59 -flori 59 -barhoumi 59 -barolos 59 -madritsch 59 -kulis 59 -weerts 59 -sardis 59 -saravanan 59 -kfda 59 -grapefruit-sized 59 -mucke 59 -saifudin 59 -time-slot 59 -xinli 59 -downwinders 59 -rotcheva 59 -hemdan 59 -brasses 59 -cowpox 59 -mushota 59 -bounder 59 -ex-lax 59 -tibs 59 -narcocorrido 59 -siok 59 -final-lap 59 -holczer 59 -kresse 59 -ibtisam 59 -emy 59 -al-kurd 59 -outside-the-beltway 59 -legitimises 59 -llamo 59 -incendiaries 59 -topoff 59 -asloum 59 -sabelli 59 -turkey-based 59 -krupnikovic 59 -lezion 59 -outswinging 59 -pesach 59 -mercury-free 59 -baerbel 59 -pranav 59 -khokhlova\/sergei 59 -primecap 59 -lukko 59 -bttb 59 -semprun 59 -zohur 59 -fbc-usc 59 -ulrica 59 -round-ups 59 -maastrict 59 -charbroiled 59 -son-in 59 -segat 59 -transfield 59 -government-rescued 59 -tryggve 59 -odling-smee 59 -canelas 59 -swd 59 -bapu 59 -xcp 59 -keulder 59 -haipe 59 -al-hedoud 59 -efthymiou 59 -yalman 59 -ratsirahonana 59 -too-strong 59 -polonaise 59 -aramburuzabala 59 -najafabad 59 -plaited 59 -jailbait 59 -itagui 59 -co-rookie 59 -thonier 59 -iason 59 -low-probability 59 -ligocka 59 -tetzlaff 59 -glassblowers 59 -everard 59 -ettienne 59 -granlund 59 -bever 59 -carematrix 59 -votevets.org 59 -ghinwa 59 -amoussouga 59 -brigade-size 59 -manats 59 -mrazova 59 -kupelo 59 -dekay 59 -vanzekin 59 -calixte 59 -paktin 59 -givry 59 -chocolatey 59 -poverty-eradication 59 -scandal-prone 59 -el#l 59 -good-for-nothing 59 -sailosi 59 -tuguegarao 59 -struk 59 -vancura 59 -jean-mary 
59 -reemployed 59 -durling 59 -xinhua-run 59 -chadd 59 -over-expansion 59 -miyamura 59 -diani 59 -blinov 59 -stick-swinging 59 -sparxxx 59 -blagoi 59 -flohr 59 -casselman 59 -magnifico 59 -temerlin 59 -kirm 59 -bambu 59 -gohlke 59 -maniglia 59 -kamphuis 59 -moodily 59 -kilak 59 -anti-church 59 -leat 59 -serap 59 -danish-based 59 -bagless 59 -bluford 59 -tsygurov 59 -tahmasebi 59 -egomania 59 -u.n.-demarcated 59 -dm-pyg 59 -crianza 59 -ghafari 59 -mottus 58 -djamil 58 -razor-close 58 -setola 58 -cortulua 58 -turvy 58 -gechev 58 -gessel 58 -ayamas 58 -eberl 58 -leys 58 -beating-heart 58 -khadjiev 58 -vbac 58 -steber 58 -o'liney 58 -chizuko 58 -transgressor 58 -balcavage 58 -non-voters 58 -goligoski 58 -uddi 58 -philadelphia-born 58 -schlomo 58 -gamila 58 -herranz 58 -d-ram 58 -aerating 58 -heathwood 58 -jazz-pop 58 -swindlehurst 58 -dabagh 58 -sportfive 58 -jitter 58 -peelle 58 -geste 58 -tuitele 58 -limited-liability 58 -h### 58 -heilmann 58 -onyancha 58 -holobyte 58 -ppt 58 -icelike 58 -boru 58 -ayala-cornejo 58 -singer-bassist 58 -seifullah 58 -sonrisa 58 -bogacheva 58 -dassey 58 -x-muttiah 58 -immigrant-friendly 58 -post-all-star 58 -physically-unable-to-perform 58 -albergo 58 -causado 58 -triple-triples 58 -kalhammer 58 -branquinho 58 -whang 58 -cogbill 58 -rossier 58 -pitztal 58 -breadline 58 -dollar-pegged 58 -ruffing 58 -mchinji 58 -upperhands 58 -stanely 58 -banpro 58 -stereophonic 58 -spaz 58 -dumeisi 58 -soissons 58 -evaluative 58 -vashti 58 -transouth 58 -alinea 58 -just-retired 58 -##-norandrosterone 58 -ethanol-based 58 -blauensteiner 58 -mwaba 58 -chep 58 -intercutting 58 -equal-pay 58 -valencia-based 58 -chambeshi 58 -absorptive 58 -bramson 58 -hallo 58 -diatoms 58 -al-nassiri 58 -leviton 58 -tachi 58 -miscount 58 -anthuenis 58 -brockington 58 -giuffrida 58 -screenvision 58 -limandri 58 -kaliopate 58 -weather-stripping 58 -pauwels 58 -tangaroa 58 -melanogaster 58 -bewilderingly 58 -boasso 58 -deputise 58 -chinese-north 58 -uriri 58 -shibutani 58 
-ranya 58 -aramin 58 -antonacci 58 -arab-kurdish 58 -sazanovich 58 -massachusetts-amherst 58 -etti 58 -tandil 58 -electrotechnical 58 -riqueza 58 -chudasama 58 -leksand 58 -two-pointer 58 -mariangela 58 -keye 58 -nikolaev 58 -sl# 58 -jamat-ud-dawa 58 -liener 58 -ashford.com 58 -parknshop 58 -quapaw 58 -two-pack-a-day 58 -aderhold 58 -buring 58 -unrolls 58 -two-iron 58 -r-miss. 58 -fazilah 58 -okam 58 -kabuli 58 -regia 58 -sekula 58 -re-deployed 58 -v.v. 58 -sporleder 58 -vigiano 58 -cahal 58 -sinsuat 58 -shortenings 58 -detargeting 58 -sportspeople 58 -ta-## 58 -immunosuppression 58 -kongying 58 -repack 58 -huster 58 -us-university 58 -eco-tourists 58 -slip-sliding 58 -wucker 58 -greece-finance-economy 58 -shvetsov 58 -sn-mpm 58 -john-patrick 58 -enought 58 -verbard 58 -sconce 58 -electro-mechanics 58 -budhia 58 -zuckerbrod 58 -kadriu 58 -biavaschi 58 -pentathletes 58 -nimani 58 -dezso 58 -cottesloe 58 -grosh 58 -croupiers 58 -vuillermin 58 -lipcsei 58 -reveiz 58 -kolesnik 58 -neeb 58 -mccullagh 58 -stand-still 58 -animistic 58 -llave 58 -oh-so 58 -savannahs 58 -obledo 58 -ston 58 -ziyuan 58 -safety-first 58 -multiple-listing 58 -nadzeya 58 -earth-observing 58 -lacquers 58 -fazl-ur 58 -bribers 58 -pass-catcher 58 -lasek 58 -cyprus-un-talks 58 -byzantines 58 -composer-in-residence 58 -princeling 58 -bataga 58 -as-yet-undetermined 58 -sumarno 58 -wuchuan 58 -gatty 58 -gobdon 58 -farsi-speaking 58 -onewest 58 -kleeman 58 -mbt 58 -andre-joseph 58 -haggett 58 -umd 58 -goggle-eyed 58 -then-popular 58 -garity 58 -ufundi 58 -changsheng 58 -hercegovacka 58 -gasparini 58 -ruyan 58 -tanerau 58 -assault-rifle 58 -vohs 58 -dimmesdale 58 -gun-for-hire 58 -lokomotive 58 -greiss 58 -afsar 58 -hersley 58 -kruma 58 -multiforme 58 -olkhovsky 58 -meridiana 58 -pannella 58 -reduced-rate 58 -leebaw 58 -webman 58 -kacha 58 -oil-like 58 -niello 58 -brierton 58 -ciliberto 58 -aercap 58 -larin 58 -caze 58 -congo-fighting 58 -ak-chin 58 -saengchai 58 -reichsmarks 58 -mutability 58 
-redistributive 58 -muons 58 -proyas 58 -agajanian 58 -city-by-city 58 -nageikina 58 -dallam 58 -pb&j 58 -juacevedo 58 -pharmacologists 58 -non-moslems 58 -mukri 58 -javasoft 58 -pre-op 58 -krezelok 58 -third-and-short 58 -wahhabist 58 -farve 58 -bluejays 58 -sergius 58 -dorouma 58 -neukirchen 58 -undulated 58 -serlenga 58 -unsan 58 -al-nueimi 58 -blood-testing 58 -plait 58 -wilhide 58 -four-floor 58 -u.s.-flag 58 -mushier 58 -catto 58 -napster-like 58 -nightstands 58 -salvar 58 -vorontsova 58 -criticos 58 -grantmakers 58 -slader 58 -gholam-reza 58 -magnifique 58 -volen 58 -chishui 58 -sunalliance 58 -indoor\/outdoor 58 -heidgen 58 -lule 58 -belot 58 -cheng-yuan 58 -cyrene 58 -manganelli 58 -undershooting 58 -lorenzo-vera 58 -turadzhonzoda 58 -santilli 58 -kucher 58 -jo-krg 58 -mr\/dw 58 -dosen 58 -margi 58 -half-cooked 58 -heslin 58 -vidoje 58 -lepley 58 -commandante 58 -pattersons 58 -corsaire 58 -famosa 58 -yojimbo 58 -bromma 58 -transmutation 58 -cost-to-income 58 -feinted 58 -mop-topped 58 -moxibustion 58 -yongyi 58 -heterogeneity 58 -curti 58 -bolte 58 -tripplett 58 -paun 58 -bio-pharmaceutical 58 -cherubini 58 -hyun-woo 58 -stroem 58 -wanvig 58 -friuli-venezia 58 -schornack 58 -caucasia 58 -prosecuters 58 -sagmeister 58 -defore 58 -fakhoury 58 -ozen 58 -lightning-bolt 58 -funks 58 -pusillanimous 58 -ticky-tack 58 -lenfest 58 -overmedicated 58 -sung-tae 58 -fotherby 58 -al-gabali 58 -muhajiroun 58 -##.#-billion-pound 58 -cridlin 58 -low-dollar 58 -slappy 58 -under-recognized 58 -consumer-finance 58 -joma 58 -fabyan 58 -vredenburg 58 -rushville 58 -kingsmen 58 -feng-ying 58 -jung-hoon 58 -aldape 58 -wednesday-night 58 -janja 58 -monsod 58 -maimings 58 -kwaito 58 -al-baghli 58 -st.-denis 58 -agro-food 58 -afsa 58 -syncytial 58 -crf 58 -brogliatti 58 -melcior 58 -congestions 58 -themself 58 -meiners 58 -granick 58 -mamonyane 58 -krayer 58 -brm 58 -sex-selective 58 -daina 58 -jjh\/db 58 -carella 58 -re-took 58 -ilmor 58 -chedjou 58 -waites 58 -#-max 58 -merriex 
58 -president-general 58 -water-main 58 -psa\/bloomberg 58 -camerounians 58 -savatheda 58 -counter-measure 58 -i.t. 58 -kaid 58 -porkpie 58 -cinchona 58 -makhosini 58 -audemars 58 -messa 58 -unlamented 58 -niedak-ashkenazi 58 -butyrka 58 -musicales 58 -uvm 58 -bayh-dole 58 -schleyer-halle 58 -unzipping 58 -#,###-game 58 -freemarkets 58 -requa 58 -opondo 58 -post-and-beam 58 -kmarts 58 -hejda 58 -peilin 58 -neisser 58 -re-using 58 -takeisha 58 -newstalkzb 58 -pengilly 58 -chinese-african 58 -flippancy 58 -coorsh 58 -apta 58 -willcocks 58 -emlen 58 -citiseconline 58 -rulemakers 58 -distintas 58 -qadderdan 58 -dpk 58 -vortexes 58 -mae-ggl 58 -hodur 58 -karavellas 58 -timidria 58 -haina 58 -nare 58 -overcast\/sleet 58 -eurest 58 -####-###### 58 -kakhi 58 -senafe 58 -zetti 58 -ciencia 58 -boya 58 -weadock 58 -pestriaev 58 -dhahi 58 -hui-mei 58 -el-arabi 58 -kotto 58 -sundstroem 58 -hamui 58 -provos 58 -senio 58 -bochner 58 -edyta 58 -minamoto 58 -scorings 58 -gesner 58 -adiyaman 58 -wassail 58 -fishhook 58 -krivda 58 -testifed 58 -momanyi 58 -cafarelli 58 -re-christened 58 -naaman 58 -chien-chih 58 -trailside 58 -wenhao 58 -deplaning 58 -legree 58 -anti-personal 58 -yemelyanov 58 -test-market 58 -ikramullah 58 -plateauing 58 -batad 58 -peu 58 -samling 58 -glaudini 58 -filippidis 58 -zhifu 58 -anti-divorce 58 -scrooges 58 -lashari 58 -ideologists 58 -sagapolutele 58 -niobium 58 -chows 58 -benfer 58 -hhh 58 -two-footer 58 -non-acceptance 58 -md\/ji 58 -auchi 58 -veanne 58 -brick-red 58 -varga-balazs 58 -peskiric 58 -restitutions 58 -interjection 58 -in-place 58 -cavagnaro 58 -lewinksy 58 -saudi-brokered 58 -rotax 58 -kawagoe 58 -wlosowicz 58 -puleo 58 -hoth 58 -disch-falk 58 -seiple 58 -nitrogenous 58 -urubamba 58 -particularily 58 -hagop 58 -bachirou 58 -cpc-led 58 -arbor-based 58 -valeant 58 -gunhild 58 -markswoman 58 -queyranne 58 -phoumsavanh 58 -kirschstein 58 -sharjah-based 58 -harriton 58 -baktiari 58 -confernce 58 -premade 58 -sleet\/overcast 58 -denbeaux 58 
-tyibilika 58 -teya 58 -squalene 58 -guzy 58 -ampad 58 -nua 58 -oundjian 58 -yankees-mets 58 -clyfford 58 -bootes 58 -degeratu 58 -heshmatollah 58 -self-diagnosis 58 -gulyanamitta 58 -belgium-politics 58 -zukor 58 -jyujiya 58 -inadmissibility 58 -security-wise 58 -spiliotes 58 -zambelli 58 -axford 58 -closed-in 58 -laender 58 -anti-flooding 58 -icca 58 -so-what 58 -pharmacogenomics 58 -prefecture-level 58 -declines# 58 -briffa 58 -plexicushion 58 -royal-blue 58 -add-ins 58 -sfp 58 -wide-awake 58 -mahoud 58 -#motion 58 -riwhite 58 -frappes 58 -recoletos 58 -accountholders 58 -most-affected 58 -yalie 58 -sugarmann 58 -ahmadov 58 -ictu 58 -conservative-minded 58 -finigan 58 -blackard 58 -labant 58 -vierra 58 -zizka 58 -figuras 58 -esquimalt 58 -pottuvil 58 -khassawneh 58 -alben 58 -peru-hostages 58 -rebozos 58 -konchalovsky 58 -kesayeva 58 -koyo 58 -flukey 58 -over-bought 58 -dollar-supportive 58 -pushtuns 58 -distribuidora 58 -lalji 58 -thiermann 58 -advance-fee 58 -woodberry 58 -waleska 58 -oysterman 58 -ssali 58 -fievet 58 -pro-and 58 -metaxas 58 -cairngorms 58 -cohabited 58 -doyev 58 -johnsrud 58 -rayani 58 -salvatori 58 -bowdlerized 58 -epidaurus 58 -tilahun 58 -counter-guerrilla 58 -maulani 58 -chalai 58 -good-for-you 58 -soon-to-retire 58 -forkballs 58 -resettlers 58 -ultimatetv 58 -oblinger 58 -ritto 58 -fiberweb 58 -dungey 58 -pinholes 58 -#-diego 58 -kikhia 58 -tugluk 58 -progressivity 58 -asakawa 58 -kolawole 58 -regpay 58 -syktyvkar 58 -anti-copying 58 -luda 58 -tv-like 58 -weisbach 58 -multicanal 58 -export-fueled 58 -lucherini 58 -bossio 58 -numeiri 58 -swagel 58 -family-like 58 -lodar 58 -finalises 58 -larwood 58 -u.n.-proposed 58 -self-designated 58 -record-hard 58 -syncing 58 -winebrenner 58 -provosts 58 -naisbitt 58 -chetcuti 58 -wentland 58 -capetillo 58 -andreyeva 58 -skvortsova 58 -role-players 58 -artek 58 -choling 58 -##,###-sq-m 58 -resized 58 -hartack 58 -wilens 58 -sentimentalism 58 -serifovic 58 -cancio 58 -hausch 58 -earlene 58 -mengwa 58 
-yokado 58 -pisarcik 58 -loredana 58 -naqi 58 -most-asked 58 -chicha 58 -skammelsrud 58 -supinit 58 -wanseele 58 -fragrance-free 58 -leffe 58 -http://www.firstunion.com 58 -nuder 58 -rouf 58 -zigomanis 58 -mp-# 58 -guma 58 -market-access 58 -square-kilometre 58 -urbanists 58 -tortes 58 -franking 58 -vorenberg 58 -schwieterman 58 -bawazir 58 -crankcase 58 -derogatis 58 -karakasevic 58 -entropia 58 -barlonyo 58 -hannibal-lagrange 58 -artnews 58 -adventurists 58 -money-changer 58 -housecoat 58 -nordictrack 58 -wilfert 58 -mccain-obama 58 -musselwhite 58 -bangzhu 58 -meyerbeer 58 -tegucigalpa-san 58 -al-saqqa 58 -quercus 58 -louay 58 -yenga 58 -rajauri 58 -cigarroa 58 -kimiyasu 58 -nicotine-free 58 -zelinsky 58 -diagon 58 -permach 58 -symmetrix 58 -firnas 58 -flower-decked 58 -istar 58 -quintupling 58 -breyers 58 -armyworm 58 -abdoulie 58 -care-related 58 -aldar 58 -beauteous 58 -yardages 58 -non-greek 58 -cutthroats 58 -flessel-colovic 58 -mago 58 -margets 58 -barela 58 -flight-to-quality 58 -most-coveted 58 -zubar 58 -ruzowitzky 58 -petru-alexandru 58 -al-hajji 58 -scots-irish 58 -chabang 58 -bouafle 58 -xxxxxxxend 58 -mintoo 58 -lolab 58 -largely-christian 58 -flim-flam 58 -wen-ying 58 -greensomes 58 -practioners 58 -ya\/ml 58 -chanters 58 -karapetian 58 -in-seat 58 -shirko 58 -bistrot 58 -esmaeel 58 -insanitary 58 -cheka 58 -issue-driven 58 -gok 58 -auclair 58 -veira 58 -craftsman-style 58 -honorifics 58 -martek 58 -ebri 58 -ferdie 58 -seshaiah 58 -sunkin 58 -caldrons 58 -cabals 58 -foredeck 58 -tight-head 58 -sung-nam 58 -demerara 58 -jucker 58 -thumb-size 58 -sciame 58 -eriksen\/mette 58 -ricketson 58 -domesticating 58 -khagendra 58 -abla 58 -rescorla 58 -kanta 58 -stylez 58 -ruwenzori 58 -kilfoyle 58 -steinmann 58 -tabakh 58 -inquisitorial 58 -heldman 58 -vus#### 58 -whole-language 58 -kimberlee 58 -harnecker 58 -dary 58 -azimkar 58 -reneau 58 -in-the-round 58 -vucetic 58 -metrowerks 58 -togiola 58 -motor-racing 58 -themistocleous 58 -rugut 58 -bectu 58 
-www.orbitz.com 58 -pagos 58 -akeem 58 -anouncement 58 -flat-line 58 -sindical 58 -bc-mexico 58 -three-building 58 -chambal 58 -bhoj 58 -asra 58 -p.p. 58 -overexploitation 58 -time-keeping 58 -bearse 58 -robp 58 -marakesh 58 -avena 58 -highwayman 58 -barnaba 58 -co-coaches 58 -rfranklin 58 -self-abuse 58 -farra 58 -below-strength 58 -bolek 58 -devkota 58 -farney 58 -junck 58 -lefranc 58 -lindale 58 -likhachev 58 -flywheels 58 -cnca 58 -samin 58 -samie 58 -glasberg 58 -blue-suited 58 -kazarlyga 58 -vacanti 58 -purwoprandjono 58 -##-billion-baht 58 -bahaeddin 58 -pw#### 58 -world-shaking 58 -balbina 58 -darwyn 58 -aleskerov 58 -primp 58 -chue 58 -o'cealleagh 58 -scotrail 58 -figeroux 58 -thanawat 58 -iihs 58 -hendardji 58 -boldyrev 58 -mihoko 58 -sibani 58 -diapering 58 -animal-feed 58 -lintel 58 -infobahn 58 -kuribayashi 58 -bretons 58 -bevmark 58 -risk-sensitive 58 -flanner 58 -groenvold 58 -kaspersky 58 -salt-free 58 -cankaya 58 -zna 58 -rabson 58 -outvote 58 -hanada 58 -tourist-filled 58 -adobes 58 -athamna 58 -nato-brokered 58 -priding 58 -ngetich 58 -benac 58 -eelco 58 -teza 58 -keahon 58 -emeryville-based 58 -isentress 58 -munford 58 -critchfield 58 -chinandega 58 -carax 58 -posers 58 -tehreek-i-jafria 58 -tbarnhart@ajc.com 58 -picolinate 58 -cobwebbed 58 -conventionality 58 -byelections 58 -macallister 58 -papoose 58 -##-microgram 58 -sled-dog 58 -myostatin 58 -albaladejo 58 -ndirangu 58 -honderich 58 -sceptically 58 -garro 58 -vereeniging 58 -h-shaped 58 -lillington 58 -diddams 58 -coffy 58 -doorjamb 58 -militia-style 58 -magluta 58 -mazzariol 58 -stanford-trained 58 -agriculturalists 58 -spit-and-polish 58 -servility 58 -padmore 58 -satmars 58 -seren 58 -subsitute 58 -serwotka 58 -kuokuang 58 -tavlaridis 58 -congressman-elect 58 -two-euro 58 -series-levelling 58 -cricket-ashes-aus-eng 58 -digital-only 58 -provencher 58 -muslim-jewish 58 -resizing 58 -hula-hoop 58 -fuegos 58 -fair-housing 58 -waitakere 58 -bouldin 58 -chilanga 58 -suceeded 58 -sundeen 58 
-six-phase 58 -disassociation 58 -shteyngart 58 -crystalize 58 -shihan 58 -protocal 58 -long-march 58 -fazal-ur 58 -jackknife 58 -mcnairy 58 -ealey 58 -drivon 58 -pleasure-seeking 58 -sciolino 58 -shore-up 58 -then-banned 58 -german-swiss 58 -polymorphism 58 -three-foot-high 58 -anti-speculation 58 -stoudmann 58 -koreas-talks 58 -ivanek 58 -kanyenda 58 -holyfield-lennox 58 -naceri 58 -anti-globalist 58 -jeg 58 -folden 58 -talpur 58 -r.f. 58 -##,###-barrel-a-day 58 -binshu 58 -doerfler 58 -juanmi 58 -organdy 58 -kostal 58 -chola 58 -presgrave 58 -cosgrave 58 -zhevnov 58 -recapitalizations 58 -us-philippines 58 -half-a-percentage 58 -mayenne 58 -pozole 58 -jva 58 -higher-up 58 -erra 58 -btrc 58 -taxachusetts 58 -lyor 58 -advocate-general 58 -toko 58 -blowzy 58 -kantaras 58 -consiste 58 -indama 58 -xiaoxiang 58 -jamelli 58 -esquipulas 58 -cd-based 58 -maxillofacial 58 -bernabei 58 -meshell 58 -parlin 58 -now-imprisoned 58 -ummc 58 -posterboard 58 -degradable 58 -annasue 58 -anjouanese 58 -gallivan 58 -usc-notre 58 -sappho 58 -creameries 58 -turbocharge 58 -soslan 58 -thurairaja 58 -singapore-china 58 -antiperspirant 58 -abominably 58 -puiu 58 -newfoundlanders 58 -kaylene 58 -plaine 58 -howorth 58 -non-save 58 -dijana 58 -ronaldson 58 -aivar 58 -hudong 58 -##-student 58 -pravastatin 58 -delima 58 -six-continent 58 -union-imposed 58 -kaz\/cof 58 -sandy-colored 58 -longliners 58 -janic 58 -amerithrax 58 -blurriness 58 -aleki 58 -rokkasho-mura 58 -sogang 58 -methow 58 -rosalee 58 -rapiscan 58 -nepali-language 58 -eastgate 58 -timis 58 -record-indoor 58 -adelaida 58 -ethno-sectarian 58 -pay-for-view 58 -dahdouh 58 -desisto 58 -huldai 58 -brunswijk 58 -hundertwasser 58 -chiew 58 -pyrotechnical 58 -snatchings 58 -jabri 58 -kleeblatt 58 -turkish-registered 58 -ghodhbane 58 -masunungure 58 -mousepad 58 -ascott 58 -lapoint 58 -quarter-page 58 -belfiore 58 -se-r 58 -martinville 58 -salchow-triple 58 -minored 58 -ahe 58 -counter-complaint 58 -mbula 58 -khadi 58 -wolfli 58 
-faggots 58 -vigouroux 58 -chaifetz 58 -giugliano 58 -defoliated 58 -vashee 58 -maumee 58 -jutge 58 -loudi 58 -powderly 58 -razini 58 -self-seeking 58 -best-written 58 -arkaah 58 -patroon 58 -motormen 58 -pedagogic 58 -soendral 58 -pharynx 58 -qudratullah 58 -proces 58 -alcohol-monitoring 58 -corange 58 -a.t.m. 58 -rasshan 58 -sabbar 58 -malmberg 58 -sverrisson 58 -tiler 58 -sabag 58 -http://www.state.gov/ 58 -feichter 58 -#-george 58 -wheelis 58 -cleric-run 58 -roskot 58 -adjetey-nelson 58 -propios 58 -quickies 58 -nzimbi 58 -gazprombank 58 -woofer 58 -trita 58 -weigman 58 -congealing 58 -solidarite 58 -narcoleptic 58 -pulai 58 -bewag 58 -computadora 58 -sppf 58 -soundcheck 58 -zaituc 58 -adurogboye 58 -orogen 58 -demeanors 58 -neumayr 58 -mso 58 -ntou 58 -westtown 58 -re-directed 58 -avalanche-journal 58 -ishmail 58 -pearsmhnytimes.com 58 -posthaste 58 -needle-nose 58 -self-assessments 58 -lewisite 58 -kuwaiti-based 58 -kostevych 58 -dogonadze 58 -korasuv 58 -hampl 58 -concealed-weapon 58 -shragai 58 -kappos 58 -anti-casino 58 -sleith@ajc.com 58 -karyo 58 -kopi 58 -sub-themes 58 -knock-kneed 58 -stendardo 58 -anthuan 58 -rekapac 58 -nasaa 58 -chinese-manufactured 58 -isioma 58 -greenwillow 58 -strad 58 -piontkowski 58 -nine-stroke 58 -avm 58 -parishoners 58 -delfim 58 -pasquini 58 -wigmore 58 -dieuze 58 -scag 58 -berzengi 58 -cazzulani 58 -balmont 58 -marriotts 58 -cete 58 -phertzberg 58 -wapenaar 58 -hodara 58 -curcumin 58 -canoni 58 -abdulmajid 58 -maroon# 58 -anti-vaccine 58 -mini-city 58 -bioland 58 -lanced 58 -hepatology 58 -balakong 58 -eneco 58 -subuh 58 -wolfenstein 58 -henchy 58 -tumukunde 58 -argentina-vote 58 -soft-dollar 58 -merit-making 58 -qanoni 58 -czech-built 58 -al-khayat 58 -heidsieck 58 -deinstitutionalization 58 -mail-fraud 58 -calakmul 58 -chiasso 58 -faveur 58 -ranarith 58 -satara 58 -cash-management 58 -ayittey 58 -buschbaum 58 -compatibles 58 -olatunji 58 -hourglass-shaped 58 -abousamra 58 -serafino 58 -rassemblement 58 -cohen-tannoudji 
58 -artspace 58 -schwartlander 58 -yeltsova 58 -kiu 58 -zoubeir 58 -asias 58 -orv 58 -yaring 58 -sheppards 58 -two-course 58 -cadrez 58 -charima 58 -canana 58 -geodon 58 -deplasco 58 -fully-armed 58 -brontes 58 -kbp 58 -stehn 58 -#-iker 58 -baff 58 -perlas 58 -madhes 58 -preis 58 -harward 58 -koloane 58 -deshayes 58 -falomo 58 -saturno 58 -corrupters 58 -planetariums 58 -bifengxia 58 -flum 58 -propound 58 -hemanshu 58 -imacec 58 -feyernoord 58 -ulta 58 -atli 58 -re-hire 58 -interclan 58 -gitonga 58 -aand 58 -icmr 58 -jarar 58 -skank 58 -thrane 58 -trousseau 58 -spent-fuel 58 -chilaquiles 58 -bump-drafting 58 -iao 58 -vanhala 58 -dj\/ak## 58 -proselytization 58 -dongdu 58 -rave-up 58 -uzebekistan 58 -ntagerura 58 -pengkalan 58 -anti-fascism 58 -domingue 58 -lamport 58 -kaillie 58 -fernet 58 -bekaert 58 -project-based 58 -mauriac 58 -abrogates 58 -basketball-wise 58 -gansey 58 -emailing 58 -jiulong 58 -mechale 58 -vignerons 58 -federative 58 -emmo 58 -panino 58 -easterain 58 -aceto 58 -manresa 58 -own-goals 58 -lamongan 58 -morinaga 58 -fourth-season 58 -duvergel 58 -ruzindana 58 -yu-ih 58 -varnishing 58 -non-televised 58 -ederer 58 -grosboell 58 -sakanyi 58 -wrn 58 -costless 58 -laxalt 58 -lb-# 58 -halse 58 -upper-tier 58 -sheinkin 58 -shadwell 58 -katselas 58 -www.aol.com 58 -yankey 58 -mytouch 58 -enyimba\/ngr 58 -brokedown 58 -jen-hung 58 -kisutu 58 -kuniyoshi 58 -dahiyah 58 -amrhein 58 -up-country 58 -rolo 58 -smartmedia 58 -watch-helsinki 58 -khawazakhela 58 -ocws 58 -yacoubian 58 -wandlike 58 -tratan 58 -temane 58 -tainan-based 58 -micropal 58 -korrodi 58 -ms-as 58 -ahdyar 58 -southerton 58 -kakiuchi 58 -state-of-origin 58 -breault 58 -filled-in 58 -kawase 58 -bysshe 58 -pericard 58 -sakwiba 58 -qingyang 58 -pedantry 58 -kamangar 58 -unlawfulness 58 -gdc 58 -astuteness 58 -vca 58 -highly-qualified 58 -pressurised 58 -fliegende 58 -ambush-style 58 -sweatman 58 -bembry 58 -rabelais 58 -hellion 58 -jaune 58 -pade 58 -cylon 58 -nnt 58 -cod-style 58 -manoeuvering 
58 -lubero 58 -knocker 58 -npower 58 -oeystein 58 -stellone 58 -badou 58 -berlinecke 58 -shengrong 58 -non-sporting 58 -anti-moslem 58 -http://www.defenselink.mil/ 58 -macgillivary 58 -pigheaded 58 -grogin 58 -pluta 58 -madasamy 58 -lozzano 58 -bong-kil 58 -christmas-tree 58 -metroid 58 -kudisch 58 -runups 58 -kinsolving 58 -zety 58 -guitarist-singer 58 -vinayagamoorthi 58 -movilnet 58 -ppa-containing 58 -seyval 58 -feliks 58 -shiite-populated 58 -harymurti 58 -tingo 58 -grassless 58 -cheap-looking 58 -zhengming 58 -jastrow 58 -nrcs 58 -shirkers 58 -bc-af-fin 58 -batac 58 -finocchiaro 58 -polyrhythms 58 -weale 58 -fang-yu 58 -ispa 58 -condensers 58 -synergie 58 -rottman 58 -test-bed 58 -afroyim 58 -anti-arms 58 -amathila 58 -billion\/## 58 -todman 58 -rabies-free 58 -gilbreath 58 -ultra-conservatives 58 -out-of-context 58 -flook 58 -pongpanich 58 -skulked 58 -a###xwb 58 -semiskilled 58 -synoptics 58 -home-ported 58 -ih 58 -bornand 58 -keshubhai 58 -adianto 58 -asen 58 -d-hillsborough 58 -commercial-grade 58 -diery 58 -tzoganis 58 -al-nounou 58 -tedd 58 -map-making 58 -blockbusting 58 -be-in 58 -trouble-torn 58 -skipp 58 -whitsunday 58 -hutterites 58 -ttc 58 -occam 58 -witters 58 -winterreise 58 -al-hasani 58 -cities-abc 58 -kurtag 58 -kahoolawe 58 -radiofrequency 58 -elfers 58 -razor-edged 58 -profanity-filled 58 -nedzad 58 -elsworth 58 -sashays 58 -redbone 58 -arkhipova 58 -geck 58 -fouras 58 -erdf 58 -sivori 58 -point-based 58 -apennines 58 -raffanello 58 -foot-fault 58 -emirsyah 58 -safian 58 -poppy-producing 58 -bagong 58 -roehrig 58 -stutes 58 -michelet 58 -anter 58 -campionati 58 -semmelweis 58 -dos-based 58 -sadosky 58 -st-# 58 -franker 58 -brende 58 -mihajlov 58 -eshetu 58 -barile 58 -roey 58 -bijaya 58 -matcha 58 -hounslow 58 -sischy 58 -anxi 58 -topdog\/underdog 58 -predisposing 58 -tightenings 58 -colantuono 58 -duflo 58 -tervuren 58 -slebos 58 -krein 58 -malu-malu 58 -nkorea-nuclear-weapons-us 58 -d'hondt 58 -marce 58 -movieline 58 -bles 58 -hyeon 58 
-casner 58 -dry-aged 58 -clomping 58 -jd\/pi## 58 -giganotosaurus 58 -five-six 58 -iita 58 -then-teammate 58 -badola 58 -ahrendts 58 -christoforakos 58 -al-daradji 58 -hathorn 58 -computer-operated 58 -soviet-american 58 -mousetraps 58 -turetsky 58 -farc-held 58 -medzamor 58 -herpoel 58 -scissor-kick 58 -wodie 58 -quirine 58 -shrivelled 58 -flameproof 58 -less-talented 58 -sacharow 58 -thin-bladed 58 -cavour 58 -allaga 58 -dornbush 58 -#-pounder 58 -schuermann 58 -mafia-related 58 -bellingen 58 -crichlow 58 -dividend-rich 58 -lorsch 58 -anglada 58 -non-actors 58 -anti-surface 58 -deepcut 58 -lazarev 58 -thumb-sucking 58 -bi-polar 58 -madrid-barajas 58 -thile 58 -barn-burner 58 -calibrations 58 -stimulus-fueled 58 -unplanted 58 -keyboardists 58 -ecuador-vote 58 -armuelles 58 -chittick 58 -taavi 58 -mondesire 58 -smederevska 58 -ding-dong 58 -promphan 58 -lovsan 58 -loveseat 58 -fullscale 58 -nazzaro 58 -mulvenon 58 -hillegass 58 -vanderford 58 -goodlad 58 -carphedon 58 -court-at-law 58 -pseudo.com 58 -sollers 58 -putterman 58 -finnegans 58 -durakovic 58 -hamayon 58 -duct-tape 58 -bardic 58 -scaled-up 58 -still-robust 58 -wilchcombe 58 -wathiq 58 -hiltachk 58 -krylatskoye 58 -boose 58 -data-intensive 58 -danielides 58 -transsexuality 58 -claw-foot 58 -none-out 58 -elucidating 58 -tomasch 58 -brignol 58 -jeyarajah 58 -dangor 58 -caic 58 -goaland 58 -mellis 58 -somali-based 58 -sidetracking 58 -sushmita 58 -mid-stride 58 -domestically-traded 58 -redox 58 -shoba 58 -house-arrest 58 -coppens 58 -at-tuffah 58 -nijssen 58 -hely 58 -rouged 58 -lounger 58 -a.r.c. 
58 -dubovsky 58 -non-payments 58 -enviga 58 -frenz 58 -#-boris 58 -misprision 58 -go-along 58 -riccadonna 58 -machine-gunner 58 -vellore 58 -tackie 58 -constitutionalists 58 -tongliao 58 -paderina 58 -nan-cheng 58 -cattan 58 -us-immigration 58 -sparq 58 -mashingaidze 58 -togawa 58 -houseflies 58 -semenzato 58 -moton 58 -atlanta-bound 58 -summerer 58 -marmottan 58 -quartier 58 -el-motassadeq 58 -more-profitable 58 -boada 57 -malim 57 -voit 57 -tichtchenko 57 -qgpc 57 -sakassou 57 -hamadou 57 -geeked 57 -anagrams 57 -politicizes 57 -chainarong 57 -fourth-youngest 57 -schulweis 57 -unsafeguarded 57 -urmila 57 -mobile-telephone 57 -grigson 57 -abdul-samad 57 -el-youssef 57 -suffuse 57 -castellane 57 -line-outs 57 -manohara 57 -christiansborg 57 -standley 57 -gasoline\/electric 57 -munton 57 -aaib 57 -postcommunist 57 -buckcherry 57 -teletext 57 -juancho 57 -steep-sided 57 -micro-enterprise 57 -medicals 57 -cloudlike 57 -griet 57 -jintropin 57 -funsho 57 -schaus 57 -chenoweth-hage 57 -club-mate 57 -israel-vote 57 -gokavi 57 -toufic 57 -scalloping 57 -lfc 57 -chatwal 57 -movie-mad 57 -american-european 57 -butchie 57 -televisual 57 -chainrai 57 -mambasa 57 -quaye 57 -dilg 57 -http://www.nobel.no 57 -jabotinsky 57 -nilly 57 -lakela 57 -frostily 57 -australia-bushfires 57 -maumalanga 57 -coleccion 57 -##-carry 57 -weisser 57 -morago 57 -kpatinde 57 -korun 57 -kc-pq 57 -nextlink 57 -tbilissi 57 -azua 57 -#m## 57 -obersalzberg 57 -sea-skimming 57 -nizuc 57 -sacombank 57 -nitrate-based 57 -f.h. 
57 -farj 57 -lagunov 57 -millipede 57 -#-cd 57 -musclebound 57 -bloche 57 -dissuades 57 -tae-dong 57 -arms-related 57 -suk-tae 57 -conceptualizing 57 -pokaski 57 -often-contentious 57 -seleccion 57 -dialogo 57 -grevenmacher 57 -alledged 57 -maziarz 57 -ecompanies 57 -slow-building 57 -greczyn 57 -packham 57 -lamego 57 -saddlebag 57 -strokosch 57 -eutaw 57 -shophouses 57 -xxiv 57 -nickel-cadmium 57 -shuning 57 -http://www.homedepot.com 57 -sunfin 57 -atg 57 -windmilling 57 -zostavax 57 -then-majority 57 -revenant 57 -daish 57 -cvijanovic 57 -butoh 57 -marianist 57 -visoth 57 -eidul 57 -isaura 57 -aleynikov 57 -shalita 57 -benegas 57 -mine-strewn 57 -litif 57 -qallab 57 -percolation 57 -magnesite 57 -sung-jin 57 -cook-offs 57 -ronayne 57 -carias 57 -calendula 57 -elmaghraby 57 -fafner 57 -scibelli 57 -huhhot 57 -sidex 57 -bank-issued 57 -perfecta 57 -vitrines 57 -ho-chunk 57 -alstyne 57 -sydkraft 57 -football-wise 57 -silverite 57 -pagliaro 57 -overdubbed 57 -mighani 57 -finger-snapping 57 -liron 57 -multi-trillion 57 -ortuno 57 -drop-kick 57 -mussavi 57 -brasilia-based 57 -meteoroid 57 -oxygen-deprived 57 -rentech 57 -puzzle-solving 57 -pro-ravalomanana 57 -thmey 57 -zalben 57 -liko 57 -qardaha 57 -muayad 57 -lottner 57 -conceptualist 57 -laychak 57 -badini 57 -jazzmen 57 -macaco 57 -nexstar 57 -especies 57 -warbirds 57 -avelar 57 -ulcerated 57 -murshidabad 57 -marife 57 -grillers 57 -marakwet 57 -sanroma 57 -college-preparatory 57 -bartholomay 57 -dpp-initiated 57 -duskin 57 -anesthetizing 57 -belgrade-controlled 57 -estragon 57 -novolipetsk 57 -caton-jones 57 -offman 57 -jetways 57 -am\/ji 57 -hatemonger 57 -zeo 57 -dial-around 57 -hunzike 57 -wicha 57 -cupiagua 57 -mewelde 57 -webmethods 57 -dacca 57 -agrast 57 -indonesia-weather-floods 57 -sumaya 57 -magliore 57 -seljan 57 -dead-eyed 57 -rezidor 57 -rubberneck 57 -substantia 57 -azzurro 57 -first-responder 57 -peruses 57 -janusaitis 57 -summiting 57 -mantas 57 -posthumus 57 -sufjan 57 -often-delayed 57 -resiana 
57 -rrustem 57 -pulled-together 57 -milmo 57 -compa 57 -prezant 57 -lucke 57 -prosthetist 57 -se# 57 -luisita 57 -molinelli 57 -earthrights 57 -osayemi 57 -market-beating 57 -sincor 57 -rexburg 57 -miku 57 -anti-disease 57 -already-crowded 57 -santry 57 -friesian 57 -tg-pyg 57 -nce 57 -cottoned 57 -vaugrenard 57 -candleholders 57 -mudi 57 -hawksworth 57 -jinglian 57 -schuon 57 -dehesa 57 -puyo 57 -biosensor 57 -periodnone 57 -non-deductible 57 -eid-ul 57 -palestrina 57 -mitic 57 -segars 57 -entrapping 57 -###w 57 -vls\/nvw 57 -peachpit 57 -small-plane 57 -norsworthy 57 -lello 57 -wair 57 -latinpass 57 -souquet 57 -reveles 57 -soapboxes 57 -tholut 57 -pompton 57 -fmd-free 57 -something-or-other 57 -nayoko 57 -forum-asia 57 -interferometer 57 -hortensia 57 -tifosi 57 -u.s.-provided 57 -brasfield 57 -http://www.people-press.org 57 -mojokerto 57 -stankalla 57 -gallien 57 -valda 57 -speciosa 57 -lightly-regarded 57 -bublitz 57 -chongryong 57 -bit-part 57 -sharabati 57 -drottningholm 57 -eum 57 -pongrat 57 -tesa 57 -multicast 57 -+##,### 57 -mushed 57 -ravello 57 -eye-fi 57 -calandra 57 -shujah 57 -rural\/metro 57 -djeric 57 -##,###-points 57 -houtart 57 -idigov 57 -russian-supported 57 -caa# 57 -tionne 57 -gabai 57 -dosek 57 -sadyk 57 -calzati 57 -hip-hugger 57 -iue-cwa 57 -ksf 57 -hpa-an 57 -finamex 57 -berden 57 -ameco 57 -non-jordanians 57 -toxford 57 -mulembwe 57 -rovos 57 -extrication 57 -cutchogue 57 -bang-andersen 57 -lucchetti 57 -uninstalling 57 -excises 57 -azaouagh 57 -decompressing 57 -northwesterners 57 -wielkopolski 57 -tahlequah 57 -optimark 57 -gheen 57 -eurlings 57 -unclimbed 57 -esti 57 -niyonzima 57 -pro-rata 57 -giannoulas 57 -kodjoe 57 -ulanqab 57 -tiliwaldi 57 -baldock 57 -#-meters 57 -ghorak 57 -bovey 57 -sameur 57 -chien-kuo 57 -colten 57 -vincenti 57 -square-kilometers 57 -garamvoelgyi 57 -zippel 57 -then-commander 57 -##.#-nautical 57 -unpowered 57 -hanly 57 -parten 57 -liederman 57 -velayat 57 -willo 57 -coonelly 57 -kitov 57 -swiss-educated 
57 -outraise 57 -sinta 57 -#-felix 57 -virts 57 -yome 57 -saury 57 -gretz 57 -isoun 57 -mpigi 57 -nstp 57 -modzeleski 57 -samran 57 -wazed 57 -reappraising 57 -regola 57 -exor 57 -france-telecom 57 -iresearch 57 -soz 57 -n.b.a. 57 -jenai 57 -liukko 57 -wenpu 57 -cervenko 57 -halfa 57 -yefremova 57 -biaggio 57 -mimmo 57 -iraq-unrest-us-toll 57 -quart-size 57 -favalora 57 -hockey-mad 57 -doppelgangers 57 -ghalibaf 57 -marijnissen 57 -open-face 57 -sytem 57 -fredricksen 57 -shafayat 57 -safeen 57 -programe 57 -kuroichi 57 -burqa-style 57 -xuesen 57 -dissembled 57 -uptagrafft 57 -canadiense 57 -sung-wook 57 -paravant 57 -kada 57 -short-stay 57 -pinedo 57 -balwinder 57 -tibon 57 -prig 57 -ducent 57 -kc-###s 57 -ersberg 57 -family-type 57 -lardin 57 -dicussed 57 -kaim 57 -webnews 57 -kavak 57 -debut-making 57 -hewitson 57 -moeletsi 57 -tapit 57 -updegrove 57 -soft-sided 57 -hudepohl 57 -reflectivity 57 -tigerland 57 -#.##-per-share 57 -halandri 57 -plote 57 -temporaries 57 -gren 57 -merlet 57 -odera 57 -lingao 57 -sleazier 57 -dialectics 57 -doubletalk 57 -al-siddiq 57 -tifatul 57 -sanabis 57 -sc### 57 -vanore 57 -alair 57 -mae-eap 57 -spookiness 57 -hyson 57 -nounou 57 -nasreddine 57 -rose-marie 57 -completamente 57 -non-starters 57 -beguin 57 -bell-bottomed 57 -handloom 57 -abu-zeid 57 -tolstaya 57 -transwestern 57 -maraven 57 -neoforma 57 -galster 57 -viraat 57 -engqvist 57 -sadiya 57 -idiot-proof 57 -france-politics-jobs-youth 57 -larini 57 -yuzawa 57 -utilitarianism 57 -antosh 57 -belize-flagged 57 -kasetsiri 57 -kekauoha 57 -kaleida 57 -deviously 57 -boucheron 57 -hanen 57 -crose 57 -pawson 57 -sudikoff 57 -allayar 57 -industry-# 57 -perrott 57 -unframed 57 -kirpan 57 -dimasa 57 -popma 57 -mutianyu 57 -aygun 57 -iannelli 57 -d-dayton 57 -brand-named 57 -persian-speaking 57 -garforth 57 -sucess 57 -disposer 57 -rosangela 57 -pscs 57 -hanway 57 -cambon 57 -presas 57 -j&b 57 -checkmated 57 -bonnardeaux 57 -guzzetti 57 -digel 57 -mweemba 57 -party-sponsored 57 
-uncommanded 57 -high-touch 57 -draughon 57 -shanzai 57 -khakimov 57 -japonicus 57 -unigate 57 -##-anastasia 57 -nabatiye 57 -penalties_none 57 -five-song 57 -lightning-strike 57 -thumbnail-sized 57 -gutteres 57 -swardson 57 -kls 57 -still-simmering 57 -burges 57 -am\/sbg 57 -golf-epga-esp 57 -short-termism 57 -lifefx 57 -gtm 57 -huntley-brinkley 57 -saidat 57 -sequencer 57 -seperatist 57 -ki-chi 57 -spoksman 57 -human-driven 57 -laue 57 -vancleave 57 -sjoblom 57 -barbacoa 57 -statman 57 -shaneen 57 -antique-filled 57 -mhh-krg 57 -remond 57 -bijli 57 -carby 57 -greece-style 57 -misapplying 57 -dorsen 57 -buk 57 -schwarzenbauer 57 -raiz 57 -banin 57 -daryn 57 -artemisinin-based 57 -kbohls@statesman.com 57 -persahabatan 57 -wormy 57 -samsung\/radioshack 57 -baxter-johnson 57 -re-tried 57 -aetats 57 -head-turner 57 -degganssptimes.com 57 -thirty-thousand 57 -imperiousness 57 -kesha 57 -tenace 57 -ferguson-mckenzie 57 -jaovisidha 57 -agassa 57 -barriga 57 -air-strike 57 -home-opener 57 -turbi 57 -umali 57 -kralik 57 -pump-and-dump 57 -bassat 57 -keasler 57 -tank-killing 57 -misdirecting 57 -ex-fighter 57 -mngomeni 57 -rejigged 57 -novska 57 -backstabbers 57 -spa\/qst 57 -re-tally 57 -intermune 57 -lifa 57 -urbanczyk 57 -sarabeth 57 -coke-bottle 57 -battle-readiness 57 -kawkab 57 -kerdyk 57 -wenceslaus 57 -mind-expanding 57 -boutroue 57 -tanona 57 -salivation 57 -tricolore 57 -out-gunned 57 -jacobowitz 57 -bacot 57 -participar 57 -alprazolam 57 -polho 57 -funnyordie.com 57 -panarin 57 -blanchfield 57 -yolane 57 -qualia 57 -robertses 57 -ovp 57 -peissel 57 -dratshev 57 -ieremia-stansbury 57 -korchnoi 57 -pvs-lk 57 -jan.-sep 57 -schlickeisen 57 -courier-post 57 -high-cholesterol 57 -etich 57 -guolin 57 -giesen 57 -industy 57 -cucherat 57 -villate 57 -evelin 57 -kelly-goss 57 -u.s.-korea 57 -kivutha 57 -overdramatized 57 -nemzet 57 -cassata 57 -darle 57 -cur 57 -actor-politician 57 -dingbat 57 -gerspach 57 -better-established 57 -restos 57 -tristesse 57 -ausmin 57 
-computer-like 57 -thongsing 57 -ramberg 57 -hjort 57 -over-estimated 57 -bioremediation 57 -stress-reduction 57 -televisi 57 -chelny 57 -sundazed 57 -radio-cassette 57 -opening-week 57 -reghecampf 57 -highest-performing 57 -zamanbek 57 -prefigures 57 -red-and-white-striped 57 -metters 57 -travessa 57 -pengiran 57 -copernic 57 -government-granted 57 -qoryoley 57 -kur 57 -martincova 57 -aldis 57 -remissainthe 57 -favor-hamilton 57 -by-# 57 -oooooooooooooooooooo 57 -fieldsman 57 -aa-plus 57 -farmsteads 57 -marrack 57 -fambrough 57 -ongarato 57 -spayd 57 -corsetry 57 -inuvik 57 -salwen 57 -power-grabbing 57 -wornick 57 -kalkstein 57 -papermakers 57 -thwaite 57 -mineira 57 -newbigging 57 -uberstine 57 -roxx 57 -ehrlichiosis 57 -al-noor 57 -chernogorneft 57 -mercally 57 -villamayor 57 -texpool 57 -eye-watering 57 -white-brick 57 -koco 57 -transportations 57 -i-zone 57 -kouadio 57 -websense 57 -clinton-like 57 -kievsky 57 -pietton 57 -athirson 57 -oxygen-generating 57 -youre 57 -supertramp 57 -ergic 57 -kasambala 57 -attention-grabber 57 -j-shaped 57 -qedwa 57 -http://www.ipcc.ch 57 -bossman 57 -last-rock 57 -pentair 57 -bingle 57 -ap# 57 -signally 57 -cundieff 57 -maflahi 57 -ciger 57 -gerlinde 57 -ndambuki 57 -ottakar 57 -braunskill 57 -recopa 57 -journaling 57 -rmit 57 -ernk 57 -non-congress 57 -dayle 57 -q-cells 57 -hetian 57 -boons 57 -non-deliverable 57 -woolston 57 -cristoph 57 -r\/# 57 -manglano 57 -erdinc 57 -gajon 57 -knickknack 57 -rohmat 57 -wen-yuan 57 -harleman 57 -guesso 57 -stamenson 57 -australia-united 57 -nitromethane 57 -eastn 57 -anonyme 57 -stadelmann 57 -siefer 57 -pripps 57 -cross-checks 57 -salzburger 57 -oceaneering 57 -ball-point 57 -mandeep 57 -fratesi 57 -beyrer 57 -sad-faced 57 -krasnomovets 57 -rouhani 57 -eruh 57 -long-battered 57 -toles 57 -parrotfish 57 -tuffree 57 -brasseur 57 -shenkarow 57 -half-a-mile 57 -gyroball 57 -dombi 57 -anzar 57 -tajan 57 -easiness 57 -burgett 57 -hoskyns 57 -kembla 57 -orena 57 -hatam 57 -pinko 57 
-detroit-hamtramck 57 -suborned 57 -kavetas 57 -nielsen\/net 57 -srg 57 -u.s.-patrolled 57 -schuelke 57 -hayim 57 -drobiazko\/povilas 57 -bhumidhar 57 -akan 57 -bungler 57 -heterosexually 57 -shija 57 -dazhen 57 -teikyo 57 -techno-thriller 57 -bezrukov 57 -hobgoblin 57 -gattis 57 -famous-brand 57 -seamico 57 -boobytrapped 57 -downrange 57 -fuel-starved 57 -mcaulay 57 -robustelli 57 -brinegar 57 -rohrbaugh 57 -ismar 57 -bonfils 57 -ponomareva 57 -goorjian 57 -kashmola 57 -on-rushing 57 -tuesday-sunday 57 -still-sluggish 57 -regular-cab 57 -obermayer 57 -kpakol 57 -basketball\/pros 57 -#-virginia 57 -hunding 57 -dangol 57 -pendleton-based 57 -neftegaz 57 -nakaniwa 57 -betham 57 -lagniappe 57 -mbambo 57 -indle 57 -sakorn 57 -pro-khartoum 57 -conjectural 57 -lunda-sul 57 -chiu-chin 57 -bancassurance 57 -limbu 57 -inital 57 -anisotropy 57 -pilbeam 57 -yazawa 57 -arzak 57 -opacic 57 -karasu 57 -http://www.cbs.com 57 -mingaladon 57 -joensuu 57 -weidenbaum 57 -ugwu 57 -anugerah 57 -immunologically 57 -espuelas 57 -izgi 57 -fredie 57 -cariplo 57 -coyotepec 57 -cuyos 57 -nolle 57 -yb\/sbg 57 -eigil 57 -maximilien 57 -barberie 57 -yeongam 57 -harperbusiness 57 -nonlawyers 57 -mekachera 57 -mahlon 57 -veruca 57 -firmo 57 -lamell 57 -sileo 57 -jabarani 57 -imkb 57 -maiffret 57 -odalovic 57 -gingivitis 57 -nagasawa 57 -francistown 57 -kocherlakota 57 -life-saver 57 -seegers 57 -sbcs 57 -markazi 57 -kharbash 57 -fokker-### 57 -wide-release 57 -bajilan 57 -yaral 57 -maione 57 -tokiwa 57 -qld 57 -governable 57 -parry-jones 57 -two-unit 57 -mcclay 57 -mcclam 57 -inner-tube 57 -obanda 57 -confederado 57 -eravur 57 -ribbs 57 -mcclennen 57 -mulund 57 -bordallo 57 -cutely 57 -diferencias 57 -black-tailed 57 -ride-alongs 57 -mosson 57 -notarial 57 -rovereto 57 -skosana 57 -holohan 57 -thamilchelvan 57 -business-management 57 -rerecorded 57 -seomin 57 -roewe 57 -gaffs 57 -whirr 57 -geppetto 57 -chukwueke 57 -bc-na-fea-gen 57 -rock-oriented 57 -mirandes 57 -telkiyski 57 -kalindi 57 
-non-minority 57 -well-greased 57 -nemsadze 57 -southshore 57 -ddungu 57 -lanin 57 -lubuk 57 -zhongqiang 57 -volesky 57 -gontard 57 -kopylov 57 -siestas 57 -honduras-politics-coup 57 -davitian 57 -financial-industry 57 -kantono 57 -pumpkin-colored 57 -reconverted 57 -fixer-uppers 57 -euro##-euro## 57 -epinal 57 -highchairs 57 -browses 57 -js\/jd## 57 -mg\/l 57 -six-decade-old 57 -chimeric 57 -bowdler 57 -shannyn 57 -snick 57 -female-oriented 57 -cambridgeside 57 -giacomini 57 -polityka 57 -marzouki 57 -vullo 57 -fist-pump 57 -vm###-### 57 -al-aoofi 57 -klecker 57 -inhumanly 57 -white-on-black 57 -macneil-lehrer 57 -conkling 57 -al-kharbit 57 -pcrm 57 -best-song 57 -levines 57 -adventuress 57 -castagnetti 57 -mgn 57 -chumbawamba 57 -discontinuous 57 -ojima 57 -karlos 57 -ihar 57 -mid-# 57 -toe-loop 57 -asantehene 57 -ruso 57 -camago-malampaya 57 -sweet-talked 57 -baitadi 57 -kaleidoscopes 57 -ntabakuze 57 -senado 57 -ill-received 57 -vaccum 57 -guigang 57 -voula 57 -katou 57 -montcoal 57 -harilal 57 -wetted 57 -ruman 57 -agni-i 57 -rahbani 57 -schaffhouse 57 -geyserville 57 -elda 57 -##-book 57 -yinhui 57 -leaseplan 57 -midstage 57 -elnora 57 -depende 57 -second-in 57 -mrp 57 -iturralde 57 -engelberger 57 -youde 57 -regivaldo 57 -schaper 57 -totted 57 -elhassan 57 -#,###-patient 57 -slivinski 57 -manically 57 -sampang 57 -bratu 57 -multi-goal 57 -arab-summit 57 -visted 57 -azour 57 -##-sebastian 57 -mabasa 57 -lambayeque 57 -anuradha 57 -kw\/hours 57 -fire-suppression 57 -biliary 57 -yasujiro 57 -crusat 57 -ncds 57 -zager 57 -schmitt-roschmann 57 -familiar-looking 57 -airiness 57 -moamba 57 -vocci 57 -godchildren 57 -caleigh 57 -brokenborough 57 -cervo 57 -recieving 57 -non-recourse 57 -classic-car 57 -doy 57 -yong-seok 57 -scramjets 57 -terrorismo 57 -westernised 57 -technical-support 57 -buckaroos 57 -caydee 57 -pmdc 57 -v#s 57 -dalhart 57 -vibrance 57 -wayang 57 -roundish 57 -ignarro 57 -sansoni 57 -schobert 57 -#.####-mark 57 -ncsl 57 -bhight 57 -shapey 57 
-royan 57 -lemper 57 -shannon.buggs@chron.com 57 -anadyr 57 -ch-##d 57 -jhala 57 -baumgart 57 -caponi 57 -souverain 57 -push-off 57 -vidosevic 57 -verwiel 57 -strougal 57 -non-network 57 -eco-terrorists 57 -washkansky 57 -matherne 57 -bt-## 57 -dragica 57 -namuyamba 57 -petroleum-related 57 -as### 57 -mcgahern 57 -morganti 57 -american-record 57 -randfontein 57 -iraq-al-qaida 57 -cubillan 57 -bayberry 57 -mythbusters 57 -dollar-cost-averaging 57 -multitasker 57 -aicraft 57 -opole 57 -ru\/sw 57 -spirea 57 -encoder 57 -chanko 57 -pyshkin 57 -structuralism 57 -vijayakumar 57 -easygroup 57 -goncalino 57 -deadeye 57 -milieus 57 -leomitis 57 -start-stop 57 -strohm 57 -stevensville 57 -anklam 57 -huanghe 57 -bushisms 57 -sugimori 57 -tightly-knit 57 -apono 57 -one-yuan 57 -pheonix 57 -freeze-for-freeze 57 -cagily 57 -vivants 57 -cotman 57 -sirf 57 -stemme 57 -baldassi 57 -kongsi 57 -tele-medicine 57 -mackanin 57 -build-a-bear 57 -greeson 57 -indomobil 57 -britain-politics-labour 57 -redenomination 57 -sell-outs 57 -grullon 57 -noorda 57 -sawasdi 57 -ccamlr 57 -dbrs 57 -mallouh 57 -age-verification 57 -strength-sapping 57 -b-#bs 57 -prestigous 57 -##-miroslav 57 -uzcategui 57 -laquila 57 -rs\/#### 57 -gasoline-guzzling 57 -galesi 57 -hemley 57 -simha 57 -kokura 57 -nyang 57 -catenaccio 57 -sigo 57 -hiwada 57 -huscroft 57 -bench-pressed 57 -rock-hurling 57 -administrates 57 -al-hawali 57 -time-scale 57 -gokcek 57 -budgett 57 -dyatchin 57 -rege 57 -ouargla 57 -ginns 57 -papathanassiou 57 -lucrezia 57 -isroilova 57 -juillet 57 -r-fairfax 57 -civilian-populated 57 -xiaoyun 57 -mcramerglobe.com 57 -psuv 57 -creasey 57 -telleldin 57 -oltman 57 -post-football 57 -periodontist 57 -us-school 57 -impsa 57 -kahana 57 -lunch-bucket 57 -zahalka 57 -party-going 57 -lc-gm 57 -orumieh 57 -ndongou 57 -ludek 57 -renowed 57 -junyao 57 -most-admired 57 -cricket-aus-ind 57 -outpitch 57 -atherosclerotic 57 -salamao 57 -capilla 57 -all-expense 57 -alganov 57 -vaslav 57 -al-qaissi 57 -stoklos 57 
-hortman 57 -ultra-small 57 -ncea 57 -fermenters 57 -belik 57 -barsuk 57 -cycad 57 -fgarcia 57 -high-carbon 57 -riska 57 -pathogenicity 57 -crenson 57 -vinyls 57 -tercentenary 57 -sledded 57 -tleiss 57 -etat 57 -googleplex 57 -messin 57 -sr# 57 -patkar 57 -sinaloan 57 -djoumessi 57 -fritzky 57 -kanaana 57 -pcv 57 -avt 57 -puzzlers 57 -swellings 57 -hillshire 57 -sentani 57 -livery-cab 57 -martensson 57 -unroadworthy 57 -jinwei 57 -longyang 57 -groenfeld 57 -derbent 57 -nihilists 57 -maurizi 57 -turnhalle 57 -mixologists 57 -frostiness 57 -selph 57 -prodigene 57 -tewodros 57 -mosquito-transmitted 57 -portch 57 -arbib 57 -attarian 57 -effendy 57 -kason 57 -advances# 57 -jerges 57 -rockhouse 57 -andani 57 -eight-city 57 -furio 57 -yaswant 57 -monona 57 -epiphanny 57 -six-kilometre 57 -shaleil 57 -protropin 57 -zients 57 -windhorst 57 -abana 57 -alleghenies 57 -intra-state 57 -lalwani 57 -unstinted 57 -warford 57 -flordia 57 -galax 57 -mcgiffert 57 -cadereyta 57 -zeck 57 -railamerica 57 -http://www.ford.com/ 57 -ronetta 57 -foodmakers 57 -yongyudh 57 -half-serious 57 -coal-to-liquid 57 -day-use 57 -acclimatizing 57 -charkhi 57 -phalaborwa 57 -muthaiga 57 -bldp 57 -obnoxiousness 57 -esperar 57 -resistances 57 -top-producing 57 -atochem 57 -intiman 57 -dog-show 57 -hard-edge 57 -wolfhound 57 -vincent-st 57 -campiness 57 -leibniz 57 -religare 57 -tankful 57 -kertih 57 -lodal 57 -mbita 57 -forestalls 57 -imbecilic 57 -watch\/americas 57 -fondiaria 57 -sanyi 57 -mikhailo 57 -palumbi 57 -germain\/fra 57 -mg-### 57 -hunthausen 57 -snappily 57 -shakhnazarov 57 -jakabos 57 -champassak 57 -chia-chun 57 -bassoonist 57 -kasambara 57 -paradies 57 -arcore 57 -newquist 57 -mocny 57 -kostelka 57 -dolina 57 -mcelrathbey 57 -province-based 57 -addum 57 -spritzer 57 -schoenholtz 57 -abbotts 57 -gay-related 57 -suseno 57 -wreh 57 -al-forat 57 -outdraw 57 -keleher 57 -iraqen#### 57 -afrim 57 -souaidia 57 -cardi 57 -inkjets 57 -gas-sipping 57 -scalf 57 -puddled 57 -kadidal 57 -raymonde 57 
-ear-shattering 57 -bishi 57 -zamano 57 -sabeh 57 -prinsen 57 -us-violence 57 -millhauser 57 -million\/euros 57 -texmaco 57 -alltime 57 -cheik 57 -comando 57 -amaitis 57 -erskin 57 -mastersingers 57 -bursaries 57 -wimbley 57 -veni 57 -dolgorsvren 57 -sirait 57 -rinero 57 -mander 57 -natiq 57 -parilla 57 -mewling 57 -anangwe 57 -omnifone 57 -shandler 57 -now-ubiquitous 57 -kadokawa 57 -seven-kilometer 57 -a.k.a 57 -satterthwaite 57 -luusua 57 -pila 57 -farabee 57 -often-heard 57 -tembec 57 -fleabag 57 -dozen-plus 57 -trichopoulos 57 -antrobus 57 -antlfinger 57 -zuendel 57 -zeevi-farkash 57 -thumbelina 57 -restrictionists 57 -vraalsen 57 -radwaniya 57 -bergan 57 -spectating 57 -strike-slip 57 -setchell 57 -new-era 57 -coutries 57 -shaoqiang 57 -fact-checked 57 -fariborz 57 -###-billion-pound 57 -biblioteca 57 -isoa 57 -slimeball 57 -genaux 57 -letha 57 -fma 57 -trouble-maker 57 -sonali 57 -anumnu 57 -aavishkar 57 -r-pasadena 57 -wel 57 -trailways 57 -kalpoes 57 -marcelhino 57 -erythematosus 57 -yayha 57 -shenhar 57 -raheel 57 -yasnaya 57 -sung-kuk 57 -corvalan 57 -houston-galveston 57 -touchier 57 -u.p. 
57 -re-live 57 -barrington-coupe 57 -jarosz 57 -highest-flying 57 -underuse 57 -olmecs 57 -climent 57 -slabbert 57 -commericial 57 -shalgi 57 -peattie 57 -well-staffed 57 -cypriot-flagged 57 -fraccari 57 -rafel 57 -ribo 57 -vamped 57 -taffel 57 -lucheng 57 -pugnaciously 57 -strambach 57 -cgtp 57 -tapulous 57 -rockiest 57 -bienstock 57 -higher-interest 57 -rusted-out 57 -sopped 57 -third-from-bottom 57 -pacholczyk 57 -krasovska 57 -romulan 57 -sang-moon 57 -undammed 57 -ncacc 57 -brookhart 57 -valbon 57 -greenes 57 -kitchell 57 -zurick 57 -insufficiencies 57 -mohamedou 57 -daric 57 -multicamera 57 -sunao 57 -y## 57 -plumelec 57 -ntshangase 57 -atwi 57 -yiotis 57 -baoquan 57 -muehlebach 57 -vradenburg 57 -deliberateness 57 -ridgelea 57 -superlotto 57 -game-long 57 -hard-to-sell 57 -pjm\/gj## 57 -frot-coutaz 57 -miram 57 -oil-on-canvas 57 -laming 57 -stodginess 57 -rikrok 57 -president-in-uniform 57 -obbo 57 -american-grown 57 -#-matt 57 -charnvirakul 57 -baldomero 57 -langfeld 57 -throat-slashing 57 -hegemonist 57 -molchanov 57 -tarkan 57 -grimaud 57 -treelike 57 -yi-chiao 57 -caston 57 -shallman 57 -kandeh 57 -##-jarkko 57 -boy-king 57 -germani 57 -sindian 57 -off-the-book 57 -v&s 57 -kishaba 57 -fertonani 57 -schear 57 -drainpipes 57 -holness 57 -pwilson 57 -dfler 57 -ako 57 -bonannos 57 -moonwalked 57 -buzim 57 -trapdoors 57 -skeeters 57 -camshafts 57 -onair 57 -psncr 57 -calton 57 -nyambuya 57 -speedman 57 -altarpieces 57 -fat-laden 57 -office-based 57 -end-time 57 -navigenics 57 -anser 57 -knup 57 -kyung-ja 57 -patrimonio 57 -erda 57 -timecards 57 -quencher 57 -non-deployed 57 -anggoro 57 -bajas 57 -iju 57 -kaggwa 57 -miller-jenkins 57 -visicalc 57 -magdalo 57 -johannsen 57 -tassinari 57 -cat-like 57 -spectacled 57 -blauner 57 -stickwork 57 -explosives-sniffing 57 -stavenhagen 57 -bocskai 57 -#-anna-lena 57 -brebner 57 -moehringer 57 -tamweel 57 -gold-leafed 57 -drevna 57 -reidyglobe.com 57 -mennes 57 -ripia 57 -noongar 57 -re-appear 57 -mangosteen 57 -queremos 
57 -aarchs 57 -foon 57 -cooz 57 -jahurul 57 -sn####a 57 -demiralp 57 -smooched 57 -cols 57 -chapati 57 -resailed 57 -evensong 57 -luzhin 57 -##-plus-year 57 -presento 57 -non-exempt 57 -wahyono 57 -servier 57 -malayev 57 -neuro 57 -temerko 57 -beddoes 57 -marquard 57 -cremonini 57 -carnivalesque 57 -petrac 57 -tri-colored 57 -debbie-ann 57 -beaute 57 -previously-announced 57 -dugovich 57 -thibaudet 57 -outboards 57 -benhur 57 -kubelik 57 -schellenberger 57 -in-kook 57 -papan 57 -bdnf 57 -haleva 57 -precor 57 -tightly-contested 57 -hge 57 -asrar 57 -piriz 57 -taitz 57 -taita 57 -medog 57 -bonners 57 -caucausus 57 -uncustomary 57 -balsamo 57 -##-card 57 -good-vs 57 -international-class 57 -strominger 57 -interleukin 57 -funeral-home 57 -savall 57 -easynet 57 -konculj 57 -fengdu 57 -enio 57 -cyber-criminals 57 -soms 57 -wasikowska 57 -semporna 57 -#to 57 -damschroder 57 -medstar 57 -bastin 57 -tno 57 -munier 57 -rav-# 57 -ribalta 57 -elmes 57 -flicked-on 57 -cricket-wc####-pak 57 -rehabilitator 57 -medora 57 -kaddura 57 -yannopoulos 57 -sericulture 57 -closed-captioning 57 -rigler 57 -pre-injury 57 -jandek 57 -chiri-yurt 57 -#-midnight 57 -khorshid 57 -witkoff 57 -festen 57 -aboutreika 57 -jourdon 57 -hospitalisations 57 -salonius 57 -subnational 57 -bonadio 57 -tv-viewing 57 -lessee 57 -fikir 57 -watani 57 -newtok 57 -kintz 57 -in-network 57 -repacholi 57 -subert 57 -moralize 57 -truck-loads 57 -rocktober 57 -recently-held 57 -steubing 57 -myfi 57 -harned 57 -alleles 57 -nesi 57 -referential 57 -manufacturing-based 57 -rzb 57 -sun-worshipping 57 -caiu 57 -chi-keung 57 -majdalawi 57 -lackner 57 -arrangment 57 -o'mahoney 57 -intermarriages 57 -zantzinger 57 -margairaz 57 -patzelt 57 -chania 57 -gribakin 57 -##-star 57 -haruko 57 -lagunas 57 -tch 57 -ex-social 57 -qorabi 57 -debt-for-nature 57 -plam 57 -beady-eyed 57 -oscillators 57 -blood-related 57 -qualm 57 -consumer-based 57 -leweck 57 -high-revving 57 -l'heureux 57 -meramec 57 -fsa-eap 57 -lhernandez 57 -carports 
57 -air-worthiness 57 -bolswessanen 57 -uberti 57 -aver 57 -#-million-member 57 -singleminded 57 -hurreh 57 -moslem-oriented 56 -benediktsson 56 -aeolian 56 -gogitidze 56 -intrepids 56 -zebedayo 56 -shakhtyor 56 -gioiella 56 -llegue 56 -draggy 56 -tkaczuk 56 -balongan 56 -abullah 56 -mn-imj 56 -abdula 56 -villepinte 56 -concrete-filled 56 -overcash 56 -astara 56 -moskvich 56 -semi-autonomy 56 -investment-linked 56 -self-tanner 56 -nurmukhammed 56 -nilesh 56 -pitch-and-putt 56 -haetzni 56 -corn-soya 56 -http://www.redcross.org 56 -predominantly-muslim 56 -zigging 56 -ryegrass 56 -soft-cover 56 -faibish 56 -reinsured 56 -mideast-israel 56 -koshlyakov 56 -big-hitters 56 -stalcup 56 -amland 56 -mega-hits 56 -mid-stretch 56 -sauna-like 56 -antihypertensive 56 -muskat 56 -ecd 56 -chornovyl 56 -shih-ming 56 -inerrant 56 -xiuqi 56 -pseudo-scientific 56 -langberg 56 -bungoma 56 -zainol 56 -telecomunications 56 -escogido 56 -okkalapa 56 -chun-sheng 56 -saefuddin 56 -rinus 56 -dcp 56 -military-based 56 -petkovski 56 -serbian-led 56 -bernardsville 56 -farkhar 56 -phosphors 56 -dutreil 56 -wragg 56 -hunza 56 -khulani 56 -brahima 56 -zakiur 56 -nooni 56 -alef 56 -governmnent 56 -haiti-vote 56 -roro 56 -waterfowls 56 -schoeller 56 -unsalable 56 -mcdonnel 56 -conflict-prevention 56 -mantz 56 -majelis 56 -exultantly 56 -pigeon-toed 56 -neiers 56 -tzemel 56 -nghimtina 56 -sindhis 56 -lippa 56 -sg-# 56 -al-yemen 56 -oughton 56 -wash-out 56 -pd-imj 56 -destinee 56 -canfor 56 -seyni 56 -bp-gm 56 -jakkrit 56 -###:##:## 56 -rapace 56 -habie 56 -ex-slave 56 -tyisha 56 -frou-frou 56 -groeschel 56 -pmb 56 -wyrsch 56 -idahoans 56 -tiantan 56 -##-million-us 56 -kootenay 56 -assasinated 56 -teruhisa 56 -kirundo 56 -joensen 56 -wedding-cake 56 -underfinancing 56 -shapewear 56 -clinton-haters 56 -tetzchner 56 -arja 56 -duchesnay 56 -fuxi 56 -radow 56 -theatre-goers 56 -sign-carrying 56 -horstman 56 -castrati 56 -heat-generating 56 -golfsmith 56 -pendareva 56 -loubscher 56 -fumento 56 -winiarski 
56 -xianrong 56 -zanchi 56 -alibi-ya 56 -f-bomb 56 -stonehouse 56 -zumbo 56 -sange 56 -gamidov 56 -exanta 56 -take-or-pay 56 -mapunda 56 -propuesta 56 -three-meter-high 56 -zafaryab 56 -haloed 56 -sliven 56 -lepton 56 -alcohol-fuelled 56 -operationalized 56 -fairless 56 -foot-tapping 56 -pntl 56 -choos 56 -danisco 56 -khazaal 56 -macknin 56 -kera 56 -non-believer 56 -export-dominated 56 -chowdry 56 -tecnost 56 -monceau 56 -jmarmstrongdenverpost.com 56 -chunsheng 56 -ccac 56 -oakenfold 56 -ktda 56 -record-length 56 -toco 56 -golog 56 -pastorale 56 -zuberi 56 -million-euro## 56 -trustor 56 -delsener 56 -zx 56 -college-student 56 -ultra-right-wing 56 -runout 56 -abloom 56 -holovak 56 -scafidi 56 -associaton 56 -german-brokered 56 -cents-a-share 56 -dhanawibawa 56 -performance-driven 56 -surburb 56 -orso 56 -duodenal 56 -yellow-and-red 56 -koehl 56 -boonton 56 -exective 56 -###-footer 56 -musasa 56 -http://www.nbc.com 56 -pig-raising 56 -telo 56 -cassells 56 -sacko 56 -monshipour 56 -desk-bound 56 -soft-top 56 -tunings 56 -tomasa 56 -lekhanya 56 -breathalyser 56 -hmeid 56 -ventolin 56 -goldsby 56 -coba 56 -singkil 56 -advertisment 56 -barfoed 56 -d'ermilio 56 -esala 56 -#,#-dioxane 56 -zohreh 56 -kasatka 56 -horita 56 -spado 56 -itvs 56 -chia-yuh 56 -nandy 56 -vercauteren 56 -guedj 56 -chimene 56 -masaba 56 -century-oriented 56 -novavax 56 -burn-in 56 -snu 56 -moscou 56 -gurrola 56 -long-feuding 56 -grails 56 -jsk 56 -bachmans 56 -deguchi 56 -wanchalerm 56 -under-performed 56 -paisa 56 -smap 56 -deramus 56 -dysphoria 56 -pardede 56 -lunstead 56 -houweling 56 -solar-heated 56 -lalic 56 -banchetta 56 -tursday 56 -tree-dwelling 56 -rightmire 56 -tradicion 56 -tae-yong 56 -sayrescoxnews.com 56 -paku 56 -mubang 56 -moseyed 56 -carbon-reduction 56 -philippine-born 56 -event-planning 56 -long-fought 56 -shamsudin 56 -geninho 56 -coatless 56 -hardy-garcia 56 -and-white 56 -kuchov 56 -shoebox-sized 56 -penalty-killers 56 -value-added-tax 56 -dispenza 56 -ben-yehuda 56 
-yellow-and-green 56 -babaloo 56 -goyas 56 -then-partner 56 -moulty 56 -sachio 56 -over-the-horizon 56 -btp 56 -###t 56 -calagna 56 -avions 56 -copepods 56 -french-ruled 56 -philomel 56 -osmo 56 -iconix 56 -thomastown 56 -schoerghofer 56 -witchdoctors 56 -ryoung 56 -wire-fraud 56 -tynesha 56 -freas 56 -cpw 56 -unc-wilmington 56 -water-born 56 -eket 56 -loudhailer 56 -filipp 56 -killick 56 -klimenko 56 -karppinen 56 -kuwait-politics 56 -stollsteimer 56 -trussell 56 -arbesfeld 56 -alfama 56 -caithness 56 -monetti 56 -arkle 56 -zhoima 56 -choue 56 -vulgamore 56 -rakowitz 56 -provencio 56 -beswick 56 -re-financing 56 -innocentive 56 -lahoti 56 -bertholle 56 -eui 56 -jostens 56 -indecisively 56 -gamov 56 -kalis 56 -toughest-ever 56 -mi-jin 56 -interwar 56 -zari 56 -woodroffe 56 -baixing 56 -isabekov 56 -importations 56 -hedvig 56 -name-your-price 56 -vuai 56 -carkner 56 -jmf\/ml 56 -rotarian 56 -cerra 56 -four-over-par 56 -visitantes 56 -stautner 56 -tiu 56 -inside-baseball 56 -chakravarthi 56 -klokot 56 -misstates 56 -enap 56 -weld-cellucci 56 -tahara 56 -bolender 56 -suettinger 56 -karadassiou 56 -isak-muivah 56 -governates 56 -buonomo 56 -asia-focused 56 -statesville 56 -calgon 56 -yonemura 56 -fazlic 56 -sohlberg 56 -bogale 56 -all-arounder 56 -rou 56 -swint 56 -kendall-smith 56 -jamaar 56 -gurfinkel 56 -kargar 56 -re-invented 56 -sarda 56 -witharanage 56 -non-college 56 -lohas 56 -foremothers 56 -vent-free 56 -narcotic-drug-related 56 -yashar 56 -compounce 56 -botto 56 -geddie 56 -marchman 56 -ouest-france 56 -drees 56 -business-services 56 -pre-selection 56 -vaher 56 -jocko 56 -saint-quentin 56 -amerenue 56 -lindenmuth 56 -ulus 56 -mbala 56 -husic 56 -yl###-### 56 -follicular 56 -most-loved 56 -ncafp 56 -cgx 56 -peaden 56 -pingeot 56 -fusen 56 -phaiboon 56 -pre-marked 56 -heilbroner 56 -longyi 56 -nr\/dj## 56 -nashwan 56 -kathuria 56 -##-wayne 56 -way-station 56 -visitor-friendly 56 -generative 56 -unprejudiced 56 -wolfberry 56 -cashwell 56 -two-building 56 
-madryn 56 -akwesasne 56 -community-college 56 -lieff 56 -coalhouse 56 -suncare 56 -passamaquoddy 56 -kanstantsin 56 -amep 56 -news-making 56 -rahe 56 -campione 56 -neiwand 56 -munabao 56 -potti 56 -machicura 56 -wwor 56 -vermeersch 56 -schmoekel 56 -chiffonade 56 -fashola 56 -seec 56 -centre-backs 56 -kremlinologists 56 -akuressa 56 -neuropsychiatry 56 -s-##c 56 -mutaa 56 -sociobiology 56 -ultra-premium 56 -twirler 56 -xiaojin 56 -free-jazz 56 -audrina 56 -nelon 56 -seconds\/### 56 -inquisitions 56 -level-four 56 -vertigo-inducing 56 -mugambage 56 -bondue 56 -yuk\/leung 56 -edelnor 56 -wascher 56 -side-footing 56 -giresun 56 -enfoque 56 -torro 56 -rrps 56 -kirsh 56 -crimefighter 56 -importa 56 -longer-acting 56 -thakurgaon 56 -sufferance 56 -cricket-wc####-aus 56 -fluconazole 56 -waltzer 56 -stz\/ea## 56 -gbm 56 -##-stephen 56 -urey 56 -medine 56 -pelloux 56 -arkans 56 -bizzare 56 -siegbahn 56 -carnelian 56 -rueing 56 -pantomiming 56 -duckweed 56 -hofmeyr 56 -nastas 56 -pennymac 56 -peairs 56 -str-ti-jbm 56 -suharjono 56 -barcenas 56 -spit-shined 56 -coch 56 -aldebert 56 -chups 56 -eckl 56 -czaja 56 -redken 56 -duffie 56 -jordanaires 56 -impudently 56 -zingaro 56 -undistorted 56 -toolik 56 -kammerhoff 56 -decriminalisation 56 -clayoquot 56 -leblon 56 -pakistan-missile 56 -gertrudis 56 -snooker-gbr 56 -al-jarba 56 -tumwine 56 -morinigo 56 -bacre 56 -##,###,###.# 56 -staleness 56 -##hours 56 -amni 56 -linenthal 56 -kleinsasser 56 -bepza 56 -saransk 56 -khermanstatesman.com 56 -rockwellian 56 -nieuw 56 -solomonyan 56 -mangahas 56 -sewering 56 -mozartean 56 -##-square-kilometre 56 -bhu 56 -lampasas 56 -coagulate 56 -dinaburg 56 -dairy-free 56 -one-bath 56 -djibrill 56 -fischer-boel 56 -four-button 56 -lemon-flavored 56 -bipeds 56 -horneber 56 -mini-movies 56 -toplin 56 -silang 56 -barberton 56 -boukensa 56 -fernado 56 -lamberty 56 -gratings 56 -marban 56 -ilala 56 -l'avenir 56 -nizhni 56 -onepass 56 -d&b 56 -ilogho 56 -bowral 56 -lhr### 56 -cabaniss 56 
-camera-friendly 56 -langerman 56 -plattekill 56 -todesca 56 -cantorial 56 -import-dependent 56 -overspenders 56 -elsen 56 -novorossisk 56 -hansabank 56 -taip 56 -everding 56 -kilinc 56 -allieu 56 -kippy 56 -kasputis 56 -rrp 56 -darling-hammond 56 -bitkom 56 -holtzhausen 56 -kin-chung 56 -sinosat-# 56 -b'gosh 56 -afroz 56 -solomou 56 -proegler 56 -on-sang 56 -motorcyles 56 -easyknit 56 -cross-pollinating 56 -ambeyi 56 -lavaggi 56 -confessors 56 -aviion 56 -###-######## 56 -iraq-unrest-qaeda 56 -janowitz 56 -ofari 56 -tato 56 -reputacion 56 -gading 56 -univac 56 -iniative 56 -al-amal 56 -sahdan 56 -culloty 56 -mariella 56 -boigny 56 -partyka 56 -juda 56 -vierma 56 -majorette 56 -news-stands 56 -ashti 56 -sarria 56 -donadze 56 -huaqiang 56 -dej 56 -deddy 56 -sobaru 56 -self-invention 56 -murley 56 -neurotically 56 -smolar 56 -piccarreta 56 -#og 56 -flowerbeds 56 -bc-ap 56 -inseparably 56 -prezioso 56 -ropeik 56 -pumpido 56 -hypothesizes 56 -wuz 56 -roulade 56 -http://www.genoa-g#.it/eng/index.html 56 -eu-mideast 56 -paddleboard 56 -bone-white 56 -derschau 56 -burika 56 -boarding-school 56 -senitt 56 -re-equipping 56 -mulvoy 56 -retinues 56 -bresse 56 -greentown 56 -www.insidesocal.com/tv/ 56 -sinaia 56 -raupp 56 -nspo 56 -overtopping 56 -shindell 56 -bahk 56 -jeremain 56 -kurta 56 -margarite 56 -ebbesen 56 -kise 56 -coooperation 56 -brochtrup 56 -kenya-climate 56 -music-themed 56 -rahimpour 56 -samory 56 -business-wise 56 -abadie 56 -shur 56 -euro-skepticism 56 -#,###-kilometers 56 -mystery-shrouded 56 -bulker 56 -ovh 56 -near-vertical 56 -khash 56 -espriella 56 -unef 56 -torbor 56 -digene 56 -dishonoured 56 -ingvard 56 -andis 56 -o'donley 56 -data-serving 56 -lakhubhai 56 -leccion 56 -common-man 56 -pre-test 56 -diht 56 -aegina 56 -youngtown 56 -talkathon 56 -ajirawit 56 -stonerside 56 -oly-####-advisory 56 -non-baseball 56 -lusail 56 -giacoletti 56 -womans 56 -chelsom 56 -lancing 56 -nagyz 56 -striatum 56 -nahel 56 -coinages 56 -kleins 56 -mulherin 56 -estebanez 56 
-andolina 56 -on-coming 56 -hazels 56 -automotives 56 -borai 56 -beiteddine 56 -su-lin 56 -al-siyassa 56 -lequi 56 -mcmorran 56 -pitcher-friendly 56 -nitch 56 -rauer 56 -d'arrigo 56 -haziness 56 -draft-night 56 -maokola-majogo 56 -cuppers 56 -steinhoff 56 -gushiken 56 -moppet 56 -potebenko 56 -fishtailed 56 -amedo 56 -arbois 56 -pasturing 56 -harba 56 -authorties 56 -igber 56 -afghan-international 56 -myanmar-protest-monks 56 -fresh-air 56 -iigep 56 -wolke 56 -shock-jock 56 -smilin 56 -guguletu 56 -deregister 56 -coorperation 56 -eckstrom 56 -guelperin 56 -lobed 56 -vartanian 56 -sponged 56 -upledger 56 -kalat 56 -brocato 56 -wuertz 56 -longchamps 56 -hambastegi 56 -wunderkinder 56 -longans 56 -she-devil 56 -frueh 56 -ambridge 56 -lessner 56 -kiddos 56 -kuypers 56 -daair 56 -bg-acw 56 -club-swinging 56 -szulik 56 -quizas 56 -maxxi 56 -projectionists 56 -vujin 56 -flyertalk.com 56 -gurin 56 -prolinea 56 -vivra 56 -jurijs 56 -us-hollywood 56 -mirretti 56 -wildt 56 -###percent 56 -sylvers 56 -hoerster 56 -p-### 56 -mdluli 56 -slipup 56 -zaozhuang 56 -kazakhstani 56 -furtwaengler 56 -muffat 56 -busic 56 -viamonte 56 -montsame 56 -tuszynski 56 -mickiewicz 56 -varathep 56 -freak-show 56 -tampakan 56 -afterburner 56 -early-#### 56 -kerchove 56 -rasanen 56 -simonas 56 -high-sounding 56 -kibbutzniks 56 -us-colombian 56 -antiqued 56 -heatherington 56 -amdo 56 -side-scan 56 -birlik 56 -spigarelli 56 -non-nordic 56 -impact-resistant 56 -umran 56 -chalke 56 -nassari 56 -freiman 56 -jaar 56 -soylent 56 -sinjhuang 56 -four-hectare 56 -continuing-education 56 -mimo 56 -down-ticket 56 -fiercely-contested 56 -levantine 56 -shamie 56 -g\/t\/f 56 -autoridad 56 -prizing 56 -baywalk 56 -mocatta 56 -tea-to-steel 56 -pregunte 56 -irascibility 56 -towles 56 -china-romania 56 -hamengkubuwono 56 -padire 56 -aldam 56 -coinsurance 56 -auret 56 -ignas 56 -disco-era 56 -minette 56 -nurse-midwives 56 -smolian 56 -jaffery 56 -hochschorner 56 -rcastro 56 -fnla 56 -iyanla 56 -plant-derived 56 
-krupp-hoesch 56 -night-blooming 56 -food-grade 56 -fuenmayor 56 -glaciation 56 -handmaidens 56 -kosnatcheva 56 -dll###-### 56 -mulyo 56 -ct## 56 -qfc 56 -higher-yield 56 -deyana 56 -underdiagnosed 56 -wojtala 56 -christmas-season 56 -coitus 56 -estey 56 -ashenfelter 56 -hapal 56 -tawatchai 56 -cambron 56 -kasereka 56 -periwinkles 56 -craciun 56 -clabo 56 -glashow 56 -danwei 56 -luwero 56 -blythedale 56 -kadyr 56 -young-me 56 -flash-floods 56 -spla\/m 56 -cross@globe.com 56 -pues 56 -hounddog 56 -bulimics 56 -mingming 56 -insa 56 -videocam 56 -paoua 56 -http://www.xerox.com 56 -car-jackings 56 -marzban 56 -cattolica 56 -katri 56 -kanada 56 -dacula 56 -faurel 56 -zaytun 56 -autonomy-minded 56 -shauri 56 -defrayed 56 -sanhe 56 -howison 56 -edurne 56 -states. 56 -issue-by-issue 56 -microturbines 56 -macijauskas 56 -iberville 56 -yizhar 56 -cocodrie 56 -wats 56 -ustc 56 -walta 56 -ethereally 56 -cookstown 56 -dissociating 56 -once-safe 56 -vishwanathan 56 -wince-inducing 56 -zire 56 -arpa 56 -al-shurta 56 -luneta 56 -aldermaston 56 -http://www.rnc.org 56 -hosono 56 -qiqiu\/zhao 56 -jacinthe 56 -http://www.amrcorp.com 56 -derbyshires 56 -weiyang 56 -houze 56 -pirata 56 -bugiri 56 -lafakis 56 -vika 56 -dismas 56 -cardio-thoracic 56 -thrashings 56 -reagle 56 -us-attacks-guantanamo 56 -hwwa 56 -undergirded 56 -portale 56 -scandalizing 56 -brezovica 56 -zebadua 56 -drizin 56 -reya 56 -tubewells 56 -skink 56 -babushka 56 -pre-judging 56 -schutzman 56 -wroclawski 56 -zaca 56 -daviz 56 -moree 56 -diamantis 56 -former-soviet 56 -islam-based 56 -ennes 56 -alysia 56 -soloveitchik 56 -negar 56 -damselflies 56 -tried-and-tested 56 -hebard 56 -slipstream-chipotle 56 -meraklis 56 -so-call 56 -samarn 56 -pink-hued 56 -hieronim 56 -konadu 56 -fradulent 56 -manchild 56 -skube-column 56 -bouder 56 -permitir 56 -seawolves 56 -pouladi 56 -coronell 56 -folino 56 -juravich 56 -aquacultural 56 -dono 56 -strongest-ever 56 -sub-four-minute 56 -shu-hung 56 -hakkar 56 -kotex 56 -relink 56 
-multiethnicity 56 -regio 56 -pomar 56 -azzopardi 56 -kelmon 56 -tihany 56 -shisun 56 -water-repellent 56 -bumgardner 56 -clark\/donna 56 -revenge-minded 56 -grcs 56 -mcphie 56 -small-molecule 56 -zorica 56 -chavit 56 -baselga 56 -sirima 56 -zegas 56 -talis 56 -iner 56 -racette 56 -azema 56 -fuhe 56 -caramagna 56 -strenghtened 56 -lavage 56 -giannichedda 56 -gurbuz 56 -degiorgio 56 -kilolitres 56 -escolero 56 -antespend 56 -swash 56 -unoaked 56 -gimondi 56 -dirt-road 56 -daguin 56 -imputation 56 -orientalist 56 -neigh 56 -inconstant 56 -penaherrera 56 -shambhala 56 -ravishingly 56 -ikrema 56 -mythologizing 56 -gwee 56 -lousiest 56 -zirkin 56 -cross-regional 56 -campagne 56 -dumarsais 56 -mansilla 56 -quintus 56 -jurikova 56 -stickered 56 -ysern 56 -djebbour 56 -law-firm 56 -quark-gluon 56 -forbush 56 -non-responsive 56 -play-it-safe 56 -moisture-laden 56 -comradely 56 -orito 56 -fbl-fra-lcup 56 -paucke 56 -muji 56 -fertilizer-based 56 -early-to-bed 56 -woodcrest 56 -danto 56 -onne 56 -peri-urban 56 -kayiranga 56 -jianbo 56 -correspondant 56 -vankor 56 -lambing 56 -mephedrone 56 -icglr 56 -non-lawyers 56 -open-handed 56 -musetta 56 -khanty-mansiisk 56 -carouser 56 -srebnick 56 -unkown 56 -frankland 56 -by-catch 56 -aeromedical 56 -middle-tier 56 -knust 56 -natsagiin 56 -gressly 56 -long-coveted 56 -golf-epga-por 56 -thanat 56 -ivermectin 56 -garbelotto 56 -medin 56 -lamoreaux 56 -ciwujia 56 -iacoboni 56 -lipin 56 -gundegmaa 56 -nkhoma 56 -itek 56 -cebekhulu 56 -serigne 56 -tsumura 56 -radio-show 56 -mirman 56 -kelder 56 -desvonde 56 -elongation 56 -shot-for-shot 56 -loofah 56 -sterno 56 -hrabal 56 -kumon 56 -erinle 56 -workmate 56 -zuendt 56 -computer-software 56 -dervishi 56 -muhieddin 56 -sasono 56 -second-week 56 -sliti 56 -winterrowd 56 -ex-test 56 -entomological 56 -kooiman 56 -akhmedova 56 -stepanovic 56 -kriste 56 -alfond 56 -qed 56 -fullone 56 -gold-encrusted 56 -fireballer 56 -backbenches 56 -anti-minority 56 -parakh 56 -on-street 56 -majolica 56 -responde 
56 -geraldton 56 -polyot 56 -taleqani 56 -hand-knitted 56 -paleozoic 56 -a-ram 56 -oil-tainted 56 -matuidi 56 -sanest 56 -middle-schooler 56 -wrightsman 56 -gerdano 56 -nozadze 56 -imm 56 -sholar 56 -plea-bargained 56 -http://www.usccb.org 56 -shikata 56 -##-kevin 56 -tejgaon 56 -two-and-a-half-month 56 -much-younger 56 -vaill 56 -krach 56 -audah 56 -bullis 56 -maharajahs 56 -unadilla 56 -uzbekistani 56 -airconditioners 56 -egolf 56 -longheld 56 -oriflame 56 -sportcoat 56 -cuttaree 56 -cressend 56 -exculpate 56 -re-shuffle 56 -sinosure 56 -kartono 56 -supergrass 56 -walley 56 -nouzaret 56 -well-adapted 56 -chupacabra 56 -schoenbaum 56 -user-friendliness 56 -shorabak 56 -zaiqing 56 -sluga 56 -oegroseno 56 -cloer 56 -waterlily 56 -nisreen 56 -junor 56 -cha-ching 56 -rocknes 56 -kejuan 56 -hofmannsthal 56 -goldhirsh 56 -h.i.v. 56 -herschbach 56 -rajakarunanayake 56 -pottenger 56 -humanplasma 56 -bentler 56 -opi 56 -troglitazone 56 -borgas 56 -ayatskov 56 -princetonian 56 -wingard 56 -kashmir-based 56 -chaiet 56 -excretions 56 -eydie 56 -marthastewart.com 56 -glassine 56 -henkes 56 -shikuku 56 -ngarlejy 56 -meversley 56 -kunsthistorisches 56 -pro-stadium 56 -carrel 56 -weilerstein 56 -jamiruddin 56 -flashiness 56 -#.##-trillion 56 -dubee 56 -knickerbockers 56 -hedong 56 -yung-lai 56 -done-that 56 -weisler 56 -protectmarriage.com 56 -assitance 56 -kadewe 56 -near-war 56 -wigstock 56 -resonator 56 -neu-ulm 56 -taimali 56 -organ-transplant 56 -compositing 56 -fantagraphics 56 -scharioth 56 -redleaf 56 -htil 56 -mideast-gaza 56 -manasieva 56 -gwang-soo 56 -yannotti 56 -unclassifiable 56 -nagarajan 56 -banisar 56 -devroy 56 -nangka 56 -house-sized 56 -mefraj 56 -lake-side 56 -peachcare 56 -kagah 56 -apostrophes 56 -frame-ups 56 -hainaut 56 -noiseless 56 -chiado 56 -salesgirl 56 -borovic 56 -kaouch 56 -kirkaldy 56 -non-automotive 56 -prokopek 56 -three-event 56 -aseanapol 56 -elfatih 56 -al-osboa 56 -santy 56 -gerrick 56 -untallied 56 -bushings 56 -turn-over 56 -five-length 
56 -afanasyevsky 56 -meiselas 56 -yugoslav-made 56 -##-quart 56 -pegu 56 -riverwoods 56 -take-back 56 -remote-detonated 56 -oil-reliant 56 -jd\/nr## 56 -erinvale 56 -brookhiser 56 -ghirardi-rubbi 56 -medsger 56 -mesilla 56 -maysa 56 -carter-finley 56 -biver 56 -surkh 56 -toy-related 56 -pring 56 -jjh-eap 56 -n-#### 56 -sciarrino 56 -moba 56 -falcioni 56 -furia 56 -franz-christoph 56 -liguera 56 -herreshoff 56 -bjelkevik 56 -idoko 56 -helander 56 -bby 56 -matrika 56 -milas 56 -kosak 56 -forteo 56 -non-kenyan 56 -fretes 56 -cienaga 56 -q###s 56 -chia-yen 56 -worrywart 56 -professional-grade 56 -estudillo 56 -deficit-fighting 56 -mazzulla 56 -bie-shyun 56 -brick-sized 56 -robeco 56 -goskowicz 56 -d'hiver 56 -dewes 56 -toppo 56 -sacb 56 -golden-capped 56 -boudreau-gagnon 56 -deflectors 56 -sauzee 56 -ketz 56 -footbal 56 -menaka 56 -jerred 56 -tugendhat 56 -sutee 56 -semi-secret 56 -cftr 56 -schabarum 56 -shirani 56 -odon 56 -cosey 56 -http://www.aflcio.org 56 -doorbuster 56 -dimmock 56 -cref 56 -krusee 56 -gruwell 56 -propellors 56 -brodovitch 56 -poutine 56 -dunhua 56 -lese-majeste 56 -epicurious.com 56 -late-april 56 -niha 56 -lifescan 56 -mellette 56 -guangxiang 56 -shawali 56 -sagia 56 -eu-eurozone-economy-growth 56 -suzerainty 56 -wheatgrass 56 -adir 56 -man-bok 56 -customizer 56 -accordionists 56 -perspicacity 56 -walusimbi 56 -valenciano 56 -developping 56 -matellan 56 -propiedades 56 -topa 56 -mirken 56 -calvins 56 -townfolk 56 -self-motivation 56 -##-nicole 56 -tear-streaked 56 -lin\/wang 56 -shibboleths 56 -ueslei 56 -kleier 56 -unionville 56 -mohtarma 56 -esmeijer 56 -soloed 56 -sandschneider 56 -super-long 56 -payal 56 -victim-impact 56 -carrs 56 -bufalino 56 -pavlovski 56 -demayo 56 -http://www.nmb.gov 56 -chen-yuan 56 -wine-and-cheese 56 -perepelova 56 -saddiqi 56 -unsafely 56 -d'amiano 56 -henochowicz 56 -lesperoglou 56 -vandewater 56 -sportsweek 56 -fankhouser 56 -jayer 56 -karres 56 -economise 56 -masopust 56 -dessie 56 -poona 56 -assouw 56 -rebated 56 
-saimone 56 -ethno-religious 56 -albany-area 56 -peiyao 56 -highly-efficient 56 -njie 56 -vernand 56 -bore-hole 56 -lundine 56 -foxcroft 56 -svoik 56 -delyagin 56 -bodas 56 -non-qualified 56 -ditrapano 56 -al-gizouli 56 -giorgianni 56 -talab 56 -caribbean-weather 56 -cross-bred 56 -conair 56 -http://www.njusao.org/break.html 56 -garlock 56 -karmakar 56 -the#### 56 -cheek-to-cheek 56 -jawiya 56 -goldsbury 56 -pelajaran 56 -shults 56 -kalala 56 -zaetta 56 -daubert 56 -risd 56 -brunhoff 56 -ciment 56 -blunden 56 -call-to-arms 56 -khirbet 56 -seliverstova 56 -vadhana 56 -peochar 56 -re-development 56 -eight-sided 56 -magam 56 -veran 56 -tax-dodgers 56 -luana 56 -cattle-rustling 56 -raia 56 -pb-jp 56 -non-labor 56 -mid-##st 56 -russian-occupied 56 -http://www.freddiemac.com 56 -nowgam 56 -hiroyoshi 56 -shenlong 56 -rafidiyeh 56 -njoki 56 -odoriferous 56 -goldoni 56 -iws 56 -mabou 56 -ozio 56 -chiuri 56 -first-of-its 56 -ciders 56 -by-now 56 -second-winningest 56 -arix 56 -gsd 56 -pro-yugoslav 56 -ertugruloglu 56 -nissay 56 -guaita 56 -jefferys 56 -##-straight 56 -spielbergian 56 -complexly 56 -phumaphi 56 -baronet 56 -brulliard 56 -herrara 56 -conwood 56 -contin 56 -eastent 56 -iwashita 56 -proforma 56 -guiyu 56 -wenn 56 -time-cnn 56 -re-regulate 56 -myaungmya 56 -grapplers 56 -chimura 56 -ganchev 56 -twerp 56 -yukking 56 -pflaumer 56 -deloatch 56 -al-jabali 56 -paech 56 -delimited 56 -panola 56 -clottemans 56 -neigbors 56 -gruzen 56 -chile-quake 56 -milkmaid 56 -ooooo 56 -ravenscroft 56 -darbar 56 -whitely 56 -tumpat 56 -sandile 56 -wxia 56 -iwork 56 -mcmahons 56 -gawd 56 -engles 56 -work-based 56 -china-unrest-tibet-rights-oly-#### 56 -trembler 56 -nybz### 56 -fengshui 56 -moalin 56 -perumal 56 -sopon 56 -tae-bo 56 -ghosting 56 -changgwang 56 -kecil 56 -kjolstad 56 -kopel 56 -bone-building 56 -ju-on 56 -dogaru 56 -palframan 56 -bauduin 56 -ueb 56 -time-space 56 -fellow-citizens 56 -papahanaumokuakea 56 -segment-leading 56 -fuel-hedging 56 -shokir 56 -coffeeshops 56 
-advantica 56 -marcelina 56 -recirculate 56 -lead-zinc 56 -lankan-born 56 -cnbc.com 56 -effros 56 -keast 56 -indecorous 56 -ostrov 56 -low-demand 56 -grafer 56 -kaigler 56 -yniguez 56 -tayyaba 56 -woodcraft 56 -k.p.s. 56 -jarwan 56 -yamasoto 56 -l'carriere 56 -ruttenberg 56 -revver 56 -#-gisela 56 -long-shuttered 56 -westshore 56 -rendina 56 -moscardi 56 -yoani 56 -dungu 56 -http://www.mtv.com 56 -lupa 56 -money-lending 56 -futagawa 56 -al-majd 56 -sarokin 56 -masefield 56 -hatemongers 56 -#####-#####-# 56 -ci-### 56 -guderzo 56 -connoting 56 -galavision 56 -loggans 56 -fiorucci 56 -re-inspected 56 -lemenager 56 -covadonga 56 -chronometer 56 -prodon 56 -chandraswamy 56 -gatlif 56 -moevenpick 56 -giaccone 56 -charanga 56 -arti 56 -clinton-appointed 56 -extrasensory 56 -martaban 56 -essaye 56 -florentines 56 -mctague 56 -derozan 56 -bmet 56 -akher 56 -counter-corruption 56 -notepaper 56 -luzinski 56 -skanky 56 -unharvested 56 -kadosh 56 -taimyr 56 -martial-law 56 -shoreditch 56 -somersworth 56 -mass-scale 56 -lobotomies 56 -frates 56 -al-gailani 56 -cuito 56 -biters 56 -fiszmann 56 -murati 56 -vouilloz 56 -arscott 56 -goshutes 56 -fouhami 56 -palestinian-palestinian 56 -hellmer 56 -maryinsky 56 -g-class 56 -cvid 56 -hilman 56 -us-japan-space-shuttle 56 -vallop 56 -pripyat 56 -genencor 56 -nightshade 56 -kielar 56 -ginanjar 56 -botan 56 -sukamdani 56 -temizkanoglu 56 -berezovksy 56 -lithographic 56 -colbie 56 -courchesne 56 -newsbreak 56 -kwamie 56 -characterising 56 -joyriding 56 -ghilarducci 56 -mortham 56 -shaken-baby 56 -glorieta 56 -fengyun-# 56 -leibacher 56 -gardening@nytimes.com 56 -christenings 56 -discombobulating 56 -ambion 56 -stolyarov 56 -lastuvka 56 -ultra-tight 56 -xingye 56 -terrariums 56 -treaty-based 56 -edmon 56 -mangena 56 -scopolamine 56 -nobes 56 -long-separated 56 -acls 56 -skamania 56 -egx## 56 -dahduli 56 -motech 56 -intitial 56 -caffita 56 -girl-group 56 -half-expected 56 -linette 56 -soldati 56 -pend 56 -hopoate 56 -pre-castro 56 -lulin 56 
-http://www.usccb.org/ 56 -non-convertible 56 -abu-zayyad 56 -step-brother 56 -artux 56 -yusanto 56 -bestiary 56 -debrum 56 -dingers 56 -khamzat 56 -ridpath 56 -adex 56 -anthracnose 56 -ten-week 56 -helbrans 56 -shoehorning 56 -:####### 56 -paride 56 -boosterish 56 -goldwin 56 -pigeonholes 56 -raimunda 56 -howden 56 -evangelized 56 -mixups 56 -panter 56 -o.h. 56 -rockrose 56 -england-born 56 -augustow 56 -lessel 56 -kasane 56 -sudeikis 56 -contentville 56 -cleaning-up 56 -natan-zada 56 -manawatu 56 -unreviewable 56 -witticism 56 -ternus 56 -uscirf 56 -rijsbergen 56 -rakad 56 -cruciferous 56 -dundes 56 -pirozzi 56 -kreegel 56 -budgie 56 -clijster 56 -brou 56 -shaywitz 56 -lopano 56 -helissio 56 -bolshunov 56 -tennstedt 56 -anhua 56 -dimiter 56 -mosquito-control 56 -hearts-and-minds 56 -pretre 56 -tvind 56 -abednego 56 -born-and-bred 56 -zingre-graf 56 -longhai 56 -inniger 56 -diaper-changing 56 -corleones 56 -runge-metzger 56 -tangdhar 56 -thibadeau 56 -financieele 56 -chassin 56 -polarities 56 -chi-x 56 -helvey 56 -vetters 56 -pazzi 56 -univerity 56 -jail-house 56 -pegasystems 56 -chengshan 56 -ofakim 56 -a-lister 56 -jackfruit 56 -kissy 56 -ruocco 56 -paleobiologist 56 -bobosikova 56 -al-rayes 56 -dornod 56 -as-expected 56 -zadari 56 -javadi 56 -dynatech 56 -silantiev 56 -sonaecom 56 -kerosine 56 -akito 55 -benedictions 55 -mamounia 55 -charco 55 -blue-colored 55 -val-de-marne 55 -antipov 55 -nagu 55 -mueller-wohlfahrt 55 -pratapkumar 55 -multirole 55 -squibbed 55 -seroczynski 55 -satcom 55 -yaacobi 55 -weihong 55 -whitesnake 55 -uninflected 55 -pabbo 55 -runyonesque 55 -sloviter 55 -sperrazza 55 -rimland 55 -standen 55 -echouafni 55 -thanakorn 55 -okutan 55 -olim 55 -nadeam 55 -dolla 55 -chaudhri 55 -hediger 55 -ibwc 55 -craigslist.com 55 -sub-office 55 -al-luhaibi 55 -boutris 55 -harshman 55 -buhain 55 -nurhadi 55 -food-loving 55 -pluots 55 -geometrics 55 -pedercini 55 -couvrette 55 -rlopez 55 -greystoke 55 -standbridge 55 -afilias 55 -ex-strongman 55 -fator 55 
-somerwill 55 -worthley 55 -karbanenko 55 -amchitka 55 -thadeus 55 -etemad-e-melli 55 -safecracker 55 -once-elegant 55 -gonyea 55 -anti-islamist 55 -dabanovic 55 -aljabr 55 -ozcelik 55 -smutny-jones 55 -fa'asavalu 55 -searby 55 -pengrowth 55 -cuecat 55 -mininum 55 -shkirko 55 -golf-club 55 -muzzafarabad 55 -bozsik 55 -discerns 55 -rothbaum 55 -ayila 55 -scandanavia 55 -lewmar 55 -jelloun 55 -dragnea 55 -pole-dancing 55 -takanyi 55 -customshouse 55 -kritzer 55 -gomelauri 55 -y.v. 55 -off-airport 55 -delfs 55 -points-scoring 55 -jupin 55 -eliska 55 -rainman 55 -renovator 55 -laroussi 55 -osmus 55 -gegamian 55 -bantadtan 55 -voletta 55 -yussupova 55 -tottenville 55 -dascalu 55 -#-los 55 -metabolizing 55 -kremlin-orchestrated 55 -gastro 55 -khayyat 55 -talara 55 -zizzo 55 -sumaysim 55 -b-level 55 -secularisation 55 -foreign-ownership 55 -puning 55 -stoos 55 -moun 55 -sidener 55 -footmen 55 -kamioka 55 -nazy 55 -valentierra 55 -lebovitz 55 -palanga 55 -english-khmer 55 -secularize 55 -tousignant 55 -development-related 55 -joram 55 -positano 55 -hand-crank 55 -laser-beam 55 -outfalls 55 -gps-enabled 55 -maranda 55 -ewatch 55 -executive-secretary 55 -saira 55 -radom 55 -wagoneer 55 -romack 55 -biomolecular 55 -marsudi 55 -vernier 55 -casualization 55 -non-intrusive 55 -idefense 55 -o.b. 
55 -credit-monitoring 55 -munante 55 -molehills 55 -seelbach 55 -swiger 55 -bado 55 -dppc 55 -gremolata 55 -non-parents 55 -ikenson 55 -wanniarachchi 55 -wednesdy 55 -lilas 55 -chengliang 55 -krauter 55 -blast-furnace 55 -polinard 55 -oscar-winners 55 -oddar 55 -qingcheng 55 -gellin 55 -castmate 55 -empedocle 55 -wse 55 -comerci 55 -octuplet 55 -gristmill 55 -calcium-fortified 55 -aeron 55 -xiuli 55 -powerine 55 -near-default 55 -spangles 55 -zarafshan 55 -turkish-based 55 -posptoned 55 -guyana-based 55 -agroforestry 55 -heavily-protected 55 -egis 55 -baoqing 55 -niezabitowska 55 -assignee 55 -taddeo 55 -xiaolan 55 -wechsel-bank 55 -still-living 55 -bolingbroke 55 -fusari 55 -padnos 55 -feistiest 55 -heumann 55 -nalle 55 -umbridge 55 -anthocyanins 55 -vampirism 55 -fuel-related 55 -katari 55 -newly-married 55 -funderburg 55 -compunctions 55 -mi-jung 55 -parrotheads 55 -kail 55 -jinhao 55 -ganciclovir 55 -central-east 55 -m\/a-com 55 -ggagbo 55 -haendel 55 -cholamandalam 55 -emina 55 -thiamin 55 -autorities 55 -thrill-seeker 55 -onu 55 -serb-majority 55 -mandylor 55 -kenesei 55 -soffa 55 -ghozlan 55 -puffinburger 55 -autotrader.com 55 -janowska 55 -luxenberg 55 -liddick 55 -stelmakh 55 -internationally-known 55 -oil-pipeline 55 -lolli 55 -shiang-nung 55 -matovu 55 -sablefish 55 -untradable 55 -ninth-placed 55 -minidv 55 -byronic 55 -tesema 55 -nightime 55 -isea 55 -florida-alabama 55 -bruininks 55 -gronke 55 -chearavanont 55 -##-by-##-centimeter 55 -latwp 55 -trouville 55 -allover 55 -switz 55 -notman 55 -mickler 55 -bouchenaki 55 -beirendonck 55 -sharkboy 55 -anahuac 55 -frale 55 -voter-rich 55 -livingood 55 -bailis 55 -catsup 55 -finnish-german 55 -chalupas 55 -genri 55 -abdulatif 55 -episiotomy 55 -rumbaut 55 -mullein 55 -bursted 55 -crye 55 -philoctetes 55 -beji 55 -kreidenko 55 -brianderson 55 -gitex 55 -supoj 55 -redmen 55 -road-testing 55 -rac-ns 55 -el-arab 55 -tahsi 55 -lactose-intolerant 55 -out-aced 55 -sherifi 55 -angat 55 -patarroyo 55 -czuma 55 -arsi 
55 -tongue-and-groove 55 -sudol 55 -punkers 55 -frayre 55 -performance-wise 55 -#-xavier 55 -cantel 55 -ndiaye-diatta 55 -junes 55 -comegys 55 -nyuk 55 -bachelorettes 55 -creepy-crawly 55 -risner 55 -nassi 55 -teall 55 -outraising 55 -mini-concert 55 -ever-lasting 55 -habboush 55 -garrigue 55 -trelleborgs 55 -elisangela 55 -task-oriented 55 -rueppel 55 -bone-thin 55 -cremi 55 -hamat 55 -long-faced 55 -cjh\/rr 55 -finisar 55 -malaysia-vote 55 -pene 55 -dovid 55 -deal-killer 55 -enestam 55 -dedes 55 -dingiri 55 -russia-chechnya-vote 55 -genis 55 -shaabiya 55 -chelyabinsk-## 55 -selecta 55 -short-dated 55 -mahadhesi 55 -death-knell 55 -gotzsche 55 -ilaga 55 -lanegan 55 -kouk 55 -mahle 55 -per-mile 55 -corruption-busting 55 -alipui 55 -gorali 55 -jiazhen 55 -yu-ting 55 -isely 55 -medaire 55 -haggui 55 -p#-plus-one 55 -ntn 55 -mkalavishvili 55 -off-farm 55 -chromatis 55 -sharia-compliant 55 -highest-volume 55 -boner 55 -kannell 55 -tottenham\/eng 55 -helsingoer 55 -coluccio 55 -aliyeva 55 -steier 55 -koers 55 -exceso 55 -desaulniers 55 -comcast-spectacor 55 -jersey-born 55 -rock-music 55 -al-mazidi 55 -kilim 55 -povera 55 -iksanov 55 -tele-ventures 55 -primor 55 -civlians 55 -r.i.-based 55 -orania 55 -year-around 55 -small-to-medium 55 -dravecky 55 -bontecou 55 -the# 55 -amsterdam-schiphol 55 -workrate 55 -advocator 55 -demouge 55 -ei-ichi 55 -aeberhard 55 -bridi 55 -picacho 55 -dinant 55 -slashers 55 -judaea 55 -kayama 55 -delavekouras 55 -fuli 55 -dreno 55 -##-year-plus 55 -anang 55 -lerche 55 -abessole 55 -##-robert 55 -khidr 55 -meheganglobe.com 55 -dubuc 55 -budiardjo 55 -#-state 55 -spit-roasted 55 -asbill 55 -balvino 55 -munyenyembe 55 -balzaretti 55 -ivelin 55 -aproximadamente 55 -pockmark 55 -ulleval 55 -moneycentral 55 -houssine 55 -gagoc 55 -jong-ho 55 -nonlawyer 55 -str\/lp## 55 -ship-to-air 55 -yanakiev 55 -gimbels 55 -concow 55 -moschetti 55 -zonghuai 55 -ottenhoff 55 -no-bake 55 -rutschow-stomporowski 55 -tampabay.com 55 -tzeltal 55 -c-note 55 -vebjoern 
55 -fuel-cycle 55 -colombian-owned 55 -rescigno 55 -cropsey 55 -triple-x 55 -palmbeachpost.com/depression 55 -khanjani 55 -tousle-haired 55 -gulled 55 -tullia 55 -bideau 55 -#-and-a-half 55 -waytha 55 -aido 55 -###ci 55 -huels 55 -mychael 55 -madrigali 55 -trygg 55 -ciccolo 55 -kotscho 55 -levinstein 55 -taie 55 -adcb 55 -kry 55 -ethers 55 -cheaptickets.com 55 -lehrmann 55 -dendur 55 -antipodean 55 -#-gao 55 -keret 55 -srilanka-unrest-blast 55 -swamis 55 -horse-breeding 55 -dinsmoor 55 -barysch 55 -junior-level 55 -geode 55 -hoffmann-laroche 55 -nones 55 -wijdan 55 -square-shouldered 55 -white-shirted 55 -coldiron 55 -chartis 55 -kuskokwim 55 -stepfret 55 -gopendra 55 -nefertari 55 -klsx-fm 55 -westhusing 55 -revault 55 -luxar 55 -giancola 55 -gartnerg# 55 -lakemba 55 -organizaciones 55 -camerman 55 -amoussou 55 -last-hour 55 -pastrik 55 -datsakorn 55 -duxford 55 -brown-brick 55 -fortul 55 -zainy 55 -wamidh 55 -hanel 55 -feda 55 -reminyl 55 -thoeni 55 -sportcenter 55 -white-fleshed 55 -parchments 55 -seafrance 55 -nordmark 55 -aristobulo 55 -ligonier 55 -rmf 55 -torchy 55 -butcheries 55 -kurfuerstendamm 55 -http://www.cdc.gov/h#n#flu 55 -isleta 55 -letter-bombs 55 -hadrosaur 55 -nourizadeh 55 -kujawa 55 -memling 55 -mtd 55 -parents-to-be 55 -kinyu 55 -completo 55 -alsatians 55 -itta 55 -baoliu 55 -pravit 55 -siping 55 -korea-eu 55 -fulminate 55 -guandu 55 -yili\/zhao 55 -demott 55 -pakistan-militant 55 -lpu 55 -petrowski 55 -sondashi 55 -lindbom 55 -gatton 55 -dubelier 55 -hsin-hsing 55 -ghazl 55 -chad-unrest 55 -amcc 55 -lemon-yellow 55 -aniek 55 -danshuei 55 -tidmore 55 -chin-up 55 -scarpaci 55 -fresnel 55 -mecum 55 -rosebush 55 -consumer-confidence 55 -vainshtok 55 -kateri 55 -hungiapuko 55 -qibao 55 -camon 55 -ruhlmann 55 -claunch 55 -audible.com 55 -falic 55 -bardales 55 -lithographer 55 -ebrima 55 -szot 55 -ambulance-chasing 55 -watoto 55 -vigan 55 -komineft 55 -miklikova 55 -tbl 55 -egidius 55 -srikkanth 55 -senlin 55 -suizhong 55 -saltpeter 55 -shitreet 55 
-laxton 55 -cspc 55 -elisdottir 55 -perrins 55 -uniglory 55 -emmick 55 -skulk 55 -earwax 55 -benaroya 55 -rushwaya 55 -discomforted 55 -ohalete 55 -panigoro 55 -mosquito-born 55 -safe-house 55 -excelerate 55 -mattino 55 -masoudi 55 -sonon 55 -bn.com 55 -ostergaard 55 -kamya 55 -kyeung-ran 55 -kitayama 55 -fu-hsing 55 -clip-art 55 -trillion-won 55 -ascender 55 -spritzing 55 -jonesing 55 -policy-based 55 -flomo 55 -globalist 55 -acamprosate 55 -redwall 55 -ljudmila 55 -korade 55 -lubo 55 -highhandedness 55 -anad 55 -#.#-meter-deep 55 -brownshirts 55 -attaboy 55 -traynham 55 -europeanized 55 -adjustables 55 -lumberyards 55 -hotel-like 55 -elektroprivreda 55 -kirilova 55 -dual-layer 55 -errett 55 -racinos 55 -sigou 55 -spindletop 55 -steidl 55 -hilderbrand 55 -man-of-the 55 -unconditioned 55 -budget-strapped 55 -bodart 55 -danic 55 -plushest 55 -jalaleddin 55 -fna 55 -to-# 55 -burhannudin 55 -godward 55 -greece-fires 55 -transfat 55 -lovelife 55 -myomectomy 55 -delgada 55 -ictv 55 -u.s-mexico 55 -lloreda 55 -moebius 55 -barkow 55 -madox 55 -liang-jen 55 -g+j 55 -cgnpc 55 -punchier 55 -wrsa 55 -rospars 55 -overbid 55 -lokichoggio 55 -fukuura 55 -yuegu 55 -tarkett 55 -snocountry 55 -inui 55 -moved.wits-end-column 55 -partita 55 -overfilling 55 -hectarage 55 -okwiri 55 -#-zheng 55 -eidinger 55 -tengan 55 -kouris 55 -cbuchholz 55 -hafnarfjordur 55 -owasi 55 -iza 55 -spungen 55 -tepix 55 -sial 55 -keepin 55 -paint-by-number 55 -jeanene 55 -natural-food 55 -korniyenko 55 -hartadi 55 -ananova 55 -trend-setters 55 -nohilly 55 -pasteurize 55 -www.blogs.tampabay.com/food 55 -advertising-driven 55 -nalumango 55 -ruxton 55 -jalrez 55 -miyar 55 -dmelvin@coxnews.com 55 -overvalue 55 -slane 55 -hankerson 55 -harpswell 55 -wassell 55 -bishan 55 -fried-chicken 55 -galvanic 55 -ylli 55 -cusip 55 -dogeared 55 -goofy-looking 55 -rhys-meyers 55 -lined-up 55 -heeren 55 -hyslop 55 -yanovsky 55 -www.aa.com 55 -seagren 55 -wiklund 55 -art-making 55 -non-lawyer 55 -saurashtra 55 -michoacana 55 
-##-###-## 55 -shabnam 55 -roadworker 55 -shetler 55 -quistelli 55 -jacksonville-based 55 -galekovic 55 -badruddin 55 -voorhis 55 -three-song 55 -geordies 55 -decane 55 -kilrea 55 -epfl 55 -gentamicin 55 -chewed-up 55 -severgazprom 55 -bolona 55 -chandos 55 -rafflesia 55 -mercutio 55 -pipebomb 55 -ruthann 55 -makhenkesi 55 -sweatband 55 -built-ins 55 -donata 55 -ramonet 55 -meehl 55 -curico 55 -alibux 55 -commentary\/oped 55 -slifkin 55 -bason 55 -lawsky 55 -sabal 55 -isaksen 55 -cheng-kung 55 -labruno 55 -musics 55 -virgets 55 -qidwa 55 -export-heavy 55 -out-compete 55 -nettuno 55 -ruhanie 55 -shailendra 55 -ogallaga 55 -time-being 55 -mashego 55 -bangladesh-based 55 -####b 55 -####g 55 -school-prayer 55 -necked 55 -luzern 55 -wasbir 55 -pro-royalist 55 -montelongo 55 -sudan-darfur-un 55 -internationally-sponsored 55 -buyenzi 55 -b&l 55 -paragliders 55 -producer\/director 55 -saalbach 55 -dillons 55 -subleasing 55 -unifrance 55 -fleeces 55 -chu-huan 55 -kanchi 55 -bihan 55 -agrama 55 -white-sided 55 -hansack 55 -jin-woo 55 -hileman 55 -handwoven 55 -stouter 55 -leaf-shaped 55 -osby 55 -stamen 55 -pleasurably 55 -chimeras 55 -film-related 55 -jotspot 55 -baitzel 55 -palmar 55 -ilegal 55 -hiaa 55 -eeeee 55 -alcobendas 55 -prewett 55 -knkt 55 -berkous 55 -woe-is-me 55 -krinkie 55 -magdelena 55 -post-hussein 55 -hardline-controlled 55 -narrow-bodied 55 -aqueous 55 -burukina 55 -nozoe 55 -awartani 55 -kalapani 55 -guenot 55 -tuipulotu 55 -iovino 55 -vosper 55 -##-flavia 55 -rrodriguez 55 -tranh 55 -d'amuro 55 -weert 55 -brosh 55 -europewide 55 -kovals 55 -pie-shaped 55 -nahda 55 -siboni 55 -vedenkin 55 -squillacote 55 -gittes 55 -jaakkola 55 -khattabi 55 -vatican-affiliated 55 -fewer-than-expected 55 -subschinski 55 -sherawat 55 -galadari 55 -salvors 55 -virdi 55 -jayasundara 55 -giraudet 55 -longjing 55 -thum 55 -ahorros 55 -pro-franco 55 -plutonium-making 55 -environmental-protection 55 -escuredo 55 -zhiping 55 -worktable 55 -sediq 55 -tree-hugger 55 -sahlman 55 
-fadeout 55 -zizhou 55 -martes 55 -u.s.-arranged 55 -kymco 55 -nxc# 55 -kopin 55 -astbury 55 -marzan 55 -dongfanghong 55 -montecore 55 -cpap 55 -yamase 55 -assiri 55 -mbandjock 55 -plutocrat 55 -###-billion-baht 55 -unmerciful 55 -british-chinese 55 -re-gifting 55 -sexually-explicit 55 -switch-off 55 -svan 55 -abeywardene 55 -ansin 55 -cold-shoulder 55 -terron 55 -lizard-like 55 -aquarian 55 -solove 55 -water-carrying 55 -celebrityhood 55 -foreign-produced 55 -ganascia 55 -nyepi 55 -popmart 55 -ustream 55 -rosalio 55 -outdrawing 55 -vladimirovna 55 -gbissau 55 -ersoy 55 -golovlyov 55 -biserko 55 -glicken 55 -fbl-esp-cup 55 -briskman 55 -solvberg 55 -wehr-hasler 55 -yokel 55 -koplovitz 55 -likeminded 55 -berkey 55 -infrasound 55 -al-adil 55 -quinley 55 -boym 55 -boxier 55 -tutuila 55 -stair-climbing 55 -vike 55 -a.f.m. 55 -belgrad 55 -nucleaire 55 -makubuya 55 -silverchair 55 -prawiro 55 -kluzak 55 -decepticons 55 -chappy 55 -eeriest 55 -paycut 55 -pinit 55 -tech-dominant 55 -charbonnet 55 -mid-innings 55 -watermarking 55 -unshelled 55 -bpa-free 55 -faughnan 55 -vanins 55 -sirena 55 -militello 55 -officier 55 -fras 55 -marudai 55 -hypnotherapy 55 -pasada 55 -laser-cut 55 -fuji-servetto 55 -butar 55 -vajna 55 -miguez 55 -cseries 55 -schlow 55 -hamkyong 55 -top-hatted 55 -midf 55 -union-mandated 55 -hanssens 55 -exhorbitant 55 -most-famous 55 -ruesch 55 -dexedrine 55 -baracoa 55 -maricela 55 -appropriator 55 -bronis 55 -vachagayev 55 -maseratis 55 -growth-stock 55 -westerngeco 55 -fuel-injection 55 -jpletz 55 -edun 55 -pinmanee 55 -swarts 55 -gauntlets 55 -bovelander 55 -azpurua 55 -honka 55 -edge-of-your-seat 55 -mcspaden 55 -aacc 55 -underripe 55 -helipads 55 -intra-shiite 55 -corazzin 55 -copperheads 55 -mumuni 55 -hruby 55 -d'alba 55 -goodland 55 -verrill 55 -hitoki 55 -forswore 55 -griddles 55 -spintronics 55 -sunncomm 55 -tongsalee 55 -anti-roll 55 -drogoul 55 -ecologic 55 -two-states 55 -acto 55 -coral-colored 55 -juliusz 55 -traesch 55 -ulaanbaatar 55 
-##-grade 55 -scorelines 55 -rainswept 55 -mandisi 55 -dually 55 -wiant 55 -essig 55 -zambeze 55 -ortigas 55 -crewson 55 -assurer 55 -yellow-legged 55 -pirouetting 55 -gotschall 55 -injury-forced 55 -chemins 55 -aerator 55 -musalo 55 -kaijuka 55 -radivoje 55 -rennet 55 -half-seriously 55 -bulwer-lytton 55 -ashrafiyeh 55 -smaller-market 55 -mukarram 55 -one-and-only 55 -bojkov 55 -mosab 55 -remmel 55 -capellini 55 -lemel 55 -marostica 55 -gurfein 55 -kilicdaroglu 55 -config 55 -xxxend 55 -well-chilled 55 -meatiest 55 -###,###-rupee 55 -infinitis 55 -fiddlehead 55 -simpanan 55 -bat-winged 55 -banyoles 55 -neistat 55 -ragas 55 -philologist 55 -autostick 55 -nessler 55 -##-people 55 -tr#s 55 -tent-pole 55 -algarabawi 55 -prakarn 55 -ts### 55 -pierre-henry 55 -dumbfounding 55 -http://www.chinapntr.gov 55 -bnu 55 -step-grandfather 55 -mcnamaraglobe.com 55 -mlm 55 -pepitone 55 -americium 55 -phased-out 55 -mitchellville 55 -india-weather 55 -ilion 55 -sapan 55 -land-to-air 55 -aerators 55 -bacho 55 -gerwig 55 -richart 55 -dethrones 55 -runcorn 55 -hinzpeter 55 -spoon-feeding 55 -krey 55 -artform 55 -kva 55 -macmullanglobe.com 55 -nonde 55 -hispanico 55 -danke 55 -sevene 55 -arqam 55 -mbatista 55 -rphilpotstar-telegram 55 -lauridsen 55 -estrogen-only 55 -clow 55 -ki-### 55 -foucher 55 -zetec 55 -autograph-signing 55 -africanus 55 -ex-israeli 55 -psoriatic 55 -hualing 55 -unibet.com 55 -khoshchehreh 55 -matanog 55 -peace\/def 55 -rashid-merem 55 -kostyuk 55 -niantic 55 -dressen 55 -neinas 55 -in-process 55 -kazeem 55 -pude 55 -arria 55 -funing 55 -self-destructiveness 55 -shergold 55 -batiuk 55 -petrosa 55 -staser 55 -seebaran 55 -nesmachny 55 -#rd\/tv 55 -altona 55 -macarounas 55 -urad 55 -arenes 55 -zinovy 55 -javelins 55 -carpet-cleaning 55 -al-gumhuriya 55 -mogelonsky 55 -tattle 55 -afaq 55 -balie 55 -jayesh 55 -mccawley 55 -manganaro 55 -breitsprecher 55 -charice 55 -abdiqassim 55 -tantaquidgeon 55 -yung-san 55 -meirav 55 -###km\/h 55 -anouma 55 -el-ayoun 55 
-wherehouse 55 -alloudi 55 -moshtarak 55 -ktvt 55 -gholikhan 55 -gogele 55 -mithoff 55 -intourist 55 -x-milwaukee 55 -never-used 55 -#,###-square-kilometre 55 -darkman 55 -moulvibazar 55 -reher 55 -sewa 55 -krulwich 55 -kajiwara 55 -tunmore 55 -#-gong 55 -guille 55 -wrong-doers 55 -cirone 55 -adinolfi 55 -kil-seung 55 -charge-card 55 -tumpel-gugerell 55 -guliev 55 -glenmore 55 -flagcarrier 55 -zakum 55 -centromin 55 -earnings-driven 55 -pavoni 55 -machination 55 -flyertalk 55 -all-too-real 55 -maidenhead 55 -celtic\/sco 55 -rawod 55 -co-world 55 -sekhar 55 -gendel 55 -szymanowski 55 -domer 55 -tailandia 55 -three-panel 55 -vereker 55 -kuchinsky 55 -yongqiang 55 -fremont-based 55 -imouraren 55 -kotschau 55 -gebre-egziabher 55 -consolatory 55 -already-fragile 55 -mabini 55 -forehander 55 -whomping 55 -test-drove 55 -opensocial 55 -bacteria-killing 55 -kanko 55 -yakovleva 55 -bergert 55 -munari 55 -soccer-only 55 -maniatis 55 -helgesson 55 -nippert 55 -magnee 55 -hamisah 55 -non-war 55 -klaasen 55 -rubai 55 -saint-lazare 55 -shafir 55 -jianlin 55 -koppers 55 -lamberton 55 -metta 55 -algoma 55 -kessar 55 -kozue 55 -swiss-registered 55 -moderate-led 55 -desarrollar 55 -adenoids 55 -oberhofen 55 -tregubova 55 -##-cents 55 -adesina 55 -nedo 55 -trunzo 55 -nasaw 55 -vinters 55 -avx 55 -zanna 55 -suspender 55 -glutting 55 -objet 55 -medicina 55 -latka 55 -elegante 55 -kilner 55 -government-certified 55 -rc-# 55 -al-thawadi 55 -flip-out 55 -polish-language 55 -stationmaster 55 -fire-bombs 55 -seemo 55 -practical-minded 55 -tank-top 55 -hodari 55 -saj 55 -ger\/sae 55 -spinbaldak 55 -bikker 55 -volpenhein 55 -rom-com 55 -funches 55 -haacke 55 -al-hajiri 55 -tc-gb 55 -latium 55 -bhansali 55 -sterols 55 -mulliken 55 -plutarco 55 -voorsanger 55 -thomas-keprta 55 -bieksa 55 -loikaw 55 -matsepe-casaburri 55 -warty 55 -chen-wei 55 -www.mcdonalds.com 55 -hondora 55 -m.t.b. 
55 -off-grid 55 -home-design 55 -lipovsky 55 -bergsson 55 -kii 55 -strakhov 55 -songphon 55 -zahab 55 -allauddin 55 -well-tolerated 55 -scolese 55 -cucbm 55 -drayson 55 -eiken 55 -hfi 55 -zoia 55 -edgecomb 55 -saaremaa 55 -annisu-r 55 -allaf 55 -eye-view 55 -chambermaids 55 -hirosawa 55 -vladivostock 55 -trutv 55 -orrorin 55 -wistron 55 -kik 55 -pretentions 55 -sciullo 55 -handwriting-recognition 55 -majles 55 -motoshima 55 -tanginye 55 -hellawell 55 -extractable 55 -end-year 55 -soll 55 -caronna 55 -vampiric 55 -xigui 55 -hit-and-runs 55 -duba-yurt 55 -saur 55 -koltsov 55 -blue-helmet 55 -hyuga 55 -kifri 55 -najja 55 -wible 55 -matloha 55 -idrissi 55 -tamarindo 55 -just-the-facts 55 -bulaong 55 -meddein 55 -klinefelter 55 -satz 55 -ushpizin 55 -overnighting 55 -handbrake 55 -dalley 55 -prampero 55 -hss 55 -nonhybrid 55 -mutebusi 55 -jingqian 55 -cryptology 55 -anupama 55 -yakshina 55 -eeas 55 -pottengal 55 -right-to-carry 55 -shampooed 55 -sonejee 55 -ozyurek 55 -kinrara 55 -munyua 55 -traboulsi 55 -contar 55 -lerone 55 -roundtrips 55 -chelanga 55 -limited-field 55 -wenming 55 -kreisberg 55 -bertino 55 -zhongxiao 55 -acsi 55 -levance 55 -eocene 55 -#-feliciano 55 -tarantella 55 -jaehnig 55 -al-awadhi 55 -anti-socialist 55 -hornak 55 -darkhovin 55 -perimetre 55 -abramian 55 -caricola 55 -anti-hungarian 55 -maneiro 55 -size-# 55 -farglory 55 -petermann 55 -meiwa 55 -phelpses 55 -porgras 55 -samiya 55 -texoma 55 -huffines 55 -third-placer 55 -al-badran 55 -babyhood 55 -seamounts 55 -aasen 55 -casulties 55 -bodong 55 -shamar 55 -destablising 55 -curado 55 -shangai 55 -svedka 55 - 83845866 - 83845866 - 5 diff --git a/research/textsum/data_convert_example.py b/research/textsum/data_convert_example.py deleted file mode 100644 index 9328936dd8a4ee9d47372f0c93f8b0ae1a9787df..0000000000000000000000000000000000000000 --- a/research/textsum/data_convert_example.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Example of Converting TextSum model data. 
-Usage: -python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data -python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data -python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2 -diff data/text_data2 data/text_data -""" - -import struct -import sys - -import tensorflow as tf -from tensorflow.core.example import example_pb2 - -FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_string('command', 'binary_to_text', - 'Either binary_to_text or text_to_binary.' - 'Specify FLAGS.in_file accordingly.') -tf.app.flags.DEFINE_string('in_file', '', 'path to file') -tf.app.flags.DEFINE_string('out_file', '', 'path to file') - -def _binary_to_text(): - reader = open(FLAGS.in_file, 'rb') - writer = open(FLAGS.out_file, 'w') - while True: - len_bytes = reader.read(8) - if not len_bytes: - sys.stderr.write('Done reading\n') - return - str_len = struct.unpack('q', len_bytes)[0] - tf_example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0] - tf_example = example_pb2.Example.FromString(tf_example_str) - examples = [] - for key in tf_example.features.feature: - examples.append('%s=%s' % (key, tf_example.features.feature[key].bytes_list.value[0])) - writer.write('%s\n' % '\t'.join(examples)) - reader.close() - writer.close() - - -def _text_to_binary(): - inputs = open(FLAGS.in_file, 'r').readlines() - writer = open(FLAGS.out_file, 'wb') - for inp in inputs: - tf_example = example_pb2.Example() - for feature in inp.strip().split('\t'): - (k, v) = feature.split('=') - tf_example.features.feature[k].bytes_list.value.extend([v]) - tf_example_str = tf_example.SerializeToString() - str_len = len(tf_example_str) - writer.write(struct.pack('q', str_len)) - writer.write(struct.pack('%ds' % str_len, tf_example_str)) - writer.close() - - -def main(unused_argv): - assert FLAGS.command and FLAGS.in_file and FLAGS.out_file - if 
FLAGS.command == 'binary_to_text': - _binary_to_text() - elif FLAGS.command == 'text_to_binary': - _text_to_binary() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/textsum/seq2seq_attention.py b/research/textsum/seq2seq_attention.py deleted file mode 100644 index 33d1b4fed07f65270b4550051bb90411b45c736f..0000000000000000000000000000000000000000 --- a/research/textsum/seq2seq_attention.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Trains a seq2seq model. - -WORK IN PROGRESS. - -Implement "Abstractive Text Summarization using Sequence-to-sequence RNNS and -Beyond." 
- -""" -import sys -import time - -import tensorflow as tf -import batch_reader -import data -import seq2seq_attention_decode -import seq2seq_attention_model - -FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_string('data_path', - '', 'Path expression to tf.Example.') -tf.app.flags.DEFINE_string('vocab_path', - '', 'Path expression to text vocabulary file.') -tf.app.flags.DEFINE_string('article_key', 'article', - 'tf.Example feature key for article.') -tf.app.flags.DEFINE_string('abstract_key', 'headline', - 'tf.Example feature key for abstract.') -tf.app.flags.DEFINE_string('log_root', '', 'Directory for model root.') -tf.app.flags.DEFINE_string('train_dir', '', 'Directory for train.') -tf.app.flags.DEFINE_string('eval_dir', '', 'Directory for eval.') -tf.app.flags.DEFINE_string('decode_dir', '', 'Directory for decode summaries.') -tf.app.flags.DEFINE_string('mode', 'train', 'train/eval/decode mode') -tf.app.flags.DEFINE_integer('max_run_steps', 10000000, - 'Maximum number of run steps.') -tf.app.flags.DEFINE_integer('max_article_sentences', 2, - 'Max number of first sentences to use from the ' - 'article') -tf.app.flags.DEFINE_integer('max_abstract_sentences', 100, - 'Max number of first sentences to use from the ' - 'abstract') -tf.app.flags.DEFINE_integer('beam_size', 4, - 'beam size for beam search decoding.') -tf.app.flags.DEFINE_integer('eval_interval_secs', 60, 'How often to run eval.') -tf.app.flags.DEFINE_integer('checkpoint_secs', 60, 'How often to checkpoint.') -tf.app.flags.DEFINE_bool('use_bucketing', False, - 'Whether bucket articles of similar length.') -tf.app.flags.DEFINE_bool('truncate_input', False, - 'Truncate inputs that are too long. 
If False, ' - 'examples that are too long are discarded.') -tf.app.flags.DEFINE_integer('num_gpus', 0, 'Number of gpus used.') -tf.app.flags.DEFINE_integer('random_seed', 111, 'A seed value for randomness.') - - -def _RunningAvgLoss(loss, running_avg_loss, summary_writer, step, decay=0.999): - """Calculate the running average of losses.""" - if running_avg_loss == 0: - running_avg_loss = loss - else: - running_avg_loss = running_avg_loss * decay + (1 - decay) * loss - running_avg_loss = min(running_avg_loss, 12) - loss_sum = tf.Summary() - loss_sum.value.add(tag='running_avg_loss', simple_value=running_avg_loss) - summary_writer.add_summary(loss_sum, step) - sys.stdout.write('running_avg_loss: %f\n' % running_avg_loss) - return running_avg_loss - - -def _Train(model, data_batcher): - """Runs model training.""" - with tf.device('/cpu:0'): - model.build_graph() - saver = tf.train.Saver() - # Train dir is different from log_root to avoid summary directory - # conflict with Supervisor. - summary_writer = tf.summary.FileWriter(FLAGS.train_dir) - sv = tf.train.Supervisor(logdir=FLAGS.log_root, - is_chief=True, - saver=saver, - summary_op=None, - save_summaries_secs=60, - save_model_secs=FLAGS.checkpoint_secs, - global_step=model.global_step) - sess = sv.prepare_or_wait_for_session(config=tf.ConfigProto( - allow_soft_placement=True)) - running_avg_loss = 0 - step = 0 - while not sv.should_stop() and step < FLAGS.max_run_steps: - (article_batch, abstract_batch, targets, article_lens, abstract_lens, - loss_weights, _, _) = data_batcher.NextBatch() - (_, summaries, loss, train_step) = model.run_train_step( - sess, article_batch, abstract_batch, targets, article_lens, - abstract_lens, loss_weights) - - summary_writer.add_summary(summaries, train_step) - running_avg_loss = _RunningAvgLoss( - running_avg_loss, loss, summary_writer, train_step) - step += 1 - if step % 100 == 0: - summary_writer.flush() - sv.Stop() - return running_avg_loss - - -def _Eval(model, data_batcher, 
vocab=None): - """Runs model eval.""" - model.build_graph() - saver = tf.train.Saver() - summary_writer = tf.summary.FileWriter(FLAGS.eval_dir) - sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - running_avg_loss = 0 - step = 0 - while True: - time.sleep(FLAGS.eval_interval_secs) - try: - ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root) - except tf.errors.OutOfRangeError as e: - tf.logging.error('Cannot restore checkpoint: %s', e) - continue - - if not (ckpt_state and ckpt_state.model_checkpoint_path): - tf.logging.info('No model to eval yet at %s', FLAGS.train_dir) - continue - - tf.logging.info('Loading checkpoint %s', ckpt_state.model_checkpoint_path) - saver.restore(sess, ckpt_state.model_checkpoint_path) - - (article_batch, abstract_batch, targets, article_lens, abstract_lens, - loss_weights, _, _) = data_batcher.NextBatch() - (summaries, loss, train_step) = model.run_eval_step( - sess, article_batch, abstract_batch, targets, article_lens, - abstract_lens, loss_weights) - tf.logging.info( - 'article: %s', - ' '.join(data.Ids2Words(article_batch[0][:].tolist(), vocab))) - tf.logging.info( - 'abstract: %s', - ' '.join(data.Ids2Words(abstract_batch[0][:].tolist(), vocab))) - - summary_writer.add_summary(summaries, train_step) - running_avg_loss = _RunningAvgLoss( - running_avg_loss, loss, summary_writer, train_step) - if step % 100 == 0: - summary_writer.flush() - - -def main(unused_argv): - vocab = data.Vocab(FLAGS.vocab_path, 1000000) - # Check for presence of required special tokens. - assert vocab.CheckVocab(data.PAD_TOKEN) > 0 - assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0 - assert vocab.CheckVocab(data.SENTENCE_START) > 0 - assert vocab.CheckVocab(data.SENTENCE_END) > 0 - - batch_size = 4 - if FLAGS.mode == 'decode': - batch_size = FLAGS.beam_size - - hps = seq2seq_attention_model.HParams( - mode=FLAGS.mode, # train, eval, decode - min_lr=0.01, # min learning rate. 
- lr=0.15, # learning rate - batch_size=batch_size, - enc_layers=4, - enc_timesteps=120, - dec_timesteps=30, - min_input_len=2, # discard articles/summaries < than this - num_hidden=256, # for rnn cell - emb_dim=128, # If 0, don't use embedding - max_grad_norm=2, - num_softmax_samples=4096) # If 0, no sampled softmax. - - batcher = batch_reader.Batcher( - FLAGS.data_path, vocab, hps, FLAGS.article_key, - FLAGS.abstract_key, FLAGS.max_article_sentences, - FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing, - truncate_input=FLAGS.truncate_input) - tf.set_random_seed(FLAGS.random_seed) - - if hps.mode == 'train': - model = seq2seq_attention_model.Seq2SeqAttentionModel( - hps, vocab, num_gpus=FLAGS.num_gpus) - _Train(model, batcher) - elif hps.mode == 'eval': - model = seq2seq_attention_model.Seq2SeqAttentionModel( - hps, vocab, num_gpus=FLAGS.num_gpus) - _Eval(model, batcher, vocab=vocab) - elif hps.mode == 'decode': - decode_mdl_hps = hps - # Only need to restore the 1st step and reuse it since - # we keep and feed in state for each step's output. - decode_mdl_hps = hps._replace(dec_timesteps=1) - model = seq2seq_attention_model.Seq2SeqAttentionModel( - decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus) - decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab) - decoder.DecodeLoop() - - -if __name__ == '__main__': - tf.app.run() diff --git a/research/textsum/seq2seq_attention_decode.py b/research/textsum/seq2seq_attention_decode.py deleted file mode 100644 index 54b56919439e92b5ca86aa2f5e0d6fbc909fb8d6..0000000000000000000000000000000000000000 --- a/research/textsum/seq2seq_attention_decode.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Module for decoding.""" - -import os -import time - -import beam_search -import data -from six.moves import xrange -import tensorflow as tf - -FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_integer('max_decode_steps', 1000000, - 'Number of decoding steps.') -tf.app.flags.DEFINE_integer('decode_batches_per_ckpt', 8000, - 'Number of batches to decode before restoring next ' - 'checkpoint') - -DECODE_LOOP_DELAY_SECS = 60 -DECODE_IO_FLUSH_INTERVAL = 100 - - -class DecodeIO(object): - """Writes the decoded and references to RKV files for Rouge score. - - See nlp/common/utils/internal/rkv_parser.py for detail about rkv file. - """ - - def __init__(self, outdir): - self._cnt = 0 - self._outdir = outdir - if not os.path.exists(self._outdir): - os.mkdir(self._outdir) - self._ref_file = None - self._decode_file = None - - def Write(self, reference, decode): - """Writes the reference and decoded outputs to RKV files. - - Args: - reference: The human (correct) result. - decode: The machine-generated result - """ - self._ref_file.write('output=%s\n' % reference) - self._decode_file.write('output=%s\n' % decode) - self._cnt += 1 - if self._cnt % DECODE_IO_FLUSH_INTERVAL == 0: - self._ref_file.flush() - self._decode_file.flush() - - def ResetFiles(self): - """Resets the output files. 
Must be called once before Write().""" - if self._ref_file: self._ref_file.close() - if self._decode_file: self._decode_file.close() - timestamp = int(time.time()) - self._ref_file = open( - os.path.join(self._outdir, 'ref%d'%timestamp), 'w') - self._decode_file = open( - os.path.join(self._outdir, 'decode%d'%timestamp), 'w') - - -class BSDecoder(object): - """Beam search decoder.""" - - def __init__(self, model, batch_reader, hps, vocab): - """Beam search decoding. - - Args: - model: The seq2seq attentional model. - batch_reader: The batch data reader. - hps: Hyperparamters. - vocab: Vocabulary - """ - self._model = model - self._model.build_graph() - self._batch_reader = batch_reader - self._hps = hps - self._vocab = vocab - self._saver = tf.train.Saver() - self._decode_io = DecodeIO(FLAGS.decode_dir) - - def DecodeLoop(self): - """Decoding loop for long running process.""" - sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - step = 0 - while step < FLAGS.max_decode_steps: - time.sleep(DECODE_LOOP_DELAY_SECS) - if not self._Decode(self._saver, sess): - continue - step += 1 - - def _Decode(self, saver, sess): - """Restore a checkpoint and decode it. - - Args: - saver: Tensorflow checkpoint saver. - sess: Tensorflow session. - Returns: - If success, returns true, otherwise, false. 
- """ - ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root) - if not (ckpt_state and ckpt_state.model_checkpoint_path): - tf.logging.info('No model to decode yet at %s', FLAGS.log_root) - return False - - tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path) - ckpt_path = os.path.join( - FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path)) - tf.logging.info('renamed checkpoint path %s', ckpt_path) - saver.restore(sess, ckpt_path) - - self._decode_io.ResetFiles() - for _ in xrange(FLAGS.decode_batches_per_ckpt): - (article_batch, _, _, article_lens, _, _, origin_articles, - origin_abstracts) = self._batch_reader.NextBatch() - for i in xrange(self._hps.batch_size): - bs = beam_search.BeamSearch( - self._model, self._hps.batch_size, - self._vocab.WordToId(data.SENTENCE_START), - self._vocab.WordToId(data.SENTENCE_END), - self._hps.dec_timesteps) - - article_batch_cp = article_batch.copy() - article_batch_cp[:] = article_batch[i:i+1] - article_lens_cp = article_lens.copy() - article_lens_cp[:] = article_lens[i:i+1] - best_beam = bs.BeamSearch(sess, article_batch_cp, article_lens_cp)[0] - decode_output = [int(t) for t in best_beam.tokens[1:]] - self._DecodeBatch( - origin_articles[i], origin_abstracts[i], decode_output) - return True - - def _DecodeBatch(self, article, abstract, output_ids): - """Convert id to words and writing results. - - Args: - article: The original article string. - abstract: The human (correct) abstract string. - output_ids: The abstract word ids output by machine. 
- """ - decoded_output = ' '.join(data.Ids2Words(output_ids, self._vocab)) - end_p = decoded_output.find(data.SENTENCE_END, 0) - if end_p != -1: - decoded_output = decoded_output[:end_p] - tf.logging.info('article: %s', article) - tf.logging.info('abstract: %s', abstract) - tf.logging.info('decoded: %s', decoded_output) - self._decode_io.Write(abstract, decoded_output.strip()) diff --git a/research/textsum/seq2seq_attention_model.py b/research/textsum/seq2seq_attention_model.py deleted file mode 100644 index 618d72fa28c920aa916c73575d716093d6d00499..0000000000000000000000000000000000000000 --- a/research/textsum/seq2seq_attention_model.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Sequence-to-Sequence with attention model for text summarization. -""" -from collections import namedtuple - -import numpy as np -import seq2seq_lib -from six.moves import xrange -import tensorflow as tf - -HParams = namedtuple('HParams', - 'mode, min_lr, lr, batch_size, ' - 'enc_layers, enc_timesteps, dec_timesteps, ' - 'min_input_len, num_hidden, emb_dim, max_grad_norm, ' - 'num_softmax_samples') - - -def _extract_argmax_and_embed(embedding, output_projection=None, - update_embedding=True): - """Get a loop_function that extracts the previous symbol and embeds it. 
- - Args: - embedding: embedding tensor for symbols. - output_projection: None or a pair (W, B). If provided, each fed previous - output will first be multiplied by W and added B. - update_embedding: Boolean; if False, the gradients will not propagate - through the embeddings. - - Returns: - A loop function. - """ - def loop_function(prev, _): - """function that feed previous model output rather than ground truth.""" - if output_projection is not None: - prev = tf.nn.xw_plus_b( - prev, output_projection[0], output_projection[1]) - prev_symbol = tf.argmax(prev, 1) - # Note that gradients will not propagate through the second parameter of - # embedding_lookup. - emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol) - if not update_embedding: - emb_prev = tf.stop_gradient(emb_prev) - return emb_prev - return loop_function - - -class Seq2SeqAttentionModel(object): - """Wrapper for Tensorflow model graph for text sum vectors.""" - - def __init__(self, hps, vocab, num_gpus=0): - self._hps = hps - self._vocab = vocab - self._num_gpus = num_gpus - self._cur_gpu = 0 - - def run_train_step(self, sess, article_batch, abstract_batch, targets, - article_lens, abstract_lens, loss_weights): - to_return = [self._train_op, self._summaries, self._loss, self.global_step] - return sess.run(to_return, - feed_dict={self._articles: article_batch, - self._abstracts: abstract_batch, - self._targets: targets, - self._article_lens: article_lens, - self._abstract_lens: abstract_lens, - self._loss_weights: loss_weights}) - - def run_eval_step(self, sess, article_batch, abstract_batch, targets, - article_lens, abstract_lens, loss_weights): - to_return = [self._summaries, self._loss, self.global_step] - return sess.run(to_return, - feed_dict={self._articles: article_batch, - self._abstracts: abstract_batch, - self._targets: targets, - self._article_lens: article_lens, - self._abstract_lens: abstract_lens, - self._loss_weights: loss_weights}) - - def run_decode_step(self, sess, article_batch, 
abstract_batch, targets, - article_lens, abstract_lens, loss_weights): - to_return = [self._outputs, self.global_step] - return sess.run(to_return, - feed_dict={self._articles: article_batch, - self._abstracts: abstract_batch, - self._targets: targets, - self._article_lens: article_lens, - self._abstract_lens: abstract_lens, - self._loss_weights: loss_weights}) - - def _next_device(self): - """Round robin the gpu device. (Reserve last gpu for expensive op).""" - if self._num_gpus == 0: - return '' - dev = '/gpu:%d' % self._cur_gpu - if self._num_gpus > 1: - self._cur_gpu = (self._cur_gpu + 1) % (self._num_gpus-1) - return dev - - def _get_gpu(self, gpu_id): - if self._num_gpus <= 0 or gpu_id >= self._num_gpus: - return '' - return '/gpu:%d' % gpu_id - - def _add_placeholders(self): - """Inputs to be fed to the graph.""" - hps = self._hps - self._articles = tf.placeholder(tf.int32, - [hps.batch_size, hps.enc_timesteps], - name='articles') - self._abstracts = tf.placeholder(tf.int32, - [hps.batch_size, hps.dec_timesteps], - name='abstracts') - self._targets = tf.placeholder(tf.int32, - [hps.batch_size, hps.dec_timesteps], - name='targets') - self._article_lens = tf.placeholder(tf.int32, [hps.batch_size], - name='article_lens') - self._abstract_lens = tf.placeholder(tf.int32, [hps.batch_size], - name='abstract_lens') - self._loss_weights = tf.placeholder(tf.float32, - [hps.batch_size, hps.dec_timesteps], - name='loss_weights') - - def _add_seq2seq(self): - hps = self._hps - vsize = self._vocab.NumIds() - - with tf.variable_scope('seq2seq'): - encoder_inputs = tf.unstack(tf.transpose(self._articles)) - decoder_inputs = tf.unstack(tf.transpose(self._abstracts)) - targets = tf.unstack(tf.transpose(self._targets)) - loss_weights = tf.unstack(tf.transpose(self._loss_weights)) - article_lens = self._article_lens - - # Embedding shared by the input and outputs. 
- with tf.variable_scope('embedding'), tf.device('/cpu:0'): - embedding = tf.get_variable( - 'embedding', [vsize, hps.emb_dim], dtype=tf.float32, - initializer=tf.truncated_normal_initializer(stddev=1e-4)) - emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x) - for x in encoder_inputs] - emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x) - for x in decoder_inputs] - - for layer_i in xrange(hps.enc_layers): - with tf.variable_scope('encoder%d'%layer_i), tf.device( - self._next_device()): - cell_fw = tf.contrib.rnn.LSTMCell( - hps.num_hidden, - initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123), - state_is_tuple=False) - cell_bw = tf.contrib.rnn.LSTMCell( - hps.num_hidden, - initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113), - state_is_tuple=False) - (emb_encoder_inputs, fw_state, _) = tf.contrib.rnn.static_bidirectional_rnn( - cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32, - sequence_length=article_lens) - encoder_outputs = emb_encoder_inputs - - with tf.variable_scope('output_projection'): - w = tf.get_variable( - 'w', [hps.num_hidden, vsize], dtype=tf.float32, - initializer=tf.truncated_normal_initializer(stddev=1e-4)) - w_t = tf.transpose(w) - v = tf.get_variable( - 'v', [vsize], dtype=tf.float32, - initializer=tf.truncated_normal_initializer(stddev=1e-4)) - - with tf.variable_scope('decoder'), tf.device(self._next_device()): - # When decoding, use model output from the previous step - # for the next step. 
- loop_function = None - if hps.mode == 'decode': - loop_function = _extract_argmax_and_embed( - embedding, (w, v), update_embedding=False) - - cell = tf.contrib.rnn.LSTMCell( - hps.num_hidden, - initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113), - state_is_tuple=False) - - encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, 2*hps.num_hidden]) - for x in encoder_outputs] - self._enc_top_states = tf.concat(axis=1, values=encoder_outputs) - self._dec_in_state = fw_state - # During decoding, follow up _dec_in_state are fed from beam_search. - # dec_out_state are stored by beam_search for next step feeding. - initial_state_attention = (hps.mode == 'decode') - decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder( - emb_decoder_inputs, self._dec_in_state, self._enc_top_states, - cell, num_heads=1, loop_function=loop_function, - initial_state_attention=initial_state_attention) - - with tf.variable_scope('output'), tf.device(self._next_device()): - model_outputs = [] - for i in xrange(len(decoder_outputs)): - if i > 0: - tf.get_variable_scope().reuse_variables() - model_outputs.append( - tf.nn.xw_plus_b(decoder_outputs[i], w, v)) - - if hps.mode == 'decode': - with tf.variable_scope('decode_output'), tf.device('/cpu:0'): - best_outputs = [tf.argmax(x, 1) for x in model_outputs] - tf.logging.info('best_outputs%s', best_outputs[0].get_shape()) - self._outputs = tf.concat( - axis=1, values=[tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs]) - - self._topk_log_probs, self._topk_ids = tf.nn.top_k( - tf.log(tf.nn.softmax(model_outputs[-1])), hps.batch_size*2) - - with tf.variable_scope('loss'), tf.device(self._next_device()): - def sampled_loss_func(inputs, labels): - with tf.device('/cpu:0'): # Try gpu. 
- labels = tf.reshape(labels, [-1, 1]) - return tf.nn.sampled_softmax_loss( - weights=w_t, biases=v, labels=labels, inputs=inputs, - num_sampled=hps.num_softmax_samples, num_classes=vsize) - - if hps.num_softmax_samples != 0 and hps.mode == 'train': - self._loss = seq2seq_lib.sampled_sequence_loss( - decoder_outputs, targets, loss_weights, sampled_loss_func) - else: - self._loss = tf.contrib.legacy_seq2seq.sequence_loss( - model_outputs, targets, loss_weights) - tf.summary.scalar('loss', tf.minimum(12.0, self._loss)) - - def _add_train_op(self): - """Sets self._train_op, op to run for training.""" - hps = self._hps - - self._lr_rate = tf.maximum( - hps.min_lr, # min_lr_rate. - tf.train.exponential_decay(hps.lr, self.global_step, 30000, 0.98)) - - tvars = tf.trainable_variables() - with tf.device(self._get_gpu(self._num_gpus-1)): - grads, global_norm = tf.clip_by_global_norm( - tf.gradients(self._loss, tvars), hps.max_grad_norm) - tf.summary.scalar('global_norm', global_norm) - optimizer = tf.train.GradientDescentOptimizer(self._lr_rate) - tf.summary.scalar('learning rate', self._lr_rate) - self._train_op = optimizer.apply_gradients( - zip(grads, tvars), global_step=self.global_step, name='train_step') - - def encode_top_state(self, sess, enc_inputs, enc_len): - """Return the top states from encoder for decoder. - - Args: - sess: tensorflow session. - enc_inputs: encoder inputs of shape [batch_size, enc_timesteps]. - enc_len: encoder input length of shape [batch_size] - Returns: - enc_top_states: The top level encoder states. - dec_in_state: The decoder layer initial state. 
- """ - results = sess.run([self._enc_top_states, self._dec_in_state], - feed_dict={self._articles: enc_inputs, - self._article_lens: enc_len}) - return results[0], results[1][0] - - def decode_topk(self, sess, latest_tokens, enc_top_states, dec_init_states): - """Return the topK results and new decoder states.""" - feed = { - self._enc_top_states: enc_top_states, - self._dec_in_state: - np.squeeze(np.array(dec_init_states)), - self._abstracts: - np.transpose(np.array([latest_tokens])), - self._abstract_lens: np.ones([len(dec_init_states)], np.int32)} - - results = sess.run( - [self._topk_ids, self._topk_log_probs, self._dec_out_state], - feed_dict=feed) - - ids, probs, states = results[0], results[1], results[2] - new_states = [s for s in states] - return ids, probs, new_states - - def build_graph(self): - self._add_placeholders() - self._add_seq2seq() - self.global_step = tf.Variable(0, name='global_step', trainable=False) - if self._hps.mode == 'train': - self._add_train_op() - self._summaries = tf.summary.merge_all() diff --git a/research/textsum/seq2seq_lib.py b/research/textsum/seq2seq_lib.py deleted file mode 100644 index de233895a01f9e1ca336413b788ea5b9bee20db1..0000000000000000000000000000000000000000 --- a/research/textsum/seq2seq_lib.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""seq2seq library codes copied from elsewhere for customization.""" - -import tensorflow as tf - - -# Adapted to support sampled_softmax loss function, which accepts activations -# instead of logits. -def sequence_loss_by_example(inputs, targets, weights, loss_function, - average_across_timesteps=True, name=None): - """Sampled softmax loss for a sequence of inputs (per example). - - Args: - inputs: List of 2D Tensors of shape [batch_size x hid_dim]. - targets: List of 1D batch-sized int32 Tensors of the same length as logits. - weights: List of 1D batch-sized float-Tensors of the same length as logits. - loss_function: Sampled softmax function (inputs, labels) -> loss - average_across_timesteps: If set, divide the returned cost by the total - label weight. - name: Optional name for this operation, default: 'sequence_loss_by_example'. - - Returns: - 1D batch-sized float Tensor: The log-perplexity for each sequence. - - Raises: - ValueError: If len(inputs) is different from len(targets) or len(weights). - """ - if len(targets) != len(inputs) or len(weights) != len(inputs): - raise ValueError('Lengths of logits, weights, and targets must be the same ' - '%d, %d, %d.' % (len(inputs), len(weights), len(targets))) - with tf.name_scope(values=inputs + targets + weights, name=name, - default_name='sequence_loss_by_example'): - log_perp_list = [] - for inp, target, weight in zip(inputs, targets, weights): - crossent = loss_function(inp, target) - log_perp_list.append(crossent * weight) - log_perps = tf.add_n(log_perp_list) - if average_across_timesteps: - total_size = tf.add_n(weights) - total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. 
- log_perps /= total_size - return log_perps - - -def sampled_sequence_loss(inputs, targets, weights, loss_function, - average_across_timesteps=True, - average_across_batch=True, name=None): - """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. - - Args: - inputs: List of 2D Tensors of shape [batch_size x hid_dim]. - targets: List of 1D batch-sized int32 Tensors of the same length as inputs. - weights: List of 1D batch-sized float-Tensors of the same length as inputs. - loss_function: Sampled softmax function (inputs, labels) -> loss - average_across_timesteps: If set, divide the returned cost by the total - label weight. - average_across_batch: If set, divide the returned cost by the batch size. - name: Optional name for this operation, defaults to 'sequence_loss'. - - Returns: - A scalar float Tensor: The average log-perplexity per symbol (weighted). - - Raises: - ValueError: If len(inputs) is different from len(targets) or len(weights). - """ - with tf.name_scope(values=inputs + targets + weights, name=name, - default_name='sampled_sequence_loss'): - cost = tf.reduce_sum(sequence_loss_by_example( - inputs, targets, weights, loss_function, - average_across_timesteps=average_across_timesteps)) - if average_across_batch: - batch_size = tf.shape(targets[0])[0] - return cost / tf.cast(batch_size, tf.float32) - else: - return cost - - -def linear(args, output_size, bias, bias_start=0.0, scope=None): - """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. - - Args: - args: a 2D Tensor or a list of 2D, batch x n, Tensors. - output_size: int, second dimension of W[i]. - bias: boolean, whether to add a bias term or not. - bias_start: starting value to initialize the bias; 0 by default. - scope: VariableScope for the created subgraph; defaults to "Linear". - - Returns: - A 2D Tensor with shape [batch x output_size] equal to - sum_i(args[i] * W[i]), where W[i]s are newly created matrices. 
- - Raises: - ValueError: if some of the arguments has unspecified or wrong shape. - """ - if args is None or (isinstance(args, (list, tuple)) and not args): - raise ValueError('`args` must be specified') - if not isinstance(args, (list, tuple)): - args = [args] - - # Calculate the total size of arguments on dimension 1. - total_arg_size = 0 - shapes = [a.get_shape().as_list() for a in args] - for shape in shapes: - if len(shape) != 2: - raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes)) - if not shape[1]: - raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes)) - else: - total_arg_size += shape[1] - - # Now the computation. - with tf.variable_scope(scope or 'Linear'): - matrix = tf.get_variable('Matrix', [total_arg_size, output_size]) - if len(args) == 1: - res = tf.matmul(args[0], matrix) - else: - res = tf.matmul(tf.concat(axis=1, values=args), matrix) - if not bias: - return res - bias_term = tf.get_variable( - 'Bias', [output_size], - initializer=tf.constant_initializer(bias_start)) - return res + bias_term diff --git a/research/transformer/README.md b/research/transformer/README.md deleted file mode 100644 index 0acad0005e61707315008598573598d27c70eab4..0000000000000000000000000000000000000000 --- a/research/transformer/README.md +++ /dev/null @@ -1,63 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Spatial Transformer Network - -The Spatial Transformer Network [1] allows the spatial manipulation of data within the network. - -
-

-
- -### API - -A Spatial Transformer Network implemented in Tensorflow 1.0 and based on [2]. - -#### How to use - -
-

-
- -```python -transformer(U, theta, out_size) -``` - -#### Parameters - - U : float - The output of a convolutional net should have the - shape [num_batch, height, width, num_channels]. - theta: float - The output of the - localisation network should be [num_batch, 6]. - out_size: tuple of two ints - The size of the output of the network - - -#### Notes -To initialize the network to the identity transform init ``theta`` to : - -```python -identity = np.array([[1., 0., 0.], - [0., 1., 0.]]) -identity = identity.flatten() -theta = tf.Variable(initial_value=identity) -``` - -#### Experiments - -
-

-
- -We used cluttered MNIST. Left column are the input images, right are the attended parts of the image by an STN. - -All experiments were run in Tensorflow 0.7. - -### References - -[1] Jaderberg, Max, et al. "Spatial Transformer Networks." arXiv preprint arXiv:1506.02025 (2015) - -[2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py diff --git a/research/transformer/cluttered_mnist.py b/research/transformer/cluttered_mnist.py deleted file mode 100644 index ec00c751b2af8dd82cdb3a68188c474634d5d6ad..0000000000000000000000000000000000000000 --- a/research/transformer/cluttered_mnist.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -import tensorflow as tf -from spatial_transformer import transformer -import numpy as np -from tf_utils import weight_variable, bias_variable, dense_to_one_hot - -# %% Load data -mnist_cluttered = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz') - -X_train = mnist_cluttered['X_train'] -y_train = mnist_cluttered['y_train'] -X_valid = mnist_cluttered['X_valid'] -y_valid = mnist_cluttered['y_valid'] -X_test = mnist_cluttered['X_test'] -y_test = mnist_cluttered['y_test'] - -# % turn from dense to one hot representation -Y_train = dense_to_one_hot(y_train, n_classes=10) -Y_valid = dense_to_one_hot(y_valid, n_classes=10) -Y_test = dense_to_one_hot(y_test, n_classes=10) - -# %% Graph representation of our network - -# %% Placeholders for 40x40 resolution -x = tf.placeholder(tf.float32, [None, 1600]) -y = tf.placeholder(tf.float32, [None, 10]) - -# %% Since x is currently [batch, height*width], we need to reshape to a -# 4-D tensor to use it in a convolutional graph. If one component of -# `shape` is the special value -1, the size of that dimension is -# computed so that the total size remains constant. Since we haven't -# defined the batch dimension's shape yet, we use -1 to denote this -# dimension should not change size. 
-x_tensor = tf.reshape(x, [-1, 40, 40, 1]) - -# %% We'll setup the two-layer localisation network to figure out the -# %% parameters for an affine transformation of the input -# %% Create variables for fully connected layer -W_fc_loc1 = weight_variable([1600, 20]) -b_fc_loc1 = bias_variable([20]) - -W_fc_loc2 = weight_variable([20, 6]) -# Use identity transformation as starting point -initial = np.array([[1., 0, 0], [0, 1., 0]]) -initial = initial.astype('float32') -initial = initial.flatten() -b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2') - -# %% Define the two layer localisation network -h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1) -# %% We can add dropout for regularizing and to reduce overfitting like so: -keep_prob = tf.placeholder(tf.float32) -h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob) -# %% Second layer -h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2) - -# %% We'll create a spatial transformer module to identify discriminative -# %% patches -out_size = (40, 40) -h_trans = transformer(x_tensor, h_fc_loc2, out_size) - -# %% We'll setup the first convolutional layer -# Weight matrix is [height x width x input_channels x output_channels] -filter_size = 3 -n_filters_1 = 16 -W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1]) - -# %% Bias is [output_channels] -b_conv1 = bias_variable([n_filters_1]) - -# %% Now we can build a graph which does the first layer of convolution: -# we define our stride as batch x height x width x channels -# instead of pooling, we use strides of 2 and more layers -# with smaller filters. 
- -h_conv1 = tf.nn.relu( - tf.nn.conv2d(input=h_trans, - filter=W_conv1, - strides=[1, 2, 2, 1], - padding='SAME') + - b_conv1) - -# %% And just like the first layer, add additional layers to create -# a deep net -n_filters_2 = 16 -W_conv2 = weight_variable([filter_size, filter_size, n_filters_1, n_filters_2]) -b_conv2 = bias_variable([n_filters_2]) -h_conv2 = tf.nn.relu( - tf.nn.conv2d(input=h_conv1, - filter=W_conv2, - strides=[1, 2, 2, 1], - padding='SAME') + - b_conv2) - -# %% We'll now reshape so we can connect to a fully-connected layer: -h_conv2_flat = tf.reshape(h_conv2, [-1, 10 * 10 * n_filters_2]) - -# %% Create a fully-connected layer: -n_fc = 1024 -W_fc1 = weight_variable([10 * 10 * n_filters_2, n_fc]) -b_fc1 = bias_variable([n_fc]) -h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1) - -h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - -# %% And finally our softmax layer: -W_fc2 = weight_variable([n_fc, 10]) -b_fc2 = bias_variable([10]) -y_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 - -# %% Define loss/eval/training functions -cross_entropy = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y)) -opt = tf.train.AdamOptimizer() -optimizer = opt.minimize(cross_entropy) -grads = opt.compute_gradients(cross_entropy, [b_fc_loc2]) - -# %% Monitor accuracy -correct_prediction = tf.equal(tf.argmax(y_logits, 1), tf.argmax(y, 1)) -accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float')) - -# %% We now create a new session to actually perform the initialization the -# variables: -sess = tf.Session() -sess.run(tf.global_variables_initializer()) - - -# %% We'll now train in minibatches and report accuracy, loss: -iter_per_epoch = 100 -n_epochs = 500 -train_size = 10000 - -indices = np.linspace(0, 10000 - 1, iter_per_epoch) -indices = indices.astype('int') - -for epoch_i in range(n_epochs): - for iter_i in range(iter_per_epoch - 1): - batch_xs = X_train[indices[iter_i]:indices[iter_i+1]] - batch_ys = 
Y_train[indices[iter_i]:indices[iter_i+1]] - - if iter_i % 10 == 0: - loss = sess.run(cross_entropy, - feed_dict={ - x: batch_xs, - y: batch_ys, - keep_prob: 1.0 - }) - print('Iteration: ' + str(iter_i) + ' Loss: ' + str(loss)) - - sess.run(optimizer, feed_dict={ - x: batch_xs, y: batch_ys, keep_prob: 0.8}) - - print('Accuracy (%d): ' % epoch_i + str(sess.run(accuracy, - feed_dict={ - x: X_valid, - y: Y_valid, - keep_prob: 1.0 - }))) - # theta = sess.run(h_fc_loc2, feed_dict={ - # x: batch_xs, keep_prob: 1.0}) - # print(theta[0]) diff --git a/research/transformer/data/README.md b/research/transformer/data/README.md deleted file mode 100644 index c2a9581fd10b29f19b71817bb60c6d5ef7fe842e..0000000000000000000000000000000000000000 --- a/research/transformer/data/README.md +++ /dev/null @@ -1,20 +0,0 @@ -### How to get the data - -#### Cluttered MNIST - -The cluttered MNIST dataset can be found here [1] or can be generated via [2]. - -Settings used for `cluttered_mnist.py` : - -```python - -ORG_SHP = [28, 28] -OUT_SHP = [40, 40] -NUM_DISTORTIONS = 8 -dist_size = (5, 5) - -``` - -[1] https://github.com/daviddao/spatial-transformer-tensorflow - -[2] https://github.com/skaae/recurrent-spatial-transformer-code/blob/master/MNIST_SEQUENCE/create_mnist_sequence.py \ No newline at end of file diff --git a/research/transformer/example.py b/research/transformer/example.py deleted file mode 100644 index 19ca64d1452b0e2a60c5394ba18493acdbc0db58..0000000000000000000000000000000000000000 --- a/research/transformer/example.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from scipy import ndimage -import tensorflow as tf -from spatial_transformer import transformer -import numpy as np -import matplotlib.pyplot as plt - -# %% Create a batch of three images (1600 x 1200) -# %% Image retrieved from: -# %% https://raw.githubusercontent.com/skaae/transformer_network/master/cat.jpg -im = ndimage.imread('cat.jpg') -im = im / 255. -im = im.reshape(1, 1200, 1600, 3) -im = im.astype('float32') - -# %% Let the output size of the transformer be half the image size. -out_size = (600, 800) - -# %% Simulate batch -batch = np.append(im, im, axis=0) -batch = np.append(batch, im, axis=0) -num_batch = 3 - -x = tf.placeholder(tf.float32, [None, 1200, 1600, 3]) -x = tf.cast(batch, 'float32') - -# %% Create localisation network and convolutional layer -with tf.variable_scope('spatial_transformer_0'): - - # %% Create a fully-connected layer with 6 output nodes - n_fc = 6 - W_fc1 = tf.Variable(tf.zeros([1200 * 1600 * 3, n_fc]), name='W_fc1') - - # %% Zoom into the image - initial = np.array([[0.5, 0, 0], [0, 0.5, 0]]) - initial = initial.astype('float32') - initial = initial.flatten() - - b_fc1 = tf.Variable(initial_value=initial, name='b_fc1') - h_fc1 = tf.matmul(tf.zeros([num_batch, 1200 * 1600 * 3]), W_fc1) + b_fc1 - h_trans = transformer(x, h_fc1, out_size) - -# %% Run session -sess = tf.Session() -sess.run(tf.global_variables_initializer()) -y = sess.run(h_trans, feed_dict={x: batch}) - -# plt.imshow(y[0]) diff --git 
a/research/transformer/spatial_transformer.py b/research/transformer/spatial_transformer.py deleted file mode 100644 index 47014fe270171e053cc6abc1a394975fa65b7063..0000000000000000000000000000000000000000 --- a/research/transformer/spatial_transformer.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from six.moves import xrange -import tensorflow as tf - - -def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs): - """Spatial Transformer Layer - - Implements a spatial transformer layer as described in [1]_. - Based on [2]_ and edited by David Dao for Tensorflow. - - Parameters - ---------- - U : float - The output of a convolutional net should have the - shape [num_batch, height, width, num_channels]. - theta: float - The output of the - localisation network should be [num_batch, 6]. - out_size: tuple of two ints - The size of the output of the network (height, width) - - References - ---------- - .. [1] Spatial Transformer Networks - Max Jaderberg, Karen Simonyan, Andrew Zisserman, Koray Kavukcuoglu - Submitted on 5 Jun 2015 - .. 
[2] https://github.com/skaae/transformer_network/blob/master/transformerlayer.py - - Notes - ----- - To initialize the network to the identity transform init - ``theta`` to : - identity = np.array([[1., 0., 0.], - [0., 1., 0.]]) - identity = identity.flatten() - theta = tf.Variable(initial_value=identity) - - """ - - def _repeat(x, n_repeats): - with tf.variable_scope('_repeat'): - rep = tf.transpose( - tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0]) - rep = tf.cast(rep, 'int32') - x = tf.matmul(tf.reshape(x, (-1, 1)), rep) - return tf.reshape(x, [-1]) - - def _interpolate(im, x, y, out_size): - with tf.variable_scope('_interpolate'): - # constants - num_batch = tf.shape(im)[0] - height = tf.shape(im)[1] - width = tf.shape(im)[2] - channels = tf.shape(im)[3] - - x = tf.cast(x, 'float32') - y = tf.cast(y, 'float32') - height_f = tf.cast(height, 'float32') - width_f = tf.cast(width, 'float32') - out_height = out_size[0] - out_width = out_size[1] - zero = tf.zeros([], dtype='int32') - max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') - max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') - - # scale indices from [-1, 1] to [0, width/height] - x = (x + 1.0)*(width_f) / 2.0 - y = (y + 1.0)*(height_f) / 2.0 - - # do sampling - x0 = tf.cast(tf.floor(x), 'int32') - x1 = x0 + 1 - y0 = tf.cast(tf.floor(y), 'int32') - y1 = y0 + 1 - - x0 = tf.clip_by_value(x0, zero, max_x) - x1 = tf.clip_by_value(x1, zero, max_x) - y0 = tf.clip_by_value(y0, zero, max_y) - y1 = tf.clip_by_value(y1, zero, max_y) - dim2 = width - dim1 = width*height - base = _repeat(tf.range(num_batch)*dim1, out_height*out_width) - base_y0 = base + y0*dim2 - base_y1 = base + y1*dim2 - idx_a = base_y0 + x0 - idx_b = base_y1 + x0 - idx_c = base_y0 + x1 - idx_d = base_y1 + x1 - - # use indices to lookup pixels in the flat image and restore - # channels dim - im_flat = tf.reshape(im, tf.stack([-1, channels])) - im_flat = tf.cast(im_flat, 'float32') - Ia = tf.gather(im_flat, idx_a) - Ib = tf.gather(im_flat, 
idx_b) - Ic = tf.gather(im_flat, idx_c) - Id = tf.gather(im_flat, idx_d) - - # and finally calculate interpolated values - x0_f = tf.cast(x0, 'float32') - x1_f = tf.cast(x1, 'float32') - y0_f = tf.cast(y0, 'float32') - y1_f = tf.cast(y1, 'float32') - wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1) - wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1) - wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1) - wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1) - output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id]) - return output - - def _meshgrid(height, width): - with tf.variable_scope('_meshgrid'): - # This should be equivalent to: - # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), - # np.linspace(-1, 1, height)) - # ones = np.ones(np.prod(x_t.shape)) - # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) - x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), - tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) - y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), - tf.ones(shape=tf.stack([1, width]))) - - x_t_flat = tf.reshape(x_t, (1, -1)) - y_t_flat = tf.reshape(y_t, (1, -1)) - - ones = tf.ones_like(x_t_flat) - grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) - return grid - - def _transform(theta, input_dim, out_size): - with tf.variable_scope('_transform'): - num_batch = tf.shape(input_dim)[0] - height = tf.shape(input_dim)[1] - width = tf.shape(input_dim)[2] - num_channels = tf.shape(input_dim)[3] - theta = tf.reshape(theta, (-1, 2, 3)) - theta = tf.cast(theta, 'float32') - - # grid of (x_t, y_t, 1), eq (1) in ref [1] - height_f = tf.cast(height, 'float32') - width_f = tf.cast(width, 'float32') - out_height = out_size[0] - out_width = out_size[1] - grid = _meshgrid(out_height, out_width) - grid = tf.expand_dims(grid, 0) - grid = tf.reshape(grid, [-1]) - grid = tf.tile(grid, tf.stack([num_batch])) - grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) - - # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) - T_g = tf.matmul(theta, grid) 
- x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) - y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) - x_s_flat = tf.reshape(x_s, [-1]) - y_s_flat = tf.reshape(y_s, [-1]) - - input_transformed = _interpolate( - input_dim, x_s_flat, y_s_flat, - out_size) - - output = tf.reshape( - input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) - return output - - with tf.variable_scope(name): - output = _transform(theta, U, out_size) - return output - - -def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer'): - """Batch Spatial Transformer Layer - - Parameters - ---------- - - U : float - tensor of inputs [num_batch,height,width,num_channels] - thetas : float - a set of transformations for each input [num_batch,num_transforms,6] - out_size : int - the size of the output [out_height,out_width] - - Returns: float - Tensor of size [num_batch*num_transforms,out_height,out_width,num_channels] - """ - with tf.variable_scope(name): - num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) - indices = [[i]*num_transforms for i in xrange(num_batch)] - input_repeated = tf.gather(U, tf.reshape(indices, [-1])) - return transformer(input_repeated, thetas, out_size) diff --git a/research/transformer/tf_utils.py b/research/transformer/tf_utils.py deleted file mode 100644 index 3cdac28bcab852d461e040247313a93eb535d2c2..0000000000000000000000000000000000000000 --- a/research/transformer/tf_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# %% Borrowed utils from here: https://github.com/pkmital/tensorflow_tutorials/ -import tensorflow as tf -import numpy as np - -def conv2d(x, n_filters, - k_h=5, k_w=5, - stride_h=2, stride_w=2, - stddev=0.02, - activation=lambda x: x, - bias=True, - padding='SAME', - name="Conv2D"): - """2D Convolution with options for kernel size, stride, and init deviation. - Parameters - ---------- - x : Tensor - Input tensor to convolve. - n_filters : int - Number of filters to apply. - k_h : int, optional - Kernel height. - k_w : int, optional - Kernel width. - stride_h : int, optional - Stride in rows. - stride_w : int, optional - Stride in cols. - stddev : float, optional - Initialization's standard deviation. - activation : arguments, optional - Function which applies a nonlinearity - padding : str, optional - 'SAME' or 'VALID' - name : str, optional - Variable scope to use. - Returns - ------- - x : Tensor - Convolved input. - """ - with tf.variable_scope(name): - w = tf.get_variable( - 'w', [k_h, k_w, x.get_shape()[-1], n_filters], - initializer=tf.truncated_normal_initializer(stddev=stddev)) - conv = tf.nn.conv2d( - x, w, strides=[1, stride_h, stride_w, 1], padding=padding) - if bias: - b = tf.get_variable( - 'b', [n_filters], - initializer=tf.truncated_normal_initializer(stddev=stddev)) - conv = conv + b - return conv - -def linear(x, n_units, scope=None, stddev=0.02, - activation=lambda x: x): - """Fully-connected network. - Parameters - ---------- - x : Tensor - Input tensor to the network. - n_units : int - Number of units to connect to. - scope : str, optional - Variable scope to use. - stddev : float, optional - Initialization's standard deviation. 
- activation : arguments, optional - Function which applies a nonlinearity - Returns - ------- - x : Tensor - Fully-connected output. - """ - shape = x.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = tf.get_variable("Matrix", [shape[1], n_units], tf.float32, - tf.random_normal_initializer(stddev=stddev)) - return activation(tf.matmul(x, matrix)) - -# %% -def weight_variable(shape): - '''Helper function to create a weight variable initialized with - a normal distribution - Parameters - ---------- - shape : list - Size of weight variable - ''' - #initial = tf.random_normal(shape, mean=0.0, stddev=0.01) - initial = tf.zeros(shape) - return tf.Variable(initial) - -# %% -def bias_variable(shape): - '''Helper function to create a bias variable initialized with - a constant value. - Parameters - ---------- - shape : list - Size of weight variable - ''' - initial = tf.random_normal(shape, mean=0.0, stddev=0.01) - return tf.Variable(initial) - -# %% -def dense_to_one_hot(labels, n_classes=2): - """Convert class labels from scalars to one-hot vectors.""" - labels = np.array(labels) - n_labels = labels.shape[0] - index_offset = np.arange(n_labels) * n_classes - labels_one_hot = np.zeros((n_labels, n_classes), dtype=np.float32) - labels_one_hot.flat[index_offset + labels.ravel()] = 1 - return labels_one_hot diff --git a/research/vid2depth/README.md b/research/vid2depth/README.md index 6c79c6ad34134e012d55fd0a2b39d6ea55c7268e..0d03b7a7dce003c6f8ef47cd85cb4e7a668a9e1e 100644 --- a/research/vid2depth/README.md +++ b/research/vid2depth/README.md @@ -65,15 +65,6 @@ You will need to register in order to download the data. 
Download the following * leftImg8bit_sequence_trainvaltest.zip * camera_trainvaltest.zip -### Download Bike dataset (17GB) (optional) - -```shell -mkdir -p ~/vid2depth/bike-uncompressed -cd ~/vid2depth/bike-uncompressed -wget https://storage.googleapis.com/brain-robotics-data/bike/BikeVideoDataset.tar -tar xvf BikeVideoDataset.tar -``` - ## 3. Inference ### Download trained model @@ -122,18 +113,6 @@ python dataset/gen_data.py \ --seq_length 3 ``` -### Prepare Bike training sequences (optional) - -```shell -# Prepare training sequences. -cd tensorflow/models/research/vid2depth -python dataset/gen_data.py \ - --dataset_name bike \ - --dataset_dir ~/vid2depth/bike-uncompressed \ - --data_dir ~/vid2depth/data/bike \ - --seq_length 3 -``` - ### Compile the ICP op (work in progress) The ICP op depends on multiple software packages (TensorFlow, Point Cloud diff --git a/research/video_prediction/README.md b/research/video_prediction/README.md deleted file mode 100644 index 89ea9e28566a8d35ccdc82a69fc79bfb0375efcf..0000000000000000000000000000000000000000 --- a/research/video_prediction/README.md +++ /dev/null @@ -1,102 +0,0 @@ -![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg) -![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen) -![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg) - -# Video Prediction with Neural Advection - -*A TensorFlow implementation of the models described in [Unsupervised Learning for Physical Interaction through Video Prediction (Finn et al., 2016)](https://arxiv.org/abs/1605.07157).* - -This video prediction model, which is optionally conditioned on actions, -predicts future video by internally predicting how to transform the last -image (which may have been predicted) into the next image. 
As a result, it can -reuse apperance information from previous frames and can better generalize to -objects not seen in the training set. Some example predictions on novel objects -are shown below: - -![Animation](https://storage.googleapis.com/push_gens/novelgengifs9/16_70.gif) -![Animation](https://storage.googleapis.com/push_gens/novelgengifs9/2_96.gif) -![Animation](https://storage.googleapis.com/push_gens/novelgengifs9/1_38.gif) -![Animation](https://storage.googleapis.com/push_gens/novelgengifs9/11_10.gif) -![Animation](https://storage.googleapis.com/push_gens/novelgengifs9/3_34.gif) - -When the model is conditioned on actions, it changes it's predictions based on -the passed in action. Here we show the models predictions in response to varying -the magnitude of the passed in actions, from small to large: - -![Animation](https://storage.googleapis.com/push_gens/webgifs/0xact_0.gif) -![Animation](https://storage.googleapis.com/push_gens/05xact_0.gif) -![Animation](https://storage.googleapis.com/push_gens/webgifs/1xact_0.gif) -![Animation](https://storage.googleapis.com/push_gens/webgifs/15xact_0.gif) - -![Animation](https://storage.googleapis.com/push_gens/webgifs/0xact_17.gif) -![Animation](https://storage.googleapis.com/push_gens/webgifs/05xact_17.gif) -![Animation](https://storage.googleapis.com/push_gens/webgifs/1xact_17.gif) -![Animation](https://storage.googleapis.com/push_gens/webgifs/15xact_17.gif) - - -Because the model is trained with an l2 objective, it represents uncertainty as -blur. - -## Requirements -* Tensorflow (see tensorflow.org for installation instructions) -* spatial_tranformer model in tensorflow/models, for the spatial tranformer - predictor (STP). - -## Data -The data used to train this model is located -[here](https://sites.google.com/site/brainrobotdata/home/push-dataset). - -To download the robot data, run the following. 
-```shell -./download_data.sh -``` - -## Training the model - -To train the model, run the prediction_train.py file. -```shell -python prediction_train.py -``` - -There are several flags which can control the model that is trained, which are -exeplified below: -```shell -python prediction_train.py \ - --data_dir=push/push_train \ # path to the training set. - --model=CDNA \ # the model type to use - DNA, CDNA, or STP - --output_dir=./checkpoints \ # where to save model checkpoints - --event_log_dir=./summaries \ # where to save training statistics - --num_iterations=100000 \ # number of training iterations - --pretrained_model=model \ # path to model to initialize from, random if emtpy - --sequence_length=10 \ # the number of total frames in a sequence - --context_frames=2 \ # the number of ground truth frames to pass in at start - --use_state=1 \ # whether or not to condition on actions and the initial state - --num_masks=10 \ # the number of transformations and corresponding masks - --schedsamp_k=900.0 \ # the constant used for scheduled sampling or -1 - --train_val_split=0.95 \ # the percentage of training data for validation - --batch_size=32 \ # the training batch size - --learning_rate=0.001 \ # the initial learning rate for the Adam optimizer -``` - -If the dynamic neural advection (DNA) model is being used, the `--num_masks` -option should be set to one. - -The `--context_frames` option defines both the number of initial ground truth -frames to pass in, as well as when to start penalizing the model's predictions. - -The data directory `--data_dir` should contain tfrecord files with the format -used in the released push dataset. See -[here](https://sites.google.com/site/brainrobotdata/home/push-dataset) for -details. If the `--use_state` option is not set, then the data only needs to -contain image sequences, not states and actions. 
- - -## Contact - -To ask questions or report issues please open an issue on the tensorflow/models -[issues tracker](https://github.com/tensorflow/models/issues). -Please assign issues to @cbfinn. - -## Credits - -This code was written by Chelsea Finn. diff --git a/research/video_prediction/download_data.sh b/research/video_prediction/download_data.sh deleted file mode 100755 index 4928add5fa3833642014af8347b487bac53bf6b7..0000000000000000000000000000000000000000 --- a/research/video_prediction/download_data.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -# Example: -# -# download_dataset.sh datafiles.txt ./tmp -# -# will download all of the files listed in the file, datafiles.txt, into -# a directory, "./tmp". -# -# Each line of the datafiles.txt file should contain the path from the -# bucket root to a file. 
- -ARGC="$#" -LISTING_FILE=push_datafiles.txt -if [ "${ARGC}" -ge 1 ]; then - LISTING_FILE=$1 -fi -OUTPUT_DIR="./" -if [ "${ARGC}" -ge 2 ]; then - OUTPUT_DIR=$2 -fi - -echo "OUTPUT_DIR=$OUTPUT_DIR" - -mkdir "${OUTPUT_DIR}" - -function download_file { - FILE=$1 - BUCKET="https://storage.googleapis.com/brain-robotics-data" - URL="${BUCKET}/${FILE}" - OUTPUT_FILE="${OUTPUT_DIR}/${FILE}" - DIRECTORY=`dirname ${OUTPUT_FILE}` - echo DIRECTORY=$DIRECTORY - mkdir -p "${DIRECTORY}" - curl --output ${OUTPUT_FILE} ${URL} -} - -while read filename; do - download_file $filename -done <${LISTING_FILE} diff --git a/research/video_prediction/lstm_ops.py b/research/video_prediction/lstm_ops.py deleted file mode 100644 index 1f8c8d97ae023e5e7ef621219de5b697d0e00d21..0000000000000000000000000000000000000000 --- a/research/video_prediction/lstm_ops.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Convolutional LSTM implementation.""" - -import tensorflow as tf - -from tensorflow.contrib.slim import add_arg_scope -from tensorflow.contrib.slim import layers - - -def init_state(inputs, - state_shape, - state_initializer=tf.zeros_initializer(), - dtype=tf.float32): - """Helper function to create an initial state given inputs. 
- - Args: - inputs: input Tensor, at least 2D, the first dimension being batch_size - state_shape: the shape of the state. - state_initializer: Initializer(shape, dtype) for state Tensor. - dtype: Optional dtype, needed when inputs is None. - Returns: - A tensors representing the initial state. - """ - if inputs is not None: - # Handle both the dynamic shape as well as the inferred shape. - inferred_batch_size = inputs.get_shape().with_rank_at_least(1)[0] - dtype = inputs.dtype - else: - inferred_batch_size = 0 - initial_state = state_initializer( - [inferred_batch_size] + state_shape, dtype=dtype) - return initial_state - - -@add_arg_scope -def basic_conv_lstm_cell(inputs, - state, - num_channels, - filter_size=5, - forget_bias=1.0, - scope=None, - reuse=None): - """Basic LSTM recurrent network cell, with 2D convolution connctions. - - We add forget_bias (default: 1) to the biases of the forget gate in order to - reduce the scale of forgetting in the beginning of the training. - - It does not allow cell clipping, a projection layer, and does not - use peep-hole connections: it is the basic baseline. - - Args: - inputs: input Tensor, 4D, batch x height x width x channels. - state: state Tensor, 4D, batch x height x width x channels. - num_channels: the number of output channels in the layer. - filter_size: the shape of the each convolution filter. - forget_bias: the initial value of the forget biases. - scope: Optional scope for variable_scope. - reuse: whether or not the layer and the variables should be reused. - - Returns: - a tuple of tensors representing output and the new state. 
- """ - spatial_size = inputs.get_shape()[1:3] - if state is None: - state = init_state(inputs, list(spatial_size) + [2 * num_channels]) - with tf.variable_scope(scope, - 'BasicConvLstmCell', - [inputs, state], - reuse=reuse): - inputs.get_shape().assert_has_rank(4) - state.get_shape().assert_has_rank(4) - c, h = tf.split(axis=3, num_or_size_splits=2, value=state) - inputs_h = tf.concat(axis=3, values=[inputs, h]) - # Parameters of gates are concatenated into one conv for efficiency. - i_j_f_o = layers.conv2d(inputs_h, - 4 * num_channels, [filter_size, filter_size], - stride=1, - activation_fn=None, - scope='Gates') - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o) - - new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j) - new_h = tf.tanh(new_c) * tf.sigmoid(o) - - return new_h, tf.concat(axis=3, values=[new_c, new_h]) - - - diff --git a/research/video_prediction/prediction_input.py b/research/video_prediction/prediction_input.py deleted file mode 100644 index e35b9daed373e65b5a1ec35959d9234aef320460..0000000000000000000000000000000000000000 --- a/research/video_prediction/prediction_input.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Code for building the input for the prediction model.""" - -import os - -import numpy as np -import tensorflow as tf - -from tensorflow.python.platform import flags -from tensorflow.python.platform import gfile - - -FLAGS = flags.FLAGS - -# Original image dimensions -ORIGINAL_WIDTH = 640 -ORIGINAL_HEIGHT = 512 -COLOR_CHAN = 3 - -# Default image dimensions. -IMG_WIDTH = 64 -IMG_HEIGHT = 64 - -# Dimension of the state and action. -STATE_DIM = 5 - - -def build_tfrecord_input(training=True): - """Create input tfrecord tensors. - - Args: - training: training or validation data. - Returns: - list of tensors corresponding to images, actions, and states. The images - tensor is 5D, batch x time x height x width x channels. The state and - action tensors are 3D, batch x time x dimension. - Raises: - RuntimeError: if no files found. - """ - filenames = gfile.Glob(os.path.join(FLAGS.data_dir, '*')) - if not filenames: - raise RuntimeError('No data files found.') - index = int(np.floor(FLAGS.train_val_split * len(filenames))) - if training: - filenames = filenames[:index] - else: - filenames = filenames[index:] - filename_queue = tf.train.string_input_producer(filenames, shuffle=True) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - - image_seq, state_seq, action_seq = [], [], [] - - for i in range(FLAGS.sequence_length): - image_name = 'move/' + str(i) + '/image/encoded' - action_name = 'move/' + str(i) + '/commanded_pose/vec_pitch_yaw' - state_name = 'move/' + str(i) + '/endeffector/vec_pitch_yaw' - if FLAGS.use_state: - features = {image_name: tf.FixedLenFeature([1], tf.string), - action_name: tf.FixedLenFeature([STATE_DIM], tf.float32), - state_name: tf.FixedLenFeature([STATE_DIM], tf.float32)} - else: - features = {image_name: tf.FixedLenFeature([1], tf.string)} - features = tf.parse_single_example(serialized_example, features=features) - - 
image_buffer = tf.reshape(features[image_name], shape=[]) - image = tf.image.decode_jpeg(image_buffer, channels=COLOR_CHAN) - image.set_shape([ORIGINAL_HEIGHT, ORIGINAL_WIDTH, COLOR_CHAN]) - - if IMG_HEIGHT != IMG_WIDTH: - raise ValueError('Unequal height and width unsupported') - - crop_size = min(ORIGINAL_HEIGHT, ORIGINAL_WIDTH) - image = tf.image.resize_image_with_crop_or_pad(image, crop_size, crop_size) - image = tf.reshape(image, [1, crop_size, crop_size, COLOR_CHAN]) - image = tf.image.resize_bicubic(image, [IMG_HEIGHT, IMG_WIDTH]) - image = tf.cast(image, tf.float32) / 255.0 - image_seq.append(image) - - if FLAGS.use_state: - state = tf.reshape(features[state_name], shape=[1, STATE_DIM]) - state_seq.append(state) - action = tf.reshape(features[action_name], shape=[1, STATE_DIM]) - action_seq.append(action) - - image_seq = tf.concat(axis=0, values=image_seq) - - if FLAGS.use_state: - state_seq = tf.concat(axis=0, values=state_seq) - action_seq = tf.concat(axis=0, values=action_seq) - [image_batch, action_batch, state_batch] = tf.train.batch( - [image_seq, action_seq, state_seq], - FLAGS.batch_size, - num_threads=FLAGS.batch_size, - capacity=100 * FLAGS.batch_size) - return image_batch, action_batch, state_batch - else: - image_batch = tf.train.batch( - [image_seq], - FLAGS.batch_size, - num_threads=FLAGS.batch_size, - capacity=100 * FLAGS.batch_size) - zeros_batch = tf.zeros([FLAGS.batch_size, FLAGS.sequence_length, STATE_DIM]) - return image_batch, zeros_batch, zeros_batch - diff --git a/research/video_prediction/prediction_model.py b/research/video_prediction/prediction_model.py deleted file mode 100644 index ebdc15d7ccdd96b89cfc6d73a9600678f162ff74..0000000000000000000000000000000000000000 --- a/research/video_prediction/prediction_model.py +++ /dev/null @@ -1,350 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Model architecture for predictive model, including CDNA, DNA, and STP.""" - -import numpy as np -import tensorflow as tf - -import tensorflow.contrib.slim as slim -from tensorflow.contrib.layers.python import layers as tf_layers -from lstm_ops import basic_conv_lstm_cell - -# Amount to use when lower bounding tensors -RELU_SHIFT = 1e-12 - -# kernel size for DNA and CDNA. -DNA_KERN_SIZE = 5 - - -def construct_model(images, - actions=None, - states=None, - iter_num=-1.0, - k=-1, - use_state=True, - num_masks=10, - stp=False, - cdna=True, - dna=False, - context_frames=2): - """Build convolutional lstm video predictor using STP, CDNA, or DNA. - - Args: - images: tensor of ground truth image sequences - actions: tensor of action sequences - states: tensor of ground truth state sequences - iter_num: tensor of the current training iteration (for sched. sampling) - k: constant used for scheduled sampling. -1 to feed in own prediction. 
- use_state: True to include state and action in prediction - num_masks: the number of different pixel motion predictions (and - the number of masks for each of those predictions) - stp: True to use Spatial Transformer Predictor (STP) - cdna: True to use Convoluational Dynamic Neural Advection (CDNA) - dna: True to use Dynamic Neural Advection (DNA) - context_frames: number of ground truth frames to pass in before - feeding in own predictions - Returns: - gen_images: predicted future image frames - gen_states: predicted future states - - Raises: - ValueError: if more than one network option specified or more than 1 mask - specified for DNA model. - """ - if stp + cdna + dna != 1: - raise ValueError('More than one, or no network option specified.') - batch_size, img_height, img_width, color_channels = images[0].get_shape()[0:4] - lstm_func = basic_conv_lstm_cell - - # Generated robot states and images. - gen_states, gen_images = [], [] - current_state = states[0] - - if k == -1: - feedself = True - else: - # Scheduled sampling: - # Calculate number of ground-truth frames to pass in. - num_ground_truth = tf.to_int32( - tf.round(tf.to_float(batch_size) * (k / (k + tf.exp(iter_num / k))))) - feedself = False - - # LSTM state sizes and states. - lstm_size = np.int32(np.array([32, 32, 64, 64, 128, 64, 32])) - lstm_state1, lstm_state2, lstm_state3, lstm_state4 = None, None, None, None - lstm_state5, lstm_state6, lstm_state7 = None, None, None - - for image, action in zip(images[:-1], actions[:-1]): - # Reuse variables after the first timestep. - reuse = bool(gen_images) - - done_warm_start = len(gen_images) > context_frames - 1 - with slim.arg_scope( - [lstm_func, slim.layers.conv2d, slim.layers.fully_connected, - tf_layers.layer_norm, slim.layers.conv2d_transpose], - reuse=reuse): - - if feedself and done_warm_start: - # Feed in generated image. 
- prev_image = gen_images[-1] - elif done_warm_start: - # Scheduled sampling - prev_image = scheduled_sample(image, gen_images[-1], batch_size, - num_ground_truth) - else: - # Always feed in ground_truth - prev_image = image - - # Predicted state is always fed back in - state_action = tf.concat(axis=1, values=[action, current_state]) - - enc0 = slim.layers.conv2d( - prev_image, - 32, [5, 5], - stride=2, - scope='scale1_conv1', - normalizer_fn=tf_layers.layer_norm, - normalizer_params={'scope': 'layer_norm1'}) - - hidden1, lstm_state1 = lstm_func( - enc0, lstm_state1, lstm_size[0], scope='state1') - hidden1 = tf_layers.layer_norm(hidden1, scope='layer_norm2') - hidden2, lstm_state2 = lstm_func( - hidden1, lstm_state2, lstm_size[1], scope='state2') - hidden2 = tf_layers.layer_norm(hidden2, scope='layer_norm3') - enc1 = slim.layers.conv2d( - hidden2, hidden2.get_shape()[3], [3, 3], stride=2, scope='conv2') - - hidden3, lstm_state3 = lstm_func( - enc1, lstm_state3, lstm_size[2], scope='state3') - hidden3 = tf_layers.layer_norm(hidden3, scope='layer_norm4') - hidden4, lstm_state4 = lstm_func( - hidden3, lstm_state4, lstm_size[3], scope='state4') - hidden4 = tf_layers.layer_norm(hidden4, scope='layer_norm5') - enc2 = slim.layers.conv2d( - hidden4, hidden4.get_shape()[3], [3, 3], stride=2, scope='conv3') - - # Pass in state and action. 
- smear = tf.reshape( - state_action, - [int(batch_size), 1, 1, int(state_action.get_shape()[1])]) - smear = tf.tile( - smear, [1, int(enc2.get_shape()[1]), int(enc2.get_shape()[2]), 1]) - if use_state: - enc2 = tf.concat(axis=3, values=[enc2, smear]) - enc3 = slim.layers.conv2d( - enc2, hidden4.get_shape()[3], [1, 1], stride=1, scope='conv4') - - hidden5, lstm_state5 = lstm_func( - enc3, lstm_state5, lstm_size[4], scope='state5') # last 8x8 - hidden5 = tf_layers.layer_norm(hidden5, scope='layer_norm6') - enc4 = slim.layers.conv2d_transpose( - hidden5, hidden5.get_shape()[3], 3, stride=2, scope='convt1') - - hidden6, lstm_state6 = lstm_func( - enc4, lstm_state6, lstm_size[5], scope='state6') # 16x16 - hidden6 = tf_layers.layer_norm(hidden6, scope='layer_norm7') - # Skip connection. - hidden6 = tf.concat(axis=3, values=[hidden6, enc1]) # both 16x16 - - enc5 = slim.layers.conv2d_transpose( - hidden6, hidden6.get_shape()[3], 3, stride=2, scope='convt2') - hidden7, lstm_state7 = lstm_func( - enc5, lstm_state7, lstm_size[6], scope='state7') # 32x32 - hidden7 = tf_layers.layer_norm(hidden7, scope='layer_norm8') - - # Skip connection. - hidden7 = tf.concat(axis=3, values=[hidden7, enc0]) # both 32x32 - - enc6 = slim.layers.conv2d_transpose( - hidden7, - hidden7.get_shape()[3], 3, stride=2, scope='convt3', - normalizer_fn=tf_layers.layer_norm, - normalizer_params={'scope': 'layer_norm9'}) - - if dna: - # Using largest hidden state for predicting untied conv kernels. - enc7 = slim.layers.conv2d_transpose( - enc6, DNA_KERN_SIZE**2, 1, stride=1, scope='convt4') - else: - # Using largest hidden state for predicting a new image layer. - enc7 = slim.layers.conv2d_transpose( - enc6, color_channels, 1, stride=1, scope='convt4') - # This allows the network to also generate one image from scratch, - # which is useful when regions of the image become unoccluded. 
- transformed = [tf.nn.sigmoid(enc7)] - - if stp: - stp_input0 = tf.reshape(hidden5, [int(batch_size), -1]) - stp_input1 = slim.layers.fully_connected( - stp_input0, 100, scope='fc_stp') - transformed += stp_transformation(prev_image, stp_input1, num_masks) - elif cdna: - cdna_input = tf.reshape(hidden5, [int(batch_size), -1]) - transformed += cdna_transformation(prev_image, cdna_input, num_masks, - int(color_channels)) - elif dna: - # Only one mask is supported (more should be unnecessary). - if num_masks != 1: - raise ValueError('Only one mask is supported for DNA model.') - transformed = [dna_transformation(prev_image, enc7)] - - masks = slim.layers.conv2d_transpose( - enc6, num_masks + 1, 1, stride=1, scope='convt7') - masks = tf.reshape( - tf.nn.softmax(tf.reshape(masks, [-1, num_masks + 1])), - [int(batch_size), int(img_height), int(img_width), num_masks + 1]) - mask_list = tf.split(axis=3, num_or_size_splits=num_masks + 1, value=masks) - output = mask_list[0] * prev_image - for layer, mask in zip(transformed, mask_list[1:]): - output += layer * mask - gen_images.append(output) - - current_state = slim.layers.fully_connected( - state_action, - int(current_state.get_shape()[1]), - scope='state_pred', - activation_fn=None) - gen_states.append(current_state) - - return gen_images, gen_states - - -## Utility functions -def stp_transformation(prev_image, stp_input, num_masks): - """Apply spatial transformer predictor (STP) to previous image. - - Args: - prev_image: previous image to be transformed. - stp_input: hidden layer to be used for computing STN parameters. - num_masks: number of masks and hence the number of STP transformations. - Returns: - List of images transformed by the predicted STP parameters. - """ - # Only import spatial transformer if needed. 
- from spatial_transformer import transformer - - identity_params = tf.convert_to_tensor( - np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32)) - transformed = [] - for i in range(num_masks - 1): - params = slim.layers.fully_connected( - stp_input, 6, scope='stp_params' + str(i), - activation_fn=None) + identity_params - transformed.append(transformer(prev_image, params)) - - return transformed - - -def cdna_transformation(prev_image, cdna_input, num_masks, color_channels): - """Apply convolutional dynamic neural advection to previous image. - - Args: - prev_image: previous image to be transformed. - cdna_input: hidden lyaer to be used for computing CDNA kernels. - num_masks: the number of masks and hence the number of CDNA transformations. - color_channels: the number of color channels in the images. - Returns: - List of images transformed by the predicted CDNA kernels. - """ - batch_size = int(cdna_input.get_shape()[0]) - height = int(prev_image.get_shape()[1]) - width = int(prev_image.get_shape()[2]) - - # Predict kernels using linear function of last hidden layer. - cdna_kerns = slim.layers.fully_connected( - cdna_input, - DNA_KERN_SIZE * DNA_KERN_SIZE * num_masks, - scope='cdna_params', - activation_fn=None) - - # Reshape and normalize. - cdna_kerns = tf.reshape( - cdna_kerns, [batch_size, DNA_KERN_SIZE, DNA_KERN_SIZE, 1, num_masks]) - cdna_kerns = tf.nn.relu(cdna_kerns - RELU_SHIFT) + RELU_SHIFT - norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keep_dims=True) - cdna_kerns /= norm_factor - - # Treat the color channel dimension as the batch dimension since the same - # transformation is applied to each color channel. - # Treat the batch dimension as the channel dimension so that - # depthwise_conv2d can apply a different transformation to each sample. - cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3]) - cdna_kerns = tf.reshape(cdna_kerns, [DNA_KERN_SIZE, DNA_KERN_SIZE, batch_size, num_masks]) - # Swap the batch and channel dimensions. 
- prev_image = tf.transpose(prev_image, [3, 1, 2, 0]) - - # Transform image. - transformed = tf.nn.depthwise_conv2d(prev_image, cdna_kerns, [1, 1, 1, 1], 'SAME') - - # Transpose the dimensions to where they belong. - transformed = tf.reshape(transformed, [color_channels, height, width, batch_size, num_masks]) - transformed = tf.transpose(transformed, [3, 1, 2, 0, 4]) - transformed = tf.unstack(transformed, axis=-1) - return transformed - - -def dna_transformation(prev_image, dna_input): - """Apply dynamic neural advection to previous image. - - Args: - prev_image: previous image to be transformed. - dna_input: hidden lyaer to be used for computing DNA transformation. - Returns: - List of images transformed by the predicted CDNA kernels. - """ - # Construct translated images. - prev_image_pad = tf.pad(prev_image, [[0, 0], [2, 2], [2, 2], [0, 0]]) - image_height = int(prev_image.get_shape()[1]) - image_width = int(prev_image.get_shape()[2]) - - inputs = [] - for xkern in range(DNA_KERN_SIZE): - for ykern in range(DNA_KERN_SIZE): - inputs.append( - tf.expand_dims( - tf.slice(prev_image_pad, [0, xkern, ykern, 0], - [-1, image_height, image_width, -1]), [3])) - inputs = tf.concat(axis=3, values=inputs) - - # Normalize channels to 1. - kernel = tf.nn.relu(dna_input - RELU_SHIFT) + RELU_SHIFT - kernel = tf.expand_dims( - kernel / tf.reduce_sum( - kernel, [3], keep_dims=True), [4]) - return tf.reduce_sum(kernel * inputs, [3], keep_dims=False) - - -def scheduled_sample(ground_truth_x, generated_x, batch_size, num_ground_truth): - """Sample batch with specified mix of ground truth and generated data points. - - Args: - ground_truth_x: tensor of ground-truth data points. - generated_x: tensor of generated data points. - batch_size: batch size - num_ground_truth: number of ground-truth examples to include in batch. - Returns: - New batch with num_ground_truth sampled from ground_truth_x and the rest - from generated_x. 
- """ - idx = tf.random_shuffle(tf.range(int(batch_size))) - ground_truth_idx = tf.gather(idx, tf.range(num_ground_truth)) - generated_idx = tf.gather(idx, tf.range(num_ground_truth, int(batch_size))) - - ground_truth_examps = tf.gather(ground_truth_x, ground_truth_idx) - generated_examps = tf.gather(generated_x, generated_idx) - return tf.dynamic_stitch([ground_truth_idx, generated_idx], - [ground_truth_examps, generated_examps]) diff --git a/research/video_prediction/prediction_train.py b/research/video_prediction/prediction_train.py deleted file mode 100644 index dfc7ab6c2455a871cea5b3e2d283d072d5cf4ca2..0000000000000000000000000000000000000000 --- a/research/video_prediction/prediction_train.py +++ /dev/null @@ -1,255 +0,0 @@ -# Copyright 2016 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Code for training the prediction model.""" - -import numpy as np -import tensorflow as tf - -from tensorflow.python.platform import app -from tensorflow.python.platform import flags - -from prediction_input import build_tfrecord_input -from prediction_model import construct_model - -# How often to record tensorboard summaries. -SUMMARY_INTERVAL = 40 - -# How often to run a batch through the validation model. 
-VAL_INTERVAL = 200 - -# How often to save a model checkpoint -SAVE_INTERVAL = 2000 - -# EPSILON to avoid NAN -EPSILON = 1e-9 - -# tf record data location: -DATA_DIR = 'push/push_train' - -# local output directory -OUT_DIR = '/tmp/data' - -FLAGS = flags.FLAGS - -flags.DEFINE_string('data_dir', DATA_DIR, 'directory containing data.') -flags.DEFINE_string('output_dir', OUT_DIR, 'directory for model checkpoints.') -flags.DEFINE_string('event_log_dir', OUT_DIR, 'directory for writing summary.') -flags.DEFINE_integer('num_iterations', 100000, 'number of training iterations.') -flags.DEFINE_string('pretrained_model', '', - 'filepath of a pretrained model to initialize from.') - -flags.DEFINE_integer('sequence_length', 10, - 'sequence length, including context frames.') -flags.DEFINE_integer('context_frames', 2, '# of frames before predictions.') -flags.DEFINE_integer('use_state', 1, - 'Whether or not to give the state+action to the model') - -flags.DEFINE_string('model', 'CDNA', - 'model architecture to use - CDNA, DNA, or STP') - -flags.DEFINE_integer('num_masks', 10, - 'number of masks, usually 1 for DNA, 10 for CDNA, STP.') -flags.DEFINE_float('schedsamp_k', 900.0, - 'The k hyperparameter for scheduled sampling,' - '-1 for no scheduled sampling.') -flags.DEFINE_float('train_val_split', 0.95, - 'The percentage of files to use for the training set,' - ' vs. the validation set.') - -flags.DEFINE_integer('batch_size', 32, 'batch size for training') -flags.DEFINE_float('learning_rate', 0.001, - 'the base learning rate of the generator') - - -## Helper functions -def peak_signal_to_noise_ratio(true, pred): - """Image quality metric based on maximal signal power vs. power of the noise. - - Args: - true: the ground truth image. - pred: the predicted image. 
- Returns: - peak signal to noise ratio (PSNR) - """ - return 10.0 * (- tf.log(tf.maximum(mean_squared_error(true, pred), EPSILON))) / tf.log(10.0) - - -def mean_squared_error(true, pred): - """L2 distance between tensors true and pred. - - Args: - true: the ground truth image. - pred: the predicted image. - Returns: - mean squared error between ground truth and predicted image. - """ - return tf.reduce_sum(tf.square(true - pred)) / tf.to_float(tf.size(pred)) - - -class Model(object): - - def __init__(self, - images=None, - actions=None, - states=None, - sequence_length=None, - reuse_scope=None, - prefix=None): - - if sequence_length is None: - sequence_length = FLAGS.sequence_length - - if prefix is None: - prefix = tf.placeholder(tf.string, []) - self.prefix = prefix - self.iter_num = tf.placeholder(tf.float32, []) - summaries = [] - - # Split into timesteps. - actions = tf.split(axis=1, num_or_size_splits=int(actions.get_shape()[1]), value=actions) - actions = [tf.squeeze(act) for act in actions] - states = tf.split(axis=1, num_or_size_splits=int(states.get_shape()[1]), value=states) - states = [tf.squeeze(st) for st in states] - images = tf.split(axis=1, num_or_size_splits=int(images.get_shape()[1]), value=images) - images = [tf.squeeze(img) for img in images] - - if reuse_scope is None: - gen_images, gen_states = construct_model( - images, - actions, - states, - iter_num=self.iter_num, - k=FLAGS.schedsamp_k, - use_state=FLAGS.use_state, - num_masks=FLAGS.num_masks, - cdna=FLAGS.model == 'CDNA', - dna=FLAGS.model == 'DNA', - stp=FLAGS.model == 'STP', - context_frames=FLAGS.context_frames) - else: # If it's a validation or test model. 
- with tf.variable_scope(reuse_scope, reuse=True): - gen_images, gen_states = construct_model( - images, - actions, - states, - iter_num=self.iter_num, - k=FLAGS.schedsamp_k, - use_state=FLAGS.use_state, - num_masks=FLAGS.num_masks, - cdna=FLAGS.model == 'CDNA', - dna=FLAGS.model == 'DNA', - stp=FLAGS.model == 'STP', - context_frames=FLAGS.context_frames) - - # L2 loss, PSNR for eval. - loss, psnr_all = 0.0, 0.0 - for i, x, gx in zip( - range(len(gen_images)), images[FLAGS.context_frames:], - gen_images[FLAGS.context_frames - 1:]): - recon_cost = mean_squared_error(x, gx) - psnr_i = peak_signal_to_noise_ratio(x, gx) - psnr_all += psnr_i - summaries.append( - tf.summary.scalar(prefix + '_recon_cost' + str(i), recon_cost)) - summaries.append(tf.summary.scalar(prefix + '_psnr' + str(i), psnr_i)) - loss += recon_cost - - for i, state, gen_state in zip( - range(len(gen_states)), states[FLAGS.context_frames:], - gen_states[FLAGS.context_frames - 1:]): - state_cost = mean_squared_error(state, gen_state) * 1e-4 - summaries.append( - tf.summary.scalar(prefix + '_state_cost' + str(i), state_cost)) - loss += state_cost - summaries.append(tf.summary.scalar(prefix + '_psnr_all', psnr_all)) - self.psnr_all = psnr_all - - self.loss = loss = loss / np.float32(len(images) - FLAGS.context_frames) - - summaries.append(tf.summary.scalar(prefix + '_loss', loss)) - - self.lr = tf.placeholder_with_default(FLAGS.learning_rate, ()) - - self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss) - self.summ_op = tf.summary.merge(summaries) - - -def main(unused_argv): - - print('Constructing models and inputs.') - with tf.variable_scope('model', reuse=None) as training_scope: - images, actions, states = build_tfrecord_input(training=True) - model = Model(images, actions, states, FLAGS.sequence_length, - prefix='train') - - with tf.variable_scope('val_model', reuse=None): - val_images, val_actions, val_states = build_tfrecord_input(training=False) - val_model = Model(val_images, 
val_actions, val_states, - FLAGS.sequence_length, training_scope, prefix='val') - - print('Constructing saver.') - # Make saver. - saver = tf.train.Saver( - tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES), max_to_keep=0) - - # Make training session. - sess = tf.InteractiveSession() - sess.run(tf.global_variables_initializer()) - - summary_writer = tf.summary.FileWriter( - FLAGS.event_log_dir, graph=sess.graph, flush_secs=10) - - if FLAGS.pretrained_model: - saver.restore(sess, FLAGS.pretrained_model) - - tf.train.start_queue_runners(sess) - - tf.logging.info('iteration number, cost') - - # Run training. - for itr in range(FLAGS.num_iterations): - # Generate new batch of data. - feed_dict = {model.iter_num: np.float32(itr), - model.lr: FLAGS.learning_rate} - cost, _, summary_str = sess.run([model.loss, model.train_op, model.summ_op], - feed_dict) - - # Print info: iteration #, cost. - tf.logging.info(str(itr) + ' ' + str(cost)) - - if (itr) % VAL_INTERVAL == 2: - # Run through validation set. 
- feed_dict = {val_model.lr: 0.0, - val_model.iter_num: np.float32(itr)} - _, val_summary_str = sess.run([val_model.train_op, val_model.summ_op], - feed_dict) - summary_writer.add_summary(val_summary_str, itr) - - if (itr) % SAVE_INTERVAL == 2: - tf.logging.info('Saving model.') - saver.save(sess, FLAGS.output_dir + '/model' + str(itr)) - - if (itr) % SUMMARY_INTERVAL: - summary_writer.add_summary(summary_str, itr) - - tf.logging.info('Saving model.') - saver.save(sess, FLAGS.output_dir + '/model') - tf.logging.info('Training complete') - tf.logging.flush() - - -if __name__ == '__main__': - app.run() diff --git a/research/video_prediction/push_datafiles.txt b/research/video_prediction/push_datafiles.txt deleted file mode 100644 index 48da046330211a0b3580e964711c133a159bba93..0000000000000000000000000000000000000000 --- a/research/video_prediction/push_datafiles.txt +++ /dev/null @@ -1,274 +0,0 @@ -push/push_testnovel/push_testnovel.tfrecord-00000-of-00005 -push/push_testnovel/push_testnovel.tfrecord-00001-of-00005 -push/push_testnovel/push_testnovel.tfrecord-00002-of-00005 -push/push_testnovel/push_testnovel.tfrecord-00003-of-00005 -push/push_testnovel/push_testnovel.tfrecord-00004-of-00005 -push/push_testseen/push_testseen.tfrecord-00000-of-00005 -push/push_testseen/push_testseen.tfrecord-00001-of-00005 -push/push_testseen/push_testseen.tfrecord-00002-of-00005 -push/push_testseen/push_testseen.tfrecord-00003-of-00005 -push/push_testseen/push_testseen.tfrecord-00004-of-00005 -push/push_train/push_train.tfrecord-00000-of-00264 -push/push_train/push_train.tfrecord-00001-of-00264 -push/push_train/push_train.tfrecord-00002-of-00264 -push/push_train/push_train.tfrecord-00003-of-00264 -push/push_train/push_train.tfrecord-00004-of-00264 -push/push_train/push_train.tfrecord-00005-of-00264 -push/push_train/push_train.tfrecord-00006-of-00264 -push/push_train/push_train.tfrecord-00007-of-00264 -push/push_train/push_train.tfrecord-00008-of-00264 
-push/push_train/push_train.tfrecord-00009-of-00264 -push/push_train/push_train.tfrecord-00010-of-00264 -push/push_train/push_train.tfrecord-00011-of-00264 -push/push_train/push_train.tfrecord-00012-of-00264 -push/push_train/push_train.tfrecord-00013-of-00264 -push/push_train/push_train.tfrecord-00014-of-00264 -push/push_train/push_train.tfrecord-00015-of-00264 -push/push_train/push_train.tfrecord-00016-of-00264 -push/push_train/push_train.tfrecord-00017-of-00264 -push/push_train/push_train.tfrecord-00018-of-00264 -push/push_train/push_train.tfrecord-00019-of-00264 -push/push_train/push_train.tfrecord-00020-of-00264 -push/push_train/push_train.tfrecord-00021-of-00264 -push/push_train/push_train.tfrecord-00022-of-00264 -push/push_train/push_train.tfrecord-00023-of-00264 -push/push_train/push_train.tfrecord-00024-of-00264 -push/push_train/push_train.tfrecord-00025-of-00264 -push/push_train/push_train.tfrecord-00026-of-00264 -push/push_train/push_train.tfrecord-00027-of-00264 -push/push_train/push_train.tfrecord-00028-of-00264 -push/push_train/push_train.tfrecord-00029-of-00264 -push/push_train/push_train.tfrecord-00030-of-00264 -push/push_train/push_train.tfrecord-00031-of-00264 -push/push_train/push_train.tfrecord-00032-of-00264 -push/push_train/push_train.tfrecord-00033-of-00264 -push/push_train/push_train.tfrecord-00034-of-00264 -push/push_train/push_train.tfrecord-00035-of-00264 -push/push_train/push_train.tfrecord-00036-of-00264 -push/push_train/push_train.tfrecord-00037-of-00264 -push/push_train/push_train.tfrecord-00038-of-00264 -push/push_train/push_train.tfrecord-00039-of-00264 -push/push_train/push_train.tfrecord-00040-of-00264 -push/push_train/push_train.tfrecord-00041-of-00264 -push/push_train/push_train.tfrecord-00042-of-00264 -push/push_train/push_train.tfrecord-00043-of-00264 -push/push_train/push_train.tfrecord-00044-of-00264 -push/push_train/push_train.tfrecord-00045-of-00264 -push/push_train/push_train.tfrecord-00046-of-00264 
-push/push_train/push_train.tfrecord-00047-of-00264 -push/push_train/push_train.tfrecord-00048-of-00264 -push/push_train/push_train.tfrecord-00049-of-00264 -push/push_train/push_train.tfrecord-00050-of-00264 -push/push_train/push_train.tfrecord-00051-of-00264 -push/push_train/push_train.tfrecord-00052-of-00264 -push/push_train/push_train.tfrecord-00053-of-00264 -push/push_train/push_train.tfrecord-00054-of-00264 -push/push_train/push_train.tfrecord-00055-of-00264 -push/push_train/push_train.tfrecord-00056-of-00264 -push/push_train/push_train.tfrecord-00057-of-00264 -push/push_train/push_train.tfrecord-00058-of-00264 -push/push_train/push_train.tfrecord-00059-of-00264 -push/push_train/push_train.tfrecord-00060-of-00264 -push/push_train/push_train.tfrecord-00061-of-00264 -push/push_train/push_train.tfrecord-00062-of-00264 -push/push_train/push_train.tfrecord-00063-of-00264 -push/push_train/push_train.tfrecord-00064-of-00264 -push/push_train/push_train.tfrecord-00065-of-00264 -push/push_train/push_train.tfrecord-00066-of-00264 -push/push_train/push_train.tfrecord-00067-of-00264 -push/push_train/push_train.tfrecord-00068-of-00264 -push/push_train/push_train.tfrecord-00069-of-00264 -push/push_train/push_train.tfrecord-00070-of-00264 -push/push_train/push_train.tfrecord-00071-of-00264 -push/push_train/push_train.tfrecord-00072-of-00264 -push/push_train/push_train.tfrecord-00073-of-00264 -push/push_train/push_train.tfrecord-00074-of-00264 -push/push_train/push_train.tfrecord-00075-of-00264 -push/push_train/push_train.tfrecord-00076-of-00264 -push/push_train/push_train.tfrecord-00077-of-00264 -push/push_train/push_train.tfrecord-00078-of-00264 -push/push_train/push_train.tfrecord-00079-of-00264 -push/push_train/push_train.tfrecord-00080-of-00264 -push/push_train/push_train.tfrecord-00081-of-00264 -push/push_train/push_train.tfrecord-00082-of-00264 -push/push_train/push_train.tfrecord-00083-of-00264 -push/push_train/push_train.tfrecord-00084-of-00264 
-push/push_train/push_train.tfrecord-00085-of-00264 -push/push_train/push_train.tfrecord-00086-of-00264 -push/push_train/push_train.tfrecord-00087-of-00264 -push/push_train/push_train.tfrecord-00088-of-00264 -push/push_train/push_train.tfrecord-00089-of-00264 -push/push_train/push_train.tfrecord-00090-of-00264 -push/push_train/push_train.tfrecord-00091-of-00264 -push/push_train/push_train.tfrecord-00092-of-00264 -push/push_train/push_train.tfrecord-00093-of-00264 -push/push_train/push_train.tfrecord-00094-of-00264 -push/push_train/push_train.tfrecord-00095-of-00264 -push/push_train/push_train.tfrecord-00096-of-00264 -push/push_train/push_train.tfrecord-00097-of-00264 -push/push_train/push_train.tfrecord-00098-of-00264 -push/push_train/push_train.tfrecord-00099-of-00264 -push/push_train/push_train.tfrecord-00100-of-00264 -push/push_train/push_train.tfrecord-00101-of-00264 -push/push_train/push_train.tfrecord-00102-of-00264 -push/push_train/push_train.tfrecord-00103-of-00264 -push/push_train/push_train.tfrecord-00104-of-00264 -push/push_train/push_train.tfrecord-00105-of-00264 -push/push_train/push_train.tfrecord-00106-of-00264 -push/push_train/push_train.tfrecord-00107-of-00264 -push/push_train/push_train.tfrecord-00108-of-00264 -push/push_train/push_train.tfrecord-00109-of-00264 -push/push_train/push_train.tfrecord-00110-of-00264 -push/push_train/push_train.tfrecord-00111-of-00264 -push/push_train/push_train.tfrecord-00112-of-00264 -push/push_train/push_train.tfrecord-00113-of-00264 -push/push_train/push_train.tfrecord-00114-of-00264 -push/push_train/push_train.tfrecord-00115-of-00264 -push/push_train/push_train.tfrecord-00116-of-00264 -push/push_train/push_train.tfrecord-00117-of-00264 -push/push_train/push_train.tfrecord-00118-of-00264 -push/push_train/push_train.tfrecord-00119-of-00264 -push/push_train/push_train.tfrecord-00120-of-00264 -push/push_train/push_train.tfrecord-00121-of-00264 -push/push_train/push_train.tfrecord-00122-of-00264 
-push/push_train/push_train.tfrecord-00123-of-00264 -push/push_train/push_train.tfrecord-00124-of-00264 -push/push_train/push_train.tfrecord-00125-of-00264 -push/push_train/push_train.tfrecord-00126-of-00264 -push/push_train/push_train.tfrecord-00127-of-00264 -push/push_train/push_train.tfrecord-00128-of-00264 -push/push_train/push_train.tfrecord-00129-of-00264 -push/push_train/push_train.tfrecord-00130-of-00264 -push/push_train/push_train.tfrecord-00131-of-00264 -push/push_train/push_train.tfrecord-00132-of-00264 -push/push_train/push_train.tfrecord-00133-of-00264 -push/push_train/push_train.tfrecord-00134-of-00264 -push/push_train/push_train.tfrecord-00135-of-00264 -push/push_train/push_train.tfrecord-00136-of-00264 -push/push_train/push_train.tfrecord-00137-of-00264 -push/push_train/push_train.tfrecord-00138-of-00264 -push/push_train/push_train.tfrecord-00139-of-00264 -push/push_train/push_train.tfrecord-00140-of-00264 -push/push_train/push_train.tfrecord-00141-of-00264 -push/push_train/push_train.tfrecord-00142-of-00264 -push/push_train/push_train.tfrecord-00143-of-00264 -push/push_train/push_train.tfrecord-00144-of-00264 -push/push_train/push_train.tfrecord-00145-of-00264 -push/push_train/push_train.tfrecord-00146-of-00264 -push/push_train/push_train.tfrecord-00147-of-00264 -push/push_train/push_train.tfrecord-00148-of-00264 -push/push_train/push_train.tfrecord-00149-of-00264 -push/push_train/push_train.tfrecord-00150-of-00264 -push/push_train/push_train.tfrecord-00151-of-00264 -push/push_train/push_train.tfrecord-00152-of-00264 -push/push_train/push_train.tfrecord-00153-of-00264 -push/push_train/push_train.tfrecord-00154-of-00264 -push/push_train/push_train.tfrecord-00155-of-00264 -push/push_train/push_train.tfrecord-00156-of-00264 -push/push_train/push_train.tfrecord-00157-of-00264 -push/push_train/push_train.tfrecord-00158-of-00264 -push/push_train/push_train.tfrecord-00159-of-00264 -push/push_train/push_train.tfrecord-00160-of-00264 
-push/push_train/push_train.tfrecord-00161-of-00264 -push/push_train/push_train.tfrecord-00162-of-00264 -push/push_train/push_train.tfrecord-00163-of-00264 -push/push_train/push_train.tfrecord-00164-of-00264 -push/push_train/push_train.tfrecord-00165-of-00264 -push/push_train/push_train.tfrecord-00166-of-00264 -push/push_train/push_train.tfrecord-00167-of-00264 -push/push_train/push_train.tfrecord-00168-of-00264 -push/push_train/push_train.tfrecord-00169-of-00264 -push/push_train/push_train.tfrecord-00170-of-00264 -push/push_train/push_train.tfrecord-00171-of-00264 -push/push_train/push_train.tfrecord-00172-of-00264 -push/push_train/push_train.tfrecord-00173-of-00264 -push/push_train/push_train.tfrecord-00174-of-00264 -push/push_train/push_train.tfrecord-00175-of-00264 -push/push_train/push_train.tfrecord-00176-of-00264 -push/push_train/push_train.tfrecord-00177-of-00264 -push/push_train/push_train.tfrecord-00178-of-00264 -push/push_train/push_train.tfrecord-00179-of-00264 -push/push_train/push_train.tfrecord-00180-of-00264 -push/push_train/push_train.tfrecord-00181-of-00264 -push/push_train/push_train.tfrecord-00182-of-00264 -push/push_train/push_train.tfrecord-00183-of-00264 -push/push_train/push_train.tfrecord-00184-of-00264 -push/push_train/push_train.tfrecord-00185-of-00264 -push/push_train/push_train.tfrecord-00186-of-00264 -push/push_train/push_train.tfrecord-00187-of-00264 -push/push_train/push_train.tfrecord-00188-of-00264 -push/push_train/push_train.tfrecord-00189-of-00264 -push/push_train/push_train.tfrecord-00190-of-00264 -push/push_train/push_train.tfrecord-00191-of-00264 -push/push_train/push_train.tfrecord-00192-of-00264 -push/push_train/push_train.tfrecord-00193-of-00264 -push/push_train/push_train.tfrecord-00194-of-00264 -push/push_train/push_train.tfrecord-00195-of-00264 -push/push_train/push_train.tfrecord-00196-of-00264 -push/push_train/push_train.tfrecord-00197-of-00264 -push/push_train/push_train.tfrecord-00198-of-00264 
-push/push_train/push_train.tfrecord-00199-of-00264 -push/push_train/push_train.tfrecord-00200-of-00264 -push/push_train/push_train.tfrecord-00201-of-00264 -push/push_train/push_train.tfrecord-00202-of-00264 -push/push_train/push_train.tfrecord-00203-of-00264 -push/push_train/push_train.tfrecord-00204-of-00264 -push/push_train/push_train.tfrecord-00205-of-00264 -push/push_train/push_train.tfrecord-00206-of-00264 -push/push_train/push_train.tfrecord-00207-of-00264 -push/push_train/push_train.tfrecord-00208-of-00264 -push/push_train/push_train.tfrecord-00209-of-00264 -push/push_train/push_train.tfrecord-00210-of-00264 -push/push_train/push_train.tfrecord-00211-of-00264 -push/push_train/push_train.tfrecord-00212-of-00264 -push/push_train/push_train.tfrecord-00213-of-00264 -push/push_train/push_train.tfrecord-00214-of-00264 -push/push_train/push_train.tfrecord-00215-of-00264 -push/push_train/push_train.tfrecord-00216-of-00264 -push/push_train/push_train.tfrecord-00217-of-00264 -push/push_train/push_train.tfrecord-00218-of-00264 -push/push_train/push_train.tfrecord-00219-of-00264 -push/push_train/push_train.tfrecord-00220-of-00264 -push/push_train/push_train.tfrecord-00221-of-00264 -push/push_train/push_train.tfrecord-00222-of-00264 -push/push_train/push_train.tfrecord-00223-of-00264 -push/push_train/push_train.tfrecord-00224-of-00264 -push/push_train/push_train.tfrecord-00225-of-00264 -push/push_train/push_train.tfrecord-00226-of-00264 -push/push_train/push_train.tfrecord-00227-of-00264 -push/push_train/push_train.tfrecord-00228-of-00264 -push/push_train/push_train.tfrecord-00229-of-00264 -push/push_train/push_train.tfrecord-00230-of-00264 -push/push_train/push_train.tfrecord-00231-of-00264 -push/push_train/push_train.tfrecord-00232-of-00264 -push/push_train/push_train.tfrecord-00233-of-00264 -push/push_train/push_train.tfrecord-00234-of-00264 -push/push_train/push_train.tfrecord-00235-of-00264 -push/push_train/push_train.tfrecord-00236-of-00264 
-push/push_train/push_train.tfrecord-00237-of-00264 -push/push_train/push_train.tfrecord-00238-of-00264 -push/push_train/push_train.tfrecord-00239-of-00264 -push/push_train/push_train.tfrecord-00240-of-00264 -push/push_train/push_train.tfrecord-00241-of-00264 -push/push_train/push_train.tfrecord-00242-of-00264 -push/push_train/push_train.tfrecord-00243-of-00264 -push/push_train/push_train.tfrecord-00244-of-00264 -push/push_train/push_train.tfrecord-00245-of-00264 -push/push_train/push_train.tfrecord-00246-of-00264 -push/push_train/push_train.tfrecord-00247-of-00264 -push/push_train/push_train.tfrecord-00248-of-00264 -push/push_train/push_train.tfrecord-00249-of-00264 -push/push_train/push_train.tfrecord-00250-of-00264 -push/push_train/push_train.tfrecord-00251-of-00264 -push/push_train/push_train.tfrecord-00252-of-00264 -push/push_train/push_train.tfrecord-00253-of-00264 -push/push_train/push_train.tfrecord-00254-of-00264 -push/push_train/push_train.tfrecord-00255-of-00264 -push/push_train/push_train.tfrecord-00256-of-00264 -push/push_train/push_train.tfrecord-00257-of-00264 -push/push_train/push_train.tfrecord-00258-of-00264 -push/push_train/push_train.tfrecord-00259-of-00264 -push/push_train/push_train.tfrecord-00260-of-00264 -push/push_train/push_train.tfrecord-00261-of-00264 -push/push_train/push_train.tfrecord-00262-of-00264 -push/push_train/push_train.tfrecord-00263-of-00264